diff --git a/README.md b/README.md
index 682d5961226399a3e1b7c343ca88298b89219a92..4a9096f842541985d121b87f165913f6f63a892e 100644
--- a/README.md
+++ b/README.md
@@ -6,21 +6,28 @@ The Xenomai Cobalt real-time core depends on a patch to the mainline Linux kerne
 Kernel version: openeuler kernel-4.19.90-2012.4.0.0053
 Xenomai version: xenomai-3.1
- patch names:
- 1.ipipe-core-4.19.55-oe1.patch
- 2.enable_irq.patch
- 3.cobalt-core-3.1-4.19.90.patch
- 4.cobalt-core-3.1-4.19.90-oe1.patch
+ arm64 patch names:
+ 1.ipipe-core-4.19.55-oe1_arm64.patch
+ 2.enable_irq_arm64.patch
+ 3.cobalt-core-3.1-4.19.90_arm64.patch
+ 4.cobalt-core-3.1-4.19.90-oe1_arm64.patch
 5.openeuler_defconfig_arm64.patch
 6.openeuler_defconfig_arm64_2.patch
+ x86_64 patch names:
+ 1.cobalt-core-3.1-4.19.90-oe1_x86.patch
+ 2.cobalt-core-3.1-4.19.90_x86.patch
+ 3.ipipe-core-4.19.90-oe1_x86.patch
+ 4.openeuler_defconfig_x86_2.patch
+ 5.openeuler_defconfig_x86.patch
+
+
 
 #### Software Architecture
-Software architecture notes: this release targets the ARM architecture, specifically Kunpeng 920 and FT 2000/4 machines.
+Software architecture notes: ARM architecture: Kunpeng 920 and FT 2000/4 machines.
 
 #### Known Issues
-
+ARM architecture:
 1. Phytium NIC driver issue
 2. Serial port driver issue in the RTOS environment
-3. Xenomai real-time kernel patches for the x86 architecture will be uploaded later
 
 #### Installation
 1. xxxx
diff --git a/cobalt-core-3.1-4.19.90-oe1.patch b/cobalt-core-3.1-4.19.90-oe1_arm64.patch
similarity index 100%
rename from cobalt-core-3.1-4.19.90-oe1.patch
rename to cobalt-core-3.1-4.19.90-oe1_arm64.patch
diff --git a/cobalt-core-3.1-4.19.90-oe1_x86.patch b/cobalt-core-3.1-4.19.90-oe1_x86.patch
new file mode 100755
index 0000000000000000000000000000000000000000..51abbbca9a5bc87d689e4dc9c2d36934a22f0478
--- /dev/null
+++ b/cobalt-core-3.1-4.19.90-oe1_x86.patch
@@ -0,0 +1,16 @@
+--- kernel/include/asm-generic/xenomai/syscall.h 2020-02-04 01:35:56.000000000 +0800
++++ kernel_new/include/asm-generic/xenomai/syscall.h 2021-04-07 13:58:24.939549909 +0800
+@@ -27,13 +27,8 @@
+ #include
+ #include
+
+-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
+ #define access_rok(addr, size) access_ok((addr), (size))
+ #define access_wok(addr, size) access_ok((addr), (size))
+-#else
+-#define access_rok(addr, size) access_ok(VERIFY_READ, (addr), (size))
+-#define access_wok(addr, size) access_ok(VERIFY_WRITE, (addr), (size))
+-#endif
+
+ #define __xn_reg_arglist(regs) \
+ __xn_reg_arg1(regs), \
diff --git a/cobalt-core-3.1-4.19.90.patch b/cobalt-core-3.1-4.19.90_arm64.patch
similarity index 100%
rename from cobalt-core-3.1-4.19.90.patch
rename to cobalt-core-3.1-4.19.90_arm64.patch
diff --git a/cobalt-core-3.1-4.19.90_x86.patch b/cobalt-core-3.1-4.19.90_x86.patch
new file mode 100755
index 0000000000000000000000000000000000000000..cfba05e8d6c109ff2839cb5ab9a875f18ffe3689
--- /dev/null
+++ b/cobalt-core-3.1-4.19.90_x86.patch
@@ -0,0 +1,264343 @@
+--- linux/include/xenomai/version.h 1970-01-01 08:00:00.000000000 +0800
++++ linux-patched/include/xenomai/version.h 2021-04-07 16:01:28.509632334 +0800
+@@ -0,0 +1,34 @@
++/*
++ * Copyright (C) 2001-2013 Philippe Gerum .
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _XENOMAI_VERSION_H ++#define _XENOMAI_VERSION_H ++ ++#ifndef __KERNEL__ ++#include ++#include ++#endif ++ ++#define XENO_VERSION(maj, min, rev) (((maj)<<16)|((min)<<8)|(rev)) ++ ++#define XENO_VERSION_CODE XENO_VERSION(CONFIG_XENO_VERSION_MAJOR, \ ++ CONFIG_XENO_VERSION_MINOR, \ ++ CONFIG_XENO_REVISION_LEVEL) ++ ++#define XENO_VERSION_STRING CONFIG_XENO_VERSION_STRING ++ ++#endif /* _XENOMAI_VERSION_H */ +--- linux/include/xenomai/rtdm/uapi/udd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/udd.h 2021-04-07 16:01:28.502632344 +0800 +@@ -0,0 +1,98 @@ ++/** ++ * @file ++ * This file is part of the Xenomai project. ++ * ++ * @author Copyright (C) 2014 Philippe Gerum ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _RTDM_UAPI_UDD_H ++#define _RTDM_UAPI_UDD_H ++ ++/** ++ * @addtogroup rtdm_udd ++ * ++ * @{ ++ */ ++ ++/** ++ * @anchor udd_signotify ++ * @brief UDD event notification descriptor ++ * ++ * This structure shall be used to pass the information required to ++ * enable/disable the notification by signal upon interrupt receipt. ++ * ++ * If PID is zero or negative, the notification is disabled. ++ * Otherwise, the Cobalt thread whose PID is given will receive the ++ * Cobalt signal also mentioned, along with the count of interrupts at ++ * the time of the receipt stored in siginfo.si_int. A Cobalt thread ++ * must explicitly wait for notifications using the sigwaitinfo() or ++ * sigtimedwait() services (no asynchronous mode available). ++ */ ++struct udd_signotify { ++ /** ++ * PID of the Cobalt thread to notify upon interrupt ++ * receipt. If @a pid is zero or negative, the notification is ++ * disabled. ++ */ ++ pid_t pid; ++ /** ++ * Signal number to send to PID for notifying, which must be ++ * in the range [SIGRTMIN .. SIGRTMAX] inclusive. This value ++ * is not considered if @a pid is zero or negative. ++ */ ++ int sig; ++}; ++ ++/** ++ * @anchor udd_ioctl_codes @name UDD_IOCTL ++ * IOCTL requests ++ * ++ * @{ ++ */ ++ ++/** ++ * Enable the interrupt line. The UDD-class mini-driver should handle ++ * this request when received through its ->ioctl() handler if ++ * provided. Otherwise, the UDD core enables the interrupt line in the ++ * interrupt controller before returning to the caller. ++ */ ++#define UDD_RTIOC_IRQEN _IO(RTDM_CLASS_UDD, 0) ++/** ++ * Disable the interrupt line. The UDD-class mini-driver should handle ++ * this request when received through its ->ioctl() handler if ++ * provided. 
Otherwise, the UDD core disables the interrupt line in ++ * the interrupt controller before returning to the caller. ++ * ++ * @note The mini-driver must handle the UDD_RTIOC_IRQEN request for a ++ * custom IRQ from its ->ioctl() handler, otherwise such request ++ * receives -EIO from the UDD core. ++ */ ++#define UDD_RTIOC_IRQDIS _IO(RTDM_CLASS_UDD, 1) ++/** ++ * Enable/Disable signal notification upon interrupt event. A valid ++ * @ref udd_signotify "notification descriptor" must be passed along ++ * with this request, which is handled by the UDD core directly. ++ * ++ * @note The mini-driver must handle the UDD_RTIOC_IRQDIS request for ++ * a custom IRQ from its ->ioctl() handler, otherwise such request ++ * receives -EIO from the UDD core. ++ */ ++#define UDD_RTIOC_IRQSIG _IOW(RTDM_CLASS_UDD, 2, struct udd_signotify) ++ ++/** @} */ ++/** @} */ ++ ++#endif /* !_RTDM_UAPI_UDD_H */ +--- linux/include/xenomai/rtdm/uapi/spi.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/spi.h 2021-04-07 16:01:28.497632352 +0800 +@@ -0,0 +1,42 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _RTDM_UAPI_SPI_H ++#define _RTDM_UAPI_SPI_H ++ ++#include ++ ++struct rtdm_spi_config { ++ __u32 speed_hz; ++ __u16 mode; ++ __u8 bits_per_word; ++}; ++ ++struct rtdm_spi_iobufs { ++ __u32 io_len; ++ __u32 i_offset; ++ __u32 o_offset; ++ __u32 map_len; ++}; ++ ++#define SPI_RTIOC_SET_CONFIG _IOW(RTDM_CLASS_SPI, 0, struct rtdm_spi_config) ++#define SPI_RTIOC_GET_CONFIG _IOR(RTDM_CLASS_SPI, 1, struct rtdm_spi_config) ++#define SPI_RTIOC_SET_IOBUFS _IOR(RTDM_CLASS_SPI, 2, struct rtdm_spi_iobufs) ++#define SPI_RTIOC_TRANSFER _IO(RTDM_CLASS_SPI, 3) ++#define SPI_RTIOC_TRANSFER_N _IOR(RTDM_CLASS_SPI, 4, int) ++ ++#endif /* !_RTDM_UAPI_SPI_H */ +--- linux/include/xenomai/rtdm/uapi/serial.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/serial.h 2021-04-07 16:01:28.493632357 +0800 +@@ -0,0 +1,407 @@ ++/** ++ * @file ++ * Real-Time Driver Model for Xenomai, serial device profile header ++ * ++ * @note Copyright (C) 2005-2007 Jan Kiszka ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ * ++ * @ingroup rtserial ++ */ ++#ifndef _RTDM_UAPI_SERIAL_H ++#define _RTDM_UAPI_SERIAL_H ++ ++#define RTSER_PROFILE_VER 3 ++ ++/*! ++ * @anchor RTSER_DEF_BAUD @name RTSER_DEF_BAUD ++ * Default baud rate ++ * @{ */ ++#define RTSER_DEF_BAUD 9600 ++/** @} */ ++ ++/*! ++ * @anchor RTSER_xxx_PARITY @name RTSER_xxx_PARITY ++ * Number of parity bits ++ * @{ */ ++#define RTSER_NO_PARITY 0x00 ++#define RTSER_ODD_PARITY 0x01 ++#define RTSER_EVEN_PARITY 0x03 ++#define RTSER_DEF_PARITY RTSER_NO_PARITY ++/** @} */ ++ ++/*! ++ * @anchor RTSER_xxx_BITS @name RTSER_xxx_BITS ++ * Number of data bits ++ * @{ */ ++#define RTSER_5_BITS 0x00 ++#define RTSER_6_BITS 0x01 ++#define RTSER_7_BITS 0x02 ++#define RTSER_8_BITS 0x03 ++#define RTSER_DEF_BITS RTSER_8_BITS ++/** @} */ ++ ++/*! ++ * @anchor RTSER_xxx_STOPB @name RTSER_xxx_STOPB ++ * Number of stop bits ++ * @{ */ ++#define RTSER_1_STOPB 0x00 ++/** valid only in combination with 5 data bits */ ++#define RTSER_1_5_STOPB 0x01 ++#define RTSER_2_STOPB 0x01 ++#define RTSER_DEF_STOPB RTSER_1_STOPB ++/** @} */ ++ ++/*! ++ * @anchor RTSER_xxx_HAND @name RTSER_xxx_HAND ++ * Handshake mechanisms ++ * @{ */ ++#define RTSER_NO_HAND 0x00 ++#define RTSER_RTSCTS_HAND 0x01 ++#define RTSER_DEF_HAND RTSER_NO_HAND ++/** @} */ ++ ++/*! ++ * @anchor RTSER_RS485_xxx @name RTSER_RS485_xxx ++ * RS485 mode with automatic RTS handling ++ * @{ */ ++#define RTSER_RS485_DISABLE 0x00 ++#define RTSER_RS485_ENABLE 0x01 ++#define RTSER_DEF_RS485 RTSER_RS485_DISABLE ++/** @} */ ++ ++/*! ++ * @anchor RTSER_FIFO_xxx @name RTSER_FIFO_xxx ++ * Reception FIFO interrupt threshold ++ * @{ */ ++#define RTSER_FIFO_DEPTH_1 0x00 ++#define RTSER_FIFO_DEPTH_4 0x40 ++#define RTSER_FIFO_DEPTH_8 0x80 ++#define RTSER_FIFO_DEPTH_14 0xC0 ++#define RTSER_DEF_FIFO_DEPTH RTSER_FIFO_DEPTH_1 ++/** @} */ ++ ++/*! ++ * @anchor RTSER_TIMEOUT_xxx @name RTSER_TIMEOUT_xxx ++ * Special timeout values, see also @ref RTDM_TIMEOUT_xxx ++ * @{ */ ++#define RTSER_TIMEOUT_INFINITE RTDM_TIMEOUT_INFINITE ++#define RTSER_TIMEOUT_NONE RTDM_TIMEOUT_NONE ++#define RTSER_DEF_TIMEOUT RTDM_TIMEOUT_INFINITE ++/** @} */ ++ ++/*! ++ * @anchor RTSER_xxx_TIMESTAMP_HISTORY @name RTSER_xxx_TIMESTAMP_HISTORY ++ * Timestamp history control ++ * @{ */ ++#define RTSER_RX_TIMESTAMP_HISTORY 0x01 ++#define RTSER_DEF_TIMESTAMP_HISTORY 0x00 ++/** @} */ ++ ++/*! ++ * @anchor RTSER_EVENT_xxx @name RTSER_EVENT_xxx ++ * Events bits ++ * @{ */ ++#define RTSER_EVENT_RXPEND 0x01 ++#define RTSER_EVENT_ERRPEND 0x02 ++#define RTSER_EVENT_MODEMHI 0x04 ++#define RTSER_EVENT_MODEMLO 0x08 ++#define RTSER_EVENT_TXEMPTY 0x10 ++#define RTSER_DEF_EVENT_MASK 0x00 ++/** @} */ ++ ++ ++/*! ++ * @anchor RTSER_SET_xxx @name RTSER_SET_xxx ++ * Configuration mask bits ++ * @{ */ ++#define RTSER_SET_BAUD 0x0001 ++#define RTSER_SET_PARITY 0x0002 ++#define RTSER_SET_DATA_BITS 0x0004 ++#define RTSER_SET_STOP_BITS 0x0008 ++#define RTSER_SET_HANDSHAKE 0x0010 ++#define RTSER_SET_FIFO_DEPTH 0x0020 ++#define RTSER_SET_TIMEOUT_RX 0x0100 ++#define RTSER_SET_TIMEOUT_TX 0x0200 ++#define RTSER_SET_TIMEOUT_EVENT 0x0400 ++#define RTSER_SET_TIMESTAMP_HISTORY 0x0800 ++#define RTSER_SET_EVENT_MASK 0x1000 ++#define RTSER_SET_RS485 0x2000 ++/** @} */ ++ ++ ++/*! 
++ * @anchor RTSER_LSR_xxx @name RTSER_LSR_xxx ++ * Line status bits ++ * @{ */ ++#define RTSER_LSR_DATA 0x01 ++#define RTSER_LSR_OVERRUN_ERR 0x02 ++#define RTSER_LSR_PARITY_ERR 0x04 ++#define RTSER_LSR_FRAMING_ERR 0x08 ++#define RTSER_LSR_BREAK_IND 0x10 ++#define RTSER_LSR_THR_EMTPY 0x20 ++#define RTSER_LSR_TRANSM_EMPTY 0x40 ++#define RTSER_LSR_FIFO_ERR 0x80 ++#define RTSER_SOFT_OVERRUN_ERR 0x0100 ++/** @} */ ++ ++ ++/*! ++ * @anchor RTSER_MSR_xxx @name RTSER_MSR_xxx ++ * Modem status bits ++ * @{ */ ++#define RTSER_MSR_DCTS 0x01 ++#define RTSER_MSR_DDSR 0x02 ++#define RTSER_MSR_TERI 0x04 ++#define RTSER_MSR_DDCD 0x08 ++#define RTSER_MSR_CTS 0x10 ++#define RTSER_MSR_DSR 0x20 ++#define RTSER_MSR_RI 0x40 ++#define RTSER_MSR_DCD 0x80 ++/** @} */ ++ ++ ++/*! ++ * @anchor RTSER_MCR_xxx @name RTSER_MCR_xxx ++ * Modem control bits ++ * @{ */ ++#define RTSER_MCR_DTR 0x01 ++#define RTSER_MCR_RTS 0x02 ++#define RTSER_MCR_OUT1 0x04 ++#define RTSER_MCR_OUT2 0x08 ++#define RTSER_MCR_LOOP 0x10 ++/** @} */ ++ ++ ++/*! ++ * @anchor RTSER_BREAK_xxx @name RTSER_BREAK_xxx ++ * Break control ++ * @{ */ ++#define RTSER_BREAK_CLR 0x00 ++#define RTSER_BREAK_SET 0x01 ++ ++ ++/** ++ * Serial device configuration ++ */ ++typedef struct rtser_config { ++ /** mask specifying valid fields, see @ref RTSER_SET_xxx */ ++ int config_mask; ++ ++ /** baud rate, default @ref RTSER_DEF_BAUD */ ++ int baud_rate; ++ ++ /** number of parity bits, see @ref RTSER_xxx_PARITY */ ++ int parity; ++ ++ /** number of data bits, see @ref RTSER_xxx_BITS */ ++ int data_bits; ++ ++ /** number of stop bits, see @ref RTSER_xxx_STOPB */ ++ int stop_bits; ++ ++ /** handshake mechanisms, see @ref RTSER_xxx_HAND */ ++ int handshake; ++ ++ /** reception FIFO interrupt threshold, see @ref RTSER_FIFO_xxx */ ++ int fifo_depth; ++ ++ int reserved; ++ ++ /** reception timeout, see @ref RTSER_TIMEOUT_xxx for special ++ * values */ ++ nanosecs_rel_t rx_timeout; ++ ++ /** transmission timeout, see @ref RTSER_TIMEOUT_xxx for special ++ * values */ ++ nanosecs_rel_t tx_timeout; ++ ++ /** event timeout, see @ref RTSER_TIMEOUT_xxx for special values */ ++ nanosecs_rel_t event_timeout; ++ ++ /** enable timestamp history, see @ref RTSER_xxx_TIMESTAMP_HISTORY */ ++ int timestamp_history; ++ ++ /** event mask to be used with @ref RTSER_RTIOC_WAIT_EVENT, see ++ * @ref RTSER_EVENT_xxx */ ++ int event_mask; ++ ++ /** enable RS485 mode, see @ref RTSER_RS485_xxx */ ++ int rs485; ++} rtser_config_t; ++ ++/** ++ * Serial device status ++ */ ++typedef struct rtser_status { ++ /** line status register, see @ref RTSER_LSR_xxx */ ++ int line_status; ++ ++ /** modem status register, see @ref RTSER_MSR_xxx */ ++ int modem_status; ++} rtser_status_t; ++ ++/** ++ * Additional information about serial device events ++ */ ++typedef struct rtser_event { ++ /** signalled events, see @ref RTSER_EVENT_xxx */ ++ int events; ++ ++ /** number of pending input characters */ ++ int rx_pending; ++ ++ /** last interrupt timestamp */ ++ nanosecs_abs_t last_timestamp; ++ ++ /** reception timestamp of oldest character in input queue */ ++ nanosecs_abs_t rxpend_timestamp; ++} rtser_event_t; ++ ++ ++#define RTIOC_TYPE_SERIAL RTDM_CLASS_SERIAL ++ ++ ++/*! ++ * @name Sub-Classes of RTDM_CLASS_SERIAL ++ * @{ */ ++#define RTDM_SUBCLASS_16550A 0 ++/** @} */ ++ ++ ++/*! 
++ * @anchor SERIOCTLs @name IOCTLs ++ * Serial device IOCTLs ++ * @{ */ ++ ++/** ++ * Get serial device configuration ++ * ++ * @param[out] arg Pointer to configuration buffer (struct rtser_config) ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define RTSER_RTIOC_GET_CONFIG \ ++ _IOR(RTIOC_TYPE_SERIAL, 0x00, struct rtser_config) ++ ++/** ++ * Set serial device configuration ++ * ++ * @param[in] arg Pointer to configuration buffer (struct rtser_config) ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EPERM is returned if the caller's context is invalid, see note below. ++ * ++ * - -ENOMEM is returned if a new history buffer for timestamps cannot be ++ * allocated. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note If rtser_config contains a valid timestamp_history and the ++ * addressed device has been opened in non-real-time context, this IOCTL must ++ * be issued in non-real-time context as well. Otherwise, this command will ++ * fail. ++ */ ++#define RTSER_RTIOC_SET_CONFIG \ ++ _IOW(RTIOC_TYPE_SERIAL, 0x01, struct rtser_config) ++ ++/** ++ * Get serial device status ++ * ++ * @param[out] arg Pointer to status buffer (struct rtser_status) ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note The error states @c RTSER_LSR_OVERRUN_ERR, @c RTSER_LSR_PARITY_ERR, ++ * @c RTSER_LSR_FRAMING_ERR, and @c RTSER_SOFT_OVERRUN_ERR that may have ++ * occured during previous read accesses to the device will be saved for being ++ * reported via this IOCTL. Upon return from @c RTSER_RTIOC_GET_STATUS, the ++ * saved state will be cleared. ++ */ ++#define RTSER_RTIOC_GET_STATUS \ ++ _IOR(RTIOC_TYPE_SERIAL, 0x02, struct rtser_status) ++ ++/** ++ * Get serial device's modem contol register ++ * ++ * @param[out] arg Pointer to variable receiving the content (int, see ++ * @ref RTSER_MCR_xxx) ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define RTSER_RTIOC_GET_CONTROL \ ++ _IOR(RTIOC_TYPE_SERIAL, 0x03, int) ++ ++/** ++ * Set serial device's modem contol register ++ * ++ * @param[in] arg New control register content (int, see @ref RTSER_MCR_xxx) ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define RTSER_RTIOC_SET_CONTROL \ ++ _IOW(RTIOC_TYPE_SERIAL, 0x04, int) ++ ++/** ++ * Wait on serial device events according to previously set mask ++ * ++ * @param[out] arg Pointer to event information buffer (struct rtser_event) ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EBUSY is returned if another task is already waiting on events of this ++ * device. ++ * ++ * - -EBADF is returned if the file descriptor is invalid or the device has ++ * just been closed. ++ * ++ * @coretags{mode-unrestricted} ++ */ ++#define RTSER_RTIOC_WAIT_EVENT \ ++ _IOR(RTIOC_TYPE_SERIAL, 0x05, struct rtser_event) ++/** @} */ ++ ++/** ++ * Set or clear break on UART output line ++ * ++ * @param[in] arg @c RTSER_BREAK_SET or @c RTSER_BREAK_CLR (int) ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note A set break condition may also be cleared on UART line ++ * reconfiguration. ++ */ ++#define RTSER_RTIOC_BREAK_CTL \ ++ _IOR(RTIOC_TYPE_SERIAL, 0x06, int) ++/** @} */ ++ ++/*! 
++ * @anchor SERutils @name RT Serial example and utility programs ++ * @{ */ ++/** @example cross-link.c */ ++/** @} */ ++ ++#endif /* !_RTDM_UAPI_SERIAL_H */ +--- linux/include/xenomai/rtdm/uapi/gpio.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/gpio.h 2021-04-07 16:01:28.478632379 +0800 +@@ -0,0 +1,41 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _RTDM_UAPI_GPIO_H ++#define _RTDM_UAPI_GPIO_H ++ ++struct rtdm_gpio_readout { ++ nanosecs_abs_t timestamp; ++ __s32 value; ++}; ++ ++#define GPIO_RTIOC_DIR_OUT _IOW(RTDM_CLASS_GPIO, 0, int) ++#define GPIO_RTIOC_DIR_IN _IO(RTDM_CLASS_GPIO, 1) ++#define GPIO_RTIOC_IRQEN _IOW(RTDM_CLASS_GPIO, 2, int) /* GPIO trigger */ ++#define GPIO_RTIOC_IRQDIS _IO(RTDM_CLASS_GPIO, 3) ++#define GPIO_RTIOC_REQS _IO(RTDM_CLASS_GPIO, 4) ++#define GPIO_RTIOC_RELS _IO(RTDM_CLASS_GPIO, 5) ++#define GPIO_RTIOC_TS _IOR(RTDM_CLASS_GPIO, 7, int) ++ ++#define GPIO_TRIGGER_NONE 0x0 /* unspecified */ ++#define GPIO_TRIGGER_EDGE_RISING 0x1 ++#define GPIO_TRIGGER_EDGE_FALLING 0x2 ++#define GPIO_TRIGGER_LEVEL_HIGH 0x4 ++#define GPIO_TRIGGER_LEVEL_LOW 0x8 ++#define GPIO_TRIGGER_MASK 0xf ++ ++#endif /* !_RTDM_UAPI_GPIO_H */ +--- linux/include/xenomai/rtdm/uapi/testing.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/testing.h 2021-04-07 16:01:28.473632386 +0800 +@@ -0,0 +1,198 @@ ++/** ++ * @file ++ * Real-Time Driver Model for Xenomai, testing device profile header ++ * ++ * @note Copyright (C) 2005 Jan Kiszka ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ * ++ * @ingroup rttesting ++ */ ++#ifndef _RTDM_UAPI_TESTING_H ++#define _RTDM_UAPI_TESTING_H ++ ++#include ++ ++#define RTTST_PROFILE_VER 2 ++ ++typedef struct rttst_bench_res { ++ __s32 avg; ++ __s32 min; ++ __s32 max; ++ __s32 overruns; ++ __s32 test_loops; ++} rttst_bench_res_t; ++ ++typedef struct rttst_interm_bench_res { ++ struct rttst_bench_res last; ++ struct rttst_bench_res overall; ++} rttst_interm_bench_res_t; ++ ++typedef struct rttst_overall_bench_res { ++ struct rttst_bench_res result; ++ __s32 *histogram_avg; ++ __s32 *histogram_min; ++ __s32 *histogram_max; ++} rttst_overall_bench_res_t; ++ ++#define RTTST_TMBENCH_INVALID -1 /* internal use only */ ++#define RTTST_TMBENCH_TASK 0 ++#define RTTST_TMBENCH_HANDLER 1 ++ ++typedef struct rttst_tmbench_config { ++ int mode; ++ int priority; ++ __u64 period; ++ int warmup_loops; ++ int histogram_size; ++ int histogram_bucketsize; ++ int freeze_max; ++} rttst_tmbench_config_t; ++ ++struct rttst_swtest_task { ++ unsigned int index; ++ unsigned int flags; ++}; ++ ++/* Possible values for struct rttst_swtest_task::flags. */ ++#define RTTST_SWTEST_FPU 0x1 ++#define RTTST_SWTEST_USE_FPU 0x2 /* Only for kernel-space tasks. */ ++#define RTTST_SWTEST_FREEZE 0x4 /* Only for kernel-space tasks. */ ++ ++struct rttst_swtest_dir { ++ unsigned int from; ++ unsigned int to; ++}; ++ ++struct rttst_swtest_error { ++ struct rttst_swtest_dir last_switch; ++ unsigned int fp_val; ++}; ++ ++#define RTTST_RTDM_NORMAL_CLOSE 0 ++#define RTTST_RTDM_DEFER_CLOSE_CONTEXT 1 ++ ++#define RTTST_RTDM_MAGIC_PRIMARY 0xfefbfefb ++#define RTTST_RTDM_MAGIC_SECONDARY 0xa5b9a5b9 ++ ++#define RTTST_HEAPCHECK_ZEROOVRD 1 ++#define RTTST_HEAPCHECK_SHUFFLE 2 ++#define RTTST_HEAPCHECK_PATTERN 4 ++#define RTTST_HEAPCHECK_HOT 8 ++ ++struct rttst_heap_parms { ++ __u64 heap_size; ++ __u64 block_size; ++ int flags; ++ int nrstats; ++}; ++ ++struct rttst_heap_stats { ++ __u64 heap_size; ++ __u64 user_size; ++ __u64 block_size; ++ __s64 alloc_avg_ns; ++ __s64 alloc_max_ns; ++ __s64 free_avg_ns; ++ __s64 free_max_ns; ++ __u64 maximum_free; ++ __u64 largest_free; ++ int nrblocks; ++ int flags; ++}; ++ ++struct rttst_heap_stathdr { ++ int nrstats; ++ struct rttst_heap_stats *buf; ++}; ++ ++#define RTIOC_TYPE_TESTING RTDM_CLASS_TESTING ++ ++/*! ++ * @name Sub-Classes of RTDM_CLASS_TESTING ++ * @{ */ ++/** subclass name: "timerbench" */ ++#define RTDM_SUBCLASS_TIMERBENCH 0 ++/** subclass name: "irqbench" */ ++#define RTDM_SUBCLASS_IRQBENCH 1 ++/** subclass name: "switchtest" */ ++#define RTDM_SUBCLASS_SWITCHTEST 2 ++/** subclase name: "rtdm" */ ++#define RTDM_SUBCLASS_RTDMTEST 3 ++/** subclase name: "heapcheck" */ ++#define RTDM_SUBCLASS_HEAPCHECK 4 ++/** @} */ ++ ++/*! 
++ * @anchor TSTIOCTLs @name IOCTLs ++ * Testing device IOCTLs ++ * @{ */ ++#define RTTST_RTIOC_INTERM_BENCH_RES \ ++ _IOWR(RTIOC_TYPE_TESTING, 0x00, struct rttst_interm_bench_res) ++ ++#define RTTST_RTIOC_TMBENCH_START \ ++ _IOW(RTIOC_TYPE_TESTING, 0x10, struct rttst_tmbench_config) ++ ++#define RTTST_RTIOC_TMBENCH_STOP \ ++ _IOWR(RTIOC_TYPE_TESTING, 0x11, struct rttst_overall_bench_res) ++ ++#define RTTST_RTIOC_SWTEST_SET_TASKS_COUNT \ ++ _IOW(RTIOC_TYPE_TESTING, 0x30, __u32) ++ ++#define RTTST_RTIOC_SWTEST_SET_CPU \ ++ _IOW(RTIOC_TYPE_TESTING, 0x31, __u32) ++ ++#define RTTST_RTIOC_SWTEST_REGISTER_UTASK \ ++ _IOW(RTIOC_TYPE_TESTING, 0x32, struct rttst_swtest_task) ++ ++#define RTTST_RTIOC_SWTEST_CREATE_KTASK \ ++ _IOWR(RTIOC_TYPE_TESTING, 0x33, struct rttst_swtest_task) ++ ++#define RTTST_RTIOC_SWTEST_PEND \ ++ _IOR(RTIOC_TYPE_TESTING, 0x34, struct rttst_swtest_task) ++ ++#define RTTST_RTIOC_SWTEST_SWITCH_TO \ ++ _IOR(RTIOC_TYPE_TESTING, 0x35, struct rttst_swtest_dir) ++ ++#define RTTST_RTIOC_SWTEST_GET_SWITCHES_COUNT \ ++ _IOR(RTIOC_TYPE_TESTING, 0x36, __u32) ++ ++#define RTTST_RTIOC_SWTEST_GET_LAST_ERROR \ ++ _IOR(RTIOC_TYPE_TESTING, 0x37, struct rttst_swtest_error) ++ ++#define RTTST_RTIOC_SWTEST_SET_PAUSE \ ++ _IOW(RTIOC_TYPE_TESTING, 0x38, __u32) ++ ++#define RTTST_RTIOC_RTDM_DEFER_CLOSE \ ++ _IOW(RTIOC_TYPE_TESTING, 0x40, __u32) ++ ++#define RTTST_RTIOC_RTDM_ACTOR_GET_CPU \ ++ _IOR(RTIOC_TYPE_TESTING, 0x41, __u32) ++ ++#define RTTST_RTIOC_RTDM_PING_PRIMARY \ ++ _IOR(RTIOC_TYPE_TESTING, 0x42, __u32) ++ ++#define RTTST_RTIOC_RTDM_PING_SECONDARY \ ++ _IOR(RTIOC_TYPE_TESTING, 0x43, __u32) ++ ++#define RTTST_RTIOC_HEAP_CHECK \ ++ _IOR(RTIOC_TYPE_TESTING, 0x44, struct rttst_heap_parms) ++ ++#define RTTST_RTIOC_HEAP_STAT_COLLECT \ ++ _IOR(RTIOC_TYPE_TESTING, 0x45, int) ++ ++/** @} */ ++ ++#endif /* !_RTDM_UAPI_TESTING_H */ +--- linux/include/xenomai/rtdm/uapi/analogy.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/analogy.h 2021-04-07 16:01:28.464632399 +0800 +@@ -0,0 +1,743 @@ ++/** ++ * @file ++ * Analogy for Linux, UAPI bits ++ * @note Copyright (C) 1997-2000 David A. Schleef ++ * @note Copyright (C) 2008 Alexis Berlemont ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _RTDM_UAPI_ANALOGY_H ++#define _RTDM_UAPI_ANALOGY_H ++ ++/* --- Misc precompilation constant --- */ ++#define A4L_NAMELEN 20 ++ ++#define A4L_INFINITE 0 ++#define A4L_NONBLOCK (-1) ++ ++/* --- Common Analogy types --- */ ++ ++typedef unsigned short sampl_t; ++typedef unsigned long lsampl_t; ++ ++/* MMAP ioctl argument structure */ ++struct a4l_mmap_arg { ++ unsigned int idx_subd; ++ unsigned long size; ++ void *ptr; ++}; ++typedef struct a4l_mmap_arg a4l_mmap_t; ++ ++/* Constants related with buffer size ++ (might be used with BUFCFG ioctl) */ ++#define A4L_BUF_MAXSIZE 0x1000000 ++#define A4L_BUF_DEFSIZE 0x10000 ++#define A4L_BUF_DEFMAGIC 0xffaaff55 ++ ++/* BUFCFG ioctl argument structure */ ++struct a4l_buffer_config { ++ /* NOTE: with the last buffer implementation, the field ++ idx_subd became useless; the buffer are now ++ per-context. So, the buffer size configuration is specific ++ to an opened device. There is a little exception: we can ++ define a default buffer size for a device. ++ So far, a hack is used to implement the configuration of ++ the default buffer size */ ++ unsigned int idx_subd; ++ unsigned long buf_size; ++}; ++typedef struct a4l_buffer_config a4l_bufcfg_t; ++ ++/* BUFINFO ioctl argument structure */ ++struct a4l_buffer_info { ++ unsigned int idx_subd; ++ unsigned long buf_size; ++ unsigned long rw_count; ++}; ++typedef struct a4l_buffer_info a4l_bufinfo_t; ++ ++/* BUFCFG2 / BUFINFO2 ioctl argument structure */ ++struct a4l_buffer_config2 { ++ unsigned long wake_count; ++ unsigned long reserved[3]; ++}; ++typedef struct a4l_buffer_config2 a4l_bufcfg2_t; ++ ++/* POLL ioctl argument structure */ ++struct a4l_poll { ++ unsigned int idx_subd; ++ unsigned long arg; ++}; ++typedef struct a4l_poll a4l_poll_t; ++ ++/* DEVCFG ioctl argument structure */ ++struct a4l_link_desc { ++ unsigned char bname_size; ++ char *bname; ++ unsigned int opts_size; ++ void *opts; ++}; ++typedef struct a4l_link_desc a4l_lnkdesc_t; ++ ++/* DEVINFO ioctl argument structure */ ++struct a4l_dev_info { ++ char board_name[A4L_NAMELEN]; ++ char driver_name[A4L_NAMELEN]; ++ int nb_subd; ++ int idx_read_subd; ++ int idx_write_subd; ++}; ++typedef struct a4l_dev_info a4l_dvinfo_t; ++ ++#define CIO 'd' ++#define A4L_DEVCFG _IOW(CIO,0,a4l_lnkdesc_t) ++#define A4L_DEVINFO _IOR(CIO,1,a4l_dvinfo_t) ++#define A4L_SUBDINFO _IOR(CIO,2,a4l_sbinfo_t) ++#define A4L_CHANINFO _IOR(CIO,3,a4l_chinfo_arg_t) ++#define A4L_RNGINFO _IOR(CIO,4,a4l_rnginfo_arg_t) ++#define A4L_CMD _IOWR(CIO,5,a4l_cmd_t) ++#define A4L_CANCEL _IOR(CIO,6,unsigned int) ++#define A4L_INSNLIST _IOR(CIO,7,unsigned int) ++#define A4L_INSN _IOR(CIO,8,unsigned int) ++#define A4L_BUFCFG _IOR(CIO,9,a4l_bufcfg_t) ++#define A4L_BUFINFO _IOWR(CIO,10,a4l_bufinfo_t) ++#define A4L_POLL _IOR(CIO,11,unsigned int) ++#define A4L_MMAP _IOWR(CIO,12,unsigned int) ++#define A4L_NBCHANINFO _IOR(CIO,13,a4l_chinfo_arg_t) ++#define A4L_NBRNGINFO _IOR(CIO,14,a4l_rnginfo_arg_t) ++ ++/* These IOCTLs are bound to be merged with A4L_BUFCFG and A4L_BUFINFO ++ at the next major release */ ++#define A4L_BUFCFG2 _IOR(CIO,15,a4l_bufcfg_t) ++#define A4L_BUFINFO2 _IOWR(CIO,16,a4l_bufcfg_t) ++ ++/*! ++ * @addtogroup analogy_lib_async1 ++ * @{ ++ */ ++ ++/*! 
++ * @anchor ANALOGY_CMD_xxx @name ANALOGY_CMD_xxx ++ * @brief Common command flags definitions ++ * @{ ++ */ ++ ++/** ++ * Do not execute the command, just check it ++ */ ++#define A4L_CMD_SIMUL 0x1 ++/** ++ * Perform data recovery / transmission in bulk mode ++ */ ++#define A4L_CMD_BULK 0x2 ++/** ++ * Perform a command which will write data to the device ++ */ ++#define A4L_CMD_WRITE 0x4 ++ ++ /*! @} ANALOGY_CMD_xxx */ ++ ++/*! ++ * @anchor TRIG_xxx @name TRIG_xxx ++ * @brief Command triggers flags definitions ++ * @{ ++ */ ++ ++/** ++ * Never trigger ++ */ ++#define TRIG_NONE 0x00000001 ++/** ++ * Trigger now + N ns ++ */ ++#define TRIG_NOW 0x00000002 ++/** ++ * Trigger on next lower level trig ++ */ ++#define TRIG_FOLLOW 0x00000004 ++/** ++ * Trigger at time N ns ++ */ ++#define TRIG_TIME 0x00000008 ++/** ++ * Trigger at rate N ns ++ */ ++#define TRIG_TIMER 0x00000010 ++/** ++ * Trigger when count reaches N ++ */ ++#define TRIG_COUNT 0x00000020 ++/** ++ * Trigger on external signal N ++ */ ++#define TRIG_EXT 0x00000040 ++/** ++ * Trigger on analogy-internal signal N ++ */ ++#define TRIG_INT 0x00000080 ++/** ++ * Driver defined trigger ++ */ ++#define TRIG_OTHER 0x00000100 ++/** ++ * Wake up on end-of-scan ++ */ ++#define TRIG_WAKE_EOS 0x0020 ++/** ++ * Trigger not implemented yet ++ */ ++#define TRIG_ROUND_MASK 0x00030000 ++/** ++ * Trigger not implemented yet ++ */ ++#define TRIG_ROUND_NEAREST 0x00000000 ++/** ++ * Trigger not implemented yet ++ */ ++#define TRIG_ROUND_DOWN 0x00010000 ++/** ++ * Trigger not implemented yet ++ */ ++#define TRIG_ROUND_UP 0x00020000 ++/** ++ * Trigger not implemented yet ++ */ ++#define TRIG_ROUND_UP_NEXT 0x00030000 ++ ++ /*! @} TRIG_xxx */ ++ ++/*! ++ * @anchor CHAN_RNG_AREF @name Channel macros ++ * @brief Specific precompilation macros and constants useful for the ++ * channels descriptors tab located in the command structure ++ * @{ ++ */ ++ ++/** ++ * Channel indication macro ++ */ ++#define CHAN(a) ((a) & 0xffff) ++/** ++ * Range definition macro ++ */ ++#define RNG(a) (((a) & 0xff) << 16) ++/** ++ * Reference definition macro ++ */ ++#define AREF(a) (((a) & 0x03) << 24) ++/** ++ * Flags definition macro ++ */ ++#define FLAGS(a) ((a) & CR_FLAGS_MASK) ++/** ++ * Channel + range + reference definition macro ++ */ ++#define PACK(a, b, c) (a | RNG(b) | AREF(c)) ++/** ++ * Channel + range + reference + flags definition macro ++ */ ++#define PACK_FLAGS(a, b, c, d) (PACK(a, b, c) | FLAGS(d)) ++ ++/** ++ * Analog reference is analog ground ++ */ ++#define AREF_GROUND 0x00 ++/** ++ * Analog reference is analog common ++ */ ++#define AREF_COMMON 0x01 ++/** ++ * Analog reference is differential ++ */ ++#define AREF_DIFF 0x02 ++/** ++ * Analog reference is undefined ++ */ ++#define AREF_OTHER 0x03 ++ ++ /*! @} CHAN_RNG_AREF */ ++ ++#if !defined(DOXYGEN_CPP) ++ ++#define CR_FLAGS_MASK 0xfc000000 ++#define CR_ALT_FILTER (1<<26) ++#define CR_DITHER CR_ALT_FILTER ++#define CR_DEGLITCH CR_ALT_FILTER ++#define CR_ALT_SOURCE (1<<27) ++#define CR_EDGE (1<<30) ++#define CR_INVERT (1<<31) ++ ++#endif /* !DOXYGEN_CPP */ ++ ++/*! ++ * @brief Structure describing the asynchronous instruction ++ * @see a4l_snd_command() ++ */ ++ ++struct a4l_cmd_desc { ++ unsigned char idx_subd; ++ /**< Subdevice to which the command will be applied. 
*/ ++ ++ unsigned long flags; ++ /**< Command flags */ ++ ++ /* Command trigger characteristics */ ++ unsigned int start_src; ++ /**< Start trigger type */ ++ unsigned int start_arg; ++ /**< Start trigger argument */ ++ unsigned int scan_begin_src; ++ /**< Scan begin trigger type */ ++ unsigned int scan_begin_arg; ++ /**< Scan begin trigger argument */ ++ unsigned int convert_src; ++ /**< Convert trigger type */ ++ unsigned int convert_arg; ++ /**< Convert trigger argument */ ++ unsigned int scan_end_src; ++ /**< Scan end trigger type */ ++ unsigned int scan_end_arg; ++ /**< Scan end trigger argument */ ++ unsigned int stop_src; ++ /**< Stop trigger type */ ++ unsigned int stop_arg; ++ /**< Stop trigger argument */ ++ ++ unsigned char nb_chan; ++ /**< Count of channels related with the command */ ++ unsigned int *chan_descs; ++ /**< Tab containing channels descriptors */ ++ ++ /* Driver specific fields */ ++ unsigned int valid_simul_stages; ++ /** < cmd simulation valid stages (driver dependent) */ ++ ++ unsigned int data_len; ++ /**< Driver specific buffer size */ ++ sampl_t *data; ++ /**< Driver specific buffer pointer */ ++}; ++typedef struct a4l_cmd_desc a4l_cmd_t; ++ ++/*! @} analogy_lib_async1 */ ++ ++/* --- Range section --- */ ++ ++/** Constant for internal use only (must not be used by driver ++ developer). */ ++#define A4L_RNG_FACTOR 1000000 ++ ++/** ++ * Volt unit range flag ++ */ ++#define A4L_RNG_VOLT_UNIT 0x0 ++/** ++ * MilliAmpere unit range flag ++ */ ++#define A4L_RNG_MAMP_UNIT 0x1 ++/** ++ * No unit range flag ++ */ ++#define A4L_RNG_NO_UNIT 0x2 ++/** ++ * External unit range flag ++ */ ++#define A4L_RNG_EXT_UNIT 0x4 ++ ++/** ++ * Macro to retrieve the range unit from the range flags ++ */ ++#define A4L_RNG_UNIT(x) (x & (A4L_RNG_VOLT_UNIT | \ ++ A4L_RNG_MAMP_UNIT | \ ++ A4L_RNG_NO_UNIT | \ ++ A4L_RNG_EXT_UNIT)) ++ ++/* --- Subdevice flags desc stuff --- */ ++ ++/* TODO: replace ANALOGY_SUBD_AI with ANALOGY_SUBD_ANALOG ++ and ANALOGY_SUBD_INPUT */ ++ ++/* Subdevice types masks */ ++#define A4L_SUBD_MASK_READ 0x80000000 ++#define A4L_SUBD_MASK_WRITE 0x40000000 ++#define A4L_SUBD_MASK_SPECIAL 0x20000000 ++ ++/*! ++ * @addtogroup analogy_subdevice ++ * @{ ++ */ ++ ++/*! 
++ * @anchor ANALOGY_SUBD_xxx @name Subdevices types ++ * @brief Flags to define the subdevice type ++ * @{ ++ */ ++ ++/** ++ * Unused subdevice ++ */ ++#define A4L_SUBD_UNUSED (A4L_SUBD_MASK_SPECIAL|0x1) ++/** ++ * Analog input subdevice ++ */ ++#define A4L_SUBD_AI (A4L_SUBD_MASK_READ|0x2) ++/** ++ * Analog output subdevice ++ */ ++#define A4L_SUBD_AO (A4L_SUBD_MASK_WRITE|0x4) ++/** ++ * Digital input subdevice ++ */ ++#define A4L_SUBD_DI (A4L_SUBD_MASK_READ|0x8) ++/** ++ * Digital output subdevice ++ */ ++#define A4L_SUBD_DO (A4L_SUBD_MASK_WRITE|0x10) ++/** ++ * Digital input/output subdevice ++ */ ++#define A4L_SUBD_DIO (A4L_SUBD_MASK_SPECIAL|0x20) ++/** ++ * Counter subdevice ++ */ ++#define A4L_SUBD_COUNTER (A4L_SUBD_MASK_SPECIAL|0x40) ++/** ++ * Timer subdevice ++ */ ++#define A4L_SUBD_TIMER (A4L_SUBD_MASK_SPECIAL|0x80) ++/** ++ * Memory, EEPROM, DPRAM ++ */ ++#define A4L_SUBD_MEMORY (A4L_SUBD_MASK_SPECIAL|0x100) ++/** ++ * Calibration subdevice DACs ++ */ ++#define A4L_SUBD_CALIB (A4L_SUBD_MASK_SPECIAL|0x200) ++/** ++ * Processor, DSP ++ */ ++#define A4L_SUBD_PROC (A4L_SUBD_MASK_SPECIAL|0x400) ++/** ++ * Serial IO subdevice ++ */ ++#define A4L_SUBD_SERIAL (A4L_SUBD_MASK_SPECIAL|0x800) ++/** ++ * Mask which gathers all the types ++ */ ++#define A4L_SUBD_TYPES (A4L_SUBD_UNUSED | \ ++ A4L_SUBD_AI | \ ++ A4L_SUBD_AO | \ ++ A4L_SUBD_DI | \ ++ A4L_SUBD_DO | \ ++ A4L_SUBD_DIO | \ ++ A4L_SUBD_COUNTER | \ ++ A4L_SUBD_TIMER | \ ++ A4L_SUBD_MEMORY | \ ++ A4L_SUBD_CALIB | \ ++ A4L_SUBD_PROC | \ ++ A4L_SUBD_SERIAL) ++ ++/*! @} ANALOGY_SUBD_xxx */ ++ ++/*! ++ * @anchor ANALOGY_SUBD_FT_xxx @name Subdevice features ++ * @brief Flags to define the subdevice's capabilities ++ * @{ ++ */ ++ ++/* Subdevice capabilities */ ++/** ++ * The subdevice can handle command (i.e it can perform asynchronous ++ * acquisition) ++ */ ++#define A4L_SUBD_CMD 0x1000 ++/** ++ * The subdevice support mmap operations (technically, any driver can ++ * do it; however, the developer might want that his driver must be ++ * accessed through read / write ++ */ ++#define A4L_SUBD_MMAP 0x8000 ++ ++/*! @} ANALOGY_SUBD_FT_xxx */ ++ ++/*! ++ * @anchor ANALOGY_SUBD_ST_xxx @name Subdevice status ++ * @brief Flags to define the subdevice's status ++ * @{ ++ */ ++ ++/* Subdevice status flag(s) */ ++/** ++ * The subdevice is busy, a synchronous or an asynchronous acquisition ++ * is occuring ++ */ ++#define A4L_SUBD_BUSY_NR 0 ++#define A4L_SUBD_BUSY (1 << A4L_SUBD_BUSY_NR) ++ ++/** ++ * The subdevice is about to be cleaned in the middle of the detach ++ * procedure ++ */ ++#define A4L_SUBD_CLEAN_NR 1 ++#define A4L_SUBD_CLEAN (1 << A4L_SUBD_CLEAN_NR) ++ ++ ++/*! 
@} ANALOGY_SUBD_ST_xxx */ ++ ++/* --- Subdevice related IOCTL arguments structures --- */ ++ ++/* SUDBINFO IOCTL argument */ ++struct a4l_subd_info { ++ unsigned long flags; ++ unsigned long status; ++ unsigned char nb_chan; ++}; ++typedef struct a4l_subd_info a4l_sbinfo_t; ++ ++/* CHANINFO / NBCHANINFO IOCTL arguments */ ++struct a4l_chan_info { ++ unsigned long chan_flags; ++ unsigned char nb_rng; ++ unsigned char nb_bits; ++}; ++typedef struct a4l_chan_info a4l_chinfo_t; ++ ++struct a4l_chinfo_arg { ++ unsigned int idx_subd; ++ void *info; ++}; ++typedef struct a4l_chinfo_arg a4l_chinfo_arg_t; ++ ++/* RNGINFO / NBRNGINFO IOCTL arguments */ ++struct a4l_rng_info { ++ long min; ++ long max; ++ unsigned long flags; ++}; ++typedef struct a4l_rng_info a4l_rnginfo_t; ++ ++struct a4l_rng_info_arg { ++ unsigned int idx_subd; ++ unsigned int idx_chan; ++ void *info; ++}; ++typedef struct a4l_rng_info_arg a4l_rnginfo_arg_t; ++ ++/*! @} */ ++ ++#define A4L_INSN_MASK_READ 0x8000000 ++#define A4L_INSN_MASK_WRITE 0x4000000 ++#define A4L_INSN_MASK_SPECIAL 0x2000000 ++ ++/*! ++ * @addtogroup analogy_lib_sync1 ++ * @{ ++ */ ++ ++/*! ++ * @anchor ANALOGY_INSN_xxx @name Instruction type ++ * @brief Flags to define the type of instruction ++ * @{ ++ */ ++ ++/** ++ * Read instruction ++ */ ++#define A4L_INSN_READ (0 | A4L_INSN_MASK_READ) ++/** ++ * Write instruction ++ */ ++#define A4L_INSN_WRITE (1 | A4L_INSN_MASK_WRITE) ++/** ++ * "Bits" instruction ++ */ ++#define A4L_INSN_BITS (2 | A4L_INSN_MASK_READ | \ ++ A4L_INSN_MASK_WRITE) ++/** ++ * Configuration instruction ++ */ ++#define A4L_INSN_CONFIG (3 | A4L_INSN_MASK_READ | \ ++ A4L_INSN_MASK_WRITE) ++/** ++ * Get time instruction ++ */ ++#define A4L_INSN_GTOD (4 | A4L_INSN_MASK_READ | \ ++ A4L_INSN_MASK_SPECIAL) ++/** ++ * Wait instruction ++ */ ++#define A4L_INSN_WAIT (5 | A4L_INSN_MASK_WRITE | \ ++ A4L_INSN_MASK_SPECIAL) ++/** ++ * Trigger instruction (to start asynchronous acquisition) ++ */ ++#define A4L_INSN_INTTRIG (6 | A4L_INSN_MASK_WRITE | \ ++ A4L_INSN_MASK_SPECIAL) ++ ++ /*! @} ANALOGY_INSN_xxx */ ++ ++/** ++ * Maximal wait duration ++ */ ++#define A4L_INSN_WAIT_MAX 100000 ++ ++/*! 
++ * @anchor INSN_CONFIG_xxx @name Configuration instruction type ++ * @brief Values to define the type of configuration instruction ++ * @{ ++ */ ++ ++#define A4L_INSN_CONFIG_DIO_INPUT 0 ++#define A4L_INSN_CONFIG_DIO_OUTPUT 1 ++#define A4L_INSN_CONFIG_DIO_OPENDRAIN 2 ++#define A4L_INSN_CONFIG_ANALOG_TRIG 16 ++#define A4L_INSN_CONFIG_ALT_SOURCE 20 ++#define A4L_INSN_CONFIG_DIGITAL_TRIG 21 ++#define A4L_INSN_CONFIG_BLOCK_SIZE 22 ++#define A4L_INSN_CONFIG_TIMER_1 23 ++#define A4L_INSN_CONFIG_FILTER 24 ++#define A4L_INSN_CONFIG_CHANGE_NOTIFY 25 ++#define A4L_INSN_CONFIG_SERIAL_CLOCK 26 ++#define A4L_INSN_CONFIG_BIDIRECTIONAL_DATA 27 ++#define A4L_INSN_CONFIG_DIO_QUERY 28 ++#define A4L_INSN_CONFIG_PWM_OUTPUT 29 ++#define A4L_INSN_CONFIG_GET_PWM_OUTPUT 30 ++#define A4L_INSN_CONFIG_ARM 31 ++#define A4L_INSN_CONFIG_DISARM 32 ++#define A4L_INSN_CONFIG_GET_COUNTER_STATUS 33 ++#define A4L_INSN_CONFIG_RESET 34 ++#define A4L_INSN_CONFIG_GPCT_SINGLE_PULSE_GENERATOR 1001 /* Use CTR as single pulsegenerator */ ++#define A4L_INSN_CONFIG_GPCT_PULSE_TRAIN_GENERATOR 1002 /* Use CTR as pulsetraingenerator */ ++#define A4L_INSN_CONFIG_GPCT_QUADRATURE_ENCODER 1003 /* Use the counter as encoder */ ++#define A4L_INSN_CONFIG_SET_GATE_SRC 2001 /* Set gate source */ ++#define A4L_INSN_CONFIG_GET_GATE_SRC 2002 /* Get gate source */ ++#define A4L_INSN_CONFIG_SET_CLOCK_SRC 2003 /* Set master clock source */ ++#define A4L_INSN_CONFIG_GET_CLOCK_SRC 2004 /* Get master clock source */ ++#define A4L_INSN_CONFIG_SET_OTHER_SRC 2005 /* Set other source */ ++#define A4L_INSN_CONFIG_SET_COUNTER_MODE 4097 ++#define A4L_INSN_CONFIG_SET_ROUTING 4099 ++#define A4L_INSN_CONFIG_GET_ROUTING 4109 ++ ++/*! @} INSN_CONFIG_xxx */ ++ ++/*! ++ * @anchor ANALOGY_COUNTER_xxx @name Counter status bits ++ * @brief Status bits for INSN_CONFIG_GET_COUNTER_STATUS ++ * @{ ++ */ ++ ++#define A4L_COUNTER_ARMED 0x1 ++#define A4L_COUNTER_COUNTING 0x2 ++#define A4L_COUNTER_TERMINAL_COUNT 0x4 ++ ++ /*! @} ANALOGY_COUNTER_xxx */ ++ ++/*! ++ * @anchor ANALOGY_IO_DIRECTION @name IO direction ++ * @brief Values to define the IO polarity ++ * @{ ++ */ ++ ++#define A4L_INPUT 0 ++#define A4L_OUTPUT 1 ++#define A4L_OPENDRAIN 2 ++ ++ /*! @} ANALOGY_IO_DIRECTION */ ++ ++ ++/*! ++ * @anchor ANALOGY_EV_xxx @name Events types ++ * @brief Values to define the Analogy events. They might used to send ++ * some specific events through the instruction interface. ++ * @{ ++ */ ++ ++#define A4L_EV_START 0x00040000 ++#define A4L_EV_SCAN_BEGIN 0x00080000 ++#define A4L_EV_CONVERT 0x00100000 ++#define A4L_EV_SCAN_END 0x00200000 ++#define A4L_EV_STOP 0x00400000 ++ ++/*! @} ANALOGY_EV_xxx */ ++ ++/*! ++ * @brief Structure describing the synchronous instruction ++ * @see a4l_snd_insn() ++ */ ++ ++struct a4l_instruction { ++ unsigned int type; ++ /**< Instruction type */ ++ unsigned int idx_subd; ++ /**< Subdevice to which the instruction will be applied. */ ++ unsigned int chan_desc; ++ /**< Channel descriptor */ ++ unsigned int data_size; ++ /**< Size of the intruction data */ ++ void *data; ++ /**< Instruction data */ ++}; ++typedef struct a4l_instruction a4l_insn_t; ++ ++/*! ++ * @brief Structure describing the list of synchronous instructions ++ * @see a4l_snd_insnlist() ++ */ ++ ++struct a4l_instruction_list { ++ unsigned int count; ++ /**< Instructions count */ ++ a4l_insn_t *insns; ++ /**< Tab containing the instructions pointers */ ++}; ++typedef struct a4l_instruction_list a4l_insnlst_t; ++ ++/*! 
@} analogy_lib_sync1 */ ++ ++struct a4l_calibration_subdev { ++ a4l_sbinfo_t *info; ++ char *name; ++ int slen; ++ int idx; ++}; ++ ++struct a4l_calibration_subdev_data { ++ int index; ++ int channel; ++ int range; ++ int expansion; ++ int nb_coeff; ++ double *coeff; ++ ++}; ++ ++struct a4l_calibration_data { ++ char *driver_name; ++ char *board_name; ++ int nb_ai; ++ struct a4l_calibration_subdev_data *ai; ++ int nb_ao; ++ struct a4l_calibration_subdev_data *ao; ++}; ++ ++struct a4l_polynomial { ++ int expansion; ++ int order; ++ int nb_coeff; ++ double *coeff; ++}; ++ ++ ++#endif /* _RTDM_UAPI_ANALOGY_H */ +--- linux/include/xenomai/rtdm/uapi/gpiopwm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/gpiopwm.h 2021-04-07 16:01:28.459632406 +0800 +@@ -0,0 +1,56 @@ ++/** ++ * @file ++ * Real-Time Driver Model for Xenomai, pwm header ++ * ++ * @note Copyright (C) 2015 Jorge Ramirez ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ * ++ * @ingroup rttesting ++ */ ++#ifndef _RTDM_UAPI_PWM_H ++#define _RTDM_UAPI_PWM_H ++ ++#include ++ ++#define RTPWM_PROFILE_VER 1 ++ ++struct gpiopwm { ++ unsigned int duty_cycle; ++ unsigned int range_min; ++ unsigned int range_max; ++ unsigned int period; ++ unsigned int gpio; ++}; ++ ++#define RTIOC_TYPE_PWM RTDM_CLASS_PWM ++ ++#define GPIOPWM_RTIOC_SET_CONFIG \ ++ _IOW(RTIOC_TYPE_PWM, 0x00, struct gpiopwm) ++ ++#define GPIOPWM_RTIOC_GET_CONFIG \ ++ _IOR(RTIOC_TYPE_PWM, 0x10, struct gpiopwm) ++ ++#define GPIOPWM_RTIOC_START \ ++ _IO(RTIOC_TYPE_PWM, 0x20) ++ ++#define GPIOPWM_RTIOC_STOP \ ++ _IO(RTIOC_TYPE_PWM, 0x30) ++ ++#define GPIOPWM_RTIOC_CHANGE_DUTY_CYCLE \ ++ _IOW(RTIOC_TYPE_PWM, 0x40, unsigned int) ++ ++ ++#endif /* !_RTDM_UAPI_TESTING_H */ +--- linux/include/xenomai/rtdm/uapi/net.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/net.h 2021-04-07 16:01:28.455632411 +0800 +@@ -0,0 +1,75 @@ ++/*** ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2005-2011 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ * As a special exception to the GNU General Public license, the RTnet ++ * project allows you to use this header file in unmodified form to produce ++ * application programs executing in user-space which use RTnet services by ++ * normal system calls. The resulting executable will not be covered by the ++ * GNU General Public License merely as a result of this header file use. ++ * Instead, this header file use will be considered normal use of RTnet and ++ * not a "derived work" in the sense of the GNU General Public License. ++ * ++ * This exception does not apply when the application code is built as a ++ * static or dynamically loadable portion of the Linux kernel nor does the ++ * exception override other reasons justifying application of the GNU General ++ * Public License. ++ * ++ * This exception applies only to the code released by the RTnet project ++ * under the name RTnet and bearing this exception notice. If you copy code ++ * from other sources into a copy of RTnet, the exception does not apply to ++ * the code that you add in this way. ++ * ++ */ ++ ++#ifndef _RTDM_UAPI_NET_H ++#define _RTDM_UAPI_NET_H ++ ++/* sub-classes: RTDM_CLASS_NETWORK */ ++#define RTDM_SUBCLASS_RTNET 0 ++ ++#define RTIOC_TYPE_NETWORK RTDM_CLASS_NETWORK ++ ++/* RTnet-specific IOCTLs */ ++#define RTNET_RTIOC_XMITPARAMS _IOW(RTIOC_TYPE_NETWORK, 0x10, unsigned int) ++#define RTNET_RTIOC_PRIORITY RTNET_RTIOC_XMITPARAMS /* legacy */ ++#define RTNET_RTIOC_TIMEOUT _IOW(RTIOC_TYPE_NETWORK, 0x11, int64_t) ++/* RTNET_RTIOC_CALLBACK _IOW(RTIOC_TYPE_NETWORK, 0x12, ... ++ * IOCTL only usable inside the kernel. */ ++/* RTNET_RTIOC_NONBLOCK _IOW(RTIOC_TYPE_NETWORK, 0x13, unsigned int) ++ * This IOCTL is no longer supported (and it was buggy anyway). ++ * Use RTNET_RTIOC_TIMEOUT with any negative timeout value instead. 
*/ ++#define RTNET_RTIOC_EXTPOOL _IOW(RTIOC_TYPE_NETWORK, 0x14, unsigned int) ++#define RTNET_RTIOC_SHRPOOL _IOW(RTIOC_TYPE_NETWORK, 0x15, unsigned int) ++ ++/* socket transmission priorities */ ++#define SOCK_MAX_PRIO 0 ++#define SOCK_DEF_PRIO SOCK_MAX_PRIO + \ ++ (SOCK_MIN_PRIO-SOCK_MAX_PRIO+1)/2 ++#define SOCK_MIN_PRIO SOCK_NRT_PRIO - 1 ++#define SOCK_NRT_PRIO 31 ++ ++/* socket transmission channels */ ++#define SOCK_DEF_RT_CHANNEL 0 /* default rt xmit channel */ ++#define SOCK_DEF_NRT_CHANNEL 1 /* default non-rt xmit channel */ ++#define SOCK_USER_CHANNEL 2 /* first user-defined channel */ ++ ++/* argument construction for RTNET_RTIOC_XMITPARAMS */ ++#define SOCK_XMIT_PARAMS(priority, channel) ((priority) | ((channel) << 16)) ++ ++#endif /* !_RTDM_UAPI_NET_H */ +--- linux/include/xenomai/rtdm/uapi/can.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/can.h 2021-04-07 16:01:28.450632419 +0800 +@@ -0,0 +1,905 @@ ++/** ++ * @file ++ * Real-Time Driver Model for RT-Socket-CAN, CAN device profile header ++ * ++ * @note Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * @note Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * This RTDM CAN device profile header is based on: ++ * ++ * include/linux/can.h, include/linux/socket.h, net/can/pf_can.h in ++ * linux-can.patch, a CAN socket framework for Linux ++ * ++ * Copyright (C) 2004, 2005, ++ * Robert Schwebel, Benedikt Spranger, Marc Kleine-Budde, Pengutronix ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _RTDM_UAPI_CAN_H ++#define _RTDM_UAPI_CAN_H ++ ++/** ++ * @addtogroup rtdm_can ++ * @{ ++ */ ++ ++#define RTCAN_PROFILE_VER 2 ++ ++#ifndef AF_CAN ++ ++/** CAN address family */ ++#define AF_CAN 29 ++ ++/** CAN protocol family */ ++#define PF_CAN AF_CAN ++ ++#endif ++ ++/** CAN socket levels ++ * ++ * Used for @ref Sockopts for the particular protocols. ++ */ ++#define SOL_CAN_RAW 103 ++ ++/** Type of CAN id (see @ref CAN_xxx_MASK and @ref CAN_xxx_FLAG) */ ++typedef uint32_t can_id_t; ++typedef uint32_t canid_t; ++ ++/** Type of CAN error mask */ ++typedef can_id_t can_err_mask_t; ++ ++/*! ++ * @anchor CAN_xxx_MASK @name CAN ID masks ++ * Bit masks for masking CAN IDs ++ * @{ */ ++ ++/** Bit mask for extended CAN IDs */ ++#define CAN_EFF_MASK 0x1FFFFFFF ++ ++/** Bit mask for standard CAN IDs */ ++#define CAN_SFF_MASK 0x000007FF ++ ++/** @} */ ++ ++/*! 
++ * @anchor CAN_xxx_FLAG @name CAN ID flags ++ * Flags within a CAN ID indicating special CAN frame attributes ++ * @{ */ ++/** Extended frame */ ++#define CAN_EFF_FLAG 0x80000000 ++/** Remote transmission frame */ ++#define CAN_RTR_FLAG 0x40000000 ++/** Error frame (see @ref Errors), not valid in struct can_filter */ ++#define CAN_ERR_FLAG 0x20000000 ++/** Invert CAN filter definition, only valid in struct can_filter */ ++#define CAN_INV_FILTER CAN_ERR_FLAG ++ ++/** @} */ ++ ++/*! ++ * @anchor CAN_PROTO @name Particular CAN protocols ++ * Possible protocols for the PF_CAN protocol family ++ * ++ * Currently only the RAW protocol is supported. ++ * @{ */ ++/** Raw protocol of @c PF_CAN, applicable to socket type @c SOCK_RAW */ ++#define CAN_RAW 1 ++/** @} */ ++ ++#define CAN_BAUDRATE_UNKNOWN ((uint32_t)-1) ++#define CAN_BAUDRATE_UNCONFIGURED 0 ++ ++/** ++ * Baudrate definition in bits per second ++ */ ++typedef uint32_t can_baudrate_t; ++ ++/** ++ * Supported CAN bit-time types ++ */ ++enum CAN_BITTIME_TYPE { ++ /** Standard bit-time definition according to Bosch */ ++ CAN_BITTIME_STD, ++ /** Hardware-specific BTR bit-time definition */ ++ CAN_BITTIME_BTR ++}; ++ ++/** ++ * See @ref CAN_BITTIME_TYPE ++ */ ++typedef enum CAN_BITTIME_TYPE can_bittime_type_t; ++ ++/** ++ * Standard bit-time parameters according to Bosch ++ */ ++struct can_bittime_std { ++ uint32_t brp; /**< Baud rate prescaler */ ++ uint8_t prop_seg; /**< from 1 to 8 */ ++ uint8_t phase_seg1; /**< from 1 to 8 */ ++ uint8_t phase_seg2; /**< from 1 to 8 */ ++ uint8_t sjw:7; /**< from 1 to 4 */ ++ uint8_t sam:1; /**< 1 - enable triple sampling */ ++}; ++ ++/** ++ * Hardware-specific BTR bit-times ++ */ ++struct can_bittime_btr { ++ ++ uint8_t btr0; /**< Bus timing register 0 */ ++ uint8_t btr1; /**< Bus timing register 1 */ ++}; ++ ++/** ++ * Custom CAN bit-time definition ++ */ ++struct can_bittime { ++ /** Type of bit-time definition */ ++ can_bittime_type_t type; ++ ++ union { ++ /** Standard bit-time */ ++ struct can_bittime_std std; ++ /** Hardware-spcific BTR bit-time */ ++ struct can_bittime_btr btr; ++ }; ++}; ++ ++/*! ++ * @anchor CAN_MODE @name CAN operation modes ++ * Modes into which CAN controllers can be set ++ * @{ */ ++enum CAN_MODE { ++ /*! Set controller in Stop mode (no reception / transmission possible) */ ++ CAN_MODE_STOP = 0, ++ ++ /*! Set controller into normal operation. @n ++ * Coming from stopped mode or bus off, the controller begins with no ++ * errors in @ref CAN_STATE_ACTIVE. */ ++ CAN_MODE_START, ++ ++ /*! Set controller into Sleep mode. @n ++ * This is only possible if the controller is not stopped or bus-off. @n ++ * Notice that sleep mode will only be entered when there is no bus ++ * activity. If the controller detects bus activity while "sleeping" ++ * it will go into operating mode again. @n ++ * To actively leave sleep mode again trigger @c CAN_MODE_START. */ ++ CAN_MODE_SLEEP ++}; ++/** @} */ ++ ++/** See @ref CAN_MODE */ ++typedef enum CAN_MODE can_mode_t; ++ ++/*! ++ * @anchor CAN_CTRLMODE @name CAN controller modes ++ * Special CAN controllers modes, which can be or'ed together. ++ * ++ * @note These modes are hardware-dependent. Please consult the hardware ++ * manual of the CAN controller for more detailed information. ++ * ++ * @{ */ ++ ++/*! Listen-Only mode ++ * ++ * In this mode the CAN controller would give no acknowledge to the CAN-bus, ++ * even if a message is received successfully and messages would not be ++ * transmitted. 
This mode might be useful for bus-monitoring, hot-plugging ++ * or throughput analysis. */ ++#define CAN_CTRLMODE_LISTENONLY 0x1 ++ ++/*! Loopback mode ++ * ++ * In this mode the CAN controller does an internal loop-back, a message is ++ * transmitted and simultaneously received. That mode can be used for self ++ * test operation. */ ++#define CAN_CTRLMODE_LOOPBACK 0x2 ++ ++/*! Triple sampling mode ++ * ++ * In this mode the CAN controller uses Triple sampling. */ ++#define CAN_CTRLMODE_3_SAMPLES 0x4 ++ ++/** @} */ ++ ++/** See @ref CAN_CTRLMODE */ ++typedef int can_ctrlmode_t; ++ ++/*! ++ * @anchor CAN_STATE @name CAN controller states ++ * States a CAN controller can be in. ++ * @{ */ ++enum CAN_STATE { ++ /** CAN controller is error active */ ++ CAN_STATE_ERROR_ACTIVE = 0, ++ /** CAN controller is active */ ++ CAN_STATE_ACTIVE = 0, ++ ++ /** CAN controller is error active, warning level is reached */ ++ CAN_STATE_ERROR_WARNING = 1, ++ /** CAN controller is error active, warning level is reached */ ++ CAN_STATE_BUS_WARNING = 1, ++ ++ /** CAN controller is error passive */ ++ CAN_STATE_ERROR_PASSIVE = 2, ++ /** CAN controller is error passive */ ++ CAN_STATE_BUS_PASSIVE = 2, ++ ++ /** CAN controller went into Bus Off */ ++ CAN_STATE_BUS_OFF, ++ ++ /** CAN controller is scanning to get the baudrate */ ++ CAN_STATE_SCANNING_BAUDRATE, ++ ++ /** CAN controller is in stopped mode */ ++ CAN_STATE_STOPPED, ++ ++ /** CAN controller is in Sleep mode */ ++ CAN_STATE_SLEEPING, ++}; ++/** @} */ ++ ++/** See @ref CAN_STATE */ ++typedef enum CAN_STATE can_state_t; ++ ++#define CAN_STATE_OPERATING(state) ((state) < CAN_STATE_BUS_OFF) ++ ++/** ++ * Filter for reception of CAN messages. ++ * ++ * This filter works as follows: ++ * A received CAN ID is AND'ed bitwise with @c can_mask and then compared to ++ * @c can_id. This also includes the @ref CAN_EFF_FLAG and @ref CAN_RTR_FLAG ++ * of @ref CAN_xxx_FLAG. If this comparison is true, the message will be ++ * received by the socket. The logic can be inverted with the @c can_id flag ++ * @ref CAN_INV_FILTER : ++ * ++ * @code ++ * if (can_id & CAN_INV_FILTER) { ++ * if ((received_can_id & can_mask) != (can_id & ~CAN_INV_FILTER)) ++ * accept-message; ++ * } else { ++ * if ((received_can_id & can_mask) == can_id) ++ * accept-message; ++ * } ++ * @endcode ++ * ++ * Multiple filters can be arranged in a filter list and set with ++ * @ref Sockopts. If one of these filters matches a CAN ID upon reception ++ * of a CAN frame, this frame is accepted. ++ * ++ */ ++typedef struct can_filter { ++ /** CAN ID which must match with incoming IDs after passing the mask. ++ * The filter logic can be inverted with the flag @ref CAN_INV_FILTER. */ ++ uint32_t can_id; ++ ++ /** Mask which is applied to incoming IDs. See @ref CAN_xxx_MASK ++ * "CAN ID masks" if exactly one CAN ID should come through. */ ++ uint32_t can_mask; ++} can_filter_t; ++ ++/** ++ * Socket address structure for the CAN address family ++ */ ++struct sockaddr_can { ++ /** CAN address family, must be @c AF_CAN */ ++ sa_family_t can_family; ++ ++ /** Interface index of CAN controller. See @ref SIOCGIFINDEX. */ ++ int can_ifindex; ++}; ++ ++/** ++ * Raw CAN frame ++ * ++ * Central structure for receiving and sending CAN frames. ++ */ ++typedef struct can_frame { ++ /** CAN ID of the frame ++ * ++ * See @ref CAN_xxx_FLAG "CAN ID flags" for special bits. 
++ */ ++ can_id_t can_id; ++ ++ /** Size of the payload in bytes */ ++ uint8_t can_dlc; ++ ++ /** Payload data bytes */ ++ uint8_t data[8] __attribute__ ((aligned(8))); ++} can_frame_t; ++ ++/** ++ * CAN interface request descriptor ++ * ++ * Parameter block for submitting CAN control requests. ++ */ ++struct can_ifreq { ++ union { ++ char ifrn_name[IFNAMSIZ]; ++ } ifr_ifrn; ++ ++ union { ++ struct can_bittime bittime; ++ can_baudrate_t baudrate; ++ can_ctrlmode_t ctrlmode; ++ can_mode_t mode; ++ can_state_t state; ++ int ifru_ivalue; ++ } ifr_ifru; ++}; ++ ++/*! ++ * @anchor RTCAN_TIMESTAMPS @name Timestamp switches ++ * Arguments to pass to @ref RTCAN_RTIOC_TAKE_TIMESTAMP ++ * @{ */ ++#define RTCAN_TAKE_NO_TIMESTAMPS 0 /**< Switch off taking timestamps */ ++#define RTCAN_TAKE_TIMESTAMPS 1 /**< Do take timestamps */ ++/** @} */ ++ ++#define RTIOC_TYPE_CAN RTDM_CLASS_CAN ++ ++/*! ++ * @anchor Rawsockopts @name RAW socket options ++ * Setting and getting CAN RAW socket options. ++ * @{ */ ++ ++/** ++ * CAN filter definition ++ * ++ * A CAN raw filter list with elements of struct can_filter can be installed ++ * with @c setsockopt. This list is used upon reception of CAN frames to ++ * decide whether the bound socket will receive a frame. An empty filter list ++ * can also be defined using optlen = 0, which is recommanded for write-only ++ * sockets. ++ * @n ++ * If the socket was already bound with @ref Bind, the old filter list ++ * gets replaced with the new one. Be aware that already received, but ++ * not read out CAN frames may stay in the socket buffer. ++ * @n ++ * @n ++ * @param [in] level @b SOL_CAN_RAW ++ * ++ * @param [in] optname @b CAN_RAW_FILTER ++ * ++ * @param [in] optval Pointer to array of struct can_filter. ++ * ++ * @param [in] optlen Size of filter list: count * sizeof( struct can_filter). ++ * @n ++ * @coretags{task-unrestricted} ++ * @n ++ * Specific return values: ++ * - -EFAULT (It was not possible to access user space memory area at the ++ * specified address.) ++ * - -ENOMEM (Not enough memory to fulfill the operation) ++ * - -EINVAL (Invalid length "optlen") ++ * - -ENOSPC (No space to store filter list, check RT-Socket-CAN kernel ++ * parameters) ++ * . ++ */ ++#define CAN_RAW_FILTER 0x1 ++ ++/** ++ * CAN error mask ++ * ++ * A CAN error mask (see @ref Errors) can be set with @c setsockopt. This ++ * mask is then used to decide if error frames are delivered to this socket ++ * in case of error condidtions. The error frames are marked with the ++ * @ref CAN_ERR_FLAG of @ref CAN_xxx_FLAG and must be handled by the ++ * application properly. A detailed description of the errors can be ++ * found in the @c can_id and the @c data fields of struct can_frame ++ * (see @ref Errors for futher details). ++ * ++ * @n ++ * @param [in] level @b SOL_CAN_RAW ++ * ++ * @param [in] optname @b CAN_RAW_ERR_FILTER ++ * ++ * @param [in] optval Pointer to error mask of type can_err_mask_t. ++ * ++ * @param [in] optlen Size of error mask: sizeof(can_err_mask_t). ++ * ++ * @coretags{task-unrestricted} ++ * @n ++ * Specific return values: ++ * - -EFAULT (It was not possible to access user space memory area at the ++ * specified address.) ++ * - -EINVAL (Invalid length "optlen") ++ * . ++ */ ++#define CAN_RAW_ERR_FILTER 0x2 ++ ++/** ++ * CAN TX loopback ++ * ++ * The TX loopback to other local sockets can be selected with this ++ * @c setsockopt. 
++ * ++ * @note The TX loopback feature must be enabled in the kernel and then ++ * the loopback to other local TX sockets is enabled by default. ++ * ++ * @n ++ * @param [in] level @b SOL_CAN_RAW ++ * ++ * @param [in] optname @b CAN_RAW_LOOPBACK ++ * ++ * @param [in] optval Pointer to integer value. ++ * ++ * @param [in] optlen Size of int: sizeof(int). ++ * ++ * @coretags{task-unrestricted} ++ * @n ++ * Specific return values: ++ * - -EFAULT (It was not possible to access user space memory area at the ++ * specified address.) ++ * - -EINVAL (Invalid length "optlen") ++ * - -EOPNOTSUPP (not supported, check RT-Socket-CAN kernel parameters). ++ */ ++#define CAN_RAW_LOOPBACK 0x3 ++ ++/** ++ * CAN receive own messages ++ * ++ * Not supported by RT-Socket-CAN, but defined for compatibility with ++ * Socket-CAN. ++ */ ++#define CAN_RAW_RECV_OWN_MSGS 0x4 ++ ++/** @} */ ++ ++/*! ++ * @anchor CANIOCTLs @name IOCTLs ++ * CAN device IOCTLs ++ * ++ * @deprecated Passing \c struct \c ifreq as a request descriptor ++ * for CAN IOCTLs is still accepted for backward compatibility, ++ * however it is recommended to switch to \c struct \c can_ifreq at ++ * the first opportunity. ++ * ++ * @{ */ ++ ++/** ++ * Get CAN interface index by name ++ * ++ * @param [in,out] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). If ++ * ifr_name holds a valid CAN interface ++ * name ifr_ifindex will be filled with ++ * the corresponding interface index. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * ++ * @coretags{task-unrestricted} ++ */ ++#ifdef DOXYGEN_CPP /* For Doxygen only, already defined by kernel headers */ ++#define SIOCGIFINDEX defined_by_kernel_header_file ++#endif ++ ++/** ++ * Set baud rate ++ * ++ * The baudrate must be specified in bits per second. The driver will ++ * try to calculate resonable CAN bit-timing parameters. You can use ++ * @ref SIOCSCANCUSTOMBITTIME to set custom bit-timing. ++ * ++ * @param [in] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru must be filled with an instance of ++ * @ref can_baudrate_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No valid baud rate, see @ref can_baudrate_t. ++ * - -EDOM : Baud rate not possible. ++ * - -EAGAIN: Request could not be successully fulfilled. Try again. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ * ++ * @note Setting the baud rate is a configuration task. It should ++ * be done deliberately or otherwise CAN messages will likely be lost. ++ */ ++#define SIOCSCANBAUDRATE _IOW(RTIOC_TYPE_CAN, 0x01, struct can_ifreq) ++ ++/** ++ * Get baud rate ++ * ++ * @param [in,out] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru will be filled with an instance of ++ * @ref can_baudrate_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No baud rate was set yet. 
++ * ++ * @coretags{task-unrestricted} ++ */ ++#define SIOCGCANBAUDRATE _IOWR(RTIOC_TYPE_CAN, 0x02, struct can_ifreq) ++ ++/** ++ * Set custom bit time parameter ++ * ++ * Custem-bit time could be defined in various formats (see ++ * struct can_bittime). ++ * ++ * @param [in] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru must be filled with an instance of ++ * struct can_bittime. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No valid baud rate, see @ref can_baudrate_t. ++ * - -EAGAIN: Request could not be successully fulfilled. Try again. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ * ++ * @note Setting the bit-time is a configuration task. It should ++ * be done deliberately or otherwise CAN messages will likely be lost. ++ */ ++#define SIOCSCANCUSTOMBITTIME _IOW(RTIOC_TYPE_CAN, 0x03, struct can_ifreq) ++ ++/** ++ * Get custom bit-time parameters ++ * ++ * @param [in,out] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru will be filled with an instance of ++ * struct can_bittime. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No baud rate was set yet. ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define SIOCGCANCUSTOMBITTIME _IOWR(RTIOC_TYPE_CAN, 0x04, struct can_ifreq) ++ ++/** ++ * Set operation mode of CAN controller ++ * ++ * See @ref CAN_MODE "CAN controller modes" for available modes. ++ * ++ * @param [in] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru must be filled with an instance of ++ * @ref can_mode_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EAGAIN: (@ref CAN_MODE_START, @ref CAN_MODE_STOP) Could not successfully ++ * set mode, hardware is busy. Try again. ++ * - -EINVAL: (@ref CAN_MODE_START) Cannot start controller, ++ * set baud rate first. ++ * - -ENETDOWN: (@ref CAN_MODE_SLEEP) Cannot go into sleep mode because ++ controller is stopped or bus off. ++ * - -EOPNOTSUPP: unknown mode ++ * ++ * @coretags{task-unrestricted, might-switch} ++ * ++ * @note Setting a CAN controller into normal operation after a bus-off can ++ * take some time (128 occurrences of 11 consecutive recessive bits). ++ * In such a case, although this IOCTL will return immediately with success ++ * and @ref SIOCGCANSTATE will report @ref CAN_STATE_ACTIVE, ++ * bus-off recovery may still be in progress. @n ++ * If a controller is bus-off, setting it into stop mode will return no error ++ * but the controller remains bus-off. ++ */ ++#define SIOCSCANMODE _IOW(RTIOC_TYPE_CAN, 0x05, struct can_ifreq) ++ ++/** ++ * Get current state of CAN controller ++ * ++ * States are divided into main states and additional error indicators. A CAN ++ * controller is always in exactly one main state. CAN bus errors are ++ * registered by the CAN hardware and collected by the driver. There is one ++ * error indicator (bit) per error type. 
If this IOCTL is triggered the error ++ * types which occured since the last call of this IOCTL are reported and ++ * thereafter the error indicators are cleared. See also ++ * @ref CAN_STATE "CAN controller states". ++ * ++ * @param [in,out] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru will be filled with an instance of ++ * @ref can_mode_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++#define SIOCGCANSTATE _IOWR(RTIOC_TYPE_CAN, 0x06, struct can_ifreq) ++ ++/** ++ * Set special controller modes ++ * ++ * Various special controller modes could be or'ed together (see ++ * @ref CAN_CTRLMODE for further information). ++ * ++ * @param [in] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru must be filled with an instance of ++ * @ref can_ctrlmode_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No valid baud rate, see @ref can_baudrate_t. ++ * - -EAGAIN: Request could not be successully fulfilled. Try again. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ * ++ * @note Setting special controller modes is a configuration task. It should ++ * be done deliberately or otherwise CAN messages will likely be lost. ++ */ ++#define SIOCSCANCTRLMODE _IOW(RTIOC_TYPE_CAN, 0x07, struct can_ifreq) ++ ++/** ++ * Get special controller modes ++ * ++ * ++ * @param [in] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru must be filled with an instance of ++ * @ref can_ctrlmode_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No baud rate was set yet. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++#define SIOCGCANCTRLMODE _IOWR(RTIOC_TYPE_CAN, 0x08, struct can_ifreq) ++ ++/** ++ * Enable or disable storing a high precision timestamp upon reception of ++ * a CAN frame. ++ * ++ * A newly created socket takes no timestamps by default. ++ * ++ * @param [in] arg int variable, see @ref RTCAN_TIMESTAMPS "Timestamp switches" ++ * ++ * @return 0 on success. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note Activating taking timestamps only has an effect on newly received ++ * CAN messages from the bus. Frames that already are in the socket buffer do ++ * not have timestamps if it was deactivated before. See @ref Recv "Receive" ++ * for more details. ++ */ ++#define RTCAN_RTIOC_TAKE_TIMESTAMP _IOW(RTIOC_TYPE_CAN, 0x09, int) ++ ++/** ++ * Specify a reception timeout for a socket ++ * ++ * Defines a timeout for all receive operations via a ++ * socket which will take effect when one of the @ref Recv "receive functions" ++ * is called without the @c MSG_DONTWAIT flag set. ++ * ++ * The default value for a newly created socket is an infinite timeout. ++ * ++ * @note The setting of the timeout value is not done atomically to avoid ++ * locks. 
Please set the value before receiving messages from the socket. ++ * ++ * @param [in] arg Pointer to @ref nanosecs_rel_t variable. The value is ++ * interpreted as relative timeout in nanoseconds in case ++ * of a positive value. ++ * See @ref RTDM_TIMEOUT_xxx "Timeouts" for special timeouts. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define RTCAN_RTIOC_RCV_TIMEOUT _IOW(RTIOC_TYPE_CAN, 0x0A, nanosecs_rel_t) ++ ++/** ++ * Specify a transmission timeout for a socket ++ * ++ * Defines a timeout for all send operations via a ++ * socket which will take effect when one of the @ref Send "send functions" ++ * is called without the @c MSG_DONTWAIT flag set. ++ * ++ * The default value for a newly created socket is an infinite timeout. ++ * ++ * @note The setting of the timeout value is not done atomically to avoid ++ * locks. Please set the value before sending messages to the socket. ++ * ++ * @param [in] arg Pointer to @ref nanosecs_rel_t variable. The value is ++ * interpreted as relative timeout in nanoseconds in case ++ * of a positive value. ++ * See @ref RTDM_TIMEOUT_xxx "Timeouts" for special timeouts. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define RTCAN_RTIOC_SND_TIMEOUT _IOW(RTIOC_TYPE_CAN, 0x0B, nanosecs_rel_t) ++/** @} */ ++ ++#define CAN_ERR_DLC 8 /* dlc for error frames */ ++ ++/*! ++ * @anchor Errors @name Error mask ++ * Error class (mask) in @c can_id field of struct can_frame to ++ * be used with @ref CAN_RAW_ERR_FILTER. ++ * ++ * @b Note: Error reporting is hardware dependent and most CAN controllers ++ * report less detailed error conditions than the SJA1000. ++ * ++ * @b Note: In case of a bus-off error condition (@ref CAN_ERR_BUSOFF), the ++ * CAN controller is @b not restarted automatically. It is the application's ++ * responsibility to react appropriately, e.g. calling @ref CAN_MODE_START. ++ * ++ * @b Note: Bus error interrupts (@ref CAN_ERR_BUSERROR) are enabled when an ++ * application is calling a @ref Recv function on a socket listening ++ * on bus errors (using @ref CAN_RAW_ERR_FILTER). After one bus error has ++ * occured, the interrupt will be disabled to allow the application time for ++ * error processing and to efficiently avoid bus error interrupt flooding. ++ * @{ */ ++ ++/** TX timeout (netdevice driver) */ ++#define CAN_ERR_TX_TIMEOUT 0x00000001U ++ ++/** Lost arbitration (see @ref Error0 "data[0]") */ ++#define CAN_ERR_LOSTARB 0x00000002U ++ ++/** Controller problems (see @ref Error1 "data[1]") */ ++#define CAN_ERR_CRTL 0x00000004U ++ ++/** Protocol violations (see @ref Error2 "data[2]", ++ @ref Error3 "data[3]") */ ++#define CAN_ERR_PROT 0x00000008U ++ ++/** Transceiver status (see @ref Error4 "data[4]") */ ++#define CAN_ERR_TRX 0x00000010U ++ ++/** Received no ACK on transmission */ ++#define CAN_ERR_ACK 0x00000020U ++ ++/** Bus off */ ++#define CAN_ERR_BUSOFF 0x00000040U ++ ++/** Bus error (may flood!) */ ++#define CAN_ERR_BUSERROR 0x00000080U ++ ++/** Controller restarted */ ++#define CAN_ERR_RESTARTED 0x00000100U ++ ++/** Omit EFF, RTR, ERR flags */ ++#define CAN_ERR_MASK 0x1FFFFFFFU ++ ++/** @} */ ++ ++/*! ++ * @anchor Error0 @name Arbitration lost error ++ * Error in the data[0] field of struct can_frame. ++ * @{ */ ++/* arbitration lost in bit ... 
/ data[0] */ ++#define CAN_ERR_LOSTARB_UNSPEC 0x00 /**< unspecified */ ++ /**< else bit number in bitstream */ ++/** @} */ ++ ++/*! ++ * @anchor Error1 @name Controller problems ++ * Error in the data[1] field of struct can_frame. ++ * @{ */ ++/* error status of CAN-controller / data[1] */ ++#define CAN_ERR_CRTL_UNSPEC 0x00 /**< unspecified */ ++#define CAN_ERR_CRTL_RX_OVERFLOW 0x01 /**< RX buffer overflow */ ++#define CAN_ERR_CRTL_TX_OVERFLOW 0x02 /**< TX buffer overflow */ ++#define CAN_ERR_CRTL_RX_WARNING 0x04 /**< reached warning level for RX errors */ ++#define CAN_ERR_CRTL_TX_WARNING 0x08 /**< reached warning level for TX errors */ ++#define CAN_ERR_CRTL_RX_PASSIVE 0x10 /**< reached passive level for RX errors */ ++#define CAN_ERR_CRTL_TX_PASSIVE 0x20 /**< reached passive level for TX errors */ ++/** @} */ ++ ++/*! ++ * @anchor Error2 @name Protocol error type ++ * Error in the data[2] field of struct can_frame. ++ * @{ */ ++/* error in CAN protocol (type) / data[2] */ ++#define CAN_ERR_PROT_UNSPEC 0x00 /**< unspecified */ ++#define CAN_ERR_PROT_BIT 0x01 /**< single bit error */ ++#define CAN_ERR_PROT_FORM 0x02 /**< frame format error */ ++#define CAN_ERR_PROT_STUFF 0x04 /**< bit stuffing error */ ++#define CAN_ERR_PROT_BIT0 0x08 /**< unable to send dominant bit */ ++#define CAN_ERR_PROT_BIT1 0x10 /**< unable to send recessive bit */ ++#define CAN_ERR_PROT_OVERLOAD 0x20 /**< bus overload */ ++#define CAN_ERR_PROT_ACTIVE 0x40 /**< active error announcement */ ++#define CAN_ERR_PROT_TX 0x80 /**< error occured on transmission */ ++/** @} */ ++ ++/*! ++ * @anchor Error3 @name Protocol error location ++ * Error in the data[3] field of struct can_frame. ++ * @{ */ ++/* error in CAN protocol (location) / data[3] */ ++#define CAN_ERR_PROT_LOC_UNSPEC 0x00 /**< unspecified */ ++#define CAN_ERR_PROT_LOC_SOF 0x03 /**< start of frame */ ++#define CAN_ERR_PROT_LOC_ID28_21 0x02 /**< ID bits 28 - 21 (SFF: 10 - 3) */ ++#define CAN_ERR_PROT_LOC_ID20_18 0x06 /**< ID bits 20 - 18 (SFF: 2 - 0 )*/ ++#define CAN_ERR_PROT_LOC_SRTR 0x04 /**< substitute RTR (SFF: RTR) */ ++#define CAN_ERR_PROT_LOC_IDE 0x05 /**< identifier extension */ ++#define CAN_ERR_PROT_LOC_ID17_13 0x07 /**< ID bits 17-13 */ ++#define CAN_ERR_PROT_LOC_ID12_05 0x0F /**< ID bits 12-5 */ ++#define CAN_ERR_PROT_LOC_ID04_00 0x0E /**< ID bits 4-0 */ ++#define CAN_ERR_PROT_LOC_RTR 0x0C /**< RTR */ ++#define CAN_ERR_PROT_LOC_RES1 0x0D /**< reserved bit 1 */ ++#define CAN_ERR_PROT_LOC_RES0 0x09 /**< reserved bit 0 */ ++#define CAN_ERR_PROT_LOC_DLC 0x0B /**< data length code */ ++#define CAN_ERR_PROT_LOC_DATA 0x0A /**< data section */ ++#define CAN_ERR_PROT_LOC_CRC_SEQ 0x08 /**< CRC sequence */ ++#define CAN_ERR_PROT_LOC_CRC_DEL 0x18 /**< CRC delimiter */ ++#define CAN_ERR_PROT_LOC_ACK 0x19 /**< ACK slot */ ++#define CAN_ERR_PROT_LOC_ACK_DEL 0x1B /**< ACK delimiter */ ++#define CAN_ERR_PROT_LOC_EOF 0x1A /**< end of frame */ ++#define CAN_ERR_PROT_LOC_INTERM 0x12 /**< intermission */ ++/** @} */ ++ ++/*! ++ * @anchor Error4 @name Protocol error location ++ * Error in the data[4] field of struct can_frame. 
++ * @{ */ ++/* error status of CAN-transceiver / data[4] */ ++/* CANH CANL */ ++#define CAN_ERR_TRX_UNSPEC 0x00 /**< 0000 0000 */ ++#define CAN_ERR_TRX_CANH_NO_WIRE 0x04 /**< 0000 0100 */ ++#define CAN_ERR_TRX_CANH_SHORT_TO_BAT 0x05 /**< 0000 0101 */ ++#define CAN_ERR_TRX_CANH_SHORT_TO_VCC 0x06 /**< 0000 0110 */ ++#define CAN_ERR_TRX_CANH_SHORT_TO_GND 0x07 /**< 0000 0111 */ ++#define CAN_ERR_TRX_CANL_NO_WIRE 0x40 /**< 0100 0000 */ ++#define CAN_ERR_TRX_CANL_SHORT_TO_BAT 0x50 /**< 0101 0000 */ ++#define CAN_ERR_TRX_CANL_SHORT_TO_VCC 0x60 /**< 0110 0000 */ ++#define CAN_ERR_TRX_CANL_SHORT_TO_GND 0x70 /**< 0111 0000 */ ++#define CAN_ERR_TRX_CANL_SHORT_TO_CANH 0x80 /**< 1000 0000 */ ++/** @} */ ++ ++/** @} */ ++ ++#endif /* !_RTDM_UAPI_CAN_H */ +--- linux/include/xenomai/rtdm/uapi/autotune.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/autotune.h 2021-04-07 16:01:28.445632426 +0800 +@@ -0,0 +1,40 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _RTDM_UAPI_AUTOTUNE_H ++#define _RTDM_UAPI_AUTOTUNE_H ++ ++#include ++ ++#define RTDM_CLASS_AUTOTUNE RTDM_CLASS_MISC ++#define RTDM_SUBCLASS_AUTOTUNE 0 ++ ++struct autotune_setup { ++ __u32 period; ++ __u32 quiet; ++}; ++ ++#define AUTOTUNE_RTIOC_IRQ _IOW(RTDM_CLASS_AUTOTUNE, 0, struct autotune_setup) ++#define AUTOTUNE_RTIOC_KERN _IOW(RTDM_CLASS_AUTOTUNE, 1, struct autotune_setup) ++#define AUTOTUNE_RTIOC_USER _IOW(RTDM_CLASS_AUTOTUNE, 2, struct autotune_setup) ++#define AUTOTUNE_RTIOC_PULSE _IOW(RTDM_CLASS_AUTOTUNE, 3, __u64) ++#define AUTOTUNE_RTIOC_RUN _IOR(RTDM_CLASS_AUTOTUNE, 4, __u32) ++#define AUTOTUNE_RTIOC_RESET _IO(RTDM_CLASS_AUTOTUNE, 5) ++ ++#endif /* !_RTDM_UAPI_AUTOTUNE_H */ +--- linux/include/xenomai/rtdm/uapi/ipc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/ipc.h 2021-04-07 16:01:28.440632433 +0800 +@@ -0,0 +1,881 @@ ++/** ++ * @file ++ * This file is part of the Xenomai project. ++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++ ++#ifndef _RTDM_UAPI_IPC_H ++#define _RTDM_UAPI_IPC_H ++ ++/** ++ * @ingroup rtdm_profiles ++ * @defgroup rtdm_ipc Real-time IPC ++ * ++ * @b Profile @b Revision: 1 ++ * @n ++ * @n ++ * @par Device Characteristics ++ * @n ++ * @ref rtdm_driver_flags "Device Flags": @c RTDM_PROTOCOL_DEVICE @n ++ * @n ++ * @ref rtdm_driver.protocol_family "Protocol Family": @c PF_RTIPC @n ++ * @n ++ * @ref rtdm_driver.socket_type "Socket Type": @c SOCK_DGRAM @n ++ * @n ++ * @ref rtdm_driver_profile "Device Class": @c RTDM_CLASS_RTIPC @n ++ * @n ++ * @{ ++ * ++ * @anchor rtipc_operations @name Supported operations ++ * Standard socket operations supported by the RTIPC protocols. ++ * @{ ++ */ ++ ++/** Create an endpoint for communication in the AF_RTIPC domain. ++ * ++ * @param[in] domain The communication domain. Must be AF_RTIPC. ++ * ++ * @param[in] type The socket type. Must be SOCK_DGRAM. ++ * ++ * @param [in] protocol Any of @ref IPCPROTO_XDDP, @ref IPCPROTO_IDDP, ++ * or @ref IPCPROTO_BUFP. @ref IPCPROTO_IPC is also valid, and refers ++ * to the default RTIPC protocol, namely @ref IPCPROTO_IDDP. ++ * ++ * @return In addition to the standard error codes for @c socket(2), ++ * the following specific error code may be returned: ++ * - -ENOPROTOOPT (Protocol is known, but not compiled in the RTIPC driver). ++ * See @ref RTIPC_PROTO "RTIPC protocols" ++ * for available protocols. ++ * ++ * @par Calling context: ++ * non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int socket__AF_RTIPC(int domain =AF_RTIPC, int type =SOCK_DGRAM, int protocol); ++#endif ++ ++/** ++ * Close a RTIPC socket descriptor. ++ * ++ * Blocking calls to any of the @ref sendmsg__AF_RTIPC "sendmsg" or @ref ++ * recvmsg__AF_RTIPC "recvmsg" functions will be unblocked when the socket ++ * is closed and return with an error. ++ * ++ * @param[in] sockfd The socket descriptor to close. ++ * ++ * @return In addition to the standard error codes for @c close(2), ++ * the following specific error code may be returned: ++ * none ++ * ++ * @par Calling context: ++ * non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int close__AF_RTIPC(int sockfd); ++#endif ++ ++/** ++ * Bind a RTIPC socket to a port. ++ * ++ * Bind the socket to a destination port. ++ * ++ * @param[in] sockfd The RTDM file descriptor obtained from the socket ++ * creation call. ++ * ++ * @param [in] addr The address to bind the socket to (see struct ++ * sockaddr_ipc). The meaning of such address depends on the RTIPC ++ * protocol in use for the socket: ++ * ++ * - IPCPROTO_XDDP ++ * ++ * This action creates an endpoint for channelling traffic between ++ * the Xenomai and Linux domains. ++ * ++ * @em sipc_family must be AF_RTIPC, @em sipc_port is either -1, ++ * or a valid free port number between 0 and ++ * CONFIG_XENO_OPT_PIPE_NRDEV-1. ++ * ++ * If @em sipc_port is -1, a free port will be assigned automatically. ++ * ++ * Upon success, the pseudo-device /dev/rtp@em N will be reserved ++ * for this communication channel, where @em N is the assigned port ++ * number. The non real-time side shall open this device to exchange ++ * data over the bound socket. 
++ * ++ * @anchor xddp_label_binding ++ * If a label was assigned (see @ref XDDP_LABEL) prior to ++ * binding the socket to a port, a registry link referring to the ++ * created pseudo-device will be automatically set up as ++ * @c /proc/xenomai/registry/rtipc/xddp/@em label, where @em label is the ++ * label string passed to setsockopt() for the @ref XDDP_LABEL option. ++ * ++ * - IPCPROTO_IDDP ++ * ++ * This action creates an endpoint for exchanging datagrams within ++ * the Xenomai domain. ++ * ++ * @em sipc_family must be AF_RTIPC, @em sipc_port is either -1, ++ * or a valid free port number between 0 and ++ * CONFIG_XENO_OPT_IDDP_NRPORT-1. ++ * ++ * If @em sipc_port is -1, a free port will be assigned ++ * automatically. The real-time peer shall connect to the same port ++ * for exchanging data over the bound socket. ++ * ++ * @anchor iddp_label_binding ++ * If a label was assigned (see @ref IDDP_LABEL) prior to binding ++ * the socket to a port, a registry link referring to the assigned ++ * port number will be automatically set up as @c ++ * /proc/xenomai/registry/rtipc/iddp/@em label, where @em label is ++ * the label string passed to setsockopt() for the @ref IDDP_LABEL ++ * option. ++ * ++ * - IPCPROTO_BUFP ++ * ++ * This action creates an endpoint for a one-way byte ++ * stream within the Xenomai domain. ++ * ++ * @em sipc_family must be AF_RTIPC, @em sipc_port is either -1, ++ * or a valid free port number between 0 and CONFIG_XENO_OPT_BUFP_NRPORT-1. ++ * ++ * If @em sipc_port is -1, an available port will be assigned ++ * automatically. The real-time peer shall connect to the same port ++ * for exchanging data over the bound socket. ++ * ++ * @anchor bufp_label_binding ++ * If a label was assigned (see @ref BUFP_LABEL) prior to binding ++ * the socket to a port, a registry link referring to the assigned ++ * port number will be automatically set up as @c ++ * /proc/xenomai/registry/rtipc/bufp/@em label, where @em label is ++ * the label string passed to setsockopt() for the @a BUFP_LABEL ++ * option. ++ * ++ * @param[in] addrlen The size in bytes of the structure pointed to by ++ * @a addr. ++ * ++ * @return In addition to the standard error codes for @c ++ * bind(2), the following specific error code may be returned: ++ * - -EFAULT (Invalid data address given) ++ * - -ENOMEM (Not enough memory) ++ * - -EINVAL (Invalid parameter) ++ * - -EADDRINUSE (Socket already bound to a port, or no port available) ++ * - -EAGAIN (no registry slot available, check/raise ++ * CONFIG_XENO_OPT_REGISTRY_NRSLOTS) . ++ * ++ * @par Calling context: ++ * non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int bind__AF_RTIPC(int sockfd, const struct sockaddr_ipc *addr, ++ socklen_t addrlen); ++#endif ++ ++/** ++ * Initiate a connection on a RTIPC socket. ++ * ++ * @param[in] sockfd The RTDM file descriptor obtained from the socket ++ * creation call. ++ * ++ * @param [in] addr The address to connect the socket to (see struct ++ * sockaddr_ipc). ++ * ++ * - If sipc_port is a valid port for the protocol, it is used ++ * verbatim and the connection succeeds immediately, regardless of ++ * whether the destination is bound at the time of the call. ++ * ++ * - If sipc_port is -1 and a label was assigned to the socket, ++ * connect() blocks for the requested amount of time (see @ref ++ * SO_RCVTIMEO) until a socket is bound to the same label via @c ++ * bind(2) (see @ref XDDP_LABEL, @ref IDDP_LABEL, @ref BUFP_LABEL), in ++ * which case a connection is established between both endpoints. 
++ * ++ * - If sipc_port is -1 and no label was assigned to the socket, the ++ * default destination address is cleared, meaning that any subsequent ++ * write to the socket will return -EDESTADDRREQ, until a valid ++ * destination address is set via @c connect(2) or @c bind(2). ++ * ++ * @param[in] addrlen The size in bytes of the structure pointed to by ++ * @a addr. ++ * ++ * @return In addition to the standard error codes for @c connect(2), ++ * the following specific error code may be returned: ++ * none. ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int connect__AF_RTIPC(int sockfd, const struct sockaddr_ipc *addr, ++ socklen_t addrlen); ++#endif ++ ++/** ++ * Set options on RTIPC sockets. ++ * ++ * These functions allow to set various socket options. ++ * Supported Levels and Options: ++ * ++ * - Level @ref sockopts_socket "SOL_SOCKET" ++ * - Level @ref sockopts_xddp "SOL_XDDP" ++ * - Level @ref sockopts_iddp "SOL_IDDP" ++ * - Level @ref sockopts_bufp "SOL_BUFP" ++ * . ++ * ++ * @return In addition to the standard error codes for @c ++ * setsockopt(2), the following specific error code may ++ * be returned: ++ * follow the option links above. ++ * ++ * @par Calling context: ++ * non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int setsockopt__AF_RTIPC(int sockfd, int level, int optname, ++ const void *optval, socklen_t optlen); ++#endif ++/** ++ * Get options on RTIPC sockets. ++ * ++ * These functions allow to get various socket options. ++ * Supported Levels and Options: ++ * ++ * - Level @ref sockopts_socket "SOL_SOCKET" ++ * - Level @ref sockopts_xddp "SOL_XDDP" ++ * - Level @ref sockopts_iddp "SOL_IDDP" ++ * - Level @ref sockopts_bufp "SOL_BUFP" ++ * . ++ * ++ * @return In addition to the standard error codes for @c ++ * getsockopt(2), the following specific error code may ++ * be returned: ++ * follow the option links above. ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int getsockopt__AF_RTIPC(int sockfd, int level, int optname, ++ void *optval, socklen_t *optlen); ++#endif ++ ++/** ++ * Send a message on a RTIPC socket. ++ * ++ * @param[in] sockfd The RTDM file descriptor obtained from the socket ++ * creation call. ++ * ++ * @param[in] msg The address of the message header conveying the ++ * datagram. ++ * ++ * @param [in] flags Operation flags: ++ * ++ * - MSG_OOB Send out-of-band message. For all RTIPC protocols except ++ * @ref IPCPROTO_BUFP, sending out-of-band data actually means ++ * pushing them to the head of the receiving queue, so that the ++ * reader will always receive them before normal messages. @ref ++ * IPCPROTO_BUFP does not support out-of-band sending. ++ * ++ * - MSG_DONTWAIT Non-blocking I/O operation. The caller will not be ++ * blocked whenever the message cannot be sent immediately at the ++ * time of the call (e.g. memory shortage), but will rather return ++ * with -EWOULDBLOCK. Unlike other RTIPC protocols, @ref ++ * IPCPROTO_XDDP accepts but never considers MSG_DONTWAIT since ++ * writing to a real-time XDDP endpoint is inherently a non-blocking ++ * operation. ++ * ++ * - MSG_MORE Accumulate data before sending. This flag is accepted by ++ * the @ref IPCPROTO_XDDP protocol only, and tells the send service ++ * to accumulate the outgoing data into an internal streaming ++ * buffer, instead of issuing a datagram immediately for it. See ++ * @ref XDDP_BUFSZ for more. ++ * ++ * @note No RTIPC protocol allows for short writes, and only complete ++ * messages are sent to the peer. 
++ * ++ * @return In addition to the standard error codes for @c sendmsg(2), ++ * the following specific error code may be returned: ++ * none. ++ * ++ * @par Calling context: ++ * RT ++ */ ++#ifdef DOXYGEN_CPP ++ssize_t sendmsg__AF_RTIPC(int sockfd, const struct msghdr *msg, int flags); ++#endif ++ ++/** ++ * Receive a message from a RTIPC socket. ++ * ++ * @param[in] sockfd The RTDM file descriptor obtained from the socket ++ * creation call. ++ * ++ * @param[out] msg The address the message header will be copied at. ++ * ++ * @param [in] flags Operation flags: ++ * ++ * - MSG_DONTWAIT Non-blocking I/O operation. The caller will not be ++ * blocked whenever no message is immediately available for receipt ++ * at the time of the call, but will rather return with ++ * -EWOULDBLOCK. ++ * ++ * @note @ref IPCPROTO_BUFP does not allow for short reads and always ++ * returns the requested amount of bytes, except in one situation: ++ * whenever some writer is waiting for sending data upon a buffer full ++ * condition, while the caller would have to wait for receiving a ++ * complete message. This is usually the sign of a pathological use ++ * of the BUFP socket, like defining an incorrect buffer size via @ref ++ * BUFP_BUFSZ. In that case, a short read is allowed to prevent a ++ * deadlock. ++ * ++ * @return In addition to the standard error codes for @c recvmsg(2), ++ * the following specific error code may be returned: ++ * none. ++ * ++ * @par Calling context: ++ * RT ++ */ ++#ifdef DOXYGEN_CPP ++ssize_t recvmsg__AF_RTIPC(int sockfd, struct msghdr *msg, int flags); ++#endif ++ ++/** ++ * Get socket name. ++ * ++ * The name of the local endpoint for the socket is copied back (see ++ * struct sockaddr_ipc). ++ * ++ * @return In addition to the standard error codes for @c getsockname(2), ++ * the following specific error code may be returned: ++ * none. ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int getsockname__AF_RTIPC(int sockfd, struct sockaddr_ipc *addr, socklen_t *addrlen); ++#endif ++ ++/** ++ * Get socket peer. ++ * ++ * The name of the remote endpoint for the socket is copied back (see ++ * struct sockaddr_ipc). This is the default destination address for ++ * messages sent on the socket. It can be set either explicitly via @c ++ * connect(2), or implicitly via @c bind(2) if no @c connect(2) was ++ * called prior to binding the socket to a port, in which case both ++ * the local and remote names are equal. ++ * ++ * @return In addition to the standard error codes for @c getpeername(2), ++ * the following specific error code may be returned: ++ * none. ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int getpeername__AF_RTIPC(int sockfd, struct sockaddr_ipc *addr, socklen_t *addrlen); ++#endif ++ ++/** @} */ ++ ++#include ++#include ++#include ++ ++/* Address family */ ++#define AF_RTIPC 111 ++ ++/* Protocol family */ ++#define PF_RTIPC AF_RTIPC ++ ++/** ++ * @anchor RTIPC_PROTO @name RTIPC protocol list ++ * protocols for the PF_RTIPC protocol family ++ * ++ * @{ */ ++enum { ++/** Default protocol (IDDP) */ ++ IPCPROTO_IPC = 0, ++/** ++ * Cross-domain datagram protocol (RT <-> non-RT). ++ * ++ * Real-time Xenomai threads and regular Linux threads may want to ++ * exchange data in a way that does not require the former to leave ++ * the real-time domain (i.e. primary mode). The RTDM-based XDDP ++ * protocol is available for this purpose. 
++ * ++ * On the Linux domain side, pseudo-device files named /dev/rtp@em \ ++ * give regular POSIX threads access to non real-time communication ++ * endpoints, via the standard character-based I/O interface. On the ++ * Xenomai domain side, sockets may be bound to XDDP ports, which act ++ * as proxies to send and receive data to/from the associated ++ * pseudo-device files. Ports and pseudo-device minor numbers are ++ * paired, meaning that e.g. socket port 7 will proxy the traffic to/from ++ * /dev/rtp7. ++ * ++ * All data sent through a bound/connected XDDP socket via @c ++ * sendto(2) or @c write(2) will be passed to the peer endpoint in the ++ * Linux domain, and made available for reading via the standard @c ++ * read(2) system call. Conversely, all data sent using @c write(2) ++ * through the non real-time endpoint will be conveyed to the ++ * real-time socket endpoint, and made available to the @c recvfrom(2) ++ * or @c read(2) system calls. ++ */ ++ IPCPROTO_XDDP = 1, ++/** ++ * Intra-domain datagram protocol (RT <-> RT). ++ * ++ * The RTDM-based IDDP protocol enables real-time threads to exchange ++ * datagrams within the Xenomai domain, via socket endpoints. ++ */ ++ IPCPROTO_IDDP = 2, ++/** ++ * Buffer protocol (RT <-> RT, byte-oriented). ++ * ++ * The RTDM-based BUFP protocol implements a lightweight, ++ * byte-oriented, one-way Producer-Consumer data path. All messages ++ * written are buffered into a single memory area in strict FIFO ++ * order, until read by the consumer. ++ * ++ * This protocol always prevents short writes, and only allows short ++ * reads when a potential deadlock situation arises (i.e. readers and ++ * writers waiting for each other indefinitely). ++ */ ++ IPCPROTO_BUFP = 3, ++ IPCPROTO_MAX ++}; ++/** @} */ ++ ++/** ++ * Port number type for the RTIPC address family. ++ */ ++typedef int16_t rtipc_port_t; ++ ++/** ++ * Port label information structure. ++ */ ++struct rtipc_port_label { ++ /** Port label string, null-terminated. */ ++ char label[XNOBJECT_NAME_LEN]; ++}; ++ ++/** ++ * Socket address structure for the RTIPC address family. ++ */ ++struct sockaddr_ipc { ++ /** RTIPC address family, must be @c AF_RTIPC */ ++ sa_family_t sipc_family; ++ /** Port number. */ ++ rtipc_port_t sipc_port; ++}; ++ ++#define SOL_XDDP 311 ++/** ++ * @anchor sockopts_xddp @name XDDP socket options ++ * Setting and getting XDDP socket options. ++ * @{ */ ++/** ++ * XDDP label assignment ++ * ++ * ASCII label strings can be attached to XDDP ports, so that opening ++ * the non-RT endpoint can be done by specifying this symbolic device ++ * name rather than referring to a raw pseudo-device entry ++ * (i.e. /dev/rtp@em N). ++ * ++ * When available, this label will be registered when binding, in ++ * addition to the port number (see @ref xddp_label_binding ++ * "XDDP port binding"). ++ * ++ * It is not allowed to assign a label after the socket was ++ * bound. However, multiple assignment calls are allowed prior to the ++ * binding; the last label set will be used. ++ * ++ * @param [in] level @ref sockopts_xddp "SOL_XDDP" ++ * @param [in] optname @b XDDP_LABEL ++ * @param [in] optval Pointer to struct rtipc_port_label ++ * @param [in] optlen sizeof(struct rtipc_port_label) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen invalid) ++ * . 
++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define XDDP_LABEL 1 ++/** ++ * XDDP local pool size configuration ++ * ++ * By default, the memory needed to convey the data is pulled from ++ * Xenomai's system pool. Setting a local pool size overrides this ++ * default for the socket. ++ * ++ * If a non-zero size was configured, a local pool is allocated at ++ * binding time. This pool will provide storage for pending datagrams. ++ * ++ * It is not allowed to configure a local pool size after the socket ++ * was bound. However, multiple configuration calls are allowed prior ++ * to the binding; the last value set will be used. ++ * ++ * @note: the pool memory is obtained from the host allocator by the ++ * @ref bind__AF_RTIPC "bind call". ++ * ++ * @param [in] level @ref sockopts_xddp "SOL_XDDP" ++ * @param [in] optname @b XDDP_POOLSZ ++ * @param [in] optval Pointer to a variable of type size_t, containing ++ * the required size of the local pool to reserve at binding time ++ * @param [in] optlen sizeof(size_t) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen invalid or *@a optval is zero) ++ * . ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define XDDP_POOLSZ 2 ++/** ++ * XDDP streaming buffer size configuration ++ * ++ * In addition to sending datagrams, real-time threads may stream data ++ * in a byte-oriented mode through the port as well. This increases ++ * the bandwidth and reduces the overhead, when the overall data to ++ * send to the Linux domain is collected by bits, and keeping the ++ * message boundaries is not required. ++ * ++ * This feature is enabled when a non-zero buffer size is set for the ++ * socket. In that case, the real-time data accumulates into the ++ * streaming buffer when MSG_MORE is passed to any of the @ref ++ * sendmsg__AF_RTIPC "send functions", until: ++ * ++ * - the receiver from the Linux domain wakes up and consumes it, ++ * - a different source port attempts to send data to the same ++ * destination port, ++ * - MSG_MORE is absent from the send flags, ++ * - the buffer is full, ++ * . ++ * whichever comes first. ++ * ++ * Setting *@a optval to zero disables the streaming buffer, in which ++ * case all sendings are conveyed in separate datagrams, regardless of ++ * MSG_MORE. ++ * ++ * @note only a single streaming buffer exists per socket. When this ++ * buffer is full, the real-time data stops accumulating and sending ++ * operations resume in mere datagram mode. Accumulation may happen ++ * again after some or all data in the streaming buffer is consumed ++ * from the Linux domain endpoint. ++ * ++ * The streaming buffer size may be adjusted multiple times during the ++ * socket lifetime; the latest configuration change will take effect ++ * when the accumulation resumes after the previous buffer was ++ * flushed. ++ * ++ * @param [in] level @ref sockopts_xddp "SOL_XDDP" ++ * @param [in] optname @b XDDP_BUFSZ ++ * @param [in] optval Pointer to a variable of type size_t, containing ++ * the required size of the streaming buffer ++ * @param [in] optlen sizeof(size_t) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -ENOMEM (Not enough memory) ++ * - -EINVAL (@a optlen is invalid) ++ * . 
++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define XDDP_BUFSZ 3 ++/** ++ * XDDP monitoring callback ++ * ++ * Other RTDM drivers may install a user-defined callback via the @ref ++ * rtdm_setsockopt call from the inter-driver API, in order to collect ++ * particular events occurring on the channel. ++ * ++ * This notification mechanism is particularly useful to monitor a ++ * channel asynchronously while performing other tasks. ++ * ++ * The user-provided routine will be passed the RTDM file descriptor ++ * of the socket receiving the event, the event code, and an optional ++ * argument. Four events are currently defined, see @ref XDDP_EVENTS. ++ * ++ * The XDDP_EVTIN and XDDP_EVTOUT events are fired on behalf of a ++ * fully atomic context; therefore, care must be taken to keep their ++ * overhead low. In those cases, the Xenomai services that may be ++ * called from the callback are restricted to the set allowed to a ++ * real-time interrupt handler. ++ * ++ * @param [in] level @ref sockopts_xddp "SOL_XDDP" ++ * @param [in] optname @b XDDP_MONITOR ++ * @param [in] optval Pointer to a pointer to function of type int ++ * (*)(int fd, int event, long arg), containing the address of the ++ * user-defined callback.Passing a NULL callback pointer ++ * in @a optval disables monitoring. ++ * @param [in] optlen sizeof(int (*)(int fd, int event, long arg)) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EPERM (Operation not allowed from user-space) ++ * - -EINVAL (@a optlen is invalid) ++ * . ++ * ++ * @par Calling context: ++ * RT/non-RT, kernel space only ++ */ ++#define XDDP_MONITOR 4 ++/** @} */ ++ ++/** ++ * @anchor XDDP_EVENTS @name XDDP events ++ * Specific events occurring on XDDP channels, which can be monitored ++ * via the @ref XDDP_MONITOR socket option. ++ * ++ * @{ */ ++/** ++ * @ref XDDP_MONITOR "Monitor" writes to the non real-time endpoint. ++ * ++ * XDDP_EVTIN is sent when data is written to the non real-time ++ * endpoint the socket is bound to (i.e. via /dev/rtp@em N), which ++ * means that some input is pending for the real-time endpoint. The ++ * argument is the size of the incoming message. ++ */ ++#define XDDP_EVTIN 1 ++/** ++ * @ref XDDP_MONITOR "Monitor" reads from the non real-time endpoint. ++ * ++ * XDDP_EVTOUT is sent when the non real-time endpoint successfully ++ * reads a complete message (i.e. via /dev/rtp@em N). The argument is ++ * the size of the outgoing message. ++ */ ++#define XDDP_EVTOUT 2 ++/** ++ * @ref XDDP_MONITOR "Monitor" close from the non real-time endpoint. ++ * ++ * XDDP_EVTDOWN is sent when the non real-time endpoint is closed. The ++ * argument is always 0. ++ */ ++#define XDDP_EVTDOWN 3 ++/** ++ * @ref XDDP_MONITOR "Monitor" memory shortage for non real-time ++ * datagrams. ++ * ++ * XDDP_EVTNOBUF is sent when no memory is available from the pool to ++ * hold the message currently sent from the non real-time ++ * endpoint. The argument is the size of the failed allocation. Upon ++ * return from the callback, the caller will block and retry until ++ * enough space is available from the pool; during that process, the ++ * callback might be invoked multiple times, each time a new attempt ++ * to get the required memory fails. ++ */ ++#define XDDP_EVTNOBUF 4 ++/** @} */ ++ ++#define SOL_IDDP 312 ++/** ++ * @anchor sockopts_iddp @name IDDP socket options ++ * Setting and getting IDDP socket options. 
++ * @{ */ ++/** ++ * IDDP label assignment ++ * ++ * ASCII label strings can be attached to IDDP ports, in order to ++ * connect sockets to them in a more descriptive way than using plain ++ * numeric port values. ++ * ++ * When available, this label will be registered when binding, in ++ * addition to the port number (see @ref iddp_label_binding ++ * "IDDP port binding"). ++ * ++ * It is not allowed to assign a label after the socket was ++ * bound. However, multiple assignment calls are allowed prior to the ++ * binding; the last label set will be used. ++ * ++ * @param [in] level @ref sockopts_iddp "SOL_IDDP" ++ * @param [in] optname @b IDDP_LABEL ++ * @param [in] optval Pointer to struct rtipc_port_label ++ * @param [in] optlen sizeof(struct rtipc_port_label) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen is invalid) ++ * . ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define IDDP_LABEL 1 ++/** ++ * IDDP local pool size configuration ++ * ++ * By default, the memory needed to convey the data is pulled from ++ * Xenomai's system pool. Setting a local pool size overrides this ++ * default for the socket. ++ * ++ * If a non-zero size was configured, a local pool is allocated at ++ * binding time. This pool will provide storage for pending datagrams. ++ * ++ * It is not allowed to configure a local pool size after the socket ++ * was bound. However, multiple configuration calls are allowed prior ++ * to the binding; the last value set will be used. ++ * ++ * @note: the pool memory is obtained from the host allocator by the ++ * @ref bind__AF_RTIPC "bind call". ++ * ++ * @param [in] level @ref sockopts_iddp "SOL_IDDP" ++ * @param [in] optname @b IDDP_POOLSZ ++ * @param [in] optval Pointer to a variable of type size_t, containing ++ * the required size of the local pool to reserve at binding time ++ * @param [in] optlen sizeof(size_t) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen is invalid or *@a optval is zero) ++ * . ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define IDDP_POOLSZ 2 ++/** @} */ ++ ++#define SOL_BUFP 313 ++/** ++ * @anchor sockopts_bufp @name BUFP socket options ++ * Setting and getting BUFP socket options. ++ * @{ */ ++/** ++ * BUFP label assignment ++ * ++ * ASCII label strings can be attached to BUFP ports, in order to ++ * connect sockets to them in a more descriptive way than using plain ++ * numeric port values. ++ * ++ * When available, this label will be registered when binding, in ++ * addition to the port number (see @ref bufp_label_binding ++ * "BUFP port binding"). ++ * ++ * It is not allowed to assign a label after the socket was ++ * bound. However, multiple assignment calls are allowed prior to the ++ * binding; the last label set will be used. ++ * ++ * @param [in] level @ref sockopts_bufp "SOL_BUFP" ++ * @param [in] optname @b BUFP_LABEL ++ * @param [in] optval Pointer to struct rtipc_port_label ++ * @param [in] optlen sizeof(struct rtipc_port_label) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen is invalid) ++ * . 
++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define BUFP_LABEL 1 ++/** ++ * BUFP buffer size configuration ++ * ++ * All messages written to a BUFP socket are buffered in a single ++ * per-socket memory area. Configuring the size of such buffer prior ++ * to binding the socket to a destination port is mandatory. ++ * ++ * It is not allowed to configure a buffer size after the socket was ++ * bound. However, multiple configuration calls are allowed prior to ++ * the binding; the last value set will be used. ++ * ++ * @note: the buffer memory is obtained from the host allocator by the ++ * @ref bind__AF_RTIPC "bind call". ++ * ++ * @param [in] level @ref sockopts_bufp "SOL_BUFP" ++ * @param [in] optname @b BUFP_BUFSZ ++ * @param [in] optval Pointer to a variable of type size_t, containing ++ * the required size of the buffer to reserve at binding time ++ * @param [in] optlen sizeof(size_t) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen is invalid or *@a optval is zero) ++ * . ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define BUFP_BUFSZ 2 ++/** @} */ ++ ++/** ++ * @anchor sockopts_socket @name Socket level options ++ * Setting and getting supported standard socket level options. ++ * @{ */ ++/** ++ * ++ * @ref IPCPROTO_IDDP and @ref IPCPROTO_BUFP protocols support the ++ * standard SO_SNDTIMEO socket option, from the @c SOL_SOCKET level. ++ * ++ * @see @c setsockopt(), @c getsockopt() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399/ ++ */ ++#ifdef DOXYGEN_CPP ++#define SO_SNDTIMEO defined_by_kernel_header_file ++#endif ++/** ++ * ++ * All RTIPC protocols support the standard SO_RCVTIMEO socket option, ++ * from the @c SOL_SOCKET level. ++ * ++ * @see @c setsockopt(), @c getsockopt() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399/ ++ */ ++#ifdef DOXYGEN_CPP ++#define SO_RCVTIMEO defined_by_kernel_header_file ++#endif ++/** @} */ ++ ++/** ++ * @anchor rtdm_ipc_examples @name RTIPC examples ++ * @{ */ ++/** @example bufp-readwrite.c */ ++/** @example bufp-label.c */ ++/** @example iddp-label.c */ ++/** @example iddp-sendrecv.c */ ++/** @example xddp-echo.c */ ++/** @example xddp-label.c */ ++/** @example xddp-stream.c */ ++/** @} */ ++ ++/** @} */ ++ ++#endif /* !_RTDM_UAPI_IPC_H */ +--- linux/include/xenomai/rtdm/uapi/rtdm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/rtdm.h 2021-04-07 16:01:28.436632439 +0800 +@@ -0,0 +1,203 @@ ++/** ++ * @file ++ * Real-Time Driver Model for Xenomai, user API header. ++ * ++ * @note Copyright (C) 2005, 2006 Jan Kiszka ++ * @note Copyright (C) 2005 Joerg Langenberg ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ * @ingroup rtdm_user_api ++ */ ++#ifndef _RTDM_UAPI_RTDM_H ++#define _RTDM_UAPI_RTDM_H ++ ++/*! ++ * @addtogroup rtdm ++ * @{ ++ */ ++ ++/*! ++ * @anchor rtdm_api_versioning @name API Versioning ++ * @{ */ ++/** Common user and driver API version */ ++#define RTDM_API_VER 9 ++ ++/** Minimum API revision compatible with the current release */ ++#define RTDM_API_MIN_COMPAT_VER 9 ++/** @} API Versioning */ ++ ++/** RTDM type for representing absolute dates. Its base type is a 64 bit ++ * unsigned integer. The unit is 1 nanosecond. */ ++typedef uint64_t nanosecs_abs_t; ++ ++/** RTDM type for representing relative intervals. Its base type is a 64 bit ++ * signed integer. The unit is 1 nanosecond. Relative intervals can also ++ * encode the special timeouts "infinite" and "non-blocking", see ++ * @ref RTDM_TIMEOUT_xxx. */ ++typedef int64_t nanosecs_rel_t; ++ ++/*! ++ * @anchor RTDM_TIMEOUT_xxx @name RTDM_TIMEOUT_xxx ++ * Special timeout values ++ * @{ */ ++/** Block forever. */ ++#define RTDM_TIMEOUT_INFINITE 0 ++ ++/** Any negative timeout means non-blocking. */ ++#define RTDM_TIMEOUT_NONE (-1) ++/** @} RTDM_TIMEOUT_xxx */ ++/** @} rtdm */ ++ ++/*! ++ * @addtogroup rtdm_profiles ++ * @{ ++ */ ++ ++/*! ++ * @anchor RTDM_CLASS_xxx @name RTDM_CLASS_xxx ++ * Device classes ++ * @{ */ ++#define RTDM_CLASS_PARPORT 1 ++#define RTDM_CLASS_SERIAL 2 ++#define RTDM_CLASS_CAN 3 ++#define RTDM_CLASS_NETWORK 4 ++#define RTDM_CLASS_RTMAC 5 ++#define RTDM_CLASS_TESTING 6 ++#define RTDM_CLASS_RTIPC 7 ++#define RTDM_CLASS_COBALT 8 ++#define RTDM_CLASS_UDD 9 ++#define RTDM_CLASS_MEMORY 10 ++#define RTDM_CLASS_GPIO 11 ++#define RTDM_CLASS_SPI 12 ++#define RTDM_CLASS_PWM 13 ++ ++#define RTDM_CLASS_MISC 223 ++#define RTDM_CLASS_EXPERIMENTAL 224 ++#define RTDM_CLASS_MAX 255 ++/** @} RTDM_CLASS_xxx */ ++ ++#define RTDM_SUBCLASS_GENERIC (-1) ++ ++#define RTIOC_TYPE_COMMON 0 ++ ++/*! ++ * @anchor device_naming @name Device Naming ++ * Maximum length of device names (excluding the final null character) ++ * @{ ++ */ ++#define RTDM_MAX_DEVNAME_LEN 31 ++/** @} Device Naming */ ++ ++/** ++ * Device information ++ */ ++typedef struct rtdm_device_info { ++ /** Device flags, see @ref dev_flags "Device Flags" for details */ ++ int device_flags; ++ ++ /** Device class ID, see @ref RTDM_CLASS_xxx */ ++ int device_class; ++ ++ /** Device sub-class, either RTDM_SUBCLASS_GENERIC or a ++ * RTDM_SUBCLASS_xxx definition of the related @ref rtdm_profiles ++ * "Device Profile" */ ++ int device_sub_class; ++ ++ /** Supported device profile version */ ++ int profile_version; ++} rtdm_device_info_t; ++ ++/*! ++ * @anchor RTDM_PURGE_xxx_BUFFER @name RTDM_PURGE_xxx_BUFFER ++ * Flags selecting buffers to be purged ++ * @{ */ ++#define RTDM_PURGE_RX_BUFFER 0x0001 ++#define RTDM_PURGE_TX_BUFFER 0x0002 ++/** @} RTDM_PURGE_xxx_BUFFER*/ ++ ++/*! ++ * @anchor common_IOCTLs @name Common IOCTLs ++ * The following IOCTLs are common to all device rtdm_profiles. ++ * @{ ++ */ ++ ++/** ++ * Retrieve information about a device or socket. ++ * @param[out] arg Pointer to information buffer (struct rtdm_device_info) ++ */ ++#define RTIOC_DEVICE_INFO \ ++ _IOR(RTIOC_TYPE_COMMON, 0x00, struct rtdm_device_info) ++ ++/** ++ * Purge internal device or socket buffers. 
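++ *
++ * @par Example
++ * An illustrative sketch (added here, not part of the original header),
++ * flushing both directions on an already opened RTDM file descriptor;
++ * the purge mask is passed directly as the ioctl argument, as described
++ * by the parameter documentation below:
++ * @code
++ * ret = ioctl(fd, RTIOC_PURGE, RTDM_PURGE_RX_BUFFER | RTDM_PURGE_TX_BUFFER);
++ * @endcode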
++ * @param[in] arg Purge mask, see @ref RTDM_PURGE_xxx_BUFFER ++ */ ++#define RTIOC_PURGE _IOW(RTIOC_TYPE_COMMON, 0x10, int) ++/** @} Common IOCTLs */ ++/** @} rtdm */ ++ ++/* Internally used for mapping socket functions on IOCTLs */ ++struct _rtdm_getsockopt_args { ++ int level; ++ int optname; ++ void *optval; ++ socklen_t *optlen; ++}; ++ ++struct _rtdm_setsockopt_args { ++ int level; ++ int optname; ++ const void *optval; ++ socklen_t optlen; ++}; ++ ++struct _rtdm_getsockaddr_args { ++ struct sockaddr *addr; ++ socklen_t *addrlen; ++}; ++ ++struct _rtdm_setsockaddr_args { ++ const struct sockaddr *addr; ++ socklen_t addrlen; ++}; ++ ++#define _RTIOC_GETSOCKOPT _IOW(RTIOC_TYPE_COMMON, 0x20, \ ++ struct _rtdm_getsockopt_args) ++#define _RTIOC_SETSOCKOPT _IOW(RTIOC_TYPE_COMMON, 0x21, \ ++ struct _rtdm_setsockopt_args) ++#define _RTIOC_BIND _IOW(RTIOC_TYPE_COMMON, 0x22, \ ++ struct _rtdm_setsockaddr_args) ++#define _RTIOC_CONNECT _IOW(RTIOC_TYPE_COMMON, 0x23, \ ++ struct _rtdm_setsockaddr_args) ++#define _RTIOC_LISTEN _IOW(RTIOC_TYPE_COMMON, 0x24, \ ++ int) ++#define _RTIOC_ACCEPT _IOW(RTIOC_TYPE_COMMON, 0x25, \ ++ struct _rtdm_getsockaddr_args) ++#define _RTIOC_GETSOCKNAME _IOW(RTIOC_TYPE_COMMON, 0x26, \ ++ struct _rtdm_getsockaddr_args) ++#define _RTIOC_GETPEERNAME _IOW(RTIOC_TYPE_COMMON, 0x27, \ ++ struct _rtdm_getsockaddr_args) ++#define _RTIOC_SHUTDOWN _IOW(RTIOC_TYPE_COMMON, 0x28, \ ++ int) ++ ++/* Internally used for mmap() */ ++struct _rtdm_mmap_request { ++ __u64 offset; ++ size_t length; ++ int prot; ++ int flags; ++}; ++ ++#endif /* !_RTDM_UAPI_RTDM_H */ +--- linux/include/xenomai/rtdm/udd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/udd.h 2021-04-07 16:01:28.319632606 +0800 +@@ -0,0 +1,340 @@ ++/** ++ * @file ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_UDD_H ++#define _COBALT_RTDM_UDD_H ++ ++#include ++#include ++#include ++ ++/** ++ * @ingroup rtdm_profiles ++ * @defgroup rtdm_udd User-space driver core ++ * ++ * This profile includes all mini-drivers sitting on top of the ++ * User-space Device Driver framework (UDD). The generic UDD core ++ * driver enables interrupt control and I/O memory access interfaces ++ * to user-space device drivers, as defined by the mini-drivers when ++ * registering. ++ * ++ * A mini-driver supplements the UDD core with ancillary functions for ++ * dealing with @ref udd_memory_region "memory mappings" and @ref ++ * udd_irq_handler "interrupt control" for a particular I/O ++ * card/device. ++ * ++ * UDD-compliant mini-drivers only have to provide the basic support ++ * for dealing with the interrupt sources present in the device, so ++ * that most part of the device requests can be handled from a Xenomai ++ * application running in user-space. 
Typically, a mini-driver would ++ * handle the interrupt top-half, and the user-space application would ++ * handle the bottom-half. ++ * ++ * This profile is reminiscent of the UIO framework available with the ++ * Linux kernel, adapted to the dual kernel Cobalt environment. ++ * ++ * @{ ++ */ ++ ++/** ++ * @anchor udd_irq_special ++ * Special IRQ values for udd_device.irq ++ * ++ * @{ ++ */ ++/** ++ * No IRQ managed. Passing this code implicitly disables all ++ * interrupt-related services, including control (disable/enable) and ++ * notification. ++ */ ++#define UDD_IRQ_NONE 0 ++/** ++ * IRQ directly managed from the mini-driver on top of the UDD ++ * core. The mini-driver is in charge of attaching the handler(s) to ++ * the IRQ(s) it manages, notifying the Cobalt threads waiting for IRQ ++ * events by calling the udd_notify_event() service. ++ */ ++#define UDD_IRQ_CUSTOM (-1) ++/** @} */ ++ ++/** ++ * @anchor udd_memory_types @name Memory types for mapping ++ * Types of memory for mapping ++ * ++ * The UDD core implements a default ->mmap() handler which first ++ * attempts to hand over the request to the corresponding handler ++ * defined by the mini-driver. If not present, the UDD core ++ * establishes the mapping automatically, depending on the memory ++ * type defined for the region. ++ * ++ * @{ ++ */ ++/** ++ * No memory region. Use this type code to disable an entry in the ++ * array of memory mappings, i.e. udd_device.mem_regions[]. ++ */ ++#define UDD_MEM_NONE 0 ++/** ++ * Physical I/O memory region. By default, the UDD core maps such ++ * memory to a virtual user range by calling the rtdm_mmap_iomem() ++ * service. ++ */ ++#define UDD_MEM_PHYS 1 ++/** ++ * Kernel logical memory region (e.g. kmalloc()). By default, the UDD ++ * core maps such memory to a virtual user range by calling the ++ * rtdm_mmap_kmem() service. */ ++#define UDD_MEM_LOGICAL 2 ++/** ++ * Virtual memory region with no direct physical mapping ++ * (e.g. vmalloc()). By default, the UDD core maps such memory to a ++ * virtual user range by calling the rtdm_mmap_vmem() service. ++ */ ++#define UDD_MEM_VIRTUAL 3 ++/** @} */ ++ ++#define UDD_NR_MAPS 5 ++ ++/** ++ * @anchor udd_memory_region ++ * UDD memory region descriptor. ++ * ++ * This descriptor defines the characteristics of a memory region ++ * declared to the UDD core by the mini-driver. All valid regions ++ * should be declared in the udd_device.mem_regions[] array, ++ * invalid/unassigned ones should bear the UDD_MEM_NONE type. ++ * ++ * The UDD core exposes each region via the mmap(2) interface to the ++ * application. To this end, a companion mapper device is created ++ * automatically when registering the mini-driver. ++ * ++ * The mapper device creates special files in the RTDM namespace for ++ * reaching the individual regions, which the application can open ++ * then map to its address space via the mmap(2) system call. ++ * ++ * For instance, declaring a region of physical memory at index #2 of ++ * the memory region array could be done as follows: ++ * ++ * @code ++ * static struct udd_device udd; ++ * ++ * static int foocard_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) ++ * { ++ * udd.device_name = "foocard"; ++ * ... ++ * udd.mem_regions[2].name = "ADC"; ++ * udd.mem_regions[2].addr = pci_resource_start(dev, 1); ++ * udd.mem_regions[2].len = pci_resource_len(dev, 1); ++ * udd.mem_regions[2].type = UDD_MEM_PHYS; ++ * ... 
++ * return udd_register_device(&udd); ++ * } ++ * @endcode ++ * ++ * This will make such region accessible via the mapper device using ++ * the following sequence of code (see note), via the default ++ * ->mmap() handler from the UDD core: ++ * ++ * @code ++ * int fd, fdm; ++ * void *p; ++ * ++ * fd = open("/dev/rtdm/foocard", O_RDWR); ++ * fdm = open("/dev/rtdm/foocard,mapper2", O_RDWR); ++ * p = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fdm, 0); ++ * @endcode ++ * ++ * if no valid region has been declared in the ++ * udd_device.mem_regions[] array, no mapper device is created. ++ * ++ * @note The example code assumes that @ref cobalt_api POSIX symbol ++ * wrapping is in effect, so that RTDM performs the memory mapping ++ * operation (not the regular kernel). ++ */ ++struct udd_memregion { ++ /** Name of the region (informational but required) */ ++ const char *name; ++ /** ++ * Start address of the region. This may be a physical or ++ * virtual address, depending on the @ref udd_memory_types ++ * "memory type". ++ */ ++ unsigned long addr; ++ /** ++ * Length (in bytes) of the region. This value must be ++ * PAGE_SIZE aligned. ++ */ ++ size_t len; ++ /** ++ * Type of the region. See the discussion about @ref ++ * udd_memory_types "UDD memory types" for possible values. ++ */ ++ int type; ++}; ++ ++/** ++ * @anchor udd_device ++ * UDD device descriptor. ++ * ++ * This descriptor defines the characteristics of a UDD-based ++ * mini-driver when registering via a call to udd_register_device(). ++ */ ++struct udd_device { ++ /** ++ * Name of the device managed by the mini-driver, appears ++ * automatically in the /dev/rtdm namespace upon creation. ++ */ ++ const char *device_name; ++ /** ++ * Additional device flags (e.g. RTDM_EXCLUSIVE) ++ * RTDM_NAMED_DEVICE may be omitted). ++ */ ++ int device_flags; ++ /** ++ * Subclass code of the device managed by the mini-driver (see ++ * RTDM_SUBCLASS_xxx definition in the @ref rtdm_profiles ++ * "Device Profiles"). The main class code is pre-set to ++ * RTDM_CLASS_UDD. ++ */ ++ int device_subclass; ++ struct { ++ /** ++ * Ancillary open() handler, optional. See ++ * rtdm_open_handler(). ++ * ++ * @note This handler is called from secondary mode ++ * only. ++ */ ++ int (*open)(struct rtdm_fd *fd, int oflags); ++ /** ++ * Ancillary close() handler, optional. See ++ * rtdm_close_handler(). ++ * ++ * @note This handler is called from secondary mode ++ * only. ++ */ ++ void (*close)(struct rtdm_fd *fd); ++ /** ++ * Ancillary ioctl() handler, optional. See ++ * rtdm_ioctl_handler(). ++ * ++ * If this routine returns -ENOSYS, the default action ++ * implemented by the UDD core for the corresponding ++ * request will be applied, as if no ioctl handler had ++ * been defined. ++ * ++ * @note This handler is called from primary mode ++ * only. ++ */ ++ int (*ioctl)(struct rtdm_fd *fd, ++ unsigned int request, void *arg); ++ /** ++ * Ancillary mmap() handler for the mapper device, ++ * optional. See rtdm_mmap_handler(). The mapper ++ * device operates on a valid region defined in the @a ++ * mem_regions[] array. A pointer to the region ++ * can be obtained by a call to udd_get_region(). ++ * ++ * If this handler is NULL, the UDD core establishes ++ * the mapping automatically, depending on the memory ++ * type defined for the region. ++ * ++ * @note This handler is called from secondary mode ++ * only. 
++ */ ++ int (*mmap)(struct rtdm_fd *fd, ++ struct vm_area_struct *vma); ++ /** ++ * @anchor udd_irq_handler ++ * ++ * Ancillary handler for receiving interrupts. This ++ * handler must be provided if the mini-driver hands ++ * over IRQ handling to the UDD core, by setting the ++ * @a irq field to a valid value, different from ++ * UDD_IRQ_CUSTOM and UDD_IRQ_NONE. ++ * ++ * The ->interrupt() handler shall return one of the ++ * following status codes: ++ * ++ * - RTDM_IRQ_HANDLED, if the mini-driver successfully ++ * handled the IRQ. This flag can be combined with ++ * RTDM_IRQ_DISABLE to prevent the Cobalt kernel from ++ * re-enabling the interrupt line upon return, ++ * otherwise it is re-enabled automatically. ++ * ++ * - RTDM_IRQ_NONE, if the interrupt does not match ++ * any IRQ the mini-driver can handle. ++ * ++ * Once the ->interrupt() handler has returned, the ++ * UDD core notifies user-space Cobalt threads waiting ++ * for IRQ events (if any). ++ * ++ * @note This handler is called from primary mode ++ * only. ++ */ ++ int (*interrupt)(struct udd_device *udd); ++ } ops; ++ /** ++ * IRQ number. If valid, the UDD core manages the ++ * corresponding interrupt line, installing a base handler. ++ * Otherwise, a special value can be passed for declaring ++ * @ref udd_irq_special "unmanaged IRQs". ++ */ ++ int irq; ++ /** ++ * Array of memory regions defined by the device. The array ++ * can be sparse, with some entries bearing the UDD_MEM_NONE ++ * type interleaved with valid ones. See the discussion about ++ * @ref udd_memory_region "UDD memory regions". ++ */ ++ struct udd_memregion mem_regions[UDD_NR_MAPS]; ++ /** Reserved to the UDD core. */ ++ struct udd_reserved { ++ rtdm_irq_t irqh; ++ u32 event_count; ++ struct udd_signotify signfy; ++ struct rtdm_event pulse; ++ struct rtdm_driver driver; ++ struct rtdm_device device; ++ struct rtdm_driver mapper_driver; ++ struct udd_mapper { ++ struct udd_device *udd; ++ struct rtdm_device dev; ++ } mapdev[UDD_NR_MAPS]; ++ char *mapper_name; ++ int nr_maps; ++ } __reserved; ++}; ++ ++int udd_register_device(struct udd_device *udd); ++ ++int udd_unregister_device(struct udd_device *udd); ++ ++struct udd_device *udd_get_device(struct rtdm_fd *fd); ++ ++void udd_notify_event(struct udd_device *udd); ++ ++void udd_enable_irq(struct udd_device *udd, ++ rtdm_event_t *done); ++ ++void udd_disable_irq(struct udd_device *udd, ++ rtdm_event_t *done); ++ ++/** @} */ ++ ++#endif /* !_COBALT_RTDM_UDD_H */ +--- linux/include/xenomai/rtdm/serial.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/serial.h 2021-04-07 16:01:28.315632612 +0800 +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (C) 2005-2007 Jan Kiszka ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_RTDM_SERIAL_H ++#define _COBALT_RTDM_SERIAL_H ++ ++#include ++#include ++ ++#endif /* !_COBALT_RTDM_SERIAL_H */ +--- linux/include/xenomai/rtdm/analogy/device.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/device.h 2021-04-07 16:01:28.310632619 +0800 +@@ -0,0 +1,67 @@ ++/* ++ * Analogy for Linux, device related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_DEVICE_H ++#define _COBALT_RTDM_ANALOGY_DEVICE_H ++ ++#include ++#include ++#include ++ ++#define A4L_NB_DEVICES 10 ++ ++#define A4L_DEV_ATTACHED_NR 0 ++ ++struct a4l_device { ++ ++ /* Spinlock for global device use */ ++ rtdm_lock_t lock; ++ ++ /* Device specific flags */ ++ unsigned long flags; ++ ++ /* Driver assigned to this device thanks to attaching ++ procedure */ ++ struct a4l_driver *driver; ++ ++ /* Hidden description stuff */ ++ struct list_head subdvsq; ++ ++ /* Context-dependent stuff */ ++ struct a4l_transfer transfer; ++ ++ /* Private data useful for drivers functioning */ ++ void *priv; ++}; ++ ++/* --- Devices tab related functions --- */ ++void a4l_init_devs(void); ++int a4l_check_cleanup_devs(void); ++int a4l_rdproc_devs(struct seq_file *p, void *data); ++ ++/* --- Context related function / macro --- */ ++void a4l_set_dev(struct a4l_device_context *cxt); ++#define a4l_get_dev(x) ((x)->dev) ++ ++/* --- Upper layer functions --- */ ++int a4l_ioctl_devcfg(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_devinfo(struct a4l_device_context * cxt, void *arg); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_DEVICE_H */ +--- linux/include/xenomai/rtdm/analogy/instruction.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/instruction.h 2021-04-07 16:01:28.305632626 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Analogy for Linux, instruction related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_RTDM_ANALOGY_INSTRUCTION_H ++#define _COBALT_RTDM_ANALOGY_INSTRUCTION_H ++ ++struct a4l_kernel_instruction { ++ unsigned int type; ++ unsigned int idx_subd; ++ unsigned int chan_desc; ++ unsigned int data_size; ++ void *data; ++ void *__udata; ++}; ++ ++struct a4l_kernel_instruction_list { ++ unsigned int count; ++ struct a4l_kernel_instruction *insns; ++ a4l_insn_t *__uinsns; ++}; ++ ++/* Instruction related functions */ ++ ++/* Upper layer functions */ ++int a4l_ioctl_insnlist(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_insn(struct a4l_device_context * cxt, void *arg); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_BUFFER_H */ +--- linux/include/xenomai/rtdm/analogy/channel_range.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/channel_range.h 2021-04-07 16:01:28.301632632 +0800 +@@ -0,0 +1,272 @@ ++/** ++ * @file ++ * Analogy for Linux, channel, range related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_CHANNEL_RANGE_H ++#define _COBALT_RTDM_ANALOGY_CHANNEL_RANGE_H ++ ++#include ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_channel_range Channels and ranges ++ * ++ * Channels ++ * ++ * According to the Analogy nomenclature, the channel is the elementary ++ * acquisition entity. One channel is supposed to acquire one data at ++ * a time. A channel can be: ++ * - an analog input or an analog ouput; ++ * - a digital input or a digital ouput; ++ * ++ * Channels are defined by their type and by some other ++ * characteristics like: ++ * - their resolutions for analog channels (which usually ranges from ++ 8 to 32 bits); ++ * - their references; ++ * ++ * Such parameters must be declared for each channel composing a ++ * subdevice. The structure a4l_channel (struct a4l_channel) is used to ++ * define one channel. ++ * ++ * Another structure named a4l_channels_desc (struct a4l_channels_desc) ++ * gathers all channels for a specific subdevice. This latter ++ * structure also stores : ++ * - the channels count; ++ * - the channels declaration mode (A4L_CHAN_GLOBAL_CHANDESC or ++ A4L_CHAN_PERCHAN_CHANDESC): if all the channels composing a ++ subdevice are identical, there is no need to declare the ++ parameters for each channel; the global declaration mode eases ++ the structure composition. 
++ * ++ * Usually the channels descriptor looks like this: ++ * @verbatim ++struct a4l_channels_desc example_chan = { ++ mode: A4L_CHAN_GLOBAL_CHANDESC, -> Global declaration ++ mode is set ++ length: 8, -> 8 channels ++ chans: { ++ {A4L_CHAN_AREF_GROUND, 16}, -> Each channel is 16 bits ++ wide with the ground as ++ reference ++ }, ++}; ++@endverbatim ++ * ++ * Ranges ++ * ++ * So as to perform conversion from logical values acquired by the ++ * device to physical units, some range structure(s) must be declared ++ * on the driver side. ++ * ++ * Such structures contain: ++ * - the physical unit type (Volt, Ampere, none); ++ * - the minimal and maximal values; ++ * ++ * These range structures must be associated with the channels at ++ * subdevice registration time as a channel can work with many ++ * ranges. At configuration time (thanks to an Analogy command), one ++ * range will be selected for each enabled channel. ++ * ++ * Consequently, for each channel, the developer must declare all the ++ * possible ranges in a structure called struct a4l_rngtab. Here is an ++ * example: ++ * @verbatim ++struct a4l_rngtab example_tab = { ++ length: 2, ++ rngs: { ++ RANGE_V(-5,5), ++ RANGE_V(-10,10), ++ }, ++}; ++@endverbatim ++ * ++ * For each subdevice, a specific structure is designed to gather all ++ * the ranges tabs of all the channels. In this structure, called ++ * struct a4l_rngdesc, three fields must be filled: ++ * - the declaration mode (A4L_RNG_GLOBAL_RNGDESC or ++ * A4L_RNG_PERCHAN_RNGDESC); ++ * - the number of ranges tab; ++ * - the tab of ranges tabs pointers; ++ * ++ * Most of the time, the channels which belong to the same subdevice ++ * use the same set of ranges. So, there is no need to declare the ++ * same ranges for each channel. A macro is defined to prevent ++ * redundant declarations: RNG_GLOBAL(). ++ * ++ * Here is an example: ++ * @verbatim ++struct a4l_rngdesc example_rng = RNG_GLOBAL(example_tab); ++@endverbatim ++ * ++ * @{ ++ */ ++ ++ ++/* --- Channel section --- */ ++ ++/*! ++ * @anchor A4L_CHAN_AREF_xxx @name Channel reference ++ * @brief Flags to define the channel's reference ++ * @{ ++ */ ++ ++/** ++ * Ground reference ++ */ ++#define A4L_CHAN_AREF_GROUND 0x1 ++/** ++ * Common reference ++ */ ++#define A4L_CHAN_AREF_COMMON 0x2 ++/** ++ * Differential reference ++ */ ++#define A4L_CHAN_AREF_DIFF 0x4 ++/** ++ * Misc reference ++ */ ++#define A4L_CHAN_AREF_OTHER 0x8 ++ ++ /*! @} A4L_CHAN_AREF_xxx */ ++ ++/** ++ * Internal use flag (must not be used by driver developer) ++ */ ++#define A4L_CHAN_GLOBAL 0x10 ++ ++/*! ++ * @brief Structure describing some channel's characteristics ++ */ ++ ++struct a4l_channel { ++ unsigned long flags; /*!< Channel flags to define the reference. */ ++ unsigned long nb_bits; /*!< Channel resolution. */ ++}; ++ ++/*! ++ * @anchor A4L_CHAN_xxx @name Channels declaration mode ++ * @brief Constant to define whether the channels in a descriptor are ++ * identical ++ * @{ ++ */ ++ ++/** ++ * Global declaration, the set contains channels with similar ++ * characteristics ++ */ ++#define A4L_CHAN_GLOBAL_CHANDESC 0 ++/** ++ * Per channel declaration, the decriptor gathers differents channels ++ */ ++#define A4L_CHAN_PERCHAN_CHANDESC 1 ++ ++ /*! @} A4L_CHAN_xxx */ ++ ++/*! 
++ * @brief Structure describing a channels set ++ */ ++ ++struct a4l_channels_desc { ++ unsigned long mode; /*!< Declaration mode (global or per channel) */ ++ unsigned long length; /*!< Channels count */ ++ struct a4l_channel chans[]; /*!< Channels tab */ ++}; ++ ++/** ++ * Internal use flag (must not be used by driver developer) ++ */ ++#define A4L_RNG_GLOBAL 0x8 ++ ++/*! ++ * @brief Structure describing a (unique) range ++ */ ++ ++struct a4l_range { ++ long min; /*!< Minimal value */ ++ long max; /*!< Maximal falue */ ++ unsigned long flags; /*!< Range flags (unit, etc.) */ ++}; ++ ++/** ++ * Macro to declare a (unique) range with no unit defined ++ */ ++#define RANGE(x,y) {(x * A4L_RNG_FACTOR), (y * A4L_RNG_FACTOR), \ ++ A4L_RNG_NO_UNIT} ++/** ++ * Macro to declare a (unique) range in Volt ++ */ ++#define RANGE_V(x,y) {(x * A4L_RNG_FACTOR),(y * A4L_RNG_FACTOR), \ ++ A4L_RNG_VOLT_UNIT} ++/** ++ * Macro to declare a (unique) range in milliAmpere ++ */ ++#define RANGE_mA(x,y) {(x * A4L_RNG_FACTOR),(y * A4L_RNG_FACTOR), \ ++ A4L_RNG_MAMP_UNIT} ++/** ++ * Macro to declare a (unique) range in some external reference ++ */ ++#define RANGE_ext(x,y) {(x * A4L_RNG_FACTOR),(y * A4L_RNG_FACTOR), \ ++ A4L_RNG_EXT_UNIT} ++ ++ ++/* Ranges tab descriptor */ ++struct a4l_rngtab { ++ unsigned char length; ++ struct a4l_range rngs[]; ++}; ++ ++/** ++ * Constant to define a ranges descriptor as global (inter-channel) ++ */ ++#define A4L_RNG_GLOBAL_RNGDESC 0 ++/** ++ * Constant to define a ranges descriptor as specific for a channel ++ */ ++#define A4L_RNG_PERCHAN_RNGDESC 1 ++ ++/* Global ranges descriptor */ ++struct a4l_rngdesc { ++ unsigned char mode; ++ unsigned char length; ++ struct a4l_rngtab *rngtabs[]; ++}; ++ ++/** ++ * Macro to declare a ranges global descriptor in one line ++ */ ++#define RNG_GLOBAL(x) { \ ++ .mode = A4L_RNG_GLOBAL_RNGDESC, \ ++ .length = 1, \ ++ .rngtabs = {&(x)}, \ ++} ++ ++extern struct a4l_rngdesc a4l_range_bipolar10; ++extern struct a4l_rngdesc a4l_range_bipolar5; ++extern struct a4l_rngdesc a4l_range_unipolar10; ++extern struct a4l_rngdesc a4l_range_unipolar5; ++extern struct a4l_rngdesc a4l_range_unknown; ++extern struct a4l_rngdesc a4l_range_fake; ++ ++#define range_digital a4l_range_unipolar5 ++ ++/*! @} channelrange */ ++ ++#endif /* !_COBALT_RTDM_ANALOGY_CHANNEL_RANGE_H */ +--- linux/include/xenomai/rtdm/analogy/buffer.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/buffer.h 2021-04-07 16:01:28.296632639 +0800 +@@ -0,0 +1,461 @@ ++/* ++ * Analogy for Linux, buffer related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_RTDM_ANALOGY_BUFFER_H ++#define _COBALT_RTDM_ANALOGY_BUFFER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* --- Events bits / flags --- */ ++ ++#define A4L_BUF_EOBUF_NR 0 ++#define A4L_BUF_EOBUF (1 << A4L_BUF_EOBUF_NR) ++ ++#define A4L_BUF_ERROR_NR 1 ++#define A4L_BUF_ERROR (1 << A4L_BUF_ERROR_NR) ++ ++#define A4L_BUF_EOA_NR 2 ++#define A4L_BUF_EOA (1 << A4L_BUF_EOA_NR) ++ ++/* --- Status bits / flags --- */ ++ ++#define A4L_BUF_BULK_NR 8 ++#define A4L_BUF_BULK (1 << A4L_BUF_BULK_NR) ++ ++#define A4L_BUF_MAP_NR 9 ++#define A4L_BUF_MAP (1 << A4L_BUF_MAP_NR) ++ ++ ++/* Buffer descriptor structure */ ++struct a4l_buffer { ++ ++ /* Added by the structure update */ ++ struct a4l_subdevice *subd; ++ ++ /* Buffer's first virtual page pointer */ ++ void *buf; ++ ++ /* Buffer's global size */ ++ unsigned long size; ++ /* Tab containing buffer's pages pointers */ ++ unsigned long *pg_list; ++ ++ /* RT/NRT synchronization element */ ++ struct a4l_sync sync; ++ ++ /* Counters needed for transfer */ ++ unsigned long end_count; ++ unsigned long prd_count; ++ unsigned long cns_count; ++ unsigned long tmp_count; ++ ++ /* Status + events occuring during transfer */ ++ unsigned long flags; ++ ++ /* Command on progress */ ++ struct a4l_cmd_desc *cur_cmd; ++ ++ /* Munge counter */ ++ unsigned long mng_count; ++ ++ /* Theshold below which the user process should not be ++ awakened */ ++ unsigned long wake_count; ++}; ++ ++static inline void __dump_buffer_counters(struct a4l_buffer *buf) ++{ ++ __a4l_dbg(1, core_dbg, "a4l_buffer=0x%p, p=0x%p \n", buf, buf->buf); ++ __a4l_dbg(1, core_dbg, "end=%06ld, prd=%06ld, cns=%06ld, tmp=%06ld \n", ++ buf->end_count, buf->prd_count, buf->cns_count, buf->tmp_count); ++} ++ ++/* --- Static inline functions related with ++ user<->kernel data transfers --- */ ++ ++/* The function __produce is an inline function which copies data into ++ the asynchronous buffer and takes care of the non-contiguous issue ++ when looping. This function is used in read and write operations */ ++static inline int __produce(struct a4l_device_context *cxt, ++ struct a4l_buffer *buf, void *pin, unsigned long count) ++{ ++ unsigned long start_ptr = (buf->prd_count % buf->size); ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ unsigned long tmp_cnt = count; ++ int ret = 0; ++ ++ while (ret == 0 && tmp_cnt != 0) { ++ /* Check the data copy can be performed contiguously */ ++ unsigned long blk_size = (start_ptr + tmp_cnt > buf->size) ? ++ buf->size - start_ptr : tmp_cnt; ++ ++ /* Perform the copy */ ++ if (cxt == NULL) ++ memcpy(buf->buf + start_ptr, pin, blk_size); ++ else ++ ret = rtdm_safe_copy_from_user(fd, ++ buf->buf + start_ptr, ++ pin, blk_size); ++ ++ /* Update pointers/counts */ ++ pin += blk_size; ++ tmp_cnt -= blk_size; ++ start_ptr = 0; ++ } ++ ++ return ret; ++} ++ ++/* The function __consume is an inline function which copies data from ++ the asynchronous buffer and takes care of the non-contiguous issue ++ when looping. This function is used in read and write operations */ ++static inline int __consume(struct a4l_device_context *cxt, ++ struct a4l_buffer *buf, void *pout, unsigned long count) ++{ ++ unsigned long start_ptr = (buf->cns_count % buf->size); ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ unsigned long tmp_cnt = count; ++ int ret = 0; ++ ++ while (ret == 0 && tmp_cnt != 0) { ++ /* Check the data copy can be performed contiguously */ ++ unsigned long blk_size = (start_ptr + tmp_cnt > buf->size) ? 
++ buf->size - start_ptr : tmp_cnt; ++ ++ /* Perform the copy */ ++ if (cxt == NULL) ++ memcpy(pout, buf->buf + start_ptr, blk_size); ++ else ++ ret = rtdm_safe_copy_to_user(fd, ++ pout, ++ buf->buf + start_ptr, ++ blk_size); ++ ++ /* Update pointers/counts */ ++ pout += blk_size; ++ tmp_cnt -= blk_size; ++ start_ptr = 0; ++ } ++ ++ return ret; ++} ++ ++/* The function __munge is an inline function which calls the ++ subdevice specific munge callback on contiguous windows within the ++ whole buffer. This function is used in read and write operations */ ++static inline void __munge(struct a4l_subdevice * subd, ++ void (*munge) (struct a4l_subdevice *, ++ void *, unsigned long), ++ struct a4l_buffer * buf, unsigned long count) ++{ ++ unsigned long start_ptr = (buf->mng_count % buf->size); ++ unsigned long tmp_cnt = count; ++ ++ while (tmp_cnt != 0) { ++ /* Check the data copy can be performed contiguously */ ++ unsigned long blk_size = (start_ptr + tmp_cnt > buf->size) ? ++ buf->size - start_ptr : tmp_cnt; ++ ++ /* Perform the munge operation */ ++ munge(subd, buf->buf + start_ptr, blk_size); ++ ++ /* Update the start pointer and the count */ ++ tmp_cnt -= blk_size; ++ start_ptr = 0; ++ } ++} ++ ++/* The function __handle_event can only be called from process context ++ (not interrupt service routine). It allows the client process to ++ retrieve the buffer status which has been updated by the driver */ ++static inline int __handle_event(struct a4l_buffer * buf) ++{ ++ int ret = 0; ++ ++ /* The event "End of acquisition" must not be cleaned ++ before the complete flush of the buffer */ ++ if (test_bit(A4L_BUF_EOA_NR, &buf->flags)) ++ ret = -ENOENT; ++ ++ if (test_bit(A4L_BUF_ERROR_NR, &buf->flags)) ++ ret = -EPIPE; ++ ++ return ret; ++} ++ ++/* --- Counters management functions --- */ ++ ++/* Here, we may wonder why we need more than two counters / pointers. ++ ++ Theoretically, we only need two counters (or two pointers): ++ - one which tells where the reader should be within the buffer ++ - one which tells where the writer should be within the buffer ++ ++ With these two counters (or pointers), we just have to check that ++ the writer does not overtake the reader inside the ring buffer ++ BEFORE any read / write operations. ++ ++ However, if one element is a DMA controller, we have to be more ++ careful. Generally a DMA transfer occurs like this: ++ DMA shot ++ |-> then DMA interrupt ++ |-> then DMA soft handler which checks the counter ++ ++ So, the checkings occur AFTER the write operations. ++ ++ Let's take an example: the reader is a software task and the writer ++ is a DMA controller. At the end of the DMA shot, the write counter ++ is higher than the read counter. Unfortunately, a read operation ++ occurs between the DMA shot and the DMA interrupt, so the handler ++ will not notice that an overflow occured. ++ ++ That is why tmp_count comes into play: tmp_count records the ++ read/consumer current counter before the next DMA shot and once the ++ next DMA shot is done, we check that the updated writer/producer ++ counter is not higher than tmp_count. Thus we are sure that the DMA ++ writer has not overtaken the reader because it was not able to ++ overtake the n-1 value. 
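++
++   A short worked illustration (added for clarity; the numbers are
++   hypothetical): consider the output direction, where the application
++   produces data and a DMA channel consumes it. Suppose the previous
++   call to __pre_abs_get() recorded tmp_count = prd_count = 4096, i.e.
++   4096 bytes were known to be produced at that checkpoint. If the next
++   DMA shot then reports a consumer position of count = 5000, the test
++   (long)(count - tmp_count) > 0 triggers: 904 of the bytes fetched by
++   the DMA were not known to be produced at the previous checkpoint, so
++   A4L_BUF_ERROR is set and -EPIPE returned, even if prd_count has
++   meanwhile caught up.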
*/ ++ ++static inline int __pre_abs_put(struct a4l_buffer * buf, unsigned long count) ++{ ++ if (count - buf->tmp_count > buf->size) { ++ set_bit(A4L_BUF_ERROR_NR, &buf->flags); ++ return -EPIPE; ++ } ++ ++ buf->tmp_count = buf->cns_count; ++ ++ return 0; ++} ++ ++static inline int __pre_put(struct a4l_buffer * buf, unsigned long count) ++{ ++ return __pre_abs_put(buf, buf->tmp_count + count); ++} ++ ++static inline int __pre_abs_get(struct a4l_buffer * buf, unsigned long count) ++{ ++ /* The first time, we expect the buffer to be properly filled ++ before the trigger occurence; by the way, we need tmp_count to ++ have been initialized and tmp_count is updated right here */ ++ if (buf->tmp_count == 0 || buf->cns_count == 0) ++ goto out; ++ ++ /* At the end of the acquisition, the user application has ++ written the defined amount of data into the buffer; so the ++ last time, the DMA channel can easily overtake the tmp ++ frontier because no more data were sent from user space; ++ therefore no useless alarm should be sent */ ++ if (buf->end_count != 0 && (long)(count - buf->end_count) > 0) ++ goto out; ++ ++ /* Once the exception are passed, we check that the DMA ++ transfer has not overtaken the last record of the production ++ count (tmp_count was updated with prd_count the last time ++ __pre_abs_get was called). We must understand that we cannot ++ compare the current DMA count with the current production ++ count because even if, right now, the production count is ++ higher than the DMA count, it does not mean that the DMA count ++ was not greater a few cycles before; in such case, the DMA ++ channel would have retrieved the wrong data */ ++ if ((long)(count - buf->tmp_count) > 0) { ++ set_bit(A4L_BUF_ERROR_NR, &buf->flags); ++ return -EPIPE; ++ } ++ ++out: ++ buf->tmp_count = buf->prd_count; ++ ++ return 0; ++} ++ ++static inline int __pre_get(struct a4l_buffer * buf, unsigned long count) ++{ ++ return __pre_abs_get(buf, buf->tmp_count + count); ++} ++ ++static inline int __abs_put(struct a4l_buffer * buf, unsigned long count) ++{ ++ unsigned long old = buf->prd_count; ++ ++ if ((long)(buf->prd_count - count) >= 0) ++ return -EINVAL; ++ ++ buf->prd_count = count; ++ ++ if ((old / buf->size) != (count / buf->size)) ++ set_bit(A4L_BUF_EOBUF_NR, &buf->flags); ++ ++ if (buf->end_count != 0 && (long)(count - buf->end_count) >= 0) ++ set_bit(A4L_BUF_EOA_NR, &buf->flags); ++ ++ return 0; ++} ++ ++static inline int __put(struct a4l_buffer * buf, unsigned long count) ++{ ++ return __abs_put(buf, buf->prd_count + count); ++} ++ ++static inline int __abs_get(struct a4l_buffer * buf, unsigned long count) ++{ ++ unsigned long old = buf->cns_count; ++ ++ if ((long)(buf->cns_count - count) >= 0) ++ return -EINVAL; ++ ++ buf->cns_count = count; ++ ++ if ((old / buf->size) != count / buf->size) ++ set_bit(A4L_BUF_EOBUF_NR, &buf->flags); ++ ++ if (buf->end_count != 0 && (long)(count - buf->end_count) >= 0) ++ set_bit(A4L_BUF_EOA_NR, &buf->flags); ++ ++ return 0; ++} ++ ++static inline int __get(struct a4l_buffer * buf, unsigned long count) ++{ ++ return __abs_get(buf, buf->cns_count + count); ++} ++ ++static inline unsigned long __count_to_put(struct a4l_buffer * buf) ++{ ++ unsigned long ret; ++ ++ if ((long) (buf->size + buf->cns_count - buf->prd_count) > 0) ++ ret = buf->size + buf->cns_count - buf->prd_count; ++ else ++ ret = 0; ++ ++ return ret; ++} ++ ++static inline unsigned long __count_to_get(struct a4l_buffer * buf) ++{ ++ unsigned long ret; ++ ++ /* If the acquisition is unlimited (end_count 
== 0), we must ++ not take into account end_count */ ++ if (buf->end_count == 0 || (long)(buf->end_count - buf->prd_count) > 0) ++ ret = buf->prd_count; ++ else ++ ret = buf->end_count; ++ ++ if ((long)(ret - buf->cns_count) > 0) ++ ret -= buf->cns_count; ++ else ++ ret = 0; ++ ++ return ret; ++} ++ ++static inline unsigned long __count_to_end(struct a4l_buffer * buf) ++{ ++ unsigned long ret = buf->end_count - buf->cns_count; ++ ++ if (buf->end_count == 0) ++ return ULONG_MAX; ++ ++ return ((long)ret) < 0 ? 0 : ret; ++} ++ ++/* --- Buffer internal functions --- */ ++ ++int a4l_alloc_buffer(struct a4l_buffer *buf_desc, int buf_size); ++ ++void a4l_free_buffer(struct a4l_buffer *buf_desc); ++ ++void a4l_init_buffer(struct a4l_buffer * buf_desc); ++ ++void a4l_cleanup_buffer(struct a4l_buffer * buf_desc); ++ ++int a4l_setup_buffer(struct a4l_device_context *cxt, struct a4l_cmd_desc *cmd); ++ ++void a4l_cancel_buffer(struct a4l_device_context *cxt); ++ ++int a4l_buf_prepare_absput(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_commit_absput(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_prepare_put(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_commit_put(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_put(struct a4l_subdevice *subd, ++ void *bufdata, unsigned long count); ++ ++int a4l_buf_prepare_absget(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_commit_absget(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_prepare_get(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_commit_get(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_get(struct a4l_subdevice *subd, ++ void *bufdata, unsigned long count); ++ ++int a4l_buf_evt(struct a4l_subdevice *subd, unsigned long evts); ++ ++unsigned long a4l_buf_count(struct a4l_subdevice *subd); ++ ++/* --- Current Command management function --- */ ++ ++static inline struct a4l_cmd_desc *a4l_get_cmd(struct a4l_subdevice *subd) ++{ ++ return (subd->buf) ? subd->buf->cur_cmd : NULL; ++} ++ ++/* --- Munge related function --- */ ++ ++int a4l_get_chan(struct a4l_subdevice *subd); ++ ++/* --- IOCTL / FOPS functions --- */ ++ ++int a4l_ioctl_mmap(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_bufcfg(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_bufcfg2(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_bufinfo(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_bufinfo2(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_poll(struct a4l_device_context * cxt, void *arg); ++ssize_t a4l_read_buffer(struct a4l_device_context * cxt, void *bufdata, size_t nbytes); ++ssize_t a4l_write_buffer(struct a4l_device_context * cxt, const void *bufdata, size_t nbytes); ++int a4l_select(struct a4l_device_context *cxt, ++ rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_BUFFER_H */ +--- linux/include/xenomai/rtdm/analogy/context.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/context.h 2021-04-07 16:01:28.292632644 +0800 +@@ -0,0 +1,48 @@ ++/* ++ * Analogy for Linux, context structure / macros declarations ++ * ++ * Copyright (C) 1997-2000 David A. 
Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_CONTEXT_H ++#define _COBALT_RTDM_ANALOGY_CONTEXT_H ++ ++#include ++ ++struct a4l_device; ++struct a4l_buffer; ++ ++struct a4l_device_context { ++ /* The adequate device pointer ++ (retrieved thanks to minor at open time) */ ++ struct a4l_device *dev; ++ ++ /* The buffer structure contains everything to transfer data ++ from asynchronous acquisition operations on a specific ++ subdevice */ ++ struct a4l_buffer *buffer; ++}; ++ ++static inline int a4l_get_minor(struct a4l_device_context *cxt) ++{ ++ /* Get a pointer on the container structure */ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ /* Get the minor index */ ++ return rtdm_fd_minor(fd); ++} ++ ++#endif /* !_COBALT_RTDM_ANALOGY_CONTEXT_H */ +--- linux/include/xenomai/rtdm/analogy/driver.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/driver.h 2021-04-07 16:01:28.287632652 +0800 +@@ -0,0 +1,74 @@ ++/** ++ * @file ++ * Analogy for Linux, driver facilities ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_DRIVER_H ++#define _COBALT_RTDM_ANALOGY_DRIVER_H ++ ++#include ++#include ++#include ++#include ++ ++struct seq_file; ++struct a4l_link_desc; ++struct a4l_device; ++ ++/** Structure containing driver declaration data. 
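++ *
++ * A minimal declaration sketch (added for illustration; the foo_*
++ * names and struct foo_priv are hypothetical). The descriptor is then
++ * registered with a4l_register_drv(), typically from the module init
++ * routine:
++ * @verbatim
++static struct a4l_driver foo_drv = {
++	.owner = THIS_MODULE,
++	.board_name = "foo_board",
++	.driver_name = "foo_driver",
++	.privdata_size = sizeof(struct foo_priv),
++	.attach = foo_attach,
++	.detach = foo_detach,
++};
++
++ret = a4l_register_drv(&foo_drv);
++@endverbatim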
++ * ++ * @see rt_task_inquire() ++ */ ++/* Analogy driver descriptor */ ++struct a4l_driver { ++ ++ /* List stuff */ ++ struct list_head list; ++ /**< List stuff */ ++ ++ /* Visible description stuff */ ++ struct module *owner; ++ /**< Pointer to module containing the code */ ++ unsigned int flags; ++ /**< Type / status driver's flags */ ++ char *board_name; ++ /**< Board name */ ++ char *driver_name; ++ /**< driver name */ ++ int privdata_size; ++ /**< Size of the driver's private data */ ++ ++ /* Init/destroy procedures */ ++ int (*attach) (struct a4l_device *, struct a4l_link_desc *); ++ /**< Attach procedure */ ++ int (*detach) (struct a4l_device *); ++ /**< Detach procedure */ ++ ++}; ++ ++/* Driver list related functions */ ++ ++int a4l_register_drv(struct a4l_driver * drv); ++int a4l_unregister_drv(struct a4l_driver * drv); ++int a4l_lct_drv(char *pin, struct a4l_driver ** pio); ++#ifdef CONFIG_PROC_FS ++int a4l_rdproc_drvs(struct seq_file *p, void *data); ++#endif /* CONFIG_PROC_FS */ ++ ++#endif /* !_COBALT_RTDM_ANALOGY_DRIVER_H */ +--- linux/include/xenomai/rtdm/analogy/command.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/command.h 2021-04-07 16:01:28.282632659 +0800 +@@ -0,0 +1,35 @@ ++/** ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_COMMAND_H ++#define _COBALT_RTDM_ANALOGY_COMMAND_H ++ ++#include ++#include ++ ++#define CR_CHAN(a) CHAN(a) ++#define CR_RNG(a) (((a)>>16)&0xff) ++#define CR_AREF(a) (((a)>>24)&0xf) ++ ++/* --- Command related function --- */ ++void a4l_free_cmddesc(struct a4l_cmd_desc * desc); ++ ++/* --- Upper layer functions --- */ ++int a4l_check_cmddesc(struct a4l_device_context * cxt, struct a4l_cmd_desc * desc); ++int a4l_ioctl_cmd(struct a4l_device_context * cxt, void *arg); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_COMMAND_H */ +--- linux/include/xenomai/rtdm/analogy/subdevice.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/subdevice.h 2021-04-07 16:01:28.278632664 +0800 +@@ -0,0 +1,118 @@ ++/** ++ * @file ++ * Analogy for Linux, subdevice related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_SUBDEVICE_H ++#define _COBALT_RTDM_ANALOGY_SUBDEVICE_H ++ ++#include ++#include ++#include ++#include ++ ++/* --- Subdevice descriptor structure --- */ ++ ++struct a4l_device; ++struct a4l_buffer; ++ ++/*! ++ * @brief Structure describing the subdevice ++ * @see a4l_add_subd() ++ */ ++ ++struct a4l_subdevice { ++ ++ struct list_head list; ++ /**< List stuff */ ++ ++ struct a4l_device *dev; ++ /**< Containing device */ ++ ++ unsigned int idx; ++ /**< Subdevice index */ ++ ++ struct a4l_buffer *buf; ++ /**< Linked buffer */ ++ ++ /* Subdevice's status (busy, linked?) */ ++ unsigned long status; ++ /**< Subdevice's status */ ++ ++ /* Descriptors stuff */ ++ unsigned long flags; ++ /**< Type flags */ ++ struct a4l_channels_desc *chan_desc; ++ /**< Tab of channels descriptors pointers */ ++ struct a4l_rngdesc *rng_desc; ++ /**< Tab of ranges descriptors pointers */ ++ struct a4l_cmd_desc *cmd_mask; ++ /**< Command capabilities mask */ ++ ++ /* Functions stuff */ ++ int (*insn_read) (struct a4l_subdevice *, struct a4l_kernel_instruction *); ++ /**< Callback for the instruction "read" */ ++ int (*insn_write) (struct a4l_subdevice *, struct a4l_kernel_instruction *); ++ /**< Callback for the instruction "write" */ ++ int (*insn_bits) (struct a4l_subdevice *, struct a4l_kernel_instruction *); ++ /**< Callback for the instruction "bits" */ ++ int (*insn_config) (struct a4l_subdevice *, struct a4l_kernel_instruction *); ++ /**< Callback for the configuration instruction */ ++ int (*do_cmd) (struct a4l_subdevice *, struct a4l_cmd_desc *); ++ /**< Callback for command handling */ ++ int (*do_cmdtest) (struct a4l_subdevice *, struct a4l_cmd_desc *); ++ /**< Callback for command checking */ ++ void (*cancel) (struct a4l_subdevice *); ++ /**< Callback for asynchronous transfer cancellation */ ++ void (*munge) (struct a4l_subdevice *, void *, unsigned long); ++ /**< Callback for munge operation */ ++ int (*trigger) (struct a4l_subdevice *, lsampl_t); ++ /**< Callback for trigger operation */ ++ ++ char priv[0]; ++ /**< Private data */ ++}; ++ ++/* --- Subdevice related functions and macros --- */ ++ ++struct a4l_channel *a4l_get_chfeat(struct a4l_subdevice * sb, int idx); ++struct a4l_range *a4l_get_rngfeat(struct a4l_subdevice * sb, int chidx, int rngidx); ++int a4l_check_chanlist(struct a4l_subdevice * subd, ++ unsigned char nb_chan, unsigned int *chans); ++ ++#define a4l_subd_is_input(x) ((A4L_SUBD_MASK_READ & (x)->flags) != 0) ++/* The following macro considers that a DIO subdevice is firstly an ++ output subdevice */ ++#define a4l_subd_is_output(x) \ ++ ((A4L_SUBD_MASK_WRITE & (x)->flags) != 0 || \ ++ (A4L_SUBD_DIO & (x)->flags) != 0) ++ ++/* --- Upper layer functions --- */ ++ ++struct a4l_subdevice * a4l_get_subd(struct a4l_device *dev, int idx); ++struct a4l_subdevice * a4l_alloc_subd(int sizeof_priv, ++ void (*setup)(struct a4l_subdevice *)); ++int a4l_add_subd(struct a4l_device *dev, struct a4l_subdevice * subd); ++int a4l_ioctl_subdinfo(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_chaninfo(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_rnginfo(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_nbchaninfo(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_nbrnginfo(struct a4l_device_context * cxt, 
void *arg); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_SUBDEVICE_H */ +--- linux/include/xenomai/rtdm/analogy/transfer.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/transfer.h 2021-04-07 16:01:28.273632672 +0800 +@@ -0,0 +1,78 @@ ++/* ++ * Analogy for Linux, transfer related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_TRANSFER_H ++#define _COBALT_RTDM_ANALOGY_TRANSFER_H ++ ++#include ++ ++/* IRQ types */ ++#define A4L_IRQ_DISABLED 0 ++ ++/* Fields init values */ ++#define A4L_IRQ_UNUSED (unsigned int)((unsigned short)(~0)) ++#define A4L_IDX_UNUSED (unsigned int)(~0) ++ ++/* TODO: IRQ handling must leave transfer for os_facilities */ ++ ++struct a4l_device; ++/* Analogy transfer descriptor */ ++struct a4l_transfer { ++ ++ /* Subdevices desc */ ++ unsigned int nb_subd; ++ struct a4l_subdevice **subds; ++ ++ /* Buffer stuff: the default size */ ++ unsigned int default_bufsize; ++ ++ /* IRQ in use */ ++ /* TODO: irq_desc should vanish */ ++ struct a4l_irq_descriptor irq_desc; ++}; ++ ++/* --- Proc function --- */ ++ ++int a4l_rdproc_transfer(struct seq_file *p, void *data); ++ ++/* --- Upper layer functions --- */ ++ ++void a4l_presetup_transfer(struct a4l_device_context * cxt); ++int a4l_setup_transfer(struct a4l_device_context * cxt); ++int a4l_precleanup_transfer(struct a4l_device_context * cxt); ++int a4l_cleanup_transfer(struct a4l_device_context * cxt); ++int a4l_reserve_transfer(struct a4l_device_context * cxt, int idx_subd); ++int a4l_init_transfer(struct a4l_device_context * cxt, struct a4l_cmd_desc * cmd); ++int a4l_cancel_transfer(struct a4l_device_context * cxt, int idx_subd); ++int a4l_cancel_transfers(struct a4l_device_context * cxt); ++ ++ssize_t a4l_put(struct a4l_device_context * cxt, void *buf, size_t nbytes); ++ssize_t a4l_get(struct a4l_device_context * cxt, void *buf, size_t nbytes); ++ ++int a4l_request_irq(struct a4l_device *dev, ++ unsigned int irq, ++ a4l_irq_hdlr_t handler, ++ unsigned long flags, void *cookie); ++int a4l_free_irq(struct a4l_device *dev, unsigned int irq); ++unsigned int a4l_get_irq(struct a4l_device *dev); ++ ++int a4l_ioctl_cancel(struct a4l_device_context * cxt, void *arg); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_TRANSFER_H */ +--- linux/include/xenomai/rtdm/analogy/rtdm_helpers.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/rtdm_helpers.h 2021-04-07 16:01:28.268632679 +0800 +@@ -0,0 +1,143 @@ ++/* ++ * Analogy for Linux, Operation system facilities ++ * ++ * Copyright (C) 1997-2000 David A. 
Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_RTDM_HELPERS_H ++#define _COBALT_RTDM_ANALOGY_RTDM_HELPERS_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* --- Trace section --- */ ++#define A4L_PROMPT "Analogy: " ++ ++#define RTDM_SUBCLASS_ANALOGY 0 ++ ++#define __a4l_err(fmt, args...) rtdm_printk(KERN_ERR A4L_PROMPT fmt, ##args) ++#define __a4l_warn(fmt, args...) rtdm_printk(KERN_WARNING A4L_PROMPT fmt, ##args) ++ ++#ifdef CONFIG_XENO_DRIVERS_ANALOGY_DEBUG_FTRACE ++#define __a4l_info(fmt, args...) trace_printk(fmt, ##args) ++#else ++#define __a4l_info(fmt, args...) \ ++ rtdm_printk(KERN_INFO A4L_PROMPT "%s: " fmt, __FUNCTION__, ##args) ++#endif ++ ++#ifdef CONFIG_XENO_DRIVERS_ANALOGY_DEBUG ++#ifdef CONFIG_XENO_DRIVERS_ANALOGY_DEBUG_FTRACE ++#define __a4l_dbg(level, debug, fmt, args...) \ ++ do { \ ++ if ((debug) >= (level)) \ ++ trace_printk(fmt, ##args); \ ++ } while (0) ++#else ++#define __a4l_dbg(level, debug, fmt, args...) \ ++ do { \ ++ if ((debug) >= (level)) \ ++ rtdm_printk(KERN_DEBUG A4L_PROMPT "%s: " fmt, __FUNCTION__ , ##args); \ ++ } while (0) ++#endif ++ ++#define core_dbg CONFIG_XENO_DRIVERS_ANALOGY_DEBUG_LEVEL ++#define drv_dbg CONFIG_XENO_DRIVERS_ANALOGY_DRIVER_DEBUG_LEVEL ++ ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_DEBUG */ ++ ++#define __a4l_dbg(level, debug, fmt, args...) ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_DEBUG */ ++ ++#define __a4l_dev_name(dev) \ ++ (dev->driver == NULL) ? "unattached dev" : dev->driver->board_name ++ ++#define a4l_err(dev, fmt, args...) \ ++ __a4l_err("%s: " fmt, __a4l_dev_name(dev), ##args) ++ ++#define a4l_warn(dev, fmt, args...) \ ++ __a4l_warn("%s: " fmt, __a4l_dev_name(dev), ##args) ++ ++#define a4l_info(dev, fmt, args...) \ ++ __a4l_info("%s: " fmt, __a4l_dev_name(dev), ##args) ++ ++#define a4l_dbg(level, debug, dev, fmt, args...) 
\ ++ __a4l_dbg(level, debug, "%s: " fmt, __a4l_dev_name(dev), ##args) ++ ++ ++/* --- Time section --- */ ++static inline void a4l_udelay(unsigned int us) ++{ ++ rtdm_task_busy_sleep(((nanosecs_rel_t) us) * 1000); ++} ++ ++/* Function which gives absolute time */ ++nanosecs_abs_t a4l_get_time(void); ++ ++/* Function for setting up the absolute time recovery */ ++void a4l_init_time(void); ++ ++/* --- IRQ section --- */ ++#define A4L_IRQ_DISABLED 0 ++ ++typedef int (*a4l_irq_hdlr_t) (unsigned int irq, void *d); ++ ++struct a4l_irq_descriptor { ++ /* These fields are useful to launch the IRQ trampoline; ++ that is the reason why a structure has been defined */ ++ a4l_irq_hdlr_t handler; ++ unsigned int irq; ++ void *cookie; ++ rtdm_irq_t rtdm_desc; ++}; ++ ++int __a4l_request_irq(struct a4l_irq_descriptor * dsc, ++ unsigned int irq, ++ a4l_irq_hdlr_t handler, ++ unsigned long flags, void *cookie); ++int __a4l_free_irq(struct a4l_irq_descriptor * dsc); ++ ++/* --- Synchronization section --- */ ++#define __NRT_WAITER 1 ++#define __RT_WAITER 2 ++#define __EVT_PDING 3 ++ ++struct a4l_sync { ++ unsigned long status; ++ rtdm_event_t rtdm_evt; ++ rtdm_nrtsig_t nrt_sig; ++ wait_queue_head_t wq; ++}; ++ ++#define a4l_select_sync(snc, slr, type, fd) \ ++ rtdm_event_select(&((snc)->rtdm_evt), slr, type, fd) ++ ++int a4l_init_sync(struct a4l_sync * snc); ++void a4l_cleanup_sync(struct a4l_sync * snc); ++void a4l_flush_sync(struct a4l_sync * snc); ++int a4l_wait_sync(struct a4l_sync * snc, int rt); ++int a4l_timedwait_sync(struct a4l_sync * snc, ++ int rt, unsigned long long ns_timeout); ++void a4l_signal_sync(struct a4l_sync * snc); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_RTDM_HELPERS_H */ +--- linux/include/xenomai/rtdm/gpio.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/gpio.h 2021-04-07 16:01:28.264632685 +0800 +@@ -0,0 +1,77 @@ ++/** ++ * Copyright (C) 2016 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_RTDM_GPIO_H ++#define _COBALT_RTDM_GPIO_H ++ ++#include ++#include ++#include ++ ++struct class; ++struct device_node; ++struct gpio_desc; ++ ++struct rtdm_gpio_pin { ++ struct rtdm_device dev; ++ struct list_head next; ++ rtdm_irq_t irqh; ++ rtdm_event_t event; ++ char *name; ++ struct gpio_desc *desc; ++ nanosecs_abs_t timestamp; ++}; ++ ++struct rtdm_gpio_chip { ++ struct gpio_chip *gc; ++ struct rtdm_driver driver; ++ struct class *devclass; ++ struct list_head next; ++ rtdm_lock_t lock; ++ struct rtdm_gpio_pin pins[0]; ++}; ++ ++int rtdm_gpiochip_add(struct rtdm_gpio_chip *rgc, ++ struct gpio_chip *gc, ++ int gpio_subclass); ++ ++struct rtdm_gpio_chip * ++rtdm_gpiochip_alloc(struct gpio_chip *gc, ++ int gpio_subclass); ++ ++void rtdm_gpiochip_remove(struct rtdm_gpio_chip *rgc); ++ ++int rtdm_gpiochip_add_by_name(struct rtdm_gpio_chip *rgc, ++ const char *label, int gpio_subclass); ++ ++int rtdm_gpiochip_post_event(struct rtdm_gpio_chip *rgc, ++ unsigned int offset); ++ ++#ifdef CONFIG_OF ++ ++int rtdm_gpiochip_scan_of(struct device_node *from, ++ const char *compat, int type); ++ ++int rtdm_gpiochip_scan_array_of(struct device_node *from, ++ const char *compat[], ++ int nentries, int type); ++ ++void rtdm_gpiochip_remove_of(int type); ++ ++#endif ++ ++#endif /* !_COBALT_RTDM_GPIO_H */ +--- linux/include/xenomai/rtdm/driver.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/driver.h 2021-04-07 16:01:28.259632692 +0800 +@@ -0,0 +1,1342 @@ ++/** ++ * @file ++ * Real-Time Driver Model for Xenomai, driver API header ++ * ++ * Copyright (C) 2005-2007 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2008 Gilles Chanteperdrix ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * @ingroup driverapi ++ */ ++#ifndef _COBALT_RTDM_DRIVER_H ++#define _COBALT_RTDM_DRIVER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* debug support */ ++#include ++#include ++#ifdef CONFIG_PCI ++#include ++#endif /* CONFIG_PCI */ ++#include ++ ++struct class; ++typedef struct xnselector rtdm_selector_t; ++enum rtdm_selecttype; ++ ++/*! ++ * @addtogroup rtdm_device_register ++ * @{ ++ */ ++ ++/*! ++ * @anchor dev_flags @name Device Flags ++ * Static flags describing a RTDM device ++ * @{ ++ */ ++/** If set, only a single instance of the device can be requested by an ++ * application. */ ++#define RTDM_EXCLUSIVE 0x0001 ++ ++/** ++ * Use fixed minor provided in the rtdm_device description for ++ * registering. If this flag is absent, the RTDM core assigns minor ++ * numbers to devices managed by a driver in order of registration. 
++ */ ++#define RTDM_FIXED_MINOR 0x0002 ++ ++/** If set, the device is addressed via a clear-text name. */ ++#define RTDM_NAMED_DEVICE 0x0010 ++ ++/** If set, the device is addressed via a combination of protocol ID and ++ * socket type. */ ++#define RTDM_PROTOCOL_DEVICE 0x0020 ++ ++/** Mask selecting the device type. */ ++#define RTDM_DEVICE_TYPE_MASK 0x00F0 ++ ++/** Flag indicating a secure variant of RTDM (not supported here) */ ++#define RTDM_SECURE_DEVICE 0x80000000 ++/** @} Device Flags */ ++ ++/** Maximum number of named devices per driver. */ ++#define RTDM_MAX_MINOR 4096 ++ ++/** @} rtdm_device_register */ ++ ++/*! ++ * @addtogroup rtdm_sync ++ * @{ ++ */ ++ ++/*! ++ * @anchor RTDM_SELECTTYPE_xxx @name RTDM_SELECTTYPE_xxx ++ * Event types select can bind to ++ * @{ ++ */ ++enum rtdm_selecttype { ++ /** Select input data availability events */ ++ RTDM_SELECTTYPE_READ = XNSELECT_READ, ++ ++ /** Select ouput buffer availability events */ ++ RTDM_SELECTTYPE_WRITE = XNSELECT_WRITE, ++ ++ /** Select exceptional events */ ++ RTDM_SELECTTYPE_EXCEPT = XNSELECT_EXCEPT ++}; ++/** @} RTDM_SELECTTYPE_xxx */ ++ ++/** @} rtdm_sync */ ++ ++/** ++ * @brief Device context ++ * ++ * A device context structure is associated with every open device instance. ++ * RTDM takes care of its creation and destruction and passes it to the ++ * operation handlers when being invoked. ++ * ++ * Drivers can attach arbitrary data immediately after the official ++ * structure. The size of this data is provided via ++ * rtdm_driver.context_size during device registration. ++ */ ++struct rtdm_dev_context { ++ struct rtdm_fd fd; ++ ++ /** Set of active device operation handlers */ ++ /** Reference to owning device */ ++ struct rtdm_device *device; ++ ++ /** Begin of driver defined context data structure */ ++ char dev_private[0]; ++}; ++ ++static inline struct rtdm_dev_context *rtdm_fd_to_context(struct rtdm_fd *fd) ++{ ++ return container_of(fd, struct rtdm_dev_context, fd); ++} ++ ++/** ++ * Locate the driver private area associated to a device context structure ++ * ++ * @param[in] fd File descriptor structure associated with opened ++ * device instance ++ * ++ * @return The address of the private driver area associated to @a ++ * file descriptor. ++ */ ++static inline void *rtdm_fd_to_private(struct rtdm_fd *fd) ++{ ++ return &rtdm_fd_to_context(fd)->dev_private[0]; ++} ++ ++/** ++ * Locate a device file descriptor structure from its driver private area ++ * ++ * @param[in] dev_private Address of a private context area ++ * ++ * @return The address of the file descriptor structure defining @a ++ * dev_private. ++ */ ++static inline struct rtdm_fd *rtdm_private_to_fd(void *dev_private) ++{ ++ struct rtdm_dev_context *ctx; ++ ctx = container_of(dev_private, struct rtdm_dev_context, dev_private); ++ return &ctx->fd; ++} ++ ++/** ++ * Tell whether the passed file descriptor belongs to an application. ++ * ++ * @param[in] fd File descriptor ++ * ++ * @return true if passed file descriptor belongs to an application, ++ * false otherwise. ++ */ ++static inline bool rtdm_fd_is_user(struct rtdm_fd *fd) ++{ ++ return rtdm_fd_owner(fd) != &cobalt_kernel_ppd; ++} ++ ++/** ++ * Locate a device structure from a file descriptor. ++ * ++ * @param[in] fd File descriptor ++ * ++ * @return The address of the device structure to which this file ++ * descriptor is attached. 
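/*
 * Usage sketch (hypothetical foo_* names, nothing mandated by the RTDM
 * API): the per-connection state a driver keeps in the private area
 * trailing struct rtdm_dev_context. Later sketches in this header reuse
 * this structure; its size is advertised via rtdm_driver.context_size.
 */
struct foo_context {
	bool nonblocking;	/* cached O_NONBLOCK from open() */
	int minor;		/* device instance backing this fd */
	void *ring;		/* data ring, set up outside real-time context */
	size_t ring_avail;	/* bytes currently readable */
};

static void foo_wakeup(struct foo_context *ctx)
{
	/* Map the private area back to its file descriptor... */
	struct rtdm_fd *fd = rtdm_private_to_fd(ctx);

	/* ...and tell user-originated calls from kernel-originated ones. */
	if (rtdm_fd_is_user(fd))
		pr_debug("foo%d: waking up a user-space caller\n", ctx->minor);
}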
++ */ ++static inline struct rtdm_device *rtdm_fd_device(struct rtdm_fd *fd) ++{ ++ return rtdm_fd_to_context(fd)->device; ++} ++ ++/** ++ * @brief RTDM profile information ++ * ++ * This descriptor details the profile information associated to a ++ * RTDM class of device managed by a driver. ++ * ++ * @anchor rtdm_profile_info ++ */ ++struct rtdm_profile_info { ++ /** Device class name */ ++ const char *name; ++ /** Device class ID, see @ref RTDM_CLASS_xxx */ ++ int class_id; ++ /** Device sub-class, see RTDM_SUBCLASS_xxx definition in the ++ @ref rtdm_profiles "Device Profiles" */ ++ int subclass_id; ++ /** Supported device profile version */ ++ int version; ++ /** Reserved */ ++ unsigned int magic; ++ struct module *owner; ++ struct class *kdev_class; ++}; ++ ++struct rtdm_driver; ++ ++/** ++ * @brief RTDM state management handlers ++ */ ++struct rtdm_sm_ops { ++ /** Handler called upon transition to COBALT_STATE_WARMUP */ ++ int (*start)(struct rtdm_driver *drv); ++ /** Handler called upon transition to COBALT_STATE_TEARDOWN */ ++ int (*stop)(struct rtdm_driver *drv); ++}; ++ ++/** ++ * @brief RTDM driver ++ * ++ * This descriptor describes a RTDM device driver. The structure holds ++ * runtime data, therefore it must reside in writable memory. ++ */ ++struct rtdm_driver { ++ /** ++ * Class profile information. The RTDM_PROFILE_INFO() macro @b ++ * must be used for filling up this field. ++ * @anchor rtdm_driver_profile ++ */ ++ struct rtdm_profile_info profile_info; ++ /** ++ * Device flags, see @ref dev_flags "Device Flags" for details ++ * @anchor rtdm_driver_flags ++ */ ++ int device_flags; ++ /** ++ * Size of the private memory area the core should ++ * automatically allocate for each open file descriptor, which ++ * is usable for storing the context data associated to each ++ * connection. The allocated memory is zero-initialized. The ++ * start of this area can be retrieved by a call to ++ * rtdm_fd_to_private(). ++ */ ++ size_t context_size; ++ /** Protocol device identification: protocol family (PF_xxx) */ ++ int protocol_family; ++ /** Protocol device identification: socket type (SOCK_xxx) */ ++ int socket_type; ++ /** I/O operation handlers */ ++ struct rtdm_fd_ops ops; ++ /** State management handlers */ ++ struct rtdm_sm_ops smops; ++ /** ++ * Count of devices this driver manages. This value is used to ++ * allocate a chrdev region for named devices. ++ */ ++ int device_count; ++ /** Base minor for named devices. */ ++ int base_minor; ++ /** Reserved area */ ++ struct { ++ union { ++ struct { ++ struct cdev cdev; ++ int major; ++ } named; ++ }; ++ atomic_t refcount; ++ struct notifier_block nb_statechange; ++ DECLARE_BITMAP(minor_map, RTDM_MAX_MINOR); ++ }; ++}; ++ ++#define RTDM_CLASS_MAGIC 0x8284636c ++ ++/** ++ * @brief Initializer for class profile information. ++ * ++ * This macro must be used to fill in the @ref rtdm_profile_info ++ * "class profile information" field from a RTDM driver. ++ * ++ * @param __name Class name (unquoted). ++ * ++ * @param __id Class major identification number ++ * (profile_version.class_id). ++ * ++ * @param __subid Class minor identification number ++ * (profile_version.subclass_id). ++ * ++ * @param __version Profile version number. ++ * ++ * @note See @ref rtdm_profiles "Device Profiles". 
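/*
 * Usage sketch (hypothetical foo_* driver, assuming <linux/module.h> in
 * addition to this header): a minimal named-device driver descriptor.
 * The class/subclass constants come from rtdm/uapi/rtdm.h; struct
 * rtdm_device and rtdm_dev_register()/rtdm_dev_unregister() are declared
 * further down, and the referenced handlers are sketched later on.
 */
static int foo_open(struct rtdm_fd *fd, int oflags);
static int foo_ioctl_rt(struct rtdm_fd *fd, unsigned int request,
			void __user *arg);
static ssize_t foo_read_rt(struct rtdm_fd *fd, void __user *buf, size_t size);

static struct rtdm_driver foo_driver = {
	.profile_info	= RTDM_PROFILE_INFO(foo, RTDM_CLASS_EXPERIMENTAL,
					    RTDM_SUBCLASS_GENERIC, 1),
	.device_flags	= RTDM_NAMED_DEVICE,
	.device_count	= 1,
	.context_size	= sizeof(struct foo_context),	/* see sketch above */
	.ops = {
		.open		= foo_open,
		.ioctl_rt	= foo_ioctl_rt,
		.read_rt	= foo_read_rt,
	},
};

static struct rtdm_device foo_device = {
	.driver = &foo_driver,
	.label = "foo%d",	/* %d expands to the assigned minor */
};

static int __init foo_init(void)
{
	return rtdm_dev_register(&foo_device);
}

static void __exit foo_exit(void)
{
	rtdm_dev_unregister(&foo_device);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");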
++ */ ++#define RTDM_PROFILE_INFO(__name, __id, __subid, __version) \ ++{ \ ++ .name = ( # __name ), \ ++ .class_id = (__id), \ ++ .subclass_id = (__subid), \ ++ .version = (__version), \ ++ .magic = ~RTDM_CLASS_MAGIC, \ ++ .owner = THIS_MODULE, \ ++ .kdev_class = NULL, \ ++} ++ ++int rtdm_drv_set_sysclass(struct rtdm_driver *drv, struct class *cls); ++ ++/** ++ * @brief RTDM device ++ * ++ * This descriptor describes a RTDM device instance. The structure ++ * holds runtime data, therefore it must reside in writable memory. ++ */ ++struct rtdm_device { ++ /** Device driver. */ ++ struct rtdm_driver *driver; ++ /** Driver definable device data */ ++ void *device_data; ++ /** ++ * Device label template for composing the device name. A ++ * limited printf-like format string is assumed, with a ++ * provision for replacing the first %d/%i placeholder found ++ * in the string by the device minor number. It is up to the ++ * driver to actually mention this placeholder or not, ++ * depending on the naming convention for its devices. For ++ * named devices, the corresponding device node will ++ * automatically appear in the /dev/rtdm hierachy with ++ * hotplug-enabled device filesystems (DEVTMPFS). ++ */ ++ const char *label; ++ /** ++ * Minor number of the device. If RTDM_FIXED_MINOR is present ++ * in the driver flags, the value stored in this field is used ++ * verbatim by rtdm_dev_register(). Otherwise, the RTDM core ++ * automatically assigns minor numbers to all devices managed ++ * by the driver referred to by @a driver, in order of ++ * registration, storing the resulting values into this field. ++ * ++ * Device nodes created for named devices in the Linux /dev ++ * hierarchy are assigned this minor number. ++ * ++ * The minor number of the current device handling an I/O ++ * request can be retreived by a call to rtdm_fd_minor(). ++ */ ++ int minor; ++ /** Reserved area. */ ++ struct { ++ unsigned int magic; ++ char *name; ++ union { ++ struct { ++ xnhandle_t handle; ++ } named; ++ struct { ++ struct xnid id; ++ } proto; ++ }; ++ dev_t rdev; ++ struct device *kdev; ++ struct class *kdev_class; ++ atomic_t refcount; ++ struct rtdm_fd_ops ops; ++ wait_queue_head_t putwq; ++ struct list_head openfd_list; ++ }; ++}; ++ ++/* --- device registration --- */ ++ ++int rtdm_dev_register(struct rtdm_device *device); ++ ++void rtdm_dev_unregister(struct rtdm_device *device); ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++ ++static inline struct device *rtdm_dev_to_kdev(struct rtdm_device *device) ++{ ++ return device->kdev; ++} ++ ++/* --- clock services --- */ ++static inline nanosecs_abs_t rtdm_clock_read(void) ++{ ++ return xnclock_read_realtime(&nkclock); ++} ++ ++static inline nanosecs_abs_t rtdm_clock_read_monotonic(void) ++{ ++ return xnclock_read_monotonic(&nkclock); ++} ++#endif /* !DOXYGEN_CPP */ ++ ++/* --- timeout sequences */ ++ ++typedef nanosecs_abs_t rtdm_toseq_t; ++ ++void rtdm_toseq_init(rtdm_toseq_t *timeout_seq, nanosecs_rel_t timeout); ++ ++/*! ++ * @addtogroup rtdm_sync ++ * @{ ++ */ ++ ++/*! ++ * @defgroup rtdm_sync_biglock Big dual kernel lock ++ * @{ ++ */ ++ ++/** ++ * @brief Enter atomic section (dual kernel only) ++ * ++ * This call opens a fully atomic section, serializing execution with ++ * respect to all interrupt handlers (including for real-time IRQs) ++ * and Xenomai threads running on all CPUs. ++ * ++ * @param __context name of local variable to store the context ++ * in. 
This variable updated by the real-time core will hold the ++ * information required to leave the atomic section properly. ++ * ++ * @note Atomic sections may be nested. The caller is allowed to sleep ++ * on a blocking Xenomai service from primary mode within an atomic ++ * section delimited by cobalt_atomic_enter/cobalt_atomic_leave calls. ++ * On the contrary, sleeping on a regular Linux kernel service while ++ * holding such lock is NOT valid. ++ * ++ * @note Since the strongest lock is acquired by this service, it can ++ * be used to synchronize real-time and non-real-time contexts. ++ * ++ * @warning This service is not portable to the Mercury core, and ++ * should be restricted to Cobalt-specific use cases, mainly for the ++ * purpose of porting existing dual-kernel drivers which still depend ++ * on the obsolete RTDM_EXECUTE_ATOMICALLY() construct. ++ */ ++#define cobalt_atomic_enter(__context) \ ++ do { \ ++ xnlock_get_irqsave(&nklock, (__context)); \ ++ xnsched_lock(); \ ++ } while (0) ++ ++/** ++ * @brief Leave atomic section (dual kernel only) ++ * ++ * This call closes an atomic section previously opened by a call to ++ * cobalt_atomic_enter(), restoring the preemption and interrupt state ++ * which prevailed prior to entering the exited section. ++ * ++ * @param __context name of local variable which stored the context. ++ * ++ * @warning This service is not portable to the Mercury core, and ++ * should be restricted to Cobalt-specific use cases. ++ */ ++#define cobalt_atomic_leave(__context) \ ++ do { \ ++ xnsched_unlock(); \ ++ xnlock_put_irqrestore(&nklock, (__context)); \ ++ } while (0) ++ ++/** ++ * @brief Execute code block atomically (DEPRECATED) ++ * ++ * Generally, it is illegal to suspend the current task by calling ++ * rtdm_task_sleep(), rtdm_event_wait(), etc. while holding a spinlock. In ++ * contrast, this macro allows to combine several operations including ++ * a potentially rescheduling call to an atomic code block with respect to ++ * other RTDM_EXECUTE_ATOMICALLY() blocks. The macro is a light-weight ++ * alternative for protecting code blocks via mutexes, and it can even be used ++ * to synchronise real-time and non-real-time contexts. ++ * ++ * @param code_block Commands to be executed atomically ++ * ++ * @note It is not allowed to leave the code block explicitly by using ++ * @c break, @c return, @c goto, etc. This would leave the global lock held ++ * during the code block execution in an inconsistent state. Moreover, do not ++ * embed complex operations into the code bock. Consider that they will be ++ * executed under preemption lock with interrupts switched-off. Also note that ++ * invocation of rescheduling calls may break the atomicity until the task ++ * gains the CPU again. ++ * ++ * @coretags{unrestricted} ++ * ++ * @deprecated This construct will be phased out in Xenomai ++ * 3.0. Please use rtdm_waitqueue services instead. ++ * ++ * @see cobalt_atomic_enter(). 
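/*
 * Usage sketch (hypothetical foo_* names): a short critical section
 * under the big dual-kernel lock, typical of code being ported away
 * from the deprecated RTDM_EXECUTE_ATOMICALLY() construct defined
 * below.
 */
static unsigned long foo_hits;

static void foo_account_hit(void)
{
	spl_t s;

	cobalt_atomic_enter(s);
	/* Serialized against Xenomai threads and RT IRQs on all CPUs. */
	foo_hits++;
	cobalt_atomic_leave(s);
}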
++ */ ++#ifdef DOXYGEN_CPP /* Beautify doxygen output */ ++#define RTDM_EXECUTE_ATOMICALLY(code_block) \ ++{ \ ++ \ ++ code_block; \ ++ \ ++} ++#else /* This is how it really works */ ++static inline __attribute__((deprecated)) void ++rtdm_execute_atomically(void) { } ++ ++#define RTDM_EXECUTE_ATOMICALLY(code_block) \ ++{ \ ++ spl_t __rtdm_s; \ ++ \ ++ rtdm_execute_atomically(); \ ++ xnlock_get_irqsave(&nklock, __rtdm_s); \ ++ xnsched_lock(); \ ++ code_block; \ ++ xnsched_unlock(); \ ++ xnlock_put_irqrestore(&nklock, __rtdm_s); \ ++} ++#endif ++ ++/** @} Big dual kernel lock */ ++ ++/** ++ * @defgroup rtdm_sync_spinlock Spinlock with preemption deactivation ++ * @{ ++ */ ++ ++/** ++ * Static lock initialisation ++ */ ++#define RTDM_LOCK_UNLOCKED(__name) IPIPE_SPIN_LOCK_UNLOCKED ++ ++#define DEFINE_RTDM_LOCK(__name) \ ++ rtdm_lock_t __name = RTDM_LOCK_UNLOCKED(__name) ++ ++/** Lock variable */ ++typedef ipipe_spinlock_t rtdm_lock_t; ++ ++/** Variable to save the context while holding a lock */ ++typedef unsigned long rtdm_lockctx_t; ++ ++/** ++ * Dynamic lock initialisation ++ * ++ * @param lock Address of lock variable ++ * ++ * @coretags{task-unrestricted} ++ */ ++static inline void rtdm_lock_init(rtdm_lock_t *lock) ++{ ++ raw_spin_lock_init(lock); ++} ++ ++/** ++ * Acquire lock from non-preemptible contexts ++ * ++ * @param lock Address of lock variable ++ * ++ * @coretags{unrestricted} ++ */ ++static inline void rtdm_lock_get(rtdm_lock_t *lock) ++{ ++ XENO_BUG_ON(COBALT, !spltest()); ++ raw_spin_lock(lock); ++ xnsched_lock(); ++} ++ ++/** ++ * Release lock without preemption restoration ++ * ++ * @param lock Address of lock variable ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++static inline void rtdm_lock_put(rtdm_lock_t *lock) ++{ ++ raw_spin_unlock(lock); ++ xnsched_unlock(); ++} ++ ++/** ++ * Acquire lock and disable preemption, by stalling the head domain. 
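/*
 * Usage sketch (hypothetical foo_* names): protecting shared driver
 * data with an RTDM spinlock. The plain get/put pair is reserved for
 * contexts that are already non-preemptible (e.g. RTDM interrupt
 * handlers); the *_irqsave/_irqrestore variants declared just below are
 * the safe choice anywhere else.
 */
static DEFINE_RTDM_LOCK(foo_lock);
static unsigned int foo_pending;

static void foo_update_from_thread(unsigned int value)
{
	rtdm_lockctx_t ctx;

	rtdm_lock_get_irqsave(&foo_lock, ctx);
	foo_pending = value;
	rtdm_lock_put_irqrestore(&foo_lock, ctx);
}

static void foo_update_from_irq(unsigned int value)
{
	/* Real-time IRQs are already off in this context. */
	rtdm_lock_get(&foo_lock);
	foo_pending = value;
	rtdm_lock_put(&foo_lock);
}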
++ * ++ * @param __lock Address of lock variable ++ * @param __context name of local variable to store the context in ++ * ++ * @coretags{unrestricted} ++ */ ++#define rtdm_lock_get_irqsave(__lock, __context) \ ++ ((__context) = __rtdm_lock_get_irqsave(__lock)) ++ ++static inline rtdm_lockctx_t __rtdm_lock_get_irqsave(rtdm_lock_t *lock) ++{ ++ rtdm_lockctx_t context; ++ ++ context = ipipe_test_and_stall_head(); ++ raw_spin_lock(lock); ++ xnsched_lock(); ++ ++ return context; ++} ++ ++/** ++ * Release lock and restore preemption state ++ * ++ * @param lock Address of lock variable ++ * @param context name of local variable which stored the context ++ * ++ * @coretags{unrestricted} ++ */ ++static inline ++void rtdm_lock_put_irqrestore(rtdm_lock_t *lock, rtdm_lockctx_t context) ++{ ++ raw_spin_unlock(lock); ++ xnsched_unlock(); ++ ipipe_restore_head(context); ++} ++ ++/** ++ * Disable preemption locally ++ * ++ * @param __context name of local variable to store the context in ++ * ++ * @coretags{unrestricted} ++ */ ++#define rtdm_lock_irqsave(__context) \ ++ splhigh(__context) ++ ++/** ++ * Restore preemption state ++ * ++ * @param __context name of local variable which stored the context ++ * ++ * @coretags{unrestricted} ++ */ ++#define rtdm_lock_irqrestore(__context) \ ++ splexit(__context) ++ ++/** @} Spinlock with Preemption Deactivation */ ++ ++#ifndef DOXYGEN_CPP ++ ++struct rtdm_waitqueue { ++ struct xnsynch wait; ++}; ++typedef struct rtdm_waitqueue rtdm_waitqueue_t; ++ ++#define RTDM_WAITQUEUE_INITIALIZER(__name) { \ ++ .wait = XNSYNCH_WAITQUEUE_INITIALIZER((__name).wait), \ ++ } ++ ++#define DEFINE_RTDM_WAITQUEUE(__name) \ ++ struct rtdm_waitqueue __name = RTDM_WAITQUEUE_INITIALIZER(__name) ++ ++#define DEFINE_RTDM_WAITQUEUE_ONSTACK(__name) \ ++ DEFINE_RTDM_WAITQUEUE(__name) ++ ++static inline void rtdm_waitqueue_init(struct rtdm_waitqueue *wq) ++{ ++ *wq = (struct rtdm_waitqueue)RTDM_WAITQUEUE_INITIALIZER(*wq); ++} ++ ++static inline void rtdm_waitqueue_destroy(struct rtdm_waitqueue *wq) ++{ ++ xnsynch_destroy(&wq->wait); ++} ++ ++static inline int __rtdm_dowait(struct rtdm_waitqueue *wq, ++ nanosecs_rel_t timeout, xntmode_t timeout_mode) ++{ ++ int ret; ++ ++ ret = xnsynch_sleep_on(&wq->wait, timeout, timeout_mode); ++ if (ret & XNBREAK) ++ return -EINTR; ++ if (ret & XNTIMEO) ++ return -ETIMEDOUT; ++ if (ret & XNRMID) ++ return -EIDRM; ++ return 0; ++} ++ ++static inline int __rtdm_timedwait(struct rtdm_waitqueue *wq, ++ nanosecs_rel_t timeout, rtdm_toseq_t *toseq) ++{ ++ if (toseq && timeout > 0) ++ return __rtdm_dowait(wq, *toseq, XN_ABSOLUTE); ++ ++ return __rtdm_dowait(wq, timeout, XN_RELATIVE); ++} ++ ++#define rtdm_timedwait_condition_locked(__wq, __cond, __timeout, __toseq) \ ++ ({ \ ++ int __ret = 0; \ ++ while (__ret == 0 && !(__cond)) \ ++ __ret = __rtdm_timedwait(__wq, __timeout, __toseq); \ ++ __ret; \ ++ }) ++ ++#define rtdm_wait_condition_locked(__wq, __cond) \ ++ ({ \ ++ int __ret = 0; \ ++ while (__ret == 0 && !(__cond)) \ ++ __ret = __rtdm_dowait(__wq, \ ++ XN_INFINITE, XN_RELATIVE); \ ++ __ret; \ ++ }) ++ ++#define rtdm_timedwait_condition(__wq, __cond, __timeout, __toseq) \ ++ ({ \ ++ spl_t __s; \ ++ int __ret; \ ++ xnlock_get_irqsave(&nklock, __s); \ ++ __ret = rtdm_timedwait_condition_locked(__wq, __cond, \ ++ __timeout, __toseq); \ ++ xnlock_put_irqrestore(&nklock, __s); \ ++ __ret; \ ++ }) ++ ++#define rtdm_timedwait(__wq, __timeout, __toseq) \ ++ __rtdm_timedwait(__wq, __timeout, __toseq) ++ ++#define rtdm_timedwait_locked(__wq, __timeout, __toseq) \ ++ 
rtdm_timedwait(__wq, __timeout, __toseq) ++ ++#define rtdm_wait_condition(__wq, __cond) \ ++ ({ \ ++ spl_t __s; \ ++ int __ret; \ ++ xnlock_get_irqsave(&nklock, __s); \ ++ __ret = rtdm_wait_condition_locked(__wq, __cond); \ ++ xnlock_put_irqrestore(&nklock, __s); \ ++ __ret; \ ++ }) ++ ++#define rtdm_wait(__wq) \ ++ __rtdm_dowait(__wq, XN_INFINITE, XN_RELATIVE) ++ ++#define rtdm_wait_locked(__wq) rtdm_wait(__wq) ++ ++#define rtdm_waitqueue_lock(__wq, __context) cobalt_atomic_enter(__context) ++ ++#define rtdm_waitqueue_unlock(__wq, __context) cobalt_atomic_leave(__context) ++ ++#define rtdm_waitqueue_signal(__wq) \ ++ ({ \ ++ struct xnthread *__waiter; \ ++ __waiter = xnsynch_wakeup_one_sleeper(&(__wq)->wait); \ ++ xnsched_run(); \ ++ __waiter != NULL; \ ++ }) ++ ++#define __rtdm_waitqueue_flush(__wq, __reason) \ ++ ({ \ ++ int __ret; \ ++ __ret = xnsynch_flush(&(__wq)->wait, __reason); \ ++ xnsched_run(); \ ++ __ret == XNSYNCH_RESCHED; \ ++ }) ++ ++#define rtdm_waitqueue_broadcast(__wq) \ ++ __rtdm_waitqueue_flush(__wq, 0) ++ ++#define rtdm_waitqueue_flush(__wq) \ ++ __rtdm_waitqueue_flush(__wq, XNBREAK) ++ ++#define rtdm_waitqueue_wakeup(__wq, __waiter) \ ++ do { \ ++ xnsynch_wakeup_this_sleeper(&(__wq)->wait, __waiter); \ ++ xnsched_run(); \ ++ } while (0) ++ ++#define rtdm_for_each_waiter(__pos, __wq) \ ++ xnsynch_for_each_sleeper(__pos, &(__wq)->wait) ++ ++#define rtdm_for_each_waiter_safe(__pos, __tmp, __wq) \ ++ xnsynch_for_each_sleeper_safe(__pos, __tmp, &(__wq)->wait) ++ ++#endif /* !DOXYGEN_CPP */ ++ ++/** @} rtdm_sync */ ++ ++/* --- Interrupt management services --- */ ++/*! ++ * @addtogroup rtdm_irq ++ * @{ ++ */ ++ ++typedef struct xnintr rtdm_irq_t; ++ ++/*! ++ * @anchor RTDM_IRQTYPE_xxx @name RTDM_IRQTYPE_xxx ++ * Interrupt registrations flags ++ * @{ ++ */ ++/** Enable IRQ-sharing with other real-time drivers */ ++#define RTDM_IRQTYPE_SHARED XN_IRQTYPE_SHARED ++/** Mark IRQ as edge-triggered, relevant for correct handling of shared ++ * edge-triggered IRQs */ ++#define RTDM_IRQTYPE_EDGE XN_IRQTYPE_EDGE ++/** @} RTDM_IRQTYPE_xxx */ ++ ++/** ++ * Interrupt handler ++ * ++ * @param[in] irq_handle IRQ handle as returned by rtdm_irq_request() ++ * ++ * @return 0 or a combination of @ref RTDM_IRQ_xxx flags ++ */ ++typedef int (*rtdm_irq_handler_t)(rtdm_irq_t *irq_handle); ++ ++/*! ++ * @anchor RTDM_IRQ_xxx @name RTDM_IRQ_xxx ++ * Return flags of interrupt handlers ++ * @{ ++ */ ++/** Unhandled interrupt */ ++#define RTDM_IRQ_NONE XN_IRQ_NONE ++/** Denote handled interrupt */ ++#define RTDM_IRQ_HANDLED XN_IRQ_HANDLED ++/** Request interrupt disabling on exit */ ++#define RTDM_IRQ_DISABLE XN_IRQ_DISABLE ++/** @} RTDM_IRQ_xxx */ ++ ++/** ++ * Retrieve IRQ handler argument ++ * ++ * @param irq_handle IRQ handle ++ * @param type Type of the pointer to return ++ * ++ * @return The argument pointer registered on rtdm_irq_request() is returned, ++ * type-casted to the specified @a type. 
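/*
 * Usage sketch (hypothetical foo_* names): an interrupt handler
 * retrieving its opaque cookie with rtdm_irq_get_arg(), attached with
 * rtdm_irq_request() as declared just below. Hardware acknowledge is
 * left out.
 */
struct foo_irq_state {
	rtdm_irq_t irq_handle;
	unsigned long count;
};

static int foo_isr(rtdm_irq_t *irq_handle)
{
	struct foo_irq_state *st =
		rtdm_irq_get_arg(irq_handle, struct foo_irq_state);

	st->count++;	/* typically: ack the device, push data, wake waiters */

	return RTDM_IRQ_HANDLED;
}

static int foo_attach_irq(struct foo_irq_state *st, unsigned int irq)
{
	return rtdm_irq_request(&st->irq_handle, irq, foo_isr, 0,
				"foo", st);
}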
++ * ++ * @coretags{unrestricted} ++ */ ++#define rtdm_irq_get_arg(irq_handle, type) ((type *)irq_handle->cookie) ++/** @} rtdm_irq */ ++ ++int rtdm_irq_request(rtdm_irq_t *irq_handle, unsigned int irq_no, ++ rtdm_irq_handler_t handler, unsigned long flags, ++ const char *device_name, void *arg); ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++static inline int rtdm_irq_free(rtdm_irq_t *irq_handle) ++{ ++ if (!XENO_ASSERT(COBALT, xnsched_root_p())) ++ return -EPERM; ++ xnintr_detach(irq_handle); ++ return 0; ++} ++ ++static inline int rtdm_irq_enable(rtdm_irq_t *irq_handle) ++{ ++ xnintr_enable(irq_handle); ++ return 0; ++} ++ ++static inline int rtdm_irq_disable(rtdm_irq_t *irq_handle) ++{ ++ xnintr_disable(irq_handle); ++ return 0; ++} ++#endif /* !DOXYGEN_CPP */ ++ ++/* --- non-real-time signalling services --- */ ++ ++/*! ++ * @addtogroup rtdm_nrtsignal ++ * @{ ++ */ ++ ++typedef struct rtdm_nrtsig rtdm_nrtsig_t; ++/** ++ * Non-real-time signal handler ++ * ++ * @param[in] nrt_sig Signal handle pointer as passed to rtdm_nrtsig_init() ++ * @param[in] arg Argument as passed to rtdm_nrtsig_init() ++ * ++ * @note The signal handler will run in soft-IRQ context of the non-real-time ++ * subsystem. Note the implications of this context, e.g. no invocation of ++ * blocking operations. ++ */ ++typedef void (*rtdm_nrtsig_handler_t)(rtdm_nrtsig_t *nrt_sig, void *arg); ++ ++struct rtdm_nrtsig { ++ rtdm_nrtsig_handler_t handler; ++ void *arg; ++}; ++ ++void rtdm_schedule_nrt_work(struct work_struct *lostage_work); ++/** @} rtdm_nrtsignal */ ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++static inline void rtdm_nrtsig_init(rtdm_nrtsig_t *nrt_sig, ++ rtdm_nrtsig_handler_t handler, void *arg) ++{ ++ nrt_sig->handler = handler; ++ nrt_sig->arg = arg; ++} ++ ++static inline void rtdm_nrtsig_destroy(rtdm_nrtsig_t *nrt_sig) ++{ ++ nrt_sig->handler = NULL; ++ nrt_sig->arg = NULL; ++} ++ ++void rtdm_nrtsig_pend(rtdm_nrtsig_t *nrt_sig); ++#endif /* !DOXYGEN_CPP */ ++ ++/* --- timer services --- */ ++ ++/*! ++ * @addtogroup rtdm_timer ++ * @{ ++ */ ++ ++typedef struct xntimer rtdm_timer_t; ++ ++/** ++ * Timer handler ++ * ++ * @param[in] timer Timer handle as returned by rtdm_timer_init() ++ */ ++typedef void (*rtdm_timer_handler_t)(rtdm_timer_t *timer); ++ ++/*! 
++ * @anchor RTDM_TIMERMODE_xxx @name RTDM_TIMERMODE_xxx ++ * Timer operation modes ++ * @{ ++ */ ++enum rtdm_timer_mode { ++ /** Monotonic timer with relative timeout */ ++ RTDM_TIMERMODE_RELATIVE = XN_RELATIVE, ++ ++ /** Monotonic timer with absolute timeout */ ++ RTDM_TIMERMODE_ABSOLUTE = XN_ABSOLUTE, ++ ++ /** Adjustable timer with absolute timeout */ ++ RTDM_TIMERMODE_REALTIME = XN_REALTIME ++}; ++/** @} RTDM_TIMERMODE_xxx */ ++ ++/** @} rtdm_timer */ ++ ++int rtdm_timer_init(rtdm_timer_t *timer, rtdm_timer_handler_t handler, ++ const char *name); ++ ++void rtdm_timer_destroy(rtdm_timer_t *timer); ++ ++int rtdm_timer_start(rtdm_timer_t *timer, nanosecs_abs_t expiry, ++ nanosecs_rel_t interval, enum rtdm_timer_mode mode); ++ ++void rtdm_timer_stop(rtdm_timer_t *timer); ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++static inline int rtdm_timer_start_in_handler(rtdm_timer_t *timer, ++ nanosecs_abs_t expiry, ++ nanosecs_rel_t interval, ++ enum rtdm_timer_mode mode) ++{ ++ return xntimer_start(timer, expiry, interval, (xntmode_t)mode); ++} ++ ++static inline void rtdm_timer_stop_in_handler(rtdm_timer_t *timer) ++{ ++ xntimer_stop(timer); ++} ++#endif /* !DOXYGEN_CPP */ ++ ++/* --- task services --- */ ++/*! ++ * @addtogroup rtdm_task ++ * @{ ++ */ ++ ++typedef struct xnthread rtdm_task_t; ++ ++/** ++ * Real-time task procedure ++ * ++ * @param[in,out] arg argument as passed to rtdm_task_init() ++ */ ++typedef void (*rtdm_task_proc_t)(void *arg); ++ ++/** ++ * @anchor rtdmtaskprio @name Task Priority Range ++ * Maximum and minimum task priorities ++ * @{ */ ++#define RTDM_TASK_LOWEST_PRIORITY 0 ++#define RTDM_TASK_HIGHEST_PRIORITY 99 ++/** @} Task Priority Range */ ++ ++/** ++ * @anchor rtdmchangetaskprio @name Task Priority Modification ++ * Raise or lower task priorities by one level ++ * @{ */ ++#define RTDM_TASK_RAISE_PRIORITY (+1) ++#define RTDM_TASK_LOWER_PRIORITY (-1) ++/** @} Task Priority Modification */ ++ ++/** @} rtdm_task */ ++ ++int rtdm_task_init(rtdm_task_t *task, const char *name, ++ rtdm_task_proc_t task_proc, void *arg, ++ int priority, nanosecs_rel_t period); ++int __rtdm_task_sleep(xnticks_t timeout, xntmode_t mode); ++void rtdm_task_busy_sleep(nanosecs_rel_t delay); ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++static inline void rtdm_task_destroy(rtdm_task_t *task) ++{ ++ xnthread_cancel(task); ++ xnthread_join(task, true); ++} ++ ++static inline int rtdm_task_should_stop(void) ++{ ++ return xnthread_test_info(xnthread_current(), XNCANCELD); ++} ++ ++void rtdm_task_join(rtdm_task_t *task); ++ ++static inline void __deprecated rtdm_task_join_nrt(rtdm_task_t *task, ++ unsigned int poll_delay) ++{ ++ rtdm_task_join(task); ++} ++ ++static inline void rtdm_task_set_priority(rtdm_task_t *task, int priority) ++{ ++ union xnsched_policy_param param = { .rt = { .prio = priority } }; ++ spl_t s; ++ ++ splhigh(s); ++ xnthread_set_schedparam(task, &xnsched_class_rt, ¶m); ++ xnsched_run(); ++ splexit(s); ++} ++ ++static inline int rtdm_task_set_period(rtdm_task_t *task, ++ nanosecs_abs_t start_date, ++ nanosecs_rel_t period) ++{ ++ if (period < 0) ++ period = 0; ++ if (start_date == 0) ++ start_date = XN_INFINITE; ++ ++ return xnthread_set_periodic(task, start_date, XN_ABSOLUTE, period); ++} ++ ++static inline int rtdm_task_unblock(rtdm_task_t *task) ++{ ++ spl_t s; ++ int res; ++ ++ splhigh(s); ++ res = xnthread_unblock(task); ++ xnsched_run(); ++ splexit(s); ++ ++ return res; ++} ++ ++static inline rtdm_task_t 
*rtdm_task_current(void) ++{ ++ return xnthread_current(); ++} ++ ++static inline int rtdm_task_wait_period(unsigned long *overruns_r) ++{ ++ if (!XENO_ASSERT(COBALT, !xnsched_unblockable_p())) ++ return -EPERM; ++ return xnthread_wait_period(overruns_r); ++} ++ ++static inline int rtdm_task_sleep(nanosecs_rel_t delay) ++{ ++ return __rtdm_task_sleep(delay, XN_RELATIVE); ++} ++ ++static inline int ++rtdm_task_sleep_abs(nanosecs_abs_t wakeup_date, enum rtdm_timer_mode mode) ++{ ++ /* For the sake of a consistent API usage... */ ++ if (mode != RTDM_TIMERMODE_ABSOLUTE && mode != RTDM_TIMERMODE_REALTIME) ++ return -EINVAL; ++ return __rtdm_task_sleep(wakeup_date, (xntmode_t)mode); ++} ++ ++/* rtdm_task_sleep_abs shall be used instead */ ++static inline int __deprecated rtdm_task_sleep_until(nanosecs_abs_t wakeup_time) ++{ ++ return __rtdm_task_sleep(wakeup_time, XN_REALTIME); ++} ++ ++#define rtdm_task_busy_wait(__condition, __spin_ns, __sleep_ns) \ ++ ({ \ ++ __label__ done; \ ++ nanosecs_abs_t __end; \ ++ int __ret = 0; \ ++ for (;;) { \ ++ __end = rtdm_clock_read_monotonic() + __spin_ns; \ ++ for (;;) { \ ++ if (__condition) \ ++ goto done; \ ++ if (rtdm_clock_read_monotonic() >= __end) \ ++ break; \ ++ } \ ++ __ret = rtdm_task_sleep(__sleep_ns); \ ++ if (__ret) \ ++ break; \ ++ } \ ++ done: \ ++ __ret; \ ++ }) ++ ++#define rtdm_wait_context xnthread_wait_context ++ ++static inline ++void rtdm_wait_complete(struct rtdm_wait_context *wc) ++{ ++ xnthread_complete_wait(wc); ++} ++ ++static inline ++int rtdm_wait_is_completed(struct rtdm_wait_context *wc) ++{ ++ return xnthread_wait_complete_p(wc); ++} ++ ++static inline void rtdm_wait_prepare(struct rtdm_wait_context *wc) ++{ ++ xnthread_prepare_wait(wc); ++} ++ ++static inline ++struct rtdm_wait_context *rtdm_wait_get_context(rtdm_task_t *task) ++{ ++ return xnthread_get_wait_context(task); ++} ++ ++#endif /* !DOXYGEN_CPP */ ++ ++/* --- event services --- */ ++ ++typedef struct rtdm_event { ++ struct xnsynch synch_base; ++ DECLARE_XNSELECT(select_block); ++} rtdm_event_t; ++ ++#define RTDM_EVENT_PENDING XNSYNCH_SPARE1 ++ ++void rtdm_event_init(rtdm_event_t *event, unsigned long pending); ++int rtdm_event_select(rtdm_event_t *event, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index); ++int rtdm_event_wait(rtdm_event_t *event); ++int rtdm_event_timedwait(rtdm_event_t *event, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq); ++void rtdm_event_signal(rtdm_event_t *event); ++ ++void rtdm_event_clear(rtdm_event_t *event); ++ ++void rtdm_event_pulse(rtdm_event_t *event); ++ ++void rtdm_event_destroy(rtdm_event_t *event); ++ ++/* --- semaphore services --- */ ++ ++typedef struct rtdm_sem { ++ unsigned long value; ++ struct xnsynch synch_base; ++ DECLARE_XNSELECT(select_block); ++} rtdm_sem_t; ++ ++void rtdm_sem_init(rtdm_sem_t *sem, unsigned long value); ++int rtdm_sem_select(rtdm_sem_t *sem, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index); ++int rtdm_sem_down(rtdm_sem_t *sem); ++int rtdm_sem_timeddown(rtdm_sem_t *sem, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq); ++void rtdm_sem_up(rtdm_sem_t *sem); ++ ++void rtdm_sem_destroy(rtdm_sem_t *sem); ++ ++/* --- mutex services --- */ ++ ++typedef struct rtdm_mutex { ++ struct xnsynch synch_base; ++ atomic_t fastlock; ++} rtdm_mutex_t; ++ ++void rtdm_mutex_init(rtdm_mutex_t *mutex); ++int rtdm_mutex_lock(rtdm_mutex_t *mutex); ++int rtdm_mutex_timedlock(rtdm_mutex_t *mutex, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq); ++void 
rtdm_mutex_unlock(rtdm_mutex_t *mutex); ++void rtdm_mutex_destroy(rtdm_mutex_t *mutex); ++ ++/* --- utility functions --- */ ++ ++#define rtdm_printk(format, ...) printk(format, ##__VA_ARGS__) ++ ++#define rtdm_printk_ratelimited(fmt, ...) do { \ ++ if (xnclock_ratelimit()) \ ++ printk(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++static inline void *rtdm_malloc(size_t size) ++{ ++ return xnmalloc(size); ++} ++ ++static inline void rtdm_free(void *ptr) ++{ ++ xnfree(ptr); ++} ++ ++int rtdm_mmap_to_user(struct rtdm_fd *fd, ++ void *src_addr, size_t len, ++ int prot, void **pptr, ++ struct vm_operations_struct *vm_ops, ++ void *vm_private_data); ++ ++int rtdm_iomap_to_user(struct rtdm_fd *fd, ++ phys_addr_t src_addr, size_t len, ++ int prot, void **pptr, ++ struct vm_operations_struct *vm_ops, ++ void *vm_private_data); ++ ++int rtdm_mmap_kmem(struct vm_area_struct *vma, void *va); ++ ++int rtdm_mmap_vmem(struct vm_area_struct *vma, void *va); ++ ++int rtdm_mmap_iomem(struct vm_area_struct *vma, phys_addr_t pa); ++ ++int rtdm_munmap(void *ptr, size_t len); ++ ++static inline int rtdm_read_user_ok(struct rtdm_fd *fd, ++ const void __user *ptr, size_t size) ++{ ++ return access_rok(ptr, size); ++} ++ ++static inline int rtdm_rw_user_ok(struct rtdm_fd *fd, ++ const void __user *ptr, size_t size) ++{ ++ return access_wok(ptr, size); ++} ++ ++static inline int rtdm_copy_from_user(struct rtdm_fd *fd, ++ void *dst, const void __user *src, ++ size_t size) ++{ ++ return __xn_copy_from_user(dst, src, size) ? -EFAULT : 0; ++} ++ ++static inline int rtdm_safe_copy_from_user(struct rtdm_fd *fd, ++ void *dst, const void __user *src, ++ size_t size) ++{ ++ return cobalt_copy_from_user(dst, src, size); ++} ++ ++static inline int rtdm_copy_to_user(struct rtdm_fd *fd, ++ void __user *dst, const void *src, ++ size_t size) ++{ ++ return __xn_copy_to_user(dst, src, size) ? 
-EFAULT : 0; ++} ++ ++static inline int rtdm_safe_copy_to_user(struct rtdm_fd *fd, ++ void __user *dst, const void *src, ++ size_t size) ++{ ++ return cobalt_copy_to_user(dst, src, size); ++} ++ ++static inline int rtdm_strncpy_from_user(struct rtdm_fd *fd, ++ char *dst, ++ const char __user *src, size_t count) ++{ ++ return cobalt_strncpy_from_user(dst, src, count); ++} ++ ++static inline bool rtdm_available(void) ++{ ++ return realtime_core_enabled(); ++} ++ ++static inline int rtdm_rt_capable(struct rtdm_fd *fd) ++{ ++ if (!XENO_ASSERT(COBALT, !xnsched_interrupt_p())) ++ return 0; ++ ++ if (!rtdm_fd_is_user(fd)) ++ return !xnsched_root_p(); ++ ++ return xnthread_current() != NULL; ++} ++ ++static inline int rtdm_in_rt_context(void) ++{ ++ return (ipipe_current_domain != ipipe_root_domain); ++} ++ ++#define RTDM_IOV_FASTMAX 16 ++ ++int rtdm_get_iovec(struct rtdm_fd *fd, struct iovec **iov, ++ const struct user_msghdr *msg, ++ struct iovec *iov_fast); ++ ++int rtdm_put_iovec(struct rtdm_fd *fd, struct iovec *iov, ++ const struct user_msghdr *msg, ++ struct iovec *iov_fast); ++ ++static inline ++void rtdm_drop_iovec(struct iovec *iov, struct iovec *iov_fast) ++{ ++ if (iov != iov_fast) ++ xnfree(iov); ++} ++ ++ssize_t rtdm_get_iov_flatlen(struct iovec *iov, int iovlen); ++ ++#endif /* !DOXYGEN_CPP */ ++ ++#endif /* _COBALT_RTDM_DRIVER_H */ +--- linux/include/xenomai/rtdm/testing.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/testing.h 2021-04-07 16:01:28.254632699 +0800 +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_TESTING_H ++#define _COBALT_RTDM_TESTING_H ++ ++#include ++#include ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ ++#include ++ ++struct compat_rttst_overall_bench_res { ++ struct rttst_bench_res result; ++ compat_uptr_t histogram_avg; ++ compat_uptr_t histogram_min; ++ compat_uptr_t histogram_max; ++}; ++ ++#define RTTST_RTIOC_TMBENCH_STOP_COMPAT \ ++ _IOWR(RTIOC_TYPE_TESTING, 0x11, struct compat_rttst_overall_bench_res) ++ ++#endif /* CONFIG_XENO_ARCH_SYS3264 */ ++ ++#endif /* !_COBALT_RTDM_TESTING_H */ +--- linux/include/xenomai/rtdm/net.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/net.h 2021-04-07 16:01:28.250632704 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2005-2011 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
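/*
 * Usage sketch (hypothetical foo_* names and a private 'F' ioctl magic):
 * exchanging data with user space from an ioctl handler by means of the
 * rtdm_safe_copy_{from,to}_user() helpers declared in rtdm/driver.h
 * above.
 */
struct foo_config {
	int channel;
	int rate_hz;
};

#define FOO_RTIOC_SET_CONFIG	_IOW('F', 0x00, struct foo_config)

static int foo_ioctl_rt(struct rtdm_fd *fd, unsigned int request,
			void __user *arg)
{
	struct foo_config cfg;
	int ret;

	switch (request) {
	case FOO_RTIOC_SET_CONFIG:
		ret = rtdm_safe_copy_from_user(fd, &cfg, arg, sizeof(cfg));
		if (ret)
			return ret;
		/* ...apply cfg to the hardware... */
		return 0;
	default:
		/* Give the non-real-time handler a chance, if any. */
		return -ENOSYS;
	}
}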
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#ifndef _COBALT_RTDM_NET_H ++#define _COBALT_RTDM_NET_H ++ ++#include ++#include ++#include ++ ++struct rtnet_callback { ++ void (*func)(struct rtdm_fd *, void *); ++ void *arg; ++}; ++ ++#define RTNET_RTIOC_CALLBACK _IOW(RTIOC_TYPE_NETWORK, 0x12, \ ++ struct rtnet_callback) ++ ++/* utility functions */ ++ ++/* provided by rt_ipv4 */ ++unsigned long rt_inet_aton(const char *ip); ++ ++/* provided by rt_packet */ ++int rt_eth_aton(unsigned char *addr_buf, const char *mac); ++ ++#define RTNET_RTDM_VER 914 ++ ++#endif /* _COBALT_RTDM_NET_H */ +--- linux/include/xenomai/rtdm/can.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/can.h 2021-04-07 16:01:28.245632712 +0800 +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_CAN_H ++#define _COBALT_RTDM_CAN_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#endif /* _COBALT_RTDM_CAN_H */ +--- linux/include/xenomai/rtdm/compat.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/compat.h 2021-04-07 16:01:28.240632719 +0800 +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_RTDM_COMPAT_H ++#define _COBALT_RTDM_COMPAT_H ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ ++#include ++#include ++ ++struct compat_rtdm_getsockopt_args { ++ int level; ++ int optname; ++ compat_uptr_t optval; ++ compat_uptr_t optlen; ++}; ++ ++struct compat_rtdm_setsockopt_args { ++ int level; ++ int optname; ++ const compat_uptr_t optval; ++ socklen_t optlen; ++}; ++ ++struct compat_rtdm_getsockaddr_args { ++ compat_uptr_t addr; ++ compat_uptr_t addrlen; ++}; ++ ++struct compat_rtdm_setsockaddr_args { ++ const compat_uptr_t addr; ++ socklen_t addrlen; ++}; ++ ++#define _RTIOC_GETSOCKOPT_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x20, \ ++ struct compat_rtdm_getsockopt_args) ++#define _RTIOC_SETSOCKOPT_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x21, \ ++ struct compat_rtdm_setsockopt_args) ++#define _RTIOC_BIND_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x22, \ ++ struct compat_rtdm_setsockaddr_args) ++#define _RTIOC_CONNECT_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x23, \ ++ struct compat_rtdm_setsockaddr_args) ++#define _RTIOC_ACCEPT_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x25, \ ++ struct compat_rtdm_getsockaddr_args) ++#define _RTIOC_GETSOCKNAME_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x26, \ ++ struct compat_rtdm_getsockaddr_args) ++#define _RTIOC_GETPEERNAME_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x27, \ ++ struct compat_rtdm_getsockaddr_args) ++ ++#define __COMPAT_CASE(__op) : case __op ++ ++#else /* !CONFIG_XENO_ARCH_SYS3264 */ ++ ++#define __COMPAT_CASE(__op) ++ ++#endif /* !CONFIG_XENO_ARCH_SYS3264 */ ++ ++#define COMPAT_CASE(__op) case __op __COMPAT_CASE(__op ## _COMPAT) ++ ++#endif /* !_COBALT_RTDM_COMPAT_H */ +--- linux/include/xenomai/rtdm/fd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/fd.h 2021-04-07 16:01:28.235632726 +0800 +@@ -0,0 +1,410 @@ ++/* ++ * Copyright (C) 2005-2007 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2008,2013,2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_FD_H ++#define _COBALT_KERNEL_FD_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct vm_area_struct; ++struct rtdm_fd; ++struct _rtdm_mmap_request; ++struct xnselector; ++struct cobalt_ppd; ++struct rtdm_device; ++ ++/** ++ * @file ++ * @anchor File operation handlers ++ * @addtogroup rtdm_device_register ++ * @{ ++ */ ++ ++/** ++ * Open handler for named devices ++ * ++ * @param[in] fd File descriptor associated with opened device instance ++ * @param[in] oflags Open flags as passed by the user ++ * ++ * The file descriptor carries a device minor information which can be ++ * retrieved by a call to rtdm_fd_minor(fd). The minor number can be ++ * used for distinguishing devices managed by a driver. ++ * ++ * @return 0 on success. On failure, a negative error code is returned. 
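/*
 * Usage sketch (hypothetical foo_* driver): a minimal open handler.
 * The private area returned by rtdm_fd_to_private() is zero-initialized
 * by the core, so only non-zero defaults need to be set up here.
 */
static int foo_open(struct rtdm_fd *fd, int oflags)
{
	struct foo_context *ctx = rtdm_fd_to_private(fd);

	ctx->nonblocking = !!(oflags & O_NONBLOCK);
	ctx->minor = rtdm_fd_minor(fd);	/* which device instance was opened */

	return 0;
}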
++ * ++ * @see @c open() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++int rtdm_open_handler(struct rtdm_fd *fd, int oflags); ++ ++/** ++ * Socket creation handler for protocol devices ++ * ++ * @param[in] fd File descriptor associated with opened device instance ++ * @param[in] protocol Protocol number as passed by the user ++ * ++ * @return 0 on success. On failure, a negative error code is returned. ++ * ++ * @see @c socket() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++int rtdm_socket_handler(struct rtdm_fd *fd, int protocol); ++ ++/** ++ * Close handler ++ * ++ * @param[in] fd File descriptor associated with opened ++ * device instance. ++ * ++ * @see @c close() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++void rtdm_close_handler(struct rtdm_fd *fd); ++ ++/** ++ * IOCTL handler ++ * ++ * @param[in] fd File descriptor ++ * @param[in] request Request number as passed by the user ++ * @param[in,out] arg Request argument as passed by the user ++ * ++ * @return A positive value or 0 on success. On failure return either ++ * -ENOSYS, to request that the function be called again from the opposite ++ * realtime/non-realtime context, or another negative error code. ++ * ++ * @see @c ioctl() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++int rtdm_ioctl_handler(struct rtdm_fd *fd, unsigned int request, void __user *arg); ++ ++/** ++ * Read handler ++ * ++ * @param[in] fd File descriptor ++ * @param[out] buf Input buffer as passed by the user ++ * @param[in] size Number of bytes the user requests to read ++ * ++ * @return On success, the number of bytes read. On failure return either ++ * -ENOSYS, to request that this handler be called again from the opposite ++ * realtime/non-realtime context, or another negative error code. ++ * ++ * @see @c read() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++ssize_t rtdm_read_handler(struct rtdm_fd *fd, void __user *buf, size_t size); ++ ++/** ++ * Write handler ++ * ++ * @param[in] fd File descriptor ++ * @param[in] buf Output buffer as passed by the user ++ * @param[in] size Number of bytes the user requests to write ++ * ++ * @return On success, the number of bytes written. On failure return ++ * either -ENOSYS, to request that this handler be called again from the ++ * opposite realtime/non-realtime context, or another negative error code. ++ * ++ * @see @c write() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++ssize_t rtdm_write_handler(struct rtdm_fd *fd, const void __user *buf, size_t size); ++ ++/** ++ * Receive message handler ++ * ++ * @param[in] fd File descriptor ++ * @param[in,out] msg Message descriptor as passed by the user, automatically ++ * mirrored to safe kernel memory in case of user mode call ++ * @param[in] flags Message flags as passed by the user ++ * ++ * @return On success, the number of bytes received. On failure return ++ * either -ENOSYS, to request that this handler be called again from the ++ * opposite realtime/non-realtime context, or another negative error code. 
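/*
 * Usage sketch (hypothetical foo_* driver): the -ENOSYS re-dispatch
 * convention. The real-time read handler bails out to its non-real-time
 * sibling as long as the data ring still has to be set up with regular
 * Linux services.
 */
static ssize_t foo_read_rt(struct rtdm_fd *fd, void __user *buf, size_t size)
{
	struct foo_context *ctx = rtdm_fd_to_private(fd);
	int ret;

	if (ctx->ring == NULL)
		return -ENOSYS;	/* re-dispatched to the read_nrt handler */

	if (size > ctx->ring_avail)
		size = ctx->ring_avail;

	ret = rtdm_safe_copy_to_user(fd, buf, ctx->ring, size);

	return ret ? ret : (ssize_t)size;
}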
++ * ++ * @see @c recvmsg() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++ssize_t rtdm_recvmsg_handler(struct rtdm_fd *fd, struct user_msghdr *msg, int flags); ++ ++/** ++ * Transmit message handler ++ * ++ * @param[in] fd File descriptor ++ * @param[in] msg Message descriptor as passed by the user, automatically ++ * mirrored to safe kernel memory in case of user mode call ++ * @param[in] flags Message flags as passed by the user ++ * ++ * @return On success, the number of bytes transmitted. On failure return ++ * either -ENOSYS, to request that this handler be called again from the ++ * opposite realtime/non-realtime context, or another negative error code. ++ * ++ * @see @c sendmsg() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++ssize_t rtdm_sendmsg_handler(struct rtdm_fd *fd, const struct user_msghdr *msg, int flags); ++ ++/** ++ * Select handler ++ * ++ * @param[in] fd File descriptor ++ * @param selector Pointer to the selector structure ++ * @param type Type of events (@a XNSELECT_READ, @a XNSELECT_WRITE, or @a ++ * XNSELECT_EXCEPT) ++ * @param index Index of the file descriptor ++ * ++ * @return 0 on success. On failure, a negative error code is ++ * returned. ++ * ++ * @see @c select() in POSIX.1-2001, ++ * http://pubs.opengroup.org/onlinepubs/007908799/xsh/select.html ++ */ ++int rtdm_select_handler(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned int type, unsigned int index); ++ ++/** ++ * Memory mapping handler ++ * ++ * @param[in] fd File descriptor ++ * @param[in] vma Virtual memory area descriptor ++ * ++ * @return 0 on success. On failure, a negative error code is ++ * returned. ++ * ++ * @see @c mmap() in POSIX.1-2001, ++ * http://pubs.opengroup.org/onlinepubs/7908799/xsh/mmap.html ++ * ++ * @note The address hint passed to the mmap() request is deliberately ++ * ignored by RTDM. ++ */ ++int rtdm_mmap_handler(struct rtdm_fd *fd, struct vm_area_struct *vma); ++ ++/** ++ * Allocate mapping region in address space ++ * ++ * When present, this optional handler should return the start address ++ * of a free region in the process's address space, large enough to ++ * cover the ongoing mmap() operation. If unspecified, the default ++ * architecture-defined handler is invoked. ++ * ++ * Most drivers can omit this handler, except on MMU-less platforms ++ * (see second note). ++ * ++ * @param[in] fd File descriptor ++ * @param[in] len Length of the requested region ++ * @param[in] pgoff Page frame number to map to (see second note). ++ * @param[in] flags Requested mapping flags ++ * ++ * @return The start address of the mapping region on success. On ++ * failure, a negative error code should be returned, with -ENOSYS ++ * meaning that the driver does not want to provide such information, ++ * in which case the ongoing mmap() operation will fail. ++ * ++ * @note The address hint passed to the mmap() request is deliberately ++ * ignored by RTDM, and therefore not passed to this handler. ++ * ++ * @note On MMU-less platforms, this handler is required because RTDM ++ * issues mapping requests over a shareable character device ++ * internally. In such context, the RTDM core may pass a null @a pgoff ++ * argument to the handler, for probing for the logical start address ++ * of the memory region to map to. Otherwise, when @a pgoff is ++ * non-zero, pgoff << PAGE_SHIFT is usually returned. 
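/*
 * Usage sketch (hypothetical foo_* names; FOO_IOMEM_BASE is a made-up
 * physical address): a mapping handler exporting a device register
 * window to the caller with rtdm_mmap_iomem(), declared in
 * rtdm/driver.h above.
 */
#define FOO_IOMEM_BASE	0xfe000000UL

static int foo_mmap(struct rtdm_fd *fd, struct vm_area_struct *vma)
{
	return rtdm_mmap_iomem(vma, FOO_IOMEM_BASE);
}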
++ */ ++unsigned long ++rtdm_get_unmapped_area_handler(struct rtdm_fd *fd, ++ unsigned long len, unsigned long pgoff, ++ unsigned long flags); ++/** ++ * @anchor rtdm_fd_ops ++ * @brief RTDM file operation descriptor. ++ * ++ * This structure describes the operations available with a RTDM ++ * device, defining handlers for submitting I/O requests. Those ++ * handlers are implemented by RTDM device drivers. ++ */ ++struct rtdm_fd_ops { ++ /** See rtdm_open_handler(). */ ++ int (*open)(struct rtdm_fd *fd, int oflags); ++ /** See rtdm_socket_handler(). */ ++ int (*socket)(struct rtdm_fd *fd, int protocol); ++ /** See rtdm_close_handler(). */ ++ void (*close)(struct rtdm_fd *fd); ++ /** See rtdm_ioctl_handler(). */ ++ int (*ioctl_rt)(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg); ++ /** See rtdm_ioctl_handler(). */ ++ int (*ioctl_nrt)(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg); ++ /** See rtdm_read_handler(). */ ++ ssize_t (*read_rt)(struct rtdm_fd *fd, ++ void __user *buf, size_t size); ++ /** See rtdm_read_handler(). */ ++ ssize_t (*read_nrt)(struct rtdm_fd *fd, ++ void __user *buf, size_t size); ++ /** See rtdm_write_handler(). */ ++ ssize_t (*write_rt)(struct rtdm_fd *fd, ++ const void __user *buf, size_t size); ++ /** See rtdm_write_handler(). */ ++ ssize_t (*write_nrt)(struct rtdm_fd *fd, ++ const void __user *buf, size_t size); ++ /** See rtdm_recvmsg_handler(). */ ++ ssize_t (*recvmsg_rt)(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags); ++ /** See rtdm_recvmsg_handler(). */ ++ ssize_t (*recvmsg_nrt)(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags); ++ /** See rtdm_sendmsg_handler(). */ ++ ssize_t (*sendmsg_rt)(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags); ++ /** See rtdm_sendmsg_handler(). */ ++ ssize_t (*sendmsg_nrt)(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags); ++ /** See rtdm_select_handler(). */ ++ int (*select)(struct rtdm_fd *fd, ++ struct xnselector *selector, ++ unsigned int type, unsigned int index); ++ /** See rtdm_mmap_handler(). */ ++ int (*mmap)(struct rtdm_fd *fd, ++ struct vm_area_struct *vma); ++ /** See rtdm_get_unmapped_area_handler(). 
*/ ++ unsigned long (*get_unmapped_area)(struct rtdm_fd *fd, ++ unsigned long len, ++ unsigned long pgoff, ++ unsigned long flags); ++}; ++ ++/** @} File operation handlers */ ++ ++struct rtdm_fd { ++ unsigned int magic; ++ struct rtdm_fd_ops *ops; ++ struct cobalt_ppd *owner; ++ unsigned int refs; ++ int ufd; ++ int minor; ++ int oflags; ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ int compat; ++#endif ++ bool stale; ++ struct list_head cleanup; ++ struct list_head next; /* in dev->openfd_list */ ++}; ++ ++#define RTDM_FD_MAGIC 0x52544446 ++ ++#define RTDM_FD_COMPAT __COBALT_COMPAT_BIT ++#define RTDM_FD_COMPATX __COBALT_COMPATX_BIT ++ ++int __rtdm_anon_getfd(const char *name, int flags); ++ ++void __rtdm_anon_putfd(int ufd); ++ ++static inline struct cobalt_ppd *rtdm_fd_owner(const struct rtdm_fd *fd) ++{ ++ return fd->owner; ++} ++ ++static inline int rtdm_fd_ufd(const struct rtdm_fd *fd) ++{ ++ return fd->ufd; ++} ++ ++static inline int rtdm_fd_minor(const struct rtdm_fd *fd) ++{ ++ return fd->minor; ++} ++ ++static inline int rtdm_fd_flags(const struct rtdm_fd *fd) ++{ ++ return fd->oflags; ++} ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++static inline int rtdm_fd_is_compat(const struct rtdm_fd *fd) ++{ ++ return fd->compat; ++} ++#else ++static inline int rtdm_fd_is_compat(const struct rtdm_fd *fd) ++{ ++ return 0; ++} ++#endif ++ ++int rtdm_fd_enter(struct rtdm_fd *rtdm_fd, int ufd, ++ unsigned int magic, struct rtdm_fd_ops *ops); ++ ++int rtdm_fd_register(struct rtdm_fd *fd, int ufd); ++ ++struct rtdm_fd *rtdm_fd_get(int ufd, unsigned int magic); ++ ++int rtdm_fd_lock(struct rtdm_fd *fd); ++ ++void rtdm_fd_put(struct rtdm_fd *fd); ++ ++void rtdm_fd_unlock(struct rtdm_fd *fd); ++ ++int rtdm_fd_fcntl(int ufd, int cmd, ...); ++ ++int rtdm_fd_ioctl(int ufd, unsigned int request, ...); ++ ++ssize_t rtdm_fd_read(int ufd, void __user *buf, size_t size); ++ ++ssize_t rtdm_fd_write(int ufd, const void __user *buf, size_t size); ++ ++int rtdm_fd_close(int ufd, unsigned int magic); ++ ++ssize_t rtdm_fd_recvmsg(int ufd, struct user_msghdr *msg, int flags); ++ ++int __rtdm_fd_recvmmsg(int ufd, void __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, void __user *u_timeout, ++ int (*get_mmsg)(struct mmsghdr *mmsg, void __user *u_mmsg), ++ int (*put_mmsg)(void __user **u_mmsg_p, const struct mmsghdr *mmsg), ++ int (*get_timespec)(struct timespec *ts, const void __user *u_ts)); ++ ++ssize_t rtdm_fd_sendmsg(int ufd, const struct user_msghdr *msg, ++ int flags); ++ ++int __rtdm_fd_sendmmsg(int ufd, void __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, ++ int (*get_mmsg)(struct mmsghdr *mmsg, void __user *u_mmsg), ++ int (*put_mmsg)(void __user **u_mmsg_p, const struct mmsghdr *mmsg)); ++ ++int rtdm_fd_mmap(int ufd, struct _rtdm_mmap_request *rma, ++ void **u_addrp); ++ ++int rtdm_fd_valid_p(int ufd); ++ ++int rtdm_fd_select(int ufd, struct xnselector *selector, ++ unsigned int type); ++ ++int rtdm_device_new_fd(struct rtdm_fd *fd, int ufd, ++ struct rtdm_device *dev); ++ ++void rtdm_device_flush_fds(struct rtdm_device *dev); ++ ++void rtdm_fd_cleanup(struct cobalt_ppd *p); ++ ++void rtdm_fd_init(void); ++ ++#endif /* _COBALT_KERNEL_FD_H */ +--- linux/include/xenomai/rtdm/autotune.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/autotune.h 2021-04-07 16:01:28.231632732 +0800 +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as 
++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_AUTOTUNE_H ++#define _COBALT_RTDM_AUTOTUNE_H ++ ++#include ++#include ++ ++#endif /* !_COBALT_RTDM_AUTOTUNE_H */ +--- linux/include/xenomai/rtdm/ipc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/ipc.h 2021-04-07 16:01:28.226632739 +0800 +@@ -0,0 +1,30 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2009 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef _COBALT_RTDM_IPC_H ++#define _COBALT_RTDM_IPC_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#endif /* !_COBALT_RTDM_IPC_H */ +--- linux/include/xenomai/rtdm/rtdm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/rtdm.h 2021-04-07 16:01:28.221632746 +0800 +@@ -0,0 +1,218 @@ ++/* ++ * Copyright (C) 2005, 2006 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_RTDM_H ++#define _COBALT_RTDM_RTDM_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++typedef __u32 socklen_t; ++ ++#include ++ ++int __rtdm_dev_open(const char *path, int oflag); ++ ++int __rtdm_dev_socket(int protocol_family, ++ int socket_type, int protocol); ++ ++static inline int rtdm_open(const char *path, int oflag, ...) 
++{ ++ return __rtdm_dev_open(path, oflag); ++} ++ ++static inline int rtdm_socket(int protocol_family, ++ int socket_type, int protocol) ++{ ++ return __rtdm_dev_socket(protocol_family, socket_type, protocol); ++} ++ ++static inline int rtdm_close(int fd) ++{ ++ return rtdm_fd_close(fd, RTDM_FD_MAGIC); ++} ++ ++#define rtdm_fcntl(__fd, __cmd, __args...) \ ++ rtdm_fd_fcntl(__fd, __cmd, ##__args) ++ ++#define rtdm_ioctl(__fd, __request, __args...) \ ++ rtdm_fd_ioctl(__fd, __request, ##__args) ++ ++static inline ssize_t rtdm_read(int fd, void *buf, size_t count) ++{ ++ return rtdm_fd_read(fd, buf, count); ++} ++ ++static inline ssize_t rtdm_write(int fd, const void *buf, size_t count) ++{ ++ return rtdm_fd_write(fd, buf, count); ++} ++ ++static inline ssize_t rtdm_recvmsg(int s, struct user_msghdr *msg, int flags) ++{ ++ return rtdm_fd_recvmsg(s, msg, flags); ++} ++ ++static inline ssize_t rtdm_sendmsg(int s, const struct user_msghdr *msg, int flags) ++{ ++ return rtdm_fd_sendmsg(s, msg, flags); ++} ++ ++static inline ++ssize_t rtdm_recvfrom(int s, void *buf, size_t len, int flags, ++ struct sockaddr *from, ++ socklen_t *fromlen) ++{ ++ struct user_msghdr msg; ++ struct iovec iov; ++ ssize_t ret; ++ ++ iov.iov_base = buf; ++ iov.iov_len = len; ++ msg.msg_name = from; ++ msg.msg_namelen = from ? *fromlen : 0; ++ msg.msg_iov = &iov; ++ msg.msg_iovlen = 1; ++ msg.msg_control = NULL; ++ msg.msg_controllen = 0; ++ ++ ret = rtdm_recvmsg(s, &msg, flags); ++ if (ret < 0) ++ return ret; ++ ++ if (from) ++ *fromlen = msg.msg_namelen; ++ ++ return ret; ++} ++ ++static inline ssize_t rtdm_recv(int s, void *buf, size_t len, int flags) ++{ ++ return rtdm_recvfrom(s, buf, len, flags, NULL, NULL); ++} ++ ++static inline ssize_t rtdm_sendto(int s, const void *buf, size_t len, ++ int flags, const struct sockaddr *to, ++ socklen_t tolen) ++{ ++ struct user_msghdr msg; ++ struct iovec iov; ++ ++ iov.iov_base = (void *)buf; ++ iov.iov_len = len; ++ msg.msg_name = (struct sockaddr *)to; ++ msg.msg_namelen = tolen; ++ msg.msg_iov = &iov; ++ msg.msg_iovlen = 1; ++ msg.msg_control = NULL; ++ msg.msg_controllen = 0; ++ ++ return rtdm_sendmsg(s, &msg, flags); ++} ++ ++static inline ssize_t rtdm_send(int s, const void *buf, size_t len, int flags) ++{ ++ return rtdm_sendto(s, buf, len, flags, NULL, 0); ++} ++ ++static inline int rtdm_getsockopt(int s, int level, int optname, ++ void *optval, socklen_t *optlen) ++{ ++ struct _rtdm_getsockopt_args args = { ++ level, optname, optval, optlen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_GETSOCKOPT, &args); ++} ++ ++static inline int rtdm_setsockopt(int s, int level, int optname, ++ const void *optval, socklen_t optlen) ++{ ++ struct _rtdm_setsockopt_args args = { ++ level, optname, (void *)optval, optlen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_SETSOCKOPT, &args); ++} ++ ++static inline int rtdm_bind(int s, const struct sockaddr *my_addr, ++ socklen_t addrlen) ++{ ++ struct _rtdm_setsockaddr_args args = { ++ my_addr, addrlen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_BIND, &args); ++} ++ ++static inline int rtdm_connect(int s, const struct sockaddr *serv_addr, ++ socklen_t addrlen) ++{ ++ struct _rtdm_setsockaddr_args args = { ++ serv_addr, addrlen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_CONNECT, &args); ++} ++ ++static inline int rtdm_listen(int s, int backlog) ++{ ++ return rtdm_ioctl(s, _RTIOC_LISTEN, backlog); ++} ++ ++static inline int rtdm_accept(int s, struct sockaddr *addr, ++ socklen_t *addrlen) ++{ ++ struct _rtdm_getsockaddr_args args = { ++ addr, addrlen ++ }; ++ ++ 
return rtdm_ioctl(s, _RTIOC_ACCEPT, &args); ++} ++ ++static inline int rtdm_getsockname(int s, struct sockaddr *name, ++ socklen_t *namelen) ++{ ++ struct _rtdm_getsockaddr_args args = { ++ name, namelen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_GETSOCKNAME, &args); ++} ++ ++static inline int rtdm_getpeername(int s, struct sockaddr *name, ++ socklen_t *namelen) ++{ ++ struct _rtdm_getsockaddr_args args = { ++ name, namelen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_GETPEERNAME, &args); ++} ++ ++static inline int rtdm_shutdown(int s, int how) ++{ ++ return rtdm_ioctl(s, _RTIOC_SHUTDOWN, how); ++} ++ ++#endif /* _COBALT_RTDM_RTDM_H */ +--- linux/include/xenomai/rtdm/cobalt.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/cobalt.h 2021-04-07 16:01:28.217632752 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2013 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_COBALT_H ++#define _COBALT_RTDM_COBALT_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#endif /* !_COBALT_RTDM_COBALT_H */ +--- linux/include/xenomai/cobalt/uapi/corectl.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/corectl.h 2021-04-07 16:01:28.429632449 +0800 +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (C) 2015 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_UAPI_CORECTL_H ++#define _COBALT_UAPI_CORECTL_H ++ ++#define _CC_COBALT_GET_VERSION 0 ++#define _CC_COBALT_GET_NR_PIPES 1 ++#define _CC_COBALT_GET_NR_TIMERS 2 ++ ++#define _CC_COBALT_GET_DEBUG 3 ++# define _CC_COBALT_DEBUG_ASSERT 1 ++# define _CC_COBALT_DEBUG_CONTEXT 2 ++# define _CC_COBALT_DEBUG_LOCKING 4 ++# define _CC_COBALT_DEBUG_USER 8 ++# define _CC_COBALT_DEBUG_MUTEX_RELAXED 16 ++# define _CC_COBALT_DEBUG_MUTEX_SLEEP 32 ++/* bit 6 (64) formerly used for DEBUG_POSIX_SYNCHRO */ ++# define _CC_COBALT_DEBUG_LEGACY 128 ++# define _CC_COBALT_DEBUG_TRACE_RELAX 256 ++# define _CC_COBALT_DEBUG_NET 512 ++ ++#define _CC_COBALT_GET_POLICIES 4 ++# define _CC_COBALT_SCHED_FIFO 1 ++# define _CC_COBALT_SCHED_RR 2 ++# define _CC_COBALT_SCHED_WEAK 4 ++# define _CC_COBALT_SCHED_SPORADIC 8 ++# define _CC_COBALT_SCHED_QUOTA 16 ++# define _CC_COBALT_SCHED_TP 32 ++ ++#define _CC_COBALT_GET_WATCHDOG 5 ++#define _CC_COBALT_GET_CORE_STATUS 6 ++#define _CC_COBALT_START_CORE 7 ++#define _CC_COBALT_STOP_CORE 8 ++ ++#define _CC_COBALT_GET_NET_CONFIG 9 ++# define _CC_COBALT_NET 0x00000001 ++# define _CC_COBALT_NET_ETH_P_ALL 0x00000002 ++# define _CC_COBALT_NET_IPV4 0x00000004 ++# define _CC_COBALT_NET_ICMP 0x00000008 ++# define _CC_COBALT_NET_NETROUTING 0x00000010 ++# define _CC_COBALT_NET_ROUTER 0x00000020 ++# define _CC_COBALT_NET_UDP 0x00000040 ++# define _CC_COBALT_NET_AF_PACKET 0x00000080 ++# define _CC_COBALT_NET_TDMA 0x00000100 ++# define _CC_COBALT_NET_NOMAC 0x00000200 ++# define _CC_COBALT_NET_CFG 0x00000400 ++# define _CC_COBALT_NET_CAP 0x00000800 ++# define _CC_COBALT_NET_PROXY 0x00001000 ++ ++ ++enum cobalt_run_states { ++ COBALT_STATE_DISABLED, ++ COBALT_STATE_RUNNING, ++ COBALT_STATE_STOPPED, ++ COBALT_STATE_TEARDOWN, ++ COBALT_STATE_WARMUP, ++}; ++ ++#endif /* !_COBALT_UAPI_CORECTL_H */ +--- linux/include/xenomai/cobalt/uapi/cond.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/cond.h 2021-04-07 16:01:28.424632456 +0800 +@@ -0,0 +1,39 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_COND_H ++#define _COBALT_UAPI_COND_H ++ ++#include ++ ++#define COBALT_COND_MAGIC 0x86860505 ++ ++struct cobalt_cond_state { ++ __u32 pending_signals; ++ __u32 mutex_state_offset; ++}; ++ ++union cobalt_cond_union { ++ pthread_cond_t native_cond; ++ struct cobalt_cond_shadow { ++ __u32 magic; ++ __u32 state_offset; ++ xnhandle_t handle; ++ } shadow_cond; ++}; ++ ++#endif /* !_COBALT_UAPI_COND_H */ +--- linux/include/xenomai/cobalt/uapi/event.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/event.h 2021-04-07 16:01:28.419632463 +0800 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . 
++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_EVENT_H ++#define _COBALT_UAPI_EVENT_H ++ ++#include ++ ++struct cobalt_event_state { ++ __u32 value; ++ __u32 flags; ++#define COBALT_EVENT_PENDED 0x1 ++ __u32 nwaiters; ++}; ++ ++struct cobalt_event; ++ ++/* Creation flags. */ ++#define COBALT_EVENT_FIFO 0x0 ++#define COBALT_EVENT_PRIO 0x1 ++#define COBALT_EVENT_SHARED 0x2 ++ ++/* Wait mode. */ ++#define COBALT_EVENT_ALL 0x0 ++#define COBALT_EVENT_ANY 0x1 ++ ++struct cobalt_event_shadow { ++ __u32 state_offset; ++ __u32 flags; ++ xnhandle_t handle; ++}; ++ ++struct cobalt_event_info { ++ unsigned int value; ++ int flags; ++ int nrwait; ++}; ++ ++typedef struct cobalt_event_shadow cobalt_event_t; ++ ++#endif /* !_COBALT_UAPI_EVENT_H */ +--- linux/include/xenomai/cobalt/uapi/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/syscall.h 2021-04-07 16:01:28.415632469 +0800 +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_UAPI_SYSCALL_H ++#define _COBALT_UAPI_SYSCALL_H ++ ++#include ++ ++#define sc_cobalt_bind 0 ++#define sc_cobalt_thread_create 1 ++#define sc_cobalt_thread_getpid 2 ++#define sc_cobalt_thread_setmode 3 ++#define sc_cobalt_thread_setname 4 ++#define sc_cobalt_thread_join 5 ++#define sc_cobalt_thread_kill 6 ++#define sc_cobalt_thread_setschedparam_ex 7 ++#define sc_cobalt_thread_getschedparam_ex 8 ++#define sc_cobalt_thread_getstat 9 ++#define sc_cobalt_sem_init 10 ++#define sc_cobalt_sem_destroy 11 ++#define sc_cobalt_sem_post 12 ++#define sc_cobalt_sem_wait 13 ++#define sc_cobalt_sem_trywait 14 ++#define sc_cobalt_sem_getvalue 15 ++#define sc_cobalt_sem_open 16 ++#define sc_cobalt_sem_close 17 ++#define sc_cobalt_sem_unlink 18 ++#define sc_cobalt_sem_timedwait 19 ++#define sc_cobalt_sem_inquire 20 ++#define sc_cobalt_sem_broadcast_np 21 ++#define sc_cobalt_clock_getres 22 ++#define sc_cobalt_clock_gettime 23 ++#define sc_cobalt_clock_settime 24 ++#define sc_cobalt_clock_nanosleep 25 ++#define sc_cobalt_mutex_init 26 ++#define sc_cobalt_mutex_check_init 27 ++#define sc_cobalt_mutex_destroy 28 ++#define sc_cobalt_mutex_lock 29 ++#define sc_cobalt_mutex_timedlock 30 ++#define sc_cobalt_mutex_trylock 31 ++#define sc_cobalt_mutex_unlock 32 ++#define sc_cobalt_cond_init 33 ++#define sc_cobalt_cond_destroy 34 ++#define sc_cobalt_cond_wait_prologue 35 ++#define sc_cobalt_cond_wait_epilogue 36 ++#define sc_cobalt_mq_open 37 ++#define sc_cobalt_mq_close 38 ++#define sc_cobalt_mq_unlink 39 ++#define sc_cobalt_mq_getattr 40 ++#define sc_cobalt_mq_timedsend 41 ++#define sc_cobalt_mq_timedreceive 42 ++#define sc_cobalt_mq_notify 43 ++#define sc_cobalt_sched_minprio 44 ++#define sc_cobalt_sched_maxprio 45 ++#define sc_cobalt_sched_weightprio 46 ++#define sc_cobalt_sched_yield 47 ++#define sc_cobalt_sched_setscheduler_ex 48 ++#define sc_cobalt_sched_getscheduler_ex 49 ++#define sc_cobalt_sched_setconfig_np 50 ++#define sc_cobalt_sched_getconfig_np 51 ++#define sc_cobalt_timer_create 52 ++#define sc_cobalt_timer_delete 53 ++#define sc_cobalt_timer_settime 54 ++#define sc_cobalt_timer_gettime 55 ++#define sc_cobalt_timer_getoverrun 56 ++#define sc_cobalt_timerfd_create 57 ++#define sc_cobalt_timerfd_settime 58 ++#define sc_cobalt_timerfd_gettime 59 ++#define sc_cobalt_sigwait 60 ++#define sc_cobalt_sigwaitinfo 61 ++#define sc_cobalt_sigtimedwait 62 ++#define sc_cobalt_sigpending 63 ++#define sc_cobalt_kill 64 ++#define sc_cobalt_sigqueue 65 ++#define sc_cobalt_monitor_init 66 ++#define sc_cobalt_monitor_destroy 67 ++#define sc_cobalt_monitor_enter 68 ++#define sc_cobalt_monitor_wait 69 ++#define sc_cobalt_monitor_sync 70 ++#define sc_cobalt_monitor_exit 71 ++#define sc_cobalt_event_init 72 ++#define sc_cobalt_event_wait 73 ++#define sc_cobalt_event_sync 74 ++#define sc_cobalt_event_destroy 75 ++#define sc_cobalt_event_inquire 76 ++#define sc_cobalt_open 77 ++#define sc_cobalt_socket 78 ++#define sc_cobalt_close 79 ++#define sc_cobalt_ioctl 80 ++#define sc_cobalt_read 81 ++#define sc_cobalt_write 82 ++#define sc_cobalt_recvmsg 83 ++#define sc_cobalt_sendmsg 84 ++#define sc_cobalt_mmap 85 ++#define sc_cobalt_select 86 ++#define sc_cobalt_fcntl 87 ++#define sc_cobalt_migrate 88 ++#define sc_cobalt_archcall 89 ++#define sc_cobalt_trace 90 ++#define sc_cobalt_corectl 91 ++#define sc_cobalt_get_current 92 ++/* 93: formerly mayday */ ++#define sc_cobalt_backtrace 94 ++#define sc_cobalt_serialdbg 95 ++#define sc_cobalt_extend 96 ++#define sc_cobalt_ftrace_puts 97 ++#define sc_cobalt_recvmmsg 
98 ++#define sc_cobalt_sendmmsg 99 ++#define sc_cobalt_clock_adjtime 100 ++#define sc_cobalt_thread_setschedprio 101 ++ ++#define __NR_COBALT_SYSCALLS 128 /* Power of 2 */ ++ ++#endif /* !_COBALT_UAPI_SYSCALL_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/urw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/urw.h 2021-04-07 16:01:28.410632476 +0800 +@@ -0,0 +1,113 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_URW_H ++#define _COBALT_UAPI_KERNEL_URW_H ++ ++#include ++ ++/* ++ * A restricted version of the kernel seqlocks with a slightly ++ * different interface, allowing for unsynced reads with concurrent ++ * write detection, without serializing writers. Caller should ++ * provide for proper locking to deal with concurrent updates. ++ * ++ * urw_t lock = URW_INITIALIZER; ++ * urwstate_t tmp; ++ * ++ * unsynced_read_block(&tmp, &lock) { ++ * (will redo until clean read)... ++ * } ++ * ++ * unsynced_write_block(&tmp, &lock) { ++ * ... ++ * } ++ * ++ * This code was inspired by Wolfgang Mauerer's linux/seqlock.h ++ * adaptation for Xenomai 2.6 to support the VDSO feature. 
++ */ ++ ++typedef struct { ++ __u32 sequence; ++} urw_t; ++ ++typedef struct { ++ __u32 token; ++ __u32 dirty; ++} urwstate_t; ++ ++#define URW_INITIALIZER { 0 } ++#define DEFINE_URW(__name) urw_t __name = URW_INITIALIZER ++ ++#ifndef READ_ONCE ++#define READ_ONCE ACCESS_ONCE ++#endif ++ ++static inline void __try_read_start(const urw_t *urw, urwstate_t *tmp) ++{ ++ __u32 token; ++repeat: ++ token = READ_ONCE(urw->sequence); ++ smp_rmb(); ++ if (token & 1) { ++ cpu_relax(); ++ goto repeat; ++ } ++ ++ tmp->token = token; ++ tmp->dirty = 1; ++} ++ ++static inline void __try_read_end(const urw_t *urw, urwstate_t *tmp) ++{ ++ smp_rmb(); ++ if (urw->sequence != tmp->token) { ++ __try_read_start(urw, tmp); ++ return; ++ } ++ ++ tmp->dirty = 0; ++} ++ ++static inline void __do_write_start(urw_t *urw, urwstate_t *tmp) ++{ ++ urw->sequence++; ++ tmp->dirty = 1; ++ smp_wmb(); ++} ++ ++static inline void __do_write_end(urw_t *urw, urwstate_t *tmp) ++{ ++ smp_wmb(); ++ tmp->dirty = 0; ++ urw->sequence++; ++} ++ ++static inline void unsynced_rw_init(urw_t *urw) ++{ ++ urw->sequence = 0; ++} ++ ++#define unsynced_read_block(__tmp, __urw) \ ++ for (__try_read_start(__urw, __tmp); \ ++ (__tmp)->dirty; __try_read_end(__urw, __tmp)) ++ ++#define unsynced_write_block(__tmp, __urw) \ ++ for (__do_write_start(__urw, __tmp); \ ++ (__tmp)->dirty; __do_write_end(__urw, __tmp)) ++ ++#endif /* !_COBALT_UAPI_KERNEL_URW_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/vdso.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/vdso.h 2021-04-07 16:01:28.405632483 +0800 +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (C) 2009 Wolfgang Mauerer . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_VDSO_H ++#define _COBALT_UAPI_KERNEL_VDSO_H ++ ++#include ++ ++struct xnvdso_hostrt_data { ++ __u64 wall_sec; ++ __u64 wtom_sec; ++ __u64 cycle_last; ++ __u64 mask; ++ __u32 wall_nsec; ++ __u32 wtom_nsec; ++ __u32 mult; ++ __u32 shift; ++ __u32 live; ++ urw_t lock; ++}; ++ ++/* ++ * Data shared between the Cobalt kernel and applications, which lives ++ * in the shared memory heap (COBALT_MEMDEV_SHARED). ++ * xnvdso_hostrt_data.features tells which data is present. Notice ++ * that struct xnvdso may only grow, but never shrink. ++ */ ++struct xnvdso { ++ __u64 features; ++ /* XNVDSO_FEAT_HOST_REALTIME */ ++ struct xnvdso_hostrt_data hostrt_data; ++ /* XNVDSO_FEAT_WALLCLOCK_OFFSET */ ++ __u64 wallclock_offset; ++}; ++ ++/* For each shared feature, add a flag below. 
*/ ++ ++#define XNVDSO_FEAT_HOST_REALTIME 0x0000000000000001ULL ++#define XNVDSO_FEAT_WALLCLOCK_OFFSET 0x0000000000000002ULL ++ ++static inline int xnvdso_test_feature(struct xnvdso *vdso, ++ __u64 feature) ++{ ++ return (vdso->features & feature) != 0; ++} ++ ++#endif /* !_COBALT_UAPI_KERNEL_VDSO_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/synch.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/synch.h 2021-04-07 16:01:28.401632489 +0800 +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * Copyright (C) 2008, 2009 Jan Kiszka . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_SYNCH_H ++#define _COBALT_UAPI_KERNEL_SYNCH_H ++ ++#include ++ ++/* Creation flags */ ++#define XNSYNCH_FIFO 0x0 ++#define XNSYNCH_PRIO 0x1 ++#define XNSYNCH_PI 0x2 ++#define XNSYNCH_DREORD 0x4 ++#define XNSYNCH_OWNER 0x8 ++#define XNSYNCH_PP 0x10 ++ ++/* Fast lock API */ ++static inline int xnsynch_fast_is_claimed(xnhandle_t handle) ++{ ++ return (handle & XNSYNCH_FLCLAIM) != 0; ++} ++ ++static inline xnhandle_t xnsynch_fast_claimed(xnhandle_t handle) ++{ ++ return handle | XNSYNCH_FLCLAIM; ++} ++ ++static inline xnhandle_t xnsynch_fast_ceiling(xnhandle_t handle) ++{ ++ return handle | XNSYNCH_FLCEIL; ++} ++ ++static inline int ++xnsynch_fast_owner_check(atomic_t *fastlock, xnhandle_t ownerh) ++{ ++ return (xnhandle_get_id(atomic_read(fastlock)) == ownerh) ? ++ 0 : -EPERM; ++} ++ ++static inline ++int xnsynch_fast_acquire(atomic_t *fastlock, xnhandle_t new_ownerh) ++{ ++ xnhandle_t h; ++ ++ h = atomic_cmpxchg(fastlock, XN_NO_HANDLE, new_ownerh); ++ if (h != XN_NO_HANDLE) { ++ if (xnhandle_get_id(h) == new_ownerh) ++ return -EBUSY; ++ ++ return -EAGAIN; ++ } ++ ++ return 0; ++} ++ ++static inline ++int xnsynch_fast_release(atomic_t *fastlock, xnhandle_t cur_ownerh) ++{ ++ return atomic_cmpxchg(fastlock, cur_ownerh, XN_NO_HANDLE) ++ == cur_ownerh; ++} ++ ++/* Local/shared property */ ++static inline int xnsynch_is_shared(xnhandle_t handle) ++{ ++ return (handle & XNSYNCH_PSHARED) != 0; ++} ++ ++#endif /* !_COBALT_UAPI_KERNEL_SYNCH_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/pipe.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/pipe.h 2021-04-07 16:01:28.396632496 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. 
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_PIPE_H ++#define _COBALT_UAPI_KERNEL_PIPE_H ++ ++#define XNPIPE_IOCTL_BASE 'p' ++ ++#define XNPIPEIOC_GET_NRDEV _IOW(XNPIPE_IOCTL_BASE, 0, int) ++#define XNPIPEIOC_IFLUSH _IO(XNPIPE_IOCTL_BASE, 1) ++#define XNPIPEIOC_OFLUSH _IO(XNPIPE_IOCTL_BASE, 2) ++#define XNPIPEIOC_FLUSH XNPIPEIOC_OFLUSH ++#define XNPIPEIOC_SETSIG _IO(XNPIPE_IOCTL_BASE, 3) ++ ++#define XNPIPE_NORMAL 0x0 ++#define XNPIPE_URGENT 0x1 ++ ++#define XNPIPE_IFLUSH 0x1 ++#define XNPIPE_OFLUSH 0x2 ++ ++#define XNPIPE_MINOR_AUTO (-1) ++ ++#endif /* !_COBALT_UAPI_KERNEL_PIPE_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/types.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/types.h 2021-04-07 16:01:28.392632502 +0800 +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_TYPES_H ++#define _COBALT_UAPI_KERNEL_TYPES_H ++ ++#include ++#include ++ ++typedef __u64 xnticks_t; ++ ++typedef __s64 xnsticks_t; ++ ++typedef __u32 xnhandle_t; ++ ++#define XN_NO_HANDLE ((xnhandle_t)0) ++#define XN_HANDLE_INDEX_MASK ((xnhandle_t)0xf0000000) ++ ++/* Fixed bits (part of the identifier) */ ++#define XNSYNCH_PSHARED ((xnhandle_t)0x40000000) ++ ++/* Transient bits (expressing a status) */ ++#define XNSYNCH_FLCLAIM ((xnhandle_t)0x80000000) /* Contended. */ ++#define XNSYNCH_FLCEIL ((xnhandle_t)0x20000000) /* Ceiling active. */ ++ ++#define XN_HANDLE_TRANSIENT_MASK (XNSYNCH_FLCLAIM|XNSYNCH_FLCEIL) ++ ++/* ++ * Strip all special bits from the handle, only retaining the object ++ * index value in the registry. ++ */ ++static inline xnhandle_t xnhandle_get_index(xnhandle_t handle) ++{ ++ return handle & ~XN_HANDLE_INDEX_MASK; ++} ++ ++/* ++ * Strip the transient bits from the handle, only retaining the fixed ++ * part making the identifier. ++ */ ++static inline xnhandle_t xnhandle_get_id(xnhandle_t handle) ++{ ++ return handle & ~XN_HANDLE_TRANSIENT_MASK; ++} ++ ++#endif /* !_COBALT_UAPI_KERNEL_TYPES_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/trace.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/trace.h 2021-04-07 16:01:28.387632509 +0800 +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (C) 2006 Jan Kiszka . 
++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_TRACE_H ++#define _COBALT_UAPI_KERNEL_TRACE_H ++ ++#define __xntrace_op_max_begin 0 ++#define __xntrace_op_max_end 1 ++#define __xntrace_op_max_reset 2 ++#define __xntrace_op_user_start 3 ++#define __xntrace_op_user_stop 4 ++#define __xntrace_op_user_freeze 5 ++#define __xntrace_op_special 6 ++#define __xntrace_op_special_u64 7 ++ ++#endif /* !_COBALT_UAPI_KERNEL_TRACE_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/limits.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/limits.h 2021-04-07 16:01:28.382632516 +0800 +@@ -0,0 +1,23 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_LIMITS_H ++#define _COBALT_UAPI_KERNEL_LIMITS_H ++ ++#define XNOBJECT_NAME_LEN 32 ++ ++#endif /* !_COBALT_UAPI_KERNEL_LIMITS_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/heap.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/heap.h 2021-04-07 16:01:28.378632521 +0800 +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_UAPI_KERNEL_HEAP_H ++#define _COBALT_UAPI_KERNEL_HEAP_H ++ ++#include ++ ++#define COBALT_MEMDEV_PRIVATE "memdev-private" ++#define COBALT_MEMDEV_SHARED "memdev-shared" ++#define COBALT_MEMDEV_SYS "memdev-sys" ++ ++struct cobalt_memdev_stat { ++ __u32 size; ++ __u32 free; ++}; ++ ++#define MEMDEV_RTIOC_STAT _IOR(RTDM_CLASS_MEMORY, 0, struct cobalt_memdev_stat) ++ ++#endif /* !_COBALT_UAPI_KERNEL_HEAP_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/thread.h 2021-04-07 16:01:28.373632529 +0800 +@@ -0,0 +1,116 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_THREAD_H ++#define _COBALT_UAPI_KERNEL_THREAD_H ++ ++#include ++ ++/** ++ * @ingroup cobalt_core_thread ++ * @defgroup cobalt_core_thread_states Thread state flags ++ * @brief Bits reporting permanent or transient states of threads ++ * @{ ++ */ ++ ++/* State flags (shared) */ ++ ++#define XNSUSP 0x00000001 /**< Suspended. */ ++#define XNPEND 0x00000002 /**< Sleep-wait for a resource. */ ++#define XNDELAY 0x00000004 /**< Delayed */ ++#define XNREADY 0x00000008 /**< Linked to the ready queue. */ ++#define XNDORMANT 0x00000010 /**< Not started yet */ ++#define XNZOMBIE 0x00000020 /**< Zombie thread in deletion process */ ++#define XNMAPPED 0x00000040 /**< Thread is mapped to a linux task */ ++#define XNRELAX 0x00000080 /**< Relaxed shadow thread (blocking bit) */ ++#define XNMIGRATE 0x00000100 /**< Thread is currently migrating to another CPU. */ ++#define XNHELD 0x00000200 /**< Thread is held to process emergency. */ ++#define XNBOOST 0x00000400 /**< PI/PP boost undergoing */ ++#define XNSSTEP 0x00000800 /**< Single-stepped by debugger */ ++#define XNLOCK 0x00001000 /**< Scheduler lock control (pseudo-bit, not in ->state) */ ++#define XNRRB 0x00002000 /**< Undergoes a round-robin scheduling */ ++#define XNWARN 0x00004000 /**< Issue SIGDEBUG on error detection */ ++#define XNFPU 0x00008000 /**< Thread uses FPU */ ++#define XNROOT 0x00010000 /**< Root thread (that is, Linux/IDLE) */ ++#define XNWEAK 0x00020000 /**< Non real-time shadow (from the WEAK class) */ ++#define XNUSER 0x00040000 /**< Shadow thread running in userland */ ++#define XNJOINED 0x00080000 /**< Another thread waits for joining this thread */ ++#define XNTRAPLB 0x00100000 /**< Trap lock break (i.e. 
may not sleep with sched lock) */ ++#define XNDEBUG 0x00200000 /**< User-level debugging enabled */ ++#define XNDBGSTOP 0x00400000 /**< Stopped for synchronous debugging */ ++ ++/** @} */ ++ ++/** ++ * @ingroup cobalt_core_thread ++ * @defgroup cobalt_core_thread_info Thread information flags ++ * @brief Bits reporting events notified to threads ++ * @{ ++ */ ++ ++/* Information flags (shared) */ ++ ++#define XNTIMEO 0x00000001 /**< Woken up due to a timeout condition */ ++#define XNRMID 0x00000002 /**< Pending on a removed resource */ ++#define XNBREAK 0x00000004 /**< Forcibly awaken from a wait state */ ++#define XNKICKED 0x00000008 /**< Forced out of primary mode */ ++#define XNWAKEN 0x00000010 /**< Thread waken up upon resource availability */ ++#define XNROBBED 0x00000020 /**< Robbed from resource ownership */ ++#define XNCANCELD 0x00000040 /**< Cancellation request is pending */ ++#define XNPIALERT 0x00000080 /**< Priority inversion alert (SIGDEBUG sent) */ ++#define XNSCHEDP 0x00000100 /**< schedparam propagation is pending */ ++#define XNCONTHI 0x00000200 /**< Continue in primary mode after debugging */ ++ ++/* Local information flags (private to current thread) */ ++ ++#define XNMOVED 0x00000001 /**< CPU migration in primary mode occurred */ ++#define XNLBALERT 0x00000002 /**< Scheduler lock break alert (SIGDEBUG sent) */ ++#define XNDESCENT 0x00000004 /**< Adaptive transitioning to secondary mode */ ++#define XNSYSRST 0x00000008 /**< Thread awaiting syscall restart after signal */ ++#define XNHICCUP 0x00000010 /**< Just left from ptracing */ ++ ++/** @} */ ++ ++/* ++ * Must follow strictly the declaration order of the state flags ++ * defined above. Status symbols are defined as follows: ++ * ++ * 'S' -> Forcibly suspended. ++ * 'w'/'W' -> Waiting for a resource, with or without timeout. ++ * 'D' -> Delayed (without any other wait condition). ++ * 'R' -> Runnable. ++ * 'U' -> Unstarted or dormant. ++ * 'X' -> Relaxed shadow. ++ * 'H' -> Held in emergency. ++ * 'b' -> Priority boost undergoing. ++ * 'T' -> Ptraced and stopped. ++ * 'l' -> Locks scheduler. ++ * 'r' -> Undergoes round-robin. ++ * 't' -> Runtime mode errors notified. ++ * 'L' -> Lock breaks trapped. ++ * 's' -> Ptraced, stopped synchronously. ++ */ ++#define XNTHREAD_STATE_LABELS "SWDRU..X.HbTlrt.....L.s" ++ ++struct xnthread_user_window { ++ __u32 state; ++ __u32 info; ++ __u32 grant_value; ++ __u32 pp_pending; ++}; ++ ++#endif /* !_COBALT_UAPI_KERNEL_THREAD_H */ +--- linux/include/xenomai/cobalt/uapi/asm-generic/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/asm-generic/syscall.h 2021-04-07 16:01:28.368632536 +0800 +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_UAPI_ASM_GENERIC_SYSCALL_H ++#define _COBALT_UAPI_ASM_GENERIC_SYSCALL_H ++ ++#include ++#include ++#include ++ ++#define __COBALT_SYSCALL_BIT 0x10000000 ++ ++struct cobalt_bindreq { ++ /** Features userland requires. */ ++ __u32 feat_req; ++ /** ABI revision userland uses. */ ++ __u32 abi_rev; ++ /** Features the Cobalt core provides. */ ++ struct cobalt_featinfo feat_ret; ++}; ++ ++#define COBALT_SECONDARY 0 ++#define COBALT_PRIMARY 1 ++ ++#endif /* !_COBALT_UAPI_ASM_GENERIC_SYSCALL_H */ +--- linux/include/xenomai/cobalt/uapi/asm-generic/features.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/asm-generic/features.h 2021-04-07 16:01:28.364632542 +0800 +@@ -0,0 +1,114 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_ASM_GENERIC_FEATURES_H ++#define _COBALT_UAPI_ASM_GENERIC_FEATURES_H ++ ++#include ++ ++#define XNFEAT_STRING_LEN 64 ++ ++struct cobalt_featinfo { ++ /** Real-time clock frequency */ ++ __u64 clock_freq; ++ /** Offset of nkvdso in the sem heap. */ ++ __u32 vdso_offset; ++ /** ABI revision level. */ ++ __u32 feat_abirev; ++ /** Available feature set. */ ++ __u32 feat_all; ++ /** Mandatory features (when requested). */ ++ __u32 feat_man; ++ /** Requested feature set. */ ++ __u32 feat_req; ++ /** Missing features. */ ++ __u32 feat_mis; ++ char feat_all_s[XNFEAT_STRING_LEN]; ++ char feat_man_s[XNFEAT_STRING_LEN]; ++ char feat_req_s[XNFEAT_STRING_LEN]; ++ char feat_mis_s[XNFEAT_STRING_LEN]; ++ /* Architecture-specific features. */ ++ struct cobalt_featinfo_archdep feat_arch; ++}; ++ ++#define __xn_feat_smp 0x80000000 ++#define __xn_feat_nosmp 0x40000000 ++#define __xn_feat_fastsynch 0x20000000 ++#define __xn_feat_nofastsynch 0x10000000 ++#define __xn_feat_control 0x08000000 ++#define __xn_feat_prioceiling 0x04000000 ++ ++#ifdef CONFIG_SMP ++#define __xn_feat_smp_mask __xn_feat_smp ++#else ++#define __xn_feat_smp_mask __xn_feat_nosmp ++#endif ++ ++/* ++ * Revisit: all archs currently support fast locking, and there is no ++ * reason for any future port not to provide this. This will be ++ * written in stone at the next ABI update, when fastsynch support is ++ * dropped from the optional feature set. ++ */ ++#define __xn_feat_fastsynch_mask __xn_feat_fastsynch ++ ++/* List of generic features kernel or userland may support */ ++#define __xn_feat_generic_mask \ ++ (__xn_feat_smp_mask | \ ++ __xn_feat_fastsynch_mask | \ ++ __xn_feat_prioceiling) ++ ++/* ++ * List of features both sides have to agree on: If userland supports ++ * it, the kernel has to provide it, too. 
This means backward ++ * compatibility between older userland and newer kernel may be ++ * supported for those features, but forward compatibility between ++ * newer userland and older kernel cannot. ++ */ ++#define __xn_feat_generic_man_mask \ ++ (__xn_feat_fastsynch | \ ++ __xn_feat_nofastsynch | \ ++ __xn_feat_nosmp | \ ++ __xn_feat_prioceiling) ++ ++static inline ++const char *get_generic_feature_label(unsigned int feature) ++{ ++ switch (feature) { ++ case __xn_feat_smp: ++ return "smp"; ++ case __xn_feat_nosmp: ++ return "nosmp"; ++ case __xn_feat_fastsynch: ++ return "fastsynch"; ++ case __xn_feat_nofastsynch: ++ return "nofastsynch"; ++ case __xn_feat_control: ++ return "control"; ++ case __xn_feat_prioceiling: ++ return "prioceiling"; ++ default: ++ return 0; ++ } ++} ++ ++static inline int check_abi_revision(unsigned long abirev) ++{ ++ return abirev == XENOMAI_ABI_REV; ++} ++ ++#endif /* !_COBALT_UAPI_ASM_GENERIC_FEATURES_H */ +--- linux/include/xenomai/cobalt/uapi/asm-generic/arith.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/asm-generic/arith.h 2021-04-07 16:01:28.359632549 +0800 +@@ -0,0 +1,365 @@ ++/** ++ * Generic arithmetic/conversion routines. ++ * Copyright © 2005 Stelian Pop. ++ * Copyright © 2005 Gilles Chanteperdrix. ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_UAPI_ASM_GENERIC_ARITH_H ++#define _COBALT_UAPI_ASM_GENERIC_ARITH_H ++ ++#ifndef xnarch_u64tou32 ++#define xnarch_u64tou32(ull, h, l) ({ \ ++ union { \ ++ unsigned long long _ull; \ ++ struct endianstruct _s; \ ++ } _u; \ ++ _u._ull = (ull); \ ++ (h) = _u._s._h; \ ++ (l) = _u._s._l; \ ++}) ++#endif /* !xnarch_u64tou32 */ ++ ++#ifndef xnarch_u64fromu32 ++#define xnarch_u64fromu32(h, l) ({ \ ++ union { \ ++ unsigned long long _ull; \ ++ struct endianstruct _s; \ ++ } _u; \ ++ _u._s._h = (h); \ ++ _u._s._l = (l); \ ++ _u._ull; \ ++}) ++#endif /* !xnarch_u64fromu32 */ ++ ++#ifndef xnarch_ullmul ++static inline __attribute__((__const__)) unsigned long long ++xnarch_generic_ullmul(const unsigned m0, const unsigned m1) ++{ ++ return (unsigned long long) m0 * m1; ++} ++#define xnarch_ullmul(m0,m1) xnarch_generic_ullmul((m0),(m1)) ++#endif /* !xnarch_ullmul */ ++ ++#ifndef xnarch_ulldiv ++static inline unsigned long long xnarch_generic_ulldiv (unsigned long long ull, ++ const unsigned uld, ++ unsigned long *const rp) ++{ ++ const unsigned r = do_div(ull, uld); ++ ++ if (rp) ++ *rp = r; ++ ++ return ull; ++} ++#define xnarch_ulldiv(ull,uld,rp) xnarch_generic_ulldiv((ull),(uld),(rp)) ++#endif /* !xnarch_ulldiv */ ++ ++#ifndef xnarch_uldivrem ++#define xnarch_uldivrem(ull,ul,rp) ((unsigned) xnarch_ulldiv((ull),(ul),(rp))) ++#endif /* !xnarch_uldivrem */ ++ ++#ifndef xnarch_divmod64 ++static inline unsigned long long ++xnarch_generic_divmod64(unsigned long long a, ++ unsigned long long b, ++ unsigned long long *rem) ++{ ++ unsigned long long q; ++#if defined(__KERNEL__) && BITS_PER_LONG < 64 ++ unsigned long long ++ xnarch_generic_full_divmod64(unsigned long long a, ++ unsigned long long b, ++ unsigned long long *rem); ++ if (b <= 0xffffffffULL) { ++ unsigned long r; ++ q = xnarch_ulldiv(a, b, &r); ++ if (rem) ++ *rem = r; ++ } else { ++ if (a < b) { ++ if (rem) ++ *rem = a; ++ return 0; ++ } ++ ++ return xnarch_generic_full_divmod64(a, b, rem); ++ } ++#else /* !(__KERNEL__ && BITS_PER_LONG < 64) */ ++ q = a / b; ++ if (rem) ++ *rem = a % b; ++#endif /* !(__KERNEL__ && BITS_PER_LONG < 64) */ ++ return q; ++} ++#define xnarch_divmod64(a,b,rp) xnarch_generic_divmod64((a),(b),(rp)) ++#endif /* !xnarch_divmod64 */ ++ ++#ifndef xnarch_imuldiv ++static inline __attribute__((__const__)) int xnarch_generic_imuldiv(int i, ++ int mult, ++ int div) ++{ ++ /* (int)i = (unsigned long long)i*(unsigned)(mult)/(unsigned)div. */ ++ const unsigned long long ull = xnarch_ullmul(i, mult); ++ return xnarch_uldivrem(ull, div, NULL); ++} ++#define xnarch_imuldiv(i,m,d) xnarch_generic_imuldiv((i),(m),(d)) ++#endif /* !xnarch_imuldiv */ ++ ++#ifndef xnarch_imuldiv_ceil ++static inline __attribute__((__const__)) int xnarch_generic_imuldiv_ceil(int i, ++ int mult, ++ int div) ++{ ++ /* Same as xnarch_generic_imuldiv, rounding up. */ ++ const unsigned long long ull = xnarch_ullmul(i, mult); ++ return xnarch_uldivrem(ull + (unsigned)div - 1, div, NULL); ++} ++#define xnarch_imuldiv_ceil(i,m,d) xnarch_generic_imuldiv_ceil((i),(m),(d)) ++#endif /* !xnarch_imuldiv_ceil */ ++ ++/* Division of an unsigned 96 bits ((h << 32) + l) by an unsigned 32 bits. ++ Building block for llimd. Without const qualifiers, gcc reload registers ++ after each call to uldivrem. 
*/ ++static inline unsigned long long ++xnarch_generic_div96by32(const unsigned long long h, ++ const unsigned l, ++ const unsigned d, ++ unsigned long *const rp) ++{ ++ unsigned long rh; ++ const unsigned qh = xnarch_uldivrem(h, d, &rh); ++ const unsigned long long t = xnarch_u64fromu32(rh, l); ++ const unsigned ql = xnarch_uldivrem(t, d, rp); ++ ++ return xnarch_u64fromu32(qh, ql); ++} ++ ++#ifndef xnarch_llimd ++static inline __attribute__((__const__)) ++unsigned long long xnarch_generic_ullimd(const unsigned long long op, ++ const unsigned m, ++ const unsigned d) ++{ ++ unsigned int oph, opl, tlh, tll; ++ unsigned long long th, tl; ++ ++ xnarch_u64tou32(op, oph, opl); ++ tl = xnarch_ullmul(opl, m); ++ xnarch_u64tou32(tl, tlh, tll); ++ th = xnarch_ullmul(oph, m); ++ th += tlh; ++ ++ return xnarch_generic_div96by32(th, tll, d, NULL); ++} ++ ++static inline __attribute__((__const__)) long long ++xnarch_generic_llimd (long long op, unsigned m, unsigned d) ++{ ++ long long ret; ++ int sign = 0; ++ ++ if (op < 0LL) { ++ sign = 1; ++ op = -op; ++ } ++ ret = xnarch_generic_ullimd(op, m, d); ++ ++ return sign ? -ret : ret; ++} ++#define xnarch_llimd(ll,m,d) xnarch_generic_llimd((ll),(m),(d)) ++#endif /* !xnarch_llimd */ ++ ++#ifndef _xnarch_u96shift ++#define xnarch_u96shift(h, m, l, s) ({ \ ++ unsigned int _l = (l); \ ++ unsigned int _m = (m); \ ++ unsigned int _s = (s); \ ++ _l >>= _s; \ ++ _l |= (_m << (32 - _s)); \ ++ _m >>= _s; \ ++ _m |= ((h) << (32 - _s)); \ ++ xnarch_u64fromu32(_m, _l); \ ++}) ++#endif /* !xnarch_u96shift */ ++ ++static inline long long xnarch_llmi(int i, int j) ++{ ++ /* Fast 32x32->64 signed multiplication */ ++ return (long long) i * j; ++} ++ ++#ifndef xnarch_llmulshft ++/* Fast scaled-math-based replacement for long long multiply-divide */ ++static inline long long ++xnarch_generic_llmulshft(const long long op, ++ const unsigned m, ++ const unsigned s) ++{ ++ unsigned int oph, opl, tlh, tll, thh, thl; ++ unsigned long long th, tl; ++ ++ xnarch_u64tou32(op, oph, opl); ++ tl = xnarch_ullmul(opl, m); ++ xnarch_u64tou32(tl, tlh, tll); ++ th = xnarch_llmi(oph, m); ++ th += tlh; ++ xnarch_u64tou32(th, thh, thl); ++ ++ return xnarch_u96shift(thh, thl, tll, s); ++} ++#define xnarch_llmulshft(ll, m, s) xnarch_generic_llmulshft((ll), (m), (s)) ++#endif /* !xnarch_llmulshft */ ++ ++#ifdef XNARCH_HAVE_NODIV_LLIMD ++ ++/* Representation of a 32 bits fraction. */ ++struct xnarch_u32frac { ++ unsigned long long frac; ++ unsigned integ; ++}; ++ ++static inline void xnarch_init_u32frac(struct xnarch_u32frac *const f, ++ const unsigned m, ++ const unsigned d) ++{ ++ /* ++ * Avoid clever compiler optimizations to occur when d is ++ * known at compile-time. The performance of this function is ++ * not critical since it is only called at init time. 
++ */ ++ volatile unsigned vol_d = d; ++ f->integ = m / d; ++ f->frac = xnarch_generic_div96by32 ++ (xnarch_u64fromu32(m % d, 0), 0, vol_d, NULL); ++} ++ ++#ifndef xnarch_nodiv_imuldiv ++static inline __attribute__((__const__)) unsigned ++xnarch_generic_nodiv_imuldiv(unsigned op, const struct xnarch_u32frac f) ++{ ++ return (xnarch_ullmul(op, f.frac >> 32) >> 32) + f.integ * op; ++} ++#define xnarch_nodiv_imuldiv(op, f) xnarch_generic_nodiv_imuldiv((op),(f)) ++#endif /* xnarch_nodiv_imuldiv */ ++ ++#ifndef xnarch_nodiv_imuldiv_ceil ++static inline __attribute__((__const__)) unsigned ++xnarch_generic_nodiv_imuldiv_ceil(unsigned op, const struct xnarch_u32frac f) ++{ ++ unsigned long long full = xnarch_ullmul(op, f.frac >> 32) + ~0U; ++ return (full >> 32) + f.integ * op; ++} ++#define xnarch_nodiv_imuldiv_ceil(op, f) \ ++ xnarch_generic_nodiv_imuldiv_ceil((op),(f)) ++#endif /* xnarch_nodiv_imuldiv_ceil */ ++ ++#ifndef xnarch_nodiv_ullimd ++ ++#ifndef xnarch_add96and64 ++#error "xnarch_add96and64 must be implemented." ++#endif ++ ++static inline __attribute__((__const__)) unsigned long long ++xnarch_mul64by64_high(const unsigned long long op, const unsigned long long m) ++{ ++ /* Compute high 64 bits of multiplication 64 bits x 64 bits. */ ++ register unsigned long long t0, t1, t2, t3; ++ register unsigned int oph, opl, mh, ml, t0h, t0l, t1h, t1l, t2h, t2l, t3h, t3l; ++ ++ xnarch_u64tou32(op, oph, opl); ++ xnarch_u64tou32(m, mh, ml); ++ t0 = xnarch_ullmul(opl, ml); ++ xnarch_u64tou32(t0, t0h, t0l); ++ t3 = xnarch_ullmul(oph, mh); ++ xnarch_u64tou32(t3, t3h, t3l); ++ xnarch_add96and64(t3h, t3l, t0h, 0, t0l >> 31); ++ t1 = xnarch_ullmul(oph, ml); ++ xnarch_u64tou32(t1, t1h, t1l); ++ xnarch_add96and64(t3h, t3l, t0h, t1h, t1l); ++ t2 = xnarch_ullmul(opl, mh); ++ xnarch_u64tou32(t2, t2h, t2l); ++ xnarch_add96and64(t3h, t3l, t0h, t2h, t2l); ++ ++ return xnarch_u64fromu32(t3h, t3l); ++} ++ ++static inline unsigned long long ++xnarch_generic_nodiv_ullimd(const unsigned long long op, ++ const unsigned long long frac, ++ unsigned int integ) ++{ ++ return xnarch_mul64by64_high(op, frac) + integ * op; ++} ++#define xnarch_nodiv_ullimd(op, f, i) xnarch_generic_nodiv_ullimd((op),(f), (i)) ++#endif /* !xnarch_nodiv_ullimd */ ++ ++#ifndef xnarch_nodiv_llimd ++static inline __attribute__((__const__)) long long ++xnarch_generic_nodiv_llimd(long long op, unsigned long long frac, ++ unsigned int integ) ++{ ++ long long ret; ++ int sign = 0; ++ ++ if (op < 0LL) { ++ sign = 1; ++ op = -op; ++ } ++ ret = xnarch_nodiv_ullimd(op, frac, integ); ++ ++ return sign ? -ret : ret; ++} ++#define xnarch_nodiv_llimd(ll,frac,integ) xnarch_generic_nodiv_llimd((ll),(frac),(integ)) ++#endif /* !xnarch_nodiv_llimd */ ++ ++#endif /* XNARCH_HAVE_NODIV_LLIMD */ ++ ++static inline void xnarch_init_llmulshft(const unsigned m_in, ++ const unsigned d_in, ++ unsigned *m_out, ++ unsigned *s_out) ++{ ++ /* ++ * Avoid clever compiler optimizations to occur when d is ++ * known at compile-time. The performance of this function is ++ * not critical since it is only called at init time. 
++ */ ++ volatile unsigned int vol_d = d_in; ++ unsigned long long mult; ++ ++ *s_out = 31; ++ while (1) { ++ mult = ((unsigned long long)m_in) << *s_out; ++ do_div(mult, vol_d); ++ if (mult <= 0x7FFFFFFF) ++ break; ++ (*s_out)--; ++ } ++ *m_out = (unsigned int)mult; ++} ++ ++#define xnarch_ullmod(ull,uld,rem) ({ xnarch_ulldiv(ull,uld,rem); (*rem); }) ++#define xnarch_uldiv(ull, d) xnarch_uldivrem(ull, d, NULL) ++#define xnarch_ulmod(ull, d) ({ unsigned long _rem; \ ++ xnarch_uldivrem(ull,d,&_rem); _rem; }) ++ ++#define xnarch_div64(a,b) xnarch_divmod64((a),(b),NULL) ++#define xnarch_mod64(a,b) ({ unsigned long long _rem; \ ++ xnarch_divmod64((a),(b),&_rem); _rem; }) ++ ++#endif /* _COBALT_UAPI_ASM_GENERIC_ARITH_H */ +--- linux/include/xenomai/cobalt/uapi/mutex.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/mutex.h 2021-04-07 16:01:28.354632556 +0800 +@@ -0,0 +1,44 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_MUTEX_H ++#define _COBALT_UAPI_MUTEX_H ++ ++#include ++ ++#define COBALT_MUTEX_MAGIC 0x86860303 ++ ++struct cobalt_mutex_state { ++ atomic_t owner; ++ __u32 flags; ++#define COBALT_MUTEX_COND_SIGNAL 0x00000001 ++#define COBALT_MUTEX_ERRORCHECK 0x00000002 ++ __u32 ceiling; ++}; ++ ++union cobalt_mutex_union { ++ pthread_mutex_t native_mutex; ++ struct cobalt_mutex_shadow { ++ __u32 magic; ++ __u32 lockcnt; ++ __u32 state_offset; ++ xnhandle_t handle; ++ struct cobalt_mutexattr attr; ++ } shadow_mutex; ++}; ++ ++#endif /* !_COBALT_UAPI_MUTEX_H */ +--- linux/include/xenomai/cobalt/uapi/time.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/time.h 2021-04-07 16:01:28.350632562 +0800 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
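The fixed-point helpers defined in this hunk (xnarch_init_llmulshft() picks a multiplier/shift pair once, xnarch_llmulshft() then scales 64-bit values without dividing on the hot path) are the usual way Cobalt converts clock ticks to nanoseconds. The standalone userspace sketch below reproduces the technique only, not the patch code: the 24 MHz timebase is an arbitrary assumption and __int128 stands in for the 96-bit helpers above.

#include <stdio.h>
#include <stdint.h>

/* Pick the largest shift s such that (m_in << s) / d still fits in 31
 * bits, mirroring xnarch_init_llmulshft() above; afterwards
 * (op * m) >> s approximates op * m_in / d with no division. */
static void demo_init_llmulshft(uint32_t m_in, uint32_t d,
				uint32_t *m, uint32_t *s)
{
	uint64_t mult;

	*s = 31;
	for (;;) {
		mult = ((uint64_t)m_in << *s) / d;
		if (mult <= 0x7FFFFFFF)
			break;
		(*s)--;
	}
	*m = (uint32_t)mult;
}

int main(void)
{
	uint32_t m, s;
	uint64_t ticks = 240000000ULL;	/* 10 s worth of 24 MHz ticks */

	/* Assumed 24 MHz timebase -> nanoseconds (factor 1e9 / 24e6). */
	demo_init_llmulshft(1000000000u, 24000000u, &m, &s);

	/* __int128 (GCC/Clang extension) replaces the 96-bit math here. */
	unsigned long long ns =
		(unsigned long long)(((__int128)ticks * m) >> s);

	printf("m=%u s=%u -> %llu ns (exact: %llu ns)\n",
	       m, s, ns, ticks * 1000ULL / 24ULL);
	return 0;
}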
++ */ ++#ifndef _COBALT_UAPI_TIME_H ++#define _COBALT_UAPI_TIME_H ++ ++#ifndef CLOCK_MONOTONIC_RAW ++#define CLOCK_MONOTONIC_RAW 4 ++#endif ++ ++/* ++ * Additional clock ids we manage are supposed not to collide with any ++ * of the POSIX and Linux kernel definitions so that no ambiguities ++ * arise when porting applications in both directions. ++ * ++ * 0 .. 31 regular POSIX/linux clock ids. ++ * 32 .. 63 statically reserved Cobalt clocks ++ * 64 .. 127 dynamically registered Cobalt clocks (external) ++ * ++ * CAUTION: clock ids must fit within a 7bit value, see ++ * include/cobalt/uapi/thread.h (e.g. cobalt_condattr). ++ */ ++#define __COBALT_CLOCK_STATIC(nr) ((clockid_t)(nr + 32)) ++ ++#define CLOCK_HOST_REALTIME __COBALT_CLOCK_STATIC(0) ++ ++#define COBALT_MAX_EXTCLOCKS 64 ++ ++#define __COBALT_CLOCK_EXT(nr) ((clockid_t)(nr) | (1 << 6)) ++#define __COBALT_CLOCK_EXT_P(id) ((int)(id) >= 64 && (int)(id) < 128) ++#define __COBALT_CLOCK_EXT_INDEX(id) ((int)(id) & ~(1 << 6)) ++ ++/* ++ * Additional timerfd defines ++ * ++ * when passing TFD_WAKEUP to timer_settime, any timer expiration ++ * unblocks the thread having issued timer_settime. ++ */ ++#define TFD_WAKEUP (1 << 2) ++ ++#endif /* !_COBALT_UAPI_TIME_H */ +--- linux/include/xenomai/cobalt/uapi/sem.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/sem.h 2021-04-07 16:01:28.345632569 +0800 +@@ -0,0 +1,56 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_SEM_H ++#define _COBALT_UAPI_SEM_H ++ ++#include ++ ++#define COBALT_SEM_MAGIC (0x86860707) ++#define COBALT_NAMED_SEM_MAGIC (0x86860D0D) ++ ++struct cobalt_sem; ++ ++struct cobalt_sem_state { ++ atomic_t value; ++ __u32 flags; ++}; ++ ++union cobalt_sem_union { ++ sem_t native_sem; ++ struct cobalt_sem_shadow { ++ __u32 magic; ++ __s32 state_offset; ++ xnhandle_t handle; ++ } shadow_sem; ++}; ++ ++struct cobalt_sem_info { ++ unsigned int value; ++ int flags; ++ int nrwait; ++}; ++ ++#define SEM_FIFO 0x1 ++#define SEM_PULSE 0x2 ++#define SEM_PSHARED 0x4 ++#define SEM_REPORT 0x8 ++#define SEM_WARNDEL 0x10 ++#define SEM_RAWCLOCK 0x20 ++#define SEM_NOBUSYDEL 0x40 ++ ++#endif /* !_COBALT_UAPI_SEM_H */ +--- linux/include/xenomai/cobalt/uapi/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/thread.h 2021-04-07 16:01:28.341632574 +0800 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. 
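The clock id map spelled out above (0-31 POSIX/Linux, 32-63 static Cobalt clocks, 64-127 dynamically registered external clocks) keeps every id within 7 bits so it fits bitfields such as cobalt_condattr.clock. A minimal standalone illustration of the external-clock encoding follows, with the macros copied under DEMO_ names so the snippet compiles without the UAPI headers:

#include <stdio.h>

/* Local copies of __COBALT_CLOCK_EXT and friends, renamed for the demo. */
#define DEMO_CLOCK_EXT(nr)       ((nr) | (1 << 6))
#define DEMO_CLOCK_EXT_P(id)     ((id) >= 64 && (id) < 128)
#define DEMO_CLOCK_EXT_INDEX(id) ((id) & ~(1 << 6))

int main(void)
{
	for (int nr = 0; nr < 3; nr++) {
		int id = DEMO_CLOCK_EXT(nr);

		printf("external clock #%d -> id %d (external? %d, index back %d)\n",
		       nr, id, DEMO_CLOCK_EXT_P(id), DEMO_CLOCK_EXT_INDEX(id));
	}
	return 0;
}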
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_THREAD_H ++#define _COBALT_UAPI_THREAD_H ++ ++#include ++ ++#define PTHREAD_WARNSW XNWARN ++#define PTHREAD_LOCK_SCHED XNLOCK ++#define PTHREAD_DISABLE_LOCKBREAK XNTRAPLB ++#define PTHREAD_CONFORMING 0 ++ ++struct cobalt_mutexattr { ++ int type : 3; ++ int protocol : 3; ++ int pshared : 1; ++ int __pad : 1; ++ int ceiling : 8; /* prio-1, (XN)SCHED_FIFO range. */ ++}; ++ ++struct cobalt_condattr { ++ int clock : 7; ++ int pshared : 1; ++}; ++ ++struct cobalt_threadstat { ++ __u64 xtime; ++ __u64 timeout; ++ __u64 msw; ++ __u64 csw; ++ __u64 xsc; ++ __u32 status; ++ __u32 pf; ++ int cpu; ++ int cprio; ++ char name[XNOBJECT_NAME_LEN]; ++ char personality[XNOBJECT_NAME_LEN]; ++}; ++ ++#endif /* !_COBALT_UAPI_THREAD_H */ +--- linux/include/xenomai/cobalt/uapi/monitor.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/monitor.h 2021-04-07 16:01:28.336632582 +0800 +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_MONITOR_H ++#define _COBALT_UAPI_MONITOR_H ++ ++#include ++ ++struct cobalt_monitor_state { ++ atomic_t owner; ++ __u32 flags; ++#define COBALT_MONITOR_GRANTED 0x01 ++#define COBALT_MONITOR_DRAINED 0x02 ++#define COBALT_MONITOR_SIGNALED 0x03 /* i.e. GRANTED or DRAINED */ ++#define COBALT_MONITOR_BROADCAST 0x04 ++#define COBALT_MONITOR_PENDED 0x08 ++}; ++ ++struct cobalt_monitor; ++ ++struct cobalt_monitor_shadow { ++ __u32 state_offset; ++ __u32 flags; ++ xnhandle_t handle; ++#define COBALT_MONITOR_SHARED 0x1 ++#define COBALT_MONITOR_WAITGRANT 0x0 ++#define COBALT_MONITOR_WAITDRAIN 0x1 ++}; ++ ++typedef struct cobalt_monitor_shadow cobalt_monitor_t; ++ ++#endif /* !_COBALT_UAPI_MONITOR_H */ +--- linux/include/xenomai/cobalt/uapi/sched.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/sched.h 2021-04-07 16:01:28.331632589 +0800 +@@ -0,0 +1,136 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. 
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_SCHED_H ++#define _COBALT_UAPI_SCHED_H ++ ++#define SCHED_COBALT 42 ++#define SCHED_WEAK 43 ++ ++#ifndef SCHED_SPORADIC ++#define SCHED_SPORADIC 10 ++#define sched_ss_low_priority sched_u.ss.__sched_low_priority ++#define sched_ss_repl_period sched_u.ss.__sched_repl_period ++#define sched_ss_init_budget sched_u.ss.__sched_init_budget ++#define sched_ss_max_repl sched_u.ss.__sched_max_repl ++#endif /* !SCHED_SPORADIC */ ++ ++struct __sched_ss_param { ++ int __sched_low_priority; ++ struct timespec __sched_repl_period; ++ struct timespec __sched_init_budget; ++ int __sched_max_repl; ++}; ++ ++#define sched_rr_quantum sched_u.rr.__sched_rr_quantum ++ ++struct __sched_rr_param { ++ struct timespec __sched_rr_quantum; ++}; ++ ++#ifndef SCHED_TP ++#define SCHED_TP 11 ++#define sched_tp_partition sched_u.tp.__sched_partition ++#endif /* !SCHED_TP */ ++ ++struct __sched_tp_param { ++ int __sched_partition; ++}; ++ ++struct sched_tp_window { ++ struct timespec offset; ++ struct timespec duration; ++ int ptid; ++}; ++ ++enum { ++ sched_tp_install, ++ sched_tp_uninstall, ++ sched_tp_start, ++ sched_tp_stop, ++}; ++ ++struct __sched_config_tp { ++ int op; ++ int nr_windows; ++ struct sched_tp_window windows[0]; ++}; ++ ++#define sched_tp_confsz(nr_win) \ ++ (sizeof(struct __sched_config_tp) + nr_win * sizeof(struct sched_tp_window)) ++ ++#ifndef SCHED_QUOTA ++#define SCHED_QUOTA 12 ++#define sched_quota_group sched_u.quota.__sched_group ++#endif /* !SCHED_QUOTA */ ++ ++struct __sched_quota_param { ++ int __sched_group; ++}; ++ ++enum { ++ sched_quota_add, ++ sched_quota_remove, ++ sched_quota_force_remove, ++ sched_quota_set, ++ sched_quota_get, ++}; ++ ++struct __sched_config_quota { ++ int op; ++ union { ++ struct { ++ int pshared; ++ } add; ++ struct { ++ int tgid; ++ } remove; ++ struct { ++ int tgid; ++ int quota; ++ int quota_peak; ++ } set; ++ struct { ++ int tgid; ++ } get; ++ }; ++ struct __sched_quota_info { ++ int tgid; ++ int quota; ++ int quota_peak; ++ int quota_sum; ++ } info; ++}; ++ ++#define sched_quota_confsz() sizeof(struct __sched_config_quota) ++ ++struct sched_param_ex { ++ int sched_priority; ++ union { ++ struct __sched_ss_param ss; ++ struct __sched_rr_param rr; ++ struct __sched_tp_param tp; ++ struct __sched_quota_param quota; ++ } sched_u; ++}; ++ ++union sched_config { ++ struct __sched_config_tp tp; ++ struct __sched_config_quota quota; ++}; ++ ++#endif /* !_COBALT_UAPI_SCHED_H */ +--- linux/include/xenomai/cobalt/uapi/signal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/signal.h 2021-04-07 16:01:28.327632594 +0800 +@@ -0,0 +1,141 @@ ++/* ++ * Copyright (C) 2006 Gilles Chanteperdrix . ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. 
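struct sched_param_ex and union sched_config above are the userland-visible descriptors for Cobalt's extended policies (SCHED_TP, SCHED_QUOTA, SCHED_SPORADIC, ...). Below is a hedged sketch of filling in a two-window SCHED_TP configuration sized with sched_tp_confsz(); it assumes the definitions above are in scope, and the libcobalt call that would normally consume such a buffer (sched_setconfig_np()) is only mentioned here, not exercised.

#include <stdlib.h>
#include <string.h>

/* Build a time-partitioning table: 1 ms for partition 0, then 3 ms for
 * partition 1, repeating. The caller owns the returned buffer. */
static union sched_config *make_tp_config(size_t *lenp)
{
	size_t len = sched_tp_confsz(2);
	union sched_config *cf = malloc(len);

	if (cf == NULL)
		return NULL;

	memset(cf, 0, len);
	cf->tp.op = sched_tp_install;
	cf->tp.nr_windows = 2;
	cf->tp.windows[0].offset.tv_nsec   = 0;
	cf->tp.windows[0].duration.tv_nsec = 1000000;
	cf->tp.windows[0].ptid = 0;
	cf->tp.windows[1].offset.tv_nsec   = 1000000;
	cf->tp.windows[1].duration.tv_nsec = 3000000;
	cf->tp.windows[1].ptid = 1;
	*lenp = len;

	return cf;
}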
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_SIGNAL_H ++#define _COBALT_UAPI_SIGNAL_H ++ ++/* ++ * Those are pseudo-signals only available with pthread_kill() to ++ * suspend/resume/unblock threads synchronously, force them out of ++ * primary mode or even demote them to the SCHED_OTHER class via the ++ * low-level nucleus interface. Can't block those signals, queue them, ++ * or even set them in a sigset. Those are nasty, strictly anti-POSIX ++ * things; we do provide them nevertheless only because we are mean ++ * people doing harmful code for no valid reason. Can't go against ++ * your nature, right? Nah... (this said, don't blame us for POSIX, ++ * we are not _that_ mean). ++ */ ++#define SIGSUSP (SIGRTMAX + 1) ++#define SIGRESM (SIGRTMAX + 2) ++#define SIGRELS (SIGRTMAX + 3) ++#define SIGKICK (SIGRTMAX + 4) ++#define SIGDEMT (SIGRTMAX + 5) ++ ++/* ++ * Regular POSIX signals with specific handling by Xenomai. ++ */ ++#define SIGSHADOW SIGWINCH ++#define sigshadow_action(code) ((code) & 0xff) ++#define sigshadow_arg(code) (((code) >> 8) & 0xff) ++#define sigshadow_int(action, arg) ((action) | ((arg) << 8)) ++ ++/* SIGSHADOW action codes. */ ++#define SIGSHADOW_ACTION_HARDEN 1 ++#define SIGSHADOW_ACTION_BACKTRACE 2 ++#define SIGSHADOW_ACTION_HOME 3 ++#define SIGSHADOW_BACKTRACE_DEPTH 16 ++ ++#define SIGDEBUG SIGXCPU ++#define sigdebug_code(si) ((si)->si_value.sival_int) ++#define sigdebug_reason(si) (sigdebug_code(si) & 0xff) ++#define sigdebug_marker 0xfccf0000 ++#define sigdebug_marked(si) \ ++ ((sigdebug_code(si) & 0xffff0000) == sigdebug_marker) ++ ++/* Possible values of sigdebug_reason() */ ++#define SIGDEBUG_UNDEFINED 0 ++#define SIGDEBUG_MIGRATE_SIGNAL 1 ++#define SIGDEBUG_MIGRATE_SYSCALL 2 ++#define SIGDEBUG_MIGRATE_FAULT 3 ++#define SIGDEBUG_MIGRATE_PRIOINV 4 ++#define SIGDEBUG_NOMLOCK 5 ++#define SIGDEBUG_WATCHDOG 6 ++#define SIGDEBUG_RESCNT_IMBALANCE 7 ++#define SIGDEBUG_LOCK_BREAK 8 ++#define SIGDEBUG_MUTEX_SLEEP 9 ++ ++#define COBALT_DELAYMAX 2147483647U ++ ++/* ++ * Internal accessors to extra siginfo/sigevent fields, extending some ++ * existing base field. The extra data should be grouped in a ++ * dedicated struct type. The extra space is taken from the padding ++ * area available from the original structure definitions. ++ * ++ * e.g. getting the address of the following extension to ++ * _sifields._rt from siginfo_t, ++ * ++ * struct bar { ++ * int foo; ++ * }; ++ * ++ * would be noted as: ++ * ++ * siginfo_t si; ++ * struct bar *p = __cobalt_si_extra(&si, _rt, struct bar); ++ * ++ * This code is shared between kernel and user space. Proper ++ * definitions of siginfo_t and sigevent_t should have been read prior ++ * to including this file. ++ * ++ * CAUTION: this macro does not handle alignment issues for the extra ++ * data. The extra type definition should take care of this. 
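SIGDEBUG (aliased to SIGXCPU above) is the notification a Cobalt thread receives when it unexpectedly leaves primary mode, and sigdebug_reason()/sigdebug_marked() decode why. Here is a userspace handler sketch built only from the definitions above; it assumes those UAPI definitions are visible to the application, and the message strings are merely descriptive.

#include <signal.h>
#include <stdio.h>

static const char *reason_str[] = {
	[SIGDEBUG_UNDEFINED]        = "undefined",
	[SIGDEBUG_MIGRATE_SIGNAL]   = "signal received",
	[SIGDEBUG_MIGRATE_SYSCALL]  = "regular Linux syscall",
	[SIGDEBUG_MIGRATE_FAULT]    = "processor fault",
	[SIGDEBUG_MIGRATE_PRIOINV]  = "priority inversion",
	[SIGDEBUG_NOMLOCK]          = "missing mlockall()",
	[SIGDEBUG_WATCHDOG]         = "watchdog triggered",
	[SIGDEBUG_RESCNT_IMBALANCE] = "resource count imbalance",
	[SIGDEBUG_LOCK_BREAK]       = "scheduler lock break",
	[SIGDEBUG_MUTEX_SLEEP]      = "sleeping while holding a mutex",
};

static void sigdebug_handler(int sig, siginfo_t *si, void *context)
{
	unsigned int reason = sigdebug_reason(si);

	(void)sig;
	(void)context;

	if (!sigdebug_marked(si) ||
	    reason >= sizeof(reason_str) / sizeof(reason_str[0]))
		return;

	/* printf() is not async-signal-safe; good enough for a debug aid. */
	printf("SIGDEBUG: switched to secondary mode (%s)\n",
	       reason_str[reason]);
}

/* Typical installation, e.g. early in main():
 *	struct sigaction sa = { .sa_sigaction = sigdebug_handler,
 *				.sa_flags = SA_SIGINFO };
 *	sigaction(SIGDEBUG, &sa, NULL);
 */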
++ */ ++#ifdef __OPTIMIZE__ ++extern void *__siginfo_overflow(void); ++static inline ++const void *__check_si_overflow(size_t fldsz, size_t extrasz, const void *p) ++{ ++ siginfo_t *si __attribute__((unused)); ++ ++ if (fldsz + extrasz <= sizeof(si->_sifields)) ++ return p; ++ ++ return __siginfo_overflow(); ++} ++#define __cobalt_si_extra(__si, __basefield, __type) \ ++ ((__type *)__check_si_overflow(sizeof(__si->_sifields.__basefield), \ ++ sizeof(__type), &(__si->_sifields.__basefield) + 1)) ++#else ++#define __cobalt_si_extra(__si, __basefield, __type) \ ++ ((__type *)((&__si->_sifields.__basefield) + 1)) ++#endif ++ ++/* Same approach, this time for extending sigevent_t. */ ++ ++#ifdef __OPTIMIZE__ ++extern void *__sigevent_overflow(void); ++static inline ++const void *__check_sev_overflow(size_t fldsz, size_t extrasz, const void *p) ++{ ++ sigevent_t *sev __attribute__((unused)); ++ ++ if (fldsz + extrasz <= sizeof(sev->_sigev_un)) ++ return p; ++ ++ return __sigevent_overflow(); ++} ++#define __cobalt_sev_extra(__sev, __basefield, __type) \ ++ ((__type *)__check_sev_overflow(sizeof(__sev->_sigev_un.__basefield), \ ++ sizeof(__type), &(__sev->_sigev_un.__basefield) + 1)) ++#else ++#define __cobalt_sev_extra(__sev, __basefield, __type) \ ++ ((__type *)((&__sev->_sigev_un.__basefield) + 1)) ++#endif ++ ++#endif /* !_COBALT_UAPI_SIGNAL_H */ +--- linux/include/xenomai/cobalt/kernel/schedqueue.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/schedqueue.h 2021-04-07 16:01:28.210632762 +0800 +@@ -0,0 +1,106 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHEDQUEUE_H ++#define _COBALT_KERNEL_SCHEDQUEUE_H ++ ++#include ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++#define XNSCHED_CLASS_WEIGHT_FACTOR 1024 ++ ++#ifdef CONFIG_XENO_OPT_SCALABLE_SCHED ++ ++#include ++ ++/* ++ * Multi-level priority queue, suitable for handling the runnable ++ * thread queue of the core scheduling class with O(1) property. We ++ * only manage a descending queuing order, i.e. highest numbered ++ * priorities come first. ++ */ ++#define XNSCHED_MLQ_LEVELS 260 /* i.e. 
XNSCHED_CORE_NR_PRIO */ ++ ++struct xnsched_mlq { ++ int elems; ++ DECLARE_BITMAP(prio_map, XNSCHED_MLQ_LEVELS); ++ struct list_head heads[XNSCHED_MLQ_LEVELS]; ++}; ++ ++struct xnthread; ++ ++void xnsched_initq(struct xnsched_mlq *q); ++ ++void xnsched_addq(struct xnsched_mlq *q, ++ struct xnthread *thread); ++ ++void xnsched_addq_tail(struct xnsched_mlq *q, ++ struct xnthread *thread); ++ ++void xnsched_delq(struct xnsched_mlq *q, ++ struct xnthread *thread); ++ ++struct xnthread *xnsched_getq(struct xnsched_mlq *q); ++ ++static inline int xnsched_emptyq_p(struct xnsched_mlq *q) ++{ ++ return q->elems == 0; ++} ++ ++static inline int xnsched_weightq(struct xnsched_mlq *q) ++{ ++ return find_first_bit(q->prio_map, XNSCHED_MLQ_LEVELS); ++} ++ ++typedef struct xnsched_mlq xnsched_queue_t; ++ ++#else /* ! CONFIG_XENO_OPT_SCALABLE_SCHED */ ++ ++typedef struct list_head xnsched_queue_t; ++ ++#define xnsched_initq(__q) INIT_LIST_HEAD(__q) ++#define xnsched_emptyq_p(__q) list_empty(__q) ++#define xnsched_addq(__q, __t) list_add_prilf(__t, __q, cprio, rlink) ++#define xnsched_addq_tail(__q, __t) list_add_priff(__t, __q, cprio, rlink) ++#define xnsched_delq(__q, __t) (void)(__q), list_del(&(__t)->rlink) ++#define xnsched_getq(__q) \ ++ ({ \ ++ struct xnthread *__t = NULL; \ ++ if (!list_empty(__q)) \ ++ __t = list_get_entry(__q, struct xnthread, rlink); \ ++ __t; \ ++ }) ++#define xnsched_weightq(__q) \ ++ ({ \ ++ struct xnthread *__t; \ ++ __t = list_first_entry(__q, struct xnthread, rlink); \ ++ __t->cprio; \ ++ }) ++ ++ ++#endif /* !CONFIG_XENO_OPT_SCALABLE_SCHED */ ++ ++struct xnthread *xnsched_findq(xnsched_queue_t *q, int prio); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHEDQUEUE_H */ +--- linux/include/xenomai/cobalt/kernel/stat.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/stat.h 2021-04-07 16:01:28.205632769 +0800 +@@ -0,0 +1,152 @@ ++/* ++ * Copyright (C) 2006 Jan Kiszka . ++ * Copyright (C) 2006 Dmitry Adamushko . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_STAT_H ++#define _COBALT_KERNEL_STAT_H ++ ++#include ++ ++/** ++ * @ingroup cobalt_core_thread ++ * @defgroup cobalt_core_stat Thread runtime statistics ++ * @{ ++ */ ++#ifdef CONFIG_XENO_OPT_STATS ++ ++typedef struct xnstat_exectime { ++ ++ xnticks_t start; /* Start of execution time accumulation */ ++ ++ xnticks_t total; /* Accumulated execution time */ ++ ++} xnstat_exectime_t; ++ ++#define xnstat_percpu_data raw_cpu_ptr(nktimer.stats) ++ ++/* Return current date which can be passed to other xnstat services for ++ immediate or lazy accounting. */ ++#define xnstat_exectime_now() xnclock_core_read_raw() ++ ++/* Accumulate exectime of the current account until the given date. 
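struct xnsched_mlq above is the O(1) run queue the comment describes: a per-level list plus a priority bitmap, so that picking the next runnable level is a single find_first_bit(). The following self-contained userspace snippet illustrates only that bitmap technique (simple counters replace the kernel's per-level lists, and the GCC builtin __builtin_ctzll plays the role of find_first_bit()); it is not the patch code.

#include <stdio.h>
#include <string.h>

#define DEMO_LEVELS   260	/* mirrors XNSCHED_MLQ_LEVELS */
#define DEMO_WORDBITS 64

struct demo_mlq {
	unsigned long long map[(DEMO_LEVELS + DEMO_WORDBITS - 1) / DEMO_WORDBITS];
	int count[DEMO_LEVELS];	/* stand-in for the per-level lists */
};

static void demo_add(struct demo_mlq *q, int level)
{
	q->count[level]++;
	q->map[level / DEMO_WORDBITS] |= 1ULL << (level % DEMO_WORDBITS);
}

/* Lowest set bit first, i.e. level 0 is picked before level 200. */
static int demo_pick(struct demo_mlq *q)
{
	for (size_t w = 0; w < sizeof(q->map) / sizeof(q->map[0]); w++) {
		if (q->map[w] == 0)
			continue;

		int level = (int)(w * DEMO_WORDBITS) + __builtin_ctzll(q->map[w]);

		if (--q->count[level] == 0)
			q->map[w] &= ~(1ULL << (level % DEMO_WORDBITS));
		return level;
	}
	return -1;	/* queue empty */
}

int main(void)
{
	struct demo_mlq q;

	memset(&q, 0, sizeof(q));
	demo_add(&q, 200);
	demo_add(&q, 5);
	printf("%d %d %d\n", demo_pick(&q), demo_pick(&q), demo_pick(&q));
	return 0;	/* prints: 5 200 -1 */
}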
*/ ++#define xnstat_exectime_update(sched, date) \ ++do { \ ++ (sched)->current_account->total += \ ++ date - (sched)->last_account_switch; \ ++ (sched)->last_account_switch = date; \ ++ /* All changes must be committed before changing the current_account \ ++ reference in sched (required for xnintr_sync_stat_references) */ \ ++ smp_wmb(); \ ++} while (0) ++ ++/* Update the current account reference, returning the previous one. */ ++#define xnstat_exectime_set_current(sched, new_account) \ ++({ \ ++ xnstat_exectime_t *__prev; \ ++ __prev = (xnstat_exectime_t *)atomic_long_xchg(&(sched)->current_account, (long)(new_account)); \ ++ __prev; \ ++}) ++ ++/* Return the currently active accounting entity. */ ++#define xnstat_exectime_get_current(sched) ((sched)->current_account) ++ ++/* Finalize an account (no need to accumulate the exectime, just mark the ++ switch date and set the new account). */ ++#define xnstat_exectime_finalize(sched, new_account) \ ++do { \ ++ (sched)->last_account_switch = xnclock_core_read_raw(); \ ++ (sched)->current_account = (new_account); \ ++} while (0) ++ ++/* Obtain content of xnstat_exectime_t */ ++#define xnstat_exectime_get_start(account) ((account)->start) ++#define xnstat_exectime_get_total(account) ((account)->total) ++ ++/* Obtain last account switch date of considered sched */ ++#define xnstat_exectime_get_last_switch(sched) ((sched)->last_account_switch) ++ ++/* Reset statistics from inside the accounted entity (e.g. after CPU ++ migration). */ ++#define xnstat_exectime_reset_stats(stat) \ ++do { \ ++ (stat)->total = 0; \ ++ (stat)->start = xnclock_core_read_raw(); \ ++} while (0) ++ ++ ++typedef struct xnstat_counter { ++ unsigned long counter; ++} xnstat_counter_t; ++ ++static inline unsigned long xnstat_counter_inc(xnstat_counter_t *c) ++{ ++ return c->counter++; ++} ++ ++static inline unsigned long xnstat_counter_get(xnstat_counter_t *c) ++{ ++ return c->counter; ++} ++ ++static inline void xnstat_counter_set(xnstat_counter_t *c, unsigned long value) ++{ ++ c->counter = value; ++} ++ ++#else /* !CONFIG_XENO_OPT_STATS */ ++typedef struct xnstat_exectime { ++} xnstat_exectime_t; ++ ++#define xnstat_percpu_data NULL ++#define xnstat_exectime_now() ({ 0; }) ++#define xnstat_exectime_update(sched, date) do { } while (0) ++#define xnstat_exectime_set_current(sched, new_account) ({ (void)sched; NULL; }) ++#define xnstat_exectime_get_current(sched) ({ (void)sched; NULL; }) ++#define xnstat_exectime_finalize(sched, new_account) do { } while (0) ++#define xnstat_exectime_get_start(account) ({ 0; }) ++#define xnstat_exectime_get_total(account) ({ 0; }) ++#define xnstat_exectime_get_last_switch(sched) ({ 0; }) ++#define xnstat_exectime_reset_stats(account) do { } while (0) ++ ++typedef struct xnstat_counter { ++} xnstat_counter_t; ++ ++#define xnstat_counter_inc(c) ({ do { } while(0); 0; }) ++#define xnstat_counter_get(c) ({ 0; }) ++#define xnstat_counter_set(c, value) do { } while (0) ++#endif /* CONFIG_XENO_OPT_STATS */ ++ ++/* Account the exectime of the current account until now, switch to ++ new_account, and return the previous one. */ ++#define xnstat_exectime_switch(sched, new_account) \ ++({ \ ++ xnstat_exectime_update(sched, xnstat_exectime_now()); \ ++ xnstat_exectime_set_current(sched, new_account); \ ++}) ++ ++/* Account the exectime of the current account until given start time, switch ++ to new_account, and return the previous one. 
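The xnstat_exectime_* macros above implement per-entity CPU time accounting: whenever the scheduler switches accounts, the time elapsed since the last switch is charged to the outgoing account before the new one becomes current. A hedged userspace mock of that pattern follows, where CLOCK_MONOTONIC stands in for xnclock_core_read_raw() and every name is invented:

#include <stdio.h>
#include <time.h>

struct demo_account {
	unsigned long long total;	/* accumulated ns, like xnstat_exectime.total */
};

static struct demo_account *current_account;
static unsigned long long last_switch;

static unsigned long long demo_now(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* Mirrors the xnstat_exectime_switch() sequence: update, then set. */
static struct demo_account *demo_account_switch(struct demo_account *new_account)
{
	unsigned long long date = demo_now();
	struct demo_account *prev = current_account;

	if (prev)
		prev->total += date - last_switch;
	last_switch = date;
	current_account = new_account;

	return prev;
}

int main(void)
{
	struct demo_account a = { 0 }, b = { 0 };

	last_switch = demo_now();
	current_account = &a;
	/* ... imagine "a" running here, then a switch to "b" ... */
	demo_account_switch(&b);
	printf("a consumed %llu ns so far\n", a.total);
	return 0;
}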
*/ ++#define xnstat_exectime_lazy_switch(sched, new_account, date) \ ++({ \ ++ xnstat_exectime_update(sched, date); \ ++ xnstat_exectime_set_current(sched, new_account); \ ++}) ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_STAT_H */ +--- linux/include/xenomai/cobalt/kernel/sched-idle.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-idle.h 2021-04-07 16:01:28.200632776 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_IDLE_H ++#define _COBALT_KERNEL_SCHED_IDLE_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-idle.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++/* Idle priority level - actually never used for indexing. */ ++#define XNSCHED_IDLE_PRIO -1 ++ ++extern struct xnsched_class xnsched_class_idle; ++ ++static inline bool __xnsched_idle_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ xnthread_clear_state(thread, XNWEAK); ++ return xnsched_set_effective_priority(thread, p->idle.prio); ++} ++ ++static inline void __xnsched_idle_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->idle.prio = thread->cprio; ++} ++ ++static inline void __xnsched_idle_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p) ++ /* Inheriting a priority-less class makes no sense. */ ++ XENO_WARN_ON_ONCE(COBALT, 1); ++ else ++ thread->cprio = XNSCHED_IDLE_PRIO; ++} ++ ++static inline void __xnsched_idle_protectprio(struct xnthread *thread, int prio) ++{ ++ XENO_WARN_ON_ONCE(COBALT, 1); ++} ++ ++static inline int xnsched_idle_init_thread(struct xnthread *thread) ++{ ++ return 0; ++} ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_IDLE_H */ +--- linux/include/xenomai/cobalt/kernel/vfile.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/vfile.h 2021-04-07 16:01:28.196632782 +0800 +@@ -0,0 +1,667 @@ ++/* ++ * Copyright (C) 2010 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _COBALT_KERNEL_VFILE_H ++#define _COBALT_KERNEL_VFILE_H ++ ++#if defined(CONFIG_XENO_OPT_VFILE) || defined(DOXYGEN_CPP) ++ ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_vfile ++ * @{ ++ */ ++ ++struct xnvfile_directory; ++struct xnvfile_regular_iterator; ++struct xnvfile_snapshot_iterator; ++struct xnvfile_lock_ops; ++ ++struct xnvfile { ++ struct proc_dir_entry *pde; ++ struct file *file; ++ struct xnvfile_lock_ops *lockops; ++ int refcnt; ++ void *private; ++}; ++ ++/** ++ * @brief Vfile locking operations ++ * @anchor vfile_lockops ++ * ++ * This structure describes the operations to be provided for ++ * implementing locking support on vfiles. They apply to both ++ * snapshot-driven and regular vfiles. ++ */ ++struct xnvfile_lock_ops { ++ /** ++ * @anchor lockops_get ++ * This handler should grab the desired lock. ++ * ++ * @param vfile A pointer to the virtual file which needs ++ * locking. ++ * ++ * @return zero should be returned if the call ++ * succeeds. Otherwise, a negative error code can be returned; ++ * upon error, the current vfile operation is aborted, and the ++ * user-space caller is passed back the error value. ++ */ ++ int (*get)(struct xnvfile *vfile); ++ /** ++ * @anchor lockops_put This handler should release the lock ++ * previously grabbed by the @ref lockops_get "get() handler". ++ * ++ * @param vfile A pointer to the virtual file which currently ++ * holds the lock to release. ++ */ ++ void (*put)(struct xnvfile *vfile); ++}; ++ ++struct xnvfile_hostlock_class { ++ struct xnvfile_lock_ops ops; ++ struct mutex mutex; ++}; ++ ++struct xnvfile_nklock_class { ++ struct xnvfile_lock_ops ops; ++ spl_t s; ++}; ++ ++struct xnvfile_input { ++ const char __user *u_buf; ++ size_t size; ++ struct xnvfile *vfile; ++}; ++ ++/** ++ * @brief Regular vfile operation descriptor ++ * @anchor regular_ops ++ * ++ * This structure describes the operations available with a regular ++ * vfile. It defines handlers for sending back formatted kernel data ++ * upon a user-space read request, and for obtaining user data upon a ++ * user-space write request. ++ */ ++struct xnvfile_regular_ops { ++ /** ++ * @anchor regular_rewind This handler is called only once, ++ * when the virtual file is opened, before the @ref ++ * regular_begin "begin() handler" is invoked. ++ * ++ * @param it A pointer to the vfile iterator which will be ++ * used to read the file contents. ++ * ++ * @return Zero should be returned upon success. Otherwise, a ++ * negative error code aborts the operation, and is passed ++ * back to the reader. ++ * ++ * @note This handler is optional. It should not be used to ++ * allocate resources but rather to perform consistency ++ * checks, since no closure call is issued in case the open ++ * sequence eventually fails. ++ */ ++ int (*rewind)(struct xnvfile_regular_iterator *it); ++ /** ++ * @anchor regular_begin ++ * This handler should prepare for iterating over the records ++ * upon a read request, starting from the specified position. ++ * ++ * @param it A pointer to the current vfile iterator. On ++ * entry, it->pos is set to the (0-based) position of the ++ * first record to output. This handler may be called multiple ++ * times with different position requests. ++ * ++ * @return A pointer to the first record to format and output, ++ * to be passed to the @ref regular_show "show() handler" as ++ * its @a data parameter, if the call succeeds. 
Otherwise: ++ * ++ * - NULL in case no record is available, in which case the ++ * read operation will terminate immediately with no output. ++ * ++ * - VFILE_SEQ_START, a special value indicating that @ref ++ * regular_show "the show() handler" should receive a NULL ++ * data pointer first, in order to output a header. ++ * ++ * - ERR_PTR(errno), where errno is a negative error code; ++ * upon error, the current operation will be aborted ++ * immediately. ++ * ++ * @note This handler is optional; if none is given in the ++ * operation descriptor (i.e. NULL value), the @ref ++ * regular_show "show() handler()" will be called only once ++ * for a read operation, with a NULL @a data parameter. This ++ * particular setting is convenient for simple regular vfiles ++ * having a single, fixed record to output. ++ */ ++ void *(*begin)(struct xnvfile_regular_iterator *it); ++ /** ++ * @anchor regular_next ++ * This handler should return the address of the next record ++ * to format and output by the @ref regular_show "show() ++ * handler". ++ * ++ * @param it A pointer to the current vfile iterator. On ++ * entry, it->pos is set to the (0-based) position of the ++ * next record to output. ++ * ++ * @return A pointer to the next record to format and output, ++ * to be passed to the @ref regular_show "show() handler" as ++ * its @a data parameter, if the call succeeds. Otherwise: ++ * ++ * - NULL in case no record is available, in which case the ++ * read operation will terminate immediately with no output. ++ * ++ * - ERR_PTR(errno), where errno is a negative error code; ++ * upon error, the current operation will be aborted ++ * immediately. ++ * ++ * @note This handler is optional; if none is given in the ++ * operation descriptor (i.e. NULL value), the read operation ++ * will stop after the first invocation of the @ref regular_show ++ * "show() handler". ++ */ ++ void *(*next)(struct xnvfile_regular_iterator *it); ++ /** ++ * @anchor regular_end ++ * This handler is called after all records have been output. ++ * ++ * @param it A pointer to the current vfile iterator. ++ * ++ * @note This handler is optional and the pointer may be NULL. ++ */ ++ void (*end)(struct xnvfile_regular_iterator *it); ++ /** ++ * @anchor regular_show ++ * This handler should format and output a record. ++ * ++ * xnvfile_printf(), xnvfile_write(), xnvfile_puts() and ++ * xnvfile_putc() are available to format and/or emit the ++ * output. All routines take the iterator argument @a it as ++ * their first parameter. ++ * ++ * @param it A pointer to the current vfile iterator. ++ * ++ * @param data A pointer to the record to format then ++ * output. The first call to the handler may receive a NULL @a ++ * data pointer, depending on the presence and/or return of a ++ * @ref regular_begin "hander"; the show handler should test ++ * this special value to output any header that fits, prior to ++ * receiving more calls with actual records. ++ * ++ * @return zero if the call succeeds, also indicating that the ++ * handler should be called for the next record if ++ * any. Otherwise: ++ * ++ * - A negative error code. This will abort the output phase, ++ * and return this status to the reader. ++ * ++ * - VFILE_SEQ_SKIP, a special value indicating that the ++ * current record should be skipped and will not be output. 
++ */ ++ int (*show)(struct xnvfile_regular_iterator *it, void *data); ++ /** ++ * @anchor regular_store ++ * This handler receives data written to the vfile, likely for ++ * updating some kernel setting, or triggering any other ++ * action which fits. This is the only handler which deals ++ * with the write-side of a vfile. It is called when writing ++ * to the /proc entry of the vfile from a user-space process. ++ * ++ * The input data is described by a descriptor passed to the ++ * handler, which may be subsequently passed to parsing helper ++ * routines. For instance, xnvfile_get_string() will accept ++ * the input descriptor for returning the written data as a ++ * null-terminated character string. On the other hand, ++ * xnvfile_get_integer() will attempt to return a long integer ++ * from the input data. ++ * ++ * @param input A pointer to an input descriptor. It refers to ++ * an opaque data from the handler's standpoint. ++ * ++ * @return the number of bytes read from the input descriptor ++ * if the call succeeds. Otherwise, a negative error code. ++ * Return values from parsing helper routines are commonly ++ * passed back to the caller by the @ref regular_store ++ * "store() handler". ++ * ++ * @note This handler is optional, and may be omitted for ++ * read-only vfiles. ++ */ ++ ssize_t (*store)(struct xnvfile_input *input); ++}; ++ ++struct xnvfile_regular { ++ struct xnvfile entry; ++ size_t privsz; ++ struct xnvfile_regular_ops *ops; ++}; ++ ++struct xnvfile_regular_template { ++ size_t privsz; ++ struct xnvfile_regular_ops *ops; ++ struct xnvfile_lock_ops *lockops; ++}; ++ ++/** ++ * @brief Regular vfile iterator ++ * @anchor regular_iterator ++ * ++ * This structure defines an iterator over a regular vfile. ++ */ ++struct xnvfile_regular_iterator { ++ /** Current record position while iterating. */ ++ loff_t pos; ++ /** Backlink to the host sequential file supporting the vfile. */ ++ struct seq_file *seq; ++ /** Backlink to the vfile being read. */ ++ struct xnvfile_regular *vfile; ++ /** ++ * Start of private area. Use xnvfile_iterator_priv() to ++ * address it. ++ */ ++ char private[0]; ++}; ++ ++/** ++ * @brief Snapshot vfile operation descriptor ++ * @anchor snapshot_ops ++ * ++ * This structure describes the operations available with a ++ * snapshot-driven vfile. It defines handlers for returning a ++ * printable snapshot of some Xenomai object contents upon a ++ * user-space read request, and for updating this object upon a ++ * user-space write request. ++ */ ++struct xnvfile_snapshot_ops { ++ /** ++ * @anchor snapshot_rewind ++ * This handler (re-)initializes the data collection, moving ++ * the seek pointer at the first record. When the file ++ * revision tag is touched while collecting data, the current ++ * reading is aborted, all collected data dropped, and the ++ * vfile is eventually rewound. ++ * ++ * @param it A pointer to the current snapshot iterator. Two ++ * useful information can be retrieved from this iterator in ++ * this context: ++ * ++ * - it->vfile is a pointer to the descriptor of the virtual ++ * file being rewound. ++ * ++ * - xnvfile_iterator_priv(it) returns a pointer to the ++ * private data area, available from the descriptor, which ++ * size is vfile->privsz. If the latter size is zero, the ++ * returned pointer is meaningless and should not be used. ++ * ++ * @return A negative error code aborts the data collection, ++ * and is passed back to the reader. 
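As the handler descriptions above point out, a regular vfile that provides no begin()/next() handlers gets exactly one show() call with a NULL data pointer, which is the natural shape for one-record /proc entries. Below is a kernel-side sketch of that minimal read-only case; every name is invented for illustration and the registration call is only shown as a comment.

/* Minimal single-record regular vfile (illustrative, not patch code). */
static int demo_vfile_show(struct xnvfile_regular_iterator *it, void *data)
{
	/* Called once with data == NULL; emit the whole record here. */
	xnvfile_printf(it, "hello from a regular vfile\n");

	return 0;
}

static struct xnvfile_regular_ops demo_vfile_ops = {
	.show = demo_vfile_show,
};

static struct xnvfile_regular demo_vfile = {
	.ops = &demo_vfile_ops,
};

/* Registration from some init path, e.g.:
 *	xnvfile_init_regular("demo", &demo_vfile, &cobalt_vfroot);
 */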
Otherwise: ++ * ++ * - a strictly positive value is interpreted as the total ++ * number of records which will be returned by the @ref ++ * snapshot_next "next() handler" during the data collection ++ * phase. If no @ref snapshot_begin "begin() handler" is ++ * provided in the @ref snapshot_ops "operation descriptor", ++ * this value is used to allocate the snapshot buffer ++ * internally. The size of this buffer would then be ++ * vfile->datasz * value. ++ * ++ * - zero leaves the allocation to the @ref snapshot_begin ++ * "begin() handler" if present, or indicates that no record ++ * is to be output in case such handler is not given. ++ * ++ * @note This handler is optional; a NULL value indicates that ++ * nothing needs to be done for rewinding the vfile. It is ++ * called with the vfile lock held. ++ */ ++ int (*rewind)(struct xnvfile_snapshot_iterator *it); ++ /** ++ * @anchor snapshot_begin ++ * This handler should allocate the snapshot buffer to hold ++ * records during the data collection phase. When specified, ++ * all records collected via the @ref snapshot_next "next() ++ * handler" will be written to a cell from the memory area ++ * returned by begin(). ++ * ++ * @param it A pointer to the current snapshot iterator. ++ * ++ * @return A pointer to the record buffer, if the call ++ * succeeds. Otherwise: ++ * ++ * - NULL in case of allocation error. This will abort the data ++ * collection, and return -ENOMEM to the reader. ++ * ++ * - VFILE_SEQ_EMPTY, a special value indicating that no ++ * record will be output. In such a case, the @ref ++ * snapshot_next "next() handler" will not be called, and the ++ * data collection will stop immediately. However, the @ref ++ * snapshot_show "show() handler" will still be called once, ++ * with a NULL data pointer (i.e. header display request). ++ * ++ * @note This handler is optional; if none is given, an ++ * internal allocation depending on the value returned by the ++ * @ref snapshot_rewind "rewind() handler" can be obtained. ++ */ ++ void *(*begin)(struct xnvfile_snapshot_iterator *it); ++ /** ++ * @anchor snapshot_end ++ * This handler releases the memory buffer previously obtained ++ * from begin(). It is usually called after the snapshot data ++ * has been output by show(), but it may also be called before ++ * rewinding the vfile after a revision change, to release the ++ * dropped buffer. ++ * ++ * @param it A pointer to the current snapshot iterator. ++ * ++ * @param buf A pointer to the buffer to release. ++ * ++ * @note This routine is optional and the pointer may be ++ * NULL. It is not needed upon internal buffer allocation; ++ * see the description of the @ref snapshot_rewind "rewind() ++ * handler". ++ */ ++ void (*end)(struct xnvfile_snapshot_iterator *it, void *buf); ++ /** ++ * @anchor snapshot_next ++ * This handler fetches the next record, as part of the ++ * snapshot data to be sent back to the reader via the ++ * show(). ++ * ++ * @param it A pointer to the current snapshot iterator. ++ * ++ * @param data A pointer to the record to fill in. ++ * ++ * @return a strictly positive value, if the call succeeds and ++ * leaves a valid record into @a data, which should be passed ++ * to the @ref snapshot_show "show() handler()" during the ++ * formatting and output phase. Otherwise: ++ * ++ * - A negative error code. This will abort the data ++ * collection, and return this status to the reader. ++ * ++ * - VFILE_SEQ_SKIP, a special value indicating that the ++ * current record should be skipped. 
In such a case, the @a ++ * data pointer is not advanced to the next position before ++ * the @ref snapshot_next "next() handler" is called anew. ++ * ++ * @note This handler is called with the vfile lock ++ * held. Before each invocation of this handler, the vfile ++ * core checks whether the revision tag has been touched, in ++ * which case the data collection is restarted from scratch. A ++ * data collection phase succeeds whenever all records can be ++ * fetched via the @ref snapshot_next "next() handler", while ++ * the revision tag remains unchanged, which indicates that a ++ * consistent snapshot of the object state was taken. ++ */ ++ int (*next)(struct xnvfile_snapshot_iterator *it, void *data); ++ /** ++ * @anchor snapshot_show ++ * This handler should format and output a record from the ++ * collected data. ++ * ++ * xnvfile_printf(), xnvfile_write(), xnvfile_puts() and ++ * xnvfile_putc() are available to format and/or emit the ++ * output. All routines take the iterator argument @a it as ++ * their first parameter. ++ * ++ * @param it A pointer to the current snapshot iterator. ++ * ++ * @param data A pointer to the record to format then ++ * output. The first call to the handler is always passed a ++ * NULL @a data pointer; the show handler should test this ++ * special value to output any header that fits, prior to ++ * receiving more calls with actual records. ++ * ++ * @return zero if the call succeeds, also indicating that the ++ * handler should be called for the next record if ++ * any. Otherwise: ++ * ++ * - A negative error code. This will abort the output phase, ++ * and return this status to the reader. ++ * ++ * - VFILE_SEQ_SKIP, a special value indicating that the ++ * current record should be skipped and will not be output. ++ */ ++ int (*show)(struct xnvfile_snapshot_iterator *it, void *data); ++ /** ++ * @anchor snapshot_store ++ * This handler receives data written to the vfile, likely for ++ * updating the associated Xenomai object's state, or ++ * triggering any other action which fits. This is the only ++ * handler which deals with the write-side of a vfile. It is ++ * called when writing to the /proc entry of the vfile ++ * from a user-space process. ++ * ++ * The input data is described by a descriptor passed to the ++ * handler, which may be subsequently passed to parsing helper ++ * routines. For instance, xnvfile_get_string() will accept ++ * the input descriptor for returning the written data as a ++ * null-terminated character string. On the other hand, ++ * xnvfile_get_integer() will attempt to return a long integer ++ * from the input data. ++ * ++ * @param input A pointer to an input descriptor. It refers to ++ * an opaque data from the handler's standpoint. ++ * ++ * @return the number of bytes read from the input descriptor ++ * if the call succeeds. Otherwise, a negative error code. ++ * Return values from parsing helper routines are commonly ++ * passed back to the caller by the @ref snapshot_store ++ * "store() handler". ++ * ++ * @note This handler is optional, and may be omitted for ++ * read-only vfiles. ++ */ ++ ssize_t (*store)(struct xnvfile_input *input); ++}; ++ ++/** ++ * @brief Snapshot revision tag ++ * @anchor revision_tag ++ * ++ * This structure defines a revision tag to be used with @ref ++ * snapshot_vfile "snapshot-driven vfiles". ++ */ ++struct xnvfile_rev_tag { ++ /** Current revision number. 
*/ ++ int rev; ++}; ++ ++struct xnvfile_snapshot_template { ++ size_t privsz; ++ size_t datasz; ++ struct xnvfile_rev_tag *tag; ++ struct xnvfile_snapshot_ops *ops; ++ struct xnvfile_lock_ops *lockops; ++}; ++ ++/** ++ * @brief Snapshot vfile descriptor ++ * @anchor snapshot_vfile ++ * ++ * This structure describes a snapshot-driven vfile. Reading from ++ * such a vfile involves a preliminary data collection phase under ++ * lock protection, and a subsequent formatting and output phase of ++ * the collected data records. Locking is done in a way that does not ++ * increase worst-case latency, regardless of the number of records to ++ * be collected for output. ++ */ ++struct xnvfile_snapshot { ++ struct xnvfile entry; ++ size_t privsz; ++ size_t datasz; ++ struct xnvfile_rev_tag *tag; ++ struct xnvfile_snapshot_ops *ops; ++}; ++ ++/** ++ * @brief Snapshot-driven vfile iterator ++ * @anchor snapshot_iterator ++ * ++ * This structure defines an iterator over a snapshot-driven vfile. ++ */ ++struct xnvfile_snapshot_iterator { ++ /** Number of collected records. */ ++ int nrdata; ++ /** Address of record buffer. */ ++ caddr_t databuf; ++ /** Backlink to the host sequential file supporting the vfile. */ ++ struct seq_file *seq; ++ /** Backlink to the vfile being read. */ ++ struct xnvfile_snapshot *vfile; ++ /** Buffer release handler. */ ++ void (*endfn)(struct xnvfile_snapshot_iterator *it, void *buf); ++ /** ++ * Start of private area. Use xnvfile_iterator_priv() to ++ * address it. ++ */ ++ char private[0]; ++}; ++ ++struct xnvfile_directory { ++ struct xnvfile entry; ++}; ++ ++struct xnvfile_link { ++ struct xnvfile entry; ++}; ++ ++/* vfile.begin()=> */ ++#define VFILE_SEQ_EMPTY ((void *)-1) ++/* =>vfile.show() */ ++#define VFILE_SEQ_START SEQ_START_TOKEN ++/* vfile.next/show()=> */ ++#define VFILE_SEQ_SKIP 2 ++ ++#define xnvfile_printf(it, args...) 
seq_printf((it)->seq, ##args) ++#define xnvfile_write(it, data, len) seq_write((it)->seq, (data),(len)) ++#define xnvfile_puts(it, s) seq_puts((it)->seq, (s)) ++#define xnvfile_putc(it, c) seq_putc((it)->seq, (c)) ++ ++static inline void xnvfile_touch_tag(struct xnvfile_rev_tag *tag) ++{ ++ tag->rev++; ++} ++ ++static inline void xnvfile_touch(struct xnvfile_snapshot *vfile) ++{ ++ xnvfile_touch_tag(vfile->tag); ++} ++ ++#define xnvfile_noentry \ ++ { \ ++ .pde = NULL, \ ++ .private = NULL, \ ++ .file = NULL, \ ++ .refcnt = 0, \ ++ } ++ ++#define xnvfile_nodir { .entry = xnvfile_noentry } ++#define xnvfile_nolink { .entry = xnvfile_noentry } ++#define xnvfile_nofile { .entry = xnvfile_noentry } ++ ++#define xnvfile_priv(e) ((e)->entry.private) ++#define xnvfile_nref(e) ((e)->entry.refcnt) ++#define xnvfile_file(e) ((e)->entry.file) ++#define xnvfile_iterator_priv(it) ((void *)(&(it)->private)) ++ ++extern struct xnvfile_nklock_class xnvfile_nucleus_lock; ++ ++extern struct xnvfile_directory cobalt_vfroot; ++ ++int xnvfile_init_root(void); ++ ++void xnvfile_destroy_root(void); ++ ++int xnvfile_init_snapshot(const char *name, ++ struct xnvfile_snapshot *vfile, ++ struct xnvfile_directory *parent); ++ ++int xnvfile_init_regular(const char *name, ++ struct xnvfile_regular *vfile, ++ struct xnvfile_directory *parent); ++ ++int xnvfile_init_dir(const char *name, ++ struct xnvfile_directory *vdir, ++ struct xnvfile_directory *parent); ++ ++int xnvfile_init_link(const char *from, ++ const char *to, ++ struct xnvfile_link *vlink, ++ struct xnvfile_directory *parent); ++ ++void xnvfile_destroy(struct xnvfile *vfile); ++ ++ssize_t xnvfile_get_blob(struct xnvfile_input *input, ++ void *data, size_t size); ++ ++ssize_t xnvfile_get_string(struct xnvfile_input *input, ++ char *s, size_t maxlen); ++ ++ssize_t xnvfile_get_integer(struct xnvfile_input *input, long *valp); ++ ++int __vfile_hostlock_get(struct xnvfile *vfile); ++ ++void __vfile_hostlock_put(struct xnvfile *vfile); ++ ++static inline ++void xnvfile_destroy_snapshot(struct xnvfile_snapshot *vfile) ++{ ++ xnvfile_destroy(&vfile->entry); ++} ++ ++static inline ++void xnvfile_destroy_regular(struct xnvfile_regular *vfile) ++{ ++ xnvfile_destroy(&vfile->entry); ++} ++ ++static inline ++void xnvfile_destroy_dir(struct xnvfile_directory *vdir) ++{ ++ xnvfile_destroy(&vdir->entry); ++} ++ ++static inline ++void xnvfile_destroy_link(struct xnvfile_link *vlink) ++{ ++ xnvfile_destroy(&vlink->entry); ++} ++ ++#define DEFINE_VFILE_HOSTLOCK(name) \ ++ struct xnvfile_hostlock_class name = { \ ++ .ops = { \ ++ .get = __vfile_hostlock_get, \ ++ .put = __vfile_hostlock_put, \ ++ }, \ ++ .mutex = __MUTEX_INITIALIZER(name.mutex), \ ++ } ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++#define xnvfile_touch_tag(tag) do { } while (0) ++ ++#define xnvfile_touch(vfile) do { } while (0) ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_VFILE_H */ +--- linux/include/xenomai/cobalt/kernel/sched-rt.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-rt.h 2021-04-07 16:01:28.191632789 +0800 +@@ -0,0 +1,150 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_RT_H ++#define _COBALT_KERNEL_SCHED_RT_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-rt.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++/* ++ * Global priority scale for Xenomai's core scheduling class, ++ * available to SCHED_COBALT members. ++ */ ++#define XNSCHED_CORE_MIN_PRIO 0 ++#define XNSCHED_CORE_MAX_PRIO 259 ++#define XNSCHED_CORE_NR_PRIO \ ++ (XNSCHED_CORE_MAX_PRIO - XNSCHED_CORE_MIN_PRIO + 1) ++ ++/* ++ * Priority range for SCHED_FIFO, and all other classes Cobalt ++ * implements except SCHED_COBALT. ++ */ ++#define XNSCHED_FIFO_MIN_PRIO 1 ++#define XNSCHED_FIFO_MAX_PRIO 256 ++ ++#if XNSCHED_CORE_NR_PRIO > XNSCHED_CLASS_WEIGHT_FACTOR || \ ++ (defined(CONFIG_XENO_OPT_SCALABLE_SCHED) && \ ++ XNSCHED_CORE_NR_PRIO > XNSCHED_MLQ_LEVELS) ++#error "XNSCHED_MLQ_LEVELS is too low" ++#endif ++ ++extern struct xnsched_class xnsched_class_rt; ++ ++static inline void __xnsched_rt_requeue(struct xnthread *thread) ++{ ++ xnsched_addq(&thread->sched->rt.runnable, thread); ++} ++ ++static inline void __xnsched_rt_enqueue(struct xnthread *thread) ++{ ++ xnsched_addq_tail(&thread->sched->rt.runnable, thread); ++} ++ ++static inline void __xnsched_rt_dequeue(struct xnthread *thread) ++{ ++ xnsched_delq(&thread->sched->rt.runnable, thread); ++} ++ ++static inline void __xnsched_rt_track_weakness(struct xnthread *thread) ++{ ++ /* ++ * We have to track threads exiting weak scheduling, i.e. any ++ * thread leaving the WEAK class code if compiled in, or ++ * assigned a zero priority if weak threads are hosted by the ++ * RT class. ++ * ++ * CAUTION: since we need to check the effective priority ++ * level for determining the weakness state, this can only ++ * apply to non-boosted threads. ++ */ ++ if (IS_ENABLED(CONFIG_XENO_OPT_SCHED_WEAK) || thread->cprio) ++ xnthread_clear_state(thread, XNWEAK); ++ else ++ xnthread_set_state(thread, XNWEAK); ++} ++ ++static inline bool __xnsched_rt_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ bool ret = xnsched_set_effective_priority(thread, p->rt.prio); ++ ++ if (!xnthread_test_state(thread, XNBOOST)) ++ __xnsched_rt_track_weakness(thread); ++ ++ return ret; ++} ++ ++static inline void __xnsched_rt_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->rt.prio = thread->cprio; ++} ++ ++static inline void __xnsched_rt_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p) ++ thread->cprio = p->rt.prio; /* Force update. */ ++ else { ++ thread->cprio = thread->bprio; ++ /* Leaving PI/PP, so non-boosted by definition. */ ++ __xnsched_rt_track_weakness(thread); ++ } ++} ++ ++static inline void __xnsched_rt_protectprio(struct xnthread *thread, int prio) ++{ ++ /* ++ * The RT class supports the widest priority range from ++ * XNSCHED_CORE_MIN_PRIO to XNSCHED_CORE_MAX_PRIO inclusive, ++ * no need to cap the input value which is guaranteed to be in ++ * the range [1..XNSCHED_CORE_MAX_PRIO]. 
++ */ ++ thread->cprio = prio; ++} ++ ++static inline void __xnsched_rt_forget(struct xnthread *thread) ++{ ++} ++ ++static inline int xnsched_rt_init_thread(struct xnthread *thread) ++{ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_SCHED_CLASSES ++struct xnthread *xnsched_rt_pick(struct xnsched *sched); ++#else ++static inline struct xnthread *xnsched_rt_pick(struct xnsched *sched) ++{ ++ return xnsched_getq(&sched->rt.runnable); ++} ++#endif ++ ++void xnsched_rt_tick(struct xnsched *sched); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_RT_H */ +--- linux/include/xenomai/cobalt/kernel/vdso.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/vdso.h 2021-04-07 16:01:28.186632796 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (C) 2009 Wolfgang Mauerer . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_VDSO_H ++#define _COBALT_KERNEL_VDSO_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Define the available feature set here. We have a single feature ++ * defined for now. ++ */ ++#ifdef CONFIG_XENO_OPT_HOSTRT ++#define XNVDSO_FEATURES XNVDSO_FEAT_HOST_REALTIME ++#else ++#define XNVDSO_FEATURES 0 ++#endif /* CONFIG_XENO_OPT_HOSTRT */ ++ ++extern struct xnvdso *nkvdso; ++ ++static inline struct xnvdso_hostrt_data *get_hostrt_data(void) ++{ ++ return &nkvdso->hostrt_data; ++} ++ ++#endif /* _COBALT_KERNEL_VDSO_H */ +--- linux/include/xenomai/cobalt/kernel/synch.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/synch.h 2021-04-07 16:01:28.181632803 +0800 +@@ -0,0 +1,179 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_SYNCH_H ++#define _COBALT_KERNEL_SYNCH_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_synch ++ * @{ ++ */ ++#define XNSYNCH_CLAIMED 0x100 /* Claimed by other thread(s) (PI) */ ++#define XNSYNCH_CEILING 0x200 /* Actively boosting (PP) */ ++ ++/* Spare flags usable by upper interfaces */ ++#define XNSYNCH_SPARE0 0x01000000 ++#define XNSYNCH_SPARE1 0x02000000 ++#define XNSYNCH_SPARE2 0x04000000 ++#define XNSYNCH_SPARE3 0x08000000 ++#define XNSYNCH_SPARE4 0x10000000 ++#define XNSYNCH_SPARE5 0x20000000 ++#define XNSYNCH_SPARE6 0x40000000 ++#define XNSYNCH_SPARE7 0x80000000 ++ ++/* Statuses */ ++#define XNSYNCH_DONE 0 /* Resource available / operation complete */ ++#define XNSYNCH_WAIT 1 /* Calling thread blocked -- start rescheduling */ ++#define XNSYNCH_RESCHED 2 /* Force rescheduling */ ++ ++struct xnthread; ++struct xnsynch; ++ ++struct xnsynch { ++ /** wait (weighted) prio in thread->boosters */ ++ int wprio; ++ /** thread->boosters */ ++ struct list_head next; ++ /** ++ * &variable holding the current priority ceiling value ++ * (xnsched_class_rt-based, [1..255], XNSYNCH_PP). ++ */ ++ u32 *ceiling_ref; ++ /** Status word */ ++ unsigned long status; ++ /** Pending threads */ ++ struct list_head pendq; ++ /** Thread which owns the resource */ ++ struct xnthread *owner; ++ /** Pointer to fast lock word */ ++ atomic_t *fastlock; ++ /* Cleanup handler */ ++ void (*cleanup)(struct xnsynch *synch); ++}; ++ ++#define XNSYNCH_WAITQUEUE_INITIALIZER(__name) { \ ++ .status = XNSYNCH_PRIO, \ ++ .wprio = -1, \ ++ .pendq = LIST_HEAD_INIT((__name).pendq), \ ++ .owner = NULL, \ ++ .cleanup = NULL, \ ++ .fastlock = NULL, \ ++ } ++ ++#define DEFINE_XNWAITQ(__name) \ ++ struct xnsynch __name = XNSYNCH_WAITQUEUE_INITIALIZER(__name) ++ ++static inline void xnsynch_set_status(struct xnsynch *synch, int bits) ++{ ++ synch->status |= bits; ++} ++ ++static inline void xnsynch_clear_status(struct xnsynch *synch, int bits) ++{ ++ synch->status &= ~bits; ++} ++ ++#define xnsynch_for_each_sleeper(__pos, __synch) \ ++ list_for_each_entry(__pos, &(__synch)->pendq, plink) ++ ++#define xnsynch_for_each_sleeper_safe(__pos, __tmp, __synch) \ ++ list_for_each_entry_safe(__pos, __tmp, &(__synch)->pendq, plink) ++ ++static inline int xnsynch_pended_p(struct xnsynch *synch) ++{ ++ return !list_empty(&synch->pendq); ++} ++ ++static inline struct xnthread *xnsynch_owner(struct xnsynch *synch) ++{ ++ return synch->owner; ++} ++ ++#define xnsynch_fastlock(synch) ((synch)->fastlock) ++#define xnsynch_fastlock_p(synch) ((synch)->fastlock != NULL) ++#define xnsynch_owner_check(synch, thread) \ ++ xnsynch_fast_owner_check((synch)->fastlock, thread->handle) ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED ++ ++void xnsynch_detect_relaxed_owner(struct xnsynch *synch, ++ struct xnthread *sleeper); ++ ++void xnsynch_detect_boosted_relax(struct xnthread *owner); ++ ++#else /* !CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED */ ++ ++static inline void xnsynch_detect_relaxed_owner(struct xnsynch *synch, ++ struct xnthread *sleeper) { } ++ ++static inline void xnsynch_detect_boosted_relax(struct xnthread *owner) { } ++ ++#endif /* !CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED */ ++ ++void xnsynch_init(struct xnsynch *synch, int flags, ++ atomic_t *fastlock); ++ ++void xnsynch_init_protect(struct xnsynch *synch, int flags, ++ atomic_t *fastlock, u32 *ceiling_ref); ++ ++int xnsynch_destroy(struct xnsynch *synch); ++ ++void xnsynch_commit_ceiling(struct xnthread *curr); ++ ++static inline 
void xnsynch_register_cleanup(struct xnsynch *synch, ++ void (*handler)(struct xnsynch *)) ++{ ++ synch->cleanup = handler; ++} ++ ++int __must_check xnsynch_sleep_on(struct xnsynch *synch, ++ xnticks_t timeout, ++ xntmode_t timeout_mode); ++ ++struct xnthread *xnsynch_wakeup_one_sleeper(struct xnsynch *synch); ++ ++int xnsynch_wakeup_many_sleepers(struct xnsynch *synch, int nr); ++ ++void xnsynch_wakeup_this_sleeper(struct xnsynch *synch, ++ struct xnthread *sleeper); ++ ++int __must_check xnsynch_acquire(struct xnsynch *synch, ++ xnticks_t timeout, ++ xntmode_t timeout_mode); ++ ++int __must_check xnsynch_try_acquire(struct xnsynch *synch); ++ ++bool xnsynch_release(struct xnsynch *synch, struct xnthread *thread); ++ ++struct xnthread *xnsynch_peek_pendq(struct xnsynch *synch); ++ ++int xnsynch_flush(struct xnsynch *synch, int reason); ++ ++void xnsynch_requeue_sleeper(struct xnthread *thread); ++ ++void xnsynch_forget_sleeper(struct xnthread *thread); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SYNCH_H_ */ +--- linux/include/xenomai/cobalt/kernel/list.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/list.h 2021-04-07 16:01:28.177632809 +0800 +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_LIST_H ++#define _COBALT_KERNEL_LIST_H ++ ++#include ++ ++#define __list_add_pri(__new, __head, __member_pri, __member_next, __relop) \ ++do { \ ++ typeof(*__new) *__pos; \ ++ if (list_empty(__head)) \ ++ list_add(&(__new)->__member_next, __head); \ ++ else { \ ++ list_for_each_entry_reverse(__pos, __head, __member_next) { \ ++ if ((__new)->__member_pri __relop __pos->__member_pri) \ ++ break; \ ++ } \ ++ list_add(&(__new)->__member_next, &__pos->__member_next); \ ++ } \ ++} while (0) ++ ++#define list_add_priff(__new, __head, __member_pri, __member_next) \ ++ __list_add_pri(__new, __head, __member_pri, __member_next, <=) ++ ++#define list_add_prilf(__new, __head, __member_pri, __member_next) \ ++ __list_add_pri(__new, __head, __member_pri, __member_next, <) ++ ++#define list_get_entry(__head, __type, __member) \ ++ ({ \ ++ __type *__item; \ ++ __item = list_first_entry(__head, __type, __member); \ ++ list_del(&__item->__member); \ ++ __item; \ ++ }) ++ ++#define list_get_entry_init(__head, __type, __member) \ ++ ({ \ ++ __type *__item; \ ++ __item = list_first_entry(__head, __type, __member); \ ++ list_del_init(&__item->__member); \ ++ __item; \ ++ }) ++ ++#ifndef list_next_entry ++#define list_next_entry(__item, __member) \ ++ list_entry((__item)->__member.next, typeof(*(__item)), __member) ++#endif ++ ++#endif /* !_COBALT_KERNEL_LIST_H_ */ +--- linux/include/xenomai/cobalt/kernel/sched-weak.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-weak.h 2021-04-07 16:01:28.172632816 +0800 +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_WEAK_H ++#define _COBALT_KERNEL_SCHED_WEAK_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-weak.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ ++#define XNSCHED_WEAK_MIN_PRIO 0 ++#define XNSCHED_WEAK_MAX_PRIO 99 ++#define XNSCHED_WEAK_NR_PRIO \ ++ (XNSCHED_WEAK_MAX_PRIO - XNSCHED_WEAK_MIN_PRIO + 1) ++ ++#if XNSCHED_WEAK_NR_PRIO > XNSCHED_CLASS_WEIGHT_FACTOR || \ ++ (defined(CONFIG_XENO_OPT_SCALABLE_SCHED) && \ ++ XNSCHED_WEAK_NR_PRIO > XNSCHED_MLQ_LEVELS) ++#error "WEAK class has too many priority levels" ++#endif ++ ++extern struct xnsched_class xnsched_class_weak; ++ ++struct xnsched_weak { ++ xnsched_queue_t runnable; /*!< Runnable thread queue. 
*/ ++}; ++ ++static inline int xnsched_weak_init_thread(struct xnthread *thread) ++{ ++ return 0; ++} ++ ++#endif /* CONFIG_XENO_OPT_SCHED_WEAK */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_WEAK_H */ +--- linux/include/xenomai/cobalt/kernel/ancillaries.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/ancillaries.h 2021-04-07 16:01:28.167632823 +0800 +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_ANCILLARIES_H ++#define _COBALT_KERNEL_ANCILLARIES_H ++ ++#include ++#include ++#include ++#include ++ ++#define ksformat(__dst, __len, __fmt, __args...) \ ++ ({ \ ++ size_t __ret; \ ++ __ret = snprintf(__dst, __len, __fmt, ##__args); \ ++ if (__ret >= __len) \ ++ __dst[__len-1] = '\0'; \ ++ __ret; \ ++ }) ++ ++#define kasformat(__fmt, __args...) \ ++ ({ \ ++ kasprintf(GFP_KERNEL, __fmt, ##__args); \ ++ }) ++ ++#define kvsformat(__dst, __len, __fmt, __ap) \ ++ ({ \ ++ size_t __ret; \ ++ __ret = vsnprintf(__dst, __len, __fmt, __ap); \ ++ if (__ret >= __len) \ ++ __dst[__len-1] = '\0'; \ ++ __ret; \ ++ }) ++ ++#define kvasformat(__fmt, __ap) \ ++ ({ \ ++ kvasprintf(GFP_KERNEL, __fmt, __ap); \ ++ }) ++ ++void __knamecpy_requires_character_array_as_destination(void); ++ ++#define knamecpy(__dst, __src) \ ++ ({ \ ++ if (!__builtin_types_compatible_p(typeof(__dst), char[])) \ ++ __knamecpy_requires_character_array_as_destination(); \ ++ strncpy((__dst), __src, sizeof(__dst)); \ ++ __dst[sizeof(__dst) - 1] = '\0'; \ ++ __dst; \ ++ }) ++ ++#define get_current_uuid() from_kuid_munged(current_user_ns(), current_uid()) ++ ++#endif /* !_COBALT_KERNEL_ANCILLARIES_H */ +--- linux/include/xenomai/cobalt/kernel/map.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/map.h 2021-04-07 16:01:28.163632829 +0800 +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (C) 2007 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_MAP_H ++#define _COBALT_KERNEL_MAP_H ++ ++#include ++ ++/** ++ * @addtogroup cobalt_core_map ++ * @{ ++ */ ++ ++#define XNMAP_MAX_KEYS (BITS_PER_LONG * BITS_PER_LONG) ++ ++struct xnmap { ++ int nkeys; ++ int ukeys; ++ int offset; ++ unsigned long himask; ++ unsigned long himap; ++#define __IDMAP_LONGS ((XNMAP_MAX_KEYS+BITS_PER_LONG-1)/BITS_PER_LONG) ++ unsigned long lomap[__IDMAP_LONGS]; ++#undef __IDMAP_LONGS ++ void *objarray[1]; ++}; ++ ++struct xnmap *xnmap_create(int nkeys, ++ int reserve, ++ int offset); ++ ++void xnmap_delete(struct xnmap *map); ++ ++int xnmap_enter(struct xnmap *map, ++ int key, ++ void *objaddr); ++ ++int xnmap_remove(struct xnmap *map, ++ int key); ++ ++static inline void *xnmap_fetch_nocheck(struct xnmap *map, int key) ++{ ++ int ofkey = key - map->offset; ++ return map->objarray[ofkey]; ++} ++ ++static inline void *xnmap_fetch(struct xnmap *map, int key) ++{ ++ int ofkey = key - map->offset; ++ ++ if (ofkey < 0 || ofkey >= map->nkeys) ++ return NULL; ++ ++ return map->objarray[ofkey]; ++} ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_MAP_H */ +--- linux/include/xenomai/cobalt/kernel/bufd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/bufd.h 2021-04-07 16:01:28.158632836 +0800 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_BUFD_H ++#define _COBALT_KERNEL_BUFD_H ++ ++#include ++ ++/** ++ * @addtogroup cobalt_core_bufd ++ * ++ * @{ ++ */ ++ ++struct mm_struct; ++ ++struct xnbufd { ++ caddr_t b_ptr; /* src/dst buffer address */ ++ size_t b_len; /* total length of buffer */ ++ off_t b_off; /* # of bytes read/written */ ++ struct mm_struct *b_mm; /* src/dst address space */ ++ caddr_t b_carry; /* pointer to carry over area */ ++ char b_buf[64]; /* fast carry over area */ ++}; ++ ++void xnbufd_map_umem(struct xnbufd *bufd, ++ void __user *ptr, size_t len); ++ ++static inline void xnbufd_map_uread(struct xnbufd *bufd, ++ const void __user *ptr, size_t len) ++{ ++ xnbufd_map_umem(bufd, (void __user *)ptr, len); ++} ++ ++static inline void xnbufd_map_uwrite(struct xnbufd *bufd, ++ void __user *ptr, size_t len) ++{ ++ xnbufd_map_umem(bufd, ptr, len); ++} ++ ++ssize_t xnbufd_unmap_uread(struct xnbufd *bufd); ++ ++ssize_t xnbufd_unmap_uwrite(struct xnbufd *bufd); ++ ++void xnbufd_map_kmem(struct xnbufd *bufd, ++ void *ptr, size_t len); ++ ++static inline void xnbufd_map_kread(struct xnbufd *bufd, ++ const void *ptr, size_t len) ++{ ++ xnbufd_map_kmem(bufd, (void *)ptr, len); ++} ++ ++static inline void xnbufd_map_kwrite(struct xnbufd *bufd, ++ void *ptr, size_t len) ++{ ++ xnbufd_map_kmem(bufd, ptr, len); ++} ++ ++ssize_t xnbufd_unmap_kread(struct xnbufd *bufd); ++ ++ssize_t xnbufd_unmap_kwrite(struct xnbufd *bufd); ++ ++ssize_t xnbufd_copy_to_kmem(void *ptr, ++ struct xnbufd *bufd, size_t len); ++ ++ssize_t xnbufd_copy_from_kmem(struct xnbufd *bufd, ++ void *from, size_t len); ++ ++void xnbufd_invalidate(struct xnbufd *bufd); ++ ++static inline void xnbufd_reset(struct xnbufd *bufd) ++{ ++ bufd->b_off = 0; ++} ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_BUFD_H */ +--- linux/include/xenomai/cobalt/kernel/clock.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/clock.h 2021-04-07 16:01:28.153632843 +0800 +@@ -0,0 +1,361 @@ ++/* ++ * Copyright (C) 2006,2007 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_CLOCK_H ++#define _COBALT_KERNEL_CLOCK_H ++ ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_clock ++ * @{ ++ */ ++ ++struct xnsched; ++struct xntimerdata; ++ ++struct xnclock_gravity { ++ unsigned long irq; ++ unsigned long kernel; ++ unsigned long user; ++}; ++ ++struct xnclock { ++ /** (ns) */ ++ xnticks_t wallclock_offset; ++ /** (ns) */ ++ xnticks_t resolution; ++ /** (raw clock ticks). */ ++ struct xnclock_gravity gravity; ++ /** Clock name. 
*/ ++ const char *name; ++ struct { ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ xnticks_t (*read_raw)(struct xnclock *clock); ++ xnticks_t (*read_monotonic)(struct xnclock *clock); ++ int (*set_time)(struct xnclock *clock, ++ const struct timespec *ts); ++ xnsticks_t (*ns_to_ticks)(struct xnclock *clock, ++ xnsticks_t ns); ++ xnsticks_t (*ticks_to_ns)(struct xnclock *clock, ++ xnsticks_t ticks); ++ xnsticks_t (*ticks_to_ns_rounded)(struct xnclock *clock, ++ xnsticks_t ticks); ++ void (*program_local_shot)(struct xnclock *clock, ++ struct xnsched *sched); ++ void (*program_remote_shot)(struct xnclock *clock, ++ struct xnsched *sched); ++#endif ++ int (*adjust_time)(struct xnclock *clock, ++ struct timex *tx); ++ int (*set_gravity)(struct xnclock *clock, ++ const struct xnclock_gravity *p); ++ void (*reset_gravity)(struct xnclock *clock); ++#ifdef CONFIG_XENO_OPT_VFILE ++ void (*print_status)(struct xnclock *clock, ++ struct xnvfile_regular_iterator *it); ++#endif ++ } ops; ++ /* Private section. */ ++ struct xntimerdata *timerdata; ++ int id; ++#ifdef CONFIG_SMP ++ /** Possible CPU affinity of clock beat. */ ++ cpumask_t affinity; ++#endif ++#ifdef CONFIG_XENO_OPT_STATS ++ struct xnvfile_snapshot timer_vfile; ++ struct xnvfile_rev_tag timer_revtag; ++ struct list_head timerq; ++ int nrtimers; ++#endif /* CONFIG_XENO_OPT_STATS */ ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct xnvfile_regular vfile; ++#endif ++}; ++ ++struct xnclock_ratelimit_state { ++ xnticks_t interval; ++ xnticks_t begin; ++ int burst; ++ int printed; ++ int missed; ++}; ++ ++extern struct xnclock nkclock; ++ ++extern unsigned long nktimerlat; ++ ++int xnclock_register(struct xnclock *clock, ++ const cpumask_t *affinity); ++ ++void xnclock_deregister(struct xnclock *clock); ++ ++void xnclock_tick(struct xnclock *clock); ++ ++void xnclock_adjust(struct xnclock *clock, ++ xnsticks_t delta); ++ ++void xnclock_core_local_shot(struct xnsched *sched); ++ ++void xnclock_core_remote_shot(struct xnsched *sched); ++ ++xnsticks_t xnclock_core_ns_to_ticks(xnsticks_t ns); ++ ++xnsticks_t xnclock_core_ticks_to_ns(xnsticks_t ticks); ++ ++xnsticks_t xnclock_core_ticks_to_ns_rounded(xnsticks_t ticks); ++ ++xnticks_t xnclock_core_read_monotonic(void); ++ ++static inline xnticks_t xnclock_core_read_raw(void) ++{ ++ unsigned long long t; ++ ipipe_read_tsc(t); ++ return t; ++} ++ ++/* We use the Linux defaults */ ++#define XN_RATELIMIT_INTERVAL 5000000000LL ++#define XN_RATELIMIT_BURST 10 ++ ++int __xnclock_ratelimit(struct xnclock_ratelimit_state *rs, const char *func); ++ ++#define xnclock_ratelimit() ({ \ ++ static struct xnclock_ratelimit_state __state = { \ ++ .interval = XN_RATELIMIT_INTERVAL, \ ++ .burst = XN_RATELIMIT_BURST, \ ++ }; \ ++ __xnclock_ratelimit(&__state, __func__); \ ++}) ++ ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ ++static inline void xnclock_program_shot(struct xnclock *clock, ++ struct xnsched *sched) ++{ ++ if (likely(clock == &nkclock)) ++ xnclock_core_local_shot(sched); ++ else if (clock->ops.program_local_shot) ++ clock->ops.program_local_shot(clock, sched); ++} ++ ++static inline void xnclock_remote_shot(struct xnclock *clock, ++ struct xnsched *sched) ++{ ++#ifdef CONFIG_SMP ++ if (likely(clock == &nkclock)) ++ xnclock_core_remote_shot(sched); ++ else if (clock->ops.program_remote_shot) ++ clock->ops.program_remote_shot(clock, sched); ++#endif ++} ++ ++static inline xnticks_t xnclock_read_raw(struct xnclock *clock) ++{ ++ if (likely(clock == &nkclock)) ++ return xnclock_core_read_raw(); ++ ++ return clock->ops.read_raw(clock); 
++} ++ ++static inline xnsticks_t xnclock_ns_to_ticks(struct xnclock *clock, ++ xnsticks_t ns) ++{ ++ if (likely(clock == &nkclock)) ++ return xnclock_core_ns_to_ticks(ns); ++ ++ return clock->ops.ns_to_ticks(clock, ns); ++} ++ ++static inline xnsticks_t xnclock_ticks_to_ns(struct xnclock *clock, ++ xnsticks_t ticks) ++{ ++ if (likely(clock == &nkclock)) ++ return xnclock_core_ticks_to_ns(ticks); ++ ++ return clock->ops.ticks_to_ns(clock, ticks); ++} ++ ++static inline xnsticks_t xnclock_ticks_to_ns_rounded(struct xnclock *clock, ++ xnsticks_t ticks) ++{ ++ if (likely(clock == &nkclock)) ++ return xnclock_core_ticks_to_ns_rounded(ticks); ++ ++ return clock->ops.ticks_to_ns_rounded(clock, ticks); ++} ++ ++static inline xnticks_t xnclock_read_monotonic(struct xnclock *clock) ++{ ++ if (likely(clock == &nkclock)) ++ return xnclock_core_read_monotonic(); ++ ++ return clock->ops.read_monotonic(clock); ++} ++ ++static inline int xnclock_set_time(struct xnclock *clock, ++ const struct timespec *ts) ++{ ++ if (likely(clock == &nkclock)) ++ return -EINVAL; ++ ++ return clock->ops.set_time(clock, ts); ++} ++ ++#else /* !CONFIG_XENO_OPT_EXTCLOCK */ ++ ++static inline void xnclock_program_shot(struct xnclock *clock, ++ struct xnsched *sched) ++{ ++ xnclock_core_local_shot(sched); ++} ++ ++static inline void xnclock_remote_shot(struct xnclock *clock, ++ struct xnsched *sched) ++{ ++#ifdef CONFIG_SMP ++ xnclock_core_remote_shot(sched); ++#endif ++} ++ ++static inline xnticks_t xnclock_read_raw(struct xnclock *clock) ++{ ++ return xnclock_core_read_raw(); ++} ++ ++static inline xnsticks_t xnclock_ns_to_ticks(struct xnclock *clock, ++ xnsticks_t ns) ++{ ++ return xnclock_core_ns_to_ticks(ns); ++} ++ ++static inline xnsticks_t xnclock_ticks_to_ns(struct xnclock *clock, ++ xnsticks_t ticks) ++{ ++ return xnclock_core_ticks_to_ns(ticks); ++} ++ ++static inline xnsticks_t xnclock_ticks_to_ns_rounded(struct xnclock *clock, ++ xnsticks_t ticks) ++{ ++ return xnclock_core_ticks_to_ns_rounded(ticks); ++} ++ ++static inline xnticks_t xnclock_read_monotonic(struct xnclock *clock) ++{ ++ return xnclock_core_read_monotonic(); ++} ++ ++static inline int xnclock_set_time(struct xnclock *clock, ++ const struct timespec *ts) ++{ ++ /* ++ * There is no way to change the core clock's idea of time. 
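++ * The request is therefore rejected with -EINVAL below, matching what
++ * the CONFIG_XENO_OPT_EXTCLOCK variant of this helper returns for
++ * &nkclock.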
++ */ ++ return -EINVAL; ++} ++ ++#endif /* !CONFIG_XENO_OPT_EXTCLOCK */ ++ ++static inline int xnclock_adjust_time(struct xnclock *clock, ++ struct timex *tx) ++{ ++ if (clock->ops.adjust_time == NULL) ++ return -EOPNOTSUPP; ++ ++ return clock->ops.adjust_time(clock, tx); ++} ++ ++static inline xnticks_t xnclock_get_offset(struct xnclock *clock) ++{ ++ return clock->wallclock_offset; ++} ++ ++static inline xnticks_t xnclock_get_resolution(struct xnclock *clock) ++{ ++ return clock->resolution; /* ns */ ++} ++ ++static inline void xnclock_set_resolution(struct xnclock *clock, ++ xnticks_t resolution) ++{ ++ clock->resolution = resolution; /* ns */ ++} ++ ++static inline int xnclock_set_gravity(struct xnclock *clock, ++ const struct xnclock_gravity *gravity) ++{ ++ if (clock->ops.set_gravity) ++ return clock->ops.set_gravity(clock, gravity); ++ ++ return -EINVAL; ++} ++ ++static inline void xnclock_reset_gravity(struct xnclock *clock) ++{ ++ if (clock->ops.reset_gravity) ++ clock->ops.reset_gravity(clock); ++} ++ ++#define xnclock_get_gravity(__clock, __type) ((__clock)->gravity.__type) ++ ++static inline xnticks_t xnclock_read_realtime(struct xnclock *clock) ++{ ++ /* ++ * Return an adjusted value of the monotonic time with the ++ * translated system wallclock offset. ++ */ ++ return xnclock_read_monotonic(clock) + xnclock_get_offset(clock); ++} ++ ++unsigned long long xnclock_divrem_billion(unsigned long long value, ++ unsigned long *rem); ++ ++xnticks_t xnclock_get_host_time(void); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++void xnclock_init_proc(void); ++ ++void xnclock_cleanup_proc(void); ++ ++static inline void xnclock_print_status(struct xnclock *clock, ++ struct xnvfile_regular_iterator *it) ++{ ++ if (clock->ops.print_status) ++ clock->ops.print_status(clock, it); ++} ++ ++#else ++static inline void xnclock_init_proc(void) { } ++static inline void xnclock_cleanup_proc(void) { } ++#endif ++ ++void xnclock_update_freq(unsigned long long freq); ++ ++int xnclock_init(unsigned long long freq); ++ ++void xnclock_cleanup(void); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_CLOCK_H */ +--- linux/include/xenomai/cobalt/kernel/pipe.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/pipe.h 2021-04-07 16:01:28.149632849 +0800 +@@ -0,0 +1,136 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum. ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA ++ * 02139, USA; either version 2 of the License, or (at your option) ++ * any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_PIPE_H ++#define _COBALT_KERNEL_PIPE_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define XNPIPE_NDEVS CONFIG_XENO_OPT_PIPE_NRDEV ++#define XNPIPE_DEV_MAJOR 150 ++ ++#define XNPIPE_KERN_CONN 0x1 ++#define XNPIPE_KERN_LCLOSE 0x2 ++#define XNPIPE_USER_CONN 0x4 ++#define XNPIPE_USER_SIGIO 0x8 ++#define XNPIPE_USER_WREAD 0x10 ++#define XNPIPE_USER_WREAD_READY 0x20 ++#define XNPIPE_USER_WSYNC 0x40 ++#define XNPIPE_USER_WSYNC_READY 0x80 ++#define XNPIPE_USER_LCONN 0x100 ++ ++#define XNPIPE_USER_ALL_WAIT \ ++(XNPIPE_USER_WREAD|XNPIPE_USER_WSYNC) ++ ++#define XNPIPE_USER_ALL_READY \ ++(XNPIPE_USER_WREAD_READY|XNPIPE_USER_WSYNC_READY) ++ ++struct xnpipe_mh { ++ size_t size; ++ size_t rdoff; ++ struct list_head link; ++}; ++ ++struct xnpipe_state; ++ ++struct xnpipe_operations { ++ void (*output)(struct xnpipe_mh *mh, void *xstate); ++ int (*input)(struct xnpipe_mh *mh, int retval, void *xstate); ++ void *(*alloc_ibuf)(size_t size, void *xstate); ++ void (*free_ibuf)(void *buf, void *xstate); ++ void (*free_obuf)(void *buf, void *xstate); ++ void (*release)(void *xstate); ++}; ++ ++struct xnpipe_state { ++ struct list_head slink; /* Link on sleep queue */ ++ struct list_head alink; /* Link on async queue */ ++ ++ struct list_head inq; /* From user-space to kernel */ ++ int nrinq; ++ struct list_head outq; /* From kernel to user-space */ ++ int nroutq; ++ struct xnsynch synchbase; ++ struct xnpipe_operations ops; ++ void *xstate; /* Extra state managed by caller */ ++ ++ /* Linux kernel part */ ++ unsigned long status; ++ struct fasync_struct *asyncq; ++ wait_queue_head_t readq; /* open/read/poll waiters */ ++ wait_queue_head_t syncq; /* sync waiters */ ++ int wcount; /* number of waiters on this minor */ ++ size_t ionrd; ++}; ++ ++extern struct xnpipe_state xnpipe_states[]; ++ ++#define xnminor_from_state(s) (s - xnpipe_states) ++ ++#ifdef CONFIG_XENO_OPT_PIPE ++int xnpipe_mount(void); ++void xnpipe_umount(void); ++#else /* !CONFIG_XENO_OPT_PIPE */ ++static inline int xnpipe_mount(void) { return 0; } ++static inline void xnpipe_umount(void) { } ++#endif /* !CONFIG_XENO_OPT_PIPE */ ++ ++/* Entry points of the kernel interface. */ ++ ++int xnpipe_connect(int minor, ++ struct xnpipe_operations *ops, void *xstate); ++ ++int xnpipe_disconnect(int minor); ++ ++ssize_t xnpipe_send(int minor, ++ struct xnpipe_mh *mh, size_t size, int flags); ++ ++ssize_t xnpipe_mfixup(int minor, struct xnpipe_mh *mh, ssize_t size); ++ ++ssize_t xnpipe_recv(int minor, ++ struct xnpipe_mh **pmh, xnticks_t timeout); ++ ++int xnpipe_flush(int minor, int mode); ++ ++int xnpipe_pollstate(int minor, unsigned int *mask_r); ++ ++static inline unsigned int __xnpipe_pollstate(int minor) ++{ ++ struct xnpipe_state *state = xnpipe_states + minor; ++ unsigned int mask = POLLOUT; ++ ++ if (!list_empty(&state->inq)) ++ mask |= POLLIN; ++ ++ return mask; ++} ++ ++static inline char *xnpipe_m_data(struct xnpipe_mh *mh) ++{ ++ return (char *)(mh + 1); ++} ++ ++#define xnpipe_m_size(mh) ((mh)->size) ++ ++#define xnpipe_m_rdoff(mh) ((mh)->rdoff) ++ ++#endif /* !_COBALT_KERNEL_PIPE_H */ +--- linux/include/xenomai/cobalt/kernel/sched-tp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-tp.h 2021-04-07 16:01:28.144632856 +0800 +@@ -0,0 +1,99 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_TP_H ++#define _COBALT_KERNEL_SCHED_TP_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-tp.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ ++#define XNSCHED_TP_MIN_PRIO 1 ++#define XNSCHED_TP_MAX_PRIO 255 ++#define XNSCHED_TP_NR_PRIO \ ++ (XNSCHED_TP_MAX_PRIO - XNSCHED_TP_MIN_PRIO + 1) ++ ++extern struct xnsched_class xnsched_class_tp; ++ ++struct xnsched_tp_window { ++ xnticks_t w_offset; ++ int w_part; ++}; ++ ++struct xnsched_tp_schedule { ++ int pwin_nr; ++ xnticks_t tf_duration; ++ atomic_t refcount; ++ struct xnsched_tp_window pwins[0]; ++}; ++ ++struct xnsched_tp { ++ struct xnsched_tpslot { ++ /** Per-partition runqueue. */ ++ xnsched_queue_t runnable; ++ } partitions[CONFIG_XENO_OPT_SCHED_TP_NRPART]; ++ /** Idle slot for passive windows. */ ++ struct xnsched_tpslot idle; ++ /** Active partition slot */ ++ struct xnsched_tpslot *tps; ++ /** Time frame timer */ ++ struct xntimer tf_timer; ++ /** Global partition schedule */ ++ struct xnsched_tp_schedule *gps; ++ /** Window index of next partition */ ++ int wnext; ++ /** Start of next time frame */ ++ xnticks_t tf_start; ++ /** Assigned thread queue */ ++ struct list_head threads; ++}; ++ ++static inline int xnsched_tp_init_thread(struct xnthread *thread) ++{ ++ thread->tps = NULL; ++ ++ return 0; ++} ++ ++struct xnsched_tp_schedule * ++xnsched_tp_set_schedule(struct xnsched *sched, ++ struct xnsched_tp_schedule *gps); ++ ++void xnsched_tp_start_schedule(struct xnsched *sched); ++ ++void xnsched_tp_stop_schedule(struct xnsched *sched); ++ ++int xnsched_tp_get_partition(struct xnsched *sched); ++ ++struct xnsched_tp_schedule * ++xnsched_tp_get_schedule(struct xnsched *sched); ++ ++void xnsched_tp_put_schedule(struct xnsched_tp_schedule *gps); ++ ++#endif /* CONFIG_XENO_OPT_SCHED_TP */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_TP_H */ +--- linux/include/xenomai/cobalt/kernel/registry.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/registry.h 2021-04-07 16:01:28.139632863 +0800 +@@ -0,0 +1,200 @@ ++/* ++ * Copyright (C) 2004 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_REGISTRY_H ++#define _COBALT_KERNEL_REGISTRY_H ++ ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_registry ++ * ++ * @{ ++ */ ++struct xnpnode; ++ ++struct xnobject { ++ void *objaddr; ++ const char *key; /* !< Hash key. May be NULL if anonynous. */ ++ unsigned long cstamp; /* !< Creation stamp. */ ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct xnpnode *pnode; /* !< v-file information class. */ ++ union { ++ struct { ++ struct xnvfile_rev_tag tag; ++ struct xnvfile_snapshot file; ++ } vfsnap; /* !< virtual snapshot file. */ ++ struct xnvfile_regular vfreg; /* !< virtual regular file */ ++ struct xnvfile_link link; /* !< virtual link. */ ++ } vfile_u; ++ struct xnvfile *vfilp; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ struct hlist_node hlink; /* !< Link in h-table */ ++ struct list_head link; ++}; ++ ++int xnregistry_init(void); ++ ++void xnregistry_cleanup(void); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++#define XNOBJECT_EXPORT_SCHEDULED ((struct xnvfile *)1L) ++#define XNOBJECT_EXPORT_INPROGRESS ((struct xnvfile *)2L) ++#define XNOBJECT_EXPORT_ABORTED ((struct xnvfile *)3L) ++ ++struct xnptree { ++ const char *dirname; ++ /* hidden */ ++ int entries; ++ struct xnvfile_directory vdir; ++}; ++ ++#define DEFINE_XNPTREE(__var, __name) \ ++ struct xnptree __var = { \ ++ .dirname = __name, \ ++ .entries = 0, \ ++ .vdir = xnvfile_nodir, \ ++ } ++ ++struct xnpnode_ops { ++ int (*export)(struct xnobject *object, struct xnpnode *pnode); ++ void (*unexport)(struct xnobject *object, struct xnpnode *pnode); ++ void (*touch)(struct xnobject *object); ++}; ++ ++struct xnpnode { ++ const char *dirname; ++ struct xnptree *root; ++ struct xnpnode_ops *ops; ++ /* hidden */ ++ int entries; ++ struct xnvfile_directory vdir; ++}; ++ ++struct xnpnode_snapshot { ++ struct xnpnode node; ++ struct xnvfile_snapshot_template vfile; ++}; ++ ++struct xnpnode_regular { ++ struct xnpnode node; ++ struct xnvfile_regular_template vfile; ++}; ++ ++struct xnpnode_link { ++ struct xnpnode node; ++ char *(*target)(void *obj); ++}; ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++#define DEFINE_XNPTREE(__var, __name); ++ ++/* Placeholders. */ ++ ++struct xnpnode { ++ const char *dirname; ++}; ++ ++struct xnpnode_snapshot { ++ struct xnpnode node; ++}; ++ ++struct xnpnode_regular { ++ struct xnpnode node; ++}; ++ ++struct xnpnode_link { ++ struct xnpnode node; ++}; ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++/* Public interface. */ ++ ++extern struct xnobject *registry_obj_slots; ++ ++static inline struct xnobject *xnregistry_validate(xnhandle_t handle) ++{ ++ struct xnobject *object; ++ /* ++ * Careful: a removed object which is still in flight to be ++ * unexported carries a NULL objaddr, so we have to check this ++ * as well. ++ */ ++ handle = xnhandle_get_index(handle); ++ if (likely(handle && handle < CONFIG_XENO_OPT_REGISTRY_NRSLOTS)) { ++ object = ®istry_obj_slots[handle]; ++ return object->objaddr ? object : NULL; ++ } ++ ++ return NULL; ++} ++ ++static inline const char *xnregistry_key(xnhandle_t handle) ++{ ++ struct xnobject *object = xnregistry_validate(handle); ++ return object ? 
object->key : NULL; ++} ++ ++int xnregistry_enter(const char *key, ++ void *objaddr, ++ xnhandle_t *phandle, ++ struct xnpnode *pnode); ++ ++static inline int ++xnregistry_enter_anon(void *objaddr, xnhandle_t *phandle) ++{ ++ return xnregistry_enter(NULL, objaddr, phandle, NULL); ++} ++ ++int xnregistry_bind(const char *key, ++ xnticks_t timeout, ++ int timeout_mode, ++ xnhandle_t *phandle); ++ ++int xnregistry_remove(xnhandle_t handle); ++ ++static inline ++void *xnregistry_lookup(xnhandle_t handle, ++ unsigned long *cstamp_r) ++{ ++ struct xnobject *object = xnregistry_validate(handle); ++ ++ if (object == NULL) ++ return NULL; ++ ++ if (cstamp_r) ++ *cstamp_r = object->cstamp; ++ ++ return object->objaddr; ++} ++ ++int xnregistry_unlink(const char *key); ++ ++unsigned xnregistry_hash_size(void); ++ ++extern struct xnpnode_ops xnregistry_vfsnap_ops; ++ ++extern struct xnpnode_ops xnregistry_vlink_ops; ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_REGISTRY_H */ +--- linux/include/xenomai/cobalt/kernel/trace.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/trace.h 2021-04-07 16:01:28.134632870 +0800 +@@ -0,0 +1,105 @@ ++/* ++ * Copyright (C) 2006 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_TRACE_H ++#define _COBALT_KERNEL_TRACE_H ++ ++#include ++#include ++#include ++ ++static inline int xntrace_max_begin(unsigned long v) ++{ ++ ipipe_trace_begin(v); ++ return 0; ++} ++ ++static inline int xntrace_max_end(unsigned long v) ++{ ++ ipipe_trace_end(v); ++ return 0; ++} ++ ++static inline int xntrace_max_reset(void) ++{ ++ ipipe_trace_max_reset(); ++ return 0; ++} ++ ++static inline int xntrace_user_start(void) ++{ ++ return ipipe_trace_frozen_reset(); ++} ++ ++static inline int xntrace_user_stop(unsigned long v) ++{ ++ ipipe_trace_freeze(v); ++ return 0; ++} ++ ++static inline int xntrace_user_freeze(unsigned long v, int once) ++{ ++ int ret = 0; ++ ++ if (!once) ++ ret = ipipe_trace_frozen_reset(); ++ ++ ipipe_trace_freeze(v); ++ ++ return ret; ++} ++ ++static inline int xntrace_special(unsigned char id, unsigned long v) ++{ ++ ipipe_trace_special(id, v); ++ return 0; ++} ++ ++static inline int xntrace_special_u64(unsigned char id, ++ unsigned long long v) ++{ ++ ipipe_trace_special(id, (unsigned long)(v >> 32)); ++ ipipe_trace_special(id, (unsigned long)(v & 0xFFFFFFFF)); ++ return 0; ++} ++ ++static inline int xntrace_pid(pid_t pid, short prio) ++{ ++ ipipe_trace_pid(pid, prio); ++ return 0; ++} ++ ++static inline int xntrace_tick(unsigned long delay_ticks) ++{ ++ ipipe_trace_event(0, delay_ticks); ++ return 0; ++} ++ ++static inline int xntrace_panic_freeze(void) ++{ ++ ipipe_trace_panic_freeze(); ++ return 0; ++} ++ ++static inline int xntrace_panic_dump(void) ++{ ++ ipipe_trace_panic_dump(); ++ return 0; ++} ++ ++#endif /* !_COBALT_KERNEL_TRACE_H */ +--- linux/include/xenomai/cobalt/kernel/init.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/init.h 2021-04-07 16:01:28.130632876 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_INIT_H ++#define _COBALT_KERNEL_INIT_H ++ ++#include ++#include ++#include ++ ++extern atomic_t cobalt_runstate; ++ ++static inline enum cobalt_run_states realtime_core_state(void) ++{ ++ return atomic_read(&cobalt_runstate); ++} ++ ++static inline int realtime_core_enabled(void) ++{ ++ return atomic_read(&cobalt_runstate) != COBALT_STATE_DISABLED; ++} ++ ++static inline int realtime_core_running(void) ++{ ++ return atomic_read(&cobalt_runstate) == COBALT_STATE_RUNNING; ++} ++ ++static inline void set_realtime_core_state(enum cobalt_run_states state) ++{ ++ atomic_set(&cobalt_runstate, state); ++} ++ ++void cobalt_add_state_chain(struct notifier_block *nb); ++ ++void cobalt_remove_state_chain(struct notifier_block *nb); ++ ++void cobalt_call_state_chain(enum cobalt_run_states newstate); ++ ++#endif /* !_COBALT_KERNEL_INIT_H_ */ +--- linux/include/xenomai/cobalt/kernel/select.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/select.h 2021-04-07 16:01:28.125632883 +0800 +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (C) 2008 Efixo ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_SELECT_H ++#define _COBALT_KERNEL_SELECT_H ++ ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_select ++ * @{ ++ */ ++ ++#define XNSELECT_READ 0 ++#define XNSELECT_WRITE 1 ++#define XNSELECT_EXCEPT 2 ++#define XNSELECT_MAX_TYPES 3 ++ ++struct xnselector { ++ struct xnsynch synchbase; ++ struct fds { ++ fd_set expected; ++ fd_set pending; ++ } fds [XNSELECT_MAX_TYPES]; ++ struct list_head destroy_link; ++ struct list_head bindings; /* only used by xnselector_destroy */ ++}; ++ ++#define __NFDBITS__ (8 * sizeof(unsigned long)) ++#define __FDSET_LONGS__ (__FD_SETSIZE/__NFDBITS__) ++#define __FDELT__(d) ((d) / __NFDBITS__) ++#define __FDMASK__(d) (1UL << ((d) % __NFDBITS__)) ++ ++static inline void __FD_SET__(unsigned long __fd, __kernel_fd_set *__fdsetp) ++{ ++ unsigned long __tmp = __fd / __NFDBITS__; ++ unsigned long __rem = __fd % __NFDBITS__; ++ __fdsetp->fds_bits[__tmp] |= (1UL<<__rem); ++} ++ ++static inline void __FD_CLR__(unsigned long __fd, __kernel_fd_set *__fdsetp) ++{ ++ unsigned long __tmp = __fd / __NFDBITS__; ++ unsigned long __rem = __fd % __NFDBITS__; ++ __fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem); ++} ++ ++static inline int __FD_ISSET__(unsigned long __fd, const __kernel_fd_set *__p) ++{ ++ unsigned long __tmp = __fd / __NFDBITS__; ++ unsigned long __rem = __fd % __NFDBITS__; ++ return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0; ++} ++ ++static inline void __FD_ZERO__(__kernel_fd_set *__p) ++{ ++ unsigned long *__tmp = __p->fds_bits; ++ int __i; ++ ++ __i = __FDSET_LONGS__; ++ while (__i) { ++ __i--; ++ *__tmp = 0; ++ __tmp++; ++ } ++} ++ ++struct xnselect { ++ struct list_head bindings; ++}; ++ ++#define DECLARE_XNSELECT(name) struct xnselect name ++ ++struct xnselect_binding { ++ struct xnselector *selector; ++ struct xnselect *fd; ++ unsigned int type; ++ unsigned int bit_index; ++ struct list_head link; /* link in selected fds list. */ ++ struct list_head slink; /* link in selector list */ ++}; ++ ++void xnselect_init(struct xnselect *select_block); ++ ++int xnselect_bind(struct xnselect *select_block, ++ struct xnselect_binding *binding, ++ struct xnselector *selector, ++ unsigned int type, ++ unsigned int bit_index, ++ unsigned int state); ++ ++int __xnselect_signal(struct xnselect *select_block, unsigned int state); ++ ++/** ++ * Signal a file descriptor state change. ++ * ++ * @param select_block pointer to an @a xnselect structure representing the file ++ * descriptor whose state changed; ++ * @param state new value of the state. ++ * ++ * @retval 1 if rescheduling is needed; ++ * @retval 0 otherwise. 
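++ *
++ * A minimal usage sketch (illustrative only; @a rtdm_dev and
++ * @a new_state are hypothetical names, not part of this patch):
++ * a driver flagging a readability change would typically do
++ *
++ * @code
++ * if (xnselect_signal(&rtdm_dev->read_block, new_state))
++ *         xnsched_run();
++ * @endcode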
++ */ ++static inline int ++xnselect_signal(struct xnselect *select_block, unsigned int state) ++{ ++ if (!list_empty(&select_block->bindings)) ++ return __xnselect_signal(select_block, state); ++ ++ return 0; ++} ++ ++void xnselect_destroy(struct xnselect *select_block); ++ ++int xnselector_init(struct xnselector *selector); ++ ++int xnselect(struct xnselector *selector, ++ fd_set *out_fds[XNSELECT_MAX_TYPES], ++ fd_set *in_fds[XNSELECT_MAX_TYPES], ++ int nfds, ++ xnticks_t timeout, xntmode_t timeout_mode); ++ ++void xnselector_destroy(struct xnselector *selector); ++ ++int xnselect_mount(void); ++ ++int xnselect_umount(void); ++ ++/** @} */ ++ ++#endif /* _COBALT_KERNEL_SELECT_H */ +--- linux/include/xenomai/cobalt/kernel/compat.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/compat.h 2021-04-07 16:01:28.120632890 +0800 +@@ -0,0 +1,167 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_COMPAT_H ++#define _COBALT_KERNEL_COMPAT_H ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ ++#include ++#include ++#include ++#include ++ ++struct mq_attr; ++ ++struct __compat_sched_ss_param { ++ int __sched_low_priority; ++ struct compat_timespec __sched_repl_period; ++ struct compat_timespec __sched_init_budget; ++ int __sched_max_repl; ++}; ++ ++struct __compat_sched_rr_param { ++ struct compat_timespec __sched_rr_quantum; ++}; ++ ++struct compat_sched_param_ex { ++ int sched_priority; ++ union { ++ struct __compat_sched_ss_param ss; ++ struct __compat_sched_rr_param rr; ++ struct __sched_tp_param tp; ++ struct __sched_quota_param quota; ++ } sched_u; ++}; ++ ++struct compat_mq_attr { ++ compat_long_t mq_flags; ++ compat_long_t mq_maxmsg; ++ compat_long_t mq_msgsize; ++ compat_long_t mq_curmsgs; ++}; ++ ++struct compat_sched_tp_window { ++ struct compat_timespec offset; ++ struct compat_timespec duration; ++ int ptid; ++}; ++ ++struct __compat_sched_config_tp { ++ int op; ++ int nr_windows; ++ struct compat_sched_tp_window windows[0]; ++}; ++ ++union compat_sched_config { ++ struct __compat_sched_config_tp tp; ++ struct __sched_config_quota quota; ++}; ++ ++#define compat_sched_tp_confsz(nr_win) \ ++ (sizeof(struct __compat_sched_config_tp) + nr_win * sizeof(struct compat_sched_tp_window)) ++ ++typedef struct { ++ compat_ulong_t fds_bits[__FD_SETSIZE / (8 * sizeof(compat_long_t))]; ++} compat_fd_set; ++ ++struct compat_rtdm_mmap_request { ++ u64 offset; ++ compat_size_t length; ++ int prot; ++ int flags; ++}; ++ ++int sys32_get_timespec(struct timespec *ts, ++ const struct compat_timespec __user *cts); ++ ++int sys32_put_timespec(struct compat_timespec __user *cts, ++ const struct timespec *ts); ++ ++int sys32_get_itimerspec(struct itimerspec *its, ++ const struct compat_itimerspec __user *cits); ++ ++int 
sys32_put_itimerspec(struct compat_itimerspec __user *cits, ++ const struct itimerspec *its); ++ ++int sys32_get_timeval(struct timeval *tv, ++ const struct compat_timeval __user *ctv); ++ ++int sys32_put_timeval(struct compat_timeval __user *ctv, ++ const struct timeval *tv); ++ ++int sys32_get_timex(struct timex *tx, ++ const struct compat_timex __user *ctx); ++ ++int sys32_put_timex(struct compat_timex __user *ctx, ++ const struct timex *tx); ++ ++ssize_t sys32_get_fdset(fd_set *fds, const compat_fd_set __user *cfds, ++ size_t cfdsize); ++ ++ssize_t sys32_put_fdset(compat_fd_set __user *cfds, const fd_set *fds, ++ size_t fdsize); ++ ++int sys32_get_param_ex(int policy, ++ struct sched_param_ex *p, ++ const struct compat_sched_param_ex __user *u_cp); ++ ++int sys32_put_param_ex(int policy, ++ struct compat_sched_param_ex __user *u_cp, ++ const struct sched_param_ex *p); ++ ++int sys32_get_mqattr(struct mq_attr *ap, ++ const struct compat_mq_attr __user *u_cap); ++ ++int sys32_put_mqattr(struct compat_mq_attr __user *u_cap, ++ const struct mq_attr *ap); ++ ++int sys32_get_sigevent(struct sigevent *ev, ++ const struct compat_sigevent *__user u_cev); ++ ++int sys32_get_sigset(sigset_t *set, const compat_sigset_t *u_cset); ++ ++int sys32_put_sigset(compat_sigset_t *u_cset, const sigset_t *set); ++ ++int sys32_get_sigval(union sigval *val, const union compat_sigval *u_cval); ++ ++int sys32_put_siginfo(void __user *u_si, const struct siginfo *si, ++ int overrun); ++ ++int sys32_get_msghdr(struct user_msghdr *msg, ++ const struct compat_msghdr __user *u_cmsg); ++ ++int sys32_get_mmsghdr(struct mmsghdr *mmsg, ++ const struct compat_mmsghdr __user *u_cmmsg); ++ ++int sys32_put_msghdr(struct compat_msghdr __user *u_cmsg, ++ const struct user_msghdr *msg); ++ ++int sys32_put_mmsghdr(struct compat_mmsghdr __user *u_cmmsg, ++ const struct mmsghdr *mmsg); ++ ++int sys32_get_iovec(struct iovec *iov, ++ const struct compat_iovec __user *ciov, ++ int ciovlen); ++ ++int sys32_put_iovec(struct compat_iovec __user *u_ciov, ++ const struct iovec *iov, ++ int iovlen); ++ ++#endif /* CONFIG_XENO_ARCH_SYS3264 */ ++ ++#endif /* !_COBALT_KERNEL_COMPAT_H */ +--- linux/include/xenomai/cobalt/kernel/timer.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/timer.h 2021-04-07 16:01:28.116632896 +0800 +@@ -0,0 +1,566 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++ ++#ifndef _COBALT_KERNEL_TIMER_H ++#define _COBALT_KERNEL_TIMER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_timer ++ * @{ ++ */ ++#define XN_INFINITE ((xnticks_t)0) ++#define XN_NONBLOCK ((xnticks_t)-1) ++ ++/* Timer modes */ ++typedef enum xntmode { ++ XN_RELATIVE, ++ XN_ABSOLUTE, ++ XN_REALTIME ++} xntmode_t; ++ ++/* Timer status */ ++#define XNTIMER_DEQUEUED 0x00000001 ++#define XNTIMER_KILLED 0x00000002 ++#define XNTIMER_PERIODIC 0x00000004 ++#define XNTIMER_REALTIME 0x00000008 ++#define XNTIMER_FIRED 0x00000010 ++#define XNTIMER_RUNNING 0x00000020 ++#define XNTIMER_KGRAVITY 0x00000040 ++#define XNTIMER_UGRAVITY 0x00000080 ++#define XNTIMER_IGRAVITY 0 /* most conservative */ ++ ++#define XNTIMER_GRAVITY_MASK (XNTIMER_KGRAVITY|XNTIMER_UGRAVITY) ++#define XNTIMER_INIT_MASK XNTIMER_GRAVITY_MASK ++ ++/* These flags are available to the real-time interfaces */ ++#define XNTIMER_SPARE0 0x01000000 ++#define XNTIMER_SPARE1 0x02000000 ++#define XNTIMER_SPARE2 0x04000000 ++#define XNTIMER_SPARE3 0x08000000 ++#define XNTIMER_SPARE4 0x10000000 ++#define XNTIMER_SPARE5 0x20000000 ++#define XNTIMER_SPARE6 0x40000000 ++#define XNTIMER_SPARE7 0x80000000 ++ ++/* Timer priorities */ ++#define XNTIMER_LOPRIO (-999999999) ++#define XNTIMER_STDPRIO 0 ++#define XNTIMER_HIPRIO 999999999 ++ ++struct xntlholder { ++ struct list_head link; ++ xnticks_t key; ++ int prio; ++}; ++ ++#define xntlholder_date(h) ((h)->key) ++#define xntlholder_prio(h) ((h)->prio) ++#define xntlist_init(q) INIT_LIST_HEAD(q) ++#define xntlist_empty(q) list_empty(q) ++ ++static inline struct xntlholder *xntlist_head(struct list_head *q) ++{ ++ if (list_empty(q)) ++ return NULL; ++ ++ return list_first_entry(q, struct xntlholder, link); ++} ++ ++static inline struct xntlholder *xntlist_next(struct list_head *q, ++ struct xntlholder *h) ++{ ++ if (list_is_last(&h->link, q)) ++ return NULL; ++ ++ return list_entry(h->link.next, struct xntlholder, link); ++} ++ ++static inline struct xntlholder *xntlist_second(struct list_head *q, ++ struct xntlholder *h) ++{ ++ return xntlist_next(q, h); ++} ++ ++static inline void xntlist_insert(struct list_head *q, struct xntlholder *holder) ++{ ++ struct xntlholder *p; ++ ++ if (list_empty(q)) { ++ list_add(&holder->link, q); ++ return; ++ } ++ ++ /* ++ * Insert the new timer at the proper place in the single ++ * queue. O(N) here, but this is the price for the increased ++ * flexibility... ++ */ ++ list_for_each_entry_reverse(p, q, link) { ++ if ((xnsticks_t) (holder->key - p->key) > 0 || ++ (holder->key == p->key && holder->prio <= p->prio)) ++ break; ++ } ++ ++ list_add(&holder->link, &p->link); ++} ++ ++#define xntlist_remove(q, h) \ ++ do { \ ++ (void)(q); \ ++ list_del(&(h)->link); \ ++ } while (0) ++ ++#if defined(CONFIG_XENO_OPT_TIMER_RBTREE) ++ ++#include ++ ++typedef struct { ++ unsigned long long date; ++ unsigned prio; ++ struct rb_node link; ++} xntimerh_t; ++ ++#define xntimerh_date(h) ((h)->date) ++#define xntimerh_prio(h) ((h)->prio) ++#define xntimerh_init(h) do { } while (0) ++ ++typedef struct { ++ struct rb_root root; ++ xntimerh_t *head; ++} xntimerq_t; ++ ++#define xntimerq_init(q) \ ++ ({ \ ++ xntimerq_t *_q = (q); \ ++ _q->root = RB_ROOT; \ ++ _q->head = NULL; \ ++ }) ++ ++#define xntimerq_destroy(q) do { } while (0) ++#define xntimerq_empty(q) ((q)->head == NULL) ++ ++#define xntimerq_head(q) ((q)->head) ++ ++#define xntimerq_next(q, h) \ ++ ({ \ ++ struct rb_node *_node = rb_next(&(h)->link); \ ++ _node ? 
(container_of(_node, xntimerh_t, link)) : NULL; \ ++ }) ++ ++#define xntimerq_second(q, h) xntimerq_next(q, h) ++ ++void xntimerq_insert(xntimerq_t *q, xntimerh_t *holder); ++ ++static inline void xntimerq_remove(xntimerq_t *q, xntimerh_t *holder) ++{ ++ if (holder == q->head) ++ q->head = xntimerq_second(q, holder); ++ ++ rb_erase(&holder->link, &q->root); ++} ++ ++typedef struct { } xntimerq_it_t; ++ ++#define xntimerq_it_begin(q,i) ((void) (i), xntimerq_head(q)) ++#define xntimerq_it_next(q,i,h) ((void) (i), xntimerq_next((q),(h))) ++ ++#else /* CONFIG_XENO_OPT_TIMER_LIST */ ++ ++typedef struct xntlholder xntimerh_t; ++ ++#define xntimerh_date(h) xntlholder_date(h) ++#define xntimerh_prio(h) xntlholder_prio(h) ++#define xntimerh_init(h) do { } while (0) ++ ++typedef struct list_head xntimerq_t; ++ ++#define xntimerq_init(q) xntlist_init(q) ++#define xntimerq_destroy(q) do { } while (0) ++#define xntimerq_empty(q) xntlist_empty(q) ++#define xntimerq_head(q) xntlist_head(q) ++#define xntimerq_second(q, h) xntlist_second((q),(h)) ++#define xntimerq_insert(q, h) xntlist_insert((q),(h)) ++#define xntimerq_remove(q, h) xntlist_remove((q),(h)) ++ ++typedef struct { } xntimerq_it_t; ++ ++#define xntimerq_it_begin(q,i) ((void) (i), xntlist_head(q)) ++#define xntimerq_it_next(q,i,h) ((void) (i), xntlist_next((q),(h))) ++ ++#endif /* CONFIG_XENO_OPT_TIMER_LIST */ ++ ++struct xnsched; ++ ++struct xntimerdata { ++ xntimerq_t q; ++}; ++ ++static inline struct xntimerdata * ++xnclock_percpu_timerdata(struct xnclock *clock, int cpu) ++{ ++ return per_cpu_ptr(clock->timerdata, cpu); ++} ++ ++static inline struct xntimerdata * ++xnclock_this_timerdata(struct xnclock *clock) ++{ ++ return raw_cpu_ptr(clock->timerdata); ++} ++ ++struct xntimer { ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ struct xnclock *clock; ++#endif ++ /** Link in timers list. */ ++ xntimerh_t aplink; ++ struct list_head adjlink; ++ /** Timer status. */ ++ unsigned long status; ++ /** Periodic interval (clock ticks, 0 == one shot). */ ++ xnticks_t interval; ++ /** Periodic interval (nanoseconds, 0 == one shot). */ ++ xnticks_t interval_ns; ++ /** Count of timer ticks in periodic mode. */ ++ xnticks_t periodic_ticks; ++ /** First tick date in periodic mode. */ ++ xnticks_t start_date; ++ /** Date of next periodic release point (timer ticks). */ ++ xnticks_t pexpect_ticks; ++ /** Sched structure to which the timer is attached. */ ++ struct xnsched *sched; ++ /** Timeout handler. */ ++ void (*handler)(struct xntimer *timer); ++#ifdef CONFIG_XENO_OPT_STATS ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ struct xnclock *tracker; ++#endif ++ /** Timer name to be displayed. */ ++ char name[XNOBJECT_NAME_LEN]; ++ /** Timer holder in timebase. */ ++ struct list_head next_stat; ++ /** Number of timer schedules. */ ++ xnstat_counter_t scheduled; ++ /** Number of timer events. 
*/ ++ xnstat_counter_t fired; ++#endif /* CONFIG_XENO_OPT_STATS */ ++}; ++ ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ ++static inline struct xnclock *xntimer_clock(struct xntimer *timer) ++{ ++ return timer->clock; ++} ++ ++void xntimer_set_clock(struct xntimer *timer, ++ struct xnclock *newclock); ++ ++#else /* !CONFIG_XENO_OPT_EXTCLOCK */ ++ ++static inline struct xnclock *xntimer_clock(struct xntimer *timer) ++{ ++ return &nkclock; ++} ++ ++static inline void xntimer_set_clock(struct xntimer *timer, ++ struct xnclock *newclock) ++{ ++ XENO_BUG_ON(COBALT, newclock != &nkclock); ++} ++ ++#endif /* !CONFIG_XENO_OPT_EXTCLOCK */ ++ ++#ifdef CONFIG_SMP ++static inline struct xnsched *xntimer_sched(struct xntimer *timer) ++{ ++ return timer->sched; ++} ++#else /* !CONFIG_SMP */ ++#define xntimer_sched(t) xnsched_current() ++#endif /* !CONFIG_SMP */ ++ ++#define xntimer_percpu_queue(__timer) \ ++ ({ \ ++ struct xntimerdata *tmd; \ ++ int cpu = xnsched_cpu((__timer)->sched); \ ++ tmd = xnclock_percpu_timerdata(xntimer_clock(__timer), cpu); \ ++ &tmd->q; \ ++ }) ++ ++static inline unsigned long xntimer_gravity(struct xntimer *timer) ++{ ++ struct xnclock *clock = xntimer_clock(timer); ++ ++ if (timer->status & XNTIMER_KGRAVITY) ++ return clock->gravity.kernel; ++ ++ if (timer->status & XNTIMER_UGRAVITY) ++ return clock->gravity.user; ++ ++ return clock->gravity.irq; ++} ++ ++static inline void xntimer_update_date(struct xntimer *timer) ++{ ++ xntimerh_date(&timer->aplink) = timer->start_date ++ + xnclock_ns_to_ticks(xntimer_clock(timer), ++ timer->periodic_ticks * timer->interval_ns) ++ - xntimer_gravity(timer); ++} ++ ++static inline xnticks_t xntimer_pexpect(struct xntimer *timer) ++{ ++ return timer->start_date + ++ xnclock_ns_to_ticks(xntimer_clock(timer), ++ timer->pexpect_ticks * timer->interval_ns); ++} ++ ++static inline void xntimer_set_priority(struct xntimer *timer, ++ int prio) ++{ ++ xntimerh_prio(&timer->aplink) = prio; ++} ++ ++static inline int xntimer_active_p(struct xntimer *timer) ++{ ++ return timer->sched != NULL; ++} ++ ++static inline int xntimer_running_p(struct xntimer *timer) ++{ ++ return (timer->status & XNTIMER_RUNNING) != 0; ++} ++ ++static inline int xntimer_fired_p(struct xntimer *timer) ++{ ++ return (timer->status & XNTIMER_FIRED) != 0; ++} ++ ++static inline int xntimer_periodic_p(struct xntimer *timer) ++{ ++ return (timer->status & XNTIMER_PERIODIC) != 0; ++} ++ ++void __xntimer_init(struct xntimer *timer, ++ struct xnclock *clock, ++ void (*handler)(struct xntimer *timer), ++ struct xnsched *sched, ++ int flags); ++ ++void xntimer_set_gravity(struct xntimer *timer, ++ int gravity); ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ ++#define xntimer_init(__timer, __clock, __handler, __sched, __flags) \ ++do { \ ++ __xntimer_init(__timer, __clock, __handler, __sched, __flags); \ ++ xntimer_set_name(__timer, #__handler); \ ++} while (0) ++ ++static inline void xntimer_reset_stats(struct xntimer *timer) ++{ ++ xnstat_counter_set(&timer->scheduled, 0); ++ xnstat_counter_set(&timer->fired, 0); ++} ++ ++static inline void xntimer_account_scheduled(struct xntimer *timer) ++{ ++ xnstat_counter_inc(&timer->scheduled); ++} ++ ++static inline void xntimer_account_fired(struct xntimer *timer) ++{ ++ xnstat_counter_inc(&timer->fired); ++} ++ ++static inline void xntimer_set_name(struct xntimer *timer, const char *name) ++{ ++ knamecpy(timer->name, name); ++} ++ ++#else /* !CONFIG_XENO_OPT_STATS */ ++ ++#define xntimer_init __xntimer_init ++ ++static inline void xntimer_reset_stats(struct 
xntimer *timer) { } ++ ++static inline void xntimer_account_scheduled(struct xntimer *timer) { } ++ ++static inline void xntimer_account_fired(struct xntimer *timer) { } ++ ++static inline void xntimer_set_name(struct xntimer *timer, const char *name) { } ++ ++#endif /* !CONFIG_XENO_OPT_STATS */ ++ ++#if defined(CONFIG_XENO_OPT_EXTCLOCK) && defined(CONFIG_XENO_OPT_STATS) ++void xntimer_switch_tracking(struct xntimer *timer, ++ struct xnclock *newclock); ++#else ++static inline ++void xntimer_switch_tracking(struct xntimer *timer, ++ struct xnclock *newclock) { } ++#endif ++ ++void xntimer_destroy(struct xntimer *timer); ++ ++/** ++ * @fn xnticks_t xntimer_interval(struct xntimer *timer) ++ * ++ * @brief Return the timer interval value. ++ * ++ * Return the timer interval value in nanoseconds. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @return The duration of a period in nanoseconds. The special value ++ * XN_INFINITE is returned if @a timer is currently disabled or ++ * one shot. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++static inline xnticks_t xntimer_interval(struct xntimer *timer) ++{ ++ return timer->interval_ns; ++} ++ ++static inline xnticks_t xntimer_expiry(struct xntimer *timer) ++{ ++ /* Real expiry date in ticks without anticipation (no gravity) */ ++ return xntimerh_date(&timer->aplink) + xntimer_gravity(timer); ++} ++ ++int xntimer_start(struct xntimer *timer, ++ xnticks_t value, ++ xnticks_t interval, ++ xntmode_t mode); ++ ++void __xntimer_stop(struct xntimer *timer); ++ ++xnticks_t xntimer_get_date(struct xntimer *timer); ++ ++xnticks_t __xntimer_get_timeout(struct xntimer *timer); ++ ++xnticks_t xntimer_get_interval(struct xntimer *timer); ++ ++int xntimer_heading_p(struct xntimer *timer); ++ ++static inline void xntimer_stop(struct xntimer *timer) ++{ ++ if (timer->status & XNTIMER_RUNNING) ++ __xntimer_stop(timer); ++} ++ ++static inline xnticks_t xntimer_get_timeout(struct xntimer *timer) ++{ ++ if (!xntimer_running_p(timer)) ++ return XN_INFINITE; ++ ++ return __xntimer_get_timeout(timer); ++} ++ ++static inline xnticks_t xntimer_get_timeout_stopped(struct xntimer *timer) ++{ ++ return __xntimer_get_timeout(timer); ++} ++ ++static inline void xntimer_enqueue(struct xntimer *timer, ++ xntimerq_t *q) ++{ ++ xntimerq_insert(q, &timer->aplink); ++ timer->status &= ~XNTIMER_DEQUEUED; ++ xntimer_account_scheduled(timer); ++} ++ ++static inline void xntimer_dequeue(struct xntimer *timer, ++ xntimerq_t *q) ++{ ++ xntimerq_remove(q, &timer->aplink); ++ timer->status |= XNTIMER_DEQUEUED; ++} ++ ++unsigned long long xntimer_get_overruns(struct xntimer *timer, ++ struct xnthread *waiter, ++ xnticks_t now); ++ ++#ifdef CONFIG_SMP ++ ++void __xntimer_migrate(struct xntimer *timer, struct xnsched *sched); ++ ++static inline ++void xntimer_migrate(struct xntimer *timer, struct xnsched *sched) ++{ /* nklocked, IRQs off */ ++ if (timer->sched != sched) ++ __xntimer_migrate(timer, sched); ++} ++ ++int xntimer_setup_ipi(void); ++ ++void xntimer_release_ipi(void); ++ ++void __xntimer_set_affinity(struct xntimer *timer, ++ struct xnsched *sched); ++ ++static inline void xntimer_set_affinity(struct xntimer *timer, ++ struct xnsched *sched) ++{ ++ if (sched != xntimer_sched(timer)) ++ __xntimer_set_affinity(timer, sched); ++} ++ ++#else /* ! 
CONFIG_SMP */ ++ ++static inline void xntimer_migrate(struct xntimer *timer, ++ struct xnsched *sched) ++{ ++ timer->sched = sched; ++} ++ ++static inline int xntimer_setup_ipi(void) ++{ ++ return 0; ++} ++ ++static inline void xntimer_release_ipi(void) { } ++ ++static inline void xntimer_set_affinity(struct xntimer *timer, ++ struct xnsched *sched) ++{ ++ xntimer_migrate(timer, sched); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++char *xntimer_format_time(xnticks_t ns, ++ char *buf, size_t bufsz); ++ ++int xntimer_grab_hardware(void); ++ ++void xntimer_release_hardware(void); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_TIMER_H */ +--- linux/include/xenomai/cobalt/kernel/tree.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/tree.h 2021-04-07 16:01:28.111632903 +0800 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (C) 2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_TREE_H ++#define _COBALT_KERNEL_TREE_H ++ ++#include ++#include ++#include ++ ++typedef unsigned long long xnkey_t; ++ ++static inline xnkey_t PTR_KEY(void *p) ++{ ++ return (xnkey_t)(long)p; ++} ++ ++struct xnid { ++ xnkey_t key; ++ struct rb_node link; ++}; ++ ++#define xnid_entry(ptr, type, member) \ ++ ({ \ ++ typeof(ptr) _ptr = (ptr); \ ++ (_ptr ? container_of(_ptr, type, member.link) : NULL); \ ++ }) ++ ++#define xnid_next_entry(ptr, member) \ ++ xnid_entry(rb_next(&ptr->member.link), typeof(*ptr), member) ++ ++static inline void xntree_init(struct rb_root *t) ++{ ++ *t = RB_ROOT; ++} ++ ++#define xntree_for_each_entry(pos, root, member) \ ++ for (pos = xnid_entry(rb_first(root), typeof(*pos), member); \ ++ pos; pos = xnid_next_entry(pos, member)) ++ ++void xntree_cleanup(struct rb_root *t, void *cookie, ++ void (*destroy)(void *cookie, struct xnid *id)); ++ ++int xnid_enter(struct rb_root *t, struct xnid *xnid, xnkey_t key); ++ ++static inline xnkey_t xnid_key(struct xnid *i) ++{ ++ return i->key; ++} ++ ++static inline ++struct xnid *xnid_fetch(struct rb_root *t, xnkey_t key) ++{ ++ struct rb_node *node = t->rb_node; ++ ++ while (node) { ++ struct xnid *i = container_of(node, struct xnid, link); ++ ++ if (key < i->key) ++ node = node->rb_left; ++ else if (key > i->key) ++ node = node->rb_right; ++ else ++ return i; ++ } ++ ++ return NULL; ++} ++ ++static inline int xnid_remove(struct rb_root *t, struct xnid *xnid) ++{ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ if (xnid_fetch(t, xnid->key) != xnid) ++ return -ENOENT; ++#endif ++ rb_erase(&xnid->link, t); ++ return 0; ++} ++ ++#endif /* _COBALT_KERNEL_TREE_H */ +--- linux/include/xenomai/cobalt/kernel/lock.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/lock.h 2021-04-07 16:01:28.106632910 +0800 +@@ -0,0 +1,288 @@ ++/* ++ * Copyright (C) 2001-2008,2012 Philippe Gerum . 
++ * Copyright (C) 2004,2005 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_LOCK_H ++#define _COBALT_KERNEL_LOCK_H ++ ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_lock ++ * ++ * @{ ++ */ ++typedef unsigned long spl_t; ++ ++/** ++ * Hard disable interrupts on the local processor, saving previous state. ++ * ++ * @param[out] x An unsigned long integer context variable ++ */ ++#define splhigh(x) ((x) = ipipe_test_and_stall_head() & 1) ++#ifdef CONFIG_SMP ++/** ++ * Restore the saved hard interrupt state on the local processor. ++ * ++ * @param[in] x The context variable previously updated by splhigh() ++ */ ++#define splexit(x) ipipe_restore_head(x & 1) ++#else /* !CONFIG_SMP */ ++#define splexit(x) ipipe_restore_head(x) ++#endif /* !CONFIG_SMP */ ++/** ++ * Hard disable interrupts on the local processor. ++ */ ++#define splmax() ipipe_stall_head() ++/** ++ * Hard enable interrupts on the local processor. ++ */ ++#define splnone() ipipe_unstall_head() ++/** ++ * Test hard interrupt state on the local processor. ++ * ++ * @return Zero if the local processor currently accepts interrupts, ++ * non-zero otherwise. 
++ */ ++#define spltest() ipipe_test_head() ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++ ++struct xnlock { ++ unsigned owner; ++ arch_spinlock_t alock; ++ const char *file; ++ const char *function; ++ unsigned int line; ++ int cpu; ++ unsigned long long spin_time; ++ unsigned long long lock_date; ++}; ++ ++struct xnlockinfo { ++ unsigned long long spin_time; ++ unsigned long long lock_time; ++ const char *file; ++ const char *function; ++ unsigned int line; ++}; ++ ++#define XNARCH_LOCK_UNLOCKED (struct xnlock) { \ ++ ~0, \ ++ __ARCH_SPIN_LOCK_UNLOCKED, \ ++ NULL, \ ++ NULL, \ ++ 0, \ ++ -1, \ ++ 0LL, \ ++ 0LL, \ ++} ++ ++#define XNLOCK_DBG_CONTEXT , __FILE__, __LINE__, __FUNCTION__ ++#define XNLOCK_DBG_CONTEXT_ARGS \ ++ , const char *file, int line, const char *function ++#define XNLOCK_DBG_PASS_CONTEXT , file, line, function ++ ++void xnlock_dbg_prepare_acquire(unsigned long long *start); ++void xnlock_dbg_prepare_spin(unsigned int *spin_limit); ++void xnlock_dbg_acquired(struct xnlock *lock, int cpu, ++ unsigned long long *start, ++ const char *file, int line, ++ const char *function); ++int xnlock_dbg_release(struct xnlock *lock, ++ const char *file, int line, ++ const char *function); ++ ++DECLARE_PER_CPU(struct xnlockinfo, xnlock_stats); ++ ++#else /* !CONFIG_XENO_OPT_DEBUG_LOCKING */ ++ ++struct xnlock { ++ unsigned owner; ++ arch_spinlock_t alock; ++}; ++ ++#define XNARCH_LOCK_UNLOCKED \ ++ (struct xnlock) { \ ++ ~0, \ ++ __ARCH_SPIN_LOCK_UNLOCKED, \ ++ } ++ ++#define XNLOCK_DBG_CONTEXT ++#define XNLOCK_DBG_CONTEXT_ARGS ++#define XNLOCK_DBG_PASS_CONTEXT ++ ++static inline ++void xnlock_dbg_prepare_acquire(unsigned long long *start) ++{ ++} ++ ++static inline ++void xnlock_dbg_prepare_spin(unsigned int *spin_limit) ++{ ++} ++ ++static inline void ++xnlock_dbg_acquired(struct xnlock *lock, int cpu, ++ unsigned long long *start) ++{ ++} ++ ++static inline int xnlock_dbg_release(struct xnlock *lock) ++{ ++ return 0; ++} ++ ++#endif /* !CONFIG_XENO_OPT_DEBUG_LOCKING */ ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_XENO_OPT_DEBUG_LOCKING) ++ ++#define xnlock_get(lock) __xnlock_get(lock XNLOCK_DBG_CONTEXT) ++#define xnlock_put(lock) __xnlock_put(lock XNLOCK_DBG_CONTEXT) ++#define xnlock_get_irqsave(lock,x) \ ++ ((x) = __xnlock_get_irqsave(lock XNLOCK_DBG_CONTEXT)) ++#define xnlock_put_irqrestore(lock,x) \ ++ __xnlock_put_irqrestore(lock,x XNLOCK_DBG_CONTEXT) ++#define xnlock_clear_irqoff(lock) xnlock_put_irqrestore(lock, 1) ++#define xnlock_clear_irqon(lock) xnlock_put_irqrestore(lock, 0) ++ ++static inline void xnlock_init (struct xnlock *lock) ++{ ++ *lock = XNARCH_LOCK_UNLOCKED; ++} ++ ++#define DECLARE_XNLOCK(lock) struct xnlock lock ++#define DECLARE_EXTERN_XNLOCK(lock) extern struct xnlock lock ++#define DEFINE_XNLOCK(lock) struct xnlock lock = XNARCH_LOCK_UNLOCKED ++#define DEFINE_PRIVATE_XNLOCK(lock) static DEFINE_XNLOCK(lock) ++ ++static inline int ____xnlock_get(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ int cpu = ipipe_processor_id(); ++ unsigned long long start; ++ ++ if (lock->owner == cpu) ++ return 2; ++ ++ xnlock_dbg_prepare_acquire(&start); ++ ++ arch_spin_lock(&lock->alock); ++ lock->owner = cpu; ++ ++ xnlock_dbg_acquired(lock, cpu, &start /*, */ XNLOCK_DBG_PASS_CONTEXT); ++ ++ return 0; ++} ++ ++static inline void ____xnlock_put(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ if (xnlock_dbg_release(lock /*, */ XNLOCK_DBG_PASS_CONTEXT)) ++ return; ++ ++ lock->owner = ~0U; ++ arch_spin_unlock(&lock->alock); ++} ++ ++#ifndef 
CONFIG_XENO_ARCH_OUTOFLINE_XNLOCK ++#define ___xnlock_get ____xnlock_get ++#define ___xnlock_put ____xnlock_put ++#else /* out of line xnlock */ ++int ___xnlock_get(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS); ++ ++void ___xnlock_put(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS); ++#endif /* out of line xnlock */ ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++/* Disable UP-over-SMP kernel optimization in debug mode. */ ++#define __locking_active__ 1 ++#else ++#define __locking_active__ ipipe_smp_p ++#endif ++ ++static inline spl_t ++__xnlock_get_irqsave(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ unsigned long flags; ++ ++ splhigh(flags); ++ ++ if (__locking_active__) ++ flags |= ___xnlock_get(lock /*, */ XNLOCK_DBG_PASS_CONTEXT); ++ ++ return flags; ++} ++ ++static inline void __xnlock_put_irqrestore(struct xnlock *lock, spl_t flags ++ /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ /* Only release the lock if we didn't take it recursively. */ ++ if (__locking_active__ && !(flags & 2)) ++ ___xnlock_put(lock /*, */ XNLOCK_DBG_PASS_CONTEXT); ++ ++ splexit(flags & 1); ++} ++ ++static inline int xnlock_is_owner(struct xnlock *lock) ++{ ++ if (__locking_active__) ++ return lock->owner == ipipe_processor_id(); ++ ++ return 1; ++} ++ ++static inline int __xnlock_get(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ if (__locking_active__) ++ return ___xnlock_get(lock /* , */ XNLOCK_DBG_PASS_CONTEXT); ++ ++ return 0; ++} ++ ++static inline void __xnlock_put(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ if (__locking_active__) ++ ___xnlock_put(lock /*, */ XNLOCK_DBG_PASS_CONTEXT); ++} ++ ++#undef __locking_active__ ++ ++#else /* !(CONFIG_SMP || CONFIG_XENO_OPT_DEBUG_LOCKING) */ ++ ++#define xnlock_init(lock) do { } while(0) ++#define xnlock_get(lock) do { } while(0) ++#define xnlock_put(lock) do { } while(0) ++#define xnlock_get_irqsave(lock,x) splhigh(x) ++#define xnlock_put_irqrestore(lock,x) splexit(x) ++#define xnlock_clear_irqoff(lock) splmax() ++#define xnlock_clear_irqon(lock) splnone() ++#define xnlock_is_owner(lock) 1 ++ ++#define DECLARE_XNLOCK(lock) ++#define DECLARE_EXTERN_XNLOCK(lock) ++#define DEFINE_XNLOCK(lock) ++#define DEFINE_PRIVATE_XNLOCK(lock) ++ ++#endif /* !(CONFIG_SMP || CONFIG_XENO_OPT_DEBUG_LOCKING) */ ++ ++DECLARE_EXTERN_XNLOCK(nklock); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_LOCK_H */ +--- linux/include/xenomai/cobalt/kernel/apc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/apc.h 2021-04-07 16:01:28.101632917 +0800 +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (C) 2012 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_APC_H ++#define _COBALT_KERNEL_APC_H ++ ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_apc ++ * @{ ++ */ ++ ++int xnapc_alloc(const char *name, ++ void (*handler)(void *cookie), ++ void *cookie); ++ ++void xnapc_free(int apc); ++ ++static inline void __xnapc_schedule(int apc) ++{ ++ unsigned long *p = &raw_cpu_ptr(&cobalt_machine_cpudata)->apc_pending; ++ ++ if (!__test_and_set_bit(apc, p)) ++ ipipe_post_irq_root(cobalt_pipeline.apc_virq); ++} ++ ++/** ++ * @fn static inline int xnapc_schedule(int apc) ++ * ++ * @brief Schedule an APC invocation. ++ * ++ * This service marks the APC as pending for the Linux domain, so that ++ * its handler will be called as soon as possible, when the Linux ++ * domain gets back in control. ++ * ++ * When posted from the Linux domain, the APC handler is fired as soon ++ * as the interrupt mask is explicitly cleared by some kernel ++ * code. When posted from the Xenomai domain, the APC handler is ++ * fired as soon as the Linux domain is resumed, i.e. after Xenomai has ++ * completed all its pending duties. ++ * ++ * @param apc The APC id. to schedule. ++ * ++ * This service can be called from: ++ * ++ * - Any domain context, albeit the usual calling place is from the ++ * Xenomai domain. ++ */ ++static inline void xnapc_schedule(int apc) ++{ ++ unsigned long flags; ++ ++ flags = ipipe_test_and_stall_head() & 1; ++ __xnapc_schedule(apc); ++ ipipe_restore_head(flags); ++} ++ ++void apc_dispatch(unsigned int virq, void *arg); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_APC_H */ +--- linux/include/xenomai/cobalt/kernel/heap.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/heap.h 2021-04-07 16:01:28.097632923 +0800 +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_HEAP_H ++#define _COBALT_KERNEL_HEAP_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_heap ++ * @{ ++ */ ++ ++#define XNHEAP_PAGE_SHIFT 9 /* 2^9 => 512 bytes */ ++#define XNHEAP_PAGE_SIZE (1UL << XNHEAP_PAGE_SHIFT) ++#define XNHEAP_PAGE_MASK (~(XNHEAP_PAGE_SIZE - 1)) ++#define XNHEAP_MIN_LOG2 4 /* 16 bytes */ ++/* ++ * Use bucketed memory for sizes between 2^XNHEAP_MIN_LOG2 and ++ * 2^(XNHEAP_PAGE_SHIFT-1). ++ */ ++#define XNHEAP_MAX_BUCKETS (XNHEAP_PAGE_SHIFT - XNHEAP_MIN_LOG2) ++#define XNHEAP_MIN_ALIGN (1U << XNHEAP_MIN_LOG2) ++/* Maximum size of a heap (4Gb - PAGE_SIZE). */ ++#define XNHEAP_MAX_HEAPSZ (4294967295U - PAGE_SIZE + 1) ++/* Bits we need for encoding a page # */ ++#define XNHEAP_PGENT_BITS (32 - XNHEAP_PAGE_SHIFT) ++/* Each page is represented by a page map entry. 
*/ ++#define XNHEAP_PGMAP_BYTES sizeof(struct xnheap_pgentry) ++ ++struct xnheap_pgentry { ++ /* Linkage in bucket list. */ ++ unsigned int prev : XNHEAP_PGENT_BITS; ++ unsigned int next : XNHEAP_PGENT_BITS; ++ /* page_list or log2. */ ++ unsigned int type : 6; ++ /* ++ * We hold either a spatial map of busy blocks within the page ++ * for bucketed memory (up to 32 blocks per page), or the ++ * overall size of the multi-page block if entry.type == ++ * page_list. ++ */ ++ union { ++ u32 map; ++ u32 bsize; ++ }; ++}; ++ ++/* ++ * A range descriptor is stored at the beginning of the first page of ++ * a range of free pages. xnheap_range.size is nrpages * ++ * XNHEAP_PAGE_SIZE. Ranges are indexed by address and size in ++ * rbtrees. ++ */ ++struct xnheap_range { ++ struct rb_node addr_node; ++ struct rb_node size_node; ++ size_t size; ++}; ++ ++struct xnheap { ++ void *membase; ++ struct rb_root addr_tree; ++ struct rb_root size_tree; ++ struct xnheap_pgentry *pagemap; ++ size_t usable_size; ++ size_t used_size; ++ u32 buckets[XNHEAP_MAX_BUCKETS]; ++ char name[XNOBJECT_NAME_LEN]; ++ DECLARE_XNLOCK(lock); ++ struct list_head next; ++}; ++ ++extern struct xnheap cobalt_heap; ++ ++#define xnmalloc(size) xnheap_alloc(&cobalt_heap, size) ++#define xnfree(ptr) xnheap_free(&cobalt_heap, ptr) ++ ++static inline void *xnheap_get_membase(const struct xnheap *heap) ++{ ++ return heap->membase; ++} ++ ++static inline ++size_t xnheap_get_size(const struct xnheap *heap) ++{ ++ return heap->usable_size; ++} ++ ++static inline ++size_t xnheap_get_used(const struct xnheap *heap) ++{ ++ return heap->used_size; ++} ++ ++static inline ++size_t xnheap_get_free(const struct xnheap *heap) ++{ ++ return heap->usable_size - heap->used_size; ++} ++ ++int xnheap_init(struct xnheap *heap, ++ void *membase, size_t size); ++ ++void xnheap_destroy(struct xnheap *heap); ++ ++void *xnheap_alloc(struct xnheap *heap, size_t size); ++ ++void xnheap_free(struct xnheap *heap, void *block); ++ ++ssize_t xnheap_check_block(struct xnheap *heap, void *block); ++ ++void xnheap_set_name(struct xnheap *heap, ++ const char *name, ...); ++ ++void *xnheap_vmalloc(size_t size); ++ ++void xnheap_vfree(void *p); ++ ++static inline void *xnheap_zalloc(struct xnheap *heap, size_t size) ++{ ++ void *p; ++ ++ p = xnheap_alloc(heap, size); ++ if (p) ++ memset(p, 0, size); ++ ++ return p; ++} ++ ++static inline char *xnstrdup(const char *s) ++{ ++ char *p; ++ ++ p = xnmalloc(strlen(s) + 1); ++ if (p == NULL) ++ return NULL; ++ ++ return strcpy(p, s); ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++void xnheap_init_proc(void); ++void xnheap_cleanup_proc(void); ++#else /* !CONFIG_XENO_OPT_VFILE */ ++static inline void xnheap_init_proc(void) { } ++static inline void xnheap_cleanup_proc(void) { } ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_HEAP_H */ +--- linux/include/xenomai/cobalt/kernel/schedparam.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/schedparam.h 2021-04-07 16:01:28.092632930 +0800 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHEDPARAM_H ++#define _COBALT_KERNEL_SCHEDPARAM_H ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++struct xnsched_idle_param { ++ int prio; ++}; ++ ++struct xnsched_weak_param { ++ int prio; ++}; ++ ++struct xnsched_rt_param { ++ int prio; ++}; ++ ++struct xnsched_tp_param { ++ int prio; ++ int ptid; /* partition id. */ ++}; ++ ++struct xnsched_sporadic_param { ++ xnticks_t init_budget; ++ xnticks_t repl_period; ++ int max_repl; ++ int low_prio; ++ int normal_prio; ++ int current_prio; ++}; ++ ++struct xnsched_quota_param { ++ int prio; ++ int tgid; /* thread group id. */ ++}; ++ ++union xnsched_policy_param { ++ struct xnsched_idle_param idle; ++ struct xnsched_rt_param rt; ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ struct xnsched_weak_param weak; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ struct xnsched_tp_param tp; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ struct xnsched_sporadic_param pss; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ struct xnsched_quota_param quota; ++#endif ++}; ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHEDPARAM_H */ +--- linux/include/xenomai/cobalt/kernel/ppd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/ppd.h 2021-04-07 16:01:28.087632937 +0800 +@@ -0,0 +1,42 @@ ++/* ++ * Copyright © 2006 Gilles Chanteperdrix ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_PPD_H ++#define _COBALT_KERNEL_PPD_H ++ ++#include ++#include ++#include ++#include ++ ++struct cobalt_umm { ++ struct xnheap heap; ++ atomic_t refcount; ++ void (*release)(struct cobalt_umm *umm); ++}; ++ ++struct cobalt_ppd { ++ struct cobalt_umm umm; ++ atomic_t refcnt; ++ char *exe_path; ++ struct rb_root fds; ++}; ++ ++extern struct cobalt_ppd cobalt_kernel_ppd; ++ ++#endif /* _COBALT_KERNEL_PPD_H */ +--- linux/include/xenomai/cobalt/kernel/sched-quota.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-quota.h 2021-04-07 16:01:28.082632945 +0800 +@@ -0,0 +1,93 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_QUOTA_H ++#define _COBALT_KERNEL_SCHED_QUOTA_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-quota.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ ++#define XNSCHED_QUOTA_MIN_PRIO 1 ++#define XNSCHED_QUOTA_MAX_PRIO 255 ++#define XNSCHED_QUOTA_NR_PRIO \ ++ (XNSCHED_QUOTA_MAX_PRIO - XNSCHED_QUOTA_MIN_PRIO + 1) ++ ++extern struct xnsched_class xnsched_class_quota; ++ ++struct xnsched_quota_group { ++ struct xnsched *sched; ++ xnticks_t quota_ns; ++ xnticks_t quota_peak_ns; ++ xnticks_t run_start_ns; ++ xnticks_t run_budget_ns; ++ xnticks_t run_credit_ns; ++ struct list_head members; ++ struct list_head expired; ++ struct list_head next; ++ int nr_active; ++ int nr_threads; ++ int tgid; ++ int quota_percent; ++ int quota_peak_percent; ++}; ++ ++struct xnsched_quota { ++ xnticks_t period_ns; ++ struct xntimer refill_timer; ++ struct xntimer limit_timer; ++ struct list_head groups; ++}; ++ ++static inline int xnsched_quota_init_thread(struct xnthread *thread) ++{ ++ thread->quota = NULL; ++ INIT_LIST_HEAD(&thread->quota_expired); ++ ++ return 0; ++} ++ ++int xnsched_quota_create_group(struct xnsched_quota_group *tg, ++ struct xnsched *sched, ++ int *quota_sum_r); ++ ++int xnsched_quota_destroy_group(struct xnsched_quota_group *tg, ++ int force, ++ int *quota_sum_r); ++ ++void xnsched_quota_set_limit(struct xnsched_quota_group *tg, ++ int quota_percent, int quota_peak_percent, ++ int *quota_sum_r); ++ ++struct xnsched_quota_group * ++xnsched_quota_find_group(struct xnsched *sched, int tgid); ++ ++int xnsched_quota_sum_all(struct xnsched *sched); ++ ++#endif /* !CONFIG_XENO_OPT_SCHED_QUOTA */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_QUOTA_H */ +--- linux/include/xenomai/cobalt/kernel/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/thread.h 2021-04-07 16:01:28.078632950 +0800 +@@ -0,0 +1,570 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_THREAD_H ++#define _COBALT_KERNEL_THREAD_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_thread ++ * @{ ++ */ ++#define XNTHREAD_BLOCK_BITS (XNSUSP|XNPEND|XNDELAY|XNDORMANT|XNRELAX|XNMIGRATE|XNHELD|XNDBGSTOP) ++#define XNTHREAD_MODE_BITS (XNRRB|XNWARN|XNTRAPLB) ++ ++struct xnthread; ++struct xnsched; ++struct xnselector; ++struct xnsched_class; ++struct xnsched_tpslot; ++struct xnthread_personality; ++struct completion; ++ ++struct xnthread_init_attr { ++ struct xnthread_personality *personality; ++ cpumask_t affinity; ++ int flags; ++ const char *name; ++}; ++ ++struct xnthread_start_attr { ++ int mode; ++ void (*entry)(void *cookie); ++ void *cookie; ++}; ++ ++struct xnthread_wait_context { ++ int posted; ++}; ++ ++struct xnthread_personality { ++ const char *name; ++ unsigned int magic; ++ int xid; ++ atomic_t refcnt; ++ struct { ++ void *(*attach_process)(void); ++ void (*detach_process)(void *arg); ++ void (*map_thread)(struct xnthread *thread); ++ struct xnthread_personality *(*relax_thread)(struct xnthread *thread); ++ struct xnthread_personality *(*harden_thread)(struct xnthread *thread); ++ struct xnthread_personality *(*move_thread)(struct xnthread *thread, ++ int dest_cpu); ++ struct xnthread_personality *(*exit_thread)(struct xnthread *thread); ++ struct xnthread_personality *(*finalize_thread)(struct xnthread *thread); ++ } ops; ++ struct module *module; ++}; ++ ++struct xnthread { ++ struct xnarchtcb tcb; /* Architecture-dependent block */ ++ ++ __u32 state; /* Thread state flags */ ++ __u32 info; /* Thread information flags */ ++ __u32 local_info; /* Local thread information flags */ ++ ++ struct xnsched *sched; /* Thread scheduler */ ++ struct xnsched_class *sched_class; /* Current scheduling class */ ++ struct xnsched_class *base_class; /* Base scheduling class */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ struct xnsched_tpslot *tps; /* Current partition slot for TP scheduling */ ++ struct list_head tp_link; /* Link in per-sched TP thread queue */ ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ struct xnsched_sporadic_data *pss; /* Sporadic scheduling data. */ ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ struct xnsched_quota_group *quota; /* Quota scheduling group. */ ++ struct list_head quota_expired; ++ struct list_head quota_next; ++#endif ++ cpumask_t affinity; /* Processor affinity. */ ++ ++ /** Base priority (before PI/PP boost) */ ++ int bprio; ++ ++ /** Current (effective) priority */ ++ int cprio; ++ ++ /** ++ * Weighted priority (cprio + scheduling class weight). ++ */ ++ int wprio; ++ ++ int lock_count; /** Scheduler lock count. */ ++ ++ /** ++ * Thread holder in xnsched run queue. Ordered by ++ * thread->cprio. ++ */ ++ struct list_head rlink; ++ ++ /** ++ * Thread holder in xnsynch pendq. Prioritized by ++ * thread->cprio + scheduling class weight. ++ */ ++ struct list_head plink; ++ ++ /** Thread holder in global queue. */ ++ struct list_head glink; ++ ++ /** ++ * List of xnsynch owned by this thread which cause a priority ++ * boost due to one of the following reasons: ++ * ++ * - they are currently claimed by other thread(s) when ++ * enforcing the priority inheritance protocol (XNSYNCH_PI). ++ * ++ * - they require immediate priority ceiling (XNSYNCH_PP). ++ * ++ * This list is ordered by decreasing (weighted) thread ++ * priorities. 
++ */ ++ struct list_head boosters; ++ ++ struct xnsynch *wchan; /* Resource the thread pends on */ ++ ++ struct xnsynch *wwake; /* Wait channel the thread was resumed from */ ++ ++ int res_count; /* Held resources count */ ++ ++ struct xntimer rtimer; /* Resource timer */ ++ ++ struct xntimer ptimer; /* Periodic timer */ ++ ++ xnticks_t rrperiod; /* Allotted round-robin period (ns) */ ++ ++ struct xnthread_wait_context *wcontext; /* Active wait context. */ ++ ++ struct { ++ xnstat_counter_t ssw; /* Primary -> secondary mode switch count */ ++ xnstat_counter_t csw; /* Context switches (includes secondary -> primary switches) */ ++ xnstat_counter_t xsc; /* Xenomai syscalls */ ++ xnstat_counter_t pf; /* Number of page faults */ ++ xnstat_exectime_t account; /* Execution time accounting entity */ ++ xnstat_exectime_t lastperiod; /* Interval marker for execution time reports */ ++ } stat; ++ ++ struct xnselector *selector; /* For select. */ ++ ++ xnhandle_t handle; /* Handle in registry */ ++ ++ char name[XNOBJECT_NAME_LEN]; /* Symbolic name of thread */ ++ ++ void (*entry)(void *cookie); /* Thread entry routine */ ++ void *cookie; /* Cookie to pass to the entry routine */ ++ ++ /** ++ * Thread data visible from userland through a window on the ++ * global heap. ++ */ ++ struct xnthread_user_window *u_window; ++ ++ struct xnthread_personality *personality; ++ ++ struct completion exited; ++ ++#ifdef CONFIG_XENO_OPT_DEBUG ++ const char *exe_path; /* Executable path */ ++ u32 proghash; /* Hash value for exe_path */ ++#endif ++}; ++ ++static inline int xnthread_get_state(const struct xnthread *thread) ++{ ++ return thread->state; ++} ++ ++static inline int xnthread_test_state(struct xnthread *thread, int bits) ++{ ++ return thread->state & bits; ++} ++ ++static inline void xnthread_set_state(struct xnthread *thread, int bits) ++{ ++ thread->state |= bits; ++} ++ ++static inline void xnthread_clear_state(struct xnthread *thread, int bits) ++{ ++ thread->state &= ~bits; ++} ++ ++static inline int xnthread_test_info(struct xnthread *thread, int bits) ++{ ++ return thread->info & bits; ++} ++ ++static inline void xnthread_set_info(struct xnthread *thread, int bits) ++{ ++ thread->info |= bits; ++} ++ ++static inline void xnthread_clear_info(struct xnthread *thread, int bits) ++{ ++ thread->info &= ~bits; ++} ++ ++static inline int xnthread_test_localinfo(struct xnthread *curr, int bits) ++{ ++ return curr->local_info & bits; ++} ++ ++static inline void xnthread_set_localinfo(struct xnthread *curr, int bits) ++{ ++ curr->local_info |= bits; ++} ++ ++static inline void xnthread_clear_localinfo(struct xnthread *curr, int bits) ++{ ++ curr->local_info &= ~bits; ++} ++ ++static inline struct xnarchtcb *xnthread_archtcb(struct xnthread *thread) ++{ ++ return &thread->tcb; ++} ++ ++static inline int xnthread_base_priority(const struct xnthread *thread) ++{ ++ return thread->bprio; ++} ++ ++static inline int xnthread_current_priority(const struct xnthread *thread) ++{ ++ return thread->cprio; ++} ++ ++static inline struct task_struct *xnthread_host_task(struct xnthread *thread) ++{ ++ return xnthread_archtcb(thread)->core.host_task; ++} ++ ++#define xnthread_for_each_booster(__pos, __thread) \ ++ list_for_each_entry(__pos, &(__thread)->boosters, next) ++ ++#define xnthread_for_each_booster_safe(__pos, __tmp, __thread) \ ++ list_for_each_entry_safe(__pos, __tmp, &(__thread)->boosters, next) ++ ++#define xnthread_run_handler(__t, __h, __a...) 
\ ++ do { \ ++ struct xnthread_personality *__p__ = (__t)->personality; \ ++ if ((__p__)->ops.__h) \ ++ (__p__)->ops.__h(__t, ##__a); \ ++ } while (0) ++ ++#define xnthread_run_handler_stack(__t, __h, __a...) \ ++ do { \ ++ struct xnthread_personality *__p__ = (__t)->personality; \ ++ do { \ ++ if ((__p__)->ops.__h == NULL) \ ++ break; \ ++ __p__ = (__p__)->ops.__h(__t, ##__a); \ ++ } while (__p__); \ ++ } while (0) ++ ++static inline ++struct xnthread_wait_context *xnthread_get_wait_context(struct xnthread *thread) ++{ ++ return thread->wcontext; ++} ++ ++static inline ++int xnthread_register(struct xnthread *thread, const char *name) ++{ ++ return xnregistry_enter(name, thread, &thread->handle, NULL); ++} ++ ++static inline ++struct xnthread *xnthread_lookup(xnhandle_t threadh) ++{ ++ struct xnthread *thread = xnregistry_lookup(threadh, NULL); ++ return thread && thread->handle == xnhandle_get_index(threadh) ? thread : NULL; ++} ++ ++static inline void xnthread_sync_window(struct xnthread *thread) ++{ ++ if (thread->u_window) { ++ thread->u_window->state = thread->state; ++ thread->u_window->info = thread->info; ++ } ++} ++ ++static inline ++void xnthread_clear_sync_window(struct xnthread *thread, int state_bits) ++{ ++ if (thread->u_window) { ++ thread->u_window->state = thread->state & ~state_bits; ++ thread->u_window->info = thread->info; ++ } ++} ++ ++static inline ++void xnthread_set_sync_window(struct xnthread *thread, int state_bits) ++{ ++ if (thread->u_window) { ++ thread->u_window->state = thread->state | state_bits; ++ thread->u_window->info = thread->info; ++ } ++} ++ ++static inline int normalize_priority(int prio) ++{ ++ return prio < MAX_RT_PRIO ? prio : MAX_RT_PRIO - 1; ++} ++ ++int __xnthread_init(struct xnthread *thread, ++ const struct xnthread_init_attr *attr, ++ struct xnsched *sched, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param); ++ ++void __xnthread_test_cancel(struct xnthread *curr); ++ ++void __xnthread_cleanup(struct xnthread *curr); ++ ++void __xnthread_discard(struct xnthread *thread); ++ ++/** ++ * @fn struct xnthread *xnthread_current(void) ++ * @brief Retrieve the current Cobalt core TCB. ++ * ++ * Returns the address of the current Cobalt core thread descriptor, ++ * or NULL if running over a regular Linux task. This call is not ++ * affected by the current runtime mode of the core thread. ++ * ++ * @note The returned value may differ from xnsched_current_thread() ++ * called from the same context, since the latter returns the root ++ * thread descriptor for the current CPU if the caller is running in ++ * secondary mode. ++ * ++ * @coretags{unrestricted} ++ */ ++static inline struct xnthread *xnthread_current(void) ++{ ++ return ipipe_current_threadinfo()->thread; ++} ++ ++/** ++ * @fn struct xnthread *xnthread_from_task(struct task_struct *p) ++ * @brief Retrieve the Cobalt core TCB attached to a Linux task. ++ * ++ * Returns the address of the Cobalt core thread descriptor attached ++ * to the Linux task @a p, or NULL if @a p is a regular Linux ++ * task. This call is not affected by the current runtime mode of the ++ * core thread. ++ * ++ * @coretags{unrestricted} ++ */ ++static inline struct xnthread *xnthread_from_task(struct task_struct *p) ++{ ++ return ipipe_task_threadinfo(p)->thread; ++} ++ ++/** ++ * @fn void xnthread_test_cancel(void) ++ * @brief Introduce a thread cancellation point. ++ * ++ * Terminates the current thread if a cancellation request is pending ++ * for it, i.e. 
if xnthread_cancel() was called. ++ * ++ * @coretags{mode-unrestricted} ++ */ ++static inline void xnthread_test_cancel(void) ++{ ++ struct xnthread *curr = xnthread_current(); ++ ++ if (curr && xnthread_test_info(curr, XNCANCELD)) ++ __xnthread_test_cancel(curr); ++} ++ ++static inline ++void xnthread_complete_wait(struct xnthread_wait_context *wc) ++{ ++ wc->posted = 1; ++} ++ ++static inline ++int xnthread_wait_complete_p(struct xnthread_wait_context *wc) ++{ ++ return wc->posted; ++} ++ ++#ifdef CONFIG_XENO_ARCH_FPU ++void xnthread_switch_fpu(struct xnsched *sched); ++#else ++static inline void xnthread_switch_fpu(struct xnsched *sched) { } ++#endif /* CONFIG_XENO_ARCH_FPU */ ++ ++void xnthread_init_shadow_tcb(struct xnthread *thread); ++ ++void xnthread_init_root_tcb(struct xnthread *thread); ++ ++void xnthread_deregister(struct xnthread *thread); ++ ++char *xnthread_format_status(unsigned long status, ++ char *buf, int size); ++ ++pid_t xnthread_host_pid(struct xnthread *thread); ++ ++int xnthread_set_clock(struct xnthread *thread, ++ struct xnclock *newclock); ++ ++xnticks_t xnthread_get_timeout(struct xnthread *thread, ++ xnticks_t ns); ++ ++xnticks_t xnthread_get_period(struct xnthread *thread); ++ ++void xnthread_prepare_wait(struct xnthread_wait_context *wc); ++ ++int xnthread_init(struct xnthread *thread, ++ const struct xnthread_init_attr *attr, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param); ++ ++int xnthread_start(struct xnthread *thread, ++ const struct xnthread_start_attr *attr); ++ ++int xnthread_set_mode(int clrmask, ++ int setmask); ++ ++void xnthread_suspend(struct xnthread *thread, ++ int mask, ++ xnticks_t timeout, ++ xntmode_t timeout_mode, ++ struct xnsynch *wchan); ++ ++void xnthread_resume(struct xnthread *thread, ++ int mask); ++ ++int xnthread_unblock(struct xnthread *thread); ++ ++int xnthread_set_periodic(struct xnthread *thread, ++ xnticks_t idate, ++ xntmode_t timeout_mode, ++ xnticks_t period); ++ ++int xnthread_wait_period(unsigned long *overruns_r); ++ ++int xnthread_set_slice(struct xnthread *thread, ++ xnticks_t quantum); ++ ++void xnthread_cancel(struct xnthread *thread); ++ ++int xnthread_join(struct xnthread *thread, bool uninterruptible); ++ ++int xnthread_harden(void); ++ ++void xnthread_relax(int notify, int reason); ++ ++void __xnthread_kick(struct xnthread *thread); ++ ++void xnthread_kick(struct xnthread *thread); ++ ++void __xnthread_demote(struct xnthread *thread); ++ ++void xnthread_demote(struct xnthread *thread); ++ ++void xnthread_signal(struct xnthread *thread, ++ int sig, int arg); ++ ++void xnthread_pin_initial(struct xnthread *thread); ++ ++int xnthread_map(struct xnthread *thread, ++ struct completion *done); ++ ++void xnthread_call_mayday(struct xnthread *thread, int reason); ++ ++static inline void xnthread_get_resource(struct xnthread *curr) ++{ ++ if (xnthread_test_state(curr, XNWEAK|XNDEBUG)) ++ curr->res_count++; ++} ++ ++static inline int xnthread_put_resource(struct xnthread *curr) ++{ ++ if (xnthread_test_state(curr, XNWEAK) || ++ IS_ENABLED(CONFIG_XENO_OPT_DEBUG_MUTEX_SLEEP)) { ++ if (unlikely(curr->res_count == 0)) { ++ if (xnthread_test_state(curr, XNWARN)) ++ xnthread_signal(curr, SIGDEBUG, ++ SIGDEBUG_RESCNT_IMBALANCE); ++ return -EPERM; ++ } ++ curr->res_count--; ++ } ++ ++ return 0; ++} ++ ++static inline void xnthread_commit_ceiling(struct xnthread *curr) ++{ ++ if (curr->u_window->pp_pending) ++ xnsynch_commit_ceiling(curr); ++} ++ ++#ifdef CONFIG_SMP ++ ++void 
xnthread_migrate_passive(struct xnthread *thread, ++ struct xnsched *sched); ++#else ++ ++static inline void xnthread_migrate_passive(struct xnthread *thread, ++ struct xnsched *sched) ++{ } ++ ++#endif ++ ++int __xnthread_set_schedparam(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param); ++ ++int xnthread_set_schedparam(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param); ++ ++int xnthread_killall(int grace, int mask); ++ ++void __xnthread_propagate_schedparam(struct xnthread *curr); ++ ++static inline void xnthread_propagate_schedparam(struct xnthread *curr) ++{ ++ if (xnthread_test_info(curr, XNSCHEDP)) ++ __xnthread_propagate_schedparam(curr); ++} ++ ++extern struct xnthread_personality xenomai_personality; ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_THREAD_H */ +--- linux/include/xenomai/cobalt/kernel/assert.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/assert.h 2021-04-07 16:01:28.073632957 +0800 +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (C) 2006 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_ASSERT_H ++#define _COBALT_KERNEL_ASSERT_H ++ ++#include ++#include ++#include ++ ++#define XENO_INFO KERN_INFO "[Xenomai] " ++#define XENO_WARNING KERN_WARNING "[Xenomai] " ++#define XENO_ERR KERN_ERR "[Xenomai] " ++ ++#define XENO_DEBUG(__subsys) \ ++ IS_ENABLED(CONFIG_XENO_OPT_DEBUG_##__subsys) ++#define XENO_ASSERT(__subsys, __cond) \ ++ (!WARN_ON(XENO_DEBUG(__subsys) && !(__cond))) ++#define XENO_BUG(__subsys) \ ++ BUG_ON(XENO_DEBUG(__subsys)) ++#define XENO_BUG_ON(__subsys, __cond) \ ++ BUG_ON(XENO_DEBUG(__subsys) && (__cond)) ++#define XENO_WARN(__subsys, __cond, __fmt...) 
\ ++ WARN(XENO_DEBUG(__subsys) && (__cond), __fmt) ++#define XENO_WARN_ON(__subsys, __cond) \ ++ WARN_ON(XENO_DEBUG(__subsys) && (__cond)) ++#define XENO_WARN_ON_ONCE(__subsys, __cond) \ ++ WARN_ON_ONCE(XENO_DEBUG(__subsys) && (__cond)) ++#ifdef CONFIG_SMP ++#define XENO_BUG_ON_SMP(__subsys, __cond) \ ++ XENO_BUG_ON(__subsys, __cond) ++#define XENO_WARN_ON_SMP(__subsys, __cond) \ ++ XENO_WARN_ON(__subsys, __cond) ++#define XENO_WARN_ON_ONCE_SMP(__subsys, __cond) \ ++ XENO_WARN_ON_ONCE(__subsys, __cond) ++#else ++#define XENO_BUG_ON_SMP(__subsys, __cond) \ ++ do { } while (0) ++#define XENO_WARN_ON_SMP(__subsys, __cond) \ ++ do { } while (0) ++#define XENO_WARN_ON_ONCE_SMP(__subsys, __cond) \ ++ do { } while (0) ++#endif ++ ++#define primary_mode_only() XENO_BUG_ON(CONTEXT, ipipe_root_p) ++#define secondary_mode_only() XENO_BUG_ON(CONTEXT, !ipipe_root_p) ++#define interrupt_only() XENO_BUG_ON(CONTEXT, !xnsched_interrupt_p()) ++#define realtime_cpu_only() XENO_BUG_ON(CONTEXT, !xnsched_supported_cpu(ipipe_processor_id())) ++#define thread_only() XENO_BUG_ON(CONTEXT, xnsched_interrupt_p()) ++#define irqoff_only() XENO_BUG_ON(CONTEXT, hard_irqs_disabled() == 0) ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++#define atomic_only() XENO_BUG_ON(CONTEXT, (xnlock_is_owner(&nklock) && hard_irqs_disabled()) == 0) ++#define preemptible_only() XENO_BUG_ON(CONTEXT, xnlock_is_owner(&nklock) || hard_irqs_disabled()) ++#else ++#define atomic_only() XENO_BUG_ON(CONTEXT, hard_irqs_disabled() == 0) ++#define preemptible_only() XENO_BUG_ON(CONTEXT, hard_irqs_disabled() != 0) ++#endif ++ ++#endif /* !_COBALT_KERNEL_ASSERT_H */ +--- linux/include/xenomai/cobalt/kernel/arith.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/arith.h 2021-04-07 16:01:28.068632964 +0800 +@@ -0,0 +1,35 @@ ++/* ++ * Generic arithmetic/conversion routines. ++ * Copyright © 2005 Stelian Pop. ++ * Copyright © 2005 Gilles Chanteperdrix. ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_ARITH_H ++#define _COBALT_KERNEL_ARITH_H ++ ++#include ++#include ++ ++#ifdef __BIG_ENDIAN ++#define endianstruct { unsigned int _h; unsigned int _l; } ++#else /* __LITTLE_ENDIAN */ ++#define endianstruct { unsigned int _l; unsigned int _h; } ++#endif ++ ++#include ++ ++#endif /* _COBALT_KERNEL_ARITH_H */ +--- linux/include/xenomai/cobalt/kernel/intr.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/intr.h 2021-04-07 16:01:28.063632972 +0800 +@@ -0,0 +1,164 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_INTR_H ++#define _COBALT_KERNEL_INTR_H ++ ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_irq ++ * @{ ++ */ ++ ++/* Possible return values of a handler. */ ++#define XN_IRQ_NONE 0x1 ++#define XN_IRQ_HANDLED 0x2 ++#define XN_IRQ_STATMASK (XN_IRQ_NONE|XN_IRQ_HANDLED) ++#define XN_IRQ_PROPAGATE 0x100 ++#define XN_IRQ_DISABLE 0x200 ++ ++/* Init flags. */ ++#define XN_IRQTYPE_SHARED 0x1 ++#define XN_IRQTYPE_EDGE 0x2 ++ ++/* Status bits. */ ++#define XN_IRQSTAT_ATTACHED 0 ++#define _XN_IRQSTAT_ATTACHED (1 << XN_IRQSTAT_ATTACHED) ++#define XN_IRQSTAT_DISABLED 1 ++#define _XN_IRQSTAT_DISABLED (1 << XN_IRQSTAT_DISABLED) ++ ++struct xnintr; ++struct xnsched; ++ ++typedef int (*xnisr_t)(struct xnintr *intr); ++ ++typedef void (*xniack_t)(unsigned irq, void *arg); ++ ++struct xnirqstat { ++ /** Number of handled receipts since attachment. */ ++ xnstat_counter_t hits; ++ /** Runtime accounting entity */ ++ xnstat_exectime_t account; ++ /** Accumulated accounting entity */ ++ xnstat_exectime_t sum; ++}; ++ ++struct xnintr { ++#ifdef CONFIG_XENO_OPT_SHIRQ ++ /** Next object in the IRQ-sharing chain. */ ++ struct xnintr *next; ++#endif ++ /** Number of consequent unhandled interrupts */ ++ unsigned int unhandled; ++ /** Interrupt service routine. */ ++ xnisr_t isr; ++ /** User-defined cookie value. */ ++ void *cookie; ++ /** runtime status */ ++ unsigned long status; ++ /** Creation flags. */ ++ int flags; ++ /** IRQ number. */ ++ unsigned int irq; ++ /** Interrupt acknowledge routine. */ ++ xniack_t iack; ++ /** Symbolic name. */ ++ const char *name; ++ /** Descriptor maintenance lock. */ ++ raw_spinlock_t lock; ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++ /** Statistics. */ ++ struct xnirqstat *stats; ++#endif ++}; ++ ++struct xnintr_iterator { ++ int cpu; /** Current CPU in iteration. */ ++ unsigned long hits; /** Current hit counter. */ ++ xnticks_t exectime_period; /** Used CPU time in current accounting period. */ ++ xnticks_t account_period; /** Length of accounting period. */ ++ xnticks_t exectime_total; /** Overall CPU time consumed. */ ++ int list_rev; /** System-wide xnintr list revision (internal use). */ ++ struct xnintr *prev; /** Previously visited xnintr object (internal use). */ ++}; ++ ++int xnintr_mount(void); ++ ++void xnintr_core_clock_handler(void); ++ ++void xnintr_host_tick(struct xnsched *sched); ++ ++void xnintr_init_proc(void); ++ ++void xnintr_cleanup_proc(void); ++ ++ /* Public interface. 
*/ ++ ++int xnintr_init(struct xnintr *intr, ++ const char *name, ++ unsigned irq, ++ xnisr_t isr, ++ xniack_t iack, ++ int flags); ++ ++void xnintr_destroy(struct xnintr *intr); ++ ++int xnintr_attach(struct xnintr *intr, ++ void *cookie); ++ ++void xnintr_detach(struct xnintr *intr); ++ ++void xnintr_enable(struct xnintr *intr); ++ ++void xnintr_disable(struct xnintr *intr); ++ ++void xnintr_affinity(struct xnintr *intr, ++ cpumask_t cpumask); ++ ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++extern struct xnintr nktimer; ++ ++int xnintr_query_init(struct xnintr_iterator *iterator); ++ ++int xnintr_get_query_lock(void); ++ ++void xnintr_put_query_lock(void); ++ ++int xnintr_query_next(int irq, struct xnintr_iterator *iterator, ++ char *name_buf); ++ ++#else /* !CONFIG_XENO_OPT_STATS_IRQS */ ++ ++static inline int xnintr_query_init(struct xnintr_iterator *iterator) ++{ ++ return 0; ++} ++ ++static inline int xnintr_get_query_lock(void) ++{ ++ return 0; ++} ++ ++static inline void xnintr_put_query_lock(void) {} ++#endif /* !CONFIG_XENO_OPT_STATS_IRQS */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_INTR_H */ +--- linux/include/xenomai/cobalt/kernel/sched.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched.h 2021-04-07 16:01:28.059632978 +0800 +@@ -0,0 +1,704 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_H ++#define _COBALT_KERNEL_SCHED_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++/* Sched status flags */ ++#define XNRESCHED 0x10000000 /* Needs rescheduling */ ++#define XNINSW 0x20000000 /* In context switch */ ++#define XNINTCK 0x40000000 /* In master tick handler context */ ++ ++/* Sched local flags */ ++#define XNIDLE 0x00010000 /* Idle (no outstanding timer) */ ++#define XNHTICK 0x00008000 /* Host tick pending */ ++#define XNINIRQ 0x00004000 /* In IRQ handling context */ ++#define XNHDEFER 0x00002000 /* Host tick deferred */ ++ ++struct xnsched_rt { ++ xnsched_queue_t runnable; /*!< Runnable thread queue. */ ++}; ++ ++/*! ++ * \brief Scheduling information structure. ++ */ ++ ++struct xnsched { ++ /*!< Scheduler specific status bitmask. */ ++ unsigned long status; ++ /*!< Scheduler specific local flags bitmask. */ ++ unsigned long lflags; ++ /*!< Current thread. */ ++ struct xnthread *curr; ++#ifdef CONFIG_SMP ++ /*!< Owner CPU id. */ ++ int cpu; ++ /*!< Mask of CPUs needing rescheduling. */ ++ cpumask_t resched; ++#endif ++ /*!< Context of built-in real-time class. */ ++ struct xnsched_rt rt; ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ /*!< Context of weak scheduling class. 
*/ ++ struct xnsched_weak weak; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ /*!< Context of TP class. */ ++ struct xnsched_tp tp; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ /*!< Context of sporadic scheduling class. */ ++ struct xnsched_sporadic pss; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ /*!< Context of runtime quota scheduling. */ ++ struct xnsched_quota quota; ++#endif ++ /*!< Interrupt nesting level. */ ++ volatile unsigned inesting; ++ /*!< Host timer. */ ++ struct xntimer htimer; ++ /*!< Round-robin timer. */ ++ struct xntimer rrbtimer; ++ /*!< Root thread control block. */ ++ struct xnthread rootcb; ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ struct xnthread *last; ++#endif ++#ifdef CONFIG_XENO_ARCH_FPU ++ /*!< Thread owning the current FPU context. */ ++ struct xnthread *fpuholder; ++#endif ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ /*!< Watchdog timer object. */ ++ struct xntimer wdtimer; ++#endif ++#ifdef CONFIG_XENO_OPT_STATS ++ /*!< Last account switch date (ticks). */ ++ xnticks_t last_account_switch; ++ /*!< Currently active account */ ++ xnstat_exectime_t *current_account; ++#endif ++}; ++ ++DECLARE_PER_CPU(struct xnsched, nksched); ++ ++extern cpumask_t cobalt_cpu_affinity; ++ ++extern struct list_head nkthreadq; ++ ++extern int cobalt_nrthreads; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++extern struct xnvfile_rev_tag nkthreadlist_tag; ++#endif ++ ++union xnsched_policy_param; ++ ++struct xnsched_class { ++ void (*sched_init)(struct xnsched *sched); ++ void (*sched_enqueue)(struct xnthread *thread); ++ void (*sched_dequeue)(struct xnthread *thread); ++ void (*sched_requeue)(struct xnthread *thread); ++ struct xnthread *(*sched_pick)(struct xnsched *sched); ++ void (*sched_tick)(struct xnsched *sched); ++ void (*sched_rotate)(struct xnsched *sched, ++ const union xnsched_policy_param *p); ++ void (*sched_migrate)(struct xnthread *thread, ++ struct xnsched *sched); ++ int (*sched_chkparam)(struct xnthread *thread, ++ const union xnsched_policy_param *p); ++ /** ++ * Set base scheduling parameters. This routine is indirectly ++ * called upon a change of base scheduling settings through ++ * __xnthread_set_schedparam() -> xnsched_set_policy(), ++ * exclusively. ++ * ++ * The scheduling class implementation should do the necessary ++ * housekeeping to comply with the new settings. ++ * thread->base_class is up to date before the call is made, ++ * and should be considered for the new weighted priority ++ * calculation. On the contrary, thread->sched_class should ++ * NOT be referred to by this handler. ++ * ++ * sched_setparam() is NEVER involved in PI or PP ++ * management. However it must deny a priority update if it ++ * contradicts an ongoing boost for @a thread. This is ++ * typically what the xnsched_set_effective_priority() helper ++ * does for such handler. ++ * ++ * @param thread Affected thread. ++ * @param p New base policy settings. ++ * ++ * @return True if the effective priority was updated ++ * (thread->cprio). 
++ */ ++ bool (*sched_setparam)(struct xnthread *thread, ++ const union xnsched_policy_param *p); ++ void (*sched_getparam)(struct xnthread *thread, ++ union xnsched_policy_param *p); ++ void (*sched_trackprio)(struct xnthread *thread, ++ const union xnsched_policy_param *p); ++ void (*sched_protectprio)(struct xnthread *thread, int prio); ++ int (*sched_declare)(struct xnthread *thread, ++ const union xnsched_policy_param *p); ++ void (*sched_forget)(struct xnthread *thread); ++ void (*sched_kick)(struct xnthread *thread); ++#ifdef CONFIG_XENO_OPT_VFILE ++ int (*sched_init_vfile)(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot); ++ void (*sched_cleanup_vfile)(struct xnsched_class *schedclass); ++#endif ++ int nthreads; ++ struct xnsched_class *next; ++ int weight; ++ int policy; ++ const char *name; ++}; ++ ++#define XNSCHED_CLASS_WEIGHT(n) (n * XNSCHED_CLASS_WEIGHT_FACTOR) ++ ++/* Placeholder for current thread priority */ ++#define XNSCHED_RUNPRIO 0x80000000 ++ ++#define xnsched_for_each_thread(__thread) \ ++ list_for_each_entry(__thread, &nkthreadq, glink) ++ ++#ifdef CONFIG_SMP ++static inline int xnsched_cpu(struct xnsched *sched) ++{ ++ return sched->cpu; ++} ++#else /* !CONFIG_SMP */ ++static inline int xnsched_cpu(struct xnsched *sched) ++{ ++ return 0; ++} ++#endif /* CONFIG_SMP */ ++ ++static inline struct xnsched *xnsched_struct(int cpu) ++{ ++ return &per_cpu(nksched, cpu); ++} ++ ++static inline struct xnsched *xnsched_current(void) ++{ ++ /* IRQs off */ ++ return raw_cpu_ptr(&nksched); ++} ++ ++static inline struct xnthread *xnsched_current_thread(void) ++{ ++ return xnsched_current()->curr; ++} ++ ++/* Test resched flag of given sched. */ ++static inline int xnsched_resched_p(struct xnsched *sched) ++{ ++ return sched->status & XNRESCHED; ++} ++ ++/* Set self resched flag for the current scheduler. */ ++static inline void xnsched_set_self_resched(struct xnsched *sched) ++{ ++ sched->status |= XNRESCHED; ++} ++ ++#define xnsched_realtime_domain cobalt_pipeline.domain ++ ++/* Set resched flag for the given scheduler. */ ++#ifdef CONFIG_SMP ++ ++static inline void xnsched_set_resched(struct xnsched *sched) ++{ ++ struct xnsched *current_sched = xnsched_current(); ++ ++ if (current_sched == sched) ++ current_sched->status |= XNRESCHED; ++ else if (!xnsched_resched_p(sched)) { ++ cpumask_set_cpu(xnsched_cpu(sched), ¤t_sched->resched); ++ sched->status |= XNRESCHED; ++ current_sched->status |= XNRESCHED; ++ } ++} ++ ++#define xnsched_realtime_cpus cobalt_pipeline.supported_cpus ++ ++static inline int xnsched_supported_cpu(int cpu) ++{ ++ return cpumask_test_cpu(cpu, &xnsched_realtime_cpus); ++} ++ ++static inline int xnsched_threading_cpu(int cpu) ++{ ++ return cpumask_test_cpu(cpu, &cobalt_cpu_affinity); ++} ++ ++#else /* !CONFIG_SMP */ ++ ++static inline void xnsched_set_resched(struct xnsched *sched) ++{ ++ xnsched_set_self_resched(sched); ++} ++ ++#define xnsched_realtime_cpus CPU_MASK_ALL ++ ++static inline int xnsched_supported_cpu(int cpu) ++{ ++ return 1; ++} ++ ++static inline int xnsched_threading_cpu(int cpu) ++{ ++ return 1; ++} ++ ++#endif /* !CONFIG_SMP */ ++ ++#define for_each_realtime_cpu(cpu) \ ++ for_each_online_cpu(cpu) \ ++ if (xnsched_supported_cpu(cpu)) \ ++ ++int ___xnsched_run(struct xnsched *sched); ++ ++void __xnsched_run_handler(void); ++ ++static inline int __xnsched_run(struct xnsched *sched) ++{ ++ /* ++ * Reschedule if XNSCHED is pending, but never over an IRQ ++ * handler or in the middle of unlocked context switch. 
++ */ ++ if (((sched->status|sched->lflags) & ++ (XNINIRQ|XNINSW|XNRESCHED)) != XNRESCHED) ++ return 0; ++ ++ return ___xnsched_run(sched); ++} ++ ++static inline int xnsched_run(void) ++{ ++ struct xnsched *sched = xnsched_current(); ++ /* ++ * sched->curr is shared locklessly with ___xnsched_run(). ++ * READ_ONCE() makes sure the compiler never uses load tearing ++ * for reading this pointer piecemeal, so that multiple stores ++ * occurring concurrently on remote CPUs never yield a ++ * spurious merged value on the local one. ++ */ ++ struct xnthread *curr = READ_ONCE(sched->curr); ++ ++ /* ++ * If running over the root thread, hard irqs must be off ++ * (asserted out of line in ___xnsched_run()). ++ */ ++ return curr->lock_count > 0 ? 0 : __xnsched_run(sched); ++} ++ ++void xnsched_lock(void); ++ ++void xnsched_unlock(void); ++ ++static inline int xnsched_interrupt_p(void) ++{ ++ return xnsched_current()->lflags & XNINIRQ; ++} ++ ++static inline int xnsched_root_p(void) ++{ ++ return xnthread_test_state(xnsched_current_thread(), XNROOT); ++} ++ ++static inline int xnsched_unblockable_p(void) ++{ ++ return xnsched_interrupt_p() || xnsched_root_p(); ++} ++ ++static inline int xnsched_primary_p(void) ++{ ++ return !xnsched_unblockable_p(); ++} ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ ++struct xnsched *xnsched_finish_unlocked_switch(struct xnsched *sched); ++ ++#define xnsched_resched_after_unlocked_switch() xnsched_run() ++ ++static inline ++int xnsched_maybe_resched_after_unlocked_switch(struct xnsched *sched) ++{ ++ return sched->status & XNRESCHED; ++} ++ ++#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++static inline struct xnsched * ++xnsched_finish_unlocked_switch(struct xnsched *sched) ++{ ++ XENO_BUG_ON(COBALT, !hard_irqs_disabled()); ++ return xnsched_current(); ++} ++ ++static inline void xnsched_resched_after_unlocked_switch(void) { } ++ ++static inline int ++xnsched_maybe_resched_after_unlocked_switch(struct xnsched *sched) ++{ ++ return 0; ++} ++ ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++bool xnsched_set_effective_priority(struct xnthread *thread, ++ int prio); ++ ++#include ++#include ++ ++int xnsched_init_proc(void); ++ ++void xnsched_cleanup_proc(void); ++ ++void xnsched_register_classes(void); ++ ++void xnsched_init_all(void); ++ ++void xnsched_destroy_all(void); ++ ++struct xnthread *xnsched_pick_next(struct xnsched *sched); ++ ++void xnsched_putback(struct xnthread *thread); ++ ++int xnsched_set_policy(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *p); ++ ++void xnsched_track_policy(struct xnthread *thread, ++ struct xnthread *target); ++ ++void xnsched_protect_priority(struct xnthread *thread, ++ int prio); ++ ++void xnsched_migrate(struct xnthread *thread, ++ struct xnsched *sched); ++ ++void xnsched_migrate_passive(struct xnthread *thread, ++ struct xnsched *sched); ++ ++/** ++ * @fn void xnsched_rotate(struct xnsched *sched, struct xnsched_class *sched_class, const union xnsched_policy_param *sched_param) ++ * @brief Rotate a scheduler runqueue. ++ * ++ * The specified scheduling class is requested to rotate its runqueue ++ * for the given scheduler. Rotation is performed according to the ++ * scheduling parameter specified by @a sched_param. ++ * ++ * @note The nucleus supports round-robin scheduling for the members ++ * of the RT class. ++ * ++ * @param sched The per-CPU scheduler hosting the target scheduling ++ * class. 
++ * ++ * @param sched_class The scheduling class which should rotate its ++ * runqueue. ++ * ++ * @param sched_param The scheduling parameter providing rotation ++ * information to the specified scheduling class. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++static inline void xnsched_rotate(struct xnsched *sched, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param) ++{ ++ sched_class->sched_rotate(sched, sched_param); ++} ++ ++static inline int xnsched_init_thread(struct xnthread *thread) ++{ ++ int ret = 0; ++ ++ xnsched_idle_init_thread(thread); ++ xnsched_rt_init_thread(thread); ++ ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ ret = xnsched_tp_init_thread(thread); ++ if (ret) ++ return ret; ++#endif /* CONFIG_XENO_OPT_SCHED_TP */ ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ ret = xnsched_sporadic_init_thread(thread); ++ if (ret) ++ return ret; ++#endif /* CONFIG_XENO_OPT_SCHED_SPORADIC */ ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ ret = xnsched_quota_init_thread(thread); ++ if (ret) ++ return ret; ++#endif /* CONFIG_XENO_OPT_SCHED_QUOTA */ ++ ++ return ret; ++} ++ ++static inline int xnsched_root_priority(struct xnsched *sched) ++{ ++ return sched->rootcb.cprio; ++} ++ ++static inline struct xnsched_class *xnsched_root_class(struct xnsched *sched) ++{ ++ return sched->rootcb.sched_class; ++} ++ ++static inline void xnsched_tick(struct xnsched *sched) ++{ ++ struct xnthread *curr = sched->curr; ++ struct xnsched_class *sched_class = curr->sched_class; ++ /* ++ * A thread that undergoes round-robin scheduling only ++ * consumes its time slice when it runs within its own ++ * scheduling class, which excludes temporary PI boosts, and ++ * does not hold the scheduler lock. ++ */ ++ if (sched_class == curr->base_class && ++ sched_class->sched_tick && ++ xnthread_test_state(curr, XNTHREAD_BLOCK_BITS|XNRRB) == XNRRB && ++ curr->lock_count == 0) ++ sched_class->sched_tick(sched); ++} ++ ++static inline int xnsched_chkparam(struct xnsched_class *sched_class, ++ struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (sched_class->sched_chkparam) ++ return sched_class->sched_chkparam(thread, p); ++ ++ return 0; ++} ++ ++static inline int xnsched_declare(struct xnsched_class *sched_class, ++ struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ int ret; ++ ++ if (sched_class->sched_declare) { ++ ret = sched_class->sched_declare(thread, p); ++ if (ret) ++ return ret; ++ } ++ if (sched_class != thread->base_class) ++ sched_class->nthreads++; ++ ++ return 0; ++} ++ ++static inline int xnsched_calc_wprio(struct xnsched_class *sched_class, ++ int prio) ++{ ++ return prio + sched_class->weight; ++} ++ ++#ifdef CONFIG_XENO_OPT_SCHED_CLASSES ++ ++static inline void xnsched_enqueue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ sched_class->sched_enqueue(thread); ++} ++ ++static inline void xnsched_dequeue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ sched_class->sched_dequeue(thread); ++} ++ ++static inline void xnsched_requeue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ sched_class->sched_requeue(thread); ++} ++ ++static inline ++bool xnsched_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ return 
thread->base_class->sched_setparam(thread, p); ++} ++ ++static inline void xnsched_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ thread->sched_class->sched_getparam(thread, p); ++} ++ ++static inline void xnsched_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ thread->sched_class->sched_trackprio(thread, p); ++ thread->wprio = xnsched_calc_wprio(thread->sched_class, thread->cprio); ++} ++ ++static inline void xnsched_protectprio(struct xnthread *thread, int prio) ++{ ++ thread->sched_class->sched_protectprio(thread, prio); ++ thread->wprio = xnsched_calc_wprio(thread->sched_class, thread->cprio); ++} ++ ++static inline void xnsched_forget(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->base_class; ++ ++ --sched_class->nthreads; ++ ++ if (sched_class->sched_forget) ++ sched_class->sched_forget(thread); ++} ++ ++static inline void xnsched_kick(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->base_class; ++ ++ xnthread_set_info(thread, XNKICKED); ++ ++ if (sched_class->sched_kick) ++ sched_class->sched_kick(thread); ++ ++ xnsched_set_resched(thread->sched); ++} ++ ++#else /* !CONFIG_XENO_OPT_SCHED_CLASSES */ ++ ++/* ++ * If only the RT and IDLE scheduling classes are compiled in, we can ++ * fully inline common helpers for dealing with those. ++ */ ++ ++static inline void xnsched_enqueue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ __xnsched_rt_enqueue(thread); ++} ++ ++static inline void xnsched_dequeue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ __xnsched_rt_dequeue(thread); ++} ++ ++static inline void xnsched_requeue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ __xnsched_rt_requeue(thread); ++} ++ ++static inline bool xnsched_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_class *sched_class = thread->base_class; ++ ++ if (sched_class == &xnsched_class_idle) ++ return __xnsched_idle_setparam(thread, p); ++ ++ return __xnsched_rt_setparam(thread, p); ++} ++ ++static inline void xnsched_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class == &xnsched_class_idle) ++ __xnsched_idle_getparam(thread, p); ++ else ++ __xnsched_rt_getparam(thread, p); ++} ++ ++static inline void xnsched_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class == &xnsched_class_idle) ++ __xnsched_idle_trackprio(thread, p); ++ else ++ __xnsched_rt_trackprio(thread, p); ++ ++ thread->wprio = xnsched_calc_wprio(sched_class, thread->cprio); ++} ++ ++static inline void xnsched_protectprio(struct xnthread *thread, int prio) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class == &xnsched_class_idle) ++ __xnsched_idle_protectprio(thread, prio); ++ else ++ __xnsched_rt_protectprio(thread, prio); ++ ++ thread->wprio = xnsched_calc_wprio(sched_class, thread->cprio); ++} ++ ++static inline void xnsched_forget(struct xnthread *thread) ++{ ++ --thread->base_class->nthreads; ++ __xnsched_rt_forget(thread); ++} ++ ++static inline void xnsched_kick(struct 
xnthread *thread) ++{ ++ xnthread_set_info(thread, XNKICKED); ++ xnsched_set_resched(thread->sched); ++} ++ ++#endif /* !CONFIG_XENO_OPT_SCHED_CLASSES */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_H */ +--- linux/include/xenomai/cobalt/kernel/sched-sporadic.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-sporadic.h 2021-04-07 16:01:28.053632986 +0800 +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_SPORADIC_H ++#define _COBALT_KERNEL_SCHED_SPORADIC_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-sporadic.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ ++#define XNSCHED_SPORADIC_MIN_PRIO 1 ++#define XNSCHED_SPORADIC_MAX_PRIO 255 ++#define XNSCHED_SPORADIC_NR_PRIO \ ++ (XNSCHED_SPORADIC_MAX_PRIO - XNSCHED_SPORADIC_MIN_PRIO + 1) ++ ++extern struct xnsched_class xnsched_class_sporadic; ++ ++struct xnsched_sporadic_repl { ++ xnticks_t date; ++ xnticks_t amount; ++}; ++ ++struct xnsched_sporadic_data { ++ xnticks_t resume_date; ++ xnticks_t budget; ++ int repl_in; ++ int repl_out; ++ int repl_pending; ++ struct xntimer repl_timer; ++ struct xntimer drop_timer; ++ struct xnsched_sporadic_repl repl_data[CONFIG_XENO_OPT_SCHED_SPORADIC_MAXREPL]; ++ struct xnsched_sporadic_param param; ++ struct xnthread *thread; ++}; ++ ++struct xnsched_sporadic { ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ unsigned long drop_retries; ++#endif ++}; ++ ++static inline int xnsched_sporadic_init_thread(struct xnthread *thread) ++{ ++ thread->pss = NULL; ++ ++ return 0; ++} ++ ++#endif /* !CONFIG_XENO_OPT_SCHED_SPORADIC */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_SPORADIC_H */ +--- linux/include/linux/xenomai/wrappers.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/linux/xenomai/wrappers.h 2021-04-07 16:01:25.949635992 +0800 +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (C) 2017 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_LINUX_WRAPPERS_H ++#define _COBALT_LINUX_WRAPPERS_H ++ ++#include ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,11,0) ++#include ++#include ++ ++#define cobalt_set_task_state(tsk, state_value) \ ++ set_task_state(tsk, state_value) ++#else ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++/* ++ * The co-kernel can still do this sanely for a thread which is ++ * currently active on the head stage. ++ */ ++#define cobalt_set_task_state(tsk, state_value) \ ++ smp_store_mb((tsk)->state, (state_value)) ++#endif ++ ++#include ++ ++#ifndef ipipe_root_nr_syscalls ++#define ipipe_root_nr_syscalls(ti) NR_syscalls ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) ++typedef siginfo_t kernel_siginfo_t; ++#endif ++ ++#endif /* !_COBALT_LINUX_WRAPPERS_H */ +--- linux/include/asm-generic/xenomai/pci_ids.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/pci_ids.h 2021-04-07 16:01:25.942636002 +0800 +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (C) 2009 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_PCI_IDS_H ++#define _COBALT_ASM_GENERIC_PCI_IDS_H ++ ++#include ++ ++/* SMI */ ++#ifndef PCI_DEVICE_ID_INTEL_ESB2_0 ++#define PCI_DEVICE_ID_INTEL_ESB2_0 0x2670 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH7_0 ++#define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH7_1 ++#define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH8_4 ++#define PCI_DEVICE_ID_INTEL_ICH8_4 0x2815 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH9_1 ++#define PCI_DEVICE_ID_INTEL_ICH9_1 0x2917 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH9_5 ++#define PCI_DEVICE_ID_INTEL_ICH9_5 0x2919 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH10_1 ++#define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_PCH_LPC_MIN ++#define PCI_DEVICE_ID_INTEL_PCH_LPC_MIN 0x3b00 ++#endif ++ ++/* RTCAN */ ++#ifndef PCI_VENDOR_ID_ESDGMBH ++#define PCI_VENDOR_ID_ESDGMBH 0x12fe ++#endif ++#ifndef PCI_DEVICE_ID_PLX_9030 ++#define PCI_DEVICE_ID_PLX_9030 0x9030 ++#endif ++#ifndef PCI_DEVICE_ID_PLX_9056 ++#define PCI_DEVICE_ID_PLX_9056 0x9056 ++#endif ++ ++#endif /* _COBALT_ASM_GENERIC_PCI_IDS_H */ +--- linux/include/asm-generic/xenomai/machine.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/machine.h 2021-04-07 16:01:25.938636008 +0800 +@@ -0,0 +1,77 @@ ++/** ++ * Copyright © 2012 Philippe Gerum. ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_MACHINE_H ++#define _COBALT_ASM_GENERIC_MACHINE_H ++ ++#include ++#include ++#include ++#include ++ ++struct vm_area_struct; ++ ++struct cobalt_machine { ++ const char *name; ++ int (*init)(void); ++ int (*late_init)(void); ++ void (*cleanup)(void); ++ void (*prefault)(struct vm_area_struct *vma); ++ unsigned long (*calibrate)(void); ++ const char *const *fault_labels; ++}; ++ ++extern struct cobalt_machine cobalt_machine; ++ ++struct cobalt_machine_cpudata { ++ unsigned long apc_pending; ++ unsigned long apc_shots[BITS_PER_LONG]; ++ unsigned int faults[IPIPE_NR_FAULTS]; ++}; ++ ++DECLARE_PER_CPU(struct cobalt_machine_cpudata, cobalt_machine_cpudata); ++ ++struct cobalt_pipeline { ++ struct ipipe_domain domain; ++ unsigned long timer_freq; ++ unsigned long clock_freq; ++ unsigned int apc_virq; ++ unsigned long apc_map; ++ unsigned int escalate_virq; ++ struct { ++ void (*handler)(void *cookie); ++ void *cookie; ++ const char *name; ++ } apc_table[BITS_PER_LONG]; ++#ifdef CONFIG_SMP ++ cpumask_t supported_cpus; ++#endif ++}; ++ ++extern struct cobalt_pipeline cobalt_pipeline; ++ ++static inline unsigned long xnarch_timer_calibrate(void) ++{ ++ return cobalt_machine.calibrate(); ++} ++ ++#ifndef xnarch_cache_aliasing ++#define xnarch_cache_aliasing() 0 ++#endif ++ ++#endif /* !_COBALT_ASM_GENERIC_MACHINE_H */ +--- linux/include/asm-generic/xenomai/syscall32.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/syscall32.h 2021-04-07 16:01:25.933636015 +0800 +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_SYSCALL32_H ++#define _COBALT_ASM_GENERIC_SYSCALL32_H ++ ++#define __COBALT_CALL32_INITHAND(__handler) ++ ++#define __COBALT_CALL32_INITMODE(__mode) ++ ++#define __COBALT_CALL32_ENTRY(__name, __handler) ++ ++#define __COBALT_CALL_COMPAT(__reg) 0 ++ ++#endif /* !_COBALT_ASM_GENERIC_SYSCALL32_H */ +--- linux/include/asm-generic/xenomai/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/syscall.h 2021-04-07 16:01:25.928636022 +0800 +@@ -0,0 +1,89 @@ ++/* ++ * Copyright (C) 2001,2002,2003,2004,2005 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_SYSCALL_H ++#define _COBALT_ASM_GENERIC_SYSCALL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) ++#define access_rok(addr, size) access_ok((addr), (size)) ++#define access_wok(addr, size) access_ok((addr), (size)) ++#else ++#define access_rok(addr, size) access_ok(VERIFY_READ, (addr), (size)) ++#define access_wok(addr, size) access_ok(VERIFY_WRITE, (addr), (size)) ++#endif ++ ++#define __xn_reg_arglist(regs) \ ++ __xn_reg_arg1(regs), \ ++ __xn_reg_arg2(regs), \ ++ __xn_reg_arg3(regs), \ ++ __xn_reg_arg4(regs), \ ++ __xn_reg_arg5(regs) ++ ++#define __xn_copy_from_user(dstP, srcP, n) raw_copy_from_user(dstP, srcP, n) ++#define __xn_copy_to_user(dstP, srcP, n) raw_copy_to_user(dstP, srcP, n) ++#define __xn_put_user(src, dstP) __put_user(src, dstP) ++#define __xn_get_user(dst, srcP) __get_user(dst, srcP) ++#define __xn_strncpy_from_user(dstP, srcP, n) strncpy_from_user(dstP, srcP, n) ++ ++static inline int cobalt_copy_from_user(void *dst, const void __user *src, ++ size_t size) ++{ ++ size_t remaining = size; ++ ++ if (likely(access_rok(src, size))) ++ remaining = __xn_copy_from_user(dst, src, size); ++ ++ if (unlikely(remaining > 0)) { ++ memset(dst + (size - remaining), 0, remaining); ++ return -EFAULT; ++ } ++ return 0; ++} ++ ++static inline int cobalt_copy_to_user(void __user *dst, const void *src, ++ size_t size) ++{ ++ if (unlikely(!access_wok(dst, size) || ++ __xn_copy_to_user(dst, src, size))) ++ return -EFAULT; ++ return 0; ++} ++ ++static inline int cobalt_strncpy_from_user(char *dst, const char __user *src, ++ size_t count) ++{ ++ if (unlikely(!access_rok(src, 1))) ++ return -EFAULT; ++ ++ return __xn_strncpy_from_user(dst, src, count); ++} ++ ++/* 32bit syscall emulation */ ++#define __COBALT_COMPAT_BIT 0x1 ++/* 32bit syscall emulation - extended form */ ++#define __COBALT_COMPATX_BIT 0x2 ++ ++#endif /* !_COBALT_ASM_GENERIC_SYSCALL_H */ +--- linux/include/asm-generic/xenomai/wrappers.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/wrappers.h 2021-04-07 16:01:25.924636028 +0800 +@@ -0,0 +1,180 @@ ++/* ++ * Copyright (C) 2005-2012 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_WRAPPERS_H ++ ++#include ++ ++#ifdef CONFIG_IPIPE_LEGACY ++#error "CONFIG_IPIPE_LEGACY must be switched off" ++#endif ++ ++#define COBALT_BACKPORT(__sym) __cobalt_backport_ ##__sym ++ ++/* ++ * To keep the #ifdefery as readable as possible, please: ++ * ++ * - keep the conditional structure flat, no nesting (e.g. do not fold ++ * the pre-3.11 conditions into the pre-3.14 ones). ++ * - group all wrappers for a single kernel revision. ++ * - list conditional blocks in order of kernel release, latest first ++ * - identify the first kernel release for which the wrapper should ++ * be defined, instead of testing the existence of a preprocessor ++ * symbol, so that obsolete wrappers can be spotted. ++ */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0) ++#define get_compat_sigset(set, compat) \ ++({ \ ++ compat_sigset_t set32; \ ++ int ret; \ ++ \ ++ ret = cobalt_copy_from_user(&set32, compat, sizeof(compat_sigset_t)); \ ++ if (!ret) \ ++ sigset_from_compat(set, &set32); \ ++ ret; \ ++}) ++ ++#define put_compat_sigset(compat, set, size) \ ++({ \ ++ compat_sigset_t set32; \ ++ \ ++ sigset_to_compat(&set32, set); \ ++ cobalt_copy_to_user(compat, &set32, size); \ ++}) ++#endif /* < 4.15 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,11,0) ++#define raw_copy_to_user(__to, __from, __n) __copy_to_user_inatomic(__to, __from, __n) ++#define raw_copy_from_user(__to, __from, __n) __copy_from_user_inatomic(__to, __from, __n) ++#define raw_put_user(__from, __to) __put_user_inatomic(__from, __to) ++#define raw_get_user(__to, __from) __get_user_inatomic(__to, __from) ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0) ++#define in_ia32_syscall() (current_thread_info()->status & TS_COMPAT) ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,5,0) ++#define cobalt_gpiochip_dev(__gc) ((__gc)->dev) ++#else ++#define cobalt_gpiochip_dev(__gc) ((__gc)->parent) ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,0,0) ++#define cobalt_get_restart_block(p) (&task_thread_info(p)->restart_block) ++#else ++#define cobalt_get_restart_block(p) (&(p)->restart_block) ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0) ++#define user_msghdr msghdr ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,17,0) ++#include ++ ++#undef alloc_netdev ++#define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ ++ alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1) ++ ++#include ++ ++static inline unsigned char * ++trace_seq_buffer_ptr(struct trace_seq *s) ++{ ++ return s->buffer + s->len; ++} ++ ++#endif /* < 3.17 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) ++#define smp_mb__before_atomic() smp_mb() ++#define smp_mb__after_atomic() smp_mb() ++#endif /* < 3.16 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,15,0) ++#define raw_cpu_ptr(v) __this_cpu_ptr(v) ++#endif /* < 3.15 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) ++#include ++ ++#ifdef CONFIG_PCI ++#define pci_enable_msix_range COBALT_BACKPORT(pci_enable_msix_range) ++#ifdef CONFIG_PCI_MSI ++int pci_enable_msix_range(struct pci_dev *dev, ++ struct msix_entry *entries, ++ int minvec, int maxvec); ++#else /* !CONFIG_PCI_MSI */ ++static inline ++int pci_enable_msix_range(struct pci_dev *dev, ++ struct msix_entry *entries, ++ int minvec, int maxvec) ++{ ++ return 
-ENOSYS; ++} ++#endif /* !CONFIG_PCI_MSI */ ++#endif /* CONFIG_PCI */ ++#endif /* < 3.14 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0) ++#include ++#include ++ ++#define dma_set_mask_and_coherent COBALT_BACKPORT(dma_set_mask_and_coherent) ++static inline ++int dma_set_mask_and_coherent(struct device *dev, u64 mask) ++{ ++ int rc = dma_set_mask(dev, mask); ++ if (rc == 0) ++ dma_set_coherent_mask(dev, mask); ++ return rc; ++} ++ ++#ifdef CONFIG_HWMON ++#define hwmon_device_register_with_groups \ ++ COBALT_BACKPORT(hwmon_device_register_with_groups) ++struct device * ++hwmon_device_register_with_groups(struct device *dev, const char *name, ++ void *drvdata, ++ const struct attribute_group **groups); ++ ++#define devm_hwmon_device_register_with_groups \ ++ COBALT_BACKPORT(devm_hwmon_device_register_with_groups) ++struct device * ++devm_hwmon_device_register_with_groups(struct device *dev, const char *name, ++ void *drvdata, ++ const struct attribute_group **groups); ++#endif /* !CONFIG_HWMON */ ++ ++#define reinit_completion(__x) INIT_COMPLETION(*(__x)) ++ ++#endif /* < 3.13 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0) ++#define DEVICE_ATTR_RW(_name) __ATTR_RW(_name) ++#define DEVICE_ATTR_RO(_name) __ATTR_RO(_name) ++#define DEVICE_ATTR_WO(_name) __ATTR_WO(_name) ++#endif /* < 3.11 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ++#error "Xenomai/cobalt requires Linux kernel 3.10 or above" ++#endif /* < 3.10 */ ++ ++#endif /* _COBALT_ASM_GENERIC_WRAPPERS_H */ +--- linux/include/asm-generic/xenomai/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/thread.h 2021-04-07 16:01:25.919636035 +0800 +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_THREAD_H ++#define _COBALT_ASM_GENERIC_THREAD_H ++ ++#include ++#include ++ ++struct task_struct; ++ ++struct xnthread; ++struct xnarchtcb; ++ ++struct xntcb { ++ struct task_struct *host_task; ++ struct thread_struct *tsp; ++ struct mm_struct *mm; ++ struct mm_struct *active_mm; ++ struct thread_struct ts; ++ struct thread_info *tip; ++#ifdef CONFIG_XENO_ARCH_FPU ++ struct task_struct *user_fpu_owner; ++#endif ++}; ++ ++#endif /* !_COBALT_ASM_GENERIC_THREAD_H */ +--- linux/include/trace/events/cobalt-posix.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/trace/events/cobalt-posix.h 2021-04-07 16:01:25.912636045 +0800 +@@ -0,0 +1,1180 @@ ++/* ++ * Copyright (C) 2014 Jan Kiszka . ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM cobalt_posix ++ ++#if !defined(_TRACE_COBALT_POSIX_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_COBALT_POSIX_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define __timespec_fields(__name) \ ++ __field(__kernel_time_t, tv_sec_##__name) \ ++ __field(long, tv_nsec_##__name) ++ ++#define __assign_timespec(__to, __from) \ ++ do { \ ++ __entry->tv_sec_##__to = (__from)->tv_sec; \ ++ __entry->tv_nsec_##__to = (__from)->tv_nsec; \ ++ } while (0) ++ ++#define __timespec_args(__name) \ ++ __entry->tv_sec_##__name, __entry->tv_nsec_##__name ++ ++#ifdef CONFIG_X86_X32 ++#define __sc_x32(__name) , { sc_cobalt_##__name + __COBALT_X32_BASE, "x32-" #__name } ++#else ++#define __sc_x32(__name) ++#endif ++ ++#ifdef CONFIG_IA32_EMULATION ++#define __sc_compat(__name) , { sc_cobalt_##__name + __COBALT_IA32_BASE, "compat-" #__name } ++#else ++#define __sc_compat(__name) ++#endif ++ ++#define __cobalt_symbolic_syscall(__name) \ ++ { sc_cobalt_##__name, #__name } \ ++ __sc_x32(__name) \ ++ __sc_compat(__name) \ ++ ++#define __cobalt_syscall_name(__nr) \ ++ __print_symbolic((__nr), \ ++ __cobalt_symbolic_syscall(bind), \ ++ __cobalt_symbolic_syscall(thread_create), \ ++ __cobalt_symbolic_syscall(thread_getpid), \ ++ __cobalt_symbolic_syscall(thread_setmode), \ ++ __cobalt_symbolic_syscall(thread_setname), \ ++ __cobalt_symbolic_syscall(thread_join), \ ++ __cobalt_symbolic_syscall(thread_kill), \ ++ __cobalt_symbolic_syscall(thread_setschedparam_ex), \ ++ __cobalt_symbolic_syscall(thread_getschedparam_ex), \ ++ __cobalt_symbolic_syscall(thread_setschedprio), \ ++ __cobalt_symbolic_syscall(thread_getstat), \ ++ __cobalt_symbolic_syscall(sem_init), \ ++ __cobalt_symbolic_syscall(sem_destroy), \ ++ __cobalt_symbolic_syscall(sem_post), \ ++ __cobalt_symbolic_syscall(sem_wait), \ ++ __cobalt_symbolic_syscall(sem_trywait), \ ++ __cobalt_symbolic_syscall(sem_getvalue), \ ++ __cobalt_symbolic_syscall(sem_open), \ ++ __cobalt_symbolic_syscall(sem_close), \ ++ __cobalt_symbolic_syscall(sem_unlink), \ ++ __cobalt_symbolic_syscall(sem_timedwait), \ ++ __cobalt_symbolic_syscall(sem_inquire), \ ++ __cobalt_symbolic_syscall(sem_broadcast_np), \ ++ __cobalt_symbolic_syscall(clock_getres), \ ++ __cobalt_symbolic_syscall(clock_gettime), \ ++ __cobalt_symbolic_syscall(clock_settime), \ ++ __cobalt_symbolic_syscall(clock_nanosleep), \ ++ __cobalt_symbolic_syscall(mutex_init), \ ++ __cobalt_symbolic_syscall(mutex_check_init), \ ++ __cobalt_symbolic_syscall(mutex_destroy), \ ++ __cobalt_symbolic_syscall(mutex_lock), \ ++ __cobalt_symbolic_syscall(mutex_timedlock), \ ++ __cobalt_symbolic_syscall(mutex_trylock), \ ++ __cobalt_symbolic_syscall(mutex_unlock), \ ++ __cobalt_symbolic_syscall(cond_init), \ ++ __cobalt_symbolic_syscall(cond_destroy), \ ++ __cobalt_symbolic_syscall(cond_wait_prologue), \ ++ __cobalt_symbolic_syscall(cond_wait_epilogue), \ ++ __cobalt_symbolic_syscall(mq_open), \ ++ __cobalt_symbolic_syscall(mq_close), \ ++ __cobalt_symbolic_syscall(mq_unlink), \ ++ 
__cobalt_symbolic_syscall(mq_getattr), \ ++ __cobalt_symbolic_syscall(mq_timedsend), \ ++ __cobalt_symbolic_syscall(mq_timedreceive), \ ++ __cobalt_symbolic_syscall(mq_notify), \ ++ __cobalt_symbolic_syscall(sched_minprio), \ ++ __cobalt_symbolic_syscall(sched_maxprio), \ ++ __cobalt_symbolic_syscall(sched_weightprio), \ ++ __cobalt_symbolic_syscall(sched_yield), \ ++ __cobalt_symbolic_syscall(sched_setscheduler_ex), \ ++ __cobalt_symbolic_syscall(sched_getscheduler_ex), \ ++ __cobalt_symbolic_syscall(sched_setconfig_np), \ ++ __cobalt_symbolic_syscall(sched_getconfig_np), \ ++ __cobalt_symbolic_syscall(timer_create), \ ++ __cobalt_symbolic_syscall(timer_delete), \ ++ __cobalt_symbolic_syscall(timer_settime), \ ++ __cobalt_symbolic_syscall(timer_gettime), \ ++ __cobalt_symbolic_syscall(timer_getoverrun), \ ++ __cobalt_symbolic_syscall(timerfd_create), \ ++ __cobalt_symbolic_syscall(timerfd_settime), \ ++ __cobalt_symbolic_syscall(timerfd_gettime), \ ++ __cobalt_symbolic_syscall(sigwait), \ ++ __cobalt_symbolic_syscall(sigwaitinfo), \ ++ __cobalt_symbolic_syscall(sigtimedwait), \ ++ __cobalt_symbolic_syscall(sigpending), \ ++ __cobalt_symbolic_syscall(kill), \ ++ __cobalt_symbolic_syscall(sigqueue), \ ++ __cobalt_symbolic_syscall(monitor_init), \ ++ __cobalt_symbolic_syscall(monitor_destroy), \ ++ __cobalt_symbolic_syscall(monitor_enter), \ ++ __cobalt_symbolic_syscall(monitor_wait), \ ++ __cobalt_symbolic_syscall(monitor_sync), \ ++ __cobalt_symbolic_syscall(monitor_exit), \ ++ __cobalt_symbolic_syscall(event_init), \ ++ __cobalt_symbolic_syscall(event_wait), \ ++ __cobalt_symbolic_syscall(event_sync), \ ++ __cobalt_symbolic_syscall(event_destroy), \ ++ __cobalt_symbolic_syscall(event_inquire), \ ++ __cobalt_symbolic_syscall(open), \ ++ __cobalt_symbolic_syscall(socket), \ ++ __cobalt_symbolic_syscall(close), \ ++ __cobalt_symbolic_syscall(ioctl), \ ++ __cobalt_symbolic_syscall(read), \ ++ __cobalt_symbolic_syscall(write), \ ++ __cobalt_symbolic_syscall(recvmsg), \ ++ __cobalt_symbolic_syscall(sendmsg), \ ++ __cobalt_symbolic_syscall(mmap), \ ++ __cobalt_symbolic_syscall(select), \ ++ __cobalt_symbolic_syscall(fcntl), \ ++ __cobalt_symbolic_syscall(migrate), \ ++ __cobalt_symbolic_syscall(archcall), \ ++ __cobalt_symbolic_syscall(trace), \ ++ __cobalt_symbolic_syscall(corectl), \ ++ __cobalt_symbolic_syscall(get_current), \ ++ __cobalt_symbolic_syscall(backtrace), \ ++ __cobalt_symbolic_syscall(serialdbg), \ ++ __cobalt_symbolic_syscall(extend), \ ++ __cobalt_symbolic_syscall(ftrace_puts), \ ++ __cobalt_symbolic_syscall(recvmmsg), \ ++ __cobalt_symbolic_syscall(sendmmsg), \ ++ __cobalt_symbolic_syscall(clock_adjtime)) ++ ++DECLARE_EVENT_CLASS(syscall_entry, ++ TP_PROTO(unsigned int nr), ++ TP_ARGS(nr), ++ ++ TP_STRUCT__entry( ++ __field(unsigned int, nr) ++ ), ++ ++ TP_fast_assign( ++ __entry->nr = nr; ++ ), ++ ++ TP_printk("syscall=%s", __cobalt_syscall_name(__entry->nr)) ++); ++ ++DECLARE_EVENT_CLASS(syscall_exit, ++ TP_PROTO(long result), ++ TP_ARGS(result), ++ ++ TP_STRUCT__entry( ++ __field(long, result) ++ ), ++ ++ TP_fast_assign( ++ __entry->result = result; ++ ), ++ ++ TP_printk("result=%ld", __entry->result) ++); ++ ++#define cobalt_print_sched_policy(__policy) \ ++ __print_symbolic(__policy, \ ++ {SCHED_NORMAL, "normal"}, \ ++ {SCHED_FIFO, "fifo"}, \ ++ {SCHED_RR, "rr"}, \ ++ {SCHED_TP, "tp"}, \ ++ {SCHED_QUOTA, "quota"}, \ ++ {SCHED_SPORADIC, "sporadic"}, \ ++ {SCHED_COBALT, "cobalt"}, \ ++ {SCHED_WEAK, "weak"}) ++ ++const char *cobalt_trace_parse_sched_params(struct trace_seq 
*, int, ++ struct sched_param_ex *); ++ ++#define __parse_sched_params(policy, params) \ ++ cobalt_trace_parse_sched_params(p, policy, \ ++ (struct sched_param_ex *)(params)) ++ ++DECLARE_EVENT_CLASS(cobalt_posix_schedparam, ++ TP_PROTO(unsigned long pth, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pth, policy, param_ex), ++ ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __field(int, policy) ++ __dynamic_array(char, param_ex, sizeof(struct sched_param_ex)) ++ ), ++ ++ TP_fast_assign( ++ __entry->pth = pth; ++ __entry->policy = policy; ++ memcpy(__get_dynamic_array(param_ex), param_ex, sizeof(*param_ex)); ++ ), ++ ++ TP_printk("pth=%p policy=%s param={ %s }", ++ (void *)__entry->pth, ++ cobalt_print_sched_policy(__entry->policy), ++ __parse_sched_params(__entry->policy, ++ __get_dynamic_array(param_ex)) ++ ) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_posix_scheduler, ++ TP_PROTO(pid_t pid, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pid, policy, param_ex), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __field(int, policy) ++ __dynamic_array(char, param_ex, sizeof(struct sched_param_ex)) ++ ), ++ ++ TP_fast_assign( ++ __entry->pid = pid; ++ __entry->policy = policy; ++ memcpy(__get_dynamic_array(param_ex), param_ex, sizeof(*param_ex)); ++ ), ++ ++ TP_printk("pid=%d policy=%s param={ %s }", ++ __entry->pid, ++ cobalt_print_sched_policy(__entry->policy), ++ __parse_sched_params(__entry->policy, ++ __get_dynamic_array(param_ex)) ++ ) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_void, ++ TP_PROTO(int dummy), ++ TP_ARGS(dummy), ++ TP_STRUCT__entry( ++ __field(int, dummy) ++ ), ++ TP_fast_assign( ++ (void)dummy; ++ ), ++ TP_printk("%s", "") ++); ++ ++DEFINE_EVENT(syscall_entry, cobalt_head_sysentry, ++ TP_PROTO(unsigned int nr), ++ TP_ARGS(nr) ++); ++ ++DEFINE_EVENT(syscall_exit, cobalt_head_sysexit, ++ TP_PROTO(long result), ++ TP_ARGS(result) ++); ++ ++DEFINE_EVENT(syscall_entry, cobalt_root_sysentry, ++ TP_PROTO(unsigned int nr), ++ TP_ARGS(nr) ++); ++ ++DEFINE_EVENT(syscall_exit, cobalt_root_sysexit, ++ TP_PROTO(long result), ++ TP_ARGS(result) ++); ++ ++DEFINE_EVENT(cobalt_posix_schedparam, cobalt_pthread_create, ++ TP_PROTO(unsigned long pth, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pth, policy, param_ex) ++); ++ ++DEFINE_EVENT(cobalt_posix_schedparam, cobalt_pthread_setschedparam, ++ TP_PROTO(unsigned long pth, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pth, policy, param_ex) ++); ++ ++DEFINE_EVENT(cobalt_posix_schedparam, cobalt_pthread_getschedparam, ++ TP_PROTO(unsigned long pth, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pth, policy, param_ex) ++); ++ ++TRACE_EVENT(cobalt_pthread_setschedprio, ++ TP_PROTO(unsigned long pth, int prio), ++ TP_ARGS(pth, prio), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __field(int, prio) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ __entry->prio = prio; ++ ), ++ TP_printk("pth=%p prio=%d", (void *)__entry->pth, __entry->prio) ++); ++ ++#define cobalt_print_thread_mode(__mode) \ ++ __print_flags(__mode, "|", \ ++ {PTHREAD_WARNSW, "warnsw"}, \ ++ {PTHREAD_LOCK_SCHED, "lock"}, \ ++ {PTHREAD_DISABLE_LOCKBREAK, "nolockbreak"}) ++ ++TRACE_EVENT(cobalt_pthread_setmode, ++ TP_PROTO(int clrmask, int setmask), ++ TP_ARGS(clrmask, setmask), ++ TP_STRUCT__entry( ++ __field(int, clrmask) ++ __field(int, setmask) ++ ), ++ TP_fast_assign( ++ __entry->clrmask = clrmask; ++ __entry->setmask = setmask; ++ ), ++ TP_printk("clrmask=%#x(%s) 
setmask=%#x(%s)", ++ __entry->clrmask, cobalt_print_thread_mode(__entry->clrmask), ++ __entry->setmask, cobalt_print_thread_mode(__entry->setmask)) ++); ++ ++TRACE_EVENT(cobalt_pthread_setname, ++ TP_PROTO(unsigned long pth, const char *name), ++ TP_ARGS(pth, name), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __string(name, name) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ __assign_str(name, name); ++ ), ++ TP_printk("pth=%p name=%s", (void *)__entry->pth, __get_str(name)) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_posix_pid, ++ TP_PROTO(pid_t pid), ++ TP_ARGS(pid), ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ ), ++ TP_fast_assign( ++ __entry->pid = pid; ++ ), ++ TP_printk("pid=%d", __entry->pid) ++); ++ ++DEFINE_EVENT(cobalt_posix_pid, cobalt_pthread_stat, ++ TP_PROTO(pid_t pid), ++ TP_ARGS(pid) ++); ++ ++TRACE_EVENT(cobalt_pthread_kill, ++ TP_PROTO(unsigned long pth, int sig), ++ TP_ARGS(pth, sig), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __field(int, sig) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ __entry->sig = sig; ++ ), ++ TP_printk("pth=%p sig=%d", (void *)__entry->pth, __entry->sig) ++); ++ ++TRACE_EVENT(cobalt_pthread_join, ++ TP_PROTO(unsigned long pth), ++ TP_ARGS(pth), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ ), ++ TP_printk("pth=%p", (void *)__entry->pth) ++); ++ ++TRACE_EVENT(cobalt_pthread_pid, ++ TP_PROTO(unsigned long pth), ++ TP_ARGS(pth), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ ), ++ TP_printk("pth=%p", (void *)__entry->pth) ++); ++ ++TRACE_EVENT(cobalt_pthread_extend, ++ TP_PROTO(unsigned long pth, const char *name), ++ TP_ARGS(pth, name), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __string(name, name) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ __assign_str(name, name); ++ ), ++ TP_printk("pth=%p +personality=%s", (void *)__entry->pth, __get_str(name)) ++); ++ ++TRACE_EVENT(cobalt_pthread_restrict, ++ TP_PROTO(unsigned long pth, const char *name), ++ TP_ARGS(pth, name), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __string(name, name) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ __assign_str(name, name); ++ ), ++ TP_printk("pth=%p -personality=%s", (void *)__entry->pth, __get_str(name)) ++); ++ ++DEFINE_EVENT(cobalt_void, cobalt_pthread_yield, ++ TP_PROTO(int dummy), ++ TP_ARGS(dummy) ++); ++ ++TRACE_EVENT(cobalt_sched_setconfig, ++ TP_PROTO(int cpu, int policy, size_t len), ++ TP_ARGS(cpu, policy, len), ++ TP_STRUCT__entry( ++ __field(int, cpu) ++ __field(int, policy) ++ __field(size_t, len) ++ ), ++ TP_fast_assign( ++ __entry->cpu = cpu; ++ __entry->policy = policy; ++ __entry->len = len; ++ ), ++ TP_printk("cpu=%d policy=%d(%s) len=%zu", ++ __entry->cpu, __entry->policy, ++ cobalt_print_sched_policy(__entry->policy), ++ __entry->len) ++); ++ ++TRACE_EVENT(cobalt_sched_get_config, ++ TP_PROTO(int cpu, int policy, size_t rlen), ++ TP_ARGS(cpu, policy, rlen), ++ TP_STRUCT__entry( ++ __field(int, cpu) ++ __field(int, policy) ++ __field(ssize_t, rlen) ++ ), ++ TP_fast_assign( ++ __entry->cpu = cpu; ++ __entry->policy = policy; ++ __entry->rlen = rlen; ++ ), ++ TP_printk("cpu=%d policy=%d(%s) rlen=%Zd", ++ __entry->cpu, __entry->policy, ++ cobalt_print_sched_policy(__entry->policy), ++ __entry->rlen) ++); ++ ++DEFINE_EVENT(cobalt_posix_scheduler, cobalt_sched_setscheduler, ++ TP_PROTO(pid_t pid, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pid, policy, 
param_ex) ++); ++ ++DEFINE_EVENT(cobalt_posix_pid, cobalt_sched_getscheduler, ++ TP_PROTO(pid_t pid), ++ TP_ARGS(pid) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_posix_prio_bound, ++ TP_PROTO(int policy, int prio), ++ TP_ARGS(policy, prio), ++ TP_STRUCT__entry( ++ __field(int, policy) ++ __field(int, prio) ++ ), ++ TP_fast_assign( ++ __entry->policy = policy; ++ __entry->prio = prio; ++ ), ++ TP_printk("policy=%d(%s) prio=%d", ++ __entry->policy, ++ cobalt_print_sched_policy(__entry->policy), ++ __entry->prio) ++); ++ ++DEFINE_EVENT(cobalt_posix_prio_bound, cobalt_sched_min_prio, ++ TP_PROTO(int policy, int prio), ++ TP_ARGS(policy, prio) ++); ++ ++DEFINE_EVENT(cobalt_posix_prio_bound, cobalt_sched_max_prio, ++ TP_PROTO(int policy, int prio), ++ TP_ARGS(policy, prio) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_posix_sem, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle), ++ TP_STRUCT__entry( ++ __field(xnhandle_t, handle) ++ ), ++ TP_fast_assign( ++ __entry->handle = handle; ++ ), ++ TP_printk("sem=%#x", __entry->handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_wait, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_trywait, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_timedwait, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_post, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_destroy, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_broadcast, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_inquire, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++TRACE_EVENT(cobalt_psem_getvalue, ++ TP_PROTO(xnhandle_t handle, int value), ++ TP_ARGS(handle, value), ++ TP_STRUCT__entry( ++ __field(xnhandle_t, handle) ++ __field(int, value) ++ ), ++ TP_fast_assign( ++ __entry->handle = handle; ++ __entry->value = value; ++ ), ++ TP_printk("sem=%#x value=%d", __entry->handle, __entry->value) ++); ++ ++#define cobalt_print_sem_flags(__flags) \ ++ __print_flags(__flags, "|", \ ++ {SEM_FIFO, "fifo"}, \ ++ {SEM_PULSE, "pulse"}, \ ++ {SEM_PSHARED, "pshared"}, \ ++ {SEM_REPORT, "report"}, \ ++ {SEM_WARNDEL, "warndel"}, \ ++ {SEM_RAWCLOCK, "rawclock"}, \ ++ {SEM_NOBUSYDEL, "nobusydel"}) ++ ++TRACE_EVENT(cobalt_psem_init, ++ TP_PROTO(const char *name, xnhandle_t handle, ++ int flags, unsigned int value), ++ TP_ARGS(name, handle, flags, value), ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(xnhandle_t, handle) ++ __field(int, flags) ++ __field(unsigned int, value) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->handle = handle; ++ __entry->flags = flags; ++ __entry->value = value; ++ ), ++ TP_printk("sem=%#x(%s) flags=%#x(%s) value=%u", ++ __entry->handle, ++ __get_str(name), ++ __entry->flags, ++ cobalt_print_sem_flags(__entry->flags), ++ __entry->value) ++); ++ ++TRACE_EVENT(cobalt_psem_init_failed, ++ TP_PROTO(const char *name, int flags, unsigned int value, int status), ++ TP_ARGS(name, flags, value, status), ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(int, flags) ++ __field(unsigned int, value) ++ __field(int, status) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->flags = flags; ++ __entry->value = value; ++ __entry->status = status; ++ ), ++ TP_printk("name=%s flags=%#x(%s) value=%u error=%d", ++ 
__get_str(name), ++ __entry->flags, ++ cobalt_print_sem_flags(__entry->flags), ++ __entry->value, __entry->status) ++); ++ ++#define cobalt_print_oflags(__flags) \ ++ __print_flags(__flags, "|", \ ++ {O_RDONLY, "rdonly"}, \ ++ {O_WRONLY, "wronly"}, \ ++ {O_RDWR, "rdwr"}, \ ++ {O_CREAT, "creat"}, \ ++ {O_EXCL, "excl"}, \ ++ {O_DIRECT, "direct"}, \ ++ {O_NONBLOCK, "nonblock"}, \ ++ {O_TRUNC, "trunc"}) ++ ++TRACE_EVENT(cobalt_psem_open, ++ TP_PROTO(const char *name, xnhandle_t handle, ++ int oflags, mode_t mode, unsigned int value), ++ TP_ARGS(name, handle, oflags, mode, value), ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(xnhandle_t, handle) ++ __field(int, oflags) ++ __field(mode_t, mode) ++ __field(unsigned int, value) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->handle = handle; ++ __entry->oflags = oflags; ++ if (oflags & O_CREAT) { ++ __entry->mode = mode; ++ __entry->value = value; ++ } else { ++ __entry->mode = 0; ++ __entry->value = 0; ++ } ++ ), ++ TP_printk("named_sem=%#x=(%s) oflags=%#x(%s) mode=%o value=%u", ++ __entry->handle, __get_str(name), ++ __entry->oflags, cobalt_print_oflags(__entry->oflags), ++ __entry->mode, __entry->value) ++); ++ ++TRACE_EVENT(cobalt_psem_open_failed, ++ TP_PROTO(const char *name, int oflags, mode_t mode, ++ unsigned int value, int status), ++ TP_ARGS(name, oflags, mode, value, status), ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(int, oflags) ++ __field(mode_t, mode) ++ __field(unsigned int, value) ++ __field(int, status) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->oflags = oflags; ++ __entry->status = status; ++ if (oflags & O_CREAT) { ++ __entry->mode = mode; ++ __entry->value = value; ++ } else { ++ __entry->mode = 0; ++ __entry->value = 0; ++ } ++ ), ++ TP_printk("named_sem=%s oflags=%#x(%s) mode=%o value=%u error=%d", ++ __get_str(name), ++ __entry->oflags, cobalt_print_oflags(__entry->oflags), ++ __entry->mode, __entry->value, __entry->status) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_close, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++TRACE_EVENT(cobalt_psem_unlink, ++ TP_PROTO(const char *name), ++ TP_ARGS(name), ++ TP_STRUCT__entry( ++ __string(name, name) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ ), ++ TP_printk("name=%s", __get_str(name)) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_clock_timespec, ++ TP_PROTO(clockid_t clk_id, const struct timespec *val), ++ TP_ARGS(clk_id, val), ++ ++ TP_STRUCT__entry( ++ __field(clockid_t, clk_id) ++ __timespec_fields(val) ++ ), ++ ++ TP_fast_assign( ++ __entry->clk_id = clk_id; ++ __assign_timespec(val, val); ++ ), ++ ++ TP_printk("clock_id=%d timeval=(%ld.%09ld)", ++ __entry->clk_id, ++ __timespec_args(val) ++ ) ++); ++ ++DEFINE_EVENT(cobalt_clock_timespec, cobalt_clock_getres, ++ TP_PROTO(clockid_t clk_id, const struct timespec *res), ++ TP_ARGS(clk_id, res) ++); ++ ++DEFINE_EVENT(cobalt_clock_timespec, cobalt_clock_gettime, ++ TP_PROTO(clockid_t clk_id, const struct timespec *time), ++ TP_ARGS(clk_id, time) ++); ++ ++DEFINE_EVENT(cobalt_clock_timespec, cobalt_clock_settime, ++ TP_PROTO(clockid_t clk_id, const struct timespec *time), ++ TP_ARGS(clk_id, time) ++); ++ ++TRACE_EVENT(cobalt_clock_adjtime, ++ TP_PROTO(clockid_t clk_id, struct timex *tx), ++ TP_ARGS(clk_id, tx), ++ ++ TP_STRUCT__entry( ++ __field(clockid_t, clk_id) ++ __field(struct timex *, tx) ++ ), ++ ++ TP_fast_assign( ++ __entry->clk_id = clk_id; ++ __entry->tx = tx; ++ ), ++ ++ TP_printk("clock_id=%d timex=%p", ++ __entry->clk_id, ++ 
__entry->tx ++ ) ++); ++ ++#define cobalt_print_timer_flags(__flags) \ ++ __print_flags(__flags, "|", \ ++ {TIMER_ABSTIME, "TIMER_ABSTIME"}) ++ ++TRACE_EVENT(cobalt_clock_nanosleep, ++ TP_PROTO(clockid_t clk_id, int flags, const struct timespec *time), ++ TP_ARGS(clk_id, flags, time), ++ ++ TP_STRUCT__entry( ++ __field(clockid_t, clk_id) ++ __field(int, flags) ++ __timespec_fields(time) ++ ), ++ ++ TP_fast_assign( ++ __entry->clk_id = clk_id; ++ __entry->flags = flags; ++ __assign_timespec(time, time); ++ ), ++ ++ TP_printk("clock_id=%d flags=%#x(%s) rqt=(%ld.%09ld)", ++ __entry->clk_id, ++ __entry->flags, cobalt_print_timer_flags(__entry->flags), ++ __timespec_args(time) ++ ) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_clock_ident, ++ TP_PROTO(const char *name, clockid_t clk_id), ++ TP_ARGS(name, clk_id), ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(clockid_t, clk_id) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->clk_id = clk_id; ++ ), ++ TP_printk("name=%s, id=%#x", __get_str(name), __entry->clk_id) ++); ++ ++DEFINE_EVENT(cobalt_clock_ident, cobalt_clock_register, ++ TP_PROTO(const char *name, clockid_t clk_id), ++ TP_ARGS(name, clk_id) ++); ++ ++DEFINE_EVENT(cobalt_clock_ident, cobalt_clock_deregister, ++ TP_PROTO(const char *name, clockid_t clk_id), ++ TP_ARGS(name, clk_id) ++); ++ ++#define cobalt_print_clock(__clk_id) \ ++ __print_symbolic(__clk_id, \ ++ {CLOCK_MONOTONIC, "CLOCK_MONOTONIC"}, \ ++ {CLOCK_MONOTONIC_RAW, "CLOCK_MONOTONIC_RAW"}, \ ++ {CLOCK_REALTIME, "CLOCK_REALTIME"}) ++ ++TRACE_EVENT(cobalt_cond_init, ++ TP_PROTO(const struct cobalt_cond_shadow __user *u_cnd, ++ const struct cobalt_condattr *attr), ++ TP_ARGS(u_cnd, attr), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_cond_shadow __user *, u_cnd) ++ __field(clockid_t, clk_id) ++ __field(int, pshared) ++ ), ++ TP_fast_assign( ++ __entry->u_cnd = u_cnd; ++ __entry->clk_id = attr->clock; ++ __entry->pshared = attr->pshared; ++ ), ++ TP_printk("cond=%p attr={ .clock=%s, .pshared=%d }", ++ __entry->u_cnd, ++ cobalt_print_clock(__entry->clk_id), ++ __entry->pshared) ++); ++ ++TRACE_EVENT(cobalt_cond_destroy, ++ TP_PROTO(const struct cobalt_cond_shadow __user *u_cnd), ++ TP_ARGS(u_cnd), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_cond_shadow __user *, u_cnd) ++ ), ++ TP_fast_assign( ++ __entry->u_cnd = u_cnd; ++ ), ++ TP_printk("cond=%p", __entry->u_cnd) ++); ++ ++TRACE_EVENT(cobalt_cond_timedwait, ++ TP_PROTO(const struct cobalt_cond_shadow __user *u_cnd, ++ const struct cobalt_mutex_shadow __user *u_mx, ++ const struct timespec *timeout), ++ TP_ARGS(u_cnd, u_mx, timeout), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_cond_shadow __user *, u_cnd) ++ __field(const struct cobalt_mutex_shadow __user *, u_mx) ++ __timespec_fields(timeout) ++ ), ++ TP_fast_assign( ++ __entry->u_cnd = u_cnd; ++ __entry->u_mx = u_mx; ++ __assign_timespec(timeout, timeout); ++ ), ++ TP_printk("cond=%p, mutex=%p, timeout=(%ld.%09ld)", ++ __entry->u_cnd, __entry->u_mx, __timespec_args(timeout)) ++); ++ ++TRACE_EVENT(cobalt_cond_wait, ++ TP_PROTO(const struct cobalt_cond_shadow __user *u_cnd, ++ const struct cobalt_mutex_shadow __user *u_mx), ++ TP_ARGS(u_cnd, u_mx), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_cond_shadow __user *, u_cnd) ++ __field(const struct cobalt_mutex_shadow __user *, u_mx) ++ ), ++ TP_fast_assign( ++ __entry->u_cnd = u_cnd; ++ __entry->u_mx = u_mx; ++ ), ++ TP_printk("cond=%p, mutex=%p", ++ __entry->u_cnd, __entry->u_mx) ++); ++ ++TRACE_EVENT(cobalt_mq_open, ++ TP_PROTO(const 
char *name, int oflags, mode_t mode), ++ TP_ARGS(name, oflags, mode), ++ ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(int, oflags) ++ __field(mode_t, mode) ++ ), ++ ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->oflags = oflags; ++ __entry->mode = (oflags & O_CREAT) ? mode : 0; ++ ), ++ ++ TP_printk("name=%s oflags=%#x(%s) mode=%o", ++ __get_str(name), ++ __entry->oflags, cobalt_print_oflags(__entry->oflags), ++ __entry->mode) ++); ++ ++TRACE_EVENT(cobalt_mq_notify, ++ TP_PROTO(mqd_t mqd, const struct sigevent *sev), ++ TP_ARGS(mqd, sev), ++ ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ __field(int, signo) ++ ), ++ ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ __entry->signo = sev && sev->sigev_notify != SIGEV_NONE ? ++ sev->sigev_signo : 0; ++ ), ++ ++ TP_printk("mqd=%d signo=%d", ++ __entry->mqd, __entry->signo) ++); ++ ++TRACE_EVENT(cobalt_mq_close, ++ TP_PROTO(mqd_t mqd), ++ TP_ARGS(mqd), ++ ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ ), ++ ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ ), ++ ++ TP_printk("mqd=%d", __entry->mqd) ++); ++ ++TRACE_EVENT(cobalt_mq_unlink, ++ TP_PROTO(const char *name), ++ TP_ARGS(name), ++ ++ TP_STRUCT__entry( ++ __string(name, name) ++ ), ++ ++ TP_fast_assign( ++ __assign_str(name, name); ++ ), ++ ++ TP_printk("name=%s", __get_str(name)) ++); ++ ++TRACE_EVENT(cobalt_mq_send, ++ TP_PROTO(mqd_t mqd, const void __user *u_buf, size_t len, ++ unsigned int prio), ++ TP_ARGS(mqd, u_buf, len, prio), ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ __field(const void __user *, u_buf) ++ __field(size_t, len) ++ __field(unsigned int, prio) ++ ), ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ __entry->u_buf = u_buf; ++ __entry->len = len; ++ __entry->prio = prio; ++ ), ++ TP_printk("mqd=%d buf=%p len=%zu prio=%u", ++ __entry->mqd, __entry->u_buf, __entry->len, ++ __entry->prio) ++); ++ ++TRACE_EVENT(cobalt_mq_timedreceive, ++ TP_PROTO(mqd_t mqd, const void __user *u_buf, size_t len, ++ const struct timespec *timeout), ++ TP_ARGS(mqd, u_buf, len, timeout), ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ __field(const void __user *, u_buf) ++ __field(size_t, len) ++ __timespec_fields(timeout) ++ ), ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ __entry->u_buf = u_buf; ++ __entry->len = len; ++ __assign_timespec(timeout, timeout); ++ ), ++ TP_printk("mqd=%d buf=%p len=%zu timeout=(%ld.%09ld)", ++ __entry->mqd, __entry->u_buf, __entry->len, ++ __timespec_args(timeout)) ++); ++ ++TRACE_EVENT(cobalt_mq_receive, ++ TP_PROTO(mqd_t mqd, const void __user *u_buf, size_t len), ++ TP_ARGS(mqd, u_buf, len), ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ __field(const void __user *, u_buf) ++ __field(size_t, len) ++ ), ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ __entry->u_buf = u_buf; ++ __entry->len = len; ++ ), ++ TP_printk("mqd=%d buf=%p len=%zu", ++ __entry->mqd, __entry->u_buf, __entry->len) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_posix_mqattr, ++ TP_PROTO(mqd_t mqd, const struct mq_attr *attr), ++ TP_ARGS(mqd, attr), ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ __field(long, flags) ++ __field(long, curmsgs) ++ __field(long, msgsize) ++ __field(long, maxmsg) ++ ), ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ __entry->flags = attr->mq_flags; ++ __entry->curmsgs = attr->mq_curmsgs; ++ __entry->msgsize = attr->mq_msgsize; ++ __entry->maxmsg = attr->mq_maxmsg; ++ ), ++ TP_printk("mqd=%d flags=%#lx(%s) curmsgs=%ld msgsize=%ld maxmsg=%ld", ++ __entry->mqd, ++ __entry->flags, cobalt_print_oflags(__entry->flags), ++ __entry->curmsgs, ++ __entry->msgsize, ++ 
__entry->maxmsg ++ ) ++); ++ ++DEFINE_EVENT(cobalt_posix_mqattr, cobalt_mq_getattr, ++ TP_PROTO(mqd_t mqd, const struct mq_attr *attr), ++ TP_ARGS(mqd, attr) ++); ++ ++DEFINE_EVENT(cobalt_posix_mqattr, cobalt_mq_setattr, ++ TP_PROTO(mqd_t mqd, const struct mq_attr *attr), ++ TP_ARGS(mqd, attr) ++); ++ ++#define cobalt_print_evflags(__flags) \ ++ __print_flags(__flags, "|", \ ++ {COBALT_EVENT_SHARED, "shared"}, \ ++ {COBALT_EVENT_PRIO, "prio"}) ++ ++TRACE_EVENT(cobalt_event_init, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event, ++ unsigned long value, int flags), ++ TP_ARGS(u_event, value, flags), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_event_shadow __user *, u_event) ++ __field(unsigned long, value) ++ __field(int, flags) ++ ), ++ TP_fast_assign( ++ __entry->u_event = u_event; ++ __entry->value = value; ++ __entry->flags = flags; ++ ), ++ TP_printk("event=%p value=%lu flags=%#x(%s)", ++ __entry->u_event, __entry->value, ++ __entry->flags, cobalt_print_evflags(__entry->flags)) ++); ++ ++#define cobalt_print_evmode(__mode) \ ++ __print_symbolic(__mode, \ ++ {COBALT_EVENT_ANY, "any"}, \ ++ {COBALT_EVENT_ALL, "all"}) ++ ++TRACE_EVENT(cobalt_event_timedwait, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event, ++ unsigned long bits, int mode, ++ const struct timespec *timeout), ++ TP_ARGS(u_event, bits, mode, timeout), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_event_shadow __user *, u_event) ++ __field(unsigned long, bits) ++ __field(int, mode) ++ __timespec_fields(timeout) ++ ), ++ TP_fast_assign( ++ __entry->u_event = u_event; ++ __entry->bits = bits; ++ __entry->mode = mode; ++ __assign_timespec(timeout, timeout); ++ ), ++ TP_printk("event=%p bits=%#lx mode=%#x(%s) timeout=(%ld.%09ld)", ++ __entry->u_event, __entry->bits, __entry->mode, ++ cobalt_print_evmode(__entry->mode), ++ __timespec_args(timeout)) ++); ++ ++TRACE_EVENT(cobalt_event_wait, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event, ++ unsigned long bits, int mode), ++ TP_ARGS(u_event, bits, mode), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_event_shadow __user *, u_event) ++ __field(unsigned long, bits) ++ __field(int, mode) ++ ), ++ TP_fast_assign( ++ __entry->u_event = u_event; ++ __entry->bits = bits; ++ __entry->mode = mode; ++ ), ++ TP_printk("event=%p bits=%#lx mode=%#x(%s)", ++ __entry->u_event, __entry->bits, __entry->mode, ++ cobalt_print_evmode(__entry->mode)) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_event_ident, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event), ++ TP_ARGS(u_event), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_event_shadow __user *, u_event) ++ ), ++ TP_fast_assign( ++ __entry->u_event = u_event; ++ ), ++ TP_printk("event=%p", __entry->u_event) ++); ++ ++DEFINE_EVENT(cobalt_event_ident, cobalt_event_destroy, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event), ++ TP_ARGS(u_event) ++); ++ ++DEFINE_EVENT(cobalt_event_ident, cobalt_event_sync, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event), ++ TP_ARGS(u_event) ++); ++ ++DEFINE_EVENT(cobalt_event_ident, cobalt_event_inquire, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event), ++ TP_ARGS(u_event) ++); ++ ++#endif /* _TRACE_COBALT_POSIX_H */ ++ ++/* This part must be outside protection */ ++#undef TRACE_INCLUDE_PATH ++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE cobalt-posix ++#include +--- linux/include/trace/events/cobalt-rtdm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/trace/events/cobalt-rtdm.h 2021-04-07 
16:01:25.905636055 +0800 +@@ -0,0 +1,554 @@ ++/* ++ * Copyright (C) 2014 Jan Kiszka . ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM cobalt_rtdm ++ ++#if !defined(_TRACE_COBALT_RTDM_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_COBALT_RTDM_H ++ ++#include ++#include ++#include ++ ++struct rtdm_fd; ++struct rtdm_event; ++struct rtdm_sem; ++struct rtdm_mutex; ++struct xnthread; ++struct rtdm_device; ++struct rtdm_dev_context; ++struct _rtdm_mmap_request; ++ ++DECLARE_EVENT_CLASS(fd_event, ++ TP_PROTO(struct rtdm_fd *fd, int ufd), ++ TP_ARGS(fd, ufd), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_device *, dev) ++ __field(int, ufd) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev = rtdm_fd_to_context(fd)->device; ++ __entry->ufd = ufd; ++ ), ++ ++ TP_printk("device=%p fd=%d", ++ __entry->dev, __entry->ufd) ++); ++ ++DECLARE_EVENT_CLASS(fd_request, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, unsigned long arg), ++ TP_ARGS(task, fd, ufd, arg), ++ ++ TP_STRUCT__entry( ++ __array(char, comm, TASK_COMM_LEN) ++ __field(pid_t, pid) ++ __field(struct rtdm_device *, dev) ++ __field(int, ufd) ++ __field(unsigned long, arg) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ __entry->pid = task_pid_nr(task); ++ __entry->dev = rtdm_fd_to_context(fd)->device; ++ __entry->ufd = ufd; ++ __entry->arg = arg; ++ ), ++ ++ TP_printk("device=%p fd=%d arg=%#lx pid=%d comm=%s", ++ __entry->dev, __entry->ufd, __entry->arg, ++ __entry->pid, __entry->comm) ++); ++ ++DECLARE_EVENT_CLASS(fd_request_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, int status), ++ TP_ARGS(task, fd, ufd, status), ++ ++ TP_STRUCT__entry( ++ __array(char, comm, TASK_COMM_LEN) ++ __field(pid_t, pid) ++ __field(struct rtdm_device *, dev) ++ __field(int, ufd) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ __entry->pid = task_pid_nr(task); ++ __entry->dev = ++ !IS_ERR(fd) ? 
rtdm_fd_to_context(fd)->device : NULL; ++ __entry->ufd = ufd; ++ ), ++ ++ TP_printk("device=%p fd=%d pid=%d comm=%s", ++ __entry->dev, __entry->ufd, __entry->pid, __entry->comm) ++); ++ ++DECLARE_EVENT_CLASS(task_op, ++ TP_PROTO(struct xnthread *task), ++ TP_ARGS(task), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, task) ++ __string(task_name, task->name) ++ ), ++ ++ TP_fast_assign( ++ __entry->task = task; ++ __assign_str(task_name, task->name); ++ ), ++ ++ TP_printk("task %p(%s)", __entry->task, __get_str(task_name)) ++); ++ ++DECLARE_EVENT_CLASS(event_op, ++ TP_PROTO(struct rtdm_event *ev), ++ TP_ARGS(ev), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_event *, ev) ++ ), ++ ++ TP_fast_assign( ++ __entry->ev = ev; ++ ), ++ ++ TP_printk("event=%p", __entry->ev) ++); ++ ++DECLARE_EVENT_CLASS(sem_op, ++ TP_PROTO(struct rtdm_sem *sem), ++ TP_ARGS(sem), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_sem *, sem) ++ ), ++ ++ TP_fast_assign( ++ __entry->sem = sem; ++ ), ++ ++ TP_printk("sem=%p", __entry->sem) ++); ++ ++DECLARE_EVENT_CLASS(mutex_op, ++ TP_PROTO(struct rtdm_mutex *mutex), ++ TP_ARGS(mutex), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_mutex *, mutex) ++ ), ++ ++ TP_fast_assign( ++ __entry->mutex = mutex; ++ ), ++ ++ TP_printk("mutex=%p", __entry->mutex) ++); ++ ++TRACE_EVENT(cobalt_device_register, ++ TP_PROTO(struct rtdm_device *dev), ++ TP_ARGS(dev), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_device *, dev) ++ __string(device_name, dev->name) ++ __field(int, flags) ++ __field(int, class_id) ++ __field(int, subclass_id) ++ __field(int, profile_version) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev = dev; ++ __assign_str(device_name, dev->name); ++ __entry->flags = dev->driver->device_flags; ++ __entry->class_id = dev->driver->profile_info.class_id; ++ __entry->subclass_id = dev->driver->profile_info.subclass_id; ++ __entry->profile_version = dev->driver->profile_info.version; ++ ), ++ ++ TP_printk("%s device %s=%p flags=0x%x, class=%d.%d profile=%d", ++ (__entry->flags & RTDM_DEVICE_TYPE_MASK) ++ == RTDM_NAMED_DEVICE ? 
"named" : "protocol", ++ __get_str(device_name), __entry->dev, ++ __entry->flags, __entry->class_id, __entry->subclass_id, ++ __entry->profile_version) ++); ++ ++TRACE_EVENT(cobalt_device_unregister, ++ TP_PROTO(struct rtdm_device *dev), ++ TP_ARGS(dev), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_device *, dev) ++ __string(device_name, dev->name) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev = dev; ++ __assign_str(device_name, dev->name); ++ ), ++ ++ TP_printk("device %s=%p", ++ __get_str(device_name), __entry->dev) ++); ++ ++DEFINE_EVENT(fd_event, cobalt_fd_created, ++ TP_PROTO(struct rtdm_fd *fd, int ufd), ++ TP_ARGS(fd, ufd) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_open, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long oflags), ++ TP_ARGS(task, fd, ufd, oflags) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_close, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long lock_count), ++ TP_ARGS(task, fd, ufd, lock_count) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_socket, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long protocol_family), ++ TP_ARGS(task, fd, ufd, protocol_family) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_read, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long len), ++ TP_ARGS(task, fd, ufd, len) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_write, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long len), ++ TP_ARGS(task, fd, ufd, len) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_ioctl, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long request), ++ TP_ARGS(task, fd, ufd, request) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_sendmsg, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long flags), ++ TP_ARGS(task, fd, ufd, flags) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_sendmmsg, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long flags), ++ TP_ARGS(task, fd, ufd, flags) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_recvmsg, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long flags), ++ TP_ARGS(task, fd, ufd, flags) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_recvmmsg, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long flags), ++ TP_ARGS(task, fd, ufd, flags) ++); ++ ++#define cobalt_print_protbits(__prot) \ ++ __print_flags(__prot, "|", \ ++ {PROT_EXEC, "exec"}, \ ++ {PROT_READ, "read"}, \ ++ {PROT_WRITE, "write"}) ++ ++#define cobalt_print_mapbits(__flags) \ ++ __print_flags(__flags, "|", \ ++ {MAP_SHARED, "shared"}, \ ++ {MAP_PRIVATE, "private"}, \ ++ {MAP_ANONYMOUS, "anon"}, \ ++ {MAP_FIXED, "fixed"}, \ ++ {MAP_HUGETLB, "huge"}, \ ++ {MAP_NONBLOCK, "nonblock"}, \ ++ {MAP_NORESERVE, "noreserve"}, \ ++ {MAP_POPULATE, "populate"}, \ ++ {MAP_UNINITIALIZED, "uninit"}) ++ ++TRACE_EVENT(cobalt_fd_mmap, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, struct _rtdm_mmap_request *rma), ++ TP_ARGS(task, fd, ufd, rma), ++ ++ TP_STRUCT__entry( ++ __array(char, comm, TASK_COMM_LEN) ++ __field(pid_t, pid) ++ __field(struct rtdm_device *, dev) ++ __field(int, ufd) ++ __field(size_t, length) ++ __field(off_t, offset) ++ __field(int, prot) ++ __field(int, flags) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ __entry->pid = task_pid_nr(task); ++ __entry->dev = 
rtdm_fd_to_context(fd)->device; ++ __entry->ufd = ufd; ++ __entry->length = rma->length; ++ __entry->offset = rma->offset; ++ __entry->prot = rma->prot; ++ __entry->flags = rma->flags; ++ ), ++ ++ TP_printk("device=%p fd=%d area={ len:%zu, off:%Lu }" ++ " prot=%#x(%s) flags=%#x(%s) pid=%d comm=%s", ++ __entry->dev, __entry->ufd, __entry->length, ++ (unsigned long long)__entry->offset, ++ __entry->prot, cobalt_print_protbits(__entry->prot), ++ __entry->flags, cobalt_print_mapbits(__entry->flags), ++ __entry->pid, __entry->comm) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_ioctl_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_read_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_write_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_recvmsg_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_recvmmsg_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_sendmsg_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_sendmmsg_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_mmap_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(task_op, cobalt_driver_task_join, ++ TP_PROTO(struct xnthread *task), ++ TP_ARGS(task) ++); ++ ++TRACE_EVENT(cobalt_driver_event_init, ++ TP_PROTO(struct rtdm_event *ev, unsigned long pending), ++ TP_ARGS(ev, pending), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_event *, ev) ++ __field(unsigned long, pending) ++ ), ++ ++ TP_fast_assign( ++ __entry->ev = ev; ++ __entry->pending = pending; ++ ), ++ ++ TP_printk("event=%p pending=%#lx", ++ __entry->ev, __entry->pending) ++); ++ ++TRACE_EVENT(cobalt_driver_event_wait, ++ TP_PROTO(struct rtdm_event *ev, struct xnthread *task), ++ TP_ARGS(ev, task), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, task) ++ __string(task_name, task->name) ++ __field(struct rtdm_event *, ev) ++ ), ++ ++ TP_fast_assign( ++ __entry->task = task; ++ __assign_str(task_name, task->name); ++ __entry->ev = ev; ++ ), ++ ++ TP_printk("event=%p task=%p(%s)", ++ __entry->ev, __entry->task, __get_str(task_name)) ++); ++ ++DEFINE_EVENT(event_op, cobalt_driver_event_signal, ++ TP_PROTO(struct rtdm_event *ev), ++ TP_ARGS(ev) ++); ++ ++DEFINE_EVENT(event_op, cobalt_driver_event_clear, ++ TP_PROTO(struct rtdm_event *ev), ++ TP_ARGS(ev) ++); ++ ++DEFINE_EVENT(event_op, cobalt_driver_event_pulse, ++ TP_PROTO(struct rtdm_event *ev), ++ TP_ARGS(ev) ++); ++ ++DEFINE_EVENT(event_op, cobalt_driver_event_destroy, ++ TP_PROTO(struct rtdm_event *ev), ++ TP_ARGS(ev) ++); ++ ++TRACE_EVENT(cobalt_driver_sem_init, ++ TP_PROTO(struct rtdm_sem *sem, unsigned long value), ++ 
TP_ARGS(sem, value), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_sem *, sem) ++ __field(unsigned long, value) ++ ), ++ ++ TP_fast_assign( ++ __entry->sem = sem; ++ __entry->value = value; ++ ), ++ ++ TP_printk("sem=%p value=%lu", ++ __entry->sem, __entry->value) ++); ++ ++TRACE_EVENT(cobalt_driver_sem_wait, ++ TP_PROTO(struct rtdm_sem *sem, struct xnthread *task), ++ TP_ARGS(sem, task), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, task) ++ __string(task_name, task->name) ++ __field(struct rtdm_sem *, sem) ++ ), ++ ++ TP_fast_assign( ++ __entry->task = task; ++ __assign_str(task_name, task->name); ++ __entry->sem = sem; ++ ), ++ ++ TP_printk("sem=%p task=%p(%s)", ++ __entry->sem, __entry->task, __get_str(task_name)) ++); ++ ++DEFINE_EVENT(sem_op, cobalt_driver_sem_up, ++ TP_PROTO(struct rtdm_sem *sem), ++ TP_ARGS(sem) ++); ++ ++DEFINE_EVENT(sem_op, cobalt_driver_sem_destroy, ++ TP_PROTO(struct rtdm_sem *sem), ++ TP_ARGS(sem) ++); ++ ++DEFINE_EVENT(mutex_op, cobalt_driver_mutex_init, ++ TP_PROTO(struct rtdm_mutex *mutex), ++ TP_ARGS(mutex) ++); ++ ++DEFINE_EVENT(mutex_op, cobalt_driver_mutex_release, ++ TP_PROTO(struct rtdm_mutex *mutex), ++ TP_ARGS(mutex) ++); ++ ++DEFINE_EVENT(mutex_op, cobalt_driver_mutex_destroy, ++ TP_PROTO(struct rtdm_mutex *mutex), ++ TP_ARGS(mutex) ++); ++ ++TRACE_EVENT(cobalt_driver_mutex_wait, ++ TP_PROTO(struct rtdm_mutex *mutex, struct xnthread *task), ++ TP_ARGS(mutex, task), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, task) ++ __string(task_name, task->name) ++ __field(struct rtdm_mutex *, mutex) ++ ), ++ ++ TP_fast_assign( ++ __entry->task = task; ++ __assign_str(task_name, task->name); ++ __entry->mutex = mutex; ++ ), ++ ++ TP_printk("mutex=%p task=%p(%s)", ++ __entry->mutex, __entry->task, __get_str(task_name)) ++); ++ ++#endif /* _TRACE_COBALT_RTDM_H */ ++ ++/* This part must be outside protection */ ++#undef TRACE_INCLUDE_PATH ++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE cobalt-rtdm ++#include +--- linux/include/trace/events/cobalt-core.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/trace/events/cobalt-core.h 2021-04-07 16:01:25.897636066 +0800 +@@ -0,0 +1,777 @@ ++/* ++ * Copyright (C) 2014 Jan Kiszka . ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM cobalt_core ++ ++#if !defined(_TRACE_COBALT_CORE_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_COBALT_CORE_H ++ ++#include ++ ++DECLARE_EVENT_CLASS(thread_event, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __field(unsigned long, state) ++ __field(unsigned long, info) ++ ), ++ ++ TP_fast_assign( ++ __entry->state = thread->state; ++ __entry->info = thread->info; ++ __entry->pid = xnthread_host_pid(thread); ++ ), ++ ++ TP_printk("pid=%d state=0x%lx info=0x%lx", ++ __entry->pid, __entry->state, __entry->info) ++); ++ ++DECLARE_EVENT_CLASS(curr_thread_event, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, thread) ++ __field(unsigned long, state) ++ __field(unsigned long, info) ++ ), ++ ++ TP_fast_assign( ++ __entry->state = thread->state; ++ __entry->info = thread->info; ++ ), ++ ++ TP_printk("state=0x%lx info=0x%lx", ++ __entry->state, __entry->info) ++); ++ ++DECLARE_EVENT_CLASS(synch_wait_event, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch), ++ ++ TP_STRUCT__entry( ++ __field(struct xnsynch *, synch) ++ ), ++ ++ TP_fast_assign( ++ __entry->synch = synch; ++ ), ++ ++ TP_printk("synch=%p", __entry->synch) ++); ++ ++DECLARE_EVENT_CLASS(synch_post_event, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch), ++ ++ TP_STRUCT__entry( ++ __field(struct xnsynch *, synch) ++ ), ++ ++ TP_fast_assign( ++ __entry->synch = synch; ++ ), ++ ++ TP_printk("synch=%p", __entry->synch) ++); ++ ++DECLARE_EVENT_CLASS(irq_event, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq), ++ ++ TP_STRUCT__entry( ++ __field(unsigned int, irq) ++ ), ++ ++ TP_fast_assign( ++ __entry->irq = irq; ++ ), ++ ++ TP_printk("irq=%u", __entry->irq) ++); ++ ++DECLARE_EVENT_CLASS(clock_event, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq), ++ ++ TP_STRUCT__entry( ++ __field(unsigned int, irq) ++ ), ++ ++ TP_fast_assign( ++ __entry->irq = irq; ++ ), ++ ++ TP_printk("clock_irq=%u", __entry->irq) ++); ++ ++DECLARE_EVENT_CLASS(timer_event, ++ TP_PROTO(struct xntimer *timer), ++ TP_ARGS(timer), ++ ++ TP_STRUCT__entry( ++ __field(struct xntimer *, timer) ++ ), ++ ++ TP_fast_assign( ++ __entry->timer = timer; ++ ), ++ ++ TP_printk("timer=%p", __entry->timer) ++); ++ ++TRACE_EVENT(cobalt_schedule, ++ TP_PROTO(struct xnsched *sched), ++ TP_ARGS(sched), ++ ++ TP_STRUCT__entry( ++ __field(unsigned long, status) ++ ), ++ ++ TP_fast_assign( ++ __entry->status = sched->status; ++ ), ++ ++ TP_printk("status=0x%lx", __entry->status) ++); ++ ++TRACE_EVENT(cobalt_schedule_remote, ++ TP_PROTO(struct xnsched *sched), ++ TP_ARGS(sched), ++ ++ TP_STRUCT__entry( ++ __field(unsigned long, status) ++ ), ++ ++ TP_fast_assign( ++ __entry->status = sched->status; ++ ), ++ ++ TP_printk("status=0x%lx", __entry->status) ++); ++ ++TRACE_EVENT(cobalt_switch_context, ++ TP_PROTO(struct xnthread *prev, struct xnthread *next), ++ TP_ARGS(prev, next), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, prev) ++ __string(prev_name, prev->name) ++ __field(pid_t, prev_pid) ++ __field(int, prev_prio) ++ __field(unsigned long, prev_state) ++ __field(struct xnthread *, next) ++ __string(next_name, next->name) ++ __field(pid_t, next_pid) ++ __field(int, next_prio) ++ ), ++ ++ TP_fast_assign( ++ __entry->prev = prev; ++ __assign_str(prev_name, prev->name); ++ __entry->prev_pid = xnthread_host_pid(prev); ++ __entry->prev_prio = xnthread_current_priority(prev); ++ __entry->prev_state = 
prev->state; ++ __entry->next = next; ++ __assign_str(next_name, next->name); ++ __entry->next_pid = xnthread_host_pid(next); ++ __entry->next_prio = xnthread_current_priority(next); ++ ), ++ ++ TP_printk("prev_name=%s prev_pid=%d prev_prio=%d prev_state=0x%lx ==> next_name=%s next_pid=%d next_prio=%d", ++ __get_str(prev_name), __entry->prev_pid, ++ __entry->prev_prio, __entry->prev_state, ++ __get_str(next_name), __entry->next_pid, __entry->next_prio) ++); ++ ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ ++TRACE_EVENT(cobalt_schedquota_refill, ++ TP_PROTO(int dummy), ++ TP_ARGS(dummy), ++ ++ TP_STRUCT__entry( ++ __field(int, dummy) ++ ), ++ ++ TP_fast_assign( ++ (void)dummy; ++ ), ++ ++ TP_printk("%s", "") ++); ++ ++DECLARE_EVENT_CLASS(schedquota_group_event, ++ TP_PROTO(struct xnsched_quota_group *tg), ++ TP_ARGS(tg), ++ ++ TP_STRUCT__entry( ++ __field(int, tgid) ++ ), ++ ++ TP_fast_assign( ++ __entry->tgid = tg->tgid; ++ ), ++ ++ TP_printk("tgid=%d", ++ __entry->tgid) ++); ++ ++DEFINE_EVENT(schedquota_group_event, cobalt_schedquota_create_group, ++ TP_PROTO(struct xnsched_quota_group *tg), ++ TP_ARGS(tg) ++); ++ ++DEFINE_EVENT(schedquota_group_event, cobalt_schedquota_destroy_group, ++ TP_PROTO(struct xnsched_quota_group *tg), ++ TP_ARGS(tg) ++); ++ ++TRACE_EVENT(cobalt_schedquota_set_limit, ++ TP_PROTO(struct xnsched_quota_group *tg, ++ int percent, ++ int peak_percent), ++ TP_ARGS(tg, percent, peak_percent), ++ ++ TP_STRUCT__entry( ++ __field(int, tgid) ++ __field(int, percent) ++ __field(int, peak_percent) ++ ), ++ ++ TP_fast_assign( ++ __entry->tgid = tg->tgid; ++ __entry->percent = percent; ++ __entry->peak_percent = peak_percent; ++ ), ++ ++ TP_printk("tgid=%d percent=%d peak_percent=%d", ++ __entry->tgid, __entry->percent, __entry->peak_percent) ++); ++ ++DECLARE_EVENT_CLASS(schedquota_thread_event, ++ TP_PROTO(struct xnsched_quota_group *tg, ++ struct xnthread *thread), ++ TP_ARGS(tg, thread), ++ ++ TP_STRUCT__entry( ++ __field(int, tgid) ++ __field(struct xnthread *, thread) ++ __field(pid_t, pid) ++ ), ++ ++ TP_fast_assign( ++ __entry->tgid = tg->tgid; ++ __entry->thread = thread; ++ __entry->pid = xnthread_host_pid(thread); ++ ), ++ ++ TP_printk("tgid=%d thread=%p pid=%d", ++ __entry->tgid, __entry->thread, __entry->pid) ++); ++ ++DEFINE_EVENT(schedquota_thread_event, cobalt_schedquota_add_thread, ++ TP_PROTO(struct xnsched_quota_group *tg, ++ struct xnthread *thread), ++ TP_ARGS(tg, thread) ++); ++ ++DEFINE_EVENT(schedquota_thread_event, cobalt_schedquota_remove_thread, ++ TP_PROTO(struct xnsched_quota_group *tg, ++ struct xnthread *thread), ++ TP_ARGS(tg, thread) ++); ++ ++#endif /* CONFIG_XENO_OPT_SCHED_QUOTA */ ++ ++TRACE_EVENT(cobalt_thread_init, ++ TP_PROTO(struct xnthread *thread, ++ const struct xnthread_init_attr *attr, ++ struct xnsched_class *sched_class), ++ TP_ARGS(thread, attr, sched_class), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, thread) ++ __string(thread_name, thread->name) ++ __string(class_name, sched_class->name) ++ __field(unsigned long, flags) ++ __field(int, cprio) ++ ), ++ ++ TP_fast_assign( ++ __entry->thread = thread; ++ __assign_str(thread_name, thread->name); ++ __entry->flags = attr->flags; ++ __assign_str(class_name, sched_class->name); ++ __entry->cprio = thread->cprio; ++ ), ++ ++ TP_printk("thread=%p name=%s flags=0x%lx class=%s prio=%d", ++ __entry->thread, __get_str(thread_name), __entry->flags, ++ __get_str(class_name), __entry->cprio) ++); ++ ++TRACE_EVENT(cobalt_thread_suspend, ++ TP_PROTO(struct xnthread *thread, unsigned long 
mask, xnticks_t timeout, ++ xntmode_t timeout_mode, struct xnsynch *wchan), ++ TP_ARGS(thread, mask, timeout, timeout_mode, wchan), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __field(unsigned long, mask) ++ __field(xnticks_t, timeout) ++ __field(xntmode_t, timeout_mode) ++ __field(struct xnsynch *, wchan) ++ ), ++ ++ TP_fast_assign( ++ __entry->pid = xnthread_host_pid(thread); ++ __entry->mask = mask; ++ __entry->timeout = timeout; ++ __entry->timeout_mode = timeout_mode; ++ __entry->wchan = wchan; ++ ), ++ ++ TP_printk("pid=%d mask=0x%lx timeout=%Lu timeout_mode=%d wchan=%p", ++ __entry->pid, __entry->mask, ++ __entry->timeout, __entry->timeout_mode, __entry->wchan) ++); ++ ++TRACE_EVENT(cobalt_thread_resume, ++ TP_PROTO(struct xnthread *thread, unsigned long mask), ++ TP_ARGS(thread, mask), ++ ++ TP_STRUCT__entry( ++ __string(name, thread->name) ++ __field(pid_t, pid) ++ __field(unsigned long, mask) ++ ), ++ ++ TP_fast_assign( ++ __assign_str(name, thread->name); ++ __entry->pid = xnthread_host_pid(thread); ++ __entry->mask = mask; ++ ), ++ ++ TP_printk("name=%s pid=%d mask=0x%lx", ++ __get_str(name), __entry->pid, __entry->mask) ++); ++ ++TRACE_EVENT(cobalt_thread_fault, ++ TP_PROTO(struct ipipe_trap_data *td), ++ TP_ARGS(td), ++ ++ TP_STRUCT__entry( ++ __field(void *, ip) ++ __field(unsigned int, type) ++ ), ++ ++ TP_fast_assign( ++ __entry->ip = (void *)xnarch_fault_pc(td); ++ __entry->type = xnarch_fault_trap(td); ++ ), ++ ++ TP_printk("ip=%p type=%x", ++ __entry->ip, __entry->type) ++); ++ ++TRACE_EVENT(cobalt_thread_set_current_prio, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, thread) ++ __field(pid_t, pid) ++ __field(int, cprio) ++ ), ++ ++ TP_fast_assign( ++ __entry->thread = thread; ++ __entry->pid = xnthread_host_pid(thread); ++ __entry->cprio = xnthread_current_priority(thread); ++ ), ++ ++ TP_printk("thread=%p pid=%d prio=%d", ++ __entry->thread, __entry->pid, __entry->cprio) ++); ++ ++DEFINE_EVENT(thread_event, cobalt_thread_start, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(thread_event, cobalt_thread_cancel, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(thread_event, cobalt_thread_join, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(thread_event, cobalt_thread_unblock, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_thread_wait_period, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_thread_missed_period, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_thread_set_mode, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++TRACE_EVENT(cobalt_thread_migrate, ++ TP_PROTO(unsigned int cpu), ++ TP_ARGS(cpu), ++ ++ TP_STRUCT__entry( ++ __field(unsigned int, cpu) ++ ), ++ ++ TP_fast_assign( ++ __entry->cpu = cpu; ++ ), ++ ++ TP_printk("cpu=%u", __entry->cpu) ++); ++ ++TRACE_EVENT(cobalt_thread_migrate_passive, ++ TP_PROTO(struct xnthread *thread, unsigned int cpu), ++ TP_ARGS(thread, cpu), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, thread) ++ __field(pid_t, pid) ++ __field(unsigned int, cpu) ++ ), ++ ++ TP_fast_assign( ++ __entry->thread = thread; ++ __entry->pid = xnthread_host_pid(thread); ++ __entry->cpu = cpu; ++ ), ++ ++ TP_printk("thread=%p pid=%d cpu=%u", ++ __entry->thread, __entry->pid, __entry->cpu) 
++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_shadow_gohard, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_watchdog_signal, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_shadow_hardened, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++#define cobalt_print_relax_reason(reason) \ ++ __print_symbolic(reason, \ ++ { SIGDEBUG_UNDEFINED, "undefined" }, \ ++ { SIGDEBUG_MIGRATE_SIGNAL, "signal" }, \ ++ { SIGDEBUG_MIGRATE_SYSCALL, "syscall" }, \ ++ { SIGDEBUG_MIGRATE_FAULT, "fault" }) ++ ++TRACE_EVENT(cobalt_shadow_gorelax, ++ TP_PROTO(int reason), ++ TP_ARGS(reason), ++ ++ TP_STRUCT__entry( ++ __field(int, reason) ++ ), ++ ++ TP_fast_assign( ++ __entry->reason = reason; ++ ), ++ ++ TP_printk("reason=%s", cobalt_print_relax_reason(__entry->reason)) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_shadow_relaxed, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_shadow_entry, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++TRACE_EVENT(cobalt_shadow_map, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, thread) ++ __field(pid_t, pid) ++ __field(int, prio) ++ ), ++ ++ TP_fast_assign( ++ __entry->thread = thread; ++ __entry->pid = xnthread_host_pid(thread); ++ __entry->prio = xnthread_base_priority(thread); ++ ), ++ ++ TP_printk("thread=%p pid=%d prio=%d", ++ __entry->thread, __entry->pid, __entry->prio) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_shadow_unmap, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++TRACE_EVENT(cobalt_lostage_request, ++ TP_PROTO(const char *type, struct task_struct *task), ++ TP_ARGS(type, task), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __array(char, comm, TASK_COMM_LEN) ++ __field(const char *, type) ++ ), ++ ++ TP_fast_assign( ++ __entry->type = type; ++ __entry->pid = task_pid_nr(task); ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ ), ++ ++ TP_printk("request=%s pid=%d comm=%s", ++ __entry->type, __entry->pid, __entry->comm) ++); ++ ++TRACE_EVENT(cobalt_lostage_wakeup, ++ TP_PROTO(struct task_struct *task), ++ TP_ARGS(task), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __array(char, comm, TASK_COMM_LEN) ++ ), ++ ++ TP_fast_assign( ++ __entry->pid = task_pid_nr(task); ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ ), ++ ++ TP_printk("pid=%d comm=%s", ++ __entry->pid, __entry->comm) ++); ++ ++TRACE_EVENT(cobalt_lostage_signal, ++ TP_PROTO(struct task_struct *task, int sig), ++ TP_ARGS(task, sig), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __array(char, comm, TASK_COMM_LEN) ++ __field(int, sig) ++ ), ++ ++ TP_fast_assign( ++ __entry->pid = task_pid_nr(task); ++ __entry->sig = sig; ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ ), ++ ++ TP_printk("pid=%d comm=%s sig=%d", ++ __entry->pid, __entry->comm, __entry->sig) ++); ++ ++DEFINE_EVENT(irq_event, cobalt_irq_entry, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(irq_event, cobalt_irq_exit, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(irq_event, cobalt_irq_attach, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(irq_event, cobalt_irq_detach, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(irq_event, cobalt_irq_enable, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ 
++DEFINE_EVENT(irq_event, cobalt_irq_disable, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(clock_event, cobalt_clock_entry, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(clock_event, cobalt_clock_exit, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(timer_event, cobalt_timer_stop, ++ TP_PROTO(struct xntimer *timer), ++ TP_ARGS(timer) ++); ++ ++DEFINE_EVENT(timer_event, cobalt_timer_expire, ++ TP_PROTO(struct xntimer *timer), ++ TP_ARGS(timer) ++); ++ ++#define cobalt_print_timer_mode(mode) \ ++ __print_symbolic(mode, \ ++ { XN_RELATIVE, "rel" }, \ ++ { XN_ABSOLUTE, "abs" }, \ ++ { XN_REALTIME, "rt" }) ++ ++TRACE_EVENT(cobalt_timer_start, ++ TP_PROTO(struct xntimer *timer, xnticks_t value, xnticks_t interval, ++ xntmode_t mode), ++ TP_ARGS(timer, value, interval, mode), ++ ++ TP_STRUCT__entry( ++ __field(struct xntimer *, timer) ++#ifdef CONFIG_XENO_OPT_STATS ++ __string(name, timer->name) ++#endif ++ __field(xnticks_t, value) ++ __field(xnticks_t, interval) ++ __field(xntmode_t, mode) ++ ), ++ ++ TP_fast_assign( ++ __entry->timer = timer; ++#ifdef CONFIG_XENO_OPT_STATS ++ __assign_str(name, timer->name); ++#endif ++ __entry->value = value; ++ __entry->interval = interval; ++ __entry->mode = mode; ++ ), ++ ++ TP_printk("timer=%p(%s) value=%Lu interval=%Lu mode=%s", ++ __entry->timer, ++#ifdef CONFIG_XENO_OPT_STATS ++ __get_str(name), ++#else ++ "(anon)", ++#endif ++ __entry->value, __entry->interval, ++ cobalt_print_timer_mode(__entry->mode)) ++); ++ ++#ifdef CONFIG_SMP ++ ++TRACE_EVENT(cobalt_timer_migrate, ++ TP_PROTO(struct xntimer *timer, unsigned int cpu), ++ TP_ARGS(timer, cpu), ++ ++ TP_STRUCT__entry( ++ __field(struct xntimer *, timer) ++ __field(unsigned int, cpu) ++ ), ++ ++ TP_fast_assign( ++ __entry->timer = timer; ++ __entry->cpu = cpu; ++ ), ++ ++ TP_printk("timer=%p cpu=%u", ++ __entry->timer, __entry->cpu) ++); ++ ++#endif /* CONFIG_SMP */ ++ ++DEFINE_EVENT(synch_wait_event, cobalt_synch_sleepon, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_wait_event, cobalt_synch_try_acquire, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_wait_event, cobalt_synch_acquire, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_post_event, cobalt_synch_release, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_post_event, cobalt_synch_wakeup, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_post_event, cobalt_synch_wakeup_many, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_post_event, cobalt_synch_flush, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_post_event, cobalt_synch_forget, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++#endif /* _TRACE_COBALT_CORE_H */ ++ ++/* This part must be outside protection */ ++#undef TRACE_INCLUDE_PATH ++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE cobalt-core ++#include +--- linux/kernel/xenomai/rtdm/wrappers.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/wrappers.c 2021-04-07 16:01:26.185635655 +0800 +@@ -0,0 +1,106 @@ ++/* ++ * Copyright (c) 2013 Hauke Mehrtens ++ * Copyright (c) 2013 Hannes Frederic Sowa ++ * Copyright (c) 2014 Luis R. Rodriguez ++ * ++ * Backport functionality introduced in Linux 3.13. 
++ * ++ * Copyright (c) 2014 Hauke Mehrtens ++ * ++ * Backport functionality introduced in Linux 3.14. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Same rules as kernel/cobalt/include/asm-generic/xenomai/wrappers.h ++ * apply to reduce #ifdefery. ++ */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) ++#ifdef CONFIG_PCI_MSI ++int pci_enable_msix_range(struct pci_dev *dev, ++ struct msix_entry *entries, ++ int minvec, int maxvec) ++{ ++ int nvec = maxvec; ++ int rc; ++ ++ if (maxvec < minvec) ++ return -ERANGE; ++ ++ do { ++ rc = pci_enable_msix(dev, entries, nvec); ++ if (rc < 0) { ++ return rc; ++ } else if (rc > 0) { ++ if (rc < minvec) ++ return -ENOSPC; ++ nvec = rc; ++ } ++ } while (rc); ++ ++ return nvec; ++} ++EXPORT_SYMBOL(pci_enable_msix_range); ++#endif ++#endif /* < 3.14 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0) ++#ifdef CONFIG_HWMON ++struct device* ++hwmon_device_register_with_groups(struct device *dev, const char *name, ++ void *drvdata, ++ const struct attribute_group **groups) ++{ ++ struct device *hwdev; ++ ++ hwdev = hwmon_device_register(dev); ++ hwdev->groups = groups; ++ dev_set_drvdata(hwdev, drvdata); ++ return hwdev; ++} ++ ++static void devm_hwmon_release(struct device *dev, void *res) ++{ ++ struct device *hwdev = *(struct device **)res; ++ ++ hwmon_device_unregister(hwdev); ++} ++ ++struct device * ++devm_hwmon_device_register_with_groups(struct device *dev, const char *name, ++ void *drvdata, ++ const struct attribute_group **groups) ++{ ++ struct device **ptr, *hwdev; ++ ++ if (!dev) ++ return ERR_PTR(-EINVAL); ++ ++ ptr = devres_alloc(devm_hwmon_release, sizeof(*ptr), GFP_KERNEL); ++ if (!ptr) ++ return ERR_PTR(-ENOMEM); ++ ++ hwdev = hwmon_device_register_with_groups(dev, name, drvdata, groups); ++ if (IS_ERR(hwdev)) ++ goto error; ++ ++ *ptr = hwdev; ++ devres_add(dev, ptr); ++ return hwdev; ++ ++error: ++ devres_free(ptr); ++ return hwdev; ++} ++EXPORT_SYMBOL_GPL(devm_hwmon_device_register_with_groups); ++#endif ++#endif /* < 3.13 */ +--- linux/kernel/xenomai/rtdm/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/Makefile 2021-04-07 16:01:26.180635662 +0800 +@@ -0,0 +1,10 @@ ++ ++obj-$(CONFIG_XENOMAI) += xenomai.o ++ ++xenomai-y := core.o \ ++ device.o \ ++ drvlib.o \ ++ fd.o \ ++ wrappers.o ++ ++ccflags-y += -I$(src)/.. -Ikernel +--- linux/kernel/xenomai/rtdm/drvlib.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/drvlib.c 2021-04-07 16:01:26.176635668 +0800 +@@ -0,0 +1,2446 @@ ++/* ++ * Real-Time Driver Model for Xenomai, driver library ++ * ++ * Copyright (C) 2005-2007 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2008 Gilles Chanteperdrix ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_clock Clock Services ++ * @{ ++ */ ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++ ++/** ++ * @brief Get system time ++ * ++ * @return The system time in nanoseconds is returned ++ * ++ * @note The resolution of this service depends on the system timer. In ++ * particular, if the system timer is running in periodic mode, the return ++ * value will be limited to multiples of the timer tick period. ++ * ++ * @note The system timer may have to be started to obtain valid results. ++ * Whether this happens automatically (as on Xenomai) or is controlled by the ++ * application depends on the RTDM host environment. ++ * ++ * @coretags{unrestricted} ++ */ ++nanosecs_abs_t rtdm_clock_read(void); ++ ++/** ++ * @brief Get monotonic time ++ * ++ * @return The monotonic time in nanoseconds is returned ++ * ++ * @note The resolution of this service depends on the system timer. In ++ * particular, if the system timer is running in periodic mode, the return ++ * value will be limited to multiples of the timer tick period. ++ * ++ * @note The system timer may have to be started to obtain valid results. ++ * Whether this happens automatically (as on Xenomai) or is controlled by the ++ * application depends on the RTDM host environment. ++ * ++ * @coretags{unrestricted} ++ */ ++nanosecs_abs_t rtdm_clock_read_monotonic(void); ++#endif /* DOXYGEN_CPP */ ++/** @} */ ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_task Task Services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise and start a real-time task ++ * ++ * After initialising a task, the task handle remains valid and can be ++ * passed to RTDM services until either rtdm_task_destroy() or ++ * rtdm_task_join() was invoked. ++ * ++ * @param[in,out] task Task handle ++ * @param[in] name Optional task name ++ * @param[in] task_proc Procedure to be executed by the task ++ * @param[in] arg Custom argument passed to @c task_proc() on entry ++ * @param[in] priority Priority of the task, see also ++ * @ref rtdmtaskprio "Task Priority Range" ++ * @param[in] period Period in nanoseconds of a cyclic task, 0 for non-cyclic ++ * mode. Waiting for the first and subsequent periodic events is ++ * done using rtdm_task_wait_period(). ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_task_init(rtdm_task_t *task, const char *name, ++ rtdm_task_proc_t task_proc, void *arg, ++ int priority, nanosecs_rel_t period) ++{ ++ union xnsched_policy_param param; ++ struct xnthread_start_attr sattr; ++ struct xnthread_init_attr iattr; ++ int err; ++ ++ if (!realtime_core_enabled()) ++ return -ENOSYS; ++ ++ iattr.name = name; ++ iattr.flags = 0; ++ iattr.personality = &xenomai_personality; ++ iattr.affinity = CPU_MASK_ALL; ++ param.rt.prio = priority; ++ ++ err = xnthread_init(task, &iattr, &xnsched_class_rt, ¶m); ++ if (err) ++ return err; ++ ++ /* We need an anonymous registry entry to obtain a handle for fast ++ mutex locking. 
*/ ++ err = xnthread_register(task, ""); ++ if (err) ++ goto cleanup_out; ++ ++ if (period > 0) { ++ err = xnthread_set_periodic(task, XN_INFINITE, ++ XN_RELATIVE, period); ++ if (err) ++ goto cleanup_out; ++ } ++ ++ sattr.mode = 0; ++ sattr.entry = task_proc; ++ sattr.cookie = arg; ++ err = xnthread_start(task, &sattr); ++ if (err) ++ goto cleanup_out; ++ ++ return 0; ++ ++ cleanup_out: ++ xnthread_cancel(task); ++ return err; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_task_init); ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++/** ++ * @brief Destroy a real-time task ++ * ++ * This call sends a termination request to @a task, then waits for it ++ * to exit. All RTDM task should check for pending termination ++ * requests by calling rtdm_task_should_stop() from their work loop. ++ * ++ * If @a task is current, rtdm_task_destroy() terminates the current ++ * context, and does not return to the caller. ++ * ++ * @param[in,out] task Task handle as returned by rtdm_task_init() ++ * ++ * @note Passing the same task handle to RTDM services after the completion of ++ * this function is not allowed. ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++void rtdm_task_destroy(rtdm_task_t *task); ++ ++/** ++ * @brief Check for pending termination request ++ * ++ * Check whether a termination request was received by the current ++ * RTDM task. Termination requests are sent by calling ++ * rtdm_task_destroy(). ++ * ++ * @return Non-zero indicates that a termination request is pending, ++ * in which case the caller should wrap up and exit. ++ * ++ * @coretags{rtdm-task, might-switch} ++ */ ++int rtdm_task_should_stop(void); ++ ++/** ++ * @brief Adjust real-time task priority ++ * ++ * @param[in,out] task Task handle as returned by rtdm_task_init() ++ * @param[in] priority New priority of the task, see also ++ * @ref rtdmtaskprio "Task Priority Range" ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++void rtdm_task_set_priority(rtdm_task_t *task, int priority); ++ ++/** ++ * @brief Adjust real-time task period ++ * ++ * @param[in,out] task Task handle as returned by rtdm_task_init(), or ++ * NULL for referring to the current RTDM task or Cobalt thread. ++ * ++ * @param[in] start_date The initial (absolute) date of the first ++ * release point, expressed in nanoseconds. @a task will be delayed ++ * by the first call to rtdm_task_wait_period() until this point is ++ * reached. If @a start_date is zero, the first release point is set ++ * to @a period nanoseconds after the current date. ++ ++ * @param[in] period New period in nanoseconds of a cyclic task, zero ++ * to disable cyclic mode for @a task. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_task_set_period(rtdm_task_t *task, nanosecs_abs_t start_date, ++ nanosecs_rel_t period); ++ ++/** ++ * @brief Wait on next real-time task period ++ * ++ * @param[in] overruns_r Address of a long word receiving the count of ++ * overruns if -ETIMEDOUT is returned, or NULL if the caller don't ++ * need that information. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINVAL is returned if calling task is not in periodic mode. ++ * ++ * - -ETIMEDOUT is returned if a timer overrun occurred, which indicates ++ * that a previous release point has been missed by the calling task. 
++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_task_wait_period(unsigned long *overruns_r); ++ ++/** ++ * @brief Activate a blocked real-time task ++ * ++ * @return Non-zero is returned if the task was actually unblocked from a ++ * pending wait state, 0 otherwise. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++int rtdm_task_unblock(rtdm_task_t *task); ++ ++/** ++ * @brief Get current real-time task ++ * ++ * @return Pointer to task handle ++ * ++ * @coretags{mode-unrestricted} ++ */ ++rtdm_task_t *rtdm_task_current(void); ++ ++/** ++ * @brief Sleep a specified amount of time ++ * ++ * @param[in] delay Delay in nanoseconds, see @ref RTDM_TIMEOUT_xxx for ++ * special values. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_task_sleep(nanosecs_rel_t delay); ++ ++/** ++ * @brief Sleep until a specified absolute time ++ * ++ * @deprecated Use rtdm_task_sleep_abs instead! ++ * ++ * @param[in] wakeup_time Absolute timeout in nanoseconds ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_task_sleep_until(nanosecs_abs_t wakeup_time); ++ ++/** ++ * @brief Sleep until a specified absolute time ++ * ++ * @param[in] wakeup_time Absolute timeout in nanoseconds ++ * @param[in] mode Selects the timer mode, see RTDM_TIMERMODE_xxx for details ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * - -EINVAL is returned if an invalid parameter was passed. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_task_sleep_abs(nanosecs_abs_t wakeup_time, enum rtdm_timer_mode mode); ++ ++/** ++ * @brief Safe busy waiting ++ * ++ * This service alternates active spinning and sleeping within a wait ++ * loop, until a condition is satisfied. While sleeping, a task is ++ * scheduled out and does not consume any CPU time. ++ * ++ * rtdm_task_busy_wait() is particularly useful for waiting for a ++ * state change reading an I/O register, which usually happens shortly ++ * after the wait starts, without incurring the adverse effects of ++ * long busy waiting if it doesn't. ++ * ++ * @param[in] condition The C expression to be tested for detecting ++ * completion. ++ * @param[in] spin_ns The time to spin on @a condition before ++ * sleeping, expressed as a count of nanoseconds. ++ * @param[in] sleep_ns The time to sleep for before spinning again, ++ * expressed as a count of nanoseconds. ++ * ++ * @return 0 on success if @a condition is satisfied, otherwise: ++ * ++ * - -EINTR is returned if the calling task has been unblocked by a ++ * Linux signal or explicitly via rtdm_task_unblock(). ++ * ++ * - -EPERM may be returned if an illegal invocation environment is ++ * detected. 
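++ *
++ * A short usage sketch, assuming a memory-mapped status register with
++ * a ready bit (@c ctx->regs, @c REG_STAT and @c STAT_READY are
++ * illustrative):
++ *
++ * @code
++ * // spin 2 us on the ready bit, then sleep 50 us between further polls
++ * ret = rtdm_task_busy_wait(ioread32(ctx->regs + REG_STAT) & STAT_READY,
++ *			      2000, 50000);
++ * @endcode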
++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_task_busy_wait(bool condition, nanosecs_rel_t spin_ns, ++ nanosecs_rel_t sleep_ns); ++ ++/** ++ * @brief Register wait context ++ * ++ * rtdm_wait_prepare() registers a wait context structure for the ++ * caller, which can be later retrieved by a call to ++ * rtdm_wait_get_context(). This call is normally issued before the ++ * current task blocks on a wait object, waiting for some (producer) ++ * code to wake it up. Arbitrary data can be exchanged between both ++ * sites via the wait context structure, which is allocated by the ++ * waiter (consumer) side. ++ * ++ * @a wc is the address of an anchor object which is commonly embedded ++ * into a larger structure with arbitrary contents, which needs to be ++ * shared between the consumer (waiter) and the producer for ++ * implementing the wait code. ++ * ++ * A typical implementation pattern for the wait side is: ++ * ++ * @code ++ * struct rtdm_waitqueue wq; ++ * struct some_wait_context { ++ * int input_value; ++ * int output_value; ++ * struct rtdm_wait_context wc; ++ * } wait_context; ++ * ++ * wait_context.input_value = 42; ++ * rtdm_wait_prepare(&wait_context); ++ * ret = rtdm_wait_condition(&wq, rtdm_wait_is_completed(&wait_context)); ++ * if (ret) ++ * goto wait_failed; ++ * handle_event(wait_context.output_value); ++ * @endcode ++ * ++ * On the producer side, the implementation would look like: ++ * ++ * @code ++ * struct rtdm_waitqueue wq; ++ * struct some_wait_context { ++ * int input_value; ++ * int output_value; ++ * struct rtdm_wait_context wc; ++ * } *wait_context_ptr; ++ * struct rtdm_wait_context *wc; ++ * rtdm_task_t *task; ++ * ++ * rtdm_for_each_waiter(task, &wq) { ++ * wc = rtdm_wait_get_context(task); ++ * wait_context_ptr = container_of(wc, struct some_wait_context, wc); ++ * wait_context_ptr->output_value = 12; ++ * } ++ * rtdm_waitqueue_broadcast(&wq); ++ * @endcode ++ * ++ * @param wc Wait context to register. ++ */ ++void rtdm_wait_prepare(struct rtdm_wait_context *wc); ++ ++/** ++ * @brief Mark completion for a wait context ++ * ++ * rtdm_complete_wait() marks a wait context as completed, so that ++ * rtdm_wait_is_completed() returns true for such context. ++ * ++ * @param wc Wait context to complete. ++ */ ++void rtdm_wait_complete(struct rtdm_wait_context *wc); ++ ++/** ++ * @brief Test completion of a wait context ++ * ++ * rtdm_wait_is_completed() returns true if rtdm_complete_wait() was ++ * called for @a wc. The completion mark is reset each time ++ * rtdm_wait_prepare() is called for a wait context. ++ * ++ * @param wc Wait context to check for completion. ++ * ++ * @return non-zero/true if rtdm_wait_complete() was called for @a wc, ++ * zero otherwise. ++ */ ++int rtdm_wait_is_completed(struct rtdm_wait_context *wc); ++ ++#endif /* DOXYGEN_CPP */ ++ ++int __rtdm_task_sleep(xnticks_t timeout, xntmode_t mode) ++{ ++ struct xnthread *thread; ++ ++ if (!XENO_ASSERT(COBALT, !xnsched_unblockable_p())) ++ return -EPERM; ++ ++ thread = xnthread_current(); ++ xnthread_suspend(thread, XNDELAY, timeout, mode, NULL); ++ ++ return xnthread_test_info(thread, XNBREAK) ? -EINTR : 0; ++} ++ ++EXPORT_SYMBOL_GPL(__rtdm_task_sleep); ++ ++/** ++ * @brief Wait on a real-time task to terminate ++ * ++ * @param[in,out] task Task handle as returned by rtdm_task_init() ++ * ++ * @note Passing the same task handle to RTDM services after the ++ * completion of this function is not allowed. 
++ * ++ * @note This service does not trigger the termination of the targeted ++ * task. The user has to take of this, otherwise rtdm_task_join() ++ * will never return. ++ * ++ * @coretags{mode-unrestricted} ++ */ ++void rtdm_task_join(rtdm_task_t *task) ++{ ++ trace_cobalt_driver_task_join(task); ++ ++ xnthread_join(task, true); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_task_join); ++ ++/** ++ * @brief Busy-wait a specified amount of time ++ * ++ * This service does not schedule out the caller, but rather spins in ++ * a tight loop, burning CPU cycles until the timeout elapses. ++ * ++ * @param[in] delay Delay in nanoseconds. Note that a zero delay does @b not ++ * have the meaning of @c RTDM_TIMEOUT_INFINITE here. ++ * ++ * @note The caller must not be migratable to different CPUs while executing ++ * this service. Otherwise, the actual delay will be undefined. ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_task_busy_sleep(nanosecs_rel_t delay) ++{ ++ xnticks_t wakeup; ++ ++ wakeup = xnclock_read_raw(&nkclock) + ++ xnclock_ns_to_ticks(&nkclock, delay); ++ ++ while ((xnsticks_t)(xnclock_read_raw(&nkclock) - wakeup) < 0) ++ cpu_relax(); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_task_busy_sleep); ++/** @} */ ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_timer Timer Services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise a timer ++ * ++ * @param[in,out] timer Timer handle ++ * @param[in] handler Handler to be called on timer expiry ++ * @param[in] name Optional timer name ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_timer_init(rtdm_timer_t *timer, rtdm_timer_handler_t handler, ++ const char *name) ++{ ++ if (!realtime_core_enabled()) ++ return -ENOSYS; ++ ++ xntimer_init((timer), &nkclock, handler, NULL, XNTIMER_IGRAVITY); ++ xntimer_set_name((timer), (name)); ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_timer_init); ++ ++/** ++ * @brief Destroy a timer ++ * ++ * @param[in,out] timer Timer handle as returned by rtdm_timer_init() ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_timer_destroy(rtdm_timer_t *timer) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_destroy(timer); ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_timer_destroy); ++ ++/** ++ * @brief Start a timer ++ * ++ * @param[in,out] timer Timer handle as returned by rtdm_timer_init() ++ * @param[in] expiry Firing time of the timer, @c mode defines if relative or ++ * absolute ++ * @param[in] interval Relative reload value, > 0 if the timer shall work in ++ * periodic mode with the specific interval, 0 for one-shot timers ++ * @param[in] mode Defines the operation mode, see @ref RTDM_TIMERMODE_xxx for ++ * possible values ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ETIMEDOUT is returned if @c expiry describes an absolute date in ++ * the past. In such an event, the timer is nevertheless armed for the ++ * next shot in the timeline if @a interval is non-zero. 
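++ *
++ * A minimal sketch of arming a 1 ms periodic timer; the handler and
++ * the @c ctx fields are illustrative, and the handler prototype is
++ * assumed to follow rtdm_timer_handler_t (taking the timer handle):
++ *
++ * @code
++ * static void my_timer_handler(rtdm_timer_t *timer)
++ * {
++ *	// runs on each expiry, in timer interrupt context
++ * }
++ *
++ * rtdm_timer_init(&ctx->timer, my_timer_handler, "my-timer");
++ *
++ * // first shot in 1 ms, then every 1 ms
++ * ret = rtdm_timer_start(&ctx->timer, 1000000, 1000000,
++ *			   RTDM_TIMERMODE_RELATIVE);
++ * @endcode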
++ * ++ * @coretags{unrestricted} ++ */ ++int rtdm_timer_start(rtdm_timer_t *timer, nanosecs_abs_t expiry, ++ nanosecs_rel_t interval, enum rtdm_timer_mode mode) ++{ ++ spl_t s; ++ int err; ++ ++ xnlock_get_irqsave(&nklock, s); ++ err = xntimer_start(timer, expiry, interval, (xntmode_t)mode); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_timer_start); ++ ++/** ++ * @brief Stop a timer ++ * ++ * @param[in,out] timer Timer handle as returned by rtdm_timer_init() ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_timer_stop(rtdm_timer_t *timer) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_stop(timer); ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_timer_stop); ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++/** ++ * @brief Start a timer from inside a timer handler ++ * ++ * @param[in,out] timer Timer handle as returned by rtdm_timer_init() ++ * @param[in] expiry Firing time of the timer, @c mode defines if relative or ++ * absolute ++ * @param[in] interval Relative reload value, > 0 if the timer shall work in ++ * periodic mode with the specific interval, 0 for one-shot timers ++ * @param[in] mode Defines the operation mode, see @ref RTDM_TIMERMODE_xxx for ++ * possible values ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ETIMEDOUT is returned if @c expiry describes an absolute date in the ++ * past. ++ * ++ * @coretags{coreirq-only} ++ */ ++int rtdm_timer_start_in_handler(rtdm_timer_t *timer, nanosecs_abs_t expiry, ++ nanosecs_rel_t interval, ++ enum rtdm_timer_mode mode); ++ ++/** ++ * @brief Stop a timer from inside a timer handler ++ * ++ * @param[in,out] timer Timer handle as returned by rtdm_timer_init() ++ * ++ * @coretags{coreirq-only} ++ */ ++void rtdm_timer_stop_in_handler(rtdm_timer_t *timer); ++#endif /* DOXYGEN_CPP */ ++/** @} */ ++ ++/* --- IPC cleanup helper --- */ ++ ++#define RTDM_SYNCH_DELETED XNSYNCH_SPARE0 ++ ++void __rtdm_synch_flush(struct xnsynch *synch, unsigned long reason) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (reason == XNRMID) ++ xnsynch_set_status(synch, RTDM_SYNCH_DELETED); ++ ++ if (likely(xnsynch_flush(synch, reason) == XNSYNCH_RESCHED)) ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(__rtdm_synch_flush); ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_sync Synchronisation Services ++ * @{ ++ */ ++ ++/*! ++ * @name Timeout Sequence Management ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise a timeout sequence ++ * ++ * This service initialises a timeout sequence handle according to the given ++ * timeout value. Timeout sequences allow to maintain a continuous @a timeout ++ * across multiple calls of blocking synchronisation services. A typical ++ * application scenario is given below. ++ * ++ * @param[in,out] timeout_seq Timeout sequence handle ++ * @param[in] timeout Relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values ++ * ++ * Application Scenario: ++ * @code ++int device_service_routine(...) ++{ ++ rtdm_toseq_t timeout_seq; ++ ... ++ ++ rtdm_toseq_init(&timeout_seq, timeout); ++ ... ++ while (received < requested) { ++ ret = rtdm_event_timedwait(&data_available, timeout, &timeout_seq); ++ if (ret < 0) // including -ETIMEDOUT ++ break; ++ ++ // receive some data ++ ... ++ } ++ ... 
++} ++ * @endcode ++ * Using a timeout sequence in such a scenario avoids that the user-provided ++ * relative @c timeout is restarted on every call to rtdm_event_timedwait(), ++ * potentially causing an overall delay that is larger than specified by ++ * @c timeout. Moreover, all functions supporting timeout sequences also ++ * interpret special timeout values (infinite and non-blocking), ++ * disburdening the driver developer from handling them separately. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_toseq_init(rtdm_toseq_t *timeout_seq, nanosecs_rel_t timeout) ++{ ++ XENO_WARN_ON(COBALT, xnsched_unblockable_p()); /* only warn here */ ++ ++ *timeout_seq = xnclock_read_monotonic(&nkclock) + timeout; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_toseq_init); ++ ++/** @} */ ++ ++/** ++ * @ingroup rtdm_sync ++ * @defgroup rtdm_sync_event Event Services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise an event ++ * ++ * @param[in,out] event Event handle ++ * @param[in] pending Non-zero if event shall be initialised as set, 0 otherwise ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_event_init(rtdm_event_t *event, unsigned long pending) ++{ ++ spl_t s; ++ ++ trace_cobalt_driver_event_init(event, pending); ++ ++ /* Make atomic for re-initialisation support */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnsynch_init(&event->synch_base, XNSYNCH_PRIO, NULL); ++ if (pending) ++ xnsynch_set_status(&event->synch_base, RTDM_EVENT_PENDING); ++ xnselect_init(&event->select_block); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_event_init); ++ ++/** ++ * @brief Destroy an event ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++void rtdm_event_destroy(rtdm_event_t *event) ++{ ++ trace_cobalt_driver_event_destroy(event); ++ if (realtime_core_enabled()) { ++ __rtdm_synch_flush(&event->synch_base, XNRMID); ++ xnselect_destroy(&event->select_block); ++ } ++} ++EXPORT_SYMBOL_GPL(rtdm_event_destroy); ++ ++/** ++ * @brief Signal an event occurrence to currently listening waiters ++ * ++ * This function wakes up all current waiters of the given event, but it does ++ * not change the event state. Subsequently callers of rtdm_event_wait() or ++ * rtdm_event_timedwait() will therefore be blocked first. ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_event_pulse(rtdm_event_t *event) ++{ ++ trace_cobalt_driver_event_pulse(event); ++ __rtdm_synch_flush(&event->synch_base, 0); ++} ++EXPORT_SYMBOL_GPL(rtdm_event_pulse); ++ ++/** ++ * @brief Signal an event occurrence ++ * ++ * This function sets the given event and wakes up all current waiters. If no ++ * waiter is presently registered, the next call to rtdm_event_wait() or ++ * rtdm_event_timedwait() will return immediately. 
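++ *
++ * A typical producer/consumer pairing (@c ctx->rx_event is
++ * illustrative):
++ *
++ * @code
++ * // consumer side, e.g. in a read handler:
++ * ret = rtdm_event_wait(&ctx->rx_event);
++ *
++ * // producer side, e.g. in the interrupt handler:
++ * rtdm_event_signal(&ctx->rx_event);
++ * @endcode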
++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_event_signal(rtdm_event_t *event) ++{ ++ int resched = 0; ++ spl_t s; ++ ++ trace_cobalt_driver_event_signal(event); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnsynch_set_status(&event->synch_base, RTDM_EVENT_PENDING); ++ if (xnsynch_flush(&event->synch_base, 0)) ++ resched = 1; ++ if (xnselect_signal(&event->select_block, 1)) ++ resched = 1; ++ if (resched) ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_event_signal); ++ ++/** ++ * @brief Wait on event occurrence ++ * ++ * This is the light-weight version of rtdm_event_timedwait(), implying an ++ * infinite timeout. ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EIDRM is returned if @a event has been destroyed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_event_wait(rtdm_event_t *event) ++{ ++ return rtdm_event_timedwait(event, 0, NULL); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_event_wait); ++ ++/** ++ * @brief Wait on event occurrence with timeout ++ * ++ * This function waits or tests for the occurence of the given event, taking ++ * the provided timeout into account. On successful return, the event is ++ * reset. ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * @param[in] timeout Relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values ++ * @param[in,out] timeout_seq Handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EIDRM is returned if @a event has been destroyed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * - -EWOULDBLOCK is returned if a negative @a timeout (i.e., non-blocking ++ * operation) has been specified. 
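++ *
++ * For instance, passing a negative @a timeout turns the call into a
++ * plain test of the event state (@c ctx->rx_event is illustrative):
++ *
++ * @code
++ * ret = rtdm_event_timedwait(&ctx->rx_event, -1, NULL);
++ * if (ret == -EWOULDBLOCK)
++ *	return -EAGAIN;	// no event pending, tell the caller to retry
++ * @endcode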
++ * ++ * @coretags{primary-timed, might-switch} ++ */ ++int rtdm_event_timedwait(rtdm_event_t *event, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq) ++{ ++ struct xnthread *thread; ++ int err = 0, ret; ++ spl_t s; ++ ++ if (!XENO_ASSERT(COBALT, timeout < 0 || !xnsched_unblockable_p())) ++ return -EPERM; ++ ++ trace_cobalt_driver_event_wait(event, xnthread_current()); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (unlikely(event->synch_base.status & RTDM_SYNCH_DELETED)) ++ err = -EIDRM; ++ else if (likely(event->synch_base.status & RTDM_EVENT_PENDING)) { ++ xnsynch_clear_status(&event->synch_base, RTDM_EVENT_PENDING); ++ xnselect_signal(&event->select_block, 0); ++ } else { ++ /* non-blocking mode */ ++ if (timeout < 0) { ++ err = -EWOULDBLOCK; ++ goto unlock_out; ++ } ++ ++ thread = xnthread_current(); ++ ++ if (timeout_seq && (timeout > 0)) ++ /* timeout sequence */ ++ ret = xnsynch_sleep_on(&event->synch_base, *timeout_seq, ++ XN_ABSOLUTE); ++ else ++ /* infinite or relative timeout */ ++ ret = xnsynch_sleep_on(&event->synch_base, timeout, XN_RELATIVE); ++ ++ if (likely(ret == 0)) { ++ xnsynch_clear_status(&event->synch_base, ++ RTDM_EVENT_PENDING); ++ xnselect_signal(&event->select_block, 0); ++ } else if (ret & XNTIMEO) ++ err = -ETIMEDOUT; ++ else if (ret & XNRMID) ++ err = -EIDRM; ++ else /* XNBREAK */ ++ err = -EINTR; ++ } ++ ++unlock_out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_event_timedwait); ++ ++/** ++ * @brief Clear event state ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_event_clear(rtdm_event_t *event) ++{ ++ spl_t s; ++ ++ trace_cobalt_driver_event_clear(event); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnsynch_clear_status(&event->synch_base, RTDM_EVENT_PENDING); ++ xnselect_signal(&event->select_block, 0); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_event_clear); ++ ++/** ++ * @brief Bind a selector to an event ++ * ++ * This functions binds the given selector to an event so that the former is ++ * notified when the event state changes. Typically the select binding handler ++ * will invoke this service. ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * @param[in,out] selector Selector as passed to the select binding handler ++ * @param[in] type Type of the bound event as passed to the select binding handler ++ * @param[in] fd_index File descriptor index as passed to the select binding ++ * handler ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ENOMEM is returned if there is insufficient memory to establish the ++ * dynamic binding. ++ * ++ * - -EINVAL is returned if @a type or @a fd_index are invalid. 
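++ *
++ * A select binding handler usually just forwards its arguments to this
++ * service; the sketch below assumes the common rtdm_fd_ops select
++ * prototype and an illustrative @c rx_event field:
++ *
++ * @code
++ * static int my_select(struct rtdm_fd *fd, struct xnselector *selector,
++ *			unsigned int type, unsigned int fd_index)
++ * {
++ *	struct my_context *ctx = rtdm_fd_to_private(fd);
++ *
++ *	return rtdm_event_select(&ctx->rx_event, selector, type, fd_index);
++ * }
++ * @endcode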
++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_event_select(rtdm_event_t *event, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned int fd_index) ++{ ++ struct xnselect_binding *binding; ++ int err; ++ spl_t s; ++ ++ binding = xnmalloc(sizeof(*binding)); ++ if (!binding) ++ return -ENOMEM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ err = xnselect_bind(&event->select_block, ++ binding, selector, type, fd_index, ++ event->synch_base.status & (RTDM_SYNCH_DELETED | ++ RTDM_EVENT_PENDING)); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (err) ++ xnfree(binding); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(rtdm_event_select); ++ ++/** @} */ ++ ++/** ++ * @ingroup rtdm_sync ++ * @defgroup rtdm_sync_sem Semaphore Services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise a semaphore ++ * ++ * @param[in,out] sem Semaphore handle ++ * @param[in] value Initial value of the semaphore ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_sem_init(rtdm_sem_t *sem, unsigned long value) ++{ ++ spl_t s; ++ ++ trace_cobalt_driver_sem_init(sem, value); ++ ++ /* Make atomic for re-initialisation support */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sem->value = value; ++ xnsynch_init(&sem->synch_base, XNSYNCH_PRIO, NULL); ++ xnselect_init(&sem->select_block); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_sem_init); ++ ++/** ++ * @brief Destroy a semaphore ++ * ++ * @param[in,out] sem Semaphore handle as returned by rtdm_sem_init() ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++void rtdm_sem_destroy(rtdm_sem_t *sem) ++{ ++ trace_cobalt_driver_sem_destroy(sem); ++ if (realtime_core_enabled()) { ++ __rtdm_synch_flush(&sem->synch_base, XNRMID); ++ xnselect_destroy(&sem->select_block); ++ } ++} ++EXPORT_SYMBOL_GPL(rtdm_sem_destroy); ++ ++/** ++ * @brief Decrement a semaphore ++ * ++ * This is the light-weight version of rtdm_sem_timeddown(), implying an ++ * infinite timeout. ++ * ++ * @param[in,out] sem Semaphore handle as returned by rtdm_sem_init() ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EIDRM is returned if @a sem has been destroyed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_sem_down(rtdm_sem_t *sem) ++{ ++ return rtdm_sem_timeddown(sem, 0, NULL); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_sem_down); ++ ++/** ++ * @brief Decrement a semaphore with timeout ++ * ++ * This function tries to decrement the given semphore's value if it is ++ * positive on entry. If not, the caller is blocked unless non-blocking ++ * operation was selected. ++ * ++ * @param[in,out] sem Semaphore handle as returned by rtdm_sem_init() ++ * @param[in] timeout Relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values ++ * @param[in,out] timeout_seq Handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * - -EWOULDBLOCK is returned if @a timeout is negative and the semaphore ++ * value is currently not positive. ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EIDRM is returned if @a sem has been destroyed. 
++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-timed, might-switch} ++ */ ++int rtdm_sem_timeddown(rtdm_sem_t *sem, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq) ++{ ++ struct xnthread *thread; ++ int err = 0, ret; ++ spl_t s; ++ ++ if (!XENO_ASSERT(COBALT, timeout < 0 || !xnsched_unblockable_p())) ++ return -EPERM; ++ ++ trace_cobalt_driver_sem_wait(sem, xnthread_current()); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (unlikely(sem->synch_base.status & RTDM_SYNCH_DELETED)) ++ err = -EIDRM; ++ else if (sem->value > 0) { ++ if(!--sem->value) ++ xnselect_signal(&sem->select_block, 0); ++ } else if (timeout < 0) /* non-blocking mode */ ++ err = -EWOULDBLOCK; ++ else { ++ thread = xnthread_current(); ++ ++ if (timeout_seq && timeout > 0) ++ /* timeout sequence */ ++ ret = xnsynch_sleep_on(&sem->synch_base, *timeout_seq, ++ XN_ABSOLUTE); ++ else ++ /* infinite or relative timeout */ ++ ret = xnsynch_sleep_on(&sem->synch_base, timeout, XN_RELATIVE); ++ ++ if (ret) { ++ if (ret & XNTIMEO) ++ err = -ETIMEDOUT; ++ else if (ret & XNRMID) ++ err = -EIDRM; ++ else /* XNBREAK */ ++ err = -EINTR; ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_sem_timeddown); ++ ++/** ++ * @brief Increment a semaphore ++ * ++ * This function increments the given semphore's value, waking up a potential ++ * waiter which was blocked upon rtdm_sem_down(). ++ * ++ * @param[in,out] sem Semaphore handle as returned by rtdm_sem_init() ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_sem_up(rtdm_sem_t *sem) ++{ ++ spl_t s; ++ ++ trace_cobalt_driver_sem_up(sem); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnsynch_wakeup_one_sleeper(&sem->synch_base)) ++ xnsched_run(); ++ else ++ if (sem->value++ == 0 ++ && xnselect_signal(&sem->select_block, 1)) ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_sem_up); ++ ++/** ++ * @brief Bind a selector to a semaphore ++ * ++ * This functions binds the given selector to the semaphore so that the former ++ * is notified when the semaphore state changes. Typically the select binding ++ * handler will invoke this service. ++ * ++ * @param[in,out] sem Semaphore handle as returned by rtdm_sem_init() ++ * @param[in,out] selector Selector as passed to the select binding handler ++ * @param[in] type Type of the bound event as passed to the select binding handler ++ * @param[in] fd_index File descriptor index as passed to the select binding ++ * handler ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ENOMEM is returned if there is insufficient memory to establish the ++ * dynamic binding. ++ * ++ * - -EINVAL is returned if @a type or @a fd_index are invalid. 
++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_sem_select(rtdm_sem_t *sem, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned int fd_index) ++{ ++ struct xnselect_binding *binding; ++ int err; ++ spl_t s; ++ ++ binding = xnmalloc(sizeof(*binding)); ++ if (!binding) ++ return -ENOMEM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ err = xnselect_bind(&sem->select_block, binding, selector, ++ type, fd_index, ++ (sem->value > 0) || ++ sem->synch_base.status & RTDM_SYNCH_DELETED); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (err) ++ xnfree(binding); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(rtdm_sem_select); ++ ++/** @} */ ++ ++/** ++ * @ingroup rtdm_sync ++ * @defgroup rtdm_sync_mutex Mutex services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise a mutex ++ * ++ * This function initalises a basic mutex with priority inversion protection. ++ * "Basic", as it does not allow a mutex owner to recursively lock the same ++ * mutex again. ++ * ++ * @param[in,out] mutex Mutex handle ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_mutex_init(rtdm_mutex_t *mutex) ++{ ++ spl_t s; ++ ++ /* Make atomic for re-initialisation support */ ++ xnlock_get_irqsave(&nklock, s); ++ xnsynch_init(&mutex->synch_base, XNSYNCH_PI, &mutex->fastlock); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(rtdm_mutex_init); ++ ++/** ++ * @brief Destroy a mutex ++ * ++ * @param[in,out] mutex Mutex handle as returned by rtdm_mutex_init() ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++void rtdm_mutex_destroy(rtdm_mutex_t *mutex) ++{ ++ trace_cobalt_driver_mutex_destroy(mutex); ++ ++ if (realtime_core_enabled()) ++ __rtdm_synch_flush(&mutex->synch_base, XNRMID); ++} ++EXPORT_SYMBOL_GPL(rtdm_mutex_destroy); ++ ++/** ++ * @brief Release a mutex ++ * ++ * This function releases the given mutex, waking up a potential waiter which ++ * was blocked upon rtdm_mutex_lock() or rtdm_mutex_timedlock(). ++ * ++ * @param[in,out] mutex Mutex handle as returned by rtdm_mutex_init() ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++void rtdm_mutex_unlock(rtdm_mutex_t *mutex) ++{ ++ if (!XENO_ASSERT(COBALT, !xnsched_interrupt_p())) ++ return; ++ ++ trace_cobalt_driver_mutex_release(mutex); ++ ++ if (unlikely(xnsynch_release(&mutex->synch_base, ++ xnsched_current_thread()))) ++ xnsched_run(); ++} ++EXPORT_SYMBOL_GPL(rtdm_mutex_unlock); ++ ++/** ++ * @brief Request a mutex ++ * ++ * This is the light-weight version of rtdm_mutex_timedlock(), implying an ++ * infinite timeout. ++ * ++ * @param[in,out] mutex Mutex handle as returned by rtdm_mutex_init() ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EIDRM is returned if @a mutex has been destroyed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_mutex_lock(rtdm_mutex_t *mutex) ++{ ++ return rtdm_mutex_timedlock(mutex, 0, NULL); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_mutex_lock); ++ ++/** ++ * @brief Request a mutex with timeout ++ * ++ * This function tries to acquire the given mutex. If it is not available, the ++ * caller is blocked unless non-blocking operation was selected. 
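++ *
++ * A minimal critical-section sketch using the companion services
++ * (@c ctx->lock is illustrative):
++ *
++ * @code
++ * ret = rtdm_mutex_lock(&ctx->lock);
++ * if (ret)
++ *	return ret;
++ *
++ * // ... manipulate shared driver state ...
++ *
++ * rtdm_mutex_unlock(&ctx->lock);
++ * @endcode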
++ * ++ * @param[in,out] mutex Mutex handle as returned by rtdm_mutex_init() ++ * @param[in] timeout Relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values ++ * @param[in,out] timeout_seq Handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * - -EWOULDBLOCK is returned if @a timeout is negative and the semaphore ++ * value is currently not positive. ++ * ++ * - -EIDRM is returned if @a mutex has been destroyed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_mutex_timedlock(rtdm_mutex_t *mutex, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq) ++{ ++ struct xnthread *curr; ++ int ret; ++ spl_t s; ++ ++ if (!XENO_ASSERT(COBALT, !xnsched_unblockable_p())) ++ return -EPERM; ++ ++ curr = xnthread_current(); ++ trace_cobalt_driver_mutex_wait(mutex, curr); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (unlikely(mutex->synch_base.status & RTDM_SYNCH_DELETED)) { ++ ret = -EIDRM; ++ goto out; ++ } ++ ++ ret = xnsynch_try_acquire(&mutex->synch_base); ++ if (ret != -EBUSY) ++ goto out; ++ ++ if (timeout < 0) { ++ ret = -EWOULDBLOCK; ++ goto out; ++ } ++ ++ for (;;) { ++ if (timeout_seq && timeout > 0) /* timeout sequence */ ++ ret = xnsynch_acquire(&mutex->synch_base, *timeout_seq, ++ XN_ABSOLUTE); ++ else /* infinite or relative timeout */ ++ ret = xnsynch_acquire(&mutex->synch_base, timeout, ++ XN_RELATIVE); ++ if (ret == 0) ++ break; ++ if (ret & XNBREAK) ++ continue; ++ ret = ret & XNTIMEO ? -ETIMEDOUT : -EIDRM; ++ break; ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_mutex_timedlock); ++/** @} */ ++ ++/** @} Synchronisation services */ ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_irq Interrupt Management Services ++ * @{ ++ */ ++ ++/** ++ * @brief Register an interrupt handler ++ * ++ * This function registers the provided handler with an IRQ line and enables ++ * the line. ++ * ++ * @param[in,out] irq_handle IRQ handle ++ * @param[in] irq_no Line number of the addressed IRQ ++ * @param[in] handler Interrupt handler ++ * @param[in] flags Registration flags, see @ref RTDM_IRQTYPE_xxx for details ++ * @param[in] device_name Device name to show up in real-time IRQ lists ++ * @param[in] arg Pointer to be passed to the interrupt handler on invocation ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINVAL is returned if an invalid parameter was passed. ++ * ++ * - -EBUSY is returned if the specified IRQ line is already in use. ++ * ++ * - -ENOSYS is returned if the real-time core is disabled. 
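++ *
++ * A registration sketch from a driver's open or probe path; the
++ * handler and device fields are illustrative, and rtdm_irq_get_arg()
++ * is assumed to be the usual accessor for the registration argument:
++ *
++ * @code
++ * static int my_isr(rtdm_irq_t *irq_handle)
++ * {
++ *	struct my_device *dev = rtdm_irq_get_arg(irq_handle, struct my_device);
++ *
++ *	// acknowledge the device, wake up any waiter, ...
++ *	return RTDM_IRQ_HANDLED;
++ * }
++ *
++ * ret = rtdm_irq_request(&dev->irq_handle, dev->irq_no, my_isr,
++ *			   0, "my_device", dev);
++ * @endcode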
++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_irq_request(rtdm_irq_t *irq_handle, unsigned int irq_no, ++ rtdm_irq_handler_t handler, unsigned long flags, ++ const char *device_name, void *arg) ++{ ++ int err; ++ ++ if (!realtime_core_enabled()) ++ return -ENOSYS; ++ ++ if (!XENO_ASSERT(COBALT, xnsched_root_p())) ++ return -EPERM; ++ ++ err = xnintr_init(irq_handle, device_name, irq_no, handler, NULL, flags); ++ if (err) ++ return err; ++ ++ err = xnintr_attach(irq_handle, arg); ++ if (err) { ++ xnintr_destroy(irq_handle); ++ return err; ++ } ++ ++ xnintr_enable(irq_handle); ++ ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_irq_request); ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++/** ++ * @brief Release an interrupt handler ++ * ++ * @param[in,out] irq_handle IRQ handle as returned by rtdm_irq_request() ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @note The caller is responsible for shutting down the IRQ source at device ++ * level before invoking this service. In turn, rtdm_irq_free ensures that any ++ * pending event on the given IRQ line is fully processed on return from this ++ * service. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_irq_free(rtdm_irq_t *irq_handle); ++ ++/** ++ * @brief Enable interrupt line ++ * ++ * @param[in,out] irq_handle IRQ handle as returned by rtdm_irq_request() ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @note This service is for exceptional use only. Drivers should ++ * always prefer interrupt masking at device level (via corresponding ++ * control registers etc.) over masking at line level. Keep in mind ++ * that the latter is incompatible with IRQ line sharing and can also ++ * be more costly as interrupt controller access requires broader ++ * synchronization. Also, such service is solely available from ++ * secondary mode. The caller is responsible for excluding such ++ * conflicts. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_irq_enable(rtdm_irq_t *irq_handle); ++ ++/** ++ * @brief Disable interrupt line ++ * ++ * @param[in,out] irq_handle IRQ handle as returned by rtdm_irq_request() ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @note This service is for exceptional use only. Drivers should ++ * always prefer interrupt masking at device level (via corresponding ++ * control registers etc.) over masking at line level. Keep in mind ++ * that the latter is incompatible with IRQ line sharing and can also ++ * be more costly as interrupt controller access requires broader ++ * synchronization. Also, such service is solely available from ++ * secondary mode. The caller is responsible for excluding such ++ * conflicts. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_irq_disable(rtdm_irq_t *irq_handle); ++#endif /* DOXYGEN_CPP */ ++ ++/** @} Interrupt Management Services */ ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_nrtsignal Non-Real-Time Signalling Services ++ * ++ * These services provide a mechanism to request the execution of a specified ++ * handler in non-real-time context. The triggering can safely be performed in ++ * real-time context without suffering from unknown delays. The handler ++ * execution will be deferred until the next time the real-time subsystem ++ * releases the CPU to the non-real-time part. 
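++ *
++ * A minimal sketch (the handler and @c ctx fields are illustrative):
++ *
++ * @code
++ * static void my_nrt_handler(rtdm_nrtsig_t *nrt_sig, void *arg)
++ * {
++ *	// runs in Linux (non-real-time) context
++ * }
++ *
++ * rtdm_nrtsig_init(&ctx->nrt_sig, my_nrt_handler, ctx);
++ * ...
++ * // later, from real-time context:
++ * rtdm_nrtsig_pend(&ctx->nrt_sig);
++ * @endcode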
++ * @{ ++ */ ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++ ++/** ++ * @brief Register a non-real-time signal handler ++ * ++ * @param[in,out] nrt_sig Signal handle ++ * @param[in] handler Non-real-time signal handler ++ * @param[in] arg Custom argument passed to @c handler() on each invocation ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EAGAIN is returned if no free signal slot is available. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_nrtsig_init(rtdm_nrtsig_t *nrt_sig, rtdm_nrtsig_handler_t handler, ++ void *arg); ++ ++/** ++ * @brief Release a non-realtime signal handler ++ * ++ * @param[in,out] nrt_sig Signal handle ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_nrtsig_destroy(rtdm_nrtsig_t *nrt_sig); ++#endif /* DOXYGEN_CPP */ ++ ++struct nrtsig_work { ++ struct ipipe_work_header work; ++ struct rtdm_nrtsig *nrtsig; ++}; ++ ++static void nrtsig_execute(struct ipipe_work_header *work) ++{ ++ struct rtdm_nrtsig *nrtsig; ++ struct nrtsig_work *w; ++ ++ w = container_of(work, typeof(*w), work); ++ nrtsig = w->nrtsig; ++ nrtsig->handler(nrtsig, nrtsig->arg); ++} ++ ++/** ++ * Trigger non-real-time signal ++ * ++ * @param[in,out] nrt_sig Signal handle ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_nrtsig_pend(rtdm_nrtsig_t *nrt_sig) ++{ ++ struct nrtsig_work nrtsig_work = { ++ .work = { ++ .size = sizeof(nrtsig_work), ++ .handler = nrtsig_execute, ++ }, ++ .nrtsig = nrt_sig, ++ }; ++ ipipe_post_work_root(&nrtsig_work, work); ++} ++EXPORT_SYMBOL_GPL(rtdm_nrtsig_pend); ++ ++struct lostage_schedule_work { ++ struct ipipe_work_header work; ++ struct work_struct *lostage_work; ++}; ++ ++static void lostage_schedule_work(struct ipipe_work_header *work) ++{ ++ struct lostage_schedule_work *w; ++ ++ w = container_of(work, typeof(*w), work); ++ schedule_work(w->lostage_work); ++} ++ ++/** ++ * Put a work task in Linux non real-time global workqueue from primary mode. ++ * ++ * @param lostage_work ++ */ ++void rtdm_schedule_nrt_work(struct work_struct *lostage_work) ++{ ++ struct lostage_schedule_work ipipe_work = { ++ .work = { ++ .size = sizeof(ipipe_work), ++ .handler = lostage_schedule_work, ++ }, ++ .lostage_work = lostage_work, ++ }; ++ ++ if (ipipe_root_p) ++ schedule_work(lostage_work); ++ else ++ ipipe_post_work_root(&ipipe_work, work); ++} ++EXPORT_SYMBOL_GPL(rtdm_schedule_nrt_work); ++ ++/** @} Non-Real-Time Signalling Services */ ++ ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_util Utility Services ++ * @{ ++ */ ++ ++struct mmap_tramp_data { ++ struct rtdm_fd *fd; ++ struct file_operations *fops; ++ int (*mmap_handler)(struct rtdm_fd *fd, ++ struct vm_area_struct *vma); ++}; ++ ++struct mmap_helper_data { ++ void *src_vaddr; ++ phys_addr_t src_paddr; ++ struct vm_operations_struct *vm_ops; ++ void *vm_private_data; ++ struct mmap_tramp_data tramp_data; ++}; ++ ++static int mmap_kmem_helper(struct vm_area_struct *vma, void *va) ++{ ++ unsigned long addr, len, pfn, to; ++ int ret = 0; ++ ++ to = (unsigned long)va; ++ addr = vma->vm_start; ++ len = vma->vm_end - vma->vm_start; ++ ++ if (to != PAGE_ALIGN(to) || (len & ~PAGE_MASK) != 0) ++ return -EINVAL; ++ ++#ifndef CONFIG_MMU ++ pfn = __pa(to) >> PAGE_SHIFT; ++ ret = remap_pfn_range(vma, addr, pfn, len, PAGE_SHARED); ++#else ++ if (to < VMALLOC_START || to >= VMALLOC_END) { ++ /* logical address. */ ++ pfn = __pa(to) >> PAGE_SHIFT; ++ ret = remap_pfn_range(vma, addr, pfn, len, PAGE_SHARED); ++ if (ret) ++ return ret; ++ } else { ++ /* vmalloc memory. 
*/ ++ while (len > 0) { ++ struct page *page = vmalloc_to_page((void *)to); ++ if (vm_insert_page(vma, addr, page)) ++ return -EAGAIN; ++ addr += PAGE_SIZE; ++ to += PAGE_SIZE; ++ len -= PAGE_SIZE; ++ } ++ } ++ ++ if (cobalt_machine.prefault) ++ cobalt_machine.prefault(vma); ++#endif ++ ++ return ret; ++} ++ ++static int mmap_iomem_helper(struct vm_area_struct *vma, phys_addr_t pa) ++{ ++ pgprot_t prot = PAGE_SHARED; ++ unsigned long len; ++ ++ len = vma->vm_end - vma->vm_start; ++#ifndef CONFIG_MMU ++ vma->vm_pgoff = pa >> PAGE_SHIFT; ++#endif /* CONFIG_MMU */ ++ ++#ifdef __HAVE_PHYS_MEM_ACCESS_PROT ++ if (vma->vm_file) ++ prot = phys_mem_access_prot(vma->vm_file, pa >> PAGE_SHIFT, ++ len, prot); ++#endif ++ vma->vm_page_prot = pgprot_noncached(prot); ++ ++ return remap_pfn_range(vma, vma->vm_start, pa >> PAGE_SHIFT, ++ len, vma->vm_page_prot); ++} ++ ++static int mmap_buffer_helper(struct rtdm_fd *fd, struct vm_area_struct *vma) ++{ ++ struct mmap_tramp_data *tramp_data = vma->vm_private_data; ++ struct mmap_helper_data *helper_data; ++ int ret; ++ ++ helper_data = container_of(tramp_data, struct mmap_helper_data, tramp_data); ++ vma->vm_ops = helper_data->vm_ops; ++ vma->vm_private_data = helper_data->vm_private_data; ++ ++ if (helper_data->src_paddr) ++ ret = mmap_iomem_helper(vma, helper_data->src_paddr); ++ else ++ ret = mmap_kmem_helper(vma, helper_data->src_vaddr); ++ ++ return ret; ++} ++ ++static int mmap_trampoline(struct file *filp, struct vm_area_struct *vma) ++{ ++ struct mmap_tramp_data *tramp_data = filp->private_data; ++ int ret; ++ ++ vma->vm_private_data = tramp_data; ++ ++ ret = tramp_data->mmap_handler(tramp_data->fd, vma); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ ++#ifndef CONFIG_MMU ++ ++static unsigned long ++internal_get_unmapped_area(struct file *filp, ++ unsigned long addr, unsigned long len, ++ unsigned long pgoff, unsigned long flags) ++{ ++ struct mmap_tramp_data *tramp_data = filp->private_data; ++ struct mmap_helper_data *helper_data; ++ unsigned long pa; ++ ++ helper_data = container_of(tramp_data, struct mmap_helper_data, tramp_data); ++ pa = helper_data->src_paddr; ++ if (pa) ++ return (unsigned long)__va(pa); ++ ++ return (unsigned long)helper_data->src_vaddr; ++} ++ ++static int do_rtdm_mmap(struct mmap_tramp_data *tramp_data, ++ size_t len, off_t offset, int prot, int flags, ++ void **pptr) ++{ ++ const struct file_operations *old_fops; ++ unsigned long u_addr; ++ struct file *filp; ++ ++ filp = filp_open("/dev/mem", O_RDWR, 0); ++ if (IS_ERR(filp)) ++ return PTR_ERR(filp); ++ ++ old_fops = filp->f_op; ++ filp->f_op = tramp_data->fops; ++ filp->private_data = tramp_data; ++ u_addr = vm_mmap(filp, (unsigned long)*pptr, len, prot, flags, offset); ++ filp_close(filp, current->files); ++ filp->f_op = old_fops; ++ ++ if (IS_ERR_VALUE(u_addr)) ++ return (int)u_addr; ++ ++ *pptr = (void *)u_addr; ++ ++ return 0; ++} ++ ++#else /* CONFIG_MMU */ ++ ++static int do_rtdm_mmap(struct mmap_tramp_data *tramp_data, ++ size_t len, off_t offset, int prot, int flags, ++ void **pptr) ++{ ++ unsigned long u_addr; ++ struct file *filp; ++ ++ filp = anon_inode_getfile("[rtdm]", tramp_data->fops, tramp_data, O_RDWR); ++ if (IS_ERR(filp)) ++ return PTR_ERR(filp); ++ ++ u_addr = vm_mmap(filp, (unsigned long)*pptr, len, prot, flags, offset); ++ filp_close(filp, current->files); ++ ++ if (IS_ERR_VALUE(u_addr)) ++ return (int)u_addr; ++ ++ *pptr = (void *)u_addr; ++ ++ return 0; ++} ++ ++#define internal_get_unmapped_area NULL ++ ++#endif /* CONFIG_MMU */ ++ ++static 
struct file_operations internal_mmap_fops = { ++ .mmap = mmap_trampoline, ++ .get_unmapped_area = internal_get_unmapped_area ++}; ++ ++static unsigned long ++driver_get_unmapped_area(struct file *filp, ++ unsigned long addr, unsigned long len, ++ unsigned long pgoff, unsigned long flags) ++{ ++ struct mmap_tramp_data *tramp_data = filp->private_data; ++ struct rtdm_fd *fd = tramp_data->fd; ++ ++ if (fd->ops->get_unmapped_area) ++ return fd->ops->get_unmapped_area(fd, len, pgoff, flags); ++ ++#ifdef CONFIG_MMU ++ /* Run default handler. */ ++ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); ++#else ++ return -ENODEV; ++#endif ++} ++ ++static struct file_operations driver_mmap_fops = { ++ .mmap = mmap_trampoline, ++ .get_unmapped_area = driver_get_unmapped_area ++}; ++ ++int __rtdm_mmap_from_fdop(struct rtdm_fd *fd, size_t len, off_t offset, ++ int prot, int flags, void **pptr) ++{ ++ struct mmap_tramp_data tramp_data = { ++ .fd = fd, ++ .fops = &driver_mmap_fops, ++ .mmap_handler = fd->ops->mmap, ++ }; ++ ++#ifndef CONFIG_MMU ++ /* ++ * XXX: A .get_unmapped_area handler must be provided in the ++ * nommu case. We use this to force the memory management code ++ * not to share VM regions for distinct areas to map to, as it ++ * would otherwise do since all requests currently apply to ++ * the same file (i.e. from /dev/mem, see do_mmap_pgoff() in ++ * the nommu case). ++ */ ++ if (fd->ops->get_unmapped_area) ++ offset = fd->ops->get_unmapped_area(fd, len, 0, flags); ++#endif ++ ++ return do_rtdm_mmap(&tramp_data, len, offset, prot, flags, pptr); ++} ++ ++/** ++ * Map a kernel memory range into the address space of the user. ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] src_addr Kernel virtual address to be mapped ++ * @param[in] len Length of the memory range ++ * @param[in] prot Protection flags for the user's memory range, typically ++ * either PROT_READ or PROT_READ|PROT_WRITE ++ * @param[in,out] pptr Address of a pointer containing the desired user ++ * address or NULL on entry and the finally assigned address on return ++ * @param[in] vm_ops vm_operations to be executed on the vm_area of the ++ * user memory range or NULL ++ * @param[in] vm_private_data Private data to be stored in the vm_area, ++ * primarily useful for vm_operation handlers ++ * ++ * @return 0 on success, otherwise (most common values): ++ * ++ * - -EINVAL is returned if an invalid start address, size, or destination ++ * address was passed. ++ * ++ * - -ENOMEM is returned if there is insufficient free memory or the limit of ++ * memory mapping for the user process was reached. ++ * ++ * - -EAGAIN is returned if too much memory has been already locked by the ++ * user process. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @note This service only works on memory regions allocated via kmalloc() or ++ * vmalloc(). To map physical I/O memory to user-space use ++ * rtdm_iomap_to_user() instead. ++ * ++ * @note RTDM supports two models for unmapping the memory area: ++ * - manual unmapping via rtdm_munmap(), which may be issued from a ++ * driver in response to an IOCTL call, or by a call to the regular ++ * munmap() call from the application. ++ * - automatic unmapping, triggered by the termination of the process ++ * which owns the mapping. 
++ * To track the number of references pending on the resource mapped, ++ * the driver can pass the address of a close handler for the vm_area ++ * considered, in the @a vm_ops descriptor. See the relevant Linux ++ * kernel programming documentation (e.g. Linux Device Drivers book) ++ * on virtual memory management for details. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_mmap_to_user(struct rtdm_fd *fd, ++ void *src_addr, size_t len, ++ int prot, void **pptr, ++ struct vm_operations_struct *vm_ops, ++ void *vm_private_data) ++{ ++ struct mmap_helper_data helper_data = { ++ .tramp_data = { ++ .fd = fd, ++ .fops = &internal_mmap_fops, ++ .mmap_handler = mmap_buffer_helper, ++ }, ++ .src_vaddr = src_addr, ++ .src_paddr = 0, ++ .vm_ops = vm_ops, ++ .vm_private_data = vm_private_data ++ }; ++ ++ if (!XENO_ASSERT(COBALT, xnsched_root_p())) ++ return -EPERM; ++ ++ return do_rtdm_mmap(&helper_data.tramp_data, len, 0, prot, MAP_SHARED, pptr); ++} ++EXPORT_SYMBOL_GPL(rtdm_mmap_to_user); ++ ++/** ++ * Map an I/O memory range into the address space of the user. ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] src_addr physical I/O address to be mapped ++ * @param[in] len Length of the memory range ++ * @param[in] prot Protection flags for the user's memory range, typically ++ * either PROT_READ or PROT_READ|PROT_WRITE ++ * @param[in,out] pptr Address of a pointer containing the desired user ++ * address or NULL on entry and the finally assigned address on return ++ * @param[in] vm_ops vm_operations to be executed on the vm_area of the ++ * user memory range or NULL ++ * @param[in] vm_private_data Private data to be stored in the vm_area, ++ * primarily useful for vm_operation handlers ++ * ++ * @return 0 on success, otherwise (most common values): ++ * ++ * - -EINVAL is returned if an invalid start address, size, or destination ++ * address was passed. ++ * ++ * - -ENOMEM is returned if there is insufficient free memory or the limit of ++ * memory mapping for the user process was reached. ++ * ++ * - -EAGAIN is returned if too much memory has been already locked by the ++ * user process. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @note RTDM supports two models for unmapping the memory area: ++ * - manual unmapping via rtdm_munmap(), which may be issued from a ++ * driver in response to an IOCTL call, or by a call to the regular ++ * munmap() call from the application. ++ * - automatic unmapping, triggered by the termination of the process ++ * which owns the mapping. ++ * To track the number of references pending on the resource mapped, ++ * the driver can pass the address of a close handler for the vm_area ++ * considered, in the @a vm_ops descriptor. See the relevant Linux ++ * kernel programming documentation (e.g. Linux Device Drivers book) ++ * on virtual memory management for details. 
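++ *
++ * A short sketch of exporting a device register window from an IOCTL
++ * handler (@c ctx->regs_paddr and the single-page length are
++ * illustrative):
++ *
++ * @code
++ * void *uptr = NULL;
++ *
++ * ret = rtdm_iomap_to_user(fd, ctx->regs_paddr, PAGE_SIZE,
++ *			     PROT_READ | PROT_WRITE, &uptr, NULL, NULL);
++ * // on success, uptr holds the user-space address of the mapping
++ * @endcode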
++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_iomap_to_user(struct rtdm_fd *fd, ++ phys_addr_t src_addr, size_t len, ++ int prot, void **pptr, ++ struct vm_operations_struct *vm_ops, ++ void *vm_private_data) ++{ ++ struct mmap_helper_data helper_data = { ++ .tramp_data = { ++ .fd = fd, ++ .fops = &internal_mmap_fops, ++ .mmap_handler = mmap_buffer_helper, ++ }, ++ .src_vaddr = NULL, ++ .src_paddr = src_addr, ++ .vm_ops = vm_ops, ++ .vm_private_data = vm_private_data ++ }; ++ ++ if (!XENO_ASSERT(COBALT, xnsched_root_p())) ++ return -EPERM; ++ ++ return do_rtdm_mmap(&helper_data.tramp_data, len, 0, prot, MAP_SHARED, pptr); ++} ++EXPORT_SYMBOL_GPL(rtdm_iomap_to_user); ++ ++/** ++ * Map a kernel logical memory range to a virtual user area. ++ * ++ * This routine is commonly used from a ->mmap() handler of a RTDM ++ * driver, for mapping a virtual memory area with a direct physical ++ * mapping over the user address space referred to by @a vma. ++ * ++ * @param[in] vma The VMA descriptor to receive the mapping. ++ * @param[in] va The kernel logical address to be mapped. ++ * ++ * @return 0 on success, otherwise a negated error code is returned. ++ * ++ * @note This service works on memory regions allocated via ++ * kmalloc(). To map a chunk of virtual space with no direct physical ++ * mapping, or a physical I/O memory to a VMA, call rtdm_mmap_vmem() ++ * or rtdm_mmap_iomem() respectively instead. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_mmap_kmem(struct vm_area_struct *vma, void *va) ++{ ++ return mmap_kmem_helper(vma, va); ++} ++EXPORT_SYMBOL_GPL(rtdm_mmap_kmem); ++ ++/** ++ * Map a virtual memory range to a virtual user area. ++ * ++ * This routine is commonly used from a ->mmap() handler of a RTDM ++ * driver, for mapping a purely virtual memory area over the user ++ * address space referred to by @a vma. ++ * ++ * @param[in] vma The VMA descriptor to receive the mapping. ++ * @param[in] va The virtual address to be mapped. ++ * ++ * @return 0 on success, otherwise a negated error code is returned. ++ * ++ * @note This service works on memory regions allocated via ++ * vmalloc(). To map a chunk of logical space obtained from kmalloc(), ++ * or a physical I/O memory to a VMA, call rtdm_mmap_kmem() or ++ * rtdm_mmap_iomem() respectively instead. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_mmap_vmem(struct vm_area_struct *vma, void *va) ++{ ++ /* ++ * Our helper handles both of directly mapped to physical and ++ * purely virtual memory ranges. ++ */ ++ return mmap_kmem_helper(vma, va); ++} ++EXPORT_SYMBOL_GPL(rtdm_mmap_vmem); ++ ++/** ++ * Map an I/O memory range to a virtual user area. ++ * ++ * This routine is commonly used from a ->mmap() handler of a RTDM ++ * driver, for mapping an I/O memory area over the user address space ++ * referred to by @a vma. ++ * ++ * @param[in] vma The VMA descriptor to receive the mapping. ++ * @param[in] pa The physical I/O address to be mapped. ++ * ++ * @return 0 on success, otherwise a negated error code is returned. ++ * ++ * @note To map a chunk of logical space obtained from kmalloc(), or a ++ * purely virtual area with no direct physical mapping to a VMA, call ++ * rtdm_mmap_kmem() or rtdm_mmap_vmem() respectively instead. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_mmap_iomem(struct vm_area_struct *vma, phys_addr_t pa) ++{ ++ return mmap_iomem_helper(vma, pa); ++} ++EXPORT_SYMBOL_GPL(rtdm_mmap_iomem); ++ ++/** ++ * Unmap a user memory range. 
++ * ++ * @param[in] ptr User address or the memory range ++ * @param[in] len Length of the memory range ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINVAL is returned if an invalid address or size was passed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_munmap(void *ptr, size_t len) ++{ ++ if (!XENO_ASSERT(COBALT, xnsched_root_p())) ++ return -EPERM; ++ ++ return vm_munmap((unsigned long)ptr, len); ++} ++EXPORT_SYMBOL_GPL(rtdm_munmap); ++ ++int rtdm_get_iovec(struct rtdm_fd *fd, struct iovec **iovp, ++ const struct user_msghdr *msg, ++ struct iovec *iov_fast) ++{ ++ size_t len = sizeof(struct iovec) * msg->msg_iovlen; ++ struct iovec *iov = iov_fast; ++ ++ /* ++ * If the I/O vector doesn't fit in the fast memory, allocate ++ * a chunk from the system heap which is large enough to hold ++ * it. ++ */ ++ if (msg->msg_iovlen > RTDM_IOV_FASTMAX) { ++ iov = xnmalloc(len); ++ if (iov == NULL) ++ return -ENOMEM; ++ } ++ ++ *iovp = iov; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ memcpy(iov, msg->msg_iov, len); ++ return 0; ++ } ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) ++ return sys32_get_iovec(iov, ++ (struct compat_iovec __user *)msg->msg_iov, ++ msg->msg_iovlen); ++#endif ++ ++ return rtdm_copy_from_user(fd, iov, msg->msg_iov, len); ++} ++EXPORT_SYMBOL_GPL(rtdm_get_iovec); ++ ++int rtdm_put_iovec(struct rtdm_fd *fd, struct iovec *iov, ++ const struct user_msghdr *msg, ++ struct iovec *iov_fast) ++{ ++ size_t len = sizeof(iov[0]) * msg->msg_iovlen; ++ int ret; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ memcpy(msg->msg_iov, iov, len); ++ ret = 0; ++ } else ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) ++ ret = sys32_put_iovec((struct compat_iovec __user *)msg->msg_iov, ++ iov, msg->msg_iovlen); ++ else ++#endif ++ ret = rtdm_copy_to_user(fd, msg->msg_iov, iov, len); ++ ++ if (iov != iov_fast) ++ xnfree(iov); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_put_iovec); ++ ++ssize_t rtdm_get_iov_flatlen(struct iovec *iov, int iovlen) ++{ ++ ssize_t len; ++ int nvec; ++ ++ /* Return the flattened vector length. */ ++ for (len = 0, nvec = 0; nvec < iovlen; nvec++) { ++ ssize_t l = iov[nvec].iov_len; ++ if (l < 0 || len + l < len) /* SuS wants this. */ ++ return -EINVAL; ++ len += l; ++ } ++ ++ return len; ++} ++EXPORT_SYMBOL_GPL(rtdm_get_iov_flatlen); ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++ ++/** ++ * Real-time safe rate-limited message printing on kernel console ++ * ++ * @param[in] format Format string (conforming standard @c printf()) ++ * @param ... Arguments referred by @a format ++ * ++ * @return On success, this service returns the number of characters printed. ++ * Otherwise, a negative error code is returned. ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_printk_ratelimited(const char *format, ...); ++ ++/** ++ * Real-time safe message printing on kernel console ++ * ++ * @param[in] format Format string (conforming standard @c printf()) ++ * @param ... Arguments referred by @a format ++ * ++ * @return On success, this service returns the number of characters printed. ++ * Otherwise, a negative error code is returned. ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_printk(const char *format, ...); ++ ++/** ++ * Allocate memory block ++ * ++ * @param[in] size Requested size of the memory block ++ * ++ * @return The pointer to the allocated block is returned on success, NULL ++ * otherwise. 
++ * ++ * @coretags{unrestricted} ++ */ ++void *rtdm_malloc(size_t size); ++ ++/** ++ * Release real-time memory block ++ * ++ * @param[in] ptr Pointer to memory block as returned by rtdm_malloc() ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_free(void *ptr); ++ ++/** ++ * Check if read access to user-space memory block is safe ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] ptr Address of the user-provided memory block ++ * @param[in] size Size of the memory block ++ * ++ * @return Non-zero is return when it is safe to read from the specified ++ * memory block, 0 otherwise. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_read_user_ok(struct rtdm_fd *fd, const void __user *ptr, ++ size_t size); ++ ++/** ++ * Check if read/write access to user-space memory block is safe ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] ptr Address of the user-provided memory block ++ * @param[in] size Size of the memory block ++ * ++ * @return Non-zero is return when it is safe to read from or write to the ++ * specified memory block, 0 otherwise. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_rw_user_ok(struct rtdm_fd *fd, const void __user *ptr, ++ size_t size); ++ ++/** ++ * Copy user-space memory block to specified buffer ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] dst Destination buffer address ++ * @param[in] src Address of the user-space memory block ++ * @param[in] size Size of the memory block ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EFAULT is returned if an invalid memory area was accessed. ++ * ++ * @note Before invoking this service, verify via rtdm_read_user_ok() that the ++ * provided user-space address can securely be accessed. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_copy_from_user(struct rtdm_fd *fd, void *dst, ++ const void __user *src, size_t size); ++ ++/** ++ * Check if read access to user-space memory block and copy it to specified ++ * buffer ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] dst Destination buffer address ++ * @param[in] src Address of the user-space memory block ++ * @param[in] size Size of the memory block ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EFAULT is returned if an invalid memory area was accessed. ++ * ++ * @note This service is a combination of rtdm_read_user_ok and ++ * rtdm_copy_from_user. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_safe_copy_from_user(struct rtdm_fd *fd, void *dst, ++ const void __user *src, size_t size); ++ ++/** ++ * Copy specified buffer to user-space memory block ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] dst Address of the user-space memory block ++ * @param[in] src Source buffer address ++ * @param[in] size Size of the memory block ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EFAULT is returned if an invalid memory area was accessed. ++ * ++ * @note Before invoking this service, verify via rtdm_rw_user_ok() that the ++ * provided user-space address can securely be accessed. 
++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_copy_to_user(struct rtdm_fd *fd, void __user *dst, ++ const void *src, size_t size); ++ ++/** ++ * Check if read/write access to user-space memory block is safe and copy ++ * specified buffer to it ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] dst Address of the user-space memory block ++ * @param[in] src Source buffer address ++ * @param[in] size Size of the memory block ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EFAULT is returned if an invalid memory area was accessed. ++ * ++ * @note This service is a combination of rtdm_rw_user_ok and ++ * rtdm_copy_to_user. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_safe_copy_to_user(struct rtdm_fd *fd, void __user *dst, ++ const void *src, size_t size); ++ ++/** ++ * Copy user-space string to specified buffer ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] dst Destination buffer address ++ * @param[in] src Address of the user-space string ++ * @param[in] count Maximum number of bytes to copy, including the trailing ++ * '0' ++ * ++ * @return Length of the string on success (not including the trailing '0'), ++ * otherwise: ++ * ++ * - -EFAULT is returned if an invalid memory area was accessed. ++ * ++ * @note This services already includes a check of the source address, ++ * calling rtdm_read_user_ok() for @a src explicitly is not required. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_strncpy_from_user(struct rtdm_fd *fd, char *dst, ++ const char __user *src, size_t count); ++ ++/** ++ * Test if running in a real-time task ++ * ++ * @return Non-zero is returned if the caller resides in real-time context, 0 ++ * otherwise. ++ * ++ * @coretags{unrestricted} ++ */ ++int rtdm_in_rt_context(void); ++ ++/** ++ * Test if the caller is capable of running in real-time context ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * ++ * @return Non-zero is returned if the caller is able to execute in real-time ++ * context (independent of its current execution mode), 0 otherwise. ++ * ++ * @note This function can be used by drivers that provide different ++ * implementations for the same service depending on the execution mode of ++ * the caller. If a caller requests such a service in non-real-time context ++ * but is capable of running in real-time as well, it might be appropriate ++ * for the driver to reject the request via -ENOSYS so that RTDM can switch ++ * the caller and restart the request in real-time context. ++ * ++ * @coretags{unrestricted} ++ */ ++int rtdm_rt_capable(struct rtdm_fd *fd); ++ ++/** ++ * Test if the real-time core is available ++ * ++ * @return True if the real-time is available, false if it is disabled or in ++ * error state. ++ * ++ * @note Drivers should query the core state during initialization if they ++ * perform hardware setup operations or interact with RTDM services such as ++ * locks prior to calling an RTDM service that has a built-in state check of ++ * the real-time core (e.g. rtdm_dev_register() or rtdm_task_init()). 
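
A sketch of how the user-memory helpers above are typically combined in an ioctl handler; the request codes, the configuration structure and the mydrv_* names are hypothetical, and access to the shared settings is left unserialized for brevity:

#include <rtdm/driver.h>
#include <linux/ioctl.h>

struct mydrv_config {
        int channel;
        unsigned int rate_hz;
};

/* Hypothetical request codes. */
#define MYDRV_RTIOC_SET_CONFIG  _IOW('x', 0x01, struct mydrv_config)
#define MYDRV_RTIOC_GET_CONFIG  _IOR('x', 0x02, struct mydrv_config)

static struct mydrv_config mydrv_cfg;   /* driver-global settings, for brevity */

static int mydrv_ioctl_rt(struct rtdm_fd *fd, unsigned int request,
                          void __user *arg)
{
        struct mydrv_config tmp;
        int ret;

        switch (request) {
        case MYDRV_RTIOC_SET_CONFIG:
                /* Checks the source range, then copies it, in one call. */
                ret = rtdm_safe_copy_from_user(fd, &tmp, arg, sizeof(tmp));
                if (ret)
                        return ret;
                mydrv_cfg = tmp;
                return 0;
        case MYDRV_RTIOC_GET_CONFIG:
                return rtdm_safe_copy_to_user(fd, arg, &mydrv_cfg,
                                              sizeof(mydrv_cfg));
        default:
                /* Unknown request: let the non-real-time path handle it. */
                return -ENOSYS;
        }
}
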
++ * ++ * @coretags{unrestricted} ++ */ ++bool rtdm_available(void); ++ ++#endif /* DOXYGEN_CPP */ ++ ++/** @} Utility Services */ +--- linux/kernel/xenomai/rtdm/core.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/core.c 2021-04-07 16:01:26.171635675 +0800 +@@ -0,0 +1,1374 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "rtdm/internal.h" ++#define CREATE_TRACE_POINTS ++#include ++#include "posix/process.h" ++ ++/** ++ * @ingroup rtdm ++ * @defgroup rtdm_driver_interface Driver programming interface ++ * RTDM driver programming interface ++ * @{ ++ */ ++ ++static void cleanup_instance(struct rtdm_device *dev, ++ struct rtdm_dev_context *context) ++{ ++ if (context) ++ kfree(context); ++ ++ __rtdm_put_device(dev); ++} ++ ++void __rtdm_dev_close(struct rtdm_fd *fd) ++{ ++ struct rtdm_dev_context *context = rtdm_fd_to_context(fd); ++ struct rtdm_device *dev = context->device; ++ struct rtdm_driver *drv = dev->driver; ++ ++ if (!fd->stale && drv->ops.close) ++ drv->ops.close(fd); ++ ++ cleanup_instance(dev, context); ++} ++ ++int __rtdm_anon_getfd(const char *name, int flags) ++{ ++ return anon_inode_getfd(name, &rtdm_dumb_fops, NULL, flags); ++} ++ ++void __rtdm_anon_putfd(int ufd) ++{ ++ __close_fd(current->files, ufd); ++} ++ ++static int create_instance(int ufd, struct rtdm_device *dev, ++ struct rtdm_dev_context **context_ptr) ++{ ++ struct rtdm_driver *drv = dev->driver; ++ struct rtdm_dev_context *context; ++ ++ /* ++ * Reset to NULL so that we can always use cleanup_files/instance to ++ * revert also partially successful allocations. 
++ */ ++ *context_ptr = NULL; ++ ++ if ((drv->device_flags & RTDM_EXCLUSIVE) != 0 && ++ atomic_read(&dev->refcount) > 1) ++ return -EBUSY; ++ ++ context = kzalloc(sizeof(struct rtdm_dev_context) + ++ drv->context_size, GFP_KERNEL); ++ if (unlikely(context == NULL)) ++ return -ENOMEM; ++ ++ context->device = dev; ++ *context_ptr = context; ++ ++ return rtdm_fd_enter(&context->fd, ufd, RTDM_FD_MAGIC, &dev->ops); ++} ++ ++#ifdef CONFIG_XENO_OPT_RTDM_COMPAT_DEVNODE ++ ++static inline struct file * ++open_devnode(struct rtdm_device *dev, const char *path, int oflag) ++{ ++ struct file *filp; ++ char *filename; ++ ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_LEGACY) && ++ strncmp(path, "/dev/rtdm/", 10)) ++ printk(XENO_WARNING ++ "%s[%d] opens obsolete device path: %s\n", ++ current->comm, task_pid_nr(current), path); ++ ++ filename = kasprintf(GFP_KERNEL, "/dev/rtdm/%s", dev->name); ++ if (filename == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ filp = filp_open(filename, oflag, 0); ++ kfree(filename); ++ ++ return filp; ++} ++ ++#else /* !CONFIG_XENO_OPT_RTDM_COMPAT_DEVNODE */ ++ ++static inline struct file * ++open_devnode(struct rtdm_device *dev, const char *path, int oflag) ++{ ++ return filp_open(path, oflag, 0); ++} ++ ++#endif /* !CONFIG_XENO_OPT_RTDM_COMPAT_DEVNODE */ ++ ++int __rtdm_dev_open(const char *path, int oflag) ++{ ++ struct rtdm_dev_context *context; ++ struct rtdm_device *dev; ++ struct file *filp; ++ int ufd, ret; ++ ++ secondary_mode_only(); ++ ++ /* ++ * CAUTION: we do want a lookup into the registry to happen ++ * before any attempt is made to open the devnode, so that we ++ * don't inadvertently open a regular (i.e. non-RTDM) device. ++ * Reason is that opening, then closing a device - because we ++ * don't manage it - may incur side-effects we don't want, ++ * e.g. opening then closing one end of a pipe would cause the ++ * other side to read the EOF condition. This is basically ++ * why we keep a RTDM registry for named devices, so that we ++ * can figure out whether an open() request is going to be ++ * valid, without having to open the devnode yet. 
++ */ ++ dev = __rtdm_get_namedev(path); ++ if (dev == NULL) ++ return -EADV; ++ ++ ufd = get_unused_fd_flags(oflag); ++ if (ufd < 0) { ++ ret = ufd; ++ goto fail_fd; ++ } ++ ++ filp = open_devnode(dev, path, oflag); ++ if (IS_ERR(filp)) { ++ ret = PTR_ERR(filp); ++ goto fail_fopen; ++ } ++ ++ ret = create_instance(ufd, dev, &context); ++ if (ret < 0) ++ goto fail_create; ++ ++ context->fd.minor = dev->minor; ++ context->fd.oflags = oflag; ++ ++ trace_cobalt_fd_open(current, &context->fd, ufd, oflag); ++ ++ if (dev->ops.open) { ++ ret = dev->ops.open(&context->fd, oflag); ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ if (ret < 0) ++ goto fail_open; ++ } ++ ++ ret = rtdm_device_new_fd(&context->fd, ufd, context->device); ++ if (ret < 0) ++ goto fail_open; ++ ++ fd_install(ufd, filp); ++ ++ return ufd; ++ ++fail_open: ++ cleanup_instance(dev, context); ++fail_create: ++ filp_close(filp, current->files); ++fail_fopen: ++ put_unused_fd(ufd); ++fail_fd: ++ __rtdm_put_device(dev); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__rtdm_dev_open); ++ ++int __rtdm_dev_socket(int protocol_family, int socket_type, ++ int protocol) ++{ ++ struct rtdm_dev_context *context; ++ struct rtdm_device *dev; ++ int ufd, ret; ++ ++ secondary_mode_only(); ++ ++ dev = __rtdm_get_protodev(protocol_family, socket_type); ++ if (dev == NULL) ++ return -EAFNOSUPPORT; ++ ++ ufd = __rtdm_anon_getfd("[rtdm-socket]", O_RDWR); ++ if (ufd < 0) { ++ ret = ufd; ++ goto fail_getfd; ++ } ++ ++ ret = create_instance(ufd, dev, &context); ++ if (ret < 0) ++ goto fail_create; ++ ++ trace_cobalt_fd_socket(current, &context->fd, ufd, protocol_family); ++ ++ if (dev->ops.socket) { ++ ret = dev->ops.socket(&context->fd, protocol); ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ if (ret < 0) ++ goto fail_socket; ++ } ++ ++ ret = rtdm_device_new_fd(&context->fd, ufd, context->device); ++ if (ret < 0) ++ goto fail_socket; ++ ++ return ufd; ++ ++fail_socket: ++ cleanup_instance(dev, context); ++fail_create: ++ __close_fd(current->files, ufd); ++fail_getfd: ++ __rtdm_put_device(dev); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__rtdm_dev_socket); ++ ++int __rtdm_dev_ioctl_core(struct rtdm_fd *fd, unsigned int request, ++ void __user *arg) ++{ ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ struct rtdm_driver *drv; ++ struct rtdm_device_info dev_info; ++ ++ if (fd->magic != RTDM_FD_MAGIC || request != RTIOC_DEVICE_INFO) ++ return -EADV; ++ ++ drv = dev->driver; ++ dev_info.device_flags = drv->device_flags; ++ dev_info.device_class = drv->profile_info.class_id; ++ dev_info.device_sub_class = drv->profile_info.subclass_id; ++ dev_info.profile_version = drv->profile_info.version; ++ ++ return rtdm_safe_copy_to_user(fd, arg, &dev_info, sizeof(dev_info)); ++} ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++ ++/** ++ * @addtogroup rtdm_sync ++ *@{ ++ */ ++ ++/** ++ * @fn void rtdm_waitqueue_init(struct rtdm_waitqueue *wq) ++ * @brief Initialize a RTDM wait queue ++ * ++ * Sets up a wait queue structure for further use. ++ * ++ * @param wq waitqueue to initialize. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_waitqueue_init(struct rtdm_waitqueue *wq); ++ ++/** ++ * @fn void rtdm_waitqueue_destroy(struct rtdm_waitqueue *wq) ++ * @brief Deletes a RTDM wait queue ++ * ++ * Dismantles a wait queue structure, releasing all resources attached ++ * to it. ++ * ++ * @param wq waitqueue to delete. 
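
A waitqueue is typically embedded in the driver's private context and tied to the open/close handlers. A minimal sketch with hypothetical mydrv_* names (rtdm_waitqueue_flush(), used here, is documented further below):

#include <rtdm/driver.h>

/* Hypothetical per-connection state (see rtdm_driver.context_size). */
struct mydrv_state {
        struct rtdm_waitqueue wq;
        int data_ready;         /* wait condition, set by the IRQ handler */
        u32 sample;             /* last value captured from the hardware */
};

static int mydrv_open(struct rtdm_fd *fd, int oflags)
{
        struct mydrv_state *st = rtdm_fd_to_private(fd);

        rtdm_waitqueue_init(&st->wq);
        st->data_ready = 0;
        return 0;
}

static void mydrv_close(struct rtdm_fd *fd)
{
        struct mydrv_state *st = rtdm_fd_to_private(fd);

        /* Kick out any sleeper with -EINTR before dismantling the queue. */
        rtdm_waitqueue_flush(&st->wq);
        rtdm_waitqueue_destroy(&st->wq);
}
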
++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_waitqueue_destroy(struct rtdm_waitqueue *wq); ++ ++/** ++ * @fn rtdm_timedwait_condition_locked(struct rtdm_wait_queue *wq, C_expr condition, nanosecs_rel_t timeout, rtdm_toseq_t *toseq) ++ * @brief Timed sleep on a locked waitqueue until a condition gets true ++ * ++ * The calling task is put to sleep until @a condition evaluates to ++ * true or a timeout occurs. The condition is checked each time the ++ * waitqueue @a wq is signaled. ++ * ++ * The waitqueue must have been locked by a call to ++ * rtdm_waitqueue_lock() prior to calling this service. ++ * ++ * @param wq locked waitqueue to wait on. The waitqueue lock is ++ * dropped when sleeping, then reacquired before this service returns ++ * to the caller. ++ * ++ * @param condition C expression for the event to wait for. ++ * ++ * @param timeout relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values. ++ * ++ * @param[in,out] toseq handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has received a Linux signal or ++ * has been forcibly unblocked by a call to rtdm_task_unblock(). ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * @note rtdm_waitqueue_signal() has to be called after changing any ++ * variable that could change the result of the wait condition. ++ * ++ * @note Passing RTDM_TIMEOUT_NONE to @a timeout makes no sense for ++ * such service, and might cause unexpected behavior. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++rtdm_timedwait_condition_locked(struct rtdm_wait_queue *wq, C_expr condition, ++ nanosecs_rel_t timeout, rtdm_toseq_t *toseq); ++ ++/** ++ * @fn rtdm_wait_condition_locked(struct rtdm_wait_queue *wq, C_expr condition) ++ * @brief Sleep on a locked waitqueue until a condition gets true ++ * ++ * The calling task is put to sleep until @a condition evaluates to ++ * true. The condition is checked each time the waitqueue @a wq is ++ * signaled. ++ * ++ * The waitqueue must have been locked by a call to ++ * rtdm_waitqueue_lock() prior to calling this service. ++ * ++ * @param wq locked waitqueue to wait on. The waitqueue lock is ++ * dropped when sleeping, then reacquired before this service returns ++ * to the caller. ++ * ++ * @param condition C expression for the event to wait for. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has received a Linux signal or ++ * has been forcibly unblocked by a call to rtdm_task_unblock(). ++ * ++ * @note rtdm_waitqueue_signal() has to be called after changing any ++ * variable that could change the result of the wait condition. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++rtdm_wait_condition_locked(struct rtdm_wait_queue *wq, C_expr condition); ++ ++/** ++ * @fn rtdm_timedwait_condition(struct rtdm_wait_queue *wq, C_expr condition, nanosecs_rel_t timeout, rtdm_toseq_t *toseq) ++ * @brief Timed sleep on a waitqueue until a condition gets true ++ * ++ * The calling task is put to sleep until @a condition evaluates to ++ * true or a timeout occurs. The condition is checked each time the ++ * waitqueue @a wq is signaled. ++ * ++ * @param wq waitqueue to wait on. ++ * ++ * @param condition C expression for the event to wait for. ++ * ++ * @param timeout relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values. 
++ * ++ * @param[in,out] toseq handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has received a Linux signal or ++ * has been forcibly unblocked by a call to rtdm_task_unblock(). ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * @note rtdm_waitqueue_signal() has to be called after changing any ++ * variable that could change the result of the wait condition. ++ * ++ * @note Passing RTDM_TIMEOUT_NONE to @a timeout makes no sense for ++ * such service, and might cause unexpected behavior. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++rtdm_timedwait_condition(struct rtdm_wait_queue *wq, C_expr condition, ++ nanosecs_rel_t timeout, rtdm_toseq_t *toseq); ++ ++/** ++ * @fn void rtdm_timedwait(struct rtdm_wait_queue *wq, nanosecs_rel_t timeout, rtdm_toseq_t *toseq) ++ * @brief Timed sleep on a waitqueue unconditionally ++ * ++ * The calling task is put to sleep until the waitqueue is signaled by ++ * either rtdm_waitqueue_signal() or rtdm_waitqueue_broadcast(), or ++ * flushed by a call to rtdm_waitqueue_flush(), or a timeout occurs. ++ * ++ * @param wq waitqueue to wait on. ++ * ++ * @param timeout relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values. ++ * ++ * @param[in,out] toseq handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if the waitqueue has been flushed, or the ++ * calling task has received a Linux signal or has been forcibly ++ * unblocked by a call to rtdm_task_unblock(). ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * @note Passing RTDM_TIMEOUT_NONE to @a timeout makes no sense for ++ * such service, and might cause unexpected behavior. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++void rtdm_timedwait(struct rtdm_wait_queue *wq, ++ nanosecs_rel_t timeout, rtdm_toseq_t *toseq); ++ ++/** ++ * @fn void rtdm_timedwait_locked(struct rtdm_wait_queue *wq, nanosecs_rel_t timeout, rtdm_toseq_t *toseq) ++ * @brief Timed sleep on a locked waitqueue unconditionally ++ * ++ * The calling task is put to sleep until the waitqueue is signaled by ++ * either rtdm_waitqueue_signal() or rtdm_waitqueue_broadcast(), or ++ * flushed by a call to rtdm_waitqueue_flush(), or a timeout occurs. ++ * ++ * The waitqueue must have been locked by a call to ++ * rtdm_waitqueue_lock() prior to calling this service. ++ * ++ * @param wq locked waitqueue to wait on. The waitqueue lock is ++ * dropped when sleeping, then reacquired before this service returns ++ * to the caller. ++ * ++ * @param timeout relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values. ++ * ++ * @param[in,out] toseq handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if the waitqueue has been flushed, or the ++ * calling task has received a Linux signal or has been forcibly ++ * unblocked by a call to rtdm_task_unblock(). ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * @note Passing RTDM_TIMEOUT_NONE to @a timeout makes no sense for ++ * such service, and might cause unexpected behavior. 
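
A sketch of a blocking ->read_rt() handler built on the timed wait services above, reusing the hypothetical mydrv_state from the previous sketch; the 1 ms timeout is arbitrary:

#include <rtdm/driver.h>

static ssize_t mydrv_read_rt(struct rtdm_fd *fd, void __user *buf, size_t len)
{
        struct mydrv_state *st = rtdm_fd_to_private(fd);
        nanosecs_rel_t timeout = 1000000;       /* 1 ms, arbitrary */
        rtdm_toseq_t toseq;
        rtdm_lockctx_t ctx;
        u32 sample = 0;
        int ret;

        if (len < sizeof(sample))
                return -EINVAL;

        /* Keep the overall deadline stable across consecutive waits. */
        rtdm_toseq_init(&toseq, timeout);

        rtdm_waitqueue_lock(&st->wq, ctx);
        ret = rtdm_timedwait_condition_locked(&st->wq, st->data_ready,
                                              timeout, &toseq);
        if (ret == 0) {
                sample = st->sample;
                st->data_ready = 0;     /* consume the event */
        }
        rtdm_waitqueue_unlock(&st->wq, ctx);
        if (ret)
                return ret;             /* -EINTR or -ETIMEDOUT */

        if (rtdm_safe_copy_to_user(fd, buf, &sample, sizeof(sample)))
                return -EFAULT;

        return sizeof(sample);
}
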
++ * ++ * @coretags{primary-only, might-switch} ++ */ ++void rtdm_timedwait_locked(struct rtdm_wait_queue *wq, ++ nanosecs_rel_t timeout, rtdm_toseq_t *toseq); ++ ++/** ++ * @fn rtdm_wait_condition(struct rtdm_wait_queue *wq, C_expr condition) ++ * @brief Sleep on a waitqueue until a condition gets true ++ * ++ * The calling task is put to sleep until @a condition evaluates to ++ * true. The condition is checked each time the waitqueue @a wq is ++ * signaled. ++ * ++ * @param wq waitqueue to wait on ++ * ++ * @param condition C expression for the event to wait for. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has received a Linux signal or ++ * has been forcibly unblocked by a call to rtdm_task_unblock(). ++ * ++ * @note rtdm_waitqueue_signal() has to be called after changing any ++ * variable that could change the result of the wait condition. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++rtdm_wait_condition(struct rtdm_wait_queue *wq, C_expr condition); ++ ++/** ++ * @fn void rtdm_wait(struct rtdm_wait_queue *wq) ++ * @brief Sleep on a waitqueue unconditionally ++ * ++ * The calling task is put to sleep until the waitqueue is signaled by ++ * either rtdm_waitqueue_signal() or rtdm_waitqueue_broadcast(), or ++ * flushed by a call to rtdm_waitqueue_flush(). ++ * ++ * @param wq waitqueue to wait on. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if the waitqueue has been flushed, or the ++ * calling task has received a Linux signal or has been forcibly ++ * unblocked by a call to rtdm_task_unblock(). ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++void rtdm_wait(struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn void rtdm_wait_locked(struct rtdm_wait_queue *wq) ++ * @brief Sleep on a locked waitqueue unconditionally ++ * ++ * The calling task is put to sleep until the waitqueue is signaled by ++ * either rtdm_waitqueue_signal() or rtdm_waitqueue_broadcast(), or ++ * flushed by a call to rtdm_waitqueue_flush(). ++ * ++ * The waitqueue must have been locked by a call to ++ * rtdm_waitqueue_lock() prior to calling this service. ++ * ++ * @param wq locked waitqueue to wait on. The waitqueue lock is ++ * dropped when sleeping, then reacquired before this service returns ++ * to the caller. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if the waitqueue has been flushed, or the ++ * calling task has received a Linux signal or has been forcibly ++ * unblocked by a call to rtdm_task_unblock(). ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++void rtdm_wait_locked(struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn void rtdm_waitqueue_lock(struct rtdm_wait_queue *wq, rtdm_lockctx_t context) ++ * @brief Lock a waitqueue ++ * ++ * Acquires the lock on the waitqueue @a wq. ++ * ++ * @param wq waitqueue to lock. ++ * ++ * @param context name of local variable to store the context in. ++ * ++ * @note Recursive locking might lead to unexpected behavior, ++ * including lock up. ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_waitqueue_lock(struct rtdm_wait_queue *wq, rtdm_lockctx_t context); ++ ++/** ++ * @fn void rtdm_waitqueue_unlock(struct rtdm_wait_queue *wq, rtdm_lockctx_t context) ++ * @brief Unlock a waitqueue ++ * ++ * Releases the lock on the waitqueue @a wq. ++ * ++ * @param wq waitqueue to unlock. ++ * ++ * @param context name of local variable to retrieve the context from. 
++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_waitqueue_unlock(struct rtdm_wait_queue *wq, rtdm_lockctx_t context); ++ ++/** ++ * @fn void rtdm_waitqueue_signal(struct rtdm_wait_queue *wq) ++ * @brief Signal a waitqueue ++ * ++ * Signals the waitqueue @a wq, waking up a single waiter (if ++ * any). ++ * ++ * @param wq waitqueue to signal. ++ * ++ * @return non-zero if a task has been readied as a result of this ++ * call, zero otherwise. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_waitqueue_signal(struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn void rtdm_waitqueue_broadcast(struct rtdm_wait_queue *wq) ++ * @brief Broadcast a waitqueue ++ * ++ * Broadcast the waitqueue @a wq, waking up all waiters. Each ++ * readied task may assume to have received the wake up event. ++ * ++ * @param wq waitqueue to broadcast. ++ * ++ * @return non-zero if at least one task has been readied as a result ++ * of this call, zero otherwise. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_waitqueue_broadcast(struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn void rtdm_waitqueue_flush(struct rtdm_wait_queue *wq) ++ * @brief Flush a waitqueue ++ * ++ * Flushes the waitqueue @a wq, unblocking all waiters with an error ++ * status (-EINTR). ++ * ++ * @param wq waitqueue to flush. ++ * ++ * @return non-zero if at least one task has been readied as a result ++ * of this call, zero otherwise. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_waitqueue_flush(struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn void rtdm_waitqueue_wakeup(struct rtdm_wait_queue *wq, rtdm_task_t waiter) ++ * @brief Signal a particular waiter on a waitqueue ++ * ++ * Signals the waitqueue @a wq, waking up waiter @a waiter only, ++ * which must be currently sleeping on the waitqueue. ++ * ++ * @param wq waitqueue to signal. ++ * ++ * @param waiter RTDM task to wake up. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_waitqueue_wakeup(struct rtdm_wait_queue *wq, rtdm_task_t waiter); ++ ++/** ++ * @fn rtdm_for_each_waiter(rtdm_task_t pos, struct rtdm_wait_queue *wq) ++ * @brief Simple iterator for waitqueues ++ * ++ * This construct traverses the wait list of a given waitqueue ++ * @a wq, assigning each RTDM task pointer to the cursor variable ++ * @a pos, which must be of type rtdm_task_t. ++ * ++ * @a wq must have been locked by a call to rtdm_waitqueue_lock() ++ * prior to traversing its wait list. ++ * ++ * @param pos cursor variable holding a pointer to the RTDM task ++ * being fetched. ++ * ++ * @param wq waitqueue to scan. ++ * ++ * @note The waitqueue should not be signaled, broadcast or flushed ++ * during the traversal, unless the loop is aborted immediately ++ * after. Should multiple waiters be readied while iterating, the safe ++ * form rtdm_for_each_waiter_safe() must be used for traversal ++ * instead. ++ * ++ * @coretags{unrestricted} ++ */ ++rtdm_for_each_waiter(rtdm_task_t pos, struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn rtdm_for_each_waiter_safe(rtdm_task_t pos, rtdm_task_t tmp, struct rtdm_wait_queue *wq) ++ * @brief Safe iterator for waitqueues ++ * ++ * This construct traverses the wait list of a given waitqueue ++ * @a wq, assigning each RTDM task pointer to the cursor variable ++ * @a pos, which must be of type rtdm_task_t. ++ * ++ * Unlike with rtdm_for_each_waiter(), the waitqueue may be signaled, ++ * broadcast or flushed during the traversal. 
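
The producer side of the previous sketches: an interrupt handler updates the wait condition under the waitqueue lock, then signals the sleeper. The register read is stubbed out and the mydrv_* names remain hypothetical:

#include <rtdm/driver.h>

static int mydrv_interrupt(rtdm_irq_t *irq_handle)
{
        struct mydrv_state *st = rtdm_irq_get_arg(irq_handle, struct mydrv_state);
        rtdm_lockctx_t ctx;

        rtdm_waitqueue_lock(&st->wq, ctx);
        st->sample = 0xdeadbeef;        /* would be read from the device here */
        st->data_ready = 1;             /* update the condition first... */
        rtdm_waitqueue_unlock(&st->wq, ctx);

        /* ...then wake up a sleeper blocked in mydrv_read_rt(). */
        rtdm_waitqueue_signal(&st->wq);

        return RTDM_IRQ_HANDLED;
}
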
++ * ++ * @a wq must have been locked by a call to rtdm_waitqueue_lock() ++ * prior to traversing its wait list. ++ * ++ * @param pos cursor variable holding a pointer to the RTDM task ++ * being fetched. ++ * ++ * @param tmp temporary cursor variable. ++ * ++ * @param wq waitqueue to scan. ++ * ++ * @coretags{unrestricted} ++ */ ++rtdm_for_each_waiter_safe(rtdm_task_t pos, rtdm_task_t tmp, struct rtdm_wait_queue *wq); ++ ++/** @} rtdm_sync */ ++ ++/** ++ * @defgroup rtdm_interdriver_api Driver to driver services ++ * Inter-driver interface ++ *@{ ++ */ ++ ++/** ++ * @brief Open a device ++ * ++ * Refer to rtdm_open() for parameters and return values ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_open(const char *path, int oflag, ...); ++ ++/** ++ * @brief Create a socket ++ * ++ * Refer to rtdm_socket() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_socket(int protocol_family, int socket_type, int protocol); ++ ++/** ++ * @brief Close a device or socket ++ * ++ * Refer to rtdm_close() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_close(int fd); ++ ++/** ++ * @brief Issue an IOCTL ++ * ++ * Refer to rtdm_ioctl() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_ioctl(int fd, int request, ...); ++ ++/** ++ * @brief Read from device ++ * ++ * Refer to rtdm_read() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_read(int fd, void *buf, size_t nbyte); ++ ++/** ++ * @brief Write to device ++ * ++ * Refer to rtdm_write() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_write(int fd, const void *buf, size_t nbyte); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * Refer to rtdm_recvmsg() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recvmsg(int fd, struct user_msghdr *msg, int flags); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * Refer to rtdm_recvfrom() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recvfrom(int fd, void *buf, size_t len, int flags, ++ struct sockaddr *from, socklen_t *fromlen); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * Refer to rtdm_recv() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recv(int fd, void *buf, size_t len, int flags); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * Refer to rtdm_sendmsg() for parameters and return values. 
Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_sendmsg(int fd, const struct user_msghdr *msg, int flags); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * Refer to rtdm_sendto() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_sendto(int fd, const void *buf, size_t len, int flags, ++ const struct sockaddr *to, socklen_t tolen); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * Refer to rtdm_send() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_send(int fd, const void *buf, size_t len, int flags); ++ ++/** ++ * @brief Bind to local address ++ * ++ * Refer to rtdm_bind() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_bind(int fd, const struct sockaddr *my_addr, socklen_t addrlen); ++ ++/** ++ * @brief Connect to remote address ++ * ++ * Refer to rtdm_connect() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++int rtdm_connect(int fd, const struct sockaddr *serv_addr, socklen_t addrlen); ++ ++/** ++ * @brief Listen to incoming connection requests ++ * ++ * Refer to rtdm_listen() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_listen(int fd, int backlog); ++ ++/** ++ * @brief Accept a connection request ++ * ++ * Refer to rtdm_accept() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++int rtdm_accept(int fd, struct sockaddr *addr, socklen_t *addrlen); ++ ++/** ++ * @brief Shut down parts of a connection ++ * ++ * Refer to rtdm_shutdown() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_shutdown(int fd, int how); ++ ++/** ++ * @brief Get socket option ++ * ++ * Refer to rtdm_getsockopt() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getsockopt(int fd, int level, int optname, void *optval, ++ socklen_t *optlen); ++ ++/** ++ * @brief Set socket option ++ * ++ * Refer to rtdm_setsockopt() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_setsockopt(int fd, int level, int optname, const void *optval, ++ socklen_t optlen); ++ ++/** ++ * @brief Get local socket address ++ * ++ * Refer to rtdm_getsockname() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". 
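
A sketch of the inter-driver calls above: a stacked driver attaches to another RTDM device from a non-real-time context (module init or a state-management start handler) and releases it symmetrically. The device path and the request code are made up, and it is assumed the kernel-side prototypes are available via <rtdm/rtdm.h>:

#include <linux/fcntl.h>
#include <rtdm/driver.h>
#include <rtdm/rtdm.h>

static int lowlevel_fd = -1;    /* connection to the underlying RTDM device */

static int stacked_attach(void)
{
        int ret;

        /* rtdm_open() is secondary-only: call it from module init or ->start(). */
        lowlevel_fd = rtdm_open("/dev/rtdm/lowlevel0", O_RDWR);
        if (lowlevel_fd < 0)
                return lowlevel_fd;

        /* Configure the lower device; 0x4711 is a made-up request code. */
        ret = rtdm_ioctl(lowlevel_fd, 0x4711, 115200UL);
        if (ret < 0) {
                rtdm_close(lowlevel_fd);
                lowlevel_fd = -1;
                return ret;
        }

        return 0;
}

static void stacked_detach(void)
{
        if (lowlevel_fd >= 0)
                rtdm_close(lowlevel_fd);        /* secondary-only as well */
}
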
++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getsockname(int fd, struct sockaddr *name, socklen_t *namelen); ++ ++/** ++ * @brief Get socket destination address ++ * ++ * Refer to rtdm_getpeername() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getpeername(int fd, struct sockaddr *name, socklen_t *namelen); ++ ++/** @} Inter-driver calls */ ++ ++/** @} */ ++ ++/*! ++ * @addtogroup rtdm_user_api ++ * @{ ++ */ ++ ++/** ++ * @brief Open a device ++ * ++ * @param[in] path Device name ++ * @param[in] oflag Open flags ++ * @param ... Further parameters will be ignored. ++ * ++ * @return Positive file descriptor value on success, otherwise a negative ++ * error code. ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c open() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_open(const char *path, int oflag, ...); ++ ++/** ++ * @brief Create a socket ++ * ++ * @param[in] protocol_family Protocol family (@c PF_xxx) ++ * @param[in] socket_type Socket type (@c SOCK_xxx) ++ * @param[in] protocol Protocol ID, 0 for default ++ * ++ * @return Positive file descriptor value on success, otherwise a negative ++ * error code. ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c socket() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_socket(int protocol_family, int socket_type, int protocol); ++ ++/** ++ * @brief Close a device or socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_open() or rtdm_socket() ++ * ++ * @return 0 on success, otherwise a negative error code. ++ * ++ * @note If the matching rtdm_open() or rtdm_socket() call took place in ++ * non-real-time context, rtdm_close() must be issued within non-real-time ++ * as well. Otherwise, the call will fail. ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c close() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_close(int fd); ++ ++/** ++ * @brief Issue an IOCTL ++ * ++ * @param[in] fd File descriptor as returned by rtdm_open() or rtdm_socket() ++ * @param[in] request IOCTL code ++ * @param ... Optional third argument, depending on IOCTL function ++ * (@c void @c * or @c unsigned @c long) ++ * ++ * @return Positiv value on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c ioctl() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_ioctl(int fd, int request, ...); ++ ++/** ++ * @brief Read from device ++ * ++ * @param[in] fd File descriptor as returned by rtdm_open() ++ * @param[out] buf Input buffer ++ * @param[in] nbyte Number of bytes to read ++ * ++ * @return Number of bytes read, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". 
++ * ++ * @see @c read() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_read(int fd, void *buf, size_t nbyte); ++ ++/** ++ * @brief Write to device ++ * ++ * @param[in] fd File descriptor as returned by rtdm_open() ++ * @param[in] buf Output buffer ++ * @param[in] nbyte Number of bytes to write ++ * ++ * @return Number of bytes written, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c write() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_write(int fd, const void *buf, size_t nbyte); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in,out] msg Message descriptor ++ * @param[in] flags Message flags ++ * ++ * @return Number of bytes received, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c recvmsg() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recvmsg(int fd, struct user_msghdr *msg, int flags); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[out] buf Message buffer ++ * @param[in] len Message buffer size ++ * @param[in] flags Message flags ++ * @param[out] from Buffer for message sender address ++ * @param[in,out] fromlen Address buffer size ++ * ++ * @return Number of bytes received, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c recvfrom() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recvfrom(int fd, void *buf, size_t len, int flags, ++ struct sockaddr *from, socklen_t *fromlen); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[out] buf Message buffer ++ * @param[in] len Message buffer size ++ * @param[in] flags Message flags ++ * ++ * @return Number of bytes received, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c recv() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recv(int fd, void *buf, size_t len, int flags); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] msg Message descriptor ++ * @param[in] flags Message flags ++ * ++ * @return Number of bytes sent, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". 
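
On the application side, these services are reached through the regular POSIX names once the program is linked against libcobalt, which redirects them to the real-time core for RTDM devices. A short user-space sketch; the device node name is hypothetical:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[64];
        ssize_t n;
        int fd;

        fd = open("/dev/rtdm/mydev0", O_RDWR); /* hypothetical device node */
        if (fd < 0) {
                perror("open");
                return 1;
        }

        n = read(fd, buf, sizeof(buf)); /* served by the driver's ->read_rt/_nrt */
        if (n > 0)
                printf("read %zd bytes\n", n);

        close(fd);
        return 0;
}
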
++ * ++ * @see @c sendmsg() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_sendmsg(int fd, const struct user_msghdr *msg, int flags); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] buf Message buffer ++ * @param[in] len Message buffer size ++ * @param[in] flags Message flags ++ * @param[in] to Buffer for message destination address ++ * @param[in] tolen Address buffer size ++ * ++ * @return Number of bytes sent, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c sendto() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_sendto(int fd, const void *buf, size_t len, int flags, ++ const struct sockaddr *to, socklen_t tolen); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] buf Message buffer ++ * @param[in] len Message buffer size ++ * @param[in] flags Message flags ++ * ++ * @return Number of bytes sent, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c send() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_send(int fd, const void *buf, size_t len, int flags); ++ ++/** ++ * @brief Bind to local address ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] my_addr Address buffer ++ * @param[in] addrlen Address buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c bind() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++int rtdm_bind(int fd, const struct sockaddr *my_addr, socklen_t addrlen); ++ ++/** ++ * @brief Connect to remote address ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] serv_addr Address buffer ++ * @param[in] addrlen Address buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c connect() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++int rtdm_connect(int fd, const struct sockaddr *serv_addr, ++ socklen_t addrlen); ++ ++/** ++ * @brief Listen for incomming connection requests ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] backlog Maximum queue length ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". 
++ * ++ * @see @c listen() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_listen(int fd, int backlog); ++ ++/** ++ * @brief Accept connection requests ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[out] addr Buffer for remote address ++ * @param[in,out] addrlen Address buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c accept() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++int rtdm_accept(int fd, struct sockaddr *addr, socklen_t *addrlen); ++ ++/** ++ * @brief Shut down parts of a connection ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] how Specifies the part to be shut down (@c SHUT_xxx) ++* ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c shutdown() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_shutdown(int fd, int how); ++ ++/** ++ * @brief Get socket option ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] level Addressed stack level ++ * @param[in] optname Option name ID ++ * @param[out] optval Value buffer ++ * @param[in,out] optlen Value buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c getsockopt() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getsockopt(int fd, int level, int optname, void *optval, ++ socklen_t *optlen); ++ ++/** ++ * @brief Set socket option ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] level Addressed stack level ++ * @param[in] optname Option name ID ++ * @param[in] optval Value buffer ++ * @param[in] optlen Value buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c setsockopt() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_setsockopt(int fd, int level, int optname, const void *optval, ++ socklen_t optlen); ++ ++/** ++ * @brief Get local socket address ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[out] name Address buffer ++ * @param[in,out] namelen Address buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". 
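
A user-space socket sketch for the calls above, assuming a protocol driver such as RTnet has registered AF_INET/SOCK_DGRAM support with RTDM; with libcobalt wrapping, socket() is first resolved against the RTDM protocol registry and falls back to the regular Linux stack otherwise. The address and port are made up:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

ssize_t send_datagram(const void *payload, size_t len)
{
        struct sockaddr_in dst;
        ssize_t ret;
        int fd;

        fd = socket(AF_INET, SOCK_DGRAM, 0);
        if (fd < 0)
                return -1;

        memset(&dst, 0, sizeof(dst));
        dst.sin_family = AF_INET;
        dst.sin_port = htons(7777);                     /* made up */
        dst.sin_addr.s_addr = inet_addr("192.168.0.2"); /* made up */

        ret = sendto(fd, payload, len, 0,
                     (struct sockaddr *)&dst, sizeof(dst));
        close(fd);

        return ret;
}
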
++ * ++ * @see @c getsockname() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getsockname(int fd, struct sockaddr *name, socklen_t *namelen); ++ ++/** ++ * @brief Get socket destination address ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[out] name Address buffer ++ * @param[in,out] namelen Address buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c getpeername() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getpeername(int fd, struct sockaddr *name, socklen_t *namelen); ++ ++#endif /* DOXYGEN_CPP */ ++ ++/** @} */ +--- linux/kernel/xenomai/rtdm/device.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/device.c 2021-04-07 16:01:26.166635682 +0800 +@@ -0,0 +1,649 @@ ++/* ++ * Real-Time Driver Model for Xenomai, device management ++ * ++ * Copyright (C) 2005 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "rtdm/internal.h" ++#include ++#include ++ ++/** ++ * @ingroup rtdm ++ * @defgroup rtdm_profiles Device Profiles ++ * ++ * Pre-defined classes of real-time devices ++ * ++ * Device profiles define which operation handlers a driver of a ++ * certain class of devices has to implement, which name or protocol ++ * it has to register, which IOCTLs it has to provide, and further ++ * details. Sub-classes can be defined in order to extend a device ++ * profile with more hardware-specific functions. 
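
Putting the pieces together, a driver describes its profile and handlers in a struct rtdm_driver / struct rtdm_device pair. The sketch below wires up the hypothetical handlers from the earlier examples; the class/subclass choices and the device label are arbitrary:

#include <rtdm/driver.h>

static struct rtdm_driver mydrv_driver = {
        .profile_info   = RTDM_PROFILE_INFO(mydrv,
                                            RTDM_CLASS_EXPERIMENTAL,
                                            RTDM_SUBCLASS_GENERIC,
                                            1),
        .device_flags   = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE,
        .device_count   = 1,
        .context_size   = sizeof(struct mydrv_state),
        .ops = {
                .open           = mydrv_open,
                .close          = mydrv_close,
                .ioctl_rt       = mydrv_ioctl_rt,
                .read_rt        = mydrv_read_rt,
        },
};

static struct rtdm_device mydrv_device = {
        .driver = &mydrv_driver,
        .label  = "mydev%d",    /* shows up as /dev/rtdm/mydev0 */
};
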
++ */ ++ ++/** ++ * @addtogroup rtdm_driver_interface ++ * @{ ++ */ ++ ++#define RTDM_DEVICE_MAGIC 0x82846877 ++ ++static struct rb_root protocol_devices; ++ ++static DEFINE_MUTEX(register_lock); ++static DECLARE_BITMAP(protocol_devices_minor_map, RTDM_MAX_MINOR); ++ ++static struct class *rtdm_class; ++ ++static int enosys(void) ++{ ++ return -ENOSYS; ++} ++ ++void __rtdm_put_device(struct rtdm_device *dev) ++{ ++ secondary_mode_only(); ++ ++ if (atomic_dec_and_test(&dev->refcount)) ++ wake_up(&dev->putwq); ++} ++ ++static inline xnkey_t get_proto_id(int pf, int type) ++{ ++ xnkey_t llpf = (unsigned int)pf; ++ return (llpf << 32) | (unsigned int)type; ++} ++ ++struct rtdm_device *__rtdm_get_namedev(const char *path) ++{ ++ struct rtdm_device *dev; ++ xnhandle_t handle; ++ int ret; ++ ++ secondary_mode_only(); ++ ++ /* skip common /dev prefix */ ++ if (strncmp(path, "/dev/", 5) == 0) ++ path += 5; ++ ++ /* skip RTDM devnode root */ ++ if (strncmp(path, "rtdm/", 5) == 0) ++ path += 5; ++ ++ ret = xnregistry_bind(path, XN_NONBLOCK, XN_RELATIVE, &handle); ++ if (ret) ++ return NULL; ++ ++ mutex_lock(®ister_lock); ++ ++ dev = xnregistry_lookup(handle, NULL); ++ if (dev && dev->magic == RTDM_DEVICE_MAGIC) ++ __rtdm_get_device(dev); ++ else ++ dev = NULL; ++ ++ mutex_unlock(®ister_lock); ++ ++ return dev; ++} ++ ++struct rtdm_device *__rtdm_get_protodev(int protocol_family, int socket_type) ++{ ++ struct rtdm_device *dev = NULL; ++ struct xnid *xnid; ++ xnkey_t id; ++ ++ secondary_mode_only(); ++ ++ id = get_proto_id(protocol_family, socket_type); ++ ++ mutex_lock(®ister_lock); ++ ++ xnid = xnid_fetch(&protocol_devices, id); ++ if (xnid) { ++ dev = container_of(xnid, struct rtdm_device, proto.id); ++ __rtdm_get_device(dev); ++ } ++ ++ mutex_unlock(®ister_lock); ++ ++ return dev; ++} ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_device_register Device Registration Services ++ * @{ ++ */ ++ ++static char *rtdm_devnode(struct device *dev, umode_t *mode) ++{ ++ return kasprintf(GFP_KERNEL, "rtdm/%s", dev_name(dev)); ++} ++ ++static ssize_t profile_show(struct device *kdev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct rtdm_device *dev = dev_get_drvdata(kdev); ++ ++ return sprintf(buf, "%d,%d\n", ++ dev->driver->profile_info.class_id, ++ dev->driver->profile_info.subclass_id); ++} ++ ++static ssize_t refcount_show(struct device *kdev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct rtdm_device *dev = dev_get_drvdata(kdev); ++ ++ return sprintf(buf, "%d\n", atomic_read(&dev->refcount)); ++} ++ ++#define cat_count(__buf, __str) \ ++ ({ \ ++ int __ret = sizeof(__str) - 1; \ ++ strcat(__buf, __str); \ ++ __ret; \ ++ }) ++ ++static ssize_t flags_show(struct device *kdev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct rtdm_device *dev = dev_get_drvdata(kdev); ++ struct rtdm_driver *drv = dev->driver; ++ ++ return sprintf(buf, "%#x\n", drv->device_flags); ++ ++} ++ ++static ssize_t type_show(struct device *kdev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct rtdm_device *dev = dev_get_drvdata(kdev); ++ struct rtdm_driver *drv = dev->driver; ++ int ret; ++ ++ if (drv->device_flags & RTDM_NAMED_DEVICE) ++ ret = cat_count(buf, "named\n"); ++ else ++ ret = cat_count(buf, "protocol\n"); ++ ++ return ret; ++ ++} ++ ++#ifdef ATTRIBUTE_GROUPS ++ ++static DEVICE_ATTR_RO(profile); ++static DEVICE_ATTR_RO(refcount); ++static DEVICE_ATTR_RO(flags); ++static DEVICE_ATTR_RO(type); ++ ++static struct attribute *rtdm_attrs[] = { ++ &dev_attr_profile.attr, 
++ &dev_attr_refcount.attr, ++ &dev_attr_flags.attr, ++ &dev_attr_type.attr, ++ NULL, ++}; ++ATTRIBUTE_GROUPS(rtdm); ++ ++#else /* !ATTRIBUTE_GROUPS */ ++ ++/* ++ * Cope with legacy sysfs attributes. Scheduled for removal when 3.10 ++ * is at EOL for us. ++ */ ++static struct device_attribute rtdm_attrs[] = { ++ DEVICE_ATTR_RO(profile), ++ DEVICE_ATTR_RO(refcount), ++ DEVICE_ATTR_RO(flags), ++ DEVICE_ATTR_RO(type), ++ __ATTR_NULL ++}; ++ ++#define dev_groups dev_attrs ++#define rtdm_groups rtdm_attrs ++ ++#endif /* !ATTRIBUTE_GROUPS */ ++ ++static int state_change_notifier(struct notifier_block *nb, ++ unsigned long action, void *data) ++{ ++ struct rtdm_driver *drv; ++ int ret; ++ ++ drv = container_of(nb, struct rtdm_driver, nb_statechange); ++ ++ switch (action) { ++ case COBALT_STATE_WARMUP: ++ if (drv->smops.start == NULL) ++ return NOTIFY_DONE; ++ ret = drv->smops.start(drv); ++ if (ret) ++ printk(XENO_WARNING ++ "failed starting driver %s (%d)\n", ++ drv->profile_info.name, ret); ++ break; ++ case COBALT_STATE_TEARDOWN: ++ if (drv->smops.stop == NULL) ++ return NOTIFY_DONE; ++ ret = drv->smops.stop(drv); ++ if (ret) ++ printk(XENO_WARNING ++ "failed stopping driver %s (%d)\n", ++ drv->profile_info.name, ret); ++ break; ++ default: ++ return NOTIFY_DONE; ++ } ++ ++ return NOTIFY_OK; ++} ++ ++static int register_driver(struct rtdm_driver *drv) ++{ ++ dev_t rdev; ++ int ret; ++ ++ if (drv->profile_info.magic == RTDM_CLASS_MAGIC) { ++ atomic_inc(&drv->refcount); ++ return 0; ++ } ++ ++ if (drv->profile_info.magic != ~RTDM_CLASS_MAGIC) { ++ XENO_WARN_ON_ONCE(COBALT, 1); ++ return -EINVAL; ++ } ++ ++ switch (drv->device_flags & RTDM_DEVICE_TYPE_MASK) { ++ case RTDM_NAMED_DEVICE: ++ case RTDM_PROTOCOL_DEVICE: ++ break; ++ default: ++ printk(XENO_WARNING "%s has invalid device type (%#x)\n", ++ drv->profile_info.name, ++ drv->device_flags & RTDM_DEVICE_TYPE_MASK); ++ return -EINVAL; ++ } ++ ++ if (drv->device_count <= 0 || ++ drv->device_count > RTDM_MAX_MINOR) { ++ printk(XENO_WARNING "%s has invalid device count (%d)\n", ++ drv->profile_info.name, drv->device_count); ++ return -EINVAL; ++ } ++ ++ if ((drv->device_flags & RTDM_NAMED_DEVICE) == 0) ++ goto done; ++ ++ if (drv->base_minor < 0 || ++ drv->base_minor >= RTDM_MAX_MINOR) { ++ printk(XENO_WARNING "%s has invalid base minor (%d)\n", ++ drv->profile_info.name, drv->base_minor); ++ return -EINVAL; ++ } ++ ++ ret = alloc_chrdev_region(&rdev, drv->base_minor, drv->device_count, ++ drv->profile_info.name); ++ if (ret) { ++ printk(XENO_WARNING "cannot allocate chrdev region %s[%d..%d]\n", ++ drv->profile_info.name, drv->base_minor, ++ drv->base_minor + drv->device_count - 1); ++ return ret; ++ } ++ ++ cdev_init(&drv->named.cdev, &rtdm_dumb_fops); ++ ret = cdev_add(&drv->named.cdev, rdev, drv->device_count); ++ if (ret) { ++ printk(XENO_WARNING "cannot create cdev series for %s\n", ++ drv->profile_info.name); ++ goto fail_cdev; ++ } ++ ++ drv->named.major = MAJOR(rdev); ++ bitmap_zero(drv->minor_map, RTDM_MAX_MINOR); ++ ++done: ++ atomic_set(&drv->refcount, 1); ++ drv->nb_statechange.notifier_call = state_change_notifier; ++ drv->nb_statechange.priority = 0; ++ cobalt_add_state_chain(&drv->nb_statechange); ++ drv->profile_info.magic = RTDM_CLASS_MAGIC; ++ ++ return 0; ++ ++fail_cdev: ++ unregister_chrdev_region(rdev, drv->device_count); ++ ++ return ret; ++} ++ ++static void unregister_driver(struct rtdm_driver *drv) ++{ ++ XENO_BUG_ON(COBALT, drv->profile_info.magic != RTDM_CLASS_MAGIC); ++ ++ if (!atomic_dec_and_test(&drv->refcount)) ++ 
return; ++ ++ cobalt_remove_state_chain(&drv->nb_statechange); ++ ++ drv->profile_info.magic = ~RTDM_CLASS_MAGIC; ++ ++ if (drv->device_flags & RTDM_NAMED_DEVICE) { ++ cdev_del(&drv->named.cdev); ++ unregister_chrdev_region(MKDEV(drv->named.major, drv->base_minor), ++ drv->device_count); ++ } ++} ++ ++/** ++ * @brief Register a RTDM device ++ * ++ * Registers a device in the RTDM namespace. ++ * ++ * @param[in] dev Device descriptor. ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EINVAL is returned if the descriptor contains invalid ++ * entries. RTDM_PROFILE_INFO() must appear in the list of ++ * initializers for the driver properties. ++ * ++ * - -EEXIST is returned if the specified device name of protocol ID is ++ * already in use. ++ * ++ * - -ENOMEM is returned if a memory allocation failed in the process ++ * of registering the device. ++ * ++ * - -EAGAIN is returned if no registry slot is available (check/raise ++ * CONFIG_XENO_OPT_REGISTRY_NRSLOTS). ++ * ++ * - -ENOSYS is returned if the real-time core is disabled. ++ * ++ * - -ENXIO is returned if no valid minor could be assigned ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_dev_register(struct rtdm_device *dev) ++{ ++ struct class *kdev_class = rtdm_class; ++ struct device *kdev = NULL; ++ struct rtdm_driver *drv; ++ int ret, major, minor; ++ xnkey_t id; ++ dev_t rdev; ++ const char *dev_name; ++ ++ secondary_mode_only(); ++ ++ if (!realtime_core_enabled()) ++ return -ENOSYS; ++ ++ mutex_lock(®ister_lock); ++ ++ dev->name = NULL; ++ drv = dev->driver; ++ ret = register_driver(drv); ++ if (ret) { ++ mutex_unlock(®ister_lock); ++ return ret; ++ } ++ ++ dev->ops = drv->ops; ++ if (drv->device_flags & RTDM_NAMED_DEVICE) ++ dev->ops.socket = (typeof(dev->ops.socket))enosys; ++ else ++ dev->ops.open = (typeof(dev->ops.open))enosys; ++ ++ INIT_LIST_HEAD(&dev->openfd_list); ++ init_waitqueue_head(&dev->putwq); ++ dev->ops.close = __rtdm_dev_close; /* Interpose on driver's handler. 
*/ ++ atomic_set(&dev->refcount, 0); ++ ++ if (drv->profile_info.kdev_class) ++ kdev_class = drv->profile_info.kdev_class; ++ ++ if (drv->device_flags & RTDM_NAMED_DEVICE) { ++ if (drv->device_flags & RTDM_FIXED_MINOR) { ++ minor = dev->minor; ++ if (minor < 0 || ++ minor >= drv->base_minor + drv->device_count) { ++ ret = -ENXIO; ++ goto fail; ++ } ++ } else { ++ minor = find_first_zero_bit(drv->minor_map, RTDM_MAX_MINOR); ++ if (minor >= RTDM_MAX_MINOR) { ++ ret = -ENXIO; ++ goto fail; ++ } ++ dev->minor = minor; ++ } ++ ++ major = drv->named.major; ++ dev->name = kasformat(dev->label, minor); ++ if (dev->name == NULL) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ if (dev->name[0] == '/') { ++ dev_name = dev->name+1; ++ } else { ++ dev_name = dev->name; ++ } ++ ret = xnregistry_enter(dev_name, dev, ++ &dev->named.handle, NULL); ++ if (ret) ++ goto fail; ++ ++ rdev = MKDEV(major, minor); ++ kdev = device_create(kdev_class, NULL, rdev, ++ dev, kbasename(dev->label), minor); ++ if (IS_ERR(kdev)) { ++ xnregistry_remove(dev->named.handle); ++ ret = PTR_ERR(kdev); ++ goto fail2; ++ } ++ __set_bit(minor, drv->minor_map); ++ } else { ++ minor = find_first_zero_bit(protocol_devices_minor_map, ++ RTDM_MAX_MINOR); ++ if (minor >= RTDM_MAX_MINOR) { ++ ret = -ENXIO; ++ goto fail; ++ } ++ dev->minor = minor; ++ ++ dev->name = kstrdup(dev->label, GFP_KERNEL); ++ if (dev->name == NULL) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ rdev = MKDEV(0, minor); ++ kdev = device_create(kdev_class, NULL, rdev, ++ dev, dev->name); ++ if (IS_ERR(kdev)) { ++ ret = PTR_ERR(kdev); ++ goto fail2; ++ } ++ ++ id = get_proto_id(drv->protocol_family, drv->socket_type); ++ ret = xnid_enter(&protocol_devices, &dev->proto.id, id); ++ if (ret < 0) ++ goto fail; ++ __set_bit(minor, protocol_devices_minor_map); ++ } ++ ++ dev->rdev = rdev; ++ dev->kdev = kdev; ++ dev->magic = RTDM_DEVICE_MAGIC; ++ dev->kdev_class = kdev_class; ++ ++ mutex_unlock(®ister_lock); ++ ++ trace_cobalt_device_register(dev); ++ ++ return 0; ++fail: ++ if (kdev) ++ device_destroy(kdev_class, rdev); ++fail2: ++ unregister_driver(drv); ++ ++ mutex_unlock(®ister_lock); ++ ++ if (dev->name) ++ kfree(dev->name); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_dev_register); ++ ++/** ++ * @brief Unregister a RTDM device ++ * ++ * Removes the device from the RTDM namespace. This routine first ++ * attempts to teardown all active connections to the @a device prior ++ * to unregistering. ++ * ++ * @param[in] dev Device descriptor. ++ * ++ * @coretags{secondary-only} ++ */ ++void rtdm_dev_unregister(struct rtdm_device *dev) ++{ ++ struct rtdm_driver *drv = dev->driver; ++ ++ secondary_mode_only(); ++ ++ trace_cobalt_device_unregister(dev); ++ ++ /* Lock out any further connection. */ ++ dev->magic = ~RTDM_DEVICE_MAGIC; ++ ++ /* Flush all fds from this device. */ ++ rtdm_device_flush_fds(dev); ++ ++ /* Then wait for the ongoing connections to finish. */ ++ wait_event(dev->putwq, ++ atomic_read(&dev->refcount) == 0); ++ ++ mutex_lock(®ister_lock); ++ ++ if (drv->device_flags & RTDM_NAMED_DEVICE) { ++ xnregistry_remove(dev->named.handle); ++ __clear_bit(dev->minor, drv->minor_map); ++ } else { ++ xnid_remove(&protocol_devices, &dev->proto.id); ++ __clear_bit(dev->minor, protocol_devices_minor_map); ++ } ++ ++ device_destroy(dev->kdev_class, dev->rdev); ++ ++ unregister_driver(drv); ++ ++ mutex_unlock(®ister_lock); ++ ++ kfree(dev->name); ++} ++EXPORT_SYMBOL_GPL(rtdm_dev_unregister); ++ ++/** ++ * @brief Set the kernel device class of a RTDM driver. 
++ * ++ * Set the kernel device class assigned to the RTDM driver. By ++ * default, RTDM drivers belong to Linux's "rtdm" device class, ++ * creating a device node hierarchy rooted at /dev/rtdm, and sysfs ++ * nodes under /sys/class/rtdm. ++ * ++ * This call assigns a user-defined kernel device class to the RTDM ++ * driver, so that its devices are created into a different system ++ * hierarchy. ++ * ++ * rtdm_drv_set_sysclass() is meaningful only before the first device ++ * which is attached to @a drv is registered by a call to ++ * rtdm_dev_register(). ++ * ++ * @param[in] drv Address of the RTDM driver descriptor. ++ * ++ * @param[in] cls Pointer to the kernel device class. NULL is allowed ++ * to clear a previous setting, switching back to the default "rtdm" ++ * device class. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EBUSY is returned if the kernel device class has already been ++ * set for @a drv, or some device(s) attached to @a drv are currently ++ * registered. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @attention The kernel device class set by this call is not related to ++ * the RTDM class identification as defined by the @ref rtdm_profiles ++ * "RTDM profiles" in any way. This is strictly related to the Linux ++ * kernel device hierarchy. ++ */ ++int rtdm_drv_set_sysclass(struct rtdm_driver *drv, struct class *cls) ++{ ++ if ((cls && drv->profile_info.kdev_class) || ++ atomic_read(&drv->refcount)) ++ return -EBUSY; ++ ++ drv->profile_info.kdev_class = cls; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rtdm_drv_set_sysclass); ++ ++/** @} */ ++ ++int __init rtdm_init(void) ++{ ++ xntree_init(&protocol_devices); ++ ++ rtdm_class = class_create(THIS_MODULE, "rtdm"); ++ if (IS_ERR(rtdm_class)) { ++ printk(XENO_ERR "cannot create RTDM sysfs class\n"); ++ return PTR_ERR(rtdm_class); ++ } ++ rtdm_class->dev_groups = rtdm_groups; ++ rtdm_class->devnode = rtdm_devnode; ++ ++ bitmap_zero(protocol_devices_minor_map, RTDM_MAX_MINOR); ++ ++ return 0; ++} ++ ++void rtdm_cleanup(void) ++{ ++ class_destroy(rtdm_class); ++ /* ++ * NOTE: no need to flush the cleanup_queue as no device is ++ * allowed to unregister as long as there are references. ++ */ ++} ++ ++/** @} */ +--- linux/kernel/xenomai/rtdm/internal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/internal.h 2021-04-07 16:01:26.161635689 +0800 +@@ -0,0 +1,64 @@ ++/* ++ * Copyright (C) 2005-2007 Jan Kiszka . ++ * Copyright (C) 2005 Joerg Langenberg . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _RTDM_INTERNAL_H ++#define _RTDM_INTERNAL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static inline void __rtdm_get_device(struct rtdm_device *device) ++{ ++ atomic_inc(&device->refcount); ++} ++ ++void __rtdm_put_device(struct rtdm_device *device); ++ ++struct rtdm_device *__rtdm_get_namedev(const char *path); ++ ++struct rtdm_device *__rtdm_get_protodev(int protocol_family, ++ int socket_type); ++ ++void __rtdm_dev_close(struct rtdm_fd *fd); ++ ++int __rtdm_dev_ioctl_core(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg); ++ ++int __rtdm_mmap_from_fdop(struct rtdm_fd *fd, size_t len, off_t offset, ++ int prot, int flags, void **pptr); ++ ++/* nklock held, irqs off. */ ++static inline void rtdm_fd_get_light(struct rtdm_fd *fd) ++{ ++ ++fd->refs; ++} ++ ++int rtdm_init(void); ++ ++void rtdm_cleanup(void); ++ ++extern const struct file_operations rtdm_dumb_fops; ++ ++#endif /* _RTDM_INTERNAL_H */ +--- linux/kernel/xenomai/rtdm/fd.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/fd.c 2021-04-07 16:01:26.156635696 +0800 +@@ -0,0 +1,1037 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2013,2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "posix/process.h" ++#include "posix/syscall.h" ++#include "posix/clock.h" ++ ++#define RTDM_SETFL_MASK (O_NONBLOCK) ++ ++DEFINE_PRIVATE_XNLOCK(fdtree_lock); ++static LIST_HEAD(rtdm_fd_cleanup_queue); ++static struct semaphore rtdm_fd_cleanup_sem; ++ ++struct rtdm_fd_index { ++ struct xnid id; ++ struct rtdm_fd *fd; ++}; ++ ++static int enosys(void) ++{ ++ return -ENOSYS; ++} ++ ++static int eadv(void) ++{ ++ return -EADV; ++} ++ ++static inline struct rtdm_fd_index * ++fetch_fd_index(struct cobalt_ppd *p, int ufd) ++{ ++ struct xnid *id = xnid_fetch(&p->fds, ufd); ++ if (id == NULL) ++ return NULL; ++ ++ return container_of(id, struct rtdm_fd_index, id); ++} ++ ++static struct rtdm_fd *fetch_fd(struct cobalt_ppd *p, int ufd) ++{ ++ struct rtdm_fd_index *idx = fetch_fd_index(p, ufd); ++ if (idx == NULL) ++ return NULL; ++ ++ return idx->fd; ++} ++ ++#define assign_invalid_handler(__handler) \ ++ do \ ++ (__handler) = (typeof(__handler))eadv; \ ++ while (0) ++ ++#define __assign_default_handler(__handler, __placeholder) \ ++ do \ ++ if ((__handler) == NULL) \ ++ (__handler) = (typeof(__handler))__placeholder; \ ++ while (0) ++ ++/* Calling this handler should beget EADV if not implemented. 
*/ ++#define assign_invalid_default_handler(__handler) \ ++ __assign_default_handler(__handler, eadv) ++ ++/* Calling this handler should beget ENOSYS if not implemented. */ ++#define assign_default_handler(__handler) \ ++ __assign_default_handler(__handler, enosys) ++ ++#define __rt(__handler) __handler ## _rt ++#define __nrt(__handler) __handler ## _nrt ++ ++/* ++ * Install a placeholder returning EADV if none of the dual handlers ++ * are implemented, ENOSYS otherwise for NULL handlers to trigger the ++ * adaptive switch. ++ */ ++#define assign_default_dual_handlers(__handler) \ ++ do \ ++ if (__rt(__handler) || __nrt(__handler)) { \ ++ assign_default_handler(__rt(__handler)); \ ++ assign_default_handler(__nrt(__handler)); \ ++ } else { \ ++ assign_invalid_handler(__rt(__handler)); \ ++ assign_invalid_handler(__nrt(__handler)); \ ++ } \ ++ while (0) ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ ++static inline void set_compat_bit(struct rtdm_fd *fd) ++{ ++ struct pt_regs *regs; ++ ++ if (cobalt_ppd_get(0) == &cobalt_kernel_ppd) ++ fd->compat = 0; ++ else { ++ regs = task_pt_regs(current); ++ XENO_BUG_ON(COBALT, !__xn_syscall_p(regs)); ++ fd->compat = __COBALT_CALL_COMPAT(__xn_reg_sys(regs)); ++ } ++} ++ ++#else /* !CONFIG_XENO_ARCH_SYS3264 */ ++ ++static inline void set_compat_bit(struct rtdm_fd *fd) ++{ ++} ++ ++#endif /* !CONFIG_XENO_ARCH_SYS3264 */ ++ ++int rtdm_fd_enter(struct rtdm_fd *fd, int ufd, unsigned int magic, ++ struct rtdm_fd_ops *ops) ++{ ++ struct cobalt_ppd *ppd; ++ ++ secondary_mode_only(); ++ ++ if (magic == 0) ++ return -EINVAL; ++ ++ assign_default_dual_handlers(ops->ioctl); ++ assign_default_dual_handlers(ops->read); ++ assign_default_dual_handlers(ops->write); ++ assign_default_dual_handlers(ops->recvmsg); ++ assign_default_dual_handlers(ops->sendmsg); ++ assign_invalid_default_handler(ops->select); ++ assign_invalid_default_handler(ops->mmap); ++ ++ ppd = cobalt_ppd_get(0); ++ fd->magic = magic; ++ fd->ops = ops; ++ fd->owner = ppd; ++ fd->ufd = ufd; ++ fd->refs = 1; ++ fd->stale = false; ++ set_compat_bit(fd); ++ INIT_LIST_HEAD(&fd->next); ++ ++ return 0; ++} ++ ++int rtdm_fd_register(struct rtdm_fd *fd, int ufd) ++{ ++ struct rtdm_fd_index *idx; ++ struct cobalt_ppd *ppd; ++ spl_t s; ++ int ret = 0; ++ ++ ppd = cobalt_ppd_get(0); ++ idx = kmalloc(sizeof(*idx), GFP_KERNEL); ++ if (idx == NULL) ++ return -ENOMEM; ++ ++ idx->fd = fd; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ ret = xnid_enter(&ppd->fds, &idx->id, ufd); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ if (ret < 0) { ++ kfree(idx); ++ ret = -EBUSY; ++ } ++ ++ return ret; ++} ++ ++int rtdm_device_new_fd(struct rtdm_fd *fd, int ufd, ++ struct rtdm_device *device) ++{ ++ spl_t s; ++ int ret; ++ ++ ret = rtdm_fd_register(fd, ufd); ++ if (ret < 0) ++ return ret; ++ ++ trace_cobalt_fd_created(fd, ufd); ++ xnlock_get_irqsave(&fdtree_lock, s); ++ list_add(&fd->next, &device->openfd_list); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ return 0; ++} ++ ++/** ++ * @brief Retrieve and lock a RTDM file descriptor ++ * ++ * @param[in] ufd User-side file descriptor ++ * @param[in] magic Magic word for lookup validation ++ * ++ * @return Pointer to the RTDM file descriptor matching @a ++ * ufd. Otherwise: ++ * ++ * - ERR_PTR(-EADV) if the use-space handle is either invalid, or not ++ * managed by RTDM. ++ * ++ * - ERR_PTR(-EBADF) if the underlying device is being torned down at ++ * the time of the call. ++ * ++ * @note The file descriptor returned must be later released by a call ++ * to rtdm_fd_put(). 
++ * ++ * @coretags{unrestricted} ++ */ ++struct rtdm_fd *rtdm_fd_get(int ufd, unsigned int magic) ++{ ++ struct cobalt_ppd *p = cobalt_ppd_get(0); ++ struct rtdm_fd *fd; ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ fd = fetch_fd(p, ufd); ++ if (fd == NULL || (magic != 0 && fd->magic != magic)) { ++ fd = ERR_PTR(-EADV); ++ goto out; ++ } ++ ++ if (fd->stale) { ++ fd = ERR_PTR(-EBADF); ++ goto out; ++ } ++ ++ ++fd->refs; ++out: ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ return fd; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_get); ++ ++struct lostage_trigger_close { ++ struct ipipe_work_header work; /* Must be first */ ++}; ++ ++static int fd_cleanup_thread(void *data) ++{ ++ struct rtdm_fd *fd; ++ int err; ++ spl_t s; ++ ++ for (;;) { ++ set_cpus_allowed_ptr(current, cpu_online_mask); ++ ++ do { ++ err = down_interruptible(&rtdm_fd_cleanup_sem); ++ if (kthread_should_stop()) ++ return 0; ++ } while (err); ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ fd = list_first_entry(&rtdm_fd_cleanup_queue, ++ struct rtdm_fd, cleanup); ++ list_del(&fd->cleanup); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ fd->ops->close(fd); ++ } ++ ++ return 0; ++} ++ ++static void lostage_trigger_close(struct ipipe_work_header *work) ++{ ++ up(&rtdm_fd_cleanup_sem); ++} ++ ++static void __put_fd(struct rtdm_fd *fd, spl_t s) ++{ ++ bool destroy; ++ ++ XENO_WARN_ON(COBALT, fd->refs <= 0); ++ destroy = --fd->refs == 0; ++ if (destroy && !list_empty(&fd->next)) ++ list_del_init(&fd->next); ++ ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ if (!destroy) ++ return; ++ ++ if (ipipe_root_p) ++ fd->ops->close(fd); ++ else { ++ struct lostage_trigger_close closework = { ++ .work = { ++ .size = sizeof(closework), ++ .handler = lostage_trigger_close, ++ }, ++ }; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ list_add_tail(&fd->cleanup, &rtdm_fd_cleanup_queue); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ ipipe_post_work_root(&closework, work); ++ } ++} ++ ++void rtdm_device_flush_fds(struct rtdm_device *dev) ++{ ++ struct rtdm_driver *drv = dev->driver; ++ struct rtdm_fd *fd; ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ ++ while (!list_empty(&dev->openfd_list)) { ++ fd = list_get_entry_init(&dev->openfd_list, struct rtdm_fd, next); ++ fd->stale = true; ++ if (drv->ops.close) { ++ rtdm_fd_get_light(fd); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ drv->ops.close(fd); ++ rtdm_fd_put(fd); ++ xnlock_get_irqsave(&fdtree_lock, s); ++ } ++ } ++ ++ xnlock_put_irqrestore(&fdtree_lock, s); ++} ++ ++/** ++ * @brief Release a RTDM file descriptor obtained via rtdm_fd_get() ++ * ++ * @param[in] fd RTDM file descriptor to release ++ * ++ * @note Every call to rtdm_fd_get() must be matched by a call to ++ * rtdm_fd_put(). ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_fd_put(struct rtdm_fd *fd) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ __put_fd(fd, s); ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_put); ++ ++/** ++ * @brief Hold a reference on a RTDM file descriptor ++ * ++ * @param[in] fd Target file descriptor ++ * ++ * @note rtdm_fd_lock() increments the reference counter of @a fd. You ++ * only need to call this function in special scenarios, e.g. when ++ * keeping additional references to the file descriptor that have ++ * different lifetimes. Only use rtdm_fd_lock() on descriptors that ++ * are currently locked via an earlier rtdm_fd_get()/rtdm_fd_lock() or ++ * while running a device operation handler. 
++ * ++ * @coretags{unrestricted} ++ */ ++int rtdm_fd_lock(struct rtdm_fd *fd) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ if (fd->refs == 0) { ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ return -EIDRM; ++ } ++ ++fd->refs; ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_lock); ++ ++/** ++ * @brief Drop a reference on a RTDM file descriptor ++ * ++ * @param[in] fd Target file descriptor ++ * ++ * @note Every call to rtdm_fd_lock() must be matched by a call to ++ * rtdm_fd_unlock(). ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_fd_unlock(struct rtdm_fd *fd) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ __put_fd(fd, s); ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_unlock); ++ ++int rtdm_fd_fcntl(int ufd, int cmd, ...) ++{ ++ struct rtdm_fd *fd; ++ va_list ap; ++ long arg; ++ int ret; ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) ++ return PTR_ERR(fd); ++ ++ va_start(ap, cmd); ++ arg = va_arg(ap, long); ++ va_end(ap); ++ ++ switch (cmd) { ++ case F_GETFL: ++ ret = fd->oflags; ++ break; ++ case F_SETFL: ++ fd->oflags = (fd->oflags & ~RTDM_SETFL_MASK) | ++ (arg & RTDM_SETFL_MASK); ++ ret = 0; ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ rtdm_fd_put(fd); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_fcntl); ++ ++static struct rtdm_fd *get_fd_fixup_mode(int ufd) ++{ ++ struct xnthread *thread; ++ struct rtdm_fd *fd; ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) ++ return fd; ++ ++ /* ++ * Mode is selected according to the following convention: ++ * ++ * - Cobalt threads must try running the syscall from primary ++ * mode as a first attempt, regardless of their scheduling ++ * class. The driver handler may ask for demoting the caller ++ * to secondary mode by returning -ENOSYS. ++ * ++ * - Regular threads (i.e. not bound to Cobalt) may only run ++ * the syscall from secondary mode. ++ */ ++ thread = xnthread_current(); ++ if (unlikely(ipipe_root_p)) { ++ if (thread == NULL || ++ xnthread_test_localinfo(thread, XNDESCENT)) ++ return fd; ++ } else if (likely(thread)) ++ return fd; ++ ++ /* ++ * We need to switch to the converse mode. Since all callers ++ * bear the "adaptive" tag, we just pass -ENOSYS back to the ++ * syscall dispatcher to get switched to the next mode. ++ */ ++ rtdm_fd_put(fd); ++ ++ return ERR_PTR(-ENOSYS); ++} ++ ++int rtdm_fd_ioctl(int ufd, unsigned int request, ...) 
++{ ++ struct rtdm_fd *fd; ++ void __user *arg; ++ va_list args; ++ int err, ret; ++ ++ fd = get_fd_fixup_mode(ufd); ++ if (IS_ERR(fd)) { ++ err = PTR_ERR(fd); ++ goto out; ++ } ++ ++ va_start(args, request); ++ arg = va_arg(args, void __user *); ++ va_end(args); ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_ioctl(current, fd, ufd, request); ++ ++ if (ipipe_root_p) ++ err = fd->ops->ioctl_nrt(fd, request, arg); ++ else ++ err = fd->ops->ioctl_rt(fd, request, arg); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ if (err < 0) { ++ ret = __rtdm_dev_ioctl_core(fd, request, arg); ++ if (ret != -EADV) ++ err = ret; ++ } ++ ++ rtdm_fd_put(fd); ++ out: ++ if (err < 0) ++ trace_cobalt_fd_ioctl_status(current, fd, ufd, err); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_ioctl); ++ ++ssize_t ++rtdm_fd_read(int ufd, void __user *buf, size_t size) ++{ ++ struct rtdm_fd *fd; ++ ssize_t ret; ++ ++ fd = get_fd_fixup_mode(ufd); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_read(current, fd, ufd, size); ++ ++ if (ipipe_root_p) ++ ret = fd->ops->read_nrt(fd, buf, size); ++ else ++ ret = fd->ops->read_rt(fd, buf, size); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ rtdm_fd_put(fd); ++ ++ out: ++ if (ret < 0) ++ trace_cobalt_fd_read_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_read); ++ ++ssize_t rtdm_fd_write(int ufd, const void __user *buf, size_t size) ++{ ++ struct rtdm_fd *fd; ++ ssize_t ret; ++ ++ fd = get_fd_fixup_mode(ufd); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_write(current, fd, ufd, size); ++ ++ if (ipipe_root_p) ++ ret = fd->ops->write_nrt(fd, buf, size); ++ else ++ ret = fd->ops->write_rt(fd, buf, size); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ rtdm_fd_put(fd); ++ ++ out: ++ if (ret < 0) ++ trace_cobalt_fd_write_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_write); ++ ++ssize_t rtdm_fd_recvmsg(int ufd, struct user_msghdr *msg, int flags) ++{ ++ struct rtdm_fd *fd; ++ ssize_t ret; ++ ++ fd = get_fd_fixup_mode(ufd); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_recvmsg(current, fd, ufd, flags); ++ ++ if (fd->oflags & O_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ ++ if (ipipe_root_p) ++ ret = fd->ops->recvmsg_nrt(fd, msg, flags); ++ else ++ ret = fd->ops->recvmsg_rt(fd, msg, flags); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ rtdm_fd_put(fd); ++out: ++ if (ret < 0) ++ trace_cobalt_fd_recvmsg_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_recvmsg); ++ ++struct cobalt_recvmmsg_timer { ++ struct xntimer timer; ++ struct xnthread *waiter; ++}; ++ ++static void recvmmsg_timeout_handler(struct xntimer *timer) ++{ ++ struct cobalt_recvmmsg_timer *rq; ++ ++ rq = container_of(timer, struct cobalt_recvmmsg_timer, timer); ++ xnthread_set_info(rq->waiter, XNTIMEO); ++ xnthread_resume(rq->waiter, XNDELAY); ++} ++ ++int __rtdm_fd_recvmmsg(int ufd, void __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, void __user *u_timeout, ++ int (*get_mmsg)(struct mmsghdr *mmsg, void __user *u_mmsg), ++ int (*put_mmsg)(void __user **u_mmsg_p, const struct mmsghdr *mmsg), ++ int (*get_timespec)(struct timespec *ts, const void __user *u_ts)) ++{ ++ struct cobalt_recvmmsg_timer rq; ++ xntmode_t tmode = XN_RELATIVE; ++ struct timespec ts = { 0 }; ++ 
int ret = 0, datagrams = 0; ++ xnticks_t timeout = 0; ++ struct mmsghdr mmsg; ++ struct rtdm_fd *fd; ++ void __user *u_p; ++ ssize_t len; ++ spl_t s; ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_recvmmsg(current, fd, ufd, flags); ++ ++ if (u_timeout) { ++ ret = get_timespec(&ts, u_timeout); ++ if (ret) ++ goto fail; ++ ++ if ((unsigned long)ts.tv_nsec >= ONE_BILLION) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ tmode = XN_ABSOLUTE; ++ timeout = ts2ns(&ts); ++ if (timeout == 0) ++ flags |= MSG_DONTWAIT; ++ else { ++ timeout += xnclock_read_monotonic(&nkclock); ++ rq.waiter = xnthread_current(); ++ xntimer_init(&rq.timer, &nkclock, ++ recvmmsg_timeout_handler, ++ NULL, XNTIMER_IGRAVITY); ++ xnlock_get_irqsave(&nklock, s); ++ ret = xntimer_start(&rq.timer, timeout, ++ XN_INFINITE, tmode); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ } ++ ++ if (fd->oflags & O_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ ++ for (u_p = u_msgvec; vlen > 0; vlen--) { ++ ret = get_mmsg(&mmsg, u_p); ++ if (ret) ++ break; ++ len = fd->ops->recvmsg_rt(fd, &mmsg.msg_hdr, flags); ++ if (len < 0) { ++ ret = len; ++ break; ++ } ++ mmsg.msg_len = (unsigned int)len; ++ ret = put_mmsg(&u_p, &mmsg); ++ if (ret) ++ break; ++ datagrams++; ++ /* OOB data requires immediate handling. */ ++ if (mmsg.msg_hdr.msg_flags & MSG_OOB) ++ break; ++ if (flags & MSG_WAITFORONE) ++ flags |= MSG_DONTWAIT; ++ } ++ ++ if (timeout) { ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_destroy(&rq.timer); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++fail: ++ rtdm_fd_put(fd); ++ ++ if (datagrams > 0) ++ ret = datagrams; ++ ++out: ++ trace_cobalt_fd_recvmmsg_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++ ++ssize_t rtdm_fd_sendmsg(int ufd, const struct user_msghdr *msg, int flags) ++{ ++ struct rtdm_fd *fd; ++ ssize_t ret; ++ ++ fd = get_fd_fixup_mode(ufd); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_sendmsg(current, fd, ufd, flags); ++ ++ if (fd->oflags & O_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ ++ if (ipipe_root_p) ++ ret = fd->ops->sendmsg_nrt(fd, msg, flags); ++ else ++ ret = fd->ops->sendmsg_rt(fd, msg, flags); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ rtdm_fd_put(fd); ++out: ++ if (ret < 0) ++ trace_cobalt_fd_sendmsg_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_sendmsg); ++ ++int __rtdm_fd_sendmmsg(int ufd, void __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, ++ int (*get_mmsg)(struct mmsghdr *mmsg, void __user *u_mmsg), ++ int (*put_mmsg)(void __user **u_mmsg_p, const struct mmsghdr *mmsg)) ++{ ++ int ret = 0, datagrams = 0; ++ struct mmsghdr mmsg; ++ struct rtdm_fd *fd; ++ void __user *u_p; ++ ssize_t len; ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_sendmmsg(current, fd, ufd, flags); ++ ++ if (fd->oflags & O_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ ++ for (u_p = u_msgvec; vlen > 0; vlen--) { ++ ret = get_mmsg(&mmsg, u_p); ++ if (ret) ++ break; ++ len = fd->ops->sendmsg_rt(fd, &mmsg.msg_hdr, flags); ++ if (len < 0) { ++ ret = len; ++ break; ++ } ++ mmsg.msg_len = (unsigned int)len; ++ ret = put_mmsg(&u_p, &mmsg); ++ if (ret) ++ break; ++ datagrams++; ++ } ++ ++ rtdm_fd_put(fd); ++ ++ if (datagrams > 0) ++ ret = datagrams; ++ ++out: ++ trace_cobalt_fd_sendmmsg_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++ 
++static void ++__fd_close(struct cobalt_ppd *p, struct rtdm_fd_index *idx, spl_t s) ++{ ++ xnid_remove(&p->fds, &idx->id); ++ __put_fd(idx->fd, s); ++ ++ kfree(idx); ++} ++ ++int rtdm_fd_close(int ufd, unsigned int magic) ++{ ++ struct rtdm_fd_index *idx; ++ struct cobalt_ppd *ppd; ++ struct rtdm_fd *fd; ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ ppd = cobalt_ppd_get(0); ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ idx = fetch_fd_index(ppd, ufd); ++ if (idx == NULL) ++ goto eadv; ++ ++ fd = idx->fd; ++ if (magic != 0 && fd->magic != magic) { ++eadv: ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ return -EADV; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_close(current, fd, ufd, fd->refs); ++ ++ /* ++ * In dual kernel mode, the linux-side fdtable and the RTDM ++ * ->close() handler are asynchronously managed, i.e. the ++ * handler execution may be deferred after the regular file ++ * descriptor was removed from the fdtable if some refs on ++ * rtdm_fd are still pending. ++ */ ++ __fd_close(ppd, idx, s); ++ __close_fd(current->files, ufd); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_close); ++ ++int rtdm_fd_mmap(int ufd, struct _rtdm_mmap_request *rma, ++ void **u_addrp) ++{ ++ struct rtdm_fd *fd; ++ int ret; ++ ++ secondary_mode_only(); ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_mmap(current, fd, ufd, rma); ++ ++ if (rma->flags & (MAP_FIXED|MAP_ANONYMOUS)) { ++ ret = -EADV; ++ goto unlock; ++ } ++ ++ ret = __rtdm_mmap_from_fdop(fd, rma->length, rma->offset, ++ rma->prot, rma->flags, u_addrp); ++unlock: ++ rtdm_fd_put(fd); ++out: ++ if (ret) ++ trace_cobalt_fd_mmap_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++ ++int rtdm_fd_valid_p(int ufd) ++{ ++ struct rtdm_fd *fd; ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ fd = fetch_fd(cobalt_ppd_get(0), ufd); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ return fd != NULL; ++} ++ ++/** ++ * @brief Bind a selector to specified event types of a given file descriptor ++ * @internal ++ * ++ * This function is invoked by higher RTOS layers implementing select-like ++ * services. It shall not be called directly by RTDM drivers. ++ * ++ * @param[in] ufd User-side file descriptor to bind to ++ * @param[in,out] selector Selector object that shall be bound to the given ++ * event ++ * @param[in] type Event type the caller is interested in ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EBADF is returned if the file descriptor @a ufd cannot be resolved. ++ * - -EINVAL is returned if @a type is invalid. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_fd_select(int ufd, struct xnselector *selector, ++ unsigned int type) ++{ ++ struct rtdm_fd *fd; ++ int ret; ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) ++ return PTR_ERR(fd); ++ ++ set_compat_bit(fd); ++ ++ ret = fd->ops->select(fd, selector, type, ufd); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ rtdm_fd_put(fd); ++ ++ return ret; ++} ++ ++static void destroy_fd(void *cookie, struct xnid *id) ++{ ++ struct cobalt_ppd *p = cookie; ++ struct rtdm_fd_index *idx; ++ spl_t s; ++ ++ idx = container_of(id, struct rtdm_fd_index, id); ++ xnlock_get_irqsave(&fdtree_lock, s); ++ __fd_close(p, idx, 0); ++} ++ ++void rtdm_fd_cleanup(struct cobalt_ppd *p) ++{ ++ /* ++ * This is called on behalf of a (userland) task exit handler, ++ * so we don't have to deal with the regular file descriptors, ++ * we only have to empty our own index. 
++ */ ++ xntree_cleanup(&p->fds, p, destroy_fd); ++} ++ ++void rtdm_fd_init(void) ++{ ++ sema_init(&rtdm_fd_cleanup_sem, 0); ++ kthread_run(fd_cleanup_thread, NULL, "rtdm_fd"); ++} ++ ++static inline void warn_user(struct file *file, const char *call) ++{ ++ struct dentry *dentry = file->f_path.dentry; ++ ++ printk(XENO_WARNING ++ "%s[%d] called regular %s() on /dev/rtdm/%s\n", ++ current->comm, task_pid_nr(current), call + 5, dentry->d_name.name); ++} ++ ++static ssize_t dumb_read(struct file *file, char __user *buf, ++ size_t count, loff_t __user *ppos) ++{ ++ warn_user(file, __func__); ++ return -EINVAL; ++} ++ ++static ssize_t dumb_write(struct file *file, const char __user *buf, ++ size_t count, loff_t __user *ppos) ++{ ++ warn_user(file, __func__); ++ return -EINVAL; ++} ++ ++static unsigned int dumb_poll(struct file *file, poll_table *pt) ++{ ++ warn_user(file, __func__); ++ return -EINVAL; ++} ++ ++static long dumb_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ warn_user(file, __func__); ++ return -EINVAL; ++} ++ ++const struct file_operations rtdm_dumb_fops = { ++ .read = dumb_read, ++ .write = dumb_write, ++ .poll = dumb_poll, ++ .unlocked_ioctl = dumb_ioctl, ++}; +--- linux/kernel/xenomai/posix/corectl.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/corectl.h 2021-04-07 16:01:26.149635706 +0800 +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (C) 2016 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_CORECTL_H ++#define _COBALT_POSIX_CORECTL_H ++ ++#include ++#include ++#include ++#include ++ ++struct cobalt_config_vector { ++ void __user *u_buf; ++ size_t u_bufsz; ++}; ++ ++COBALT_SYSCALL_DECL(corectl, ++ (int request, void __user *u_buf, size_t u_bufsz)); ++ ++void cobalt_add_config_chain(struct notifier_block *nb); ++ ++void cobalt_remove_config_chain(struct notifier_block *nb); ++ ++#endif /* !_COBALT_POSIX_CORECTL_H */ +--- linux/kernel/xenomai/posix/sched.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/sched.c 2021-04-07 16:01:26.145635712 +0800 +@@ -0,0 +1,852 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include "internal.h" ++#include "thread.h" ++#include "sched.h" ++#include "clock.h" ++#include ++ ++struct xnsched_class * ++cobalt_sched_policy_param(union xnsched_policy_param *param, ++ int u_policy, const struct sched_param_ex *param_ex, ++ xnticks_t *tslice_r) ++{ ++ struct xnsched_class *sched_class; ++ int prio, policy; ++ xnticks_t tslice; ++ ++ prio = param_ex->sched_priority; ++ tslice = XN_INFINITE; ++ policy = u_policy; ++ ++ /* ++ * NOTE: The user-defined policy may be different than ours, ++ * e.g. SCHED_FIFO,prio=-7 from userland would be interpreted ++ * as SCHED_WEAK,prio=7 in kernel space. ++ */ ++ if (prio < 0) { ++ prio = -prio; ++ policy = SCHED_WEAK; ++ } ++ sched_class = &xnsched_class_rt; ++ param->rt.prio = prio; ++ ++ switch (policy) { ++ case SCHED_NORMAL: ++ if (prio) ++ return NULL; ++ /* ++ * When the weak scheduling class is compiled in, ++ * SCHED_WEAK and SCHED_NORMAL threads are scheduled ++ * by xnsched_class_weak, at their respective priority ++ * levels. Otherwise, SCHED_NORMAL is scheduled by ++ * xnsched_class_rt at priority level #0. ++ */ ++ case SCHED_WEAK: ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ if (prio < XNSCHED_WEAK_MIN_PRIO || ++ prio > XNSCHED_WEAK_MAX_PRIO) ++ return NULL; ++ param->weak.prio = prio; ++ sched_class = &xnsched_class_weak; ++#else ++ if (prio) ++ return NULL; ++#endif ++ break; ++ case SCHED_RR: ++ /* if unspecified, use current one. */ ++ tslice = ts2ns(¶m_ex->sched_rr_quantum); ++ if (tslice == XN_INFINITE && tslice_r) ++ tslice = *tslice_r; ++ /* falldown wanted */ ++ case SCHED_FIFO: ++ if (prio < XNSCHED_FIFO_MIN_PRIO || ++ prio > XNSCHED_FIFO_MAX_PRIO) ++ return NULL; ++ break; ++ case SCHED_COBALT: ++ if (prio < XNSCHED_CORE_MIN_PRIO || ++ prio > XNSCHED_CORE_MAX_PRIO) ++ return NULL; ++ break; ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ case SCHED_SPORADIC: ++ param->pss.normal_prio = param_ex->sched_priority; ++ param->pss.low_prio = param_ex->sched_ss_low_priority; ++ param->pss.current_prio = param->pss.normal_prio; ++ param->pss.init_budget = ts2ns(¶m_ex->sched_ss_init_budget); ++ param->pss.repl_period = ts2ns(¶m_ex->sched_ss_repl_period); ++ param->pss.max_repl = param_ex->sched_ss_max_repl; ++ sched_class = &xnsched_class_sporadic; ++ break; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ case SCHED_TP: ++ param->tp.prio = param_ex->sched_priority; ++ param->tp.ptid = param_ex->sched_tp_partition; ++ sched_class = &xnsched_class_tp; ++ break; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ case SCHED_QUOTA: ++ param->quota.prio = param_ex->sched_priority; ++ param->quota.tgid = param_ex->sched_quota_group; ++ sched_class = &xnsched_class_quota; ++ break; ++#endif ++ default: ++ return NULL; ++ } ++ ++ if (tslice_r) ++ *tslice_r = tslice; ++ ++ return sched_class; ++} ++ ++COBALT_SYSCALL(sched_minprio, current, (int policy)) ++{ ++ int ret; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ case SCHED_SPORADIC: ++ case SCHED_TP: ++ case SCHED_QUOTA: ++ ret = XNSCHED_FIFO_MIN_PRIO; ++ break; ++ case SCHED_COBALT: ++ ret = XNSCHED_CORE_MIN_PRIO; ++ break; ++ case SCHED_NORMAL: ++ case SCHED_WEAK: ++ ret = 0; ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ trace_cobalt_sched_min_prio(policy, ret); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sched_maxprio, current, 
(int policy)) ++{ ++ int ret; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ case SCHED_SPORADIC: ++ case SCHED_TP: ++ case SCHED_QUOTA: ++ ret = XNSCHED_FIFO_MAX_PRIO; ++ break; ++ case SCHED_COBALT: ++ ret = XNSCHED_CORE_MAX_PRIO; ++ break; ++ case SCHED_NORMAL: ++ ret = 0; ++ break; ++ case SCHED_WEAK: ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ ret = XNSCHED_FIFO_MAX_PRIO; ++#else ++ ret = 0; ++#endif ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ trace_cobalt_sched_max_prio(policy, ret); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sched_yield, primary, (void)) ++{ ++ struct cobalt_thread *curr = cobalt_current_thread(); ++ int ret = 0; ++ ++ trace_cobalt_pthread_yield(0); ++ ++ /* Maybe some extension wants to handle this. */ ++ if (cobalt_call_extension(sched_yield, &curr->extref, ret) && ret) ++ return ret > 0 ? 0 : ret; ++ ++ xnthread_resume(&curr->threadbase, 0); ++ if (xnsched_run()) ++ return 0; ++ ++ /* ++ * If the round-robin move did not beget any context switch to ++ * a thread running in primary mode, then wait for the next ++ * linux context switch to happen. ++ * ++ * Rationale: it is most probably unexpected that ++ * sched_yield() does not cause any context switch, since this ++ * service is commonly used for implementing a poor man's ++ * cooperative scheduling. By waiting for a context switch to ++ * happen in the regular kernel, we guarantee that the CPU has ++ * been relinquished for a while. ++ * ++ * Typically, this behavior allows a thread running in primary ++ * mode to effectively yield the CPU to a thread of ++ * same/higher priority stuck in secondary mode. ++ * ++ * NOTE: calling cobalt_yield() with no timeout ++ * (i.e. XN_INFINITE) is probably never a good idea. This ++ * means that a SCHED_FIFO non-rt thread stuck in a tight loop ++ * would prevent the caller from waking up, since no ++ * linux-originated schedule event would happen for unblocking ++ * it on the current CPU. For this reason, we pass the ++ * arbitrary TICK_NSEC value to limit the wait time to a ++ * reasonable amount. ++ */ ++ return cobalt_yield(TICK_NSEC, TICK_NSEC); ++} ++ ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ ++static inline ++int set_tp_config(int cpu, union sched_config *config, size_t len) ++{ ++ xnticks_t offset, duration, next_offset; ++ struct xnsched_tp_schedule *gps, *ogps; ++ struct xnsched_tp_window *w; ++ struct sched_tp_window *p; ++ struct xnsched *sched; ++ spl_t s; ++ int n; ++ ++ if (len < sizeof(config->tp)) ++ return -EINVAL; ++ ++ sched = xnsched_struct(cpu); ++ ++ switch (config->tp.op) { ++ case sched_tp_install: ++ if (config->tp.nr_windows > 0) ++ break; ++ /* Fallback wanted. */ ++ case sched_tp_uninstall: ++ gps = NULL; ++ goto set_schedule; ++ case sched_tp_start: ++ xnlock_get_irqsave(&nklock, s); ++ xnsched_tp_start_schedule(sched); ++ xnlock_put_irqrestore(&nklock, s); ++ return 0; ++ case sched_tp_stop: ++ xnlock_get_irqsave(&nklock, s); ++ xnsched_tp_stop_schedule(sched); ++ xnlock_put_irqrestore(&nklock, s); ++ return 0; ++ default: ++ return -EINVAL; ++ } ++ ++ /* Install a new TP schedule on CPU. */ ++ ++ gps = xnmalloc(sizeof(*gps) + config->tp.nr_windows * sizeof(*w)); ++ if (gps == NULL) ++ return -ENOMEM; ++ ++ for (n = 0, p = config->tp.windows, w = gps->pwins, next_offset = 0; ++ n < config->tp.nr_windows; n++, p++, w++) { ++ /* ++ * Time windows must be strictly contiguous. Holes may ++ * be defined using windows assigned to the pseudo ++ * partition #-1. 
++ */ ++ offset = ts2ns(&p->offset); ++ if (offset != next_offset) ++ goto cleanup_and_fail; ++ ++ duration = ts2ns(&p->duration); ++ if (duration <= 0) ++ goto cleanup_and_fail; ++ ++ if (p->ptid < -1 || ++ p->ptid >= CONFIG_XENO_OPT_SCHED_TP_NRPART) ++ goto cleanup_and_fail; ++ ++ w->w_offset = next_offset; ++ w->w_part = p->ptid; ++ next_offset += duration; ++ } ++ ++ atomic_set(&gps->refcount, 1); ++ gps->pwin_nr = n; ++ gps->tf_duration = next_offset; ++set_schedule: ++ xnlock_get_irqsave(&nklock, s); ++ ogps = xnsched_tp_set_schedule(sched, gps); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (ogps) ++ xnsched_tp_put_schedule(ogps); ++ ++ return 0; ++ ++cleanup_and_fail: ++ xnfree(gps); ++ ++ return -EINVAL; ++} ++ ++static inline ++ssize_t get_tp_config(int cpu, void __user *u_config, size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, void __user *u_config, ++ size_t u_len, ++ const union sched_config *config, ++ size_t len)) ++{ ++ struct xnsched_tp_window *pw, *w; ++ struct xnsched_tp_schedule *gps; ++ struct sched_tp_window *pp, *p; ++ union sched_config *config; ++ struct xnsched *sched; ++ ssize_t ret, elen; ++ spl_t s; ++ int n; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sched = xnsched_struct(cpu); ++ gps = xnsched_tp_get_schedule(sched); ++ if (gps == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return 0; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ elen = sched_tp_confsz(gps->pwin_nr); ++ config = xnmalloc(elen); ++ if (config == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ config->tp.op = sched_tp_install; ++ config->tp.nr_windows = gps->pwin_nr; ++ for (n = 0, pp = p = config->tp.windows, pw = w = gps->pwins; ++ n < gps->pwin_nr; pp = p, p++, pw = w, w++, n++) { ++ ns2ts(&p->offset, w->w_offset); ++ ns2ts(&pp->duration, w->w_offset - pw->w_offset); ++ p->ptid = w->w_part; ++ } ++ ns2ts(&pp->duration, gps->tf_duration - pw->w_offset); ++ ret = put_config(SCHED_TP, u_config, len, config, elen); ++ xnfree(config); ++out: ++ xnsched_tp_put_schedule(gps); ++ ++ return ret; ++} ++ ++#else /* !CONFIG_XENO_OPT_SCHED_TP */ ++ ++static inline int ++set_tp_config(int cpu, union sched_config *config, size_t len) ++{ ++ return -EINVAL; ++} ++ ++static inline ssize_t ++get_tp_config(int cpu, union sched_config __user *u_config, size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, void __user *u_config, ++ size_t u_len, ++ const union sched_config *config, ++ size_t len)) ++{ ++ return -EINVAL; ++} ++ ++#endif /* !CONFIG_XENO_OPT_SCHED_TP */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ ++static inline ++int set_quota_config(int cpu, union sched_config *config, size_t len) ++{ ++ struct __sched_config_quota *p = &config->quota; ++ struct __sched_quota_info *iq = &p->info; ++ struct cobalt_sched_group *group; ++ struct xnsched_quota_group *tg; ++ struct xnsched *sched; ++ int ret, quota_sum; ++ spl_t s; ++ ++ if (len < sizeof(*p)) ++ return -EINVAL; ++ ++ switch (p->op) { ++ case sched_quota_add: ++ group = xnmalloc(sizeof(*group)); ++ if (group == NULL) ++ return -ENOMEM; ++ tg = &group->quota; ++ group->pshared = p->add.pshared != 0; ++ group->scope = cobalt_current_resources(group->pshared); ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ ret = xnsched_quota_create_group(tg, sched, "a_sum); ++ if (ret) { ++ xnlock_put_irqrestore(&nklock, s); ++ 
xnfree(group); ++ return ret; ++ } ++ list_add(&group->next, &group->scope->schedq); ++ xnlock_put_irqrestore(&nklock, s); ++ break; ++ case sched_quota_remove: ++ case sched_quota_force_remove: ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ tg = xnsched_quota_find_group(sched, p->remove.tgid); ++ if (tg == NULL) ++ goto bad_tgid; ++ group = container_of(tg, struct cobalt_sched_group, quota); ++ if (group->scope != cobalt_current_resources(group->pshared)) ++ goto bad_tgid; ++ ret = xnsched_quota_destroy_group(tg, ++ p->op == sched_quota_force_remove, ++ "a_sum); ++ if (ret) { ++ xnlock_put_irqrestore(&nklock, s); ++ return ret; ++ } ++ list_del(&group->next); ++ xnlock_put_irqrestore(&nklock, s); ++ iq->tgid = tg->tgid; ++ iq->quota = tg->quota_percent; ++ iq->quota_peak = tg->quota_peak_percent; ++ iq->quota_sum = quota_sum; ++ xnfree(group); ++ return 0; ++ case sched_quota_set: ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ tg = xnsched_quota_find_group(sched, p->set.tgid); ++ if (tg == NULL) ++ goto bad_tgid; ++ group = container_of(tg, struct cobalt_sched_group, quota); ++ if (group->scope != cobalt_current_resources(group->pshared)) ++ goto bad_tgid; ++ xnsched_quota_set_limit(tg, p->set.quota, p->set.quota_peak, ++ "a_sum); ++ xnlock_put_irqrestore(&nklock, s); ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ iq->tgid = tg->tgid; ++ iq->quota = tg->quota_percent; ++ iq->quota_peak = tg->quota_peak_percent; ++ iq->quota_sum = quota_sum; ++ ++ return 0; ++bad_tgid: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return -ESRCH; ++} ++ ++static inline ++ssize_t get_quota_config(int cpu, void __user *u_config, size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, void __user *u_config, ++ size_t u_len, ++ const union sched_config *config, ++ size_t len)) ++{ ++ struct cobalt_sched_group *group; ++ struct xnsched_quota_group *tg; ++ union sched_config *config; ++ struct xnsched *sched; ++ ssize_t ret; ++ spl_t s; ++ ++ config = fetch_config(SCHED_QUOTA, u_config, &len); ++ if (IS_ERR(config)) ++ return PTR_ERR(config); ++ ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ tg = xnsched_quota_find_group(sched, config->quota.get.tgid); ++ if (tg == NULL) ++ goto bad_tgid; ++ ++ group = container_of(tg, struct cobalt_sched_group, quota); ++ if (group->scope != cobalt_current_resources(group->pshared)) ++ goto bad_tgid; ++ ++ config->quota.info.tgid = tg->tgid; ++ config->quota.info.quota = tg->quota_percent; ++ config->quota.info.quota_peak = tg->quota_peak_percent; ++ config->quota.info.quota_sum = xnsched_quota_sum_all(sched); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ ret = put_config(SCHED_QUOTA, u_config, len, config, sizeof(*config)); ++ xnfree(config); ++ ++ return ret; ++bad_tgid: ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(config); ++ ++ return -ESRCH; ++} ++ ++#else /* !CONFIG_XENO_OPT_SCHED_QUOTA */ ++ ++static inline ++int set_quota_config(int cpu, union sched_config *config, size_t len) ++{ ++ return -EINVAL; ++} ++ ++static inline ++ssize_t get_quota_config(int cpu, void __user *u_config, ++ size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, void __user *u_config, ++ size_t u_len, ++ const union sched_config *config, ++ size_t len)) ++{ ++ return -EINVAL; ++} ++ ++#endif /* !CONFIG_XENO_OPT_SCHED_QUOTA */ ++ ++static 
union sched_config * ++sched_fetch_config(int policy, const void __user *u_config, size_t *len) ++{ ++ union sched_config *buf; ++ int ret; ++ ++ if (u_config == NULL) ++ return ERR_PTR(-EFAULT); ++ ++ if (policy == SCHED_QUOTA && *len < sizeof(buf->quota)) ++ return ERR_PTR(-EINVAL); ++ ++ buf = xnmalloc(*len); ++ if (buf == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = cobalt_copy_from_user(buf, u_config, *len); ++ if (ret) { ++ xnfree(buf); ++ return ERR_PTR(ret); ++ } ++ ++ return buf; ++} ++ ++static int sched_ack_config(int policy, const union sched_config *config, ++ void __user *u_config) ++{ ++ union sched_config __user *u_p = u_config; ++ ++ if (policy != SCHED_QUOTA) ++ return 0; ++ ++ return u_p == NULL ? -EFAULT : ++ cobalt_copy_to_user(&u_p->quota.info, &config->quota.info, ++ sizeof(u_p->quota.info)); ++} ++ ++static ssize_t sched_put_config(int policy, ++ void __user *u_config, size_t u_len, ++ const union sched_config *config, size_t len) ++{ ++ union sched_config *u_p = u_config; ++ ++ if (u_config == NULL) ++ return -EFAULT; ++ ++ if (policy == SCHED_QUOTA) { ++ if (u_len < sizeof(config->quota)) ++ return -EINVAL; ++ return cobalt_copy_to_user(&u_p->quota.info, &config->quota.info, ++ sizeof(u_p->quota.info)) ?: ++ sizeof(u_p->quota.info); ++ } ++ ++ return cobalt_copy_to_user(u_config, config, len) ?: len; ++} ++ ++int __cobalt_sched_setconfig_np(int cpu, int policy, ++ void __user *u_config, ++ size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ int (*ack_config)(int policy, ++ const union sched_config *config, ++ void __user *u_config)) ++{ ++ union sched_config *buf; ++ int ret; ++ ++ trace_cobalt_sched_setconfig(cpu, policy, len); ++ ++ if (cpu < 0 || cpu >= NR_CPUS || !xnsched_threading_cpu(cpu)) ++ return -EINVAL; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ buf = fetch_config(policy, u_config, &len); ++ if (IS_ERR(buf)) ++ return PTR_ERR(buf); ++ ++ switch (policy) { ++ case SCHED_TP: ++ ret = set_tp_config(cpu, buf, len); ++ break; ++ case SCHED_QUOTA: ++ ret = set_quota_config(cpu, buf, len); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ if (ret == 0) ++ ret = ack_config(policy, buf, u_config); ++ ++ xnfree(buf); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sched_setconfig_np, conforming, ++ (int cpu, int policy, ++ union sched_config __user *u_config, ++ size_t len)) ++{ ++ return __cobalt_sched_setconfig_np(cpu, policy, u_config, len, ++ sched_fetch_config, sched_ack_config); ++} ++ ++ssize_t __cobalt_sched_getconfig_np(int cpu, int policy, ++ void __user *u_config, ++ size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, ++ void __user *u_config, ++ size_t u_len, ++ const union sched_config *config, ++ size_t len)) ++{ ++ ssize_t ret; ++ ++ switch (policy) { ++ case SCHED_TP: ++ ret = get_tp_config(cpu, u_config, len, ++ fetch_config, put_config); ++ break; ++ case SCHED_QUOTA: ++ ret = get_quota_config(cpu, u_config, len, ++ fetch_config, put_config); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ trace_cobalt_sched_get_config(cpu, policy, ret); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sched_getconfig_np, conforming, ++ (int cpu, int policy, ++ union sched_config __user *u_config, ++ size_t len)) ++{ ++ return __cobalt_sched_getconfig_np(cpu, policy, u_config, len, ++ sched_fetch_config, sched_put_config); ++} ++ ++int __cobalt_sched_weightprio(int policy, ++ const struct sched_param_ex 
*param_ex) ++{ ++ struct xnsched_class *sched_class; ++ union xnsched_policy_param param; ++ int prio; ++ ++ sched_class = cobalt_sched_policy_param(¶m, policy, ++ param_ex, NULL); ++ if (sched_class == NULL) ++ return -EINVAL; ++ ++ prio = param_ex->sched_priority; ++ if (prio < 0) ++ prio = -prio; ++ ++ return prio + sched_class->weight; ++} ++ ++COBALT_SYSCALL(sched_weightprio, current, ++ (int policy, const struct sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ ++ if (cobalt_copy_from_user(¶m_ex, u_param, sizeof(param_ex))) ++ return -EFAULT; ++ ++ return __cobalt_sched_weightprio(policy, ¶m_ex); ++} ++ ++int cobalt_sched_setscheduler_ex(pid_t pid, ++ int policy, ++ const struct sched_param_ex *param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ int ret, promoted = 0; ++ spl_t s; ++ ++ trace_cobalt_sched_setscheduler(pid, policy, param_ex); ++ ++ if (pid) { ++ xnlock_get_irqsave(&nklock, s); ++ thread = cobalt_thread_find(pid); ++ xnlock_put_irqrestore(&nklock, s); ++ } else ++ thread = cobalt_current_thread(); ++ ++ if (thread == NULL) { ++ if (u_winoff == NULL || pid != task_pid_vnr(current)) ++ return -ESRCH; ++ ++ thread = cobalt_thread_shadow(&hkey, u_winoff); ++ if (IS_ERR(thread)) ++ return PTR_ERR(thread); ++ ++ promoted = 1; ++ } ++ ++ ret = __cobalt_thread_setschedparam_ex(thread, policy, param_ex); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_promoted, &promoted, sizeof(promoted)); ++} ++ ++COBALT_SYSCALL(sched_setscheduler_ex, conforming, ++ (pid_t pid, ++ int policy, ++ const struct sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ struct sched_param_ex param_ex; ++ ++ if (cobalt_copy_from_user(¶m_ex, u_param, sizeof(param_ex))) ++ return -EFAULT; ++ ++ return cobalt_sched_setscheduler_ex(pid, policy, ¶m_ex, ++ u_winoff, u_promoted); ++} ++ ++int cobalt_sched_getscheduler_ex(pid_t pid, ++ int *policy_r, ++ struct sched_param_ex *param_ex) ++{ ++ struct cobalt_thread *thread; ++ spl_t s; ++ ++ trace_cobalt_sched_getscheduler(pid); ++ ++ if (pid) { ++ xnlock_get_irqsave(&nklock, s); ++ thread = cobalt_thread_find(pid); ++ xnlock_put_irqrestore(&nklock, s); ++ } else ++ thread = cobalt_current_thread(); ++ ++ if (thread == NULL) ++ return -ESRCH; ++ ++ return __cobalt_thread_getschedparam_ex(thread, policy_r, param_ex); ++} ++ ++COBALT_SYSCALL(sched_getscheduler_ex, current, ++ (pid_t pid, ++ int __user *u_policy, ++ struct sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ int ret, policy; ++ ++ ret = cobalt_sched_getscheduler_ex(pid, &policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ if (cobalt_copy_to_user(u_param, ¶m_ex, sizeof(param_ex)) || ++ cobalt_copy_to_user(u_policy, &policy, sizeof(policy))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++void cobalt_sched_reclaim(struct cobalt_process *process) ++{ ++ struct cobalt_resources *p = &process->resources; ++ struct cobalt_sched_group *group; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ while (!list_empty(&p->schedq)) { ++ group = list_get_entry(&p->schedq, struct cobalt_sched_group, next); ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ xnsched_quota_destroy_group(&group->quota, 1, NULL); ++#endif ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(group); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++} +--- linux/kernel/xenomai/posix/io.h 1970-01-01 08:00:00.000000000 +0800 ++++ 
linux-patched/kernel/xenomai/posix/io.h 2021-04-07 16:01:26.140635719 +0800 +@@ -0,0 +1,76 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka . ++ * Copyright (C) 2005 Joerg Langenberg . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_IO_H ++#define _COBALT_POSIX_IO_H ++ ++#include ++#include ++#include ++ ++int __cobalt_first_fd_valid_p(fd_set *fds[XNSELECT_MAX_TYPES], int nfds); ++ ++int __cobalt_select_bind_all(struct xnselector *selector, ++ fd_set *fds[XNSELECT_MAX_TYPES], int nfds); ++ ++COBALT_SYSCALL_DECL(open, ++ (const char __user *u_path, int oflag)); ++ ++COBALT_SYSCALL_DECL(socket, ++ (int protocol_family, ++ int socket_type, int protocol)); ++ ++COBALT_SYSCALL_DECL(close, (int fd)); ++ ++COBALT_SYSCALL_DECL(fcntl, (int fd, int cmd, long arg)); ++ ++COBALT_SYSCALL_DECL(ioctl, ++ (int fd, unsigned int request, void __user *arg)); ++ ++COBALT_SYSCALL_DECL(read, ++ (int fd, void __user *buf, size_t size)); ++ ++COBALT_SYSCALL_DECL(write, ++ (int fd, const void __user *buf, size_t size)); ++ ++COBALT_SYSCALL_DECL(recvmsg, ++ (int fd, struct user_msghdr __user *umsg, int flags)); ++ ++COBALT_SYSCALL_DECL(recvmmsg, ++ (int fd, struct mmsghdr __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, struct timespec *u_timeout)); ++ ++COBALT_SYSCALL_DECL(sendmsg, ++ (int fd, struct user_msghdr __user *umsg, int flags)); ++ ++COBALT_SYSCALL_DECL(sendmmsg, ++ (int fd, struct mmsghdr __user *u_msgvec, ++ unsigned int vlen, unsigned int flags)); ++ ++COBALT_SYSCALL_DECL(mmap, ++ (int fd, struct _rtdm_mmap_request __user *u_rma, ++ void __user * __user *u_addrp)); ++ ++COBALT_SYSCALL_DECL(select, ++ (int nfds, ++ fd_set __user *u_rfds, ++ fd_set __user *u_wfds, ++ fd_set __user *u_xfds, ++ struct timeval __user *u_tv)); ++ ++#endif /* !_COBALT_POSIX_IO_H */ +--- linux/kernel/xenomai/posix/timerfd.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/timerfd.c 2021-04-07 16:01:26.135635727 +0800 +@@ -0,0 +1,334 @@ ++/* ++ * Copyright (C) 2013 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "clock.h" ++#include "timer.h" ++#include "timerfd.h" ++ ++struct cobalt_tfd { ++ int flags; ++ clockid_t clockid; ++ struct rtdm_fd fd; ++ struct xntimer timer; ++ DECLARE_XNSELECT(read_select); ++ struct itimerspec value; ++ struct xnsynch readers; ++ struct xnthread *target; ++}; ++ ++#define COBALT_TFD_TICKED (1 << 2) ++ ++#define COBALT_TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_WAKEUP) ++ ++static ssize_t timerfd_read(struct rtdm_fd *fd, void __user *buf, size_t size) ++{ ++ struct cobalt_tfd *tfd; ++ __u64 __user *u_ticks; ++ __u64 ticks = 0; ++ bool aligned; ++ spl_t s; ++ int err; ++ ++ if (size < sizeof(ticks)) ++ return -EINVAL; ++ ++ u_ticks = buf; ++ if (!access_wok(u_ticks, sizeof(*u_ticks))) ++ return -EFAULT; ++ ++ aligned = (((unsigned long)buf) & (sizeof(ticks) - 1)) == 0; ++ ++ tfd = container_of(fd, struct cobalt_tfd, fd); ++ ++ xnlock_get_irqsave(&nklock, s); ++ if (tfd->flags & COBALT_TFD_TICKED) { ++ err = 0; ++ goto out; ++ } ++ if (rtdm_fd_flags(fd) & O_NONBLOCK) { ++ err = -EAGAIN; ++ goto out; ++ } ++ ++ do { ++ err = xnsynch_sleep_on(&tfd->readers, XN_INFINITE, XN_RELATIVE); ++ } while (err == 0 && (tfd->flags & COBALT_TFD_TICKED) == 0); ++ ++ if (err & XNBREAK) ++ err = -EINTR; ++ out: ++ if (err == 0) { ++ xnticks_t now; ++ ++ if (xntimer_periodic_p(&tfd->timer)) { ++ now = xnclock_read_raw(xntimer_clock(&tfd->timer)); ++ ticks = 1 + xntimer_get_overruns(&tfd->timer, ++ xnthread_current(), now); ++ } else ++ ticks = 1; ++ ++ tfd->flags &= ~COBALT_TFD_TICKED; ++ xnselect_signal(&tfd->read_select, 0); ++ } ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (err == 0) { ++ err = aligned ? __xn_put_user(ticks, u_ticks) : ++ __xn_copy_to_user(buf, &ticks, sizeof(ticks)); ++ if (err) ++ err =-EFAULT; ++ } ++ ++ return err ?: sizeof(ticks); ++} ++ ++static int ++timerfd_select(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned type, unsigned index) ++{ ++ struct cobalt_tfd *tfd = container_of(fd, struct cobalt_tfd, fd); ++ struct xnselect_binding *binding; ++ spl_t s; ++ int err; ++ ++ if (type != XNSELECT_READ) ++ return -EBADF; ++ ++ binding = xnmalloc(sizeof(*binding)); ++ if (binding == NULL) ++ return -ENOMEM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_set_affinity(&tfd->timer, xnthread_current()->sched); ++ err = xnselect_bind(&tfd->read_select, binding, selector, type, ++ index, tfd->flags & COBALT_TFD_TICKED); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++static void timerfd_close(struct rtdm_fd *fd) ++{ ++ struct cobalt_tfd *tfd = container_of(fd, struct cobalt_tfd, fd); ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_destroy(&tfd->timer); ++ xnsynch_destroy(&tfd->readers); ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++ xnselect_destroy(&tfd->read_select); /* Reschedules. 
*/ ++ xnfree(tfd); ++} ++ ++static struct rtdm_fd_ops timerfd_ops = { ++ .read_rt = timerfd_read, ++ .select = timerfd_select, ++ .close = timerfd_close, ++}; ++ ++static void timerfd_handler(struct xntimer *xntimer) ++{ ++ struct cobalt_tfd *tfd; ++ ++ tfd = container_of(xntimer, struct cobalt_tfd, timer); ++ tfd->flags |= COBALT_TFD_TICKED; ++ xnselect_signal(&tfd->read_select, 1); ++ xnsynch_wakeup_one_sleeper(&tfd->readers); ++ if (tfd->target) ++ xnthread_unblock(tfd->target); ++} ++ ++COBALT_SYSCALL(timerfd_create, lostage, (int clockid, int flags)) ++{ ++ struct cobalt_tfd *tfd; ++ struct xnthread *curr; ++ struct xnclock *clock; ++ int ret, ufd; ++ ++ if (flags & ~TFD_CREATE_FLAGS) ++ return -EINVAL; ++ ++ clock = cobalt_clock_find(clockid); ++ if (IS_ERR(clock)) ++ return PTR_ERR(clock); ++ ++ tfd = xnmalloc(sizeof(*tfd)); ++ if (tfd == NULL) ++ return -ENOMEM; ++ ++ ufd = __rtdm_anon_getfd("[cobalt-timerfd]", ++ O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); ++ if (ufd < 0) { ++ ret = ufd; ++ goto fail_getfd; ++ } ++ ++ tfd->flags = flags & ~TFD_NONBLOCK; ++ tfd->fd.oflags = (flags & TFD_NONBLOCK) ? O_NONBLOCK : 0; ++ tfd->clockid = clockid; ++ curr = xnthread_current(); ++ xntimer_init(&tfd->timer, clock, timerfd_handler, ++ curr ? curr->sched : NULL, XNTIMER_UGRAVITY); ++ xnsynch_init(&tfd->readers, XNSYNCH_PRIO, NULL); ++ xnselect_init(&tfd->read_select); ++ tfd->target = NULL; ++ ++ ret = rtdm_fd_enter(&tfd->fd, ufd, COBALT_TIMERFD_MAGIC, &timerfd_ops); ++ if (ret < 0) ++ goto fail; ++ ++ ret = rtdm_fd_register(&tfd->fd, ufd); ++ if (ret < 0) ++ goto fail; ++ ++ return ufd; ++fail: ++ xnselect_destroy(&tfd->read_select); ++ xnsynch_destroy(&tfd->readers); ++ xntimer_destroy(&tfd->timer); ++ __rtdm_anon_putfd(ufd); ++fail_getfd: ++ xnfree(tfd); ++ ++ return ret; ++} ++ ++static inline struct cobalt_tfd *tfd_get(int ufd) ++{ ++ struct rtdm_fd *fd; ++ ++ fd = rtdm_fd_get(ufd, COBALT_TIMERFD_MAGIC); ++ if (IS_ERR(fd)) { ++ int err = PTR_ERR(fd); ++ if (err == -EBADF && cobalt_current_process() == NULL) ++ err = -EPERM; ++ return ERR_PTR(err); ++ } ++ ++ return container_of(fd, struct cobalt_tfd, fd); ++} ++ ++static inline void tfd_put(struct cobalt_tfd *tfd) ++{ ++ rtdm_fd_put(&tfd->fd); ++} ++ ++int __cobalt_timerfd_settime(int fd, int flags, ++ const struct itimerspec *value, ++ struct itimerspec *ovalue) ++{ ++ struct cobalt_tfd *tfd; ++ int cflag, ret; ++ spl_t s; ++ ++ if (flags & ~COBALT_TFD_SETTIME_FLAGS) ++ return -EINVAL; ++ ++ tfd = tfd_get(fd); ++ if (IS_ERR(tfd)) ++ return PTR_ERR(tfd); ++ ++ cflag = (flags & TFD_TIMER_ABSTIME) ? 
TIMER_ABSTIME : 0; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ tfd->target = NULL; ++ if (flags & TFD_WAKEUP) { ++ tfd->target = xnthread_current(); ++ if (tfd->target == NULL) { ++ ret = -EPERM; ++ goto out; ++ } ++ } ++ ++ if (ovalue) ++ __cobalt_timer_getval(&tfd->timer, ovalue); ++ ++ xntimer_set_affinity(&tfd->timer, xnthread_current()->sched); ++ ++ ret = __cobalt_timer_setval(&tfd->timer, ++ clock_flag(cflag, tfd->clockid), value); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ tfd_put(tfd); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(timerfd_settime, primary, ++ (int fd, int flags, ++ const struct itimerspec __user *new_value, ++ struct itimerspec __user *old_value)) ++{ ++ struct itimerspec ovalue, value; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&value, new_value, sizeof(value)); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_timerfd_settime(fd, flags, &value, &ovalue); ++ if (ret) ++ return ret; ++ ++ if (old_value) { ++ ret = cobalt_copy_to_user(old_value, &ovalue, sizeof(ovalue)); ++ value.it_value.tv_sec = 0; ++ value.it_value.tv_nsec = 0; ++ __cobalt_timerfd_settime(fd, flags, &value, NULL); ++ } ++ ++ return ret; ++} ++ ++int __cobalt_timerfd_gettime(int fd, struct itimerspec *value) ++{ ++ struct cobalt_tfd *tfd; ++ spl_t s; ++ ++ tfd = tfd_get(fd); ++ if (IS_ERR(tfd)) ++ return PTR_ERR(tfd); ++ ++ xnlock_get_irqsave(&nklock, s); ++ __cobalt_timer_getval(&tfd->timer, value); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ tfd_put(tfd); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(timerfd_gettime, current, ++ (int fd, struct itimerspec __user *curr_value)) ++{ ++ struct itimerspec value; ++ int ret; ++ ++ ret = __cobalt_timerfd_gettime(fd, &value); ++ ++ return ret ?: cobalt_copy_to_user(curr_value, &value, sizeof(value)); ++} +--- linux/kernel/xenomai/posix/process.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/process.h 2021-04-07 16:01:26.131635732 +0800 +@@ -0,0 +1,156 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_POSIX_PROCESS_H ++#define _COBALT_POSIX_PROCESS_H ++ ++#include ++#include ++#include ++ ++#define KEVENT_PROPAGATE 0 ++#define KEVENT_STOP 1 ++ ++#define NR_PERSONALITIES 4 ++#if BITS_PER_LONG < NR_PERSONALITIES ++#error "NR_PERSONALITIES overflows internal bitmap" ++#endif ++ ++struct mm_struct; ++struct xnthread_personality; ++struct cobalt_timer; ++ ++struct cobalt_resources { ++ struct list_head condq; ++ struct list_head mutexq; ++ struct list_head semq; ++ struct list_head monitorq; ++ struct list_head eventq; ++ struct list_head schedq; ++}; ++ ++struct cobalt_process { ++ struct mm_struct *mm; ++ struct hlist_node hlink; ++ struct cobalt_ppd sys_ppd; ++ unsigned long permap; ++ struct rb_root usems; ++ struct list_head sigwaiters; ++ struct cobalt_resources resources; ++ struct list_head thread_list; ++ DECLARE_BITMAP(timers_map, CONFIG_XENO_OPT_NRTIMERS); ++ struct cobalt_timer *timers[CONFIG_XENO_OPT_NRTIMERS]; ++ void *priv[NR_PERSONALITIES]; ++ int ufeatures; ++ unsigned int debugged_threads; ++}; ++ ++struct cobalt_resnode { ++ struct cobalt_resources *scope; ++ struct cobalt_process *owner; ++ struct list_head next; ++ xnhandle_t handle; ++}; ++ ++int cobalt_register_personality(struct xnthread_personality *personality); ++ ++int cobalt_unregister_personality(int xid); ++ ++struct xnthread_personality *cobalt_push_personality(int xid); ++ ++void cobalt_pop_personality(struct xnthread_personality *prev); ++ ++int cobalt_bind_core(int ufeatures); ++ ++int cobalt_bind_personality(unsigned int magic); ++ ++struct cobalt_process *cobalt_search_process(struct mm_struct *mm); ++ ++int cobalt_map_user(struct xnthread *thread, __u32 __user *u_winoff); ++ ++void *cobalt_get_context(int xid); ++ ++int cobalt_yield(xnticks_t min, xnticks_t max); ++ ++int cobalt_process_init(void); ++ ++extern struct list_head cobalt_global_thread_list; ++ ++extern struct cobalt_resources cobalt_global_resources; ++ ++static inline struct cobalt_process *cobalt_current_process(void) ++{ ++ return ipipe_current_threadinfo()->process; ++} ++ ++static inline struct cobalt_process * ++cobalt_set_process(struct cobalt_process *process) ++{ ++ struct ipipe_threadinfo *p = ipipe_current_threadinfo(); ++ struct cobalt_process *old; ++ ++ old = p->process; ++ p->process = process; ++ ++ return old; ++} ++ ++static inline struct cobalt_ppd *cobalt_ppd_get(int global) ++{ ++ struct cobalt_process *process; ++ ++ if (global || (process = cobalt_current_process()) == NULL) ++ return &cobalt_kernel_ppd; ++ ++ return &process->sys_ppd; ++} ++ ++static inline struct cobalt_resources *cobalt_current_resources(int pshared) ++{ ++ struct cobalt_process *process; ++ ++ if (pshared || (process = cobalt_current_process()) == NULL) ++ return &cobalt_global_resources; ++ ++ return &process->resources; ++} ++ ++static inline ++void __cobalt_add_resource(struct cobalt_resnode *node, int pshared) ++{ ++ node->owner = cobalt_current_process(); ++ node->scope = cobalt_current_resources(pshared); ++} ++ ++#define cobalt_add_resource(__node, __type, __pshared) \ ++ do { \ ++ __cobalt_add_resource(__node, __pshared); \ ++ list_add_tail(&(__node)->next, \ ++ &((__node)->scope)->__type ## q); \ ++ } while (0) ++ ++static inline ++void cobalt_del_resource(struct cobalt_resnode *node) ++{ ++ list_del(&node->next); ++} ++ ++extern struct xnthread_personality *cobalt_personalities[]; ++ ++extern struct xnthread_personality cobalt_personality; ++ ++#endif /* !_COBALT_POSIX_PROCESS_H */ +--- 
linux/kernel/xenomai/posix/memory.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/memory.c 2021-04-07 16:01:26.126635739 +0800 +@@ -0,0 +1,353 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "process.h" ++#include "memory.h" ++ ++#define UMM_PRIVATE 0 /* Per-process user-mapped memory heap */ ++#define UMM_SHARED 1 /* Shared user-mapped memory heap */ ++#define SYS_GLOBAL 2 /* System heap (not mmapped) */ ++ ++struct xnvdso *nkvdso; ++EXPORT_SYMBOL_GPL(nkvdso); ++ ++static void umm_vmopen(struct vm_area_struct *vma) ++{ ++ struct cobalt_umm *umm = vma->vm_private_data; ++ ++ atomic_inc(&umm->refcount); ++} ++ ++static void umm_vmclose(struct vm_area_struct *vma) ++{ ++ struct cobalt_umm *umm = vma->vm_private_data; ++ ++ cobalt_umm_destroy(umm); ++} ++ ++static struct vm_operations_struct umm_vmops = { ++ .open = umm_vmopen, ++ .close = umm_vmclose, ++}; ++ ++static struct cobalt_umm *umm_from_fd(struct rtdm_fd *fd) ++{ ++ struct cobalt_process *process; ++ ++ process = cobalt_current_process(); ++ if (process == NULL) ++ return NULL; ++ ++ if (rtdm_fd_minor(fd) == UMM_PRIVATE) ++ return &process->sys_ppd.umm; ++ ++ return &cobalt_kernel_ppd.umm; ++} ++ ++static int umm_mmap(struct rtdm_fd *fd, struct vm_area_struct *vma) ++{ ++ struct cobalt_umm *umm; ++ size_t len; ++ int ret; ++ ++ umm = umm_from_fd(fd); ++ if (fd == NULL) ++ return -ENODEV; ++ ++ len = vma->vm_end - vma->vm_start; ++ if (len != xnheap_get_size(&umm->heap)) ++ return -EINVAL; ++ ++ vma->vm_private_data = umm; ++ vma->vm_ops = &umm_vmops; ++ if (xnarch_cache_aliasing()) ++ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ++ ++ ret = rtdm_mmap_vmem(vma, xnheap_get_membase(&umm->heap)); ++ if (ret) ++ return ret; ++ ++ atomic_inc(&umm->refcount); ++ ++ return 0; ++} ++ ++#ifndef CONFIG_MMU ++static unsigned long umm_get_unmapped_area(struct rtdm_fd *fd, ++ unsigned long len, ++ unsigned long pgoff, ++ unsigned long flags) ++{ ++ struct cobalt_umm *umm; ++ ++ umm = umm_from_fd(fd); ++ if (umm == NULL) ++ return -ENODEV; ++ ++ if (pgoff == 0) ++ return (unsigned long)xnheap_get_membase(&umm->heap); ++ ++ return pgoff << PAGE_SHIFT; ++} ++#else ++#define umm_get_unmapped_area NULL ++#endif ++ ++static int stat_umm(struct rtdm_fd *fd, ++ struct cobalt_umm __user *u_stat) ++{ ++ struct cobalt_memdev_stat stat; ++ struct cobalt_umm *umm; ++ spl_t s; ++ ++ umm = umm_from_fd(fd); ++ if (umm == NULL) ++ return -ENODEV; ++ ++ xnlock_get_irqsave(&umm->heap.lock, s); ++ stat.size = xnheap_get_size(&umm->heap); ++ stat.free = xnheap_get_free(&umm->heap); ++ xnlock_put_irqrestore(&umm->heap.lock, s); ++ ++ return 
rtdm_safe_copy_to_user(fd, u_stat, &stat, sizeof(stat)); ++} ++ ++static int do_umm_ioctls(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ int ret; ++ ++ switch (request) { ++ case MEMDEV_RTIOC_STAT: ++ ret = stat_umm(fd, arg); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int umm_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ return do_umm_ioctls(fd, request, arg); ++} ++ ++static int umm_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ return do_umm_ioctls(fd, request, arg); ++} ++ ++static int sysmem_open(struct rtdm_fd *fd, int oflags) ++{ ++ if ((oflags & O_ACCMODE) != O_RDONLY) ++ return -EACCES; ++ ++ return 0; ++} ++ ++static int do_sysmem_ioctls(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct cobalt_memdev_stat stat; ++ spl_t s; ++ int ret; ++ ++ switch (request) { ++ case MEMDEV_RTIOC_STAT: ++ xnlock_get_irqsave(&cobalt_heap.lock, s); ++ stat.size = xnheap_get_size(&cobalt_heap); ++ stat.free = xnheap_get_free(&cobalt_heap); ++ xnlock_put_irqrestore(&cobalt_heap.lock, s); ++ ret = rtdm_safe_copy_to_user(fd, arg, &stat, sizeof(stat)); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int sysmem_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ return do_sysmem_ioctls(fd, request, arg); ++} ++ ++static int sysmem_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ return do_sysmem_ioctls(fd, request, arg); ++} ++ ++static struct rtdm_driver umm_driver = { ++ .profile_info = RTDM_PROFILE_INFO(umm, ++ RTDM_CLASS_MEMORY, ++ RTDM_SUBCLASS_GENERIC, ++ 0), ++ .device_flags = RTDM_NAMED_DEVICE|RTDM_FIXED_MINOR, ++ .device_count = 2, ++ .ops = { ++ .ioctl_rt = umm_ioctl_rt, ++ .ioctl_nrt = umm_ioctl_nrt, ++ .mmap = umm_mmap, ++ .get_unmapped_area = umm_get_unmapped_area, ++ }, ++}; ++ ++static struct rtdm_device umm_devices[] = { ++ [ UMM_PRIVATE ] = { ++ .driver = &umm_driver, ++ .label = COBALT_MEMDEV_PRIVATE, ++ .minor = UMM_PRIVATE, ++ }, ++ [ UMM_SHARED ] = { ++ .driver = &umm_driver, ++ .label = COBALT_MEMDEV_SHARED, ++ .minor = UMM_SHARED, ++ }, ++}; ++ ++static struct rtdm_driver sysmem_driver = { ++ .profile_info = RTDM_PROFILE_INFO(sysmem, ++ RTDM_CLASS_MEMORY, ++ SYS_GLOBAL, ++ 0), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = 1, ++ .ops = { ++ .open = sysmem_open, ++ .ioctl_rt = sysmem_ioctl_rt, ++ .ioctl_nrt = sysmem_ioctl_nrt, ++ }, ++}; ++ ++static struct rtdm_device sysmem_device = { ++ .driver = &sysmem_driver, ++ .label = COBALT_MEMDEV_SYS, ++}; ++ ++static inline void init_vdso(void) ++{ ++ nkvdso->features = XNVDSO_FEATURES; ++ nkvdso->wallclock_offset = nkclock.wallclock_offset; ++} ++ ++int cobalt_memdev_init(void) ++{ ++ int ret; ++ ++ ret = cobalt_umm_init(&cobalt_kernel_ppd.umm, ++ CONFIG_XENO_OPT_SHARED_HEAPSZ * 1024, NULL); ++ if (ret) ++ return ret; ++ ++ cobalt_umm_set_name(&cobalt_kernel_ppd.umm, "shared heap"); ++ ++ nkvdso = cobalt_umm_alloc(&cobalt_kernel_ppd.umm, sizeof(*nkvdso)); ++ if (nkvdso == NULL) { ++ ret = -ENOMEM; ++ goto fail_vdso; ++ } ++ ++ init_vdso(); ++ ++ ret = rtdm_dev_register(umm_devices + UMM_PRIVATE); ++ if (ret) ++ goto fail_private; ++ ++ ret = rtdm_dev_register(umm_devices + UMM_SHARED); ++ if (ret) ++ goto fail_shared; ++ ++ ret = rtdm_dev_register(&sysmem_device); ++ if (ret) ++ goto fail_sysmem; ++ ++ return 0; ++ ++fail_sysmem: ++ rtdm_dev_unregister(umm_devices + UMM_SHARED); 
++fail_shared: ++ rtdm_dev_unregister(umm_devices + UMM_PRIVATE); ++fail_private: ++ cobalt_umm_free(&cobalt_kernel_ppd.umm, nkvdso); ++fail_vdso: ++ cobalt_umm_destroy(&cobalt_kernel_ppd.umm); ++ ++ return ret; ++} ++ ++void cobalt_memdev_cleanup(void) ++{ ++ rtdm_dev_unregister(&sysmem_device); ++ rtdm_dev_unregister(umm_devices + UMM_SHARED); ++ rtdm_dev_unregister(umm_devices + UMM_PRIVATE); ++ cobalt_umm_free(&cobalt_kernel_ppd.umm, nkvdso); ++ cobalt_umm_destroy(&cobalt_kernel_ppd.umm); ++} ++ ++int cobalt_umm_init(struct cobalt_umm *umm, u32 size, ++ void (*release)(struct cobalt_umm *umm)) ++{ ++ void *basemem; ++ int ret; ++ ++ secondary_mode_only(); ++ ++ size = PAGE_ALIGN(size); ++ basemem = __vmalloc(size, GFP_KERNEL|__GFP_ZERO, ++ xnarch_cache_aliasing() ? ++ pgprot_noncached(PAGE_KERNEL) : PAGE_KERNEL); ++ if (basemem == NULL) ++ return -ENOMEM; ++ ++ ret = xnheap_init(&umm->heap, basemem, size); ++ if (ret) { ++ vfree(basemem); ++ return ret; ++ } ++ ++ umm->release = release; ++ atomic_set(&umm->refcount, 1); ++ smp_mb(); ++ ++ return 0; ++} ++ ++void cobalt_umm_destroy(struct cobalt_umm *umm) ++{ ++ secondary_mode_only(); ++ ++ if (atomic_dec_and_test(&umm->refcount)) { ++ xnheap_destroy(&umm->heap); ++ vfree(xnheap_get_membase(&umm->heap)); ++ if (umm->release) ++ umm->release(umm); ++ } ++} +--- linux/kernel/xenomai/posix/syscall32.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/syscall32.h 2021-04-07 16:01:26.121635747 +0800 +@@ -0,0 +1,234 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_POSIX_SYSCALL32_H ++#define _COBALT_POSIX_SYSCALL32_H ++ ++#include ++ ++struct cobalt_mutex_shadow; ++struct cobalt_event_shadow; ++struct cobalt_cond_shadow; ++struct cobalt_sem_shadow; ++struct cobalt_monitor_shadow; ++ ++COBALT_SYSCALL32emu_DECL(thread_create, ++ (compat_ulong_t pth, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param_ex, ++ int xid, ++ __u32 __user *u_winoff)); ++ ++COBALT_SYSCALL32emu_DECL(thread_setschedparam_ex, ++ (compat_ulong_t pth, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++COBALT_SYSCALL32emu_DECL(thread_getschedparam_ex, ++ (compat_ulong_t pth, ++ int __user *u_policy, ++ struct compat_sched_param_ex __user *u_param)); ++ ++COBALT_SYSCALL32emu_DECL(thread_setschedprio, ++ (compat_ulong_t pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++COBALT_SYSCALL32emu_DECL(clock_getres, ++ (clockid_t clock_id, ++ struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(clock_gettime, ++ (clockid_t clock_id, ++ struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(clock_settime, ++ (clockid_t clock_id, ++ const struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(clock_adjtime, ++ (clockid_t clock_id, ++ struct compat_timex __user *u_tx)); ++ ++COBALT_SYSCALL32emu_DECL(clock_nanosleep, ++ (clockid_t clock_id, int flags, ++ const struct compat_timespec __user *u_rqt, ++ struct compat_timespec __user *u_rmt)); ++ ++COBALT_SYSCALL32emu_DECL(mutex_timedlock, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(cond_wait_prologue, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ unsigned int timed, ++ struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(mq_open, ++ (const char __user *u_name, int oflags, ++ mode_t mode, struct compat_mq_attr __user *u_attr)); ++ ++COBALT_SYSCALL32emu_DECL(mq_getattr, ++ (mqd_t uqd, struct compat_mq_attr __user *u_attr)); ++ ++COBALT_SYSCALL32emu_DECL(mq_timedsend, ++ (mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, ++ const struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(mq_timedreceive, ++ (mqd_t uqd, void __user *u_buf, ++ compat_ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32x_DECL(mq_timedreceive, ++ (mqd_t uqd, void __user *u_buf, ++ compat_ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(mq_notify, ++ (mqd_t fd, const struct compat_sigevent *__user u_cev)); ++ ++COBALT_SYSCALL32emu_DECL(sched_weightprio, ++ (int policy, ++ const struct compat_sched_param_ex __user *u_param)); ++ ++COBALT_SYSCALL32emu_DECL(sched_setconfig_np, ++ (int cpu, int policy, ++ union compat_sched_config __user *u_config, ++ size_t len)); ++ ++COBALT_SYSCALL32emu_DECL(sched_getconfig_np, ++ (int cpu, int policy, ++ union compat_sched_config __user *u_config, ++ size_t len)); ++ ++COBALT_SYSCALL32emu_DECL(sched_setscheduler_ex, ++ (compat_pid_t pid, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++COBALT_SYSCALL32emu_DECL(sched_getscheduler_ex, ++ (compat_pid_t pid, ++ int __user *u_policy, ++ struct compat_sched_param_ex __user *u_param)); ++ 
++COBALT_SYSCALL32emu_DECL(timer_create, ++ (clockid_t clock, ++ const struct compat_sigevent __user *u_sev, ++ timer_t __user *u_tm)); ++ ++COBALT_SYSCALL32emu_DECL(timer_settime, ++ (timer_t tm, int flags, ++ const struct compat_itimerspec __user *u_newval, ++ struct compat_itimerspec __user *u_oldval)); ++ ++COBALT_SYSCALL32emu_DECL(timer_gettime, ++ (timer_t tm, ++ struct compat_itimerspec __user *u_val)); ++ ++COBALT_SYSCALL32emu_DECL(timerfd_settime, ++ (int fd, int flags, ++ const struct compat_itimerspec __user *new_value, ++ struct compat_itimerspec __user *old_value)); ++ ++COBALT_SYSCALL32emu_DECL(timerfd_gettime, ++ (int fd, struct compat_itimerspec __user *value)); ++ ++COBALT_SYSCALL32emu_DECL(sigwait, ++ (const compat_sigset_t __user *u_set, ++ int __user *u_sig)); ++ ++COBALT_SYSCALL32emu_DECL(sigtimedwait, ++ (const compat_sigset_t __user *u_set, ++ struct compat_siginfo __user *u_si, ++ const struct compat_timespec __user *u_timeout)); ++ ++COBALT_SYSCALL32emu_DECL(sigwaitinfo, ++ (const compat_sigset_t __user *u_set, ++ struct compat_siginfo __user *u_si)); ++ ++COBALT_SYSCALL32emu_DECL(sigpending, ++ (compat_old_sigset_t __user *u_set)); ++ ++COBALT_SYSCALL32emu_DECL(sigqueue, ++ (pid_t pid, int sig, ++ const union compat_sigval __user *u_value)); ++ ++COBALT_SYSCALL32emu_DECL(monitor_wait, ++ (struct cobalt_monitor_shadow __user *u_mon, ++ int event, const struct compat_timespec __user *u_ts, ++ int __user *u_ret)); ++ ++COBALT_SYSCALL32emu_DECL(event_wait, ++ (struct cobalt_event_shadow __user *u_event, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, const struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(select, ++ (int nfds, ++ compat_fd_set __user *u_rfds, ++ compat_fd_set __user *u_wfds, ++ compat_fd_set __user *u_xfds, ++ struct compat_timeval __user *u_tv)); ++ ++COBALT_SYSCALL32emu_DECL(recvmsg, ++ (int fd, struct compat_msghdr __user *umsg, ++ int flags)); ++ ++COBALT_SYSCALL32emu_DECL(recvmmsg, ++ (int fd, struct compat_mmsghdr __user *u_msgvec, ++ unsigned int vlen, ++ unsigned int flags, struct compat_timespec *u_timeout)); ++ ++COBALT_SYSCALL32emu_DECL(sendmsg, ++ (int fd, struct compat_msghdr __user *umsg, ++ int flags)); ++ ++COBALT_SYSCALL32emu_DECL(sendmmsg, ++ (int fd, struct compat_mmsghdr __user *u_msgvec, unsigned int vlen, ++ unsigned int flags)); ++ ++COBALT_SYSCALL32emu_DECL(mmap, ++ (int fd, ++ struct compat_rtdm_mmap_request __user *u_rma, ++ compat_uptr_t __user *u_addrp)); ++ ++COBALT_SYSCALL32emu_DECL(backtrace, ++ (int nr, compat_ulong_t __user *u_backtrace, ++ int reason)); ++ ++COBALT_SYSCALL32emu_DECL(sem_open, ++ (compat_uptr_t __user *u_addrp, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value)); ++ ++COBALT_SYSCALL32emu_DECL(sem_timedwait, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct compat_timespec __user *u_ts)); ++ ++#endif /* !_COBALT_POSIX_SYSCALL32_H */ +--- linux/kernel/xenomai/posix/cond.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/cond.h 2021-04-07 16:01:26.116635754 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_COND_H ++#define _COBALT_POSIX_COND_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct cobalt_mutex; ++ ++struct cobalt_cond { ++ unsigned int magic; ++ struct xnsynch synchbase; ++ struct list_head mutex_link; ++ struct cobalt_cond_state *state; ++ struct cobalt_condattr attr; ++ struct cobalt_mutex *mutex; ++ struct cobalt_resnode resnode; ++}; ++ ++int __cobalt_cond_wait_prologue(struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)); ++COBALT_SYSCALL_DECL(cond_init, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ const struct cobalt_condattr __user *u_attr)); ++ ++COBALT_SYSCALL_DECL(cond_destroy, ++ (struct cobalt_cond_shadow __user *u_cnd)); ++ ++COBALT_SYSCALL_DECL(cond_wait_prologue, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ unsigned int timed, ++ struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(cond_wait_epilogue, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx)); ++ ++int cobalt_cond_deferred_signals(struct cobalt_cond *cond); ++ ++void cobalt_cond_reclaim(struct cobalt_resnode *node, ++ spl_t s); ++ ++#endif /* !_COBALT_POSIX_COND_H */ +--- linux/kernel/xenomai/posix/timerfd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/timerfd.h 2021-04-07 16:01:26.112635759 +0800 +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (C) 2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef TIMERFD_H ++#define TIMERFD_H ++ ++#include ++#include ++ ++int __cobalt_timerfd_settime(int fd, int flags, ++ const struct itimerspec *new_value, ++ struct itimerspec *old_value); ++ ++int __cobalt_timerfd_gettime(int fd, ++ struct itimerspec *value); ++ ++COBALT_SYSCALL_DECL(timerfd_create, ++ (int clockid, int flags)); ++ ++COBALT_SYSCALL_DECL(timerfd_settime, ++ (int fd, int flags, ++ const struct itimerspec __user *new_value, ++ struct itimerspec __user *old_value)); ++ ++COBALT_SYSCALL_DECL(timerfd_gettime, ++ (int fd, struct itimerspec __user *curr_value)); ++ ++#endif /* TIMERFD_H */ +--- linux/kernel/xenomai/posix/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/Makefile 2021-04-07 16:01:26.107635766 +0800 +@@ -0,0 +1,38 @@ ++ ++ccflags-y += -Ikernel ++ ++obj-$(CONFIG_XENOMAI) += xenomai.o ++ ++xenomai-y := \ ++ clock.o \ ++ cond.o \ ++ corectl.o \ ++ event.o \ ++ io.o \ ++ memory.o \ ++ monitor.o \ ++ mqueue.o \ ++ mutex.o \ ++ nsem.o \ ++ process.o \ ++ sched.o \ ++ sem.o \ ++ signal.o \ ++ syscall.o \ ++ thread.o \ ++ timer.o \ ++ timerfd.o ++ ++syscall_entries := $(srctree)/$(src)/gen-syscall-entries.sh ++ ++quiet_cmd_syscall_entries = GEN $@ ++ cmd_syscall_entries = $(CONFIG_SHELL) '$(syscall_entries)' $^ > $@ ++ ++$(obj)/syscall_entries.h: $(syscall_entries) $(wildcard $(srctree)/$(src)/*.c) ++ $(call if_changed,syscall_entries) ++ ++target += syscall_entries.h ++ ++$(obj)/syscall.o: $(obj)/syscall_entries.h ++ ++xenomai-$(CONFIG_XENO_ARCH_SYS3264) += compat.o syscall32.o +--- linux/kernel/xenomai/posix/process.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/process.c 2021-04-07 16:01:26.102635774 +0800 +@@ -0,0 +1,1710 @@ ++/* ++ * Copyright (C) 2001-2014 Philippe Gerum . ++ * Copyright (C) 2001-2014 The Xenomai project ++ * Copyright (C) 2006 Gilles Chanteperdrix ++ * ++ * SMP support Copyright (C) 2004 The HYADES project ++ * RTAI/fusion Copyright (C) 2004 The RTAI project ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "../debug.h" ++#include "internal.h" ++#include "thread.h" ++#include "sched.h" ++#include "mutex.h" ++#include "cond.h" ++#include "mqueue.h" ++#include "sem.h" ++#include "signal.h" ++#include "timer.h" ++#include "monitor.h" ++#include "clock.h" ++#include "event.h" ++#include "timerfd.h" ++#include "io.h" ++ ++static int gid_arg = -1; ++module_param_named(allowed_group, gid_arg, int, 0644); ++ ++static DEFINE_MUTEX(personality_lock); ++ ++static struct hlist_head *process_hash; ++DEFINE_PRIVATE_XNLOCK(process_hash_lock); ++#define PROCESS_HASH_SIZE 13 ++ ++struct xnthread_personality *cobalt_personalities[NR_PERSONALITIES]; ++ ++static struct xnsynch yield_sync; ++ ++LIST_HEAD(cobalt_global_thread_list); ++ ++struct cobalt_resources cobalt_global_resources = { ++ .condq = LIST_HEAD_INIT(cobalt_global_resources.condq), ++ .mutexq = LIST_HEAD_INIT(cobalt_global_resources.mutexq), ++ .semq = LIST_HEAD_INIT(cobalt_global_resources.semq), ++ .monitorq = LIST_HEAD_INIT(cobalt_global_resources.monitorq), ++ .eventq = LIST_HEAD_INIT(cobalt_global_resources.eventq), ++ .schedq = LIST_HEAD_INIT(cobalt_global_resources.schedq), ++}; ++ ++static inline struct cobalt_process * ++process_from_thread(struct xnthread *thread) ++{ ++ return container_of(thread, struct cobalt_thread, threadbase)->process; ++} ++ ++static unsigned __attribute__((pure)) process_hash_crunch(struct mm_struct *mm) ++{ ++ unsigned long hash = ((unsigned long)mm - PAGE_OFFSET) / sizeof(*mm); ++ return hash % PROCESS_HASH_SIZE; ++} ++ ++static struct cobalt_process *__process_hash_search(struct mm_struct *mm) ++{ ++ unsigned int bucket = process_hash_crunch(mm); ++ struct cobalt_process *p; ++ ++ hlist_for_each_entry(p, &process_hash[bucket], hlink) ++ if (p->mm == mm) ++ return p; ++ ++ return NULL; ++} ++ ++static int process_hash_enter(struct cobalt_process *p) ++{ ++ struct mm_struct *mm = current->mm; ++ unsigned int bucket = process_hash_crunch(mm); ++ int err; ++ spl_t s; ++ ++ xnlock_get_irqsave(&process_hash_lock, s); ++ if (__process_hash_search(mm)) { ++ err = -EBUSY; ++ goto out; ++ } ++ ++ p->mm = mm; ++ hlist_add_head(&p->hlink, &process_hash[bucket]); ++ err = 0; ++ out: ++ xnlock_put_irqrestore(&process_hash_lock, s); ++ return err; ++} ++ ++static void process_hash_remove(struct cobalt_process *p) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&process_hash_lock, s); ++ if (p->mm) ++ hlist_del(&p->hlink); ++ xnlock_put_irqrestore(&process_hash_lock, s); ++} ++ ++struct cobalt_process *cobalt_search_process(struct mm_struct *mm) ++{ ++ struct cobalt_process *process; ++ spl_t s; ++ ++ xnlock_get_irqsave(&process_hash_lock, s); ++ process = __process_hash_search(mm); ++ xnlock_put_irqrestore(&process_hash_lock, s); ++ ++ return process; ++} ++ ++static void *lookup_context(int xid) ++{ ++ struct cobalt_process *process = cobalt_current_process(); ++ void *priv = NULL; ++ spl_t s; ++ ++ xnlock_get_irqsave(&process_hash_lock, s); ++ /* ++ * First try matching the process context attached to the ++ * (usually main) thread which issued sc_cobalt_bind. If not ++ * found, try matching by mm context, which should point us ++ * back to the latter. 
If none match, then the current process ++ * is unbound. ++ */ ++ if (process == NULL && current->mm) ++ process = __process_hash_search(current->mm); ++ if (process) ++ priv = process->priv[xid]; ++ ++ xnlock_put_irqrestore(&process_hash_lock, s); ++ ++ return priv; ++} ++ ++static void remove_process(struct cobalt_process *process) ++{ ++ struct xnthread_personality *personality; ++ void *priv; ++ int xid; ++ ++ mutex_lock(&personality_lock); ++ ++ for (xid = NR_PERSONALITIES - 1; xid >= 0; xid--) { ++ if (!__test_and_clear_bit(xid, &process->permap)) ++ continue; ++ personality = cobalt_personalities[xid]; ++ priv = process->priv[xid]; ++ if (priv == NULL) ++ continue; ++ /* ++ * CAUTION: process potentially refers to stale memory ++ * upon return from detach_process() for the Cobalt ++ * personality, so don't dereference it afterwards. ++ */ ++ if (xid) ++ process->priv[xid] = NULL; ++ __clear_bit(personality->xid, &process->permap); ++ personality->ops.detach_process(priv); ++ atomic_dec(&personality->refcnt); ++ XENO_WARN_ON(COBALT, atomic_read(&personality->refcnt) < 0); ++ if (personality->module) ++ module_put(personality->module); ++ } ++ ++ cobalt_set_process(NULL); ++ ++ mutex_unlock(&personality_lock); ++} ++ ++static void post_ppd_release(struct cobalt_umm *umm) ++{ ++ struct cobalt_process *process; ++ ++ process = container_of(umm, struct cobalt_process, sys_ppd.umm); ++ kfree(process); ++} ++ ++static inline char *get_exe_path(struct task_struct *p) ++{ ++ struct file *exe_file; ++ char *pathname, *buf; ++ struct mm_struct *mm; ++ struct path path; ++ ++ /* ++ * PATH_MAX is fairly large, and in any case won't fit on the ++ * caller's stack happily; since we are mapping a shadow, ++ * which is a heavyweight operation anyway, let's pick the ++ * memory from the page allocator. ++ */ ++ buf = (char *)__get_free_page(GFP_KERNEL); ++ if (buf == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ mm = get_task_mm(p); ++ if (mm == NULL) { ++ pathname = "vmlinux"; ++ goto copy; /* kernel thread */ ++ } ++ ++ exe_file = get_mm_exe_file(mm); ++ mmput(mm); ++ if (exe_file == NULL) { ++ pathname = ERR_PTR(-ENOENT); ++ goto out; /* no luck. */ ++ } ++ ++ path = exe_file->f_path; ++ path_get(&exe_file->f_path); ++ fput(exe_file); ++ pathname = d_path(&path, buf, PATH_MAX); ++ path_put(&path); ++ if (IS_ERR(pathname)) ++ goto out; /* mmmh... */ ++copy: ++ /* caution: d_path() may start writing anywhere in the buffer. */ ++ pathname = kstrdup(pathname, GFP_KERNEL); ++out: ++ free_page((unsigned long)buf); ++ ++ return pathname; ++} ++ ++static inline int raise_cap(int cap) ++{ ++ struct cred *new; ++ ++ new = prepare_creds(); ++ if (new == NULL) ++ return -ENOMEM; ++ ++ cap_raise(new->cap_effective, cap); ++ ++ return commit_creds(new); ++} ++ ++static int bind_personality(struct xnthread_personality *personality) ++{ ++ struct cobalt_process *process; ++ void *priv; ++ ++ /* ++ * We also check capabilities for stacking a Cobalt extension, ++ * in case the process dropped the supervisor privileges after ++ * a successful initial binding to the Cobalt interface. ++ */ ++ if (!capable(CAP_SYS_NICE) && ++ (gid_arg == -1 || !in_group_p(KGIDT_INIT(gid_arg)))) ++ return -EPERM; ++ /* ++ * Protect from the same process binding to the same interface ++ * several times. 
++ */ ++ priv = lookup_context(personality->xid); ++ if (priv) ++ return 0; ++ ++ priv = personality->ops.attach_process(); ++ if (IS_ERR(priv)) ++ return PTR_ERR(priv); ++ ++ process = cobalt_current_process(); ++ /* ++ * We are still covered by the personality_lock, so we may ++ * safely bump the module refcount after the attach handler ++ * has returned. ++ */ ++ if (personality->module && !try_module_get(personality->module)) { ++ personality->ops.detach_process(priv); ++ return -EAGAIN; ++ } ++ ++ __set_bit(personality->xid, &process->permap); ++ atomic_inc(&personality->refcnt); ++ process->priv[personality->xid] = priv; ++ ++ raise_cap(CAP_SYS_NICE); ++ raise_cap(CAP_IPC_LOCK); ++ raise_cap(CAP_SYS_RAWIO); ++ ++ return 0; ++} ++ ++int cobalt_bind_personality(unsigned int magic) ++{ ++ struct xnthread_personality *personality; ++ int xid, ret = -ESRCH; ++ ++ mutex_lock(&personality_lock); ++ ++ for (xid = 1; xid < NR_PERSONALITIES; xid++) { ++ personality = cobalt_personalities[xid]; ++ if (personality && personality->magic == magic) { ++ ret = bind_personality(personality); ++ break; ++ } ++ } ++ ++ mutex_unlock(&personality_lock); ++ ++ return ret ?: xid; ++} ++ ++int cobalt_bind_core(int ufeatures) ++{ ++ struct cobalt_process *process; ++ int ret; ++ ++ mutex_lock(&personality_lock); ++ ret = bind_personality(&cobalt_personality); ++ mutex_unlock(&personality_lock); ++ if (ret) ++ return ret; ++ ++ process = cobalt_current_process(); ++ /* Feature set userland knows about. */ ++ process->ufeatures = ufeatures; ++ ++ return 0; ++} ++ ++/** ++ * @fn int cobalt_register_personality(struct xnthread_personality *personality) ++ * @internal ++ * @brief Register a new interface personality. ++ * ++ * - personality->ops.attach_process() is called when a user-space ++ * process binds to the personality, on behalf of one of its ++ * threads. The attach_process() handler may return: ++ * ++ * . an opaque pointer, representing the context of the calling ++ * process for this personality; ++ * ++ * . a NULL pointer, meaning that no per-process structure should be ++ * attached to this process for this personality; ++ * ++ * . ERR_PTR(negative value) indicating an error, the binding ++ * process will then abort. ++ * ++ * - personality->ops.detach_process() is called on behalf of an ++ * exiting user-space process which has previously attached to the ++ * personality. This handler is passed a pointer to the per-process ++ * data received earlier from the ops->attach_process() handler. ++ * ++ * @return the personality (extension) identifier. ++ * ++ * @note cobalt_get_context() is NULL when ops.detach_process() is ++ * invoked for the personality the caller detaches from. ++ * ++ * @coretags{secondary-only} ++ */ ++int cobalt_register_personality(struct xnthread_personality *personality) ++{ ++ int xid; ++ ++ mutex_lock(&personality_lock); ++ ++ for (xid = 0; xid < NR_PERSONALITIES; xid++) { ++ if (cobalt_personalities[xid] == NULL) { ++ personality->xid = xid; ++ atomic_set(&personality->refcnt, 0); ++ cobalt_personalities[xid] = personality; ++ goto out; ++ } ++ } ++ ++ xid = -EAGAIN; ++out: ++ mutex_unlock(&personality_lock); ++ ++ return xid; ++} ++EXPORT_SYMBOL_GPL(cobalt_register_personality); ++ ++/* ++ * @brief Unregister an interface personality. 
++ * ++ * @coretags{secondary-only} ++ */ ++int cobalt_unregister_personality(int xid) ++{ ++ struct xnthread_personality *personality; ++ int ret = 0; ++ ++ if (xid < 0 || xid >= NR_PERSONALITIES) ++ return -EINVAL; ++ ++ mutex_lock(&personality_lock); ++ ++ personality = cobalt_personalities[xid]; ++ if (atomic_read(&personality->refcnt) > 0) ++ ret = -EBUSY; ++ else ++ cobalt_personalities[xid] = NULL; ++ ++ mutex_unlock(&personality_lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(cobalt_unregister_personality); ++ ++/** ++ * Stack a new personality over Cobalt for the current thread. ++ * ++ * This service registers the current thread as a member of the ++ * additional personality identified by @a xid. If the current thread ++ * is already assigned this personality, the call returns successfully ++ * with no effect. ++ * ++ * @param xid the identifier of the additional personality. ++ * ++ * @return A handle to the previous personality. The caller should ++ * save this handle for unstacking @a xid when applicable via a call ++ * to cobalt_pop_personality(). ++ * ++ * @coretags{secondary-only} ++ */ ++struct xnthread_personality * ++cobalt_push_personality(int xid) ++{ ++ struct ipipe_threadinfo *p = ipipe_current_threadinfo(); ++ struct xnthread_personality *prev, *next; ++ struct xnthread *thread = p->thread; ++ ++ secondary_mode_only(); ++ ++ mutex_lock(&personality_lock); ++ ++ if (xid < 0 || xid >= NR_PERSONALITIES || ++ p->process == NULL || !test_bit(xid, &p->process->permap)) { ++ mutex_unlock(&personality_lock); ++ return NULL; ++ } ++ ++ next = cobalt_personalities[xid]; ++ prev = thread->personality; ++ if (next == prev) { ++ mutex_unlock(&personality_lock); ++ return prev; ++ } ++ ++ thread->personality = next; ++ mutex_unlock(&personality_lock); ++ xnthread_run_handler(thread, map_thread); ++ ++ return prev; ++} ++EXPORT_SYMBOL_GPL(cobalt_push_personality); ++ ++/** ++ * Pop the topmost personality from the current thread. ++ * ++ * This service pops the topmost personality off the current thread. ++ * ++ * @param prev the previous personality which was returned by the ++ * latest call to cobalt_push_personality() for the current thread. ++ * ++ * @coretags{secondary-only} ++ */ ++void cobalt_pop_personality(struct xnthread_personality *prev) ++{ ++ struct ipipe_threadinfo *p = ipipe_current_threadinfo(); ++ struct xnthread *thread = p->thread; ++ ++ secondary_mode_only(); ++ thread->personality = prev; ++} ++EXPORT_SYMBOL_GPL(cobalt_pop_personality); ++ ++/** ++ * Return the per-process data attached to the calling user process. ++ * ++ * This service returns the per-process data attached to the calling ++ * user process for the personality whose xid is @a xid. ++ * ++ * The per-process data was obtained from the ->attach_process() ++ * handler defined for the personality @a xid refers to. ++ * ++ * See cobalt_register_personality() documentation for information on ++ * the way to attach a per-process data to a process. ++ * ++ * @param xid the personality identifier. ++ * ++ * @return the per-process data if the current context is a user-space ++ * process; @return NULL otherwise. As a special case, ++ * cobalt_get_context(0) returns the current Cobalt process ++ * descriptor, which is strictly identical to calling ++ * cobalt_current_process(). 
++ * ++ * @coretags{task-unrestricted} ++ */ ++void *cobalt_get_context(int xid) ++{ ++ return lookup_context(xid); ++} ++EXPORT_SYMBOL_GPL(cobalt_get_context); ++ ++int cobalt_yield(xnticks_t min, xnticks_t max) ++{ ++ xnticks_t start; ++ int ret; ++ ++ start = xnclock_read_monotonic(&nkclock); ++ max += start; ++ min += start; ++ ++ do { ++ ret = xnsynch_sleep_on(&yield_sync, max, XN_ABSOLUTE); ++ if (ret & XNBREAK) ++ return -EINTR; ++ } while (ret == 0 && xnclock_read_monotonic(&nkclock) < min); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(cobalt_yield); ++ ++static inline void init_uthread_info(struct xnthread *thread) ++{ ++ struct ipipe_threadinfo *p; ++ ++ p = ipipe_current_threadinfo(); ++ p->thread = thread; ++ p->process = cobalt_search_process(current->mm); ++} ++ ++static inline void clear_threadinfo(void) ++{ ++ struct ipipe_threadinfo *p = ipipe_current_threadinfo(); ++ p->thread = NULL; ++ p->process = NULL; ++} ++ ++#ifdef CONFIG_MMU ++ ++static inline int disable_ondemand_memory(void) ++{ ++ struct task_struct *p = current; ++ kernel_siginfo_t si; ++ ++ if ((p->mm->def_flags & VM_LOCKED) == 0) { ++ memset(&si, 0, sizeof(si)); ++ si.si_signo = SIGDEBUG; ++ si.si_code = SI_QUEUE; ++ si.si_int = SIGDEBUG_NOMLOCK | sigdebug_marker; ++ send_sig_info(SIGDEBUG, &si, p); ++ return 0; ++ } ++ ++ return __ipipe_disable_ondemand_mappings(p); ++} ++ ++static inline int get_mayday_prot(void) ++{ ++ return PROT_READ|PROT_EXEC; ++} ++ ++#else /* !CONFIG_MMU */ ++ ++static inline int disable_ondemand_memory(void) ++{ ++ return 0; ++} ++ ++static inline int get_mayday_prot(void) ++{ ++ /* ++ * Until we stop backing /dev/mem with the mayday page, we ++ * can't ask for PROT_EXEC since the former does not define ++ * mmap capabilities, and default ones won't allow an ++ * executable mapping with MAP_SHARED. In the NOMMU case, this ++ * is (currently) not an issue. ++ */ ++ return PROT_READ; ++} ++ ++#endif /* !CONFIG_MMU */ ++ ++/** ++ * @fn int cobalt_map_user(struct xnthread *thread, __u32 __user *u_winoff) ++ * @internal ++ * @brief Create a shadow thread context over a user task. ++ * ++ * This call maps a Xenomai thread to the current regular Linux task ++ * running in userland. The priority and scheduling class of the ++ * underlying Linux task are not affected; it is assumed that the ++ * interface library did set them appropriately before issuing the ++ * shadow mapping request. ++ * ++ * @param thread The descriptor address of the new shadow thread to be ++ * mapped to current. This descriptor must have been previously ++ * initialized by a call to xnthread_init(). ++ * ++ * @param u_winoff will receive the offset of the per-thread ++ * "u_window" structure in the global heap associated to @a ++ * thread. This structure reflects thread state information visible ++ * from userland through a shared memory window. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -EINVAL is returned if the thread control block does not bear the ++ * XNUSER bit. ++ * ++ * - -EBUSY is returned if either the current Linux task or the ++ * associated shadow thread is already involved in a shadow mapping. 
++ * ++ * @coretags{secondary-only} ++ */ ++int cobalt_map_user(struct xnthread *thread, __u32 __user *u_winoff) ++{ ++ struct xnthread_user_window *u_window; ++ struct xnthread_start_attr attr; ++ struct cobalt_ppd *sys_ppd; ++ struct cobalt_umm *umm; ++ int ret; ++ ++ if (!xnthread_test_state(thread, XNUSER)) ++ return -EINVAL; ++ ++ if (xnthread_current() || xnthread_test_state(thread, XNMAPPED)) ++ return -EBUSY; ++ ++ if (!access_wok(u_winoff, sizeof(*u_winoff))) ++ return -EFAULT; ++ ++ ret = disable_ondemand_memory(); ++ if (ret) ++ return ret; ++ ++ umm = &cobalt_kernel_ppd.umm; ++ u_window = cobalt_umm_zalloc(umm, sizeof(*u_window)); ++ if (u_window == NULL) ++ return -ENOMEM; ++ ++ thread->u_window = u_window; ++ __xn_put_user(cobalt_umm_offset(umm, u_window), u_winoff); ++ xnthread_pin_initial(thread); ++ ++ /* ++ * CAUTION: we enable the pipeline notifier only when our ++ * shadow TCB is consistent, so that we won't trigger false ++ * positive in debug code from handle_schedule_event() and ++ * friends. ++ */ ++ xnthread_init_shadow_tcb(thread); ++ xnthread_suspend(thread, XNRELAX, XN_INFINITE, XN_RELATIVE, NULL); ++ init_uthread_info(thread); ++ xnthread_set_state(thread, XNMAPPED); ++ xndebug_shadow_init(thread); ++ sys_ppd = cobalt_ppd_get(0); ++ atomic_inc(&sys_ppd->refcnt); ++ /* ++ * ->map_thread() handler is invoked after the TCB is fully ++ * built, and when we know for sure that current will go ++ * through our task-exit handler, because it has a shadow ++ * extension and I-pipe notifications will soon be enabled for ++ * it. ++ */ ++ xnthread_run_handler(thread, map_thread); ++ ipipe_enable_notifier(current); ++ ++ attr.mode = 0; ++ attr.entry = NULL; ++ attr.cookie = NULL; ++ ret = xnthread_start(thread, &attr); ++ if (ret) ++ return ret; ++ ++ xnthread_sync_window(thread); ++ ++ xntrace_pid(xnthread_host_pid(thread), ++ xnthread_current_priority(thread)); ++ ++ return 0; ++} ++ ++#ifdef IPIPE_KEVT_PTRESUME ++static void stop_debugged_process(struct xnthread *thread) ++{ ++ struct cobalt_process *process = process_from_thread(thread); ++ struct cobalt_thread *cth; ++ ++ if (process->debugged_threads > 0) ++ return; ++ ++ list_for_each_entry(cth, &process->thread_list, next) { ++ if (&cth->threadbase == thread) ++ continue; ++ ++ xnthread_suspend(&cth->threadbase, XNDBGSTOP, XN_INFINITE, ++ XN_RELATIVE, NULL); ++ } ++} ++ ++static void resume_debugged_process(struct cobalt_process *process) ++{ ++ struct cobalt_thread *cth; ++ ++ xnsched_lock(); ++ ++ list_for_each_entry(cth, &process->thread_list, next) ++ if (xnthread_test_state(&cth->threadbase, XNDBGSTOP)) ++ xnthread_resume(&cth->threadbase, XNDBGSTOP); ++ ++ xnsched_unlock(); ++} ++ ++#else /* IPIPE_KEVT_PTRESUME unavailable */ ++ ++static inline void stop_debugged_process(struct xnthread *thread) ++{ ++} ++ ++static inline void resume_debugged_process(struct cobalt_process *process) ++{ ++} ++#endif /* IPIPE_KEVT_PTRESUME unavailable */ ++ ++/* called with nklock held */ ++static void cobalt_register_debugged_thread(struct xnthread *thread) ++{ ++ struct cobalt_process *process = process_from_thread(thread); ++ ++ xnthread_set_state(thread, XNSSTEP); ++ ++ stop_debugged_process(thread); ++ process->debugged_threads++; ++ ++ if (xnthread_test_state(thread, XNRELAX)) ++ xnthread_suspend(thread, XNDBGSTOP, XN_INFINITE, XN_RELATIVE, ++ NULL); ++} ++ ++/* called with nklock held */ ++static void cobalt_unregister_debugged_thread(struct xnthread *thread) ++{ ++ struct cobalt_process *process = 
process_from_thread(thread); ++ ++ process->debugged_threads--; ++ xnthread_clear_state(thread, XNSSTEP); ++ ++ if (process->debugged_threads == 0) ++ resume_debugged_process(process); ++} ++ ++static inline int handle_exception(struct ipipe_trap_data *d) ++{ ++ struct xnthread *thread; ++ struct xnsched *sched; ++ ++ sched = xnsched_current(); ++ thread = sched->curr; ++ ++ trace_cobalt_thread_fault(d); ++ ++ if (xnthread_test_state(thread, XNROOT)) ++ return 0; ++ ++#ifdef IPIPE_KEVT_USERINTRET ++ if (xnarch_fault_bp_p(d) && user_mode(d->regs)) { ++ spl_t s; ++ ++ XENO_WARN_ON(CORE, xnthread_test_state(thread, XNRELAX)); ++ xnlock_get_irqsave(&nklock, s); ++ xnthread_set_info(thread, XNCONTHI); ++ ipipe_enable_user_intret_notifier(); ++ stop_debugged_process(thread); ++ xnlock_put_irqrestore(&nklock, s); ++ xnsched_run(); ++ } ++#endif ++ ++ if (xnarch_fault_fpu_p(d)) { ++#ifdef CONFIG_XENO_ARCH_FPU ++ spl_t s; ++ ++ /* FPU exception received in primary mode. */ ++ splhigh(s); ++ if (xnarch_handle_fpu_fault(sched->fpuholder, thread, d)) { ++ sched->fpuholder = thread; ++ splexit(s); ++ return 1; ++ } ++ splexit(s); ++#endif /* CONFIG_XENO_ARCH_FPU */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) ++ printk("invalid use of FPU in Xenomai context at %pS\n", ++ (void *)xnarch_fault_pc(d)); ++#else ++ print_symbol("invalid use of FPU in Xenomai context at %s\n", ++ xnarch_fault_pc(d)); ++#endif ++ } ++ ++ /* ++ * If we experienced a trap on behalf of a shadow thread ++ * running in primary mode, move it to the Linux domain, ++ * leaving the kernel process the exception. ++ */ ++#if defined(CONFIG_XENO_OPT_DEBUG_COBALT) || defined(CONFIG_XENO_OPT_DEBUG_USER) ++ if (!user_mode(d->regs)) { ++ xntrace_panic_freeze(); ++ printk(XENO_WARNING ++ "switching %s to secondary mode after exception #%u in " ++ "kernel-space at 0x%lx (pid %d)\n", thread->name, ++ xnarch_fault_trap(d), ++ xnarch_fault_pc(d), ++ xnthread_host_pid(thread)); ++ xntrace_panic_dump(); ++ } else if (xnarch_fault_notify(d)) /* Don't report debug traps */ ++ printk(XENO_WARNING ++ "switching %s to secondary mode after exception #%u from " ++ "user-space at 0x%lx (pid %d)\n", thread->name, ++ xnarch_fault_trap(d), ++ xnarch_fault_pc(d), ++ xnthread_host_pid(thread)); ++#endif ++ ++ if (xnarch_fault_pf_p(d)) ++ /* ++ * The page fault counter is not SMP-safe, but it's a ++ * simple indicator that something went wrong wrt ++ * memory locking anyway. ++ */ ++ xnstat_counter_inc(&thread->stat.pf); ++ ++ xnthread_relax(xnarch_fault_notify(d), SIGDEBUG_MIGRATE_FAULT); ++ ++ return 0; ++} ++ ++static int handle_mayday_event(struct pt_regs *regs) ++{ ++ XENO_BUG_ON(COBALT, !xnthread_test_state(xnthread_current(), XNUSER)); ++ ++ xnthread_relax(0, 0); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++int ipipe_trap_hook(struct ipipe_trap_data *data) ++{ ++ if (data->exception == IPIPE_TRAP_MAYDAY) ++ return handle_mayday_event(data->regs); ++ ++ /* ++ * No migration is possible on behalf of the head domain, so ++ * the following access is safe. ++ */ ++ raw_cpu_ptr(&cobalt_machine_cpudata)->faults[data->exception]++; ++ ++ if (handle_exception(data)) ++ return KEVENT_STOP; ++ ++ /* ++ * CAUTION: access faults must be propagated downstream ++ * whichever domain caused them, so that we don't spuriously ++ * raise a fatal error when some Linux fixup code is available ++ * to recover from the fault. ++ */ ++ return KEVENT_PROPAGATE; ++} ++ ++/* ++ * Legacy idle hook, unconditionally allow entering the idle state. 
++ */ ++bool ipipe_enter_idle_hook(void) ++{ ++ return true; ++} ++ ++#ifdef CONFIG_SMP ++ ++static int handle_setaffinity_event(struct ipipe_cpu_migration_data *d) ++{ ++ struct task_struct *p = d->task; ++ struct xnthread *thread; ++ spl_t s; ++ ++ thread = xnthread_from_task(p); ++ if (thread == NULL) ++ return KEVENT_PROPAGATE; ++ ++ /* ++ * Detect a Cobalt thread sleeping in primary mode which is ++ * required to migrate to another CPU by the host kernel. ++ * ++ * We may NOT fix up thread->sched immediately using the ++ * passive migration call, because that latter always has to ++ * take place on behalf of the target thread itself while ++ * running in secondary mode. Therefore, that thread needs to ++ * go through secondary mode first, then move back to primary ++ * mode, so that affinity_ok() does the fixup work. ++ * ++ * We force this by sending a SIGSHADOW signal to the migrated ++ * thread, asking it to switch back to primary mode from the ++ * handler, at which point the interrupted syscall may be ++ * restarted. ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnthread_test_state(thread, XNTHREAD_BLOCK_BITS & ~XNRELAX)) ++ xnthread_signal(thread, SIGSHADOW, SIGSHADOW_ACTION_HARDEN); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline bool affinity_ok(struct task_struct *p) /* nklocked, IRQs off */ ++{ ++ struct xnthread *thread = xnthread_from_task(p); ++ struct xnsched *sched; ++ int cpu = task_cpu(p); ++ ++ /* ++ * To maintain consistency between both Cobalt and host ++ * schedulers, reflecting a thread migration to another CPU ++ * into the Cobalt scheduler state must happen from secondary ++ * mode only, on behalf of the migrated thread itself once it ++ * runs on the target CPU. ++ * ++ * This means that the Cobalt scheduler state regarding the ++ * CPU information lags behind the host scheduler state until ++ * the migrated thread switches back to primary mode ++ * (i.e. task_cpu(p) != xnsched_cpu(xnthread_from_task(p)->sched)). ++ * This is ok since Cobalt does not schedule such thread until then. ++ * ++ * check_affinity() detects when a Cobalt thread switching ++ * back to primary mode did move to another CPU earlier while ++ * in secondary mode. If so, do the fixups to reflect the ++ * change. ++ */ ++ if (!xnsched_threading_cpu(cpu)) { ++ /* ++ * The thread is about to switch to primary mode on a ++ * non-rt CPU, which is damn wrong and hopeless. ++ * Whine and cancel that thread. ++ */ ++ printk(XENO_WARNING "thread %s[%d] switched to non-rt CPU%d, aborted.\n", ++ thread->name, xnthread_host_pid(thread), cpu); ++ /* ++ * Can't call xnthread_cancel() from a migration ++ * point, that would break. Since we are on the wakeup ++ * path to hardening, just raise XNCANCELD to catch it ++ * in xnthread_harden(). ++ */ ++ xnthread_set_info(thread, XNCANCELD); ++ return false; ++ } ++ ++ sched = xnsched_struct(cpu); ++ if (sched == thread->sched) ++ return true; ++ ++ /* ++ * The current thread moved to a supported real-time CPU, ++ * which is not part of its original affinity mask ++ * though. Assume user wants to extend this mask. 
++ */ ++ if (!cpumask_test_cpu(cpu, &thread->affinity)) ++ cpumask_set_cpu(cpu, &thread->affinity); ++ ++ xnthread_run_handler_stack(thread, move_thread, cpu); ++ xnthread_migrate_passive(thread, sched); ++ ++ return true; ++} ++ ++#else /* !CONFIG_SMP */ ++ ++struct ipipe_cpu_migration_data; ++ ++static int handle_setaffinity_event(struct ipipe_cpu_migration_data *d) ++{ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline bool affinity_ok(struct task_struct *p) ++{ ++ return true; ++} ++ ++#endif /* CONFIG_SMP */ ++ ++void ipipe_migration_hook(struct task_struct *p) /* hw IRQs off */ ++{ ++ struct xnthread *thread = xnthread_from_task(p); ++ ++ xnlock_get(&nklock); ++ ++ /* ++ * We fire the handler before the thread is migrated, so that ++ * thread->sched does not change between paired invocations of ++ * relax_thread/harden_thread handlers. ++ */ ++ xnthread_run_handler_stack(thread, harden_thread); ++ if (affinity_ok(p)) ++ xnthread_resume(thread, XNRELAX); ++ ++#ifdef IPIPE_KEVT_USERINTRET ++ /* ++ * In case we migrated independently of the user return notifier, clear ++ * XNCONTHI here and also disable the notifier - we are already done. ++ */ ++ if (unlikely(xnthread_test_info(thread, XNCONTHI))) { ++ xnthread_clear_info(thread, XNCONTHI); ++ ipipe_disable_user_intret_notifier(); ++ } ++#endif ++ ++ /* Unregister as debugged thread in case we postponed this. */ ++ if (unlikely(xnthread_test_state(thread, XNSSTEP))) ++ cobalt_unregister_debugged_thread(thread); ++ ++ xnlock_put(&nklock); ++ ++ xnsched_run(); ++} ++ ++#ifdef CONFIG_XENO_OPT_HOSTRT ++ ++static IPIPE_DEFINE_SPINLOCK(__hostrtlock); ++ ++static int handle_hostrt_event(struct ipipe_hostrt_data *hostrt) ++{ ++ unsigned long flags; ++ urwstate_t tmp; ++ ++ /* ++ * The locking strategy is twofold: ++ * - The spinlock protects against concurrent updates from within the ++ * Linux kernel and against preemption by Xenomai ++ * - The unsynced R/W block is for lockless read-only access. ++ */ ++ raw_spin_lock_irqsave(&__hostrtlock, flags); ++ ++ unsynced_write_block(&tmp, &nkvdso->hostrt_data.lock) { ++ nkvdso->hostrt_data.live = 1; ++ nkvdso->hostrt_data.cycle_last = hostrt->cycle_last; ++ nkvdso->hostrt_data.mask = hostrt->mask; ++ nkvdso->hostrt_data.mult = hostrt->mult; ++ nkvdso->hostrt_data.shift = hostrt->shift; ++ nkvdso->hostrt_data.wall_sec = hostrt->wall_time_sec; ++ nkvdso->hostrt_data.wall_nsec = hostrt->wall_time_nsec; ++ nkvdso->hostrt_data.wtom_sec = hostrt->wall_to_monotonic.tv_sec; ++ nkvdso->hostrt_data.wtom_nsec = hostrt->wall_to_monotonic.tv_nsec; ++ } ++ ++ raw_spin_unlock_irqrestore(&__hostrtlock, flags); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline void init_hostrt(void) ++{ ++ unsynced_rw_init(&nkvdso->hostrt_data.lock); ++ nkvdso->hostrt_data.live = 0; ++} ++ ++#else /* !CONFIG_XENO_OPT_HOSTRT */ ++ ++struct ipipe_hostrt_data; ++ ++static inline int handle_hostrt_event(struct ipipe_hostrt_data *hostrt) ++{ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline void init_hostrt(void) { } ++ ++#endif /* !CONFIG_XENO_OPT_HOSTRT */ ++ ++static void __handle_taskexit_event(struct task_struct *p) ++{ ++ struct cobalt_ppd *sys_ppd; ++ struct xnthread *thread; ++ spl_t s; ++ ++ /* ++ * We are called for both kernel and user shadows over the ++ * root thread. 
++ */ ++ secondary_mode_only(); ++ ++ thread = xnthread_current(); ++ XENO_BUG_ON(COBALT, thread == NULL); ++ trace_cobalt_shadow_unmap(thread); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnthread_test_state(thread, XNSSTEP)) ++ cobalt_unregister_debugged_thread(thread); ++ ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnthread_run_handler_stack(thread, exit_thread); ++ ++ if (xnthread_test_state(thread, XNUSER)) { ++ cobalt_umm_free(&cobalt_kernel_ppd.umm, thread->u_window); ++ thread->u_window = NULL; ++ sys_ppd = cobalt_ppd_get(0); ++ if (atomic_dec_and_test(&sys_ppd->refcnt)) ++ remove_process(cobalt_current_process()); ++ } ++} ++ ++static int handle_taskexit_event(struct task_struct *p) /* p == current */ ++{ ++ __handle_taskexit_event(p); ++ ++ /* ++ * __xnthread_cleanup() -> ... -> finalize_thread ++ * handler. From that point, the TCB is dropped. Be careful of ++ * not treading on stale memory within @thread. ++ */ ++ __xnthread_cleanup(xnthread_current()); ++ ++ clear_threadinfo(); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline void signal_yield(void) ++{ ++ spl_t s; ++ ++ if (!xnsynch_pended_p(&yield_sync)) ++ return; ++ ++ xnlock_get_irqsave(&nklock, s); ++ if (xnsynch_pended_p(&yield_sync)) { ++ xnsynch_flush(&yield_sync, 0); ++ xnsched_run(); ++ } ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++static int handle_schedule_event(struct task_struct *next_task) ++{ ++ struct task_struct *prev_task; ++ struct xnthread *next; ++ sigset_t pending; ++ spl_t s; ++ ++ signal_yield(); ++ ++ prev_task = current; ++ next = xnthread_from_task(next_task); ++ if (next == NULL) ++ goto out; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* ++ * Track tasks leaving the ptraced state. Check both SIGSTOP ++ * (NPTL) and SIGINT (LinuxThreads) to detect ptrace ++ * continuation. ++ */ ++ if (xnthread_test_state(next, XNSSTEP)) { ++ if (signal_pending(next_task)) { ++ /* ++ * Do not grab the sighand lock here: it's ++ * useless, and we already own the runqueue ++ * lock, so this would expose us to deadlock ++ * situations on SMP. ++ */ ++ sigorsets(&pending, ++ &next_task->pending.signal, ++ &next_task->signal->shared_pending.signal); ++ if (sigismember(&pending, SIGSTOP) || ++ sigismember(&pending, SIGINT)) ++ goto no_ptrace; ++ } ++ ++ /* ++ * Do not unregister before the thread migrated. ++ * cobalt_unregister_debugged_thread will then be called by our ++ * ipipe_migration_hook. ++ */ ++ if (!xnthread_test_info(next, XNCONTHI)) ++ cobalt_unregister_debugged_thread(next); ++ ++ xnthread_set_localinfo(next, XNHICCUP); ++ } ++ ++no_ptrace: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* ++ * Do basic sanity checks on the incoming thread state. ++ * NOTE: we allow ptraced threads to run shortly in order to ++ * properly recover from a stopped state. ++ */ ++ if (!XENO_WARN(COBALT, !xnthread_test_state(next, XNRELAX), ++ "hardened thread %s[%d] running in Linux domain?! " ++ "(status=0x%x, sig=%d, prev=%s[%d])", ++ next->name, task_pid_nr(next_task), ++ xnthread_get_state(next), ++ signal_pending(next_task), ++ prev_task->comm, task_pid_nr(prev_task))) ++ XENO_WARN(COBALT, ++ !(next_task->ptrace & PT_PTRACED) && ++ !xnthread_test_state(next, XNDORMANT) ++ && xnthread_test_state(next, XNPEND), ++ "blocked thread %s[%d] rescheduled?! 
" ++ "(status=0x%x, sig=%d, prev=%s[%d])", ++ next->name, task_pid_nr(next_task), ++ xnthread_get_state(next), ++ signal_pending(next_task), prev_task->comm, ++ task_pid_nr(prev_task)); ++out: ++ return KEVENT_PROPAGATE; ++} ++ ++static int handle_sigwake_event(struct task_struct *p) ++{ ++ struct xnthread *thread; ++ sigset_t pending; ++ spl_t s; ++ ++ thread = xnthread_from_task(p); ++ if (thread == NULL) ++ return KEVENT_PROPAGATE; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* ++ * CAUTION: __TASK_TRACED is not set in p->state yet. This ++ * state bit will be set right after we return, when the task ++ * is woken up. ++ */ ++ if ((p->ptrace & PT_PTRACED) && !xnthread_test_state(thread, XNSSTEP)) { ++ /* We already own the siglock. */ ++ sigorsets(&pending, ++ &p->pending.signal, ++ &p->signal->shared_pending.signal); ++ ++ if (sigismember(&pending, SIGTRAP) || ++ sigismember(&pending, SIGSTOP) ++ || sigismember(&pending, SIGINT)) ++ cobalt_register_debugged_thread(thread); ++ } ++ ++ if (xnthread_test_state(thread, XNRELAX)) ++ goto out; ++ ++ /* ++ * If kicking a shadow thread in primary mode, make sure Linux ++ * won't schedule in its mate under our feet as a result of ++ * running signal_wake_up(). The Xenomai scheduler must remain ++ * in control for now, until we explicitly relax the shadow ++ * thread to allow for processing the pending signals. Make ++ * sure we keep the additional state flags unmodified so that ++ * we don't break any undergoing ptrace. ++ */ ++ if (p->state & (TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE)) ++ cobalt_set_task_state(p, p->state | TASK_NOWAKEUP); ++ ++ /* ++ * Allow a thread stopped for debugging to resume briefly in order to ++ * migrate to secondary mode. xnthread_relax will reapply XNDBGSTOP. ++ */ ++ if (xnthread_test_state(thread, XNDBGSTOP)) ++ xnthread_resume(thread, XNDBGSTOP); ++ ++ __xnthread_kick(thread); ++out: ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++static int handle_cleanup_event(struct mm_struct *mm) ++{ ++ struct cobalt_process *old, *process; ++ struct cobalt_ppd *sys_ppd; ++ struct xnthread *curr; ++ ++ /* ++ * We are NOT called for exiting kernel shadows. ++ * cobalt_current_process() is cleared if we get there after ++ * handle_task_exit(), so we need to restore this context ++ * pointer temporarily. ++ */ ++ process = cobalt_search_process(mm); ++ old = cobalt_set_process(process); ++ sys_ppd = cobalt_ppd_get(0); ++ if (sys_ppd != &cobalt_kernel_ppd) { ++ bool running_exec; ++ ++ /* ++ * Detect a userland shadow running exec(), i.e. still ++ * attached to the current linux task (no prior ++ * clear_threadinfo). In this case, we emulate a task ++ * exit, since the Xenomai binding shall not survive ++ * the exec() syscall. Since the process will keep on ++ * running though, we have to disable the event ++ * notifier manually for it. ++ */ ++ curr = xnthread_current(); ++ running_exec = curr && (current->flags & PF_EXITING) == 0; ++ if (running_exec) { ++ __handle_taskexit_event(current); ++ ipipe_disable_notifier(current); ++ } ++ if (atomic_dec_and_test(&sys_ppd->refcnt)) ++ remove_process(process); ++ if (running_exec) { ++ __xnthread_cleanup(curr); ++ clear_threadinfo(); ++ } ++ } ++ ++ /* ++ * CAUTION: Do not override a state change caused by ++ * remove_process(). 
++ */ ++ if (cobalt_current_process() == process) ++ cobalt_set_process(old); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline int handle_clockfreq_event(unsigned int *p) ++{ ++ unsigned int newfreq = *p; ++ ++ xnclock_update_freq(newfreq); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++#ifdef IPIPE_KEVT_USERINTRET ++static int handle_user_return(struct task_struct *task) ++{ ++ struct xnthread *thread; ++ spl_t s; ++ int err; ++ ++ ipipe_disable_user_intret_notifier(); ++ ++ thread = xnthread_from_task(task); ++ if (thread == NULL) ++ return KEVENT_PROPAGATE; ++ ++ if (xnthread_test_info(thread, XNCONTHI)) { ++ xnlock_get_irqsave(&nklock, s); ++ xnthread_clear_info(thread, XNCONTHI); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ err = xnthread_harden(); ++ ++ /* ++ * XNCONTHI may or may not have been re-applied if ++ * harden bailed out due to pending signals. Make sure ++ * it is set in that case. ++ */ ++ if (err == -ERESTARTSYS) { ++ xnlock_get_irqsave(&nklock, s); ++ xnthread_set_info(thread, XNCONTHI); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ } ++ ++ return KEVENT_PROPAGATE; ++} ++#endif /* IPIPE_KEVT_USERINTRET */ ++ ++#ifdef IPIPE_KEVT_PTRESUME ++int handle_ptrace_resume(struct ipipe_ptrace_resume_data *resume) ++{ ++ struct xnthread *thread; ++ spl_t s; ++ ++ thread = xnthread_from_task(resume->task); ++ if (thread == NULL) ++ return KEVENT_PROPAGATE; ++ ++ if (resume->request == PTRACE_SINGLESTEP && ++ xnthread_test_state(thread, XNSSTEP)) { ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnthread_resume(thread, XNDBGSTOP); ++ cobalt_unregister_debugged_thread(thread); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++ return KEVENT_PROPAGATE; ++} ++#endif /* IPIPE_KEVT_PTRESUME */ ++ ++int ipipe_kevent_hook(int kevent, void *data) ++{ ++ int ret; ++ ++ switch (kevent) { ++ case IPIPE_KEVT_SCHEDULE: ++ ret = handle_schedule_event(data); ++ break; ++ case IPIPE_KEVT_SIGWAKE: ++ ret = handle_sigwake_event(data); ++ break; ++ case IPIPE_KEVT_EXIT: ++ ret = handle_taskexit_event(data); ++ break; ++ case IPIPE_KEVT_CLEANUP: ++ ret = handle_cleanup_event(data); ++ break; ++ case IPIPE_KEVT_HOSTRT: ++ ret = handle_hostrt_event(data); ++ break; ++ case IPIPE_KEVT_SETAFFINITY: ++ ret = handle_setaffinity_event(data); ++ break; ++#ifdef IPIPE_KEVT_CLOCKFREQ ++ case IPIPE_KEVT_CLOCKFREQ: ++ ret = handle_clockfreq_event(data); ++ break; ++#endif ++#ifdef IPIPE_KEVT_USERINTRET ++ case IPIPE_KEVT_USERINTRET: ++ ret = handle_user_return(data); ++ break; ++#endif ++#ifdef IPIPE_KEVT_PTRESUME ++ case IPIPE_KEVT_PTRESUME: ++ ret = handle_ptrace_resume(data); ++ break; ++#endif ++ default: ++ ret = KEVENT_PROPAGATE; ++ } ++ ++ return ret; ++} ++ ++static int attach_process(struct cobalt_process *process) ++{ ++ struct cobalt_ppd *p = &process->sys_ppd; ++ char *exe_path; ++ int ret; ++ ++ ret = cobalt_umm_init(&p->umm, CONFIG_XENO_OPT_PRIVATE_HEAPSZ * 1024, ++ post_ppd_release); ++ if (ret) ++ return ret; ++ ++ cobalt_umm_set_name(&p->umm, "private heap[%d]", task_pid_nr(current)); ++ ++ exe_path = get_exe_path(current); ++ if (IS_ERR(exe_path)) { ++ printk(XENO_WARNING ++ "%s[%d] can't find exe path\n", ++ current->comm, task_pid_nr(current)); ++ exe_path = NULL; /* Not lethal, but weird. 
*/ ++ } ++ p->exe_path = exe_path; ++ xntree_init(&p->fds); ++ atomic_set(&p->refcnt, 1); ++ ++ ret = process_hash_enter(process); ++ if (ret) ++ goto fail_hash; ++ ++ return 0; ++fail_hash: ++ if (p->exe_path) ++ kfree(p->exe_path); ++ cobalt_umm_destroy(&p->umm); ++ ++ return ret; ++} ++ ++static void *cobalt_process_attach(void) ++{ ++ struct cobalt_process *process; ++ int ret; ++ ++ process = kzalloc(sizeof(*process), GFP_KERNEL); ++ if (process == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = attach_process(process); ++ if (ret) { ++ kfree(process); ++ return ERR_PTR(ret); ++ } ++ ++ INIT_LIST_HEAD(&process->resources.condq); ++ INIT_LIST_HEAD(&process->resources.mutexq); ++ INIT_LIST_HEAD(&process->resources.semq); ++ INIT_LIST_HEAD(&process->resources.monitorq); ++ INIT_LIST_HEAD(&process->resources.eventq); ++ INIT_LIST_HEAD(&process->resources.schedq); ++ INIT_LIST_HEAD(&process->sigwaiters); ++ INIT_LIST_HEAD(&process->thread_list); ++ xntree_init(&process->usems); ++ bitmap_fill(process->timers_map, CONFIG_XENO_OPT_NRTIMERS); ++ cobalt_set_process(process); ++ ++ return process; ++} ++ ++static void detach_process(struct cobalt_process *process) ++{ ++ struct cobalt_ppd *p = &process->sys_ppd; ++ ++ if (p->exe_path) ++ kfree(p->exe_path); ++ ++ rtdm_fd_cleanup(p); ++ process_hash_remove(process); ++ /* ++ * CAUTION: the process descriptor might be immediately ++ * released as a result of calling cobalt_umm_destroy(), so we ++ * must do this last, not to tread on stale memory. ++ */ ++ cobalt_umm_destroy(&p->umm); ++} ++ ++static void __reclaim_resource(struct cobalt_process *process, ++ void (*reclaim)(struct cobalt_resnode *node, spl_t s), ++ struct list_head *local, ++ struct list_head *global) ++{ ++ struct cobalt_resnode *node, *tmp; ++ LIST_HEAD(stash); ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(global)) ++ goto flush_local; ++ ++ list_for_each_entry_safe(node, tmp, global, next) { ++ if (node->owner == process) { ++ list_del(&node->next); ++ list_add(&node->next, &stash); ++ } ++ } ++ ++ list_for_each_entry_safe(node, tmp, &stash, next) { ++ reclaim(node, s); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ ++ XENO_BUG_ON(COBALT, !list_empty(&stash)); ++ ++flush_local: ++ if (list_empty(local)) ++ goto out; ++ ++ list_for_each_entry_safe(node, tmp, local, next) { ++ reclaim(node, s); ++ xnlock_get_irqsave(&nklock, s); ++ } ++out: ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++#define cobalt_reclaim_resource(__process, __reclaim, __type) \ ++ __reclaim_resource(__process, __reclaim, \ ++ &(__process)->resources.__type ## q, \ ++ &cobalt_global_resources.__type ## q) ++ ++static void cobalt_process_detach(void *arg) ++{ ++ struct cobalt_process *process = arg; ++ ++ cobalt_nsem_reclaim(process); ++ cobalt_timer_reclaim(process); ++ cobalt_sched_reclaim(process); ++ cobalt_reclaim_resource(process, cobalt_cond_reclaim, cond); ++ cobalt_reclaim_resource(process, cobalt_mutex_reclaim, mutex); ++ cobalt_reclaim_resource(process, cobalt_event_reclaim, event); ++ cobalt_reclaim_resource(process, cobalt_monitor_reclaim, monitor); ++ cobalt_reclaim_resource(process, cobalt_sem_reclaim, sem); ++ detach_process(process); ++ /* ++ * The cobalt_process descriptor release may be deferred until ++ * the last mapping on the private heap is gone. However, this ++ * is potentially stale memory already. 
++ */ ++} ++ ++struct xnthread_personality cobalt_personality = { ++ .name = "cobalt", ++ .magic = 0, ++ .ops = { ++ .attach_process = cobalt_process_attach, ++ .detach_process = cobalt_process_detach, ++ .map_thread = cobalt_thread_map, ++ .exit_thread = cobalt_thread_exit, ++ .finalize_thread = cobalt_thread_finalize, ++ }, ++}; ++EXPORT_SYMBOL_GPL(cobalt_personality); ++ ++__init int cobalt_init(void) ++{ ++ unsigned int i, size; ++ int ret; ++ ++ size = sizeof(*process_hash) * PROCESS_HASH_SIZE; ++ process_hash = kmalloc(size, GFP_KERNEL); ++ if (process_hash == NULL) { ++ printk(XENO_ERR "cannot allocate processes hash table\n"); ++ return -ENOMEM; ++ } ++ ++ ret = xndebug_init(); ++ if (ret) ++ goto fail_debug; ++ ++ for (i = 0; i < PROCESS_HASH_SIZE; i++) ++ INIT_HLIST_HEAD(&process_hash[i]); ++ ++ xnsynch_init(&yield_sync, XNSYNCH_FIFO, NULL); ++ ++ ret = cobalt_memdev_init(); ++ if (ret) ++ goto fail_memdev; ++ ++ ret = cobalt_register_personality(&cobalt_personality); ++ if (ret) ++ goto fail_register; ++ ++ ret = cobalt_signal_init(); ++ if (ret) ++ goto fail_siginit; ++ ++ init_hostrt(); ++ ipipe_set_hooks(ipipe_root_domain, IPIPE_SYSCALL|IPIPE_KEVENT); ++ ipipe_set_hooks(&xnsched_realtime_domain, IPIPE_SYSCALL|IPIPE_TRAP); ++ ++ if (gid_arg != -1) ++ printk(XENO_INFO "allowing access to group %d\n", gid_arg); ++ ++ return 0; ++fail_siginit: ++ cobalt_unregister_personality(0); ++fail_register: ++ cobalt_memdev_cleanup(); ++fail_memdev: ++ xnsynch_destroy(&yield_sync); ++ xndebug_cleanup(); ++fail_debug: ++ kfree(process_hash); ++ ++ return ret; ++} +--- linux/kernel/xenomai/posix/syscall32.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/syscall32.c 2021-04-07 16:01:26.098635779 +0800 +@@ -0,0 +1,945 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "syscall32.h" ++#include "thread.h" ++#include "mutex.h" ++#include "cond.h" ++#include "sem.h" ++#include "sched.h" ++#include "clock.h" ++#include "timer.h" ++#include "timerfd.h" ++#include "signal.h" ++#include "monitor.h" ++#include "event.h" ++#include "mqueue.h" ++#include "io.h" ++#include "../debug.h" ++ ++COBALT_SYSCALL32emu(thread_create, init, ++ (compat_ulong_t pth, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param_ex, ++ int xid, ++ __u32 __user *u_winoff)) ++{ ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ ret = sys32_get_param_ex(policy, ¶m_ex, u_param_ex); ++ if (ret) ++ return ret; ++ ++ return __cobalt_thread_create(pth, policy, ¶m_ex, xid, u_winoff); ++} ++ ++COBALT_SYSCALL32emu(thread_setschedparam_ex, conforming, ++ (compat_ulong_t pth, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ ret = sys32_get_param_ex(policy, ¶m_ex, u_param_ex); ++ if (ret) ++ return ret; ++ ++ return cobalt_thread_setschedparam_ex(pth, policy, ¶m_ex, ++ u_winoff, u_promoted); ++} ++ ++COBALT_SYSCALL32emu(thread_getschedparam_ex, current, ++ (compat_ulong_t pth, ++ int __user *u_policy, ++ struct compat_sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ int ret, policy; ++ ++ ret = cobalt_thread_getschedparam_ex(pth, &policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ ret = cobalt_copy_to_user(u_policy, &policy, sizeof(policy)); ++ ++ return ret ?: sys32_put_param_ex(policy, u_param, ¶m_ex); ++} ++ ++COBALT_SYSCALL32emu(thread_setschedprio, conforming, ++ (compat_ulong_t pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ return cobalt_thread_setschedprio(pth, prio, u_winoff, u_promoted); ++} ++ ++static inline int sys32_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? -EFAULT : ++ sys32_get_timespec(ts, u_ts); ++} ++ ++COBALT_SYSCALL32emu(sem_open, lostage, ++ (compat_uptr_t __user *u_addrp, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value)) ++{ ++ struct cobalt_sem_shadow __user *usm; ++ compat_uptr_t cusm; ++ ++ if (__xn_get_user(cusm, u_addrp)) ++ return -EFAULT; ++ ++ usm = __cobalt_sem_open(compat_ptr(cusm), u_name, oflags, mode, value); ++ if (IS_ERR(usm)) ++ return PTR_ERR(usm); ++ ++ return __xn_put_user(ptr_to_compat(usm), u_addrp) ? -EFAULT : 0; ++} ++ ++COBALT_SYSCALL32emu(sem_timedwait, primary, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct compat_timespec __user *u_ts)) ++{ ++ return __cobalt_sem_timedwait(u_sem, u_ts, sys32_fetch_timeout); ++} ++ ++COBALT_SYSCALL32emu(clock_getres, current, ++ (clockid_t clock_id, ++ struct compat_timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ int ret; ++ ++ ret = __cobalt_clock_getres(clock_id, &ts); ++ if (ret) ++ return ret; ++ ++ return u_ts ? 
sys32_put_timespec(u_ts, &ts) : 0; ++} ++ ++COBALT_SYSCALL32emu(clock_gettime, current, ++ (clockid_t clock_id, ++ struct compat_timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ int ret; ++ ++ ret = __cobalt_clock_gettime(clock_id, &ts); ++ if (ret) ++ return ret; ++ ++ return sys32_put_timespec(u_ts, &ts); ++} ++ ++COBALT_SYSCALL32emu(clock_settime, current, ++ (clockid_t clock_id, ++ const struct compat_timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ int ret; ++ ++ ret = sys32_get_timespec(&ts, u_ts); ++ if (ret) ++ return ret; ++ ++ return __cobalt_clock_settime(clock_id, &ts); ++} ++ ++COBALT_SYSCALL32emu(clock_adjtime, current, ++ (clockid_t clock_id, struct compat_timex __user *u_tx)) ++{ ++ struct timex tx; ++ int ret; ++ ++ ret = sys32_get_timex(&tx, u_tx); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_clock_adjtime(clock_id, &tx); ++ if (ret) ++ return ret; ++ ++ return sys32_put_timex(u_tx, &tx); ++} ++ ++COBALT_SYSCALL32emu(clock_nanosleep, nonrestartable, ++ (clockid_t clock_id, int flags, ++ const struct compat_timespec __user *u_rqt, ++ struct compat_timespec __user *u_rmt)) ++{ ++ struct timespec rqt, rmt, *rmtp = NULL; ++ int ret; ++ ++ if (u_rmt) ++ rmtp = &rmt; ++ ++ ret = sys32_get_timespec(&rqt, u_rqt); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_clock_nanosleep(clock_id, flags, &rqt, rmtp); ++ if (ret == -EINTR && flags == 0 && rmtp) ++ ret = sys32_put_timespec(u_rmt, rmtp); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL32emu(mutex_timedlock, primary, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct compat_timespec __user *u_ts)) ++{ ++ return __cobalt_mutex_timedlock_break(u_mx, u_ts, sys32_fetch_timeout); ++} ++ ++COBALT_SYSCALL32emu(cond_wait_prologue, nonrestartable, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ unsigned int timed, ++ struct compat_timespec __user *u_ts)) ++{ ++ return __cobalt_cond_wait_prologue(u_cnd, u_mx, u_err, u_ts, ++ timed ? sys32_fetch_timeout : NULL); ++} ++ ++COBALT_SYSCALL32emu(mq_open, lostage, ++ (const char __user *u_name, int oflags, ++ mode_t mode, struct compat_mq_attr __user *u_attr)) ++{ ++ struct mq_attr _attr, *attr = &_attr; ++ int ret; ++ ++ if ((oflags & O_CREAT) && u_attr) { ++ ret = sys32_get_mqattr(&_attr, u_attr); ++ if (ret) ++ return ret; ++ } else ++ attr = NULL; ++ ++ return __cobalt_mq_open(u_name, oflags, mode, attr); ++} ++ ++COBALT_SYSCALL32emu(mq_getattr, current, ++ (mqd_t uqd, struct compat_mq_attr __user *u_attr)) ++{ ++ struct mq_attr attr; ++ int ret; ++ ++ ret = __cobalt_mq_getattr(uqd, &attr); ++ if (ret) ++ return ret; ++ ++ return sys32_put_mqattr(u_attr, &attr); ++} ++ ++COBALT_SYSCALL32emu(mq_timedsend, primary, ++ (mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, ++ const struct compat_timespec __user *u_ts)) ++{ ++ return __cobalt_mq_timedsend(uqd, u_buf, len, prio, ++ u_ts, u_ts ? sys32_fetch_timeout : NULL); ++} ++ ++COBALT_SYSCALL32emu(mq_timedreceive, primary, ++ (mqd_t uqd, void __user *u_buf, ++ compat_ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct compat_timespec __user *u_ts)) ++{ ++ compat_ssize_t clen; ++ ssize_t len; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&clen, u_len, sizeof(*u_len)); ++ if (ret) ++ return ret; ++ ++ len = clen; ++ ret = __cobalt_mq_timedreceive(uqd, u_buf, &len, u_prio, ++ u_ts, u_ts ? 
sys32_fetch_timeout : NULL); ++ clen = len; ++ ++ return ret ?: cobalt_copy_to_user(u_len, &clen, sizeof(*u_len)); ++} ++ ++static inline int mq_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? -EFAULT : ++ cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++ ++} ++ ++COBALT_SYSCALL32emu(mq_notify, primary, ++ (mqd_t fd, const struct compat_sigevent *__user u_cev)) ++{ ++ struct sigevent sev; ++ int ret; ++ ++ if (u_cev) { ++ ret = sys32_get_sigevent(&sev, u_cev); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_mq_notify(fd, u_cev ? &sev : NULL); ++} ++ ++COBALT_SYSCALL32emu(sched_weightprio, current, ++ (int policy, ++ const struct compat_sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ ret = sys32_get_param_ex(policy, ¶m_ex, u_param); ++ if (ret) ++ return ret; ++ ++ return __cobalt_sched_weightprio(policy, ¶m_ex); ++} ++ ++static union sched_config * ++sys32_fetch_config(int policy, const void __user *u_config, size_t *len) ++{ ++ union compat_sched_config *cbuf; ++ union sched_config *buf; ++ int ret, n; ++ ++ if (u_config == NULL) ++ return ERR_PTR(-EFAULT); ++ ++ if (policy == SCHED_QUOTA && *len < sizeof(cbuf->quota)) ++ return ERR_PTR(-EINVAL); ++ ++ cbuf = xnmalloc(*len); ++ if (cbuf == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = cobalt_copy_from_user(cbuf, u_config, *len); ++ if (ret) { ++ buf = ERR_PTR(ret); ++ goto out; ++ } ++ ++ switch (policy) { ++ case SCHED_TP: ++ *len = sched_tp_confsz(cbuf->tp.nr_windows); ++ break; ++ case SCHED_QUOTA: ++ break; ++ default: ++ buf = ERR_PTR(-EINVAL); ++ goto out; ++ } ++ ++ buf = xnmalloc(*len); ++ if (buf == NULL) { ++ buf = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ if (policy == SCHED_QUOTA) ++ memcpy(&buf->quota, &cbuf->quota, sizeof(cbuf->quota)); ++ else { ++ buf->tp.op = cbuf->tp.op; ++ buf->tp.nr_windows = cbuf->tp.nr_windows; ++ for (n = 0; n < buf->tp.nr_windows; n++) { ++ buf->tp.windows[n].ptid = cbuf->tp.windows[n].ptid; ++ buf->tp.windows[n].offset.tv_sec = cbuf->tp.windows[n].offset.tv_sec; ++ buf->tp.windows[n].offset.tv_nsec = cbuf->tp.windows[n].offset.tv_nsec; ++ buf->tp.windows[n].duration.tv_sec = cbuf->tp.windows[n].duration.tv_sec; ++ buf->tp.windows[n].duration.tv_nsec = cbuf->tp.windows[n].duration.tv_nsec; ++ } ++ } ++out: ++ xnfree(cbuf); ++ ++ return buf; ++} ++ ++static int sys32_ack_config(int policy, const union sched_config *config, ++ void __user *u_config) ++{ ++ union compat_sched_config __user *u_p = u_config; ++ ++ if (policy != SCHED_QUOTA) ++ return 0; ++ ++ return u_config == NULL ? 
-EFAULT : ++ cobalt_copy_to_user(&u_p->quota.info, &config->quota.info, ++ sizeof(u_p->quota.info)); ++} ++ ++static ssize_t sys32_put_config(int policy, ++ void __user *u_config, size_t u_len, ++ const union sched_config *config, size_t len) ++{ ++ union compat_sched_config __user *u_p = u_config; ++ int n, ret; ++ ++ if (u_config == NULL) ++ return -EFAULT; ++ ++ if (policy == SCHED_QUOTA) { ++ if (u_len < sizeof(u_p->quota)) ++ return -EINVAL; ++ return cobalt_copy_to_user(&u_p->quota.info, &config->quota.info, ++ sizeof(u_p->quota.info)) ?: ++ sizeof(u_p->quota.info); ++ } ++ ++ /* SCHED_TP */ ++ ++ if (u_len < compat_sched_tp_confsz(config->tp.nr_windows)) ++ return -ENOSPC; ++ ++ __xn_put_user(config->tp.op, &u_p->tp.op); ++ __xn_put_user(config->tp.nr_windows, &u_p->tp.nr_windows); ++ ++ for (n = 0, ret = 0; n < config->tp.nr_windows; n++) { ++ ret |= __xn_put_user(config->tp.windows[n].ptid, ++ &u_p->tp.windows[n].ptid); ++ ret |= __xn_put_user(config->tp.windows[n].offset.tv_sec, ++ &u_p->tp.windows[n].offset.tv_sec); ++ ret |= __xn_put_user(config->tp.windows[n].offset.tv_nsec, ++ &u_p->tp.windows[n].offset.tv_nsec); ++ ret |= __xn_put_user(config->tp.windows[n].duration.tv_sec, ++ &u_p->tp.windows[n].duration.tv_sec); ++ ret |= __xn_put_user(config->tp.windows[n].duration.tv_nsec, ++ &u_p->tp.windows[n].duration.tv_nsec); ++ } ++ ++ return ret ?: u_len; ++} ++ ++COBALT_SYSCALL32emu(sched_setconfig_np, conforming, ++ (int cpu, int policy, ++ union compat_sched_config __user *u_config, ++ size_t len)) ++{ ++ return __cobalt_sched_setconfig_np(cpu, policy, u_config, len, ++ sys32_fetch_config, sys32_ack_config); ++} ++ ++COBALT_SYSCALL32emu(sched_getconfig_np, conformin, ++ (int cpu, int policy, ++ union compat_sched_config __user *u_config, ++ size_t len)) ++{ ++ return __cobalt_sched_getconfig_np(cpu, policy, u_config, len, ++ sys32_fetch_config, sys32_put_config); ++} ++ ++COBALT_SYSCALL32emu(sched_setscheduler_ex, conforming, ++ (compat_pid_t pid, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ ret = sys32_get_param_ex(policy, ¶m_ex, u_param_ex); ++ if (ret) ++ return ret; ++ ++ return cobalt_sched_setscheduler_ex(pid, policy, ¶m_ex, ++ u_winoff, u_promoted); ++} ++ ++COBALT_SYSCALL32emu(sched_getscheduler_ex, current, ++ (compat_pid_t pid, ++ int __user *u_policy, ++ struct compat_sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ int ret, policy; ++ ++ ret = cobalt_sched_getscheduler_ex(pid, &policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ ret = cobalt_copy_to_user(u_policy, &policy, sizeof(policy)); ++ ++ return ret ?: sys32_put_param_ex(policy, u_param, ¶m_ex); ++} ++ ++COBALT_SYSCALL32emu(timer_create, current, ++ (clockid_t clock, ++ const struct compat_sigevent __user *u_sev, ++ timer_t __user *u_tm)) ++{ ++ struct sigevent sev, *evp = NULL; ++ int ret; ++ ++ if (u_sev) { ++ evp = &sev; ++ ret = sys32_get_sigevent(&sev, u_sev); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_timer_create(clock, evp, u_tm); ++} ++ ++COBALT_SYSCALL32emu(timer_settime, primary, ++ (timer_t tm, int flags, ++ const struct compat_itimerspec __user *u_newval, ++ struct compat_itimerspec __user *u_oldval)) ++{ ++ struct itimerspec newv, oldv, *oldvp = &oldv; ++ int ret; ++ ++ if (u_oldval == NULL) ++ oldvp = NULL; ++ ++ ret = sys32_get_itimerspec(&newv, u_newval); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_timer_settime(tm, 
flags, &newv, oldvp); ++ if (ret) ++ return ret; ++ ++ if (oldvp) { ++ ret = sys32_put_itimerspec(u_oldval, oldvp); ++ if (ret) ++ __cobalt_timer_settime(tm, flags, oldvp, NULL); ++ } ++ ++ return ret; ++} ++ ++COBALT_SYSCALL32emu(timer_gettime, current, ++ (timer_t tm, struct compat_itimerspec __user *u_val)) ++{ ++ struct itimerspec val; ++ int ret; ++ ++ ret = __cobalt_timer_gettime(tm, &val); ++ ++ return ret ?: sys32_put_itimerspec(u_val, &val); ++} ++ ++COBALT_SYSCALL32emu(timerfd_settime, primary, ++ (int fd, int flags, ++ const struct compat_itimerspec __user *new_value, ++ struct compat_itimerspec __user *old_value)) ++{ ++ struct itimerspec ovalue, value; ++ int ret; ++ ++ ret = sys32_get_itimerspec(&value, new_value); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_timerfd_settime(fd, flags, &value, &ovalue); ++ if (ret) ++ return ret; ++ ++ if (old_value) { ++ ret = sys32_put_itimerspec(old_value, &ovalue); ++ value.it_value.tv_sec = 0; ++ value.it_value.tv_nsec = 0; ++ __cobalt_timerfd_settime(fd, flags, &value, NULL); ++ } ++ ++ return ret; ++} ++ ++COBALT_SYSCALL32emu(timerfd_gettime, current, ++ (int fd, struct compat_itimerspec __user *curr_value)) ++{ ++ struct itimerspec value; ++ int ret; ++ ++ ret = __cobalt_timerfd_gettime(fd, &value); ++ ++ return ret ?: sys32_put_itimerspec(curr_value, &value); ++} ++ ++COBALT_SYSCALL32emu(sigwait, primary, ++ (const compat_sigset_t __user *u_set, ++ int __user *u_sig)) ++{ ++ sigset_t set; ++ int ret, sig; ++ ++ ret = sys32_get_sigset(&set, u_set); ++ if (ret) ++ return ret; ++ ++ sig = __cobalt_sigwait(&set); ++ if (sig < 0) ++ return sig; ++ ++ return cobalt_copy_to_user(u_sig, &sig, sizeof(*u_sig)); ++} ++ ++COBALT_SYSCALL32emu(sigtimedwait, nonrestartable, ++ (const compat_sigset_t __user *u_set, ++ struct compat_siginfo __user *u_si, ++ const struct compat_timespec __user *u_timeout)) ++{ ++ struct timespec timeout; ++ sigset_t set; ++ int ret; ++ ++ ret = sys32_get_sigset(&set, u_set); ++ if (ret) ++ return ret; ++ ++ ret = sys32_get_timespec(&timeout, u_timeout); ++ if (ret) ++ return ret; ++ ++ return __cobalt_sigtimedwait(&set, &timeout, u_si, true); ++} ++ ++COBALT_SYSCALL32emu(sigwaitinfo, nonrestartable, ++ (const compat_sigset_t __user *u_set, ++ struct compat_siginfo __user *u_si)) ++{ ++ sigset_t set; ++ int ret; ++ ++ ret = sys32_get_sigset(&set, u_set); ++ if (ret) ++ return ret; ++ ++ return __cobalt_sigwaitinfo(&set, u_si, true); ++} ++ ++COBALT_SYSCALL32emu(sigpending, primary, (compat_old_sigset_t __user *u_set)) ++{ ++ struct cobalt_thread *curr = cobalt_current_thread(); ++ ++ return sys32_put_sigset((compat_sigset_t *)u_set, &curr->sigpending); ++} ++ ++COBALT_SYSCALL32emu(sigqueue, conforming, ++ (pid_t pid, int sig, ++ const union compat_sigval __user *u_value)) ++{ ++ union sigval val; ++ int ret; ++ ++ ret = sys32_get_sigval(&val, u_value); ++ ++ return ret ?: __cobalt_sigqueue(pid, sig, &val); ++} ++ ++COBALT_SYSCALL32emu(monitor_wait, nonrestartable, ++ (struct cobalt_monitor_shadow __user *u_mon, ++ int event, const struct compat_timespec __user *u_ts, ++ int __user *u_ret)) ++{ ++ struct timespec ts, *tsp = NULL; ++ int ret; ++ ++ if (u_ts) { ++ tsp = &ts; ++ ret = sys32_get_timespec(&ts, u_ts); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_monitor_wait(u_mon, event, tsp, u_ret); ++} ++ ++COBALT_SYSCALL32emu(event_wait, primary, ++ (struct cobalt_event_shadow __user *u_event, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, const struct compat_timespec __user *u_ts)) ++{ 
++ struct timespec ts, *tsp = NULL; ++ int ret; ++ ++ if (u_ts) { ++ tsp = &ts; ++ ret = sys32_get_timespec(&ts, u_ts); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_event_wait(u_event, bits, u_bits_r, mode, tsp); ++} ++ ++COBALT_SYSCALL32emu(select, nonrestartable, ++ (int nfds, ++ compat_fd_set __user *u_rfds, ++ compat_fd_set __user *u_wfds, ++ compat_fd_set __user *u_xfds, ++ struct compat_timeval __user *u_tv)) ++{ ++ compat_fd_set __user *ufd_sets[XNSELECT_MAX_TYPES] = { ++ [XNSELECT_READ] = u_rfds, ++ [XNSELECT_WRITE] = u_wfds, ++ [XNSELECT_EXCEPT] = u_xfds ++ }; ++ fd_set *in_fds[XNSELECT_MAX_TYPES] = {NULL, NULL, NULL}; ++ fd_set *out_fds[XNSELECT_MAX_TYPES] = {NULL, NULL, NULL}; ++ fd_set in_fds_storage[XNSELECT_MAX_TYPES], ++ out_fds_storage[XNSELECT_MAX_TYPES]; ++ xnticks_t timeout = XN_INFINITE; ++ xntmode_t mode = XN_RELATIVE; ++ struct xnselector *selector; ++ struct xnthread *curr; ++ struct timeval tv; ++ xnsticks_t diff; ++ size_t fds_size; ++ int i, err; ++ ++ curr = xnthread_current(); ++ ++ if (u_tv) { ++ err = sys32_get_timeval(&tv, u_tv); ++ if (err) ++ return err; ++ ++ if (tv.tv_usec >= 1000000) ++ return -EINVAL; ++ ++ timeout = clock_get_ticks(CLOCK_MONOTONIC) + tv2ns(&tv); ++ mode = XN_ABSOLUTE; ++ } ++ ++ fds_size = __FDELT__(nfds + __NFDBITS__ - 1) * sizeof(compat_ulong_t); ++ ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (ufd_sets[i]) { ++ in_fds[i] = &in_fds_storage[i]; ++ out_fds[i] = & out_fds_storage[i]; ++ if (sys32_get_fdset(in_fds[i], ufd_sets[i], fds_size) < 0) ++ return -EFAULT; ++ } ++ ++ selector = curr->selector; ++ if (selector == NULL) { ++ /* Bail out if non-RTDM fildes is found. */ ++ if (!__cobalt_first_fd_valid_p(in_fds, nfds)) ++ return -EBADF; ++ ++ selector = xnmalloc(sizeof(*curr->selector)); ++ if (selector == NULL) ++ return -ENOMEM; ++ xnselector_init(selector); ++ curr->selector = selector; ++ ++ /* Bind directly the file descriptors, we do not need to go ++ through xnselect returning -ECHRNG */ ++ err = __cobalt_select_bind_all(selector, in_fds, nfds); ++ if (err) ++ return err; ++ } ++ ++ do { ++ err = xnselect(selector, out_fds, in_fds, nfds, timeout, mode); ++ if (err == -ECHRNG) { ++ int err = __cobalt_select_bind_all(selector, out_fds, nfds); ++ if (err) ++ return err; ++ } ++ } while (err == -ECHRNG); ++ ++ if (u_tv && (err > 0 || err == -EINTR)) { ++ diff = timeout - clock_get_ticks(CLOCK_MONOTONIC); ++ if (diff > 0) ++ ticks2tv(&tv, diff); ++ else ++ tv.tv_sec = tv.tv_usec = 0; ++ ++ if (sys32_put_timeval(u_tv, &tv)) ++ return -EFAULT; ++ } ++ ++ if (err >= 0) ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (ufd_sets[i] && ++ sys32_put_fdset(ufd_sets[i], out_fds[i], ++ sizeof(fd_set)) < 0) ++ return -EFAULT; ++ return err; ++} ++ ++COBALT_SYSCALL32emu(recvmsg, handover, ++ (int fd, struct compat_msghdr __user *umsg, ++ int flags)) ++{ ++ struct user_msghdr m; ++ ssize_t ret; ++ ++ ret = sys32_get_msghdr(&m, umsg); ++ if (ret) ++ return ret; ++ ++ ret = rtdm_fd_recvmsg(fd, &m, flags); ++ if (ret < 0) ++ return ret; ++ ++ return sys32_put_msghdr(umsg, &m) ?: ret; ++} ++ ++static int get_timespec32(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return sys32_get_timespec(ts, u_ts); ++} ++ ++static int get_mmsg32(struct mmsghdr *mmsg, void __user *u_mmsg) ++{ ++ return sys32_get_mmsghdr(mmsg, u_mmsg); ++} ++ ++static int put_mmsg32(void __user **u_mmsg_p, const struct mmsghdr *mmsg) ++{ ++ struct compat_mmsghdr __user **p = (struct compat_mmsghdr **)u_mmsg_p, ++ *q __user = (*p)++; ++ ++ return 
sys32_put_mmsghdr(q, mmsg); ++} ++ ++COBALT_SYSCALL32emu(recvmmsg, primary, ++ (int ufd, struct compat_mmsghdr __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, struct compat_timespec *u_timeout)) ++{ ++ return __rtdm_fd_recvmmsg(ufd, u_msgvec, vlen, flags, u_timeout, ++ get_mmsg32, put_mmsg32, ++ get_timespec32); ++} ++ ++COBALT_SYSCALL32emu(sendmsg, handover, ++ (int fd, struct compat_msghdr __user *umsg, int flags)) ++{ ++ struct user_msghdr m; ++ int ret; ++ ++ ret = sys32_get_msghdr(&m, umsg); ++ ++ return ret ?: rtdm_fd_sendmsg(fd, &m, flags); ++} ++ ++static int put_mmsglen32(void __user **u_mmsg_p, const struct mmsghdr *mmsg) ++{ ++ struct compat_mmsghdr __user **p = (struct compat_mmsghdr **)u_mmsg_p, ++ *q __user = (*p)++; ++ ++ return __xn_put_user(mmsg->msg_len, &q->msg_len); ++} ++ ++COBALT_SYSCALL32emu(sendmmsg, primary, ++ (int fd, struct compat_mmsghdr __user *u_msgvec, unsigned int vlen, ++ unsigned int flags)) ++{ ++ return __rtdm_fd_sendmmsg(fd, u_msgvec, vlen, flags, ++ get_mmsg32, put_mmsglen32); ++} ++ ++COBALT_SYSCALL32emu(mmap, lostage, ++ (int fd, struct compat_rtdm_mmap_request __user *u_crma, ++ compat_uptr_t __user *u_caddrp)) ++{ ++ struct _rtdm_mmap_request rma; ++ compat_uptr_t u_caddr; ++ void *u_addr = NULL; ++ int ret; ++ ++ if (u_crma == NULL || ++ !access_rok(u_crma, sizeof(*u_crma)) || ++ __xn_get_user(rma.length, &u_crma->length) || ++ __xn_get_user(rma.offset, &u_crma->offset) || ++ __xn_get_user(rma.prot, &u_crma->prot) || ++ __xn_get_user(rma.flags, &u_crma->flags)) ++ return -EFAULT; ++ ++ ret = rtdm_fd_mmap(fd, &rma, &u_addr); ++ if (ret) ++ return ret; ++ ++ u_caddr = ptr_to_compat(u_addr); ++ ++ return cobalt_copy_to_user(u_caddrp, &u_caddr, sizeof(u_caddr)); ++} ++ ++COBALT_SYSCALL32emu(backtrace, current, ++ (int nr, compat_ulong_t __user *u_backtrace, ++ int reason)) ++{ ++ compat_ulong_t cbacktrace[SIGSHADOW_BACKTRACE_DEPTH]; ++ unsigned long backtrace[SIGSHADOW_BACKTRACE_DEPTH]; ++ int ret, n; ++ ++ if (nr <= 0) ++ return 0; ++ ++ if (nr > SIGSHADOW_BACKTRACE_DEPTH) ++ nr = SIGSHADOW_BACKTRACE_DEPTH; ++ ++ ret = cobalt_copy_from_user(cbacktrace, u_backtrace, ++ nr * sizeof(compat_ulong_t)); ++ if (ret) ++ return ret; ++ ++ for (n = 0; n < nr; n++) ++ backtrace [n] = cbacktrace[n]; ++ ++ xndebug_trace_relax(nr, backtrace, reason); ++ ++ return 0; ++} ++ ++#ifdef COBALT_SYSCALL32x ++ ++COBALT_SYSCALL32x(mq_timedreceive, primary, ++ (mqd_t uqd, void __user *u_buf, ++ compat_ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct timespec __user *u_ts)) ++{ ++ compat_ssize_t clen; ++ ssize_t len; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&clen, u_len, sizeof(*u_len)); ++ if (ret) ++ return ret; ++ ++ len = clen; ++ ret = __cobalt_mq_timedreceive(uqd, u_buf, &len, u_prio, ++ u_ts, u_ts ? mq_fetch_timeout : NULL); ++ clen = len; ++ ++ return ret ?: cobalt_copy_to_user(u_len, &clen, sizeof(*u_len)); ++} ++ ++#endif /* COBALT_SYSCALL32x */ +--- linux/kernel/xenomai/posix/event.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/event.c 2021-04-07 16:01:26.093635787 +0800 +@@ -0,0 +1,387 @@ ++/* ++ * Copyright (C) 2012 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include "internal.h" ++#include "thread.h" ++#include "clock.h" ++#include "event.h" ++#include ++ ++/* ++ * Cobalt event notification services ++ * ++ * An event flag group is a synchronization object represented by a ++ * regular native integer; every available bit in such word can be ++ * used to map a user-defined event flag. When a flag is set, the ++ * associated event is said to have occurred. ++ * ++ * Xenomai threads and interrupt handlers can use event flags to ++ * signal the occurrence of events to other threads; those threads can ++ * either wait for the events to occur in a conjunctive manner (all ++ * awaited events must have occurred to wake up), or in a disjunctive ++ * way (at least one of the awaited events must have occurred to wake ++ * up). ++ * ++ * We expose this non-POSIX feature through the internal API, as a ++ * fast IPC mechanism available to the Copperplate interface. ++ */ ++ ++struct event_wait_context { ++ struct xnthread_wait_context wc; ++ unsigned int value; ++ int mode; ++}; ++ ++COBALT_SYSCALL(event_init, current, ++ (struct cobalt_event_shadow __user *u_event, ++ unsigned int value, int flags)) ++{ ++ struct cobalt_event_shadow shadow; ++ struct cobalt_event_state *state; ++ int pshared, synflags, ret; ++ struct cobalt_event *event; ++ struct cobalt_umm *umm; ++ unsigned long stateoff; ++ spl_t s; ++ ++ trace_cobalt_event_init(u_event, value, flags); ++ ++ event = xnmalloc(sizeof(*event)); ++ if (event == NULL) ++ return -ENOMEM; ++ ++ pshared = (flags & COBALT_EVENT_SHARED) != 0; ++ umm = &cobalt_ppd_get(pshared)->umm; ++ state = cobalt_umm_alloc(umm, sizeof(*state)); ++ if (state == NULL) { ++ xnfree(event); ++ return -EAGAIN; ++ } ++ ++ ret = xnregistry_enter_anon(event, &event->resnode.handle); ++ if (ret) { ++ cobalt_umm_free(umm, state); ++ xnfree(event); ++ return ret; ++ } ++ ++ event->state = state; ++ event->flags = flags; ++ synflags = (flags & COBALT_EVENT_PRIO) ? 
XNSYNCH_PRIO : XNSYNCH_FIFO; ++ xnsynch_init(&event->synch, synflags, NULL); ++ state->value = value; ++ state->flags = 0; ++ state->nwaiters = 0; ++ stateoff = cobalt_umm_offset(umm, state); ++ XENO_BUG_ON(COBALT, stateoff != (__u32)stateoff); ++ ++ xnlock_get_irqsave(&nklock, s); ++ cobalt_add_resource(&event->resnode, event, pshared); ++ event->magic = COBALT_EVENT_MAGIC; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ shadow.flags = flags; ++ shadow.handle = event->resnode.handle; ++ shadow.state_offset = (__u32)stateoff; ++ ++ return cobalt_copy_to_user(u_event, &shadow, sizeof(*u_event)); ++} ++ ++int __cobalt_event_wait(struct cobalt_event_shadow __user *u_event, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, const struct timespec *ts) ++{ ++ unsigned int rbits = 0, testval; ++ xnticks_t timeout = XN_INFINITE; ++ struct cobalt_event_state *state; ++ xntmode_t tmode = XN_RELATIVE; ++ struct event_wait_context ewc; ++ struct cobalt_event *event; ++ xnhandle_t handle; ++ int ret = 0, info; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_event->handle); ++ ++ if (ts) { ++ if ((unsigned long)ts->tv_nsec >= ONE_BILLION) ++ return -EINVAL; ++ ++ timeout = ts2ns(ts); ++ if (timeout) { ++ timeout++; ++ tmode = XN_ABSOLUTE; ++ } else ++ timeout = XN_NONBLOCK; ++ trace_cobalt_event_timedwait(u_event, bits, mode, ts); ++ } else ++ trace_cobalt_event_wait(u_event, bits, mode); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ event = xnregistry_lookup(handle, NULL); ++ if (event == NULL || event->magic != COBALT_EVENT_MAGIC) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ state = event->state; ++ ++ if (bits == 0) { ++ /* ++ * Special case: we don't wait for any event, we only ++ * return the current flag group value. ++ */ ++ rbits = state->value; ++ goto out; ++ } ++ ++ state->flags |= COBALT_EVENT_PENDED; ++ rbits = state->value & bits; ++ testval = mode & COBALT_EVENT_ANY ? rbits : bits; ++ if (rbits && rbits == testval) ++ goto done; ++ ++ if (timeout == XN_NONBLOCK) { ++ ret = -EWOULDBLOCK; ++ goto done; ++ } ++ ++ ewc.value = bits; ++ ewc.mode = mode; ++ xnthread_prepare_wait(&ewc.wc); ++ state->nwaiters++; ++ info = xnsynch_sleep_on(&event->synch, timeout, tmode); ++ if (info & XNRMID) { ++ ret = -EIDRM; ++ goto out; ++ } ++ if (info & (XNBREAK|XNTIMEO)) { ++ state->nwaiters--; ++ ret = (info & XNBREAK) ? 
-EINTR : -ETIMEDOUT; ++ } else ++ rbits = ewc.value; ++done: ++ if (!xnsynch_pended_p(&event->synch)) ++ state->flags &= ~COBALT_EVENT_PENDED; ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (ret == 0 && ++ cobalt_copy_to_user(u_bits_r, &rbits, sizeof(rbits))) ++ return -EFAULT; ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(event_wait, primary, ++ (struct cobalt_event_shadow __user *u_event, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, const struct timespec __user *u_ts)) ++{ ++ struct timespec ts, *tsp = NULL; ++ int ret; ++ ++ if (u_ts) { ++ tsp = &ts; ++ ret = cobalt_copy_from_user(&ts, u_ts, sizeof(ts)); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_event_wait(u_event, bits, u_bits_r, mode, tsp); ++} ++ ++COBALT_SYSCALL(event_sync, current, ++ (struct cobalt_event_shadow __user *u_event)) ++{ ++ unsigned int bits, waitval, testval; ++ struct xnthread_wait_context *wc; ++ struct cobalt_event_state *state; ++ struct event_wait_context *ewc; ++ struct cobalt_event *event; ++ struct xnthread *p, *tmp; ++ xnhandle_t handle; ++ int ret = 0; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_event->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ event = xnregistry_lookup(handle, NULL); ++ if (event == NULL || event->magic != COBALT_EVENT_MAGIC) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * Userland has already updated the bitmask, our job is to ++ * wake up any thread which could be satisfied by its current ++ * value. ++ */ ++ state = event->state; ++ bits = state->value; ++ ++ xnsynch_for_each_sleeper_safe(p, tmp, &event->synch) { ++ wc = xnthread_get_wait_context(p); ++ ewc = container_of(wc, struct event_wait_context, wc); ++ waitval = ewc->value & bits; ++ testval = ewc->mode & COBALT_EVENT_ANY ? waitval : ewc->value; ++ if (waitval && waitval == testval) { ++ state->nwaiters--; ++ ewc->value = waitval; ++ xnsynch_wakeup_this_sleeper(&event->synch, p); ++ } ++ } ++ ++ xnsched_run(); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(event_destroy, current, ++ (struct cobalt_event_shadow __user *u_event)) ++{ ++ struct cobalt_event *event; ++ xnhandle_t handle; ++ spl_t s; ++ ++ trace_cobalt_event_destroy(u_event); ++ ++ handle = cobalt_get_handle_from_user(&u_event->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ event = xnregistry_lookup(handle, NULL); ++ if (event == NULL || event->magic != COBALT_EVENT_MAGIC) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ ++ cobalt_event_reclaim(&event->resnode, s); /* drops lock */ ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(event_inquire, current, ++ (struct cobalt_event_shadow __user *u_event, ++ struct cobalt_event_info __user *u_info, ++ pid_t __user *u_waitlist, ++ size_t waitsz)) ++{ ++ int nrpend = 0, nrwait = 0, nrpids, ret = 0; ++ unsigned long pstamp, nstamp = 0; ++ struct cobalt_event_info info; ++ struct cobalt_event *event; ++ pid_t *t = NULL, fbuf[16]; ++ struct xnthread *thread; ++ xnhandle_t handle; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_event->handle); ++ ++ nrpids = waitsz / sizeof(pid_t); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ for (;;) { ++ pstamp = nstamp; ++ event = xnregistry_lookup(handle, &nstamp); ++ if (event == NULL || event->magic != COBALT_EVENT_MAGIC) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ /* ++ * Allocate memory to return the wait list without ++ * holding any lock, then revalidate the handle. 
++ */ ++ if (t == NULL) { ++ nrpend = 0; ++ if (!xnsynch_pended_p(&event->synch)) ++ break; ++ xnsynch_for_each_sleeper(thread, &event->synch) ++ nrpend++; ++ if (u_waitlist == NULL) ++ break; ++ xnlock_put_irqrestore(&nklock, s); ++ if (nrpids > nrpend) ++ nrpids = nrpend; ++ if (nrpend <= ARRAY_SIZE(fbuf)) ++ t = fbuf; /* Use fast buffer. */ ++ else { ++ t = xnmalloc(nrpend * sizeof(pid_t)); ++ if (t == NULL) ++ return -ENOMEM; ++ } ++ xnlock_get_irqsave(&nklock, s); ++ } else if (pstamp == nstamp) ++ break; ++ else { ++ xnlock_put_irqrestore(&nklock, s); ++ if (t != fbuf) ++ xnfree(t); ++ t = NULL; ++ xnlock_get_irqsave(&nklock, s); ++ } ++ } ++ ++ info.flags = event->flags; ++ info.value = event->value; ++ info.nrwait = nrpend; ++ ++ if (xnsynch_pended_p(&event->synch) && u_waitlist != NULL) { ++ xnsynch_for_each_sleeper(thread, &event->synch) { ++ if (nrwait >= nrpids) ++ break; ++ t[nrwait++] = xnthread_host_pid(thread); ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ ret = cobalt_copy_to_user(u_info, &info, sizeof(info)); ++ if (ret == 0 && nrwait > 0) ++ ret = cobalt_copy_to_user(u_waitlist, t, nrwait * sizeof(pid_t)); ++ ++ if (t && t != fbuf) ++ xnfree(t); ++ ++ return ret ?: nrwait; ++} ++ ++void cobalt_event_reclaim(struct cobalt_resnode *node, spl_t s) ++{ ++ struct cobalt_event *event; ++ struct cobalt_umm *umm; ++ int pshared; ++ ++ event = container_of(node, struct cobalt_event, resnode); ++ xnregistry_remove(node->handle); ++ cobalt_del_resource(node); ++ xnsynch_destroy(&event->synch); ++ pshared = (event->flags & COBALT_EVENT_SHARED) != 0; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ umm = &cobalt_ppd_get(pshared)->umm; ++ cobalt_umm_free(umm, event->state); ++ xnfree(event); ++} +--- linux/kernel/xenomai/posix/event.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/event.h 2021-04-07 16:01:26.088635794 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (C) 2012 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _COBALT_POSIX_EVENT_H ++#define _COBALT_POSIX_EVENT_H ++ ++#include ++#include ++#include ++#include ++ ++struct cobalt_resources; ++struct cobalt_process; ++ ++struct cobalt_event { ++ unsigned int magic; ++ unsigned int value; ++ int flags; ++ struct xnsynch synch; ++ struct cobalt_event_state *state; ++ struct cobalt_resnode resnode; ++}; ++ ++int __cobalt_event_wait(struct cobalt_event_shadow __user *u_event, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, const struct timespec *ts); ++ ++COBALT_SYSCALL_DECL(event_init, ++ (struct cobalt_event_shadow __user *u_evtsh, ++ unsigned int value, ++ int flags)); ++ ++COBALT_SYSCALL_DECL(event_wait, ++ (struct cobalt_event_shadow __user *u_evtsh, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, ++ const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(event_sync, ++ (struct cobalt_event_shadow __user *u_evtsh)); ++ ++COBALT_SYSCALL_DECL(event_destroy, ++ (struct cobalt_event_shadow __user *u_evtsh)); ++ ++COBALT_SYSCALL_DECL(event_inquire, ++ (struct cobalt_event_shadow __user *u_event, ++ struct cobalt_event_info __user *u_info, ++ pid_t __user *u_waitlist, ++ size_t waitsz)); ++ ++void cobalt_event_reclaim(struct cobalt_resnode *node, ++ spl_t s); ++ ++#endif /* !_COBALT_POSIX_EVENT_H */ +--- linux/kernel/xenomai/posix/memory.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/memory.h 2021-04-07 16:01:26.084635799 +0800 +@@ -0,0 +1,61 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_MEMORY_H ++#define _COBALT_POSIX_MEMORY_H ++ ++#include ++ ++#define cobalt_umm_set_name(__umm, __fmt, __args...) \ ++ xnheap_set_name(&(__umm)->heap, (__fmt), ## __args) ++ ++static inline ++void *cobalt_umm_alloc(struct cobalt_umm *umm, __u32 size) ++{ ++ return xnheap_alloc(&umm->heap, size); ++} ++ ++static inline ++void *cobalt_umm_zalloc(struct cobalt_umm *umm, __u32 size) ++{ ++ return xnheap_zalloc(&umm->heap, size); ++} ++ ++static inline ++void cobalt_umm_free(struct cobalt_umm *umm, void *p) ++{ ++ xnheap_free(&umm->heap, p); ++} ++ ++static inline ++__u32 cobalt_umm_offset(struct cobalt_umm *umm, void *p) ++{ ++ return p - xnheap_get_membase(&umm->heap); ++} ++ ++int cobalt_memdev_init(void); ++ ++void cobalt_memdev_cleanup(void); ++ ++int cobalt_umm_init(struct cobalt_umm *umm, u32 size, ++ void (*release)(struct cobalt_umm *umm)); ++ ++void cobalt_umm_destroy(struct cobalt_umm *umm); ++ ++#endif /* !_COBALT_POSIX_MEMORY_H */ +--- linux/kernel/xenomai/posix/clock.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/clock.h 2021-04-07 16:01:26.079635806 +0800 +@@ -0,0 +1,125 @@ ++/* ++ * Written by Gilles Chanteperdrix . 
++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_CLOCK_H ++#define _COBALT_POSIX_CLOCK_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define ONE_BILLION 1000000000 ++ ++struct xnclock; ++ ++static inline void ns2ts(struct timespec *ts, xnticks_t nsecs) ++{ ++ ts->tv_sec = xnclock_divrem_billion(nsecs, &ts->tv_nsec); ++} ++ ++static inline xnticks_t ts2ns(const struct timespec *ts) ++{ ++ xnticks_t nsecs = ts->tv_nsec; ++ ++ if (ts->tv_sec) ++ nsecs += (xnticks_t)ts->tv_sec * ONE_BILLION; ++ ++ return nsecs; ++} ++ ++static inline xnticks_t tv2ns(const struct timeval *tv) ++{ ++ xnticks_t nsecs = tv->tv_usec * 1000; ++ ++ if (tv->tv_sec) ++ nsecs += (xnticks_t)tv->tv_sec * ONE_BILLION; ++ ++ return nsecs; ++} ++ ++static inline void ticks2tv(struct timeval *tv, xnticks_t ticks) ++{ ++ unsigned long nsecs; ++ ++ tv->tv_sec = xnclock_divrem_billion(ticks, &nsecs); ++ tv->tv_usec = nsecs / 1000; ++} ++ ++static inline xnticks_t clock_get_ticks(clockid_t clock_id) ++{ ++ return clock_id == CLOCK_REALTIME ? ++ xnclock_read_realtime(&nkclock) : ++ xnclock_read_monotonic(&nkclock); ++} ++ ++static inline int clock_flag(int flag, clockid_t clock_id) ++{ ++ if ((flag & TIMER_ABSTIME) == 0) ++ return XN_RELATIVE; ++ ++ if (clock_id == CLOCK_REALTIME) ++ return XN_REALTIME; ++ ++ return XN_ABSOLUTE; ++} ++ ++int __cobalt_clock_getres(clockid_t clock_id, ++ struct timespec *ts); ++ ++int __cobalt_clock_gettime(clockid_t clock_id, ++ struct timespec *ts); ++ ++int __cobalt_clock_settime(clockid_t clock_id, ++ const struct timespec *ts); ++ ++int __cobalt_clock_adjtime(clockid_t clock_id, ++ struct timex *tx); ++ ++int __cobalt_clock_nanosleep(clockid_t clock_id, int flags, ++ const struct timespec *rqt, ++ struct timespec *rmt); ++ ++COBALT_SYSCALL_DECL(clock_getres, ++ (clockid_t clock_id, struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(clock_gettime, ++ (clockid_t clock_id, struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(clock_settime, ++ (clockid_t clock_id, const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(clock_adjtime, ++ (clockid_t clock_id, struct timex __user *u_tx)); ++ ++COBALT_SYSCALL_DECL(clock_nanosleep, ++ (clockid_t clock_id, int flags, ++ const struct timespec __user *u_rqt, ++ struct timespec __user *u_rmt)); ++ ++int cobalt_clock_register(struct xnclock *clock, ++ const cpumask_t *affinity, ++ clockid_t *clk_id); ++ ++void cobalt_clock_deregister(struct xnclock *clock); ++ ++struct xnclock *cobalt_clock_find(clockid_t clock_id); ++ ++extern DECLARE_BITMAP(cobalt_clock_extids, COBALT_MAX_EXTCLOCKS); ++ ++#endif /* !_COBALT_POSIX_CLOCK_H */ +--- linux/kernel/xenomai/posix/mutex.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/mutex.c 2021-04-07 16:01:26.074635814 +0800 +@@ -0,0 +1,421 @@ ++/* ++ * 
Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include "internal.h" ++#include "thread.h" ++#include "mutex.h" ++#include "cond.h" ++#include "clock.h" ++ ++static int cobalt_mutex_init_inner(struct cobalt_mutex_shadow *shadow, ++ struct cobalt_mutex *mutex, ++ struct cobalt_mutex_state *state, ++ const struct cobalt_mutexattr *attr) ++{ ++ int synch_flags = XNSYNCH_PRIO | XNSYNCH_OWNER; ++ struct cobalt_umm *umm; ++ spl_t s; ++ int ret; ++ ++ ret = xnregistry_enter_anon(mutex, &mutex->resnode.handle); ++ if (ret < 0) ++ return ret; ++ ++ umm = &cobalt_ppd_get(attr->pshared)->umm; ++ shadow->handle = mutex->resnode.handle; ++ shadow->magic = COBALT_MUTEX_MAGIC; ++ shadow->lockcnt = 0; ++ shadow->attr = *attr; ++ shadow->state_offset = cobalt_umm_offset(umm, state); ++ ++ mutex->magic = COBALT_MUTEX_MAGIC; ++ ++ if (attr->protocol == PTHREAD_PRIO_PROTECT) { ++ state->ceiling = attr->ceiling + 1; ++ xnsynch_init_protect(&mutex->synchbase, synch_flags, ++ &state->owner, &state->ceiling); ++ } else { ++ state->ceiling = 0; ++ if (attr->protocol == PTHREAD_PRIO_INHERIT) ++ synch_flags |= XNSYNCH_PI; ++ xnsynch_init(&mutex->synchbase, synch_flags, &state->owner); ++ } ++ ++ state->flags = (attr->type == PTHREAD_MUTEX_ERRORCHECK ++ ? COBALT_MUTEX_ERRORCHECK : 0); ++ mutex->attr = *attr; ++ INIT_LIST_HEAD(&mutex->conds); ++ ++ xnlock_get_irqsave(&nklock, s); ++ cobalt_add_resource(&mutex->resnode, mutex, attr->pshared); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++/* must be called with nklock locked, interrupts off. */ ++int __cobalt_mutex_acquire_unchecked(struct xnthread *cur, ++ struct cobalt_mutex *mutex, ++ const struct timespec *ts) ++{ ++ int ret; ++ ++ if (ts) { ++ if (ts->tv_nsec >= ONE_BILLION) ++ return -EINVAL; ++ ret = xnsynch_acquire(&mutex->synchbase, ts2ns(ts) + 1, XN_REALTIME); ++ } else ++ ret = xnsynch_acquire(&mutex->synchbase, XN_INFINITE, XN_RELATIVE); ++ ++ if (ret) { ++ if (ret & XNBREAK) ++ return -EINTR; ++ if (ret & XNTIMEO) ++ return -ETIMEDOUT; ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++int cobalt_mutex_release(struct xnthread *curr, ++ struct cobalt_mutex *mutex) ++{ /* nklock held, irqs off */ ++ struct cobalt_mutex_state *state; ++ struct cobalt_cond *cond; ++ unsigned long flags; ++ int need_resched; ++ ++ if (!cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, struct cobalt_mutex)) ++ return -EINVAL; ++ ++ if (mutex->resnode.scope != ++ cobalt_current_resources(mutex->attr.pshared)) ++ return -EPERM; ++ ++ /* ++ * We are about to release a mutex which is still pending PP ++ * (i.e. we never got scheduled out while holding it). Clear ++ * the lazy handle. 
++ */ ++ if (mutex->resnode.handle == curr->u_window->pp_pending) ++ curr->u_window->pp_pending = XN_NO_HANDLE; ++ ++ state = container_of(mutex->synchbase.fastlock, struct cobalt_mutex_state, owner); ++ flags = state->flags; ++ need_resched = 0; ++ if ((flags & COBALT_MUTEX_COND_SIGNAL)) { ++ state->flags = flags & ~COBALT_MUTEX_COND_SIGNAL; ++ if (!list_empty(&mutex->conds)) { ++ list_for_each_entry(cond, &mutex->conds, mutex_link) ++ need_resched |= ++ cobalt_cond_deferred_signals(cond); ++ } ++ } ++ need_resched |= xnsynch_release(&mutex->synchbase, curr); ++ ++ return need_resched; ++} ++ ++int __cobalt_mutex_timedlock_break(struct cobalt_mutex_shadow __user *u_mx, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct xnthread *curr = xnthread_current(); ++ struct timespec ts, *tsp = NULL; ++ struct cobalt_mutex *mutex; ++ xnhandle_t handle; ++ spl_t s; ++ int ret; ++ ++ /* We need a valid thread handle for the fast lock. */ ++ if (curr->handle == XN_NO_HANDLE) ++ return -EPERM; ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++redo: ++ xnlock_get_irqsave(&nklock, s); ++ ++ mutex = xnregistry_lookup(handle, NULL); ++ if (!cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, struct cobalt_mutex)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (mutex->resnode.scope != ++ cobalt_current_resources(mutex->attr.pshared)) { ++ ret = -EPERM; ++ goto out; ++ } ++ ++ xnthread_commit_ceiling(curr); ++ ++ if (xnsynch_owner_check(&mutex->synchbase, curr)) { ++ if (fetch_timeout) { ++ xnlock_put_irqrestore(&nklock, s); ++ ret = fetch_timeout(&ts, u_ts); ++ if (ret) ++ return ret; ++ ++ fetch_timeout = NULL; ++ tsp = &ts; ++ goto redo; /* Revalidate handle. */ ++ } ++ ret = __cobalt_mutex_acquire_unchecked(curr, mutex, tsp); ++ xnlock_put_irqrestore(&nklock, s); ++ return ret; ++ } ++ ++ /* We already own the mutex, something looks wrong. */ ++ ++ ret = -EBUSY; ++ switch(mutex->attr.type) { ++ case PTHREAD_MUTEX_NORMAL: ++ /* Attempting to relock a normal mutex, deadlock. */ ++ if (IS_ENABLED(XENO_OPT_DEBUG_USER)) ++ printk(XENO_WARNING ++ "thread %s deadlocks on non-recursive mutex\n", ++ curr->name); ++ /* Make the caller hang. */ ++ __cobalt_mutex_acquire_unchecked(curr, mutex, NULL); ++ break; ++ ++ case PTHREAD_MUTEX_ERRORCHECK: ++ case PTHREAD_MUTEX_RECURSIVE: ++ /* ++ * Recursive mutexes are handled in user-space, so ++ * these cases should never happen. ++ */ ++ ret = -EINVAL; ++ break; ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mutex_check_init, current, ++ (struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ struct cobalt_mutex *mutex; ++ xnhandle_t handle; ++ int err; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ mutex = xnregistry_lookup(handle, NULL); ++ if (cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, typeof(*mutex))) ++ /* mutex is already in a queue. 
*/ ++ err = -EBUSY; ++ else ++ err = 0; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ return err; ++} ++ ++COBALT_SYSCALL(mutex_init, current, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct cobalt_mutexattr __user *u_attr)) ++{ ++ struct cobalt_mutex_state *state; ++ struct cobalt_mutex_shadow mx; ++ struct cobalt_mutexattr attr; ++ struct cobalt_mutex *mutex; ++ int ret; ++ ++ if (cobalt_copy_from_user(&mx, u_mx, sizeof(mx))) ++ return -EFAULT; ++ ++ if (cobalt_copy_from_user(&attr, u_attr, sizeof(attr))) ++ return -EFAULT; ++ ++ mutex = xnmalloc(sizeof(*mutex)); ++ if (mutex == NULL) ++ return -ENOMEM; ++ ++ state = cobalt_umm_alloc(&cobalt_ppd_get(attr.pshared)->umm, ++ sizeof(*state)); ++ if (state == NULL) { ++ xnfree(mutex); ++ return -EAGAIN; ++ } ++ ++ ret = cobalt_mutex_init_inner(&mx, mutex, state, &attr); ++ if (ret) { ++ xnfree(mutex); ++ cobalt_umm_free(&cobalt_ppd_get(attr.pshared)->umm, state); ++ return ret; ++ } ++ ++ return cobalt_copy_to_user(u_mx, &mx, sizeof(*u_mx)); ++} ++ ++COBALT_SYSCALL(mutex_destroy, current, ++ (struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ struct cobalt_mutex_shadow mx; ++ struct cobalt_mutex *mutex; ++ spl_t s; ++ int ret; ++ ++ if (cobalt_copy_from_user(&mx, u_mx, sizeof(mx))) ++ return -EFAULT; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mutex = xnregistry_lookup(mx.handle, NULL); ++ if (!cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, typeof(*mutex))) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ if (cobalt_current_resources(mutex->attr.pshared) != ++ mutex->resnode.scope) { ++ ret = -EPERM; ++ goto fail; ++ } ++ if (xnsynch_fast_owner_check(mutex->synchbase.fastlock, ++ XN_NO_HANDLE) != 0 || ++ !list_empty(&mutex->conds)) { ++ ret = -EBUSY; ++ goto fail; ++ } ++ ++ cobalt_mutex_reclaim(&mutex->resnode, s); /* drops lock */ ++ ++ cobalt_mark_deleted(&mx); ++ ++ return cobalt_copy_to_user(u_mx, &mx, sizeof(*u_mx)); ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mutex_trylock, primary, ++ (struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ struct xnthread *curr = xnthread_current(); ++ struct cobalt_mutex *mutex; ++ xnhandle_t handle; ++ spl_t s; ++ int ret; ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mutex = xnregistry_lookup(handle, NULL); ++ if (!cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, typeof(*mutex))) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ xnthread_commit_ceiling(curr); ++ ++ ret = xnsynch_try_acquire(&mutex->synchbase); ++ ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mutex_lock, primary, ++ (struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ return __cobalt_mutex_timedlock_break(u_mx, NULL, NULL); ++} ++ ++static inline int mutex_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? 
-EFAULT : ++ cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++} ++ ++COBALT_SYSCALL(mutex_timedlock, primary, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct timespec __user *u_ts)) ++{ ++ return __cobalt_mutex_timedlock_break(u_mx, u_ts, mutex_fetch_timeout); ++} ++ ++COBALT_SYSCALL(mutex_unlock, nonrestartable, ++ (struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ struct cobalt_mutex *mutex; ++ struct xnthread *curr; ++ xnhandle_t handle; ++ int ret; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++ curr = xnthread_current(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mutex = xnregistry_lookup(handle, NULL); ++ ret = cobalt_mutex_release(curr, mutex); ++ if (ret > 0) { ++ xnsched_run(); ++ ret = 0; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++void cobalt_mutex_reclaim(struct cobalt_resnode *node, spl_t s) ++{ ++ struct cobalt_mutex_state *state; ++ struct cobalt_mutex *mutex; ++ int pshared; ++ ++ mutex = container_of(node, struct cobalt_mutex, resnode); ++ state = container_of(mutex->synchbase.fastlock, struct cobalt_mutex_state, owner); ++ pshared = mutex->attr.pshared; ++ xnregistry_remove(node->handle); ++ cobalt_del_resource(node); ++ xnsynch_destroy(&mutex->synchbase); ++ cobalt_mark_deleted(mutex); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ cobalt_umm_free(&cobalt_ppd_get(pshared)->umm, state); ++ xnfree(mutex); ++} ++ ++struct xnsynch *lookup_lazy_pp(xnhandle_t handle) ++{ /* nklock held, irqs off */ ++ struct cobalt_mutex *mutex; ++ ++ /* Only mutexes may be PP-enabled. */ ++ ++ mutex = xnregistry_lookup(handle, NULL); ++ if (mutex == NULL || ++ !cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, struct cobalt_mutex) || ++ mutex->attr.protocol != PTHREAD_PRIO_PROTECT) ++ return NULL; ++ ++ return &mutex->synchbase; ++} +--- linux/kernel/xenomai/posix/io.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/io.c 2021-04-07 16:01:26.069635821 +0800 +@@ -0,0 +1,342 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka . ++ * Copyright (C) 2005 Joerg Langenberg . ++ * Copyright (C) 2008 Gilles Chanteperdrix ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include "process.h" ++#include "internal.h" ++#include "clock.h" ++#include "io.h" ++ ++COBALT_SYSCALL(open, lostage, ++ (const char __user *u_path, int oflag)) ++{ ++ struct filename *filename; ++ int ufd; ++ ++ filename = getname(u_path); ++ if (IS_ERR(filename)) ++ return PTR_ERR(filename); ++ ++ ufd = __rtdm_dev_open(filename->name, oflag); ++ putname(filename); ++ ++ return ufd; ++} ++ ++COBALT_SYSCALL(socket, lostage, ++ (int protocol_family, int socket_type, int protocol)) ++{ ++ return __rtdm_dev_socket(protocol_family, socket_type, protocol); ++} ++ ++COBALT_SYSCALL(close, lostage, (int fd)) ++{ ++ return rtdm_fd_close(fd, 0); ++} ++ ++COBALT_SYSCALL(fcntl, current, (int fd, int cmd, long arg)) ++{ ++ return rtdm_fd_fcntl(fd, cmd, arg); ++} ++ ++COBALT_SYSCALL(ioctl, handover, ++ (int fd, unsigned int request, void __user *arg)) ++{ ++ return rtdm_fd_ioctl(fd, request, arg); ++} ++ ++COBALT_SYSCALL(read, handover, ++ (int fd, void __user *buf, size_t size)) ++{ ++ return rtdm_fd_read(fd, buf, size); ++} ++ ++COBALT_SYSCALL(write, handover, ++ (int fd, const void __user *buf, size_t size)) ++{ ++ return rtdm_fd_write(fd, buf, size); ++} ++ ++COBALT_SYSCALL(recvmsg, handover, ++ (int fd, struct user_msghdr __user *umsg, int flags)) ++{ ++ struct user_msghdr m; ++ ssize_t ret; ++ ++ ret = cobalt_copy_from_user(&m, umsg, sizeof(m)); ++ if (ret) ++ return ret; ++ ++ ret = rtdm_fd_recvmsg(fd, &m, flags); ++ if (ret < 0) ++ return ret; ++ ++ return cobalt_copy_to_user(umsg, &m, sizeof(*umsg)) ?: ret; ++} ++ ++static int get_timespec(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++} ++ ++static int get_mmsg(struct mmsghdr *mmsg, void __user *u_mmsg) ++{ ++ return cobalt_copy_from_user(mmsg, u_mmsg, sizeof(*mmsg)); ++} ++ ++static int put_mmsg(void __user **u_mmsg_p, const struct mmsghdr *mmsg) ++{ ++ struct mmsghdr __user **p = (struct mmsghdr **)u_mmsg_p, ++ *q __user = (*p)++; ++ ++ return cobalt_copy_to_user(q, mmsg, sizeof(*q)); ++} ++ ++COBALT_SYSCALL(recvmmsg, primary, ++ (int fd, struct mmsghdr __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, struct timespec *u_timeout)) ++{ ++ return __rtdm_fd_recvmmsg(fd, u_msgvec, vlen, flags, u_timeout, ++ get_mmsg, put_mmsg, get_timespec); ++} ++ ++COBALT_SYSCALL(sendmsg, handover, ++ (int fd, struct user_msghdr __user *umsg, int flags)) ++{ ++ struct user_msghdr m; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&m, umsg, sizeof(m)); ++ ++ return ret ?: rtdm_fd_sendmsg(fd, &m, flags); ++} ++ ++static int put_mmsglen(void __user **u_mmsg_p, const struct mmsghdr *mmsg) ++{ ++ struct mmsghdr __user **p = (struct mmsghdr **)u_mmsg_p, ++ *q __user = (*p)++; ++ ++ return __xn_put_user(mmsg->msg_len, &q->msg_len); ++} ++ ++COBALT_SYSCALL(sendmmsg, primary, ++ (int fd, struct mmsghdr __user *u_msgvec, ++ unsigned int vlen, unsigned int flags)) ++{ ++ return __rtdm_fd_sendmmsg(fd, u_msgvec, vlen, flags, ++ get_mmsg, put_mmsglen); ++} ++ ++COBALT_SYSCALL(mmap, lostage, ++ (int fd, struct _rtdm_mmap_request __user *u_rma, ++ void __user **u_addrp)) ++{ ++ struct _rtdm_mmap_request rma; ++ void *u_addr = NULL; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&rma, u_rma, sizeof(rma)); ++ if (ret) ++ return ret; ++ ++ ret = rtdm_fd_mmap(fd, &rma, &u_addr); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_addrp, &u_addr, sizeof(u_addr)); ++} ++ ++int __cobalt_first_fd_valid_p(fd_set *fds[XNSELECT_MAX_TYPES], int nfds) ++{ 
++ int i, fd; ++ ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (fds[i] ++ && (fd = find_first_bit(fds[i]->fds_bits, nfds)) < nfds) ++ return rtdm_fd_valid_p(fd); ++ ++ /* All empty is correct, used as a "sleep" mechanism by strange ++ applications. */ ++ return 1; ++} ++ ++static int select_bind_one(struct xnselector *selector, unsigned type, int fd) ++{ ++ int rc; ++ ++ rc = rtdm_fd_select(fd, selector, type); ++ if (rc != -ENOENT) ++ return rc; ++ ++ return -EBADF; ++} ++ ++int __cobalt_select_bind_all(struct xnselector *selector, ++ fd_set *fds[XNSELECT_MAX_TYPES], int nfds) ++{ ++ unsigned fd, type; ++ int err; ++ ++ for (type = 0; type < XNSELECT_MAX_TYPES; type++) { ++ fd_set *set = fds[type]; ++ if (set) ++ for (fd = find_first_bit(set->fds_bits, nfds); ++ fd < nfds; ++ fd = find_next_bit(set->fds_bits, nfds, fd + 1)) { ++ err = select_bind_one(selector, type, fd); ++ if (err) ++ return err; ++ } ++ } ++ ++ return 0; ++} ++ ++/* int select(int, fd_set *, fd_set *, fd_set *, struct timeval *) */ ++COBALT_SYSCALL(select, primary, ++ (int nfds, ++ fd_set __user *u_rfds, ++ fd_set __user *u_wfds, ++ fd_set __user *u_xfds, ++ struct timeval __user *u_tv)) ++{ ++ fd_set __user *ufd_sets[XNSELECT_MAX_TYPES] = { ++ [XNSELECT_READ] = u_rfds, ++ [XNSELECT_WRITE] = u_wfds, ++ [XNSELECT_EXCEPT] = u_xfds ++ }; ++ fd_set *in_fds[XNSELECT_MAX_TYPES] = {NULL, NULL, NULL}; ++ fd_set *out_fds[XNSELECT_MAX_TYPES] = {NULL, NULL, NULL}; ++ fd_set in_fds_storage[XNSELECT_MAX_TYPES], ++ out_fds_storage[XNSELECT_MAX_TYPES]; ++ xnticks_t timeout = XN_INFINITE; ++ struct restart_block *restart; ++ xntmode_t mode = XN_RELATIVE; ++ struct xnselector *selector; ++ struct xnthread *curr; ++ struct timeval tv; ++ size_t fds_size; ++ int i, err; ++ ++ curr = xnthread_current(); ++ ++ if (u_tv) { ++ if (xnthread_test_localinfo(curr, XNSYSRST)) { ++ xnthread_clear_localinfo(curr, XNSYSRST); ++ ++ restart = cobalt_get_restart_block(current); ++ timeout = restart->nanosleep.expires; ++ ++ if (restart->fn != cobalt_restart_syscall_placeholder) { ++ err = -EINTR; ++ goto out; ++ } ++ } else { ++ if (!access_wok(u_tv, sizeof(tv)) ++ || cobalt_copy_from_user(&tv, u_tv, sizeof(tv))) ++ return -EFAULT; ++ ++ if (tv.tv_usec >= 1000000) ++ return -EINVAL; ++ ++ timeout = clock_get_ticks(CLOCK_MONOTONIC) + tv2ns(&tv); ++ } ++ ++ mode = XN_ABSOLUTE; ++ } ++ ++ fds_size = __FDELT__(nfds + __NFDBITS__ - 1) * sizeof(long); ++ ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (ufd_sets[i]) { ++ in_fds[i] = &in_fds_storage[i]; ++ out_fds[i] = & out_fds_storage[i]; ++ if (!access_wok((void __user *) ufd_sets[i], ++ sizeof(fd_set)) ++ || cobalt_copy_from_user(in_fds[i], ++ (void __user *) ufd_sets[i], ++ fds_size)) ++ return -EFAULT; ++ } ++ ++ selector = curr->selector; ++ if (!selector) { ++ /* This function may be called from pure Linux fd_sets, we want ++ to avoid the xnselector allocation in this case, so, we do a ++ simple test: test if the first file descriptor we find in the ++ fd_set is an RTDM descriptor or a message queue descriptor. 
*/ ++ if (!__cobalt_first_fd_valid_p(in_fds, nfds)) ++ return -EBADF; ++ ++ selector = xnmalloc(sizeof(*curr->selector)); ++ if (selector == NULL) ++ return -ENOMEM; ++ xnselector_init(selector); ++ curr->selector = selector; ++ ++ /* Bind directly the file descriptors, we do not need to go ++ through xnselect returning -ECHRNG */ ++ if ((err = __cobalt_select_bind_all(selector, in_fds, nfds))) ++ return err; ++ } ++ ++ do { ++ err = xnselect(selector, out_fds, in_fds, nfds, timeout, mode); ++ ++ if (err == -ECHRNG) { ++ int err = __cobalt_select_bind_all(selector, out_fds, nfds); ++ if (err) ++ return err; ++ } ++ } while (err == -ECHRNG); ++ ++ if (err == -EINTR && signal_pending(current)) { ++ xnthread_set_localinfo(curr, XNSYSRST); ++ ++ restart = cobalt_get_restart_block(current); ++ restart->fn = cobalt_restart_syscall_placeholder; ++ restart->nanosleep.expires = timeout; ++ ++ return -ERESTARTSYS; ++ } ++ ++out: ++ if (u_tv && (err > 0 || err == -EINTR)) { ++ xnsticks_t diff = timeout - clock_get_ticks(CLOCK_MONOTONIC); ++ if (diff > 0) ++ ticks2tv(&tv, diff); ++ else ++ tv.tv_sec = tv.tv_usec = 0; ++ ++ if (cobalt_copy_to_user(u_tv, &tv, sizeof(tv))) ++ return -EFAULT; ++ } ++ ++ if (err >= 0) ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (ufd_sets[i] ++ && cobalt_copy_to_user((void __user *) ufd_sets[i], ++ out_fds[i], sizeof(fd_set))) ++ return -EFAULT; ++ return err; ++} +--- linux/kernel/xenomai/posix/compat.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/compat.c 2021-04-07 16:01:26.065635827 +0800 +@@ -0,0 +1,486 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int sys32_get_timespec(struct timespec *ts, ++ const struct compat_timespec __user *cts) ++{ ++ return (cts == NULL || ++ !access_rok(cts, sizeof(*cts)) || ++ __xn_get_user(ts->tv_sec, &cts->tv_sec) || ++ __xn_get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_timespec); ++ ++int sys32_put_timespec(struct compat_timespec __user *cts, ++ const struct timespec *ts) ++{ ++ return (cts == NULL || ++ !access_wok(cts, sizeof(*cts)) || ++ __xn_put_user(ts->tv_sec, &cts->tv_sec) || ++ __xn_put_user(ts->tv_nsec, &cts->tv_nsec)) ? 
-EFAULT : 0; ++} ++EXPORT_SYMBOL_GPL(sys32_put_timespec); ++ ++int sys32_get_itimerspec(struct itimerspec *its, ++ const struct compat_itimerspec __user *cits) ++{ ++ int ret = sys32_get_timespec(&its->it_value, &cits->it_value); ++ ++ return ret ?: sys32_get_timespec(&its->it_interval, &cits->it_interval); ++} ++EXPORT_SYMBOL_GPL(sys32_get_itimerspec); ++ ++int sys32_put_itimerspec(struct compat_itimerspec __user *cits, ++ const struct itimerspec *its) ++{ ++ int ret = sys32_put_timespec(&cits->it_value, &its->it_value); ++ ++ return ret ?: sys32_put_timespec(&cits->it_interval, &its->it_interval); ++} ++EXPORT_SYMBOL_GPL(sys32_put_itimerspec); ++ ++int sys32_get_timeval(struct timeval *tv, ++ const struct compat_timeval __user *ctv) ++{ ++ return (ctv == NULL || ++ !access_rok(ctv, sizeof(*ctv)) || ++ __xn_get_user(tv->tv_sec, &ctv->tv_sec) || ++ __xn_get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_timeval); ++ ++int sys32_put_timeval(struct compat_timeval __user *ctv, ++ const struct timeval *tv) ++{ ++ return (ctv == NULL || ++ !access_wok(ctv, sizeof(*ctv)) || ++ __xn_put_user(tv->tv_sec, &ctv->tv_sec) || ++ __xn_put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; ++} ++EXPORT_SYMBOL_GPL(sys32_put_timeval); ++ ++int sys32_get_timex(struct timex *tx, ++ const struct compat_timex __user *ctx) ++{ ++ memset(tx, 0, sizeof(*tx)); ++ ++ if (!access_rok(ctx, sizeof(*ctx)) || ++ __xn_get_user(tx->modes, &ctx->modes) || ++ __xn_get_user(tx->offset, &ctx->offset) || ++ __xn_get_user(tx->freq, &ctx->freq) || ++ __xn_get_user(tx->maxerror, &ctx->maxerror) || ++ __xn_get_user(tx->esterror, &ctx->esterror) || ++ __xn_get_user(tx->status, &ctx->status) || ++ __xn_get_user(tx->constant, &ctx->constant) || ++ __xn_get_user(tx->precision, &ctx->precision) || ++ __xn_get_user(tx->tolerance, &ctx->tolerance) || ++ __xn_get_user(tx->time.tv_sec, &ctx->time.tv_sec) || ++ __xn_get_user(tx->time.tv_usec, &ctx->time.tv_usec) || ++ __xn_get_user(tx->tick, &ctx->tick) || ++ __xn_get_user(tx->ppsfreq, &ctx->ppsfreq) || ++ __xn_get_user(tx->jitter, &ctx->jitter) || ++ __xn_get_user(tx->shift, &ctx->shift) || ++ __xn_get_user(tx->stabil, &ctx->stabil) || ++ __xn_get_user(tx->jitcnt, &ctx->jitcnt) || ++ __xn_get_user(tx->calcnt, &ctx->calcnt) || ++ __xn_get_user(tx->errcnt, &ctx->errcnt) || ++ __xn_get_user(tx->stbcnt, &ctx->stbcnt)) ++ return -EFAULT; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_timex); ++ ++int sys32_put_timex(struct compat_timex __user *ctx, ++ const struct timex *tx) ++{ ++ if (!access_wok(ctx, sizeof(*ctx)) || ++ __xn_put_user(tx->modes, &ctx->modes) || ++ __xn_put_user(tx->offset, &ctx->offset) || ++ __xn_put_user(tx->freq, &ctx->freq) || ++ __xn_put_user(tx->maxerror, &ctx->maxerror) || ++ __xn_put_user(tx->esterror, &ctx->esterror) || ++ __xn_put_user(tx->status, &ctx->status) || ++ __xn_put_user(tx->constant, &ctx->constant) || ++ __xn_put_user(tx->precision, &ctx->precision) || ++ __xn_put_user(tx->tolerance, &ctx->tolerance) || ++ __xn_put_user(tx->time.tv_sec, &ctx->time.tv_sec) || ++ __xn_put_user(tx->time.tv_usec, &ctx->time.tv_usec) || ++ __xn_put_user(tx->tick, &ctx->tick) || ++ __xn_put_user(tx->ppsfreq, &ctx->ppsfreq) || ++ __xn_put_user(tx->jitter, &ctx->jitter) || ++ __xn_put_user(tx->shift, &ctx->shift) || ++ __xn_put_user(tx->stabil, &ctx->stabil) || ++ __xn_put_user(tx->jitcnt, &ctx->jitcnt) || ++ __xn_put_user(tx->calcnt, &ctx->calcnt) || ++ __xn_put_user(tx->errcnt, &ctx->errcnt) || ++ __xn_put_user(tx->stbcnt, &ctx->stbcnt)) ++ 
return -EFAULT; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_put_timex); ++ ++ssize_t sys32_get_fdset(fd_set *fds, const compat_fd_set __user *cfds, ++ size_t cfdsize) ++{ ++ int rdpos, wrpos, rdlim = cfdsize / sizeof(compat_ulong_t); ++ ++ if (cfds == NULL || !access_rok(cfds, cfdsize)) ++ return -EFAULT; ++ ++ for (rdpos = 0, wrpos = 0; rdpos < rdlim; rdpos++, wrpos++) ++ if (__xn_get_user(fds->fds_bits[wrpos], cfds->fds_bits + rdpos)) ++ return -EFAULT; ++ ++ return (ssize_t)rdlim * sizeof(long); ++} ++EXPORT_SYMBOL_GPL(sys32_get_fdset); ++ ++ssize_t sys32_put_fdset(compat_fd_set __user *cfds, const fd_set *fds, ++ size_t fdsize) ++{ ++ int rdpos, wrpos, wrlim = fdsize / sizeof(long); ++ ++ if (cfds == NULL || !access_wok(cfds, wrlim * sizeof(compat_ulong_t))) ++ return -EFAULT; ++ ++ for (rdpos = 0, wrpos = 0; wrpos < wrlim; rdpos++, wrpos++) ++ if (__xn_put_user(fds->fds_bits[rdpos], cfds->fds_bits + wrpos)) ++ return -EFAULT; ++ ++ return (ssize_t)wrlim * sizeof(compat_ulong_t); ++} ++EXPORT_SYMBOL_GPL(sys32_put_fdset); ++ ++int sys32_get_param_ex(int policy, ++ struct sched_param_ex *p, ++ const struct compat_sched_param_ex __user *u_cp) ++{ ++ struct compat_sched_param_ex cpex; ++ ++ if (u_cp == NULL || cobalt_copy_from_user(&cpex, u_cp, sizeof(cpex))) ++ return -EFAULT; ++ ++ p->sched_priority = cpex.sched_priority; ++ ++ switch (policy) { ++ case SCHED_SPORADIC: ++ p->sched_ss_low_priority = cpex.sched_ss_low_priority; ++ p->sched_ss_max_repl = cpex.sched_ss_max_repl; ++ p->sched_ss_repl_period.tv_sec = cpex.sched_ss_repl_period.tv_sec; ++ p->sched_ss_repl_period.tv_nsec = cpex.sched_ss_repl_period.tv_nsec; ++ p->sched_ss_init_budget.tv_sec = cpex.sched_ss_init_budget.tv_sec; ++ p->sched_ss_init_budget.tv_nsec = cpex.sched_ss_init_budget.tv_nsec; ++ break; ++ case SCHED_RR: ++ p->sched_rr_quantum.tv_sec = cpex.sched_rr_quantum.tv_sec; ++ p->sched_rr_quantum.tv_nsec = cpex.sched_rr_quantum.tv_nsec; ++ break; ++ case SCHED_TP: ++ p->sched_tp_partition = cpex.sched_tp_partition; ++ break; ++ case SCHED_QUOTA: ++ p->sched_quota_group = cpex.sched_quota_group; ++ break; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_param_ex); ++ ++int sys32_put_param_ex(int policy, ++ struct compat_sched_param_ex __user *u_cp, ++ const struct sched_param_ex *p) ++{ ++ struct compat_sched_param_ex cpex; ++ ++ if (u_cp == NULL) ++ return -EFAULT; ++ ++ cpex.sched_priority = p->sched_priority; ++ ++ switch (policy) { ++ case SCHED_SPORADIC: ++ cpex.sched_ss_low_priority = p->sched_ss_low_priority; ++ cpex.sched_ss_max_repl = p->sched_ss_max_repl; ++ cpex.sched_ss_repl_period.tv_sec = p->sched_ss_repl_period.tv_sec; ++ cpex.sched_ss_repl_period.tv_nsec = p->sched_ss_repl_period.tv_nsec; ++ cpex.sched_ss_init_budget.tv_sec = p->sched_ss_init_budget.tv_sec; ++ cpex.sched_ss_init_budget.tv_nsec = p->sched_ss_init_budget.tv_nsec; ++ break; ++ case SCHED_RR: ++ cpex.sched_rr_quantum.tv_sec = p->sched_rr_quantum.tv_sec; ++ cpex.sched_rr_quantum.tv_nsec = p->sched_rr_quantum.tv_nsec; ++ break; ++ case SCHED_TP: ++ cpex.sched_tp_partition = p->sched_tp_partition; ++ break; ++ case SCHED_QUOTA: ++ cpex.sched_quota_group = p->sched_quota_group; ++ break; ++ } ++ ++ return cobalt_copy_to_user(u_cp, &cpex, sizeof(cpex)); ++} ++EXPORT_SYMBOL_GPL(sys32_put_param_ex); ++ ++int sys32_get_mqattr(struct mq_attr *ap, ++ const struct compat_mq_attr __user *u_cap) ++{ ++ struct compat_mq_attr cattr; ++ ++ if (u_cap == NULL || ++ cobalt_copy_from_user(&cattr, u_cap, sizeof(cattr))) ++ return -EFAULT; ++ ++ 
ap->mq_flags = cattr.mq_flags; ++ ap->mq_maxmsg = cattr.mq_maxmsg; ++ ap->mq_msgsize = cattr.mq_msgsize; ++ ap->mq_curmsgs = cattr.mq_curmsgs; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_mqattr); ++ ++int sys32_put_mqattr(struct compat_mq_attr __user *u_cap, ++ const struct mq_attr *ap) ++{ ++ struct compat_mq_attr cattr; ++ ++ cattr.mq_flags = ap->mq_flags; ++ cattr.mq_maxmsg = ap->mq_maxmsg; ++ cattr.mq_msgsize = ap->mq_msgsize; ++ cattr.mq_curmsgs = ap->mq_curmsgs; ++ ++ return u_cap == NULL ? -EFAULT : ++ cobalt_copy_to_user(u_cap, &cattr, sizeof(cattr)); ++} ++EXPORT_SYMBOL_GPL(sys32_put_mqattr); ++ ++int sys32_get_sigevent(struct sigevent *ev, ++ const struct compat_sigevent *__user u_cev) ++{ ++ struct compat_sigevent cev; ++ compat_int_t *cp; ++ int ret, *p; ++ ++ if (u_cev == NULL) ++ return -EFAULT; ++ ++ ret = cobalt_copy_from_user(&cev, u_cev, sizeof(cev)); ++ if (ret) ++ return ret; ++ ++ memset(ev, 0, sizeof(*ev)); ++ ev->sigev_value.sival_ptr = compat_ptr(cev.sigev_value.sival_ptr); ++ ev->sigev_signo = cev.sigev_signo; ++ ev->sigev_notify = cev.sigev_notify; ++ /* ++ * Extensions may define extra fields we don't know about in ++ * the padding area, so we have to load it entirely. ++ */ ++ p = ev->_sigev_un._pad; ++ cp = cev._sigev_un._pad; ++ while (p < &ev->_sigev_un._pad[ARRAY_SIZE(ev->_sigev_un._pad)] && ++ cp < &cev._sigev_un._pad[ARRAY_SIZE(cev._sigev_un._pad)]) ++ *p++ = *cp++; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_sigevent); ++ ++int sys32_get_sigset(sigset_t *set, const compat_sigset_t *u_cset) ++{ ++ return get_compat_sigset(set, u_cset); ++} ++EXPORT_SYMBOL_GPL(sys32_get_sigset); ++ ++int sys32_put_sigset(compat_sigset_t *u_cset, const sigset_t *set) ++{ ++ return put_compat_sigset(u_cset, set, sizeof(*u_cset)); ++} ++EXPORT_SYMBOL_GPL(sys32_put_sigset); ++ ++int sys32_get_sigval(union sigval *val, const union compat_sigval *u_cval) ++{ ++ union compat_sigval cval; ++ int ret; ++ ++ if (u_cval == NULL) ++ return -EFAULT; ++ ++ ret = cobalt_copy_from_user(&cval, u_cval, sizeof(cval)); ++ if (ret) ++ return ret; ++ ++ val->sival_ptr = compat_ptr(cval.sival_ptr); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_sigval); ++ ++int sys32_put_siginfo(void __user *u_si, const struct siginfo *si, ++ int overrun) ++{ ++ struct compat_siginfo __user *u_p = u_si; ++ int ret; ++ ++ if (u_p == NULL) ++ return -EFAULT; ++ ++ ret = __xn_put_user(si->si_signo, &u_p->si_signo); ++ ret |= __xn_put_user(si->si_errno, &u_p->si_errno); ++ ret |= __xn_put_user(si->si_code, &u_p->si_code); ++ ++ /* ++ * Copy the generic/standard siginfo bits to userland. ++ */ ++ switch (si->si_code) { ++ case SI_TIMER: ++ ret |= __xn_put_user(si->si_tid, &u_p->si_tid); ++ ret |= __xn_put_user(ptr_to_compat(si->si_ptr), &u_p->si_ptr); ++ ret |= __xn_put_user(overrun, &u_p->si_overrun); ++ break; ++ case SI_QUEUE: ++ case SI_MESGQ: ++ ret |= __xn_put_user(ptr_to_compat(si->si_ptr), &u_p->si_ptr); ++ /* falldown wanted. 
*/ ++ case SI_USER: ++ ret |= __xn_put_user(si->si_pid, &u_p->si_pid); ++ ret |= __xn_put_user(si->si_uid, &u_p->si_uid); ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(sys32_put_siginfo); ++ ++int sys32_get_msghdr(struct user_msghdr *msg, ++ const struct compat_msghdr __user *u_cmsg) ++{ ++ compat_uptr_t tmp1, tmp2, tmp3; ++ ++ if (u_cmsg == NULL || ++ !access_rok(u_cmsg, sizeof(*u_cmsg)) || ++ __xn_get_user(tmp1, &u_cmsg->msg_name) || ++ __xn_get_user(msg->msg_namelen, &u_cmsg->msg_namelen) || ++ __xn_get_user(tmp2, &u_cmsg->msg_iov) || ++ __xn_get_user(msg->msg_iovlen, &u_cmsg->msg_iovlen) || ++ __xn_get_user(tmp3, &u_cmsg->msg_control) || ++ __xn_get_user(msg->msg_controllen, &u_cmsg->msg_controllen) || ++ __xn_get_user(msg->msg_flags, &u_cmsg->msg_flags)) ++ return -EFAULT; ++ ++ if (msg->msg_namelen > sizeof(struct sockaddr_storage)) ++ msg->msg_namelen = sizeof(struct sockaddr_storage); ++ ++ msg->msg_name = compat_ptr(tmp1); ++ msg->msg_iov = compat_ptr(tmp2); ++ msg->msg_control = compat_ptr(tmp3); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_msghdr); ++ ++int sys32_get_mmsghdr(struct mmsghdr *mmsg, ++ const struct compat_mmsghdr __user *u_cmmsg) ++{ ++ if (u_cmmsg == NULL || ++ !access_rok(u_cmmsg, sizeof(*u_cmmsg)) || ++ __xn_get_user(mmsg->msg_len, &u_cmmsg->msg_len)) ++ return -EFAULT; ++ ++ return sys32_get_msghdr(&mmsg->msg_hdr, &u_cmmsg->msg_hdr); ++} ++EXPORT_SYMBOL_GPL(sys32_get_mmsghdr); ++ ++int sys32_put_msghdr(struct compat_msghdr __user *u_cmsg, ++ const struct user_msghdr *msg) ++{ ++ if (u_cmsg == NULL || ++ !access_wok(u_cmsg, sizeof(*u_cmsg)) || ++ __xn_put_user(ptr_to_compat(msg->msg_name), &u_cmsg->msg_name) || ++ __xn_put_user(msg->msg_namelen, &u_cmsg->msg_namelen) || ++ __xn_put_user(ptr_to_compat(msg->msg_iov), &u_cmsg->msg_iov) || ++ __xn_put_user(msg->msg_iovlen, &u_cmsg->msg_iovlen) || ++ __xn_put_user(ptr_to_compat(msg->msg_control), &u_cmsg->msg_control) || ++ __xn_put_user(msg->msg_controllen, &u_cmsg->msg_controllen) || ++ __xn_put_user(msg->msg_flags, &u_cmsg->msg_flags)) ++ return -EFAULT; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_put_msghdr); ++ ++int sys32_put_mmsghdr(struct compat_mmsghdr __user *u_cmmsg, ++ const struct mmsghdr *mmsg) ++{ ++ if (u_cmmsg == NULL || ++ !access_wok(u_cmmsg, sizeof(*u_cmmsg)) || ++ __xn_put_user(mmsg->msg_len, &u_cmmsg->msg_len)) ++ return -EFAULT; ++ ++ return sys32_put_msghdr(&u_cmmsg->msg_hdr, &mmsg->msg_hdr); ++} ++EXPORT_SYMBOL_GPL(sys32_put_mmsghdr); ++ ++int sys32_get_iovec(struct iovec *iov, ++ const struct compat_iovec __user *u_ciov, ++ int ciovlen) ++{ ++ const struct compat_iovec __user *p; ++ struct compat_iovec ciov; ++ int ret, n; ++ ++ for (n = 0, p = u_ciov; n < ciovlen; n++, p++) { ++ ret = cobalt_copy_from_user(&ciov, p, sizeof(ciov)); ++ if (ret) ++ return ret; ++ iov[n].iov_base = compat_ptr(ciov.iov_base); ++ iov[n].iov_len = ciov.iov_len; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_iovec); ++ ++int sys32_put_iovec(struct compat_iovec __user *u_ciov, ++ const struct iovec *iov, ++ int iovlen) ++{ ++ struct compat_iovec __user *p; ++ struct compat_iovec ciov; ++ int ret, n; ++ ++ for (n = 0, p = u_ciov; n < iovlen; n++, p++) { ++ ciov.iov_base = ptr_to_compat(iov[n].iov_base); ++ ciov.iov_len = iov[n].iov_len; ++ ret = cobalt_copy_to_user(p, &ciov, sizeof(*p)); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_put_iovec); +--- linux/kernel/xenomai/posix/monitor.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/monitor.c 
2021-04-07 16:01:26.060635834 +0800 +@@ -0,0 +1,435 @@ ++/* ++ * Copyright (C) 2011 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include "internal.h" ++#include "thread.h" ++#include "clock.h" ++#include "monitor.h" ++#include ++ ++/* ++ * The Cobalt monitor is a double-wait condition object, serializing ++ * accesses through a gate. It behaves like a mutex + two condition ++ * variables combo with extended signaling logic. Folding several ++ * conditions and the serialization support into a single object ++ * performs better on low end hw caches and allows for specific ++ * optimizations, compared to using separate general-purpose mutex and ++ * condvars. This object is used by the Copperplate interface ++ * internally when it runs over the Cobalt core. ++ * ++ * Threads can wait for some resource(s) to be granted (consumer ++ * side), or wait for the available resource(s) to drain (producer ++ * side). Therefore, signals are thread-directed for the grant side, ++ * and monitor-directed for the drain side. ++ * ++ * Typically, a consumer would wait for the GRANT condition to be ++ * satisfied, signaling the DRAINED condition when more resources ++ * could be made available if the protocol implements output ++ * contention (e.g. the write side of a message queue waiting for the ++ * consumer to release message slots). Conversely, a producer would ++ * wait for the DRAINED condition to be satisfied, issuing GRANT ++ * signals once more resources have been made available to the ++ * consumer. ++ * ++ * Implementation-wise, the monitor logic is shared with the Cobalt ++ * thread object. 
++ */ ++COBALT_SYSCALL(monitor_init, current, ++ (struct cobalt_monitor_shadow __user *u_mon, ++ clockid_t clk_id, int flags)) ++{ ++ struct cobalt_monitor_shadow shadow; ++ struct cobalt_monitor_state *state; ++ struct cobalt_monitor *mon; ++ int pshared, tmode, ret; ++ struct cobalt_umm *umm; ++ unsigned long stateoff; ++ spl_t s; ++ ++ tmode = clock_flag(TIMER_ABSTIME, clk_id); ++ if (tmode < 0) ++ return -EINVAL; ++ ++ mon = xnmalloc(sizeof(*mon)); ++ if (mon == NULL) ++ return -ENOMEM; ++ ++ pshared = (flags & COBALT_MONITOR_SHARED) != 0; ++ umm = &cobalt_ppd_get(pshared)->umm; ++ state = cobalt_umm_alloc(umm, sizeof(*state)); ++ if (state == NULL) { ++ xnfree(mon); ++ return -EAGAIN; ++ } ++ ++ ret = xnregistry_enter_anon(mon, &mon->resnode.handle); ++ if (ret) { ++ cobalt_umm_free(umm, state); ++ xnfree(mon); ++ return ret; ++ } ++ ++ mon->state = state; ++ xnsynch_init(&mon->gate, XNSYNCH_PI, &state->owner); ++ xnsynch_init(&mon->drain, XNSYNCH_PRIO, NULL); ++ mon->flags = flags; ++ mon->tmode = tmode; ++ INIT_LIST_HEAD(&mon->waiters); ++ ++ xnlock_get_irqsave(&nklock, s); ++ cobalt_add_resource(&mon->resnode, monitor, pshared); ++ mon->magic = COBALT_MONITOR_MAGIC; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ state->flags = 0; ++ stateoff = cobalt_umm_offset(umm, state); ++ XENO_BUG_ON(COBALT, stateoff != (__u32)stateoff); ++ shadow.flags = flags; ++ shadow.handle = mon->resnode.handle; ++ shadow.state_offset = (__u32)stateoff; ++ ++ return cobalt_copy_to_user(u_mon, &shadow, sizeof(*u_mon)); ++} ++ ++/* nklock held, irqs off */ ++static int monitor_enter(xnhandle_t handle, struct xnthread *curr) ++{ ++ struct cobalt_monitor *mon; ++ int info; ++ ++ mon = xnregistry_lookup(handle, NULL); /* (Re)validate. */ ++ if (mon == NULL || mon->magic != COBALT_MONITOR_MAGIC) ++ return -EINVAL; ++ ++ info = xnsynch_acquire(&mon->gate, XN_INFINITE, XN_RELATIVE); ++ if (info) ++ /* Break or error, no timeout possible. */ ++ return info & XNBREAK ? -EINTR : -EINVAL; ++ ++ mon->state->flags &= ~(COBALT_MONITOR_SIGNALED|COBALT_MONITOR_BROADCAST); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(monitor_enter, primary, ++ (struct cobalt_monitor_shadow __user *u_mon)) ++{ ++ struct xnthread *curr = xnthread_current(); ++ xnhandle_t handle; ++ int ret; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mon->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ret = monitor_enter(handle, curr); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++/* nklock held, irqs off */ ++static void monitor_wakeup(struct cobalt_monitor *mon) ++{ ++ struct cobalt_monitor_state *state = mon->state; ++ struct cobalt_thread *thread, *tmp; ++ struct xnthread *p; ++ int bcast; ++ ++ /* ++ * Having the GRANT signal pending does not necessarily mean ++ * that somebody is actually waiting for it, so we have to ++ * check both conditions below. ++ */ ++ bcast = (state->flags & COBALT_MONITOR_BROADCAST) != 0; ++ if ((state->flags & COBALT_MONITOR_GRANTED) == 0 || ++ list_empty(&mon->waiters)) ++ goto drain; ++ ++ /* ++ * Unblock waiters requesting a grant, either those who ++ * received it only or all of them, depending on the broadcast ++ * bit. ++ * ++ * We update the PENDED flag to inform userland about the ++ * presence of waiters, so that it may decide not to issue any ++ * syscall for exiting the monitor if nobody else is waiting ++ * at the gate. 
++ */ ++ list_for_each_entry_safe(thread, tmp, &mon->waiters, monitor_link) { ++ p = &thread->threadbase; ++ /* ++ * A thread might receive a grant signal albeit it ++ * does not wait on a monitor, or it might have timed ++ * out before we got there, so we really have to check ++ * that ->wchan does match our sleep queue. ++ */ ++ if (bcast || ++ (p->u_window->grant_value && p->wchan == &thread->monitor_synch)) { ++ xnsynch_wakeup_this_sleeper(&thread->monitor_synch, p); ++ list_del_init(&thread->monitor_link); ++ } ++ } ++drain: ++ /* ++ * Unblock threads waiting for a drain event if that signal is ++ * pending, either one or all, depending on the broadcast ++ * flag. ++ */ ++ if ((state->flags & COBALT_MONITOR_DRAINED) != 0 && ++ xnsynch_pended_p(&mon->drain)) { ++ if (bcast) ++ xnsynch_flush(&mon->drain, 0); ++ else ++ xnsynch_wakeup_one_sleeper(&mon->drain); ++ } ++ ++ if (list_empty(&mon->waiters) && !xnsynch_pended_p(&mon->drain)) ++ state->flags &= ~COBALT_MONITOR_PENDED; ++} ++ ++int __cobalt_monitor_wait(struct cobalt_monitor_shadow __user *u_mon, ++ int event, const struct timespec *ts, ++ int __user *u_ret) ++{ ++ struct cobalt_thread *curr = cobalt_current_thread(); ++ struct cobalt_monitor_state *state; ++ xnticks_t timeout = XN_INFINITE; ++ int ret = 0, opret = 0, info; ++ struct cobalt_monitor *mon; ++ struct xnsynch *synch; ++ xnhandle_t handle; ++ xntmode_t tmode; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mon->handle); ++ ++ if (ts) ++ timeout = ts2ns(ts) + 1; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mon = xnregistry_lookup(handle, NULL); ++ if (mon == NULL || mon->magic != COBALT_MONITOR_MAGIC) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * The current thread might have sent signals to the monitor ++ * it wants to sleep on: wake up satisfied waiters before ++ * going to sleep. ++ */ ++ state = mon->state; ++ if (state->flags & COBALT_MONITOR_SIGNALED) ++ monitor_wakeup(mon); ++ ++ synch = &curr->monitor_synch; ++ if (event & COBALT_MONITOR_WAITDRAIN) ++ synch = &mon->drain; ++ else { ++ curr->threadbase.u_window->grant_value = 0; ++ list_add_tail(&curr->monitor_link, &mon->waiters); ++ } ++ ++ /* ++ * Tell userland that somebody is now waiting for a signal, so ++ * that later exiting the monitor on the producer side will ++ * trigger a wakeup syscall. ++ * ++ * CAUTION: we must raise the PENDED flag while holding the ++ * gate mutex, to prevent a signal from sneaking in from a ++ * remote CPU without the producer issuing the corresponding ++ * wakeup call when dropping the gate lock. ++ */ ++ state->flags |= COBALT_MONITOR_PENDED; ++ ++ tmode = ts ? mon->tmode : XN_RELATIVE; ++ ++ /* Release the gate prior to waiting, all atomically. 
*/ ++ xnsynch_release(&mon->gate, &curr->threadbase); ++ ++ info = xnsynch_sleep_on(synch, timeout, tmode); ++ if (info) { ++ if ((event & COBALT_MONITOR_WAITDRAIN) == 0 && ++ !list_empty(&curr->monitor_link)) ++ list_del_init(&curr->monitor_link); ++ ++ if (list_empty(&mon->waiters) && !xnsynch_pended_p(&mon->drain)) ++ state->flags &= ~COBALT_MONITOR_PENDED; ++ ++ if (info & XNBREAK) { ++ opret = -EINTR; ++ goto out; ++ } ++ if (info & XNTIMEO) ++ opret = -ETIMEDOUT; ++ } ++ ++ ret = monitor_enter(handle, &curr->threadbase); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ __xn_put_user(opret, u_ret); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(monitor_wait, nonrestartable, ++ (struct cobalt_monitor_shadow __user *u_mon, ++ int event, const struct timespec __user *u_ts, ++ int __user *u_ret)) ++{ ++ struct timespec ts, *tsp = NULL; ++ int ret; ++ ++ if (u_ts) { ++ tsp = &ts; ++ ret = cobalt_copy_from_user(&ts, u_ts, sizeof(ts)); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_monitor_wait(u_mon, event, tsp, u_ret); ++} ++ ++COBALT_SYSCALL(monitor_sync, nonrestartable, ++ (struct cobalt_monitor_shadow __user *u_mon)) ++{ ++ struct cobalt_monitor *mon; ++ struct xnthread *curr; ++ xnhandle_t handle; ++ int ret = 0; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mon->handle); ++ curr = xnthread_current(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mon = xnregistry_lookup(handle, NULL); ++ if (mon == NULL || mon->magic != COBALT_MONITOR_MAGIC) ++ ret = -EINVAL; ++ else if (mon->state->flags & COBALT_MONITOR_SIGNALED) { ++ monitor_wakeup(mon); ++ xnsynch_release(&mon->gate, curr); ++ xnsched_run(); ++ ret = monitor_enter(handle, curr); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(monitor_exit, primary, ++ (struct cobalt_monitor_shadow __user *u_mon)) ++{ ++ struct cobalt_monitor *mon; ++ struct xnthread *curr; ++ xnhandle_t handle; ++ int ret = 0; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mon->handle); ++ curr = xnthread_current(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mon = xnregistry_lookup(handle, NULL); ++ if (mon == NULL || mon->magic != COBALT_MONITOR_MAGIC) ++ ret = -EINVAL; ++ else { ++ if (mon->state->flags & COBALT_MONITOR_SIGNALED) ++ monitor_wakeup(mon); ++ ++ xnsynch_release(&mon->gate, curr); ++ xnsched_run(); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(monitor_destroy, primary, ++ (struct cobalt_monitor_shadow __user *u_mon)) ++{ ++ struct cobalt_monitor_state *state; ++ struct cobalt_monitor *mon; ++ struct xnthread *curr; ++ xnhandle_t handle; ++ int ret = 0; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mon->handle); ++ curr = xnthread_current(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mon = xnregistry_lookup(handle, NULL); ++ if (mon == NULL || mon->magic != COBALT_MONITOR_MAGIC) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ state = mon->state; ++ if ((state->flags & COBALT_MONITOR_PENDED) != 0 || ++ xnsynch_pended_p(&mon->drain) || !list_empty(&mon->waiters)) { ++ ret = -EBUSY; ++ goto fail; ++ } ++ ++ /* ++ * A monitor must be destroyed by the thread currently holding ++ * its gate lock. 
++ */ ++ if (xnsynch_owner_check(&mon->gate, curr)) { ++ ret = -EPERM; ++ goto fail; ++ } ++ ++ cobalt_monitor_reclaim(&mon->resnode, s); /* drops lock */ ++ ++ xnsched_run(); ++ ++ return 0; ++ fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++void cobalt_monitor_reclaim(struct cobalt_resnode *node, spl_t s) ++{ ++ struct cobalt_monitor *mon; ++ struct cobalt_umm *umm; ++ int pshared; ++ ++ mon = container_of(node, struct cobalt_monitor, resnode); ++ pshared = (mon->flags & COBALT_MONITOR_SHARED) != 0; ++ xnsynch_destroy(&mon->gate); ++ xnsynch_destroy(&mon->drain); ++ xnregistry_remove(node->handle); ++ cobalt_del_resource(node); ++ cobalt_mark_deleted(mon); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ umm = &cobalt_ppd_get(pshared)->umm; ++ cobalt_umm_free(umm, mon->state); ++ xnfree(mon); ++} +--- linux/kernel/xenomai/posix/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/syscall.h 2021-04-07 16:01:26.055635841 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_SYSCALL_H ++#define _COBALT_POSIX_SYSCALL_H ++ ++#include ++ ++/* Regular (native) syscall handler implementation. */ ++#define COBALT_SYSCALL(__name, __mode, __args) \ ++ long CoBaLt_ ## __name __args ++ ++/* Regular (native) syscall handler declaration. */ ++#define COBALT_SYSCALL_DECL(__name, __args) \ ++ long CoBaLt_ ## __name __args ++ ++#include ++ ++#endif /* !_COBALT_POSIX_SYSCALL_H */ +--- linux/kernel/xenomai/posix/sem.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/sem.c 2021-04-07 16:01:26.051635847 +0800 +@@ -0,0 +1,618 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * Copyright (C) 2014,2015 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "clock.h" ++#include "sem.h" ++#include ++ ++static inline struct cobalt_resources *sem_kqueue(struct cobalt_sem *sem) ++{ ++ int pshared = !!(sem->flags & SEM_PSHARED); ++ return cobalt_current_resources(pshared); ++} ++ ++static inline int sem_check(struct cobalt_sem *sem) ++{ ++ if (sem == NULL || sem->magic != COBALT_SEM_MAGIC) ++ return -EINVAL; ++ ++ if (sem->resnode.scope && sem->resnode.scope != sem_kqueue(sem)) ++ return -EPERM; ++ ++ return 0; ++} ++ ++int __cobalt_sem_destroy(xnhandle_t handle) ++{ ++ struct cobalt_sem *sem; ++ int ret = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ sem = xnregistry_lookup(handle, NULL); ++ if (!cobalt_obj_active(sem, COBALT_SEM_MAGIC, typeof(*sem))) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ if (--sem->refs) { ++ ret = -EBUSY; ++ goto fail; ++ } ++ ++ cobalt_mark_deleted(sem); ++ xnregistry_remove(sem->resnode.handle); ++ if (!sem->pathname) ++ cobalt_del_resource(&sem->resnode); ++ if (xnsynch_destroy(&sem->synchbase) == XNSYNCH_RESCHED) { ++ xnsched_run(); ++ ret = 1; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (sem->pathname) ++ putname(sem->pathname); ++ ++ cobalt_umm_free(&cobalt_ppd_get(!!(sem->flags & SEM_PSHARED))->umm, ++ sem->state); ++ ++ xnfree(sem); ++ ++ return ret; ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++struct cobalt_sem * ++__cobalt_sem_init(const char *name, struct cobalt_sem_shadow *sm, ++ int flags, unsigned int value) ++{ ++ struct cobalt_sem_state *state; ++ struct cobalt_sem *sem, *osem; ++ struct cobalt_ppd *sys_ppd; ++ int ret, sflags, pshared; ++ struct list_head *semq; ++ spl_t s; ++ ++ if ((flags & SEM_PULSE) != 0 && value > 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ sem = xnmalloc(sizeof(*sem)); ++ if (sem == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ pshared = !!(flags & SEM_PSHARED); ++ sys_ppd = cobalt_ppd_get(pshared); ++ state = cobalt_umm_alloc(&sys_ppd->umm, sizeof(*state)); ++ if (state == NULL) { ++ ret = -EAGAIN; ++ goto err_free_sem; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ semq = &cobalt_current_resources(pshared)->semq; ++ if ((sm->magic == COBALT_SEM_MAGIC && !list_empty(semq)) || ++ sm->magic == COBALT_NAMED_SEM_MAGIC) { ++ osem = xnregistry_lookup(sm->handle, NULL); ++ if (cobalt_obj_active(osem, COBALT_SEM_MAGIC, typeof(*osem))) { ++ ret = -EBUSY; ++ goto err_lock_put; ++ } ++ } ++ ++ if (value > (unsigned)SEM_VALUE_MAX) { ++ ret = -EINVAL; ++ goto err_lock_put; ++ } ++ ++ ret = xnregistry_enter(name ?: "", sem, &sem->resnode.handle, NULL); ++ if (ret < 0) ++ goto err_lock_put; ++ ++ sem->magic = COBALT_SEM_MAGIC; ++ if (!name) ++ cobalt_add_resource(&sem->resnode, sem, pshared); ++ else ++ sem->resnode.scope = NULL; ++ sflags = flags & SEM_FIFO ? 0 : XNSYNCH_PRIO; ++ xnsynch_init(&sem->synchbase, sflags, NULL); ++ ++ sem->state = state; ++ atomic_set(&state->value, value); ++ state->flags = flags; ++ sem->flags = flags; ++ sem->refs = name ? 2 : 1; ++ sem->pathname = NULL; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ __cobalt_sem_shadow_init(sem, ++ name ? 
COBALT_NAMED_SEM_MAGIC : COBALT_SEM_MAGIC, sm); ++ ++ trace_cobalt_psem_init(name ?: "anon", ++ sem->resnode.handle, flags, value); ++ ++ return sem; ++ ++err_lock_put: ++ xnlock_put_irqrestore(&nklock, s); ++ cobalt_umm_free(&sys_ppd->umm, state); ++err_free_sem: ++ xnfree(sem); ++out: ++ trace_cobalt_psem_init_failed(name ?: "anon", flags, value, ret); ++ ++ return ERR_PTR(ret); ++} ++ ++void __cobalt_sem_shadow_init(struct cobalt_sem *sem, __u32 magic, ++ struct cobalt_sem_shadow *sm) ++{ ++ __u32 flags = sem->state->flags; ++ struct cobalt_ppd *sys_ppd; ++ ++ sys_ppd = cobalt_ppd_get(!!(flags & SEM_PSHARED)); ++ ++ sm->magic = magic; ++ sm->handle = sem->resnode.handle; ++ sm->state_offset = cobalt_umm_offset(&sys_ppd->umm, sem->state); ++ if (sem->state->flags & SEM_PSHARED) ++ sm->state_offset = -sm->state_offset; ++} ++ ++static int sem_destroy(struct cobalt_sem_shadow *sm) ++{ ++ struct cobalt_sem *sem; ++ int warn, ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (sm->magic != COBALT_SEM_MAGIC) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ sem = xnregistry_lookup(sm->handle, NULL); ++ ret = sem_check(sem); ++ if (ret) ++ goto fail; ++ ++ if ((sem->flags & SEM_NOBUSYDEL) != 0 && ++ xnsynch_pended_p(&sem->synchbase)) { ++ ret = -EBUSY; ++ goto fail; ++ } ++ ++ warn = sem->flags & SEM_WARNDEL; ++ cobalt_mark_deleted(sm); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ ret = __cobalt_sem_destroy(sem->resnode.handle); ++ ++ return warn ? ret : 0; ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++static inline int do_trywait(struct cobalt_sem *sem) ++{ ++ int ret; ++ ++ ret = sem_check(sem); ++ if (ret) ++ return ret; ++ ++ if (atomic_sub_return(1, &sem->state->value) < 0) ++ return -EAGAIN; ++ ++ return 0; ++} ++ ++static int sem_wait(xnhandle_t handle) ++{ ++ struct cobalt_sem *sem; ++ int ret, info; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sem = xnregistry_lookup(handle, NULL); ++ ret = do_trywait(sem); ++ if (ret != -EAGAIN) ++ goto out; ++ ++ ret = 0; ++ info = xnsynch_sleep_on(&sem->synchbase, XN_INFINITE, XN_RELATIVE); ++ if (info & XNRMID) { ++ ret = -EINVAL; ++ } else if (info & XNBREAK) { ++ atomic_inc(&sem->state->value); /* undo do_trywait() */ ++ ret = -EINTR; ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++static inline int sem_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? -EFAULT : ++ cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++} ++ ++int __cobalt_sem_timedwait(struct cobalt_sem_shadow __user *u_sem, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 }; ++ int pull_ts = 1, ret, info; ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ xntmode_t tmode; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_timedwait(handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ for (;;) { ++ sem = xnregistry_lookup(handle, NULL); ++ ret = do_trywait(sem); ++ if (ret != -EAGAIN) ++ break; ++ ++ /* ++ * POSIX states that the validity of the timeout spec ++ * _need_ not be checked if the semaphore can be ++ * locked immediately, we show this behavior despite ++ * it's actually more complex, to keep some ++ * applications ported to Linux happy. 
++ */ ++ if (pull_ts) { ++ atomic_inc(&sem->state->value); ++ xnlock_put_irqrestore(&nklock, s); ++ ret = fetch_timeout(&ts, u_ts); ++ xnlock_get_irqsave(&nklock, s); ++ if (ret) ++ break; ++ if (ts.tv_nsec >= ONE_BILLION) { ++ ret = -EINVAL; ++ break; ++ } ++ pull_ts = 0; ++ continue; ++ } ++ ++ ret = 0; ++ tmode = sem->flags & SEM_RAWCLOCK ? XN_ABSOLUTE : XN_REALTIME; ++ info = xnsynch_sleep_on(&sem->synchbase, ts2ns(&ts) + 1, tmode); ++ if (info & XNRMID) ++ ret = -EINVAL; ++ else if (info & (XNBREAK|XNTIMEO)) { ++ ret = (info & XNBREAK) ? -EINTR : -ETIMEDOUT; ++ atomic_inc(&sem->state->value); ++ } ++ break; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++static int sem_post(xnhandle_t handle) ++{ ++ struct cobalt_sem *sem; ++ int ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sem = xnregistry_lookup(handle, NULL); ++ ret = sem_check(sem); ++ if (ret) ++ goto out; ++ ++ if (atomic_read(&sem->state->value) == SEM_VALUE_MAX) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (atomic_inc_return(&sem->state->value) <= 0) { ++ if (xnsynch_wakeup_one_sleeper(&sem->synchbase)) ++ xnsched_run(); ++ } else if (sem->flags & SEM_PULSE) ++ atomic_set(&sem->state->value, 0); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++static int sem_getvalue(xnhandle_t handle, int *value) ++{ ++ struct cobalt_sem *sem; ++ int ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sem = xnregistry_lookup(handle, NULL); ++ ret = sem_check(sem); ++ if (ret) { ++ xnlock_put_irqrestore(&nklock, s); ++ return ret; ++ } ++ ++ *value = atomic_read(&sem->state->value); ++ if ((sem->flags & SEM_REPORT) == 0 && *value < 0) ++ *value = 0; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(sem_init, current, ++ (struct cobalt_sem_shadow __user *u_sem, ++ int flags, unsigned int value)) ++{ ++ struct cobalt_sem_shadow sm; ++ struct cobalt_sem *sem; ++ ++ if (cobalt_copy_from_user(&sm, u_sem, sizeof(sm))) ++ return -EFAULT; ++ ++ if (flags & ~(SEM_FIFO|SEM_PULSE|SEM_PSHARED|SEM_REPORT|\ ++ SEM_WARNDEL|SEM_RAWCLOCK|SEM_NOBUSYDEL)) ++ return -EINVAL; ++ ++ sem = __cobalt_sem_init(NULL, &sm, flags, value); ++ if (IS_ERR(sem)) ++ return PTR_ERR(sem); ++ ++ return cobalt_copy_to_user(u_sem, &sm, sizeof(*u_sem)); ++} ++ ++COBALT_SYSCALL(sem_post, current, ++ (struct cobalt_sem_shadow __user *u_sem)) ++{ ++ xnhandle_t handle; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_post(handle); ++ ++ return sem_post(handle); ++} ++ ++COBALT_SYSCALL(sem_wait, primary, ++ (struct cobalt_sem_shadow __user *u_sem)) ++{ ++ xnhandle_t handle; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_wait(handle); ++ ++ return sem_wait(handle); ++} ++ ++COBALT_SYSCALL(sem_timedwait, primary, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct timespec __user *u_ts)) ++{ ++ return __cobalt_sem_timedwait(u_sem, u_ts, sem_fetch_timeout); ++} ++ ++COBALT_SYSCALL(sem_trywait, primary, ++ (struct cobalt_sem_shadow __user *u_sem)) ++{ ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ int ret; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_trywait(handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ sem = xnregistry_lookup(handle, NULL); ++ ret = do_trywait(sem); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sem_getvalue, current, ++ (struct cobalt_sem_shadow __user *u_sem, ++ int __user *u_sval)) ++{ ++ int ret, sval = -1; ++ 
xnhandle_t handle; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ ++ ret = sem_getvalue(handle, &sval); ++ trace_cobalt_psem_getvalue(handle, sval); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_sval, &sval, sizeof(sval)); ++} ++ ++COBALT_SYSCALL(sem_destroy, current, ++ (struct cobalt_sem_shadow __user *u_sem)) ++{ ++ struct cobalt_sem_shadow sm; ++ int err; ++ ++ if (cobalt_copy_from_user(&sm, u_sem, sizeof(sm))) ++ return -EFAULT; ++ ++ trace_cobalt_psem_destroy(sm.handle); ++ ++ err = sem_destroy(&sm); ++ if (err < 0) ++ return err; ++ ++ return cobalt_copy_to_user(u_sem, &sm, sizeof(*u_sem)) ?: err; ++} ++ ++COBALT_SYSCALL(sem_broadcast_np, current, ++ (struct cobalt_sem_shadow __user *u_sem)) ++{ ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ spl_t s; ++ int ret; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_broadcast(u_sem->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sem = xnregistry_lookup(handle, NULL); ++ ret = sem_check(sem); ++ if (ret == 0 && atomic_read(&sem->state->value) < 0) { ++ atomic_set(&sem->state->value, 0); ++ xnsynch_flush(&sem->synchbase, 0); ++ xnsched_run(); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sem_inquire, current, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct cobalt_sem_info __user *u_info, ++ pid_t __user *u_waitlist, ++ size_t waitsz)) ++{ ++ int val = 0, nrwait = 0, nrpids, ret = 0; ++ unsigned long pstamp, nstamp = 0; ++ struct cobalt_sem_info info; ++ pid_t *t = NULL, fbuf[16]; ++ struct xnthread *thread; ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_inquire(handle); ++ ++ nrpids = waitsz / sizeof(pid_t); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ for (;;) { ++ pstamp = nstamp; ++ sem = xnregistry_lookup(handle, &nstamp); ++ if (sem == NULL || sem->magic != COBALT_SEM_MAGIC) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ /* ++ * Allocate memory to return the wait list without ++ * holding any lock, then revalidate the handle. ++ */ ++ if (t == NULL) { ++ val = atomic_read(&sem->state->value); ++ if (val >= 0 || u_waitlist == NULL) ++ break; ++ xnlock_put_irqrestore(&nklock, s); ++ if (nrpids > -val) ++ nrpids = -val; ++ if (-val <= ARRAY_SIZE(fbuf)) ++ t = fbuf; /* Use fast buffer. */ ++ else { ++ t = xnmalloc(-val * sizeof(pid_t)); ++ if (t == NULL) ++ return -ENOMEM; ++ } ++ xnlock_get_irqsave(&nklock, s); ++ } else if (pstamp == nstamp) ++ break; ++ else if (val != atomic_read(&sem->state->value)) { ++ xnlock_put_irqrestore(&nklock, s); ++ if (t != fbuf) ++ xnfree(t); ++ t = NULL; ++ xnlock_get_irqsave(&nklock, s); ++ } ++ } ++ ++ info.flags = sem->flags; ++ info.value = (sem->flags & SEM_REPORT) || val >= 0 ? val : 0; ++ info.nrwait = val < 0 ? 
-val : 0; ++ ++ if (xnsynch_pended_p(&sem->synchbase) && u_waitlist != NULL) { ++ xnsynch_for_each_sleeper(thread, &sem->synchbase) { ++ if (nrwait >= nrpids) ++ break; ++ t[nrwait++] = xnthread_host_pid(thread); ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ ret = cobalt_copy_to_user(u_info, &info, sizeof(info)); ++ if (ret == 0 && nrwait > 0) ++ ret = cobalt_copy_to_user(u_waitlist, t, nrwait * sizeof(pid_t)); ++ ++ if (t && t != fbuf) ++ xnfree(t); ++ ++ return ret ?: nrwait; ++} ++ ++void cobalt_sem_reclaim(struct cobalt_resnode *node, spl_t s) ++{ ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ int named, ret; ++ ++ sem = container_of(node, struct cobalt_sem, resnode); ++ named = (sem->flags & SEM_NAMED) != 0; ++ handle = node->handle; ++ xnlock_put_irqrestore(&nklock, s); ++ ret = __cobalt_sem_destroy(handle); ++ if (named && ret == -EBUSY) ++ xnregistry_unlink(xnregistry_key(handle)); ++} +--- linux/kernel/xenomai/posix/thread.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/thread.c 2021-04-07 16:01:26.046635854 +0800 +@@ -0,0 +1,953 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "sched.h" ++#include "signal.h" ++#include "timer.h" ++#include "clock.h" ++#include "sem.h" ++#define CREATE_TRACE_POINTS ++#include ++ ++xnticks_t cobalt_time_slice = CONFIG_XENO_OPT_RR_QUANTUM * 1000; ++ ++#define PTHREAD_HSLOTS (1 << 8) /* Must be a power of 2 */ ++ ++/* Process-local index, pthread_t x mm_struct (cobalt_local_hkey). */ ++struct local_thread_hash { ++ pid_t pid; ++ struct cobalt_thread *thread; ++ struct cobalt_local_hkey hkey; ++ struct local_thread_hash *next; ++}; ++ ++/* System-wide index on task_pid_nr(). 
*/ ++struct global_thread_hash { ++ pid_t pid; ++ struct cobalt_thread *thread; ++ struct global_thread_hash *next; ++}; ++ ++static struct local_thread_hash *local_index[PTHREAD_HSLOTS]; ++ ++static struct global_thread_hash *global_index[PTHREAD_HSLOTS]; ++ ++static inline struct local_thread_hash * ++thread_hash(const struct cobalt_local_hkey *hkey, ++ struct cobalt_thread *thread, pid_t pid) ++{ ++ struct global_thread_hash **ghead, *gslot; ++ struct local_thread_hash **lhead, *lslot; ++ u32 hash; ++ void *p; ++ spl_t s; ++ ++ p = xnmalloc(sizeof(*lslot) + sizeof(*gslot)); ++ if (p == NULL) ++ return NULL; ++ ++ lslot = p; ++ lslot->hkey = *hkey; ++ lslot->thread = thread; ++ lslot->pid = pid; ++ hash = jhash2((u32 *)&lslot->hkey, ++ sizeof(lslot->hkey) / sizeof(u32), 0); ++ lhead = &local_index[hash & (PTHREAD_HSLOTS - 1)]; ++ ++ gslot = p + sizeof(*lslot); ++ gslot->pid = pid; ++ gslot->thread = thread; ++ hash = jhash2((u32 *)&pid, sizeof(pid) / sizeof(u32), 0); ++ ghead = &global_index[hash & (PTHREAD_HSLOTS - 1)]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ lslot->next = *lhead; ++ *lhead = lslot; ++ gslot->next = *ghead; ++ *ghead = gslot; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return lslot; ++} ++ ++static inline void thread_unhash(const struct cobalt_local_hkey *hkey) ++{ ++ struct global_thread_hash **gtail, *gslot; ++ struct local_thread_hash **ltail, *lslot; ++ pid_t pid; ++ u32 hash; ++ spl_t s; ++ ++ hash = jhash2((u32 *) hkey, sizeof(*hkey) / sizeof(u32), 0); ++ ltail = &local_index[hash & (PTHREAD_HSLOTS - 1)]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ lslot = *ltail; ++ while (lslot && ++ (lslot->hkey.u_pth != hkey->u_pth || ++ lslot->hkey.mm != hkey->mm)) { ++ ltail = &lslot->next; ++ lslot = *ltail; ++ } ++ ++ if (lslot == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return; ++ } ++ ++ *ltail = lslot->next; ++ pid = lslot->pid; ++ hash = jhash2((u32 *)&pid, sizeof(pid) / sizeof(u32), 0); ++ gtail = &global_index[hash & (PTHREAD_HSLOTS - 1)]; ++ gslot = *gtail; ++ while (gslot && gslot->pid != pid) { ++ gtail = &gslot->next; ++ gslot = *gtail; ++ } ++ /* gslot must be found here. */ ++ XENO_BUG_ON(COBALT, !(gslot && gtail)); ++ *gtail = gslot->next; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnfree(lslot); ++} ++ ++static struct cobalt_thread * ++thread_lookup(const struct cobalt_local_hkey *hkey) ++{ ++ struct local_thread_hash *lslot; ++ struct cobalt_thread *thread; ++ u32 hash; ++ spl_t s; ++ ++ hash = jhash2((u32 *)hkey, sizeof(*hkey) / sizeof(u32), 0); ++ lslot = local_index[hash & (PTHREAD_HSLOTS - 1)]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ while (lslot != NULL && ++ (lslot->hkey.u_pth != hkey->u_pth || lslot->hkey.mm != hkey->mm)) ++ lslot = lslot->next; ++ ++ thread = lslot ? lslot->thread : NULL; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return thread; ++} ++ ++struct cobalt_thread *cobalt_thread_find(pid_t pid) /* nklocked, IRQs off */ ++{ ++ struct global_thread_hash *gslot; ++ u32 hash; ++ ++ hash = jhash2((u32 *)&pid, sizeof(pid) / sizeof(u32), 0); ++ ++ gslot = global_index[hash & (PTHREAD_HSLOTS - 1)]; ++ while (gslot && gslot->pid != pid) ++ gslot = gslot->next; ++ ++ return gslot ? 
gslot->thread : NULL; ++} ++EXPORT_SYMBOL_GPL(cobalt_thread_find); ++ ++struct cobalt_thread *cobalt_thread_find_local(pid_t pid) /* nklocked, IRQs off */ ++{ ++ struct cobalt_thread *thread; ++ ++ thread = cobalt_thread_find(pid); ++ if (thread == NULL || thread->hkey.mm != current->mm) ++ return NULL; ++ ++ return thread; ++} ++EXPORT_SYMBOL_GPL(cobalt_thread_find_local); ++ ++struct cobalt_thread *cobalt_thread_lookup(unsigned long pth) /* nklocked, IRQs off */ ++{ ++ struct cobalt_local_hkey hkey; ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ return thread_lookup(&hkey); ++} ++EXPORT_SYMBOL_GPL(cobalt_thread_lookup); ++ ++void cobalt_thread_map(struct xnthread *curr) ++{ ++ struct cobalt_thread *thread; ++ ++ thread = container_of(curr, struct cobalt_thread, threadbase); ++ thread->process = cobalt_current_process(); ++ XENO_BUG_ON(COBALT, thread->process == NULL); ++} ++ ++struct xnthread_personality *cobalt_thread_exit(struct xnthread *curr) ++{ ++ struct cobalt_thread *thread; ++ spl_t s; ++ ++ thread = container_of(curr, struct cobalt_thread, threadbase); ++ /* ++ * Unhash first, to prevent further access to the TCB from ++ * userland. ++ */ ++ thread_unhash(&thread->hkey); ++ xnlock_get_irqsave(&nklock, s); ++ cobalt_mark_deleted(thread); ++ list_del(&thread->next); ++ xnlock_put_irqrestore(&nklock, s); ++ cobalt_signal_flush(thread); ++ xnsynch_destroy(&thread->monitor_synch); ++ xnsynch_destroy(&thread->sigwait); ++ ++ return NULL; ++} ++ ++struct xnthread_personality *cobalt_thread_finalize(struct xnthread *zombie) ++{ ++ struct cobalt_thread *thread; ++ ++ thread = container_of(zombie, struct cobalt_thread, threadbase); ++ xnfree(thread); ++ ++ return NULL; ++} ++ ++int __cobalt_thread_setschedparam_ex(struct cobalt_thread *thread, int policy, ++ const struct sched_param_ex *param_ex) ++{ ++ struct xnsched_class *sched_class; ++ union xnsched_policy_param param; ++ xnticks_t tslice; ++ int ret = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (!cobalt_obj_active(thread, COBALT_THREAD_MAGIC, ++ struct cobalt_thread)) { ++ ret = -ESRCH; ++ goto out; ++ } ++ ++ tslice = thread->threadbase.rrperiod; ++ sched_class = cobalt_sched_policy_param(¶m, policy, ++ param_ex, &tslice); ++ if (sched_class == NULL) { ++ ret = -EINVAL; ++ goto out; ++ } ++ xnthread_set_slice(&thread->threadbase, tslice); ++ if (cobalt_call_extension(thread_setsched, &thread->extref, ret, ++ sched_class, ¶m) && ret) ++ goto out; ++ ret = xnthread_set_schedparam(&thread->threadbase, ++ sched_class, ¶m); ++ xnsched_run(); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++int __cobalt_thread_getschedparam_ex(struct cobalt_thread *thread, ++ int *policy_r, ++ struct sched_param_ex *param_ex) ++{ ++ struct xnsched_class *base_class; ++ struct xnthread *base_thread; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (!cobalt_obj_active(thread, COBALT_THREAD_MAGIC, ++ struct cobalt_thread)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -ESRCH; ++ } ++ ++ base_thread = &thread->threadbase; ++ base_class = base_thread->base_class; ++ *policy_r = base_class->policy; ++ ++ param_ex->sched_priority = xnthread_base_priority(base_thread); ++ if (param_ex->sched_priority == 0) /* SCHED_FIFO/SCHED_WEAK */ ++ *policy_r = SCHED_NORMAL; ++ ++ if (base_class == &xnsched_class_rt) { ++ if (xnthread_test_state(base_thread, XNRRB)) { ++ ns2ts(¶m_ex->sched_rr_quantum, base_thread->rrperiod); ++ *policy_r = SCHED_RR; ++ } ++ goto out; ++ } ++ ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ 
if (base_class == &xnsched_class_weak) { ++ if (*policy_r != SCHED_WEAK) ++ param_ex->sched_priority = -param_ex->sched_priority; ++ goto out; ++ } ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ if (base_class == &xnsched_class_sporadic) { ++ param_ex->sched_ss_low_priority = base_thread->pss->param.low_prio; ++ ns2ts(¶m_ex->sched_ss_repl_period, base_thread->pss->param.repl_period); ++ ns2ts(¶m_ex->sched_ss_init_budget, base_thread->pss->param.init_budget); ++ param_ex->sched_ss_max_repl = base_thread->pss->param.max_repl; ++ goto out; ++ } ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ if (base_class == &xnsched_class_tp) { ++ param_ex->sched_tp_partition = ++ base_thread->tps - base_thread->sched->tp.partitions; ++ goto out; ++ } ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ if (base_class == &xnsched_class_quota) { ++ param_ex->sched_quota_group = base_thread->quota->tgid; ++ goto out; ++ } ++#endif ++ ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++static int pthread_create(struct cobalt_thread **thread_p, ++ int policy, ++ const struct sched_param_ex *param_ex, ++ struct task_struct *task) ++{ ++ struct cobalt_process *process = cobalt_current_process(); ++ struct xnsched_class *sched_class; ++ union xnsched_policy_param param; ++ struct xnthread_init_attr iattr; ++ struct cobalt_thread *thread; ++ xnticks_t tslice; ++ int ret, n; ++ spl_t s; ++ ++ thread = xnmalloc(sizeof(*thread)); ++ if (thread == NULL) ++ return -EAGAIN; ++ ++ tslice = cobalt_time_slice; ++ sched_class = cobalt_sched_policy_param(¶m, policy, ++ param_ex, &tslice); ++ if (sched_class == NULL) { ++ xnfree(thread); ++ return -EINVAL; ++ } ++ ++ iattr.name = task->comm; ++ iattr.flags = XNUSER|XNFPU; ++ iattr.personality = &cobalt_personality; ++ iattr.affinity = CPU_MASK_ALL; ++ ret = xnthread_init(&thread->threadbase, &iattr, sched_class, ¶m); ++ if (ret) { ++ xnfree(thread); ++ return ret; ++ } ++ ++ thread->magic = COBALT_THREAD_MAGIC; ++ xnsynch_init(&thread->monitor_synch, XNSYNCH_FIFO, NULL); ++ ++ xnsynch_init(&thread->sigwait, XNSYNCH_FIFO, NULL); ++ sigemptyset(&thread->sigpending); ++ for (n = 0; n < _NSIG; n++) ++ INIT_LIST_HEAD(thread->sigqueues + n); ++ ++ xnthread_set_slice(&thread->threadbase, tslice); ++ cobalt_set_extref(&thread->extref, NULL, NULL); ++ ++ /* ++ * We need an anonymous registry entry to obtain a handle for ++ * fast mutex locking. ++ */ ++ ret = xnthread_register(&thread->threadbase, ""); ++ if (ret) { ++ xnsynch_destroy(&thread->monitor_synch); ++ xnsynch_destroy(&thread->sigwait); ++ xnfree(thread); ++ return ret; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&thread->next, process ? &process->thread_list ++ : &cobalt_global_thread_list); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ thread->hkey.u_pth = 0; ++ thread->hkey.mm = NULL; ++ ++ *thread_p = thread; ++ ++ return 0; ++} ++ ++static void pthread_discard(struct cobalt_thread *thread) ++{ ++ spl_t s; ++ ++ xnsynch_destroy(&thread->monitor_synch); ++ xnsynch_destroy(&thread->sigwait); ++ ++ xnlock_get_irqsave(&nklock, s); ++ list_del(&thread->next); ++ xnlock_put_irqrestore(&nklock, s); ++ __xnthread_discard(&thread->threadbase); ++ xnfree(thread); ++} ++ ++static inline int pthread_setmode_np(int clrmask, int setmask, int *mode_r) ++{ ++ const int valid_flags = XNLOCK|XNWARN|XNTRAPLB; ++ int old; ++ ++ /* ++ * The conforming mode bit is actually zero, since jumping to ++ * this code entailed switching to primary mode already. 
++ */ ++ if ((clrmask & ~valid_flags) != 0 || (setmask & ~valid_flags) != 0) ++ return -EINVAL; ++ ++ old = xnthread_set_mode(clrmask, setmask); ++ if (mode_r) ++ *mode_r = old; ++ ++ if ((clrmask & ~setmask) & XNLOCK) ++ /* Reschedule if the scheduler has been unlocked. */ ++ xnsched_run(); ++ ++ return 0; ++} ++ ++static struct cobalt_thread *thread_lookup_or_shadow(unsigned long pth, ++ __u32 __user *u_winoff, ++ int *promoted_r) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ ++ *promoted_r = 0; ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ ++ thread = thread_lookup(&hkey); ++ if (thread == NULL) { ++ if (u_winoff == NULL) ++ return ERR_PTR(-ESRCH); ++ ++ thread = cobalt_thread_shadow(&hkey, u_winoff); ++ if (!IS_ERR(thread)) ++ *promoted_r = 1; ++ } ++ ++ return thread; ++} ++ ++int cobalt_thread_setschedparam_ex(unsigned long pth, ++ int policy, ++ const struct sched_param_ex *param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted) ++{ ++ struct cobalt_thread *thread; ++ int ret, promoted; ++ ++ trace_cobalt_pthread_setschedparam(pth, policy, param_ex); ++ ++ thread = thread_lookup_or_shadow(pth, u_winoff, &promoted); ++ if (IS_ERR(thread)) ++ return PTR_ERR(thread); ++ ++ ret = __cobalt_thread_setschedparam_ex(thread, policy, param_ex); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_promoted, &promoted, sizeof(promoted)); ++} ++ ++COBALT_SYSCALL(thread_setschedparam_ex, conforming, ++ (unsigned long pth, ++ int policy, ++ const struct sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ struct sched_param_ex param_ex; ++ ++ if (cobalt_copy_from_user(¶m_ex, u_param, sizeof(param_ex))) ++ return -EFAULT; ++ ++ return cobalt_thread_setschedparam_ex(pth, policy, ¶m_ex, ++ u_winoff, u_promoted); ++} ++ ++int cobalt_thread_getschedparam_ex(unsigned long pth, ++ int *policy_r, ++ struct sched_param_ex *param_ex) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ int ret; ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ thread = thread_lookup(&hkey); ++ if (thread == NULL) ++ return -ESRCH; ++ ++ ret = __cobalt_thread_getschedparam_ex(thread, policy_r, param_ex); ++ if (ret) ++ return ret; ++ ++ trace_cobalt_pthread_getschedparam(pth, *policy_r, param_ex); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(thread_getschedparam_ex, current, ++ (unsigned long pth, ++ int __user *u_policy, ++ struct sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ int ret, policy; ++ ++ ret = cobalt_thread_getschedparam_ex(pth, &policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ ret = cobalt_copy_to_user(u_policy, &policy, sizeof(policy)); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_param, ¶m_ex, sizeof(param_ex)); ++} ++ ++int cobalt_thread_setschedprio(unsigned long pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted) ++{ ++ struct sched_param_ex param_ex; ++ struct cobalt_thread *thread; ++ int ret, policy, promoted; ++ ++ trace_cobalt_pthread_setschedprio(pth, prio); ++ ++ thread = thread_lookup_or_shadow(pth, u_winoff, &promoted); ++ if (IS_ERR(thread)) ++ return PTR_ERR(thread); ++ ++ ret = __cobalt_thread_getschedparam_ex(thread, &policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ param_ex.sched_priority = prio; ++ ++ ret = __cobalt_thread_setschedparam_ex(thread, policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_promoted, &promoted, sizeof(promoted)); ++} ++ ++COBALT_SYSCALL(thread_setschedprio, 
conforming, ++ (unsigned long pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ return cobalt_thread_setschedprio(pth, prio, u_winoff, u_promoted); ++} ++ ++int __cobalt_thread_create(unsigned long pth, int policy, ++ struct sched_param_ex *param_ex, ++ int xid, __u32 __user *u_winoff) ++{ ++ struct cobalt_thread *thread = NULL; ++ struct task_struct *p = current; ++ struct cobalt_local_hkey hkey; ++ int ret; ++ ++ trace_cobalt_pthread_create(pth, policy, param_ex); ++ ++ /* ++ * We have been passed the pthread_t identifier the user-space ++ * Cobalt library has assigned to our caller; we'll index our ++ * internal pthread_t descriptor in kernel space on it. ++ */ ++ hkey.u_pth = pth; ++ hkey.mm = p->mm; ++ ++ ret = pthread_create(&thread, policy, param_ex, p); ++ if (ret) ++ return ret; ++ ++ ret = cobalt_map_user(&thread->threadbase, u_winoff); ++ if (ret) { ++ pthread_discard(thread); ++ return ret; ++ } ++ ++ if (!thread_hash(&hkey, thread, task_pid_vnr(p))) { ++ ret = -EAGAIN; ++ goto fail; ++ } ++ ++ thread->hkey = hkey; ++ ++ if (xid > 0 && cobalt_push_personality(xid) == NULL) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ return xnthread_harden(); ++fail: ++ xnthread_cancel(&thread->threadbase); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(thread_create, init, ++ (unsigned long pth, int policy, ++ struct sched_param_ex __user *u_param, ++ int xid, ++ __u32 __user *u_winoff)) ++{ ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ ret = cobalt_copy_from_user(¶m_ex, u_param, sizeof(param_ex)); ++ if (ret) ++ return ret; ++ ++ return __cobalt_thread_create(pth, policy, ¶m_ex, xid, u_winoff); ++} ++ ++struct cobalt_thread * ++cobalt_thread_shadow(struct cobalt_local_hkey *hkey, ++ __u32 __user *u_winoff) ++{ ++ struct cobalt_thread *thread = NULL; ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ if (xnthread_current()) ++ return ERR_PTR(-EBUSY); ++ ++ param_ex.sched_priority = 0; ++ trace_cobalt_pthread_create(hkey->u_pth, SCHED_NORMAL, ¶m_ex); ++ ret = pthread_create(&thread, SCHED_NORMAL, ¶m_ex, current); ++ if (ret) ++ return ERR_PTR(ret); ++ ++ ret = cobalt_map_user(&thread->threadbase, u_winoff); ++ if (ret) { ++ pthread_discard(thread); ++ return ERR_PTR(ret); ++ } ++ ++ if (!thread_hash(hkey, thread, task_pid_vnr(current))) { ++ ret = -EAGAIN; ++ goto fail; ++ } ++ ++ thread->hkey = *hkey; ++ ++ xnthread_harden(); ++ ++ return thread; ++fail: ++ xnthread_cancel(&thread->threadbase); ++ ++ return ERR_PTR(ret); ++} ++ ++COBALT_SYSCALL(thread_setmode, primary, ++ (int clrmask, int setmask, int __user *u_mode_r)) ++{ ++ int ret, old; ++ ++ trace_cobalt_pthread_setmode(clrmask, setmask); ++ ++ ret = pthread_setmode_np(clrmask, setmask, &old); ++ if (ret) ++ return ret; ++ ++ if (u_mode_r && cobalt_copy_to_user(u_mode_r, &old, sizeof(old))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(thread_setname, current, ++ (unsigned long pth, const char __user *u_name)) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ char name[XNOBJECT_NAME_LEN]; ++ struct task_struct *p; ++ spl_t s; ++ ++ if (cobalt_strncpy_from_user(name, u_name, ++ sizeof(name) - 1) < 0) ++ return -EFAULT; ++ ++ name[sizeof(name) - 1] = '\0'; ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ ++ trace_cobalt_pthread_setname(pth, name); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ thread = thread_lookup(&hkey); ++ if (thread == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -ESRCH; ++ } ++ ++ ksformat(thread->threadbase.name, ++ XNOBJECT_NAME_LEN - 
1, "%s", name); ++ p = xnthread_host_task(&thread->threadbase); ++ get_task_struct(p); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ knamecpy(p->comm, name); ++ put_task_struct(p); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(thread_kill, conforming, ++ (unsigned long pth, int sig)) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ int ret; ++ spl_t s; ++ ++ trace_cobalt_pthread_kill(pth, sig); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ thread = thread_lookup(&hkey); ++ if (thread == NULL) ++ ret = -ESRCH; ++ else ++ ret = __cobalt_kill(thread, sig, 0); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(thread_join, primary, (unsigned long pth)) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ spl_t s; ++ ++ trace_cobalt_pthread_join(pth); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ thread = thread_lookup(&hkey); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (thread == NULL) ++ return -ESRCH; ++ ++ return xnthread_join(&thread->threadbase, false); ++} ++ ++COBALT_SYSCALL(thread_getpid, current, (unsigned long pth)) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ pid_t pid; ++ spl_t s; ++ ++ trace_cobalt_pthread_pid(pth); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ thread = thread_lookup(&hkey); ++ if (thread == NULL) ++ pid = -ESRCH; ++ else ++ pid = xnthread_host_pid(&thread->threadbase); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return pid; ++} ++ ++COBALT_SYSCALL(thread_getstat, current, ++ (pid_t pid, struct cobalt_threadstat __user *u_stat)) ++{ ++ struct cobalt_threadstat stat; ++ struct cobalt_thread *p; ++ struct xnthread *thread; ++ xnticks_t xtime; ++ spl_t s; ++ ++ trace_cobalt_pthread_stat(pid); ++ ++ if (pid == 0) { ++ thread = xnthread_current(); ++ if (thread == NULL) ++ return -EPERM; ++ xnlock_get_irqsave(&nklock, s); ++ } else { ++ xnlock_get_irqsave(&nklock, s); ++ p = cobalt_thread_find(pid); ++ if (p == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -ESRCH; ++ } ++ thread = &p->threadbase; ++ } ++ ++ /* We have to hold the nklock to keep most values consistent. 
*/ ++ stat.cpu = xnsched_cpu(thread->sched); ++ stat.cprio = xnthread_current_priority(thread); ++ xtime = xnstat_exectime_get_total(&thread->stat.account); ++ if (thread->sched->curr == thread) ++ xtime += xnstat_exectime_now() - ++ xnstat_exectime_get_last_switch(thread->sched); ++ stat.xtime = xnclock_ticks_to_ns(&nkclock, xtime); ++ stat.msw = xnstat_counter_get(&thread->stat.ssw); ++ stat.csw = xnstat_counter_get(&thread->stat.csw); ++ stat.xsc = xnstat_counter_get(&thread->stat.xsc); ++ stat.pf = xnstat_counter_get(&thread->stat.pf); ++ stat.status = xnthread_get_state(thread); ++ if (thread->lock_count > 0) ++ stat.status |= XNLOCK; ++ stat.timeout = xnthread_get_timeout(thread, ++ xnclock_read_monotonic(&nkclock)); ++ strcpy(stat.name, thread->name); ++ strcpy(stat.personality, thread->personality->name); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return cobalt_copy_to_user(u_stat, &stat, sizeof(stat)); ++} ++ ++#ifdef CONFIG_XENO_OPT_COBALT_EXTENSION ++ ++int cobalt_thread_extend(struct cobalt_extension *ext, ++ void *priv) ++{ ++ struct cobalt_thread *thread = cobalt_current_thread(); ++ struct xnthread_personality *prev; ++ ++ trace_cobalt_pthread_extend(thread->hkey.u_pth, ext->core.name); ++ ++ prev = cobalt_push_personality(ext->core.xid); ++ if (prev == NULL) ++ return -EINVAL; ++ ++ cobalt_set_extref(&thread->extref, ext, priv); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(cobalt_thread_extend); ++ ++void cobalt_thread_restrict(void) ++{ ++ struct cobalt_thread *thread = cobalt_current_thread(); ++ ++ trace_cobalt_pthread_restrict(thread->hkey.u_pth, ++ thread->threadbase.personality->name); ++ cobalt_pop_personality(&cobalt_personality); ++ cobalt_set_extref(&thread->extref, NULL, NULL); ++} ++EXPORT_SYMBOL_GPL(cobalt_thread_restrict); ++ ++#endif /* !CONFIG_XENO_OPT_COBALT_EXTENSION */ ++ ++const char *cobalt_trace_parse_sched_params(struct trace_seq *p, int policy, ++ struct sched_param_ex *params) ++{ ++ const char *ret = trace_seq_buffer_ptr(p); ++ ++ switch (policy) { ++ case SCHED_QUOTA: ++ trace_seq_printf(p, "priority=%d, group=%d", ++ params->sched_priority, ++ params->sched_quota_group); ++ break; ++ case SCHED_TP: ++ trace_seq_printf(p, "priority=%d, partition=%d", ++ params->sched_priority, ++ params->sched_tp_partition); ++ break; ++ case SCHED_NORMAL: ++ break; ++ case SCHED_SPORADIC: ++ trace_seq_printf(p, "priority=%d, low_priority=%d, " ++ "budget=(%ld.%09ld), period=(%ld.%09ld), " ++ "maxrepl=%d", ++ params->sched_priority, ++ params->sched_ss_low_priority, ++ params->sched_ss_init_budget.tv_sec, ++ params->sched_ss_init_budget.tv_nsec, ++ params->sched_ss_repl_period.tv_sec, ++ params->sched_ss_repl_period.tv_nsec, ++ params->sched_ss_max_repl); ++ break; ++ case SCHED_RR: ++ case SCHED_FIFO: ++ case SCHED_COBALT: ++ case SCHED_WEAK: ++ default: ++ trace_seq_printf(p, "priority=%d", params->sched_priority); ++ break; ++ } ++ trace_seq_putc(p, '\0'); ++ ++ return ret; ++} +--- linux/kernel/xenomai/posix/timer.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/timer.c 2021-04-07 16:01:26.041635861 +0800 +@@ -0,0 +1,588 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "timer.h" ++#include "clock.h" ++#include "signal.h" ++ ++void cobalt_timer_handler(struct xntimer *xntimer) ++{ ++ struct cobalt_timer *timer; ++ /* ++ * Deliver the timer notification via a signal (unless ++ * SIGEV_NONE was given). If we can't do this because the ++ * target thread disappeared, then stop the timer. It will go ++ * away when timer_delete() is called, or the owner's process ++ * exits, whichever comes first. ++ */ ++ timer = container_of(xntimer, struct cobalt_timer, timerbase); ++ if (timer->sigp.si.si_signo && ++ cobalt_signal_send_pid(timer->target, &timer->sigp) == -ESRCH) ++ xntimer_stop(&timer->timerbase); ++} ++EXPORT_SYMBOL_GPL(cobalt_timer_handler); ++ ++static inline struct cobalt_thread * ++timer_init(struct cobalt_timer *timer, ++ const struct sigevent *__restrict__ evp) /* nklocked, IRQs off. */ ++{ ++ struct cobalt_thread *owner = cobalt_current_thread(), *target = NULL; ++ struct xnclock *clock; ++ ++ /* ++ * First, try to offload this operation to the extended ++ * personality the current thread might originate from. ++ */ ++ if (cobalt_initcall_extension(timer_init, &timer->extref, ++ owner, target, evp) && target) ++ return target; ++ ++ /* ++ * Ok, we have no extension available, or we do but it does ++ * not want to overload the standard behavior: handle this ++ * timer the pure Cobalt way then. ++ */ ++ if (evp == NULL || evp->sigev_notify == SIGEV_NONE) { ++ target = owner; /* Assume SIGEV_THREAD_ID. */ ++ goto init; ++ } ++ ++ if (evp->sigev_notify != SIGEV_THREAD_ID) ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * Recipient thread must be a Xenomai shadow in user-space, ++ * living in the same process than our caller. 
++ */ ++ target = cobalt_thread_find_local(evp->sigev_notify_thread_id); ++ if (target == NULL) ++ return ERR_PTR(-EINVAL); ++init: ++ clock = cobalt_clock_find(timer->clockid); ++ if (IS_ERR(clock)) ++ return ERR_PTR(PTR_ERR(clock)); ++ ++ xntimer_init(&timer->timerbase, clock, cobalt_timer_handler, ++ target->threadbase.sched, XNTIMER_UGRAVITY); ++ ++ return target; ++} ++ ++static inline int timer_alloc_id(struct cobalt_process *cc) ++{ ++ int id; ++ ++ id = find_first_bit(cc->timers_map, CONFIG_XENO_OPT_NRTIMERS); ++ if (id == CONFIG_XENO_OPT_NRTIMERS) ++ return -EAGAIN; ++ ++ __clear_bit(id, cc->timers_map); ++ ++ return id; ++} ++ ++static inline void timer_free_id(struct cobalt_process *cc, int id) ++{ ++ __set_bit(id, cc->timers_map); ++} ++ ++struct cobalt_timer * ++cobalt_timer_by_id(struct cobalt_process *cc, timer_t timer_id) ++{ ++ if (timer_id < 0 || timer_id >= CONFIG_XENO_OPT_NRTIMERS) ++ return NULL; ++ ++ if (test_bit(timer_id, cc->timers_map)) ++ return NULL; ++ ++ return cc->timers[timer_id]; ++} ++ ++static inline int timer_create(clockid_t clockid, ++ const struct sigevent *__restrict__ evp, ++ timer_t * __restrict__ timerid) ++{ ++ struct cobalt_process *cc; ++ struct cobalt_thread *target; ++ struct cobalt_timer *timer; ++ int signo, ret = -EINVAL; ++ timer_t timer_id; ++ spl_t s; ++ ++ cc = cobalt_current_process(); ++ if (cc == NULL) ++ return -EPERM; ++ ++ timer = xnmalloc(sizeof(*timer)); ++ if (timer == NULL) ++ return -ENOMEM; ++ ++ timer->sigp.si.si_errno = 0; ++ timer->sigp.si.si_code = SI_TIMER; ++ timer->sigp.si.si_overrun = 0; ++ INIT_LIST_HEAD(&timer->sigp.next); ++ timer->clockid = clockid; ++ timer->overruns = 0; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ ret = timer_alloc_id(cc); ++ if (ret < 0) ++ goto out; ++ ++ timer_id = ret; ++ ++ if (evp == NULL) { ++ timer->sigp.si.si_int = timer_id; ++ signo = SIGALRM; ++ } else { ++ if (evp->sigev_notify == SIGEV_NONE) ++ signo = 0; /* Don't notify. 
*/ ++ else { ++ signo = evp->sigev_signo; ++ if (signo < 1 || signo > _NSIG) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ timer->sigp.si.si_value = evp->sigev_value; ++ } ++ } ++ ++ timer->sigp.si.si_signo = signo; ++ timer->sigp.si.si_tid = timer_id; ++ timer->id = timer_id; ++ ++ target = timer_init(timer, evp); ++ if (target == NULL) { ++ ret = -EPERM; ++ goto fail; ++ } ++ ++ if (IS_ERR(target)) { ++ ret = PTR_ERR(target); ++ goto fail; ++ } ++ ++ timer->target = xnthread_host_pid(&target->threadbase); ++ cc->timers[timer_id] = timer; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ *timerid = timer_id; ++ ++ return 0; ++fail: ++ timer_free_id(cc, timer_id); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnfree(timer); ++ ++ return ret; ++} ++ ++static void timer_cleanup(struct cobalt_process *p, struct cobalt_timer *timer) ++{ ++ xntimer_destroy(&timer->timerbase); ++ ++ if (!list_empty(&timer->sigp.next)) ++ list_del(&timer->sigp.next); ++ ++ timer_free_id(p, cobalt_timer_id(timer)); ++ p->timers[cobalt_timer_id(timer)] = NULL; ++} ++ ++static inline int ++timer_delete(timer_t timerid) ++{ ++ struct cobalt_process *cc; ++ struct cobalt_timer *timer; ++ int ret = 0; ++ spl_t s; ++ ++ cc = cobalt_current_process(); ++ if (cc == NULL) ++ return -EPERM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ timer = cobalt_timer_by_id(cc, timerid); ++ if (timer == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ /* ++ * If an extension runs and actually handles the deletion, we ++ * should not call the timer_cleanup extension handler for ++ * this timer, but we shall destroy the core timer. If the ++ * handler returns on error, the whole deletion process is ++ * aborted, leaving the timer untouched. In all other cases, ++ * we do the core timer cleanup work, firing the timer_cleanup ++ * extension handler if defined. ++ */ ++ if (cobalt_call_extension(timer_delete, &timer->extref, ret) && ret < 0) ++ goto out; ++ ++ if (ret == 0) ++ cobalt_call_extension(timer_cleanup, &timer->extref, ret); ++ else ++ ret = 0; ++ ++ timer_cleanup(cc, timer); ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(timer); ++ ++ return ret; ++ ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++void __cobalt_timer_getval(struct xntimer *__restrict__ timer, ++ struct itimerspec *__restrict__ value) ++{ ++ ns2ts(&value->it_interval, xntimer_interval(timer)); ++ ++ if (!xntimer_running_p(timer)) { ++ value->it_value.tv_sec = 0; ++ value->it_value.tv_nsec = 0; ++ } else { ++ ns2ts(&value->it_value, xntimer_get_timeout(timer)); ++ } ++} ++ ++static inline void ++timer_gettimeout(struct cobalt_timer *__restrict__ timer, ++ struct itimerspec *__restrict__ value) ++{ ++ int ret = 0; ++ ++ if (cobalt_call_extension(timer_gettime, &timer->extref, ++ ret, value) && ret != 0) ++ return; ++ ++ __cobalt_timer_getval(&timer->timerbase, value); ++} ++ ++int __cobalt_timer_setval(struct xntimer *__restrict__ timer, int clock_flag, ++ const struct itimerspec *__restrict__ value) ++{ ++ xnticks_t start, period; ++ ++ if (value->it_value.tv_nsec == 0 && value->it_value.tv_sec == 0) { ++ xntimer_stop(timer); ++ return 0; ++ } ++ ++ if ((unsigned long)value->it_value.tv_nsec >= ONE_BILLION || ++ ((unsigned long)value->it_interval.tv_nsec >= ONE_BILLION && ++ (value->it_value.tv_sec != 0 || value->it_value.tv_nsec != 0))) ++ return -EINVAL; ++ ++ start = ts2ns(&value->it_value) + 1; ++ period = ts2ns(&value->it_interval); ++ ++ /* ++ * Now start the timer. 
If the timeout data has already ++ * passed, the caller will handle the case. ++ */ ++ return xntimer_start(timer, start, period, clock_flag); ++} ++ ++static inline int timer_set(struct cobalt_timer *timer, int flags, ++ const struct itimerspec *__restrict__ value) ++{ /* nklocked, IRQs off. */ ++ struct cobalt_thread *thread; ++ int ret = 0; ++ ++ /* First, try offloading the work to an extension. */ ++ ++ if (cobalt_call_extension(timer_settime, &timer->extref, ++ ret, value, flags) && ret != 0) ++ return ret < 0 ? ret : 0; ++ ++ /* ++ * No extension, or operation not handled. Default to plain ++ * POSIX behavior. ++ * ++ * If the target thread vanished, just don't start the timer. ++ */ ++ thread = cobalt_thread_find(timer->target); ++ if (thread == NULL) ++ return 0; ++ ++ /* ++ * Make the timer affine to the CPU running the thread to be ++ * signaled if possible. ++ */ ++ xntimer_set_affinity(&timer->timerbase, thread->threadbase.sched); ++ ++ return __cobalt_timer_setval(&timer->timerbase, ++ clock_flag(flags, timer->clockid), value); ++} ++ ++static inline void ++timer_deliver_late(struct cobalt_process *cc, timer_t timerid) ++{ ++ struct cobalt_timer *timer; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ /* ++ * We dropped the lock shortly, revalidate the timer handle in ++ * case a deletion slipped in. ++ */ ++ timer = cobalt_timer_by_id(cc, timerid); ++ if (timer) ++ cobalt_timer_handler(&timer->timerbase); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++int __cobalt_timer_settime(timer_t timerid, int flags, ++ const struct itimerspec *__restrict__ value, ++ struct itimerspec *__restrict__ ovalue) ++{ ++ struct cobalt_timer *timer; ++ struct cobalt_process *cc; ++ int ret; ++ spl_t s; ++ ++ cc = cobalt_current_process(); ++ XENO_BUG_ON(COBALT, cc == NULL); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ timer = cobalt_timer_by_id(cc, timerid); ++ if (timer == NULL) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (ovalue) ++ timer_gettimeout(timer, ovalue); ++ ++ ret = timer_set(timer, flags, value); ++ if (ret == -ETIMEDOUT) { ++ /* ++ * Time has already passed, deliver a notification ++ * immediately. Since we are about to dive into the ++ * signal machinery for this, let's drop the nklock to ++ * break the atomic section temporarily. 
++ */ ++ xnlock_put_irqrestore(&nklock, s); ++ timer_deliver_late(cc, timerid); ++ return 0; ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++int __cobalt_timer_gettime(timer_t timerid, struct itimerspec *value) ++{ ++ struct cobalt_timer *timer; ++ struct cobalt_process *cc; ++ spl_t s; ++ ++ cc = cobalt_current_process(); ++ if (cc == NULL) ++ return -EPERM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ timer = cobalt_timer_by_id(cc, timerid); ++ if (timer == NULL) ++ goto fail; ++ ++ timer_gettimeout(timer, value); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return -EINVAL; ++} ++ ++COBALT_SYSCALL(timer_delete, current, (timer_t timerid)) ++{ ++ return timer_delete(timerid); ++} ++ ++int __cobalt_timer_create(clockid_t clock, ++ const struct sigevent *sev, ++ timer_t __user *u_tm) ++{ ++ timer_t timerid = 0; ++ int ret; ++ ++ ret = timer_create(clock, sev, &timerid); ++ if (ret) ++ return ret; ++ ++ if (cobalt_copy_to_user(u_tm, &timerid, sizeof(timerid))) { ++ timer_delete(timerid); ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(timer_create, current, ++ (clockid_t clock, ++ const struct sigevent __user *u_sev, ++ timer_t __user *u_tm)) ++{ ++ struct sigevent sev, *evp = NULL; ++ ++ if (u_sev) { ++ evp = &sev; ++ if (cobalt_copy_from_user(&sev, u_sev, sizeof(sev))) ++ return -EFAULT; ++ } ++ ++ return __cobalt_timer_create(clock, evp, u_tm); ++} ++ ++COBALT_SYSCALL(timer_settime, primary, ++ (timer_t tm, int flags, ++ const struct itimerspec __user *u_newval, ++ struct itimerspec __user *u_oldval)) ++{ ++ struct itimerspec newv, oldv, *oldvp = &oldv; ++ int ret; ++ ++ if (u_oldval == NULL) ++ oldvp = NULL; ++ ++ if (cobalt_copy_from_user(&newv, u_newval, sizeof(newv))) ++ return -EFAULT; ++ ++ ret = __cobalt_timer_settime(tm, flags, &newv, oldvp); ++ if (ret) ++ return ret; ++ ++ if (oldvp && cobalt_copy_to_user(u_oldval, oldvp, sizeof(oldv))) { ++ __cobalt_timer_settime(tm, flags, oldvp, NULL); ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(timer_gettime, current, ++ (timer_t tm, struct itimerspec __user *u_val)) ++{ ++ struct itimerspec val; ++ int ret; ++ ++ ret = __cobalt_timer_gettime(tm, &val); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_val, &val, sizeof(val)); ++} ++ ++COBALT_SYSCALL(timer_getoverrun, current, (timer_t timerid)) ++{ ++ struct cobalt_timer *timer; ++ struct cobalt_process *cc; ++ int overruns; ++ spl_t s; ++ ++ cc = cobalt_current_process(); ++ if (cc == NULL) ++ return -EPERM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ timer = cobalt_timer_by_id(cc, timerid); ++ if (timer == NULL) ++ goto fail; ++ ++ overruns = timer->overruns; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return overruns; ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return -EINVAL; ++} ++ ++int cobalt_timer_deliver(struct cobalt_thread *waiter, timer_t timerid) /* nklocked, IRQs off. */ ++{ ++ struct cobalt_timer *timer; ++ xnticks_t now; ++ ++ timer = cobalt_timer_by_id(cobalt_current_process(), timerid); ++ if (timer == NULL) ++ /* Killed before ultimate delivery, who cares then? 
*/ ++ return 0; ++ ++ if (!xntimer_periodic_p(&timer->timerbase)) ++ timer->overruns = 0; ++ else { ++ now = xnclock_read_raw(xntimer_clock(&timer->timerbase)); ++ timer->overruns = xntimer_get_overruns(&timer->timerbase, ++ &waiter->threadbase, now); ++ if ((unsigned int)timer->overruns > COBALT_DELAYMAX) ++ timer->overruns = COBALT_DELAYMAX; ++ } ++ ++ return timer->overruns; ++} ++ ++void cobalt_timer_reclaim(struct cobalt_process *p) ++{ ++ struct cobalt_timer *timer; ++ unsigned id; ++ spl_t s; ++ int ret; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (find_first_zero_bit(p->timers_map, CONFIG_XENO_OPT_NRTIMERS) == ++ CONFIG_XENO_OPT_NRTIMERS) ++ goto out; ++ ++ for (id = 0; id < ARRAY_SIZE(p->timers); id++) { ++ timer = cobalt_timer_by_id(p, id); ++ if (timer == NULL) ++ continue; ++ ++ cobalt_call_extension(timer_cleanup, &timer->extref, ret); ++ timer_cleanup(p, timer); ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(timer); ++ xnlock_get_irqsave(&nklock, s); ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++} +--- linux/kernel/xenomai/posix/timer.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/timer.h 2021-04-07 16:01:26.036635868 +0800 +@@ -0,0 +1,86 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_POSIX_TIMER_H ++#define _COBALT_POSIX_TIMER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct cobalt_timer { ++ struct xntimer timerbase; ++ timer_t id; ++ int overruns; ++ clockid_t clockid; ++ pid_t target; ++ struct cobalt_sigpending sigp; ++ struct cobalt_extref extref; ++}; ++ ++int cobalt_timer_deliver(struct cobalt_thread *waiter, ++ timer_t timerid); ++ ++void cobalt_timer_reclaim(struct cobalt_process *p); ++ ++static inline timer_t cobalt_timer_id(const struct cobalt_timer *timer) ++{ ++ return timer->id; ++} ++ ++struct cobalt_timer * ++cobalt_timer_by_id(struct cobalt_process *p, timer_t timer_id); ++ ++void cobalt_timer_handler(struct xntimer *xntimer); ++ ++void __cobalt_timer_getval(struct xntimer *__restrict__ timer, ++ struct itimerspec *__restrict__ value); ++ ++int __cobalt_timer_setval(struct xntimer *__restrict__ timer, int clock_flag, ++ const struct itimerspec *__restrict__ value); ++ ++int __cobalt_timer_create(clockid_t clock, ++ const struct sigevent *sev, ++ timer_t __user *u_tm); ++ ++int __cobalt_timer_settime(timer_t timerid, int flags, ++ const struct itimerspec *__restrict__ value, ++ struct itimerspec *__restrict__ ovalue); ++ ++int __cobalt_timer_gettime(timer_t timerid, struct itimerspec *value); ++ ++COBALT_SYSCALL_DECL(timer_create, ++ (clockid_t clock, ++ const struct sigevent __user *u_sev, ++ timer_t __user *u_tm)); ++ ++COBALT_SYSCALL_DECL(timer_delete, (timer_t tm)); ++ ++COBALT_SYSCALL_DECL(timer_settime, ++ (timer_t tm, int flags, ++ const struct itimerspec __user *u_newval, ++ struct itimerspec __user *u_oldval)); ++ ++COBALT_SYSCALL_DECL(timer_gettime, ++ (timer_t tm, struct itimerspec __user *u_val)); ++ ++COBALT_SYSCALL_DECL(timer_getoverrun, (timer_t tm)); ++ ++#endif /* !_COBALT_POSIX_TIMER_H */ +--- linux/kernel/xenomai/posix/gen-syscall-entries.sh 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/gen-syscall-entries.sh 2021-04-07 16:01:26.032635874 +0800 +@@ -0,0 +1,32 @@ ++#! /bin/sh ++ ++set -e ++ ++shift ++ ++awk ' ++match($0, /COBALT_SYSCALL\([^,]*,[ \t]*[^,]*/) { ++ str=substr($0, RSTART + 15, RLENGTH - 15) ++ match(str, /[^, \t]*/) ++ syscall=substr(str, RSTART, RLENGTH) ++ ++ if (syscall == "") { ++ print "Failed to find syscall name in line " $0 > "/dev/stderr" ++ exit 1 ++ } ++ ++ calls = calls " __COBALT_CALL_ENTRY(" syscall ") \\\n" ++ modes = modes " __COBALT_MODE(" str ") \\\n" ++ next ++} ++ ++/COBALT_SYSCALL\(/ { ++ print "Failed to parse line " $0 > "/dev/stderr" ++ exit 1 ++} ++ ++END { ++ print "#define __COBALT_CALL_ENTRIES \\\n" calls " /* end */" ++ print "#define __COBALT_CALL_MODES \\\n" modes " /* end */" ++} ++' $* +--- linux/kernel/xenomai/posix/mqueue.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/mqueue.c 2021-04-07 16:01:26.027635881 +0800 +@@ -0,0 +1,1010 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "signal.h" ++#include "timer.h" ++#include "mqueue.h" ++#include "clock.h" ++#include ++ ++#define COBALT_MSGMAX 65536 ++#define COBALT_MSGSIZEMAX (16*1024*1024) ++#define COBALT_MSGPRIOMAX 32768 ++ ++struct cobalt_mq { ++ unsigned magic; ++ ++ struct list_head link; ++ ++ struct xnsynch receivers; ++ struct xnsynch senders; ++ size_t memsize; ++ char *mem; ++ struct list_head queued; ++ struct list_head avail; ++ int nrqueued; ++ ++ /* mq_notify */ ++ struct siginfo si; ++ mqd_t target_qd; ++ struct cobalt_thread *target; ++ ++ struct mq_attr attr; ++ ++ unsigned refs; ++ char name[COBALT_MAXNAME]; ++ xnhandle_t handle; ++ ++ DECLARE_XNSELECT(read_select); ++ DECLARE_XNSELECT(write_select); ++}; ++ ++struct cobalt_mqd { ++ struct cobalt_mq *mq; ++ struct rtdm_fd fd; ++}; ++ ++struct cobalt_msg { ++ struct list_head link; ++ unsigned int prio; ++ size_t len; ++ char data[0]; ++}; ++ ++struct cobalt_mqwait_context { ++ struct xnthread_wait_context wc; ++ struct cobalt_msg *msg; ++}; ++ ++static struct mq_attr default_attr = { ++ .mq_maxmsg = 10, ++ .mq_msgsize = 8192, ++}; ++ ++static LIST_HEAD(cobalt_mqq); ++ ++static inline struct cobalt_msg *mq_msg_alloc(struct cobalt_mq *mq) ++{ ++ if (list_empty(&mq->avail)) ++ return NULL; ++ ++ return list_get_entry(&mq->avail, struct cobalt_msg, link); ++} ++ ++static inline void mq_msg_free(struct cobalt_mq *mq, struct cobalt_msg * msg) ++{ ++ list_add(&msg->link, &mq->avail); /* For earliest re-use of the block. */ ++} ++ ++static inline int mq_init(struct cobalt_mq *mq, const struct mq_attr *attr) ++{ ++ unsigned i, msgsize, memsize; ++ char *mem; ++ ++ if (attr == NULL) ++ attr = &default_attr; ++ else { ++ if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0) ++ return -EINVAL; ++ if (attr->mq_maxmsg > COBALT_MSGMAX) ++ return -EINVAL; ++ if (attr->mq_msgsize > COBALT_MSGSIZEMAX) ++ return -EINVAL; ++ } ++ ++ msgsize = attr->mq_msgsize + sizeof(struct cobalt_msg); ++ ++ /* Align msgsize on natural boundary. */ ++ if ((msgsize % sizeof(unsigned long))) ++ msgsize += ++ sizeof(unsigned long) - (msgsize % sizeof(unsigned long)); ++ ++ memsize = msgsize * attr->mq_maxmsg; ++ memsize = PAGE_ALIGN(memsize); ++ if (get_order(memsize) > MAX_ORDER) ++ return -ENOSPC; ++ ++ mem = xnheap_vmalloc(memsize); ++ if (mem == NULL) ++ return -ENOSPC; ++ ++ mq->memsize = memsize; ++ INIT_LIST_HEAD(&mq->queued); ++ mq->nrqueued = 0; ++ xnsynch_init(&mq->receivers, XNSYNCH_PRIO, NULL); ++ xnsynch_init(&mq->senders, XNSYNCH_PRIO, NULL); ++ mq->mem = mem; ++ ++ /* Fill the pool. 
*/ ++ INIT_LIST_HEAD(&mq->avail); ++ for (i = 0; i < attr->mq_maxmsg; i++) { ++ struct cobalt_msg *msg = (struct cobalt_msg *) (mem + i * msgsize); ++ mq_msg_free(mq, msg); ++ } ++ ++ mq->attr = *attr; ++ mq->target = NULL; ++ xnselect_init(&mq->read_select); ++ xnselect_init(&mq->write_select); ++ mq->magic = COBALT_MQ_MAGIC; ++ mq->refs = 2; ++ INIT_LIST_HEAD(&mq->link); ++ ++ return 0; ++} ++ ++static inline void mq_destroy(struct cobalt_mq *mq) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xnsynch_destroy(&mq->receivers); ++ xnsynch_destroy(&mq->senders); ++ list_del(&mq->link); ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++ xnselect_destroy(&mq->read_select); /* Reschedules. */ ++ xnselect_destroy(&mq->write_select); /* Ditto. */ ++ xnregistry_remove(mq->handle); ++ xnheap_vfree(mq->mem); ++ kfree(mq); ++} ++ ++static int mq_unref_inner(struct cobalt_mq *mq, spl_t s) ++{ ++ int destroy; ++ ++ destroy = --mq->refs == 0; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (destroy) ++ mq_destroy(mq); ++ ++ return destroy; ++} ++ ++static int mq_unref(struct cobalt_mq *mq) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ return mq_unref_inner(mq, s); ++} ++ ++static void mqd_close(struct rtdm_fd *fd) ++{ ++ struct cobalt_mqd *mqd = container_of(fd, struct cobalt_mqd, fd); ++ struct cobalt_mq *mq = mqd->mq; ++ ++ kfree(mqd); ++ mq_unref(mq); ++} ++ ++int ++mqd_select(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned type, unsigned index) ++{ ++ struct cobalt_mqd *mqd = container_of(fd, struct cobalt_mqd, fd); ++ struct xnselect_binding *binding; ++ struct cobalt_mq *mq; ++ int err; ++ spl_t s; ++ ++ if (type == XNSELECT_READ || type == XNSELECT_WRITE) { ++ binding = xnmalloc(sizeof(*binding)); ++ if (!binding) ++ return -ENOMEM; ++ } else ++ return -EBADF; ++ ++ xnlock_get_irqsave(&nklock, s); ++ mq = mqd->mq; ++ ++ switch(type) { ++ case XNSELECT_READ: ++ err = -EBADF; ++ if ((rtdm_fd_flags(fd) & COBALT_PERMS_MASK) == O_WRONLY) ++ goto unlock_and_error; ++ ++ err = xnselect_bind(&mq->read_select, binding, ++ selector, type, index, ++ !list_empty(&mq->queued)); ++ if (err) ++ goto unlock_and_error; ++ break; ++ ++ case XNSELECT_WRITE: ++ err = -EBADF; ++ if ((rtdm_fd_flags(fd) & COBALT_PERMS_MASK) == O_RDONLY) ++ goto unlock_and_error; ++ ++ err = xnselect_bind(&mq->write_select, binding, ++ selector, type, index, ++ !list_empty(&mq->avail)); ++ if (err) ++ goto unlock_and_error; ++ break; ++ } ++ xnlock_put_irqrestore(&nklock, s); ++ return 0; ++ ++ unlock_and_error: ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(binding); ++ return err; ++} ++ ++static struct rtdm_fd_ops mqd_ops = { ++ .close = mqd_close, ++ .select = mqd_select, ++}; ++ ++static inline int mqd_create(struct cobalt_mq *mq, unsigned long flags, int ufd) ++{ ++ struct cobalt_mqd *mqd; ++ int ret; ++ ++ if (cobalt_ppd_get(0) == &cobalt_kernel_ppd) ++ return -EPERM; ++ ++ mqd = kmalloc(sizeof(*mqd), GFP_KERNEL); ++ if (mqd == NULL) ++ return -ENOSPC; ++ ++ mqd->fd.oflags = flags; ++ mqd->mq = mq; ++ ++ ret = rtdm_fd_enter(&mqd->fd, ufd, COBALT_MQD_MAGIC, &mqd_ops); ++ if (ret < 0) ++ return ret; ++ ++ return rtdm_fd_register(&mqd->fd, ufd); ++} ++ ++static int mq_open(int uqd, const char *name, int oflags, ++ int mode, struct mq_attr *attr) ++{ ++ struct cobalt_mq *mq; ++ xnhandle_t handle; ++ spl_t s; ++ int err; ++ ++ if (name[0] != '/' || name[1] == '\0') ++ return -EINVAL; ++ ++ retry_bind: ++ err = xnregistry_bind(&name[1], XN_NONBLOCK, XN_RELATIVE, &handle); ++ switch (err) { ++ 
case 0: ++ /* Found */ ++ if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) ++ return -EEXIST; ++ ++ xnlock_get_irqsave(&nklock, s); ++ mq = xnregistry_lookup(handle, NULL); ++ if (mq && mq->magic != COBALT_MQ_MAGIC) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ ++ if (mq) { ++ ++mq->refs; ++ xnlock_put_irqrestore(&nklock, s); ++ } else { ++ xnlock_put_irqrestore(&nklock, s); ++ goto retry_bind; ++ } ++ ++ err = mqd_create(mq, oflags & (O_NONBLOCK | COBALT_PERMS_MASK), ++ uqd); ++ if (err < 0) { ++ mq_unref(mq); ++ return err; ++ } ++ break; ++ ++ case -EWOULDBLOCK: ++ /* Not found */ ++ if ((oflags & O_CREAT) == 0) ++ return (mqd_t)-ENOENT; ++ ++ mq = kmalloc(sizeof(*mq), GFP_KERNEL); ++ if (mq == NULL) ++ return -ENOSPC; ++ ++ err = mq_init(mq, attr); ++ if (err) { ++ kfree(mq); ++ return err; ++ } ++ ++ snprintf(mq->name, sizeof(mq->name), "%s", &name[1]); ++ ++ err = mqd_create(mq, oflags & (O_NONBLOCK | COBALT_PERMS_MASK), ++ uqd); ++ if (err < 0) { ++ mq_destroy(mq); ++ return err; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ err = xnregistry_enter(mq->name, mq, &mq->handle, NULL); ++ if (err < 0) ++ --mq->refs; ++ else ++ list_add_tail(&mq->link, &cobalt_mqq); ++ xnlock_put_irqrestore(&nklock, s); ++ if (err < 0) { ++ rtdm_fd_close(uqd, COBALT_MQD_MAGIC); ++ if (err == -EEXIST) ++ goto retry_bind; ++ return err; ++ } ++ break; ++ ++ default: ++ return err; ++ } ++ ++ return 0; ++} ++ ++static inline int mq_close(mqd_t fd) ++{ ++ return rtdm_fd_close(fd, COBALT_MQD_MAGIC); ++} ++ ++static inline int mq_unlink(const char *name) ++{ ++ struct cobalt_mq *mq; ++ xnhandle_t handle; ++ spl_t s; ++ int err; ++ ++ if (name[0] != '/' || name[1] == '\0') ++ return -EINVAL; ++ ++ err = xnregistry_bind(&name[1], XN_NONBLOCK, XN_RELATIVE, &handle); ++ if (err == -EWOULDBLOCK) ++ return -ENOENT; ++ if (err) ++ return err; ++ ++ xnlock_get_irqsave(&nklock, s); ++ mq = xnregistry_lookup(handle, NULL); ++ if (!mq) { ++ err = -ENOENT; ++ goto err_unlock; ++ } ++ if (mq->magic != COBALT_MQ_MAGIC) { ++ err = -EINVAL; ++ err_unlock: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++ } ++ if (mq_unref_inner(mq, s) == 0) ++ xnregistry_unlink(&name[1]); ++ return 0; ++} ++ ++static inline struct cobalt_msg * ++mq_trysend(struct cobalt_mqd *mqd, size_t len) ++{ ++ struct cobalt_msg *msg; ++ struct cobalt_mq *mq; ++ unsigned flags; ++ ++ mq = mqd->mq; ++ flags = rtdm_fd_flags(&mqd->fd) & COBALT_PERMS_MASK; ++ ++ if (flags != O_WRONLY && flags != O_RDWR) ++ return ERR_PTR(-EBADF); ++ ++ if (len > mq->attr.mq_msgsize) ++ return ERR_PTR(-EMSGSIZE); ++ ++ msg = mq_msg_alloc(mq); ++ if (msg == NULL) ++ return ERR_PTR(-EAGAIN); ++ ++ if (list_empty(&mq->avail)) ++ xnselect_signal(&mq->write_select, 0); ++ ++ return msg; ++} ++ ++static inline struct cobalt_msg * ++mq_tryrcv(struct cobalt_mqd *mqd, size_t len) ++{ ++ struct cobalt_msg *msg; ++ unsigned int flags; ++ struct cobalt_mq *mq; ++ ++ mq = mqd->mq; ++ flags = rtdm_fd_flags(&mqd->fd) & COBALT_PERMS_MASK; ++ ++ if (flags != O_RDONLY && flags != O_RDWR) ++ return ERR_PTR(-EBADF); ++ ++ if (len < mq->attr.mq_msgsize) ++ return ERR_PTR(-EMSGSIZE); ++ ++ if (list_empty(&mq->queued)) ++ return ERR_PTR(-EAGAIN); ++ ++ msg = list_get_entry(&mq->queued, struct cobalt_msg, link); ++ mq->nrqueued--; ++ ++ if (list_empty(&mq->queued)) ++ xnselect_signal(&mq->read_select, 0); ++ ++ return msg; ++} ++ ++static struct cobalt_msg * ++mq_timedsend_inner(struct cobalt_mqd *mqd, ++ size_t len, const void __user *u_ts, ++ int 
(*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct cobalt_mqwait_context mwc; ++ struct cobalt_msg *msg; ++ struct cobalt_mq *mq; ++ struct timespec ts; ++ xntmode_t tmode; ++ xnticks_t to; ++ spl_t s; ++ int ret; ++ ++ to = XN_INFINITE; ++ tmode = XN_RELATIVE; ++redo: ++ xnlock_get_irqsave(&nklock, s); ++ msg = mq_trysend(mqd, len); ++ if (msg != ERR_PTR(-EAGAIN)) ++ goto out; ++ ++ if (rtdm_fd_flags(&mqd->fd) & O_NONBLOCK) ++ goto out; ++ ++ if (fetch_timeout) { ++ xnlock_put_irqrestore(&nklock, s); ++ ret = fetch_timeout(&ts, u_ts); ++ if (ret) ++ return ERR_PTR(ret); ++ if ((unsigned long)ts.tv_nsec >= ONE_BILLION) ++ return ERR_PTR(-EINVAL); ++ to = ts2ns(&ts) + 1; ++ tmode = XN_REALTIME; ++ fetch_timeout = NULL; ++ goto redo; ++ } ++ ++ mq = mqd->mq; ++ xnthread_prepare_wait(&mwc.wc); ++ ret = xnsynch_sleep_on(&mq->senders, to, tmode); ++ if (ret) { ++ if (ret & XNBREAK) ++ msg = ERR_PTR(-EINTR); ++ else if (ret & XNTIMEO) ++ msg = ERR_PTR(-ETIMEDOUT); ++ else if (ret & XNRMID) ++ msg = ERR_PTR(-EBADF); ++ } else ++ msg = mwc.msg; ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return msg; ++} ++ ++static void mq_release_msg(struct cobalt_mq *mq, struct cobalt_msg *msg) ++{ ++ struct cobalt_mqwait_context *mwc; ++ struct xnthread_wait_context *wc; ++ struct xnthread *thread; ++ ++ /* ++ * Try passing the free message slot to a waiting sender, link ++ * it to the free queue otherwise. ++ */ ++ if (xnsynch_pended_p(&mq->senders)) { ++ thread = xnsynch_wakeup_one_sleeper(&mq->senders); ++ wc = xnthread_get_wait_context(thread); ++ mwc = container_of(wc, struct cobalt_mqwait_context, wc); ++ mwc->msg = msg; ++ xnthread_complete_wait(wc); ++ } else { ++ mq_msg_free(mq, msg); ++ if (list_is_singular(&mq->avail)) ++ xnselect_signal(&mq->write_select, 1); ++ } ++} ++ ++static int ++mq_finish_send(struct cobalt_mqd *mqd, struct cobalt_msg *msg) ++{ ++ struct cobalt_mqwait_context *mwc; ++ struct xnthread_wait_context *wc; ++ struct cobalt_sigpending *sigp; ++ struct xnthread *thread; ++ struct cobalt_mq *mq; ++ spl_t s; ++ ++ mq = mqd->mq; ++ ++ xnlock_get_irqsave(&nklock, s); ++ /* Can we do pipelined sending? */ ++ if (xnsynch_pended_p(&mq->receivers)) { ++ thread = xnsynch_wakeup_one_sleeper(&mq->receivers); ++ wc = xnthread_get_wait_context(thread); ++ mwc = container_of(wc, struct cobalt_mqwait_context, wc); ++ mwc->msg = msg; ++ xnthread_complete_wait(wc); ++ } else { ++ /* Nope, have to go through the queue. */ ++ list_add_priff(msg, &mq->queued, prio, link); ++ mq->nrqueued++; ++ ++ /* ++ * If first message and no pending reader, send a ++ * signal if notification was enabled via mq_notify(). 
++ */ ++ if (list_is_singular(&mq->queued)) { ++ xnselect_signal(&mq->read_select, 1); ++ if (mq->target) { ++ sigp = cobalt_signal_alloc(); ++ if (sigp) { ++ cobalt_copy_siginfo(SI_MESGQ, &sigp->si, &mq->si); ++ if (cobalt_signal_send(mq->target, sigp, 0) <= 0) ++ cobalt_signal_free(sigp); ++ } ++ mq->target = NULL; ++ } ++ } ++ } ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++static struct cobalt_msg * ++mq_timedrcv_inner(struct cobalt_mqd *mqd, ++ size_t len, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct cobalt_mqwait_context mwc; ++ struct cobalt_msg *msg; ++ struct cobalt_mq *mq; ++ struct timespec ts; ++ xntmode_t tmode; ++ xnticks_t to; ++ spl_t s; ++ int ret; ++ ++ to = XN_INFINITE; ++ tmode = XN_RELATIVE; ++redo: ++ xnlock_get_irqsave(&nklock, s); ++ msg = mq_tryrcv(mqd, len); ++ if (msg != ERR_PTR(-EAGAIN)) ++ goto out; ++ ++ if (rtdm_fd_flags(&mqd->fd) & O_NONBLOCK) ++ goto out; ++ ++ if (fetch_timeout) { ++ xnlock_put_irqrestore(&nklock, s); ++ ret = fetch_timeout(&ts, u_ts); ++ if (ret) ++ return ERR_PTR(ret); ++ if (ts.tv_nsec >= ONE_BILLION) ++ return ERR_PTR(-EINVAL); ++ to = ts2ns(&ts) + 1; ++ tmode = XN_REALTIME; ++ fetch_timeout = NULL; ++ goto redo; ++ } ++ ++ mq = mqd->mq; ++ xnthread_prepare_wait(&mwc.wc); ++ ret = xnsynch_sleep_on(&mq->receivers, to, tmode); ++ if (ret == 0) ++ msg = mwc.msg; ++ else if (ret & XNRMID) ++ msg = ERR_PTR(-EBADF); ++ else if (ret & XNTIMEO) ++ msg = ERR_PTR(-ETIMEDOUT); ++ else ++ msg = ERR_PTR(-EINTR); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return msg; ++} ++ ++static int ++mq_finish_rcv(struct cobalt_mqd *mqd, struct cobalt_msg *msg) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ mq_release_msg(mqd->mq, msg); ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++static inline int mq_getattr(struct cobalt_mqd *mqd, struct mq_attr *attr) ++{ ++ struct cobalt_mq *mq; ++ spl_t s; ++ ++ mq = mqd->mq; ++ *attr = mq->attr; ++ xnlock_get_irqsave(&nklock, s); ++ attr->mq_flags = rtdm_fd_flags(&mqd->fd); ++ attr->mq_curmsgs = mq->nrqueued; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++static inline int ++mq_notify(struct cobalt_mqd *mqd, unsigned index, const struct sigevent *evp) ++{ ++ struct cobalt_thread *thread = cobalt_current_thread(); ++ struct cobalt_mq *mq; ++ int err; ++ spl_t s; ++ ++ if (evp && ((evp->sigev_notify != SIGEV_SIGNAL && ++ evp->sigev_notify != SIGEV_NONE) || ++ (unsigned int)(evp->sigev_signo - 1) > SIGRTMAX - 1)) ++ return -EINVAL; ++ ++ if (xnsched_interrupt_p() || thread == NULL) ++ return -EPERM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ mq = mqd->mq; ++ if (mq->target && mq->target != thread) { ++ err = -EBUSY; ++ goto unlock_and_error; ++ } ++ ++ if (evp == NULL || evp->sigev_notify == SIGEV_NONE) ++ /* Here, mq->target == cobalt_current_thread() or NULL. */ ++ mq->target = NULL; ++ else { ++ mq->target = thread; ++ mq->target_qd = index; ++ mq->si.si_signo = evp->sigev_signo; ++ mq->si.si_errno = 0; ++ mq->si.si_code = SI_MESGQ; ++ mq->si.si_value = evp->sigev_value; ++ /* ++ * XXX: we differ from the regular kernel here, which ++ * passes the sender's pid/uid data into the ++ * receiver's namespaces. We pass the receiver's creds ++ * into the init namespace instead. 
++ */ ++ mq->si.si_pid = task_pid_nr(current); ++ mq->si.si_uid = get_current_uuid(); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ return 0; ++ ++ unlock_and_error: ++ xnlock_put_irqrestore(&nklock, s); ++ return err; ++} ++ ++static inline struct cobalt_mqd *cobalt_mqd_get(mqd_t ufd) ++{ ++ struct rtdm_fd *fd; ++ ++ fd = rtdm_fd_get(ufd, COBALT_MQD_MAGIC); ++ if (IS_ERR(fd)) { ++ int err = PTR_ERR(fd); ++ if (err == -EBADF && cobalt_current_process() == NULL) ++ err = -EPERM; ++ return ERR_PTR(err); ++ } ++ ++ return container_of(fd, struct cobalt_mqd, fd); ++} ++ ++static inline void cobalt_mqd_put(struct cobalt_mqd *mqd) ++{ ++ rtdm_fd_put(&mqd->fd); ++} ++ ++int __cobalt_mq_notify(mqd_t fd, const struct sigevent *evp) ++{ ++ struct cobalt_mqd *mqd; ++ int ret; ++ ++ mqd = cobalt_mqd_get(fd); ++ if (IS_ERR(mqd)) ++ ret = PTR_ERR(mqd); ++ else { ++ trace_cobalt_mq_notify(fd, evp); ++ ret = mq_notify(mqd, fd, evp); ++ cobalt_mqd_put(mqd); ++ } ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mq_notify, primary, ++ (mqd_t fd, const struct sigevent *__user evp)) ++{ ++ struct sigevent sev; ++ ++ if (evp && cobalt_copy_from_user(&sev, evp, sizeof(sev))) ++ return -EFAULT; ++ ++ return __cobalt_mq_notify(fd, evp ? &sev : NULL); ++} ++ ++int __cobalt_mq_open(const char __user *u_name, int oflags, ++ mode_t mode, struct mq_attr *attr) ++{ ++ char name[COBALT_MAXNAME]; ++ unsigned int len; ++ mqd_t uqd; ++ int ret; ++ ++ len = cobalt_strncpy_from_user(name, u_name, sizeof(name)); ++ if (len < 0) ++ return -EFAULT; ++ ++ if (len >= sizeof(name)) ++ return -ENAMETOOLONG; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ trace_cobalt_mq_open(name, oflags, mode); ++ ++ uqd = __rtdm_anon_getfd("[cobalt-mq]", oflags); ++ if (uqd < 0) ++ return uqd; ++ ++ ret = mq_open(uqd, name, oflags, mode, attr); ++ if (ret < 0) { ++ __rtdm_anon_putfd(uqd); ++ return ret; ++ } ++ ++ return uqd; ++} ++ ++COBALT_SYSCALL(mq_open, lostage, ++ (const char __user *u_name, int oflags, ++ mode_t mode, struct mq_attr __user *u_attr)) ++{ ++ struct mq_attr _attr, *attr = &_attr; ++ ++ if ((oflags & O_CREAT) && u_attr) { ++ if (cobalt_copy_from_user(&_attr, u_attr, sizeof(_attr))) ++ return -EFAULT; ++ } else ++ attr = NULL; ++ ++ return __cobalt_mq_open(u_name, oflags, mode, attr); ++} ++ ++COBALT_SYSCALL(mq_close, lostage, (mqd_t uqd)) ++{ ++ trace_cobalt_mq_close(uqd); ++ ++ return mq_close(uqd); ++} ++ ++COBALT_SYSCALL(mq_unlink, lostage, (const char __user *u_name)) ++{ ++ char name[COBALT_MAXNAME]; ++ unsigned len; ++ ++ len = cobalt_strncpy_from_user(name, u_name, sizeof(name)); ++ if (len < 0) ++ return -EFAULT; ++ if (len >= sizeof(name)) ++ return -ENAMETOOLONG; ++ ++ trace_cobalt_mq_unlink(name); ++ ++ return mq_unlink(name); ++} ++ ++int __cobalt_mq_getattr(mqd_t uqd, struct mq_attr *attr) ++{ ++ struct cobalt_mqd *mqd; ++ int ret; ++ ++ mqd = cobalt_mqd_get(uqd); ++ if (IS_ERR(mqd)) ++ return PTR_ERR(mqd); ++ ++ ret = mq_getattr(mqd, attr); ++ cobalt_mqd_put(mqd); ++ if (ret) ++ return ret; ++ ++ trace_cobalt_mq_getattr(uqd, attr); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(mq_getattr, current, ++ (mqd_t uqd, struct mq_attr __user *u_attr)) ++{ ++ struct mq_attr attr; ++ int ret; ++ ++ ret = __cobalt_mq_getattr(uqd, &attr); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_attr, &attr, sizeof(attr)); ++} ++ ++static inline int mq_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? 
-EFAULT : ++ cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++} ++ ++int __cobalt_mq_timedsend(mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct cobalt_msg *msg; ++ struct cobalt_mqd *mqd; ++ int ret; ++ ++ mqd = cobalt_mqd_get(uqd); ++ if (IS_ERR(mqd)) ++ return PTR_ERR(mqd); ++ ++ if (prio >= COBALT_MSGPRIOMAX) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (len > 0 && !access_rok(u_buf, len)) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ trace_cobalt_mq_send(uqd, u_buf, len, prio); ++ msg = mq_timedsend_inner(mqd, len, u_ts, fetch_timeout); ++ if (IS_ERR(msg)) { ++ ret = PTR_ERR(msg); ++ goto out; ++ } ++ ++ ret = cobalt_copy_from_user(msg->data, u_buf, len); ++ if (ret) { ++ mq_finish_rcv(mqd, msg); ++ goto out; ++ } ++ msg->len = len; ++ msg->prio = prio; ++ ret = mq_finish_send(mqd, msg); ++out: ++ cobalt_mqd_put(mqd); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mq_timedsend, primary, ++ (mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, const struct timespec __user *u_ts)) ++{ ++ return __cobalt_mq_timedsend(uqd, u_buf, len, prio, ++ u_ts, u_ts ? mq_fetch_timeout : NULL); ++} ++ ++int __cobalt_mq_timedreceive(mqd_t uqd, void __user *u_buf, ++ ssize_t *lenp, ++ unsigned int __user *u_prio, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct cobalt_mqd *mqd; ++ struct cobalt_msg *msg; ++ unsigned int prio; ++ int ret; ++ ++ mqd = cobalt_mqd_get(uqd); ++ if (IS_ERR(mqd)) ++ return PTR_ERR(mqd); ++ ++ if (*lenp > 0 && !access_wok(u_buf, *lenp)) { ++ ret = -EFAULT; ++ goto fail; ++ } ++ ++ msg = mq_timedrcv_inner(mqd, *lenp, u_ts, fetch_timeout); ++ if (IS_ERR(msg)) { ++ ret = PTR_ERR(msg); ++ goto fail; ++ } ++ ++ ret = cobalt_copy_to_user(u_buf, msg->data, msg->len); ++ if (ret) { ++ mq_finish_rcv(mqd, msg); ++ goto fail; ++ } ++ ++ *lenp = msg->len; ++ prio = msg->prio; ++ ret = mq_finish_rcv(mqd, msg); ++ if (ret) ++ goto fail; ++ ++ cobalt_mqd_put(mqd); ++ ++ if (u_prio && __xn_put_user(prio, u_prio)) ++ return -EFAULT; ++ ++ return 0; ++fail: ++ cobalt_mqd_put(mqd); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mq_timedreceive, primary, ++ (mqd_t uqd, void __user *u_buf, ++ ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct timespec __user *u_ts)) ++{ ++ ssize_t len; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&len, u_len, sizeof(len)); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_mq_timedreceive(uqd, u_buf, &len, u_prio, ++ u_ts, u_ts ? mq_fetch_timeout : NULL); ++ ++ return ret ?: cobalt_copy_to_user(u_len, &len, sizeof(*u_len)); ++} +--- linux/kernel/xenomai/posix/mqueue.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/mqueue.h 2021-04-07 16:01:26.022635888 +0800 +@@ -0,0 +1,74 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef _COBALT_POSIX_MQUEUE_H ++#define _COBALT_POSIX_MQUEUE_H ++ ++#include ++#include ++#include ++ ++struct mq_attr { ++ long mq_flags; ++ long mq_maxmsg; ++ long mq_msgsize; ++ long mq_curmsgs; ++}; ++ ++int __cobalt_mq_open(const char __user *u_name, int oflags, ++ mode_t mode, struct mq_attr *attr); ++ ++int __cobalt_mq_getattr(mqd_t uqd, struct mq_attr *attr); ++ ++int __cobalt_mq_timedsend(mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)); ++ ++int __cobalt_mq_timedreceive(mqd_t uqd, void __user *u_buf, ++ ssize_t *lenp, ++ unsigned int __user *u_prio, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)); ++ ++int __cobalt_mq_notify(mqd_t fd, const struct sigevent *evp); ++ ++COBALT_SYSCALL_DECL(mq_open, ++ (const char __user *u_name, int oflags, ++ mode_t mode, struct mq_attr __user *u_attr)); ++ ++COBALT_SYSCALL_DECL(mq_close, (mqd_t uqd)); ++ ++COBALT_SYSCALL_DECL(mq_unlink, (const char __user *u_name)); ++ ++COBALT_SYSCALL_DECL(mq_getattr, (mqd_t uqd, struct mq_attr __user *u_attr)); ++ ++COBALT_SYSCALL_DECL(mq_timedsend, ++ (mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(mq_timedreceive, ++ (mqd_t uqd, void __user *u_buf, ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(mq_notify, ++ (mqd_t fd, const struct sigevent *__user evp)); ++ ++#endif /* !_COBALT_POSIX_MQUEUE_H */ +--- linux/kernel/xenomai/posix/mutex.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/mutex.h 2021-04-07 16:01:26.018635894 +0800 +@@ -0,0 +1,76 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _COBALT_POSIX_MUTEX_H ++#define _COBALT_POSIX_MUTEX_H ++ ++#include "thread.h" ++#include ++#include ++#include ++ ++struct cobalt_process; ++ ++struct cobalt_mutex { ++ unsigned int magic; ++ struct xnsynch synchbase; ++ /** cobalt_mutexq */ ++ struct list_head conds; ++ struct cobalt_mutexattr attr; ++ struct cobalt_resnode resnode; ++}; ++ ++int __cobalt_mutex_timedlock_break(struct cobalt_mutex_shadow __user *u_mx, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)); ++ ++int __cobalt_mutex_acquire_unchecked(struct xnthread *cur, ++ struct cobalt_mutex *mutex, ++ const struct timespec *ts); ++ ++COBALT_SYSCALL_DECL(mutex_check_init, ++ (struct cobalt_mutex_shadow __user *u_mx)); ++ ++COBALT_SYSCALL_DECL(mutex_init, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct cobalt_mutexattr __user *u_attr)); ++ ++COBALT_SYSCALL_DECL(mutex_destroy, ++ (struct cobalt_mutex_shadow __user *u_mx)); ++ ++COBALT_SYSCALL_DECL(mutex_trylock, ++ (struct cobalt_mutex_shadow __user *u_mx)); ++ ++COBALT_SYSCALL_DECL(mutex_lock, ++ (struct cobalt_mutex_shadow __user *u_mx)); ++ ++COBALT_SYSCALL_DECL(mutex_timedlock, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(mutex_unlock, ++ (struct cobalt_mutex_shadow __user *u_mx)); ++ ++int cobalt_mutex_release(struct xnthread *cur, ++ struct cobalt_mutex *mutex); ++ ++void cobalt_mutex_reclaim(struct cobalt_resnode *node, ++ spl_t s); ++ ++#endif /* !_COBALT_POSIX_MUTEX_H */ +--- linux/kernel/xenomai/posix/clock.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/clock.c 2021-04-07 16:01:26.013635901 +0800 +@@ -0,0 +1,443 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "clock.h" ++#include ++ ++static struct xnclock *external_clocks[COBALT_MAX_EXTCLOCKS]; ++ ++DECLARE_BITMAP(cobalt_clock_extids, COBALT_MAX_EXTCLOCKS); ++ ++static int do_clock_host_realtime(struct timespec *tp) ++{ ++#ifdef CONFIG_XENO_OPT_HOSTRT ++ struct xnvdso_hostrt_data *hostrt_data; ++ u64 now, base, mask, cycle_delta; ++ __u32 mult, shift; ++ unsigned long rem; ++ urwstate_t tmp; ++ __u64 nsec; ++ ++ hostrt_data = get_hostrt_data(); ++ BUG_ON(!hostrt_data); ++ ++ if (unlikely(!hostrt_data->live)) ++ return -1; ++ ++ /* ++ * Note: Disabling HW interrupts around writes to hostrt_data ++ * ensures that a reader (on the Xenomai side) cannot ++ * interrupt a writer (on the Linux kernel side) on the same ++ * CPU. The urw block is required when a reader is ++ * interleaved by a writer on a different CPU. This follows ++ * the approach from userland, where taking the spinlock is ++ * not possible. 
++ */ ++ unsynced_read_block(&tmp, &hostrt_data->lock) { ++ now = xnclock_read_raw(&nkclock); ++ base = hostrt_data->cycle_last; ++ mask = hostrt_data->mask; ++ mult = hostrt_data->mult; ++ shift = hostrt_data->shift; ++ tp->tv_sec = hostrt_data->wall_sec; ++ nsec = hostrt_data->wall_nsec; ++ } ++ ++ /* ++ * At this point, we have a consistent copy of the fundamental ++ * data structure - calculate the interval between the current ++ * and base time stamp cycles, and convert the difference ++ * to nanoseconds. ++ */ ++ cycle_delta = (now - base) & mask; ++ nsec += (cycle_delta * mult) >> shift; ++ ++ /* Convert to the desired sec, usec representation */ ++ tp->tv_sec += xnclock_divrem_billion(nsec, &rem); ++ tp->tv_nsec = rem; ++ ++ return 0; ++#else /* CONFIG_XENO_OPT_HOSTRT */ ++ return -EINVAL; ++#endif ++} ++ ++#define do_ext_clock(__clock_id, __handler, __ret, __args...) \ ++({ \ ++ struct xnclock *__clock; \ ++ int __val = 0, __nr; \ ++ spl_t __s; \ ++ \ ++ if (!__COBALT_CLOCK_EXT_P(__clock_id)) \ ++ __val = -EINVAL; \ ++ else { \ ++ __nr = __COBALT_CLOCK_EXT_INDEX(__clock_id); \ ++ xnlock_get_irqsave(&nklock, __s); \ ++ if (!test_bit(__nr, cobalt_clock_extids)) { \ ++ xnlock_put_irqrestore(&nklock, __s); \ ++ __val = -EINVAL; \ ++ } else { \ ++ __clock = external_clocks[__nr]; \ ++ (__ret) = xnclock_ ## __handler(__clock, ##__args); \ ++ xnlock_put_irqrestore(&nklock, __s); \ ++ } \ ++ } \ ++ __val; \ ++}) ++ ++int __cobalt_clock_getres(clockid_t clock_id, struct timespec *ts) ++{ ++ xnticks_t ns; ++ int ret; ++ ++ switch (clock_id) { ++ case CLOCK_REALTIME: ++ case CLOCK_MONOTONIC: ++ case CLOCK_MONOTONIC_RAW: ++ ns2ts(ts, 1); ++ break; ++ default: ++ ret = do_ext_clock(clock_id, get_resolution, ns); ++ if (ret) ++ return ret; ++ ns2ts(ts, ns); ++ } ++ ++ trace_cobalt_clock_getres(clock_id, ts); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(clock_getres, current, ++ (clockid_t clock_id, struct timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ int ret; ++ ++ ret = __cobalt_clock_getres(clock_id, &ts); ++ if (ret) ++ return ret; ++ ++ if (u_ts && cobalt_copy_to_user(u_ts, &ts, sizeof(ts))) ++ return -EFAULT; ++ ++ trace_cobalt_clock_getres(clock_id, &ts); ++ ++ return 0; ++} ++ ++int __cobalt_clock_gettime(clockid_t clock_id, struct timespec *ts) ++{ ++ xnticks_t ns; ++ int ret; ++ ++ switch (clock_id) { ++ case CLOCK_REALTIME: ++ ns2ts(ts, xnclock_read_realtime(&nkclock)); ++ break; ++ case CLOCK_MONOTONIC: ++ case CLOCK_MONOTONIC_RAW: ++ ns2ts(ts, xnclock_read_monotonic(&nkclock)); ++ break; ++ case CLOCK_HOST_REALTIME: ++ if (do_clock_host_realtime(ts) != 0) ++ return -EINVAL; ++ break; ++ default: ++ ret = do_ext_clock(clock_id, read_monotonic, ns); ++ if (ret) ++ return ret; ++ ns2ts(ts, ns); ++ } ++ ++ trace_cobalt_clock_gettime(clock_id, ts); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(clock_gettime, current, ++ (clockid_t clock_id, struct timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ int ret; ++ ++ ret = __cobalt_clock_gettime(clock_id, &ts); ++ if (ret) ++ return ret; ++ ++ if (cobalt_copy_to_user(u_ts, &ts, sizeof(*u_ts))) ++ return -EFAULT; ++ ++ trace_cobalt_clock_gettime(clock_id, &ts); ++ ++ return 0; ++} ++ ++int __cobalt_clock_settime(clockid_t clock_id, const struct timespec *ts) ++{ ++ int _ret, ret = 0; ++ xnticks_t now; ++ spl_t s; ++ ++ if ((unsigned long)ts->tv_nsec >= ONE_BILLION) ++ return -EINVAL; ++ ++ switch (clock_id) { ++ case CLOCK_REALTIME: ++ xnlock_get_irqsave(&nklock, s); ++ now = xnclock_read_realtime(&nkclock); ++ xnclock_adjust(&nkclock, 
(xnsticks_t) (ts2ns(ts) - now)); ++ xnlock_put_irqrestore(&nklock, s); ++ break; ++ default: ++ _ret = do_ext_clock(clock_id, set_time, ret, ts); ++ if (_ret || ret) ++ return _ret ?: ret; ++ } ++ ++ trace_cobalt_clock_settime(clock_id, ts); ++ ++ return 0; ++} ++ ++int __cobalt_clock_adjtime(clockid_t clock_id, struct timex *tx) ++{ ++ int _ret, ret = 0; ++ ++ switch (clock_id) { ++ case CLOCK_REALTIME: ++ case CLOCK_MONOTONIC: ++ case CLOCK_MONOTONIC_RAW: ++ case CLOCK_HOST_REALTIME: ++ return -EOPNOTSUPP; ++ default: ++ _ret = do_ext_clock(clock_id, adjust_time, ret, tx); ++ if (_ret || ret) ++ return _ret ?: ret; ++ } ++ ++ trace_cobalt_clock_adjtime(clock_id, tx); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(clock_settime, current, ++ (clockid_t clock_id, const struct timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ ++ if (cobalt_copy_from_user(&ts, u_ts, sizeof(ts))) ++ return -EFAULT; ++ ++ return __cobalt_clock_settime(clock_id, &ts); ++} ++ ++COBALT_SYSCALL(clock_adjtime, current, ++ (clockid_t clock_id, struct timex __user *u_tx)) ++{ ++ struct timex tx; ++ int ret; ++ ++ if (cobalt_copy_from_user(&tx, u_tx, sizeof(tx))) ++ return -EFAULT; ++ ++ ret = __cobalt_clock_adjtime(clock_id, &tx); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_tx, &tx, sizeof(tx)); ++} ++ ++int __cobalt_clock_nanosleep(clockid_t clock_id, int flags, ++ const struct timespec *rqt, ++ struct timespec *rmt) ++{ ++ struct restart_block *restart; ++ struct xnthread *cur; ++ xnsticks_t timeout, rem; ++ spl_t s; ++ ++ trace_cobalt_clock_nanosleep(clock_id, flags, rqt); ++ ++ if (clock_id != CLOCK_MONOTONIC && ++ clock_id != CLOCK_MONOTONIC_RAW && ++ clock_id != CLOCK_REALTIME) ++ return -EOPNOTSUPP; ++ ++ if (rqt->tv_sec < 0) ++ return -EINVAL; ++ ++ if ((unsigned long)rqt->tv_nsec >= ONE_BILLION) ++ return -EINVAL; ++ ++ if (flags & ~TIMER_ABSTIME) ++ return -EINVAL; ++ ++ cur = xnthread_current(); ++ ++ if (xnthread_test_localinfo(cur, XNSYSRST)) { ++ xnthread_clear_localinfo(cur, XNSYSRST); ++ ++ restart = cobalt_get_restart_block(current); ++ ++ if (restart->fn != cobalt_restart_syscall_placeholder) { ++ if (rmt) { ++ xnlock_get_irqsave(&nklock, s); ++ rem = xntimer_get_timeout_stopped(&cur->rtimer); ++ xnlock_put_irqrestore(&nklock, s); ++ ns2ts(rmt, rem > 1 ? rem : 0); ++ } ++ return -EINTR; ++ } ++ ++ timeout = restart->nanosleep.expires; ++ } else ++ timeout = ts2ns(rqt); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnthread_suspend(cur, XNDELAY, timeout + 1, ++ clock_flag(flags, clock_id), NULL); ++ ++ if (xnthread_test_info(cur, XNBREAK)) { ++ if (signal_pending(current)) { ++ restart = cobalt_get_restart_block(current); ++ restart->nanosleep.expires = ++ (flags & TIMER_ABSTIME) ? timeout : ++ xntimer_get_timeout_stopped(&cur->rtimer); ++ xnlock_put_irqrestore(&nklock, s); ++ restart->fn = cobalt_restart_syscall_placeholder; ++ ++ xnthread_set_localinfo(cur, XNSYSRST); ++ ++ return -ERESTARTSYS; ++ } ++ ++ if (flags == 0 && rmt) { ++ rem = xntimer_get_timeout_stopped(&cur->rtimer); ++ xnlock_put_irqrestore(&nklock, s); ++ ns2ts(rmt, rem > 1 ? 
rem : 0); ++ } else ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return -EINTR; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(clock_nanosleep, primary, ++ (clockid_t clock_id, int flags, ++ const struct timespec __user *u_rqt, ++ struct timespec __user *u_rmt)) ++{ ++ struct timespec rqt, rmt, *rmtp = NULL; ++ int ret; ++ ++ if (u_rmt) ++ rmtp = &rmt; ++ ++ if (cobalt_copy_from_user(&rqt, u_rqt, sizeof(rqt))) ++ return -EFAULT; ++ ++ ret = __cobalt_clock_nanosleep(clock_id, flags, &rqt, rmtp); ++ if (ret == -EINTR && flags == 0 && rmtp) { ++ if (cobalt_copy_to_user(u_rmt, rmtp, sizeof(*u_rmt))) ++ return -EFAULT; ++ } ++ ++ return ret; ++} ++ ++int cobalt_clock_register(struct xnclock *clock, const cpumask_t *affinity, ++ clockid_t *clk_id) ++{ ++ int ret, nr; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ nr = find_first_zero_bit(cobalt_clock_extids, COBALT_MAX_EXTCLOCKS); ++ if (nr >= COBALT_MAX_EXTCLOCKS) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EAGAIN; ++ } ++ ++ /* ++ * CAUTION: a bit raised in cobalt_clock_extids means that the ++ * corresponding entry in external_clocks[] is valid. The ++ * converse assumption is NOT true. ++ */ ++ __set_bit(nr, cobalt_clock_extids); ++ external_clocks[nr] = clock; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ ret = xnclock_register(clock, affinity); ++ if (ret) ++ return ret; ++ ++ clock->id = nr; ++ *clk_id = __COBALT_CLOCK_EXT(clock->id); ++ ++ trace_cobalt_clock_register(clock->name, *clk_id); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(cobalt_clock_register); ++ ++void cobalt_clock_deregister(struct xnclock *clock) ++{ ++ trace_cobalt_clock_deregister(clock->name, clock->id); ++ clear_bit(clock->id, cobalt_clock_extids); ++ smp_mb__after_atomic(); ++ external_clocks[clock->id] = NULL; ++ xnclock_deregister(clock); ++} ++EXPORT_SYMBOL_GPL(cobalt_clock_deregister); ++ ++struct xnclock *cobalt_clock_find(clockid_t clock_id) ++{ ++ struct xnclock *clock = ERR_PTR(-EINVAL); ++ spl_t s; ++ int nr; ++ ++ if (clock_id == CLOCK_MONOTONIC || ++ clock_id == CLOCK_MONOTONIC_RAW || ++ clock_id == CLOCK_REALTIME) ++ return &nkclock; ++ ++ if (__COBALT_CLOCK_EXT_P(clock_id)) { ++ nr = __COBALT_CLOCK_EXT_INDEX(clock_id); ++ xnlock_get_irqsave(&nklock, s); ++ if (test_bit(nr, cobalt_clock_extids)) ++ clock = external_clocks[nr]; ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++ return clock; ++} ++EXPORT_SYMBOL_GPL(cobalt_clock_find); +--- linux/kernel/xenomai/posix/internal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/internal.h 2021-04-07 16:01:26.008635908 +0800 +@@ -0,0 +1,60 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_POSIX_INTERNAL_H ++#define _COBALT_POSIX_INTERNAL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "process.h" ++#include "extension.h" ++#include "syscall.h" ++#include "memory.h" ++ ++#define COBALT_MAXNAME 64 ++#define COBALT_PERMS_MASK (O_RDONLY | O_WRONLY | O_RDWR) ++ ++#define COBALT_MAGIC(n) (0x8686##n##n) ++#define COBALT_ANY_MAGIC COBALT_MAGIC(00) ++#define COBALT_THREAD_MAGIC COBALT_MAGIC(01) ++#define COBALT_MQ_MAGIC COBALT_MAGIC(0A) ++#define COBALT_MQD_MAGIC COBALT_MAGIC(0B) ++#define COBALT_EVENT_MAGIC COBALT_MAGIC(0F) ++#define COBALT_MONITOR_MAGIC COBALT_MAGIC(10) ++#define COBALT_TIMERFD_MAGIC COBALT_MAGIC(11) ++ ++#define cobalt_obj_active(h,m,t) \ ++ ((h) && ((t *)(h))->magic == (m)) ++ ++#define cobalt_mark_deleted(t) ((t)->magic = ~(t)->magic) ++ ++static inline xnhandle_t cobalt_get_handle_from_user(xnhandle_t *u_h) ++{ ++ xnhandle_t handle; ++ return __xn_get_user(handle, u_h) ? 0 : handle; ++} ++ ++int cobalt_init(void); ++ ++long cobalt_restart_syscall_placeholder(struct restart_block *param); ++ ++#endif /* !_COBALT_POSIX_INTERNAL_H */ +--- linux/kernel/xenomai/posix/syscall.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/syscall.c 2021-04-07 16:01:26.003635915 +0800 +@@ -0,0 +1,801 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum ++ * Copyright (C) 2005 Gilles Chanteperdrix ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "sched.h" ++#include "mutex.h" ++#include "cond.h" ++#include "mqueue.h" ++#include "sem.h" ++#include "signal.h" ++#include "timer.h" ++#include "monitor.h" ++#include "clock.h" ++#include "event.h" ++#include "timerfd.h" ++#include "io.h" ++#include "corectl.h" ++#include "../debug.h" ++#include ++ ++/* Syscall must run into the Linux domain. */ ++#define __xn_exec_lostage 0x1 ++/* Syscall must run into the Xenomai domain. */ ++#define __xn_exec_histage 0x2 ++/* Shadow syscall: caller must be mapped. */ ++#define __xn_exec_shadow 0x4 ++/* Switch back toggle; caller must return to its original mode. */ ++#define __xn_exec_switchback 0x8 ++/* Exec in current domain. */ ++#define __xn_exec_current 0x10 ++/* Exec in conforming domain, Xenomai or Linux. */ ++#define __xn_exec_conforming 0x20 ++/* Attempt syscall restart in the opposite domain upon -ENOSYS. */ ++#define __xn_exec_adaptive 0x40 ++/* Do not restart syscall upon signal receipt. */ ++#define __xn_exec_norestart 0x80 ++/* Shorthand for shadow init syscall. */ ++#define __xn_exec_init __xn_exec_lostage ++/* Shorthand for shadow syscall in Xenomai space. 
*/ ++#define __xn_exec_primary (__xn_exec_shadow|__xn_exec_histage) ++/* Shorthand for shadow syscall in Linux space. */ ++#define __xn_exec_secondary (__xn_exec_shadow|__xn_exec_lostage) ++/* Shorthand for syscall in Linux space with switchback if shadow. */ ++#define __xn_exec_downup (__xn_exec_lostage|__xn_exec_switchback) ++/* Shorthand for non-restartable primary syscall. */ ++#define __xn_exec_nonrestartable (__xn_exec_primary|__xn_exec_norestart) ++/* Domain probing syscall starting in conforming mode. */ ++#define __xn_exec_probing (__xn_exec_conforming|__xn_exec_adaptive) ++/* Hand over mode selection to syscall. */ ++#define __xn_exec_handover (__xn_exec_current|__xn_exec_adaptive) ++ ++typedef long (*cobalt_syshand)(unsigned long arg1, unsigned long arg2, ++ unsigned long arg3, unsigned long arg4, ++ unsigned long arg5); ++ ++static void prepare_for_signal(struct task_struct *p, ++ struct xnthread *thread, ++ struct pt_regs *regs, ++ int sysflags) ++{ ++ int notify = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnthread_test_info(thread, XNKICKED)) { ++ if (signal_pending(p)) { ++ __xn_error_return(regs, ++ (sysflags & __xn_exec_norestart) ? ++ -EINTR : -ERESTARTSYS); ++ notify = !xnthread_test_state(thread, XNSSTEP); ++ xnthread_clear_info(thread, XNBREAK); ++ } ++ xnthread_clear_info(thread, XNKICKED); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnthread_test_cancel(); ++ ++ xnthread_relax(notify, SIGDEBUG_MIGRATE_SIGNAL); ++} ++ ++static COBALT_SYSCALL(migrate, current, (int domain)) ++{ ++ struct xnthread *thread = xnthread_current(); ++ ++ if (ipipe_root_p) { ++ if (domain == COBALT_PRIMARY) { ++ if (thread == NULL) ++ return -EPERM; ++ /* ++ * Paranoid: a corner case where userland ++ * fiddles with SIGSHADOW while the target ++ * thread is still waiting to be started. ++ */ ++ if (xnthread_test_state(thread, XNDORMANT)) ++ return 0; ++ ++ return xnthread_harden() ? 
: 1; ++ } ++ return 0; ++ } ++ ++ /* ipipe_current_domain != ipipe_root_domain */ ++ if (domain == COBALT_SECONDARY) { ++ xnthread_relax(0, 0); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static COBALT_SYSCALL(trace, current, ++ (int op, unsigned long a1, ++ unsigned long a2, unsigned long a3)) ++{ ++ int ret = -EINVAL; ++ ++ switch (op) { ++ case __xntrace_op_max_begin: ++ ret = xntrace_max_begin(a1); ++ break; ++ ++ case __xntrace_op_max_end: ++ ret = xntrace_max_end(a1); ++ break; ++ ++ case __xntrace_op_max_reset: ++ ret = xntrace_max_reset(); ++ break; ++ ++ case __xntrace_op_user_start: ++ ret = xntrace_user_start(); ++ break; ++ ++ case __xntrace_op_user_stop: ++ ret = xntrace_user_stop(a1); ++ break; ++ ++ case __xntrace_op_user_freeze: ++ ret = xntrace_user_freeze(a1, a2); ++ break; ++ ++ case __xntrace_op_special: ++ ret = xntrace_special(a1 & 0xFF, a2); ++ break; ++ ++ case __xntrace_op_special_u64: ++ ret = xntrace_special_u64(a1 & 0xFF, ++ (((u64) a2) << 32) | a3); ++ break; ++ } ++ return ret; ++} ++ ++static COBALT_SYSCALL(ftrace_puts, current, ++ (const char __user *str)) ++{ ++ char buf[256]; ++ unsigned len; ++ ++ len = cobalt_strncpy_from_user(buf, str, sizeof(buf)); ++ if (len < 0) ++ return -EFAULT; ++ ++#ifdef CONFIG_TRACING ++ __trace_puts(_THIS_IP_, buf, len); ++#endif ++ ++ return 0; ++} ++ ++static COBALT_SYSCALL(archcall, current, ++ (unsigned long a1, unsigned long a2, ++ unsigned long a3, unsigned long a4, ++ unsigned long a5)) ++{ ++ return xnarch_local_syscall(a1, a2, a3, a4, a5); ++} ++ ++static COBALT_SYSCALL(get_current, current, ++ (xnhandle_t __user *u_handle)) ++{ ++ struct xnthread *cur = xnthread_current(); ++ ++ if (cur == NULL) ++ return -EPERM; ++ ++ return cobalt_copy_to_user(u_handle, &cur->handle, ++ sizeof(*u_handle)); ++} ++ ++static COBALT_SYSCALL(backtrace, lostage, ++ (int nr, unsigned long __user *u_backtrace, int reason)) ++{ ++ unsigned long backtrace[SIGSHADOW_BACKTRACE_DEPTH]; ++ int ret; ++ ++ /* ++ * In case backtrace() in userland is broken or fails. We may ++ * want to know about this in kernel space however, for future ++ * use. ++ */ ++ if (nr <= 0) ++ return 0; ++ /* ++ * We may omit the older frames if we can't store the full ++ * backtrace. ++ */ ++ if (nr > SIGSHADOW_BACKTRACE_DEPTH) ++ nr = SIGSHADOW_BACKTRACE_DEPTH; ++ /* ++ * Fetch the backtrace array, filled with PC values as seen ++ * from the relaxing thread in user-space. This can't fail ++ */ ++ ret = cobalt_copy_from_user(backtrace, u_backtrace, nr * sizeof(long)); ++ if (ret) ++ return ret; ++ ++ xndebug_trace_relax(nr, backtrace, reason); ++ ++ return 0; ++} ++ ++static COBALT_SYSCALL(serialdbg, current, ++ (const char __user *u_msg, int len)) ++{ ++ char buf[128]; ++ int n; ++ ++ while (len > 0) { ++ n = len; ++ if (n > sizeof(buf)) ++ n = sizeof(buf); ++ if (cobalt_copy_from_user(buf, u_msg, n)) ++ return -EFAULT; ++ __ipipe_serial_debug("%.*s", n, buf); ++ u_msg += n; ++ len -= n; ++ } ++ ++ return 0; ++} ++ ++static void stringify_feature_set(unsigned long fset, char *buf, int size) ++{ ++ unsigned long feature; ++ int nc, nfeat; ++ ++ *buf = '\0'; ++ ++ for (feature = 1, nc = nfeat = 0; fset != 0 && size > 0; feature <<= 1) { ++ if (fset & feature) { ++ nc = ksformat(buf, size, "%s%s", ++ nfeat > 0 ? 
" " : "", ++ get_feature_label(feature)); ++ nfeat++; ++ size -= nc; ++ buf += nc; ++ fset &= ~feature; ++ } ++ } ++} ++ ++static COBALT_SYSCALL(bind, lostage, ++ (struct cobalt_bindreq __user *u_breq)) ++{ ++ unsigned long featreq, featmis; ++ struct cobalt_bindreq breq; ++ struct cobalt_featinfo *f; ++ int abirev; ++ ++ if (cobalt_copy_from_user(&breq, u_breq, sizeof(breq))) ++ return -EFAULT; ++ ++ f = &breq.feat_ret; ++ featreq = breq.feat_req; ++ if (!realtime_core_running() && (featreq & __xn_feat_control) == 0) ++ return -EAGAIN; ++ ++ /* ++ * Calculate the missing feature set: ++ * kernel_unavailable_set & user_mandatory_set. ++ */ ++ featmis = (~XENOMAI_FEAT_DEP & (featreq & XENOMAI_FEAT_MAN)); ++ abirev = breq.abi_rev; ++ ++ /* ++ * Pass back the supported feature set and the ABI revision ++ * level to user-space. ++ */ ++ f->feat_all = XENOMAI_FEAT_DEP; ++ stringify_feature_set(XENOMAI_FEAT_DEP, f->feat_all_s, ++ sizeof(f->feat_all_s)); ++ f->feat_man = featreq & XENOMAI_FEAT_MAN; ++ stringify_feature_set(f->feat_man, f->feat_man_s, ++ sizeof(f->feat_man_s)); ++ f->feat_mis = featmis; ++ stringify_feature_set(featmis, f->feat_mis_s, ++ sizeof(f->feat_mis_s)); ++ f->feat_req = featreq; ++ stringify_feature_set(featreq, f->feat_req_s, ++ sizeof(f->feat_req_s)); ++ f->feat_abirev = XENOMAI_ABI_REV; ++ collect_arch_features(f); ++ ++ f->clock_freq = cobalt_pipeline.clock_freq; ++ f->vdso_offset = cobalt_umm_offset(&cobalt_ppd_get(1)->umm, nkvdso); ++ ++ if (cobalt_copy_to_user(u_breq, &breq, sizeof(breq))) ++ return -EFAULT; ++ ++ /* ++ * If some mandatory features the user-space code relies on ++ * are missing at kernel level, we cannot go further. ++ */ ++ if (featmis) ++ return -EINVAL; ++ ++ if (!check_abi_revision(abirev)) ++ return -ENOEXEC; ++ ++ return cobalt_bind_core(featreq); ++} ++ ++static COBALT_SYSCALL(extend, lostage, (unsigned int magic)) ++{ ++ return cobalt_bind_personality(magic); ++} ++ ++static int CoBaLt_ni(void) ++{ ++ return -ENOSYS; ++} ++ ++/* ++ * We have a single syscall table for all ABI models, i.e. 64bit ++ * native + 32bit) or plain 32bit. In the former case, we may want to ++ * support several models with a single build (e.g. ia32 and x32 for ++ * x86_64). ++ * ++ * The syscall table is set up in a single step, based on three ++ * subsequent sources of initializers: ++ * ++ * - first, all syscall entries are defaulted to a placeholder ++ * returning -ENOSYS, as the table may be sparse. ++ * ++ * - then __COBALT_CALL_ENTRY() produces a native call entry ++ * (e.g. pure 64bit call handler for a 64bit architecture), optionally ++ * followed by a set of 32bit syscall entries offset by an ++ * arch-specific base index, which default to the native calls. These ++ * nitty-gritty details are defined by ++ * . 32bit architectures - or 64bit ones for ++ * which we don't support any 32bit ABI model - will simply define ++ * __COBALT_CALL32_ENTRY() as an empty macro. ++ * ++ * - finally, 32bit thunk entries are generated per-architecture, by ++ * including , overriding the default ++ * handlers installed during the previous step. ++ * ++ * For instance, with CONFIG_X86_X32 support enabled in an x86_64 ++ * kernel, sc_cobalt_mq_timedreceive would appear twice in the table, ++ * as: ++ * ++ * [sc_cobalt_mq_timedreceive] = cobalt_mq_timedreceive, ++ * ... ++ * [sc_cobalt_mq_timedreceive + __COBALT_X32_BASE] = cobalt32x_mq_timedreceive, ++ * ++ * cobalt32x_mq_timedreceive() would do the required thunking for ++ * dealing with the 32<->64bit conversion of arguments. 
On the other ++ * hand, sc_cobalt_sched_yield - which do not require any thunk - ++ * would also appear twice, but both entries would point at the native ++ * syscall implementation: ++ * ++ * [sc_cobalt_sched_yield] = cobalt_sched_yield, ++ * ... ++ * [sc_cobalt_sched_yield + __COBALT_X32_BASE] = cobalt_sched_yield, ++ * ++ * Accordingly, applications targeting the x32 model (-mx32) issue ++ * syscalls in the range [__COBALT_X32_BASE..__COBALT_X32_BASE + ++ * __NR_COBALT_SYSCALLS-1], whilst native (32/64bit) ones issue ++ * syscalls in the range [0..__NR_COBALT_SYSCALLS-1]. ++ * ++ * In short, this is an incremental process where the arch-specific ++ * code can override the 32bit syscall entries, pointing at the thunk ++ * routines it may need for handing 32bit calls over their respective ++ * 64bit implementation. ++ * ++ * By convention, there is NO pure 32bit syscall, which means that ++ * each 32bit syscall defined by a compat ABI interface MUST match a ++ * native (64bit) syscall. This is important as we share the call ++ * modes (i.e. __xn_exec_ bits) between all ABI models. ++ * ++ * --rpm ++ */ ++#define __syshand__(__name) ((cobalt_syshand)(CoBaLt_ ## __name)) ++ ++#define __COBALT_NI __syshand__(ni) ++ ++#define __COBALT_CALL_NI \ ++ [0 ... __NR_COBALT_SYSCALLS-1] = __COBALT_NI, \ ++ __COBALT_CALL32_INITHAND(__COBALT_NI) ++ ++#define __COBALT_CALL_NFLAGS \ ++ [0 ... __NR_COBALT_SYSCALLS-1] = 0, \ ++ __COBALT_CALL32_INITMODE(0) ++ ++#define __COBALT_CALL_ENTRY(__name) \ ++ [sc_cobalt_ ## __name] = __syshand__(__name), \ ++ __COBALT_CALL32_ENTRY(__name, __syshand__(__name)) ++ ++#define __COBALT_MODE(__name, __mode) \ ++ [sc_cobalt_ ## __name] = __xn_exec_##__mode, ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++#include "syscall32.h" ++#endif ++ ++#include "syscall_entries.h" ++ ++static const cobalt_syshand cobalt_syscalls[] = { ++ __COBALT_CALL_NI ++ __COBALT_CALL_ENTRIES ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++#include ++#endif ++}; ++ ++static const int cobalt_sysmodes[] = { ++ __COBALT_CALL_NFLAGS ++ __COBALT_CALL_MODES ++}; ++ ++static inline int allowed_syscall(struct cobalt_process *process, ++ struct xnthread *thread, ++ int sysflags, int nr) ++{ ++ if (nr == sc_cobalt_bind) ++ return 1; ++ ++ if (process == NULL) ++ return 0; ++ ++ if (thread == NULL && (sysflags & __xn_exec_shadow)) ++ return 0; ++ ++ return cap_raised(current_cap(), CAP_SYS_NICE); ++} ++ ++static int handle_head_syscall(struct ipipe_domain *ipd, struct pt_regs *regs) ++{ ++ struct cobalt_process *process; ++ int switched, sigs, sysflags; ++ struct xnthread *thread; ++ cobalt_syshand handler; ++ struct task_struct *p; ++ unsigned int nr, code; ++ long ret; ++ ++ if (!__xn_syscall_p(regs)) ++ goto linux_syscall; ++ ++ thread = xnthread_current(); ++ code = __xn_syscall(regs); ++ if (code >= ARRAY_SIZE(cobalt_syscalls)) ++ goto bad_syscall; ++ ++ nr = code & (__NR_COBALT_SYSCALLS - 1); ++ ++ trace_cobalt_head_sysentry(code); ++ ++ process = cobalt_current_process(); ++ if (process == NULL) { ++ process = cobalt_search_process(current->mm); ++ cobalt_set_process(process); ++ } ++ ++ handler = cobalt_syscalls[code]; ++ sysflags = cobalt_sysmodes[nr]; ++ ++ /* ++ * Executing Cobalt services requires CAP_SYS_NICE, except for ++ * sc_cobalt_bind which does its own checks. ++ */ ++ if (unlikely(!allowed_syscall(process, thread, sysflags, nr))) { ++ /* ++ * Exclude get_current from reporting, it is used to probe the ++ * execution context. 
++ */ ++ if (XENO_DEBUG(COBALT) && nr != sc_cobalt_get_current) ++ printk(XENO_WARNING ++ "syscall <%d> denied to %s[%d]\n", ++ nr, current->comm, task_pid_nr(current)); ++ __xn_error_return(regs, -EPERM); ++ goto ret_handled; ++ } ++ ++ if (sysflags & __xn_exec_conforming) ++ /* ++ * If the conforming exec bit is set, turn the exec ++ * bitmask for the syscall into the most appropriate ++ * setup for the caller, i.e. Xenomai domain for ++ * shadow threads, Linux otherwise. ++ */ ++ sysflags |= (thread ? __xn_exec_histage : __xn_exec_lostage); ++ ++ /* ++ * Here we have to dispatch the syscall execution properly, ++ * depending on: ++ * ++ * o Whether the syscall must be run into the Linux or Xenomai ++ * domain, or indifferently in the current Xenomai domain. ++ * ++ * o Whether the caller currently runs in the Linux or Xenomai ++ * domain. ++ */ ++restart: ++ /* ++ * Process adaptive syscalls by restarting them in the ++ * opposite domain upon receiving -ENOSYS from the syscall ++ * handler. ++ */ ++ switched = 0; ++ if (sysflags & __xn_exec_lostage) { ++ /* ++ * The syscall must run from the Linux domain. ++ */ ++ if (ipd == &xnsched_realtime_domain) { ++ /* ++ * Request originates from the Xenomai domain: ++ * relax the caller then invoke the syscall ++ * handler right after. ++ */ ++ xnthread_relax(1, SIGDEBUG_MIGRATE_SYSCALL); ++ switched = 1; ++ } else ++ /* ++ * Request originates from the Linux domain: ++ * propagate the event to our Linux-based ++ * handler, so that the syscall is executed ++ * from there. ++ */ ++ return KEVENT_PROPAGATE; ++ } else if (sysflags & (__xn_exec_histage | __xn_exec_current)) { ++ /* ++ * Syscall must run either from the Xenomai domain, or ++ * from the calling domain. ++ * ++ * If the request originates from the Linux domain, ++ * hand it over to our secondary-mode dispatcher. ++ * Otherwise, invoke the syscall handler immediately. ++ */ ++ if (ipd != &xnsched_realtime_domain) ++ return KEVENT_PROPAGATE; ++ } ++ ++ /* ++ * 'thread' has to be valid from that point: all syscalls ++ * regular threads may call have been pipelined to the root ++ * handler (lostage ones), or rejected by allowed_syscall(). ++ */ ++ ++ ret = handler(__xn_reg_arglist(regs)); ++ if (ret == -ENOSYS && (sysflags & __xn_exec_adaptive)) { ++ if (switched) { ++ ret = xnthread_harden(); ++ if (ret) { ++ switched = 0; ++ goto done; ++ } ++ } else /* Mark the primary -> secondary transition. */ ++ xnthread_set_localinfo(thread, XNDESCENT); ++ sysflags ^= ++ (__xn_exec_lostage | __xn_exec_histage | ++ __xn_exec_adaptive); ++ goto restart; ++ } ++done: ++ __xn_status_return(regs, ret); ++ sigs = 0; ++ if (!xnsched_root_p()) { ++ p = current; ++ if (signal_pending(p) || ++ xnthread_test_info(thread, XNKICKED)) { ++ sigs = 1; ++ prepare_for_signal(p, thread, regs, sysflags); ++ } else if (xnthread_test_state(thread, XNWEAK) && ++ thread->res_count == 0) { ++ if (switched) ++ switched = 0; ++ else ++ xnthread_relax(0, 0); ++ } ++ } ++ if (!sigs && (sysflags & __xn_exec_switchback) && switched) ++ /* -EPERM will be trapped later if needed. */ ++ xnthread_harden(); ++ ++ret_handled: ++ /* Update the stats and userland-visible state. 
*/ ++ if (thread) { ++ xnthread_clear_localinfo(thread, XNDESCENT); ++ xnstat_counter_inc(&thread->stat.xsc); ++ xnthread_sync_window(thread); ++ } ++ ++ trace_cobalt_head_sysexit(__xn_reg_rval(regs)); ++ ++ return KEVENT_STOP; ++ ++linux_syscall: ++ if (xnsched_root_p()) ++ /* ++ * The call originates from the Linux domain, either ++ * from a relaxed shadow or from a regular Linux task; ++ * just propagate the event so that we will fall back ++ * to handle_root_syscall(). ++ */ ++ return KEVENT_PROPAGATE; ++ ++ if (!__xn_rootcall_p(regs, &code)) ++ goto bad_syscall; ++ ++ /* ++ * We know this is a Cobalt thread since it runs over the head ++ * domain, however the current syscall should be handled by ++ * the host kernel instead. Before this happens, we have to ++ * re-enter the root domain. ++ */ ++ xnthread_relax(1, SIGDEBUG_MIGRATE_SYSCALL); ++ ++ return KEVENT_PROPAGATE; ++ ++bad_syscall: ++ printk(XENO_WARNING "bad syscall <%#x>\n", code); ++ ++ __xn_error_return(regs, -ENOSYS); ++ ++ return KEVENT_STOP; ++} ++ ++static int handle_root_syscall(struct ipipe_domain *ipd, struct pt_regs *regs) ++{ ++ int sysflags, switched, sigs; ++ struct xnthread *thread; ++ cobalt_syshand handler; ++ struct task_struct *p; ++ unsigned int nr, code; ++ long ret; ++ ++ /* ++ * Catch cancellation requests pending for user shadows ++ * running mostly in secondary mode, i.e. XNWEAK. In that ++ * case, we won't run prepare_for_signal() that frequently, so ++ * check for cancellation here. ++ */ ++ xnthread_test_cancel(); ++ ++ if (!__xn_syscall_p(regs)) ++ /* Fall back to Linux syscall handling. */ ++ return KEVENT_PROPAGATE; ++ ++ thread = xnthread_current(); ++ /* code has already been checked in the head domain handler. */ ++ code = __xn_syscall(regs); ++ nr = code & (__NR_COBALT_SYSCALLS - 1); ++ ++ trace_cobalt_root_sysentry(code); ++ ++ /* Processing a Xenomai syscall. */ ++ ++ handler = cobalt_syscalls[code]; ++ sysflags = cobalt_sysmodes[nr]; ++ ++ if (thread && (sysflags & __xn_exec_conforming)) ++ sysflags |= __xn_exec_histage; ++restart: ++ /* ++ * Process adaptive syscalls by restarting them in the ++ * opposite domain upon receiving -ENOSYS from the syscall ++ * handler. ++ */ ++ switched = 0; ++ if (sysflags & __xn_exec_histage) { ++ /* ++ * This request originates from the Linux domain but ++ * should run into the Xenomai domain: harden the ++ * caller before invoking the syscall handler. ++ */ ++ ret = xnthread_harden(); ++ if (ret) { ++ __xn_error_return(regs, ret); ++ goto ret_handled; ++ } ++ switched = 1; ++ } else { ++ /* ++ * We want to run the syscall in the current Linux ++ * domain. This is a slow path, so proceed with any ++ * pending schedparam update on the fly. ++ */ ++ if (thread) ++ xnthread_propagate_schedparam(thread); ++ } ++ ++ ret = handler(__xn_reg_arglist(regs)); ++ if (ret == -ENOSYS && (sysflags & __xn_exec_adaptive)) { ++ sysflags ^= __xn_exec_histage; ++ if (switched) { ++ xnthread_relax(1, SIGDEBUG_MIGRATE_SYSCALL); ++ sysflags &= ~__xn_exec_adaptive; ++ /* Mark the primary -> secondary transition. */ ++ xnthread_set_localinfo(thread, XNDESCENT); ++ } ++ goto restart; ++ } ++ ++ __xn_status_return(regs, ret); ++ ++ sigs = 0; ++ if (!xnsched_root_p()) { ++ /* ++ * We may have gained a shadow TCB from the syscall we ++ * just invoked, so make sure to fetch it. 
++ */ ++ thread = xnthread_current(); ++ p = current; ++ if (signal_pending(p)) { ++ sigs = 1; ++ prepare_for_signal(p, thread, regs, sysflags); ++ } else if (xnthread_test_state(thread, XNWEAK) && ++ thread->res_count == 0) ++ sysflags |= __xn_exec_switchback; ++ } ++ if (!sigs && (sysflags & __xn_exec_switchback) ++ && (switched || xnsched_primary_p())) ++ xnthread_relax(0, 0); ++ ++ret_handled: ++ /* Update the stats and userland-visible state. */ ++ if (thread) { ++ xnthread_clear_localinfo(thread, XNDESCENT|XNHICCUP); ++ xnstat_counter_inc(&thread->stat.xsc); ++ xnthread_sync_window(thread); ++ } ++ ++ trace_cobalt_root_sysexit(__xn_reg_rval(regs)); ++ ++ return KEVENT_STOP; ++} ++ ++int ipipe_syscall_hook(struct ipipe_domain *ipd, struct pt_regs *regs) ++{ ++ if (unlikely(ipipe_root_p)) ++ return handle_root_syscall(ipd, regs); ++ ++ return handle_head_syscall(ipd, regs); ++} ++ ++int ipipe_fastcall_hook(struct pt_regs *regs) ++{ ++ int ret; ++ ++ ret = handle_head_syscall(&xnsched_realtime_domain, regs); ++ XENO_BUG_ON(COBALT, ret == KEVENT_PROPAGATE); ++ ++ return ret; ++} ++ ++long cobalt_restart_syscall_placeholder(struct restart_block *param) ++{ ++ return -EINVAL; ++} +--- linux/kernel/xenomai/posix/sem.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/sem.h 2021-04-07 16:01:25.999635921 +0800 +@@ -0,0 +1,128 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_SEM_H ++#define _COBALT_POSIX_SEM_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct cobalt_process; ++struct filename; ++ ++struct cobalt_sem { ++ unsigned int magic; ++ struct xnsynch synchbase; ++ struct cobalt_sem_state *state; ++ int flags; ++ unsigned int refs; ++ struct filename *pathname; ++ struct cobalt_resnode resnode; ++}; ++ ++/* Copied from Linuxthreads semaphore.h. 
*/ ++struct _sem_fastlock ++{ ++ long int __status; ++ int __spinlock; ++}; ++ ++typedef struct ++{ ++ struct _sem_fastlock __sem_lock; ++ int __sem_value; ++ long __sem_waiting; ++} sem_t; ++ ++#include ++ ++#define SEM_VALUE_MAX (INT_MAX) ++#define SEM_FAILED NULL ++#define SEM_NAMED 0x80000000 ++ ++struct cobalt_sem_shadow __user * ++__cobalt_sem_open(struct cobalt_sem_shadow __user *usm, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value); ++ ++int __cobalt_sem_timedwait(struct cobalt_sem_shadow __user *u_sem, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)); ++ ++int __cobalt_sem_destroy(xnhandle_t handle); ++ ++void cobalt_nsem_reclaim(struct cobalt_process *process); ++ ++struct cobalt_sem * ++__cobalt_sem_init(const char *name, struct cobalt_sem_shadow *sem, ++ int flags, unsigned value); ++ ++void __cobalt_sem_shadow_init(struct cobalt_sem *sem, __u32 magic, ++ struct cobalt_sem_shadow *sm); ++ ++COBALT_SYSCALL_DECL(sem_init, ++ (struct cobalt_sem_shadow __user *u_sem, ++ int flags, unsigned value)); ++ ++COBALT_SYSCALL_DECL(sem_post, ++ (struct cobalt_sem_shadow __user *u_sem)); ++ ++COBALT_SYSCALL_DECL(sem_wait, ++ (struct cobalt_sem_shadow __user *u_sem)); ++ ++COBALT_SYSCALL_DECL(sem_timedwait, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(sem_trywait, ++ (struct cobalt_sem_shadow __user *u_sem)); ++ ++COBALT_SYSCALL_DECL(sem_getvalue, ++ (struct cobalt_sem_shadow __user *u_sem, ++ int __user *u_sval)); ++ ++COBALT_SYSCALL_DECL(sem_destroy, ++ (struct cobalt_sem_shadow __user *u_sem)); ++ ++COBALT_SYSCALL_DECL(sem_open, ++ (struct cobalt_sem_shadow __user *__user *u_addrp, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value)); ++ ++COBALT_SYSCALL_DECL(sem_close, ++ (struct cobalt_sem_shadow __user *usm)); ++ ++COBALT_SYSCALL_DECL(sem_unlink, (const char __user *u_name)); ++ ++COBALT_SYSCALL_DECL(sem_broadcast_np, ++ (struct cobalt_sem_shadow __user *u_sem)); ++ ++COBALT_SYSCALL_DECL(sem_inquire, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct cobalt_sem_info __user *u_info, ++ pid_t __user *u_waitlist, ++ size_t waitsz)); ++ ++void cobalt_sem_reclaim(struct cobalt_resnode *node, ++ spl_t s); ++ ++#endif /* !_COBALT_POSIX_SEM_H */ +--- linux/kernel/xenomai/posix/extension.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/extension.h 2021-04-07 16:01:25.994635928 +0800 +@@ -0,0 +1,135 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_POSIX_EXTENSION_H ++#define _COBALT_POSIX_EXTENSION_H ++ ++#include ++#include ++ ++#ifdef CONFIG_XENO_OPT_COBALT_EXTENSION ++ ++#include ++ ++struct cobalt_timer; ++struct cobalt_sigpending; ++struct cobalt_extref; ++struct siginfo; ++struct xnsched_class; ++union xnsched_policy_param; ++ ++struct cobalt_extension { ++ struct xnthread_personality core; ++ struct { ++ struct cobalt_thread * ++ (*timer_init)(struct cobalt_extref *reftimer, /* nklocked, IRQs off. */ ++ const struct sigevent *__restrict__ evp); ++ int (*timer_settime)(struct cobalt_extref *reftimer, /* nklocked, IRQs off. */ ++ const struct itimerspec *__restrict__ value, ++ int flags); ++ int (*timer_gettime)(struct cobalt_extref *reftimer, /* nklocked, IRQs off. */ ++ struct itimerspec *__restrict__ value); ++ int (*timer_delete)(struct cobalt_extref *reftimer); /* nklocked, IRQs off. */ ++ int (*timer_cleanup)(struct cobalt_extref *reftimer); /* nklocked, IRQs off. */ ++ int (*signal_deliver)(struct cobalt_extref *refthread, ++ struct siginfo *si, ++ struct cobalt_sigpending *sigp); ++ int (*signal_queue)(struct cobalt_extref *refthread, ++ struct cobalt_sigpending *sigp); ++ int (*signal_copyinfo)(struct cobalt_extref *refthread, ++ void __user *u_si, ++ const struct siginfo *si, ++ int overrun); ++ int (*signal_copyinfo_compat)(struct cobalt_extref *refthread, ++ void __user *u_si, ++ const struct siginfo *si, ++ int overrun); ++ int (*sched_yield)(struct cobalt_extref *curref); ++ int (*thread_setsched)(struct cobalt_extref *refthread, /* nklocked, IRQs off. */ ++ struct xnsched_class *sched_class, ++ union xnsched_policy_param *param); ++ } ops; ++}; ++ ++struct cobalt_extref { ++ struct cobalt_extension *extension; ++ struct list_head next; ++ void *private; ++}; ++ ++static inline void cobalt_set_extref(struct cobalt_extref *ref, ++ struct cobalt_extension *ext, ++ void *priv) ++{ ++ ref->extension = ext; ++ ref->private = priv; ++} ++ ++/** ++ * All macros return non-zero if some thread-level extension code was ++ * called, leaving the output value into __ret. Otherwise, the __ret ++ * value is undefined. ++ */ ++#define cobalt_initcall_extension(__extfn, __extref, __owner, __ret, __args...) \ ++ ({ \ ++ int __val = 0; \ ++ if ((__owner) && (__owner)->extref.extension) { \ ++ (__extref)->extension = (__owner)->extref.extension; \ ++ if ((__extref)->extension->ops.__extfn) { \ ++ (__ret) = (__extref)->extension->ops. \ ++ __extfn(__extref, ##__args ); \ ++ __val = 1; \ ++ } \ ++ } else \ ++ (__extref)->extension = NULL; \ ++ __val; \ ++ }) ++ ++#define cobalt_call_extension(__extfn, __extref, __ret, __args...) \ ++ ({ \ ++ int __val = 0; \ ++ if ((__extref)->extension && \ ++ (__extref)->extension->ops.__extfn) { \ ++ (__ret) = (__extref)->extension->ops. \ ++ __extfn(__extref, ##__args ); \ ++ __val = 1; \ ++ } \ ++ __val; \ ++ }) ++ ++#else /* !CONFIG_XENO_OPT_COBALT_EXTENSION */ ++ ++struct cobalt_extension; ++ ++struct cobalt_extref { ++}; ++ ++static inline void cobalt_set_extref(struct cobalt_extref *ref, ++ struct cobalt_extension *ext, ++ void *priv) ++{ ++} ++ ++#define cobalt_initcall_extension(__extfn, __extref, __owner, __ret, __args...) \ ++ ({ (void)(__owner); (void)(__ret); 0; }) ++ ++#define cobalt_call_extension(__extfn, __extref, __ret, __args...) 
\ ++ ({ (void)(__ret); 0; }) ++ ++#endif /* !CONFIG_XENO_OPT_COBALT_EXTENSION */ ++ ++#endif /* !_COBALT_POSIX_EXTENSION_H */ +--- linux/kernel/xenomai/posix/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/thread.h 2021-04-07 16:01:25.989635935 +0800 +@@ -0,0 +1,228 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_THREAD_H ++#define _COBALT_POSIX_THREAD_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++/* CAUTION: rtdm/cobalt.h reads this header. */ ++#include ++#include ++ ++#define PTHREAD_PROCESS_PRIVATE 0 ++#define PTHREAD_PROCESS_SHARED 1 ++ ++#define PTHREAD_CREATE_JOINABLE 0 ++#define PTHREAD_CREATE_DETACHED 1 ++ ++#define PTHREAD_INHERIT_SCHED 0 ++#define PTHREAD_EXPLICIT_SCHED 1 ++ ++#define PTHREAD_MUTEX_NORMAL 0 ++#define PTHREAD_MUTEX_RECURSIVE 1 ++#define PTHREAD_MUTEX_ERRORCHECK 2 ++#define PTHREAD_MUTEX_DEFAULT 0 ++ ++struct cobalt_thread; ++struct cobalt_threadstat; ++ ++/* ++ * pthread_mutexattr_t and pthread_condattr_t fit on 32 bits, for ++ * compatibility with libc. ++ */ ++ ++/* The following definitions are copied from linuxthread pthreadtypes.h. */ ++struct _pthread_fastlock { ++ long int __status; ++ int __spinlock; ++}; ++ ++typedef struct { ++ struct _pthread_fastlock __c_lock; ++ long __c_waiting; ++ char __padding[48 - sizeof (struct _pthread_fastlock) ++ - sizeof (long) - sizeof (long long)]; ++ long long __align; ++} pthread_cond_t; ++ ++enum { ++ PTHREAD_PRIO_NONE, ++ PTHREAD_PRIO_INHERIT, ++ PTHREAD_PRIO_PROTECT ++}; ++ ++typedef struct { ++ int __m_reserved; ++ int __m_count; ++ long __m_owner; ++ int __m_kind; ++ struct _pthread_fastlock __m_lock; ++} pthread_mutex_t; ++ ++struct cobalt_local_hkey { ++ /** pthread_t from userland. */ ++ unsigned long u_pth; ++ /** kernel mm context. */ ++ struct mm_struct *mm; ++}; ++ ++struct cobalt_thread { ++ unsigned int magic; ++ struct xnthread threadbase; ++ struct cobalt_extref extref; ++ struct cobalt_process *process; ++ struct list_head next; /* in global/process thread_list */ ++ ++ /** Signal management. */ ++ sigset_t sigpending; ++ struct list_head sigqueues[_NSIG]; /* in cobalt_sigpending */ ++ struct xnsynch sigwait; ++ struct list_head signext; ++ ++ /** Monitor wait object and link holder. */ ++ struct xnsynch monitor_synch; ++ struct list_head monitor_link; ++ ++ struct cobalt_local_hkey hkey; ++}; ++ ++struct cobalt_sigwait_context { ++ struct xnthread_wait_context wc; ++ sigset_t *set; ++ struct siginfo *si; ++}; ++ ++static inline struct cobalt_thread *cobalt_current_thread(void) ++{ ++ struct xnthread *curr = xnthread_current(); ++ return curr ? 
container_of(curr, struct cobalt_thread, threadbase) : NULL; ++} ++ ++int __cobalt_thread_create(unsigned long pth, int policy, ++ struct sched_param_ex __user *u_param, ++ int xid, __u32 __user *u_winoff); ++ ++int __cobalt_thread_setschedparam_ex(struct cobalt_thread *thread, int policy, ++ const struct sched_param_ex *param_ex); ++ ++int cobalt_thread_setschedparam_ex(unsigned long pth, ++ int policy, ++ const struct sched_param_ex *param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted); ++ ++int cobalt_thread_getschedparam_ex(unsigned long pth, ++ int *policy_r, ++ struct sched_param_ex *param_ex); ++ ++int __cobalt_thread_getschedparam_ex(struct cobalt_thread *thread, ++ int *policy_r, ++ struct sched_param_ex *param_ex); ++ ++int cobalt_thread_setschedprio(unsigned long pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted); ++ ++struct cobalt_thread *cobalt_thread_find(pid_t pid); ++ ++struct cobalt_thread *cobalt_thread_find_local(pid_t pid); ++ ++struct cobalt_thread *cobalt_thread_lookup(unsigned long pth); ++ ++COBALT_SYSCALL_DECL(thread_create, ++ (unsigned long pth, int policy, ++ struct sched_param_ex __user *u_param, ++ int xid, __u32 __user *u_winoff)); ++ ++struct cobalt_thread * ++cobalt_thread_shadow(struct cobalt_local_hkey *lhkey, ++ __u32 __user *u_winoff); ++ ++COBALT_SYSCALL_DECL(thread_setmode, ++ (int clrmask, int setmask, int __user *u_mode_r)); ++ ++COBALT_SYSCALL_DECL(thread_setname, ++ (unsigned long pth, const char __user *u_name)); ++ ++COBALT_SYSCALL_DECL(thread_kill, (unsigned long pth, int sig)); ++ ++COBALT_SYSCALL_DECL(thread_join, (unsigned long pth)); ++ ++COBALT_SYSCALL_DECL(thread_getpid, (unsigned long pth)); ++ ++COBALT_SYSCALL_DECL(thread_getstat, ++ (pid_t pid, struct cobalt_threadstat __user *u_stat)); ++ ++COBALT_SYSCALL_DECL(thread_setschedparam_ex, ++ (unsigned long pth, ++ int policy, ++ const struct sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++COBALT_SYSCALL_DECL(thread_getschedparam_ex, ++ (unsigned long pth, ++ int __user *u_policy, ++ struct sched_param_ex __user *u_param)); ++ ++COBALT_SYSCALL_DECL(thread_setschedprio, ++ (unsigned long pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++void cobalt_thread_map(struct xnthread *curr); ++ ++struct xnthread_personality *cobalt_thread_exit(struct xnthread *curr); ++ ++struct xnthread_personality *cobalt_thread_finalize(struct xnthread *zombie); ++ ++#ifdef CONFIG_XENO_OPT_COBALT_EXTENSION ++ ++int cobalt_thread_extend(struct cobalt_extension *ext, ++ void *priv); ++ ++void cobalt_thread_restrict(void); ++ ++static inline ++int cobalt_thread_extended_p(const struct cobalt_thread *thread, ++ const struct cobalt_extension *ext) ++{ ++ return thread->extref.extension == ext; ++} ++ ++#else /* !CONFIG_XENO_OPT_COBALT_EXTENSION */ ++ ++static inline ++int cobalt_thread_extended_p(const struct cobalt_thread *thread, ++ const struct cobalt_extension *ext) ++{ ++ return 0; ++} ++ ++#endif /* !CONFIG_XENO_OPT_COBALT_EXTENSION */ ++ ++extern xnticks_t cobalt_time_slice; ++ ++#endif /* !_COBALT_POSIX_THREAD_H */ +--- linux/kernel/xenomai/posix/monitor.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/monitor.h 2021-04-07 16:01:25.985635941 +0800 +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (C) 2011 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free 
Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef _COBALT_POSIX_MONITOR_H ++#define _COBALT_POSIX_MONITOR_H ++ ++#include ++#include ++#include ++#include ++ ++struct cobalt_resources; ++struct cobalt_process; ++ ++struct cobalt_monitor { ++ unsigned int magic; ++ struct xnsynch gate; ++ struct xnsynch drain; ++ struct cobalt_monitor_state *state; ++ struct list_head waiters; ++ int flags; ++ xntmode_t tmode; ++ struct cobalt_resnode resnode; ++}; ++ ++int __cobalt_monitor_wait(struct cobalt_monitor_shadow __user *u_mon, ++ int event, const struct timespec *ts, ++ int __user *u_ret); ++ ++COBALT_SYSCALL_DECL(monitor_init, ++ (struct cobalt_monitor_shadow __user *u_monsh, ++ clockid_t clk_id, ++ int flags)); ++ ++COBALT_SYSCALL_DECL(monitor_enter, ++ (struct cobalt_monitor_shadow __user *u_monsh)); ++ ++COBALT_SYSCALL_DECL(monitor_sync, ++ (struct cobalt_monitor_shadow __user *u_monsh)); ++ ++COBALT_SYSCALL_DECL(monitor_exit, ++ (struct cobalt_monitor_shadow __user *u_monsh)); ++ ++COBALT_SYSCALL_DECL(monitor_wait, ++ (struct cobalt_monitor_shadow __user *u_monsh, ++ int event, const struct timespec __user *u_ts, ++ int __user *u_ret)); ++ ++COBALT_SYSCALL_DECL(monitor_destroy, ++ (struct cobalt_monitor_shadow __user *u_monsh)); ++ ++void cobalt_monitor_reclaim(struct cobalt_resnode *node, ++ spl_t s); ++ ++#endif /* !_COBALT_POSIX_MONITOR_H */ +--- linux/kernel/xenomai/posix/nsem.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/nsem.c 2021-04-07 16:01:25.980635948 +0800 +@@ -0,0 +1,299 @@ ++/* ++ * Copyright (C) 2013 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "sem.h" ++#include "thread.h" ++#include ++ ++DEFINE_PRIVATE_XNLOCK(named_sem_lock); ++ ++struct cobalt_named_sem { ++ struct cobalt_sem *sem; ++ struct cobalt_sem_shadow __user *usem; ++ unsigned int refs; ++ struct xnid id; ++}; ++ ++static struct cobalt_named_sem * ++sem_search(struct cobalt_process *process, xnhandle_t handle) ++{ ++ struct xnid *i; ++ ++ i = xnid_fetch(&process->usems, handle); ++ if (i == NULL) ++ return NULL; ++ ++ return container_of(i, struct cobalt_named_sem, id); ++} ++ ++static struct cobalt_sem_shadow __user * ++sem_open(struct cobalt_process *process, ++ struct cobalt_sem_shadow __user *ushadow, ++ struct filename *filename, int oflags, mode_t mode, ++ unsigned int value) ++{ ++ const char *name = filename->name; ++ struct cobalt_sem_shadow shadow; ++ struct cobalt_named_sem *u, *v; ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ spl_t s; ++ int rc; ++ ++ if (name[0] != '/' || name[1] == '\0') ++ return ERR_PTR(-EINVAL); ++ ++ retry_bind: ++ rc = xnregistry_bind(&name[1], XN_NONBLOCK, XN_RELATIVE, &handle); ++ switch (rc) { ++ case 0: ++ /* Found */ ++ if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) ++ return ERR_PTR(-EEXIST); ++ ++ xnlock_get_irqsave(&named_sem_lock, s); ++ u = sem_search(process, handle); ++ if (u) { ++ ++u->refs; ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ return u->usem; ++ } ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ ++ xnlock_get_irqsave(&nklock, s); ++ sem = xnregistry_lookup(handle, NULL); ++ if (sem && sem->magic != COBALT_SEM_MAGIC) { ++ xnlock_put_irqrestore(&nklock, s); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ if (sem) { ++ ++sem->refs; ++ xnlock_put_irqrestore(&nklock, s); ++ } else { ++ xnlock_put_irqrestore(&nklock, s); ++ goto retry_bind; ++ } ++ ++ __cobalt_sem_shadow_init(sem, COBALT_NAMED_SEM_MAGIC, &shadow); ++ break; ++ ++ case -EWOULDBLOCK: ++ /* Not found */ ++ if ((oflags & O_CREAT) == 0) ++ return ERR_PTR(-ENOENT); ++ ++ shadow.magic = 0; ++ sem = __cobalt_sem_init(&name[1], &shadow, ++ SEM_PSHARED | SEM_NAMED, value); ++ if (IS_ERR(sem)) { ++ rc = PTR_ERR(sem); ++ if (rc == -EEXIST) ++ goto retry_bind; ++ return ERR_PTR(rc); ++ } ++ ++ sem->pathname = filename; ++ handle = shadow.handle; ++ break; ++ ++ default: ++ return ERR_PTR(rc); ++ } ++ ++ if (cobalt_copy_to_user(ushadow, &shadow, sizeof(shadow))) { ++ __cobalt_sem_destroy(handle); ++ return ERR_PTR(-EFAULT); ++ } ++ ++ u = xnmalloc(sizeof(*u)); ++ if (u == NULL) { ++ __cobalt_sem_destroy(handle); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ u->sem = sem; ++ u->usem = ushadow; ++ u->refs = 1; ++ ++ xnlock_get_irqsave(&named_sem_lock, s); ++ v = sem_search(process, handle); ++ if (v) { ++ ++v->refs; ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ xnlock_get_irqsave(&nklock, s); ++ --sem->refs; ++ xnlock_put_irqrestore(&nklock, s); ++ putname(filename); ++ xnfree(u); ++ u = v; ++ } else { ++ xnid_enter(&process->usems, &u->id, handle); ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ } ++ ++ trace_cobalt_psem_open(name, handle, oflags, mode, value); ++ ++ return u->usem; ++} ++ ++static int sem_close(struct cobalt_process *process, xnhandle_t handle) ++{ ++ struct cobalt_named_sem *u; ++ spl_t s; ++ int err; ++ ++ xnlock_get_irqsave(&named_sem_lock, s); ++ u = sem_search(process, handle); ++ if (u == NULL) { ++ err = -ENOENT; ++ goto err_unlock; ++ } ++ ++ if (--u->refs) { ++ err = 0; ++ goto err_unlock; ++ } ++ ++ xnid_remove(&process->usems, 
&u->id); ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ ++ __cobalt_sem_destroy(handle); ++ ++ xnfree(u); ++ return 1; ++ ++ err_unlock: ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ return err; ++} ++ ++struct cobalt_sem_shadow __user * ++__cobalt_sem_open(struct cobalt_sem_shadow __user *usm, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value) ++{ ++ struct cobalt_process *process; ++ struct filename *filename; ++ ++ process = cobalt_current_process(); ++ if (process == NULL) ++ return ERR_PTR(-EPERM); ++ ++ filename = getname(u_name); ++ if (IS_ERR(filename)) ++ return ERR_CAST(filename); ++ ++ usm = sem_open(process, usm, filename, oflags, mode, value); ++ if (IS_ERR(usm)) { ++ trace_cobalt_psem_open_failed(filename->name, oflags, mode, ++ value, PTR_ERR(usm)); ++ putname(filename); ++ } ++ ++ return usm; ++} ++ ++COBALT_SYSCALL(sem_open, lostage, ++ (struct cobalt_sem_shadow __user *__user *u_addrp, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value)) ++{ ++ struct cobalt_sem_shadow __user *usm; ++ ++ if (__xn_get_user(usm, u_addrp)) ++ return -EFAULT; ++ ++ usm = __cobalt_sem_open(usm, u_name, oflags, mode, value); ++ if (IS_ERR(usm)) ++ return PTR_ERR(usm); ++ ++ return __xn_put_user(usm, u_addrp) ? -EFAULT : 0; ++} ++ ++COBALT_SYSCALL(sem_close, lostage, ++ (struct cobalt_sem_shadow __user *usm)) ++{ ++ struct cobalt_process *process; ++ xnhandle_t handle; ++ ++ process = cobalt_current_process(); ++ if (process == NULL) ++ return -EPERM; ++ ++ handle = cobalt_get_handle_from_user(&usm->handle); ++ trace_cobalt_psem_close(handle); ++ ++ return sem_close(process, handle); ++} ++ ++static inline int sem_unlink(const char *name) ++{ ++ xnhandle_t handle; ++ int ret; ++ ++ if (name[0] != '/') ++ return -EINVAL; ++ ++ ret = xnregistry_bind(name + 1, XN_NONBLOCK, XN_RELATIVE, &handle); ++ if (ret == -EWOULDBLOCK) ++ return -ENOENT; ++ ++ if (__cobalt_sem_destroy(handle) == -EBUSY) ++ xnregistry_unlink(xnregistry_key(handle)); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(sem_unlink, lostage, ++ (const char __user *u_name)) ++{ ++ struct filename *filename; ++ int ret; ++ ++ filename = getname(u_name); ++ if (IS_ERR(filename)) ++ return PTR_ERR(filename); ++ ++ trace_cobalt_psem_unlink(filename->name); ++ ret = sem_unlink(filename->name); ++ putname(filename); ++ ++ return ret; ++} ++ ++static void reclaim_named_sem(void *arg, struct xnid *i) ++{ ++ struct cobalt_process *process = arg; ++ struct cobalt_named_sem *u; ++ ++ u = container_of(i, struct cobalt_named_sem, id); ++ u->refs = 1; ++ sem_close(process, xnid_key(i)); ++} ++ ++void cobalt_nsem_reclaim(struct cobalt_process *process) ++{ ++ xntree_cleanup(&process->usems, process, reclaim_named_sem); ++} +--- linux/kernel/xenomai/posix/sched.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/sched.h 2021-04-07 16:01:25.975635955 +0800 +@@ -0,0 +1,109 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_SCHED_H ++#define _COBALT_POSIX_SCHED_H ++ ++#include ++#include ++#include ++ ++struct cobalt_resources; ++struct cobalt_process; ++ ++struct cobalt_sched_group { ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ struct xnsched_quota_group quota; ++#endif ++ struct cobalt_resources *scope; ++ int pshared; ++ struct list_head next; ++}; ++ ++int __cobalt_sched_weightprio(int policy, ++ const struct sched_param_ex *param_ex); ++ ++int __cobalt_sched_setconfig_np(int cpu, int policy, ++ void __user *u_config, ++ size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ int (*ack_config)(int policy, ++ const union sched_config *config, ++ void __user *u_config)); ++ ++ssize_t __cobalt_sched_getconfig_np(int cpu, int policy, ++ void __user *u_config, ++ size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, ++ void __user *u_config, size_t u_len, ++ const union sched_config *config, ++ size_t len)); ++int cobalt_sched_setscheduler_ex(pid_t pid, ++ int policy, ++ const struct sched_param_ex *param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted); ++ ++int cobalt_sched_getscheduler_ex(pid_t pid, ++ int *policy_r, ++ struct sched_param_ex *param_ex); ++ ++struct xnsched_class * ++cobalt_sched_policy_param(union xnsched_policy_param *param, ++ int u_policy, const struct sched_param_ex *param_ex, ++ xnticks_t *tslice_r); ++ ++COBALT_SYSCALL_DECL(sched_yield, (void)); ++ ++COBALT_SYSCALL_DECL(sched_weightprio, ++ (int policy, const struct sched_param_ex __user *u_param)); ++ ++COBALT_SYSCALL_DECL(sched_minprio, (int policy)); ++ ++COBALT_SYSCALL_DECL(sched_maxprio, (int policy)); ++ ++COBALT_SYSCALL_DECL(sched_setconfig_np, ++ (int cpu, ++ int policy, ++ union sched_config __user *u_config, ++ size_t len)); ++ ++COBALT_SYSCALL_DECL(sched_getconfig_np, ++ (int cpu, int policy, ++ union sched_config __user *u_config, ++ size_t len)); ++ ++COBALT_SYSCALL_DECL(sched_setscheduler_ex, ++ (pid_t pid, ++ int policy, ++ const struct sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++COBALT_SYSCALL_DECL(sched_getscheduler_ex, ++ (pid_t pid, ++ int __user *u_policy, ++ struct sched_param_ex __user *u_param)); ++ ++void cobalt_sched_reclaim(struct cobalt_process *process); ++ ++#endif /* !_COBALT_POSIX_SCHED_H */ +--- linux/kernel/xenomai/posix/cond.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/cond.c 2021-04-07 16:01:25.971635961 +0800 +@@ -0,0 +1,425 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include "internal.h" ++#include "thread.h" ++#include "mutex.h" ++#include "cond.h" ++#include "clock.h" ++#include ++ ++static inline int ++pthread_cond_init(struct cobalt_cond_shadow *cnd, const struct cobalt_condattr *attr) ++{ ++ int synch_flags = XNSYNCH_PRIO, ret; ++ struct cobalt_cond *cond, *old_cond; ++ struct cobalt_cond_state *state; ++ struct cobalt_ppd *sys_ppd; ++ struct list_head *condq; ++ spl_t s; ++ ++ cond = xnmalloc(sizeof(*cond)); ++ if (cond == NULL) ++ return -ENOMEM; ++ ++ sys_ppd = cobalt_ppd_get(attr->pshared); ++ state = cobalt_umm_alloc(&sys_ppd->umm, sizeof(*state)); ++ if (state == NULL) { ++ ret = -EAGAIN; ++ goto fail_umm; ++ } ++ cond->state = state; ++ state->pending_signals = 0; ++ state->mutex_state_offset = ~0U; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ condq = &cobalt_current_resources(attr->pshared)->condq; ++ if (cnd->magic == COBALT_COND_MAGIC && !list_empty(condq)) { ++ old_cond = xnregistry_lookup(cnd->handle, NULL); ++ if (cobalt_obj_active(old_cond, COBALT_COND_MAGIC, ++ typeof(*old_cond))) { ++ ret = -EBUSY; ++ goto fail_register; ++ } ++ } ++ ++ ret = xnregistry_enter_anon(cond, &cond->resnode.handle); ++ if (ret < 0) ++ goto fail_register; ++ if (attr->pshared) ++ cond->resnode.handle |= XNSYNCH_PSHARED; ++ cond->magic = COBALT_COND_MAGIC; ++ xnsynch_init(&cond->synchbase, synch_flags, NULL); ++ cond->attr = *attr; ++ cond->mutex = NULL; ++ cobalt_add_resource(&cond->resnode, cond, attr->pshared); ++ ++ cnd->handle = cond->resnode.handle; ++ cnd->state_offset = cobalt_umm_offset(&sys_ppd->umm, state); ++ cnd->magic = COBALT_COND_MAGIC; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++fail_register: ++ xnlock_put_irqrestore(&nklock, s); ++ cobalt_umm_free(&sys_ppd->umm, state); ++fail_umm: ++ xnfree(cond); ++ ++ return ret; ++} ++ ++static inline int pthread_cond_destroy(struct cobalt_cond_shadow *cnd) ++{ ++ struct cobalt_cond *cond; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ cond = xnregistry_lookup(cnd->handle, NULL); ++ if (cond == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ ++ if (!cobalt_obj_active(cnd, COBALT_COND_MAGIC, struct cobalt_cond_shadow) ++ || !cobalt_obj_active(cond, COBALT_COND_MAGIC, struct cobalt_cond)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ ++ if (cond->resnode.scope != ++ cobalt_current_resources(cond->attr.pshared)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EPERM; ++ } ++ ++ if (xnsynch_pended_p(&cond->synchbase) || cond->mutex) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBUSY; ++ } ++ ++ cobalt_cond_reclaim(&cond->resnode, s); /* drops lock */ ++ ++ cobalt_mark_deleted(cnd); ++ ++ return 0; ++} ++ ++static inline int cobalt_cond_timedwait_prologue(struct xnthread *cur, ++ struct cobalt_cond *cond, ++ struct cobalt_mutex *mutex, ++ xnticks_t abs_to) ++{ ++ int err, ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* If another thread waiting for cond does not use the same mutex */ ++ if (!cobalt_obj_active(cond, COBALT_COND_MAGIC, struct cobalt_cond) ++ || (cond->mutex && cond->mutex != mutex)) { ++ err = -EINVAL; ++ goto unlock_and_return; ++ } ++ ++ if (cond->resnode.scope != ++ cobalt_current_resources(cond->attr.pshared)) { ++ err = -EPERM; ++ goto unlock_and_return; ++ } ++ ++ if 
(mutex->attr.pshared != cond->attr.pshared) { ++ err = -EINVAL; ++ goto unlock_and_return; ++ } ++ ++ /* Unlock mutex. */ ++ err = cobalt_mutex_release(cur, mutex); ++ if (err < 0) ++ goto unlock_and_return; ++ ++ /* err == 1 means a reschedule is needed, but do not ++ reschedule here, releasing the mutex and suspension must be ++ done atomically in pthread_cond_*wait. */ ++ ++ /* Bind mutex to cond. */ ++ if (cond->mutex == NULL) { ++ cond->mutex = mutex; ++ list_add_tail(&cond->mutex_link, &mutex->conds); ++ } ++ ++ /* Wait for another thread to signal the condition. */ ++ if (abs_to != XN_INFINITE) ++ ret = xnsynch_sleep_on(&cond->synchbase, abs_to, ++ clock_flag(TIMER_ABSTIME, cond->attr.clock)); ++ else ++ ret = xnsynch_sleep_on(&cond->synchbase, XN_INFINITE, XN_RELATIVE); ++ ++ /* There are three possible wakeup conditions : ++ - cond_signal / cond_broadcast, no status bit is set, and the function ++ should return 0 ; ++ - timeout, the status XNTIMEO is set, and the function should return ++ ETIMEDOUT ; ++ - pthread_kill, the status bit XNBREAK is set, but ignored, the ++ function simply returns EINTR (used only by the user-space ++ interface, replaced by 0 anywhere else), causing a wakeup, spurious ++ or not whether pthread_cond_signal was called between pthread_kill ++ and the moment when xnsynch_sleep_on returned ; ++ */ ++ ++ err = 0; ++ ++ if (ret & XNBREAK) ++ err = -EINTR; ++ else if (ret & XNTIMEO) ++ err = -ETIMEDOUT; ++ ++unlock_and_return: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++static inline int cobalt_cond_timedwait_epilogue(struct xnthread *cur, ++ struct cobalt_cond *cond, ++ struct cobalt_mutex *mutex) ++{ ++ int err; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ err = __cobalt_mutex_acquire_unchecked(cur, mutex, NULL); ++ if (err == -EINTR) ++ goto unlock_and_return; ++ ++ /* ++ * Unbind mutex and cond, if no other thread is waiting, if ++ * the job was not already done. ++ */ ++ if (!xnsynch_pended_p(&cond->synchbase) && cond->mutex == mutex) { ++ cond->mutex = NULL; ++ list_del(&cond->mutex_link); ++ } ++ ++unlock_and_return: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++COBALT_SYSCALL(cond_init, current, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ const struct cobalt_condattr __user *u_attr)) ++{ ++ struct cobalt_cond_shadow cnd; ++ struct cobalt_condattr attr; ++ int err; ++ ++ if (cobalt_copy_from_user(&cnd, u_cnd, sizeof(cnd))) ++ return -EFAULT; ++ ++ if (cobalt_copy_from_user(&attr, u_attr, sizeof(attr))) ++ return -EFAULT; ++ ++ trace_cobalt_cond_init(u_cnd, &attr); ++ ++ err = pthread_cond_init(&cnd, &attr); ++ if (err < 0) ++ return err; ++ ++ return cobalt_copy_to_user(u_cnd, &cnd, sizeof(*u_cnd)); ++} ++ ++COBALT_SYSCALL(cond_destroy, current, ++ (struct cobalt_cond_shadow __user *u_cnd)) ++{ ++ struct cobalt_cond_shadow cnd; ++ int err; ++ ++ if (cobalt_copy_from_user(&cnd, u_cnd, sizeof(cnd))) ++ return -EFAULT; ++ ++ trace_cobalt_cond_destroy(u_cnd); ++ ++ err = pthread_cond_destroy(&cnd); ++ if (err < 0) ++ return err; ++ ++ return cobalt_copy_to_user(u_cnd, &cnd, sizeof(*u_cnd)); ++} ++ ++struct us_cond_data { ++ int err; ++}; ++ ++static inline int cond_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? 
-EFAULT : ++ cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++} ++ ++int __cobalt_cond_wait_prologue(struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct xnthread *cur = xnthread_current(); ++ struct cobalt_cond *cond; ++ struct cobalt_mutex *mx; ++ struct us_cond_data d; ++ struct timespec ts; ++ xnhandle_t handle; ++ int err, perr = 0; ++ __u32 offset; ++ ++ handle = cobalt_get_handle_from_user(&u_cnd->handle); ++ cond = xnregistry_lookup(handle, NULL); ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++ mx = xnregistry_lookup(handle, NULL); ++ ++ if (cond->mutex == NULL) { ++ __xn_get_user(offset, &u_mx->state_offset); ++ cond->state->mutex_state_offset = offset; ++ } ++ ++ if (fetch_timeout) { ++ err = fetch_timeout(&ts, u_ts); ++ if (err == 0) { ++ trace_cobalt_cond_timedwait(u_cnd, u_mx, &ts); ++ err = cobalt_cond_timedwait_prologue(cur, cond, mx, ++ ts2ns(&ts) + 1); ++ } ++ } else { ++ trace_cobalt_cond_wait(u_cnd, u_mx); ++ err = cobalt_cond_timedwait_prologue(cur, cond, mx, XN_INFINITE); ++ } ++ ++ switch(err) { ++ case 0: ++ case -ETIMEDOUT: ++ perr = d.err = err; ++ err = cobalt_cond_timedwait_epilogue(cur, cond, mx); ++ break; ++ ++ case -EINTR: ++ perr = err; ++ d.err = 0; /* epilogue should return 0. */ ++ break; ++ ++ default: ++ /* Please gcc and handle the case which will never ++ happen */ ++ d.err = EINVAL; ++ } ++ ++ if (cond->mutex == NULL) ++ cond->state->mutex_state_offset = ~0U; ++ ++ if (err == -EINTR) ++ __xn_put_user(d.err, u_err); ++ ++ return err == 0 ? perr : err; ++} ++ ++/* pthread_cond_wait_prologue(cond, mutex, count_ptr, timed, timeout) */ ++COBALT_SYSCALL(cond_wait_prologue, nonrestartable, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ unsigned int timed, ++ struct timespec __user *u_ts)) ++{ ++ return __cobalt_cond_wait_prologue(u_cnd, u_mx, u_err, u_ts, ++ timed ? 
cond_fetch_timeout : NULL); ++} ++ ++COBALT_SYSCALL(cond_wait_epilogue, primary, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ struct xnthread *cur = xnthread_current(); ++ struct cobalt_cond *cond; ++ struct cobalt_mutex *mx; ++ xnhandle_t handle; ++ int err; ++ ++ handle = cobalt_get_handle_from_user(&u_cnd->handle); ++ cond = xnregistry_lookup(handle, NULL); ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++ mx = xnregistry_lookup(handle, NULL); ++ err = cobalt_cond_timedwait_epilogue(cur, cond, mx); ++ ++ if (cond->mutex == NULL) ++ cond->state->mutex_state_offset = ~0U; ++ ++ return err; ++} ++ ++int cobalt_cond_deferred_signals(struct cobalt_cond *cond) ++{ ++ struct cobalt_cond_state *state; ++ __u32 pending_signals; ++ int need_resched; ++ ++ state = cond->state; ++ pending_signals = state->pending_signals; ++ ++ switch(pending_signals) { ++ default: ++ state->pending_signals = 0; ++ need_resched = xnsynch_wakeup_many_sleepers(&cond->synchbase, ++ pending_signals); ++ break; ++ ++ case ~0U: ++ need_resched = ++ xnsynch_flush(&cond->synchbase, 0) == XNSYNCH_RESCHED; ++ state->pending_signals = 0; ++ break; ++ ++ case 0: ++ need_resched = 0; ++ break; ++ } ++ ++ return need_resched; ++} ++ ++void cobalt_cond_reclaim(struct cobalt_resnode *node, spl_t s) ++{ ++ struct cobalt_cond *cond; ++ ++ cond = container_of(node, struct cobalt_cond, resnode); ++ xnregistry_remove(node->handle); ++ cobalt_del_resource(node); ++ xnsynch_destroy(&cond->synchbase); ++ cobalt_mark_deleted(cond); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ cobalt_umm_free(&cobalt_ppd_get(cond->attr.pshared)->umm, ++ cond->state); ++ xnfree(cond); ++} +--- linux/kernel/xenomai/posix/corectl.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/corectl.c 2021-04-07 16:01:25.966635968 +0800 +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (C) 2016 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "corectl.h" ++ ++static BLOCKING_NOTIFIER_HEAD(config_notifier_list); ++ ++static int do_conf_option(int option, void __user *u_buf, size_t u_bufsz) ++{ ++ struct cobalt_config_vector vec; ++ int ret, val = 0; ++ ++ if (option <= _CC_COBALT_GET_CORE_STATUS && u_bufsz < sizeof(val)) ++ return -EINVAL; ++ ++ switch (option) { ++ case _CC_COBALT_GET_VERSION: ++ val = XENO_VERSION_CODE; ++ break; ++ case _CC_COBALT_GET_NR_PIPES: ++#ifdef CONFIG_XENO_OPT_PIPE ++ val = CONFIG_XENO_OPT_PIPE_NRDEV; ++#endif ++ break; ++ case _CC_COBALT_GET_NR_TIMERS: ++ val = CONFIG_XENO_OPT_NRTIMERS; ++ break; ++ case _CC_COBALT_GET_POLICIES: ++ val = _CC_COBALT_SCHED_FIFO|_CC_COBALT_SCHED_RR; ++ if (IS_ENABLED(CONFIG_XENO_OPT_SCHED_WEAK)) ++ val |= _CC_COBALT_SCHED_WEAK; ++ if (IS_ENABLED(CONFIG_XENO_OPT_SCHED_SPORADIC)) ++ val |= _CC_COBALT_SCHED_SPORADIC; ++ if (IS_ENABLED(CONFIG_XENO_OPT_SCHED_QUOTA)) ++ val |= _CC_COBALT_SCHED_QUOTA; ++ if (IS_ENABLED(CONFIG_XENO_OPT_SCHED_TP)) ++ val |= _CC_COBALT_SCHED_TP; ++ break; ++ case _CC_COBALT_GET_DEBUG: ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_COBALT)) ++ val |= _CC_COBALT_DEBUG_ASSERT; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_CONTEXT)) ++ val |= _CC_COBALT_DEBUG_CONTEXT; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_LOCKING)) ++ val |= _CC_COBALT_DEBUG_LOCKING; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_USER)) ++ val |= _CC_COBALT_DEBUG_USER; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED)) ++ val |= _CC_COBALT_DEBUG_MUTEX_RELAXED; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_MUTEX_SLEEP)) ++ val |= _CC_COBALT_DEBUG_MUTEX_SLEEP; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_LEGACY)) ++ val |= _CC_COBALT_DEBUG_LEGACY; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_TRACE_RELAX)) ++ val |= _CC_COBALT_DEBUG_TRACE_RELAX; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_RTNET_CHECKED)) ++ val |= _CC_COBALT_DEBUG_NET; ++ break; ++ case _CC_COBALT_GET_WATCHDOG: ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ val = CONFIG_XENO_OPT_WATCHDOG_TIMEOUT; ++#endif ++ break; ++ case _CC_COBALT_GET_CORE_STATUS: ++ val = realtime_core_state(); ++ break; ++ default: ++ if (!ipipe_root_p) ++ /* Switch to secondary mode first. */ ++ return -ENOSYS; ++ vec.u_buf = u_buf; ++ vec.u_bufsz = u_bufsz; ++ ret = blocking_notifier_call_chain(&config_notifier_list, ++ option, &vec); ++ if (ret == NOTIFY_DONE) ++ return -EINVAL; /* Nobody cared. */ ++ return notifier_to_errno(ret); ++ } ++ ++ ret = cobalt_copy_to_user(u_buf, &val, sizeof(val)); ++ ++ return ret ? -EFAULT : 0; ++} ++ ++static int stop_services(const void __user *u_buf, size_t u_bufsz) ++{ ++ const u32 final_grace_period = 3; /* seconds */ ++ enum cobalt_run_states state; ++ __u32 grace_period; ++ int ret; ++ ++ /* ++ * XXX: we don't have any syscall for unbinding a thread from ++ * the Cobalt core, so we deny real-time threads from stopping ++ * Cobalt services. i.e. _CC_COBALT_STOP_CORE must be issued ++ * from a plain regular linux thread. ++ */ ++ if (xnthread_current()) ++ return -EPERM; ++ ++ if (u_bufsz != sizeof(__u32)) ++ return -EINVAL; ++ ++ ret = cobalt_copy_from_user(&grace_period, ++ u_buf, sizeof(grace_period)); ++ if (ret) ++ return ret; ++ ++ state = atomic_cmpxchg(&cobalt_runstate, ++ COBALT_STATE_RUNNING, ++ COBALT_STATE_TEARDOWN); ++ switch (state) { ++ case COBALT_STATE_STOPPED: ++ break; ++ case COBALT_STATE_RUNNING: ++ /* Kill user threads. 
*/ ++ ret = xnthread_killall(grace_period, XNUSER); ++ if (ret) { ++ set_realtime_core_state(state); ++ return ret; ++ } ++ cobalt_call_state_chain(COBALT_STATE_TEARDOWN); ++ /* Kill lingering RTDM tasks. */ ++ ret = xnthread_killall(final_grace_period, 0); ++ if (ret == -EAGAIN) ++ printk(XENO_WARNING "some RTDM tasks won't stop"); ++ xntimer_release_hardware(); ++ set_realtime_core_state(COBALT_STATE_STOPPED); ++ printk(XENO_INFO "services stopped\n"); ++ break; ++ default: ++ ret = -EINPROGRESS; ++ } ++ ++ return ret; ++} ++ ++static int start_services(void) ++{ ++ enum cobalt_run_states state; ++ int ret = 0; ++ ++ state = atomic_cmpxchg(&cobalt_runstate, ++ COBALT_STATE_STOPPED, ++ COBALT_STATE_WARMUP); ++ switch (state) { ++ case COBALT_STATE_RUNNING: ++ break; ++ case COBALT_STATE_STOPPED: ++ xntimer_grab_hardware(); ++ cobalt_call_state_chain(COBALT_STATE_WARMUP); ++ set_realtime_core_state(COBALT_STATE_RUNNING); ++ printk(XENO_INFO "services started\n"); ++ break; ++ default: ++ ret = -EINPROGRESS; ++ } ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(corectl, probing, ++ (int request, void __user *u_buf, size_t u_bufsz)) ++{ ++ int ret; ++ ++ switch (request) { ++ case _CC_COBALT_STOP_CORE: ++ ret = stop_services(u_buf, u_bufsz); ++ break; ++ case _CC_COBALT_START_CORE: ++ ret = start_services(); ++ break; ++ default: ++ ret = do_conf_option(request, u_buf, u_bufsz); ++ } ++ ++ return ret; ++} ++ ++void cobalt_add_config_chain(struct notifier_block *nb) ++{ ++ blocking_notifier_chain_register(&config_notifier_list, nb); ++} ++EXPORT_SYMBOL_GPL(cobalt_add_config_chain); ++ ++void cobalt_remove_config_chain(struct notifier_block *nb) ++{ ++ blocking_notifier_chain_unregister(&config_notifier_list, nb); ++} ++EXPORT_SYMBOL_GPL(cobalt_remove_config_chain); +--- linux/kernel/xenomai/posix/signal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/signal.h 2021-04-07 16:01:25.961635975 +0800 +@@ -0,0 +1,113 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_SIGNAL_H ++#define _COBALT_POSIX_SIGNAL_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct cobalt_thread; ++ ++struct cobalt_sigpending { ++ struct siginfo si; ++ struct list_head next; ++}; ++ ++static inline ++void cobalt_copy_siginfo(int code, ++ struct siginfo *__restrict__ dst, ++ const struct siginfo *__restrict__ src) ++{ ++ dst->si_signo = src->si_signo; ++ dst->si_errno = src->si_errno; ++ dst->si_code = code; ++ ++ switch (code) { ++ case SI_TIMER: ++ dst->si_tid = src->si_tid; ++ dst->si_overrun = src->si_overrun; ++ dst->si_value = src->si_value; ++ break; ++ case SI_QUEUE: ++ case SI_MESGQ: ++ dst->si_value = src->si_value; ++ /* falldown wanted. 
*/ ++ case SI_USER: ++ dst->si_pid = src->si_pid; ++ dst->si_uid = src->si_uid; ++ } ++} ++ ++int __cobalt_sigwait(sigset_t *set); ++ ++int __cobalt_sigtimedwait(sigset_t *set, ++ const struct timespec *timeout, ++ void __user *u_si, ++ bool compat); ++ ++int __cobalt_sigwaitinfo(sigset_t *set, ++ void __user *u_si, ++ bool compat); ++ ++int __cobalt_sigqueue(pid_t pid, int sig, const union sigval *value); ++ ++int cobalt_signal_send(struct cobalt_thread *thread, ++ struct cobalt_sigpending *sigp, ++ int group); ++ ++int cobalt_signal_send_pid(pid_t pid, ++ struct cobalt_sigpending *sigp); ++ ++struct cobalt_sigpending *cobalt_signal_alloc(void); ++ ++void cobalt_signal_free(struct cobalt_sigpending *sigp); ++ ++void cobalt_signal_flush(struct cobalt_thread *thread); ++ ++int cobalt_signal_wait(sigset_t *set, struct siginfo *si, ++ xnticks_t timeout, xntmode_t tmode); ++ ++int __cobalt_kill(struct cobalt_thread *thread, ++ int sig, int group); ++ ++COBALT_SYSCALL_DECL(sigwait, ++ (const sigset_t __user *u_set, int __user *u_sig)); ++ ++COBALT_SYSCALL_DECL(sigtimedwait, ++ (const sigset_t __user *u_set, ++ struct siginfo __user *u_si, ++ const struct timespec __user *u_timeout)); ++ ++COBALT_SYSCALL_DECL(sigwaitinfo, ++ (const sigset_t __user *u_set, ++ struct siginfo __user *u_si)); ++ ++COBALT_SYSCALL_DECL(sigpending, ++ (old_sigset_t __user *u_set)); ++ ++COBALT_SYSCALL_DECL(kill, (pid_t pid, int sig)); ++ ++COBALT_SYSCALL_DECL(sigqueue, ++ (pid_t pid, int sig, const union sigval __user *u_value)); ++ ++int cobalt_signal_init(void); ++ ++#endif /* !_COBALT_POSIX_SIGNAL_H */ +--- linux/kernel/xenomai/posix/signal.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/signal.c 2021-04-07 16:01:25.956635982 +0800 +@@ -0,0 +1,616 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include "internal.h" ++#include "signal.h" ++#include "thread.h" ++#include "timer.h" ++#include "clock.h" ++ ++static void *sigpending_mem; ++ ++static LIST_HEAD(sigpending_pool); ++ ++/* ++ * How many signal notifications which may be pending at any given ++ * time, except timers. Cobalt signals are always thread directed, ++ * and we assume that in practice, each signal number is processed by ++ * a dedicated thread. We provide for up to three real-time signal ++ * events to pile up, and a single notification pending for other ++ * signals. Timers use a fast queuing logic maintaining a count of ++ * overruns, and therefore do not consume any memory from this pool. 
++ */ ++#define __SIGPOOL_SIZE (sizeof(struct cobalt_sigpending) * \ ++ (_NSIG + (SIGRTMAX - SIGRTMIN) * 2)) ++ ++static int cobalt_signal_deliver(struct cobalt_thread *thread, ++ struct cobalt_sigpending *sigp, ++ int group) ++{ /* nklocked, IRQs off */ ++ struct cobalt_sigwait_context *swc; ++ struct xnthread_wait_context *wc; ++ struct list_head *sigwaiters; ++ int sig, ret; ++ ++ sig = sigp->si.si_signo; ++ XENO_BUG_ON(COBALT, sig < 1 || sig > _NSIG); ++ ++ /* ++ * Attempt to deliver the signal immediately to the initial ++ * target that waits for it. ++ */ ++ if (xnsynch_pended_p(&thread->sigwait)) { ++ wc = xnthread_get_wait_context(&thread->threadbase); ++ swc = container_of(wc, struct cobalt_sigwait_context, wc); ++ if (sigismember(swc->set, sig)) ++ goto deliver; ++ } ++ ++ /* ++ * If that does not work out and we are sending to a thread ++ * group, try to deliver to any thread from the same process ++ * waiting for that signal. ++ */ ++ sigwaiters = &thread->process->sigwaiters; ++ if (!group || list_empty(sigwaiters)) ++ return 0; ++ ++ list_for_each_entry(thread, sigwaiters, signext) { ++ wc = xnthread_get_wait_context(&thread->threadbase); ++ swc = container_of(wc, struct cobalt_sigwait_context, wc); ++ if (sigismember(swc->set, sig)) ++ goto deliver; ++ } ++ ++ return 0; ++deliver: ++ cobalt_copy_siginfo(sigp->si.si_code, swc->si, &sigp->si); ++ cobalt_call_extension(signal_deliver, &thread->extref, ++ ret, swc->si, sigp); ++ xnthread_complete_wait(&swc->wc); ++ xnsynch_wakeup_one_sleeper(&thread->sigwait); ++ list_del(&thread->signext); ++ ++ /* ++ * This is an immediate delivery bypassing any queuing, so we ++ * have to release the sigpending data right away before ++ * leaving. ++ */ ++ cobalt_signal_free(sigp); ++ ++ return 1; ++} ++ ++int cobalt_signal_send(struct cobalt_thread *thread, ++ struct cobalt_sigpending *sigp, ++ int group) ++{ /* nklocked, IRQs off */ ++ struct list_head *sigq; ++ int sig, ret; ++ ++ /* Can we deliver this signal immediately? */ ++ ret = cobalt_signal_deliver(thread, sigp, group); ++ if (ret) ++ return ret; /* Yep, done. */ ++ ++ /* ++ * Nope, attempt to queue it. We start by calling any Cobalt ++ * extension for queuing the signal first. ++ */ ++ if (cobalt_call_extension(signal_queue, &thread->extref, ret, sigp)) { ++ if (ret) ++ /* Queuing done remotely or error. */ ++ return ret; ++ } ++ ++ sig = sigp->si.si_signo; ++ sigq = thread->sigqueues + sig - 1; ++ if (!list_empty(sigq)) { ++ /* Queue non-rt signals only once. */ ++ if (sig < SIGRTMIN) ++ return 0; ++ /* Queue rt signal source only once (SI_TIMER). 
*/ ++ if (!list_empty(&sigp->next)) ++ return 0; ++ } ++ ++ sigaddset(&thread->sigpending, sig); ++ list_add_tail(&sigp->next, sigq); ++ ++ return 1; ++} ++EXPORT_SYMBOL_GPL(cobalt_signal_send); ++ ++int cobalt_signal_send_pid(pid_t pid, struct cobalt_sigpending *sigp) ++{ /* nklocked, IRQs off */ ++ struct cobalt_thread *thread; ++ ++ thread = cobalt_thread_find(pid); ++ if (thread) ++ return cobalt_signal_send(thread, sigp, 0); ++ ++ return -ESRCH; ++} ++EXPORT_SYMBOL_GPL(cobalt_signal_send_pid); ++ ++struct cobalt_sigpending *cobalt_signal_alloc(void) ++{ /* nklocked, IRQs off */ ++ struct cobalt_sigpending *sigp; ++ ++ if (list_empty(&sigpending_pool)) { ++ if (xnclock_ratelimit()) ++ printk(XENO_WARNING "signal bucket pool underflows\n"); ++ return NULL; ++ } ++ ++ sigp = list_get_entry(&sigpending_pool, struct cobalt_sigpending, next); ++ INIT_LIST_HEAD(&sigp->next); ++ ++ return sigp; ++} ++EXPORT_SYMBOL_GPL(cobalt_signal_alloc); ++ ++void cobalt_signal_free(struct cobalt_sigpending *sigp) ++{ /* nklocked, IRQs off */ ++ if ((void *)sigp >= sigpending_mem && ++ (void *)sigp < sigpending_mem + __SIGPOOL_SIZE) ++ list_add_tail(&sigp->next, &sigpending_pool); ++} ++EXPORT_SYMBOL_GPL(cobalt_signal_free); ++ ++void cobalt_signal_flush(struct cobalt_thread *thread) ++{ ++ struct cobalt_sigpending *sigp, *tmp; ++ struct list_head *sigq; ++ spl_t s; ++ int n; ++ ++ /* ++ * TCB is not accessible from userland anymore, no locking ++ * required. ++ */ ++ if (sigisemptyset(&thread->sigpending)) ++ return; ++ ++ for (n = 0; n < _NSIG; n++) { ++ sigq = thread->sigqueues + n; ++ if (list_empty(sigq)) ++ continue; ++ /* ++ * sigpending blocks must be unlinked so that we ++ * detect this fact when deleting their respective ++ * owners. ++ */ ++ list_for_each_entry_safe(sigp, tmp, sigq, next) { ++ list_del_init(&sigp->next); ++ if ((void *)sigp >= sigpending_mem && ++ (void *)sigp < sigpending_mem + __SIGPOOL_SIZE) { ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&sigp->next, &sigpending_pool); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ } ++ } ++ ++ sigemptyset(&thread->sigpending); ++} ++ ++static int signal_put_siginfo(void __user *u_si, const struct siginfo *si, ++ int overrun) ++{ ++ struct siginfo __user *u_p = u_si; ++ int ret; ++ ++ ret = __xn_put_user(si->si_signo, &u_p->si_signo); ++ ret |= __xn_put_user(si->si_errno, &u_p->si_errno); ++ ret |= __xn_put_user(si->si_code, &u_p->si_code); ++ ++ /* ++ * Copy the generic/standard siginfo bits to userland. ++ */ ++ switch (si->si_code) { ++ case SI_TIMER: ++ ret |= __xn_put_user(si->si_tid, &u_p->si_tid); ++ ret |= __xn_put_user(si->si_ptr, &u_p->si_ptr); ++ ret |= __xn_put_user(overrun, &u_p->si_overrun); ++ break; ++ case SI_QUEUE: ++ case SI_MESGQ: ++ ret |= __xn_put_user(si->si_ptr, &u_p->si_ptr); ++ /* falldown wanted. 
*/ ++ case SI_USER: ++ ret |= __xn_put_user(si->si_pid, &u_p->si_pid); ++ ret |= __xn_put_user(si->si_uid, &u_p->si_uid); ++ } ++ ++ return ret; ++} ++ ++static int signal_wait(sigset_t *set, xnticks_t timeout, ++ void __user *u_si, bool compat) ++{ ++ struct cobalt_sigpending *sigp = NULL; ++ struct cobalt_sigwait_context swc; ++ struct cobalt_thread *curr; ++ int ret, sig, n, overrun; ++ unsigned long *p, *t, m; ++ struct siginfo si, *sip; ++ struct list_head *sigq; ++ spl_t s; ++ ++ curr = cobalt_current_thread(); ++ XENO_BUG_ON(COBALT, curr == NULL); ++ ++ if (u_si && !access_wok(u_si, sizeof(*u_si))) ++ return -EFAULT; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++check: ++ if (sigisemptyset(&curr->sigpending)) ++ /* Most common/fast path. */ ++ goto wait; ++ ++ p = curr->sigpending.sig; /* pending */ ++ t = set->sig; /* tested */ ++ ++ for (n = 0, sig = 0; n < _NSIG_WORDS; ++n) { ++ m = *p++ & *t++; ++ if (m == 0) ++ continue; ++ sig = ffz(~m) + n *_NSIG_BPW + 1; ++ break; ++ } ++ ++ if (sig) { ++ sigq = curr->sigqueues + sig - 1; ++ if (list_empty(sigq)) { ++ sigdelset(&curr->sigpending, sig); ++ goto check; ++ } ++ sigp = list_get_entry(sigq, struct cobalt_sigpending, next); ++ INIT_LIST_HEAD(&sigp->next); /* Mark sigp as unlinked. */ ++ if (list_empty(sigq)) ++ sigdelset(&curr->sigpending, sig); ++ sip = &sigp->si; ++ ret = 0; ++ goto done; ++ } ++ ++wait: ++ if (timeout == XN_NONBLOCK) { ++ ret = -EAGAIN; ++ goto fail; ++ } ++ swc.set = set; ++ swc.si = &si; ++ xnthread_prepare_wait(&swc.wc); ++ list_add_tail(&curr->signext, &curr->process->sigwaiters); ++ ret = xnsynch_sleep_on(&curr->sigwait, timeout, XN_RELATIVE); ++ if (ret) { ++ list_del(&curr->signext); ++ ret = ret & XNBREAK ? -EINTR : -EAGAIN; ++ goto fail; ++ } ++ sig = si.si_signo; ++ sip = &si; ++done: ++ /* ++ * si_overrun raises a nasty issue since we have to ++ * collect+clear it atomically before we drop the lock, ++ * although we don't know in advance if any extension would ++ * use it along with the additional si_codes it may provide, ++ * but we must drop the lock before running the ++ * signal_copyinfo handler. ++ * ++ * Observing that si_overrun is likely the only "unstable" ++ * data from the signal information which might change under ++ * our feet while we copy the bits to userland, we collect it ++ * here from the atomic section for all unknown si_codes, ++ * then pass its value to the signal_copyinfo handler. ++ */ ++ switch (sip->si_code) { ++ case SI_TIMER: ++ overrun = cobalt_timer_deliver(curr, sip->si_tid); ++ break; ++ case SI_USER: ++ case SI_MESGQ: ++ case SI_QUEUE: ++ overrun = 0; ++ break; ++ default: ++ overrun = sip->si_overrun; ++ if (overrun) ++ sip->si_overrun = 0; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (u_si == NULL) ++ goto out; /* Return signo only. */ ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (compat) { ++ ret = sys32_put_siginfo(u_si, sip, overrun); ++ if (!ret) ++ /* Allow an extended target to receive more data. */ ++ cobalt_call_extension(signal_copyinfo_compat, ++ &curr->extref, ret, u_si, sip, ++ overrun); ++ } else ++#endif ++ { ++ ret = signal_put_siginfo(u_si, sip, overrun); ++ if (!ret) ++ /* Allow an extended target to receive more data. */ ++ cobalt_call_extension(signal_copyinfo, &curr->extref, ++ ret, u_si, sip, overrun); ++ } ++ ++out: ++ /* ++ * If we pulled the signal information from a sigpending ++ * block, release it to the free pool if applicable. 
++ */ ++ if (sigp && ++ (void *)sigp >= sigpending_mem && ++ (void *)sigp < sigpending_mem + __SIGPOOL_SIZE) { ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&sigp->next, &sigpending_pool); ++ xnlock_put_irqrestore(&nklock, s); ++ /* no more ref. to sigp beyond this point. */ ++ } ++ ++ return ret ? -EFAULT : sig; ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++int __cobalt_sigwait(sigset_t *set) ++{ ++ return signal_wait(set, XN_INFINITE, NULL, false); ++} ++ ++COBALT_SYSCALL(sigwait, primary, ++ (const sigset_t __user *u_set, int __user *u_sig)) ++{ ++ sigset_t set; ++ int sig; ++ ++ if (cobalt_copy_from_user(&set, u_set, sizeof(set))) ++ return -EFAULT; ++ ++ sig = signal_wait(&set, XN_INFINITE, NULL, false); ++ if (sig < 0) ++ return sig; ++ ++ return cobalt_copy_to_user(u_sig, &sig, sizeof(*u_sig)); ++} ++ ++int __cobalt_sigtimedwait(sigset_t *set, ++ const struct timespec *timeout, ++ void __user *u_si, ++ bool compat) ++{ ++ xnticks_t ticks; ++ ++ if ((unsigned long)timeout->tv_nsec >= ONE_BILLION) ++ return -EINVAL; ++ ++ ticks = ts2ns(timeout); ++ if (ticks++ == 0) ++ ticks = XN_NONBLOCK; ++ ++ return signal_wait(set, ticks, u_si, compat); ++} ++ ++COBALT_SYSCALL(sigtimedwait, nonrestartable, ++ (const sigset_t __user *u_set, ++ struct siginfo __user *u_si, ++ const struct timespec __user *u_timeout)) ++{ ++ struct timespec timeout; ++ sigset_t set; ++ ++ if (cobalt_copy_from_user(&set, u_set, sizeof(set))) ++ return -EFAULT; ++ ++ if (cobalt_copy_from_user(&timeout, u_timeout, sizeof(timeout))) ++ return -EFAULT; ++ ++ return __cobalt_sigtimedwait(&set, &timeout, u_si, false); ++} ++ ++int __cobalt_sigwaitinfo(sigset_t *set, ++ void __user *u_si, ++ bool compat) ++{ ++ return signal_wait(set, XN_INFINITE, u_si, compat); ++} ++ ++COBALT_SYSCALL(sigwaitinfo, nonrestartable, ++ (const sigset_t __user *u_set, struct siginfo __user *u_si)) ++{ ++ sigset_t set; ++ ++ if (cobalt_copy_from_user(&set, u_set, sizeof(set))) ++ return -EFAULT; ++ ++ return __cobalt_sigwaitinfo(&set, u_si, false); ++} ++ ++COBALT_SYSCALL(sigpending, primary, (old_sigset_t __user *u_set)) ++{ ++ struct cobalt_thread *curr = cobalt_current_thread(); ++ ++ return cobalt_copy_to_user(u_set, &curr->sigpending, sizeof(*u_set)); ++} ++ ++int __cobalt_kill(struct cobalt_thread *thread, int sig, int group) /* nklocked, IRQs off */ ++{ ++ struct cobalt_sigpending *sigp; ++ int ret = 0; ++ ++ /* ++ * We have undocumented pseudo-signals to suspend/resume/unblock ++ * threads, force them out of primary mode or even demote them ++ * to the weak scheduling class/priority. Process them early, ++ * before anyone can notice... ++ */ ++ switch(sig) { ++ case 0: ++ /* Check for existence only. */ ++ break; ++ case SIGSUSP: ++ /* ++ * All callers shall be tagged as conforming calls, so ++ * self-directed suspension can only happen from ++ * primary mode. Yummie. ++ */ ++ xnthread_suspend(&thread->threadbase, XNSUSP, ++ XN_INFINITE, XN_RELATIVE, NULL); ++ if (&thread->threadbase == xnthread_current() && ++ xnthread_test_info(&thread->threadbase, XNBREAK)) ++ ret = -EINTR; ++ break; ++ case SIGRESM: ++ xnthread_resume(&thread->threadbase, XNSUSP); ++ goto resched; ++ case SIGRELS: ++ xnthread_unblock(&thread->threadbase); ++ goto resched; ++ case SIGKICK: ++ xnthread_kick(&thread->threadbase); ++ goto resched; ++ case SIGDEMT: ++ xnthread_demote(&thread->threadbase); ++ goto resched; ++ case 1 ... 
_NSIG: ++ sigp = cobalt_signal_alloc(); ++ if (sigp) { ++ sigp->si.si_signo = sig; ++ sigp->si.si_errno = 0; ++ sigp->si.si_code = SI_USER; ++ sigp->si.si_pid = task_pid_nr(current); ++ sigp->si.si_uid = get_current_uuid(); ++ if (cobalt_signal_send(thread, sigp, group) <= 0) ++ cobalt_signal_free(sigp); ++ } ++ resched: ++ xnsched_run(); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(kill, conforming, (pid_t pid, int sig)) ++{ ++ struct cobalt_thread *thread; ++ int ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ thread = cobalt_thread_find(pid); ++ if (thread == NULL) ++ ret = -ESRCH; ++ else ++ ret = __cobalt_kill(thread, sig, 1); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++int __cobalt_sigqueue(pid_t pid, int sig, const union sigval *value) ++{ ++ struct cobalt_sigpending *sigp; ++ struct cobalt_thread *thread; ++ int ret = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ thread = cobalt_thread_find(pid); ++ if (thread == NULL) { ++ ret = -ESRCH; ++ goto out; ++ } ++ ++ switch(sig) { ++ case 0: ++ /* Check for existence only. */ ++ break; ++ case 1 ... _NSIG: ++ sigp = cobalt_signal_alloc(); ++ if (sigp) { ++ sigp->si.si_signo = sig; ++ sigp->si.si_errno = 0; ++ sigp->si.si_code = SI_QUEUE; ++ sigp->si.si_pid = task_pid_nr(current); ++ sigp->si.si_uid = get_current_uuid(); ++ sigp->si.si_value = *value; ++ if (cobalt_signal_send(thread, sigp, 1) <= 0) ++ cobalt_signal_free(sigp); ++ else ++ xnsched_run(); ++ } ++ break; ++ default: ++ /* Cobalt pseudo-signals are never process-directed. */ ++ ret = __cobalt_kill(thread, sig, 0); ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__cobalt_sigqueue); ++ ++COBALT_SYSCALL(sigqueue, conforming, ++ (pid_t pid, int sig, const union sigval __user *u_value)) ++{ ++ union sigval val; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&val, u_value, sizeof(val)); ++ ++ return ret ?: __cobalt_sigqueue(pid, sig, &val); ++} ++ ++__init int cobalt_signal_init(void) ++{ ++ struct cobalt_sigpending *sigp; ++ ++ sigpending_mem = xnheap_vmalloc(__SIGPOOL_SIZE); ++ if (sigpending_mem == NULL) ++ return -ENOMEM; ++ ++ for (sigp = sigpending_mem; ++ (void *)sigp < sigpending_mem + __SIGPOOL_SIZE; sigp++) ++ list_add_tail(&sigp->next, &sigpending_pool); ++ ++ return 0; ++} +--- linux/kernel/xenomai/debug.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/debug.h 2021-04-07 16:01:25.884636085 +0800 +@@ -0,0 +1,72 @@ ++/* ++ * Copyright (C) 2010 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++ ++#ifndef _KERNEL_COBALT_DEBUG_H ++#define _KERNEL_COBALT_DEBUG_H ++ ++#include ++ ++struct xnthread; ++ ++#ifdef CONFIG_XENO_OPT_DEBUG ++ ++int xndebug_init(void); ++ ++void xndebug_cleanup(void); ++ ++void xndebug_shadow_init(struct xnthread *thread); ++ ++extern struct xnvfile_directory cobalt_debug_vfroot; ++ ++#else /* !XENO_OPT_DEBUG */ ++ ++static inline int xndebug_init(void) ++{ ++ return 0; ++} ++ ++static inline void xndebug_cleanup(void) ++{ ++} ++ ++static inline void xndebug_shadow_init(struct xnthread *thread) ++{ ++} ++ ++#endif /* !XENO_OPT_DEBUG */ ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_TRACE_RELAX ++void xndebug_notify_relax(struct xnthread *thread, ++ int reason); ++void xndebug_trace_relax(int nr, unsigned long *backtrace, ++ int reason); ++#else ++static inline ++void xndebug_notify_relax(struct xnthread *thread, int reason) ++{ ++} ++static inline ++void xndebug_trace_relax(int nr, unsigned long *backtrace, ++ int reason) ++{ ++ /* Simply ignore. */ ++} ++#endif ++ ++#endif /* !_KERNEL_COBALT_DEBUG_H */ +--- linux/kernel/xenomai/intr.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/intr.c 2021-04-07 16:01:25.879636092 +0800 +@@ -0,0 +1,1204 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * Copyright (C) 2005,2006 Dmitry Adamushko . ++ * Copyright (C) 2007 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++*/ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_irq Interrupt management ++ * @{ ++ */ ++#define XNINTR_MAX_UNHANDLED 1000 ++ ++static DEFINE_MUTEX(intrlock); ++ ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++struct xnintr nktimer; /* Only for statistics */ ++static int xnintr_count = 1; /* Number of attached xnintr objects + nktimer */ ++static int xnintr_list_rev; /* Modification counter of xnintr list */ ++ ++/* Both functions update xnintr_list_rev at the very end. ++ * This guarantees that module.c::stat_seq_open() won't get ++ * an up-to-date xnintr_list_rev and old xnintr_count. */ ++ ++static inline void stat_counter_inc(void) ++{ ++ xnintr_count++; ++ smp_mb(); ++ xnintr_list_rev++; ++} ++ ++static inline void stat_counter_dec(void) ++{ ++ xnintr_count--; ++ smp_mb(); ++ xnintr_list_rev++; ++} ++ ++static inline void sync_stat_references(struct xnintr *intr) ++{ ++ struct xnirqstat *statp; ++ struct xnsched *sched; ++ int cpu; ++ ++ for_each_realtime_cpu(cpu) { ++ sched = xnsched_struct(cpu); ++ statp = per_cpu_ptr(intr->stats, cpu); ++ /* Synchronize on all dangling references to go away. 
*/ ++ while (sched->current_account == &statp->account) ++ cpu_relax(); ++ } ++} ++ ++static void clear_irqstats(struct xnintr *intr) ++{ ++ struct xnirqstat *p; ++ int cpu; ++ ++ for_each_realtime_cpu(cpu) { ++ p = per_cpu_ptr(intr->stats, cpu); ++ memset(p, 0, sizeof(*p)); ++ } ++} ++ ++static inline void alloc_irqstats(struct xnintr *intr) ++{ ++ intr->stats = alloc_percpu(struct xnirqstat); ++ clear_irqstats(intr); ++} ++ ++static inline void free_irqstats(struct xnintr *intr) ++{ ++ free_percpu(intr->stats); ++} ++ ++static inline void query_irqstats(struct xnintr *intr, int cpu, ++ struct xnintr_iterator *iterator) ++{ ++ struct xnirqstat *statp; ++ xnticks_t last_switch; ++ ++ statp = per_cpu_ptr(intr->stats, cpu); ++ iterator->hits = xnstat_counter_get(&statp->hits); ++ last_switch = xnsched_struct(cpu)->last_account_switch; ++ iterator->exectime_period = statp->account.total; ++ iterator->account_period = last_switch - statp->account.start; ++ statp->sum.total += iterator->exectime_period; ++ iterator->exectime_total = statp->sum.total; ++ statp->account.total = 0; ++ statp->account.start = last_switch; ++} ++ ++static void inc_irqstats(struct xnintr *intr, struct xnsched *sched, xnticks_t start) ++{ ++ struct xnirqstat *statp; ++ ++ statp = raw_cpu_ptr(intr->stats); ++ xnstat_counter_inc(&statp->hits); ++ xnstat_exectime_lazy_switch(sched, &statp->account, start); ++} ++ ++static inline void switch_to_irqstats(struct xnintr *intr, ++ struct xnsched *sched) ++{ ++ struct xnirqstat *statp; ++ ++ statp = raw_cpu_ptr(intr->stats); ++ xnstat_exectime_switch(sched, &statp->account); ++} ++ ++static inline void switch_from_irqstats(struct xnsched *sched, ++ xnstat_exectime_t *prev) ++{ ++ xnstat_exectime_switch(sched, prev); ++} ++ ++static inline xnstat_exectime_t *switch_core_irqstats(struct xnsched *sched) ++{ ++ struct xnirqstat *statp; ++ xnstat_exectime_t *prev; ++ ++ statp = xnstat_percpu_data; ++ prev = xnstat_exectime_switch(sched, &statp->account); ++ xnstat_counter_inc(&statp->hits); ++ ++ return prev; ++} ++ ++#else /* !CONFIG_XENO_OPT_STATS_IRQS */ ++ ++static inline void stat_counter_inc(void) {} ++ ++static inline void stat_counter_dec(void) {} ++ ++static inline void sync_stat_references(struct xnintr *intr) {} ++ ++static inline void alloc_irqstats(struct xnintr *intr) {} ++ ++static inline void free_irqstats(struct xnintr *intr) {} ++ ++static inline void clear_irqstats(struct xnintr *intr) {} ++ ++static inline void query_irqstats(struct xnintr *intr, int cpu, ++ struct xnintr_iterator *iterator) {} ++ ++static inline void inc_irqstats(struct xnintr *intr, struct xnsched *sched, xnticks_t start) {} ++ ++static inline void switch_to_irqstats(struct xnintr *intr, ++ struct xnsched *sched) {} ++ ++static inline void switch_from_irqstats(struct xnsched *sched, ++ xnstat_exectime_t *prev) {} ++ ++static inline xnstat_exectime_t *switch_core_irqstats(struct xnsched *sched) ++{ ++ return NULL; ++} ++ ++#endif /* !CONFIG_XENO_OPT_STATS_IRQS */ ++ ++static void xnintr_irq_handler(unsigned int irq, void *cookie); ++ ++void xnintr_host_tick(struct xnsched *sched) /* Interrupts off. */ ++{ ++ sched->lflags &= ~XNHTICK; ++#ifdef XNARCH_HOST_TICK_IRQ ++ ipipe_post_irq_root(XNARCH_HOST_TICK_IRQ); ++#endif ++} ++ ++/* ++ * Low-level core clock irq handler. This one forwards ticks from the ++ * Xenomai platform timer to nkclock exclusively. 
++ */ ++void xnintr_core_clock_handler(void) ++{ ++ struct xnsched *sched = xnsched_current(); ++ int cpu __maybe_unused = xnsched_cpu(sched); ++ xnstat_exectime_t *prev; ++ ++ if (!xnsched_supported_cpu(cpu)) { ++#ifdef XNARCH_HOST_TICK_IRQ ++ ipipe_post_irq_root(XNARCH_HOST_TICK_IRQ); ++#endif ++ return; ++ } ++ ++ prev = switch_core_irqstats(sched); ++ ++ trace_cobalt_clock_entry(per_cpu(ipipe_percpu.hrtimer_irq, cpu)); ++ ++ ++sched->inesting; ++ sched->lflags |= XNINIRQ; ++ ++ xnlock_get(&nklock); ++ xnclock_tick(&nkclock); ++ xnlock_put(&nklock); ++ ++ trace_cobalt_clock_exit(per_cpu(ipipe_percpu.hrtimer_irq, cpu)); ++ switch_from_irqstats(sched, prev); ++ ++ if (--sched->inesting == 0) { ++ sched->lflags &= ~XNINIRQ; ++ xnsched_run(); ++ sched = xnsched_current(); ++ } ++ /* ++ * If the core clock interrupt preempted a real-time thread, ++ * any transition to the root thread has already triggered a ++ * host tick propagation from xnsched_run(), so at this point, ++ * we only need to propagate the host tick in case the ++ * interrupt preempted the root thread. ++ */ ++ if ((sched->lflags & XNHTICK) && ++ xnthread_test_state(sched->curr, XNROOT)) ++ xnintr_host_tick(sched); ++} ++ ++struct irqdisable_work { ++ struct ipipe_work_header work; /* Must be first. */ ++ int irq; ++}; ++ ++static void lostage_irqdisable_line(struct ipipe_work_header *work) ++{ ++ struct irqdisable_work *rq; ++ ++ rq = container_of(work, struct irqdisable_work, work); ++ ipipe_disable_irq(rq->irq); ++} ++ ++static void disable_irq_line(int irq) ++{ ++ struct irqdisable_work diswork = { ++ .work = { ++ .size = sizeof(diswork), ++ .handler = lostage_irqdisable_line, ++ }, ++ .irq = irq, ++ }; ++ ++ ipipe_post_work_root(&diswork, work); ++} ++ ++/* Optional support for shared interrupts. */ ++ ++#ifdef CONFIG_XENO_OPT_SHIRQ ++ ++struct xnintr_vector { ++ DECLARE_XNLOCK(lock); ++ struct xnintr *handlers; ++ int unhandled; ++} ____cacheline_aligned_in_smp; ++ ++static struct xnintr_vector vectors[IPIPE_NR_IRQS]; ++ ++static inline struct xnintr *xnintr_vec_first(unsigned int irq) ++{ ++ return vectors[irq].handlers; ++} ++ ++static inline struct xnintr *xnintr_vec_next(struct xnintr *prev) ++{ ++ return prev->next; ++} ++ ++static void disable_shared_irq_line(struct xnintr_vector *vec) ++{ ++ int irq = vec - vectors; ++ struct xnintr *intr; ++ ++ xnlock_get(&vec->lock); ++ intr = vec->handlers; ++ while (intr) { ++ set_bit(XN_IRQSTAT_DISABLED, &intr->status); ++ intr = intr->next; ++ } ++ xnlock_put(&vec->lock); ++ disable_irq_line(irq); ++} ++ ++/* ++ * Low-level interrupt handler dispatching the user-defined ISRs for ++ * shared interrupts -- Called with interrupts off. ++ */ ++static void xnintr_vec_handler(unsigned int irq, void *cookie) ++{ ++ struct xnsched *sched = xnsched_current(); ++ struct xnintr_vector *vec = vectors + irq; ++ xnstat_exectime_t *prev; ++ struct xnintr *intr; ++ xnticks_t start; ++ int s = 0, ret; ++ ++ prev = xnstat_exectime_get_current(sched); ++ start = xnstat_exectime_now(); ++ trace_cobalt_irq_entry(irq); ++ ++ ++sched->inesting; ++ sched->lflags |= XNINIRQ; ++ ++ xnlock_get(&vec->lock); ++ intr = vec->handlers; ++ if (unlikely(test_bit(XN_IRQSTAT_DISABLED, &intr->status))) { ++ /* irqdisable_work is on its way, ignore. */ ++ xnlock_put(&vec->lock); ++ goto out; ++ } ++ ++ while (intr) { ++ /* ++ * NOTE: We assume that no CPU migration can occur ++ * while running the interrupt service routine. 
++ */ ++ ret = intr->isr(intr); ++ XENO_WARN_ON_ONCE(USER, (ret & XN_IRQ_STATMASK) == 0); ++ s |= ret; ++ if (ret & XN_IRQ_HANDLED) { ++ inc_irqstats(intr, sched, start); ++ start = xnstat_exectime_now(); ++ } ++ intr = intr->next; ++ } ++ ++ xnlock_put(&vec->lock); ++ ++ if (unlikely(!(s & XN_IRQ_HANDLED))) { ++ if (++vec->unhandled == XNINTR_MAX_UNHANDLED) { ++ printk(XENO_ERR "%s: IRQ%d not handled. Disabling IRQ line\n", ++ __FUNCTION__, irq); ++ s |= XN_IRQ_DISABLE; ++ } ++ } else ++ vec->unhandled = 0; ++ ++ if (s & XN_IRQ_PROPAGATE) ++ ipipe_post_irq_root(irq); ++ else if (s & XN_IRQ_DISABLE) ++ disable_shared_irq_line(vec); ++ else ++ ipipe_end_irq(irq); ++out: ++ switch_from_irqstats(sched, prev); ++ ++ trace_cobalt_irq_exit(irq); ++ ++ if (--sched->inesting == 0) { ++ sched->lflags &= ~XNINIRQ; ++ xnsched_run(); ++ } ++} ++ ++/* ++ * Low-level interrupt handler dispatching the user-defined ISRs for ++ * shared edge-triggered interrupts -- Called with interrupts off. ++ */ ++static void xnintr_edge_vec_handler(unsigned int irq, void *cookie) ++{ ++ const int MAX_EDGEIRQ_COUNTER = 128; ++ struct xnsched *sched = xnsched_current(); ++ struct xnintr_vector *vec = vectors + irq; ++ struct xnintr *intr, *end = NULL; ++ int s = 0, counter = 0, ret; ++ xnstat_exectime_t *prev; ++ xnticks_t start; ++ ++ prev = xnstat_exectime_get_current(sched); ++ start = xnstat_exectime_now(); ++ trace_cobalt_irq_entry(irq); ++ ++ ++sched->inesting; ++ sched->lflags |= XNINIRQ; ++ ++ xnlock_get(&vec->lock); ++ intr = vec->handlers; ++ if (unlikely(test_bit(XN_IRQSTAT_DISABLED, &intr->status))) { ++ /* irqdisable_work is on its way, ignore. */ ++ xnlock_put(&vec->lock); ++ goto out; ++ } ++ ++ while (intr != end) { ++ switch_to_irqstats(intr, sched); ++ /* ++ * NOTE: We assume that no CPU migration will occur ++ * while running the interrupt service routine. ++ */ ++ ret = intr->isr(intr); ++ XENO_WARN_ON_ONCE(USER, (ret & XN_IRQ_STATMASK) == 0); ++ s |= ret; ++ ++ if (ret & XN_IRQ_HANDLED) { ++ end = NULL; ++ inc_irqstats(intr, sched, start); ++ start = xnstat_exectime_now(); ++ } else if (end == NULL) ++ end = intr; ++ ++ if (counter++ > MAX_EDGEIRQ_COUNTER) ++ break; ++ ++ intr = intr->next; ++ if (intr == NULL) ++ intr = vec->handlers; ++ } ++ ++ xnlock_put(&vec->lock); ++ ++ if (counter > MAX_EDGEIRQ_COUNTER) ++ printk(XENO_ERR "%s: failed to get the IRQ%d line free\n", ++ __FUNCTION__, irq); ++ ++ if (unlikely(!(s & XN_IRQ_HANDLED))) { ++ if (++vec->unhandled == XNINTR_MAX_UNHANDLED) { ++ printk(XENO_ERR "%s: IRQ%d not handled. Disabling IRQ line\n", ++ __FUNCTION__, irq); ++ s |= XN_IRQ_DISABLE; ++ } ++ } else ++ vec->unhandled = 0; ++ ++ if (s & XN_IRQ_PROPAGATE) ++ ipipe_post_irq_root(irq); ++ else if (s & XN_IRQ_DISABLE) ++ disable_shared_irq_line(vec); ++ else ++ ipipe_end_irq(irq); ++out: ++ switch_from_irqstats(sched, prev); ++ ++ trace_cobalt_irq_exit(irq); ++ ++ if (--sched->inesting == 0) { ++ sched->lflags &= ~XNINIRQ; ++ xnsched_run(); ++ } ++} ++ ++static inline bool cobalt_owns_irq(int irq) ++{ ++ ipipe_irq_handler_t h; ++ ++ h = __ipipe_irq_handler(&xnsched_realtime_domain, irq); ++ ++ return h == xnintr_vec_handler || ++ h == xnintr_edge_vec_handler || ++ h == xnintr_irq_handler; ++} ++ ++static inline int xnintr_irq_attach(struct xnintr *intr) ++{ ++ struct xnintr_vector *vec = vectors + intr->irq; ++ struct xnintr *prev, **p = &vec->handlers; ++ int ret; ++ ++ prev = *p; ++ if (prev) { ++ /* Check on whether the shared mode is allowed. 
*/ ++ if ((prev->flags & intr->flags & XN_IRQTYPE_SHARED) == 0 || ++ (prev->iack != intr->iack) ++ || ((prev->flags & XN_IRQTYPE_EDGE) != ++ (intr->flags & XN_IRQTYPE_EDGE))) ++ return -EBUSY; ++ ++ /* ++ * Get a position at the end of the list to insert the ++ * new element. ++ */ ++ while (prev) { ++ p = &prev->next; ++ prev = *p; ++ } ++ } else { ++ /* Initialize the corresponding interrupt channel */ ++ void (*handler) (unsigned, void *) = xnintr_irq_handler; ++ ++ if (intr->flags & XN_IRQTYPE_SHARED) { ++ if (intr->flags & XN_IRQTYPE_EDGE) ++ handler = xnintr_edge_vec_handler; ++ else ++ handler = xnintr_vec_handler; ++ ++ } ++ vec->unhandled = 0; ++ ++ ret = ipipe_request_irq(&xnsched_realtime_domain, ++ intr->irq, handler, intr, ++ (ipipe_irq_ackfn_t)intr->iack); ++ if (ret) ++ return ret; ++ } ++ ++ intr->next = NULL; ++ /* ++ * Add the given interrupt object. No need to synchronise with ++ * the IRQ handler, we are only extending the chain. ++ */ ++ *p = intr; ++ ++ return 0; ++} ++ ++static inline void xnintr_irq_detach(struct xnintr *intr) ++{ ++ struct xnintr_vector *vec = vectors + intr->irq; ++ struct xnintr *e, **p = &vec->handlers; ++ ++ while ((e = *p) != NULL) { ++ if (e == intr) { ++ /* Remove the given interrupt object from the list. */ ++ xnlock_get(&vec->lock); ++ *p = e->next; ++ xnlock_put(&vec->lock); ++ ++ sync_stat_references(intr); ++ ++ /* Release the IRQ line if this was the last user */ ++ if (vec->handlers == NULL) ++ ipipe_free_irq(&xnsched_realtime_domain, intr->irq); ++ ++ return; ++ } ++ p = &e->next; ++ } ++ ++ printk(XENO_ERR "attempted to detach an unregistered interrupt descriptor\n"); ++} ++ ++#else /* !CONFIG_XENO_OPT_SHIRQ */ ++ ++struct xnintr_vector { ++#if defined(CONFIG_SMP) || defined(CONFIG_XENO_OPT_DEBUG_LOCKING) ++ DECLARE_XNLOCK(lock); ++#endif /* CONFIG_SMP || XENO_DEBUG(LOCKING) */ ++} ____cacheline_aligned_in_smp; ++ ++static struct xnintr_vector vectors[IPIPE_NR_IRQS]; ++ ++static inline bool cobalt_owns_irq(int irq) ++{ ++ ipipe_irq_handler_t h; ++ ++ h = __ipipe_irq_handler(&xnsched_realtime_domain, irq); ++ ++ return h == xnintr_irq_handler; ++} ++ ++static inline struct xnintr *xnintr_vec_first(unsigned int irq) ++{ ++ return cobalt_owns_irq(irq) ? ++ __ipipe_irq_cookie(&xnsched_realtime_domain, irq) : NULL; ++} ++ ++static inline struct xnintr *xnintr_vec_next(struct xnintr *prev) ++{ ++ return NULL; ++} ++ ++static inline int xnintr_irq_attach(struct xnintr *intr) ++{ ++ return ipipe_request_irq(&xnsched_realtime_domain, ++ intr->irq, xnintr_irq_handler, intr, ++ (ipipe_irq_ackfn_t)intr->iack); ++} ++ ++static inline void xnintr_irq_detach(struct xnintr *intr) ++{ ++ int irq = intr->irq; ++ ++ xnlock_get(&vectors[irq].lock); ++ ipipe_free_irq(&xnsched_realtime_domain, irq); ++ xnlock_put(&vectors[irq].lock); ++ ++ sync_stat_references(intr); ++} ++ ++#endif /* !CONFIG_XENO_OPT_SHIRQ */ ++ ++/* ++ * Low-level interrupt handler dispatching non-shared ISRs -- Called ++ * with interrupts off. 
++ */ ++static void xnintr_irq_handler(unsigned int irq, void *cookie) ++{ ++ struct xnintr_vector __maybe_unused *vec = vectors + irq; ++ struct xnsched *sched = xnsched_current(); ++ xnstat_exectime_t *prev; ++ struct xnintr *intr; ++ xnticks_t start; ++ int s = 0; ++ ++ prev = xnstat_exectime_get_current(sched); ++ start = xnstat_exectime_now(); ++ trace_cobalt_irq_entry(irq); ++ ++ ++sched->inesting; ++ sched->lflags |= XNINIRQ; ++ ++ xnlock_get(&vec->lock); ++ ++#ifdef CONFIG_SMP ++ /* ++ * In SMP case, we have to reload the cookie under the per-IRQ ++ * lock to avoid racing with xnintr_detach. However, we ++ * assume that no CPU migration will occur while running the ++ * interrupt service routine, so the scheduler pointer will ++ * remain valid throughout this function. ++ */ ++ intr = __ipipe_irq_cookie(&xnsched_realtime_domain, irq); ++ if (unlikely(intr == NULL)) ++ goto done; ++#else ++ intr = cookie; ++#endif ++ if (unlikely(test_bit(XN_IRQSTAT_DISABLED, &intr->status))) { ++ /* irqdisable_work is on its way, ignore. */ ++ xnlock_put(&vec->lock); ++ goto out; ++ } ++ ++ s = intr->isr(intr); ++ XENO_WARN_ON_ONCE(USER, (s & XN_IRQ_STATMASK) == 0); ++ if (unlikely(!(s & XN_IRQ_HANDLED))) { ++ if (++intr->unhandled == XNINTR_MAX_UNHANDLED) { ++ printk(XENO_ERR "%s: IRQ%d not handled. Disabling IRQ line\n", ++ __FUNCTION__, irq); ++ s |= XN_IRQ_DISABLE; ++ } ++ } else { ++ inc_irqstats(intr, sched, start); ++ intr->unhandled = 0; ++ } ++ ++ if (s & XN_IRQ_DISABLE) ++ set_bit(XN_IRQSTAT_DISABLED, &intr->status); ++#ifdef CONFIG_SMP ++done: ++#endif ++ xnlock_put(&vec->lock); ++ ++ if (s & XN_IRQ_DISABLE) ++ disable_irq_line(irq); ++ else if (s & XN_IRQ_PROPAGATE) ++ ipipe_post_irq_root(irq); ++ else ++ ipipe_end_irq(irq); ++out: ++ switch_from_irqstats(sched, prev); ++ ++ trace_cobalt_irq_exit(irq); ++ ++ if (--sched->inesting == 0) { ++ sched->lflags &= ~XNINIRQ; ++ xnsched_run(); ++ } ++} ++ ++int __init xnintr_mount(void) ++{ ++ int i; ++ for (i = 0; i < IPIPE_NR_IRQS; ++i) ++ xnlock_init(&vectors[i].lock); ++ return 0; ++} ++ ++/** ++ * @fn int xnintr_init(struct xnintr *intr,const char *name,unsigned int irq,xnisr_t isr,xniack_t iack,int flags) ++ * @brief Initialize an interrupt descriptor. ++ * ++ * When an interrupt occurs on the given @a irq line, the interrupt ++ * service routine @a isr is fired in order to deal with the hardware ++ * event. The interrupt handler may call any non-blocking service from ++ * the Cobalt core. ++ * ++ * Upon receipt of an IRQ, the interrupt handler @a isr is immediately ++ * called on behalf of the interrupted stack context, the rescheduling ++ * procedure is locked, and the interrupt line is masked in the system ++ * interrupt controller chip. Upon return, the status of the ++ * interrupt handler is checked for the following bits: ++ * ++ * - XN_IRQ_HANDLED indicates that the interrupt request was ++ * successfully handled. ++ * ++ * - XN_IRQ_NONE indicates the opposite to XN_IRQ_HANDLED, meaning ++ * that no interrupt source could be identified for the ongoing ++ * request by the handler. ++ * ++ * In addition, one of the following bits may be present in the ++ * status: ++ * ++ * - XN_IRQ_DISABLE tells the Cobalt core to disable the interrupt ++ * line before returning from the interrupt context. ++ * ++ * - XN_IRQ_PROPAGATE propagates the IRQ event down the interrupt ++ * pipeline to Linux. Using this flag is strongly discouraged, unless ++ * you fully understand the implications of such propagation. 
++ * ++ * @warning The handler should not use these bits if it shares the ++ * interrupt line with other handlers in the real-time domain. When ++ * any of these bits is detected, the interrupt line is left masked. ++ * ++ * A count of interrupt receipts is tracked into the interrupt ++ * descriptor, and reset to zero each time such descriptor is ++ * attached. Since this count could wrap around, it should be used as ++ * an indication of interrupt activity only. ++ * ++ * @param intr The address of a descriptor the Cobalt core will use to ++ * store the interrupt-specific data. ++ * ++ * @param name An ASCII string standing for the symbolic name of the ++ * interrupt or NULL. ++ * ++ * @param irq The IRQ line number associated with the interrupt ++ * descriptor. This value is architecture-dependent. An interrupt ++ * descriptor must be attached to the system by a call to ++ * xnintr_attach() before @a irq events can be received. ++ * ++ * @param isr The address of an interrupt handler, which is passed the ++ * address of the interrupt descriptor receiving the IRQ. ++ * ++ * @param iack The address of an optional interrupt acknowledge ++ * routine, aimed at replacing the default one. Only very specific ++ * situations actually require to override the default setting for ++ * this parameter, like having to acknowledge non-standard PIC ++ * hardware. @a iack should return a non-zero value to indicate that ++ * the interrupt has been properly acknowledged. If @a iack is NULL, ++ * the default routine will be used instead. ++ * ++ * @param flags A set of creation flags affecting the operation. The ++ * valid flags are: ++ * ++ * - XN_IRQTYPE_SHARED enables IRQ-sharing with other interrupt ++ * objects. ++ * ++ * - XN_IRQTYPE_EDGE is an additional flag need to be set together ++ * with XN_IRQTYPE_SHARED to enable IRQ-sharing of edge-triggered ++ * interrupts. ++ * ++ * @return 0 is returned on success. Otherwise, -EINVAL is returned if ++ * @a irq is not a valid interrupt number. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnintr_init(struct xnintr *intr, const char *name, ++ unsigned int irq, xnisr_t isr, xniack_t iack, ++ int flags) ++{ ++ secondary_mode_only(); ++ ++ if (irq >= IPIPE_NR_IRQS) ++ return -EINVAL; ++ ++ intr->irq = irq; ++ intr->isr = isr; ++ intr->iack = iack; ++ intr->cookie = NULL; ++ intr->name = name ? : ""; ++ intr->flags = flags; ++ intr->status = _XN_IRQSTAT_DISABLED; ++ intr->unhandled = 0; ++ raw_spin_lock_init(&intr->lock); ++#ifdef CONFIG_XENO_OPT_SHIRQ ++ intr->next = NULL; ++#endif ++ alloc_irqstats(intr); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnintr_init); ++ ++/** ++ * @fn void xnintr_destroy(struct xnintr *intr) ++ * @brief Destroy an interrupt descriptor. ++ * ++ * Destroys an interrupt descriptor previously initialized by ++ * xnintr_init(). The descriptor is automatically detached by a call ++ * to xnintr_detach(). No more IRQs will be received through this ++ * descriptor after this service has returned. ++ * ++ * @param intr The address of the interrupt descriptor to destroy. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnintr_destroy(struct xnintr *intr) ++{ ++ secondary_mode_only(); ++ xnintr_detach(intr); ++ free_irqstats(intr); ++} ++EXPORT_SYMBOL_GPL(xnintr_destroy); ++ ++/** ++ * @fn int xnintr_attach(struct xnintr *intr, void *cookie) ++ * @brief Attach an interrupt descriptor. ++ * ++ * Attach an interrupt descriptor previously initialized by ++ * xnintr_init(). 
This operation registers the descriptor at the ++ * interrupt pipeline, but does not enable the interrupt line yet. A ++ * call to xnintr_enable() is required to start receiving IRQs from ++ * the interrupt line associated to the descriptor. ++ * ++ * @param intr The address of the interrupt descriptor to attach. ++ * ++ * @param cookie A user-defined opaque value which is stored into the ++ * descriptor for further retrieval by the interrupt handler. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -EINVAL is returned if an error occurred while attaching the ++ * descriptor. ++ * ++ * - -EBUSY is returned if the descriptor was already attached. ++ * ++ * @note The caller must not hold nklock when invoking this service, ++ * this would cause deadlocks. ++ * ++ * @coretags{secondary-only} ++ * ++ * @note Attaching an interrupt descriptor resets the tracked number ++ * of IRQ receipts to zero. ++ */ ++int xnintr_attach(struct xnintr *intr, void *cookie) ++{ ++ int ret; ++ ++ secondary_mode_only(); ++ trace_cobalt_irq_attach(intr->irq); ++ ++ intr->cookie = cookie; ++ clear_irqstats(intr); ++ ++#ifdef CONFIG_SMP ++ ipipe_set_irq_affinity(intr->irq, xnsched_realtime_cpus); ++#endif /* CONFIG_SMP */ ++ ++ raw_spin_lock(&intr->lock); ++ ++ if (test_and_set_bit(XN_IRQSTAT_ATTACHED, &intr->status)) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ ret = xnintr_irq_attach(intr); ++ if (ret) { ++ clear_bit(XN_IRQSTAT_ATTACHED, &intr->status); ++ goto out; ++ } ++ ++ stat_counter_inc(); ++out: ++ raw_spin_unlock(&intr->lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnintr_attach); ++ ++/** ++ * @fn int xnintr_detach(struct xnintr *intr) ++ * @brief Detach an interrupt descriptor. ++ * ++ * This call unregisters an interrupt descriptor previously attached ++ * by xnintr_attach() from the interrupt pipeline. Once detached, the ++ * associated interrupt line is disabled, but the descriptor remains ++ * valid. The descriptor can be attached anew by a call to ++ * xnintr_attach(). ++ * ++ * @param intr The address of the interrupt descriptor to detach. ++ * ++ * @note The caller must not hold nklock when invoking this ++ * service, this would cause deadlocks. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnintr_detach(struct xnintr *intr) ++{ ++ secondary_mode_only(); ++ trace_cobalt_irq_detach(intr->irq); ++ ++ raw_spin_lock(&intr->lock); ++ ++ if (test_and_clear_bit(XN_IRQSTAT_ATTACHED, &intr->status)) { ++ xnintr_irq_detach(intr); ++ stat_counter_dec(); ++ } ++ ++ raw_spin_unlock(&intr->lock); ++} ++EXPORT_SYMBOL_GPL(xnintr_detach); ++ ++/** ++ * @fn void xnintr_enable(struct xnintr *intr) ++ * @brief Enable an interrupt line. ++ * ++ * Enables the interrupt line associated with an interrupt descriptor. ++ * ++ * @param intr The address of the interrupt descriptor. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnintr_enable(struct xnintr *intr) ++{ ++ unsigned long flags; ++ ++ secondary_mode_only(); ++ trace_cobalt_irq_enable(intr->irq); ++ ++ raw_spin_lock_irqsave(&intr->lock, flags); ++ ++ /* ++ * If disabled on entry, there is no way we could race with ++ * disable_irq_line(). ++ */ ++ if (test_and_clear_bit(XN_IRQSTAT_DISABLED, &intr->status)) ++ ipipe_enable_irq(intr->irq); ++ ++ raw_spin_unlock_irqrestore(&intr->lock, flags); ++} ++EXPORT_SYMBOL_GPL(xnintr_enable); ++ ++/** ++ * @fn void xnintr_disable(struct xnintr *intr) ++ * @brief Disable an interrupt line. ++ * ++ * Disables the interrupt line associated with an interrupt ++ * descriptor. 
++ * ++ * @param intr The address of the interrupt descriptor. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnintr_disable(struct xnintr *intr) ++{ ++ unsigned long flags; ++ ++ secondary_mode_only(); ++ trace_cobalt_irq_disable(intr->irq); ++ ++ /* We only need a virtual masking. */ ++ raw_spin_lock_irqsave(&intr->lock, flags); ++ ++ /* ++ * Racing with disable_irq_line() is innocuous, the pipeline ++ * would serialize calls to ipipe_disable_irq() across CPUs, ++ * and the descriptor status would still properly match the ++ * line status in the end. ++ */ ++ if (!test_and_set_bit(XN_IRQSTAT_DISABLED, &intr->status)) ++ ipipe_disable_irq(intr->irq); ++ ++ raw_spin_unlock_irqrestore(&intr->lock, flags); ++} ++EXPORT_SYMBOL_GPL(xnintr_disable); ++ ++/** ++ * @fn void xnintr_affinity(struct xnintr *intr, cpumask_t cpumask) ++ * @brief Set processor affinity of interrupt. ++ * ++ * Restricts the IRQ line associated with the interrupt descriptor @a ++ * intr to be received only on processors which bits are set in @a ++ * cpumask. ++ * ++ * @param intr The address of the interrupt descriptor. ++ * ++ * @param cpumask The new processor affinity. ++ * ++ * @note Depending on architectures, setting more than one bit in @a ++ * cpumask could be meaningless. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnintr_affinity(struct xnintr *intr, cpumask_t cpumask) ++{ ++ secondary_mode_only(); ++#ifdef CONFIG_SMP ++ ipipe_set_irq_affinity(intr->irq, cpumask); ++#endif ++} ++EXPORT_SYMBOL_GPL(xnintr_affinity); ++ ++static inline int xnintr_is_timer_irq(int irq) ++{ ++ int cpu; ++ ++ for_each_realtime_cpu(cpu) ++ if (irq == per_cpu(ipipe_percpu.hrtimer_irq, cpu)) ++ return 1; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++ ++int xnintr_get_query_lock(void) ++{ ++ return mutex_lock_interruptible(&intrlock) ? -ERESTARTSYS : 0; ++} ++ ++void xnintr_put_query_lock(void) ++{ ++ mutex_unlock(&intrlock); ++} ++ ++int xnintr_query_init(struct xnintr_iterator *iterator) ++{ ++ iterator->prev = NULL; ++ ++ /* The order is important here: first xnintr_list_rev then ++ * xnintr_count. On the other hand, xnintr_attach/detach() ++ * update xnintr_count first and then xnintr_list_rev. This ++ * should guarantee that we can't get an up-to-date ++ * xnintr_list_rev and old xnintr_count here. The other way ++ * around is not a problem as xnintr_query() will notice this ++ * fact later. Should xnintr_list_rev change later, ++ * xnintr_query() will trigger an appropriate error below. 
++ */ ++ iterator->list_rev = xnintr_list_rev; ++ smp_mb(); ++ ++ return xnintr_count; ++} ++ ++int xnintr_query_next(int irq, struct xnintr_iterator *iterator, ++ char *name_buf) ++{ ++ int cpu, nr_cpus = num_present_cpus(); ++ struct xnintr *intr; ++ ++ if (iterator->list_rev != xnintr_list_rev) ++ return -EAGAIN; ++ ++ intr = iterator->prev; ++ if (intr == NULL) { ++ if (xnintr_is_timer_irq(irq)) ++ intr = &nktimer; ++ else ++ intr = xnintr_vec_first(irq); ++ if (intr == NULL) ++ return -ENODEV; ++ iterator->prev = intr; ++ iterator->cpu = -1; ++ } ++ ++ for (;;) { ++ for (cpu = iterator->cpu + 1; cpu < nr_cpus; ++cpu) { ++ if (cpu_online(cpu)) { ++ ksformat(name_buf, XNOBJECT_NAME_LEN, "IRQ%d: %s", ++ irq, intr->name); ++ query_irqstats(intr, cpu, iterator); ++ iterator->cpu = cpu; ++ return 0; ++ } ++ } ++ ++ iterator->prev = xnintr_vec_next(intr); ++ if (iterator->prev == NULL) ++ return -ENODEV; ++ ++ iterator->cpu = -1; ++ } ++} ++ ++#endif /* CONFIG_XENO_OPT_STATS_IRQS */ ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++#include ++ ++static inline int format_irq_proc(unsigned int irq, ++ struct xnvfile_regular_iterator *it) ++{ ++ struct xnintr *intr; ++ struct irq_desc *d; ++ int cpu; ++ ++ for_each_realtime_cpu(cpu) ++ if (xnintr_is_timer_irq(irq)) { ++ xnvfile_printf(it, " [timer/%d]", cpu); ++ return 0; ++ } ++ ++#ifdef CONFIG_SMP ++ /* ++ * IPI numbers on ARM are not compile time constants, so do ++ * not use switch/case here. ++ */ ++ if (irq == IPIPE_HRTIMER_IPI) { ++ xnvfile_puts(it, " [timer-ipi]"); ++ return 0; ++ } ++ if (irq == IPIPE_RESCHEDULE_IPI) { ++ xnvfile_puts(it, " [reschedule]"); ++ return 0; ++ } ++ if (irq == IPIPE_CRITICAL_IPI) { ++ xnvfile_puts(it, " [sync]"); ++ return 0; ++ } ++#endif /* CONFIG_SMP */ ++ if (ipipe_virtual_irq_p(irq)) { ++ xnvfile_puts(it, " [virtual]"); ++ return 0; ++ } ++ ++ mutex_lock(&intrlock); ++ ++ if (!cobalt_owns_irq(irq)) { ++ xnvfile_puts(it, " "); ++ d = irq_to_desc(irq); ++ xnvfile_puts(it, d && d->name ? d->name : "-"); ++ } else { ++ intr = xnintr_vec_first(irq); ++ if (intr) { ++ xnvfile_puts(it, " "); ++ ++ do { ++ xnvfile_putc(it, ' '); ++ xnvfile_puts(it, intr->name); ++ intr = xnintr_vec_next(intr); ++ } while (intr); ++ } ++ } ++ ++ mutex_unlock(&intrlock); ++ ++ return 0; ++} ++ ++static int irq_vfile_show(struct xnvfile_regular_iterator *it, ++ void *data) ++{ ++ int cpu, irq; ++ ++ /* FIXME: We assume the entire output fits in a single page. 
*/ ++ ++ xnvfile_puts(it, " IRQ "); ++ ++ for_each_realtime_cpu(cpu) ++ xnvfile_printf(it, " CPU%d", cpu); ++ ++ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { ++ if (__ipipe_irq_handler(&xnsched_realtime_domain, irq) == NULL) ++ continue; ++ ++ xnvfile_printf(it, "\n%5d:", irq); ++ ++ for_each_realtime_cpu(cpu) { ++ xnvfile_printf(it, "%12lu", ++ __ipipe_cpudata_irq_hits(&xnsched_realtime_domain, cpu, ++ irq)); ++ } ++ ++ format_irq_proc(irq, it); ++ } ++ ++ xnvfile_putc(it, '\n'); ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops irq_vfile_ops = { ++ .show = irq_vfile_show, ++}; ++ ++static struct xnvfile_regular irq_vfile = { ++ .ops = &irq_vfile_ops, ++}; ++ ++void xnintr_init_proc(void) ++{ ++ xnvfile_init_regular("irq", &irq_vfile, &cobalt_vfroot); ++} ++ ++void xnintr_cleanup_proc(void) ++{ ++ xnvfile_destroy_regular(&irq_vfile); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++/** @} */ +--- linux/kernel/xenomai/apc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/apc.c 2021-04-07 16:01:25.873636101 +0800 +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (C) 2007,2012 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_apc Asynchronous Procedure Calls ++ * ++ * Services for scheduling function calls in the Linux domain ++ * ++ * APC is the acronym for Asynchronous Procedure Call, a mean by which ++ * activities from the Xenomai domain can schedule deferred ++ * invocations of handlers to be run into the Linux domain, as soon as ++ * possible when the Linux kernel gets back in control. ++ * ++ * Up to BITS_PER_LONG APC slots can be active at any point in time. ++ * ++ * APC support is built upon the interrupt pipeline's virtual ++ * interrupt support. ++ * ++ * @{ ++ */ ++static IPIPE_DEFINE_SPINLOCK(apc_lock); ++ ++void apc_dispatch(unsigned int virq, void *arg) ++{ ++ void (*handler)(void *), *cookie; ++ unsigned long *p; ++ int apc; ++ ++ /* ++ * CAUTION: The APC dispatch loop is not protected against a ++ * handler becoming unavailable while processing the pending ++ * queue; the software must make sure to uninstall all APCs ++ * before eventually unloading any module that may contain APC ++ * handlers. We keep the handler affinity with the poster's ++ * CPU, so that the handler is invoked on the same CPU than ++ * the code which called xnapc_schedule(). ++ */ ++ raw_spin_lock(&apc_lock); ++ ++ /* This is atomic linux context (non-threaded IRQ). 
*/ ++ p = &raw_cpu_ptr(&cobalt_machine_cpudata)->apc_pending; ++ while (*p) { ++ apc = ffnz(*p); ++ clear_bit(apc, p); ++ handler = cobalt_pipeline.apc_table[apc].handler; ++ cookie = cobalt_pipeline.apc_table[apc].cookie; ++ raw_cpu_ptr(&cobalt_machine_cpudata)->apc_shots[apc]++; ++ raw_spin_unlock(&apc_lock); ++ handler(cookie); ++ raw_spin_lock(&apc_lock); ++ } ++ ++ raw_spin_unlock(&apc_lock); ++} ++ ++/** ++ * @fn int xnapc_alloc(const char *name,void (*handler)(void *cookie),void *cookie) ++ * ++ * @brief Allocate an APC slot. ++ * ++ * APC is the acronym for Asynchronous Procedure Call, a mean by which ++ * activities from the Xenomai domain can schedule deferred ++ * invocations of handlers to be run into the Linux domain, as soon as ++ * possible when the Linux kernel gets back in control. Up to ++ * BITS_PER_LONG APC slots can be active at any point in time. APC ++ * support is built upon the interrupt pipeline's virtual interrupt ++ * support. ++ * ++ * Any Linux kernel service which is callable from a regular Linux ++ * interrupt handler is in essence available to APC handlers. ++ * ++ * @param name is a symbolic name identifying the APC which will get ++ * reported through the /proc/xenomai/apc interface. Passing NULL to ++ * create an anonymous APC is allowed. ++ * ++ * @param handler The address of the fault handler to call upon ++ * exception condition. The handle will be passed the @a cookie value ++ * unmodified. ++ * ++ * @param cookie A user-defined opaque pointer the APC handler ++ * receives as its sole argument. ++ * ++ * @return a valid APC identifier is returned upon success, or a ++ * negative error code otherwise: ++ * ++ * - -EINVAL is returned if @a handler is invalid. ++ * ++ * - -EBUSY is returned if no more APC slots are available. ++ * ++ * @coretags{unrestricted} ++ */ ++int xnapc_alloc(const char *name, ++ void (*handler)(void *cookie), void *cookie) ++{ ++ unsigned long flags; ++ int apc; ++ ++ if (handler == NULL) ++ return -EINVAL; ++ ++ raw_spin_lock_irqsave(&apc_lock, flags); ++ ++ if (cobalt_pipeline.apc_map == ~0) { ++ apc = -EBUSY; ++ goto out; ++ } ++ ++ apc = ffz(cobalt_pipeline.apc_map); ++ __set_bit(apc, &cobalt_pipeline.apc_map); ++ cobalt_pipeline.apc_table[apc].handler = handler; ++ cobalt_pipeline.apc_table[apc].cookie = cookie; ++ cobalt_pipeline.apc_table[apc].name = name; ++out: ++ raw_spin_unlock_irqrestore(&apc_lock, flags); ++ ++ return apc; ++} ++EXPORT_SYMBOL_GPL(xnapc_alloc); ++ ++/** ++ * @fn int xnapc_free(int apc) ++ * ++ * @brief Releases an APC slot. ++ * ++ * This service deallocates an APC slot obtained by xnapc_alloc(). ++ * ++ * @param apc The APC id. to release, as returned by a successful call ++ * to the xnapc_alloc() service. ++ * ++ * @coretags{unrestricted} ++ */ ++void xnapc_free(int apc) ++{ ++ BUG_ON(apc < 0 || apc >= BITS_PER_LONG); ++ clear_bit(apc, &cobalt_pipeline.apc_map); ++ smp_mb__after_atomic(); ++} ++EXPORT_SYMBOL_GPL(xnapc_free); ++ ++/** @} */ +--- linux/kernel/xenomai/sched.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched.c 2021-04-07 16:01:25.869636107 +0800 +@@ -0,0 +1,1587 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#define CREATE_TRACE_POINTS ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_sched Thread scheduling control ++ * @{ ++ */ ++ ++DEFINE_PER_CPU(struct xnsched, nksched); ++EXPORT_PER_CPU_SYMBOL_GPL(nksched); ++ ++cpumask_t cobalt_cpu_affinity = CPU_MASK_ALL; ++EXPORT_SYMBOL_GPL(cobalt_cpu_affinity); ++ ++LIST_HEAD(nkthreadq); ++ ++int cobalt_nrthreads; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++struct xnvfile_rev_tag nkthreadlist_tag; ++#endif ++ ++static struct xnsched_class *xnsched_class_highest; ++ ++#define for_each_xnsched_class(p) \ ++ for (p = xnsched_class_highest; p; p = p->next) ++ ++static void xnsched_register_class(struct xnsched_class *sched_class) ++{ ++ sched_class->next = xnsched_class_highest; ++ xnsched_class_highest = sched_class; ++ ++ /* ++ * Classes shall be registered by increasing priority order, ++ * idle first and up. ++ */ ++ XENO_BUG_ON(COBALT, sched_class->next && ++ sched_class->next->weight > sched_class->weight); ++ ++ printk(XENO_INFO "scheduling class %s registered.\n", sched_class->name); ++} ++ ++void xnsched_register_classes(void) ++{ ++ xnsched_register_class(&xnsched_class_idle); ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ xnsched_register_class(&xnsched_class_weak); ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ xnsched_register_class(&xnsched_class_tp); ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ xnsched_register_class(&xnsched_class_sporadic); ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ xnsched_register_class(&xnsched_class_quota); ++#endif ++ xnsched_register_class(&xnsched_class_rt); ++} ++ ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ ++static unsigned long wd_timeout_arg = CONFIG_XENO_OPT_WATCHDOG_TIMEOUT; ++module_param_named(watchdog_timeout, wd_timeout_arg, ulong, 0644); ++ ++static inline xnticks_t get_watchdog_timeout(void) ++{ ++ return wd_timeout_arg * 1000000000ULL; ++} ++ ++/** ++ * @internal ++ * @fn void watchdog_handler(struct xntimer *timer) ++ * @brief Process watchdog ticks. ++ * ++ * This internal routine handles incoming watchdog triggers to detect ++ * software lockups. It forces the offending thread to stop ++ * monopolizing the CPU, either by kicking it out of primary mode if ++ * running in user space, or cancelling it if kernel-based. ++ * ++ * @coretags{coreirq-only, atomic-entry} ++ */ ++static void watchdog_handler(struct xntimer *timer) ++{ ++ struct xnsched *sched = xnsched_current(); ++ struct xnthread *curr = sched->curr; ++ ++ /* ++ * CAUTION: The watchdog tick might have been delayed while we ++ * were busy switching the CPU to secondary mode at the ++ * trigger date eventually. Make sure that we are not about to ++ * kick the incoming root thread. 
++ */ ++ if (xnthread_test_state(curr, XNROOT)) ++ return; ++ ++ trace_cobalt_watchdog_signal(curr); ++ ++ if (xnthread_test_state(curr, XNUSER)) { ++ printk(XENO_WARNING "watchdog triggered on CPU #%d -- runaway thread " ++ "'%s' signaled\n", xnsched_cpu(sched), curr->name); ++ xnthread_call_mayday(curr, SIGDEBUG_WATCHDOG); ++ } else { ++ printk(XENO_WARNING "watchdog triggered on CPU #%d -- runaway thread " ++ "'%s' canceled\n", xnsched_cpu(sched), curr->name); ++ /* ++ * On behalf on an IRQ handler, xnthread_cancel() ++ * would go half way cancelling the preempted ++ * thread. Therefore we manually raise XNKICKED to ++ * cause the next call to xnthread_suspend() to return ++ * early in XNBREAK condition, and XNCANCELD so that ++ * @thread exits next time it invokes ++ * xnthread_test_cancel(). ++ */ ++ xnthread_set_info(curr, XNKICKED|XNCANCELD); ++ } ++} ++ ++#endif /* CONFIG_XENO_OPT_WATCHDOG */ ++ ++static void roundrobin_handler(struct xntimer *timer) ++{ ++ struct xnsched *sched = container_of(timer, struct xnsched, rrbtimer); ++ xnsched_tick(sched); ++} ++ ++static void xnsched_init(struct xnsched *sched, int cpu) ++{ ++ char rrbtimer_name[XNOBJECT_NAME_LEN]; ++ char htimer_name[XNOBJECT_NAME_LEN]; ++ char root_name[XNOBJECT_NAME_LEN]; ++ union xnsched_policy_param param; ++ struct xnthread_init_attr attr; ++ struct xnsched_class *p; ++ ++#ifdef CONFIG_SMP ++ sched->cpu = cpu; ++ ksformat(htimer_name, sizeof(htimer_name), "[host-timer/%u]", cpu); ++ ksformat(rrbtimer_name, sizeof(rrbtimer_name), "[rrb-timer/%u]", cpu); ++ ksformat(root_name, sizeof(root_name), "ROOT/%u", cpu); ++ cpumask_clear(&sched->resched); ++#else ++ strcpy(htimer_name, "[host-timer]"); ++ strcpy(rrbtimer_name, "[rrb-timer]"); ++ strcpy(root_name, "ROOT"); ++#endif ++ for_each_xnsched_class(p) { ++ if (p->sched_init) ++ p->sched_init(sched); ++ } ++ ++ sched->status = 0; ++ sched->lflags = XNIDLE; ++ sched->inesting = 0; ++ sched->curr = &sched->rootcb; ++ ++ attr.flags = XNROOT | XNFPU; ++ attr.name = root_name; ++ attr.personality = &xenomai_personality; ++ attr.affinity = *cpumask_of(cpu); ++ param.idle.prio = XNSCHED_IDLE_PRIO; ++ ++ __xnthread_init(&sched->rootcb, &attr, ++ sched, &xnsched_class_idle, ¶m); ++ ++ /* ++ * No direct handler here since the host timer processing is ++ * postponed to xnintr_irq_handler(), as part of the interrupt ++ * exit code. 
++ */ ++ xntimer_init(&sched->htimer, &nkclock, NULL, ++ sched, XNTIMER_IGRAVITY); ++ xntimer_set_priority(&sched->htimer, XNTIMER_LOPRIO); ++ xntimer_set_name(&sched->htimer, htimer_name); ++ xntimer_init(&sched->rrbtimer, &nkclock, roundrobin_handler, ++ sched, XNTIMER_IGRAVITY); ++ xntimer_set_name(&sched->rrbtimer, rrbtimer_name); ++ xntimer_set_priority(&sched->rrbtimer, XNTIMER_LOPRIO); ++ ++ xnstat_exectime_set_current(sched, &sched->rootcb.stat.account); ++#ifdef CONFIG_XENO_ARCH_FPU ++ sched->fpuholder = &sched->rootcb; ++#endif /* CONFIG_XENO_ARCH_FPU */ ++ ++ xnthread_init_root_tcb(&sched->rootcb); ++ list_add_tail(&sched->rootcb.glink, &nkthreadq); ++ cobalt_nrthreads++; ++ ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ xntimer_init(&sched->wdtimer, &nkclock, watchdog_handler, ++ sched, XNTIMER_IGRAVITY); ++ xntimer_set_name(&sched->wdtimer, "[watchdog]"); ++ xntimer_set_priority(&sched->wdtimer, XNTIMER_LOPRIO); ++#endif /* CONFIG_XENO_OPT_WATCHDOG */ ++} ++ ++void xnsched_init_all(void) ++{ ++ struct xnsched *sched; ++ int cpu; ++ ++ for_each_online_cpu(cpu) { ++ sched = &per_cpu(nksched, cpu); ++ xnsched_init(sched, cpu); ++ } ++ ++#ifdef CONFIG_SMP ++ ipipe_request_irq(&xnsched_realtime_domain, ++ IPIPE_RESCHEDULE_IPI, ++ (ipipe_irq_handler_t)__xnsched_run_handler, ++ NULL, NULL); ++#endif ++} ++ ++static void xnsched_destroy(struct xnsched *sched) ++{ ++ xntimer_destroy(&sched->htimer); ++ xntimer_destroy(&sched->rrbtimer); ++ xntimer_destroy(&sched->rootcb.ptimer); ++ xntimer_destroy(&sched->rootcb.rtimer); ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ xntimer_destroy(&sched->wdtimer); ++#endif /* CONFIG_XENO_OPT_WATCHDOG */ ++} ++ ++void xnsched_destroy_all(void) ++{ ++ struct xnthread *thread, *tmp; ++ struct xnsched *sched; ++ int cpu; ++ spl_t s; ++ ++#ifdef CONFIG_SMP ++ ipipe_free_irq(&xnsched_realtime_domain, IPIPE_RESCHEDULE_IPI); ++#endif ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* NOTE: &nkthreadq can't be empty (root thread(s)). */ ++ list_for_each_entry_safe(thread, tmp, &nkthreadq, glink) { ++ if (!xnthread_test_state(thread, XNROOT)) ++ xnthread_cancel(thread); ++ } ++ ++ xnsched_run(); ++ ++ for_each_online_cpu(cpu) { ++ sched = xnsched_struct(cpu); ++ xnsched_destroy(sched); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++static inline void set_thread_running(struct xnsched *sched, ++ struct xnthread *thread) ++{ ++ xnthread_clear_state(thread, XNREADY); ++ if (xnthread_test_state(thread, XNRRB)) ++ xntimer_start(&sched->rrbtimer, ++ thread->rrperiod, XN_INFINITE, XN_RELATIVE); ++ else ++ xntimer_stop(&sched->rrbtimer); ++} ++ ++/* Must be called with nklock locked, interrupts off. */ ++struct xnthread *xnsched_pick_next(struct xnsched *sched) ++{ ++ struct xnsched_class *p __maybe_unused; ++ struct xnthread *curr = sched->curr; ++ struct xnthread *thread; ++ ++ if (!xnthread_test_state(curr, XNTHREAD_BLOCK_BITS | XNZOMBIE)) { ++ /* ++ * Do not preempt the current thread if it holds the ++ * scheduler lock. ++ */ ++ if (curr->lock_count > 0) { ++ xnsched_set_self_resched(sched); ++ return curr; ++ } ++ /* ++ * Push the current thread back to the run queue of ++ * the scheduling class it belongs to, if not yet ++ * linked to it (XNREADY tells us if it is). ++ */ ++ if (!xnthread_test_state(curr, XNREADY)) { ++ xnsched_requeue(curr); ++ xnthread_set_state(curr, XNREADY); ++ } ++ } ++ ++ /* ++ * Find the runnable thread having the highest priority among ++ * all scheduling classes, scanned by decreasing priority. 
++ */ ++#ifdef CONFIG_XENO_OPT_SCHED_CLASSES ++ for_each_xnsched_class(p) { ++ thread = p->sched_pick(sched); ++ if (thread) { ++ set_thread_running(sched, thread); ++ return thread; ++ } ++ } ++ ++ return NULL; /* Never executed because of the idle class. */ ++#else /* !CONFIG_XENO_OPT_SCHED_CLASSES */ ++ thread = xnsched_rt_pick(sched); ++ if (unlikely(thread == NULL)) ++ thread = &sched->rootcb; ++ ++ set_thread_running(sched, thread); ++ ++ return thread; ++#endif /* CONFIG_XENO_OPT_SCHED_CLASSES */ ++} ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ ++struct xnsched *xnsched_finish_unlocked_switch(struct xnsched *sched) ++{ ++ struct xnthread *last; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++#ifdef CONFIG_SMP ++ /* If current thread migrated while suspended */ ++ sched = xnsched_current(); ++#endif /* CONFIG_SMP */ ++ ++ last = sched->last; ++ sched->status &= ~XNINSW; ++ ++ /* Detect a thread which has migrated. */ ++ if (last->sched != sched) { ++ xnsched_putback(last); ++ xnthread_clear_state(last, XNMIGRATE); ++ } ++ ++ return sched; ++} ++ ++#endif /* CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++void xnsched_lock(void) ++{ ++ struct xnsched *sched = xnsched_current(); ++ /* See comments in xnsched_run(), ___xnsched_run(). */ ++ struct xnthread *curr = READ_ONCE(sched->curr); ++ ++ if (sched->lflags & XNINIRQ) ++ return; ++ ++ /* ++ * CAUTION: The fast xnthread_current() accessor carries the ++ * relevant lock nesting count only if current runs in primary ++ * mode. Otherwise, if the caller is unknown or relaxed ++ * Xenomai-wise, then we fall back to the root thread on the ++ * current scheduler, which must be done with IRQs off. ++ * Either way, we don't need to grab the super lock. ++ */ ++ XENO_WARN_ON_ONCE(COBALT, (curr->state & XNROOT) && ++ !hard_irqs_disabled()); ++ ++ curr->lock_count++; ++} ++EXPORT_SYMBOL_GPL(xnsched_lock); ++ ++void xnsched_unlock(void) ++{ ++ struct xnsched *sched = xnsched_current(); ++ struct xnthread *curr = READ_ONCE(sched->curr); ++ ++ XENO_WARN_ON_ONCE(COBALT, (curr->state & XNROOT) && ++ !hard_irqs_disabled()); ++ ++ if (sched->lflags & XNINIRQ) ++ return; ++ ++ if (!XENO_ASSERT(COBALT, curr->lock_count > 0)) ++ return; ++ ++ if (--curr->lock_count == 0) { ++ xnthread_clear_localinfo(curr, XNLBALERT); ++ xnsched_run(); ++ } ++} ++EXPORT_SYMBOL_GPL(xnsched_unlock); ++ ++/* nklock locked, interrupts off. */ ++void xnsched_putback(struct xnthread *thread) ++{ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_dequeue(thread); ++ else ++ xnthread_set_state(thread, XNREADY); ++ ++ xnsched_enqueue(thread); ++ xnsched_set_resched(thread->sched); ++} ++ ++/* nklock locked, interrupts off. */ ++int xnsched_set_policy(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_class *orig_effective_class __maybe_unused; ++ bool effective; ++ int ret; ++ ++ ret = xnsched_chkparam(sched_class, thread, p); ++ if (ret) ++ return ret; ++ ++ /* ++ * Declaring a thread to a new scheduling class may fail, so ++ * we do that early, while the thread is still a member of the ++ * previous class. However, this also means that the ++ * declaration callback shall not do anything that might ++ * affect the previous class (such as touching thread->rlink ++ * for instance). 
++ */ ++ if (sched_class != thread->base_class) { ++ ret = xnsched_declare(sched_class, thread, p); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * As a special case, we may be called from __xnthread_init() ++ * with no previous scheduling class at all. ++ */ ++ if (likely(thread->base_class != NULL)) { ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_dequeue(thread); ++ ++ if (sched_class != thread->base_class) ++ xnsched_forget(thread); ++ } ++ ++ /* ++ * Set the base and effective scheduling parameters. However, ++ * xnsched_setparam() will deny lowering the effective ++ * priority if a boost is undergoing, only recording the ++ * change into the base priority field in such situation. ++ */ ++ thread->base_class = sched_class; ++ /* ++ * Referring to the effective class from a setparam() handler ++ * is wrong: make sure to break if so. ++ */ ++ if (XENO_DEBUG(COBALT)) { ++ orig_effective_class = thread->sched_class; ++ thread->sched_class = NULL; ++ } ++ ++ /* ++ * This is the ONLY place where calling xnsched_setparam() is ++ * legit, sane and safe. ++ */ ++ effective = xnsched_setparam(thread, p); ++ if (effective) { ++ thread->sched_class = sched_class; ++ thread->wprio = xnsched_calc_wprio(sched_class, thread->cprio); ++ } else if (XENO_DEBUG(COBALT)) ++ thread->sched_class = orig_effective_class; ++ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_enqueue(thread); ++ ++ if (!xnthread_test_state(thread, XNDORMANT)) ++ xnsched_set_resched(thread->sched); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnsched_set_policy); ++ ++/* nklock locked, interrupts off. */ ++bool xnsched_set_effective_priority(struct xnthread *thread, int prio) ++{ ++ int wprio = xnsched_calc_wprio(thread->base_class, prio); ++ ++ thread->bprio = prio; ++ if (wprio == thread->wprio) ++ return true; ++ ++ /* ++ * We may not lower the effective/current priority of a ++ * boosted thread when changing the base scheduling ++ * parameters. Only xnsched_track_policy() and ++ * xnsched_protect_priority() may do so when dealing with PI ++ * and PP synchs resp. ++ */ ++ if (wprio < thread->wprio && xnthread_test_state(thread, XNBOOST)) ++ return false; ++ ++ thread->cprio = prio; ++ ++ trace_cobalt_thread_set_current_prio(thread); ++ ++ return true; ++} ++ ++/* nklock locked, interrupts off. */ ++void xnsched_track_policy(struct xnthread *thread, ++ struct xnthread *target) ++{ ++ union xnsched_policy_param param; ++ ++ /* ++ * Inherit (or reset) the effective scheduling class and ++ * priority of a thread. Unlike xnsched_set_policy(), this ++ * routine is allowed to lower the weighted priority with no ++ * restriction, even if a boost is undergoing. ++ */ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_dequeue(thread); ++ /* ++ * Self-targeting means to reset the scheduling policy and ++ * parameters to the base settings. Otherwise, make thread ++ * inherit the scheduling parameters from target. ++ */ ++ if (target == thread) { ++ thread->sched_class = thread->base_class; ++ xnsched_trackprio(thread, NULL); ++ /* ++ * Per SuSv2, resetting the base scheduling parameters ++ * should not move the thread to the tail of its ++ * priority group. 
++ */ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_requeue(thread); ++ ++ } else { ++ xnsched_getparam(target, ¶m); ++ thread->sched_class = target->sched_class; ++ xnsched_trackprio(thread, ¶m); ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_enqueue(thread); ++ } ++ ++ trace_cobalt_thread_set_current_prio(thread); ++ ++ xnsched_set_resched(thread->sched); ++} ++ ++/* nklock locked, interrupts off. */ ++void xnsched_protect_priority(struct xnthread *thread, int prio) ++{ ++ /* ++ * Apply a PP boost by changing the effective priority of a ++ * thread, forcing it to the RT class. Like ++ * xnsched_track_policy(), this routine is allowed to lower ++ * the weighted priority with no restriction, even if a boost ++ * is undergoing. ++ * ++ * This routine only deals with active boosts, resetting the ++ * base priority when leaving a PP boost is obtained by a call ++ * to xnsched_track_policy(). ++ */ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_dequeue(thread); ++ ++ thread->sched_class = &xnsched_class_rt; ++ xnsched_protectprio(thread, prio); ++ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_enqueue(thread); ++ ++ trace_cobalt_thread_set_current_prio(thread); ++ ++ xnsched_set_resched(thread->sched); ++} ++ ++static void migrate_thread(struct xnthread *thread, struct xnsched *sched) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (xnthread_test_state(thread, XNREADY)) { ++ xnsched_dequeue(thread); ++ xnthread_clear_state(thread, XNREADY); ++ } ++ ++ if (sched_class->sched_migrate) ++ sched_class->sched_migrate(thread, sched); ++ /* ++ * WARNING: the scheduling class may have just changed as a ++ * result of calling the per-class migration hook. ++ */ ++ thread->sched = sched; ++} ++ ++/* ++ * nklock locked, interrupts off. thread must be runnable. ++ */ ++void xnsched_migrate(struct xnthread *thread, struct xnsched *sched) ++{ ++ xnsched_set_resched(thread->sched); ++ migrate_thread(thread, sched); ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ /* ++ * Mark the thread in flight, xnsched_finish_unlocked_switch() ++ * will put the thread on the remote runqueue. ++ */ ++ xnthread_set_state(thread, XNMIGRATE); ++#else ++ /* Move thread to the remote run queue. */ ++ xnsched_putback(thread); ++#endif ++} ++ ++/* ++ * nklock locked, interrupts off. Thread may be blocked. ++ */ ++void xnsched_migrate_passive(struct xnthread *thread, struct xnsched *sched) ++{ ++ struct xnsched *last_sched = thread->sched; ++ ++ migrate_thread(thread, sched); ++ ++ if (!xnthread_test_state(thread, XNTHREAD_BLOCK_BITS)) { ++ xnsched_requeue(thread); ++ xnthread_set_state(thread, XNREADY); ++ xnsched_set_resched(last_sched); ++ } ++} ++ ++#ifdef CONFIG_XENO_OPT_SCALABLE_SCHED ++ ++void xnsched_initq(struct xnsched_mlq *q) ++{ ++ int prio; ++ ++ q->elems = 0; ++ bitmap_zero(q->prio_map, XNSCHED_MLQ_LEVELS); ++ ++ for (prio = 0; prio < XNSCHED_MLQ_LEVELS; prio++) ++ INIT_LIST_HEAD(q->heads + prio); ++} ++ ++static inline int get_qindex(struct xnsched_mlq *q, int prio) ++{ ++ XENO_BUG_ON(COBALT, prio < 0 || prio >= XNSCHED_MLQ_LEVELS); ++ /* ++ * BIG FAT WARNING: We need to rescale the priority level to a ++ * 0-based range. We use find_first_bit() to scan the bitmap ++ * which is a bit scan forward operation. Therefore, the lower ++ * the index value, the higher the priority (since least ++ * significant bits will be found first when scanning the ++ * bitmap). 
++ */ ++ return XNSCHED_MLQ_LEVELS - prio - 1; ++} ++ ++static struct list_head *add_q(struct xnsched_mlq *q, int prio) ++{ ++ struct list_head *head; ++ int idx; ++ ++ idx = get_qindex(q, prio); ++ head = q->heads + idx; ++ q->elems++; ++ ++ /* New item is not linked yet. */ ++ if (list_empty(head)) ++ __set_bit(idx, q->prio_map); ++ ++ return head; ++} ++ ++void xnsched_addq(struct xnsched_mlq *q, struct xnthread *thread) ++{ ++ struct list_head *head = add_q(q, thread->cprio); ++ list_add(&thread->rlink, head); ++} ++ ++void xnsched_addq_tail(struct xnsched_mlq *q, struct xnthread *thread) ++{ ++ struct list_head *head = add_q(q, thread->cprio); ++ list_add_tail(&thread->rlink, head); ++} ++ ++static void del_q(struct xnsched_mlq *q, ++ struct list_head *entry, int idx) ++{ ++ struct list_head *head = q->heads + idx; ++ ++ list_del(entry); ++ q->elems--; ++ ++ if (list_empty(head)) ++ __clear_bit(idx, q->prio_map); ++} ++ ++void xnsched_delq(struct xnsched_mlq *q, struct xnthread *thread) ++{ ++ del_q(q, &thread->rlink, get_qindex(q, thread->cprio)); ++} ++ ++struct xnthread *xnsched_getq(struct xnsched_mlq *q) ++{ ++ struct xnthread *thread; ++ struct list_head *head; ++ int idx; ++ ++ if (q->elems == 0) ++ return NULL; ++ ++ idx = xnsched_weightq(q); ++ head = q->heads + idx; ++ XENO_BUG_ON(COBALT, list_empty(head)); ++ thread = list_first_entry(head, struct xnthread, rlink); ++ del_q(q, &thread->rlink, idx); ++ ++ return thread; ++} ++ ++struct xnthread *xnsched_findq(struct xnsched_mlq *q, int prio) ++{ ++ struct list_head *head; ++ int idx; ++ ++ idx = get_qindex(q, prio); ++ head = q->heads + idx; ++ if (list_empty(head)) ++ return NULL; ++ ++ return list_first_entry(head, struct xnthread, rlink); ++} ++ ++#ifdef CONFIG_XENO_OPT_SCHED_CLASSES ++ ++struct xnthread *xnsched_rt_pick(struct xnsched *sched) ++{ ++ struct xnsched_mlq *q = &sched->rt.runnable; ++ struct xnthread *thread; ++ struct list_head *head; ++ int idx; ++ ++ if (q->elems == 0) ++ return NULL; ++ ++ /* ++ * Some scheduling policies may be implemented as variants of ++ * the core SCHED_FIFO class, sharing its runqueue ++ * (e.g. SCHED_SPORADIC, SCHED_QUOTA). This means that we have ++ * to do some cascading to call the right pick handler ++ * eventually. ++ */ ++ idx = xnsched_weightq(q); ++ head = q->heads + idx; ++ XENO_BUG_ON(COBALT, list_empty(head)); ++ ++ /* ++ * The active class (i.e. ->sched_class) is the one currently ++ * queuing the thread, reflecting any priority boost due to ++ * PI. ++ */ ++ thread = list_first_entry(head, struct xnthread, rlink); ++ if (unlikely(thread->sched_class != &xnsched_class_rt)) ++ return thread->sched_class->sched_pick(sched); ++ ++ del_q(q, &thread->rlink, idx); ++ ++ return thread; ++} ++ ++#endif /* CONFIG_XENO_OPT_SCHED_CLASSES */ ++ ++#else /* !CONFIG_XENO_OPT_SCALABLE_SCHED */ ++ ++struct xnthread *xnsched_findq(struct list_head *q, int prio) ++{ ++ struct xnthread *thread; ++ ++ if (list_empty(q)) ++ return NULL; ++ ++ /* Find thread leading a priority group. 
*/ ++ list_for_each_entry(thread, q, rlink) { ++ if (prio == thread->cprio) ++ return thread; ++ } ++ ++ return NULL; ++} ++ ++#ifdef CONFIG_XENO_OPT_SCHED_CLASSES ++ ++struct xnthread *xnsched_rt_pick(struct xnsched *sched) ++{ ++ struct list_head *q = &sched->rt.runnable; ++ struct xnthread *thread; ++ ++ if (list_empty(q)) ++ return NULL; ++ ++ thread = list_first_entry(q, struct xnthread, rlink); ++ if (unlikely(thread->sched_class != &xnsched_class_rt)) ++ return thread->sched_class->sched_pick(sched); ++ ++ list_del(&thread->rlink); ++ ++ return thread; ++} ++ ++#endif /* CONFIG_XENO_OPT_SCHED_CLASSES */ ++ ++#endif /* !CONFIG_XENO_OPT_SCALABLE_SCHED */ ++ ++static inline void switch_context(struct xnsched *sched, ++ struct xnthread *prev, struct xnthread *next) ++{ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ sched->last = prev; ++ sched->status |= XNINSW; ++ xnlock_clear_irqon(&nklock); ++#endif ++ ++ xnarch_switch_to(prev, next); ++} ++ ++/** ++ * @fn int xnsched_run(void) ++ * @brief The rescheduling procedure. ++ * ++ * This is the central rescheduling routine which should be called to ++ * validate and apply changes which have previously been made to the ++ * nucleus scheduling state, such as suspending, resuming or changing ++ * the priority of threads. This call performs context switches as ++ * needed. xnsched_run() schedules out the current thread if: ++ * ++ * - the current thread is about to block. ++ * - a runnable thread from a higher priority scheduling class is ++ * waiting for the CPU. ++ * - the current thread does not lead the runnable threads from its ++ * own scheduling class (i.e. round-robin). ++ * ++ * The Cobalt core implements a lazy rescheduling scheme so that most ++ * of the services affecting the threads state MUST be followed by a ++ * call to the rescheduling procedure for the new scheduling state to ++ * be applied. ++ * ++ * In other words, multiple changes on the scheduler state can be done ++ * in a row, waking threads up, blocking others, without being ++ * immediately translated into the corresponding context switches. ++ * When all changes have been applied, xnsched_run() should be called ++ * for considering those changes, and possibly switching context. ++ * ++ * As a notable exception to the previous principle however, every ++ * action which ends up suspending the current thread begets an ++ * implicit call to the rescheduling procedure on behalf of the ++ * blocking service. ++ * ++ * Typically, self-suspension or sleeping on a synchronization object ++ * automatically leads to a call to the rescheduling procedure, ++ * therefore the caller does not need to explicitly issue ++ * xnsched_run() after such operations. ++ * ++ * The rescheduling procedure always leads to a null-effect if it is ++ * called on behalf of an interrupt service routine. Any outstanding ++ * scheduler lock held by the outgoing thread will be restored when ++ * the thread is scheduled back in. ++ * ++ * Calling this procedure with no applicable context switch pending is ++ * harmless and simply leads to a null-effect. ++ * ++ * @return Non-zero is returned if a context switch actually happened, ++ * otherwise zero if the current thread was left running. ++ * ++ * @coretags{unrestricted} ++ */ ++static inline int test_resched(struct xnsched *sched) ++{ ++ int resched = xnsched_resched_p(sched); ++#ifdef CONFIG_SMP ++ /* Send resched IPI to remote CPU(s). 
*/ ++ if (unlikely(!cpumask_empty(&sched->resched))) { ++ smp_mb(); ++ ipipe_send_ipi(IPIPE_RESCHEDULE_IPI, sched->resched); ++ cpumask_clear(&sched->resched); ++ } ++#endif ++ sched->status &= ~XNRESCHED; ++ ++ return resched; ++} ++ ++static inline void enter_root(struct xnthread *root) ++{ ++ struct xnarchtcb *rootcb __maybe_unused = xnthread_archtcb(root); ++ ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ xntimer_stop(&root->sched->wdtimer); ++#endif ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ if (rootcb->core.mm == NULL) ++ set_ti_thread_flag(rootcb->core.tip, TIF_MMSWITCH_INT); ++#endif ++} ++ ++static inline void leave_root(struct xnthread *root) ++{ ++ struct xnarchtcb *rootcb = xnthread_archtcb(root); ++ struct task_struct *p = current; ++ ++ ipipe_notify_root_preemption(); ++ /* Remember the preempted Linux task pointer. */ ++ rootcb->core.host_task = p; ++ rootcb->core.tsp = &p->thread; ++ rootcb->core.mm = rootcb->core.active_mm = ipipe_get_active_mm(); ++ rootcb->core.tip = task_thread_info(p); ++ xnarch_leave_root(root); ++ ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ xntimer_start(&root->sched->wdtimer, get_watchdog_timeout(), ++ XN_INFINITE, XN_RELATIVE); ++#endif ++} ++ ++void __xnsched_run_handler(void) /* hw interrupts off. */ ++{ ++ trace_cobalt_schedule_remote(xnsched_current()); ++ xnsched_run(); ++} ++ ++static inline void do_lazy_user_work(struct xnthread *curr) ++{ ++ xnthread_commit_ceiling(curr); ++} ++ ++int ___xnsched_run(struct xnsched *sched) ++{ ++ struct xnthread *prev, *next, *curr; ++ int switched, shadow; ++ spl_t s; ++ ++ XENO_WARN_ON_ONCE(COBALT, !hard_irqs_disabled() && ipipe_root_p); ++ ++ if (xnarch_escalate()) ++ return 0; ++ ++ trace_cobalt_schedule(sched); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ curr = sched->curr; ++ /* ++ * CAUTION: xnthread_host_task(curr) may be unsynced and even ++ * stale if curr = &rootcb, since the task logged by ++ * leave_root() may not still be the current one. Use ++ * "current" for disambiguating. ++ */ ++ xntrace_pid(task_pid_nr(current), xnthread_current_priority(curr)); ++reschedule: ++ if (xnthread_test_state(curr, XNUSER)) ++ do_lazy_user_work(curr); ++ ++ switched = 0; ++ if (!test_resched(sched)) ++ goto out; ++ ++ next = xnsched_pick_next(sched); ++ if (next == curr) { ++ if (unlikely(xnthread_test_state(next, XNROOT))) { ++ if (sched->lflags & XNHTICK) ++ xnintr_host_tick(sched); ++ if (sched->lflags & XNHDEFER) ++ xnclock_program_shot(&nkclock, sched); ++ } ++ goto out; ++ } ++ ++ prev = curr; ++ ++ trace_cobalt_switch_context(prev, next); ++ ++ /* ++ * sched->curr is shared locklessly with xnsched_run() and ++ * xnsched_lock(). WRITE_ONCE() makes sure sched->curr is ++ * written atomically so that these routines always observe ++ * consistent values by preventing the compiler from using ++ * store tearing. ++ */ ++ WRITE_ONCE(sched->curr, next); ++ shadow = 1; ++ ++ if (xnthread_test_state(prev, XNROOT)) { ++ leave_root(prev); ++ shadow = 0; ++ } else if (xnthread_test_state(next, XNROOT)) { ++ if (sched->lflags & XNHTICK) ++ xnintr_host_tick(sched); ++ if (sched->lflags & XNHDEFER) ++ xnclock_program_shot(&nkclock, sched); ++ enter_root(next); ++ } ++ ++ xnstat_exectime_switch(sched, &next->stat.account); ++ xnstat_counter_inc(&next->stat.csw); ++ ++ switch_context(sched, prev, next); ++ ++ /* ++ * Test whether we transitioned from primary mode to secondary ++ * over a shadow thread, caused by a call to xnthread_relax(). 
++ * In such a case, we are running over the regular schedule() ++ * tail code, so we have to skip our tail code. ++ */ ++ if (shadow && ipipe_root_p) ++ goto shadow_epilogue; ++ ++ switched = 1; ++ sched = xnsched_finish_unlocked_switch(sched); ++ /* ++ * Re-read the currently running thread, this is needed ++ * because of relaxed/hardened transitions. ++ */ ++ curr = sched->curr; ++ xnthread_switch_fpu(sched); ++ xntrace_pid(task_pid_nr(current), xnthread_current_priority(curr)); ++out: ++ if (switched && ++ xnsched_maybe_resched_after_unlocked_switch(sched)) ++ goto reschedule; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return switched; ++ ++shadow_epilogue: ++ __ipipe_complete_domain_migration(); ++ ++ XENO_BUG_ON(COBALT, xnthread_current() == NULL); ++ ++ /* ++ * Interrupts must be disabled here (has to be done on entry ++ * of the Linux [__]switch_to function), but it is what ++ * callers expect, specifically the reschedule of an IRQ ++ * handler that hit before we call xnsched_run in ++ * xnthread_suspend() when relaxing a thread. ++ */ ++ XENO_BUG_ON(COBALT, !hard_irqs_disabled()); ++ ++ return 1; ++} ++EXPORT_SYMBOL_GPL(___xnsched_run); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static struct xnvfile_directory sched_vfroot; ++ ++struct vfile_schedlist_priv { ++ struct xnthread *curr; ++ xnticks_t start_time; ++}; ++ ++struct vfile_schedlist_data { ++ int cpu; ++ pid_t pid; ++ char name[XNOBJECT_NAME_LEN]; ++ char sched_class[XNOBJECT_NAME_LEN]; ++ char personality[XNOBJECT_NAME_LEN]; ++ int cprio; ++ xnticks_t timeout; ++ int state; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_schedlist_ops; ++ ++static struct xnvfile_snapshot schedlist_vfile = { ++ .privsz = sizeof(struct vfile_schedlist_priv), ++ .datasz = sizeof(struct vfile_schedlist_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_schedlist_ops, ++}; ++ ++static int vfile_schedlist_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_schedlist_priv *priv = xnvfile_iterator_priv(it); ++ ++ /* &nkthreadq cannot be empty (root thread(s)). */ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ priv->start_time = xnclock_read_monotonic(&nkclock); ++ ++ return cobalt_nrthreads; ++} ++ ++static int vfile_schedlist_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_schedlist_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_schedlist_data *p = data; ++ xnticks_t timeout, period; ++ struct xnthread *thread; ++ xnticks_t base_time; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. 
*/ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->cprio = thread->cprio; ++ p->state = xnthread_get_state(thread); ++ if (thread->lock_count > 0) ++ p->state |= XNLOCK; ++ knamecpy(p->sched_class, thread->sched_class->name); ++ knamecpy(p->personality, thread->personality->name); ++ period = xnthread_get_period(thread); ++ base_time = priv->start_time; ++ if (xntimer_clock(&thread->ptimer) != &nkclock) ++ base_time = xnclock_read_monotonic(xntimer_clock(&thread->ptimer)); ++ timeout = xnthread_get_timeout(thread, base_time); ++ /* ++ * Here we cheat: thread is periodic and the sampling rate may ++ * be high, so it is indeed possible that the next tick date ++ * from the ptimer progresses fast enough while we are busy ++ * collecting output data in this loop, so that next_date - ++ * start_time > period. In such a case, we simply ceil the ++ * value to period to keep the result meaningful, even if not ++ * necessarily accurate. But what does accuracy mean when the ++ * sampling frequency is high, and the way to read it has to ++ * go through the vfile interface anyway? ++ */ ++ if (period > 0 && period < timeout && ++ !xntimer_running_p(&thread->rtimer)) ++ timeout = period; ++ ++ p->timeout = timeout; ++ ++ return 1; ++} ++ ++static int vfile_schedlist_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_schedlist_data *p = data; ++ char sbuf[64], pbuf[16], tbuf[16]; ++ ++ if (p == NULL) ++ xnvfile_printf(it, ++ "%-3s %-6s %-5s %-8s %-5s %-12s %-10s %s\n", ++ "CPU", "PID", "CLASS", "TYPE", "PRI", "TIMEOUT", ++ "STAT", "NAME"); ++ else { ++ ksformat(pbuf, sizeof(pbuf), "%3d", p->cprio); ++ xntimer_format_time(p->timeout, tbuf, sizeof(tbuf)); ++ xnthread_format_status(p->state, sbuf, sizeof(sbuf)); ++ ++ xnvfile_printf(it, ++ "%3u %-6d %-5s %-8s %-5s %-12s %-10s %s%s%s\n", ++ p->cpu, ++ p->pid, ++ p->sched_class, ++ p->personality, ++ pbuf, ++ tbuf, ++ sbuf, ++ (p->state & XNUSER) ? "" : "[", ++ p->name, ++ (p->state & XNUSER) ? 
"" : "]"); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_schedlist_ops = { ++ .rewind = vfile_schedlist_rewind, ++ .next = vfile_schedlist_next, ++ .show = vfile_schedlist_show, ++}; ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ ++static spl_t vfile_schedstat_lock_s; ++ ++static int vfile_schedstat_get_lock(struct xnvfile *vfile) ++{ ++ int ret; ++ ++ ret = xnintr_get_query_lock(); ++ if (ret < 0) ++ return ret; ++ xnlock_get_irqsave(&nklock, vfile_schedstat_lock_s); ++ return 0; ++} ++ ++static void vfile_schedstat_put_lock(struct xnvfile *vfile) ++{ ++ xnlock_put_irqrestore(&nklock, vfile_schedstat_lock_s); ++ xnintr_put_query_lock(); ++} ++ ++static struct xnvfile_lock_ops vfile_schedstat_lockops = { ++ .get = vfile_schedstat_get_lock, ++ .put = vfile_schedstat_put_lock, ++}; ++ ++struct vfile_schedstat_priv { ++ int irq; ++ struct xnthread *curr; ++ struct xnintr_iterator intr_it; ++}; ++ ++struct vfile_schedstat_data { ++ int cpu; ++ pid_t pid; ++ int state; ++ char name[XNOBJECT_NAME_LEN]; ++ unsigned long ssw; ++ unsigned long csw; ++ unsigned long xsc; ++ unsigned long pf; ++ xnticks_t exectime_period; ++ xnticks_t account_period; ++ xnticks_t exectime_total; ++ struct xnsched_class *sched_class; ++ xnticks_t period; ++ int cprio; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_schedstat_ops; ++ ++static struct xnvfile_snapshot schedstat_vfile = { ++ .privsz = sizeof(struct vfile_schedstat_priv), ++ .datasz = sizeof(struct vfile_schedstat_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_schedstat_ops, ++ .entry = { .lockops = &vfile_schedstat_lockops }, ++}; ++ ++static int vfile_schedstat_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_schedstat_priv *priv = xnvfile_iterator_priv(it); ++ int irqnr; ++ ++ /* ++ * The activity numbers on each valid interrupt descriptor are ++ * grouped under a pseudo-thread. ++ */ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ priv->irq = 0; ++ irqnr = xnintr_query_init(&priv->intr_it) * num_online_cpus(); ++ ++ return irqnr + cobalt_nrthreads; ++} ++ ++static int vfile_schedstat_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_schedstat_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_schedstat_data *p = data; ++ struct xnthread *thread; ++ struct xnsched *sched; ++ xnticks_t period; ++ int __maybe_unused ret; ++ ++ if (priv->curr == NULL) ++ /* ++ * We are done with actual threads, scan interrupt ++ * descriptors. 
++ */ ++ goto scan_irqs; ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ sched = thread->sched; ++ p->cpu = xnsched_cpu(sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->state = xnthread_get_state(thread); ++ if (thread->lock_count > 0) ++ p->state |= XNLOCK; ++ p->ssw = xnstat_counter_get(&thread->stat.ssw); ++ p->csw = xnstat_counter_get(&thread->stat.csw); ++ p->xsc = xnstat_counter_get(&thread->stat.xsc); ++ p->pf = xnstat_counter_get(&thread->stat.pf); ++ p->sched_class = thread->sched_class; ++ p->cprio = thread->cprio; ++ p->period = xnthread_get_period(thread); ++ ++ period = sched->last_account_switch - thread->stat.lastperiod.start; ++ if (period == 0 && thread == sched->curr) { ++ p->exectime_period = 1; ++ p->account_period = 1; ++ } else { ++ p->exectime_period = thread->stat.account.total - ++ thread->stat.lastperiod.total; ++ p->account_period = period; ++ } ++ p->exectime_total = thread->stat.account.total; ++ thread->stat.lastperiod.total = thread->stat.account.total; ++ thread->stat.lastperiod.start = sched->last_account_switch; ++ ++ return 1; ++ ++scan_irqs: ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++ if (priv->irq >= IPIPE_NR_IRQS) ++ return 0; /* All done. */ ++ ++ ret = xnintr_query_next(priv->irq, &priv->intr_it, p->name); ++ if (ret) { ++ if (ret == -EAGAIN) ++ xnvfile_touch(it->vfile); /* force rewind. */ ++ priv->irq++; ++ return VFILE_SEQ_SKIP; ++ } ++ ++ if (!xnsched_supported_cpu(priv->intr_it.cpu)) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = priv->intr_it.cpu; ++ p->csw = priv->intr_it.hits; ++ p->exectime_period = priv->intr_it.exectime_period; ++ p->account_period = priv->intr_it.account_period; ++ p->exectime_total = priv->intr_it.exectime_total; ++ p->pid = 0; ++ p->state = 0; ++ p->ssw = 0; ++ p->xsc = 0; ++ p->pf = 0; ++ p->sched_class = &xnsched_class_idle; ++ p->cprio = 0; ++ p->period = 0; ++ ++ return 1; ++#else /* !CONFIG_XENO_OPT_STATS_IRQS */ ++ return 0; ++#endif /* !CONFIG_XENO_OPT_STATS_IRQS */ ++} ++ ++static int vfile_schedstat_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_schedstat_data *p = data; ++ int usage = 0; ++ ++ if (p == NULL) ++ xnvfile_printf(it, ++ "%-3s %-6s %-10s %-10s %-10s %-4s %-8s %5s" ++ " %s\n", ++ "CPU", "PID", "MSW", "CSW", "XSC", "PF", "STAT", "%CPU", ++ "NAME"); ++ else { ++ if (p->account_period) { ++ while (p->account_period > 0xffffffffUL) { ++ p->exectime_period >>= 16; ++ p->account_period >>= 16; ++ } ++ usage = xnarch_ulldiv(p->exectime_period * 1000LL + ++ (p->account_period >> 1), ++ p->account_period, NULL); ++ } ++ xnvfile_printf(it, ++ "%3u %-6d %-10lu %-10lu %-10lu %-4lu %.8x %3u.%u" ++ " %s%s%s\n", ++ p->cpu, p->pid, p->ssw, p->csw, p->xsc, p->pf, p->state, ++ usage / 10, usage % 10, ++ (p->state & XNUSER) ? "" : "[", ++ p->name, ++ (p->state & XNUSER) ? 
"" : "]"); ++ } ++ ++ return 0; ++} ++ ++static int vfile_schedacct_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_schedstat_data *p = data; ++ ++ if (p == NULL) ++ return 0; ++ ++ xnvfile_printf(it, "%u %d %lu %lu %lu %lu %.8x %Lu %Lu %Lu %s %s %d %Lu\n", ++ p->cpu, p->pid, p->ssw, p->csw, p->xsc, p->pf, p->state, ++ xnclock_ticks_to_ns(&nkclock, p->account_period), ++ xnclock_ticks_to_ns(&nkclock, p->exectime_period), ++ xnclock_ticks_to_ns(&nkclock, p->exectime_total), ++ p->name, ++ p->sched_class->name, ++ p->cprio, ++ p->period); ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_schedstat_ops = { ++ .rewind = vfile_schedstat_rewind, ++ .next = vfile_schedstat_next, ++ .show = vfile_schedstat_show, ++}; ++ ++/* ++ * An accounting vfile is a thread statistics vfile in disguise with a ++ * different output format, which is parser-friendly. ++ */ ++static struct xnvfile_snapshot_ops vfile_schedacct_ops; ++ ++static struct xnvfile_snapshot schedacct_vfile = { ++ .privsz = sizeof(struct vfile_schedstat_priv), ++ .datasz = sizeof(struct vfile_schedstat_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_schedacct_ops, ++}; ++ ++static struct xnvfile_snapshot_ops vfile_schedacct_ops = { ++ .rewind = vfile_schedstat_rewind, ++ .next = vfile_schedstat_next, ++ .show = vfile_schedacct_show, ++}; ++ ++#endif /* CONFIG_XENO_OPT_STATS */ ++ ++#ifdef CONFIG_SMP ++ ++static int affinity_vfile_show(struct xnvfile_regular_iterator *it, ++ void *data) ++{ ++ unsigned long val = 0; ++ int cpu; ++ ++ for (cpu = 0; cpu < BITS_PER_LONG; cpu++) ++ if (cpumask_test_cpu(cpu, &cobalt_cpu_affinity)) ++ val |= (1UL << cpu); ++ ++ xnvfile_printf(it, "%08lx\n", val); ++ ++ return 0; ++} ++ ++static ssize_t affinity_vfile_store(struct xnvfile_input *input) ++{ ++ cpumask_t affinity, set; ++ ssize_t ret; ++ long val; ++ int cpu; ++ spl_t s; ++ ++ ret = xnvfile_get_integer(input, &val); ++ if (ret < 0) ++ return ret; ++ ++ if (val == 0) ++ affinity = xnsched_realtime_cpus; /* Reset to default. */ ++ else { ++ cpumask_clear(&affinity); ++ for (cpu = 0; cpu < BITS_PER_LONG; cpu++, val >>= 1) { ++ if (val & 1) ++ cpumask_set_cpu(cpu, &affinity); ++ } ++ } ++ ++ cpumask_and(&set, &affinity, cpu_online_mask); ++ if (cpumask_empty(&set)) ++ return -EINVAL; ++ ++ /* ++ * The new dynamic affinity must be a strict subset of the ++ * static set of supported CPUs. 
++ */ ++ cpumask_or(&set, &affinity, &xnsched_realtime_cpus); ++ if (!cpumask_equal(&set, &xnsched_realtime_cpus)) ++ return -EINVAL; ++ ++ xnlock_get_irqsave(&nklock, s); ++ cobalt_cpu_affinity = affinity; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++static struct xnvfile_regular_ops affinity_vfile_ops = { ++ .show = affinity_vfile_show, ++ .store = affinity_vfile_store, ++}; ++ ++static struct xnvfile_regular affinity_vfile = { ++ .ops = &affinity_vfile_ops, ++}; ++ ++#endif /* CONFIG_SMP */ ++ ++int xnsched_init_proc(void) ++{ ++ struct xnsched_class *p; ++ int ret; ++ ++ ret = xnvfile_init_dir("sched", &sched_vfroot, &cobalt_vfroot); ++ if (ret) ++ return ret; ++ ++ ret = xnvfile_init_snapshot("threads", &schedlist_vfile, &sched_vfroot); ++ if (ret) ++ return ret; ++ ++ for_each_xnsched_class(p) { ++ if (p->sched_init_vfile) { ++ ret = p->sched_init_vfile(p, &sched_vfroot); ++ if (ret) ++ return ret; ++ } ++ } ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ ret = xnvfile_init_snapshot("stat", &schedstat_vfile, &sched_vfroot); ++ if (ret) ++ return ret; ++ ret = xnvfile_init_snapshot("acct", &schedacct_vfile, &sched_vfroot); ++ if (ret) ++ return ret; ++#endif /* CONFIG_XENO_OPT_STATS */ ++ ++#ifdef CONFIG_SMP ++ xnvfile_init_regular("affinity", &affinity_vfile, &cobalt_vfroot); ++#endif /* CONFIG_SMP */ ++ ++ return 0; ++} ++ ++void xnsched_cleanup_proc(void) ++{ ++ struct xnsched_class *p; ++ ++ for_each_xnsched_class(p) { ++ if (p->sched_cleanup_vfile) ++ p->sched_cleanup_vfile(p); ++ } ++ ++#ifdef CONFIG_SMP ++ xnvfile_destroy_regular(&affinity_vfile); ++#endif /* CONFIG_SMP */ ++#ifdef CONFIG_XENO_OPT_STATS ++ xnvfile_destroy_snapshot(&schedacct_vfile); ++ xnvfile_destroy_snapshot(&schedstat_vfile); ++#endif /* CONFIG_XENO_OPT_STATS */ ++ xnvfile_destroy_snapshot(&schedlist_vfile); ++ xnvfile_destroy_dir(&sched_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++/** @} */ +--- linux/kernel/xenomai/select.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/select.c 2021-04-07 16:01:25.864636114 +0800 +@@ -0,0 +1,460 @@ ++/* ++ * Gilles Chanteperdrix ++ * Copyright (C) 2008 Efixo ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include /* For hweight_long */ ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_select Synchronous I/O multiplexing ++ * ++ * This module implements the services needed for implementing the ++ * POSIX select() service, or any other event multiplexing services. 
++ * ++ * Following the implementation of the posix select service, this module defines ++ * three types of events: ++ * - \a XNSELECT_READ meaning that a file descriptor is ready for reading; ++ * - \a XNSELECT_WRITE meaning that a file descriptor is ready for writing; ++ * - \a XNSELECT_EXCEPT meaning that a file descriptor received an exceptional ++ * event. ++ * ++ * It works by defining two structures: ++ * - a @a struct @a xnselect structure, which should be added to every file ++ * descriptor for every event type (read, write, or except); ++ * - a @a struct @a xnselector structure, the selection structure, passed by ++ * the thread calling the xnselect service, where this service does all its ++ * housekeeping. ++ * @{ ++ */ ++ ++static LIST_HEAD(selector_list); ++static int deletion_apc; ++ ++/** ++ * Initialize a @a struct @a xnselect structure. ++ * ++ * This service must be called to initialize a @a struct @a xnselect structure ++ * before it is bound to a selector by the means of xnselect_bind(). ++ * ++ * @param select_block pointer to the xnselect structure to be initialized ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnselect_init(struct xnselect *select_block) ++{ ++ INIT_LIST_HEAD(&select_block->bindings); ++} ++EXPORT_SYMBOL_GPL(xnselect_init); ++ ++static inline int xnselect_wakeup(struct xnselector *selector) ++{ ++ return xnsynch_flush(&selector->synchbase, 0) == XNSYNCH_RESCHED; ++} ++ ++/** ++ * Bind a file descriptor (represented by its @a xnselect structure) to a ++ * selector block. ++ * ++ * @param select_block pointer to the @a struct @a xnselect to be bound; ++ * ++ * @param binding pointer to a newly allocated (using xnmalloc) @a struct ++ * @a xnselect_binding; ++ * ++ * @param selector pointer to the selector structure; ++ * ++ * @param type type of events (@a XNSELECT_READ, @a XNSELECT_WRITE, or @a ++ * XNSELECT_EXCEPT); ++ * ++ * @param index index of the file descriptor (represented by @a ++ * select_block) in the bit fields used by the @a selector structure; ++ * ++ * @param state current state of the file descriptor. ++ * ++ * @a select_block must have been initialized with xnselect_init(), ++ * the @a xnselector structure must have been initialized with ++ * xnselector_init(), @a binding may be uninitialized. ++ * ++ * This service must be called with nklock locked, irqs off. For this reason, ++ * the @a binding parameter must have been allocated by the caller outside the ++ * locking section. ++ * ++ * @retval -EINVAL if @a type or @a index is invalid; ++ * @retval 0 otherwise. 
++ * ++ * @coretags{task-unrestricted, might-switch, atomic-entry} ++ */ ++int xnselect_bind(struct xnselect *select_block, ++ struct xnselect_binding *binding, ++ struct xnselector *selector, ++ unsigned type, ++ unsigned index, ++ unsigned state) ++{ ++ atomic_only(); ++ ++ if (type >= XNSELECT_MAX_TYPES || index > __FD_SETSIZE) ++ return -EINVAL; ++ ++ binding->selector = selector; ++ binding->fd = select_block; ++ binding->type = type; ++ binding->bit_index = index; ++ ++ list_add_tail(&binding->slink, &selector->bindings); ++ list_add_tail(&binding->link, &select_block->bindings); ++ __FD_SET__(index, &selector->fds[type].expected); ++ if (state) { ++ __FD_SET__(index, &selector->fds[type].pending); ++ if (xnselect_wakeup(selector)) ++ xnsched_run(); ++ } else ++ __FD_CLR__(index, &selector->fds[type].pending); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnselect_bind); ++ ++/* Must be called with nklock locked irqs off */ ++int __xnselect_signal(struct xnselect *select_block, unsigned state) ++{ ++ struct xnselect_binding *binding; ++ struct xnselector *selector; ++ int resched = 0; ++ ++ list_for_each_entry(binding, &select_block->bindings, link) { ++ selector = binding->selector; ++ if (state) { ++ if (!__FD_ISSET__(binding->bit_index, ++ &selector->fds[binding->type].pending)) { ++ __FD_SET__(binding->bit_index, ++ &selector->fds[binding->type].pending); ++ if (xnselect_wakeup(selector)) ++ resched = 1; ++ } ++ } else ++ __FD_CLR__(binding->bit_index, ++ &selector->fds[binding->type].pending); ++ } ++ ++ return resched; ++} ++EXPORT_SYMBOL_GPL(__xnselect_signal); ++ ++/** ++ * Destroy the @a xnselect structure associated with a file descriptor. ++ * ++ * Any binding with a @a xnselector block is destroyed. ++ * ++ * @param select_block pointer to the @a xnselect structure associated ++ * with a file descriptor ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++void xnselect_destroy(struct xnselect *select_block) ++{ ++ struct xnselect_binding *binding, *tmp; ++ struct xnselector *selector; ++ int resched = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(&select_block->bindings)) ++ goto out; ++ ++ list_for_each_entry_safe(binding, tmp, &select_block->bindings, link) { ++ list_del(&binding->link); ++ selector = binding->selector; ++ __FD_CLR__(binding->bit_index, ++ &selector->fds[binding->type].expected); ++ if (!__FD_ISSET__(binding->bit_index, ++ &selector->fds[binding->type].pending)) { ++ __FD_SET__(binding->bit_index, ++ &selector->fds[binding->type].pending); ++ if (xnselect_wakeup(selector)) ++ resched = 1; ++ } ++ list_del(&binding->slink); ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(binding); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ if (resched) ++ xnsched_run(); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnselect_destroy); ++ ++static unsigned ++fd_set_andnot(fd_set *result, fd_set *first, fd_set *second, unsigned n) ++{ ++ unsigned i, not_empty = 0; ++ ++ for (i = 0; i < __FDELT__(n); i++) ++ if((result->fds_bits[i] = ++ first->fds_bits[i] & ~(second->fds_bits[i]))) ++ not_empty = 1; ++ ++ if (i < __FDSET_LONGS__ ++ && (result->fds_bits[i] = ++ first->fds_bits[i] & ~(second->fds_bits[i]) & (__FDMASK__(n) - 1))) ++ not_empty = 1; ++ ++ return not_empty; ++} ++ ++static unsigned ++fd_set_and(fd_set *result, fd_set *first, fd_set *second, unsigned n) ++{ ++ unsigned i, not_empty = 0; ++ ++ for (i = 0; i < __FDELT__(n); i++) ++ if((result->fds_bits[i] = ++ first->fds_bits[i] & second->fds_bits[i])) ++ not_empty 
= 1; ++ ++ if (i < __FDSET_LONGS__ ++ && (result->fds_bits[i] = ++ first->fds_bits[i] & second->fds_bits[i] & (__FDMASK__(n) - 1))) ++ not_empty = 1; ++ ++ return not_empty; ++} ++ ++static void fd_set_zeropad(fd_set *set, unsigned n) ++{ ++ unsigned i; ++ ++ i = __FDELT__(n); ++ ++ if (i < __FDSET_LONGS__) ++ set->fds_bits[i] &= (__FDMASK__(n) - 1); ++ ++ for(i++; i < __FDSET_LONGS__; i++) ++ set->fds_bits[i] = 0; ++} ++ ++static unsigned fd_set_popcount(fd_set *set, unsigned n) ++{ ++ unsigned count = 0, i; ++ ++ for (i = 0; i < __FDELT__(n); i++) ++ if (set->fds_bits[i]) ++ count += hweight_long(set->fds_bits[i]); ++ ++ if (i < __FDSET_LONGS__ && (set->fds_bits[i] & (__FDMASK__(n) - 1))) ++ count += hweight_long(set->fds_bits[i] & (__FDMASK__(n) - 1)); ++ ++ return count; ++} ++ ++/** ++ * Initialize a selector structure. ++ * ++ * @param selector The selector structure to be initialized. ++ * ++ * @retval 0 ++ * ++ * @coretags{task-unrestricted} ++ */ ++int xnselector_init(struct xnselector *selector) ++{ ++ unsigned int i; ++ ++ xnsynch_init(&selector->synchbase, XNSYNCH_FIFO, NULL); ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) { ++ __FD_ZERO__(&selector->fds[i].expected); ++ __FD_ZERO__(&selector->fds[i].pending); ++ } ++ INIT_LIST_HEAD(&selector->bindings); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnselector_init); ++ ++/** ++ * Check the state of a number of file descriptors, wait for a state change if ++ * no descriptor is ready. ++ * ++ * @param selector structure to check for pending events ++ * @param out_fds The set of descriptors with pending events if a strictly positive number is returned, or the set of descriptors not yet bound if -ECHRNG is returned; ++ * @param in_fds the set of descriptors which events should be checked ++ * @param nfds the highest-numbered descriptor in any of the @a in_fds sets, plus 1; ++ * @param timeout the timeout, whose meaning depends on @a timeout_mode, note ++ * that xnselect() pass @a timeout and @a timeout_mode unchanged to ++ * xnsynch_sleep_on, so passing a relative value different from XN_INFINITE as a ++ * timeout with @a timeout_mode set to XN_RELATIVE, will cause a longer sleep ++ * than expected if the sleep is interrupted. ++ * @param timeout_mode the mode of @a timeout. ++ * ++ * @retval -EINVAL if @a nfds is negative; ++ * @retval -ECHRNG if some of the descriptors passed in @a in_fds have not yet ++ * been registered with xnselect_bind(), @a out_fds contains the set of such ++ * descriptors; ++ * @retval -EINTR if @a xnselect was interrupted while waiting; ++ * @retval 0 in case of timeout. ++ * @retval the number of file descriptors having received an event. 
++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int xnselect(struct xnselector *selector, ++ fd_set *out_fds[XNSELECT_MAX_TYPES], ++ fd_set *in_fds[XNSELECT_MAX_TYPES], ++ int nfds, ++ xnticks_t timeout, xntmode_t timeout_mode) ++{ ++ unsigned int i, not_empty = 0, count; ++ int info = 0; ++ spl_t s; ++ ++ if ((unsigned) nfds > __FD_SETSIZE) ++ return -EINVAL; ++ ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (out_fds[i]) ++ fd_set_zeropad(out_fds[i], nfds); ++ ++ xnlock_get_irqsave(&nklock, s); ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (out_fds[i] ++ && fd_set_andnot(out_fds[i], in_fds[i], ++ &selector->fds[i].expected, nfds)) ++ not_empty = 1; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (not_empty) ++ return -ECHRNG; ++ ++ xnlock_get_irqsave(&nklock, s); ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (out_fds[i] ++ && fd_set_and(out_fds[i], in_fds[i], ++ &selector->fds[i].pending, nfds)) ++ not_empty = 1; ++ ++ while (!not_empty) { ++ info = xnsynch_sleep_on(&selector->synchbase, ++ timeout, timeout_mode); ++ ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (out_fds[i] ++ && fd_set_and(out_fds[i], in_fds[i], ++ &selector->fds[i].pending, nfds)) ++ not_empty = 1; ++ ++ if (info & (XNBREAK | XNTIMEO)) ++ break; ++ } ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (not_empty) { ++ for (count = 0, i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (out_fds[i]) ++ count += fd_set_popcount(out_fds[i], nfds); ++ ++ return count; ++ } ++ ++ if (info & XNBREAK) ++ return -EINTR; ++ ++ return 0; /* Timeout */ ++} ++EXPORT_SYMBOL_GPL(xnselect); ++ ++/** ++ * Destroy a selector block. ++ * ++ * All bindings with file descriptor are destroyed. ++ * ++ * @param selector the selector block to be destroyed ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnselector_destroy(struct xnselector *selector) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&selector->destroy_link, &selector_list); ++ __xnapc_schedule(deletion_apc); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnselector_destroy); ++ ++static void xnselector_destroy_loop(void *cookie) ++{ ++ struct xnselect_binding *binding, *tmpb; ++ struct xnselector *selector, *tmps; ++ struct xnselect *fd; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(&selector_list)) ++ goto out; ++ ++ list_for_each_entry_safe(selector, tmps, &selector_list, destroy_link) { ++ list_del(&selector->destroy_link); ++ if (list_empty(&selector->bindings)) ++ goto release; ++ list_for_each_entry_safe(binding, tmpb, &selector->bindings, slink) { ++ list_del(&binding->slink); ++ fd = binding->fd; ++ list_del(&binding->link); ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(binding); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ release: ++ xnsynch_destroy(&selector->synchbase); ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnfree(selector); ++ ++ xnlock_get_irqsave(&nklock, s); ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++int xnselect_mount(void) ++{ ++ deletion_apc = xnapc_alloc("selector_list_destroy", ++ xnselector_destroy_loop, NULL); ++ if (deletion_apc < 0) ++ return deletion_apc; ++ ++ return 0; ++} ++ ++int xnselect_umount(void) ++{ ++ xnapc_free(deletion_apc); ++ return 0; ++} ++ ++/** @} */ +--- linux/kernel/xenomai/bufd.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/bufd.c 2021-04-07 16:01:25.859636121 +0800 +@@ -0,0 +1,653 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_bufd Buffer descriptor ++ * ++ * Abstraction for copying data to/from different address spaces ++ * ++ * A buffer descriptor is a simple abstraction dealing with copy ++ * operations to/from memory buffers which may belong to different ++ * address spaces. ++ * ++ * To this end, the buffer descriptor library provides a small set of ++ * copy routines which are aware of address space restrictions when ++ * moving data, and a generic container type which can hold a ++ * reference to - or cover - a particular memory area, either present ++ * in kernel space, or in any of the existing user memory contexts. ++ * ++ * The goal of the buffer descriptor abstraction is to hide address ++ * space specifics from Xenomai services dealing with memory areas, ++ * allowing them to operate on multiple address spaces seamlessly. ++ * ++ * The common usage patterns are as follows: ++ * ++ * - Implementing a Xenomai syscall returning a bulk of data to the ++ * caller, which may have to be copied back to either kernel or user ++ * space: ++ * ++ * @code ++ * [Syscall implementation] ++ * ssize_t rt_bulk_read_inner(struct xnbufd *bufd) ++ * { ++ * ssize_t ret; ++ * size_t len; ++ * void *bulk; ++ * ++ * bulk = get_next_readable_bulk(&len); ++ * ret = xnbufd_copy_from_kmem(bufd, bulk, min(bufd->b_len, len)); ++ * free_bulk(bulk); ++ * ++ * ret = this_may_fail(); ++ * if (ret) ++ * xnbufd_invalidate(bufd); ++ * ++ * return ret; ++ * } ++ * ++ * [Kernel wrapper for in-kernel calls] ++ * int rt_bulk_read(void *ptr, size_t len) ++ * { ++ * struct xnbufd bufd; ++ * ssize_t ret; ++ * ++ * xnbufd_map_kwrite(&bufd, ptr, len); ++ * ret = rt_bulk_read_inner(&bufd); ++ * xnbufd_unmap_kwrite(&bufd); ++ * ++ * return ret; ++ * } ++ * ++ * [Userland trampoline for user syscalls] ++ * int __rt_bulk_read(struct pt_regs *regs) ++ * { ++ * struct xnbufd bufd; ++ * void __user *ptr; ++ * ssize_t ret; ++ * size_t len; ++ * ++ * ptr = (void __user *)__xn_reg_arg1(regs); ++ * len = __xn_reg_arg2(regs); ++ * ++ * xnbufd_map_uwrite(&bufd, ptr, len); ++ * ret = rt_bulk_read_inner(&bufd); ++ * xnbufd_unmap_uwrite(&bufd); ++ * ++ * return ret; ++ * } ++ * @endcode ++ * ++ * - Implementing a Xenomai syscall receiving a bulk of data from the ++ * caller, which may have to be read from either kernel or user ++ * space: ++ * ++ * @code ++ * [Syscall implementation] ++ * ssize_t rt_bulk_write_inner(struct xnbufd *bufd) ++ * { ++ * void *bulk = get_free_bulk(bufd->b_len); ++ * return xnbufd_copy_to_kmem(bulk, bufd, bufd->b_len); ++ * } ++ * ++ * [Kernel wrapper for in-kernel calls] ++ * int rt_bulk_write(const void *ptr, size_t len) ++ * { ++ * struct xnbufd bufd; ++ * ssize_t ret; ++ * ++ * 
xnbufd_map_kread(&bufd, ptr, len); ++ * ret = rt_bulk_write_inner(&bufd); ++ * xnbufd_unmap_kread(&bufd); ++ * ++ * return ret; ++ * } ++ * ++ * [Userland trampoline for user syscalls] ++ * int __rt_bulk_write(struct pt_regs *regs) ++ * { ++ * struct xnbufd bufd; ++ * void __user *ptr; ++ * ssize_t ret; ++ * size_t len; ++ * ++ * ptr = (void __user *)__xn_reg_arg1(regs); ++ * len = __xn_reg_arg2(regs); ++ * ++ * xnbufd_map_uread(&bufd, ptr, len); ++ * ret = rt_bulk_write_inner(&bufd); ++ * xnbufd_unmap_uread(&bufd); ++ * ++ * return ret; ++ * } ++ * @endcode ++ * ++ *@{*/ ++ ++/** ++ * @fn void xnbufd_map_kread(struct xnbufd *bufd, const void *ptr, size_t len) ++ * @brief Initialize a buffer descriptor for reading from kernel memory. ++ * ++ * The new buffer descriptor may be used to copy data from kernel ++ * memory. This routine should be used in pair with ++ * xnbufd_unmap_kread(). ++ * ++ * @param bufd The address of the buffer descriptor which will map a ++ * @a len bytes kernel memory area, starting from @a ptr. ++ * ++ * @param ptr The start of the kernel buffer to map. ++ * ++ * @param len The length of the kernel buffer starting at @a ptr. ++ * ++ * @coretags{unrestricted} ++ */ ++ ++/** ++ * @fn void xnbufd_map_kwrite(struct xnbufd *bufd, void *ptr, size_t len) ++ * @brief Initialize a buffer descriptor for writing to kernel memory. ++ * ++ * The new buffer descriptor may be used to copy data to kernel ++ * memory. This routine should be used in pair with ++ * xnbufd_unmap_kwrite(). ++ * ++ * @param bufd The address of the buffer descriptor which will map a ++ * @a len bytes kernel memory area, starting from @a ptr. ++ * ++ * @param ptr The start of the kernel buffer to map. ++ * ++ * @param len The length of the kernel buffer starting at @a ptr. ++ * ++ * @coretags{unrestricted} ++ */ ++void xnbufd_map_kmem(struct xnbufd *bufd, void *ptr, size_t len) ++{ ++ bufd->b_ptr = ptr; ++ bufd->b_len = len; ++ bufd->b_mm = NULL; ++ bufd->b_off = 0; ++ bufd->b_carry = NULL; ++} ++EXPORT_SYMBOL_GPL(xnbufd_map_kmem); ++ ++/** ++ * @fn void xnbufd_map_uread(struct xnbufd *bufd, const void __user *ptr, size_t len) ++ * @brief Initialize a buffer descriptor for reading from user memory. ++ * ++ * The new buffer descriptor may be used to copy data from user ++ * memory. This routine should be used in pair with ++ * xnbufd_unmap_uread(). ++ * ++ * @param bufd The address of the buffer descriptor which will map a ++ * @a len bytes user memory area, starting from @a ptr. @a ptr is ++ * never dereferenced directly, since it may refer to a buffer that ++ * lives in another address space. ++ * ++ * @param ptr The start of the user buffer to map. ++ * ++ * @param len The length of the user buffer starting at @a ptr. ++ * ++ * @coretags{task-unrestricted} ++ */ ++ ++/** ++ * @fn void xnbufd_map_uwrite(struct xnbufd *bufd, void __user *ptr, size_t len) ++ * @brief Initialize a buffer descriptor for writing to user memory. ++ * ++ * The new buffer descriptor may be used to copy data to user ++ * memory. This routine should be used in pair with ++ * xnbufd_unmap_uwrite(). ++ * ++ * @param bufd The address of the buffer descriptor which will map a ++ * @a len bytes user memory area, starting from @a ptr. @a ptr is ++ * never dereferenced directly, since it may refer to a buffer that ++ * lives in another address space. ++ * ++ * @param ptr The start of the user buffer to map. ++ * ++ * @param len The length of the user buffer starting at @a ptr. 
++ * ++ * @coretags{task-unrestricted} ++ */ ++ ++void xnbufd_map_umem(struct xnbufd *bufd, void __user *ptr, size_t len) ++{ ++ bufd->b_ptr = ptr; ++ bufd->b_len = len; ++ bufd->b_mm = current->mm; ++ bufd->b_off = 0; ++ bufd->b_carry = NULL; ++} ++EXPORT_SYMBOL_GPL(xnbufd_map_umem); ++ ++/** ++ * @fn ssize_t xnbufd_copy_to_kmem(void *to, struct xnbufd *bufd, size_t len) ++ * @brief Copy memory covered by a buffer descriptor to kernel memory. ++ * ++ * This routine copies @a len bytes from the area referred to by the ++ * buffer descriptor @a bufd to the kernel memory area @a to. ++ * xnbufd_copy_to_kmem() tracks the read offset within the source ++ * memory internally, so that it may be called several times in a ++ * loop, until the entire memory area is loaded. ++ * ++ * The source address space is dealt with, according to the following ++ * rules: ++ * ++ * - if @a bufd refers to readable kernel area (i.e. see ++ * xnbufd_map_kread()), the copy is immediately and fully performed ++ * with no restriction. ++ * ++ * - if @a bufd refers to a readable user area (i.e. see ++ * xnbufd_map_uread()), the copy is performed only if that area ++ * lives in the currently active address space, and only if the ++ * caller may sleep Linux-wise to process any potential page fault ++ * which may arise while reading from that memory. ++ * ++ * - any attempt to read from @a bufd from a non-suitable context is ++ * considered as a bug, and will raise a panic assertion when the ++ * nucleus is compiled in debug mode. ++ * ++ * @param to The start address of the kernel memory to copy to. ++ * ++ * @param bufd The address of the buffer descriptor covering the user ++ * memory to copy data from. ++ * ++ * @param len The length of the user memory to copy from @a bufd. ++ * ++ * @return The number of bytes read so far from the memory area ++ * covered by @a ubufd. Otherwise: ++ * ++ * - -EINVAL is returned upon attempt to read from the user area from ++ * an invalid context. This error is only returned when the debug ++ * mode is disabled; otherwise a panic assertion is raised. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note Calling this routine while holding the nklock and/or running ++ * with interrupts disabled is invalid, and doing so will trigger a ++ * debug assertion. ++ * ++ * This routine may switch the caller to secondary mode if a page ++ * fault occurs while reading from the user area. For that reason, ++ * xnbufd_copy_to_kmem() may only be called from a preemptible section ++ * (Linux-wise). ++ */ ++ssize_t xnbufd_copy_to_kmem(void *to, struct xnbufd *bufd, size_t len) ++{ ++ caddr_t from; ++ ++ thread_only(); ++ ++ if (len == 0) ++ goto out; ++ ++ from = bufd->b_ptr + bufd->b_off; ++ ++ /* ++ * If the descriptor covers a source buffer living in the ++ * kernel address space, we may read from it directly. ++ */ ++ if (bufd->b_mm == NULL) { ++ memcpy(to, from, len); ++ goto advance_offset; ++ } ++ ++ /* ++ * We want to read data from user-space, check whether: ++ * 1) the source buffer lies in the current address space, ++ * 2) we may fault while reading from the buffer directly. ++ * ++ * If we can't reach the buffer, or the current context may ++ * not fault while reading data from it, copy_from_user() is ++ * not an option and we have a bug somewhere, since there is ++ * no way we could fetch the data to kernel space immediately. ++ * ++ * Note that we don't check for non-preemptible Linux context ++ * here, since the source buffer would live in kernel space in ++ * such a case. 
++ */ ++ if (current->mm == bufd->b_mm) { ++ preemptible_only(); ++ if (cobalt_copy_from_user(to, (void __user *)from, len)) ++ return -EFAULT; ++ goto advance_offset; ++ } ++ ++ XENO_BUG(COBALT); ++ ++ return -EINVAL; ++ ++advance_offset: ++ bufd->b_off += len; ++out: ++ return (ssize_t)bufd->b_off; ++} ++EXPORT_SYMBOL_GPL(xnbufd_copy_to_kmem); ++ ++/** ++ * @fn ssize_t xnbufd_copy_from_kmem(struct xnbufd *bufd, void *from, size_t len) ++ * @brief Copy kernel memory to the area covered by a buffer descriptor. ++ * ++ * This routine copies @a len bytes from the kernel memory starting at ++ * @a from to the area referred to by the buffer descriptor @a ++ * bufd. xnbufd_copy_from_kmem() tracks the write offset within the ++ * destination memory internally, so that it may be called several ++ * times in a loop, until the entire memory area is stored. ++ * ++ * The destination address space is dealt with, according to the ++ * following rules: ++ * ++ * - if @a bufd refers to a writable kernel area (i.e. see ++ * xnbufd_map_kwrite()), the copy is immediatly and fully performed ++ * with no restriction. ++ * ++ * - if @a bufd refers to a writable user area (i.e. see ++ * xnbufd_map_uwrite()), the copy is performed only if that area ++ * lives in the currently active address space, and only if the ++ * caller may sleep Linux-wise to process any potential page fault ++ * which may arise while writing to that memory. ++ * ++ * - if @a bufd refers to a user area which may not be immediately ++ * written to from the current context, the copy is postponed until ++ * xnbufd_unmap_uwrite() is invoked for @a ubufd, at which point the ++ * copy will take place. In such a case, the source memory is ++ * transferred to a carry over buffer allocated internally; this ++ * operation may lead to request dynamic memory from the nucleus ++ * heap if @a len is greater than 64 bytes. ++ * ++ * @param bufd The address of the buffer descriptor covering the user ++ * memory to copy data to. ++ * ++ * @param from The start address of the kernel memory to copy from. ++ * ++ * @param len The length of the kernel memory to copy to @a bufd. ++ * ++ * @return The number of bytes written so far to the memory area ++ * covered by @a ubufd. Otherwise, ++ * ++ * - -ENOMEM is returned when no memory is available from the nucleus ++ * heap to allocate the carry over buffer. ++ * ++ * @coretags{unrestricted} ++ * ++ * @note Calling this routine while holding the nklock and/or running ++ * with interrupts disabled is invalid, and doing so will trigger a ++ * debug assertion. ++ * ++ * This routine may switch the caller to secondary mode if a page ++ * fault occurs while reading from the user area. For that reason, ++ * xnbufd_copy_to_kmem() may only be called from a preemptible section ++ * (Linux-wise). ++ */ ++ssize_t xnbufd_copy_from_kmem(struct xnbufd *bufd, void *from, size_t len) ++{ ++ caddr_t to; ++ ++ thread_only(); ++ ++ if (len == 0) ++ goto out; ++ ++ to = bufd->b_ptr + bufd->b_off; ++ ++ /* ++ * If the descriptor covers a destination buffer living in the ++ * kernel address space, we may copy to it directly. ++ */ ++ if (bufd->b_mm == NULL) ++ goto direct_copy; ++ ++ /* ++ * We want to pass data to user-space, check whether: ++ * 1) the destination buffer lies in the current address space, ++ * 2) we may fault while writing to the buffer directly. 
++ * ++ * If we can't reach the buffer, or the current context may ++ * not fault while copying data to it, copy_to_user() is not ++ * an option and we have to convey the data from kernel memory ++ * through the carry over buffer. ++ * ++ * Note that we don't check for non-preemptible Linux context ++ * here: feeding a RT activity with data from a non-RT context ++ * is wrong in the first place, so never mind. ++ */ ++ if (current->mm == bufd->b_mm) { ++ preemptible_only(); ++ if (cobalt_copy_to_user((void __user *)to, from, len)) ++ return -EFAULT; ++ goto advance_offset; ++ } ++ ++ /* ++ * We need a carry over buffer to convey the data to ++ * user-space. xnbufd_unmap_uwrite() should be called on the ++ * way back to user-space to update the destination buffer ++ * from the carry over area. ++ */ ++ if (bufd->b_carry == NULL) { ++ /* ++ * Try to use the fast carry over area available ++ * directly from the descriptor for short messages, to ++ * save a dynamic allocation request. ++ */ ++ if (bufd->b_len <= sizeof(bufd->b_buf)) ++ bufd->b_carry = bufd->b_buf; ++ else { ++ bufd->b_carry = xnmalloc(bufd->b_len); ++ if (bufd->b_carry == NULL) ++ return -ENOMEM; ++ } ++ to = bufd->b_carry; ++ } else ++ to = bufd->b_carry + bufd->b_off; ++ ++direct_copy: ++ memcpy(to, from, len); ++ ++advance_offset: ++ bufd->b_off += len; ++out: ++ return (ssize_t)bufd->b_off; ++} ++EXPORT_SYMBOL_GPL(xnbufd_copy_from_kmem); ++ ++/** ++ * @fn void xnbufd_unmap_uread(struct xnbufd *bufd) ++ * @brief Finalize a buffer descriptor obtained from xnbufd_map_uread(). ++ * ++ * This routine finalizes a buffer descriptor previously initialized ++ * by a call to xnbufd_map_uread(), to read data from a user area. ++ * ++ * @param bufd The address of the buffer descriptor to finalize. ++ * ++ * @return The number of bytes read so far from the memory area ++ * covered by @a ubufd. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note Calling this routine while holding the nklock and/or running ++ * with interrupts disabled is invalid, and doing so will trigger a ++ * debug assertion. ++ */ ++ssize_t xnbufd_unmap_uread(struct xnbufd *bufd) ++{ ++ preemptible_only(); ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ bufd->b_ptr = (caddr_t)-1; ++#endif ++ return bufd->b_off; ++} ++EXPORT_SYMBOL_GPL(xnbufd_unmap_uread); ++ ++/** ++ * @fn void xnbufd_unmap_uwrite(struct xnbufd *bufd) ++ * @brief Finalize a buffer descriptor obtained from xnbufd_map_uwrite(). ++ * ++ * This routine finalizes a buffer descriptor previously initialized ++ * by a call to xnbufd_map_uwrite(), to write data to a user area. ++ * ++ * The main action taken is to write the contents of the kernel memory ++ * area passed to xnbufd_copy_from_kmem() whenever the copy operation ++ * was postponed at that time; the carry over buffer is eventually ++ * released as needed. If xnbufd_copy_from_kmem() was allowed to copy ++ * to the destination user memory at once, then xnbufd_unmap_uwrite() ++ * leads to a no-op. ++ * ++ * @param bufd The address of the buffer descriptor to finalize. ++ * ++ * @return The number of bytes written so far to the memory area ++ * covered by @a ubufd. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note Calling this routine while holding the nklock and/or running ++ * with interrupts disabled is invalid, and doing so will trigger a ++ * debug assertion. 
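++ *
++ * Purely as an illustration, and assuming xnbufd_map_uwrite() takes
++ * the same (bufd, ptr, len) arguments as xnbufd_map_umem() shown
++ * above, a kernel-side producer would typically pair this routine
++ * with xnbufd_copy_from_kmem() as sketched below; the @c u_buf and
++ * @c kdata names are placeholders only.
++ *
++ * @code
++ * struct xnbufd bufd;
++ * ssize_t ret;
++ *
++ * xnbufd_map_uwrite(&bufd, u_buf, len);
++ * ret = xnbufd_copy_from_kmem(&bufd, kdata, len);
++ * if (ret < 0)
++ *     xnbufd_invalidate(&bufd); /* drop any carry over data on error */
++ * ret = xnbufd_unmap_uwrite(&bufd); /* flush carry over buffer, if any */
++ * @endcode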
++ */ ++ssize_t xnbufd_unmap_uwrite(struct xnbufd *bufd) ++{ ++ ssize_t ret = 0; ++ void __user *to; ++ void *from; ++ size_t len; ++ ++ preemptible_only(); ++ ++ len = bufd->b_off; ++ ++ if (bufd->b_carry == NULL) ++ /* Copy took place directly. Fine. */ ++ goto done; ++ ++ /* ++ * Something was written to the carry over area, copy the ++ * contents to user-space, then release the area if needed. ++ */ ++ to = (void __user *)bufd->b_ptr; ++ from = bufd->b_carry; ++ ret = cobalt_copy_to_user(to, from, len); ++ ++ if (bufd->b_len > sizeof(bufd->b_buf)) ++ xnfree(bufd->b_carry); ++done: ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ bufd->b_ptr = (caddr_t)-1; ++#endif ++ return ret ?: (ssize_t)len; ++} ++EXPORT_SYMBOL_GPL(xnbufd_unmap_uwrite); ++ ++/** ++ * @fn void xnbufd_reset(struct xnbufd *bufd) ++ * @brief Reset a buffer descriptor. ++ * ++ * The buffer descriptor is reset, so that all data already copied is ++ * forgotten. Any carry over buffer allocated is kept, though. ++ * ++ * @param bufd The address of the buffer descriptor to reset. ++ * ++ * @coretags{unrestricted} ++ */ ++ ++/** ++ * @fn void xnbufd_invalidate(struct xnbufd *bufd) ++ * @brief Invalidate a buffer descriptor. ++ * ++ * The buffer descriptor is invalidated, making it unusable for ++ * further copy operations. If an outstanding carry over buffer was ++ * allocated by a previous call to xnbufd_copy_from_kmem(), it is ++ * immediately freed so that no data transfer will happen when the ++ * descriptor is finalized. ++ * ++ * The only action that may subsequently be performed on an ++ * invalidated descriptor is calling the relevant unmapping routine ++ * for it. For that reason, xnbufd_invalidate() should be invoked on ++ * the error path when data may have been transferred to the carry ++ * over buffer. ++ * ++ * @param bufd The address of the buffer descriptor to invalidate. ++ * ++ * @coretags{unrestricted} ++ */ ++void xnbufd_invalidate(struct xnbufd *bufd) ++{ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ bufd->b_ptr = (caddr_t)-1; ++#endif ++ if (bufd->b_carry) { ++ if (bufd->b_len > sizeof(bufd->b_buf)) ++ xnfree(bufd->b_carry); ++ bufd->b_carry = NULL; ++ } ++ bufd->b_off = 0; ++} ++EXPORT_SYMBOL_GPL(xnbufd_invalidate); ++ ++/** ++ * @fn void xnbufd_unmap_kread(struct xnbufd *bufd) ++ * @brief Finalize a buffer descriptor obtained from xnbufd_map_kread(). ++ * ++ * This routine finalizes a buffer descriptor previously initialized ++ * by a call to xnbufd_map_kread(), to read data from a kernel area. ++ * ++ * @param bufd The address of the buffer descriptor to finalize. ++ * ++ * @return The number of bytes read so far from the memory area ++ * covered by @a ubufd. ++ * ++ * @coretags{task-unrestricted} ++ */ ++ssize_t xnbufd_unmap_kread(struct xnbufd *bufd) ++{ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ bufd->b_ptr = (caddr_t)-1; ++#endif ++ return bufd->b_off; ++} ++EXPORT_SYMBOL_GPL(xnbufd_unmap_kread); ++ ++/** ++ * @fn void xnbufd_unmap_kwrite(struct xnbufd *bufd) ++ * @brief Finalize a buffer descriptor obtained from xnbufd_map_kwrite(). ++ * ++ * This routine finalizes a buffer descriptor previously initialized ++ * by a call to xnbufd_map_kwrite(), to write data to a kernel area. ++ * ++ * @param bufd The address of the buffer descriptor to finalize. ++ * ++ * @return The number of bytes written so far to the memory area ++ * covered by @a ubufd. 
++ * ++ * @coretags{task-unrestricted} ++ */ ++ssize_t xnbufd_unmap_kwrite(struct xnbufd *bufd) ++{ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ bufd->b_ptr = (caddr_t)-1; ++#endif ++ return bufd->b_off; ++} ++EXPORT_SYMBOL_GPL(xnbufd_unmap_kwrite); ++ ++/** @} */ +--- linux/kernel/xenomai/sched-quota.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-quota.c 2021-04-07 16:01:25.854636128 +0800 +@@ -0,0 +1,835 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * With this policy, each per-CPU scheduler slot maintains a list of ++ * active thread groups, picking from the sched_rt runqueue. ++ * ++ * Each time a thread is picked from the runqueue, we check whether we ++ * still have budget for running it, looking at the group it belongs ++ * to. If so, a timer is armed to elapse when that group has no more ++ * budget, would the incoming thread run unpreempted until then ++ * (i.e. xnsched_quota->limit_timer). ++ * ++ * Otherwise, if no budget remains in the group for running the ++ * candidate thread, we move the latter to a local expiry queue ++ * maintained by the group. This process is done on the fly as we pull ++ * from the runqueue. ++ * ++ * Updating the remaining budget is done each time the Cobalt core ++ * asks for replacing the current thread with the next runnable one, ++ * i.e. xnsched_quota_pick(). There we charge the elapsed run time of ++ * the outgoing thread to the relevant group, and conversely, we check ++ * whether the incoming thread has budget. ++ * ++ * Finally, a per-CPU timer (xnsched_quota->refill_timer) periodically ++ * ticks in the background, in accordance to the defined quota ++ * interval. Thread group budgets get replenished by its handler in ++ * accordance to their respective share, pushing all expired threads ++ * back to the run queue in the same move. ++ * ++ * NOTE: since the core logic enforcing the budget entirely happens in ++ * xnsched_quota_pick(), applying a budget change can be done as ++ * simply as forcing the rescheduling procedure to be invoked asap. As ++ * a result of this, the Cobalt core will ask for the next thread to ++ * run, which means calling xnsched_quota_pick() eventually. ++ * ++ * CAUTION: xnsched_quota_group->nr_active does count both the threads ++ * from that group linked to the sched_rt runqueue, _and_ the threads ++ * moved to the local expiry queue. As a matter of fact, the expired ++ * threads - those for which we consumed all the per-group budget - ++ * are still seen as runnable (i.e. not blocked/suspended) by the ++ * Cobalt core. This only means that the SCHED_QUOTA policy won't pick ++ * them until the corresponding budget is replenished. 
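++ *
++ * As a purely illustrative example of the replenishment arithmetic
++ * implemented by replenish_budget() below (the figures are made up,
++ * not defaults): assume a 10 ms quota interval, quota = 30%
++ * (quota_ns = 3 ms) and peak = 50% (quota_peak_ns = 5 ms). If 2 ms
++ * were left unconsumed when the refill timer fires, the new budget
++ * is 2 + 3 = 5 ms, which does not exceed the peak, so no credit
++ * accumulates. If the full 3 ms were left while the group still had
++ * runnable threads (e.g. it was preempted by other groups for the
++ * whole interval), 3 + 3 = 6 ms exceeds the 5 ms peak: the budget is
++ * clamped to 5 ms and the extra 1 ms is accumulated as credit, to be
++ * consumed over subsequent intervals, or dropped as soon as the
++ * group has no runnable thread left.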
++ */ ++static DECLARE_BITMAP(group_map, CONFIG_XENO_OPT_SCHED_QUOTA_NR_GROUPS); ++ ++static inline int group_is_active(struct xnsched_quota_group *tg) ++{ ++ struct xnthread *curr = tg->sched->curr; ++ ++ if (tg->nr_active) ++ return 1; ++ ++ /* ++ * Check whether the current thread belongs to the group, and ++ * is still in running state (XNREADY denotes a thread linked ++ * to the runqueue, in which case tg->nr_active already ++ * accounts for it). ++ */ ++ if (curr->quota == tg && ++ xnthread_test_state(curr, XNREADY|XNTHREAD_BLOCK_BITS) == 0) ++ return 1; ++ ++ return 0; ++} ++ ++static inline void replenish_budget(struct xnsched_quota *qs, ++ struct xnsched_quota_group *tg) ++{ ++ xnticks_t budget_ns, credit_ns; ++ ++ if (tg->quota_ns == tg->quota_peak_ns) { ++ /* ++ * Fast path: we don't accumulate runtime credit. ++ * This includes groups with no runtime limit ++ * (i.e. quota off: quota >= period && quota == peak). ++ */ ++ tg->run_budget_ns = tg->quota_ns; ++ return; ++ } ++ ++ /* ++ * We have to deal with runtime credit accumulation, as the ++ * group may consume more than its base quota during a single ++ * interval, up to a peak duration though (not to monopolize ++ * the CPU). ++ * ++ * - In the simplest case, a group is allotted a new full ++ * budget plus the unconsumed portion of the previous budget, ++ * provided the sum does not exceed the peak quota. ++ * ++ * - When there is too much budget for a single interval ++ * (i.e. above peak quota), we spread the extra time over ++ * multiple intervals through a credit accumulation mechanism. ++ * ++ * - The accumulated credit is dropped whenever a group has no ++ * runnable threads. ++ */ ++ if (!group_is_active(tg)) { ++ /* Drop accumulated credit. */ ++ tg->run_credit_ns = 0; ++ tg->run_budget_ns = tg->quota_ns; ++ return; ++ } ++ ++ budget_ns = tg->run_budget_ns + tg->quota_ns; ++ if (budget_ns > tg->quota_peak_ns) { ++ /* Too much budget, spread it over intervals. */ ++ tg->run_credit_ns += budget_ns - tg->quota_peak_ns; ++ tg->run_budget_ns = tg->quota_peak_ns; ++ } else if (tg->run_credit_ns) { ++ credit_ns = tg->quota_peak_ns - budget_ns; ++ /* Consume the accumulated credit. */ ++ if (tg->run_credit_ns >= credit_ns) ++ tg->run_credit_ns -= credit_ns; ++ else { ++ credit_ns = tg->run_credit_ns; ++ tg->run_credit_ns = 0; ++ } ++ /* Allot extended budget, limited to peak quota. */ ++ tg->run_budget_ns = budget_ns + credit_ns; ++ } else ++ /* No credit, budget was below peak quota. */ ++ tg->run_budget_ns = budget_ns; ++} ++ ++static void quota_refill_handler(struct xntimer *timer) ++{ ++ struct xnsched_quota_group *tg; ++ struct xnthread *thread, *tmp; ++ struct xnsched_quota *qs; ++ struct xnsched *sched; ++ ++ qs = container_of(timer, struct xnsched_quota, refill_timer); ++ XENO_BUG_ON(COBALT, list_empty(&qs->groups)); ++ sched = container_of(qs, struct xnsched, quota); ++ ++ trace_cobalt_schedquota_refill(0); ++ ++ list_for_each_entry(tg, &qs->groups, next) { ++ /* Allot a new runtime budget for the group. */ ++ replenish_budget(qs, tg); ++ ++ if (tg->run_budget_ns == 0 || list_empty(&tg->expired)) ++ continue; ++ /* ++ * For each group living on this CPU, move all expired ++ * threads back to the runqueue. Since those threads ++ * were moved out of the runqueue as we were ++ * considering them for execution, we push them back ++ * in LIFO order to their respective priority group. ++ * The expiry queue is FIFO to keep ordering right ++ * among expired threads. 
++ */ ++ list_for_each_entry_safe_reverse(thread, tmp, &tg->expired, quota_expired) { ++ list_del_init(&thread->quota_expired); ++ xnsched_addq(&sched->rt.runnable, thread); ++ } ++ } ++ ++ xnsched_set_self_resched(timer->sched); ++} ++ ++static void quota_limit_handler(struct xntimer *timer) ++{ ++ struct xnsched *sched; ++ ++ sched = container_of(timer, struct xnsched, quota.limit_timer); ++ /* ++ * Force a rescheduling on the return path of the current ++ * interrupt, so that the budget is re-evaluated for the ++ * current group in xnsched_quota_pick(). ++ */ ++ xnsched_set_self_resched(sched); ++} ++ ++static int quota_sum_all(struct xnsched_quota *qs) ++{ ++ struct xnsched_quota_group *tg; ++ int sum; ++ ++ if (list_empty(&qs->groups)) ++ return 0; ++ ++ sum = 0; ++ list_for_each_entry(tg, &qs->groups, next) ++ sum += tg->quota_percent; ++ ++ return sum; ++} ++ ++static void xnsched_quota_init(struct xnsched *sched) ++{ ++ char limiter_name[XNOBJECT_NAME_LEN], refiller_name[XNOBJECT_NAME_LEN]; ++ struct xnsched_quota *qs = &sched->quota; ++ ++ qs->period_ns = CONFIG_XENO_OPT_SCHED_QUOTA_PERIOD * 1000ULL; ++ INIT_LIST_HEAD(&qs->groups); ++ ++#ifdef CONFIG_SMP ++ ksformat(refiller_name, sizeof(refiller_name), ++ "[quota-refill/%u]", sched->cpu); ++ ksformat(limiter_name, sizeof(limiter_name), ++ "[quota-limit/%u]", sched->cpu); ++#else ++ strcpy(refiller_name, "[quota-refill]"); ++ strcpy(limiter_name, "[quota-limit]"); ++#endif ++ xntimer_init(&qs->refill_timer, ++ &nkclock, quota_refill_handler, sched, ++ XNTIMER_IGRAVITY); ++ xntimer_set_name(&qs->refill_timer, refiller_name); ++ ++ xntimer_init(&qs->limit_timer, ++ &nkclock, quota_limit_handler, sched, ++ XNTIMER_IGRAVITY); ++ xntimer_set_name(&qs->limit_timer, limiter_name); ++} ++ ++static bool xnsched_quota_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_quota_group *tg; ++ struct xnsched_quota *qs; ++ bool effective; ++ ++ xnthread_clear_state(thread, XNWEAK); ++ effective = xnsched_set_effective_priority(thread, p->quota.prio); ++ ++ qs = &thread->sched->quota; ++ list_for_each_entry(tg, &qs->groups, next) { ++ if (tg->tgid != p->quota.tgid) ++ continue; ++ if (thread->quota) { ++ /* Dequeued earlier by our caller. */ ++ list_del(&thread->quota_next); ++ thread->quota->nr_threads--; ++ } ++ ++ trace_cobalt_schedquota_add_thread(tg, thread); ++ ++ thread->quota = tg; ++ list_add(&thread->quota_next, &tg->members); ++ tg->nr_threads++; ++ return effective; ++ } ++ ++ XENO_BUG(COBALT); ++ ++ return false; ++} ++ ++static void xnsched_quota_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->quota.prio = thread->cprio; ++ p->quota.tgid = thread->quota->tgid; ++} ++ ++static void xnsched_quota_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p) { ++ /* We should not cross groups during PI boost. 
*/ ++ XENO_WARN_ON(COBALT, ++ thread->base_class == &xnsched_class_quota && ++ thread->quota->tgid != p->quota.tgid); ++ thread->cprio = p->quota.prio; ++ } else ++ thread->cprio = thread->bprio; ++} ++ ++static void xnsched_quota_protectprio(struct xnthread *thread, int prio) ++{ ++ if (prio > XNSCHED_QUOTA_MAX_PRIO) ++ prio = XNSCHED_QUOTA_MAX_PRIO; ++ ++ thread->cprio = prio; ++} ++ ++static int xnsched_quota_chkparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_quota_group *tg; ++ struct xnsched_quota *qs; ++ int tgid; ++ ++ if (p->quota.prio < XNSCHED_QUOTA_MIN_PRIO || ++ p->quota.prio > XNSCHED_QUOTA_MAX_PRIO) ++ return -EINVAL; ++ ++ tgid = p->quota.tgid; ++ if (tgid < 0 || tgid >= CONFIG_XENO_OPT_SCHED_QUOTA_NR_GROUPS) ++ return -EINVAL; ++ ++ /* ++ * The group must be managed on the same CPU the thread ++ * currently runs on. ++ */ ++ qs = &thread->sched->quota; ++ list_for_each_entry(tg, &qs->groups, next) { ++ if (tg->tgid == tgid) ++ return 0; ++ } ++ ++ /* ++ * If that group exists nevertheless, we give userland a ++ * specific error code. ++ */ ++ if (test_bit(tgid, group_map)) ++ return -EPERM; ++ ++ return -EINVAL; ++} ++ ++static void xnsched_quota_forget(struct xnthread *thread) ++{ ++ trace_cobalt_schedquota_remove_thread(thread->quota, thread); ++ ++ thread->quota->nr_threads--; ++ XENO_BUG_ON(COBALT, thread->quota->nr_threads < 0); ++ list_del(&thread->quota_next); ++ thread->quota = NULL; ++} ++ ++static void xnsched_quota_kick(struct xnthread *thread) ++{ ++ struct xnsched_quota_group *tg = thread->quota; ++ struct xnsched *sched = thread->sched; ++ ++ /* ++ * Allow a kicked thread to be elected for running until it ++ * relaxes, even if the group it belongs to lacks runtime ++ * budget. 
++ */ ++ if (tg->run_budget_ns == 0 && !list_empty(&thread->quota_expired)) { ++ list_del_init(&thread->quota_expired); ++ xnsched_addq_tail(&sched->rt.runnable, thread); ++ } ++} ++ ++static inline int thread_is_runnable(struct xnthread *thread) ++{ ++ return thread->quota->run_budget_ns > 0 || ++ xnthread_test_info(thread, XNKICKED); ++} ++ ++static void xnsched_quota_enqueue(struct xnthread *thread) ++{ ++ struct xnsched_quota_group *tg = thread->quota; ++ struct xnsched *sched = thread->sched; ++ ++ if (!thread_is_runnable(thread)) ++ list_add_tail(&thread->quota_expired, &tg->expired); ++ else ++ xnsched_addq_tail(&sched->rt.runnable, thread); ++ ++ tg->nr_active++; ++} ++ ++static void xnsched_quota_dequeue(struct xnthread *thread) ++{ ++ struct xnsched_quota_group *tg = thread->quota; ++ struct xnsched *sched = thread->sched; ++ ++ if (!list_empty(&thread->quota_expired)) ++ list_del_init(&thread->quota_expired); ++ else ++ xnsched_delq(&sched->rt.runnable, thread); ++ ++ tg->nr_active--; ++} ++ ++static void xnsched_quota_requeue(struct xnthread *thread) ++{ ++ struct xnsched_quota_group *tg = thread->quota; ++ struct xnsched *sched = thread->sched; ++ ++ if (!thread_is_runnable(thread)) ++ list_add(&thread->quota_expired, &tg->expired); ++ else ++ xnsched_addq(&sched->rt.runnable, thread); ++ ++ tg->nr_active++; ++} ++ ++static struct xnthread *xnsched_quota_pick(struct xnsched *sched) ++{ ++ struct xnthread *next, *curr = sched->curr; ++ struct xnsched_quota *qs = &sched->quota; ++ struct xnsched_quota_group *otg, *tg; ++ xnticks_t now, elapsed; ++ int ret; ++ ++ now = xnclock_read_monotonic(&nkclock); ++ otg = curr->quota; ++ if (otg == NULL) ++ goto pick; ++ /* ++ * Charge the time consumed by the outgoing thread to the ++ * group it belongs to. ++ */ ++ elapsed = now - otg->run_start_ns; ++ if (elapsed < otg->run_budget_ns) ++ otg->run_budget_ns -= elapsed; ++ else ++ otg->run_budget_ns = 0; ++pick: ++ next = xnsched_getq(&sched->rt.runnable); ++ if (next == NULL) { ++ xntimer_stop(&qs->limit_timer); ++ return NULL; ++ } ++ ++ /* ++ * As we basically piggyback on the SCHED_FIFO runqueue, make ++ * sure to detect non-quota threads. ++ */ ++ tg = next->quota; ++ if (tg == NULL) ++ return next; ++ ++ tg->run_start_ns = now; ++ ++ /* ++ * Don't consider budget if kicked, we have to allow this ++ * thread to run until it eventually relaxes. ++ */ ++ if (xnthread_test_info(next, XNKICKED)) { ++ xntimer_stop(&qs->limit_timer); ++ goto out; ++ } ++ ++ if (tg->run_budget_ns == 0) { ++ /* Flush expired group members as we go. */ ++ list_add_tail(&next->quota_expired, &tg->expired); ++ goto pick; ++ } ++ ++ if (otg == tg && xntimer_running_p(&qs->limit_timer)) ++ /* Same group, leave the running timer untouched. */ ++ goto out; ++ ++ /* Arm limit timer for the new running group. */ ++ ret = xntimer_start(&qs->limit_timer, now + tg->run_budget_ns, ++ XN_INFINITE, XN_ABSOLUTE); ++ if (ret) { ++ /* Budget exhausted: deactivate this group. */ ++ tg->run_budget_ns = 0; ++ list_add_tail(&next->quota_expired, &tg->expired); ++ goto pick; ++ } ++out: ++ tg->nr_active--; ++ ++ return next; ++} ++ ++static void xnsched_quota_migrate(struct xnthread *thread, struct xnsched *sched) ++{ ++ union xnsched_policy_param param; ++ /* ++ * Runtime quota groups are defined per-CPU, so leaving the ++ * current CPU means exiting the group. We do this by moving ++ * the target thread to the plain RT class. 
++ */ ++ param.rt.prio = thread->cprio; ++ __xnthread_set_schedparam(thread, &xnsched_class_rt, ¶m); ++} ++ ++/** ++ * @ingroup cobalt_core_sched ++ * @defgroup sched_quota SCHED_QUOTA scheduling policy ++ * ++ * The SCHED_QUOTA policy enforces a limitation on the CPU consumption ++ * of threads over a globally defined period, known as the quota ++ * interval. This is done by pooling threads with common requirements ++ * in groups, and giving each group a share of the global period ++ * (CONFIG_XENO_OPT_SCHED_QUOTA_PERIOD). ++ * ++ * When threads have entirely consumed the quota allotted to the group ++ * they belong to, the latter is suspended as a whole, until the next ++ * quota interval starts. At this point, a new runtime budget is ++ * given to each group, in accordance with its share. ++ * ++ *@{ ++ */ ++int xnsched_quota_create_group(struct xnsched_quota_group *tg, ++ struct xnsched *sched, ++ int *quota_sum_r) ++{ ++ int tgid, nr_groups = CONFIG_XENO_OPT_SCHED_QUOTA_NR_GROUPS; ++ struct xnsched_quota *qs = &sched->quota; ++ ++ atomic_only(); ++ ++ tgid = find_first_zero_bit(group_map, nr_groups); ++ if (tgid >= nr_groups) ++ return -ENOSPC; ++ ++ __set_bit(tgid, group_map); ++ tg->tgid = tgid; ++ tg->sched = sched; ++ tg->run_budget_ns = qs->period_ns; ++ tg->run_credit_ns = 0; ++ tg->quota_percent = 100; ++ tg->quota_peak_percent = 100; ++ tg->quota_ns = qs->period_ns; ++ tg->quota_peak_ns = qs->period_ns; ++ tg->nr_active = 0; ++ tg->nr_threads = 0; ++ INIT_LIST_HEAD(&tg->members); ++ INIT_LIST_HEAD(&tg->expired); ++ ++ trace_cobalt_schedquota_create_group(tg); ++ ++ if (list_empty(&qs->groups)) ++ xntimer_start(&qs->refill_timer, ++ qs->period_ns, qs->period_ns, XN_RELATIVE); ++ ++ list_add(&tg->next, &qs->groups); ++ *quota_sum_r = quota_sum_all(qs); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnsched_quota_create_group); ++ ++int xnsched_quota_destroy_group(struct xnsched_quota_group *tg, ++ int force, int *quota_sum_r) ++{ ++ struct xnsched_quota *qs = &tg->sched->quota; ++ union xnsched_policy_param param; ++ struct xnthread *thread, *tmp; ++ ++ atomic_only(); ++ ++ if (!list_empty(&tg->members)) { ++ if (!force) ++ return -EBUSY; ++ /* Move group members to the rt class. */ ++ list_for_each_entry_safe(thread, tmp, &tg->members, quota_next) { ++ param.rt.prio = thread->cprio; ++ __xnthread_set_schedparam(thread, &xnsched_class_rt, ¶m); ++ } ++ } ++ ++ trace_cobalt_schedquota_destroy_group(tg); ++ ++ list_del(&tg->next); ++ __clear_bit(tg->tgid, group_map); ++ ++ if (list_empty(&qs->groups)) ++ xntimer_stop(&qs->refill_timer); ++ ++ if (quota_sum_r) ++ *quota_sum_r = quota_sum_all(qs); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnsched_quota_destroy_group); ++ ++void xnsched_quota_set_limit(struct xnsched_quota_group *tg, ++ int quota_percent, int quota_peak_percent, ++ int *quota_sum_r) ++{ ++ struct xnsched *sched = tg->sched; ++ struct xnsched_quota *qs = &sched->quota; ++ xnticks_t old_quota_ns = tg->quota_ns; ++ struct xnthread *thread, *tmp, *curr; ++ xnticks_t now, elapsed, consumed; ++ ++ atomic_only(); ++ ++ trace_cobalt_schedquota_set_limit(tg, quota_percent, ++ quota_peak_percent); ++ ++ if (quota_percent < 0 || quota_percent > 100) { /* Quota off. 
*/ ++ quota_percent = 100; ++ tg->quota_ns = qs->period_ns; ++ } else ++ tg->quota_ns = xnarch_div64(qs->period_ns * quota_percent, 100); ++ ++ if (quota_peak_percent < quota_percent) ++ quota_peak_percent = quota_percent; ++ ++ if (quota_peak_percent < 0 || quota_peak_percent > 100) { ++ quota_peak_percent = 100; ++ tg->quota_peak_ns = qs->period_ns; ++ } else ++ tg->quota_peak_ns = xnarch_div64(qs->period_ns * quota_peak_percent, 100); ++ ++ tg->quota_percent = quota_percent; ++ tg->quota_peak_percent = quota_peak_percent; ++ ++ curr = sched->curr; ++ if (curr->quota == tg && ++ xnthread_test_state(curr, XNREADY|XNTHREAD_BLOCK_BITS) == 0) { ++ now = xnclock_read_monotonic(&nkclock); ++ ++ elapsed = now - tg->run_start_ns; ++ if (elapsed < tg->run_budget_ns) ++ tg->run_budget_ns -= elapsed; ++ else ++ tg->run_budget_ns = 0; ++ ++ tg->run_start_ns = now; ++ ++ xntimer_stop(&qs->limit_timer); ++ } ++ ++ if (tg->run_budget_ns <= old_quota_ns) ++ consumed = old_quota_ns - tg->run_budget_ns; ++ else ++ consumed = 0; ++ if (tg->quota_ns >= consumed) ++ tg->run_budget_ns = tg->quota_ns - consumed; ++ else ++ tg->run_budget_ns = 0; ++ ++ tg->run_credit_ns = 0; /* Drop accumulated credit. */ ++ ++ *quota_sum_r = quota_sum_all(qs); ++ ++ if (tg->run_budget_ns > 0) { ++ list_for_each_entry_safe_reverse(thread, tmp, &tg->expired, ++ quota_expired) { ++ list_del_init(&thread->quota_expired); ++ xnsched_addq(&sched->rt.runnable, thread); ++ } ++ } ++ ++ /* ++ * Apply the new budget immediately, in case a member of this ++ * group is currently running. ++ */ ++ xnsched_set_resched(sched); ++ xnsched_run(); ++} ++EXPORT_SYMBOL_GPL(xnsched_quota_set_limit); ++ ++struct xnsched_quota_group * ++xnsched_quota_find_group(struct xnsched *sched, int tgid) ++{ ++ struct xnsched_quota_group *tg; ++ ++ atomic_only(); ++ ++ if (list_empty(&sched->quota.groups)) ++ return NULL; ++ ++ list_for_each_entry(tg, &sched->quota.groups, next) { ++ if (tg->tgid == tgid) ++ return tg; ++ } ++ ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(xnsched_quota_find_group); ++ ++int xnsched_quota_sum_all(struct xnsched *sched) ++{ ++ struct xnsched_quota *qs = &sched->quota; ++ ++ atomic_only(); ++ ++ return quota_sum_all(qs); ++} ++EXPORT_SYMBOL_GPL(xnsched_quota_sum_all); ++ ++/** @} */ ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++struct xnvfile_directory sched_quota_vfroot; ++ ++struct vfile_sched_quota_priv { ++ struct xnthread *curr; ++}; ++ ++struct vfile_sched_quota_data { ++ int cpu; ++ pid_t pid; ++ int prio; ++ int tgid; ++ xnticks_t budget; ++ char name[XNOBJECT_NAME_LEN]; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_sched_quota_ops; ++ ++static struct xnvfile_snapshot vfile_sched_quota = { ++ .privsz = sizeof(struct vfile_sched_quota_priv), ++ .datasz = sizeof(struct vfile_sched_quota_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_sched_quota_ops, ++}; ++ ++static int vfile_sched_quota_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_sched_quota_priv *priv = xnvfile_iterator_priv(it); ++ int nrthreads = xnsched_class_quota.nthreads; ++ ++ if (nrthreads == 0) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ ++ return nrthreads; ++} ++ ++static int vfile_sched_quota_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_quota_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_sched_quota_data *p = data; ++ struct xnthread *thread; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. 
*/ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ if (thread->base_class != &xnsched_class_quota) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->tgid = thread->quota->tgid; ++ p->prio = thread->cprio; ++ p->budget = thread->quota->run_budget_ns; ++ ++ return 1; ++} ++ ++static int vfile_sched_quota_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_quota_data *p = data; ++ char buf[16]; ++ ++ if (p == NULL) ++ xnvfile_printf(it, "%-3s %-6s %-4s %-4s %-10s %s\n", ++ "CPU", "PID", "TGID", "PRI", "BUDGET", "NAME"); ++ else { ++ xntimer_format_time(p->budget, buf, sizeof(buf)); ++ xnvfile_printf(it, "%3u %-6d %-4d %-4d %-10s %s\n", ++ p->cpu, ++ p->pid, ++ p->tgid, ++ p->prio, ++ buf, ++ p->name); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_sched_quota_ops = { ++ .rewind = vfile_sched_quota_rewind, ++ .next = vfile_sched_quota_next, ++ .show = vfile_sched_quota_show, ++}; ++ ++static int xnsched_quota_init_vfile(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot) ++{ ++ int ret; ++ ++ ret = xnvfile_init_dir(schedclass->name, &sched_quota_vfroot, vfroot); ++ if (ret) ++ return ret; ++ ++ return xnvfile_init_snapshot("threads", &vfile_sched_quota, ++ &sched_quota_vfroot); ++} ++ ++static void xnsched_quota_cleanup_vfile(struct xnsched_class *schedclass) ++{ ++ xnvfile_destroy_snapshot(&vfile_sched_quota); ++ xnvfile_destroy_dir(&sched_quota_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct xnsched_class xnsched_class_quota = { ++ .sched_init = xnsched_quota_init, ++ .sched_enqueue = xnsched_quota_enqueue, ++ .sched_dequeue = xnsched_quota_dequeue, ++ .sched_requeue = xnsched_quota_requeue, ++ .sched_pick = xnsched_quota_pick, ++ .sched_tick = NULL, ++ .sched_rotate = NULL, ++ .sched_migrate = xnsched_quota_migrate, ++ .sched_chkparam = xnsched_quota_chkparam, ++ .sched_setparam = xnsched_quota_setparam, ++ .sched_getparam = xnsched_quota_getparam, ++ .sched_trackprio = xnsched_quota_trackprio, ++ .sched_protectprio = xnsched_quota_protectprio, ++ .sched_forget = xnsched_quota_forget, ++ .sched_kick = xnsched_quota_kick, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .sched_init_vfile = xnsched_quota_init_vfile, ++ .sched_cleanup_vfile = xnsched_quota_cleanup_vfile, ++#endif ++ .weight = XNSCHED_CLASS_WEIGHT(3), ++ .policy = SCHED_QUOTA, ++ .name = "quota" ++}; ++EXPORT_SYMBOL_GPL(xnsched_class_quota); +--- linux/kernel/xenomai/registry.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/registry.c 2021-04-07 16:01:25.848636137 +0800 +@@ -0,0 +1,947 @@ ++/* ++ * Copyright (C) 2004 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_registry Registry services ++ * ++ * The registry provides a mean to index object descriptors on unique ++ * alphanumeric keys. When labeled this way, an object is globally ++ * exported; it can be searched for, and its descriptor returned to ++ * the caller for further use; the latter operation is called a ++ * "binding". When no object has been registered under the given name ++ * yet, the registry can be asked to set up a rendez-vous, blocking ++ * the caller until the object is eventually registered. ++ * ++ *@{ ++ */ ++ ++struct xnobject *registry_obj_slots; ++EXPORT_SYMBOL_GPL(registry_obj_slots); ++ ++static LIST_HEAD(free_object_list); /* Free objects. */ ++ ++static LIST_HEAD(busy_object_list); /* Active and exported objects. */ ++ ++static unsigned int nr_active_objects; ++ ++static unsigned long next_object_stamp; ++ ++static struct hlist_head *object_index; ++ ++static int nr_object_entries; ++ ++static struct xnsynch register_synch; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++#include ++ ++static void proc_callback(struct work_struct *work); ++ ++static void registry_proc_schedule(void *cookie); ++ ++static LIST_HEAD(proc_object_list); /* Objects waiting for /proc handling. */ ++ ++static DECLARE_WORK(registry_proc_work, proc_callback); ++ ++static int proc_apc; ++ ++static struct xnvfile_directory registry_vfroot; ++ ++static int usage_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ xnvfile_printf(it, "%u/%u\n", ++ nr_active_objects, ++ CONFIG_XENO_OPT_REGISTRY_NRSLOTS); ++ return 0; ++} ++ ++static struct xnvfile_regular_ops usage_vfile_ops = { ++ .show = usage_vfile_show, ++}; ++ ++static struct xnvfile_regular usage_vfile = { ++ .ops = &usage_vfile_ops, ++}; ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++unsigned xnregistry_hash_size(void) ++{ ++ static const int primes[] = { ++ 101, 211, 307, 401, 503, 601, ++ 701, 809, 907, 1009, 1103 ++ }; ++ ++#define obj_hash_max(n) \ ++((n) < sizeof(primes) / sizeof(int) ? \ ++ (n) : sizeof(primes) / sizeof(int) - 1) ++ ++ return primes[obj_hash_max(CONFIG_XENO_OPT_REGISTRY_NRSLOTS / 100)]; ++} ++ ++int xnregistry_init(void) ++{ ++ int n, ret __maybe_unused; ++ ++ registry_obj_slots = kmalloc(CONFIG_XENO_OPT_REGISTRY_NRSLOTS * ++ sizeof(struct xnobject), GFP_KERNEL); ++ if (registry_obj_slots == NULL) ++ return -ENOMEM; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ret = xnvfile_init_dir("registry", ®istry_vfroot, &cobalt_vfroot); ++ if (ret) ++ return ret; ++ ++ ret = xnvfile_init_regular("usage", &usage_vfile, ®istry_vfroot); ++ if (ret) { ++ xnvfile_destroy_dir(®istry_vfroot); ++ return ret; ++ } ++ ++ proc_apc = ++ xnapc_alloc("registry_export", ®istry_proc_schedule, NULL); ++ ++ if (proc_apc < 0) { ++ xnvfile_destroy_regular(&usage_vfile); ++ xnvfile_destroy_dir(®istry_vfroot); ++ return proc_apc; ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ next_object_stamp = 0; ++ ++ for (n = 0; n < CONFIG_XENO_OPT_REGISTRY_NRSLOTS; n++) { ++ registry_obj_slots[n].objaddr = NULL; ++ list_add_tail(®istry_obj_slots[n].link, &free_object_list); ++ } ++ ++ /* Slot #0 is reserved/invalid. 
*/ ++ list_get_entry(&free_object_list, struct xnobject, link); ++ nr_active_objects = 1; ++ ++ nr_object_entries = xnregistry_hash_size(); ++ object_index = kmalloc(sizeof(*object_index) * ++ nr_object_entries, GFP_KERNEL); ++ ++ if (object_index == NULL) { ++#ifdef CONFIG_XENO_OPT_VFILE ++ xnvfile_destroy_regular(&usage_vfile); ++ xnvfile_destroy_dir(®istry_vfroot); ++ xnapc_free(proc_apc); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ return -ENOMEM; ++ } ++ ++ for (n = 0; n < nr_object_entries; n++) ++ INIT_HLIST_HEAD(&object_index[n]); ++ ++ xnsynch_init(®ister_synch, XNSYNCH_FIFO, NULL); ++ ++ return 0; ++} ++ ++void xnregistry_cleanup(void) ++{ ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct hlist_node *enext; ++ struct xnobject *ecurr; ++ struct xnpnode *pnode; ++ int n; ++ ++ flush_scheduled_work(); ++ ++ for (n = 0; n < nr_object_entries; n++) ++ hlist_for_each_entry_safe(ecurr, enext, ++ &object_index[n], hlink) { ++ pnode = ecurr->pnode; ++ if (pnode == NULL) ++ continue; ++ ++ pnode->ops->unexport(ecurr, pnode); ++ ++ if (--pnode->entries > 0) ++ continue; ++ ++ xnvfile_destroy_dir(&pnode->vdir); ++ ++ if (--pnode->root->entries == 0) ++ xnvfile_destroy_dir(&pnode->root->vdir); ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ kfree(object_index); ++ xnsynch_destroy(®ister_synch); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ xnapc_free(proc_apc); ++ flush_scheduled_work(); ++ xnvfile_destroy_regular(&usage_vfile); ++ xnvfile_destroy_dir(®istry_vfroot); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ kfree(registry_obj_slots); ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static DEFINE_SEMAPHORE(export_mutex); ++ ++/* ++ * The following stuff implements the mechanism for delegating ++ * export/unexport requests to/from the /proc interface from the ++ * Xenomai domain to the Linux kernel (i.e. the "lower stage"). This ++ * ends up being a bit complex due to the fact that such requests ++ * might lag enough before being processed by the Linux kernel so that ++ * subsequent requests might just contradict former ones before they ++ * even had a chance to be applied (e.g. export -> unexport in the ++ * Xenomai domain for short-lived objects). This situation and the ++ * like are hopefully properly handled due to a careful ++ * synchronization of operations across domains. ++ */ ++static void proc_callback(struct work_struct *work) ++{ ++ struct xnvfile_directory *rdir, *dir; ++ const char *rname, *type; ++ struct xnobject *object; ++ struct xnpnode *pnode; ++ int ret; ++ spl_t s; ++ ++ down(&export_mutex); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ while (!list_empty(&proc_object_list)) { ++ object = list_get_entry(&proc_object_list, ++ struct xnobject, link); ++ pnode = object->pnode; ++ type = pnode->dirname; ++ dir = &pnode->vdir; ++ rdir = &pnode->root->vdir; ++ rname = pnode->root->dirname; ++ ++ if (object->vfilp != XNOBJECT_EXPORT_SCHEDULED) ++ goto unexport; ++ ++ object->vfilp = XNOBJECT_EXPORT_INPROGRESS; ++ list_add_tail(&object->link, &busy_object_list); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (pnode->entries++ == 0) { ++ if (pnode->root->entries++ == 0) { ++ /* Create the root directory on the fly. */ ++ ret = xnvfile_init_dir(rname, rdir, ®istry_vfroot); ++ if (ret) { ++ xnlock_get_irqsave(&nklock, s); ++ object->pnode = NULL; ++ pnode->root->entries = 0; ++ pnode->entries = 0; ++ continue; ++ } ++ } ++ /* Create the class directory on the fly. 
*/ ++ ret = xnvfile_init_dir(type, dir, rdir); ++ if (ret) { ++ if (pnode->root->entries == 1) { ++ pnode->root->entries = 0; ++ xnvfile_destroy_dir(rdir); ++ } ++ xnlock_get_irqsave(&nklock, s); ++ object->pnode = NULL; ++ pnode->entries = 0; ++ continue; ++ } ++ } ++ ++ ret = pnode->ops->export(object, pnode); ++ if (ret && --pnode->entries == 0) { ++ xnvfile_destroy_dir(dir); ++ if (--pnode->root->entries == 0) ++ xnvfile_destroy_dir(rdir); ++ xnlock_get_irqsave(&nklock, s); ++ object->pnode = NULL; ++ } else ++ xnlock_get_irqsave(&nklock, s); ++ ++ continue; ++ ++ unexport: ++ object->vfilp = NULL; ++ object->pnode = NULL; ++ ++ if (object->vfilp == XNOBJECT_EXPORT_ABORTED) ++ object->objaddr = NULL; ++ ++ if (object->objaddr) ++ list_add_tail(&object->link, &busy_object_list); ++ else { ++ /* ++ * Trap the case where we are unexporting an ++ * already unregistered object. ++ */ ++ list_add_tail(&object->link, &free_object_list); ++ nr_active_objects--; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ pnode->ops->unexport(object, pnode); ++ ++ if (--pnode->entries == 0) { ++ xnvfile_destroy_dir(dir); ++ if (--pnode->root->entries == 0) ++ xnvfile_destroy_dir(rdir); ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ up(&export_mutex); ++} ++ ++static void registry_proc_schedule(void *cookie) ++{ ++ /* ++ * schedule_work() will check for us if the work has already ++ * been scheduled, so just be lazy and submit blindly. ++ */ ++ schedule_work(®istry_proc_work); ++} ++ ++static int registry_export_vfsnap(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ struct xnpnode_snapshot *p; ++ int ret; ++ ++ /* ++ * Make sure to initialize _all_ mandatory vfile fields; most ++ * of the time we are using sane NULL defaults based on static ++ * storage for the vfile struct, but here we are building up a ++ * vfile object explicitly. ++ */ ++ p = container_of(pnode, struct xnpnode_snapshot, node); ++ object->vfile_u.vfsnap.file.datasz = p->vfile.datasz; ++ object->vfile_u.vfsnap.file.privsz = p->vfile.privsz; ++ /* ++ * Make the vfile refer to the provided tag struct if any, ++ * otherwise use our default tag space. In the latter case, ++ * each object family has its own private revision tag. ++ */ ++ object->vfile_u.vfsnap.file.tag = p->vfile.tag ?: ++ &object->vfile_u.vfsnap.tag; ++ object->vfile_u.vfsnap.file.ops = p->vfile.ops; ++ object->vfile_u.vfsnap.file.entry.lockops = p->vfile.lockops; ++ ++ ret = xnvfile_init_snapshot(object->key, &object->vfile_u.vfsnap.file, ++ &pnode->vdir); ++ if (ret) ++ return ret; ++ ++ object->vfilp = &object->vfile_u.vfsnap.file.entry; ++ object->vfilp->private = object->objaddr; ++ ++ return 0; ++} ++ ++static void registry_unexport_vfsnap(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ xnvfile_destroy_snapshot(&object->vfile_u.vfsnap.file); ++} ++ ++static void registry_touch_vfsnap(struct xnobject *object) ++{ ++ xnvfile_touch(&object->vfile_u.vfsnap.file); ++} ++ ++struct xnpnode_ops xnregistry_vfsnap_ops = { ++ .export = registry_export_vfsnap, ++ .unexport = registry_unexport_vfsnap, ++ .touch = registry_touch_vfsnap, ++}; ++EXPORT_SYMBOL_GPL(xnregistry_vfsnap_ops); ++ ++static int registry_export_vfreg(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ struct xnpnode_regular *p; ++ int ret; ++ ++ /* See registry_export_vfsnap() for hints. 
*/ ++ p = container_of(pnode, struct xnpnode_regular, node); ++ object->vfile_u.vfreg.privsz = p->vfile.privsz; ++ object->vfile_u.vfreg.ops = p->vfile.ops; ++ object->vfile_u.vfreg.entry.lockops = p->vfile.lockops; ++ ++ ret = xnvfile_init_regular(object->key, &object->vfile_u.vfreg, ++ &pnode->vdir); ++ if (ret) ++ return ret; ++ ++ object->vfilp = &object->vfile_u.vfreg.entry; ++ object->vfilp->private = object->objaddr; ++ ++ return 0; ++} ++ ++static void registry_unexport_vfreg(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ xnvfile_destroy_regular(&object->vfile_u.vfreg); ++} ++ ++struct xnpnode_ops xnregistry_vfreg_ops = { ++ .export = registry_export_vfreg, ++ .unexport = registry_unexport_vfreg, ++}; ++EXPORT_SYMBOL_GPL(xnregistry_vfreg_ops); ++ ++static int registry_export_vlink(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ struct xnpnode_link *link_desc; ++ char *link_target; ++ int ret; ++ ++ link_desc = container_of(pnode, struct xnpnode_link, node); ++ link_target = link_desc->target(object->objaddr); ++ if (link_target == NULL) ++ return -ENOMEM; ++ ++ ret = xnvfile_init_link(object->key, link_target, ++ &object->vfile_u.link, &pnode->vdir); ++ kfree(link_target); ++ if (ret) ++ return ret; ++ ++ object->vfilp = &object->vfile_u.link.entry; ++ object->vfilp->private = object->objaddr; ++ ++ return 0; ++} ++ ++static void registry_unexport_vlink(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ xnvfile_destroy_link(&object->vfile_u.link); ++} ++ ++struct xnpnode_ops xnregistry_vlink_ops = { ++ .export = registry_export_vlink, ++ .unexport = registry_unexport_vlink, ++}; ++EXPORT_SYMBOL_GPL(xnregistry_vlink_ops); ++ ++static inline void registry_export_pnode(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ object->vfilp = XNOBJECT_EXPORT_SCHEDULED; ++ object->pnode = pnode; ++ list_del(&object->link); ++ list_add_tail(&object->link, &proc_object_list); ++ __xnapc_schedule(proc_apc); ++} ++ ++static inline void registry_unexport_pnode(struct xnobject *object) ++{ ++ if (object->vfilp != XNOBJECT_EXPORT_SCHEDULED) { ++ /* ++ * We might have preempted a v-file read op, so bump ++ * the object's revtag to make sure the data ++ * collection is aborted next, if we end up deleting ++ * the object being read. ++ */ ++ if (object->pnode->ops->touch) ++ object->pnode->ops->touch(object); ++ list_del(&object->link); ++ list_add_tail(&object->link, &proc_object_list); ++ __xnapc_schedule(proc_apc); ++ } else { ++ /* ++ * Unexporting before the lower stage has had a chance ++ * to export. Move back the object to the busyq just ++ * like if no export had been requested. 
++ */ ++ list_del(&object->link); ++ list_add_tail(&object->link, &busy_object_list); ++ object->pnode = NULL; ++ object->vfilp = NULL; ++ } ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++static unsigned registry_hash_crunch(const char *key) ++{ ++ unsigned int h = 0, g; ++ ++#define HQON 24 /* Higher byte position */ ++#define HBYTE 0xf0000000 /* Higher nibble on */ ++ ++ while (*key) { ++ h = (h << 4) + *key++; ++ if ((g = (h & HBYTE)) != 0) ++ h = (h ^ (g >> HQON)) ^ g; ++ } ++ ++ return h % nr_object_entries; ++} ++ ++static inline int registry_hash_enter(const char *key, struct xnobject *object) ++{ ++ struct xnobject *ecurr; ++ unsigned s; ++ ++ object->key = key; ++ s = registry_hash_crunch(key); ++ ++ hlist_for_each_entry(ecurr, &object_index[s], hlink) ++ if (ecurr == object || strcmp(key, ecurr->key) == 0) ++ return -EEXIST; ++ ++ hlist_add_head(&object->hlink, &object_index[s]); ++ ++ return 0; ++} ++ ++static inline int registry_hash_remove(struct xnobject *object) ++{ ++ unsigned int s = registry_hash_crunch(object->key); ++ struct xnobject *ecurr; ++ ++ hlist_for_each_entry(ecurr, &object_index[s], hlink) ++ if (ecurr == object) { ++ hlist_del(&ecurr->hlink); ++ return 0; ++ } ++ ++ return -ESRCH; ++} ++ ++static struct xnobject *registry_hash_find(const char *key) ++{ ++ struct xnobject *ecurr; ++ ++ hlist_for_each_entry(ecurr, ++ &object_index[registry_hash_crunch(key)], hlink) ++ if (strcmp(key, ecurr->key) == 0) ++ return ecurr; ++ ++ return NULL; ++} ++ ++struct registry_wait_context { ++ struct xnthread_wait_context wc; ++ const char *key; ++}; ++ ++static inline int registry_wakeup_sleepers(const char *key) ++{ ++ struct registry_wait_context *rwc; ++ struct xnthread_wait_context *wc; ++ struct xnthread *sleeper, *tmp; ++ int cnt = 0; ++ ++ xnsynch_for_each_sleeper_safe(sleeper, tmp, ®ister_synch) { ++ wc = xnthread_get_wait_context(sleeper); ++ rwc = container_of(wc, struct registry_wait_context, wc); ++ if (*key == *rwc->key && strcmp(key, rwc->key) == 0) { ++ xnsynch_wakeup_this_sleeper(®ister_synch, sleeper); ++ ++cnt; ++ } ++ } ++ ++ return cnt; ++} ++ ++/** ++ * @fn int xnregistry_enter(const char *key,void *objaddr,xnhandle_t *phandle,struct xnpnode *pnode) ++ * @brief Register a real-time object. ++ * ++ * This service allocates a new registry slot for an associated ++ * object, and indexes it by an alphanumeric key for later retrieval. ++ * ++ * @param key A valid NULL-terminated string by which the object will ++ * be indexed and later retrieved in the registry. Since it is assumed ++ * that such key is stored into the registered object, it will *not* ++ * be copied but only kept by reference in the registry. Pass an empty ++ * or NULL string if the object shall only occupy a registry slot for ++ * handle-based lookups. The slash character is not accepted in @a key ++ * if @a pnode is non-NULL. ++ * ++ * @param objaddr An opaque pointer to the object to index by @a ++ * key. ++ * ++ * @param phandle A pointer to a generic handle defined by the ++ * registry which will uniquely identify the indexed object, until the ++ * latter is unregistered using the xnregistry_remove() service. ++ * ++ * @param pnode A pointer to an optional /proc node class ++ * descriptor. This structure provides the information needed to ++ * export all objects from the given class through the /proc ++ * filesystem, under the /proc/xenomai/registry entry. Passing NULL ++ * indicates that no /proc support is available for the newly ++ * registered object. 
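++ *
++ * For illustration only, with a hypothetical object and key name,
++ * registering an object without /proc export could look like:
++ *
++ * @code
++ * xnhandle_t handle;
++ * int ret;
++ *
++ * ret = xnregistry_enter("my-object", &myobj, &handle, NULL);
++ * if (ret)
++ *     return ret;
++ * @endcode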
++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EINVAL is returned if @a objaddr is NULL. ++ * ++ * - -EINVAL if @a pnode is non-NULL, and @a key points to a valid ++ * string containing a '/' character. ++ * ++ * - -ENOMEM is returned if the system fails to get enough dynamic ++ * memory from the global real-time heap in order to register the ++ * object. ++ * ++ * - -EEXIST is returned if the @a key is already in use. ++ * ++ * @coretags{unrestricted, might-switch, atomic-entry} ++ */ ++int xnregistry_enter(const char *key, void *objaddr, ++ xnhandle_t *phandle, struct xnpnode *pnode) ++{ ++ struct xnobject *object; ++ spl_t s; ++ int ret; ++ ++ if (objaddr == NULL || ++ (pnode != NULL && key != NULL && strchr(key, '/'))) ++ return -EINVAL; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(&free_object_list)) { ++ ret = -EAGAIN; ++ goto unlock_and_exit; ++ } ++ ++ object = list_get_entry(&free_object_list, struct xnobject, link); ++ nr_active_objects++; ++ object->objaddr = objaddr; ++ object->cstamp = ++next_object_stamp; ++#ifdef CONFIG_XENO_OPT_VFILE ++ object->pnode = NULL; ++#endif ++ if (key == NULL || *key == '\0') { ++ object->key = NULL; ++ *phandle = object - registry_obj_slots; ++ ret = 0; ++ goto unlock_and_exit; ++ } ++ ++ ret = registry_hash_enter(key, object); ++ if (ret) { ++ nr_active_objects--; ++ list_add_tail(&object->link, &free_object_list); ++ goto unlock_and_exit; ++ } ++ ++ list_add_tail(&object->link, &busy_object_list); ++ ++ /* ++ * Make sure the handle is written back before the ++ * rescheduling takes place. ++ */ ++ *phandle = object - registry_obj_slots; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ if (pnode) ++ registry_export_pnode(object, pnode); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ if (registry_wakeup_sleepers(key)) ++ xnsched_run(); ++ ++unlock_and_exit: ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnregistry_enter); ++ ++/** ++ * @fn int xnregistry_bind(const char *key,xnticks_t timeout,int timeout_mode,xnhandle_t *phandle) ++ * @brief Bind to a real-time object. ++ * ++ * This service retrieves the registry handle of a given object ++ * identified by its key. Unless otherwise specified, this service ++ * will block the caller if the object is not registered yet, waiting ++ * for such registration to occur. ++ * ++ * @param key A valid NULL-terminated string which identifies the ++ * object to bind to. ++ * ++ * @param timeout The timeout which may be used to limit the time the ++ * thread wait for the object to be registered. This value is a wait ++ * time given as a count of nanoseconds. It can either be relative, ++ * absolute monotonic (XN_ABSOLUTE), or absolute adjustable ++ * (XN_REALTIME) depending on @a timeout_mode. Passing XN_INFINITE @b ++ * and setting @a timeout_mode to XN_RELATIVE specifies an unbounded ++ * wait. Passing XN_NONBLOCK causes the service to return immediately ++ * without waiting if the object is not registered on entry. All other ++ * values are used as a wait limit. ++ * ++ * @param timeout_mode The mode of the @a timeout parameter. It can ++ * either be set to XN_RELATIVE, XN_ABSOLUTE, or XN_REALTIME (see also ++ * xntimer_start()). ++ * ++ * @param phandle A pointer to a memory location which will be written ++ * upon success with the generic handle defined by the registry for ++ * the retrieved object. Contents of this memory is undefined upon ++ * failure. ++ * ++ * @return 0 is returned upon success. 
Otherwise: ++ * ++ * - -EINVAL is returned if @a key is NULL. ++ * ++ * - -EINTR is returned if xnthread_unblock() has been called for the ++ * waiting thread before the retrieval has completed. ++ * ++ * - -EWOULDBLOCK is returned if @a timeout is equal to XN_NONBLOCK ++ * and the searched object is not registered on entry. As a special ++ * exception, this error is also returned if this service should ++ * block, but was called from a context which cannot sleep ++ * (e.g. interrupt, non-realtime or scheduler locked). ++ * ++ * - -ETIMEDOUT is returned if the object cannot be retrieved within ++ * the specified amount of time. ++ * ++ * @coretags{primary-only, might-switch} ++ * ++ * @note xnregistry_bind() only returns the index portion of a handle, ++ * which might include other fixed bits to be complete ++ * (e.g. XNSYNCH_PSHARED). The caller is responsible for completing ++ * the handle returned with those bits if applicable, depending on the ++ * context. ++ */ ++int xnregistry_bind(const char *key, xnticks_t timeout, int timeout_mode, ++ xnhandle_t *phandle) ++{ ++ struct registry_wait_context rwc; ++ struct xnobject *object; ++ int ret = 0, info; ++ spl_t s; ++ ++ if (key == NULL) ++ return -EINVAL; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (timeout_mode == XN_RELATIVE && ++ timeout != XN_INFINITE && timeout != XN_NONBLOCK) { ++ timeout_mode = XN_ABSOLUTE; ++ timeout += xnclock_read_monotonic(&nkclock); ++ } ++ ++ for (;;) { ++ object = registry_hash_find(key); ++ if (object) { ++ *phandle = object - registry_obj_slots; ++ goto unlock_and_exit; ++ } ++ ++ if ((timeout_mode == XN_RELATIVE && timeout == XN_NONBLOCK) || ++ xnsched_unblockable_p()) { ++ ret = -EWOULDBLOCK; ++ goto unlock_and_exit; ++ } ++ ++ rwc.key = key; ++ xnthread_prepare_wait(&rwc.wc); ++ info = xnsynch_sleep_on(®ister_synch, timeout, timeout_mode); ++ if (info & XNTIMEO) { ++ ret = -ETIMEDOUT; ++ goto unlock_and_exit; ++ } ++ if (info & XNBREAK) { ++ ret = -EINTR; ++ goto unlock_and_exit; ++ } ++ } ++ ++unlock_and_exit: ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnregistry_bind); ++ ++/** ++ * @fn int xnregistry_remove(xnhandle_t handle) ++ * @brief Forcibly unregister a real-time object. ++ * ++ * This service forcibly removes an object from the registry. The ++ * removal is performed regardless of the current object's locking ++ * status. ++ * ++ * @param handle The generic handle of the object to remove. ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -ESRCH is returned if @a handle does not reference a registered ++ * object. ++ * ++ * @coretags{unrestricted} ++ */ ++int xnregistry_remove(xnhandle_t handle) ++{ ++ struct xnobject *object; ++ void *objaddr; ++ int ret = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ object = xnregistry_validate(handle); ++ if (object == NULL) { ++ ret = -ESRCH; ++ goto unlock_and_exit; ++ } ++ ++ objaddr = object->objaddr; ++ object->objaddr = NULL; ++ object->cstamp = 0; ++ ++ if (object->key) { ++ registry_hash_remove(object); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ if (object->pnode) { ++ if (object->vfilp == XNOBJECT_EXPORT_INPROGRESS) { ++ object->vfilp = XNOBJECT_EXPORT_ABORTED; ++ object->objaddr = objaddr; ++ } ++ ++ registry_unexport_pnode(object); ++ /* ++ * Leave the update of the object queues to ++ * the work callback if it has been kicked. 
++ */ ++ if (object->pnode) { ++ xnlock_put_irqrestore(&nklock, s); ++ if (ipipe_root_p) ++ flush_work(®istry_proc_work); ++ return 0; ++ } ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ list_del(&object->link); ++ } ++ ++ if (!IS_ENABLED(CONFIG_XENO_OPT_VFILE) || !object->objaddr) { ++ list_add_tail(&object->link, &free_object_list); ++ nr_active_objects--; ++ } ++ ++unlock_and_exit: ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnregistry_remove); ++ ++/** ++ * Turn a named object into an anonymous object ++ * ++ * @coretags{unrestricted} ++ */ ++int xnregistry_unlink(const char *key) ++{ ++ struct xnobject *object; ++ int ret = 0; ++ spl_t s; ++ ++ if (key == NULL) ++ return -EINVAL; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ object = registry_hash_find(key); ++ if (object == NULL) { ++ ret = -ESRCH; ++ goto unlock_and_exit; ++ } ++ ++ ret = registry_hash_remove(object); ++ if (ret < 0) ++ goto unlock_and_exit; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ if (object->pnode) { ++ registry_unexport_pnode(object); ++ /* ++ * Leave the update of the object queues to ++ * the work callback if it has been kicked. ++ */ ++ if (object->pnode) ++ goto unlock_and_exit; ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ list_del(&object->link); ++ ++ object->key = NULL; ++ ++unlock_and_exit: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++/** ++ * @fn void *xnregistry_lookup(xnhandle_t handle, unsigned long *cstamp_r) ++ * @brief Find a real-time object into the registry. ++ * ++ * This service retrieves an object from its handle into the registry ++ * and returns the memory address of its descriptor. Optionally, it ++ * also copies back the object's creation stamp which is unique across ++ * object registration calls. ++ * ++ * @param handle The generic handle of the object to fetch. ++ * ++ * @param cstamp_r If not-NULL, the object's creation stamp will be ++ * copied to this memory area. ++ * ++ * @return The memory address of the object's descriptor is returned ++ * on success. Otherwise, NULL is returned if @a handle does not ++ * reference a registered object. ++ * ++ * @coretags{unrestricted} ++ */ ++ ++/** @} */ +--- linux/kernel/xenomai/map.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/map.c 2021-04-07 16:01:25.843636144 +0800 +@@ -0,0 +1,265 @@ ++/* ++ * Copyright (C) 2007 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_map Lightweight key-to-object mapping service ++ * ++ * A map is a simple indexing structure which associates unique ++ * integer keys with pointers to objects. The current implementation ++ * supports reservation, for naming/indexing objects, either on a ++ * fixed, user-provided integer (i.e. 
a reserved key value), or by ++ * drawing the next available key internally if the caller did not ++ * specify any fixed key. For instance, in some given map, the key ++ * space ranging from 0 to 255 could be reserved for fixed keys, ++ * whilst the range from 256 to 511 could be available for drawing ++ * free keys dynamically. ++ * ++ * A maximum of 1024 unique keys per map is supported on 32bit ++ * machines. ++ * ++ * (This implementation should not be confused with C++ STL maps, ++ * which are dynamically expandable and allow arbitrary key types; ++ * Xenomai maps don't). ++ * ++ * @{ ++ */ ++ ++/** ++ * @fn void xnmap_create(int nkeys, int reserve, int offset) ++ * @brief Create a map. ++ * ++ * Allocates a new map with the specified addressing capabilities. The ++ * memory is obtained from the Xenomai system heap. ++ * ++ * @param nkeys The maximum number of unique keys the map will be able ++ * to hold. This value cannot exceed the static limit represented by ++ * XNMAP_MAX_KEYS, and must be a power of two. ++ * ++ * @param reserve The number of keys which should be kept for ++ * reservation within the index space. Reserving a key means to ++ * specify a valid key to the xnmap_enter() service, which will then ++ * attempt to register this exact key, instead of drawing the next ++ * available key from the unreserved index space. When reservation is ++ * in effect, the unreserved index space will hold key values greater ++ * than @a reserve, keeping the low key values for the reserved space. ++ * For instance, passing @a reserve = 32 would cause the index range [ ++ * 0 .. 31 ] to be kept for reserved keys. When non-zero, @a reserve ++ * is rounded to the next multiple of BITS_PER_LONG. If @a reserve is ++ * zero no reservation will be available from the map. ++ * ++ * @param offset The lowest key value xnmap_enter() will return to the ++ * caller. Key values will be in the range [ 0 + offset .. @a nkeys + ++ * offset - 1 ]. Negative offsets are valid. ++ * ++ * @return the address of the new map is returned on success; ++ * otherwise, NULL is returned if @a nkeys is invalid. ++ * ++ * @coretags{task-unrestricted} ++ */ ++struct xnmap *xnmap_create(int nkeys, int reserve, int offset) ++{ ++ struct xnmap *map; ++ int mapsize; ++ ++ if (nkeys <= 0 || (nkeys & (nkeys - 1)) != 0) ++ return NULL; ++ ++ mapsize = sizeof(*map) + (nkeys - 1) * sizeof(map->objarray[0]); ++ map = xnmalloc(mapsize); ++ ++ if (!map) ++ return NULL; ++ ++ map->ukeys = 0; ++ map->nkeys = nkeys; ++ map->offset = offset; ++ map->himask = (1 << ((reserve + BITS_PER_LONG - 1) / BITS_PER_LONG)) - 1; ++ map->himap = ~0; ++ memset(map->lomap, ~0, sizeof(map->lomap)); ++ memset(map->objarray, 0, sizeof(map->objarray[0]) * nkeys); ++ ++ return map; ++} ++EXPORT_SYMBOL_GPL(xnmap_create); ++ ++/** ++ * @fn void xnmap_delete(struct xnmap *map) ++ * @brief Delete a map. ++ * ++ * Deletes a map, freeing any associated memory back to the Xenomai ++ * system heap. ++ * ++ * @param map The address of the map to delete. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnmap_delete(struct xnmap *map) ++{ ++ xnfree(map); ++} ++EXPORT_SYMBOL_GPL(xnmap_delete); ++ ++/** ++ * @fn void xnmap_enter(struct xnmap *map, int key, void *objaddr) ++ * @brief Index an object into a map. ++ * ++ * Insert a new object into the given map. ++ * ++ * @param map The address of the map to insert into. ++ * ++ * @param key The key to index the object on. If this key is within ++ * the valid index range [ 0 - offset .. 
nkeys - offset - 1 ], then an ++ * attempt to reserve this exact key is made. If @a key has an ++ * out-of-range value lower or equal to 0 - offset - 1, then an ++ * attempt is made to draw a free key from the unreserved index space. ++ * ++ * @param objaddr The address of the object to index on the key. This ++ * value will be returned by a successful call to xnmap_fetch() with ++ * the same key. ++ * ++ * @return a valid key is returned on success, either @a key if ++ * reserved, or the next free key. Otherwise: ++ * ++ * - -EEXIST is returned upon attempt to reserve a busy key. ++ * ++ * - -ENOSPC when no more free key is available. ++ * ++ * @coretags{unrestricted} ++ */ ++int xnmap_enter(struct xnmap *map, int key, void *objaddr) ++{ ++ int hi, lo, ofkey = key - map->offset; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (ofkey >= 0 && ofkey < map->nkeys) { ++ if (map->objarray[ofkey] != NULL) { ++ key = -EEXIST; ++ goto unlock_and_exit; ++ } ++ } else if (map->ukeys >= map->nkeys) { ++ key = -ENOSPC; ++ goto unlock_and_exit; ++ } ++ else { ++ /* The himask implements a namespace reservation of ++ half of the bitmap space which cannot be used to ++ draw keys. */ ++ ++ hi = ffnz(map->himap & ~map->himask); ++ lo = ffnz(map->lomap[hi]); ++ ofkey = hi * BITS_PER_LONG + lo; ++ ++map->ukeys; ++ ++ map->lomap[hi] &= ~(1UL << lo); ++ if (map->lomap[hi] == 0) ++ map->himap &= ~(1UL << hi); ++ } ++ ++ map->objarray[ofkey] = objaddr; ++ ++ unlock_and_exit: ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ofkey + map->offset; ++} ++EXPORT_SYMBOL_GPL(xnmap_enter); ++ ++/** ++ * @fn void xnmap_remove(struct xnmap *map, int key) ++ * @brief Remove an object reference from a map. ++ * ++ * Removes an object reference from the given map, releasing the ++ * associated key. ++ * ++ * @param map The address of the map to remove from. ++ * ++ * @param key The key the object reference to be removed is indexed ++ * on. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ESRCH is returned if @a key is invalid. ++ * ++ * @coretags{unrestricted} ++ */ ++int xnmap_remove(struct xnmap *map, int key) ++{ ++ int ofkey = key - map->offset, hi, lo; ++ spl_t s; ++ ++ if (ofkey < 0 || ofkey >= map->nkeys) ++ return -ESRCH; ++ ++ hi = ofkey / BITS_PER_LONG; ++ lo = ofkey % BITS_PER_LONG; ++ xnlock_get_irqsave(&nklock, s); ++ map->objarray[ofkey] = NULL; ++ map->himap |= (1UL << hi); ++ map->lomap[hi] |= (1UL << lo); ++ --map->ukeys; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnmap_remove); ++ ++/** ++ * @fn void xnmap_fetch(struct xnmap *map, int key) ++ * @brief Search an object into a map. ++ * ++ * Retrieve an object reference from the given map by its index key. ++ * ++ * @param map The address of the map to retrieve from. ++ * ++ * @param key The key to be searched for in the map index. ++ * ++ * @return The indexed object address is returned on success, ++ * otherwise NULL is returned when @a key is invalid or no object is ++ * currently indexed on it. ++ * ++ * @coretags{unrestricted} ++ */ ++ ++/** ++ * @fn void xnmap_fetch_nocheck(struct xnmap *map, int key) ++ * @brief Search an object into a map - unchecked form. ++ * ++ * Retrieve an object reference from the given map by its index key, ++ * but does not perform any sanity check on the provided key. ++ * ++ * @param map The address of the map to retrieve from. ++ * ++ * @param key The key to be searched for in the map index. 
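++ *
++ * (Editorial sketch, not part of the upstream sources.) A minimal usage
++ * sequence for this map API, assuming @a key was obtained from a prior
++ * successful xnmap_enter() call so that no range check is needed, and
++ * with @a obj standing for any object pointer to index:
++ *
++ *   struct xnmap *map = xnmap_create(256, 32, 0);
++ *   int key = xnmap_enter(map, -1, obj);      // draws a free key
++ *   void *p = xnmap_fetch_nocheck(map, key);  // p == obj
++ *   xnmap_remove(map, key);
++ *   xnmap_delete(map);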
++ * ++ * @return The indexed object address is returned on success, ++ * otherwise NULL is returned when no object is currently indexed on ++ * @a key. ++ * ++ * @coretags{unrestricted} ++ */ ++ ++/** @} */ +--- linux/kernel/xenomai/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/Makefile 2021-04-07 16:01:25.839636149 +0800 +@@ -0,0 +1,28 @@ ++obj-$(CONFIG_XENOMAI) += xenomai.o rtdm/ posix/ ++ ++xenomai-y := apc.o \ ++ arith.o \ ++ bufd.o \ ++ clock.o \ ++ heap.o \ ++ init.o \ ++ intr.o \ ++ lock.o \ ++ registry.o \ ++ sched-idle.o \ ++ sched-rt.o \ ++ sched.o \ ++ select.o \ ++ synch.o \ ++ thread.o \ ++ timer.o \ ++ tree.o ++ ++xenomai-$(CONFIG_XENO_OPT_SCHED_QUOTA) += sched-quota.o ++xenomai-$(CONFIG_XENO_OPT_SCHED_WEAK) += sched-weak.o ++xenomai-$(CONFIG_XENO_OPT_SCHED_SPORADIC) += sched-sporadic.o ++xenomai-$(CONFIG_XENO_OPT_SCHED_TP) += sched-tp.o ++xenomai-$(CONFIG_XENO_OPT_DEBUG) += debug.o ++xenomai-$(CONFIG_XENO_OPT_PIPE) += pipe.o ++xenomai-$(CONFIG_XENO_OPT_MAP) += map.o ++xenomai-$(CONFIG_PROC_FS) += vfile.o procfs.o +--- linux/kernel/xenomai/pipe.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/pipe.c 2021-04-07 16:01:25.834636157 +0800 +@@ -0,0 +1,1178 @@ ++/* ++ * Copyright (C) 2001,2002,2003,2004 Philippe Gerum . ++ * Copyright (C) 2005 Dmitry Adamushko ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA ++ * 02139, USA; either version 2 of the License, or (at your option) ++ * any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int xnpipe_asyncsig = SIGIO; ++ ++struct xnpipe_state xnpipe_states[XNPIPE_NDEVS]; ++EXPORT_SYMBOL_GPL(xnpipe_states); ++ ++#define XNPIPE_BITMAP_SIZE ((XNPIPE_NDEVS + BITS_PER_LONG - 1) / BITS_PER_LONG) ++ ++static unsigned long xnpipe_bitmap[XNPIPE_BITMAP_SIZE]; ++ ++static LIST_HEAD(xnpipe_sleepq); ++ ++static LIST_HEAD(xnpipe_asyncq); ++ ++int xnpipe_wakeup_apc; ++ ++static struct class *xnpipe_class; ++ ++/* Allocation of minor values */ ++ ++static inline int xnpipe_minor_alloc(int minor) ++{ ++ spl_t s; ++ ++ if ((minor < 0 && minor != XNPIPE_MINOR_AUTO) || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (minor == XNPIPE_MINOR_AUTO) ++ minor = find_first_zero_bit(xnpipe_bitmap, XNPIPE_NDEVS); ++ ++ if (minor == XNPIPE_NDEVS || ++ (xnpipe_bitmap[minor / BITS_PER_LONG] & ++ (1UL << (minor % BITS_PER_LONG)))) ++ minor = -EBUSY; ++ else ++ xnpipe_bitmap[minor / BITS_PER_LONG] |= ++ (1UL << (minor % BITS_PER_LONG)); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return minor; ++} ++ ++static inline void xnpipe_minor_free(int minor) ++{ ++ xnpipe_bitmap[minor / BITS_PER_LONG] &= ++ ~(1UL << (minor % BITS_PER_LONG)); ++} ++ ++static inline void xnpipe_enqueue_wait(struct xnpipe_state *state, int mask) ++{ ++ if (state->wcount != 0x7fffffff && state->wcount++ == 0) ++ list_add_tail(&state->slink, &xnpipe_sleepq); ++ ++ state->status |= mask; ++} ++ ++static inline void xnpipe_dequeue_wait(struct xnpipe_state *state, int mask) ++{ ++ if (state->status & mask) ++ if (--state->wcount == 0) { ++ list_del(&state->slink); ++ state->status &= ~mask; ++ } ++} ++ ++static inline void xnpipe_dequeue_all(struct xnpipe_state *state, int mask) ++{ ++ if (state->status & mask) { ++ if (state->wcount) { ++ state->wcount = 0; ++ list_del(&state->slink); ++ state->status &= ~mask; ++ } ++ } ++} ++ ++/* Must be entered with nklock held, interrupts off. */ ++#define xnpipe_wait(__state, __mask, __s, __cond) \ ++({ \ ++ wait_queue_head_t *__waitq; \ ++ DEFINE_WAIT(__wait); \ ++ int __sigpending; \ ++ \ ++ if ((__mask) & XNPIPE_USER_WREAD) \ ++ __waitq = &(__state)->readq; \ ++ else \ ++ __waitq = &(__state)->syncq; \ ++ \ ++ xnpipe_enqueue_wait(__state, __mask); \ ++ xnlock_put_irqrestore(&nklock, __s); \ ++ \ ++ for (;;) { \ ++ __sigpending = signal_pending(current); \ ++ if (__sigpending) \ ++ break; \ ++ prepare_to_wait_exclusive(__waitq, &__wait, TASK_INTERRUPTIBLE); \ ++ if (__cond || (__state)->status & XNPIPE_KERN_LCLOSE) \ ++ break; \ ++ schedule(); \ ++ } \ ++ \ ++ finish_wait(__waitq, &__wait); \ ++ \ ++ /* Restore the interrupt state initially set by the caller. */ \ ++ xnlock_get_irqsave(&nklock, __s); \ ++ xnpipe_dequeue_wait(__state, __mask); \ ++ \ ++ __sigpending; \ ++}) ++ ++static void xnpipe_wakeup_proc(void *cookie) ++{ ++ struct xnpipe_state *state; ++ unsigned long rbits; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* ++ * NOTE: sleepers might enter/leave the queue while we don't ++ * hold the nklock in these wakeup loops. So we iterate over ++ * each sleeper list until we find no more candidate for ++ * wakeup after an entire scan, redoing the scan from the list ++ * head otherwise. 
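++	 * (Editorial note, added for clarity -- not upstream text.)
++	 * This is why each pass below restarts from list_first_entry():
++	 * the lock is dropped around wake_up_interruptible(), so the
++	 * sleep queue may have changed by the time it is re-acquired.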
++ */ ++ for (;;) { ++ if (list_empty(&xnpipe_sleepq)) ++ goto check_async; ++ ++ state = list_first_entry(&xnpipe_sleepq, struct xnpipe_state, slink); ++ ++ for (;;) { ++ rbits = state->status & XNPIPE_USER_ALL_READY; ++ if (rbits) ++ break; ++ if (list_is_last(&state->slink, &xnpipe_sleepq)) ++ goto check_async; ++ state = list_next_entry(state, slink); ++ } ++ ++ state->status &= ~rbits; ++ ++ if ((rbits & XNPIPE_USER_WREAD_READY) != 0) { ++ if (waitqueue_active(&state->readq)) { ++ xnlock_put_irqrestore(&nklock, s); ++ wake_up_interruptible(&state->readq); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ } ++ if ((rbits & XNPIPE_USER_WSYNC_READY) != 0) { ++ if (waitqueue_active(&state->syncq)) { ++ xnlock_put_irqrestore(&nklock, s); ++ wake_up_interruptible(&state->syncq); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ } ++ } ++ ++check_async: ++ /* ++ * Scan the async queue, sending the proper signal to ++ * subscribers. ++ */ ++ for (;;) { ++ if (list_empty(&xnpipe_asyncq)) ++ goto out; ++ ++ state = list_first_entry(&xnpipe_asyncq, struct xnpipe_state, alink); ++ ++ for (;;) { ++ if (state->status & XNPIPE_USER_SIGIO) ++ break; ++ if (list_is_last(&state->alink, &xnpipe_asyncq)) ++ goto out; ++ state = list_next_entry(state, alink); ++ } ++ ++ state->status &= ~XNPIPE_USER_SIGIO; ++ xnlock_put_irqrestore(&nklock, s); ++ kill_fasync(&state->asyncq, xnpipe_asyncsig, POLL_IN); ++ xnlock_get_irqsave(&nklock, s); ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++static inline void xnpipe_schedule_request(void) /* hw IRQs off */ ++{ ++ __xnapc_schedule(xnpipe_wakeup_apc); ++} ++ ++static inline ssize_t xnpipe_flush_bufq(void (*fn)(void *buf, void *xstate), ++ struct list_head *q, ++ void *xstate) ++{ ++ struct xnpipe_mh *mh, *tmp; ++ ssize_t n = 0; ++ ++ if (list_empty(q)) ++ return 0; ++ ++ /* Queue is private, no locking is required. */ ++ list_for_each_entry_safe(mh, tmp, q, link) { ++ list_del(&mh->link); ++ n += xnpipe_m_size(mh); ++ fn(mh, xstate); ++ } ++ ++ /* Return the overall count of bytes flushed. */ ++ return n; ++} ++ ++/* ++ * Move the specified queue contents to a private queue, then call the ++ * flush handler to purge it. The latter runs without locking. ++ * Returns the number of bytes flushed. Must be entered with nklock ++ * held, interrupts off. ++ */ ++#define xnpipe_flushq(__state, __q, __f, __s) \ ++({ \ ++ LIST_HEAD(__privq); \ ++ ssize_t __n; \ ++ \ ++ list_splice_init(&(state)->__q, &__privq); \ ++ (__state)->nr ## __q = 0; \ ++ xnlock_put_irqrestore(&nklock, (__s)); \ ++ __n = xnpipe_flush_bufq((__state)->ops.__f, &__privq, (__state)->xstate); \ ++ xnlock_get_irqsave(&nklock, (__s)); \ ++ \ ++ __n; \ ++}) ++ ++static void *xnpipe_default_alloc_ibuf(size_t size, void *xstate) ++{ ++ void *buf; ++ ++ buf = xnmalloc(size); ++ if (likely(buf != NULL)) ++ return buf; ++ ++ if (size > xnheap_get_size(&cobalt_heap)) ++ /* Request will never succeed. */ ++ return (struct xnpipe_mh *)-1; ++ ++ return NULL; ++} ++ ++static void xnpipe_default_free_ibuf(void *buf, void *xstate) ++{ ++ xnfree(buf); ++} ++ ++static void xnpipe_default_release(void *xstate) ++{ ++} ++ ++static inline int xnpipe_set_ops(struct xnpipe_state *state, ++ struct xnpipe_operations *ops) ++{ ++ state->ops = *ops; ++ ++ if (ops->free_obuf == NULL) ++ /* ++ * Caller must provide a way to free unread outgoing ++ * buffers. ++ */ ++ return -EINVAL; ++ ++ /* Set some default handlers for common usage. 
*/ ++ if (ops->alloc_ibuf == NULL) ++ state->ops.alloc_ibuf = xnpipe_default_alloc_ibuf; ++ if (ops->free_ibuf == NULL) ++ state->ops.free_ibuf = xnpipe_default_free_ibuf; ++ if (ops->release == NULL) ++ state->ops.release = xnpipe_default_release; ++ ++ return 0; ++} ++ ++int xnpipe_connect(int minor, struct xnpipe_operations *ops, void *xstate) ++{ ++ struct xnpipe_state *state; ++ int need_sched = 0, ret; ++ spl_t s; ++ ++ minor = xnpipe_minor_alloc(minor); ++ if (minor < 0) ++ return minor; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ ret = xnpipe_set_ops(state, ops); ++ if (ret) { ++ xnlock_put_irqrestore(&nklock, s); ++ return ret; ++ } ++ ++ state->status |= XNPIPE_KERN_CONN; ++ xnsynch_init(&state->synchbase, XNSYNCH_FIFO, NULL); ++ state->xstate = xstate; ++ state->ionrd = 0; ++ ++ if (state->status & XNPIPE_USER_CONN) { ++ if (state->status & XNPIPE_USER_WREAD) { ++ /* ++ * Wake up the regular Linux task waiting for ++ * the kernel side to connect (xnpipe_open). ++ */ ++ state->status |= XNPIPE_USER_WREAD_READY; ++ need_sched = 1; ++ } ++ ++ if (state->asyncq) { /* Schedule asynch sig. */ ++ state->status |= XNPIPE_USER_SIGIO; ++ need_sched = 1; ++ } ++ } ++ ++ if (need_sched) ++ xnpipe_schedule_request(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return minor; ++} ++EXPORT_SYMBOL_GPL(xnpipe_connect); ++ ++int xnpipe_disconnect(int minor) ++{ ++ struct xnpipe_state *state; ++ int need_sched = 0; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBADF; ++ } ++ ++ state->status &= ~XNPIPE_KERN_CONN; ++ ++ state->ionrd -= xnpipe_flushq(state, outq, free_obuf, s); ++ ++ if ((state->status & XNPIPE_USER_CONN) == 0) ++ goto cleanup; ++ ++ xnpipe_flushq(state, inq, free_ibuf, s); ++ ++ if (xnsynch_destroy(&state->synchbase) == XNSYNCH_RESCHED) ++ xnsched_run(); ++ ++ if (state->status & XNPIPE_USER_WREAD) { ++ /* ++ * Wake up the regular Linux task waiting for some ++ * operation from the Xenomai side (read/write or ++ * poll). ++ */ ++ state->status |= XNPIPE_USER_WREAD_READY; ++ need_sched = 1; ++ } ++ ++ if (state->asyncq) { /* Schedule asynch sig. */ ++ state->status |= XNPIPE_USER_SIGIO; ++ need_sched = 1; ++ } ++ ++cleanup: ++ /* ++ * If xnpipe_release() has not fully run, enter lingering ++ * close. This will prevent the extra state from being wiped ++ * out until then. 
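++	 * (Editorial note, added for clarity -- not upstream text.)
++	 * In the lingering case, the ->release() handler runs and the
++	 * minor is freed later, from xnpipe_cleanup_user_conn(), once
++	 * the user side finally closes the device.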
++ */ ++ if (state->status & XNPIPE_USER_CONN) ++ state->status |= XNPIPE_KERN_LCLOSE; ++ else { ++ xnlock_put_irqrestore(&nklock, s); ++ state->ops.release(state->xstate); ++ xnlock_get_irqsave(&nklock, s); ++ xnpipe_minor_free(minor); ++ } ++ ++ if (need_sched) ++ xnpipe_schedule_request(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnpipe_disconnect); ++ ++ssize_t xnpipe_send(int minor, struct xnpipe_mh *mh, size_t size, int flags) ++{ ++ struct xnpipe_state *state; ++ int need_sched = 0; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ if (size <= sizeof(*mh)) ++ return -EINVAL; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBADF; ++ } ++ ++ xnpipe_m_size(mh) = size - sizeof(*mh); ++ xnpipe_m_rdoff(mh) = 0; ++ state->ionrd += xnpipe_m_size(mh); ++ ++ if (flags & XNPIPE_URGENT) ++ list_add(&mh->link, &state->outq); ++ else ++ list_add_tail(&mh->link, &state->outq); ++ ++ state->nroutq++; ++ ++ if ((state->status & XNPIPE_USER_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return (ssize_t) size; ++ } ++ ++ if (state->status & XNPIPE_USER_WREAD) { ++ /* ++ * Wake up the regular Linux task waiting for input ++ * from the Xenomai side. ++ */ ++ state->status |= XNPIPE_USER_WREAD_READY; ++ need_sched = 1; ++ } ++ ++ if (state->asyncq) { /* Schedule asynch sig. */ ++ state->status |= XNPIPE_USER_SIGIO; ++ need_sched = 1; ++ } ++ ++ if (need_sched) ++ xnpipe_schedule_request(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return (ssize_t) size; ++} ++EXPORT_SYMBOL_GPL(xnpipe_send); ++ ++ssize_t xnpipe_mfixup(int minor, struct xnpipe_mh *mh, ssize_t size) ++{ ++ struct xnpipe_state *state; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ if (size < 0) ++ return -EINVAL; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBADF; ++ } ++ ++ xnpipe_m_size(mh) += size; ++ state->ionrd += size; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return (ssize_t) size; ++} ++EXPORT_SYMBOL_GPL(xnpipe_mfixup); ++ ++ssize_t xnpipe_recv(int minor, struct xnpipe_mh **pmh, xnticks_t timeout) ++{ ++ struct xnpipe_state *state; ++ struct xnpipe_mh *mh; ++ xntmode_t mode; ++ ssize_t ret; ++ int info; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ if (xnsched_interrupt_p()) ++ return -EPERM; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ ret = -EBADF; ++ goto unlock_and_exit; ++ } ++ ++ /* ++ * If we received a relative timespec, rescale it to an ++ * absolute time value based on the monotonic clock. 
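++	 * (Editorial note, added for clarity -- not upstream text.)
++	 * Converting once to an absolute deadline keeps the total wait
++	 * bounded even if the loop below has to sleep more than once
++	 * before a message arrives.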
++ */ ++ mode = XN_RELATIVE; ++ if (timeout != XN_NONBLOCK && timeout != XN_INFINITE) { ++ mode = XN_ABSOLUTE; ++ timeout += xnclock_read_monotonic(&nkclock); ++ } ++ ++ for (;;) { ++ if (!list_empty(&state->inq)) ++ break; ++ ++ if (timeout == XN_NONBLOCK) { ++ ret = -EWOULDBLOCK; ++ goto unlock_and_exit; ++ } ++ ++ info = xnsynch_sleep_on(&state->synchbase, timeout, mode); ++ if (info & XNTIMEO) { ++ ret = -ETIMEDOUT; ++ goto unlock_and_exit; ++ } ++ if (info & XNBREAK) { ++ ret = -EINTR; ++ goto unlock_and_exit; ++ } ++ if (info & XNRMID) { ++ ret = -EIDRM; ++ goto unlock_and_exit; ++ } ++ } ++ ++ mh = list_get_entry(&state->inq, struct xnpipe_mh, link); ++ *pmh = mh; ++ state->nrinq--; ++ ret = (ssize_t)xnpipe_m_size(mh); ++ ++ if (state->status & XNPIPE_USER_WSYNC) { ++ state->status |= XNPIPE_USER_WSYNC_READY; ++ xnpipe_schedule_request(); ++ } ++ ++unlock_and_exit: ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnpipe_recv); ++ ++int xnpipe_flush(int minor, int mode) ++{ ++ struct xnpipe_state *state; ++ int msgcount; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBADF; ++ } ++ ++ msgcount = state->nroutq + state->nrinq; ++ ++ if (mode & XNPIPE_OFLUSH) ++ state->ionrd -= xnpipe_flushq(state, outq, free_obuf, s); ++ ++ if (mode & XNPIPE_IFLUSH) ++ xnpipe_flushq(state, inq, free_ibuf, s); ++ ++ if ((state->status & XNPIPE_USER_WSYNC) && ++ msgcount > state->nroutq + state->nrinq) { ++ state->status |= XNPIPE_USER_WSYNC_READY; ++ xnpipe_schedule_request(); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnpipe_flush); ++ ++int xnpipe_pollstate(int minor, unsigned int *mask_r) ++{ ++ struct xnpipe_state *state; ++ int ret = 0; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ state = xnpipe_states + minor; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (state->status & XNPIPE_KERN_CONN) { ++ *mask_r = POLLOUT; ++ if (!list_empty(&state->inq)) ++ *mask_r |= POLLIN; ++ } else ++ ret = -EIO; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnpipe_pollstate); ++ ++/* Must be entered with nklock held, interrupts off. */ ++#define xnpipe_cleanup_user_conn(__state, __s) \ ++ do { \ ++ xnpipe_flushq((__state), outq, free_obuf, (__s)); \ ++ xnpipe_flushq((__state), inq, free_ibuf, (__s)); \ ++ (__state)->status &= ~XNPIPE_USER_CONN; \ ++ if ((__state)->status & XNPIPE_KERN_LCLOSE) { \ ++ (__state)->status &= ~XNPIPE_KERN_LCLOSE; \ ++ xnlock_put_irqrestore(&nklock, (__s)); \ ++ (__state)->ops.release((__state)->xstate); \ ++ xnlock_get_irqsave(&nklock, (__s)); \ ++ xnpipe_minor_free(xnminor_from_state(__state)); \ ++ } \ ++ } while(0) ++ ++/* ++ * Open the pipe from user-space. ++ */ ++ ++static int xnpipe_open(struct inode *inode, struct file *file) ++{ ++ int minor, err = 0, sigpending; ++ struct xnpipe_state *state; ++ spl_t s; ++ ++ minor = MINOR(inode->i_rdev); ++ ++ if (minor >= XNPIPE_NDEVS) ++ return -ENXIO; /* TssTss... stop playing with mknod() ;o) */ ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* Enforce exclusive open for the message queues. 
*/ ++ if (state->status & (XNPIPE_USER_CONN | XNPIPE_USER_LCONN)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBUSY; ++ } ++ ++ state->status |= XNPIPE_USER_LCONN; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ file->private_data = state; ++ init_waitqueue_head(&state->readq); ++ init_waitqueue_head(&state->syncq); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ state->status |= XNPIPE_USER_CONN; ++ state->status &= ~XNPIPE_USER_LCONN; ++ state->wcount = 0; ++ ++ state->status &= ++ ~(XNPIPE_USER_ALL_WAIT | XNPIPE_USER_ALL_READY | ++ XNPIPE_USER_SIGIO); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ if (file->f_flags & O_NONBLOCK) { ++ xnpipe_cleanup_user_conn(state, s); ++ xnlock_put_irqrestore(&nklock, s); ++ return -EWOULDBLOCK; ++ } ++ ++ sigpending = xnpipe_wait(state, XNPIPE_USER_WREAD, s, ++ state->status & XNPIPE_KERN_CONN); ++ if (sigpending) { ++ xnpipe_cleanup_user_conn(state, s); ++ xnlock_put_irqrestore(&nklock, s); ++ return -ERESTARTSYS; ++ } ++ } ++ ++ if (err) ++ xnpipe_cleanup_user_conn(state, s); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++static int xnpipe_release(struct inode *inode, struct file *file) ++{ ++ struct xnpipe_state *state = file->private_data; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnpipe_dequeue_all(state, XNPIPE_USER_WREAD); ++ xnpipe_dequeue_all(state, XNPIPE_USER_WSYNC); ++ ++ if (state->status & XNPIPE_KERN_CONN) { ++ /* Unblock waiters. */ ++ if (xnsynch_pended_p(&state->synchbase)) { ++ xnsynch_flush(&state->synchbase, XNRMID); ++ xnsched_run(); ++ } ++ } ++ ++ if (state->ops.input) ++ state->ops.input(NULL, -EPIPE, state->xstate); ++ ++ if (state->asyncq) { /* Clear the async queue */ ++ list_del(&state->alink); ++ state->status &= ~XNPIPE_USER_SIGIO; ++ xnlock_put_irqrestore(&nklock, s); ++ fasync_helper(-1, file, 0, &state->asyncq); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ ++ xnpipe_cleanup_user_conn(state, s); ++ /* ++ * The extra state may not be available from now on, if ++ * xnpipe_disconnect() entered lingering close before we got ++ * there; so calling xnpipe_cleanup_user_conn() should be the ++ * last thing we do. ++ */ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++static ssize_t xnpipe_read(struct file *file, ++ char *buf, size_t count, loff_t *ppos) ++{ ++ struct xnpipe_state *state = file->private_data; ++ int sigpending, err = 0; ++ size_t nbytes, inbytes; ++ struct xnpipe_mh *mh; ++ ssize_t ret; ++ spl_t s; ++ ++ if (!access_wok(buf, count)) ++ return -EFAULT; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EPIPE; ++ } ++ /* ++ * Queue probe and proc enqueuing must be seen atomically, ++ * including from the Xenomai side. ++ */ ++ if (list_empty(&state->outq)) { ++ if (file->f_flags & O_NONBLOCK) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EWOULDBLOCK; ++ } ++ ++ sigpending = xnpipe_wait(state, XNPIPE_USER_WREAD, s, ++ !list_empty(&state->outq)); ++ ++ if (list_empty(&state->outq)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return sigpending ? -ERESTARTSYS : 0; ++ } ++ } ++ ++ mh = list_get_entry(&state->outq, struct xnpipe_mh, link); ++ state->nroutq--; ++ ++ /* ++ * We allow more data to be appended to the current message ++ * bucket while its contents is being copied to the user ++ * buffer, therefore, we need to loop until: 1) all the data ++ * has been copied, 2) we consumed the user buffer space ++ * entirely. 
++ */ ++ ++ inbytes = 0; ++ ++ for (;;) { ++ nbytes = xnpipe_m_size(mh) - xnpipe_m_rdoff(mh); ++ ++ if (nbytes + inbytes > count) ++ nbytes = count - inbytes; ++ ++ if (nbytes == 0) ++ break; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* More data could be appended while doing this: */ ++ err = __copy_to_user(buf + inbytes, ++ xnpipe_m_data(mh) + xnpipe_m_rdoff(mh), ++ nbytes); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (err) { ++ err = -EFAULT; ++ break; ++ } ++ ++ inbytes += nbytes; ++ xnpipe_m_rdoff(mh) += nbytes; ++ } ++ ++ state->ionrd -= inbytes; ++ ret = inbytes; ++ ++ if (xnpipe_m_size(mh) > xnpipe_m_rdoff(mh)) { ++ list_add(&mh->link, &state->outq); ++ state->nroutq++; ++ } else { ++ /* ++ * We always want to fire the output handler because ++ * whatever the error state is for userland (e.g ++ * -EFAULT), we did pull a message from our output ++ * queue. ++ */ ++ if (state->ops.output) ++ state->ops.output(mh, state->xstate); ++ xnlock_put_irqrestore(&nklock, s); ++ state->ops.free_obuf(mh, state->xstate); ++ xnlock_get_irqsave(&nklock, s); ++ if (state->status & XNPIPE_USER_WSYNC) { ++ state->status |= XNPIPE_USER_WSYNC_READY; ++ xnpipe_schedule_request(); ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err ? : ret; ++} ++ ++static ssize_t xnpipe_write(struct file *file, ++ const char *buf, size_t count, loff_t *ppos) ++{ ++ struct xnpipe_state *state = file->private_data; ++ struct xnpipe_mh *mh; ++ int pollnum, ret; ++ spl_t s; ++ ++ if (count == 0) ++ return 0; ++ ++ if (!access_rok(buf, count)) ++ return -EFAULT; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++retry: ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EPIPE; ++ } ++ ++ pollnum = state->nrinq + state->nroutq; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ mh = state->ops.alloc_ibuf(count + sizeof(*mh), state->xstate); ++ if (mh == (struct xnpipe_mh *)-1) ++ return -ENOMEM; ++ ++ if (mh == NULL) { ++ if (file->f_flags & O_NONBLOCK) ++ return -EWOULDBLOCK; ++ ++ xnlock_get_irqsave(&nklock, s); ++ if (xnpipe_wait(state, XNPIPE_USER_WSYNC, s, ++ pollnum > state->nrinq + state->nroutq)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -ERESTARTSYS; ++ } ++ goto retry; ++ } ++ ++ xnpipe_m_size(mh) = count; ++ xnpipe_m_rdoff(mh) = 0; ++ ++ if (copy_from_user(xnpipe_m_data(mh), buf, count)) { ++ state->ops.free_ibuf(mh, state->xstate); ++ return -EFAULT; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ list_add_tail(&mh->link, &state->inq); ++ state->nrinq++; ++ ++ /* Wake up a Xenomai sleeper if any. 
*/ ++ if (xnsynch_wakeup_one_sleeper(&state->synchbase)) ++ xnsched_run(); ++ ++ if (state->ops.input) { ++ ret = state->ops.input(mh, 0, state->xstate); ++ if (ret) ++ count = (size_t)ret; ++ } ++ ++ if (file->f_flags & O_SYNC) { ++ if (!list_empty(&state->inq)) { ++ if (xnpipe_wait(state, XNPIPE_USER_WSYNC, s, ++ list_empty(&state->inq))) ++ count = -ERESTARTSYS; ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return (ssize_t)count; ++} ++ ++static long xnpipe_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ struct xnpipe_state *state = file->private_data; ++ int ret = 0; ++ ssize_t n; ++ spl_t s; ++ ++ switch (cmd) { ++ case XNPIPEIOC_GET_NRDEV: ++ ++ if (put_user(XNPIPE_NDEVS, (int *)arg)) ++ return -EFAULT; ++ ++ break; ++ ++ case XNPIPEIOC_OFLUSH: ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EPIPE; ++ } ++ ++ n = xnpipe_flushq(state, outq, free_obuf, s); ++ state->ionrd -= n; ++ goto kick_wsync; ++ ++ case XNPIPEIOC_IFLUSH: ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EPIPE; ++ } ++ ++ n = xnpipe_flushq(state, inq, free_ibuf, s); ++ ++ kick_wsync: ++ ++ if (n > 0 && (state->status & XNPIPE_USER_WSYNC)) { ++ state->status |= XNPIPE_USER_WSYNC_READY; ++ xnpipe_schedule_request(); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ret = n; ++ break; ++ ++ case XNPIPEIOC_SETSIG: ++ ++ if (arg < 1 || arg >= _NSIG) ++ return -EINVAL; ++ ++ xnpipe_asyncsig = arg; ++ break; ++ ++ case FIONREAD: ++ ++ n = (state->status & XNPIPE_KERN_CONN) ? state->ionrd : 0; ++ ++ if (put_user(n, (int *)arg)) ++ return -EFAULT; ++ ++ break; ++ ++ case TCGETS: ++ /* For isatty() probing. */ ++ return -ENOTTY; ++ ++ default: ++ ++ return -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int xnpipe_fasync(int fd, struct file *file, int on) ++{ ++ struct xnpipe_state *state = file->private_data; ++ int ret, queued; ++ spl_t s; ++ ++ queued = (state->asyncq != NULL); ++ ret = fasync_helper(fd, file, on, &state->asyncq); ++ ++ if (state->asyncq) { ++ if (!queued) { ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&state->alink, &xnpipe_asyncq); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ } else if (queued) { ++ xnlock_get_irqsave(&nklock, s); ++ list_del(&state->alink); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++ return ret; ++} ++ ++static unsigned xnpipe_poll(struct file *file, poll_table *pt) ++{ ++ struct xnpipe_state *state = file->private_data; ++ unsigned r_mask = 0, w_mask = 0; ++ spl_t s; ++ ++ poll_wait(file, &state->readq, pt); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (state->status & XNPIPE_KERN_CONN) ++ w_mask |= (POLLOUT | POLLWRNORM); ++ else ++ r_mask |= POLLHUP; ++ ++ if (!list_empty(&state->outq)) ++ r_mask |= (POLLIN | POLLRDNORM); ++ else ++ /* ++ * Procs which have issued a timed out poll req will ++ * remain linked to the sleepers queue, and will be ++ * silently unlinked the next time the Xenomai side ++ * kicks xnpipe_wakeup_proc(). 
++ */ ++ xnpipe_enqueue_wait(state, XNPIPE_USER_WREAD); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return r_mask | w_mask; ++} ++ ++static struct file_operations xnpipe_fops = { ++ .read = xnpipe_read, ++ .write = xnpipe_write, ++ .poll = xnpipe_poll, ++ .unlocked_ioctl = xnpipe_ioctl, ++ .open = xnpipe_open, ++ .release = xnpipe_release, ++ .fasync = xnpipe_fasync ++}; ++ ++int xnpipe_mount(void) ++{ ++ struct xnpipe_state *state; ++ struct device *cldev; ++ int i; ++ ++ for (state = &xnpipe_states[0]; ++ state < &xnpipe_states[XNPIPE_NDEVS]; state++) { ++ state->status = 0; ++ state->asyncq = NULL; ++ INIT_LIST_HEAD(&state->inq); ++ state->nrinq = 0; ++ INIT_LIST_HEAD(&state->outq); ++ state->nroutq = 0; ++ } ++ ++ xnpipe_class = class_create(THIS_MODULE, "rtpipe"); ++ if (IS_ERR(xnpipe_class)) { ++ printk(XENO_ERR "error creating rtpipe class, err=%ld\n", ++ PTR_ERR(xnpipe_class)); ++ return -EBUSY; ++ } ++ ++ for (i = 0; i < XNPIPE_NDEVS; i++) { ++ cldev = device_create(xnpipe_class, NULL, ++ MKDEV(XNPIPE_DEV_MAJOR, i), ++ NULL, "rtp%d", i); ++ if (IS_ERR(cldev)) { ++ printk(XENO_ERR ++ "can't add device class, major=%d, minor=%d, err=%ld\n", ++ XNPIPE_DEV_MAJOR, i, PTR_ERR(cldev)); ++ class_destroy(xnpipe_class); ++ return -EBUSY; ++ } ++ } ++ ++ if (register_chrdev(XNPIPE_DEV_MAJOR, "rtpipe", &xnpipe_fops)) { ++ printk(XENO_ERR ++ "unable to reserve major #%d for message pipe support\n", ++ XNPIPE_DEV_MAJOR); ++ return -EBUSY; ++ } ++ ++ xnpipe_wakeup_apc = ++ xnapc_alloc("pipe_wakeup", &xnpipe_wakeup_proc, NULL); ++ ++ return 0; ++} ++ ++void xnpipe_umount(void) ++{ ++ int i; ++ ++ xnapc_free(xnpipe_wakeup_apc); ++ unregister_chrdev(XNPIPE_DEV_MAJOR, "rtpipe"); ++ ++ for (i = 0; i < XNPIPE_NDEVS; i++) ++ device_destroy(xnpipe_class, MKDEV(XNPIPE_DEV_MAJOR, i)); ++ ++ class_destroy(xnpipe_class); ++} +--- linux/kernel/xenomai/arith.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/arith.c 2021-04-07 16:01:25.829636164 +0800 +@@ -0,0 +1,65 @@ ++/* ++ * Copyright © 2005 Gilles Chanteperdrix. ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_arith In-kernel arithmetics ++ * ++ * A collection of helpers performing arithmetics not implicitly ++ * available from kernel context via GCC helpers. Many of these ++ * routines enable 64bit arithmetics on 32bit systems. Xenomai ++ * architecture ports normally implement the performance critical ones ++ * in hand-crafted assembly code (see ++ * kernel/cobalt/arch/\/include/asm/xenomai/uapi/arith.h). ++ * @{ ++ */ ++ ++/** ++ * Architecture-independent div64 operation with remainder. ++ * ++ * @param a dividend ++ * ++ * @param b divisor ++ * ++ * @param rem if non-NULL, a pointer to a 64bit variable for ++ * collecting the remainder from the division. 
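++ *
++ * (Editorial sketch, not part of the upstream sources.) For instance,
++ * splitting a nanosecond count into whole seconds plus a remainder:
++ *
++ *   unsigned long long rem;
++ *   unsigned long long s = xnarch_generic_full_divmod64(2500000000ULL,
++ *                                                       1000000000ULL,
++ *                                                       &rem);
++ *   // s == 2, rem == 500000000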
++ */ ++unsigned long long xnarch_generic_full_divmod64(unsigned long long a, ++ unsigned long long b, ++ unsigned long long *rem) ++{ ++ unsigned long long q = 0, r = a; ++ int i; ++ ++ for (i = fls(a >> 32) - fls(b >> 32), b <<= i; i >= 0; i--, b >>= 1) { ++ q <<= 1; ++ if (b <= r) { ++ r -= b; ++ q++; ++ } ++ } ++ ++ if (rem) ++ *rem = r; ++ return q; ++} ++EXPORT_SYMBOL_GPL(xnarch_generic_full_divmod64); ++ ++/** @} */ +--- linux/kernel/xenomai/sched-weak.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-weak.c 2021-04-07 16:01:25.824636171 +0800 +@@ -0,0 +1,224 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++ ++static void xnsched_weak_init(struct xnsched *sched) ++{ ++ xnsched_initq(&sched->weak.runnable); ++} ++ ++static void xnsched_weak_requeue(struct xnthread *thread) ++{ ++ xnsched_addq(&thread->sched->weak.runnable, thread); ++} ++ ++static void xnsched_weak_enqueue(struct xnthread *thread) ++{ ++ xnsched_addq_tail(&thread->sched->weak.runnable, thread); ++} ++ ++static void xnsched_weak_dequeue(struct xnthread *thread) ++{ ++ xnsched_delq(&thread->sched->weak.runnable, thread); ++} ++ ++static struct xnthread *xnsched_weak_pick(struct xnsched *sched) ++{ ++ return xnsched_getq(&sched->weak.runnable); ++} ++ ++static bool xnsched_weak_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (!xnthread_test_state(thread, XNBOOST)) ++ xnthread_set_state(thread, XNWEAK); ++ ++ return xnsched_set_effective_priority(thread, p->weak.prio); ++} ++ ++static void xnsched_weak_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->weak.prio = thread->cprio; ++} ++ ++static void xnsched_weak_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p) ++ thread->cprio = p->weak.prio; ++ else ++ thread->cprio = thread->bprio; ++} ++ ++static void xnsched_weak_protectprio(struct xnthread *thread, int prio) ++{ ++ if (prio > XNSCHED_WEAK_MAX_PRIO) ++ prio = XNSCHED_WEAK_MAX_PRIO; ++ ++ thread->cprio = prio; ++} ++ ++static int xnsched_weak_chkparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p->weak.prio < XNSCHED_WEAK_MIN_PRIO || ++ p->weak.prio > XNSCHED_WEAK_MAX_PRIO) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++struct xnvfile_directory sched_weak_vfroot; ++ ++struct vfile_sched_weak_priv { ++ struct xnthread *curr; ++}; ++ ++struct vfile_sched_weak_data { ++ int cpu; ++ pid_t pid; ++ char name[XNOBJECT_NAME_LEN]; ++ int cprio; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_sched_weak_ops; ++ ++static struct xnvfile_snapshot vfile_sched_weak = { ++ .privsz = sizeof(struct vfile_sched_weak_priv), ++ .datasz = sizeof(struct vfile_sched_weak_data), ++ .tag = 
&nkthreadlist_tag, ++ .ops = &vfile_sched_weak_ops, ++}; ++ ++static int vfile_sched_weak_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_sched_weak_priv *priv = xnvfile_iterator_priv(it); ++ int nrthreads = xnsched_class_weak.nthreads; ++ ++ if (nrthreads == 0) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ ++ return nrthreads; ++} ++ ++static int vfile_sched_weak_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_weak_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_sched_weak_data *p = data; ++ struct xnthread *thread; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. */ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ if (thread->base_class != &xnsched_class_weak) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->cprio = thread->cprio; ++ ++ return 1; ++} ++ ++static int vfile_sched_weak_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_weak_data *p = data; ++ char pribuf[16]; ++ ++ if (p == NULL) ++ xnvfile_printf(it, "%-3s %-6s %-4s %s\n", ++ "CPU", "PID", "PRI", "NAME"); ++ else { ++ ksformat(pribuf, sizeof(pribuf), "%3d", p->cprio); ++ xnvfile_printf(it, "%3u %-6d %-4s %s\n", ++ p->cpu, ++ p->pid, ++ pribuf, ++ p->name); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_sched_weak_ops = { ++ .rewind = vfile_sched_weak_rewind, ++ .next = vfile_sched_weak_next, ++ .show = vfile_sched_weak_show, ++}; ++ ++static int xnsched_weak_init_vfile(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot) ++{ ++ int ret; ++ ++ ret = xnvfile_init_dir(schedclass->name, &sched_weak_vfroot, vfroot); ++ if (ret) ++ return ret; ++ ++ return xnvfile_init_snapshot("threads", &vfile_sched_weak, ++ &sched_weak_vfroot); ++} ++ ++static void xnsched_weak_cleanup_vfile(struct xnsched_class *schedclass) ++{ ++ xnvfile_destroy_snapshot(&vfile_sched_weak); ++ xnvfile_destroy_dir(&sched_weak_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct xnsched_class xnsched_class_weak = { ++ .sched_init = xnsched_weak_init, ++ .sched_enqueue = xnsched_weak_enqueue, ++ .sched_dequeue = xnsched_weak_dequeue, ++ .sched_requeue = xnsched_weak_requeue, ++ .sched_pick = xnsched_weak_pick, ++ .sched_tick = NULL, ++ .sched_rotate = NULL, ++ .sched_forget = NULL, ++ .sched_kick = NULL, ++ .sched_chkparam = xnsched_weak_chkparam, ++ .sched_setparam = xnsched_weak_setparam, ++ .sched_trackprio = xnsched_weak_trackprio, ++ .sched_protectprio = xnsched_weak_protectprio, ++ .sched_getparam = xnsched_weak_getparam, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .sched_init_vfile = xnsched_weak_init_vfile, ++ .sched_cleanup_vfile = xnsched_weak_cleanup_vfile, ++#endif ++ .weight = XNSCHED_CLASS_WEIGHT(1), ++ .policy = SCHED_WEAK, ++ .name = "weak" ++}; ++EXPORT_SYMBOL_GPL(xnsched_class_weak); +--- linux/kernel/xenomai/synch.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/synch.c 2021-04-07 16:01:25.819636178 +0800 +@@ -0,0 +1,1185 @@ ++/* ++ * Copyright (C) 2001-2008 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define PP_CEILING_MASK 0xff ++ ++static inline int get_ceiling_value(struct xnsynch *synch) ++{ ++ /* ++ * The ceiling priority value is stored in user-writable ++ * memory, make sure to constrain it within valid bounds for ++ * xnsched_class_rt before using it. ++ */ ++ return *synch->ceiling_ref & PP_CEILING_MASK ?: 1; ++} ++ ++struct xnsynch *lookup_lazy_pp(xnhandle_t handle); ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_synch Thread synchronization services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialize a synchronization object. ++ * ++ * Initializes a synchronization object. Xenomai threads can wait on ++ * and signal such objects for serializing access to resources. ++ * This object has built-in support for priority inheritance. ++ * ++ * @param synch The address of a synchronization object descriptor ++ * Cobalt will use to store the object-specific data. This descriptor ++ * must always be valid while the object is active therefore it must ++ * be allocated in permanent memory. ++ * ++ * @param flags A set of creation flags affecting the operation. The ++ * valid flags are: ++ * ++ * - XNSYNCH_PRIO causes the threads waiting for the resource to pend ++ * in priority order. Otherwise, FIFO ordering is used (XNSYNCH_FIFO). ++ * ++ * - XNSYNCH_OWNER indicates that the synchronization object shall ++ * track the resource ownership, allowing a single owner at most at ++ * any point in time. Note that setting this flag implies the use of ++ * xnsynch_acquire() and xnsynch_release() instead of ++ * xnsynch_sleep_on() and xnsynch_wakeup_*(). ++ * ++ * - XNSYNCH_PI enables priority inheritance when a priority inversion ++ * is detected among threads using this object. XNSYNCH_PI implies ++ * XNSYNCH_OWNER and XNSYNCH_PRIO. ++ * ++ * - XNSYNCH_PP enables priority protect to prevent priority inversion. ++ * XNSYNCH_PP implies XNSYNCH_OWNER and XNSYNCH_PRIO. ++ * ++ * - XNSYNCH_DREORD (Disable REORDering) tells Cobalt not to reorder ++ * the wait list upon priority change of a waiter. Reordering is the ++ * default. Only applies when XNSYNCH_PRIO is present. ++ * ++ * @param fastlock Address of the fast lock word to be associated with ++ * a synchronization object with ownership tracking. Therefore, a ++ * valid fast-lock address is required if XNSYNCH_OWNER is set in @a ++ * flags. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnsynch_init(struct xnsynch *synch, int flags, atomic_t *fastlock) ++{ ++ if (flags & (XNSYNCH_PI|XNSYNCH_PP)) ++ flags |= XNSYNCH_PRIO | XNSYNCH_OWNER; /* Obviously... */ ++ ++ synch->status = flags & ~XNSYNCH_CLAIMED; ++ synch->owner = NULL; ++ synch->cleanup = NULL; /* for PI/PP only. 
*/ ++ synch->wprio = -1; ++ synch->ceiling_ref = NULL; ++ INIT_LIST_HEAD(&synch->pendq); ++ ++ if (flags & XNSYNCH_OWNER) { ++ BUG_ON(fastlock == NULL); ++ synch->fastlock = fastlock; ++ atomic_set(fastlock, XN_NO_HANDLE); ++ } else ++ synch->fastlock = NULL; ++} ++EXPORT_SYMBOL_GPL(xnsynch_init); ++ ++/** ++ * @brief Initialize a synchronization object enforcing PP. ++ * ++ * This call is a variant of xnsynch_init() for initializing ++ * synchronization objects enabling the priority protect protocol. ++ * ++ * @param synch The address of a synchronization object descriptor ++ * Cobalt will use to store the object-specific data. See ++ * xnsynch_init(). ++ * ++ * @param flags A set of creation flags affecting the operation. See ++ * xnsynch_init(). XNSYNCH_PI is mutually exclusive with XNSYNCH_PP, ++ * and won't be considered. ++ * ++ * @param fastlock Address of the fast lock word to be associated with ++ * a synchronization object with ownership tracking. See xnsynch_init(). ++ * ++ * @param ceiling_ref The address of the variable holding the current ++ * priority ceiling value for this object. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnsynch_init_protect(struct xnsynch *synch, int flags, ++ atomic_t *fastlock, u32 *ceiling_ref) ++{ ++ xnsynch_init(synch, (flags & ~XNSYNCH_PI) | XNSYNCH_PP, fastlock); ++ synch->ceiling_ref = ceiling_ref; ++} ++ ++/** ++ * @fn void xnsynch_destroy(struct xnsynch *synch) ++ * @brief Destroy a synchronization object. ++ * ++ * Destroys the synchronization object @a synch, unblocking all ++ * waiters with the XNRMID status. ++ * ++ * @return XNSYNCH_RESCHED is returned if at least one thread is ++ * unblocked, which means the caller should invoke xnsched_run() for ++ * applying the new scheduling state. Otherwise, XNSYNCH_DONE is ++ * returned. ++ ++ * @sideeffect Same as xnsynch_flush(). ++ * ++ * @coretags{task-unrestricted} ++ */ ++int xnsynch_destroy(struct xnsynch *synch) ++{ ++ int ret; ++ ++ ret = xnsynch_flush(synch, XNRMID); ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_CLAIMED); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnsynch_destroy); ++ ++/** ++ * @fn int xnsynch_sleep_on(struct xnsynch *synch, xnticks_t timeout, xntmode_t timeout_mode); ++ * @brief Sleep on an ownerless synchronization object. ++ * ++ * Makes the calling thread sleep on the specified synchronization ++ * object, waiting for it to be signaled. ++ * ++ * This service should be called by upper interfaces wanting the ++ * current thread to pend on the given resource. It must not be used ++ * with synchronization objects that are supposed to track ownership ++ * (XNSYNCH_OWNER). ++ * ++ * @param synch The descriptor address of the synchronization object ++ * to sleep on. ++ * ++ * @param timeout The timeout which may be used to limit the time the ++ * thread pends on the resource. This value is a wait time given as a ++ * count of nanoseconds. It can either be relative, absolute ++ * monotonic, or absolute adjustable depending on @a ++ * timeout_mode. Passing XN_INFINITE @b and setting @a mode to ++ * XN_RELATIVE specifies an unbounded wait. All other values are used ++ * to initialize a watchdog timer. ++ * ++ * @param timeout_mode The mode of the @a timeout parameter. It can ++ * either be set to XN_RELATIVE, XN_ABSOLUTE, or XN_REALTIME (see also ++ * xntimer_start()). 
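++ *
++ * (Editorial sketch, not part of the upstream sources.) A typical
++ * pairing on an ownerless object, with @a sem naming a hypothetical
++ * wrapper structure embedding the xnsynch descriptor:
++ *
++ *   // waiting side, primary mode
++ *   int info = xnsynch_sleep_on(&sem->synch, XN_INFINITE, XN_RELATIVE);
++ *   if (info & XNRMID)
++ *           return -EIDRM;  // object deleted while sleeping
++ *
++ *   // signaling side
++ *   if (xnsynch_wakeup_one_sleeper(&sem->synch))
++ *           xnsched_run();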
++ * ++ * @return A bitmask which may include zero or one information bit ++ * among XNRMID, XNTIMEO and XNBREAK, which should be tested by the ++ * caller, for detecting respectively: object deletion, timeout or ++ * signal/unblock conditions which might have happened while waiting. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int xnsynch_sleep_on(struct xnsynch *synch, xnticks_t timeout, ++ xntmode_t timeout_mode) ++{ ++ struct xnthread *thread; ++ spl_t s; ++ ++ primary_mode_only(); ++ ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_OWNER); ++ ++ thread = xnthread_current(); ++ ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_MUTEX_SLEEP) && ++ thread->res_count > 0 && ++ xnthread_test_state(thread, XNWARN)) ++ xnthread_signal(thread, SIGDEBUG, SIGDEBUG_MUTEX_SLEEP); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_synch_sleepon(synch); ++ ++ if ((synch->status & XNSYNCH_PRIO) == 0) /* i.e. FIFO */ ++ list_add_tail(&thread->plink, &synch->pendq); ++ else /* i.e. priority-sorted */ ++ list_add_priff(thread, &synch->pendq, wprio, plink); ++ ++ xnthread_suspend(thread, XNPEND, timeout, timeout_mode, synch); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return xnthread_test_info(thread, XNRMID|XNTIMEO|XNBREAK); ++} ++EXPORT_SYMBOL_GPL(xnsynch_sleep_on); ++ ++/** ++ * @fn struct xnthread *xnsynch_wakeup_one_sleeper(struct xnsynch *synch); ++ * @brief Unblock the heading thread from wait. ++ * ++ * This service wakes up the thread which is currently leading the ++ * synchronization object's pending list. The sleeping thread is ++ * unblocked from its pending state, but no reschedule is performed. ++ * ++ * This service should be called by upper interfaces wanting to signal ++ * the given resource so that a single waiter is resumed. It must not ++ * be used with synchronization objects that are supposed to track ++ * ownership (XNSYNCH_OWNER not set). ++ * ++ * @param synch The descriptor address of the synchronization object ++ * whose ownership is changed. ++ * ++ * @return The descriptor address of the unblocked thread. ++ * ++ * @coretags{unrestricted} ++ */ ++struct xnthread *xnsynch_wakeup_one_sleeper(struct xnsynch *synch) ++{ ++ struct xnthread *thread; ++ spl_t s; ++ ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_OWNER); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(&synch->pendq)) { ++ thread = NULL; ++ goto out; ++ } ++ ++ trace_cobalt_synch_wakeup(synch); ++ thread = list_first_entry(&synch->pendq, struct xnthread, plink); ++ list_del(&thread->plink); ++ thread->wchan = NULL; ++ xnthread_resume(thread, XNPEND); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return thread; ++} ++EXPORT_SYMBOL_GPL(xnsynch_wakeup_one_sleeper); ++ ++int xnsynch_wakeup_many_sleepers(struct xnsynch *synch, int nr) ++{ ++ struct xnthread *thread, *tmp; ++ int nwakeups = 0; ++ spl_t s; ++ ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_OWNER); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(&synch->pendq)) ++ goto out; ++ ++ trace_cobalt_synch_wakeup_many(synch); ++ ++ list_for_each_entry_safe(thread, tmp, &synch->pendq, plink) { ++ if (nwakeups++ >= nr) ++ break; ++ list_del(&thread->plink); ++ thread->wchan = NULL; ++ xnthread_resume(thread, XNPEND); ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return nwakeups; ++} ++EXPORT_SYMBOL_GPL(xnsynch_wakeup_many_sleepers); ++ ++/** ++ * @fn void xnsynch_wakeup_this_sleeper(struct xnsynch *synch, struct xnthread *sleeper); ++ * @brief Unblock a particular thread from wait. 
++ * ++ * This service wakes up a specific thread which is currently pending on ++ * the given synchronization object. The sleeping thread is unblocked ++ * from its pending state, but no reschedule is performed. ++ * ++ * This service should be called by upper interfaces wanting to signal ++ * the given resource so that a specific waiter is resumed. It must not ++ * be used with synchronization objects that are supposed to track ++ * ownership (XNSYNCH_OWNER not set). ++ * ++ * @param synch The descriptor address of the synchronization object ++ * whose ownership is changed. ++ * ++ * @param sleeper The thread to unblock which MUST be currently linked ++ * to the synchronization object's pending queue (i.e. synch->pendq). ++ * ++ * @coretags{unrestricted} ++ */ ++void xnsynch_wakeup_this_sleeper(struct xnsynch *synch, struct xnthread *sleeper) ++{ ++ spl_t s; ++ ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_OWNER); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_synch_wakeup(synch); ++ list_del(&sleeper->plink); ++ sleeper->wchan = NULL; ++ xnthread_resume(sleeper, XNPEND); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnsynch_wakeup_this_sleeper); ++ ++static inline void raise_boost_flag(struct xnthread *owner) ++{ ++ /* Backup the base priority at first boost only. */ ++ if (!xnthread_test_state(owner, XNBOOST)) { ++ owner->bprio = owner->cprio; ++ xnthread_set_state(owner, XNBOOST); ++ } ++} ++ ++static void inherit_thread_priority(struct xnthread *owner, ++ struct xnthread *target) ++{ ++ if (xnthread_test_state(owner, XNZOMBIE)) ++ return; ++ ++ /* Apply the scheduling policy of "target" to "thread" */ ++ xnsched_track_policy(owner, target); ++ ++ /* ++ * Owner may be sleeping, propagate priority update through ++ * the PI chain if needed. ++ */ ++ if (owner->wchan) ++ xnsynch_requeue_sleeper(owner); ++} ++ ++static void __ceil_owner_priority(struct xnthread *owner, int prio) ++{ ++ if (xnthread_test_state(owner, XNZOMBIE)) ++ return; ++ /* ++ * Raise owner priority to the ceiling value, this implicitly ++ * selects SCHED_FIFO for the owner. ++ */ ++ xnsched_protect_priority(owner, prio); ++ ++ if (owner->wchan) ++ xnsynch_requeue_sleeper(owner); ++} ++ ++static void adjust_boost(struct xnthread *owner, struct xnthread *target) ++{ ++ struct xnsynch *synch; ++ ++ /* ++ * CAUTION: we may have PI and PP-enabled objects among the ++ * boosters, considering the leader of synch->pendq is ++ * therefore NOT enough for determining the next boost ++ * priority, since PP is tracked on acquisition, not on ++ * contention. Check the head of the booster list instead. ++ */ ++ synch = list_first_entry(&owner->boosters, struct xnsynch, next); ++ if (synch->wprio == owner->wprio) ++ return; ++ ++ if (synch->status & XNSYNCH_PP) ++ __ceil_owner_priority(owner, get_ceiling_value(synch)); ++ else { ++ XENO_BUG_ON(COBALT, list_empty(&synch->pendq)); ++ if (target == NULL) ++ target = list_first_entry(&synch->pendq, ++ struct xnthread, plink); ++ inherit_thread_priority(owner, target); ++ } ++} ++ ++static void ceil_owner_priority(struct xnsynch *synch) ++{ ++ struct xnthread *owner = synch->owner; ++ int wprio; ++ ++ /* PP ceiling values are implicitly based on the RT class. 
*/ ++ wprio = xnsched_calc_wprio(&xnsched_class_rt, ++ get_ceiling_value(synch)); ++ synch->wprio = wprio; ++ list_add_priff(synch, &owner->boosters, wprio, next); ++ raise_boost_flag(owner); ++ synch->status |= XNSYNCH_CEILING; ++ ++ /* ++ * If the ceiling value is lower than the current effective ++ * priority, we must not adjust the latter. BEWARE: not only ++ * this restriction is required to keep the PP logic right, ++ * but this is also a basic assumption made by all ++ * xnthread_commit_ceiling() callers which won't check for any ++ * rescheduling opportunity upon return. ++ * ++ * However we do want the object to be linked to the booster ++ * list, and XNBOOST must appear in the current thread status. ++ * ++ * This way, setparam() won't be allowed to decrease the ++ * current weighted priority below the ceiling value, until we ++ * eventually release this object. ++ */ ++ if (wprio > owner->wprio) ++ adjust_boost(owner, NULL); ++} ++ ++static inline ++void track_owner(struct xnsynch *synch, struct xnthread *owner) ++{ ++ synch->owner = owner; ++} ++ ++static inline /* nklock held, irqs off */ ++void set_current_owner_locked(struct xnsynch *synch, struct xnthread *owner) ++{ ++ /* ++ * Update the owner information, and apply priority protection ++ * for PP objects. We may only get there if owner is current, ++ * or blocked. ++ */ ++ track_owner(synch, owner); ++ if (synch->status & XNSYNCH_PP) ++ ceil_owner_priority(synch); ++} ++ ++static inline ++void set_current_owner(struct xnsynch *synch, struct xnthread *owner) ++{ ++ spl_t s; ++ ++ track_owner(synch, owner); ++ if (synch->status & XNSYNCH_PP) { ++ xnlock_get_irqsave(&nklock, s); ++ ceil_owner_priority(synch); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++} ++ ++static inline ++xnhandle_t get_owner_handle(xnhandle_t ownerh, struct xnsynch *synch) ++{ ++ /* ++ * On acquisition from kernel space, the fast lock handle ++ * should bear the FLCEIL bit for PP objects, so that userland ++ * takes the slow path on release, jumping to the kernel for ++ * dropping the ceiling priority boost. ++ */ ++ if (synch->status & XNSYNCH_PP) ++ ownerh = xnsynch_fast_ceiling(ownerh); ++ ++ return ownerh; ++} ++ ++static void commit_ceiling(struct xnsynch *synch, struct xnthread *curr) ++{ ++ xnhandle_t oldh, h; ++ atomic_t *lockp; ++ ++ track_owner(synch, curr); ++ ceil_owner_priority(synch); ++ /* ++ * Raise FLCEIL, which indicates a kernel entry will be ++ * required for releasing this resource. ++ */ ++ lockp = xnsynch_fastlock(synch); ++ do { ++ h = atomic_read(lockp); ++ oldh = atomic_cmpxchg(lockp, h, xnsynch_fast_ceiling(h)); ++ } while (oldh != h); ++} ++ ++void xnsynch_commit_ceiling(struct xnthread *curr) /* nklock held, irqs off */ ++{ ++ struct xnsynch *synch; ++ atomic_t *lockp; ++ ++ /* curr->u_window has to be valid, curr bears XNUSER. */ ++ synch = lookup_lazy_pp(curr->u_window->pp_pending); ++ if (synch == NULL) { ++ /* ++ * If pp_pending is a bad handle, don't panic but ++ * rather ignore: we don't want a misbehaving userland ++ * to crash the kernel. ++ */ ++ XENO_WARN_ON_ONCE(USER, 1); ++ goto out; ++ } ++ ++ /* ++ * For PP locks, userland does, in that order: ++ * ++ * -- LOCK ++ * 1. curr->u_window->pp_pending = lock_handle ++ * barrier(); ++ * 2. atomic_cmpxchg(lockp, XN_NO_HANDLE, curr->handle); ++ * ++ * -- UNLOCK ++ * 1. atomic_cmpxchg(lockp, curr->handle, XN_NO_HANDLE); [unclaimed] ++ * barrier(); ++ * 2. 
curr->u_window->pp_pending = XN_NO_HANDLE ++ * ++ * Make sure we have not been caught in a rescheduling in ++ * between those steps. If we did, then we won't be holding ++ * the lock as we schedule away, therefore no priority update ++ * must take place. ++ */ ++ lockp = xnsynch_fastlock(synch); ++ if (xnsynch_fast_owner_check(lockp, curr->handle)) ++ return; ++ ++ /* ++ * In rare cases, we could be called multiple times for ++ * committing a lazy ceiling for the same object, e.g. if ++ * userland is preempted in the middle of a recursive locking ++ * sequence. ++ * ++ * This stems from the fact that userland has to update ++ * ->pp_pending prior to trying to grab the lock atomically, ++ * at which point it can figure out whether a recursive ++ * locking happened. We get out of this trap by testing the ++ * XNSYNCH_CEILING flag. ++ */ ++ if ((synch->status & XNSYNCH_CEILING) == 0) ++ commit_ceiling(synch, curr); ++out: ++ curr->u_window->pp_pending = XN_NO_HANDLE; ++} ++ ++/** ++ * @fn int xnsynch_try_acquire(struct xnsynch *synch); ++ * @brief Try acquiring the ownership of a synchronization object. ++ * ++ * This service should be called by upper interfaces wanting the ++ * current thread to acquire the ownership of the given resource. If ++ * the resource is already assigned to another thread, the call ++ * returns with an error code. ++ * ++ * This service must be used only with synchronization objects that ++ * track ownership (XNSYNCH_OWNER set. ++ * ++ * @param synch The descriptor address of the synchronization object ++ * to acquire. ++ * ++ * @return Zero is returned if @a synch has been successfully ++ * acquired. Otherwise: ++ * ++ * - -EDEADLK is returned if @a synch is currently held by the calling ++ * thread. ++ * ++ * - -EBUSY is returned if @a synch is currently held by another ++ * thread. ++ * ++ * @coretags{primary-only} ++ */ ++int xnsynch_try_acquire(struct xnsynch *synch) ++{ ++ struct xnthread *curr; ++ atomic_t *lockp; ++ xnhandle_t h; ++ ++ primary_mode_only(); ++ ++ XENO_BUG_ON(COBALT, (synch->status & XNSYNCH_OWNER) == 0); ++ ++ curr = xnthread_current(); ++ lockp = xnsynch_fastlock(synch); ++ trace_cobalt_synch_try_acquire(synch); ++ ++ h = atomic_cmpxchg(lockp, XN_NO_HANDLE, ++ get_owner_handle(curr->handle, synch)); ++ if (h != XN_NO_HANDLE) ++ return xnhandle_get_id(h) == curr->handle ? ++ -EDEADLK : -EBUSY; ++ ++ set_current_owner(synch, curr); ++ xnthread_get_resource(curr); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnsynch_try_acquire); ++ ++/** ++ * @fn int xnsynch_acquire(struct xnsynch *synch, xnticks_t timeout, xntmode_t timeout_mode); ++ * @brief Acquire the ownership of a synchronization object. ++ * ++ * This service should be called by upper interfaces wanting the ++ * current thread to acquire the ownership of the given resource. If ++ * the resource is already assigned to another thread, the caller is ++ * suspended. ++ * ++ * This service must be used only with synchronization objects that ++ * track ownership (XNSYNCH_OWNER set. ++ * ++ * @param synch The descriptor address of the synchronization object ++ * to acquire. ++ * ++ * @param timeout The timeout which may be used to limit the time the ++ * thread pends on the resource. This value is a wait time given as a ++ * count of nanoseconds. It can either be relative, absolute ++ * monotonic, or absolute adjustable depending on @a ++ * timeout_mode. Passing XN_INFINITE @b and setting @a mode to ++ * XN_RELATIVE specifies an unbounded wait. 
All other values are used ++ * to initialize a watchdog timer. ++ * ++ * @param timeout_mode The mode of the @a timeout parameter. It can ++ * either be set to XN_RELATIVE, XN_ABSOLUTE, or XN_REALTIME (see also ++ * xntimer_start()). ++ * ++ * @return A bitmask which may include zero or one information bit ++ * among XNRMID, XNTIMEO and XNBREAK, which should be tested by the ++ * caller, for detecting respectively: object deletion, timeout or ++ * signal/unblock conditions which might have happened while waiting. ++ * ++ * @coretags{primary-only, might-switch} ++ * ++ * @note Unlike xnsynch_try_acquire(), this call does NOT check for ++ * invalid recursive locking request, which means that such request ++ * will always cause a deadlock for the caller. ++ */ ++int xnsynch_acquire(struct xnsynch *synch, xnticks_t timeout, ++ xntmode_t timeout_mode) ++{ ++ struct xnthread *curr, *owner; ++ xnhandle_t currh, h, oldh; ++ atomic_t *lockp; ++ spl_t s; ++ ++ primary_mode_only(); ++ ++ XENO_BUG_ON(COBALT, (synch->status & XNSYNCH_OWNER) == 0); ++ ++ curr = xnthread_current(); ++ currh = curr->handle; ++ lockp = xnsynch_fastlock(synch); ++ trace_cobalt_synch_acquire(synch); ++redo: ++ /* Basic form of xnsynch_try_acquire(). */ ++ h = atomic_cmpxchg(lockp, XN_NO_HANDLE, ++ get_owner_handle(currh, synch)); ++ if (likely(h == XN_NO_HANDLE)) { ++ set_current_owner(synch, curr); ++ xnthread_get_resource(curr); ++ return 0; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* ++ * Set claimed bit. In case it appears to be set already, ++ * re-read its state under nklock so that we don't miss any ++ * change between the lock-less read and here. But also try to ++ * avoid cmpxchg where possible. Only if it appears not to be ++ * set, start with cmpxchg directly. ++ */ ++ if (xnsynch_fast_is_claimed(h)) { ++ oldh = atomic_read(lockp); ++ goto test_no_owner; ++ } ++ ++ do { ++ oldh = atomic_cmpxchg(lockp, h, xnsynch_fast_claimed(h)); ++ if (likely(oldh == h)) ++ break; ++ test_no_owner: ++ if (oldh == XN_NO_HANDLE) { ++ /* Mutex released from another cpu. */ ++ xnlock_put_irqrestore(&nklock, s); ++ goto redo; ++ } ++ h = oldh; ++ } while (!xnsynch_fast_is_claimed(h)); ++ ++ owner = xnthread_lookup(h); ++ if (owner == NULL) { ++ /* ++ * The handle is broken, therefore pretend that the ++ * synch object was deleted to signal an error. ++ */ ++ xnthread_set_info(curr, XNRMID); ++ goto out; ++ } ++ ++ /* ++ * This is the contended path. We just detected an earlier ++ * syscall-less fast locking from userland, fix up the ++ * in-kernel state information accordingly. ++ * ++ * The consistency of the state information is guaranteed, ++ * because we just raised the claim bit atomically for this ++ * contended lock, therefore userland will have to jump to the ++ * kernel when releasing it, instead of doing a fast ++ * unlock. Since we currently own the superlock, consistency ++ * wrt transfer_ownership() is guaranteed through ++ * serialization. ++ * ++ * CAUTION: in this particular case, the only assumptions we ++ * can safely make is that *owner is valid but not current on ++ * this CPU. ++ */ ++ track_owner(synch, owner); ++ xnsynch_detect_relaxed_owner(synch, curr); ++ ++ if ((synch->status & XNSYNCH_PRIO) == 0) { /* i.e. FIFO */ ++ list_add_tail(&curr->plink, &synch->pendq); ++ goto block; ++ } ++ ++ if (curr->wprio > owner->wprio) { ++ if (xnthread_test_info(owner, XNWAKEN) && owner->wwake == synch) { ++ /* Ownership is still pending, steal the resource. 
*/ ++ set_current_owner_locked(synch, curr); ++ xnthread_clear_info(curr, XNRMID | XNTIMEO | XNBREAK); ++ xnthread_set_info(owner, XNROBBED); ++ goto grab; ++ } ++ ++ list_add_priff(curr, &synch->pendq, wprio, plink); ++ ++ if (synch->status & XNSYNCH_PI) { ++ raise_boost_flag(owner); ++ ++ if (synch->status & XNSYNCH_CLAIMED) ++ list_del(&synch->next); /* owner->boosters */ ++ else ++ synch->status |= XNSYNCH_CLAIMED; ++ ++ synch->wprio = curr->wprio; ++ list_add_priff(synch, &owner->boosters, wprio, next); ++ /* ++ * curr->wprio > owner->wprio implies that ++ * synch must be leading the booster list ++ * after insertion, so we may call ++ * inherit_thread_priority() for tracking ++ * current's priority directly without going ++ * through adjust_boost(). ++ */ ++ inherit_thread_priority(owner, curr); ++ } ++ } else ++ list_add_priff(curr, &synch->pendq, wprio, plink); ++block: ++ xnthread_suspend(curr, XNPEND, timeout, timeout_mode, synch); ++ curr->wwake = NULL; ++ xnthread_clear_info(curr, XNWAKEN); ++ ++ if (xnthread_test_info(curr, XNRMID | XNTIMEO | XNBREAK)) ++ goto out; ++ ++ if (xnthread_test_info(curr, XNROBBED)) { ++ /* ++ * Somebody stole us the ownership while we were ready ++ * to run, waiting for the CPU: we need to wait again ++ * for the resource. ++ */ ++ if (timeout_mode != XN_RELATIVE || timeout == XN_INFINITE) { ++ xnlock_put_irqrestore(&nklock, s); ++ goto redo; ++ } ++ timeout = xntimer_get_timeout_stopped(&curr->rtimer); ++ if (timeout > 1) { /* Otherwise, it's too late. */ ++ xnlock_put_irqrestore(&nklock, s); ++ goto redo; ++ } ++ xnthread_set_info(curr, XNTIMEO); ++ goto out; ++ } ++grab: ++ xnthread_get_resource(curr); ++ ++ if (xnsynch_pended_p(synch)) ++ currh = xnsynch_fast_claimed(currh); ++ ++ /* Set new ownership for this object. */ ++ atomic_set(lockp, get_owner_handle(currh, synch)); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return xnthread_test_info(curr, XNRMID|XNTIMEO|XNBREAK); ++} ++EXPORT_SYMBOL_GPL(xnsynch_acquire); ++ ++static void drop_booster(struct xnsynch *synch, struct xnthread *owner) ++{ ++ list_del(&synch->next); /* owner->boosters */ ++ ++ if (list_empty(&owner->boosters)) { ++ xnthread_clear_state(owner, XNBOOST); ++ inherit_thread_priority(owner, owner); ++ } else ++ adjust_boost(owner, NULL); ++} ++ ++static inline void clear_pi_boost(struct xnsynch *synch, ++ struct xnthread *owner) ++{ /* nklock held, irqs off */ ++ synch->status &= ~XNSYNCH_CLAIMED; ++ drop_booster(synch, owner); ++} ++ ++static inline void clear_pp_boost(struct xnsynch *synch, ++ struct xnthread *owner) ++{ /* nklock held, irqs off */ ++ synch->status &= ~XNSYNCH_CEILING; ++ drop_booster(synch, owner); ++} ++ ++static bool transfer_ownership(struct xnsynch *synch, ++ struct xnthread *lastowner) ++{ /* nklock held, irqs off */ ++ struct xnthread *nextowner; ++ xnhandle_t nextownerh; ++ atomic_t *lockp; ++ ++ lockp = xnsynch_fastlock(synch); ++ ++ /* ++ * Our caller checked for contention locklessly, so we do have ++ * to check again under lock in a different way. 
++ */ ++ if (list_empty(&synch->pendq)) { ++ synch->owner = NULL; ++ atomic_set(lockp, XN_NO_HANDLE); ++ return false; ++ } ++ ++ nextowner = list_first_entry(&synch->pendq, struct xnthread, plink); ++ list_del(&nextowner->plink); ++ nextowner->wchan = NULL; ++ nextowner->wwake = synch; ++ set_current_owner_locked(synch, nextowner); ++ xnthread_set_info(nextowner, XNWAKEN); ++ xnthread_resume(nextowner, XNPEND); ++ ++ if (synch->status & XNSYNCH_CLAIMED) ++ clear_pi_boost(synch, lastowner); ++ ++ nextownerh = get_owner_handle(nextowner->handle, synch); ++ if (xnsynch_pended_p(synch)) ++ nextownerh = xnsynch_fast_claimed(nextownerh); ++ ++ atomic_set(lockp, nextownerh); ++ ++ return true; ++} ++ ++/** ++ * @fn bool xnsynch_release(struct xnsynch *synch, struct xnthread *curr) ++ * @brief Release a resource and pass it to the next waiting thread. ++ * ++ * This service releases the ownership of the given synchronization ++ * object. The thread which is currently leading the object's pending ++ * list, if any, is unblocked from its pending state. However, no ++ * reschedule is performed. ++ * ++ * This service must be used only with synchronization objects that ++ * track ownership (XNSYNCH_OWNER set). ++ * ++ * @param synch The descriptor address of the synchronization object ++ * whose ownership is changed. ++ * ++ * @param curr The descriptor address of the current thread, which ++ * must own the object at the time of calling. ++ * ++ * @return True if a reschedule is required. ++ * ++ * @sideeffect ++ * ++ * - The effective priority of the previous resource owner might be ++ * lowered to its base priority value as a consequence of the priority ++ * boost being cleared. ++ * ++ * - The synchronization object ownership is transfered to the ++ * unblocked thread. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++bool xnsynch_release(struct xnsynch *synch, struct xnthread *curr) ++{ ++ bool need_resched = false; ++ xnhandle_t currh, h; ++ atomic_t *lockp; ++ spl_t s; ++ ++ XENO_BUG_ON(COBALT, (synch->status & XNSYNCH_OWNER) == 0); ++ ++ trace_cobalt_synch_release(synch); ++ ++ if (xnthread_put_resource(curr)) ++ return false; ++ ++ lockp = xnsynch_fastlock(synch); ++ currh = curr->handle; ++ /* ++ * FLCEIL may only be raised by the owner, or when the owner ++ * is blocked waiting for the synch (ownership transfer). In ++ * addition, only the current owner of a synch may release it, ++ * therefore we can't race while testing FLCEIL locklessly. ++ * All updates to FLCLAIM are covered by the superlock. ++ * ++ * Therefore, clearing the fastlock racelessly in this routine ++ * without leaking FLCEIL/FLCLAIM updates can be achieved by ++ * holding the superlock. ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (synch->status & XNSYNCH_CEILING) { ++ clear_pp_boost(synch, curr); ++ need_resched = true; ++ } ++ ++ h = atomic_cmpxchg(lockp, currh, XN_NO_HANDLE); ++ if ((h & ~XNSYNCH_FLCEIL) != currh) ++ /* FLCLAIM set, synch is contended. */ ++ need_resched = transfer_ownership(synch, curr); ++ else if (h != currh) /* FLCEIL set, FLCLAIM clear. */ ++ atomic_set(lockp, XN_NO_HANDLE); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return need_resched; ++} ++EXPORT_SYMBOL_GPL(xnsynch_release); ++ ++void xnsynch_requeue_sleeper(struct xnthread *thread) ++{ /* nklock held, irqs off */ ++ struct xnsynch *synch = thread->wchan; ++ struct xnthread *owner; ++ ++ XENO_BUG_ON(COBALT, !(synch->status & XNSYNCH_PRIO)); ++ ++ /* ++ * Update the position in the pend queue of a thread waiting ++ * for a lock. 
This routine propagates the change throughout ++ * the PI chain if required. ++ */ ++ list_del(&thread->plink); ++ list_add_priff(thread, &synch->pendq, wprio, plink); ++ owner = synch->owner; ++ ++ /* Only PI-enabled objects are of interest here. */ ++ if ((synch->status & XNSYNCH_PI) == 0) ++ return; ++ ++ synch->wprio = thread->wprio; ++ if (synch->status & XNSYNCH_CLAIMED) ++ list_del(&synch->next); ++ else { ++ synch->status |= XNSYNCH_CLAIMED; ++ raise_boost_flag(owner); ++ } ++ ++ list_add_priff(synch, &owner->boosters, wprio, next); ++ adjust_boost(owner, thread); ++} ++EXPORT_SYMBOL_GPL(xnsynch_requeue_sleeper); ++ ++/** ++ * @fn struct xnthread *xnsynch_peek_pendq(struct xnsynch *synch); ++ * @brief Access the thread leading a synch object wait queue. ++ * ++ * This services returns the descriptor address of to the thread leading a ++ * synchronization object wait queue. ++ * ++ * @param synch The descriptor address of the target synchronization object. ++ * ++ * @return The descriptor address of the unblocked thread. ++ * ++ * @coretags{unrestricted} ++ */ ++struct xnthread *xnsynch_peek_pendq(struct xnsynch *synch) ++{ ++ struct xnthread *thread = NULL; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (!list_empty(&synch->pendq)) ++ thread = list_first_entry(&synch->pendq, ++ struct xnthread, plink); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return thread; ++} ++EXPORT_SYMBOL_GPL(xnsynch_peek_pendq); ++ ++/** ++ * @fn int xnsynch_flush(struct xnsynch *synch, int reason); ++ * @brief Unblock all waiters pending on a resource. ++ * ++ * This service atomically releases all threads which currently sleep ++ * on a given resource. This service should be called by upper ++ * interfaces under circumstances requiring that the pending queue of ++ * a given resource is cleared, such as before the resource is ++ * deleted. ++ * ++ * @param synch The descriptor address of the synchronization object ++ * to be flushed. ++ * ++ * @param reason Some flags to set in the information mask of every ++ * unblocked thread. Zero is an acceptable value. The following bits ++ * are pre-defined by Cobalt: ++ * ++ * - XNRMID should be set to indicate that the synchronization object ++ * is about to be destroyed (see xnthread_resume()). ++ * ++ * - XNBREAK should be set to indicate that the wait has been forcibly ++ * interrupted (see xnthread_unblock()). ++ * ++ * @return XNSYNCH_RESCHED is returned if at least one thread is ++ * unblocked, which means the caller should invoke xnsched_run() for ++ * applying the new scheduling state. Otherwise, XNSYNCH_DONE is ++ * returned. ++ * ++ * @sideeffect ++ * ++ * - The effective priority of the current resource owner might be ++ * lowered to its base priority value as a consequence of the priority ++ * inheritance boost being cleared. 
++ * ++ * @coretags{unrestricted} ++ */ ++int xnsynch_flush(struct xnsynch *synch, int reason) ++{ ++ struct xnthread *sleeper, *tmp; ++ int ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_synch_flush(synch); ++ ++ if (list_empty(&synch->pendq)) { ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_CLAIMED); ++ ret = XNSYNCH_DONE; ++ } else { ++ ret = XNSYNCH_RESCHED; ++ list_for_each_entry_safe(sleeper, tmp, &synch->pendq, plink) { ++ list_del(&sleeper->plink); ++ xnthread_set_info(sleeper, reason); ++ sleeper->wchan = NULL; ++ xnthread_resume(sleeper, XNPEND); ++ } ++ if (synch->status & XNSYNCH_CLAIMED) ++ clear_pi_boost(synch, synch->owner); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnsynch_flush); ++ ++void xnsynch_forget_sleeper(struct xnthread *thread) ++{ /* nklock held, irqs off */ ++ struct xnsynch *synch = thread->wchan; ++ struct xnthread *owner, *target; ++ ++ /* ++ * Do all the necessary housekeeping chores to stop a thread ++ * from waiting on a given synchronization object. Doing so ++ * may require to update a PI chain. ++ */ ++ trace_cobalt_synch_forget(synch); ++ ++ xnthread_clear_state(thread, XNPEND); ++ thread->wchan = NULL; ++ list_del(&thread->plink); /* synch->pendq */ ++ ++ /* ++ * Only a sleeper leaving a PI chain triggers an update. ++ * NOTE: PP objects never bear the CLAIMED bit. ++ */ ++ if ((synch->status & XNSYNCH_CLAIMED) == 0) ++ return; ++ ++ owner = synch->owner; ++ ++ if (list_empty(&synch->pendq)) { ++ /* No more sleepers: clear the PI boost. */ ++ clear_pi_boost(synch, owner); ++ return; ++ } ++ ++ /* ++ * Reorder the booster queue of the current owner after we ++ * left the wait list, then set its priority to the new ++ * required minimum required to prevent priority inversion. ++ */ ++ target = list_first_entry(&synch->pendq, struct xnthread, plink); ++ synch->wprio = target->wprio; ++ list_del(&synch->next); /* owner->boosters */ ++ list_add_priff(synch, &owner->boosters, wprio, next); ++ adjust_boost(owner, target); ++} ++EXPORT_SYMBOL_GPL(xnsynch_forget_sleeper); ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED ++ ++/* ++ * Detect when a thread is about to sleep on a synchronization ++ * object currently owned by someone running in secondary mode. ++ */ ++void xnsynch_detect_relaxed_owner(struct xnsynch *synch, ++ struct xnthread *sleeper) ++{ ++ if (xnthread_test_state(sleeper, XNWARN) && ++ !xnthread_test_info(sleeper, XNPIALERT) && ++ xnthread_test_state(synch->owner, XNRELAX)) { ++ xnthread_set_info(sleeper, XNPIALERT); ++ xnthread_signal(sleeper, SIGDEBUG, ++ SIGDEBUG_MIGRATE_PRIOINV); ++ } else ++ xnthread_clear_info(sleeper, XNPIALERT); ++} ++ ++/* ++ * Detect when a thread is about to relax while holding booster(s) ++ * (claimed PI or active PP object), which denotes a potential for ++ * priority inversion. In such an event, any sleeper bearing the ++ * XNWARN bit will receive a SIGDEBUG notification. 
++ */ ++void xnsynch_detect_boosted_relax(struct xnthread *owner) ++{ ++ struct xnthread *sleeper; ++ struct xnsynch *synch; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnthread_for_each_booster(synch, owner) { ++ xnsynch_for_each_sleeper(sleeper, synch) { ++ if (xnthread_test_state(sleeper, XNWARN)) { ++ xnthread_set_info(sleeper, XNPIALERT); ++ xnthread_signal(sleeper, SIGDEBUG, ++ SIGDEBUG_MIGRATE_PRIOINV); ++ } ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++#endif /* CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED */ ++ ++/** @} */ +--- linux/kernel/xenomai/sched-tp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-tp.c 2021-04-07 16:01:25.814636185 +0800 +@@ -0,0 +1,464 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++ ++static void tp_schedule_next(struct xnsched_tp *tp) ++{ ++ struct xnsched_tp_window *w; ++ struct xnsched *sched; ++ int p_next, ret; ++ xnticks_t t; ++ ++ for (;;) { ++ /* ++ * Switch to the next partition. Time holes in a ++ * global time frame are defined as partition windows ++ * assigned to part# -1, in which case the (always ++ * empty) idle queue will be polled for runnable ++ * threads. Therefore, we may assume that a window ++ * begins immediately after the previous one ends, ++ * which simplifies the implementation a lot. ++ */ ++ w = &tp->gps->pwins[tp->wnext]; ++ p_next = w->w_part; ++ tp->tps = p_next < 0 ? &tp->idle : &tp->partitions[p_next]; ++ ++ /* Schedule tick to advance to the next window. */ ++ tp->wnext = (tp->wnext + 1) % tp->gps->pwin_nr; ++ w = &tp->gps->pwins[tp->wnext]; ++ t = tp->tf_start + w->w_offset; ++ ++ ret = xntimer_start(&tp->tf_timer, t, XN_INFINITE, XN_ABSOLUTE); ++ if (ret != -ETIMEDOUT) ++ break; ++ /* ++ * We are late, make sure to remain within the bounds ++ * of a valid time frame before advancing to the next ++ * window. Otherwise, fix up by advancing to the next ++ * time frame immediately. ++ */ ++ for (;;) { ++ t = tp->tf_start + tp->gps->tf_duration; ++ if (xnclock_read_monotonic(&nkclock) > t) { ++ tp->tf_start = t; ++ tp->wnext = 0; ++ } else ++ break; ++ } ++ } ++ ++ sched = container_of(tp, struct xnsched, tp); ++ xnsched_set_resched(sched); ++} ++ ++static void tp_tick_handler(struct xntimer *timer) ++{ ++ struct xnsched_tp *tp = container_of(timer, struct xnsched_tp, tf_timer); ++ /* ++ * Advance beginning date of time frame by a full period if we ++ * are processing the last window. 
++ */ ++ if (tp->wnext + 1 == tp->gps->pwin_nr) ++ tp->tf_start += tp->gps->tf_duration; ++ ++ tp_schedule_next(tp); ++} ++ ++static void xnsched_tp_init(struct xnsched *sched) ++{ ++ struct xnsched_tp *tp = &sched->tp; ++ char timer_name[XNOBJECT_NAME_LEN]; ++ int n; ++ ++ for (n = 0; n < CONFIG_XENO_OPT_SCHED_TP_NRPART; n++) ++ xnsched_initq(&tp->partitions[n].runnable); ++ ++ xnsched_initq(&tp->idle.runnable); ++ ++#ifdef CONFIG_SMP ++ ksformat(timer_name, sizeof(timer_name), "[tp-tick/%u]", sched->cpu); ++#else ++ strcpy(timer_name, "[tp-tick]"); ++#endif ++ tp->tps = NULL; ++ tp->gps = NULL; ++ INIT_LIST_HEAD(&tp->threads); ++ xntimer_init(&tp->tf_timer, &nkclock, tp_tick_handler, ++ sched, XNTIMER_IGRAVITY); ++ xntimer_set_name(&tp->tf_timer, timer_name); ++} ++ ++static bool xnsched_tp_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched *sched = thread->sched; ++ ++ thread->tps = &sched->tp.partitions[p->tp.ptid]; ++ xnthread_clear_state(thread, XNWEAK); ++ ++ return xnsched_set_effective_priority(thread, p->tp.prio); ++} ++ ++static void xnsched_tp_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->tp.prio = thread->cprio; ++ p->tp.ptid = thread->tps - thread->sched->tp.partitions; ++} ++ ++static void xnsched_tp_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ /* ++ * The assigned partition never changes internally due to PI ++ * (see xnsched_track_policy), since this would be pretty ++ * wrong with respect to TP scheduling: i.e. we may not allow ++ * a thread from another partition to consume CPU time from ++ * the current one, despite this would help enforcing PI (see ++ * note). In any case, introducing resource contention between ++ * threads that belong to different partitions is utterly ++ * wrong in the first place. Only an explicit call to ++ * xnsched_set_policy() may change the partition assigned to a ++ * thread. For that reason, a policy reset action only boils ++ * down to reinstating the base priority. ++ * ++ * NOTE: we do allow threads from lower scheduling classes to ++ * consume CPU time from the current window as a result of a ++ * PI boost, since this is aimed at speeding up the release of ++ * a synchronization object a TP thread needs. ++ */ ++ if (p) { ++ /* We should never cross partition boundaries. 
*/ ++ XENO_WARN_ON(COBALT, ++ thread->base_class == &xnsched_class_tp && ++ thread->tps - thread->sched->tp.partitions != p->tp.ptid); ++ thread->cprio = p->tp.prio; ++ } else ++ thread->cprio = thread->bprio; ++} ++ ++static void xnsched_tp_protectprio(struct xnthread *thread, int prio) ++{ ++ if (prio > XNSCHED_TP_MAX_PRIO) ++ prio = XNSCHED_TP_MAX_PRIO; ++ ++ thread->cprio = prio; ++} ++ ++static int xnsched_tp_chkparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_tp *tp = &thread->sched->tp; ++ ++ if (p->tp.ptid < 0 || ++ p->tp.ptid >= CONFIG_XENO_OPT_SCHED_TP_NRPART) ++ return -EINVAL; ++ ++ if (tp->gps == NULL || ++ p->tp.prio < XNSCHED_TP_MIN_PRIO || ++ p->tp.prio > XNSCHED_TP_MAX_PRIO) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int xnsched_tp_declare(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched *sched = thread->sched; ++ ++ list_add_tail(&thread->tp_link, &sched->tp.threads); ++ ++ return 0; ++} ++ ++static void xnsched_tp_forget(struct xnthread *thread) ++{ ++ list_del(&thread->tp_link); ++ thread->tps = NULL; ++} ++ ++static void xnsched_tp_enqueue(struct xnthread *thread) ++{ ++ xnsched_addq_tail(&thread->tps->runnable, thread); ++} ++ ++static void xnsched_tp_dequeue(struct xnthread *thread) ++{ ++ xnsched_delq(&thread->tps->runnable, thread); ++} ++ ++static void xnsched_tp_requeue(struct xnthread *thread) ++{ ++ xnsched_addq(&thread->tps->runnable, thread); ++} ++ ++static struct xnthread *xnsched_tp_pick(struct xnsched *sched) ++{ ++ /* Never pick a thread if we don't schedule partitions. */ ++ if (!xntimer_running_p(&sched->tp.tf_timer)) ++ return NULL; ++ ++ return xnsched_getq(&sched->tp.tps->runnable); ++} ++ ++static void xnsched_tp_migrate(struct xnthread *thread, struct xnsched *sched) ++{ ++ union xnsched_policy_param param; ++ /* ++ * Since our partition schedule is a per-scheduler property, ++ * it cannot apply to a thread that moves to another CPU ++ * anymore. So we upgrade that thread to the RT class when a ++ * CPU migration occurs. A subsequent call to ++ * __xnthread_set_schedparam() may move it back to TP ++ * scheduling, with a partition assignment that fits the ++ * remote CPU's partition schedule. ++ */ ++ param.rt.prio = thread->cprio; ++ __xnthread_set_schedparam(thread, &xnsched_class_rt, ¶m); ++} ++ ++void xnsched_tp_start_schedule(struct xnsched *sched) ++{ ++ struct xnsched_tp *tp = &sched->tp; ++ ++ if (tp->gps == NULL) ++ return; ++ ++ tp->wnext = 0; ++ tp->tf_start = xnclock_read_monotonic(&nkclock); ++ tp_schedule_next(tp); ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_start_schedule); ++ ++void xnsched_tp_stop_schedule(struct xnsched *sched) ++{ ++ struct xnsched_tp *tp = &sched->tp; ++ ++ if (tp->gps) ++ xntimer_stop(&tp->tf_timer); ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_stop_schedule); ++ ++struct xnsched_tp_schedule * ++xnsched_tp_set_schedule(struct xnsched *sched, ++ struct xnsched_tp_schedule *gps) ++{ ++ struct xnsched_tp_schedule *old_gps; ++ struct xnsched_tp *tp = &sched->tp; ++ union xnsched_policy_param param; ++ struct xnthread *thread, *tmp; ++ ++ XENO_BUG_ON(COBALT, gps != NULL && ++ (gps->pwin_nr <= 0 || gps->pwins[0].w_offset != 0)); ++ ++ xnsched_tp_stop_schedule(sched); ++ ++ /* ++ * Move all TP threads on this scheduler to the RT class, ++ * until we call __xnthread_set_schedparam() for them again. 
++ */ ++ if (list_empty(&tp->threads)) ++ goto done; ++ ++ list_for_each_entry_safe(thread, tmp, &tp->threads, tp_link) { ++ param.rt.prio = thread->cprio; ++ __xnthread_set_schedparam(thread, &xnsched_class_rt, ¶m); ++ } ++done: ++ old_gps = tp->gps; ++ tp->gps = gps; ++ ++ return old_gps; ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_set_schedule); ++ ++struct xnsched_tp_schedule * ++xnsched_tp_get_schedule(struct xnsched *sched) ++{ ++ struct xnsched_tp_schedule *gps; ++ ++ gps = sched->tp.gps; ++ if (gps == NULL) ++ return NULL; ++ ++ atomic_inc(&gps->refcount); ++ ++ return gps; ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_get_schedule); ++ ++void xnsched_tp_put_schedule(struct xnsched_tp_schedule *gps) ++{ ++ if (atomic_dec_and_test(&gps->refcount)) ++ xnfree(gps); ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_put_schedule); ++ ++int xnsched_tp_get_partition(struct xnsched *sched) ++{ ++ struct xnsched_tp *tp = &sched->tp; ++ ++ if (tp->tps == NULL || tp->tps == &tp->idle) ++ return -1; ++ ++ return tp->tps - tp->partitions; ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_get_partition); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++struct xnvfile_directory sched_tp_vfroot; ++ ++struct vfile_sched_tp_priv { ++ struct xnthread *curr; ++}; ++ ++struct vfile_sched_tp_data { ++ int cpu; ++ pid_t pid; ++ char name[XNOBJECT_NAME_LEN]; ++ int prio; ++ int ptid; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_sched_tp_ops; ++ ++static struct xnvfile_snapshot vfile_sched_tp = { ++ .privsz = sizeof(struct vfile_sched_tp_priv), ++ .datasz = sizeof(struct vfile_sched_tp_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_sched_tp_ops, ++}; ++ ++static int vfile_sched_tp_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_sched_tp_priv *priv = xnvfile_iterator_priv(it); ++ int nrthreads = xnsched_class_tp.nthreads; ++ ++ if (nrthreads == 0) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ ++ return nrthreads; ++} ++ ++static int vfile_sched_tp_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_tp_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_sched_tp_data *p = data; ++ struct xnthread *thread; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. 
*/ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ if (thread->base_class != &xnsched_class_tp) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->ptid = thread->tps - thread->sched->tp.partitions; ++ p->prio = thread->cprio; ++ ++ return 1; ++} ++ ++static int vfile_sched_tp_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_tp_data *p = data; ++ ++ if (p == NULL) ++ xnvfile_printf(it, "%-3s %-6s %-4s %-4s %s\n", ++ "CPU", "PID", "PTID", "PRI", "NAME"); ++ else ++ xnvfile_printf(it, "%3u %-6d %-4d %-4d %s\n", ++ p->cpu, ++ p->pid, ++ p->ptid, ++ p->prio, ++ p->name); ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_sched_tp_ops = { ++ .rewind = vfile_sched_tp_rewind, ++ .next = vfile_sched_tp_next, ++ .show = vfile_sched_tp_show, ++}; ++ ++static int xnsched_tp_init_vfile(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot) ++{ ++ int ret; ++ ++ ret = xnvfile_init_dir(schedclass->name, &sched_tp_vfroot, vfroot); ++ if (ret) ++ return ret; ++ ++ return xnvfile_init_snapshot("threads", &vfile_sched_tp, ++ &sched_tp_vfroot); ++} ++ ++static void xnsched_tp_cleanup_vfile(struct xnsched_class *schedclass) ++{ ++ xnvfile_destroy_snapshot(&vfile_sched_tp); ++ xnvfile_destroy_dir(&sched_tp_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct xnsched_class xnsched_class_tp = { ++ .sched_init = xnsched_tp_init, ++ .sched_enqueue = xnsched_tp_enqueue, ++ .sched_dequeue = xnsched_tp_dequeue, ++ .sched_requeue = xnsched_tp_requeue, ++ .sched_pick = xnsched_tp_pick, ++ .sched_tick = NULL, ++ .sched_rotate = NULL, ++ .sched_migrate = xnsched_tp_migrate, ++ .sched_chkparam = xnsched_tp_chkparam, ++ .sched_setparam = xnsched_tp_setparam, ++ .sched_getparam = xnsched_tp_getparam, ++ .sched_trackprio = xnsched_tp_trackprio, ++ .sched_protectprio = xnsched_tp_protectprio, ++ .sched_declare = xnsched_tp_declare, ++ .sched_forget = xnsched_tp_forget, ++ .sched_kick = NULL, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .sched_init_vfile = xnsched_tp_init_vfile, ++ .sched_cleanup_vfile = xnsched_tp_cleanup_vfile, ++#endif ++ .weight = XNSCHED_CLASS_WEIGHT(2), ++ .policy = SCHED_TP, ++ .name = "tp" ++}; ++EXPORT_SYMBOL_GPL(xnsched_class_tp); +--- linux/kernel/xenomai/procfs.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/procfs.h 2021-04-07 16:01:25.809636192 +0800 +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _KERNEL_COBALT_PROCFS_H ++#define _KERNEL_COBALT_PROCFS_H ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int xnprocfs_init_tree(void); ++void xnprocfs_cleanup_tree(void); ++#else ++static inline int xnprocfs_init_tree(void) { return 0; } ++static inline void xnprocfs_cleanup_tree(void) { } ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++#endif /* !_KERNEL_COBALT_PROCFS_H */ +--- linux/kernel/xenomai/sched-sporadic.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-sporadic.c 2021-04-07 16:01:25.804636199 +0800 +@@ -0,0 +1,560 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++ ++#define MAX_REPLENISH CONFIG_XENO_OPT_SCHED_SPORADIC_MAXREPL ++ ++static void sporadic_post_recharge(struct xnthread *thread, xnticks_t budget); ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ ++static inline void sporadic_note_late_drop(struct xnsched *sched) ++{ ++ /* ++ * This code should pull the break when a misconfigured ++ * sporadic thread is late on its drop date for more than a ++ * hundred times in a row. This normally reveals a time budget ++ * which is too tight. ++ */ ++ XENO_BUG_ON(COBALT, ++sched->pss.drop_retries > 100); ++} ++ ++static inline void sporadic_note_valid_drop(struct xnsched *sched) ++{ ++ sched->pss.drop_retries = 0; ++} ++ ++#else /* !CONFIG_XENO_OPT_DEBUG_COBALT */ ++ ++static inline void sporadic_note_late_drop(struct xnsched *sched) ++{ ++} ++ ++static inline void sporadic_note_valid_drop(struct xnsched *sched) ++{ ++} ++ ++#endif /* !CONFIG_XENO_OPT_DEBUG_COBALT */ ++ ++static inline xnticks_t sporadic_diff_time(xnticks_t start, xnticks_t end) ++{ ++ xnsticks_t d = (xnsticks_t)(end - start); ++ return unlikely(d < 0) ? -d : d; ++} ++ ++static void sporadic_drop_handler(struct xntimer *timer) ++{ ++ struct xnsched_sporadic_data *pss; ++ union xnsched_policy_param p; ++ struct xnthread *thread; ++ ++ /* ++ * XXX: this code will work properly regardless of ++ * primary/secondary mode issues. ++ */ ++ pss = container_of(timer, struct xnsched_sporadic_data, drop_timer); ++ thread = pss->thread; ++ ++ sporadic_post_recharge(thread, pss->budget); ++ ++ if (pss->budget == 0 && thread->cprio > pss->param.low_prio) { ++ if (pss->param.low_prio < 0) ++ /* ++ * Special case: low_prio == -1, we want the ++ * thread to suspend until a replenishment ++ * happens. ++ */ ++ xnthread_suspend(thread, XNHELD, ++ XN_INFINITE, XN_RELATIVE, NULL); ++ else { ++ p.pss.init_budget = 0; ++ p.pss.current_prio = pss->param.low_prio; ++ /* Move sporadic thread to the background. 
*/ ++ __xnthread_set_schedparam(thread, &xnsched_class_sporadic, &p); ++ } ++ } ++} ++ ++static void sporadic_schedule_drop(struct xnthread *thread) ++{ ++ xnticks_t now = xnclock_read_monotonic(&nkclock); ++ struct xnsched_sporadic_data *pss = thread->pss; ++ int ret; ++ ++ pss->resume_date = now; ++ /* ++ * Assuming this timer should not fire that often unless the ++ * monitored thread behaves badly, we don't pin it on the CPU ++ * the thread is running, trading cycles at firing time ++ * against cycles when arming the timer. ++ */ ++ xntimer_set_affinity(&pss->drop_timer, thread->sched); ++ ret = xntimer_start(&pss->drop_timer, now + pss->budget, ++ XN_INFINITE, XN_ABSOLUTE); ++ if (ret == -ETIMEDOUT) { ++ sporadic_note_late_drop(thread->sched); ++ sporadic_drop_handler(&pss->drop_timer); ++ } else ++ sporadic_note_valid_drop(thread->sched); ++} ++ ++static void sporadic_replenish_handler(struct xntimer *timer) ++{ ++ struct xnsched_sporadic_data *pss; ++ union xnsched_policy_param p; ++ struct xnthread *thread; ++ xnticks_t now; ++ int r, ret; ++ ++ pss = container_of(timer, struct xnsched_sporadic_data, repl_timer); ++ thread = pss->thread; ++ XENO_BUG_ON(COBALT, pss->repl_pending <= 0); ++ ++retry: ++ now = xnclock_read_monotonic(&nkclock); ++ ++ do { ++ r = pss->repl_out; ++ if ((xnsticks_t)(now - pss->repl_data[r].date) <= 0) ++ break; ++ pss->budget += pss->repl_data[r].amount; ++ if (pss->budget > pss->param.init_budget) ++ pss->budget = pss->param.init_budget; ++ pss->repl_out = (r + 1) % MAX_REPLENISH; ++ } while(--pss->repl_pending > 0); ++ ++ if (pss->repl_pending > 0) { ++ xntimer_set_affinity(&pss->repl_timer, thread->sched); ++ ret = xntimer_start(&pss->repl_timer, pss->repl_data[r].date, ++ XN_INFINITE, XN_ABSOLUTE); ++ if (ret == -ETIMEDOUT) ++ goto retry; /* This plugs a tiny race. */ ++ } ++ ++ if (pss->budget == 0) ++ return; ++ ++ if (xnthread_test_state(thread, XNHELD)) ++ xnthread_resume(thread, XNHELD); ++ else if (thread->cprio < pss->param.normal_prio) { ++ p.pss.init_budget = 0; ++ p.pss.current_prio = pss->param.normal_prio; ++ /* Move sporadic thread to the foreground. */ ++ __xnthread_set_schedparam(thread, &xnsched_class_sporadic, &p); ++ } ++ ++ /* ++ * XXX: we have to reset the drop timer in case we preempted ++ * the thread which just got a budget increase. ++ */ ++ if (thread->sched->curr == thread) ++ sporadic_schedule_drop(thread); ++} ++ ++static void sporadic_post_recharge(struct xnthread *thread, xnticks_t budget) ++{ ++ struct xnsched_sporadic_data *pss = thread->pss; ++ int r, ret; ++ ++ if (pss->repl_pending >= pss->param.max_repl) ++ return; ++ ++ if (budget > pss->budget) { ++ budget = pss->budget; ++ pss->budget = 0; ++ } else ++ pss->budget -= budget; ++ ++ r = pss->repl_in; ++ pss->repl_data[r].date = pss->resume_date + pss->param.repl_period; ++ pss->repl_data[r].amount = budget; ++ pss->repl_in = (r + 1) % MAX_REPLENISH; ++ ++ if (pss->repl_pending++ == 0) { ++ xntimer_set_affinity(&pss->repl_timer, thread->sched); ++ ret = xntimer_start(&pss->repl_timer, pss->repl_data[r].date, ++ XN_INFINITE, XN_ABSOLUTE); ++ /* ++ * The following case should not happen unless the ++ * initial budget value is inappropriate, but let's ++ * handle it anyway. 
++ */ ++ if (ret == -ETIMEDOUT) ++ sporadic_replenish_handler(&pss->repl_timer); ++ } ++} ++ ++static void sporadic_suspend_activity(struct xnthread *thread) ++{ ++ struct xnsched_sporadic_data *pss = thread->pss; ++ xnticks_t budget, now; ++ ++ if (pss->budget > 0) { ++ xntimer_stop(&pss->drop_timer); ++ now = xnclock_read_monotonic(&nkclock); ++ budget = sporadic_diff_time(now, pss->resume_date); ++ sporadic_post_recharge(thread, budget); ++ } ++} ++ ++static inline void sporadic_resume_activity(struct xnthread *thread) ++{ ++ if (thread->pss->budget > 0) ++ sporadic_schedule_drop(thread); ++} ++ ++static void xnsched_sporadic_init(struct xnsched *sched) ++{ ++ /* ++ * We litterally stack the sporadic scheduler on top of the RT ++ * one, reusing its run queue directly. This way, RT and ++ * sporadic threads are merged into the same runqueue and thus ++ * share the same priority scale, with the addition of budget ++ * management for the sporadic ones. ++ */ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ sched->pss.drop_retries = 0; ++#endif ++} ++ ++static bool xnsched_sporadic_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_sporadic_data *pss = thread->pss; ++ bool effective; ++ ++ xnthread_clear_state(thread, XNWEAK); ++ effective = xnsched_set_effective_priority(thread, p->pss.current_prio); ++ ++ /* ++ * We use the budget information to determine whether we got ++ * here from one of our internal calls to ++ * xnthread_set_schedparam(), in which case we don't want to ++ * update the scheduling parameters, but only set the ++ * effective priority. ++ */ ++ if (p->pss.init_budget > 0) { ++ pss->param = p->pss; ++ pss->budget = p->pss.init_budget; ++ pss->repl_in = 0; ++ pss->repl_out = 0; ++ pss->repl_pending = 0; ++ if (effective && thread == thread->sched->curr) { ++ xntimer_stop(&pss->drop_timer); ++ sporadic_schedule_drop(thread); ++ } ++ } ++ ++ return effective; ++} ++ ++static void xnsched_sporadic_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->pss = thread->pss->param; ++ p->pss.current_prio = thread->cprio; ++} ++ ++static void xnsched_sporadic_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p) ++ thread->cprio = p->pss.current_prio; ++ else ++ thread->cprio = thread->bprio; ++} ++ ++static void xnsched_sporadic_protectprio(struct xnthread *thread, int prio) ++{ ++ if (prio > XNSCHED_SPORADIC_MAX_PRIO) ++ prio = XNSCHED_SPORADIC_MAX_PRIO; ++ ++ thread->cprio = prio; ++} ++ ++static int xnsched_sporadic_chkparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p->pss.low_prio != -1 && ++ (p->pss.low_prio < XNSCHED_SPORADIC_MIN_PRIO || ++ p->pss.low_prio > XNSCHED_SPORADIC_MAX_PRIO)) ++ return -EINVAL; ++ ++ if (p->pss.normal_prio < XNSCHED_SPORADIC_MIN_PRIO || ++ p->pss.normal_prio > XNSCHED_SPORADIC_MAX_PRIO) ++ return -EINVAL; ++ ++ if (p->pss.init_budget == 0) ++ return -EINVAL; ++ ++ if (p->pss.current_prio != p->pss.normal_prio) ++ return -EINVAL; ++ ++ if (p->pss.repl_period < p->pss.init_budget) ++ return -EINVAL; ++ ++ if (p->pss.normal_prio <= p->pss.low_prio) ++ return -EINVAL; ++ ++ if (p->pss.max_repl < 1 || p->pss.max_repl > MAX_REPLENISH) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int xnsched_sporadic_declare(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_sporadic_data *pss; ++ ++ pss = xnmalloc(sizeof(*pss)); ++ if (pss == NULL) ++ return -ENOMEM; ++ ++ 
xntimer_init(&pss->repl_timer, &nkclock, sporadic_replenish_handler, ++ thread->sched, XNTIMER_IGRAVITY); ++ xntimer_set_name(&pss->repl_timer, "pss-replenish"); ++ xntimer_init(&pss->drop_timer, &nkclock, sporadic_drop_handler, ++ thread->sched, XNTIMER_IGRAVITY); ++ xntimer_set_name(&pss->drop_timer, "pss-drop"); ++ ++ thread->pss = pss; ++ pss->thread = thread; ++ ++ return 0; ++} ++ ++static void xnsched_sporadic_forget(struct xnthread *thread) ++{ ++ struct xnsched_sporadic_data *pss = thread->pss; ++ ++ xntimer_destroy(&pss->repl_timer); ++ xntimer_destroy(&pss->drop_timer); ++ xnfree(pss); ++ thread->pss = NULL; ++} ++ ++static void xnsched_sporadic_enqueue(struct xnthread *thread) ++{ ++ __xnsched_rt_enqueue(thread); ++} ++ ++static void xnsched_sporadic_dequeue(struct xnthread *thread) ++{ ++ __xnsched_rt_dequeue(thread); ++} ++ ++static void xnsched_sporadic_requeue(struct xnthread *thread) ++{ ++ __xnsched_rt_requeue(thread); ++} ++ ++static struct xnthread *xnsched_sporadic_pick(struct xnsched *sched) ++{ ++ struct xnthread *curr = sched->curr, *next; ++ ++ next = xnsched_getq(&sched->rt.runnable); ++ if (next == NULL) ++ goto swap; ++ ++ if (curr == next) ++ return next; ++ ++ /* Arm the drop timer for an incoming sporadic thread. */ ++ if (next->pss) ++ sporadic_resume_activity(next); ++swap: ++ /* ++ * A non-sporadic outgoing thread is having a priority ++ * inheritance boost, so apply an infinite time budget as we ++ * want it to release the claimed resource asap. Otherwise, ++ * clear the drop timer, then schedule a replenishment ++ * operation. ++ */ ++ if (curr->pss) ++ sporadic_suspend_activity(curr); ++ ++ return next; ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++struct xnvfile_directory sched_sporadic_vfroot; ++ ++struct vfile_sched_sporadic_priv { ++ int nrthreads; ++ struct xnthread *curr; ++}; ++ ++struct vfile_sched_sporadic_data { ++ int cpu; ++ pid_t pid; ++ char name[XNOBJECT_NAME_LEN]; ++ int current_prio; ++ int low_prio; ++ int normal_prio; ++ xnticks_t period; ++ xnticks_t timeout; ++ xnticks_t budget; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_sched_sporadic_ops; ++ ++static struct xnvfile_snapshot vfile_sched_sporadic = { ++ .privsz = sizeof(struct vfile_sched_sporadic_priv), ++ .datasz = sizeof(struct vfile_sched_sporadic_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_sched_sporadic_ops, ++}; ++ ++static int vfile_sched_sporadic_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_sched_sporadic_priv *priv = xnvfile_iterator_priv(it); ++ int nrthreads = xnsched_class_sporadic.nthreads; ++ ++ if (nrthreads == 0) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ ++ return nrthreads; ++} ++ ++static int vfile_sched_sporadic_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_sporadic_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_sched_sporadic_data *p = data; ++ struct xnthread *thread; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. 
*/ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ if (thread->base_class != &xnsched_class_sporadic) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->current_prio = thread->cprio; ++ p->low_prio = thread->pss->param.low_prio; ++ p->normal_prio = thread->pss->param.normal_prio; ++ p->period = xnthread_get_period(thread); ++ p->budget = thread->pss->param.init_budget; ++ ++ return 1; ++} ++ ++static int vfile_sched_sporadic_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ char lpbuf[16], npbuf[16], ptbuf[16], btbuf[16]; ++ struct vfile_sched_sporadic_data *p = data; ++ ++ if (p == NULL) ++ xnvfile_printf(it, ++ "%-3s %-6s %-4s %-4s %-10s %-10s %s\n", ++ "CPU", "PID", "LPRI", "NPRI", "BUDGET", ++ "PERIOD", "NAME"); ++ else { ++ ksformat(lpbuf, sizeof(lpbuf), "%3d%c", ++ p->low_prio, p->current_prio == p->low_prio ? '*' : ' '); ++ ++ ksformat(npbuf, sizeof(npbuf), "%3d%c", ++ p->normal_prio, p->current_prio == p->normal_prio ? '*' : ' '); ++ ++ xntimer_format_time(p->period, ptbuf, sizeof(ptbuf)); ++ xntimer_format_time(p->budget, btbuf, sizeof(btbuf)); ++ ++ xnvfile_printf(it, ++ "%3u %-6d %-4s %-4s %-10s %-10s %s\n", ++ p->cpu, ++ p->pid, ++ lpbuf, ++ npbuf, ++ btbuf, ++ ptbuf, ++ p->name); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_sched_sporadic_ops = { ++ .rewind = vfile_sched_sporadic_rewind, ++ .next = vfile_sched_sporadic_next, ++ .show = vfile_sched_sporadic_show, ++}; ++ ++static int xnsched_sporadic_init_vfile(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot) ++{ ++ int ret; ++ ++ ret = xnvfile_init_dir(schedclass->name, ++ &sched_sporadic_vfroot, vfroot); ++ if (ret) ++ return ret; ++ ++ return xnvfile_init_snapshot("threads", &vfile_sched_sporadic, ++ &sched_sporadic_vfroot); ++} ++ ++static void xnsched_sporadic_cleanup_vfile(struct xnsched_class *schedclass) ++{ ++ xnvfile_destroy_snapshot(&vfile_sched_sporadic); ++ xnvfile_destroy_dir(&sched_sporadic_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct xnsched_class xnsched_class_sporadic = { ++ .sched_init = xnsched_sporadic_init, ++ .sched_enqueue = xnsched_sporadic_enqueue, ++ .sched_dequeue = xnsched_sporadic_dequeue, ++ .sched_requeue = xnsched_sporadic_requeue, ++ .sched_pick = xnsched_sporadic_pick, ++ .sched_tick = NULL, ++ .sched_rotate = NULL, ++ .sched_migrate = NULL, ++ .sched_chkparam = xnsched_sporadic_chkparam, ++ .sched_setparam = xnsched_sporadic_setparam, ++ .sched_getparam = xnsched_sporadic_getparam, ++ .sched_trackprio = xnsched_sporadic_trackprio, ++ .sched_protectprio = xnsched_sporadic_protectprio, ++ .sched_declare = xnsched_sporadic_declare, ++ .sched_forget = xnsched_sporadic_forget, ++ .sched_kick = NULL, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .sched_init_vfile = xnsched_sporadic_init_vfile, ++ .sched_cleanup_vfile = xnsched_sporadic_cleanup_vfile, ++#endif ++ .weight = XNSCHED_CLASS_WEIGHT(3), ++ .policy = SCHED_SPORADIC, ++ .name = "pss" ++}; ++EXPORT_SYMBOL_GPL(xnsched_class_sporadic); +--- linux/kernel/xenomai/init.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/init.c 2021-04-07 16:01:25.799636207 +0800 +@@ -0,0 +1,430 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "rtdm/internal.h" ++#include "posix/internal.h" ++#include "procfs.h" ++ ++/** ++ * @defgroup cobalt Cobalt ++ * ++ * Cobalt supplements the native Linux kernel in dual kernel ++ * configurations. It deals with all time-critical activities, such as ++ * handling interrupts, and scheduling real-time threads. The Cobalt ++ * kernel has higher priority over all the native kernel activities. ++ * ++ * Cobalt provides an implementation of the POSIX and RTDM interfaces ++ * based on a set of generic RTOS building blocks. ++ */ ++ ++static unsigned long timerfreq_arg; ++module_param_named(timerfreq, timerfreq_arg, ulong, 0444); ++ ++static unsigned long clockfreq_arg; ++module_param_named(clockfreq, clockfreq_arg, ulong, 0444); ++ ++#ifdef CONFIG_SMP ++static unsigned long supported_cpus_arg = -1; ++module_param_named(supported_cpus, supported_cpus_arg, ulong, 0444); ++#endif /* CONFIG_SMP */ ++ ++static unsigned long sysheap_size_arg; ++module_param_named(sysheap_size, sysheap_size_arg, ulong, 0444); ++ ++static char init_state_arg[16] = "enabled"; ++module_param_string(state, init_state_arg, sizeof(init_state_arg), 0444); ++ ++static BLOCKING_NOTIFIER_HEAD(state_notifier_list); ++ ++struct cobalt_pipeline cobalt_pipeline; ++EXPORT_SYMBOL_GPL(cobalt_pipeline); ++ ++DEFINE_PER_CPU(struct cobalt_machine_cpudata, cobalt_machine_cpudata); ++EXPORT_PER_CPU_SYMBOL_GPL(cobalt_machine_cpudata); ++ ++atomic_t cobalt_runstate = ATOMIC_INIT(COBALT_STATE_WARMUP); ++EXPORT_SYMBOL_GPL(cobalt_runstate); ++ ++struct cobalt_ppd cobalt_kernel_ppd = { ++ .exe_path = "vmlinux", ++}; ++EXPORT_SYMBOL_GPL(cobalt_kernel_ppd); ++ ++#ifdef CONFIG_XENO_OPT_DEBUG ++#define boot_debug_notice "[DEBUG]" ++#else ++#define boot_debug_notice "" ++#endif ++ ++#ifdef CONFIG_IPIPE_TRACE ++#define boot_lat_trace_notice "[LTRACE]" ++#else ++#define boot_lat_trace_notice "" ++#endif ++ ++#ifdef CONFIG_ENABLE_DEFAULT_TRACERS ++#define boot_evt_trace_notice "[ETRACE]" ++#else ++#define boot_evt_trace_notice "" ++#endif ++ ++#define boot_state_notice \ ++ ({ \ ++ realtime_core_state() == COBALT_STATE_STOPPED ? 
\ ++ "[STOPPED]" : ""; \ ++ }) ++ ++void cobalt_add_state_chain(struct notifier_block *nb) ++{ ++ blocking_notifier_chain_register(&state_notifier_list, nb); ++} ++EXPORT_SYMBOL_GPL(cobalt_add_state_chain); ++ ++void cobalt_remove_state_chain(struct notifier_block *nb) ++{ ++ blocking_notifier_chain_unregister(&state_notifier_list, nb); ++} ++EXPORT_SYMBOL_GPL(cobalt_remove_state_chain); ++ ++void cobalt_call_state_chain(enum cobalt_run_states newstate) ++{ ++ blocking_notifier_call_chain(&state_notifier_list, newstate, NULL); ++} ++EXPORT_SYMBOL_GPL(cobalt_call_state_chain); ++ ++static void sys_shutdown(void) ++{ ++ void *membase; ++ ++ xntimer_release_hardware(); ++ xnsched_destroy_all(); ++ xnregistry_cleanup(); ++ membase = xnheap_get_membase(&cobalt_heap); ++ xnheap_destroy(&cobalt_heap); ++ xnheap_vfree(membase); ++} ++ ++static int __init mach_setup(void) ++{ ++ struct ipipe_sysinfo sysinfo; ++ int ret, virq; ++ ++ ret = ipipe_select_timers(&xnsched_realtime_cpus); ++ if (ret < 0) ++ return ret; ++ ++ ipipe_get_sysinfo(&sysinfo); ++ ++ if (timerfreq_arg == 0) ++ timerfreq_arg = sysinfo.sys_hrtimer_freq; ++ ++ if (clockfreq_arg == 0) ++ clockfreq_arg = sysinfo.sys_hrclock_freq; ++ ++ if (clockfreq_arg == 0) { ++ printk(XENO_ERR "null clock frequency? Aborting.\n"); ++ return -ENODEV; ++ } ++ ++ cobalt_pipeline.timer_freq = timerfreq_arg; ++ cobalt_pipeline.clock_freq = clockfreq_arg; ++ ++ if (cobalt_machine.init) { ++ ret = cobalt_machine.init(); ++ if (ret) ++ return ret; ++ } ++ ++ ipipe_register_head(&xnsched_realtime_domain, "Xenomai"); ++ ++ ret = -EBUSY; ++ virq = ipipe_alloc_virq(); ++ if (virq == 0) ++ goto fail_apc; ++ ++ cobalt_pipeline.apc_virq = virq; ++ ++ ipipe_request_irq(ipipe_root_domain, ++ cobalt_pipeline.apc_virq, ++ apc_dispatch, ++ NULL, NULL); ++ ++ virq = ipipe_alloc_virq(); ++ if (virq == 0) ++ goto fail_escalate; ++ ++ cobalt_pipeline.escalate_virq = virq; ++ ++ ipipe_request_irq(&xnsched_realtime_domain, ++ cobalt_pipeline.escalate_virq, ++ (ipipe_irq_handler_t)__xnsched_run_handler, ++ NULL, NULL); ++ ++ ret = xnclock_init(cobalt_pipeline.clock_freq); ++ if (ret) ++ goto fail_clock; ++ ++ return 0; ++ ++fail_clock: ++ ipipe_free_irq(&xnsched_realtime_domain, ++ cobalt_pipeline.escalate_virq); ++ ipipe_free_virq(cobalt_pipeline.escalate_virq); ++fail_escalate: ++ ipipe_free_irq(ipipe_root_domain, ++ cobalt_pipeline.apc_virq); ++ ipipe_free_virq(cobalt_pipeline.apc_virq); ++fail_apc: ++ ipipe_unregister_head(&xnsched_realtime_domain); ++ ++ if (cobalt_machine.cleanup) ++ cobalt_machine.cleanup(); ++ ++ return ret; ++} ++ ++static inline int __init mach_late_setup(void) ++{ ++ if (cobalt_machine.late_init) ++ return cobalt_machine.late_init(); ++ ++ return 0; ++} ++ ++static __init void mach_cleanup(void) ++{ ++ ipipe_unregister_head(&xnsched_realtime_domain); ++ ipipe_free_irq(&xnsched_realtime_domain, ++ cobalt_pipeline.escalate_virq); ++ ipipe_free_virq(cobalt_pipeline.escalate_virq); ++ ipipe_timers_release(); ++ xnclock_cleanup(); ++} ++ ++static struct { ++ const char *label; ++ enum cobalt_run_states state; ++} init_states[] __initdata = { ++ { "disabled", COBALT_STATE_DISABLED }, ++ { "stopped", COBALT_STATE_STOPPED }, ++ { "enabled", COBALT_STATE_WARMUP }, ++}; ++ ++static void __init setup_init_state(void) ++{ ++ static char warn_bad_state[] __initdata = ++ XENO_WARNING "invalid init state '%s'\n"; ++ int n; ++ ++ for (n = 0; n < ARRAY_SIZE(init_states); n++) ++ if (strcmp(init_states[n].label, init_state_arg) == 0) { ++ 
set_realtime_core_state(init_states[n].state); ++ return; ++ } ++ ++ printk(warn_bad_state, init_state_arg); ++} ++ ++static __init int sys_init(void) ++{ ++ void *heapaddr; ++ int ret; ++ ++ if (sysheap_size_arg == 0) ++ sysheap_size_arg = CONFIG_XENO_OPT_SYS_HEAPSZ; ++ ++ heapaddr = xnheap_vmalloc(sysheap_size_arg * 1024); ++ if (heapaddr == NULL || ++ xnheap_init(&cobalt_heap, heapaddr, sysheap_size_arg * 1024)) { ++ return -ENOMEM; ++ } ++ xnheap_set_name(&cobalt_heap, "system heap"); ++ ++ xnsched_init_all(); ++ ++ xnregistry_init(); ++ ++ /* ++ * If starting in stopped mode, do all initializations, but do ++ * not enable the core timer. ++ */ ++ if (realtime_core_state() == COBALT_STATE_WARMUP) { ++ ret = xntimer_grab_hardware(); ++ if (ret) { ++ sys_shutdown(); ++ return ret; ++ } ++ set_realtime_core_state(COBALT_STATE_RUNNING); ++ } ++ ++ return 0; ++} ++ ++static int __init xenomai_init(void) ++{ ++ int ret, __maybe_unused cpu; ++ ++ setup_init_state(); ++ ++ if (!realtime_core_enabled()) { ++ printk(XENO_WARNING "disabled on kernel command line\n"); ++ return 0; ++ } ++ ++#ifdef CONFIG_SMP ++ cpumask_clear(&xnsched_realtime_cpus); ++ for_each_online_cpu(cpu) { ++ if (supported_cpus_arg & (1UL << cpu)) ++ cpumask_set_cpu(cpu, &xnsched_realtime_cpus); ++ } ++ if (cpumask_empty(&xnsched_realtime_cpus)) { ++ printk(XENO_WARNING "disabled via empty real-time CPU mask\n"); ++ set_realtime_core_state(COBALT_STATE_DISABLED); ++ return 0; ++ } ++ cobalt_cpu_affinity = xnsched_realtime_cpus; ++#endif /* CONFIG_SMP */ ++ ++ xnsched_register_classes(); ++ ++ ret = xnprocfs_init_tree(); ++ if (ret) ++ goto fail; ++ ++ ret = mach_setup(); ++ if (ret) ++ goto cleanup_proc; ++ ++ xnintr_mount(); ++ ++ ret = xnpipe_mount(); ++ if (ret) ++ goto cleanup_mach; ++ ++ ret = xnselect_mount(); ++ if (ret) ++ goto cleanup_pipe; ++ ++ ret = sys_init(); ++ if (ret) ++ goto cleanup_select; ++ ++ ret = mach_late_setup(); ++ if (ret) ++ goto cleanup_sys; ++ ++ ret = rtdm_init(); ++ if (ret) ++ goto cleanup_sys; ++ ++ ret = cobalt_init(); ++ if (ret) ++ goto cleanup_rtdm; ++ ++ rtdm_fd_init(); ++ ++ printk(XENO_INFO "Cobalt v%s %s%s%s%s\n", ++ XENO_VERSION_STRING, ++ boot_debug_notice, ++ boot_lat_trace_notice, ++ boot_evt_trace_notice, ++ boot_state_notice); ++ ++ return 0; ++ ++cleanup_rtdm: ++ rtdm_cleanup(); ++cleanup_sys: ++ sys_shutdown(); ++cleanup_select: ++ xnselect_umount(); ++cleanup_pipe: ++ xnpipe_umount(); ++cleanup_mach: ++ mach_cleanup(); ++cleanup_proc: ++ xnprocfs_cleanup_tree(); ++fail: ++ set_realtime_core_state(COBALT_STATE_DISABLED); ++ printk(XENO_ERR "init failed, code %d\n", ret); ++ ++ return ret; ++} ++device_initcall(xenomai_init); ++ ++/** ++ * @ingroup cobalt ++ * @defgroup cobalt_core Cobalt kernel ++ * ++ * The Cobalt core is a co-kernel which supplements the Linux kernel ++ * for delivering real-time services with very low latency. It ++ * implements a set of generic RTOS building blocks, which the ++ * Cobalt/POSIX and Cobalt/RTDM APIs are based on. Cobalt has higher ++ * priority over the Linux kernel activities. ++ * ++ * @{ ++ * ++ * @page cobalt-core-tags Dual kernel service tags ++ * ++ * The Cobalt kernel services may be restricted to particular calling ++ * contexts, or entail specific side-effects. To describe this ++ * information, each service documented by this section bears a set of ++ * tags when applicable. ++ * ++ * The table below matches the tags used throughout the documentation ++ * with the description of their meaning for the caller. 
++ *
++ * @par
++ * Context tags
++ * <TABLE>
++ * <TR><TH>Tag</TH> <TH>Context on entry</TH></TR>
++ * <TR><TD>primary-only</TD> <TD>Must be called from a Cobalt task in primary mode</TD></TR>
++ * <TR><TD>primary-timed</TD> <TD>Requires a Cobalt task in primary mode if timed</TD></TR>
++ * <TR><TD>coreirq-only</TD> <TD>Must be called from a Cobalt IRQ handler</TD></TR>
++ * <TR><TD>secondary-only</TD> <TD>Must be called from a Cobalt task in secondary mode or regular Linux task</TD></TR>
++ * <TR><TD>rtdm-task</TD> <TD>Must be called from a RTDM driver task</TD></TR>
++ * <TR><TD>mode-unrestricted</TD> <TD>May be called from a Cobalt task in either primary or secondary mode</TD></TR>
++ * <TR><TD>task-unrestricted</TD> <TD>May be called from a Cobalt or regular Linux task indifferently</TD></TR>
++ * <TR><TD>unrestricted</TD> <TD>May be called from any context previously described</TD></TR>
++ * <TR><TD>atomic-entry</TD> <TD>Caller must currently hold the big Cobalt kernel lock (nklock)</TD></TR>
++ * </TABLE>
++ *
++ * @par
++ * Possible side-effects
++ * <TABLE>
++ * <TR><TH>Tag</TH> <TH>Description</TH></TR>
++ * <TR><TD>might-switch</TD> <TD>The Cobalt kernel may switch context</TD></TR>
++ * </TABLE>
++ * ++ * @} ++ */ +--- linux/kernel/xenomai/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/Kconfig 2021-04-07 16:01:25.794636214 +0800 +@@ -0,0 +1,491 @@ ++menu "Core features" ++ ++config XENO_OPT_SCHED_CLASSES ++ bool "Extra scheduling classes" ++ default n ++ help ++ The Cobalt kernel implements a set of scheduling classes. ++ Each scheduling class defines its own set of rules for ++ determining when and how to select a new thread to run. ++ ++ Cobalt has a built-in real-time class, which supports both ++ preemptive fixed-priority FIFO, and round-robin scheduling. ++ ++ Enabling CONFIG_XENO_OPT_SCHED_CLASSES allows you to select ++ additional scheduling classes to enable in the Cobalt kernel. ++ ++ If in doubt, say N. ++ ++config XENO_OPT_SCHED_WEAK ++ bool "Weak scheduling class" ++ default n ++ depends on XENO_OPT_SCHED_CLASSES ++ help ++ This option creates a Cobalt scheduling class for mapping ++ members of the regular POSIX SCHED_FIFO/RR policies to a low ++ priority class of the Cobalt kernel, providing no real-time ++ guarantee. Therefore, up to a hundred non real-time priority ++ levels are available from the SCHED_WEAK policy. ++ ++ When CONFIG_XENO_OPT_SCHED_WEAK is disabled, Cobalt still ++ supports a single non real-time priority level (i.e. zero ++ priority), assigned to members of the SCHED_OTHER class. ++ ++ SCHED_WEAK/SCHED_OTHER threads can access Cobalt resources, ++ wait on Cobalt synchronization objects, but cannot compete for ++ the CPU with members of the real-time Cobalt classes. ++ ++ Since Cobalt assumes no real-time requirement for ++ SCHED_WEAK/SCHED_OTHER threads, they are automatically moved ++ back to secondary mode upon return from any Cobalt syscall if ++ necessary, unless they hold a Cobalt mutex, which would defer ++ the transition until such mutex is released. ++ ++ If in doubt, say N. ++ ++config XENO_OPT_SCHED_TP ++ bool "Temporal partitioning" ++ default n ++ depends on XENO_OPT_SCHED_CLASSES ++ help ++ This option enables support for temporal partitioning. ++ ++ If in doubt, say N. ++ ++config XENO_OPT_SCHED_TP_NRPART ++ int "Number of partitions" ++ default 4 ++ range 1 1024 ++ depends on XENO_OPT_SCHED_TP ++ help ++ Define here the maximum number of temporal partitions the TP ++ scheduler may have to handle. ++ ++config XENO_OPT_SCHED_SPORADIC ++ bool "Sporadic scheduling" ++ default n ++ depends on XENO_OPT_SCHED_CLASSES ++ help ++ This option enables support for the sporadic scheduling policy ++ in the Cobalt kernel (SCHED_SPORADIC), also known as POSIX ++ sporadic server. ++ ++ It can be used to enforce a capped limit on the execution time ++ of a thread within a given period of time. ++ ++ If in doubt, say N. ++ ++config XENO_OPT_SCHED_SPORADIC_MAXREPL ++ int "Maximum number of pending replenishments" ++ default 8 ++ range 4 16 ++ depends on XENO_OPT_SCHED_SPORADIC ++ help ++ For performance reason, the budget replenishment information ++ is statically stored on a per-thread basis. This parameter ++ defines the maximum number of replenishment requests that can ++ be pending concurrently for any given thread that undergoes ++ sporadic scheduling (system minimum is 4). ++ ++config XENO_OPT_SCHED_QUOTA ++ bool "Thread groups with runtime quota" ++ default n ++ depends on XENO_OPT_SCHED_CLASSES ++ help ++ This option enables the SCHED_QUOTA scheduling policy in the ++ Cobalt kernel. 
++ ++ This policy enforces a limitation on the CPU consumption of ++ threads over a globally defined period, known as the quota ++ interval. This is done by pooling threads with common ++ requirements in groups, and giving each group a share of the ++ global period (see CONFIG_XENO_OPT_SCHED_QUOTA_PERIOD). ++ ++ When threads have entirely consumed the quota allotted to the ++ group they belong to, the latter is suspended as a whole, ++ until the next quota interval starts. At this point, a new ++ runtime budget is given to each group, in accordance with its ++ share. ++ ++ If in doubt, say N. ++ ++config XENO_OPT_SCHED_QUOTA_PERIOD ++ int "Quota interval (us)" ++ default 10000 ++ range 100 1000000000 ++ depends on XENO_OPT_SCHED_QUOTA ++ help ++ The global period thread groups can get a share of. ++ ++config XENO_OPT_SCHED_QUOTA_NR_GROUPS ++ int "Number of thread groups" ++ default 32 ++ range 1 1024 ++ depends on XENO_OPT_SCHED_QUOTA ++ help ++ The overall number of thread groups which may be defined ++ across all CPUs. ++ ++config XENO_OPT_STATS ++ bool "Runtime statistics" ++ depends on XENO_OPT_VFILE ++ default y ++ help ++ This option causes the Cobalt kernel to collect various ++ per-thread runtime statistics, which are accessible through ++ the /proc/xenomai/sched/stat interface. ++ ++config XENO_OPT_STATS_IRQS ++ bool "Account IRQ handlers separatly" ++ depends on XENO_OPT_STATS ++ default y ++ help ++ When enabled, the runtime of interrupt handlers is accounted ++ separately from the threads they interrupt. Also, the ++ occurrence of shared interrupts is accounted on a per-handler ++ basis. ++ ++config XENO_OPT_SHIRQ ++ bool "Shared interrupts" ++ help ++ Enables support for both level- and edge-triggered shared ++ interrupts, so that multiple real-time interrupt handlers ++ are allowed to control dedicated hardware devices which are ++ configured to share the same interrupt line. ++ ++config XENO_OPT_RR_QUANTUM ++ int "Round-robin quantum (us)" ++ default 1000 ++ help ++ This parameter defines the duration of the default round-robin ++ time quantum expressed as a count of micro-seconds. This value ++ may be overriden internally by Cobalt services which do ++ provide a round-robin interval. ++ ++config XENO_OPT_AUTOTUNE ++ tristate "Auto-tuning" ++ default y ++ select XENO_DRIVERS_AUTOTUNE ++ help ++ Enable auto-tuning capabilities. Auto-tuning is used for ++ adjusting the core timing services to the intrinsic latency of ++ the platform. ++ ++config XENO_OPT_SCALABLE_SCHED ++ bool "O(1) scheduler" ++ help ++ This option causes a multi-level priority queue to be used in ++ the real-time scheduler, so that it operates in constant-time ++ regardless of the number of _concurrently runnable_ threads ++ (which might be much lower than the total number of active ++ threads). ++ ++ Its use is recommended for large multi-threaded systems ++ involving more than 10 of such threads; otherwise, the default ++ linear method usually performs better with lower memory ++ footprints. ++ ++choice ++ prompt "Timer indexing method" ++ default XENO_OPT_TIMER_LIST if !X86_64 ++ default XENO_OPT_TIMER_RBTREE if X86_64 ++ help ++ This option allows to select the underlying data structure ++ which is going to be used for ordering the outstanding ++ software timers managed by the Cobalt kernel. ++ ++config XENO_OPT_TIMER_LIST ++ bool "Linear" ++ help ++ Use a linked list. 
Albeit O(N), this simple data structure is ++ particularly efficient when only a few timers (< 10) may be ++ concurrently outstanding at any point in time. ++ ++config XENO_OPT_TIMER_RBTREE ++ bool "Tree" ++ help ++ Use a red-black tree. This data structure is efficient when a ++ high number of software timers may be concurrently ++ outstanding at any point in time. ++ ++endchoice ++ ++config XENO_OPT_HOSTRT ++ depends on IPIPE_HAVE_HOSTRT ++ def_bool y ++ ++config XENO_OPT_PIPE ++ bool ++ ++config XENO_OPT_MAP ++ bool ++ ++config XENO_OPT_EXTCLOCK ++ bool ++ ++config XENO_OPT_COBALT_EXTENSION ++ bool ++ ++config XENO_OPT_VFILE ++ bool ++ depends on PROC_FS ++ default y ++ ++endmenu ++ ++menu "Sizes and static limits" ++ ++config XENO_OPT_PIPE_NRDEV ++ int "Number of pipe devices" ++ depends on XENO_OPT_PIPE ++ default 32 ++ help ++ Message pipes are bi-directional FIFO communication channels ++ allowing data exchange between Cobalt threads and regular ++ POSIX threads. Pipes natively preserve message boundaries, but ++ can also be used in byte streaming mode from kernel to ++ user-space. ++ ++ This option sets the maximum number of pipe devices supported ++ in the system. Pipe devices are named /dev/rtpN where N is a ++ device minor number ranging from 0 to XENO_OPT_PIPE_NRDEV - 1. ++ ++config XENO_OPT_REGISTRY_NRSLOTS ++ int "Number of registry slots" ++ default 512 ++ help ++ The registry is used by the Cobalt kernel to export named ++ resources to user-space programs via the /proc interface. ++ Each named resource occupies a registry slot. This option sets ++ the maximum number of resources the registry can handle. ++ ++config XENO_OPT_SYS_HEAPSZ ++ int "Size of system heap (Kb)" ++ default 4096 ++ help ++ The system heap is used for various internal allocations by ++ the Cobalt kernel. The size is expressed in Kilobytes. ++ ++config XENO_OPT_PRIVATE_HEAPSZ ++ int "Size of private heap (Kb)" ++ default 256 ++ help ++ The Cobalt kernel implements fast IPC mechanisms within the ++ scope of a process which require a private kernel memory heap ++ to be mapped in the address space of each Xenomai application ++ process. This option can be used to set the size of this ++ per-process heap. ++ ++ 64k is considered a large enough size for common use cases. ++ ++config XENO_OPT_SHARED_HEAPSZ ++ int "Size of shared heap (Kb)" ++ default 256 ++ help ++ The Cobalt kernel implements fast IPC mechanisms between ++ processes which require a shared kernel memory heap to be ++ mapped in the address space of all Xenomai application ++ processes. This option can be used to set the size of this ++ system-wide heap. ++ ++ 64k is considered a large enough size for common use cases. ++ ++config XENO_OPT_NRTIMERS ++ int "Maximum number of POSIX timers per process" ++ default 256 ++ help ++ This tunable controls how many POSIX timers can exist at any ++ given time for each Cobalt process (a timer is created by a ++ call to the timer_create() service of the Cobalt/POSIX API). ++ ++config XENO_OPT_DEBUG_TRACE_LOGSZ ++ int "Trace log size" ++ depends on XENO_OPT_DEBUG_TRACE_RELAX ++ default 16 ++ help ++ The size (kilobytes) of the trace log of relax requests. Once ++ this limit is reached, subsequent traces will be silently ++ discarded. ++ ++ Writing to /proc/xenomai/debug/relax empties the trace log. 
++ ++endmenu ++ ++menu "Latency settings" ++ ++config XENO_OPT_TIMING_SCHEDLAT ++ int "User scheduling latency (ns)" ++ default 0 ++ help ++ The user scheduling latency is the time between the ++ termination of an interrupt handler and the execution of the ++ first instruction of the real-time application thread this ++ handler resumes. A default value of 0 (recommended) will cause ++ a pre-calibrated value to be used. ++ ++ If the auto-tuner is enabled, this value will be used as the ++ factory default when running "autotune --reset". ++ ++config XENO_OPT_TIMING_KSCHEDLAT ++ int "Intra-kernel scheduling latency (ns)" ++ default 0 ++ help ++ The intra-kernel scheduling latency is the time between the ++ termination of an interrupt handler and the execution of the ++ first instruction of the RTDM kernel thread this handler ++ resumes. A default value of 0 (recommended) will cause a ++ pre-calibrated value to be used. ++ ++ Intra-kernel latency is usually significantly lower than user ++ scheduling latency on MMU-enabled platforms, due to CPU cache ++ latency. ++ ++ If the auto-tuner is enabled, this value will be used as the ++ factory default when running "autotune --reset". ++ ++config XENO_OPT_TIMING_IRQLAT ++ int "Interrupt latency (ns)" ++ default 0 ++ help ++ The interrupt latency is the time between the occurrence of an ++ IRQ and the first instruction of the interrupt handler which ++ will service it. A default value of 0 (recommended) will cause ++ a pre-calibrated value to be used. ++ ++ If the auto-tuner is enabled, this value will be used as the ++ factory default when running "autotune --reset". ++ ++endmenu ++ ++menuconfig XENO_OPT_DEBUG ++ depends on XENO_OPT_VFILE ++ bool "Debug support" ++ help ++ When enabled, various debugging features can be switched ++ on. They can help to find problems in applications, drivers, ++ and the Cobalt kernel. XENO_OPT_DEBUG by itself does not have ++ any impact on the generated code. ++ ++if XENO_OPT_DEBUG ++ ++config XENO_OPT_DEBUG_COBALT ++ bool "Cobalt runtime assertions" ++ help ++ This option activates various assertions inside the Cobalt ++ kernel. This option has limited overhead. ++ ++config XENO_OPT_DEBUG_MEMORY ++ bool "Cobalt memory checks" ++ help ++ This option enables memory debug checks inside the Cobalt ++ kernel. This option may induce significant overhead with large ++ heaps. ++ ++config XENO_OPT_DEBUG_CONTEXT ++ bool "Check for calling context" ++ help ++ This option enables checks for the calling context in the ++ Cobalt kernel, aimed at detecting when regular Linux routines ++ are entered from a real-time context, and conversely. ++ ++config XENO_OPT_DEBUG_LOCKING ++ bool "Spinlock debugging support" ++ default y if SMP ++ help ++ This option activates runtime assertions, and measurements ++ of spinlocks spinning time and duration in the Cobalt ++ kernel. It helps finding latency spots due to interrupt ++ masked sections. Statistics about the longest masked section ++ can be found in /proc/xenomai/debug/lock. ++ ++ This option may induce a measurable overhead on low end ++ machines. ++ ++config XENO_OPT_DEBUG_USER ++ bool "User consistency checks" ++ help ++ This option enables a set of consistency checks for ++ detecting wrong runtime behavior in user applications. ++ ++ With some of the debug categories, threads can ask for ++ notification when a problem is detected, by turning on the ++ PTHREAD_WARNSW mode bit with pthread_setmode_np(). 
Cobalt ++ sends the Linux-originated SIGDEBUG signal for notifying ++ threads, along with a reason code passed into the associated ++ siginfo data (see pthread_setmode_np()). ++ ++ Some of these runtime checks may induce overhead, enable ++ them for debugging purposes only. ++ ++if XENO_OPT_DEBUG_USER ++ ++config XENO_OPT_DEBUG_MUTEX_RELAXED ++ bool "Detect relaxed mutex owner" ++ default y ++ help ++ A thread which attempts to acquire a mutex currently owned by ++ another thread running in secondary/relaxed mode thread will ++ suffer unwanted latencies, due to a priority inversion. ++ debug notifications are enabled for such thread, it receives ++ a SIGDEBUG signal. ++ ++ This option has some overhead in real-time mode over ++ contented mutexes. ++ ++config XENO_OPT_DEBUG_MUTEX_SLEEP ++ bool "Detect sleeping with mutex" ++ default y ++ help ++ A thread which goes sleeping while holding a mutex is prone ++ to cause unwanted latencies to other threads serialized by ++ the same lock. If debug notifications are enabled for such ++ thread, it receives a SIGDEBUG signal right before entering ++ sleep. ++ ++ This option has noticeable overhead in real-time mode as it ++ disables the normal fast mutex operations from user-space, ++ causing a system call for each mutex acquisition/release. ++ ++config XENO_OPT_DEBUG_LEGACY ++ bool "Detect usage of legacy constructs/features" ++ default n ++ help ++ Turns on detection of legacy API usage. ++ ++endif # XENO_OPT_DEBUG_USER ++ ++config XENO_OPT_DEBUG_TRACE_RELAX ++ bool "Trace relax requests" ++ default n ++ help ++ This option enables recording of unwanted relax requests from ++ user-space applications leaving the real-time domain, logging ++ the thread information and code location involved. All records ++ are readable from /proc/xenomai/debug/relax, and can be ++ decoded using the "slackspot" utility. ++ ++config XENO_OPT_WATCHDOG ++ bool "Watchdog support" ++ default y ++ help ++ This option activates a watchdog aimed at detecting runaway ++ Cobalt threads. If enabled, the watchdog triggers after a ++ given period of uninterrupted real-time activity has elapsed ++ without Linux interaction in the meantime. ++ ++ In such an event, the current thread is moved out the ++ real-time domain, receiving a SIGDEBUG signal from the Linux ++ kernel immediately after. ++ ++ The timeout value of the watchdog can be set using the ++ XENO_OPT_WATCHDOG_TIMEOUT parameter. ++ ++config XENO_OPT_WATCHDOG_TIMEOUT ++ depends on XENO_OPT_WATCHDOG ++ int "Watchdog timeout" ++ default 4 ++ range 1 60 ++ help ++ Watchdog timeout value (in seconds). ++ ++endif # XENO_OPT_DEBUG +--- linux/kernel/xenomai/sched-idle.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-idle.c 2021-04-07 16:01:25.790636220 +0800 +@@ -0,0 +1,67 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++ ++static struct xnthread *xnsched_idle_pick(struct xnsched *sched) ++{ ++ return &sched->rootcb; ++} ++ ++static bool xnsched_idle_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ return __xnsched_idle_setparam(thread, p); ++} ++ ++static void xnsched_idle_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ __xnsched_idle_getparam(thread, p); ++} ++ ++static void xnsched_idle_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ __xnsched_idle_trackprio(thread, p); ++} ++ ++static void xnsched_idle_protectprio(struct xnthread *thread, int prio) ++{ ++ __xnsched_idle_protectprio(thread, prio); ++} ++ ++struct xnsched_class xnsched_class_idle = { ++ .sched_init = NULL, ++ .sched_enqueue = NULL, ++ .sched_dequeue = NULL, ++ .sched_requeue = NULL, ++ .sched_tick = NULL, ++ .sched_rotate = NULL, ++ .sched_forget = NULL, ++ .sched_kick = NULL, ++ .sched_declare = NULL, ++ .sched_pick = xnsched_idle_pick, ++ .sched_setparam = xnsched_idle_setparam, ++ .sched_getparam = xnsched_idle_getparam, ++ .sched_trackprio = xnsched_idle_trackprio, ++ .sched_protectprio = xnsched_idle_protectprio, ++ .weight = XNSCHED_CLASS_WEIGHT(0), ++ .policy = SCHED_IDLE, ++ .name = "idle" ++}; +--- linux/kernel/xenomai/thread.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/thread.c 2021-04-07 16:01:25.785636227 +0800 +@@ -0,0 +1,2672 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * Copyright (C) 2006-2010 Gilles Chanteperdrix ++ * Copyright (C) 2001-2013 The Xenomai project ++ * ++ * SMP support Copyright (C) 2004 The HYADES project ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "debug.h" ++ ++static DECLARE_WAIT_QUEUE_HEAD(join_all); ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_thread Thread services ++ * @{ ++ */ ++ ++static void timeout_handler(struct xntimer *timer) ++{ ++ struct xnthread *thread = container_of(timer, struct xnthread, rtimer); ++ ++ xnthread_set_info(thread, XNTIMEO); /* Interrupts are off. */ ++ xnthread_resume(thread, XNDELAY); ++} ++ ++static void periodic_handler(struct xntimer *timer) ++{ ++ struct xnthread *thread = container_of(timer, struct xnthread, ptimer); ++ /* ++ * Prevent unwanted round-robin, and do not wake up threads ++ * blocked on a resource. 
++ */ ++ if (xnthread_test_state(thread, XNDELAY|XNPEND) == XNDELAY) ++ xnthread_resume(thread, XNDELAY); ++ ++ /* ++ * The periodic thread might have migrated to another CPU ++ * while passive, fix the timer affinity if need be. ++ */ ++ xntimer_set_affinity(&thread->ptimer, thread->sched); ++} ++ ++static inline void enlist_new_thread(struct xnthread *thread) ++{ /* nklock held, irqs off */ ++ list_add_tail(&thread->glink, &nkthreadq); ++ cobalt_nrthreads++; ++ xnvfile_touch_tag(&nkthreadlist_tag); ++} ++ ++struct kthread_arg { ++ struct xnthread *thread; ++ struct completion *done; ++}; ++ ++static int kthread_trampoline(void *arg) ++{ ++ struct kthread_arg *ka = arg; ++ struct xnthread *thread = ka->thread; ++ struct sched_param param; ++ int ret, policy, prio; ++ ++ /* ++ * It only makes sense to create Xenomai kthreads with the ++ * SCHED_FIFO, SCHED_NORMAL or SCHED_WEAK policies. So ++ * anything that is not from Xenomai's RT class is assumed to ++ * belong to SCHED_NORMAL linux-wise. ++ */ ++ if (thread->sched_class != &xnsched_class_rt) { ++ policy = SCHED_NORMAL; ++ prio = 0; ++ } else { ++ policy = SCHED_FIFO; ++ prio = normalize_priority(thread->cprio); ++ } ++ ++ param.sched_priority = prio; ++ sched_setscheduler(current, policy, ¶m); ++ ++ ret = xnthread_map(thread, ka->done); ++ if (ret) { ++ printk(XENO_WARNING "failed to create kernel shadow %s\n", ++ thread->name); ++ return ret; ++ } ++ ++ trace_cobalt_shadow_entry(thread); ++ ++ thread->entry(thread->cookie); ++ ++ xnthread_cancel(thread); ++ ++ return 0; ++} ++ ++static inline int spawn_kthread(struct xnthread *thread) ++{ ++ DECLARE_COMPLETION_ONSTACK(done); ++ struct kthread_arg ka = { ++ .thread = thread, ++ .done = &done ++ }; ++ struct task_struct *p; ++ ++ p = kthread_run(kthread_trampoline, &ka, "%s", thread->name); ++ if (IS_ERR(p)) ++ return PTR_ERR(p); ++ ++ wait_for_completion(&done); ++ ++ return 0; ++} ++ ++int __xnthread_init(struct xnthread *thread, ++ const struct xnthread_init_attr *attr, ++ struct xnsched *sched, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param) ++{ ++ int flags = attr->flags, ret, gravity; ++ ++ flags &= ~(XNSUSP|XNBOOST); ++#ifndef CONFIG_XENO_ARCH_FPU ++ flags &= ~XNFPU; ++#endif ++ if ((flags & XNROOT) == 0) ++ flags |= XNDORMANT; ++ ++ if (attr->name) ++ ksformat(thread->name, ++ sizeof(thread->name), "%s", attr->name); ++ else ++ ksformat(thread->name, ++ sizeof(thread->name), "@%p", thread); ++ ++ /* ++ * We mirror the global user debug state into the per-thread ++ * state, to speed up branch taking in lib/cobalt wherever ++ * this needs to be tested. 
++ */ ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_MUTEX_SLEEP)) ++ flags |= XNDEBUG; ++ ++ thread->personality = attr->personality; ++ cpumask_and(&thread->affinity, &attr->affinity, &cobalt_cpu_affinity); ++ thread->sched = sched; ++ thread->state = flags; ++ thread->info = 0; ++ thread->local_info = 0; ++ thread->wprio = XNSCHED_IDLE_PRIO; ++ thread->cprio = XNSCHED_IDLE_PRIO; ++ thread->bprio = XNSCHED_IDLE_PRIO; ++ thread->lock_count = 0; ++ thread->rrperiod = XN_INFINITE; ++ thread->wchan = NULL; ++ thread->wwake = NULL; ++ thread->wcontext = NULL; ++ thread->res_count = 0; ++ thread->handle = XN_NO_HANDLE; ++ memset(&thread->stat, 0, sizeof(thread->stat)); ++ thread->selector = NULL; ++ INIT_LIST_HEAD(&thread->glink); ++ INIT_LIST_HEAD(&thread->boosters); ++ /* These will be filled by xnthread_start() */ ++ thread->entry = NULL; ++ thread->cookie = NULL; ++ init_completion(&thread->exited); ++ memset(xnthread_archtcb(thread), 0, sizeof(struct xnarchtcb)); ++ ++ gravity = flags & XNUSER ? XNTIMER_UGRAVITY : XNTIMER_KGRAVITY; ++ xntimer_init(&thread->rtimer, &nkclock, timeout_handler, ++ sched, gravity); ++ xntimer_set_name(&thread->rtimer, thread->name); ++ xntimer_set_priority(&thread->rtimer, XNTIMER_HIPRIO); ++ xntimer_init(&thread->ptimer, &nkclock, periodic_handler, ++ sched, gravity); ++ xntimer_set_name(&thread->ptimer, thread->name); ++ xntimer_set_priority(&thread->ptimer, XNTIMER_HIPRIO); ++ ++ thread->base_class = NULL; /* xnsched_set_policy() will set it. */ ++ ret = xnsched_init_thread(thread); ++ if (ret) ++ goto err_out; ++ ++ ret = xnsched_set_policy(thread, sched_class, sched_param); ++ if (ret) ++ goto err_out; ++ ++ if ((flags & (XNUSER|XNROOT)) == 0) { ++ ret = spawn_kthread(thread); ++ if (ret) ++ goto err_out; ++ } ++ ++ return 0; ++ ++err_out: ++ xntimer_destroy(&thread->rtimer); ++ xntimer_destroy(&thread->ptimer); ++ ++ return ret; ++} ++ ++void xnthread_init_shadow_tcb(struct xnthread *thread) ++{ ++ struct xnarchtcb *tcb = xnthread_archtcb(thread); ++ struct task_struct *p = current; ++ ++ /* ++ * If the current task is a kthread, the pipeline will take ++ * the necessary steps to make the FPU usable in such ++ * context. The kernel already took care of this issue for ++ * userland tasks (e.g. setting up a clean backup area). 
++ */ ++ __ipipe_share_current(0); ++ ++ tcb->core.host_task = p; ++ tcb->core.tsp = &p->thread; ++ tcb->core.mm = p->mm; ++ tcb->core.active_mm = p->mm; ++ tcb->core.tip = task_thread_info(p); ++#ifdef CONFIG_XENO_ARCH_FPU ++ tcb->core.user_fpu_owner = p; ++#endif /* CONFIG_XENO_ARCH_FPU */ ++ xnarch_init_shadow_tcb(thread); ++ ++ trace_cobalt_shadow_map(thread); ++} ++ ++void xnthread_init_root_tcb(struct xnthread *thread) ++{ ++ struct xnarchtcb *tcb = xnthread_archtcb(thread); ++ struct task_struct *p = current; ++ ++ tcb->core.host_task = p; ++ tcb->core.tsp = &tcb->core.ts; ++ tcb->core.mm = p->mm; ++ tcb->core.tip = NULL; ++ xnarch_init_root_tcb(thread); ++} ++ ++void xnthread_deregister(struct xnthread *thread) ++{ ++ if (thread->handle != XN_NO_HANDLE) ++ xnregistry_remove(thread->handle); ++ ++ thread->handle = XN_NO_HANDLE; ++} ++ ++char *xnthread_format_status(unsigned long status, char *buf, int size) ++{ ++ static const char labels[] = XNTHREAD_STATE_LABELS; ++ int pos, c, mask; ++ char *wp; ++ ++ for (mask = (int)status, pos = 0, wp = buf; ++ mask != 0 && wp - buf < size - 2; /* 1-letter label + \0 */ ++ mask >>= 1, pos++) { ++ if ((mask & 1) == 0) ++ continue; ++ ++ c = labels[pos]; ++ ++ switch (1 << pos) { ++ case XNROOT: ++ c = 'R'; /* Always mark root as runnable. */ ++ break; ++ case XNREADY: ++ if (status & XNROOT) ++ continue; /* Already reported on XNROOT. */ ++ break; ++ case XNDELAY: ++ /* ++ * Only report genuine delays here, not timed ++ * waits for resources. ++ */ ++ if (status & XNPEND) ++ continue; ++ break; ++ case XNPEND: ++ /* Report timed waits with lowercase symbol. */ ++ if (status & XNDELAY) ++ c |= 0x20; ++ break; ++ default: ++ if (c == '.') ++ continue; ++ } ++ *wp++ = c; ++ } ++ ++ *wp = '\0'; ++ ++ return buf; ++} ++ ++pid_t xnthread_host_pid(struct xnthread *thread) ++{ ++ if (xnthread_test_state(thread, XNROOT)) ++ return 0; ++ if (!xnthread_host_task(thread)) ++ return -1; ++ ++ return task_pid_nr(xnthread_host_task(thread)); ++} ++ ++int xnthread_set_clock(struct xnthread *thread, struct xnclock *newclock) ++{ ++ spl_t s; ++ ++ if (thread == NULL) { ++ thread = xnthread_current(); ++ if (thread == NULL) ++ return -EPERM; ++ } ++ ++ /* Change the clock the thread's periodic timer is paced by. */ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_set_clock(&thread->ptimer, newclock); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_set_clock); ++ ++xnticks_t xnthread_get_timeout(struct xnthread *thread, xnticks_t ns) ++{ ++ struct xntimer *timer; ++ xnticks_t timeout; ++ ++ if (!xnthread_test_state(thread,XNDELAY)) ++ return 0LL; ++ ++ if (xntimer_running_p(&thread->rtimer)) ++ timer = &thread->rtimer; ++ else if (xntimer_running_p(&thread->ptimer)) ++ timer = &thread->ptimer; ++ else ++ return 0LL; ++ ++ timeout = xntimer_get_date(timer); ++ if (timeout <= ns) ++ return 1; ++ ++ return timeout - ns; ++} ++EXPORT_SYMBOL_GPL(xnthread_get_timeout); ++ ++xnticks_t xnthread_get_period(struct xnthread *thread) ++{ ++ xnticks_t period = 0; ++ /* ++ * The current thread period might be: ++ * - the value of the timer interval for periodic threads (ns/ticks) ++ * - or, the value of the alloted round-robin quantum (ticks) ++ * - or zero, meaning "no periodic activity". 
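
As a small illustration of the two query helpers defined here, a sketch of a report routine one might call from a vfile handler; `xnclock_read_monotonic()` and the global `nkclock` are assumed from the Cobalt clock API, and the `demo_*` naming and output format are illustrative only.

```c
#include <linux/kernel.h>
#include <cobalt/kernel/clock.h>	/* assumed header location */
#include <cobalt/kernel/thread.h>

/* Print the remaining delay and the current period of a thread. */
static void demo_report(struct xnthread *t)
{
	xnticks_t now = xnclock_read_monotonic(&nkclock);
	xnticks_t left = xnthread_get_timeout(t, now);	/* 0 if not delayed */
	xnticks_t period = xnthread_get_period(t);	/* 0 if not periodic */

	printk(KERN_INFO "%s: remaining timeout %llu ns, period %llu ns\n",
	       t->name, (unsigned long long)left, (unsigned long long)period);
}
```
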
++ */ ++ if (xntimer_running_p(&thread->ptimer)) ++ period = xntimer_interval(&thread->ptimer); ++ else if (xnthread_test_state(thread,XNRRB)) ++ period = thread->rrperiod; ++ ++ return period; ++} ++EXPORT_SYMBOL_GPL(xnthread_get_period); ++ ++void xnthread_prepare_wait(struct xnthread_wait_context *wc) ++{ ++ struct xnthread *curr = xnthread_current(); ++ ++ wc->posted = 0; ++ curr->wcontext = wc; ++} ++EXPORT_SYMBOL_GPL(xnthread_prepare_wait); ++ ++static inline int moving_target(struct xnsched *sched, struct xnthread *thread) ++{ ++ int ret = 0; ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ /* ++ * When deleting a thread in the course of a context switch or ++ * in flight to another CPU with nklock unlocked on a distant ++ * CPU, do nothing, this case will be caught in ++ * xnsched_finish_unlocked_switch. ++ */ ++ ret = (sched->status & XNINSW) || ++ xnthread_test_state(thread, XNMIGRATE); ++#endif ++ return ret; ++} ++ ++#ifdef CONFIG_XENO_ARCH_FPU ++ ++static inline void giveup_fpu(struct xnsched *sched, ++ struct xnthread *thread) ++{ ++ if (thread == sched->fpuholder) ++ sched->fpuholder = NULL; ++} ++ ++void xnthread_switch_fpu(struct xnsched *sched) ++{ ++ struct xnthread *curr = sched->curr; ++ ++ if (!xnthread_test_state(curr, XNFPU)) ++ return; ++ ++ xnarch_switch_fpu(sched->fpuholder, curr); ++ sched->fpuholder = curr; ++} ++ ++#else /* !CONFIG_XENO_ARCH_FPU */ ++ ++static inline void giveup_fpu(struct xnsched *sched, ++ struct xnthread *thread) ++{ ++} ++ ++#endif /* !CONFIG_XENO_ARCH_FPU */ ++ ++static inline void release_all_ownerships(struct xnthread *curr) ++{ ++ struct xnsynch *synch, *tmp; ++ ++ /* ++ * Release all the ownerships obtained by a thread on ++ * synchronization objects. This routine must be entered ++ * interrupts off. ++ */ ++ xnthread_for_each_booster_safe(synch, tmp, curr) { ++ xnsynch_release(synch, curr); ++ if (synch->cleanup) ++ synch->cleanup(synch); ++ } ++} ++ ++static inline void cleanup_tcb(struct xnthread *curr) /* nklock held, irqs off */ ++{ ++ struct xnsched *sched = curr->sched; ++ ++ list_del(&curr->glink); ++ cobalt_nrthreads--; ++ xnvfile_touch_tag(&nkthreadlist_tag); ++ ++ if (xnthread_test_state(curr, XNREADY)) { ++ XENO_BUG_ON(COBALT, xnthread_test_state(curr, XNTHREAD_BLOCK_BITS)); ++ xnsched_dequeue(curr); ++ xnthread_clear_state(curr, XNREADY); ++ } ++ ++ if (xnthread_test_state(curr, XNPEND)) ++ xnsynch_forget_sleeper(curr); ++ ++ xnthread_set_state(curr, XNZOMBIE); ++ /* ++ * NOTE: we must be running over the root thread, or @curr ++ * is dormant, which means that we don't risk sched->curr to ++ * disappear due to voluntary rescheduling while holding the ++ * nklock, despite @curr bears the zombie bit. ++ */ ++ release_all_ownerships(curr); ++ ++ giveup_fpu(sched, curr); ++ ++ if (moving_target(sched, curr)) ++ return; ++ ++ xnsched_forget(curr); ++ xnthread_deregister(curr); ++} ++ ++void __xnthread_cleanup(struct xnthread *curr) ++{ ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ xntimer_destroy(&curr->rtimer); ++ xntimer_destroy(&curr->ptimer); ++ ++ if (curr->selector) { ++ xnselector_destroy(curr->selector); ++ curr->selector = NULL; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ cleanup_tcb(curr); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* Wake up the joiner if any (we can't have more than one). */ ++ complete(&curr->exited); ++ ++ /* Notify our exit to xnthread_killall() if need be. */ ++ if (waitqueue_active(&join_all)) ++ wake_up(&join_all); ++ ++ /* Finalize last since this incurs releasing the TCB. 
*/ ++ xnthread_run_handler_stack(curr, finalize_thread); ++} ++ ++/* ++ * Unwinds xnthread_init() ops for an unmapped thread. Since the ++ * latter must be dormant, it can't be part of any runqueue. ++ */ ++void __xnthread_discard(struct xnthread *thread) ++{ ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ xntimer_destroy(&thread->rtimer); ++ xntimer_destroy(&thread->ptimer); ++ ++ xnlock_get_irqsave(&nklock, s); ++ if (!list_empty(&thread->glink)) { ++ list_del(&thread->glink); ++ cobalt_nrthreads--; ++ xnvfile_touch_tag(&nkthreadlist_tag); ++ } ++ xnthread_deregister(thread); ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++/** ++ * @fn void xnthread_init(struct xnthread *thread,const struct xnthread_init_attr *attr,struct xnsched_class *sched_class,const union xnsched_policy_param *sched_param) ++ * @brief Initialize a new thread. ++ * ++ * Initializes a new thread. The thread is left dormant until it is ++ * actually started by xnthread_start(). ++ * ++ * @param thread The address of a thread descriptor Cobalt will use to ++ * store the thread-specific data. This descriptor must always be ++ * valid while the thread is active therefore it must be allocated in ++ * permanent memory. @warning Some architectures may require the ++ * descriptor to be properly aligned in memory; this is an additional ++ * reason for descriptors not to be laid in the program stack where ++ * alignement constraints might not always be satisfied. ++ * ++ * @param attr A pointer to an attribute block describing the initial ++ * properties of the new thread. Members of this structure are defined ++ * as follows: ++ * ++ * - name: An ASCII string standing for the symbolic name of the ++ * thread. This name is copied to a safe place into the thread ++ * descriptor. This name might be used in various situations by Cobalt ++ * for issuing human-readable diagnostic messages, so it is usually a ++ * good idea to provide a sensible value here. NULL is fine though ++ * and means "anonymous". ++ * ++ * - flags: A set of creation flags affecting the operation. The ++ * following flags can be part of this bitmask: ++ * ++ * - XNSUSP creates the thread in a suspended state. In such a case, ++ * the thread shall be explicitly resumed using the xnthread_resume() ++ * service for its execution to actually begin, additionally to ++ * issuing xnthread_start() for it. This flag can also be specified ++ * when invoking xnthread_start() as a starting mode. ++ * ++ * - XNUSER shall be set if @a thread will be mapped over an existing ++ * user-space task. Otherwise, a new kernel host task is created, then ++ * paired with the new Xenomai thread. ++ * ++ * - XNFPU (enable FPU) tells Cobalt that the new thread may use the ++ * floating-point unit. XNFPU is implicitly assumed for user-space ++ * threads even if not set in @a flags. ++ * ++ * - affinity: The processor affinity of this thread. Passing ++ * CPU_MASK_ALL means "any cpu" from the allowed core affinity mask ++ * (cobalt_cpu_affinity). Passing an empty set is invalid. ++ * ++ * @param sched_class The initial scheduling class the new thread ++ * should be assigned to. ++ * ++ * @param sched_param The initial scheduling parameters to set for the ++ * new thread; @a sched_param must be valid within the context of @a ++ * sched_class. ++ * ++ * @return 0 is returned on success. Otherwise, the following error ++ * code indicates the cause of the failure: ++ * ++ * - -EINVAL is returned if @a attr->flags has invalid bits set, or @a ++ * attr->affinity is invalid (e.g. empty). 
++ * ++ * @coretags{secondary-only} ++ */ ++int xnthread_init(struct xnthread *thread, ++ const struct xnthread_init_attr *attr, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param) ++{ ++ struct xnsched *sched; ++ cpumask_t affinity; ++ int ret; ++ ++ if (attr->flags & ~(XNFPU | XNUSER | XNSUSP)) ++ return -EINVAL; ++ ++ /* ++ * Pick an initial CPU for the new thread which is part of its ++ * affinity mask, and therefore also part of the supported ++ * CPUs. This CPU may change in pin_to_initial_cpu(). ++ */ ++ cpumask_and(&affinity, &attr->affinity, &cobalt_cpu_affinity); ++ if (cpumask_empty(&affinity)) ++ return -EINVAL; ++ ++ sched = xnsched_struct(cpumask_first(&affinity)); ++ ++ ret = __xnthread_init(thread, attr, sched, sched_class, sched_param); ++ if (ret) ++ return ret; ++ ++ trace_cobalt_thread_init(thread, attr, sched_class); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_init); ++ ++/** ++ * @fn int xnthread_start(struct xnthread *thread,const struct xnthread_start_attr *attr) ++ * @brief Start a newly created thread. ++ * ++ * Starts a (newly) created thread, scheduling it for the first ++ * time. This call releases the target thread from the XNDORMANT ++ * state. This service also sets the initial mode for the new thread. ++ * ++ * @param thread The descriptor address of the started thread which ++ * must have been previously initialized by a call to xnthread_init(). ++ * ++ * @param attr A pointer to an attribute block describing the ++ * execution properties of the new thread. Members of this structure ++ * are defined as follows: ++ * ++ * - mode: The initial thread mode. The following flags can be part of ++ * this bitmask: ++ * ++ * - XNLOCK causes the thread to lock the scheduler when it starts. ++ * The target thread will have to call the xnsched_unlock() ++ * service to unlock the scheduler. A non-preemptible thread may still ++ * block, in which case, the lock is reasserted when the thread is ++ * scheduled back in. ++ * ++ * - XNSUSP makes the thread start in a suspended state. In such a ++ * case, the thread will have to be explicitly resumed using the ++ * xnthread_resume() service for its execution to actually begin. ++ * ++ * - entry: The address of the thread's body routine. In other words, ++ * it is the thread entry point. ++ * ++ * - cookie: A user-defined opaque cookie Cobalt will pass to the ++ * emerging thread as the sole argument of its entry point. ++ * ++ * @retval 0 if @a thread could be started ; ++ * ++ * @retval -EBUSY if @a thread was not dormant or stopped ; ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int xnthread_start(struct xnthread *thread, ++ const struct xnthread_start_attr *attr) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (!xnthread_test_state(thread, XNDORMANT)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBUSY; ++ } ++ ++ xnthread_set_state(thread, attr->mode & (XNTHREAD_MODE_BITS | XNSUSP)); ++ thread->entry = attr->entry; ++ thread->cookie = attr->cookie; ++ if (attr->mode & XNLOCK) ++ thread->lock_count = 1; ++ ++ /* ++ * A user-space thread starts immediately Cobalt-wise since we ++ * already have an underlying Linux context for it, so we can ++ * enlist it now to make it visible from the /proc interface. 
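
Taken together, xnthread_init() and xnthread_start() follow the usual init-then-start pattern. Below is a minimal kernel-side sketch of that pattern, assuming the usual Cobalt kernel headers and the `rt` member of `union xnsched_policy_param`; the `demo_*` names are illustrative, and an RTDM driver would more commonly reach this path through rtdm_task_init().

```c
#include <cobalt/kernel/thread.h>	/* assumed header locations */
#include <cobalt/kernel/sched.h>

static struct xnthread demo_thread;

/* Entry point; Cobalt passes back the cookie given at start time. */
static void demo_entry(void *cookie)
{
	(void)cookie;
	/* real-time work would go here */
}

static int demo_spawn(void)
{
	struct xnthread_init_attr iattr = {
		.name = "demo",
		.flags = XNFPU,		  /* kernel thread: XNUSER not set */
		.affinity = CPU_MASK_ALL, /* any CPU in cobalt_cpu_affinity */
		.personality = NULL,	  /* left NULL for brevity in this sketch */
	};
	struct xnthread_start_attr sattr = {
		.mode = 0,		/* neither XNSUSP nor XNLOCK */
		.entry = demo_entry,
		.cookie = NULL,
	};
	union xnsched_policy_param param = { .rt = { .prio = 50 } };
	int ret;

	ret = xnthread_init(&demo_thread, &iattr, &xnsched_class_rt, &param);
	if (ret)
		return ret;

	/* Releases the thread from XNDORMANT; -EBUSY if already started. */
	return xnthread_start(&demo_thread, &sattr);
}
```
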
++ */ ++ if (xnthread_test_state(thread, XNUSER)) ++ enlist_new_thread(thread); ++ ++ trace_cobalt_thread_start(thread); ++ ++ xnthread_resume(thread, XNDORMANT); ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_start); ++ ++/** ++ * @fn void xnthread_set_mode(int clrmask,int setmask) ++ * @brief Change control mode of the current thread. ++ * ++ * Change the control mode of the current thread. The control mode ++ * affects several behaviours of the Cobalt core regarding this ++ * thread. ++ * ++ * @param clrmask Clears the corresponding bits from the control mode ++ * before setmask is applied. The scheduler lock held by the current ++ * thread can be forcibly released by passing the XNLOCK bit in this ++ * mask. In this case, the lock nesting count is also reset to zero. ++ * ++ * @param setmask The new thread mode. The following flags may be set ++ * in this bitmask: ++ * ++ * - XNLOCK makes the current thread non-preemptible by other threads. ++ * Unless XNTRAPLB is also set for the thread, the latter may still ++ * block, dropping the lock temporarily, in which case, the lock will ++ * be reacquired automatically when the thread resumes execution. ++ * ++ * - XNWARN enables debugging notifications for the current thread. A ++ * SIGDEBUG (Linux-originated) signal is sent when the following ++ * atypical or abnormal behavior is detected: ++ * ++ * - the current thread switches to secondary mode. Such notification ++ * comes in handy for detecting spurious relaxes. ++ * ++ * - CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED is enabled in the kernel ++ * configuration, and the current thread is sleeping on a Cobalt ++ * mutex currently owned by a thread running in secondary mode, ++ * which reveals a priority inversion. ++ * ++ * - the current thread is about to sleep while holding a Cobalt ++ * mutex, and CONFIG_XENO_OPT_DEBUG_MUTEX_SLEEP is enabled in the ++ * kernel configuration. Blocking for acquiring a mutex does not ++ * trigger such a signal though. ++ * ++ * - the current thread has both XNTRAPLB and XNLOCK set, and ++ * attempts to block on a Cobalt service, which would cause a ++ * lock break. ++ * ++ * - XNTRAPLB disallows breaking the scheduler lock. In the default ++ * case, a thread which holds the scheduler lock is allowed to drop it ++ * temporarily for sleeping. If this mode bit is set, such thread ++ * would return immediately with XNBREAK set from ++ * xnthread_suspend(). If XNWARN is set for the current thread, ++ * SIGDEBUG is sent in addition to raising the break condition. ++ * ++ * @coretags{primary-only, might-switch} ++ * ++ * @note Setting @a clrmask and @a setmask to zero leads to a nop, ++ * in which case xnthread_set_mode() returns the current mode. 
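
A short sketch of how a primary-mode driver thread might use xnthread_set_mode() to bracket a non-preemptible section (illustrative only; the returned mode is used to avoid dropping a scheduler lock the caller did not take).

```c
/* Assumes <cobalt/kernel/thread.h>; must run in primary mode. */
static void demo_nonpreemptible_section(void)
{
	int oldmode;

	/* Keep other Cobalt threads from preempting us on this CPU. */
	oldmode = xnthread_set_mode(0, XNLOCK);

	/* ... short, non-blocking work ... */

	/* Drop the scheduler lock only if this call actually added it. */
	if ((oldmode & XNLOCK) == 0)
		xnthread_set_mode(XNLOCK, 0);
}
```
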
++ */ ++int xnthread_set_mode(int clrmask, int setmask) ++{ ++ int oldmode, lock_count; ++ struct xnthread *curr; ++ spl_t s; ++ ++ primary_mode_only(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ curr = xnsched_current_thread(); ++ oldmode = xnthread_get_state(curr) & XNTHREAD_MODE_BITS; ++ lock_count = curr->lock_count; ++ xnthread_clear_state(curr, clrmask & XNTHREAD_MODE_BITS); ++ xnthread_set_state(curr, setmask & XNTHREAD_MODE_BITS); ++ trace_cobalt_thread_set_mode(curr); ++ ++ if (setmask & XNLOCK) { ++ if (lock_count == 0) ++ xnsched_lock(); ++ } else if (clrmask & XNLOCK) { ++ if (lock_count > 0) { ++ curr->lock_count = 0; ++ xnthread_clear_localinfo(curr, XNLBALERT); ++ xnsched_run(); ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (lock_count > 0) ++ oldmode |= XNLOCK; ++ ++ return oldmode; ++} ++EXPORT_SYMBOL_GPL(xnthread_set_mode); ++ ++/** ++ * @fn void xnthread_suspend(struct xnthread *thread, int mask,xnticks_t timeout, xntmode_t timeout_mode,struct xnsynch *wchan) ++ * @brief Suspend a thread. ++ * ++ * Suspends the execution of a thread according to a given suspensive ++ * condition. This thread will not be eligible for scheduling until it ++ * all the pending suspensive conditions set by this service are ++ * removed by one or more calls to xnthread_resume(). ++ * ++ * @param thread The descriptor address of the suspended thread. ++ * ++ * @param mask The suspension mask specifying the suspensive condition ++ * to add to the thread's wait mask. Possible values usable by the ++ * caller are: ++ * ++ * - XNSUSP. This flag forcibly suspends a thread, regardless of any ++ * resource to wait for. A reverse call to xnthread_resume() ++ * specifying the XNSUSP bit must be issued to remove this condition, ++ * which is cumulative with other suspension bits.@a wchan should be ++ * NULL when using this suspending mode. ++ * ++ * - XNDELAY. This flags denotes a counted delay wait (in ticks) which ++ * duration is defined by the value of the timeout parameter. ++ * ++ * - XNPEND. This flag denotes a wait for a synchronization object to ++ * be signaled. The wchan argument must points to this object. A ++ * timeout value can be passed to bound the wait. This suspending mode ++ * should not be used directly by the client interface, but rather ++ * through the xnsynch_sleep_on() call. ++ * ++ * @param timeout The timeout which may be used to limit the time the ++ * thread pends on a resource. This value is a wait time given in ++ * nanoseconds. It can either be relative, absolute monotonic, or ++ * absolute adjustable depending on @a timeout_mode. ++ * ++ * Passing XN_INFINITE @b and setting @a timeout_mode to XN_RELATIVE ++ * specifies an unbounded wait. All other values are used to ++ * initialize a watchdog timer. If the current operation mode of the ++ * system timer is oneshot and @a timeout elapses before ++ * xnthread_suspend() has completed, then the target thread will not ++ * be suspended, and this routine leads to a null effect. ++ * ++ * @param timeout_mode The mode of the @a timeout parameter. It can ++ * either be set to XN_RELATIVE, XN_ABSOLUTE, or XN_REALTIME (see also ++ * xntimer_start()). ++ * ++ * @param wchan The address of a pended resource. This parameter is ++ * used internally by the synchronization object implementation code ++ * to specify on which object the suspended thread pends. NULL is a ++ * legitimate value when this parameter does not apply to the current ++ * suspending mode (e.g. XNSUSP). 
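
To make the XNSUSP semantics above concrete, here is a hedged sketch of the usual forced-suspend/resume pairing; the helper names are illustrative, and both calls may switch context as noted by the service tags.

```c
/* Assumes <cobalt/kernel/thread.h> and <cobalt/kernel/sched.h>. */
static void demo_hold(struct xnthread *t)
{
	/* Add the XNSUSP condition: no wait channel, unbounded wait. */
	xnthread_suspend(t, XNSUSP, XN_INFINITE, XN_RELATIVE, NULL);
}

static void demo_release(struct xnthread *t)
{
	/* Remove the same condition; the thread becomes ready again
	 * once no other blocking bit remains set. */
	xnthread_resume(t, XNSUSP);
	xnsched_run();	/* let the rescheduling procedure run */
}
```
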
++ * ++ * @note If the target thread has received a Linux-originated signal, ++ * then this service immediately exits without suspending the thread, ++ * but raises the XNBREAK condition in its information mask. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void xnthread_suspend(struct xnthread *thread, int mask, ++ xnticks_t timeout, xntmode_t timeout_mode, ++ struct xnsynch *wchan) ++{ ++ unsigned long oldstate; ++ struct xnsched *sched; ++ spl_t s; ++ ++ /* No, you certainly do not want to suspend the root thread. */ ++ XENO_BUG_ON(COBALT, xnthread_test_state(thread, XNROOT)); ++ /* No built-in support for conjunctive wait. */ ++ XENO_BUG_ON(COBALT, wchan && thread->wchan); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_thread_suspend(thread, mask, timeout, timeout_mode, wchan); ++ ++ sched = thread->sched; ++ oldstate = thread->state; ++ ++ /* ++ * If attempting to suspend a runnable thread which is pending ++ * a forced switch to secondary mode (XNKICKED), just raise ++ * the XNBREAK status and return immediately, except if we ++ * are precisely doing such switch by applying XNRELAX. ++ * ++ * In the latter case, we also make sure to clear XNKICKED, ++ * since we won't go through prepare_for_signal() once ++ * relaxed. ++ */ ++ if (likely((oldstate & XNTHREAD_BLOCK_BITS) == 0)) { ++ if (likely((mask & XNRELAX) == 0)) { ++ if (xnthread_test_info(thread, XNKICKED)) ++ goto abort; ++ if (thread == sched->curr && ++ thread->lock_count > 0 && ++ (oldstate & XNTRAPLB) != 0) ++ goto lock_break; ++ } ++ /* ++ * Do not destroy the info left behind by yet unprocessed ++ * wakeups when suspending a remote thread. ++ */ ++ if (thread == sched->curr) ++ xnthread_clear_info(thread, XNRMID|XNTIMEO|XNBREAK| ++ XNWAKEN|XNROBBED|XNKICKED); ++ } ++ ++ /* ++ * Don't start the timer for a thread delayed indefinitely. ++ */ ++ if (timeout != XN_INFINITE || timeout_mode != XN_RELATIVE) { ++ xntimer_set_affinity(&thread->rtimer, thread->sched); ++ if (xntimer_start(&thread->rtimer, timeout, XN_INFINITE, ++ timeout_mode)) { ++ /* (absolute) timeout value in the past, bail out. */ ++ if (wchan) { ++ thread->wchan = wchan; ++ xnsynch_forget_sleeper(thread); ++ } ++ xnthread_set_info(thread, XNTIMEO); ++ goto out; ++ } ++ xnthread_set_state(thread, XNDELAY); ++ } ++ ++ if (oldstate & XNREADY) { ++ xnsched_dequeue(thread); ++ xnthread_clear_state(thread, XNREADY); ++ } ++ ++ xnthread_set_state(thread, mask); ++ ++ /* ++ * We must make sure that we don't clear the wait channel if a ++ * thread is first blocked (wchan != NULL) then forcibly ++ * suspended (wchan == NULL), since these are conjunctive ++ * conditions. ++ */ ++ if (wchan) ++ thread->wchan = wchan; ++ ++ /* ++ * If the current thread is being relaxed, we must have been ++ * called from xnthread_relax(), in which case we introduce an ++ * opportunity for interrupt delivery right before switching ++ * context, which shortens the uninterruptible code path. ++ * ++ * We have to shut irqs off before calling __xnsched_run() ++ * though: if an interrupt could preempt us right after ++ * xnarch_escalate() is passed but before the nklock is ++ * grabbed, we would enter the critical section in ++ * ___xnsched_run() from the root domain, which would defeat ++ * the purpose of escalating the request. ++ * ++ * NOTE: using __xnsched_run() for rescheduling allows us to ++ * break the scheduler lock temporarily. 
++ */ ++ if (likely(thread == sched->curr)) { ++ xnsched_set_resched(sched); ++ if (unlikely(mask & XNRELAX)) { ++ xnlock_clear_irqon(&nklock); ++ splmax(); ++ __xnsched_run(sched); ++ return; ++ } ++ /* ++ * If the thread is runnning on a remote CPU, ++ * xnsched_run() will trigger the IPI as required. In ++ * this case, sched refers to a remote runqueue, so ++ * make sure to always kick the rescheduling procedure ++ * for the local one. ++ */ ++ __xnsched_run(xnsched_current()); ++ goto out; ++ } ++ ++ /* ++ * Ok, this one is an interesting corner case, which requires ++ * a bit of background first. Here, we handle the case of ++ * suspending a _relaxed_ user shadow which is _not_ the ++ * current thread. ++ * ++ * The net effect is that we are attempting to stop the ++ * shadow thread for Cobalt, whilst this thread is actually ++ * running some code under the control of the Linux scheduler ++ * (i.e. it's relaxed). ++ * ++ * To make this possible, we force the target Linux task to ++ * migrate back to the Xenomai domain by sending it a ++ * SIGSHADOW signal the interface libraries trap for this ++ * specific internal purpose, whose handler is expected to ++ * call back Cobalt's migration service. ++ * ++ * By forcing this migration, we make sure that Cobalt ++ * controls, hence properly stops, the target thread according ++ * to the requested suspension condition. Otherwise, the ++ * shadow thread in secondary mode would just keep running ++ * into the Linux domain, thus breaking the most common ++ * assumptions regarding suspended threads. ++ * ++ * We only care for threads that are not current, and for ++ * XNSUSP, XNDELAY, XNDORMANT and XNHELD conditions, because: ++ * ++ * - There is no point in dealing with a relaxed thread which ++ * is current, since personalities have to ask for primary ++ * mode switch when processing any syscall which may block the ++ * caller (i.e. __xn_exec_primary). ++ * ++ * - among all blocking bits (XNTHREAD_BLOCK_BITS), only ++ * XNSUSP, XNDELAY, XNHELD and XNDBGSTOP may be applied by the ++ * current thread to a non-current thread. XNPEND is always ++ * added by the caller to its own state, XNMIGRATE, XNRELAX ++ * and XNDBGSTOP have special semantics escaping this issue. ++ * ++ * We don't signal threads which are already in a dormant ++ * state, since they are suspended by definition. ++ */ ++ if (((oldstate & (XNTHREAD_BLOCK_BITS|XNUSER)) == (XNRELAX|XNUSER)) && ++ (mask & (XNDELAY | XNSUSP | XNHELD)) != 0) ++ xnthread_signal(thread, SIGSHADOW, SIGSHADOW_ACTION_HARDEN); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ return; ++ ++lock_break: ++ /* NOTE: thread is current */ ++ if (xnthread_test_state(thread, XNWARN) && ++ !xnthread_test_localinfo(thread, XNLBALERT)) { ++ xnthread_set_info(thread, XNKICKED); ++ xnthread_set_localinfo(thread, XNLBALERT); ++ xnthread_signal(thread, SIGDEBUG, SIGDEBUG_LOCK_BREAK); ++ } ++abort: ++ if (wchan) { ++ thread->wchan = wchan; ++ xnsynch_forget_sleeper(thread); ++ } ++ xnthread_clear_info(thread, XNRMID | XNTIMEO); ++ xnthread_set_info(thread, XNBREAK); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnthread_suspend); ++ ++/** ++ * @fn void xnthread_resume(struct xnthread *thread,int mask) ++ * @brief Resume a thread. ++ * ++ * Resumes the execution of a thread previously suspended by one or ++ * more calls to xnthread_suspend(). This call removes a suspensive ++ * condition affecting the target thread. 
When all suspensive ++ * conditions are gone, the thread is left in a READY state at which ++ * point it becomes eligible anew for scheduling. ++ * ++ * @param thread The descriptor address of the resumed thread. ++ * ++ * @param mask The suspension mask specifying the suspensive condition ++ * to remove from the thread's wait mask. Possible values usable by ++ * the caller are: ++ * ++ * - XNSUSP. This flag removes the explicit suspension condition. This ++ * condition might be additive to the XNPEND condition. ++ * ++ * - XNDELAY. This flag removes the counted delay wait condition. ++ * ++ * - XNPEND. This flag removes the resource wait condition. If a ++ * watchdog is armed, it is automatically disarmed by this ++ * call. Unlike the two previous conditions, only the current thread ++ * can set this condition for itself, i.e. no thread can force another ++ * one to pend on a resource. ++ * ++ * When the thread is eventually resumed by one or more calls to ++ * xnthread_resume(), the caller of xnthread_suspend() in the awakened ++ * thread that suspended itself should check for the following bits in ++ * its own information mask to determine what caused its wake up: ++ * ++ * - XNRMID means that the caller must assume that the pended ++ * synchronization object has been destroyed (see xnsynch_flush()). ++ * ++ * - XNTIMEO means that the delay elapsed, or the watchdog went off ++ * before the corresponding synchronization object was signaled. ++ * ++ * - XNBREAK means that the wait has been forcibly broken by a call to ++ * xnthread_unblock(). ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void xnthread_resume(struct xnthread *thread, int mask) ++{ ++ unsigned long oldstate; ++ struct xnsched *sched; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_thread_resume(thread, mask); ++ ++ xntrace_pid(xnthread_host_pid(thread), xnthread_current_priority(thread)); ++ ++ sched = thread->sched; ++ oldstate = thread->state; ++ ++ if ((oldstate & XNTHREAD_BLOCK_BITS) == 0) { ++ if (oldstate & XNREADY) ++ xnsched_dequeue(thread); ++ goto enqueue; ++ } ++ ++ /* Clear the specified block bit(s) */ ++ xnthread_clear_state(thread, mask); ++ ++ /* ++ * If XNDELAY was set in the clear mask, xnthread_unblock() ++ * was called for the thread, or a timeout has elapsed. In the ++ * latter case, stopping the timer is a no-op. ++ */ ++ if (mask & XNDELAY) ++ xntimer_stop(&thread->rtimer); ++ ++ if (!xnthread_test_state(thread, XNTHREAD_BLOCK_BITS)) ++ goto clear_wchan; ++ ++ if (mask & XNDELAY) { ++ mask = xnthread_test_state(thread, XNPEND); ++ if (mask == 0) ++ goto unlock_and_exit; ++ if (thread->wchan) ++ xnsynch_forget_sleeper(thread); ++ goto recheck_state; ++ } ++ ++ if (xnthread_test_state(thread, XNDELAY)) { ++ if (mask & XNPEND) { ++ /* ++ * A resource became available to the thread. ++ * Cancel the watchdog timer. ++ */ ++ xntimer_stop(&thread->rtimer); ++ xnthread_clear_state(thread, XNDELAY); ++ } ++ goto recheck_state; ++ } ++ ++ /* ++ * The thread is still suspended, but is no more pending on a ++ * resource. ++ */ ++ if ((mask & XNPEND) != 0 && thread->wchan) ++ xnsynch_forget_sleeper(thread); ++ ++ goto unlock_and_exit; ++ ++recheck_state: ++ if (xnthread_test_state(thread, XNTHREAD_BLOCK_BITS)) ++ goto unlock_and_exit; ++ ++clear_wchan: ++ if ((mask & ~XNDELAY) != 0 && thread->wchan != NULL) ++ /* ++ * If the thread was actually suspended, clear the ++ * wait channel. -- this allows requests like ++ * xnthread_suspend(thread,XNDELAY,...) 
not to run ++ * the following code when the suspended thread is ++ * woken up while undergoing a simple delay. ++ */ ++ xnsynch_forget_sleeper(thread); ++ ++ if (unlikely((oldstate & mask) & XNHELD)) { ++ xnsched_requeue(thread); ++ goto ready; ++ } ++enqueue: ++ xnsched_enqueue(thread); ++ready: ++ xnthread_set_state(thread, XNREADY); ++ xnsched_set_resched(sched); ++unlock_and_exit: ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnthread_resume); ++ ++/** ++ * @fn int xnthread_unblock(struct xnthread *thread) ++ * @brief Unblock a thread. ++ * ++ * Breaks the thread out of any wait it is currently in. This call ++ * removes the XNDELAY and XNPEND suspensive conditions previously put ++ * by xnthread_suspend() on the target thread. If all suspensive ++ * conditions are gone, the thread is left in a READY state at which ++ * point it becomes eligible anew for scheduling. ++ * ++ * @param thread The descriptor address of the unblocked thread. ++ * ++ * This call neither releases the thread from the XNSUSP, XNRELAX, ++ * XNDORMANT or XNHELD suspensive conditions. ++ * ++ * When the thread resumes execution, the XNBREAK bit is set in the ++ * unblocked thread's information mask. Unblocking a non-blocked ++ * thread is perfectly harmless. ++ * ++ * @return non-zero is returned if the thread was actually unblocked ++ * from a pending wait state, 0 otherwise. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++int xnthread_unblock(struct xnthread *thread) ++{ ++ int ret = 1; ++ spl_t s; ++ ++ /* ++ * Attempt to abort an undergoing wait for the given thread. ++ * If this state is due to an alarm that has been armed to ++ * limit the sleeping thread's waiting time while it pends for ++ * a resource, the corresponding XNPEND state will be cleared ++ * by xnthread_resume() in the same move. Otherwise, this call ++ * may abort an undergoing infinite wait for a resource (if ++ * any). ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_thread_unblock(thread); ++ ++ if (xnthread_test_state(thread, XNDELAY)) ++ xnthread_resume(thread, XNDELAY); ++ else if (xnthread_test_state(thread, XNPEND)) ++ xnthread_resume(thread, XNPEND); ++ else ++ ret = 0; ++ ++ /* ++ * We should not clear a previous break state if this service ++ * is called more than once before the target thread actually ++ * resumes, so we only set the bit here and never clear ++ * it. However, we must not raise the XNBREAK bit if the ++ * target thread was already awake at the time of this call, ++ * so that downstream code does not get confused by some ++ * "successful but interrupted syscall" condition. IOW, a ++ * break state raised here must always trigger an error code ++ * downstream, and an already successful syscall cannot be ++ * marked as interrupted. ++ */ ++ if (ret) ++ xnthread_set_info(thread, XNBREAK); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_unblock); ++ ++/** ++ * @fn int xnthread_set_periodic(struct xnthread *thread,xnticks_t idate, xntmode_t timeout_mode, xnticks_t period) ++ * @brief Make a thread periodic. ++ * ++ * Make a thread periodic by programming its first release point and ++ * its period in the processor time line. Subsequent calls to ++ * xnthread_wait_period() will delay the thread until the next ++ * periodic release point in the processor timeline is reached. ++ * ++ * @param thread The core thread to make periodic. If NULL, the ++ * current thread is assumed. 
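The three services above can be combined from another kernel context. The following sketch is illustrative only and not taken from the patch; the demo_* helpers, the 1 ms timeout and the header path are assumptions.

#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Block @t under the explicit XNSUSP condition for at most ~1 ms
 * (timeout expressed in nanoseconds, relative to now). */
static void demo_pause(struct xnthread *t)
{
	xnthread_suspend(t, XNSUSP, 1000000, XN_RELATIVE, NULL);
}

/* Lift XNSUSP; @t becomes READY again once no block bit remains. */
static void demo_unpause(struct xnthread *t)
{
	xnthread_resume(t, XNSUSP);
}

/* Break a pending XNPEND/XNDELAY wait; the woken thread finds XNBREAK set. */
static int demo_break_wait(struct xnthread *t)
{
	return xnthread_unblock(t);
}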
++ * ++ * @param idate The initial (absolute) date of the first release ++ * point, expressed in nanoseconds. The affected thread will be ++ * delayed by the first call to xnthread_wait_period() until this ++ * point is reached. If @a idate is equal to XN_INFINITE, the first ++ * release point is set to @a period nanoseconds after the current ++ * date. In the latter case, @a timeout_mode is not considered and can ++ * have any valid value. ++ * ++ * @param timeout_mode The mode of the @a idate parameter. It can ++ * either be set to XN_ABSOLUTE or XN_REALTIME with @a idate different ++ * from XN_INFINITE (see also xntimer_start()). ++ * ++ * @param period The period of the thread, expressed in nanoseconds. ++ * As a side-effect, passing XN_INFINITE attempts to stop the thread's ++ * periodic timer; in the latter case, the routine always exits ++ * succesfully, regardless of the previous state of this timer. ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -ETIMEDOUT is returned @a idate is different from XN_INFINITE and ++ * represents a date in the past. ++ * ++ * - -EINVAL is returned if @a period is different from XN_INFINITE ++ * but shorter than the scheduling latency value for the target ++ * system, as available from /proc/xenomai/latency. -EINVAL is also ++ * returned if @a timeout_mode is not compatible with @a idate, such ++ * as XN_RELATIVE with @a idate different from XN_INFINITE. ++ * ++ * - -EPERM is returned if @a thread is NULL, but the caller is not a ++ * Xenomai thread. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int xnthread_set_periodic(struct xnthread *thread, xnticks_t idate, ++ xntmode_t timeout_mode, xnticks_t period) ++{ ++ int ret = 0; ++ spl_t s; ++ ++ if (thread == NULL) { ++ thread = xnthread_current(); ++ if (thread == NULL) ++ return -EPERM; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (period == XN_INFINITE) { ++ if (xntimer_running_p(&thread->ptimer)) ++ xntimer_stop(&thread->ptimer); ++ ++ goto unlock_and_exit; ++ } ++ ++ /* ++ * LART: detect periods which are shorter than the core clock ++ * gravity for kernel thread timers. This can't work, caller ++ * must have messed up arguments. ++ */ ++ if (period < xnclock_ticks_to_ns(&nkclock, ++ xnclock_get_gravity(&nkclock, kernel))) { ++ ret = -EINVAL; ++ goto unlock_and_exit; ++ } ++ ++ xntimer_set_affinity(&thread->ptimer, thread->sched); ++ ++ if (idate == XN_INFINITE) ++ xntimer_start(&thread->ptimer, period, period, XN_RELATIVE); ++ else { ++ if (timeout_mode == XN_REALTIME) ++ idate -= xnclock_get_offset(xntimer_clock(&thread->ptimer)); ++ else if (timeout_mode != XN_ABSOLUTE) { ++ ret = -EINVAL; ++ goto unlock_and_exit; ++ } ++ ret = xntimer_start(&thread->ptimer, idate, period, ++ XN_ABSOLUTE); ++ } ++ ++unlock_and_exit: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_set_periodic); ++ ++/** ++ * @fn int xnthread_wait_period(unsigned long *overruns_r) ++ * @brief Wait for the next periodic release point. ++ * ++ * Make the current thread wait for the next periodic release point in ++ * the processor time line. ++ * ++ * @param overruns_r If non-NULL, @a overruns_r must be a pointer to a ++ * memory location which will be written with the count of pending ++ * overruns. This value is copied only when xnthread_wait_period() ++ * returns -ETIMEDOUT or success; the memory location remains ++ * unmodified otherwise. If NULL, this count will never be copied ++ * back. 
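An illustrative sketch only (names and the 100 us value are assumptions): programming and cancelling the periodic timer of the calling thread with the service above.

#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Make the calling Cobalt thread periodic at 100 us; with idate set to
 * XN_INFINITE the first release point is one period from now and the
 * timeout mode is not considered. */
static int demo_make_periodic(void)
{
	return xnthread_set_periodic(NULL, XN_INFINITE, XN_RELATIVE, 100000);
}

/* Passing XN_INFINITE as the period stops the periodic timer. */
static int demo_stop_periodic(void)
{
	return xnthread_set_periodic(NULL, XN_INFINITE, XN_RELATIVE, XN_INFINITE);
}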
++ * ++ * @return 0 is returned upon success; if @a overruns_r is valid, zero ++ * is copied to the pointed memory location. Otherwise: ++ * ++ * - -EWOULDBLOCK is returned if xnthread_set_periodic() has not ++ * previously been called for the calling thread. ++ * ++ * - -EINTR is returned if xnthread_unblock() has been called for the ++ * waiting thread before the next periodic release point has been ++ * reached. In this case, the overrun counter is reset too. ++ * ++ * - -ETIMEDOUT is returned if the timer has overrun, which indicates ++ * that one or more previous release points have been missed by the ++ * calling thread. If @a overruns_r is valid, the count of pending ++ * overruns is copied to the pointed memory location. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int xnthread_wait_period(unsigned long *overruns_r) ++{ ++ unsigned long overruns = 0; ++ struct xnthread *thread; ++ struct xnclock *clock; ++ xnticks_t now; ++ int ret = 0; ++ spl_t s; ++ ++ thread = xnthread_current(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (unlikely(!xntimer_running_p(&thread->ptimer))) { ++ ret = -EWOULDBLOCK; ++ goto out; ++ } ++ ++ trace_cobalt_thread_wait_period(thread); ++ ++ clock = xntimer_clock(&thread->ptimer); ++ now = xnclock_read_raw(clock); ++ if (likely((xnsticks_t)(now - xntimer_pexpect(&thread->ptimer)) < 0)) { ++ xnthread_suspend(thread, XNDELAY, XN_INFINITE, XN_RELATIVE, NULL); ++ if (unlikely(xnthread_test_info(thread, XNBREAK))) { ++ ret = -EINTR; ++ goto out; ++ } ++ ++ now = xnclock_read_raw(clock); ++ } ++ ++ overruns = xntimer_get_overruns(&thread->ptimer, thread, now); ++ if (overruns) { ++ ret = -ETIMEDOUT; ++ trace_cobalt_thread_missed_period(thread); ++ } ++ ++ if (likely(overruns_r != NULL)) ++ *overruns_r = overruns; ++ out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_wait_period); ++ ++/** ++ * @fn int xnthread_set_slice(struct xnthread *thread, xnticks_t quantum) ++ * @brief Set thread time-slicing information. ++ * ++ * Update the time-slicing information for a given thread. This ++ * service enables or disables round-robin scheduling for the thread, ++ * depending on the value of @a quantum. By default, times-slicing is ++ * disabled for a new thread initialized by a call to xnthread_init(). ++ * ++ * @param thread The descriptor address of the affected thread. ++ * ++ * @param quantum The time quantum assigned to the thread expressed in ++ * nanoseconds. If @a quantum is different from XN_INFINITE, the ++ * time-slice for the thread is set to that value and its current time ++ * credit is refilled (i.e. the thread is given a full time-slice to ++ * run next). Otherwise, if @a quantum equals XN_INFINITE, ++ * time-slicing is stopped for that thread. ++ * ++ * @return 0 is returned upon success. Otherwise, -EINVAL is returned ++ * if @a quantum is not XN_INFINITE and: ++ * ++ * - the base scheduling class of the target thread does not support ++ * time-slicing, ++ * ++ * - @a quantum is smaller than the master clock gravity for a user ++ * thread, which denotes a spurious value. 
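A minimal periodic loop built on xnthread_set_periodic() and xnthread_wait_period(); illustrative only, with a hypothetical demo_* entry point and an arbitrary 1 ms period.

#include <linux/errno.h>
#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Hypothetical body of a Cobalt kernel thread running a 1 ms loop. */
static void demo_periodic_body(void *cookie)
{
	unsigned long overruns, missed = 0;
	int ret;

	if (xnthread_set_periodic(NULL, XN_INFINITE, XN_RELATIVE, 1000000))
		return;

	for (;;) {
		ret = xnthread_wait_period(&overruns);
		if (ret == -ETIMEDOUT)
			missed += overruns;	/* late: count missed release points */
		else if (ret)
			break;			/* -EINTR (unblocked) or -EWOULDBLOCK */
		/* ... one cycle of periodic work ... */
	}
}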
++ * ++ * @coretags{task-unrestricted} ++ */ ++int xnthread_set_slice(struct xnthread *thread, xnticks_t quantum) ++{ ++ struct xnsched *sched; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sched = thread->sched; ++ thread->rrperiod = quantum; ++ ++ if (quantum != XN_INFINITE) { ++ if (quantum <= xnclock_get_gravity(&nkclock, user) || ++ thread->base_class->sched_tick == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ xnthread_set_state(thread, XNRRB); ++ if (sched->curr == thread) ++ xntimer_start(&sched->rrbtimer, ++ quantum, XN_INFINITE, XN_RELATIVE); ++ } else { ++ xnthread_clear_state(thread, XNRRB); ++ if (sched->curr == thread) ++ xntimer_stop(&sched->rrbtimer); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_set_slice); ++ ++/** ++ * @fn void xnthread_cancel(struct xnthread *thread) ++ * @brief Cancel a thread. ++ * ++ * Request cancellation of a thread. This service forces @a thread to ++ * exit from any blocking call, then to switch to secondary mode. ++ * @a thread will terminate as soon as it reaches a cancellation ++ * point. Cancellation points are defined for the following ++ * situations: ++ * ++ * - @a thread self-cancels by a call to xnthread_cancel(). ++ * - @a thread invokes a Linux syscall (user-space shadow only). ++ * - @a thread receives a Linux signal (user-space shadow only). ++ * - @a thread unblocks from a Xenomai syscall (user-space shadow only). ++ * - @a thread attempts to block on a Xenomai syscall (user-space shadow only). ++ * - @a thread explicitly calls xnthread_test_cancel(). ++ * ++ * @param thread The descriptor address of the thread to terminate. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ * ++ * @note In addition to the common actions taken upon cancellation, a ++ * thread which belongs to the SCHED_WEAK class is sent a regular ++ * SIGTERM signal. ++ */ ++void xnthread_cancel(struct xnthread *thread) ++{ ++ spl_t s; ++ ++ /* Right, so you want to kill the kernel?! */ ++ XENO_BUG_ON(COBALT, xnthread_test_state(thread, XNROOT)); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnthread_test_info(thread, XNCANCELD)) ++ goto check_self_cancel; ++ ++ trace_cobalt_thread_cancel(thread); ++ ++ xnthread_set_info(thread, XNCANCELD); ++ ++ /* ++ * If @thread is not started yet, fake a start request, ++ * raising the kicked condition bit to make sure it will reach ++ * xnthread_test_cancel() on its wakeup path. ++ */ ++ if (xnthread_test_state(thread, XNDORMANT)) { ++ xnthread_set_info(thread, XNKICKED); ++ xnthread_resume(thread, XNDORMANT); ++ goto out; ++ } ++ ++check_self_cancel: ++ if (xnthread_current() == thread) { ++ xnlock_put_irqrestore(&nklock, s); ++ xnthread_test_cancel(); ++ /* ++ * May return if on behalf of an IRQ handler which has ++ * preempted @thread. ++ */ ++ return; ++ } ++ ++ /* ++ * Force the non-current thread to exit: ++ * ++ * - unblock a user thread, switch it to weak scheduling, ++ * then send it SIGTERM. ++ * ++ * - just unblock a kernel thread, it is expected to reach a ++ * cancellation point soon after ++ * (i.e. xnthread_test_cancel()). 
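For illustration only (hypothetical names, arbitrary 1 ms quantum): toggling round-robin scheduling on a thread with the service above.

#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Enable round-robin with a 1 ms quantum; fails with -EINVAL if the base
 * class has no sched_tick handler or the quantum is below the user clock
 * gravity. */
static int demo_enable_rr(struct xnthread *t)
{
	return xnthread_set_slice(t, 1000000);
}

/* XN_INFINITE disables time-slicing again. */
static int demo_disable_rr(struct xnthread *t)
{
	return xnthread_set_slice(t, XN_INFINITE);
}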
++ */ ++ if (xnthread_test_state(thread, XNUSER)) { ++ __xnthread_demote(thread); ++ xnthread_signal(thread, SIGTERM, 0); ++ } else ++ __xnthread_kick(thread); ++out: ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnthread_cancel); ++ ++struct wait_grace_struct { ++ struct completion done; ++ struct rcu_head rcu; ++}; ++ ++static void grace_elapsed(struct rcu_head *head) ++{ ++ struct wait_grace_struct *wgs; ++ ++ wgs = container_of(head, struct wait_grace_struct, rcu); ++ complete(&wgs->done); ++} ++ ++static void wait_for_rcu_grace_period(struct pid *pid) ++{ ++ struct wait_grace_struct wait = { ++ .done = COMPLETION_INITIALIZER_ONSTACK(wait.done), ++ }; ++ struct task_struct *p; ++ ++ init_rcu_head_on_stack(&wait.rcu); ++ ++ for (;;) { ++ call_rcu(&wait.rcu, grace_elapsed); ++ wait_for_completion(&wait.done); ++ if (pid == NULL) ++ break; ++ rcu_read_lock(); ++ p = pid_task(pid, PIDTYPE_PID); ++ rcu_read_unlock(); ++ if (p == NULL) ++ break; ++ reinit_completion(&wait.done); ++ } ++} ++ ++/** ++ * @fn void xnthread_join(struct xnthread *thread, bool uninterruptible) ++ * @brief Join with a terminated thread. ++ * ++ * This service waits for @a thread to terminate after a call to ++ * xnthread_cancel(). If that thread has already terminated or is ++ * dormant at the time of the call, then xnthread_join() returns ++ * immediately. ++ * ++ * xnthread_join() adapts to the calling context (primary or ++ * secondary), switching to secondary mode if needed for the duration ++ * of the wait. Upon return, the original runtime mode is restored, ++ * unless a Linux signal is pending. ++ * ++ * @param thread The descriptor address of the thread to join with. ++ * ++ * @param uninterruptible Boolean telling whether the service should ++ * wait for completion uninterruptible. ++ * ++ * @return 0 is returned on success. Otherwise, the following error ++ * codes indicate the cause of the failure: ++ * ++ * - -EDEADLK is returned if the current thread attempts to join ++ * itself. ++ * ++ * - -EINTR is returned if the current thread was unblocked while ++ * waiting for @a thread to terminate. ++ * ++ * - -EBUSY indicates that another thread is already waiting for @a ++ * thread to terminate. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int xnthread_join(struct xnthread *thread, bool uninterruptible) ++{ ++ struct xnthread *curr = xnthread_current(); ++ int ret = 0, switched = 0; ++ struct pid *pid; ++ pid_t tpid; ++ spl_t s; ++ ++ XENO_BUG_ON(COBALT, xnthread_test_state(thread, XNROOT)); ++ ++ if (thread == curr) ++ return -EDEADLK; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnthread_test_state(thread, XNJOINED)) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ if (xnthread_test_info(thread, XNDORMANT)) ++ goto out; ++ ++ trace_cobalt_thread_join(thread); ++ ++ xnthread_set_state(thread, XNJOINED); ++ tpid = xnthread_host_pid(thread); ++ ++ if (curr && !xnthread_test_state(curr, XNRELAX)) { ++ xnlock_put_irqrestore(&nklock, s); ++ xnthread_relax(0, 0); ++ switched = 1; ++ } else ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* ++ * Since in theory, we might be sleeping there for a long ++ * time, we get a reference on the pid struct holding our ++ * target, then we check for its existence upon wake up. 
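The usual termination pattern pairs the two services above; a sketch, illustrative only, with a hypothetical demo_* name.

#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Request cancellation, then wait interruptibly for the thread to exit.
 * Must not be called on the current thread (-EDEADLK). */
static int demo_destroy(struct xnthread *t)
{
	xnthread_cancel(t);
	return xnthread_join(t, false);	/* 0, -EINTR or -EBUSY */
}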
++ */ ++ pid = find_get_pid(tpid); ++ if (pid == NULL) ++ goto done; ++ ++ /* ++ * We have a tricky issue to deal with, which involves code ++ * relying on the assumption that a destroyed thread will have ++ * scheduled away from do_exit() before xnthread_join() ++ * returns. A typical example is illustrated by the following ++ * sequence, with a RTDM kernel task implemented in a ++ * dynamically loaded module: ++ * ++ * CPU0: rtdm_task_destroy(ktask) ++ * xnthread_cancel(ktask) ++ * xnthread_join(ktask) ++ * ..... ++ * rmmod(module) ++ * ++ * CPU1: in ktask() ++ * ... ++ * ... ++ * __xnthread_test_cancel() ++ * do_exit() ++ * schedule() ++ * ++ * In such a sequence, the code on CPU0 would expect the RTDM ++ * task to have scheduled away upon return from ++ * rtdm_task_destroy(), so that unmapping the destroyed task ++ * code and data memory when unloading the module is always ++ * safe. ++ * ++ * To address this, the joiner first waits for the joinee to ++ * signal completion from the Cobalt thread cleanup handler ++ * (__xnthread_cleanup), then waits for a full RCU grace ++ * period to have elapsed. Since the completion signal is sent ++ * on behalf of do_exit(), we may assume that the joinee has ++ * scheduled away before the RCU grace period ends. ++ */ ++ if (uninterruptible) ++ wait_for_completion(&thread->exited); ++ else { ++ ret = wait_for_completion_interruptible(&thread->exited); ++ if (ret < 0) { ++ put_pid(pid); ++ return -EINTR; ++ } ++ } ++ ++ /* Make sure the joinee has scheduled away ultimately. */ ++ wait_for_rcu_grace_period(pid); ++ ++ put_pid(pid); ++done: ++ ret = 0; ++ if (switched) ++ ret = xnthread_harden(); ++ ++ return ret; ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_join); ++ ++#ifdef CONFIG_SMP ++ ++void xnthread_migrate_passive(struct xnthread *thread, struct xnsched *sched) ++{ /* nklocked, IRQs off */ ++ if (thread->sched == sched) ++ return; ++ ++ trace_cobalt_thread_migrate_passive(thread, xnsched_cpu(sched)); ++ /* ++ * Timer migration is postponed until the next timeout happens ++ * for the periodic and rrb timers. The resource timer will be ++ * moved to the right CPU next time it is armed in ++ * xnthread_suspend(). ++ */ ++ xnsched_migrate_passive(thread, sched); ++ ++ xnstat_exectime_reset_stats(&thread->stat.lastperiod); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++/** ++ * @fn int xnthread_set_schedparam(struct xnthread *thread,struct xnsched_class *sched_class,const union xnsched_policy_param *sched_param) ++ * @brief Change the base scheduling parameters of a thread. ++ * ++ * Changes the base scheduling policy and paramaters of a thread. If ++ * the thread is currently blocked, waiting in priority-pending mode ++ * (XNSYNCH_PRIO) for a synchronization object to be signaled, Cobalt ++ * will attempt to reorder the object's wait queue so that it reflects ++ * the new sleeper's priority, unless the XNSYNCH_DREORD flag has been ++ * set for the pended object. ++ * ++ * @param thread The descriptor address of the affected thread. See ++ * note. ++ * ++ * @param sched_class The new scheduling class the thread should be ++ * assigned to. ++ * ++ * @param sched_param The scheduling parameters to set for the thread; ++ * @a sched_param must be valid within the context of @a sched_class. ++ * ++ * It is absolutely required to use this service to change a thread ++ * priority, in order to have all the needed housekeeping chores ++ * correctly performed. i.e. 
Do *not* call xnsched_set_policy() ++ * directly or worse, change the thread.cprio field by hand in any ++ * case. ++ * ++ * @return 0 is returned on success. Otherwise, a negative error code ++ * indicates the cause of a failure that happened in the scheduling ++ * class implementation for @a sched_class. Invalid parameters passed ++ * into @a sched_param are common causes of error. ++ * ++ * @sideeffect ++ * ++ * - This service does not call the rescheduling procedure but may ++ * affect the state of the run queue for the previous and new ++ * scheduling classes. ++ * ++ * - Assigning the same scheduling class and parameters to a running ++ * or ready thread moves it to the end of the run queue, thus causing ++ * a manual round-robin, except if a priority boost is undergoing. ++ * ++ * @coretags{task-unregistred} ++ * ++ * @note The changes only apply to the Xenomai scheduling parameters ++ * for @a thread. There is no propagation/translation of such changes ++ * to the Linux scheduler for the task mated to the Xenomai target ++ * thread. ++ */ ++int xnthread_set_schedparam(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param) ++{ ++ spl_t s; ++ int ret; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ret = __xnthread_set_schedparam(thread, sched_class, sched_param); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_set_schedparam); ++ ++int __xnthread_set_schedparam(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param) ++{ ++ int old_wprio, new_wprio, ret; ++ ++ old_wprio = thread->wprio; ++ ++ ret = xnsched_set_policy(thread, sched_class, sched_param); ++ if (ret) ++ return ret; ++ ++ new_wprio = thread->wprio; ++ ++ /* ++ * If the thread is waiting on a synchronization object, ++ * update its position in the corresponding wait queue, unless ++ * 1) reordering is explicitly disabled, or 2) the (weighted) ++ * priority has not changed (to prevent spurious round-robin ++ * effects). ++ */ ++ if (old_wprio != new_wprio && thread->wchan && ++ (thread->wchan->status & (XNSYNCH_DREORD|XNSYNCH_PRIO)) ++ == XNSYNCH_PRIO) ++ xnsynch_requeue_sleeper(thread); ++ /* ++ * We should not move the thread at the end of its priority ++ * group, if any of these conditions is true: ++ * ++ * - thread is not runnable; ++ * - thread bears the ready bit which means that xnsched_set_policy() ++ * already reordered the run queue; ++ * - thread currently holds the scheduler lock, so we don't want ++ * any round-robin effect to take place; ++ * - a priority boost is undergoing for this thread. ++ */ ++ if (!xnthread_test_state(thread, XNTHREAD_BLOCK_BITS|XNREADY|XNBOOST) && ++ thread->lock_count == 0) ++ xnsched_putback(thread); ++ ++ xnthread_set_info(thread, XNSCHEDP); ++ /* Ask the target thread to call back if relaxed. */ ++ if (xnthread_test_state(thread, XNRELAX)) ++ xnthread_signal(thread, SIGSHADOW, SIGSHADOW_ACTION_HOME); ++ ++ return ret; ++} ++ ++void __xnthread_test_cancel(struct xnthread *curr) ++{ ++ /* ++ * Just in case xnthread_test_cancel() is called from an IRQ ++ * handler, in which case we may not take the exit path. ++ * ++ * NOTE: curr->sched is stable from our POV and can't change ++ * under our feet. ++ */ ++ if (curr->sched->lflags & XNINIRQ) ++ return; ++ ++ if (!xnthread_test_state(curr, XNRELAX)) ++ xnthread_relax(0, 0); ++ ++ do_exit(0); ++ /* ... won't return ... 
*/ ++ XENO_BUG(COBALT); ++} ++EXPORT_SYMBOL_GPL(__xnthread_test_cancel); ++ ++/** ++ * @internal ++ * @fn int xnthread_harden(void); ++ * @brief Migrate a Linux task to the Xenomai domain. ++ * ++ * This service causes the transition of "current" from the Linux ++ * domain to Xenomai. The shadow will resume in the Xenomai domain as ++ * returning from schedule(). ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int xnthread_harden(void) ++{ ++ struct task_struct *p = current; ++ struct xnthread *thread; ++ struct xnsched *sched; ++ int ret; ++ ++ secondary_mode_only(); ++ ++ thread = xnthread_current(); ++ if (thread == NULL) ++ return -EPERM; ++ ++ if (signal_pending(p)) ++ return -ERESTARTSYS; ++ ++ trace_cobalt_shadow_gohard(thread); ++ ++ xnthread_clear_sync_window(thread, XNRELAX); ++ ++ ret = __ipipe_migrate_head(); ++ if (ret) { ++ xnthread_test_cancel(); ++ xnthread_set_sync_window(thread, XNRELAX); ++ return ret; ++ } ++ ++ /* "current" is now running into the Xenomai domain. */ ++ sched = xnsched_finish_unlocked_switch(thread->sched); ++ xnthread_switch_fpu(sched); ++ ++ xnlock_clear_irqon(&nklock); ++ xnsched_resched_after_unlocked_switch(); ++ xnthread_test_cancel(); ++ ++ trace_cobalt_shadow_hardened(thread); ++ ++ /* ++ * Recheck pending signals once again. As we block task ++ * wakeups during the migration and handle_sigwake_event() ++ * ignores signals until XNRELAX is cleared, any signal ++ * between entering TASK_HARDENING and starting the migration ++ * is just silently queued up to here. ++ */ ++ if (signal_pending(p)) { ++ xnthread_relax(!xnthread_test_state(thread, XNSSTEP), ++ SIGDEBUG_MIGRATE_SIGNAL); ++ return -ERESTARTSYS; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_harden); ++ ++struct lostage_wakeup { ++ struct ipipe_work_header work; /* Must be first. */ ++ struct task_struct *task; ++}; ++ ++static void lostage_task_wakeup(struct ipipe_work_header *work) ++{ ++ struct lostage_wakeup *rq; ++ struct task_struct *p; ++ ++ rq = container_of(work, struct lostage_wakeup, work); ++ p = rq->task; ++ ++ trace_cobalt_lostage_wakeup(p); ++ ++ wake_up_process(p); ++} ++ ++static void post_wakeup(struct task_struct *p) ++{ ++ struct lostage_wakeup wakework = { ++ .work = { ++ .size = sizeof(wakework), ++ .handler = lostage_task_wakeup, ++ }, ++ .task = p, ++ }; ++ ++ trace_cobalt_lostage_request("wakeup", wakework.task); ++ ++ ipipe_post_work_root(&wakework, work); ++} ++ ++void __xnthread_propagate_schedparam(struct xnthread *curr) ++{ ++ int kpolicy = SCHED_FIFO, kprio = curr->bprio, ret; ++ struct task_struct *p = current; ++ struct sched_param param; ++ spl_t s; ++ ++ /* ++ * Test-set race for XNSCHEDP is ok, the propagation is meant ++ * to be done asap but not guaranteed to be carried out ++ * immediately, and the request will remain pending until it ++ * is eventually handled. We just have to protect against a ++ * set-clear race. ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ xnthread_clear_info(curr, XNSCHEDP); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* ++ * Map our policies/priorities to the regular kernel's ++ * (approximated). 
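A sketch of a base-priority change via xnthread_set_schedparam(), illustrative only. The xnsched_class_rt class and the rt.prio field are the ones used by the demotion helper later in this file; the priority value of 50 and the header paths are assumptions.

#include <cobalt/kernel/thread.h>	/* assumed header paths */
#include <cobalt/kernel/sched.h>

/* Move @t to the RT class at a hypothetical priority of 50. */
static int demo_set_rt_prio(struct xnthread *t)
{
	union xnsched_policy_param param;

	param.rt.prio = 50;
	return xnthread_set_schedparam(t, &xnsched_class_rt, &param);
}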
++ */ ++ if (xnthread_test_state(curr, XNWEAK) && kprio == 0) ++ kpolicy = SCHED_NORMAL; ++ else if (kprio >= MAX_USER_RT_PRIO) ++ kprio = MAX_USER_RT_PRIO - 1; ++ ++ if (p->policy != kpolicy || (kprio > 0 && p->rt_priority != kprio)) { ++ param.sched_priority = kprio; ++ ret = sched_setscheduler_nocheck(p, kpolicy, ¶m); ++ XENO_WARN_ON(COBALT, ret != 0); ++ } ++} ++ ++/** ++ * @internal ++ * @fn void xnthread_relax(int notify, int reason); ++ * @brief Switch a shadow thread back to the Linux domain. ++ * ++ * This service yields the control of the running shadow back to ++ * Linux. This is obtained by suspending the shadow and scheduling a ++ * wake up call for the mated user task inside the Linux domain. The ++ * Linux task will resume on return from xnthread_suspend() on behalf ++ * of the root thread. ++ * ++ * @param notify A boolean flag indicating whether threads monitored ++ * from secondary mode switches should be sent a SIGDEBUG signal. For ++ * instance, some internal operations like task exit should not ++ * trigger such signal. ++ * ++ * @param reason The reason to report along with the SIGDEBUG signal. ++ * ++ * @coretags{primary-only, might-switch} ++ * ++ * @note "current" is valid here since the shadow runs with the ++ * properties of the Linux task. ++ */ ++void xnthread_relax(int notify, int reason) ++{ ++ struct xnthread *thread = xnthread_current(); ++ struct task_struct *p = current; ++ int suspension = XNRELAX; ++ int cpu __maybe_unused; ++ kernel_siginfo_t si; ++ ++ primary_mode_only(); ++ ++ /* ++ * Enqueue the request to move the running shadow from the Xenomai ++ * domain to the Linux domain. This will cause the Linux task ++ * to resume using the register state of the shadow thread. ++ */ ++ trace_cobalt_shadow_gorelax(reason); ++ ++ /* ++ * If you intend to change the following interrupt-free ++ * sequence, /first/ make sure to check the special handling ++ * of XNRELAX in xnthread_suspend() when switching out the ++ * current thread, not to break basic assumptions we make ++ * there. ++ * ++ * We disable interrupts during the migration sequence, but ++ * xnthread_suspend() has an interrupts-on section built in. ++ */ ++ splmax(); ++ post_wakeup(p); ++ /* ++ * Grab the nklock to synchronize the Linux task state ++ * manipulation with handle_sigwake_event. This lock will be ++ * dropped by xnthread_suspend(). ++ */ ++ xnlock_get(&nklock); ++#ifdef IPIPE_KEVT_USERINTRET ++ /* ++ * If the thread is being debugged, record that it should migrate back ++ * in case it resumes in userspace. If it resumes in kernel space, i.e. ++ * over a restarting syscall, the associated hardening will both clear ++ * XNCONTHI and disable the user return notifier again. ++ */ ++ if (xnthread_test_state(thread, XNSSTEP)) { ++ xnthread_set_info(thread, XNCONTHI); ++ ipipe_enable_user_intret_notifier(); ++ suspension |= XNDBGSTOP; ++ } ++#endif ++ set_current_state(p->state & ~TASK_NOWAKEUP); ++ xnthread_run_handler_stack(thread, relax_thread); ++ xnthread_suspend(thread, suspension, XN_INFINITE, XN_RELATIVE, NULL); ++ splnone(); ++ ++ /* ++ * Basic sanity check after an expected transition to secondary ++ * mode. ++ */ ++ XENO_WARN(COBALT, !ipipe_root_p, ++ "xnthread_relax() failed for thread %s[%d]", ++ thread->name, xnthread_host_pid(thread)); ++ ++ __ipipe_reenter_root(); ++ ++ /* Account for secondary mode switch. 
*/ ++ xnstat_counter_inc(&thread->stat.ssw); ++ ++ /* ++ * When relaxing, we check for propagating to the regular ++ * kernel new Cobalt schedparams that might have been set for ++ * us while we were running in primary mode. ++ * ++ * CAUTION: This obviously won't update the schedparams cached ++ * by the glibc for the caller in user-space, but this is the ++ * deal: we don't relax threads which issue ++ * pthread_setschedparam[_ex]() from primary mode, but then ++ * only the kernel side (Cobalt and the host kernel) will be ++ * aware of the change, and glibc might cache obsolete ++ * information. ++ */ ++ xnthread_propagate_schedparam(thread); ++ ++ if (xnthread_test_state(thread, XNUSER) && notify) { ++ if (xnthread_test_state(thread, XNWARN)) { ++ /* Help debugging spurious relaxes. */ ++ xndebug_notify_relax(thread, reason); ++ memset(&si, 0, sizeof(si)); ++ si.si_signo = SIGDEBUG; ++ si.si_code = SI_QUEUE; ++ si.si_int = reason | sigdebug_marker; ++ send_sig_info(SIGDEBUG, &si, p); ++ } ++ xnsynch_detect_boosted_relax(thread); ++ } ++ ++ /* ++ * "current" is now running into the Linux domain on behalf of ++ * the root thread. ++ */ ++ xnthread_sync_window(thread); ++ ++#ifdef CONFIG_SMP ++ if (xnthread_test_localinfo(thread, XNMOVED)) { ++ xnthread_clear_localinfo(thread, XNMOVED); ++ cpu = xnsched_cpu(thread->sched); ++ set_cpus_allowed_ptr(p, cpumask_of(cpu)); ++ } ++#endif ++ /* ++ * After migration there will be no syscall restart (rather a signal ++ * delivery). ++ */ ++ xnthread_clear_localinfo(thread, XNSYSRST); ++ ++ ipipe_clear_thread_flag(TIP_MAYDAY); ++ ++ trace_cobalt_shadow_relaxed(thread); ++} ++EXPORT_SYMBOL_GPL(xnthread_relax); ++ ++struct lostage_signal { ++ struct ipipe_work_header work; /* Must be first. */ ++ struct task_struct *task; ++ int signo, sigval; ++}; ++ ++static inline void do_kthread_signal(struct task_struct *p, ++ struct xnthread *thread, ++ struct lostage_signal *rq) ++{ ++ printk(XENO_WARNING ++ "kernel shadow %s received unhandled signal %d (action=0x%x)\n", ++ thread->name, rq->signo, rq->sigval); ++} ++ ++static void lostage_task_signal(struct ipipe_work_header *work) ++{ ++ struct lostage_signal *rq; ++ struct xnthread *thread; ++ struct task_struct *p; ++ kernel_siginfo_t si; ++ int signo; ++ ++ rq = container_of(work, struct lostage_signal, work); ++ p = rq->task; ++ ++ thread = xnthread_from_task(p); ++ if (thread && !xnthread_test_state(thread, XNUSER)) { ++ do_kthread_signal(p, thread, rq); ++ return; ++ } ++ ++ signo = rq->signo; ++ ++ trace_cobalt_lostage_signal(p, signo); ++ ++ if (signo == SIGSHADOW || signo == SIGDEBUG) { ++ memset(&si, '\0', sizeof(si)); ++ si.si_signo = signo; ++ si.si_code = SI_QUEUE; ++ si.si_int = rq->sigval; ++ send_sig_info(signo, &si, p); ++ } else ++ send_sig(signo, p, 1); ++} ++ ++static int force_wakeup(struct xnthread *thread) /* nklock locked, irqs off */ ++{ ++ int ret = 0; ++ ++ if (xnthread_test_info(thread, XNKICKED)) ++ return 1; ++ ++ if (xnthread_unblock(thread)) { ++ xnthread_set_info(thread, XNKICKED); ++ ret = 1; ++ } ++ ++ /* ++ * CAUTION: we must NOT raise XNBREAK when clearing a forcible ++ * block state, such as XNSUSP, XNHELD. The caller of ++ * xnthread_suspend() we unblock shall proceed as for a normal ++ * return, until it traverses a cancellation point if ++ * XNCANCELD was raised earlier, or calls xnthread_suspend() ++ * which will detect XNKICKED and act accordingly. 
++ * ++ * Rationale: callers of xnthread_suspend() may assume that ++ * receiving XNBREAK means that the process that motivated the ++ * blocking did not go to completion. E.g. the wait context ++ * (see. xnthread_prepare_wait()) was NOT posted before ++ * xnsynch_sleep_on() returned, leaving no useful data there. ++ * Therefore, in case only XNSUSP remains set for the thread ++ * on entry to force_wakeup(), after XNPEND was lifted earlier ++ * when the wait went to successful completion (i.e. no ++ * timeout), then we want the kicked thread to know that it ++ * did receive the requested resource, not finding XNBREAK in ++ * its state word. ++ * ++ * Callers of xnthread_suspend() may inquire for XNKICKED to ++ * detect forcible unblocks from XNSUSP, XNHELD, if they ++ * should act upon this case specifically. ++ */ ++ if (xnthread_test_state(thread, XNSUSP|XNHELD)) { ++ xnthread_resume(thread, XNSUSP|XNHELD); ++ xnthread_set_info(thread, XNKICKED); ++ } ++ ++ /* ++ * Tricky cases: ++ * ++ * - a thread which was ready on entry wasn't actually ++ * running, but nevertheless waits for the CPU in primary ++ * mode, so we have to make sure that it will be notified of ++ * the pending break condition as soon as it enters ++ * xnthread_suspend() from a blocking Xenomai syscall. ++ * ++ * - a ready/readied thread on exit may be prevented from ++ * running by the scheduling policy module it belongs ++ * to. Typically, policies enforcing a runtime budget do not ++ * block threads with no budget, but rather keep them out of ++ * their run queue, so that ->sched_pick() won't elect ++ * them. We tell the policy handler about the fact that we do ++ * want such thread to run until it relaxes, whatever this ++ * means internally for the implementation. ++ */ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_kick(thread); ++ ++ return ret; ++} ++ ++void __xnthread_kick(struct xnthread *thread) /* nklock locked, irqs off */ ++{ ++ struct task_struct *p = xnthread_host_task(thread); ++ ++ /* Thread is already relaxed -- nop. */ ++ if (xnthread_test_state(thread, XNRELAX)) ++ return; ++ ++ /* ++ * First, try to kick the thread out of any blocking syscall ++ * Xenomai-wise. If that succeeds, then the thread will relax ++ * on its return path to user-space. ++ */ ++ if (force_wakeup(thread)) ++ return; ++ ++ /* ++ * If that did not work out because the thread was not blocked ++ * (i.e. XNPEND/XNDELAY) in a syscall, then force a mayday ++ * trap. Note that we don't want to send that thread any linux ++ * signal, we only want to force it to switch to secondary ++ * mode asap. ++ * ++ * It could happen that a thread is relaxed on a syscall ++ * return path after it was resumed from self-suspension ++ * (e.g. XNSUSP) then also forced to run a mayday trap right ++ * after: this is still correct, at worst we would get a ++ * useless mayday syscall leading to a no-op, no big deal. ++ */ ++ xnthread_set_info(thread, XNKICKED); ++ ++ /* ++ * We may send mayday signals to userland threads only. ++ * However, no need to run a mayday trap if the current thread ++ * kicks itself out of primary mode: it will relax on its way ++ * back to userland via the current syscall ++ * epilogue. Otherwise, we want that thread to enter the ++ * mayday trap asap, to call us back for relaxing. 
++ */ ++ if (thread != xnsched_current_thread() && ++ xnthread_test_state(thread, XNUSER)) ++ ipipe_raise_mayday(p); ++} ++ ++void xnthread_kick(struct xnthread *thread) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ __xnthread_kick(thread); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnthread_kick); ++ ++void __xnthread_demote(struct xnthread *thread) /* nklock locked, irqs off */ ++{ ++ struct xnsched_class *sched_class; ++ union xnsched_policy_param param; ++ ++ /* ++ * First we kick the thread out of primary mode, and have it ++ * resume execution immediately over the regular linux ++ * context. ++ */ ++ __xnthread_kick(thread); ++ ++ /* ++ * Then we demote it, turning that thread into a non real-time ++ * Xenomai shadow, which still has access to Xenomai ++ * resources, but won't compete for real-time scheduling ++ * anymore. In effect, moving the thread to a weak scheduling ++ * class/priority will prevent it from sticking back to ++ * primary mode. ++ */ ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ param.weak.prio = 0; ++ sched_class = &xnsched_class_weak; ++#else ++ param.rt.prio = 0; ++ sched_class = &xnsched_class_rt; ++#endif ++ __xnthread_set_schedparam(thread, sched_class, ¶m); ++} ++ ++void xnthread_demote(struct xnthread *thread) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ __xnthread_demote(thread); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnthread_demote); ++ ++void xnthread_signal(struct xnthread *thread, int sig, int arg) ++{ ++ struct lostage_signal sigwork = { ++ .work = { ++ .size = sizeof(sigwork), ++ .handler = lostage_task_signal, ++ }, ++ .task = xnthread_host_task(thread), ++ .signo = sig, ++ .sigval = sig == SIGDEBUG ? arg | sigdebug_marker : arg, ++ }; ++ ++ trace_cobalt_lostage_request("signal", sigwork.task); ++ ++ ipipe_post_work_root(&sigwork, work); ++} ++EXPORT_SYMBOL_GPL(xnthread_signal); ++ ++void xnthread_pin_initial(struct xnthread *thread) ++{ ++ struct task_struct *p = current; ++ struct xnsched *sched; ++ int cpu; ++ spl_t s; ++ ++ /* ++ * @thread is the Xenomai extension of the current kernel ++ * task. If the current CPU is part of the affinity mask of ++ * this thread, pin the latter on this CPU. Otherwise pin it ++ * to the first CPU of that mask. ++ */ ++ cpu = task_cpu(p); ++ if (!cpumask_test_cpu(cpu, &thread->affinity)) ++ cpu = cpumask_first(&thread->affinity); ++ ++ set_cpus_allowed_ptr(p, cpumask_of(cpu)); ++ /* ++ * @thread is still unstarted Xenomai-wise, we are precisely ++ * in the process of mapping the current kernel task to ++ * it. Therefore xnthread_migrate_passive() is the right way ++ * to pin it on a real-time CPU. ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ xnthread_migrate_passive(thread, sched); ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++struct parent_wakeup_request { ++ struct ipipe_work_header work; /* Must be first. 
*/ ++ struct completion *done; ++}; ++ ++static void do_parent_wakeup(struct ipipe_work_header *work) ++{ ++ struct parent_wakeup_request *rq; ++ ++ rq = container_of(work, struct parent_wakeup_request, work); ++ complete(rq->done); ++} ++ ++static inline void wakeup_parent(struct completion *done) ++{ ++ struct parent_wakeup_request wakework = { ++ .work = { ++ .size = sizeof(wakework), ++ .handler = do_parent_wakeup, ++ }, ++ .done = done, ++ }; ++ ++ trace_cobalt_lostage_request("wakeup", current); ++ ++ ipipe_post_work_root(&wakework, work); ++} ++ ++static inline void init_kthread_info(struct xnthread *thread) ++{ ++ struct ipipe_threadinfo *p; ++ ++ p = ipipe_current_threadinfo(); ++ p->thread = thread; ++ p->process = NULL; ++} ++ ++/** ++ * @fn int xnthread_map(struct xnthread *thread, struct completion *done) ++ * @internal ++ * @brief Create a shadow thread context over a kernel task. ++ * ++ * This call maps a Cobalt core thread to the "current" Linux task ++ * running in kernel space. The priority and scheduling class of the ++ * underlying Linux task are not affected; it is assumed that the ++ * caller did set them appropriately before issuing the shadow mapping ++ * request. ++ * ++ * This call immediately moves the calling kernel thread to the ++ * Xenomai domain. ++ * ++ * @param thread The descriptor address of the new shadow thread to be ++ * mapped to "current". This descriptor must have been previously ++ * initialized by a call to xnthread_init(). ++ * ++ * @param done A completion object to be signaled when @a thread is ++ * fully mapped over the current Linux context, waiting for ++ * xnthread_start(). ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ERESTARTSYS is returned if the current Linux task has received a ++ * signal, thus preventing the final migration to the Xenomai domain ++ * (i.e. in order to process the signal in the Linux domain). This ++ * error should not be considered as fatal. ++ * ++ * - -EPERM is returned if the shadow thread has been killed before ++ * the current task had a chance to return to the caller. In such a ++ * case, the real-time mapping operation has failed globally, and no ++ * Xenomai resource remains attached to it. ++ * ++ * - -EINVAL is returned if the thread control block bears the XNUSER ++ * bit. ++ * ++ * - -EBUSY is returned if either the current Linux task or the ++ * associated shadow thread is already involved in a shadow mapping. ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int xnthread_map(struct xnthread *thread, struct completion *done) ++{ ++ struct task_struct *p = current; ++ int ret; ++ spl_t s; ++ ++ if (xnthread_test_state(thread, XNUSER)) ++ return -EINVAL; ++ ++ if (xnthread_current() || xnthread_test_state(thread, XNMAPPED)) ++ return -EBUSY; ++ ++ thread->u_window = NULL; ++ xnthread_pin_initial(thread); ++ ++ xnthread_init_shadow_tcb(thread); ++ xnthread_suspend(thread, XNRELAX, XN_INFINITE, XN_RELATIVE, NULL); ++ init_kthread_info(thread); ++ xnthread_set_state(thread, XNMAPPED); ++ xndebug_shadow_init(thread); ++ xnthread_run_handler(thread, map_thread); ++ ipipe_enable_notifier(p); ++ ++ /* ++ * CAUTION: Soon after xnthread_init() has returned, ++ * xnthread_start() is commonly invoked from the root domain, ++ * therefore the call site may expect the started kernel ++ * shadow to preempt immediately. As a result of such ++ * assumption, start attributes (struct xnthread_start_attr) ++ * are often laid on the caller's stack. 
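A sketch of the mapping step described above; illustrative only. The xnthread_init() call and the spawner side are not shown, and demo_* is hypothetical.

#include <linux/completion.h>
#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Called from the plain kernel task @thread was initialized over;
 * @done belongs to the spawner, which blocks on it until the mapping
 * completes. */
static int demo_map_shadow(struct xnthread *thread, struct completion *done)
{
	int ret = xnthread_map(thread, done);

	if (ret)	/* -ERESTARTSYS, -EPERM, -EINVAL or -EBUSY */
		return ret;

	/* Now running in the Xenomai domain, dormant until xnthread_start(). */
	return 0;
}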
++ * ++ * For this reason, we raise the completion signal to wake up ++ * the xnthread_init() caller only once the emerging thread is ++ * hardened, and __never__ before that point. Since we run ++ * over the Xenomai domain upon return from xnthread_harden(), ++ * we schedule a virtual interrupt handler in the root domain ++ * to signal the completion object. ++ */ ++ xnthread_resume(thread, XNDORMANT); ++ ret = xnthread_harden(); ++ wakeup_parent(done); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ enlist_new_thread(thread); ++ /* ++ * Make sure xnthread_start() did not slip in from another CPU ++ * while we were back from wakeup_parent(). ++ */ ++ if (thread->entry == NULL) ++ xnthread_suspend(thread, XNDORMANT, ++ XN_INFINITE, XN_RELATIVE, NULL); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnthread_test_cancel(); ++ ++ xntrace_pid(xnthread_host_pid(thread), ++ xnthread_current_priority(thread)); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_map); ++ ++/* nklock locked, irqs off */ ++void xnthread_call_mayday(struct xnthread *thread, int reason) ++{ ++ struct task_struct *p = xnthread_host_task(thread); ++ ++ /* Mayday traps are available to userland threads only. */ ++ XENO_BUG_ON(COBALT, !xnthread_test_state(thread, XNUSER)); ++ xnthread_set_info(thread, XNKICKED); ++ xnthread_signal(thread, SIGDEBUG, reason); ++ ipipe_raise_mayday(p); ++} ++EXPORT_SYMBOL_GPL(xnthread_call_mayday); ++ ++int xnthread_killall(int grace, int mask) ++{ ++ struct xnthread *t, *curr = xnthread_current(); ++ int nrkilled = 0, nrthreads, count; ++ long ret; ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ /* ++ * We may hold the core lock across calls to xnthread_cancel() ++ * provided that we won't self-cancel. ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ nrthreads = cobalt_nrthreads; ++ ++ xnsched_for_each_thread(t) { ++ if (xnthread_test_state(t, XNROOT) || ++ xnthread_test_state(t, mask) != mask || ++ t == curr) ++ continue; ++ ++ if (XENO_DEBUG(COBALT)) ++ printk(XENO_INFO "terminating %s[%d]\n", ++ t->name, xnthread_host_pid(t)); ++ nrkilled++; ++ xnthread_cancel(t); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* ++ * Cancel then join all existing threads during the grace ++ * period. It is the caller's responsibility to prevent more ++ * threads to bind to the system if required, we won't make ++ * any provision for this here. ++ */ ++ count = nrthreads - nrkilled; ++ if (XENO_DEBUG(COBALT)) ++ printk(XENO_INFO "waiting for %d threads to exit\n", ++ nrkilled); ++ ++ if (grace > 0) { ++ ret = wait_event_interruptible_timeout(join_all, ++ cobalt_nrthreads == count, ++ grace * HZ); ++ if (ret == 0) ++ return -EAGAIN; ++ } else ++ ret = wait_event_interruptible(join_all, ++ cobalt_nrthreads == count); ++ ++ /* Wait for a full RCU grace period to expire. */ ++ wait_for_rcu_grace_period(NULL); ++ ++ if (XENO_DEBUG(COBALT)) ++ printk(XENO_INFO "joined %d threads\n", ++ count + nrkilled - cobalt_nrthreads); ++ ++ return ret < 0 ? -EINTR : 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_killall); ++ ++/* Xenomai's generic personality. */ ++struct xnthread_personality xenomai_personality = { ++ .name = "core", ++ .magic = -1 ++}; ++EXPORT_SYMBOL_GPL(xenomai_personality); ++ ++/** @} */ +--- linux/kernel/xenomai/timer.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/timer.c 2021-04-07 16:01:25.780636234 +0800 +@@ -0,0 +1,982 @@ ++/* ++ * Copyright (C) 2001,2002,2003,2007,2012 Philippe Gerum . 
++ * Copyright (C) 2004 Gilles Chanteperdrix ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_timer Timer services ++ * ++ * The Xenomai timer facility depends on a clock source (xnclock) for ++ * scheduling the next activation times. ++ * ++ * The core provides and depends on a monotonic clock source (nkclock) ++ * with nanosecond resolution, driving the platform timer hardware ++ * exposed by the interrupt pipeline. ++ * ++ * @{ ++ */ ++ ++int xntimer_heading_p(struct xntimer *timer) ++{ ++ struct xnsched *sched = timer->sched; ++ xntimerq_t *q; ++ xntimerh_t *h; ++ ++ q = xntimer_percpu_queue(timer); ++ h = xntimerq_head(q); ++ if (h == &timer->aplink) ++ return 1; ++ ++ if (sched->lflags & XNHDEFER) { ++ h = xntimerq_second(q, h); ++ if (h == &timer->aplink) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++void xntimer_enqueue_and_program(struct xntimer *timer, xntimerq_t *q) ++{ ++ xntimer_enqueue(timer, q); ++ if (xntimer_heading_p(timer)) { ++ struct xnsched *sched = xntimer_sched(timer); ++ struct xnclock *clock = xntimer_clock(timer); ++ if (sched != xnsched_current()) ++ xnclock_remote_shot(clock, sched); ++ else ++ xnclock_program_shot(clock, sched); ++ } ++} ++ ++/** ++ * Arm a timer. ++ * ++ * Activates a timer so that the associated timeout handler will be ++ * fired after each expiration time. A timer can be either periodic or ++ * one-shot, depending on the reload value passed to this routine. The ++ * given timer must have been previously initialized. ++ * ++ * A timer is attached to the clock specified in xntimer_init(). ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @param value The date of the initial timer shot, expressed in ++ * nanoseconds. ++ * ++ * @param interval The reload value of the timer. It is a periodic ++ * interval value to be used for reprogramming the next timer shot, ++ * expressed in nanoseconds. If @a interval is equal to XN_INFINITE, ++ * the timer will not be reloaded after it has expired. ++ * ++ * @param mode The timer mode. It can be XN_RELATIVE if @a value shall ++ * be interpreted as a relative date, XN_ABSOLUTE for an absolute date ++ * based on the monotonic clock of the related time base (as returned ++ * my xnclock_read_monotonic()), or XN_REALTIME if the absolute date ++ * is based on the adjustable real-time date for the relevant clock ++ * (obtained from xnclock_read_realtime()). ++ * ++ * @return 0 is returned upon success, or -ETIMEDOUT if an absolute ++ * date in the past has been given. In such an event, the timer is ++ * nevertheless armed for the next shot in the timeline if @a interval ++ * is different from XN_INFINITE. 
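A sketch of arming a timer with the service above, illustrative only. Since xntimer_start() is tagged atomic-entry, the caller serializes on nklock here; the timer is assumed to have been set up with xntimer_init() already, and the 1 ms figures and header paths are assumptions.

#include <cobalt/kernel/timer.h>	/* assumed header paths */
#include <cobalt/kernel/lock.h>

/* Arm @t periodically: first shot 1 ms from now, then every 1 ms. */
static int demo_arm(struct xntimer *t)
{
	spl_t s;
	int ret;

	xnlock_get_irqsave(&nklock, s);
	ret = xntimer_start(t, 1000000, 1000000, XN_RELATIVE);
	xnlock_put_irqrestore(&nklock, s);

	return ret;	/* -ETIMEDOUT only for a date already in the past */
}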
++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++int xntimer_start(struct xntimer *timer, ++ xnticks_t value, xnticks_t interval, ++ xntmode_t mode) ++{ ++ struct xnclock *clock = xntimer_clock(timer); ++ xntimerq_t *q = xntimer_percpu_queue(timer); ++ xnticks_t date, now, delay, period; ++ unsigned long gravity; ++ int ret = 0; ++ ++ trace_cobalt_timer_start(timer, value, interval, mode); ++ ++ if ((timer->status & XNTIMER_DEQUEUED) == 0) ++ xntimer_dequeue(timer, q); ++ ++ now = xnclock_read_raw(clock); ++ ++ timer->status &= ~(XNTIMER_REALTIME | XNTIMER_FIRED | XNTIMER_PERIODIC); ++ switch (mode) { ++ case XN_RELATIVE: ++ if ((xnsticks_t)value < 0) ++ return -ETIMEDOUT; ++ date = xnclock_ns_to_ticks(clock, value) + now; ++ break; ++ case XN_REALTIME: ++ timer->status |= XNTIMER_REALTIME; ++ value -= xnclock_get_offset(clock); ++ /* fall through */ ++ default: /* XN_ABSOLUTE || XN_REALTIME */ ++ date = xnclock_ns_to_ticks(clock, value); ++ if ((xnsticks_t)(date - now) <= 0) { ++ if (interval == XN_INFINITE) ++ return -ETIMEDOUT; ++ /* ++ * We are late on arrival for the first ++ * delivery, wait for the next shot on the ++ * periodic time line. ++ */ ++ delay = now - date; ++ period = xnclock_ns_to_ticks(clock, interval); ++ date += period * (xnarch_div64(delay, period) + 1); ++ } ++ break; ++ } ++ ++ /* ++ * To cope with the basic system latency, we apply a clock ++ * gravity value, which is the amount of time expressed in ++ * clock ticks by which we should anticipate the shot for any ++ * outstanding timer. The gravity value varies with the type ++ * of context the timer wakes up, i.e. irq handler, kernel or ++ * user thread. ++ */ ++ gravity = xntimer_gravity(timer); ++ xntimerh_date(&timer->aplink) = date - gravity; ++ if (now >= xntimerh_date(&timer->aplink)) ++ xntimerh_date(&timer->aplink) += gravity / 2; ++ ++ timer->interval_ns = XN_INFINITE; ++ timer->interval = XN_INFINITE; ++ if (interval != XN_INFINITE) { ++ timer->interval_ns = interval; ++ timer->interval = xnclock_ns_to_ticks(clock, interval); ++ timer->periodic_ticks = 0; ++ timer->start_date = date; ++ timer->pexpect_ticks = 0; ++ timer->status |= XNTIMER_PERIODIC; ++ } ++ ++ timer->status |= XNTIMER_RUNNING; ++ xntimer_enqueue_and_program(timer, q); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xntimer_start); ++ ++/** ++ * @fn int xntimer_stop(struct xntimer *timer) ++ * ++ * @brief Disarm a timer. ++ * ++ * This service deactivates a timer previously armed using ++ * xntimer_start(). Once disarmed, the timer can be subsequently ++ * re-armed using the latter service. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++void __xntimer_stop(struct xntimer *timer) ++{ ++ struct xnclock *clock = xntimer_clock(timer); ++ xntimerq_t *q = xntimer_percpu_queue(timer); ++ struct xnsched *sched; ++ int heading = 1; ++ ++ trace_cobalt_timer_stop(timer); ++ ++ if ((timer->status & XNTIMER_DEQUEUED) == 0) { ++ heading = xntimer_heading_p(timer); ++ xntimer_dequeue(timer, q); ++ } ++ timer->status &= ~(XNTIMER_FIRED|XNTIMER_RUNNING); ++ sched = xntimer_sched(timer); ++ ++ /* ++ * If we removed the heading timer, reprogram the next shot if ++ * any. If the timer was running on another CPU, let it tick. ++ */ ++ if (heading && sched == xnsched_current()) ++ xnclock_program_shot(clock, sched); ++} ++EXPORT_SYMBOL_GPL(__xntimer_stop); ++ ++/** ++ * @fn xnticks_t xntimer_get_date(struct xntimer *timer) ++ * ++ * @brief Return the absolute expiration date. 
++ * ++ * Return the next expiration date of a timer as an absolute count of ++ * nanoseconds. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @return The expiration date in nanoseconds. The special value ++ * XN_INFINITE is returned if @a timer is currently disabled. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++xnticks_t xntimer_get_date(struct xntimer *timer) ++{ ++ if (!xntimer_running_p(timer)) ++ return XN_INFINITE; ++ ++ return xnclock_ticks_to_ns(xntimer_clock(timer), xntimer_expiry(timer)); ++} ++EXPORT_SYMBOL_GPL(xntimer_get_date); ++ ++/** ++ * @fn xnticks_t xntimer_get_timeout(struct xntimer *timer) ++ * ++ * @brief Return the relative expiration date. ++ * ++ * This call returns the count of nanoseconds remaining until the ++ * timer expires. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @return The count of nanoseconds until expiry. The special value ++ * XN_INFINITE is returned if @a timer is currently disabled. It ++ * might happen that the timer expires when this service runs (even if ++ * the associated handler has not been fired yet); in such a case, 1 ++ * is returned. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++xnticks_t __xntimer_get_timeout(struct xntimer *timer) ++{ ++ struct xnclock *clock; ++ xnticks_t expiry, now; ++ ++ clock = xntimer_clock(timer); ++ now = xnclock_read_raw(clock); ++ expiry = xntimer_expiry(timer); ++ if (expiry < now) ++ return 1; /* Will elapse shortly. */ ++ ++ return xnclock_ticks_to_ns(clock, expiry - now); ++} ++EXPORT_SYMBOL_GPL(__xntimer_get_timeout); ++ ++/** ++ * @fn void xntimer_init(struct xntimer *timer,struct xnclock *clock,void (*handler)(struct xntimer *timer), struct xnsched *sched, int flags) ++ * @brief Initialize a timer object. ++ * ++ * Creates a timer. When created, a timer is left disarmed; it must be ++ * started using xntimer_start() in order to be activated. ++ * ++ * @param timer The address of a timer descriptor the nucleus will use ++ * to store the object-specific data. This descriptor must always be ++ * valid while the object is active therefore it must be allocated in ++ * permanent memory. ++ * ++ * @param clock The clock the timer relates to. Xenomai defines a ++ * monotonic system clock, with nanosecond resolution, named ++ * nkclock. In addition, external clocks driven by other tick sources ++ * may be created dynamically if CONFIG_XENO_OPT_EXTCLOCK is defined. ++ * ++ * @param handler The routine to call upon expiration of the timer. ++ * ++ * @param sched An optional pointer to the per-CPU scheduler slot the ++ * new timer is affine to. If non-NULL, the timer will fire on the CPU ++ * @a sched is bound to, otherwise it will fire either on the current ++ * CPU if real-time, or on the first real-time CPU. ++ * ++ * @param flags A set of flags describing the timer. A set of clock ++ * gravity hints can be passed via the @a flags argument, used for ++ * optimizing the built-in heuristics aimed at latency reduction: ++ * ++ * - XNTIMER_IGRAVITY, the timer activates a leaf timer handler. ++ * - XNTIMER_KGRAVITY, the timer activates a kernel thread. ++ * - XNTIMER_UGRAVITY, the timer activates a user-space thread. ++ * ++ * There is no limitation on the number of timers which can be ++ * created/active concurrently. 
++ * ++ * @coretags{unrestricted} ++ */ ++#ifdef DOXYGEN_CPP ++void xntimer_init(struct xntimer *timer, struct xnclock *clock, ++ void (*handler)(struct xntimer *timer), ++ struct xnsched *sched, ++ int flags); ++#endif ++ ++void __xntimer_init(struct xntimer *timer, ++ struct xnclock *clock, ++ void (*handler)(struct xntimer *timer), ++ struct xnsched *sched, ++ int flags) ++{ ++ spl_t s __maybe_unused; ++ ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ timer->clock = clock; ++#endif ++ xntimerh_init(&timer->aplink); ++ xntimerh_date(&timer->aplink) = XN_INFINITE; ++ xntimer_set_priority(timer, XNTIMER_STDPRIO); ++ timer->status = (XNTIMER_DEQUEUED|(flags & XNTIMER_INIT_MASK)); ++ timer->handler = handler; ++ timer->interval_ns = 0; ++ timer->sched = NULL; ++ ++ /* ++ * Set the timer affinity, preferably to xnsched_cpu(sched) if ++ * sched was given, CPU0 otherwise. ++ */ ++ if (sched == NULL) ++ sched = xnsched_struct(0); ++ ++ xntimer_set_affinity(timer, sched); ++ ++#ifdef CONFIG_XENO_OPT_STATS ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ timer->tracker = clock; ++#endif ++ ksformat(timer->name, XNOBJECT_NAME_LEN, "%d/%s", ++ task_pid_nr(current), current->comm); ++ xntimer_reset_stats(timer); ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&timer->next_stat, &clock->timerq); ++ clock->nrtimers++; ++ xnvfile_touch(&clock->timer_vfile); ++ xnlock_put_irqrestore(&nklock, s); ++#endif /* CONFIG_XENO_OPT_STATS */ ++} ++EXPORT_SYMBOL_GPL(__xntimer_init); ++ ++void xntimer_set_gravity(struct xntimer *timer, int gravity) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ timer->status &= ~XNTIMER_GRAVITY_MASK; ++ timer->status |= gravity; ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xntimer_set_gravity); ++ ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ ++static void __xntimer_switch_tracking(struct xntimer *timer, ++ struct xnclock *newclock) ++{ ++ struct xnclock *oldclock = timer->tracker; ++ ++ list_del(&timer->next_stat); ++ oldclock->nrtimers--; ++ xnvfile_touch(&oldclock->timer_vfile); ++ list_add_tail(&timer->next_stat, &newclock->timerq); ++ newclock->nrtimers++; ++ xnvfile_touch(&newclock->timer_vfile); ++ timer->tracker = newclock; ++} ++ ++void xntimer_switch_tracking(struct xntimer *timer, ++ struct xnclock *newclock) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ __xntimer_switch_tracking(timer, newclock); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xntimer_switch_tracking); ++ ++#else ++ ++static inline ++void __xntimer_switch_tracking(struct xntimer *timer, ++ struct xnclock *newclock) ++{ } ++ ++#endif /* CONFIG_XENO_OPT_STATS */ ++ ++/** ++ * @brief Set the reference clock of a timer. ++ * ++ * This service changes the reference clock pacing a timer. If the ++ * clock timers are tracked, the tracking information is updated too. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @param newclock The address of a valid clock descriptor. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++void xntimer_set_clock(struct xntimer *timer, ++ struct xnclock *newclock) ++{ ++ if (timer->clock != newclock) { ++ xntimer_stop(timer); ++ timer->clock = newclock; ++ /* ++ * Since the timer was stopped, we can wait until it ++ * is restarted for fixing its CPU affinity. ++ */ ++ __xntimer_switch_tracking(timer, newclock); ++ } ++} ++ ++#endif /* CONFIG_XENO_OPT_EXTCLOCK */ ++ ++/** ++ * @fn void xntimer_destroy(struct xntimer *timer) ++ * ++ * @brief Release a timer object. ++ * ++ * Destroys a timer. 
After it has been destroyed, all resources ++ * associated with the timer have been released. The timer is ++ * automatically deactivated before deletion if active on entry. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @coretags{unrestricted} ++ */ ++void xntimer_destroy(struct xntimer *timer) ++{ ++ struct xnclock *clock __maybe_unused = xntimer_clock(timer); ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_stop(timer); ++ timer->status |= XNTIMER_KILLED; ++ timer->sched = NULL; ++#ifdef CONFIG_XENO_OPT_STATS ++ list_del(&timer->next_stat); ++ clock->nrtimers--; ++ xnvfile_touch(&clock->timer_vfile); ++#endif /* CONFIG_XENO_OPT_STATS */ ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xntimer_destroy); ++ ++#ifdef CONFIG_SMP ++ ++/** ++ * Migrate a timer. ++ * ++ * This call migrates a timer to another cpu. In order to avoid ++ * pathological cases, it must be called from the CPU to which @a ++ * timer is currently attached. ++ * ++ * @param timer The address of the timer object to be migrated. ++ * ++ * @param sched The address of the destination per-CPU scheduler ++ * slot. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++void __xntimer_migrate(struct xntimer *timer, struct xnsched *sched) ++{ /* nklocked, IRQs off, sched != timer->sched */ ++ struct xnclock *clock; ++ xntimerq_t *q; ++ ++ trace_cobalt_timer_migrate(timer, xnsched_cpu(sched)); ++ ++ /* ++ * This assertion triggers when the timer is migrated to a CPU ++ * for which we do not expect any clock events/IRQs from the ++ * associated clock device. If so, the timer would never fire ++ * since clock ticks would never happen on that CPU. ++ */ ++ XENO_WARN_ON_SMP(COBALT, ++ !cpumask_empty(&xntimer_clock(timer)->affinity) && ++ !cpumask_test_cpu(xnsched_cpu(sched), ++ &xntimer_clock(timer)->affinity)); ++ ++ if (timer->status & XNTIMER_RUNNING) { ++ xntimer_stop(timer); ++ timer->sched = sched; ++ clock = xntimer_clock(timer); ++ q = xntimer_percpu_queue(timer); ++ xntimer_enqueue(timer, q); ++ if (xntimer_heading_p(timer)) ++ xnclock_remote_shot(clock, sched); ++ } else ++ timer->sched = sched; ++} ++EXPORT_SYMBOL_GPL(__xntimer_migrate); ++ ++static inline int get_clock_cpu(struct xnclock *clock, int cpu) ++{ ++ /* ++ * Check a CPU number against the possible set of CPUs ++ * receiving events from the underlying clock device. If the ++ * suggested CPU does not receive events from this device, ++ * return the first one which does instead. ++ * ++ * A global clock device with no particular IRQ affinity may ++ * tick on any CPU, but timers should always be queued on ++ * CPU0. ++ * ++ * NOTE: we have scheduler slots initialized for all online ++ * CPUs, we can program and receive clock ticks on any of ++ * them. So there is no point in restricting the valid CPU set ++ * to cobalt_cpu_affinity, which specifically refers to the ++ * set of CPUs which may run real-time threads. Although ++ * receiving a clock tick for waking up a thread living on a ++ * remote CPU is not optimal since this involves IPI-signaled ++ * rescheds, this is still a valid case. 
++ */ ++ if (cpumask_empty(&clock->affinity)) ++ return 0; ++ ++ if (cpumask_test_cpu(cpu, &clock->affinity)) ++ return cpu; ++ ++ return cpumask_first(&clock->affinity); ++} ++ ++void __xntimer_set_affinity(struct xntimer *timer, struct xnsched *sched) ++{ /* nklocked, IRQs off */ ++ struct xnclock *clock = xntimer_clock(timer); ++ int cpu; ++ ++ /* ++ * Figure out which CPU is best suited for managing this ++ * timer, preferably picking xnsched_cpu(sched) if the ticking ++ * device moving the timer clock beats on that CPU. Otherwise, ++ * pick the first CPU from the clock affinity mask if set. If ++ * not, the timer is backed by a global device with no ++ * particular IRQ affinity, so it should always be queued to ++ * CPU0. ++ */ ++ cpu = 0; ++ if (!cpumask_empty(&clock->affinity)) ++ cpu = get_clock_cpu(clock, xnsched_cpu(sched)); ++ ++ xntimer_migrate(timer, xnsched_struct(cpu)); ++} ++EXPORT_SYMBOL_GPL(__xntimer_set_affinity); ++ ++int xntimer_setup_ipi(void) ++{ ++ return ipipe_request_irq(&xnsched_realtime_domain, ++ IPIPE_HRTIMER_IPI, ++ (ipipe_irq_handler_t)xnintr_core_clock_handler, ++ NULL, NULL); ++} ++ ++void xntimer_release_ipi(void) ++{ ++ ipipe_free_irq(&xnsched_realtime_domain, IPIPE_HRTIMER_IPI); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++/** ++ * Get the count of overruns for the last tick. ++ * ++ * This service returns the count of pending overruns for the last ++ * tick of a given timer, as measured by the difference between the ++ * expected expiry date of the timer and the date @a now passed as ++ * argument. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @param waiter The thread for which the overrun count is being ++ * collected. ++ * ++ * @param now current date (as ++ * xnclock_read_raw(xntimer_clock(timer))) ++ * ++ * @return the number of overruns of @a timer at date @a now ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++unsigned long long xntimer_get_overruns(struct xntimer *timer, ++ struct xnthread *waiter, ++ xnticks_t now) ++{ ++ xnticks_t period = timer->interval; ++ unsigned long long overruns = 0; ++ xnsticks_t delta; ++ xntimerq_t *q; ++ ++ delta = now - xntimer_pexpect(timer); ++ if (unlikely(delta >= (xnsticks_t) period)) { ++ period = timer->interval_ns; ++ delta = xnclock_ticks_to_ns(xntimer_clock(timer), delta); ++ overruns = xnarch_div64(delta, period); ++ timer->pexpect_ticks += overruns; ++ if (xntimer_running_p(timer)) { ++ XENO_BUG_ON(COBALT, (timer->status & ++ (XNTIMER_DEQUEUED|XNTIMER_PERIODIC)) ++ != XNTIMER_PERIODIC); ++ q = xntimer_percpu_queue(timer); ++ xntimer_dequeue(timer, q); ++ while (xntimerh_date(&timer->aplink) < now) { ++ timer->periodic_ticks++; ++ xntimer_update_date(timer); ++ } ++ xntimer_enqueue_and_program(timer, q); ++ } ++ } ++ ++ timer->pexpect_ticks++; ++ ++ /* Hide overruns due to the most recent ptracing session. 
*/ ++ if (xnthread_test_localinfo(waiter, XNHICCUP)) ++ return 0; ++ ++ return overruns; ++} ++EXPORT_SYMBOL_GPL(xntimer_get_overruns); ++ ++char *xntimer_format_time(xnticks_t ns, char *buf, size_t bufsz) ++{ ++ unsigned long ms, us, rem; ++ int len = (int)bufsz; ++ char *p = buf; ++ xnticks_t sec; ++ ++ if (ns == 0 && bufsz > 1) { ++ strcpy(buf, "-"); ++ return buf; ++ } ++ ++ sec = xnclock_divrem_billion(ns, &rem); ++ us = rem / 1000; ++ ms = us / 1000; ++ us %= 1000; ++ ++ if (sec) { ++ p += ksformat(p, bufsz, "%Lus", sec); ++ len = bufsz - (p - buf); ++ } ++ ++ if (len > 0 && (ms || (sec && us))) { ++ p += ksformat(p, bufsz - (p - buf), "%lums", ms); ++ len = bufsz - (p - buf); ++ } ++ ++ if (len > 0 && us) ++ p += ksformat(p, bufsz - (p - buf), "%luus", us); ++ ++ return buf; ++} ++EXPORT_SYMBOL_GPL(xntimer_format_time); ++ ++/** ++ * @internal ++ * @fn static int program_htick_shot(unsigned long delay, struct clock_event_device *cdev) ++ * ++ * @brief Program next host tick as a Xenomai timer event. ++ * ++ * Program the next shot for the host tick on the current CPU. ++ * Emulation is done using a nucleus timer attached to the master ++ * timebase. ++ * ++ * @param delay The time delta from the current date to the next tick, ++ * expressed as a count of nanoseconds. ++ * ++ * @param cdev An pointer to the clock device which notifies us. ++ * ++ * @coretags{unrestricted} ++ */ ++static int program_htick_shot(unsigned long delay, ++ struct clock_event_device *cdev) ++{ ++ struct xnsched *sched; ++ int ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_current(); ++ ret = xntimer_start(&sched->htimer, delay, XN_INFINITE, XN_RELATIVE); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret ? -ETIME : 0; ++} ++ ++/** ++ * @internal ++ * @fn void switch_htick_mode(enum clock_event_mode mode, struct clock_event_device *cdev) ++ * ++ * @brief Tick mode switch emulation callback. ++ * ++ * Changes the host tick mode for the tick device of the current CPU. ++ * ++ * @param mode The new mode to switch to. The possible values are: ++ * ++ * - CLOCK_EVT_MODE_ONESHOT, for a switch to oneshot mode. ++ * ++ * - CLOCK_EVT_MODE_PERIODIC, for a switch to periodic mode. The current ++ * implementation for the generic clockevent layer Linux exhibits ++ * should never downgrade from a oneshot to a periodic tick mode, so ++ * this mode should not be encountered. This said, the associated code ++ * is provided, basically for illustration purposes. ++ * ++ * - CLOCK_EVT_MODE_SHUTDOWN, indicates the removal of the current ++ * tick device. Normally, the nucleus only interposes on tick devices ++ * which should never be shut down, so this mode should not be ++ * encountered. ++ * ++ * @param cdev An opaque pointer to the clock device which notifies us. ++ * ++ * @coretags{unrestricted} ++ * ++ * @note GENERIC_CLOCKEVENTS is required from the host kernel. 
++ */ ++static void switch_htick_mode(enum clock_event_mode mode, ++ struct clock_event_device *cdev) ++{ ++ struct xnsched *sched; ++ xnticks_t tickval; ++ spl_t s; ++ ++ if (mode == CLOCK_EVT_MODE_ONESHOT) ++ return; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sched = xnsched_current(); ++ ++ switch (mode) { ++ case CLOCK_EVT_MODE_PERIODIC: ++ tickval = 1000000000UL / HZ; ++ xntimer_start(&sched->htimer, tickval, tickval, XN_RELATIVE); ++ break; ++ case CLOCK_EVT_MODE_SHUTDOWN: ++ xntimer_stop(&sched->htimer); ++ break; ++ default: ++ XENO_BUG(COBALT); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++/** ++ * @fn int xntimer_grab_hardware(void) ++ * @brief Grab the hardware timer on all real-time CPUs. ++ * ++ * xntimer_grab_hardware() grabs and tunes the hardware timer for all ++ * real-time CPUs. ++ * ++ * Host tick emulation is performed for sharing the clock chip between ++ * Linux and Xenomai. ++ * ++ * @return a positive value is returned on success, representing the ++ * duration of a Linux periodic tick expressed as a count of ++ * nanoseconds; zero should be returned when the Linux kernel does not ++ * undergo periodic timing on the given CPU (e.g. oneshot ++ * mode). Otherwise: ++ * ++ * - -EBUSY is returned if the hardware timer has already been ++ * grabbed. xntimer_release_hardware() must be issued before ++ * xntimer_grab_hardware() is called again. ++ * ++ * - -ENODEV is returned if the hardware timer cannot be used. This ++ * situation may occur after the kernel disabled the timer due to ++ * invalid calibration results; in such a case, such hardware is ++ * unusable for any timing duties. ++ * ++ * @coretags{secondary-only} ++ */ ++static int grab_hardware_timer(int cpu) ++{ ++ int tickval, ret; ++ ++ ret = ipipe_timer_start(xnintr_core_clock_handler, ++ switch_htick_mode, program_htick_shot, cpu); ++ switch (ret) { ++ case CLOCK_EVT_MODE_PERIODIC: ++ /* ++ * Oneshot tick emulation callback won't be used, ask ++ * the caller to start an internal timer for emulating ++ * a periodic tick. ++ */ ++ tickval = 1000000000UL / HZ; ++ break; ++ ++ case CLOCK_EVT_MODE_ONESHOT: ++ /* oneshot tick emulation */ ++ tickval = 1; ++ break; ++ ++ case CLOCK_EVT_MODE_UNUSED: ++ /* we don't need to emulate the tick at all. */ ++ tickval = 0; ++ break; ++ ++ case CLOCK_EVT_MODE_SHUTDOWN: ++ return -ENODEV; ++ ++ default: ++ return ret; ++ } ++ ++ return tickval; ++} ++ ++int xntimer_grab_hardware(void) ++{ ++ struct xnsched *sched; ++ int ret, cpu, _cpu; ++ spl_t s; ++ ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++ /* ++ * Only for statistical purpose, the timer interrupt is ++ * attached by xntimer_grab_hardware(). ++ */ ++ xnintr_init(&nktimer, "[timer]", ++ per_cpu(ipipe_percpu.hrtimer_irq, 0), NULL, NULL, 0); ++#endif /* CONFIG_XENO_OPT_STATS_IRQS */ ++ ++ nkclock.wallclock_offset = ++ xnclock_get_host_time() - xnclock_read_monotonic(&nkclock); ++ ++ ret = xntimer_setup_ipi(); ++ if (ret) ++ return ret; ++ ++ for_each_realtime_cpu(cpu) { ++ ret = grab_hardware_timer(cpu); ++ if (ret < 0) ++ goto fail; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* ++ * If the current tick device for the target CPU is ++ * periodic, we won't be called back for host tick ++ * emulation. Therefore, we need to start a periodic ++ * nucleus timer which will emulate the ticking for ++ * that CPU, since we are going to hijack the hw clock ++ * chip for managing our own system timer. 
++ * ++ * CAUTION: ++ * ++ * - nucleus timers may be started only _after_ the hw ++ * timer has been set up for the target CPU through a ++ * call to xntimer_grab_hardware(). ++ * ++ * - we don't compensate for the elapsed portion of ++ * the current host tick, since we cannot get this ++ * information easily for all CPUs except the current ++ * one, and also because of the declining relevance of ++ * the jiffies clocksource anyway. ++ * ++ * - we must not hold the nklock across calls to ++ * xntimer_grab_hardware(). ++ */ ++ ++ sched = xnsched_struct(cpu); ++ /* Set up timer with host tick period if valid. */ ++ if (ret > 1) ++ xntimer_start(&sched->htimer, ret, ret, XN_RELATIVE); ++ else if (ret == 1) ++ xntimer_start(&sched->htimer, 0, 0, XN_RELATIVE); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++ return 0; ++fail: ++ for_each_realtime_cpu(_cpu) { ++ if (_cpu == cpu) ++ break; ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ xntimer_stop(&sched->htimer); ++ xnlock_put_irqrestore(&nklock, s); ++ ipipe_timer_stop(_cpu); ++ } ++ ++ xntimer_release_ipi(); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xntimer_grab_hardware); ++ ++/** ++ * @fn void xntimer_release_hardware(void) ++ * @brief Release hardware timers. ++ * ++ * Releases hardware timers previously grabbed by a call to ++ * xntimer_grab_hardware(). ++ * ++ * @coretags{secondary-only} ++ */ ++void xntimer_release_hardware(void) ++{ ++ int cpu; ++ ++ /* ++ * We must not hold the nklock while stopping the hardware ++ * timer, since this could cause deadlock situations to arise ++ * on SMP systems. ++ */ ++ for_each_realtime_cpu(cpu) ++ ipipe_timer_stop(cpu); ++ ++ xntimer_release_ipi(); ++ ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++ xnintr_destroy(&nktimer); ++#endif /* CONFIG_XENO_OPT_STATS_IRQS */ ++} ++EXPORT_SYMBOL_GPL(xntimer_release_hardware); ++ ++#if defined(CONFIG_XENO_OPT_TIMER_RBTREE) ++static inline bool xntimerh_is_lt(xntimerh_t *left, xntimerh_t *right) ++{ ++ return left->date < right->date ++ || (left->date == right->date && left->prio > right->prio); ++} ++ ++void xntimerq_insert(xntimerq_t *q, xntimerh_t *holder) ++{ ++ struct rb_node **new = &q->root.rb_node, *parent = NULL; ++ ++ if (!q->head) ++ q->head = holder; ++ else if (xntimerh_is_lt(holder, q->head)) { ++ parent = &q->head->link; ++ new = &parent->rb_left; ++ q->head = holder; ++ } else while (*new) { ++ xntimerh_t *i = container_of(*new, xntimerh_t, link); ++ ++ parent = *new; ++ if (xntimerh_is_lt(holder, i)) ++ new = &((*new)->rb_left); ++ else ++ new = &((*new)->rb_right); ++ } ++ ++ rb_link_node(&holder->link, parent, new); ++ rb_insert_color(&holder->link, &q->root); ++} ++#endif ++ ++/** @} */ +--- linux/kernel/xenomai/clock.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/clock.c 2021-04-07 16:01:25.775636241 +0800 +@@ -0,0 +1,900 @@ ++/* ++ * Copyright (C) 2006-2011 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_clock Clock services ++ * ++ * @{ ++ */ ++unsigned long nktimerlat; ++ ++static unsigned long long clockfreq; ++ ++#ifdef XNARCH_HAVE_LLMULSHFT ++ ++static unsigned int tsc_scale, tsc_shift; ++ ++#ifdef XNARCH_HAVE_NODIV_LLIMD ++ ++static struct xnarch_u32frac tsc_frac; ++static struct xnarch_u32frac bln_frac; ++ ++long long xnclock_core_ns_to_ticks(long long ns) ++{ ++ return xnarch_nodiv_llimd(ns, tsc_frac.frac, tsc_frac.integ); ++} ++ ++unsigned long long xnclock_divrem_billion(unsigned long long value, ++ unsigned long *rem) ++{ ++ unsigned long long q; ++ unsigned r; ++ ++ q = xnarch_nodiv_ullimd(value, bln_frac.frac, bln_frac.integ); ++ r = value - q * 1000000000; ++ if (r >= 1000000000) { ++ ++q; ++ r -= 1000000000; ++ } ++ *rem = r; ++ return q; ++} ++ ++#else /* !XNARCH_HAVE_NODIV_LLIMD */ ++ ++long long xnclock_core_ns_to_ticks(long long ns) ++{ ++ return xnarch_llimd(ns, 1 << tsc_shift, tsc_scale); ++} ++ ++#endif /* !XNARCH_HAVE_NODIV_LLIMD */ ++ ++xnsticks_t xnclock_core_ticks_to_ns(xnsticks_t ticks) ++{ ++ return xnarch_llmulshft(ticks, tsc_scale, tsc_shift); ++} ++ ++xnsticks_t xnclock_core_ticks_to_ns_rounded(xnsticks_t ticks) ++{ ++ unsigned int shift = tsc_shift - 1; ++ return (xnarch_llmulshft(ticks, tsc_scale, shift) + 1) / 2; ++} ++ ++#else /* !XNARCH_HAVE_LLMULSHFT */ ++ ++xnsticks_t xnclock_core_ticks_to_ns(xnsticks_t ticks) ++{ ++ return xnarch_llimd(ticks, 1000000000, clockfreq); ++} ++ ++xnsticks_t xnclock_core_ticks_to_ns_rounded(xnsticks_t ticks) ++{ ++ return (xnarch_llimd(ticks, 1000000000, clockfreq/2) + 1) / 2; ++} ++ ++xnsticks_t xnclock_core_ns_to_ticks(xnsticks_t ns) ++{ ++ return xnarch_llimd(ns, clockfreq, 1000000000); ++} ++ ++#endif /* !XNARCH_HAVE_LLMULSHFT */ ++ ++#ifndef XNARCH_HAVE_NODIV_LLIMD ++unsigned long long xnclock_divrem_billion(unsigned long long value, ++ unsigned long *rem) ++{ ++ return xnarch_ulldiv(value, 1000000000, rem); ++ ++} ++#endif /* !XNARCH_HAVE_NODIV_LLIMD */ ++ ++EXPORT_SYMBOL_GPL(xnclock_core_ticks_to_ns); ++EXPORT_SYMBOL_GPL(xnclock_core_ticks_to_ns_rounded); ++EXPORT_SYMBOL_GPL(xnclock_core_ns_to_ticks); ++EXPORT_SYMBOL_GPL(xnclock_divrem_billion); ++ ++DEFINE_PRIVATE_XNLOCK(ratelimit_lock); ++ ++int __xnclock_ratelimit(struct xnclock_ratelimit_state *rs, const char *func) ++{ ++ spl_t s; ++ int ret; ++ ++ if (!rs->interval) ++ return 1; ++ ++ xnlock_get_irqsave(&ratelimit_lock, s); ++ ++ if (!rs->begin) ++ rs->begin = xnclock_read_realtime(&nkclock); ++ if (xnclock_read_realtime(&nkclock) >= rs->begin + rs->interval) { ++ if (rs->missed) ++ printk(KERN_WARNING "%s: %d callbacks suppressed\n", ++ func, rs->missed); ++ rs->begin = 0; ++ rs->printed = 0; ++ rs->missed = 0; ++ } ++ if (rs->burst && rs->burst > rs->printed) { ++ rs->printed++; ++ ret = 1; ++ } else { ++ rs->missed++; ++ ret = 0; ++ } ++ xnlock_put_irqrestore(&ratelimit_lock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__xnclock_ratelimit); ++ ++void xnclock_core_local_shot(struct xnsched *sched) ++{ ++ struct xntimerdata *tmd; ++ struct xntimer *timer; ++ xnsticks_t delay; ++ xntimerh_t *h; ++ ++ /* ++ * Do not reprogram locally when inside the tick 
handler - ++ * will be done on exit anyway. Also exit if there is no ++ * pending timer. ++ */ ++ if (sched->status & XNINTCK) ++ return; ++ ++ /* ++ * Assume the core clock device always has percpu semantics in ++ * SMP. ++ */ ++ tmd = xnclock_this_timerdata(&nkclock); ++ h = xntimerq_head(&tmd->q); ++ if (h == NULL) { ++ sched->lflags |= XNIDLE; ++ return; ++ } ++ ++ /* ++ * Here we try to defer the host tick heading the timer queue, ++ * so that it does not preempt a real-time activity uselessly, ++ * in two cases: ++ * ++ * 1) a rescheduling is pending for the current CPU. We may ++ * assume that a real-time thread is about to resume, so we ++ * want to move the host tick out of the way until the host ++ * kernel resumes, unless there is no other outstanding ++ * timers. ++ * ++ * 2) the current thread is running in primary mode, in which ++ * case we may also defer the host tick until the host kernel ++ * resumes. ++ * ++ * The host tick deferral is cleared whenever Xenomai is about ++ * to yield control to the host kernel (see ___xnsched_run()), ++ * or a timer with an earlier timeout date is scheduled, ++ * whichever comes first. ++ */ ++ sched->lflags &= ~(XNHDEFER|XNIDLE); ++ timer = container_of(h, struct xntimer, aplink); ++ if (unlikely(timer == &sched->htimer)) { ++ if (xnsched_resched_p(sched) || ++ !xnthread_test_state(sched->curr, XNROOT)) { ++ h = xntimerq_second(&tmd->q, h); ++ if (h) { ++ sched->lflags |= XNHDEFER; ++ timer = container_of(h, struct xntimer, aplink); ++ } ++ } ++ } ++ ++ delay = xntimerh_date(&timer->aplink) - xnclock_core_read_raw(); ++ if (delay < 0) ++ delay = 0; ++ else if (delay > ULONG_MAX) ++ delay = ULONG_MAX; ++ ++ xntrace_tick((unsigned)delay); ++ ++ ipipe_timer_set(delay); ++} ++ ++#ifdef CONFIG_SMP ++void xnclock_core_remote_shot(struct xnsched *sched) ++{ ++ ipipe_send_ipi(IPIPE_HRTIMER_IPI, *cpumask_of(xnsched_cpu(sched))); ++} ++#endif ++ ++static void adjust_timer(struct xntimer *timer, xntimerq_t *q, ++ xnsticks_t delta) ++{ ++ struct xnclock *clock = xntimer_clock(timer); ++ xnticks_t period, div; ++ xnsticks_t diff; ++ ++ xntimerh_date(&timer->aplink) -= delta; ++ ++ if (xntimer_periodic_p(timer) == 0) ++ goto enqueue; ++ ++ timer->start_date -= delta; ++ period = xntimer_interval(timer); ++ diff = xnclock_ticks_to_ns(clock, ++ xnclock_read_raw(clock) - xntimer_expiry(timer)); ++ ++ if ((xnsticks_t)(diff - period) >= 0) { ++ /* ++ * Timer should tick several times before now, instead ++ * of calling timer->handler several times, we change ++ * the timer date without changing its pexpect, so ++ * that timer will tick only once and the lost ticks ++ * will be counted as overruns. ++ */ ++ div = xnarch_div64(diff, period); ++ timer->periodic_ticks += div; ++ xntimer_update_date(timer); ++ } else if (delta < 0 ++ && (timer->status & XNTIMER_FIRED) ++ && (xnsticks_t) (diff + period) <= 0) { ++ /* ++ * Timer is periodic and NOT waiting for its first ++ * shot, so we make it tick sooner than its original ++ * date in order to avoid the case where by adjusting ++ * time to a sooner date, real-time periodic timers do ++ * not tick until the original date has passed. 
++ */ ++ div = xnarch_div64(-diff, period); ++ timer->periodic_ticks -= div; ++ timer->pexpect_ticks -= div; ++ xntimer_update_date(timer); ++ } ++ ++enqueue: ++ xntimer_enqueue(timer, q); ++} ++ ++static void adjust_clock_timers(struct xnclock *clock, xnsticks_t delta) ++{ ++ struct xntimer *timer, *tmp; ++ struct list_head adjq; ++ struct xnsched *sched; ++ xntimerq_it_t it; ++ unsigned int cpu; ++ xntimerh_t *h; ++ xntimerq_t *q; ++ ++ INIT_LIST_HEAD(&adjq); ++ delta = xnclock_ns_to_ticks(clock, delta); ++ ++ for_each_online_cpu(cpu) { ++ sched = xnsched_struct(cpu); ++ q = &xnclock_percpu_timerdata(clock, cpu)->q; ++ ++ for (h = xntimerq_it_begin(q, &it); h; ++ h = xntimerq_it_next(q, &it, h)) { ++ timer = container_of(h, struct xntimer, aplink); ++ if (timer->status & XNTIMER_REALTIME) ++ list_add_tail(&timer->adjlink, &adjq); ++ } ++ ++ if (list_empty(&adjq)) ++ continue; ++ ++ list_for_each_entry_safe(timer, tmp, &adjq, adjlink) { ++ list_del(&timer->adjlink); ++ xntimer_dequeue(timer, q); ++ adjust_timer(timer, q, delta); ++ } ++ ++ if (sched != xnsched_current()) ++ xnclock_remote_shot(clock, sched); ++ else ++ xnclock_program_shot(clock, sched); ++ } ++} ++ ++/** ++ * @fn void xnclock_adjust(struct xnclock *clock, xnsticks_t delta) ++ * @brief Adjust a clock time. ++ * ++ * This service changes the epoch for the given clock by applying the ++ * specified tick delta on its wallclock offset. ++ * ++ * @param clock The clock to adjust. ++ * ++ * @param delta The adjustment value expressed in nanoseconds. ++ * ++ * @coretags{task-unrestricted, atomic-entry} ++ * ++ * @note Xenomai tracks the system time in @a nkclock, as a ++ * monotonously increasing count of ticks since the epoch. The epoch ++ * is initially the same as the underlying machine time. 
++ */ ++void xnclock_adjust(struct xnclock *clock, xnsticks_t delta) ++{ ++ xnticks_t now; ++ ++ nkclock.wallclock_offset += delta; ++ nkvdso->wallclock_offset = nkclock.wallclock_offset; ++ now = xnclock_read_monotonic(clock) + nkclock.wallclock_offset; ++ adjust_clock_timers(clock, delta); ++} ++EXPORT_SYMBOL_GPL(xnclock_adjust); ++ ++xnticks_t xnclock_get_host_time(void) ++{ ++ return ktime_to_ns(ktime_get_real()); ++} ++EXPORT_SYMBOL_GPL(xnclock_get_host_time); ++ ++xnticks_t xnclock_core_read_monotonic(void) ++{ ++ return xnclock_core_ticks_to_ns(xnclock_core_read_raw()); ++} ++EXPORT_SYMBOL_GPL(xnclock_core_read_monotonic); ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ ++static struct xnvfile_directory timerlist_vfroot; ++ ++static struct xnvfile_snapshot_ops timerlist_ops; ++ ++struct vfile_clock_priv { ++ struct xntimer *curr; ++}; ++ ++struct vfile_clock_data { ++ int cpu; ++ unsigned int scheduled; ++ unsigned int fired; ++ xnticks_t timeout; ++ xnticks_t interval; ++ unsigned long status; ++ char name[XNOBJECT_NAME_LEN]; ++}; ++ ++static int timerlist_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_clock_priv *priv = xnvfile_iterator_priv(it); ++ struct xnclock *clock = xnvfile_priv(it->vfile); ++ ++ if (list_empty(&clock->timerq)) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&clock->timerq, struct xntimer, next_stat); ++ ++ return clock->nrtimers; ++} ++ ++static int timerlist_next(struct xnvfile_snapshot_iterator *it, void *data) ++{ ++ struct vfile_clock_priv *priv = xnvfile_iterator_priv(it); ++ struct xnclock *clock = xnvfile_priv(it->vfile); ++ struct vfile_clock_data *p = data; ++ struct xntimer *timer; ++ ++ if (priv->curr == NULL) ++ return 0; ++ ++ timer = priv->curr; ++ if (list_is_last(&timer->next_stat, &clock->timerq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_entry(timer->next_stat.next, ++ struct xntimer, next_stat); ++ ++ if (clock == &nkclock && xnstat_counter_get(&timer->scheduled) == 0) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(xntimer_sched(timer)); ++ p->scheduled = xnstat_counter_get(&timer->scheduled); ++ p->fired = xnstat_counter_get(&timer->fired); ++ p->timeout = xntimer_get_timeout(timer); ++ p->interval = xntimer_interval(timer); ++ p->status = timer->status; ++ knamecpy(p->name, timer->name); ++ ++ return 1; ++} ++ ++static int timerlist_show(struct xnvfile_snapshot_iterator *it, void *data) ++{ ++ struct vfile_clock_data *p = data; ++ char timeout_buf[] = "- "; ++ char interval_buf[] = "- "; ++ char hit_buf[32]; ++ ++ if (p == NULL) ++ xnvfile_printf(it, ++ "%-3s %-20s %-10s %-10s %s\n", ++ "CPU", "SCHED/SHOT", "TIMEOUT", ++ "INTERVAL", "NAME"); ++ else { ++ if (p->status & XNTIMER_RUNNING) ++ xntimer_format_time(p->timeout, timeout_buf, ++ sizeof(timeout_buf)); ++ if (p->status & XNTIMER_PERIODIC) ++ xntimer_format_time(p->interval, interval_buf, ++ sizeof(interval_buf)); ++ ksformat(hit_buf, sizeof(hit_buf), "%u/%u", ++ p->scheduled, p->fired); ++ xnvfile_printf(it, ++ "%-3u %-20s %-10s %-10s %s\n", ++ p->cpu, hit_buf, timeout_buf, ++ interval_buf, p->name); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops timerlist_ops = { ++ .rewind = timerlist_rewind, ++ .next = timerlist_next, ++ .show = timerlist_show, ++}; ++ ++static void init_timerlist_proc(struct xnclock *clock) ++{ ++ memset(&clock->timer_vfile, 0, sizeof(clock->timer_vfile)); ++ clock->timer_vfile.privsz = sizeof(struct vfile_clock_priv); ++ clock->timer_vfile.datasz = sizeof(struct vfile_clock_data); ++ clock->timer_vfile.tag = 
&clock->timer_revtag; ++ clock->timer_vfile.ops = &timerlist_ops; ++ ++ xnvfile_init_snapshot(clock->name, &clock->timer_vfile, &timerlist_vfroot); ++ xnvfile_priv(&clock->timer_vfile) = clock; ++} ++ ++static void cleanup_timerlist_proc(struct xnclock *clock) ++{ ++ xnvfile_destroy_snapshot(&clock->timer_vfile); ++} ++ ++void init_timerlist_root(void) ++{ ++ xnvfile_init_dir("timer", &timerlist_vfroot, &cobalt_vfroot); ++} ++ ++void cleanup_timerlist_root(void) ++{ ++ xnvfile_destroy_dir(&timerlist_vfroot); ++} ++ ++#else /* !CONFIG_XENO_OPT_STATS */ ++ ++static inline void init_timerlist_root(void) { } ++ ++static inline void cleanup_timerlist_root(void) { } ++ ++static inline void init_timerlist_proc(struct xnclock *clock) { } ++ ++static inline void cleanup_timerlist_proc(struct xnclock *clock) { } ++ ++#endif /* !CONFIG_XENO_OPT_STATS */ ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static struct xnvfile_directory clock_vfroot; ++ ++void print_core_clock_status(struct xnclock *clock, ++ struct xnvfile_regular_iterator *it) ++{ ++ const char *wd_status = "off"; ++ ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ wd_status = "on"; ++#endif /* CONFIG_XENO_OPT_WATCHDOG */ ++ ++ xnvfile_printf(it, "%8s: timer=%s, clock=%s\n", ++ "devices", ipipe_timer_name(), ipipe_clock_name()); ++ xnvfile_printf(it, "%8s: %s\n", "watchdog", wd_status); ++ xnvfile_printf(it, "%8s: %Lu\n", "setup", ++ xnclock_ticks_to_ns(&nkclock, nktimerlat)); ++} ++ ++static int clock_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct xnclock *clock = xnvfile_priv(it->vfile); ++ xnticks_t now = xnclock_read_raw(clock); ++ ++ if (clock->id >= 0) /* External clock, print id. */ ++ xnvfile_printf(it, "%7s: %d\n", "id", __COBALT_CLOCK_EXT(clock->id)); ++ ++ xnvfile_printf(it, "%7s: irq=%Ld kernel=%Ld user=%Ld\n", "gravity", ++ xnclock_ticks_to_ns(clock, xnclock_get_gravity(clock, irq)), ++ xnclock_ticks_to_ns(clock, xnclock_get_gravity(clock, kernel)), ++ xnclock_ticks_to_ns(clock, xnclock_get_gravity(clock, user))); ++ ++ xnclock_print_status(clock, it); ++ ++ xnvfile_printf(it, "%7s: %Lu (%.4Lx %.4x)\n", "ticks", ++ now, now >> 32, (u32)(now & -1U)); ++ ++ return 0; ++} ++ ++static ssize_t clock_store(struct xnvfile_input *input) ++{ ++ char buf[128], *args = buf, *p; ++ struct xnclock_gravity gravity; ++ struct xnvfile_regular *vfile; ++ unsigned long ns, ticks; ++ struct xnclock *clock; ++ ssize_t nbytes; ++ int ret; ++ ++ nbytes = xnvfile_get_string(input, buf, sizeof(buf)); ++ if (nbytes < 0) ++ return nbytes; ++ ++ vfile = container_of(input->vfile, struct xnvfile_regular, entry); ++ clock = xnvfile_priv(vfile); ++ gravity = clock->gravity; ++ ++ while ((p = strsep(&args, " \t:/,")) != NULL) { ++ if (*p == '\0') ++ continue; ++ ns = simple_strtol(p, &p, 10); ++ ticks = xnclock_ns_to_ticks(clock, ns); ++ switch (*p) { ++ case 'i': ++ gravity.irq = ticks; ++ break; ++ case 'k': ++ gravity.kernel = ticks; ++ break; ++ case 'u': ++ case '\0': ++ gravity.user = ticks; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ret = xnclock_set_gravity(clock, &gravity); ++ if (ret) ++ return ret; ++ } ++ ++ return nbytes; ++} ++ ++static struct xnvfile_regular_ops clock_ops = { ++ .show = clock_show, ++ .store = clock_store, ++}; ++ ++static void init_clock_proc(struct xnclock *clock) ++{ ++ memset(&clock->vfile, 0, sizeof(clock->vfile)); ++ clock->vfile.ops = &clock_ops; ++ xnvfile_init_regular(clock->name, &clock->vfile, &clock_vfroot); ++ xnvfile_priv(&clock->vfile) = clock; ++ init_timerlist_proc(clock); ++} ++ ++static void 
cleanup_clock_proc(struct xnclock *clock) ++{ ++ cleanup_timerlist_proc(clock); ++ xnvfile_destroy_regular(&clock->vfile); ++} ++ ++void xnclock_init_proc(void) ++{ ++ xnvfile_init_dir("clock", &clock_vfroot, &cobalt_vfroot); ++ init_timerlist_root(); ++} ++ ++void xnclock_cleanup_proc(void) ++{ ++ xnvfile_destroy_dir(&clock_vfroot); ++ cleanup_timerlist_root(); ++} ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++static inline void init_clock_proc(struct xnclock *clock) { } ++ ++static inline void cleanup_clock_proc(struct xnclock *clock) { } ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++/** ++ * @brief Register a Xenomai clock. ++ * ++ * This service installs a new clock which may be used to drive ++ * Xenomai timers. ++ * ++ * @param clock The new clock to register. ++ * ++ * @param affinity The set of CPUs we may expect the backing clock ++ * device to tick on. As a special case, passing a NULL affinity mask ++ * means that timer IRQs cannot be seen as percpu events, in which ++ * case all outstanding timers will be maintained into a single global ++ * queue instead of percpu timer queues. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnclock_register(struct xnclock *clock, const cpumask_t *affinity) ++{ ++ struct xntimerdata *tmd; ++ int cpu; ++ ++ secondary_mode_only(); ++ ++#ifdef CONFIG_SMP ++ /* ++ * A CPU affinity set may be defined for each clock, ++ * enumerating the CPUs which can receive ticks from the ++ * backing clock device. When given, this set must be a ++ * subset of the real-time CPU set. ++ */ ++ if (affinity) { ++ cpumask_and(&clock->affinity, affinity, &xnsched_realtime_cpus); ++ if (cpumask_empty(&clock->affinity)) ++ return -EINVAL; ++ } else /* Device is global without particular IRQ affinity. */ ++ cpumask_clear(&clock->affinity); ++#endif ++ ++ /* Allocate the percpu timer queue slot. */ ++ clock->timerdata = alloc_percpu(struct xntimerdata); ++ if (clock->timerdata == NULL) ++ return -ENOMEM; ++ ++ /* ++ * POLA: init all timer slots for the new clock, although some ++ * of them might remain unused depending on the CPU affinity ++ * of the event source(s). If the clock device is global ++ * without any particular IRQ affinity, all timers will be ++ * queued to CPU0. ++ */ ++ for_each_online_cpu(cpu) { ++ tmd = xnclock_percpu_timerdata(clock, cpu); ++ xntimerq_init(&tmd->q); ++ } ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ INIT_LIST_HEAD(&clock->timerq); ++#endif /* CONFIG_XENO_OPT_STATS */ ++ ++ init_clock_proc(clock); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnclock_register); ++ ++/** ++ * @fn void xnclock_deregister(struct xnclock *clock) ++ * @brief Deregister a Xenomai clock. ++ * ++ * This service uninstalls a Xenomai clock previously registered with ++ * xnclock_register(). ++ * ++ * This service may be called once all timers driven by @a clock have ++ * been stopped. ++ * ++ * @param clock The clock to deregister. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnclock_deregister(struct xnclock *clock) ++{ ++ struct xntimerdata *tmd; ++ int cpu; ++ ++ secondary_mode_only(); ++ ++ cleanup_clock_proc(clock); ++ ++ for_each_online_cpu(cpu) { ++ tmd = xnclock_percpu_timerdata(clock, cpu); ++ XENO_BUG_ON(COBALT, !xntimerq_empty(&tmd->q)); ++ xntimerq_destroy(&tmd->q); ++ } ++ ++ free_percpu(clock->timerdata); ++} ++EXPORT_SYMBOL_GPL(xnclock_deregister); ++ ++/** ++ * @fn void xnclock_tick(struct xnclock *clock) ++ * @brief Process a clock tick. ++ * ++ * This routine processes an incoming @a clock event, firing elapsed ++ * timers as appropriate. 
++ * ++ * @param clock The clock for which a new event was received. ++ * ++ * @coretags{coreirq-only, atomic-entry} ++ * ++ * @note The current CPU must be part of the real-time affinity set ++ * unless the clock device has no particular IRQ affinity, otherwise ++ * weird things may happen. ++ */ ++void xnclock_tick(struct xnclock *clock) ++{ ++ struct xnsched *sched = xnsched_current(); ++ struct xntimer *timer; ++ xnsticks_t delta; ++ xntimerq_t *tmq; ++ xnticks_t now; ++ xntimerh_t *h; ++ ++ atomic_only(); ++ ++#ifdef CONFIG_SMP ++ /* ++ * Some external clock devices may be global without any ++ * particular IRQ affinity, in which case the associated ++ * timers will be queued to CPU0. ++ */ ++ if (IS_ENABLED(CONFIG_XENO_OPT_EXTCLOCK) && ++ clock != &nkclock && ++ !cpumask_test_cpu(xnsched_cpu(sched), &clock->affinity)) ++ tmq = &xnclock_percpu_timerdata(clock, 0)->q; ++ else ++#endif ++ tmq = &xnclock_this_timerdata(clock)->q; ++ ++ /* ++ * Optimisation: any local timer reprogramming triggered by ++ * invoked timer handlers can wait until we leave the tick ++ * handler. Use this status flag as hint to xntimer_start(). ++ */ ++ sched->status |= XNINTCK; ++ ++ now = xnclock_read_raw(clock); ++ while ((h = xntimerq_head(tmq)) != NULL) { ++ timer = container_of(h, struct xntimer, aplink); ++ delta = (xnsticks_t)(xntimerh_date(&timer->aplink) - now); ++ if (delta > 0) ++ break; ++ ++ trace_cobalt_timer_expire(timer); ++ ++ xntimer_dequeue(timer, tmq); ++ xntimer_account_fired(timer); ++ ++ /* ++ * By postponing the propagation of the low-priority ++ * host tick to the interrupt epilogue (see ++ * xnintr_irq_handler()), we save some I-cache, which ++ * translates into precious microsecs on low-end hw. ++ */ ++ if (unlikely(timer == &sched->htimer)) { ++ sched->lflags |= XNHTICK; ++ sched->lflags &= ~XNHDEFER; ++ if (timer->status & XNTIMER_PERIODIC) ++ goto advance; ++ continue; ++ } ++ ++ timer->handler(timer); ++ now = xnclock_read_raw(clock); ++ timer->status |= XNTIMER_FIRED; ++ /* ++ * Only requeue periodic timers which have not been ++ * requeued, stopped or killed. ++ */ ++ if ((timer->status & ++ (XNTIMER_PERIODIC|XNTIMER_DEQUEUED|XNTIMER_KILLED|XNTIMER_RUNNING)) != ++ (XNTIMER_PERIODIC|XNTIMER_DEQUEUED|XNTIMER_RUNNING)) ++ continue; ++ advance: ++ do { ++ timer->periodic_ticks++; ++ xntimer_update_date(timer); ++ } while (xntimerh_date(&timer->aplink) < now); ++ ++#ifdef CONFIG_SMP ++ /* ++ * If the timer was migrated over its timeout handler, ++ * xntimer_migrate() re-queued it already. 
++ */ ++ if (unlikely(timer->sched != sched)) ++ continue; ++#endif ++ xntimer_enqueue(timer, tmq); ++ } ++ ++ sched->status &= ~XNINTCK; ++ ++ xnclock_program_shot(clock, sched); ++} ++EXPORT_SYMBOL_GPL(xnclock_tick); ++ ++void xnclock_update_freq(unsigned long long freq) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ clockfreq = freq; ++#ifdef XNARCH_HAVE_LLMULSHFT ++ xnarch_init_llmulshft(1000000000, freq, &tsc_scale, &tsc_shift); ++#ifdef XNARCH_HAVE_NODIV_LLIMD ++ xnarch_init_u32frac(&tsc_frac, 1 << tsc_shift, tsc_scale); ++ xnarch_init_u32frac(&bln_frac, 1, 1000000000); ++#endif ++#endif ++ cobalt_pipeline.clock_freq = freq; ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++static int set_core_clock_gravity(struct xnclock *clock, ++ const struct xnclock_gravity *p) ++{ ++ nkclock.gravity = *p; ++ ++ return 0; ++} ++ ++static void reset_core_clock_gravity(struct xnclock *clock) ++{ ++ struct xnclock_gravity gravity; ++ ++ xnarch_get_latencies(&gravity); ++ gravity.user += nktimerlat; ++ if (gravity.kernel == 0) ++ gravity.kernel = gravity.user; ++ if (gravity.irq == 0) ++ gravity.irq = nktimerlat; ++ set_core_clock_gravity(clock, &gravity); ++} ++ ++struct xnclock nkclock = { ++ .name = "coreclk", ++ .resolution = 1, /* nanosecond. */ ++ .ops = { ++ .set_gravity = set_core_clock_gravity, ++ .reset_gravity = reset_core_clock_gravity, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .print_status = print_core_clock_status, ++#endif ++ }, ++ .id = -1, ++}; ++EXPORT_SYMBOL_GPL(nkclock); ++ ++void xnclock_cleanup(void) ++{ ++ xnclock_deregister(&nkclock); ++} ++ ++int __init xnclock_init(unsigned long long freq) ++{ ++ xnclock_update_freq(freq); ++ nktimerlat = xnarch_timer_calibrate(); ++ xnclock_reset_gravity(&nkclock); ++ xnclock_register(&nkclock, &xnsched_realtime_cpus); ++ ++ return 0; ++} ++ ++/** @} */ +--- linux/kernel/xenomai/sched-rt.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-rt.c 2021-04-07 16:01:25.770636248 +0800 +@@ -0,0 +1,257 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++ ++static void xnsched_rt_init(struct xnsched *sched) ++{ ++ xnsched_initq(&sched->rt.runnable); ++} ++ ++static void xnsched_rt_requeue(struct xnthread *thread) ++{ ++ /* ++ * Put back at same place: i.e. requeue to head of current ++ * priority group (i.e. LIFO, used for preemption handling). ++ */ ++ __xnsched_rt_requeue(thread); ++} ++ ++static void xnsched_rt_enqueue(struct xnthread *thread) ++{ ++ /* ++ * Enqueue for next pick: i.e. move to end of current priority ++ * group (i.e. FIFO). ++ */ ++ __xnsched_rt_enqueue(thread); ++} ++ ++static void xnsched_rt_dequeue(struct xnthread *thread) ++{ ++ /* ++ * Pull from the runnable thread queue. 
++ */ ++ __xnsched_rt_dequeue(thread); ++} ++ ++static void xnsched_rt_rotate(struct xnsched *sched, ++ const union xnsched_policy_param *p) ++{ ++ struct xnthread *thread, *curr; ++ ++ if (xnsched_emptyq_p(&sched->rt.runnable)) ++ return; /* No runnable thread in this class. */ ++ ++ curr = sched->curr; ++ ++ if (p->rt.prio == XNSCHED_RUNPRIO) ++ thread = curr; ++ else { ++ thread = xnsched_findq(&sched->rt.runnable, p->rt.prio); ++ if (thread == NULL) ++ return; ++ } ++ ++ /* ++ * In case we picked the current thread, we have to make sure ++ * not to move it back to the run queue if it was blocked ++ * before we were called. The same goes if the current thread ++ * holds the scheduler lock. ++ */ ++ if (thread != curr || ++ (!xnthread_test_state(curr, XNTHREAD_BLOCK_BITS) && ++ curr->lock_count == 0)) ++ xnsched_putback(thread); ++} ++ ++void xnsched_rt_tick(struct xnsched *sched) ++{ ++ /* ++ * The round-robin time credit is only consumed by a running ++ * thread that neither holds the scheduler lock nor was ++ * blocked before entering this callback. As the time slice is ++ * exhausted for the running thread, move it back to the ++ * run queue at the end of its priority group. ++ */ ++ xnsched_putback(sched->curr); ++} ++ ++static bool xnsched_rt_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ return __xnsched_rt_setparam(thread, p); ++} ++ ++static void xnsched_rt_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ __xnsched_rt_getparam(thread, p); ++} ++ ++static void xnsched_rt_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ __xnsched_rt_trackprio(thread, p); ++} ++ ++static void xnsched_rt_protectprio(struct xnthread *thread, int prio) ++{ ++ __xnsched_rt_protectprio(thread, prio); ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++struct xnvfile_directory sched_rt_vfroot; ++ ++struct vfile_sched_rt_priv { ++ struct xnthread *curr; ++}; ++ ++struct vfile_sched_rt_data { ++ int cpu; ++ pid_t pid; ++ char name[XNOBJECT_NAME_LEN]; ++ xnticks_t period; ++ int cprio; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_sched_rt_ops; ++ ++static struct xnvfile_snapshot vfile_sched_rt = { ++ .privsz = sizeof(struct vfile_sched_rt_priv), ++ .datasz = sizeof(struct vfile_sched_rt_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_sched_rt_ops, ++}; ++ ++static int vfile_sched_rt_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_sched_rt_priv *priv = xnvfile_iterator_priv(it); ++ int nrthreads = xnsched_class_rt.nthreads; ++ ++ if (nrthreads == 0) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ ++ return nrthreads; ++} ++ ++static int vfile_sched_rt_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_rt_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_sched_rt_data *p = data; ++ struct xnthread *thread; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. 
*/ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ if (thread->base_class != &xnsched_class_rt || ++ xnthread_test_state(thread, XNWEAK)) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->cprio = thread->cprio; ++ p->period = xnthread_get_period(thread); ++ ++ return 1; ++} ++ ++static int vfile_sched_rt_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_rt_data *p = data; ++ char pribuf[16], ptbuf[16]; ++ ++ if (p == NULL) ++ xnvfile_printf(it, "%-3s %-6s %-8s %-10s %s\n", ++ "CPU", "PID", "PRI", "PERIOD", "NAME"); ++ else { ++ ksformat(pribuf, sizeof(pribuf), "%3d", p->cprio); ++ xntimer_format_time(p->period, ptbuf, sizeof(ptbuf)); ++ xnvfile_printf(it, "%3u %-6d %-8s %-10s %s\n", ++ p->cpu, ++ p->pid, ++ pribuf, ++ ptbuf, ++ p->name); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_sched_rt_ops = { ++ .rewind = vfile_sched_rt_rewind, ++ .next = vfile_sched_rt_next, ++ .show = vfile_sched_rt_show, ++}; ++ ++static int xnsched_rt_init_vfile(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot) ++{ ++ int ret; ++ ++ ret = xnvfile_init_dir(schedclass->name, &sched_rt_vfroot, vfroot); ++ if (ret) ++ return ret; ++ ++ return xnvfile_init_snapshot("threads", &vfile_sched_rt, ++ &sched_rt_vfroot); ++} ++ ++static void xnsched_rt_cleanup_vfile(struct xnsched_class *schedclass) ++{ ++ xnvfile_destroy_snapshot(&vfile_sched_rt); ++ xnvfile_destroy_dir(&sched_rt_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct xnsched_class xnsched_class_rt = { ++ .sched_init = xnsched_rt_init, ++ .sched_enqueue = xnsched_rt_enqueue, ++ .sched_dequeue = xnsched_rt_dequeue, ++ .sched_requeue = xnsched_rt_requeue, ++ .sched_pick = xnsched_rt_pick, ++ .sched_tick = xnsched_rt_tick, ++ .sched_rotate = xnsched_rt_rotate, ++ .sched_forget = NULL, ++ .sched_kick = NULL, ++ .sched_declare = NULL, ++ .sched_setparam = xnsched_rt_setparam, ++ .sched_trackprio = xnsched_rt_trackprio, ++ .sched_protectprio = xnsched_rt_protectprio, ++ .sched_getparam = xnsched_rt_getparam, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .sched_init_vfile = xnsched_rt_init_vfile, ++ .sched_cleanup_vfile = xnsched_rt_cleanup_vfile, ++#endif ++ .weight = XNSCHED_CLASS_WEIGHT(4), ++ .policy = SCHED_FIFO, ++ .name = "rt" ++}; ++EXPORT_SYMBOL_GPL(xnsched_class_rt); +--- linux/kernel/xenomai/tree.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/tree.c 2021-04-07 16:01:25.765636255 +0800 +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (C) 2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++ ++void xntree_cleanup(struct rb_root *t, void *cookie, ++ void (*destroy)(void *cookie, struct xnid *id)) ++{ ++ struct rb_node *node, *next; ++ ++ node = rb_first(t); ++ while (node) { ++ next = rb_next(node); ++ ++ /* destroy is expected to remove the node from the rbtree */ ++ destroy(cookie, container_of(node, struct xnid, link)); ++ ++ node = next; ++ } ++} ++ ++int xnid_enter(struct rb_root *t, struct xnid *xnid, xnkey_t key) ++{ ++ struct rb_node **new = &t->rb_node, *parent = NULL; ++ ++ while (*new) { ++ struct xnid *i = container_of(*new, struct xnid, link); ++ ++ parent = *new; ++ if (key < i->key) ++ new = &((*new)->rb_left); ++ else if (key > i->key) ++ new = &((*new)->rb_right); ++ else ++ return -EEXIST; ++ } ++ ++ xnid->key = key; ++ rb_link_node(&xnid->link, parent, new); ++ rb_insert_color(&xnid->link, t); ++ ++ return 0; ++} +--- linux/kernel/xenomai/vfile.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/vfile.c 2021-04-07 16:01:25.760636262 +0800 +@@ -0,0 +1,980 @@ ++/* ++ * Copyright (C) 2010 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_vfile Virtual file services ++ * ++ * Virtual files provide a mean to export Xenomai object states to ++ * user-space, based on common kernel interfaces. This encapsulation ++ * is aimed at: ++ * ++ * - supporting consistent collection of very large record-based ++ * output, without encurring latency peaks for undergoing real-time ++ * activities. ++ * ++ * - in the future, hiding discrepancies between linux kernel ++ * releases, regarding the proper way to export kernel object states ++ * to userland, either via the /proc interface or by any other mean. ++ * ++ * This virtual file implementation offers record-based read support ++ * based on seq_files, single-buffer write support, directory and link ++ * handling, all visible from the /proc namespace. ++ * ++ * The vfile support exposes four filesystem object types: ++ * ++ * - snapshot-driven file (struct xnvfile_snapshot). This is commonly ++ * used to export real-time object states via the /proc filesystem. To ++ * minimize the latency involved in protecting the vfile routines from ++ * changes applied by real-time code on such objects, a snapshot of ++ * the data to output is first taken under proper locking, before the ++ * collected data is formatted and sent out in a lockless manner. ++ * ++ * Because a large number of records may have to be output, the data ++ * collection phase is not strictly atomic as a whole, but only ++ * protected at record level. 
The vfile implementation can be notified ++ * of updates to the underlying data set, and restart the collection ++ * from scratch until the snapshot is fully consistent. ++ * ++ * - regular sequential file (struct xnvfile_regular). This is ++ * basically an encapsulated sequential file object as available from ++ * the host kernel (i.e. seq_file), with a few additional features to ++ * make it more handy in a Xenomai environment, like implicit locking ++ * support and shortened declaration for simplest, single-record ++ * output. ++ * ++ * - virtual link (struct xnvfile_link). This is a symbolic link ++ * feature integrated with the vfile semantics. The link target is ++ * computed dynamically at creation time from a user-given helper ++ * routine. ++ * ++ * - virtual directory (struct xnvfile_directory). A directory object, ++ * which can be used to create a hierarchy for ordering a set of vfile ++ * objects. ++ * ++ *@{*/ ++ ++/** ++ * @var struct xnvfile_directory cobalt_vfroot ++ * @brief Xenomai vfile root directory ++ * ++ * This vdir maps the /proc/xenomai directory. It can be used to ++ * create a hierarchy of Xenomai-related vfiles under this root. ++ */ ++struct xnvfile_directory cobalt_vfroot; ++EXPORT_SYMBOL_GPL(cobalt_vfroot); ++ ++static struct xnvfile_directory sysroot; ++ ++static void *vfile_snapshot_start(struct seq_file *seq, loff_t *offp) ++{ ++ struct xnvfile_snapshot_iterator *it = seq->private; ++ loff_t pos = *offp; ++ ++ if (pos > it->nrdata) ++ return NULL; ++ ++ if (pos == 0) ++ return SEQ_START_TOKEN; ++ ++ return it->databuf + (pos - 1) * it->vfile->datasz; ++} ++ ++static void *vfile_snapshot_next(struct seq_file *seq, void *v, loff_t *offp) ++{ ++ struct xnvfile_snapshot_iterator *it = seq->private; ++ loff_t pos = *offp; ++ ++ if (pos >= it->nrdata) ++ return NULL; ++ ++ ++*offp; ++ ++ return it->databuf + pos * it->vfile->datasz; ++} ++ ++static void vfile_snapshot_stop(struct seq_file *seq, void *v) ++{ ++} ++ ++static int vfile_snapshot_show(struct seq_file *seq, void *v) ++{ ++ struct xnvfile_snapshot_iterator *it = seq->private; ++ void *data = v == SEQ_START_TOKEN ? NULL : v; ++ int ret; ++ ++ ret = it->vfile->ops->show(it, data); ++ ++ return ret == VFILE_SEQ_SKIP ? SEQ_SKIP : ret; ++} ++ ++static struct seq_operations vfile_snapshot_ops = { ++ .start = vfile_snapshot_start, ++ .next = vfile_snapshot_next, ++ .stop = vfile_snapshot_stop, ++ .show = vfile_snapshot_show ++}; ++ ++static void vfile_snapshot_free(struct xnvfile_snapshot_iterator *it, void *buf) ++{ ++ kfree(buf); ++} ++ ++static int vfile_snapshot_open(struct inode *inode, struct file *file) ++{ ++ struct xnvfile_snapshot *vfile = PDE_DATA(inode); ++ struct xnvfile_snapshot_ops *ops = vfile->ops; ++ struct xnvfile_snapshot_iterator *it; ++ int revtag, ret, nrdata; ++ struct seq_file *seq; ++ caddr_t data; ++ ++ if ((file->f_mode & FMODE_WRITE) != 0 && ops->store == NULL) ++ return -EACCES; ++ ++ /* ++ * Make sure to create the seq_file backend only when reading ++ * from the v-file is possible. 
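++	 * Write-only opens skip the snapshot machinery entirely: no
++	 * iterator is allocated and file->private_data is left NULL,
++	 * so a later write(2) goes straight to the ->store() handler.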
++ */ ++ if ((file->f_mode & FMODE_READ) == 0) { ++ file->private_data = NULL; ++ return 0; ++ } ++ ++ if ((file->f_flags & O_EXCL) != 0 && xnvfile_nref(vfile) > 0) ++ return -EBUSY; ++ ++ it = kzalloc(sizeof(*it) + vfile->privsz, GFP_KERNEL); ++ if (it == NULL) ++ return -ENOMEM; ++ ++ it->vfile = vfile; ++ xnvfile_file(vfile) = file; ++ ++ ret = vfile->entry.lockops->get(&vfile->entry); ++ if (ret) ++ goto fail; ++redo: ++ /* ++ * The ->rewind() method is optional; there may be cases where ++ * we don't have to take an atomic snapshot of the v-file ++ * contents before proceeding. In case ->rewind() detects a ++ * stale backend object, it can force us to bail out. ++ * ++ * If present, ->rewind() may return a strictly positive ++ * value, indicating how many records at most may be returned ++ * by ->next(). We use this hint to allocate the snapshot ++ * buffer, in case ->begin() is not provided. The size of this ++ * buffer would then be vfile->datasz * hint value. ++ * ++ * If ->begin() is given, we always expect the latter do the ++ * allocation for us regardless of the hint value. Otherwise, ++ * a NULL return from ->rewind() tells us that the vfile won't ++ * output any snapshot data via ->show(). ++ */ ++ nrdata = 0; ++ if (ops->rewind) { ++ nrdata = ops->rewind(it); ++ if (nrdata < 0) { ++ ret = nrdata; ++ vfile->entry.lockops->put(&vfile->entry); ++ goto fail; ++ } ++ } ++ revtag = vfile->tag->rev; ++ ++ vfile->entry.lockops->put(&vfile->entry); ++ ++ /* Release the data buffer, in case we had to restart. */ ++ if (it->databuf) { ++ it->endfn(it, it->databuf); ++ it->databuf = NULL; ++ } ++ ++ /* ++ * Having no record to output is fine, in which case ->begin() ++ * shall return VFILE_SEQ_EMPTY if present. ->begin() may be ++ * absent, meaning that no allocation is even required to ++ * collect the records to output. NULL is kept for allocation ++ * errors in all other cases. ++ */ ++ if (ops->begin) { ++ XENO_BUG_ON(COBALT, ops->end == NULL); ++ data = ops->begin(it); ++ if (data == NULL) { ++ kfree(it); ++ return -ENOMEM; ++ } ++ if (data != VFILE_SEQ_EMPTY) { ++ it->databuf = data; ++ it->endfn = ops->end; ++ } ++ } else if (nrdata > 0 && vfile->datasz > 0) { ++ /* We have a hint for auto-allocation. */ ++ data = kmalloc(vfile->datasz * nrdata, GFP_KERNEL); ++ if (data == NULL) { ++ kfree(it); ++ return -ENOMEM; ++ } ++ it->databuf = data; ++ it->endfn = vfile_snapshot_free; ++ } ++ ++ ret = seq_open(file, &vfile_snapshot_ops); ++ if (ret) ++ goto fail; ++ ++ it->nrdata = 0; ++ data = it->databuf; ++ if (data == NULL) ++ goto finish; ++ ++ /* ++ * Take a snapshot of the vfile contents, redo if the revision ++ * tag of the scanned data set changed concurrently. 
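++	 * The vfile lock is taken and dropped around each ->next()
++	 * call, so it is never held across the whole dump; overall
++	 * consistency is enforced by the revision tag check, which
++	 * restarts the collection from scratch on a concurrent update.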
++ */ ++ for (;;) { ++ ret = vfile->entry.lockops->get(&vfile->entry); ++ if (ret) ++ break; ++ if (vfile->tag->rev != revtag) ++ goto redo; ++ ret = ops->next(it, data); ++ vfile->entry.lockops->put(&vfile->entry); ++ if (ret <= 0) ++ break; ++ if (ret != VFILE_SEQ_SKIP) { ++ data += vfile->datasz; ++ it->nrdata++; ++ } ++ } ++ ++ if (ret < 0) { ++ seq_release(inode, file); ++ fail: ++ if (it->databuf) ++ it->endfn(it, it->databuf); ++ kfree(it); ++ return ret; ++ } ++ ++finish: ++ seq = file->private_data; ++ it->seq = seq; ++ seq->private = it; ++ xnvfile_nref(vfile)++; ++ ++ return 0; ++} ++ ++static int vfile_snapshot_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct xnvfile_snapshot_iterator *it; ++ ++ if (seq) { ++ it = seq->private; ++ if (it) { ++ --xnvfile_nref(it->vfile); ++ XENO_BUG_ON(COBALT, it->vfile->entry.refcnt < 0); ++ if (it->databuf) ++ it->endfn(it, it->databuf); ++ kfree(it); ++ } ++ ++ return seq_release(inode, file); ++ } ++ ++ return 0; ++} ++ ++ssize_t vfile_snapshot_write(struct file *file, const char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ struct xnvfile_snapshot *vfile = ++ PDE_DATA(file->f_path.dentry->d_inode); ++ struct xnvfile_input input; ++ ssize_t ret; ++ ++ if (vfile->entry.lockops) { ++ ret = vfile->entry.lockops->get(&vfile->entry); ++ if (ret) ++ return ret; ++ } ++ ++ input.u_buf = buf; ++ input.size = size; ++ input.vfile = &vfile->entry; ++ ++ ret = vfile->ops->store(&input); ++ ++ if (vfile->entry.lockops) ++ vfile->entry.lockops->put(&vfile->entry); ++ ++ return ret; ++} ++ ++static struct file_operations vfile_snapshot_fops = { ++ .open = vfile_snapshot_open, ++ .read = seq_read, ++ .write = vfile_snapshot_write, ++ .llseek = seq_lseek, ++ .release = vfile_snapshot_release, ++}; ++ ++/** ++ * @fn int xnvfile_init_snapshot(const char *name, struct xnvfile_snapshot *vfile, struct xnvfile_directory *parent) ++ * @brief Initialize a snapshot-driven vfile. ++ * ++ * @param name The name which should appear in the pseudo-filesystem, ++ * identifying the vfile entry. ++ * ++ * @param vfile A pointer to a vfile descriptor to initialize ++ * from. The following fields in this structure should be filled in ++ * prior to call this routine: ++ * ++ * - .privsz is the size (in bytes) of the private data area to be ++ * reserved in the @ref snapshot_iterator "vfile iterator". A NULL ++ * value indicates that no private area should be reserved. ++ * ++ * - .datasz is the size (in bytes) of a single record to be collected ++ * by the @ref snapshot_next "next() handler" from the @ref ++ * snapshot_ops "operation descriptor". ++ * ++ * - .tag is a pointer to a mandatory vfile revision tag structure ++ * (struct xnvfile_rev_tag). This tag will be monitored for changes by ++ * the vfile core while collecting data to output, so that any update ++ * detected will cause the current snapshot data to be dropped, and ++ * the collection to restart from the beginning. To this end, any ++ * change to the data which may be part of the collected records, ++ * should also invoke xnvfile_touch() on the associated tag. ++ * ++ * - entry.lockops is a pointer to a @ref vfile_lockops "lock descriptor", ++ * defining the lock and unlock operations for the vfile. This pointer ++ * may be left to NULL, in which case the operations on the nucleus ++ * lock (i.e. nklock) will be used internally around calls to data ++ * collection handlers (see @ref snapshot_ops "operation descriptor"). 
++ * ++ * - .ops is a pointer to an @ref snapshot_ops "operation descriptor". ++ * ++ * @param parent A pointer to a virtual directory descriptor; the ++ * vfile entry will be created into this directory. If NULL, the /proc ++ * root directory will be used. /proc/xenomai is mapped on the ++ * globally available @a cobalt_vfroot vdir. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ENOMEM is returned if the virtual file entry cannot be created ++ * in the /proc hierarchy. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnvfile_init_snapshot(const char *name, ++ struct xnvfile_snapshot *vfile, ++ struct xnvfile_directory *parent) ++{ ++ struct proc_dir_entry *ppde, *pde; ++ int mode; ++ ++ XENO_BUG_ON(COBALT, vfile->tag == NULL); ++ ++ if (vfile->entry.lockops == NULL) ++ /* Defaults to nucleus lock */ ++ vfile->entry.lockops = &xnvfile_nucleus_lock.ops; ++ ++ if (parent == NULL) ++ parent = &sysroot; ++ ++ mode = vfile->ops->store ? 0644 : 0444; ++ ppde = parent->entry.pde; ++ pde = proc_create_data(name, mode, ppde, &vfile_snapshot_fops, vfile); ++ if (pde == NULL) ++ return -ENOMEM; ++ ++ vfile->entry.pde = pde; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnvfile_init_snapshot); ++ ++static void *vfile_regular_start(struct seq_file *seq, loff_t *offp) ++{ ++ struct xnvfile_regular_iterator *it = seq->private; ++ struct xnvfile_regular *vfile = it->vfile; ++ int ret; ++ ++ it->pos = *offp; ++ ++ if (vfile->entry.lockops) { ++ ret = vfile->entry.lockops->get(&vfile->entry); ++ if (ret) ++ return ERR_PTR(ret); ++ } ++ ++ /* ++ * If we have no begin() op, then we allow a single call only ++ * to ->show(), by returning the start token once. Otherwise, ++ * we are done. ++ */ ++ if (vfile->ops->begin == NULL) ++ return it->pos > 0 ? NULL : SEQ_START_TOKEN; ++ ++ return vfile->ops->begin(it); ++} ++ ++static void *vfile_regular_next(struct seq_file *seq, void *v, loff_t *offp) ++{ ++ struct xnvfile_regular_iterator *it = seq->private; ++ struct xnvfile_regular *vfile = it->vfile; ++ void *data; ++ ++ if (vfile->ops->next == NULL) ++ return NULL; ++ ++ it->pos = *offp + 1; ++ ++ data = vfile->ops->next(it); ++ if (data == NULL) ++ return NULL; ++ ++ *offp = it->pos; ++ ++ return data; ++} ++ ++static void vfile_regular_stop(struct seq_file *seq, void *v) ++{ ++ struct xnvfile_regular_iterator *it = seq->private; ++ struct xnvfile_regular *vfile = it->vfile; ++ ++ if (vfile->entry.lockops) ++ vfile->entry.lockops->put(&vfile->entry); ++ ++ if (vfile->ops->end) ++ vfile->ops->end(it); ++} ++ ++static int vfile_regular_show(struct seq_file *seq, void *v) ++{ ++ struct xnvfile_regular_iterator *it = seq->private; ++ struct xnvfile_regular *vfile = it->vfile; ++ void *data = v == SEQ_START_TOKEN ? NULL : v; ++ int ret; ++ ++ ret = vfile->ops->show(it, data); ++ ++ return ret == VFILE_SEQ_SKIP ? 
SEQ_SKIP : ret; ++} ++ ++static struct seq_operations vfile_regular_ops = { ++ .start = vfile_regular_start, ++ .next = vfile_regular_next, ++ .stop = vfile_regular_stop, ++ .show = vfile_regular_show ++}; ++ ++static int vfile_regular_open(struct inode *inode, struct file *file) ++{ ++ struct xnvfile_regular *vfile = PDE_DATA(inode); ++ struct xnvfile_regular_ops *ops = vfile->ops; ++ struct xnvfile_regular_iterator *it; ++ struct seq_file *seq; ++ int ret; ++ ++ if ((file->f_flags & O_EXCL) != 0 && xnvfile_nref(vfile) > 0) ++ return -EBUSY; ++ ++ if ((file->f_mode & FMODE_WRITE) != 0 && ops->store == NULL) ++ return -EACCES; ++ ++ if ((file->f_mode & FMODE_READ) == 0) { ++ file->private_data = NULL; ++ return 0; ++ } ++ ++ it = kzalloc(sizeof(*it) + vfile->privsz, GFP_KERNEL); ++ if (it == NULL) ++ return -ENOMEM; ++ ++ it->vfile = vfile; ++ it->pos = -1; ++ xnvfile_file(vfile) = file; ++ ++ if (ops->rewind) { ++ ret = ops->rewind(it); ++ if (ret) { ++ fail: ++ kfree(it); ++ return ret; ++ } ++ } ++ ++ ret = seq_open(file, &vfile_regular_ops); ++ if (ret) ++ goto fail; ++ ++ seq = file->private_data; ++ it->seq = seq; ++ seq->private = it; ++ xnvfile_nref(vfile)++; ++ ++ return 0; ++} ++ ++static int vfile_regular_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct xnvfile_regular_iterator *it; ++ ++ if (seq) { ++ it = seq->private; ++ if (it) { ++ --xnvfile_nref(it->vfile); ++ XENO_BUG_ON(COBALT, xnvfile_nref(it->vfile) < 0); ++ kfree(it); ++ } ++ ++ return seq_release(inode, file); ++ } ++ ++ return 0; ++} ++ ++ssize_t vfile_regular_write(struct file *file, const char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ struct xnvfile_regular *vfile = ++ PDE_DATA(file->f_path.dentry->d_inode); ++ struct xnvfile_input input; ++ ssize_t ret; ++ ++ if (vfile->entry.lockops) { ++ ret = vfile->entry.lockops->get(&vfile->entry); ++ if (ret) ++ return ret; ++ } ++ ++ input.u_buf = buf; ++ input.size = size; ++ input.vfile = &vfile->entry; ++ ++ ret = vfile->ops->store(&input); ++ ++ if (vfile->entry.lockops) ++ vfile->entry.lockops->put(&vfile->entry); ++ ++ return ret; ++} ++ ++static struct file_operations vfile_regular_fops = { ++ .open = vfile_regular_open, ++ .read = seq_read, ++ .write = vfile_regular_write, ++ .llseek = seq_lseek, ++ .release = vfile_regular_release, ++}; ++ ++/** ++ * @fn int xnvfile_init_regular(const char *name, struct xnvfile_regular *vfile, struct xnvfile_directory *parent) ++ * @brief Initialize a regular vfile. ++ * ++ * @param name The name which should appear in the pseudo-filesystem, ++ * identifying the vfile entry. ++ * ++ * @param vfile A pointer to a vfile descriptor to initialize ++ * from. The following fields in this structure should be filled in ++ * prior to call this routine: ++ * ++ * - .privsz is the size (in bytes) of the private data area to be ++ * reserved in the @ref regular_iterator "vfile iterator". A NULL ++ * value indicates that no private area should be reserved. ++ * ++ * - entry.lockops is a pointer to a @ref vfile_lockops "locking ++ * descriptor", defining the lock and unlock operations for the ++ * vfile. This pointer may be left to NULL, in which case no ++ * locking will be applied. ++ * ++ * - .ops is a pointer to an @ref regular_ops "operation descriptor". ++ * ++ * @param parent A pointer to a virtual directory descriptor; the ++ * vfile entry will be created into this directory. If NULL, the /proc ++ * root directory will be used. 
/proc/xenomai is mapped on the ++ * globally available @a cobalt_vfroot vdir. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ENOMEM is returned if the virtual file entry cannot be created ++ * in the /proc hierarchy. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnvfile_init_regular(const char *name, ++ struct xnvfile_regular *vfile, ++ struct xnvfile_directory *parent) ++{ ++ struct proc_dir_entry *ppde, *pde; ++ int mode; ++ ++ if (parent == NULL) ++ parent = &sysroot; ++ ++ mode = vfile->ops->store ? 0644 : 0444; ++ ppde = parent->entry.pde; ++ pde = proc_create_data(name, mode, ppde, &vfile_regular_fops, vfile); ++ if (pde == NULL) ++ return -ENOMEM; ++ ++ vfile->entry.pde = pde; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnvfile_init_regular); ++ ++/** ++ * @fn int xnvfile_init_dir(const char *name, struct xnvfile_directory *vdir, struct xnvfile_directory *parent) ++ * @brief Initialize a virtual directory entry. ++ * ++ * @param name The name which should appear in the pseudo-filesystem, ++ * identifying the vdir entry. ++ * ++ * @param vdir A pointer to the virtual directory descriptor to ++ * initialize. ++ * ++ * @param parent A pointer to a virtual directory descriptor standing ++ * for the parent directory of the new vdir. If NULL, the /proc root ++ * directory will be used. /proc/xenomai is mapped on the globally ++ * available @a cobalt_vfroot vdir. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ENOMEM is returned if the virtual directory entry cannot be ++ * created in the /proc hierarchy. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnvfile_init_dir(const char *name, ++ struct xnvfile_directory *vdir, ++ struct xnvfile_directory *parent) ++{ ++ struct proc_dir_entry *ppde, *pde; ++ ++ if (parent == NULL) ++ parent = &sysroot; ++ ++ ppde = parent->entry.pde; ++ pde = proc_mkdir(name, ppde); ++ if (pde == NULL) ++ return -ENOMEM; ++ ++ vdir->entry.pde = pde; ++ vdir->entry.lockops = NULL; ++ vdir->entry.private = NULL; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnvfile_init_dir); ++ ++/** ++ * @fn int xnvfile_init_link(const char *from, const char *to, struct xnvfile_link *vlink, struct xnvfile_directory *parent) ++ * @brief Initialize a virtual link entry. ++ * ++ * @param from The name which should appear in the pseudo-filesystem, ++ * identifying the vlink entry. ++ * ++ * @param to The target file name which should be referred to ++ * symbolically by @a name. ++ * ++ * @param vlink A pointer to the virtual link descriptor to ++ * initialize. ++ * ++ * @param parent A pointer to a virtual directory descriptor standing ++ * for the parent directory of the new vlink. If NULL, the /proc root ++ * directory will be used. /proc/xenomai is mapped on the globally ++ * available @a cobalt_vfroot vdir. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ENOMEM is returned if the virtual link entry cannot be created ++ * in the /proc hierarchy. 
++ * ++ * @coretags{secondary-only} ++ */ ++int xnvfile_init_link(const char *from, ++ const char *to, ++ struct xnvfile_link *vlink, ++ struct xnvfile_directory *parent) ++{ ++ struct proc_dir_entry *ppde, *pde; ++ ++ if (parent == NULL) ++ parent = &sysroot; ++ ++ ppde = parent->entry.pde; ++ pde = proc_symlink(from, ppde, to); ++ if (pde == NULL) ++ return -ENOMEM; ++ ++ vlink->entry.pde = pde; ++ vlink->entry.lockops = NULL; ++ vlink->entry.private = NULL; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnvfile_init_link); ++ ++/** ++ * @fn void xnvfile_destroy(struct xnvfile *vfile) ++ * @brief Removes a virtual file entry. ++ * ++ * @param vfile A pointer to the virtual file descriptor to ++ * remove. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnvfile_destroy(struct xnvfile *vfile) ++{ ++ proc_remove(vfile->pde); ++} ++EXPORT_SYMBOL_GPL(xnvfile_destroy); ++ ++/** ++ * @fn ssize_t xnvfile_get_blob(struct xnvfile_input *input, void *data, size_t size) ++ * @brief Read in a data bulk written to the vfile. ++ * ++ * When writing to a vfile, the associated store() handler from the ++ * @ref snapshot_store "snapshot-driven vfile" or @ref regular_store ++ * "regular vfile" is called, with a single argument describing the ++ * input data. xnvfile_get_blob() retrieves this data as an untyped ++ * binary blob, and copies it back to the caller's buffer. ++ * ++ * @param input A pointer to the input descriptor passed to the ++ * store() handler. ++ * ++ * @param data The address of the destination buffer to copy the input ++ * data to. ++ * ++ * @param size The maximum number of bytes to copy to the destination ++ * buffer. If @a size is larger than the actual data size, the input ++ * is truncated to @a size. ++ * ++ * @return The number of bytes read and copied to the destination ++ * buffer upon success. Otherwise, a negative error code is returned: ++ * ++ * - -EFAULT indicates an invalid source buffer address. ++ * ++ * @coretags{secondary-only} ++ */ ++ssize_t xnvfile_get_blob(struct xnvfile_input *input, ++ void *data, size_t size) ++{ ++ ssize_t nbytes = input->size; ++ ++ if (nbytes > size) ++ nbytes = size; ++ ++ if (nbytes > 0 && copy_from_user(data, input->u_buf, nbytes)) ++ return -EFAULT; ++ ++ return nbytes; ++} ++EXPORT_SYMBOL_GPL(xnvfile_get_blob); ++ ++/** ++ * @fn ssize_t xnvfile_get_string(struct xnvfile_input *input, char *s, size_t maxlen) ++ * @brief Read in a C-string written to the vfile. ++ * ++ * When writing to a vfile, the associated store() handler from the ++ * @ref snapshot_store "snapshot-driven vfile" or @ref regular_store ++ * "regular vfile" is called, with a single argument describing the ++ * input data. xnvfile_get_string() retrieves this data as a ++ * null-terminated character string, and copies it back to the ++ * caller's buffer. ++ * ++ * @param input A pointer to the input descriptor passed to the ++ * store() handler. ++ * ++ * @param s The address of the destination string buffer to copy the ++ * input data to. ++ * ++ * @param maxlen The maximum number of bytes to copy to the ++ * destination buffer, including the ending null character. If @a ++ * maxlen is larger than the actual string length, the input is ++ * truncated to @a maxlen. ++ * ++ * @return The number of characters read upon success. Otherwise, a ++ * negative error code is returned: ++ * ++ * - -EFAULT indicates an invalid source buffer address. 
++ * ++ * @coretags{secondary-only} ++ */ ++ssize_t xnvfile_get_string(struct xnvfile_input *input, ++ char *s, size_t maxlen) ++{ ++ ssize_t nbytes, eol; ++ ++ if (maxlen < 1) ++ return -EINVAL; ++ ++ nbytes = xnvfile_get_blob(input, s, maxlen - 1); ++ if (nbytes < 0) ++ return nbytes; ++ ++ eol = nbytes; ++ if (eol > 0 && s[eol - 1] == '\n') ++ eol--; ++ ++ s[eol] = '\0'; ++ ++ return nbytes; ++} ++EXPORT_SYMBOL_GPL(xnvfile_get_string); ++ ++/** ++ * @fn ssize_t xnvfile_get_integer(struct xnvfile_input *input, long *valp) ++ * @brief Evaluate the string written to the vfile as a long integer. ++ * ++ * When writing to a vfile, the associated store() handler from the ++ * @ref snapshot_store "snapshot-driven vfile" or @ref regular_store ++ * "regular vfile" is called, with a single argument describing the ++ * input data. xnvfile_get_integer() retrieves and interprets this ++ * data as a long integer, and copies the resulting value back to @a ++ * valp. ++ * ++ * The long integer can be expressed in decimal, octal or hexadecimal ++ * bases depending on the prefix found. ++ * ++ * @param input A pointer to the input descriptor passed to the ++ * store() handler. ++ * ++ * @param valp The address of a long integer variable to receive the ++ * value. ++ * ++ * @return The number of characters read while evaluating the input as ++ * a long integer upon success. Otherwise, a negative error code is ++ * returned: ++ * ++ * - -EINVAL indicates a parse error on the input stream; the written ++ * text cannot be evaluated as a long integer. ++ * ++ * - -EFAULT indicates an invalid source buffer address. ++ * ++ * @coretags{secondary-only} ++ */ ++ssize_t xnvfile_get_integer(struct xnvfile_input *input, long *valp) ++{ ++ char *end, buf[32]; ++ ssize_t nbytes; ++ long val; ++ ++ nbytes = xnvfile_get_blob(input, buf, sizeof(buf) - 1); ++ if (nbytes < 0) ++ return nbytes; ++ ++ if (nbytes == 0) ++ return -EINVAL; ++ ++ buf[nbytes] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (*end != '\0' && !isspace(*end)) ++ return -EINVAL; ++ ++ *valp = val; ++ ++ return nbytes; ++} ++EXPORT_SYMBOL_GPL(xnvfile_get_integer); ++ ++int __vfile_hostlock_get(struct xnvfile *vfile) ++{ ++ struct xnvfile_hostlock_class *lc; ++ ++ lc = container_of(vfile->lockops, struct xnvfile_hostlock_class, ops); ++ mutex_lock(&lc->mutex); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(__vfile_hostlock_get); ++ ++void __vfile_hostlock_put(struct xnvfile *vfile) ++{ ++ struct xnvfile_hostlock_class *lc; ++ ++ lc = container_of(vfile->lockops, struct xnvfile_hostlock_class, ops); ++ mutex_unlock(&lc->mutex); ++} ++EXPORT_SYMBOL_GPL(__vfile_hostlock_put); ++ ++static int __vfile_nklock_get(struct xnvfile *vfile) ++{ ++ struct xnvfile_nklock_class *lc; ++ ++ lc = container_of(vfile->lockops, struct xnvfile_nklock_class, ops); ++ xnlock_get_irqsave(&nklock, lc->s); ++ ++ return 0; ++} ++ ++static void __vfile_nklock_put(struct xnvfile *vfile) ++{ ++ struct xnvfile_nklock_class *lc; ++ ++ lc = container_of(vfile->lockops, struct xnvfile_nklock_class, ops); ++ xnlock_put_irqrestore(&nklock, lc->s); ++} ++ ++struct xnvfile_nklock_class xnvfile_nucleus_lock = { ++ .ops = { ++ .get = __vfile_nklock_get, ++ .put = __vfile_nklock_put, ++ }, ++}; ++ ++int __init xnvfile_init_root(void) ++{ ++ struct xnvfile_directory *vdir = &cobalt_vfroot; ++ struct proc_dir_entry *pde; ++ ++ pde = proc_mkdir("xenomai", NULL); ++ if (pde == NULL) ++ return -ENOMEM; ++ ++ vdir->entry.pde = pde; ++ vdir->entry.lockops = NULL; ++ vdir->entry.private = NULL; ++ ++ 
return 0; ++} ++ ++void xnvfile_destroy_root(void) ++{ ++ cobalt_vfroot.entry.pde = NULL; ++ remove_proc_entry("xenomai", NULL); ++} ++ ++/** @} */ +--- linux/kernel/xenomai/heap.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/heap.c 2021-04-07 16:01:25.755636270 +0800 +@@ -0,0 +1,862 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_heap Dynamic memory allocation services ++ * ++ * This code implements a variant of the allocator described in ++ * "Design of a General Purpose Memory Allocator for the 4.3BSD Unix ++ * Kernel" by Marshall K. McKusick and Michael J. Karels (USENIX ++ * 1988), see http://docs.FreeBSD.org/44doc/papers/kernmalloc.pdf. ++ * The free page list is maintained in rbtrees for fast lookups of ++ * multi-page memory ranges, and pages holding bucketed memory have a ++ * fast allocation bitmap to manage their blocks internally. ++ *@{ ++ */ ++struct xnheap cobalt_heap; /* System heap */ ++EXPORT_SYMBOL_GPL(cobalt_heap); ++ ++static LIST_HEAD(heapq); /* Heap list for v-file dump */ ++ ++static int nrheaps; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static struct xnvfile_rev_tag vfile_tag; ++ ++static struct xnvfile_snapshot_ops vfile_ops; ++ ++struct vfile_priv { ++ struct xnheap *curr; ++}; ++ ++struct vfile_data { ++ size_t all_mem; ++ size_t free_mem; ++ char name[XNOBJECT_NAME_LEN]; ++}; ++ ++static struct xnvfile_snapshot vfile = { ++ .privsz = sizeof(struct vfile_priv), ++ .datasz = sizeof(struct vfile_data), ++ .tag = &vfile_tag, ++ .ops = &vfile_ops, ++}; ++ ++static int vfile_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_priv *priv = xnvfile_iterator_priv(it); ++ ++ if (list_empty(&heapq)) { ++ priv->curr = NULL; ++ return 0; ++ } ++ ++ priv->curr = list_first_entry(&heapq, struct xnheap, next); ++ ++ return nrheaps; ++} ++ ++static int vfile_next(struct xnvfile_snapshot_iterator *it, void *data) ++{ ++ struct vfile_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_data *p = data; ++ struct xnheap *heap; ++ ++ if (priv->curr == NULL) ++ return 0; /* We are done. 
*/ ++ ++ heap = priv->curr; ++ if (list_is_last(&heap->next, &heapq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_entry(heap->next.next, ++ struct xnheap, next); ++ ++ p->all_mem = xnheap_get_size(heap); ++ p->free_mem = xnheap_get_free(heap); ++ knamecpy(p->name, heap->name); ++ ++ return 1; ++} ++ ++static int vfile_show(struct xnvfile_snapshot_iterator *it, void *data) ++{ ++ struct vfile_data *p = data; ++ ++ if (p == NULL) ++ xnvfile_printf(it, "%9s %9s %s\n", ++ "TOTAL", "FREE", "NAME"); ++ else ++ xnvfile_printf(it, "%9zu %9zu %s\n", ++ p->all_mem, ++ p->free_mem, ++ p->name); ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_ops = { ++ .rewind = vfile_rewind, ++ .next = vfile_next, ++ .show = vfile_show, ++}; ++ ++void xnheap_init_proc(void) ++{ ++ xnvfile_init_snapshot("heap", &vfile, &cobalt_vfroot); ++} ++ ++void xnheap_cleanup_proc(void) ++{ ++ xnvfile_destroy_snapshot(&vfile); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++enum xnheap_pgtype { ++ page_free =0, ++ page_cont =1, ++ page_list =2 ++}; ++ ++static inline u32 __always_inline ++gen_block_mask(int log2size) ++{ ++ return -1U >> (32 - (XNHEAP_PAGE_SIZE >> log2size)); ++} ++ ++static inline __always_inline ++int addr_to_pagenr(struct xnheap *heap, void *p) ++{ ++ return ((void *)p - heap->membase) >> XNHEAP_PAGE_SHIFT; ++} ++ ++static inline __always_inline ++void *pagenr_to_addr(struct xnheap *heap, int pg) ++{ ++ return heap->membase + (pg << XNHEAP_PAGE_SHIFT); ++} ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_MEMORY ++/* ++ * Setting page_cont/page_free in the page map is only required for ++ * enabling full checking of the block address in free requests, which ++ * may be extremely time-consuming when deallocating huge blocks ++ * spanning thousands of pages. We only do such marking when running ++ * in memory debug mode. ++ */ ++static inline bool ++page_is_valid(struct xnheap *heap, int pg) ++{ ++ switch (heap->pagemap[pg].type) { ++ case page_free: ++ case page_cont: ++ return false; ++ case page_list: ++ default: ++ return true; ++ } ++} ++ ++static void mark_pages(struct xnheap *heap, ++ int pg, int nrpages, ++ enum xnheap_pgtype type) ++{ ++ while (nrpages-- > 0) ++ heap->pagemap[pg].type = type; ++} ++ ++#else ++ ++static inline bool ++page_is_valid(struct xnheap *heap, int pg) ++{ ++ return true; ++} ++ ++static void mark_pages(struct xnheap *heap, ++ int pg, int nrpages, ++ enum xnheap_pgtype type) ++{ } ++ ++#endif ++ ++static struct xnheap_range * ++search_size_ge(struct rb_root *t, size_t size) ++{ ++ struct rb_node *rb, *deepest = NULL; ++ struct xnheap_range *r; ++ ++ /* ++ * We first try to find an exact match. If that fails, we walk ++ * the tree in logical order by increasing size value from the ++ * deepest node traversed until we find the first successor to ++ * that node, or nothing beyond it, whichever comes first. 
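++	 * The net effect is a best-fit lookup: the range returned is
++	 * the smallest free range which is large enough for the
++	 * request, or NULL if no such range exists.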
++ */ ++ rb = t->rb_node; ++ while (rb) { ++ deepest = rb; ++ r = rb_entry(rb, struct xnheap_range, size_node); ++ if (size < r->size) { ++ rb = rb->rb_left; ++ continue; ++ } ++ if (size > r->size) { ++ rb = rb->rb_right; ++ continue; ++ } ++ return r; ++ } ++ ++ rb = deepest; ++ while (rb) { ++ r = rb_entry(rb, struct xnheap_range, size_node); ++ if (size <= r->size) ++ return r; ++ rb = rb_next(rb); ++ } ++ ++ return NULL; ++} ++ ++static struct xnheap_range * ++search_left_mergeable(struct xnheap *heap, struct xnheap_range *r) ++{ ++ struct rb_node *node = heap->addr_tree.rb_node; ++ struct xnheap_range *p; ++ ++ while (node) { ++ p = rb_entry(node, struct xnheap_range, addr_node); ++ if ((void *)p + p->size == (void *)r) ++ return p; ++ if (&r->addr_node < node) ++ node = node->rb_left; ++ else ++ node = node->rb_right; ++ } ++ ++ return NULL; ++} ++ ++static struct xnheap_range * ++search_right_mergeable(struct xnheap *heap, struct xnheap_range *r) ++{ ++ struct rb_node *node = heap->addr_tree.rb_node; ++ struct xnheap_range *p; ++ ++ while (node) { ++ p = rb_entry(node, struct xnheap_range, addr_node); ++ if ((void *)r + r->size == (void *)p) ++ return p; ++ if (&r->addr_node < node) ++ node = node->rb_left; ++ else ++ node = node->rb_right; ++ } ++ ++ return NULL; ++} ++ ++static void insert_range_bysize(struct xnheap *heap, struct xnheap_range *r) ++{ ++ struct rb_node **new = &heap->size_tree.rb_node, *parent = NULL; ++ struct xnheap_range *p; ++ ++ while (*new) { ++ p = container_of(*new, struct xnheap_range, size_node); ++ parent = *new; ++ if (r->size <= p->size) ++ new = &((*new)->rb_left); ++ else ++ new = &((*new)->rb_right); ++ } ++ ++ rb_link_node(&r->size_node, parent, new); ++ rb_insert_color(&r->size_node, &heap->size_tree); ++} ++ ++static void insert_range_byaddr(struct xnheap *heap, struct xnheap_range *r) ++{ ++ struct rb_node **new = &heap->addr_tree.rb_node, *parent = NULL; ++ struct xnheap_range *p; ++ ++ while (*new) { ++ p = container_of(*new, struct xnheap_range, addr_node); ++ parent = *new; ++ if (r < p) ++ new = &((*new)->rb_left); ++ else ++ new = &((*new)->rb_right); ++ } ++ ++ rb_link_node(&r->addr_node, parent, new); ++ rb_insert_color(&r->addr_node, &heap->addr_tree); ++} ++ ++static int reserve_page_range(struct xnheap *heap, size_t size) ++{ ++ struct xnheap_range *new, *splitr; ++ ++ /* Find a suitable range of pages covering 'size'. */ ++ new = search_size_ge(&heap->size_tree, size); ++ if (new == NULL) ++ return -1; ++ ++ rb_erase(&new->size_node, &heap->size_tree); ++ if (new->size == size) { ++ rb_erase(&new->addr_node, &heap->addr_tree); ++ return addr_to_pagenr(heap, new); ++ } ++ ++ /* ++ * The free range fetched is larger than what we need: split ++ * it in two, the upper part is returned to the caller, the ++ * lower part is sent back to the free list, which makes ++ * reindexing by address pointless. 
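++	 * Only the size index needs updating: the remainder keeps its
++	 * original base address, so its node in the address tree
++	 * remains valid as is.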
++ */ ++ splitr = new; ++ splitr->size -= size; ++ new = (struct xnheap_range *)((void *)new + splitr->size); ++ insert_range_bysize(heap, splitr); ++ ++ return addr_to_pagenr(heap, new); ++} ++ ++static void release_page_range(struct xnheap *heap, ++ void *page, size_t size) ++{ ++ struct xnheap_range *freed = page, *left, *right; ++ bool addr_linked = false; ++ ++ freed->size = size; ++ ++ left = search_left_mergeable(heap, freed); ++ if (left) { ++ rb_erase(&left->size_node, &heap->size_tree); ++ left->size += freed->size; ++ freed = left; ++ addr_linked = true; ++ } ++ ++ right = search_right_mergeable(heap, freed); ++ if (right) { ++ rb_erase(&right->size_node, &heap->size_tree); ++ freed->size += right->size; ++ if (addr_linked) ++ rb_erase(&right->addr_node, &heap->addr_tree); ++ else ++ rb_replace_node(&right->addr_node, &freed->addr_node, ++ &heap->addr_tree); ++ } else if (!addr_linked) ++ insert_range_byaddr(heap, freed); ++ ++ insert_range_bysize(heap, freed); ++ mark_pages(heap, addr_to_pagenr(heap, page), ++ size >> XNHEAP_PAGE_SHIFT, page_free); ++} ++ ++static void add_page_front(struct xnheap *heap, ++ int pg, int log2size) ++{ ++ struct xnheap_pgentry *new, *head, *next; ++ int ilog; ++ ++ /* Insert page at front of the per-bucket page list. */ ++ ++ ilog = log2size - XNHEAP_MIN_LOG2; ++ new = &heap->pagemap[pg]; ++ if (heap->buckets[ilog] == -1U) { ++ heap->buckets[ilog] = pg; ++ new->prev = new->next = pg; ++ } else { ++ head = &heap->pagemap[heap->buckets[ilog]]; ++ new->prev = heap->buckets[ilog]; ++ new->next = head->next; ++ next = &heap->pagemap[new->next]; ++ next->prev = pg; ++ head->next = pg; ++ heap->buckets[ilog] = pg; ++ } ++} ++ ++static void remove_page(struct xnheap *heap, ++ int pg, int log2size) ++{ ++ struct xnheap_pgentry *old, *prev, *next; ++ int ilog = log2size - XNHEAP_MIN_LOG2; ++ ++ /* Remove page from the per-bucket page list. */ ++ ++ old = &heap->pagemap[pg]; ++ if (pg == old->next) ++ heap->buckets[ilog] = -1U; ++ else { ++ if (pg == heap->buckets[ilog]) ++ heap->buckets[ilog] = old->next; ++ prev = &heap->pagemap[old->prev]; ++ prev->next = old->next; ++ next = &heap->pagemap[old->next]; ++ next->prev = old->prev; ++ } ++} ++ ++static void move_page_front(struct xnheap *heap, ++ int pg, int log2size) ++{ ++ int ilog = log2size - XNHEAP_MIN_LOG2; ++ ++ /* Move page at front of the per-bucket page list. */ ++ ++ if (heap->buckets[ilog] == pg) ++ return; /* Already at front, no move. */ ++ ++ remove_page(heap, pg, log2size); ++ add_page_front(heap, pg, log2size); ++} ++ ++static void move_page_back(struct xnheap *heap, ++ int pg, int log2size) ++{ ++ struct xnheap_pgentry *old, *last, *head, *next; ++ int ilog; ++ ++ /* Move page at end of the per-bucket page list. */ ++ ++ old = &heap->pagemap[pg]; ++ if (pg == old->next) /* Singleton, no move. */ ++ return; ++ ++ remove_page(heap, pg, log2size); ++ ++ ilog = log2size - XNHEAP_MIN_LOG2; ++ head = &heap->pagemap[heap->buckets[ilog]]; ++ last = &heap->pagemap[head->prev]; ++ old->prev = head->prev; ++ old->next = last->next; ++ next = &heap->pagemap[old->next]; ++ next->prev = pg; ++ last->next = pg; ++} ++ ++static void *add_free_range(struct xnheap *heap, ++ size_t bsize, int log2size) ++{ ++ int pg; ++ ++ pg = reserve_page_range(heap, ALIGN(bsize, XNHEAP_PAGE_SIZE)); ++ if (pg < 0) ++ return NULL; ++ ++ /* ++ * Update the page entry. If @log2size is non-zero ++ * (i.e. bsize < XNHEAP_PAGE_SIZE), bsize is (1 << log2Size) ++ * between 2^XNHEAP_MIN_LOG2 and 2^(XNHEAP_PAGE_SHIFT - 1). 
++ * Save the log2 power into entry.type, then update the ++ * per-page allocation bitmap to reserve the first block. ++ * ++ * Otherwise, we have a larger block which may span multiple ++ * pages: set entry.type to page_list, indicating the start of ++ * the page range, and entry.bsize to the overall block size. ++ */ ++ if (log2size) { ++ heap->pagemap[pg].type = log2size; ++ /* ++ * Mark the first object slot (#0) as busy, along with ++ * the leftmost bits we won't use for this log2 size. ++ */ ++ heap->pagemap[pg].map = ~gen_block_mask(log2size) | 1; ++ /* ++ * Insert the new page at front of the per-bucket page ++ * list, enforcing the assumption that pages with free ++ * space live close to the head of this list. ++ */ ++ add_page_front(heap, pg, log2size); ++ } else { ++ heap->pagemap[pg].type = page_list; ++ heap->pagemap[pg].bsize = (u32)bsize; ++ mark_pages(heap, pg + 1, ++ (bsize >> XNHEAP_PAGE_SHIFT) - 1, page_cont); ++ } ++ ++ heap->used_size += bsize; ++ ++ return pagenr_to_addr(heap, pg); ++} ++ ++/** ++ * @fn void *xnheap_alloc(struct xnheap *heap, size_t size) ++ * @brief Allocate a memory block from a memory heap. ++ * ++ * Allocates a contiguous region of memory from an active memory heap. ++ * Such allocation is guaranteed to be time-bounded. ++ * ++ * @param heap The descriptor address of the heap to get memory from. ++ * ++ * @param size The size in bytes of the requested block. ++ * ++ * @return The address of the allocated region upon success, or NULL ++ * if no memory is available from the specified heap. ++ * ++ * @coretags{unrestricted} ++ */ ++void *xnheap_alloc(struct xnheap *heap, size_t size) ++{ ++ int log2size, ilog, pg, b = -1; ++ size_t bsize; ++ void *block; ++ spl_t s; ++ ++ if (size == 0) ++ return NULL; ++ ++ if (size < XNHEAP_MIN_ALIGN) { ++ bsize = size = XNHEAP_MIN_ALIGN; ++ log2size = XNHEAP_MIN_LOG2; ++ } else { ++ log2size = ilog2(size); ++ if (log2size < XNHEAP_PAGE_SHIFT) { ++ if (size & (size - 1)) ++ log2size++; ++ bsize = 1 << log2size; ++ } else ++ bsize = ALIGN(size, XNHEAP_PAGE_SIZE); ++ } ++ ++ /* ++ * Allocate entire pages directly from the pool whenever the ++ * block is larger or equal to XNHEAP_PAGE_SIZE. Otherwise, ++ * use bucketed memory. ++ * ++ * NOTE: Fully busy pages from bucketed memory are moved back ++ * at the end of the per-bucket page list, so that we may ++ * always assume that either the heading page has some room ++ * available, or no room is available from any page linked to ++ * this list, in which case we should immediately add a fresh ++ * page. ++ */ ++ xnlock_get_irqsave(&heap->lock, s); ++ ++ if (bsize >= XNHEAP_PAGE_SIZE) ++ /* Add a range of contiguous free pages. */ ++ block = add_free_range(heap, bsize, 0); ++ else { ++ ilog = log2size - XNHEAP_MIN_LOG2; ++ XENO_WARN_ON(MEMORY, ilog < 0 || ilog >= XNHEAP_MAX_BUCKETS); ++ pg = heap->buckets[ilog]; ++ /* ++ * Find a block in the heading page if any. If there ++ * is none, there won't be any down the list: add a ++ * new page right away. ++ */ ++ if (pg < 0 || heap->pagemap[pg].map == -1U) ++ block = add_free_range(heap, bsize, log2size); ++ else { ++ b = ffs(~heap->pagemap[pg].map) - 1; ++ /* ++ * Got one block from the heading per-bucket ++ * page, tag it as busy in the per-page ++ * allocation map. 
++ */ ++ heap->pagemap[pg].map |= (1U << b); ++ heap->used_size += bsize; ++ block = heap->membase + ++ (pg << XNHEAP_PAGE_SHIFT) + ++ (b << log2size); ++ if (heap->pagemap[pg].map == -1U) ++ move_page_back(heap, pg, log2size); ++ } ++ } ++ ++ xnlock_put_irqrestore(&heap->lock, s); ++ ++ return block; ++} ++EXPORT_SYMBOL_GPL(xnheap_alloc); ++ ++/** ++ * @fn void xnheap_free(struct xnheap *heap, void *block) ++ * @brief Release a block to a memory heap. ++ * ++ * Releases a memory block to a heap. ++ * ++ * @param heap The heap descriptor. ++ * ++ * @param block The block to be returned to the heap. ++ * ++ * @coretags{unrestricted} ++ */ ++void xnheap_free(struct xnheap *heap, void *block) ++{ ++ unsigned long pgoff, boff; ++ int log2size, pg, n; ++ size_t bsize; ++ u32 oldmap; ++ spl_t s; ++ ++ xnlock_get_irqsave(&heap->lock, s); ++ ++ /* Compute the heading page number in the page map. */ ++ pgoff = block - heap->membase; ++ pg = pgoff >> XNHEAP_PAGE_SHIFT; ++ ++ if (!page_is_valid(heap, pg)) ++ goto bad; ++ ++ switch (heap->pagemap[pg].type) { ++ case page_list: ++ bsize = heap->pagemap[pg].bsize; ++ XENO_WARN_ON(MEMORY, (bsize & (XNHEAP_PAGE_SIZE - 1)) != 0); ++ release_page_range(heap, pagenr_to_addr(heap, pg), bsize); ++ break; ++ ++ default: ++ log2size = heap->pagemap[pg].type; ++ bsize = (1 << log2size); ++ XENO_WARN_ON(MEMORY, bsize >= XNHEAP_PAGE_SIZE); ++ boff = pgoff & ~XNHEAP_PAGE_MASK; ++ if ((boff & (bsize - 1)) != 0) /* Not at block start? */ ++ goto bad; ++ ++ n = boff >> log2size; /* Block position in page. */ ++ oldmap = heap->pagemap[pg].map; ++ heap->pagemap[pg].map &= ~(1U << n); ++ ++ /* ++ * If the page the block was sitting on is fully idle, ++ * return it to the pool. Otherwise, check whether ++ * that page is transitioning from fully busy to ++ * partially busy state, in which case it should move ++ * toward the front of the per-bucket page list. ++ */ ++ if (heap->pagemap[pg].map == ~gen_block_mask(log2size)) { ++ remove_page(heap, pg, log2size); ++ release_page_range(heap, pagenr_to_addr(heap, pg), ++ XNHEAP_PAGE_SIZE); ++ } else if (oldmap == -1U) ++ move_page_front(heap, pg, log2size); ++ } ++ ++ heap->used_size -= bsize; ++ ++ xnlock_put_irqrestore(&heap->lock, s); ++ ++ return; ++bad: ++ xnlock_put_irqrestore(&heap->lock, s); ++ ++ XENO_WARN(MEMORY, 1, "invalid block %p in heap %s", ++ block, heap->name); ++} ++EXPORT_SYMBOL_GPL(xnheap_free); ++ ++ssize_t xnheap_check_block(struct xnheap *heap, void *block) ++{ ++ unsigned long pg, pgoff, boff; ++ ssize_t ret = -EINVAL; ++ size_t bsize; ++ spl_t s; ++ ++ xnlock_get_irqsave(&heap->lock, s); ++ ++ /* Calculate the page number from the block address. */ ++ pgoff = block - heap->membase; ++ pg = pgoff >> XNHEAP_PAGE_SHIFT; ++ if (page_is_valid(heap, pg)) { ++ if (heap->pagemap[pg].type == page_list) ++ bsize = heap->pagemap[pg].bsize; ++ else { ++ bsize = (1 << heap->pagemap[pg].type); ++ boff = pgoff & ~XNHEAP_PAGE_MASK; ++ if ((boff & (bsize - 1)) != 0) /* Not at block start? */ ++ goto out; ++ } ++ ret = (ssize_t)bsize; ++ } ++out: ++ xnlock_put_irqrestore(&heap->lock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnheap_check_block); ++ ++/** ++ * @fn xnheap_init(struct xnheap *heap, void *membase, u32 size) ++ * @brief Initialize a memory heap. ++ * ++ * Initializes a memory heap suitable for time-bounded allocation ++ * requests of dynamic memory. ++ * ++ * @param heap The address of a heap descriptor to initialize. ++ * ++ * @param membase The address of the storage area. 
++ * ++ * @param size The size in bytes of the storage area. @a size must be ++ * a multiple of XNHEAP_PAGE_SIZE and smaller than (4Gb - PAGE_SIZE) ++ * in the current implementation. ++ * ++ * @return 0 is returned upon success, or: ++ * ++ * - -EINVAL is returned if @a size is either greater than ++ * XNHEAP_MAX_HEAPSZ, or not aligned on PAGE_SIZE. ++ * ++ * - -ENOMEM is returned upon failure of allocating the meta-data area ++ * used internally to maintain the heap. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnheap_init(struct xnheap *heap, void *membase, size_t size) ++{ ++ int n, nrpages; ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ if (size > XNHEAP_MAX_HEAPSZ || !PAGE_ALIGNED(size)) ++ return -EINVAL; ++ ++ /* Reset bucket page lists, all empty. */ ++ for (n = 0; n < XNHEAP_MAX_BUCKETS; n++) ++ heap->buckets[n] = -1U; ++ ++ xnlock_init(&heap->lock); ++ ++ nrpages = size >> XNHEAP_PAGE_SHIFT; ++ heap->pagemap = kzalloc(sizeof(struct xnheap_pgentry) * nrpages, ++ GFP_KERNEL); ++ if (heap->pagemap == NULL) ++ return -ENOMEM; ++ ++ heap->membase = membase; ++ heap->usable_size = size; ++ heap->used_size = 0; ++ ++ /* ++ * The free page pool is maintained as a set of ranges of ++ * contiguous pages indexed by address and size in rbtrees. ++ * Initially, we have a single range in those trees covering ++ * the whole memory we have been given for the heap. Over ++ * time, that range will be split then possibly re-merged back ++ * as allocations and deallocations take place. ++ */ ++ heap->size_tree = RB_ROOT; ++ heap->addr_tree = RB_ROOT; ++ release_page_range(heap, membase, size); ++ ++ /* Default name, override with xnheap_set_name() */ ++ ksformat(heap->name, sizeof(heap->name), "(%p)", heap); ++ ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&heap->next, &heapq); ++ nrheaps++; ++ xnvfile_touch_tag(&vfile_tag); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnheap_init); ++ ++/** ++ * @fn void xnheap_destroy(struct xnheap *heap) ++ * @brief Destroys a memory heap. ++ * ++ * Destroys a memory heap. ++ * ++ * @param heap The heap descriptor. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnheap_destroy(struct xnheap *heap) ++{ ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ list_del(&heap->next); ++ nrheaps--; ++ xnvfile_touch_tag(&vfile_tag); ++ xnlock_put_irqrestore(&nklock, s); ++ kfree(heap->pagemap); ++} ++EXPORT_SYMBOL_GPL(xnheap_destroy); ++ ++/** ++ * @fn xnheap_set_name(struct xnheap *heap,const char *name,...) ++ * @brief Set the heap's name string. ++ * ++ * Set the heap name that will be used in statistic outputs. ++ * ++ * @param heap The address of a heap descriptor. ++ * ++ * @param name Name displayed in statistic outputs. This parameter can ++ * be a printk()-like format argument list. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnheap_set_name(struct xnheap *heap, const char *name, ...) ++{ ++ va_list args; ++ ++ va_start(args, name); ++ kvsformat(heap->name, sizeof(heap->name), name, args); ++ va_end(args); ++} ++EXPORT_SYMBOL_GPL(xnheap_set_name); ++ ++void *xnheap_vmalloc(size_t size) ++{ ++ /* ++ * We want memory used in real-time context to be pulled from ++ * ZONE_NORMAL, however we don't need it to be physically ++ * contiguous. ++ * ++ * 32bit systems which would need HIGHMEM for running a Cobalt ++ * configuration would also be required to support PTE ++ * pinning, which not all architectures provide. 
Moreover, ++ * pinning PTEs eagerly for a potentially (very) large amount ++ * of memory may quickly degrade performance. ++ * ++ * If using a different kernel/user memory split cannot be the ++ * answer for those configs, it's likely that basing such ++ * software on a 32bit system had to be wrong in the first ++ * place anyway. ++ */ ++ return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); ++} ++EXPORT_SYMBOL_GPL(xnheap_vmalloc); ++ ++void xnheap_vfree(void *p) ++{ ++ vfree(p); ++} ++EXPORT_SYMBOL_GPL(xnheap_vfree); ++ ++/** @} */ +--- linux/kernel/xenomai/debug.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/debug.c 2021-04-07 16:01:25.749636278 +0800 +@@ -0,0 +1,659 @@ ++/* ++ * Copyright (C) 2010 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "posix/process.h" ++#include "debug.h" ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_debug Debugging services ++ * @{ ++ */ ++struct xnvfile_directory cobalt_debug_vfroot; ++EXPORT_SYMBOL_GPL(cobalt_debug_vfroot); ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_TRACE_RELAX ++ ++#define SYMBOL_HSLOTS (1 << 8) ++ ++struct hashed_symbol { ++ struct hashed_symbol *next; ++ char symbol[0]; ++}; ++ ++static struct hashed_symbol *symbol_jhash[SYMBOL_HSLOTS]; ++ ++static struct xnheap memory_pool; ++ ++/* ++ * This is a permanent storage for ASCII strings which comes handy to ++ * get a unique and constant reference to a symbol while preserving ++ * storage space. Hashed symbols have infinite lifetime and are never ++ * flushed. ++ */ ++DEFINE_PRIVATE_XNLOCK(symbol_lock); ++ ++static const char *hash_symbol(const char *symbol) ++{ ++ struct hashed_symbol *p, **h; ++ const char *str; ++ size_t len; ++ u32 hash; ++ spl_t s; ++ ++ len = strlen(symbol); ++ hash = jhash(symbol, len, 0); ++ ++ xnlock_get_irqsave(&symbol_lock, s); ++ ++ h = &symbol_jhash[hash & (SYMBOL_HSLOTS - 1)]; ++ p = *h; ++ while (p && ++ (*p->symbol != *symbol || ++ strcmp(p->symbol + 1, symbol + 1))) ++ p = p->next; ++ ++ if (p) ++ goto done; ++ ++ p = xnheap_alloc(&memory_pool, sizeof(*p) + len + 1); ++ if (p == NULL) { ++ str = NULL; ++ goto out; ++ } ++ ++ strcpy(p->symbol, symbol); ++ p->next = *h; ++ *h = p; ++done: ++ str = p->symbol; ++out: ++ xnlock_put_irqrestore(&symbol_lock, s); ++ ++ return str; ++} ++ ++/* ++ * We define a static limit (RELAX_SPOTNR) for spot records to limit ++ * the memory consumption (we pull record memory from the system ++ * heap). The current value should be reasonable enough unless the ++ * application is extremely unsane, given that we only keep unique ++ * spots. 
Said differently, if the application has more than ++ * RELAX_SPOTNR distinct code locations doing spurious relaxes, then ++ * the first issue to address is likely PEBKAC. ++ */ ++#define RELAX_SPOTNR 128 ++#define RELAX_HSLOTS (1 << 8) ++ ++struct relax_record { ++ /* Number of hits for this location */ ++ u32 hits; ++ struct relax_spot { ++ /* Faulty thread name. */ ++ char thread[XNOBJECT_NAME_LEN]; ++ /* call stack the relax originates from. */ ++ int depth; ++ struct backtrace { ++ unsigned long pc; ++ const char *mapname; ++ } backtrace[SIGSHADOW_BACKTRACE_DEPTH]; ++ /* Program hash value of the caller. */ ++ u32 proghash; ++ /* Pid of the caller. */ ++ pid_t pid; ++ /* Reason for relaxing. */ ++ int reason; ++ } spot; ++ struct relax_record *r_next; ++ struct relax_record *h_next; ++ const char *exe_path; ++}; ++ ++static struct relax_record *relax_jhash[RELAX_HSLOTS]; ++ ++static struct relax_record *relax_record_list; ++ ++static int relax_overall, relax_queued; ++ ++DEFINE_PRIVATE_XNLOCK(relax_lock); ++ ++/* ++ * The motivation to centralize tracing information about relaxes ++ * directly into kernel space is fourfold: ++ * ++ * - this allows to gather all the trace data into a single location ++ * and keep it safe there, with no external log file involved. ++ * ++ * - enabling the tracing does not impose any requirement on the ++ * application (aside of being compiled with debug symbols for best ++ * interpreting that information). We only need a kernel config switch ++ * for this (i.e. CONFIG_XENO_OPT_DEBUG_TRACE_RELAX). ++ * ++ * - the data is collected and can be made available exactly the same ++ * way regardless of the application emitting the relax requests, or ++ * whether it is still alive when the trace data are displayed. ++ * ++ * - the kernel is able to provide accurate and detailed trace ++ * information, such as the relative offset of instructions causing ++ * relax requests within dynamic shared objects, without having to ++ * guess it roughly from /proc/pid/maps, or relying on ldd's ++ * --function-relocs feature, which both require to run on the target ++ * system to get the needed information. Instead, we allow a build ++ * host to use a cross-compilation toolchain later to extract the ++ * source location, from the raw data the kernel has provided on the ++ * target system. ++ * ++ * However, collecting the call frames within the application to ++ * determine the full context of a relax spot is not something we can ++ * do purely from kernel space, notably because it depends on build ++ * options we just don't know about (e.g. frame pointers availability ++ * for the app, or other nitty-gritty details depending on the ++ * toolchain). To solve this, we ask the application to send us a ++ * complete backtrace taken from the context of a specific signal ++ * handler, which we know is stacked over the relax spot. That ++ * information is then stored by the kernel after some ++ * post-processing, along with other data identifying the caller, and ++ * made available through the /proc/xenomai/debug/relax vfile. ++ * ++ * Implementation-wise, xndebug_notify_relax and xndebug_trace_relax ++ * routines are paired: first, xndebug_notify_relax sends a SIGSHADOW ++ * request to userland when a relax spot is detected from ++ * xnthread_relax, which should then trigger a call back to ++ * xndebug_trace_relax with the complete backtrace information, as ++ * seen from userland (via the internal sc_cobalt_backtrace ++ * syscall). 
All this runs on behalf of the relaxing thread, so we can ++ * make a number of convenient assumptions (such as being able to scan ++ * the current vma list to get detailed information about the ++ * executable mappings that could be involved). ++ */ ++ ++void xndebug_notify_relax(struct xnthread *thread, int reason) ++{ ++ xnthread_signal(thread, SIGSHADOW, ++ sigshadow_int(SIGSHADOW_ACTION_BACKTRACE, reason)); ++} ++ ++void xndebug_trace_relax(int nr, unsigned long *backtrace, ++ int reason) ++{ ++ struct relax_record *p, **h; ++ struct vm_area_struct *vma; ++ struct xnthread *thread; ++ struct relax_spot spot; ++ struct mm_struct *mm; ++ struct file *file; ++ unsigned long pc; ++ char *mapname; ++ int n, depth; ++ char *tmp; ++ u32 hash; ++ spl_t s; ++ ++ thread = xnthread_current(); ++ if (thread == NULL) ++ return; /* Can't be, right? What a mess. */ ++ ++ /* ++ * We compute PC values relative to the base of the shared ++ * executable mappings we find in the backtrace, which makes ++ * it possible for the slackspot utility to match the ++ * corresponding source code locations from unrelocated file ++ * offsets. ++ */ ++ ++ tmp = (char *)__get_free_page(GFP_KERNEL); ++ if (tmp == NULL) ++ /* ++ * The situation looks really bad, but we can't do ++ * anything about it. Just bail out. ++ */ ++ return; ++ ++ memset(&spot, 0, sizeof(spot)); ++ mm = get_task_mm(current); ++ down_read(&mm->mmap_sem); ++ ++ for (n = 0, depth = 0; n < nr; n++) { ++ pc = backtrace[n]; ++ ++ vma = find_vma(mm, pc); ++ if (vma == NULL) ++ continue; ++ ++ /* ++ * Hack. Unlike DSOs, executables and interpreters ++ * (e.g. dynamic linkers) are protected against write ++ * attempts. Use this to determine when $pc should be ++ * fixed up by subtracting the mapping base address in ++ * the DSO case. ++ */ ++ if (!(vma->vm_flags & VM_DENYWRITE)) ++ pc -= vma->vm_start; ++ ++ spot.backtrace[depth].pc = pc; ++ ++ /* ++ * Even in case we can't fetch the map name, we still ++ * record the PC value, which may still give some hint ++ * downstream. ++ */ ++ file = vma->vm_file; ++ if (file == NULL) ++ goto next_frame; ++ ++ mapname = d_path(&file->f_path, tmp, PAGE_SIZE); ++ if (IS_ERR(mapname)) ++ goto next_frame; ++ ++ spot.backtrace[depth].mapname = hash_symbol(mapname); ++ next_frame: ++ depth++; ++ } ++ ++ up_read(&mm->mmap_sem); ++ mmput(mm); ++ free_page((unsigned long)tmp); ++ ++ /* ++ * Most of the time we will be sent duplicates, since the odds ++ * of seeing the same thread running the same code doing the ++ * same mistake all over again are high. So we probe the hash ++ * table for an identical spot first, before going for a ++ * complete record allocation from the system heap if no match ++ * was found. Otherwise, we just take the fast exit path. ++ */ ++ spot.depth = depth; ++ spot.proghash = thread->proghash; ++ spot.pid = xnthread_host_pid(thread); ++ spot.reason = reason; ++ strcpy(spot.thread, thread->name); ++ hash = jhash2((u32 *)&spot, sizeof(spot) / sizeof(u32), 0); ++ ++ xnlock_get_irqsave(&relax_lock, s); ++ ++ h = &relax_jhash[hash & (RELAX_HSLOTS - 1)]; ++ p = *h; ++ while (p && ++ /* Try quick guesses first, then memcmp */ ++ (p->spot.depth != spot.depth || ++ p->spot.pid != spot.pid || ++ memcmp(&p->spot, &spot, sizeof(spot)))) ++ p = p->h_next; ++ ++ if (p) { ++ p->hits++; ++ goto out; /* Spot already recorded. */ ++ } ++ ++ if (relax_queued >= RELAX_SPOTNR) ++ goto out; /* No more space -- ignore. 
*/ ++ /* ++ * We can only compete with other shadows which have just ++ * switched to secondary mode like us. So holding the ++ * relax_lock a bit more without disabling interrupts is not ++ * an issue. This allows us to postpone the record memory ++ * allocation while probing and updating the hash table in a ++ * single move. ++ */ ++ p = xnheap_alloc(&memory_pool, sizeof(*p)); ++ if (p == NULL) ++ goto out; /* Something is about to go wrong... */ ++ ++ memcpy(&p->spot, &spot, sizeof(p->spot)); ++ p->exe_path = hash_symbol(thread->exe_path); ++ p->hits = 1; ++ p->h_next = *h; ++ *h = p; ++ p->r_next = relax_record_list; ++ relax_record_list = p; ++ relax_queued++; ++out: ++ relax_overall++; ++ ++ xnlock_put_irqrestore(&relax_lock, s); ++} ++ ++static DEFINE_VFILE_HOSTLOCK(relax_mutex); ++ ++struct relax_vfile_priv { ++ int queued; ++ int overall; ++ int ncurr; ++ struct relax_record *head; ++ struct relax_record *curr; ++}; ++ ++static void *relax_vfile_begin(struct xnvfile_regular_iterator *it) ++{ ++ struct relax_vfile_priv *priv = xnvfile_iterator_priv(it); ++ struct relax_record *p; ++ spl_t s; ++ int n; ++ ++ /* ++ * Snapshot the counters under lock, to make sure they remain ++ * mutually consistent despite we dump the record list in a ++ * lock-less manner. Additionally, the vfile layer already ++ * holds the relax_mutex lock for us, so that we can't race ++ * with ->store(). ++ */ ++ xnlock_get_irqsave(&relax_lock, s); ++ ++ if (relax_queued == 0 || it->pos > relax_queued) { ++ xnlock_put_irqrestore(&relax_lock, s); ++ return NULL; ++ } ++ priv->overall = relax_overall; ++ priv->queued = relax_queued; ++ priv->head = relax_record_list; ++ ++ xnlock_put_irqrestore(&relax_lock, s); ++ ++ if (it->pos == 0) { ++ priv->curr = NULL; ++ priv->ncurr = -1; ++ return VFILE_SEQ_START; ++ } ++ ++ for (n = 1, p = priv->head; n < it->pos; n++) ++ p = p->r_next; ++ ++ priv->curr = p; ++ priv->ncurr = n; ++ ++ return p; ++} ++ ++static void *relax_vfile_next(struct xnvfile_regular_iterator *it) ++{ ++ struct relax_vfile_priv *priv = xnvfile_iterator_priv(it); ++ struct relax_record *p; ++ int n; ++ ++ if (it->pos > priv->queued) ++ return NULL; ++ ++ if (it->pos == priv->ncurr + 1) ++ p = priv->curr->r_next; ++ else { ++ for (n = 1, p = priv->head; n < it->pos; n++) ++ p = p->r_next; ++ } ++ ++ priv->curr = p; ++ priv->ncurr = it->pos; ++ ++ return p; ++} ++ ++static const char *reason_str[] = { ++ [SIGDEBUG_UNDEFINED] = "undefined", ++ [SIGDEBUG_MIGRATE_SIGNAL] = "signal", ++ [SIGDEBUG_MIGRATE_SYSCALL] = "syscall", ++ [SIGDEBUG_MIGRATE_FAULT] = "fault", ++ [SIGDEBUG_MIGRATE_PRIOINV] = "pi-error", ++ [SIGDEBUG_NOMLOCK] = "mlock-check", ++ [SIGDEBUG_WATCHDOG] = "runaway-break", ++ [SIGDEBUG_RESCNT_IMBALANCE] = "resource-count-imbalance", ++ [SIGDEBUG_MUTEX_SLEEP] = "sleep-holding-mutex", ++ [SIGDEBUG_LOCK_BREAK] = "scheduler-lock-break", ++}; ++ ++static int relax_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct relax_vfile_priv *priv = xnvfile_iterator_priv(it); ++ struct relax_record *p = data; ++ int n; ++ ++ /* ++ * No need to grab any lock to read a record from a previously ++ * validated index: the data must be there and won't be ++ * touched anymore. 
++ */ ++ if (p == NULL) { ++ xnvfile_printf(it, "%d\n", priv->overall); ++ return 0; ++ } ++ ++ xnvfile_printf(it, "%s\n", p->exe_path ?: "?"); ++ xnvfile_printf(it, "%d %d %s %s\n", p->spot.pid, p->hits, ++ reason_str[p->spot.reason], p->spot.thread); ++ ++ for (n = 0; n < p->spot.depth; n++) ++ xnvfile_printf(it, "0x%lx %s\n", ++ p->spot.backtrace[n].pc, ++ p->spot.backtrace[n].mapname ?: "?"); ++ ++ xnvfile_printf(it, ".\n"); ++ ++ return 0; ++} ++ ++static ssize_t relax_vfile_store(struct xnvfile_input *input) ++{ ++ struct relax_record *p, *np; ++ spl_t s; ++ ++ /* ++ * Flush out all records. Races with ->show() are prevented ++ * using the relax_mutex lock. The vfile layer takes care of ++ * this internally. ++ */ ++ xnlock_get_irqsave(&relax_lock, s); ++ p = relax_record_list; ++ relax_record_list = NULL; ++ relax_overall = 0; ++ relax_queued = 0; ++ memset(relax_jhash, 0, sizeof(relax_jhash)); ++ xnlock_put_irqrestore(&relax_lock, s); ++ ++ while (p) { ++ np = p->r_next; ++ xnheap_free(&memory_pool, p); ++ p = np; ++ } ++ ++ return input->size; ++} ++ ++static struct xnvfile_regular_ops relax_vfile_ops = { ++ .begin = relax_vfile_begin, ++ .next = relax_vfile_next, ++ .show = relax_vfile_show, ++ .store = relax_vfile_store, ++}; ++ ++static struct xnvfile_regular relax_vfile = { ++ .privsz = sizeof(struct relax_vfile_priv), ++ .ops = &relax_vfile_ops, ++ .entry = { .lockops = &relax_mutex.ops }, ++}; ++ ++static inline int init_trace_relax(void) ++{ ++ u32 size = CONFIG_XENO_OPT_DEBUG_TRACE_LOGSZ * 1024; ++ void *p; ++ int ret; ++ ++ p = vmalloc(size); ++ if (p == NULL) ++ return -ENOMEM; ++ ++ ret = xnheap_init(&memory_pool, p, size); ++ if (ret) ++ return ret; ++ ++ xnheap_set_name(&memory_pool, "debug log"); ++ ++ ret = xnvfile_init_regular("relax", &relax_vfile, &cobalt_debug_vfroot); ++ if (ret) { ++ xnheap_destroy(&memory_pool); ++ vfree(p); ++ } ++ ++ return ret; ++} ++ ++static inline void cleanup_trace_relax(void) ++{ ++ void *p; ++ ++ xnvfile_destroy_regular(&relax_vfile); ++ p = xnheap_get_membase(&memory_pool); ++ xnheap_destroy(&memory_pool); ++ vfree(p); ++} ++ ++#else /* !CONFIG_XENO_OPT_DEBUG_TRACE_RELAX */ ++ ++static inline int init_trace_relax(void) ++{ ++ return 0; ++} ++ ++static inline void cleanup_trace_relax(void) ++{ ++} ++ ++static inline void init_thread_relax_trace(struct xnthread *thread) ++{ ++} ++ ++#endif /* !XENO_OPT_DEBUG_TRACE_RELAX */ ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++ ++void xnlock_dbg_prepare_acquire(unsigned long long *start) ++{ ++ *start = xnclock_read_raw(&nkclock); ++} ++EXPORT_SYMBOL_GPL(xnlock_dbg_prepare_acquire); ++ ++void xnlock_dbg_acquired(struct xnlock *lock, int cpu, unsigned long long *start, ++ const char *file, int line, const char *function) ++{ ++ lock->lock_date = *start; ++ lock->spin_time = xnclock_read_raw(&nkclock) - *start; ++ lock->file = file; ++ lock->function = function; ++ lock->line = line; ++ lock->cpu = cpu; ++} ++EXPORT_SYMBOL_GPL(xnlock_dbg_acquired); ++ ++int xnlock_dbg_release(struct xnlock *lock, ++ const char *file, int line, const char *function) ++{ ++ unsigned long long lock_time; ++ struct xnlockinfo *stats; ++ int cpu; ++ ++ lock_time = xnclock_read_raw(&nkclock) - lock->lock_date; ++ cpu = ipipe_processor_id(); ++ stats = &per_cpu(xnlock_stats, cpu); ++ ++ if (lock->file == NULL) { ++ lock->file = "??"; ++ lock->line = 0; ++ lock->function = "invalid"; ++ } ++ ++ if (unlikely(lock->owner != cpu)) { ++ ipipe_prepare_panic(); ++ printk(XENO_ERR "lock %p already unlocked on CPU #%d\n" ++ " 
last owner = %s:%u (%s(), CPU #%d)\n", ++ lock, cpu, lock->file, lock->line, lock->function, ++ lock->cpu); ++ show_stack(NULL,NULL); ++ return 1; ++ } ++ ++ /* File that we released it. */ ++ lock->cpu = -lock->cpu; ++ lock->file = file; ++ lock->line = line; ++ lock->function = function; ++ ++ if (lock_time > stats->lock_time) { ++ stats->lock_time = lock_time; ++ stats->spin_time = lock->spin_time; ++ stats->file = lock->file; ++ stats->function = lock->function; ++ stats->line = lock->line; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnlock_dbg_release); ++ ++#endif /* CONFIG_XENO_OPT_DEBUG_LOCKING */ ++ ++void xndebug_shadow_init(struct xnthread *thread) ++{ ++ struct cobalt_ppd *sys_ppd; ++ size_t len; ++ ++ sys_ppd = cobalt_ppd_get(0); ++ /* ++ * The caller is current, so we know for sure that sys_ppd ++ * will still be valid after we dropped the lock. ++ * ++ * NOTE: Kernel shadows all share the system global ppd ++ * descriptor with no refcounting. ++ */ ++ thread->exe_path = sys_ppd->exe_path ?: "(unknown)"; ++ /* ++ * The program hash value is a unique token debug features may ++ * use to identify all threads which belong to a given ++ * executable file. Using this value for quick probes is often ++ * handier and more efficient than testing the whole exe_path. ++ */ ++ len = strlen(thread->exe_path); ++ thread->proghash = jhash(thread->exe_path, len, 0); ++} ++ ++int xndebug_init(void) ++{ ++ int ret; ++ ++ ret = init_trace_relax(); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ ++void xndebug_cleanup(void) ++{ ++ cleanup_trace_relax(); ++} ++ ++/** @} */ +--- linux/kernel/xenomai/procfs.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/procfs.c 2021-04-07 16:01:25.744636285 +0800 +@@ -0,0 +1,262 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "debug.h" ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++ ++static int lock_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct xnlockinfo lockinfo; ++ spl_t s; ++ int cpu; ++ ++ for_each_realtime_cpu(cpu) { ++ xnlock_get_irqsave(&nklock, s); ++ lockinfo = per_cpu(xnlock_stats, cpu); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (cpu > 0) ++ xnvfile_printf(it, "\n"); ++ ++ xnvfile_printf(it, "CPU%d:\n", cpu); ++ ++ xnvfile_printf(it, ++ " longest locked section: %llu ns\n" ++ " spinning time: %llu ns\n" ++ " section entry: %s:%d (%s)\n", ++ xnclock_ticks_to_ns(&nkclock, lockinfo.lock_time), ++ xnclock_ticks_to_ns(&nkclock, lockinfo.spin_time), ++ lockinfo.file, lockinfo.line, lockinfo.function); ++ } ++ ++ return 0; ++} ++ ++static ssize_t lock_vfile_store(struct xnvfile_input *input) ++{ ++ ssize_t ret; ++ spl_t s; ++ int cpu; ++ ++ long val; ++ ++ ret = xnvfile_get_integer(input, &val); ++ if (ret < 0) ++ return ret; ++ ++ if (val != 0) ++ return -EINVAL; ++ ++ for_each_realtime_cpu(cpu) { ++ xnlock_get_irqsave(&nklock, s); ++ memset(&per_cpu(xnlock_stats, cpu), '\0', sizeof(struct xnlockinfo)); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++ return ret; ++} ++ ++static struct xnvfile_regular_ops lock_vfile_ops = { ++ .show = lock_vfile_show, ++ .store = lock_vfile_store, ++}; ++ ++static struct xnvfile_regular lock_vfile = { ++ .ops = &lock_vfile_ops, ++}; ++ ++#endif /* CONFIG_XENO_OPT_DEBUG_LOCKING */ ++ ++static int latency_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ xnvfile_printf(it, "%Lu\n", ++ xnclock_ticks_to_ns(&nkclock, nkclock.gravity.user)); ++ ++ return 0; ++} ++ ++static ssize_t latency_vfile_store(struct xnvfile_input *input) ++{ ++ ssize_t ret; ++ long val; ++ ++ ret = xnvfile_get_integer(input, &val); ++ if (ret < 0) ++ return ret; ++ ++ nkclock.gravity.user = xnclock_ns_to_ticks(&nkclock, val); ++ ++ return ret; ++} ++ ++static struct xnvfile_regular_ops latency_vfile_ops = { ++ .show = latency_vfile_show, ++ .store = latency_vfile_store, ++}; ++ ++static struct xnvfile_regular latency_vfile = { ++ .ops = &latency_vfile_ops, ++}; ++ ++static int version_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ xnvfile_printf(it, "%s\n", XENO_VERSION_STRING); ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops version_vfile_ops = { ++ .show = version_vfile_show, ++}; ++ ++static struct xnvfile_regular version_vfile = { ++ .ops = &version_vfile_ops, ++}; ++ ++static int faults_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ int cpu, trap; ++ ++ xnvfile_puts(it, "TRAP "); ++ ++ for_each_realtime_cpu(cpu) ++ xnvfile_printf(it, " CPU%d", cpu); ++ ++ for (trap = 0; cobalt_machine.fault_labels[trap]; trap++) { ++ if (*cobalt_machine.fault_labels[trap] == '\0') ++ continue; ++ ++ xnvfile_printf(it, "\n%3d: ", trap); ++ ++ for_each_realtime_cpu(cpu) ++ xnvfile_printf(it, "%12u", ++ per_cpu(cobalt_machine_cpudata, cpu).faults[trap]); ++ ++ xnvfile_printf(it, " (%s)", ++ cobalt_machine.fault_labels[trap]); ++ } ++ ++ xnvfile_putc(it, '\n'); ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops faults_vfile_ops = { ++ .show = faults_vfile_show, ++}; ++ ++static struct xnvfile_regular faults_vfile = { ++ .ops = &faults_vfile_ops, ++}; ++ ++static int apc_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ int cpu, apc; ++ ++ /* We assume the entire output fits in a 
single page. */ ++ ++ xnvfile_puts(it, "APC "); ++ ++ for_each_realtime_cpu(cpu) ++ xnvfile_printf(it, " CPU%d", cpu); ++ ++ for (apc = 0; apc < BITS_PER_LONG; apc++) { ++ if (!test_bit(apc, &cobalt_pipeline.apc_map)) ++ continue; /* Not hooked. */ ++ ++ xnvfile_printf(it, "\n%3d: ", apc); ++ ++ for_each_realtime_cpu(cpu) ++ xnvfile_printf(it, "%12lu", ++ per_cpu(cobalt_machine_cpudata, cpu).apc_shots[apc]); ++ ++ if (cobalt_pipeline.apc_table[apc].name) ++ xnvfile_printf(it, " (%s)", ++ cobalt_pipeline.apc_table[apc].name); ++ } ++ ++ xnvfile_putc(it, '\n'); ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops apc_vfile_ops = { ++ .show = apc_vfile_show, ++}; ++ ++static struct xnvfile_regular apc_vfile = { ++ .ops = &apc_vfile_ops, ++}; ++ ++void xnprocfs_cleanup_tree(void) ++{ ++#ifdef CONFIG_XENO_OPT_DEBUG ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++ xnvfile_destroy_regular(&lock_vfile); ++#endif ++ xnvfile_destroy_dir(&cobalt_debug_vfroot); ++#endif /* XENO_OPT_DEBUG */ ++ xnvfile_destroy_regular(&apc_vfile); ++ xnvfile_destroy_regular(&faults_vfile); ++ xnvfile_destroy_regular(&version_vfile); ++ xnvfile_destroy_regular(&latency_vfile); ++ xnintr_cleanup_proc(); ++ xnheap_cleanup_proc(); ++ xnclock_cleanup_proc(); ++ xnsched_cleanup_proc(); ++ xnvfile_destroy_root(); ++} ++ ++int __init xnprocfs_init_tree(void) ++{ ++ int ret; ++ ++ ret = xnvfile_init_root(); ++ if (ret) ++ return ret; ++ ++ ret = xnsched_init_proc(); ++ if (ret) ++ return ret; ++ ++ xnclock_init_proc(); ++ xnheap_init_proc(); ++ xnintr_init_proc(); ++ xnvfile_init_regular("latency", &latency_vfile, &cobalt_vfroot); ++ xnvfile_init_regular("version", &version_vfile, &cobalt_vfroot); ++ xnvfile_init_regular("faults", &faults_vfile, &cobalt_vfroot); ++ xnvfile_init_regular("apc", &apc_vfile, &cobalt_vfroot); ++#ifdef CONFIG_XENO_OPT_DEBUG ++ xnvfile_init_dir("debug", &cobalt_debug_vfroot, &cobalt_vfroot); ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++ xnvfile_init_regular("lock", &lock_vfile, &cobalt_debug_vfroot); ++#endif ++#endif ++ ++ return 0; ++} +--- linux/kernel/xenomai/lock.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/lock.c 2021-04-07 16:01:25.739636292 +0800 +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (C) 2001-2012 Philippe Gerum . ++ * Copyright (C) 2004,2005 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_lock Locking services ++ * ++ * The Xenomai core deals with concurrent activities from two distinct ++ * kernels running side-by-side. When interrupts are involved, the ++ * services from this section control the @b hard interrupt state ++ * exclusively, for protecting against processor-local or SMP ++ * concurrency. 
++ * ++ * @note In a dual kernel configuration, hard interrupts are ++ * gated by the CPU. When enabled, hard interrupts are immediately ++ * delivered to the Xenomai core if they belong to a real-time source, ++ * or deferred until enabled by a second-stage virtual interrupt mask, ++ * if they belong to regular Linux devices/sources. ++ * ++ * @{ ++ */ ++DEFINE_XNLOCK(nklock); ++#if defined(CONFIG_SMP) || defined(CONFIG_XENO_OPT_DEBUG_LOCKING) ++EXPORT_SYMBOL_GPL(nklock); ++ ++#ifdef CONFIG_XENO_ARCH_OUTOFLINE_XNLOCK ++int ___xnlock_get(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ return ____xnlock_get(lock /* , */ XNLOCK_DBG_PASS_CONTEXT); ++} ++EXPORT_SYMBOL_GPL(___xnlock_get); ++ ++void ___xnlock_put(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ ____xnlock_put(lock /* , */ XNLOCK_DBG_PASS_CONTEXT); ++} ++EXPORT_SYMBOL_GPL(___xnlock_put); ++#endif /* out of line xnlock */ ++#endif /* CONFIG_SMP || XENO_DEBUG(LOCKING) */ ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++DEFINE_PER_CPU(struct xnlockinfo, xnlock_stats); ++EXPORT_PER_CPU_SYMBOL_GPL(xnlock_stats); ++#endif ++ ++/** @} */ +--- linux/kernel/Makefile 2021-04-07 16:00:26.635720743 +0800 ++++ linux-patched/kernel/Makefile 2021-04-07 16:01:25.595636498 +0800 +@@ -126,3 +126,5 @@ + targets += config_data.h + $(obj)/config_data.h: $(obj)/config_data.gz FORCE + $(call filechk,ikconfiggz) ++ ++obj-$(CONFIG_XENOMAI) += xenomai/ +--- linux/drivers/xenomai/testing/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/Makefile 2021-04-07 16:01:28.041633003 +0800 +@@ -0,0 +1,13 @@ ++ ++obj-$(CONFIG_XENO_DRIVERS_TIMERBENCH) += xeno_timerbench.o ++obj-$(CONFIG_XENO_DRIVERS_SWITCHTEST) += xeno_switchtest.o ++obj-$(CONFIG_XENO_DRIVERS_RTDMTEST) += xeno_rtdmtest.o ++obj-$(CONFIG_XENO_DRIVERS_HEAPCHECK) += xeno_heapcheck.o ++ ++xeno_timerbench-y := timerbench.o ++ ++xeno_switchtest-y := switchtest.o ++ ++xeno_rtdmtest-y := rtdmtest.o ++ ++xeno_heapcheck-y := heapcheck.o +--- linux/drivers/xenomai/testing/heapcheck.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/heapcheck.c 2021-04-07 16:01:28.036633010 +0800 +@@ -0,0 +1,515 @@ ++/* ++ * Copyright (C) 2018 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define complain(__fmt, __args...) 
\ ++ printk(XENO_WARNING "heap check: " __fmt "\n", ##__args) ++ ++static struct xnheap test_heap = { ++ .name = "test_heap" ++}; ++ ++enum pattern { ++ alphabet_series, ++ digit_series, ++ binary_series, ++}; ++ ++struct chunk { ++ void *ptr; ++ enum pattern pattern; ++}; ++ ++struct runstats { ++ struct rttst_heap_stats stats; ++ struct runstats *next; ++}; ++ ++static struct runstats *statistics; ++ ++static int nrstats; ++ ++static inline void breathe(int loops) ++{ ++ if ((loops % 1000) == 0) ++ rtdm_task_sleep(300000ULL); ++} ++ ++static inline void do_swap(void *left, void *right, const size_t size) ++{ ++ char trans[size]; ++ ++ memcpy(trans, left, size); ++ memcpy(left, right, size); ++ memcpy(right, trans, size); ++} ++ ++static void random_shuffle(void *vbase, size_t nmemb, const size_t size) ++{ ++ struct { ++ char x[size]; ++ } __attribute__((packed)) *base = vbase; ++ unsigned int j, k; ++ ++ for (j = nmemb; j > 0; j--) { ++ k = (unsigned int)(prandom_u32() % nmemb) + 1; ++ if (j == k) ++ continue; ++ do_swap(&base[j - 1], &base[k - 1], size); ++ } ++} ++ ++static void fill_pattern(char *p, size_t size, enum pattern pat) ++{ ++ unsigned int val, count; ++ ++ switch (pat) { ++ case alphabet_series: ++ val = 'a'; ++ count = 26; ++ break; ++ case digit_series: ++ val = '0'; ++ count = 10; ++ break; ++ default: ++ val = 0; ++ count = 255; ++ break; ++ } ++ ++ while (size-- > 0) { ++ *p++ = (char)(val % count); ++ val++; ++ } ++} ++ ++static int check_pattern(const char *p, size_t size, enum pattern pat) ++{ ++ unsigned int val, count; ++ ++ switch (pat) { ++ case alphabet_series: ++ val = 'a'; ++ count = 26; ++ break; ++ case digit_series: ++ val = '0'; ++ count = 10; ++ break; ++ default: ++ val = 0; ++ count = 255; ++ break; ++ } ++ ++ while (size-- > 0) { ++ if (*p++ != (char)(val % count)) ++ return 0; ++ val++; ++ } ++ ++ return 1; ++} ++ ++static size_t find_largest_free(size_t free_size, size_t block_size) ++{ ++ void *p; ++ ++ for (;;) { ++ p = xnheap_alloc(&test_heap, free_size); ++ if (p) { ++ xnheap_free(&test_heap, p); ++ break; ++ } ++ if (free_size <= block_size) ++ break; ++ free_size -= block_size; ++ } ++ ++ return free_size; ++} ++ ++static int test_seq(size_t heap_size, size_t block_size, int flags) ++{ ++ long alloc_sum_ns, alloc_avg_ns, free_sum_ns, free_avg_ns, ++ alloc_max_ns, free_max_ns, d; ++ size_t user_size, largest_free, maximum_free, freed; ++ int ret, n, k, maxblocks, nrblocks; ++ nanosecs_rel_t start, end; ++ struct chunk *chunks; ++ struct runstats *st; ++ bool done_frag; ++ void *mem, *p; ++ ++ maxblocks = heap_size / block_size; ++ ++ mem = vmalloc(heap_size); ++ if (mem == NULL) ++ return -ENOMEM; ++ ++ ret = xnheap_init(&test_heap, mem, heap_size); ++ if (ret) { ++ complain("cannot init heap with size %zu", ++ heap_size); ++ goto out; ++ } ++ ++ chunks = vmalloc(sizeof(*chunks) * maxblocks); ++ if (chunks == NULL) { ++ ret = -ENOMEM; ++ goto no_chunks; ++ } ++ memset(chunks, 0, sizeof(*chunks) * maxblocks); ++ ++ ret = xnthread_harden(); ++ if (ret) ++ goto done; ++ ++ if (xnheap_get_size(&test_heap) != heap_size) { ++ complain("memory size inconsistency (%zu / %zu bytes)", ++ heap_size, xnheap_get_size(&test_heap)); ++ goto bad; ++ } ++ ++ user_size = 0; ++ alloc_avg_ns = 0; ++ free_avg_ns = 0; ++ alloc_max_ns = 0; ++ free_max_ns = 0; ++ maximum_free = 0; ++ largest_free = 0; ++ ++ for (n = 0, alloc_sum_ns = 0; ; n++) { ++ start = rtdm_clock_read_monotonic(); ++ p = xnheap_alloc(&test_heap, block_size); ++ end = rtdm_clock_read_monotonic(); 
++ d = end - start; ++ if (d > alloc_max_ns) ++ alloc_max_ns = d; ++ alloc_sum_ns += d; ++ if (p == NULL) ++ break; ++ user_size += block_size; ++ if (n >= maxblocks) { ++ complain("too many blocks fetched" ++ " (heap=%zu, block=%zu, " ++ "got more than %d blocks)", ++ heap_size, block_size, maxblocks); ++ goto bad; ++ } ++ chunks[n].ptr = p; ++ if (flags & RTTST_HEAPCHECK_PATTERN) { ++ chunks[n].pattern = (enum pattern)(prandom_u32() % 3); ++ fill_pattern(chunks[n].ptr, block_size, chunks[n].pattern); ++ } ++ breathe(n); ++ } ++ ++ nrblocks = n; ++ if (nrblocks == 0) ++ goto do_stats; ++ ++ if ((flags & RTTST_HEAPCHECK_ZEROOVRD) && nrblocks != maxblocks) { ++ complain("too few blocks fetched, unexpected overhead" ++ " (heap=%zu, block=%zu, " ++ "got %d, less than %d blocks)", ++ heap_size, block_size, nrblocks, maxblocks); ++ goto bad; ++ } ++ ++ breathe(0); ++ ++ /* Make sure we did not trash any busy block while allocating. */ ++ if (flags & RTTST_HEAPCHECK_PATTERN) { ++ for (n = 0; n < nrblocks; n++) { ++ if (!check_pattern(chunks[n].ptr, block_size, ++ chunks[n].pattern)) { ++ complain("corrupted block #%d on alloc" ++ " sequence (pattern %d)", ++ n, chunks[n].pattern); ++ goto bad; ++ } ++ breathe(n); ++ } ++ } ++ ++ if (flags & RTTST_HEAPCHECK_SHUFFLE) ++ random_shuffle(chunks, nrblocks, sizeof(*chunks)); ++ ++ /* ++ * Release all blocks. ++ */ ++ for (n = 0, free_sum_ns = 0, freed = 0, done_frag = false; ++ n < nrblocks; n++) { ++ start = rtdm_clock_read_monotonic(); ++ xnheap_free(&test_heap, chunks[n].ptr); ++ end = rtdm_clock_read_monotonic(); ++ d = end - start; ++ if (d > free_max_ns) ++ free_max_ns = d; ++ free_sum_ns += d; ++ chunks[n].ptr = NULL; ++ /* Make sure we did not trash busy blocks while freeing. */ ++ if (flags & RTTST_HEAPCHECK_PATTERN) { ++ for (k = 0; k < nrblocks; k++) { ++ if (chunks[k].ptr && ++ !check_pattern(chunks[k].ptr, block_size, ++ chunks[k].pattern)) { ++ complain("corrupted block #%d on release" ++ " sequence (pattern %d)", ++ k, chunks[k].pattern); ++ goto bad; ++ } ++ breathe(k); ++ } ++ } ++ freed += block_size; ++ /* ++ * Get a sense of the fragmentation for the tested ++ * allocation pattern, heap and block sizes when half ++ * of the usable heap size should be available to us. ++ * NOTE: user_size excludes the overhead, this is ++ * actually what we managed to get from the current ++ * heap out of the allocation loop. ++ */ ++ if (!done_frag && freed >= user_size / 2) { ++ /* Calculate the external fragmentation. */ ++ largest_free = find_largest_free(freed, block_size); ++ maximum_free = freed; ++ done_frag = true; ++ } ++ breathe(n); ++ } ++ ++ /* ++ * If the deallocation mechanism is broken, we might not be ++ * able to reproduce the same allocation pattern with the same ++ * outcome, check this. 
++ */ ++ if (flags & RTTST_HEAPCHECK_HOT) { ++ for (n = 0, alloc_max_ns = alloc_sum_ns = 0; ; n++) { ++ start = rtdm_clock_read_monotonic(); ++ p = xnheap_alloc(&test_heap, block_size); ++ end = rtdm_clock_read_monotonic(); ++ d = end - start; ++ if (d > alloc_max_ns) ++ alloc_max_ns = d; ++ alloc_sum_ns += d; ++ if (p == NULL) ++ break; ++ if (n >= maxblocks) { ++ complain("too many blocks fetched during hot pass" ++ " (heap=%zu, block=%zu, " ++ "got more than %d blocks)", ++ heap_size, block_size, maxblocks); ++ goto bad; ++ } ++ chunks[n].ptr = p; ++ breathe(n); ++ } ++ if (n != nrblocks) { ++ complain("inconsistent block count fetched" ++ " during hot pass (heap=%zu, block=%zu, " ++ "got %d blocks vs %d during alloc)", ++ heap_size, block_size, n, nrblocks); ++ goto bad; ++ } ++ for (n = 0, free_max_ns = free_sum_ns = 0; n < nrblocks; n++) { ++ start = rtdm_clock_read_monotonic(); ++ xnheap_free(&test_heap, chunks[n].ptr); ++ end = rtdm_clock_read_monotonic(); ++ d = end - start; ++ if (d > free_max_ns) ++ free_max_ns = d; ++ free_sum_ns += d; ++ breathe(n); ++ } ++ } ++ ++ alloc_avg_ns = alloc_sum_ns / nrblocks; ++ free_avg_ns = free_sum_ns / nrblocks; ++ ++ if ((flags & RTTST_HEAPCHECK_ZEROOVRD) && heap_size != user_size) { ++ complain("unexpected overhead reported"); ++ goto bad; ++ } ++ ++ if (xnheap_get_used(&test_heap) > 0) { ++ complain("memory leakage reported: %zu bytes missing", ++ xnheap_get_used(&test_heap)); ++ goto bad; ++ } ++ ++do_stats: ++ xnthread_relax(0, 0); ++ ret = 0; ++ /* ++ * Don't report stats when running a pattern check, timings ++ * are affected. ++ */ ++ if (!(flags & RTTST_HEAPCHECK_PATTERN)) { ++ st = kmalloc(sizeof(*st), GFP_KERNEL); ++ if (st == NULL) { ++ complain("failed allocating memory"); ++ ret = -ENOMEM; ++ goto out; ++ } ++ st->stats.heap_size = heap_size; ++ st->stats.user_size = user_size; ++ st->stats.block_size = block_size; ++ st->stats.nrblocks = nrblocks; ++ st->stats.alloc_avg_ns = alloc_avg_ns; ++ st->stats.alloc_max_ns = alloc_max_ns; ++ st->stats.free_avg_ns = free_avg_ns; ++ st->stats.free_max_ns = free_max_ns; ++ st->stats.maximum_free = maximum_free; ++ st->stats.largest_free = largest_free; ++ st->stats.flags = flags; ++ st->next = statistics; ++ statistics = st; ++ nrstats++; ++ } ++ ++done: ++ vfree(chunks); ++no_chunks: ++ xnheap_destroy(&test_heap); ++out: ++ vfree(mem); ++ ++ return ret; ++bad: ++ xnthread_relax(0, 0); ++ ret = -EPROTO; ++ goto done; ++} ++ ++static int collect_stats(struct rtdm_fd *fd, ++ struct rttst_heap_stats __user *buf, int nr) ++{ ++ struct runstats *p, *next; ++ int ret, n; ++ ++ if (nr < 0) ++ return -EINVAL; ++ ++ for (p = statistics, n = nr; p && n > 0 && nrstats > 0; ++ n--, nrstats--, p = next, buf += sizeof(p->stats)) { ++ ret = rtdm_copy_to_user(fd, buf, &p->stats, sizeof(p->stats)); ++ if (ret) ++ return ret; ++ next = p->next; ++ statistics = next; ++ kfree(p); ++ } ++ ++ return nr - n; ++} ++ ++static void heapcheck_close(struct rtdm_fd *fd) ++{ ++ struct runstats *p, *next; ++ ++ for (p = statistics; p; p = next) { ++ next = p->next; ++ kfree(p); ++ } ++ ++ statistics = NULL; ++} ++ ++static int heapcheck_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct rttst_heap_stathdr sthdr; ++ struct rttst_heap_parms parms; ++ int ret; ++ ++ switch (request) { ++ case RTTST_RTIOC_HEAP_CHECK: ++ ret = rtdm_copy_from_user(fd, &parms, arg, sizeof(parms)); ++ if (ret) ++ return ret; ++ ret = test_seq(parms.heap_size, ++ parms.block_size, ++ parms.flags); ++ if (ret) ++ 
return ret; ++ parms.nrstats = nrstats; ++ ret = rtdm_copy_to_user(fd, arg, &parms, sizeof(parms)); ++ break; ++ case RTTST_RTIOC_HEAP_STAT_COLLECT: ++ sthdr.buf = NULL; ++ ret = rtdm_copy_from_user(fd, &sthdr, arg, sizeof(sthdr)); ++ if (ret) ++ return ret; ++ ret = collect_stats(fd, sthdr.buf, sthdr.nrstats); ++ if (ret < 0) ++ return ret; ++ sthdr.nrstats = ret; ++ ret = rtdm_copy_to_user(fd, arg, &sthdr, sizeof(sthdr)); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static struct rtdm_driver heapcheck_driver = { ++ .profile_info = RTDM_PROFILE_INFO(heap_check, ++ RTDM_CLASS_TESTING, ++ RTDM_SUBCLASS_HEAPCHECK, ++ RTTST_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .device_count = 1, ++ .ops = { ++ .close = heapcheck_close, ++ .ioctl_nrt = heapcheck_ioctl, ++ }, ++}; ++ ++static struct rtdm_device heapcheck_device = { ++ .driver = &heapcheck_driver, ++ .label = "heapcheck", ++}; ++ ++static int __init heapcheck_init(void) ++{ ++ return rtdm_dev_register(&heapcheck_device); ++} ++ ++static void __exit heapcheck_exit(void) ++{ ++ rtdm_dev_unregister(&heapcheck_device); ++} ++ ++module_init(heapcheck_init); ++module_exit(heapcheck_exit); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/testing/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/Kconfig 2021-04-07 16:01:28.031633017 +0800 +@@ -0,0 +1,29 @@ ++menu "Testing drivers" ++ ++config XENO_DRIVERS_TIMERBENCH ++ tristate "Timer benchmark driver" ++ default y ++ help ++ Kernel-based benchmark driver for timer latency evaluation. ++ See testsuite/latency for a possible front-end. ++ ++config XENO_DRIVERS_SWITCHTEST ++ tristate "Context switch unit testing driver" ++ default y ++ help ++ Kernel-based driver for unit testing context switches and ++ FPU switches. ++ ++config XENO_DRIVERS_HEAPCHECK ++ tristate "Memory allocator test driver" ++ default y ++ help ++ Kernel-based driver for testing Cobalt's memory allocator. ++ ++config XENO_DRIVERS_RTDMTEST ++ depends on m ++ tristate "RTDM unit tests driver" ++ help ++ Kernel driver for performing RTDM unit tests. ++ ++endmenu +--- linux/drivers/xenomai/testing/switchtest.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/switchtest.c 2021-04-07 16:01:28.027633023 +0800 +@@ -0,0 +1,752 @@ ++/* ++ * Copyright (C) 2010 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("Cobalt context switch test helper"); ++MODULE_AUTHOR("Gilles Chanteperdrix "); ++MODULE_VERSION("0.1.1"); ++MODULE_LICENSE("GPL"); ++ ++#define RTSWITCH_RT 0x10000 ++#define RTSWITCH_NRT 0 ++#define RTSWITCH_KERNEL 0x20000 ++ ++struct rtswitch_task { ++ struct rttst_swtest_task base; ++ rtdm_event_t rt_synch; ++ struct semaphore nrt_synch; ++ struct xnthread ktask; /* For kernel-space real-time tasks. */ ++ unsigned int last_switch; ++}; ++ ++struct rtswitch_context { ++ struct rtswitch_task *tasks; ++ unsigned int tasks_count; ++ unsigned int next_index; ++ struct semaphore lock; ++ unsigned int cpu; ++ unsigned int switches_count; ++ ++ unsigned long pause_us; ++ unsigned int next_task; ++ rtdm_timer_t wake_up_delay; ++ ++ unsigned int failed; ++ struct rttst_swtest_error error; ++ ++ struct rtswitch_task *utask; ++ rtdm_nrtsig_t wake_utask; ++}; ++ ++static int fp_features; ++ ++static int report(const char *fmt, ...) ++{ ++ va_list ap; ++ int ret; ++ ++ va_start(ap, fmt); ++ ret = vprintk(fmt, ap); ++ va_end(ap); ++ ++ return ret; ++} ++ ++static void handle_ktask_error(struct rtswitch_context *ctx, unsigned int fp_val) ++{ ++ struct rtswitch_task *cur = &ctx->tasks[ctx->error.last_switch.to]; ++ unsigned int i; ++ ++ ctx->failed = 1; ++ ctx->error.fp_val = fp_val; ++ ++ if ((cur->base.flags & RTSWITCH_RT) == RTSWITCH_RT) ++ for (i = 0; i < ctx->tasks_count; i++) { ++ struct rtswitch_task *task = &ctx->tasks[i]; ++ ++ /* Find the first non kernel-space task. */ ++ if ((task->base.flags & RTSWITCH_KERNEL)) ++ continue; ++ ++ /* Unblock it. */ ++ switch(task->base.flags & RTSWITCH_RT) { ++ case RTSWITCH_NRT: ++ ctx->utask = task; ++ rtdm_nrtsig_pend(&ctx->wake_utask); ++ break; ++ ++ case RTSWITCH_RT: ++ rtdm_event_signal(&task->rt_synch); ++ break; ++ } ++ ++ xnthread_suspend(&cur->ktask, ++ XNSUSP, XN_INFINITE, XN_RELATIVE, NULL); ++ } ++} ++ ++static int rtswitch_pend_rt(struct rtswitch_context *ctx, ++ unsigned int idx) ++{ ++ struct rtswitch_task *task; ++ int rc; ++ ++ if (idx > ctx->tasks_count) ++ return -EINVAL; ++ ++ task = &ctx->tasks[idx]; ++ task->base.flags |= RTSWITCH_RT; ++ ++ rc = rtdm_event_wait(&task->rt_synch); ++ if (rc < 0) ++ return rc; ++ ++ if (ctx->failed) ++ return 1; ++ ++ return 0; ++} ++ ++static void timed_wake_up(rtdm_timer_t *timer) ++{ ++ struct rtswitch_context *ctx = ++ container_of(timer, struct rtswitch_context, wake_up_delay); ++ struct rtswitch_task *task; ++ ++ task = &ctx->tasks[ctx->next_task]; ++ ++ switch (task->base.flags & RTSWITCH_RT) { ++ case RTSWITCH_NRT: ++ ctx->utask = task; ++ rtdm_nrtsig_pend(&ctx->wake_utask); ++ break; ++ ++ case RTSWITCH_RT: ++ rtdm_event_signal(&task->rt_synch); ++ } ++} ++ ++static int rtswitch_to_rt(struct rtswitch_context *ctx, ++ unsigned int from_idx, ++ unsigned int to_idx) ++{ ++ struct rtswitch_task *from, *to; ++ int rc; ++ ++ if (from_idx > ctx->tasks_count || to_idx > ctx->tasks_count) ++ return -EINVAL; ++ ++ /* to == from is a special case which means ++ "return to the previous task". 
*/ ++ if (to_idx == from_idx) ++ to_idx = ctx->error.last_switch.from; ++ ++ from = &ctx->tasks[from_idx]; ++ to = &ctx->tasks[to_idx]; ++ ++ from->base.flags |= RTSWITCH_RT; ++ from->last_switch = ++ctx->switches_count; ++ ctx->error.last_switch.from = from_idx; ++ ctx->error.last_switch.to = to_idx; ++ barrier(); ++ ++ if (ctx->pause_us) { ++ ctx->next_task = to_idx; ++ barrier(); ++ rtdm_timer_start(&ctx->wake_up_delay, ++ ctx->pause_us * 1000, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ xnsched_lock(); ++ } else ++ switch (to->base.flags & RTSWITCH_RT) { ++ case RTSWITCH_NRT: ++ ctx->utask = to; ++ barrier(); ++ rtdm_nrtsig_pend(&ctx->wake_utask); ++ xnsched_lock(); ++ break; ++ ++ case RTSWITCH_RT: ++ xnsched_lock(); ++ rtdm_event_signal(&to->rt_synch); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ rc = rtdm_event_wait(&from->rt_synch); ++ xnsched_unlock(); ++ ++ if (rc < 0) ++ return rc; ++ ++ if (ctx->failed) ++ return 1; ++ ++ return 0; ++} ++ ++static int rtswitch_pend_nrt(struct rtswitch_context *ctx, ++ unsigned int idx) ++{ ++ struct rtswitch_task *task; ++ ++ if (idx > ctx->tasks_count) ++ return -EINVAL; ++ ++ task = &ctx->tasks[idx]; ++ ++ task->base.flags &= ~RTSWITCH_RT; ++ ++ if (down_interruptible(&task->nrt_synch)) ++ return -EINTR; ++ ++ if (ctx->failed) ++ return 1; ++ ++ return 0; ++} ++ ++static int rtswitch_to_nrt(struct rtswitch_context *ctx, ++ unsigned int from_idx, ++ unsigned int to_idx) ++{ ++ struct rtswitch_task *from, *to; ++ unsigned int expected, fp_val; ++ int fp_check; ++ ++ if (from_idx > ctx->tasks_count || to_idx > ctx->tasks_count) ++ return -EINVAL; ++ ++ /* to == from is a special case which means ++ "return to the previous task". */ ++ if (to_idx == from_idx) ++ to_idx = ctx->error.last_switch.from; ++ ++ from = &ctx->tasks[from_idx]; ++ to = &ctx->tasks[to_idx]; ++ ++ fp_check = ctx->switches_count == from->last_switch + 1 ++ && ctx->error.last_switch.from == to_idx ++ && ctx->error.last_switch.to == from_idx; ++ ++ from->base.flags &= ~RTSWITCH_RT; ++ from->last_switch = ++ctx->switches_count; ++ ctx->error.last_switch.from = from_idx; ++ ctx->error.last_switch.to = to_idx; ++ barrier(); ++ ++ if (ctx->pause_us) { ++ ctx->next_task = to_idx; ++ barrier(); ++ rtdm_timer_start(&ctx->wake_up_delay, ++ ctx->pause_us * 1000, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ } else ++ switch (to->base.flags & RTSWITCH_RT) { ++ case RTSWITCH_NRT: ++ switch_to_nrt: ++ up(&to->nrt_synch); ++ break; ++ ++ case RTSWITCH_RT: ++ ++ if (!fp_check || fp_linux_begin() < 0) { ++ fp_check = 0; ++ goto signal_nofp; ++ } ++ ++ expected = from_idx + 500 + ++ (ctx->switches_count % 4000000) * 1000; ++ ++ fp_regs_set(fp_features, expected); ++ rtdm_event_signal(&to->rt_synch); ++ fp_val = fp_regs_check(fp_features, expected, report); ++ fp_linux_end(); ++ ++ if(down_interruptible(&from->nrt_synch)) ++ return -EINTR; ++ if (ctx->failed) ++ return 1; ++ if (fp_val != expected) { ++ handle_ktask_error(ctx, fp_val); ++ return 1; ++ } ++ ++ from->base.flags &= ~RTSWITCH_RT; ++ from->last_switch = ++ctx->switches_count; ++ ctx->error.last_switch.from = from_idx; ++ ctx->error.last_switch.to = to_idx; ++ if ((to->base.flags & RTSWITCH_RT) == RTSWITCH_NRT) ++ goto switch_to_nrt; ++ expected = from_idx + 500 + ++ (ctx->switches_count % 4000000) * 1000; ++ barrier(); ++ ++ fp_linux_begin(); ++ fp_regs_set(fp_features, expected); ++ rtdm_event_signal(&to->rt_synch); ++ fp_val = fp_regs_check(fp_features, expected, report); ++ fp_linux_end(); ++ ++ if (down_interruptible(&from->nrt_synch)) ++ 
return -EINTR; ++ if (ctx->failed) ++ return 1; ++ if (fp_val != expected) { ++ handle_ktask_error(ctx, fp_val); ++ return 1; ++ } ++ ++ from->base.flags &= ~RTSWITCH_RT; ++ from->last_switch = ++ctx->switches_count; ++ ctx->error.last_switch.from = from_idx; ++ ctx->error.last_switch.to = to_idx; ++ barrier(); ++ if ((to->base.flags & RTSWITCH_RT) == RTSWITCH_NRT) ++ goto switch_to_nrt; ++ ++ signal_nofp: ++ rtdm_event_signal(&to->rt_synch); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ if (down_interruptible(&from->nrt_synch)) ++ return -EINTR; ++ ++ if (ctx->failed) ++ return 1; ++ ++ return 0; ++} ++ ++static int rtswitch_set_tasks_count(struct rtswitch_context *ctx, unsigned int count) ++{ ++ struct rtswitch_task *tasks; ++ ++ if (ctx->tasks_count == count) ++ return 0; ++ ++ tasks = vmalloc(count * sizeof(*tasks)); ++ ++ if (!tasks) ++ return -ENOMEM; ++ ++ down(&ctx->lock); ++ ++ if (ctx->tasks) ++ vfree(ctx->tasks); ++ ++ ctx->tasks = tasks; ++ ctx->tasks_count = count; ++ ctx->next_index = 0; ++ ++ up(&ctx->lock); ++ ++ return 0; ++} ++ ++static int rtswitch_register_task(struct rtswitch_context *ctx, ++ struct rttst_swtest_task *arg) ++{ ++ struct rtswitch_task *t; ++ ++ down(&ctx->lock); ++ ++ if (ctx->next_index == ctx->tasks_count) { ++ up(&ctx->lock); ++ return -EBUSY; ++ } ++ ++ arg->index = ctx->next_index; ++ t = &ctx->tasks[arg->index]; ++ ctx->next_index++; ++ t->base = *arg; ++ t->last_switch = 0; ++ sema_init(&t->nrt_synch, 0); ++ rtdm_event_init(&t->rt_synch, 0); ++ ++ up(&ctx->lock); ++ ++ return 0; ++} ++ ++struct taskarg { ++ struct rtswitch_context *ctx; ++ struct rtswitch_task *task; ++}; ++ ++static void rtswitch_ktask(void *cookie) ++{ ++ struct taskarg *arg = (struct taskarg *) cookie; ++ unsigned int fp_val, expected, to, i = 0; ++ struct rtswitch_context *ctx = arg->ctx; ++ struct rtswitch_task *task = arg->task; ++ ++ to = task->base.index; ++ ++ rtswitch_pend_rt(ctx, task->base.index); ++ ++ while (!rtdm_task_should_stop()) { ++ if (task->base.flags & RTTST_SWTEST_USE_FPU) ++ fp_regs_set(fp_features, task->base.index + i * 1000); ++ ++ switch(i % 3) { ++ case 0: ++ /* to == from means "return to last task" */ ++ rtswitch_to_rt(ctx, task->base.index, task->base.index); ++ break; ++ case 1: ++ if (++to == task->base.index) ++ ++to; ++ if (to > ctx->tasks_count - 1) ++ to = 0; ++ if (to == task->base.index) ++ ++to; ++ ++ /* Fall through. */ ++ case 2: ++ rtswitch_to_rt(ctx, task->base.index, to); ++ } ++ ++ if (task->base.flags & RTTST_SWTEST_USE_FPU) { ++ expected = task->base.index + i * 1000; ++ fp_val = fp_regs_check(fp_features, expected, report); ++ ++ if (fp_val != expected) { ++ if (task->base.flags & RTTST_SWTEST_FREEZE) ++ xntrace_user_freeze(0, 0); ++ handle_ktask_error(ctx, fp_val); ++ } ++ } ++ ++ if (++i == 4000000) ++ i = 0; ++ } ++} ++ ++static int rtswitch_create_ktask(struct rtswitch_context *ctx, ++ struct rttst_swtest_task *ptask) ++{ ++ union xnsched_policy_param param; ++ struct xnthread_start_attr sattr; ++ struct xnthread_init_attr iattr; ++ struct rtswitch_task *task; ++ struct taskarg arg; ++ int init_flags; ++ char name[30]; ++ int err; ++ ++ /* ++ * Silently disable FP tests in kernel if FPU is not supported ++ * there. Typical case is math emulation support: we can use ++ * it from userland as a synthetic FPU, but there is no sane ++ * way to use it from kernel-based threads (Xenomai or Linux). 
++ */ ++ if (!fp_kernel_supported()) ++ ptask->flags &= ~RTTST_SWTEST_USE_FPU; ++ ++ ptask->flags |= RTSWITCH_KERNEL; ++ err = rtswitch_register_task(ctx, ptask); ++ ++ if (err) ++ return err; ++ ++ ksformat(name, sizeof(name), "rtk%d/%u", ptask->index, ctx->cpu); ++ ++ task = &ctx->tasks[ptask->index]; ++ ++ arg.ctx = ctx; ++ arg.task = task; ++ ++ init_flags = (ptask->flags & RTTST_SWTEST_FPU) ? XNFPU : 0; ++ ++ iattr.name = name; ++ iattr.flags = init_flags; ++ iattr.personality = &xenomai_personality; ++ iattr.affinity = *cpumask_of(ctx->cpu); ++ param.rt.prio = 1; ++ ++ set_cpus_allowed_ptr(current, cpumask_of(ctx->cpu)); ++ ++ err = xnthread_init(&task->ktask, ++ &iattr, &xnsched_class_rt, ¶m); ++ if (!err) { ++ sattr.mode = 0; ++ sattr.entry = rtswitch_ktask; ++ sattr.cookie = &arg; ++ err = xnthread_start(&task->ktask, &sattr); ++ } else ++ /* ++ * In order to avoid calling xnthread_cancel() for an ++ * invalid thread. ++ */ ++ task->base.flags = 0; ++ /* ++ * Putting the argument on stack is safe, because the new ++ * thread, thanks to the above call to set_cpus_allowed_ptr(), ++ * will preempt the current thread immediately, and will ++ * suspend only once the arguments on stack are used. ++ */ ++ ++ return err; ++} ++ ++static void rtswitch_utask_waker(rtdm_nrtsig_t *sig, void *arg) ++{ ++ struct rtswitch_context *ctx = (struct rtswitch_context *)arg; ++ up(&ctx->utask->nrt_synch); ++} ++ ++static int rtswitch_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rtswitch_context *ctx = rtdm_fd_to_private(fd); ++ ++ ctx->tasks = NULL; ++ ctx->tasks_count = ctx->next_index = ctx->cpu = ctx->switches_count = 0; ++ sema_init(&ctx->lock, 1); ++ ctx->failed = 0; ++ ctx->error.last_switch.from = ctx->error.last_switch.to = -1; ++ ctx->pause_us = 0; ++ ++ rtdm_nrtsig_init(&ctx->wake_utask, rtswitch_utask_waker, ctx); ++ ++ rtdm_timer_init(&ctx->wake_up_delay, timed_wake_up, "switchtest timer"); ++ ++ return 0; ++} ++ ++static void rtswitch_close(struct rtdm_fd *fd) ++{ ++ struct rtswitch_context *ctx = rtdm_fd_to_private(fd); ++ unsigned int i; ++ ++ rtdm_timer_destroy(&ctx->wake_up_delay); ++ rtdm_nrtsig_destroy(&ctx->wake_utask); ++ ++ if (ctx->tasks) { ++ set_cpus_allowed_ptr(current, cpumask_of(ctx->cpu)); ++ ++ for (i = 0; i < ctx->next_index; i++) { ++ struct rtswitch_task *task = &ctx->tasks[i]; ++ ++ if (task->base.flags & RTSWITCH_KERNEL) { ++ rtdm_task_destroy(&task->ktask); ++ rtdm_task_join(&task->ktask); ++ } ++ rtdm_event_destroy(&task->rt_synch); ++ } ++ vfree(ctx->tasks); ++ } ++} ++ ++static int rtswitch_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, ++ void *arg) ++{ ++ struct rtswitch_context *ctx = rtdm_fd_to_private(fd); ++ struct rttst_swtest_task task; ++ struct rttst_swtest_dir fromto; ++ __u32 count; ++ int err; ++ ++ switch (request) { ++ case RTTST_RTIOC_SWTEST_SET_TASKS_COUNT: ++ return rtswitch_set_tasks_count(ctx, ++ (unsigned long) arg); ++ ++ case RTTST_RTIOC_SWTEST_SET_CPU: ++ if ((unsigned long) arg > num_online_cpus() - 1) ++ return -EINVAL; ++ ++ ctx->cpu = (unsigned long) arg; ++ return 0; ++ ++ case RTTST_RTIOC_SWTEST_SET_PAUSE: ++ ctx->pause_us = (unsigned long) arg; ++ return 0; ++ ++ case RTTST_RTIOC_SWTEST_REGISTER_UTASK: ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(task))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, &task, arg, sizeof(task)); ++ ++ err = rtswitch_register_task(ctx, &task); ++ ++ if (!err) ++ rtdm_copy_to_user(fd, ++ arg, ++ &task, ++ sizeof(task)); ++ ++ return err; ++ ++ case RTTST_RTIOC_SWTEST_CREATE_KTASK: ++ if 
(!rtdm_rw_user_ok(fd, arg, sizeof(task))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, &task, arg, sizeof(task)); ++ ++ err = rtswitch_create_ktask(ctx, &task); ++ ++ if (!err) ++ rtdm_copy_to_user(fd, ++ arg, ++ &task, ++ sizeof(task)); ++ ++ return err; ++ ++ case RTTST_RTIOC_SWTEST_PEND: ++ if (!rtdm_read_user_ok(fd, arg, sizeof(task))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, &task, arg, sizeof(task)); ++ ++ return rtswitch_pend_nrt(ctx, task.index); ++ ++ case RTTST_RTIOC_SWTEST_SWITCH_TO: ++ if (!rtdm_read_user_ok(fd, arg, sizeof(fromto))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, ++ &fromto, ++ arg, ++ sizeof(fromto)); ++ ++ return rtswitch_to_nrt(ctx, fromto.from, fromto.to); ++ ++ case RTTST_RTIOC_SWTEST_GET_SWITCHES_COUNT: ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(count))) ++ return -EFAULT; ++ ++ count = ctx->switches_count; ++ ++ rtdm_copy_to_user(fd, arg, &count, sizeof(count)); ++ ++ return 0; ++ ++ case RTTST_RTIOC_SWTEST_GET_LAST_ERROR: ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(ctx->error))) ++ return -EFAULT; ++ ++ rtdm_copy_to_user(fd, ++ arg, ++ &ctx->error, ++ sizeof(ctx->error)); ++ ++ return 0; ++ ++ default: ++ return -ENOSYS; ++ } ++} ++ ++static int rtswitch_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, ++ void *arg) ++{ ++ struct rtswitch_context *ctx = rtdm_fd_to_private(fd); ++ struct rttst_swtest_task task; ++ struct rttst_swtest_dir fromto; ++ ++ switch (request) { ++ case RTTST_RTIOC_SWTEST_PEND: ++ if (!rtdm_read_user_ok(fd, arg, sizeof(task))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, &task, arg, sizeof(task)); ++ ++ return rtswitch_pend_rt(ctx, task.index); ++ ++ case RTTST_RTIOC_SWTEST_SWITCH_TO: ++ if (!rtdm_read_user_ok(fd, arg, sizeof(fromto))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, ++ &fromto, ++ arg, ++ sizeof(fromto)); ++ ++ return rtswitch_to_rt(ctx, fromto.from, fromto.to); ++ ++ case RTTST_RTIOC_SWTEST_GET_LAST_ERROR: ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(ctx->error))) ++ return -EFAULT; ++ ++ rtdm_copy_to_user(fd, ++ arg, ++ &ctx->error, ++ sizeof(ctx->error)); ++ ++ return 0; ++ ++ default: ++ return -ENOSYS; ++ } ++} ++ ++static struct rtdm_driver switchtest_driver = { ++ .profile_info = RTDM_PROFILE_INFO(switchtest, ++ RTDM_CLASS_TESTING, ++ RTDM_SUBCLASS_SWITCHTEST, ++ RTTST_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtswitch_context), ++ .ops = { ++ .open = rtswitch_open, ++ .close = rtswitch_close, ++ .ioctl_rt = rtswitch_ioctl_rt, ++ .ioctl_nrt = rtswitch_ioctl_nrt, ++ }, ++}; ++ ++static struct rtdm_device device = { ++ .driver = &switchtest_driver, ++ .label = "switchtest", ++}; ++ ++int __init __switchtest_init(void) ++{ ++ fp_features = fp_detect(); ++ ++ return rtdm_dev_register(&device); ++} ++ ++void __switchtest_exit(void) ++{ ++ rtdm_dev_unregister(&device); ++} ++ ++module_init(__switchtest_init); ++module_exit(__switchtest_exit); +--- linux/drivers/xenomai/testing/rtdmtest.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/rtdmtest.c 2021-04-07 16:01:28.022633030 +0800 +@@ -0,0 +1,293 @@ ++/* ++ * Copyright (C) 2010 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("RTDM test helper module"); ++MODULE_AUTHOR("Jan Kiszka "); ++MODULE_VERSION("0.1.0"); ++MODULE_LICENSE("GPL"); ++ ++struct rtdm_basic_context { ++ rtdm_timer_t close_timer; ++ unsigned long close_counter; ++ unsigned long close_deferral; ++}; ++ ++struct rtdm_actor_context { ++ rtdm_task_t actor_task; ++ unsigned int request; ++ rtdm_event_t run; ++ rtdm_event_t done; ++ union { ++ __u32 cpu; ++ } args; ++}; ++ ++static void close_timer_proc(rtdm_timer_t *timer) ++{ ++ struct rtdm_basic_context *ctx = ++ container_of(timer, struct rtdm_basic_context, close_timer); ++ ++ if (ctx->close_counter != 1) ++ printk(XENO_ERR ++ "rtdmtest: %s: close_counter is %lu, should be 1!\n", ++ __FUNCTION__, ctx->close_counter); ++ ++ ctx->close_deferral = RTTST_RTDM_NORMAL_CLOSE; ++ rtdm_fd_unlock(rtdm_private_to_fd(ctx)); ++} ++ ++static int rtdm_basic_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rtdm_basic_context *ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_timer_init(&ctx->close_timer, close_timer_proc, ++ "rtdm close test"); ++ ctx->close_counter = 0; ++ ctx->close_deferral = RTTST_RTDM_NORMAL_CLOSE; ++ ++ return 0; ++} ++ ++static void rtdm_basic_close(struct rtdm_fd *fd) ++{ ++ struct rtdm_basic_context *ctx = rtdm_fd_to_private(fd); ++ ++ ctx->close_counter++; ++ ++ switch (ctx->close_deferral) { ++ case RTTST_RTDM_DEFER_CLOSE_CONTEXT: ++ if (ctx->close_counter != 2) { ++ printk(XENO_ERR ++ "rtdmtest: %s: close_counter is %lu, " ++ "should be 2!\n", ++ __FUNCTION__, ctx->close_counter); ++ return; ++ } ++ rtdm_fd_unlock(fd); ++ break; ++ } ++ ++ rtdm_timer_destroy(&ctx->close_timer); ++} ++ ++static int rtdm_basic_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ int ret, magic = RTTST_RTDM_MAGIC_PRIMARY; ++ ++ switch (request) { ++ case RTTST_RTIOC_RTDM_PING_PRIMARY: ++ ret = rtdm_safe_copy_to_user(fd, arg, &magic, ++ sizeof(magic)); ++ break; ++ default: ++ ret = -ENOSYS; ++ } ++ ++ return ret; ++} ++ ++static int rtdm_basic_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct rtdm_basic_context *ctx = rtdm_fd_to_private(fd); ++ int ret = 0, magic = RTTST_RTDM_MAGIC_SECONDARY; ++ ++ switch (request) { ++ case RTTST_RTIOC_RTDM_DEFER_CLOSE: ++ ctx->close_deferral = (unsigned long)arg; ++ if (ctx->close_deferral == RTTST_RTDM_DEFER_CLOSE_CONTEXT) { ++ ++ctx->close_counter; ++ rtdm_fd_lock(fd); ++ rtdm_timer_start(&ctx->close_timer, 300000000ULL, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ } ++ break; ++ case RTTST_RTIOC_RTDM_PING_SECONDARY: ++ ret = rtdm_safe_copy_to_user(fd, arg, &magic, ++ sizeof(magic)); ++ break; ++ default: ++ ret = -ENOTTY; ++ } ++ ++ return ret; ++} ++ ++static void actor_handler(void *arg) ++{ ++ struct rtdm_actor_context *ctx = arg; ++ int ret; ++ ++ for (;;) { ++ if (rtdm_task_should_stop()) ++ return; ++ ++ ret = rtdm_event_wait(&ctx->run); ++ if (ret) ++ break; ++ ++ switch (ctx->request) { ++ case RTTST_RTIOC_RTDM_ACTOR_GET_CPU: ++ ctx->args.cpu = task_cpu(current); ++ break; ++ default: 
++ printk(XENO_ERR "rtdmtest: bad request code %d\n", ++ ctx->request); ++ } ++ ++ rtdm_event_signal(&ctx->done); ++ } ++} ++ ++static int rtdm_actor_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rtdm_actor_context *ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_event_init(&ctx->run, 0); ++ rtdm_event_init(&ctx->done, 0); ++ ++ return rtdm_task_init(&ctx->actor_task, "rtdm_actor", ++ actor_handler, ctx, ++ RTDM_TASK_LOWEST_PRIORITY, 0); ++} ++ ++static void rtdm_actor_close(struct rtdm_fd *fd) ++{ ++ struct rtdm_actor_context *ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_task_destroy(&ctx->actor_task); ++ rtdm_event_destroy(&ctx->run); ++ rtdm_event_destroy(&ctx->done); ++} ++ ++#define ACTION_TIMEOUT 50000000ULL /* 50 ms timeout on action */ ++ ++static int run_action(struct rtdm_actor_context *ctx, unsigned int request) ++{ ++ rtdm_toseq_t toseq; ++ ++ rtdm_toseq_init(&toseq, ACTION_TIMEOUT); ++ ctx->request = request; ++ rtdm_event_signal(&ctx->run); ++ /* ++ * XXX: The handshake mechanism is not bullet-proof against ++ * -EINTR received when waiting for the done event. Hopefully ++ * we won't restart/start a request while the action task has ++ * not yet completed the previous one we stopped waiting for ++ * abruptly. ++ */ ++ return rtdm_event_timedwait(&ctx->done, ACTION_TIMEOUT, &toseq); ++} ++ ++static int rtdm_actor_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct rtdm_actor_context *ctx = rtdm_fd_to_private(fd); ++ int ret; ++ ++ switch (request) { ++ case RTTST_RTIOC_RTDM_ACTOR_GET_CPU: ++ ctx->args.cpu = (__u32)-EINVAL; ++ ret = run_action(ctx, request); ++ if (ret) ++ break; ++ ret = rtdm_safe_copy_to_user(fd, arg, &ctx->args.cpu, ++ sizeof(ctx->args.cpu)); ++ break; ++ default: ++ ret = -ENOTTY; ++ } ++ ++ return ret; ++} ++ ++static struct rtdm_driver rtdm_basic_driver = { ++ .profile_info = RTDM_PROFILE_INFO(rtdm_test_basic, ++ RTDM_CLASS_TESTING, ++ RTDM_SUBCLASS_RTDMTEST, ++ RTTST_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .device_count = 2, ++ .context_size = sizeof(struct rtdm_basic_context), ++ .ops = { ++ .open = rtdm_basic_open, ++ .close = rtdm_basic_close, ++ .ioctl_rt = rtdm_basic_ioctl_rt, ++ .ioctl_nrt = rtdm_basic_ioctl_nrt, ++ }, ++}; ++ ++static struct rtdm_driver rtdm_actor_driver = { ++ .profile_info = RTDM_PROFILE_INFO(rtdm_test_actor, ++ RTDM_CLASS_TESTING, ++ RTDM_SUBCLASS_RTDMTEST, ++ RTTST_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtdm_actor_context), ++ .ops = { ++ .open = rtdm_actor_open, ++ .close = rtdm_actor_close, ++ .ioctl_rt = rtdm_actor_ioctl, ++ }, ++}; ++ ++static struct rtdm_device device[3] = { ++ [0 ... 
1] = { ++ .driver = &rtdm_basic_driver, ++ .label = "rtdm%d", ++ }, ++ [2] = { ++ .driver = &rtdm_actor_driver, ++ .label = "rtdmx", ++ } ++}; ++ ++static int __init rtdm_test_init(void) ++{ ++ int i, ret; ++ ++ for (i = 0; i < ARRAY_SIZE(device); i++) { ++ ret = rtdm_dev_register(device + i); ++ if (ret) ++ goto fail; ++ } ++ ++ return 0; ++fail: ++ while (i-- > 0) ++ rtdm_dev_unregister(device + i); ++ ++ return ret; ++} ++ ++static void __exit rtdm_test_exit(void) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(device); i++) ++ rtdm_dev_unregister(device + i); ++} ++ ++module_init(rtdm_test_init); ++module_exit(rtdm_test_exit); +--- linux/drivers/xenomai/testing/timerbench.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/timerbench.c 2021-04-07 16:01:28.017633037 +0800 +@@ -0,0 +1,529 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("Timer latency test helper"); ++MODULE_AUTHOR("Jan Kiszka "); ++MODULE_VERSION("0.2.1"); ++MODULE_LICENSE("GPL"); ++ ++struct rt_tmbench_context { ++ int mode; ++ unsigned int period; ++ int freeze_max; ++ int warmup_loops; ++ int samples_per_sec; ++ int32_t *histogram_min; ++ int32_t *histogram_max; ++ int32_t *histogram_avg; ++ int histogram_size; ++ int bucketsize; ++ ++ rtdm_task_t timer_task; ++ ++ rtdm_timer_t timer; ++ int warmup; ++ uint64_t start_time; ++ uint64_t date; ++ struct rttst_bench_res curr; ++ ++ rtdm_event_t result_event; ++ struct rttst_interm_bench_res result; ++ ++ struct semaphore nrt_mutex; ++}; ++ ++static inline void add_histogram(struct rt_tmbench_context *ctx, ++ __s32 *histogram, __s32 addval) ++{ ++ /* bucketsize steps */ ++ int inabs = (addval >= 0 ? addval : -addval) / ctx->bucketsize; ++ histogram[inabs < ctx->histogram_size ? ++ inabs : ctx->histogram_size - 1]++; ++} ++ ++static inline long long slldiv(long long s, unsigned d) ++{ ++ return s >= 0 ? xnarch_ulldiv(s, d, NULL) : -xnarch_ulldiv(-s, d, NULL); ++} ++ ++static void eval_inner_loop(struct rt_tmbench_context *ctx, __s32 dt) ++{ ++ if (dt > ctx->curr.max) ++ ctx->curr.max = dt; ++ if (dt < ctx->curr.min) ++ ctx->curr.min = dt; ++ ctx->curr.avg += dt; ++ ++#ifdef CONFIG_IPIPE_TRACE ++ if (ctx->freeze_max && (dt > ctx->result.overall.max) && !ctx->warmup) { ++ ipipe_trace_frozen_reset(); ++ ipipe_trace_freeze(dt); ++ ctx->result.overall.max = dt; ++ } ++#endif /* CONFIG_IPIPE_TRACE */ ++ ++ ctx->date += ctx->period; ++ ++ if (!ctx->warmup && ctx->histogram_size) ++ add_histogram(ctx, ctx->histogram_avg, dt); ++ ++ /* Evaluate overruns and adjust next release date. ++ Beware of signedness! 
*/ ++ while (dt > 0 && (unsigned long)dt > ctx->period) { ++ ctx->curr.overruns++; ++ ctx->date += ctx->period; ++ dt -= ctx->period; ++ } ++} ++ ++static void eval_outer_loop(struct rt_tmbench_context *ctx) ++{ ++ if (!ctx->warmup) { ++ if (ctx->histogram_size) { ++ add_histogram(ctx, ctx->histogram_max, ctx->curr.max); ++ add_histogram(ctx, ctx->histogram_min, ctx->curr.min); ++ } ++ ++ ctx->result.last.min = ctx->curr.min; ++ if (ctx->curr.min < ctx->result.overall.min) ++ ctx->result.overall.min = ctx->curr.min; ++ ++ ctx->result.last.max = ctx->curr.max; ++ if (ctx->curr.max > ctx->result.overall.max) ++ ctx->result.overall.max = ctx->curr.max; ++ ++ ctx->result.last.avg = ++ slldiv(ctx->curr.avg, ctx->samples_per_sec); ++ ctx->result.overall.avg += ctx->result.last.avg; ++ ctx->result.overall.overruns += ctx->curr.overruns; ++ rtdm_event_pulse(&ctx->result_event); ++ } ++ ++ if (ctx->warmup && ++ (ctx->result.overall.test_loops == ctx->warmup_loops)) { ++ ctx->result.overall.test_loops = 0; ++ ctx->warmup = 0; ++ } ++ ++ ctx->curr.min = 10000000; ++ ctx->curr.max = -10000000; ++ ctx->curr.avg = 0; ++ ctx->curr.overruns = 0; ++ ++ ctx->result.overall.test_loops++; ++} ++ ++static void timer_task_proc(void *arg) ++{ ++ struct rt_tmbench_context *ctx = arg; ++ int count, err; ++ spl_t s; ++ ++ /* first event: one millisecond from now. */ ++ ctx->date = rtdm_clock_read_monotonic() + 1000000; ++ ++ while (1) { ++ for (count = 0; count < ctx->samples_per_sec; count++) { ++ cobalt_atomic_enter(s); ++ ctx->start_time = rtdm_clock_read_monotonic(); ++ err = rtdm_task_sleep_abs(ctx->date, ++ RTDM_TIMERMODE_ABSOLUTE); ++ cobalt_atomic_leave(s); ++ if (err) ++ return; ++ ++ eval_inner_loop(ctx, ++ (__s32)(rtdm_clock_read_monotonic() - ++ ctx->date)); ++ } ++ eval_outer_loop(ctx); ++ } ++} ++ ++static void timer_proc(rtdm_timer_t *timer) ++{ ++ struct rt_tmbench_context *ctx = ++ container_of(timer, struct rt_tmbench_context, timer); ++ int err; ++ ++ do { ++ eval_inner_loop(ctx, (__s32)(rtdm_clock_read_monotonic() - ++ ctx->date)); ++ ++ ctx->start_time = rtdm_clock_read_monotonic(); ++ err = rtdm_timer_start_in_handler(&ctx->timer, ctx->date, 0, ++ RTDM_TIMERMODE_ABSOLUTE); ++ ++ if (++ctx->curr.test_loops >= ctx->samples_per_sec) { ++ ctx->curr.test_loops = 0; ++ eval_outer_loop(ctx); ++ } ++ } while (err); ++} ++ ++static int rt_tmbench_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rt_tmbench_context *ctx; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ ctx->mode = RTTST_TMBENCH_INVALID; ++ sema_init(&ctx->nrt_mutex, 1); ++ ++ return 0; ++} ++ ++static void rt_tmbench_close(struct rtdm_fd *fd) ++{ ++ struct rt_tmbench_context *ctx; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ down(&ctx->nrt_mutex); ++ ++ if (ctx->mode >= 0) { ++ if (ctx->mode == RTTST_TMBENCH_TASK) ++ rtdm_task_destroy(&ctx->timer_task); ++ else if (ctx->mode == RTTST_TMBENCH_HANDLER) ++ rtdm_timer_destroy(&ctx->timer); ++ ++ rtdm_event_destroy(&ctx->result_event); ++ ++ if (ctx->histogram_size) ++ kfree(ctx->histogram_min); ++ ++ ctx->mode = RTTST_TMBENCH_INVALID; ++ ctx->histogram_size = 0; ++ } ++ ++ up(&ctx->nrt_mutex); ++} ++ ++static int rt_tmbench_start(struct rtdm_fd *fd, ++ struct rt_tmbench_context *ctx, ++ struct rttst_tmbench_config __user *user_config) ++{ ++ int err = 0; ++ spl_t s; ++ ++ struct rttst_tmbench_config config_buf; ++ struct rttst_tmbench_config *config = ++ (struct rttst_tmbench_config *)user_config; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_safe_copy_from_user ++ (fd, &config_buf,user_config, ++ 
sizeof(struct rttst_tmbench_config)) < 0) ++ return -EFAULT; ++ ++ config = &config_buf; ++ } ++ ++ down(&ctx->nrt_mutex); ++ ++ ctx->period = config->period; ++ ctx->warmup_loops = config->warmup_loops; ++ ctx->samples_per_sec = 1000000000 / ctx->period; ++ ctx->histogram_size = config->histogram_size; ++ ctx->freeze_max = config->freeze_max; ++ ++ if (ctx->histogram_size > 0) { ++ ctx->histogram_min = ++ kmalloc(3 * ctx->histogram_size * sizeof(int32_t), ++ GFP_KERNEL); ++ ctx->histogram_max = ++ ctx->histogram_min + config->histogram_size; ++ ctx->histogram_avg = ++ ctx->histogram_max + config->histogram_size; ++ ++ if (!ctx->histogram_min) { ++ up(&ctx->nrt_mutex); ++ return -ENOMEM; ++ } ++ ++ memset(ctx->histogram_min, 0, ++ 3 * ctx->histogram_size * sizeof(int32_t)); ++ ctx->bucketsize = config->histogram_bucketsize; ++ } ++ ++ ctx->result.overall.min = 10000000; ++ ctx->result.overall.max = -10000000; ++ ctx->result.overall.avg = 0; ++ ctx->result.overall.test_loops = 1; ++ ctx->result.overall.overruns = 0; ++ ++ ctx->warmup = 1; ++ ++ ctx->curr.min = 10000000; ++ ctx->curr.max = -10000000; ++ ctx->curr.avg = 0; ++ ctx->curr.overruns = 0; ++ ctx->mode = RTTST_TMBENCH_INVALID; ++ ++ rtdm_event_init(&ctx->result_event, 0); ++ ++ if (config->mode == RTTST_TMBENCH_TASK) { ++ err = rtdm_task_init(&ctx->timer_task, "timerbench", ++ timer_task_proc, ctx, ++ config->priority, 0); ++ if (!err) ++ ctx->mode = RTTST_TMBENCH_TASK; ++ } else { ++ rtdm_timer_init(&ctx->timer, timer_proc, ++ rtdm_fd_device(fd)->name); ++ ++ ctx->curr.test_loops = 0; ++ ++ ctx->mode = RTTST_TMBENCH_HANDLER; ++ ++ cobalt_atomic_enter(s); ++ ctx->start_time = rtdm_clock_read_monotonic(); ++ ++ /* first event: one millisecond from now. */ ++ ctx->date = ctx->start_time + 1000000; ++ ++ err = rtdm_timer_start(&ctx->timer, ctx->date, 0, ++ RTDM_TIMERMODE_ABSOLUTE); ++ cobalt_atomic_leave(s); ++ } ++ ++ up(&ctx->nrt_mutex); ++ ++ return err; ++} ++ ++static int kernel_copy_results(struct rt_tmbench_context *ctx, ++ struct rttst_overall_bench_res *res) ++{ ++ int size; ++ ++ memcpy(&res->result, &ctx->result.overall, sizeof(res->result)); ++ ++ if (ctx->histogram_size > 0) { ++ size = ctx->histogram_size * sizeof(int32_t); ++ memcpy(res->histogram_min, ctx->histogram_min, size); ++ memcpy(res->histogram_max, ctx->histogram_max, size); ++ memcpy(res->histogram_avg, ctx->histogram_avg, size); ++ kfree(ctx->histogram_min); ++ } ++ ++ return 0; ++} ++ ++static int user_copy_results(struct rt_tmbench_context *ctx, ++ struct rttst_overall_bench_res __user *u_res) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(ctx); ++ struct rttst_overall_bench_res res_buf; ++ int ret, size; ++ ++ ret = rtdm_safe_copy_to_user(fd, &u_res->result, ++ &ctx->result.overall, ++ sizeof(u_res->result)); ++ if (ret || ctx->histogram_size == 0) ++ return ret; ++ ++ size = ctx->histogram_size * sizeof(int32_t); ++ ++ if (rtdm_safe_copy_from_user(fd, &res_buf, u_res, sizeof(res_buf)) < 0 || ++ rtdm_safe_copy_to_user(fd, res_buf.histogram_min, ++ ctx->histogram_min, size) < 0 || ++ rtdm_safe_copy_to_user(fd, res_buf.histogram_max, ++ ctx->histogram_max, size) < 0 || ++ rtdm_safe_copy_to_user(fd, res_buf.histogram_avg, ++ ctx->histogram_avg, size) < 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ ++static int compat_user_copy_results(struct rt_tmbench_context *ctx, ++ struct compat_rttst_overall_bench_res __user *u_res) ++{ ++ struct compat_rttst_overall_bench_res res_buf; ++ struct rtdm_fd *fd = rtdm_private_to_fd(ctx); 
++ int ret, size; ++ ++ ret = rtdm_safe_copy_to_user(fd, &u_res->result, ++ &ctx->result.overall, ++ sizeof(u_res->result)); ++ if (ret || ctx->histogram_size == 0) ++ return ret; ++ ++ size = ctx->histogram_size * sizeof(int32_t); ++ ++ if (rtdm_safe_copy_from_user(fd, &res_buf, u_res, sizeof(res_buf)) < 0 || ++ rtdm_safe_copy_to_user(fd, compat_ptr(res_buf.histogram_min), ++ ctx->histogram_min, size) < 0 || ++ rtdm_safe_copy_to_user(fd, compat_ptr(res_buf.histogram_max), ++ ctx->histogram_max, size) < 0 || ++ rtdm_safe_copy_to_user(fd, compat_ptr(res_buf.histogram_avg), ++ ctx->histogram_avg, size) < 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++#endif /* CONFIG_XENO_ARCH_SYS3264 */ ++ ++static int rt_tmbench_stop(struct rt_tmbench_context *ctx, void *u_res) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(ctx); ++ int ret; ++ ++ down(&ctx->nrt_mutex); ++ ++ if (ctx->mode < 0) { ++ up(&ctx->nrt_mutex); ++ return -EINVAL; ++ } ++ ++ if (ctx->mode == RTTST_TMBENCH_TASK) ++ rtdm_task_destroy(&ctx->timer_task); ++ else if (ctx->mode == RTTST_TMBENCH_HANDLER) ++ rtdm_timer_destroy(&ctx->timer); ++ ++ rtdm_event_destroy(&ctx->result_event); ++ ++ ctx->mode = RTTST_TMBENCH_INVALID; ++ ++ ctx->result.overall.avg = ++ slldiv(ctx->result.overall.avg, ++ ((ctx->result.overall.test_loops) > 1 ? ++ ctx->result.overall.test_loops : 2) - 1); ++ ++ if (rtdm_fd_is_user(fd)) { ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) ++ ret = compat_user_copy_results(ctx, u_res); ++ else ++#endif ++ ret = user_copy_results(ctx, u_res); ++ } else ++ ret = kernel_copy_results(ctx, u_res); ++ ++ if (ctx->histogram_size > 0) ++ kfree(ctx->histogram_min); ++ ++ up(&ctx->nrt_mutex); ++ ++ return ret; ++} ++ ++static int rt_tmbench_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct rt_tmbench_context *ctx; ++ int err = 0; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ switch (request) { ++ case RTTST_RTIOC_TMBENCH_START: ++ err = rt_tmbench_start(fd, ctx, arg); ++ break; ++ ++ COMPAT_CASE(RTTST_RTIOC_TMBENCH_STOP): ++ err = rt_tmbench_stop(ctx, arg); ++ break; ++ default: ++ err = -ENOSYS; ++ } ++ ++ return err; ++} ++ ++static int rt_tmbench_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct rt_tmbench_context *ctx; ++ int err = 0; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ switch (request) { ++ case RTTST_RTIOC_INTERM_BENCH_RES: ++ err = rtdm_event_wait(&ctx->result_event); ++ if (err) ++ return err; ++ ++ if (rtdm_fd_is_user(fd)) { ++ struct rttst_interm_bench_res __user *user_res = arg; ++ ++ err = rtdm_safe_copy_to_user(fd, user_res, ++ &ctx->result, ++ sizeof(*user_res)); ++ } else { ++ struct rttst_interm_bench_res *res = (void *)arg; ++ ++ memcpy(res, &ctx->result, sizeof(*res)); ++ } ++ ++ break; ++ ++ default: ++ err = -ENOSYS; ++ } ++ ++ return err; ++} ++ ++static struct rtdm_driver timerbench_driver = { ++ .profile_info = RTDM_PROFILE_INFO(timerbench, ++ RTDM_CLASS_TESTING, ++ RTDM_SUBCLASS_TIMERBENCH, ++ RTTST_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rt_tmbench_context), ++ .ops = { ++ .open = rt_tmbench_open, ++ .close = rt_tmbench_close, ++ .ioctl_rt = rt_tmbench_ioctl_rt, ++ .ioctl_nrt = rt_tmbench_ioctl_nrt, ++ }, ++}; ++ ++static struct rtdm_device device = { ++ .driver = &timerbench_driver, ++ .label = "timerbench", ++}; ++ ++static int __init __timerbench_init(void) ++{ ++ return rtdm_dev_register(&device); ++} ++ ++static void __timerbench_exit(void) ++{ ++ 
rtdm_dev_unregister(&device); ++} ++ ++module_init(__timerbench_init); ++module_exit(__timerbench_exit); +--- linux/drivers/xenomai/spi/spi-bcm2835.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-bcm2835.c 2021-04-07 16:01:28.013633043 +0800 +@@ -0,0 +1,699 @@ ++/** ++ * I/O handling lifted from drivers/spi/spi-bcm2835.c: ++ * Copyright (C) 2012 Chris Boot ++ * Copyright (C) 2013 Stephen Warren ++ * Copyright (C) 2015 Martin Sperl ++ * ++ * RTDM integration by: ++ * Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "spi-master.h" ++ ++#define RTDM_SUBCLASS_BCM2835 1 ++ ++/* SPI register offsets */ ++#define BCM2835_SPI_CS 0x00 ++#define BCM2835_SPI_FIFO 0x04 ++#define BCM2835_SPI_CLK 0x08 ++#define BCM2835_SPI_DLEN 0x0c ++#define BCM2835_SPI_LTOH 0x10 ++#define BCM2835_SPI_DC 0x14 ++ ++/* Bitfields in CS */ ++#define BCM2835_SPI_CS_LEN_LONG 0x02000000 ++#define BCM2835_SPI_CS_DMA_LEN 0x01000000 ++#define BCM2835_SPI_CS_CSPOL2 0x00800000 ++#define BCM2835_SPI_CS_CSPOL1 0x00400000 ++#define BCM2835_SPI_CS_CSPOL0 0x00200000 ++#define BCM2835_SPI_CS_RXF 0x00100000 ++#define BCM2835_SPI_CS_RXR 0x00080000 ++#define BCM2835_SPI_CS_TXD 0x00040000 ++#define BCM2835_SPI_CS_RXD 0x00020000 ++#define BCM2835_SPI_CS_DONE 0x00010000 ++#define BCM2835_SPI_CS_LEN 0x00002000 ++#define BCM2835_SPI_CS_REN 0x00001000 ++#define BCM2835_SPI_CS_ADCS 0x00000800 ++#define BCM2835_SPI_CS_INTR 0x00000400 ++#define BCM2835_SPI_CS_INTD 0x00000200 ++#define BCM2835_SPI_CS_DMAEN 0x00000100 ++#define BCM2835_SPI_CS_TA 0x00000080 ++#define BCM2835_SPI_CS_CSPOL 0x00000040 ++#define BCM2835_SPI_CS_CLEAR_RX 0x00000020 ++#define BCM2835_SPI_CS_CLEAR_TX 0x00000010 ++#define BCM2835_SPI_CS_CPOL 0x00000008 ++#define BCM2835_SPI_CS_CPHA 0x00000004 ++#define BCM2835_SPI_CS_CS_10 0x00000002 ++#define BCM2835_SPI_CS_CS_01 0x00000001 ++ ++#define BCM2835_SPI_POLLING_LIMIT_US 30 ++#define BCM2835_SPI_POLLING_JIFFIES 2 ++#define BCM2835_SPI_DMA_MIN_LENGTH 96 ++#define BCM2835_SPI_MODE_BITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \ ++ | SPI_NO_CS | SPI_3WIRE) ++ ++struct spi_master_bcm2835 { ++ struct rtdm_spi_master master; ++ void __iomem *regs; ++ struct clk *clk; ++ unsigned long clk_hz; ++ rtdm_irq_t irqh; ++ const u8 *tx_buf; ++ u8 *rx_buf; ++ int tx_len; ++ int rx_len; ++ rtdm_event_t transfer_done; ++}; ++ ++struct spi_slave_bcm2835 { ++ struct rtdm_spi_remote_slave slave; ++ void *io_virt; ++ dma_addr_t io_dma; ++ size_t io_len; ++}; ++ ++static inline struct spi_slave_bcm2835 * ++to_slave_bcm2835(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave, struct spi_slave_bcm2835, slave); ++} ++ ++static inline struct spi_master_bcm2835 * 
++to_master_bcm2835(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave->master, struct spi_master_bcm2835, master); ++} ++ ++static inline struct device * ++master_to_kdev(struct rtdm_spi_master *master) ++{ ++ return &master->kmaster->dev; ++} ++ ++static inline u32 bcm2835_rd(struct spi_master_bcm2835 *spim, ++ unsigned int reg) ++{ ++ return readl(spim->regs + reg); ++} ++ ++static inline void bcm2835_wr(struct spi_master_bcm2835 *spim, ++ unsigned int reg, u32 val) ++{ ++ writel(val, spim->regs + reg); ++} ++ ++static inline void bcm2835_rd_fifo(struct spi_master_bcm2835 *spim) ++{ ++ u8 byte; ++ ++ while (spim->rx_len > 0 && ++ (bcm2835_rd(spim, BCM2835_SPI_CS) & BCM2835_SPI_CS_RXD)) { ++ byte = bcm2835_rd(spim, BCM2835_SPI_FIFO); ++ if (spim->rx_buf) ++ *spim->rx_buf++ = byte; ++ spim->rx_len--; ++ } ++} ++ ++static inline void bcm2835_wr_fifo(struct spi_master_bcm2835 *spim) ++{ ++ u8 byte; ++ ++ while (spim->tx_len > 0 && ++ (bcm2835_rd(spim, BCM2835_SPI_CS) & BCM2835_SPI_CS_TXD)) { ++ byte = spim->tx_buf ? *spim->tx_buf++ : 0; ++ bcm2835_wr(spim, BCM2835_SPI_FIFO, byte); ++ spim->tx_len--; ++ } ++} ++ ++static void bcm2835_reset_hw(struct spi_master_bcm2835 *spim) ++{ ++ u32 cs = bcm2835_rd(spim, BCM2835_SPI_CS); ++ ++ cs &= ~(BCM2835_SPI_CS_INTR | ++ BCM2835_SPI_CS_INTD | ++ BCM2835_SPI_CS_DMAEN | ++ BCM2835_SPI_CS_TA); ++ cs |= BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX; ++ ++ /* Reset the SPI block. */ ++ bcm2835_wr(spim, BCM2835_SPI_CS, cs); ++ bcm2835_wr(spim, BCM2835_SPI_DLEN, 0); ++} ++ ++static int bcm2835_spi_interrupt(rtdm_irq_t *irqh) ++{ ++ struct spi_master_bcm2835 *spim; ++ ++ spim = rtdm_irq_get_arg(irqh, struct spi_master_bcm2835); ++ ++ bcm2835_rd_fifo(spim); ++ bcm2835_wr_fifo(spim); ++ ++ if (bcm2835_rd(spim, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE) { ++ bcm2835_reset_hw(spim); ++ rtdm_event_signal(&spim->transfer_done); ++ } ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int bcm2835_configure(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ struct rtdm_spi_config *config = &slave->config; ++ unsigned long spi_hz, cdiv; ++ u32 cs; ++ ++ /* Set clock polarity and phase. */ ++ ++ cs = bcm2835_rd(spim, BCM2835_SPI_CS); ++ ++ cs &= ~(BCM2835_SPI_CS_CPOL | BCM2835_SPI_CS_CPHA); ++ if (config->mode & SPI_CPOL) ++ cs |= BCM2835_SPI_CS_CPOL; ++ if (config->mode & SPI_CPHA) ++ cs |= BCM2835_SPI_CS_CPHA; ++ ++ bcm2835_wr(spim, BCM2835_SPI_CS, cs); ++ ++ /* Set clock frequency. */ ++ ++ spi_hz = config->speed_hz; ++ ++ /* ++ * Fastest clock rate is of the APB clock, which is close to ++ * clk_hz / 2. ++ */ ++ if (spi_hz >= spim->clk_hz / 2) ++ cdiv = 2; ++ else if (spi_hz) { ++ cdiv = DIV_ROUND_UP(spim->clk_hz, spi_hz); /* Multiple of 2. */ ++ cdiv += (cdiv % 2); ++ if (cdiv >= 65536) ++ cdiv = 0; ++ } else ++ cdiv = 0; ++ ++ bcm2835_wr(spim, BCM2835_SPI_CLK, cdiv); ++ ++ return 0; ++} ++ ++static void bcm2835_chip_select(struct rtdm_spi_remote_slave *slave, ++ bool active) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ struct rtdm_spi_config *config = &slave->config; ++ u32 cs; ++ ++ cs = bcm2835_rd(spim, BCM2835_SPI_CS); ++ ++ if (config->mode & SPI_CS_HIGH) { ++ cs |= BCM2835_SPI_CS_CSPOL; ++ cs |= BCM2835_SPI_CS_CSPOL0 << slave->chip_select; ++ } else { ++ cs &= ~BCM2835_SPI_CS_CSPOL; ++ cs &= ~(BCM2835_SPI_CS_CSPOL0 << slave->chip_select); ++ } ++ ++ /* "active" is the logical state, not the impedance level. 
*/ ++ ++ if (active) { ++ if (config->mode & SPI_NO_CS) ++ cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01; ++ else { ++ cs &= ~(BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01); ++ cs |= slave->chip_select; ++ } ++ } else { ++ /* Put HW-CS into deselected state. */ ++ cs &= ~BCM2835_SPI_CS_CSPOL; ++ /* Use the "undefined" chip-select as precaution. */ ++ cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01; ++ } ++ ++ bcm2835_wr(spim, BCM2835_SPI_CS, cs); ++} ++ ++static int do_transfer_irq(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ int ret; ++ u32 cs; ++ ++ cs = bcm2835_rd(spim, BCM2835_SPI_CS); ++ ++ cs &= ~BCM2835_SPI_CS_REN; ++ if ((slave->config.mode & SPI_3WIRE) && spim->rx_buf) ++ cs |= BCM2835_SPI_CS_REN; ++ ++ cs |= BCM2835_SPI_CS_TA; ++ ++ /* ++ * Fill in fifo if we have gpio-cs note that there have been ++ * rare events where the native-CS flapped for <1us which may ++ * change the behaviour with gpio-cs this does not happen, so ++ * it is implemented only for this case. ++ */ ++ if (gpio_is_valid(slave->cs_gpio)) { ++ /* Set dummy CS, ->chip_select() was not called. */ ++ cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01; ++ /* Enable SPI block, before filling FIFO. */ ++ bcm2835_wr(spim, BCM2835_SPI_CS, cs); ++ bcm2835_wr_fifo(spim); ++ } ++ ++ /* Enable interrupts last, wait for transfer completion. */ ++ cs |= BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD; ++ bcm2835_wr(spim, BCM2835_SPI_CS, cs); ++ ++ ret = rtdm_event_wait(&spim->transfer_done); ++ if (ret) { ++ bcm2835_reset_hw(spim); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int bcm2835_transfer_iobufs(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ ++ if (bcm->io_len == 0) ++ return -EINVAL; /* No I/O buffers set. */ ++ ++ spim->tx_len = bcm->io_len / 2; ++ spim->rx_len = spim->tx_len; ++ spim->tx_buf = bcm->io_virt + spim->rx_len; ++ spim->rx_buf = bcm->io_virt; ++ ++ return do_transfer_irq(slave); ++} ++ ++static int bcm2835_transfer_iobufs_n(struct rtdm_spi_remote_slave *slave, ++ int len) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ ++ if ((bcm->io_len == 0) || ++ (len <= 0) || (len > (bcm->io_len / 2))) ++ return -EINVAL; ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = bcm->io_virt + bcm->io_len / 2; ++ spim->rx_buf = bcm->io_virt; ++ ++ return do_transfer_irq(slave); ++} ++ ++static ssize_t bcm2835_read(struct rtdm_spi_remote_slave *slave, ++ void *rx, size_t len) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = NULL; ++ spim->rx_buf = rx; ++ ++ return do_transfer_irq(slave) ?: len; ++} ++ ++static ssize_t bcm2835_write(struct rtdm_spi_remote_slave *slave, ++ const void *tx, size_t len) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = tx; ++ spim->rx_buf = NULL; ++ ++ return do_transfer_irq(slave) ?: len; ++} ++ ++static int set_iobufs(struct spi_slave_bcm2835 *bcm, size_t len) ++{ ++ dma_addr_t dma; ++ void *p; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ len = L1_CACHE_ALIGN(len) * 2; ++ if (len == bcm->io_len) ++ return 0; ++ ++ if (bcm->io_len) ++ return -EINVAL; /* I/O buffers may not be resized. 
*/ ++ ++ /* ++ * Since we need the I/O buffers to be set for starting a ++ * transfer, there is no need for serializing this routine and ++ * transfer_iobufs(), provided io_len is set last. ++ * ++ * NOTE: We don't need coherent memory until we actually get ++ * DMA transfers working, this code is a bit ahead of ++ * schedule. ++ * ++ * Revisit: this assumes DMA mask is 4Gb. ++ */ ++ p = dma_alloc_coherent(NULL, len, &dma, GFP_KERNEL); ++ if (p == NULL) ++ return -ENOMEM; ++ ++ bcm->io_dma = dma; ++ bcm->io_virt = p; ++ smp_mb(); ++ /* ++ * May race with transfer_iobufs(), must be assigned after all ++ * the rest is set up, enforcing a membar. ++ */ ++ bcm->io_len = len; ++ ++ return 0; ++} ++ ++static int bcm2835_set_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_iobufs *p) ++{ ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ int ret; ++ ++ ret = set_iobufs(bcm, p->io_len); ++ if (ret) ++ return ret; ++ ++ p->i_offset = 0; ++ p->o_offset = bcm->io_len / 2; ++ p->map_len = bcm->io_len; ++ ++ return 0; ++} ++ ++static int bcm2835_mmap_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct vm_area_struct *vma) ++{ ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ ++ /* ++ * dma_alloc_coherent() delivers non-cached memory, make sure ++ * to return consistent mapping attributes. Typically, mixing ++ * memory attributes across address spaces referring to the ++ * same physical area is architecturally wrong on ARM. ++ */ ++ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ++ ++ return rtdm_mmap_kmem(vma, bcm->io_virt); ++} ++ ++static void bcm2835_mmap_release(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ ++ dma_free_coherent(NULL, bcm->io_len, ++ bcm->io_virt, bcm->io_dma); ++ bcm->io_len = 0; ++} ++ ++static int gpio_match_name(struct gpio_chip *chip, void *data) ++{ ++ return !strcmp(chip->label, data); ++} ++ ++static int find_cs_gpio(struct spi_device *spi) ++{ ++ struct spi_master *kmaster = spi->master; ++ u32 pingroup_index, pin, pin_index; ++ struct device_node *pins; ++ struct gpio_chip *chip; ++ int ret; ++ ++ if (gpio_is_valid(spi->cs_gpio)) { ++ dev_info(&spi->dev, "using GPIO%i for CS%d\n", ++ spi->cs_gpio, spi->chip_select); ++ return 0; ++ } ++ ++ /* Translate native CS to GPIO. */ ++ ++ for (pingroup_index = 0; ++ (pins = of_parse_phandle(kmaster->dev.of_node, ++ "pinctrl-0", pingroup_index)) != 0; pingroup_index++) { ++ for (pin_index = 0; ++ of_property_read_u32_index(pins, "brcm,pins", ++ pin_index, &pin) == 0; pin_index++) { ++ if ((spi->chip_select == 0 && ++ (pin == 8 || pin == 36 || pin == 46)) || ++ (spi->chip_select == 1 && ++ (pin == 7 || pin == 35))) { ++ spi->cs_gpio = pin; ++ break; ++ } ++ } ++ of_node_put(pins); ++ } ++ ++ /* If that failed, assume GPIOs 7-11 are used */ ++ if (!gpio_is_valid(spi->cs_gpio) ) { ++ chip = gpiochip_find("pinctrl-bcm2835", gpio_match_name); ++ if (chip == NULL) ++ return 0; ++ ++ spi->cs_gpio = chip->base + 8 - spi->chip_select; ++ } ++ ++ dev_info(&spi->dev, ++ "setting up native-CS%i as GPIO %i\n", ++ spi->chip_select, spi->cs_gpio); ++ ++ ret = gpio_direction_output(spi->cs_gpio, ++ (spi->mode & SPI_CS_HIGH) ? 0 : 1); ++ if (ret) { ++ dev_err(&spi->dev, ++ "could not set CS%i gpio %i as output: %i", ++ spi->chip_select, spi->cs_gpio, ret); ++ return ret; ++ } ++ ++ /* ++ * Force value on GPIO in case the pin controller does not ++ * handle that properly when switching to output mode. 
++ */ ++ gpio_set_value(spi->cs_gpio, (spi->mode & SPI_CS_HIGH) ? 0 : 1); ++ ++ return 0; ++} ++ ++static struct rtdm_spi_remote_slave * ++bcm2835_attach_slave(struct rtdm_spi_master *master, struct spi_device *spi) ++{ ++ struct spi_slave_bcm2835 *bcm; ++ int ret; ++ ++ if (spi->chip_select > 1) { ++ /* ++ * Error in the case of native CS requested with CS > ++ * 1 officially there is a CS2, but it is not ++ * documented which GPIO is connected with that... ++ */ ++ dev_err(&spi->dev, ++ "%s: only two native chip-selects are supported\n", ++ __func__); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ ret = find_cs_gpio(spi); ++ if (ret) ++ return ERR_PTR(ret); ++ ++ bcm = kzalloc(sizeof(*bcm), GFP_KERNEL); ++ if (bcm == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = rtdm_spi_add_remote_slave(&bcm->slave, master, spi); ++ if (ret) { ++ dev_err(&spi->dev, ++ "%s: failed to attach slave\n", __func__); ++ kfree(bcm); ++ return ERR_PTR(ret); ++ } ++ ++ return &bcm->slave; ++} ++ ++static void bcm2835_detach_slave(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ ++ rtdm_spi_remove_remote_slave(slave); ++ kfree(bcm); ++} ++ ++static struct rtdm_spi_master_ops bcm2835_master_ops = { ++ .configure = bcm2835_configure, ++ .chip_select = bcm2835_chip_select, ++ .set_iobufs = bcm2835_set_iobufs, ++ .mmap_iobufs = bcm2835_mmap_iobufs, ++ .mmap_release = bcm2835_mmap_release, ++ .transfer_iobufs = bcm2835_transfer_iobufs, ++ .transfer_iobufs_n = bcm2835_transfer_iobufs_n, ++ .write = bcm2835_write, ++ .read = bcm2835_read, ++ .attach_slave = bcm2835_attach_slave, ++ .detach_slave = bcm2835_detach_slave, ++}; ++ ++static int bcm2835_spi_probe(struct platform_device *pdev) ++{ ++ struct spi_master_bcm2835 *spim; ++ struct rtdm_spi_master *master; ++ struct spi_master *kmaster; ++ struct resource *r; ++ int ret, irq; ++ ++ dev_dbg(&pdev->dev, "%s: entered\n", __func__); ++ ++ master = rtdm_spi_alloc_master(&pdev->dev, ++ struct spi_master_bcm2835, master); ++ if (master == NULL) ++ return -ENOMEM; ++ ++ master->subclass = RTDM_SUBCLASS_BCM2835; ++ master->ops = &bcm2835_master_ops; ++ platform_set_drvdata(pdev, master); ++ ++ kmaster = master->kmaster; ++ kmaster->mode_bits = BCM2835_SPI_MODE_BITS; ++ kmaster->bits_per_word_mask = SPI_BPW_MASK(8); ++ kmaster->num_chipselect = 2; ++ kmaster->dev.of_node = pdev->dev.of_node; ++ ++ spim = container_of(master, struct spi_master_bcm2835, master); ++ rtdm_event_init(&spim->transfer_done, 0); ++ ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ spim->regs = devm_ioremap_resource(&pdev->dev, r); ++ if (IS_ERR(spim->regs)) { ++ dev_err(&pdev->dev, "%s: cannot map I/O memory\n", __func__); ++ ret = PTR_ERR(spim->regs); ++ goto fail; ++ } ++ ++ spim->clk = devm_clk_get(&pdev->dev, NULL); ++ if (IS_ERR(spim->clk)) { ++ ret = PTR_ERR(spim->clk); ++ goto fail; ++ } ++ ++ spim->clk_hz = clk_get_rate(spim->clk); ++ ++ irq = irq_of_parse_and_map(pdev->dev.of_node, 0); ++ if (irq <= 0) { ++ ret = irq ?: -ENODEV; ++ goto fail; ++ } ++ ++ clk_prepare_enable(spim->clk); ++ ++ /* Initialise the hardware with the default polarities */ ++ bcm2835_wr(spim, BCM2835_SPI_CS, ++ BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX); ++ ++ ret = rtdm_irq_request(&spim->irqh, irq, ++ bcm2835_spi_interrupt, 0, ++ dev_name(&pdev->dev), spim); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: cannot request IRQ%d\n", ++ __func__, irq); ++ goto fail_unclk; ++ } ++ ++ ret = rtdm_spi_add_master(&spim->master); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: 
failed to add master\n", ++ __func__); ++ goto fail_unclk; ++ } ++ ++ return 0; ++ ++fail_unclk: ++ clk_disable_unprepare(spim->clk); ++fail: ++ spi_master_put(kmaster); ++ ++ return ret; ++} ++ ++static int bcm2835_spi_remove(struct platform_device *pdev) ++{ ++ struct rtdm_spi_master *master = platform_get_drvdata(pdev); ++ struct spi_master_bcm2835 *spim; ++ ++ dev_dbg(&pdev->dev, "%s: entered\n", __func__); ++ ++ spim = container_of(master, struct spi_master_bcm2835, master); ++ ++ /* Clear FIFOs, and disable the HW block */ ++ bcm2835_wr(spim, BCM2835_SPI_CS, ++ BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX); ++ ++ rtdm_irq_free(&spim->irqh); ++ ++ clk_disable_unprepare(spim->clk); ++ ++ rtdm_spi_remove_master(master); ++ ++ return 0; ++} ++ ++static const struct of_device_id bcm2835_spi_match[] = { ++ { ++ .compatible = "brcm,bcm2835-spi", ++ }, ++ { /* Sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(of, bcm2835_spi_match); ++ ++static struct platform_driver bcm2835_spi_driver = { ++ .driver = { ++ .name = "spi-bcm2835", ++ .of_match_table = bcm2835_spi_match, ++ }, ++ .probe = bcm2835_spi_probe, ++ .remove = bcm2835_spi_remove, ++}; ++module_platform_driver(bcm2835_spi_driver); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/spi/spi-omap2-mcspi-rt.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-omap2-mcspi-rt.c 2021-04-07 16:01:28.008633050 +0800 +@@ -0,0 +1,999 @@ ++/** ++ * I/O handling lifted from drivers/spi/spi-omap2-mcspi.c: ++ * Copyright (C) 2019 Laurentiu-Cristian Duca ++ * ++ * RTDM integration by: ++ * Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "spi-master.h" ++ ++#define RTDM_SUBCLASS_OMAP2_MCSPI 3 ++ ++#define OMAP4_MCSPI_REG_OFFSET 0x100 ++#define OMAP2_MCSPI_SPI_MODE_BITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH) ++ ++#define OMAP2_MCSPI_MAX_FREQ 48000000 ++#define OMAP2_MCSPI_DRIVER_MAX_FREQ 40000000 ++#define OMAP2_MCSPI_MAX_DIVIDER 4096 ++#define OMAP2_MCSPI_MAX_FIFODEPTH 64 ++#define OMAP2_MCSPI_MAX_FIFOWCNT 0xFFFF ++#define SPI_AUTOSUSPEND_TIMEOUT 2000 ++#define PM_NEGATIVE_DELAY -2000 ++ ++#define OMAP2_MCSPI_REVISION 0x00 ++#define OMAP2_MCSPI_SYSCONFIG 0x10 ++#define OMAP2_MCSPI_SYSSTATUS 0x14 ++#define OMAP2_MCSPI_IRQSTATUS 0x18 ++#define OMAP2_MCSPI_IRQENABLE 0x1c ++#define OMAP2_MCSPI_WAKEUPENABLE 0x20 ++#define OMAP2_MCSPI_SYST 0x24 ++#define OMAP2_MCSPI_MODULCTRL 0x28 ++#define OMAP2_MCSPI_XFERLEVEL 0x7c ++ ++/* per-channel (chip select) banks, 0x14 bytes each, first is: */ ++#define OMAP2_MCSPI_CHANNELBANK_SIZE 0x14 ++#define OMAP2_MCSPI_CHCONF0 0x2c ++#define OMAP2_MCSPI_CHSTAT0 0x30 ++#define OMAP2_MCSPI_CHCTRL0 0x34 ++#define OMAP2_MCSPI_TX0 0x38 ++#define OMAP2_MCSPI_RX0 0x3c ++ ++/* per-register bitmasks: */ ++#define OMAP2_MCSPI_IRQSTATUS_EOW BIT(17) ++#define OMAP2_MCSPI_IRQSTATUS_RX1_FULL BIT(6) ++#define OMAP2_MCSPI_IRQSTATUS_TX1_EMPTY BIT(4) ++#define OMAP2_MCSPI_IRQSTATUS_RX0_FULL BIT(2) ++#define OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY BIT(0) ++ ++#define OMAP2_MCSPI_IRQENABLE_EOW BIT(17) ++#define OMAP2_MCSPI_IRQENABLE_RX1_FULL BIT(6) ++#define OMAP2_MCSPI_IRQENABLE_TX1_EMPTY BIT(4) ++#define OMAP2_MCSPI_IRQENABLE_RX0_FULL BIT(2) ++#define OMAP2_MCSPI_IRQENABLE_TX0_EMPTY BIT(0) ++ ++#define OMAP2_MCSPI_MODULCTRL_SINGLE BIT(0) ++#define OMAP2_MCSPI_MODULCTRL_MS BIT(2) ++#define OMAP2_MCSPI_MODULCTRL_STEST BIT(3) ++ ++#define OMAP2_MCSPI_CHCONF_PHA BIT(0) ++#define OMAP2_MCSPI_CHCONF_POL BIT(1) ++#define OMAP2_MCSPI_CHCONF_CLKD_MASK (0x0f << 2) ++#define OMAP2_MCSPI_CHCONF_EPOL BIT(6) ++#define OMAP2_MCSPI_CHCONF_WL_MASK (0x1f << 7) ++#define OMAP2_MCSPI_CHCONF_TRM_RX_ONLY BIT(12) ++#define OMAP2_MCSPI_CHCONF_TRM_TX_ONLY BIT(13) ++#define OMAP2_MCSPI_CHCONF_TRM_MASK (0x03 << 12) ++#define OMAP2_MCSPI_CHCONF_DMAW BIT(14) ++#define OMAP2_MCSPI_CHCONF_DMAR BIT(15) ++#define OMAP2_MCSPI_CHCONF_DPE0 BIT(16) ++#define OMAP2_MCSPI_CHCONF_DPE1 BIT(17) ++#define OMAP2_MCSPI_CHCONF_IS BIT(18) ++#define OMAP2_MCSPI_CHCONF_TURBO BIT(19) ++#define OMAP2_MCSPI_CHCONF_FORCE BIT(20) ++#define OMAP2_MCSPI_CHCONF_FFET BIT(27) ++#define OMAP2_MCSPI_CHCONF_FFER BIT(28) ++#define OMAP2_MCSPI_CHCONF_CLKG BIT(29) ++ ++#define OMAP2_MCSPI_CHSTAT_RXS BIT(0) ++#define OMAP2_MCSPI_CHSTAT_TXS BIT(1) ++#define OMAP2_MCSPI_CHSTAT_EOT BIT(2) ++#define OMAP2_MCSPI_CHSTAT_TXFFE BIT(3) ++ ++#define OMAP2_MCSPI_CHCTRL_EN BIT(0) ++#define OMAP2_MCSPI_CHCTRL_EXTCLK_MASK (0xff << 8) ++ ++#define OMAP2_MCSPI_WAKEUPENABLE_WKEN BIT(0) ++ ++#define OMAP2_MCSPI_SYSCONFIG_CLOCKACTIVITY_MASK (0x3 << 8) ++#define OMAP2_MCSPI_SYSCONFIG_SIDLEMODE_MASK (0x3 << 3) ++#define OMAP2_MCSPI_SYSCONFIG_SOFTRESET BIT(1) ++#define OMAP2_MCSPI_SYSCONFIG_AUTOIDLE BIT(0) ++ ++#define OMAP2_MCSPI_SYSSTATUS_RESETDONE BIT(0) ++ ++/* current version supports max 2 CS per module */ ++#define OMAP2_MCSPI_CS_N 2 ++ ++#define MCSPI_PINDIR_D0_IN_D1_OUT 0 ++#define MCSPI_PINDIR_D0_OUT_D1_IN 1 ++ ++struct omap2_mcspi_platform_config { ++ unsigned short num_cs; ++ unsigned int regs_offset; ++ unsigned int pin_dir:1; 
++}; ++ ++struct omap2_mcspi_cs { ++ /* CS channel */ ++ void __iomem *regs; ++ unsigned long phys; ++ u8 chosen; ++}; ++ ++struct spi_master_omap2_mcspi { ++ struct rtdm_spi_master master; ++ void __iomem *regs; ++ unsigned long phys; ++ rtdm_irq_t irqh; ++ const u8 *tx_buf; ++ u8 *rx_buf; ++ int tx_len; ++ int rx_len; ++ int fifo_depth; ++ rtdm_event_t transfer_done; ++ unsigned int pin_dir:1; ++ struct omap2_mcspi_cs cs[OMAP2_MCSPI_CS_N]; ++ /* logging */ ++ int n_rx_full; ++ int n_tx_empty; ++ int n_interrupts; ++}; ++ ++struct spi_slave_omap2_mcspi { ++ struct rtdm_spi_remote_slave slave; ++ void *io_virt; ++ dma_addr_t io_dma; ++ size_t io_len; ++}; ++ ++static inline struct spi_slave_omap2_mcspi * ++to_slave_omap2_mcspi(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave, struct spi_slave_omap2_mcspi, slave); ++} ++ ++static inline struct spi_master_omap2_mcspi * ++to_master_omap2_mcspi(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave->master, ++ struct spi_master_omap2_mcspi, master); ++} ++ ++static inline struct device * ++master_to_kdev(struct rtdm_spi_master *master) ++{ ++ return &master->kmaster->dev; ++} ++ ++static inline u32 mcspi_rd_reg(struct spi_master_omap2_mcspi *spim, ++ unsigned int reg) ++{ ++ return readl(spim->regs + reg); ++} ++ ++static inline void mcspi_wr_reg(struct spi_master_omap2_mcspi *spim, ++ unsigned int reg, u32 val) ++{ ++ writel(val, spim->regs + reg); ++} ++ ++static inline u32 ++mcspi_rd_cs_reg(struct spi_master_omap2_mcspi *spim, ++ int cs_id, unsigned int reg) ++{ ++ return readl(spim->cs[cs_id].regs + reg); ++} ++ ++static inline void ++mcspi_wr_cs_reg(struct spi_master_omap2_mcspi *spim, int cs_id, ++ unsigned int reg, u32 val) ++{ ++ writel(val, spim->cs[cs_id].regs + reg); ++} ++ ++static void omap2_mcspi_init_hw(struct spi_master_omap2_mcspi *spim) ++{ ++ u32 l; ++ ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_SYSCONFIG); ++ /* CLOCKACTIVITY = 3h: OCP and Functional clocks are maintained */ ++ l |= OMAP2_MCSPI_SYSCONFIG_CLOCKACTIVITY_MASK; ++ /* SIDLEMODE = 1h: ignore idle requests */ ++ l &= ~OMAP2_MCSPI_SYSCONFIG_SIDLEMODE_MASK; ++ l |= 0x1 << 3; ++ /* AUTOIDLE=0: OCP clock is free-running */ ++ l &= ~OMAP2_MCSPI_SYSCONFIG_AUTOIDLE; ++ mcspi_wr_reg(spim, OMAP2_MCSPI_SYSCONFIG, l); ++ ++ /* Initialise the hardware with the default polarities (only omap2) */ ++ mcspi_wr_reg(spim, OMAP2_MCSPI_WAKEUPENABLE, ++ OMAP2_MCSPI_WAKEUPENABLE_WKEN); ++ ++ /* Setup single-channel master mode */ ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_MODULCTRL); ++ /* MS=0 => spi master */ ++ l &= ~(OMAP2_MCSPI_MODULCTRL_STEST | OMAP2_MCSPI_MODULCTRL_MS); ++ l |= OMAP2_MCSPI_MODULCTRL_SINGLE; ++ mcspi_wr_reg(spim, OMAP2_MCSPI_MODULCTRL, l); ++} ++ ++static void omap2_mcspi_reset_hw(struct spi_master_omap2_mcspi *spim) ++{ ++ u32 l; ++ ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_SYSCONFIG); ++ l |= OMAP2_MCSPI_SYSCONFIG_SOFTRESET; ++ mcspi_wr_reg(spim, OMAP2_MCSPI_SYSCONFIG, l); ++ /* wait until reset is done */ ++ do { ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_SYSSTATUS); ++ cpu_relax(); ++ } while (!(l & OMAP2_MCSPI_SYSSTATUS_RESETDONE)); ++} ++ ++static void ++omap2_mcspi_chip_select(struct rtdm_spi_remote_slave *slave, bool active) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ u32 l; ++ ++ /* FORCE: manual SPIEN assertion to keep SPIEN active */ ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++ /* "active" is the logical state, not the impedance level. 
*/ ++ if (active) ++ l |= OMAP2_MCSPI_CHCONF_FORCE; ++ else ++ l &= ~OMAP2_MCSPI_CHCONF_FORCE; ++ mcspi_wr_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0, l); ++ /* Flash post-writes */ ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++} ++ ++static u32 omap2_mcspi_calc_divisor(u32 speed_hz) ++{ ++ u32 div; ++ ++ for (div = 0; div < 15; div++) ++ if (speed_hz >= (OMAP2_MCSPI_MAX_FREQ >> div)) ++ return div; ++ ++ return 15; ++} ++ ++/* channel 0 enable/disable */ ++static void ++omap2_mcspi_channel_enable(struct rtdm_spi_remote_slave *slave, int enable) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ u32 l; ++ ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCTRL0); ++ if (enable) ++ l |= OMAP2_MCSPI_CHCTRL_EN; ++ else ++ l &= ~OMAP2_MCSPI_CHCTRL_EN; ++ mcspi_wr_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCTRL0, l); ++ /* Flash post-writes */ ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCTRL0); ++} ++ ++/* called only when no transfer is active to this device */ ++static int omap2_mcspi_configure(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ struct rtdm_spi_config *config = &slave->config; ++ u32 l = 0, clkd = 0, div = 1, extclk = 0, clkg = 0, word_len; ++ u32 speed_hz = OMAP2_MCSPI_MAX_FREQ; ++ u32 chctrl0; ++ ++ /* The configuration parameters can be loaded in MCSPI_CH(i)CONF ++ * only when the channel is disabled ++ */ ++ omap2_mcspi_channel_enable(slave, 0); ++ ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++ ++ /* Set clock frequency. */ ++ speed_hz = (u32) config->speed_hz; ++ if (speed_hz > OMAP2_MCSPI_DRIVER_MAX_FREQ) { ++ dev_warn(slave_to_kdev(slave), ++ "maximum clock frequency is %d", ++ OMAP2_MCSPI_DRIVER_MAX_FREQ); ++ } ++ speed_hz = min_t(u32, speed_hz, OMAP2_MCSPI_DRIVER_MAX_FREQ); ++ if (speed_hz < (OMAP2_MCSPI_MAX_FREQ / OMAP2_MCSPI_MAX_DIVIDER)) { ++ clkd = omap2_mcspi_calc_divisor(speed_hz); ++ speed_hz = OMAP2_MCSPI_MAX_FREQ >> clkd; ++ clkg = 0; ++ } else { ++ div = (OMAP2_MCSPI_MAX_FREQ + speed_hz - 1) / speed_hz; ++ speed_hz = OMAP2_MCSPI_MAX_FREQ / div; ++ clkd = (div - 1) & 0xf; ++ extclk = (div - 1) >> 4; ++ clkg = OMAP2_MCSPI_CHCONF_CLKG; ++ } ++ /* set clock divisor */ ++ l &= ~OMAP2_MCSPI_CHCONF_CLKD_MASK; ++ l |= clkd << 2; ++ /* set clock granularity */ ++ l &= ~OMAP2_MCSPI_CHCONF_CLKG; ++ l |= clkg; ++ if (clkg) { ++ chctrl0 = mcspi_rd_cs_reg(spim, ++ slave->chip_select, OMAP2_MCSPI_CHCTRL0); ++ chctrl0 &= ~OMAP2_MCSPI_CHCTRL_EXTCLK_MASK; ++ chctrl0 |= extclk << 8; ++ mcspi_wr_cs_reg(spim, ++ slave->chip_select, OMAP2_MCSPI_CHCTRL0, chctrl0); ++ } ++ ++ if (spim->pin_dir == MCSPI_PINDIR_D0_IN_D1_OUT) { ++ l &= ~OMAP2_MCSPI_CHCONF_IS; ++ l &= ~OMAP2_MCSPI_CHCONF_DPE1; ++ l |= OMAP2_MCSPI_CHCONF_DPE0; ++ } else { ++ l |= OMAP2_MCSPI_CHCONF_IS; ++ l |= OMAP2_MCSPI_CHCONF_DPE1; ++ l &= ~OMAP2_MCSPI_CHCONF_DPE0; ++ } ++ ++ /* wordlength */ ++ word_len = config->bits_per_word; ++ /* TODO: allow word_len != 8 */ ++ if (word_len != 8) { ++ dev_err(slave_to_kdev(slave), "word_len(%d) != 8.\n", ++ word_len); ++ return -EIO; ++ } ++ l &= ~OMAP2_MCSPI_CHCONF_WL_MASK; ++ l |= (word_len - 1) << 7; ++ ++ /* set chipselect polarity; manage with FORCE */ ++ if (!(config->mode & SPI_CS_HIGH)) ++ /* CS active-low */ ++ l |= OMAP2_MCSPI_CHCONF_EPOL; ++ else ++ l &= ~OMAP2_MCSPI_CHCONF_EPOL; ++ ++ /* set SPI mode 0..3 */ ++ if (config->mode & SPI_CPOL) ++ l |= OMAP2_MCSPI_CHCONF_POL; ++ else 
++ l &= ~OMAP2_MCSPI_CHCONF_POL; ++ if (config->mode & SPI_CPHA) ++ l |= OMAP2_MCSPI_CHCONF_PHA; ++ else ++ l &= ~OMAP2_MCSPI_CHCONF_PHA; ++ ++ mcspi_wr_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0, l); ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++ ++ omap2_mcspi_chip_select(slave, 0); ++ ++ return 0; ++} ++ ++static void mcspi_rd_fifo(struct spi_master_omap2_mcspi *spim, int cs_id) ++{ ++ u8 byte; ++ int i; ++ ++ /* Receiver register must be read to remove source of interrupt */ ++ for (i = 0; i < spim->fifo_depth; i++) { ++ byte = mcspi_rd_cs_reg(spim, cs_id, OMAP2_MCSPI_RX0); ++ if (spim->rx_buf && (spim->rx_len > 0)) ++ *spim->rx_buf++ = byte; ++ spim->rx_len--; ++ } ++} ++ ++static void mcspi_wr_fifo(struct spi_master_omap2_mcspi *spim, int cs_id) ++{ ++ u8 byte; ++ int i; ++ ++ /* load transmitter register to remove the source of the interrupt */ ++ for (i = 0; i < spim->fifo_depth; i++) { ++ if (spim->tx_len <= 0) ++ byte = 0; ++ else ++ byte = spim->tx_buf ? *spim->tx_buf++ : 0; ++ mcspi_wr_cs_reg(spim, cs_id, OMAP2_MCSPI_TX0, byte); ++ spim->tx_len--; ++ } ++} ++ ++static int omap2_mcspi_interrupt(rtdm_irq_t *irqh) ++{ ++ struct spi_master_omap2_mcspi *spim; ++ u32 l; ++ int i, cs_id = 0; ++ ++ spim = rtdm_irq_get_arg(irqh, struct spi_master_omap2_mcspi); ++ for (i = 0; i < OMAP2_MCSPI_CS_N; i++) ++ if (spim->cs[i].chosen) { ++ cs_id = i; ++ break; ++ } ++ ++ spim->n_interrupts++; ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_IRQSTATUS); ++ ++ if ((l & OMAP2_MCSPI_IRQSTATUS_RX0_FULL) || ++ (l & OMAP2_MCSPI_IRQSTATUS_RX1_FULL)) { ++ mcspi_rd_fifo(spim, cs_id); ++ spim->n_rx_full++; ++ } ++ if ((l & OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY) || ++ (l & OMAP2_MCSPI_IRQSTATUS_TX1_EMPTY)) { ++ if (spim->tx_len > 0) ++ mcspi_wr_fifo(spim, cs_id); ++ spim->n_tx_empty++; ++ } ++ ++ /* write 1 to OMAP2_MCSPI_IRQSTATUS field to reset it */ ++ mcspi_wr_reg(spim, OMAP2_MCSPI_IRQSTATUS, l); ++ ++ if ((spim->tx_len <= 0) && (spim->rx_len <= 0)) { ++ /* disable interrupts */ ++ mcspi_wr_reg(spim, OMAP2_MCSPI_IRQENABLE, 0); ++ ++ rtdm_event_signal(&spim->transfer_done); ++ } ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int omap2_mcspi_disable_fifo(struct rtdm_spi_remote_slave *slave, ++ int cs_id) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ u32 chconf; ++ ++ chconf = mcspi_rd_cs_reg(spim, cs_id, OMAP2_MCSPI_CHCONF0); ++ chconf &= ~(OMAP2_MCSPI_CHCONF_FFER | OMAP2_MCSPI_CHCONF_FFET); ++ mcspi_wr_cs_reg(spim, cs_id, OMAP2_MCSPI_CHCONF0, chconf); ++ return 0; ++} ++ ++static int omap2_mcspi_set_fifo(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ unsigned int wcnt; ++ int max_fifo_depth, fifo_depth, bytes_per_word; ++ u32 chconf, xferlevel; ++ ++ chconf = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++ bytes_per_word = 1; ++ ++ max_fifo_depth = OMAP2_MCSPI_MAX_FIFODEPTH / 2; ++ if (spim->tx_len < max_fifo_depth) { ++ fifo_depth = spim->tx_len; ++ wcnt = spim->tx_len / bytes_per_word; ++ } else { ++ fifo_depth = max_fifo_depth; ++ wcnt = max_fifo_depth * (spim->tx_len / max_fifo_depth) ++ / bytes_per_word; ++ } ++ if (wcnt > OMAP2_MCSPI_MAX_FIFOWCNT) { ++ dev_err(slave_to_kdev(slave), ++ "%s: wcnt=%d: too many bytes in a transfer.\n", ++ __func__, wcnt); ++ return -EINVAL; ++ } ++ ++ chconf |= OMAP2_MCSPI_CHCONF_FFER; ++ chconf |= OMAP2_MCSPI_CHCONF_FFET; ++ ++ mcspi_wr_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0, chconf); ++ spim->fifo_depth = 
fifo_depth; ++ ++ xferlevel = wcnt << 16; ++ xferlevel |= (fifo_depth - 1) << 8; ++ xferlevel |= fifo_depth - 1; ++ mcspi_wr_reg(spim, OMAP2_MCSPI_XFERLEVEL, xferlevel); ++ ++ return 0; ++} ++ ++ ++static int do_transfer_irq_bh(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ u32 chconf, l; ++ int ret; ++ int i; ++ ++ /* configure to send and receive */ ++ chconf = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++ chconf &= ~OMAP2_MCSPI_CHCONF_TRM_MASK; ++ chconf &= ~OMAP2_MCSPI_CHCONF_TURBO; ++ mcspi_wr_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0, chconf); ++ ++ /* fifo can be enabled on a single channel */ ++ if (slave->chip_select == 0) { ++ if (spim->cs[1].chosen) ++ omap2_mcspi_disable_fifo(slave, 1); ++ } else { ++ if (spim->cs[0].chosen) ++ omap2_mcspi_disable_fifo(slave, 0); ++ } ++ ret = omap2_mcspi_set_fifo(slave); ++ if (ret) ++ return ret; ++ ++ omap2_mcspi_channel_enable(slave, 1); ++ ++ /* Set slave->chip_select as chosen */ ++ for (i = 0; i < OMAP2_MCSPI_CS_N; i++) ++ if (i == slave->chip_select) ++ spim->cs[i].chosen = 1; ++ else ++ spim->cs[i].chosen = 0; ++ ++ /* The interrupt status bit should always be reset ++ * after the channel is enabled ++ * and before the event is enabled as an interrupt source. ++ */ ++ /* write 1 to OMAP2_MCSPI_IRQSTATUS field to reset it */ ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_IRQSTATUS); ++ mcspi_wr_reg(spim, OMAP2_MCSPI_IRQSTATUS, l); ++ ++ spim->n_interrupts = 0; ++ spim->n_rx_full = 0; ++ spim->n_tx_empty = 0; ++ ++ /* Enable interrupts last. */ ++ /* support only two channels */ ++ if (slave->chip_select == 0) ++ l = OMAP2_MCSPI_IRQENABLE_TX0_EMPTY | ++ OMAP2_MCSPI_IRQENABLE_RX0_FULL; ++ else ++ l = OMAP2_MCSPI_IRQENABLE_TX1_EMPTY | ++ OMAP2_MCSPI_IRQENABLE_RX1_FULL; ++ mcspi_wr_reg(spim, OMAP2_MCSPI_IRQENABLE, l); ++ ++ /* TX_EMPTY will be raised only after data is transfered */ ++ mcspi_wr_fifo(spim, slave->chip_select); ++ ++ /* wait for transfer completion */ ++ ret = rtdm_event_wait(&spim->transfer_done); ++ omap2_mcspi_channel_enable(slave, 0); ++ if (ret) ++ return ret; ++ ++ /* spim->tx_len and spim->rx_len should be 0 */ ++ if (spim->tx_len || spim->rx_len) ++ return -EIO; ++ return 0; ++} ++ ++static int do_transfer_irq(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ int len, first_size, last_size, ret; ++ ++ len = spim->tx_len; ++ ++ if (len < (OMAP2_MCSPI_MAX_FIFODEPTH / 2)) ++ goto label_last; ++ ++ first_size = (OMAP2_MCSPI_MAX_FIFODEPTH / 2) * ++ (len / (OMAP2_MCSPI_MAX_FIFODEPTH / 2)); ++ spim->tx_len = first_size; ++ spim->rx_len = first_size; ++ ret = do_transfer_irq_bh(slave); ++ if (ret) ++ return ret; ++ ++label_last: ++ last_size = len % (OMAP2_MCSPI_MAX_FIFODEPTH / 2); ++ if (last_size == 0) ++ return ret; ++ spim->tx_len = last_size; ++ spim->rx_len = last_size; ++ ret = do_transfer_irq_bh(slave); ++ return ret; ++} ++ ++static int omap2_mcspi_transfer_iobufs(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ int ret; ++ ++ if (mapped_data->io_len == 0) ++ return -EINVAL; /* No I/O buffers set. */ ++ ++ spim->tx_len = mapped_data->io_len / 2; ++ spim->rx_len = spim->tx_len; ++ spim->tx_buf = mapped_data->io_virt + spim->rx_len; ++ spim->rx_buf = mapped_data->io_virt; ++ ++ ret = do_transfer_irq(slave); ++ ++ return ret ? 
: 0; ++} ++ ++static int omap2_mcspi_transfer_iobufs_n(struct rtdm_spi_remote_slave *slave, ++ int len) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ int ret; ++ ++ if ((mapped_data->io_len == 0) || ++ (len <= 0) || (len > (mapped_data->io_len / 2))) ++ return -EINVAL; ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = mapped_data->io_virt + mapped_data->io_len / 2; ++ spim->rx_buf = mapped_data->io_virt; ++ ++ ret = do_transfer_irq(slave); ++ ++ ++ return ret ? : 0; ++} ++ ++static ssize_t omap2_mcspi_read(struct rtdm_spi_remote_slave *slave, ++ void *rx, size_t len) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ int ret; ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = NULL; ++ spim->rx_buf = rx; ++ ++ ret = do_transfer_irq(slave); ++ ++ return ret ? : len; ++} ++ ++static ssize_t omap2_mcspi_write(struct rtdm_spi_remote_slave *slave, ++ const void *tx, size_t len) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ int ret; ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = tx; ++ spim->rx_buf = NULL; ++ ++ ret = do_transfer_irq(slave); ++ ++ return ret ? : len; ++} ++ ++static int set_iobufs(struct spi_slave_omap2_mcspi *mapped_data, size_t len) ++{ ++ dma_addr_t dma; ++ void *p; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ len = L1_CACHE_ALIGN(len) * 2; ++ if (len == mapped_data->io_len) ++ return 0; ++ ++ if (mapped_data->io_len) ++ return -EINVAL; /* I/O buffers may not be resized. */ ++ ++ /* ++ * Since we need the I/O buffers to be set for starting a ++ * transfer, there is no need for serializing this routine and ++ * transfer_iobufs(), provided io_len is set last. ++ * ++ * NOTE: We don't need coherent memory until we actually get ++ * DMA transfers working, this code is a bit ahead of ++ * schedule. ++ * ++ * Revisit: this assumes DMA mask is 4Gb. ++ */ ++ p = dma_alloc_coherent(NULL, len, &dma, GFP_KERNEL); ++ if (p == NULL) ++ return -ENOMEM; ++ ++ mapped_data->io_dma = dma; ++ mapped_data->io_virt = p; ++ /* ++ * May race with transfer_iobufs(), must be assigned after all ++ * the rest is set up, enforcing a membar. ++ */ ++ smp_mb(); ++ mapped_data->io_len = len; ++ ++ return 0; ++} ++ ++static int omap2_mcspi_set_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_iobufs *p) ++{ ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ int ret; ++ ++ ret = set_iobufs(mapped_data, p->io_len); ++ if (ret) ++ return ret; ++ ++ p->i_offset = 0; ++ p->o_offset = mapped_data->io_len / 2; ++ p->map_len = mapped_data->io_len; ++ ++ return 0; ++} ++ ++static int omap2_mcspi_mmap_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct vm_area_struct *vma) ++{ ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ ++ /* ++ * dma_alloc_coherent() delivers non-cached memory, make sure ++ * to return consistent mapping attributes. Typically, mixing ++ * memory attributes across address spaces referring to the ++ * same physical area is architecturally wrong on ARM. 
++ */ ++ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ++ ++ ++ return rtdm_mmap_kmem(vma, mapped_data->io_virt); ++} ++ ++static void omap2_mcspi_mmap_release(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ ++ dma_free_coherent(NULL, mapped_data->io_len, ++ mapped_data->io_virt, mapped_data->io_dma); ++ mapped_data->io_len = 0; ++} ++ ++static struct rtdm_spi_remote_slave * ++omap2_mcspi_attach_slave(struct rtdm_spi_master *master, struct spi_device *spi) ++{ ++ struct spi_master_omap2_mcspi *spim; ++ struct spi_slave_omap2_mcspi *mapped_data; ++ int ret; ++ ++ if ((spi->chip_select >= OMAP2_MCSPI_CS_N) || (OMAP2_MCSPI_CS_N > 2)) { ++ /* Error in the case of native CS requested with CS > 1 */ ++ dev_err(&spi->dev, "%s: only two native CS per spi module are supported\n", ++ __func__); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ mapped_data = kzalloc(sizeof(*mapped_data), GFP_KERNEL); ++ if (mapped_data == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = rtdm_spi_add_remote_slave(&mapped_data->slave, master, spi); ++ if (ret) { ++ dev_err(&spi->dev, "%s: failed to attach slave\n", __func__); ++ kfree(mapped_data); ++ return ERR_PTR(ret); ++ } ++ ++ spim = container_of(master, struct spi_master_omap2_mcspi, master); ++ spim->cs[spi->chip_select].chosen = 0; ++ spim->cs[spi->chip_select].regs = spim->regs + ++ spi->chip_select * OMAP2_MCSPI_CHANNELBANK_SIZE; ++ spim->cs[spi->chip_select].phys = spim->phys + ++ spi->chip_select * OMAP2_MCSPI_CHANNELBANK_SIZE; ++ ++ return &mapped_data->slave; ++} ++ ++static void omap2_mcspi_detach_slave(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ ++ rtdm_spi_remove_remote_slave(slave); ++ ++ kfree(mapped_data); ++} ++ ++static struct rtdm_spi_master_ops omap2_mcspi_master_ops = { ++ .configure = omap2_mcspi_configure, ++ .chip_select = omap2_mcspi_chip_select, ++ .set_iobufs = omap2_mcspi_set_iobufs, ++ .mmap_iobufs = omap2_mcspi_mmap_iobufs, ++ .mmap_release = omap2_mcspi_mmap_release, ++ .transfer_iobufs = omap2_mcspi_transfer_iobufs, ++ .transfer_iobufs_n = omap2_mcspi_transfer_iobufs_n, ++ .write = omap2_mcspi_write, ++ .read = omap2_mcspi_read, ++ .attach_slave = omap2_mcspi_attach_slave, ++ .detach_slave = omap2_mcspi_detach_slave, ++}; ++ ++static struct omap2_mcspi_platform_config omap2_pdata = { ++ .regs_offset = 0, ++}; ++ ++static struct omap2_mcspi_platform_config omap4_pdata = { ++ .regs_offset = OMAP4_MCSPI_REG_OFFSET, ++}; ++ ++static const struct of_device_id omap_mcspi_of_match[] = { ++ { ++ .compatible = "ti,omap2-mcspi", ++ .data = &omap2_pdata, ++ }, ++ { ++ /* beaglebone black */ ++ .compatible = "ti,omap4-mcspi", ++ .data = &omap4_pdata, ++ }, ++ { /* Sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(of, omap_mcspi_of_match); ++ ++static int omap2_mcspi_probe(struct platform_device *pdev) ++{ ++ struct spi_master_omap2_mcspi *spim; ++ struct rtdm_spi_master *master; ++ struct spi_master *kmaster; ++ struct resource *r; ++ int ret, irq; ++ u32 regs_offset = 0; ++ const struct omap2_mcspi_platform_config *pdata; ++ const struct of_device_id *match; ++ u32 num_cs = 1; ++ unsigned int pin_dir = MCSPI_PINDIR_D0_IN_D1_OUT; ++ ++ match = of_match_device(omap_mcspi_of_match, &pdev->dev); ++ if (match) { ++ pdata = match->data; ++ regs_offset = pdata->regs_offset; ++ } else { ++ dev_err(&pdev->dev, "%s: cannot find a match with device tree\n" ++ "of '%s' or '%s'", ++ __func__, ++ 
omap_mcspi_of_match[0].compatible, ++ omap_mcspi_of_match[1].compatible); ++ return -ENOENT; ++ } ++ ++ master = rtdm_spi_alloc_master(&pdev->dev, ++ struct spi_master_omap2_mcspi, master); ++ if (master == NULL) ++ return -ENOMEM; ++ ++ master->subclass = RTDM_SUBCLASS_OMAP2_MCSPI; ++ master->ops = &omap2_mcspi_master_ops; ++ platform_set_drvdata(pdev, master); ++ ++ kmaster = master->kmaster; ++ /* flags understood by this controller driver */ ++ kmaster->mode_bits = OMAP2_MCSPI_SPI_MODE_BITS; ++ /* TODO: SPI_BPW_RANGE_MASK(4, 32); */ ++ kmaster->bits_per_word_mask = SPI_BPW_MASK(8); ++ of_property_read_u32(pdev->dev.of_node, "ti,spi-num-cs", &num_cs); ++ kmaster->num_chipselect = num_cs; ++ if (of_get_property(pdev->dev.of_node, ++ "ti,pindir-d0-out-d1-in", NULL)) { ++ pin_dir = MCSPI_PINDIR_D0_OUT_D1_IN; ++ } ++ ++ kmaster->max_speed_hz = OMAP2_MCSPI_MAX_FREQ; ++ kmaster->min_speed_hz = OMAP2_MCSPI_MAX_FREQ >> 15; ++ kmaster->dev.of_node = pdev->dev.of_node; ++ ++ spim = container_of(master, struct spi_master_omap2_mcspi, master); ++ rtdm_event_init(&spim->transfer_done, 0); ++ ++ spim->pin_dir = pin_dir; ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ spim->regs = devm_ioremap_resource(&pdev->dev, r); ++ if (IS_ERR(spim->regs)) { ++ dev_err(&pdev->dev, "%s: cannot map I/O memory\n", __func__); ++ ret = PTR_ERR(spim->regs); ++ goto fail; ++ } ++ spim->phys = r->start + regs_offset; ++ spim->regs += regs_offset; ++ ++ irq = irq_of_parse_and_map(pdev->dev.of_node, 0); ++ if (irq <= 0) { ++ ret = irq ?: -ENODEV; ++ dev_err(&pdev->dev, "%s: irq_of_parse_and_map: %d\n", ++ __func__, irq); ++ goto fail; ++ } ++ ++ ret = rtdm_irq_request(&spim->irqh, irq, ++ omap2_mcspi_interrupt, 0, ++ dev_name(&pdev->dev), spim); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: cannot request IRQ%d\n", ++ __func__, irq); ++ goto fail_unclk; ++ } ++ ++ ret = rtdm_spi_add_master(&spim->master); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: failed to add master\n", __func__); ++ goto fail_unclk; ++ } ++ ++ pm_runtime_use_autosuspend(&pdev->dev); ++ /* if delay is negative and the use_autosuspend flag is set ++ * then runtime suspends are prevented. 
++ */ ++ pm_runtime_set_autosuspend_delay(&pdev->dev, PM_NEGATIVE_DELAY); ++ pm_runtime_enable(&pdev->dev); ++ ret = pm_runtime_get_sync(&pdev->dev); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "%s: pm_runtime_get_sync error %d\n", ++ __func__, ret); ++ return ret; ++ } ++ ++ omap2_mcspi_reset_hw(spim); ++ omap2_mcspi_init_hw(spim); ++ ++ dev_info(&pdev->dev, "success\n"); ++ return 0; ++ ++fail_unclk: ++fail: ++ spi_master_put(kmaster); ++ ++ return ret; ++} ++ ++static int omap2_mcspi_remove(struct platform_device *pdev) ++{ ++ struct rtdm_spi_master *master = platform_get_drvdata(pdev); ++ struct spi_master_omap2_mcspi *spim; ++ ++ spim = container_of(master, struct spi_master_omap2_mcspi, master); ++ ++ omap2_mcspi_reset_hw(spim); ++ ++ pm_runtime_dont_use_autosuspend(&pdev->dev); ++ pm_runtime_put_sync(&pdev->dev); ++ pm_runtime_disable(&pdev->dev); ++ ++ rtdm_irq_free(&spim->irqh); ++ ++ rtdm_spi_remove_master(master); ++ ++ return 0; ++} ++ ++static struct platform_driver omap2_mcspi_spi_driver = { ++ .driver = { ++ .name = "omap2_mcspi_rt", ++ .of_match_table = omap_mcspi_of_match, ++ }, ++ .probe = omap2_mcspi_probe, ++ .remove = omap2_mcspi_remove, ++}; ++module_platform_driver(omap2_mcspi_spi_driver); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/spi/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/Makefile 2021-04-07 16:01:28.003633057 +0800 +@@ -0,0 +1,14 @@ ++ ++ccflags-$(CONFIG_XENO_DRIVERS_SPI_DEBUG) := -DDEBUG ++ ++obj-$(CONFIG_XENO_DRIVERS_SPI) += xeno_spi.o ++ ++xeno_spi-y := spi-master.o spi-device.o ++ ++obj-$(CONFIG_XENO_DRIVERS_SPI_BCM2835) += xeno_spi_bcm2835.o ++obj-$(CONFIG_XENO_DRIVERS_SPI_SUN6I) += xeno_spi_sun6i.o ++obj-$(CONFIG_XENO_DRIVERS_SPI_OMAP2_MCSPI_RT) += xeno_spi_omap2_mcspi_rt.o ++ ++xeno_spi_bcm2835-y := spi-bcm2835.o ++xeno_spi_sun6i-y := spi-sun6i.o ++xeno_spi_omap2_mcspi_rt-y := spi-omap2-mcspi-rt.o +--- linux/drivers/xenomai/spi/spi-device.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-device.c 2021-04-07 16:01:27.999633063 +0800 +@@ -0,0 +1,181 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "spi-master.h" ++ ++int rtdm_spi_add_remote_slave(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_master *master, ++ struct spi_device *spi) ++{ ++ struct spi_master *kmaster = master->kmaster; ++ struct rtdm_device *dev; ++ rtdm_lockctx_t c; ++ int ret; ++ ++ memset(slave, 0, sizeof(*slave)); ++ slave->chip_select = spi->chip_select; ++ slave->config.bits_per_word = spi->bits_per_word; ++ slave->config.speed_hz = spi->max_speed_hz; ++ slave->config.mode = spi->mode; ++ slave->master = master; ++ ++ dev = &slave->dev; ++ dev->driver = &master->driver; ++ dev->label = kasprintf(GFP_KERNEL, "%s/slave%d.%%d", ++ dev_name(&kmaster->dev), ++ kmaster->bus_num); ++ if (dev->label == NULL) ++ return -ENOMEM; ++ ++ if (gpio_is_valid(spi->cs_gpio)) ++ slave->cs_gpio = spi->cs_gpio; ++ else { ++ slave->cs_gpio = -ENOENT; ++ if (kmaster->cs_gpios) ++ slave->cs_gpio = kmaster->cs_gpios[spi->chip_select]; ++ } ++ ++ if (gpio_is_valid(slave->cs_gpio)) { ++ ret = gpio_request(slave->cs_gpio, dev->label); ++ if (ret) ++ goto fail; ++ slave->cs_gpiod = gpio_to_desc(slave->cs_gpio); ++ if (slave->cs_gpiod == NULL) ++ goto fail; ++ } ++ ++ mutex_init(&slave->ctl_lock); ++ ++ dev->device_data = master; ++ ret = rtdm_dev_register(dev); ++ if (ret) ++ goto fail; ++ ++ rtdm_lock_get_irqsave(&master->lock, c); ++ list_add_tail(&slave->next, &master->slaves); ++ rtdm_lock_put_irqrestore(&master->lock, c); ++ ++ return 0; ++fail: ++ kfree(dev->label); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_spi_add_remote_slave); ++ ++void rtdm_spi_remove_remote_slave(struct rtdm_spi_remote_slave *slave) ++{ ++ struct rtdm_spi_master *master = slave->master; ++ struct rtdm_device *dev; ++ rtdm_lockctx_t c; ++ ++ if (gpio_is_valid(slave->cs_gpio)) ++ gpio_free(slave->cs_gpio); ++ ++ mutex_destroy(&slave->ctl_lock); ++ rtdm_lock_get_irqsave(&master->lock, c); ++ list_del(&slave->next); ++ rtdm_lock_put_irqrestore(&master->lock, c); ++ dev = &slave->dev; ++ rtdm_dev_unregister(dev); ++ kfree(dev->label); ++} ++EXPORT_SYMBOL_GPL(rtdm_spi_remove_remote_slave); ++ ++static int spi_device_probe(struct spi_device *spi) ++{ ++ struct rtdm_spi_remote_slave *slave; ++ struct rtdm_spi_master *master; ++ int ret; ++ ++ /* ++ * Chicken and egg issue: we want the RTDM device class name ++ * to duplicate the SPI master name, but that information is ++ * only available after spi_register_master() has returned. We ++ * solve this by initializing the RTDM driver descriptor on ++ * the fly when the first SPI device on the bus is advertised ++ * on behalf of spi_register_master(). ++ * ++ * NOTE: the driver core guarantees serialization. 
++ */ ++ master = spi_master_get_devdata(spi->master); ++ if (master->devclass == NULL) { ++ ret = __rtdm_spi_setup_driver(master); ++ if (ret) ++ return ret; ++ } ++ ++ slave = master->ops->attach_slave(master, spi); ++ if (IS_ERR(slave)) ++ return PTR_ERR(slave); ++ ++ spi_set_drvdata(spi, slave); ++ ++ return 0; ++} ++ ++static int spi_device_remove(struct spi_device *spi) ++{ ++ struct rtdm_spi_remote_slave *slave = spi_get_drvdata(spi); ++ ++ slave->master->ops->detach_slave(slave); ++ ++ return 0; ++} ++ ++static const struct of_device_id spi_device_match[] = { ++ { ++ .compatible = "rtdm-spidev", ++ }, ++ { /* Sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(of, spi_device_match); ++ ++static struct spi_driver spi_device_driver = { ++ .driver = { ++ .name = "rtdm_spi_device", ++ .owner = THIS_MODULE, ++ .of_match_table = spi_device_match, ++ }, ++ .probe = spi_device_probe, ++ .remove = spi_device_remove, ++}; ++ ++static int __init spi_device_init(void) ++{ ++ int ret; ++ ++ ret = spi_register_driver(&spi_device_driver); ++ ++ return ret; ++} ++module_init(spi_device_init); ++ ++static void __exit spi_device_exit(void) ++{ ++ spi_unregister_driver(&spi_device_driver); ++ ++} ++module_exit(spi_device_exit); +--- linux/drivers/xenomai/spi/spi-sun6i.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-sun6i.c 2021-04-07 16:01:27.994633070 +0800 +@@ -0,0 +1,674 @@ ++/** ++ * I/O handling lifted from drivers/spi/spi-sun6i.c: ++ * Copyright (C) 2012 - 2014 Allwinner Tech ++ * Pan Nan ++ * Copyright (C) 2014 Maxime Ripard ++ * Maxime Ripard ++ * ++ * RTDM integration by: ++ * Copyright (C) 2017 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "spi-master.h" ++ ++#define RTDM_SUBCLASS_SUN6I 2 ++ ++#define SUN6I_GBL_CTL_REG 0x04 ++#define SUN6I_GBL_CTL_BUS_ENABLE BIT(0) ++#define SUN6I_GBL_CTL_MASTER BIT(1) ++#define SUN6I_GBL_CTL_TP BIT(7) ++#define SUN6I_GBL_CTL_RST BIT(31) ++ ++#define SUN6I_TFR_CTL_REG 0x08 ++#define SUN6I_TFR_CTL_CPHA BIT(0) ++#define SUN6I_TFR_CTL_CPOL BIT(1) ++#define SUN6I_TFR_CTL_SPOL BIT(2) ++#define SUN6I_TFR_CTL_CS_MASK 0x30 ++#define SUN6I_TFR_CTL_CS(cs) (((cs) << 4) & SUN6I_TFR_CTL_CS_MASK) ++#define SUN6I_TFR_CTL_CS_MANUAL BIT(6) ++#define SUN6I_TFR_CTL_CS_LEVEL BIT(7) ++#define SUN6I_TFR_CTL_DHB BIT(8) ++#define SUN6I_TFR_CTL_FBS BIT(12) ++#define SUN6I_TFR_CTL_XCH BIT(31) ++ ++#define SUN6I_INT_CTL_REG 0x10 ++#define SUN6I_INT_CTL_RX_RDY BIT(0) ++#define SUN6I_INT_CTL_TX_RDY BIT(4) ++#define SUN6I_INT_CTL_RX_OVF BIT(8) ++#define SUN6I_INT_CTL_TC BIT(12) ++ ++#define SUN6I_INT_STA_REG 0x14 ++ ++#define SUN6I_FIFO_CTL_REG 0x18 ++#define SUN6I_FIFO_CTL_RX_RDY_TRIG_LEVEL_MASK 0xff ++#define SUN6I_FIFO_CTL_RX_RDY_TRIG_LEVEL_BITS 0 ++#define SUN6I_FIFO_CTL_RX_RST BIT(15) ++#define SUN6I_FIFO_CTL_TX_RDY_TRIG_LEVEL_MASK 0xff ++#define SUN6I_FIFO_CTL_TX_RDY_TRIG_LEVEL_BITS 16 ++#define SUN6I_FIFO_CTL_TX_RST BIT(31) ++ ++#define SUN6I_FIFO_STA_REG 0x1c ++#define SUN6I_FIFO_STA_RX_CNT(reg) (((reg) >> 0) & 0xff) ++#define SUN6I_FIFO_STA_TX_CNT(reg) (((reg) >> 16) & 0xff) ++ ++#define SUN6I_CLK_CTL_REG 0x24 ++#define SUN6I_CLK_CTL_CDR2_MASK 0xff ++#define SUN6I_CLK_CTL_CDR2(div) (((div) & SUN6I_CLK_CTL_CDR2_MASK) << 0) ++#define SUN6I_CLK_CTL_CDR1_MASK 0xf ++#define SUN6I_CLK_CTL_CDR1(div) (((div) & SUN6I_CLK_CTL_CDR1_MASK) << 8) ++#define SUN6I_CLK_CTL_DRS BIT(12) ++ ++#define SUN6I_MAX_XFER_SIZE 0xffffff ++ ++#define SUN6I_BURST_CNT_REG 0x30 ++#define SUN6I_BURST_CNT(cnt) ((cnt) & SUN6I_MAX_XFER_SIZE) ++ ++#define SUN6I_XMIT_CNT_REG 0x34 ++#define SUN6I_XMIT_CNT(cnt) ((cnt) & SUN6I_MAX_XFER_SIZE) ++ ++#define SUN6I_BURST_CTL_CNT_REG 0x38 ++#define SUN6I_BURST_CTL_CNT_STC(cnt) ((cnt) & SUN6I_MAX_XFER_SIZE) ++ ++#define SUN6I_TXDATA_REG 0x200 ++#define SUN6I_RXDATA_REG 0x300 ++ ++#define SUN6I_SPI_MODE_BITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \ ++ | SPI_LSB_FIRST) ++ ++ struct spi_setup_data { ++ int fifo_depth; ++ }; ++ ++static struct spi_setup_data sun6i_data = { ++ .fifo_depth = 128, ++}; ++ ++static struct spi_setup_data sun8i_data = { ++ .fifo_depth = 64, ++}; ++ ++struct spi_master_sun6i { ++ struct rtdm_spi_master master; ++ void __iomem *regs; ++ struct reset_control *rstc; ++ struct clk *hclk; ++ struct clk *mclk; ++ unsigned long clk_hz; ++ rtdm_irq_t irqh; ++ const u8 *tx_buf; ++ u8 *rx_buf; ++ int tx_len; ++ int rx_len; ++ rtdm_event_t transfer_done; ++ const struct spi_setup_data *setup; ++}; ++ ++struct spi_slave_sun6i { ++ struct rtdm_spi_remote_slave slave; ++ void *io_virt; ++ dma_addr_t io_dma; ++ size_t io_len; ++}; ++ ++static inline struct spi_slave_sun6i * ++to_slave_sun6i(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave, struct spi_slave_sun6i, slave); ++} ++ ++static inline struct spi_master_sun6i * ++to_master_sun6i(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave->master, struct spi_master_sun6i, master); ++} ++ ++static inline struct device * ++master_to_kdev(struct rtdm_spi_master *master) ++{ ++ return &master->kmaster->dev; ++} ++ ++static inline u32 sun6i_rd(struct spi_master_sun6i *spim, ++ 
unsigned int reg) ++{ ++ return readl(spim->regs + reg); ++} ++ ++static inline void sun6i_wr(struct spi_master_sun6i *spim, ++ unsigned int reg, u32 val) ++{ ++ writel(val, spim->regs + reg); ++} ++ ++static void sun6i_rd_fifo(struct spi_master_sun6i *spim) ++{ ++ u32 reg; ++ int len; ++ u8 byte; ++ ++ reg = sun6i_rd(spim, SUN6I_FIFO_STA_REG); ++ len = min((int)SUN6I_FIFO_STA_RX_CNT(reg), spim->rx_len); ++ ++ while (len-- > 0) { ++ byte = sun6i_rd(spim, SUN6I_RXDATA_REG); ++ if (spim->rx_buf) ++ *spim->rx_buf++ = byte; ++ spim->rx_len--; ++ } ++} ++ ++static void sun6i_wr_fifo(struct spi_master_sun6i *spim) ++{ ++ u32 reg; ++ int len; ++ u8 byte; ++ ++ reg = sun6i_rd(spim, SUN6I_FIFO_STA_REG); ++ len = min(spim->setup->fifo_depth - (int)SUN6I_FIFO_STA_TX_CNT(reg), ++ spim->tx_len); ++ ++ while (len-- > 0) { ++ byte = spim->tx_buf ? *spim->tx_buf++ : 0; ++ sun6i_wr(spim, SUN6I_TXDATA_REG, byte); ++ spim->tx_len--; ++ } ++} ++ ++static int sun6i_spi_interrupt(rtdm_irq_t *irqh) ++{ ++ struct spi_master_sun6i *spim; ++ u32 status; ++ ++ spim = rtdm_irq_get_arg(irqh, struct spi_master_sun6i); ++ ++ sun6i_rd_fifo(spim); ++ sun6i_wr_fifo(spim); ++ ++ status = sun6i_rd(spim, SUN6I_INT_STA_REG); ++ if ((status & SUN6I_INT_CTL_TC)) { ++ sun6i_wr(spim, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TC); ++ sun6i_wr(spim, SUN6I_INT_CTL_REG, 0); ++ rtdm_event_signal(&spim->transfer_done); ++ } else if (status & SUN6I_INT_CTL_TX_RDY) ++ sun6i_wr(spim, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TX_RDY); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int sun6i_configure(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ struct rtdm_spi_config *config = &slave->config; ++ u32 reg, div; ++ ++ /* Set clock polarity and phase. */ ++ ++ reg = sun6i_rd(spim, SUN6I_TFR_CTL_REG); ++ reg &= ~(SUN6I_TFR_CTL_CPOL | SUN6I_TFR_CTL_CPHA | ++ SUN6I_TFR_CTL_FBS | SUN6I_TFR_CTL_SPOL); ++ ++ /* Manual CS via ->chip_select(). */ ++ reg |= SUN6I_TFR_CTL_CS_MANUAL; ++ ++ if (config->mode & SPI_CPOL) ++ reg |= SUN6I_TFR_CTL_CPOL; ++ ++ if (config->mode & SPI_CPHA) ++ reg |= SUN6I_TFR_CTL_CPHA; ++ ++ if (config->mode & SPI_LSB_FIRST) ++ reg |= SUN6I_TFR_CTL_FBS; ++ ++ if (!(config->mode & SPI_CS_HIGH)) ++ reg |= SUN6I_TFR_CTL_SPOL; ++ ++ sun6i_wr(spim, SUN6I_TFR_CTL_REG, reg); ++ ++ /* Setup clock divider. */ ++ ++ div = spim->clk_hz / (2 * config->speed_hz); ++ if (div <= SUN6I_CLK_CTL_CDR2_MASK + 1) { ++ if (div > 0) ++ div--; ++ reg = SUN6I_CLK_CTL_CDR2(div) | SUN6I_CLK_CTL_DRS; ++ } else { ++ div = ilog2(spim->clk_hz) - ilog2(config->speed_hz); ++ reg = SUN6I_CLK_CTL_CDR1(div); ++ } ++ ++ sun6i_wr(spim, SUN6I_CLK_CTL_REG, reg); ++ ++ return 0; ++} ++ ++static void sun6i_chip_select(struct rtdm_spi_remote_slave *slave, ++ bool active) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ u32 reg; ++ ++ /* ++ * We have no cs_gpios, so this handler will be called for ++ * each transfer. ++ */ ++ reg = sun6i_rd(spim, SUN6I_TFR_CTL_REG); ++ reg &= ~(SUN6I_TFR_CTL_CS_MASK | SUN6I_TFR_CTL_CS_LEVEL); ++ reg |= SUN6I_TFR_CTL_CS(slave->chip_select); ++ ++ if (active) ++ reg |= SUN6I_TFR_CTL_CS_LEVEL; ++ ++ sun6i_wr(spim, SUN6I_TFR_CTL_REG, reg); ++} ++ ++static int do_transfer_irq(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ u32 tx_len = 0, reg; ++ int ret; ++ ++ /* Reset FIFO. */ ++ sun6i_wr(spim, SUN6I_FIFO_CTL_REG, ++ SUN6I_FIFO_CTL_RX_RST | SUN6I_FIFO_CTL_TX_RST); ++ ++ /* Set FIFO interrupt trigger level to 3/4 of the fifo depth. 
*/ ++ reg = spim->setup->fifo_depth / 4 * 3; ++ sun6i_wr(spim, SUN6I_FIFO_CTL_REG, ++ (reg << SUN6I_FIFO_CTL_RX_RDY_TRIG_LEVEL_BITS) | ++ (reg << SUN6I_FIFO_CTL_TX_RDY_TRIG_LEVEL_BITS)); ++ ++ reg = sun6i_rd(spim, SUN6I_TFR_CTL_REG); ++ reg &= ~SUN6I_TFR_CTL_DHB; ++ /* Discard unused SPI bursts if TX only. */ ++ if (spim->rx_buf == NULL) ++ reg |= SUN6I_TFR_CTL_DHB; ++ sun6i_wr(spim, SUN6I_TFR_CTL_REG, reg); ++ ++ if (spim->tx_buf) ++ tx_len = spim->tx_len; ++ ++ /* Setup the counters. */ ++ sun6i_wr(spim, SUN6I_BURST_CNT_REG, SUN6I_BURST_CNT(spim->tx_len)); ++ sun6i_wr(spim, SUN6I_XMIT_CNT_REG, SUN6I_XMIT_CNT(tx_len)); ++ sun6i_wr(spim, SUN6I_BURST_CTL_CNT_REG, ++ SUN6I_BURST_CTL_CNT_STC(tx_len)); ++ ++ /* Fill the TX FIFO */ ++ sun6i_wr_fifo(spim); ++ ++ /* Enable interrupts. */ ++ reg = sun6i_rd(spim, SUN6I_INT_CTL_REG); ++ reg |= SUN6I_INT_CTL_TC | SUN6I_INT_CTL_TX_RDY; ++ sun6i_wr(spim, SUN6I_INT_CTL_REG, reg); ++ ++ /* Start the transfer. */ ++ reg = sun6i_rd(spim, SUN6I_TFR_CTL_REG); ++ sun6i_wr(spim, SUN6I_TFR_CTL_REG, reg | SUN6I_TFR_CTL_XCH); ++ ++ ret = rtdm_event_wait(&spim->transfer_done); ++ if (ret) { ++ sun6i_wr(spim, SUN6I_INT_CTL_REG, 0); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int sun6i_transfer_iobufs(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ ++ if (sun6i->io_len == 0) ++ return -EINVAL; /* No I/O buffers set. */ ++ ++ spim->tx_len = sun6i->io_len / 2; ++ spim->rx_len = spim->tx_len; ++ spim->tx_buf = sun6i->io_virt + spim->rx_len; ++ spim->rx_buf = sun6i->io_virt; ++ ++ return do_transfer_irq(slave); ++} ++ ++static int sun6i_transfer_iobufs_n(struct rtdm_spi_remote_slave *slave, ++ int len) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ ++ if ((sun6i->io_len == 0) || ++ (len <= 0) || (len > (sun6i->io_len / 2))) ++ return -EINVAL; ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = sun6i->io_virt + sun6i->io_len / 2; ++ spim->rx_buf = sun6i->io_virt; ++ ++ return do_transfer_irq(slave); ++} ++ ++static ssize_t sun6i_read(struct rtdm_spi_remote_slave *slave, ++ void *rx, size_t len) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = NULL; ++ spim->rx_buf = rx; ++ ++ return do_transfer_irq(slave) ?: len; ++} ++ ++static ssize_t sun6i_write(struct rtdm_spi_remote_slave *slave, ++ const void *tx, size_t len) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = tx; ++ spim->rx_buf = NULL; ++ ++ return do_transfer_irq(slave) ?: len; ++} ++ ++static int set_iobufs(struct spi_slave_sun6i *sun6i, size_t len) ++{ ++ dma_addr_t dma; ++ void *p; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ len = L1_CACHE_ALIGN(len) * 2; ++ if (len == sun6i->io_len) ++ return 0; ++ ++ if (sun6i->io_len) ++ return -EINVAL; /* I/O buffers may not be resized. 
*/ ++ ++ p = dma_alloc_coherent(NULL, len, &dma, GFP_KERNEL); ++ if (p == NULL) ++ return -ENOMEM; ++ ++ sun6i->io_dma = dma; ++ sun6i->io_virt = p; ++ smp_mb(); ++ sun6i->io_len = len; ++ ++ return 0; ++} ++ ++static int sun6i_set_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_iobufs *p) ++{ ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ int ret; ++ ++ ret = set_iobufs(sun6i, p->io_len); ++ if (ret) ++ return ret; ++ ++ p->i_offset = 0; ++ p->o_offset = sun6i->io_len / 2; ++ p->map_len = sun6i->io_len; ++ ++ return 0; ++} ++ ++static int sun6i_mmap_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct vm_area_struct *vma) ++{ ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ ++ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ++ ++ return rtdm_mmap_kmem(vma, sun6i->io_virt); ++} ++ ++static void sun6i_mmap_release(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ ++ dma_free_coherent(NULL, sun6i->io_len, ++ sun6i->io_virt, sun6i->io_dma); ++ sun6i->io_len = 0; ++} ++ ++static struct rtdm_spi_remote_slave * ++sun6i_attach_slave(struct rtdm_spi_master *master, struct spi_device *spi) ++{ ++ struct spi_slave_sun6i *sun6i; ++ int ret; ++ ++ sun6i = kzalloc(sizeof(*sun6i), GFP_KERNEL); ++ if (sun6i == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = rtdm_spi_add_remote_slave(&sun6i->slave, master, spi); ++ if (ret) { ++ dev_err(&spi->dev, ++ "%s: failed to attach slave\n", __func__); ++ kfree(sun6i); ++ return ERR_PTR(ret); ++ } ++ ++ return &sun6i->slave; ++} ++ ++static void sun6i_detach_slave(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ ++ rtdm_spi_remove_remote_slave(slave); ++ kfree(sun6i); ++} ++ ++static struct rtdm_spi_master_ops sun6i_master_ops = { ++ .configure = sun6i_configure, ++ .chip_select = sun6i_chip_select, ++ .set_iobufs = sun6i_set_iobufs, ++ .mmap_iobufs = sun6i_mmap_iobufs, ++ .mmap_release = sun6i_mmap_release, ++ .transfer_iobufs = sun6i_transfer_iobufs, ++ .transfer_iobufs_n = sun6i_transfer_iobufs_n, ++ .write = sun6i_write, ++ .read = sun6i_read, ++ .attach_slave = sun6i_attach_slave, ++ .detach_slave = sun6i_detach_slave, ++}; ++ ++static int sun6i_spi_probe(struct platform_device *pdev) ++{ ++ struct rtdm_spi_master *master; ++ struct spi_master_sun6i *spim; ++ struct spi_master *kmaster; ++ struct resource *r; ++ int ret, irq; ++ u32 clk_rate; ++ ++ dev_dbg(&pdev->dev, "%s: entered\n", __func__); ++ ++ master = rtdm_spi_alloc_master(&pdev->dev, ++ struct spi_master_sun6i, master); ++ if (master == NULL) ++ return -ENOMEM; ++ ++ master->subclass = RTDM_SUBCLASS_SUN6I; ++ master->ops = &sun6i_master_ops; ++ platform_set_drvdata(pdev, master); ++ ++ kmaster = master->kmaster; ++ kmaster->max_speed_hz = 100 * 1000 * 1000; ++ kmaster->min_speed_hz = 3 * 1000; ++ kmaster->mode_bits = SUN6I_SPI_MODE_BITS; ++ kmaster->bits_per_word_mask = SPI_BPW_MASK(8); ++ kmaster->num_chipselect = 4; ++ kmaster->dev.of_node = pdev->dev.of_node; ++ ++ spim = container_of(master, struct spi_master_sun6i, master); ++ spim->setup = of_device_get_match_data(&pdev->dev); ++ ++ rtdm_event_init(&spim->transfer_done, 0); ++ ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ spim->regs = devm_ioremap_resource(&pdev->dev, r); ++ if (IS_ERR(spim->regs)) { ++ dev_err(&pdev->dev, "%s: cannot map I/O memory\n", __func__); ++ ret = PTR_ERR(spim->regs); ++ goto fail; ++ } ++ ++ spim->hclk = devm_clk_get(&pdev->dev, "ahb"); ++ if 
(IS_ERR(spim->hclk)) { ++ dev_err(&pdev->dev, "Unable to acquire AHB clock\n"); ++ ret = PTR_ERR(spim->hclk); ++ goto fail; ++ } ++ ++ spim->mclk = devm_clk_get(&pdev->dev, "mod"); ++ if (IS_ERR(spim->mclk)) { ++ dev_err(&pdev->dev, "Unable to acquire MOD clock\n"); ++ ret = PTR_ERR(spim->mclk); ++ goto fail; ++ } ++ ++ spim->rstc = devm_reset_control_get(&pdev->dev, NULL); ++ if (IS_ERR(spim->rstc)) { ++ dev_err(&pdev->dev, "Couldn't get reset controller\n"); ++ ret = PTR_ERR(spim->rstc); ++ goto fail; ++ } ++ ++ /* ++ * Ensure that we have a parent clock fast enough to handle ++ * the fastest transfers properly. ++ */ ++ clk_rate = clk_get_rate(spim->mclk); ++ if (clk_rate < 2 * kmaster->max_speed_hz) ++ clk_set_rate(spim->mclk, 2 * kmaster->max_speed_hz); ++ ++ spim->clk_hz = clk_get_rate(spim->mclk); ++ ++ irq = irq_of_parse_and_map(pdev->dev.of_node, 0); ++ if (irq <= 0) { ++ ret = irq ?: -ENODEV; ++ goto fail; ++ } ++ ++ clk_prepare_enable(spim->hclk); ++ clk_prepare_enable(spim->mclk); ++ ++ ret = reset_control_deassert(spim->rstc); ++ if (ret) ++ goto fail_unclk; ++ ++ /* Enable SPI module, in master mode with smart burst. */ ++ ++ sun6i_wr(spim, SUN6I_GBL_CTL_REG, ++ SUN6I_GBL_CTL_BUS_ENABLE | SUN6I_GBL_CTL_MASTER | ++ SUN6I_GBL_CTL_TP); ++ ++ /* Disable and clear all interrupts. */ ++ sun6i_wr(spim, SUN6I_INT_CTL_REG, 0); ++ sun6i_wr(spim, SUN6I_INT_STA_REG, ~0); ++ ++ ret = rtdm_irq_request(&spim->irqh, irq, ++ sun6i_spi_interrupt, 0, ++ dev_name(&pdev->dev), spim); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: cannot request IRQ%d\n", ++ __func__, irq); ++ goto fail_unclk; ++ } ++ ++ ret = rtdm_spi_add_master(&spim->master); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: failed to add master\n", ++ __func__); ++ goto fail_register; ++ } ++ ++ return 0; ++ ++fail_register: ++ rtdm_irq_free(&spim->irqh); ++fail_unclk: ++ clk_disable_unprepare(spim->mclk); ++ clk_disable_unprepare(spim->hclk); ++fail: ++ spi_master_put(kmaster); ++ ++ return ret; ++} ++ ++static int sun6i_spi_remove(struct platform_device *pdev) ++{ ++ struct rtdm_spi_master *master = platform_get_drvdata(pdev); ++ struct spi_master_sun6i *spim; ++ ++ dev_dbg(&pdev->dev, "%s: entered\n", __func__); ++ ++ spim = container_of(master, struct spi_master_sun6i, master); ++ ++ rtdm_irq_free(&spim->irqh); ++ ++ clk_disable_unprepare(spim->mclk); ++ clk_disable_unprepare(spim->hclk); ++ ++ rtdm_spi_remove_master(master); ++ ++ return 0; ++} ++ ++static const struct of_device_id sun6i_spi_match[] = { ++ { ++ .compatible = "allwinner,sun6i-a31-spi", ++ .data = &sun6i_data, ++ }, ++ { ++ .compatible = "allwinner,sun8i-h3-spi", ++ .data = &sun8i_data, ++ }, ++ { /* Sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(of, sun6i_spi_match); ++ ++static struct platform_driver sun6i_spi_driver = { ++ .driver = { ++ .name = "spi-sun6i", ++ .of_match_table = sun6i_spi_match, ++ }, ++ .probe = sun6i_spi_probe, ++ .remove = sun6i_spi_remove, ++}; ++module_platform_driver(sun6i_spi_driver); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/spi/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/Kconfig 2021-04-07 16:01:27.989633078 +0800 +@@ -0,0 +1,39 @@ ++menu "Real-time SPI master drivers" ++ ++config XENO_DRIVERS_SPI ++ depends on SPI ++ tristate ++ ++config XENO_DRIVERS_SPI_BCM2835 ++ depends on ARCH_BCM2708 || ARCH_BCM2835 ++ select XENO_DRIVERS_SPI ++ tristate "Support for BCM2835 SPI" ++ help ++ ++ Enables support for the SPI0 controller available from ++ Broadcom's BCM2835 SoC. 
++ ++config XENO_DRIVERS_SPI_SUN6I ++ depends on MACH_SUN6I || MACH_SUN8I ++ select XENO_DRIVERS_SPI ++ tristate "Support for A31/H3 SoC SPI" ++ help ++ ++ Enables support for the SPI controller available from ++ Allwinner's A31, H3 SoCs. ++ ++config XENO_DRIVERS_SPI_OMAP2_MCSPI_RT ++ tristate "McSPI rt-driver for OMAP" ++ depends on HAS_DMA ++ depends on ARCH_OMAP2PLUS || COMPILE_TEST ++ select XENO_DRIVERS_SPI ++ help ++ ++ SPI real-time master controller for OMAP24XX and later Multichannel SPI ++ (McSPI) modules. ++ ++config XENO_DRIVERS_SPI_DEBUG ++ depends on XENO_DRIVERS_SPI ++ bool "Enable SPI core debugging features" ++ ++endmenu +--- linux/drivers/xenomai/spi/spi-device.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-device.h 2021-04-07 16:01:27.984633085 +0800 +@@ -0,0 +1,54 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _RTDM_SPI_DEVICE_H ++#define _RTDM_SPI_DEVICE_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct class; ++struct rtdm_spi_master; ++ ++struct rtdm_spi_remote_slave { ++ u8 chip_select; ++ int cs_gpio; ++ struct gpio_desc *cs_gpiod; ++ struct rtdm_device dev; ++ struct list_head next; ++ struct rtdm_spi_config config; ++ struct rtdm_spi_master *master; ++ atomic_t mmap_refs; ++ struct mutex ctl_lock; ++}; ++ ++static inline struct device * ++slave_to_kdev(struct rtdm_spi_remote_slave *slave) ++{ ++ return rtdm_dev_to_kdev(&slave->dev); ++} ++ ++int rtdm_spi_add_remote_slave(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_master *spim, ++ struct spi_device *spi); ++ ++void rtdm_spi_remove_remote_slave(struct rtdm_spi_remote_slave *slave); ++ ++#endif /* !_RTDM_SPI_DEVICE_H */ +--- linux/drivers/xenomai/spi/spi-master.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-master.h 2021-04-07 16:01:27.980633090 +0800 +@@ -0,0 +1,82 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _RTDM_SPI_MASTER_H ++#define _RTDM_SPI_MASTER_H ++ ++#include ++#include ++#include "spi-device.h" ++ ++struct class; ++struct device_node; ++struct rtdm_spi_master; ++struct spi_master; ++ ++struct rtdm_spi_master_ops { ++ int (*open)(struct rtdm_spi_remote_slave *slave); ++ void (*close)(struct rtdm_spi_remote_slave *slave); ++ int (*configure)(struct rtdm_spi_remote_slave *slave); ++ void (*chip_select)(struct rtdm_spi_remote_slave *slave, ++ bool active); ++ int (*set_iobufs)(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_iobufs *p); ++ int (*mmap_iobufs)(struct rtdm_spi_remote_slave *slave, ++ struct vm_area_struct *vma); ++ void (*mmap_release)(struct rtdm_spi_remote_slave *slave); ++ int (*transfer_iobufs)(struct rtdm_spi_remote_slave *slave); ++ int (*transfer_iobufs_n)(struct rtdm_spi_remote_slave *slave, int len); ++ ssize_t (*write)(struct rtdm_spi_remote_slave *slave, ++ const void *tx, size_t len); ++ ssize_t (*read)(struct rtdm_spi_remote_slave *slave, ++ void *rx, size_t len); ++ struct rtdm_spi_remote_slave *(*attach_slave) ++ (struct rtdm_spi_master *master, ++ struct spi_device *spi); ++ void (*detach_slave)(struct rtdm_spi_remote_slave *slave); ++}; ++ ++struct rtdm_spi_master { ++ int subclass; ++ const struct rtdm_spi_master_ops *ops; ++ struct spi_master *kmaster; ++ struct { /* Internal */ ++ struct rtdm_driver driver; ++ struct class *devclass; ++ char *classname; ++ struct list_head slaves; ++ struct list_head next; ++ rtdm_lock_t lock; ++ rtdm_mutex_t bus_lock; ++ struct rtdm_spi_remote_slave *cs; ++ }; ++}; ++ ++#define rtdm_spi_alloc_master(__dev, __type, __mptr) \ ++ __rtdm_spi_alloc_master(__dev, sizeof(__type), \ ++ offsetof(__type, __mptr)) \ ++ ++struct rtdm_spi_master * ++__rtdm_spi_alloc_master(struct device *dev, size_t size, int off); ++ ++int __rtdm_spi_setup_driver(struct rtdm_spi_master *master); ++ ++int rtdm_spi_add_master(struct rtdm_spi_master *master); ++ ++void rtdm_spi_remove_master(struct rtdm_spi_master *master); ++ ++#endif /* !_RTDM_SPI_MASTER_H */ +--- linux/drivers/xenomai/spi/spi-master.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-master.c 2021-04-07 16:01:27.975633097 +0800 +@@ -0,0 +1,448 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "spi-master.h" ++ ++static inline ++struct device *to_kdev(struct rtdm_spi_remote_slave *slave) ++{ ++ return rtdm_dev_to_kdev(&slave->dev); ++} ++ ++static inline struct rtdm_spi_remote_slave *fd_to_slave(struct rtdm_fd *fd) ++{ ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ ++ return container_of(dev, struct rtdm_spi_remote_slave, dev); ++} ++ ++static int update_slave_config(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_config *config) ++{ ++ struct rtdm_spi_config old_config; ++ struct rtdm_spi_master *master = slave->master; ++ int ret; ++ ++ rtdm_mutex_lock(&master->bus_lock); ++ ++ old_config = slave->config; ++ slave->config = *config; ++ ret = slave->master->ops->configure(slave); ++ if (ret) { ++ slave->config = old_config; ++ rtdm_mutex_unlock(&master->bus_lock); ++ return ret; ++ } ++ ++ rtdm_mutex_unlock(&master->bus_lock); ++ ++ dev_info(to_kdev(slave), ++ "configured mode %d, %s%s%s%s%u bits/w, %u Hz max\n", ++ (int) (slave->config.mode & (SPI_CPOL | SPI_CPHA)), ++ (slave->config.mode & SPI_CS_HIGH) ? "cs_high, " : "", ++ (slave->config.mode & SPI_LSB_FIRST) ? "lsb, " : "", ++ (slave->config.mode & SPI_3WIRE) ? "3wire, " : "", ++ (slave->config.mode & SPI_LOOP) ? "loopback, " : "", ++ slave->config.bits_per_word, ++ slave->config.speed_hz); ++ ++ return 0; ++} ++ ++static int spi_master_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ ++ if (master->ops->open) ++ return master->ops->open(slave); ++ ++ return 0; ++} ++ ++static void spi_master_close(struct rtdm_fd *fd) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ rtdm_lockctx_t c; ++ ++ rtdm_lock_get_irqsave(&master->lock, c); ++ ++ if (master->cs == slave) ++ master->cs = NULL; ++ ++ rtdm_lock_put_irqrestore(&master->lock, c); ++ ++ if (master->ops->close) ++ master->ops->close(slave); ++} ++ ++static int do_chip_select(struct rtdm_spi_remote_slave *slave) ++{ /* master->bus_lock held */ ++ struct rtdm_spi_master *master = slave->master; ++ rtdm_lockctx_t c; ++ int state; ++ ++ if (slave->config.speed_hz == 0) ++ return -EINVAL; /* Setup is missing. 
*/ ++ ++ /* Serialize with spi_master_close() */ ++ rtdm_lock_get_irqsave(&master->lock, c); ++ ++ if (master->cs != slave) { ++ if (gpio_is_valid(slave->cs_gpio)) { ++ state = !!(slave->config.mode & SPI_CS_HIGH); ++ gpiod_set_raw_value(slave->cs_gpiod, state); ++ } else ++ master->ops->chip_select(slave, true); ++ master->cs = slave; ++ } ++ ++ rtdm_lock_put_irqrestore(&master->lock, c); ++ ++ return 0; ++} ++ ++static void do_chip_deselect(struct rtdm_spi_remote_slave *slave) ++{ /* master->bus_lock held */ ++ struct rtdm_spi_master *master = slave->master; ++ rtdm_lockctx_t c; ++ int state; ++ ++ rtdm_lock_get_irqsave(&master->lock, c); ++ ++ if (gpio_is_valid(slave->cs_gpio)) { ++ state = !(slave->config.mode & SPI_CS_HIGH); ++ gpiod_set_raw_value(slave->cs_gpiod, state); ++ } else ++ master->ops->chip_select(slave, false); ++ ++ master->cs = NULL; ++ ++ rtdm_lock_put_irqrestore(&master->lock, c); ++} ++ ++static int spi_master_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ struct rtdm_spi_config config; ++ int ret, len; ++ ++ switch (request) { ++ case SPI_RTIOC_SET_CONFIG: ++ ret = rtdm_safe_copy_from_user(fd, &config, ++ arg, sizeof(config)); ++ if (ret == 0) ++ ret = update_slave_config(slave, &config); ++ break; ++ case SPI_RTIOC_GET_CONFIG: ++ rtdm_mutex_lock(&master->bus_lock); ++ config = slave->config; ++ rtdm_mutex_unlock(&master->bus_lock); ++ ret = rtdm_safe_copy_to_user(fd, arg, ++ &config, sizeof(config)); ++ break; ++ case SPI_RTIOC_TRANSFER: ++ ret = -EINVAL; ++ if (master->ops->transfer_iobufs) { ++ rtdm_mutex_lock(&master->bus_lock); ++ ret = do_chip_select(slave); ++ if (ret == 0) { ++ ret = master->ops->transfer_iobufs(slave); ++ do_chip_deselect(slave); ++ } ++ rtdm_mutex_unlock(&master->bus_lock); ++ } ++ break; ++ case SPI_RTIOC_TRANSFER_N: ++ ret = -EINVAL; ++ if (master->ops->transfer_iobufs_n) { ++ len = (int)arg; ++ rtdm_mutex_lock(&master->bus_lock); ++ ret = do_chip_select(slave); ++ if (ret == 0) { ++ ret = master->ops->transfer_iobufs_n(slave, len); ++ do_chip_deselect(slave); ++ } ++ rtdm_mutex_unlock(&master->bus_lock); ++ } ++ break; ++ default: ++ ret = -ENOSYS; ++ } ++ ++ return ret; ++} ++ ++static int spi_master_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ struct rtdm_spi_iobufs iobufs; ++ int ret; ++ ++ switch (request) { ++ case SPI_RTIOC_SET_IOBUFS: ++ ret = rtdm_safe_copy_from_user(fd, &iobufs, ++ arg, sizeof(iobufs)); ++ if (ret) ++ break; ++ /* ++ * No transfer can happen without I/O buffers being ++ * set, and I/O buffers cannot be reset, therefore we ++ * need no serialization with the transfer code here. 
++ */ ++ mutex_lock(&slave->ctl_lock); ++ ret = master->ops->set_iobufs(slave, &iobufs); ++ mutex_unlock(&slave->ctl_lock); ++ if (ret == 0) ++ ret = rtdm_safe_copy_to_user(fd, arg, ++ &iobufs, sizeof(iobufs)); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static ssize_t spi_master_read_rt(struct rtdm_fd *fd, ++ void __user *u_buf, size_t len) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ void *rx; ++ int ret; ++ ++ if (len == 0) ++ return 0; ++ ++ rx = xnmalloc(len); ++ if (rx == NULL) ++ return -ENOMEM; ++ ++ rtdm_mutex_lock(&master->bus_lock); ++ ret = do_chip_select(slave); ++ if (ret == 0) { ++ ret = master->ops->read(slave, rx, len); ++ do_chip_deselect(slave); ++ } ++ rtdm_mutex_unlock(&master->bus_lock); ++ if (ret > 0) ++ ret = rtdm_safe_copy_to_user(fd, u_buf, rx, ret); ++ ++ xnfree(rx); ++ ++ return ret; ++} ++ ++static ssize_t spi_master_write_rt(struct rtdm_fd *fd, ++ const void __user *u_buf, size_t len) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ void *tx; ++ int ret; ++ ++ if (len == 0) ++ return 0; ++ ++ tx = xnmalloc(len); ++ if (tx == NULL) ++ return -ENOMEM; ++ ++ ret = rtdm_safe_copy_from_user(fd, tx, u_buf, len); ++ if (ret == 0) { ++ rtdm_mutex_lock(&master->bus_lock); ++ ret = do_chip_select(slave); ++ if (ret == 0) { ++ ret = master->ops->write(slave, tx, len); ++ do_chip_deselect(slave); ++ } ++ rtdm_mutex_unlock(&master->bus_lock); ++ } ++ ++ xnfree(tx); ++ ++ return ret; ++} ++ ++static void iobufs_vmopen(struct vm_area_struct *vma) ++{ ++ struct rtdm_spi_remote_slave *slave = vma->vm_private_data; ++ ++ atomic_inc(&slave->mmap_refs); ++ dev_dbg(slave_to_kdev(slave), "mapping added\n"); ++} ++ ++static void iobufs_vmclose(struct vm_area_struct *vma) ++{ ++ struct rtdm_spi_remote_slave *slave = vma->vm_private_data; ++ ++ if (atomic_dec_and_test(&slave->mmap_refs)) { ++ slave->master->ops->mmap_release(slave); ++ dev_dbg(slave_to_kdev(slave), "mapping released\n"); ++ } ++} ++ ++static struct vm_operations_struct iobufs_vmops = { ++ .open = iobufs_vmopen, ++ .close = iobufs_vmclose, ++}; ++ ++static int spi_master_mmap(struct rtdm_fd *fd, struct vm_area_struct *vma) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ int ret; ++ ++ if (slave->master->ops->mmap_iobufs == NULL) ++ return -EINVAL; ++ ++ ret = slave->master->ops->mmap_iobufs(slave, vma); ++ if (ret) ++ return ret; ++ ++ dev_dbg(slave_to_kdev(slave), "mapping created\n"); ++ atomic_inc(&slave->mmap_refs); ++ ++ if (slave->master->ops->mmap_release) { ++ vma->vm_ops = &iobufs_vmops; ++ vma->vm_private_data = slave; ++ } ++ ++ return 0; ++} ++ ++static char *spi_slave_devnode(struct device *dev, umode_t *mode) ++{ ++ return kasprintf(GFP_KERNEL, "rtdm/%s/%s", ++ dev->class->name, ++ dev_name(dev)); ++} ++ ++struct rtdm_spi_master * ++__rtdm_spi_alloc_master(struct device *dev, size_t size, int off) ++{ ++ struct rtdm_spi_master *master; ++ struct spi_master *kmaster; ++ ++ kmaster = spi_alloc_master(dev, size); ++ if (kmaster == NULL) ++ return NULL; ++ ++ master = (void *)(kmaster + 1) + off; ++ master->kmaster = kmaster; ++ spi_master_set_devdata(kmaster, master); ++ ++ return master; ++} ++EXPORT_SYMBOL_GPL(__rtdm_spi_alloc_master); ++ ++int __rtdm_spi_setup_driver(struct rtdm_spi_master *master) ++{ ++ master->classname = kstrdup( ++ dev_name(&master->kmaster->dev), GFP_KERNEL); ++ master->devclass = 
class_create(THIS_MODULE, ++ master->classname); ++ if (IS_ERR(master->devclass)) { ++ kfree(master->classname); ++ printk(XENO_ERR "cannot create sysfs class\n"); ++ return PTR_ERR(master->devclass); ++ } ++ ++ master->devclass->devnode = spi_slave_devnode; ++ master->cs = NULL; ++ ++ master->driver.profile_info = (struct rtdm_profile_info) ++ RTDM_PROFILE_INFO(rtdm_spi_master, ++ RTDM_CLASS_SPI, ++ master->subclass, ++ 0); ++ master->driver.device_flags = RTDM_NAMED_DEVICE; ++ master->driver.base_minor = 0; ++ master->driver.device_count = 256; ++ master->driver.context_size = 0; ++ master->driver.ops = (struct rtdm_fd_ops){ ++ .open = spi_master_open, ++ .close = spi_master_close, ++ .read_rt = spi_master_read_rt, ++ .write_rt = spi_master_write_rt, ++ .ioctl_rt = spi_master_ioctl_rt, ++ .ioctl_nrt = spi_master_ioctl_nrt, ++ .mmap = spi_master_mmap, ++ }; ++ ++ rtdm_drv_set_sysclass(&master->driver, master->devclass); ++ ++ INIT_LIST_HEAD(&master->slaves); ++ rtdm_lock_init(&master->lock); ++ rtdm_mutex_init(&master->bus_lock); ++ ++ return 0; ++} ++ ++static int spi_transfer_one_unimp(struct spi_master *master, ++ struct spi_device *spi, ++ struct spi_transfer *tfr) ++{ ++ return -ENODEV; ++} ++ ++int rtdm_spi_add_master(struct rtdm_spi_master *master) ++{ ++ struct spi_master *kmaster = master->kmaster; ++ ++ /* ++ * Prevent the transfer handler to be called from the regular ++ * SPI stack, just in case. ++ */ ++ kmaster->transfer_one = spi_transfer_one_unimp; ++ master->devclass = NULL; ++ ++ /* ++ * Add the core SPI driver, devices on the bus will be ++ * enumerated, handed to spi_device_probe(). ++ */ ++ return spi_register_master(kmaster); ++} ++EXPORT_SYMBOL_GPL(rtdm_spi_add_master); ++ ++void rtdm_spi_remove_master(struct rtdm_spi_master *master) ++{ ++ struct class *class = master->devclass; ++ char *classname = master->classname; ++ ++ rtdm_mutex_destroy(&master->bus_lock); ++ spi_unregister_master(master->kmaster); ++ rtdm_drv_set_sysclass(&master->driver, NULL); ++ class_destroy(class); ++ kfree(classname); ++} ++EXPORT_SYMBOL_GPL(rtdm_spi_remove_master); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/gpiopwm/gpiopwm.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpiopwm/gpiopwm.c 2021-04-07 16:01:27.970633105 +0800 +@@ -0,0 +1,298 @@ ++/* ++ * Copyright (C) 2015 Jorge Ramirez . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_AUTHOR("Jorge Ramirez "); ++MODULE_DESCRIPTION("PWM driver"); ++MODULE_VERSION("0.0.1"); ++MODULE_LICENSE("GPL"); ++ ++#define MAX_DUTY_CYCLE 100 ++#define MAX_SAMPLES (MAX_DUTY_CYCLE + 1) ++ ++struct gpiopwm_base_signal { ++ unsigned long period; ++}; ++ ++struct gpiopwm_duty_signal { ++ unsigned int range_min; ++ unsigned int range_max; ++ unsigned long period; ++ unsigned int cycle; ++}; ++ ++struct gpiopwm_control { ++ struct gpiopwm_duty_signal duty; ++ unsigned int configured; ++ unsigned int update; ++}; ++ ++struct gpiopwm_priv { ++ struct gpiopwm_base_signal base; ++ struct gpiopwm_duty_signal duty; ++ struct gpiopwm_control ctrl; ++ ++ rtdm_timer_t base_timer; ++ rtdm_timer_t duty_timer; ++ ++ int gpio; ++}; ++ ++static inline int div100(long long dividend) ++{ ++ const long long divisor = 0x28f5c29; ++ return ((divisor * dividend) >> 32) & 0xffffffff; ++} ++ ++static inline unsigned long duty_period(struct gpiopwm_duty_signal *p) ++{ ++ unsigned long period; ++ ++ period = p->range_min + div100((p->range_max - p->range_min) * p->cycle); ++ return period * 1000; ++} ++ ++static void gpiopwm_handle_base_timer(rtdm_timer_t *timer) ++{ ++ struct gpiopwm_priv *ctx = container_of(timer, struct gpiopwm_priv, ++ base_timer); ++ gpio_set_value(ctx->gpio, 1); ++ ++ /* one shot timer to avoid carrying over errors */ ++ rtdm_timer_start_in_handler(&ctx->duty_timer, ctx->duty.period, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ ++ if (ctx->ctrl.update) { ++ ctx->duty.period = ctx->ctrl.duty.period; ++ ctx->duty.cycle = ctx->ctrl.duty.cycle; ++ ctx->ctrl.update = 0; ++ } ++} ++ ++static void gpiopwm_handle_duty_timer(rtdm_timer_t *timer) ++{ ++ struct gpiopwm_priv *ctx = container_of(timer, struct gpiopwm_priv, ++ duty_timer); ++ gpio_set_value(ctx->gpio, 0); ++} ++ ++static inline int gpiopwm_config(struct rtdm_fd *fd, struct gpiopwm *conf) ++{ ++ struct rtdm_dev_context *dev_ctx = rtdm_fd_to_context(fd); ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ int ret; ++ ++ if (ctx->ctrl.configured) ++ return -EINVAL; ++ ++ if (conf->duty_cycle > MAX_DUTY_CYCLE) ++ return -EINVAL; ++ ++ ret = gpio_request(conf->gpio, dev_ctx->device->name); ++ if (ret < 0) { ++ ctx->gpio = -1; ++ return ret; ++ } ++ ++ ret = gpio_direction_output(conf->gpio, 0); ++ if (ret < 0) ++ return ret; ++ ++ gpio_set_value(conf->gpio, 0); ++ ++ ctx->duty.range_min = ctx->ctrl.duty.range_min = conf->range_min; ++ ctx->duty.range_max = ctx->ctrl.duty.range_max = conf->range_max; ++ ctx->duty.cycle = conf->duty_cycle; ++ ctx->base.period = conf->period; ++ ctx->gpio = conf->gpio; ++ ctx->duty.period = duty_period(&ctx->duty); ++ ++ rtdm_timer_init(&ctx->base_timer, gpiopwm_handle_base_timer, "base_timer"); ++ rtdm_timer_init(&ctx->duty_timer, gpiopwm_handle_duty_timer, "duty_timer"); ++ ++ ctx->ctrl.configured = 1; ++ ++ return 0; ++} ++ ++static inline int gpiopwm_change_duty_cycle(struct gpiopwm_priv *ctx, unsigned int cycle) ++{ ++ if (cycle > MAX_DUTY_CYCLE) ++ return -EINVAL; ++ ++ /* prepare the new data on the calling thread */ ++ ctx->ctrl.duty.cycle = cycle; ++ ctx->ctrl.duty.period = duty_period(&ctx->ctrl.duty); ++ ++ /* update data on the next base signal timeout */ ++ ctx->ctrl.update = 1; ++ ++ return 0; ++} ++ ++static inline int gpiopwm_stop(struct rtdm_fd *fd) ++{ ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ ++ if (!ctx->ctrl.configured) ++ return -EINVAL; ++ ++ gpio_set_value(ctx->gpio, 0); ++ ++ 
rtdm_timer_stop(&ctx->base_timer); ++ rtdm_timer_stop(&ctx->duty_timer); ++ ++ return 0; ++} ++ ++static inline int gpiopwm_start(struct rtdm_fd *fd) ++{ ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ ++ if (!ctx->ctrl.configured) ++ return -EINVAL; ++ ++ /* update duty cycle on next timeout */ ++ ctx->ctrl.update = 1; ++ ++ /* start the base signal tick */ ++ rtdm_timer_start(&ctx->base_timer, ctx->base.period, ctx->base.period, ++ RTDM_TIMERMODE_RELATIVE); ++ ++ return 0; ++} ++ ++static int gpiopwm_ioctl_rt(struct rtdm_fd *fd, unsigned int request, void __user *arg) ++{ ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ ++ switch (request) { ++ case GPIOPWM_RTIOC_SET_CONFIG: ++ return -ENOSYS; ++ case GPIOPWM_RTIOC_CHANGE_DUTY_CYCLE: ++ return gpiopwm_change_duty_cycle(ctx, (unsigned long) arg); ++ case GPIOPWM_RTIOC_START: ++ return gpiopwm_start(fd); ++ case GPIOPWM_RTIOC_STOP: ++ return gpiopwm_stop(fd); ++ default: ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int gpiopwm_ioctl_nrt(struct rtdm_fd *fd, unsigned int request, void __user *arg) ++{ ++ struct gpiopwm conf; ++ ++ switch (request) { ++ case GPIOPWM_RTIOC_SET_CONFIG: ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(conf))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, &conf, arg, sizeof(conf)); ++ return gpiopwm_config(fd, &conf); ++ case GPIOPWM_RTIOC_GET_CONFIG: ++ default: ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int gpiopwm_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ ++ ctx->ctrl.configured = 0; ++ ctx->gpio = -1; ++ ++ return 0; ++} ++ ++static void gpiopwm_close(struct rtdm_fd *fd) ++{ ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ ++ if (ctx->gpio >= 0) ++ gpio_free(ctx->gpio); ++ ++ if (!ctx->ctrl.configured) ++ return; ++ ++ rtdm_timer_destroy(&ctx->base_timer); ++ rtdm_timer_destroy(&ctx->duty_timer); ++} ++ ++static struct rtdm_driver gpiopwm_driver = { ++ .profile_info = RTDM_PROFILE_INFO(gpiopwm, ++ RTDM_CLASS_PWM, ++ RTDM_SUBCLASS_GENERIC, ++ RTPWM_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .device_count = 8, ++ .context_size = sizeof(struct gpiopwm_priv), ++ .ops = { ++ .open = gpiopwm_open, ++ .close = gpiopwm_close, ++ .ioctl_rt = gpiopwm_ioctl_rt, ++ .ioctl_nrt = gpiopwm_ioctl_nrt, ++ }, ++}; ++ ++static struct rtdm_device device[8] = { ++ [0 ... 
7] = { ++ .driver = &gpiopwm_driver, ++ .label = "gpiopwm%d", ++ } ++}; ++ ++static int __init __gpiopwm_init(void) ++{ ++ int i, ret; ++ ++ for (i = 0; i < ARRAY_SIZE(device); i++) { ++ ret = rtdm_dev_register(device + i); ++ if (ret) ++ goto fail; ++ } ++ ++ return 0; ++fail: ++ while (i-- > 0) ++ rtdm_dev_unregister(device + i); ++ ++ return ret; ++} ++ ++static void __exit __gpiopwm_exit(void) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(device); i++) ++ rtdm_dev_unregister(device + i); ++} ++ ++module_init(__gpiopwm_init); ++module_exit(__gpiopwm_exit); +--- linux/drivers/xenomai/gpiopwm/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpiopwm/Makefile 2021-04-07 16:01:27.966633110 +0800 +@@ -0,0 +1,5 @@ ++ccflags-y += -Ikernel -Iinclude/xenomai/ ++ ++obj-$(CONFIG_XENO_DRIVERS_GPIOPWM) += xeno_gpiopwm.o ++ ++xeno_gpiopwm-y := gpiopwm.o +--- linux/drivers/xenomai/gpiopwm/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpiopwm/Kconfig 2021-04-07 16:01:27.961633117 +0800 +@@ -0,0 +1,9 @@ ++menu "GPIOPWM support" ++ ++config XENO_DRIVERS_GPIOPWM ++ tristate "GPIOPWM driver" ++ help ++ ++ An RTDM-based GPIO PWM generator driver ++ ++endmenu +--- linux/drivers/xenomai/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/Makefile 2021-04-07 16:01:27.956633124 +0800 +@@ -0,0 +1 @@ ++obj-$(CONFIG_XENOMAI) += autotune/ serial/ testing/ can/ net/ analogy/ ipc/ udd/ gpio/ gpiopwm/ spi/ +--- linux/drivers/xenomai/analogy/testing/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/testing/Makefile 2021-04-07 16:01:27.952633130 +0800 +@@ -0,0 +1,8 @@ ++ ++ccflags-y += -Idrivers/xenomai/analogy ++ ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_FAKE) += analogy_fake.o ++ ++analogy_fake-y := fake.o ++ ++analogy_loop-y := loop.o +--- linux/drivers/xenomai/analogy/testing/loop.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/testing/loop.c 2021-04-07 16:01:27.947633137 +0800 +@@ -0,0 +1,285 @@ ++#include ++#include ++ ++#define LOOP_TASK_PERIOD 1000000 ++#define LOOP_NB_BITS 16 ++ ++#define LOOP_INPUT_SUBD 0 ++#define LOOP_OUTPUT_SUBD 1 ++ ++/* Channels descriptor */ ++static struct a4l_channels_desc loop_chandesc = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 8, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, LOOP_NB_BITS}, ++ }, ++}; ++ ++/* Ranges tab */ ++static struct a4l_rngtab loop_rngtab = { ++ .length = 2, ++ .rngs = { ++ RANGE_V(-5,5), ++ RANGE_V(-10,10), ++ }, ++}; ++/* Ranges descriptor */ ++struct a4l_rngdesc loop_rngdesc = RNG_GLOBAL(loop_rngtab); ++ ++/* Command options mask */ ++static struct a4l_cmd_desc loop_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW | TRIG_INT, ++ .scan_begin_src = TRIG_TIMER, ++ .convert_src = TRIG_NOW | TRIG_TIMER, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_COUNT| TRIG_NONE, ++}; ++ ++/* Private data organization */ ++struct loop_priv { ++ ++ /* Task descriptor */ ++ rtdm_task_t loop_task; ++ ++ /* Misc fields */ ++ int loop_running; ++ uint16_t loop_insn_value; ++}; ++typedef struct loop_priv lpprv_t; ++ ++/* Attach arguments contents */ ++struct loop_attach_arg { ++ unsigned long period; ++}; ++typedef struct loop_attach_arg lpattr_t; ++ ++static void loop_task_proc(void *arg); ++ ++/* --- Task part --- */ ++ ++/* Timer task routine */ ++static void loop_task_proc(void *arg) ++{ ++ struct a4l_device *dev = (struct a4l_device*)arg; ++ struct a4l_subdevice *input_subd, *output_subd; ++ lpprv_t *priv = 
(lpprv_t *)dev->priv; ++ ++ input_subd = a4l_get_subd(dev, LOOP_INPUT_SUBD); ++ output_subd = a4l_get_subd(dev, LOOP_OUTPUT_SUBD); ++ ++ if (input_subd == NULL || output_subd == NULL) { ++ a4l_err(dev, "loop_task_proc: subdevices unavailable\n"); ++ return; ++ } ++ ++ while (1) { ++ ++ int running; ++ ++ running = priv->loop_running; ++ ++ if (running) { ++ uint16_t value; ++ int ret=0; ++ ++ while (ret==0) { ++ ++ ret = a4l_buf_get(output_subd, ++ &value, sizeof(uint16_t)); ++ if (ret == 0) { ++ ++ a4l_info(dev, ++ "loop_task_proc: " ++ "data available\n"); ++ ++ a4l_buf_evt(output_subd, 0); ++ ++ ret = a4l_buf_put(input_subd, ++ &value, ++ sizeof(uint16_t)); ++ ++ if (ret==0) ++ a4l_buf_evt(input_subd, 0); ++ } ++ } ++ } ++ ++ rtdm_task_sleep(LOOP_TASK_PERIOD); ++ } ++} ++ ++/* --- Analogy Callbacks --- */ ++ ++/* Command callback */ ++int loop_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ a4l_info(subd->dev, "loop_cmd: (subd=%d)\n", subd->idx); ++ ++ return 0; ++ ++} ++ ++/* Trigger callback */ ++int loop_trigger(struct a4l_subdevice *subd, lsampl_t trignum) ++{ ++ lpprv_t *priv = (lpprv_t *)subd->dev->priv; ++ ++ a4l_info(subd->dev, "loop_trigger: (subd=%d)\n", subd->idx); ++ ++ priv->loop_running = 1; ++ ++ return 0; ++} ++ ++/* Cancel callback */ ++void loop_cancel(struct a4l_subdevice *subd) ++{ ++ lpprv_t *priv = (lpprv_t *)subd->dev->priv; ++ ++ a4l_info(subd->dev, "loop_cancel: (subd=%d)\n", subd->idx); ++ ++ priv->loop_running = 0; ++} ++ ++/* Read instruction callback */ ++int loop_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ lpprv_t *priv = (lpprv_t*)subd->dev->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ /* Checks the buffer size */ ++ if (insn->data_size != sizeof(uint16_t)) ++ return -EINVAL; ++ ++ /* Sets the memorized value */ ++ data[0] = priv->loop_insn_value; ++ ++ return 0; ++} ++ ++/* Write instruction callback */ ++int loop_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ lpprv_t *priv = (lpprv_t*)subd->dev->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ /* Checks the buffer size */ ++ if (insn->data_size != sizeof(uint16_t)) ++ return -EINVAL; ++ ++ /* Retrieves the value to memorize */ ++ priv->loop_insn_value = data[0]; ++ ++ return 0; ++} ++ ++void setup_input_subd(struct a4l_subdevice *subd) ++{ ++ memset(subd, 0, sizeof(struct a4l_subdevice)); ++ ++ subd->flags |= A4L_SUBD_AI; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->flags |= A4L_SUBD_MMAP; ++ subd->rng_desc = &loop_rngdesc; ++ subd->chan_desc = &loop_chandesc; ++ subd->do_cmd = loop_cmd; ++ subd->cancel = loop_cancel; ++ subd->cmd_mask = &loop_cmd_mask; ++ subd->insn_read = loop_insn_read; ++ subd->insn_write = loop_insn_write; ++} ++ ++void setup_output_subd(struct a4l_subdevice *subd) ++{ ++ memset(subd, 0, sizeof(struct a4l_subdevice)); ++ ++ subd->flags = A4L_SUBD_AO; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->flags |= A4L_SUBD_MMAP; ++ subd->rng_desc = &loop_rngdesc; ++ subd->chan_desc = &loop_chandesc; ++ subd->do_cmd = loop_cmd; ++ subd->cancel = loop_cancel; ++ subd->trigger = loop_trigger; ++ subd->cmd_mask = &loop_cmd_mask; ++ subd->insn_read = loop_insn_read; ++ subd->insn_write = loop_insn_write; ++} ++ ++/* Attach callback */ ++int loop_attach(struct a4l_device *dev, ++ a4l_lnkdesc_t *arg) ++{ ++ int ret = 0; ++ struct a4l_subdevice *subd; ++ lpprv_t *priv = (lpprv_t *)dev->priv; ++ ++ /* Add the fake input subdevice */ ++ subd = a4l_alloc_subd(0, setup_input_subd); ++ if (subd == NULL) ++ 
return -ENOMEM; ++ ++ ret = a4l_add_subd(dev, subd); ++ if (ret != LOOP_INPUT_SUBD) ++ /* Let Analogy free the lately allocated subdevice */ ++ return (ret < 0) ? ret : -EINVAL; ++ ++ /* Add the fake output subdevice */ ++ subd = a4l_alloc_subd(0, setup_output_subd); ++ if (subd == NULL) ++ /* Let Analogy free the lately allocated subdevice */ ++ return -ENOMEM; ++ ++ ret = a4l_add_subd(dev, subd); ++ if (ret != LOOP_OUTPUT_SUBD) ++ /* Let Analogy free the lately allocated subdevices */ ++ return (ret < 0) ? ret : -EINVAL; ++ ++ priv->loop_running = 0; ++ priv->loop_insn_value = 0; ++ ++ ret = rtmd_task_init(&priv->loop_task, ++ "a4l_loop task", ++ loop_task_proc, ++ dev, RTDM_TASK_HIGHEST_PRIORITY, 0); ++ ++ return ret; ++} ++ ++/* Detach callback */ ++int loop_detach(struct a4l_device *dev) ++{ ++ lpprv_t *priv = (lpprv_t *)dev->priv; ++ ++ rtdm_task_destroy(&priv->loop_task); ++ ++ return 0; ++} ++ ++/* --- Module part --- */ ++ ++static struct a4l_driver loop_drv = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_loop", ++ .attach = loop_attach, ++ .detach = loop_detach, ++ .privdata_size = sizeof(lpprv_t), ++}; ++ ++static int __init a4l_loop_init(void) ++{ ++ return a4l_register_drv(&loop_drv); ++} ++ ++static void __exit a4l_loop_cleanup(void) ++{ ++ a4l_unregister_drv(&loop_drv); ++} ++ ++MODULE_DESCRIPTION("Analogy loop driver"); ++MODULE_LICENSE("GPL"); ++ ++module_init(a4l_loop_init); ++module_exit(a4l_loop_cleanup); +--- linux/drivers/xenomai/analogy/testing/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/testing/Kconfig 2021-04-07 16:01:27.942633145 +0800 +@@ -0,0 +1,13 @@ ++ ++config XENO_DRIVERS_ANALOGY_FAKE ++ depends on XENO_DRIVERS_ANALOGY ++ tristate "Fake driver" ++ default n ++ help ++ ++ The fake driver displays many subdevices: ++ - 0: analog input; ++ - 1: digital input / output; ++ - 2: analog output; ++ - 3: analog input; data written into the subdevice 2 can be ++ read here. 
+--- linux/drivers/xenomai/analogy/testing/fake.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/testing/fake.c 2021-04-07 16:01:27.938633150 +0800 +@@ -0,0 +1,686 @@ ++#include ++#include ++ ++#define TASK_PERIOD 1000000 ++ ++#define AI_SUBD 0 ++#define DIO_SUBD 1 ++#define AO_SUBD 2 ++#define AI2_SUBD 3 ++ ++#define TRANSFER_SIZE 0x1000 ++ ++/* --- Driver related structures --- */ ++struct fake_priv { ++ /* Attach configuration parameters ++ (they should be relocated in ai_priv) */ ++ unsigned long amplitude_div; ++ unsigned long quanta_cnt; ++ ++ /* Task descriptor */ ++ rtdm_task_t task; ++ ++ /* Statuses of the asynchronous subdevices */ ++ int ai_running; ++ int ao_running; ++ int ai2_running; ++}; ++ ++struct ai_priv { ++ ++ /* Specific timing fields */ ++ unsigned long scan_period_ns; ++ unsigned long convert_period_ns; ++ unsigned long current_ns; ++ unsigned long reminder_ns; ++ unsigned long long last_ns; ++ ++ /* Misc fields */ ++ unsigned long amplitude_div; ++ unsigned long quanta_cnt; ++}; ++ ++struct ao_ai2_priv { ++ /* Asynchronous loop stuff */ ++ uint8_t buffer[TRANSFER_SIZE]; ++ int count; ++ /* Synchronous loop stuff */ ++ uint16_t insn_value; ++}; ++ ++struct dio_priv { ++ /* Bits status */ ++ uint16_t bits_values; ++}; ++ ++/* --- Channels / ranges part --- */ ++ ++/* Channels descriptors */ ++ ++static struct a4l_channels_desc analog_chandesc = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 8, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 16}, ++ }, ++}; ++ ++static struct a4l_channels_desc dio_chandesc = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 16, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++/* Ranges tab */ ++static struct a4l_rngtab analog_rngtab = { ++ .length = 2, ++ .rngs = { ++ RANGE_V(-5,5), ++ RANGE_V(-10,10), ++ }, ++}; ++/* Ranges descriptor */ ++static struct a4l_rngdesc analog_rngdesc = RNG_GLOBAL(analog_rngtab); ++ ++/* Command options masks */ ++ ++static struct a4l_cmd_desc ai_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW, ++ .scan_begin_src = TRIG_TIMER, ++ .convert_src = TRIG_NOW | TRIG_TIMER, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_COUNT | TRIG_NONE, ++}; ++ ++static struct a4l_cmd_desc ao_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW | TRIG_INT, ++ .scan_begin_src = TRIG_TIMER, ++ .convert_src = TRIG_NOW | TRIG_TIMER, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_COUNT | TRIG_NONE, ++}; ++ ++/* --- Analog input simulation --- */ ++ ++/* --- Values generation for 1st AI --- */ ++ ++static inline uint16_t ai_value_output(struct ai_priv *priv) ++{ ++ static uint16_t output_tab[8] = { ++ 0x0001, 0x2000, 0x4000, 0x6000, ++ 0x8000, 0xa000, 0xc000, 0xffff ++ }; ++ static unsigned int output_idx; ++ static DEFINE_RTDM_LOCK(output_lock); ++ ++ unsigned long flags; ++ unsigned int idx; ++ ++ rtdm_lock_get_irqsave(&output_lock, flags); ++ ++ output_idx += priv->quanta_cnt; ++ if(output_idx == 8) ++ output_idx = 0; ++ idx = output_idx; ++ ++ rtdm_lock_put_irqrestore(&output_lock, flags); ++ ++ return output_tab[idx] / priv->amplitude_div; ++} ++ ++int ai_push_values(struct a4l_subdevice *subd) ++{ ++ uint64_t now_ns, elapsed_ns = 0; ++ struct a4l_cmd_desc *cmd; ++ struct ai_priv *priv; ++ int i = 0; ++ ++ if (!subd) ++ return -EINVAL; ++ ++ priv = (struct ai_priv *)subd->priv; ++ ++ cmd = a4l_get_cmd(subd); ++ if (!cmd) ++ return -EPIPE; ++ ++ now_ns = a4l_get_time(); ++ elapsed_ns += now_ns - priv->last_ns + priv->reminder_ns; ++ priv->last_ns = now_ns; ++ ++ 
while(elapsed_ns >= priv->scan_period_ns) { ++ int j; ++ ++ for(j = 0; j < cmd->nb_chan; j++) { ++ uint16_t value = ai_value_output(priv); ++ a4l_buf_put(subd, &value, sizeof(uint16_t)); ++ } ++ ++ elapsed_ns -= priv->scan_period_ns; ++ i++; ++ } ++ ++ priv->current_ns += i * priv->scan_period_ns; ++ priv->reminder_ns = elapsed_ns; ++ ++ if (i != 0) ++ a4l_buf_evt(subd, 0); ++ ++ return 0; ++} ++ ++/* --- Data retrieval for AO --- */ ++ ++int ao_pull_values(struct a4l_subdevice *subd) ++{ ++ struct ao_ai2_priv *priv = (struct ao_ai2_priv *)subd->priv; ++ int err; ++ ++ /* Let's have a look at how many samples are available */ ++ priv->count = a4l_buf_count(subd) < TRANSFER_SIZE ? ++ a4l_buf_count(subd) : TRANSFER_SIZE; ++ ++ if (!priv->count) ++ return 0; ++ ++ err = a4l_buf_get(subd, priv->buffer, priv->count); ++ if (err < 0) { ++ a4l_err(subd->dev, "ao_get_values: a4l_buf_get failed (err=%d)\n", err); ++ priv->count = 0; ++ return err; ++ ++ } ++ ++ a4l_info(subd->dev, " %d bytes added to private buffer from async p=%p\n", ++ priv->count, subd->buf->buf); ++ ++ a4l_buf_evt(subd, 0); ++ ++ return 0; ++} ++ ++/* --- Data redirection for 2nd AI (from AO) --- */ ++ ++int ai2_push_values(struct a4l_subdevice *subd) ++{ ++ struct ao_ai2_priv *priv = *((struct ao_ai2_priv **)subd->priv); ++ int err = 0; ++ ++ if (priv->count) { ++ err = a4l_buf_put(subd, priv->buffer, priv->count); ++ ++ /* If there is no more place in the asynchronous ++ buffer, data are likely to be dropped; it is just a ++ test driver so no need to implement trickier mechanism */ ++ err = (err == -EAGAIN) ? 0 : err; ++ ++ a4l_info(subd->dev, "%d bytes added to async buffer p=%p\n", ++ priv->count, subd->buf->buf); ++ ++ priv->count = 0; ++ if (err < 0) ++ a4l_err(subd->dev, ++ "ai2_push_values: " ++ "a4l_buf_put failed (err=%d)\n", err); ++ else ++ a4l_buf_evt(subd, 0); ++ } ++ ++ return err; ++} ++ ++/* --- Asynchronous AI functions --- */ ++ ++static int ai_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ struct ai_priv *ai_priv = (struct ai_priv *)subd->priv; ++ ++ ai_priv->scan_period_ns = cmd->scan_begin_arg; ++ ai_priv->convert_period_ns = (cmd->convert_src==TRIG_TIMER)? 
++ cmd->convert_arg:0; ++ ++ a4l_dbg(1, drv_dbg, subd->dev, "scan_period=%luns convert_period=%luns\n", ++ ai_priv->scan_period_ns, ai_priv->convert_period_ns); ++ ++ ai_priv->last_ns = a4l_get_time(); ++ ++ ai_priv->current_ns = ((unsigned long)ai_priv->last_ns); ++ ai_priv->reminder_ns = 0; ++ ++ priv->ai_running = 1; ++ ++ return 0; ++ ++} ++ ++static int ai_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ if(cmd->scan_begin_src == TRIG_TIMER) ++ { ++ if (cmd->scan_begin_arg < 1000) ++ return -EINVAL; ++ ++ if (cmd->convert_src == TRIG_TIMER && ++ cmd->scan_begin_arg < (cmd->convert_arg * cmd->nb_chan)) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static void ai_cancel(struct a4l_subdevice *subd) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ ++ priv->ai_running = 0; ++} ++ ++static void ai_munge(struct a4l_subdevice *subd, void *buf, unsigned long size) ++{ ++ int i; ++ ++ for(i = 0; i < size / sizeof(uint16_t); i++) ++ ((uint16_t *)buf)[i] += 1; ++} ++ ++/* --- Asynchronous A0 functions --- */ ++ ++int ao_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ a4l_info(subd->dev, "(subd=%d)\n", subd->idx); ++ return 0; ++} ++ ++int ao_trigger(struct a4l_subdevice *subd, lsampl_t trignum) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ ++ a4l_info(subd->dev, "(subd=%d)\n", subd->idx); ++ priv->ao_running = 1; ++ return 0; ++} ++ ++void ao_cancel(struct a4l_subdevice *subd) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ struct ao_ai2_priv *ao_priv = (struct ao_ai2_priv *)subd->priv; ++ int running; ++ ++ a4l_info(subd->dev, "(subd=%d)\n", subd->idx); ++ priv->ao_running = 0; ++ ++ running = priv->ai2_running; ++ if (running) { ++ struct a4l_subdevice *ai2_subd = ++ (struct a4l_subdevice *)a4l_get_subd(subd->dev, AI2_SUBD); ++ /* Here, we have not saved the required amount of ++ data; so, we cannot know whether or not, it is the ++ end of the acquisition; that is why we force it */ ++ priv->ai2_running = 0; ++ ao_priv->count = 0; ++ ++ a4l_info(subd->dev, "subd %d cancelling subd %d too \n", ++ subd->idx, AI2_SUBD); ++ ++ a4l_buf_evt(ai2_subd, A4L_BUF_EOA); ++ } ++} ++ ++/* --- Asynchronous 2nd AI functions --- */ ++ ++int ai2_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ ++ a4l_info(subd->dev, "(subd=%d)\n", subd->idx); ++ priv->ai2_running = 1; ++ return 0; ++} ++ ++void ai2_cancel(struct a4l_subdevice *subd) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ struct ao_ai2_priv *ai2_priv = *((struct ao_ai2_priv **)subd->priv); ++ ++ int running; ++ ++ a4l_info(subd->dev, "(subd=%d)\n", subd->idx); ++ priv->ai2_running = 0; ++ ++ running = priv->ao_running; ++ if (running) { ++ struct a4l_subdevice *ao_subd = ++ (struct a4l_subdevice *)a4l_get_subd(subd->dev, AO_SUBD); ++ /* Here, we have not saved the required amount of ++ data; so, we cannot know whether or not, it is the ++ end of the acquisition; that is why we force it */ ++ priv->ao_running = 0; ++ ai2_priv->count = 0; ++ ++ a4l_info(subd->dev, "subd %d cancelling subd %d too \n", ++ subd->idx, AO_SUBD); ++ ++ a4l_buf_evt(ao_subd, A4L_BUF_EOA); ++ } ++ ++} ++ ++ ++/* --- Synchronous AI functions --- */ ++ ++static int ai_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ai_priv *priv = (struct ai_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ int i; ++ ++ for(i = 0; 
i < insn->data_size / sizeof(uint16_t); i++) ++ data[i] = ai_value_output(priv); ++ ++ return 0; ++} ++ ++/* --- Synchronous DIO function --- */ ++ ++static int dio_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct dio_priv *priv = (struct dio_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ if (insn->data_size != 2 * sizeof(uint16_t)) ++ return -EINVAL; ++ ++ if (data[0] != 0) { ++ priv->bits_values &= ~(data[0]); ++ priv->bits_values |= (data[0] & data[1]); ++ } ++ ++ data[1] = priv->bits_values; ++ ++ return 0; ++} ++ ++/* --- Synchronous AO + AI2 functions --- */ ++ ++int ao_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ao_ai2_priv *priv = (struct ao_ai2_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ /* Checks the buffer size */ ++ if (insn->data_size != sizeof(uint16_t)) ++ return -EINVAL; ++ ++ /* Retrieves the value to memorize */ ++ priv->insn_value = data[0]; ++ ++ return 0; ++} ++ ++int ai2_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ao_ai2_priv *priv = *((struct ao_ai2_priv **)subd->priv); ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ /* Checks the buffer size */ ++ if (insn->data_size != sizeof(uint16_t)) ++ return -EINVAL; ++ ++ /* Sets the memorized value */ ++ data[0] = priv->insn_value; ++ ++ return 0; ++} ++ ++/* --- Global task part --- */ ++ ++/* One task is enough for all the asynchronous subdevices, it is just a fake ++ * driver after all ++ */ ++static void task_proc(void *arg) ++{ ++ struct a4l_subdevice *ai_subd, *ao_subd, *ai2_subd; ++ struct a4l_device *dev; ++ struct fake_priv *priv; ++ int running; ++ ++ dev = arg; ++ ai_subd = a4l_get_subd(dev, AI_SUBD); ++ ao_subd = a4l_get_subd(dev, AO_SUBD); ++ ai2_subd = a4l_get_subd(dev, AI2_SUBD); ++ ++ priv = dev->priv; ++ ++ while(!rtdm_task_should_stop()) { ++ ++ /* copy sample static data from the subd private buffer to the ++ * asynchronous buffer ++ */ ++ running = priv->ai_running; ++ if (running && ai_push_values(ai_subd) < 0) { ++ /* on error, wait for detach to destroy the task */ ++ rtdm_task_sleep(RTDM_TIMEOUT_INFINITE); ++ continue; ++ } ++ ++ /* ++ * pull the data from the output subdevice (asynchronous buffer) ++ * into its private buffer ++ */ ++ running = priv->ao_running; ++ if (running && ao_pull_values(ao_subd) < 0) { ++ rtdm_task_sleep(RTDM_TIMEOUT_INFINITE); ++ continue; ++ } ++ ++ running = priv->ai2_running; ++ /* ++ * then loop it to the ai2 subd since their private data is shared: so ++ * pull the data from the private buffer back into the device's ++ * asynchronous buffer ++ */ ++ if (running && ai2_push_values(ai2_subd) < 0) { ++ rtdm_task_sleep(RTDM_TIMEOUT_INFINITE); ++ continue; ++ } ++ ++ rtdm_task_sleep(TASK_PERIOD); ++ } ++} ++ ++/* --- Initialization functions --- */ ++ ++void setup_ai_subd(struct a4l_subdevice *subd) ++{ ++ /* Fill the subdevice structure */ ++ subd->flags |= A4L_SUBD_AI; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->flags |= A4L_SUBD_MMAP; ++ subd->rng_desc = &analog_rngdesc; ++ subd->chan_desc = &analog_chandesc; ++ subd->do_cmd = ai_cmd; ++ subd->do_cmdtest = ai_cmdtest; ++ subd->cancel = ai_cancel; ++ subd->munge = ai_munge; ++ subd->cmd_mask = &ai_cmd_mask; ++ subd->insn_read = ai_insn_read; ++} ++ ++void setup_dio_subd(struct a4l_subdevice *subd) ++{ ++ /* Fill the subdevice structure */ ++ subd->flags |= A4L_SUBD_DIO; ++ subd->chan_desc = &dio_chandesc; ++ subd->rng_desc = &range_digital; ++ 
subd->insn_bits = dio_insn_bits; ++} ++ ++void setup_ao_subd(struct a4l_subdevice *subd) ++{ ++ /* Fill the subdevice structure */ ++ subd->flags |= A4L_SUBD_AO; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->flags |= A4L_SUBD_MMAP; ++ subd->rng_desc = &analog_rngdesc; ++ subd->chan_desc = &analog_chandesc; ++ subd->do_cmd = ao_cmd; ++ subd->cancel = ao_cancel; ++ subd->trigger = ao_trigger; ++ subd->cmd_mask = &ao_cmd_mask; ++ subd->insn_write = ao_insn_write; ++} ++ ++void setup_ai2_subd(struct a4l_subdevice *subd) ++{ ++ /* Fill the subdevice structure */ ++ subd->flags |= A4L_SUBD_AI; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->flags |= A4L_SUBD_MMAP; ++ subd->rng_desc = &analog_rngdesc; ++ subd->chan_desc = &analog_chandesc; ++ subd->do_cmd = ai2_cmd; ++ subd->cancel = ai2_cancel; ++ subd->cmd_mask = &ai_cmd_mask; ++ subd->insn_read = ai2_insn_read; ++} ++ ++/* --- Attach / detach functions --- */ ++ ++int test_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ typedef void (*setup_subd_function) (struct a4l_subdevice *subd); ++ struct fake_priv *priv = (struct fake_priv *) dev->priv; ++ struct a4l_subdevice *subd; ++ unsigned long tmp; ++ struct ai_priv *r; ++ int i, ret = 0; ++ ++ struct initializers { ++ struct a4l_subdevice *subd; ++ setup_subd_function init; ++ int private_len; ++ char *name; ++ int index; ++ } sds[] = { ++ [AI_SUBD] = { ++ .name = "AI", ++ .private_len = sizeof(struct ai_priv), ++ .init = setup_ai_subd, ++ .index = AI_SUBD, ++ .subd = NULL, ++ }, ++ [DIO_SUBD] = { ++ .name = "DIO", ++ .private_len = sizeof(struct dio_priv), ++ .init = setup_dio_subd, ++ .index = DIO_SUBD, ++ .subd = NULL, ++ }, ++ [AO_SUBD] = { ++ .name = "AO", ++ .private_len = sizeof(struct ao_ai2_priv), ++ .init = setup_ao_subd, ++ .index = AO_SUBD, ++ .subd = NULL, ++ }, ++ [AI2_SUBD] = { ++ .name = "AI2", ++ .private_len = sizeof(struct ao_ai2_priv *), ++ .init = setup_ai2_subd, ++ .index = AI2_SUBD, ++ .subd = NULL, ++ }, ++ }; ++ ++ a4l_dbg(1, drv_dbg, dev, "starting attach procedure...\n"); ++ ++ /* Set default values for attach parameters */ ++ priv->amplitude_div = 1; ++ priv->quanta_cnt = 1; ++ if (arg->opts_size) { ++ unsigned long *args = (unsigned long *)arg->opts; ++ priv->amplitude_div = args[0]; ++ if (arg->opts_size == 2 * sizeof(unsigned long)) ++ priv->quanta_cnt = (args[1] > 7 || args[1] == 0) ? ++ 1 : args[1]; ++ } ++ ++ /* create and register the subdevices */ ++ for (i = 0; i < ARRAY_SIZE(sds) ; i++) { ++ ++ subd = a4l_alloc_subd(sds[i].private_len, sds[i].init); ++ if (subd == NULL) ++ return -ENOMEM; ++ ++ ret = a4l_add_subd(dev, subd); ++ if (ret != sds[i].index) ++ return (ret < 0) ? 
ret : -EINVAL; ++ ++ sds[i].subd = subd; ++ ++ a4l_dbg(1, drv_dbg, dev, " %s subdev registered \n", sds[i].name); ++ } ++ ++ /* initialize specifics */ ++ r = (void *) sds[AI_SUBD].subd->priv; ++ r->amplitude_div = priv->amplitude_div; ++ r->quanta_cnt = priv->quanta_cnt; ++ ++ /* A0 and AI2 shared their private buffers */ ++ tmp = (unsigned long) sds[AO_SUBD].subd->priv; ++ memcpy(sds[AI2_SUBD].subd->priv, &tmp, sds[AI2_SUBD].private_len) ; ++ ++ /* create the task */ ++ ret = rtdm_task_init(&priv->task, "Fake AI task", task_proc, dev, ++ RTDM_TASK_HIGHEST_PRIORITY, 0); ++ if (ret) ++ a4l_dbg(1, drv_dbg, dev, "Error creating A4L task \n"); ++ ++ a4l_dbg(1, drv_dbg, dev, "attach procedure completed: " ++ "adiv = %lu, qcount = %lu \n" ++ , priv->amplitude_div, priv->quanta_cnt); ++ ++ return ret; ++} ++ ++int test_detach(struct a4l_device *dev) ++{ ++ struct fake_priv *priv = (struct fake_priv *)dev->priv; ++ ++ rtdm_task_destroy(&priv->task); ++ a4l_dbg(1, drv_dbg, dev, "detach procedure complete\n"); ++ ++ return 0; ++} ++ ++/* --- Module stuff --- */ ++ ++static struct a4l_driver test_drv = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_fake", ++ .driver_name = "fake", ++ .attach = test_attach, ++ .detach = test_detach, ++ .privdata_size = sizeof(struct fake_priv), ++}; ++ ++static int __init a4l_fake_init(void) ++{ ++ return a4l_register_drv(&test_drv); ++} ++ ++static void __exit a4l_fake_cleanup(void) ++{ ++ a4l_unregister_drv(&test_drv); ++} ++ ++MODULE_DESCRIPTION("Analogy fake driver"); ++MODULE_LICENSE("GPL"); ++ ++module_init(a4l_fake_init); ++module_exit(a4l_fake_cleanup); +--- linux/drivers/xenomai/analogy/proc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/proc.h 2021-04-07 16:01:27.933633157 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * Analogy for Linux, procfs related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __ANALOGY_PROC_H__ ++#define __ANALOGY_PROC_H__ ++ ++#ifdef __KERNEL__ ++ ++#ifdef CONFIG_PROC_FS ++extern struct proc_dir_entry *a4l_proc_root; ++#endif /* CONFIG_PROC_FS */ ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __ANALOGY_PROC_H__ */ +--- linux/drivers/xenomai/analogy/transfer.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/transfer.c 2021-04-07 16:01:27.928633164 +0800 +@@ -0,0 +1,259 @@ ++/* ++ * Analogy for Linux, transfer related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++ ++#include "proc.h" ++ ++/* --- Initialization / cleanup / cancel functions --- */ ++ ++int a4l_precleanup_transfer(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev; ++ struct a4l_transfer *tsf; ++ int i, err = 0; ++ ++ dev = a4l_get_dev(cxt); ++ tsf = &dev->transfer; ++ ++ if (tsf == NULL) { ++ __a4l_err("a4l_precleanup_transfer: " ++ "incoherent status, transfer block not reachable\n"); ++ return -ENODEV; ++ } ++ ++ for (i = 0; i < tsf->nb_subd; i++) { ++ unsigned long *status = &tsf->subds[i]->status; ++ ++ __a4l_dbg(1, core_dbg, "subd[%d]->status=0x%08lx\n", i, *status); ++ ++ if (test_and_set_bit(A4L_SUBD_BUSY, status)) { ++ __a4l_err("a4l_precleanup_transfer: " ++ "device busy, acquisition occuring\n"); ++ err = -EBUSY; ++ goto out_error; ++ } else ++ set_bit(A4L_SUBD_CLEAN, status); ++ } ++ ++ return 0; ++ ++out_error: ++ for (i = 0; i < tsf->nb_subd; i++) { ++ unsigned long *status = &tsf->subds[i]->status; ++ ++ if (test_bit(A4L_SUBD_CLEAN, status)){ ++ clear_bit(A4L_SUBD_BUSY, status); ++ clear_bit(A4L_SUBD_CLEAN, status); ++ } ++ } ++ ++ return err; ++} ++ ++int a4l_cleanup_transfer(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev; ++ struct a4l_transfer *tsf; ++ ++ dev = a4l_get_dev(cxt); ++ tsf = &dev->transfer; ++ ++ /* Releases the pointers tab, if need be */ ++ if (tsf->subds != NULL) { ++ rtdm_free(tsf->subds); ++ } ++ ++ memset(tsf, 0, sizeof(struct a4l_transfer)); ++ ++ return 0; ++} ++ ++void a4l_presetup_transfer(struct a4l_device_context *cxt) ++{ ++ struct a4l_device *dev = NULL; ++ struct a4l_transfer *tsf; ++ ++ dev = a4l_get_dev(cxt); ++ tsf = &dev->transfer; ++ ++ /* Clear the structure */ ++ memset(tsf, 0, sizeof(struct a4l_transfer)); ++ ++ tsf->default_bufsize = A4L_BUF_DEFSIZE; ++ ++ /* 0 is also considered as a valid IRQ, then ++ the IRQ number must be initialized with another value */ ++ tsf->irq_desc.irq = A4L_IRQ_UNUSED; ++} ++ ++int a4l_setup_transfer(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev = NULL; ++ struct a4l_transfer *tsf; ++ struct list_head *this; ++ int i = 0, ret = 0; ++ ++ dev = a4l_get_dev(cxt); ++ tsf = &dev->transfer; ++ ++ /* Recovers the subdevices count ++ (as they are registered in a linked list */ ++ list_for_each(this, &dev->subdvsq) { ++ tsf->nb_subd++; ++ } ++ ++ __a4l_dbg(1, core_dbg, "nb_subd=%d\n", tsf->nb_subd); ++ ++ /* Allocates a suitable tab for the subdevices */ ++ tsf->subds = rtdm_malloc(tsf->nb_subd * sizeof(struct a4l_subdevice *)); ++ if (tsf->subds == NULL) { ++ __a4l_err("a4l_setup_transfer: call1(alloc) failed \n"); ++ ret = -ENOMEM; ++ goto out_setup_tsf; ++ } ++ ++ /* Recovers the subdevices pointers */ ++ list_for_each(this, &dev->subdvsq) { ++ tsf->subds[i++] = list_entry(this, struct a4l_subdevice, list); ++ } ++ ++out_setup_tsf: ++ ++ if (ret != 0) ++ a4l_cleanup_transfer(cxt); ++ ++ return ret; ++} ++ ++/* --- IRQ handling section --- */ ++ ++int a4l_request_irq(struct a4l_device * dev, ++ unsigned int irq, ++ a4l_irq_hdlr_t handler, ++ unsigned 
long flags, void *cookie) ++{ ++ int ret; ++ ++ if (dev->transfer.irq_desc.irq != A4L_IRQ_UNUSED) ++ return -EBUSY; ++ ++ ret = __a4l_request_irq(&dev->transfer.irq_desc, irq, handler, flags, ++ cookie); ++ if (ret != 0) { ++ __a4l_err("a4l_request_irq: IRQ registration failed\n"); ++ dev->transfer.irq_desc.irq = A4L_IRQ_UNUSED; ++ } ++ ++ return ret; ++} ++ ++int a4l_free_irq(struct a4l_device * dev, unsigned int irq) ++{ ++ ++ int ret = 0; ++ ++ if (dev->transfer.irq_desc.irq != irq) ++ return -EINVAL; ++ ++ /* There is less need to use a spinlock ++ than for a4l_request_irq() */ ++ ret = __a4l_free_irq(&dev->transfer.irq_desc); ++ ++ if (ret == 0) ++ dev->transfer.irq_desc.irq = A4L_IRQ_UNUSED; ++ ++ return ret; ++} ++ ++unsigned int a4l_get_irq(struct a4l_device * dev) ++{ ++ return dev->transfer.irq_desc.irq; ++} ++ ++/* --- Proc section --- */ ++ ++#ifdef CONFIG_PROC_FS ++ ++int a4l_rdproc_transfer(struct seq_file *seq, void *v) ++{ ++ struct a4l_transfer *transfer = (struct a4l_transfer *) seq->private; ++ int i; ++ ++ if (v != SEQ_START_TOKEN) ++ return -EINVAL; ++ ++ seq_printf(seq, "-- Subdevices --\n\n"); ++ seq_printf(seq, "| idx | type\n"); ++ ++ /* Gives the subdevice type's name */ ++ for (i = 0; i < transfer->nb_subd; i++) { ++ char *type; ++ switch (transfer->subds[i]->flags & A4L_SUBD_TYPES) { ++ case A4L_SUBD_UNUSED: ++ type = "Unused subdevice"; ++ break; ++ case A4L_SUBD_AI: ++ type = "Analog input subdevice"; ++ break; ++ case A4L_SUBD_AO: ++ type = "Analog output subdevice"; ++ break; ++ case A4L_SUBD_DI: ++ type = "Digital input subdevice"; ++ break; ++ case A4L_SUBD_DO: ++ type = "Digital output subdevice"; ++ break; ++ case A4L_SUBD_DIO: ++ type = "Digital input/output subdevice"; ++ break; ++ case A4L_SUBD_COUNTER: ++ type = "Counter subdevice"; ++ break; ++ case A4L_SUBD_TIMER: ++ type = "Timer subdevice"; ++ break; ++ case A4L_SUBD_MEMORY: ++ type = "Memory subdevice"; ++ break; ++ case A4L_SUBD_CALIB: ++ type = "Calibration subdevice"; ++ break; ++ case A4L_SUBD_PROC: ++ type = "Processor subdevice"; ++ break; ++ case A4L_SUBD_SERIAL: ++ type = "Serial subdevice"; ++ break; ++ default: ++ type = "Unknown subdevice"; ++ } ++ ++ seq_printf(seq, "| %02d | %s\n", i, type); ++ } ++ ++ return 0; ++} ++ ++#endif /* CONFIG_PROC_FS */ +--- linux/drivers/xenomai/analogy/buffer.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/buffer.c 2021-04-07 16:01:27.923633172 +0800 +@@ -0,0 +1,1145 @@ ++/* ++ * Analogy for Linux, buffer related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* --- Initialization functions (init, alloc, free) --- */ ++ ++/* The buffer charactistic is very close to the Comedi one: it is ++ allocated with vmalloc() and all physical addresses of the pages which ++ compose the virtual buffer are hold in a table */ ++ ++void a4l_free_buffer(struct a4l_buffer * buf_desc) ++{ ++ __a4l_dbg(1, core_dbg, "buf=%p buf->buf=%p\n", buf_desc, buf_desc->buf); ++ ++ if (buf_desc->pg_list != NULL) { ++ rtdm_free(buf_desc->pg_list); ++ buf_desc->pg_list = NULL; ++ } ++ ++ if (buf_desc->buf != NULL) { ++ char *vaddr, *vabase = buf_desc->buf; ++ for (vaddr = vabase; vaddr < vabase + buf_desc->size; ++ vaddr += PAGE_SIZE) ++ ClearPageReserved(vmalloc_to_page(vaddr)); ++ vfree(buf_desc->buf); ++ buf_desc->buf = NULL; ++ } ++} ++ ++int a4l_alloc_buffer(struct a4l_buffer *buf_desc, int buf_size) ++{ ++ int ret = 0; ++ char *vaddr, *vabase; ++ ++ buf_desc->size = buf_size; ++ buf_desc->size = PAGE_ALIGN(buf_desc->size); ++ ++ buf_desc->buf = vmalloc_32(buf_desc->size); ++ if (buf_desc->buf == NULL) { ++ ret = -ENOMEM; ++ goto out_virt_contig_alloc; ++ } ++ ++ vabase = buf_desc->buf; ++ ++ for (vaddr = vabase; vaddr < vabase + buf_desc->size; ++ vaddr += PAGE_SIZE) ++ SetPageReserved(vmalloc_to_page(vaddr)); ++ ++ buf_desc->pg_list = rtdm_malloc(((buf_desc->size) >> PAGE_SHIFT) * ++ sizeof(unsigned long)); ++ if (buf_desc->pg_list == NULL) { ++ ret = -ENOMEM; ++ goto out_virt_contig_alloc; ++ } ++ ++ for (vaddr = vabase; vaddr < vabase + buf_desc->size; ++ vaddr += PAGE_SIZE) ++ buf_desc->pg_list[(vaddr - vabase) >> PAGE_SHIFT] = ++ (unsigned long) page_to_phys(vmalloc_to_page(vaddr)); ++ ++ __a4l_dbg(1, core_dbg, "buf=%p buf->buf=%p\n", buf_desc, buf_desc->buf); ++ ++out_virt_contig_alloc: ++ if (ret != 0) ++ a4l_free_buffer(buf_desc); ++ ++ return ret; ++} ++ ++static void a4l_reinit_buffer(struct a4l_buffer *buf_desc) ++{ ++ /* No command to process yet */ ++ buf_desc->cur_cmd = NULL; ++ ++ /* No more (or not yet) linked with a subdevice */ ++ buf_desc->subd = NULL; ++ ++ /* Initializes counts and flags */ ++ buf_desc->end_count = 0; ++ buf_desc->prd_count = 0; ++ buf_desc->cns_count = 0; ++ buf_desc->tmp_count = 0; ++ buf_desc->mng_count = 0; ++ ++ /* Flush pending events */ ++ buf_desc->flags = 0; ++ a4l_flush_sync(&buf_desc->sync); ++} ++ ++void a4l_init_buffer(struct a4l_buffer *buf_desc) ++{ ++ memset(buf_desc, 0, sizeof(struct a4l_buffer)); ++ a4l_init_sync(&buf_desc->sync); ++ a4l_reinit_buffer(buf_desc); ++} ++ ++void a4l_cleanup_buffer(struct a4l_buffer *buf_desc) ++{ ++ a4l_cleanup_sync(&buf_desc->sync); ++} ++ ++int a4l_setup_buffer(struct a4l_device_context *cxt, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_buffer *buf_desc = cxt->buffer; ++ int i; ++ ++ /* Retrieve the related subdevice */ ++ buf_desc->subd = a4l_get_subd(cxt->dev, cmd->idx_subd); ++ if (buf_desc->subd == NULL) { ++ __a4l_err("a4l_setup_buffer: subdevice index " ++ "out of range (%d)\n", cmd->idx_subd); ++ return -EINVAL; ++ } ++ ++ if (test_and_set_bit(A4L_SUBD_BUSY_NR, &buf_desc->subd->status)) { ++ __a4l_err("a4l_setup_buffer: subdevice %d already busy\n", ++ cmd->idx_subd); ++ return -EBUSY; ++ } ++ ++ /* Checks if the transfer system has to work in bulk mode */ ++ if (cmd->flags & A4L_CMD_BULK) ++ set_bit(A4L_BUF_BULK_NR, &buf_desc->flags); ++ ++ /* Sets the working command */ ++ buf_desc->cur_cmd = cmd; ++ ++ /* Link the subdevice with the context's buffer */ ++ buf_desc->subd->buf = buf_desc; ++ 
++ /* Computes the count to reach, if need be */ ++ if (cmd->stop_src == TRIG_COUNT) { ++ for (i = 0; i < cmd->nb_chan; i++) { ++ struct a4l_channel *chft; ++ chft = a4l_get_chfeat(buf_desc->subd, ++ CR_CHAN(cmd->chan_descs[i])); ++ buf_desc->end_count += chft->nb_bits / 8; ++ } ++ buf_desc->end_count *= cmd->stop_arg; ++ } ++ ++ __a4l_dbg(1, core_dbg, "end_count=%lu\n", buf_desc->end_count); ++ ++ return 0; ++} ++ ++void a4l_cancel_buffer(struct a4l_device_context *cxt) ++{ ++ struct a4l_buffer *buf_desc = cxt->buffer; ++ struct a4l_subdevice *subd = buf_desc->subd; ++ ++ if (!subd || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return; ++ ++ /* If a "cancel" function is registered, call it ++ (Note: this function is called before having checked ++ if a command is under progress; we consider that ++ the "cancel" function can be used as as to (re)initialize ++ some component) */ ++ if (subd->cancel != NULL) ++ subd->cancel(subd); ++ ++ if (buf_desc->cur_cmd != NULL) { ++ a4l_free_cmddesc(buf_desc->cur_cmd); ++ rtdm_free(buf_desc->cur_cmd); ++ buf_desc->cur_cmd = NULL; ++ } ++ ++ a4l_reinit_buffer(buf_desc); ++ ++ clear_bit(A4L_SUBD_BUSY_NR, &subd->status); ++ subd->buf = NULL; ++} ++ ++/* --- Munge related function --- */ ++ ++int a4l_get_chan(struct a4l_subdevice *subd) ++{ ++ int i, j, tmp_count, tmp_size = 0; ++ struct a4l_cmd_desc *cmd; ++ ++ cmd = a4l_get_cmd(subd); ++ if (!cmd) ++ return -EINVAL; ++ ++ /* There is no need to check the channel idx, ++ it has already been controlled in command_test */ ++ ++ /* We assume channels can have different sizes; ++ so, we have to compute the global size of the channels ++ in this command... */ ++ for (i = 0; i < cmd->nb_chan; i++) { ++ j = (subd->chan_desc->mode != A4L_CHAN_GLOBAL_CHANDESC) ? ++ CR_CHAN(cmd->chan_descs[i]) : 0; ++ tmp_size += subd->chan_desc->chans[j].nb_bits; ++ } ++ ++ /* Translation bits -> bytes */ ++ tmp_size /= 8; ++ ++ tmp_count = subd->buf->mng_count % tmp_size; ++ ++ /* Translation bytes -> bits */ ++ tmp_count *= 8; ++ ++ /* ...and find the channel the last munged sample ++ was related with */ ++ for (i = 0; tmp_count > 0 && i < cmd->nb_chan; i++) { ++ j = (subd->chan_desc->mode != A4L_CHAN_GLOBAL_CHANDESC) ? 
++ CR_CHAN(cmd->chan_descs[i]) : 0; ++ tmp_count -= subd->chan_desc->chans[j].nb_bits; ++ } ++ ++ if (tmp_count == 0) ++ return i; ++ else ++ return -EINVAL; ++} ++ ++/* --- Transfer / copy functions --- */ ++ ++/* The following functions are explained in the Doxygen section ++ "Buffer management services" in driver_facilities.c */ ++ ++int a4l_buf_prepare_absput(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_input(subd)) ++ return -EINVAL; ++ ++ return __pre_abs_put(buf, count); ++} ++ ++ ++int a4l_buf_commit_absput(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_input(subd)) ++ return -EINVAL; ++ ++ return __abs_put(buf, count); ++} ++ ++int a4l_buf_prepare_put(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_input(subd)) ++ return -EINVAL; ++ ++ return __pre_put(buf, count); ++} ++ ++int a4l_buf_commit_put(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_input(subd)) ++ return -EINVAL; ++ ++ return __put(buf, count); ++} ++ ++int a4l_buf_put(struct a4l_subdevice *subd, void *bufdata, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ int err; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_input(subd)) ++ return -EINVAL; ++ ++ if (__count_to_put(buf) < count) ++ return -EAGAIN; ++ ++ err = __produce(NULL, buf, bufdata, count); ++ if (err < 0) ++ return err; ++ ++ err = __put(buf, count); ++ ++ return err; ++} ++ ++int a4l_buf_prepare_absget(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_output(subd)) ++ return -EINVAL; ++ ++ return __pre_abs_get(buf, count); ++} ++ ++int a4l_buf_commit_absget(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_output(subd)) ++ return -EINVAL; ++ ++ return __abs_get(buf, count); ++} ++ ++int a4l_buf_prepare_get(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_output(subd)) ++ return -EINVAL; ++ ++ return __pre_get(buf, count); ++} ++ ++int a4l_buf_commit_get(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ /* Basic checkings */ ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_output(subd)) ++ return -EINVAL; ++ ++ return __get(buf, count); ++} ++ ++int a4l_buf_get(struct a4l_subdevice *subd, void *bufdata, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ int err; ++ ++ /* Basic checkings */ ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_output(subd)) ++ return -EINVAL; ++ ++ if 
(__count_to_get(buf) < count) ++ return -EAGAIN; ++ ++ /* Update the counter */ ++ err = __consume(NULL, buf, bufdata, count); ++ if (err < 0) ++ return err; ++ ++ /* Perform the transfer */ ++ err = __get(buf, count); ++ ++ return err; ++} ++ ++int a4l_buf_evt(struct a4l_subdevice *subd, unsigned long evts) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ int tmp; ++ unsigned long wake = 0, count = ULONG_MAX; ++ ++ /* Warning: here, there may be a condition race : the cancel ++ function is called by the user side and a4l_buf_evt and all ++ the a4l_buf_... functions are called by the kernel ++ side. Nonetheless, the driver should be in charge of such ++ race conditions, not the framework */ ++ ++ /* Basic checking */ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ /* Here we save the data count available for the user side */ ++ if (evts == 0) { ++ count = a4l_subd_is_input(subd) ? ++ __count_to_get(buf) : __count_to_put(buf); ++ wake = __count_to_end(buf) < buf->wake_count ? ++ __count_to_end(buf) : buf->wake_count; ++ } else { ++ /* Even if it is a little more complex, atomic ++ operations are used so as to prevent any kind of ++ corner case */ ++ while ((tmp = ffs(evts) - 1) != -1) { ++ set_bit(tmp, &buf->flags); ++ clear_bit(tmp, &evts); ++ } ++ } ++ ++ if (count >= wake) ++ /* Notify the user-space side */ ++ a4l_signal_sync(&buf->sync); ++ ++ return 0; ++} ++ ++unsigned long a4l_buf_count(struct a4l_subdevice *subd) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ unsigned long ret = 0; ++ ++ /* Basic checking */ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (a4l_subd_is_input(subd)) ++ ret = __count_to_put(buf); ++ else if (a4l_subd_is_output(subd)) ++ ret = __count_to_get(buf); ++ ++ return ret; ++} ++ ++/* --- Mmap functions --- */ ++ ++void a4l_map(struct vm_area_struct *area) ++{ ++ unsigned long *status = (unsigned long *)area->vm_private_data; ++ set_bit(A4L_BUF_MAP_NR, status); ++} ++ ++void a4l_unmap(struct vm_area_struct *area) ++{ ++ unsigned long *status = (unsigned long *)area->vm_private_data; ++ clear_bit(A4L_BUF_MAP_NR, status); ++} ++ ++static struct vm_operations_struct a4l_vm_ops = { ++ .open = a4l_map, ++ .close = a4l_unmap, ++}; ++ ++int a4l_ioctl_mmap(struct a4l_device_context *cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ a4l_mmap_t map_cfg; ++ struct a4l_device *dev; ++ struct a4l_buffer *buf; ++ int ret; ++ ++ /* The mmap operation cannot be performed in a ++ real-time context */ ++ if (rtdm_in_rt_context()) { ++ return -ENOSYS; ++ } ++ ++ dev = a4l_get_dev(cxt); ++ buf = cxt->buffer; ++ ++ /* Basic checkings */ ++ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_mmap: cannot mmap on " ++ "an unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (test_bit(A4L_BUF_MAP_NR, &buf->flags)) { ++ __a4l_err("a4l_ioctl_mmap: buffer already mapped\n"); ++ return -EBUSY; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &map_cfg, arg, sizeof(a4l_mmap_t)) != 0) ++ return -EFAULT; ++ ++ /* Check the size to be mapped */ ++ if ((map_cfg.size & ~(PAGE_MASK)) != 0 || map_cfg.size > buf->size) ++ return -EFAULT; ++ ++ /* All the magic is here */ ++ ret = rtdm_mmap_to_user(fd, ++ buf->buf, ++ map_cfg.size, ++ PROT_READ | PROT_WRITE, ++ &map_cfg.ptr, &a4l_vm_ops, &buf->flags); ++ ++ if (ret < 0) { ++ __a4l_err("a4l_ioctl_mmap: internal error, " ++ "rtdm_mmap_to_user failed (err=%d)\n", ret); ++ return ret; ++ } ++ ++ return rtdm_safe_copy_to_user(fd, ++ 
arg, &map_cfg, sizeof(a4l_mmap_t)); ++} ++ ++/* --- IOCTL / FOPS functions --- */ ++ ++int a4l_ioctl_cancel(struct a4l_device_context * cxt, void *arg) ++{ ++ unsigned int idx_subd = (unsigned long)arg; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_subdevice *subd; ++ ++ /* Basically check the device */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_cancel: operation not supported on " ++ "an unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (cxt->buffer->subd == NULL) { ++ __a4l_err("a4l_ioctl_cancel: " ++ "no acquisition to cancel on this context\n"); ++ return -EINVAL; ++ } ++ ++ if (idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_ioctl_cancel: bad subdevice index\n"); ++ return -EINVAL; ++ } ++ ++ subd = dev->transfer.subds[idx_subd]; ++ ++ if (subd != cxt->buffer->subd) { ++ __a4l_err("a4l_ioctl_cancel: " ++ "current context works on another subdevice " ++ "(%d!=%d)\n", cxt->buffer->subd->idx, subd->idx); ++ return -EINVAL; ++ } ++ ++ a4l_cancel_buffer(cxt); ++ return 0; ++} ++ ++/* The ioctl BUFCFG is only useful for changing the size of the ++ asynchronous buffer. ++ (BUFCFG = free of the current buffer + allocation of a new one) */ ++ ++int a4l_ioctl_bufcfg(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ a4l_bufcfg_t buf_cfg; ++ ++ /* As Linux API is used to allocate a virtual buffer, ++ the calling process must not be in primary mode */ ++ if (rtdm_in_rt_context()) { ++ return -ENOSYS; ++ } ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_bufcfg: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &buf_cfg, ++ arg, sizeof(a4l_bufcfg_t)) != 0) ++ return -EFAULT; ++ ++ if (buf_cfg.buf_size > A4L_BUF_MAXSIZE) { ++ __a4l_err("a4l_ioctl_bufcfg: buffer size too big (<=16MB)\n"); ++ return -EINVAL; ++ } ++ ++ if (buf_cfg.idx_subd == A4L_BUF_DEFMAGIC) { ++ cxt->dev->transfer.default_bufsize = buf_cfg.buf_size; ++ return 0; ++ } ++ ++ if (subd && test_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ __a4l_err("a4l_ioctl_bufcfg: acquisition in progress\n"); ++ return -EBUSY; ++ } ++ ++ if (test_bit(A4L_BUF_MAP, &buf->flags)) { ++ __a4l_err("a4l_ioctl_bufcfg: please unmap before " ++ "configuring buffer\n"); ++ return -EPERM; ++ } ++ ++ /* Free the buffer... */ ++ a4l_free_buffer(buf); ++ ++ /* ...to reallocate it */ ++ return a4l_alloc_buffer(buf, buf_cfg.buf_size); ++} ++ ++/* The ioctl BUFCFG2 allows the user space process to define the ++ minimal amount of data which should trigger a wake-up. If the ABI ++ could be broken, this facility would be handled by the original ++ BUFCFG ioctl. At the next major release, this ioctl will vanish. 
*/ ++ ++int a4l_ioctl_bufcfg2(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ a4l_bufcfg2_t buf_cfg; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_bufcfg2: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &buf_cfg, ++ arg, sizeof(a4l_bufcfg2_t)) != 0) ++ return -EFAULT; ++ ++ if (buf_cfg.wake_count > buf->size) { ++ __a4l_err("a4l_ioctl_bufcfg2: " ++ "wake-up threshold too big (> buffer size: %lu)\n", ++ buf->size); ++ return -EINVAL; ++ } ++ ++ buf->wake_count = buf_cfg.wake_count; ++ ++ return 0; ++} ++ ++/* The BUFINFO ioctl provides two basic roles: ++ - tell the user app the size of the asynchronous buffer ++ - display the read/write counters (how many bytes to read/write) */ ++ ++int a4l_ioctl_bufinfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ a4l_bufinfo_t info; ++ ++ unsigned long tmp_cnt; ++ int ret; ++ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_bufinfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &info, arg, sizeof(a4l_bufinfo_t)) != 0) ++ return -EFAULT; ++ ++ ++ /* If a transfer is not occuring, simply return buffer ++ informations, otherwise make the transfer progress */ ++ if (!subd || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ info.rw_count = 0; ++ goto a4l_ioctl_bufinfo_out; ++ } ++ ++ ret = __handle_event(buf); ++ ++ if (a4l_subd_is_input(subd)) { ++ ++ /* Updates consume count if rw_count is not null */ ++ if (info.rw_count != 0) ++ buf->cns_count += info.rw_count; ++ ++ /* Retrieves the data amount to read */ ++ tmp_cnt = info.rw_count = __count_to_get(buf); ++ ++ __a4l_dbg(1, core_dbg, "count to read=%lu\n", tmp_cnt); ++ ++ if ((ret < 0 && ret != -ENOENT) || ++ (ret == -ENOENT && tmp_cnt == 0)) { ++ a4l_cancel_buffer(cxt); ++ return ret; ++ } ++ } else if (a4l_subd_is_output(subd)) { ++ ++ if (ret < 0) { ++ a4l_cancel_buffer(cxt); ++ if (info.rw_count != 0) ++ return ret; ++ } ++ ++ /* If rw_count is not null, ++ there is something to write / munge */ ++ if (info.rw_count != 0 && info.rw_count <= __count_to_put(buf)) { ++ ++ /* Updates the production pointer */ ++ buf->prd_count += info.rw_count; ++ ++ /* Sets the munge count */ ++ tmp_cnt = info.rw_count; ++ } else ++ tmp_cnt = 0; ++ ++ /* Retrieves the data amount which is writable */ ++ info.rw_count = __count_to_put(buf); ++ ++ __a4l_dbg(1, core_dbg, " count to write=%lu\n", info.rw_count); ++ ++ } else { ++ __a4l_err("a4l_ioctl_bufinfo: inappropriate subdevice\n"); ++ return -EINVAL; ++ } ++ ++ /* Performs the munge if need be */ ++ if (subd->munge != NULL) { ++ ++ /* Call the munge callback */ ++ __munge(subd, subd->munge, buf, tmp_cnt); ++ ++ /* Updates munge count */ ++ buf->mng_count += tmp_cnt; ++ } ++ ++a4l_ioctl_bufinfo_out: ++ ++ /* Sets the buffer size */ ++ info.buf_size = buf->size; ++ ++ /* Sends the structure back to user space */ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, &info, sizeof(a4l_bufinfo_t)) != 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++/* The ioctl BUFINFO2 tells the user application the 
minimal amount of ++data which should trigger a wake-up. If the ABI could be broken, this ++facility would be handled by the original BUFINFO ioctl. At the next ++major release, this ioctl will vanish. */ ++ ++int a4l_ioctl_bufinfo2(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ a4l_bufcfg2_t buf_cfg; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_bufcfg2: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ buf_cfg.wake_count = buf->wake_count; ++ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, &buf_cfg, sizeof(a4l_bufcfg2_t)) != 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++/* The function a4l_read_buffer can be considered as the kernel entry ++ point of the RTDM syscall read. This syscall is supposed to be used ++ only during asynchronous acquisitions */ ++ssize_t a4l_read_buffer(struct a4l_device_context * cxt, void *bufdata, size_t nbytes) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ ssize_t count = 0; ++ ++ /* Basic checkings */ ++ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_read: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (!subd || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ __a4l_err("a4l_read: idle subdevice on this context\n"); ++ return -ENOENT; ++ } ++ ++ if (!a4l_subd_is_input(subd)) { ++ __a4l_err("a4l_read: operation requires an input subdevice \n"); ++ return -EINVAL; ++ } ++ ++ while (count < nbytes) { ++ ++ unsigned long tmp_cnt; ++ ++ /* Check the events */ ++ int ret = __handle_event(buf); ++ ++ __dump_buffer_counters(buf); ++ ++ /* Compute the data amount to copy */ ++ tmp_cnt = __count_to_get(buf); ++ ++ /* Check tmp_cnt count is not higher than ++ the global count to read */ ++ if (tmp_cnt > nbytes - count) ++ tmp_cnt = nbytes - count; ++ ++ /* We check whether there is an error */ ++ if (ret < 0 && ret != -ENOENT) { ++ __a4l_err("a4l_read: failed to handle event %d \n", ret); ++ a4l_cancel_buffer(cxt); ++ count = ret; ++ goto out_a4l_read; ++ } ++ ++ /* We check whether the acquisition is over */ ++ if (ret == -ENOENT && tmp_cnt == 0) { ++ __a4l_info("a4l_read: acquisition done - all data " ++ "requested by the client was delivered \n"); ++ a4l_cancel_buffer(cxt); ++ count = 0; ++ goto out_a4l_read; ++ } ++ ++ if (tmp_cnt > 0) { ++ ++ /* Performs the munge if need be */ ++ if (subd->munge != NULL) { ++ __munge(subd, subd->munge, buf, tmp_cnt); ++ ++ /* Updates munge count */ ++ buf->mng_count += tmp_cnt; ++ } ++ ++ /* Performs the copy */ ++ ret = __consume(cxt, buf, bufdata + count, tmp_cnt); ++ ++ if (ret < 0) { ++ count = ret; ++ goto out_a4l_read; ++ } ++ ++ /* Updates consume count */ ++ buf->cns_count += tmp_cnt; ++ a4l_dbg(1, core_dbg, dev, "buf->cns_cnt=%ld \n", buf->cns_count); ++ ++ /* Updates the return value */ ++ count += tmp_cnt; ++ ++ /* If the driver does not work in bulk mode, ++ we must leave this function */ ++ if (!test_bit(A4L_BUF_BULK, &buf->flags)) ++ goto out_a4l_read; ++ } ++ else { ++ /* If the acquisition is not over, we must not ++ leave the function without having read a least byte */ ++ ret = a4l_wait_sync(&(buf->sync), rtdm_in_rt_context()); ++ if (ret < 0) { ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ count = ret; ++ goto out_a4l_read; ++ } ++ } ++ } ++ ++out_a4l_read: ++ ++ return count; ++} ++ ++/* The 
function a4l_write_buffer can be considered as the kernel entry ++ point of the RTDM syscall write. This syscall is supposed to be ++ used only during asynchronous acquisitions */ ++ssize_t a4l_write_buffer(struct a4l_device_context *cxt, const void *bufdata, size_t nbytes) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ ssize_t count = 0; ++ ++ /* Basic checkings */ ++ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_write: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (!subd || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ __a4l_err("a4l_write: idle subdevice on this context\n"); ++ return -ENOENT; ++ } ++ ++ if (!a4l_subd_is_output(subd)) { ++ __a4l_err("a4l_write: operation requires an output subdevice \n"); ++ return -EINVAL; ++ } ++ ++ while (count < nbytes) { ++ ++ unsigned long tmp_cnt; ++ ++ /* Check the events */ ++ int ret = __handle_event(buf); ++ ++ __dump_buffer_counters(buf); ++ ++ /* Compute the data amount to copy */ ++ tmp_cnt = __count_to_put(buf); ++ ++ /* Check tmp_cnt count is not higher than ++ the global count to write */ ++ if (tmp_cnt > nbytes - count) ++ tmp_cnt = nbytes - count; ++ ++ if (ret < 0) { ++ count = (ret == -ENOENT) ? -EINVAL : ret; ++ __a4l_err("a4l_write: failed to handle event %d \n", ret); ++ a4l_cancel_buffer(cxt); ++ goto out_a4l_write; ++ } ++ ++ if (tmp_cnt > 0) { ++ ++ ++ /* Performs the copy */ ++ ret = __produce(cxt, ++ buf, (void *)bufdata + count, tmp_cnt); ++ if (ret < 0) { ++ count = ret; ++ goto out_a4l_write; ++ } ++ ++ /* Performs the munge if need be */ ++ if (subd->munge != NULL) { ++ __munge(subd, subd->munge, buf, tmp_cnt); ++ ++ /* Updates munge count */ ++ buf->mng_count += tmp_cnt; ++ } ++ ++ /* Updates produce count */ ++ buf->prd_count += tmp_cnt; ++ a4l_dbg(1, core_dbg, dev , "buf->prd_cnt=%ld \n", buf->prd_count); ++ ++ /* Updates the return value */ ++ count += tmp_cnt; ++ ++ /* If the driver does not work in bulk mode, ++ we must leave this function */ ++ if (!test_bit(A4L_BUF_BULK, &buf->flags)) ++ goto out_a4l_write; ++ } else { ++ /* The buffer is full, we have to wait for a slot to free */ ++ ret = a4l_wait_sync(&(buf->sync), rtdm_in_rt_context()); ++ if (ret < 0) { ++ __a4l_err("a4l_write: failed to wait for free slot (%d)\n", ret); ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ count = ret; ++ goto out_a4l_write; ++ } ++ } ++ } ++ ++out_a4l_write: ++ ++ return count; ++} ++ ++int a4l_select(struct a4l_device_context *cxt, ++ rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ ++ /* Basic checkings */ ++ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_select: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (!subd || !test_bit(A4L_SUBD_BUSY, &subd->status)) { ++ __a4l_err("a4l_select: idle subdevice on this context\n"); ++ return -ENOENT; ++ } ++ ++ /* Check the RTDM select type ++ (RTDM_SELECTTYPE_EXCEPT is not supported) */ ++ ++ if(type != RTDM_SELECTTYPE_READ && ++ type != RTDM_SELECTTYPE_WRITE) { ++ __a4l_err("a4l_select: wrong select argument\n"); ++ return -EINVAL; ++ } ++ ++ if (type == RTDM_SELECTTYPE_READ && !a4l_subd_is_input(subd)) { ++ __a4l_err("a4l_select: current context " ++ "does not work with an input subdevice\n"); ++ return -EINVAL; ++ } ++ ++ if (type == RTDM_SELECTTYPE_WRITE && 
!a4l_subd_is_output(subd)) { ++ __a4l_err("a4l_select: current context " ++ "does not work with an input subdevice\n"); ++ return -EINVAL; ++ } ++ ++ /* Performs a bind on the Analogy synchronization element */ ++ return a4l_select_sync(&(buf->sync), selector, type, fd_index); ++} ++ ++int a4l_ioctl_poll(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int ret = 0; ++ unsigned long tmp_cnt = 0; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ a4l_poll_t poll; ++ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ /* Basic checking */ ++ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_poll: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (!subd || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ __a4l_err("a4l_poll: idle subdevice on this context\n"); ++ return -ENOENT; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &poll, arg, sizeof(a4l_poll_t)) != 0) ++ return -EFAULT; ++ ++ /* Checks the buffer events */ ++ a4l_flush_sync(&buf->sync); ++ ret = __handle_event(buf); ++ ++ /* Retrieves the data amount to compute ++ according to the subdevice type */ ++ if (a4l_subd_is_input(subd)) { ++ ++ tmp_cnt = __count_to_get(buf); ++ ++ /* Check if some error occured */ ++ if (ret < 0 && ret != -ENOENT) { ++ a4l_cancel_buffer(cxt); ++ return ret; ++ } ++ ++ /* Check whether the acquisition is over */ ++ if (ret == -ENOENT && tmp_cnt == 0) { ++ a4l_cancel_buffer(cxt); ++ return 0; ++ } ++ } else { ++ ++ /* If some error was detected, cancel the transfer */ ++ if (ret < 0) { ++ a4l_cancel_buffer(cxt); ++ return ret; ++ } ++ ++ tmp_cnt = __count_to_put(buf); ++ } ++ ++ if (poll.arg == A4L_NONBLOCK || tmp_cnt != 0) ++ goto out_poll; ++ ++ if (poll.arg == A4L_INFINITE) ++ ret = a4l_wait_sync(&(buf->sync), rtdm_in_rt_context()); ++ else { ++ unsigned long long ns = ((unsigned long long)poll.arg) * ++ ((unsigned long long)NSEC_PER_MSEC); ++ ret = a4l_timedwait_sync(&(buf->sync), rtdm_in_rt_context(), ns); ++ } ++ ++ if (ret == 0) { ++ /* Retrieves the count once more */ ++ if (a4l_subd_is_input(dev->transfer.subds[poll.idx_subd])) ++ tmp_cnt = __count_to_get(buf); ++ else ++ tmp_cnt = __count_to_put(buf); ++ } ++ else ++ return ret; ++ ++out_poll: ++ ++ poll.arg = tmp_cnt; ++ ++ ret = rtdm_safe_copy_to_user(fd, ++ arg, &poll, sizeof(a4l_poll_t)); ++ ++ return ret; ++} +--- linux/drivers/xenomai/analogy/intel/8255.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/intel/8255.c 2021-04-07 16:01:27.919633178 +0800 +@@ -0,0 +1,331 @@ ++/* ++ * Analogy subdevice driver for 8255 chip ++ * Copyright (C) 1999 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++ ++#include "8255.h" ++ ++#define CALLBACK_ARG (((subd_8255_t *)subd->priv)->cb_arg) ++#define CALLBACK_FUNC (((subd_8255_t *)subd->priv)->cb_func) ++ ++/* Channels descriptor */ ++static struct a4l_channels_desc chandesc_8255 = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 24, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, sizeof(sampl_t)}, ++ }, ++}; ++ ++/* Command options mask */ ++static struct a4l_cmd_desc cmd_mask_8255 = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW, ++ .scan_begin_src = TRIG_EXT, ++ .convert_src = TRIG_FOLLOW, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_NONE, ++}; ++ ++void a4l_subdev_8255_interrupt(struct a4l_subdevice *subd) ++{ ++ sampl_t d; ++ ++ /* Retrieve the sample... */ ++ d = CALLBACK_FUNC(0, _8255_DATA, 0, CALLBACK_ARG); ++ d |= (CALLBACK_FUNC(0, _8255_DATA + 1, 0, CALLBACK_ARG) << 8); ++ ++ /* ...and send it */ ++ a4l_buf_put(subd, &d, sizeof(sampl_t)); ++ ++ a4l_buf_evt(subd, 0); ++} ++EXPORT_SYMBOL_GPL(a4l_subdev_8255_interrupt); ++ ++static int subdev_8255_cb(int dir, int port, int data, unsigned long arg) ++{ ++ unsigned long iobase = arg; ++ ++ if (dir) { ++ outb(data, iobase + port); ++ return 0; ++ } else { ++ return inb(iobase + port); ++ } ++} ++ ++static void do_config(struct a4l_subdevice *subd) ++{ ++ int config; ++ subd_8255_t *subd_8255 = (subd_8255_t *)subd->priv; ++ ++ config = CR_CW; ++ /* 1 in io_bits indicates output, 1 in config indicates input */ ++ if (!(subd_8255->io_bits & 0x0000ff)) ++ config |= CR_A_IO; ++ if (!(subd_8255->io_bits & 0x00ff00)) ++ config |= CR_B_IO; ++ if (!(subd_8255->io_bits & 0x0f0000)) ++ config |= CR_C_LO_IO; ++ if (!(subd_8255->io_bits & 0xf00000)) ++ config |= CR_C_HI_IO; ++ CALLBACK_FUNC(1, _8255_CR, config, CALLBACK_ARG); ++} ++ ++int subd_8255_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ /* FIXME */ ++ return 0; ++} ++ ++int subd_8255_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ if (cmd->start_arg != 0) { ++ cmd->start_arg = 0; ++ return -EINVAL; ++ } ++ if (cmd->scan_begin_arg != 0) { ++ cmd->scan_begin_arg = 0; ++ return -EINVAL; ++ } ++ if (cmd->convert_arg != 0) { ++ cmd->convert_arg = 0; ++ return -EINVAL; ++ } ++ if (cmd->scan_end_arg != 1) { ++ cmd->scan_end_arg = 1; ++ return -EINVAL; ++ } ++ if (cmd->stop_arg != 0) { ++ cmd->stop_arg = 0; ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++void subd_8255_cancel(struct a4l_subdevice *subd) ++{ ++ /* FIXME */ ++} ++ ++int subd_8255_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ subd_8255_t *subd_8255 = (subd_8255_t *)subd->priv; ++ uint32_t *data = (uint32_t *)insn->data; ++ ++ if (data[0]) { ++ ++ subd_8255->status &= ~data[0]; ++ subd_8255->status |= (data[0] & data[1]); ++ ++ if (data[0] & 0xff) ++ CALLBACK_FUNC(1, _8255_DATA, ++ subd_8255->status & 0xff, CALLBACK_ARG); ++ if (data[0] & 0xff00) ++ CALLBACK_FUNC(1, _8255_DATA + 1, ++ (subd_8255->status >> 8) & 0xff, ++ CALLBACK_ARG); ++ if (data[0] & 0xff0000) ++ CALLBACK_FUNC(1, _8255_DATA + 2, ++ (subd_8255->status >> 16) & 0xff, ++ CALLBACK_ARG); ++ } ++ ++ data[1] = CALLBACK_FUNC(0, _8255_DATA, 0, CALLBACK_ARG); ++ data[1] |= (CALLBACK_FUNC(0, _8255_DATA + 1, 0, CALLBACK_ARG) << 8); ++ data[1] |= (CALLBACK_FUNC(0, _8255_DATA + 2, 0, CALLBACK_ARG) << 16); ++ ++ return 0; ++} ++ ++int subd_8255_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ unsigned int mask; ++ unsigned int bits; ++ subd_8255_t *subd_8255 = (subd_8255_t 
*)subd->priv; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ mask = 1 << CR_CHAN(insn->chan_desc); ++ ++ if (mask & 0x0000ff) { ++ bits = 0x0000ff; ++ } else if (mask & 0x00ff00) { ++ bits = 0x00ff00; ++ } else if (mask & 0x0f0000) { ++ bits = 0x0f0000; ++ } else { ++ bits = 0xf00000; ++ } ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ subd_8255->io_bits &= ~bits; ++ break; ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ subd_8255->io_bits |= bits; ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (subd_8255->io_bits & bits) ? ++ A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ do_config(subd); ++ ++ return 0; ++} ++ ++void a4l_subdev_8255_init(struct a4l_subdevice *subd) ++{ ++ subd_8255_t *subd_8255 = (subd_8255_t *)subd->priv; ++ /* Initializes the subdevice structure */ ++ memset(subd, 0, sizeof(struct a4l_subdevice)); ++ ++ /* Subdevice filling part */ ++ ++ subd->flags = A4L_SUBD_DIO; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->chan_desc = &chandesc_8255; ++ subd->insn_bits = subd_8255_insn_bits; ++ subd->insn_config = subd_8255_insn_config; ++ ++ if(subd_8255->have_irq) { ++ subd->cmd_mask = &cmd_mask_8255; ++ subd->do_cmdtest = subd_8255_cmdtest; ++ subd->do_cmd = subd_8255_cmd; ++ subd->cancel = subd_8255_cancel; ++ } ++ ++ /* 8255 setting part */ ++ ++ if(CALLBACK_FUNC == NULL) ++ CALLBACK_FUNC = subdev_8255_cb; ++ ++ do_config(subd); ++} ++EXPORT_SYMBOL_GPL(a4l_subdev_8255_init); ++ ++/* ++ ++ Start of the 8255 standalone device ++ ++*/ ++ ++static int dev_8255_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ unsigned long *addrs; ++ int i, err = 0; ++ ++ if(arg->opts == NULL || arg->opts_size == 0) { ++ a4l_err(dev, ++ "dev_8255_attach: unable to detect any 8255 chip, " ++ "chips addresses must be passed as attach arguments\n"); ++ return -EINVAL; ++ } ++ ++ addrs = (unsigned long*) arg->opts; ++ ++ for(i = 0; i < (arg->opts_size / sizeof(unsigned long)); i++) { ++ struct a4l_subdevice * subd; ++ subd_8255_t *subd_8255; ++ ++ subd = a4l_alloc_subd(sizeof(subd_8255_t), NULL); ++ if(subd == NULL) { ++ a4l_err(dev, ++ "dev_8255_attach: " ++ "unable to allocate subdevice\n"); ++ /* There is no need to free previously ++ allocated structure(s), the analogy layer will ++ do it for us */ ++ err = -ENOMEM; ++ goto out_attach; ++ } ++ ++ memset(subd, 0, sizeof(struct a4l_subdevice)); ++ memset(subd->priv, 0, sizeof(subd_8255_t)); ++ ++ subd_8255 = (subd_8255_t *)subd->priv; ++ ++ if(request_region(addrs[i], _8255_SIZE, "Analogy 8255") == 0) { ++ subd->flags = A4L_SUBD_UNUSED; ++ a4l_warn(dev, ++ "dev_8255_attach: " ++ "I/O port conflict at 0x%lx\n", addrs[i]); ++ } ++ else { ++ subd_8255->cb_arg = addrs[i]; ++ a4l_subdev_8255_init(subd); ++ } ++ ++ err = a4l_add_subd(dev, subd); ++ if(err < 0) { ++ a4l_err(dev, ++ "dev_8255_attach: " ++ "a4l_add_subd() failed (err=%d)\n", err); ++ goto out_attach; ++ } ++ } ++ ++out_attach: ++ return err; ++} ++ ++static int dev_8255_detach(struct a4l_device *dev) ++{ ++ struct a4l_subdevice *subd; ++ int i = 0; ++ ++ while((subd = a4l_get_subd(dev, i++)) != NULL) { ++ subd_8255_t *subd_8255 = (subd_8255_t *) subd->priv; ++ if(subd_8255 != NULL && subd_8255->cb_arg != 0) ++ release_region(subd_8255->cb_arg, _8255_SIZE); ++ } ++ ++ return 0; ++} ++ ++static struct a4l_driver drv_8255 = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_8255", ++ .driver_name = "8255", ++ .attach = dev_8255_attach, ++ .detach = dev_8255_detach, ++ .privdata_size = 0, ++}; ++ ++static int __init 
drv_8255_init(void) ++{ ++ return a4l_register_drv(&drv_8255); ++} ++ ++static void __exit drv_8255_cleanup(void) ++{ ++ a4l_unregister_drv(&drv_8255); ++} ++MODULE_DESCRIPTION("Analogy driver for 8255 chip"); ++MODULE_LICENSE("GPL"); ++ ++module_init(drv_8255_init); ++module_exit(drv_8255_cleanup); +--- linux/drivers/xenomai/analogy/intel/parport.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/intel/parport.c 2021-04-07 16:01:27.914633185 +0800 +@@ -0,0 +1,457 @@ ++/* ++ * Analogy driver for standard parallel port ++ * Copyright (C) 1998,2001 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ A cheap and easy way to get a few more digital I/O lines. Steal ++ additional parallel ports from old computers or your neighbors' ++ computers. ++ ++ Attach options list: ++ 0: I/O port base for the parallel port. ++ 1: IRQ ++ ++ Parallel Port Lines: ++ ++ pin subdev chan aka ++ --- ------ ---- --- ++ 1 2 0 strobe ++ 2 0 0 data 0 ++ 3 0 1 data 1 ++ 4 0 2 data 2 ++ 5 0 3 data 3 ++ 6 0 4 data 4 ++ 7 0 5 data 5 ++ 8 0 6 data 6 ++ 9 0 7 data 7 ++ 10 1 3 acknowledge ++ 11 1 4 busy ++ 12 1 2 output ++ 13 1 1 printer selected ++ 14 2 1 auto LF ++ 15 1 0 error ++ 16 2 2 init ++ 17 2 3 select printer ++ 18-25 ground ++ ++ Notes: ++ ++ Subdevices 0 is digital I/O, subdevice 1 is digital input, and ++ subdevice 2 is digital output. Unlike other Analogy devices, ++ subdevice 0 defaults to output. ++ ++ Pins 13 and 14 are inverted once by Analogy and once by the ++ hardware, thus cancelling the effect. ++ ++ Pin 1 is a strobe, thus acts like one. There's no way in software ++ to change this, at least on a standard parallel port. ++ ++ Subdevice 3 pretends to be a digital input subdevice, but it always ++ returns 0 when read. However, if you run a command with ++ scan_begin_src=TRIG_EXT, it uses pin 10 as a external triggering ++ pin, which can be used to wake up tasks. ++ ++ see http://www.beyondlogic.org/ for information. 
++ or http://www.linux-magazin.de/ausgabe/1999/10/IO/io.html ++*/ ++ ++#include ++#include ++#include /* For inb/outb */ ++#include ++ ++#define PARPORT_SIZE 3 ++ ++#define PARPORT_A 0 ++#define PARPORT_B 1 ++#define PARPORT_C 2 ++ ++#define DEFAULT_ADDRESS 0x378 ++#define DEFAULT_IRQ 7 ++ ++typedef struct parport_subd_priv { ++ unsigned long io_bits; ++} parport_spriv_t; ++ ++typedef struct parport_priv { ++ unsigned long io_base; ++ unsigned int a_data; ++ unsigned int c_data; ++ int enable_irq; ++} parport_priv_t; ++ ++#define devpriv ((parport_priv_t *)(dev->priv)) ++ ++static int parport_insn_a(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ if (data[0]) { ++ devpriv->a_data &= ~data[0]; ++ devpriv->a_data |= (data[0] & data[1]); ++ ++ outb(devpriv->a_data, devpriv->io_base + PARPORT_A); ++ } ++ ++ data[1] = inb(devpriv->io_base + PARPORT_A); ++ ++ return 0; ++} ++ ++static int parport_insn_config_a(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ parport_spriv_t *spriv = (parport_spriv_t *)subd->priv; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ /* No need to check the channel descriptor; the input / output ++ setting is global for all channels */ ++ ++ switch (data[0]) { ++ ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ spriv->io_bits = 0xff; ++ devpriv->c_data &= ~(1 << 5); ++ break; ++ ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ spriv->io_bits = 0; ++ devpriv->c_data |= (1 << 5); ++ break; ++ ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (spriv->io_bits == 0xff) ? ++ A4L_OUTPUT: A4L_INPUT; ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ outb(devpriv->c_data, devpriv->io_base + PARPORT_C); ++ ++ return 0; ++} ++ ++static int parport_insn_b(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ if (data[0]) { ++ /* should writes be ignored? 
*/ ++ } ++ ++ data[1] = (inb(devpriv->io_base + PARPORT_B) >> 3); ++ ++ return 0; ++} ++ ++static int parport_insn_c(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ data[0] &= 0x0f; ++ if (data[0]) { ++ devpriv->c_data &= ~data[0]; ++ devpriv->c_data |= (data[0] & data[1]); ++ ++ outb(devpriv->c_data, devpriv->io_base + PARPORT_C); ++ } ++ ++ data[1] = devpriv->c_data & 0xf; ++ ++ return 2; ++} ++ ++static int parport_intr_insn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ if (insn->data_size < sizeof(uint8_t)) ++ return -EINVAL; ++ ++ data[1] = 0; ++ return 0; ++} ++ ++static struct a4l_cmd_desc parport_intr_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW, ++ .scan_begin_src = TRIG_EXT, ++ .convert_src = TRIG_FOLLOW, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_NONE, ++}; ++ ++static int parport_intr_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc * cmd) ++{ ++ ++ if (cmd->start_arg != 0) { ++ return -EINVAL; ++ } ++ if (cmd->scan_begin_arg != 0) { ++ return -EINVAL; ++ } ++ if (cmd->convert_arg != 0) { ++ return -EINVAL; ++ } ++ if (cmd->scan_end_arg != 1) { ++ return -EINVAL; ++ } ++ if (cmd->stop_arg != 0) { ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int parport_intr_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ devpriv->c_data |= 0x10; ++ outb(devpriv->c_data, devpriv->io_base + PARPORT_C); ++ ++ devpriv->enable_irq = 1; ++ ++ return 0; ++} ++ ++static void parport_intr_cancel(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ a4l_info(dev, "cancel in progress\n"); ++ ++ devpriv->c_data &= ~0x10; ++ outb(devpriv->c_data, devpriv->io_base + PARPORT_C); ++ ++ devpriv->enable_irq = 0; ++} ++ ++static int parport_interrupt(unsigned int irq, void *d) ++{ ++ struct a4l_device *dev = d; ++ struct a4l_subdevice *subd = a4l_get_subd(dev, 3); ++ ++ if (!devpriv->enable_irq) { ++ a4l_err(dev, "parport_interrupt: bogus irq, ignored\n"); ++ return IRQ_NONE; ++ } ++ ++ a4l_buf_put(subd, 0, sizeof(unsigned int)); ++ a4l_buf_evt(subd, 0); ++ ++ return 0; ++} ++ ++ ++/* --- Channels descriptor --- */ ++ ++static struct a4l_channels_desc parport_chan_desc_a = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 8, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++static struct a4l_channels_desc parport_chan_desc_b = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 5, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++static struct a4l_channels_desc parport_chan_desc_c = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 4, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++static struct a4l_channels_desc parport_chan_desc_intr = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 1, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++/* --- Subdevice initialization functions --- */ ++ ++static void setup_subd_a(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_DIO; ++ subd->chan_desc = &parport_chan_desc_a; ++ subd->rng_desc = &range_digital; ++ subd->insn_bits = parport_insn_a; ++ subd->insn_config = parport_insn_config_a; ++} ++ ++static void setup_subd_b(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_DI; ++ subd->chan_desc = &parport_chan_desc_b; ++ subd->rng_desc = &range_digital; ++ subd->insn_bits = parport_insn_b; ++} ++ ++static void 
setup_subd_c(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_DO; ++ subd->chan_desc = &parport_chan_desc_c; ++ subd->rng_desc = &range_digital; ++ subd->insn_bits = parport_insn_c; ++} ++ ++static void setup_subd_intr(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_DI; ++ subd->chan_desc = &parport_chan_desc_intr; ++ subd->rng_desc = &range_digital; ++ subd->insn_bits = parport_intr_insn; ++ subd->cmd_mask = &parport_intr_cmd_mask; ++ subd->do_cmdtest = parport_intr_cmdtest; ++ subd->do_cmd = parport_intr_cmd; ++ subd->cancel = parport_intr_cancel; ++} ++ ++static void (*setup_subds[3])(struct a4l_subdevice *) = { ++ setup_subd_a, ++ setup_subd_b, ++ setup_subd_c ++}; ++ ++static int dev_parport_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ int i, err = 0, irq = A4L_IRQ_UNUSED; ++ unsigned long io_base; ++ ++ if(arg->opts == NULL || arg->opts_size < sizeof(unsigned long)) { ++ ++ a4l_warn(dev, ++ "dev_parport_attach: no attach options specified, " ++ "taking default options (addr=0x%x, irq=%d)\n", ++ DEFAULT_ADDRESS, DEFAULT_IRQ); ++ ++ io_base = DEFAULT_ADDRESS; ++ irq = DEFAULT_IRQ; ++ } else { ++ ++ io_base = ((unsigned long *)arg->opts)[0]; ++ ++ if (arg->opts_size >= 2 * sizeof(unsigned long)) ++ irq = (int) ((unsigned long *)arg->opts)[1]; ++ } ++ ++ if (!request_region(io_base, PARPORT_SIZE, "analogy_parport")) { ++ a4l_err(dev, "dev_parport_attach: I/O port conflict"); ++ return -EIO; ++ } ++ ++ a4l_info(dev, "address = 0x%lx\n", io_base); ++ ++ for (i = 0; i < 3; i++) { ++ ++ struct a4l_subdevice *subd = a4l_alloc_subd(sizeof(parport_spriv_t), ++ setup_subds[i]); ++ if (subd == NULL) ++ return -ENOMEM; ++ ++ err = a4l_add_subd(dev, subd); ++ if (err != i) ++ return err; ++ } ++ ++ if (irq != A4L_IRQ_UNUSED) { ++ ++ struct a4l_subdevice *subd; ++ ++ a4l_info(dev, "irq = %d\n", irq); ++ ++ err = a4l_request_irq(dev, irq, parport_interrupt, 0, dev); ++ if (err < 0) { ++ a4l_err(dev, "dev_parport_attach: irq not available\n"); ++ return err; ++ } ++ ++ subd = a4l_alloc_subd(0, setup_subd_intr); ++ if (subd == NULL) ++ return -ENOMEM; ++ ++ err = a4l_add_subd(dev, subd); ++ if (err < 0) ++ return err; ++ } ++ ++ devpriv->io_base = io_base; ++ ++ devpriv->a_data = 0; ++ outb(devpriv->a_data, devpriv->io_base + PARPORT_A); ++ ++ devpriv->c_data = 0; ++ outb(devpriv->c_data, devpriv->io_base + PARPORT_C); ++ ++ return 0; ++} ++ ++static int dev_parport_detach(struct a4l_device *dev) ++{ ++ int err = 0; ++ ++ if (devpriv->io_base != 0) ++ release_region(devpriv->io_base, PARPORT_SIZE); ++ ++ if (a4l_get_irq(dev) != A4L_IRQ_UNUSED) { ++ a4l_free_irq(dev, a4l_get_irq(dev)); ++ } ++ ++ ++ return err; ++} ++ ++static struct a4l_driver drv_parport = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_parport", ++ .driver_name = "parport", ++ .attach = dev_parport_attach, ++ .detach = dev_parport_detach, ++ .privdata_size = sizeof(parport_priv_t), ++}; ++ ++static int __init drv_parport_init(void) ++{ ++ return a4l_register_drv(&drv_parport); ++} ++ ++static void __exit drv_parport_cleanup(void) ++{ ++ a4l_unregister_drv(&drv_parport); ++} ++ ++MODULE_DESCRIPTION("Analogy driver for standard parallel port"); ++MODULE_LICENSE("GPL"); ++ ++module_init(drv_parport_init); ++module_exit(drv_parport_cleanup); +--- linux/drivers/xenomai/analogy/intel/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/intel/Makefile 2021-04-07 16:01:27.909633192 +0800 +@@ -0,0 +1,10 @@ ++ ++ccflags-y += -Idrivers/xenomai/analogy ++ 
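++# analogy_8255 builds the 8255 digital I/O chip subdevice driver; analogy_parport builds the standard parallel port driver.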
++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_8255) += analogy_8255.o ++ ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_PARPORT) += analogy_parport.o ++ ++analogy_8255-y := 8255.o ++ ++analogy_parport-y := parport.o +--- linux/drivers/xenomai/analogy/intel/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/intel/Kconfig 2021-04-07 16:01:27.905633197 +0800 +@@ -0,0 +1,10 @@ ++ ++config XENO_DRIVERS_ANALOGY_8255 ++ depends on XENO_DRIVERS_ANALOGY ++ tristate "8255 driver" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_PARPORT ++ depends on XENO_DRIVERS_ANALOGY && X86 ++ tristate "Standard parallel port driver" ++ default n +--- linux/drivers/xenomai/analogy/intel/8255.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/intel/8255.h 2021-04-07 16:01:27.900633205 +0800 +@@ -0,0 +1,60 @@ ++/* ++ * Hardware driver for 8255 chip ++ * @note Copyright (C) 1999 David A. Schleef ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef __ANALOGY_8255_H__ ++#define __ANALOGY_8255_H__ ++ ++#include ++ ++typedef int (*a4l_8255_cb_t)(int, int, int, unsigned long); ++ ++typedef struct subd_8255_struct { ++ unsigned long cb_arg; ++ a4l_8255_cb_t cb_func; ++ unsigned int status; ++ int have_irq; ++ int io_bits; ++} subd_8255_t; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_8255) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_8255_MODULE)) ++ ++#define _8255_SIZE 4 ++ ++#define _8255_DATA 0 ++#define _8255_CR 3 ++ ++#define CR_C_LO_IO 0x01 ++#define CR_B_IO 0x02 ++#define CR_B_MODE 0x04 ++#define CR_C_HI_IO 0x08 ++#define CR_A_IO 0x10 ++#define CR_A_MODE(a) ((a)<<5) ++#define CR_CW 0x80 ++ ++void a4l_subdev_8255_init(struct a4l_subdevice *subd); ++void a4l_subdev_8255_interrupt(struct a4l_subdevice *subd); ++ ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_8255 */ ++ ++#define a4l_subdev_8255_init(x) do { } while(0) ++#define a4l_subdev_8255_interrupt(x) do { } while(0) ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_8255 */ ++ ++#endif /* !__ANALOGY_8255_H__ */ +--- linux/drivers/xenomai/analogy/rtdm_helpers.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/rtdm_helpers.c 2021-04-07 16:01:27.895633212 +0800 +@@ -0,0 +1,214 @@ ++/* ++ * Analogy for Linux, RTDM helpers ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* --- Time section --- */ ++ ++static nanosecs_abs_t a4l_clkofs; ++ ++void a4l_init_time(void) ++{ ++ nanosecs_abs_t t1, t2; ++ t1 = rtdm_clock_read(); ++ t2 = ktime_to_ns(ktime_get_real()); ++ a4l_clkofs = t2 - t1; ++} ++ ++nanosecs_abs_t a4l_get_time(void) ++{ ++ return a4l_clkofs + rtdm_clock_read(); ++} ++ ++/* --- IRQ section --- */ ++ ++static int a4l_handle_irq(rtdm_irq_t *irq_handle) ++{ ++ struct a4l_irq_descriptor *dsc = ++ rtdm_irq_get_arg(irq_handle, struct a4l_irq_descriptor); ++ ++ if (dsc->handler((unsigned int)irq_handle->irq, dsc->cookie) == 0) ++ return RTDM_IRQ_HANDLED; ++ else ++ return RTDM_IRQ_NONE; ++} ++ ++int __a4l_request_irq(struct a4l_irq_descriptor *dsc, ++ unsigned int irq, ++ a4l_irq_hdlr_t handler, ++ unsigned long flags, void *cookie) ++{ ++ /* Fills the IRQ descriptor */ ++ dsc->handler = handler; ++ dsc->cookie = cookie; ++ dsc->irq = irq; ++ ++ /* Registers the RT IRQ handler */ ++ return rtdm_irq_request(&dsc->rtdm_desc, ++ (int)irq, ++ a4l_handle_irq, flags, "Analogy device", dsc); ++} ++ ++int __a4l_free_irq(struct a4l_irq_descriptor * dsc) ++{ ++ return rtdm_irq_free(&dsc->rtdm_desc); ++} ++ ++/* --- Synchronization section --- */ ++ ++static void a4l_nrt_sync_handler(rtdm_nrtsig_t *nrt_sig, void *arg) ++{ ++ struct a4l_sync *snc = (struct a4l_sync *) arg; ++ wake_up_interruptible(&snc->wq); ++} ++ ++int a4l_init_sync(struct a4l_sync *snc) ++{ ++ int ret = 0; ++ ++ /* Initializes the flags field */ ++ snc->status = 0; ++ ++ /* If the process is NRT, we need a wait queue structure */ ++ init_waitqueue_head(&snc->wq); ++ ++ /* Initializes the RTDM event */ ++ rtdm_event_init(&snc->rtdm_evt, 0); ++ ++ /* Initializes the gateway to NRT context */ ++ rtdm_nrtsig_init(&snc->nrt_sig, a4l_nrt_sync_handler, snc); ++ ++ return ret; ++} ++ ++void a4l_cleanup_sync(struct a4l_sync *snc) ++{ ++ rtdm_nrtsig_destroy(&snc->nrt_sig); ++ rtdm_event_destroy(&snc->rtdm_evt); ++} ++ ++int a4l_wait_sync(struct a4l_sync *snc, int rt) ++{ ++ int ret = 0; ++ ++ if (test_bit(__EVT_PDING, &snc->status)) ++ goto out_wait; ++ ++ if (rt != 0) { ++ /* If the calling process is in primary mode, ++ we can use RTDM API ... */ ++ set_bit(__RT_WAITER, &snc->status); ++ ret = rtdm_event_wait(&snc->rtdm_evt); ++ } else { ++ /* ... else if the process is NRT, ++ the Linux wait queue system is used */ ++ set_bit(__NRT_WAITER, &snc->status); ++ ret = wait_event_interruptible(snc->wq, ++ test_bit(__EVT_PDING, ++ &snc->status)); ++ } ++ ++out_wait: ++ ++ clear_bit(__EVT_PDING, &snc->status); ++ ++ return ret; ++} ++ ++int a4l_timedwait_sync(struct a4l_sync * snc, ++ int rt, unsigned long long ns_timeout) ++{ ++ int ret = 0; ++ unsigned long timeout; ++ ++ if (test_bit(__EVT_PDING, &snc->status)) ++ goto out_wait; ++ ++ if (rt != 0) { ++ /* If the calling process is in primary mode, ++ we can use RTDM API ... */ ++ set_bit(__RT_WAITER, &snc->status); ++ ret = rtdm_event_timedwait(&snc->rtdm_evt, ns_timeout, NULL); ++ } else { ++ /* ... 
else if the process is NRT, ++ the Linux wait queue system is used */ ++ ++ timeout = do_div(ns_timeout, 1000); ++ ++ /* We consider the Linux kernel cannot tick at a frequency ++ higher than 1 MHz ++ If the timeout value is lower than 1us, we round up to 1us */ ++ timeout = (timeout == 0) ? 1 : usecs_to_jiffies(timeout); ++ ++ set_bit(__NRT_WAITER, &snc->status); ++ ++ ret = wait_event_interruptible_timeout(snc->wq, ++ test_bit(__EVT_PDING, ++ &snc->status), ++ timeout); ++ } ++ ++out_wait: ++ ++ clear_bit(__EVT_PDING, &snc->status); ++ ++ return ret; ++} ++ ++void a4l_flush_sync(struct a4l_sync * snc) ++{ ++ /* Clear the status bitfield */ ++ snc->status = 0; ++ ++ /* Flush the RTDM event */ ++ rtdm_event_clear(&snc->rtdm_evt); ++} ++ ++void a4l_signal_sync(struct a4l_sync * snc) ++{ ++ int hit = 0; ++ ++ set_bit(__EVT_PDING, &snc->status); ++ ++ /* a4l_signal_sync() is bound not to be called upon the right ++ user process context; so, the status flags stores its mode. ++ Thus the proper event signaling function is called */ ++ if (test_and_clear_bit(__RT_WAITER, &snc->status)) { ++ rtdm_event_signal(&snc->rtdm_evt); ++ hit++; ++ } ++ ++ if (test_and_clear_bit(__NRT_WAITER, &snc->status)) { ++ rtdm_nrtsig_pend(&snc->nrt_sig); ++ hit++; ++ } ++ ++ if (hit == 0) { ++ /* At first signaling, we may not know the proper way ++ to send the event */ ++ rtdm_event_signal(&snc->rtdm_evt); ++ rtdm_nrtsig_pend(&snc->nrt_sig); ++ } ++} +--- linux/drivers/xenomai/analogy/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/Makefile 2021-04-07 16:01:27.891633217 +0800 +@@ -0,0 +1,16 @@ ++ ++ccflags-y += -Idrivers/xenomai/analogy ++ ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY) += xeno_analogy.o testing/ intel/ national_instruments/ sensoray/ ++ ++xeno_analogy-y := \ ++ buffer.o \ ++ command.o \ ++ device.o \ ++ driver.o \ ++ driver_facilities.o \ ++ instruction.o \ ++ rtdm_helpers.o \ ++ subdevice.o \ ++ transfer.o \ ++ rtdm_interface.o +--- linux/drivers/xenomai/analogy/command.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/command.c 2021-04-07 16:01:27.886633224 +0800 +@@ -0,0 +1,392 @@ ++/* ++ * Analogy for Linux, command related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* --- Command descriptor management functions --- */ ++int a4l_fill_cmddesc(struct a4l_device_context *cxt, struct a4l_cmd_desc *desc, ++ unsigned int **chan_descs, void *arg) ++{ ++ unsigned int *tmpchans = NULL; ++ int ret = 0; ++ ++ ret = rtdm_safe_copy_from_user(rtdm_private_to_fd(cxt), ++ desc, arg, sizeof(struct a4l_cmd_desc)); ++ if (ret != 0) ++ goto out_cmddesc; ++ ++ ++ if (desc->nb_chan == 0) { ++ ret = -EINVAL; ++ goto out_cmddesc; ++ } ++ ++ tmpchans = rtdm_malloc(desc->nb_chan * sizeof(unsigned int)); ++ if (tmpchans == NULL) { ++ ret = -ENOMEM; ++ goto out_cmddesc; ++ } ++ ++ ret = rtdm_safe_copy_from_user(rtdm_private_to_fd(cxt), ++ tmpchans, ++ desc->chan_descs, ++ desc->nb_chan * sizeof(unsigned int)); ++ if (ret != 0) { ++ __a4l_err("%s invalid arguments \n", __FUNCTION__); ++ goto out_cmddesc; ++ } ++ ++ *chan_descs = desc->chan_descs; ++ desc->chan_descs = tmpchans; ++ ++ __a4l_dbg(1, core_dbg, "desc dump: \n"); ++ __a4l_dbg(1, core_dbg, "\t->idx_subd=%u\n", desc->idx_subd); ++ __a4l_dbg(1, core_dbg, "\t->flags=%lu\n", desc->flags); ++ __a4l_dbg(1, core_dbg, "\t->nb_chan=%u\n", desc->nb_chan); ++ __a4l_dbg(1, core_dbg, "\t->chan_descs=0x%x\n", *desc->chan_descs); ++ __a4l_dbg(1, core_dbg, "\t->data_len=%u\n", desc->data_len); ++ __a4l_dbg(1, core_dbg, "\t->pdata=0x%p\n", desc->data); ++ ++ out_cmddesc: ++ ++ if (ret != 0) { ++ __a4l_err("a4l_fill_cmddesc: %d \n", ret); ++ if (tmpchans != NULL) ++ rtdm_free(tmpchans); ++ desc->chan_descs = NULL; ++ } ++ ++ return ret; ++} ++ ++void a4l_free_cmddesc(struct a4l_cmd_desc * desc) ++{ ++ if (desc->chan_descs != NULL) ++ rtdm_free(desc->chan_descs); ++} ++ ++int a4l_check_cmddesc(struct a4l_device_context * cxt, struct a4l_cmd_desc * desc) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_subdevice *subd; ++ ++ if (desc->idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_check_cmddesc: " ++ "subdevice index out of range (idx=%u)\n", ++ desc->idx_subd); ++ return -EINVAL; ++ } ++ ++ subd = dev->transfer.subds[desc->idx_subd]; ++ ++ if ((subd->flags & A4L_SUBD_TYPES) == A4L_SUBD_UNUSED) { ++ __a4l_err("a4l_check_cmddesc: " ++ "subdevice type incoherent\n"); ++ return -EIO; ++ } ++ ++ if (!(subd->flags & A4L_SUBD_CMD)) { ++ __a4l_err("a4l_check_cmddesc: operation not supported, " ++ "synchronous only subdevice\n"); ++ return -EIO; ++ } ++ ++ if (test_bit(A4L_SUBD_BUSY, &subd->status)) { ++ __a4l_err("a4l_check_cmddesc: subdevice busy\n"); ++ return -EBUSY; ++ } ++ ++ return a4l_check_chanlist(dev->transfer.subds[desc->idx_subd], ++ desc->nb_chan, desc->chan_descs); ++} ++ ++/* --- Command checking functions --- */ ++ ++int a4l_check_generic_cmdcnt(struct a4l_cmd_desc * desc) ++{ ++ unsigned int tmp1, tmp2; ++ ++ /* Makes sure trigger sources are trivially valid */ ++ tmp1 = ++ desc->start_src & ~(TRIG_NOW | TRIG_INT | TRIG_EXT | TRIG_FOLLOW); ++ tmp2 = desc->start_src & (TRIG_NOW | TRIG_INT | TRIG_EXT | TRIG_FOLLOW); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: start_src, weird trigger\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->scan_begin_src & ~(TRIG_TIMER | TRIG_EXT | TRIG_FOLLOW); ++ tmp2 = desc->scan_begin_src & (TRIG_TIMER | TRIG_EXT | TRIG_FOLLOW); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: scan_begin_src, , weird trigger\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->convert_src & ~(TRIG_TIMER | TRIG_EXT | TRIG_NOW); ++ tmp2 = desc->convert_src & (TRIG_TIMER | TRIG_EXT | 
TRIG_NOW); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: convert_src, weird trigger\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->scan_end_src & ~(TRIG_COUNT); ++ if (tmp1 != 0) { ++ __a4l_err("a4l_check_cmddesc: scan_end_src, weird trigger\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->stop_src & ~(TRIG_COUNT | TRIG_NONE); ++ tmp2 = desc->stop_src & (TRIG_COUNT | TRIG_NONE); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: stop_src, weird trigger\n"); ++ return -EINVAL; ++ } ++ ++ /* Makes sure trigger sources are unique */ ++ if (desc->start_src != TRIG_NOW && ++ desc->start_src != TRIG_INT && ++ desc->start_src != TRIG_EXT && desc->start_src != TRIG_FOLLOW) { ++ __a4l_err("a4l_check_cmddesc: start_src, " ++ "only one trigger should be set\n"); ++ return -EINVAL; ++ } ++ ++ if (desc->scan_begin_src != TRIG_TIMER && ++ desc->scan_begin_src != TRIG_EXT && ++ desc->scan_begin_src != TRIG_FOLLOW) { ++ __a4l_err("a4l_check_cmddesc: scan_begin_src, " ++ "only one trigger should be set\n"); ++ return -EINVAL; ++ } ++ ++ if (desc->convert_src != TRIG_TIMER && ++ desc->convert_src != TRIG_EXT && desc->convert_src != TRIG_NOW) { ++ __a4l_err("a4l_check_cmddesc: convert_src, " ++ "only one trigger should be set\n"); ++ return -EINVAL; ++ } ++ ++ if (desc->stop_src != TRIG_COUNT && desc->stop_src != TRIG_NONE) { ++ __a4l_err("a4l_check_cmddesc: stop_src, " ++ "only one trigger should be set\n"); ++ return -EINVAL; ++ } ++ ++ /* Makes sure arguments are trivially compatible */ ++ tmp1 = desc->start_src & (TRIG_NOW | TRIG_FOLLOW | TRIG_INT); ++ tmp2 = desc->start_arg; ++ if (tmp1 != 0 && tmp2 != 0) { ++ __a4l_err("a4l_check_cmddesc: no start_arg expected\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->scan_begin_src & TRIG_FOLLOW; ++ tmp2 = desc->scan_begin_arg; ++ if (tmp1 != 0 && tmp2 != 0) { ++ __a4l_err("a4l_check_cmddesc: no scan_begin_arg expected\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->convert_src & TRIG_NOW; ++ tmp2 = desc->convert_arg; ++ if (tmp1 != 0 && tmp2 != 0) { ++ __a4l_err("a4l_check_cmddesc: no convert_arg expected\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->stop_src & TRIG_NONE; ++ tmp2 = desc->stop_arg; ++ if (tmp1 != 0 && tmp2 != 0) { ++ __a4l_err("a4l_check_cmddesc: no stop_arg expected\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++int a4l_check_specific_cmdcnt(struct a4l_device_context * cxt, struct a4l_cmd_desc * desc) ++{ ++ unsigned int tmp1, tmp2; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_cmd_desc *cmd_mask = dev->transfer.subds[desc->idx_subd]->cmd_mask; ++ ++ if (cmd_mask == NULL) ++ return 0; ++ ++ if (cmd_mask->start_src != 0) { ++ tmp1 = desc->start_src & ~(cmd_mask->start_src); ++ tmp2 = desc->start_src & (cmd_mask->start_src); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: start_src, " ++ "trigger unsupported\n"); ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd_mask->scan_begin_src != 0) { ++ tmp1 = desc->scan_begin_src & ~(cmd_mask->scan_begin_src); ++ tmp2 = desc->scan_begin_src & (cmd_mask->scan_begin_src); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: scan_begin_src, " ++ "trigger unsupported\n"); ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd_mask->convert_src != 0) { ++ tmp1 = desc->convert_src & ~(cmd_mask->convert_src); ++ tmp2 = desc->convert_src & (cmd_mask->convert_src); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: convert_src, " ++ "trigger unsupported\n"); ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd_mask->scan_end_src != 0) { ++ 
tmp1 = desc->scan_end_src & ~(cmd_mask->scan_end_src); ++ if (tmp1 != 0) { ++ __a4l_err("a4l_check_cmddesc: scan_end_src, " ++ "trigger unsupported\n"); ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd_mask->stop_src != 0) { ++ tmp1 = desc->stop_src & ~(cmd_mask->stop_src); ++ tmp2 = desc->stop_src & (cmd_mask->stop_src); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: stop_src, " ++ "trigger unsupported\n"); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++/* --- IOCTL / FOPS function --- */ ++ ++int a4l_ioctl_cmd(struct a4l_device_context * ctx, void *arg) ++{ ++ int ret = 0, simul_flag = 0; ++ struct a4l_cmd_desc *cmd_desc = NULL; ++ struct a4l_device *dev = a4l_get_dev(ctx); ++ unsigned int *chan_descs, *tmp; ++ struct a4l_subdevice *subd; ++ ++ /* The command launching cannot be done in real-time because ++ of some possible buffer allocations in the drivers */ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ /* Basically check the device */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_cmd: cannot command " ++ "an unattached device\n"); ++ return -EINVAL; ++ } ++ ++ /* Allocates the command */ ++ cmd_desc = (struct a4l_cmd_desc *) rtdm_malloc(sizeof(struct a4l_cmd_desc)); ++ if (cmd_desc == NULL) ++ return -ENOMEM; ++ memset(cmd_desc, 0, sizeof(struct a4l_cmd_desc)); ++ ++ /* Gets the command */ ++ ret = a4l_fill_cmddesc(ctx, cmd_desc, &chan_descs, arg); ++ if (ret != 0) ++ goto out_ioctl_cmd; ++ ++ /* Checks the command */ ++ ret = a4l_check_cmddesc(ctx, cmd_desc); ++ if (ret != 0) ++ goto out_ioctl_cmd; ++ ++ ret = a4l_check_generic_cmdcnt(cmd_desc); ++ if (ret != 0) ++ goto out_ioctl_cmd; ++ ++ ret = a4l_check_specific_cmdcnt(ctx, cmd_desc); ++ if (ret != 0) ++ goto out_ioctl_cmd; ++ ++ __a4l_dbg(1, core_dbg,"1st cmd checks passed\n"); ++ subd = dev->transfer.subds[cmd_desc->idx_subd]; ++ ++ /* Tests the command with the cmdtest function */ ++ if (cmd_desc->flags & A4L_CMD_SIMUL) { ++ simul_flag = 1; ++ ++ if (!subd->do_cmdtest) { ++ __a4l_err("a4l_ioctl_cmd: driver's cmd_test NULL\n"); ++ ret = -EINVAL; ++ goto out_ioctl_cmd; ++ } ++ ++ ret = subd->do_cmdtest(subd, cmd_desc); ++ if (ret != 0) { ++ __a4l_err("a4l_ioctl_cmd: driver's cmd_test failed\n"); ++ goto out_ioctl_cmd; ++ } ++ __a4l_dbg(1, core_dbg, "driver's cmd checks passed\n"); ++ goto out_ioctl_cmd; ++ } ++ ++ ++ /* Gets the transfer system ready */ ++ ret = a4l_setup_buffer(ctx, cmd_desc); ++ if (ret < 0) ++ goto out_ioctl_cmd; ++ ++ /* Eventually launches the command */ ++ ret = subd->do_cmd(subd, cmd_desc); ++ ++ if (ret != 0) { ++ a4l_cancel_buffer(ctx); ++ goto out_ioctl_cmd; ++ } ++ ++ out_ioctl_cmd: ++ ++ if (simul_flag) { ++ /* copy the kernel based descriptor */ ++ tmp = cmd_desc->chan_descs; ++ /* return the user based descriptor */ ++ cmd_desc->chan_descs = chan_descs; ++ rtdm_safe_copy_to_user(rtdm_private_to_fd(ctx), arg, cmd_desc, ++ sizeof(struct a4l_cmd_desc)); ++ /* make sure we release the memory associated to the kernel */ ++ cmd_desc->chan_descs = tmp; ++ ++ } ++ ++ if (ret != 0 || simul_flag == 1) { ++ a4l_free_cmddesc(cmd_desc); ++ rtdm_free(cmd_desc); ++ } ++ ++ return ret; ++} +--- linux/drivers/xenomai/analogy/subdevice.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/subdevice.c 2021-04-07 16:01:27.881633232 +0800 +@@ -0,0 +1,449 @@ ++/* ++ * Analogy for Linux, subdevice, channel and range related features ++ * ++ * Copyright (C) 1997-2000 David A. 
Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* --- Common ranges declarations --- */ ++ ++struct a4l_rngtab rng_bipolar10 = { 1, { ++ RANGE_V(-10, 10), ++ }}; ++struct a4l_rngdesc a4l_range_bipolar10 = RNG_GLOBAL(rng_bipolar10); ++ ++struct a4l_rngtab rng_bipolar5 = { 1, { ++ RANGE_V(-5, 5), ++ }}; ++struct a4l_rngdesc a4l_range_bipolar5 = RNG_GLOBAL(rng_bipolar5); ++ ++struct a4l_rngtab rng_unipolar10 = { 1, { ++ RANGE_V(0, 10), ++ }}; ++struct a4l_rngdesc a4l_range_unipolar10 = RNG_GLOBAL(rng_unipolar10); ++ ++struct a4l_rngtab rng_unipolar5 = { 1, { ++ RANGE_V(0, 5), ++ }}; ++struct a4l_rngdesc a4l_range_unipolar5 = RNG_GLOBAL(rng_unipolar5); ++ ++struct a4l_rngtab rng_unknown = { 1, { ++ RANGE(0, 1), ++ }}; ++struct a4l_rngdesc a4l_range_unknown = RNG_GLOBAL(rng_unknown); ++ ++struct a4l_rngtab rng_fake = { 0, { ++ RANGE(0, 0), ++ }}; ++struct a4l_rngdesc a4l_range_fake = RNG_GLOBAL(rng_fake); ++ ++/* --- Basic channel / range management functions --- */ ++ ++struct a4l_channel *a4l_get_chfeat(struct a4l_subdevice *sb, int idx) ++{ ++ int i = (sb->chan_desc->mode != A4L_CHAN_GLOBAL_CHANDESC) ? idx : 0; ++ return &(sb->chan_desc->chans[i]); ++} ++ ++struct a4l_range *a4l_get_rngfeat(struct a4l_subdevice *sb, int chidx, int rngidx) ++{ ++ int i = (sb->rng_desc->mode != A4L_RNG_GLOBAL_RNGDESC) ? chidx : 0; ++ return &(sb->rng_desc->rngtabs[i]->rngs[rngidx]); ++} ++ ++int a4l_check_chanlist(struct a4l_subdevice *subd, ++ unsigned char nb_chan, unsigned int *chans) ++{ ++ int i, j; ++ ++ if (nb_chan > subd->chan_desc->length) ++ return -EINVAL; ++ ++ for (i = 0; i < nb_chan; i++) { ++ j = (subd->chan_desc->mode != A4L_CHAN_GLOBAL_CHANDESC) ? i : 0; ++ ++ if (CR_CHAN(chans[i]) >= subd->chan_desc->length) { ++ __a4l_err("a4l_check_chanlist: " ++ "chan idx out_of range (%u>=%lu)\n", ++ CR_CHAN(chans[i]), subd->chan_desc->length); ++ return -EINVAL; ++ } ++ if (CR_AREF(chans[i]) != 0 && ++ (CR_AREF(chans[i]) & subd->chan_desc->chans[j].flags) == 0) ++ { ++ __a4l_err("a4l_check_chanlist: " ++ "bad channel type\n"); ++ return -EINVAL; ++ } ++ } ++ ++ if (subd->rng_desc == NULL) ++ return 0; ++ ++ for (i = 0; i < nb_chan; i++) { ++ j = (subd->rng_desc->mode != A4L_RNG_GLOBAL_RNGDESC) ? 
i : 0; ++ ++ if (CR_RNG(chans[i]) > subd->rng_desc->rngtabs[j]->length) { ++ __a4l_err("a4l_check_chanlist: " ++ "rng idx out_of range (%u>=%u)\n", ++ CR_RNG(chans[i]), ++ subd->rng_desc->rngtabs[j]->length); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++/* --- Upper layer functions --- */ ++ ++struct a4l_subdevice * a4l_alloc_subd(int sizeof_priv, ++ void (*setup)(struct a4l_subdevice *)) ++{ ++ struct a4l_subdevice *subd; ++ ++ subd = rtdm_malloc(sizeof(struct a4l_subdevice) + sizeof_priv); ++ ++ if(subd != NULL) { ++ memset(subd, 0 , sizeof(struct a4l_subdevice) + sizeof_priv); ++ if(setup != NULL) ++ setup(subd); ++ } ++ ++ return subd; ++} ++ ++int a4l_add_subd(struct a4l_device * dev, struct a4l_subdevice * subd) ++{ ++ struct list_head *this; ++ int i = 0; ++ ++ /* Basic checking */ ++ if (dev == NULL || subd == NULL) ++ return -EINVAL; ++ ++ list_add_tail(&subd->list, &dev->subdvsq); ++ ++ subd->dev = dev; ++ ++ list_for_each(this, &dev->subdvsq) { ++ i++; ++ } ++ ++ subd->idx = --i; ++ ++ return i; ++} ++ ++struct a4l_subdevice *a4l_get_subd(struct a4l_device *dev, int idx) ++{ ++ int i = 0; ++ struct a4l_subdevice *subd = NULL; ++ struct list_head *this; ++ ++ /* This function is not optimized as we do not go through the ++ transfer structure */ ++ ++ list_for_each(this, &dev->subdvsq) { ++ if(idx == i++) ++ subd = list_entry(this, struct a4l_subdevice, list); ++ } ++ ++ return subd; ++} ++ ++/* --- IOCTL / FOPS functions --- */ ++ ++int a4l_ioctl_subdinfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ int i, ret = 0; ++ a4l_sbinfo_t *subd_info; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_subdinfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ subd_info = rtdm_malloc(dev->transfer.nb_subd * ++ sizeof(a4l_sbinfo_t)); ++ if (subd_info == NULL) ++ return -ENOMEM; ++ ++ for (i = 0; i < dev->transfer.nb_subd; i++) { ++ subd_info[i].flags = dev->transfer.subds[i]->flags; ++ subd_info[i].status = dev->transfer.subds[i]->status; ++ subd_info[i].nb_chan = ++ (dev->transfer.subds[i]->chan_desc != NULL) ? 
++ dev->transfer.subds[i]->chan_desc->length : 0; ++ } ++ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, ++ subd_info, dev->transfer.nb_subd * ++ sizeof(a4l_sbinfo_t)) != 0) ++ ret = -EFAULT; ++ ++ rtdm_free(subd_info); ++ ++ return ret; ++ ++} ++ ++int a4l_ioctl_nbchaninfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ a4l_chinfo_arg_t inarg; ++ ++ /* Basic checking */ ++ if (!dev->flags & A4L_DEV_ATTACHED_NR) { ++ __a4l_err("a4l_ioctl_nbchaninfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &inarg, arg, ++ sizeof(a4l_chinfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ if (inarg.idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_ioctl_nbchaninfo: subdevice index " ++ "out of range\n"); ++ return -EINVAL; ++ } ++ ++ if(dev->transfer.subds[inarg.idx_subd]->chan_desc == NULL) ++ inarg.info = (void *)0; ++ else ++ inarg.info = (void *)(unsigned long) ++ dev->transfer.subds[inarg.idx_subd]->chan_desc->length; ++ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, ++ &inarg, sizeof(a4l_chinfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++int a4l_ioctl_chaninfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int i, ret = 0; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ a4l_chinfo_t *chan_info; ++ a4l_chinfo_arg_t inarg; ++ struct a4l_channels_desc *chan_desc; ++ struct a4l_rngdesc *rng_desc; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_chaninfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &inarg, arg, ++ sizeof(a4l_chinfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ if (inarg.idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_ioctl_chaninfo: bad subdevice index\n"); ++ return -EINVAL; ++ } ++ ++ chan_desc = dev->transfer.subds[inarg.idx_subd]->chan_desc; ++ rng_desc = dev->transfer.subds[inarg.idx_subd]->rng_desc; ++ ++ if (chan_desc == NULL) { ++ __a4l_err("a4l_ioctl_chaninfo: no channel descriptor " ++ "for subdevice %d\n", inarg.idx_subd); ++ return -EINVAL; ++ } ++ ++ if(rng_desc == NULL) ++ rng_desc = &a4l_range_fake; ++ ++ chan_info = rtdm_malloc(chan_desc->length * sizeof(a4l_chinfo_t)); ++ if (chan_info == NULL) ++ return -ENOMEM; ++ ++ /* If the channel descriptor is global, the fields are filled ++ with the same instance of channel descriptor */ ++ for (i = 0; i < chan_desc->length; i++) { ++ int j = ++ (chan_desc->mode != A4L_CHAN_GLOBAL_CHANDESC) ? i : 0; ++ int k = (rng_desc->mode != A4L_RNG_GLOBAL_RNGDESC) ? 
i : 0; ++ ++ chan_info[i].chan_flags = chan_desc->chans[j].flags; ++ chan_info[i].nb_bits = chan_desc->chans[j].nb_bits; ++ chan_info[i].nb_rng = rng_desc->rngtabs[k]->length; ++ ++ if (chan_desc->mode == A4L_CHAN_GLOBAL_CHANDESC) ++ chan_info[i].chan_flags |= A4L_CHAN_GLOBAL; ++ } ++ ++ if (rtdm_safe_copy_to_user(fd, ++ inarg.info, ++ chan_info, ++ chan_desc->length * ++ sizeof(a4l_chinfo_t)) != 0) ++ return -EFAULT; ++ ++ rtdm_free(chan_info); ++ ++ return ret; ++} ++ ++int a4l_ioctl_nbrnginfo(struct a4l_device_context * cxt, void *arg) ++{ ++ int i; ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ a4l_rnginfo_arg_t inarg; ++ struct a4l_rngdesc *rng_desc; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_nbrnginfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &inarg, ++ arg, sizeof(a4l_rnginfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ if (inarg.idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_ioctl_nbrnginfo: bad subdevice index\n"); ++ return -EINVAL; ++ } ++ ++ if (dev->transfer.subds[inarg.idx_subd]->chan_desc == NULL) { ++ __a4l_err("a4l_ioctl_nbrnginfo: no channel descriptor " ++ "for subdevice %d\n", inarg.idx_subd); ++ return -EINVAL; ++ } ++ ++ if (inarg.idx_chan >= ++ dev->transfer.subds[inarg.idx_subd]->chan_desc->length) { ++ __a4l_err("a4l_ioctl_nbrnginfo: bad channel index\n"); ++ return -EINVAL; ++ } ++ ++ rng_desc = dev->transfer.subds[inarg.idx_subd]->rng_desc; ++ if (rng_desc != NULL) { ++ i = (rng_desc->mode != A4L_RNG_GLOBAL_RNGDESC) ? ++ inarg.idx_chan : 0; ++ inarg.info = (void *)(unsigned long) ++ rng_desc->rngtabs[i]->length; ++ } else ++ inarg.info = (void *)0; ++ ++ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, ++ &inarg, sizeof(a4l_rnginfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++int a4l_ioctl_rnginfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int i, ret = 0; ++ unsigned int tmp; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_rngdesc *rng_desc; ++ a4l_rnginfo_t *rng_info; ++ a4l_rnginfo_arg_t inarg; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_rnginfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &inarg, ++ arg, sizeof(a4l_rnginfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ if (inarg.idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_ioctl_rnginfo: bad subdevice index\n"); ++ return -EINVAL; ++ } ++ ++ if (dev->transfer.subds[inarg.idx_subd]->chan_desc == NULL) { ++ __a4l_err("a4l_ioctl_rnginfo: no channel descriptor " ++ "for subdevice %d\n", inarg.idx_subd); ++ return -EINVAL; ++ } ++ ++ if (inarg.idx_chan >= ++ dev->transfer.subds[inarg.idx_subd]->chan_desc->length) { ++ __a4l_err("a4l_ioctl_rnginfo: bad channel index\n"); ++ return -EINVAL; ++ } ++ ++ rng_desc = dev->transfer.subds[inarg.idx_subd]->rng_desc; ++ if (rng_desc == NULL) { ++ __a4l_err("a4l_ioctl_rnginfo: no range descriptor " ++ "for channel %d\n", inarg.idx_chan); ++ return -EINVAL; ++ } ++ ++ /* If the range descriptor is global, ++ we take the first instance */ ++ tmp = (rng_desc->mode != A4L_RNG_GLOBAL_RNGDESC) ? 
++ inarg.idx_chan : 0; ++ ++ rng_info = rtdm_malloc(rng_desc->rngtabs[tmp]->length * ++ sizeof(a4l_rnginfo_t)); ++ if (rng_info == NULL) ++ return -ENOMEM; ++ ++ for (i = 0; i < rng_desc->rngtabs[tmp]->length; i++) { ++ rng_info[i].min = rng_desc->rngtabs[tmp]->rngs[i].min; ++ rng_info[i].max = rng_desc->rngtabs[tmp]->rngs[i].max; ++ rng_info[i].flags = rng_desc->rngtabs[tmp]->rngs[i].flags; ++ ++ if (rng_desc->mode == A4L_RNG_GLOBAL_RNGDESC) ++ rng_info[i].flags |= A4L_RNG_GLOBAL; ++ } ++ ++ if (rtdm_safe_copy_to_user(fd, ++ inarg.info, ++ rng_info, ++ rng_desc->rngtabs[tmp]->length * ++ sizeof(a4l_rnginfo_t)) != 0) ++ return -EFAULT; ++ ++ rtdm_free(rng_info); ++ ++ return ret; ++} +--- linux/drivers/xenomai/analogy/sensoray/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/sensoray/Makefile 2021-04-07 16:01:27.877633237 +0800 +@@ -0,0 +1,6 @@ ++ ++ccflags-y += -Idrivers/xenomai/analogy ++ ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_S526) += analogy_s526.o ++ ++analogy_s526-y := s526.o +--- linux/drivers/xenomai/analogy/sensoray/s526.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/sensoray/s526.c 2021-04-07 16:01:27.872633245 +0800 +@@ -0,0 +1,756 @@ ++/* ++ * Analogy driver for Sensoray Model 526 board ++ * ++ * Copyright (C) 2009 Simon Boulay ++ * ++ * Derived from comedi: ++ * Copyright (C) 2000 David A. Schleef ++ * 2006 Everett Wang ++ * 2009 Ian Abbott ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++/* ++ * Original code comes from comedi linux-next staging driver (2009.12.20) ++ * Board documentation: http://www.sensoray.com/products/526data.htm ++ * Everything should work as in comedi: ++ * - Encoder works ++ * - Analog input works ++ * - Analog output works ++ * - PWM output works ++ * - Commands are not supported yet. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* Board description */ ++#define S526_GPCT_CHANS 4 ++#define S526_GPCT_BITS 24 ++#define S526_AI_CHANS 10 /* 8 regular differential inputs ++ * channel 8 is "reference 0" (+10V) ++ * channel 9 is "reference 1" (0V) */ ++#define S526_AI_BITS 16 ++#define S526_AI_TIMEOUT 100 ++#define S526_AO_CHANS 4 ++#define S526_AO_BITS 16 ++#define S526_DIO_CHANS 8 ++#define S526_DIO_BITS 1 ++ ++/* Ports */ ++#define S526_IOSIZE 0x40 /* 64 bytes */ ++#define S526_DEFAULT_ADDRESS 0x2C0 /* Manufacturing default */ ++ ++/* Registers */ ++#define REG_TCR 0x00 ++#define REG_WDC 0x02 ++#define REG_DAC 0x04 ++#define REG_ADC 0x06 ++#define REG_ADD 0x08 ++#define REG_DIO 0x0A ++#define REG_IER 0x0C ++#define REG_ISR 0x0E ++#define REG_MSC 0x10 ++#define REG_C0L 0x12 ++#define REG_C0H 0x14 ++#define REG_C0M 0x16 ++#define REG_C0C 0x18 ++#define REG_C1L 0x1A ++#define REG_C1H 0x1C ++#define REG_C1M 0x1E ++#define REG_C1C 0x20 ++#define REG_C2L 0x22 ++#define REG_C2H 0x24 ++#define REG_C2M 0x26 ++#define REG_C2C 0x28 ++#define REG_C3L 0x2A ++#define REG_C3H 0x2C ++#define REG_C3M 0x2E ++#define REG_C3C 0x30 ++#define REG_EED 0x32 ++#define REG_EEC 0x34 ++ ++#define ISR_ADC_DONE 0x4 ++ ++struct counter_mode_register_t { ++#if defined (__LITTLE_ENDIAN_BITFIELD) ++ unsigned short coutSource:1; ++ unsigned short coutPolarity:1; ++ unsigned short autoLoadResetRcap:3; ++ unsigned short hwCtEnableSource:2; ++ unsigned short ctEnableCtrl:2; ++ unsigned short clockSource:2; ++ unsigned short countDir:1; ++ unsigned short countDirCtrl:1; ++ unsigned short outputRegLatchCtrl:1; ++ unsigned short preloadRegSel:1; ++ unsigned short reserved:1; ++#elif defined(__BIG_ENDIAN_BITFIELD) ++ unsigned short reserved:1; ++ unsigned short preloadRegSel:1; ++ unsigned short outputRegLatchCtrl:1; ++ unsigned short countDirCtrl:1; ++ unsigned short countDir:1; ++ unsigned short clockSource:2; ++ unsigned short ctEnableCtrl:2; ++ unsigned short hwCtEnableSource:2; ++ unsigned short autoLoadResetRcap:3; ++ unsigned short coutPolarity:1; ++ unsigned short coutSource:1; ++#else ++#error Unknown bit field order ++#endif ++}; ++ ++union cmReg { ++ struct counter_mode_register_t reg; ++ unsigned short value; ++}; ++ ++/* Application Classes for GPCT Subdevices */ ++enum S526_GPCT_APP_CLASS { ++ CountingAndTimeMeasurement, ++ SinglePulseGeneration, ++ PulseTrainGeneration, ++ PositionMeasurement, ++ Miscellaneous ++}; ++ ++/* GPCT subdevices configuration */ ++#define MAX_GPCT_CONFIG_DATA 6 ++struct s526GPCTConfig { ++ enum S526_GPCT_APP_CLASS app; ++ int data[MAX_GPCT_CONFIG_DATA]; ++}; ++ ++typedef struct s526_priv { ++ unsigned long io_base; ++} s526_priv_t; ++ ++struct s526_subd_gpct_priv { ++ struct s526GPCTConfig config[4]; ++}; ++ ++struct s526_subd_ai_priv { ++ uint16_t config; ++}; ++ ++struct s526_subd_ao_priv { ++ uint16_t readback[2]; ++}; ++ ++struct s526_subd_dio_priv { ++ int io_bits; ++ unsigned int state; ++}; ++ ++#define devpriv ((s526_priv_t*)(dev->priv)) ++ ++#define ADDR_REG(reg) (devpriv->io_base + (reg)) ++#define ADDR_CHAN_REG(reg, chan) (devpriv->io_base + (reg) + (chan) * 8) ++ ++ ++static int s526_gpct_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_gpct_priv *subdpriv = ++ (struct s526_subd_gpct_priv *)subd->priv; ++ unsigned int *data = (unsigned int *)insn->data; ++ int subdev_channel = CR_CHAN(insn->chan_desc); ++ int i; ++ short value; ++ union cmReg cmReg; 
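++	/* data[0] selects the application type; the layout of the remaining
++	   data[] entries depends on that type, as documented case by case below. */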
++ ++ a4l_dbg(1, drv_dbg, dev, ++ "s526_gpct_insn_config: Configuring Channel %d\n", ++ subdev_channel); ++ ++ for (i = 0; i < MAX_GPCT_CONFIG_DATA; i++) { ++ subdpriv->config[subdev_channel].data[i] = data[i]; ++ a4l_dbg(1, drv_dbg, dev, "data[%d]=%x\n", i, data[i]); ++ } ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_GPCT_QUADRATURE_ENCODER: ++ /* ++ * data[0]: Application Type ++ * data[1]: Counter Mode Register Value ++ * data[2]: Pre-load Register Value ++ * data[3]: Conter Control Register ++ */ ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_insn_config: Configuring Encoder\n"); ++ subdpriv->config[subdev_channel].app = PositionMeasurement; ++ ++ /* Set Counter Mode Register */ ++ cmReg.value = data[1] & 0xFFFF; ++ ++ a4l_dbg(1, drv_dbg, dev, "Counter Mode register=%x\n", cmReg.value); ++ outw(cmReg.value, ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ ++ /* Reset the counter if it is software preload */ ++ if (cmReg.reg.autoLoadResetRcap == 0) { ++ outw(0x8000, ADDR_CHAN_REG(REG_C0C, subdev_channel)); /* Reset the counter */ ++ /* outw(0x4000, ADDR_CHAN_REG(REG_C0C, subdev_channel)); /\* Load the counter from PR0 *\/ */ ++ } ++ break; ++ ++ case A4L_INSN_CONFIG_GPCT_SINGLE_PULSE_GENERATOR: ++ /* ++ * data[0]: Application Type ++ * data[1]: Counter Mode Register Value ++ * data[2]: Pre-load Register 0 Value ++ * data[3]: Pre-load Register 1 Value ++ * data[4]: Conter Control Register ++ */ ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_insn_config: Configuring SPG\n"); ++ subdpriv->config[subdev_channel].app = SinglePulseGeneration; ++ ++ /* Set Counter Mode Register */ ++ cmReg.value = (short)(data[1] & 0xFFFF); ++ cmReg.reg.preloadRegSel = 0; /* PR0 */ ++ outw(cmReg.value, ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ ++ /* Load the pre-load register 0 high word */ ++ value = (short)((data[2] >> 16) & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0H, subdev_channel)); ++ ++ /* Load the pre-load register 0 low word */ ++ value = (short)(data[2] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ ++ /* Set Counter Mode Register */ ++ cmReg.value = (short)(data[1] & 0xFFFF); ++ cmReg.reg.preloadRegSel = 1; /* PR1 */ ++ outw(cmReg.value, ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ ++ /* Load the pre-load register 1 high word */ ++ value = (short)((data[3] >> 16) & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0H, subdev_channel)); ++ ++ /* Load the pre-load register 1 low word */ ++ value = (short)(data[3] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ ++ /* Write the Counter Control Register */ ++ if (data[4] != 0) { ++ value = (short)(data[4] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0C, subdev_channel)); ++ } ++ break; ++ ++ case A4L_INSN_CONFIG_GPCT_PULSE_TRAIN_GENERATOR: ++ /* ++ * data[0]: Application Type ++ * data[1]: Counter Mode Register Value ++ * data[2]: Pre-load Register 0 Value ++ * data[3]: Pre-load Register 1 Value ++ * data[4]: Conter Control Register ++ */ ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_insn_config: Configuring PTG\n"); ++ subdpriv->config[subdev_channel].app = PulseTrainGeneration; ++ ++ /* Set Counter Mode Register */ ++ cmReg.value = (short)(data[1] & 0xFFFF); ++ cmReg.reg.preloadRegSel = 0; /* PR0 */ ++ outw(cmReg.value, ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ ++ /* Load the pre-load register 0 high word */ ++ value = (short)((data[2] >> 16) & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0H, subdev_channel)); ++ ++ /* Load the pre-load register 0 low word */ ++ value = (short)(data[2] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0L, 
subdev_channel)); ++ ++ /* Set Counter Mode Register */ ++ cmReg.value = (short)(data[1] & 0xFFFF); ++ cmReg.reg.preloadRegSel = 1; /* PR1 */ ++ outw(cmReg.value, ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ ++ /* Load the pre-load register 1 high word */ ++ value = (short)((data[3] >> 16) & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0H, subdev_channel)); ++ ++ /* Load the pre-load register 1 low word */ ++ value = (short)(data[3] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ ++ /* Write the Counter Control Register */ ++ if (data[4] != 0) { ++ value = (short)(data[4] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0C, subdev_channel)); ++ } ++ break; ++ ++ default: ++ a4l_err(dev, "s526_gpct_insn_config: unsupported GPCT_insn_config\n"); ++ return -EINVAL; ++ break; ++ } ++ ++ return 0; ++} ++ ++static int s526_gpct_rinsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint32_t *data = (uint32_t *)insn->data; ++ int counter_channel = CR_CHAN(insn->chan_desc); ++ unsigned short datalow; ++ unsigned short datahigh; ++ int i; ++ ++ if (insn->data_size <= 0) { ++ a4l_err(dev, "s526_gpct_rinsn: data size should be > 0\n"); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < insn->data_size / sizeof(uint32_t); i++) { ++ datalow = inw(ADDR_CHAN_REG(REG_C0L, counter_channel)); ++ datahigh = inw(ADDR_CHAN_REG(REG_C0H, counter_channel)); ++ data[i] = (int)(datahigh & 0x00FF); ++ data[i] = (data[i] << 16) | (datalow & 0xFFFF); ++ a4l_dbg(1, drv_dbg, dev, ++ "s526_gpct_rinsn GPCT[%d]: %x(0x%04x, 0x%04x)\n", ++ counter_channel, data[i], datahigh, datalow); ++ } ++ ++ return 0; ++} ++ ++static int s526_gpct_winsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_gpct_priv *subdpriv = ++ (struct s526_subd_gpct_priv *)subd->priv; ++ uint32_t *data = (uint32_t *)insn->data; ++ int subdev_channel = CR_CHAN(insn->chan_desc); ++ short value; ++ union cmReg cmReg; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "s526_gpct_winsn: GPCT_INSN_WRITE on channel %d\n", ++ subdev_channel); ++ ++ cmReg.value = inw(ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ a4l_dbg(1, drv_dbg, dev, ++ "s526_gpct_winsn: Counter Mode Register: %x\n", cmReg.value); ++ ++ /* Check what Application of Counter this channel is configured for */ ++ switch (subdpriv->config[subdev_channel].app) { ++ case PositionMeasurement: ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_winsn: INSN_WRITE: PM\n"); ++ outw(0xFFFF & ((*data) >> 16), ADDR_CHAN_REG(REG_C0H, ++ subdev_channel)); ++ outw(0xFFFF & (*data), ++ ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ break; ++ ++ case SinglePulseGeneration: ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_winsn: INSN_WRITE: SPG\n"); ++ outw(0xFFFF & ((*data) >> 16), ADDR_CHAN_REG(REG_C0H, ++ subdev_channel)); ++ outw(0xFFFF & (*data), ++ ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ break; ++ ++ case PulseTrainGeneration: ++ /* ++ * data[0] contains the PULSE_WIDTH ++ * data[1] contains the PULSE_PERIOD ++ * @pre PULSE_PERIOD > PULSE_WIDTH > 0 ++ * The above periods must be expressed as a multiple of the ++ * pulse frequency on the selected source ++ */ ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_winsn: INSN_WRITE: PTG\n"); ++ if ((data[1] > data[0]) && (data[0] > 0)) { ++ (subdpriv->config[subdev_channel]).data[0] = data[0]; ++ (subdpriv->config[subdev_channel]).data[1] = data[1]; ++ } else { ++ a4l_err(dev, ++ "s526_gpct_winsn: INSN_WRITE: PTG: Problem with Pulse params -> %du %du\n", ++ data[0], 
data[1]); ++ return -EINVAL; ++ } ++ ++ value = (short)((*data >> 16) & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0H, subdev_channel)); ++ value = (short)(*data & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ break; ++ default: /* Impossible */ ++ a4l_err(dev, ++ "s526_gpct_winsn: INSN_WRITE: Functionality %d not implemented yet\n", ++ subdpriv->config[subdev_channel].app); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int s526_ai_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_ai_priv *subdpriv = ++ (struct s526_subd_ai_priv *)subd->priv; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ if (insn->data_size < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ /* data[0] : channels was set in relevant bits. ++ * data[1] : delay ++ */ ++ /* COMMENT: abbotti 2008-07-24: I don't know why you'd want to ++ * enable channels here. The channel should be enabled in the ++ * INSN_READ handler. */ ++ ++ /* Enable ADC interrupt */ ++ outw(ISR_ADC_DONE, ADDR_REG(REG_IER)); ++ a4l_dbg(1, drv_dbg, dev, ++ "s526_ai_insn_config: ADC current value: 0x%04x\n", ++ inw(ADDR_REG(REG_ADC))); ++ ++ subdpriv->config = (data[0] & 0x3FF) << 5; ++ if (data[1] > 0) ++ subdpriv->config |= 0x8000; /* set the delay */ ++ ++ subdpriv->config |= 0x0001; /* ADC start bit. */ ++ ++ return 0; ++} ++ ++static int s526_ai_rinsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_ai_priv *subdpriv = ++ (struct s526_subd_ai_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ int n, i; ++ int chan = CR_CHAN(insn->chan_desc); ++ uint16_t value; ++ uint16_t d; ++ uint16_t status; ++ ++ /* Set configured delay, enable channel for this channel only, ++ * select "ADC read" channel, set "ADC start" bit. */ ++ value = (subdpriv->config & 0x8000) | ++ ((1 << 5) << chan) | (chan << 1) | 0x0001; ++ ++ /* convert n samples */ ++ for (n = 0; n < insn->data_size / sizeof(uint16_t); n++) { ++ /* trigger conversion */ ++ outw(value, ADDR_REG(REG_ADC)); ++ a4l_dbg(1, drv_dbg, dev, "s526_ai_rinsn: Wrote 0x%04x to ADC\n", ++ value); ++ ++ /* wait for conversion to end */ ++ for (i = 0; i < S526_AI_TIMEOUT; i++) { ++ status = inw(ADDR_REG(REG_ISR)); ++ if (status & ISR_ADC_DONE) { ++ outw(ISR_ADC_DONE, ADDR_REG(REG_ISR)); ++ break; ++ } ++ } ++ if (i == S526_AI_TIMEOUT) { ++ a4l_warn(dev, "s526_ai_rinsn: ADC(0x%04x) timeout\n", ++ inw(ADDR_REG(REG_ISR))); ++ return -ETIMEDOUT; ++ } ++ ++ /* read data */ ++ d = inw(ADDR_REG(REG_ADD)); ++ a4l_dbg(1, drv_dbg, dev, "s526_ai_rinsn: AI[%d]=0x%04x\n", ++ n, (uint16_t)(d & 0xFFFF)); ++ ++ /* munge data */ ++ data[n] = d ^ 0x8000; ++ } ++ ++ return 0; ++} ++ ++static int s526_ao_winsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_ao_priv *subdpriv = ++ (struct s526_subd_ao_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ int i; ++ int chan = CR_CHAN(insn->chan_desc); ++ uint16_t val; ++ ++ val = chan << 1; ++ outw(val, ADDR_REG(REG_DAC)); ++ ++ for (i = 0; i < insn->data_size / sizeof(uint16_t); i++) { ++ outw(data[i], ADDR_REG(REG_ADD)); /* write the data to preload register */ ++ subdpriv->readback[chan] = data[i]; ++ outw(val + 1, ADDR_REG(REG_DAC)); /* starts the D/A conversion. 
*/ ++ } ++ ++ return 0; ++} ++ ++static int s526_ao_rinsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct s526_subd_ao_priv *subdpriv = ++ (struct s526_subd_ao_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ int i; ++ int chan = CR_CHAN(insn->chan_desc); ++ ++ for (i = 0; i < insn->data_size / sizeof(uint16_t); i++) ++ data[i] = subdpriv->readback[chan]; ++ ++ return 0; ++} ++ ++static int s526_dio_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_dio_priv *subdpriv = ++ (struct s526_subd_dio_priv *)subd->priv; ++ unsigned int *data = (unsigned int *)insn->data; ++ int chan = CR_CHAN(insn->chan_desc); ++ int group, mask; ++ ++ group = chan >> 2; ++ mask = 0xF << (group << 2); ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ subdpriv->state |= 1 << (group + 10); /* bit 10/11 set the ++ * group 1/2's mode */ ++ subdpriv->io_bits |= mask; ++ break; ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ subdpriv->state &= ~(1 << (group + 10)); /* 1 is output, 0 is ++ * input. */ ++ subdpriv->io_bits &= ~mask; ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = ++ (subdpriv->io_bits & mask) ? A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ default: ++ return -EINVAL; ++ } ++ ++ outw(subdpriv->state, ADDR_REG(REG_DIO)); ++ ++ return 0; ++} ++ ++static int s526_dio_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_dio_priv *subdpriv = ++ (struct s526_subd_dio_priv *)subd->priv; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ if (insn->data_size != 2 * sizeof(uint8_t)) ++ return -EINVAL; ++ ++ if (data[0]) { ++ subdpriv->state &= ~(data[0]); ++ subdpriv->state |= data[0] & data[1]; ++ ++ outw(subdpriv->state, ADDR_REG(REG_DIO)); ++ } ++ ++ data[1] = inw(ADDR_REG(REG_DIO)) & 0xFF; /* low 8 bits are the data */ ++ ++ return 0; ++} ++ ++/* --- Channels descriptor --- */ ++ ++static struct a4l_channels_desc s526_chan_desc_gpct = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = S526_GPCT_CHANS, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, S526_GPCT_BITS}, ++ }, ++}; ++ ++static struct a4l_channels_desc s526_chan_desc_ai = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = S526_AI_CHANS, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, S526_AI_BITS}, ++ }, ++}; ++ ++static struct a4l_channels_desc s526_chan_desc_ao = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = S526_AO_CHANS, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, S526_AO_BITS}, ++ }, ++}; ++ ++static struct a4l_channels_desc s526_chan_desc_dio = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = S526_DIO_CHANS, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, S526_DIO_BITS}, ++ }, ++}; ++ ++/* --- Subdevice initialization functions --- */ ++ ++/* General purpose counter/timer (gpct) */ ++static void setup_subd_gpct(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_COUNTER; ++ subd->chan_desc = &s526_chan_desc_gpct; ++ subd->insn_read = s526_gpct_rinsn; ++ subd->insn_config = s526_gpct_insn_config; ++ subd->insn_write = s526_gpct_winsn; ++} ++ ++/* Analog input subdevice */ ++static void setup_subd_ai(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_AI; ++ subd->chan_desc = &s526_chan_desc_ai; ++ subd->rng_desc = &a4l_range_bipolar10; ++ subd->insn_read = s526_ai_rinsn; ++ subd->insn_config = s526_ai_insn_config; ++} ++ ++/* Analog output subdevice */ ++static void setup_subd_ao(struct a4l_subdevice *subd) ++{ ++ subd->flags = 
A4L_SUBD_AO; ++ subd->chan_desc = &s526_chan_desc_ao; ++ subd->rng_desc = &a4l_range_bipolar10; ++ subd->insn_write = s526_ao_winsn; ++ subd->insn_read = s526_ao_rinsn; ++} ++ ++/* Digital i/o subdevice */ ++static void setup_subd_dio(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_DIO; ++ subd->chan_desc = &s526_chan_desc_dio; ++ subd->rng_desc = &range_digital; ++ subd->insn_bits = s526_dio_insn_bits; ++ subd->insn_config = s526_dio_insn_config; ++} ++ ++struct setup_subd { ++ void (*setup_func) (struct a4l_subdevice *); ++ int sizeof_priv; ++}; ++ ++static struct setup_subd setup_subds[4] = { ++ { ++ .setup_func = setup_subd_gpct, ++ .sizeof_priv = sizeof(struct s526_subd_gpct_priv), ++ }, ++ { ++ .setup_func = setup_subd_ai, ++ .sizeof_priv = sizeof(struct s526_subd_ai_priv), ++ }, ++ { ++ .setup_func = setup_subd_ao, ++ .sizeof_priv = sizeof(struct s526_subd_ao_priv), ++ }, ++ { ++ .setup_func = setup_subd_dio, ++ .sizeof_priv = sizeof(struct s526_subd_dio_priv), ++ }, ++}; ++ ++static int dev_s526_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ int io_base; ++ int i; ++ int err = 0; ++ ++ if (arg->opts == NULL || arg->opts_size < sizeof(unsigned long)) { ++ a4l_warn(dev, ++ "dev_s526_attach: no attach options specified; " ++ "using defaults: addr=0x%x\n", ++ S526_DEFAULT_ADDRESS); ++ io_base = S526_DEFAULT_ADDRESS; ++ } else { ++ io_base = ((unsigned long *)arg->opts)[0]; ++ } ++ ++ if (!request_region(io_base, S526_IOSIZE, "s526")) { ++ a4l_err(dev, "dev_s526_attach: I/O port conflict\n"); ++ return -EIO; ++ } ++ ++ /* Allocate the subdevice structures. */ ++ for (i = 0; i < 4; i++) { ++ struct a4l_subdevice *subd = a4l_alloc_subd(setup_subds[i].sizeof_priv, ++ setup_subds[i].setup_func); ++ ++ if (subd == NULL) ++ return -ENOMEM; ++ ++ err = a4l_add_subd(dev, subd); ++ if (err != i) ++ return err; ++ } ++ ++ devpriv->io_base = io_base; ++ ++ a4l_info(dev, " attached (address = 0x%x)\n", io_base); ++ ++ return 0; ++} ++ ++static int dev_s526_detach(struct a4l_device *dev) ++{ ++ int err = 0; ++ ++ if (devpriv->io_base != 0) ++ release_region(devpriv->io_base, S526_IOSIZE); ++ ++ return err; ++} ++ ++static struct a4l_driver drv_s526 = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_s526", ++ .driver_name = "s526", ++ .attach = dev_s526_attach, ++ .detach = dev_s526_detach, ++ .privdata_size = sizeof(s526_priv_t), ++}; ++ ++static int __init drv_s526_init(void) ++{ ++ return a4l_register_drv(&drv_s526); ++} ++ ++static void __exit drv_s526_cleanup(void) ++{ ++ a4l_unregister_drv(&drv_s526); ++} ++ ++MODULE_DESCRIPTION("Analogy driver for Sensoray Model 526 board."); ++MODULE_LICENSE("GPL"); ++ ++module_init(drv_s526_init); ++module_exit(drv_s526_cleanup); +--- linux/drivers/xenomai/analogy/sensoray/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/sensoray/Kconfig 2021-04-07 16:01:27.867633252 +0800 +@@ -0,0 +1,5 @@ ++ ++config XENO_DRIVERS_ANALOGY_S526 ++ depends on XENO_DRIVERS_ANALOGY ++ tristate "Sensoray Model 526 driver" ++ default n +--- linux/drivers/xenomai/analogy/driver_facilities.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/driver_facilities.c 2021-04-07 16:01:27.863633257 +0800 +@@ -0,0 +1,608 @@ ++/* ++ * Analogy for Linux, driver facilities ++ * ++ * Copyright (C) 1997-2000 David A. 
Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt ++ * @defgroup analogy Analogy framework ++ * A RTDM-based interface for implementing DAQ card drivers ++ */ ++ ++/** ++ * @ingroup analogy ++ * @defgroup analogy_driver_facilities Driver API ++ * Programming interface provided to DAQ card drivers ++ */ ++ ++/* --- Driver section --- */ ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_driver Driver management services ++ * ++ * Analogy driver registration / unregistration ++ * ++ * In a common Linux char driver, the developer has to register a fops ++ * structure filled with callbacks for read / write / mmap / ioctl ++ * operations. ++ * ++ * Analogy drivers do not have to implement read / write / mmap / ++ * ioctl functions, these procedures are implemented in the Analogy ++ * generic layer. Then, the transfers between user-space and ++ * kernel-space are already managed. Analogy drivers work with commands ++ * and instructions which are some kind of more dedicated read / write ++ * operations. And, instead of registering a fops structure, a Analogy ++ * driver must register some a4l_driver structure. ++ * ++ * @{ ++ */ ++ ++/** ++ * @brief Register an Analogy driver ++ * ++ * After initialising a driver structure, the driver must be made ++ * available so as to be attached. ++ * ++ * @param[in] drv Driver descriptor structure ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_register_drv(struct a4l_driver * drv); ++EXPORT_SYMBOL_GPL(a4l_register_drv); ++ ++/** ++ * @brief Unregister an Analogy driver ++ * ++ * This function removes the driver descriptor from the Analogy driver ++ * list. The driver cannot be attached anymore. ++ * ++ * @param[in] drv Driver descriptor structure ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_unregister_drv(struct a4l_driver * drv); ++EXPORT_SYMBOL_GPL(a4l_unregister_drv); ++ ++/** @} */ ++ ++/* --- Subdevice section --- */ ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_subdevice Subdevice management services ++ * ++ * Subdevice declaration in a driver ++ * ++ * The subdevice structure is the most complex one in the Analogy ++ * driver layer. It contains some description fields to fill and some ++ * callbacks to declare. 
++ * ++ * The description fields are: ++ * - flags: to define the subdevice type and its capabilities; ++ * - chan_desc: to describe the channels which compose the subdevice; ++ * - rng_desc: to declare the usable ranges; ++ * ++ * The functions callbakcs are: ++ * - do_cmd() and do_cmdtest(): to performe asynchronous acquisitions ++ * thanks to commands; ++ * - cancel(): to abort a working asynchronous acquisition; ++ * - munge(): to apply modifications on the data freshly acquired ++ * during an asynchronous transfer. Warning: using this feature with ++ * can significantly reduce the performances (if the munge operation ++ * is complex, it will trigger high CPU charge and if the ++ * acquisition device is DMA capable, many cache-misses and ++ * cache-replaces will occur (the benefits of the DMA controller ++ * will vanish); ++ * - trigger(): optionnaly to launch an asynchronous acquisition; ++ * - insn_read(), insn_write(), insn_bits(), insn_config(): to perform ++ * synchronous acquisition operations. ++ * ++ * Once the subdevice is filled, it must be inserted into the driver ++ * structure thanks to a4l_add_subd(). ++ * ++ * @{ ++ */ ++ ++EXPORT_SYMBOL_GPL(a4l_range_bipolar10); ++EXPORT_SYMBOL_GPL(a4l_range_bipolar5); ++EXPORT_SYMBOL_GPL(a4l_range_unipolar10); ++EXPORT_SYMBOL_GPL(a4l_range_unipolar5); ++EXPORT_SYMBOL_GPL(a4l_range_unknown); ++EXPORT_SYMBOL_GPL(a4l_range_fake); ++ ++/** ++ * @brief Allocate a subdevice descriptor ++ * ++ * This is a helper function so as to get a suitable subdevice ++ * descriptor ++ * ++ * @param[in] sizeof_priv Size of the subdevice's private data ++ * @param[in] setup Setup function to be called after the allocation ++ * ++ * @return the index with which the subdevice has been registered, in ++ * case of error a negative error code is returned. ++ * ++ */ ++struct a4l_subdevice * a4l_alloc_subd(int sizeof_priv, ++ void (*setup)(struct a4l_subdevice *)); ++EXPORT_SYMBOL_GPL(a4l_alloc_subd); ++ ++/** ++ * @brief Add a subdevice to the driver descriptor ++ * ++ * Once the driver descriptor structure is initialized, the function ++ * a4l_add_subd() must be used so to add some subdevices to the ++ * driver. ++ * ++ * @param[in] dev Device descriptor structure ++ * @param[in] subd Subdevice descriptor structure ++ * ++ * @return the index with which the subdevice has been registered, in ++ * case of error a negative error code is returned. ++ * ++ */ ++int a4l_add_subd(struct a4l_device *dev, struct a4l_subdevice *subd); ++EXPORT_SYMBOL_GPL(a4l_add_subd); ++ ++/** ++ * @brief Get a pointer to the subdevice descriptor referenced by its ++ * registration index ++ * ++ * This function is scarcely useful as all the drivers callbacks get ++ * the related subdevice descriptor as first argument. ++ * This function is not optimized, it goes through a linked list to ++ * get the proper pointer. So it must not be used in real-time context ++ * but at initialization / cleanup time (attach / detach). ++ * ++ * @param[in] dev Device descriptor structure ++ * @param[in] idx Subdevice index ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++struct a4l_subdevice *a4l_get_subd(struct a4l_device *dev, int idx); ++EXPORT_SYMBOL_GPL(a4l_get_subd); ++ ++/** @} */ ++ ++/* --- Buffer section --- */ ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_buffer Buffer management services ++ * ++ * Buffer management services ++ * ++ * The buffer is the key component of the Analogy infrastructure. 
It ++ * manages transfers between the user-space and the Analogy drivers ++ * thanks to generic functions which are described hereafter. Thanks ++ * to the buffer subsystem, the driver developer does not have to care ++ * about the way the user program retrieves or sends data. ++ * ++ * To write a classical char driver, the developer has to fill a fops ++ * structure so as to provide transfer operations to the user program ++ * (read, write, ioctl and mmap if need be). ++ * ++ * The Analogy infrastructure manages the whole interface with the ++ * userspace; the common read, write, mmap, etc. callbacks are generic ++ * Analogy functions. These functions manage (and perform, if need be) ++ * tranfers between the user-space and an asynchronous buffer thanks ++ * to lockless mechanisms. ++ * ++ * Consequently, the developer has to use the proper buffer functions ++ * in order to write / read acquired data into / from the asynchronous ++ * buffer. ++ * ++ * Here are listed the functions: ++ * - a4l_buf_prepare_(abs)put() and a4l_buf_commit_(abs)put() ++ * - a4l_buf_prepare_(abs)get() and a4l_buf_commit_(abs)get() ++ * - a4l_buf_put() ++ * - a4l_buf_get() ++ * - a4l_buf_evt(). ++ * ++ * The functions count might seem high; however, the developer needs a ++ * few of them to write a driver. Having so many functions enables to ++ * manage any transfer cases: ++ * - If some DMA controller is available, there is no need to make the ++ * driver copy the acquired data into the asynchronous buffer, the ++ * DMA controller must directly trigger DMA shots into / from the ++ * buffer. In that case, a function a4l_buf_prepare_*() must be used ++ * so as to set up the DMA transfer and a function ++ * a4l_buf_commit_*() has to be called to complete the transfer(). ++ * - For DMA controllers which need to work with global counter (the ++ * transfered data count since the beginning of the acquisition), ++ * the functions a4l_buf_*_abs_*() have been made available. ++ * - If no DMA controller is available, the driver has to perform the ++ * copy between the hardware component and the asynchronous ++ * buffer. In such cases, the functions a4l_buf_get() and ++ * a4l_buf_put() are useful. ++ * ++ * @{ ++ */ ++ ++/** ++ * @brief Update the absolute count of data sent from the device to ++ * the buffer since the start of the acquisition and after the next ++ * DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(absg)et() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, some ++ * pointers still have to be updated so as to monitor the tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count to be transferred during the next ++ * DMA shot plus the data count which have been copied since the start ++ * of the acquisition ++ * ++ * @return 0 on success, otherwise negative error code. 
++ * ++ */ ++int a4l_buf_prepare_absput(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_prepare_absput); ++ ++/** ++ * @brief Set the absolute count of data which was sent from the ++ * device to the buffer since the start of the acquisition and until ++ * the last DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count transferred to the buffer during ++ * the last DMA shot plus the data count which have been sent / ++ * retrieved since the beginning of the acquisition ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_commit_absput(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_commit_absput); ++ ++/** ++ * @brief Set the count of data which is to be sent to the buffer at ++ * the next DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count to be transferred ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_prepare_put(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_prepare_put); ++ ++/** ++ * @brief Set the count of data sent to the buffer during the last ++ * completed DMA shots ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The amount of data transferred ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_commit_put(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_commit_put); ++ ++/** ++ * @brief Copy some data from the device driver to the buffer ++ * ++ * The function a4l_buf_put() must copy data coming from some ++ * acquisition device to the Analogy buffer. This ring-buffer is an ++ * intermediate area between the device driver and the user-space ++ * program, which is supposed to recover the acquired data. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] bufdata The data buffer to copy into the Analogy buffer ++ * @param[in] count The amount of data to copy ++ * ++ * @return 0 on success, otherwise negative error code. 
++ * ++ */ ++int a4l_buf_put(struct a4l_subdevice *subd, void *bufdata, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_put); ++ ++/** ++ * @brief Update the absolute count of data sent from the buffer to ++ * the device since the start of the acquisition and after the next ++ * DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(absg)et() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count to be transferred during the next ++ * DMA shot plus the data count which have been copied since the start ++ * of the acquisition ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_prepare_absget(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_prepare_absget); ++ ++/** ++ * @brief Set the absolute count of data which was sent from the ++ * buffer to the device since the start of the acquisition and until ++ * the last DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count transferred to the device during ++ * the last DMA shot plus the data count which have been sent since ++ * the beginning of the acquisition ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_commit_absget(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_commit_absget); ++ ++/** ++ * @brief Set the count of data which is to be sent from the buffer to ++ * the device at the next DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count to be transferred ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_prepare_get(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_prepare_get); ++ ++/** ++ * @brief Set the count of data sent from the buffer to the device ++ * during the last completed DMA shots ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. 
In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The amount of data transferred ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_commit_get(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_commit_get); ++ ++/** ++ * @brief Copy some data from the buffer to the device driver ++ * ++ * The function a4l_buf_get() must copy data coming from the Analogy ++ * buffer to some acquisition device. This ring-buffer is an ++ * intermediate area between the device driver and the user-space ++ * program, which is supposed to provide the data to send to the ++ * device. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] bufdata The data buffer to copy into the Analogy buffer ++ * @param[in] count The amount of data to copy ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_get(struct a4l_subdevice *subd, void *bufdata, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_get); ++ ++/** ++ * @brief Signal some event(s) to a user-space program involved in ++ * some read / write operation ++ * ++ * The function a4l_buf_evt() is useful in many cases: ++ * - To wake-up a process waiting for some data to read. ++ * - To wake-up a process waiting for some data to write. ++ * - To notify the user-process an error has occured during the ++ * acquistion. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] evts Some specific event to notify: ++ * - A4L_BUF_ERROR to indicate some error has occured during the ++ * transfer ++ * - A4L_BUF_EOA to indicate the acquisition is complete (this ++ * event is automatically set, it should not be used). ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_evt(struct a4l_subdevice *subd, unsigned long evts); ++EXPORT_SYMBOL_GPL(a4l_buf_evt); ++ ++/** ++ * @brief Get the data amount available in the Analogy buffer ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * ++ * @return the amount of data available in the Analogy buffer. ++ * ++ */ ++unsigned long a4l_buf_count(struct a4l_subdevice *subd); ++EXPORT_SYMBOL_GPL(a4l_buf_count); ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++ ++/** ++ * @brief Get the current Analogy command descriptor ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * ++ * @return the command descriptor. ++ * ++ */ ++struct a4l_cmd_desc *a4l_get_cmd(struct a4l_subdevice * subd); ++ ++#endif /* DOXYGEN_CPP */ ++ ++/** ++ * @brief Get the channel index according to its type ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * ++ * @return the channel index. ++ * ++ */ ++int a4l_get_chan(struct a4l_subdevice *subd); ++EXPORT_SYMBOL_GPL(a4l_get_chan); ++ ++/** @} */ ++ ++/* --- IRQ handling section --- */ ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_irq Interrupt management services ++ * @{ ++ */ ++ ++/** ++ * @brief Get the interrupt number in use for a specific device ++ * ++ * @param[in] dev Device descriptor structure ++ * ++ * @return the line number used or A4L_IRQ_UNUSED if no interrupt ++ * is registered. 
++ * ++ */ ++unsigned int a4l_get_irq(struct a4l_device * dev); ++EXPORT_SYMBOL_GPL(a4l_get_irq); ++ ++/** ++ * @brief Register an interrupt handler for a specific device ++ * ++ * @param[in] dev Device descriptor structure ++ * @param[in] irq Line number of the addressed IRQ ++ * @param[in] handler Interrupt handler ++ * @param[in] flags Registration flags: ++ * - RTDM_IRQTYPE_SHARED: enable IRQ-sharing with other drivers ++ * (Warning: real-time drivers and non-real-time drivers cannot ++ * share an interrupt line). ++ * - RTDM_IRQTYPE_EDGE: mark IRQ as edge-triggered (Warning: this flag ++ * is meaningless in RTDM-less context). ++ * - A4L_IRQ_DISABLED: keep IRQ disabled when calling the action ++ * handler (Warning: this flag is ignored in RTDM-enabled ++ * configuration). ++ * @param[in] cookie Pointer to be passed to the interrupt handler on ++ * invocation ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_request_irq(struct a4l_device * dev, ++ unsigned int irq, ++ a4l_irq_hdlr_t handler, ++ unsigned long flags, void *cookie); ++EXPORT_SYMBOL_GPL(a4l_request_irq); ++ ++/** ++ * @brief Release an interrupt handler for a specific device ++ * ++ * @param[in] dev Device descriptor structure ++ * @param[in] irq Line number of the addressed IRQ ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_free_irq(struct a4l_device * dev, unsigned int irq); ++EXPORT_SYMBOL_GPL(a4l_free_irq); ++ ++/** @} */ ++ ++/* --- Misc section --- */ ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_misc Misc services ++ * @{ ++ */ ++ ++/** ++ * @brief Get the absolute time in nanoseconds ++ * ++ * @return the absolute time expressed in nanoseconds ++ * ++ */ ++unsigned long long a4l_get_time(void); ++EXPORT_SYMBOL_GPL(a4l_get_time); ++ ++/** @} */ +--- linux/drivers/xenomai/analogy/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/Kconfig 2021-04-07 16:01:27.858633265 +0800 +@@ -0,0 +1,56 @@ ++menu "ANALOGY drivers" ++ ++config XENO_DRIVERS_ANALOGY ++ tristate "ANALOGY interface" ++ help ++ ++ ANALOGY is a framework aimed at supporting data acquisition ++ devices. ++ ++config XENO_DRIVERS_ANALOGY_DEBUG ++ depends on XENO_DRIVERS_ANALOGY ++ bool "Analogy debug trace" ++ default n ++ help ++ ++ Enable debugging traces in Analogy so as to monitor Analogy's ++ core and drivers behaviours. ++ ++config XENO_DRIVERS_ANALOGY_DEBUG_FTRACE ++ depends on XENO_DRIVERS_ANALOGY_DEBUG ++ bool "Analogy debug ftrace" ++ default n ++ help ++ ++ Route the Analogy a4l_dbg and a4l_info statements to /sys/kernel/debug/ ++ ++config XENO_DRIVERS_ANALOGY_DEBUG_LEVEL ++ depends on XENO_DRIVERS_ANALOGY_DEBUG ++ int "Analogy core debug level threshold" ++ default 0 ++ help ++ ++ Define the level above which the debugging traces will not be ++ displayed. ++ ++ WARNING: this threshold is only applied on the Analogy ++ core. That will not affect the driver. ++ ++config XENO_DRIVERS_ANALOGY_DRIVER_DEBUG_LEVEL ++ depends on XENO_DRIVERS_ANALOGY_DEBUG ++ int "Analogy driver debug level threshold" ++ default 0 ++ help ++ ++ Define the level above which the debugging traces will not be ++ displayed. ++ ++ WARNING: this threshold is only applied on the Analogy ++ driver. That will not affect the core. 
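(Editor's illustrative sketch, not part of the patch: the buffer and IRQ services documented in driver_facilities.c above are meant to be chained together in a driver's interrupt path. The handler prototype below follows the usual a4l_irq_hdlr_t convention, which is assumed here since only the registration call is documented above; my_drain_fifo(), priv->irq and the sample buffer size are hypothetical placeholders.)

```c
/* Sketch only: feed the Analogy ring buffer from an interrupt handler. */
static int my_isr(unsigned int irq, void *d)	/* a4l_irq_hdlr_t convention (assumed) */
{
	struct a4l_subdevice *subd = d;
	uint16_t samples[64];
	unsigned long count;

	/* Drain the acquisition FIFO -- device-specific placeholder helper. */
	count = my_drain_fifo(subd, samples, sizeof(samples));

	/* Copy the freshly acquired data into the asynchronous buffer. */
	if (a4l_buf_put(subd, samples, count) < 0) {
		/* Report the failed transfer to the waiting user process. */
		a4l_buf_evt(subd, A4L_BUF_ERROR);
		return 0;
	}

	/* Wake up the process blocked on a read of this subdevice. */
	a4l_buf_evt(subd, 0);
	return 0;
}

/* Registered once from the attach() handler, e.g.:
 *	a4l_request_irq(dev, priv->irq, my_isr, RTDM_IRQTYPE_SHARED, subd);
 */
```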
++ ++source "drivers/xenomai/analogy/testing/Kconfig" ++source "drivers/xenomai/analogy/intel/Kconfig" ++source "drivers/xenomai/analogy/national_instruments/Kconfig" ++source "drivers/xenomai/analogy/sensoray/Kconfig" ++ ++endmenu +--- linux/drivers/xenomai/analogy/device.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/device.c 2021-04-07 16:01:27.853633272 +0800 +@@ -0,0 +1,459 @@ ++/* ++ * Analogy for Linux, device related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++#include "proc.h" ++ ++static struct a4l_device a4l_devs[A4L_NB_DEVICES]; ++ ++/* --- Device tab management functions --- */ ++ ++void a4l_init_devs(void) ++{ ++ int i; ++ memset(a4l_devs, 0, A4L_NB_DEVICES * sizeof(struct a4l_device)); ++ for (i = 0; i < A4L_NB_DEVICES; i++) { ++ rtdm_lock_init(&a4l_devs[i].lock); ++ a4l_devs[i].transfer.irq_desc.irq = A4L_IRQ_UNUSED; ++ } ++} ++ ++int a4l_check_cleanup_devs(void) ++{ ++ int i, ret = 0; ++ ++ for (i = 0; i < A4L_NB_DEVICES && ret == 0; i++) ++ if (test_bit(A4L_DEV_ATTACHED_NR, &a4l_devs[i].flags)) ++ ret = -EBUSY; ++ ++ return ret; ++} ++ ++void a4l_set_dev(struct a4l_device_context *cxt) ++{ ++ /* Retrieve the minor index */ ++ const int minor = a4l_get_minor(cxt); ++ /* Fill the dev fields accordingly */ ++ cxt->dev = &(a4l_devs[minor]); ++} ++ ++/* --- Device tab proc section --- */ ++ ++#ifdef CONFIG_PROC_FS ++ ++int a4l_rdproc_devs(struct seq_file *p, void *data) ++{ ++ int i; ++ ++ seq_printf(p, "-- Analogy devices --\n\n"); ++ seq_printf(p, "| idx | status | driver\n"); ++ ++ for (i = 0; i < A4L_NB_DEVICES; i++) { ++ char *status, *name; ++ ++ /* Gets the device's state */ ++ if (a4l_devs[i].flags == 0) { ++ status = "Unused"; ++ name = "No driver"; ++ } else if (test_bit(A4L_DEV_ATTACHED_NR, &a4l_devs[i].flags)) { ++ status = "Linked"; ++ name = a4l_devs[i].driver->driver_name; ++ } else { ++ status = "Broken"; ++ name = "Unknown"; ++ } ++ ++ seq_printf(p, "| %02d | %s | %s\n", i, status, name); ++ } ++ return 0; ++} ++ ++static int a4l_proc_transfer_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, a4l_rdproc_transfer, PDE_DATA(inode)); ++} ++ ++static const struct file_operations a4l_proc_transfer_ops = { ++ .open = a4l_proc_transfer_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++int a4l_proc_attach(struct a4l_device_context * cxt) ++{ ++ int ret = 0; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct proc_dir_entry *entry; ++ char *entry_name; ++ ++ /* Allocate the buffer for the file name */ ++ entry_name = rtdm_malloc(A4L_NAMELEN + 4); ++ if (entry_name == NULL) { ++ __a4l_err("a4l_proc_attach: failed to allocate 
buffer\n"); ++ return -ENOMEM; ++ } ++ ++ /* Create the proc file name */ ++ ksformat(entry_name, A4L_NAMELEN + 4, "%02d-%s", ++ a4l_get_minor(cxt), dev->driver->board_name); ++ ++ /* Create the proc entry */ ++ entry = proc_create_data(entry_name, 0444, a4l_proc_root, ++ &a4l_proc_transfer_ops, &dev->transfer); ++ if (entry == NULL) { ++ __a4l_err("a4l_proc_attach: " ++ "failed to create /proc/analogy/%s\n", ++ entry_name); ++ ret = -ENOMEM; ++ } ++ ++ rtdm_free(entry_name); ++ ++ return ret; ++} ++ ++void a4l_proc_detach(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ char *entry_name; ++ ++ entry_name = rtdm_malloc(A4L_NAMELEN + 4); ++ if (entry_name == NULL) { ++ __a4l_err("a4l_proc_detach: " ++ "failed to allocate filename buffer\n"); ++ return; ++ } ++ ++ ksformat(entry_name, A4L_NAMELEN + 4, "%02d-%s", ++ a4l_get_minor(cxt), dev->driver->board_name); ++ ++ remove_proc_entry(entry_name, a4l_proc_root); ++ ++ rtdm_free(entry_name); ++} ++ ++#else /* !CONFIG_PROC_FS */ ++ ++int a4l_proc_attach(struct a4l_device_context * cxt) ++{ ++ return 0; ++} ++ ++void a4l_proc_detach(struct a4l_device_context * cxt) ++{ ++} ++ ++#endif /* CONFIG_PROC_FS */ ++ ++/* --- Attach / detach section --- */ ++ ++int a4l_fill_lnkdesc(struct a4l_device_context * cxt, ++ a4l_lnkdesc_t * link_arg, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int ret; ++ char *tmpname = NULL; ++ void *tmpopts = NULL; ++ ++ ret = rtdm_safe_copy_from_user(fd, ++ link_arg, arg, sizeof(a4l_lnkdesc_t)); ++ if (ret != 0) { ++ __a4l_err("a4l_fill_lnkdesc: " ++ "call1(copy_from_user) failed\n"); ++ goto out_get_lnkdesc; ++ } ++ ++ if (link_arg->bname_size != 0 && link_arg->bname != NULL) { ++ tmpname = rtdm_malloc(link_arg->bname_size + 1); ++ if (tmpname == NULL) { ++ __a4l_err("a4l_fill_lnkdesc: " ++ "call1(alloc) failed\n"); ++ ret = -ENOMEM; ++ goto out_get_lnkdesc; ++ } ++ tmpname[link_arg->bname_size] = 0; ++ ++ ret = rtdm_safe_copy_from_user(fd, ++ tmpname, ++ link_arg->bname, ++ link_arg->bname_size); ++ if (ret != 0) { ++ __a4l_err("a4l_fill_lnkdesc: " ++ "call2(copy_from_user) failed\n"); ++ goto out_get_lnkdesc; ++ } ++ } else { ++ __a4l_err("a4l_fill_lnkdesc: board name missing\n"); ++ ret = -EINVAL; ++ goto out_get_lnkdesc; ++ } ++ ++ if (link_arg->opts_size != 0 && link_arg->opts != NULL) { ++ tmpopts = rtdm_malloc(link_arg->opts_size); ++ ++ if (tmpopts == NULL) { ++ __a4l_err("a4l_fill_lnkdesc: " ++ "call2(alloc) failed\n"); ++ ret = -ENOMEM; ++ goto out_get_lnkdesc; ++ } ++ ++ ret = rtdm_safe_copy_from_user(fd, ++ tmpopts, ++ link_arg->opts, ++ link_arg->opts_size); ++ if (ret != 0) { ++ __a4l_err("a4l_fill_lnkdesc: " ++ "call3(copy_from_user) failed\n"); ++ goto out_get_lnkdesc; ++ } ++ } ++ ++ link_arg->bname = tmpname; ++ link_arg->opts = tmpopts; ++ ++ out_get_lnkdesc: ++ ++ if (tmpname == NULL) { ++ link_arg->bname = NULL; ++ link_arg->bname_size = 0; ++ } ++ ++ if (tmpopts == NULL) { ++ link_arg->opts = NULL; ++ link_arg->opts_size = 0; ++ } ++ ++ return ret; ++} ++ ++void a4l_free_lnkdesc(struct a4l_device_context * cxt, a4l_lnkdesc_t * link_arg) ++{ ++ if (link_arg->bname != NULL) ++ rtdm_free(link_arg->bname); ++ ++ if (link_arg->opts != NULL) ++ rtdm_free(link_arg->opts); ++} ++ ++int a4l_assign_driver(struct a4l_device_context * cxt, ++ struct a4l_driver * drv, a4l_lnkdesc_t * link_arg) ++{ ++ int ret = 0; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ ++ dev->driver = drv; ++ INIT_LIST_HEAD(&dev->subdvsq); ++ ++ if (drv->privdata_size == 0) ++ 
__a4l_dbg(1, core_dbg, " warning! " ++ "the field priv will not be usable\n"); ++ else { ++ dev->priv = rtdm_malloc(drv->privdata_size); ++ if (dev->priv == NULL) { ++ __a4l_err("a4l_assign_driver: " ++ "call(alloc) failed\n"); ++ ret = -ENOMEM; ++ goto out_assign_driver; ++ } ++ ++ /* Initialize the private data even if it not our role ++ (the driver should do it), that may prevent hard to ++ find bugs */ ++ memset(dev->priv, 0, drv->privdata_size); ++ } ++ ++ if ((ret = drv->attach(dev, link_arg)) != 0) ++ __a4l_err("a4l_assign_driver: " ++ "call(drv->attach) failed (ret=%d)\n", ++ ret); ++ ++out_assign_driver: ++ ++ /* Increments module's count */ ++ if (ret == 0 && (!try_module_get(drv->owner))) { ++ __a4l_err("a4l_assign_driver: " ++ "driver's owner field wrongly set\n"); ++ ret = -ENODEV; ++ } ++ ++ if (ret != 0 && dev->priv != NULL) { ++ rtdm_free(dev->priv); ++ dev->driver = NULL; ++ } ++ ++ return ret; ++} ++ ++int a4l_release_driver(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_subdevice *subd, *tmp; ++ int ret = 0; ++ ++ if ((ret = dev->driver->detach(dev)) != 0) ++ goto out_release_driver; ++ ++ module_put(dev->driver->owner); ++ ++ /* In case, the driver developer did not free the subdevices */ ++ if (!list_empty(&dev->subdvsq)) ++ list_for_each_entry_safe(subd, tmp, &dev->subdvsq, list) { ++ list_del(&subd->list); ++ rtdm_free(subd); ++ } ++ ++ /* Free the private field */ ++ if (dev->priv) ++ rtdm_free(dev->priv); ++ ++ dev->driver = NULL; ++ ++out_release_driver: ++ return ret; ++} ++ ++int a4l_device_attach(struct a4l_device_context * cxt, void *arg) ++{ ++ int ret = 0; ++ a4l_lnkdesc_t link_arg; ++ struct a4l_driver *drv = NULL; ++ ++ if ((ret = a4l_fill_lnkdesc(cxt, &link_arg, arg)) != 0) ++ goto out_attach; ++ ++ if ((ret = a4l_lct_drv(link_arg.bname, &drv)) != 0) { ++ __a4l_err("a4l_device_attach: " ++ "cannot find board name %s\n", link_arg.bname); ++ goto out_attach; ++ } ++ ++ if ((ret = a4l_assign_driver(cxt, drv, &link_arg)) != 0) ++ goto out_attach; ++ ++ out_attach: ++ a4l_free_lnkdesc(cxt, &link_arg); ++ return ret; ++} ++ ++int a4l_device_detach(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ ++ if (dev->driver == NULL) { ++ __a4l_err("a4l_device_detach: " ++ "incoherent state, driver not reachable\n"); ++ return -ENXIO; ++ } ++ ++ return a4l_release_driver(cxt); ++} ++ ++/* --- IOCTL / FOPS functions --- */ ++ ++int a4l_ioctl_devcfg(struct a4l_device_context * cxt, void *arg) ++{ ++ int ret = 0; ++ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ if (arg == NULL) { ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &(a4l_get_dev(cxt)->flags))) { ++ __a4l_err("a4l_ioctl_devcfg: " ++ "free device, no driver to detach\n"); ++ return -EINVAL; ++ } ++ /* Pre-cleanup of the transfer structure, we ensure ++ that nothing is busy */ ++ if ((ret = a4l_precleanup_transfer(cxt)) != 0) ++ return ret; ++ /* Remove the related proc file */ ++ a4l_proc_detach(cxt); ++ /* Free the device and the driver from each other */ ++ if ((ret = a4l_device_detach(cxt)) == 0) ++ clear_bit(A4L_DEV_ATTACHED_NR, ++ &(a4l_get_dev(cxt)->flags)); ++ /* Free the transfer structure and its related data */ ++ if ((ret = a4l_cleanup_transfer(cxt)) != 0) ++ return ret; ++ } else { ++ /* Basic checking */ ++ if (test_bit ++ (A4L_DEV_ATTACHED_NR, &(a4l_get_dev(cxt)->flags))) { ++ __a4l_err("a4l_ioctl_devcfg: " ++ "linked device, cannot attach more driver\n"); ++ return -EINVAL; ++ } ++ /* 
Pre-initialization of the transfer structure */ ++ a4l_presetup_transfer(cxt); ++ /* Link the device with the driver */ ++ if ((ret = a4l_device_attach(cxt, arg)) != 0) ++ return ret; ++ /* Create the transfer structure and ++ the related proc file */ ++ if ((ret = a4l_setup_transfer(cxt)) != 0 || ++ (ret = a4l_proc_attach(cxt)) != 0) ++ a4l_device_detach(cxt); ++ else ++ set_bit(A4L_DEV_ATTACHED_NR, ++ &(a4l_get_dev(cxt)->flags)); ++ } ++ ++ return ret; ++} ++ ++int a4l_ioctl_devinfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ a4l_dvinfo_t info; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ ++ memset(&info, 0, sizeof(a4l_dvinfo_t)); ++ ++ if (test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ int len = (strlen(dev->driver->board_name) > A4L_NAMELEN) ? ++ A4L_NAMELEN : strlen(dev->driver->board_name); ++ ++ memcpy(info.board_name, dev->driver->board_name, len); ++ ++ len = (strlen(dev->driver->driver_name) > A4L_NAMELEN) ? ++ A4L_NAMELEN : strlen(dev->driver->driver_name); ++ ++ memcpy(info.driver_name, dev->driver->driver_name, len); ++ ++ info.nb_subd = dev->transfer.nb_subd; ++ /* TODO: for API compatibility issue, find the first ++ read subdevice and write subdevice */ ++ } ++ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, &info, sizeof(a4l_dvinfo_t)) != 0) ++ return -EFAULT; ++ ++ return 0; ++} +--- linux/drivers/xenomai/analogy/national_instruments/ni_tio.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/ni_tio.h 2021-04-07 16:01:27.849633278 +0800 +@@ -0,0 +1,1192 @@ ++/* ++ * Hardware driver for NI general purpose counter ++ * Copyright (C) 2006 Frank Mori Hess ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef __ANALOGY_NI_TIO_H__ ++#define __ANALOGY_NI_TIO_H__ ++ ++#include ++ ++#ifdef CONFIG_PCI ++#include "mite.h" ++#endif ++ ++enum ni_gpct_register { ++ NITIO_G0_Autoincrement_Reg, ++ NITIO_G1_Autoincrement_Reg, ++ NITIO_G2_Autoincrement_Reg, ++ NITIO_G3_Autoincrement_Reg, ++ NITIO_G0_Command_Reg, ++ NITIO_G1_Command_Reg, ++ NITIO_G2_Command_Reg, ++ NITIO_G3_Command_Reg, ++ NITIO_G0_HW_Save_Reg, ++ NITIO_G1_HW_Save_Reg, ++ NITIO_G2_HW_Save_Reg, ++ NITIO_G3_HW_Save_Reg, ++ NITIO_G0_SW_Save_Reg, ++ NITIO_G1_SW_Save_Reg, ++ NITIO_G2_SW_Save_Reg, ++ NITIO_G3_SW_Save_Reg, ++ NITIO_G0_Mode_Reg, ++ NITIO_G1_Mode_Reg, ++ NITIO_G2_Mode_Reg, ++ NITIO_G3_Mode_Reg, ++ NITIO_G0_LoadA_Reg, ++ NITIO_G1_LoadA_Reg, ++ NITIO_G2_LoadA_Reg, ++ NITIO_G3_LoadA_Reg, ++ NITIO_G0_LoadB_Reg, ++ NITIO_G1_LoadB_Reg, ++ NITIO_G2_LoadB_Reg, ++ NITIO_G3_LoadB_Reg, ++ NITIO_G0_Input_Select_Reg, ++ NITIO_G1_Input_Select_Reg, ++ NITIO_G2_Input_Select_Reg, ++ NITIO_G3_Input_Select_Reg, ++ NITIO_G0_Counting_Mode_Reg, ++ NITIO_G1_Counting_Mode_Reg, ++ NITIO_G2_Counting_Mode_Reg, ++ NITIO_G3_Counting_Mode_Reg, ++ NITIO_G0_Second_Gate_Reg, ++ NITIO_G1_Second_Gate_Reg, ++ NITIO_G2_Second_Gate_Reg, ++ NITIO_G3_Second_Gate_Reg, ++ NITIO_G01_Status_Reg, ++ NITIO_G23_Status_Reg, ++ NITIO_G01_Joint_Reset_Reg, ++ NITIO_G23_Joint_Reset_Reg, ++ NITIO_G01_Joint_Status1_Reg, ++ NITIO_G23_Joint_Status1_Reg, ++ NITIO_G01_Joint_Status2_Reg, ++ NITIO_G23_Joint_Status2_Reg, ++ NITIO_G0_DMA_Config_Reg, ++ NITIO_G1_DMA_Config_Reg, ++ NITIO_G2_DMA_Config_Reg, ++ NITIO_G3_DMA_Config_Reg, ++ NITIO_G0_DMA_Status_Reg, ++ NITIO_G1_DMA_Status_Reg, ++ NITIO_G2_DMA_Status_Reg, ++ NITIO_G3_DMA_Status_Reg, ++ NITIO_G0_ABZ_Reg, ++ NITIO_G1_ABZ_Reg, ++ NITIO_G0_Interrupt_Acknowledge_Reg, ++ NITIO_G1_Interrupt_Acknowledge_Reg, ++ NITIO_G2_Interrupt_Acknowledge_Reg, ++ NITIO_G3_Interrupt_Acknowledge_Reg, ++ NITIO_G0_Status_Reg, ++ NITIO_G1_Status_Reg, ++ NITIO_G2_Status_Reg, ++ NITIO_G3_Status_Reg, ++ NITIO_G0_Interrupt_Enable_Reg, ++ NITIO_G1_Interrupt_Enable_Reg, ++ NITIO_G2_Interrupt_Enable_Reg, ++ NITIO_G3_Interrupt_Enable_Reg, ++ NITIO_Num_Registers, ++}; ++ ++static inline enum ni_gpct_register NITIO_Gi_Autoincrement_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Autoincrement_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Autoincrement_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Autoincrement_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Autoincrement_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Command_Reg(unsigned counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Command_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Command_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Command_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Command_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Counting_Mode_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Counting_Mode_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Counting_Mode_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Counting_Mode_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Counting_Mode_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Input_Select_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Input_Select_Reg; ++ break; ++ case 1: 
++ return NITIO_G1_Input_Select_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Input_Select_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Input_Select_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gxx_Joint_Reset_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ case 1: ++ return NITIO_G01_Joint_Reset_Reg; ++ break; ++ case 2: ++ case 3: ++ return NITIO_G23_Joint_Reset_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gxx_Joint_Status1_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ case 1: ++ return NITIO_G01_Joint_Status1_Reg; ++ break; ++ case 2: ++ case 3: ++ return NITIO_G23_Joint_Status1_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gxx_Joint_Status2_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ case 1: ++ return NITIO_G01_Joint_Status2_Reg; ++ break; ++ case 2: ++ case 3: ++ return NITIO_G23_Joint_Status2_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gxx_Status_Reg(unsigned counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ case 1: ++ return NITIO_G01_Status_Reg; ++ break; ++ case 2: ++ case 3: ++ return NITIO_G23_Status_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_LoadA_Reg(unsigned counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_LoadA_Reg; ++ break; ++ case 1: ++ return NITIO_G1_LoadA_Reg; ++ break; ++ case 2: ++ return NITIO_G2_LoadA_Reg; ++ break; ++ case 3: ++ return NITIO_G3_LoadA_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_LoadB_Reg(unsigned counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_LoadB_Reg; ++ break; ++ case 1: ++ return NITIO_G1_LoadB_Reg; ++ break; ++ case 2: ++ return NITIO_G2_LoadB_Reg; ++ break; ++ case 3: ++ return NITIO_G3_LoadB_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Mode_Reg(unsigned counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Mode_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Mode_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Mode_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Mode_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_SW_Save_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_SW_Save_Reg; ++ break; ++ case 1: ++ return NITIO_G1_SW_Save_Reg; ++ break; ++ case 2: ++ return NITIO_G2_SW_Save_Reg; ++ break; ++ case 3: ++ return NITIO_G3_SW_Save_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Second_Gate_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Second_Gate_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Second_Gate_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Second_Gate_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Second_Gate_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_DMA_Config_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_DMA_Config_Reg; ++ break; ++ 
case 1: ++ return NITIO_G1_DMA_Config_Reg; ++ break; ++ case 2: ++ return NITIO_G2_DMA_Config_Reg; ++ break; ++ case 3: ++ return NITIO_G3_DMA_Config_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_DMA_Status_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_DMA_Status_Reg; ++ break; ++ case 1: ++ return NITIO_G1_DMA_Status_Reg; ++ break; ++ case 2: ++ return NITIO_G2_DMA_Status_Reg; ++ break; ++ case 3: ++ return NITIO_G3_DMA_Status_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_ABZ_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_ABZ_Reg; ++ break; ++ case 1: ++ return NITIO_G1_ABZ_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Interrupt_Acknowledge_Reg(int ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Interrupt_Acknowledge_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Interrupt_Acknowledge_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Interrupt_Acknowledge_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Interrupt_Acknowledge_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Status_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Status_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Status_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Status_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Status_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Interrupt_Enable_Reg(int ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Interrupt_Enable_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Interrupt_Enable_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Interrupt_Enable_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Interrupt_Enable_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++enum ni_gpct_variant { ++ ni_gpct_variant_e_series, ++ ni_gpct_variant_m_series, ++ ni_gpct_variant_660x ++}; ++ ++struct ni_gpct { ++ struct ni_gpct_device *counter_dev; ++ unsigned counter_index; ++ unsigned chip_index; ++ uint64_t clock_period_ps; /* clock period in picoseconds */ ++ struct mite_channel *mite_chan; ++ rtdm_lock_t lock; ++}; ++ ++struct ni_gpct_device { ++ struct a4l_device *dev; ++ void (*write_register)(struct ni_gpct * counter, ++ unsigned int bits, enum ni_gpct_register reg); ++ unsigned (*read_register)(struct ni_gpct * counter, ++ enum ni_gpct_register reg); ++ enum ni_gpct_variant variant; ++ struct ni_gpct **counters; ++ unsigned num_counters; ++ unsigned regs[NITIO_Num_Registers]; ++ rtdm_lock_t regs_lock; ++}; ++ ++#define Gi_Auto_Increment_Mask 0xff ++#define Gi_Up_Down_Shift 5 ++ ++#define Gi_Arm_Bit 0x1 ++#define Gi_Save_Trace_Bit 0x2 ++#define Gi_Load_Bit 0x4 ++#define Gi_Disarm_Bit 0x10 ++#define Gi_Up_Down_Mask (0x3 << Gi_Up_Down_Shift) ++#define Gi_Always_Down_Bits (0x0 << Gi_Up_Down_Shift) ++#define Gi_Always_Up_Bits (0x1 << Gi_Up_Down_Shift) ++#define Gi_Up_Down_Hardware_IO_Bits (0x2 << Gi_Up_Down_Shift) ++#define Gi_Up_Down_Hardware_Gate_Bits (0x3 << Gi_Up_Down_Shift) ++#define Gi_Write_Switch_Bit 0x80 ++#define Gi_Synchronize_Gate_Bit 0x100 ++#define Gi_Little_Big_Endian_Bit 0x200 ++#define Gi_Bank_Switch_Start_Bit 0x400 ++#define Gi_Bank_Switch_Mode_Bit 0x800 
++#define Gi_Bank_Switch_Enable_Bit 0x1000 ++#define Gi_Arm_Copy_Bit 0x2000 ++#define Gi_Save_Trace_Copy_Bit 0x4000 ++#define Gi_Disarm_Copy_Bit 0x8000 ++ ++#define Gi_Index_Phase_Bitshift 5 ++#define Gi_HW_Arm_Select_Shift 8 ++ ++#define Gi_Counting_Mode_Mask 0x7 ++#define Gi_Counting_Mode_Normal_Bits 0x0 ++#define Gi_Counting_Mode_QuadratureX1_Bits 0x1 ++#define Gi_Counting_Mode_QuadratureX2_Bits 0x2 ++#define Gi_Counting_Mode_QuadratureX4_Bits 0x3 ++#define Gi_Counting_Mode_Two_Pulse_Bits 0x4 ++#define Gi_Counting_Mode_Sync_Source_Bits 0x6 ++#define Gi_Index_Mode_Bit 0x10 ++#define Gi_Index_Phase_Mask (0x3 << Gi_Index_Phase_Bitshift) ++#define Gi_Index_Phase_LowA_LowB (0x0 << Gi_Index_Phase_Bitshift) ++#define Gi_Index_Phase_LowA_HighB (0x1 << Gi_Index_Phase_Bitshift) ++#define Gi_Index_Phase_HighA_LowB (0x2 << Gi_Index_Phase_Bitshift) ++#define Gi_Index_Phase_HighA_HighB (0x3 << Gi_Index_Phase_Bitshift) ++ ++/* From m-series example code, ++ not documented in 660x register level manual */ ++#define Gi_HW_Arm_Enable_Bit 0x80 ++/* From m-series example code, ++ not documented in 660x register level manual */ ++#define Gi_660x_HW_Arm_Select_Mask (0x7 << Gi_HW_Arm_Select_Shift) ++#define Gi_660x_Prescale_X8_Bit 0x1000 ++#define Gi_M_Series_Prescale_X8_Bit 0x2000 ++#define Gi_M_Series_HW_Arm_Select_Mask (0x1f << Gi_HW_Arm_Select_Shift) ++/* Must be set for clocks over 40MHz, ++ which includes synchronous counting and quadrature modes */ ++#define Gi_660x_Alternate_Sync_Bit 0x2000 ++#define Gi_M_Series_Alternate_Sync_Bit 0x4000 ++/* From m-series example code, ++ not documented in 660x register level manual */ ++#define Gi_660x_Prescale_X2_Bit 0x4000 ++#define Gi_M_Series_Prescale_X2_Bit 0x8000 ++ ++static inline unsigned int Gi_Alternate_Sync_Bit(enum ni_gpct_variant variant) ++{ ++ switch (variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ return Gi_M_Series_Alternate_Sync_Bit; ++ break; ++ case ni_gpct_variant_660x: ++ return Gi_660x_Alternate_Sync_Bit; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline unsigned int Gi_Prescale_X2_Bit(enum ni_gpct_variant variant) ++{ ++ switch (variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ return Gi_M_Series_Prescale_X2_Bit; ++ break; ++ case ni_gpct_variant_660x: ++ return Gi_660x_Prescale_X2_Bit; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline unsigned int Gi_Prescale_X8_Bit(enum ni_gpct_variant variant) ++{ ++ switch (variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ return Gi_M_Series_Prescale_X8_Bit; ++ break; ++ case ni_gpct_variant_660x: ++ return Gi_660x_Prescale_X8_Bit; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline unsigned int Gi_HW_Arm_Select_Mask(enum ni_gpct_variant variant) ++{ ++ switch (variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ return Gi_M_Series_HW_Arm_Select_Mask; ++ break; ++ case ni_gpct_variant_660x: ++ return Gi_660x_HW_Arm_Select_Mask; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++#define NI_660x_Timebase_1_Clock 0x0 /* 20MHz */ ++#define NI_660x_Source_Pin_i_Clock 0x1 ++#define NI_660x_Next_Gate_Clock 0xa ++#define NI_660x_Timebase_2_Clock 0x12 /* 100KHz */ ++#define NI_660x_Next_TC_Clock 0x13 ++#define NI_660x_Timebase_3_Clock 0x1e /* 80MHz */ ++#define NI_660x_Logic_Low_Clock 
0x1f ++ ++#define ni_660x_max_rtsi_channel 6 ++#define ni_660x_max_source_pin 7 ++ ++static inline unsigned int NI_660x_RTSI_Clock(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_rtsi_channel); ++ return (0xb + n); ++} ++ ++static inline unsigned int NI_660x_Source_Pin_Clock(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_source_pin); ++ return (0x2 + n); ++} ++ ++/* Clock sources for ni e and m series boards, ++ get bits with Gi_Source_Select_Bits() */ ++#define NI_M_Series_Timebase_1_Clock 0x0 /* 20MHz */ ++#define NI_M_Series_Timebase_2_Clock 0x12 /* 100KHz */ ++#define NI_M_Series_Next_TC_Clock 0x13 ++#define NI_M_Series_Next_Gate_Clock 0x14 /* when Gi_Src_SubSelect = 0 */ ++#define NI_M_Series_PXI_Star_Trigger_Clock 0x14 /* when Gi_Src_SubSelect = 1 */ ++#define NI_M_Series_PXI10_Clock 0x1d ++#define NI_M_Series_Timebase_3_Clock 0x1e /* 80MHz, when Gi_Src_SubSelect = 0 */ ++#define NI_M_Series_Analog_Trigger_Out_Clock 0x1e /* when Gi_Src_SubSelect = 1 */ ++#define NI_M_Series_Logic_Low_Clock 0x1f ++ ++#define ni_m_series_max_pfi_channel 15 ++#define ni_m_series_max_rtsi_channel 7 ++ ++static inline unsigned int NI_M_Series_PFI_Clock(unsigned int n) ++{ ++ BUG_ON(n > ni_m_series_max_pfi_channel); ++ if (n < 10) ++ return 1 + n; ++ else ++ return 0xb + n; ++} ++ ++static inline unsigned int NI_M_Series_RTSI_Clock(unsigned int n) ++{ ++ BUG_ON(n > ni_m_series_max_rtsi_channel); ++ if (n == 7) ++ return 0x1b; ++ else ++ return 0xb + n; ++} ++ ++#define NI_660x_Source_Pin_i_Gate_Select 0x0 ++#define NI_660x_Gate_Pin_i_Gate_Select 0x1 ++#define NI_660x_Next_SRC_Gate_Select 0xa ++#define NI_660x_Next_Out_Gate_Select 0x14 ++#define NI_660x_Logic_Low_Gate_Select 0x1f ++#define ni_660x_max_gate_pin 7 ++ ++static inline unsigned int NI_660x_Gate_Pin_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_gate_pin); ++ return 0x2 + n; ++} ++ ++static inline unsigned int NI_660x_RTSI_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_rtsi_channel); ++ return 0xb + n; ++} ++ ++ ++#define NI_M_Series_Timestamp_Mux_Gate_Select 0x0 ++#define NI_M_Series_AI_START2_Gate_Select 0x12 ++#define NI_M_Series_PXI_Star_Trigger_Gate_Select 0x13 ++#define NI_M_Series_Next_Out_Gate_Select 0x14 ++#define NI_M_Series_AI_START1_Gate_Select 0x1c ++#define NI_M_Series_Next_SRC_Gate_Select 0x1d ++#define NI_M_Series_Analog_Trigger_Out_Gate_Select 0x1e ++#define NI_M_Series_Logic_Low_Gate_Select 0x1f ++ ++static inline unsigned int NI_M_Series_RTSI_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_m_series_max_rtsi_channel); ++ if (n == 7) ++ return 0x1b; ++ return 0xb + n; ++} ++ ++static inline unsigned int NI_M_Series_PFI_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_m_series_max_pfi_channel); ++ if (n < 10) ++ return 1 + n; ++ return 0xb + n; ++} ++ ++ ++#define Gi_Source_Select_Shift 2 ++#define Gi_Gate_Select_Shift 7 ++ ++#define Gi_Read_Acknowledges_Irq 0x1 /* not present on 660x */ ++#define Gi_Write_Acknowledges_Irq 0x2 /* not present on 660x */ ++#define Gi_Source_Select_Mask 0x7c ++#define Gi_Gate_Select_Mask (0x1f << Gi_Gate_Select_Shift) ++#define Gi_Gate_Select_Load_Source_Bit 0x1000 ++#define Gi_Or_Gate_Bit 0x2000 ++#define Gi_Output_Polarity_Bit 0x4000 /* set to invert */ ++#define Gi_Source_Polarity_Bit 0x8000 /* set to invert */ ++ ++#define Gi_Source_Select_Bits(x) ((x << Gi_Source_Select_Shift) & \ ++ Gi_Source_Select_Mask) ++#define Gi_Gate_Select_Bits(x) ((x << Gi_Gate_Select_Shift) & \ ++ Gi_Gate_Select_Mask) ++ ++#define Gi_Gating_Mode_Mask 0x3 ++#define Gi_Gating_Disabled_Bits 0x0 
++#define Gi_Level_Gating_Bits 0x1 ++#define Gi_Rising_Edge_Gating_Bits 0x2 ++#define Gi_Falling_Edge_Gating_Bits 0x3 ++#define Gi_Gate_On_Both_Edges_Bit 0x4 /* used in conjunction with ++ rising edge gating mode */ ++#define Gi_Trigger_Mode_for_Edge_Gate_Mask 0x18 ++#define Gi_Edge_Gate_Starts_Stops_Bits 0x0 ++#define Gi_Edge_Gate_Stops_Starts_Bits 0x8 ++#define Gi_Edge_Gate_Starts_Bits 0x10 ++#define Gi_Edge_Gate_No_Starts_or_Stops_Bits 0x18 ++#define Gi_Stop_Mode_Mask 0x60 ++#define Gi_Stop_on_Gate_Bits 0x00 ++#define Gi_Stop_on_Gate_or_TC_Bits 0x20 ++#define Gi_Stop_on_Gate_or_Second_TC_Bits 0x40 ++#define Gi_Load_Source_Select_Bit 0x80 ++#define Gi_Output_Mode_Mask 0x300 ++#define Gi_Output_TC_Pulse_Bits 0x100 ++#define Gi_Output_TC_Toggle_Bits 0x200 ++#define Gi_Output_TC_or_Gate_Toggle_Bits 0x300 ++#define Gi_Counting_Once_Mask 0xc00 ++#define Gi_No_Hardware_Disarm_Bits 0x000 ++#define Gi_Disarm_at_TC_Bits 0x400 ++#define Gi_Disarm_at_Gate_Bits 0x800 ++#define Gi_Disarm_at_TC_or_Gate_Bits 0xc00 ++#define Gi_Loading_On_TC_Bit 0x1000 ++#define Gi_Gate_Polarity_Bit 0x2000 ++#define Gi_Loading_On_Gate_Bit 0x4000 ++#define Gi_Reload_Source_Switching_Bit 0x8000 ++ ++#define NI_660x_Source_Pin_i_Second_Gate_Select 0x0 ++#define NI_660x_Up_Down_Pin_i_Second_Gate_Select 0x1 ++#define NI_660x_Next_SRC_Second_Gate_Select 0xa ++#define NI_660x_Next_Out_Second_Gate_Select 0x14 ++#define NI_660x_Selected_Gate_Second_Gate_Select 0x1e ++#define NI_660x_Logic_Low_Second_Gate_Select 0x1f ++ ++#define ni_660x_max_up_down_pin 7 ++ ++static inline ++unsigned int NI_660x_Up_Down_Pin_Second_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_up_down_pin); ++ return 0x2 + n; ++} ++static inline ++unsigned int NI_660x_RTSI_Second_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_rtsi_channel); ++ return 0xb + n; ++} ++ ++#define Gi_Second_Gate_Select_Shift 7 ++ ++/*FIXME: m-series has a second gate subselect bit */ ++/*FIXME: m-series second gate sources are undocumented (by NI)*/ ++#define Gi_Second_Gate_Mode_Bit 0x1 ++#define Gi_Second_Gate_Select_Mask (0x1f << Gi_Second_Gate_Select_Shift) ++#define Gi_Second_Gate_Polarity_Bit 0x2000 ++#define Gi_Second_Gate_Subselect_Bit 0x4000 /* m-series only */ ++#define Gi_Source_Subselect_Bit 0x8000 /* m-series only */ ++ ++static inline ++unsigned int Gi_Second_Gate_Select_Bits(unsigned int second_gate_select) ++{ ++ return (second_gate_select << Gi_Second_Gate_Select_Shift) & ++ Gi_Second_Gate_Select_Mask; ++} ++ ++#define G0_Save_Bit 0x1 ++#define G1_Save_Bit 0x2 ++#define G0_Counting_Bit 0x4 ++#define G1_Counting_Bit 0x8 ++#define G0_Next_Load_Source_Bit 0x10 ++#define G1_Next_Load_Source_Bit 0x20 ++#define G0_Stale_Data_Bit 0x40 ++#define G1_Stale_Data_Bit 0x80 ++#define G0_Armed_Bit 0x100 ++#define G1_Armed_Bit 0x200 ++#define G0_No_Load_Between_Gates_Bit 0x400 ++#define G1_No_Load_Between_Gates_Bit 0x800 ++#define G0_TC_Error_Bit 0x1000 ++#define G1_TC_Error_Bit 0x2000 ++#define G0_Gate_Error_Bit 0x4000 ++#define G1_Gate_Error_Bit 0x8000 ++ ++static inline unsigned int Gi_Counting_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Counting_Bit; ++ return G0_Counting_Bit; ++} ++ ++static inline unsigned int Gi_Armed_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Armed_Bit; ++ return G0_Armed_Bit; ++} ++ ++static inline unsigned int Gi_Next_Load_Source_Bit(unsigned counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Next_Load_Source_Bit; ++ return G0_Next_Load_Source_Bit; ++} ++ ++static 
inline unsigned int Gi_Stale_Data_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Stale_Data_Bit; ++ return G0_Stale_Data_Bit; ++} ++ ++static inline unsigned int Gi_TC_Error_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_TC_Error_Bit; ++ return G0_TC_Error_Bit; ++} ++ ++static inline unsigned int Gi_Gate_Error_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Gate_Error_Bit; ++ return G0_Gate_Error_Bit; ++} ++ ++/* Joint reset register bits */ ++static inline unsigned Gi_Reset_Bit(unsigned int counter_index) ++{ ++ return 0x1 << (2 + (counter_index % 2)); ++} ++ ++#define G0_Output_Bit 0x1 ++#define G1_Output_Bit 0x2 ++#define G0_HW_Save_Bit 0x1000 ++#define G1_HW_Save_Bit 0x2000 ++#define G0_Permanent_Stale_Bit 0x4000 ++#define G1_Permanent_Stale_Bit 0x8000 ++ ++static inline unsigned int Gi_Permanent_Stale_Bit(unsigned ++ counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Permanent_Stale_Bit; ++ return G0_Permanent_Stale_Bit; ++} ++ ++#define Gi_DMA_Enable_Bit 0x1 ++#define Gi_DMA_Write_Bit 0x2 ++#define Gi_DMA_Int_Bit 0x4 ++ ++#define Gi_DMA_Readbank_Bit 0x2000 ++#define Gi_DRQ_Error_Bit 0x4000 ++#define Gi_DRQ_Status_Bit 0x8000 ++ ++#define G0_Gate_Error_Confirm_Bit 0x20 ++#define G0_TC_Error_Confirm_Bit 0x40 ++ ++#define G1_Gate_Error_Confirm_Bit 0x2 ++#define G1_TC_Error_Confirm_Bit 0x4 ++ ++static inline unsigned int Gi_Gate_Error_Confirm_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Gate_Error_Confirm_Bit; ++ return G0_Gate_Error_Confirm_Bit; ++} ++ ++static inline unsigned int Gi_TC_Error_Confirm_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_TC_Error_Confirm_Bit; ++ return G0_TC_Error_Confirm_Bit; ++} ++ ++/* Bits that are the same in G0/G2 and G1/G3 interrupt acknowledge registers */ ++#define Gi_TC_Interrupt_Ack_Bit 0x4000 ++#define Gi_Gate_Interrupt_Ack_Bit 0x8000 ++ ++#define Gi_Gate_Interrupt_Bit 0x4 ++#define Gi_TC_Bit 0x8 ++#define Gi_Interrupt_Bit 0x8000 ++ ++#define G0_TC_Interrupt_Enable_Bit 0x40 ++#define G0_Gate_Interrupt_Enable_Bit 0x100 ++ ++#define G1_TC_Interrupt_Enable_Bit 0x200 ++#define G1_Gate_Interrupt_Enable_Bit 0x400 ++ ++static inline unsigned int Gi_Gate_Interrupt_Enable_Bit(unsigned int counter_index) ++{ ++ unsigned int bit; ++ ++ if (counter_index % 2) { ++ bit = G1_Gate_Interrupt_Enable_Bit; ++ } else { ++ bit = G0_Gate_Interrupt_Enable_Bit; ++ } ++ return bit; ++} ++ ++#define counter_status_mask (A4L_COUNTER_ARMED | A4L_COUNTER_COUNTING) ++ ++#define NI_USUAL_PFI_SELECT(x) ((x < 10) ? (0x1 + x) : (0xb + x)) ++#define NI_USUAL_RTSI_SELECT(x) ((x < 7 ) ? 
(0xb + x) : (0x1b + x)) ++ ++/* Mode bits for NI general-purpose counters, set with ++ INSN_CONFIG_SET_COUNTER_MODE */ ++#define NI_GPCT_COUNTING_MODE_SHIFT 16 ++#define NI_GPCT_INDEX_PHASE_BITSHIFT 20 ++#define NI_GPCT_COUNTING_DIRECTION_SHIFT 24 ++ ++#define NI_GPCT_GATE_ON_BOTH_EDGES_BIT 0x4 ++#define NI_GPCT_EDGE_GATE_MODE_MASK 0x18 ++#define NI_GPCT_EDGE_GATE_STARTS_STOPS_BITS 0x0 ++#define NI_GPCT_EDGE_GATE_STOPS_STARTS_BITS 0x8 ++#define NI_GPCT_EDGE_GATE_STARTS_BITS 0x10 ++#define NI_GPCT_EDGE_GATE_NO_STARTS_NO_STOPS_BITS 0x18 ++#define NI_GPCT_STOP_MODE_MASK 0x60 ++#define NI_GPCT_STOP_ON_GATE_BITS 0x00 ++#define NI_GPCT_STOP_ON_GATE_OR_TC_BITS 0x20 ++#define NI_GPCT_STOP_ON_GATE_OR_SECOND_TC_BITS 0x40 ++#define NI_GPCT_LOAD_B_SELECT_BIT 0x80 ++#define NI_GPCT_OUTPUT_MODE_MASK 0x300 ++#define NI_GPCT_OUTPUT_TC_PULSE_BITS 0x100 ++#define NI_GPCT_OUTPUT_TC_TOGGLE_BITS 0x200 ++#define NI_GPCT_OUTPUT_TC_OR_GATE_TOGGLE_BITS 0x300 ++#define NI_GPCT_HARDWARE_DISARM_MASK 0xc00 ++#define NI_GPCT_NO_HARDWARE_DISARM_BITS 0x000 ++#define NI_GPCT_DISARM_AT_TC_BITS 0x400 ++#define NI_GPCT_DISARM_AT_GATE_BITS 0x800 ++#define NI_GPCT_DISARM_AT_TC_OR_GATE_BITS 0xc00 ++#define NI_GPCT_LOADING_ON_TC_BIT 0x1000 ++#define NI_GPCT_LOADING_ON_GATE_BIT 0x4000 ++#define NI_GPCT_COUNTING_MODE_MASK 0x7 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_NORMAL_BITS 0x0 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_QUADRATURE_X1_BITS 0x1 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_QUADRATURE_X2_BITS 0x2 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_QUADRATURE_X4_BITS 0x3 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_TWO_PULSE_BITS 0x4 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_SYNC_SOURCE_BITS 0x6 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_INDEX_PHASE_MASK 0x3 << NI_GPCT_INDEX_PHASE_BITSHIFT ++#define NI_GPCT_INDEX_PHASE_LOW_A_LOW_B_BITS 0x0 << NI_GPCT_INDEX_PHASE_BITSHIFT ++#define NI_GPCT_INDEX_PHASE_LOW_A_HIGH_B_BITS 0x1 << NI_GPCT_INDEX_PHASE_BITSHIFT ++#define NI_GPCT_INDEX_PHASE_HIGH_A_LOW_B_BITS 0x2 << NI_GPCT_INDEX_PHASE_BITSHIFT ++#define NI_GPCT_INDEX_PHASE_HIGH_A_HIGH_B_BITS 0x3 << NI_GPCT_INDEX_PHASE_BITSHIFT ++#define NI_GPCT_INDEX_ENABLE_BIT 0x400000 ++#define NI_GPCT_COUNTING_DIRECTION_MASK 0x3 << NI_GPCT_COUNTING_DIRECTION_SHIFT ++#define NI_GPCT_COUNTING_DIRECTION_DOWN_BITS 0x00 << NI_GPCT_COUNTING_DIRECTION_SHIFT ++#define NI_GPCT_COUNTING_DIRECTION_UP_BITS 0x1 << NI_GPCT_COUNTING_DIRECTION_SHIFT ++#define NI_GPCT_COUNTING_DIRECTION_HW_UP_DOWN_BITS 0x2 << NI_GPCT_COUNTING_DIRECTION_SHIFT ++#define NI_GPCT_COUNTING_DIRECTION_HW_GATE_BITS 0x3 << NI_GPCT_COUNTING_DIRECTION_SHIFT ++#define NI_GPCT_RELOAD_SOURCE_MASK 0xc000000 ++#define NI_GPCT_RELOAD_SOURCE_FIXED_BITS 0x0 ++#define NI_GPCT_RELOAD_SOURCE_SWITCHING_BITS 0x4000000 ++#define NI_GPCT_RELOAD_SOURCE_GATE_SELECT_BITS 0x8000000 ++#define NI_GPCT_OR_GATE_BIT 0x10000000 ++#define NI_GPCT_INVERT_OUTPUT_BIT 0x20000000 ++ ++/* Bits for setting a clock source with INSN_CONFIG_SET_CLOCK_SRC when ++ using NI general-purpose counters. 
*/ ++#define NI_GPCT_CLOCK_SRC_SELECT_MASK 0x3f ++#define NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS 0x0 ++#define NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS 0x1 ++#define NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS 0x2 ++#define NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS 0x3 ++#define NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS 0x4 ++#define NI_GPCT_NEXT_TC_CLOCK_SRC_BITS 0x5 ++#define NI_GPCT_SOURCE_PIN_i_CLOCK_SRC_BITS 0x6 /* NI 660x-specific */ ++#define NI_GPCT_PXI10_CLOCK_SRC_BITS 0x7 ++#define NI_GPCT_PXI_STAR_TRIGGER_CLOCK_SRC_BITS 0x8 ++#define NI_GPCT_ANALOG_TRIGGER_OUT_CLOCK_SRC_BITS 0x9 ++#define NI_GPCT_PRESCALE_MODE_CLOCK_SRC_MASK 0x30000000 ++#define NI_GPCT_NO_PRESCALE_CLOCK_SRC_BITS 0x0 ++#define NI_GPCT_PRESCALE_X2_CLOCK_SRC_BITS 0x10000000 /* divide source by 2 */ ++#define NI_GPCT_PRESCALE_X8_CLOCK_SRC_BITS 0x20000000 /* divide source by 8 */ ++#define NI_GPCT_INVERT_CLOCK_SRC_BIT 0x80000000 ++#define NI_GPCT_SOURCE_PIN_CLOCK_SRC_BITS(x) (0x10 + x) ++#define NI_GPCT_RTSI_CLOCK_SRC_BITS(x) (0x18 + x) ++#define NI_GPCT_PFI_CLOCK_SRC_BITS(x) (0x20 + x) ++ ++/* Possibilities for setting a gate source with ++ INSN_CONFIG_SET_GATE_SRC when using NI general-purpose counters. ++ May be bitwise-or'd with CR_EDGE or CR_INVERT. */ ++/* M-series gates */ ++#define NI_GPCT_TIMESTAMP_MUX_GATE_SELECT 0x0 ++#define NI_GPCT_AI_START2_GATE_SELECT 0x12 ++#define NI_GPCT_PXI_STAR_TRIGGER_GATE_SELECT 0x13 ++#define NI_GPCT_NEXT_OUT_GATE_SELECT 0x14 ++#define NI_GPCT_AI_START1_GATE_SELECT 0x1c ++#define NI_GPCT_NEXT_SOURCE_GATE_SELECT 0x1d ++#define NI_GPCT_ANALOG_TRIGGER_OUT_GATE_SELECT 0x1e ++#define NI_GPCT_LOGIC_LOW_GATE_SELECT 0x1f ++/* More gates for 660x */ ++#define NI_GPCT_SOURCE_PIN_i_GATE_SELECT 0x100 ++#define NI_GPCT_GATE_PIN_i_GATE_SELECT 0x101 ++/* More gates for 660x "second gate" */ ++#define NI_GPCT_UP_DOWN_PIN_i_GATE_SELECT 0x201 ++#define NI_GPCT_SELECTED_GATE_GATE_SELECT 0x21e ++/* M-series "second gate" sources are unknown, we should add them here ++ with an offset of 0x300 when known. */ ++#define NI_GPCT_DISABLED_GATE_SELECT 0x8000 ++#define NI_GPCT_GATE_PIN_GATE_SELECT(x) (0x102 + x) ++#define NI_GPCT_RTSI_GATE_SELECT(x) NI_USUAL_RTSI_SELECT(x) ++#define NI_GPCT_PFI_GATE_SELECT(x) NI_USUAL_PFI_SELECT(x) ++#define NI_GPCT_UP_DOWN_PIN_GATE_SELECT(x) (0x202 + x) ++ ++/* Possibilities for setting a source with INSN_CONFIG_SET_OTHER_SRC ++ when using NI general-purpose counters. */ ++#define NI_GPCT_SOURCE_ENCODER_A 0 ++#define NI_GPCT_SOURCE_ENCODER_B 1 ++#define NI_GPCT_SOURCE_ENCODER_Z 2 ++/* M-series gates */ ++/* Still unknown, probably only need NI_GPCT_PFI_OTHER_SELECT */ ++#define NI_GPCT_DISABLED_OTHER_SELECT 0x8000 ++#define NI_GPCT_PFI_OTHER_SELECT(x) NI_USUAL_PFI_SELECT(x) ++ ++/* Start sources for ni general-purpose counters for use with ++ INSN_CONFIG_ARM */ ++#define NI_GPCT_ARM_IMMEDIATE 0x0 ++/* Start both the counter and the adjacent paired counter ++ simultaneously */ ++#define NI_GPCT_ARM_PAIRED_IMMEDIATE 0x1 ++/* NI doesn't document bits for selecting hardware arm triggers. If ++ the NI_GPCT_ARM_UNKNOWN bit is set, we will pass the least significant ++ bits (3 bits for 660x or 5 bits for m-series) through to the ++ hardware. This will at least allow someone to figure out what the bits ++ do later. */ ++#define NI_GPCT_ARM_UNKNOWN 0x1000 ++ ++/* Digital filtering options for ni 660x for use with ++ INSN_CONFIG_FILTER. 
*/ ++#define NI_GPCT_FILTER_OFF 0x0 ++#define NI_GPCT_FILTER_TIMEBASE_3_SYNC 0x1 ++#define NI_GPCT_FILTER_100x_TIMEBASE_1 0x2 ++#define NI_GPCT_FILTER_20x_TIMEBASE_1 0x3 ++#define NI_GPCT_FILTER_10x_TIMEBASE_1 0x4 ++#define NI_GPCT_FILTER_2x_TIMEBASE_1 0x5 ++#define NI_GPCT_FILTER_2x_TIMEBASE_3 0x6 ++ ++/* Master clock sources for ni mio boards and ++ INSN_CONFIG_SET_CLOCK_SRC */ ++#define NI_MIO_INTERNAL_CLOCK 0 ++#define NI_MIO_RTSI_CLOCK 1 ++/* Doesn't work for m-series, use NI_MIO_PLL_RTSI_CLOCK() the ++ NI_MIO_PLL_* sources are m-series only */ ++#define NI_MIO_PLL_PXI_STAR_TRIGGER_CLOCK 2 ++#define NI_MIO_PLL_PXI10_CLOCK 3 ++#define NI_MIO_PLL_RTSI0_CLOCK 4 ++ ++#define NI_MIO_PLL_RTSI_CLOCK(x) (NI_MIO_PLL_RTSI0_CLOCK + (x)) ++ ++/* Signals which can be routed to an NI RTSI pin with ++ INSN_CONFIG_SET_ROUTING. The numbers assigned are not arbitrary, they ++ correspond to the bits required to program the board. */ ++#define NI_RTSI_OUTPUT_ADR_START1 0 ++#define NI_RTSI_OUTPUT_ADR_START2 1 ++#define NI_RTSI_OUTPUT_SCLKG 2 ++#define NI_RTSI_OUTPUT_DACUPDN 3 ++#define NI_RTSI_OUTPUT_DA_START1 4 ++#define NI_RTSI_OUTPUT_G_SRC0 5 ++#define NI_RTSI_OUTPUT_G_GATE0 6 ++#define NI_RTSI_OUTPUT_RGOUT0 7 ++#define NI_RTSI_OUTPUT_RTSI_BRD_0 8 ++/* Pre-m-series always have RTSI clock on line 7 */ ++#define NI_RTSI_OUTPUT_RTSI_OSC 12 ++ ++#define NI_RTSI_OUTPUT_RTSI_BRD(x) (NI_RTSI_OUTPUT_RTSI_BRD_0 + (x)) ++ ++ ++int a4l_ni_tio_rinsn(struct ni_gpct *counter, struct a4l_kernel_instruction *insn); ++int a4l_ni_tio_winsn(struct ni_gpct *counter, struct a4l_kernel_instruction *insn); ++int a4l_ni_tio_insn_config(struct ni_gpct *counter, struct a4l_kernel_instruction *insn); ++void a4l_ni_tio_init_counter(struct ni_gpct *counter); ++ ++struct ni_gpct_device *a4l_ni_gpct_device_construct(struct a4l_device * dev, ++ void (*write_register) (struct ni_gpct * counter, unsigned int bits, ++ enum ni_gpct_register reg), ++ unsigned int (*read_register) (struct ni_gpct * counter, ++ enum ni_gpct_register reg), enum ni_gpct_variant variant, ++ unsigned int num_counters); ++void a4l_ni_gpct_device_destroy(struct ni_gpct_device *counter_dev); ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++extern struct a4l_cmd_desc a4l_ni_tio_cmd_mask; ++ ++int a4l_ni_tio_input_inttrig(struct ni_gpct *counter, lsampl_t trignum); ++int a4l_ni_tio_cmd(struct ni_gpct *counter, struct a4l_cmd_desc *cmd); ++int a4l_ni_tio_cmdtest(struct ni_gpct *counter, struct a4l_cmd_desc *cmd); ++int a4l_ni_tio_cancel(struct ni_gpct *counter); ++ ++void a4l_ni_tio_handle_interrupt(struct ni_gpct *counter, struct a4l_device *dev); ++void a4l_ni_tio_set_mite_channel(struct ni_gpct *counter, ++ struct mite_channel *mite_chan); ++void a4l_ni_tio_acknowledge_and_confirm(struct ni_gpct *counter, ++ int *gate_error, ++ int *tc_error, ++ int *perm_stale_data, int *stale_data); ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++#endif /* !__ANALOGY_NI_TIO_H__ */ +--- linux/drivers/xenomai/analogy/national_instruments/ni_670x.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/ni_670x.c 2021-04-07 16:01:27.844633285 +0800 +@@ -0,0 +1,443 @@ ++/* ++ comedi/drivers/ni_670x.c ++ Hardware driver for NI 670x devices ++ ++ COMEDI - Linux Control and Measurement Device Interface ++ Copyright (C) 1997-2001 David A. 
Schleef ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ ++*/ ++/* ++Driver: ni_670x ++Description: National Instruments 670x ++Author: Bart Joris ++Updated: Wed, 11 Dec 2002 18:25:35 -0800 ++Devices: [National Instruments] PCI-6703 (ni_670x), PCI-6704 ++Status: unknown ++ ++Commands are not supported. ++*/ ++ ++/* ++ Bart Joris Last updated on 20/08/2001 ++ ++ Manuals: ++ ++ 322110a.pdf PCI/PXI-6704 User Manual ++ 322110b.pdf PCI/PXI-6703/6704 User Manual ++*/ ++ ++/* ++ * Integration with Xenomai/Analogy layer based on the ++ * comedi driver. Adaptation made by ++ * Julien Delange ++ */ ++ ++#include ++#include ++#include ++ ++#include "../intel/8255.h" ++#include "ni_mio.h" ++#include "mite.h" ++ ++#define PCIMIO_IRQ_POLARITY 1 ++ ++#define AO_VALUE_OFFSET 0x00 ++#define AO_CHAN_OFFSET 0x0c ++#define AO_STATUS_OFFSET 0x10 ++#define AO_CONTROL_OFFSET 0x10 ++#define DIO_PORT0_DIR_OFFSET 0x20 ++#define DIO_PORT0_DATA_OFFSET 0x24 ++#define DIO_PORT1_DIR_OFFSET 0x28 ++#define DIO_PORT1_DATA_OFFSET 0x2c ++#define MISC_STATUS_OFFSET 0x14 ++#define MISC_CONTROL_OFFSET 0x14 ++ ++/* Board description*/ ++ ++struct ni_670x_board { ++ unsigned short device_id; ++ const char *name; ++ unsigned short ao_chans; ++ unsigned short ao_bits; ++}; ++ ++#define thisboard ((struct ni_670x_board *)dev->board_ptr) ++ ++struct ni_670x_private { ++ struct mite_struct *mite; ++ int boardtype; ++ int dio; ++ unsigned int ao_readback[32]; ++ ++ /* ++ * Added when porting to xenomai ++ */ ++ int irq_polarity; ++ int irq_pin; ++ int irq; ++ struct ni_670x_board *board_ptr; ++ /* ++ * END OF ADDED when porting to xenomai ++ */ ++}; ++ ++struct ni_670x_subd_priv { ++ int io_bits; ++ unsigned int state; ++ uint16_t readback[2]; ++ uint16_t config; ++ void* counter; ++}; ++ ++static int ni_670x_ao_winsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn); ++static int ni_670x_ao_rinsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn); ++static int ni_670x_dio_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn); ++static int ni_670x_dio_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn); ++ ++static struct a4l_channels_desc ni_670x_desc_dio = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 8, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++static struct a4l_channels_desc ni_670x_desc_ao = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 0, /* initialized later according to the board found */ ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 16}, ++ }, ++}; ++ ++ ++static struct a4l_rngtab range_0_20mA = { 1, {RANGE_mA(0, 20)} }; ++static struct a4l_rngtab rng_bipolar10 = { 1, {RANGE_V(-10, 10) }}; ++ ++struct a4l_rngtab *range_table_list[32] = { ++ &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, ++ &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, ++ &rng_bipolar10, 
&rng_bipolar10, &rng_bipolar10, &rng_bipolar10, ++ &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, ++ &range_0_20mA, &range_0_20mA, &range_0_20mA, &range_0_20mA, ++ &range_0_20mA, &range_0_20mA, &range_0_20mA, &range_0_20mA, ++ &range_0_20mA, &range_0_20mA, &range_0_20mA, &range_0_20mA, ++ &range_0_20mA, &range_0_20mA, &range_0_20mA, &range_0_20mA}; ++ ++static A4L_RNGDESC(32) ni670x_ao_desc; ++ ++static void setup_subd_ao(struct a4l_subdevice *subd) ++{ ++ int i; ++ int nchans; ++ ++ nchans = ((struct ni_670x_private*)(subd->dev->priv))->board_ptr->ao_chans; ++ subd->flags = A4L_SUBD_AO; ++ subd->chan_desc = &ni_670x_desc_ao; ++ subd->chan_desc->length = nchans; ++ if (nchans == 32) { ++ ++ subd->rng_desc = (struct a4l_rngdesc*) &ni670x_ao_desc; ++ subd->rng_desc->mode = A4L_RNG_PERCHAN_RNGDESC; ++ for (i = 0 ; i < 16 ; i++) { ++ subd->rng_desc->rngtabs[i] =&rng_bipolar10; ++ subd->rng_desc->rngtabs[16+i] =&range_0_20mA; ++ } ++ } else ++ subd->rng_desc = &a4l_range_bipolar10; ++ ++ subd->insn_write = &ni_670x_ao_winsn; ++ subd->insn_read = &ni_670x_ao_rinsn; ++} ++ ++static void setup_subd_dio(struct a4l_subdevice *s) ++{ ++ /* Digital i/o subdevice */ ++ s->flags = A4L_SUBD_DIO; ++ s->chan_desc = &ni_670x_desc_dio; ++ s->rng_desc = &range_digital; ++ s->insn_bits = ni_670x_dio_insn_bits; ++ s->insn_config = ni_670x_dio_insn_config; ++} ++ ++struct setup_subd { ++ void (*setup_func) (struct a4l_subdevice *); ++ int sizeof_priv; ++}; ++ ++static struct setup_subd setup_subds[2] = { ++ { ++ .setup_func = setup_subd_ao, ++ .sizeof_priv = sizeof(struct ni_670x_subd_priv), ++ }, ++ { ++ .setup_func = setup_subd_dio, ++ .sizeof_priv = sizeof(struct ni_670x_subd_priv), ++ }, ++}; ++ ++static const struct ni_670x_board ni_670x_boards[] = { ++ { ++ .device_id = 0x2c90, ++ .name = "PCI-6703", ++ .ao_chans = 16, ++ .ao_bits = 16, ++ }, ++ { ++ .device_id = 0x1920, ++ .name = "PXI-6704", ++ .ao_chans = 32, ++ .ao_bits = 16, ++ }, ++ { ++ .device_id = 0x1290, ++ .name = "PCI-6704", ++ .ao_chans = 32, ++ .ao_bits = 16, ++ }, ++}; ++ ++#define n_ni_670x_boards ((sizeof(ni_670x_boards)/sizeof(ni_670x_boards[0]))) ++ ++static const struct pci_device_id ni_670x_pci_table[] = { ++ {PCI_DEVICE(PCI_VENDOR_ID_NI, 0x2c90)}, ++ {PCI_DEVICE(PCI_VENDOR_ID_NI, 0x1920)}, ++ {0} ++}; ++ ++MODULE_DEVICE_TABLE(pci, ni_670x_pci_table); ++ ++#define devpriv ((struct ni_670x_private *)dev->priv) ++ ++static inline struct ni_670x_private *private(struct a4l_device *dev) ++{ ++ return (struct ni_670x_private*) dev->priv; ++} ++ ++ ++static int ni_670x_attach (struct a4l_device *dev, a4l_lnkdesc_t *arg); ++static int ni_670x_detach(struct a4l_device *dev); ++ ++static struct a4l_driver ni_670x_drv = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_ni_670x", ++ .driver_name = "ni_670x", ++ .attach = ni_670x_attach, ++ .detach = ni_670x_detach, ++ .privdata_size = sizeof(struct ni_670x_private), ++}; ++ ++static int __init driver_ni_670x_init_module(void) ++{ ++ return a4l_register_drv (&ni_670x_drv); ++} ++ ++static void __exit driver_ni_670x_cleanup_module(void) ++{ ++ a4l_unregister_drv (&ni_670x_drv); ++} ++ ++module_init(driver_ni_670x_init_module); ++module_exit(driver_ni_670x_cleanup_module); ++ ++static int ni_670x_attach (struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ int ret, bus, slot, i, irq; ++ struct mite_struct *mite; ++ struct ni_670x_board* board = NULL; ++ int err; ++ ++ if(arg->opts == NULL || arg->opts_size == 0) ++ bus = slot = 0; ++ else { ++ bus = arg->opts_size >= 
sizeof(unsigned long) ? ++ ((unsigned long *)arg->opts)[0] : 0; ++ slot = arg->opts_size >= sizeof(unsigned long) * 2 ? ++ ((unsigned long *)arg->opts)[1] : 0; ++ } ++ ++ a4l_info(dev, "ni670x attach procedure started(bus=%d/slot=%d)...\n", ++ bus, slot); ++ ++ mite = NULL; ++ ++ for(i = 0; i < n_ni_670x_boards && mite == NULL; i++) { ++ mite = a4l_mite_find_device(bus, ++ slot, ni_670x_boards[i].device_id); ++ board = (struct ni_670x_board*) &ni_670x_boards[i]; ++ } ++ ++ if(mite == NULL) { ++ a4l_err(dev, "%s: cannot find the MITE device\n", __FUNCTION__); ++ return -ENOENT; ++ } ++ ++ a4l_info(dev, "Found device %d %s\n", i, ni_670x_boards[i].name); ++ ++ devpriv->irq_polarity = PCIMIO_IRQ_POLARITY; ++ devpriv->irq_pin = 0; ++ ++ devpriv->mite = mite; ++ devpriv->board_ptr = board; ++ ++ ret = a4l_mite_setup(devpriv->mite, 0); ++ if (ret < 0) { ++ a4l_err(dev, "%s: error setting up mite\n", __FUNCTION__); ++ return ret; ++ } ++ ++ irq = mite_irq(devpriv->mite); ++ devpriv->irq = irq; ++ ++ a4l_info(dev, "found %s board\n", board->name); ++ ++ for (i = 0; i < 2; i++) { ++ struct a4l_subdevice *subd = ++ a4l_alloc_subd(setup_subds[i].sizeof_priv, NULL); ++ ++ if (subd == NULL) { ++ a4l_err(dev, ++ "%s: cannot allocate subdevice\n", ++ __FUNCTION__); ++ return -ENOMEM; ++ } ++ ++ err = a4l_add_subd(dev, subd); ++ if (err != i) { ++ a4l_err(dev, ++ "%s: cannot add subdevice\n", ++ __FUNCTION__); ++ return err; ++ } ++ ++ setup_subds[i].setup_func (subd); ++ } ++ ++ /* Config of misc registers */ ++ writel(0x10, devpriv->mite->daq_io_addr + MISC_CONTROL_OFFSET); ++ /* Config of ao registers */ ++ writel(0x00, devpriv->mite->daq_io_addr + AO_CONTROL_OFFSET); ++ ++ a4l_info(dev, "ni670x attached\n"); ++ ++ return 0; ++} ++ ++static int ni_670x_detach(struct a4l_device *dev) ++{ ++ a4l_info(dev, "ni670x detach procedure started...\n"); ++ ++ if(dev->priv != NULL && devpriv->mite != NULL) ++ a4l_mite_unsetup(devpriv->mite); ++ ++ a4l_info(dev, "ni670x detach procedure succeeded...\n"); ++ ++ return 0; ++} ++ ++ ++static int ni_670x_dio_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int *)insn->data; ++ int chan = CR_CHAN(insn->chan_desc); ++ struct ni_670x_subd_priv *subdpriv = ++ (struct ni_670x_subd_priv *)subd->priv; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ subdpriv->io_bits |= 1 << chan; ++ break; ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ subdpriv->io_bits &= ~(1 << chan); ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (subdpriv->io_bits & (1 << chan)) ? ++ A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ ++ writel(subdpriv->io_bits, ++ devpriv->mite->daq_io_addr + DIO_PORT0_DIR_OFFSET); ++ ++ return 0; ++} ++ ++static int ni_670x_ao_winsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ int i; ++ unsigned int tmp; ++ unsigned int* dtmp; ++ int chan; ++ dtmp = (unsigned int*)insn->data; ++ chan = CR_CHAN(insn->chan_desc); ++ ++ /* Channel number mapping : ++ ++ NI 6703/ NI 6704 | NI 6704 Only ++ ---------------------------------------------------- ++ vch(0) : 0 | ich(16) : 1 ++ vch(1) : 2 | ich(17) : 3 ++ . : . | . . ++ . : . | . . ++ . : . | . . 
++ vch(15) : 30 | ich(31) : 31 */ ++ ++ for (i = 0; i < insn->data_size / sizeof(unsigned int); i++) { ++ ++ tmp = dtmp[i]; ++ ++ /* First write in channel register which channel to use */ ++ writel(((chan & 15) << 1) | ((chan & 16) >> 4), ++ private (subd->dev)->mite->daq_io_addr + AO_CHAN_OFFSET); ++ ++ /* write channel value */ ++ writel(dtmp[i], ++ private(subd->dev)->mite->daq_io_addr + AO_VALUE_OFFSET); ++ private(subd->dev)->ao_readback[chan] = tmp; ++ } ++ ++ return 0; ++} ++ ++static int ni_670x_ao_rinsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ int i; ++ unsigned int* dtmp; ++ int chan = CR_CHAN(insn->chan_desc); ++ ++ dtmp = (unsigned int*)insn->data; ++ ++ for (i = 0; i < insn->data_size / sizeof(unsigned int); i++) ++ dtmp[i] = private(subd->dev)->ao_readback[chan]; ++ ++ return 0; ++} ++ ++ ++static int ni_670x_dio_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ return -ENOSYS; ++} ++ ++MODULE_DESCRIPTION("Analogy driver for NI670x series cards"); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/analogy/national_instruments/ni_mio.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/ni_mio.h 2021-04-07 16:01:27.839633292 +0800 +@@ -0,0 +1,122 @@ ++/* ++ * Hardware driver for NI Mite PCI interface chip ++ * Copyright (C) 1999 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef __ANALOGY_NI_MIO_H__ ++#define __ANALOGY_NI_MIO_H__ ++ ++/* Debug stuff */ ++ ++#ifdef CONFIG_DEBUG_MIO ++#define MDPRINTK(fmt, args...) rtdm_printk(format, ##args) ++#else /* !CONFIG_DEBUG_MIO */ ++#define MDPRINTK(fmt, args...) ++#endif /* CONFIG_DEBUG_MIO */ ++ ++/* Subdevice related defines */ ++ ++#define AIMODE_NONE 0 ++#define AIMODE_HALF_FULL 1 ++#define AIMODE_SCAN 2 ++#define AIMODE_SAMPLE 3 ++ ++#define NI_AI_SUBDEV 0 ++#define NI_AO_SUBDEV 1 ++#define NI_DIO_SUBDEV 2 ++#define NI_8255_DIO_SUBDEV 3 ++#define NI_UNUSED_SUBDEV 4 ++#define NI_CALIBRATION_SUBDEV 5 ++#define NI_EEPROM_SUBDEV 6 ++#define NI_PFI_DIO_SUBDEV 7 ++#define NI_CS5529_CALIBRATION_SUBDEV 8 ++#define NI_SERIAL_SUBDEV 9 ++#define NI_RTSI_SUBDEV 10 ++#define NI_GPCT0_SUBDEV 11 ++#define NI_GPCT1_SUBDEV 12 ++#define NI_FREQ_OUT_SUBDEV 13 ++#define NI_NUM_SUBDEVICES 14 ++ ++#define NI_GPCT_SUBDEV(x) ((x == 1) ? NI_GPCT1_SUBDEV : NI_GPCT0_SUBDEV) ++ ++#define TIMEBASE_1_NS 50 ++#define TIMEBASE_2_NS 10000 ++ ++#define SERIAL_DISABLED 0 ++#define SERIAL_600NS 600 ++#define SERIAL_1_2US 1200 ++#define SERIAL_10US 10000 ++ ++/* PFI digital filtering options for ni m-series for use with ++ INSN_CONFIG_FILTER. 
*/ ++#define NI_PFI_FILTER_OFF 0x0 ++#define NI_PFI_FILTER_125ns 0x1 ++#define NI_PFI_FILTER_6425ns 0x2 ++#define NI_PFI_FILTER_2550us 0x3 ++ ++/* Signals which can be routed to an NI PFI pin on an m-series board ++ with INSN_CONFIG_SET_ROUTING. These numbers are also returned by ++ INSN_CONFIG_GET_ROUTING on pre-m-series boards, even though their ++ routing cannot be changed. The numbers assigned are not arbitrary, ++ they correspond to the bits required to program the board. */ ++#define NI_PFI_OUTPUT_PFI_DEFAULT 0 ++#define NI_PFI_OUTPUT_AI_START1 1 ++#define NI_PFI_OUTPUT_AI_START2 2 ++#define NI_PFI_OUTPUT_AI_CONVERT 3 ++#define NI_PFI_OUTPUT_G_SRC1 4 ++#define NI_PFI_OUTPUT_G_GATE1 5 ++#define NI_PFI_OUTPUT_AO_UPDATE_N 6 ++#define NI_PFI_OUTPUT_AO_START1 7 ++#define NI_PFI_OUTPUT_AI_START_PULSE 8 ++#define NI_PFI_OUTPUT_G_SRC0 9 ++#define NI_PFI_OUTPUT_G_GATE0 10 ++#define NI_PFI_OUTPUT_EXT_STROBE 11 ++#define NI_PFI_OUTPUT_AI_EXT_MUX_CLK 12 ++#define NI_PFI_OUTPUT_GOUT0 13 ++#define NI_PFI_OUTPUT_GOUT1 14 ++#define NI_PFI_OUTPUT_FREQ_OUT 15 ++#define NI_PFI_OUTPUT_PFI_DO 16 ++#define NI_PFI_OUTPUT_I_ATRIG 17 ++#define NI_PFI_OUTPUT_RTSI0 18 ++#define NI_PFI_OUTPUT_PXI_STAR_TRIGGER_IN 26 ++#define NI_PFI_OUTPUT_SCXI_TRIG1 27 ++#define NI_PFI_OUTPUT_DIO_CHANGE_DETECT_RTSI 28 ++#define NI_PFI_OUTPUT_CDI_SAMPLE 29 ++#define NI_PFI_OUTPUT_CDO_UPDATE 30 ++ ++static inline unsigned int NI_PFI_OUTPUT_RTSI(unsigned rtsi_channel) { ++ return NI_PFI_OUTPUT_RTSI0 + rtsi_channel; ++} ++ ++/* Ranges declarations */ ++ ++extern struct a4l_rngdesc a4l_range_ni_E_ai; ++extern struct a4l_rngdesc a4l_range_ni_E_ai_limited; ++extern struct a4l_rngdesc a4l_range_ni_E_ai_limited14; ++extern struct a4l_rngdesc a4l_range_ni_E_ai_bipolar4; ++extern struct a4l_rngdesc a4l_range_ni_E_ai_611x; ++extern struct a4l_rngdesc range_ni_E_ai_622x; ++extern struct a4l_rngdesc range_ni_E_ai_628x; ++extern struct a4l_rngdesc a4l_range_ni_S_ai_6143; ++extern struct a4l_rngdesc a4l_range_ni_E_ao_ext; ++ ++/* Misc functions declarations */ ++ ++int a4l_ni_E_interrupt(unsigned int irq, void *d); ++int a4l_ni_E_init(struct a4l_device *dev); ++ ++ ++#endif /* !__ANALOGY_NI_MIO_H__ */ +--- linux/drivers/xenomai/analogy/national_instruments/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/Makefile 2021-04-07 16:01:27.834633299 +0800 +@@ -0,0 +1,16 @@ ++ ++ccflags-y += -Idrivers/xenomai/analogy ++ ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) += analogy_ni_mite.o ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_TIO) += analogy_ni_tio.o ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_MIO) += analogy_ni_mio.o ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_PCIMIO) += analogy_ni_pcimio.o ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_670x) += analogy_ni_670x.o ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_660x) += analogy_ni_660x.o ++ ++analogy_ni_mite-y := mite.o ++analogy_ni_tio-y := tio_common.o ++analogy_ni_mio-y := mio_common.o ++analogy_ni_pcimio-y := pcimio.o ++analogy_ni_670x-y := ni_670x.o ++analogy_ni_660x-y := ni_660x.o +--- linux/drivers/xenomai/analogy/national_instruments/ni_660x.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/ni_660x.c 2021-04-07 16:01:27.830633305 +0800 +@@ -0,0 +1,1481 @@ ++/* ++ * comedi/drivers/ni_660x.c ++ * Hardware driver for NI 660x devices ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software 
Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ * Driver: ni_660x ++ * Description: National Instruments 660x counter/timer boards ++ * Devices: ++ * [National Instruments] PCI-6601 (ni_660x), PCI-6602, PXI-6602, ++ * PXI-6608 ++ * Author: J.P. Mellor , ++ * Herman.Bruyninckx@mech.kuleuven.ac.be, ++ * Wim.Meeussen@mech.kuleuven.ac.be, ++ * Klaas.Gadeyne@mech.kuleuven.ac.be, ++ * Frank Mori Hess ++ * Updated: Thu Oct 18 12:56:06 EDT 2007 ++ * Status: experimental ++ ++ * Encoders work. PulseGeneration (both single pulse and pulse train) ++ * works. Buffered commands work for input but not output. ++ ++ * References: ++ * DAQ 660x Register-Level Programmer Manual (NI 370505A-01) ++ * DAQ 6601/6602 User Manual (NI 322137B-01) ++ */ ++ ++/* ++ * Integration with Xenomai/Analogy layer based on the ++ * comedi driver. Adaptation made by ++ * Julien Delange ++ */ ++ ++#include ++ ++#include ++#include ++ ++#include "../intel/8255.h" ++#include "ni_stc.h" ++#include "ni_mio.h" ++#include "ni_tio.h" ++#include "mite.h" ++ ++enum io_direction { ++ DIRECTION_INPUT = 0, ++ DIRECTION_OUTPUT = 1, ++ DIRECTION_OPENDRAIN = 2 ++}; ++ ++ ++enum ni_660x_constants { ++ min_counter_pfi_chan = 8, ++ max_dio_pfi_chan = 31, ++ counters_per_chip = 4 ++}; ++ ++struct ni_660x_subd_priv { ++ int io_bits; ++ unsigned int state; ++ uint16_t readback[2]; ++ uint16_t config; ++ struct ni_gpct* counter; ++}; ++ ++#define NUM_PFI_CHANNELS 40 ++/* Really there are only up to 3 dma channels, but the register layout ++ allows for 4 */ ++#define MAX_DMA_CHANNEL 4 ++ ++static struct a4l_channels_desc chandesc_ni660x = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = NUM_PFI_CHANNELS, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, sizeof(sampl_t)}, ++ }, ++}; ++ ++#define subdev_priv ((struct ni_660x_subd_priv*)s->priv) ++ ++/* See Register-Level Programmer Manual page 3.1 */ ++enum NI_660x_Register { ++ G0InterruptAcknowledge, ++ G0StatusRegister, ++ G1InterruptAcknowledge, ++ G1StatusRegister, ++ G01StatusRegister, ++ G0CommandRegister, ++ STCDIOParallelInput, ++ G1CommandRegister, ++ G0HWSaveRegister, ++ G1HWSaveRegister, ++ STCDIOOutput, ++ STCDIOControl, ++ G0SWSaveRegister, ++ G1SWSaveRegister, ++ G0ModeRegister, ++ G01JointStatus1Register, ++ G1ModeRegister, ++ STCDIOSerialInput, ++ G0LoadARegister, ++ G01JointStatus2Register, ++ G0LoadBRegister, ++ G1LoadARegister, ++ G1LoadBRegister, ++ G0InputSelectRegister, ++ G1InputSelectRegister, ++ G0AutoincrementRegister, ++ G1AutoincrementRegister, ++ G01JointResetRegister, ++ G0InterruptEnable, ++ G1InterruptEnable, ++ G0CountingModeRegister, ++ G1CountingModeRegister, ++ G0SecondGateRegister, ++ G1SecondGateRegister, ++ G0DMAConfigRegister, ++ G0DMAStatusRegister, ++ G1DMAConfigRegister, ++ G1DMAStatusRegister, ++ G2InterruptAcknowledge, ++ G2StatusRegister, ++ G3InterruptAcknowledge, ++ G3StatusRegister, ++ G23StatusRegister, ++ G2CommandRegister, ++ G3CommandRegister, ++ G2HWSaveRegister, ++ G3HWSaveRegister, ++ G2SWSaveRegister, ++ G3SWSaveRegister, ++ 
G2ModeRegister, ++ G23JointStatus1Register, ++ G3ModeRegister, ++ G2LoadARegister, ++ G23JointStatus2Register, ++ G2LoadBRegister, ++ G3LoadARegister, ++ G3LoadBRegister, ++ G2InputSelectRegister, ++ G3InputSelectRegister, ++ G2AutoincrementRegister, ++ G3AutoincrementRegister, ++ G23JointResetRegister, ++ G2InterruptEnable, ++ G3InterruptEnable, ++ G2CountingModeRegister, ++ G3CountingModeRegister, ++ G3SecondGateRegister, ++ G2SecondGateRegister, ++ G2DMAConfigRegister, ++ G2DMAStatusRegister, ++ G3DMAConfigRegister, ++ G3DMAStatusRegister, ++ DIO32Input, ++ DIO32Output, ++ ClockConfigRegister, ++ GlobalInterruptStatusRegister, ++ DMAConfigRegister, ++ GlobalInterruptConfigRegister, ++ IOConfigReg0_1, ++ IOConfigReg2_3, ++ IOConfigReg4_5, ++ IOConfigReg6_7, ++ IOConfigReg8_9, ++ IOConfigReg10_11, ++ IOConfigReg12_13, ++ IOConfigReg14_15, ++ IOConfigReg16_17, ++ IOConfigReg18_19, ++ IOConfigReg20_21, ++ IOConfigReg22_23, ++ IOConfigReg24_25, ++ IOConfigReg26_27, ++ IOConfigReg28_29, ++ IOConfigReg30_31, ++ IOConfigReg32_33, ++ IOConfigReg34_35, ++ IOConfigReg36_37, ++ IOConfigReg38_39, ++ NumRegisters, ++}; ++ ++static inline unsigned IOConfigReg(unsigned pfi_channel) ++{ ++ unsigned reg = IOConfigReg0_1 + pfi_channel / 2; ++ BUG_ON(reg > IOConfigReg38_39); ++ return reg; ++} ++ ++enum ni_660x_register_width { ++ DATA_1B, ++ DATA_2B, ++ DATA_4B ++}; ++ ++enum ni_660x_register_direction { ++ NI_660x_READ, ++ NI_660x_WRITE, ++ NI_660x_READ_WRITE ++}; ++ ++enum ni_660x_pfi_output_select { ++ pfi_output_select_high_Z = 0, ++ pfi_output_select_counter = 1, ++ pfi_output_select_do = 2, ++ num_pfi_output_selects ++}; ++ ++enum ni_660x_subdevices { ++ NI_660X_DIO_SUBDEV = 1, ++ NI_660X_GPCT_SUBDEV_0 = 2 ++}; ++ ++static inline unsigned NI_660X_GPCT_SUBDEV(unsigned index) ++{ ++ return NI_660X_GPCT_SUBDEV_0 + index; ++} ++ ++struct NI_660xRegisterData { ++ ++ const char *name; /* Register Name */ ++ int offset; /* Offset from base address from GPCT chip */ ++ enum ni_660x_register_direction direction; ++ enum ni_660x_register_width size; /* 1 byte, 2 bytes, or 4 bytes */ ++}; ++ ++static const struct NI_660xRegisterData registerData[NumRegisters] = { ++ {"G0 Interrupt Acknowledge", 0x004, NI_660x_WRITE, DATA_2B}, ++ {"G0 Status Register", 0x004, NI_660x_READ, DATA_2B}, ++ {"G1 Interrupt Acknowledge", 0x006, NI_660x_WRITE, DATA_2B}, ++ {"G1 Status Register", 0x006, NI_660x_READ, DATA_2B}, ++ {"G01 Status Register ", 0x008, NI_660x_READ, DATA_2B}, ++ {"G0 Command Register", 0x00C, NI_660x_WRITE, DATA_2B}, ++ {"STC DIO Parallel Input", 0x00E, NI_660x_READ, DATA_2B}, ++ {"G1 Command Register", 0x00E, NI_660x_WRITE, DATA_2B}, ++ {"G0 HW Save Register", 0x010, NI_660x_READ, DATA_4B}, ++ {"G1 HW Save Register", 0x014, NI_660x_READ, DATA_4B}, ++ {"STC DIO Output", 0x014, NI_660x_WRITE, DATA_2B}, ++ {"STC DIO Control", 0x016, NI_660x_WRITE, DATA_2B}, ++ {"G0 SW Save Register", 0x018, NI_660x_READ, DATA_4B}, ++ {"G1 SW Save Register", 0x01C, NI_660x_READ, DATA_4B}, ++ {"G0 Mode Register", 0x034, NI_660x_WRITE, DATA_2B}, ++ {"G01 Joint Status 1 Register", 0x036, NI_660x_READ, DATA_2B}, ++ {"G1 Mode Register", 0x036, NI_660x_WRITE, DATA_2B}, ++ {"STC DIO Serial Input", 0x038, NI_660x_READ, DATA_2B}, ++ {"G0 Load A Register", 0x038, NI_660x_WRITE, DATA_4B}, ++ {"G01 Joint Status 2 Register", 0x03A, NI_660x_READ, DATA_2B}, ++ {"G0 Load B Register", 0x03C, NI_660x_WRITE, DATA_4B}, ++ {"G1 Load A Register", 0x040, NI_660x_WRITE, DATA_4B}, ++ {"G1 Load B Register", 0x044, NI_660x_WRITE, DATA_4B}, ++ {"G0 Input 
Select Register", 0x048, NI_660x_WRITE, DATA_2B}, ++ {"G1 Input Select Register", 0x04A, NI_660x_WRITE, DATA_2B}, ++ {"G0 Autoincrement Register", 0x088, NI_660x_WRITE, DATA_2B}, ++ {"G1 Autoincrement Register", 0x08A, NI_660x_WRITE, DATA_2B}, ++ {"G01 Joint Reset Register", 0x090, NI_660x_WRITE, DATA_2B}, ++ {"G0 Interrupt Enable", 0x092, NI_660x_WRITE, DATA_2B}, ++ {"G1 Interrupt Enable", 0x096, NI_660x_WRITE, DATA_2B}, ++ {"G0 Counting Mode Register", 0x0B0, NI_660x_WRITE, DATA_2B}, ++ {"G1 Counting Mode Register", 0x0B2, NI_660x_WRITE, DATA_2B}, ++ {"G0 Second Gate Register", 0x0B4, NI_660x_WRITE, DATA_2B}, ++ {"G1 Second Gate Register", 0x0B6, NI_660x_WRITE, DATA_2B}, ++ {"G0 DMA Config Register", 0x0B8, NI_660x_WRITE, DATA_2B}, ++ {"G0 DMA Status Register", 0x0B8, NI_660x_READ, DATA_2B}, ++ {"G1 DMA Config Register", 0x0BA, NI_660x_WRITE, DATA_2B}, ++ {"G1 DMA Status Register", 0x0BA, NI_660x_READ, DATA_2B}, ++ {"G2 Interrupt Acknowledge", 0x104, NI_660x_WRITE, DATA_2B}, ++ {"G2 Status Register", 0x104, NI_660x_READ, DATA_2B}, ++ {"G3 Interrupt Acknowledge", 0x106, NI_660x_WRITE, DATA_2B}, ++ {"G3 Status Register", 0x106, NI_660x_READ, DATA_2B}, ++ {"G23 Status Register", 0x108, NI_660x_READ, DATA_2B}, ++ {"G2 Command Register", 0x10C, NI_660x_WRITE, DATA_2B}, ++ {"G3 Command Register", 0x10E, NI_660x_WRITE, DATA_2B}, ++ {"G2 HW Save Register", 0x110, NI_660x_READ, DATA_4B}, ++ {"G3 HW Save Register", 0x114, NI_660x_READ, DATA_4B}, ++ {"G2 SW Save Register", 0x118, NI_660x_READ, DATA_4B}, ++ {"G3 SW Save Register", 0x11C, NI_660x_READ, DATA_4B}, ++ {"G2 Mode Register", 0x134, NI_660x_WRITE, DATA_2B}, ++ {"G23 Joint Status 1 Register", 0x136, NI_660x_READ, DATA_2B}, ++ {"G3 Mode Register", 0x136, NI_660x_WRITE, DATA_2B}, ++ {"G2 Load A Register", 0x138, NI_660x_WRITE, DATA_4B}, ++ {"G23 Joint Status 2 Register", 0x13A, NI_660x_READ, DATA_2B}, ++ {"G2 Load B Register", 0x13C, NI_660x_WRITE, DATA_4B}, ++ {"G3 Load A Register", 0x140, NI_660x_WRITE, DATA_4B}, ++ {"G3 Load B Register", 0x144, NI_660x_WRITE, DATA_4B}, ++ {"G2 Input Select Register", 0x148, NI_660x_WRITE, DATA_2B}, ++ {"G3 Input Select Register", 0x14A, NI_660x_WRITE, DATA_2B}, ++ {"G2 Autoincrement Register", 0x188, NI_660x_WRITE, DATA_2B}, ++ {"G3 Autoincrement Register", 0x18A, NI_660x_WRITE, DATA_2B}, ++ {"G23 Joint Reset Register", 0x190, NI_660x_WRITE, DATA_2B}, ++ {"G2 Interrupt Enable", 0x192, NI_660x_WRITE, DATA_2B}, ++ {"G3 Interrupt Enable", 0x196, NI_660x_WRITE, DATA_2B}, ++ {"G2 Counting Mode Register", 0x1B0, NI_660x_WRITE, DATA_2B}, ++ {"G3 Counting Mode Register", 0x1B2, NI_660x_WRITE, DATA_2B}, ++ {"G3 Second Gate Register", 0x1B6, NI_660x_WRITE, DATA_2B}, ++ {"G2 Second Gate Register", 0x1B4, NI_660x_WRITE, DATA_2B}, ++ {"G2 DMA Config Register", 0x1B8, NI_660x_WRITE, DATA_2B}, ++ {"G2 DMA Status Register", 0x1B8, NI_660x_READ, DATA_2B}, ++ {"G3 DMA Config Register", 0x1BA, NI_660x_WRITE, DATA_2B}, ++ {"G3 DMA Status Register", 0x1BA, NI_660x_READ, DATA_2B}, ++ {"32 bit Digital Input", 0x414, NI_660x_READ, DATA_4B}, ++ {"32 bit Digital Output", 0x510, NI_660x_WRITE, DATA_4B}, ++ {"Clock Config Register", 0x73C, NI_660x_WRITE, DATA_4B}, ++ {"Global Interrupt Status Register", 0x754, NI_660x_READ, DATA_4B}, ++ {"DMA Configuration Register", 0x76C, NI_660x_WRITE, DATA_4B}, ++ {"Global Interrupt Config Register", 0x770, NI_660x_WRITE, DATA_4B}, ++ {"IO Config Register 0-1", 0x77C, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 2-3", 0x77E, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 4-5", 
0x780, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 6-7", 0x782, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 8-9", 0x784, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 10-11", 0x786, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 12-13", 0x788, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 14-15", 0x78A, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 16-17", 0x78C, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 18-19", 0x78E, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 20-21", 0x790, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 22-23", 0x792, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 24-25", 0x794, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 26-27", 0x796, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 28-29", 0x798, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 30-31", 0x79A, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 32-33", 0x79C, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 34-35", 0x79E, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 36-37", 0x7A0, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 38-39", 0x7A2, NI_660x_READ_WRITE, DATA_2B} ++}; ++ ++/* kind of ENABLE for the second counter */ ++enum clock_config_register_bits { ++ CounterSwap = 0x1 << 21 ++}; ++ ++/* ioconfigreg */ ++static inline unsigned ioconfig_bitshift(unsigned pfi_channel) ++{ ++ if (pfi_channel % 2) ++ return 0; ++ else ++ return 8; ++} ++ ++static inline unsigned pfi_output_select_mask(unsigned pfi_channel) ++{ ++ return 0x3 << ioconfig_bitshift(pfi_channel); ++} ++ ++static inline unsigned pfi_output_select_bits(unsigned pfi_channel, ++ unsigned output_select) ++{ ++ return (output_select & 0x3) << ioconfig_bitshift(pfi_channel); ++} ++ ++static inline unsigned pfi_input_select_mask(unsigned pfi_channel) ++{ ++ return 0x7 << (4 + ioconfig_bitshift(pfi_channel)); ++} ++ ++static inline unsigned pfi_input_select_bits(unsigned pfi_channel, ++ unsigned input_select) ++{ ++ return (input_select & 0x7) << (4 + ioconfig_bitshift(pfi_channel)); ++} ++ ++/* Dma configuration register bits */ ++static inline unsigned dma_select_mask(unsigned dma_channel) ++{ ++ BUG_ON(dma_channel >= MAX_DMA_CHANNEL); ++ return 0x1f << (8 * dma_channel); ++} ++ ++enum dma_selection { ++ dma_selection_none = 0x1f, ++}; ++ ++static inline unsigned dma_selection_counter(unsigned counter_index) ++{ ++ BUG_ON(counter_index >= counters_per_chip); ++ return counter_index; ++} ++ ++static inline unsigned dma_select_bits(unsigned dma_channel, unsigned selection) ++{ ++ BUG_ON(dma_channel >= MAX_DMA_CHANNEL); ++ return (selection << (8 * dma_channel)) & dma_select_mask(dma_channel); ++} ++ ++static inline unsigned dma_reset_bit(unsigned dma_channel) ++{ ++ BUG_ON(dma_channel >= MAX_DMA_CHANNEL); ++ return 0x80 << (8 * dma_channel); ++} ++ ++enum global_interrupt_status_register_bits { ++ Counter_0_Int_Bit = 0x100, ++ Counter_1_Int_Bit = 0x200, ++ Counter_2_Int_Bit = 0x400, ++ Counter_3_Int_Bit = 0x800, ++ Cascade_Int_Bit = 0x20000000, ++ Global_Int_Bit = 0x80000000 ++}; ++ ++enum global_interrupt_config_register_bits { ++ Cascade_Int_Enable_Bit = 0x20000000, ++ Global_Int_Polarity_Bit = 0x40000000, ++ Global_Int_Enable_Bit = 0x80000000 ++}; ++ ++/* Offset of the GPCT chips from the base-adress of the card: ++ First chip is at base-address +0x00, etc. 
*/ ++static const unsigned GPCT_OFFSET[2] = { 0x0, 0x800 }; ++ ++/* Board description */ ++struct ni_660x_board { ++ unsigned short dev_id; /* `lspci` will show you this */ ++ const char *name; ++ unsigned n_chips; /* total number of TIO chips */ ++}; ++ ++static const struct ni_660x_board ni_660x_boards[] = { ++ { ++ .dev_id = 0x2c60, ++ .name = "PCI-6601", ++ .n_chips = 1, ++ }, ++ { ++ .dev_id = 0x1310, ++ .name = "PCI-6602", ++ .n_chips = 2, ++ }, ++ { ++ .dev_id = 0x1360, ++ .name = "PXI-6602", ++ .n_chips = 2, ++ }, ++ { ++ .dev_id = 0x2cc0, ++ .name = "PXI-6608", ++ .n_chips = 2, ++ }, ++}; ++ ++#define NI_660X_MAX_NUM_CHIPS 2 ++#define NI_660X_MAX_NUM_COUNTERS (NI_660X_MAX_NUM_CHIPS * counters_per_chip) ++ ++static const struct pci_device_id ni_660x_pci_table[] = { ++ { ++ PCI_VENDOR_ID_NATINST, 0x2c60, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { ++ PCI_VENDOR_ID_NATINST, 0x1310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { ++ PCI_VENDOR_ID_NATINST, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { ++ PCI_VENDOR_ID_NATINST, 0x2cc0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { ++ 0} ++}; ++ ++MODULE_DEVICE_TABLE(pci, ni_660x_pci_table); ++ ++struct ni_660x_private { ++ struct mite_struct *mite; ++ struct ni_gpct_device *counter_dev; ++ uint64_t pfi_direction_bits; ++ ++ struct mite_dma_descriptor_ring ++ *mite_rings[NI_660X_MAX_NUM_CHIPS][counters_per_chip]; ++ ++ rtdm_lock_t mite_channel_lock; ++ /* Interrupt_lock prevents races between interrupt and ++ comedi_poll */ ++ rtdm_lock_t interrupt_lock; ++ unsigned int dma_configuration_soft_copies[NI_660X_MAX_NUM_CHIPS]; ++ rtdm_lock_t soft_reg_copy_lock; ++ unsigned short pfi_output_selects[NUM_PFI_CHANNELS]; ++ ++ struct ni_660x_board *board_ptr; ++}; ++ ++#undef devpriv ++#define devpriv ((struct ni_660x_private *)dev->priv) ++ ++static inline struct ni_660x_private *private(struct a4l_device *dev) ++{ ++ return (struct ni_660x_private*) dev->priv; ++} ++ ++/* Initialized in ni_660x_find_device() */ ++static inline const struct ni_660x_board *board(struct a4l_device *dev) ++{ ++ return ((struct ni_660x_private*)dev->priv)->board_ptr; ++} ++ ++#define n_ni_660x_boards ARRAY_SIZE(ni_660x_boards) ++ ++static int ni_660x_attach(struct a4l_device *dev, ++ a4l_lnkdesc_t *arg); ++static int ni_660x_detach(struct a4l_device *dev); ++static void init_tio_chip(struct a4l_device *dev, int chipset); ++static void ni_660x_select_pfi_output(struct a4l_device *dev, ++ unsigned pfi_channel, ++ unsigned output_select); ++ ++static struct a4l_driver ni_660x_drv = { ++ .board_name = "analogy_ni_660x", ++ .driver_name = "ni_660x", ++ .owner = THIS_MODULE, ++ .attach = ni_660x_attach, ++ .detach = ni_660x_detach, ++ .privdata_size = sizeof(struct ni_660x_private), ++}; ++ ++static int ni_660x_set_pfi_routing(struct a4l_device *dev, unsigned chan, ++ unsigned source); ++ ++/* Possible instructions for a GPCT */ ++static int ni_660x_GPCT_rinsn( ++ struct a4l_subdevice *s, ++ struct a4l_kernel_instruction *insn); ++static int ni_660x_GPCT_insn_config( ++ struct a4l_subdevice *s, ++ struct a4l_kernel_instruction *insn); ++static int ni_660x_GPCT_winsn( ++ struct a4l_subdevice *s, ++ struct a4l_kernel_instruction *insn); ++ ++/* Possible instructions for Digital IO */ ++static int ni_660x_dio_insn_config( ++ struct a4l_subdevice *s, ++ struct a4l_kernel_instruction *insn); ++static int ni_660x_dio_insn_bits( ++ struct a4l_subdevice *s, ++ struct a4l_kernel_instruction *insn); ++ ++static inline unsigned ni_660x_num_counters(struct a4l_device *dev) ++{ ++ return board(dev)->n_chips * 
counters_per_chip; ++} ++ ++static enum NI_660x_Register ni_gpct_to_660x_register(enum ni_gpct_register reg) ++{ ++ ++ enum NI_660x_Register ni_660x_register; ++ switch (reg) { ++ case NITIO_G0_Autoincrement_Reg: ++ ni_660x_register = G0AutoincrementRegister; ++ break; ++ case NITIO_G1_Autoincrement_Reg: ++ ni_660x_register = G1AutoincrementRegister; ++ break; ++ case NITIO_G2_Autoincrement_Reg: ++ ni_660x_register = G2AutoincrementRegister; ++ break; ++ case NITIO_G3_Autoincrement_Reg: ++ ni_660x_register = G3AutoincrementRegister; ++ break; ++ case NITIO_G0_Command_Reg: ++ ni_660x_register = G0CommandRegister; ++ break; ++ case NITIO_G1_Command_Reg: ++ ni_660x_register = G1CommandRegister; ++ break; ++ case NITIO_G2_Command_Reg: ++ ni_660x_register = G2CommandRegister; ++ break; ++ case NITIO_G3_Command_Reg: ++ ni_660x_register = G3CommandRegister; ++ break; ++ case NITIO_G0_HW_Save_Reg: ++ ni_660x_register = G0HWSaveRegister; ++ break; ++ case NITIO_G1_HW_Save_Reg: ++ ni_660x_register = G1HWSaveRegister; ++ break; ++ case NITIO_G2_HW_Save_Reg: ++ ni_660x_register = G2HWSaveRegister; ++ break; ++ case NITIO_G3_HW_Save_Reg: ++ ni_660x_register = G3HWSaveRegister; ++ break; ++ case NITIO_G0_SW_Save_Reg: ++ ni_660x_register = G0SWSaveRegister; ++ break; ++ case NITIO_G1_SW_Save_Reg: ++ ni_660x_register = G1SWSaveRegister; ++ break; ++ case NITIO_G2_SW_Save_Reg: ++ ni_660x_register = G2SWSaveRegister; ++ break; ++ case NITIO_G3_SW_Save_Reg: ++ ni_660x_register = G3SWSaveRegister; ++ break; ++ case NITIO_G0_Mode_Reg: ++ ni_660x_register = G0ModeRegister; ++ break; ++ case NITIO_G1_Mode_Reg: ++ ni_660x_register = G1ModeRegister; ++ break; ++ case NITIO_G2_Mode_Reg: ++ ni_660x_register = G2ModeRegister; ++ break; ++ case NITIO_G3_Mode_Reg: ++ ni_660x_register = G3ModeRegister; ++ break; ++ case NITIO_G0_LoadA_Reg: ++ ni_660x_register = G0LoadARegister; ++ break; ++ case NITIO_G1_LoadA_Reg: ++ ni_660x_register = G1LoadARegister; ++ break; ++ case NITIO_G2_LoadA_Reg: ++ ni_660x_register = G2LoadARegister; ++ break; ++ case NITIO_G3_LoadA_Reg: ++ ni_660x_register = G3LoadARegister; ++ break; ++ case NITIO_G0_LoadB_Reg: ++ ni_660x_register = G0LoadBRegister; ++ break; ++ case NITIO_G1_LoadB_Reg: ++ ni_660x_register = G1LoadBRegister; ++ break; ++ case NITIO_G2_LoadB_Reg: ++ ni_660x_register = G2LoadBRegister; ++ break; ++ case NITIO_G3_LoadB_Reg: ++ ni_660x_register = G3LoadBRegister; ++ break; ++ case NITIO_G0_Input_Select_Reg: ++ ni_660x_register = G0InputSelectRegister; ++ break; ++ case NITIO_G1_Input_Select_Reg: ++ ni_660x_register = G1InputSelectRegister; ++ break; ++ case NITIO_G2_Input_Select_Reg: ++ ni_660x_register = G2InputSelectRegister; ++ break; ++ case NITIO_G3_Input_Select_Reg: ++ ni_660x_register = G3InputSelectRegister; ++ break; ++ case NITIO_G01_Status_Reg: ++ ni_660x_register = G01StatusRegister; ++ break; ++ case NITIO_G23_Status_Reg: ++ ni_660x_register = G23StatusRegister; ++ break; ++ case NITIO_G01_Joint_Reset_Reg: ++ ni_660x_register = G01JointResetRegister; ++ break; ++ case NITIO_G23_Joint_Reset_Reg: ++ ni_660x_register = G23JointResetRegister; ++ break; ++ case NITIO_G01_Joint_Status1_Reg: ++ ni_660x_register = G01JointStatus1Register; ++ break; ++ case NITIO_G23_Joint_Status1_Reg: ++ ni_660x_register = G23JointStatus1Register; ++ break; ++ case NITIO_G01_Joint_Status2_Reg: ++ ni_660x_register = G01JointStatus2Register; ++ break; ++ case NITIO_G23_Joint_Status2_Reg: ++ ni_660x_register = G23JointStatus2Register; ++ break; ++ case NITIO_G0_Counting_Mode_Reg: ++ 
ni_660x_register = G0CountingModeRegister; ++ break; ++ case NITIO_G1_Counting_Mode_Reg: ++ ni_660x_register = G1CountingModeRegister; ++ break; ++ case NITIO_G2_Counting_Mode_Reg: ++ ni_660x_register = G2CountingModeRegister; ++ break; ++ case NITIO_G3_Counting_Mode_Reg: ++ ni_660x_register = G3CountingModeRegister; ++ break; ++ case NITIO_G0_Second_Gate_Reg: ++ ni_660x_register = G0SecondGateRegister; ++ break; ++ case NITIO_G1_Second_Gate_Reg: ++ ni_660x_register = G1SecondGateRegister; ++ break; ++ case NITIO_G2_Second_Gate_Reg: ++ ni_660x_register = G2SecondGateRegister; ++ break; ++ case NITIO_G3_Second_Gate_Reg: ++ ni_660x_register = G3SecondGateRegister; ++ break; ++ case NITIO_G0_DMA_Config_Reg: ++ ni_660x_register = G0DMAConfigRegister; ++ break; ++ case NITIO_G0_DMA_Status_Reg: ++ ni_660x_register = G0DMAStatusRegister; ++ break; ++ case NITIO_G1_DMA_Config_Reg: ++ ni_660x_register = G1DMAConfigRegister; ++ break; ++ case NITIO_G1_DMA_Status_Reg: ++ ni_660x_register = G1DMAStatusRegister; ++ break; ++ case NITIO_G2_DMA_Config_Reg: ++ ni_660x_register = G2DMAConfigRegister; ++ break; ++ case NITIO_G2_DMA_Status_Reg: ++ ni_660x_register = G2DMAStatusRegister; ++ break; ++ case NITIO_G3_DMA_Config_Reg: ++ ni_660x_register = G3DMAConfigRegister; ++ break; ++ case NITIO_G3_DMA_Status_Reg: ++ ni_660x_register = G3DMAStatusRegister; ++ break; ++ case NITIO_G0_Interrupt_Acknowledge_Reg: ++ ni_660x_register = G0InterruptAcknowledge; ++ break; ++ case NITIO_G1_Interrupt_Acknowledge_Reg: ++ ni_660x_register = G1InterruptAcknowledge; ++ break; ++ case NITIO_G2_Interrupt_Acknowledge_Reg: ++ ni_660x_register = G2InterruptAcknowledge; ++ break; ++ case NITIO_G3_Interrupt_Acknowledge_Reg: ++ ni_660x_register = G3InterruptAcknowledge; ++ break; ++ case NITIO_G0_Status_Reg: ++ ni_660x_register = G0StatusRegister; ++ break; ++ case NITIO_G1_Status_Reg: ++ ni_660x_register = G0StatusRegister; ++ break; ++ case NITIO_G2_Status_Reg: ++ ni_660x_register = G0StatusRegister; ++ break; ++ case NITIO_G3_Status_Reg: ++ ni_660x_register = G0StatusRegister; ++ break; ++ case NITIO_G0_Interrupt_Enable_Reg: ++ ni_660x_register = G0InterruptEnable; ++ break; ++ case NITIO_G1_Interrupt_Enable_Reg: ++ ni_660x_register = G1InterruptEnable; ++ break; ++ case NITIO_G2_Interrupt_Enable_Reg: ++ ni_660x_register = G2InterruptEnable; ++ break; ++ case NITIO_G3_Interrupt_Enable_Reg: ++ ni_660x_register = G3InterruptEnable; ++ break; ++ default: ++ __a4l_err("%s: unhandled register 0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return 0; ++ break; ++ } ++ return ni_660x_register; ++} ++ ++static inline void ni_660x_write_register(struct a4l_device *dev, ++ unsigned chip_index, unsigned bits, ++ enum NI_660x_Register reg) ++{ ++ void *const write_address = ++ private(dev)->mite->daq_io_addr + GPCT_OFFSET[chip_index] + ++ registerData[reg].offset; ++ ++ switch (registerData[reg].size) { ++ case DATA_2B: ++ writew(bits, write_address); ++ break; ++ case DATA_4B: ++ writel(bits, write_address); ++ break; ++ default: ++ __a4l_err("%s: %s: bug! 
unhandled case (reg=0x%x) in switch.\n", ++ __FILE__, __FUNCTION__, reg); ++ BUG(); ++ break; ++ } ++} ++ ++static inline unsigned ni_660x_read_register(struct a4l_device *dev, ++ unsigned chip_index, ++ enum NI_660x_Register reg) ++{ ++ void *const read_address = ++ private(dev)->mite->daq_io_addr + GPCT_OFFSET[chip_index] + ++ registerData[reg].offset; ++ ++ switch (registerData[reg].size) { ++ case DATA_2B: ++ return readw(read_address); ++ break; ++ case DATA_4B: ++ return readl(read_address); ++ break; ++ default: ++ __a4l_err("%s: %s: bug! unhandled case (reg=0x%x) in switch.\n", ++ __FILE__, __FUNCTION__, reg); ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static void ni_gpct_write_register(struct ni_gpct *counter, ++ unsigned int bits, enum ni_gpct_register reg) ++{ ++ struct a4l_device *dev = counter->counter_dev->dev; ++ enum NI_660x_Register ni_660x_register = ni_gpct_to_660x_register(reg); ++ ++ ni_660x_write_register(dev, counter->chip_index, bits, ++ ni_660x_register); ++} ++ ++static unsigned ni_gpct_read_register(struct ni_gpct *counter, ++ enum ni_gpct_register reg) ++{ ++ struct a4l_device *dev = counter->counter_dev->dev; ++ enum NI_660x_Register ni_660x_register = ni_gpct_to_660x_register(reg); ++ ++ return ni_660x_read_register(dev, counter->chip_index, ++ ni_660x_register); ++} ++ ++static inline ++struct mite_dma_descriptor_ring *mite_ring(struct ni_660x_private *priv, ++ struct ni_gpct *counter) ++{ ++ ++ return priv->mite_rings[counter->chip_index][counter->counter_index]; ++} ++ ++static inline ++void ni_660x_set_dma_channel(struct a4l_device *dev, ++ unsigned int mite_channel, struct ni_gpct *counter) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&private(dev)->soft_reg_copy_lock, flags); ++ private(dev)->dma_configuration_soft_copies[counter->chip_index] &= ++ ~dma_select_mask(mite_channel); ++ private(dev)->dma_configuration_soft_copies[counter->chip_index] |= ++ dma_select_bits(mite_channel, ++ dma_selection_counter(counter->counter_index)); ++ ni_660x_write_register(dev, counter->chip_index, ++ private(dev)-> ++ dma_configuration_soft_copies ++ [counter->chip_index] | ++ dma_reset_bit(mite_channel), DMAConfigRegister); ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&private(dev)->soft_reg_copy_lock, flags); ++} ++ ++static inline ++void ni_660x_unset_dma_channel(struct a4l_device *dev, ++ unsigned int mite_channel, ++ struct ni_gpct *counter) ++{ ++ unsigned long flags; ++ rtdm_lock_get_irqsave(&private(dev)->soft_reg_copy_lock, flags); ++ private(dev)->dma_configuration_soft_copies[counter->chip_index] &= ++ ~dma_select_mask(mite_channel); ++ private(dev)->dma_configuration_soft_copies[counter->chip_index] |= ++ dma_select_bits(mite_channel, dma_selection_none); ++ ni_660x_write_register(dev, counter->chip_index, ++ private(dev)-> ++ dma_configuration_soft_copies ++ [counter->chip_index], DMAConfigRegister); ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&private(dev)->soft_reg_copy_lock, flags); ++} ++ ++static int ni_660x_request_mite_channel(struct a4l_device *dev, ++ struct ni_gpct *counter, ++ enum io_direction direction) ++{ ++ unsigned long flags; ++ struct mite_channel *mite_chan; ++ ++ rtdm_lock_get_irqsave(&private(dev)->mite_channel_lock, flags); ++ BUG_ON(counter->mite_chan); ++ mite_chan = mite_request_channel(private(dev)->mite, ++ mite_ring(private(dev), counter)); ++ if (mite_chan == NULL) { ++ rtdm_lock_put_irqrestore(&private(dev)->mite_channel_lock, flags); ++ a4l_err(dev, ++ "%s: failed to reserve mite dma channel for counter.\n", ++ 
__FUNCTION__); ++ return -EBUSY; ++ } ++ mite_chan->dir = direction; ++ a4l_ni_tio_set_mite_channel(counter, mite_chan); ++ ni_660x_set_dma_channel(dev, mite_chan->channel, counter); ++ rtdm_lock_put_irqrestore(&private(dev)->mite_channel_lock, flags); ++ return 0; ++} ++ ++void ni_660x_release_mite_channel(struct a4l_device *dev, ++ struct ni_gpct *counter) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&private(dev)->mite_channel_lock, flags); ++ if (counter->mite_chan) { ++ struct mite_channel *mite_chan = counter->mite_chan; ++ ++ ni_660x_unset_dma_channel(dev, mite_chan->channel, counter); ++ a4l_ni_tio_set_mite_channel(counter, NULL); ++ a4l_mite_release_channel(mite_chan); ++ } ++ rtdm_lock_put_irqrestore(&private(dev)->mite_channel_lock, flags); ++} ++ ++static int ni_660x_cmd(struct a4l_subdevice *s, struct a4l_cmd_desc* cmd) ++{ ++ int retval; ++ ++ struct ni_gpct *counter = subdev_priv->counter; ++ ++ retval = ni_660x_request_mite_channel(s->dev, counter, A4L_INPUT); ++ if (retval) { ++ a4l_err(s->dev, ++ "%s: no dma channel available for use by counter", ++ __FUNCTION__); ++ return retval; ++ } ++ ++ a4l_ni_tio_acknowledge_and_confirm (counter, NULL, NULL, NULL, NULL); ++ retval = a4l_ni_tio_cmd(counter, cmd); ++ ++ return retval; ++} ++ ++static int ni_660x_cmdtest(struct a4l_subdevice *s, struct a4l_cmd_desc *cmd) ++{ ++ struct ni_gpct *counter = subdev_priv->counter; ++ return a4l_ni_tio_cmdtest(counter, cmd); ++} ++ ++static int ni_660x_cancel(struct a4l_subdevice *s) ++{ ++ struct ni_gpct *counter = subdev_priv->counter; ++ int retval; ++ ++ retval = a4l_ni_tio_cancel(counter); ++ ni_660x_release_mite_channel(s->dev, counter); ++ return retval; ++} ++ ++static void set_tio_counterswap(struct a4l_device *dev, int chipset) ++{ ++ /* See P. 3.5 of the Register-Level Programming manual. The ++ CounterSwap bit has to be set on the second chip, otherwise ++ it will try to use the same pins as the first chip. 
++ */ ++ ++ if (chipset) ++ ni_660x_write_register(dev, ++ chipset, ++ CounterSwap, ClockConfigRegister); ++ else ++ ni_660x_write_register(dev, ++ chipset, 0, ClockConfigRegister); ++} ++ ++static void ni_660x_handle_gpct_interrupt(struct a4l_device *dev, ++ struct a4l_subdevice *s) ++{ ++ struct a4l_buffer *buf = s->buf; ++ ++ a4l_ni_tio_handle_interrupt(subdev_priv->counter, dev); ++ if ( test_bit(A4L_BUF_EOA_NR, &buf->flags) && ++ test_bit(A4L_BUF_ERROR_NR, &buf->flags) && ++ test_bit(A4L_BUF_EOA_NR, &buf->flags)) ++ ni_660x_cancel(s); ++ else ++ a4l_buf_evt(s, 0); ++} ++ ++static int ni_660x_interrupt(unsigned int irq, void *d) ++{ ++ struct a4l_device *dev = d; ++ unsigned long flags; ++ ++ if (test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) ++ return -ENOENT; ++ ++ /* Lock to avoid race with comedi_poll */ ++ rtdm_lock_get_irqsave(&private(dev)->interrupt_lock, flags); ++ smp_mb(); ++ ++ while (&dev->subdvsq != dev->subdvsq.next) { ++ struct list_head *this = dev->subdvsq.next; ++ struct a4l_subdevice *tmp = list_entry(this, struct a4l_subdevice, list); ++ ni_660x_handle_gpct_interrupt(dev, tmp); ++ } ++ ++ rtdm_lock_put_irqrestore(&private(dev)->interrupt_lock, flags); ++ return 0; ++} ++ ++static int ni_660x_alloc_mite_rings(struct a4l_device *dev) ++{ ++ unsigned int i; ++ unsigned int j; ++ ++ for (i = 0; i < board(dev)->n_chips; ++i) { ++ for (j = 0; j < counters_per_chip; ++j) { ++ private(dev)->mite_rings[i][j] = ++ mite_alloc_ring(private(dev)->mite); ++ if (private(dev)->mite_rings[i][j] == NULL) ++ return -ENOMEM; ++ } ++ } ++ ++ return 0; ++} ++ ++static void ni_660x_free_mite_rings(struct a4l_device *dev) ++{ ++ unsigned int i; ++ unsigned int j; ++ ++ for (i = 0; i < board(dev)->n_chips; ++i) ++ for (j = 0; j < counters_per_chip; ++j) ++ mite_free_ring(private(dev)->mite_rings[i][j]); ++} ++ ++ ++static int __init driver_ni_660x_init_module(void) ++{ ++ return a4l_register_drv (&ni_660x_drv); ++} ++ ++static void __exit driver_ni_660x_cleanup_module(void) ++{ ++ a4l_unregister_drv (&ni_660x_drv); ++} ++ ++module_init(driver_ni_660x_init_module); ++module_exit(driver_ni_660x_cleanup_module); ++ ++static int ni_660x_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ struct a4l_subdevice *s; ++ int ret; ++ int err; ++ int bus, slot; ++ unsigned i; ++ int nsubdev = 0; ++ unsigned global_interrupt_config_bits; ++ struct mite_struct *mitedev; ++ struct ni_660x_board* boardptr = NULL; ++ ++ ret = 0; ++ bus = slot = 0; ++ mitedev = NULL; ++ nsubdev = 0; ++ ++ if(arg->opts == NULL || arg->opts_size == 0) ++ bus = slot = 0; ++ else { ++ bus = arg->opts_size >= sizeof(unsigned long) ? ++ ((unsigned long *)arg->opts)[0] : 0; ++ slot = arg->opts_size >= sizeof(unsigned long) * 2 ? 
++ ((unsigned long *)arg->opts)[1] : 0; ++ } ++ ++ for (i = 0; ( i < n_ni_660x_boards ) && ( mitedev == NULL ); i++) { ++ mitedev = a4l_mite_find_device(bus, slot, ++ ni_660x_boards[i].dev_id); ++ boardptr = (struct ni_660x_board*) &ni_660x_boards[i]; ++ } ++ ++ ++ if(mitedev == NULL) { ++ a4l_info(dev, "mite device not found\n"); ++ return -ENOENT; ++ } ++ ++ a4l_info(dev, "Board found (name=%s), continue initialization ...", ++ boardptr->name); ++ ++ private(dev)->mite = mitedev; ++ private(dev)->board_ptr = boardptr; ++ ++ rtdm_lock_init(&private(dev)->mite_channel_lock); ++ rtdm_lock_init(&private(dev)->interrupt_lock); ++ rtdm_lock_init(&private(dev)->soft_reg_copy_lock); ++ for (i = 0; i < NUM_PFI_CHANNELS; ++i) { ++ private(dev)->pfi_output_selects[i] = pfi_output_select_counter; ++ } ++ ++ ret = a4l_mite_setup(private(dev)->mite, 1); ++ if (ret < 0) { ++ a4l_err(dev, "%s: error setting up mite\n", __FUNCTION__); ++ return ret; ++ } ++ ++ ret = ni_660x_alloc_mite_rings(dev); ++ if (ret < 0) { ++ a4l_err(dev, "%s: error setting up mite rings\n", __FUNCTION__); ++ return ret; ++ } ++ ++ /* Setup first subdevice */ ++ s = a4l_alloc_subd(sizeof(struct ni_660x_subd_priv), NULL); ++ if (s == NULL) ++ return -ENOMEM; ++ ++ s->flags = A4L_SUBD_UNUSED; ++ ++ err = a4l_add_subd(dev, s); ++ if (err != nsubdev) { ++ a4l_info(dev, "cannot add first subdevice, returns %d, expect %d\n", err, i); ++ return err; ++ } ++ ++ nsubdev++; ++ ++ /* Setup second subdevice */ ++ s = a4l_alloc_subd(sizeof(struct ni_660x_subd_priv), NULL); ++ if (s == NULL) { ++ a4l_info(dev, "cannot allocate second subdevice\n"); ++ return -ENOMEM; ++ } ++ ++ s->flags = A4L_SUBD_DIO; ++ s->flags |= A4L_SUBD_CMD; ++ s->chan_desc = &chandesc_ni660x; ++ s->rng_desc = &range_digital; ++ s->insn_bits = ni_660x_dio_insn_bits; ++ s->insn_config = ni_660x_dio_insn_config; ++ s->dev = dev; ++ subdev_priv->io_bits = 0; ++ ni_660x_write_register(dev, 0, 0, STCDIOControl); ++ ++ err = a4l_add_subd(dev, s); ++ if (err != nsubdev) ++ return err; ++ ++ nsubdev++; ++ ++ private(dev)->counter_dev = ++ a4l_ni_gpct_device_construct(dev, ++ &ni_gpct_write_register, ++ &ni_gpct_read_register, ++ ni_gpct_variant_660x, ++ ni_660x_num_counters (dev)); ++ if (private(dev)->counter_dev == NULL) ++ return -ENOMEM; ++ ++ for (i = 0; i < ni_660x_num_counters(dev); ++i) { ++ /* TODO: check why there are kmalloc here... 
and in pcimio */ ++ private(dev)->counter_dev->counters[i] = ++ kmalloc(sizeof(struct ni_gpct), GFP_KERNEL); ++ private(dev)->counter_dev->counters[i]->counter_dev = ++ private(dev)->counter_dev; ++ rtdm_lock_init(&(private(dev)->counter_dev->counters[i]->lock)); ++ } ++ ++ for (i = 0; i < NI_660X_MAX_NUM_COUNTERS; ++i) { ++ if (i < ni_660x_num_counters(dev)) { ++ /* Setup other subdevice */ ++ s = a4l_alloc_subd(sizeof(struct ni_660x_subd_priv), NULL); ++ ++ if (s == NULL) ++ return -ENOMEM; ++ ++ s->flags = A4L_SUBD_COUNTER; ++ s->chan_desc = rtdm_malloc (sizeof (struct a4l_channels_desc)); ++ s->chan_desc->length = 3; ++ s->insn_read = ni_660x_GPCT_rinsn; ++ s->insn_write = ni_660x_GPCT_winsn; ++ s->insn_config = ni_660x_GPCT_insn_config; ++ s->do_cmd = &ni_660x_cmd; ++ s->do_cmdtest = &ni_660x_cmdtest; ++ s->cancel = &ni_660x_cancel; ++ ++ subdev_priv->counter = private(dev)->counter_dev->counters[i]; ++ ++ private(dev)->counter_dev->counters[i]->chip_index = ++ i / counters_per_chip; ++ private(dev)->counter_dev->counters[i]->counter_index = ++ i % counters_per_chip; ++ } else { ++ s = a4l_alloc_subd(sizeof(struct ni_660x_subd_priv), NULL); ++ if (s == NULL) ++ return -ENOMEM; ++ s->flags = A4L_SUBD_UNUSED; ++ } ++ ++ err = a4l_add_subd(dev, s); ++ ++ if (err != nsubdev) ++ return err; ++ ++ nsubdev++; ++ } ++ ++ for (i = 0; i < board(dev)->n_chips; ++i) ++ init_tio_chip(dev, i); ++ ++ for (i = 0; i < ni_660x_num_counters(dev); ++i) ++ a4l_ni_tio_init_counter(private(dev)->counter_dev->counters[i]); ++ ++ for (i = 0; i < NUM_PFI_CHANNELS; ++i) { ++ if (i < min_counter_pfi_chan) ++ ni_660x_set_pfi_routing(dev, i, pfi_output_select_do); ++ else ++ ni_660x_set_pfi_routing(dev, i, ++ pfi_output_select_counter); ++ ni_660x_select_pfi_output(dev, i, pfi_output_select_high_Z); ++ } ++ ++ ++ /* To be safe, set counterswap bits on tio chips after all the ++ counter outputs have been set to high impedance mode */ ++ ++ for (i = 0; i < board(dev)->n_chips; ++i) ++ set_tio_counterswap(dev, i); ++ ++ ret = a4l_request_irq(dev, ++ mite_irq(private(dev)->mite), ++ ni_660x_interrupt, RTDM_IRQTYPE_SHARED, dev); ++ ++ if (ret < 0) { ++ a4l_err(dev, "%s: IRQ not available\n", __FUNCTION__); ++ return ret; ++ } ++ ++ global_interrupt_config_bits = Global_Int_Enable_Bit; ++ if (board(dev)->n_chips > 1) ++ global_interrupt_config_bits |= Cascade_Int_Enable_Bit; ++ ++ ni_660x_write_register(dev, 0, global_interrupt_config_bits, ++ GlobalInterruptConfigRegister); ++ ++ a4l_info(dev, "attach succeed, ready to be used\n"); ++ ++ return 0; ++} ++ ++static int ni_660x_detach(struct a4l_device *dev) ++{ ++ int i; ++ ++ a4l_info(dev, "begin to detach the driver ..."); ++ ++ /* Free irq */ ++ if(a4l_get_irq(dev)!=A4L_IRQ_UNUSED) ++ a4l_free_irq(dev,a4l_get_irq(dev)); ++ ++ if (dev->priv) { ++ ++ if (private(dev)->counter_dev) { ++ ++ for (i = 0; i < ni_660x_num_counters(dev); ++i) ++ if ((private(dev)->counter_dev->counters[i]) != NULL) ++ kfree (private(dev)->counter_dev->counters[i]); ++ ++ a4l_ni_gpct_device_destroy(private(dev)->counter_dev); ++ } ++ ++ if (private(dev)->mite) { ++ ni_660x_free_mite_rings(dev); ++ a4l_mite_unsetup(private(dev)->mite); ++ } ++ } ++ ++ a4l_info(dev, "driver detached !\n"); ++ ++ return 0; ++} ++ ++static int ni_660x_GPCT_rinsn(struct a4l_subdevice *s, struct a4l_kernel_instruction *insn) ++{ ++ return a4l_ni_tio_rinsn(subdev_priv->counter, insn); ++} ++ ++static void init_tio_chip(struct a4l_device *dev, int chipset) ++{ ++ unsigned int i; ++ ++ /* Init dma configuration register 
*/ ++ private(dev)->dma_configuration_soft_copies[chipset] = 0; ++ for (i = 0; i < MAX_DMA_CHANNEL; ++i) { ++ private(dev)->dma_configuration_soft_copies[chipset] |= ++ dma_select_bits(i, dma_selection_none) & dma_select_mask(i); ++ } ++ ++ ni_660x_write_register(dev, chipset, ++ private(dev)-> ++ dma_configuration_soft_copies[chipset], ++ DMAConfigRegister); ++ ++ for (i = 0; i < NUM_PFI_CHANNELS; ++i) ++ ni_660x_write_register(dev, chipset, 0, IOConfigReg(i)); ++} ++ ++static int ni_660x_GPCT_insn_config(struct a4l_subdevice *s, struct a4l_kernel_instruction *insn) ++{ ++ return a4l_ni_tio_insn_config (subdev_priv->counter, insn); ++} ++ ++static int ni_660x_GPCT_winsn(struct a4l_subdevice *s, struct a4l_kernel_instruction *insn) ++{ ++ return a4l_ni_tio_winsn(subdev_priv->counter, insn); ++} ++ ++static int ni_660x_dio_insn_bits(struct a4l_subdevice *s, struct a4l_kernel_instruction *insn) ++{ ++ unsigned int* data = (unsigned int*) insn->data; ++ unsigned int base_bitfield_channel = CR_CHAN(insn->chan_desc); ++ ++ /* Check if we have to write some bits */ ++ if (data[0]) { ++ subdev_priv->state &= ~(data[0] << base_bitfield_channel); ++ subdev_priv->state |= (data[0] & data[1]) << base_bitfield_channel; ++ /* Write out the new digital output lines */ ++ ni_660x_write_register(s->dev, 0, subdev_priv->state, DIO32Output); ++ } ++ ++ /* On return, data[1] contains the value of the digital input ++ and output lines. */ ++ data[1] = ni_660x_read_register(s->dev, 0,DIO32Input) >> ++ base_bitfield_channel; ++ ++ return 0; ++} ++ ++static void ni_660x_select_pfi_output(struct a4l_device *dev, ++ unsigned pfi_channel, ++ unsigned output_select) ++{ ++ static const unsigned counter_4_7_first_pfi = 8; ++ static const unsigned counter_4_7_last_pfi = 23; ++ unsigned active_chipset = 0; ++ unsigned idle_chipset = 0; ++ unsigned active_bits; ++ unsigned idle_bits; ++ ++ if (board(dev)->n_chips > 1) { ++ if (output_select == pfi_output_select_counter && ++ pfi_channel >= counter_4_7_first_pfi && ++ pfi_channel <= counter_4_7_last_pfi) { ++ active_chipset = 1; ++ idle_chipset = 0; ++ } else { ++ active_chipset = 0; ++ idle_chipset = 1; ++ } ++ } ++ ++ if (idle_chipset != active_chipset) { ++ ++ idle_bits =ni_660x_read_register(dev, idle_chipset, ++ IOConfigReg(pfi_channel)); ++ idle_bits &= ~pfi_output_select_mask(pfi_channel); ++ idle_bits |= ++ pfi_output_select_bits(pfi_channel, ++ pfi_output_select_high_Z); ++ ni_660x_write_register(dev, idle_chipset, idle_bits, ++ IOConfigReg(pfi_channel)); ++ } ++ ++ active_bits = ++ ni_660x_read_register(dev, active_chipset, ++ IOConfigReg(pfi_channel)); ++ active_bits &= ~pfi_output_select_mask(pfi_channel); ++ active_bits |= pfi_output_select_bits(pfi_channel, output_select); ++ ni_660x_write_register(dev, active_chipset, active_bits, ++ IOConfigReg(pfi_channel)); ++} ++ ++static int ni_660x_set_pfi_routing(struct a4l_device *dev, unsigned chan, ++ unsigned source) ++{ ++ BUG_ON(chan >= NUM_PFI_CHANNELS); ++ ++ if (source > num_pfi_output_selects) ++ return -EINVAL; ++ if (source == pfi_output_select_high_Z) ++ return -EINVAL; ++ if (chan < min_counter_pfi_chan) { ++ if (source == pfi_output_select_counter) ++ return -EINVAL; ++ } else if (chan > max_dio_pfi_chan) { ++ if (source == pfi_output_select_do) ++ return -EINVAL; ++ } ++ BUG_ON(chan >= NUM_PFI_CHANNELS); ++ ++ private(dev)->pfi_output_selects[chan] = source; ++ if (private(dev)->pfi_direction_bits & (((uint64_t) 1) << chan)) ++ ni_660x_select_pfi_output(dev, chan, ++ private(dev)-> ++ 
pfi_output_selects[chan]); ++ return 0; ++} ++ ++static unsigned ni_660x_get_pfi_routing(struct a4l_device *dev, ++ unsigned chan) ++{ ++ BUG_ON(chan >= NUM_PFI_CHANNELS); ++ return private(dev)->pfi_output_selects[chan]; ++} ++ ++static void ni660x_config_filter(struct a4l_device *dev, ++ unsigned pfi_channel, ++ int filter) ++{ ++ unsigned int bits; ++ ++ bits = ni_660x_read_register(dev, 0, IOConfigReg(pfi_channel)); ++ bits &= ~pfi_input_select_mask(pfi_channel); ++ bits |= pfi_input_select_bits(pfi_channel, filter); ++ ni_660x_write_register(dev, 0, bits, IOConfigReg(pfi_channel)); ++} ++ ++static int ni_660x_dio_insn_config(struct a4l_subdevice *s, struct a4l_kernel_instruction *insn) ++{ ++ unsigned int* data = insn->data; ++ int chan = CR_CHAN(insn->chan_desc); ++ struct a4l_device* dev = s->dev; ++ ++ if (data == NULL) ++ return -EINVAL; ++ ++ /* The input or output configuration of each digital line is ++ * configured by a special insn_config instruction. chanspec ++ * contains the channel to be changed, and data[0] contains the ++ * value COMEDI_INPUT or COMEDI_OUTPUT. */ ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ private(dev)->pfi_direction_bits |= ((uint64_t) 1) << chan; ++ ni_660x_select_pfi_output(dev, chan, ++ private(dev)-> ++ pfi_output_selects[chan]); ++ break; ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ private(dev)->pfi_direction_bits &= ~(((uint64_t) 1) << chan); ++ ni_660x_select_pfi_output(dev, chan, pfi_output_select_high_Z); ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = ++ (private(dev)->pfi_direction_bits & ++ (((uint64_t) 1) << chan)) ? A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ case A4L_INSN_CONFIG_SET_ROUTING: ++ return ni_660x_set_pfi_routing(dev, chan, data[1]); ++ break; ++ case A4L_INSN_CONFIG_GET_ROUTING: ++ data[1] = ni_660x_get_pfi_routing(dev, chan); ++ break; ++ case A4L_INSN_CONFIG_FILTER: ++ ni660x_config_filter(dev, chan, data[1]); ++ break; ++ default: ++ return -EINVAL; ++ break; ++ }; ++ ++ return 0; ++} ++ ++ ++MODULE_DESCRIPTION("Analogy driver for NI660x series cards"); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/analogy/national_instruments/tio_common.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/tio_common.c 2021-04-07 16:01:27.825633312 +0800 +@@ -0,0 +1,1998 @@ ++/* ++ * Hardware driver for NI general purpose counter ++ * Copyright (C) 2006 Frank Mori Hess ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Description: National Instruments general purpose counters ++ * This module is not used directly by end-users. Rather, it is used ++ * by other drivers (for example ni_660x and ni_pcimio) to provide ++ * support for NI's general purpose counters. It was originally based ++ * on the counter code from ni_660x.c and ni_mio_common.c. ++ * ++ * Author: ++ * J.P. 
Mellor ++ * Herman.Bruyninckx@mech.kuleuven.ac.be ++ * Wim.Meeussen@mech.kuleuven.ac.be, ++ * Klaas.Gadeyne@mech.kuleuven.ac.be, ++ * Frank Mori Hess ++ * ++ * References: ++ * DAQ 660x Register-Level Programmer Manual (NI 370505A-01) ++ * DAQ 6601/6602 User Manual (NI 322137B-01) ++ * 340934b.pdf DAQ-STC reference manual ++ * ++ * TODO: ++ * - Support use of both banks X and Y ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include "ni_tio.h" ++#include "ni_mio.h" ++ ++static inline void write_register(struct ni_gpct *counter, ++ unsigned int bits, enum ni_gpct_register reg) ++{ ++ BUG_ON(reg >= NITIO_Num_Registers); ++ counter->counter_dev->write_register(counter, bits, reg); ++} ++ ++static inline unsigned int read_register(struct ni_gpct *counter, ++ enum ni_gpct_register reg) ++{ ++ BUG_ON(reg >= NITIO_Num_Registers); ++ return counter->counter_dev->read_register(counter, reg); ++} ++ ++struct ni_gpct_device *a4l_ni_gpct_device_construct(struct a4l_device * dev, ++ void (*write_register) (struct ni_gpct * counter, unsigned int bits, ++ enum ni_gpct_register reg), ++ unsigned int (*read_register) (struct ni_gpct * counter, ++ enum ni_gpct_register reg), enum ni_gpct_variant variant, ++ unsigned int num_counters) ++{ ++ struct ni_gpct_device *counter_dev = ++ kmalloc(sizeof(struct ni_gpct_device), GFP_KERNEL); ++ if (counter_dev == NULL) ++ return NULL; ++ ++ memset(counter_dev, 0, sizeof(struct ni_gpct_device)); ++ ++ counter_dev->dev = dev; ++ counter_dev->write_register = write_register; ++ counter_dev->read_register = read_register; ++ counter_dev->variant = variant; ++ rtdm_lock_init(&counter_dev->regs_lock); ++ BUG_ON(num_counters == 0); ++ ++ counter_dev->counters = ++ kmalloc(sizeof(struct ni_gpct *) * num_counters, GFP_KERNEL); ++ ++ if (counter_dev->counters == NULL) { ++ kfree(counter_dev); ++ return NULL; ++ } ++ ++ memset(counter_dev->counters, 0, sizeof(struct ni_gpct *) * num_counters); ++ ++ counter_dev->num_counters = num_counters; ++ return counter_dev; ++} ++ ++void a4l_ni_gpct_device_destroy(struct ni_gpct_device *counter_dev) ++{ ++ if (counter_dev->counters == NULL) ++ return; ++ kfree(counter_dev->counters); ++ kfree(counter_dev); ++} ++ ++static ++int ni_tio_counting_mode_registers_present(const struct ni_gpct_device *counter_dev) ++{ ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ return 1; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static ++int ni_tio_second_gate_registers_present(const struct ni_gpct_device *counter_dev) ++{ ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ return 1; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline ++void ni_tio_set_bits_transient(struct ni_gpct *counter, ++ enum ni_gpct_register register_index, ++ unsigned int bit_mask, ++ unsigned int bit_values, ++ unsigned transient_bit_values) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ unsigned long flags; ++ ++ BUG_ON(register_index >= NITIO_Num_Registers); ++ rtdm_lock_get_irqsave(&counter_dev->regs_lock, flags); ++ counter_dev->regs[register_index] &= ~bit_mask; ++ counter_dev->regs[register_index] |= (bit_values & bit_mask); ++ write_register(counter, ++ counter_dev->regs[register_index] | transient_bit_values, ++ register_index); ++ mmiowb(); ++ 
rtdm_lock_put_irqrestore(&counter_dev->regs_lock, flags); ++} ++ ++/* ni_tio_set_bits( ) is for safely writing to registers whose bits ++ may be twiddled in interrupt context, or whose software copy may be ++ read in interrupt context. */ ++static inline void ni_tio_set_bits(struct ni_gpct *counter, ++ enum ni_gpct_register register_index, ++ unsigned int bit_mask, ++ unsigned int bit_values) ++{ ++ ni_tio_set_bits_transient(counter, ++ register_index, ++ bit_mask, bit_values, 0x0); ++} ++ ++/* ni_tio_get_soft_copy( ) is for safely reading the software copy of ++ a register whose bits might be modified in interrupt context, or whose ++ software copy might need to be read in interrupt context. */ ++static inline ++unsigned int ni_tio_get_soft_copy(const struct ni_gpct *counter, ++ enum ni_gpct_register register_index) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ unsigned long flags; ++ unsigned value; ++ ++ BUG_ON(register_index >= NITIO_Num_Registers); ++ rtdm_lock_get_irqsave(&counter_dev->regs_lock, flags); ++ value = counter_dev->regs[register_index]; ++ rtdm_lock_put_irqrestore(&counter_dev->regs_lock, flags); ++ return value; ++} ++ ++static void ni_tio_reset_count_and_disarm(struct ni_gpct *counter) ++{ ++ write_register(counter, Gi_Reset_Bit(counter->counter_index), ++ NITIO_Gxx_Joint_Reset_Reg(counter->counter_index)); ++} ++ ++void a4l_ni_tio_init_counter(struct ni_gpct *counter) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ ++ ni_tio_reset_count_and_disarm(counter); ++ /* Initialize counter registers */ ++ counter_dev->regs[NITIO_Gi_Autoincrement_Reg(counter->counter_index)] = ++ 0x0; ++ write_register(counter, ++ counter_dev->regs[NITIO_Gi_Autoincrement_Reg(counter-> ++ counter_index)], ++ NITIO_Gi_Autoincrement_Reg(counter->counter_index)); ++ ni_tio_set_bits(counter, NITIO_Gi_Command_Reg(counter->counter_index), ++ ~0, Gi_Synchronize_Gate_Bit); ++ ni_tio_set_bits(counter, NITIO_Gi_Mode_Reg(counter->counter_index), ~0, ++ 0); ++ counter_dev->regs[NITIO_Gi_LoadA_Reg(counter->counter_index)] = 0x0; ++ write_register(counter, ++ counter_dev->regs[NITIO_Gi_LoadA_Reg(counter->counter_index)], ++ NITIO_Gi_LoadA_Reg(counter->counter_index)); ++ counter_dev->regs[NITIO_Gi_LoadB_Reg(counter->counter_index)] = 0x0; ++ write_register(counter, ++ counter_dev->regs[NITIO_Gi_LoadB_Reg(counter->counter_index)], ++ NITIO_Gi_LoadB_Reg(counter->counter_index)); ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ~0, 0); ++ if (ni_tio_counting_mode_registers_present(counter_dev)) { ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Counting_Mode_Reg(counter->counter_index), ~0, ++ 0); ++ } ++ if (ni_tio_second_gate_registers_present(counter_dev)) { ++ counter_dev->regs[NITIO_Gi_Second_Gate_Reg(counter-> ++ counter_index)] = 0x0; ++ write_register(counter, ++ counter_dev->regs[NITIO_Gi_Second_Gate_Reg(counter-> ++ counter_index)], ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index)); ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_DMA_Config_Reg(counter->counter_index), ~0, 0x0); ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Interrupt_Enable_Reg(counter->counter_index), ~0, 0x0); ++} ++ ++static lsampl_t ni_tio_counter_status(struct ni_gpct *counter) ++{ ++ lsampl_t status = 0; ++ unsigned int bits; ++ ++ bits = read_register(counter,NITIO_Gxx_Status_Reg(counter->counter_index)); ++ if (bits & Gi_Armed_Bit(counter->counter_index)) { ++ status |= A4L_COUNTER_ARMED; ++ if (bits & Gi_Counting_Bit(counter->counter_index)) ++ status |= 
A4L_COUNTER_COUNTING; ++ } ++ return status; ++} ++ ++static ++uint64_t ni_tio_clock_period_ps(const struct ni_gpct *counter, ++ unsigned int generic_clock_source); ++static ++unsigned int ni_tio_generic_clock_src_select(const struct ni_gpct *counter); ++ ++static void ni_tio_set_sync_mode(struct ni_gpct *counter, int force_alt_sync) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned counting_mode_reg = ++ NITIO_Gi_Counting_Mode_Reg(counter->counter_index); ++ static const uint64_t min_normal_sync_period_ps = 25000; ++ const uint64_t clock_period_ps = ni_tio_clock_period_ps(counter, ++ ni_tio_generic_clock_src_select(counter)); ++ ++ if (ni_tio_counting_mode_registers_present(counter_dev) == 0) ++ return; ++ ++ switch (ni_tio_get_soft_copy(counter, ++ counting_mode_reg) & Gi_Counting_Mode_Mask) { ++ case Gi_Counting_Mode_QuadratureX1_Bits: ++ case Gi_Counting_Mode_QuadratureX2_Bits: ++ case Gi_Counting_Mode_QuadratureX4_Bits: ++ case Gi_Counting_Mode_Sync_Source_Bits: ++ force_alt_sync = 1; ++ break; ++ default: ++ break; ++ } ++ ++ /* It's not clear what we should do if clock_period is ++ unknown, so we are not using the alt sync bit in that case, ++ but allow the caller to decide by using the force_alt_sync ++ parameter. */ ++ if (force_alt_sync || ++ (clock_period_ps ++ && clock_period_ps < min_normal_sync_period_ps)) { ++ ni_tio_set_bits(counter, counting_mode_reg, ++ Gi_Alternate_Sync_Bit(counter_dev->variant), ++ Gi_Alternate_Sync_Bit(counter_dev->variant)); ++ } else { ++ ni_tio_set_bits(counter, counting_mode_reg, ++ Gi_Alternate_Sync_Bit(counter_dev->variant), 0x0); ++ } ++} ++ ++static int ni_tio_set_counter_mode(struct ni_gpct *counter, unsigned int mode) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ unsigned mode_reg_mask; ++ unsigned mode_reg_values; ++ unsigned input_select_bits = 0; ++ ++ /* these bits map directly on to the mode register */ ++ static const unsigned mode_reg_direct_mask = ++ NI_GPCT_GATE_ON_BOTH_EDGES_BIT | NI_GPCT_EDGE_GATE_MODE_MASK | ++ NI_GPCT_STOP_MODE_MASK | NI_GPCT_OUTPUT_MODE_MASK | ++ NI_GPCT_HARDWARE_DISARM_MASK | NI_GPCT_LOADING_ON_TC_BIT | ++ NI_GPCT_LOADING_ON_GATE_BIT | NI_GPCT_LOAD_B_SELECT_BIT; ++ ++ mode_reg_mask = mode_reg_direct_mask | Gi_Reload_Source_Switching_Bit; ++ mode_reg_values = mode & mode_reg_direct_mask; ++ switch (mode & NI_GPCT_RELOAD_SOURCE_MASK) { ++ case NI_GPCT_RELOAD_SOURCE_FIXED_BITS: ++ break; ++ case NI_GPCT_RELOAD_SOURCE_SWITCHING_BITS: ++ mode_reg_values |= Gi_Reload_Source_Switching_Bit; ++ break; ++ case NI_GPCT_RELOAD_SOURCE_GATE_SELECT_BITS: ++ input_select_bits |= Gi_Gate_Select_Load_Source_Bit; ++ mode_reg_mask |= Gi_Gating_Mode_Mask; ++ mode_reg_values |= Gi_Level_Gating_Bits; ++ break; ++ default: ++ break; ++ } ++ ni_tio_set_bits(counter, NITIO_Gi_Mode_Reg(counter->counter_index), ++ mode_reg_mask, mode_reg_values); ++ ++ if (ni_tio_counting_mode_registers_present(counter_dev)) { ++ unsigned counting_mode_bits = 0; ++ counting_mode_bits |= ++ (mode >> NI_GPCT_COUNTING_MODE_SHIFT) & ++ Gi_Counting_Mode_Mask; ++ counting_mode_bits |= ++ ((mode >> NI_GPCT_INDEX_PHASE_BITSHIFT) << ++ Gi_Index_Phase_Bitshift) & Gi_Index_Phase_Mask; ++ if (mode & NI_GPCT_INDEX_ENABLE_BIT) { ++ counting_mode_bits |= Gi_Index_Mode_Bit; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Counting_Mode_Reg(counter->counter_index), ++ Gi_Counting_Mode_Mask | Gi_Index_Phase_Mask | ++ Gi_Index_Mode_Bit, counting_mode_bits); ++ ni_tio_set_sync_mode(counter, 0); ++ } ++ ++ 
ni_tio_set_bits(counter, NITIO_Gi_Command_Reg(counter->counter_index), ++ Gi_Up_Down_Mask, ++ (mode >> NI_GPCT_COUNTING_DIRECTION_SHIFT) << Gi_Up_Down_Shift); ++ ++ if (mode & NI_GPCT_OR_GATE_BIT) { ++ input_select_bits |= Gi_Or_Gate_Bit; ++ } ++ if (mode & NI_GPCT_INVERT_OUTPUT_BIT) { ++ input_select_bits |= Gi_Output_Polarity_Bit; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ++ Gi_Gate_Select_Load_Source_Bit | Gi_Or_Gate_Bit | ++ Gi_Output_Polarity_Bit, input_select_bits); ++ ++ return 0; ++} ++ ++static int ni_tio_arm(struct ni_gpct *counter, int arm, unsigned int start_trigger) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ ++ unsigned int command_transient_bits = 0; ++ ++ if (arm) { ++ switch (start_trigger) { ++ case NI_GPCT_ARM_IMMEDIATE: ++ command_transient_bits |= Gi_Arm_Bit; ++ break; ++ case NI_GPCT_ARM_PAIRED_IMMEDIATE: ++ command_transient_bits |= Gi_Arm_Bit | Gi_Arm_Copy_Bit; ++ break; ++ default: ++ break; ++ } ++ if (ni_tio_counting_mode_registers_present(counter_dev)) { ++ unsigned counting_mode_bits = 0; ++ ++ switch (start_trigger) { ++ case NI_GPCT_ARM_IMMEDIATE: ++ case NI_GPCT_ARM_PAIRED_IMMEDIATE: ++ break; ++ default: ++ if (start_trigger & NI_GPCT_ARM_UNKNOWN) { ++ /* Pass-through the least ++ significant bits so we can ++ figure out what select later ++ */ ++ unsigned hw_arm_select_bits = ++ (start_trigger << ++ Gi_HW_Arm_Select_Shift) & ++ Gi_HW_Arm_Select_Mask ++ (counter_dev->variant); ++ ++ counting_mode_bits |= ++ Gi_HW_Arm_Enable_Bit | ++ hw_arm_select_bits; ++ } else { ++ return -EINVAL; ++ } ++ break; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Counting_Mode_Reg(counter-> ++ counter_index), ++ Gi_HW_Arm_Select_Mask(counter_dev-> ++ variant) | Gi_HW_Arm_Enable_Bit, ++ counting_mode_bits); ++ } ++ } else { ++ command_transient_bits |= Gi_Disarm_Bit; ++ } ++ ni_tio_set_bits_transient(counter, ++ NITIO_Gi_Command_Reg(counter->counter_index), 0, 0, ++ command_transient_bits); ++ return 0; ++} ++ ++static unsigned int ni_660x_source_select_bits(lsampl_t clock_source) ++{ ++ unsigned int ni_660x_clock; ++ unsigned int i; ++ const unsigned int clock_select_bits = ++ clock_source & NI_GPCT_CLOCK_SRC_SELECT_MASK; ++ ++ switch (clock_select_bits) { ++ case NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Timebase_1_Clock; ++ break; ++ case NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Timebase_2_Clock; ++ break; ++ case NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Timebase_3_Clock; ++ break; ++ case NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Logic_Low_Clock; ++ break; ++ case NI_GPCT_SOURCE_PIN_i_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Source_Pin_i_Clock; ++ break; ++ case NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Next_Gate_Clock; ++ break; ++ case NI_GPCT_NEXT_TC_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Next_TC_Clock; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (clock_select_bits == NI_GPCT_RTSI_CLOCK_SRC_BITS(i)) { ++ ni_660x_clock = NI_660x_RTSI_Clock(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_source_pin; ++i) { ++ if (clock_select_bits == ++ NI_GPCT_SOURCE_PIN_CLOCK_SRC_BITS(i)) { ++ ni_660x_clock = NI_660x_Source_Pin_Clock(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_source_pin) ++ break; ++ ni_660x_clock = 0; ++ BUG(); ++ break; ++ } ++ return Gi_Source_Select_Bits(ni_660x_clock); ++} ++ ++static unsigned int 
ni_m_series_source_select_bits(lsampl_t clock_source) ++{ ++ unsigned int ni_m_series_clock; ++ unsigned int i; ++ const unsigned int clock_select_bits = ++ clock_source & NI_GPCT_CLOCK_SRC_SELECT_MASK; ++ switch (clock_select_bits) { ++ case NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Timebase_1_Clock; ++ break; ++ case NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Timebase_2_Clock; ++ break; ++ case NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Timebase_3_Clock; ++ break; ++ case NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Logic_Low_Clock; ++ break; ++ case NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Next_Gate_Clock; ++ break; ++ case NI_GPCT_NEXT_TC_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Next_TC_Clock; ++ break; ++ case NI_GPCT_PXI10_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_PXI10_Clock; ++ break; ++ case NI_GPCT_PXI_STAR_TRIGGER_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_PXI_Star_Trigger_Clock; ++ break; ++ case NI_GPCT_ANALOG_TRIGGER_OUT_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Analog_Trigger_Out_Clock; ++ break; ++ default: ++ for (i = 0; i <= ni_m_series_max_rtsi_channel; ++i) { ++ if (clock_select_bits == NI_GPCT_RTSI_CLOCK_SRC_BITS(i)) { ++ ni_m_series_clock = NI_M_Series_RTSI_Clock(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_m_series_max_pfi_channel; ++i) { ++ if (clock_select_bits == NI_GPCT_PFI_CLOCK_SRC_BITS(i)) { ++ ni_m_series_clock = NI_M_Series_PFI_Clock(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_pfi_channel) ++ break; ++ __a4l_err("invalid clock source 0x%lx\n", ++ (unsigned long)clock_source); ++ BUG(); ++ ni_m_series_clock = 0; ++ break; ++ } ++ return Gi_Source_Select_Bits(ni_m_series_clock); ++} ++ ++static void ni_tio_set_source_subselect(struct ni_gpct *counter, ++ lsampl_t clock_source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ ++ if (counter_dev->variant != ni_gpct_variant_m_series) ++ return; ++ switch (clock_source & NI_GPCT_CLOCK_SRC_SELECT_MASK) { ++ /* Gi_Source_Subselect is zero */ ++ case NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS: ++ case NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS: ++ counter_dev->regs[second_gate_reg] &= ~Gi_Source_Subselect_Bit; ++ break; ++ /* Gi_Source_Subselect is one */ ++ case NI_GPCT_ANALOG_TRIGGER_OUT_CLOCK_SRC_BITS: ++ case NI_GPCT_PXI_STAR_TRIGGER_CLOCK_SRC_BITS: ++ counter_dev->regs[second_gate_reg] |= Gi_Source_Subselect_Bit; ++ break; ++ /* Gi_Source_Subselect doesn't matter */ ++ default: ++ return; ++ break; ++ } ++ write_register(counter, counter_dev->regs[second_gate_reg], ++ second_gate_reg); ++} ++ ++static int ni_tio_set_clock_src(struct ni_gpct *counter, ++ lsampl_t clock_source, lsampl_t period_ns) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ unsigned input_select_bits = 0; ++ static const uint64_t pico_per_nano = 1000; ++ ++ /* FIXME: validate clock source */ ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_660x: ++ input_select_bits |= ni_660x_source_select_bits(clock_source); ++ break; ++ case ni_gpct_variant_e_series: ++ case ni_gpct_variant_m_series: ++ input_select_bits |= ++ ni_m_series_source_select_bits(clock_source); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ if (clock_source & NI_GPCT_INVERT_CLOCK_SRC_BIT) ++ input_select_bits |= 
Gi_Source_Polarity_Bit; ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ++ Gi_Source_Select_Mask | Gi_Source_Polarity_Bit, ++ input_select_bits); ++ ni_tio_set_source_subselect(counter, clock_source); ++ if (ni_tio_counting_mode_registers_present(counter_dev)) { ++ const unsigned prescaling_mode = ++ clock_source & NI_GPCT_PRESCALE_MODE_CLOCK_SRC_MASK; ++ unsigned counting_mode_bits = 0; ++ ++ switch (prescaling_mode) { ++ case NI_GPCT_NO_PRESCALE_CLOCK_SRC_BITS: ++ break; ++ case NI_GPCT_PRESCALE_X2_CLOCK_SRC_BITS: ++ counting_mode_bits |= ++ Gi_Prescale_X2_Bit(counter_dev->variant); ++ break; ++ case NI_GPCT_PRESCALE_X8_CLOCK_SRC_BITS: ++ counting_mode_bits |= ++ Gi_Prescale_X8_Bit(counter_dev->variant); ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Counting_Mode_Reg(counter->counter_index), ++ Gi_Prescale_X2_Bit(counter_dev-> ++ variant) | Gi_Prescale_X8_Bit(counter_dev-> ++ variant), counting_mode_bits); ++ } ++ counter->clock_period_ps = pico_per_nano * period_ns; ++ ni_tio_set_sync_mode(counter, 0); ++ return 0; ++} ++ ++static unsigned int ni_tio_clock_src_modifiers(const struct ni_gpct *counter) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned counting_mode_bits = ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Counting_Mode_Reg(counter->counter_index)); ++ unsigned int bits = 0; ++ ++ if (ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Input_Select_Reg(counter-> ++ counter_index)) & Gi_Source_Polarity_Bit) ++ bits |= NI_GPCT_INVERT_CLOCK_SRC_BIT; ++ if (counting_mode_bits & Gi_Prescale_X2_Bit(counter_dev->variant)) ++ bits |= NI_GPCT_PRESCALE_X2_CLOCK_SRC_BITS; ++ if (counting_mode_bits & Gi_Prescale_X8_Bit(counter_dev->variant)) ++ bits |= NI_GPCT_PRESCALE_X8_CLOCK_SRC_BITS; ++ return bits; ++} ++ ++static unsigned int ni_m_series_clock_src_select(const struct ni_gpct *counter) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ unsigned int i, clock_source = 0; ++ ++ const unsigned int input_select = (ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Input_Select_Reg(counter-> ++ counter_index)) & Gi_Source_Select_Mask) >> ++ Gi_Source_Select_Shift; ++ ++ switch (input_select) { ++ case NI_M_Series_Timebase_1_Clock: ++ clock_source = NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_Timebase_2_Clock: ++ clock_source = NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_Timebase_3_Clock: ++ if (counter_dev-> ++ regs[second_gate_reg] & Gi_Source_Subselect_Bit) ++ clock_source = ++ NI_GPCT_ANALOG_TRIGGER_OUT_CLOCK_SRC_BITS; ++ else ++ clock_source = NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_Logic_Low_Clock: ++ clock_source = NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_Next_Gate_Clock: ++ if (counter_dev-> ++ regs[second_gate_reg] & Gi_Source_Subselect_Bit) ++ clock_source = NI_GPCT_PXI_STAR_TRIGGER_CLOCK_SRC_BITS; ++ else ++ clock_source = NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_PXI10_Clock: ++ clock_source = NI_GPCT_PXI10_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_Next_TC_Clock: ++ clock_source = NI_GPCT_NEXT_TC_CLOCK_SRC_BITS; ++ break; ++ default: ++ for (i = 0; i <= ni_m_series_max_rtsi_channel; ++i) { ++ if (input_select == NI_M_Series_RTSI_Clock(i)) { ++ clock_source = NI_GPCT_RTSI_CLOCK_SRC_BITS(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_rtsi_channel) ++ break; ++ 
for (i = 0; i <= ni_m_series_max_pfi_channel; ++i) { ++ if (input_select == NI_M_Series_PFI_Clock(i)) { ++ clock_source = NI_GPCT_PFI_CLOCK_SRC_BITS(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_pfi_channel) ++ break; ++ BUG(); ++ break; ++ } ++ clock_source |= ni_tio_clock_src_modifiers(counter); ++ return clock_source; ++} ++ ++static unsigned int ni_660x_clock_src_select(const struct ni_gpct *counter) ++{ ++ unsigned int i, clock_source = 0; ++ const unsigned input_select = (ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Input_Select_Reg(counter-> ++ counter_index)) & Gi_Source_Select_Mask) >> ++ Gi_Source_Select_Shift; ++ ++ switch (input_select) { ++ case NI_660x_Timebase_1_Clock: ++ clock_source = NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Timebase_2_Clock: ++ clock_source = NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Timebase_3_Clock: ++ clock_source = NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Logic_Low_Clock: ++ clock_source = NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Source_Pin_i_Clock: ++ clock_source = NI_GPCT_SOURCE_PIN_i_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Next_Gate_Clock: ++ clock_source = NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Next_TC_Clock: ++ clock_source = NI_GPCT_NEXT_TC_CLOCK_SRC_BITS; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (input_select == NI_660x_RTSI_Clock(i)) { ++ clock_source = NI_GPCT_RTSI_CLOCK_SRC_BITS(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_source_pin; ++i) { ++ if (input_select == NI_660x_Source_Pin_Clock(i)) { ++ clock_source = ++ NI_GPCT_SOURCE_PIN_CLOCK_SRC_BITS(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_source_pin) ++ break; ++ BUG(); ++ break; ++ } ++ clock_source |= ni_tio_clock_src_modifiers(counter); ++ return clock_source; ++} ++ ++static unsigned int ni_tio_generic_clock_src_select(const struct ni_gpct *counter) ++{ ++ switch (counter->counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ case ni_gpct_variant_m_series: ++ return ni_m_series_clock_src_select(counter); ++ break; ++ case ni_gpct_variant_660x: ++ return ni_660x_clock_src_select(counter); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static uint64_t ni_tio_clock_period_ps(const struct ni_gpct *counter, ++ unsigned int generic_clock_source) ++{ ++ uint64_t clock_period_ps; ++ ++ switch (generic_clock_source & NI_GPCT_CLOCK_SRC_SELECT_MASK) { ++ case NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS: ++ clock_period_ps = 50000; ++ break; ++ case NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS: ++ clock_period_ps = 10000000; ++ break; ++ case NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS: ++ clock_period_ps = 12500; ++ break; ++ case NI_GPCT_PXI10_CLOCK_SRC_BITS: ++ clock_period_ps = 100000; ++ break; ++ default: ++ /* Clock period is specified by user with prescaling ++ already taken into account. 
*/ ++ return counter->clock_period_ps; ++ break; ++ } ++ ++ switch (generic_clock_source & NI_GPCT_PRESCALE_MODE_CLOCK_SRC_MASK) { ++ case NI_GPCT_NO_PRESCALE_CLOCK_SRC_BITS: ++ break; ++ case NI_GPCT_PRESCALE_X2_CLOCK_SRC_BITS: ++ clock_period_ps *= 2; ++ break; ++ case NI_GPCT_PRESCALE_X8_CLOCK_SRC_BITS: ++ clock_period_ps *= 8; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return clock_period_ps; ++} ++ ++static void ni_tio_get_clock_src(struct ni_gpct *counter, ++ unsigned int * clock_source, ++ unsigned int * period_ns) ++{ ++ static const unsigned int pico_per_nano = 1000; ++ uint64_t temp64; ++ ++ *clock_source = ni_tio_generic_clock_src_select(counter); ++ temp64 = ni_tio_clock_period_ps(counter, *clock_source); ++ do_div(temp64, pico_per_nano); ++ *period_ns = temp64; ++} ++ ++static void ni_tio_set_first_gate_modifiers(struct ni_gpct *counter, ++ lsampl_t gate_source) ++{ ++ const unsigned int mode_mask = Gi_Gate_Polarity_Bit | Gi_Gating_Mode_Mask; ++ unsigned int mode_values = 0; ++ ++ if (gate_source & CR_INVERT) { ++ mode_values |= Gi_Gate_Polarity_Bit; ++ } ++ if (gate_source & CR_EDGE) { ++ mode_values |= Gi_Rising_Edge_Gating_Bits; ++ } else { ++ mode_values |= Gi_Level_Gating_Bits; ++ } ++ ni_tio_set_bits(counter, NITIO_Gi_Mode_Reg(counter->counter_index), ++ mode_mask, mode_values); ++} ++ ++static int ni_660x_set_first_gate(struct ni_gpct *counter, lsampl_t gate_source) ++{ ++ const unsigned int selected_gate = CR_CHAN(gate_source); ++ /* Bits of selected_gate that may be meaningful to ++ input select register */ ++ const unsigned int selected_gate_mask = 0x1f; ++ unsigned ni_660x_gate_select; ++ unsigned i; ++ ++ switch (selected_gate) { ++ case NI_GPCT_NEXT_SOURCE_GATE_SELECT: ++ ni_660x_gate_select = NI_660x_Next_SRC_Gate_Select; ++ break; ++ case NI_GPCT_NEXT_OUT_GATE_SELECT: ++ case NI_GPCT_LOGIC_LOW_GATE_SELECT: ++ case NI_GPCT_SOURCE_PIN_i_GATE_SELECT: ++ case NI_GPCT_GATE_PIN_i_GATE_SELECT: ++ ni_660x_gate_select = selected_gate & selected_gate_mask; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (selected_gate == NI_GPCT_RTSI_GATE_SELECT(i)) { ++ ni_660x_gate_select = ++ selected_gate & selected_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_gate_pin; ++i) { ++ if (selected_gate == NI_GPCT_GATE_PIN_GATE_SELECT(i)) { ++ ni_660x_gate_select = ++ selected_gate & selected_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_660x_max_gate_pin) ++ break; ++ return -EINVAL; ++ break; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ++ Gi_Gate_Select_Mask, Gi_Gate_Select_Bits(ni_660x_gate_select)); ++ return 0; ++} ++ ++static int ni_m_series_set_first_gate(struct ni_gpct *counter, ++ lsampl_t gate_source) ++{ ++ const unsigned int selected_gate = CR_CHAN(gate_source); ++ /* bits of selected_gate that may be meaningful to input select register */ ++ const unsigned int selected_gate_mask = 0x1f; ++ unsigned int i, ni_m_series_gate_select; ++ ++ switch (selected_gate) { ++ case NI_GPCT_TIMESTAMP_MUX_GATE_SELECT: ++ case NI_GPCT_AI_START2_GATE_SELECT: ++ case NI_GPCT_PXI_STAR_TRIGGER_GATE_SELECT: ++ case NI_GPCT_NEXT_OUT_GATE_SELECT: ++ case NI_GPCT_AI_START1_GATE_SELECT: ++ case NI_GPCT_NEXT_SOURCE_GATE_SELECT: ++ case NI_GPCT_ANALOG_TRIGGER_OUT_GATE_SELECT: ++ case NI_GPCT_LOGIC_LOW_GATE_SELECT: ++ ni_m_series_gate_select = selected_gate & selected_gate_mask; ++ break; ++ default: ++ for (i = 0; i <= ni_m_series_max_rtsi_channel; ++i) { ++ 
if (selected_gate == NI_GPCT_RTSI_GATE_SELECT(i)) { ++ ni_m_series_gate_select = ++ selected_gate & selected_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_m_series_max_pfi_channel; ++i) { ++ if (selected_gate == NI_GPCT_PFI_GATE_SELECT(i)) { ++ ni_m_series_gate_select = ++ selected_gate & selected_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_pfi_channel) ++ break; ++ return -EINVAL; ++ break; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ++ Gi_Gate_Select_Mask, ++ Gi_Gate_Select_Bits(ni_m_series_gate_select)); ++ return 0; ++} ++ ++static int ni_660x_set_second_gate(struct ni_gpct *counter, ++ lsampl_t gate_source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ const unsigned int selected_second_gate = CR_CHAN(gate_source); ++ /* bits of second_gate that may be meaningful to second gate register */ ++ static const unsigned int selected_second_gate_mask = 0x1f; ++ unsigned int i, ni_660x_second_gate_select; ++ ++ switch (selected_second_gate) { ++ case NI_GPCT_SOURCE_PIN_i_GATE_SELECT: ++ case NI_GPCT_UP_DOWN_PIN_i_GATE_SELECT: ++ case NI_GPCT_SELECTED_GATE_GATE_SELECT: ++ case NI_GPCT_NEXT_OUT_GATE_SELECT: ++ case NI_GPCT_LOGIC_LOW_GATE_SELECT: ++ ni_660x_second_gate_select = ++ selected_second_gate & selected_second_gate_mask; ++ break; ++ case NI_GPCT_NEXT_SOURCE_GATE_SELECT: ++ ni_660x_second_gate_select = ++ NI_660x_Next_SRC_Second_Gate_Select; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (selected_second_gate == NI_GPCT_RTSI_GATE_SELECT(i)) { ++ ni_660x_second_gate_select = ++ selected_second_gate & ++ selected_second_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_up_down_pin; ++i) { ++ if (selected_second_gate == ++ NI_GPCT_UP_DOWN_PIN_GATE_SELECT(i)) { ++ ni_660x_second_gate_select = ++ selected_second_gate & ++ selected_second_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_660x_max_up_down_pin) ++ break; ++ return -EINVAL; ++ break; ++ }; ++ counter_dev->regs[second_gate_reg] |= Gi_Second_Gate_Mode_Bit; ++ counter_dev->regs[second_gate_reg] &= ~Gi_Second_Gate_Select_Mask; ++ counter_dev->regs[second_gate_reg] |= ++ Gi_Second_Gate_Select_Bits(ni_660x_second_gate_select); ++ write_register(counter, counter_dev->regs[second_gate_reg], ++ second_gate_reg); ++ return 0; ++} ++ ++static int ni_m_series_set_second_gate(struct ni_gpct *counter, ++ lsampl_t gate_source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ const unsigned int selected_second_gate = CR_CHAN(gate_source); ++ /* Bits of second_gate that may be meaningful to second gate register */ ++ static const unsigned int selected_second_gate_mask = 0x1f; ++ unsigned int ni_m_series_second_gate_select; ++ ++ /* FIXME: We don't know what the m-series second gate codes ++ are, so we'll just pass the bits through for now. 
*/ ++ switch (selected_second_gate) { ++ default: ++ ni_m_series_second_gate_select = ++ selected_second_gate & selected_second_gate_mask; ++ break; ++ }; ++ counter_dev->regs[second_gate_reg] |= Gi_Second_Gate_Mode_Bit; ++ counter_dev->regs[second_gate_reg] &= ~Gi_Second_Gate_Select_Mask; ++ counter_dev->regs[second_gate_reg] |= ++ Gi_Second_Gate_Select_Bits(ni_m_series_second_gate_select); ++ write_register(counter, counter_dev->regs[second_gate_reg], ++ second_gate_reg); ++ return 0; ++} ++ ++static int ni_tio_set_gate_src(struct ni_gpct *counter, ++ unsigned int gate_index, lsampl_t gate_source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ ++ switch (gate_index) { ++ case 0: ++ if (CR_CHAN(gate_source) == NI_GPCT_DISABLED_GATE_SELECT) { ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Mode_Reg(counter->counter_index), ++ Gi_Gating_Mode_Mask, Gi_Gating_Disabled_Bits); ++ return 0; ++ } ++ ni_tio_set_first_gate_modifiers(counter, gate_source); ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ case ni_gpct_variant_m_series: ++ return ni_m_series_set_first_gate(counter, gate_source); ++ break; ++ case ni_gpct_variant_660x: ++ return ni_660x_set_first_gate(counter, gate_source); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ break; ++ case 1: ++ if (ni_tio_second_gate_registers_present(counter_dev) == 0) ++ return -EINVAL; ++ if (CR_CHAN(gate_source) == NI_GPCT_DISABLED_GATE_SELECT) { ++ counter_dev->regs[second_gate_reg] &= ++ ~Gi_Second_Gate_Mode_Bit; ++ write_register(counter, ++ counter_dev->regs[second_gate_reg], ++ second_gate_reg); ++ return 0; ++ } ++ if (gate_source & CR_INVERT) { ++ counter_dev->regs[second_gate_reg] |= ++ Gi_Second_Gate_Polarity_Bit; ++ } else { ++ counter_dev->regs[second_gate_reg] &= ++ ~Gi_Second_Gate_Polarity_Bit; ++ } ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_m_series: ++ return ni_m_series_set_second_gate(counter, ++ gate_source); ++ break; ++ case ni_gpct_variant_660x: ++ return ni_660x_set_second_gate(counter, gate_source); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ return 0; ++} ++ ++static int ni_tio_set_other_src(struct ni_gpct *counter, ++ unsigned int index, unsigned int source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ ++ if (counter_dev->variant == ni_gpct_variant_m_series) { ++ unsigned int abz_reg, shift, mask; ++ ++ abz_reg = NITIO_Gi_ABZ_Reg(counter->counter_index); ++ switch (index) { ++ case NI_GPCT_SOURCE_ENCODER_A: ++ shift = 10; ++ break; ++ case NI_GPCT_SOURCE_ENCODER_B: ++ shift = 5; ++ break; ++ case NI_GPCT_SOURCE_ENCODER_Z: ++ shift = 0; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ mask = 0x1f << shift; ++ if (source > 0x1f) { ++ /* Disable gate */ ++ source = 0x1f; ++ } ++ counter_dev->regs[abz_reg] &= ~mask; ++ counter_dev->regs[abz_reg] |= (source << shift) & mask; ++ write_register(counter, counter_dev->regs[abz_reg], abz_reg); ++ return 0; ++ } ++ return -EINVAL; ++} ++ ++static unsigned int ni_660x_first_gate_to_generic_gate_source(unsigned int ni_660x_gate_select) ++{ ++ unsigned int i; ++ ++ switch (ni_660x_gate_select) { ++ case NI_660x_Source_Pin_i_Gate_Select: ++ return NI_GPCT_SOURCE_PIN_i_GATE_SELECT; ++ break; ++ case NI_660x_Gate_Pin_i_Gate_Select: ++ return NI_GPCT_GATE_PIN_i_GATE_SELECT; ++ break; ++ case NI_660x_Next_SRC_Gate_Select: ++ return 
NI_GPCT_NEXT_SOURCE_GATE_SELECT; ++ break; ++ case NI_660x_Next_Out_Gate_Select: ++ return NI_GPCT_NEXT_OUT_GATE_SELECT; ++ break; ++ case NI_660x_Logic_Low_Gate_Select: ++ return NI_GPCT_LOGIC_LOW_GATE_SELECT; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (ni_660x_gate_select == NI_660x_RTSI_Gate_Select(i)) { ++ return NI_GPCT_RTSI_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_gate_pin; ++i) { ++ if (ni_660x_gate_select == ++ NI_660x_Gate_Pin_Gate_Select(i)) { ++ return NI_GPCT_GATE_PIN_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_gate_pin) ++ break; ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static unsigned int ni_m_series_first_gate_to_generic_gate_source(unsigned int ++ ni_m_series_gate_select) ++{ ++ unsigned int i; ++ ++ switch (ni_m_series_gate_select) { ++ case NI_M_Series_Timestamp_Mux_Gate_Select: ++ return NI_GPCT_TIMESTAMP_MUX_GATE_SELECT; ++ break; ++ case NI_M_Series_AI_START2_Gate_Select: ++ return NI_GPCT_AI_START2_GATE_SELECT; ++ break; ++ case NI_M_Series_PXI_Star_Trigger_Gate_Select: ++ return NI_GPCT_PXI_STAR_TRIGGER_GATE_SELECT; ++ break; ++ case NI_M_Series_Next_Out_Gate_Select: ++ return NI_GPCT_NEXT_OUT_GATE_SELECT; ++ break; ++ case NI_M_Series_AI_START1_Gate_Select: ++ return NI_GPCT_AI_START1_GATE_SELECT; ++ break; ++ case NI_M_Series_Next_SRC_Gate_Select: ++ return NI_GPCT_NEXT_SOURCE_GATE_SELECT; ++ break; ++ case NI_M_Series_Analog_Trigger_Out_Gate_Select: ++ return NI_GPCT_ANALOG_TRIGGER_OUT_GATE_SELECT; ++ break; ++ case NI_M_Series_Logic_Low_Gate_Select: ++ return NI_GPCT_LOGIC_LOW_GATE_SELECT; ++ break; ++ default: ++ for (i = 0; i <= ni_m_series_max_rtsi_channel; ++i) { ++ if (ni_m_series_gate_select == ++ NI_M_Series_RTSI_Gate_Select(i)) { ++ return NI_GPCT_RTSI_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_m_series_max_pfi_channel; ++i) { ++ if (ni_m_series_gate_select == ++ NI_M_Series_PFI_Gate_Select(i)) { ++ return NI_GPCT_PFI_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_pfi_channel) ++ break; ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static unsigned int ni_660x_second_gate_to_generic_gate_source(unsigned int ++ ni_660x_gate_select) ++{ ++ unsigned int i; ++ ++ switch (ni_660x_gate_select) { ++ case NI_660x_Source_Pin_i_Second_Gate_Select: ++ return NI_GPCT_SOURCE_PIN_i_GATE_SELECT; ++ break; ++ case NI_660x_Up_Down_Pin_i_Second_Gate_Select: ++ return NI_GPCT_UP_DOWN_PIN_i_GATE_SELECT; ++ break; ++ case NI_660x_Next_SRC_Second_Gate_Select: ++ return NI_GPCT_NEXT_SOURCE_GATE_SELECT; ++ break; ++ case NI_660x_Next_Out_Second_Gate_Select: ++ return NI_GPCT_NEXT_OUT_GATE_SELECT; ++ break; ++ case NI_660x_Selected_Gate_Second_Gate_Select: ++ return NI_GPCT_SELECTED_GATE_GATE_SELECT; ++ break; ++ case NI_660x_Logic_Low_Second_Gate_Select: ++ return NI_GPCT_LOGIC_LOW_GATE_SELECT; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (ni_660x_gate_select == ++ NI_660x_RTSI_Second_Gate_Select(i)) { ++ return NI_GPCT_RTSI_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_up_down_pin; ++i) { ++ if (ni_660x_gate_select == ++ NI_660x_Up_Down_Pin_Second_Gate_Select(i)) { ++ return NI_GPCT_UP_DOWN_PIN_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_up_down_pin) ++ break; ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static unsigned int 
ni_m_series_second_gate_to_generic_gate_source(unsigned int ++ ni_m_series_gate_select) ++{ ++ /* FIXME: the second gate sources for the m series are ++ undocumented, so we just return the raw bits for now. */ ++ switch (ni_m_series_gate_select) { ++ default: ++ return ni_m_series_gate_select; ++ break; ++ } ++ return 0; ++}; ++ ++static int ni_tio_get_gate_src(struct ni_gpct *counter, ++ unsigned int gate_index, ++ unsigned int * gate_source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int mode_bits = ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Mode_Reg(counter->counter_index)); ++ const unsigned int second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ unsigned int gate_select_bits; ++ ++ switch (gate_index) { ++ case 0: ++ if ((mode_bits & Gi_Gating_Mode_Mask) == ++ Gi_Gating_Disabled_Bits) { ++ *gate_source = NI_GPCT_DISABLED_GATE_SELECT; ++ return 0; ++ } else { ++ gate_select_bits = ++ (ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Input_Select_Reg(counter-> ++ counter_index)) & ++ Gi_Gate_Select_Mask) >> Gi_Gate_Select_Shift; ++ } ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ case ni_gpct_variant_m_series: ++ *gate_source = ++ ni_m_series_first_gate_to_generic_gate_source ++ (gate_select_bits); ++ break; ++ case ni_gpct_variant_660x: ++ *gate_source = ++ ni_660x_first_gate_to_generic_gate_source ++ (gate_select_bits); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ if (mode_bits & Gi_Gate_Polarity_Bit) { ++ *gate_source |= CR_INVERT; ++ } ++ if ((mode_bits & Gi_Gating_Mode_Mask) != Gi_Level_Gating_Bits) { ++ *gate_source |= CR_EDGE; ++ } ++ break; ++ case 1: ++ if ((mode_bits & Gi_Gating_Mode_Mask) == Gi_Gating_Disabled_Bits ++ || (counter_dev-> ++ regs[second_gate_reg] & Gi_Second_Gate_Mode_Bit) ++ == 0) { ++ *gate_source = NI_GPCT_DISABLED_GATE_SELECT; ++ return 0; ++ } else { ++ gate_select_bits = ++ (counter_dev-> ++ regs[second_gate_reg] & ++ Gi_Second_Gate_Select_Mask) >> ++ Gi_Second_Gate_Select_Shift; ++ } ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ case ni_gpct_variant_m_series: ++ *gate_source = ++ ni_m_series_second_gate_to_generic_gate_source ++ (gate_select_bits); ++ break; ++ case ni_gpct_variant_660x: ++ *gate_source = ++ ni_660x_second_gate_to_generic_gate_source ++ (gate_select_bits); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ if (counter_dev-> ++ regs[second_gate_reg] & Gi_Second_Gate_Polarity_Bit) { ++ *gate_source |= CR_INVERT; ++ } ++ /* Second gate can't have edge/level mode set independently */ ++ if ((mode_bits & Gi_Gating_Mode_Mask) != Gi_Level_Gating_Bits) { ++ *gate_source |= CR_EDGE; ++ } ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ return 0; ++} ++ ++int a4l_ni_tio_insn_config(struct ni_gpct *counter, struct a4l_kernel_instruction *insn) ++{ ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_SET_COUNTER_MODE: ++ return ni_tio_set_counter_mode(counter, data[1]); ++ break; ++ case A4L_INSN_CONFIG_ARM: ++ return ni_tio_arm(counter, 1, data[1]); ++ break; ++ case A4L_INSN_CONFIG_DISARM: ++ ni_tio_arm(counter, 0, 0); ++ return 0; ++ break; ++ case A4L_INSN_CONFIG_GET_COUNTER_STATUS: ++ data[1] = ni_tio_counter_status(counter); ++ data[2] = counter_status_mask; ++ return 0; ++ break; ++ case A4L_INSN_CONFIG_SET_CLOCK_SRC: ++ return ni_tio_set_clock_src(counter, data[1], data[2]); ++ break; ++ case A4L_INSN_CONFIG_GET_CLOCK_SRC: ++ ni_tio_get_clock_src(counter, &data[1], &data[2]); 
++ return 0; ++ break; ++ case A4L_INSN_CONFIG_SET_GATE_SRC: ++ return ni_tio_set_gate_src(counter, data[1], data[2]); ++ break; ++ case A4L_INSN_CONFIG_GET_GATE_SRC: ++ return ni_tio_get_gate_src(counter, data[1], &data[2]); ++ break; ++ case A4L_INSN_CONFIG_SET_OTHER_SRC: ++ return ni_tio_set_other_src(counter, data[1], data[2]); ++ break; ++ case A4L_INSN_CONFIG_RESET: ++ ni_tio_reset_count_and_disarm(counter); ++ return 0; ++ break; ++ default: ++ break; ++ } ++ return -EINVAL; ++} ++ ++int a4l_ni_tio_rinsn(struct ni_gpct *counter, struct a4l_kernel_instruction *insn) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int channel = CR_CHAN(insn->chan_desc); ++ unsigned int first_read; ++ unsigned int second_read; ++ unsigned int correct_read; ++ ++ uint32_t *data = (uint32_t *)insn->data; ++ ++ if (insn->data_size != sizeof(uint32_t)) ++ return -EINVAL; ++ ++ switch (channel) { ++ case 0: ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Command_Reg(counter->counter_index), ++ Gi_Save_Trace_Bit, 0); ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Command_Reg(counter->counter_index), ++ Gi_Save_Trace_Bit, Gi_Save_Trace_Bit); ++ /* The count doesn't get latched until the next clock ++ edge, so it is possible the count may change (once) ++ while we are reading. Since the read of the ++ SW_Save_Reg isn't atomic (apparently even when it's a ++ 32 bit register according to 660x docs), we need to ++ read twice and make sure the reading hasn't changed. ++ If it has, a third read will be correct since the ++ count value will definitely have latched by then. */ ++ first_read = ++ read_register(counter, ++ NITIO_Gi_SW_Save_Reg(counter->counter_index)); ++ second_read = ++ read_register(counter, ++ NITIO_Gi_SW_Save_Reg(counter->counter_index)); ++ if (first_read != second_read) ++ correct_read = ++ read_register(counter, ++ NITIO_Gi_SW_Save_Reg(counter->counter_index)); ++ else ++ correct_read = first_read; ++ data[0] = correct_read; ++ return 0; ++ break; ++ case 1: ++ data[0] = counter_dev->regs ++ [NITIO_Gi_LoadA_Reg(counter->counter_index)]; ++ break; ++ case 2: ++ data[0] = counter_dev->regs ++ [NITIO_Gi_LoadB_Reg(counter->counter_index)]; ++ break; ++ }; ++ ++ return 0; ++} ++ ++static unsigned int ni_tio_next_load_register(struct ni_gpct *counter) ++{ ++ const unsigned int bits = read_register(counter, ++ NITIO_Gxx_Status_Reg(counter->counter_index)); ++ ++ if (bits & Gi_Next_Load_Source_Bit(counter->counter_index)) { ++ return NITIO_Gi_LoadB_Reg(counter->counter_index); ++ } else { ++ return NITIO_Gi_LoadA_Reg(counter->counter_index); ++ } ++} ++ ++int a4l_ni_tio_winsn(struct ni_gpct *counter, struct a4l_kernel_instruction *insn) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int channel = CR_CHAN(insn->chan_desc); ++ unsigned int load_reg; ++ ++ uint32_t *data = (uint32_t *)insn->data; ++ ++ if (insn->data_size != sizeof(uint32_t)) ++ return -EINVAL; ++ ++ switch (channel) { ++ case 0: ++ /* Unsafe if counter is armed. Should probably check ++ status and return -EBUSY if armed. */ ++ /* Don't disturb load source select, just use ++ whichever load register is already selected. 
*/ ++ load_reg = ni_tio_next_load_register(counter); ++ write_register(counter, data[0], load_reg); ++ ni_tio_set_bits_transient(counter, ++ NITIO_Gi_Command_Reg(counter->counter_index), 0, 0, ++ Gi_Load_Bit); ++ /* Restore state of load reg to whatever the user set ++ last set it to */ ++ write_register(counter, counter_dev->regs[load_reg], load_reg); ++ break; ++ case 1: ++ counter_dev->regs[NITIO_Gi_LoadA_Reg(counter->counter_index)] = ++ data[0]; ++ write_register(counter, data[0], ++ NITIO_Gi_LoadA_Reg(counter->counter_index)); ++ break; ++ case 2: ++ counter_dev->regs[NITIO_Gi_LoadB_Reg(counter->counter_index)] = ++ data[0]; ++ write_register(counter, data[0], ++ NITIO_Gi_LoadB_Reg(counter->counter_index)); ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ ++ return 0; ++} ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static void ni_tio_configure_dma(struct ni_gpct *counter, ++ short enable, short read_not_write) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ unsigned int input_select_bits = 0; ++ ++ if (enable) { ++ if (read_not_write) { ++ input_select_bits |= Gi_Read_Acknowledges_Irq; ++ } else { ++ input_select_bits |= Gi_Write_Acknowledges_Irq; ++ } ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ++ Gi_Read_Acknowledges_Irq | Gi_Write_Acknowledges_Irq, ++ input_select_bits); ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ break; ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ { ++ unsigned gi_dma_config_bits = 0; ++ ++ if (enable) { ++ gi_dma_config_bits |= Gi_DMA_Enable_Bit; ++ gi_dma_config_bits |= Gi_DMA_Int_Bit; ++ } ++ if (read_not_write == 0) { ++ gi_dma_config_bits |= Gi_DMA_Write_Bit; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_DMA_Config_Reg(counter->counter_index), ++ Gi_DMA_Enable_Bit | Gi_DMA_Int_Bit | ++ Gi_DMA_Write_Bit, gi_dma_config_bits); ++ } ++ break; ++ } ++} ++ ++/* TODO: a4l_ni_tio_input_inttrig is left unused because the trigger ++ callback cannot be changed at run time */ ++int a4l_ni_tio_input_inttrig(struct ni_gpct *counter, lsampl_t trignum) ++{ ++ unsigned long flags; ++ int retval = 0; ++ ++ BUG_ON(counter == NULL); ++ if (trignum != 0) ++ return -EINVAL; ++ ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ if (counter->mite_chan) ++ a4l_mite_dma_arm(counter->mite_chan); ++ else ++ retval = -EIO; ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++ if (retval < 0) ++ return retval; ++ retval = ni_tio_arm(counter, 1, NI_GPCT_ARM_IMMEDIATE); ++ ++ /* TODO: disable trigger until a command is recorded. 
++ Null trig at beginning prevent ao start trigger from executing ++ more than once per command (and doing things like trying to ++ allocate the ao dma channel multiple times) */ ++ ++ return retval; ++} ++ ++static int ni_tio_input_cmd(struct ni_gpct *counter, struct a4l_cmd_desc *cmd) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ int retval = 0; ++ ++ counter->mite_chan->dir = A4L_INPUT; ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ a4l_mite_prep_dma(counter->mite_chan, 32, 32); ++ break; ++ case ni_gpct_variant_e_series: ++ a4l_mite_prep_dma(counter->mite_chan, 16, 32); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ ni_tio_set_bits(counter, NITIO_Gi_Command_Reg(counter->counter_index), ++ Gi_Save_Trace_Bit, 0); ++ ni_tio_configure_dma(counter, 1, 1); ++ switch (cmd->start_src) { ++ case TRIG_NOW: ++ a4l_mite_dma_arm(counter->mite_chan); ++ retval = ni_tio_arm(counter, 1, NI_GPCT_ARM_IMMEDIATE); ++ break; ++ case TRIG_INT: ++ break; ++ case TRIG_EXT: ++ a4l_mite_dma_arm(counter->mite_chan); ++ retval = ni_tio_arm(counter, 1, cmd->start_arg); ++ case TRIG_OTHER: ++ a4l_mite_dma_arm(counter->mite_chan); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return retval; ++} ++ ++static int ni_tio_output_cmd(struct ni_gpct *counter, struct a4l_cmd_desc *cmd) ++{ ++ __a4l_err("ni_tio: output commands not yet implemented.\n"); ++ return -ENOTSUPP; ++} ++ ++static int ni_tio_cmd_setup(struct ni_gpct *counter, struct a4l_cmd_desc *cmd) ++{ ++ int retval = 0, set_gate_source = 0; ++ unsigned int gate_source; ++ ++ if (cmd->scan_begin_src == TRIG_EXT) { ++ set_gate_source = 1; ++ gate_source = cmd->scan_begin_arg; ++ } else if (cmd->convert_src == TRIG_EXT) { ++ set_gate_source = 1; ++ gate_source = cmd->convert_arg; ++ } ++ if (set_gate_source) { ++ retval = ni_tio_set_gate_src(counter, 0, gate_source); ++ } ++ if (cmd->flags & TRIG_WAKE_EOS) { ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Interrupt_Enable_Reg(counter->counter_index), ++ Gi_Gate_Interrupt_Enable_Bit(counter->counter_index), ++ Gi_Gate_Interrupt_Enable_Bit(counter->counter_index)); ++ } ++ return retval; ++} ++ ++int a4l_ni_tio_cmd(struct ni_gpct *counter, struct a4l_cmd_desc *cmd) ++{ ++ int retval = 0; ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ if (counter->mite_chan == NULL) { ++ __a4l_err("a4l_ni_tio_cmd: commands only supported with DMA." 
++ " Interrupt-driven commands not yet implemented.\n"); ++ retval = -EIO; ++ } else { ++ retval = ni_tio_cmd_setup(counter, cmd); ++ if (retval == 0) { ++ if (cmd->flags & A4L_CMD_WRITE) { ++ retval = ni_tio_output_cmd(counter, cmd); ++ } else { ++ retval = ni_tio_input_cmd(counter, cmd); ++ } ++ } ++ } ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++ return retval; ++} ++ ++struct a4l_cmd_desc a4l_ni_tio_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW | TRIG_INT | TRIG_OTHER | TRIG_EXT, ++ .scan_begin_src = TRIG_FOLLOW | TRIG_EXT | TRIG_OTHER, ++ .convert_src = TRIG_NOW | TRIG_EXT | TRIG_OTHER, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_NONE, ++}; ++ ++int a4l_ni_tio_cmdtest(struct ni_gpct *counter, struct a4l_cmd_desc *cmd) ++{ ++ /* Make sure trigger sources are trivially valid */ ++ ++ if ((cmd->start_src & TRIG_EXT) != 0 && ++ ni_tio_counting_mode_registers_present(counter->counter_dev) == 0) ++ return -EINVAL; ++ ++ /* Make sure trigger sources are mutually compatible */ ++ ++ if (cmd->convert_src != TRIG_NOW && cmd->scan_begin_src != TRIG_FOLLOW) ++ return -EINVAL; ++ ++ /* Make sure arguments are trivially compatible */ ++ ++ if (cmd->start_src != TRIG_EXT) { ++ if (cmd->start_arg != 0) { ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd->scan_begin_src != TRIG_EXT) { ++ if (cmd->scan_begin_arg) { ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd->convert_src != TRIG_EXT) { ++ if (cmd->convert_arg) { ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd->scan_end_arg != cmd->nb_chan) { ++ return -EINVAL; ++ } ++ ++ if (cmd->stop_src == TRIG_NONE) { ++ if (cmd->stop_arg != 0) { ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++int a4l_ni_tio_cancel(struct ni_gpct *counter) ++{ ++ unsigned long flags; ++ ++ ni_tio_arm(counter, 0, 0); ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ if (counter->mite_chan) { ++ a4l_mite_dma_disarm(counter->mite_chan); ++ } ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++ ni_tio_configure_dma(counter, 0, 0); ++ ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Interrupt_Enable_Reg(counter->counter_index), ++ Gi_Gate_Interrupt_Enable_Bit(counter->counter_index), 0x0); ++ return 0; ++} ++ ++/* During buffered input counter operation for e-series, the gate ++ interrupt is acked automatically by the dma controller, due to the ++ Gi_Read/Write_Acknowledges_IRQ bits in the input select ++ register. 
*/ ++static int should_ack_gate(struct ni_gpct *counter) ++{ ++ unsigned long flags; ++ int retval = 0; ++ ++ switch (counter->counter_dev->variant) { ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ /* Not sure if 660x really supports gate interrupts ++ (the bits are not listed in register-level manual) */ ++ return 1; ++ break; ++ case ni_gpct_variant_e_series: ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ { ++ if (counter->mite_chan == NULL || ++ counter->mite_chan->dir != A4L_INPUT || ++ (a4l_mite_done(counter->mite_chan))) { ++ retval = 1; ++ } ++ } ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++ break; ++ } ++ return retval; ++} ++ ++void a4l_ni_tio_acknowledge_and_confirm(struct ni_gpct *counter, ++ int *gate_error, ++ int *tc_error, ++ int *perm_stale_data, int *stale_data) ++{ ++ const unsigned short gxx_status = read_register(counter, ++ NITIO_Gxx_Status_Reg(counter->counter_index)); ++ const unsigned short gi_status = read_register(counter, ++ NITIO_Gi_Status_Reg(counter->counter_index)); ++ unsigned ack = 0; ++ ++ if (gate_error) ++ *gate_error = 0; ++ if (tc_error) ++ *tc_error = 0; ++ if (perm_stale_data) ++ *perm_stale_data = 0; ++ if (stale_data) ++ *stale_data = 0; ++ ++ if (gxx_status & Gi_Gate_Error_Bit(counter->counter_index)) { ++ ack |= Gi_Gate_Error_Confirm_Bit(counter->counter_index); ++ if (gate_error) { ++ /* 660x don't support automatic ++ acknowledgement of gate interrupt via dma ++ read/write and report bogus gate errors */ ++ if (counter->counter_dev->variant != ++ ni_gpct_variant_660x) { ++ *gate_error = 1; ++ } ++ } ++ } ++ if (gxx_status & Gi_TC_Error_Bit(counter->counter_index)) { ++ ack |= Gi_TC_Error_Confirm_Bit(counter->counter_index); ++ if (tc_error) ++ *tc_error = 1; ++ } ++ if (gi_status & Gi_TC_Bit) { ++ ack |= Gi_TC_Interrupt_Ack_Bit; ++ } ++ if (gi_status & Gi_Gate_Interrupt_Bit) { ++ if (should_ack_gate(counter)) ++ ack |= Gi_Gate_Interrupt_Ack_Bit; ++ } ++ if (ack) ++ write_register(counter, ack, ++ NITIO_Gi_Interrupt_Acknowledge_Reg(counter-> ++ counter_index)); ++ if (ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Mode_Reg(counter-> ++ counter_index)) & Gi_Loading_On_Gate_Bit) { ++ if (gxx_status & Gi_Stale_Data_Bit(counter->counter_index)) { ++ if (stale_data) ++ *stale_data = 1; ++ } ++ if (read_register(counter, ++ NITIO_Gxx_Joint_Status2_Reg(counter-> ++ counter_index)) & ++ Gi_Permanent_Stale_Bit(counter->counter_index)) { ++ __a4l_err("%s: Gi_Permanent_Stale_Data detected.\n", ++ __FUNCTION__); ++ if (perm_stale_data) ++ *perm_stale_data = 1; ++ } ++ } ++} ++ ++/* TODO: to be adapted after a4l_buf_evt review */ ++void a4l_ni_tio_handle_interrupt(struct ni_gpct *counter, struct a4l_device *dev) ++{ ++ unsigned gpct_mite_status; ++ unsigned long flags; ++ int gate_error; ++ int tc_error; ++ int perm_stale_data; ++ struct a4l_subdevice *subd = ++ a4l_get_subd(dev, NI_GPCT_SUBDEV(counter->counter_index)); ++ ++ a4l_ni_tio_acknowledge_and_confirm(counter, &gate_error, &tc_error, ++ &perm_stale_data, NULL); ++ if (gate_error) { ++ __a4l_err("%s: Gi_Gate_Error detected.\n", __FUNCTION__); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ if (perm_stale_data) { ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ switch (counter->counter_dev->variant) { ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ if (read_register(counter, ++ NITIO_Gi_DMA_Status_Reg(counter->counter_index)) ++ & Gi_DRQ_Error_Bit) { ++ __a4l_err("%s: Gi_DRQ_Error detected.\n", __FUNCTION__); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ break; 
++ case ni_gpct_variant_e_series: ++ break; ++ } ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ if (counter->mite_chan == NULL) { ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++ return; ++ } ++ gpct_mite_status = a4l_mite_get_status(counter->mite_chan); ++ if (gpct_mite_status & CHSR_LINKC) { ++ writel(CHOR_CLRLC, ++ counter->mite_chan->mite->mite_io_addr + ++ MITE_CHOR(counter->mite_chan->channel)); ++ } ++ a4l_mite_sync_input_dma(counter->mite_chan, subd); ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++} ++ ++void a4l_ni_tio_set_mite_channel(struct ni_gpct *counter, ++ struct mite_channel *mite_chan) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ counter->mite_chan = mite_chan; ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++static int __init ni_tio_init_module(void) ++{ ++ return 0; ++} ++ ++static void __exit ni_tio_cleanup_module(void) ++{ ++} ++ ++MODULE_DESCRIPTION("Analogy support for NI general-purpose counters"); ++MODULE_LICENSE("GPL"); ++ ++module_init(ni_tio_init_module); ++module_exit(ni_tio_cleanup_module); ++ ++EXPORT_SYMBOL_GPL(a4l_ni_tio_rinsn); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_winsn); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_insn_config); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_init_counter); ++EXPORT_SYMBOL_GPL(a4l_ni_gpct_device_construct); ++EXPORT_SYMBOL_GPL(a4l_ni_gpct_device_destroy); ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++EXPORT_SYMBOL_GPL(a4l_ni_tio_input_inttrig); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_cmd); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_cmd_mask); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_cmdtest); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_cancel); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_handle_interrupt); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_set_mite_channel); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_acknowledge_and_confirm); ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ +--- linux/drivers/xenomai/analogy/national_instruments/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/Kconfig 2021-04-07 16:01:27.820633319 +0800 +@@ -0,0 +1,42 @@ ++ ++config XENO_DRIVERS_ANALOGY_NI_MITE ++ depends on XENO_DRIVERS_ANALOGY && PCI ++ tristate "NI MITE driver" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_NI_TIO ++ depends on XENO_DRIVERS_ANALOGY ++ tristate "NI TIO driver" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_NI_MIO ++ depends on XENO_DRIVERS_ANALOGY && XENO_DRIVERS_ANALOGY_NI_TIO && PCI ++ tristate "NI MIO driver" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_NI_PCIMIO ++ depends on XENO_DRIVERS_ANALOGY && PCI ++ select XENO_DRIVERS_ANALOGY_NI_MITE ++ select XENO_DRIVERS_ANALOGY_NI_TIO ++ select XENO_DRIVERS_ANALOGY_NI_MIO ++ select XENO_DRIVERS_ANALOGY_8255 ++ tristate "NI PCIMIO driver" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_NI_670x ++ depends on EXPERIMENTAL && XENO_DRIVERS_ANALOGY && PCI ++ select XENO_DRIVERS_ANALOGY_NI_MITE ++ select XENO_DRIVERS_ANALOGY_NI_TIO ++ select XENO_DRIVERS_ANALOGY_NI_MIO ++ select XENO_DRIVERS_ANALOGY_8255 ++ tristate "NI 670X driver (EXPERIMENTAL)" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_NI_660x ++ depends on EXPERIMENTAL && XENO_DRIVERS_ANALOGY && PCI ++ select XENO_DRIVERS_ANALOGY_NI_MITE ++ select XENO_DRIVERS_ANALOGY_NI_TIO ++ select XENO_DRIVERS_ANALOGY_NI_MIO ++ select XENO_DRIVERS_ANALOGY_8255 ++ tristate "NI 660X driver (EXPERIMENTAL)" ++ default n +--- linux/drivers/xenomai/analogy/national_instruments/mite.c 
1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/mite.c 2021-04-07 16:01:27.815633326 +0800 +@@ -0,0 +1,839 @@ ++/* ++ * Hardware driver for NI Mite PCI interface chip ++ * ++ * Copyright (C) 1999 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * The NI Mite driver was originally written by Tomasz Motylewski ++ * <...>, and ported to comedi by ds. ++ * ++ * References for specifications: ++ * ++ * 321747b.pdf Register Level Programmer Manual (obsolete) ++ * 321747c.pdf Register Level Programmer Manual (new) ++ * DAQ-STC reference manual ++ * ++ * Other possibly relevant info: ++ * ++ * 320517c.pdf User manual (obsolete) ++ * 320517f.pdf User manual (new) ++ * 320889a.pdf delete ++ * 320906c.pdf maximum signal ratings ++ * 321066a.pdf about 16x ++ * 321791a.pdf discontinuation of at-mio-16e-10 rev. c ++ * 321808a.pdf about at-mio-16e-10 rev P ++ * 321837a.pdf discontinuation of at-mio-16de-10 rev d ++ * 321838a.pdf about at-mio-16de-10 rev N ++ * ++ * ISSUES: ++ */ ++ ++#include ++#include "mite.h" ++ ++#ifdef CONFIG_DEBUG_MITE ++#define MDPRINTK(fmt, args...) rtdm_printk(fmt, ##args) ++#else /* !CONFIG_DEBUG_MITE */ ++#define MDPRINTK(fmt, args...) 
++#endif /* CONFIG_DEBUG_MITE */ ++ ++static LIST_HEAD(mite_devices); ++ ++static struct pci_device_id mite_id[] = { ++ {PCI_DEVICE(PCI_VENDOR_ID_NATINST, PCI_ANY_ID), }, ++ {0, } ++}; ++ ++static int mite_probe(struct pci_dev *dev, const struct pci_device_id *id) ++{ ++ int i, err = 0; ++ struct mite_struct *mite; ++ ++ mite = kmalloc(sizeof(struct mite_struct), GFP_KERNEL); ++ if(mite == NULL) ++ return -ENOMEM; ++ ++ memset(mite, 0, sizeof(struct mite_struct)); ++ ++ rtdm_lock_init(&mite->lock); ++ ++ mite->pcidev = dev; ++ if (pci_enable_device(dev) < 0) { ++ __a4l_err("error enabling mite\n"); ++ err = -EIO; ++ goto out; ++ } ++ ++ for(i = 0; i < MAX_MITE_DMA_CHANNELS; i++) { ++ mite->channels[i].mite = mite; ++ mite->channels[i].channel = i; ++ mite->channels[i].done = 1; ++ } ++ ++ list_add(&mite->list, &mite_devices); ++ ++out: ++ if (err < 0) ++ kfree(mite); ++ ++ return err; ++} ++ ++static void mite_remove(struct pci_dev *dev) ++{ ++ struct list_head *this; ++ ++ list_for_each(this, &mite_devices) { ++ struct mite_struct *mite = ++ list_entry(this, struct mite_struct, list); ++ ++ if(mite->pcidev == dev) { ++ list_del(this); ++ kfree(mite); ++ break; ++ } ++ } ++} ++ ++static struct pci_driver mite_driver = { ++ .name = "analogy_mite", ++ .id_table = mite_id, ++ .probe = mite_probe, ++ .remove = mite_remove, ++}; ++ ++int a4l_mite_setup(struct mite_struct *mite, int use_iodwbsr_1) ++{ ++ unsigned long length; ++ resource_size_t addr; ++ int i; ++ u32 csigr_bits; ++ unsigned unknown_dma_burst_bits; ++ ++ __a4l_dbg(1, drv_dbg, "starting setup...\n"); ++ ++ pci_set_master(mite->pcidev); ++ ++ if (pci_request_regions(mite->pcidev, "mite")) { ++ __a4l_err("failed to request mite io regions\n"); ++ return -EIO; ++ }; ++ ++ /* The PCI BAR0 is the Mite */ ++ addr = pci_resource_start(mite->pcidev, 0); ++ length = pci_resource_len(mite->pcidev, 0); ++ mite->mite_phys_addr = addr; ++ mite->mite_io_addr = ioremap(addr, length); ++ if (!mite->mite_io_addr) { ++ __a4l_err("failed to remap mite io memory address\n"); ++ pci_release_regions(mite->pcidev); ++ return -ENOMEM; ++ } ++ ++ __a4l_dbg(1, drv_dbg, "bar0(mite) 0x%08llx mapped to %p\n", ++ (unsigned long long)mite->mite_phys_addr, ++ mite->mite_io_addr); ++ ++ ++ /* The PCI BAR1 is the DAQ */ ++ addr = pci_resource_start(mite->pcidev, 1); ++ length = pci_resource_len(mite->pcidev, 1); ++ mite->daq_phys_addr = addr; ++ mite->daq_io_addr = ioremap(mite->daq_phys_addr, length); ++ if (!mite->daq_io_addr) { ++ __a4l_err("failed to remap daq io memory address\n"); ++ pci_release_regions(mite->pcidev); ++ return -ENOMEM; ++ } ++ ++ __a4l_dbg(1, drv_dbg, "bar0(daq) 0x%08llx mapped to %p\n", ++ (unsigned long long)mite->daq_phys_addr, ++ mite->daq_io_addr); ++ ++ if (use_iodwbsr_1) { ++ __a4l_dbg(1, drv_dbg, "using I/O Window Base Size register 1\n"); ++ writel(0, mite->mite_io_addr + MITE_IODWBSR); ++ writel(mite-> ++ daq_phys_addr | WENAB | ++ MITE_IODWBSR_1_WSIZE_bits(length), ++ mite->mite_io_addr + MITE_IODWBSR_1); ++ writel(0, mite->mite_io_addr + MITE_IODWCR_1); ++ } else { ++ writel(mite->daq_phys_addr | WENAB, ++ mite->mite_io_addr + MITE_IODWBSR); ++ } ++ ++ /* Make sure dma bursts work. I got this from running a bus analyzer ++ on a pxi-6281 and a pxi-6713. 6713 powered up with register value ++ of 0x61f and bursts worked. 6281 powered up with register value of ++ 0x1f and bursts didn't work. The NI windows driver reads the register, ++ then does a bitwise-or of 0x600 with it and writes it back. 
++ */ ++ unknown_dma_burst_bits = ++ readl(mite->mite_io_addr + MITE_UNKNOWN_DMA_BURST_REG); ++ unknown_dma_burst_bits |= UNKNOWN_DMA_BURST_ENABLE_BITS; ++ writel(unknown_dma_burst_bits, ++ mite->mite_io_addr + MITE_UNKNOWN_DMA_BURST_REG); ++ ++ csigr_bits = readl(mite->mite_io_addr + MITE_CSIGR); ++ mite->num_channels = mite_csigr_dmac(csigr_bits); ++ if (mite->num_channels > MAX_MITE_DMA_CHANNELS) { ++ __a4l_err("MITE: bug? chip claims to have %i dma channels. " ++ "Setting to %i.\n", ++ mite->num_channels, MAX_MITE_DMA_CHANNELS); ++ mite->num_channels = MAX_MITE_DMA_CHANNELS; ++ } ++ ++ __a4l_dbg(1, drv_dbg, " version = %i, type = %i, mite mode = %i, " ++ "interface mode = %i\n", ++ mite_csigr_version(csigr_bits), ++ mite_csigr_type(csigr_bits), ++ mite_csigr_mmode(csigr_bits), ++ mite_csigr_imode(csigr_bits)); ++ __a4l_dbg(1, drv_dbg, " num channels = %i, write post fifo depth = %i, " ++ "wins = %i, iowins = %i\n", ++ mite_csigr_dmac(csigr_bits), ++ mite_csigr_wpdep(csigr_bits), ++ mite_csigr_wins(csigr_bits), ++ mite_csigr_iowins(csigr_bits)); ++ ++ for (i = 0; i < mite->num_channels; i++) { ++ /* Registers the channel as a free one */ ++ mite->channel_allocated[i] = 0; ++ /* Reset the channel */ ++ writel(CHOR_DMARESET, mite->mite_io_addr + MITE_CHOR(i)); ++ /* Disable interrupts */ ++ writel(CHCR_CLR_DMA_IE | CHCR_CLR_LINKP_IE | CHCR_CLR_SAR_IE | ++ CHCR_CLR_DONE_IE | CHCR_CLR_MRDY_IE | CHCR_CLR_DRDY_IE | ++ CHCR_CLR_LC_IE | CHCR_CLR_CONT_RB_IE, ++ mite->mite_io_addr + MITE_CHCR(i)); ++ ++ __a4l_dbg(1, drv_dbg, "channel[%d] initialized\n", i); ++ } ++ ++ mite->used = 1; ++ ++ return 0; ++} ++ ++void a4l_mite_unsetup(struct mite_struct *mite) ++{ ++ if (!mite) ++ return; ++ ++ if (mite->mite_io_addr) { ++ iounmap(mite->mite_io_addr); ++ mite->mite_io_addr = NULL; ++ } ++ ++ if (mite->daq_io_addr) { ++ iounmap(mite->daq_io_addr); ++ mite->daq_io_addr = NULL; ++ } ++ ++ if(mite->used) ++ pci_release_regions( mite->pcidev ); ++ ++ mite->used = 0; ++} ++ ++void a4l_mite_list_devices(void) ++{ ++ struct list_head *this; ++ ++ printk("Analogy: MITE: Available NI device IDs:"); ++ list_for_each(this, &mite_devices) { ++ struct mite_struct *mite = ++ list_entry(this, struct mite_struct, list); ++ ++ printk(" 0x%04x", mite_device_id(mite)); ++ if(mite->used) ++ printk("(used)"); ++ } ++ ++ printk("\n"); ++} ++ ++ ++ ++struct mite_struct * a4l_mite_find_device(int bus, ++ int slot, unsigned short device_id) ++{ ++ struct list_head *this; ++ ++ list_for_each(this, &mite_devices) { ++ struct mite_struct *mite = ++ list_entry(this, struct mite_struct, list); ++ ++ if(mite->pcidev->device != device_id) ++ continue; ++ ++ if((bus <= 0 && slot <= 0) || ++ (bus == mite->pcidev->bus->number && ++ slot == PCI_SLOT(mite->pcidev->devfn))) ++ return mite; ++ } ++ ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(a4l_mite_find_device); ++ ++struct mite_channel * ++a4l_mite_request_channel_in_range(struct mite_struct *mite, ++ struct mite_dma_descriptor_ring *ring, ++ unsigned min_channel, unsigned max_channel) ++{ ++ int i; ++ unsigned long flags; ++ struct mite_channel *channel = NULL; ++ ++ __a4l_dbg(1, drv_dbg, " min_channel = %u, max_channel = %u\n", ++ min_channel, max_channel); ++ ++ /* spin lock so a4l_mite_release_channel can be called safely ++ from interrupts */ ++ rtdm_lock_get_irqsave(&mite->lock, flags); ++ for (i = min_channel; i <= max_channel; ++i) { ++ ++ __a4l_dbg(1, drv_dbg, " channel[%d] allocated = %d\n", ++ i, mite->channel_allocated[i]); ++ ++ if (mite->channel_allocated[i] == 0) { ++ 
mite->channel_allocated[i] = 1; ++ channel = &mite->channels[i]; ++ channel->ring = ring; ++ break; ++ } ++ } ++ rtdm_lock_put_irqrestore(&mite->lock, flags); ++ return channel; ++} ++ ++void a4l_mite_release_channel(struct mite_channel *mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ unsigned long flags; ++ ++ /* Spin lock to prevent races with mite_request_channel */ ++ rtdm_lock_get_irqsave(&mite->lock, flags); ++ if (mite->channel_allocated[mite_chan->channel]) { ++ /* disable all channel's interrupts */ ++ writel(CHCR_CLR_DMA_IE | CHCR_CLR_LINKP_IE | ++ CHCR_CLR_SAR_IE | CHCR_CLR_DONE_IE | ++ CHCR_CLR_MRDY_IE | CHCR_CLR_DRDY_IE | ++ CHCR_CLR_LC_IE | CHCR_CLR_CONT_RB_IE, ++ mite->mite_io_addr + MITE_CHCR(mite_chan->channel)); ++ a4l_mite_dma_disarm(mite_chan); ++ mite_dma_reset(mite_chan); ++ mite->channel_allocated[mite_chan->channel] = 0; ++ mite_chan->ring = NULL; ++ mmiowb(); ++ } ++ rtdm_lock_put_irqrestore(&mite->lock, flags); ++} ++ ++void a4l_mite_dma_arm(struct mite_channel *mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ int chor; ++ unsigned long flags; ++ ++ MDPRINTK("a4l_mite_dma_arm ch%i\n", mite_chan->channel); ++ /* Memory barrier is intended to insure any twiddling with the buffer ++ is done before writing to the mite to arm dma transfer */ ++ smp_mb(); ++ /* arm */ ++ chor = CHOR_START; ++ rtdm_lock_get_irqsave(&mite->lock, flags); ++ mite_chan->done = 0; ++ writel(chor, mite->mite_io_addr + MITE_CHOR(mite_chan->channel)); ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&mite->lock, flags); ++} ++ ++void a4l_mite_dma_disarm(struct mite_channel *mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ unsigned chor; ++ ++ /* disarm */ ++ chor = CHOR_ABORT; ++ writel(chor, mite->mite_io_addr + MITE_CHOR(mite_chan->channel)); ++} ++ ++int a4l_mite_buf_change(struct mite_dma_descriptor_ring *ring, struct a4l_subdevice *subd) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ unsigned int n_links; ++ int i; ++ ++ if (ring->descriptors) { ++ pci_free_consistent(ring->pcidev, ++ ring->n_links * sizeof(struct mite_dma_descriptor), ++ ring->descriptors, ring->descriptors_dma_addr); ++ } ++ ring->descriptors = NULL; ++ ring->descriptors_dma_addr = 0; ++ ring->n_links = 0; ++ ++ if (buf->size == 0) { ++ return 0; ++ } ++ n_links = buf->size >> PAGE_SHIFT; ++ ++ MDPRINTK("ring->pcidev=%p, n_links=0x%04x\n", ring->pcidev, n_links); ++ ++ ring->descriptors = ++ pci_alloc_consistent(ring->pcidev, ++ n_links * sizeof(struct mite_dma_descriptor), ++ &ring->descriptors_dma_addr); ++ if (!ring->descriptors) { ++ printk("MITE: ring buffer allocation failed\n"); ++ return -ENOMEM; ++ } ++ ring->n_links = n_links; ++ ++ for (i = 0; i < n_links; i++) { ++ ring->descriptors[i].count = cpu_to_le32(PAGE_SIZE); ++ ring->descriptors[i].addr = cpu_to_le32(buf->pg_list[i]); ++ ring->descriptors[i].next = ++ cpu_to_le32(ring->descriptors_dma_addr + ++ (i + 1) * sizeof(struct mite_dma_descriptor)); ++ } ++ ++ ring->descriptors[n_links - 1].next = ++ cpu_to_le32(ring->descriptors_dma_addr); ++ ++ /* Barrier is meant to insure that all the writes to the dma descriptors ++ have completed before the dma controller is commanded to read them */ ++ smp_wmb(); ++ ++ return 0; ++} ++ ++void a4l_mite_prep_dma(struct mite_channel *mite_chan, ++ unsigned int num_device_bits, unsigned int num_memory_bits) ++{ ++ unsigned int chor, chcr, mcr, dcr, lkcr; ++ struct mite_struct *mite = mite_chan->mite; ++ ++ MDPRINTK("a4l_mite_prep_dma ch%i\n", mite_chan->channel); ++ ++ /* reset DMA 
and FIFO */ ++ chor = CHOR_DMARESET | CHOR_FRESET; ++ writel(chor, mite->mite_io_addr + MITE_CHOR(mite_chan->channel)); ++ ++ /* short link chaining mode */ ++ chcr = CHCR_SET_DMA_IE | CHCR_LINKSHORT | CHCR_SET_DONE_IE | ++ CHCR_BURSTEN; ++ /* ++ * Link Complete Interrupt: interrupt every time a link ++ * in MITE_RING is completed. This can generate a lot of ++ * extra interrupts, but right now we update the values ++ * of buf_int_ptr and buf_int_count at each interrupt. A ++ * better method is to poll the MITE before each user ++ * "read()" to calculate the number of bytes available. ++ */ ++ chcr |= CHCR_SET_LC_IE; ++ if (num_memory_bits == 32 && num_device_bits == 16) { ++ /* Doing a combined 32 and 16 bit byteswap gets the 16 ++ bit samples into the fifo in the right order. ++ Tested doing 32 bit memory to 16 bit device ++ transfers to the analog out of a pxi-6281, which ++ has mite version = 1, type = 4. This also works ++ for dma reads from the counters on e-series boards. ++ */ ++ chcr |= CHCR_BYTE_SWAP_DEVICE | CHCR_BYTE_SWAP_MEMORY; ++ } ++ ++ if (mite_chan->dir == A4L_INPUT) { ++ chcr |= CHCR_DEV_TO_MEM; ++ } ++ writel(chcr, mite->mite_io_addr + MITE_CHCR(mite_chan->channel)); ++ ++ /* to/from memory */ ++ mcr = CR_RL(64) | CR_ASEQUP; ++ switch (num_memory_bits) { ++ case 8: ++ mcr |= CR_PSIZE8; ++ break; ++ case 16: ++ mcr |= CR_PSIZE16; ++ break; ++ case 32: ++ mcr |= CR_PSIZE32; ++ break; ++ default: ++ __a4l_err("MITE: bug! " ++ "invalid mem bit width for dma transfer\n"); ++ break; ++ } ++ writel(mcr, mite->mite_io_addr + MITE_MCR(mite_chan->channel)); ++ ++ /* from/to device */ ++ dcr = CR_RL(64) | CR_ASEQUP; ++ dcr |= CR_PORTIO | CR_AMDEVICE | CR_REQSDRQ(mite_chan->channel); ++ switch (num_device_bits) { ++ case 8: ++ dcr |= CR_PSIZE8; ++ break; ++ case 16: ++ dcr |= CR_PSIZE16; ++ break; ++ case 32: ++ dcr |= CR_PSIZE32; ++ break; ++ default: ++ __a4l_info("MITE: bug! 
" ++ "invalid dev bit width for dma transfer\n"); ++ break; ++ } ++ writel(dcr, mite->mite_io_addr + MITE_DCR(mite_chan->channel)); ++ ++ /* reset the DAR */ ++ writel(0, mite->mite_io_addr + MITE_DAR(mite_chan->channel)); ++ ++ /* the link is 32bits */ ++ lkcr = CR_RL(64) | CR_ASEQUP | CR_PSIZE32; ++ writel(lkcr, mite->mite_io_addr + MITE_LKCR(mite_chan->channel)); ++ ++ /* starting address for link chaining */ ++ writel(mite_chan->ring->descriptors_dma_addr, ++ mite->mite_io_addr + MITE_LKAR(mite_chan->channel)); ++ ++ MDPRINTK("exit a4l_mite_prep_dma\n"); ++} ++ ++u32 mite_device_bytes_transferred(struct mite_channel *mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ return readl(mite->mite_io_addr + MITE_DAR(mite_chan->channel)); ++} ++ ++u32 a4l_mite_bytes_in_transit(struct mite_channel * mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ return readl(mite->mite_io_addr + ++ MITE_FCR(mite_chan->channel)) & 0x000000FF; ++} ++ ++/* Returns lower bound for number of bytes transferred from device to memory */ ++u32 a4l_mite_bytes_written_to_memory_lb(struct mite_channel * mite_chan) ++{ ++ u32 device_byte_count; ++ ++ device_byte_count = mite_device_bytes_transferred(mite_chan); ++ return device_byte_count - a4l_mite_bytes_in_transit(mite_chan); ++} ++ ++/* Returns upper bound for number of bytes transferred from device to memory */ ++u32 a4l_mite_bytes_written_to_memory_ub(struct mite_channel * mite_chan) ++{ ++ u32 in_transit_count; ++ ++ in_transit_count = a4l_mite_bytes_in_transit(mite_chan); ++ return mite_device_bytes_transferred(mite_chan) - in_transit_count; ++} ++ ++/* Returns lower bound for number of bytes read from memory for transfer to device */ ++u32 a4l_mite_bytes_read_from_memory_lb(struct mite_channel * mite_chan) ++{ ++ u32 device_byte_count; ++ ++ device_byte_count = mite_device_bytes_transferred(mite_chan); ++ return device_byte_count + a4l_mite_bytes_in_transit(mite_chan); ++} ++ ++/* Returns upper bound for number of bytes read from memory for transfer to device */ ++u32 a4l_mite_bytes_read_from_memory_ub(struct mite_channel * mite_chan) ++{ ++ u32 in_transit_count; ++ ++ in_transit_count = a4l_mite_bytes_in_transit(mite_chan); ++ return mite_device_bytes_transferred(mite_chan) + in_transit_count; ++} ++ ++int a4l_mite_sync_input_dma(struct mite_channel *mite_chan, struct a4l_subdevice *subd) ++{ ++ unsigned int nbytes_lb, nbytes_ub; ++ ++ nbytes_lb = a4l_mite_bytes_written_to_memory_lb(mite_chan); ++ nbytes_ub = a4l_mite_bytes_written_to_memory_ub(mite_chan); ++ ++ if(a4l_buf_prepare_absput(subd, nbytes_ub) != 0) { ++ __a4l_err("MITE: DMA overwrite of free area\n"); ++ return -EPIPE; ++ } ++ ++ return a4l_buf_commit_absput(subd, nbytes_lb); ++} ++ ++int a4l_mite_sync_output_dma(struct mite_channel *mite_chan, struct a4l_subdevice *subd) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ unsigned int nbytes_ub, nbytes_lb; ++ int err; ++ ++ nbytes_lb = a4l_mite_bytes_read_from_memory_lb(mite_chan); ++ nbytes_ub = a4l_mite_bytes_read_from_memory_ub(mite_chan); ++ ++ err = a4l_buf_prepare_absget(subd, nbytes_ub); ++ if(err < 0) { ++ __a4l_info("MITE: DMA underrun\n"); ++ return -EPIPE; ++ } ++ ++ err = a4l_buf_commit_absget(subd, nbytes_lb); ++ ++ /* If the MITE has already transfered more than required, we ++ can disable it */ ++ if (test_bit(A4L_BUF_EOA_NR, &buf->flags)) ++ writel(CHOR_STOP, ++ mite_chan->mite->mite_io_addr + ++ MITE_CHOR(mite_chan->channel)); ++ ++ return err; ++} ++ ++u32 a4l_mite_get_status(struct mite_channel *mite_chan) 
++{ ++ struct mite_struct *mite = mite_chan->mite; ++ u32 status; ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&mite->lock, flags); ++ status = readl(mite->mite_io_addr + MITE_CHSR(mite_chan->channel)); ++ if (status & CHSR_DONE) { ++ mite_chan->done = 1; ++ writel(CHOR_CLRDONE, ++ mite->mite_io_addr + MITE_CHOR(mite_chan->channel)); ++ } ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&mite->lock, flags); ++ return status; ++} ++ ++int a4l_mite_done(struct mite_channel *mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ unsigned long flags; ++ int done; ++ ++ a4l_mite_get_status(mite_chan); ++ rtdm_lock_get_irqsave(&mite->lock, flags); ++ done = mite_chan->done; ++ rtdm_lock_put_irqrestore(&mite->lock, flags); ++ return done; ++} ++ ++#ifdef CONFIG_DEBUG_MITE ++ ++static void a4l_mite_decode(const char *const bit_str[], unsigned int bits); ++ ++/* names of bits in mite registers */ ++ ++static const char *const mite_CHOR_strings[] = { ++ "start", "cont", "stop", "abort", ++ "freset", "clrlc", "clrrb", "clrdone", ++ "clr_lpause", "set_lpause", "clr_send_tc", ++ "set_send_tc", "12", "13", "14", ++ "15", "16", "17", "18", ++ "19", "20", "21", "22", ++ "23", "24", "25", "26", ++ "27", "28", "29", "30", ++ "dmareset", ++}; ++ ++static const char *const mite_CHCR_strings[] = { ++ "continue", "ringbuff", "2", "3", ++ "4", "5", "6", "7", ++ "8", "9", "10", "11", ++ "12", "13", "bursten", "fifodis", ++ "clr_cont_rb_ie", "set_cont_rb_ie", "clr_lc_ie", "set_lc_ie", ++ "clr_drdy_ie", "set_drdy_ie", "clr_mrdy_ie", "set_mrdy_ie", ++ "clr_done_ie", "set_done_ie", "clr_sar_ie", "set_sar_ie", ++ "clr_linkp_ie", "set_linkp_ie", "clr_dma_ie", "set_dma_ie", ++}; ++ ++static const char *const mite_MCR_strings[] = { ++ "amdevice", "1", "2", "3", ++ "4", "5", "portio", "portvxi", ++ "psizebyte", "psizehalf (byte & half = word)", "aseqxp1", "11", ++ "12", "13", "blocken", "berhand", ++ "reqsintlim/reqs0", "reqs1", "reqs2", "rd32", ++ "rd512", "rl1", "rl2", "rl8", ++ "24", "25", "26", "27", ++ "28", "29", "30", "stopen", ++}; ++ ++static const char *const mite_DCR_strings[] = { ++ "amdevice", "1", "2", "3", ++ "4", "5", "portio", "portvxi", ++ "psizebyte", "psizehalf (byte & half = word)", "aseqxp1", "aseqxp2", ++ "aseqxp8", "13", "blocken", "berhand", ++ "reqsintlim", "reqs1", "reqs2", "rd32", ++ "rd512", "rl1", "rl2", "rl8", ++ "23", "24", "25", "27", ++ "28", "wsdevc", "wsdevs", "rwdevpack", ++}; ++ ++static const char *const mite_LKCR_strings[] = { ++ "amdevice", "1", "2", "3", ++ "4", "5", "portio", "portvxi", ++ "psizebyte", "psizehalf (byte & half = word)", "asequp", "aseqdown", ++ "12", "13", "14", "berhand", ++ "16", "17", "18", "rd32", ++ "rd512", "rl1", "rl2", "rl8", ++ "24", "25", "26", "27", ++ "28", "29", "30", "chngend", ++}; ++ ++static const char *const mite_CHSR_strings[] = { ++ "d.err0", "d.err1", "m.err0", "m.err1", ++ "l.err0", "l.err1", "drq0", "drq1", ++ "end", "xferr", "operr0", "operr1", ++ "stops", "habort", "sabort", "error", ++ "16", "conts_rb", "18", "linkc", ++ "20", "drdy", "22", "mrdy", ++ "24", "done", "26", "sars", ++ "28", "lpauses", "30", "int", ++}; ++ ++void a4l_mite_dump_regs(struct mite_channel *mite_chan) ++{ ++ unsigned long mite_io_addr = ++ (unsigned long)mite_chan->mite->mite_io_addr; ++ unsigned long addr = 0; ++ unsigned long temp = 0; ++ ++ printk("a4l_mite_dump_regs ch%i\n", mite_chan->channel); ++ printk("mite address is =0x%08lx\n", mite_io_addr); ++ ++ addr = mite_io_addr + MITE_CHOR(mite_chan->channel); ++ printk("mite status[CHOR]at 0x%08lx 
=0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_CHOR_strings, temp); ++ addr = mite_io_addr + MITE_CHCR(mite_chan->channel); ++ printk("mite status[CHCR]at 0x%08lx =0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_CHCR_strings, temp); ++ addr = mite_io_addr + MITE_TCR(mite_chan->channel); ++ printk("mite status[TCR] at 0x%08lx =0x%08x\n", addr, ++ readl((void *)addr)); ++ addr = mite_io_addr + MITE_MCR(mite_chan->channel); ++ printk("mite status[MCR] at 0x%08lx =0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_MCR_strings, temp); ++ ++ addr = mite_io_addr + MITE_MAR(mite_chan->channel); ++ printk("mite status[MAR] at 0x%08lx =0x%08x\n", addr, ++ readl((void *)addr)); ++ addr = mite_io_addr + MITE_DCR(mite_chan->channel); ++ printk("mite status[DCR] at 0x%08lx =0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_DCR_strings, temp); ++ addr = mite_io_addr + MITE_DAR(mite_chan->channel); ++ printk("mite status[DAR] at 0x%08lx =0x%08x\n", addr, ++ readl((void *)addr)); ++ addr = mite_io_addr + MITE_LKCR(mite_chan->channel); ++ printk("mite status[LKCR]at 0x%08lx =0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_LKCR_strings, temp); ++ addr = mite_io_addr + MITE_LKAR(mite_chan->channel); ++ printk("mite status[LKAR]at 0x%08lx =0x%08x\n", addr, ++ readl((void *)addr)); ++ ++ addr = mite_io_addr + MITE_CHSR(mite_chan->channel); ++ printk("mite status[CHSR]at 0x%08lx =0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_CHSR_strings, temp); ++ addr = mite_io_addr + MITE_FCR(mite_chan->channel); ++ printk("mite status[FCR] at 0x%08lx =0x%08x\n\n", addr, ++ readl((void *)addr)); ++} ++ ++ ++static void a4l_mite_decode(const char *const bit_str[], unsigned int bits) ++{ ++ int i; ++ ++ for (i = 31; i >= 0; i--) { ++ if (bits & (1 << i)) { ++ printk(" %s", bit_str[i]); ++ } ++ } ++ printk("\n"); ++} ++ ++#endif /* CONFIG_DEBUG_MITE */ ++ ++ ++static int __init mite_init(void) ++{ ++ int err; ++ ++ /* Register the mite's PCI driver */ ++ err = pci_register_driver(&mite_driver); ++ ++ if(err == 0) ++ a4l_mite_list_devices(); ++ ++ return err; ++} ++ ++static void __exit mite_cleanup(void) ++{ ++ ++ /* Unregister the PCI structure driver */ ++ pci_unregister_driver(&mite_driver); ++ ++ /* Just paranoia... 
*/ ++ while(&mite_devices != mite_devices.next) { ++ struct list_head *this = mite_devices.next; ++ struct mite_struct *mite = ++ list_entry(this, struct mite_struct, list); ++ ++ list_del(this); ++ kfree(mite); ++ } ++} ++ ++MODULE_LICENSE("GPL"); ++module_init(mite_init); ++module_exit(mite_cleanup); ++ ++EXPORT_SYMBOL_GPL(a4l_mite_dma_arm); ++EXPORT_SYMBOL_GPL(a4l_mite_dma_disarm); ++EXPORT_SYMBOL_GPL(a4l_mite_sync_input_dma); ++EXPORT_SYMBOL_GPL(a4l_mite_sync_output_dma); ++EXPORT_SYMBOL_GPL(a4l_mite_setup); ++EXPORT_SYMBOL_GPL(a4l_mite_unsetup); ++EXPORT_SYMBOL_GPL(a4l_mite_list_devices); ++EXPORT_SYMBOL_GPL(a4l_mite_request_channel_in_range); ++EXPORT_SYMBOL_GPL(a4l_mite_release_channel); ++EXPORT_SYMBOL_GPL(a4l_mite_prep_dma); ++EXPORT_SYMBOL_GPL(a4l_mite_buf_change); ++EXPORT_SYMBOL_GPL(a4l_mite_bytes_written_to_memory_lb); ++EXPORT_SYMBOL_GPL(a4l_mite_bytes_written_to_memory_ub); ++EXPORT_SYMBOL_GPL(a4l_mite_bytes_read_from_memory_lb); ++EXPORT_SYMBOL_GPL(a4l_mite_bytes_read_from_memory_ub); ++EXPORT_SYMBOL_GPL(a4l_mite_bytes_in_transit); ++EXPORT_SYMBOL_GPL(a4l_mite_get_status); ++EXPORT_SYMBOL_GPL(a4l_mite_done); ++#ifdef CONFIG_DEBUG_MITE ++EXPORT_SYMBOL_GPL(a4l_mite_decode); ++EXPORT_SYMBOL_GPL(a4l_mite_dump_regs); ++#endif /* CONFIG_DEBUG_MITE */ +--- linux/drivers/xenomai/analogy/national_instruments/mite.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/mite.h 2021-04-07 16:01:27.810633333 +0800 +@@ -0,0 +1,435 @@ ++/* ++ * Hardware driver for NI Mite PCI interface chip ++ * @note Copyright (C) 1999 David A. Schleef ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef __ANALOGY_NI_MITE_H__ ++#define __ANALOGY_NI_MITE_H__ ++ ++#include ++#include ++#include ++ ++#define PCI_VENDOR_ID_NATINST 0x1093 ++#define PCI_MITE_SIZE 4096 ++#define PCI_DAQ_SIZE 4096 ++#define PCI_DAQ_SIZE_660X 8192 ++#define PCIMIO_COMPAT ++#define MAX_MITE_DMA_CHANNELS 8 ++ ++#define TOP_OF_PAGE(x) ((x)|(~(PAGE_MASK))) ++ ++struct mite_dma_descriptor { ++ u32 count; ++ u32 addr; ++ u32 next; ++ u32 dar; ++}; ++ ++struct mite_dma_descriptor_ring { ++ struct pci_dev *pcidev; ++ u32 n_links; ++ struct mite_dma_descriptor *descriptors; ++ dma_addr_t descriptors_dma_addr; ++}; ++ ++struct mite_channel { ++ struct mite_struct *mite; ++ u32 channel; ++ u32 dir; ++ u32 done; ++ struct mite_dma_descriptor_ring *ring; ++}; ++ ++struct mite_struct { ++ struct list_head list; ++ rtdm_lock_t lock; ++ u32 used; ++ u32 num_channels; ++ ++ struct mite_channel channels[MAX_MITE_DMA_CHANNELS]; ++ u32 channel_allocated[MAX_MITE_DMA_CHANNELS]; ++ ++ struct pci_dev *pcidev; ++ resource_size_t mite_phys_addr; ++ void *mite_io_addr; ++ resource_size_t daq_phys_addr; ++ void *daq_io_addr; ++}; ++ ++static inline ++struct mite_dma_descriptor_ring *mite_alloc_ring(struct mite_struct *mite) ++{ ++ struct mite_dma_descriptor_ring *ring = ++ kmalloc(sizeof(struct mite_dma_descriptor_ring), GFP_DMA); ++ ++ if (ring == NULL) ++ return ring; ++ ++ memset(ring, 0, sizeof(struct mite_dma_descriptor_ring)); ++ ++ ring->pcidev = mite->pcidev; ++ if (ring->pcidev == NULL) { ++ kfree(ring); ++ return NULL; ++ } ++ ++ return ring; ++}; ++ ++static inline void mite_free_ring(struct mite_dma_descriptor_ring *ring) ++{ ++ if (ring) { ++ if (ring->descriptors) { ++ pci_free_consistent( ++ ring->pcidev, ++ ring->n_links * ++ sizeof(struct mite_dma_descriptor), ++ ring->descriptors, ring->descriptors_dma_addr); ++ } ++ kfree(ring); ++ } ++}; ++ ++static inline unsigned int mite_irq(struct mite_struct *mite) ++{ ++ return mite->pcidev->irq; ++}; ++static inline unsigned int mite_device_id(struct mite_struct *mite) ++{ ++ return mite->pcidev->device; ++}; ++ ++int a4l_mite_setup(struct mite_struct *mite, int use_iodwbsr_1); ++void a4l_mite_unsetup(struct mite_struct *mite); ++void a4l_mite_list_devices(void); ++struct mite_struct * a4l_mite_find_device(int bus, ++ int slot, unsigned short device_id); ++struct mite_channel * ++a4l_mite_request_channel_in_range(struct mite_struct *mite, ++ struct mite_dma_descriptor_ring *ring, ++ unsigned min_channel, unsigned max_channel); ++static inline struct mite_channel *mite_request_channel(struct mite_struct ++ *mite, struct mite_dma_descriptor_ring *ring) ++{ ++ return a4l_mite_request_channel_in_range(mite, ring, 0, ++ mite->num_channels - 1); ++} ++void a4l_mite_release_channel(struct mite_channel *mite_chan); ++ ++void a4l_mite_dma_arm(struct mite_channel *mite_chan); ++void a4l_mite_dma_disarm(struct mite_channel *mite_chan); ++int a4l_mite_sync_input_dma(struct mite_channel *mite_chan, struct a4l_subdevice *subd); ++int a4l_mite_sync_output_dma(struct mite_channel *mite_chan, struct a4l_subdevice *subd); ++u32 a4l_mite_bytes_written_to_memory_lb(struct mite_channel *mite_chan); ++u32 a4l_mite_bytes_written_to_memory_ub(struct mite_channel *mite_chan); ++u32 a4l_mite_bytes_read_from_memory_lb(struct mite_channel *mite_chan); ++u32 a4l_mite_bytes_read_from_memory_ub(struct mite_channel *mite_chan); ++u32 a4l_mite_bytes_in_transit(struct mite_channel *mite_chan); ++u32 a4l_mite_get_status(struct mite_channel *mite_chan); ++int a4l_mite_done(struct mite_channel 
*mite_chan); ++void a4l_mite_prep_dma(struct mite_channel *mite_chan, ++ unsigned int num_device_bits, unsigned int num_memory_bits); ++int a4l_mite_buf_change(struct mite_dma_descriptor_ring *ring, struct a4l_subdevice *subd); ++ ++#ifdef CONFIG_DEBUG_MITE ++void mite_print_chsr(unsigned int chsr); ++void a4l_mite_dump_regs(struct mite_channel *mite_chan); ++#endif ++ ++static inline int CHAN_OFFSET(int channel) ++{ ++ return 0x500 + 0x100 * channel; ++}; ++ ++enum mite_registers { ++ /* The bits 0x90180700 in MITE_UNKNOWN_DMA_BURST_REG can be ++ written and read back. The bits 0x1f always read as 1. ++ The rest always read as zero. */ ++ MITE_UNKNOWN_DMA_BURST_REG = 0x28, ++ MITE_IODWBSR = 0xc0, //IO Device Window Base Size Register ++ MITE_IODWBSR_1 = 0xc4, // IO Device Window Base Size Register 1 ++ MITE_IODWCR_1 = 0xf4, ++ MITE_PCI_CONFIG_OFFSET = 0x300, ++ MITE_CSIGR = 0x460 //chip signature ++}; ++static inline int MITE_CHOR(int channel) // channel operation ++{ ++ return CHAN_OFFSET(channel) + 0x0; ++}; ++static inline int MITE_CHCR(int channel) // channel control ++{ ++ return CHAN_OFFSET(channel) + 0x4; ++}; ++static inline int MITE_TCR(int channel) // transfer count ++{ ++ return CHAN_OFFSET(channel) + 0x8; ++}; ++static inline int MITE_MCR(int channel) // memory configuration ++{ ++ return CHAN_OFFSET(channel) + 0xc; ++}; ++static inline int MITE_MAR(int channel) // memory address ++{ ++ return CHAN_OFFSET(channel) + 0x10; ++}; ++static inline int MITE_DCR(int channel) // device configuration ++{ ++ return CHAN_OFFSET(channel) + 0x14; ++}; ++static inline int MITE_DAR(int channel) // device address ++{ ++ return CHAN_OFFSET(channel) + 0x18; ++}; ++static inline int MITE_LKCR(int channel) // link configuration ++{ ++ return CHAN_OFFSET(channel) + 0x1c; ++}; ++static inline int MITE_LKAR(int channel) // link address ++{ ++ return CHAN_OFFSET(channel) + 0x20; ++}; ++static inline int MITE_LLKAR(int channel) // see mite section of tnt5002 manual ++{ ++ return CHAN_OFFSET(channel) + 0x24; ++}; ++static inline int MITE_BAR(int channel) // base address ++{ ++ return CHAN_OFFSET(channel) + 0x28; ++}; ++static inline int MITE_BCR(int channel) // base count ++{ ++ return CHAN_OFFSET(channel) + 0x2c; ++}; ++static inline int MITE_SAR(int channel) // ? address ++{ ++ return CHAN_OFFSET(channel) + 0x30; ++}; ++static inline int MITE_WSCR(int channel) // ? ++{ ++ return CHAN_OFFSET(channel) + 0x34; ++}; ++static inline int MITE_WSER(int channel) // ? 
++{ ++ return CHAN_OFFSET(channel) + 0x38; ++}; ++static inline int MITE_CHSR(int channel) // channel status ++{ ++ return CHAN_OFFSET(channel) + 0x3c; ++}; ++static inline int MITE_FCR(int channel) // fifo count ++{ ++ return CHAN_OFFSET(channel) + 0x40; ++}; ++ ++enum MITE_IODWBSR_bits { ++ WENAB = 0x80, // window enable ++}; ++ ++static inline unsigned MITE_IODWBSR_1_WSIZE_bits(unsigned size) ++{ ++ unsigned order = 0; ++ while (size >>= 1) ++ ++order; ++ BUG_ON(order < 1); ++ return (order - 1) & 0x1f; ++} ++ ++enum MITE_UNKNOWN_DMA_BURST_bits { ++ UNKNOWN_DMA_BURST_ENABLE_BITS = 0x600 ++}; ++ ++static inline int mite_csigr_version(u32 csigr_bits) ++{ ++ return csigr_bits & 0xf; ++}; ++static inline int mite_csigr_type(u32 csigr_bits) ++{ // original mite = 0, minimite = 1 ++ return (csigr_bits >> 4) & 0xf; ++}; ++static inline int mite_csigr_mmode(u32 csigr_bits) ++{ // mite mode, minimite = 1 ++ return (csigr_bits >> 8) & 0x3; ++}; ++static inline int mite_csigr_imode(u32 csigr_bits) ++{ // cpu port interface mode, pci = 0x3 ++ return (csigr_bits >> 12) & 0x3; ++}; ++static inline int mite_csigr_dmac(u32 csigr_bits) ++{ // number of dma channels ++ return (csigr_bits >> 16) & 0xf; ++}; ++static inline int mite_csigr_wpdep(u32 csigr_bits) ++{ // write post fifo depth ++ unsigned int wpdep_bits = (csigr_bits >> 20) & 0x7; ++ if (wpdep_bits == 0) ++ return 0; ++ else ++ return 1 << (wpdep_bits - 1); ++}; ++static inline int mite_csigr_wins(u32 csigr_bits) ++{ ++ return (csigr_bits >> 24) & 0x1f; ++}; ++static inline int mite_csigr_iowins(u32 csigr_bits) ++{ // number of io windows ++ return (csigr_bits >> 29) & 0x7; ++}; ++ ++enum MITE_MCR_bits { ++ MCRPON = 0, ++}; ++ ++enum MITE_DCR_bits { ++ DCR_NORMAL = (1 << 29), ++ DCRPON = 0, ++}; ++ ++enum MITE_CHOR_bits { ++ CHOR_DMARESET = (1 << 31), ++ CHOR_SET_SEND_TC = (1 << 11), ++ CHOR_CLR_SEND_TC = (1 << 10), ++ CHOR_SET_LPAUSE = (1 << 9), ++ CHOR_CLR_LPAUSE = (1 << 8), ++ CHOR_CLRDONE = (1 << 7), ++ CHOR_CLRRB = (1 << 6), ++ CHOR_CLRLC = (1 << 5), ++ CHOR_FRESET = (1 << 4), ++ CHOR_ABORT = (1 << 3), /* stop without emptying fifo */ ++ CHOR_STOP = (1 << 2), /* stop after emptying fifo */ ++ CHOR_CONT = (1 << 1), ++ CHOR_START = (1 << 0), ++ CHOR_PON = (CHOR_CLR_SEND_TC | CHOR_CLR_LPAUSE), ++}; ++ ++enum MITE_CHCR_bits { ++ CHCR_SET_DMA_IE = (1 << 31), ++ CHCR_CLR_DMA_IE = (1 << 30), ++ CHCR_SET_LINKP_IE = (1 << 29), ++ CHCR_CLR_LINKP_IE = (1 << 28), ++ CHCR_SET_SAR_IE = (1 << 27), ++ CHCR_CLR_SAR_IE = (1 << 26), ++ CHCR_SET_DONE_IE = (1 << 25), ++ CHCR_CLR_DONE_IE = (1 << 24), ++ CHCR_SET_MRDY_IE = (1 << 23), ++ CHCR_CLR_MRDY_IE = (1 << 22), ++ CHCR_SET_DRDY_IE = (1 << 21), ++ CHCR_CLR_DRDY_IE = (1 << 20), ++ CHCR_SET_LC_IE = (1 << 19), ++ CHCR_CLR_LC_IE = (1 << 18), ++ CHCR_SET_CONT_RB_IE = (1 << 17), ++ CHCR_CLR_CONT_RB_IE = (1 << 16), ++ CHCR_FIFODIS = (1 << 15), ++ CHCR_FIFO_ON = 0, ++ CHCR_BURSTEN = (1 << 14), ++ CHCR_NO_BURSTEN = 0, ++ CHCR_BYTE_SWAP_DEVICE = (1 << 6), ++ CHCR_BYTE_SWAP_MEMORY = (1 << 4), ++ CHCR_DIR = (1 << 3), ++ CHCR_DEV_TO_MEM = CHCR_DIR, ++ CHCR_MEM_TO_DEV = 0, ++ CHCR_NORMAL = (0 << 0), ++ CHCR_CONTINUE = (1 << 0), ++ CHCR_RINGBUFF = (2 << 0), ++ CHCR_LINKSHORT = (4 << 0), ++ CHCR_LINKLONG = (5 << 0), ++ CHCRPON = ++ (CHCR_CLR_DMA_IE | CHCR_CLR_LINKP_IE | CHCR_CLR_SAR_IE | ++ CHCR_CLR_DONE_IE | CHCR_CLR_MRDY_IE | CHCR_CLR_DRDY_IE | ++ CHCR_CLR_LC_IE | CHCR_CLR_CONT_RB_IE), ++}; ++ ++enum ConfigRegister_bits { ++ CR_REQS_MASK = 0x7 << 16, ++ CR_ASEQDONT = 0x0 << 10, ++ CR_ASEQUP = 0x1 << 10, ++ CR_ASEQDOWN = 
0x2 << 10, ++ CR_ASEQ_MASK = 0x3 << 10, ++ CR_PSIZE8 = (1 << 8), ++ CR_PSIZE16 = (2 << 8), ++ CR_PSIZE32 = (3 << 8), ++ CR_PORTCPU = (0 << 6), ++ CR_PORTIO = (1 << 6), ++ CR_PORTVXI = (2 << 6), ++ CR_PORTMXI = (3 << 6), ++ CR_AMDEVICE = (1 << 0), ++}; ++static inline int CR_REQS(int source) ++{ ++ return (source & 0x7) << 16; ++}; ++static inline int CR_REQSDRQ(unsigned drq_line) ++{ ++ /* This also works on m-series when ++ using channels (drq_line) 4 or 5. */ ++ return CR_REQS((drq_line & 0x3) | 0x4); ++} ++static inline int CR_RL(unsigned int retry_limit) ++{ ++ int value = 0; ++ ++ while (retry_limit) { ++ retry_limit >>= 1; ++ value++; ++ } ++ if (value > 0x7) ++ __a4l_err("bug! retry_limit too large\n"); ++ ++ return (value & 0x7) << 21; ++} ++ ++enum CHSR_bits { ++ CHSR_INT = (1 << 31), ++ CHSR_LPAUSES = (1 << 29), ++ CHSR_SARS = (1 << 27), ++ CHSR_DONE = (1 << 25), ++ CHSR_MRDY = (1 << 23), ++ CHSR_DRDY = (1 << 21), ++ CHSR_LINKC = (1 << 19), ++ CHSR_CONTS_RB = (1 << 17), ++ CHSR_ERROR = (1 << 15), ++ CHSR_SABORT = (1 << 14), ++ CHSR_HABORT = (1 << 13), ++ CHSR_STOPS = (1 << 12), ++ CHSR_OPERR_mask = (3 << 10), ++ CHSR_OPERR_NOERROR = (0 << 10), ++ CHSR_OPERR_FIFOERROR = (1 << 10), ++ CHSR_OPERR_LINKERROR = (1 << 10), /* ??? */ ++ CHSR_XFERR = (1 << 9), ++ CHSR_END = (1 << 8), ++ CHSR_DRQ1 = (1 << 7), ++ CHSR_DRQ0 = (1 << 6), ++ CHSR_LxERR_mask = (3 << 4), ++ CHSR_LBERR = (1 << 4), ++ CHSR_LRERR = (2 << 4), ++ CHSR_LOERR = (3 << 4), ++ CHSR_MxERR_mask = (3 << 2), ++ CHSR_MBERR = (1 << 2), ++ CHSR_MRERR = (2 << 2), ++ CHSR_MOERR = (3 << 2), ++ CHSR_DxERR_mask = (3 << 0), ++ CHSR_DBERR = (1 << 0), ++ CHSR_DRERR = (2 << 0), ++ CHSR_DOERR = (3 << 0), ++}; ++ ++static inline void mite_dma_reset(struct mite_channel *mite_chan) ++{ ++ writel(CHOR_DMARESET | CHOR_FRESET, ++ mite_chan->mite->mite_io_addr + MITE_CHOR(mite_chan->channel)); ++}; ++ ++#endif /* !__ANALOGY_NI_MITE_H__ */ +--- linux/drivers/xenomai/analogy/national_instruments/mio_common.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/mio_common.c 2021-04-07 16:01:27.806633339 +0800 +@@ -0,0 +1,5590 @@ ++/* ++ * Hardware driver for DAQ-STC based boards ++ * ++ * Copyright (C) 1997-2001 David A. Schleef ++ * Copyright (C) 2002-2006 Frank Mori Hess ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ * ++ * Description: DAQ-STC systems ++ * ++ * References: ++ * 340747b.pdf AT-MIO E series Register-Level Programmer Manual ++ * 341079b.pdf PCI E Series Register-Level Programmer Manual ++ * 340934b.pdf DAQ-STC reference manual ++ * 322080b.pdf 6711/6713/6715 User Manual ++ * 320945c.pdf PCI E Series User Manual ++ * 322138a.pdf PCI-6052E and DAQPad-6052E User Manual ++ * 320517c.pdf AT E Series User manual (obsolete) ++ * 320517f.pdf AT E Series User manual ++ * 320906c.pdf Maximum signal ratings ++ * 321066a.pdf About 16x ++ * 321791a.pdf Discontinuation of at-mio-16e-10 rev. c ++ * 321808a.pdf About at-mio-16e-10 rev P ++ * 321837a.pdf Discontinuation of at-mio-16de-10 rev d ++ * 321838a.pdf About at-mio-16de-10 rev N ++ * ++ * ISSUES: ++ * - The interrupt routine needs to be cleaned up ++ * - S-Series PCI-6143 support has been added but is not fully tested ++ * as yet. Terry Barnaby, BEAM Ltd. ++ * ++ */ ++#include ++#include ++#include "../intel/8255.h" ++#include "mite.h" ++#include "ni_stc.h" ++#include "ni_mio.h" ++ ++#define NI_TIMEOUT 1000 ++ ++/* Note: this table must match the ai_gain_* definitions */ ++static const short ni_gainlkup[][16] = { ++ /* ai_gain_16 */ ++ {0, 1, 2, 3, 4, 5, 6, 7, 0x100, 0x101, 0x102, 0x103, 0x104, 0x105, ++ 0x106, 0x107}, ++ /* ai_gain_8 */ ++ {1, 2, 4, 7, 0x101, 0x102, 0x104, 0x107}, ++ /* ai_gain_14 */ ++ {1, 2, 3, 4, 5, 6, 7, 0x101, 0x102, 0x103, 0x104, 0x105, 0x106, ++ 0x107}, ++ /* ai_gain_4 */ ++ {0, 1, 4, 7}, ++ /* ai_gain_611x */ ++ {0x00a, 0x00b, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006}, ++ /* ai_gain_622x */ ++ {0, 1, 4, 5}, ++ /* ai_gain_628x */ ++ {1, 2, 3, 4, 5, 6, 7}, ++ /* ai_gain_6143 */ ++ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, ++}; ++ ++struct a4l_rngtab rng_ni_E_ai = {16, { ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-2.5, 2.5), ++ RANGE_V(-1, 1), ++ RANGE_V(-0.5, 0.5), ++ RANGE_V(-0.25, 0.25), ++ RANGE_V(-0.1, 0.1), ++ RANGE_V(-0.05, 0.05), ++ RANGE_V(0, 20), ++ RANGE_V(0, 10), ++ RANGE_V(0, 5), ++ RANGE_V(0, 2), ++ RANGE_V(0, 1), ++ RANGE_V(0, 0.5), ++ RANGE_V(0, 0.2), ++ RANGE_V(0, 0.1), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ai = ++ RNG_GLOBAL(rng_ni_E_ai); ++ ++struct a4l_rngtab rng_ni_E_ai_limited = {8, { ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-1, 1), ++ RANGE_V(-0.1, 0.1), ++ RANGE_V(0, 10), ++ RANGE_V(0, 5), ++ RANGE_V(0, 1), ++ RANGE_V(0, 0.1), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ai_limited = ++ RNG_GLOBAL(rng_ni_E_ai_limited); ++ ++struct a4l_rngtab rng_ni_E_ai_limited14 = {14, { ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-2, 2), ++ RANGE_V(-1, 1), ++ RANGE_V(-0.5, 0.5), ++ RANGE_V(-0.2, 0.2), ++ RANGE_V(-0.1, 0.1), ++ RANGE_V(0, 10), ++ RANGE_V(0, 5), ++ RANGE_V(0, 2), ++ RANGE_V(0, 1), ++ RANGE_V(0, 0.5), ++ RANGE_V(0, 0.2), ++ RANGE_V(0, 0.1), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ai_limited14 = ++ RNG_GLOBAL(rng_ni_E_ai_limited14); ++ ++struct a4l_rngtab rng_ni_E_ai_bipolar4 = {4, { ++ RANGE_V(-10,10), ++ RANGE_V(-5, 5), ++ RANGE_V(-0.5, 0.5), ++ RANGE_V(-0.05, 0.05), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ai_bipolar4 = ++ RNG_GLOBAL(rng_ni_E_ai_bipolar4); ++ ++struct a4l_rngtab rng_ni_E_ai_611x = {8, { ++ RANGE_V(-50, 50), ++ RANGE_V(-20, 20), ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-2, 2), ++ RANGE_V(-1, 1), ++ RANGE_V(-0.5, 0.5), ++ RANGE_V(-0.2, 0.2), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ai_611x = ++ RNG_GLOBAL(rng_ni_E_ai_611x); ++ ++struct a4l_rngtab rng_ni_M_ai_622x = {4, { ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-1, 1), ++ 
RANGE_V(-0.2, 0.2), ++}}; ++struct a4l_rngdesc a4l_range_ni_M_ai_622x = ++ RNG_GLOBAL(rng_ni_M_ai_622x); ++ ++struct a4l_rngtab rng_ni_M_ai_628x = {7, { ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-2, 2), ++ RANGE_V(-1, 1), ++ RANGE_V(-0.5, 0.5), ++ RANGE_V(-0.2, 0.2), ++ RANGE_V(-0.1, 0.1), ++}}; ++struct a4l_rngdesc a4l_range_ni_M_ai_628x = ++ RNG_GLOBAL(rng_ni_M_ai_628x); ++ ++struct a4l_rngtab rng_ni_S_ai_6143 = {1, { ++ RANGE_V(-5, 5), ++}}; ++struct a4l_rngdesc a4l_range_ni_S_ai_6143 = ++ RNG_GLOBAL(rng_ni_S_ai_6143); ++ ++ ++struct a4l_rngtab rng_ni_E_ao_ext = {4, { ++ RANGE_V(-10, 10), ++ RANGE_V(0, 10), ++ RANGE_ext(-1, 1), ++ RANGE_ext(0, 1), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ao_ext = ++ RNG_GLOBAL(rng_ni_E_ao_ext); ++ ++struct a4l_rngdesc *ni_range_lkup[] = { ++ &a4l_range_ni_E_ai, ++ &a4l_range_ni_E_ai_limited, ++ &a4l_range_ni_E_ai_limited14, ++ &a4l_range_ni_E_ai_bipolar4, ++ &a4l_range_ni_E_ai_611x, ++ &a4l_range_ni_M_ai_622x, ++ &a4l_range_ni_M_ai_628x, ++ &a4l_range_ni_S_ai_6143 ++}; ++ ++static const int num_adc_stages_611x = 3; ++ ++static void ni_handle_fifo_dregs(struct a4l_subdevice *subd); ++static void get_last_sample_611x(struct a4l_subdevice *subd); ++static void get_last_sample_6143(struct a4l_subdevice *subd); ++static void handle_cdio_interrupt(struct a4l_device *dev); ++static void ni_load_channelgain_list(struct a4l_device *dev, ++ unsigned int n_chan, unsigned int *list); ++ ++#if (!defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) && \ ++ !defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++static void ni_handle_fifo_half_full(struct a4l_subdevice *subd); ++static int ni_ao_fifo_half_empty(struct a4l_subdevice *subd); ++#endif /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++static inline void ni_set_bitfield(struct a4l_device *dev, ++ int reg, ++ unsigned int bit_mask, ++ unsigned int bit_values) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->soft_reg_copy_lock, flags); ++ switch (reg) { ++ case Interrupt_A_Enable_Register: ++ devpriv->int_a_enable_reg &= ~bit_mask; ++ devpriv->int_a_enable_reg |= bit_values & bit_mask; ++ devpriv->stc_writew(dev, devpriv->int_a_enable_reg, ++ Interrupt_A_Enable_Register); ++ break; ++ case Interrupt_B_Enable_Register: ++ devpriv->int_b_enable_reg &= ~bit_mask; ++ devpriv->int_b_enable_reg |= bit_values & bit_mask; ++ devpriv->stc_writew(dev, devpriv->int_b_enable_reg, ++ Interrupt_B_Enable_Register); ++ break; ++ case IO_Bidirection_Pin_Register: ++ devpriv->io_bidirection_pin_reg &= ~bit_mask; ++ devpriv->io_bidirection_pin_reg |= bit_values & bit_mask; ++ devpriv->stc_writew(dev, devpriv->io_bidirection_pin_reg, ++ IO_Bidirection_Pin_Register); ++ break; ++ case AI_AO_Select: ++ devpriv->ai_ao_select_reg &= ~bit_mask; ++ devpriv->ai_ao_select_reg |= bit_values & bit_mask; ++ ni_writeb(devpriv->ai_ao_select_reg, AI_AO_Select); ++ break; ++ case G0_G1_Select: ++ devpriv->g0_g1_select_reg &= ~bit_mask; ++ devpriv->g0_g1_select_reg |= bit_values & bit_mask; ++ ni_writeb(devpriv->g0_g1_select_reg, G0_G1_Select); ++ break; ++ default: ++ a4l_err(dev, ++ "Warning %s() called with invalid register\n", ++ __FUNCTION__); ++ a4l_err(dev,"reg is %d\n", reg); ++ break; ++ } ++ ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&devpriv->soft_reg_copy_lock, flags); ++} ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static int ni_ai_drain_dma(struct a4l_subdevice *subd); ++ ++static inline void ni_set_ai_dma_channel(struct a4l_device * dev, 
int channel) ++{ ++ unsigned bitfield; ++ ++ if (channel >= 0) { ++ bitfield = ++ (ni_stc_dma_channel_select_bitfield(channel) << ++ AI_DMA_Select_Shift) & AI_DMA_Select_Mask; ++ } else { ++ bitfield = 0; ++ } ++ ni_set_bitfield(dev, AI_AO_Select, AI_DMA_Select_Mask, bitfield); ++} ++ ++static inline void ni_set_ao_dma_channel(struct a4l_device * dev, int channel) ++{ ++ unsigned bitfield; ++ ++ if (channel >= 0) { ++ bitfield = ++ (ni_stc_dma_channel_select_bitfield(channel) << ++ AO_DMA_Select_Shift) & AO_DMA_Select_Mask; ++ } else { ++ bitfield = 0; ++ } ++ ni_set_bitfield(dev, AI_AO_Select, AO_DMA_Select_Mask, bitfield); ++} ++ ++static inline void ni_set_gpct_dma_channel(struct a4l_device * dev, ++ unsigned gpct_index, int mite_channel) ++{ ++ unsigned bitfield; ++ ++ if (mite_channel >= 0) { ++ bitfield = GPCT_DMA_Select_Bits(gpct_index, mite_channel); ++ } else { ++ bitfield = 0; ++ } ++ ni_set_bitfield(dev, G0_G1_Select, GPCT_DMA_Select_Mask(gpct_index), ++ bitfield); ++} ++ ++static inline void ni_set_cdo_dma_channel(struct a4l_device * dev, int mite_channel) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->soft_reg_copy_lock, flags); ++ devpriv->cdio_dma_select_reg &= ~CDO_DMA_Select_Mask; ++ if (mite_channel >= 0) { ++ /*XXX just guessing ++ ni_stc_dma_channel_select_bitfield() returns the right ++ bits, under the assumption the cdio dma selection ++ works just like ai/ao/gpct. Definitely works for dma ++ channels 0 and 1. */ ++ devpriv->cdio_dma_select_reg |= ++ (ni_stc_dma_channel_select_bitfield(mite_channel) << ++ CDO_DMA_Select_Shift) & CDO_DMA_Select_Mask; ++ } ++ ni_writeb(devpriv->cdio_dma_select_reg, M_Offset_CDIO_DMA_Select); ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&devpriv->soft_reg_copy_lock, flags); ++} ++ ++static int ni_request_ai_mite_channel(struct a4l_device * dev) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ BUG_ON(devpriv->ai_mite_chan); ++ devpriv->ai_mite_chan = ++ mite_request_channel(devpriv->mite, devpriv->ai_mite_ring); ++ if (devpriv->ai_mite_chan == NULL) { ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, ++ flags); ++ a4l_err(dev, ++ "ni_request_ai_mite_channel: " ++ "failed to reserve mite dma channel for analog input."); ++ return -EBUSY; ++ } ++ devpriv->ai_mite_chan->dir = A4L_INPUT; ++ ni_set_ai_dma_channel(dev, devpriv->ai_mite_chan->channel); ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ return 0; ++} ++ ++static int ni_request_ao_mite_channel(struct a4l_device * dev) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ BUG_ON(devpriv->ao_mite_chan); ++ devpriv->ao_mite_chan = ++ mite_request_channel(devpriv->mite, devpriv->ao_mite_ring); ++ if (devpriv->ao_mite_chan == NULL) { ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, ++ flags); ++ a4l_err(dev, ++ "ni_request_ao_mite_channel: " ++ "failed to reserve mite dma channel for analog outut."); ++ return -EBUSY; ++ } ++ devpriv->ao_mite_chan->dir = A4L_OUTPUT; ++ ni_set_ao_dma_channel(dev, devpriv->ao_mite_chan->channel); ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ return 0; ++} ++ ++static int ni_request_gpct_mite_channel(struct a4l_device * dev, ++ unsigned gpct_index, int direction) ++{ ++ unsigned long flags; ++ struct mite_channel *mite_chan; ++ ++ BUG_ON(gpct_index >= NUM_GPCT); ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ BUG_ON(devpriv->counter_dev->counters[gpct_index]->mite_chan); ++ mite_chan 
= mite_request_channel(devpriv->mite, ++ devpriv->gpct_mite_ring[gpct_index]); ++ if (mite_chan == NULL) { ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, ++ flags); ++ a4l_err(dev, ++ "ni_request_gpct_mite_channel: " ++ "failed to reserve mite dma channel for counter."); ++ return -EBUSY; ++ } ++ mite_chan->dir = direction; ++ a4l_ni_tio_set_mite_channel(devpriv->counter_dev->counters[gpct_index], ++ mite_chan); ++ ni_set_gpct_dma_channel(dev, gpct_index, mite_chan->channel); ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ return 0; ++} ++ ++static int ni_request_cdo_mite_channel(struct a4l_device *dev) ++{ ++ unsigned long flags; ++ int err = 0; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ ++ /* No channel should be allocated... */ ++ BUG_ON(devpriv->cdo_mite_chan); ++ /* ...until now */ ++ devpriv->cdo_mite_chan = ++ mite_request_channel(devpriv->mite, devpriv->cdo_mite_ring); ++ ++ if (devpriv->cdo_mite_chan) { ++ devpriv->cdo_mite_chan->dir = A4L_OUTPUT; ++ ni_set_cdo_dma_channel(dev, devpriv->cdo_mite_chan->channel); ++ } else { ++ err = -EBUSY; ++ a4l_err(dev, ++ "ni_request_cdo_mite_channel: " ++ "failed to reserve mite dma channel " ++ "for correlated digital outut."); ++ } ++ ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ return err; ++} ++ ++void ni_release_ai_mite_channel(struct a4l_device *dev) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->ai_mite_chan) { ++ ni_set_ai_dma_channel(dev, -1); ++ a4l_mite_release_channel(devpriv->ai_mite_chan); ++ devpriv->ai_mite_chan = NULL; ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++} ++ ++void ni_release_ao_mite_channel(struct a4l_device *dev) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->ao_mite_chan) { ++ ni_set_ao_dma_channel(dev, -1); ++ a4l_mite_release_channel(devpriv->ao_mite_chan); ++ devpriv->ao_mite_chan = NULL; ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++} ++ ++void ni_release_gpct_mite_channel(struct a4l_device *dev, unsigned gpct_index) ++{ ++ unsigned long flags; ++ ++ BUG_ON(gpct_index >= NUM_GPCT); ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->counter_dev->counters[gpct_index]->mite_chan) { ++ struct mite_channel *mite_chan = ++ devpriv->counter_dev->counters[gpct_index]->mite_chan; ++ ++ ni_set_gpct_dma_channel(dev, gpct_index, -1); ++ a4l_ni_tio_set_mite_channel(devpriv->counter_dev-> ++ counters[gpct_index], NULL); ++ a4l_mite_release_channel(mite_chan); ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++} ++ ++void ni_release_cdo_mite_channel(struct a4l_device *dev) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->cdo_mite_chan) { ++ ni_set_cdo_dma_channel(dev, -1); ++ a4l_mite_release_channel(devpriv->cdo_mite_chan); ++ devpriv->cdo_mite_chan = NULL; ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++} ++ ++void ni_sync_ai_dma(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->ai_mite_chan) ++ a4l_mite_sync_input_dma(devpriv->ai_mite_chan, subd); ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++} ++ ++void mite_handle_b_linkc(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = 
subd->dev; ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->ao_mite_chan) ++ a4l_mite_sync_output_dma(devpriv->ao_mite_chan, subd); ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++} ++ ++static int ni_ao_wait_for_dma_load(struct a4l_subdevice *subd) ++{ ++ static const int timeout = 10000; ++ ++ struct a4l_device *dev = subd->dev; ++ struct a4l_buffer *buf = subd->buf; ++ ++ int i; ++ ++ for (i = 0; i < timeout; i++) { ++ ++ int buffer_filled; ++ unsigned short b_status; ++ ++ b_status = devpriv->stc_readw(dev, AO_Status_1_Register); ++ ++ buffer_filled = test_bit(A4L_BUF_EOA_NR, &buf->flags); ++ buffer_filled |= (b_status & AO_FIFO_Half_Full_St); ++ ++ if (buffer_filled) ++ break; ++ ++ /* If we poll too often, the pci bus activity seems ++ to slow the dma transfer down */ ++ a4l_udelay(10); ++ } ++ ++ if (i == timeout) { ++ a4l_err(dev, ++ "ni_ao_wait_for_dma_load: " ++ "timed out waiting for dma load"); ++ return -EPIPE; ++ } ++ ++ return 0; ++} ++ ++ ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++static inline int ni_ai_drain_dma(struct a4l_subdevice *subd) ++{ ++ return -ENOTSUPP; ++} ++ ++static inline int ni_request_ai_mite_channel(struct a4l_device * dev) ++{ ++ return -ENOTSUPP; ++} ++ ++static inline int ni_request_ao_mite_channel(struct a4l_device * dev) ++{ ++ return -ENOTSUPP; ++} ++ ++static inline ++int ni_request_gpct_mite_channel(struct a4l_device * dev, ++ unsigned gpct_index, int direction) ++{ ++ return -ENOTSUPP; ++} ++ ++static inline int ni_request_cdo_mite_channel(struct a4l_device *dev) ++{ ++ return -ENOTSUPP; ++} ++ ++#define ni_release_ai_mite_channel(x) do { } while (0) ++#define ni_release_ao_mite_channel(x) do { } while (0) ++#define ni_release_gpct_mite_channel(x) do { } while (0) ++#define ni_release_cdo_mite_channel(x) do { } while (0) ++#define ni_sync_ai_dma(x) do { } while (0) ++#define mite_handle_b_linkc(x) do { } while (0) ++ ++static inline int ni_ao_wait_for_dma_load(struct a4l_subdevice *subd) ++{ ++ return -ENOTSUPP; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++/* E-series boards use the second irq signals to generate dma requests ++ for their counters */ ++void ni_e_series_enable_second_irq(struct a4l_device *dev, ++ unsigned gpct_index, short enable) ++{ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ return; ++ switch (gpct_index) { ++ case 0: ++ if (enable) { ++ devpriv->stc_writew(dev, G0_Gate_Second_Irq_Enable, ++ Second_IRQ_A_Enable_Register); ++ } else { ++ devpriv->stc_writew(dev, 0, ++ Second_IRQ_A_Enable_Register); ++ } ++ break; ++ case 1: ++ if (enable) { ++ devpriv->stc_writew(dev, G1_Gate_Second_Irq_Enable, ++ Second_IRQ_B_Enable_Register); ++ } else { ++ devpriv->stc_writew(dev, 0, ++ Second_IRQ_B_Enable_Register); ++ } ++ break; ++ default: ++ BUG(); ++ break; ++ } ++} ++ ++void ni_clear_ai_fifo(struct a4l_device *dev) ++{ ++ if (boardtype.reg_type == ni_reg_6143) { ++ /* Flush the 6143 data FIFO */ ++ ni_writel(0x10, AIFIFO_Control_6143); /* Flush fifo */ ++ ni_writel(0x00, AIFIFO_Control_6143); /* Flush fifo */ ++ while (ni_readl(AIFIFO_Status_6143) & 0x10); /* Wait for complete */ ++ } else { ++ devpriv->stc_writew(dev, 1, ADC_FIFO_Clear); ++ if (boardtype.reg_type == ni_reg_625x) { ++ ni_writeb(0, M_Offset_Static_AI_Control(0)); ++ ni_writeb(1, M_Offset_Static_AI_Control(0)); ++ } ++ } ++} ++ ++#define ao_win_out(data, addr) ni_ao_win_outw(dev, data, addr) ++static inline void ni_ao_win_outw(struct a4l_device *dev, 
uint16_t data, int addr) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->window_lock, flags); ++ ni_writew(addr, AO_Window_Address_611x); ++ ni_writew(data, AO_Window_Data_611x); ++ rtdm_lock_put_irqrestore(&devpriv->window_lock, flags); ++} ++ ++static inline void ni_ao_win_outl(struct a4l_device *dev, uint32_t data, int addr) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->window_lock, flags); ++ ni_writew(addr, AO_Window_Address_611x); ++ ni_writel(data, AO_Window_Data_611x); ++ rtdm_lock_put_irqrestore(&devpriv->window_lock, flags); ++} ++ ++static inline unsigned short ni_ao_win_inw(struct a4l_device *dev, int addr) ++{ ++ unsigned long flags; ++ unsigned short data; ++ ++ rtdm_lock_get_irqsave(&devpriv->window_lock, flags); ++ ni_writew(addr, AO_Window_Address_611x); ++ data = ni_readw(AO_Window_Data_611x); ++ rtdm_lock_put_irqrestore(&devpriv->window_lock, flags); ++ return data; ++} ++ ++/* ++ * ni_set_bits( ) allows different parts of the ni_mio_common driver ++ * to share registers (such as Interrupt_A_Register) without interfering ++ * with each other. ++ * ++ * NOTE: the switch/case statements are optimized out for a constant ++ * argument so this is actually quite fast--- If you must wrap another ++ * function around this make it inline to avoid a large speed penalty. ++ * ++ * value should only be 1 or 0. ++ */ ++ ++static inline void ni_set_bits(struct a4l_device *dev, ++ int reg, unsigned bits, unsigned value) ++{ ++ unsigned bit_values; ++ ++ if (value) ++ bit_values = bits; ++ else ++ bit_values = 0; ++ ++ ni_set_bitfield(dev, reg, bits, bit_values); ++} ++ ++static void shutdown_ai_command(struct a4l_subdevice *subd) ++{ ++ ni_ai_drain_dma(subd); ++ ni_handle_fifo_dregs(subd); ++ get_last_sample_611x(subd); ++ get_last_sample_6143(subd); ++ ++ /* TODO: stop the acquisiton */ ++} ++ ++static void ni_handle_eos(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ if (devpriv->aimode == AIMODE_SCAN) { ++ static const int timeout = 10; ++ int i; ++ ++ for (i = 0; i < timeout; i++) { ++ ni_sync_ai_dma(subd); ++ /* TODO: stop when the transfer is really over */ ++ a4l_udelay(1); ++ } ++ } ++ ++ /* Handle special case of single scan using AI_End_On_End_Of_Scan */ ++ if ((devpriv->ai_cmd2 & AI_End_On_End_Of_Scan)) { ++ shutdown_ai_command(subd); ++ } ++} ++ ++static void ni_event(struct a4l_subdevice * subd) ++{ ++ /* Temporary hack */ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if(test_bit(A4L_BUF_ERROR_NR, &buf->flags)) { ++ if (subd->cancel != NULL) ++ subd->cancel(subd); ++ } ++ ++ a4l_buf_evt(subd, 0); ++ ++} ++ ++static void handle_gpct_interrupt(struct a4l_device *dev, unsigned short counter_index) ++{ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ struct ni_gpct *counter = devpriv->counter_dev->counters[counter_index]; ++ a4l_ni_tio_handle_interrupt(counter, dev); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++} ++ ++#ifdef CONFIG_DEBUG_MIO_COMMON ++static const char *const status_a_strings[] = { ++ "passthru0", "fifo", "G0_gate", "G0_TC", ++ "stop", "start", "sc_tc", "start1", ++ "start2", "sc_tc_error", "overflow", "overrun", ++ "fifo_empty", "fifo_half_full", "fifo_full", "interrupt_a" ++}; ++ ++static void ni_mio_print_status_a(int status) ++{ ++ int i; ++ ++ __a4l_info("A status:"); ++ for (i = 15; i >= 0; i--) { ++ if (status & (1 << i)) { ++ __a4l_info(" %s", status_a_strings[i]); ++ } ++ } ++ __a4l_info("\n"); ++} ++ ++static 
const char *const status_b_strings[] = { ++ "passthru1", "fifo", "G1_gate", "G1_TC", ++ "UI2_TC", "UPDATE", "UC_TC", "BC_TC", ++ "start1", "overrun", "start", "bc_tc_error", ++ "fifo_empty", "fifo_half_full", "fifo_full", "interrupt_b" ++}; ++ ++static void ni_mio_print_status_b(int status) ++{ ++ int i; ++ ++ __a4l_info("B status:"); ++ for (i = 15; i >= 0; i--) { ++ if (status & (1 << i)) { ++ __a4l_info(" %s", status_b_strings[i]); ++ } ++ } ++ __a4l_info("\n"); ++} ++ ++#else /* !CONFIG_DEBUG_MIO_COMMON */ ++ ++#define ni_mio_print_status_a(x) ++#define ni_mio_print_status_b(x) ++ ++#endif /* CONFIG_DEBUG_MIO_COMMON */ ++ ++static void ack_a_interrupt(struct a4l_device *dev, unsigned short a_status) ++{ ++ unsigned short ack = 0; ++ ++ if (a_status & AI_SC_TC_St) { ++ ack |= AI_SC_TC_Interrupt_Ack; ++ } ++ if (a_status & AI_START1_St) { ++ ack |= AI_START1_Interrupt_Ack; ++ } ++ if (a_status & AI_START_St) { ++ ack |= AI_START_Interrupt_Ack; ++ } ++ if (a_status & AI_STOP_St) { ++ /* not sure why we used to ack the START here also, ++ instead of doing it independently. Frank Hess ++ 2007-07-06 */ ++ ack |= AI_STOP_Interrupt_Ack; ++ } ++ if (ack) ++ devpriv->stc_writew(dev, ack, Interrupt_A_Ack_Register); ++} ++ ++static void handle_a_interrupt(struct a4l_device *dev, ++ unsigned short status,unsigned int ai_mite_status) ++{ ++ ++ struct a4l_subdevice *subd = a4l_get_subd(dev, NI_AI_SUBDEV); ++ ++ /* 67xx boards don't have ai subdevice, but their gpct0 ++ might generate an a interrupt. */ ++ ++ if((subd->flags & A4L_SUBD_TYPES) == A4L_SUBD_UNUSED) ++ return; ++ ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: interrupt: " ++ "a_status=%04x ai_mite_status=%08x\n",status, ai_mite_status); ++ ni_mio_print_status_a(status); ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ if (ai_mite_status & CHSR_LINKC) ++ ni_sync_ai_dma(subd); ++ ++ if (ai_mite_status & ~(CHSR_INT | CHSR_LINKC | CHSR_DONE | CHSR_MRDY | ++ CHSR_DRDY | CHSR_DRQ1 | CHSR_DRQ0 | CHSR_ERROR | ++ CHSR_SABORT | CHSR_XFERR | CHSR_LxERR_mask)) { ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: interrupt: " ++ "unknown mite interrupt, ack! (ai_mite_status=%08x)\n", ++ ai_mite_status); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ /* Test for all uncommon interrupt events at the same time */ ++ if (status & (AI_Overrun_St | AI_Overflow_St | AI_SC_TC_Error_St | ++ AI_SC_TC_St | AI_START1_St)) { ++ if (status == 0xffff) { ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: interrupt: " ++ "a_status=0xffff. Card removed?\n"); ++ /* TODO: we probably aren't even running a command now, ++ so it's a good idea to be careful. 
++ we should check the transfer status */ ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ ni_event(subd); ++ return; ++ } ++ if (status & (AI_Overrun_St | AI_Overflow_St | ++ AI_SC_TC_Error_St)) { ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: interrupt: " ++ "ai error a_status=%04x\n", status); ++ ni_mio_print_status_a(status); ++ ++ shutdown_ai_command(subd); ++ ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ ni_event(subd); ++ ++ return; ++ } ++ if (status & AI_SC_TC_St) { ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: SC_TC interrupt\n"); ++ if (!devpriv->ai_continuous) { ++ shutdown_ai_command(subd); ++ } ++ } ++ } ++ ++#if (!defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) && \ ++ !defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++ if (status & AI_FIFO_Half_Full_St) { ++ int i; ++ static const int timeout = 10; ++ /* PCMCIA cards (at least 6036) seem to stop producing ++ interrupts if we fail to get the fifo less than half ++ full, so loop to be sure. */ ++ for (i = 0; i < timeout; ++i) { ++ ni_handle_fifo_half_full(subd); ++ if ((devpriv->stc_readw(dev, AI_Status_1_Register) & ++ AI_FIFO_Half_Full_St) == 0) ++ break; ++ } ++ } ++#endif /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ if ((status & AI_STOP_St)) { ++ ni_handle_eos(subd); ++ } ++ ++ ni_event(subd); ++ ++ status = devpriv->stc_readw(dev, AI_Status_1_Register); ++ if (status & Interrupt_A_St) ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: interrupt: " ++ " didn't clear interrupt? status=0x%x\n", status); ++} ++ ++static void ack_b_interrupt(struct a4l_device *dev, unsigned short b_status) ++{ ++ unsigned short ack = 0; ++ if (b_status & AO_BC_TC_St) { ++ ack |= AO_BC_TC_Interrupt_Ack; ++ } ++ if (b_status & AO_Overrun_St) { ++ ack |= AO_Error_Interrupt_Ack; ++ } ++ if (b_status & AO_START_St) { ++ ack |= AO_START_Interrupt_Ack; ++ } ++ if (b_status & AO_START1_St) { ++ ack |= AO_START1_Interrupt_Ack; ++ } ++ if (b_status & AO_UC_TC_St) { ++ ack |= AO_UC_TC_Interrupt_Ack; ++ } ++ if (b_status & AO_UI2_TC_St) { ++ ack |= AO_UI2_TC_Interrupt_Ack; ++ } ++ if (b_status & AO_UPDATE_St) { ++ ack |= AO_UPDATE_Interrupt_Ack; ++ } ++ if (ack) ++ devpriv->stc_writew(dev, ack, Interrupt_B_Ack_Register); ++} ++ ++static void handle_b_interrupt(struct a4l_device * dev, ++ unsigned short b_status, unsigned int ao_mite_status) ++{ ++ ++ struct a4l_subdevice *subd = a4l_get_subd(dev, NI_AO_SUBDEV); ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "ni_mio_common: interrupt: b_status=%04x m1_status=%08x\n", ++ b_status, ao_mite_status); ++ ++ ni_mio_print_status_b(b_status); ++ ++ if (b_status == 0xffff) ++ return; ++ ++ if (b_status & AO_Overrun_St) { ++ a4l_err(dev, ++ "ni_mio_common: interrupt: " ++ "AO FIFO underrun status=0x%04x status2=0x%04x\n", ++ b_status, ++ devpriv->stc_readw(dev, AO_Status_2_Register)); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ ++ if (b_status & AO_BC_TC_St) { ++ a4l_dbg(1, drv_dbg, dev, ++ "ni_mio_common: interrupt: " ++ "AO BC_TC status=0x%04x status2=0x%04x\n", ++ b_status, devpriv->stc_readw(dev, AO_Status_2_Register)); ++ a4l_buf_evt(subd, A4L_BUF_EOA); ++ } ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++ if (ao_mite_status & CHSR_STOPS) { ++ a4l_dbg(1, drv_dbg, dev, ++ "ni_mio_common: interrupt: MITE transfer stopped\n"); ++ } else if (ao_mite_status & CHSR_LINKC) { ++ /* Currently, mite.c requires us to handle LINKC */ ++ mite_handle_b_linkc(subd); ++ } ++ ++ if (ao_mite_status & ++ ~(CHSR_INT | CHSR_LINKC | CHSR_DONE | CHSR_MRDY | ++ CHSR_DRDY | CHSR_DRQ1 | 
CHSR_DRQ0 | CHSR_ERROR | ++ CHSR_SABORT | CHSR_STOPS | CHSR_XFERR | CHSR_LxERR_mask)) { ++ a4l_err(dev, ++ "unknown mite interrupt, ack! (ao_mite_status=%08x)\n", ++ ao_mite_status); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++#if (!defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) && \ ++ !defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++ if (b_status & AO_FIFO_Request_St) { ++ int ret; ++ ++ ret = ni_ao_fifo_half_empty(subd); ++ if (!ret) { ++ a4l_err(dev, ++ "ni_mio_common: " ++ "interrupt: AO buffer underrun\n"); ++ ni_set_bits(dev, Interrupt_B_Enable_Register, ++ AO_FIFO_Interrupt_Enable | ++ AO_Error_Interrupt_Enable, 0); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ } ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ ni_event(subd); ++} ++ ++int a4l_ni_E_interrupt(unsigned int irq, void *d) ++{ ++ struct a4l_device *dev = d; ++ unsigned short a_status; ++ unsigned short b_status; ++ unsigned int ai_mite_status = 0; ++ unsigned int ao_mite_status = 0; ++ unsigned long flags; ++ struct mite_struct *mite = devpriv->mite; ++ ++ /* Make sure dev->attached is checked before handler does ++ anything else. */ ++ smp_mb(); ++ ++ /* lock to avoid race with a4l_poll */ ++ rtdm_lock_get_irqsave(&dev->lock, flags); ++ a_status = devpriv->stc_readw(dev, AI_Status_1_Register); ++ b_status = devpriv->stc_readw(dev, AO_Status_1_Register); ++ if (mite) { ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ rtdm_lock_get(&devpriv->mite_channel_lock); ++ if (devpriv->ai_mite_chan) { ++ ai_mite_status = a4l_mite_get_status(devpriv->ai_mite_chan); ++ if (ai_mite_status & CHSR_LINKC) ++ writel(CHOR_CLRLC, ++ devpriv->mite->mite_io_addr + ++ MITE_CHOR(devpriv->ai_mite_chan->channel)); ++ } ++ if (devpriv->ao_mite_chan) { ++ ao_mite_status = a4l_mite_get_status(devpriv->ao_mite_chan); ++ if (ao_mite_status & CHSR_LINKC) ++ writel(CHOR_CLRLC, ++ mite->mite_io_addr + ++ MITE_CHOR(devpriv->ao_mite_chan->channel)); ++ } ++ rtdm_lock_put(&devpriv->mite_channel_lock); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ } ++ ack_a_interrupt(dev, a_status); ++ ack_b_interrupt(dev, b_status); ++ if ((a_status & Interrupt_A_St) || (ai_mite_status & CHSR_INT)) ++ handle_a_interrupt(dev, a_status, ai_mite_status); ++ if ((b_status & Interrupt_B_St) || (ao_mite_status & CHSR_INT)) ++ handle_b_interrupt(dev, b_status, ao_mite_status); ++ handle_gpct_interrupt(dev, 0); ++ handle_gpct_interrupt(dev, 1); ++ handle_cdio_interrupt(dev); ++ ++ rtdm_lock_put_irqrestore(&dev->lock, flags); ++ return 0; ++} ++ ++#if (!defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) && \ ++ !defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static void ni_ao_fifo_load(struct a4l_subdevice *subd, int n) ++{ ++ struct a4l_device *dev = subd->dev; ++ sampl_t d; ++ u32 packed_data; ++ int i, err = 1; ++ ++ for (i = 0; i < n; i++) { ++ err = a4l_buf_get(subd, &d, sizeof(sampl_t)); ++ if (err != 0) ++ break; ++ ++ if (boardtype.reg_type & ni_reg_6xxx_mask) { ++ packed_data = d & 0xffff; ++ /* 6711 only has 16 bit wide ao fifo */ ++ if (boardtype.reg_type != ni_reg_6711) { ++ err = a4l_buf_get(subd, &d, sizeof(sampl_t)); ++ if (err != 0) ++ break; ++ i++; ++ packed_data |= (d << 16) & 0xffff0000; ++ } ++ ni_writel(packed_data, DAC_FIFO_Data_611x); ++ } else { ++ ni_writew(d, DAC_FIFO_Data); ++ } ++ } ++ if (err != 0) { ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++} ++ ++/* ++ * There's a small problem if the FIFO gets 
really low and we ++ * don't have the data to fill it. Basically, if after we fill ++ * the FIFO with all the data available, the FIFO is _still_ ++ * less than half full, we never clear the interrupt. If the ++ * IRQ is in edge mode, we never get another interrupt, because ++ * this one wasn't cleared. If in level mode, we get flooded ++ * with interrupts that we can't fulfill, because nothing ever ++ * gets put into the buffer. ++ * ++ * This kind of situation is recoverable, but it is easier to ++ * just pretend we had a FIFO underrun, since there is a good ++ * chance it will happen anyway. This is _not_ the case for ++ * RT code, as RT code might purposely be running close to the ++ * metal. Needs to be fixed eventually. ++ */ ++static int ni_ao_fifo_half_empty(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ int n; ++ ++ n = a4l_buf_count(subd); ++ if (n == 0) { ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ return 0; ++ } ++ ++ n /= sizeof(sampl_t); ++ if (n > boardtype.ao_fifo_depth / 2) ++ n = boardtype.ao_fifo_depth / 2; ++ ++ ni_ao_fifo_load(subd, n); ++ ++ return 1; ++} ++ ++static int ni_ao_prep_fifo(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ int n; ++ ++ /* Reset fifo */ ++ devpriv->stc_writew(dev, 1, DAC_FIFO_Clear); ++ if (boardtype.reg_type & ni_reg_6xxx_mask) ++ ni_ao_win_outl(dev, 0x6, AO_FIFO_Offset_Load_611x); ++ ++ /* Load some data */ ++ n = a4l_buf_count(subd); ++ if (n == 0) ++ return 0; ++ ++ n /= sizeof(sampl_t); ++ if (n > boardtype.ao_fifo_depth) ++ n = boardtype.ao_fifo_depth; ++ ++ ni_ao_fifo_load(subd, n); ++ ++ return n; ++} ++ ++static void ni_ai_fifo_read(struct a4l_subdevice *subd, int n) ++{ ++ struct a4l_device *dev = subd->dev; ++ int i; ++ ++ if (boardtype.reg_type == ni_reg_611x) { ++ sampl_t data[2]; ++ u32 dl; ++ ++ for (i = 0; i < n / 2; i++) { ++ dl = ni_readl(ADC_FIFO_Data_611x); ++ /* This may get the hi/lo data in the wrong order */ ++ data[0] = (dl >> 16) & 0xffff; ++ data[1] = dl & 0xffff; ++ a4l_buf_put(subd, data, sizeof(sampl_t) * 2); ++ } ++ /* Check if there's a single sample stuck in the FIFO */ ++ if (n % 2) { ++ dl = ni_readl(ADC_FIFO_Data_611x); ++ data[0] = dl & 0xffff; ++ a4l_buf_put(subd, &data[0], sizeof(sampl_t)); ++ } ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ sampl_t data[2]; ++ u32 dl; ++ ++ /* This just reads the FIFO assuming the data is ++ present, no checks on the FIFO status are performed */ ++ for (i = 0; i < n / 2; i++) { ++ dl = ni_readl(AIFIFO_Data_6143); ++ ++ data[0] = (dl >> 16) & 0xffff; ++ data[1] = dl & 0xffff; ++ a4l_buf_put(subd, data, sizeof(sampl_t) * 2); ++ } ++ if (n % 2) { ++ /* Assume there is a single sample stuck in the FIFO. ++ Get stranded sample into FIFO */ ++ ni_writel(0x01, AIFIFO_Control_6143); ++ dl = ni_readl(AIFIFO_Data_6143); ++ data[0] = (dl >> 16) & 0xffff; ++ a4l_buf_put(subd, &data[0], sizeof(sampl_t)); ++ } ++ } else { ++ if (n > sizeof(devpriv->ai_fifo_buffer) / ++ sizeof(devpriv->ai_fifo_buffer[0])) { ++ a4l_err(dev, ++ "ni_ai_fifo_read: " ++ "bug! 
ai_fifo_buffer too small"); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ return; ++ } ++ for (i = 0; i < n; i++) { ++ devpriv->ai_fifo_buffer[i] = ++ ni_readw(ADC_FIFO_Data_Register); ++ } ++ a4l_buf_put(subd, ++ devpriv->ai_fifo_buffer, ++ n * sizeof(devpriv->ai_fifo_buffer[0])); ++ } ++} ++ ++static void ni_handle_fifo_half_full(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ni_ai_fifo_read(subd, boardtype.ai_fifo_depth / 2); ++} ++ ++#endif /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static int ni_ai_drain_dma(struct a4l_subdevice *subd) ++{ ++ int i; ++ static const int timeout = 10000; ++ unsigned long flags; ++ int retval = 0; ++ struct a4l_device *dev = subd->dev; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->ai_mite_chan) { ++ for (i = 0; i < timeout; i++) { ++ if ((devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St) ++ && a4l_mite_bytes_in_transit(devpriv-> ++ ai_mite_chan) == 0) ++ break; ++ a4l_udelay(5); ++ } ++ if (i == timeout) { ++ a4l_info(dev, "wait for dma drain timed out\n"); ++ ++ a4l_info(dev, "a4l_mite_bytes_in_transit=%i, " ++ "AI_Status1_Register=0x%x\n", ++ a4l_mite_bytes_in_transit(devpriv->ai_mite_chan), ++ devpriv->stc_readw(dev, AI_Status_1_Register)); ++ retval = -1; ++ } ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ ni_sync_ai_dma(subd); ++ ++ return retval; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++/* Empties the AI fifo */ ++static void ni_handle_fifo_dregs(struct a4l_subdevice *subd) ++{ ++ sampl_t data[2]; ++ u32 dl; ++ short fifo_empty; ++ int i; ++ struct a4l_device *dev = subd->dev; ++ ++ if (boardtype.reg_type == ni_reg_611x) { ++ while ((devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St) == 0) { ++ dl = ni_readl(ADC_FIFO_Data_611x); ++ ++ /* This may get the hi/lo data in the wrong order */ ++ data[0] = (dl >> 16); ++ data[1] = (dl & 0xffff); ++ a4l_buf_put(subd, data, sizeof(sampl_t) * 2); ++ } ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ i = 0; ++ while (ni_readl(AIFIFO_Status_6143) & 0x04) { ++ dl = ni_readl(AIFIFO_Data_6143); ++ ++ /* This may get the hi/lo data in the wrong order */ ++ data[0] = (dl >> 16); ++ data[1] = (dl & 0xffff); ++ a4l_buf_put(subd, data, sizeof(sampl_t) * 2); ++ i += 2; ++ } ++ // Check if stranded sample is present ++ if (ni_readl(AIFIFO_Status_6143) & 0x01) { ++ ni_writel(0x01, AIFIFO_Control_6143); // Get stranded sample into FIFO ++ dl = ni_readl(AIFIFO_Data_6143); ++ data[0] = (dl >> 16) & 0xffff; ++ a4l_buf_put(subd, &data[0], sizeof(sampl_t)); ++ } ++ ++ } else { ++ fifo_empty = ++ devpriv->stc_readw(dev, ++ AI_Status_1_Register) & AI_FIFO_Empty_St; ++ while (fifo_empty == 0) { ++ for (i = 0; ++ i < ++ sizeof(devpriv->ai_fifo_buffer) / ++ sizeof(devpriv->ai_fifo_buffer[0]); i++) { ++ fifo_empty = ++ devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St; ++ if (fifo_empty) ++ break; ++ devpriv->ai_fifo_buffer[i] = ++ ni_readw(ADC_FIFO_Data_Register); ++ } ++ a4l_buf_put(subd, ++ devpriv->ai_fifo_buffer, ++ i * sizeof(devpriv->ai_fifo_buffer[0])); ++ } ++ } ++} ++ ++static void get_last_sample_611x(struct a4l_subdevice *subd) ++{ ++ sampl_t data; ++ u32 dl; ++ struct a4l_device *dev = subd->dev; ++ ++ if (boardtype.reg_type != ni_reg_611x) ++ return; ++ ++ /* Check if there's a single sample stuck in the FIFO */ ++ if 
(ni_readb(XXX_Status) & 0x80) { ++ dl = ni_readl(ADC_FIFO_Data_611x); ++ data = (dl & 0xffff); ++ a4l_buf_put(subd, &data, sizeof(sampl_t)); ++ } ++} ++ ++static void get_last_sample_6143(struct a4l_subdevice *subd) ++{ ++ sampl_t data; ++ u32 dl; ++ struct a4l_device *dev = subd->dev; ++ ++ if (boardtype.reg_type != ni_reg_6143) ++ return; ++ ++ /* Check if there's a single sample stuck in the FIFO */ ++ if (ni_readl(AIFIFO_Status_6143) & 0x01) { ++ /* Get stranded sample into FIFO */ ++ ni_writel(0x01, AIFIFO_Control_6143); ++ dl = ni_readl(AIFIFO_Data_6143); ++ ++ /* This may get the hi/lo data in the wrong order */ ++ data = (dl >> 16) & 0xffff; ++ a4l_buf_put(subd, &data, sizeof(sampl_t)); ++ } ++} ++ ++static void ni_ai_munge16(struct a4l_subdevice *subd, void *buf, unsigned long size) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct a4l_cmd_desc *cmd = a4l_get_cmd(subd); ++ int chan_idx = a4l_get_chan(subd); ++ unsigned int i; ++ sampl_t *array = buf; ++ ++ for (i = 0; i < size / sizeof(sampl_t); i++) { ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ array[i] = le16_to_cpu(array[i]); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ array[i] += devpriv->ai_offset[chan_idx]; ++ chan_idx++; ++ chan_idx %= cmd->nb_chan; ++ } ++} ++ ++static void ni_ai_munge32(struct a4l_subdevice *subd, void *buf, unsigned long size) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct a4l_cmd_desc *cmd = a4l_get_cmd(subd); ++ int chan_idx = a4l_get_chan(subd); ++ unsigned int i; ++ lsampl_t *larray = buf; ++ ++ for (i = 0; i < size / sizeof(lsampl_t); i++) { ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ larray[i] = le32_to_cpu(larray[i]); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ larray[i] += devpriv->ai_offset[chan_idx]; ++ chan_idx++; ++ chan_idx %= cmd->nb_chan; ++ } ++} ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static int ni_ai_setup_MITE_dma(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned long flags; ++ int err; ++ ++ err = ni_request_ai_mite_channel(dev); ++ if (err < 0) ++ return err; ++ ++ err = a4l_mite_buf_change(devpriv->ai_mite_chan->ring, subd); ++ if (err < 0) ++ return err; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ ++ switch (boardtype.reg_type) { ++ case ni_reg_611x: ++ case ni_reg_6143: ++ a4l_mite_prep_dma(devpriv->ai_mite_chan, 32, 16); ++ break; ++ case ni_reg_628x: ++ a4l_mite_prep_dma(devpriv->ai_mite_chan, 32, 32); ++ break; ++ default: ++ a4l_mite_prep_dma(devpriv->ai_mite_chan, 16, 16); ++ break; ++ }; ++ ++ /* start the MITE */ ++ a4l_mite_dma_arm(devpriv->ai_mite_chan); ++ ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ return 0; ++} ++ ++static int ni_ao_setup_MITE_dma(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned long flags; ++ int err; ++ ++ err = ni_request_ao_mite_channel(dev); ++ if (err < 0) ++ return err; ++ ++ err = a4l_mite_buf_change(devpriv->ao_mite_chan->ring, subd); ++ if (err < 0) ++ return err; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ ++ if (devpriv->ao_mite_chan) { ++ ++ if (boardtype.reg_type & (ni_reg_611x | ni_reg_6713)) { ++ a4l_mite_prep_dma(devpriv->ao_mite_chan, 32, 32); ++ } else { ++ /* Doing 32 instead of 16 bit wide transfers ++ from memory makes the mite do 32 bit pci 
++ transfers, doubling pci bandwidth. */ ++ a4l_mite_prep_dma(devpriv->ao_mite_chan, 16, 32); ++ } ++ a4l_mite_dma_arm(devpriv->ao_mite_chan); ++ } else ++ err = -EIO; ++ ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ return err; ++} ++ ++static int ni_cdo_setup_MITE_dma(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned long flags; ++ int err; ++ ++ err = ni_request_cdo_mite_channel(dev); ++ if (err < 0) ++ return err; ++ ++ /* No need to get a lock to setup the ring buffer */ ++ err = a4l_mite_buf_change(devpriv->cdo_mite_chan->ring, subd); ++ if (err < 0) ++ return err; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ ++ /* This test should be useless but one never knows */ ++ if (devpriv->cdo_mite_chan) { ++ /* Configure the DMA transfer */ ++ a4l_mite_prep_dma(devpriv->cdo_mite_chan, 32, 32); ++ a4l_mite_dma_arm(devpriv->cdo_mite_chan); ++ } else ++ err = -EIO; ++ ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ return err; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++static void ni_ai_reset(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ ni_release_ai_mite_channel(dev); ++ ++ /* ai configuration */ ++ devpriv->stc_writew(dev, AI_Configuration_Start | AI_Reset, ++ Joint_Reset_Register); ++ ++ ni_set_bits(dev, Interrupt_A_Enable_Register, ++ AI_SC_TC_Interrupt_Enable | AI_START1_Interrupt_Enable | ++ AI_START2_Interrupt_Enable | AI_START_Interrupt_Enable | ++ AI_STOP_Interrupt_Enable | AI_Error_Interrupt_Enable | ++ AI_FIFO_Interrupt_Enable, 0); ++ ++ ni_clear_ai_fifo(dev); ++ ++ if (boardtype.reg_type != ni_reg_6143) ++ ni_writeb(0, Misc_Command); ++ ++ devpriv->stc_writew(dev, AI_Disarm, AI_Command_1_Register); /* reset pulses */ ++ devpriv->stc_writew(dev, ++ AI_Start_Stop | AI_Mode_1_Reserved /*| AI_Trigger_Once */ , ++ AI_Mode_1_Register); ++ devpriv->stc_writew(dev, 0x0000, AI_Mode_2_Register); ++ /* generate FIFO interrupts on non-empty */ ++ devpriv->stc_writew(dev, (0 << 6) | 0x0000, AI_Mode_3_Register); ++ if (boardtype.reg_type == ni_reg_611x) { ++ devpriv->stc_writew(dev, AI_SHIFTIN_Pulse_Width | ++ AI_SOC_Polarity | ++ AI_LOCALMUX_CLK_Pulse_Width, AI_Personal_Register); ++ devpriv->stc_writew(dev, AI_SCAN_IN_PROG_Output_Select(3) | ++ AI_EXTMUX_CLK_Output_Select(0) | ++ AI_LOCALMUX_CLK_Output_Select(2) | ++ AI_SC_TC_Output_Select(3) | ++ AI_CONVERT_Output_Select(AI_CONVERT_Output_Enable_High), ++ AI_Output_Control_Register); ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ devpriv->stc_writew(dev, AI_SHIFTIN_Pulse_Width | ++ AI_SOC_Polarity | ++ AI_LOCALMUX_CLK_Pulse_Width, AI_Personal_Register); ++ devpriv->stc_writew(dev, AI_SCAN_IN_PROG_Output_Select(3) | ++ AI_EXTMUX_CLK_Output_Select(0) | ++ AI_LOCALMUX_CLK_Output_Select(2) | ++ AI_SC_TC_Output_Select(3) | ++ AI_CONVERT_Output_Select(AI_CONVERT_Output_Enable_Low), ++ AI_Output_Control_Register); ++ } else { ++ unsigned int ai_output_control_bits; ++ devpriv->stc_writew(dev, AI_SHIFTIN_Pulse_Width | ++ AI_SOC_Polarity | ++ AI_CONVERT_Pulse_Width | ++ AI_LOCALMUX_CLK_Pulse_Width, AI_Personal_Register); ++ ai_output_control_bits = AI_SCAN_IN_PROG_Output_Select(3) | ++ AI_EXTMUX_CLK_Output_Select(0) | ++ AI_LOCALMUX_CLK_Output_Select(2) | ++ AI_SC_TC_Output_Select(3); ++ if (boardtype.reg_type == ni_reg_622x) ++ ai_output_control_bits |= ++ AI_CONVERT_Output_Select ++ (AI_CONVERT_Output_Enable_High); ++ else ++ ai_output_control_bits |= ++ AI_CONVERT_Output_Select ++ 
(AI_CONVERT_Output_Enable_Low); ++ devpriv->stc_writew(dev, ai_output_control_bits, ++ AI_Output_Control_Register); ++ } ++ ++ /* the following registers should not be changed, because there ++ * are no backup registers in devpriv. If you want to change ++ * any of these, add a backup register and other appropriate code: ++ * AI_Mode_1_Register ++ * AI_Mode_3_Register ++ * AI_Personal_Register ++ * AI_Output_Control_Register ++ */ ++ ++ /* clear interrupts */ ++ devpriv->stc_writew(dev, AI_SC_TC_Error_Confirm | AI_START_Interrupt_Ack | ++ AI_START2_Interrupt_Ack | AI_START1_Interrupt_Ack | ++ AI_SC_TC_Interrupt_Ack | AI_Error_Interrupt_Ack | ++ AI_STOP_Interrupt_Ack, Interrupt_A_Ack_Register); ++ ++ devpriv->stc_writew(dev, AI_Configuration_End, Joint_Reset_Register); ++} ++ ++static int ni_ai_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ const unsigned int mask = (1 << boardtype.adbits) - 1; ++ int i, n; ++ unsigned int signbits; ++ unsigned short d; ++ unsigned long dl; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ ni_load_channelgain_list(dev, 1, &insn->chan_desc); ++ ++ ni_clear_ai_fifo(dev); ++ ++ signbits = devpriv->ai_offset[0]; ++ if (boardtype.reg_type == ni_reg_611x) { ++ for (n = 0; n < num_adc_stages_611x; n++) { ++ devpriv->stc_writew(dev, AI_CONVERT_Pulse, ++ AI_Command_1_Register); ++ a4l_udelay(1); ++ } ++ for (n = 0; n < insn->data_size / sizeof(uint16_t); n++) { ++ devpriv->stc_writew(dev, AI_CONVERT_Pulse, ++ AI_Command_1_Register); ++ /* The 611x has screwy 32-bit FIFOs. */ ++ d = 0; ++ for (i = 0; i < NI_TIMEOUT; i++) { ++ if (ni_readb(XXX_Status) & 0x80) { ++ d = (ni_readl(ADC_FIFO_Data_611x) >> 16) ++ & 0xffff; ++ break; ++ } ++ if (!(devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St)) { ++ d = ni_readl(ADC_FIFO_Data_611x) & ++ 0xffff; ++ break; ++ } ++ } ++ if (i == NI_TIMEOUT) { ++ a4l_warn(dev, ++ "ni_mio_common: " ++ "timeout in 611x ni_ai_insn_read\n"); ++ return -ETIME; ++ } ++ d += signbits; ++ data[n] = d; ++ } ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ for (n = 0; n < insn->data_size / sizeof(uint16_t); n++) { ++ devpriv->stc_writew(dev, AI_CONVERT_Pulse, ++ AI_Command_1_Register); ++ ++ /* The 6143 has 32-bit FIFOs. 
++ You need to strobe a bit to move a single ++ 16bit stranded sample into the FIFO */ ++ dl = 0; ++ for (i = 0; i < NI_TIMEOUT; i++) { ++ if (ni_readl(AIFIFO_Status_6143) & 0x01) { ++ ni_writel(0x01, AIFIFO_Control_6143); // Get stranded sample into FIFO ++ dl = ni_readl(AIFIFO_Data_6143); ++ break; ++ } ++ } ++ if (i == NI_TIMEOUT) { ++ a4l_warn(dev, ++ "ni_mio_common: " ++ "timeout in 6143 ni_ai_insn_read\n"); ++ return -ETIME; ++ } ++ data[n] = (((dl >> 16) & 0xFFFF) + signbits) & 0xFFFF; ++ } ++ } else { ++ for (n = 0; n < insn->data_size / sizeof(uint16_t); n++) { ++ devpriv->stc_writew(dev, AI_CONVERT_Pulse, ++ AI_Command_1_Register); ++ for (i = 0; i < NI_TIMEOUT; i++) { ++ if (!(devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St)) ++ break; ++ } ++ if (i == NI_TIMEOUT) { ++ a4l_warn(dev, ++ "ni_mio_common: " ++ "timeout in ni_ai_insn_read\n"); ++ return -ETIME; ++ } ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ data[n] = ni_readl(M_Offset_AI_FIFO_Data) & mask; ++ } else { ++ d = ni_readw(ADC_FIFO_Data_Register); ++ /* subtle: needs to be short addition */ ++ d += signbits; ++ data[n] = d; ++ } ++ } ++ } ++ return 0; ++} ++ ++void ni_prime_channelgain_list(struct a4l_device *dev) ++{ ++ int i; ++ devpriv->stc_writew(dev, AI_CONVERT_Pulse, AI_Command_1_Register); ++ for (i = 0; i < NI_TIMEOUT; ++i) { ++ if (!(devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St)) { ++ devpriv->stc_writew(dev, 1, ADC_FIFO_Clear); ++ return; ++ } ++ a4l_udelay(1); ++ } ++ a4l_warn(dev, "ni_mio_common: timeout loading channel/gain list\n"); ++} ++ ++static void ni_m_series_load_channelgain_list(struct a4l_device *dev, ++ unsigned int n_chan, ++ unsigned int *list) ++{ ++ unsigned int chan, range, aref; ++ unsigned int i; ++ unsigned offset; ++ unsigned int dither; ++ unsigned range_code; ++ ++ devpriv->stc_writew(dev, 1, Configuration_Memory_Clear); ++ ++ if ((list[0] & CR_ALT_SOURCE)) { ++ unsigned bypass_bits; ++ chan = CR_CHAN(list[0]); ++ range = CR_RNG(list[0]); ++ range_code = ni_gainlkup[boardtype.gainlkup][range]; ++ dither = ((list[0] & CR_ALT_FILTER) != 0); ++ bypass_bits = MSeries_AI_Bypass_Config_FIFO_Bit; ++ bypass_bits |= chan; ++ bypass_bits |= ++ (devpriv-> ++ ai_calib_source) & (MSeries_AI_Bypass_Cal_Sel_Pos_Mask | ++ MSeries_AI_Bypass_Cal_Sel_Neg_Mask | ++ MSeries_AI_Bypass_Mode_Mux_Mask | ++ MSeries_AO_Bypass_AO_Cal_Sel_Mask); ++ bypass_bits |= MSeries_AI_Bypass_Gain_Bits(range_code); ++ if (dither) ++ bypass_bits |= MSeries_AI_Bypass_Dither_Bit; ++ // don't use 2's complement encoding ++ bypass_bits |= MSeries_AI_Bypass_Polarity_Bit; ++ ni_writel(bypass_bits, M_Offset_AI_Config_FIFO_Bypass); ++ } else { ++ ni_writel(0, M_Offset_AI_Config_FIFO_Bypass); ++ } ++ offset = 0; ++ for (i = 0; i < n_chan; i++) { ++ unsigned config_bits = 0; ++ chan = CR_CHAN(list[i]); ++ aref = CR_AREF(list[i]); ++ range = CR_RNG(list[i]); ++ dither = ((list[i] & CR_ALT_FILTER) != 0); ++ ++ range_code = ni_gainlkup[boardtype.gainlkup][range]; ++ devpriv->ai_offset[i] = offset; ++ switch (aref) { ++ case AREF_DIFF: ++ config_bits |= ++ MSeries_AI_Config_Channel_Type_Differential_Bits; ++ break; ++ case AREF_COMMON: ++ config_bits |= ++ MSeries_AI_Config_Channel_Type_Common_Ref_Bits; ++ break; ++ case AREF_GROUND: ++ config_bits |= ++ MSeries_AI_Config_Channel_Type_Ground_Ref_Bits; ++ break; ++ case AREF_OTHER: ++ break; ++ } ++ config_bits |= MSeries_AI_Config_Channel_Bits(chan); ++ config_bits |= ++ MSeries_AI_Config_Bank_Bits(boardtype.reg_type, chan); ++ 
config_bits |= MSeries_AI_Config_Gain_Bits(range_code); ++ if (i == n_chan - 1) ++ config_bits |= MSeries_AI_Config_Last_Channel_Bit; ++ if (dither) ++ config_bits |= MSeries_AI_Config_Dither_Bit; ++ // don't use 2's complement encoding ++ config_bits |= MSeries_AI_Config_Polarity_Bit; ++ ni_writew(config_bits, M_Offset_AI_Config_FIFO_Data); ++ } ++ ni_prime_channelgain_list(dev); ++} ++ ++/* ++ * Notes on the 6110 and 6111: ++ * These boards a slightly different than the rest of the series, since ++ * they have multiple A/D converters. ++ * From the driver side, the configuration memory is a ++ * little different. ++ * Configuration Memory Low: ++ * bits 15-9: same ++ * bit 8: unipolar/bipolar (should be 0 for bipolar) ++ * bits 0-3: gain. This is 4 bits instead of 3 for the other boards ++ * 1001 gain=0.1 (+/- 50) ++ * 1010 0.2 ++ * 1011 0.1 ++ * 0001 1 ++ * 0010 2 ++ * 0011 5 ++ * 0100 10 ++ * 0101 20 ++ * 0110 50 ++ * Configuration Memory High: ++ * bits 12-14: Channel Type ++ * 001 for differential ++ * 000 for calibration ++ * bit 11: coupling (this is not currently handled) ++ * 1 AC coupling ++ * 0 DC coupling ++ * bits 0-2: channel ++ * valid channels are 0-3 ++ */ ++static void ni_load_channelgain_list(struct a4l_device *dev, ++ unsigned int n_chan, unsigned int *list) ++{ ++ unsigned int chan, range, aref; ++ unsigned int i; ++ unsigned int hi, lo; ++ unsigned offset; ++ unsigned int dither; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ ni_m_series_load_channelgain_list(dev, n_chan, list); ++ return; ++ } ++ if (n_chan == 1 && (boardtype.reg_type != ni_reg_611x) ++ && (boardtype.reg_type != ni_reg_6143)) { ++ if (devpriv->changain_state ++ && devpriv->changain_spec == list[0]) { ++ /* ready to go. */ ++ return; ++ } ++ devpriv->changain_state = 1; ++ devpriv->changain_spec = list[0]; ++ } else { ++ devpriv->changain_state = 0; ++ } ++ ++ devpriv->stc_writew(dev, 1, Configuration_Memory_Clear); ++ ++ /* Set up Calibration mode if required */ ++ if (boardtype.reg_type == ni_reg_6143) { ++ if ((list[0] & CR_ALT_SOURCE) ++ && !devpriv->ai_calib_source_enabled) { ++ /* Strobe Relay enable bit */ ++ ni_writew(devpriv-> ++ ai_calib_source | ++ Calibration_Channel_6143_RelayOn, ++ Calibration_Channel_6143); ++ ni_writew(devpriv->ai_calib_source, ++ Calibration_Channel_6143); ++ devpriv->ai_calib_source_enabled = 1; ++ /* Allow relays to change */ ++ if(rtdm_in_rt_context()) ++ rtdm_task_sleep(100*1000000); ++ else ++ msleep_interruptible(100); ++ } else if (!(list[0] & CR_ALT_SOURCE) ++ && devpriv->ai_calib_source_enabled) { ++ /* Strobe Relay disable bit */ ++ ni_writew(devpriv-> ++ ai_calib_source | ++ Calibration_Channel_6143_RelayOff, ++ Calibration_Channel_6143); ++ ni_writew(devpriv->ai_calib_source, ++ Calibration_Channel_6143); ++ devpriv->ai_calib_source_enabled = 0; ++ /* Allow relays to change */ ++ if(rtdm_in_rt_context()) ++ rtdm_task_sleep(100*1000000); ++ else ++ msleep_interruptible(100); ++ } ++ } ++ ++ offset = 1 << (boardtype.adbits - 1); ++ for (i = 0; i < n_chan; i++) { ++ if ((boardtype.reg_type != ni_reg_6143) ++ && (list[i] & CR_ALT_SOURCE)) { ++ chan = devpriv->ai_calib_source; ++ } else { ++ chan = CR_CHAN(list[i]); ++ } ++ aref = CR_AREF(list[i]); ++ range = CR_RNG(list[i]); ++ dither = ((list[i] & CR_ALT_FILTER) != 0); ++ ++ /* fix the external/internal range differences */ ++ range = ni_gainlkup[boardtype.gainlkup][range]; ++ if (boardtype.reg_type == ni_reg_611x) ++ devpriv->ai_offset[i] = offset; ++ else ++ devpriv->ai_offset[i] = (range & 
0x100) ? 0 : offset; ++ ++ hi = 0; ++ if ((list[i] & CR_ALT_SOURCE)) { ++ if (boardtype.reg_type == ni_reg_611x) ++ ni_writew(CR_CHAN(list[i]) & 0x0003, ++ Calibration_Channel_Select_611x); ++ } else { ++ if (boardtype.reg_type == ni_reg_611x) ++ aref = AREF_DIFF; ++ else if (boardtype.reg_type == ni_reg_6143) ++ aref = AREF_OTHER; ++ switch (aref) { ++ case AREF_DIFF: ++ hi |= AI_DIFFERENTIAL; ++ break; ++ case AREF_COMMON: ++ hi |= AI_COMMON; ++ break; ++ case AREF_GROUND: ++ hi |= AI_GROUND; ++ break; ++ case AREF_OTHER: ++ break; ++ } ++ } ++ hi |= AI_CONFIG_CHANNEL(chan); ++ ++ ni_writew(hi, Configuration_Memory_High); ++ ++ if (boardtype.reg_type != ni_reg_6143) { ++ lo = range; ++ if (i == n_chan - 1) ++ lo |= AI_LAST_CHANNEL; ++ if (dither) ++ lo |= AI_DITHER; ++ ++ ni_writew(lo, Configuration_Memory_Low); ++ } ++ } ++ ++ /* prime the channel/gain list */ ++ if ((boardtype.reg_type != ni_reg_611x) ++ && (boardtype.reg_type != ni_reg_6143)) { ++ ni_prime_channelgain_list(dev); ++ } ++} ++ ++static int ni_ns_to_timer(const struct a4l_device *dev, ++ unsigned int nanosec, int round_mode) ++{ ++ int divider; ++ switch (round_mode) { ++ case TRIG_ROUND_NEAREST: ++ default: ++ divider = (nanosec + devpriv->clock_ns / 2) / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_DOWN: ++ divider = (nanosec) / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_UP: ++ divider = (nanosec + devpriv->clock_ns - 1) / devpriv->clock_ns; ++ break; ++ } ++ return divider - 1; ++} ++ ++static unsigned int ni_timer_to_ns(const struct a4l_device *dev, int timer) ++{ ++ return devpriv->clock_ns * (timer + 1); ++} ++ ++static unsigned int ni_min_ai_scan_period_ns(struct a4l_device *dev, ++ unsigned int num_channels) ++{ ++ switch (boardtype.reg_type) { ++ case ni_reg_611x: ++ case ni_reg_6143: ++ /* simultaneously-sampled inputs */ ++ return boardtype.ai_speed; ++ break; ++ default: ++ /* multiplexed inputs */ ++ break; ++ }; ++ return boardtype.ai_speed * num_channels; ++} ++ ++static struct a4l_cmd_desc mio_ai_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW | TRIG_INT | TRIG_EXT, ++ .scan_begin_src = TRIG_TIMER | TRIG_EXT, ++ .convert_src = TRIG_TIMER | TRIG_EXT | TRIG_NOW, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_COUNT | TRIG_NONE, ++}; ++ ++int ni_ai_inttrig(struct a4l_subdevice *subd, lsampl_t trignum) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ if (trignum != 0) ++ return -EINVAL; ++ ++ devpriv->stc_writew(dev, AI_START1_Pulse | devpriv->ai_cmd2, ++ AI_Command_2_Register); ++ ++ return 1; ++} ++ ++#define cfc_check_trigger_arg_is(a,b) __cfc_check_trigger_arg_is(a,b, dev, __LINE__) ++static inline int __cfc_check_trigger_arg_is(unsigned int *arg, ++ unsigned int val, ++ struct a4l_device *dev, ++ unsigned int line) ++{ ++ if (*arg != val) { ++ a4l_dbg(1, drv_dbg, dev, "line %d: *arg (%d) != val (%d) \n", ++ line, *arg, val); ++ *arg = val; ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++#define cfc_check_trigger_is_unique(a) __cfc_check_trigger_is_unique(a, dev, __LINE__) ++static inline int __cfc_check_trigger_is_unique(unsigned int src, ++ struct a4l_device *dev, ++ unsigned int line) ++{ ++ /* this test is true if more than one _src bit is set */ ++ if ((src & (src - 1)) != 0) { ++ a4l_dbg(1, drv_dbg, dev, "line %d: src (%d) \n", line, src); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++#define cfc_check_trigger_src(a,b) __cfc_check_trigger_src(a,b, dev, __LINE__) ++static inline int __cfc_check_trigger_src(unsigned int *src, ++ unsigned int flags, ++ struct a4l_device *dev, ++ 
unsigned int line) ++{ ++ unsigned int orig_src = *src; ++ ++ *src = orig_src & flags; ++ if (*src == 0 || *src != orig_src){ ++ a4l_dbg(1, drv_dbg, dev, "line %d: *src (%d) orig_src (%d) flags(%d) \n", ++ line, *src, orig_src, flags); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++#define cfc_check_trigger_arg_min(a,b) __cfc_check_trigger_arg_min(a,b, dev, __LINE__) ++static inline int __cfc_check_trigger_arg_min(unsigned int *arg, ++ unsigned int val, ++ struct a4l_device *dev, ++ unsigned int line) ++{ ++ if (*arg < val) { ++ a4l_dbg(1, drv_dbg, dev, "line %d: *arg (%d) < val (%d) \n", ++ line, *arg, val); ++ *arg = val; ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++#define cfc_check_trigger_arg_max(a,b) __cfc_check_trigger_arg_max(a,b, dev, __LINE__) ++static inline int __cfc_check_trigger_arg_max(unsigned int *arg, ++ unsigned int val, ++ struct a4l_device *dev, ++ unsigned int line) ++{ ++ if (*arg > val) { ++ a4l_dbg(1, drv_dbg, dev, "line %d: *arg (%d) > val (%d) \n", ++ line, *arg, val); ++ *arg = val; ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int ni_ai_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int sources; ++ int tmp, err = 0; ++ ++ /* Step 1 : check if triggers are trivially valid */ ++ err |= cfc_check_trigger_src(&cmd->start_src, TRIG_NOW | TRIG_INT | TRIG_EXT); ++ err |= cfc_check_trigger_src(&cmd->scan_begin_src, TRIG_TIMER | TRIG_EXT); ++ ++ sources = TRIG_TIMER | TRIG_EXT; ++ if (boardtype.reg_type == ni_reg_611x || boardtype.reg_type == ni_reg_6143) ++ sources |= TRIG_NOW; ++ ++ err |= cfc_check_trigger_src(&cmd->convert_src, sources); ++ err |= cfc_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT); ++ err |= cfc_check_trigger_src(&cmd->stop_src, TRIG_COUNT | TRIG_NONE); ++ ++ if (err) { ++ if (cmd->valid_simul_stages & BIT(1)) ++ return 0; ++ ++ a4l_dbg(1, drv_dbg, dev, "ai_cmdtest ERR 1 \n"); ++ return -EINVAL; ++ } ++ ++ /* Step 2a : make sure trigger sources are unique */ ++ err |= cfc_check_trigger_is_unique(cmd->start_src); ++ err |= cfc_check_trigger_is_unique(cmd->scan_begin_src); ++ err |= cfc_check_trigger_is_unique(cmd->convert_src); ++ err |= cfc_check_trigger_is_unique(cmd->stop_src); ++ ++ /* Step 2b : and mutually compatible */ ++ ++ if (err) { ++ if (cmd->valid_simul_stages & BIT(2)) ++ return 0; ++ ++ a4l_dbg(1, drv_dbg, dev, "ai_cmdtest ERR 2 \n"); ++ return -EINVAL; ++ } ++ ++ /* Step 3: check if arguments are trivially valid */ ++ ++ if (cmd->start_src == TRIG_EXT) { ++ /* external trigger */ ++ unsigned int tmp = CR_CHAN(cmd->start_arg); ++ if (tmp > 16) ++ tmp = 16; ++ tmp |= (cmd->start_arg & (CR_INVERT | CR_EDGE)); ++ err |= cfc_check_trigger_arg_is(&cmd->start_arg, tmp); ++ ++ } else { ++ /* true for both TRIG_NOW and TRIG_INT */ ++ err |= cfc_check_trigger_arg_is(&cmd->start_arg, 0); ++ } ++ ++ if (cmd->scan_begin_src == TRIG_TIMER) { ++ err |= cfc_check_trigger_arg_min(&cmd->scan_begin_arg, ++ ni_min_ai_scan_period_ns(dev, cmd->nb_chan)); ++ ++ err |= cfc_check_trigger_arg_max(&cmd->scan_begin_arg, ++ devpriv->clock_ns * 0xffffff); ++ } else if (cmd->scan_begin_src == TRIG_EXT) { ++ /* external trigger */ ++ unsigned int tmp = CR_CHAN(cmd->scan_begin_arg); ++ ++ if (tmp > 16) ++ tmp = 16; ++ tmp |= (cmd->scan_begin_arg & (CR_INVERT | CR_EDGE)); ++ err |= cfc_check_trigger_arg_is(&cmd->scan_begin_arg, tmp); ++ ++ } else { /* TRIG_OTHER */ ++ err |= cfc_check_trigger_arg_is(&cmd->scan_begin_arg, 0); ++ ++ } ++ ++ if (cmd->convert_src == TRIG_TIMER) { ++ if 
((boardtype.reg_type == ni_reg_611x) ++ || (boardtype.reg_type == ni_reg_6143)) { ++ err |= cfc_check_trigger_arg_is(&cmd->convert_arg, 0); ++ ++ } else { ++ err |= cfc_check_trigger_arg_min(&cmd->convert_arg, ++ boardtype.ai_speed); ++ err |= cfc_check_trigger_arg_max(&cmd->convert_arg, ++ devpriv->clock_ns * 0xffff); ++ } ++ } else if (cmd->convert_src == TRIG_EXT) { ++ /* external trigger */ ++ unsigned int tmp = CR_CHAN(cmd->convert_arg); ++ ++ if (tmp > 16) ++ tmp = 16; ++ tmp |= (cmd->convert_arg & (CR_ALT_FILTER | CR_INVERT)); ++ err |= cfc_check_trigger_arg_is(&cmd->convert_arg, tmp); ++ } else if (cmd->convert_src == TRIG_NOW) { ++ err |= cfc_check_trigger_arg_is(&cmd->convert_arg, 0); ++ } ++ ++ err |= cfc_check_trigger_arg_is(&cmd->scan_end_arg, cmd->nb_chan); ++ ++ if (cmd->stop_src == TRIG_COUNT) { ++ unsigned int max_count = 0x01000000; ++ ++ if (boardtype.reg_type == ni_reg_611x) ++ max_count -= num_adc_stages_611x; ++ err |= cfc_check_trigger_arg_max(&cmd->stop_arg, max_count); ++ err |= cfc_check_trigger_arg_min(&cmd->stop_arg, 1); ++ ++ } else { ++ /* TRIG_NONE */ ++ err |= cfc_check_trigger_arg_is(&cmd->stop_arg, 0); ++ } ++ ++ if (err) { ++ if (cmd->valid_simul_stages & BIT(3)) ++ return 0; ++ ++ a4l_dbg(1, drv_dbg, dev, "ai_cmdtest ERR 3 \n"); ++ return 3; ++ } ++ ++ /* step 4: fix up any arguments */ ++ if (cmd->scan_begin_src == TRIG_TIMER) { ++ tmp = cmd->scan_begin_arg; ++ cmd->scan_begin_arg = ++ ni_timer_to_ns(dev, ni_ns_to_timer(dev, ++ cmd->scan_begin_arg, ++ cmd->flags & ++ TRIG_ROUND_MASK)); ++ if (tmp != cmd->scan_begin_arg) ++ err++; ++ } ++ if (cmd->convert_src == TRIG_TIMER) { ++ if ((boardtype.reg_type != ni_reg_611x) ++ && (boardtype.reg_type != ni_reg_6143)) { ++ tmp = cmd->convert_arg; ++ cmd->convert_arg = ++ ni_timer_to_ns(dev, ni_ns_to_timer(dev, ++ cmd->convert_arg, ++ cmd-> ++ flags & ++ TRIG_ROUND_MASK)); ++ if (tmp != cmd->convert_arg) ++ err++; ++ if (cmd->scan_begin_src == TRIG_TIMER && ++ cmd->scan_begin_arg < ++ cmd->convert_arg * cmd->scan_end_arg) { ++ cmd->scan_begin_arg = ++ cmd->convert_arg * cmd->scan_end_arg; ++ err++; ++ } ++ } ++ } ++ ++ if (err) { ++ if (cmd->valid_simul_stages & BIT(4)) ++ return 0; ++ ++ a4l_dbg(1, drv_dbg, dev, "ai_cmdtest ERR 4 \n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++ ++ ++} ++ ++static int ni_ai_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ int timer; ++ int mode1 = 0; /* mode1 is needed for both stop and convert */ ++ int mode2 = 0; ++ int start_stop_select = 0; ++ unsigned int stop_count; ++ int interrupt_a_enable = 0; ++ ++ a4l_info(dev, "start\n"); ++ ++ if (a4l_get_irq(dev) == A4L_IRQ_UNUSED) { ++ a4l_err(dev, "ni_ai_cmd: cannot run command without an irq"); ++ return -EIO; ++ } ++ ni_clear_ai_fifo(dev); ++ ++ ni_load_channelgain_list(dev, cmd->nb_chan, cmd->chan_descs); ++ ++ /* start configuration */ ++ devpriv->stc_writew(dev, AI_Configuration_Start, Joint_Reset_Register); ++ ++ /* disable analog triggering for now, since it ++ * interferes with the use of pfi0 */ ++ devpriv->an_trig_etc_reg &= ~Analog_Trigger_Enable; ++ devpriv->stc_writew(dev, devpriv->an_trig_etc_reg, ++ Analog_Trigger_Etc_Register); ++ ++ switch (cmd->start_src) { ++ case TRIG_INT: ++ case TRIG_NOW: ++ devpriv->stc_writew(dev, AI_START2_Select(0) | ++ AI_START1_Sync | AI_START1_Edge | AI_START1_Select(0), ++ AI_Trigger_Select_Register); ++ break; ++ case TRIG_EXT: ++ { ++ int chan = CR_CHAN(cmd->start_arg); ++ unsigned int bits = AI_START2_Select(0) | ++ 
AI_START1_Sync | AI_START1_Select(chan + 1); ++ ++ if (cmd->start_arg & CR_INVERT) ++ bits |= AI_START1_Polarity; ++ if (cmd->start_arg & CR_EDGE) ++ bits |= AI_START1_Edge; ++ devpriv->stc_writew(dev, bits, ++ AI_Trigger_Select_Register); ++ break; ++ } ++ } ++ ++ mode2 &= ~AI_Pre_Trigger; ++ mode2 &= ~AI_SC_Initial_Load_Source; ++ mode2 &= ~AI_SC_Reload_Mode; ++ devpriv->stc_writew(dev, mode2, AI_Mode_2_Register); ++ ++ if (cmd->nb_chan == 1 || (boardtype.reg_type == ni_reg_611x) ++ || (boardtype.reg_type == ni_reg_6143)) { ++ start_stop_select |= AI_STOP_Polarity; ++ start_stop_select |= AI_STOP_Select(31);/* logic low */ ++ start_stop_select |= AI_STOP_Sync; ++ } else { ++ start_stop_select |= AI_STOP_Select(19);/* ai configuration memory */ ++ } ++ devpriv->stc_writew(dev, start_stop_select, ++ AI_START_STOP_Select_Register); ++ ++ devpriv->ai_cmd2 = 0; ++ switch (cmd->stop_src) { ++ case TRIG_COUNT: ++ stop_count = cmd->stop_arg - 1; ++ ++ if (boardtype.reg_type == ni_reg_611x) { ++ /* have to take 3 stage adc pipeline into account */ ++ stop_count += num_adc_stages_611x; ++ } ++ /* stage number of scans */ ++ devpriv->stc_writel(dev, stop_count, AI_SC_Load_A_Registers); ++ ++ mode1 |= AI_Start_Stop | AI_Mode_1_Reserved | AI_Trigger_Once; ++ devpriv->stc_writew(dev, mode1, AI_Mode_1_Register); ++ /* load SC (Scan Count) */ ++ devpriv->stc_writew(dev, AI_SC_Load, AI_Command_1_Register); ++ ++ devpriv->ai_continuous = 0; ++ if (stop_count == 0) { ++ devpriv->ai_cmd2 |= AI_End_On_End_Of_Scan; ++ interrupt_a_enable |= AI_STOP_Interrupt_Enable; ++ /* this is required to get the last sample ++ for nb_chan > 1, not sure why */ ++ if (cmd->nb_chan > 1) ++ start_stop_select |= ++ AI_STOP_Polarity | AI_STOP_Edge; ++ } ++ break; ++ case TRIG_NONE: ++ /* stage number of scans */ ++ devpriv->stc_writel(dev, 0, AI_SC_Load_A_Registers); ++ ++ mode1 |= AI_Start_Stop | AI_Mode_1_Reserved | AI_Continuous; ++ devpriv->stc_writew(dev, mode1, AI_Mode_1_Register); ++ ++ /* load SC (Scan Count) */ ++ devpriv->stc_writew(dev, AI_SC_Load, AI_Command_1_Register); ++ ++ devpriv->ai_continuous = 1; ++ ++ break; ++ } ++ ++ switch (cmd->scan_begin_src) { ++ case TRIG_TIMER: ++ /* ++ stop bits for non 611x boards ++ AI_SI_Special_Trigger_Delay=0 ++ AI_Pre_Trigger=0 ++ AI_START_STOP_Select_Register: ++ AI_START_Polarity=0 (?) rising edge ++ AI_START_Edge=1 edge triggered ++ AI_START_Sync=1 (?) 
++ AI_START_Select=0 SI_TC ++ AI_STOP_Polarity=0 rising edge ++ AI_STOP_Edge=0 level ++ AI_STOP_Sync=1 ++ AI_STOP_Select=19 external pin (configuration mem) ++ */ ++ start_stop_select |= AI_START_Edge | AI_START_Sync; ++ devpriv->stc_writew(dev, start_stop_select, ++ AI_START_STOP_Select_Register); ++ ++ mode2 |= AI_SI_Reload_Mode(0); ++ /* AI_SI_Initial_Load_Source=A */ ++ mode2 &= ~AI_SI_Initial_Load_Source; ++ ++ devpriv->stc_writew(dev, mode2, AI_Mode_2_Register); ++ ++ /* load SI */ ++ timer = ni_ns_to_timer(dev, cmd->scan_begin_arg, ++ TRIG_ROUND_NEAREST); ++ devpriv->stc_writel(dev, timer, AI_SI_Load_A_Registers); ++ devpriv->stc_writew(dev, AI_SI_Load, AI_Command_1_Register); ++ break; ++ case TRIG_EXT: ++ if (cmd->scan_begin_arg & CR_EDGE) ++ start_stop_select |= AI_START_Edge; ++ /* AI_START_Polarity==1 is falling edge */ ++ if (cmd->scan_begin_arg & CR_INVERT) ++ start_stop_select |= AI_START_Polarity; ++ if (cmd->scan_begin_src != cmd->convert_src || ++ (cmd->scan_begin_arg & ~CR_EDGE) != ++ (cmd->convert_arg & ~CR_EDGE)) ++ start_stop_select |= AI_START_Sync; ++ start_stop_select |= ++ AI_START_Select(1 + CR_CHAN(cmd->scan_begin_arg)); ++ devpriv->stc_writew(dev, start_stop_select, ++ AI_START_STOP_Select_Register); ++ break; ++ } ++ ++ switch (cmd->convert_src) { ++ case TRIG_TIMER: ++ case TRIG_NOW: ++ if (cmd->convert_arg == 0 || cmd->convert_src == TRIG_NOW) ++ timer = 1; ++ else ++ timer = ni_ns_to_timer(dev, cmd->convert_arg, ++ TRIG_ROUND_NEAREST); ++ devpriv->stc_writew(dev, 1, AI_SI2_Load_A_Register); /* 0,0 does not work. */ ++ devpriv->stc_writew(dev, timer, AI_SI2_Load_B_Register); ++ ++ /* AI_SI2_Reload_Mode = alternate */ ++ /* AI_SI2_Initial_Load_Source = A */ ++ mode2 &= ~AI_SI2_Initial_Load_Source; ++ mode2 |= AI_SI2_Reload_Mode; ++ devpriv->stc_writew(dev, mode2, AI_Mode_2_Register); ++ ++ /* AI_SI2_Load */ ++ devpriv->stc_writew(dev, AI_SI2_Load, AI_Command_1_Register); ++ ++ mode2 |= AI_SI2_Reload_Mode; /* alternate */ ++ mode2 |= AI_SI2_Initial_Load_Source; /* B */ ++ ++ devpriv->stc_writew(dev, mode2, AI_Mode_2_Register); ++ break; ++ case TRIG_EXT: ++ mode1 |= AI_CONVERT_Source_Select(1 + cmd->convert_arg); ++ if ((cmd->convert_arg & CR_INVERT) == 0) ++ mode1 |= AI_CONVERT_Source_Polarity; ++ devpriv->stc_writew(dev, mode1, AI_Mode_1_Register); ++ ++ mode2 |= AI_Start_Stop_Gate_Enable | AI_SC_Gate_Enable; ++ devpriv->stc_writew(dev, mode2, AI_Mode_2_Register); ++ ++ break; ++ } ++ ++ if (a4l_get_irq(dev) != A4L_IRQ_UNUSED) { ++ ++ /* interrupt on FIFO, errors, SC_TC */ ++ interrupt_a_enable |= AI_Error_Interrupt_Enable | ++ AI_SC_TC_Interrupt_Enable; ++ ++#if (!defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) && \ ++ !defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ interrupt_a_enable |= AI_FIFO_Interrupt_Enable; ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ if (cmd->flags & TRIG_WAKE_EOS ++ || (devpriv->ai_cmd2 & AI_End_On_End_Of_Scan)) { ++ /* wake on end-of-scan */ ++ devpriv->aimode = AIMODE_SCAN; ++ } else { ++ devpriv->aimode = AIMODE_HALF_FULL; ++ } ++ ++ switch (devpriv->aimode) { ++ case AIMODE_HALF_FULL: ++ /* generate FIFO interrupts and DMA requests on half-full */ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ devpriv->stc_writew(dev, AI_FIFO_Mode_HF_to_E, ++ AI_Mode_3_Register); ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ devpriv->stc_writew(dev, AI_FIFO_Mode_HF, ++ AI_Mode_3_Register); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ break; ++ 
case AIMODE_SAMPLE: ++ /* generate FIFO interrupts on non-empty */ ++ devpriv->stc_writew(dev, AI_FIFO_Mode_NE, ++ AI_Mode_3_Register); ++ break; ++ case AIMODE_SCAN: ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ devpriv->stc_writew(dev, AI_FIFO_Mode_NE, ++ AI_Mode_3_Register); ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ devpriv->stc_writew(dev, AI_FIFO_Mode_HF, ++ AI_Mode_3_Register); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ interrupt_a_enable |= AI_STOP_Interrupt_Enable; ++ break; ++ default: ++ break; ++ } ++ ++ /* Clear interrupts */ ++ devpriv->stc_writew(dev, ++ AI_Error_Interrupt_Ack | AI_STOP_Interrupt_Ack | ++ AI_START_Interrupt_Ack | AI_START2_Interrupt_Ack | ++ AI_START1_Interrupt_Ack | AI_SC_TC_Interrupt_Ack | ++ AI_SC_TC_Error_Confirm, Interrupt_A_Ack_Register); /* clear interrupts */ ++ ++ ni_set_bits(dev, Interrupt_A_Enable_Register, ++ interrupt_a_enable, 1); ++ ++ a4l_info(dev, "Interrupt_A_Enable_Register = 0x%04x\n", ++ devpriv->int_a_enable_reg); ++ } else { ++ /* interrupt on nothing */ ++ ni_set_bits(dev, Interrupt_A_Enable_Register, ~0, 0); ++ ++ /* XXX start polling if necessary */ ++ a4l_warn(dev, "ni_ai_cmd: interrupting on nothing\n"); ++ } ++ ++ /* end configuration */ ++ devpriv->stc_writew(dev, AI_Configuration_End, Joint_Reset_Register); ++ ++ switch (cmd->scan_begin_src) { ++ case TRIG_TIMER: ++ devpriv->stc_writew(dev, ++ AI_SI2_Arm | AI_SI_Arm | AI_DIV_Arm | AI_SC_Arm, ++ AI_Command_1_Register); ++ break; ++ case TRIG_EXT: ++ /* XXX AI_SI_Arm? */ ++ devpriv->stc_writew(dev, ++ AI_SI2_Arm | AI_SI_Arm | AI_DIV_Arm | AI_SC_Arm, ++ AI_Command_1_Register); ++ break; ++ } ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ { ++ int retval = ni_ai_setup_MITE_dma(subd); ++ if (retval) ++ return retval; ++ } ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ switch (cmd->start_src) { ++ case TRIG_NOW: ++ /* AI_START1_Pulse */ ++ devpriv->stc_writew(dev, AI_START1_Pulse | devpriv->ai_cmd2, ++ AI_Command_2_Register); ++ break; ++ case TRIG_EXT: ++ /* TODO: set trigger callback field to NULL */ ++ break; ++ case TRIG_INT: ++ /* TODO: set trigger callback field to ni_ai_inttrig */ ++ break; ++ } ++ ++ a4l_info(dev, "exit\n"); ++ ++ return 0; ++} ++ ++int ni_ai_config_analog_trig(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int a, b, modebits; ++ int err = 0; ++ uint32_t *data = (uint32_t *)insn->data; ++ ++ /* data[1] is flags ++ * data[2] is analog line ++ * data[3] is set level ++ * data[4] is reset level */ ++ if (!boardtype.has_analog_trig) ++ return -EINVAL; ++ ++ if ((data[1] & 0xffff0000) != A4L_EV_SCAN_BEGIN) { ++ data[1] &= (A4L_EV_SCAN_BEGIN | 0xffff); ++ err++; ++ } ++ if (data[2] >= boardtype.n_adchan) { ++ data[2] = boardtype.n_adchan - 1; ++ err++; ++ } ++ if (data[3] > 255) { /* a */ ++ data[3] = 255; ++ err++; ++ } ++ if (data[4] > 255) { /* b */ ++ data[4] = 255; ++ err++; ++ } ++ /* ++ * 00 ignore ++ * 01 set ++ * 10 reset ++ * ++ * modes: ++ * 1 level: +b- +a- ++ * high mode 00 00 01 10 ++ * low mode 00 00 10 01 ++ * 2 level: (a> 4); ++ } ++ devpriv->atrig_low = a; ++ devpriv->atrig_high = b; ++ switch (modebits) { ++ case 0x81: /* low hysteresis mode */ ++ devpriv->atrig_mode = 6; ++ break; ++ case 0x42: /* high hysteresis mode */ ++ devpriv->atrig_mode = 3; ++ break; ++ case 0x96: /* middle window mode */ ++ 
devpriv->atrig_mode = 2; ++ break; ++ default: ++ data[1] &= ~0xff; ++ err++; ++ } ++ } else { ++ /* one level mode */ ++ if (b != 0) { ++ data[4] = 0; ++ err++; ++ } ++ switch (modebits) { ++ case 0x06: /* high window mode */ ++ devpriv->atrig_high = a; ++ devpriv->atrig_mode = 0; ++ break; ++ case 0x09: /* low window mode */ ++ devpriv->atrig_low = a; ++ devpriv->atrig_mode = 1; ++ break; ++ default: ++ data[1] &= ~0xff; ++ err++; ++ } ++ } ++ ++ if (err) ++ return -EAGAIN; ++ ++ return 0; ++} ++ ++int ni_ai_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ if (insn->data_size < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_ANALOG_TRIG: ++ return ni_ai_config_analog_trig(subd, insn); ++ case A4L_INSN_CONFIG_ALT_SOURCE: ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ if (data[1] & ~(MSeries_AI_Bypass_Cal_Sel_Pos_Mask | ++ MSeries_AI_Bypass_Cal_Sel_Neg_Mask | ++ MSeries_AI_Bypass_Mode_Mux_Mask | ++ MSeries_AO_Bypass_AO_Cal_Sel_Mask)) { ++ return -EINVAL; ++ } ++ devpriv->ai_calib_source = data[1]; ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ unsigned int calib_source; ++ ++ calib_source = data[1] & 0xf; ++ ++ if (calib_source > 0xF) ++ return -EINVAL; ++ ++ devpriv->ai_calib_source = calib_source; ++ ni_writew(calib_source, Calibration_Channel_6143); ++ } else { ++ unsigned int calib_source; ++ unsigned int calib_source_adjust; ++ ++ calib_source = data[1] & 0xf; ++ calib_source_adjust = (data[1] >> 4) & 0xff; ++ ++ if (calib_source >= 8) ++ return -EINVAL; ++ devpriv->ai_calib_source = calib_source; ++ if (boardtype.reg_type == ni_reg_611x) { ++ ni_writeb(calib_source_adjust, ++ Cal_Gain_Select_611x); ++ } ++ } ++ return 0; ++ default: ++ break; ++ } ++ ++ return -EINVAL; ++} ++ ++/* munge data from unsigned to 2's complement for analog output bipolar modes */ ++static void ni_ao_munge(struct a4l_subdevice *subd, void *buf, unsigned long size) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct a4l_cmd_desc *cmd = a4l_get_cmd(subd); ++ int chan_idx = a4l_get_chan(subd); ++ uint16_t *array = buf; ++ unsigned int i, range, offset; ++ ++ offset = 1 << (boardtype.aobits - 1); ++ for (i = 0; i < size / sizeof(uint16_t); i++) { ++ ++ range = CR_RNG(cmd->chan_descs[chan_idx]); ++ if (boardtype.ao_unipolar == 0 || (range & 1) == 0) ++ array[i] -= offset; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ array[i] = cpu_to_le16(array[i]); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ chan_idx++; ++ chan_idx %= cmd->nb_chan; ++ } ++} ++ ++static int ni_m_series_ao_config_chan_descs(struct a4l_subdevice *subd, ++ unsigned int chanspec[], ++ unsigned int n_chans, int timed) ++{ ++ unsigned int range; ++ unsigned int chan; ++ unsigned int conf; ++ int i, invert = 0; ++ struct a4l_device *dev = subd->dev; ++ ++ for (i = 0; i < boardtype.n_aochan; ++i) { ++ ni_writeb(0xf, M_Offset_AO_Waveform_Order(i)); ++ } ++ for (i = 0; i < n_chans; i++) { ++ struct a4l_range *rng; ++ int idx; ++ chan = CR_CHAN(chanspec[i]); ++ range = CR_RNG(chanspec[i]); ++ ++ /* TODO: this a huge hack! ++ Something is missing in the kernel API. We must ++ allow access on the proper range descriptor */ ++ idx = (subd->rng_desc->mode != ++ A4L_RNG_GLOBAL_RNGDESC) ? 
chan : 0; ++ rng = &(subd->rng_desc->rngtabs[idx]->rngs[range]); ++ ++ invert = 0; ++ conf = 0; ++ switch (rng->max - rng->min) { ++ case 20000000: ++ conf |= MSeries_AO_DAC_Reference_10V_Internal_Bits; ++ ni_writeb(0, M_Offset_AO_Reference_Attenuation(chan)); ++ break; ++ case 10000000: ++ conf |= MSeries_AO_DAC_Reference_5V_Internal_Bits; ++ ni_writeb(0, M_Offset_AO_Reference_Attenuation(chan)); ++ break; ++ case 4000000: ++ conf |= MSeries_AO_DAC_Reference_10V_Internal_Bits; ++ ni_writeb(MSeries_Attenuate_x5_Bit, ++ M_Offset_AO_Reference_Attenuation(chan)); ++ break; ++ case 2000000: ++ conf |= MSeries_AO_DAC_Reference_5V_Internal_Bits; ++ ni_writeb(MSeries_Attenuate_x5_Bit, ++ M_Offset_AO_Reference_Attenuation(chan)); ++ break; ++ default: ++ a4l_err(subd->dev, ++ "%s: bug! unhandled ao reference voltage\n", ++ __FUNCTION__); ++ break; ++ } ++ switch (rng->max + rng->min) { ++ case 0: ++ conf |= MSeries_AO_DAC_Offset_0V_Bits; ++ break; ++ case 10000000: ++ conf |= MSeries_AO_DAC_Offset_5V_Bits; ++ break; ++ default: ++ a4l_err(subd->dev, ++ "%s: bug! unhandled ao offset voltage\n", ++ __FUNCTION__); ++ break; ++ } ++ if (timed) ++ conf |= MSeries_AO_Update_Timed_Bit; ++ ni_writeb(conf, M_Offset_AO_Config_Bank(chan)); ++ devpriv->ao_conf[chan] = conf; ++ ni_writeb(i, M_Offset_AO_Waveform_Order(chan)); ++ } ++ return invert; ++} ++ ++static int ni_old_ao_config_chan_descs(struct a4l_subdevice *subd, ++ unsigned int chanspec[], ++ unsigned int n_chans) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int range; ++ unsigned int chan; ++ unsigned int conf; ++ int i, invert = 0; ++ ++ for (i = 0; i < n_chans; i++) { ++ chan = CR_CHAN(chanspec[i]); ++ range = CR_RNG(chanspec[i]); ++ conf = AO_Channel(chan); ++ ++ if (boardtype.ao_unipolar) { ++ if ((range & 1) == 0) { ++ conf |= AO_Bipolar; ++ invert = (1 << (boardtype.aobits - 1)); ++ } else { ++ invert = 0; ++ } ++ if (range & 2) ++ conf |= AO_Ext_Ref; ++ } else { ++ conf |= AO_Bipolar; ++ invert = (1 << (boardtype.aobits - 1)); ++ } ++ ++ /* not all boards can deglitch, but this shouldn't hurt */ ++ if (chanspec[i] & CR_DEGLITCH) ++ conf |= AO_Deglitch; ++ ++ /* analog reference */ ++ /* AREF_OTHER connects AO ground to AI ground, i think */ ++ conf |= (CR_AREF(chanspec[i]) == ++ AREF_OTHER) ? 
AO_Ground_Ref : 0; ++ ++ ni_writew(conf, AO_Configuration); ++ devpriv->ao_conf[chan] = conf; ++ } ++ return invert; ++} ++ ++static int ni_ao_config_chan_descs(struct a4l_subdevice *subd, ++ unsigned int chanspec[], ++ unsigned int n_chans, int timed) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ return ni_m_series_ao_config_chan_descs(subd, ++ chanspec, ++ n_chans, timed); ++ else ++ return ni_old_ao_config_chan_descs(subd, chanspec, n_chans); ++} ++ ++int ni_ao_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ data[0] = devpriv->ao[CR_CHAN(insn->chan_desc)]; ++ ++ return 0; ++} ++ ++int ni_ao_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int chan = CR_CHAN(insn->chan_desc); ++ uint16_t *data = (uint16_t *)insn->data; ++ unsigned int invert; ++ ++ invert = ni_ao_config_chan_descs(subd, ++ &insn->chan_desc, 1, 0); ++ ++ devpriv->ao[chan] = data[0]; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ ni_writew(data[0], M_Offset_DAC_Direct_Data(chan)); ++ } else ++ ni_writew(data[0] ^ invert, ++ (chan) ? DAC1_Direct_Data : DAC0_Direct_Data); ++ ++ return 0; ++} ++ ++int ni_ao_insn_write_671x(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int chan = CR_CHAN(insn->chan_desc); ++ uint16_t *data = (uint16_t *)insn->data; ++ unsigned int invert; ++ ++ ao_win_out(1 << chan, AO_Immediate_671x); ++ invert = 1 << (boardtype.aobits - 1); ++ ++ ni_ao_config_chan_descs(subd, &insn->chan_desc, 1, 0); ++ ++ devpriv->ao[chan] = data[0]; ++ ao_win_out(data[0] ^ invert, DACx_Direct_Data_671x(chan)); ++ ++ return 0; ++} ++ ++int ni_ao_inttrig(struct a4l_subdevice *subd, lsampl_t trignum) ++{ ++ struct a4l_device *dev = subd->dev; ++ int ret, interrupt_b_bits, i; ++ static const int timeout = 1000; ++ ++ if (trignum != 0) ++ return -EINVAL; ++ ++ /* TODO: disable trigger until a command is recorded. 
++ Null trig at beginning prevent ao start trigger from executing ++ more than once per command (and doing things like trying to ++ allocate the ao dma channel multiple times) */ ++ ++ ni_set_bits(dev, Interrupt_B_Enable_Register, ++ AO_FIFO_Interrupt_Enable | AO_Error_Interrupt_Enable, 0); ++ interrupt_b_bits = AO_Error_Interrupt_Enable; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ devpriv->stc_writew(dev, 1, DAC_FIFO_Clear); ++ if (boardtype.reg_type & ni_reg_6xxx_mask) ++ ni_ao_win_outl(dev, 0x6, AO_FIFO_Offset_Load_611x); ++ ret = ni_ao_setup_MITE_dma(subd); ++ if (ret) ++ return ret; ++ ret = ni_ao_wait_for_dma_load(subd); ++ if (ret < 0) ++ return ret; ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ret = ni_ao_prep_fifo(subd); ++ if (ret == 0) ++ return -EPIPE; ++ ++ interrupt_b_bits |= AO_FIFO_Interrupt_Enable; ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ devpriv->stc_writew(dev, devpriv->ao_mode3 | AO_Not_An_UPDATE, ++ AO_Mode_3_Register); ++ devpriv->stc_writew(dev, devpriv->ao_mode3, AO_Mode_3_Register); ++ /* wait for DACs to be loaded */ ++ for (i = 0; i < timeout; i++) { ++ a4l_udelay(1); ++ if ((devpriv->stc_readw(dev,Joint_Status_2_Register) & ++ AO_TMRDACWRs_In_Progress_St) == 0) ++ break; ++ } ++ if (i == timeout) { ++ a4l_err(dev, ++ "ni_ao_inttrig: timed out " ++ "waiting for AO_TMRDACWRs_In_Progress_St to clear"); ++ return -EIO; ++ } ++ /* stc manual says we are need to clear error interrupt after ++ AO_TMRDACWRs_In_Progress_St clears */ ++ devpriv->stc_writew(dev, AO_Error_Interrupt_Ack, ++ Interrupt_B_Ack_Register); ++ ++ ni_set_bits(dev, Interrupt_B_Enable_Register, interrupt_b_bits, 1); ++ ++ devpriv->stc_writew(dev, ++ devpriv->ao_cmd1 | ++ AO_UI_Arm | AO_UC_Arm | ++ AO_BC_Arm | AO_DAC1_Update_Mode | ++ AO_DAC0_Update_Mode, ++ AO_Command_1_Register); ++ ++ devpriv->stc_writew(dev, ++ devpriv->ao_cmd2 | AO_START1_Pulse, ++ AO_Command_2_Register); ++ ++ return 0; ++} ++ ++int ni_ao_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ int bits; ++ int i; ++ unsigned trigvar; ++ ++ if (a4l_get_irq(dev) == A4L_IRQ_UNUSED) { ++ a4l_err(dev, "ni_ao_cmd: cannot run command without an irq"); ++ return -EIO; ++ } ++ ++ devpriv->stc_writew(dev, AO_Configuration_Start, Joint_Reset_Register); ++ ++ devpriv->stc_writew(dev, AO_Disarm, AO_Command_1_Register); ++ ++ if (boardtype.reg_type & ni_reg_6xxx_mask) { ++ ao_win_out(CLEAR_WG, AO_Misc_611x); ++ ++ bits = 0; ++ for (i = 0; i < cmd->nb_chan; i++) { ++ int chan; ++ ++ chan = CR_CHAN(cmd->chan_descs[i]); ++ bits |= 1 << chan; ++ ao_win_out(chan, AO_Waveform_Generation_611x); ++ } ++ ao_win_out(bits, AO_Timed_611x); ++ } ++ ++ ni_ao_config_chan_descs(subd, cmd->chan_descs, cmd->nb_chan, 1); ++ ++ if (cmd->stop_src == TRIG_NONE) { ++ devpriv->ao_mode1 |= AO_Continuous; ++ devpriv->ao_mode1 &= ~AO_Trigger_Once; ++ } else { ++ devpriv->ao_mode1 &= ~AO_Continuous; ++ devpriv->ao_mode1 |= AO_Trigger_Once; ++ } ++ devpriv->stc_writew(dev, devpriv->ao_mode1, AO_Mode_1_Register); ++ devpriv->ao_trigger_select &= ++ ~(AO_START1_Polarity | AO_START1_Select(-1)); ++ devpriv->ao_trigger_select |= AO_START1_Edge | AO_START1_Sync; ++ devpriv->stc_writew(dev, devpriv->ao_trigger_select, ++ AO_Trigger_Select_Register); ++ devpriv->ao_mode3 &= ~AO_Trigger_Length; ++ devpriv->stc_writew(dev, devpriv->ao_mode3, AO_Mode_3_Register); ++ ++ devpriv->stc_writew(dev, devpriv->ao_mode1, AO_Mode_1_Register); 
++ devpriv->ao_mode2 &= ~AO_BC_Initial_Load_Source; ++ devpriv->stc_writew(dev, devpriv->ao_mode2, AO_Mode_2_Register); ++ if (cmd->stop_src == TRIG_NONE) { ++ devpriv->stc_writel(dev, 0xffffff, AO_BC_Load_A_Register); ++ } else { ++ devpriv->stc_writel(dev, 0, AO_BC_Load_A_Register); ++ } ++ devpriv->stc_writew(dev, AO_BC_Load, AO_Command_1_Register); ++ devpriv->ao_mode2 &= ~AO_UC_Initial_Load_Source; ++ devpriv->stc_writew(dev, devpriv->ao_mode2, AO_Mode_2_Register); ++ switch (cmd->stop_src) { ++ case TRIG_COUNT: ++ devpriv->stc_writel(dev, cmd->stop_arg, AO_UC_Load_A_Register); ++ devpriv->stc_writew(dev, AO_UC_Load, AO_Command_1_Register); ++ devpriv->stc_writel(dev, cmd->stop_arg - 1, ++ AO_UC_Load_A_Register); ++ break; ++ case TRIG_NONE: ++ devpriv->stc_writel(dev, 0xffffff, AO_UC_Load_A_Register); ++ devpriv->stc_writew(dev, AO_UC_Load, AO_Command_1_Register); ++ devpriv->stc_writel(dev, 0xffffff, AO_UC_Load_A_Register); ++ break; ++ default: ++ devpriv->stc_writel(dev, 0, AO_UC_Load_A_Register); ++ devpriv->stc_writew(dev, AO_UC_Load, AO_Command_1_Register); ++ devpriv->stc_writel(dev, cmd->stop_arg, AO_UC_Load_A_Register); ++ } ++ ++ devpriv->ao_mode1 &= ++ ~(AO_UI_Source_Select(0x1f) | AO_UI_Source_Polarity | ++ AO_UPDATE_Source_Select(0x1f) | AO_UPDATE_Source_Polarity); ++ switch (cmd->scan_begin_src) { ++ case TRIG_TIMER: ++ devpriv->ao_cmd2 &= ~AO_BC_Gate_Enable; ++ trigvar = ++ ni_ns_to_timer(dev, cmd->scan_begin_arg, ++ TRIG_ROUND_NEAREST); ++ devpriv->stc_writel(dev, 1, AO_UI_Load_A_Register); ++ devpriv->stc_writew(dev, AO_UI_Load, AO_Command_1_Register); ++ devpriv->stc_writel(dev, trigvar, AO_UI_Load_A_Register); ++ break; ++ case TRIG_EXT: ++ devpriv->ao_mode1 |= ++ AO_UPDATE_Source_Select(cmd->scan_begin_arg); ++ if (cmd->scan_begin_arg & CR_INVERT) ++ devpriv->ao_mode1 |= AO_UPDATE_Source_Polarity; ++ devpriv->ao_cmd2 |= AO_BC_Gate_Enable; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ devpriv->stc_writew(dev, devpriv->ao_cmd2, AO_Command_2_Register); ++ devpriv->stc_writew(dev, devpriv->ao_mode1, AO_Mode_1_Register); ++ devpriv->ao_mode2 &= ++ ~(AO_UI_Reload_Mode(3) | AO_UI_Initial_Load_Source); ++ devpriv->stc_writew(dev, devpriv->ao_mode2, AO_Mode_2_Register); ++ ++ if ((boardtype.reg_type & ni_reg_6xxx_mask) == 0) { ++ if (cmd->scan_end_arg > 1) { ++ devpriv->ao_mode1 |= AO_Multiple_Channels; ++ devpriv->stc_writew(dev, ++ AO_Number_Of_Channels(cmd->scan_end_arg - 1) | ++ AO_UPDATE_Output_Select ++ (AO_Update_Output_High_Z), ++ AO_Output_Control_Register); ++ } else { ++ unsigned int bits; ++ devpriv->ao_mode1 &= ~AO_Multiple_Channels; ++ bits = AO_UPDATE_Output_Select(AO_Update_Output_High_Z); ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ bits |= AO_Number_Of_Channels(0); ++ } else { ++ bits |= AO_Number_Of_Channels(CR_CHAN(cmd-> ++ chan_descs[0])); ++ } ++ devpriv->stc_writew(dev, bits, ++ AO_Output_Control_Register); ++ } ++ devpriv->stc_writew(dev, devpriv->ao_mode1, AO_Mode_1_Register); ++ } ++ ++ devpriv->stc_writew(dev, AO_DAC0_Update_Mode | AO_DAC1_Update_Mode, ++ AO_Command_1_Register); ++ ++ devpriv->ao_mode3 |= AO_Stop_On_Overrun_Error; ++ devpriv->stc_writew(dev, devpriv->ao_mode3, AO_Mode_3_Register); ++ ++ devpriv->ao_mode2 &= ~AO_FIFO_Mode_Mask; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ devpriv->ao_mode2 |= AO_FIFO_Mode_HF_to_F; ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ devpriv->ao_mode2 |= AO_FIFO_Mode_HF; ++#endif /* 
CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ devpriv->ao_mode2 &= ~AO_FIFO_Retransmit_Enable; ++ devpriv->stc_writew(dev, devpriv->ao_mode2, AO_Mode_2_Register); ++ ++ bits = AO_BC_Source_Select | AO_UPDATE_Pulse_Width | ++ AO_TMRDACWR_Pulse_Width; ++ if (boardtype.ao_fifo_depth) ++ bits |= AO_FIFO_Enable; ++ else ++ bits |= AO_DMA_PIO_Control; ++#if 0 ++ /* F Hess: windows driver does not set AO_Number_Of_DAC_Packages bit for 6281, ++ verified with bus analyzer. */ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ bits |= AO_Number_Of_DAC_Packages; ++#endif ++ devpriv->stc_writew(dev, bits, AO_Personal_Register); ++ /* enable sending of ao dma requests */ ++ devpriv->stc_writew(dev, AO_AOFREQ_Enable, AO_Start_Select_Register); ++ ++ devpriv->stc_writew(dev, AO_Configuration_End, Joint_Reset_Register); ++ ++ if (cmd->stop_src == TRIG_COUNT) { ++ devpriv->stc_writew(dev, AO_BC_TC_Interrupt_Ack, ++ Interrupt_B_Ack_Register); ++ ni_set_bits(dev, Interrupt_B_Enable_Register, ++ AO_BC_TC_Interrupt_Enable, 1); ++ } ++ ++ return 0; ++} ++ ++struct a4l_cmd_desc mio_ao_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_INT, ++ .scan_begin_src = TRIG_TIMER | TRIG_EXT, ++ .convert_src = TRIG_NOW, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_COUNT | TRIG_NONE, ++}; ++ ++int ni_ao_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ /* Make sure trigger sources are unique and mutually compatible */ ++ ++ if (cmd->stop_src != TRIG_COUNT && cmd->stop_src != TRIG_NONE) ++ return -EINVAL; ++ ++ /* Make sure arguments are trivially compatible */ ++ ++ if (cmd->start_arg != 0) { ++ cmd->start_arg = 0; ++ return -EINVAL; ++ } ++ ++ if (cmd->scan_begin_src == TRIG_TIMER) { ++ if (cmd->scan_begin_arg < boardtype.ao_speed) { ++ cmd->scan_begin_arg = boardtype.ao_speed; ++ return -EINVAL; ++ } ++ if (cmd->scan_begin_arg > devpriv->clock_ns * 0xffffff) { ++ /* XXX check */ ++ cmd->scan_begin_arg = devpriv->clock_ns * 0xffffff; ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd->convert_arg != 0) { ++ cmd->convert_arg = 0; ++ return -EINVAL; ++ } ++ if (cmd->scan_end_arg != cmd->nb_chan) { ++ cmd->scan_end_arg = cmd->nb_chan; ++ return -EINVAL; ++ } ++ if (cmd->stop_src == TRIG_COUNT) { ++ /* XXX check */ ++ if (cmd->stop_arg > 0x00ffffff) { ++ cmd->stop_arg = 0x00ffffff; ++ return -EINVAL; ++ } ++ } else { ++ /* TRIG_NONE */ ++ if (cmd->stop_arg != 0) { ++ cmd->stop_arg = 0; ++ return -EINVAL; ++ } ++ } ++ ++ /* step 4: fix up any arguments */ ++ if (cmd->scan_begin_src == TRIG_TIMER) { ++ ++ if(cmd->scan_begin_arg != ++ ni_timer_to_ns(dev, ++ ni_ns_to_timer(dev, ++ cmd->scan_begin_arg, ++ cmd->flags & TRIG_ROUND_MASK))) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++void ni_ao_reset(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ ni_release_ao_mite_channel(dev); ++ ++ devpriv->stc_writew(dev, AO_Configuration_Start, Joint_Reset_Register); ++ devpriv->stc_writew(dev, AO_Disarm, AO_Command_1_Register); ++ ni_set_bits(dev, Interrupt_B_Enable_Register, ~0, 0); ++ devpriv->stc_writew(dev, AO_BC_Source_Select, AO_Personal_Register); ++ devpriv->stc_writew(dev, 0x3f98, Interrupt_B_Ack_Register); ++ devpriv->stc_writew(dev, AO_BC_Source_Select | AO_UPDATE_Pulse_Width | ++ AO_TMRDACWR_Pulse_Width, AO_Personal_Register); ++ devpriv->stc_writew(dev, 0, AO_Output_Control_Register); ++ devpriv->stc_writew(dev, 0, AO_Start_Select_Register); ++ devpriv->ao_cmd1 = 0; ++ devpriv->stc_writew(dev, devpriv->ao_cmd1, AO_Command_1_Register); ++ 
devpriv->ao_cmd2 = 0; ++ devpriv->stc_writew(dev, devpriv->ao_cmd2, AO_Command_2_Register); ++ devpriv->ao_mode1 = 0; ++ devpriv->stc_writew(dev, devpriv->ao_mode1, AO_Mode_1_Register); ++ devpriv->ao_mode2 = 0; ++ devpriv->stc_writew(dev, devpriv->ao_mode2, AO_Mode_2_Register); ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ devpriv->ao_mode3 = AO_Last_Gate_Disable; ++ else ++ devpriv->ao_mode3 = 0; ++ devpriv->stc_writew(dev, devpriv->ao_mode3, AO_Mode_3_Register); ++ devpriv->ao_trigger_select = 0; ++ devpriv->stc_writew(dev, devpriv->ao_trigger_select, ++ AO_Trigger_Select_Register); ++ if (boardtype.reg_type & ni_reg_6xxx_mask) { ++ ao_win_out(0x3, AO_Immediate_671x); ++ ao_win_out(CLEAR_WG, AO_Misc_611x); ++ } ++ devpriv->stc_writew(dev, AO_Configuration_End, Joint_Reset_Register); ++} ++ ++/* digital io */ ++ ++int ni_dio_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "chan=%d io=%d\n", CR_CHAN(insn->chan_desc), data[0]); ++#endif /* CONFIG_DEBUG_DIO */ ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ devpriv->io_bits |= 1 << CR_CHAN(insn->chan_desc); ++ break; ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ devpriv->io_bits &= ~(1 << CR_CHAN(insn->chan_desc)); ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (devpriv->io_bits & ++ (1 << CR_CHAN(insn->chan_desc))) ? ++ A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ devpriv->dio_control &= ~DIO_Pins_Dir_Mask; ++ devpriv->dio_control |= DIO_Pins_Dir(devpriv->io_bits); ++ devpriv->stc_writew(dev, devpriv->dio_control, DIO_Control_Register); ++ ++ return 1; ++} ++ ++int ni_dio_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "mask=0x%x bits=0x%x\n", data[0], data[1]); ++#endif ++ ++ if (insn->data_size != 2 * sizeof(uint8_t)) ++ return -EINVAL; ++ ++ if (data[0]) { ++ /* Perform check to make sure we're not using the ++ serial part of the dio */ ++ if ((data[0] & (DIO_SDIN | DIO_SDOUT)) ++ && devpriv->serial_interval_ns) ++ return -EBUSY; ++ ++ devpriv->dio_state &= ~data[0]; ++ devpriv->dio_state |= (data[0] & data[1]); ++ devpriv->dio_output &= ~DIO_Parallel_Data_Mask; ++ devpriv->dio_output |= ++ DIO_Parallel_Data_Out(devpriv->dio_state); ++ devpriv->stc_writew(dev, devpriv->dio_output, ++ DIO_Output_Register); ++ } ++ ++ data[1] = (uint8_t) ++ devpriv->stc_readw(dev, DIO_Parallel_Input_Register); ++ ++ return 0; ++} ++ ++int ni_m_series_dio_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "chan=%d io=%d\n", CR_CHAN(insn->chan_desc), data[0]); ++#endif ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ devpriv->io_bits |= 1 << CR_CHAN(insn->chan_desc); ++ break; ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ devpriv->io_bits &= ~(1 << CR_CHAN(insn->chan_desc)); ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (devpriv->io_bits & ++ (1 << CR_CHAN(insn->chan_desc))) ? 
++ A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ ni_writel(devpriv->io_bits, M_Offset_DIO_Direction); ++ ++ return 0; ++} ++ ++int ni_m_series_dio_insn_bits_8(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "mask=0x%x bits=0x%x\n", data[0], data[1]); ++#endif ++ ++ if (insn->data_size != 2 * sizeof(uint8_t)) ++ return -EINVAL; ++ ++ if (data[0]) { ++ devpriv->dio_state &= ~data[0]; ++ devpriv->dio_state |= (data[0] & data[1]); ++ ni_writel(devpriv->dio_state, M_Offset_Static_Digital_Output); ++ } ++ ++ data[1] = (uint8_t) ni_readl(M_Offset_Static_Digital_Input); ++ ++ return 0; ++} ++ ++int ni_m_series_dio_insn_bits_32(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint32_t *data = (uint32_t *)insn->data; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "mask=0x%x bits=0x%x\n", data[0], data[1]); ++#endif ++ ++ if (insn->data_size != 2 * sizeof(uint32_t)) ++ return -EINVAL; ++ ++ if (data[0]) { ++ devpriv->dio_state &= ~data[0]; ++ devpriv->dio_state |= (data[0] & data[1]); ++ ni_writel(devpriv->dio_state, M_Offset_Static_Digital_Output); ++ } ++ ++ data[1] = ni_readl(M_Offset_Static_Digital_Input); ++ ++ return 0; ++} ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++struct a4l_cmd_desc mio_dio_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_INT, ++ .scan_begin_src = TRIG_EXT, ++ .convert_src = TRIG_NOW, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_NONE, ++}; ++ ++int ni_cdio_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ unsigned int i; ++ ++ /* Make sure arguments are trivially compatible */ ++ ++ if (cmd->start_arg != 0) { ++ cmd->start_arg = 0; ++ return -EINVAL; ++ } ++ ++ if ((cmd->scan_begin_arg & ++ PACK_FLAGS(CDO_Sample_Source_Select_Mask, 0, 0, CR_INVERT)) != ++ cmd->scan_begin_arg) ++ return -EINVAL; ++ ++ if (cmd->convert_arg != 0) { ++ cmd->convert_arg = 0; ++ return -EINVAL; ++ } ++ ++ if (cmd->scan_end_arg != cmd->nb_chan) { ++ cmd->scan_end_arg = cmd->nb_chan; ++ return -EINVAL; ++ } ++ ++ if (cmd->stop_arg != 0) { ++ cmd->stop_arg = 0; ++ return -EINVAL; ++ } ++ ++ /* Check chan_descs */ ++ ++ for (i = 0; i < cmd->nb_chan; ++i) { ++ if (cmd->chan_descs[i] != i) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++int ni_cdio_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned cdo_mode_bits = CDO_FIFO_Mode_Bit | CDO_Halt_On_Error_Bit; ++ ++ ni_writel(CDO_Reset_Bit, M_Offset_CDIO_Command); ++ switch (cmd->scan_begin_src) { ++ case TRIG_EXT: ++ cdo_mode_bits |= ++ CR_CHAN(cmd->scan_begin_arg) & ++ CDO_Sample_Source_Select_Mask; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ if (cmd->scan_begin_arg & CR_INVERT) ++ cdo_mode_bits |= CDO_Polarity_Bit; ++ ni_writel(cdo_mode_bits, M_Offset_CDO_Mode); ++ ++ if (devpriv->io_bits) { ++ ni_writel(devpriv->dio_state, M_Offset_CDO_FIFO_Data); ++ ni_writel(CDO_SW_Update_Bit, M_Offset_CDIO_Command); ++ ni_writel(devpriv->io_bits, M_Offset_CDO_Mask_Enable); ++ } else { ++ a4l_err(dev, ++ "ni_cdio_cmd: attempted to run digital " ++ "output command with no lines configured as outputs"); ++ return -EIO; ++ } ++ ++ return 0; ++} ++ ++void ni_cdio_cancel(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ 
ni_writel(CDO_Disarm_Bit | CDO_Error_Interrupt_Enable_Clear_Bit | ++ CDO_Empty_FIFO_Interrupt_Enable_Clear_Bit | ++ CDO_FIFO_Request_Interrupt_Enable_Clear_Bit, ++ M_Offset_CDIO_Command); ++ ++ ni_writel(0, M_Offset_CDO_Mask_Enable); ++ ni_release_cdo_mite_channel(dev); ++} ++ ++int ni_cdo_inttrig(struct a4l_subdevice *subd, lsampl_t trignum) ++{ ++ struct a4l_device *dev = subd->dev; ++ int err; ++ unsigned i; ++ const unsigned timeout = 1000; ++ ++ /* TODO: disable trigger until a command is recorded. ++ Null trig at beginning prevent ao start trigger from executing ++ more than once per command (and doing things like trying to ++ allocate the ao dma channel multiple times) */ ++ ++ err = ni_cdo_setup_MITE_dma(subd); ++ if (err < 0) ++ return err; ++ ++ /* wait for dma to fill output fifo */ ++ for (i = 0; i < timeout; ++i) { ++ if (ni_readl(M_Offset_CDIO_Status) & CDO_FIFO_Full_Bit) ++ break; ++ a4l_udelay(10); ++ } ++ ++ if (i == timeout) { ++ a4l_err(dev, "ni_cdo_inttrig: dma failed to fill cdo fifo!"); ++ ni_cdio_cancel(subd); ++ return -EIO; ++ } ++ ++ ni_writel(CDO_Arm_Bit | ++ CDO_Error_Interrupt_Enable_Set_Bit | ++ CDO_Empty_FIFO_Interrupt_Enable_Set_Bit, ++ M_Offset_CDIO_Command); ++ ++ return 0; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++static void handle_cdio_interrupt(struct a4l_device *dev) ++{ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ unsigned cdio_status; ++ unsigned long flags; ++ struct a4l_subdevice *subd = a4l_get_subd(dev, NI_DIO_SUBDEV); ++ ++ if ((boardtype.reg_type & ni_reg_m_series_mask) == 0) { ++ return; ++ } ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->cdo_mite_chan) { ++ unsigned cdo_mite_status = ++ a4l_mite_get_status(devpriv->cdo_mite_chan); ++ if (cdo_mite_status & CHSR_LINKC) { ++ writel(CHOR_CLRLC, ++ devpriv->mite->mite_io_addr + ++ MITE_CHOR(devpriv->cdo_mite_chan->channel)); ++ } ++ a4l_mite_sync_output_dma(devpriv->cdo_mite_chan, subd); ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ cdio_status = ni_readl(M_Offset_CDIO_Status); ++ if (cdio_status & (CDO_Overrun_Bit | CDO_Underflow_Bit)) { ++ /* XXX just guessing this is needed and does something useful */ ++ ni_writel(CDO_Error_Interrupt_Confirm_Bit, M_Offset_CDIO_Command); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ if (cdio_status & CDO_FIFO_Empty_Bit) { ++ ni_writel(CDO_Empty_FIFO_Interrupt_Enable_Clear_Bit, ++ M_Offset_CDIO_Command); ++ } ++ a4l_buf_evt(subd, 0); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++} ++ ++static int ni_serial_hw_readwrite8(struct a4l_device * dev, ++ unsigned char data_out, unsigned char *data_in) ++{ ++ unsigned int status1; ++ int err = 0, count = 20; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "outputting 0x%x\n", data_out); ++#endif ++ ++ devpriv->dio_output &= ~DIO_Serial_Data_Mask; ++ devpriv->dio_output |= DIO_Serial_Data_Out(data_out); ++ devpriv->stc_writew(dev, devpriv->dio_output, DIO_Output_Register); ++ ++ status1 = devpriv->stc_readw(dev, Joint_Status_1_Register); ++ if (status1 & DIO_Serial_IO_In_Progress_St) { ++ err = -EBUSY; ++ goto Error; ++ } ++ ++ devpriv->dio_control |= DIO_HW_Serial_Start; ++ devpriv->stc_writew(dev, devpriv->dio_control, DIO_Control_Register); ++ devpriv->dio_control &= ~DIO_HW_Serial_Start; ++ ++ /* Wait until STC says we're done, but don't loop infinitely. 
*/ ++ while ((status1 = ++ devpriv->stc_readw(dev, ++ Joint_Status_1_Register)) & ++ DIO_Serial_IO_In_Progress_St) { ++ /* Delay one bit per loop */ ++ a4l_udelay((devpriv->serial_interval_ns + 999) / 1000); ++ if (--count < 0) { ++ a4l_err(dev, ++ "ni_serial_hw_readwrite8: " ++ "SPI serial I/O didn't finish in time!\n"); ++ err = -ETIME; ++ goto Error; ++ } ++ } ++ ++ /* Delay for last bit. This delay is absolutely necessary, because ++ DIO_Serial_IO_In_Progress_St goes high one bit too early. */ ++ a4l_udelay((devpriv->serial_interval_ns + 999) / 1000); ++ ++ if (data_in != NULL) { ++ *data_in = devpriv->stc_readw(dev, DIO_Serial_Input_Register); ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "inputted 0x%x\n", *data_in); ++#endif ++ } ++ ++Error: ++ devpriv->stc_writew(dev, devpriv->dio_control, DIO_Control_Register); ++ ++ return err; ++} ++ ++static int ni_serial_sw_readwrite8(struct a4l_device * dev, ++ unsigned char data_out, unsigned char *data_in) ++{ ++ unsigned char mask, input = 0; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "outputting 0x%x\n", data_out); ++#endif ++ ++ /* Wait for one bit before transfer */ ++ a4l_udelay((devpriv->serial_interval_ns + 999) / 1000); ++ ++ for (mask = 0x80; mask; mask >>= 1) { ++ /* Output current bit; note that we cannot touch devpriv->dio_state ++ because it is a per-subdevice field, and serial is ++ a separate subdevice from DIO. */ ++ devpriv->dio_output &= ~DIO_SDOUT; ++ if (data_out & mask) { ++ devpriv->dio_output |= DIO_SDOUT; ++ } ++ devpriv->stc_writew(dev, devpriv->dio_output, ++ DIO_Output_Register); ++ ++ /* Assert SDCLK (active low, inverted), wait for half of ++ the delay, deassert SDCLK, and wait for the other half. */ ++ devpriv->dio_control |= DIO_Software_Serial_Control; ++ devpriv->stc_writew(dev, devpriv->dio_control, ++ DIO_Control_Register); ++ ++ a4l_udelay((devpriv->serial_interval_ns + 999) / 2000); ++ ++ devpriv->dio_control &= ~DIO_Software_Serial_Control; ++ devpriv->stc_writew(dev, devpriv->dio_control, ++ DIO_Control_Register); ++ ++ a4l_udelay((devpriv->serial_interval_ns + 999) / 2000); ++ ++ /* Input current bit */ ++ if (devpriv->stc_readw(dev, ++ DIO_Parallel_Input_Register) & DIO_SDIN) { ++ input |= mask; ++ } ++ } ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "inputted 0x%x\n", input); ++#endif ++ if (data_in) ++ *data_in = input; ++ ++ return 0; ++} ++ ++int ni_serial_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ int err = 0; ++ unsigned char byte_out, byte_in = 0; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ if (insn->data_size != 2 * sizeof(unsigned int)) ++ return -EINVAL; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_SERIAL_CLOCK: ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "SPI serial clock Config %d\n", data[1]); ++#endif ++ ++ devpriv->serial_hw_mode = 1; ++ devpriv->dio_control |= DIO_HW_Serial_Enable; ++ ++ if (data[1] == SERIAL_DISABLED) { ++ devpriv->serial_hw_mode = 0; ++ devpriv->dio_control &= ~(DIO_HW_Serial_Enable | ++ DIO_Software_Serial_Control); ++ data[1] = SERIAL_DISABLED; ++ devpriv->serial_interval_ns = data[1]; ++ } else if (data[1] <= SERIAL_600NS) { ++ /* Warning: this clock speed is too fast to reliably ++ control SCXI. 
*/ ++ devpriv->dio_control &= ~DIO_HW_Serial_Timebase; ++ devpriv->clock_and_fout |= Slow_Internal_Timebase; ++ devpriv->clock_and_fout &= ~DIO_Serial_Out_Divide_By_2; ++ data[1] = SERIAL_600NS; ++ devpriv->serial_interval_ns = data[1]; ++ } else if (data[1] <= SERIAL_1_2US) { ++ devpriv->dio_control &= ~DIO_HW_Serial_Timebase; ++ devpriv->clock_and_fout |= Slow_Internal_Timebase | ++ DIO_Serial_Out_Divide_By_2; ++ data[1] = SERIAL_1_2US; ++ devpriv->serial_interval_ns = data[1]; ++ } else if (data[1] <= SERIAL_10US) { ++ devpriv->dio_control |= DIO_HW_Serial_Timebase; ++ devpriv->clock_and_fout |= Slow_Internal_Timebase | ++ DIO_Serial_Out_Divide_By_2; ++ /* Note: DIO_Serial_Out_Divide_By_2 only affects ++ 600ns/1.2us. If you turn divide_by_2 off with the ++ slow clock, you will still get 10us, except then ++ all your delays are wrong. */ ++ data[1] = SERIAL_10US; ++ devpriv->serial_interval_ns = data[1]; ++ } else { ++ devpriv->dio_control &= ~(DIO_HW_Serial_Enable | ++ DIO_Software_Serial_Control); ++ devpriv->serial_hw_mode = 0; ++ data[1] = (data[1] / 1000) * 1000; ++ devpriv->serial_interval_ns = data[1]; ++ } ++ ++ devpriv->stc_writew(dev, devpriv->dio_control, ++ DIO_Control_Register); ++ devpriv->stc_writew(dev, devpriv->clock_and_fout, ++ Clock_and_FOUT_Register); ++ return 0; ++ ++ break; ++ ++ case A4L_INSN_CONFIG_BIDIRECTIONAL_DATA: ++ ++ if (devpriv->serial_interval_ns == 0) { ++ return -EINVAL; ++ } ++ ++ byte_out = data[1] & 0xFF; ++ ++ if (devpriv->serial_hw_mode) { ++ err = ni_serial_hw_readwrite8(dev, byte_out, &byte_in); ++ } else if (devpriv->serial_interval_ns > 0) { ++ err = ni_serial_sw_readwrite8(dev, byte_out, &byte_in); ++ } else { ++ a4l_err(dev, ++ "ni_serial_insn_config: serial disabled!\n"); ++ return -EINVAL; ++ } ++ if (err < 0) ++ return err; ++ data[1] = byte_in & 0xFF; ++ return 0; ++ ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return -EINVAL; ++} ++ ++void mio_common_detach(struct a4l_device * dev) ++{ ++ if (dev->priv) { ++ if (devpriv->counter_dev) { ++ a4l_ni_gpct_device_destroy(devpriv->counter_dev); ++ } ++ } ++} ++ ++static void init_ao_67xx(struct a4l_device * dev) ++{ ++ struct a4l_subdevice *subd = a4l_get_subd(dev, NI_AO_SUBDEV); ++ int i; ++ ++ if (subd == NULL) { ++ a4l_err(dev, "%s: unable to find AO subdevice\n", __FUNCTION__); ++ return; ++ } ++ ++ for (i = 0; i < subd->chan_desc->length; i++) ++ ni_ao_win_outw(dev, AO_Channel(i) | 0x0, ++ AO_Configuration_2_67xx); ++} ++ ++static unsigned int ni_gpct_to_stc_register(enum ni_gpct_register reg) ++{ ++ unsigned stc_register; ++ switch (reg) { ++ case NITIO_G0_Autoincrement_Reg: ++ stc_register = G_Autoincrement_Register(0); ++ break; ++ case NITIO_G1_Autoincrement_Reg: ++ stc_register = G_Autoincrement_Register(1); ++ break; ++ case NITIO_G0_Command_Reg: ++ stc_register = G_Command_Register(0); ++ break; ++ case NITIO_G1_Command_Reg: ++ stc_register = G_Command_Register(1); ++ break; ++ case NITIO_G0_HW_Save_Reg: ++ stc_register = G_HW_Save_Register(0); ++ break; ++ case NITIO_G1_HW_Save_Reg: ++ stc_register = G_HW_Save_Register(1); ++ break; ++ case NITIO_G0_SW_Save_Reg: ++ stc_register = G_Save_Register(0); ++ break; ++ case NITIO_G1_SW_Save_Reg: ++ stc_register = G_Save_Register(1); ++ break; ++ case NITIO_G0_Mode_Reg: ++ stc_register = G_Mode_Register(0); ++ break; ++ case NITIO_G1_Mode_Reg: ++ stc_register = G_Mode_Register(1); ++ break; ++ case NITIO_G0_LoadA_Reg: ++ stc_register = G_Load_A_Register(0); ++ break; ++ case NITIO_G1_LoadA_Reg: ++ stc_register = 
G_Load_A_Register(1); ++ break; ++ case NITIO_G0_LoadB_Reg: ++ stc_register = G_Load_B_Register(0); ++ break; ++ case NITIO_G1_LoadB_Reg: ++ stc_register = G_Load_B_Register(1); ++ break; ++ case NITIO_G0_Input_Select_Reg: ++ stc_register = G_Input_Select_Register(0); ++ break; ++ case NITIO_G1_Input_Select_Reg: ++ stc_register = G_Input_Select_Register(1); ++ break; ++ case NITIO_G01_Status_Reg: ++ stc_register = G_Status_Register; ++ break; ++ case NITIO_G01_Joint_Reset_Reg: ++ stc_register = Joint_Reset_Register; ++ break; ++ case NITIO_G01_Joint_Status1_Reg: ++ stc_register = Joint_Status_1_Register; ++ break; ++ case NITIO_G01_Joint_Status2_Reg: ++ stc_register = Joint_Status_2_Register; ++ break; ++ case NITIO_G0_Interrupt_Acknowledge_Reg: ++ stc_register = Interrupt_A_Ack_Register; ++ break; ++ case NITIO_G1_Interrupt_Acknowledge_Reg: ++ stc_register = Interrupt_B_Ack_Register; ++ break; ++ case NITIO_G0_Status_Reg: ++ stc_register = AI_Status_1_Register; ++ break; ++ case NITIO_G1_Status_Reg: ++ stc_register = AO_Status_1_Register; ++ break; ++ case NITIO_G0_Interrupt_Enable_Reg: ++ stc_register = Interrupt_A_Enable_Register; ++ break; ++ case NITIO_G1_Interrupt_Enable_Reg: ++ stc_register = Interrupt_B_Enable_Register; ++ break; ++ default: ++ __a4l_err("%s: unhandled register 0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return 0; ++ break; ++ } ++ return stc_register; ++} ++ ++static void ni_gpct_write_register(struct ni_gpct *counter, ++ unsigned int bits, enum ni_gpct_register reg) ++{ ++ struct a4l_device *dev = counter->counter_dev->dev; ++ unsigned stc_register; ++ /* bits in the join reset register which are relevant to counters */ ++ static const unsigned gpct_joint_reset_mask = G0_Reset | G1_Reset; ++ static const unsigned gpct_interrupt_a_enable_mask = ++ G0_Gate_Interrupt_Enable | G0_TC_Interrupt_Enable; ++ static const unsigned gpct_interrupt_b_enable_mask = ++ G1_Gate_Interrupt_Enable | G1_TC_Interrupt_Enable; ++ ++ switch (reg) { ++ /* m-series-only registers */ ++ case NITIO_G0_Counting_Mode_Reg: ++ ni_writew(bits, M_Offset_G0_Counting_Mode); ++ break; ++ case NITIO_G1_Counting_Mode_Reg: ++ ni_writew(bits, M_Offset_G1_Counting_Mode); ++ break; ++ case NITIO_G0_Second_Gate_Reg: ++ ni_writew(bits, M_Offset_G0_Second_Gate); ++ break; ++ case NITIO_G1_Second_Gate_Reg: ++ ni_writew(bits, M_Offset_G1_Second_Gate); ++ break; ++ case NITIO_G0_DMA_Config_Reg: ++ ni_writew(bits, M_Offset_G0_DMA_Config); ++ break; ++ case NITIO_G1_DMA_Config_Reg: ++ ni_writew(bits, M_Offset_G1_DMA_Config); ++ break; ++ case NITIO_G0_ABZ_Reg: ++ ni_writew(bits, M_Offset_G0_MSeries_ABZ); ++ break; ++ case NITIO_G1_ABZ_Reg: ++ ni_writew(bits, M_Offset_G1_MSeries_ABZ); ++ break; ++ ++ /* 32 bit registers */ ++ case NITIO_G0_LoadA_Reg: ++ case NITIO_G1_LoadA_Reg: ++ case NITIO_G0_LoadB_Reg: ++ case NITIO_G1_LoadB_Reg: ++ stc_register = ni_gpct_to_stc_register(reg); ++ devpriv->stc_writel(dev, bits, stc_register); ++ break; ++ ++ /* 16 bit registers */ ++ case NITIO_G0_Interrupt_Enable_Reg: ++ BUG_ON(bits & ~gpct_interrupt_a_enable_mask); ++ ni_set_bitfield(dev, Interrupt_A_Enable_Register, ++ gpct_interrupt_a_enable_mask, bits); ++ break; ++ case NITIO_G1_Interrupt_Enable_Reg: ++ BUG_ON(bits & ~gpct_interrupt_b_enable_mask); ++ ni_set_bitfield(dev, Interrupt_B_Enable_Register, ++ gpct_interrupt_b_enable_mask, bits); ++ break; ++ case NITIO_G01_Joint_Reset_Reg: ++ BUG_ON(bits & ~gpct_joint_reset_mask); ++ /* fall-through */ ++ default: ++ stc_register = ni_gpct_to_stc_register(reg); ++ 
devpriv->stc_writew(dev, bits, stc_register); ++ } ++} ++ ++static unsigned int ni_gpct_read_register(struct ni_gpct *counter, ++ enum ni_gpct_register reg) ++{ ++ struct a4l_device *dev = counter->counter_dev->dev; ++ unsigned int stc_register; ++ switch (reg) { ++ /* m-series only registers */ ++ case NITIO_G0_DMA_Status_Reg: ++ return ni_readw(M_Offset_G0_DMA_Status); ++ break; ++ case NITIO_G1_DMA_Status_Reg: ++ return ni_readw(M_Offset_G1_DMA_Status); ++ break; ++ ++ /* 32 bit registers */ ++ case NITIO_G0_HW_Save_Reg: ++ case NITIO_G1_HW_Save_Reg: ++ case NITIO_G0_SW_Save_Reg: ++ case NITIO_G1_SW_Save_Reg: ++ stc_register = ni_gpct_to_stc_register(reg); ++ return devpriv->stc_readl(dev, stc_register); ++ break; ++ ++ /* 16 bit registers */ ++ default: ++ stc_register = ni_gpct_to_stc_register(reg); ++ return devpriv->stc_readw(dev, stc_register); ++ break; ++ } ++ return 0; ++} ++ ++int ni_freq_out_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ data[0] = FOUT_Divider(devpriv->clock_and_fout); ++ ++ return 0; ++} ++ ++int ni_freq_out_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ devpriv->clock_and_fout &= ~FOUT_Enable; ++ devpriv->stc_writew(dev, devpriv->clock_and_fout, ++ Clock_and_FOUT_Register); ++ devpriv->clock_and_fout &= ~FOUT_Divider_mask; ++ devpriv->clock_and_fout |= FOUT_Divider(data[0]); ++ devpriv->clock_and_fout |= FOUT_Enable; ++ devpriv->stc_writew(dev, devpriv->clock_and_fout, ++ Clock_and_FOUT_Register); ++ ++ return 0; ++} ++ ++static int ni_set_freq_out_clock(struct a4l_device * dev, lsampl_t clock_source) ++{ ++ switch (clock_source) { ++ case NI_FREQ_OUT_TIMEBASE_1_DIV_2_CLOCK_SRC: ++ devpriv->clock_and_fout &= ~FOUT_Timebase_Select; ++ break; ++ case NI_FREQ_OUT_TIMEBASE_2_CLOCK_SRC: ++ devpriv->clock_and_fout |= FOUT_Timebase_Select; ++ break; ++ default: ++ return -EINVAL; ++ } ++ devpriv->stc_writew(dev, devpriv->clock_and_fout, ++ Clock_and_FOUT_Register); ++ ++ return 0; ++} ++ ++static void ni_get_freq_out_clock(struct a4l_device * dev, ++ unsigned int * clock_source, ++ unsigned int * clock_period_ns) ++{ ++ if (devpriv->clock_and_fout & FOUT_Timebase_Select) { ++ *clock_source = NI_FREQ_OUT_TIMEBASE_2_CLOCK_SRC; ++ *clock_period_ns = TIMEBASE_2_NS; ++ } else { ++ *clock_source = NI_FREQ_OUT_TIMEBASE_1_DIV_2_CLOCK_SRC; ++ *clock_period_ns = TIMEBASE_1_NS * 2; ++ } ++} ++ ++int ni_freq_out_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_SET_CLOCK_SRC: ++ return ni_set_freq_out_clock(dev, data[1]); ++ break; ++ case A4L_INSN_CONFIG_GET_CLOCK_SRC: ++ ni_get_freq_out_clock(dev, &data[1], &data[2]); ++ return 0; ++ default: ++ break; ++ } ++ ++ return -EINVAL; ++} ++ ++static int ni_8255_callback(int dir, int port, int data, unsigned long arg) ++{ ++ struct a4l_device *dev = (struct a4l_device *) arg; ++ ++ if (dir) { ++ ni_writeb(data, Port_A + 2 * port); ++ return 0; ++ } else { ++ return ni_readb(Port_A + 2 * port); ++ } ++} ++ ++/* ++ reads bytes out of eeprom ++*/ ++ ++static int ni_read_eeprom(struct a4l_device *dev, int addr) ++{ ++ int bit; ++ int bitstring; ++ ++ bitstring = 0x0300 | ((addr & 0x100) << 3) | (addr & 0xff); ++ ni_writeb(0x04, 
Serial_Command); ++ for (bit = 0x8000; bit; bit >>= 1) { ++ ni_writeb(0x04 | ((bit & bitstring) ? 0x02 : 0), ++ Serial_Command); ++ ni_writeb(0x05 | ((bit & bitstring) ? 0x02 : 0), ++ Serial_Command); ++ } ++ bitstring = 0; ++ for (bit = 0x80; bit; bit >>= 1) { ++ ni_writeb(0x04, Serial_Command); ++ ni_writeb(0x05, Serial_Command); ++ bitstring |= ((ni_readb(XXX_Status) & PROMOUT) ? bit : 0); ++ } ++ ni_writeb(0x00, Serial_Command); ++ ++ return bitstring; ++} ++ ++/* ++ presents the EEPROM as a subdevice ++*/ ++ ++static int ni_eeprom_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ data[0] = ni_read_eeprom(dev, CR_CHAN(insn->chan_desc)); ++ ++ return 0; ++} ++ ++ ++static int ni_m_series_eeprom_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ data[0] = devpriv->eeprom_buffer[CR_CHAN(insn->chan_desc)]; ++ ++ return 0; ++} ++ ++static int ni_get_pwm_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int*)insn->data; ++ ++ data[1] = devpriv->pwm_up_count * devpriv->clock_ns; ++ data[2] = devpriv->pwm_down_count * devpriv->clock_ns; ++ ++ return 0; ++} ++ ++static int ni_m_series_pwm_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int up_count, down_count; ++ unsigned int *data = (unsigned int*)insn->data; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_PWM_OUTPUT: ++ switch (data[1]) { ++ case TRIG_ROUND_NEAREST: ++ up_count = ++ (data[2] + ++ devpriv->clock_ns / 2) / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_DOWN: ++ up_count = data[2] / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_UP: ++ up_count =(data[2] + devpriv->clock_ns - 1) / ++ devpriv->clock_ns; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ switch (data[3]) { ++ case TRIG_ROUND_NEAREST: ++ down_count = (data[4] + devpriv->clock_ns / 2) / ++ devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_DOWN: ++ down_count = data[4] / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_UP: ++ down_count = ++ (data[4] + devpriv->clock_ns - 1) / ++ devpriv->clock_ns; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ if (up_count * devpriv->clock_ns != data[2] || ++ down_count * devpriv->clock_ns != data[4]) { ++ data[2] = up_count * devpriv->clock_ns; ++ data[4] = down_count * devpriv->clock_ns; ++ return -EAGAIN; ++ } ++ ni_writel(MSeries_Cal_PWM_High_Time_Bits(up_count) | ++ MSeries_Cal_PWM_Low_Time_Bits(down_count), ++ M_Offset_Cal_PWM); ++ devpriv->pwm_up_count = up_count; ++ devpriv->pwm_down_count = down_count; ++ return 0; ++ break; ++ case A4L_INSN_CONFIG_GET_PWM_OUTPUT: ++ return ni_get_pwm_config(subd, insn); ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ return 0; ++} ++ ++static int ni_6143_pwm_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int*)insn->data; ++ ++ unsigned up_count, down_count; ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_PWM_OUTPUT: ++ switch (data[1]) { ++ case TRIG_ROUND_NEAREST: ++ up_count = ++ (data[2] + devpriv->clock_ns / 2) / ++ devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_DOWN: ++ up_count = data[2] / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_UP: ++ up_count 
= (data[2] + devpriv->clock_ns - 1) / ++ devpriv->clock_ns; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ switch (data[3]) { ++ case TRIG_ROUND_NEAREST: ++ down_count = (data[4] + devpriv->clock_ns / 2) / ++ devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_DOWN: ++ down_count = data[4] / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_UP: ++ down_count = (data[4] + devpriv->clock_ns - 1) / ++ devpriv->clock_ns; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ if (up_count * devpriv->clock_ns != data[2] || ++ down_count * devpriv->clock_ns != data[4]) { ++ data[2] = up_count * devpriv->clock_ns; ++ data[4] = down_count * devpriv->clock_ns; ++ return -EAGAIN; ++ } ++ ni_writel(up_count, Calibration_HighTime_6143); ++ devpriv->pwm_up_count = up_count; ++ ni_writel(down_count, Calibration_LowTime_6143); ++ devpriv->pwm_down_count = down_count; ++ return 0; ++ break; ++ case A4L_INSN_CONFIG_GET_PWM_OUTPUT: ++ return ni_get_pwm_config(subd, insn); ++ default: ++ return -EINVAL; ++ break; ++ } ++ return 0; ++} ++ ++static int pack_mb88341(int addr, int val, int *bitstring) ++{ ++ /* ++ Fujitsu MB 88341 ++ Note that address bits are reversed. Thanks to ++ Ingo Keen for noticing this. ++ ++ Note also that the 88341 expects address values from ++ 1-12, whereas we use channel numbers 0-11. The NI ++ docs use 1-12, also, so be careful here. ++ */ ++ addr++; ++ *bitstring = ((addr & 0x1) << 11) | ++ ((addr & 0x2) << 9) | ++ ((addr & 0x4) << 7) | ((addr & 0x8) << 5) | (val & 0xff); ++ return 12; ++} ++ ++static int pack_dac8800(int addr, int val, int *bitstring) ++{ ++ *bitstring = ((addr & 0x7) << 8) | (val & 0xff); ++ return 11; ++} ++ ++static int pack_dac8043(int addr, int val, int *bitstring) ++{ ++ *bitstring = val & 0xfff; ++ return 12; ++} ++ ++static int pack_ad8522(int addr, int val, int *bitstring) ++{ ++ *bitstring = (val & 0xfff) | (addr ? 0xc000 : 0xa000); ++ return 16; ++} ++ ++static int pack_ad8804(int addr, int val, int *bitstring) ++{ ++ *bitstring = ((addr & 0xf) << 8) | (val & 0xff); ++ return 12; ++} ++ ++static int pack_ad8842(int addr, int val, int *bitstring) ++{ ++ *bitstring = ((addr + 1) << 8) | (val & 0xff); ++ return 12; ++} ++ ++struct caldac_struct { ++ int n_chans; ++ int n_bits; ++ int (*packbits) (int, int, int *); ++}; ++ ++static struct caldac_struct caldacs[] = { ++ [mb88341] = {12, 8, pack_mb88341}, ++ [dac8800] = {8, 8, pack_dac8800}, ++ [dac8043] = {1, 12, pack_dac8043}, ++ [ad8522] = {2, 12, pack_ad8522}, ++ [ad8804] = {12, 8, pack_ad8804}, ++ [ad8842] = {8, 8, pack_ad8842}, ++ [ad8804_debug] = {16, 8, pack_ad8804}, ++}; ++ ++static void ni_write_caldac(struct a4l_device * dev, int addr, int val) ++{ ++ unsigned int loadbit = 0, bits = 0, bit, bitstring = 0; ++ int i; ++ int type; ++ ++ if (devpriv->caldacs[addr] == val) ++ return; ++ devpriv->caldacs[addr] = val; ++ ++ for (i = 0; i < 3; i++) { ++ type = boardtype.caldac[i]; ++ if (type == caldac_none) ++ break; ++ if (addr < caldacs[type].n_chans) { ++ bits = caldacs[type].packbits(addr, val, &bitstring); ++ loadbit = SerDacLd(i); ++ break; ++ } ++ addr -= caldacs[type].n_chans; ++ } ++ ++ for (bit = 1 << (bits - 1); bit; bit >>= 1) { ++ ni_writeb(((bit & bitstring) ? 0x02 : 0), Serial_Command); ++ a4l_udelay(1); ++ ni_writeb(1 | ((bit & bitstring) ? 
0x02 : 0), Serial_Command); ++ a4l_udelay(1); ++ } ++ ni_writeb(loadbit, Serial_Command); ++ a4l_udelay(1); ++ ni_writeb(0, Serial_Command); ++} ++ ++static void caldac_setup(struct a4l_device *dev, struct a4l_subdevice *subd) ++{ ++ int i, j; ++ int n_dacs; ++ int n_chans = 0; ++ int n_bits; ++ int diffbits = 0; ++ int type; ++ int chan; ++ ++ type = boardtype.caldac[0]; ++ if (type == caldac_none) ++ return; ++ n_bits = caldacs[type].n_bits; ++ for (i = 0; i < 3; i++) { ++ type = boardtype.caldac[i]; ++ if (type == caldac_none) ++ break; ++ if (caldacs[type].n_bits != n_bits) ++ diffbits = 1; ++ n_chans += caldacs[type].n_chans; ++ } ++ n_dacs = i; ++ ++ if (diffbits) { ++ ++ if (n_chans > MAX_N_CALDACS) { ++ a4l_err(dev, "BUG! MAX_N_CALDACS too small\n"); ++ } ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ n_chans * sizeof(struct a4l_channel), GFP_KERNEL); ++ ++ memset(subd->chan_desc, ++ 0, ++ sizeof(struct a4l_channels_desc) + n_chans * sizeof(struct a4l_channel)); ++ ++ subd->chan_desc->length = n_chans; ++ subd->chan_desc->mode = A4L_CHAN_PERCHAN_CHANDESC; ++ ++ chan = 0; ++ for (i = 0; i < n_dacs; i++) { ++ type = boardtype.caldac[i]; ++ for (j = 0; j < caldacs[type].n_chans; j++) { ++ ++ subd->chan_desc->chans[chan].nb_bits = ++ caldacs[type].n_bits; ++ ++ chan++; ++ } ++ } ++ ++ for (chan = 0; chan < n_chans; chan++) { ++ unsigned long tmp = ++ (1 << subd->chan_desc->chans[chan].nb_bits) / 2; ++ ni_write_caldac(dev, chan, tmp); ++ } ++ } else { ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ ++ memset(subd->chan_desc, ++ 0, sizeof(struct a4l_channels_desc) + sizeof(struct a4l_channel)); ++ ++ subd->chan_desc->length = n_chans; ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ ++ type = boardtype.caldac[0]; ++ ++ subd->chan_desc->chans[0].nb_bits = caldacs[type].n_bits; ++ ++ for (chan = 0; chan < n_chans; chan++) ++ ni_write_caldac(dev, ++ chan, ++ (1 << subd->chan_desc->chans[0].nb_bits) / 2); ++ } ++} ++ ++static int ni_calib_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ ni_write_caldac(dev, CR_CHAN(insn->chan_desc), data[0]); ++ return 0; ++} ++ ++static int ni_calib_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ data[0] = devpriv->caldacs[CR_CHAN(insn->chan_desc)]; ++ ++ return 0; ++} ++ ++static int ni_gpct_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ return a4l_ni_tio_insn_config(counter, insn); ++} ++ ++static int ni_gpct_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ return a4l_ni_tio_rinsn(counter, insn); ++} ++ ++static int ni_gpct_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ return a4l_ni_tio_winsn(counter, insn); ++} ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static int ni_gpct_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ int retval; ++ struct a4l_device *dev = subd->dev; ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ struct 
mite_dma_descriptor_ring *ring; ++ ++ retval = ni_request_gpct_mite_channel(dev, ++ counter->counter_index, ++ A4L_INPUT); ++ if (retval) { ++ a4l_err(dev, ++ "ni_gpct_cmd: " ++ "no dma channel available for use by counter\n"); ++ return retval; ++ } ++ ++ ring = devpriv->gpct_mite_ring[counter->counter_index]; ++ retval = a4l_mite_buf_change(ring, subd); ++ if (retval) { ++ a4l_err(dev, ++ "ni_gpct_cmd: " ++ "dma ring configuration failed\n"); ++ return retval; ++ ++ } ++ ++ a4l_ni_tio_acknowledge_and_confirm(counter, NULL, NULL, NULL, NULL); ++ ni_e_series_enable_second_irq(dev, counter->counter_index, 1); ++ retval = a4l_ni_tio_cmd(counter, cmd); ++ ++ return retval; ++} ++ ++static int ni_gpct_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ return a4l_ni_tio_cmdtest(counter, cmd); ++} ++ ++static void ni_gpct_cancel(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ ++ a4l_ni_tio_cancel(counter); ++ ni_e_series_enable_second_irq(dev, counter->counter_index, 0); ++ ni_release_gpct_mite_channel(dev, counter->counter_index); ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ ++/* ++ * ++ * Programmable Function Inputs ++ * ++ */ ++ ++static int ni_m_series_set_pfi_routing(struct a4l_device *dev, ++ unsigned int chan, unsigned int source) ++{ ++ unsigned int pfi_reg_index; ++ unsigned int array_offset; ++ ++ if ((source & 0x1f) != source) ++ return -EINVAL; ++ pfi_reg_index = 1 + chan / 3; ++ array_offset = pfi_reg_index - 1; ++ devpriv->pfi_output_select_reg[array_offset] &= ++ ~MSeries_PFI_Output_Select_Mask(chan); ++ devpriv->pfi_output_select_reg[array_offset] |= ++ MSeries_PFI_Output_Select_Bits(chan, source); ++ ni_writew(devpriv->pfi_output_select_reg[array_offset], ++ M_Offset_PFI_Output_Select(pfi_reg_index)); ++ return 2; ++} ++ ++static unsigned int ni_old_get_pfi_routing(struct a4l_device *dev, ++ unsigned int chan) ++{ ++ /* pre-m-series boards have fixed signals on pfi pins */ ++ ++ switch (chan) { ++ case 0: ++ return NI_PFI_OUTPUT_AI_START1; ++ break; ++ case 1: ++ return NI_PFI_OUTPUT_AI_START2; ++ break; ++ case 2: ++ return NI_PFI_OUTPUT_AI_CONVERT; ++ break; ++ case 3: ++ return NI_PFI_OUTPUT_G_SRC1; ++ break; ++ case 4: ++ return NI_PFI_OUTPUT_G_GATE1; ++ break; ++ case 5: ++ return NI_PFI_OUTPUT_AO_UPDATE_N; ++ break; ++ case 6: ++ return NI_PFI_OUTPUT_AO_START1; ++ break; ++ case 7: ++ return NI_PFI_OUTPUT_AI_START_PULSE; ++ break; ++ case 8: ++ return NI_PFI_OUTPUT_G_SRC0; ++ break; ++ case 9: ++ return NI_PFI_OUTPUT_G_GATE0; ++ break; ++ default: ++ __a4l_err("%s: bug, unhandled case in switch.\n", ++ __FUNCTION__); ++ break; ++ } ++ return 0; ++} ++ ++static int ni_old_set_pfi_routing(struct a4l_device *dev, ++ unsigned int chan, unsigned int source) ++{ ++ /* pre-m-series boards have fixed signals on pfi pins */ ++ if (source != ni_old_get_pfi_routing(dev, chan)) ++ return -EINVAL; ++ ++ return 2; ++} ++ ++static int ni_set_pfi_routing(struct a4l_device *dev, ++ unsigned int chan, unsigned int source) ++{ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ return ni_m_series_set_pfi_routing(dev, chan, source); ++ else ++ return ni_old_set_pfi_routing(dev, chan, source); ++} ++ ++static unsigned int ni_m_series_get_pfi_routing(struct a4l_device *dev, ++ unsigned int chan) ++{ ++ const unsigned int array_offset = chan / 3; ++ return MSeries_PFI_Output_Select_Source(chan, ++ 
devpriv->pfi_output_select_reg[array_offset]); ++} ++ ++static unsigned int ni_get_pfi_routing(struct a4l_device *dev, unsigned int chan) ++{ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ return ni_m_series_get_pfi_routing(dev, chan); ++ else ++ return ni_old_get_pfi_routing(dev, chan); ++} ++ ++static int ni_config_filter(struct a4l_device *dev, ++ unsigned int pfi_channel, int filter) ++{ ++ unsigned int bits; ++ if ((boardtype.reg_type & ni_reg_m_series_mask) == 0) { ++ return -ENOTSUPP; ++ } ++ bits = ni_readl(M_Offset_PFI_Filter); ++ bits &= ~MSeries_PFI_Filter_Select_Mask(pfi_channel); ++ bits |= MSeries_PFI_Filter_Select_Bits(pfi_channel, filter); ++ ni_writel(bits, M_Offset_PFI_Filter); ++ return 0; ++} ++ ++static int ni_pfi_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ if (data[0]) { ++ devpriv->pfi_state &= ~data[0]; ++ devpriv->pfi_state |= (data[0] & data[1]); ++ ni_writew(devpriv->pfi_state, M_Offset_PFI_DO); ++ } ++ ++ data[1] = ni_readw(M_Offset_PFI_DI); ++ ++ return 0; ++} ++ ++static int ni_pfi_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int chan, *data = (unsigned int *)insn->data; ++ ++ if (insn->data_size < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ chan = CR_CHAN(insn->chan_desc); ++ ++ switch (data[0]) { ++ case A4L_OUTPUT: ++ ni_set_bits(dev, IO_Bidirection_Pin_Register, 1 << chan, 1); ++ break; ++ case A4L_INPUT: ++ ni_set_bits(dev, IO_Bidirection_Pin_Register, 1 << chan, 0); ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (devpriv->io_bidirection_pin_reg & (1 << chan)) ? ++ A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ break; ++ case A4L_INSN_CONFIG_SET_ROUTING: ++ return ni_set_pfi_routing(dev, chan, data[1]); ++ break; ++ case A4L_INSN_CONFIG_GET_ROUTING: ++ data[1] = ni_get_pfi_routing(dev, chan); ++ break; ++ case A4L_INSN_CONFIG_FILTER: ++ return ni_config_filter(dev, chan, data[1]); ++ break; ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/* ++ * ++ * RTSI Bus Functions ++ * ++ */ ++ ++/* Find best multiplier/divider to try and get the PLL running at 80 MHz ++ * given an arbitrary frequency input clock */ ++static int ni_mseries_get_pll_parameters(unsigned int reference_period_ns, ++ unsigned int *freq_divider, ++ unsigned int *freq_multiplier, ++ unsigned int *actual_period_ns) ++{ ++ unsigned div; ++ unsigned best_div = 1; ++ static const unsigned max_div = 0x10; ++ unsigned mult; ++ unsigned best_mult = 1; ++ static const unsigned max_mult = 0x100; ++ static const unsigned pico_per_nano = 1000; ++ ++ const unsigned reference_picosec = reference_period_ns * pico_per_nano; ++ /* m-series wants the phased-locked loop to output 80MHz, which is divided by 4 to ++ * 20 MHz for most timing clocks */ ++ static const unsigned target_picosec = 12500; ++ static const unsigned fudge_factor_80_to_20Mhz = 4; ++ int best_period_picosec = 0; ++ for (div = 1; div <= max_div; ++div) { ++ for (mult = 1; mult <= max_mult; ++mult) { ++ unsigned new_period_ps = ++ (reference_picosec * div) / mult; ++ if (abs(new_period_ps - target_picosec) < ++ abs(best_period_picosec - target_picosec)) { ++ best_period_picosec = new_period_ps; ++ best_div = div; ++ best_mult = mult; ++ } ++ } ++ } ++ if (best_period_picosec == 0) { ++ __a4l_err("%s: bug, failed to find pll parameters\n", ++ __FUNCTION__); ++ return -EIO; ++ } ++ *freq_divider = best_div; ++ 
*freq_multiplier = best_mult; ++ *actual_period_ns = ++ (best_period_picosec * fudge_factor_80_to_20Mhz + ++ (pico_per_nano / 2)) / pico_per_nano; ++ return 0; ++} ++ ++static int ni_mseries_set_pll_master_clock(struct a4l_device * dev, ++ unsigned int source, ++ unsigned int period_ns) ++{ ++ static const unsigned min_period_ns = 50; ++ static const unsigned max_period_ns = 1000; ++ static const unsigned timeout = 1000; ++ unsigned pll_control_bits; ++ unsigned freq_divider; ++ unsigned freq_multiplier; ++ unsigned i; ++ int retval; ++ if (source == NI_MIO_PLL_PXI10_CLOCK) ++ period_ns = 100; ++ /* These limits are somewhat arbitrary, but NI advertises 1 to ++ 20MHz range so we'll use that */ ++ if (period_ns < min_period_ns || period_ns > max_period_ns) { ++ a4l_err(dev, ++ "%s: you must specify an input clock frequency " ++ "between %i and %i nanosec " ++ "for the phased-lock loop.\n", ++ __FUNCTION__, min_period_ns, max_period_ns); ++ return -EINVAL; ++ } ++ devpriv->rtsi_trig_direction_reg &= ~Use_RTSI_Clock_Bit; ++ devpriv->stc_writew(dev, devpriv->rtsi_trig_direction_reg, ++ RTSI_Trig_Direction_Register); ++ pll_control_bits = ++ MSeries_PLL_Enable_Bit | MSeries_PLL_VCO_Mode_75_150MHz_Bits; ++ devpriv->clock_and_fout2 |= ++ MSeries_Timebase1_Select_Bit | MSeries_Timebase3_Select_Bit; ++ devpriv->clock_and_fout2 &= ~MSeries_PLL_In_Source_Select_Mask; ++ switch (source) { ++ case NI_MIO_PLL_PXI_STAR_TRIGGER_CLOCK: ++ devpriv->clock_and_fout2 |= ++ MSeries_PLL_In_Source_Select_Star_Trigger_Bits; ++ retval = ni_mseries_get_pll_parameters(period_ns, &freq_divider, ++ &freq_multiplier, &devpriv->clock_ns); ++ if (retval < 0) ++ return retval; ++ break; ++ case NI_MIO_PLL_PXI10_CLOCK: ++ /* pxi clock is 10MHz */ ++ devpriv->clock_and_fout2 |= ++ MSeries_PLL_In_Source_Select_PXI_Clock10; ++ retval = ni_mseries_get_pll_parameters(period_ns, &freq_divider, ++ &freq_multiplier, &devpriv->clock_ns); ++ if (retval < 0) ++ return retval; ++ break; ++ default: ++ { ++ unsigned rtsi_channel; ++ static const unsigned max_rtsi_channel = 7; ++ for (rtsi_channel = 0; rtsi_channel <= max_rtsi_channel; ++ ++rtsi_channel) { ++ if (source == ++ NI_MIO_PLL_RTSI_CLOCK(rtsi_channel)) { ++ devpriv->clock_and_fout2 |= ++ MSeries_PLL_In_Source_Select_RTSI_Bits ++ (rtsi_channel); ++ break; ++ } ++ } ++ if (rtsi_channel > max_rtsi_channel) ++ return -EINVAL; ++ retval = ni_mseries_get_pll_parameters(period_ns, ++ &freq_divider, &freq_multiplier, ++ &devpriv->clock_ns); ++ if (retval < 0) ++ return retval; ++ } ++ break; ++ } ++ ni_writew(devpriv->clock_and_fout2, M_Offset_Clock_and_Fout2); ++ pll_control_bits |= ++ MSeries_PLL_Divisor_Bits(freq_divider) | ++ MSeries_PLL_Multiplier_Bits(freq_multiplier); ++ ni_writew(pll_control_bits, M_Offset_PLL_Control); ++ devpriv->clock_source = source; ++ /* It seems to typically take a few hundred microseconds for PLL to lock */ ++ for (i = 0; i < timeout; ++i) { ++ if (ni_readw(M_Offset_PLL_Status) & MSeries_PLL_Locked_Bit) { ++ break; ++ } ++ udelay(1); ++ } ++ if (i == timeout) { ++ a4l_err(dev, ++ "%s: timed out waiting for PLL to lock " ++ "to reference clock source %i with period %i ns.\n", ++ __FUNCTION__, source, period_ns); ++ return -ETIMEDOUT; ++ } ++ return 3; ++} ++ ++static int ni_set_master_clock(struct a4l_device *dev, ++ unsigned int source, unsigned int period_ns) ++{ ++ if (source == NI_MIO_INTERNAL_CLOCK) { ++ devpriv->rtsi_trig_direction_reg &= ~Use_RTSI_Clock_Bit; ++ devpriv->stc_writew(dev, devpriv->rtsi_trig_direction_reg, ++ 
RTSI_Trig_Direction_Register); ++ devpriv->clock_ns = TIMEBASE_1_NS; ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ devpriv->clock_and_fout2 &= ++ ~(MSeries_Timebase1_Select_Bit | ++ MSeries_Timebase3_Select_Bit); ++ ni_writew(devpriv->clock_and_fout2, ++ M_Offset_Clock_and_Fout2); ++ ni_writew(0, M_Offset_PLL_Control); ++ } ++ devpriv->clock_source = source; ++ } else { ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ return ni_mseries_set_pll_master_clock(dev, source, ++ period_ns); ++ } else { ++ if (source == NI_MIO_RTSI_CLOCK) { ++ devpriv->rtsi_trig_direction_reg |= ++ Use_RTSI_Clock_Bit; ++ devpriv->stc_writew(dev, ++ devpriv->rtsi_trig_direction_reg, ++ RTSI_Trig_Direction_Register); ++ if (devpriv->clock_ns == 0) { ++ a4l_err(dev, ++ "%s: we don't handle an " ++ "unspecified clock period " ++ "correctly yet, returning error.\n", ++ __FUNCTION__); ++ return -EINVAL; ++ } else { ++ devpriv->clock_ns = period_ns; ++ } ++ devpriv->clock_source = source; ++ } else ++ return -EINVAL; ++ } ++ } ++ return 3; ++} ++ ++static void ni_rtsi_init(struct a4l_device * dev) ++{ ++ /* Initialise the RTSI bus signal switch to a default state */ ++ ++ /* Set clock mode to internal */ ++ devpriv->clock_and_fout2 = MSeries_RTSI_10MHz_Bit; ++ if (ni_set_master_clock(dev, NI_MIO_INTERNAL_CLOCK, 0) < 0) { ++ a4l_err(dev, "ni_set_master_clock failed, bug?"); ++ } ++ ++ /* Default internal lines routing to RTSI bus lines */ ++ devpriv->rtsi_trig_a_output_reg = ++ RTSI_Trig_Output_Bits(0, NI_RTSI_OUTPUT_ADR_START1) | ++ RTSI_Trig_Output_Bits(1, NI_RTSI_OUTPUT_ADR_START2) | ++ RTSI_Trig_Output_Bits(2, NI_RTSI_OUTPUT_SCLKG) | ++ RTSI_Trig_Output_Bits(3, NI_RTSI_OUTPUT_DACUPDN); ++ devpriv->stc_writew(dev, devpriv->rtsi_trig_a_output_reg, ++ RTSI_Trig_A_Output_Register); ++ devpriv->rtsi_trig_b_output_reg = ++ RTSI_Trig_Output_Bits(4, NI_RTSI_OUTPUT_DA_START1) | ++ RTSI_Trig_Output_Bits(5, NI_RTSI_OUTPUT_G_SRC0) | ++ RTSI_Trig_Output_Bits(6, NI_RTSI_OUTPUT_G_GATE0); ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ devpriv->rtsi_trig_b_output_reg |= ++ RTSI_Trig_Output_Bits(7, NI_RTSI_OUTPUT_RTSI_OSC); ++ devpriv->stc_writew(dev, devpriv->rtsi_trig_b_output_reg, ++ RTSI_Trig_B_Output_Register); ++} ++ ++int a4l_ni_E_init(struct a4l_device *dev) ++{ ++ int ret; ++ unsigned int j, counter_variant; ++ struct a4l_subdevice *subd; ++ ++ if (boardtype.n_aochan > MAX_N_AO_CHAN) { ++ a4l_err(dev, "bug! 
boardtype.n_aochan > MAX_N_AO_CHAN\n"); ++ return -EINVAL; ++ } ++ ++ /* analog input subdevice */ ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: starting attach procedure...\n"); ++ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: registering AI subdevice...\n"); ++ ++ if (boardtype.n_adchan) { ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: AI: %d channels\n", boardtype.n_adchan); ++ ++ subd->flags = A4L_SUBD_AI | A4L_SUBD_CMD | A4L_SUBD_MMAP; ++ subd->rng_desc = ni_range_lkup[boardtype.gainlkup]; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = boardtype.n_adchan; ++ subd->chan_desc->chans[0].flags = A4L_CHAN_AREF_DIFF; ++ if (boardtype.reg_type != ni_reg_611x) ++ subd->chan_desc->chans[0].flags |= A4L_CHAN_AREF_GROUND | ++ A4L_CHAN_AREF_COMMON | A4L_CHAN_AREF_OTHER; ++ subd->chan_desc->chans[0].nb_bits = boardtype.adbits; ++ ++ subd->insn_read = ni_ai_insn_read; ++ subd->insn_config = ni_ai_insn_config; ++ subd->do_cmdtest = ni_ai_cmdtest; ++ subd->do_cmd = ni_ai_cmd; ++ subd->cancel = ni_ai_reset; ++ subd->trigger = ni_ai_inttrig; ++ ++ subd->munge = (boardtype.adbits > 16) ? ++ ni_ai_munge32 : ni_ai_munge16; ++ ++ subd->cmd_mask = &mio_ai_cmd_mask; ++ } else { ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: AI subdevice not present\n"); ++ subd->flags = A4L_SUBD_UNUSED; ++ } ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_AI_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: AI subdevice registered\n"); ++ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: registering AO subdevice...\n"); ++ ++ /* analog output subdevice */ ++ if (boardtype.n_aochan) { ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: AO: %d channels\n", boardtype.n_aochan); ++ ++ subd->flags = A4L_SUBD_AO; ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = boardtype.n_aochan; ++ subd->chan_desc->chans[0].flags = A4L_CHAN_AREF_GROUND; ++ subd->chan_desc->chans[0].nb_bits = boardtype.aobits; ++ ++ subd->rng_desc = boardtype.ao_range_table; ++ ++ subd->insn_read = ni_ao_insn_read; ++ if (boardtype.reg_type & ni_reg_6xxx_mask) ++ subd->insn_write = &ni_ao_insn_write_671x; ++ else ++ subd->insn_write = &ni_ao_insn_write; ++ ++ ++ if (boardtype.ao_fifo_depth) { ++ subd->flags |= A4L_SUBD_CMD | A4L_SUBD_MMAP; ++ subd->do_cmd = &ni_ao_cmd; ++ subd->cmd_mask = &mio_ao_cmd_mask; ++ subd->do_cmdtest = &ni_ao_cmdtest; ++ subd->trigger = ni_ao_inttrig; ++ if ((boardtype.reg_type & ni_reg_m_series_mask) == 0) ++ subd->munge = &ni_ao_munge; ++ } ++ ++ subd->cancel = &ni_ao_reset; ++ ++ } else { ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: AO subdevice not present\n"); ++ subd->flags = A4L_SUBD_UNUSED; ++ } ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_AO_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: AO subdevice registered\n"); ++ ++ if ((boardtype.reg_type & ni_reg_67xx_mask)) ++ init_ao_67xx(dev); ++ ++ /* digital i/o subdevice */ ++ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: registering DIO subdevice...\n"); ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: DIO: %d channels\n", ++ 
boardtype.num_p0_dio_channels); ++ ++ subd->flags = A4L_SUBD_DIO; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = boardtype.num_p0_dio_channels; ++ subd->chan_desc->chans[0].flags = A4L_CHAN_AREF_GROUND; ++ subd->chan_desc->chans[0].nb_bits = 1; ++ devpriv->io_bits = 0; /* all bits input */ ++ ++ subd->rng_desc = &range_digital; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ ++ if (subd->chan_desc->length == 8) ++ subd->insn_bits = ni_m_series_dio_insn_bits_8; ++ else ++ subd->insn_bits = ni_m_series_dio_insn_bits_32; ++ ++ subd->insn_config = ni_m_series_dio_insn_config; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: DIO: command feature available\n"); ++ ++ subd->flags |= A4L_SUBD_CMD; ++ subd->do_cmd = ni_cdio_cmd; ++ subd->do_cmdtest = ni_cdio_cmdtest; ++ subd->cmd_mask = &mio_dio_cmd_mask; ++ subd->cancel = ni_cdio_cancel; ++ subd->trigger = ni_cdo_inttrig; ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ ni_writel(CDO_Reset_Bit | CDI_Reset_Bit, M_Offset_CDIO_Command); ++ ni_writel(devpriv->io_bits, M_Offset_DIO_Direction); ++ } else { ++ ++ subd->insn_bits = ni_dio_insn_bits; ++ subd->insn_config = ni_dio_insn_config; ++ devpriv->dio_control = DIO_Pins_Dir(devpriv->io_bits); ++ ni_writew(devpriv->dio_control, DIO_Control_Register); ++ } ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_DIO_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: DIO subdevice registered\n"); ++ ++ /* 8255 device */ ++ subd = a4l_alloc_subd(sizeof(subd_8255_t), NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: registering 8255 subdevice...\n"); ++ ++ if (boardtype.has_8255) { ++ devpriv->subd_8255.cb_arg = (unsigned long)dev; ++ devpriv->subd_8255.cb_func = ni_8255_callback; ++ a4l_subdev_8255_init(subd); ++ } else { ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: 8255 subdevice not present\n"); ++ subd->flags = A4L_SUBD_UNUSED; ++ } ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_8255_DIO_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: 8255 subdevice registered\n"); ++ ++ /* formerly general purpose counter/timer device, but no longer used */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ subd->flags = A4L_SUBD_UNUSED; ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_UNUSED_SUBDEV) ++ return ret; ++ ++ /* calibration subdevice -- ai and ao */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: registering calib subdevice...\n"); ++ ++ subd->flags = A4L_SUBD_CALIB; ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ /* internal PWM analog output ++ used for AI nonlinearity calibration */ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: calib: M series calibration"); ++ subd->insn_config = ni_m_series_pwm_config; ++ ni_writel(0x0, M_Offset_Cal_PWM); ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ /* internal PWM analog output ++ used for AI nonlinearity calibration */ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: calib: 6143 calibration"); ++ subd->insn_config = ni_6143_pwm_config; ++ } else { ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: calib: common calibration"); ++ subd->insn_read = ni_calib_insn_read; ++ subd->insn_write = ni_calib_insn_write; ++ 
caldac_setup(dev, subd); ++ } ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_CALIBRATION_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: calib subdevice registered\n"); ++ ++ /* EEPROM */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: registering EEPROM subdevice...\n"); ++ ++ subd->flags = A4L_SUBD_MEMORY; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 8; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ subd->chan_desc->length = M_SERIES_EEPROM_SIZE; ++ subd->insn_read = ni_m_series_eeprom_insn_read; ++ } else { ++ subd->chan_desc->length = 512; ++ subd->insn_read = ni_eeprom_insn_read; ++ } ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: EEPROM: size = %lu\n", subd->chan_desc->length); ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_EEPROM_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: EEPROM subdevice registered\n"); ++ ++ /* PFI */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: registering PFI(DIO) subdevice...\n"); ++ ++ subd->flags = A4L_SUBD_DIO; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 1; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ unsigned int i; ++ subd->chan_desc->length = 16; ++ ni_writew(devpriv->dio_state, M_Offset_PFI_DO); ++ for (i = 0; i < NUM_PFI_OUTPUT_SELECT_REGS; ++i) { ++ ni_writew(devpriv->pfi_output_select_reg[i], ++ M_Offset_PFI_Output_Select(i + 1)); ++ } ++ } else ++ subd->chan_desc->length = 10; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: PFI: %lu bits...\n", subd->chan_desc->length); ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ subd->insn_bits = ni_pfi_insn_bits; ++ } ++ ++ subd->insn_config = ni_pfi_insn_config; ++ ni_set_bits(dev, IO_Bidirection_Pin_Register, ~0, 0); ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_PFI_DIO_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: PFI subdevice registered\n"); ++ ++ /* cs5529 calibration adc */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++#if 0 /* TODO: add subdevices callbacks */ ++ subd->flags = A4L_SUBD_AI; ++ ++ if (boardtype.reg_type & ni_reg_67xx_mask) { ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = boardtype.n_aochan; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 16; ++ ++ /* one channel for each analog output channel */ ++ subd->rng_desc = &a4l_range_unknown; /* XXX */ ++ s->insn_read = cs5529_ai_insn_read; ++ init_cs5529(dev); ++ } else ++#endif /* TODO: add subdevices callbacks */ ++ subd->flags = A4L_SUBD_UNUSED; ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_CS5529_CALIBRATION_SUBDEV) ++ return ret; ++ ++ /* Serial */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: registering serial subdevice...\n"); ++ ++ subd->flags = A4L_SUBD_SERIAL; ++ ++ subd->chan_desc = 
kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = 1; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 8; ++ ++ subd->insn_config = ni_serial_insn_config; ++ ++ devpriv->serial_interval_ns = 0; ++ devpriv->serial_hw_mode = 0; ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_SERIAL_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: serial subdevice registered\n"); ++ ++ /* RTSI */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++#if 1 /* TODO: add RTSI subdevice */ ++ subd->flags = A4L_SUBD_UNUSED; ++ ni_rtsi_init(dev); ++ ++#else /* TODO: add RTSI subdevice */ ++ subd->flags = A4L_SUBD_DIO; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = 8; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 1; ++ ++ subd->insn_bits = ni_rtsi_insn_bits; ++ subd->insn_config = ni_rtsi_insn_config; ++ ni_rtsi_init(dev); ++ ++#endif /* TODO: add RTSI subdevice */ ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_RTSI_SUBDEV) ++ return ret; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ counter_variant = ni_gpct_variant_m_series; ++ } else { ++ counter_variant = ni_gpct_variant_e_series; ++ } ++ devpriv->counter_dev = ++ a4l_ni_gpct_device_construct(dev, ++ &ni_gpct_write_register, ++ &ni_gpct_read_register, ++ counter_variant, NUM_GPCT); ++ ++ /* General purpose counters */ ++ for (j = 0; j < NUM_GPCT; ++j) { ++ struct ni_gpct *counter; ++ ++ subd = a4l_alloc_subd(sizeof(struct ni_gpct), NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: registering GPCT[%d] subdevice...\n", j); ++ ++ subd->flags = A4L_SUBD_COUNTER; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = 3; ++ subd->chan_desc->chans[0].flags = 0; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ subd->chan_desc->chans[0].nb_bits = 32; ++ else ++ subd->chan_desc->chans[0].nb_bits = 24; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: GPCT[%d]: %lu bits\n", ++ j, subd->chan_desc->chans[0].nb_bits); ++ ++ subd->insn_read = ni_gpct_insn_read; ++ subd->insn_write = ni_gpct_insn_write; ++ subd->insn_config = ni_gpct_insn_config; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: GPCT[%d]: command feature available\n", j); ++ subd->flags |= A4L_SUBD_CMD; ++ subd->cmd_mask = &a4l_ni_tio_cmd_mask; ++ subd->do_cmd = ni_gpct_cmd; ++ subd->do_cmdtest = ni_gpct_cmdtest; ++ subd->cancel = ni_gpct_cancel; ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ counter = (struct ni_gpct *)subd->priv; ++ rtdm_lock_init(&counter->lock); ++ counter->chip_index = 0; ++ counter->counter_index = j; ++ counter->counter_dev = devpriv->counter_dev; ++ devpriv->counter_dev->counters[j] = counter; ++ ++ a4l_ni_tio_init_counter(counter); ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_GPCT_SUBDEV(j)) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: GCPT[%d] subdevice registered\n", j); ++ } ++ ++ /* Frequency output */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return 
-ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: registering counter subdevice...\n"); ++ ++ subd->flags = A4L_SUBD_COUNTER; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = 1; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 4; ++ ++ subd->insn_read = ni_freq_out_insn_read; ++ subd->insn_write = ni_freq_out_insn_write; ++ subd->insn_config = ni_freq_out_insn_config; ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_FREQ_OUT_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: counter subdevice registered\n"); ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: initializing AI...\n"); ++ ++ /* ai configuration */ ++ ni_ai_reset(a4l_get_subd(dev, NI_AI_SUBDEV)); ++ if ((boardtype.reg_type & ni_reg_6xxx_mask) == 0) { ++ // BEAM is this needed for PCI-6143 ?? ++ devpriv->clock_and_fout = ++ Slow_Internal_Time_Divide_By_2 | ++ Slow_Internal_Timebase | ++ Clock_To_Board_Divide_By_2 | ++ Clock_To_Board | ++ AI_Output_Divide_By_2 | AO_Output_Divide_By_2; ++ } else { ++ devpriv->clock_and_fout = ++ Slow_Internal_Time_Divide_By_2 | ++ Slow_Internal_Timebase | ++ Clock_To_Board_Divide_By_2 | Clock_To_Board; ++ } ++ devpriv->stc_writew(dev, devpriv->clock_and_fout, ++ Clock_and_FOUT_Register); ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: AI initialization OK\n"); ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: initializing A0...\n"); ++ ++ /* analog output configuration */ ++ ni_ao_reset(a4l_get_subd(dev, NI_AO_SUBDEV)); ++ ++ if (a4l_get_irq(dev) != A4L_IRQ_UNUSED) { ++ devpriv->stc_writew(dev, ++ (devpriv->irq_polarity ? Interrupt_Output_Polarity : 0) | ++ (Interrupt_Output_On_3_Pins & 0) | Interrupt_A_Enable | ++ Interrupt_B_Enable | ++ Interrupt_A_Output_Select(devpriv->irq_pin) | ++ Interrupt_B_Output_Select(devpriv->irq_pin), ++ Interrupt_Control_Register); ++ } ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: A0 initialization OK\n"); ++ ++ /* DMA setup */ ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: DMA setup\n"); ++ ++ ni_writeb(devpriv->ai_ao_select_reg, AI_AO_Select); ++ ni_writeb(devpriv->g0_g1_select_reg, G0_G1_Select); ++ ++ if (boardtype.reg_type & ni_reg_6xxx_mask) { ++ ni_writeb(0, Magic_611x); ++ } else if (boardtype.reg_type & ni_reg_m_series_mask) { ++ int channel; ++ for (channel = 0; channel < boardtype.n_aochan; ++channel) { ++ ni_writeb(0xf, M_Offset_AO_Waveform_Order(channel)); ++ ni_writeb(0x0, ++ M_Offset_AO_Reference_Attenuation(channel)); ++ } ++ ni_writeb(0x0, M_Offset_AO_Calibration); ++ } ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: attach procedure complete\n"); ++ ++ return 0; ++} ++ ++MODULE_DESCRIPTION("Analogy support for NI DAQ-STC based boards"); ++MODULE_LICENSE("GPL"); ++ ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ai); ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ai_limited); ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ai_limited14); ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ai_bipolar4); ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ai_611x); ++EXPORT_SYMBOL_GPL(a4l_range_ni_M_ai_622x); ++EXPORT_SYMBOL_GPL(a4l_range_ni_M_ai_628x); ++EXPORT_SYMBOL_GPL(a4l_range_ni_S_ai_6143); ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ao_ext); ++EXPORT_SYMBOL_GPL(a4l_ni_E_interrupt); ++EXPORT_SYMBOL_GPL(a4l_ni_E_init); +--- linux/drivers/xenomai/analogy/national_instruments/pcimio.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/pcimio.c 2021-04-07 16:01:27.801633346 +0800 +@@ -0,0 +1,1603 @@ ++/* 
++ * Hardware driver for NI PCI-MIO E series cards ++ * ++ * Copyright (C) 1997-8 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Description: National Instruments PCI-MIO-E series and M series ++ * (all boards) ++ * ++ * Author: ds, John Hallen, Frank Mori Hess, Rolf Mueller, Herbert Peremans, ++ * Herman Bruyninckx, Terry Barnaby ++ * Status: works ++ * Devices: [National Instruments] PCI-MIO-16XE-50 (ni_pcimio), ++ * PCI-MIO-16XE-10, PXI-6030E, PCI-MIO-16E-1, PCI-MIO-16E-4, PCI-6014, ++ * PCI-6040E,PXI-6040E, PCI-6030E, PCI-6031E, PCI-6032E, PCI-6033E, ++ * PCI-6071E, PCI-6023E, PCI-6024E, PCI-6025E, PXI-6025E, PCI-6034E, ++ * PCI-6035E, PCI-6052E, PCI-6110, PCI-6111, PCI-6220, PCI-6221, ++ * PCI-6224, PCI-6225, PCI-6229, PCI-6250, PCI-6251, PCIe-6251, ++ * PCI-6254, PCI-6259, PCIe-6259, PCI-6280, PCI-6281, PXI-6281, ++ * PCI-6284, PCI-6289, PCI-6711, PXI-6711, PCI-6713, PXI-6713, ++ * PXI-6071E, PCI-6070E, PXI-6070E, PXI-6052E, PCI-6036E, PCI-6731, ++ * PCI-6733, PXI-6733, PCI-6143, PXI-6143 ++ * ++ * These boards are almost identical to the AT-MIO E series, except that ++ * they use the PCI bus instead of ISA (i.e., AT). See the notes for ++ * the ni_atmio.o driver for additional information about these boards. ++ * ++ * By default, the driver uses DMA to transfer analog input data to ++ * memory. When DMA is enabled, not all triggering features are ++ * supported. ++ * ++ * Note that the PCI-6143 is a simultaneous sampling device with 8 ++ * convertors. With this board all of the convertors perform one ++ * simultaneous sample during a scan interval. The period for a scan ++ * is used for the convert time in an Analgoy cmd. The convert trigger ++ * source is normally set to TRIG_NOW by default. ++ * ++ * The RTSI trigger bus is supported on these cards on subdevice ++ * 10. See the Analogy library documentation for details. ++ * ++ * References: ++ * 341079b.pdf PCI E Series Register-Level Programmer Manual ++ * 340934b.pdf DAQ-STC reference manual ++ * 322080b.pdf 6711/6713/6715 User Manual ++ * 320945c.pdf PCI E Series User Manual ++ * 322138a.pdf PCI-6052E and DAQPad-6052E User Manual ++ * ++ * ISSUES: ++ * - When DMA is enabled, XXX_EV_CONVERT does not work correctly. ++ * - Calibration is not fully implemented ++ * - SCXI is probably broken for m-series boards ++ * - Digital I/O may not work on 673x. ++ * - Information (number of channels, bits, etc.) for some devices may ++ * be incorrect. Please check this and submit a bug if there are ++ * problems for your device. ++ * - Need to deal with external reference for DAC, and other DAC ++ * properties in board properties ++ * - Deal with at-mio-16de-10 revision D to N changes, etc. ++ * - Need to add other CALDAC type ++ * - Need to slow down DAC loading. I don't trust NI's claim that two ++ * writes to the PCI bus slows IO enough. 
I would prefer to use ++ * a4l_udelay(). Timing specs: (clock) ++ * AD8522 30ns ++ * DAC8043 120ns ++ * DAC8800 60ns ++ * MB88341 ? ++ * ++ */ ++ ++#include ++#include ++ ++#include "../intel/8255.h" ++#include "ni_stc.h" ++#include "ni_mio.h" ++#include "mite.h" ++ ++#define PCIMIO_IRQ_POLARITY 1 ++ ++/* The following two tables must be in the same order */ ++static struct pci_device_id ni_pci_table[] __maybe_unused = { ++ { PCI_VENDOR_ID_NATINST, 0x0162, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1170, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1180, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1190, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x11b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x11c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x11d0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1270, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1330, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1350, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x14e0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x14f0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1580, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x15b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1880, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1870, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x18b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x18c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2430, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2890, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x28c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2a60, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2a70, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2a80, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2ab0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2b80, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2b90, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2c80, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2ca0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70aa, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70ab, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70ac, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70af, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70b4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70b6, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70b7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70b8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70bc, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70bd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70bf, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { 
PCI_VENDOR_ID_NATINST, 0x70c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70f2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x710d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x716c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x717f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x71bc, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x717d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { 0 } ++}; ++MODULE_DEVICE_TABLE(pci, ni_pci_table); ++ ++/* These are not all the possible ao ranges for 628x boards. ++ They can do OFFSET +- REFERENCE where OFFSET can be ++ 0V, 5V, APFI<0,1>, or AO<0...3> and RANGE can ++ be 10V, 5V, 2V, 1V, APFI<0,1>, AO<0...3>. That's ++ 63 different possibilities. An AO channel ++ can not act as it's own OFFSET or REFERENCE. ++*/ ++ ++#if 0 ++static struct a4l_rngtab rng_ni_M_628x_ao = { 8, { ++ RANGE(-10, 10), ++ RANGE(-5, 5), ++ RANGE(-2, 2), ++ RANGE(-1, 1), ++ RANGE(-5, 15), ++ RANGE(0, 10), ++ RANGE(3, 7), ++ RANGE(4, 6), ++ RANGE_ext(-1, 1) ++}}; ++static struct a4l_rngdesc range_ni_M_628x_ao = ++ RNG_GLOBAL(rng_ni_M_628x_ao); ++#endif ++ ++static struct a4l_rngtab rng_ni_M_625x_ao = { 3, { ++ RANGE(-10, 10), ++ RANGE(-5, 5), ++ RANGE_ext(-1, 1) ++}}; ++static struct a4l_rngdesc range_ni_M_625x_ao = ++ RNG_GLOBAL(rng_ni_M_625x_ao); ++ ++static struct a4l_rngtab rng_ni_M_622x_ao = { 1, { ++ RANGE(-10, 10), ++}}; ++static struct a4l_rngdesc range_ni_M_622x_ao = ++ RNG_GLOBAL(rng_ni_M_622x_ao); ++ ++static ni_board ni_boards[]={ ++ { device_id: 0x0162, // NI also says 0x1620. typo? ++ name: "pci-mio-16xe-50", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 2048, ++ alwaysdither: 1, ++ gainlkup: ai_gain_8, ++ ai_speed: 50000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 50000, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043}, ++ has_8255: 0, ++ }, ++ { device_id: 0x1170, ++ name: "pci-mio-16xe-10", // aka pci-6030E ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 10000, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ has_8255: 0, ++ }, ++ { device_id: 0x28c0, ++ name: "pci-6014", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x11d0, ++ name: "pxi-6030e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 10000, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ has_8255: 0, ++ }, ++ ++ { device_id: 0x1180, ++ name: "pci-mio-16e-1", /* aka pci-6070e */ ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_16, ++ ai_speed: 800, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: 
{mb88341}, ++ has_8255: 0, ++ }, ++ { device_id: 0x1190, ++ name: "pci-mio-16e-4", /* aka pci-6040e */ ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_16, ++ /* Note: there have been reported problems with full speed ++ * on this board */ ++ ai_speed: 2000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 512, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, // doc says mb88341 ++ has_8255: 0, ++ }, ++ { device_id: 0x11c0, ++ name: "pxi-6040e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_16, ++ ai_speed: 2000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 512, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: {mb88341}, ++ has_8255: 0, ++ }, ++ ++ { device_id: 0x1330, ++ name: "pci-6031e", ++ n_adchan: 64, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 10000, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ has_8255: 0, ++ }, ++ { device_id: 0x1270, ++ name: "pci-6032e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ has_8255: 0, ++ }, ++ { device_id: 0x1340, ++ name: "pci-6033e", ++ n_adchan: 64, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ has_8255: 0, ++ }, ++ { device_id: 0x1350, ++ name: "pci-6071e", ++ n_adchan: 64, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_16, ++ ai_speed: 800, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x2a60, ++ name: "pci-6023e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, /* manual is wrong */ ++ has_8255: 0, ++ }, ++ { device_id: 0x2a70, ++ name: "pci-6024e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, /* manual is wrong */ ++ has_8255: 0, ++ }, ++ { device_id: 0x2a80, ++ name: "pci-6025e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, /* manual is wrong */ ++ has_8255: 1, ++ }, ++ { device_id: 0x2ab0, ++ name: "pxi-6025e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ 
alwaysdither: 0, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, /* manual is wrong */ ++ has_8255: 1, ++ }, ++ ++ { device_id: 0x2ca0, ++ name: "pci-6034e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x2c80, ++ name: "pci-6035e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x18b0, ++ name: "pci-6052e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_16, ++ ai_speed: 3000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_unipolar: 1, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_speed: 3000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug,ad8804_debug,ad8522}, /* manual is wrong */ ++ }, ++ { device_id: 0x14e0, ++ name: "pci-6110", ++ n_adchan: 4, ++ adbits: 12, ++ ai_fifo_depth: 8192, ++ alwaysdither: 0, ++ gainlkup: ai_gain_611x, ++ ai_speed: 200, ++ n_aochan: 2, ++ aobits: 16, ++ reg_type: ni_reg_611x, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_fifo_depth: 2048, ++ ao_speed: 250, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804,ad8804}, ++ }, ++ { device_id: 0x14f0, ++ name: "pci-6111", ++ n_adchan: 2, ++ adbits: 12, ++ ai_fifo_depth: 8192, ++ alwaysdither: 0, ++ gainlkup: ai_gain_611x, ++ ai_speed: 200, ++ n_aochan: 2, ++ aobits: 16, ++ reg_type: ni_reg_611x, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_fifo_depth: 2048, ++ ao_speed: 250, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804,ad8804}, ++ }, ++#if 0 /* Need device IDs */ ++ /* The 6115 boards probably need their own driver */ ++ { device_id: 0x2ed0, ++ name: "pci-6115", ++ n_adchan: 4, ++ adbits: 12, ++ ai_fifo_depth: 8192, ++ alwaysdither: 0, ++ gainlkup: ai_gain_611x, ++ ai_speed: 100, ++ n_aochan: 2, ++ aobits: 16, ++ ao_671x: 1, ++ ao_unipolar: 0, ++ ao_fifo_depth: 2048, ++ ao_speed: 250, ++ .num_p0_dio_channels = 8, ++ reg_611x: 1, ++ caldac: {ad8804_debug,ad8804_debug,ad8804_debug},/* XXX */ ++ }, ++#endif ++#if 0 /* Need device IDs */ ++ { device_id: 0x0000, ++ name: "pxi-6115", ++ n_adchan: 4, ++ adbits: 12, ++ ai_fifo_depth: 8192, ++ alwaysdither: 0, ++ gainlkup: ai_gain_611x, ++ ai_speed: 100, ++ n_aochan: 2, ++ aobits: 16, ++ ao_671x: 1, ++ ao_unipolar: 0, ++ ao_fifo_depth: 2048, ++ ao_speed: 250, ++ reg_611x: 1, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug,ad8804_debug,ad8804_debug},/* XXX */ ++ }, ++#endif ++ { device_id: 0x1880, ++ name: "pci-6711", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 4, ++ aobits: 12, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, /* data sheet says 8192, but fifo really holds 16384 samples */ ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6711, ++ caldac: {ad8804_debug}, ++ }, ++ { device_id: 0x2b90, ++ name: "pxi-6711", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 4, ++ 
aobits: 12, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6711, ++ caldac: {ad8804_debug}, ++ }, ++ { device_id: 0x1870, ++ name: "pci-6713", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 8, ++ aobits: 12, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6713, ++ caldac: {ad8804_debug,ad8804_debug}, ++ }, ++ { device_id: 0x2b80, ++ name: "pxi-6713", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 8, ++ aobits: 12, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6713, ++ caldac: {ad8804_debug,ad8804_debug}, ++ }, ++ { device_id: 0x2430, ++ name: "pci-6731", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 4, ++ aobits: 16, ++ ao_unipolar: 0, ++ ao_fifo_depth: 8192, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6711, ++ caldac: {ad8804_debug}, ++ }, ++#if 0 /* Need device IDs */ ++ { device_id: 0x0, ++ name: "pxi-6731", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 4, ++ aobits: 16, ++ ao_unipolar: 0, ++ ao_fifo_depth: 8192, ++ .ao_range_table = &a4l_range_bipolar10, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6711, ++ caldac: {ad8804_debug}, ++ }, ++#endif ++ { device_id: 0x2410, ++ name: "pci-6733", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 8, ++ aobits: 16, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6713, ++ caldac: {ad8804_debug,ad8804_debug}, ++ }, ++ { device_id: 0x2420, ++ name: "pxi-6733", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 8, ++ aobits: 16, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6713, ++ caldac: {ad8804_debug,ad8804_debug}, ++ }, ++ { device_id: 0x15b0, ++ name: "pxi-6071e", ++ n_adchan: 64, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_16, ++ ai_speed: 800, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x11b0, ++ name: "pxi-6070e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_16, ++ ai_speed: 800, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x18c0, ++ name: "pxi-6052e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_16, ++ ai_speed: 3000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_unipolar: 1, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_speed: 3000, ++ .num_p0_dio_channels = 8, ++ caldac: {mb88341,mb88341,ad8522}, ++ }, ++ { device_id: 0x1580, ++ name: "pxi-6031e", ++ n_adchan: 64, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 
10000, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ }, ++ { device_id: 0x2890, ++ name: "pci-6036e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70b0, ++ name: "pci-6220", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, //FIXME: guess ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70af, ++ name: "pci-6221", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &a4l_range_bipolar10, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ ao_speed: 1200, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x71bc, ++ name: "pci-6221_37pin", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &a4l_range_bipolar10, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ ao_speed: 1200, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70f2, ++ name: "pci-6224", ++ n_adchan: 32, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x716c, ++ name: "pci-6225", ++ n_adchan: 80, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_622x_ao, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ ao_speed: 1200, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70aa, ++ name: "pci-6229", ++ n_adchan: 32, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 4, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_622x_ao, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ ao_speed: 1200, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70b4, ++ name: "pci-6250", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70b8, ++ name: "pci-6251", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_625x_ao, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ ao_speed: 357, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x717d, ++ name: "pcie-6251", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 2, ++ 
aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_625x_ao, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ ao_speed: 357, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70b7, ++ name: "pci-6254", ++ n_adchan: 32, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70ab, ++ name: "pci-6259", ++ n_adchan: 32, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 4, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_625x_ao, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ ao_speed: 357, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x717f, ++ name: "pcie-6259", ++ n_adchan: 32, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 4, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_625x_ao, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ ao_speed: 357, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++#if 0 /* TODO: fix data size */ ++ { device_id: 0x70b6, ++ name: "pci-6280", ++ n_adchan: 16, ++ adbits: 18, ++ ai_fifo_depth: 2047, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 1600, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 8191, ++ reg_type: ni_reg_628x, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70bd, ++ name: "pci-6281", ++ n_adchan: 16, ++ adbits: 18, ++ ai_fifo_depth: 2047, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 1600, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_628x_ao, ++ reg_type: ni_reg_628x, ++ ao_unipolar: 1, ++ ao_speed: 357, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70bf, ++ name: "pxi-6281", ++ n_adchan: 16, ++ adbits: 18, ++ ai_fifo_depth: 2047, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 1600, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_628x_ao, ++ reg_type: ni_reg_628x, ++ ao_unipolar: 1, ++ ao_speed: 357, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70bc, ++ name: "pci-6284", ++ n_adchan: 32, ++ adbits: 18, ++ ai_fifo_depth: 2047, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 1600, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ reg_type: ni_reg_628x, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70ac, ++ name: "pci-6289", ++ n_adchan: 32, ++ adbits: 18, ++ ai_fifo_depth: 2047, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 1600, ++ n_aochan: 4, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_628x_ao, ++ reg_type: ni_reg_628x, ++ ao_unipolar: 1, ++ ao_speed: 357, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++#endif /* TODO: fix data size */ ++ { device_id: 0x70C0, ++ name: "pci-6143", ++ n_adchan: 8, ++ adbits: 16, ++ ai_fifo_depth: 1024, ++ alwaysdither: 0, ++ gainlkup: ai_gain_6143, ++ ai_speed: 4000, ++ n_aochan: 0, ++ aobits: 0, ++ reg_type: ni_reg_6143, ++ ao_unipolar: 0, ++ ao_fifo_depth: 0, ++ .num_p0_dio_channels = 8, ++ .caldac = {ad8804_debug,ad8804_debug}, ++ }, ++ { 
device_id: 0x710D, ++ name: "pxi-6143", ++ n_adchan: 8, ++ adbits: 16, ++ ai_fifo_depth: 1024, ++ alwaysdither: 0, ++ gainlkup: ai_gain_6143, ++ ai_speed: 4000, ++ n_aochan: 0, ++ aobits: 0, ++ reg_type: ni_reg_6143, ++ ao_unipolar: 0, ++ ao_fifo_depth: 0, ++ .num_p0_dio_channels = 8, ++ .caldac = {ad8804_debug,ad8804_debug}, ++ }, ++}; ++#define n_pcimio_boards ((sizeof(ni_boards)/sizeof(ni_boards[0]))) ++ ++/* How we access STC registers */ ++ ++/* We automatically take advantage of STC registers that can be ++ * read/written directly in the I/O space of the board. Most ++ * PCIMIO devices map the low 8 STC registers to iobase+addr*2. ++ * The 611x devices map the write registers to iobase+addr*2, and ++ * the read registers to iobase+(addr-1)*2. */ ++/* However, the 611x boards still aren't working, so I'm disabling ++ * non-windowed STC access temporarily */ ++ ++static void e_series_win_out(struct a4l_device *dev, uint16_t data, int reg) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->window_lock, flags); ++ ni_writew(reg, Window_Address); ++ ni_writew(data, Window_Data); ++ rtdm_lock_put_irqrestore(&devpriv->window_lock, flags); ++} ++ ++static uint16_t e_series_win_in(struct a4l_device *dev, int reg) ++{ ++ unsigned long flags; ++ uint16_t ret; ++ ++ rtdm_lock_get_irqsave(&devpriv->window_lock, flags); ++ ni_writew(reg, Window_Address); ++ ret = ni_readw(Window_Data); ++ rtdm_lock_put_irqrestore(&devpriv->window_lock,flags); ++ ++ return ret; ++} ++ ++static void m_series_stc_writew(struct a4l_device *dev, uint16_t data, int reg) ++{ ++ unsigned offset; ++ switch(reg) ++ { ++ case ADC_FIFO_Clear: ++ offset = M_Offset_AI_FIFO_Clear; ++ break; ++ case AI_Command_1_Register: ++ offset = M_Offset_AI_Command_1; ++ break; ++ case AI_Command_2_Register: ++ offset = M_Offset_AI_Command_2; ++ break; ++ case AI_Mode_1_Register: ++ offset = M_Offset_AI_Mode_1; ++ break; ++ case AI_Mode_2_Register: ++ offset = M_Offset_AI_Mode_2; ++ break; ++ case AI_Mode_3_Register: ++ offset = M_Offset_AI_Mode_3; ++ break; ++ case AI_Output_Control_Register: ++ offset = M_Offset_AI_Output_Control; ++ break; ++ case AI_Personal_Register: ++ offset = M_Offset_AI_Personal; ++ break; ++ case AI_SI2_Load_A_Register: ++ /* This is actually a 32 bit register on m series boards */ ++ ni_writel(data, M_Offset_AI_SI2_Load_A); ++ return; ++ break; ++ case AI_SI2_Load_B_Register: ++ /* This is actually a 32 bit register on m series boards */ ++ ni_writel(data, M_Offset_AI_SI2_Load_B); ++ return; ++ break; ++ case AI_START_STOP_Select_Register: ++ offset = M_Offset_AI_START_STOP_Select; ++ break; ++ case AI_Trigger_Select_Register: ++ offset = M_Offset_AI_Trigger_Select; ++ break; ++ case Analog_Trigger_Etc_Register: ++ offset = M_Offset_Analog_Trigger_Etc; ++ break; ++ case AO_Command_1_Register: ++ offset = M_Offset_AO_Command_1; ++ break; ++ case AO_Command_2_Register: ++ offset = M_Offset_AO_Command_2; ++ break; ++ case AO_Mode_1_Register: ++ offset = M_Offset_AO_Mode_1; ++ break; ++ case AO_Mode_2_Register: ++ offset = M_Offset_AO_Mode_2; ++ break; ++ case AO_Mode_3_Register: ++ offset = M_Offset_AO_Mode_3; ++ break; ++ case AO_Output_Control_Register: ++ offset = M_Offset_AO_Output_Control; ++ break; ++ case AO_Personal_Register: ++ offset = M_Offset_AO_Personal; ++ break; ++ case AO_Start_Select_Register: ++ offset = M_Offset_AO_Start_Select; ++ break; ++ case AO_Trigger_Select_Register: ++ offset = M_Offset_AO_Trigger_Select; ++ break; ++ case Clock_and_FOUT_Register: ++ offset = 
M_Offset_Clock_and_FOUT; ++ break; ++ case Configuration_Memory_Clear: ++ offset = M_Offset_Configuration_Memory_Clear; ++ break; ++ case DAC_FIFO_Clear: ++ offset = M_Offset_AO_FIFO_Clear; ++ break; ++ case DIO_Control_Register: ++ rtdm_printk("%s: FIXME: register 0x%x does not map cleanly on to m-series boards.\n", __FUNCTION__, reg); ++ return; ++ break; ++ case G_Autoincrement_Register(0): ++ offset = M_Offset_G0_Autoincrement; ++ break; ++ case G_Autoincrement_Register(1): ++ offset = M_Offset_G1_Autoincrement; ++ break; ++ case G_Command_Register(0): ++ offset = M_Offset_G0_Command; ++ break; ++ case G_Command_Register(1): ++ offset = M_Offset_G1_Command; ++ break; ++ case G_Input_Select_Register(0): ++ offset = M_Offset_G0_Input_Select; ++ break; ++ case G_Input_Select_Register(1): ++ offset = M_Offset_G1_Input_Select; ++ break; ++ case G_Mode_Register(0): ++ offset = M_Offset_G0_Mode; ++ break; ++ case G_Mode_Register(1): ++ offset = M_Offset_G1_Mode; ++ break; ++ case Interrupt_A_Ack_Register: ++ offset = M_Offset_Interrupt_A_Ack; ++ break; ++ case Interrupt_A_Enable_Register: ++ offset = M_Offset_Interrupt_A_Enable; ++ break; ++ case Interrupt_B_Ack_Register: ++ offset = M_Offset_Interrupt_B_Ack; ++ break; ++ case Interrupt_B_Enable_Register: ++ offset = M_Offset_Interrupt_B_Enable; ++ break; ++ case Interrupt_Control_Register: ++ offset = M_Offset_Interrupt_Control; ++ break; ++ case IO_Bidirection_Pin_Register: ++ offset = M_Offset_IO_Bidirection_Pin; ++ break; ++ case Joint_Reset_Register: ++ offset = M_Offset_Joint_Reset; ++ break; ++ case RTSI_Trig_A_Output_Register: ++ offset = M_Offset_RTSI_Trig_A_Output; ++ break; ++ case RTSI_Trig_B_Output_Register: ++ offset = M_Offset_RTSI_Trig_B_Output; ++ break; ++ case RTSI_Trig_Direction_Register: ++ offset = M_Offset_RTSI_Trig_Direction; ++ break; ++ /* FIXME: DIO_Output_Register (16 bit reg) is replaced ++ by M_Offset_Static_Digital_Output (32 bit) and ++ M_Offset_SCXI_Serial_Data_Out (8 bit) */ ++ default: ++ rtdm_printk("%s: bug! unhandled register=0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return; ++ } ++ ni_writew(data, offset); ++} ++ ++static uint16_t m_series_stc_readw(struct a4l_device *dev, int reg) ++{ ++ unsigned offset; ++ switch(reg) ++ { ++ case AI_Status_1_Register: ++ offset = M_Offset_AI_Status_1; ++ break; ++ case AO_Status_1_Register: ++ offset = M_Offset_AO_Status_1; ++ break; ++ case AO_Status_2_Register: ++ offset = M_Offset_AO_Status_2; ++ break; ++ case DIO_Serial_Input_Register: ++ return ni_readb(M_Offset_SCXI_Serial_Data_In); ++ break; ++ case Joint_Status_1_Register: ++ offset = M_Offset_Joint_Status_1; ++ break; ++ case Joint_Status_2_Register: ++ offset = M_Offset_Joint_Status_2; ++ break; ++ case G_Status_Register: ++ offset = M_Offset_G01_Status; ++ break; ++ default: ++ rtdm_printk("%s: bug! 
" ++ "unhandled register=0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return 0; ++ break; ++ } ++ return ni_readw(offset); ++} ++ ++static void m_series_stc_writel(struct a4l_device *dev, uint32_t data, int reg) ++{ ++ unsigned offset; ++ ++ switch(reg) ++ { ++ case AI_SC_Load_A_Registers: ++ offset = M_Offset_AI_SC_Load_A; ++ break; ++ case AI_SI_Load_A_Registers: ++ offset = M_Offset_AI_SI_Load_A; ++ break; ++ case AO_BC_Load_A_Register: ++ offset = M_Offset_AO_BC_Load_A; ++ break; ++ case AO_UC_Load_A_Register: ++ offset = M_Offset_AO_UC_Load_A; ++ break; ++ case AO_UI_Load_A_Register: ++ offset = M_Offset_AO_UI_Load_A; ++ break; ++ case G_Load_A_Register(0): ++ offset = M_Offset_G0_Load_A; ++ break; ++ case G_Load_A_Register(1): ++ offset = M_Offset_G1_Load_A; ++ break; ++ case G_Load_B_Register(0): ++ offset = M_Offset_G0_Load_B; ++ break; ++ case G_Load_B_Register(1): ++ offset = M_Offset_G1_Load_B; ++ break; ++ default: ++ rtdm_printk("%s: bug! unhandled register=0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return; ++ } ++ ni_writel(data, offset); ++} ++ ++static uint32_t m_series_stc_readl(struct a4l_device *dev, int reg) ++{ ++ unsigned offset; ++ switch(reg) ++ { ++ case G_HW_Save_Register(0): ++ offset = M_Offset_G0_HW_Save; ++ break; ++ case G_HW_Save_Register(1): ++ offset = M_Offset_G1_HW_Save; ++ break; ++ case G_Save_Register(0): ++ offset = M_Offset_G0_Save; ++ break; ++ case G_Save_Register(1): ++ offset = M_Offset_G1_Save; ++ break; ++ default: ++ rtdm_printk("%s: bug! unhandled register=0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return 0; ++ } ++ return ni_readl(offset); ++} ++ ++static void win_out2(struct a4l_device *dev, uint32_t data, int reg) ++{ ++ devpriv->stc_writew(dev, data >> 16, reg); ++ devpriv->stc_writew(dev, data & 0xffff, reg + 1); ++} ++ ++static uint32_t win_in2(struct a4l_device *dev, int reg) ++{ ++ uint32_t bits; ++ bits = devpriv->stc_readw(dev, reg) << 16; ++ bits |= devpriv->stc_readw(dev, reg + 1); ++ return bits; ++} ++ ++static void m_series_init_eeprom_buffer(struct a4l_device *dev) ++{ ++ static const int Start_Cal_EEPROM = 0x400; ++ static const unsigned window_size = 10; ++ unsigned old_iodwbsr_bits; ++ unsigned old_iodwbsr1_bits; ++ unsigned old_iodwcr1_bits; ++ int i; ++ ++ old_iodwbsr_bits = readl(devpriv->mite->mite_io_addr + MITE_IODWBSR); ++ old_iodwbsr1_bits = readl(devpriv->mite->mite_io_addr + MITE_IODWBSR_1); ++ old_iodwcr1_bits = readl(devpriv->mite->mite_io_addr + MITE_IODWCR_1); ++ writel(0x0, devpriv->mite->mite_io_addr + MITE_IODWBSR); ++ writel(((0x80 | window_size) | devpriv->mite->daq_phys_addr), ++ devpriv->mite->mite_io_addr + MITE_IODWBSR_1); ++ writel(0x0, devpriv->mite->mite_io_addr + MITE_IODWCR_1); ++ writel(0xf, devpriv->mite->mite_io_addr + 0x30); ++ ++ for(i = 0; i < M_SERIES_EEPROM_SIZE; ++i) ++ { ++ devpriv->eeprom_buffer[i] = ni_readb(Start_Cal_EEPROM + i); ++ } ++ ++ writel(old_iodwbsr1_bits, devpriv->mite->mite_io_addr + MITE_IODWBSR_1); ++ writel(old_iodwbsr_bits, devpriv->mite->mite_io_addr + MITE_IODWBSR); ++ writel(old_iodwcr1_bits, devpriv->mite->mite_io_addr + MITE_IODWCR_1); ++ writel(0x0, devpriv->mite->mite_io_addr + 0x30); ++} ++ ++static void init_6143(struct a4l_device *dev) ++{ ++ /* Disable interrupts */ ++ devpriv->stc_writew(dev, 0, Interrupt_Control_Register); ++ ++ /* Initialise 6143 AI specific bits */ ++ ++ /* Set G0,G1 DMA mode to E series version */ ++ ni_writeb(0x00, Magic_6143); ++ /* Set EOCMode, ADCMode and pipelinedelay */ ++ ni_writeb(0x80, 
PipelineDelay_6143); ++ /* Set EOC Delay */ ++ ni_writeb(0x00, EOC_Set_6143); ++ ++ /* Set the FIFO half full level */ ++ ni_writel(boardtype.ai_fifo_depth / 2, AIFIFO_Flag_6143); ++ ++ /* Strobe Relay disable bit */ ++ devpriv->ai_calib_source_enabled = 0; ++ ni_writew(devpriv->ai_calib_source | Calibration_Channel_6143_RelayOff, ++ Calibration_Channel_6143); ++ ni_writew(devpriv->ai_calib_source, Calibration_Channel_6143); ++} ++ ++static int pcimio_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ int ret, bus, slot, i, irq; ++ struct mite_struct *mite = NULL; ++ struct ni_board_struct *board = NULL; ++ ++ if(arg->opts == NULL || arg->opts_size == 0) ++ bus = slot = 0; ++ else { ++ bus = arg->opts_size >= sizeof(unsigned long) ? ++ ((unsigned long *)arg->opts)[0] : 0; ++ slot = arg->opts_size >= sizeof(unsigned long) * 2 ? ++ ((unsigned long *)arg->opts)[1] : 0; ++ } ++ ++ for(i = 0; i < n_pcimio_boards && mite == NULL; i++) { ++ mite = a4l_mite_find_device(bus, slot, ni_boards[i].device_id); ++ board = &ni_boards[i]; ++ } ++ ++ if(mite == 0) ++ return -ENOENT; ++ ++ devpriv->irq_polarity = PCIMIO_IRQ_POLARITY; ++ devpriv->irq_pin = 0; ++ ++ devpriv->mite = mite; ++ devpriv->board_ptr = board; ++ ++ devpriv->ai_mite_ring = mite_alloc_ring(mite); ++ devpriv->ao_mite_ring = mite_alloc_ring(mite); ++ devpriv->cdo_mite_ring = mite_alloc_ring(mite); ++ devpriv->gpct_mite_ring[0] = mite_alloc_ring(mite); ++ devpriv->gpct_mite_ring[1] = mite_alloc_ring(mite); ++ ++ if(devpriv->ai_mite_ring == NULL || ++ devpriv->ao_mite_ring == NULL || ++ devpriv->cdo_mite_ring == NULL || ++ devpriv->gpct_mite_ring[0] == NULL || ++ devpriv->gpct_mite_ring[1] == NULL) ++ return -ENOMEM; ++ ++ a4l_info(dev, "found %s board\n", boardtype.name); ++ ++ if(boardtype.reg_type & ni_reg_m_series_mask) ++ { ++ devpriv->stc_writew = &m_series_stc_writew; ++ devpriv->stc_readw = &m_series_stc_readw; ++ devpriv->stc_writel = &m_series_stc_writel; ++ devpriv->stc_readl = &m_series_stc_readl; ++ }else ++ { ++ devpriv->stc_writew = &e_series_win_out; ++ devpriv->stc_readw = &e_series_win_in; ++ devpriv->stc_writel = &win_out2; ++ devpriv->stc_readl = &win_in2; ++ } ++ ++ ret = a4l_mite_setup(devpriv->mite, 0); ++ if(ret < 0) ++ { ++ a4l_err(dev, "pcmio_attach: error setting up mite\n"); ++ return ret; ++ } ++ ++ if(boardtype.reg_type & ni_reg_m_series_mask) ++ m_series_init_eeprom_buffer(dev); ++ if(boardtype.reg_type == ni_reg_6143) ++ init_6143(dev); ++ ++ irq = mite_irq(devpriv->mite); ++ ++ if(irq == 0){ ++ a4l_warn(dev, "pcimio_attach: unknown irq (bad)\n\n"); ++ }else{ ++ a4l_info(dev, "found irq %u\n", irq); ++ ret = a4l_request_irq(dev, ++ irq, ++ a4l_ni_E_interrupt, RTDM_IRQTYPE_SHARED, dev); ++ if(ret < 0) ++ a4l_err(dev, "pcimio_attach: irq not available\n"); ++ } ++ ++ ret = a4l_ni_E_init(dev); ++ if(ret < 0) ++ return ret; ++ ++ dev->driver->driver_name = devpriv->board_ptr->name; ++ ++ return ret; ++} ++ ++static int pcimio_detach(struct a4l_device *dev) ++{ ++ if(a4l_get_irq(dev)!=A4L_IRQ_UNUSED){ ++ a4l_free_irq(dev,a4l_get_irq(dev)); ++ } ++ ++ if(dev->priv != NULL && devpriv->mite != NULL) ++ { ++ mite_free_ring(devpriv->ai_mite_ring); ++ mite_free_ring(devpriv->ao_mite_ring); ++ mite_free_ring(devpriv->gpct_mite_ring[0]); ++ mite_free_ring(devpriv->gpct_mite_ring[1]); ++ a4l_mite_unsetup(devpriv->mite); ++ } ++ ++ dev->driver->driver_name = NULL; ++ ++ return 0; ++} ++ ++static struct a4l_driver pcimio_drv = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_ni_pcimio", ++ .driver_name = NULL, ++ 
.attach = pcimio_attach, ++ .detach = pcimio_detach, ++ .privdata_size = sizeof(ni_private), ++}; ++ ++static int __init pcimio_init(void) ++{ ++ return a4l_register_drv(&pcimio_drv); ++} ++ ++static void __exit pcimio_cleanup(void) ++{ ++ a4l_unregister_drv(&pcimio_drv); ++} ++ ++MODULE_DESCRIPTION("Analogy driver for NI PCI-MIO series cards"); ++MODULE_LICENSE("GPL"); ++ ++module_init(pcimio_init); ++module_exit(pcimio_cleanup); +--- linux/drivers/xenomai/analogy/national_instruments/ni_stc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/ni_stc.h 2021-04-07 16:01:27.796633353 +0800 +@@ -0,0 +1,1417 @@ ++/* ++ * Register descriptions for NI DAQ-STC chip ++ * ++ * Copyright (C) 1998-9 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this code; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * References: ++ * 340934b.pdf DAQ-STC reference manual ++ * ++ */ ++#ifndef __ANALOGY_NI_STC_H__ ++#define __ANALOGY_NI_STC_H__ ++ ++#include "ni_tio.h" ++ ++#define _bit15 0x8000 ++#define _bit14 0x4000 ++#define _bit13 0x2000 ++#define _bit12 0x1000 ++#define _bit11 0x0800 ++#define _bit10 0x0400 ++#define _bit9 0x0200 ++#define _bit8 0x0100 ++#define _bit7 0x0080 ++#define _bit6 0x0040 ++#define _bit5 0x0020 ++#define _bit4 0x0010 ++#define _bit3 0x0008 ++#define _bit2 0x0004 ++#define _bit1 0x0002 ++#define _bit0 0x0001 ++ ++#define NUM_PFI_OUTPUT_SELECT_REGS 6 ++ ++/* Registers in the National Instruments DAQ-STC chip */ ++ ++#define Interrupt_A_Ack_Register 2 ++#define G0_Gate_Interrupt_Ack _bit15 ++#define G0_TC_Interrupt_Ack _bit14 ++#define AI_Error_Interrupt_Ack _bit13 ++#define AI_STOP_Interrupt_Ack _bit12 ++#define AI_START_Interrupt_Ack _bit11 ++#define AI_START2_Interrupt_Ack _bit10 ++#define AI_START1_Interrupt_Ack _bit9 ++#define AI_SC_TC_Interrupt_Ack _bit8 ++#define AI_SC_TC_Error_Confirm _bit7 ++#define G0_TC_Error_Confirm _bit6 ++#define G0_Gate_Error_Confirm _bit5 ++ ++#define AI_Status_1_Register 2 ++#define Interrupt_A_St _bit15 ++#define AI_FIFO_Full_St _bit14 ++#define AI_FIFO_Half_Full_St _bit13 ++#define AI_FIFO_Empty_St _bit12 ++#define AI_Overrun_St _bit11 ++#define AI_Overflow_St _bit10 ++#define AI_SC_TC_Error_St _bit9 ++#define AI_START2_St _bit8 ++#define AI_START1_St _bit7 ++#define AI_SC_TC_St _bit6 ++#define AI_START_St _bit5 ++#define AI_STOP_St _bit4 ++#define G0_TC_St _bit3 ++#define G0_Gate_Interrupt_St _bit2 ++#define AI_FIFO_Request_St _bit1 ++#define Pass_Thru_0_Interrupt_St _bit0 ++ ++#define AI_Status_2_Register 5 ++ ++#define Interrupt_B_Ack_Register 3 ++#define G1_Gate_Error_Confirm _bit1 ++#define G1_TC_Error_Confirm _bit2 ++#define AO_BC_TC_Trigger_Error_Confirm _bit3 ++#define AO_BC_TC_Error_Confirm _bit4 ++#define AO_UI2_TC_Error_Confrim _bit5 ++#define AO_UI2_TC_Interrupt_Ack _bit6 ++#define AO_UC_TC_Interrupt_Ack _bit7 ++#define AO_BC_TC_Interrupt_Ack _bit8 
++#define AO_START1_Interrupt_Ack _bit9 ++#define AO_UPDATE_Interrupt_Ack _bit10 ++#define AO_START_Interrupt_Ack _bit11 ++#define AO_STOP_Interrupt_Ack _bit12 ++#define AO_Error_Interrupt_Ack _bit13 ++#define G1_TC_Interrupt_Ack _bit14 ++#define G1_Gate_Interrupt_Ack _bit15 ++ ++#define AO_Status_1_Register 3 ++#define Interrupt_B_St _bit15 ++#define AO_FIFO_Full_St _bit14 ++#define AO_FIFO_Half_Full_St _bit13 ++#define AO_FIFO_Empty_St _bit12 ++#define AO_BC_TC_Error_St _bit11 ++#define AO_START_St _bit10 ++#define AO_Overrun_St _bit9 ++#define AO_START1_St _bit8 ++#define AO_BC_TC_St _bit7 ++#define AO_UC_TC_St _bit6 ++#define AO_UPDATE_St _bit5 ++#define AO_UI2_TC_St _bit4 ++#define G1_TC_St _bit3 ++#define G1_Gate_Interrupt_St _bit2 ++#define AO_FIFO_Request_St _bit1 ++#define Pass_Thru_1_Interrupt_St _bit0 ++ ++ ++#define AI_Command_2_Register 4 ++#define AI_End_On_SC_TC _bit15 ++#define AI_End_On_End_Of_Scan _bit14 ++#define AI_START1_Disable _bit11 ++#define AI_SC_Save_Trace _bit10 ++#define AI_SI_Switch_Load_On_SC_TC _bit9 ++#define AI_SI_Switch_Load_On_STOP _bit8 ++#define AI_SI_Switch_Load_On_TC _bit7 ++#define AI_SC_Switch_Load_On_TC _bit4 ++#define AI_STOP_Pulse _bit3 ++#define AI_START_Pulse _bit2 ++#define AI_START2_Pulse _bit1 ++#define AI_START1_Pulse _bit0 ++ ++#define AO_Command_2_Register 5 ++#define AO_End_On_BC_TC(x) (((x) & 0x3) << 14) ++#define AO_Start_Stop_Gate_Enable _bit13 ++#define AO_UC_Save_Trace _bit12 ++#define AO_BC_Gate_Enable _bit11 ++#define AO_BC_Save_Trace _bit10 ++#define AO_UI_Switch_Load_On_BC_TC _bit9 ++#define AO_UI_Switch_Load_On_Stop _bit8 ++#define AO_UI_Switch_Load_On_TC _bit7 ++#define AO_UC_Switch_Load_On_BC_TC _bit6 ++#define AO_UC_Switch_Load_On_TC _bit5 ++#define AO_BC_Switch_Load_On_TC _bit4 ++#define AO_Mute_B _bit3 ++#define AO_Mute_A _bit2 ++#define AO_UPDATE2_Pulse _bit1 ++#define AO_START1_Pulse _bit0 ++ ++#define AO_Status_2_Register 6 ++ ++#define DIO_Parallel_Input_Register 7 ++ ++#define AI_Command_1_Register 8 ++#define AI_Analog_Trigger_Reset _bit14 ++#define AI_Disarm _bit13 ++#define AI_SI2_Arm _bit12 ++#define AI_SI2_Load _bit11 ++#define AI_SI_Arm _bit10 ++#define AI_SI_Load _bit9 ++#define AI_DIV_Arm _bit8 ++#define AI_DIV_Load _bit7 ++#define AI_SC_Arm _bit6 ++#define AI_SC_Load _bit5 ++#define AI_SCAN_IN_PROG_Pulse _bit4 ++#define AI_EXTMUX_CLK_Pulse _bit3 ++#define AI_LOCALMUX_CLK_Pulse _bit2 ++#define AI_SC_TC_Pulse _bit1 ++#define AI_CONVERT_Pulse _bit0 ++ ++#define AO_Command_1_Register 9 ++#define AO_Analog_Trigger_Reset _bit15 ++#define AO_START_Pulse _bit14 ++#define AO_Disarm _bit13 ++#define AO_UI2_Arm_Disarm _bit12 ++#define AO_UI2_Load _bit11 ++#define AO_UI_Arm _bit10 ++#define AO_UI_Load _bit9 ++#define AO_UC_Arm _bit8 ++#define AO_UC_Load _bit7 ++#define AO_BC_Arm _bit6 ++#define AO_BC_Load _bit5 ++#define AO_DAC1_Update_Mode _bit4 ++#define AO_LDAC1_Source_Select _bit3 ++#define AO_DAC0_Update_Mode _bit2 ++#define AO_LDAC0_Source_Select _bit1 ++#define AO_UPDATE_Pulse _bit0 ++ ++ ++#define DIO_Output_Register 10 ++#define DIO_Parallel_Data_Out(a) ((a)&0xff) ++#define DIO_Parallel_Data_Mask 0xff ++#define DIO_SDOUT _bit0 ++#define DIO_SDIN _bit4 ++#define DIO_Serial_Data_Out(a) (((a)&0xff)<<8) ++#define DIO_Serial_Data_Mask 0xff00 ++ ++#define DIO_Control_Register 11 ++#define DIO_Software_Serial_Control _bit11 ++#define DIO_HW_Serial_Timebase _bit10 ++#define DIO_HW_Serial_Enable _bit9 ++#define DIO_HW_Serial_Start _bit8 ++#define DIO_Pins_Dir(a) ((a)&0xff) ++#define DIO_Pins_Dir_Mask 0xff ++ ++#define 
AI_Mode_1_Register 12 ++#define AI_CONVERT_Source_Select(a) (((a) & 0x1f) << 11) ++#define AI_SI_Source_select(a) (((a) & 0x1f) << 6) ++#define AI_CONVERT_Source_Polarity _bit5 ++#define AI_SI_Source_Polarity _bit4 ++#define AI_Start_Stop _bit3 ++#define AI_Mode_1_Reserved _bit2 ++#define AI_Continuous _bit1 ++#define AI_Trigger_Once _bit0 ++ ++#define AI_Mode_2_Register 13 ++#define AI_SC_Gate_Enable _bit15 ++#define AI_Start_Stop_Gate_Enable _bit14 ++#define AI_Pre_Trigger _bit13 ++#define AI_External_MUX_Present _bit12 ++#define AI_SI2_Initial_Load_Source _bit9 ++#define AI_SI2_Reload_Mode _bit8 ++#define AI_SI_Initial_Load_Source _bit7 ++#define AI_SI_Reload_Mode(a) (((a) & 0x7)<<4) ++#define AI_SI_Write_Switch _bit3 ++#define AI_SC_Initial_Load_Source _bit2 ++#define AI_SC_Reload_Mode _bit1 ++#define AI_SC_Write_Switch _bit0 ++ ++#define AI_SI_Load_A_Registers 14 ++#define AI_SI_Load_B_Registers 16 ++#define AI_SC_Load_A_Registers 18 ++#define AI_SC_Load_B_Registers 20 ++#define AI_SI_Save_Registers 64 ++#define AI_SC_Save_Registers 66 ++ ++#define AI_SI2_Load_A_Register 23 ++#define AI_SI2_Load_B_Register 25 ++ ++#define Joint_Status_1_Register 27 ++#define DIO_Serial_IO_In_Progress_St _bit12 ++ ++#define DIO_Serial_Input_Register 28 ++#define Joint_Status_2_Register 29 ++#define AO_TMRDACWRs_In_Progress_St _bit5 ++ ++#define AO_Mode_1_Register 38 ++#define AO_UPDATE_Source_Select(x) (((x)&0x1f)<<11) ++#define AO_UI_Source_Select(x) (((x)&0x1f)<<6) ++#define AO_Multiple_Channels _bit5 ++#define AO_UPDATE_Source_Polarity _bit4 ++#define AO_UI_Source_Polarity _bit3 ++#define AO_UC_Switch_Load_Every_TC _bit2 ++#define AO_Continuous _bit1 ++#define AO_Trigger_Once _bit0 ++ ++#define AO_Mode_2_Register 39 ++#define AO_FIFO_Mode_Mask ( 0x3 << 14 ) ++#define AO_FIFO_Mode_HF_to_F (3<<14) ++#define AO_FIFO_Mode_F (2<<14) ++#define AO_FIFO_Mode_HF (1<<14) ++#define AO_FIFO_Mode_E (0<<14) ++#define AO_FIFO_Retransmit_Enable _bit13 ++#define AO_START1_Disable _bit12 ++#define AO_UC_Initial_Load_Source _bit11 ++#define AO_UC_Write_Switch _bit10 ++#define AO_UI2_Initial_Load_Source _bit9 ++#define AO_UI2_Reload_Mode _bit8 ++#define AO_UI_Initial_Load_Source _bit7 ++#define AO_UI_Reload_Mode(x) (((x) & 0x7) << 4) ++#define AO_UI_Write_Switch _bit3 ++#define AO_BC_Initial_Load_Source _bit2 ++#define AO_BC_Reload_Mode _bit1 ++#define AO_BC_Write_Switch _bit0 ++ ++#define AO_UI_Load_A_Register 40 ++#define AO_UI_Load_A_Register_High 40 ++#define AO_UI_Load_A_Register_Low 41 ++#define AO_UI_Load_B_Register 42 ++#define AO_UI_Save_Registers 16 ++#define AO_BC_Load_A_Register 44 ++#define AO_BC_Load_A_Register_High 44 ++#define AO_BC_Load_A_Register_Low 45 ++#define AO_BC_Load_B_Register 46 ++#define AO_BC_Load_B_Register_High 46 ++#define AO_BC_Load_B_Register_Low 47 ++#define AO_BC_Save_Registers 18 ++#define AO_UC_Load_A_Register 48 ++#define AO_UC_Load_A_Register_High 48 ++#define AO_UC_Load_A_Register_Low 49 ++#define AO_UC_Load_B_Register 50 ++#define AO_UC_Save_Registers 20 ++ ++#define Clock_and_FOUT_Register 56 ++#define FOUT_Enable _bit15 ++#define FOUT_Timebase_Select _bit14 ++#define DIO_Serial_Out_Divide_By_2 _bit13 ++#define Slow_Internal_Time_Divide_By_2 _bit12 ++#define Slow_Internal_Timebase _bit11 ++#define G_Source_Divide_By_2 _bit10 ++#define Clock_To_Board_Divide_By_2 _bit9 ++#define Clock_To_Board _bit8 ++#define AI_Output_Divide_By_2 _bit7 ++#define AI_Source_Divide_By_2 _bit6 ++#define AO_Output_Divide_By_2 _bit5 ++#define AO_Source_Divide_By_2 _bit4 ++#define FOUT_Divider_mask 
0xf ++#define FOUT_Divider(x) (((x) & 0xf) << 0) ++ ++#define IO_Bidirection_Pin_Register 57 ++#define RTSI_Trig_Direction_Register 58 ++#define Drive_RTSI_Clock_Bit 0x1 ++#define Use_RTSI_Clock_Bit 0x2 ++ ++static inline unsigned int RTSI_Output_Bit(unsigned channel, int is_mseries) ++{ ++ unsigned max_channel; ++ unsigned base_bit_shift; ++ if(is_mseries) ++ { ++ base_bit_shift = 8; ++ max_channel = 7; ++ }else ++ { ++ base_bit_shift = 9; ++ max_channel = 6; ++ } ++ if(channel > max_channel) ++ { ++ rtdm_printk("%s: bug, invalid RTSI_channel=%i\n", ++ __FUNCTION__, channel); ++ return 0; ++ } ++ return 1 << (base_bit_shift + channel); ++} ++ ++#define Interrupt_Control_Register 59 ++#define Interrupt_B_Enable _bit15 ++#define Interrupt_B_Output_Select(x) ((x)<<12) ++#define Interrupt_A_Enable _bit11 ++#define Interrupt_A_Output_Select(x) ((x)<<8) ++#define Pass_Thru_0_Interrupt_Polarity _bit3 ++#define Pass_Thru_1_Interrupt_Polarity _bit2 ++#define Interrupt_Output_On_3_Pins _bit1 ++#define Interrupt_Output_Polarity _bit0 ++ ++#define AI_Output_Control_Register 60 ++#define AI_START_Output_Select _bit10 ++#define AI_SCAN_IN_PROG_Output_Select(x) (((x) & 0x3) << 8) ++#define AI_EXTMUX_CLK_Output_Select(x) (((x) & 0x3) << 6) ++#define AI_LOCALMUX_CLK_Output_Select(x) ((x)<<4) ++#define AI_SC_TC_Output_Select(x) ((x)<<2) ++#define AI_CONVERT_Output_High_Z 0 ++#define AI_CONVERT_Output_Ground 1 ++#define AI_CONVERT_Output_Enable_Low 2 ++#define AI_CONVERT_Output_Enable_High 3 ++#define AI_CONVERT_Output_Select(x) ((x) & 0x3) ++ ++#define AI_START_STOP_Select_Register 62 ++#define AI_START_Polarity _bit15 ++#define AI_STOP_Polarity _bit14 ++#define AI_STOP_Sync _bit13 ++#define AI_STOP_Edge _bit12 ++#define AI_STOP_Select(a) (((a) & 0x1f)<<7) ++#define AI_START_Sync _bit6 ++#define AI_START_Edge _bit5 ++#define AI_START_Select(a) ((a) & 0x1f) ++ ++#define AI_Trigger_Select_Register 63 ++#define AI_START1_Polarity _bit15 ++#define AI_START2_Polarity _bit14 ++#define AI_START2_Sync _bit13 ++#define AI_START2_Edge _bit12 ++#define AI_START2_Select(a) (((a) & 0x1f) << 7) ++#define AI_START1_Sync _bit6 ++#define AI_START1_Edge _bit5 ++#define AI_START1_Select(a) ((a) & 0x1f) ++ ++#define AI_DIV_Load_A_Register 64 ++ ++#define AO_Start_Select_Register 66 ++#define AO_UI2_Software_Gate _bit15 ++#define AO_UI2_External_Gate_Polarity _bit14 ++#define AO_START_Polarity _bit13 ++#define AO_AOFREQ_Enable _bit12 ++#define AO_UI2_External_Gate_Select(a) (((a) & 0x1f) << 7) ++#define AO_START_Sync _bit6 ++#define AO_START_Edge _bit5 ++#define AO_START_Select(a) ((a) & 0x1f) ++ ++#define AO_Trigger_Select_Register 67 ++#define AO_UI2_External_Gate_Enable _bit15 ++#define AO_Delayed_START1 _bit14 ++#define AO_START1_Polarity _bit13 ++#define AO_UI2_Source_Polarity _bit12 ++#define AO_UI2_Source_Select(x) (((x)&0x1f)<<7) ++#define AO_START1_Sync _bit6 ++#define AO_START1_Edge _bit5 ++#define AO_START1_Select(x) (((x)&0x1f)<<0) ++ ++#define AO_Mode_3_Register 70 ++#define AO_UI2_Switch_Load_Next_TC _bit13 ++#define AO_UC_Switch_Load_Every_BC_TC _bit12 ++#define AO_Trigger_Length _bit11 ++#define AO_Stop_On_Overrun_Error _bit5 ++#define AO_Stop_On_BC_TC_Trigger_Error _bit4 ++#define AO_Stop_On_BC_TC_Error _bit3 ++#define AO_Not_An_UPDATE _bit2 ++#define AO_Software_Gate _bit1 ++#define AO_Last_Gate_Disable _bit0 /* M Series only */ ++ ++#define Joint_Reset_Register 72 ++#define Software_Reset _bit11 ++#define AO_Configuration_End _bit9 ++#define AI_Configuration_End _bit8 ++#define AO_Configuration_Start _bit5 
++#define AI_Configuration_Start _bit4 ++#define G1_Reset _bit3 ++#define G0_Reset _bit2 ++#define AO_Reset _bit1 ++#define AI_Reset _bit0 ++ ++#define Interrupt_A_Enable_Register 73 ++#define Pass_Thru_0_Interrupt_Enable _bit9 ++#define G0_Gate_Interrupt_Enable _bit8 ++#define AI_FIFO_Interrupt_Enable _bit7 ++#define G0_TC_Interrupt_Enable _bit6 ++#define AI_Error_Interrupt_Enable _bit5 ++#define AI_STOP_Interrupt_Enable _bit4 ++#define AI_START_Interrupt_Enable _bit3 ++#define AI_START2_Interrupt_Enable _bit2 ++#define AI_START1_Interrupt_Enable _bit1 ++#define AI_SC_TC_Interrupt_Enable _bit0 ++ ++#define Interrupt_B_Enable_Register 75 ++#define Pass_Thru_1_Interrupt_Enable _bit11 ++#define G1_Gate_Interrupt_Enable _bit10 ++#define G1_TC_Interrupt_Enable _bit9 ++#define AO_FIFO_Interrupt_Enable _bit8 ++#define AO_UI2_TC_Interrupt_Enable _bit7 ++#define AO_UC_TC_Interrupt_Enable _bit6 ++#define AO_Error_Interrupt_Enable _bit5 ++#define AO_STOP_Interrupt_Enable _bit4 ++#define AO_START_Interrupt_Enable _bit3 ++#define AO_UPDATE_Interrupt_Enable _bit2 ++#define AO_START1_Interrupt_Enable _bit1 ++#define AO_BC_TC_Interrupt_Enable _bit0 ++ ++#define Second_IRQ_A_Enable_Register 74 ++#define AI_SC_TC_Second_Irq_Enable _bit0 ++#define AI_START1_Second_Irq_Enable _bit1 ++#define AI_START2_Second_Irq_Enable _bit2 ++#define AI_START_Second_Irq_Enable _bit3 ++#define AI_STOP_Second_Irq_Enable _bit4 ++#define AI_Error_Second_Irq_Enable _bit5 ++#define G0_TC_Second_Irq_Enable _bit6 ++#define AI_FIFO_Second_Irq_Enable _bit7 ++#define G0_Gate_Second_Irq_Enable _bit8 ++#define Pass_Thru_0_Second_Irq_Enable _bit9 ++ ++#define Second_IRQ_B_Enable_Register 76 ++#define AO_BC_TC_Second_Irq_Enable _bit0 ++#define AO_START1_Second_Irq_Enable _bit1 ++#define AO_UPDATE_Second_Irq_Enable _bit2 ++#define AO_START_Second_Irq_Enable _bit3 ++#define AO_STOP_Second_Irq_Enable _bit4 ++#define AO_Error_Second_Irq_Enable _bit5 ++#define AO_UC_TC_Second_Irq_Enable _bit6 ++#define AO_UI2_TC_Second_Irq_Enable _bit7 ++#define AO_FIFO_Second_Irq_Enable _bit8 ++#define G1_TC_Second_Irq_Enable _bit9 ++#define G1_Gate_Second_Irq_Enable _bit10 ++#define Pass_Thru_1_Second_Irq_Enable _bit11 ++ ++#define AI_Personal_Register 77 ++#define AI_SHIFTIN_Pulse_Width _bit15 ++#define AI_EOC_Polarity _bit14 ++#define AI_SOC_Polarity _bit13 ++#define AI_SHIFTIN_Polarity _bit12 ++#define AI_CONVERT_Pulse_Timebase _bit11 ++#define AI_CONVERT_Pulse_Width _bit10 ++#define AI_CONVERT_Original_Pulse _bit9 ++#define AI_FIFO_Flags_Polarity _bit8 ++#define AI_Overrun_Mode _bit7 ++#define AI_EXTMUX_CLK_Pulse_Width _bit6 ++#define AI_LOCALMUX_CLK_Pulse_Width _bit5 ++#define AI_AIFREQ_Polarity _bit4 ++ ++#define AO_Personal_Register 78 ++#define AO_Interval_Buffer_Mode _bit3 ++#define AO_BC_Source_Select _bit4 ++#define AO_UPDATE_Pulse_Width _bit5 ++#define AO_UPDATE_Pulse_Timebase _bit6 ++#define AO_UPDATE_Original_Pulse _bit7 ++#define AO_DMA_PIO_Control _bit8 /* M Series: reserved */ ++#define AO_AOFREQ_Polarity _bit9 /* M Series: reserved */ ++#define AO_FIFO_Enable _bit10 ++#define AO_FIFO_Flags_Polarity _bit11 /* M Series: reserved */ ++#define AO_TMRDACWR_Pulse_Width _bit12 ++#define AO_Fast_CPU _bit13 /* M Series: reserved */ ++#define AO_Number_Of_DAC_Packages _bit14 /* 1 for "single" mode, ++ 0 for "dual" */ ++#define AO_Multiple_DACS_Per_Package _bit15 /* M Series only */ ++ ++#define RTSI_Trig_A_Output_Register 79 ++ ++#define RTSI_Trig_B_Output_Register 80 ++#define RTSI_Sub_Selection_1_Bit _bit15 /* not for M Series */ ++#define 
RTSI_Trig_Output_Bits(x, y) ((y & 0xf) << ((x % 4) * 4)) ++#define RTSI_Trig_Output_Mask(x) (0xf << ((x % 4) * 4)) ++#define RTSI_Trig_Output_Source(x, y) ((y >> ((x % 4) * 4)) & 0xf) ++ ++#define RTSI_Board_Register 81 ++#define Write_Strobe_0_Register 82 ++#define Write_Strobe_1_Register 83 ++#define Write_Strobe_2_Register 84 ++#define Write_Strobe_3_Register 85 ++ ++#define AO_Output_Control_Register 86 ++#define AO_External_Gate_Enable _bit15 ++#define AO_External_Gate_Select(x) (((x)&0x1f)<<10) ++#define AO_Number_Of_Channels(x) (((x)&0xf)<<6) ++#define AO_UPDATE2_Output_Select(x) (((x)&0x3)<<4) ++#define AO_External_Gate_Polarity _bit3 ++#define AO_UPDATE2_Output_Toggle _bit2 ++#define AO_Update_Output_High_Z 0 ++#define AO_Update_Output_Ground 1 ++#define AO_Update_Output_Enable_Low 2 ++#define AO_Update_Output_Enable_High 3 ++#define AO_UPDATE_Output_Select(x) (x&0x3) ++ ++#define AI_Mode_3_Register 87 ++#define AI_Trigger_Length _bit15 ++#define AI_Delay_START _bit14 ++#define AI_Software_Gate _bit13 ++#define AI_SI_Special_Trigger_Delay _bit12 ++#define AI_SI2_Source_Select _bit11 ++#define AI_Delayed_START2 _bit10 ++#define AI_Delayed_START1 _bit9 ++#define AI_External_Gate_Mode _bit8 ++#define AI_FIFO_Mode_HF_to_E (3<<6) ++#define AI_FIFO_Mode_F (2<<6) ++#define AI_FIFO_Mode_HF (1<<6) ++#define AI_FIFO_Mode_NE (0<<6) ++#define AI_External_Gate_Polarity _bit5 ++#define AI_External_Gate_Select(a) ((a) & 0x1f) ++ ++#define G_Autoincrement_Register(a) (68+(a)) ++#define G_Command_Register(a) (6+(a)) ++#define G_HW_Save_Register(a) (8+(a)*2) ++#define G_HW_Save_Register_High(a) (8+(a)*2) ++#define G_HW_Save_Register_Low(a) (9+(a)*2) ++#define G_Input_Select_Register(a) (36+(a)) ++#define G_Load_A_Register(a) (28+(a)*4) ++#define G_Load_A_Register_High(a) (28+(a)*4) ++#define G_Load_A_Register_Low(a) (29+(a)*4) ++#define G_Load_B_Register(a) (30+(a)*4) ++#define G_Load_B_Register_High(a) (30+(a)*4) ++#define G_Load_B_Register_Low(a) (31+(a)*4) ++#define G_Mode_Register(a) (26+(a)) ++#define G_Save_Register(a) (12+(a)*2) ++#define G_Save_Register_High(a) (12+(a)*2) ++#define G_Save_Register_Low(a) (13+(a)*2) ++#define G_Status_Register 4 ++#define Analog_Trigger_Etc_Register 61 ++ ++/* command register */ ++#define G_Disarm_Copy _bit15 /* strobe */ ++#define G_Save_Trace_Copy _bit14 ++#define G_Arm_Copy _bit13 /* strobe */ ++#define G_Bank_Switch_Start _bit10 /* strobe */ ++#define G_Little_Big_Endian _bit9 ++#define G_Synchronized_Gate _bit8 ++#define G_Write_Switch _bit7 ++#define G_Up_Down(a) (((a)&0x03)<<5) ++#define G_Disarm _bit4 /* strobe */ ++#define G_Analog_Trigger_Reset _bit3 /* strobe */ ++#define G_Save_Trace _bit1 ++#define G_Arm _bit0 /* strobe */ ++ ++/* channel agnostic names for the command register #defines */ ++#define G_Bank_Switch_Enable _bit12 ++#define G_Bank_Switch_Mode _bit11 ++#define G_Load _bit2 /* strobe */ ++ ++/* input select register */ ++#define G_Gate_Select(a) (((a)&0x1f)<<7) ++#define G_Source_Select(a) (((a)&0x1f)<<2) ++#define G_Write_Acknowledges_Irq _bit1 ++#define G_Read_Acknowledges_Irq _bit0 ++ ++/* same input select register, but with channel agnostic names */ ++#define G_Source_Polarity _bit15 ++#define G_Output_Polarity _bit14 ++#define G_OR_Gate _bit13 ++#define G_Gate_Select_Load_Source _bit12 ++ ++/* mode register */ ++#define G_Loading_On_TC _bit12 ++#define G_Output_Mode(a) (((a)&0x03)<<8) ++#define G_Trigger_Mode_For_Edge_Gate(a) (((a)&0x03)<<3) ++#define G_Gating_Mode(a) (((a)&0x03)<<0) ++ ++/* same input mode register, but with 
channel agnostic names */ ++#define G_Load_Source_Select _bit7 ++#define G_Reload_Source_Switching _bit15 ++#define G_Loading_On_Gate _bit14 ++#define G_Gate_Polarity _bit13 ++ ++#define G_Counting_Once(a) (((a)&0x03)<<10) ++#define G_Stop_Mode(a) (((a)&0x03)<<5) ++#define G_Gate_On_Both_Edges _bit2 ++ ++/* G_Status_Register */ ++#define G1_Gate_Error_St _bit15 ++#define G0_Gate_Error_St _bit14 ++#define G1_TC_Error_St _bit13 ++#define G0_TC_Error_St _bit12 ++#define G1_No_Load_Between_Gates_St _bit11 ++#define G0_No_Load_Between_Gates_St _bit10 ++#define G1_Armed_St _bit9 ++#define G0_Armed_St _bit8 ++#define G1_Stale_Data_St _bit7 ++#define G0_Stale_Data_St _bit6 ++#define G1_Next_Load_Source_St _bit5 ++#define G0_Next_Load_Source_St _bit4 ++#define G1_Counting_St _bit3 ++#define G0_Counting_St _bit2 ++#define G1_Save_St _bit1 ++#define G0_Save_St _bit0 ++ ++/* general purpose counter timer */ ++#define G_Autoincrement(a) ((a)<<0) ++ ++/*Analog_Trigger_Etc_Register*/ ++#define Analog_Trigger_Mode(x) ((x) & 0x7) ++#define Analog_Trigger_Enable _bit3 ++#define Analog_Trigger_Drive _bit4 ++#define GPFO_1_Output_Select _bit7 ++#define GPFO_0_Output_Select(a) ((a)<<11) ++#define GPFO_0_Output_Enable _bit14 ++#define GPFO_1_Output_Enable _bit15 ++ ++/* Additional windowed registers unique to E series */ ++ ++/* 16 bit registers shadowed from DAQ-STC */ ++#define Window_Address 0x00 ++#define Window_Data 0x02 ++ ++#define Configuration_Memory_Clear 82 ++#define ADC_FIFO_Clear 83 ++#define DAC_FIFO_Clear 84 ++ ++/* i/o port offsets */ ++ ++/* 8 bit registers */ ++#define XXX_Status 0x01 ++#define PROMOUT _bit0 ++#define AI_FIFO_LOWER_NOT_EMPTY _bit3 ++ ++#define Serial_Command 0x0d ++#define Misc_Command 0x0f ++#define Port_A 0x19 ++#define Port_B 0x1b ++#define Port_C 0x1d ++#define Configuration 0x1f ++#define Strobes 0x01 ++#define Channel_A_Mode 0x03 ++#define Channel_B_Mode 0x05 ++#define Channel_C_Mode 0x07 ++#define AI_AO_Select 0x09 ++#define AI_DMA_Select_Shift 0 ++#define AI_DMA_Select_Mask 0xf ++#define AO_DMA_Select_Shift 4 ++#define AO_DMA_Select_Mask (0xf << AO_DMA_Select_Shift) ++ ++#define G0_G1_Select 0x0b ++ ++static inline unsigned ni_stc_dma_channel_select_bitfield(unsigned channel) ++{ ++ if(channel < 4) return 1 << channel; ++ if(channel == 4) return 0x3; ++ if(channel == 5) return 0x5; ++ BUG(); ++ return 0; ++} ++static inline unsigned GPCT_DMA_Select_Bits(unsigned gpct_index, unsigned mite_channel) ++{ ++ BUG_ON(gpct_index > 1); ++ return ni_stc_dma_channel_select_bitfield(mite_channel) << (4 * gpct_index); ++} ++static inline unsigned GPCT_DMA_Select_Mask(unsigned gpct_index) ++{ ++ BUG_ON(gpct_index > 1); ++ return 0xf << (4 * gpct_index); ++} ++ ++/* 16 bit registers */ ++ ++#define Configuration_Memory_Low 0x10 ++#define AI_DITHER _bit9 ++#define AI_LAST_CHANNEL _bit15 ++ ++#define Configuration_Memory_High 0x12 ++#define AI_AC_COUPLE _bit11 ++#define AI_DIFFERENTIAL _bit12 ++#define AI_COMMON _bit13 ++#define AI_GROUND (_bit12|_bit13) ++#define AI_CONFIG_CHANNEL(x) (x&0x3f) ++ ++#define ADC_FIFO_Data_Register 0x1c ++ ++#define AO_Configuration 0x16 ++#define AO_Bipolar _bit0 ++#define AO_Deglitch _bit1 ++#define AO_Ext_Ref _bit2 ++#define AO_Ground_Ref _bit3 ++#define AO_Channel(x) ((x) << 8) ++ ++#define DAC_FIFO_Data 0x1e ++#define DAC0_Direct_Data 0x18 ++#define DAC1_Direct_Data 0x1a ++ ++/* 611x registers (these boards differ from the e-series) */ ++ ++#define Magic_611x 0x19 /* w8 (new) */ ++#define Calibration_Channel_Select_611x 0x1a /* w16 (new) */ ++#define 
ADC_FIFO_Data_611x 0x1c /* r32 (incompatible) */ ++#define AI_FIFO_Offset_Load_611x 0x05 /* r8 (new) */ ++#define DAC_FIFO_Data_611x 0x14 /* w32 (incompatible) */ ++#define Cal_Gain_Select_611x 0x05 /* w8 (new) */ ++ ++#define AO_Window_Address_611x 0x18 ++#define AO_Window_Data_611x 0x1e ++ ++/* 6143 registers */ ++#define Magic_6143 0x19 /* w8 */ ++#define G0G1_DMA_Select_6143 0x0B /* w8 */ ++#define PipelineDelay_6143 0x1f /* w8 */ ++#define EOC_Set_6143 0x1D /* w8 */ ++#define AIDMA_Select_6143 0x09 /* w8 */ ++#define AIFIFO_Data_6143 0x8C /* w32 */ ++#define AIFIFO_Flag_6143 0x84 /* w32 */ ++#define AIFIFO_Control_6143 0x88 /* w32 */ ++#define AIFIFO_Status_6143 0x88 /* w32 */ ++#define AIFIFO_DMAThreshold_6143 0x90 /* w32 */ ++#define AIFIFO_Words_Available_6143 0x94 /* w32 */ ++ ++#define Calibration_Channel_6143 0x42 /* w16 */ ++#define Calibration_LowTime_6143 0x20 /* w16 */ ++#define Calibration_HighTime_6143 0x22 /* w16 */ ++#define Relay_Counter_Load_Val__6143 0x4C /* w32 */ ++#define Signature_6143 0x50 /* w32 */ ++#define Release_Date_6143 0x54 /* w32 */ ++#define Release_Oldest_Date_6143 0x58 /* w32 */ ++ ++#define Calibration_Channel_6143_RelayOn 0x8000 /* Calibration relay switch On */ ++#define Calibration_Channel_6143_RelayOff 0x4000 /* Calibration relay switch Off */ ++#define Calibration_Channel_Gnd_Gnd 0x00 /* Offset Calibration */ ++#define Calibration_Channel_2v5_Gnd 0x02 /* 2.5V Reference */ ++#define Calibration_Channel_Pwm_Gnd 0x05 /* +/- 5V Self Cal */ ++#define Calibration_Channel_2v5_Pwm 0x0a /* PWM Calibration */ ++#define Calibration_Channel_Pwm_Pwm 0x0d /* CMRR */ ++#define Calibration_Channel_Gnd_Pwm 0x0e /* PWM Calibration */ ++ ++/* 671x, 611x registers */ ++ ++/* 671xi 611x windowed ao registers */ ++#define AO_Immediate_671x 0x11 /* W 16 */ ++#define AO_Timed_611x 0x10 /* W 16 */ ++#define AO_FIFO_Offset_Load_611x 0x13 /* W32 */ ++#define AO_Later_Single_Point_Updates 0x14 /* W 16 */ ++#define AO_Waveform_Generation_611x 0x15 /* W 16 */ ++#define AO_Misc_611x 0x16 /* W 16 */ ++#define AO_Calibration_Channel_Select_67xx 0x17 /* W 16 */ ++#define AO_Configuration_2_67xx 0x18 /* W 16 */ ++#define CAL_ADC_Command_67xx 0x19 /* W 8 */ ++#define CAL_ADC_Status_67xx 0x1a /* R 8 */ ++#define CAL_ADC_Data_67xx 0x1b /* R 16 */ ++#define CAL_ADC_Config_Data_High_Word_67xx 0x1c /* RW 16 */ ++#define CAL_ADC_Config_Data_Low_Word_67xx 0x1d /* RW 16 */ ++ ++static inline unsigned int DACx_Direct_Data_671x(int channel) ++{ ++ return channel; ++} ++ ++#define CLEAR_WG _bit0 ++ ++#define CSCFG_CAL_CONTROL_MASK 0x7 ++#define CSCFG_SELF_CAL_OFFSET 0x1 ++#define CSCFG_SELF_CAL_GAIN 0x2 ++#define CSCFG_SELF_CAL_OFFSET_GAIN 0x3 ++#define CSCFG_SYSTEM_CAL_OFFSET 0x5 ++#define CSCFG_SYSTEM_CAL_GAIN 0x6 ++#define CSCFG_DONE (1 << 3) ++#define CSCFG_POWER_SAVE_SELECT (1 << 4) ++#define CSCFG_PORT_MODE (1 << 5) ++#define CSCFG_RESET_VALID (1 << 6) ++#define CSCFG_RESET (1 << 7) ++#define CSCFG_UNIPOLAR (1 << 12) ++#define CSCFG_WORD_RATE_2180_CYCLES (0x0 << 13) ++#define CSCFG_WORD_RATE_1092_CYCLES (0x1 << 13) ++#define CSCFG_WORD_RATE_532_CYCLES (0x2 << 13) ++#define CSCFG_WORD_RATE_388_CYCLES (0x3 << 13) ++#define CSCFG_WORD_RATE_324_CYCLES (0x4 << 13) ++#define CSCFG_WORD_RATE_17444_CYCLES (0x5 << 13) ++#define CSCFG_WORD_RATE_8724_CYCLES (0x6 << 13) ++#define CSCFG_WORD_RATE_4364_CYCLES (0x7 << 13) ++#define CSCFG_WORD_RATE_MASK (0x7 << 13) ++#define CSCFG_LOW_POWER (1 << 16) ++ ++#define CS5529_CONFIG_DOUT(x) (1 << (18 + x)) ++#define CS5529_CONFIG_AOUT(x) (1 << (22 + x)) ++ 
++/* cs5529 command bits */ ++#define CSCMD_POWER_SAVE _bit0 ++#define CSCMD_REGISTER_SELECT_MASK 0xe ++#define CSCMD_OFFSET_REGISTER 0x0 ++#define CSCMD_GAIN_REGISTER _bit1 ++#define CSCMD_CONFIG_REGISTER _bit2 ++#define CSCMD_READ _bit4 ++#define CSCMD_CONTINUOUS_CONVERSIONS _bit5 ++#define CSCMD_SINGLE_CONVERSION _bit6 ++#define CSCMD_COMMAND _bit7 ++ ++/* cs5529 status bits */ ++#define CSS_ADC_BUSY _bit0 ++#define CSS_OSC_DETECT _bit1 /* indicates adc error */ ++#define CSS_OVERRANGE _bit3 ++ ++#define SerDacLd(x) (0x08<<(x)) ++ ++/* ++ This is stuff unique to the NI E series drivers, ++ but I thought I'd put it here anyway. ++*/ ++ ++enum ++{ ++ ai_gain_16 = 0, ++ ai_gain_8, ++ ai_gain_14, ++ ai_gain_4, ++ ai_gain_611x, ++ ai_gain_622x, ++ ai_gain_628x, ++ ai_gain_6143 ++}; ++enum caldac_enum ++{ ++ caldac_none=0, ++ mb88341, ++ dac8800, ++ dac8043, ++ ad8522, ++ ad8804, ++ ad8842, ++ ad8804_debug ++}; ++enum ni_reg_type ++{ ++ ni_reg_normal = 0x0, ++ ni_reg_611x = 0x1, ++ ni_reg_6711 = 0x2, ++ ni_reg_6713 = 0x4, ++ ni_reg_67xx_mask = 0x6, ++ ni_reg_6xxx_mask = 0x7, ++ ni_reg_622x = 0x8, ++ ni_reg_625x = 0x10, ++ ni_reg_628x = 0x18, ++ ni_reg_m_series_mask = 0x18, ++ ni_reg_6143 = 0x20 ++}; ++ ++/* M Series registers offsets */ ++#define M_Offset_CDIO_DMA_Select 0x7 /* write */ ++#define M_Offset_SCXI_Status 0x7 /* read */ ++#define M_Offset_AI_AO_Select 0x9 /* write, same offset as e-series */ ++#define M_Offset_SCXI_Serial_Data_In 0x9 /* read */ ++#define M_Offset_G0_G1_Select 0xb /* write, same offset as e-series */ ++#define M_Offset_Misc_Command 0xf ++#define M_Offset_SCXI_Serial_Data_Out 0x11 ++#define M_Offset_SCXI_Control 0x13 ++#define M_Offset_SCXI_Output_Enable 0x15 ++#define M_Offset_AI_FIFO_Data 0x1c ++#define M_Offset_Static_Digital_Output 0x24 /* write */ ++#define M_Offset_Static_Digital_Input 0x24 /* read */ ++#define M_Offset_DIO_Direction 0x28 ++#define M_Offset_Cal_PWM 0x40 ++#define M_Offset_AI_Config_FIFO_Data 0x5e ++#define M_Offset_Interrupt_C_Enable 0x88 /* write */ ++#define M_Offset_Interrupt_C_Status 0x88 /* read */ ++#define M_Offset_Analog_Trigger_Control 0x8c ++#define M_Offset_AO_Serial_Interrupt_Enable 0xa0 ++#define M_Offset_AO_Serial_Interrupt_Ack 0xa1 /* write */ ++#define M_Offset_AO_Serial_Interrupt_Status 0xa1 /* read */ ++#define M_Offset_AO_Calibration 0xa3 ++#define M_Offset_AO_FIFO_Data 0xa4 ++#define M_Offset_PFI_Filter 0xb0 ++#define M_Offset_RTSI_Filter 0xb4 ++#define M_Offset_SCXI_Legacy_Compatibility 0xbc ++#define M_Offset_Interrupt_A_Ack 0x104 /* write */ ++#define M_Offset_AI_Status_1 0x104 /* read */ ++#define M_Offset_Interrupt_B_Ack 0x106 /* write */ ++#define M_Offset_AO_Status_1 0x106 /* read */ ++#define M_Offset_AI_Command_2 0x108 /* write */ ++#define M_Offset_G01_Status 0x108 /* read */ ++#define M_Offset_AO_Command_2 0x10a ++#define M_Offset_AO_Status_2 0x10c /* read */ ++#define M_Offset_G0_Command 0x10c /* write */ ++#define M_Offset_G1_Command 0x10e /* write */ ++#define M_Offset_G0_HW_Save 0x110 ++#define M_Offset_G0_HW_Save_High 0x110 ++#define M_Offset_AI_Command_1 0x110 ++#define M_Offset_G0_HW_Save_Low 0x112 ++#define M_Offset_AO_Command_1 0x112 ++#define M_Offset_G1_HW_Save 0x114 ++#define M_Offset_G1_HW_Save_High 0x114 ++#define M_Offset_G1_HW_Save_Low 0x116 ++#define M_Offset_AI_Mode_1 0x118 ++#define M_Offset_G0_Save 0x118 ++#define M_Offset_G0_Save_High 0x118 ++#define M_Offset_AI_Mode_2 0x11a ++#define M_Offset_G0_Save_Low 0x11a ++#define M_Offset_AI_SI_Load_A 0x11c ++#define M_Offset_G1_Save 0x11c ++#define 
M_Offset_G1_Save_High 0x11c ++#define M_Offset_G1_Save_Low 0x11e ++#define M_Offset_AI_SI_Load_B 0x120 /* write */ ++#define M_Offset_AO_UI_Save 0x120 /* read */ ++#define M_Offset_AI_SC_Load_A 0x124 /* write */ ++#define M_Offset_AO_BC_Save 0x124 /* read */ ++#define M_Offset_AI_SC_Load_B 0x128 /* write */ ++#define M_Offset_AO_UC_Save 0x128 /* read */ ++#define M_Offset_AI_SI2_Load_A 0x12c ++#define M_Offset_AI_SI2_Load_B 0x130 ++#define M_Offset_G0_Mode 0x134 ++#define M_Offset_G1_Mode 0x136 /* write */ ++#define M_Offset_Joint_Status_1 0x136 /* read */ ++#define M_Offset_G0_Load_A 0x138 ++#define M_Offset_Joint_Status_2 0x13a ++#define M_Offset_G0_Load_B 0x13c ++#define M_Offset_G1_Load_A 0x140 ++#define M_Offset_G1_Load_B 0x144 ++#define M_Offset_G0_Input_Select 0x148 ++#define M_Offset_G1_Input_Select 0x14a ++#define M_Offset_AO_Mode_1 0x14c ++#define M_Offset_AO_Mode_2 0x14e ++#define M_Offset_AO_UI_Load_A 0x150 ++#define M_Offset_AO_UI_Load_B 0x154 ++#define M_Offset_AO_BC_Load_A 0x158 ++#define M_Offset_AO_BC_Load_B 0x15c ++#define M_Offset_AO_UC_Load_A 0x160 ++#define M_Offset_AO_UC_Load_B 0x164 ++#define M_Offset_Clock_and_FOUT 0x170 ++#define M_Offset_IO_Bidirection_Pin 0x172 ++#define M_Offset_RTSI_Trig_Direction 0x174 ++#define M_Offset_Interrupt_Control 0x176 ++#define M_Offset_AI_Output_Control 0x178 ++#define M_Offset_Analog_Trigger_Etc 0x17a ++#define M_Offset_AI_START_STOP_Select 0x17c ++#define M_Offset_AI_Trigger_Select 0x17e ++#define M_Offset_AI_SI_Save 0x180 /* read */ ++#define M_Offset_AI_DIV_Load_A 0x180 /* write */ ++#define M_Offset_AI_SC_Save 0x184 /* read */ ++#define M_Offset_AO_Start_Select 0x184 /* write */ ++#define M_Offset_AO_Trigger_Select 0x186 ++#define M_Offset_AO_Mode_3 0x18c ++#define M_Offset_G0_Autoincrement 0x188 ++#define M_Offset_G1_Autoincrement 0x18a ++#define M_Offset_Joint_Reset 0x190 ++#define M_Offset_Interrupt_A_Enable 0x192 ++#define M_Offset_Interrupt_B_Enable 0x196 ++#define M_Offset_AI_Personal 0x19a ++#define M_Offset_AO_Personal 0x19c ++#define M_Offset_RTSI_Trig_A_Output 0x19e ++#define M_Offset_RTSI_Trig_B_Output 0x1a0 ++#define M_Offset_RTSI_Shared_MUX 0x1a2 ++#define M_Offset_AO_Output_Control 0x1ac ++#define M_Offset_AI_Mode_3 0x1ae ++#define M_Offset_Configuration_Memory_Clear 0x1a4 ++#define M_Offset_AI_FIFO_Clear 0x1a6 ++#define M_Offset_AO_FIFO_Clear 0x1a8 ++#define M_Offset_G0_Counting_Mode 0x1b0 ++#define M_Offset_G1_Counting_Mode 0x1b2 ++#define M_Offset_G0_Second_Gate 0x1b4 ++#define M_Offset_G1_Second_Gate 0x1b6 ++#define M_Offset_G0_DMA_Config 0x1b8 /* write */ ++#define M_Offset_G0_DMA_Status 0x1b8 /* read */ ++#define M_Offset_G1_DMA_Config 0x1ba /* write */ ++#define M_Offset_G1_DMA_Status 0x1ba /* read */ ++#define M_Offset_G0_MSeries_ABZ 0x1c0 ++#define M_Offset_G1_MSeries_ABZ 0x1c2 ++#define M_Offset_Clock_and_Fout2 0x1c4 ++#define M_Offset_PLL_Control 0x1c6 ++#define M_Offset_PLL_Status 0x1c8 ++#define M_Offset_PFI_Output_Select_1 0x1d0 ++#define M_Offset_PFI_Output_Select_2 0x1d2 ++#define M_Offset_PFI_Output_Select_3 0x1d4 ++#define M_Offset_PFI_Output_Select_4 0x1d6 ++#define M_Offset_PFI_Output_Select_5 0x1d8 ++#define M_Offset_PFI_Output_Select_6 0x1da ++#define M_Offset_PFI_DI 0x1dc ++#define M_Offset_PFI_DO 0x1de ++#define M_Offset_AI_Config_FIFO_Bypass 0x218 ++#define M_Offset_SCXI_DIO_Enable 0x21c ++#define M_Offset_CDI_FIFO_Data 0x220 /* read */ ++#define M_Offset_CDO_FIFO_Data 0x220 /* write */ ++#define M_Offset_CDIO_Status 0x224 /* read */ ++#define M_Offset_CDIO_Command 0x224 /* write */ 
++#define M_Offset_CDI_Mode 0x228 ++#define M_Offset_CDO_Mode 0x22c ++#define M_Offset_CDI_Mask_Enable 0x230 ++#define M_Offset_CDO_Mask_Enable 0x234 ++#define M_Offset_AO_Waveform_Order(x) (0xc2 + 0x4 * x) ++#define M_Offset_AO_Config_Bank(x) (0xc3 + 0x4 * x) ++#define M_Offset_DAC_Direct_Data(x) (0xc0 + 0x4 * x) ++#define M_Offset_Gen_PWM(x) (0x44 + 0x2 * x) ++ ++static inline int M_Offset_Static_AI_Control(int i) ++{ ++ int offset[] = ++ { ++ 0x64, ++ 0x261, ++ 0x262, ++ 0x263, ++ }; ++ if(((unsigned)i) >= sizeof(offset) / sizeof(offset[0])) ++ { ++ rtdm_printk("%s: invalid channel=%i\n", __FUNCTION__, i); ++ return offset[0]; ++ } ++ return offset[i]; ++}; ++static inline int M_Offset_AO_Reference_Attenuation(int channel) ++{ ++ int offset[] = ++ { ++ 0x264, ++ 0x265, ++ 0x266, ++ 0x267 ++ }; ++ if(((unsigned)channel) >= sizeof(offset) / sizeof(offset[0])) ++ { ++ rtdm_printk("%s: invalid channel=%i\n", __FUNCTION__, channel); ++ return offset[0]; ++ } ++ return offset[channel]; ++}; ++static inline unsigned M_Offset_PFI_Output_Select(unsigned n) ++{ ++ if(n < 1 || n > NUM_PFI_OUTPUT_SELECT_REGS) ++ { ++ rtdm_printk("%s: invalid pfi output select register=%i\n", __FUNCTION__, n); ++ return M_Offset_PFI_Output_Select_1; ++ } ++ return M_Offset_PFI_Output_Select_1 + (n - 1) * 2; ++} ++ ++#define MSeries_AI_Config_Channel_Type_Mask (0x7 << 6) ++#define MSeries_AI_Config_Channel_Type_Calibration_Bits 0x0 ++#define MSeries_AI_Config_Channel_Type_Differential_Bits (0x1 << 6) ++#define MSeries_AI_Config_Channel_Type_Common_Ref_Bits (0x2 << 6) ++#define MSeries_AI_Config_Channel_Type_Ground_Ref_Bits (0x3 << 6) ++#define MSeries_AI_Config_Channel_Type_Aux_Bits (0x5 << 6) ++#define MSeries_AI_Config_Channel_Type_Ghost_Bits (0x7 << 6) ++#define MSeries_AI_Config_Polarity_Bit 0x1000 /* 0 for 2's complement encoding */ ++#define MSeries_AI_Config_Dither_Bit 0x2000 ++#define MSeries_AI_Config_Last_Channel_Bit 0x4000 ++#define MSeries_AI_Config_Channel_Bits(x) (x & 0xf) ++#define MSeries_AI_Config_Gain_Bits(x) ((x & 0x7) << 9) ++ ++static inline ++unsigned int MSeries_AI_Config_Bank_Bits(unsigned int reg_type, ++ unsigned int channel) ++{ ++ unsigned int bits = channel & 0x30; ++ if (reg_type == ni_reg_622x) { ++ if (channel & 0x40) ++ bits |= 0x400; ++ } ++ return bits; ++} ++ ++#define MSeries_PLL_In_Source_Select_RTSI0_Bits 0xb ++#define MSeries_PLL_In_Source_Select_Star_Trigger_Bits 0x14 ++#define MSeries_PLL_In_Source_Select_RTSI7_Bits 0x1b ++#define MSeries_PLL_In_Source_Select_PXI_Clock10 0x1d ++#define MSeries_PLL_In_Source_Select_Mask 0x1f ++#define MSeries_Timebase1_Select_Bit 0x20 /* use PLL for timebase 1 */ ++#define MSeries_Timebase3_Select_Bit 0x40 /* use PLL for timebase 3 */ ++/* Use 10MHz instead of 20MHz for RTSI clock frequency. 
Appears ++ to have no effect, at least on pxi-6281, which always uses ++ 20MHz rtsi clock frequency */ ++#define MSeries_RTSI_10MHz_Bit 0x80 ++ ++static inline ++unsigned int MSeries_PLL_In_Source_Select_RTSI_Bits(unsigned int RTSI_channel) ++{ ++ if(RTSI_channel > 7) ++ { ++ rtdm_printk("%s: bug, invalid RTSI_channel=%i\n", __FUNCTION__, RTSI_channel); ++ return 0; ++ } ++ if(RTSI_channel == 7) return MSeries_PLL_In_Source_Select_RTSI7_Bits; ++ else return MSeries_PLL_In_Source_Select_RTSI0_Bits + RTSI_channel; ++} ++ ++#define MSeries_PLL_Enable_Bit 0x1000 ++#define MSeries_PLL_VCO_Mode_200_325MHz_Bits 0x0 ++#define MSeries_PLL_VCO_Mode_175_225MHz_Bits 0x2000 ++#define MSeries_PLL_VCO_Mode_100_225MHz_Bits 0x4000 ++#define MSeries_PLL_VCO_Mode_75_150MHz_Bits 0x6000 ++ ++static inline ++unsigned int MSeries_PLL_Divisor_Bits(unsigned int divisor) ++{ ++ static const unsigned int max_divisor = 0x10; ++ if(divisor < 1 || divisor > max_divisor) ++ { ++ rtdm_printk("%s: bug, invalid divisor=%i\n", __FUNCTION__, divisor); ++ return 0; ++ } ++ return (divisor & 0xf) << 8; ++} ++static inline ++unsigned int MSeries_PLL_Multiplier_Bits(unsigned int multiplier) ++{ ++ static const unsigned int max_multiplier = 0x100; ++ if(multiplier < 1 || multiplier > max_multiplier) ++ { ++ rtdm_printk("%s: bug, invalid multiplier=%i\n", __FUNCTION__, multiplier); ++ return 0; ++ } ++ return multiplier & 0xff; ++} ++ ++#define MSeries_PLL_Locked_Bit 0x1 ++ ++#define MSeries_AI_Bypass_Channel_Mask 0x7 ++#define MSeries_AI_Bypass_Bank_Mask 0x78 ++#define MSeries_AI_Bypass_Cal_Sel_Pos_Mask 0x380 ++#define MSeries_AI_Bypass_Cal_Sel_Neg_Mask 0x1c00 ++#define MSeries_AI_Bypass_Mode_Mux_Mask 0x6000 ++#define MSeries_AO_Bypass_AO_Cal_Sel_Mask 0x38000 ++#define MSeries_AI_Bypass_Gain_Mask 0x1c0000 ++#define MSeries_AI_Bypass_Dither_Bit 0x200000 ++#define MSeries_AI_Bypass_Polarity_Bit 0x400000 /* 0 for 2's complement encoding */ ++#define MSeries_AI_Bypass_Config_FIFO_Bit 0x80000000 ++#define MSeries_AI_Bypass_Cal_Sel_Pos_Bits(x) ((x << 7) & \ ++ MSeries_AI_Bypass_Cal_Sel_Pos_Mask) ++#define MSeries_AI_Bypass_Cal_Sel_Neg_Bits(x) ((x << 10) & \ ++ MSeries_AI_Bypass_Cal_Sel_Pos_Mask) ++#define MSeries_AI_Bypass_Gain_Bits(x) ((x << 18) & \ ++ MSeries_AI_Bypass_Gain_Mask) ++ ++#define MSeries_AO_DAC_Offset_Select_Mask 0x7 ++#define MSeries_AO_DAC_Offset_0V_Bits 0x0 ++#define MSeries_AO_DAC_Offset_5V_Bits 0x1 ++#define MSeries_AO_DAC_Reference_Mask 0x38 ++#define MSeries_AO_DAC_Reference_10V_Internal_Bits 0x0 ++#define MSeries_AO_DAC_Reference_5V_Internal_Bits 0x8 ++#define MSeries_AO_Update_Timed_Bit 0x40 ++#define MSeries_AO_Bipolar_Bit 0x80 /* turns on 2's complement encoding */ ++ ++#define MSeries_Attenuate_x5_Bit 0x1 ++ ++#define MSeries_Cal_PWM_High_Time_Bits(x) ((x << 16) & 0xffff0000) ++#define MSeries_Cal_PWM_Low_Time_Bits(x) (x & 0xffff) ++ ++#define MSeries_PFI_Output_Select_Mask(x) (0x1f << (x % 3) * 5) ++#define MSeries_PFI_Output_Select_Bits(x, y) ((y & 0x1f) << ((x % 3) * 5)) ++// inverse to MSeries_PFI_Output_Select_Bits ++#define MSeries_PFI_Output_Select_Source(x, y) ((y >> ((x % 3) * 5)) & 0x1f) ++ ++#define Gi_DMA_BankSW_Error_Bit 0x10 ++#define Gi_DMA_Reset_Bit 0x8 ++#define Gi_DMA_Int_Enable_Bit 0x4 ++#define Gi_DMA_Write_Bit 0x2 ++#define Gi_DMA_Enable_Bit 0x1 ++ ++#define MSeries_PFI_Filter_Select_Mask(x) (0x3 << (x * 2)) ++#define MSeries_PFI_Filter_Select_Bits(x, y) ((y << (x * 2)) & \ ++ MSeries_PFI_Filter_Select_Mask(x)) ++ ++/* CDIO DMA select bits */ ++#define CDI_DMA_Select_Shift 0 ++#define 
CDI_DMA_Select_Mask 0xf ++#define CDO_DMA_Select_Shift 4 ++#define CDO_DMA_Select_Mask 0xf << CDO_DMA_Select_Shift ++ ++/* CDIO status bits */ ++#define CDO_FIFO_Empty_Bit 0x1 ++#define CDO_FIFO_Full_Bit 0x2 ++#define CDO_FIFO_Request_Bit 0x4 ++#define CDO_Overrun_Bit 0x8 ++#define CDO_Underflow_Bit 0x10 ++#define CDI_FIFO_Empty_Bit 0x10000 ++#define CDI_FIFO_Full_Bit 0x20000 ++#define CDI_FIFO_Request_Bit 0x40000 ++#define CDI_Overrun_Bit 0x80000 ++#define CDI_Overflow_Bit 0x100000 ++ ++/* CDIO command bits */ ++#define CDO_Disarm_Bit 0x1 ++#define CDO_Arm_Bit 0x2 ++#define CDI_Disarm_Bit 0x4 ++#define CDI_Arm_Bit 0x8 ++#define CDO_Reset_Bit 0x10 ++#define CDI_Reset_Bit 0x20 ++#define CDO_Error_Interrupt_Enable_Set_Bit 0x40 ++#define CDO_Error_Interrupt_Enable_Clear_Bit 0x80 ++#define CDI_Error_Interrupt_Enable_Set_Bit 0x100 ++#define CDI_Error_Interrupt_Enable_Clear_Bit 0x200 ++#define CDO_FIFO_Request_Interrupt_Enable_Set_Bit 0x400 ++#define CDO_FIFO_Request_Interrupt_Enable_Clear_Bit 0x800 ++#define CDI_FIFO_Request_Interrupt_Enable_Set_Bit 0x1000 ++#define CDI_FIFO_Request_Interrupt_Enable_Clear_Bit 0x2000 ++#define CDO_Error_Interrupt_Confirm_Bit 0x4000 ++#define CDI_Error_Interrupt_Confirm_Bit 0x8000 ++#define CDO_Empty_FIFO_Interrupt_Enable_Set_Bit 0x10000 ++#define CDO_Empty_FIFO_Interrupt_Enable_Clear_Bit 0x20000 ++#define CDO_SW_Update_Bit 0x80000 ++#define CDI_SW_Update_Bit 0x100000 ++ ++/* CDIO mode bits */ ++#define CDI_Sample_Source_Select_Mask 0x3f ++#define CDI_Halt_On_Error_Bit 0x200 ++/* sample clock on falling edge */ ++#define CDI_Polarity_Bit 0x400 ++/* set for half full mode, clear for not empty mode */ ++#define CDI_FIFO_Mode_Bit 0x800 ++/* data lanes specify which dio channels map to byte or word accesses ++ to the dio fifos */ ++#define CDI_Data_Lane_Mask 0x3000 ++#define CDI_Data_Lane_0_15_Bits 0x0 ++#define CDI_Data_Lane_16_31_Bits 0x1000 ++#define CDI_Data_Lane_0_7_Bits 0x0 ++#define CDI_Data_Lane_8_15_Bits 0x1000 ++#define CDI_Data_Lane_16_23_Bits 0x2000 ++#define CDI_Data_Lane_24_31_Bits 0x3000 ++ ++/* CDO mode bits */ ++#define CDO_Sample_Source_Select_Mask 0x3f ++#define CDO_Retransmit_Bit 0x100 ++#define CDO_Halt_On_Error_Bit 0x200 ++/* sample clock on falling edge */ ++#define CDO_Polarity_Bit 0x400 ++/* set for half full mode, clear for not full mode */ ++#define CDO_FIFO_Mode_Bit 0x800 ++/* data lanes specify which dio channels map to byte or word accesses ++ to the dio fifos */ ++#define CDO_Data_Lane_Mask 0x3000 ++#define CDO_Data_Lane_0_15_Bits 0x0 ++#define CDO_Data_Lane_16_31_Bits 0x1000 ++#define CDO_Data_Lane_0_7_Bits 0x0 ++#define CDO_Data_Lane_8_15_Bits 0x1000 ++#define CDO_Data_Lane_16_23_Bits 0x2000 ++#define CDO_Data_Lane_24_31_Bits 0x3000 ++ ++/* Interrupt C bits */ ++#define Interrupt_Group_C_Enable_Bit 0x1 ++#define Interrupt_Group_C_Status_Bit 0x1 ++ ++#define M_SERIES_EEPROM_SIZE 1024 ++ ++typedef struct ni_board_struct{ ++ unsigned short device_id; ++ int isapnp_id; ++ char *name; ++ ++ int n_adchan; ++ int adbits; ++ ++ int ai_fifo_depth; ++ unsigned int alwaysdither : 1; ++ int gainlkup; ++ int ai_speed; ++ ++ int n_aochan; ++ int aobits; ++ struct a4l_rngdesc *ao_range_table; ++ int ao_fifo_depth; ++ ++ unsigned ao_speed; ++ ++ unsigned num_p0_dio_channels; ++ ++ int reg_type; ++ unsigned int ao_unipolar : 1; ++ unsigned int has_8255 : 1; ++ unsigned int has_analog_trig : 1; ++ ++ enum caldac_enum caldac[3]; ++} ni_board; ++ ++#define n_ni_boards (sizeof(ni_boards)/sizeof(ni_board)) ++ ++#define MAX_N_CALDACS 34 ++#define 
MAX_N_AO_CHAN 8 ++#define NUM_GPCT 2 ++ ++#define NI_PRIVATE_COMMON \ ++ uint16_t (*stc_readw)(struct a4l_device *dev, int register); \ ++ uint32_t (*stc_readl)(struct a4l_device *dev, int register); \ ++ void (*stc_writew)(struct a4l_device *dev, uint16_t value, int register); \ ++ void (*stc_writel)(struct a4l_device *dev, uint32_t value, int register); \ ++ \ ++ int dio_state; \ ++ int pfi_state; \ ++ int io_bits; \ ++ unsigned short dio_output; \ ++ unsigned short dio_control; \ ++ int ao0p,ao1p; \ ++ int lastchan; \ ++ int last_do; \ ++ int rt_irq; \ ++ int irq_polarity; \ ++ int irq_pin; \ ++ int aimode; \ ++ int ai_continuous; \ ++ int blocksize; \ ++ int n_left; \ ++ unsigned int ai_calib_source; \ ++ unsigned int ai_calib_source_enabled; \ ++ rtdm_lock_t window_lock; \ ++ rtdm_lock_t soft_reg_copy_lock; \ ++ rtdm_lock_t mite_channel_lock; \ ++ \ ++ int changain_state; \ ++ unsigned int changain_spec; \ ++ \ ++ unsigned int caldac_maxdata_list[MAX_N_CALDACS]; \ ++ unsigned short ao[MAX_N_AO_CHAN]; \ ++ unsigned short caldacs[MAX_N_CALDACS]; \ ++ \ ++ unsigned short ai_cmd2; \ ++ \ ++ unsigned short ao_conf[MAX_N_AO_CHAN]; \ ++ unsigned short ao_mode1; \ ++ unsigned short ao_mode2; \ ++ unsigned short ao_mode3; \ ++ unsigned short ao_cmd1; \ ++ unsigned short ao_cmd2; \ ++ unsigned short ao_cmd3; \ ++ unsigned short ao_trigger_select; \ ++ \ ++ struct ni_gpct_device *counter_dev; \ ++ unsigned short an_trig_etc_reg; \ ++ \ ++ unsigned ai_offset[512]; \ ++ \ ++ unsigned long serial_interval_ns; \ ++ unsigned char serial_hw_mode; \ ++ unsigned short clock_and_fout; \ ++ unsigned short clock_and_fout2; \ ++ \ ++ unsigned short int_a_enable_reg; \ ++ unsigned short int_b_enable_reg; \ ++ unsigned short io_bidirection_pin_reg; \ ++ unsigned short rtsi_trig_direction_reg; \ ++ unsigned short rtsi_trig_a_output_reg; \ ++ unsigned short rtsi_trig_b_output_reg; \ ++ unsigned short pfi_output_select_reg[NUM_PFI_OUTPUT_SELECT_REGS]; \ ++ unsigned short ai_ao_select_reg; \ ++ unsigned short g0_g1_select_reg; \ ++ unsigned short cdio_dma_select_reg; \ ++ \ ++ unsigned clock_ns; \ ++ unsigned clock_source; \ ++ \ ++ unsigned short atrig_mode; \ ++ unsigned short atrig_high; \ ++ unsigned short atrig_low; \ ++ \ ++ unsigned short pwm_up_count; \ ++ unsigned short pwm_down_count; \ ++ \ ++ sampl_t ai_fifo_buffer[0x2000]; \ ++ uint8_t eeprom_buffer[M_SERIES_EEPROM_SIZE]; \ ++ \ ++ struct mite_struct *mite; \ ++ struct mite_channel *ai_mite_chan; \ ++ struct mite_channel *ao_mite_chan;\ ++ struct mite_channel *cdo_mite_chan;\ ++ struct mite_dma_descriptor_ring *ai_mite_ring; \ ++ struct mite_dma_descriptor_ring *ao_mite_ring; \ ++ struct mite_dma_descriptor_ring *cdo_mite_ring; \ ++ struct mite_dma_descriptor_ring *gpct_mite_ring[NUM_GPCT]; \ ++ subd_8255_t subd_8255 ++ ++ ++typedef struct { ++ ni_board *board_ptr; ++ NI_PRIVATE_COMMON; ++} ni_private; ++ ++#define devpriv ((ni_private *)dev->priv) ++#define boardtype (*(ni_board *)devpriv->board_ptr) ++ ++/* How we access registers */ ++ ++#define ni_writel(a,b) (writel((a), devpriv->mite->daq_io_addr + (b))) ++#define ni_readl(a) (readl(devpriv->mite->daq_io_addr + (a))) ++#define ni_writew(a,b) (writew((a), devpriv->mite->daq_io_addr + (b))) ++#define ni_readw(a) (readw(devpriv->mite->daq_io_addr + (a))) ++#define ni_writeb(a,b) (writeb((a), devpriv->mite->daq_io_addr + (b))) ++#define ni_readb(a) (readb(devpriv->mite->daq_io_addr + (a))) ++ ++/* INSN_CONFIG_SET_CLOCK_SRC argument for NI cards */ ++#define NI_FREQ_OUT_TIMEBASE_1_DIV_2_CLOCK_SRC 0 
/* 10 MHz */ ++#define NI_FREQ_OUT_TIMEBASE_2_CLOCK_SRC 1 /* 100 KHz */ ++ ++#endif /* _ANALOGY_NI_STC_H */ +--- linux/drivers/xenomai/analogy/rtdm_interface.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/rtdm_interface.c 2021-04-07 16:01:27.791633360 +0800 +@@ -0,0 +1,310 @@ ++/* ++ * Analogy for Linux, user interface (open, read, write, ioctl, proc) ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_AUTHOR("Alexis Berlemont"); ++MODULE_DESCRIPTION("Analogy core driver"); ++MODULE_LICENSE("GPL"); ++ ++int (* const a4l_ioctl_functions[]) (struct a4l_device_context *, void *) = { ++ [_IOC_NR(A4L_DEVCFG)] = a4l_ioctl_devcfg, ++ [_IOC_NR(A4L_DEVINFO)] = a4l_ioctl_devinfo, ++ [_IOC_NR(A4L_SUBDINFO)] = a4l_ioctl_subdinfo, ++ [_IOC_NR(A4L_CHANINFO)] = a4l_ioctl_chaninfo, ++ [_IOC_NR(A4L_RNGINFO)] = a4l_ioctl_rnginfo, ++ [_IOC_NR(A4L_CMD)] = a4l_ioctl_cmd, ++ [_IOC_NR(A4L_CANCEL)] = a4l_ioctl_cancel, ++ [_IOC_NR(A4L_INSNLIST)] = a4l_ioctl_insnlist, ++ [_IOC_NR(A4L_INSN)] = a4l_ioctl_insn, ++ [_IOC_NR(A4L_BUFCFG)] = a4l_ioctl_bufcfg, ++ [_IOC_NR(A4L_BUFINFO)] = a4l_ioctl_bufinfo, ++ [_IOC_NR(A4L_POLL)] = a4l_ioctl_poll, ++ [_IOC_NR(A4L_MMAP)] = a4l_ioctl_mmap, ++ [_IOC_NR(A4L_NBCHANINFO)] = a4l_ioctl_nbchaninfo, ++ [_IOC_NR(A4L_NBRNGINFO)] = a4l_ioctl_nbrnginfo, ++ [_IOC_NR(A4L_BUFCFG2)] = a4l_ioctl_bufcfg2, ++ [_IOC_NR(A4L_BUFINFO2)] = a4l_ioctl_bufinfo2 ++}; ++ ++#ifdef CONFIG_PROC_FS ++struct proc_dir_entry *a4l_proc_root; ++ ++static int a4l_proc_devs_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, a4l_rdproc_devs, NULL); ++} ++ ++static const struct file_operations a4l_proc_devs_ops = { ++ .open = a4l_proc_devs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++static int a4l_proc_drvs_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, a4l_rdproc_drvs, NULL); ++} ++ ++static const struct file_operations a4l_proc_drvs_ops = { ++ .open = a4l_proc_drvs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++int a4l_init_proc(void) ++{ ++ int ret = 0; ++ struct proc_dir_entry *entry; ++ ++ /* Creates the global directory */ ++ a4l_proc_root = proc_mkdir("analogy", NULL); ++ if (a4l_proc_root == NULL) { ++ __a4l_err("a4l_proc_init: " ++ "failed to create /proc/analogy\n"); ++ return -ENOMEM; ++ } ++ ++ /* Creates the devices related file */ ++ entry = proc_create("devices", 0444, a4l_proc_root, ++ &a4l_proc_devs_ops); ++ if (entry == NULL) { ++ __a4l_err("a4l_proc_init: " ++ "failed to create /proc/analogy/devices\n"); ++ ret = -ENOMEM; ++ goto err_proc_init; ++ } ++ ++ /* Creates the 
drivers related file */ ++ entry = proc_create("drivers", 0444, a4l_proc_root, ++ &a4l_proc_drvs_ops); ++ if (entry == NULL) { ++ __a4l_err("a4l_proc_init: " ++ "failed to create /proc/analogy/drivers\n"); ++ ret = -ENOMEM; ++ goto err_proc_init; ++ } ++ ++ return 0; ++ ++err_proc_init: ++ remove_proc_entry("devices", a4l_proc_root); ++ remove_proc_entry("analogy", NULL); ++ return ret; ++} ++ ++void a4l_cleanup_proc(void) ++{ ++ remove_proc_entry("drivers", a4l_proc_root); ++ remove_proc_entry("devices", a4l_proc_root); ++ remove_proc_entry("analogy", NULL); ++} ++ ++#else /* !CONFIG_PROC_FS */ ++ ++#define a4l_init_proc() 0 ++#define a4l_cleanup_proc() ++ ++#endif /* CONFIG_PROC_FS */ ++ ++int a4l_open(struct rtdm_fd *fd, int flags) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ /* Get a pointer on the selected device (thanks to minor index) */ ++ a4l_set_dev(cxt); ++ ++ /* Initialize the buffer structure */ ++ cxt->buffer = rtdm_malloc(sizeof(struct a4l_buffer)); ++ ++ a4l_init_buffer(cxt->buffer); ++ /* Allocate the asynchronous buffer ++ NOTE: it should be interesting to allocate the buffer only ++ on demand especially if the system is short of memory */ ++ if (cxt->dev->transfer.default_bufsize) ++ a4l_alloc_buffer(cxt->buffer, ++ cxt->dev->transfer.default_bufsize); ++ ++ __a4l_dbg(1, core_dbg, "cxt=%p cxt->buf=%p, cxt->buf->buf=%p\n", ++ cxt, cxt->buffer, cxt->buffer->buf); ++ ++ return 0; ++} ++ ++void a4l_close(struct rtdm_fd *fd) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ /* Cancel the maybe occuring asynchronous transfer */ ++ a4l_cancel_buffer(cxt); ++ ++ /* Free the buffer which was linked with this context and... */ ++ a4l_free_buffer(cxt->buffer); ++ ++ /* ...free the other buffer resources (sync) and... 
*/ ++ a4l_cleanup_buffer(cxt->buffer); ++ ++ /* ...free the structure */ ++ rtdm_free(cxt->buffer); ++} ++ ++ssize_t a4l_read(struct rtdm_fd *fd, void *buf, size_t nbytes) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ /* Jump into the RT domain if possible */ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ if (nbytes == 0) ++ return 0; ++ ++ return a4l_read_buffer(cxt, buf, nbytes); ++} ++ ++ssize_t a4l_write(struct rtdm_fd *fd, const void *buf, size_t nbytes) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ /* Jump into the RT domain if possible */ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ if (nbytes == 0) ++ return 0; ++ ++ return a4l_write_buffer(cxt, buf, nbytes); ++} ++ ++int a4l_ioctl(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ return a4l_ioctl_functions[_IOC_NR(request)] (cxt, arg); ++} ++ ++int a4l_rt_select(struct rtdm_fd *fd, ++ rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ return a4l_select(cxt, selector, type, fd_index); ++} ++ ++static struct rtdm_driver analogy_driver = { ++ .profile_info = RTDM_PROFILE_INFO(analogy, ++ RTDM_CLASS_EXPERIMENTAL, ++ RTDM_SUBCLASS_ANALOGY, ++ 0), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = A4L_NB_DEVICES, ++ .context_size = sizeof(struct a4l_device_context), ++ .ops = { ++ .open = a4l_open, ++ .close = a4l_close, ++ .ioctl_rt = a4l_ioctl, ++ .read_rt = a4l_read, ++ .write_rt = a4l_write, ++ .ioctl_nrt = a4l_ioctl, ++ .read_nrt = a4l_read, ++ .write_nrt = a4l_write, ++ .select = a4l_rt_select, ++ }, ++}; ++ ++static struct rtdm_device rtdm_devs[A4L_NB_DEVICES] = { ++ [0 ... A4L_NB_DEVICES - 1] = { ++ .driver = &analogy_driver, ++ .label = "analogy%d", ++ } ++}; ++ ++int a4l_register(void) ++{ ++ int i, ret; ++ ++ for (i = 0; i < A4L_NB_DEVICES; i++) { ++ ret = rtdm_dev_register(rtdm_devs + i); ++ if (ret) ++ goto fail; ++ } ++ ++ return 0; ++fail: ++ while (i-- > 0) ++ rtdm_dev_unregister(rtdm_devs + i); ++ ++ return ret; ++} ++ ++void a4l_unregister(void) ++{ ++ int i; ++ for (i = 0; i < A4L_NB_DEVICES; i++) ++ rtdm_dev_unregister(&(rtdm_devs[i])); ++} ++ ++static int __init a4l_init(void) ++{ ++ int ret; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ /* Initializes the devices */ ++ a4l_init_devs(); ++ ++ /* Initializes Analogy time management */ ++ a4l_init_time(); ++ ++ /* Registers RTDM / fops interface */ ++ ret = a4l_register(); ++ if (ret != 0) { ++ a4l_unregister(); ++ goto out_a4l_init; ++ } ++ ++ /* Initializes Analogy proc layer */ ++ ret = a4l_init_proc(); ++ ++out_a4l_init: ++ return ret; ++} ++ ++static void __exit a4l_cleanup(void) ++{ ++ /* Removes Analogy proc files */ ++ a4l_cleanup_proc(); ++ ++ /* Unregisters RTDM / fops interface */ ++ a4l_unregister(); ++} ++ ++module_init(a4l_init); ++module_exit(a4l_cleanup); +--- linux/drivers/xenomai/analogy/instruction.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/instruction.c 2021-04-07 16:01:27.786633367 +0800 +@@ -0,0 +1,427 @@ ++/* ++ * Analogy for Linux, instruction related features ++ * ++ * Copyright (C) 1997-2000 David A. 
Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int a4l_do_insn_gettime(struct a4l_kernel_instruction * dsc) ++{ ++ nanosecs_abs_t ns; ++ uint32_t ns2; ++ ++ unsigned int *data = (unsigned int *)dsc->data; ++ ++ /* Basic checkings */ ++ if (dsc->data_size != 2 * sizeof(unsigned int)) { ++ __a4l_err("a4l_do_insn_gettime: data size should be 2\n"); ++ return -EINVAL; ++ } ++ ++ /* Get a timestamp */ ++ ns = a4l_get_time(); ++ ++ /* Perform the conversion */ ++ ns2 = do_div(ns, 1000000000); ++ data[0] = (unsigned int) ns; ++ data[1] = (unsigned int) ns2 / 1000; ++ ++ return 0; ++} ++ ++int a4l_do_insn_wait(struct a4l_kernel_instruction * dsc) ++{ ++ unsigned int us; ++ unsigned int *data = (unsigned int *)dsc->data; ++ ++ /* Basic checkings */ ++ if (dsc->data_size != sizeof(unsigned int)) { ++ __a4l_err("a4l_do_insn_wait: data size should be 1\n"); ++ return -EINVAL; ++ } ++ ++ if (data[0] > A4L_INSN_WAIT_MAX) { ++ __a4l_err("a4l_do_insn_wait: wait duration is out of range\n"); ++ return -EINVAL; ++ } ++ ++ /* As we use (a4l_)udelay, we have to convert the delay into ++ microseconds */ ++ us = data[0] / 1000; ++ ++ /* At least, the delay is rounded up to 1 microsecond */ ++ if (us == 0) ++ us = 1; ++ ++ /* Performs the busy waiting */ ++ a4l_udelay(us); ++ ++ return 0; ++} ++ ++int a4l_do_insn_trig(struct a4l_device_context * cxt, struct a4l_kernel_instruction * dsc) ++{ ++ struct a4l_subdevice *subd; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ unsigned int trignum; ++ unsigned int *data = (unsigned int*)dsc->data; ++ ++ /* Basic checkings */ ++ if (dsc->data_size > 1) { ++ __a4l_err("a4l_do_insn_trig: data size should not be > 1\n"); ++ return -EINVAL; ++ } ++ ++ trignum = (dsc->data_size == sizeof(unsigned int)) ? 
data[0] : 0; ++ ++ if (dsc->idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_do_insn_trig: " ++ "subdevice index is out of range\n"); ++ return -EINVAL; ++ } ++ ++ subd = dev->transfer.subds[dsc->idx_subd]; ++ ++ /* Checks that the concerned subdevice is trigger-compliant */ ++ if ((subd->flags & A4L_SUBD_CMD) == 0 || subd->trigger == NULL) { ++ __a4l_err("a4l_do_insn_trig: subdevice does not support " ++ "triggering or asynchronous acquisition\n"); ++ return -EINVAL; ++ } ++ ++ /* Performs the trigger */ ++ return subd->trigger(subd, trignum); ++} ++ ++int a4l_fill_insndsc(struct a4l_device_context * cxt, struct a4l_kernel_instruction * dsc, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int ret = 0; ++ void *tmp_data = NULL; ++ ++ ret = rtdm_safe_copy_from_user(fd, ++ dsc, arg, sizeof(a4l_insn_t)); ++ if (ret != 0) ++ goto out_insndsc; ++ ++ if (dsc->data_size != 0 && dsc->data == NULL) { ++ __a4l_err("a4l_fill_insndsc: no data pointer specified\n"); ++ ret = -EINVAL; ++ goto out_insndsc; ++ } ++ ++ if (dsc->data_size != 0 && dsc->data != NULL) { ++ tmp_data = rtdm_malloc(dsc->data_size); ++ if (tmp_data == NULL) { ++ ret = -ENOMEM; ++ goto out_insndsc; ++ } ++ ++ if ((dsc->type & A4L_INSN_MASK_WRITE) != 0) { ++ ret = rtdm_safe_copy_from_user(fd, ++ tmp_data, dsc->data, ++ dsc->data_size); ++ if (ret < 0) ++ goto out_insndsc; ++ } ++ } ++ ++ dsc->__udata = dsc->data; ++ dsc->data = tmp_data; ++ ++out_insndsc: ++ ++ if (ret != 0 && tmp_data != NULL) ++ rtdm_free(tmp_data); ++ ++ return ret; ++} ++ ++int a4l_free_insndsc(struct a4l_device_context * cxt, struct a4l_kernel_instruction * dsc) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int ret = 0; ++ ++ if ((dsc->type & A4L_INSN_MASK_READ) != 0) ++ ret = rtdm_safe_copy_to_user(fd, ++ dsc->__udata, ++ dsc->data, dsc->data_size); ++ ++ if (dsc->data != NULL) ++ rtdm_free(dsc->data); ++ ++ return ret; ++} ++ ++int a4l_do_special_insn(struct a4l_device_context * cxt, struct a4l_kernel_instruction * dsc) ++{ ++ int ret = 0; ++ ++ switch (dsc->type) { ++ case A4L_INSN_GTOD: ++ ret = a4l_do_insn_gettime(dsc); ++ break; ++ case A4L_INSN_WAIT: ++ ret = a4l_do_insn_wait(dsc); ++ break; ++ case A4L_INSN_INTTRIG: ++ ret = a4l_do_insn_trig(cxt, dsc); ++ break; ++ default: ++ __a4l_err("a4l_do_special_insn: " ++ "incoherent instruction code\n"); ++ return -EINVAL; ++ } ++ ++ if (ret < 0) ++ __a4l_err("a4l_do_special_insn: " ++ "execution of the instruction failed (err=%d)\n", ++ ret); ++ ++ return ret; ++} ++ ++int a4l_do_insn(struct a4l_device_context * cxt, struct a4l_kernel_instruction * dsc) ++{ ++ int ret = 0; ++ struct a4l_subdevice *subd; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ int (*hdlr) (struct a4l_subdevice *, struct a4l_kernel_instruction *) = NULL; ++ ++ /* Checks the subdevice index */ ++ if (dsc->idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_do_insn: " ++ "subdevice index out of range (idx=%d)\n", ++ dsc->idx_subd); ++ return -EINVAL; ++ } ++ ++ /* Recovers pointers on the proper subdevice */ ++ subd = dev->transfer.subds[dsc->idx_subd]; ++ ++ /* Checks the subdevice's characteristics */ ++ if ((subd->flags & A4L_SUBD_TYPES) == A4L_SUBD_UNUSED) { ++ __a4l_err("a4l_do_insn: wrong subdevice selected\n"); ++ return -EINVAL; ++ } ++ ++ /* Checks the channel descriptor */ ++ if ((subd->flags & A4L_SUBD_TYPES) != A4L_SUBD_CALIB) { ++ ret = a4l_check_chanlist(dev->transfer.subds[dsc->idx_subd], ++ 1, &dsc->chan_desc); ++ if (ret < 0) ++ return ret; ++ } ++ ++ /* Choose the proper handler, we 
can check the pointer because ++ the subdevice was memset to 0 at allocation time */ ++ switch (dsc->type) { ++ case A4L_INSN_READ: ++ hdlr = subd->insn_read; ++ break; ++ case A4L_INSN_WRITE: ++ hdlr = subd->insn_write; ++ break; ++ case A4L_INSN_BITS: ++ hdlr = subd->insn_bits; ++ break; ++ case A4L_INSN_CONFIG: ++ hdlr = subd->insn_config; ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ /* We check the instruction type */ ++ if (ret < 0) ++ return ret; ++ ++ /* We check whether a handler is available */ ++ if (hdlr == NULL) ++ return -ENOSYS; ++ ++ /* Prevents the subdevice from being used during ++ the following operations */ ++ if (test_and_set_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ ret = -EBUSY; ++ goto out_do_insn; ++ } ++ ++ /* Let's the driver-specific code perform the instruction */ ++ ret = hdlr(subd, dsc); ++ ++ if (ret < 0) ++ __a4l_err("a4l_do_insn: " ++ "execution of the instruction failed (err=%d)\n", ++ ret); ++ ++out_do_insn: ++ ++ /* Releases the subdevice from its reserved state */ ++ clear_bit(A4L_SUBD_BUSY_NR, &subd->status); ++ ++ return ret; ++} ++ ++int a4l_ioctl_insn(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int ret = 0; ++ struct a4l_kernel_instruction insn; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_insn: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ /* Recovers the instruction descriptor */ ++ ret = a4l_fill_insndsc(cxt, &insn, arg); ++ if (ret != 0) ++ goto err_ioctl_insn; ++ ++ /* Performs the instruction */ ++ if ((insn.type & A4L_INSN_MASK_SPECIAL) != 0) ++ ret = a4l_do_special_insn(cxt, &insn); ++ else ++ ret = a4l_do_insn(cxt, &insn); ++ ++ if (ret < 0) ++ goto err_ioctl_insn; ++ ++ /* Frees the used memory and sends back some ++ data, if need be */ ++ ret = a4l_free_insndsc(cxt, &insn); ++ ++ return ret; ++ ++err_ioctl_insn: ++ a4l_free_insndsc(cxt, &insn); ++ return ret; ++} ++ ++int a4l_fill_ilstdsc(struct a4l_device_context * cxt, struct a4l_kernel_instruction_list * dsc, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int i, ret = 0; ++ ++ dsc->insns = NULL; ++ ++ /* Recovers the structure from user space */ ++ ret = rtdm_safe_copy_from_user(fd, ++ dsc, arg, sizeof(a4l_insnlst_t)); ++ if (ret < 0) ++ return ret; ++ ++ /* Some basic checking */ ++ if (dsc->count == 0) { ++ __a4l_err("a4l_fill_ilstdsc: instruction list's count is 0\n"); ++ return -EINVAL; ++ } ++ ++ /* Keeps the user pointer in an opaque field */ ++ dsc->__uinsns = (a4l_insn_t *)dsc->insns; ++ ++ dsc->insns = rtdm_malloc(dsc->count * sizeof(struct a4l_kernel_instruction)); ++ if (dsc->insns == NULL) ++ return -ENOMEM; ++ ++ /* Recovers the instructions, one by one. 
This part is not ++ optimized */ ++ for (i = 0; i < dsc->count && ret == 0; i++) ++ ret = a4l_fill_insndsc(cxt, ++ &(dsc->insns[i]), ++ &(dsc->__uinsns[i])); ++ ++ /* In case of error, frees the allocated memory */ ++ if (ret < 0 && dsc->insns != NULL) ++ rtdm_free(dsc->insns); ++ ++ return ret; ++} ++ ++int a4l_free_ilstdsc(struct a4l_device_context * cxt, struct a4l_kernel_instruction_list * dsc) ++{ ++ int i, ret = 0; ++ ++ if (dsc->insns != NULL) { ++ ++ for (i = 0; i < dsc->count && ret == 0; i++) ++ ret = a4l_free_insndsc(cxt, &(dsc->insns[i])); ++ ++ while (i < dsc->count) { ++ a4l_free_insndsc(cxt, &(dsc->insns[i])); ++ i++; ++ } ++ ++ rtdm_free(dsc->insns); ++ } ++ ++ return ret; ++} ++ ++/* This function is not optimized in terms of memory footprint and ++ CPU charge; however, the whole analogy instruction system was not ++ designed for performance issues */ ++int a4l_ioctl_insnlist(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int i, ret = 0; ++ struct a4l_kernel_instruction_list ilst; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_insnlist: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if ((ret = a4l_fill_ilstdsc(cxt, &ilst, arg)) < 0) ++ return ret; ++ ++ /* Performs the instructions */ ++ for (i = 0; i < ilst.count && ret == 0; i++) { ++ if ((ilst.insns[i].type & A4L_INSN_MASK_SPECIAL) != 0) ++ ret = a4l_do_special_insn(cxt, &ilst.insns[i]); ++ else ++ ret = a4l_do_insn(cxt, &ilst.insns[i]); ++ } ++ ++ if (ret < 0) ++ goto err_ioctl_ilst; ++ ++ return a4l_free_ilstdsc(cxt, &ilst); ++ ++err_ioctl_ilst: ++ a4l_free_ilstdsc(cxt, &ilst); ++ return ret; ++} +--- linux/drivers/xenomai/analogy/driver.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/driver.c 2021-04-07 16:01:27.782633373 +0800 +@@ -0,0 +1,104 @@ ++/* ++ * Analogy for Linux, driver related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++ ++#include "proc.h" ++ ++static LIST_HEAD(a4l_drvs); ++ ++/* --- Driver list management functions --- */ ++ ++int a4l_lct_drv(char *pin, struct a4l_driver ** pio) ++{ ++ struct list_head *this; ++ int ret = -EINVAL; ++ ++ __a4l_dbg(1, core_dbg, "name=%s\n", pin); ++ ++ /* Goes through the linked list so as to find ++ a driver instance with the same name */ ++ list_for_each(this, &a4l_drvs) { ++ struct a4l_driver *drv = list_entry(this, struct a4l_driver, list); ++ ++ if (strcmp(drv->board_name, pin) == 0) { ++ /* The argument pio can be NULL ++ if there is no need to retrieve the pointer */ ++ if (pio != NULL) ++ *pio = drv; ++ ret = 0; ++ break; ++ } ++ } ++ ++ return ret; ++} ++ ++int a4l_register_drv(struct a4l_driver * drv) ++{ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ __a4l_dbg(1, core_dbg, "board name=%s\n", drv->board_name); ++ ++ if (a4l_lct_drv(drv->board_name, NULL) != 0) { ++ list_add(&drv->list, &a4l_drvs); ++ return 0; ++ } else ++ return -EINVAL; ++} ++ ++int a4l_unregister_drv(struct a4l_driver * drv) ++{ ++ __a4l_dbg(1, core_dbg, "board name=%s\n", drv->board_name); ++ ++ if (a4l_lct_drv(drv->board_name, NULL) == 0) { ++ /* Here, we consider the argument is pointing ++ to a real driver struct (not a blank structure ++ with only the name field properly set */ ++ list_del(&drv->list); ++ return 0; ++ } else ++ return -EINVAL; ++} ++ ++#ifdef CONFIG_PROC_FS ++ ++/* --- Driver list proc section --- */ ++ ++int a4l_rdproc_drvs(struct seq_file *p, void *data) ++{ ++ int i = 0; ++ struct list_head *this; ++ ++ seq_printf(p, "-- Analogy drivers --\n\n"); ++ ++ seq_printf(p, "| idx | board name \n"); ++ ++ list_for_each(this, &a4l_drvs) { ++ struct a4l_driver *drv = list_entry(this, struct a4l_driver, list); ++ seq_printf(p, "| %02d | %s \n", i++, drv->board_name); ++ } ++ return 0; ++} ++ ++#endif /* CONFIG_PROC_FS */ +--- linux/drivers/xenomai/net/drivers/rt_macb.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/rt_macb.h 2021-04-07 16:01:27.777633380 +0800 +@@ -0,0 +1,624 @@ ++/* ++ * Atmel MACB Ethernet Controller driver ++ * ++ * Copyright (C) 2004-2006 Atmel Corporation ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++#ifndef _MACB_H ++#define _MACB_H ++ ++#define MACB_GREGS_NBR 16 ++#define MACB_GREGS_VERSION 1 ++ ++/* MACB register offsets */ ++#define MACB_NCR 0x0000 ++#define MACB_NCFGR 0x0004 ++#define MACB_NSR 0x0008 ++#define MACB_TAR 0x000c /* AT91RM9200 only */ ++#define MACB_TCR 0x0010 /* AT91RM9200 only */ ++#define MACB_TSR 0x0014 ++#define MACB_RBQP 0x0018 ++#define MACB_TBQP 0x001c ++#define MACB_RSR 0x0020 ++#define MACB_ISR 0x0024 ++#define MACB_IER 0x0028 ++#define MACB_IDR 0x002c ++#define MACB_IMR 0x0030 ++#define MACB_MAN 0x0034 ++#define MACB_PTR 0x0038 ++#define MACB_PFR 0x003c ++#define MACB_FTO 0x0040 ++#define MACB_SCF 0x0044 ++#define MACB_MCF 0x0048 ++#define MACB_FRO 0x004c ++#define MACB_FCSE 0x0050 ++#define MACB_ALE 0x0054 ++#define MACB_DTF 0x0058 ++#define MACB_LCOL 0x005c ++#define MACB_EXCOL 0x0060 ++#define MACB_TUND 0x0064 ++#define MACB_CSE 0x0068 ++#define MACB_RRE 0x006c ++#define MACB_ROVR 0x0070 ++#define MACB_RSE 0x0074 ++#define MACB_ELE 0x0078 ++#define MACB_RJA 0x007c ++#define MACB_USF 0x0080 ++#define MACB_STE 0x0084 ++#define MACB_RLE 0x0088 ++#define MACB_TPF 0x008c ++#define MACB_HRB 0x0090 ++#define MACB_HRT 0x0094 ++#define MACB_SA1B 0x0098 ++#define MACB_SA1T 0x009c ++#define MACB_SA2B 0x00a0 ++#define MACB_SA2T 0x00a4 ++#define MACB_SA3B 0x00a8 ++#define MACB_SA3T 0x00ac ++#define MACB_SA4B 0x00b0 ++#define MACB_SA4T 0x00b4 ++#define MACB_TID 0x00b8 ++#define MACB_TPQ 0x00bc ++#define MACB_USRIO 0x00c0 ++#define MACB_WOL 0x00c4 ++#define MACB_MID 0x00fc ++ ++/* GEM register offsets. */ ++#define GEM_NCFGR 0x0004 ++#define GEM_USRIO 0x000c ++#define GEM_DMACFG 0x0010 ++#define GEM_HRB 0x0080 ++#define GEM_HRT 0x0084 ++#define GEM_SA1B 0x0088 ++#define GEM_SA1T 0x008C ++#define GEM_SA2B 0x0090 ++#define GEM_SA2T 0x0094 ++#define GEM_SA3B 0x0098 ++#define GEM_SA3T 0x009C ++#define GEM_SA4B 0x00A0 ++#define GEM_SA4T 0x00A4 ++#define GEM_OTX 0x0100 ++#define GEM_DCFG1 0x0280 ++#define GEM_DCFG2 0x0284 ++#define GEM_DCFG3 0x0288 ++#define GEM_DCFG4 0x028c ++#define GEM_DCFG5 0x0290 ++#define GEM_DCFG6 0x0294 ++#define GEM_DCFG7 0x0298 ++ ++/* Bitfields in NCR */ ++#define MACB_LB_OFFSET 0 ++#define MACB_LB_SIZE 1 ++#define MACB_LLB_OFFSET 1 ++#define MACB_LLB_SIZE 1 ++#define MACB_RE_OFFSET 2 ++#define MACB_RE_SIZE 1 ++#define MACB_TE_OFFSET 3 ++#define MACB_TE_SIZE 1 ++#define MACB_MPE_OFFSET 4 ++#define MACB_MPE_SIZE 1 ++#define MACB_CLRSTAT_OFFSET 5 ++#define MACB_CLRSTAT_SIZE 1 ++#define MACB_INCSTAT_OFFSET 6 ++#define MACB_INCSTAT_SIZE 1 ++#define MACB_WESTAT_OFFSET 7 ++#define MACB_WESTAT_SIZE 1 ++#define MACB_BP_OFFSET 8 ++#define MACB_BP_SIZE 1 ++#define MACB_TSTART_OFFSET 9 ++#define MACB_TSTART_SIZE 1 ++#define MACB_THALT_OFFSET 10 ++#define MACB_THALT_SIZE 1 ++#define MACB_NCR_TPF_OFFSET 11 ++#define MACB_NCR_TPF_SIZE 1 ++#define MACB_TZQ_OFFSET 12 ++#define MACB_TZQ_SIZE 1 ++ ++/* Bitfields in NCFGR */ ++#define MACB_SPD_OFFSET 0 ++#define MACB_SPD_SIZE 1 ++#define MACB_FD_OFFSET 1 ++#define MACB_FD_SIZE 1 ++#define MACB_BIT_RATE_OFFSET 2 ++#define MACB_BIT_RATE_SIZE 1 ++#define MACB_JFRAME_OFFSET 3 ++#define MACB_JFRAME_SIZE 1 ++#define MACB_CAF_OFFSET 4 ++#define MACB_CAF_SIZE 1 ++#define MACB_NBC_OFFSET 5 ++#define MACB_NBC_SIZE 1 ++#define MACB_NCFGR_MTI_OFFSET 6 ++#define MACB_NCFGR_MTI_SIZE 1 ++#define MACB_UNI_OFFSET 7 ++#define MACB_UNI_SIZE 1 ++#define MACB_BIG_OFFSET 8 ++#define MACB_BIG_SIZE 1 ++#define MACB_EAE_OFFSET 9 ++#define MACB_EAE_SIZE 1 ++#define MACB_CLK_OFFSET 10 ++#define MACB_CLK_SIZE 2 ++#define 
MACB_RTY_OFFSET 12 ++#define MACB_RTY_SIZE 1 ++#define MACB_PAE_OFFSET 13 ++#define MACB_PAE_SIZE 1 ++#define MACB_RM9200_RMII_OFFSET 13 /* AT91RM9200 only */ ++#define MACB_RM9200_RMII_SIZE 1 /* AT91RM9200 only */ ++#define MACB_RBOF_OFFSET 14 ++#define MACB_RBOF_SIZE 2 ++#define MACB_RLCE_OFFSET 16 ++#define MACB_RLCE_SIZE 1 ++#define MACB_DRFCS_OFFSET 17 ++#define MACB_DRFCS_SIZE 1 ++#define MACB_EFRHD_OFFSET 18 ++#define MACB_EFRHD_SIZE 1 ++#define MACB_IRXFCS_OFFSET 19 ++#define MACB_IRXFCS_SIZE 1 ++ ++/* GEM specific NCFGR bitfields. */ ++#define GEM_GBE_OFFSET 10 ++#define GEM_GBE_SIZE 1 ++#define GEM_CLK_OFFSET 18 ++#define GEM_CLK_SIZE 3 ++#define GEM_DBW_OFFSET 21 ++#define GEM_DBW_SIZE 2 ++ ++/* Constants for data bus width. */ ++#define GEM_DBW32 0 ++#define GEM_DBW64 1 ++#define GEM_DBW128 2 ++ ++/* Bitfields in DMACFG. */ ++#define GEM_FBLDO_OFFSET 0 ++#define GEM_FBLDO_SIZE 5 ++#define GEM_ENDIA_OFFSET 7 ++#define GEM_ENDIA_SIZE 1 ++#define GEM_RXBMS_OFFSET 8 ++#define GEM_RXBMS_SIZE 2 ++#define GEM_TXPBMS_OFFSET 10 ++#define GEM_TXPBMS_SIZE 1 ++#define GEM_TXCOEN_OFFSET 11 ++#define GEM_TXCOEN_SIZE 1 ++#define GEM_RXBS_OFFSET 16 ++#define GEM_RXBS_SIZE 8 ++#define GEM_DDRP_OFFSET 24 ++#define GEM_DDRP_SIZE 1 ++ ++ ++/* Bitfields in NSR */ ++#define MACB_NSR_LINK_OFFSET 0 ++#define MACB_NSR_LINK_SIZE 1 ++#define MACB_MDIO_OFFSET 1 ++#define MACB_MDIO_SIZE 1 ++#define MACB_IDLE_OFFSET 2 ++#define MACB_IDLE_SIZE 1 ++ ++/* Bitfields in TSR */ ++#define MACB_UBR_OFFSET 0 ++#define MACB_UBR_SIZE 1 ++#define MACB_COL_OFFSET 1 ++#define MACB_COL_SIZE 1 ++#define MACB_TSR_RLE_OFFSET 2 ++#define MACB_TSR_RLE_SIZE 1 ++#define MACB_TGO_OFFSET 3 ++#define MACB_TGO_SIZE 1 ++#define MACB_BEX_OFFSET 4 ++#define MACB_BEX_SIZE 1 ++#define MACB_RM9200_BNQ_OFFSET 4 /* AT91RM9200 only */ ++#define MACB_RM9200_BNQ_SIZE 1 /* AT91RM9200 only */ ++#define MACB_COMP_OFFSET 5 ++#define MACB_COMP_SIZE 1 ++#define MACB_UND_OFFSET 6 ++#define MACB_UND_SIZE 1 ++ ++/* Bitfields in RSR */ ++#define MACB_BNA_OFFSET 0 ++#define MACB_BNA_SIZE 1 ++#define MACB_REC_OFFSET 1 ++#define MACB_REC_SIZE 1 ++#define MACB_OVR_OFFSET 2 ++#define MACB_OVR_SIZE 1 ++ ++/* Bitfields in ISR/IER/IDR/IMR */ ++#define MACB_MFD_OFFSET 0 ++#define MACB_MFD_SIZE 1 ++#define MACB_RCOMP_OFFSET 1 ++#define MACB_RCOMP_SIZE 1 ++#define MACB_RXUBR_OFFSET 2 ++#define MACB_RXUBR_SIZE 1 ++#define MACB_TXUBR_OFFSET 3 ++#define MACB_TXUBR_SIZE 1 ++#define MACB_ISR_TUND_OFFSET 4 ++#define MACB_ISR_TUND_SIZE 1 ++#define MACB_ISR_RLE_OFFSET 5 ++#define MACB_ISR_RLE_SIZE 1 ++#define MACB_TXERR_OFFSET 6 ++#define MACB_TXERR_SIZE 1 ++#define MACB_TCOMP_OFFSET 7 ++#define MACB_TCOMP_SIZE 1 ++#define MACB_ISR_LINK_OFFSET 9 ++#define MACB_ISR_LINK_SIZE 1 ++#define MACB_ISR_ROVR_OFFSET 10 ++#define MACB_ISR_ROVR_SIZE 1 ++#define MACB_HRESP_OFFSET 11 ++#define MACB_HRESP_SIZE 1 ++#define MACB_PFR_OFFSET 12 ++#define MACB_PFR_SIZE 1 ++#define MACB_PTZ_OFFSET 13 ++#define MACB_PTZ_SIZE 1 ++ ++/* Bitfields in MAN */ ++#define MACB_DATA_OFFSET 0 ++#define MACB_DATA_SIZE 16 ++#define MACB_CODE_OFFSET 16 ++#define MACB_CODE_SIZE 2 ++#define MACB_REGA_OFFSET 18 ++#define MACB_REGA_SIZE 5 ++#define MACB_PHYA_OFFSET 23 ++#define MACB_PHYA_SIZE 5 ++#define MACB_RW_OFFSET 28 ++#define MACB_RW_SIZE 2 ++#define MACB_SOF_OFFSET 30 ++#define MACB_SOF_SIZE 2 ++ ++/* Bitfields in USRIO (AVR32) */ ++#define MACB_MII_OFFSET 0 ++#define MACB_MII_SIZE 1 ++#define MACB_EAM_OFFSET 1 ++#define MACB_EAM_SIZE 1 ++#define MACB_TX_PAUSE_OFFSET 2 ++#define MACB_TX_PAUSE_SIZE 1 
++#define MACB_TX_PAUSE_ZERO_OFFSET 3 ++#define MACB_TX_PAUSE_ZERO_SIZE 1 ++ ++/* Bitfields in USRIO (AT91) */ ++#define MACB_RMII_OFFSET 0 ++#define MACB_RMII_SIZE 1 ++#define GEM_RGMII_OFFSET 0 /* GEM gigabit mode */ ++#define GEM_RGMII_SIZE 1 ++#define MACB_CLKEN_OFFSET 1 ++#define MACB_CLKEN_SIZE 1 ++ ++/* Bitfields in WOL */ ++#define MACB_IP_OFFSET 0 ++#define MACB_IP_SIZE 16 ++#define MACB_MAG_OFFSET 16 ++#define MACB_MAG_SIZE 1 ++#define MACB_ARP_OFFSET 17 ++#define MACB_ARP_SIZE 1 ++#define MACB_SA1_OFFSET 18 ++#define MACB_SA1_SIZE 1 ++#define MACB_WOL_MTI_OFFSET 19 ++#define MACB_WOL_MTI_SIZE 1 ++ ++/* Bitfields in MID */ ++#define MACB_IDNUM_OFFSET 16 ++#define MACB_IDNUM_SIZE 16 ++#define MACB_REV_OFFSET 0 ++#define MACB_REV_SIZE 16 ++ ++/* Bitfields in DCFG1. */ ++#define GEM_IRQCOR_OFFSET 23 ++#define GEM_IRQCOR_SIZE 1 ++#define GEM_DBWDEF_OFFSET 25 ++#define GEM_DBWDEF_SIZE 3 ++ ++/* Constants for CLK */ ++#define MACB_CLK_DIV8 0 ++#define MACB_CLK_DIV16 1 ++#define MACB_CLK_DIV32 2 ++#define MACB_CLK_DIV64 3 ++ ++/* GEM specific constants for CLK. */ ++#define GEM_CLK_DIV8 0 ++#define GEM_CLK_DIV16 1 ++#define GEM_CLK_DIV32 2 ++#define GEM_CLK_DIV48 3 ++#define GEM_CLK_DIV64 4 ++#define GEM_CLK_DIV96 5 ++ ++/* Constants for MAN register */ ++#define MACB_MAN_SOF 1 ++#define MACB_MAN_WRITE 1 ++#define MACB_MAN_READ 2 ++#define MACB_MAN_CODE 2 ++ ++/* Capability mask bits */ ++#define MACB_CAPS_ISR_CLEAR_ON_WRITE 0x1 ++ ++/* Bit manipulation macros */ ++#define MACB_BIT(name) \ ++ (1 << MACB_##name##_OFFSET) ++#define MACB_BF(name,value) \ ++ (((value) & ((1 << MACB_##name##_SIZE) - 1)) \ ++ << MACB_##name##_OFFSET) ++#define MACB_BFEXT(name,value)\ ++ (((value) >> MACB_##name##_OFFSET) \ ++ & ((1 << MACB_##name##_SIZE) - 1)) ++#define MACB_BFINS(name,value,old) \ ++ (((old) & ~(((1 << MACB_##name##_SIZE) - 1) \ ++ << MACB_##name##_OFFSET)) \ ++ | MACB_BF(name,value)) ++ ++#define GEM_BIT(name) \ ++ (1 << GEM_##name##_OFFSET) ++#define GEM_BF(name, value) \ ++ (((value) & ((1 << GEM_##name##_SIZE) - 1)) \ ++ << GEM_##name##_OFFSET) ++#define GEM_BFEXT(name, value)\ ++ (((value) >> GEM_##name##_OFFSET) \ ++ & ((1 << GEM_##name##_SIZE) - 1)) ++#define GEM_BFINS(name, value, old) \ ++ (((old) & ~(((1 << GEM_##name##_SIZE) - 1) \ ++ << GEM_##name##_OFFSET)) \ ++ | GEM_BF(name, value)) ++ ++/* Register access macros */ ++#define macb_readl(port,reg) \ ++ __raw_readl((port)->regs + MACB_##reg) ++#define macb_writel(port,reg,value) \ ++ __raw_writel((value), (port)->regs + MACB_##reg) ++#define gem_readl(port, reg) \ ++ __raw_readl((port)->regs + GEM_##reg) ++#define gem_writel(port, reg, value) \ ++ __raw_writel((value), (port)->regs + GEM_##reg) ++ ++/* ++ * Conditional GEM/MACB macros. These perform the operation to the correct ++ * register dependent on whether the device is a GEM or a MACB. For registers ++ * and bitfields that are common across both devices, use macb_{read,write}l ++ * to avoid the cost of the conditional. 
++ */ ++#define macb_or_gem_writel(__bp, __reg, __value) \ ++ ({ \ ++ if (macb_is_gem((__bp))) \ ++ gem_writel((__bp), __reg, __value); \ ++ else \ ++ macb_writel((__bp), __reg, __value); \ ++ }) ++ ++#define macb_or_gem_readl(__bp, __reg) \ ++ ({ \ ++ u32 __v; \ ++ if (macb_is_gem((__bp))) \ ++ __v = gem_readl((__bp), __reg); \ ++ else \ ++ __v = macb_readl((__bp), __reg); \ ++ __v; \ ++ }) ++ ++/** ++ * @brief Hardware DMA descriptor ++ * @anchor macb_dma_desc ++ */ ++struct macb_dma_desc { ++ /** DMA address of data buffer */ ++ u32 addr; ++ /** Control and status bits */ ++ u32 ctrl; ++}; ++ ++/* DMA descriptor bitfields */ ++#define MACB_RX_USED_OFFSET 0 ++#define MACB_RX_USED_SIZE 1 ++#define MACB_RX_WRAP_OFFSET 1 ++#define MACB_RX_WRAP_SIZE 1 ++#define MACB_RX_WADDR_OFFSET 2 ++#define MACB_RX_WADDR_SIZE 30 ++ ++#define MACB_RX_FRMLEN_OFFSET 0 ++#define MACB_RX_FRMLEN_SIZE 12 ++#define MACB_RX_OFFSET_OFFSET 12 ++#define MACB_RX_OFFSET_SIZE 2 ++#define MACB_RX_SOF_OFFSET 14 ++#define MACB_RX_SOF_SIZE 1 ++#define MACB_RX_EOF_OFFSET 15 ++#define MACB_RX_EOF_SIZE 1 ++#define MACB_RX_CFI_OFFSET 16 ++#define MACB_RX_CFI_SIZE 1 ++#define MACB_RX_VLAN_PRI_OFFSET 17 ++#define MACB_RX_VLAN_PRI_SIZE 3 ++#define MACB_RX_PRI_TAG_OFFSET 20 ++#define MACB_RX_PRI_TAG_SIZE 1 ++#define MACB_RX_VLAN_TAG_OFFSET 21 ++#define MACB_RX_VLAN_TAG_SIZE 1 ++#define MACB_RX_TYPEID_MATCH_OFFSET 22 ++#define MACB_RX_TYPEID_MATCH_SIZE 1 ++#define MACB_RX_SA4_MATCH_OFFSET 23 ++#define MACB_RX_SA4_MATCH_SIZE 1 ++#define MACB_RX_SA3_MATCH_OFFSET 24 ++#define MACB_RX_SA3_MATCH_SIZE 1 ++#define MACB_RX_SA2_MATCH_OFFSET 25 ++#define MACB_RX_SA2_MATCH_SIZE 1 ++#define MACB_RX_SA1_MATCH_OFFSET 26 ++#define MACB_RX_SA1_MATCH_SIZE 1 ++#define MACB_RX_EXT_MATCH_OFFSET 28 ++#define MACB_RX_EXT_MATCH_SIZE 1 ++#define MACB_RX_UHASH_MATCH_OFFSET 29 ++#define MACB_RX_UHASH_MATCH_SIZE 1 ++#define MACB_RX_MHASH_MATCH_OFFSET 30 ++#define MACB_RX_MHASH_MATCH_SIZE 1 ++#define MACB_RX_BROADCAST_OFFSET 31 ++#define MACB_RX_BROADCAST_SIZE 1 ++ ++#define MACB_TX_FRMLEN_OFFSET 0 ++#define MACB_TX_FRMLEN_SIZE 11 ++#define MACB_TX_LAST_OFFSET 15 ++#define MACB_TX_LAST_SIZE 1 ++#define MACB_TX_NOCRC_OFFSET 16 ++#define MACB_TX_NOCRC_SIZE 1 ++#define MACB_TX_BUF_EXHAUSTED_OFFSET 27 ++#define MACB_TX_BUF_EXHAUSTED_SIZE 1 ++#define MACB_TX_UNDERRUN_OFFSET 28 ++#define MACB_TX_UNDERRUN_SIZE 1 ++#define MACB_TX_ERROR_OFFSET 29 ++#define MACB_TX_ERROR_SIZE 1 ++#define MACB_TX_WRAP_OFFSET 30 ++#define MACB_TX_WRAP_SIZE 1 ++#define MACB_TX_USED_OFFSET 31 ++#define MACB_TX_USED_SIZE 1 ++ ++/** ++ * @brief Data about an skb which is being transmitted ++ * @anchor macb_tx_skb ++ */ ++struct macb_tx_skb { ++ /** skb currently being transmitted */ ++ struct rtskb *skb; ++ /** DMA address of the skb's data buffer */ ++ dma_addr_t mapping; ++}; ++ ++/* ++ * Hardware-collected statistics. Used when updating the network ++ * device stats by a periodic timer. 
++ */ ++struct macb_stats { ++ u32 rx_pause_frames; ++ u32 tx_ok; ++ u32 tx_single_cols; ++ u32 tx_multiple_cols; ++ u32 rx_ok; ++ u32 rx_fcs_errors; ++ u32 rx_align_errors; ++ u32 tx_deferred; ++ u32 tx_late_cols; ++ u32 tx_excessive_cols; ++ u32 tx_underruns; ++ u32 tx_carrier_errors; ++ u32 rx_resource_errors; ++ u32 rx_overruns; ++ u32 rx_symbol_errors; ++ u32 rx_oversize_pkts; ++ u32 rx_jabbers; ++ u32 rx_undersize_pkts; ++ u32 sqe_test_errors; ++ u32 rx_length_mismatch; ++ u32 tx_pause_frames; ++}; ++ ++struct gem_stats { ++ u32 tx_octets_31_0; ++ u32 tx_octets_47_32; ++ u32 tx_frames; ++ u32 tx_broadcast_frames; ++ u32 tx_multicast_frames; ++ u32 tx_pause_frames; ++ u32 tx_64_byte_frames; ++ u32 tx_65_127_byte_frames; ++ u32 tx_128_255_byte_frames; ++ u32 tx_256_511_byte_frames; ++ u32 tx_512_1023_byte_frames; ++ u32 tx_1024_1518_byte_frames; ++ u32 tx_greater_than_1518_byte_frames; ++ u32 tx_underrun; ++ u32 tx_single_collision_frames; ++ u32 tx_multiple_collision_frames; ++ u32 tx_excessive_collisions; ++ u32 tx_late_collisions; ++ u32 tx_deferred_frames; ++ u32 tx_carrier_sense_errors; ++ u32 rx_octets_31_0; ++ u32 rx_octets_47_32; ++ u32 rx_frames; ++ u32 rx_broadcast_frames; ++ u32 rx_multicast_frames; ++ u32 rx_pause_frames; ++ u32 rx_64_byte_frames; ++ u32 rx_65_127_byte_frames; ++ u32 rx_128_255_byte_frames; ++ u32 rx_256_511_byte_frames; ++ u32 rx_512_1023_byte_frames; ++ u32 rx_1024_1518_byte_frames; ++ u32 rx_greater_than_1518_byte_frames; ++ u32 rx_undersized_frames; ++ u32 rx_oversize_frames; ++ u32 rx_jabbers; ++ u32 rx_frame_check_sequence_errors; ++ u32 rx_length_field_frame_errors; ++ u32 rx_symbol_errors; ++ u32 rx_alignment_errors; ++ u32 rx_resource_errors; ++ u32 rx_overruns; ++ u32 rx_ip_header_checksum_errors; ++ u32 rx_tcp_checksum_errors; ++ u32 rx_udp_checksum_errors; ++}; ++ ++struct macb; ++ ++struct macb_or_gem_ops { ++ int (*mog_alloc_rx_buffers)(struct macb *bp); ++ void (*mog_free_rx_buffers)(struct macb *bp); ++ void (*mog_init_rings)(struct macb *bp); ++ int (*mog_rx)(struct macb *bp, int budget, nanosecs_abs_t *ts); ++}; ++ ++struct macb { ++ void __iomem *regs; ++ ++ unsigned int rx_tail; ++ unsigned int rx_prepared_head; ++ struct macb_dma_desc *rx_ring; ++ struct rtskb **rx_skbuff; ++ void *rx_buffers; ++ size_t rx_buffer_size; ++ ++ unsigned int tx_head, tx_tail; ++ struct macb_dma_desc *tx_ring; ++ struct macb_tx_skb *tx_skb; ++ ++ rtdm_lock_t lock; ++ struct platform_device *pdev; ++ struct clk *pclk; ++ struct clk *hclk; ++ struct clk *tx_clk; ++ struct rtnet_device *dev; ++ struct work_struct tx_error_task; ++ struct net_device_stats stats; ++ union { ++ struct macb_stats macb; ++ struct gem_stats gem; ++ } hw_stats; ++ ++ dma_addr_t rx_ring_dma; ++ dma_addr_t tx_ring_dma; ++ dma_addr_t rx_buffers_dma; ++ ++ struct macb_or_gem_ops macbgem_ops; ++ ++ struct mii_bus *mii_bus; ++ struct phy_device *phy_dev; ++ unsigned int link; ++ unsigned int speed; ++ unsigned int duplex; ++ ++ u32 caps; ++ ++ phy_interface_t phy_interface; ++ ++ struct net_device *phy_phony_net_device; ++ rtdm_irq_t irq_handle; ++ ++ /* AT91RM9200 transmit */ ++ struct rtskb *skb; /* holds skb until xmit interrupt completes */ ++ dma_addr_t skb_physaddr; /* phys addr from pci_map_single */ ++ int skb_length; /* saved skb length for pci_unmap_single */ ++}; ++ ++extern const struct ethtool_ops macb_ethtool_ops; ++ ++int rtmacb_mii_init(struct macb *bp); ++int rtmacb_ioctl(struct rtnet_device *dev, unsigned cmd, void *arg); ++struct net_device_stats *rtmacb_get_stats(struct 
rtnet_device *dev); ++void rtmacb_set_hwaddr(struct macb *bp); ++void rtmacb_get_hwaddr(struct macb *bp); ++ ++static inline bool macb_is_gem(struct macb *bp) ++{ ++ return MACB_BFEXT(IDNUM, macb_readl(bp, MID)) == 0x2; ++} ++ ++#endif /* _MACB_H */ +--- linux/drivers/xenomai/net/drivers/rt_fec.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/rt_fec.h 2021-04-07 16:01:27.772633387 +0800 +@@ -0,0 +1,153 @@ ++/****************************************************************************/ ++ ++/* ++ * fec.h -- Fast Ethernet Controller for Motorola ColdFire SoC ++ * processors. ++ * ++ * (C) Copyright 2000-2005, Greg Ungerer (gerg@snapgear.com) ++ * (C) Copyright 2000-2001, Lineo (www.lineo.com) ++ */ ++ ++/****************************************************************************/ ++#ifndef RT_FEC_H ++#define RT_FEC_H ++/****************************************************************************/ ++ ++#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ ++ defined(CONFIG_M520x) || defined(CONFIG_M532x) || \ ++ defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28) ++/* ++ * Just figures, Motorola would have to change the offsets for ++ * registers in the same peripheral device on different models ++ * of the ColdFire! ++ */ ++#define FEC_IEVENT 0x004 /* Interrupt event reg */ ++#define FEC_IMASK 0x008 /* Interrupt mask reg */ ++#define FEC_R_DES_ACTIVE 0x010 /* Receive descriptor reg */ ++#define FEC_X_DES_ACTIVE 0x014 /* Transmit descriptor reg */ ++#define FEC_ECNTRL 0x024 /* Ethernet control reg */ ++#define FEC_MII_DATA 0x040 /* MII manage frame reg */ ++#define FEC_MII_SPEED 0x044 /* MII speed control reg */ ++#define FEC_MIB_CTRLSTAT 0x064 /* MIB control/status reg */ ++#define FEC_R_CNTRL 0x084 /* Receive control reg */ ++#define FEC_X_CNTRL 0x0c4 /* Transmit Control reg */ ++#define FEC_ADDR_LOW 0x0e4 /* Low 32bits MAC address */ ++#define FEC_ADDR_HIGH 0x0e8 /* High 16bits MAC address */ ++#define FEC_OPD 0x0ec /* Opcode + Pause duration */ ++#define FEC_HASH_TABLE_HIGH 0x118 /* High 32bits hash table */ ++#define FEC_HASH_TABLE_LOW 0x11c /* Low 32bits hash table */ ++#define FEC_GRP_HASH_TABLE_HIGH 0x120 /* High 32bits hash table */ ++#define FEC_GRP_HASH_TABLE_LOW 0x124 /* Low 32bits hash table */ ++#define FEC_X_WMRK 0x144 /* FIFO transmit water mark */ ++#define FEC_R_BOUND 0x14c /* FIFO receive bound reg */ ++#define FEC_R_FSTART 0x150 /* FIFO receive start reg */ ++#define FEC_R_DES_START 0x180 /* Receive descriptor ring */ ++#define FEC_X_DES_START 0x184 /* Transmit descriptor ring */ ++#define FEC_R_BUFF_SIZE 0x188 /* Maximum receive buff size */ ++#define FEC_TACC 0x1c0 /* Transmit accelerator reg */ ++#define FEC_MIIGSK_CFGR 0x300 /* MIIGSK Configuration reg */ ++#define FEC_MIIGSK_ENR 0x308 /* MIIGSK Enable reg */ ++ ++#define BM_MIIGSK_CFGR_MII 0x00 ++#define BM_MIIGSK_CFGR_RMII 0x01 ++#define BM_MIIGSK_CFGR_FRCONT_10M 0x40 ++ ++#else ++ ++#define FEC_ECNTRL 0x000 /* Ethernet control reg */ ++#define FEC_IEVENT 0x004 /* Interrupt even reg */ ++#define FEC_IMASK 0x008 /* Interrupt mask reg */ ++#define FEC_IVEC 0x00c /* Interrupt vec status reg */ ++#define FEC_R_DES_ACTIVE 0x010 /* Receive descriptor reg */ ++#define FEC_X_DES_ACTIVE 0x014 /* Transmit descriptor reg */ ++#define FEC_MII_DATA 0x040 /* MII manage frame reg */ ++#define FEC_MII_SPEED 0x044 /* MII speed control reg */ ++#define FEC_R_BOUND 0x08c /* FIFO receive bound reg */ ++#define FEC_R_FSTART 0x090 /* FIFO receive start reg */ ++#define 
FEC_X_WMRK 0x0a4 /* FIFO transmit water mark */ ++#define FEC_X_FSTART 0x0ac /* FIFO transmit start reg */ ++#define FEC_R_CNTRL 0x104 /* Receive control reg */ ++#define FEC_MAX_FRM_LEN 0x108 /* Maximum frame length reg */ ++#define FEC_X_CNTRL 0x144 /* Transmit Control reg */ ++#define FEC_ADDR_LOW 0x3c0 /* Low 32bits MAC address */ ++#define FEC_ADDR_HIGH 0x3c4 /* High 16bits MAC address */ ++#define FEC_GRP_HASH_TABLE_HIGH 0x3c8 /* High 32bits hash table */ ++#define FEC_GRP_HASH_TABLE_LOW 0x3cc /* Low 32bits hash table */ ++#define FEC_R_DES_START 0x3d0 /* Receive descriptor ring */ ++#define FEC_X_DES_START 0x3d4 /* Transmit descriptor ring */ ++#define FEC_R_BUFF_SIZE 0x3d8 /* Maximum receive buff size */ ++#define FEC_FIFO_RAM 0x400 /* FIFO RAM buffer */ ++ ++#endif /* CONFIG_M5272 */ ++ ++ ++/* ++ * Define the buffer descriptor structure. ++ */ ++#if defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28) ++struct bufdesc { ++ unsigned short cbd_datlen; /* Data length */ ++ unsigned short cbd_sc; /* Control and status info */ ++ unsigned long cbd_bufaddr; /* Buffer address */ ++}; ++#else ++struct bufdesc { ++ unsigned short cbd_sc; /* Control and status info */ ++ unsigned short cbd_datlen; /* Data length */ ++ unsigned long cbd_bufaddr; /* Buffer address */ ++}; ++#endif ++ ++/* ++ * The following definitions courtesy of commproc.h, which where ++ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net). ++ */ ++#define BD_SC_EMPTY ((ushort)0x8000) /* Receive is empty */ ++#define BD_SC_READY ((ushort)0x8000) /* Transmit is ready */ ++#define BD_SC_WRAP ((ushort)0x2000) /* Last buffer descriptor */ ++#define BD_SC_INTRPT ((ushort)0x1000) /* Interrupt on change */ ++#define BD_SC_CM ((ushort)0x0200) /* Continuous mode */ ++#define BD_SC_ID ((ushort)0x0100) /* Rec'd too many idles */ ++#define BD_SC_P ((ushort)0x0100) /* xmt preamble */ ++#define BD_SC_BR ((ushort)0x0020) /* Break received */ ++#define BD_SC_FR ((ushort)0x0010) /* Framing error */ ++#define BD_SC_PR ((ushort)0x0008) /* Parity error */ ++#define BD_SC_OV ((ushort)0x0002) /* Overrun */ ++#define BD_SC_CD ((ushort)0x0001) /* ?? */ ++ ++/* Buffer descriptor control/status used by Ethernet receive. ++*/ ++#define BD_ENET_RX_EMPTY ((ushort)0x8000) ++#define BD_ENET_RX_WRAP ((ushort)0x2000) ++#define BD_ENET_RX_INTR ((ushort)0x1000) ++#define BD_ENET_RX_LAST ((ushort)0x0800) ++#define BD_ENET_RX_FIRST ((ushort)0x0400) ++#define BD_ENET_RX_MISS ((ushort)0x0100) ++#define BD_ENET_RX_LG ((ushort)0x0020) ++#define BD_ENET_RX_NO ((ushort)0x0010) ++#define BD_ENET_RX_SH ((ushort)0x0008) ++#define BD_ENET_RX_CR ((ushort)0x0004) ++#define BD_ENET_RX_OV ((ushort)0x0002) ++#define BD_ENET_RX_CL ((ushort)0x0001) ++#define BD_ENET_RX_STATS ((ushort)0x013f) /* All status bits */ ++ ++/* Buffer descriptor control/status used by Ethernet transmit. 
++*/ ++#define BD_ENET_TX_READY ((ushort)0x8000) ++#define BD_ENET_TX_PAD ((ushort)0x4000) ++#define BD_ENET_TX_WRAP ((ushort)0x2000) ++#define BD_ENET_TX_INTR ((ushort)0x1000) ++#define BD_ENET_TX_LAST ((ushort)0x0800) ++#define BD_ENET_TX_TC ((ushort)0x0400) ++#define BD_ENET_TX_DEF ((ushort)0x0200) ++#define BD_ENET_TX_HB ((ushort)0x0100) ++#define BD_ENET_TX_LC ((ushort)0x0080) ++#define BD_ENET_TX_RL ((ushort)0x0040) ++#define BD_ENET_TX_RCMASK ((ushort)0x003c) ++#define BD_ENET_TX_UN ((ushort)0x0002) ++#define BD_ENET_TX_CSL ((ushort)0x0001) ++#define BD_ENET_TX_STATS ((ushort)0x03ff) /* All status bits */ ++ ++ ++/****************************************************************************/ ++#endif /* RT_FEC_H */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_manage.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_manage.h 2021-04-07 16:01:27.768633393 +0800 +@@ -0,0 +1,81 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_MANAGE_H_ ++#define _E1000_MANAGE_H_ ++ ++bool e1000_check_mng_mode_generic(struct e1000_hw *hw); ++bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw); ++s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw); ++s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer, ++ u16 length, u16 offset, u8 *sum); ++s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw, ++ struct e1000_host_mng_command_header *hdr); ++s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw, ++ u8 *buffer, u16 length); ++ ++typedef enum { ++ e1000_mng_mode_none = 0, ++ e1000_mng_mode_asf, ++ e1000_mng_mode_pt, ++ e1000_mng_mode_ipmi, ++ e1000_mng_mode_host_if_only ++} e1000_mng_mode; ++ ++#define E1000_FACTPS_MNGCG 0x20000000 ++ ++#define E1000_FWSM_MODE_MASK 0xE ++#define E1000_FWSM_MODE_SHIFT 1 ++ ++#define E1000_MNG_IAMT_MODE 0x3 ++#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10 ++#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0 ++#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10 ++#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64 ++#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING 0x1 ++#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 ++ ++#define E1000_VFTA_ENTRY_SHIFT 5 ++#define E1000_VFTA_ENTRY_MASK 0x7F ++#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F ++ ++#define E1000_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */ ++#define E1000_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */ ++#define E1000_HI_COMMAND_TIMEOUT 500 /* Process HI command limit */ ++ ++#define E1000_HICR_EN 0x01 /* Enable bit - RO */ ++/* Driver sets this bit when done to put command in RAM */ ++#define E1000_HICR_C 0x02 ++#define E1000_HICR_SV 0x04 /* Status Validity */ ++#define E1000_HICR_FW_RESET_ENABLE 0x40 ++#define E1000_HICR_FW_RESET 0x80 ++ ++/* Intel(R) Active Management Technology signature */ ++#define E1000_IAMT_SIGNATURE 0x544D4149 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_regs.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_regs.h 2021-04-07 16:01:27.763633400 +0800 +@@ -0,0 +1,307 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_REGS_H_ ++#define _E1000_REGS_H_ ++ ++#define E1000_CTRL 0x00000 /* Device Control - RW */ ++#define E1000_CTRL_DUP 0x00004 /* Device Control Duplicate (Shadow) - RW */ ++#define E1000_STATUS 0x00008 /* Device Status - RO */ ++#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ ++#define E1000_EERD 0x00014 /* EEPROM Read - RW */ ++#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ ++#define E1000_FLA 0x0001C /* Flash Access - RW */ ++#define E1000_MDIC 0x00020 /* MDI Control - RW */ ++#define E1000_SCTL 0x00024 /* SerDes Control - RW */ ++#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ ++#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ ++#define E1000_FEXTNVM 0x00028 /* Future Extended NVM - RW */ ++#define E1000_FCT 0x00030 /* Flow Control Type - RW */ ++#define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ ++#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ ++#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ ++#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ ++#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ ++#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ ++#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ ++#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ ++#define E1000_RCTL 0x00100 /* Rx Control - RW */ ++#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ ++#define E1000_TXCW 0x00178 /* Tx Configuration Word - RW */ ++#define E1000_RXCW 0x00180 /* Rx Configuration Word - RO */ ++#define E1000_EICR 0x01580 /* Ext. Interrupt Cause Read - R/clr */ ++#define E1000_EITR(_n) (0x01680 + (0x4 * (_n))) ++#define E1000_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */ ++#define E1000_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ ++#define E1000_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */ ++#define E1000_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */ ++#define E1000_EIAM 0x01530 /* Ext. 
Interrupt Ack Auto Clear Mask - RW */ ++#define E1000_TCTL 0x00400 /* Tx Control - RW */ ++#define E1000_TCTL_EXT 0x00404 /* Extended Tx Control - RW */ ++#define E1000_TIPG 0x00410 /* Tx Inter-packet gap -RW */ ++#define E1000_TBT 0x00448 /* Tx Burst Timer - RW */ ++#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ ++#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ ++#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */ ++#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */ ++#define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */ ++#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ ++#define E1000_PBS 0x01008 /* Packet Buffer Size */ ++#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ ++#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ ++#define E1000_FLASHT 0x01028 /* FLASH Timer Register */ ++#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ ++#define E1000_FLSWCTL 0x01030 /* FLASH control register */ ++#define E1000_FLSWDATA 0x01034 /* FLASH data register */ ++#define E1000_FLSWCNT 0x01038 /* FLASH Access Counter */ ++#define E1000_FLOP 0x0103C /* FLASH Opcode Register */ ++#define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */ ++#define E1000_I2CPARAMS 0x0102C /* SFPI2C Parameters Register - RW */ ++#define E1000_WDSTP 0x01040 /* Watchdog Setup - RW */ ++#define E1000_SWDSTS 0x01044 /* SW Device Status - RW */ ++#define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */ ++#define E1000_TCPTIMER 0x0104C /* TCP Timer - RW */ ++#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ ++#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ ++#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ ++#define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */ ++#define E1000_RDFPCQ(_n) (0x02430 + (0x4 * (_n))) ++#define E1000_PBRTH 0x02458 /* PB Rx Arbitration Threshold - RW */ ++#define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */ ++/* Split and Replication Rx Control - RW */ ++#define E1000_RDPUMB 0x025CC /* DMA Rx Descriptor uC Mailbox - RW */ ++#define E1000_RDPUAD 0x025D0 /* DMA Rx Descriptor uC Addr Command - RW */ ++#define E1000_RDPUWD 0x025D4 /* DMA Rx Descriptor uC Data Write - RW */ ++#define E1000_RDPURD 0x025D8 /* DMA Rx Descriptor uC Data Read - RW */ ++#define E1000_RDPUCTL 0x025DC /* DMA Rx Descriptor uC Control - RW */ ++#define E1000_RDTR 0x02820 /* Rx Delay Timer - RW */ ++#define E1000_RADV 0x0282C /* Rx Interrupt Absolute Delay Timer - RW */ ++/* ++ * Convenience macros ++ * ++ * Note: "_n" is the queue number of the register to be written to. ++ * ++ * Example usage: ++ * E1000_RDBAL_REG(current_rx_queue) ++ */ ++#define E1000_RDBAL(_n) ((_n) < 4 ? (0x02800 + ((_n) * 0x100)) : (0x0C000 + ((_n) * 0x40))) ++#define E1000_RDBAH(_n) ((_n) < 4 ? (0x02804 + ((_n) * 0x100)) : (0x0C004 + ((_n) * 0x40))) ++#define E1000_RDLEN(_n) ((_n) < 4 ? (0x02808 + ((_n) * 0x100)) : (0x0C008 + ((_n) * 0x40))) ++#define E1000_SRRCTL(_n) ((_n) < 4 ? (0x0280C + ((_n) * 0x100)) : (0x0C00C + ((_n) * 0x40))) ++#define E1000_RDH(_n) ((_n) < 4 ? (0x02810 + ((_n) * 0x100)) : (0x0C010 + ((_n) * 0x40))) ++#define E1000_RDT(_n) ((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : (0x0C018 + ((_n) * 0x40))) ++#define E1000_RXDCTL(_n) ((_n) < 4 ? (0x02828 + ((_n) * 0x100)) : (0x0C028 + ((_n) * 0x40))) ++#define E1000_TDBAL(_n) ((_n) < 4 ? (0x03800 + ((_n) * 0x100)) : (0x0E000 + ((_n) * 0x40))) ++#define E1000_TDBAH(_n) ((_n) < 4 ? 
(0x03804 + ((_n) * 0x100)) : (0x0E004 + ((_n) * 0x40))) ++#define E1000_TDLEN(_n) ((_n) < 4 ? (0x03808 + ((_n) * 0x100)) : (0x0E008 + ((_n) * 0x40))) ++#define E1000_TDH(_n) ((_n) < 4 ? (0x03810 + ((_n) * 0x100)) : (0x0E010 + ((_n) * 0x40))) ++#define E1000_TDT(_n) ((_n) < 4 ? (0x03818 + ((_n) * 0x100)) : (0x0E018 + ((_n) * 0x40))) ++#define E1000_TXDCTL(_n) ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) : (0x0E028 + ((_n) * 0x40))) ++#define E1000_TARC(_n) (0x03840 + (_n << 8)) ++#define E1000_DCA_TXCTRL(_n) (0x03814 + (_n << 8)) ++#define E1000_DCA_RXCTRL(_n) (0x02814 + (_n << 8)) ++#define E1000_TDWBAL(_n) ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) : (0x0E038 + ((_n) * 0x40))) ++#define E1000_TDWBAH(_n) ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) : (0x0E03C + ((_n) * 0x40))) ++#define E1000_RSRPD 0x02C00 /* Rx Small Packet Detect - RW */ ++#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ ++#define E1000_TXDMAC 0x03000 /* Tx DMA Control - RW */ ++#define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ ++#define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4)) ++#define E1000_RAL(_i) (0x05400 + ((_i) * 8)) ++#define E1000_RAH(_i) (0x05404 + ((_i) * 8)) ++#define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8)) ++#define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4)) ++#define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) ++#define E1000_FFMT_REG(_i) (0x09000 + ((_i) * 8)) ++#define E1000_FFVT_REG(_i) (0x09800 + ((_i) * 8)) ++#define E1000_FFLT_REG(_i) (0x05F00 + ((_i) * 8)) ++#define E1000_TDFH 0x03410 /* Tx Data FIFO Head - RW */ ++#define E1000_TDFT 0x03418 /* Tx Data FIFO Tail - RW */ ++#define E1000_TDFHS 0x03420 /* Tx Data FIFO Head Saved - RW */ ++#define E1000_TDFTS 0x03428 /* Tx Data FIFO Tail Saved - RW */ ++#define E1000_TDFPC 0x03430 /* Tx Data FIFO Packet Count - RW */ ++#define E1000_TDPUMB 0x0357C /* DMA Tx Descriptor uC Mail Box - RW */ ++#define E1000_TDPUAD 0x03580 /* DMA Tx Descriptor uC Addr Command - RW */ ++#define E1000_TDPUWD 0x03584 /* DMA Tx Descriptor uC Data Write - RW */ ++#define E1000_TDPURD 0x03588 /* DMA Tx Descriptor uC Data Read - RW */ ++#define E1000_TDPUCTL 0x0358C /* DMA Tx Descriptor uC Control - RW */ ++#define E1000_DTXCTL 0x03590 /* DMA Tx Control - RW */ ++#define E1000_TIDV 0x03820 /* Tx Interrupt Delay Value - RW */ ++#define E1000_TADV 0x0382C /* Tx Interrupt Absolute Delay Val - RW */ ++#define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */ ++#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ ++#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ ++#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ ++#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ ++#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ ++#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ ++#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ ++#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ ++#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ ++#define E1000_COLC 0x04028 /* Collision Count - R/clr */ ++#define E1000_DC 0x04030 /* Defer Count - R/clr */ ++#define E1000_TNCRS 0x04034 /* Tx-No CRS - R/clr */ ++#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ ++#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ ++#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ ++#define E1000_XONRXC 0x04048 /* XON Rx Count - R/clr */ ++#define E1000_XONTXC 0x0404C /* XON Tx Count - R/clr */ ++#define E1000_XOFFRXC 0x04050 
/* XOFF Rx Count - R/clr */ ++#define E1000_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */ ++#define E1000_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */ ++#define E1000_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */ ++#define E1000_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */ ++#define E1000_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */ ++#define E1000_PRC511 0x04068 /* Packets Rx (255-511 bytes) - R/clr */ ++#define E1000_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */ ++#define E1000_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */ ++#define E1000_GPRC 0x04074 /* Good Packets Rx Count - R/clr */ ++#define E1000_BPRC 0x04078 /* Broadcast Packets Rx Count - R/clr */ ++#define E1000_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */ ++#define E1000_GPTC 0x04080 /* Good Packets Tx Count - R/clr */ ++#define E1000_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */ ++#define E1000_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */ ++#define E1000_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */ ++#define E1000_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */ ++#define E1000_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */ ++#define E1000_RUC 0x040A4 /* Rx Undersize Count - R/clr */ ++#define E1000_RFC 0x040A8 /* Rx Fragment Count - R/clr */ ++#define E1000_ROC 0x040AC /* Rx Oversize Count - R/clr */ ++#define E1000_RJC 0x040B0 /* Rx Jabber Count - R/clr */ ++#define E1000_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */ ++#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ ++#define E1000_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */ ++#define E1000_TORL 0x040C0 /* Total Octets Rx Low - R/clr */ ++#define E1000_TORH 0x040C4 /* Total Octets Rx High - R/clr */ ++#define E1000_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */ ++#define E1000_TOTH 0x040CC /* Total Octets Tx High - R/clr */ ++#define E1000_TPR 0x040D0 /* Total Packets Rx - R/clr */ ++#define E1000_TPT 0x040D4 /* Total Packets Tx - R/clr */ ++#define E1000_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */ ++#define E1000_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */ ++#define E1000_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */ ++#define E1000_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */ ++#define E1000_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */ ++#define E1000_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */ ++#define E1000_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */ ++#define E1000_BPTC 0x040F4 /* Broadcast Packets Tx Count - R/clr */ ++#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */ ++#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context Tx Fail - R/clr */ ++#define E1000_IAC 0x04100 /* Interrupt Assertion Count */ ++#define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Packet Timer Expire Count */ ++#define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Absolute Timer Expire Count */ ++#define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Packet Timer Expire Count */ ++#define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Absolute Timer Expire Count */ ++#define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ ++#define E1000_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Minimum Threshold Count */ ++#define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Descriptor Minimum Threshold Count */ ++#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ ++ ++#define E1000_PCS_CFG0 0x04200 /* PCS Configuration 0 - RW 
*/ ++#define E1000_PCS_LCTL 0x04208 /* PCS Link Control - RW */ ++#define E1000_PCS_LSTAT 0x0420C /* PCS Link Status - RO */ ++#define E1000_CBTMPC 0x0402C /* Circuit Breaker Tx Packet Count */ ++#define E1000_HTDPMC 0x0403C /* Host Transmit Discarded Packets */ ++#define E1000_CBRDPC 0x04044 /* Circuit Breaker Rx Dropped Count */ ++#define E1000_CBRMPC 0x040FC /* Circuit Breaker Rx Packet Count */ ++#define E1000_RPTHC 0x04104 /* Rx Packets To Host */ ++#define E1000_HGPTC 0x04118 /* Host Good Packets Tx Count */ ++#define E1000_HTCBDPC 0x04124 /* Host Tx Circuit Breaker Dropped Count */ ++#define E1000_HGORCL 0x04128 /* Host Good Octets Received Count Low */ ++#define E1000_HGORCH 0x0412C /* Host Good Octets Received Count High */ ++#define E1000_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ ++#define E1000_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ ++#define E1000_LENERRS 0x04138 /* Length Errors Count */ ++#define E1000_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */ ++#define E1000_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ ++#define E1000_PCS_ANADV 0x04218 /* AN advertisement - RW */ ++#define E1000_PCS_LPAB 0x0421C /* Link Partner Ability - RW */ ++#define E1000_PCS_NPTX 0x04220 /* AN Next Page Transmit - RW */ ++#define E1000_PCS_LPABNP 0x04224 /* Link Partner Ability Next Page - RW */ ++#define E1000_1GSTAT_RCV 0x04228 /* 1GSTAT Code Violation Packet Count - RW */ ++#define E1000_RXCSUM 0x05000 /* Rx Checksum Control - RW */ ++#define E1000_RLPML 0x05004 /* Rx Long Packet Max Length */ ++#define E1000_RFCTL 0x05008 /* Receive Filter Control*/ ++#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ ++#define E1000_RA 0x05400 /* Receive Address - RW Array */ ++#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ ++#define E1000_VMD_CTL 0x0581C /* VMDq Control - RW */ ++#define E1000_VFQA0 0x0B000 /* VLAN Filter Queue Array 0 - RW Array */ ++#define E1000_VFQA1 0x0B200 /* VLAN Filter Queue Array 1 - RW Array */ ++#define E1000_WUC 0x05800 /* Wakeup Control - RW */ ++#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ ++#define E1000_WUS 0x05810 /* Wakeup Status - RO */ ++#define E1000_MANC 0x05820 /* Management Control - RW */ ++#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ ++#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */ ++#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ ++#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ ++#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ ++#define E1000_PBACL 0x05B68 /* MSIx PBA Clear - Read/Write 1's to clear */ ++#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ ++#define E1000_HOST_IF 0x08800 /* Host Interface */ ++#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ ++#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ ++ ++#define E1000_KMRNCTRLSTA 0x00034 /* MAC-PHY interface - RW */ ++#define E1000_MDPHYA 0x0003C /* PHY address - RW */ ++#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ ++#define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ ++#define E1000_CCMCTL 0x05B48 /* CCM Control Register */ ++#define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */ ++#define E1000_SCCTL 0x05B4C /* PCIc PLL Configuration Register */ ++#define E1000_GCR 0x05B00 /* PCI-Ex Control */ ++#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ ++#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic 
Control #2 */ ++#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ ++#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ ++#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ ++#define E1000_SWSM 0x05B50 /* SW Semaphore */ ++#define E1000_FWSM 0x05B54 /* FW Semaphore */ ++#define E1000_DCA_ID 0x05B70 /* DCA Requester ID Information - RO */ ++#define E1000_DCA_CTRL 0x05B74 /* DCA Control - RW */ ++#define E1000_FFLT_DBG 0x05F04 /* Debug Register */ ++#define E1000_HICR 0x08F00 /* Host Inteface Control */ ++ ++/* RSS registers */ ++#define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ ++#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ ++#define E1000_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ ++#define E1000_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate Interrupt Ext*/ ++#define E1000_IMIRVP 0x05AC0 /* Immediate Interrupt Rx VLAN Priority - RW */ ++#define E1000_MSIXBM(_i) (0x01600 + ((_i) * 4)) /* MSI-X Allocation Register (_i) - RW */ ++#define E1000_MSIXTADD(_i) (0x0C000 + ((_i) * 0x10)) /* MSI-X Table entry addr low reg 0 - RW */ ++#define E1000_MSIXTUADD(_i) (0x0C004 + ((_i) * 0x10)) /* MSI-X Table entry addr upper reg 0 - RW */ ++#define E1000_MSIXTMSG(_i) (0x0C008 + ((_i) * 0x10)) /* MSI-X Table entry message reg 0 - RW */ ++#define E1000_MSIXVCTRL(_i) (0x0C00C + ((_i) * 0x10)) /* MSI-X Table entry vector ctrl reg 0 - RW */ ++#define E1000_MSIXPBA 0x0E000 /* MSI-X Pending bit array */ ++#define E1000_RETA(_i) (0x05C00 + ((_i) * 4)) /* Redirection Table - RW Array */ ++#define E1000_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* RSS Random Key - RW Array */ ++#define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ ++#define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82541.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82541.h 2021-04-07 16:01:27.758633407 +0800 +@@ -0,0 +1,84 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_82541_H_ ++#define _E1000_82541_H_ ++ ++#define NVM_WORD_SIZE_BASE_SHIFT_82541 (NVM_WORD_SIZE_BASE_SHIFT + 1) ++ ++#define IGP01E1000_PHY_CHANNEL_NUM 4 ++ ++#define IGP01E1000_PHY_AGC_A 0x1172 ++#define IGP01E1000_PHY_AGC_B 0x1272 ++#define IGP01E1000_PHY_AGC_C 0x1472 ++#define IGP01E1000_PHY_AGC_D 0x1872 ++ ++#define IGP01E1000_PHY_AGC_PARAM_A 0x1171 ++#define IGP01E1000_PHY_AGC_PARAM_B 0x1271 ++#define IGP01E1000_PHY_AGC_PARAM_C 0x1471 ++#define IGP01E1000_PHY_AGC_PARAM_D 0x1871 ++ ++#define IGP01E1000_PHY_EDAC_MU_INDEX 0xC000 ++#define IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS 0x8000 ++ ++#define IGP01E1000_PHY_DSP_RESET 0x1F33 ++ ++#define IGP01E1000_PHY_DSP_FFE 0x1F35 ++#define IGP01E1000_PHY_DSP_FFE_CM_CP 0x0069 ++#define IGP01E1000_PHY_DSP_FFE_DEFAULT 0x002A ++ ++#define IGP01E1000_IEEE_FORCE_GIG 0x0140 ++#define IGP01E1000_IEEE_RESTART_AUTONEG 0x3300 ++ ++#define IGP01E1000_AGC_LENGTH_SHIFT 7 ++#define IGP01E1000_AGC_RANGE 10 ++ ++#define FFE_IDLE_ERR_COUNT_TIMEOUT_20 20 ++#define FFE_IDLE_ERR_COUNT_TIMEOUT_100 100 ++ ++#define IGP01E1000_ANALOG_FUSE_STATUS 0x20D0 ++#define IGP01E1000_ANALOG_SPARE_FUSE_STATUS 0x20D1 ++#define IGP01E1000_ANALOG_FUSE_CONTROL 0x20DC ++#define IGP01E1000_ANALOG_FUSE_BYPASS 0x20DE ++ ++#define IGP01E1000_ANALOG_SPARE_FUSE_ENABLED 0x0100 ++#define IGP01E1000_ANALOG_FUSE_FINE_MASK 0x0F80 ++#define IGP01E1000_ANALOG_FUSE_COARSE_MASK 0x0070 ++#define IGP01E1000_ANALOG_FUSE_COARSE_THRESH 0x0040 ++#define IGP01E1000_ANALOG_FUSE_COARSE_10 0x0010 ++#define IGP01E1000_ANALOG_FUSE_FINE_1 0x0080 ++#define IGP01E1000_ANALOG_FUSE_FINE_10 0x0500 ++#define IGP01E1000_ANALOG_FUSE_POLY_MASK 0xF000 ++#define IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL 0x0002 ++ ++#define IGP01E1000_MSE_CHANNEL_D 0x000F ++#define IGP01E1000_MSE_CHANNEL_C 0x00F0 ++#define IGP01E1000_MSE_CHANNEL_B 0x0F00 ++#define IGP01E1000_MSE_CHANNEL_A 0xF000 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_80003es2lan.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_80003es2lan.h 2021-04-07 16:01:27.753633415 +0800 +@@ -0,0 +1,95 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_80003ES2LAN_H_ ++#define _E1000_80003ES2LAN_H_ ++ ++#define E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL 0x00 ++#define E1000_KMRNCTRLSTA_OFFSET_INB_CTRL 0x02 ++#define E1000_KMRNCTRLSTA_OFFSET_HD_CTRL 0x10 ++#define E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE 0x1F ++ ++#define E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS 0x0008 ++#define E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS 0x0800 ++#define E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING 0x0010 ++ ++#define E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT 0x0004 ++#define E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT 0x0000 ++#define E1000_KMRNCTRLSTA_OPMODE_E_IDLE 0x2000 ++ ++#define E1000_TCTL_EXT_GCEX_MASK 0x000FFC00 /* Gigabit Carry Extend Padding */ ++#define DEFAULT_TCTL_EXT_GCEX_80003ES2LAN 0x00010000 ++ ++#define DEFAULT_TIPG_IPGT_1000_80003ES2LAN 0x8 ++#define DEFAULT_TIPG_IPGT_10_100_80003ES2LAN 0x9 ++ ++/* GG82563 PHY Specific Status Register (Page 0, Register 16 */ ++#define GG82563_PSCR_POLARITY_REVERSAL_DISABLE 0x0002 /* 1=Reversal Disabled */ ++#define GG82563_PSCR_CROSSOVER_MODE_MASK 0x0060 ++#define GG82563_PSCR_CROSSOVER_MODE_MDI 0x0000 /* 00=Manual MDI */ ++#define GG82563_PSCR_CROSSOVER_MODE_MDIX 0x0020 /* 01=Manual MDIX */ ++#define GG82563_PSCR_CROSSOVER_MODE_AUTO 0x0060 /* 11=Auto crossover */ ++ ++/* PHY Specific Control Register 2 (Page 0, Register 26) */ ++#define GG82563_PSCR2_REVERSE_AUTO_NEG 0x2000 ++ /* 1=Reverse Auto-Negotiation */ ++ ++/* MAC Specific Control Register (Page 2, Register 21) */ ++/* Tx clock speed for Link Down and 1000BASE-T for the following speeds */ ++#define GG82563_MSCR_TX_CLK_MASK 0x0007 ++#define GG82563_MSCR_TX_CLK_10MBPS_2_5 0x0004 ++#define GG82563_MSCR_TX_CLK_100MBPS_25 0x0005 ++#define GG82563_MSCR_TX_CLK_1000MBPS_2_5 0x0006 ++#define GG82563_MSCR_TX_CLK_1000MBPS_25 0x0007 ++ ++#define GG82563_MSCR_ASSERT_CRS_ON_TX 0x0010 /* 1=Assert */ ++ ++/* DSP Distance Register (Page 5, Register 26) */ ++/* ++ * 0 = <50M ++ * 1 = 50-80M ++ * 2 = 80-100M ++ * 3 = 110-140M ++ * 4 = >140M ++ */ ++#define GG82563_DSPD_CABLE_LENGTH 0x0007 ++ ++/* Kumeran Mode Control Register (Page 193, Register 16) */ ++#define GG82563_KMCR_PASS_FALSE_CARRIER 0x0800 ++ ++/* Max number of times Kumeran read/write should be validated */ ++#define GG82563_MAX_KMRN_RETRY 0x5 ++ ++/* Power Management Control Register (Page 193, Register 20) */ ++#define GG82563_PMCR_ENABLE_ELECTRICAL_IDLE 0x0001 ++ /* 1=Enable SERDES Electrical Idle */ ++ ++/* In-Band Control Register (Page 194, Register 18) */ ++#define GG82563_ICR_DIS_PADDING 0x0010 /* Disable Padding */ ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_main.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_main.c 2021-04-07 16:01:27.749633420 +0800 +@@ -0,0 +1,5983 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++// RTNET defines... ++#ifdef NETIF_F_TSO ++#undef NETIF_F_TSO ++#endif ++ ++#ifdef NETIF_F_TSO6 ++#undef NETIF_F_TSO6 ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#undef NETIF_F_HW_VLAN_TX ++#endif ++ ++#ifdef CONFIG_E1000_NAPI ++#undef CONFIG_E1000_NAPI ++#endif ++ ++#ifdef MAX_SKB_FRAGS ++#undef MAX_SKB_FRAGS ++#endif ++ ++#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT ++#define CONFIG_E1000_DISABLE_PACKET_SPLIT ++#endif ++ ++#ifdef CONFIG_E1000_MQ ++#undef CONFIG_E1000_MQ ++#endif ++ ++#ifdef CONFIG_NET_POLL_CONTROLLER ++#undef CONFIG_NET_POLL_CONTROLLER ++#endif ++ ++#ifdef CONFIG_PM ++#undef CONFIG_PM ++#endif ++ ++#ifdef HAVE_PCI_ERS ++#error "STOP it here" ++#undef HAVE_PCI_ERS ++#endif ++ ++#ifdef USE_REBOOT_NOTIFIER ++#undef USE_REBOOT_NOTIFIER ++#endif ++ ++#ifdef HAVE_TX_TIMEOUT ++#undef HAVE_TX_TIMEOUT ++#endif ++ ++ ++#ifdef NETIF_F_TSO ++#include ++#ifdef NETIF_F_TSO6 ++#include ++#endif ++#endif ++#ifdef SIOCGMIIPHY ++#include ++#endif ++#ifdef SIOCETHTOOL ++#include ++#endif ++#ifdef NETIF_F_HW_VLAN_TX ++#include ++#endif ++#ifdef CONFIG_E1000_MQ ++#include ++#include ++#endif ++ ++#include "e1000.h" ++ ++#ifdef HAVE_PCI_ERS ++#error "STOP it here" ++#endif ++ ++ ++ ++char e1000_driver_name[MODULE_NAME_LEN] = "rt_e1000"; ++static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver"; ++ ++#ifdef CONFIG_E1000_NAPI ++#define DRV_NAPI "-NAPI" ++#else ++#define DRV_NAPI ++#endif ++ ++ ++#define DRV_DEBUG ++ ++#define DRV_HW_PERF ++ ++/* ++ * Port to rtnet based on e1000 driver version 7.6.15.5 (22-Sep-2008 Mathias Koehrer) ++ * ++ * */ ++ ++#define DRV_VERSION "7.6.15.5" DRV_NAPI DRV_DEBUG DRV_HW_PERF " ported to RTnet" ++const char e1000_driver_version[] = DRV_VERSION; ++static const char e1000_copyright[] = "Copyright (c) 1999-2008 Intel Corporation."; ++ ++// RTNET wrappers ++#define kmalloc(a,b) rtdm_malloc(a) ++#define vmalloc(a) rtdm_malloc(a) ++#define kfree(a) rtdm_free(a) ++#define vfree(a) rtdm_free(a) ++#define skb_reserve(a,b) rtskb_reserve(a,b) ++#define net_device rtnet_device ++#define sk_buff rtskb ++#define netdev_priv(a) a->priv ++// ---------------------- ++ ++ ++ ++/* e1000_pci_tbl - PCI Device ID Table ++ * ++ * Last entry must be all 0s ++ * ++ * Macro expands to... 
++ * {PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)} ++ */ ++ ++#define PCI_ID_LIST_PCI \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82542), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82543GC_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82543GC_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82544EI_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82544EI_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82544GC_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82544GC_LOM), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82540EM), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82545EM_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546EB_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82545EM_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546EB_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541EI), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541ER_LOM), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82540EM_LOM), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82540EP_LOM), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82540EP), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541EI_MOBILE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82547EI), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82547EI_MOBILE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546EB_QUAD_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82540EP_LP), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82545GM_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82545GM_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82545GM_SERDES), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82547GI), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541GI), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541GI_MOBILE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541ER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_SERDES), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541GI_LF), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_PCIE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_QUAD_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3) ++ ++#define PCI_ID_LIST_PCIE \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IGP_M_AMT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IGP_AMT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IGP_C), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IFE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IGP_M), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_SERDES), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82572EI_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82572EI_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82572EI_SERDES), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82573E), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82573E_IAMT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_80003ES2LAN_COPPER_DPT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_80003ES2LAN_SERDES_DPT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82573L), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_QUAD_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_QUAD_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_SERDES_DUAL), \ ++ 
INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_SERDES_QUAD), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82572EI), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_80003ES2LAN_COPPER_SPT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_80003ES2LAN_SERDES_SPT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_QUAD_COPPER_LP), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571PT_QUAD_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IFE_GT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IFE_G), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH9_IGP_AMT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH9_IGP_C), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH9_IFE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH9_IFE_G), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH9_IFE_GT) ++ ++ ++ ++ ++static struct pci_device_id e1000_pci_tbl[] = { ++ PCI_ID_LIST_PCI, ++ PCI_ID_LIST_PCIE, ++ /* required last entry */ ++ {0,} ++}; ++MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); ++ ++static struct pci_device_id e1000_pcipure_tbl[] = { ++ PCI_ID_LIST_PCI, ++ /* required last entry */ ++ {0,} ++}; ++ ++static struct pci_device_id e1000_pcie_tbl[] = { ++ PCI_ID_LIST_PCIE, ++ /* required last entry */ ++ {0,} ++}; ++ ++ ++ ++static int e1000_setup_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static int e1000_setup_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++static void e1000_free_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static void e1000_free_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++ ++static int e1000_init_module(void); ++static void e1000_exit_module(void); ++static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent); ++static void e1000_remove(struct pci_dev *pdev); ++static int e1000_alloc_queues(struct e1000_adapter *adapter); ++#ifdef CONFIG_E1000_MQ ++static void e1000_setup_queue_mapping(struct e1000_adapter *adapter); ++#endif ++static int e1000_sw_init(struct e1000_adapter *adapter); ++static int e1000_open(struct net_device *netdev); ++static int e1000_close(struct net_device *netdev); ++static void e1000_configure(struct e1000_adapter *adapter); ++static void e1000_configure_tx(struct e1000_adapter *adapter); ++static void e1000_configure_rx(struct e1000_adapter *adapter); ++static void e1000_setup_rctl(struct e1000_adapter *adapter); ++static void e1000_clean_all_tx_rings(struct e1000_adapter *adapter); ++static void e1000_clean_all_rx_rings(struct e1000_adapter *adapter); ++static void e1000_clean_tx_ring(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static void e1000_clean_rx_ring(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++static void e1000_set_multi(struct net_device *netdev); ++static void e1000_update_phy_info_task(struct work_struct *work); ++static void e1000_watchdog_task(struct work_struct *work); ++static void e1000_82547_tx_fifo_stall_task(struct work_struct *work); ++static int e1000_xmit_frame_ring(struct sk_buff *skb, struct net_device *netdev, ++ struct e1000_tx_ring *tx_ring); ++static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev); ++#ifdef CONFIG_E1000_MQ ++static int e1000_subqueue_xmit_frame(struct sk_buff *skb, ++ struct net_device *netdev, int queue); ++#endif ++static void e1000_phy_read_status(struct e1000_adapter *adapter); ++#if 0 ++static struct net_device_stats * e1000_get_stats(struct net_device *netdev); 
++static int e1000_change_mtu(struct net_device *netdev, int new_mtu); ++static int e1000_set_mac(struct net_device *netdev, void *p); ++#endif ++static int e1000_intr(rtdm_irq_t *irq_handle); ++static int e1000_intr_msi(rtdm_irq_t *irq_handle); ++static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++#ifdef CONFIG_E1000_NAPI ++static int e1000_poll(struct napi_struct *napi, int budget); ++static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do); ++static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do); ++static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do); ++static void e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++#else ++static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp); ++static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp); ++#endif ++static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++#if 0 ++static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); ++#ifdef SIOCGMIIPHY ++static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, ++ int cmd); ++static void e1000_enter_82542_rst(struct e1000_adapter *adapter); ++static void e1000_leave_82542_rst(struct e1000_adapter *adapter); ++static void e1000_tx_timeout(struct net_device *dev); ++#endif ++#endif ++static void e1000_reset_task(struct work_struct *work); ++static void e1000_smartspeed(struct e1000_adapter *adapter); ++static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, ++ struct sk_buff *skb); ++ ++#ifdef NETIF_F_HW_VLAN_TX ++static void e1000_vlan_rx_register(struct net_device *netdev, ++ struct vlan_group *grp); ++static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid); ++static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid); ++static void e1000_restore_vlan(struct e1000_adapter *adapter); ++#endif ++ ++// static int e1000_suspend(struct pci_dev *pdev, pm_message_t state); ++#ifdef CONFIG_PM ++static int e1000_resume(struct pci_dev *pdev); ++#endif ++#ifndef USE_REBOOT_NOTIFIER ++// static void e1000_shutdown(struct pci_dev *pdev); ++#else ++static int e1000_notify_reboot(struct notifier_block *, unsigned long event, ++ void *ptr); ++static struct notifier_block e1000_notifier_reboot = { ++ .notifier_call = e1000_notify_reboot, ++ .next = NULL, ++ .priority = 0 ++}; ++#endif ++ ++#ifdef CONFIG_NET_POLL_CONTROLLER ++/* for netdump / net console */ ++static void e1000_netpoll (struct net_device *netdev); ++#endif ++ ++#define COPYBREAK_DEFAULT 256 ++static unsigned int copybreak __read_mostly = COPYBREAK_DEFAULT; ++module_param(copybreak, uint, 0644); ++MODULE_PARM_DESC(copybreak, ++ "Maximum size of packet that is copied to a new buffer on receive"); ++ ++ ++#ifdef HAVE_PCI_ERS ++static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, ++ pci_channel_state_t state); ++static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev); 
++static void e1000_io_resume(struct pci_dev *pdev); ++ ++static struct pci_error_handlers e1000_err_handler = { ++ .error_detected = e1000_io_error_detected, ++ .slot_reset = e1000_io_slot_reset, ++ .resume = e1000_io_resume, ++}; ++#endif ++ ++static struct pci_driver e1000_driver = { ++ .name = e1000_driver_name, ++ .id_table = e1000_pci_tbl, ++ .probe = e1000_probe, ++ .remove = e1000_remove, ++#ifdef HAVE_PCI_ERS ++ .err_handler = &e1000_err_handler ++#endif ++}; ++ ++MODULE_AUTHOR("Intel Corporation, "); ++MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver"); ++MODULE_LICENSE("GPL"); ++MODULE_VERSION(DRV_VERSION); ++ ++#define MAX_UNITS 8 ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (eg. 1,0,1)"); ++ ++ ++static int local_debug = NETIF_MSG_DRV | NETIF_MSG_PROBE; ++module_param(local_debug, int, 0); ++MODULE_PARM_DESC(local_debug, "Debug level (0=none,...,16=all)"); ++ ++/* The parameter 'pciif' might be used to use this driver for ++ * PCI or PCIe only NICs. ++ * This allows to reflect the situation that newer Linux kernels ++ * have two different (non real time) drivers for the e1000: ++ * e1000 for PCI only ++ * e1000e for PCIe only ++ * ++ * Using the 'pciif' parameter allows to load the driver ++ * modprobe rt_e1000 pciif=pci ++ * to use it as PCI only ++ * and a ++ * modprobe rt_e1000 -o rt_e1000e pciif=pcie ++ * allows to load a second instance of this driver named 'rt_e1000e' ++ * ++ * If the 'pciif' paramter is not specified, all (PCI and PCIe) e1000 ++ * NICs will be used. ++ * */ ++static char *pciif = "all"; ++module_param(pciif, charp, 0); ++MODULE_PARM_DESC(pciif, "PCI Interface: 'all' (default), 'pci', 'pcie'"); ++ ++ ++//#define register_netdev(a) rt_register_rtnetdev(a) ++//#define unregister_netdev(a) rt_unregister_rtnetdev(a) ++//#define free_netdev(a) rtdev_free(a) ++//#define netif_stop_queue(a) rtnetif_stop_queue(a) ++ ++/** ++ * e1000_init_module - Driver Registration Routine ++ * ++ * e1000_init_module is the first routine called when the driver is ++ * loaded. All it does is register with the PCI subsystem. ++ **/ ++static int __init e1000_init_module(void) ++{ ++ int ret; ++ strcpy(e1000_driver_name, THIS_MODULE->name); ++ printk(KERN_INFO "%s - %s version %s (pciif: %s)\n", ++ e1000_driver_string, e1000_driver_name, e1000_driver_version, pciif); ++ ++ printk(KERN_INFO "%s\n", e1000_copyright); ++ ++ ++ if (0 == strcmp(pciif, "pcie")) ++ { ++ // PCIe only ++ e1000_driver.id_table = e1000_pcie_tbl; ++ } ++ else if (0 == strcmp(pciif, "pci")) ++ { ++ // PCI only ++ e1000_driver.id_table = e1000_pcipure_tbl; ++ } ++ ++ ret = pci_register_driver(&e1000_driver); ++#ifdef USE_REBOOT_NOTIFIER ++ if (ret >= 0) { ++ register_reboot_notifier(&e1000_notifier_reboot); ++ } ++#endif ++ if (copybreak != COPYBREAK_DEFAULT) { ++ if (copybreak == 0) ++ printk(KERN_INFO "e1000: copybreak disabled\n"); ++ else ++ printk(KERN_INFO "e1000: copybreak enabled for " ++ "packets <= %u bytes\n", copybreak); ++ } ++ return ret; ++} ++ ++module_init(e1000_init_module); ++ ++/** ++ * e1000_exit_module - Driver Exit Cleanup Routine ++ * ++ * e1000_exit_module is called just before the driver is removed ++ * from memory. 
++ **/ ++static void __exit e1000_exit_module(void) ++{ ++#ifdef USE_REBOOT_NOTIFIER ++ unregister_reboot_notifier(&e1000_notifier_reboot); ++#endif ++ pci_unregister_driver(&e1000_driver); ++} ++ ++module_exit(e1000_exit_module); ++ ++static int e1000_request_irq(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ int err = 0; ++ ++ if (adapter->flags & E1000_FLAG_HAS_MSI) { ++ err = pci_enable_msi(adapter->pdev); ++ if (!err) ++ adapter->flags |= E1000_FLAG_MSI_ENABLED; ++ } ++ rt_stack_connect(netdev, &STACK_manager); ++ if (adapter->flags & E1000_FLAG_MSI_ENABLED) { ++ err = rtdm_irq_request(&adapter->irq_handle, adapter->pdev->irq, e1000_intr_msi, ++ 0, netdev->name, netdev); ++ if (!err) { ++ return err; ++ } else { ++ adapter->flags &= ~E1000_FLAG_MSI_ENABLED; ++ pci_disable_msi(adapter->pdev); ++ } ++ } ++ err = rtdm_irq_request(&adapter->irq_handle, adapter->pdev->irq, ++ e1000_intr, RTDM_IRQTYPE_SHARED, netdev->name, ++ netdev); ++ if (err) ++ DPRINTK(PROBE, ERR, "Unable to allocate interrupt Error: %d\n", ++ err); ++ ++ return err; ++} ++ ++static void e1000_free_irq(struct e1000_adapter *adapter) ++{ ++ // struct net_device *netdev = adapter->netdev; ++ ++ rtdm_irq_free(&adapter->irq_handle); ++ ++ if (adapter->flags & E1000_FLAG_MSI_ENABLED) { ++ pci_disable_msi(adapter->pdev); ++ adapter->flags &= ~E1000_FLAG_MSI_ENABLED; ++ } ++} ++ ++/** ++ * e1000_irq_disable - Mask off interrupt generation on the NIC ++ * @adapter: board private structure ++ **/ ++static void e1000_irq_disable(struct e1000_adapter *adapter) ++{ ++ atomic_inc(&adapter->irq_sem); ++ E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ synchronize_irq(adapter->pdev->irq); ++} ++ ++/** ++ * e1000_irq_enable - Enable default interrupt generation settings ++ * @adapter: board private structure ++ **/ ++ ++static void e1000_irq_enable(struct e1000_adapter *adapter) ++{ ++ if (likely(atomic_dec_and_test(&adapter->irq_sem))) { ++ E1000_WRITE_REG(&adapter->hw, E1000_IMS, IMS_ENABLE_MASK); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ } ++} ++#ifdef NETIF_F_HW_VLAN_TX ++ ++static void e1000_update_mng_vlan(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ u16 vid = adapter->hw.mng_cookie.vlan_id; ++ u16 old_vid = adapter->mng_vlan_id; ++ if (adapter->vlgrp) { ++ if (!vlan_group_get_device(adapter->vlgrp, vid)) { ++ if (adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { ++ e1000_vlan_rx_add_vid(netdev, vid); ++ adapter->mng_vlan_id = vid; ++ } else { ++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; ++ } ++ ++ if ((old_vid != (u16)E1000_MNG_VLAN_NONE) && ++ (vid != old_vid) && ++ !vlan_group_get_device(adapter->vlgrp, old_vid)) ++ e1000_vlan_rx_kill_vid(netdev, old_vid); ++ } else { ++ adapter->mng_vlan_id = vid; ++ } ++ } ++} ++#endif ++ ++/** ++ * e1000_release_hw_control - release control of the h/w to f/w ++ * @adapter: address of board private structure ++ * ++ * e1000_release_hw_control resets {CTRL_EXT|SWSM}:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that the ++ * driver is no longer loaded. For AMT version (only with 82573) i ++ * of the f/w this means that the network i/f is closed. 
++ * ++ **/ ++static void e1000_release_hw_control(struct e1000_adapter *adapter) ++{ ++ u32 ctrl_ext; ++ u32 swsm; ++ ++ /* Let firmware taken over control of h/w */ ++ switch (adapter->hw.mac.type) { ++ case e1000_82573: ++ swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); ++ E1000_WRITE_REG(&adapter->hw, E1000_SWSM, ++ swsm & ~E1000_SWSM_DRV_LOAD); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ++ ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); ++ break; ++ default: ++ break; ++ } ++} ++ ++/** ++ * e1000_get_hw_control - get control of the h/w from f/w ++ * @adapter: address of board private structure ++ * ++ * e1000_get_hw_control sets {CTRL_EXT|SWSM}:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that ++ * the driver is loaded. For AMT version (only with 82573) ++ * of the f/w this means that the network i/f is open. ++ * ++ **/ ++static void e1000_get_hw_control(struct e1000_adapter *adapter) ++{ ++ u32 ctrl_ext; ++ u32 swsm; ++ ++ /* Let firmware know the driver has taken over */ ++ switch (adapter->hw.mac.type) { ++ case e1000_82573: ++ swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); ++ E1000_WRITE_REG(&adapter->hw, E1000_SWSM, ++ swsm | E1000_SWSM_DRV_LOAD); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ++ ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); ++ break; ++ default: ++ break; ++ } ++} ++ ++static void e1000_init_manageability(struct e1000_adapter *adapter) ++{ ++} ++ ++static void e1000_release_manageability(struct e1000_adapter *adapter) ++{ ++} ++ ++/** ++ * e1000_configure - configure the hardware for RX and TX ++ * @adapter: private board structure ++ **/ ++static void e1000_configure(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ int i; ++ ++ e1000_set_multi(netdev); ++ ++#ifdef NETIF_F_HW_VLAN_TX ++ e1000_restore_vlan(adapter); ++#endif ++ e1000_init_manageability(adapter); ++ ++ e1000_configure_tx(adapter); ++ e1000_setup_rctl(adapter); ++ e1000_configure_rx(adapter); ++ /* call E1000_DESC_UNUSED which always leaves ++ * at least 1 descriptor unused to make sure ++ * next_to_use != next_to_clean */ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ struct e1000_rx_ring *ring = &adapter->rx_ring[i]; ++ adapter->alloc_rx_buf(adapter, ring, ++ E1000_DESC_UNUSED(ring)); ++ } ++ ++#ifdef CONFIG_E1000_MQ ++ e1000_setup_queue_mapping(adapter); ++#endif ++ ++ // adapter->tx_queue_len = netdev->tx_queue_len; ++} ++ ++static void e1000_napi_enable_all(struct e1000_adapter *adapter) ++{ ++#ifdef CONFIG_E1000_NAPI ++ int i; ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ napi_enable(&adapter->rx_ring[i].napi); ++#endif ++} ++ ++static void e1000_napi_disable_all(struct e1000_adapter *adapter) ++{ ++#ifdef CONFIG_E1000_NAPI ++ int i; ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ napi_disable(&adapter->rx_ring[i].napi); ++#endif ++} ++ ++int e1000_up(struct e1000_adapter *adapter) ++{ ++ /* hardware has been reset, we need to reload some things */ ++ e1000_configure(adapter); ++ ++ clear_bit(__E1000_DOWN, &adapter->state); ++ ++ e1000_napi_enable_all(adapter); ++ ++ e1000_irq_enable(adapter); ++ ++ /* fire a link change interrupt to start the watchdog */ ++ // 
E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); ++ return 0; ++} ++ ++static void e1000_down_and_stop(struct e1000_adapter *adapter) ++{ ++ /* signal that we're down so the interrupt handler does not ++ * reschedule our watchdog timer */ ++ set_bit(__E1000_DOWN, &adapter->state); ++ ++ cancel_work_sync(&adapter->reset_task); ++ cancel_delayed_work_sync(&adapter->watchdog_task); ++ cancel_delayed_work_sync(&adapter->phy_info_task); ++ cancel_delayed_work_sync(&adapter->fifo_stall_task); ++} ++ ++void e1000_down(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ u32 tctl, rctl; ++ ++ e1000_down_and_stop(adapter); ++ ++ /* disable receives in the hardware */ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ /* flush and sleep below */ ++ ++#ifdef NETIF_F_LLTX ++ rtnetif_stop_queue(netdev); ++#else ++ rtnetif_tx_disable(netdev); ++#endif ++ ++ /* disable transmits in the hardware */ ++ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); ++ tctl &= ~E1000_TCTL_EN; ++ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); ++ /* flush both disables and wait for them to finish */ ++ E1000_WRITE_FLUSH(&adapter->hw); ++ msleep(10); ++ ++ e1000_napi_disable_all(adapter); ++ ++ e1000_irq_disable(adapter); ++ ++ // netdev->tx_queue_len = adapter->tx_queue_len; ++ rtnetif_carrier_off(netdev); ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ ++ e1000_reset(adapter); ++ e1000_clean_all_tx_rings(adapter); ++ e1000_clean_all_rx_rings(adapter); ++} ++ ++void e1000_reinit_locked(struct e1000_adapter *adapter) ++{ ++ WARN_ON(in_interrupt()); ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ msleep(1); ++ e1000_down(adapter); ++ e1000_up(adapter); ++ clear_bit(__E1000_RESETTING, &adapter->state); ++} ++ ++void e1000_reset(struct e1000_adapter *adapter) ++{ ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ struct e1000_fc_info *fc = &adapter->hw.fc; ++ u32 pba = 0, tx_space, min_tx_space, min_rx_space; ++ bool legacy_pba_adjust = FALSE; ++ u16 hwm; ++ ++ /* Repartition Pba for greater than 9k mtu ++ * To take effect CTRL.RST is required. 
++ */ ++ ++ switch (mac->type) { ++ case e1000_82542: ++ case e1000_82543: ++ case e1000_82544: ++ case e1000_82540: ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ legacy_pba_adjust = TRUE; ++ pba = E1000_PBA_48K; ++ break; ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ pba = E1000_PBA_48K; ++ break; ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ legacy_pba_adjust = TRUE; ++ pba = E1000_PBA_30K; ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ pba = E1000_PBA_38K; ++ break; ++ case e1000_82573: ++ pba = E1000_PBA_20K; ++ break; ++ case e1000_ich8lan: ++ pba = E1000_PBA_8K; ++ break; ++ case e1000_ich9lan: ++#define E1000_PBA_10K 0x000A ++ pba = E1000_PBA_10K; ++ break; ++ case e1000_undefined: ++ case e1000_num_macs: ++ break; ++ } ++ ++ if (legacy_pba_adjust == TRUE) { ++ if (adapter->max_frame_size > E1000_RXBUFFER_8192) ++ pba -= 8; /* allocate more FIFO for Tx */ ++ ++ if (mac->type == e1000_82547) { ++ adapter->tx_fifo_head = 0; ++ adapter->tx_head_addr = pba << E1000_TX_HEAD_ADDR_SHIFT; ++ adapter->tx_fifo_size = ++ (E1000_PBA_40K - pba) << E1000_PBA_BYTES_SHIFT; ++ atomic_set(&adapter->tx_fifo_stall, 0); ++ } ++ } else if (adapter->max_frame_size > ETH_FRAME_LEN + ETHERNET_FCS_SIZE) { ++ /* adjust PBA for jumbo frames */ ++ E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); ++ ++ /* To maintain wire speed transmits, the Tx FIFO should be ++ * large enough to accommodate two full transmit packets, ++ * rounded up to the next 1KB and expressed in KB. Likewise, ++ * the Rx FIFO should be large enough to accommodate at least ++ * one full receive packet and is similarly rounded up and ++ * expressed in KB. */ ++ pba = E1000_READ_REG(&adapter->hw, E1000_PBA); ++ /* upper 16 bits has Tx packet buffer allocation size in KB */ ++ tx_space = pba >> 16; ++ /* lower 16 bits has Rx packet buffer allocation size in KB */ ++ pba &= 0xffff; ++ /* the tx fifo also stores 16 bytes of information about the tx ++ * but don't include ethernet FCS because hardware appends it */ ++ min_tx_space = (adapter->max_frame_size + ++ sizeof(struct e1000_tx_desc) - ++ ETHERNET_FCS_SIZE) * 2; ++ min_tx_space = ALIGN(min_tx_space, 1024); ++ min_tx_space >>= 10; ++ /* software strips receive CRC, so leave room for it */ ++ min_rx_space = adapter->max_frame_size; ++ min_rx_space = ALIGN(min_rx_space, 1024); ++ min_rx_space >>= 10; ++ ++ /* If current Tx allocation is less than the min Tx FIFO size, ++ * and the min Tx FIFO size is less than the current Rx FIFO ++ * allocation, take space away from current Rx allocation */ ++ if (tx_space < min_tx_space && ++ ((min_tx_space - tx_space) < pba)) { ++ pba = pba - (min_tx_space - tx_space); ++ ++ /* PCI/PCIx hardware has PBA alignment constraints */ ++ switch (mac->type) { ++ case e1000_82545 ... e1000_82546_rev_3: ++ pba &= ~(E1000_PBA_8K - 1); ++ break; ++ default: ++ break; ++ } ++ ++ /* if short on rx space, rx wins and must trump tx ++ * adjustment or use Early Receive if available */ ++ if (pba < min_rx_space) { ++ switch (mac->type) { ++ case e1000_82573: ++ case e1000_ich9lan: ++ /* ERT enabled in e1000_configure_rx */ ++ break; ++ default: ++ pba = min_rx_space; ++ break; ++ } ++ } ++ } ++ } ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); ++ ++ /* flow control settings */ ++ /* The high water mark must be low enough to fit one full frame ++ * (or the size used for early receive) above it in the Rx FIFO. 
++ * Set it to the lower of: ++ * - 90% of the Rx FIFO size, and ++ * - the full Rx FIFO size minus the early receive size (for parts ++ * with ERT support assuming ERT set to E1000_ERT_2048), or ++ * - the full Rx FIFO size minus one full frame */ ++ hwm = min(((pba << 10) * 9 / 10), ++ ((mac->type == e1000_82573 || mac->type == e1000_ich9lan) ? ++ (u16)((pba << 10) - (E1000_ERT_2048 << 3)) : ++ ((pba << 10) - adapter->max_frame_size))); ++ ++ fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ ++ fc->low_water = fc->high_water - 8; ++ ++ if (mac->type == e1000_80003es2lan) ++ fc->pause_time = 0xFFFF; ++ else ++ fc->pause_time = E1000_FC_PAUSE_TIME; ++ fc->send_xon = 1; ++ fc->type = fc->original_type; ++ ++ /* Allow time for pending master requests to run */ ++ e1000_reset_hw(&adapter->hw); ++ ++ /* For 82573 and ICHx if AMT is enabled, let the firmware know ++ * that the network interface is in control */ ++ if (((adapter->hw.mac.type == e1000_82573) || ++ (adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) && ++ e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ if (mac->type >= e1000_82544) ++ E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); ++ ++ if (e1000_init_hw(&adapter->hw)) ++ DPRINTK(PROBE, ERR, "Hardware Error\n"); ++#ifdef NETIF_F_HW_VLAN_TX ++ e1000_update_mng_vlan(adapter); ++#endif ++ /* if (adapter->hwflags & HWFLAGS_PHY_PWR_BIT) { */ ++ if (mac->type >= e1000_82544 && ++ mac->type <= e1000_82547_rev_2 && ++ mac->autoneg == 1 && ++ adapter->hw.phy.autoneg_advertised == ADVERTISE_1000_FULL) { ++ u32 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ /* clear phy power management bit if we are in gig only mode, ++ * which if enabled will attempt negotiation to 100Mb, which ++ * can cause a loss of link at power off or driver unload */ ++ ctrl &= ~E1000_CTRL_SWDPIN3; ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); ++ } ++ ++#if defined(CONFIG_PPC64) || defined(CONFIG_PPC) ++#define E1000_GCR_DISABLE_TIMEOUT_MECHANISM 0x80000000 ++ if (adapter->hw.mac.type == e1000_82571) { ++ /* work around pSeries hardware by disabling timeouts */ ++ u32 gcr = E1000_READ_REG(&adapter->hw, E1000_GCR); ++ gcr |= E1000_GCR_DISABLE_TIMEOUT_MECHANISM; ++ E1000_WRITE_REG(&adapter->hw, E1000_GCR, gcr); ++ } ++#endif ++ ++ /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ ++ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERNET_IEEE_VLAN_TYPE); ++ ++ e1000_reset_adaptive(&adapter->hw); ++ e1000_get_phy_info(&adapter->hw); ++ ++ if (!(adapter->flags & E1000_FLAG_SMART_POWER_DOWN) && ++ (mac->type == e1000_82571 || mac->type == e1000_82572)) { ++ u16 phy_data = 0; ++ /* speed up time to link by disabling smart power down, ignore ++ * the return value of this function because there is nothing ++ * different we would do if it failed */ ++ e1000_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, ++ &phy_data); ++ phy_data &= ~IGP02E1000_PM_SPD; ++ e1000_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, ++ phy_data); ++ } ++ ++ e1000_release_manageability(adapter); ++} ++ ++/** ++ * e1000_probe - Device Initialization Routine ++ * @pdev: PCI device information struct ++ * @ent: entry in e1000_pci_tbl ++ * ++ * Returns 0 on success, negative on failure ++ * ++ * e1000_probe initializes an adapter identified by a pci_dev structure. ++ * The OS initialization, configuring of the adapter private structure, ++ * and a hardware reset occur. 
++ **/ ++static int e1000_probe(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct net_device *netdev; ++ struct e1000_adapter *adapter; ++ ++ static int cards_found = 0; ++ static int global_quad_port_a = 0; /* global ksp3 port a indication */ ++ int i, err, pci_using_dac; ++ u16 eeprom_data = 0; ++ u16 eeprom_apme_mask = E1000_EEPROM_APME; ++ ++ if (cards[cards_found++] == 0) ++ { ++ return -ENODEV; ++ } ++ ++ if ((err = pci_enable_device(pdev))) ++ return err; ++ ++ if (!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK)) && ++ !(err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK))) { ++ pci_using_dac = 1; ++ } else { ++ if ((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK)) && ++ (err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK))) { ++ E1000_ERR("No usable DMA configuration, aborting\n"); ++ goto err_dma; ++ } ++ pci_using_dac = 0; ++ } ++ ++ if ((err = pci_request_regions(pdev, e1000_driver_name))) ++ goto err_pci_reg; ++ ++ pci_set_master(pdev); ++ ++ err = -ENOMEM; ++#ifdef CONFIG_E1000_MQ ++ netdev = rt_alloc_etherdev(sizeof(struct e1000_adapter) + ++ (sizeof(struct net_device_subqueue) * ++ E1000_MAX_TX_QUEUES), 16); ++#else ++ netdev = rt_alloc_etherdev(sizeof(struct e1000_adapter), ++ 2 * E1000_DEFAULT_RXD + E1000_DEFAULT_TXD); ++#endif ++ if (!netdev) ++ goto err_alloc_etherdev; ++ ++ memset(netdev->priv, 0, sizeof(struct e1000_adapter)); ++ rt_rtdev_connect(netdev, &RTDEV_manager); ++ ++ // SET_NETDEV_DEV(netdev, &pdev->dev); ++ netdev->vers = RTDEV_VERS_2_0; ++ ++ pci_set_drvdata(pdev, netdev); ++ adapter = netdev->priv; ++ adapter->netdev = netdev; ++ adapter->pdev = pdev; ++ adapter->hw.back = adapter; ++ adapter->msg_enable = (1 << local_debug) - 1; ++ ++ err = -EIO; ++ adapter->hw.hw_addr = ioremap(pci_resource_start(pdev, BAR_0), ++ pci_resource_len(pdev, BAR_0)); ++ if (!adapter->hw.hw_addr) ++ goto err_ioremap; ++ ++ for (i = BAR_1; i <= BAR_5; i++) { ++ if (pci_resource_len(pdev, i) == 0) ++ continue; ++ if (pci_resource_flags(pdev, i) & IORESOURCE_IO) { ++ adapter->hw.io_base = pci_resource_start(pdev, i); ++ break; ++ } ++ } ++ ++ netdev->open = &e1000_open; ++ netdev->stop = &e1000_close; ++ netdev->hard_start_xmit = &e1000_xmit_frame; ++#ifdef CONFIG_E1000_MQ ++ netdev->hard_start_subqueue_xmit = &e1000_subqueue_xmit_frame; ++#endif ++#ifdef HAVE_TX_TIMEOUT ++ netdev->tx_timeout = &e1000_tx_timeout; ++ netdev->watchdog_timeo = 5 * HZ; ++#endif ++#ifdef NETIF_F_HW_VLAN_TX ++ netdev->vlan_rx_register = e1000_vlan_rx_register; ++ netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid; ++ netdev->vlan_rx_kill_vid = e1000_vlan_rx_kill_vid; ++#endif ++#ifdef CONFIG_NET_POLL_CONTROLLER ++ netdev->poll_controller = e1000_netpoll; ++#endif ++ strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); ++ ++ adapter->bd_number = cards_found; ++ ++ /* setup the private structure */ ++ if ((err = e1000_sw_init(adapter))) ++ goto err_sw_init; ++ ++ err = -EIO; ++ /* Flash BAR mapping must happen after e1000_sw_init ++ * because it depends on mac.type */ ++ if (((adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) && ++ (pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { ++ adapter->hw.flash_address = ioremap(pci_resource_start(pdev, 1), ++ pci_resource_len(pdev, 1)); ++ if (!adapter->hw.flash_address) ++ goto err_flashmap; ++ } ++ ++ if ((err = e1000_init_mac_params(&adapter->hw))) ++ goto err_hw_init; ++ ++ if ((err = e1000_init_nvm_params(&adapter->hw))) ++ goto err_hw_init; ++ ++ if ((err = 
e1000_init_phy_params(&adapter->hw))) ++ goto err_hw_init; ++ ++ e1000_get_bus_info(&adapter->hw); ++ ++ e1000_init_script_state_82541(&adapter->hw, TRUE); ++ e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); ++ ++ adapter->hw.phy.autoneg_wait_to_complete = FALSE; ++ adapter->hw.mac.adaptive_ifs = FALSE; ++ ++ /* Copper options */ ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ adapter->hw.phy.mdix = AUTO_ALL_MODES; ++ adapter->hw.phy.disable_polarity_correction = FALSE; ++ adapter->hw.phy.ms_type = E1000_MASTER_SLAVE; ++ } ++ ++ if (e1000_check_reset_block(&adapter->hw)) ++ DPRINTK(PROBE, INFO, "PHY reset is blocked due to SOL/IDER session.\n"); ++ ++#ifdef MAX_SKB_FRAGS ++ if (adapter->hw.mac.type >= e1000_82543) { ++#ifdef NETIF_F_HW_VLAN_TX ++ netdev->features = NETIF_F_SG | ++ NETIF_F_HW_CSUM | ++ NETIF_F_HW_VLAN_TX | ++ NETIF_F_HW_VLAN_RX | ++ NETIF_F_HW_VLAN_FILTER; ++ if ((adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) ++ netdev->features &= ~NETIF_F_HW_VLAN_FILTER; ++#else ++ netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM; ++#endif ++ } ++ ++#ifdef NETIF_F_TSO ++ if ((adapter->hw.mac.type >= e1000_82544) && ++ (adapter->hw.mac.type != e1000_82547)) { ++ adapter->flags |= E1000_FLAG_HAS_TSO; ++ netdev->features |= NETIF_F_TSO; ++ } ++ ++#ifdef NETIF_F_TSO6 ++ if (adapter->hw.mac.type > e1000_82547_rev_2) { ++ adapter->flags |= E1000_FLAG_HAS_TSO6; ++ netdev->features |= NETIF_F_TSO6; ++ } ++#endif ++#endif ++ if (pci_using_dac) ++ netdev->features |= NETIF_F_HIGHDMA; ++ ++#endif ++#ifdef NETIF_F_LLTX ++ netdev->features |= NETIF_F_LLTX; ++#endif ++ ++ /* Hardware features, flags and workarounds */ ++ if (adapter->hw.mac.type >= e1000_82571) { ++ adapter->flags |= E1000_FLAG_INT_ASSERT_AUTO_MASK; ++ adapter->flags |= E1000_FLAG_HAS_MSI; ++ adapter->flags |= E1000_FLAG_HAS_MANC2H; ++ } ++ ++ if (adapter->hw.mac.type >= e1000_82540) { ++ adapter->flags |= E1000_FLAG_HAS_SMBUS; ++ adapter->flags |= E1000_FLAG_HAS_INTR_MODERATION; ++ } ++ ++ if (adapter->hw.mac.type == e1000_82543) ++ adapter->flags |= E1000_FLAG_BAD_TX_CARRIER_STATS_FD; ++ ++ /* In rare occasions, ESB2 systems would end up started without ++ * the RX unit being turned on. 
*/ ++ if (adapter->hw.mac.type == e1000_80003es2lan) ++ adapter->flags |= E1000_FLAG_RX_NEEDS_RESTART; ++ ++ adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw); ++ ++ /* before reading the NVM, reset the controller to ++ * put the device in a known good starting state */ ++ ++ e1000_reset_hw(&adapter->hw); ++ ++ /* make sure we don't intercept ARP packets until we're up */ ++ e1000_release_manageability(adapter); ++ ++ /* make sure the NVM is good */ ++ ++ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { ++ DPRINTK(PROBE, ERR, "The NVM Checksum Is Not Valid\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ ++ /* copy the MAC address out of the NVM */ ++ ++ if (e1000_read_mac_addr(&adapter->hw)) ++ DPRINTK(PROBE, ERR, "NVM Read Error\n"); ++ memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); ++#ifdef ETHTOOL_GPERMADDR ++ memcpy(netdev->perm_addr, adapter->hw.mac.addr, netdev->addr_len); ++ ++ if (!is_valid_ether_addr(netdev->perm_addr)) { ++#else ++ if (!is_valid_ether_addr(netdev->dev_addr)) { ++#endif ++ DPRINTK(PROBE, ERR, "Invalid MAC Address\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ ++ INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog_task); ++ INIT_DELAYED_WORK(&adapter->fifo_stall_task, ++ e1000_82547_tx_fifo_stall_task); ++ INIT_DELAYED_WORK(&adapter->phy_info_task, e1000_update_phy_info_task); ++ INIT_WORK(&adapter->reset_task, e1000_reset_task); ++ ++ e1000_check_options(adapter); ++ ++ /* Initial Wake on LAN setting ++ * If APM wake is enabled in the EEPROM, ++ * enable the ACPI Magic Packet filter ++ */ ++ ++ switch (adapter->hw.mac.type) { ++ case e1000_82542: ++ case e1000_82543: ++ break; ++ case e1000_82544: ++ e1000_read_nvm(&adapter->hw, ++ NVM_INIT_CONTROL2_REG, 1, &eeprom_data); ++ eeprom_apme_mask = E1000_EEPROM_82544_APM; ++ break; ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ /* APME bit in EEPROM is mapped to WUC.APME */ ++ eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); ++ eeprom_apme_mask = E1000_WUC_APME; ++ break; ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ case e1000_82571: ++ case e1000_80003es2lan: ++ if (adapter->hw.bus.func == 1) { ++ e1000_read_nvm(&adapter->hw, ++ NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); ++ break; ++ } ++ /* Fall Through */ ++ default: ++ e1000_read_nvm(&adapter->hw, ++ NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); ++ break; ++ } ++ if (eeprom_data & eeprom_apme_mask) ++ adapter->eeprom_wol |= E1000_WUFC_MAG; ++ ++ /* now that we have the eeprom settings, apply the special cases ++ * where the eeprom may be wrong or the board simply won't support ++ * wake on lan on a particular port */ ++ switch (pdev->device) { ++ case E1000_DEV_ID_82546GB_PCIE: ++ case E1000_DEV_ID_82571EB_SERDES_QUAD: ++ adapter->eeprom_wol = 0; ++ break; ++ case E1000_DEV_ID_82546EB_FIBER: ++ case E1000_DEV_ID_82546GB_FIBER: ++ case E1000_DEV_ID_82571EB_FIBER: ++ /* Wake events only supported on port A for dual fiber ++ * regardless of eeprom setting */ ++ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & ++ E1000_STATUS_FUNC_1) ++ adapter->eeprom_wol = 0; ++ break; ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: ++ case E1000_DEV_ID_82571PT_QUAD_COPPER: ++ /* if quad port adapter, disable WoL on all but port A */ ++ if (global_quad_port_a != 0) ++ adapter->eeprom_wol = 0; ++ else ++ adapter->flags |= E1000_FLAG_QUAD_PORT_A; ++ /* Reset for multiple quad port adapters */ ++ if 
(++global_quad_port_a == 4) ++ global_quad_port_a = 0; ++ break; ++ } ++ ++ /* initialize the wol settings based on the eeprom settings */ ++ adapter->wol = adapter->eeprom_wol; ++ ++ /* print bus type/speed/width info */ ++ { ++ struct e1000_hw *hw = &adapter->hw; ++ DPRINTK(PROBE, INFO, "(PCI%s:%s:%s) ", ++ ((hw->bus.type == e1000_bus_type_pcix) ? "-X" : ++ (hw->bus.type == e1000_bus_type_pci_express ? " Express":"")), ++ ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : ++ (hw->bus.speed == e1000_bus_speed_133) ? "133MHz" : ++ (hw->bus.speed == e1000_bus_speed_120) ? "120MHz" : ++ (hw->bus.speed == e1000_bus_speed_100) ? "100MHz" : ++ (hw->bus.speed == e1000_bus_speed_66) ? "66MHz" : "33MHz"), ++ ((hw->bus.width == e1000_bus_width_64) ? "64-bit" : ++ (hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : ++ (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : ++ "32-bit")); ++ } ++ ++ for (i = 0; i < 6; i++) ++ printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); ++ ++ /* reset the hardware with the new settings */ ++ e1000_reset(adapter); ++ ++ /* If the controller is 82573 or ICH and f/w is AMT, do not set ++ * DRV_LOAD until the interface is up. For all other cases, ++ * let the f/w know that the h/w is now under the control ++ * of the driver. */ ++ if (((adapter->hw.mac.type != e1000_82573) && ++ (adapter->hw.mac.type != e1000_ich8lan) && ++ (adapter->hw.mac.type != e1000_ich9lan)) || ++ !e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ /* tell the stack to leave us alone until e1000_open() is called */ ++ rtnetif_carrier_off(netdev); ++ rtnetif_stop_queue(netdev); ++ ++ strcpy(netdev->name, "rteth%d"); ++ err = rt_register_rtnetdev(netdev); ++ if (err) ++ goto err_register; ++ ++ DPRINTK(PROBE, INFO, "Intel(R) PRO/1000 Network Connection\n"); ++ ++ cards_found++; ++ return 0; ++ ++err_register: ++err_hw_init: ++ e1000_release_hw_control(adapter); ++err_eeprom: ++ if (!e1000_check_reset_block(&adapter->hw)) ++ e1000_phy_hw_reset(&adapter->hw); ++ ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++ ++ e1000_remove_device(&adapter->hw); ++err_flashmap: ++ kfree(adapter->tx_ring); ++ kfree(adapter->rx_ring); ++err_sw_init: ++ iounmap(adapter->hw.hw_addr); ++err_ioremap: ++ rtdev_free(netdev); ++err_alloc_etherdev: ++ pci_release_regions(pdev); ++err_pci_reg: ++err_dma: ++ pci_disable_device(pdev); ++ return err; ++} ++ ++/** ++ * e1000_remove - Device Removal Routine ++ * @pdev: PCI device information struct ++ * ++ * e1000_remove is called by the PCI subsystem to alert the driver ++ * that it should release a PCI device. The could be caused by a ++ * Hot-Plug event, or because the driver is going to be removed from ++ * memory. ++ **/ ++static void e1000_remove(struct pci_dev *pdev) ++{ ++ struct net_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ e1000_down_and_stop(adapter); ++ ++ e1000_release_manageability(adapter); ++ ++ /* Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. 
*/ ++ e1000_release_hw_control(adapter); ++ ++ rt_unregister_rtnetdev(netdev); ++ ++ if (!e1000_check_reset_block(&adapter->hw)) ++ e1000_phy_hw_reset(&adapter->hw); ++ ++ e1000_remove_device(&adapter->hw); ++ ++ kfree(adapter->tx_ring); ++ kfree(adapter->rx_ring); ++ ++ iounmap(adapter->hw.hw_addr); ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++ pci_release_regions(pdev); ++ ++ rtdev_free(netdev); ++ ++ pci_disable_device(pdev); ++} ++ ++/** ++ * e1000_sw_init - Initialize general software structures (struct e1000_adapter) ++ * @adapter: board private structure to initialize ++ * ++ * e1000_sw_init initializes the Adapter private data structure. ++ * Fields are initialized based on PCI device information and ++ * OS network device settings (MTU size). ++ **/ ++static int e1000_sw_init(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++#ifdef CONFIG_E1000_NAPI ++ int i; ++#endif ++ ++ /* PCI config space info */ ++ ++ hw->vendor_id = pdev->vendor; ++ hw->device_id = pdev->device; ++ hw->subsystem_vendor_id = pdev->subsystem_vendor; ++ hw->subsystem_device_id = pdev->subsystem_device; ++ ++ pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); ++ ++ pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); ++ ++ adapter->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE; ++ adapter->rx_ps_bsize0 = E1000_RXBUFFER_128; ++ adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETHERNET_FCS_SIZE; ++ adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE; ++ ++ /* Initialize the hardware-specific values */ ++ if (e1000_setup_init_funcs(hw, FALSE)) { ++ DPRINTK(PROBE, ERR, "Hardware Initialization Failure\n"); ++ return -EIO; ++ } ++ ++#ifdef CONFIG_E1000_MQ ++ /* Number of supported queues. ++ * TODO: It's assumed num_rx_queues >= num_tx_queues, since multi-rx ++ * queues are much more interesting. Is it worth coding for the ++ * possibility (however improbable) of num_tx_queues > num_rx_queues? ++ */ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_80003es2lan: ++ adapter->num_tx_queues = 2; ++ adapter->num_rx_queues = 2; ++ break; ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ if ((adapter->hw.device_id == E1000_DEV_ID_ICH8_IGP_AMT) || ++ (adapter->hw.device_id == E1000_DEV_ID_ICH8_IGP_M_AMT) || ++ (adapter->hw.device_id == E1000_DEV_ID_ICH9_IGP_AMT)) { ++ adapter->num_tx_queues = 2; ++ adapter->num_rx_queues = 2; ++ break; ++ } ++ /* Fall through - remaining ICH SKUs do not support MQ */ ++ default: ++ /* All hardware before 82571 only have 1 queue each for Rx/Tx. ++ * However, the 82571 family does not have MSI-X, so multi- ++ * queue isn't enabled. ++ * It'd be wise not to mess with this default case. 
:) */ ++ adapter->num_tx_queues = 1; ++ adapter->num_rx_queues = 1; ++ netdev->egress_subqueue_count = 0; ++ break; ++ } ++ adapter->num_rx_queues = min(adapter->num_rx_queues, num_online_cpus()); ++ adapter->num_tx_queues = min(adapter->num_tx_queues, num_online_cpus()); ++ ++ if ((adapter->num_tx_queues > 1) || (adapter->num_rx_queues > 1)) { ++ netdev->egress_subqueue = (struct net_device_subqueue *) ++ ((void *)adapter + ++ sizeof(struct e1000_adapter)); ++ netdev->egress_subqueue_count = adapter->num_tx_queues; ++ DPRINTK(DRV, INFO, "Multiqueue Enabled: RX queues = %u, " ++ "TX queues = %u\n", adapter->num_rx_queues, ++ adapter->num_tx_queues); ++ } ++#else ++ adapter->num_tx_queues = 1; ++ adapter->num_rx_queues = 1; ++#endif ++ ++ if (e1000_alloc_queues(adapter)) { ++ DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n"); ++ return -ENOMEM; ++ } ++ ++#ifdef CONFIG_E1000_NAPI ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ struct e1000_rx_ring *rx_ring = &adapter->rx_ring[i]; ++ netif_napi_add(adapter->netdev, &rx_ring->napi, e1000_poll, 64); ++ } ++ rtdm_lock_init(&adapter->tx_queue_lock); ++#ifdef CONFIG_E1000_MQ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ rtdm_lock_init(&adapter->tx_ring[i].tx_queue_lock); ++#endif ++#endif ++ ++ /* Explicitly disable IRQ since the NIC can be in any state. */ ++ atomic_set(&adapter->irq_sem, 0); ++ e1000_irq_disable(adapter); ++ ++ rtdm_lock_init(&adapter->stats_lock); ++ ++ set_bit(__E1000_DOWN, &adapter->state); ++ return 0; ++} ++ ++/** ++ * e1000_alloc_queues - Allocate memory for all rings ++ * @adapter: board private structure to initialize ++ **/ ++static int e1000_alloc_queues(struct e1000_adapter *adapter) ++{ ++ adapter->tx_ring = kcalloc(adapter->num_tx_queues, ++ sizeof(struct e1000_tx_ring), GFP_KERNEL); ++ if (!adapter->tx_ring) ++ return -ENOMEM; ++ ++ adapter->rx_ring = kcalloc(adapter->num_rx_queues, ++ sizeof(struct e1000_rx_ring), GFP_KERNEL); ++ if (!adapter->rx_ring) { ++ kfree(adapter->tx_ring); ++ return -ENOMEM; ++ } ++ ++#ifdef CONFIG_E1000_MQ ++ adapter->cpu_tx_ring = alloc_percpu(struct e1000_tx_ring *); ++#endif ++ ++ return E1000_SUCCESS; ++} ++ ++#ifdef CONFIG_E1000_MQ ++static void e1000_setup_queue_mapping(struct e1000_adapter *adapter) ++{ ++ int i, cpu; ++ ++ lock_cpu_hotplug(); ++ i = 0; ++ for_each_online_cpu(cpu) { ++ *per_cpu_ptr(adapter->cpu_tx_ring, cpu) = ++ &adapter->tx_ring[i % adapter->num_tx_queues]; ++ i++; ++ } ++ unlock_cpu_hotplug(); ++} ++#endif ++ ++/** ++ * e1000_intr_msi_test - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static irqreturn_t e1000_intr_msi_test(int irq, void *data) ++{ ++ struct net_device *netdev = data; ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ u32 icr = E1000_READ_REG(&adapter->hw, E1000_ICR); ++ DPRINTK(HW,INFO, "icr is %08X\n", icr); ++ if (icr & E1000_ICR_RXSEQ) { ++ adapter->flags |= E1000_FLAG_HAS_MSI; ++ wmb(); ++ } ++ ++ return IRQ_HANDLED; ++} ++ ++/** ++ * e1000_test_msi_interrupt - Returns 0 for successful test ++ * @adapter: board private struct ++ * ++ * code flow taken from tg3.c ++ **/ ++static int e1000_test_msi_interrupt(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ int err; ++ ++ /* poll_enable hasn't been called yet, so don't need disable */ ++ /* clear any pending events */ ++ E1000_READ_REG(&adapter->hw, E1000_ICR); ++ ++ /* free the real vector and request a test handler */ ++ e1000_free_irq(adapter); ++ ++ 
err = pci_enable_msi(adapter->pdev); ++ err = request_irq(adapter->pdev->irq, &e1000_intr_msi_test, 0, ++ netdev->name, netdev); ++ if (err) { ++ pci_disable_msi(adapter->pdev); ++ goto msi_test_failed; ++ } ++ ++ /* our temporary test variable */ ++ adapter->flags &= ~E1000_FLAG_HAS_MSI; ++ wmb(); ++ ++ e1000_irq_enable(adapter); ++ ++ /* fire an unusual interrupt on the test handler */ ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_RXSEQ); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ msleep(50); ++ ++ e1000_irq_disable(adapter); ++ ++ rmb(); ++ if (!(adapter->flags & E1000_FLAG_HAS_MSI)) { ++ adapter->flags |= E1000_FLAG_HAS_MSI; ++ err = -EIO; ++ DPRINTK(HW, INFO, "MSI interrupt test failed!\n"); ++ } ++ ++ free_irq(adapter->pdev->irq, netdev); ++ pci_disable_msi(adapter->pdev); ++ ++ if (err == -EIO) ++ goto msi_test_failed; ++ ++ /* okay so the test worked, restore settings */ ++ DPRINTK(HW, INFO, "MSI interrupt test succeeded!\n"); ++msi_test_failed: ++ /* restore the original vector, even if it failed */ ++ e1000_request_irq(adapter); ++ return err; ++} ++ ++/** ++ * e1000_test_msi - Returns 0 if MSI test succeeds and INTx mode is restored ++ * @adapter: board private struct ++ * ++ * code flow taken from tg3.c, called with e1000 interrupts disabled. ++ **/ ++static int e1000_test_msi(struct e1000_adapter *adapter) ++{ ++ int err; ++ u16 pci_cmd; ++ ++ if (!(adapter->flags & E1000_FLAG_MSI_ENABLED) || ++ !(adapter->flags & E1000_FLAG_HAS_MSI)) ++ return 0; ++ ++ /* disable SERR in case the MSI write causes a master abort */ ++ pci_read_config_word(adapter->pdev, PCI_COMMAND, &pci_cmd); ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, ++ pci_cmd & ~PCI_COMMAND_SERR); ++ ++ err = e1000_test_msi_interrupt(adapter); ++ ++ /* restore previous setting of command word */ ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, pci_cmd); ++ ++ /* success ! */ ++ if (!err) ++ return 0; ++ ++ /* EIO means MSI test failed */ ++ if (err != -EIO) ++ return err; ++ ++ /* back to INTx mode */ ++ DPRINTK(PROBE, WARNING, "MSI interrupt test failed, using legacy " ++ "interrupt.\n"); ++ ++ e1000_free_irq(adapter); ++ adapter->flags &= ~E1000_FLAG_HAS_MSI; ++ ++ err = e1000_request_irq(adapter); ++ ++ return err; ++} ++ ++/** ++ * e1000_open - Called when a network interface is made active ++ * @netdev: network interface device structure ++ * ++ * Returns 0 on success, negative value on failure ++ * ++ * The open entry point is called when a network interface is made ++ * active by the system (IFF_UP). At this point all resources needed ++ * for transmit and receive operations are allocated, the interrupt ++ * handler is registered with the OS, the watchdog timer is started, ++ * and the stack is notified that the interface is ready. 
++ **/ ++static int e1000_open(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ int err; ++ /* disallow open during test */ ++ if (test_bit(__E1000_TESTING, &adapter->state)) ++ return -EBUSY; ++ ++ /* allocate transmit descriptors */ ++ err = e1000_setup_all_tx_resources(adapter); ++ if (err) ++ goto err_setup_tx; ++ ++ /* allocate receive descriptors */ ++ err = e1000_setup_all_rx_resources(adapter); ++ if (err) ++ goto err_setup_rx; ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ e1000_power_up_phy(&adapter->hw); ++ e1000_setup_link(&adapter->hw); ++ } ++ ++#ifdef NETIF_F_HW_VLAN_TX ++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN)) { ++ e1000_update_mng_vlan(adapter); ++ } ++#endif ++ ++ /* For 82573 and ICHx if AMT is enabled, let the firmware know ++ * that the network interface is now open */ ++ if (((adapter->hw.mac.type == e1000_82573) || ++ (adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) && ++ e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ /* before we allocate an interrupt, we must be ready to handle it. ++ * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt ++ * as soon as we call pci_request_irq, so we have to setup our ++ * clean_rx handler before we do so. */ ++ e1000_configure(adapter); ++ ++ ++ err = e1000_request_irq(adapter); ++ if (err) ++ goto err_req_irq; ++ ++ /* work around PCIe errata with MSI interrupts causing some chipsets to ++ * ignore e1000 MSI messages, which means we need to test our MSI ++ * interrupt now */ ++ err = e1000_test_msi(adapter); ++ if (err) { ++ DPRINTK(PROBE, ERR, "Interrupt allocation failed\n"); ++ goto err_req_irq; ++ } ++ ++ /* From here on the code is the same as e1000_up() */ ++ clear_bit(__E1000_DOWN, &adapter->state); ++ ++ e1000_napi_enable_all(adapter); ++ ++ schedule_delayed_work(&adapter->watchdog_task, 1); ++ e1000_irq_enable(adapter); ++ ++ /* fire a link status change interrupt to start the watchdog */ ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); ++ ++ return E1000_SUCCESS; ++ ++err_req_irq: ++ e1000_release_hw_control(adapter); ++ /* Power down the PHY so no link is implied when interface is down * ++ * The PHY cannot be powered down if any of the following is TRUE * ++ * (a) WoL is enabled ++ * (b) AMT is active ++ * (c) SoL/IDER session is active */ ++ if (!adapter->wol && adapter->hw.mac.type >= e1000_82540 && ++ adapter->hw.phy.media_type == e1000_media_type_copper) ++ e1000_power_down_phy(&adapter->hw); ++ e1000_free_all_rx_resources(adapter); ++err_setup_rx: ++ e1000_free_all_tx_resources(adapter); ++err_setup_tx: ++ e1000_reset(adapter); ++ ++ return err; ++} ++ ++/** ++ * e1000_close - Disables a network interface ++ * @netdev: network interface device structure ++ * ++ * Returns 0, this is not allowed to fail ++ * ++ * The close entry point is called when an interface is de-activated ++ * by the OS. The hardware is still under the drivers control, but ++ * needs to be disabled. A global MAC reset is issued to stop the ++ * hardware, and all transmit and receive resources are freed. 
++ **/ ++static int e1000_close(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); ++ e1000_down(adapter); ++ /* Power down the PHY so no link is implied when interface is down * ++ * The PHY cannot be powered down if any of the following is TRUE * ++ * (a) WoL is enabled ++ * (b) AMT is active ++ * (c) SoL/IDER session is active */ ++ if (!adapter->wol && adapter->hw.mac.type >= e1000_82540 && ++ adapter->hw.phy.media_type == e1000_media_type_copper) ++ e1000_power_down_phy(&adapter->hw); ++ e1000_free_irq(adapter); ++ ++ e1000_free_all_tx_resources(adapter); ++ e1000_free_all_rx_resources(adapter); ++ ++#ifdef NETIF_F_HW_VLAN_TX ++ /* kill manageability vlan ID if supported, but not if a vlan with ++ * the same ID is registered on the host OS (let 8021q kill it) */ ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && ++ !(adapter->vlgrp && ++ vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id))) { ++ e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); ++ } ++#endif ++ ++ /* For 82573 and ICHx if AMT is enabled, let the firmware know ++ * that the network interface is now closed */ ++ if (((adapter->hw.mac.type == e1000_82573) || ++ (adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) && ++ e1000_check_mng_mode(&adapter->hw)) ++ e1000_release_hw_control(adapter); ++ ++ return 0; ++} ++ ++/** ++ * e1000_check_64k_bound - check that memory doesn't cross 64kB boundary ++ * @adapter: address of board private structure ++ * @start: address of beginning of memory ++ * @len: length of memory ++ **/ ++static bool e1000_check_64k_bound(struct e1000_adapter *adapter, ++ void *start, unsigned long len) ++{ ++ unsigned long begin = (unsigned long) start; ++ unsigned long end = begin + len; ++ ++ /* First rev 82545 and 82546 need to not allow any memory ++ * write location to cross 64k boundary due to errata 23 */ ++ if (adapter->hw.mac.type == e1000_82545 || ++ adapter->hw.mac.type == e1000_82546) { ++ return ((begin ^ (end - 1)) >> 16) != 0 ? 
FALSE : TRUE; ++ } ++ ++ return TRUE; ++} ++ ++/** ++ * e1000_setup_tx_resources - allocate Tx resources (Descriptors) ++ * @adapter: board private structure ++ * @tx_ring: tx descriptor ring (for a specific queue) to setup ++ * ++ * Return 0 on success, negative on failure ++ **/ ++static int e1000_setup_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int size; ++ ++ size = sizeof(struct e1000_buffer) * tx_ring->count; ++ tx_ring->buffer_info = vmalloc(size); ++ if (!tx_ring->buffer_info) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the transmit descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(tx_ring->buffer_info, 0, size); ++ ++ /* round up to nearest 4K */ ++ ++ tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc); ++ tx_ring->size = ALIGN(tx_ring->size, 4096); ++ ++ tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size, ++ &tx_ring->dma); ++ if (!tx_ring->desc) { ++setup_tx_desc_die: ++ vfree(tx_ring->buffer_info); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the transmit descriptor ring\n"); ++ return -ENOMEM; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, tx_ring->desc, tx_ring->size)) { ++ void *olddesc = tx_ring->desc; ++ dma_addr_t olddma = tx_ring->dma; ++ DPRINTK(TX_ERR, ERR, "tx_ring align check failed: %u bytes " ++ "at %p\n", tx_ring->size, tx_ring->desc); ++ /* Try again, without freeing the previous */ ++ tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size, ++ &tx_ring->dma); ++ /* Failed allocation, critical failure */ ++ if (!tx_ring->desc) { ++ pci_free_consistent(pdev, tx_ring->size, olddesc, ++ olddma); ++ goto setup_tx_desc_die; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, tx_ring->desc, ++ tx_ring->size)) { ++ /* give up */ ++ pci_free_consistent(pdev, tx_ring->size, tx_ring->desc, ++ tx_ring->dma); ++ pci_free_consistent(pdev, tx_ring->size, olddesc, ++ olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate aligned memory " ++ "for the transmit descriptor ring\n"); ++ vfree(tx_ring->buffer_info); ++ return -ENOMEM; ++ } else { ++ /* Free old allocation, new allocation was successful */ ++ pci_free_consistent(pdev, tx_ring->size, olddesc, ++ olddma); ++ } ++ } ++ memset(tx_ring->desc, 0, tx_ring->size); ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ rtdm_lock_init(&tx_ring->tx_lock); ++ ++ return 0; ++} ++ ++/** ++ * e1000_setup_all_tx_resources - wrapper to allocate Tx resources ++ * @adapter: board private structure ++ * ++ * this allocates tx resources for all queues, return 0 on success, negative ++ * on failure ++ **/ ++int e1000_setup_all_tx_resources(struct e1000_adapter *adapter) ++{ ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]); ++ if (err) { ++ DPRINTK(PROBE, ERR, ++ "Allocation for Tx Queue %u failed\n", i); ++ for (i-- ; i >= 0; i--) ++ e1000_free_tx_resources(adapter, ++ &adapter->tx_ring[i]); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * e1000_configure_tx - Configure 8254x Transmit Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Tx unit of the MAC after a reset. 
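++ * It programs the per-queue descriptor ring registers (TDBAL/TDBAH/
++ * TDLEN/TDH/TDT), the inter-packet gap and Tx interrupt delay timers,
++ * and finally TCTL.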
++ **/ ++static void e1000_configure_tx(struct e1000_adapter *adapter) ++{ ++ u64 tdba; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 tdlen, tctl, tipg, tarc; ++ u32 ipgr1, ipgr2; ++ int i; ++ ++ /* Setup the HW Tx Head and Tail descriptor pointers */ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ tdba = adapter->tx_ring[i].dma; ++ tdlen = adapter->tx_ring[i].count * sizeof(struct e1000_tx_desc); ++ E1000_WRITE_REG(hw, E1000_TDBAL(i), (tdba & 0x00000000ffffffffULL)); ++ E1000_WRITE_REG(hw, E1000_TDBAH(i), (tdba >> 32)); ++ E1000_WRITE_REG(hw, E1000_TDLEN(i), tdlen); ++ E1000_WRITE_REG(hw, E1000_TDH(i), 0); ++ E1000_WRITE_REG(hw, E1000_TDT(i), 0); ++ adapter->tx_ring[i].tdh = E1000_REGISTER(hw, E1000_TDH(i)); ++ adapter->tx_ring[i].tdt = E1000_REGISTER(hw, E1000_TDT(i)); ++ } ++ ++ ++ /* Set the default values for the Tx Inter Packet Gap timer */ ++ if (adapter->hw.mac.type <= e1000_82547_rev_2 && ++ (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes)) ++ tipg = DEFAULT_82543_TIPG_IPGT_FIBER; ++ else ++ tipg = DEFAULT_82543_TIPG_IPGT_COPPER; ++ ++ switch (hw->mac.type) { ++ case e1000_82542: ++ tipg = DEFAULT_82542_TIPG_IPGT; ++ ipgr1 = DEFAULT_82542_TIPG_IPGR1; ++ ipgr2 = DEFAULT_82542_TIPG_IPGR2; ++ break; ++ case e1000_80003es2lan: ++ ipgr1 = DEFAULT_82543_TIPG_IPGR1; ++ ipgr2 = DEFAULT_80003ES2LAN_TIPG_IPGR2; ++ break; ++ default: ++ ipgr1 = DEFAULT_82543_TIPG_IPGR1; ++ ipgr2 = DEFAULT_82543_TIPG_IPGR2; ++ break; ++ } ++ tipg |= ipgr1 << E1000_TIPG_IPGR1_SHIFT; ++ tipg |= ipgr2 << E1000_TIPG_IPGR2_SHIFT; ++ E1000_WRITE_REG(hw, E1000_TIPG, tipg); ++ ++ /* Set the Tx Interrupt Delay register */ ++ ++ E1000_WRITE_REG(hw, E1000_TIDV, adapter->tx_int_delay); ++ if (adapter->flags & E1000_FLAG_HAS_INTR_MODERATION) ++ E1000_WRITE_REG(hw, E1000_TADV, adapter->tx_abs_int_delay); ++ ++ /* Program the Transmit Control Register */ ++ ++ tctl = E1000_READ_REG(hw, E1000_TCTL); ++ tctl &= ~E1000_TCTL_CT; ++ tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | ++ (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); ++ ++ if (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572) { ++ tarc = E1000_READ_REG(hw, E1000_TARC(0)); ++ /* set the speed mode bit, we'll clear it if we're not at ++ * gigabit link later */ ++#define SPEED_MODE_BIT (1 << 21) ++ tarc |= SPEED_MODE_BIT; ++ E1000_WRITE_REG(hw, E1000_TARC(0), tarc); ++ } else if (hw->mac.type == e1000_80003es2lan) { ++ tarc = E1000_READ_REG(hw, E1000_TARC(0)); ++ tarc |= 1; ++ E1000_WRITE_REG(hw, E1000_TARC(0), tarc); ++ tarc = E1000_READ_REG(hw, E1000_TARC(1)); ++ tarc |= 1; ++ E1000_WRITE_REG(hw, E1000_TARC(1), tarc); ++ } ++ ++ e1000_config_collision_dist(hw); ++ ++ /* Setup Transmit Descriptor Settings for eop descriptor */ ++ adapter->txd_cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS; ++ ++ /* only set IDE if we are delaying interrupts using the timers */ ++ if (adapter->tx_int_delay) ++ adapter->txd_cmd |= E1000_TXD_CMD_IDE; ++ ++ if (hw->mac.type < e1000_82543) ++ adapter->txd_cmd |= E1000_TXD_CMD_RPS; ++ else ++ adapter->txd_cmd |= E1000_TXD_CMD_RS; ++ ++ /* Cache if we're 82544 running in PCI-X because we'll ++ * need this to apply a workaround later in the send path. 
*/ ++ if (hw->mac.type == e1000_82544 && ++ hw->bus.type == e1000_bus_type_pcix) ++ adapter->pcix_82544 = 1; ++ ++ E1000_WRITE_REG(hw, E1000_TCTL, tctl); ++ ++} ++ ++/** ++ * e1000_setup_rx_resources - allocate Rx resources (Descriptors) ++ * @adapter: board private structure ++ * @rx_ring: rx descriptor ring (for a specific queue) to setup ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++static int e1000_setup_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int size, desc_len; ++ ++ size = sizeof(struct e1000_rx_buffer) * rx_ring->count; ++ rx_ring->buffer_info = vmalloc(size); ++ if (!rx_ring->buffer_info) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(rx_ring->buffer_info, 0, size); ++ ++ rx_ring->ps_page = kcalloc(rx_ring->count, sizeof(struct e1000_ps_page), ++ GFP_KERNEL); ++ if (!rx_ring->ps_page) { ++ vfree(rx_ring->buffer_info); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ ++ rx_ring->ps_page_dma = kcalloc(rx_ring->count, ++ sizeof(struct e1000_ps_page_dma), ++ GFP_KERNEL); ++ if (!rx_ring->ps_page_dma) { ++ vfree(rx_ring->buffer_info); ++ kfree(rx_ring->ps_page); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ ++ if (adapter->hw.mac.type <= e1000_82547_rev_2) ++ desc_len = sizeof(struct e1000_rx_desc); ++ else ++ desc_len = sizeof(union e1000_rx_desc_packet_split); ++ ++ /* Round up to nearest 4K */ ++ ++ rx_ring->size = rx_ring->count * desc_len; ++ rx_ring->size = ALIGN(rx_ring->size, 4096); ++ ++ rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size, ++ &rx_ring->dma); ++ ++ if (!rx_ring->desc) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++setup_rx_desc_die: ++ vfree(rx_ring->buffer_info); ++ kfree(rx_ring->ps_page); ++ kfree(rx_ring->ps_page_dma); ++ return -ENOMEM; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, rx_ring->desc, rx_ring->size)) { ++ void *olddesc = rx_ring->desc; ++ dma_addr_t olddma = rx_ring->dma; ++ DPRINTK(RX_ERR, ERR, "rx_ring align check failed: %u bytes " ++ "at %p\n", rx_ring->size, rx_ring->desc); ++ /* Try again, without freeing the previous */ ++ rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size, ++ &rx_ring->dma); ++ /* Failed allocation, critical failure */ ++ if (!rx_ring->desc) { ++ pci_free_consistent(pdev, rx_ring->size, olddesc, ++ olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory " ++ "for the receive descriptor ring\n"); ++ goto setup_rx_desc_die; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, rx_ring->desc, ++ rx_ring->size)) { ++ /* give up */ ++ pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, ++ rx_ring->dma); ++ pci_free_consistent(pdev, rx_ring->size, olddesc, ++ olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate aligned memory " ++ "for the receive descriptor ring\n"); ++ goto setup_rx_desc_die; ++ } else { ++ /* Free old allocation, new allocation was successful */ ++ pci_free_consistent(pdev, rx_ring->size, olddesc, ++ olddma); ++ } ++ } ++ memset(rx_ring->desc, 0, rx_ring->size); ++ ++ /* set up ring defaults */ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ rx_ring->rx_skb_top = NULL; ++ rx_ring->adapter = adapter; ++ ++ return 0; ++} ++ ++/** ++ * e1000_setup_all_rx_resources - 
wrapper to allocate Rx resources ++ * @adapter: board private structure ++ * ++ * this allocates rx resources for all queues, return 0 on success, negative ++ * on failure ++ **/ ++int e1000_setup_all_rx_resources(struct e1000_adapter *adapter) ++{ ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ err = e1000_setup_rx_resources(adapter, &adapter->rx_ring[i]); ++ if (err) { ++ DPRINTK(PROBE, ERR, ++ "Allocation for Rx Queue %u failed\n", i); ++ for (i-- ; i >= 0; i--) ++ e1000_free_rx_resources(adapter, ++ &adapter->rx_ring[i]); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \ ++ (((S) & (PAGE_SIZE - 1)) ? 1 : 0)) ++/** ++ * e1000_setup_rctl - configure the receive control registers ++ * @adapter: Board private structure ++ **/ ++static void e1000_setup_rctl(struct e1000_adapter *adapter) ++{ ++ u32 rctl, rfctl; ++ u32 psrctl = 0; ++#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT ++ u32 pages = 0; ++#endif ++ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ ++ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); ++ ++ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | ++ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | ++ (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); ++ ++ /* disable the stripping of CRC because it breaks ++ * BMC firmware connected over SMBUS ++ if (adapter->hw.mac.type > e1000_82543) ++ rctl |= E1000_RCTL_SECRC; ++ */ ++ ++ if (e1000_tbi_sbp_enabled_82543(&adapter->hw)) ++ rctl |= E1000_RCTL_SBP; ++ else ++ rctl &= ~E1000_RCTL_SBP; ++ ++ if (adapter->netdev->mtu <= ETH_DATA_LEN) ++ rctl &= ~E1000_RCTL_LPE; ++ else ++ rctl |= E1000_RCTL_LPE; ++ ++ /* Setup buffer sizes */ ++ rctl &= ~E1000_RCTL_SZ_4096; ++ rctl |= E1000_RCTL_BSEX; ++ switch (adapter->rx_buffer_len) { ++ case E1000_RXBUFFER_256: ++ rctl |= E1000_RCTL_SZ_256; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_512: ++ rctl |= E1000_RCTL_SZ_512; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_1024: ++ rctl |= E1000_RCTL_SZ_1024; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_2048: ++ default: ++ rctl |= E1000_RCTL_SZ_2048; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_4096: ++ rctl |= E1000_RCTL_SZ_4096; ++ break; ++ case E1000_RXBUFFER_8192: ++ rctl |= E1000_RCTL_SZ_8192; ++ break; ++ case E1000_RXBUFFER_16384: ++ rctl |= E1000_RCTL_SZ_16384; ++ break; ++ } ++ ++#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT ++ /* 82571 and greater support packet-split where the protocol ++ * header is placed in skb->data and the packet data is ++ * placed in pages hanging off of skb_shinfo(skb)->nr_frags. ++ * In the case of a non-split, skb->data is linearly filled, ++ * followed by the page buffers. Therefore, skb->data is ++ * sized to hold the largest protocol header. 
++ */ ++ /* allocations using alloc_page take too long for regular MTU ++ * so only enable packet split for jumbo frames */ ++ pages = PAGE_USE_COUNT(adapter->netdev->mtu); ++ if ((adapter->hw.mac.type >= e1000_82571) && (pages <= 3) && ++ PAGE_SIZE <= 16384 && (rctl & E1000_RCTL_LPE)) ++ adapter->rx_ps_pages = pages; ++ else ++ adapter->rx_ps_pages = 0; ++#endif ++ ++ if (adapter->rx_ps_pages) { ++ /* Configure extra packet-split registers */ ++ rfctl = E1000_READ_REG(&adapter->hw, E1000_RFCTL); ++ rfctl |= E1000_RFCTL_EXTEN; ++ /* disable packet split support for IPv6 extension headers, ++ * because some malformed IPv6 headers can hang the RX */ ++ rfctl |= (E1000_RFCTL_IPV6_EX_DIS | ++ E1000_RFCTL_NEW_IPV6_EXT_DIS); ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_RFCTL, rfctl); ++ ++ /* disable the stripping of CRC because it breaks ++ * BMC firmware connected over SMBUS */ ++ rctl |= E1000_RCTL_DTYP_PS /* | E1000_RCTL_SECRC */; ++ ++ psrctl |= adapter->rx_ps_bsize0 >> ++ E1000_PSRCTL_BSIZE0_SHIFT; ++ ++ switch (adapter->rx_ps_pages) { ++ case 3: ++ psrctl |= PAGE_SIZE << ++ E1000_PSRCTL_BSIZE3_SHIFT; ++ case 2: ++ psrctl |= PAGE_SIZE << ++ E1000_PSRCTL_BSIZE2_SHIFT; ++ case 1: ++ psrctl |= PAGE_SIZE >> ++ E1000_PSRCTL_BSIZE1_SHIFT; ++ break; ++ } ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_PSRCTL, psrctl); ++ } ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ adapter->flags &= ~E1000_FLAG_RX_RESTART_NOW; ++} ++ ++/** ++ * e1000_configure_rx - Configure 8254x Receive Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Rx unit of the MAC after a reset. ++ **/ ++static void e1000_configure_rx(struct e1000_adapter *adapter) ++{ ++ u64 rdba; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rdlen, rctl, rxcsum, ctrl_ext; ++ int i; ++ ++ if (adapter->rx_ps_pages) { ++ /* this is a 32 byte descriptor */ ++ rdlen = adapter->rx_ring[0].count * ++ sizeof(union e1000_rx_desc_packet_split); ++ adapter->clean_rx = e1000_clean_rx_irq_ps; ++ adapter->alloc_rx_buf = e1000_alloc_rx_buffers_ps; ++#ifdef CONFIG_E1000_NAPI ++ } else if (adapter->netdev->mtu > MAXIMUM_ETHERNET_VLAN_SIZE) { ++ rdlen = adapter->rx_ring[0].count * ++ sizeof(struct e1000_rx_desc); ++ adapter->clean_rx = e1000_clean_jumbo_rx_irq; ++ adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers; ++#endif ++ } else { ++ rdlen = adapter->rx_ring[0].count * ++ sizeof(struct e1000_rx_desc); ++ adapter->clean_rx = e1000_clean_rx_irq; ++ adapter->alloc_rx_buf = e1000_alloc_rx_buffers; ++ } ++ ++ /* disable receives while setting up the descriptors */ ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ E1000_WRITE_FLUSH(hw); ++ mdelay(10); ++ ++ /* set the Receive Delay Timer Register */ ++ E1000_WRITE_REG(hw, E1000_RDTR, adapter->rx_int_delay); ++ ++ if (adapter->flags & E1000_FLAG_HAS_INTR_MODERATION) { ++ E1000_WRITE_REG(hw, E1000_RADV, adapter->rx_abs_int_delay); ++ if (adapter->itr_setting != 0) ++ E1000_WRITE_REG(hw, E1000_ITR, ++ 1000000000 / (adapter->itr * 256)); ++ } ++ ++ if (hw->mac.type >= e1000_82571) { ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ /* Reset delay timers after every interrupt */ ++ ctrl_ext |= E1000_CTRL_EXT_INT_TIMER_CLR; ++#ifdef CONFIG_E1000_NAPI ++ /* Auto-Mask interrupts upon ICR access */ ++ ctrl_ext |= E1000_CTRL_EXT_IAME; ++ E1000_WRITE_REG(hw, E1000_IAM, 0xffffffff); ++#endif ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Setup the HW Rx Head and Tail Descriptor Pointers and ++ * 
the Base and Length of the Rx Descriptor Ring */ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ rdba = adapter->rx_ring[i].dma; ++ E1000_WRITE_REG(hw, E1000_RDBAL(i), (rdba & 0x00000000ffffffffULL)); ++ E1000_WRITE_REG(hw, E1000_RDBAH(i), (rdba >> 32)); ++ E1000_WRITE_REG(hw, E1000_RDLEN(i), rdlen); ++ E1000_WRITE_REG(hw, E1000_RDH(i), 0); ++ E1000_WRITE_REG(hw, E1000_RDT(i), 0); ++ adapter->rx_ring[i].rdh = E1000_REGISTER(hw, E1000_RDH(i)); ++ adapter->rx_ring[i].rdt = E1000_REGISTER(hw, E1000_RDT(i)); ++ } ++ ++#ifdef CONFIG_E1000_MQ ++ if (adapter->num_rx_queues > 1) { ++ u32 random[10]; ++ u32 reta, mrqc; ++ int i; ++ ++ get_random_bytes(&random[0], 40); ++ ++ switch (adapter->num_rx_queues) { ++ default: ++ reta = 0x00800080; ++ mrqc = E1000_MRQC_ENABLE_RSS_2Q; ++ break; ++ } ++ ++ /* Fill out redirection table */ ++ for (i = 0; i < 32; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_RETA, i, reta); ++ /* Fill out hash function seeds */ ++ for (i = 0; i < 10; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK, i, random[i]); ++ ++ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | ++ E1000_MRQC_RSS_FIELD_IPV4_TCP); ++ ++ E1000_WRITE_REG(hw, E1000_MRQC, mrqc); ++ ++ /* Multiqueue and packet checksumming are mutually exclusive. */ ++ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); ++ rxcsum |= E1000_RXCSUM_PCSD; ++ E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); ++ } else if (hw->mac.type >= e1000_82543) { ++#else ++ if (hw->mac.type >= e1000_82543) { ++#endif /* CONFIG_E1000_MQ */ ++ /* Enable 82543 Receive Checksum Offload for TCP and UDP */ ++ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); ++ if (adapter->rx_csum == TRUE) { ++ rxcsum |= E1000_RXCSUM_TUOFL; ++ ++ /* Enable 82571 IPv4 payload checksum for UDP fragments ++ * Must be used in conjunction with packet-split. */ ++ if ((hw->mac.type >= e1000_82571) && ++ (adapter->rx_ps_pages)) { ++ rxcsum |= E1000_RXCSUM_IPPCSE; ++ } ++ } else { ++ rxcsum &= ~E1000_RXCSUM_TUOFL; ++ /* don't need to clear IPPCSE as it defaults to 0 */ ++ } ++ E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); ++ } ++ ++ /* Enable early receives on supported devices, only takes effect when ++ * packet size is equal or larger than the specified value (in 8 byte ++ * units), e.g. 
using jumbo frames when setting to E1000_ERT_2048 */ ++ if ((hw->mac.type == e1000_82573 || hw->mac.type == e1000_ich9lan) && ++ (adapter->netdev->mtu > ETH_DATA_LEN)) ++ E1000_WRITE_REG(hw, E1000_ERT, E1000_ERT_2048); ++ ++ /* Enable Receives */ ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++} ++ ++/** ++ * e1000_free_tx_resources - Free Tx Resources per Queue ++ * @adapter: board private structure ++ * @tx_ring: Tx descriptor ring for a specific queue ++ * ++ * Free all transmit software resources ++ **/ ++static void e1000_free_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ ++ e1000_clean_tx_ring(adapter, tx_ring); ++ ++ vfree(tx_ring->buffer_info); ++ tx_ring->buffer_info = NULL; ++ ++ pci_free_consistent(pdev, tx_ring->size, tx_ring->desc, tx_ring->dma); ++ ++ tx_ring->desc = NULL; ++} ++ ++/** ++ * e1000_free_all_tx_resources - Free Tx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all transmit software resources ++ **/ ++void e1000_free_all_tx_resources(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ e1000_free_tx_resources(adapter, &adapter->tx_ring[i]); ++} ++ ++static void e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter, ++ struct e1000_buffer *buffer_info) ++{ ++ if (buffer_info->dma) { ++ pci_unmap_page(adapter->pdev, ++ buffer_info->dma, ++ buffer_info->length, ++ PCI_DMA_TODEVICE); ++ buffer_info->dma = 0; ++ } ++ if (buffer_info->skb) { ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ /* buffer_info must be completely set up in the transmit path */ ++} ++ ++/** ++ * e1000_clean_tx_ring - Free Tx Buffers ++ * @adapter: board private structure ++ * @tx_ring: ring to be cleaned ++ **/ ++static void e1000_clean_tx_ring(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct e1000_buffer *buffer_info; ++ unsigned long size; ++ unsigned int i; ++ ++ /* Free all the Tx ring sk_buffs */ ++ ++ for (i = 0; i < tx_ring->count; i++) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ e1000_unmap_and_free_tx_resource(adapter, buffer_info); ++ } ++ ++ size = sizeof(struct e1000_buffer) * tx_ring->count; ++ memset(tx_ring->buffer_info, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ ++ memset(tx_ring->desc, 0, tx_ring->size); ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ tx_ring->last_tx_tso = 0; ++ ++ writel(0, adapter->hw.hw_addr + tx_ring->tdh); ++ writel(0, adapter->hw.hw_addr + tx_ring->tdt); ++} ++ ++/** ++ * e1000_clean_all_tx_rings - Free Tx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++static void e1000_clean_all_tx_rings(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ e1000_clean_tx_ring(adapter, &adapter->tx_ring[i]); ++} ++ ++/** ++ * e1000_free_rx_resources - Free Rx Resources ++ * @adapter: board private structure ++ * @rx_ring: ring to clean the resources from ++ * ++ * Free all receive software resources ++ **/ ++static void e1000_free_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ ++ e1000_clean_rx_ring(adapter, rx_ring); ++ ++ vfree(rx_ring->buffer_info); ++ rx_ring->buffer_info = NULL; ++ kfree(rx_ring->ps_page); ++ rx_ring->ps_page = NULL; ++ kfree(rx_ring->ps_page_dma); ++ rx_ring->ps_page_dma = NULL; ++ ++ pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, rx_ring->dma); ++ 
++ rx_ring->desc = NULL; ++} ++ ++/** ++ * e1000_free_all_rx_resources - Free Rx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all receive software resources ++ **/ ++void e1000_free_all_rx_resources(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ e1000_free_rx_resources(adapter, &adapter->rx_ring[i]); ++} ++ ++/** ++ * e1000_clean_rx_ring - Free Rx Buffers per Queue ++ * @adapter: board private structure ++ * @rx_ring: ring to free buffers from ++ **/ ++static void e1000_clean_rx_ring(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring) ++{ ++ struct e1000_rx_buffer *buffer_info; ++ struct e1000_ps_page *ps_page; ++ struct e1000_ps_page_dma *ps_page_dma; ++ struct pci_dev *pdev = adapter->pdev; ++ unsigned long size; ++ unsigned int i, j; ++ ++ /* Free all the Rx ring sk_buffs */ ++ for (i = 0; i < rx_ring->count; i++) { ++ buffer_info = &rx_ring->buffer_info[i]; ++ if (buffer_info->dma && ++ adapter->clean_rx == e1000_clean_rx_irq) { ++ pci_unmap_single(pdev, buffer_info->dma, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++#ifdef CONFIG_E1000_NAPI ++ } else if (buffer_info->dma && ++ adapter->clean_rx == e1000_clean_jumbo_rx_irq) { ++ pci_unmap_page(pdev, buffer_info->dma, PAGE_SIZE, ++ PCI_DMA_FROMDEVICE); ++#endif ++ } else if (buffer_info->dma && ++ adapter->clean_rx == e1000_clean_rx_irq_ps) { ++ pci_unmap_single(pdev, buffer_info->dma, ++ adapter->rx_ps_bsize0, ++ PCI_DMA_FROMDEVICE); ++ } ++ buffer_info->dma = 0; ++ if (buffer_info->page) { ++ put_page(buffer_info->page); ++ buffer_info->page = NULL; ++ } ++ if (buffer_info->skb) { ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ ps_page = &rx_ring->ps_page[i]; ++ ps_page_dma = &rx_ring->ps_page_dma[i]; ++ for (j = 0; j < adapter->rx_ps_pages; j++) { ++ if (!ps_page->ps_page[j]) break; ++ pci_unmap_page(pdev, ++ ps_page_dma->ps_page_dma[j], ++ PAGE_SIZE, PCI_DMA_FROMDEVICE); ++ ps_page_dma->ps_page_dma[j] = 0; ++ put_page(ps_page->ps_page[j]); ++ ps_page->ps_page[j] = NULL; ++ } ++ } ++ ++#ifdef CONFIG_E1000_NAPI ++ /* there also may be some cached data from a chained receive */ ++ if (rx_ring->rx_skb_top) { ++ kfree_rtskb(rx_ring->rx_skb_top); ++ rx_ring->rx_skb_top = NULL; ++ } ++#endif ++ ++ size = sizeof(struct e1000_rx_buffer) * rx_ring->count; ++ memset(rx_ring->buffer_info, 0, size); ++ size = sizeof(struct e1000_ps_page) * rx_ring->count; ++ memset(rx_ring->ps_page, 0, size); ++ size = sizeof(struct e1000_ps_page_dma) * rx_ring->count; ++ memset(rx_ring->ps_page_dma, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ ++ memset(rx_ring->desc, 0, rx_ring->size); ++ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ ++ writel(0, adapter->hw.hw_addr + rx_ring->rdh); ++ writel(0, adapter->hw.hw_addr + rx_ring->rdt); ++} ++ ++/** ++ * e1000_clean_all_rx_rings - Free Rx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++static void e1000_clean_all_rx_rings(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ e1000_clean_rx_ring(adapter, &adapter->rx_ring[i]); ++} ++ ++/* The 82542 2.0 (revision 2) needs to have the receive unit in reset ++ * and memory write and invalidate disabled for certain operations ++ */ ++#if 0 ++static void e1000_enter_82542_rst(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ u32 rctl; ++ ++ if (adapter->hw.mac.type != e1000_82542) ++ return; ++ if 
(adapter->hw.revision_id != E1000_REVISION_2) ++ return; ++ ++ e1000_pci_clear_mwi(&adapter->hw); ++ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ rctl |= E1000_RCTL_RST; ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ mdelay(5); ++ ++ if (rtnetif_running(netdev)) ++ e1000_clean_all_rx_rings(adapter); ++} ++ ++static void e1000_leave_82542_rst(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ u32 rctl; ++ ++ if (adapter->hw.mac.type != e1000_82542) ++ return; ++ if (adapter->hw.revision_id != E1000_REVISION_2) ++ return; ++ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ rctl &= ~E1000_RCTL_RST; ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ mdelay(5); ++ ++ if (adapter->hw.bus.pci_cmd_word & PCI_COMMAND_INVALIDATE) ++ e1000_pci_set_mwi(&adapter->hw); ++ ++ if (rtnetif_running(netdev)) { ++ /* No need to loop, because 82542 supports only 1 queue */ ++ struct e1000_rx_ring *ring = &adapter->rx_ring[0]; ++ e1000_configure_rx(adapter); ++ adapter->alloc_rx_buf(adapter, ring, E1000_DESC_UNUSED(ring)); ++ } ++} ++ ++/** ++ * e1000_set_mac - Change the Ethernet Address of the NIC ++ * @netdev: network interface device structure ++ * @p: pointer to an address structure ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++static int e1000_set_mac(struct net_device *netdev, void *p) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct sockaddr *addr = p; ++ ++ if (!is_valid_ether_addr(addr->sa_data)) ++ return -EADDRNOTAVAIL; ++ ++ /* 82542 2.0 needs to be in reset to write receive address registers */ ++ ++ if (adapter->hw.mac.type == e1000_82542) ++ e1000_enter_82542_rst(adapter); ++ ++ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); ++ memcpy(adapter->hw.mac.addr, addr->sa_data, netdev->addr_len); ++ ++ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); ++ ++ /* With 82571 controllers, LAA may be overwritten (with the default) ++ * due to controller reset from the other port. */ ++ if (adapter->hw.mac.type == e1000_82571) { ++ /* activate the work around */ ++ e1000_set_laa_state_82571(&adapter->hw, TRUE); ++ ++ /* Hold a copy of the LAA in RAR[14] This is done so that ++ * between the time RAR[0] gets clobbered and the time it ++ * gets fixed (in e1000_watchdog), the actual LAA is in one ++ * of the RARs and no incoming packets directed to this port ++ * are dropped. Eventually the LAA will be in RAR[0] and ++ * RAR[14] */ ++ e1000_rar_set(&adapter->hw, ++ adapter->hw.mac.addr, ++ adapter->hw.mac.rar_entry_count - 1); ++ } ++ ++ if (adapter->hw.mac.type == e1000_82542) ++ e1000_leave_82542_rst(adapter); ++ ++ return 0; ++} ++#endif ++ ++/** ++ * e1000_set_multi - Multicast and Promiscuous mode set ++ * @netdev: network interface device structure ++ * ++ * The set_multi entry point is called whenever the multicast address ++ * list or the network interface flags are updated. This routine is ++ * responsible for configuring the hardware for proper multicast, ++ * promiscuous mode, and all-multi behavior. 
++ **/ ++static void e1000_set_multi(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ ++ /* Check for Promiscuous and All Multicast modes */ ++ ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ ++ if (netdev->flags & IFF_PROMISC) { ++ rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); ++ } else if (netdev->flags & IFF_ALLMULTI) { ++ rctl |= E1000_RCTL_MPE; ++ rctl &= ~E1000_RCTL_UPE; ++ } else { ++ rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE); ++ } ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++} ++ ++/* Need to wait a few seconds after link up to get diagnostic information from ++ * the phy */ ++static void e1000_update_phy_info_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, ++ phy_info_task.work); ++ e1000_get_phy_info(&adapter->hw); ++} ++ ++/** ++ * e1000_82547_tx_fifo_stall_task - task to complete work ++ * @work: work struct contained inside adapter struct ++ **/ ++static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, ++ fifo_stall_task.work); ++ struct net_device *netdev = adapter->netdev; ++ u32 tctl; ++ ++ if (atomic_read(&adapter->tx_fifo_stall)) { ++ if ((E1000_READ_REG(&adapter->hw, E1000_TDT(0)) == ++ E1000_READ_REG(&adapter->hw, E1000_TDH(0))) && ++ (E1000_READ_REG(&adapter->hw, E1000_TDFT) == ++ E1000_READ_REG(&adapter->hw, E1000_TDFH)) && ++ (E1000_READ_REG(&adapter->hw, E1000_TDFTS) == ++ E1000_READ_REG(&adapter->hw, E1000_TDFHS))) { ++ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); ++ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, ++ tctl & ~E1000_TCTL_EN); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDFT, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDFH, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDFTS, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDFHS, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ ++ adapter->tx_fifo_head = 0; ++ atomic_set(&adapter->tx_fifo_stall, 0); ++ rtnetif_wake_queue(netdev); ++ } else if (!test_bit(__E1000_DOWN, &adapter->state)) ++ schedule_delayed_work(&adapter->fifo_stall_task, 1); ++ } ++} ++ ++static bool e1000_has_link(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ bool link_active = FALSE; ++ s32 ret_val = 0; ++ ++ /* get_link_status is set on LSC (link status) interrupt or ++ * rx sequence error interrupt. 
get_link_status will stay ++ * false until the e1000_check_for_link establishes link ++ * for copper adapters ONLY ++ */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ if (hw->mac.get_link_status) { ++ ret_val = e1000_check_for_link(hw); ++ link_active = !hw->mac.get_link_status; ++ } else { ++ link_active = TRUE; ++ } ++ break; ++ case e1000_media_type_fiber: ++ ret_val = e1000_check_for_link(hw); ++ link_active = !!(E1000_READ_REG(hw, E1000_STATUS) & ++ E1000_STATUS_LU); ++ break; ++ case e1000_media_type_internal_serdes: ++ ret_val = e1000_check_for_link(hw); ++ link_active = adapter->hw.mac.serdes_has_link; ++ break; ++ default: ++ case e1000_media_type_unknown: ++ break; ++ } ++ ++ if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && ++ (E1000_READ_REG(&adapter->hw, E1000_CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { ++ /* See e1000_kmrn_lock_loss_workaround_ich8lan() */ ++ DPRINTK(LINK, INFO, ++ "Gigabit has been disabled, downgrading speed\n"); ++ } ++ ++ return link_active; ++} ++ ++static void e1000_enable_receives(struct e1000_adapter *adapter) ++{ ++ /* make sure the receive unit is started */ ++ if ((adapter->flags & E1000_FLAG_RX_NEEDS_RESTART) && ++ (adapter->flags & E1000_FLAG_RX_RESTART_NOW)) { ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl = E1000_READ_REG(hw, E1000_RCTL); ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN); ++ adapter->flags &= ~E1000_FLAG_RX_RESTART_NOW; ++ } ++} ++ ++static void e1000_watchdog_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, ++ watchdog_task.work); ++ ++ struct net_device *netdev = adapter->netdev; ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ struct e1000_tx_ring *tx_ring; ++ u32 link, tctl; ++ int i, tx_pending = 0; ++ ++ link = e1000_has_link(adapter); ++ if ((rtnetif_carrier_ok(netdev)) && link) { ++ e1000_enable_receives(adapter); ++ goto link_up; ++ } ++ ++ if (mac->type == e1000_82573) { ++ e1000_enable_tx_pkt_filtering(&adapter->hw); ++#ifdef NETIF_F_HW_VLAN_TX ++ if (adapter->mng_vlan_id != adapter->hw.mng_cookie.vlan_id) ++ e1000_update_mng_vlan(adapter); ++#endif ++ } ++ ++ if (link) { ++ if (!rtnetif_carrier_ok(netdev)) { ++ u32 ctrl; ++ bool txb2b = 1; ++#ifdef SIOCGMIIPHY ++ /* update snapshot of PHY registers on LSC */ ++ e1000_phy_read_status(adapter); ++#endif ++ e1000_get_speed_and_duplex(&adapter->hw, ++ &adapter->link_speed, ++ &adapter->link_duplex); ++ ++ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ DPRINTK(LINK, INFO, "NIC Link is Up %d Mbps %s, " ++ "Flow Control: %s\n", ++ adapter->link_speed, ++ adapter->link_duplex == FULL_DUPLEX ? ++ "Full Duplex" : "Half Duplex", ++ ((ctrl & E1000_CTRL_TFCE) && (ctrl & ++ E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl & ++ E1000_CTRL_RFCE) ? "RX" : ((ctrl & ++ E1000_CTRL_TFCE) ? "TX" : "None" ))); ++ ++ /* tweak tx_queue_len according to speed/duplex ++ * and adjust the timeout factor */ ++ //netdev->tx_queue_len = adapter->tx_queue_len; ++ adapter->tx_timeout_factor = 1; ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ txb2b = 0; ++ //netdev->tx_queue_len = 10; ++ adapter->tx_timeout_factor = 16; ++ break; ++ case SPEED_100: ++ txb2b = 0; ++ //netdev->tx_queue_len = 100; ++ /* maybe add some timeout factor ? 
*/ ++ break; ++ } ++ ++ if ((mac->type == e1000_82571 || ++ mac->type == e1000_82572) && ++ txb2b == 0) { ++ u32 tarc0; ++ tarc0 = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); ++ tarc0 &= ~SPEED_MODE_BIT; ++ E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc0); ++ } ++ ++#ifdef NETIF_F_TSO ++ /* disable TSO for pcie and 10/100 speeds, to avoid ++ * some hardware issues */ ++ if (!(adapter->flags & E1000_FLAG_TSO_FORCE) && ++ adapter->hw.bus.type == e1000_bus_type_pci_express){ ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ case SPEED_100: ++ DPRINTK(PROBE,INFO, ++ "10/100 speed: disabling TSO\n"); ++ netdev->features &= ~NETIF_F_TSO; ++#ifdef NETIF_F_TSO6 ++ netdev->features &= ~NETIF_F_TSO6; ++#endif ++ break; ++ case SPEED_1000: ++ netdev->features |= NETIF_F_TSO; ++#ifdef NETIF_F_TSO6 ++ netdev->features |= NETIF_F_TSO6; ++#endif ++ break; ++ default: ++ /* oops */ ++ break; ++ } ++ } ++#endif ++ ++ /* enable transmits in the hardware, need to do this ++ * after setting TARC0 */ ++ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); ++ tctl |= E1000_TCTL_EN; ++ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); ++ ++ rtnetif_carrier_on(netdev); ++ rtnetif_wake_queue(netdev); ++#ifdef CONFIG_E1000_MQ ++ if (netif_is_multiqueue(netdev)) ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ netif_wake_subqueue(netdev, i); ++#endif ++ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ schedule_delayed_work(&adapter->phy_info_task, ++ 2 * HZ); ++ adapter->smartspeed = 0; ++ } ++ } else { ++ if (rtnetif_carrier_ok(netdev)) { ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ DPRINTK(LINK, INFO, "NIC Link is Down\n"); ++ rtnetif_carrier_off(netdev); ++ rtnetif_stop_queue(netdev); ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ schedule_delayed_work(&adapter->phy_info_task, ++ 2 * HZ); ++ ++ /* 80003ES2LAN workaround-- ++ * For packet buffer work-around on link down event; ++ * disable receives in the ISR and ++ * reset device here in the watchdog ++ */ ++ if (adapter->flags & E1000_FLAG_RX_NEEDS_RESTART) ++ /* reset device */ ++ schedule_work(&adapter->reset_task); ++ } ++ ++ e1000_smartspeed(adapter); ++ } ++ ++link_up: ++ e1000_update_stats(adapter); ++ ++ mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; ++ adapter->tpt_old = adapter->stats.tpt; ++ mac->collision_delta = adapter->stats.colc - adapter->colc_old; ++ adapter->colc_old = adapter->stats.colc; ++ ++ adapter->gorc = adapter->stats.gorc - adapter->gorc_old; ++ adapter->gorc_old = adapter->stats.gorc; ++ adapter->gotc = adapter->stats.gotc - adapter->gotc_old; ++ adapter->gotc_old = adapter->stats.gotc; ++ ++ e1000_update_adaptive(&adapter->hw); ++ ++ if (!rtnetif_carrier_ok(netdev)) { ++ for (i = 0 ; i < adapter->num_tx_queues ; i++) { ++ tx_ring = &adapter->tx_ring[i]; ++ tx_pending |= (E1000_DESC_UNUSED(tx_ring) + 1 < ++ tx_ring->count); ++ } ++ if (tx_pending) { ++ /* We've lost link, so the controller stops DMA, ++ * but we've got queued Tx work that's never going ++ * to get done, so reset controller to flush Tx. ++ * (Do the reset outside of interrupt context). */ ++ adapter->tx_timeout_count++; ++ schedule_work(&adapter->reset_task); ++ } ++ } ++ ++ /* Cause software interrupt to ensure rx ring is cleaned */ ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_RXDMT0); ++ ++ /* Force detection of hung controller every watchdog period */ ++ adapter->detect_tx_hung = TRUE; ++ ++ /* With 82571 controllers, LAA may be overwritten due to controller ++ * reset from the other port. 
Set the appropriate LAA in RAR[0] */ ++ if (e1000_get_laa_state_82571(&adapter->hw) == TRUE) ++ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); ++ ++ /* Reschedule the task */ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ schedule_delayed_work(&adapter->watchdog_task, 2 * HZ); ++} ++ ++enum latency_range { ++ lowest_latency = 0, ++ low_latency = 1, ++ bulk_latency = 2, ++ latency_invalid = 255 ++}; ++ ++/** ++ * e1000_update_itr - update the dynamic ITR value based on statistics ++ * @adapter: pointer to adapter ++ * @itr_setting: current adapter->itr ++ * @packets: the number of packets during this measurement interval ++ * @bytes: the number of bytes during this measurement interval ++ * ++ * Stores a new ITR value based on packets and byte ++ * counts during the last interrupt. The advantage of per interrupt ++ * computation is faster updates and more accurate ITR for the current ++ * traffic pattern. Constants in this function were computed ++ * based on theoretical maximum wire speed and thresholds were set based ++ * on testing data as well as attempting to minimize response time ++ * while increasing bulk throughput. ++ * this functionality is controlled by the InterruptThrottleRate module ++ * parameter (see e1000_param.c) ++ **/ ++#if 0 ++static unsigned int e1000_update_itr(struct e1000_adapter *adapter, ++ u16 itr_setting, int packets, ++ int bytes) ++{ ++ unsigned int retval = itr_setting; ++ ++ if (unlikely(!(adapter->flags & E1000_FLAG_HAS_INTR_MODERATION))) ++ goto update_itr_done; ++ ++ if (packets == 0) ++ goto update_itr_done; ++ ++ switch (itr_setting) { ++ case lowest_latency: ++ /* handle TSO and jumbo frames */ ++ if (bytes/packets > 8000) ++ retval = bulk_latency; ++ else if ((packets < 5) && (bytes > 512)) { ++ retval = low_latency; ++ } ++ break; ++ case low_latency: /* 50 usec aka 20000 ints/s */ ++ if (bytes > 10000) { ++ /* this if handles the TSO accounting */ ++ if (bytes/packets > 8000) { ++ retval = bulk_latency; ++ } else if ((packets < 10) || ((bytes/packets) > 1200)) { ++ retval = bulk_latency; ++ } else if ((packets > 35)) { ++ retval = lowest_latency; ++ } ++ } else if (bytes/packets > 2000) { ++ retval = bulk_latency; ++ } else if (packets <= 2 && bytes < 512) { ++ retval = lowest_latency; ++ } ++ break; ++ case bulk_latency: /* 250 usec aka 4000 ints/s */ ++ if (bytes > 25000) { ++ if (packets > 35) { ++ retval = low_latency; ++ } ++ } else if (bytes < 6000) { ++ retval = low_latency; ++ } ++ break; ++ } ++ ++update_itr_done: ++ return retval; ++} ++#endif ++ ++static void e1000_set_itr(struct e1000_adapter *adapter) ++{ ++} ++ ++#define E1000_TX_FLAGS_CSUM 0x00000001 ++#define E1000_TX_FLAGS_VLAN 0x00000002 ++#define E1000_TX_FLAGS_TSO 0x00000004 ++#define E1000_TX_FLAGS_IPV4 0x00000008 ++#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 ++#define E1000_TX_FLAGS_VLAN_SHIFT 16 ++ ++static int e1000_tso(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring, struct sk_buff *skb) ++{ ++#ifdef NETIF_F_TSO ++ struct e1000_context_desc *context_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i; ++ u32 cmd_length = 0; ++ u16 ipcse = 0, tucse, mss; ++ u8 ipcss, ipcso, tucss, tucso, hdr_len; ++ int err; ++ ++ if (skb_is_gso(skb)) { ++ if (skb_header_cloned(skb)) { ++ err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); ++ if (err) ++ return err; ++ } ++ ++ hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); ++ mss = skb_shinfo(skb)->gso_size; ++ if (skb->protocol == htons(ETH_P_IP)) { ++ struct iphdr *iph = ip_hdr(skb); ++ iph->tot_len 
= 0; ++ iph->check = 0; ++ tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, ++ iph->daddr, 0, ++ IPPROTO_TCP, ++ 0); ++ cmd_length = E1000_TXD_CMD_IP; ++ ipcse = skb_transport_offset(skb) - 1; ++#ifdef NETIF_F_TSO6 ++ } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) { ++ ipv6_hdr(skb)->payload_len = 0; ++ tcp_hdr(skb)->check = ++ ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, ++ &ipv6_hdr(skb)->daddr, ++ 0, IPPROTO_TCP, 0); ++ ipcse = 0; ++#endif ++ } ++ ipcss = skb_network_offset(skb); ++ ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data; ++ tucss = skb_transport_offset(skb); ++ tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data; ++ tucse = 0; ++ ++ cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE | ++ E1000_TXD_CMD_TCP | (skb->len - (hdr_len))); ++ ++ i = tx_ring->next_to_use; ++ context_desc = E1000_CONTEXT_DESC(*tx_ring, i); ++ buffer_info = &tx_ring->buffer_info[i]; ++ ++ context_desc->lower_setup.ip_fields.ipcss = ipcss; ++ context_desc->lower_setup.ip_fields.ipcso = ipcso; ++ context_desc->lower_setup.ip_fields.ipcse = cpu_to_le16(ipcse); ++ context_desc->upper_setup.tcp_fields.tucss = tucss; ++ context_desc->upper_setup.tcp_fields.tucso = tucso; ++ context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse); ++ context_desc->tcp_seg_setup.fields.mss = cpu_to_le16(mss); ++ context_desc->tcp_seg_setup.fields.hdr_len = hdr_len; ++ context_desc->cmd_and_length = cpu_to_le32(cmd_length); ++ ++ buffer_info->time_stamp = jiffies; ++ buffer_info->next_to_watch = i; ++ ++ if (++i == tx_ring->count) i = 0; ++ tx_ring->next_to_use = i; ++ ++ return TRUE; ++ } ++#endif ++ ++ return FALSE; ++} ++ ++static bool e1000_tx_csum(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring, ++ struct sk_buff *skb) ++{ ++ struct e1000_context_desc *context_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i; ++ // u8 css; ++ u32 cmd_len = E1000_TXD_CMD_DEXT; ++ ++ if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) ++ return FALSE; ++ ++ switch (skb->protocol) { ++ case __constant_htons(ETH_P_IP): ++ break; ++ default: ++ if (unlikely(net_ratelimit())) { ++ DPRINTK(PROBE, WARNING, "checksum_partial proto=%x!\n", ++ skb->protocol); ++ } ++ break; ++ } ++ ++ // css = skb_transport_offset(skb); ++ ++ i = tx_ring->next_to_use; ++ buffer_info = &tx_ring->buffer_info[i]; ++ context_desc = E1000_CONTEXT_DESC(*tx_ring, i); ++ ++ context_desc->lower_setup.ip_config = 0; ++ context_desc->cmd_and_length = cpu_to_le32(cmd_len); ++ ++ buffer_info->time_stamp = jiffies; ++ buffer_info->next_to_watch = i; ++ ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ tx_ring->next_to_use = i; ++ ++ return TRUE; ++} ++ ++#define E1000_MAX_TXD_PWR 12 ++#define E1000_MAX_DATA_PER_TXD (1<len; ++ unsigned int offset = 0, size, count = 0, i; ++#ifdef MAX_SKB_FRAGS ++ unsigned int f; ++ len -= skb->data_len; ++#endif ++ ++ i = tx_ring->next_to_use; ++ ++ while (len) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ size = min(len, max_per_txd); ++#ifdef NETIF_F_TSO ++ /* Workaround for Controller erratum -- ++ * descriptor for non-tso packet in a linear SKB that follows a ++ * tso gets written back prematurely before the data is fully ++ * DMA'd to the controller */ ++ if (tx_ring->last_tx_tso && !skb_is_gso(skb)) { ++ tx_ring->last_tx_tso = 0; ++ if (!skb->data_len) ++ size -= 4; ++ } ++ ++ /* Workaround for premature desc write-backs ++ * in TSO mode. 
Append 4-byte sentinel desc */ ++ if (unlikely(mss && !nr_frags && size == len && size > 8)) ++ size -= 4; ++#endif ++ /* work-around for errata 10 and it applies ++ * to all controllers in PCI-X mode ++ * The fix is to make sure that the first descriptor of a ++ * packet is smaller than 2048 - 16 - 16 (or 2016) bytes ++ */ ++ if (unlikely((adapter->hw.bus.type == e1000_bus_type_pcix) && ++ (size > 2015) && count == 0)) ++ size = 2015; ++ ++ /* Workaround for potential 82544 hang in PCI-X. Avoid ++ * terminating buffers within evenly-aligned dwords. */ ++ if (unlikely(adapter->pcix_82544 && ++ !((unsigned long)(skb->data + offset + size - 1) & 4) && ++ size > 4)) ++ size -= 4; ++ ++ buffer_info->length = size; ++ /* set time_stamp *before* dma to help avoid a possible race */ ++ buffer_info->time_stamp = jiffies; ++ buffer_info->dma = ++ pci_map_single(adapter->pdev, ++ skb->data + offset, ++ size, ++ PCI_DMA_TODEVICE); ++ buffer_info->next_to_watch = i; ++ ++ len -= size; ++ offset += size; ++ count++; ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++#ifdef MAX_SKB_FRAGS ++ for (f = 0; f < nr_frags; f++) { ++ struct skb_frag_struct *frag; ++ ++ frag = &skb_shinfo(skb)->frags[f]; ++ len = frag->size; ++ offset = frag->page_offset; ++ ++ while (len) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ size = min(len, max_per_txd); ++#ifdef NETIF_F_TSO ++ /* Workaround for premature desc write-backs ++ * in TSO mode. Append 4-byte sentinel desc */ ++ if (unlikely(mss && f == (nr_frags-1) && size == len && size > 8)) ++ size -= 4; ++#endif ++ /* Workaround for potential 82544 hang in PCI-X. ++ * Avoid terminating buffers within evenly-aligned ++ * dwords. */ ++ if (unlikely(adapter->pcix_82544 && ++ !((unsigned long)(frag->page+offset+size-1) & 4) && ++ size > 4)) ++ size -= 4; ++ ++ buffer_info->length = size; ++ buffer_info->time_stamp = jiffies; ++ buffer_info->dma = ++ pci_map_page(adapter->pdev, ++ frag->page, ++ offset, ++ size, ++ PCI_DMA_TODEVICE); ++ buffer_info->next_to_watch = i; ++ ++ len -= size; ++ offset += size; ++ count++; ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ } ++#endif ++ ++ i = (i == 0) ? 
tx_ring->count - 1 : i - 1; ++ tx_ring->buffer_info[i].skb = skb; ++ tx_ring->buffer_info[first].next_to_watch = i; ++ ++ return count; ++} ++ ++static void e1000_tx_queue(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring, ++ int tx_flags, int count, nanosecs_abs_t *xmit_stamp) ++{ ++ struct e1000_tx_desc *tx_desc = NULL; ++ struct e1000_buffer *buffer_info; ++ u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS; ++ unsigned int i; ++ rtdm_lockctx_t context; ++ ++ if (likely(tx_flags & E1000_TX_FLAGS_TSO)) { ++ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D | ++ E1000_TXD_CMD_TSE; ++ txd_upper |= E1000_TXD_POPTS_TXSM << 8; ++ ++ if (likely(tx_flags & E1000_TX_FLAGS_IPV4)) ++ txd_upper |= E1000_TXD_POPTS_IXSM << 8; ++ } ++ ++ if (likely(tx_flags & E1000_TX_FLAGS_CSUM)) { ++ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; ++ txd_upper |= E1000_TXD_POPTS_TXSM << 8; ++ } ++ ++ if (unlikely(tx_flags & E1000_TX_FLAGS_VLAN)) { ++ txd_lower |= E1000_TXD_CMD_VLE; ++ txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); ++ } ++ ++ i = tx_ring->next_to_use; ++ ++ while (count--) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ tx_desc->lower.data = ++ cpu_to_le32(txd_lower | buffer_info->length); ++ tx_desc->upper.data = cpu_to_le32(txd_upper); ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++ tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); ++ ++ rtdm_lock_irqsave(context); ++ ++ if (xmit_stamp) ++ *xmit_stamp = cpu_to_be64(rtdm_clock_read() + *xmit_stamp); ++ ++ /* Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). */ ++ wmb(); ++ ++ tx_ring->next_to_use = i; ++ writel(i, adapter->hw.hw_addr + tx_ring->tdt); ++ ++ rtdm_lock_irqrestore(context); ++ /* we need this if more than one processor can write to our tail ++ * at a time, it synchronizes IO on IA64/Altix systems */ ++ mmiowb(); ++} ++ ++#define E1000_FIFO_HDR 0x10 ++#define E1000_82547_PAD_LEN 0x3E0 ++ ++/** ++ * 82547 workaround to avoid controller hang in half-duplex environment. ++ * The workaround is to avoid queuing a large packet that would span ++ * the internal Tx FIFO ring boundary by notifying the stack to resend ++ * the packet at a later time. This gives the Tx FIFO an opportunity to ++ * flush all packets. When that occurs, we reset the Tx FIFO pointers ++ * to the beginning of the Tx FIFO. 
++ **/ ++static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, ++ struct sk_buff *skb) ++{ ++ u32 fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head; ++ u32 skb_fifo_len = skb->len + E1000_FIFO_HDR; ++ ++ skb_fifo_len = ALIGN(skb_fifo_len, E1000_FIFO_HDR); ++ ++ if (adapter->link_duplex != HALF_DUPLEX) ++ goto no_fifo_stall_required; ++ ++ if (atomic_read(&adapter->tx_fifo_stall)) ++ return 1; ++ ++ if (skb_fifo_len >= (E1000_82547_PAD_LEN + fifo_space)) { ++ atomic_set(&adapter->tx_fifo_stall, 1); ++ return 1; ++ } ++ ++no_fifo_stall_required: ++ adapter->tx_fifo_head += skb_fifo_len; ++ if (adapter->tx_fifo_head >= adapter->tx_fifo_size) ++ adapter->tx_fifo_head -= adapter->tx_fifo_size; ++ return 0; ++} ++ ++#define MINIMUM_DHCP_PACKET_SIZE 282 ++static int e1000_transfer_dhcp_info(struct e1000_adapter *adapter, ++ struct sk_buff *skb) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u16 length, offset; ++#ifdef NETIF_F_HW_VLAN_TX ++ if (vlan_tx_tag_present(skb)) { ++ if (!((vlan_tx_tag_get(skb) == adapter->hw.mng_cookie.vlan_id) ++ && (adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN))) ++ return 0; ++ } ++#endif ++ if (skb->len > MINIMUM_DHCP_PACKET_SIZE) { ++ struct ethhdr *eth = (struct ethhdr *) skb->data; ++ if ((htons(ETH_P_IP) == eth->h_proto)) { ++ const struct iphdr *ip = ++ (struct iphdr *)((u8 *)skb->data+14); ++ if (IPPROTO_UDP == ip->protocol) { ++ struct udphdr *udp = ++ (struct udphdr *)((u8 *)ip + ++ (ip->ihl << 2)); ++ if (ntohs(udp->dest) == 67) { ++ offset = (u8 *)udp + 8 - skb->data; ++ length = skb->len - offset; ++ ++ return e1000_mng_write_dhcp_info(hw, ++ (u8 *)udp + 8, ++ length); ++ } ++ } ++ } ++ } ++ return 0; ++} ++ ++static int __e1000_maybe_stop_tx(struct net_device *netdev, ++ struct e1000_tx_ring *tx_ring, int size) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ rtnetif_stop_queue(netdev); ++ /* Herbert's original patch had: ++ * smp_mb__after_netif_stop_queue(); ++ * but since that doesn't exist yet, just open code it. */ ++ smp_mb(); ++ ++ /* We need to check again in a case another CPU has just ++ * made room available. */ ++ if (likely(E1000_DESC_UNUSED(tx_ring) < size)) ++ return -EBUSY; ++ ++ /* A reprieve! 
*/ ++ rtnetif_start_queue(netdev); ++ ++adapter->restart_queue; ++ return 0; ++} ++ ++static int e1000_maybe_stop_tx(struct net_device *netdev, ++ struct e1000_tx_ring *tx_ring, int size) ++{ ++ if (likely(E1000_DESC_UNUSED(tx_ring) >= size)) ++ return 0; ++ return __e1000_maybe_stop_tx(netdev, tx_ring, size); ++} ++ ++#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 ) ++static int e1000_xmit_frame_ring(struct sk_buff *skb, ++ struct net_device *netdev, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD; ++ unsigned int max_txd_pwr = E1000_MAX_TXD_PWR; ++ unsigned int tx_flags = 0; ++ unsigned int len = skb->len; ++ unsigned long irq_flags; ++ unsigned int nr_frags = 0; ++ unsigned int mss = 0; ++ int count = 0; ++ int tso; ++#ifdef MAX_SKB_FRAGS ++ unsigned int f; ++ len -= skb->data_len; ++#endif ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ if (unlikely(skb->len <= 0)) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ ++ /* 82571 and newer doesn't need the workaround that limited descriptor ++ * length to 4kB */ ++ if (adapter->hw.mac.type >= e1000_82571) ++ max_per_txd = 8192; ++ ++#ifdef NETIF_F_TSO ++ mss = skb_shinfo(skb)->gso_size; ++ /* The controller does a simple calculation to ++ * make sure there is enough room in the FIFO before ++ * initiating the DMA for each buffer. The calc is: ++ * 4 = ceil(buffer len/mss). To make sure we don't ++ * overrun the FIFO, adjust the max buffer len if mss ++ * drops. */ ++ if (mss) { ++ u8 hdr_len; ++ max_per_txd = min(mss << 2, max_per_txd); ++ max_txd_pwr = fls(max_per_txd) - 1; ++ ++ /* TSO Workaround for 82571/2/3 Controllers -- if skb->data ++ * points to just header, pull a few bytes of payload from ++ * frags into skb->data */ ++ hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); ++ if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) { ++ switch (adapter->hw.mac.type) { ++ unsigned int pull_size; ++ case e1000_82544: ++ /* Make sure we have room to chop off 4 bytes, ++ * and that the end alignment will work out to ++ * this hardware's requirements ++ * NOTE: this is a TSO only workaround ++ * if end byte alignment not correct move us ++ * into the next dword */ ++ if ((unsigned long)(skb_tail_pointer(skb) - 1) & 4) ++ break; ++ /* fall through */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ pull_size = min((unsigned int)4, skb->data_len); ++ if (!__pskb_pull_tail(skb, pull_size)) { ++ DPRINTK(DRV, ERR, ++ "__pskb_pull_tail failed.\n"); ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ len = skb->len - skb->data_len; ++ break; ++ default: ++ /* do nothing */ ++ break; ++ } ++ } ++ } ++ ++ /* reserve a descriptor for the offload context */ ++ if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL)) ++ count++; ++ count++; ++#else ++ if (skb->ip_summed == CHECKSUM_PARTIAL) ++ count++; ++#endif ++ ++#ifdef NETIF_F_TSO ++ /* Controller Erratum workaround */ ++ if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb)) ++ count++; ++#endif ++ ++ count += TXD_USE_COUNT(len, max_txd_pwr); ++ ++ if (adapter->pcix_82544) ++ count++; ++ ++ /* work-around for errata 10 and it applies to all controllers ++ * in PCI-X mode, so add one more descriptor to the count ++ */ ++ if (unlikely((adapter->hw.bus.type == e1000_bus_type_pcix) && ++ (len > 2015))) ++ count++; ++ ++#ifdef MAX_SKB_FRAGS ++ nr_frags 
= skb_shinfo(skb)->nr_frags; ++ for (f = 0; f < nr_frags; f++) ++ count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size, ++ max_txd_pwr); ++ if (adapter->pcix_82544) ++ count += nr_frags; ++ ++#endif ++ ++ if (adapter->hw.mac.tx_pkt_filtering && ++ (adapter->hw.mac.type == e1000_82573)) ++ e1000_transfer_dhcp_info(adapter, skb); ++ ++ rtdm_lock_get_irqsave(&tx_ring->tx_lock, irq_flags); ++ ++ /* need: count + 2 desc gap to keep tail from touching ++ * head, otherwise try next time */ ++ if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, count + 2))) { ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, irq_flags); ++ rtdm_printk("FATAL: rt_e1000 ran into tail close to head situation!\n"); ++ return NETDEV_TX_BUSY; ++ } ++ ++ if (unlikely(adapter->hw.mac.type == e1000_82547)) { ++ if (unlikely(e1000_82547_fifo_workaround(adapter, skb))) { ++ rtnetif_stop_queue(netdev); ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, irq_flags); ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ schedule_delayed_work(&adapter->fifo_stall_task, ++ 1); ++ rtdm_printk("FATAL: rt_e1000 ran into tail 82547 controller bug!\n"); ++ return NETDEV_TX_BUSY; ++ } ++ } ++ ++#ifndef NETIF_F_LLTX ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, irq_flags); ++ ++#endif ++#ifdef NETIF_F_HW_VLAN_TX ++ if (unlikely(adapter->vlgrp && vlan_tx_tag_present(skb))) { ++ tx_flags |= E1000_TX_FLAGS_VLAN; ++ tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT); ++ } ++#endif ++ ++ first = tx_ring->next_to_use; ++ ++ tso = e1000_tso(adapter, tx_ring, skb); ++ if (tso < 0) { ++ kfree_rtskb(skb); ++#ifdef NETIF_F_LLTX ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, irq_flags); ++#endif ++ return NETDEV_TX_OK; ++ } ++ ++ if (likely(tso)) { ++ tx_ring->last_tx_tso = 1; ++ tx_flags |= E1000_TX_FLAGS_TSO; ++ } else if (likely(e1000_tx_csum(adapter, tx_ring, skb))) ++ tx_flags |= E1000_TX_FLAGS_CSUM; ++ ++ /* Old method was to assume IPv4 packet by default if TSO was enabled. ++ * 82571 hardware supports TSO capabilities for IPv6 as well... ++ * no longer assume, we must. */ ++ if (likely(skb->protocol == htons(ETH_P_IP))) ++ tx_flags |= E1000_TX_FLAGS_IPV4; ++ ++ e1000_tx_queue(adapter, tx_ring, tx_flags, ++ e1000_tx_map(adapter, tx_ring, skb, first, ++ max_per_txd, nr_frags, mss), ++ skb->xmit_stamp); ++ ++ // netdev->trans_start = jiffies; ++ ++ /* Make sure there is space in the ring for the next send. */ ++ // e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2); ++ ++#ifdef NETIF_F_LLTX ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, irq_flags); ++#endif ++ return NETDEV_TX_OK; ++} ++ ++static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_tx_ring *tx_ring = adapter->tx_ring; ++ ++ /* This goes back to the question of how to logically map a tx queue ++ * to a flow. Right now, performance is impacted slightly negatively ++ * if using multiple tx queues. If the stack breaks away from a ++ * single qdisc implementation, we can look at this again. 
*/ ++ return (e1000_xmit_frame_ring(skb, netdev, tx_ring)); ++} ++ ++#ifdef CONFIG_E1000_MQ ++static int e1000_subqueue_xmit_frame(struct sk_buff *skb, ++ struct net_device *netdev, int queue) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_tx_ring *tx_ring = &adapter->tx_ring[queue]; ++ ++ return (e1000_xmit_frame_ring(skb, netdev, tx_ring)); ++} ++#endif ++ ++ ++/** ++ * e1000_tx_timeout - Respond to a Tx Hang ++ * @netdev: network interface device structure ++ **/ ++#if 0 ++static void e1000_tx_timeout(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ /* Do the reset outside of interrupt context */ ++ adapter->tx_timeout_count++; ++ schedule_work(&adapter->reset_task); ++} ++#endif ++ ++static void e1000_reset_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter; ++ adapter = container_of(work, struct e1000_adapter, reset_task); ++ ++ e1000_reinit_locked(adapter); ++} ++ ++#if 0 ++/** ++ * e1000_get_stats - Get System Network Statistics ++ * @netdev: network interface device structure ++ * ++ * Returns the address of the device statistics structure. ++ * The statistics are actually updated from the timer callback. ++ **/ ++static struct net_device_stats * e1000_get_stats(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ /* only return the current stats */ ++ return &adapter->net_stats; ++} ++ ++/** ++ * e1000_change_mtu - Change the Maximum Transfer Unit ++ * @netdev: network interface device structure ++ * @new_mtu: new value for maximum frame size ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++static int e1000_change_mtu(struct net_device *netdev, int new_mtu) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ int max_frame = new_mtu + ETH_HLEN + ETHERNET_FCS_SIZE; ++ u16 eeprom_data = 0; ++ ++ if ((max_frame < ETH_ZLEN + ETHERNET_FCS_SIZE) || ++ (max_frame > MAX_JUMBO_FRAME_SIZE)) { ++ DPRINTK(PROBE, ERR, "Invalid MTU setting\n"); ++ return -EINVAL; ++ } ++ ++ /* Adapter-specific max frame size limits. */ ++ switch (adapter->hw.mac.type) { ++ case e1000_undefined: ++ case e1000_82542: ++ case e1000_ich8lan: ++ if (max_frame > ETH_FRAME_LEN + ETHERNET_FCS_SIZE) { ++ DPRINTK(PROBE, ERR, "Jumbo Frames not supported.\n"); ++ return -EINVAL; ++ } ++ break; ++ case e1000_82573: ++ /* Jumbo Frames not supported if: ++ * - this is not an 82573L device ++ * - ASPM is enabled in any way (0x1A bits 3:2) */ ++ e1000_read_nvm(&adapter->hw, NVM_INIT_3GIO_3, 1, &eeprom_data); ++ if ((adapter->hw.device_id != E1000_DEV_ID_82573L) || ++ (eeprom_data & NVM_WORD1A_ASPM_MASK)) { ++ if (max_frame > ETH_FRAME_LEN + ETHERNET_FCS_SIZE) { ++ DPRINTK(PROBE, ERR, ++ "Jumbo Frames not supported.\n"); ++ return -EINVAL; ++ } ++ break; ++ } ++ /* ERT will be enabled later to enable wire speed receives */ ++ ++ /* fall through to get support */ ++ case e1000_ich9lan: ++ if ((adapter->hw.phy.type == e1000_phy_ife) && ++ (max_frame > ETH_FRAME_LEN + ETHERNET_FCS_SIZE)) { ++ DPRINTK(PROBE, ERR, "Jumbo Frames not supported.\n"); ++ return -EINVAL; ++ } ++ /* fall through to get support */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++#define MAX_STD_JUMBO_FRAME_SIZE 9234 ++ if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { ++ DPRINTK(PROBE, ERR, "MTU > 9216 not supported.\n"); ++ return -EINVAL; ++ } ++ break; ++ default: ++ /* Capable of supporting up to MAX_JUMBO_FRAME_SIZE limit. 
*/ ++ break; ++ } ++ ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ msleep(1); ++ /* e1000_down has a dependency on max_frame_size */ ++ adapter->max_frame_size = max_frame; ++ if (rtnetif_running(netdev)) ++ e1000_down(adapter); ++ ++ /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN ++ * means we reserve 2 more, this pushes us to allocate from the next ++ * larger slab size. ++ * i.e. RXBUFFER_2048 --> size-4096 slab ++ * however with the new *_jumbo_rx* routines, jumbo receives will use ++ * fragmented skbs */ ++ ++ if (max_frame <= E1000_RXBUFFER_256) ++ adapter->rx_buffer_len = E1000_RXBUFFER_256; ++ else if (max_frame <= E1000_RXBUFFER_512) ++ adapter->rx_buffer_len = E1000_RXBUFFER_512; ++ else if (max_frame <= E1000_RXBUFFER_1024) ++ adapter->rx_buffer_len = E1000_RXBUFFER_1024; ++ else if (max_frame <= E1000_RXBUFFER_2048) ++ adapter->rx_buffer_len = E1000_RXBUFFER_2048; ++#ifdef CONFIG_E1000_NAPI ++ else ++ adapter->rx_buffer_len = E1000_RXBUFFER_4096; ++#else ++ else if (max_frame <= E1000_RXBUFFER_4096) ++ adapter->rx_buffer_len = E1000_RXBUFFER_4096; ++ else if (max_frame <= E1000_RXBUFFER_8192) ++ adapter->rx_buffer_len = E1000_RXBUFFER_8192; ++ else if (max_frame <= E1000_RXBUFFER_16384) ++ adapter->rx_buffer_len = E1000_RXBUFFER_16384; ++#endif ++ ++ /* adjust allocation if LPE protects us, and we aren't using SBP */ ++ if (!e1000_tbi_sbp_enabled_82543(&adapter->hw) && ++ ((max_frame == ETH_FRAME_LEN + ETHERNET_FCS_SIZE) || ++ (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE))) ++ adapter->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE; ++ ++ DPRINTK(PROBE, INFO, "changing MTU from %d to %d\n", ++ netdev->mtu, new_mtu); ++ netdev->mtu = new_mtu; ++ ++ if (rtnetif_running(netdev)) ++ e1000_up(adapter); ++ else ++ e1000_reset(adapter); ++ ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ ++ return 0; ++} ++#endif ++ ++/** ++ * e1000_update_stats - Update the board statistics counters ++ * @adapter: board private structure ++ **/ ++void e1000_update_stats(struct e1000_adapter *adapter) ++{ ++} ++#ifdef SIOCGMIIPHY ++ ++/** ++ * e1000_phy_read_status - Update the PHY register status snapshot ++ * @adapter: board private structure ++ **/ ++static void e1000_phy_read_status(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_phy_regs *phy = &adapter->phy_regs; ++ int ret_val = E1000_SUCCESS; ++ unsigned long irq_flags; ++ ++ ++ rtdm_lock_get_irqsave(&adapter->stats_lock, irq_flags); ++ ++ if (E1000_READ_REG(hw, E1000_STATUS)& E1000_STATUS_LU) { ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy->bmcr); ++ ret_val |= e1000_read_phy_reg(hw, PHY_STATUS, &phy->bmsr); ++ ret_val |= e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, ++ &phy->advertise); ++ ret_val |= e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy->lpa); ++ ret_val |= e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, ++ &phy->expansion); ++ ret_val |= e1000_read_phy_reg(hw, PHY_1000T_CTRL, ++ &phy->ctrl1000); ++ ret_val |= e1000_read_phy_reg(hw, PHY_1000T_STATUS, ++ &phy->stat1000); ++ ret_val |= e1000_read_phy_reg(hw, PHY_EXT_STATUS, ++ &phy->estatus); ++ if (ret_val) ++ DPRINTK(DRV, WARNING, "Error reading PHY register\n"); ++ } else { ++ /* Do not read PHY registers if link is not up ++ * Set values to typical power-on defaults */ ++ phy->bmcr = (BMCR_SPEED1000 | BMCR_ANENABLE | BMCR_FULLDPLX); ++ phy->bmsr = (BMSR_100FULL | BMSR_100HALF | BMSR_10FULL | ++ BMSR_10HALF | BMSR_ESTATEN | BMSR_ANEGCAPABLE | ++ BMSR_ERCAP); ++ phy->advertise = (ADVERTISE_PAUSE_ASYM | 
ADVERTISE_PAUSE_CAP | ++ ADVERTISE_ALL | ADVERTISE_CSMA); ++ phy->lpa = 0; ++ phy->expansion = EXPANSION_ENABLENPAGE; ++ phy->ctrl1000 = ADVERTISE_1000FULL; ++ phy->stat1000 = 0; ++ phy->estatus = (ESTATUS_1000_TFULL | ESTATUS_1000_THALF); ++ } ++ ++ rtdm_lock_put_irqrestore(&adapter->stats_lock, irq_flags); ++} ++#endif ++ ++ ++/** ++ * e1000_intr_msi - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int e1000_intr_msi(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *netdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++#ifndef CONFIG_E1000_NAPI ++ int i, j; ++ int rx_cleaned, tx_cleaned; ++#endif ++ u32 icr = E1000_READ_REG(hw, E1000_ICR); ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ ++ ++#ifdef CONFIG_E1000_NAPI ++ /* read ICR disables interrupts using IAM, so keep up with our ++ * enable/disable accounting */ ++ atomic_inc(&adapter->irq_sem); ++#endif ++ if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { ++ hw->mac.get_link_status = 1; ++ /* ICH8 workaround-- Call gig speed drop workaround on cable ++ * disconnect (LSC) before accessing any PHY registers */ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3) && ++ (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU))) ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* 80003ES2LAN workaround-- For packet buffer work-around on ++ * link down event; disable receives here in the ISR and reset ++ * adapter in watchdog */ ++ if (rtnetif_carrier_ok(netdev) && ++ (adapter->flags & E1000_FLAG_RX_NEEDS_RESTART)) { ++ /* disable receives */ ++ u32 rctl = E1000_READ_REG(hw, E1000_RCTL); ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ adapter->flags |= E1000_FLAG_RX_RESTART_NOW; ++ } ++ /* guard against interrupt when we're going down */ ++ //if (!test_bit(__E1000_DOWN, &adapter->state)) ++ // mod_timer(&adapter->watchdog_timer, jiffies + 1); ++ } ++ ++#ifdef CONFIG_E1000_NAPI ++ /* XXX only using ring 0 for napi */ ++ if (likely(netif_rx_schedule_prep(netdev, &adapter->rx_ring[0].napi))) { ++ adapter->total_tx_bytes = 0; ++ adapter->total_tx_packets = 0; ++ adapter->total_rx_bytes = 0; ++ adapter->total_rx_packets = 0; ++ __netif_rx_schedule(netdev, &adapter->rx_ring[0].napi); ++ } else { ++ atomic_dec(&adapter->irq_sem); ++ } ++#else ++ adapter->total_tx_bytes = 0; ++ adapter->total_rx_bytes = 0; ++ adapter->total_tx_packets = 0; ++ adapter->total_rx_packets = 0; ++ adapter->data_received = 0; ++ ++ for (i = 0; i < E1000_MAX_INTR; i++) { ++ rx_cleaned = 0; ++ for (j = 0; j < adapter->num_rx_queues; j++) ++ rx_cleaned |= adapter->clean_rx(adapter, ++ &adapter->rx_ring[j], &time_stamp); ++ ++ tx_cleaned = 0; ++ for (j = 0 ; j < adapter->num_tx_queues ; j++) ++ tx_cleaned |= e1000_clean_tx_irq(adapter, ++ &adapter->tx_ring[j]); ++ ++ if (!rx_cleaned && !tx_cleaned) ++ break; ++ } ++ ++ if (likely(adapter->itr_setting & 3)) ++ e1000_set_itr(adapter); ++#endif ++ ++ if (adapter->data_received) ++ rt_mark_stack_mgr(netdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++/** ++ * e1000_intr - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int e1000_intr(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *netdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; 
++ u32 rctl, icr = E1000_READ_REG(hw, E1000_ICR); ++#ifndef CONFIG_E1000_NAPI ++ int i, j; ++ int rx_cleaned, tx_cleaned; ++#endif ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ if (unlikely(!icr)) ++ return RTDM_IRQ_NONE; /* Not our interrupt */ ++ ++#ifdef CONFIG_E1000_NAPI ++ /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is ++ * not set, then the adapter didn't send an interrupt */ ++ if ((adapter->flags & E1000_FLAG_INT_ASSERT_AUTO_MASK) && ++ !(icr & E1000_ICR_INT_ASSERTED)) ++ return IRQ_NONE; ++ ++ /* Interrupt Auto-Mask...upon reading ICR, ++ * interrupts are masked. No need for the ++ * IMC write, but it does mean we should ++ * account for it ASAP. */ ++ if (likely(hw->mac.type >= e1000_82571)) ++ atomic_inc(&adapter->irq_sem); ++#endif ++ ++ if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) { ++ hw->mac.get_link_status = 1; ++ /* ICH8 workaround-- Call gig speed drop workaround on cable ++ * disconnect (LSC) before accessing any PHY registers */ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3) && ++ (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU))) ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* 80003ES2LAN workaround-- ++ * For packet buffer work-around on link down event; ++ * disable receives here in the ISR and ++ * reset adapter in watchdog ++ */ ++ if (rtnetif_carrier_ok(netdev) && ++ (adapter->flags & E1000_FLAG_RX_NEEDS_RESTART)) { ++ /* disable receives */ ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ adapter->flags |= E1000_FLAG_RX_RESTART_NOW; ++ } ++ /* guard against interrupt when we're going down */ ++ //if (!test_bit(__E1000_DOWN, &adapter->state)) ++ // mod_timer(&adapter->watchdog_timer, jiffies + 1); ++ } ++ ++#ifdef CONFIG_E1000_NAPI ++ if (hw->mac.type < e1000_82571) { ++ /* disable interrupts, without the synchronize_irq bit */ ++ atomic_inc(&adapter->irq_sem); ++ E1000_WRITE_REG(hw, E1000_IMC, ~0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ /* XXX only using ring 0 for napi */ ++ if (likely(netif_rx_schedule_prep(netdev, &adapter->rx_ring[0].napi))) { ++ adapter->total_tx_bytes = 0; ++ adapter->total_tx_packets = 0; ++ adapter->total_rx_bytes = 0; ++ adapter->total_rx_packets = 0; ++ __netif_rx_schedule(netdev, &adapter->rx_ring[0].napi); ++ } else { ++ atomic_dec(&adapter->irq_sem); ++ } ++#else ++ /* Writing IMC and IMS is needed for 82547. ++ * Due to Hub Link bus being occupied, an interrupt ++ * de-assertion message is not able to be sent. ++ * When an interrupt assertion message is generated later, ++ * two messages are re-ordered and sent out. ++ * That causes APIC to think 82547 is in de-assertion ++ * state, while 82547 is in assertion state, resulting ++ * in dead lock. Writing IMC forces 82547 into ++ * de-assertion state. 
++ */ ++ if (hw->mac.type == e1000_82547 || hw->mac.type == e1000_82547_rev_2) { ++ atomic_inc(&adapter->irq_sem); ++ E1000_WRITE_REG(hw, E1000_IMC, ~0); ++ } ++ ++ adapter->data_received = 0; ++ adapter->total_tx_bytes = 0; ++ adapter->total_rx_bytes = 0; ++ adapter->total_tx_packets = 0; ++ adapter->total_rx_packets = 0; ++ ++ for (i = 0; i < E1000_MAX_INTR; i++) { ++ rx_cleaned = 0; ++ for (j = 0; j < adapter->num_rx_queues; j++) ++ rx_cleaned |= adapter->clean_rx(adapter, ++ &adapter->rx_ring[j], &time_stamp); ++ ++ tx_cleaned = 0; ++ for (j = 0 ; j < adapter->num_tx_queues ; j++) ++ tx_cleaned |= e1000_clean_tx_irq(adapter, ++ &adapter->tx_ring[j]); ++ ++ if (!rx_cleaned && !tx_cleaned) ++ break; ++ } ++ ++ if (likely(adapter->itr_setting & 3)) ++ e1000_set_itr(adapter); ++ ++ if (hw->mac.type == e1000_82547 || hw->mac.type == e1000_82547_rev_2) ++ e1000_irq_enable(adapter); ++ ++#endif ++ ++ if (adapter->data_received) ++ rt_mark_stack_mgr(netdev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++#ifdef CONFIG_E1000_NAPI ++/** ++ * e1000_poll - NAPI Rx polling callback ++ * @napi: struct associated with this polling callback ++ * @budget: amount of packets driver is allowed to process this poll ++ **/ ++static int e1000_poll(struct napi_struct *napi, int budget) ++{ ++ struct e1000_rx_ring *rx_ring = container_of(napi, struct e1000_rx_ring, ++ napi); ++ struct e1000_adapter *adapter = rx_ring->adapter; ++ struct net_device *netdev = adapter->netdev; ++ int tx_clean_complete = 1, work_done = 0; ++ int i; ++ ++ /* FIXME: i think this code is un-necessary when using base netdev */ ++ /* Keep link state information with original netdev */ ++ if (!rtnetif_carrier_ok(netdev)) ++ goto quit_polling; ++ ++ /* e1000_poll is called per-cpu. This lock protects ++ * tx_ring[i] from being cleaned by multiple cpus ++ * simultaneously. A failure obtaining the lock means ++ * tx_ring[i] is currently being cleaned anyway. 
*/ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++#ifdef CONFIG_E1000_MQ ++ if (spin_trylock(&adapter->tx_ring[i].tx_queue_lock)) { ++ tx_clean_complete &= e1000_clean_tx_irq(adapter, ++ &adapter->tx_ring[i]); ++ spin_unlock(&adapter->tx_ring[i].tx_queue_lock); ++ } ++#else ++ if (spin_trylock(&adapter->tx_queue_lock)) { ++ tx_clean_complete &= e1000_clean_tx_irq(adapter, ++ &adapter->tx_ring[i]); ++ spin_unlock(&adapter->tx_queue_lock); ++ } ++#endif ++ } ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ adapter->clean_rx(adapter, &adapter->rx_ring[i], ++ &work_done, budget); ++ } ++ ++ /* If no Tx and not enough Rx work done, exit the polling mode */ ++ if ((tx_clean_complete && (work_done == 0)) || ++ !rtnetif_running(netdev)) { ++quit_polling: ++ if (likely(adapter->itr_setting & 3)) ++ e1000_set_itr(adapter); ++ netif_rx_complete(netdev, napi); ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ atomic_dec(&adapter->irq_sem); ++ else ++ e1000_irq_enable(adapter); ++ return 0; ++ } ++ ++ /* need to make sure the stack is aware of a tx-only poll loop */ ++ if (!tx_clean_complete) ++ work_done = budget; ++ ++ return work_done; ++} ++ ++#endif ++/** ++ * e1000_clean_tx_irq - Reclaim resources after transmit completes ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct net_device *netdev = adapter->netdev; ++ struct e1000_tx_desc *tx_desc, *eop_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i, eop; ++#ifdef CONFIG_E1000_NAPI ++ unsigned int count = 0; ++#endif ++ bool cleaned = FALSE; ++ bool retval = TRUE; ++ unsigned int total_tx_bytes=0, total_tx_packets=0; ++ ++ ++ i = tx_ring->next_to_clean; ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++ ++ while (eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) { ++ for (cleaned = FALSE; !cleaned; ) { ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ buffer_info = &tx_ring->buffer_info[i]; ++ cleaned = (i == eop); ++ ++#ifdef CONFIG_E1000_MQ ++ tx_ring->tx_stats.bytes += buffer_info->length; ++#endif ++ if (cleaned) { ++ struct sk_buff *skb = buffer_info->skb; ++#ifdef NETIF_F_TSO ++ unsigned int segs, bytecount; ++ segs = skb_shinfo(skb)->gso_segs ?: 1; ++ /* multiply data chunks by size of headers */ ++ bytecount = ((segs - 1) * skb_headlen(skb)) + ++ skb->len; ++ total_tx_packets += segs; ++ total_tx_bytes += bytecount; ++#else ++ total_tx_packets++; ++ total_tx_bytes += skb->len; ++#endif ++ } ++ e1000_unmap_and_free_tx_resource(adapter, buffer_info); ++ tx_desc->upper.data = 0; ++ ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++#ifdef CONFIG_E1000_MQ ++ tx_ring->tx_stats.packets++; ++#endif ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++#ifdef CONFIG_E1000_NAPI ++#define E1000_TX_WEIGHT 64 ++ /* weight of a sort for tx, to avoid endless transmit cleanup */ ++ if (count++ == E1000_TX_WEIGHT) { ++ retval = FALSE; ++ break; ++ } ++#endif ++ } ++ ++ tx_ring->next_to_clean = i; ++ ++#define TX_WAKE_THRESHOLD 32 ++ if (unlikely(cleaned && rtnetif_carrier_ok(netdev) && ++ E1000_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) { ++ /* Make sure that anybody stopping the queue after this ++ * sees the new next_to_clean. 
++ */ ++ smp_mb(); ++ ++ if (rtnetif_queue_stopped(netdev) && ++ !(test_bit(__E1000_DOWN, &adapter->state))) { ++ rtnetif_wake_queue(netdev); ++ ++adapter->restart_queue; ++ } ++ } ++ ++ if (adapter->detect_tx_hung) { ++ /* Detect a transmit hang in hardware, this serializes the ++ * check with the clearing of time_stamp and movement of i */ ++ adapter->detect_tx_hung = FALSE; ++ if (tx_ring->buffer_info[eop].dma && ++ time_after(jiffies, tx_ring->buffer_info[eop].time_stamp + ++ (adapter->tx_timeout_factor * HZ)) ++ && !(E1000_READ_REG(&adapter->hw, E1000_STATUS) & ++ E1000_STATUS_TXOFF)) { ++ ++ /* detected Tx unit hang */ ++ DPRINTK(DRV, ERR, "Detected Tx Unit Hang\n" ++ " Tx Queue <%lu>\n" ++ " TDH <%x>\n" ++ " TDT <%x>\n" ++ " next_to_use <%x>\n" ++ " next_to_clean <%x>\n" ++ "buffer_info[next_to_clean]\n" ++ " time_stamp <%lx>\n" ++ " next_to_watch <%x>\n" ++ " jiffies <%lx>\n" ++ " next_to_watch.status <%x>\n", ++ (unsigned long)((tx_ring - adapter->tx_ring) / ++ sizeof(struct e1000_tx_ring)), ++ readl(adapter->hw.hw_addr + tx_ring->tdh), ++ readl(adapter->hw.hw_addr + tx_ring->tdt), ++ tx_ring->next_to_use, ++ tx_ring->next_to_clean, ++ tx_ring->buffer_info[eop].time_stamp, ++ eop, ++ jiffies, ++ eop_desc->upper.fields.status); ++ rtnetif_stop_queue(netdev); ++ } ++ } ++ adapter->total_tx_bytes += total_tx_bytes; ++ adapter->total_tx_packets += total_tx_packets; ++ adapter->net_stats.tx_bytes += total_tx_bytes; ++ adapter->net_stats.tx_packets += total_tx_packets; ++ return retval; ++} ++ ++/** ++ * e1000_rx_checksum - Receive Checksum Offload for 82543 ++ * @adapter: board private structure ++ * @status_err: receive descriptor status and error fields ++ * @csum: receive descriptor csum field ++ * @sk_buff: socket buffer with received data ++ **/ ++static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, ++ u32 csum, struct sk_buff *skb) ++{ ++ u16 status = (u16)status_err; ++ u8 errors = (u8)(status_err >> 24); ++ skb->ip_summed = CHECKSUM_NONE; ++ ++ /* 82543 or newer only */ ++ if (unlikely(adapter->hw.mac.type < e1000_82543)) return; ++ /* Ignore Checksum bit is set */ ++ if (unlikely(status & E1000_RXD_STAT_IXSM)) return; ++ /* TCP/UDP checksum error bit is set */ ++ if (unlikely(errors & E1000_RXD_ERR_TCPE)) { ++ /* let the stack verify checksum errors */ ++ adapter->hw_csum_err++; ++ return; ++ } ++ /* TCP/UDP Checksum has not been calculated */ ++ if (adapter->hw.mac.type <= e1000_82547_rev_2) { ++ if (!(status & E1000_RXD_STAT_TCPCS)) ++ return; ++ } else { ++ if (!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) ++ return; ++ } ++ /* It must be a TCP or UDP packet with a valid checksum */ ++ if (likely(status & E1000_RXD_STAT_TCPCS)) { ++ /* TCP checksum is good */ ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ } else if (adapter->hw.mac.type > e1000_82547_rev_2) { ++ /* IP fragment with UDP payload */ ++ /* Hardware complements the payload checksum, so we undo it ++ * and then put the value in host order for further stack use. 
++ */ ++ csum = ntohl(csum ^ 0xFFFF); ++ skb->csum = csum; ++ skb->ip_summed = CHECKSUM_COMPLETE; ++ } ++ adapter->hw_csum_good++; ++} ++ ++/** ++ * e1000_receive_skb - helper function to handle rx indications ++ * @adapter: board private structure ++ * @status: descriptor status field as written by hardware ++ * @vlan: descriptor vlan field as written by hardware (no le/be conversion) ++ * @skb: pointer to sk_buff to be indicated to stack ++ **/ ++static void e1000_receive_skb(struct e1000_adapter *adapter, u8 status, ++ u16 vlan, struct sk_buff *skb) ++{ ++#ifdef CONFIG_E1000_NAPI ++#ifdef NETIF_F_HW_VLAN_TX ++ if (unlikely(adapter->vlgrp && (status & E1000_RXD_STAT_VP))) { ++ vlan_hwaccel_receive_skb(skb, adapter->vlgrp, ++ le16_to_cpu(vlan) & ++ E1000_RXD_SPC_VLAN_MASK); ++ } else { ++ netif_receive_skb(skb); ++ } ++#else ++ netif_receive_skb(skb); ++#endif ++#else /* CONFIG_E1000_NAPI */ ++#ifdef NETIF_F_HW_VLAN_TX ++ if (unlikely(adapter->vlgrp && (status & E1000_RXD_STAT_VP))) { ++ vlan_hwaccel_rx(skb, adapter->vlgrp, ++ le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK); ++ } else { ++ netif_rx(skb); ++ } ++#else ++ rtnetif_rx(skb); ++#endif ++#endif /* CONFIG_E1000_NAPI */ ++} ++ ++#ifdef CONFIG_E1000_NAPI ++/* NOTE: these new jumbo frame routines rely on NAPI because of the ++ * pskb_may_pull call, which eventually must call kmap_atomic which you cannot ++ * call from hard irq context */ ++ ++/** ++ * e1000_consume_page - helper function ++ **/ ++static void e1000_consume_page(struct e1000_rx_buffer *bi, struct sk_buff *skb, ++ u16 length) ++{ ++ bi->page = NULL; ++ skb->len += length; ++ skb->data_len += length; ++ skb->truesize += length; ++} ++ ++/** ++ * e1000_clean_jumbo_rx_irq - Send received data up the network stack; legacy ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do) ++{ ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc, *next_rxd; ++ struct e1000_rx_buffer *buffer_info, *next_buffer; ++ unsigned long irq_flags; ++ u32 length; ++ unsigned int i; ++ int cleaned_count = 0; ++ bool cleaned = FALSE; ++ unsigned int total_rx_bytes=0, total_rx_packets=0; ++ ++ i = rx_ring->next_to_clean; ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (rx_desc->status & E1000_RXD_STAT_DD) { ++ struct sk_buff *skb; ++ u8 status; ++ ++ if (*work_done >= work_to_do) ++ break; ++ (*work_done)++; ++ ++ status = rx_desc->status; ++ skb = buffer_info->skb; ++ buffer_info->skb = NULL; ++ ++ if (++i == rx_ring->count) i = 0; ++ next_rxd = E1000_RX_DESC(*rx_ring, i); ++ prefetch(next_rxd); ++ ++ next_buffer = &rx_ring->buffer_info[i]; ++ ++ cleaned = TRUE; ++ cleaned_count++; ++ pci_unmap_page(pdev, ++ buffer_info->dma, ++ PAGE_SIZE, ++ PCI_DMA_FROMDEVICE); ++ buffer_info->dma = 0; ++ ++ length = le16_to_cpu(rx_desc->length); ++ ++ /* errors is only valid for DD + EOP descriptors */ ++ if (unlikely((status & E1000_RXD_STAT_EOP) && ++ (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK))) { ++ u8 last_byte = *(skb->data + length - 1); ++ if (TBI_ACCEPT(&adapter->hw, status, ++ rx_desc->errors, length, last_byte, ++ adapter->min_frame_size, ++ adapter->max_frame_size)) { ++ rtdm_lock_get_irqsave(&adapter->stats_lock, ++ irq_flags); ++ 
e1000_tbi_adjust_stats_82543(&adapter->hw, ++ &adapter->stats, ++ length, skb->data, ++ adapter->max_frame_size); ++ rtdm_lock_put_irqrestore(&adapter->stats_lock, ++ irq_flags); ++ length--; ++ } else { ++ /* recycle both page and skb */ ++ buffer_info->skb = skb; ++ /* an error means any chain goes out the window ++ * too */ ++ if (rx_ring->rx_skb_top) ++ kfree_rtskb(rx_ring->rx_skb_top); ++ rx_ring->rx_skb_top = NULL; ++ goto next_desc; ++ } ++ } ++ ++#define rxtop rx_ring->rx_skb_top ++ if (!(status & E1000_RXD_STAT_EOP)) { ++ /* this descriptor is only the beginning (or middle) */ ++ if (!rxtop) { ++ /* this is the beginning of a chain */ ++ rxtop = skb; ++ skb_fill_page_desc(rxtop, 0, buffer_info->page, ++ 0, length); ++ } else { ++ /* this is the middle of a chain */ ++ skb_fill_page_desc(rxtop, ++ skb_shinfo(rxtop)->nr_frags, ++ buffer_info->page, 0, length); ++ /* re-use the skb, only consumed the page */ ++ buffer_info->skb = skb; ++ } ++ e1000_consume_page(buffer_info, rxtop, length); ++ goto next_desc; ++ } else { ++ if (rxtop) { ++ /* end of the chain */ ++ skb_fill_page_desc(rxtop, ++ skb_shinfo(rxtop)->nr_frags, ++ buffer_info->page, 0, length); ++ /* re-use the current skb, we only consumed the ++ * page */ ++ buffer_info->skb = skb; ++ skb = rxtop; ++ rxtop = NULL; ++ e1000_consume_page(buffer_info, skb, length); ++ } else { ++ /* no chain, got EOP, this buf is the packet ++ * copybreak to save the put_page/alloc_page */ ++ if (length <= copybreak && ++ skb_tailroom(skb) >= length) { ++ u8 *vaddr; ++ vaddr = kmap_atomic(buffer_info->page, ++ KM_SKB_DATA_SOFTIRQ); ++ memcpy(skb_tail_pointer(skb), vaddr, length); ++ kunmap_atomic(vaddr, ++ KM_SKB_DATA_SOFTIRQ); ++ /* re-use the page, so don't erase ++ * buffer_info->page */ ++ rtskb_put(skb, length); ++ } else { ++ skb_fill_page_desc(skb, 0, ++ buffer_info->page, 0, ++ length); ++ e1000_consume_page(buffer_info, skb, ++ length); ++ } ++ } ++ } ++ ++ /* Receive Checksum Offload XXX recompute due to CRC strip? 
*/ ++ e1000_rx_checksum(adapter, ++ (u32)(status) | ++ ((u32)(rx_desc->errors) << 24), ++ le16_to_cpu(rx_desc->csum), skb); ++ ++ pskb_trim(skb, skb->len - 4); ++ ++ /* probably a little skewed due to removing CRC */ ++ total_rx_bytes += skb->len; ++ total_rx_packets++; ++ ++ /* eth type trans needs skb->data to point to something */ ++ if (!pskb_may_pull(skb, ETH_HLEN)) { ++ DPRINTK(DRV, ERR, "__pskb_pull_tail failed.\n"); ++ kfree_rtskb(skb); ++ goto next_desc; ++ } ++ ++ skb->protocol = rt_eth_type_trans(skb, netdev); ++ ++ e1000_receive_skb(adapter, status, rx_desc->special, skb); ++ adapter->data_received = 1; // Set flag for the main interrupt routine ++ ++ netdev->last_rx = jiffies; ++#ifdef CONFIG_E1000_MQ ++ rx_ring->rx_stats.packets++; ++ rx_ring->rx_stats.bytes += length; ++#endif ++ ++next_desc: ++ rx_desc->status = 0; ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) { ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ cleaned_count = 0; ++ } ++ ++ /* use prefetched values */ ++ rx_desc = next_rxd; ++ buffer_info = next_buffer; ++ } ++ rx_ring->next_to_clean = i; ++ ++ cleaned_count = E1000_DESC_UNUSED(rx_ring); ++ if (cleaned_count) ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ ++ adapter->total_rx_packets += total_rx_packets; ++ adapter->total_rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_packets += total_rx_packets; ++ return cleaned; ++} ++#endif /* NAPI */ ++ ++ ++/** ++ * e1000_clean_rx_irq - Send received data up the network stack; legacy ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++#ifdef CONFIG_E1000_NAPI ++static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do) ++#else ++static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp) ++#endif ++{ ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc, *next_rxd; ++ struct e1000_rx_buffer *buffer_info, *next_buffer; ++ u32 length; ++ unsigned int i; ++ int cleaned_count = 0; ++ bool cleaned = FALSE; ++ unsigned int total_rx_bytes=0, total_rx_packets=0; ++ ++ // rtdm_printk("<2> e1000_clean_rx_irq %i\n", __LINE__); ++ ++ i = rx_ring->next_to_clean; ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (rx_desc->status & E1000_RXD_STAT_DD) { ++ struct sk_buff *skb; ++ u8 status; ++ ++#ifdef CONFIG_E1000_NAPI ++ if (*work_done >= work_to_do) ++ break; ++ (*work_done)++; ++#endif ++ status = rx_desc->status; ++ skb = buffer_info->skb; ++ buffer_info->skb = NULL; ++ ++ prefetch(skb->data - NET_IP_ALIGN); ++ ++ if (++i == rx_ring->count) i = 0; ++ next_rxd = E1000_RX_DESC(*rx_ring, i); ++ prefetch(next_rxd); ++ ++ next_buffer = &rx_ring->buffer_info[i]; ++ ++ cleaned = TRUE; ++ cleaned_count++; ++ pci_unmap_single(pdev, ++ buffer_info->dma, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++ buffer_info->dma = 0; ++ ++ length = le16_to_cpu(rx_desc->length); ++ ++ /* !EOP means multiple descriptors were used to store a single ++ * packet, also make sure the frame isn't just CRC only */ ++ if (unlikely(!(status & E1000_RXD_STAT_EOP) || (length <= 4))) { ++ /* All receives must fit into a single 
buffer */ ++ E1000_DBG("%s: Receive packet consumed multiple" ++ " buffers\n", netdev->name); ++ /* recycle */ ++ buffer_info->skb = skb; ++ goto next_desc; ++ } ++ ++ if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { ++ u8 last_byte = *(skb->data + length - 1); ++ if (TBI_ACCEPT(&adapter->hw, status, ++ rx_desc->errors, length, last_byte, ++ adapter->min_frame_size, ++ adapter->max_frame_size)) { ++ length--; ++ } else { ++ /* recycle */ ++ buffer_info->skb = skb; ++ goto next_desc; ++ } ++ } ++ ++ /* adjust length to remove Ethernet CRC, this must be ++ * done after the TBI_ACCEPT workaround above */ ++ length -= 4; ++ ++ /* probably a little skewed due to removing CRC */ ++ total_rx_bytes += length; ++ total_rx_packets++; ++ ++ rtskb_put(skb, length); ++ ++ /* Receive Checksum Offload */ ++ e1000_rx_checksum(adapter, ++ (u32)(status) | ++ ((u32)(rx_desc->errors) << 24), ++ le16_to_cpu(rx_desc->csum), skb); ++ ++ skb->protocol = rt_eth_type_trans(skb, netdev); ++ skb->time_stamp = *time_stamp; ++ ++ e1000_receive_skb(adapter, status, rx_desc->special, skb); ++ adapter->data_received = 1; // Set flag for the main interrupt routine ++ ++ // netdev->last_rx = jiffies; ++#ifdef CONFIG_E1000_MQ ++ rx_ring->rx_stats.packets++; ++ rx_ring->rx_stats.bytes += length; ++#endif ++ ++next_desc: ++ rx_desc->status = 0; ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) { ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ cleaned_count = 0; ++ } ++ ++ /* use prefetched values */ ++ rx_desc = next_rxd; ++ buffer_info = next_buffer; ++ } ++ rx_ring->next_to_clean = i; ++ ++ cleaned_count = E1000_DESC_UNUSED(rx_ring); ++ if (cleaned_count) ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ ++ adapter->total_rx_packets += total_rx_packets; ++ adapter->total_rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_packets += total_rx_packets; ++ return cleaned; ++} ++ ++/** ++ * e1000_clean_rx_irq_ps - Send received data up the network stack; packet split ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++#ifdef CONFIG_E1000_NAPI ++static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do) ++#else ++static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp) ++#endif ++{ ++#ifdef CONFIG_E1000_DISABLE_PACKET_SPLIT ++ return true; ++ ++#else ++ ++ union e1000_rx_desc_packet_split *rx_desc, *next_rxd; ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_buffer *buffer_info, *next_buffer; ++ struct e1000_ps_page *ps_page; ++ struct e1000_ps_page_dma *ps_page_dma; ++ struct sk_buff *skb; ++ unsigned int i, j; ++ u32 length, staterr; ++ int cleaned_count = 0; ++ bool cleaned = FALSE; ++ unsigned int total_rx_bytes=0, total_rx_packets=0; ++ ++ i = rx_ring->next_to_clean; ++ rx_desc = E1000_RX_DESC_PS(*rx_ring, i); ++ staterr = le32_to_cpu(rx_desc->wb.middle.status_error); ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (staterr & E1000_RXD_STAT_DD) { ++ ps_page = &rx_ring->ps_page[i]; ++ ps_page_dma = &rx_ring->ps_page_dma[i]; ++#ifdef CONFIG_E1000_NAPI ++ if (unlikely(*work_done >= work_to_do)) ++ break; ++ (*work_done)++; ++#endif 
++ skb = buffer_info->skb; ++ ++ /* in the packet split case this is header only */ ++ prefetch(skb->data - NET_IP_ALIGN); ++ ++ if (++i == rx_ring->count) i = 0; ++ next_rxd = E1000_RX_DESC_PS(*rx_ring, i); ++ prefetch(next_rxd); ++ ++ next_buffer = &rx_ring->buffer_info[i]; ++ ++ cleaned = TRUE; ++ cleaned_count++; ++ pci_unmap_single(pdev, buffer_info->dma, ++ adapter->rx_ps_bsize0, ++ PCI_DMA_FROMDEVICE); ++ buffer_info->dma = 0; ++ ++ if (unlikely(!(staterr & E1000_RXD_STAT_EOP))) { ++ E1000_DBG("%s: Packet Split buffers didn't pick up" ++ " the full packet\n", netdev->name); ++ dev_kfree_skb_irq(skb); ++ goto next_desc; ++ } ++ ++ if (unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { ++ dev_kfree_skb_irq(skb); ++ goto next_desc; ++ } ++ ++ length = le16_to_cpu(rx_desc->wb.middle.length0); ++ ++ if (unlikely(!length)) { ++ E1000_DBG("%s: Last part of the packet spanning" ++ " multiple descriptors\n", netdev->name); ++ dev_kfree_skb_irq(skb); ++ goto next_desc; ++ } ++ ++ /* Good Receive */ ++ rtskb_put(skb, length); ++#ifdef CONFIG_E1000_MQ ++ rx_ring->rx_stats.packets++; ++ rx_ring->rx_stats.bytes += skb->len; ++#endif ++ ++#ifdef CONFIG_E1000_NAPI ++ { ++ /* this looks ugly, but it seems compiler issues make it ++ more efficient than reusing j */ ++ int l1 = le16_to_cpu(rx_desc->wb.upper.length[0]); ++ ++ /* page alloc/put takes too long and effects small packet ++ * throughput, so unsplit small packets and save the alloc/put ++ * only valid in softirq (napi) context to call kmap_* */ ++ if (l1 && (l1 <= copybreak) && ++ ((length + l1) <= adapter->rx_ps_bsize0)) { ++ u8 *vaddr; ++ /* there is no documentation about how to call ++ * kmap_atomic, so we can't hold the mapping ++ * very long */ ++ pci_dma_sync_single_for_cpu(pdev, ++ ps_page_dma->ps_page_dma[0], ++ PAGE_SIZE, ++ PCI_DMA_FROMDEVICE); ++ vaddr = kmap_atomic(ps_page->ps_page[0], ++ KM_SKB_DATA_SOFTIRQ); ++ memcpy(skb_tail_pointer(skb), vaddr, l1); ++ kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); ++ pci_dma_sync_single_for_device(pdev, ++ ps_page_dma->ps_page_dma[0], ++ PAGE_SIZE, PCI_DMA_FROMDEVICE); ++ /* remove the CRC */ ++ l1 -= 4; ++ rtskb_put(skb, l1); ++ goto copydone; ++ } /* if */ ++ } ++#endif ++ ++ for (j = 0; j < adapter->rx_ps_pages; j++) { ++ if (!(length= le16_to_cpu(rx_desc->wb.upper.length[j]))) ++ break; ++ pci_unmap_page(pdev, ps_page_dma->ps_page_dma[j], ++ PAGE_SIZE, PCI_DMA_FROMDEVICE); ++ ps_page_dma->ps_page_dma[j] = 0; ++ skb_fill_page_desc(skb, j, ps_page->ps_page[j], 0, ++ length); ++ ps_page->ps_page[j] = NULL; ++ skb->len += length; ++ skb->data_len += length; ++ skb->truesize += length; ++ } ++ ++ /* strip the ethernet crc, problem is we're using pages now so ++ * this whole operation can get a little cpu intensive */ ++ pskb_trim(skb, skb->len - 4); ++ ++#ifdef CONFIG_E1000_NAPI ++copydone: ++#endif ++ total_rx_bytes += skb->len; ++ total_rx_packets++; ++ ++ e1000_rx_checksum(adapter, staterr, ++ le16_to_cpu(rx_desc->wb.lower.hi_dword.csum_ip.csum), skb); ++ skb->protocol = rt_eth_type_trans(skb, netdev); ++ ++ if (likely(rx_desc->wb.upper.header_status & ++ cpu_to_le16(E1000_RXDPS_HDRSTAT_HDRSP))) ++ adapter->rx_hdr_split++; ++ ++ e1000_receive_skb(adapter, staterr, rx_desc->wb.middle.vlan, ++ skb); ++ netdev->last_rx = jiffies; ++ ++next_desc: ++ rx_desc->wb.middle.status_error &= cpu_to_le32(~0xFF); ++ buffer_info->skb = NULL; ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) { ++ adapter->alloc_rx_buf(adapter, 
rx_ring, cleaned_count); ++ cleaned_count = 0; ++ } ++ ++ /* use prefetched values */ ++ rx_desc = next_rxd; ++ buffer_info = next_buffer; ++ ++ staterr = le32_to_cpu(rx_desc->wb.middle.status_error); ++ } ++ rx_ring->next_to_clean = i; ++ ++ cleaned_count = E1000_DESC_UNUSED(rx_ring); ++ if (cleaned_count) ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ ++ adapter->total_rx_packets += total_rx_packets; ++ adapter->total_rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_packets += total_rx_packets; ++ return cleaned; ++#endif ++} ++ ++#ifdef CONFIG_E1000_NAPI ++/** ++ * e1000_alloc_jumbo_rx_buffers - Replace used jumbo receive buffers ++ * @adapter: address of board private structure ++ * @rx_ring: pointer to receive ring structure ++ * @cleaned_count: number of buffers to allocate this pass ++ **/ ++static void e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count) ++{ ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc; ++ struct e1000_rx_buffer *buffer_info; ++ struct sk_buff *skb; ++ unsigned int i; ++ unsigned int bufsz = 256 - ++ 16 /*for skb_reserve */ - ++ NET_IP_ALIGN; ++ ++ i = rx_ring->next_to_use; ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (cleaned_count--) { ++ skb = buffer_info->skb; ++ if (skb) { ++ skb_trim(skb, 0); ++ goto check_page; ++ } ++ ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ if (unlikely(!skb)) { ++ /* Better luck next round */ ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ struct sk_buff *oldskb = skb; ++ DPRINTK(PROBE, ERR, "skb align check failed: %u bytes " ++ "at %p\n", bufsz, skb->data); ++ /* Try again, without freeing the previous */ ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ /* Failed allocation, critical failure */ ++ if (!skb) { ++ kfree_rtskb(oldskb); ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ /* give up */ ++ kfree_rtskb(skb); ++ kfree_rtskb(oldskb); ++ adapter->alloc_rx_buff_failed++; ++ break; /* while !buffer_info->skb */ ++ } ++ ++ /* Use new allocation */ ++ kfree_rtskb(oldskb); ++ } ++ /* Make buffer alignment 2 beyond a 16 byte boundary ++ * this will result in a 16 byte aligned IP header after ++ * the 14 byte MAC header is removed ++ */ ++ skb_reserve(skb, NET_IP_ALIGN); ++ ++ buffer_info->skb = skb; ++check_page: ++ /* allocate a new page if necessary */ ++ if (!buffer_info->page) { ++ buffer_info->page = alloc_page(GFP_ATOMIC); ++ if (unlikely(!buffer_info->page)) { ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ } ++ ++ if (!buffer_info->dma) ++ buffer_info->dma = pci_map_page(pdev, ++ buffer_info->page, 0, ++ PAGE_SIZE, ++ PCI_DMA_FROMDEVICE); ++ ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ ++ if (unlikely(++i == rx_ring->count)) ++ i = 0; ++ buffer_info = &rx_ring->buffer_info[i]; ++ } ++ ++ if (likely(rx_ring->next_to_use != i)) { ++ rx_ring->next_to_use = i; ++ if (unlikely(i-- == 0)) ++ i = (rx_ring->count - 1); ++ ++ /* Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). 
*/ ++ wmb(); ++ writel(i, adapter->hw.hw_addr + rx_ring->rdt); ++ } ++} ++#endif /* NAPI */ ++ ++/** ++ * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended ++ * @adapter: address of board private structure ++ **/ ++static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count) ++{ ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc; ++ struct e1000_rx_buffer *buffer_info; ++ struct sk_buff *skb; ++ unsigned int i; ++ unsigned int bufsz = adapter->rx_buffer_len + NET_IP_ALIGN; ++ ++ i = rx_ring->next_to_use; ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (cleaned_count--) { ++ skb = buffer_info->skb; ++ if (skb) { ++ rtskb_trim(skb, 0); ++ goto map_skb; ++ } ++ ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ if (unlikely(!skb)) { ++ /* Better luck next round */ ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ struct sk_buff *oldskb = skb; ++ DPRINTK(RX_ERR, ERR, "skb align check failed: %u bytes " ++ "at %p\n", bufsz, skb->data); ++ /* Try again, without freeing the previous */ ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ /* Failed allocation, critical failure */ ++ if (!skb) { ++ kfree_rtskb(oldskb); ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ /* give up */ ++ kfree_rtskb(skb); ++ kfree_rtskb(oldskb); ++ adapter->alloc_rx_buff_failed++; ++ break; /* while !buffer_info->skb */ ++ } ++ ++ /* Use new allocation */ ++ kfree_rtskb(oldskb); ++ } ++ /* Make buffer alignment 2 beyond a 16 byte boundary ++ * this will result in a 16 byte aligned IP header after ++ * the 14 byte MAC header is removed ++ */ ++ skb_reserve(skb, NET_IP_ALIGN); ++ ++ buffer_info->skb = skb; ++map_skb: ++ buffer_info->dma = pci_map_single(pdev, ++ skb->data, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, ++ (void *)(unsigned long)buffer_info->dma, ++ adapter->rx_buffer_len)) { ++ DPRINTK(RX_ERR, ERR, ++ "dma align check failed: %u bytes at %p\n", ++ adapter->rx_buffer_len, ++ (void *)(unsigned long)buffer_info->dma); ++ kfree_rtskb(skb); ++ buffer_info->skb = NULL; ++ ++ pci_unmap_single(pdev, buffer_info->dma, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++ buffer_info->dma = 0; ++ ++ adapter->alloc_rx_buff_failed++; ++ break; /* while !buffer_info->skb */ ++ } ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ ++ if (unlikely(++i == rx_ring->count)) ++ i = 0; ++ buffer_info = &rx_ring->buffer_info[i]; ++ } ++ ++ if (likely(rx_ring->next_to_use != i)) { ++ rx_ring->next_to_use = i; ++ if (unlikely(i-- == 0)) ++ i = (rx_ring->count - 1); ++ ++ /* Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). 
*/ ++ wmb(); ++ writel(i, adapter->hw.hw_addr + rx_ring->rdt); ++ } ++} ++ ++/** ++ * e1000_alloc_rx_buffers_ps - Replace used receive buffers; packet split ++ * @adapter: address of board private structure ++ **/ ++static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count) ++{ ++} ++ ++/** ++ * e1000_smartspeed - Workaround for SmartSpeed on 82541 and 82547 controllers. ++ * @adapter: ++ **/ ++static void e1000_smartspeed(struct e1000_adapter *adapter) ++{ ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ struct e1000_phy_info *phy = &adapter->hw.phy; ++ u16 phy_status; ++ u16 phy_ctrl; ++ ++ if ((phy->type != e1000_phy_igp) || !mac->autoneg || ++ !(phy->autoneg_advertised & ADVERTISE_1000_FULL)) ++ return; ++ ++ if (adapter->smartspeed == 0) { ++ /* If Master/Slave config fault is asserted twice, ++ * we assume back-to-back */ ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_status); ++ if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_status); ++ if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_ctrl); ++ if (phy_ctrl & CR_1000T_MS_ENABLE) { ++ phy_ctrl &= ~CR_1000T_MS_ENABLE; ++ e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, ++ phy_ctrl); ++ adapter->smartspeed++; ++ if (!e1000_phy_setup_autoneg(&adapter->hw) && ++ !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, ++ &phy_ctrl)) { ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | ++ MII_CR_RESTART_AUTO_NEG); ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, ++ phy_ctrl); ++ } ++ } ++ return; ++ } else if (adapter->smartspeed == E1000_SMARTSPEED_DOWNSHIFT) { ++ /* If still no link, perhaps using 2/3 pair cable */ ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_ctrl); ++ phy_ctrl |= CR_1000T_MS_ENABLE; ++ e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_ctrl); ++ if (!e1000_phy_setup_autoneg(&adapter->hw) && ++ !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_ctrl)) { ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | ++ MII_CR_RESTART_AUTO_NEG); ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_ctrl); ++ } ++ } ++ /* Restart process after E1000_SMARTSPEED_MAX iterations */ ++ if (adapter->smartspeed++ == E1000_SMARTSPEED_MAX) ++ adapter->smartspeed = 0; ++} ++ ++/** ++ * e1000_ioctl - ++ * @netdev: ++ * @ifreq: ++ * @cmd: ++ **/ ++#if 0 ++static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) ++{ ++ switch (cmd) { ++#ifdef SIOCGMIIPHY ++ case SIOCGMIIPHY: ++ case SIOCGMIIREG: ++ case SIOCSMIIREG: ++ return e1000_mii_ioctl(netdev, ifr, cmd); ++#endif ++#ifdef ETHTOOL_OPS_COMPAT ++ case SIOCETHTOOL: ++ return ethtool_ioctl(ifr); ++#endif ++ default: ++ return -EOPNOTSUPP; ++ } ++} ++ ++#ifdef SIOCGMIIPHY ++/** ++ * e1000_mii_ioctl - ++ * @netdev: ++ * @ifreq: ++ * @cmd: ++ **/ ++static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, ++ int cmd) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct mii_ioctl_data *data = if_mii(ifr); ++ ++ if (adapter->hw.phy.media_type != e1000_media_type_copper) ++ return -EOPNOTSUPP; ++ ++ switch (cmd) { ++ case SIOCGMIIPHY: ++ data->phy_id = adapter->hw.phy.addr; ++ break; ++ case SIOCGMIIREG: ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ switch (data->reg_num & 0x1F) { ++ case MII_BMCR: ++ data->val_out = adapter->phy_regs.bmcr; ++ break; ++ case MII_BMSR: ++ data->val_out = adapter->phy_regs.bmsr; ++ break; ++ case MII_PHYSID1: ++ data->val_out 
= (adapter->hw.phy.id >> 16); ++ break; ++ case MII_PHYSID2: ++ data->val_out = (adapter->hw.phy.id & 0xFFFF); ++ break; ++ case MII_ADVERTISE: ++ data->val_out = adapter->phy_regs.advertise; ++ break; ++ case MII_LPA: ++ data->val_out = adapter->phy_regs.lpa; ++ break; ++ case MII_EXPANSION: ++ data->val_out = adapter->phy_regs.expansion; ++ break; ++ case MII_CTRL1000: ++ data->val_out = adapter->phy_regs.ctrl1000; ++ break; ++ case MII_STAT1000: ++ data->val_out = adapter->phy_regs.stat1000; ++ break; ++ case MII_ESTATUS: ++ data->val_out = adapter->phy_regs.estatus; ++ break; ++ default: ++ return -EIO; ++ } ++ break; ++ case SIOCSMIIREG: ++ default: ++ return -EOPNOTSUPP; ++ } ++ return E1000_SUCCESS; ++} ++#endif ++#endif ++ ++void e1000_pci_set_mwi(struct e1000_hw *hw) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ int ret_val = pci_set_mwi(adapter->pdev); ++ ++ if (ret_val) ++ DPRINTK(PROBE, ERR, "Error in setting MWI\n"); ++} ++ ++void e1000_pci_clear_mwi(struct e1000_hw *hw) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++ pci_clear_mwi(adapter->pdev); ++} ++ ++void e1000_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++ pci_read_config_word(adapter->pdev, reg, value); ++} ++ ++void e1000_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++ pci_write_config_word(adapter->pdev, reg, *value); ++} ++ ++s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ u16 cap_offset; ++ ++ cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); ++ if (!cap_offset) ++ return -E1000_ERR_CONFIG; ++ ++ pci_read_config_word(adapter->pdev, cap_offset + reg, value); ++ ++ return E1000_SUCCESS; ++} ++ ++#ifdef NETIF_F_HW_VLAN_TX ++static void e1000_vlan_rx_register(struct net_device *netdev, ++ struct vlan_group *grp) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ u32 ctrl, rctl; ++ ++ e1000_irq_disable(adapter); ++ adapter->vlgrp = grp; ++ ++ if (grp) { ++ /* enable VLAN tag insert/strip */ ++ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_VME; ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); ++ ++ if ((adapter->hw.mac.type != e1000_ich8lan) && ++ (adapter->hw.mac.type != e1000_ich9lan)) { ++ /* enable VLAN receive filtering */ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ rctl |= E1000_RCTL_VFE; ++ rctl &= ~E1000_RCTL_CFIEN; ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ e1000_update_mng_vlan(adapter); ++ } ++ } else { ++ /* disable VLAN tag insert/strip */ ++ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ ctrl &= ~E1000_CTRL_VME; ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); ++ ++ if ((adapter->hw.mac.type != e1000_ich8lan) && ++ (adapter->hw.mac.type != e1000_ich9lan)) { ++ /* disable VLAN filtering */ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ rctl &= ~E1000_RCTL_VFE; ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ if (adapter->mng_vlan_id != ++ (u16)E1000_MNG_VLAN_NONE) { ++ e1000_vlan_rx_kill_vid(netdev, ++ adapter->mng_vlan_id); ++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; ++ } ++ } ++ } ++ ++ e1000_irq_enable(adapter); ++} ++ ++static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ u32 vfta, index; ++ struct net_device *v_netdev; ++ ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && ++ (vid == 
adapter->mng_vlan_id)) ++ return; ++ /* add VID to filter table */ ++ index = (vid >> 5) & 0x7F; ++ vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index); ++ vfta |= (1 << (vid & 0x1F)); ++ e1000_write_vfta(&adapter->hw, index, vfta); ++ /* Copy feature flags from netdev to the vlan netdev for this vid. ++ * This allows things like TSO to bubble down to our vlan device. ++ */ ++ v_netdev = vlan_group_get_device(adapter->vlgrp, vid); ++ v_netdev->features |= adapter->netdev->features; ++ vlan_group_set_device(adapter->vlgrp, vid, v_netdev); ++} ++ ++static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ u32 vfta, index; ++ ++ e1000_irq_disable(adapter); ++ vlan_group_set_device(adapter->vlgrp, vid, NULL); ++ e1000_irq_enable(adapter); ++ ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && ++ (vid == adapter->mng_vlan_id)) { ++ /* release control to f/w */ ++ e1000_release_hw_control(adapter); ++ return; ++ } ++ ++ /* remove VID from filter table */ ++ index = (vid >> 5) & 0x7F; ++ vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index); ++ vfta &= ~(1 << (vid & 0x1F)); ++ e1000_write_vfta(&adapter->hw, index, vfta); ++} ++ ++static void e1000_restore_vlan(struct e1000_adapter *adapter) ++{ ++ e1000_vlan_rx_register(adapter->netdev, adapter->vlgrp); ++ ++ if (adapter->vlgrp) { ++ u16 vid; ++ for (vid = 0; vid < VLAN_N_VID; vid++) { ++ if (!vlan_group_get_device(adapter->vlgrp, vid)) ++ continue; ++ e1000_vlan_rx_add_vid(adapter->netdev, vid); ++ } ++ } ++} ++#endif ++ ++int e1000_set_spd_dplx(struct e1000_adapter *adapter, u16 spddplx) ++{ ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ ++ mac->autoneg = 0; ++ ++ /* Fiber NICs only allow 1000 gbps Full duplex */ ++ if ((adapter->hw.phy.media_type == e1000_media_type_fiber) && ++ spddplx != (SPEED_1000 + DUPLEX_FULL)) { ++ DPRINTK(PROBE, ERR, "Unsupported Speed/Duplex configuration\n"); ++ return -EINVAL; ++ } ++ ++ switch (spddplx) { ++ case SPEED_10 + DUPLEX_HALF: ++ mac->forced_speed_duplex = ADVERTISE_10_HALF; ++ break; ++ case SPEED_10 + DUPLEX_FULL: ++ mac->forced_speed_duplex = ADVERTISE_10_FULL; ++ break; ++ case SPEED_100 + DUPLEX_HALF: ++ mac->forced_speed_duplex = ADVERTISE_100_HALF; ++ break; ++ case SPEED_100 + DUPLEX_FULL: ++ mac->forced_speed_duplex = ADVERTISE_100_FULL; ++ break; ++ case SPEED_1000 + DUPLEX_FULL: ++ mac->autoneg = 1; ++ adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ case SPEED_1000 + DUPLEX_HALF: /* not supported */ ++ default: ++ DPRINTK(PROBE, ERR, "Unsupported Speed/Duplex configuration\n"); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++#ifdef USE_REBOOT_NOTIFIER ++/* only want to do this for 2.4 kernels? 
*/ ++static int e1000_notify_reboot(struct notifier_block *nb, ++ unsigned long event, void *p) ++{ ++ struct pci_dev *pdev = NULL; ++ ++ switch (event) { ++ case SYS_DOWN: ++ case SYS_HALT: ++ case SYS_POWER_OFF: ++ while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) { ++ if (pci_dev_driver(pdev) == &e1000_driver) ++ e1000_suspend(pdev, PMSG_SUSPEND); ++ } ++ } ++ return NOTIFY_DONE; ++} ++#endif ++ ++#ifdef CONFIG_PM ++static int e1000_resume(struct pci_dev *pdev) ++{ ++ struct net_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ u32 err; ++ ++ pci_set_power_state(pdev, PCI_D0); ++ pci_restore_state(pdev); ++ if ((err = pci_enable_device(pdev))) { ++ printk(KERN_ERR "e1000: Cannot enable PCI device from suspend\n"); ++ return err; ++ } ++ pci_set_master(pdev); ++ ++ pci_enable_wake(pdev, PCI_D3hot, 0); ++ pci_enable_wake(pdev, PCI_D3cold, 0); ++ ++ if (rtnetif_running(netdev) && (err = e1000_request_irq(adapter))) ++ return err; ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ e1000_power_up_phy(&adapter->hw); ++ e1000_setup_link(&adapter->hw); ++ } ++ e1000_reset(adapter); ++ E1000_WRITE_REG(&adapter->hw, E1000_WUS, ~0); ++ ++ e1000_init_manageability(adapter); ++ ++ if (rtnetif_running(netdev)) ++ e1000_up(adapter); ++ ++ netif_device_attach(netdev); ++ ++ /* If the controller is 82573 or ICHx and f/w is AMT, do not set ++ * DRV_LOAD until the interface is up. For all other cases, ++ * let the f/w know that the h/w is now under the control ++ * of the driver. */ ++ if (((adapter->hw.mac.type != e1000_82573) && ++ (adapter->hw.mac.type != e1000_ich8lan) && ++ (adapter->hw.mac.type != e1000_ich9lan)) || ++ !e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ return 0; ++} ++#endif ++ ++#ifdef CONFIG_NET_POLL_CONTROLLER ++/* ++ * Polling 'interrupt' - used by things like netconsole to send skbs ++ * without having to re-enable interrupts. It's not called while ++ * the interrupt routine is executing. ++ */ ++static void e1000_netpoll(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ int i; ++ ++ disable_irq(adapter->pdev->irq); ++ e1000_intr(adapter->pdev->irq, netdev); ++ ++ for (i = 0; i < adapter->num_tx_queues ; i++ ) ++ e1000_clean_tx_irq(adapter, &adapter->tx_ring[i]); ++#ifndef CONFIG_E1000_NAPI ++ for (i = 0; i < adapter->num_rx_queues ; i++ ) ++ adapter->clean_rx(adapter, &adapter->rx_ring[i], NULL); ++#endif ++ enable_irq(adapter->pdev->irq); ++} ++#endif ++ ++#ifdef HAVE_PCI_ERS ++/** ++ * e1000_io_error_detected - called when PCI error is detected ++ * @pdev: Pointer to PCI device ++ * @state: The current pci connection state ++ * ++ * This function is called after a PCI bus error affecting ++ * this device has been detected. ++ */ ++static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, ++ pci_channel_state_t state) ++{ ++ struct net_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ netif_device_detach(netdev); ++ ++ if (rtnetif_running(netdev)) ++ e1000_down(adapter); ++ pci_disable_device(pdev); ++ ++ /* Request a slot slot reset. */ ++ return PCI_ERS_RESULT_NEED_RESET; ++} ++ ++/** ++ * e1000_io_slot_reset - called after the pci bus has been reset. ++ * @pdev: Pointer to PCI device ++ * ++ * Restart the card from scratch, as if from a cold-boot. Implementation ++ * resembles the first-half of the e1000_resume routine. 
++ */ ++static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev) ++{ ++ struct net_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ if (pci_enable_device(pdev)) { ++ printk(KERN_ERR "e1000: Cannot re-enable PCI device after reset.\n"); ++ return PCI_ERS_RESULT_DISCONNECT; ++ } ++ pci_set_master(pdev); ++ ++ pci_enable_wake(pdev, PCI_D3hot, 0); ++ pci_enable_wake(pdev, PCI_D3cold, 0); ++ ++ e1000_reset(adapter); ++ E1000_WRITE_REG(&adapter->hw, E1000_WUS, ~0); ++ ++ return PCI_ERS_RESULT_RECOVERED; ++} ++ ++/** ++ * e1000_io_resume - called when traffic can start flowing again. ++ * @pdev: Pointer to PCI device ++ * ++ * This callback is called when the error recovery driver tells us that ++ * its OK to resume normal operation. Implementation resembles the ++ * second-half of the e1000_resume routine. ++ */ ++static void e1000_io_resume(struct pci_dev *pdev) ++{ ++ struct net_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ e1000_init_manageability(adapter); ++ ++ if (rtnetif_running(netdev)) { ++ if (e1000_up(adapter)) { ++ printk("e1000: can't bring device back up after reset\n"); ++ return; ++ } ++ } ++ ++ netif_device_attach(netdev); ++ ++ /* If the controller is 82573 or ICHx and f/w is AMT, do not set ++ * DRV_LOAD until the interface is up. For all other cases, ++ * let the f/w know that the h/w is now under the control ++ * of the driver. */ ++ if (((adapter->hw.mac.type != e1000_82573) && ++ (adapter->hw.mac.type != e1000_ich8lan) && ++ (adapter->hw.mac.type != e1000_ich9lan)) || ++ !e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++} ++#endif /* HAVE_PCI_ERS */ ++ ++s32 e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, u32 size) ++{ ++ hw->dev_spec = kmalloc(size, GFP_KERNEL); ++ ++ if (!hw->dev_spec) ++ return -ENOMEM; ++ ++ memset(hw->dev_spec, 0, size); ++ ++ return E1000_SUCCESS; ++} ++ ++void e1000_free_dev_spec_struct(struct e1000_hw *hw) ++{ ++ if (!hw->dev_spec) ++ return; ++ ++ kfree(hw->dev_spec); ++} ++ ++/* vim: set ts=4: */ ++/* e1000_main.c */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_ich8lan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_ich8lan.c 2021-04-07 16:01:27.743633429 +0800 +@@ -0,0 +1,2582 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_ich8lan ++ * e1000_ich9lan ++ */ ++ ++#include "e1000_api.h" ++#include "e1000_ich8lan.h" ++ ++static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw); ++static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw); ++static void e1000_release_swflag_ich8lan(struct e1000_hw *hw); ++static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw); ++static s32 e1000_check_polarity_ife_ich8lan(struct e1000_hw *hw); ++static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw); ++static s32 e1000_phy_force_speed_duplex_ich8lan(struct e1000_hw *hw); ++static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw); ++static s32 e1000_get_phy_info_ich8lan(struct e1000_hw *hw); ++static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, ++ bool active); ++static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, ++ bool active); ++static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw); ++static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw); ++static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, ++ u16 *data); ++static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw); ++static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw); ++static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw); ++static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw); ++static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, ++ u16 *speed, u16 *duplex); ++static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw); ++static s32 e1000_led_on_ich8lan(struct e1000_hw *hw); ++static s32 e1000_led_off_ich8lan(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw); ++static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank); ++static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout); ++static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw); ++static s32 e1000_get_phy_info_ife_ich8lan(struct e1000_hw *hw); ++static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw); ++static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw); ++static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16* data); ++static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, ++ u32 offset, u16 *data); ++static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, ++ u32 offset, u8 byte); ++static s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, ++ u32 offset, u8 data); ++static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16 data); ++static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw); ++ ++/* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */ ++/* Offset 04h HSFSTS */ ++union ich8_hws_flash_status { ++ struct ich8_hsfsts { ++ u16 flcdone :1; /* bit 0 Flash Cycle Done */ ++ u16 flcerr :1; /* bit 1 Flash Cycle Error */ ++ u16 dael :1; /* bit 2 Direct Access error Log */ ++ u16 berasesz :2; /* bit 4:3 
Sector Erase Size */ ++ u16 flcinprog :1; /* bit 5 flash cycle in Progress */ ++ u16 reserved1 :2; /* bit 13:6 Reserved */ ++ u16 reserved2 :6; /* bit 13:6 Reserved */ ++ u16 fldesvalid :1; /* bit 14 Flash Descriptor Valid */ ++ u16 flockdn :1; /* bit 15 Flash Config Lock-Down */ ++ } hsf_status; ++ u16 regval; ++}; ++ ++/* ICH GbE Flash Hardware Sequencing Flash control Register bit breakdown */ ++/* Offset 06h FLCTL */ ++union ich8_hws_flash_ctrl { ++ struct ich8_hsflctl { ++ u16 flcgo :1; /* 0 Flash Cycle Go */ ++ u16 flcycle :2; /* 2:1 Flash Cycle */ ++ u16 reserved :5; /* 7:3 Reserved */ ++ u16 fldbcount :2; /* 9:8 Flash Data Byte Count */ ++ u16 flockdn :6; /* 15:10 Reserved */ ++ } hsf_ctrl; ++ u16 regval; ++}; ++ ++/* ICH Flash Region Access Permissions */ ++union ich8_hws_flash_regacc { ++ struct ich8_flracc { ++ u32 grra :8; /* 0:7 GbE region Read Access */ ++ u32 grwa :8; /* 8:15 GbE region Write Access */ ++ u32 gmrag :8; /* 23:16 GbE Master Read Access Grant */ ++ u32 gmwag :8; /* 31:24 GbE Master Write Access Grant */ ++ } hsf_flregacc; ++ u16 regval; ++}; ++ ++struct e1000_shadow_ram { ++ u16 value; ++ bool modified; ++}; ++ ++struct e1000_dev_spec_ich8lan { ++ bool kmrn_lock_loss_workaround_enabled; ++ struct e1000_shadow_ram shadow_ram[E1000_SHADOW_RAM_WORDS]; ++}; ++ ++/** ++ * e1000_init_phy_params_ich8lan - Initialize PHY function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific PHY parameters and function pointers. ++ **/ ++static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i = 0; ++ ++ DEBUGFUNC("e1000_init_phy_params_ich8lan"); ++ ++ phy->addr = 1; ++ phy->reset_delay_us = 100; ++ ++ func->acquire_phy = e1000_acquire_swflag_ich8lan; ++ func->check_polarity = e1000_check_polarity_ife_ich8lan; ++ func->check_reset_block = e1000_check_reset_block_ich8lan; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_ich8lan; ++ func->get_cable_length = e1000_get_cable_length_igp_2; ++ func->get_cfg_done = e1000_get_cfg_done_ich8lan; ++ func->get_phy_info = e1000_get_phy_info_ich8lan; ++ func->read_phy_reg = e1000_read_phy_reg_igp; ++ func->release_phy = e1000_release_swflag_ich8lan; ++ func->reset_phy = e1000_phy_hw_reset_ich8lan; ++ func->set_d0_lplu_state = e1000_set_d0_lplu_state_ich8lan; ++ func->set_d3_lplu_state = e1000_set_d3_lplu_state_ich8lan; ++ func->write_phy_reg = e1000_write_phy_reg_igp; ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper_ich8lan; ++ ++ ++ phy->id = 0; ++ while ((e1000_phy_unknown == e1000_get_phy_type_from_id(phy->id)) && ++ (i++ < 100)) { ++ msec_delay(1); ++ ret_val = e1000_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Verify phy id */ ++ switch (phy->id) { ++ case IGP03E1000_E_PHY_ID: ++ phy->type = e1000_phy_igp_3; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ break; ++ case IFE_E_PHY_ID: ++ case IFE_PLUS_E_PHY_ID: ++ case IFE_C_E_PHY_ID: ++ phy->type = e1000_phy_ife; ++ phy->autoneg_mask = E1000_ALL_NOT_GIG; ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_ich8lan - Initialize NVM function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific NVM parameters and function ++ * pointers. 
++ **/ ++static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ u32 gfpreg, sector_base_addr, sector_end_addr; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_nvm_params_ich8lan"); ++ ++ /* Can't read flash registers if the register set isn't mapped. */ ++ if (!hw->flash_address) { ++ DEBUGOUT("ERROR: Flash registers not mapped\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ nvm->type = e1000_nvm_flash_sw; ++ ++ gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG); ++ ++ /* ++ * sector_X_addr is a "sector"-aligned address (4096 bytes) ++ * Add 1 to sector_end_addr since this sector is included in ++ * the overall size. ++ */ ++ sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK; ++ sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1; ++ ++ /* flash_base_addr is byte-aligned */ ++ nvm->flash_base_addr = sector_base_addr << FLASH_SECTOR_ADDR_SHIFT; ++ ++ /* ++ * find total size of the NVM, then cut in half since the total ++ * size represents two separate NVM banks. ++ */ ++ nvm->flash_bank_size = (sector_end_addr - sector_base_addr) ++ << FLASH_SECTOR_ADDR_SHIFT; ++ nvm->flash_bank_size /= 2; ++ /* Adjust to word count */ ++ nvm->flash_bank_size /= sizeof(u16); ++ ++ nvm->word_size = E1000_SHADOW_RAM_WORDS; ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* Clear shadow ram */ ++ for (i = 0; i < nvm->word_size; i++) { ++ dev_spec->shadow_ram[i].modified = FALSE; ++ dev_spec->shadow_ram[i].value = 0xFFFF; ++ } ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_swflag_ich8lan; ++ func->read_nvm = e1000_read_nvm_ich8lan; ++ func->release_nvm = e1000_release_swflag_ich8lan; ++ func->update_nvm = e1000_update_nvm_checksum_ich8lan; ++ func->valid_led_default = e1000_valid_led_default_ich8lan; ++ func->validate_nvm = e1000_validate_nvm_checksum_ich8lan; ++ func->write_nvm = e1000_write_nvm_ich8lan; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_mac_params_ich8lan - Initialize MAC function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific MAC parameters and function ++ * pointers. ++ **/ ++static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_mac_params_ich8lan"); ++ ++ /* Set media type function pointer */ ++ hw->phy.media_type = e1000_media_type_copper; ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 32; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_ICH_RAR_ENTRIES; ++ if (mac->type == e1000_ich8lan) ++ mac->rar_entry_count--; ++ /* Set if part includes ASF firmware */ ++ mac->asf_firmware_present = TRUE; ++ /* Set if manageability features are enabled. 
*/ ++ mac->arc_subsystem_valid = TRUE; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_ich8lan; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_ich8lan; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_ich8lan; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_ich8lan; ++ /* physical interface setup */ ++ func->setup_physical_interface = e1000_setup_copper_link_ich8lan; ++ /* check for link */ ++ func->check_for_link = e1000_check_for_copper_link_generic; ++ /* check management mode */ ++ func->check_mng_mode = e1000_check_mng_mode_ich8lan; ++ /* link info */ ++ func->get_link_up_info = e1000_get_link_up_info_ich8lan; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* blink LED */ ++ func->blink_led = e1000_blink_led_generic; ++ /* setup LED */ ++ func->setup_led = e1000_setup_led_generic; ++ /* cleanup LED */ ++ func->cleanup_led = e1000_cleanup_led_ich8lan; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_ich8lan; ++ func->led_off = e1000_led_off_ich8lan; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_ich8lan; ++ ++ hw->dev_spec_size = sizeof(struct e1000_dev_spec_ich8lan); ++ ++ /* Device-specific structure allocation */ ++ ret_val = e1000_alloc_zeroed_dev_spec_struct(hw, hw->dev_spec_size); ++ if (ret_val) ++ goto out; ++ ++ /* Enable PCS Lock-loss workaround for ICH8 */ ++ if (mac->type == e1000_ich8lan) ++ e1000_set_kmrn_lock_loss_workaround_ich8lan(hw, TRUE); ++ ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_ich8lan - Initialize ICH8 function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific function pointers for PHY, MAC, and NVM. ++ **/ ++void e1000_init_function_pointers_ich8lan(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_ich8lan"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_ich8lan; ++ hw->func.init_nvm_params = e1000_init_nvm_params_ich8lan; ++ hw->func.init_phy_params = e1000_init_phy_params_ich8lan; ++} ++ ++/** ++ * e1000_acquire_swflag_ich8lan - Acquire software control flag ++ * @hw: pointer to the HW structure ++ * ++ * Acquires the software control flag for performing NVM and PHY ++ * operations. This is a function pointer entry point only called by ++ * read/write routines for the PHY and NVM parts. 
++ **/ ++static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl, timeout = PHY_CFG_TIMEOUT; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_acquire_swflag_ich8lan"); ++ ++ while (timeout) { ++ extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG; ++ E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); ++ ++ extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) ++ break; ++ msec_delay_irq(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("FW or HW has locked the resource for too long.\n"); ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; ++ E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_release_swflag_ich8lan - Release software control flag ++ * @hw: pointer to the HW structure ++ * ++ * Releases the software control flag for performing NVM and PHY operations. ++ * This is a function pointer entry point only called by read/write ++ * routines for the PHY and NVM parts. ++ **/ ++static void e1000_release_swflag_ich8lan(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl; ++ ++ DEBUGFUNC("e1000_release_swflag_ich8lan"); ++ ++ extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; ++ E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); ++ ++ return; ++} ++ ++/** ++ * e1000_check_mng_mode_ich8lan - Checks management mode ++ * @hw: pointer to the HW structure ++ * ++ * This checks if the adapter has manageability enabled. ++ * This is a function pointer entry point only called by read/write ++ * routines for the PHY and NVM parts. ++ **/ ++static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ DEBUGFUNC("e1000_check_mng_mode_ich8lan"); ++ ++ fwsm = E1000_READ_REG(hw, E1000_FWSM); ++ ++ return ((fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); ++} ++ ++/** ++ * e1000_check_reset_block_ich8lan - Check if PHY reset is blocked ++ * @hw: pointer to the HW structure ++ * ++ * Checks if firmware is blocking the reset of the PHY. ++ * This is a function pointer entry point only called by ++ * reset routines. ++ **/ ++static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ DEBUGFUNC("e1000_check_reset_block_ich8lan"); ++ ++ fwsm = E1000_READ_REG(hw, E1000_FWSM); ++ ++ return (fwsm & E1000_ICH_FWSM_RSPCIPHY) ? E1000_SUCCESS ++ : E1000_BLK_PHY_RESET; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_ich8lan - Force PHY speed & duplex ++ * @hw: pointer to the HW structure ++ * ++ * Forces the speed and duplex settings of the PHY. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. 
++ **/ ++static s32 e1000_phy_force_speed_duplex_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_ich8lan"); ++ ++ if (phy->type != e1000_phy_ife) { ++ ret_val = e1000_phy_force_speed_duplex_igp(hw); ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ e1000_phy_force_speed_duplex_setup(hw, &data); ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, data); ++ if (ret_val) ++ goto out; ++ ++ /* Disable MDI-X support for 10/100 */ ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_MDIX_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IFE_PMC_AUTO_MDIX; ++ data &= ~IFE_PMC_FORCE_MDIX; ++ ++ ret_val = e1000_write_phy_reg(hw, IFE_PHY_MDIX_CONTROL, data); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT1("IFE PMC: %X\n", data); ++ ++ usec_delay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ DEBUGOUT("Waiting for forced speed/duplex link on IFE phy.\n"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ DEBUGOUT("Link taking longer than expected.\n"); ++ } ++ ++ /* Try once more */ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_hw_reset_ich8lan - Performs a PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Resets the PHY ++ * This is a function pointer entry point called by drivers ++ * or other shared routines. ++ **/ ++static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, data, cnf_size, cnf_base_addr, sw_cfg_mask; ++ s32 ret_val; ++ u16 loop = E1000_ICH8_LAN_INIT_TIMEOUT; ++ u16 word_addr, reg_data, reg_addr, phy_page = 0; ++ ++ DEBUGFUNC("e1000_phy_hw_reset_ich8lan"); ++ ++ ret_val = e1000_phy_hw_reset_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Initialize the PHY from the NVM on ICH platforms. This ++ * is needed due to an issue where the NVM configuration is ++ * not properly autoloaded after power transitions. ++ * Therefore, after each PHY reset, we will load the ++ * configuration data out of the NVM manually. ++ */ ++ if (hw->mac.type == e1000_ich8lan && phy->type == e1000_phy_igp_3) { ++ /* Check if SW needs configure the PHY */ ++ if ((hw->device_id == E1000_DEV_ID_ICH8_IGP_M_AMT) || ++ (hw->device_id == E1000_DEV_ID_ICH8_IGP_M)) ++ sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; ++ else ++ sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG; ++ ++ data = E1000_READ_REG(hw, E1000_FEXTNVM); ++ if (!(data & sw_cfg_mask)) ++ goto out; ++ ++ /* Wait for basic configuration completes before proceeding*/ ++ do { ++ data = E1000_READ_REG(hw, E1000_STATUS); ++ data &= E1000_STATUS_LAN_INIT_DONE; ++ usec_delay(100); ++ } while ((!data) && --loop); ++ ++ /* ++ * If basic configuration is incomplete before the above loop ++ * count reaches 0, loading the configuration from NVM will ++ * leave the PHY in a bad state possibly resulting in no link. 
++ */ ++ if (loop == 0) { ++ DEBUGOUT("LAN_INIT_DONE not set, increase timeout\n"); ++ } ++ ++ /* Clear the Init Done bit for the next init event */ ++ data = E1000_READ_REG(hw, E1000_STATUS); ++ data &= ~E1000_STATUS_LAN_INIT_DONE; ++ E1000_WRITE_REG(hw, E1000_STATUS, data); ++ ++ /* ++ * Make sure HW does not configure LCD from PHY ++ * extended configuration before SW configuration ++ */ ++ data = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ if (data & E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE) ++ goto out; ++ ++ cnf_size = E1000_READ_REG(hw, E1000_EXTCNF_SIZE); ++ cnf_size &= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK; ++ cnf_size >>= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT; ++ if (!cnf_size) ++ goto out; ++ ++ cnf_base_addr = data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK; ++ cnf_base_addr >>= E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT; ++ ++ /* ++ * Configure LCD from extended configuration ++ * region. ++ */ ++ ++ /* cnf_base_addr is in DWORD */ ++ word_addr = (u16)(cnf_base_addr << 1); ++ ++ for (i = 0; i < cnf_size; i++) { ++ ret_val = e1000_read_nvm(hw, ++ (word_addr + i * 2), ++ 1, ++ ®_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_nvm(hw, ++ (word_addr + i * 2 + 1), ++ 1, ++ ®_addr); ++ if (ret_val) ++ goto out; ++ ++ /* Save off the PHY page for future writes. */ ++ if (reg_addr == IGP01E1000_PHY_PAGE_SELECT) { ++ phy_page = reg_data; ++ continue; ++ } ++ ++ reg_addr |= phy_page; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ (u32)reg_addr, ++ reg_data); ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_ich8lan - Calls appropriate PHY type get_phy_info ++ * @hw: pointer to the HW structure ++ * ++ * Wrapper for calling the get_phy_info routines for the appropriate phy type. ++ * This is a function pointer entry point called by drivers ++ * or other shared routines. ++ **/ ++static s32 e1000_get_phy_info_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = -E1000_ERR_PHY_TYPE; ++ ++ DEBUGFUNC("e1000_get_phy_info_ich8lan"); ++ ++ switch (hw->phy.type) { ++ case e1000_phy_ife: ++ ret_val = e1000_get_phy_info_ife_ich8lan(hw); ++ break; ++ case e1000_phy_igp_3: ++ ret_val = e1000_get_phy_info_igp(hw); ++ break; ++ default: ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_ife_ich8lan - Retrieves various IFE PHY states ++ * @hw: pointer to the HW structure ++ * ++ * Populates "phy" structure with various feature states. ++ * This function is only called by other family-specific ++ * routines. ++ **/ ++static s32 e1000_get_phy_info_ife_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ DEBUGFUNC("e1000_get_phy_info_ife_ich8lan"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ DEBUGOUT("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ phy->polarity_correction = (data & IFE_PSC_AUTO_POLARITY_DISABLE) ++ ? FALSE : TRUE; ++ ++ if (phy->polarity_correction) { ++ ret_val = e1000_check_polarity_ife_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ } else { ++ /* Polarity is forced */ ++ phy->cable_polarity = (data & IFE_PSC_FORCE_POLARITY) ++ ? 
e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_MDIX_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & IFE_PMC_MDIX_STATUS) ? TRUE : FALSE; ++ ++ /* The following parameters are undefined for 10/100 operation. */ ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_ife_ich8lan - Check cable polarity for IFE PHY ++ * @hw: pointer to the HW structure ++ * ++ * Polarity is determined on the polarity reveral feature being enabled. ++ * This function is only called by other family-specific ++ * routines. ++ **/ ++static s32 e1000_check_polarity_ife_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, offset, mask; ++ ++ DEBUGFUNC("e1000_check_polarity_ife_ich8lan"); ++ ++ /* ++ * Polarity is determined based on the reversal feature ++ * being enabled. ++ */ ++ if (phy->polarity_correction) { ++ offset = IFE_PHY_EXTENDED_STATUS_CONTROL; ++ mask = IFE_PESC_POLARITY_REVERSED; ++ } else { ++ offset = IFE_PHY_SPECIAL_CONTROL; ++ mask = IFE_PSC_FORCE_POLARITY; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, offset, &phy_data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (phy_data & mask) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_set_d0_lplu_state_ich8lan - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: TRUE to enable LPLU, FALSE to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. ++ **/ ++static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, ++ bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 phy_ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ u16 data; ++ ++ DEBUGFUNC("e1000_set_d0_lplu_state_ich8lan"); ++ ++ if (phy->type == e1000_phy_ife) ++ goto out; ++ ++ phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); ++ ++ if (active) { ++ phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU; ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); ++ ++ /* ++ * Call gig speed drop workaround on LPLU before accessing ++ * any PHY registers ++ */ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3)) ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else { ++ phy_ctrl &= ~E1000_PHY_CTRL_D0A_LPLU; ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); ++ ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. 
++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_d3_lplu_state_ich8lan - Set Low Power Linkup D3 state ++ * @hw: pointer to the HW structure ++ * @active: TRUE to enable LPLU, FALSE to disable ++ * ++ * Sets the LPLU D3 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. ++ **/ ++static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, ++ bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 phy_ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ u16 data; ++ ++ DEBUGFUNC("e1000_set_d3_lplu_state_ich8lan"); ++ ++ phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); ++ ++ if (!active) { ++ phy_ctrl &= ~E1000_PHY_CTRL_NOND0A_LPLU; ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. 
++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ phy_ctrl |= E1000_PHY_CTRL_NOND0A_LPLU; ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); ++ ++ /* ++ * Call gig speed drop workaround on LPLU before accessing ++ * any PHY registers ++ */ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3)) ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_valid_nvm_bank_detect_ich8lan - finds out the valid bank 0 or 1 ++ * @hw: pointer to the HW structure ++ * @bank: pointer to the variable that returns the active bank ++ * ++ * Reads signature byte from the NVM using the flash access registers. ++ **/ ++static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_SEC1VAL) ++ *bank = 1; ++ else ++ *bank = 0; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_nvm_ich8lan - Read word(s) from the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the word(s) to read. ++ * @words: Size of data to read in words ++ * @data: Pointer to the word(s) to read at offset. ++ * ++ * Reads a word(s) from the NVM using the flash access registers. ++ **/ ++static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ u32 act_offset; ++ s32 ret_val = E1000_SUCCESS; ++ u32 bank = 0; ++ u16 i, word; ++ ++ DEBUGFUNC("e1000_read_nvm_ich8lan"); ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); ++ if (ret_val != E1000_SUCCESS) ++ goto out; ++ ++ act_offset = (bank) ? 
nvm->flash_bank_size : 0; ++ act_offset += offset; ++ ++ for (i = 0; i < words; i++) { ++ if ((dev_spec->shadow_ram) && ++ (dev_spec->shadow_ram[offset+i].modified)) { ++ data[i] = dev_spec->shadow_ram[offset+i].value; ++ } else { ++ ret_val = e1000_read_flash_word_ich8lan(hw, ++ act_offset + i, ++ &word); ++ if (ret_val) ++ break; ++ data[i] = word; ++ } ++ } ++ ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_flash_cycle_init_ich8lan - Initialize flash ++ * @hw: pointer to the HW structure ++ * ++ * This function does initial flash setup so that a new read/write/erase cycle ++ * can be started. ++ **/ ++static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) ++{ ++ union ich8_hws_flash_status hsfsts; ++ s32 ret_val = -E1000_ERR_NVM; ++ s32 i = 0; ++ ++ DEBUGFUNC("e1000_flash_cycle_init_ich8lan"); ++ ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); ++ ++ /* Check if the flash descriptor is valid */ ++ if (hsfsts.hsf_status.fldesvalid == 0) { ++ DEBUGOUT("Flash descriptor invalid. " ++ "SW Sequencing must be used."); ++ goto out; ++ } ++ ++ /* Clear FCERR and DAEL in hw status by writing 1 */ ++ hsfsts.hsf_status.flcerr = 1; ++ hsfsts.hsf_status.dael = 1; ++ ++ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); ++ ++ /* ++ * Either we should have a hardware SPI cycle in progress ++ * bit to check against, in order to start a new cycle or ++ * FDONE bit should be changed in the hardware so that it ++ * is 1 after harware reset, which can then be used as an ++ * indication whether a cycle is in progress or has been ++ * completed. ++ */ ++ ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ /* ++ * There is no cycle running at present, ++ * so we can start a cycle. ++ * Begin by setting Flash Cycle Done. ++ */ ++ hsfsts.hsf_status.flcdone = 1; ++ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); ++ ret_val = E1000_SUCCESS; ++ } else { ++ /* ++ * Otherwise poll for sometime so the current ++ * cycle has a chance to end before giving up. ++ */ ++ for (i = 0; i < ICH_FLASH_READ_COMMAND_TIMEOUT; i++) { ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ++ ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ ret_val = E1000_SUCCESS; ++ break; ++ } ++ usec_delay(1); ++ } ++ if (ret_val == E1000_SUCCESS) { ++ /* ++ * Successful in waiting for previous cycle to timeout, ++ * now set the Flash Cycle Done. ++ */ ++ hsfsts.hsf_status.flcdone = 1; ++ E1000_WRITE_FLASH_REG16(hw, ++ ICH_FLASH_HSFSTS, ++ hsfsts.regval); ++ } else { ++ DEBUGOUT("Flash controller busy, cannot get access"); ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_flash_cycle_ich8lan - Starts flash cycle (read/write/erase) ++ * @hw: pointer to the HW structure ++ * @timeout: maximum time to wait for completion ++ * ++ * This function starts a flash cycle and waits for its completion. 
++ **/ ++static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) ++{ ++ union ich8_hws_flash_ctrl hsflctl; ++ union ich8_hws_flash_status hsfsts; ++ s32 ret_val = -E1000_ERR_NVM; ++ u32 i = 0; ++ ++ DEBUGFUNC("e1000_flash_cycle_ich8lan"); ++ ++ /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ ++ hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcgo = 1; ++ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* wait till FDONE bit is set to 1 */ ++ do { ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcdone == 1) ++ break; ++ usec_delay(1); ++ } while (i++ < timeout); ++ ++ if (hsfsts.hsf_status.flcdone == 1 && hsfsts.hsf_status.flcerr == 0) ++ ret_val = E1000_SUCCESS; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_flash_word_ich8lan - Read word from flash ++ * @hw: pointer to the HW structure ++ * @offset: offset to data location ++ * @data: pointer to the location for storing the data ++ * ++ * Reads the flash word at offset into data. Offset is converted ++ * to bytes before read. ++ **/ ++static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, ++ u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_read_flash_word_ich8lan"); ++ ++ if (!data) { ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ /* Must convert offset into bytes. */ ++ offset <<= 1; ++ ++ ret_val = e1000_read_flash_data_ich8lan(hw, offset, 2, data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_flash_data_ich8lan - Read byte or word from NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the byte or word to read. ++ * @size: Size of data to read, 1=byte 2=word ++ * @data: Pointer to the word to store the value read. ++ * ++ * Reads a byte or word from the NVM using the flash access registers. ++ **/ ++static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16* data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ u32 flash_data = 0; ++ s32 ret_val = -E1000_ERR_NVM; ++ u8 count = 0; ++ ++ DEBUGFUNC("e1000_read_flash_data_ich8lan"); ++ ++ if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) ++ goto out; ++ ++ flash_linear_addr = (ICH_FLASH_LINEAR_ADDR_MASK & offset) + ++ hw->nvm.flash_base_addr; ++ ++ do { ++ usec_delay(1); ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val != E1000_SUCCESS) ++ break; ++ ++ hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. 
*/ ++ hsflctl.hsf_ctrl.fldbcount = size - 1; ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ; ++ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); ++ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_READ_COMMAND_TIMEOUT); ++ ++ /* ++ * Check if FCERR is set to 1, if set to 1, clear it ++ * and try the whole sequence a few more times, else ++ * read in (shift in) the Flash Data0, the order is ++ * least significant byte first msb to lsb ++ */ ++ if (ret_val == E1000_SUCCESS) { ++ flash_data = E1000_READ_FLASH_REG(hw, ICH_FLASH_FDATA0); ++ if (size == 1) { ++ *data = (u8)(flash_data & 0x000000FF); ++ } else if (size == 2) { ++ *data = (u16)(flash_data & 0x0000FFFF); ++ } ++ break; ++ } else { ++ /* ++ * If we've gotten here, then things are probably ++ * completely hosed, but if the error condition is ++ * detected, it won't hurt to give it another try... ++ * ICH_FLASH_CYCLE_REPEAT_COUNT times. ++ */ ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ++ ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* Repeat for some time before giving up. */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ DEBUGOUT("Timeout error - flash cycle " ++ "did not complete."); ++ break; ++ } ++ } ++ } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_ich8lan - Write word(s) to the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the word(s) to write. ++ * @words: Size of data to write in words ++ * @data: Pointer to the word(s) to write at offset. ++ * ++ * Writes a byte or word to the NVM using the flash access registers. ++ **/ ++static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i; ++ ++ DEBUGFUNC("e1000_write_nvm_ich8lan"); ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ for (i = 0; i < words; i++) { ++ dev_spec->shadow_ram[offset+i].modified = TRUE; ++ dev_spec->shadow_ram[offset+i].value = data[i]; ++ } ++ ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_update_nvm_checksum_ich8lan - Update the checksum for NVM ++ * @hw: pointer to the HW structure ++ * ++ * The NVM checksum is updated by calling the generic update_nvm_checksum, ++ * which writes the checksum to the shadow ram. The changes in the shadow ++ * ram are then committed to the EEPROM by processing each bank at a time ++ * checking for the modified bit and writing only the pending changes. ++ * After a succesful commit, the shadow ram is cleared and is ready for ++ * future writes. 
++ **/ ++static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ u32 i, act_offset, new_bank_offset, old_bank_offset, bank; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_update_nvm_checksum_ich8lan"); ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ ret_val = e1000_update_nvm_checksum_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ if (nvm->type != e1000_nvm_flash_sw) ++ goto out; ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * We're writing to the opposite bank so if we're on bank 1, ++ * write to bank 0 etc. We also need to erase the segment that ++ * is going to be written ++ */ ++ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); ++ if (ret_val != E1000_SUCCESS) ++ goto out; ++ ++ if (bank == 0) { ++ new_bank_offset = nvm->flash_bank_size; ++ old_bank_offset = 0; ++ e1000_erase_flash_bank_ich8lan(hw, 1); ++ } else { ++ old_bank_offset = nvm->flash_bank_size; ++ new_bank_offset = 0; ++ e1000_erase_flash_bank_ich8lan(hw, 0); ++ } ++ ++ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { ++ /* ++ * Determine whether to write the value stored ++ * in the other NVM bank or a modified value stored ++ * in the shadow RAM ++ */ ++ if (dev_spec->shadow_ram[i].modified) { ++ data = dev_spec->shadow_ram[i].value; ++ } else { ++ e1000_read_flash_word_ich8lan(hw, ++ i + old_bank_offset, ++ &data); ++ } ++ ++ /* ++ * If the word is 0x13, then make sure the signature bits ++ * (15:14) are 11b until the commit has completed. ++ * This will allow us to write 10b which indicates the ++ * signature is valid. We want to do this after the write ++ * has completed so that we don't mark the segment valid ++ * while the write is still in progress ++ */ ++ if (i == E1000_ICH_NVM_SIG_WORD) ++ data |= E1000_ICH_NVM_SIG_MASK; ++ ++ /* Convert offset to bytes. */ ++ act_offset = (i + new_bank_offset) << 1; ++ ++ usec_delay(100); ++ /* Write the bytes to the new bank. */ ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset, ++ (u8)data); ++ if (ret_val) ++ break; ++ ++ usec_delay(100); ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset + 1, ++ (u8)(data >> 8)); ++ if (ret_val) ++ break; ++ } ++ ++ /* ++ * Don't bother writing the segment valid bits if sector ++ * programming failed. ++ */ ++ if (ret_val) { ++ DEBUGOUT("Flash commit failed.\n"); ++ e1000_release_nvm(hw); ++ goto out; ++ } ++ ++ /* ++ * Finally validate the new segment by setting bit 15:14 ++ * to 10b in word 0x13 , this can be done without an ++ * erase as well since these bits are 11 to start with ++ * and we need to change bit 14 to 0b ++ */ ++ act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD; ++ e1000_read_flash_word_ich8lan(hw, act_offset, &data); ++ data &= 0xBFFF; ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset * 2 + 1, ++ (u8)(data >> 8)); ++ if (ret_val) { ++ e1000_release_nvm(hw); ++ goto out; ++ } ++ ++ /* ++ * And invalidate the previously valid segment by setting ++ * its signature word (0x13) high_byte to 0b. This can be ++ * done without an erase because flash erase sets all bits ++ * to 1's. We can write 1's to 0's without an erase ++ */ ++ act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1; ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset, 0); ++ if (ret_val) { ++ e1000_release_nvm(hw); ++ goto out; ++ } ++ ++ /* Great! Everything worked, we can now clear the cached entries. 
*/ ++ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { ++ dev_spec->shadow_ram[i].modified = FALSE; ++ dev_spec->shadow_ram[i].value = 0xFFFF; ++ } ++ ++ e1000_release_nvm(hw); ++ ++ /* ++ * Reload the EEPROM, or else modifications will not appear ++ * until after the next adapter reset. ++ */ ++ e1000_reload_nvm(hw); ++ msec_delay(10); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_validate_nvm_checksum_ich8lan - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Check to see if checksum needs to be fixed by reading bit 6 in word 0x19. ++ * If the bit is 0, that the EEPROM had been modified, but the checksum was ++ * not calculated, in which case we need to calculate the checksum and set ++ * bit 6. ++ **/ ++static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 data; ++ ++ DEBUGFUNC("e1000_validate_nvm_checksum_ich8lan"); ++ ++ /* ++ * Read 0x19 and check bit 6. If this bit is 0, the checksum ++ * needs to be fixed. This bit is an indication that the NVM ++ * was prepared by OEM software and did not calculate the ++ * checksum...a likely scenario. ++ */ ++ ret_val = e1000_read_nvm(hw, 0x19, 1, &data); ++ if (ret_val) ++ goto out; ++ ++ if ((data & 0x40) == 0) { ++ data |= 0x40; ++ ret_val = e1000_write_nvm(hw, 0x19, 1, &data); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_update_nvm_checksum(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_validate_nvm_checksum_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_flash_data_ich8lan - Writes bytes to the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the byte/word to read. ++ * @size: Size of data to read, 1=byte 2=word ++ * @data: The byte(s) to write to the NVM. ++ * ++ * Writes one/two bytes to the NVM using the flash access registers. ++ **/ ++static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16 data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ u32 flash_data = 0; ++ s32 ret_val = -E1000_ERR_NVM; ++ u8 count = 0; ++ ++ DEBUGFUNC("e1000_write_ich8_data"); ++ ++ if (size < 1 || size > 2 || data > size * 0xff || ++ offset > ICH_FLASH_LINEAR_ADDR_MASK) ++ goto out; ++ ++ flash_linear_addr = (ICH_FLASH_LINEAR_ADDR_MASK & offset) + ++ hw->nvm.flash_base_addr; ++ ++ do { ++ usec_delay(1); ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val != E1000_SUCCESS) ++ break; ++ ++ hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ ++ hsflctl.hsf_ctrl.fldbcount = size -1; ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; ++ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); ++ ++ if (size == 1) ++ flash_data = (u32)data & 0x00FF; ++ else ++ flash_data = (u32)data; ++ ++ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FDATA0, flash_data); ++ ++ /* ++ * check if FCERR is set to 1 , if set to 1, clear it ++ * and try the whole sequence a few more times else done ++ */ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_WRITE_COMMAND_TIMEOUT); ++ if (ret_val == E1000_SUCCESS) { ++ break; ++ } else { ++ /* ++ * If we're here, then things are most likely ++ * completely hosed, but if the error condition ++ * is detected, it won't hurt to give it another ++ * try...ICH_FLASH_CYCLE_REPEAT_COUNT times. 
++ */ ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ++ ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* Repeat for some time before giving up. */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ DEBUGOUT("Timeout error - flash cycle " ++ "did not complete."); ++ break; ++ } ++ } ++ } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_flash_byte_ich8lan - Write a single byte to NVM ++ * @hw: pointer to the HW structure ++ * @offset: The index of the byte to read. ++ * @data: The byte to write to the NVM. ++ * ++ * Writes a single byte to the NVM using the flash access registers. ++ **/ ++static s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 data) ++{ ++ u16 word = (u16)data; ++ ++ DEBUGFUNC("e1000_write_flash_byte_ich8lan"); ++ ++ return e1000_write_flash_data_ich8lan(hw, offset, 1, word); ++} ++ ++/** ++ * e1000_retry_write_flash_byte_ich8lan - Writes a single byte to NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset of the byte to write. ++ * @byte: The byte to write to the NVM. ++ * ++ * Writes a single byte to the NVM using the flash access registers. ++ * Goes through a retry algorithm before giving up. ++ **/ ++static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, ++ u32 offset, u8 byte) ++{ ++ s32 ret_val; ++ u16 program_retries; ++ ++ DEBUGFUNC("e1000_retry_write_flash_byte_ich8lan"); ++ ++ ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); ++ if (ret_val == E1000_SUCCESS) ++ goto out; ++ ++ for (program_retries = 0; program_retries < 100; program_retries++) { ++ DEBUGOUT2("Retrying Byte %2.2X at offset %u\n", byte, offset); ++ usec_delay(100); ++ ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); ++ if (ret_val == E1000_SUCCESS) ++ break; ++ } ++ if (program_retries == 100) { ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_erase_flash_bank_ich8lan - Erase a bank (4k) from NVM ++ * @hw: pointer to the HW structure ++ * @bank: 0 for first bank, 1 for second bank, etc. ++ * ++ * Erases the bank specified. Each bank is a 4k block. Banks are 0 based. ++ * bank N is 4096 * N + flash_reg_addr. ++ **/ ++static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ /* bank size is in 16bit words - adjust to bytes */ ++ u32 flash_bank_size = nvm->flash_bank_size * 2; ++ s32 ret_val = E1000_SUCCESS; ++ s32 count = 0; ++ s32 j, iteration, sector_size; ++ ++ DEBUGFUNC("e1000_erase_flash_bank_ich8lan"); ++ ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); ++ ++ /* ++ * Determine HW Sector size: Read BERASE bits of hw flash status ++ * register ++ * 00: The Hw sector is 256 bytes, hence we need to erase 16 ++ * consecutive sectors. The start index for the nth Hw sector ++ * can be calculated as = bank * 4096 + n * 256 ++ * 01: The Hw sector is 4K bytes, hence we need to erase 1 sector. 
++ * The start index for the nth Hw sector can be calculated ++ * as = bank * 4096 ++ * 10: The Hw sector is 8K bytes, nth sector = bank * 8192 ++ * (ich9 only, otherwise error condition) ++ * 11: The Hw sector is 64K bytes, nth sector = bank * 65536 ++ */ ++ switch (hsfsts.hsf_status.berasesz) { ++ case 0: ++ /* Hw sector size 256 */ ++ sector_size = ICH_FLASH_SEG_SIZE_256; ++ iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_256; ++ break; ++ case 1: ++ sector_size = ICH_FLASH_SEG_SIZE_4K; ++ iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_4K; ++ break; ++ case 2: ++ if (hw->mac.type == e1000_ich9lan) { ++ sector_size = ICH_FLASH_SEG_SIZE_8K; ++ iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_8K; ++ } else { ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ break; ++ case 3: ++ sector_size = ICH_FLASH_SEG_SIZE_64K; ++ iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_64K; ++ break; ++ default: ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ /* Start with the base address, then add the sector offset. */ ++ flash_linear_addr = hw->nvm.flash_base_addr; ++ flash_linear_addr += (bank) ? (sector_size * iteration) : 0; ++ ++ for (j = 0; j < iteration ; j++) { ++ do { ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Write a value 11 (block Erase) in Flash ++ * Cycle field in hw flash control ++ */ ++ hsflctl.regval = E1000_READ_FLASH_REG16(hw, ++ ICH_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE; ++ E1000_WRITE_FLASH_REG16(hw, ++ ICH_FLASH_HSFCTL, ++ hsflctl.regval); ++ ++ /* ++ * Write the last 24 bits of an index within the ++ * block into Flash Linear address field in Flash ++ * Address. ++ */ ++ flash_linear_addr += (j * sector_size); ++ E1000_WRITE_FLASH_REG(hw, ++ ICH_FLASH_FADDR, ++ flash_linear_addr); ++ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_ERASE_COMMAND_TIMEOUT); ++ if (ret_val == E1000_SUCCESS) { ++ break; ++ } else { ++ /* ++ * Check if FCERR is set to 1. If 1, ++ * clear it and try the whole sequence ++ * a few more times else Done ++ */ ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ++ ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* ++ * repeat for some time before ++ * giving up ++ */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) ++ goto out; ++ } ++ } while (++count < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_valid_led_default_ich8lan - Set the default LED settings ++ * @hw: pointer to the HW structure ++ * @data: Pointer to the LED settings ++ * ++ * Reads the LED default settings from the NVM to data. If the NVM LED ++ * settings is all 0's or F's, set the LED default to a valid LED default ++ * setting. ++ **/ ++static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_valid_led_default_ich8lan"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || ++ *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT_ICH8LAN; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_bus_info_ich8lan - Get/Set the bus type and width ++ * @hw: pointer to the HW structure ++ * ++ * ICH8 use the PCI Express bus, but does not contain a PCI Express Capability ++ * register, so the the bus width is hard coded. 
++ **/ ++static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_get_bus_info_ich8lan"); ++ ++ ret_val = e1000_get_bus_info_pcie_generic(hw); ++ ++ /* ++ * ICH devices are "PCI Express"-ish. They have ++ * a configuration space, but do not contain ++ * PCI Express Capability registers, so bus width ++ * must be hardcoded. ++ */ ++ if (bus->width == e1000_bus_width_unknown) ++ bus->width = e1000_bus_width_pcie_x1; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_ich8lan - Reset the hardware ++ * @hw: pointer to the HW structure ++ * ++ * Does a full reset of the hardware which includes a reset of the PHY and ++ * MAC. ++ **/ ++static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) ++{ ++ u32 ctrl, icr, kab; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_reset_hw_ich8lan"); ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000_disable_pcie_master_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("PCI-E Master disable polling has failed.\n"); ++ } ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ ++ /* ++ * Disable the Transmit and Receive units. Then delay to allow ++ * any pending transactions to complete before we hit the MAC ++ * with the global reset. ++ */ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ msec_delay(10); ++ ++ /* Workaround for ICH8 bit corruption issue in FIFO memory */ ++ if (hw->mac.type == e1000_ich8lan) { ++ /* Set Tx and Rx buffer allocation to 8k apiece. */ ++ E1000_WRITE_REG(hw, E1000_PBA, E1000_PBA_8K); ++ /* Set Packet Buffer Size to 16k. */ ++ E1000_WRITE_REG(hw, E1000_PBS, E1000_PBS_16K); ++ } ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ if (!e1000_check_reset_block(hw) && !hw->phy.reset_disable) { ++ /* ++ * PHY HW reset requires MAC CORE reset at the same ++ * time to make sure the interface between MAC and the ++ * external PHY is reset. ++ */ ++ ctrl |= E1000_CTRL_PHY_RST; ++ } ++ ret_val = e1000_acquire_swflag_ich8lan(hw); ++ DEBUGOUT("Issuing a global reset to ich8lan"); ++ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl | E1000_CTRL_RST)); ++ msec_delay(20); ++ ++ ret_val = e1000_get_auto_rd_done_generic(hw); ++ if (ret_val) { ++ /* ++ * When auto config read does not complete, do not ++ * return with an error. This can happen in situations ++ * where there is no eeprom and prevents getting link. 
++ */ ++ DEBUGOUT("Auto Read Done did not complete\n"); ++ } ++ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ kab = E1000_READ_REG(hw, E1000_KABGTXD); ++ kab |= E1000_KABGTXD_BGSQLBIAS; ++ E1000_WRITE_REG(hw, E1000_KABGTXD, kab); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_ich8lan - Initialize the hardware ++ * @hw: pointer to the HW structure ++ * ++ * Prepares the hardware for transmit and receive by doing the following: ++ * - initialize hardware bits ++ * - initialize LED identification ++ * - setup receive address registers ++ * - setup flow control ++ * - setup transmit discriptors ++ * - clear statistics ++ **/ ++static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 ctrl_ext, txdctl, snoop; ++ s32 ret_val; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_hw_ich8lan"); ++ ++ e1000_initialize_hw_bits_ich8lan(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000_id_led_init_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Setup the receive address. */ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ /* Set the transmit descriptor write-back policy for both queues */ ++ txdctl = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ txdctl = (txdctl & ~E1000_TXDCTL_PTHRESH) | ++ E1000_TXDCTL_MAX_TX_DESC_PREFETCH; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), txdctl); ++ txdctl = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ txdctl = (txdctl & ~E1000_TXDCTL_PTHRESH) | ++ E1000_TXDCTL_MAX_TX_DESC_PREFETCH; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), txdctl); ++ ++ /* ++ * ICH8 has opposite polarity of no_snoop bits. ++ * By default, we should use snoop behavior. ++ */ ++ if (mac->type == e1000_ich8lan) ++ snoop = PCIE_ICH8_SNOOP_ALL; ++ else ++ snoop = (u32)~(PCIE_NO_SNOOP_ALL); ++ e1000_set_pcie_no_snoop_generic(hw, snoop); ++ ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_ich8lan(hw); ++ ++ return ret_val; ++} ++/** ++ * e1000_initialize_hw_bits_ich8lan - Initialize required hardware bits ++ * @hw: pointer to the HW structure ++ * ++ * Sets/Clears required hardware bits necessary for correctly setting up the ++ * hardware for transmit and receive. 
++ **/ ++static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ DEBUGFUNC("e1000_initialize_hw_bits_ich8lan"); ++ ++ if (hw->mac.disable_hw_init_bits) ++ goto out; ++ ++ /* Extended Device Control */ ++ reg = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(0)); ++ if (hw->mac.type == e1000_ich8lan) ++ reg |= (1 << 28) | (1 << 29); ++ reg |= (1 << 23) | (1 << 24) | (1 << 26) | (1 << 27); ++ E1000_WRITE_REG(hw, E1000_TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(1)); ++ if (E1000_READ_REG(hw, E1000_TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ reg |= (1 << 24) | (1 << 26) | (1 << 30); ++ E1000_WRITE_REG(hw, E1000_TARC(1), reg); ++ ++ /* Device Status */ ++ if (hw->mac.type == e1000_ich8lan) { ++ reg = E1000_READ_REG(hw, E1000_STATUS); ++ reg &= ~(1 << 31); ++ E1000_WRITE_REG(hw, E1000_STATUS, reg); ++ } ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_setup_link_ich8lan - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_link_ich8lan"); ++ ++ if (e1000_check_reset_block(hw)) ++ goto out; ++ ++ /* ++ * ICH parts do not have a word in the NVM to determine ++ * the default flow control setting, so we explicitly ++ * set it to full. ++ */ ++ if (hw->fc.type == e1000_fc_default) ++ hw->fc.type = e1000_fc_full; ++ ++ hw->fc.original_type = hw->fc.type; ++ ++ DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc.type); ++ ++ /* Continue to configure the copper link. */ ++ ret_val = func->setup_physical_interface(hw); ++ if (ret_val) ++ goto out; ++ ++ E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); ++ ++ ret_val = e1000_set_fc_watermarks_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_ich8lan - Configure MAC/PHY interface ++ * @hw: pointer to the HW structure ++ * ++ * Configures the kumeran interface to the PHY to wait the appropriate time ++ * when polling the PHY, then call the generic setup_copper_link to finish ++ * configuring the copper link. ++ **/ ++static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u16 reg_data; ++ ++ DEBUGFUNC("e1000_setup_copper_link_ich8lan"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* ++ * Set the mac to wait the maximum time between each iteration ++ * and increase the max iterations when polling the phy; ++ * this fixes erroneous timeouts at 10Mbps. 
++ */ ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 4), 0xFFFF); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_read_kmrn_reg(hw, GG82563_REG(0x34, 9), ®_data); ++ if (ret_val) ++ goto out; ++ reg_data |= 0x3F; ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 9), reg_data); ++ if (ret_val) ++ goto out; ++ ++ if (hw->phy.type == e1000_phy_igp_3) { ++ ret_val = e1000_copper_link_setup_igp(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (hw->phy.type == e1000_phy_ife) { ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_MDIX_CONTROL, ®_data); ++ if (ret_val) ++ goto out; ++ ++ reg_data &= ~IFE_PMC_AUTO_MDIX; ++ ++ switch (hw->phy.mdix) { ++ case 1: ++ reg_data &= ~IFE_PMC_FORCE_MDIX; ++ break; ++ case 2: ++ reg_data |= IFE_PMC_FORCE_MDIX; ++ break; ++ case 0: ++ default: ++ reg_data |= IFE_PMC_AUTO_MDIX; ++ break; ++ } ++ ret_val = e1000_write_phy_reg(hw, IFE_PHY_MDIX_CONTROL, reg_data); ++ if (ret_val) ++ goto out; ++ } ++ ret_val = e1000_setup_copper_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_link_up_info_ich8lan - Get current link speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to store current link speed ++ * @duplex: pointer to store the current link duplex ++ * ++ * Calls the generic get_speed_and_duplex to retreive the current link ++ * information and then calls the Kumeran lock loss workaround for links at ++ * gigabit speeds. ++ **/ ++static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_get_link_up_info_ich8lan"); ++ ++ ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, duplex); ++ if (ret_val) ++ goto out; ++ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3) && ++ (*speed == SPEED_1000)) { ++ ret_val = e1000_kmrn_lock_loss_workaround_ich8lan(hw); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_kmrn_lock_loss_workaround_ich8lan - Kumeran workaround ++ * @hw: pointer to the HW structure ++ * ++ * Work-around for 82566 Kumeran PCS lock loss: ++ * On link status change (i.e. PCI reset, speed change) and link is up and ++ * speed is gigabit- ++ * 0) if workaround is optionally disabled do nothing ++ * 1) wait 1ms for Kumeran link to come up ++ * 2) check Kumeran Diagnostic register PCS lock loss bit ++ * 3) if not set the link is locked (all is good), otherwise... ++ * 4) reset the PHY ++ * 5) repeat up to 10 times ++ * Note: this is only called for IGP3 copper when speed is 1gb. ++ **/ ++static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ u32 phy_ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i, data; ++ bool link; ++ ++ DEBUGFUNC("e1000_kmrn_lock_loss_workaround_ich8lan"); ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ if (!(dev_spec->kmrn_lock_loss_workaround_enabled)) ++ goto out; ++ ++ /* ++ * Make sure link is up before proceeding. If not just return. 
++ * Attempting this while link is negotiating fouled up link ++ * stability ++ */ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (!link) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ for (i = 0; i < 10; i++) { ++ /* read once to clear */ ++ ret_val = e1000_read_phy_reg(hw, IGP3_KMRN_DIAG, &data); ++ if (ret_val) ++ goto out; ++ /* and again to get new status */ ++ ret_val = e1000_read_phy_reg(hw, IGP3_KMRN_DIAG, &data); ++ if (ret_val) ++ goto out; ++ ++ /* check for PCS lock */ ++ if (!(data & IGP3_KMRN_DIAG_PCS_LOCK_LOSS)) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* Issue PHY reset */ ++ e1000_phy_hw_reset(hw); ++ msec_delay_irq(5); ++ } ++ /* Disable GigE link negotiation */ ++ phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); ++ phy_ctrl |= (E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); ++ ++ /* ++ * Call gig speed drop workaround on Giga disable before accessing ++ * any PHY registers ++ */ ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* unable to acquire PCS lock */ ++ ret_val = -E1000_ERR_PHY; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_kmrn_lock_loss_workaound_ich8lan - Set Kumeran workaround state ++ * @hw: pointer to the HW structure ++ * @state: boolean value used to set the current Kumaran workaround state ++ * ++ * If ICH8, set the current Kumeran workaround state (enabled - TRUE ++ * /disabled - FALSE). ++ **/ ++void e1000_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, ++ bool state) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ ++ DEBUGFUNC("e1000_set_kmrn_lock_loss_workaround_ich8lan"); ++ ++ if (hw->mac.type != e1000_ich8lan) { ++ DEBUGOUT("Workaround applies to ICH8 only.\n"); ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ goto out; ++ } ++ ++ dev_spec->kmrn_lock_loss_workaround_enabled = state; ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_ipg3_phy_powerdown_workaround_ich8lan - Power down workaround on D3 ++ * @hw: pointer to the HW structure ++ * ++ * Workaround for 82566 power-down on D3 entry: ++ * 1) disable gigabit link ++ * 2) write VR power-down enable ++ * 3) read it back ++ * Continue if successful, else issue LCD reset and repeat ++ **/ ++void e1000_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ u16 data; ++ u8 retry = 0; ++ ++ DEBUGFUNC("e1000_igp3_phy_powerdown_workaround_ich8lan"); ++ ++ if (hw->phy.type != e1000_phy_igp_3) ++ goto out; ++ ++ /* Try the workaround twice (if needed) */ ++ do { ++ /* Disable link */ ++ reg = E1000_READ_REG(hw, E1000_PHY_CTRL); ++ reg |= (E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, reg); ++ ++ /* ++ * Call gig speed drop workaround on Giga disable before ++ * accessing any PHY registers ++ */ ++ if (hw->mac.type == e1000_ich8lan) ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* Write VR power-down enable */ ++ e1000_read_phy_reg(hw, IGP3_VR_CTRL, &data); ++ data &= ~IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; ++ e1000_write_phy_reg(hw, ++ IGP3_VR_CTRL, ++ data | IGP3_VR_CTRL_MODE_SHUTDOWN); ++ ++ /* Read it back and test */ ++ e1000_read_phy_reg(hw, IGP3_VR_CTRL, &data); ++ data &= IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; ++ if ((data == IGP3_VR_CTRL_MODE_SHUTDOWN) || retry) ++ break; ++ ++ /* Issue PHY reset and repeat at most one more time */ ++ reg = E1000_READ_REG(hw, E1000_CTRL); 
++ E1000_WRITE_REG(hw, E1000_CTRL, reg | E1000_CTRL_PHY_RST); ++ retry++; ++ } while (retry); ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_gig_downshift_workaround_ich8lan - WoL from S5 stops working ++ * @hw: pointer to the HW structure ++ * ++ * Steps to take when dropping from 1Gb/s (eg. link cable removal (LSC), ++ * LPLU, Giga disable, MDIC PHY reset): ++ * 1) Set Kumeran Near-end loopback ++ * 2) Clear Kumeran Near-end loopback ++ * Should only be called for ICH8[m] devices with IGP_3 Phy. ++ **/ ++void e1000_gig_downshift_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 reg_data; ++ ++ DEBUGFUNC("e1000_gig_downshift_workaround_ich8lan"); ++ ++ if ((hw->mac.type != e1000_ich8lan) || ++ (hw->phy.type != e1000_phy_igp_3)) ++ goto out; ++ ++ ret_val = e1000_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ ®_data); ++ if (ret_val) ++ goto out; ++ reg_data |= E1000_KMRNCTRLSTA_DIAG_NELPBK; ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ reg_data); ++ if (ret_val) ++ goto out; ++ reg_data &= ~E1000_KMRNCTRLSTA_DIAG_NELPBK; ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ reg_data); ++out: ++ return; ++} ++ ++/** ++ * e1000_cleanup_led_ich8lan - Restore the default LED operation ++ * @hw: pointer to the HW structure ++ * ++ * Return the LED back to the default configuration. ++ **/ ++static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_cleanup_led_ich8lan"); ++ ++ if (hw->phy.type == e1000_phy_ife) ++ ret_val = e1000_write_phy_reg(hw, ++ IFE_PHY_SPECIAL_CONTROL_LED, ++ 0); ++ else ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_led_on_ich8lan - Turn LED's on ++ * @hw: pointer to the HW structure ++ * ++ * Turn on the LED's. ++ **/ ++static s32 e1000_led_on_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_led_on_ich8lan"); ++ ++ if (hw->phy.type == e1000_phy_ife) ++ ret_val = e1000_write_phy_reg(hw, ++ IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_ON)); ++ else ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_led_off_ich8lan - Turn LED's off ++ * @hw: pointer to the HW structure ++ * ++ * Turn off the LED's. ++ **/ ++static s32 e1000_led_off_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_led_off_ich8lan"); ++ ++ if (hw->phy.type == e1000_phy_ife) ++ ret_val = e1000_write_phy_reg(hw, ++ IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_OFF)); ++ else ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cfg_done_ich8lan - Read config done bit ++ * @hw: pointer to the HW structure ++ * ++ * Read the management control register for the config done bit for ++ * completion status. NOTE: silicon which is EEPROM-less will fail trying ++ * to read the config done bit, so an error is *ONLY* logged and returns ++ * E1000_SUCCESS. If we were to return with error, EEPROM-less silicon ++ * would not be able to be reset or change link. 
++ **/ ++static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ e1000_get_cfg_done_generic(hw); ++ ++ /* If EEPROM is not marked present, init the IGP 3 PHY manually */ ++ if (((E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) == 0) && ++ (hw->phy.type == e1000_phy_igp_3)) { ++ e1000_phy_init_script_igp3(hw); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_ich8lan - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(e1000_check_mng_mode(hw) || e1000_check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_ich8lan - Clear statistical counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears hardware counters specific to the silicon family and calls ++ * clear_hw_cntrs_generic to clear all general purpose counters. ++ **/ ++static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_ich8lan"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++ ++ temp = E1000_READ_REG(hw, E1000_MGTPRC); ++ temp = E1000_READ_REG(hw, E1000_MGTPDC); ++ temp = E1000_READ_REG(hw, E1000_MGTPTC); ++ ++ temp = E1000_READ_REG(hw, E1000_IAC); ++ temp = E1000_READ_REG(hw, E1000_ICRXOC); ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_mac.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_mac.h 2021-04-07 16:01:27.738633436 +0800 +@@ -0,0 +1,86 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_MAC_H_ ++#define _E1000_MAC_H_ ++ ++/* ++ * Functions that should not be called directly from drivers but can be used ++ * by other files in this 'shared code' ++ */ ++s32 e1000_blink_led_generic(struct e1000_hw *hw); ++s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw); ++s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw); ++s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw); ++s32 e1000_cleanup_led_generic(struct e1000_hw *hw); ++s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw); ++s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw); ++s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw); ++s32 e1000_force_mac_fc_generic(struct e1000_hw *hw); ++s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw); ++s32 e1000_get_bus_info_pci_generic(struct e1000_hw *hw); ++s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw); ++s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw); ++s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex); ++s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, ++ u16 *speed, u16 *duplex); ++s32 e1000_id_led_init_generic(struct e1000_hw *hw); ++s32 e1000_led_on_generic(struct e1000_hw *hw); ++s32 e1000_led_off_generic(struct e1000_hw *hw); ++void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count, ++ u32 rar_used_count, u32 rar_count); ++s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw); ++s32 e1000_set_default_fc_generic(struct e1000_hw *hw); ++s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw); ++s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw); ++s32 e1000_setup_led_generic(struct e1000_hw *hw); ++s32 e1000_setup_link_generic(struct e1000_hw *hw); ++s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw); ++s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, ++ u32 offset, u8 data); ++ ++u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr); ++ ++void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw); ++void e1000_clear_vfta_generic(struct e1000_hw *hw); ++void e1000_config_collision_dist_generic(struct e1000_hw *hw); ++void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count); ++void e1000_mta_set_generic(struct e1000_hw *hw, u32 hash_value); ++void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw); ++void e1000_put_hw_semaphore_generic(struct e1000_hw *hw); ++void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index); ++s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); ++void e1000_remove_device_generic(struct e1000_hw *hw); ++void e1000_reset_adaptive_generic(struct e1000_hw *hw); ++void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop); ++void e1000_update_adaptive_generic(struct e1000_hw *hw); ++void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000.h 2021-04-07 16:01:27.734633442 +0800 +@@ -0,0 +1,425 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. 
++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++ ++/* Linux PRO/1000 Ethernet Driver main header file */ ++ ++#ifndef _E1000_H_ ++#define _E1000_H_ ++ ++#include "kcompat.h" ++ ++#include "e1000_api.h" ++ ++#define BAR_0 0 ++#define BAR_1 1 ++#define BAR_5 5 ++ ++#define INTEL_E1000_ETHERNET_DEVICE(device_id) {\ ++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)} ++ ++struct e1000_adapter; ++ ++#define E1000_DBG(args...) ++ ++#define E1000_ERR(args...) printk(KERN_ERR "e1000: " args) ++ ++#define PFX "e1000: " ++#define DPRINTK(nlevel, klevel, fmt, args...) \ ++ (void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \ ++ printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \ ++ __FUNCTION__ , ## args)) ++ ++#define E1000_MAX_INTR 10 ++ ++/* TX/RX descriptor defines */ ++#define E1000_DEFAULT_TXD 256 ++#define E1000_MAX_TXD 256 ++#define E1000_MIN_TXD 80 ++#define E1000_MAX_82544_TXD 4096 ++ ++#define E1000_DEFAULT_RXD 256 ++#define E1000_MAX_RXD 256 ++ ++#define E1000_MIN_RXD 80 ++#define E1000_MAX_82544_RXD 4096 ++ ++#define E1000_MIN_ITR_USECS 10 /* 100000 irq/sec */ ++#define E1000_MAX_ITR_USECS 10000 /* 100 irq/sec */ ++ ++#ifdef CONFIG_E1000_MQ ++#define E1000_MAX_TX_QUEUES 4 ++#endif ++ ++/* this is the size past which hardware will drop packets when setting LPE=0 */ ++#define MAXIMUM_ETHERNET_VLAN_SIZE 1522 ++ ++/* Supported Rx Buffer Sizes */ ++#define E1000_RXBUFFER_128 128 /* Used for packet split */ ++#define E1000_RXBUFFER_256 256 /* Used for packet split */ ++#define E1000_RXBUFFER_512 512 ++#define E1000_RXBUFFER_1024 1024 ++#define E1000_RXBUFFER_2048 2048 ++#define E1000_RXBUFFER_4096 4096 ++#define E1000_RXBUFFER_8192 8192 ++#define E1000_RXBUFFER_16384 16384 ++ ++/* SmartSpeed delimiters */ ++#define E1000_SMARTSPEED_DOWNSHIFT 3 ++#define E1000_SMARTSPEED_MAX 15 ++ ++/* Packet Buffer allocations */ ++#define E1000_PBA_BYTES_SHIFT 0xA ++#define E1000_TX_HEAD_ADDR_SHIFT 7 ++#define E1000_PBA_TX_MASK 0xFFFF0000 ++ ++/* Early Receive defines */ ++#define E1000_ERT_2048 0x100 ++ ++#define E1000_FC_PAUSE_TIME 0x0680 /* 858 usec */ ++ ++/* How many Tx Descriptors do we need to call netif_wake_queue ? */ ++#define E1000_TX_QUEUE_WAKE 16 ++/* How many Rx Buffers do we bundle into one write to the hardware ? 
*/ ++#define E1000_RX_BUFFER_WRITE 16 /* Must be power of 2 */ ++ ++#define AUTO_ALL_MODES 0 ++#define E1000_EEPROM_82544_APM 0x0004 ++#define E1000_EEPROM_APME 0x0400 ++ ++#ifndef E1000_MASTER_SLAVE ++/* Switch to override PHY master/slave setting */ ++#define E1000_MASTER_SLAVE e1000_ms_hw_default ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#define E1000_MNG_VLAN_NONE -1 ++#endif ++/* Number of packet split data buffers (not including the header buffer) */ ++#define PS_PAGE_BUFFERS MAX_PS_BUFFERS-1 ++ ++/* wrapper around a pointer to a socket buffer, ++ * so a DMA handle can be stored along with the buffer */ ++struct e1000_buffer { ++ struct rtskb *skb; ++ dma_addr_t dma; ++ unsigned long time_stamp; ++ u16 length; ++ u16 next_to_watch; ++}; ++ ++struct e1000_rx_buffer { ++ struct rtskb *skb; ++ dma_addr_t dma; ++ struct page *page; ++}; ++ ++#ifdef CONFIG_E1000_MQ ++struct e1000_queue_stats { ++ u64 packets; ++ u64 bytes; ++}; ++#endif ++ ++struct e1000_ps_page { struct page *ps_page[PS_PAGE_BUFFERS]; }; ++struct e1000_ps_page_dma { u64 ps_page_dma[PS_PAGE_BUFFERS]; }; ++ ++struct e1000_tx_ring { ++ /* pointer to the descriptor ring memory */ ++ void *desc; ++ /* physical address of the descriptor ring */ ++ dma_addr_t dma; ++ /* length of descriptor ring in bytes */ ++ unsigned int size; ++ /* number of descriptors in the ring */ ++ unsigned int count; ++ /* next descriptor to associate a buffer with */ ++ unsigned int next_to_use; ++ /* next descriptor to check for DD status bit */ ++ unsigned int next_to_clean; ++ /* array of buffer information structs */ ++ struct e1000_buffer *buffer_info; ++ ++#ifdef CONFIG_E1000_MQ ++ /* for tx ring cleanup - needed for multiqueue */ ++ spinlock_t tx_queue_lock; ++#endif ++ rtdm_lock_t tx_lock; ++ u16 tdh; ++ u16 tdt; ++#ifdef CONFIG_E1000_MQ ++ struct e1000_queue_stats tx_stats; ++#endif ++ bool last_tx_tso; ++}; ++ ++struct e1000_rx_ring { ++ struct e1000_adapter *adapter; /* back link */ ++ /* pointer to the descriptor ring memory */ ++ void *desc; ++ /* physical address of the descriptor ring */ ++ dma_addr_t dma; ++ /* length of descriptor ring in bytes */ ++ unsigned int size; ++ /* number of descriptors in the ring */ ++ unsigned int count; ++ /* next descriptor to associate a buffer with */ ++ unsigned int next_to_use; ++ /* next descriptor to check for DD status bit */ ++ unsigned int next_to_clean; ++#ifdef CONFIG_E1000_NAPI ++ struct napi_struct napi; ++#endif ++ /* array of buffer information structs */ ++ struct e1000_rx_buffer *buffer_info; ++ /* arrays of page information for packet split */ ++ struct e1000_ps_page *ps_page; ++ struct e1000_ps_page_dma *ps_page_dma; ++ struct sk_buff *rx_skb_top; ++ ++ /* cpu for rx queue */ ++ int cpu; ++ ++ u16 rdh; ++ u16 rdt; ++#ifdef CONFIG_E1000_MQ ++ struct e1000_queue_stats rx_stats; ++#endif ++}; ++ ++#define E1000_DESC_UNUSED(R) \ ++ ((((R)->next_to_clean > (R)->next_to_use) ? 
0 : (R)->count) + \ ++ (R)->next_to_clean - (R)->next_to_use - 1) ++ ++#define E1000_RX_DESC_PS(R, i) \ ++ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) ++#define E1000_RX_DESC_EXT(R, i) \ ++ (&(((union e1000_rx_desc_extended *)((R).desc))[i])) ++#define E1000_GET_DESC(R, i, type) (&(((struct type *)((R).desc))[i])) ++#define E1000_RX_DESC(R, i) E1000_GET_DESC(R, i, e1000_rx_desc) ++#define E1000_TX_DESC(R, i) E1000_GET_DESC(R, i, e1000_tx_desc) ++#define E1000_CONTEXT_DESC(R, i) E1000_GET_DESC(R, i, e1000_context_desc) ++ ++#ifdef SIOCGMIIPHY ++/* PHY register snapshot values */ ++struct e1000_phy_regs { ++ u16 bmcr; /* basic mode control register */ ++ u16 bmsr; /* basic mode status register */ ++ u16 advertise; /* auto-negotiation advertisement */ ++ u16 lpa; /* link partner ability register */ ++ u16 expansion; /* auto-negotiation expansion reg */ ++ u16 ctrl1000; /* 1000BASE-T control register */ ++ u16 stat1000; /* 1000BASE-T status register */ ++ u16 estatus; /* extended status register */ ++}; ++#endif ++ ++/* board specific private data structure */ ++ ++struct e1000_adapter { ++#ifdef NETIF_F_HW_VLAN_TX ++ struct vlan_group *vlgrp; ++ u16 mng_vlan_id; ++#endif ++ u32 bd_number; ++ u32 rx_buffer_len; ++ u32 wol; ++ u32 smartspeed; ++ u32 en_mng_pt; ++ u16 link_speed; ++ u16 link_duplex; ++ rtdm_lock_t stats_lock; ++#ifdef CONFIG_E1000_NAPI ++ spinlock_t tx_queue_lock; ++#endif ++ atomic_t irq_sem; ++ unsigned int total_tx_bytes; ++ unsigned int total_tx_packets; ++ unsigned int total_rx_bytes; ++ unsigned int total_rx_packets; ++ /* Interrupt Throttle Rate */ ++ u32 itr; ++ u32 itr_setting; ++ u16 tx_itr; ++ u16 rx_itr; ++ ++ bool fc_autoneg; ++ ++#ifdef ETHTOOL_PHYS_ID ++ struct timer_list blink_timer; ++ unsigned long led_status; ++#endif ++ ++ /* TX */ ++ struct e1000_tx_ring *tx_ring; /* One per active queue */ ++#ifdef CONFIG_E1000_MQ ++ struct e1000_tx_ring **cpu_tx_ring; /* per-cpu */ ++#endif ++ unsigned int restart_queue; ++ unsigned long tx_queue_len; ++ u32 txd_cmd; ++ u32 tx_int_delay; ++ u32 tx_abs_int_delay; ++ u32 gotc; ++ u64 gotc_old; ++ u64 tpt_old; ++ u64 colc_old; ++ u32 tx_timeout_count; ++ u32 tx_fifo_head; ++ u32 tx_head_addr; ++ u32 tx_fifo_size; ++ u8 tx_timeout_factor; ++ atomic_t tx_fifo_stall; ++ bool pcix_82544; ++ bool detect_tx_hung; ++ ++ /* RX */ ++#ifdef CONFIG_E1000_NAPI ++ bool (*clean_rx) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do); ++#else ++ bool (*clean_rx) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp); ++#endif ++ void (*alloc_rx_buf) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++ struct e1000_rx_ring *rx_ring; /* One per active queue */ ++#ifdef CONFIG_E1000_NAPI ++ //struct napi_struct napi; ++#endif ++ int num_tx_queues; ++ int num_rx_queues; ++ ++ u64 hw_csum_err; ++ u64 hw_csum_good; ++ u64 rx_hdr_split; ++ u32 alloc_rx_buff_failed; ++ u32 rx_int_delay; ++ u32 rx_abs_int_delay; ++ bool rx_csum; ++ unsigned int rx_ps_pages; ++ u32 gorc; ++ u64 gorc_old; ++ u16 rx_ps_bsize0; ++ u32 max_frame_size; ++ u32 min_frame_size; ++ ++ ++ /* OS defined structs */ ++ struct rtnet_device *netdev; ++ struct pci_dev *pdev; ++ struct net_device_stats net_stats; ++ ++ rtdm_irq_t irq_handle; ++ char data_received; ++ ++ /* structs defined in e1000_hw.h */ ++ struct e1000_hw hw; ++ struct e1000_hw_stats stats; ++ struct e1000_phy_info phy_info; ++ struct e1000_phy_stats phy_stats; ++ ++#ifdef 
SIOCGMIIPHY ++ /* Snapshot of PHY registers */ ++ struct e1000_phy_regs phy_regs; ++#endif ++ ++#ifdef ETHTOOL_TEST ++ u32 test_icr; ++ struct e1000_tx_ring test_tx_ring; ++ struct e1000_rx_ring test_rx_ring; ++#endif ++ ++ ++ int msg_enable; ++ /* to not mess up cache alignment, always add to the bottom */ ++ unsigned long state; ++ u32 eeprom_wol; ++ ++ u32 *config_space; ++ ++ /* hardware capability, feature, and workaround flags */ ++ unsigned int flags; ++ ++ struct work_struct reset_task; ++ struct delayed_work watchdog_task; ++ struct delayed_work fifo_stall_task; ++ struct delayed_work phy_info_task; ++}; ++ ++#define E1000_FLAG_HAS_SMBUS (1 << 0) ++#define E1000_FLAG_HAS_MANC2H (1 << 1) ++#define E1000_FLAG_HAS_MSI (1 << 2) ++#define E1000_FLAG_MSI_ENABLED (1 << 3) ++#define E1000_FLAG_HAS_INTR_MODERATION (1 << 4) ++#define E1000_FLAG_RX_NEEDS_RESTART (1 << 5) ++#define E1000_FLAG_BAD_TX_CARRIER_STATS_FD (1 << 6) ++#define E1000_FLAG_INT_ASSERT_AUTO_MASK (1 << 7) ++#define E1000_FLAG_QUAD_PORT_A (1 << 8) ++#define E1000_FLAG_SMART_POWER_DOWN (1 << 9) ++#ifdef NETIF_F_TSO ++#define E1000_FLAG_HAS_TSO (1 << 10) ++#ifdef NETIF_F_TSO6 ++#define E1000_FLAG_HAS_TSO6 (1 << 11) ++#endif ++#define E1000_FLAG_TSO_FORCE (1 << 12) ++#endif ++#define E1000_FLAG_RX_RESTART_NOW (1 << 13) ++ ++enum e1000_state_t { ++ __E1000_TESTING, ++ __E1000_RESETTING, ++ __E1000_DOWN ++}; ++ ++extern char e1000_driver_name[]; ++extern const char e1000_driver_version[]; ++ ++extern void e1000_power_up_phy(struct e1000_hw *hw); ++ ++extern void e1000_set_ethtool_ops(struct net_device *netdev); ++extern void e1000_check_options(struct e1000_adapter *adapter); ++ ++extern int e1000_up(struct e1000_adapter *adapter); ++extern void e1000_down(struct e1000_adapter *adapter); ++extern void e1000_reinit_locked(struct e1000_adapter *adapter); ++extern void e1000_reset(struct e1000_adapter *adapter); ++extern int e1000_set_spd_dplx(struct e1000_adapter *adapter, u16 spddplx); ++extern int e1000_setup_all_rx_resources(struct e1000_adapter *adapter); ++extern int e1000_setup_all_tx_resources(struct e1000_adapter *adapter); ++extern void e1000_free_all_rx_resources(struct e1000_adapter *adapter); ++extern void e1000_free_all_tx_resources(struct e1000_adapter *adapter); ++extern void e1000_update_stats(struct e1000_adapter *adapter); ++#ifdef ETHTOOL_OPS_COMPAT ++extern int ethtool_ioctl(struct ifreq *ifr); ++#endif ++ ++#endif /* _E1000_H_ */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_hw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_hw.h 2021-04-07 16:01:27.729633449 +0800 +@@ -0,0 +1,711 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_HW_H_ ++#define _E1000_HW_H_ ++ ++#include "e1000_osdep.h" ++#include "e1000_regs.h" ++#include "e1000_defines.h" ++ ++struct e1000_hw; ++ ++#define E1000_DEV_ID_82542 0x1000 ++#define E1000_DEV_ID_82543GC_FIBER 0x1001 ++#define E1000_DEV_ID_82543GC_COPPER 0x1004 ++#define E1000_DEV_ID_82544EI_COPPER 0x1008 ++#define E1000_DEV_ID_82544EI_FIBER 0x1009 ++#define E1000_DEV_ID_82544GC_COPPER 0x100C ++#define E1000_DEV_ID_82544GC_LOM 0x100D ++#define E1000_DEV_ID_82540EM 0x100E ++#define E1000_DEV_ID_82540EM_LOM 0x1015 ++#define E1000_DEV_ID_82540EP_LOM 0x1016 ++#define E1000_DEV_ID_82540EP 0x1017 ++#define E1000_DEV_ID_82540EP_LP 0x101E ++#define E1000_DEV_ID_82545EM_COPPER 0x100F ++#define E1000_DEV_ID_82545EM_FIBER 0x1011 ++#define E1000_DEV_ID_82545GM_COPPER 0x1026 ++#define E1000_DEV_ID_82545GM_FIBER 0x1027 ++#define E1000_DEV_ID_82545GM_SERDES 0x1028 ++#define E1000_DEV_ID_82546EB_COPPER 0x1010 ++#define E1000_DEV_ID_82546EB_FIBER 0x1012 ++#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D ++#define E1000_DEV_ID_82546GB_COPPER 0x1079 ++#define E1000_DEV_ID_82546GB_FIBER 0x107A ++#define E1000_DEV_ID_82546GB_SERDES 0x107B ++#define E1000_DEV_ID_82546GB_PCIE 0x108A ++#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 ++#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 ++#define E1000_DEV_ID_82541EI 0x1013 ++#define E1000_DEV_ID_82541EI_MOBILE 0x1018 ++#define E1000_DEV_ID_82541ER_LOM 0x1014 ++#define E1000_DEV_ID_82541ER 0x1078 ++#define E1000_DEV_ID_82541GI 0x1076 ++#define E1000_DEV_ID_82541GI_LF 0x107C ++#define E1000_DEV_ID_82541GI_MOBILE 0x1077 ++#define E1000_DEV_ID_82547EI 0x1019 ++#define E1000_DEV_ID_82547EI_MOBILE 0x101A ++#define E1000_DEV_ID_82547GI 0x1075 ++#define E1000_DEV_ID_82571EB_COPPER 0x105E ++#define E1000_DEV_ID_82571EB_FIBER 0x105F ++#define E1000_DEV_ID_82571EB_SERDES 0x1060 ++#define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 ++#define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA ++#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 ++#define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 ++#define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 ++#define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC ++#define E1000_DEV_ID_82572EI_COPPER 0x107D ++#define E1000_DEV_ID_82572EI_FIBER 0x107E ++#define E1000_DEV_ID_82572EI_SERDES 0x107F ++#define E1000_DEV_ID_82572EI 0x10B9 ++#define E1000_DEV_ID_82573E 0x108B ++#define E1000_DEV_ID_82573E_IAMT 0x108C ++#define E1000_DEV_ID_82573L 0x109A ++#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 ++#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 ++#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA ++#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB ++#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 ++#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A ++#define E1000_DEV_ID_ICH8_IGP_C 0x104B ++#define E1000_DEV_ID_ICH8_IFE 0x104C ++#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 ++#define E1000_DEV_ID_ICH8_IFE_G 0x10C5 ++#define E1000_DEV_ID_ICH8_IGP_M 0x104D ++#define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD ++#define E1000_DEV_ID_ICH9_IGP_C 0x294C ++#define E1000_DEV_ID_ICH9_IFE 0x10C0 ++#define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 ++#define E1000_DEV_ID_ICH9_IFE_G 0x10C2 ++ ++#define E1000_REVISION_0 0 ++#define E1000_REVISION_1 1 
++#define E1000_REVISION_2 2 ++#define E1000_REVISION_3 3 ++#define E1000_REVISION_4 4 ++ ++#define E1000_FUNC_0 0 ++#define E1000_FUNC_1 1 ++ ++typedef enum { ++ e1000_undefined = 0, ++ e1000_82542, ++ e1000_82543, ++ e1000_82544, ++ e1000_82540, ++ e1000_82545, ++ e1000_82545_rev_3, ++ e1000_82546, ++ e1000_82546_rev_3, ++ e1000_82541, ++ e1000_82541_rev_2, ++ e1000_82547, ++ e1000_82547_rev_2, ++ e1000_82571, ++ e1000_82572, ++ e1000_82573, ++ e1000_80003es2lan, ++ e1000_ich8lan, ++ e1000_ich9lan, ++ e1000_num_macs /* List is 1-based, so subtract 1 for true count. */ ++} e1000_mac_type; ++ ++typedef enum { ++ e1000_media_type_unknown = 0, ++ e1000_media_type_copper = 1, ++ e1000_media_type_fiber = 2, ++ e1000_media_type_internal_serdes = 3, ++ e1000_num_media_types ++} e1000_media_type; ++ ++typedef enum { ++ e1000_nvm_unknown = 0, ++ e1000_nvm_none, ++ e1000_nvm_eeprom_spi, ++ e1000_nvm_eeprom_microwire, ++ e1000_nvm_flash_hw, ++ e1000_nvm_flash_sw ++} e1000_nvm_type; ++ ++typedef enum { ++ e1000_nvm_override_none = 0, ++ e1000_nvm_override_spi_small, ++ e1000_nvm_override_spi_large, ++ e1000_nvm_override_microwire_small, ++ e1000_nvm_override_microwire_large ++} e1000_nvm_override; ++ ++typedef enum { ++ e1000_phy_unknown = 0, ++ e1000_phy_none, ++ e1000_phy_m88, ++ e1000_phy_igp, ++ e1000_phy_igp_2, ++ e1000_phy_gg82563, ++ e1000_phy_igp_3, ++ e1000_phy_ife, ++} e1000_phy_type; ++ ++typedef enum { ++ e1000_bus_type_unknown = 0, ++ e1000_bus_type_pci, ++ e1000_bus_type_pcix, ++ e1000_bus_type_pci_express, ++ e1000_bus_type_reserved ++} e1000_bus_type; ++ ++typedef enum { ++ e1000_bus_speed_unknown = 0, ++ e1000_bus_speed_33, ++ e1000_bus_speed_66, ++ e1000_bus_speed_100, ++ e1000_bus_speed_120, ++ e1000_bus_speed_133, ++ e1000_bus_speed_2500, ++ e1000_bus_speed_5000, ++ e1000_bus_speed_reserved ++} e1000_bus_speed; ++ ++typedef enum { ++ e1000_bus_width_unknown = 0, ++ e1000_bus_width_pcie_x1, ++ e1000_bus_width_pcie_x2, ++ e1000_bus_width_pcie_x4 = 4, ++ e1000_bus_width_pcie_x8 = 8, ++ e1000_bus_width_32, ++ e1000_bus_width_64, ++ e1000_bus_width_reserved ++} e1000_bus_width; ++ ++typedef enum { ++ e1000_1000t_rx_status_not_ok = 0, ++ e1000_1000t_rx_status_ok, ++ e1000_1000t_rx_status_undefined = 0xFF ++} e1000_1000t_rx_status; ++ ++typedef enum { ++ e1000_rev_polarity_normal = 0, ++ e1000_rev_polarity_reversed, ++ e1000_rev_polarity_undefined = 0xFF ++} e1000_rev_polarity; ++ ++typedef enum { ++ e1000_fc_none = 0, ++ e1000_fc_rx_pause, ++ e1000_fc_tx_pause, ++ e1000_fc_full, ++ e1000_fc_default = 0xFF ++} e1000_fc_type; ++ ++typedef enum { ++ e1000_ffe_config_enabled = 0, ++ e1000_ffe_config_active, ++ e1000_ffe_config_blocked ++} e1000_ffe_config; ++ ++typedef enum { ++ e1000_dsp_config_disabled = 0, ++ e1000_dsp_config_enabled, ++ e1000_dsp_config_activated, ++ e1000_dsp_config_undefined = 0xFF ++} e1000_dsp_config; ++ ++/* Receive Descriptor */ ++struct e1000_rx_desc { ++ u64 buffer_addr; /* Address of the descriptor's data buffer */ ++ u16 length; /* Length of data DMAed into data buffer */ ++ u16 csum; /* Packet checksum */ ++ u8 status; /* Descriptor status */ ++ u8 errors; /* Descriptor Errors */ ++ u16 special; ++}; ++ ++/* Receive Descriptor - Extended */ ++union e1000_rx_desc_extended { ++ struct { ++ u64 buffer_addr; ++ u64 reserved; ++ } read; ++ struct { ++ struct { ++ u32 mrq; /* Multiple Rx Queues */ ++ union { ++ u32 rss; /* RSS Hash */ ++ struct { ++ u16 ip_id; /* IP id */ ++ u16 csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ u32 
status_error; /* ext status/error */ ++ u16 length; ++ u16 vlan; /* VLAN tag */ ++ } upper; ++ } wb; /* writeback */ ++}; ++ ++#define MAX_PS_BUFFERS 4 ++/* Receive Descriptor - Packet Split */ ++union e1000_rx_desc_packet_split { ++ struct { ++ /* one buffer for protocol header(s), three data buffers */ ++ u64 buffer_addr[MAX_PS_BUFFERS]; ++ } read; ++ struct { ++ struct { ++ u32 mrq; /* Multiple Rx Queues */ ++ union { ++ u32 rss; /* RSS Hash */ ++ struct { ++ u16 ip_id; /* IP id */ ++ u16 csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ u32 status_error; /* ext status/error */ ++ u16 length0; /* length of buffer 0 */ ++ u16 vlan; /* VLAN tag */ ++ } middle; ++ struct { ++ u16 header_status; ++ u16 length[3]; /* length of buffers 1-3 */ ++ } upper; ++ u64 reserved; ++ } wb; /* writeback */ ++}; ++ ++/* Transmit Descriptor */ ++struct e1000_tx_desc { ++ u64 buffer_addr; /* Address of the descriptor's data buffer */ ++ union { ++ u32 data; ++ struct { ++ u16 length; /* Data buffer length */ ++ u8 cso; /* Checksum offset */ ++ u8 cmd; /* Descriptor control */ ++ } flags; ++ } lower; ++ union { ++ u32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 css; /* Checksum start */ ++ u16 special; ++ } fields; ++ } upper; ++}; ++ ++/* Offload Context Descriptor */ ++struct e1000_context_desc { ++ union { ++ u32 ip_config; ++ struct { ++ u8 ipcss; /* IP checksum start */ ++ u8 ipcso; /* IP checksum offset */ ++ u16 ipcse; /* IP checksum end */ ++ } ip_fields; ++ } lower_setup; ++ union { ++ u32 tcp_config; ++ struct { ++ u8 tucss; /* TCP checksum start */ ++ u8 tucso; /* TCP checksum offset */ ++ u16 tucse; /* TCP checksum end */ ++ } tcp_fields; ++ } upper_setup; ++ u32 cmd_and_length; ++ union { ++ u32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 hdr_len; /* Header length */ ++ u16 mss; /* Maximum segment size */ ++ } fields; ++ } tcp_seg_setup; ++}; ++ ++/* Offload data descriptor */ ++struct e1000_data_desc { ++ u64 buffer_addr; /* Address of the descriptor's buffer address */ ++ union { ++ u32 data; ++ struct { ++ u16 length; /* Data buffer length */ ++ u8 typ_len_ext; ++ u8 cmd; ++ } flags; ++ } lower; ++ union { ++ u32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 popts; /* Packet Options */ ++ u16 special; ++ } fields; ++ } upper; ++}; ++ ++/* Statistics counters collected by the MAC */ ++struct e1000_hw_stats { ++ u64 crcerrs; ++ u64 algnerrc; ++ u64 symerrs; ++ u64 rxerrc; ++ u64 mpc; ++ u64 scc; ++ u64 ecol; ++ u64 mcc; ++ u64 latecol; ++ u64 colc; ++ u64 dc; ++ u64 tncrs; ++ u64 sec; ++ u64 cexterr; ++ u64 rlec; ++ u64 xonrxc; ++ u64 xontxc; ++ u64 xoffrxc; ++ u64 xofftxc; ++ u64 fcruc; ++ u64 prc64; ++ u64 prc127; ++ u64 prc255; ++ u64 prc511; ++ u64 prc1023; ++ u64 prc1522; ++ u64 gprc; ++ u64 bprc; ++ u64 mprc; ++ u64 gptc; ++ u64 gorc; ++ u64 gotc; ++ u64 rnbc; ++ u64 ruc; ++ u64 rfc; ++ u64 roc; ++ u64 rjc; ++ u64 mgprc; ++ u64 mgpdc; ++ u64 mgptc; ++ u64 tor; ++ u64 tot; ++ u64 tpr; ++ u64 tpt; ++ u64 ptc64; ++ u64 ptc127; ++ u64 ptc255; ++ u64 ptc511; ++ u64 ptc1023; ++ u64 ptc1522; ++ u64 mptc; ++ u64 bptc; ++ u64 tsctc; ++ u64 tsctfc; ++ u64 iac; ++ u64 icrxptc; ++ u64 icrxatc; ++ u64 ictxptc; ++ u64 ictxatc; ++ u64 ictxqec; ++ u64 ictxqmtc; ++ u64 icrxdmtc; ++ u64 icrxoc; ++ u64 cbtmpc; ++ u64 htdpmc; ++ u64 cbrdpc; ++ u64 cbrmpc; ++ u64 rpthc; ++ u64 hgptc; ++ u64 htcbdpc; ++ u64 hgorc; ++ u64 hgotc; ++ u64 lenerrs; ++ u64 scvpc; ++ u64 hrmpc; ++}; ++ ++struct e1000_phy_stats { ++ u32 idle_errors; 
++ u32 receive_errors; ++}; ++ ++struct e1000_host_mng_dhcp_cookie { ++ u32 signature; ++ u8 status; ++ u8 reserved0; ++ u16 vlan_id; ++ u32 reserved1; ++ u16 reserved2; ++ u8 reserved3; ++ u8 checksum; ++}; ++ ++/* Host Interface "Rev 1" */ ++struct e1000_host_command_header { ++ u8 command_id; ++ u8 command_length; ++ u8 command_options; ++ u8 checksum; ++}; ++ ++#define E1000_HI_MAX_DATA_LENGTH 252 ++struct e1000_host_command_info { ++ struct e1000_host_command_header command_header; ++ u8 command_data[E1000_HI_MAX_DATA_LENGTH]; ++}; ++ ++/* Host Interface "Rev 2" */ ++struct e1000_host_mng_command_header { ++ u8 command_id; ++ u8 checksum; ++ u16 reserved1; ++ u16 reserved2; ++ u16 command_length; ++}; ++ ++#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 ++struct e1000_host_mng_command_info { ++ struct e1000_host_mng_command_header command_header; ++ u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; ++}; ++ ++#include "e1000_mac.h" ++#include "e1000_phy.h" ++#include "e1000_nvm.h" ++#include "e1000_manage.h" ++ ++struct e1000_functions { ++ /* Function pointers for the MAC. */ ++ s32 (*init_mac_params)(struct e1000_hw *); ++ s32 (*blink_led)(struct e1000_hw *); ++ s32 (*check_for_link)(struct e1000_hw *); ++ bool (*check_mng_mode)(struct e1000_hw *hw); ++ s32 (*cleanup_led)(struct e1000_hw *); ++ void (*clear_hw_cntrs)(struct e1000_hw *); ++ void (*clear_vfta)(struct e1000_hw *); ++ s32 (*get_bus_info)(struct e1000_hw *); ++ s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); ++ s32 (*led_on)(struct e1000_hw *); ++ s32 (*led_off)(struct e1000_hw *); ++ void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32, u32, ++ u32); ++ void (*remove_device)(struct e1000_hw *); ++ s32 (*reset_hw)(struct e1000_hw *); ++ s32 (*init_hw)(struct e1000_hw *); ++ s32 (*setup_link)(struct e1000_hw *); ++ s32 (*setup_physical_interface)(struct e1000_hw *); ++ s32 (*setup_led)(struct e1000_hw *); ++ void (*write_vfta)(struct e1000_hw *, u32, u32); ++ void (*mta_set)(struct e1000_hw *, u32); ++ void (*config_collision_dist)(struct e1000_hw*); ++ void (*rar_set)(struct e1000_hw*, u8*, u32); ++ s32 (*read_mac_addr)(struct e1000_hw*); ++ s32 (*validate_mdi_setting)(struct e1000_hw*); ++ s32 (*mng_host_if_write)(struct e1000_hw*, u8*, u16, u16, u8*); ++ s32 (*mng_write_cmd_header)(struct e1000_hw *hw, ++ struct e1000_host_mng_command_header*); ++ s32 (*mng_enable_host_if)(struct e1000_hw*); ++ s32 (*wait_autoneg)(struct e1000_hw*); ++ ++ /* Function pointers for the PHY. */ ++ s32 (*init_phy_params)(struct e1000_hw *); ++ s32 (*acquire_phy)(struct e1000_hw *); ++ s32 (*check_polarity)(struct e1000_hw *); ++ s32 (*check_reset_block)(struct e1000_hw *); ++ s32 (*commit_phy)(struct e1000_hw *); ++ s32 (*force_speed_duplex)(struct e1000_hw *); ++ s32 (*get_cfg_done)(struct e1000_hw *hw); ++ s32 (*get_cable_length)(struct e1000_hw *); ++ s32 (*get_phy_info)(struct e1000_hw *); ++ s32 (*read_phy_reg)(struct e1000_hw *, u32, u16 *); ++ void (*release_phy)(struct e1000_hw *); ++ s32 (*reset_phy)(struct e1000_hw *); ++ s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); ++ s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); ++ s32 (*write_phy_reg)(struct e1000_hw *, u32, u16); ++ void (*power_up_phy)(struct e1000_hw *); ++ void (*power_down_phy)(struct e1000_hw *); ++ ++ /* Function pointers for the NVM. 
*/ ++ s32 (*init_nvm_params)(struct e1000_hw *); ++ s32 (*acquire_nvm)(struct e1000_hw *); ++ s32 (*read_nvm)(struct e1000_hw *, u16, u16, u16 *); ++ void (*release_nvm)(struct e1000_hw *); ++ void (*reload_nvm)(struct e1000_hw *); ++ s32 (*update_nvm)(struct e1000_hw *); ++ s32 (*valid_led_default)(struct e1000_hw *, u16 *); ++ s32 (*validate_nvm)(struct e1000_hw *); ++ s32 (*write_nvm)(struct e1000_hw *, u16, u16, u16 *); ++}; ++ ++struct e1000_mac_info { ++ u8 addr[6]; ++ u8 perm_addr[6]; ++ ++ e1000_mac_type type; ++ ++ u32 collision_delta; ++ u32 ledctl_default; ++ u32 ledctl_mode1; ++ u32 ledctl_mode2; ++ u32 mc_filter_type; ++ u32 tx_packet_delta; ++ u32 txcw; ++ ++ u16 current_ifs_val; ++ u16 ifs_max_val; ++ u16 ifs_min_val; ++ u16 ifs_ratio; ++ u16 ifs_step_size; ++ u16 mta_reg_count; ++ u16 rar_entry_count; ++ ++ u8 forced_speed_duplex; ++ ++ bool adaptive_ifs; ++ bool arc_subsystem_valid; ++ bool asf_firmware_present; ++ bool autoneg; ++ bool autoneg_failed; ++ bool disable_av; ++ bool disable_hw_init_bits; ++ bool get_link_status; ++ bool ifs_params_forced; ++ bool in_ifs_mode; ++ bool report_tx_early; ++ bool serdes_has_link; ++ bool tx_pkt_filtering; ++}; ++ ++struct e1000_phy_info { ++ e1000_phy_type type; ++ ++ e1000_1000t_rx_status local_rx; ++ e1000_1000t_rx_status remote_rx; ++ e1000_ms_type ms_type; ++ e1000_ms_type original_ms_type; ++ e1000_rev_polarity cable_polarity; ++ e1000_smart_speed smart_speed; ++ ++ u32 addr; ++ u32 id; ++ u32 reset_delay_us; /* in usec */ ++ u32 revision; ++ ++ e1000_media_type media_type; ++ ++ u16 autoneg_advertised; ++ u16 autoneg_mask; ++ u16 cable_length; ++ u16 max_cable_length; ++ u16 min_cable_length; ++ ++ u8 mdix; ++ ++ bool disable_polarity_correction; ++ bool is_mdix; ++ bool polarity_correction; ++ bool reset_disable; ++ bool speed_downgraded; ++ bool autoneg_wait_to_complete; ++}; ++ ++struct e1000_nvm_info { ++ e1000_nvm_type type; ++ e1000_nvm_override override; ++ ++ u32 flash_bank_size; ++ u32 flash_base_addr; ++ ++ u16 word_size; ++ u16 delay_usec; ++ u16 address_bits; ++ u16 opcode_bits; ++ u16 page_size; ++}; ++ ++struct e1000_bus_info { ++ e1000_bus_type type; ++ e1000_bus_speed speed; ++ e1000_bus_width width; ++ ++ u32 snoop; ++ ++ u16 func; ++ u16 pci_cmd_word; ++}; ++ ++struct e1000_fc_info { ++ u32 high_water; /* Flow control high-water mark */ ++ u32 low_water; /* Flow control low-water mark */ ++ u16 pause_time; /* Flow control pause timer */ ++ bool send_xon; /* Flow control send XON */ ++ bool strict_ieee; /* Strict IEEE mode */ ++ e1000_fc_type type; /* Type of flow control */ ++ e1000_fc_type original_type; ++}; ++ ++struct e1000_hw { ++ void *back; ++ void *dev_spec; ++ ++ u8 __iomem *hw_addr; ++ u8 __iomem *flash_address; ++ unsigned long io_base; ++ ++ struct e1000_functions func; ++ struct e1000_mac_info mac; ++ struct e1000_fc_info fc; ++ struct e1000_phy_info phy; ++ struct e1000_nvm_info nvm; ++ struct e1000_bus_info bus; ++ struct e1000_host_mng_dhcp_cookie mng_cookie; ++ ++ u32 dev_spec_size; ++ ++ u16 device_id; ++ u16 subsystem_vendor_id; ++ u16 subsystem_device_id; ++ u16 vendor_id; ++ ++ u8 revision_id; ++}; ++ ++/* These functions must be implemented by drivers */ ++void e1000_pci_clear_mwi(struct e1000_hw *hw); ++void e1000_pci_set_mwi(struct e1000_hw *hw); ++s32 e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, u32 size); ++s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); ++void e1000_free_dev_spec_struct(struct e1000_hw *hw); ++void e1000_read_pci_cfg(struct 
e1000_hw *hw, u32 reg, u16 *value); ++void e1000_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_ich8lan.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_ich8lan.h 2021-04-07 16:01:27.724633456 +0800 +@@ -0,0 +1,110 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_ICH8LAN_H_ ++#define _E1000_ICH8LAN_H_ ++ ++#define ICH_FLASH_GFPREG 0x0000 ++#define ICH_FLASH_HSFSTS 0x0004 ++#define ICH_FLASH_HSFCTL 0x0006 ++#define ICH_FLASH_FADDR 0x0008 ++#define ICH_FLASH_FDATA0 0x0010 ++ ++#define ICH_FLASH_READ_COMMAND_TIMEOUT 500 ++#define ICH_FLASH_WRITE_COMMAND_TIMEOUT 500 ++#define ICH_FLASH_ERASE_COMMAND_TIMEOUT 3000000 ++#define ICH_FLASH_LINEAR_ADDR_MASK 0x00FFFFFF ++#define ICH_FLASH_CYCLE_REPEAT_COUNT 10 ++ ++#define ICH_CYCLE_READ 0 ++#define ICH_CYCLE_WRITE 2 ++#define ICH_CYCLE_ERASE 3 ++ ++#define FLASH_GFPREG_BASE_MASK 0x1FFF ++#define FLASH_SECTOR_ADDR_SHIFT 12 ++ ++#define E1000_SHADOW_RAM_WORDS 2048 ++ ++#define ICH_FLASH_SEG_SIZE_256 256 ++#define ICH_FLASH_SEG_SIZE_4K 4096 ++#define ICH_FLASH_SEG_SIZE_8K 8192 ++#define ICH_FLASH_SEG_SIZE_64K 65536 ++#define ICH_FLASH_SECTOR_SIZE 4096 ++ ++#define ICH_FLASH_REG_MAPSIZE 0x00A0 ++ ++#define E1000_ICH_FWSM_RSPCIPHY 0x00000040 /* Reset PHY on PCI Reset */ ++#define E1000_ICH_FWSM_DISSW 0x10000000 /* FW Disables SW Writes */ ++/* FW established a valid mode */ ++#define E1000_ICH_FWSM_FW_VALID 0x00008000 ++ ++#define E1000_ICH_MNG_IAMT_MODE 0x2 ++ ++#define ID_LED_DEFAULT_ICH8LAN ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_DEF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_ON2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++ ++#define E1000_ICH_NVM_SIG_WORD 0x13 ++#define E1000_ICH_NVM_SIG_MASK 0xC000 ++ ++#define E1000_ICH8_LAN_INIT_TIMEOUT 1500 ++ ++#define E1000_FEXTNVM_SW_CONFIG 1 ++#define E1000_FEXTNVM_SW_CONFIG_ICH8M (1 << 27) /* Bit redefined for ICH8M */ ++ ++#define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL ++ ++#define E1000_ICH_RAR_ENTRIES 7 ++ ++#define PHY_PAGE_SHIFT 5 ++#define PHY_REG(page, reg) (((page) << PHY_PAGE_SHIFT) | \ ++ ((reg) & MAX_PHY_REG_ADDRESS)) ++#define IGP3_KMRN_DIAG PHY_REG(770, 19) /* KMRN Diagnostic */ ++#define IGP3_VR_CTRL PHY_REG(776, 18) /* Voltage Regulator Control */ ++#define IGP3_CAPABILITY PHY_REG(776, 19) /* Capability */ ++#define IGP3_PM_CTRL PHY_REG(769, 20) /* 
Power Management Control */ ++ ++#define IGP3_KMRN_DIAG_PCS_LOCK_LOSS 0x0002 ++#define IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK 0x0300 ++#define IGP3_VR_CTRL_MODE_SHUTDOWN 0x0200 ++#define IGP3_PM_CTRL_FORCE_PWR_DOWN 0x0020 ++ ++/* ++ * Additional interrupts need to be handled for ICH family: ++ * DSW = The FW changed the status of the DISSW bit in FWSM ++ * PHYINT = The LAN connected device generates an interrupt ++ * EPRST = Manageability reset event ++ */ ++#define IMS_ICH_ENABLE_MASK (\ ++ E1000_IMS_DSW | \ ++ E1000_IMS_PHYINT | \ ++ E1000_IMS_EPRST) ++ ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/Makefile 2021-04-07 16:01:27.719633463 +0800 +@@ -0,0 +1,19 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000_NEW) += rt_e1000_new.o ++ ++rt_e1000_new-y := \ ++ e1000_80003es2lan.o \ ++ e1000_82540.o \ ++ e1000_82541.o \ ++ e1000_82542.o \ ++ e1000_82543.o \ ++ e1000_82571.o \ ++ e1000_api.o \ ++ e1000_ich8lan.o \ ++ e1000_mac.o \ ++ e1000_main.o \ ++ e1000_manage.o \ ++ e1000_nvm.o \ ++ e1000_param.o \ ++ e1000_phy.o +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_api.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_api.c 2021-04-07 16:01:27.715633469 +0800 +@@ -0,0 +1,1164 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000_api.h" ++#include "e1000_mac.h" ++#include "e1000_nvm.h" ++#include "e1000_phy.h" ++ ++/** ++ * e1000_init_mac_params - Initialize MAC function pointers ++ * @hw: pointer to the HW structure ++ * ++ * This function initializes the function pointers for the MAC ++ * set of functions. Called by drivers or by e1000_setup_init_funcs. ++ **/ ++s32 e1000_init_mac_params(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ if (hw->func.init_mac_params) { ++ ret_val = hw->func.init_mac_params(hw); ++ if (ret_val) { ++ DEBUGOUT("MAC Initialization Error\n"); ++ goto out; ++ } ++ } else { ++ DEBUGOUT("mac.init_mac_params was NULL\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params - Initialize NVM function pointers ++ * @hw: pointer to the HW structure ++ * ++ * This function initializes the function pointers for the NVM ++ * set of functions. 
Called by drivers or by e1000_setup_init_funcs. ++ **/ ++s32 e1000_init_nvm_params(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ if (hw->func.init_nvm_params) { ++ ret_val = hw->func.init_nvm_params(hw); ++ if (ret_val) { ++ DEBUGOUT("NVM Initialization Error\n"); ++ goto out; ++ } ++ } else { ++ DEBUGOUT("nvm.init_nvm_params was NULL\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_phy_params - Initialize PHY function pointers ++ * @hw: pointer to the HW structure ++ * ++ * This function initializes the function pointers for the PHY ++ * set of functions. Called by drivers or by e1000_setup_init_funcs. ++ **/ ++s32 e1000_init_phy_params(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ if (hw->func.init_phy_params) { ++ ret_val = hw->func.init_phy_params(hw); ++ if (ret_val) { ++ DEBUGOUT("PHY Initialization Error\n"); ++ goto out; ++ } ++ } else { ++ DEBUGOUT("phy.init_phy_params was NULL\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_mac_type - Sets MAC type ++ * @hw: pointer to the HW structure ++ * ++ * This function sets the mac type of the adapter based on the ++ * device ID stored in the hw structure. ++ * MUST BE FIRST FUNCTION CALLED (explicitly or through ++ * e1000_setup_init_funcs()). ++ **/ ++s32 e1000_set_mac_type(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_set_mac_type"); ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82542: ++ mac->type = e1000_82542; ++ break; ++ case E1000_DEV_ID_82543GC_FIBER: ++ case E1000_DEV_ID_82543GC_COPPER: ++ mac->type = e1000_82543; ++ break; ++ case E1000_DEV_ID_82544EI_COPPER: ++ case E1000_DEV_ID_82544EI_FIBER: ++ case E1000_DEV_ID_82544GC_COPPER: ++ case E1000_DEV_ID_82544GC_LOM: ++ mac->type = e1000_82544; ++ break; ++ case E1000_DEV_ID_82540EM: ++ case E1000_DEV_ID_82540EM_LOM: ++ case E1000_DEV_ID_82540EP: ++ case E1000_DEV_ID_82540EP_LOM: ++ case E1000_DEV_ID_82540EP_LP: ++ mac->type = e1000_82540; ++ break; ++ case E1000_DEV_ID_82545EM_COPPER: ++ case E1000_DEV_ID_82545EM_FIBER: ++ mac->type = e1000_82545; ++ break; ++ case E1000_DEV_ID_82545GM_COPPER: ++ case E1000_DEV_ID_82545GM_FIBER: ++ case E1000_DEV_ID_82545GM_SERDES: ++ mac->type = e1000_82545_rev_3; ++ break; ++ case E1000_DEV_ID_82546EB_COPPER: ++ case E1000_DEV_ID_82546EB_FIBER: ++ case E1000_DEV_ID_82546EB_QUAD_COPPER: ++ mac->type = e1000_82546; ++ break; ++ case E1000_DEV_ID_82546GB_COPPER: ++ case E1000_DEV_ID_82546GB_FIBER: ++ case E1000_DEV_ID_82546GB_SERDES: ++ case E1000_DEV_ID_82546GB_PCIE: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ mac->type = e1000_82546_rev_3; ++ break; ++ case E1000_DEV_ID_82541EI: ++ case E1000_DEV_ID_82541EI_MOBILE: ++ case E1000_DEV_ID_82541ER_LOM: ++ mac->type = e1000_82541; ++ break; ++ case E1000_DEV_ID_82541ER: ++ case E1000_DEV_ID_82541GI: ++ case E1000_DEV_ID_82541GI_LF: ++ case E1000_DEV_ID_82541GI_MOBILE: ++ mac->type = e1000_82541_rev_2; ++ break; ++ case E1000_DEV_ID_82547EI: ++ case E1000_DEV_ID_82547EI_MOBILE: ++ mac->type = e1000_82547; ++ break; ++ case E1000_DEV_ID_82547GI: ++ mac->type = e1000_82547_rev_2; ++ break; ++ case E1000_DEV_ID_82571EB_COPPER: ++ case E1000_DEV_ID_82571EB_FIBER: ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82571EB_SERDES_DUAL: ++ case E1000_DEV_ID_82571EB_SERDES_QUAD: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER: ++ case 
E1000_DEV_ID_82571PT_QUAD_COPPER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: ++ mac->type = e1000_82571; ++ break; ++ case E1000_DEV_ID_82572EI: ++ case E1000_DEV_ID_82572EI_COPPER: ++ case E1000_DEV_ID_82572EI_FIBER: ++ case E1000_DEV_ID_82572EI_SERDES: ++ mac->type = e1000_82572; ++ break; ++ case E1000_DEV_ID_82573E: ++ case E1000_DEV_ID_82573E_IAMT: ++ case E1000_DEV_ID_82573L: ++ mac->type = e1000_82573; ++ break; ++ case E1000_DEV_ID_80003ES2LAN_COPPER_DPT: ++ case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: ++ case E1000_DEV_ID_80003ES2LAN_COPPER_SPT: ++ case E1000_DEV_ID_80003ES2LAN_SERDES_SPT: ++ mac->type = e1000_80003es2lan; ++ break; ++ case E1000_DEV_ID_ICH8_IFE: ++ case E1000_DEV_ID_ICH8_IFE_GT: ++ case E1000_DEV_ID_ICH8_IFE_G: ++ case E1000_DEV_ID_ICH8_IGP_M: ++ case E1000_DEV_ID_ICH8_IGP_M_AMT: ++ case E1000_DEV_ID_ICH8_IGP_AMT: ++ case E1000_DEV_ID_ICH8_IGP_C: ++ mac->type = e1000_ich8lan; ++ break; ++ case E1000_DEV_ID_ICH9_IFE: ++ case E1000_DEV_ID_ICH9_IFE_GT: ++ case E1000_DEV_ID_ICH9_IFE_G: ++ case E1000_DEV_ID_ICH9_IGP_AMT: ++ case E1000_DEV_ID_ICH9_IGP_C: ++ mac->type = e1000_ich9lan; ++ break; ++ default: ++ /* Should never have loaded on this device */ ++ ret_val = -E1000_ERR_MAC_INIT; ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_init_funcs - Initializes function pointers ++ * @hw: pointer to the HW structure ++ * @init_device: TRUE will initialize the rest of the function pointers ++ * getting the device ready for use. FALSE will only set ++ * MAC type and the function pointers for the other init ++ * functions. Passing FALSE will not generate any hardware ++ * reads or writes. ++ * ++ * This function must be called by a driver in order to use the rest ++ * of the 'shared' code files. Called by drivers only. ++ **/ ++s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device) ++{ ++ s32 ret_val; ++ ++ /* Can't do much good without knowing the MAC type. */ ++ ret_val = e1000_set_mac_type(hw); ++ if (ret_val) { ++ DEBUGOUT("ERROR: MAC type could not be set properly.\n"); ++ goto out; ++ } ++ ++ if (!hw->hw_addr) { ++ DEBUGOUT("ERROR: Registers not mapped\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* ++ * Init some generic function pointers that are currently all pointing ++ * to generic implementations. We do this first allowing a driver ++ * module to override it afterwards. ++ */ ++ hw->func.config_collision_dist = e1000_config_collision_dist_generic; ++ hw->func.rar_set = e1000_rar_set_generic; ++ hw->func.validate_mdi_setting = e1000_validate_mdi_setting_generic; ++ hw->func.mng_host_if_write = e1000_mng_host_if_write_generic; ++ hw->func.mng_write_cmd_header = e1000_mng_write_cmd_header_generic; ++ hw->func.mng_enable_host_if = e1000_mng_enable_host_if_generic; ++ hw->func.wait_autoneg = e1000_wait_autoneg_generic; ++ hw->func.reload_nvm = e1000_reload_nvm_generic; ++ ++ /* ++ * Set up the init function pointers. These are functions within the ++ * adapter family file that sets up function pointers for the rest of ++ * the functions in that family. 
++ */ ++ switch (hw->mac.type) { ++ case e1000_82542: ++ e1000_init_function_pointers_82542(hw); ++ break; ++ case e1000_82543: ++ case e1000_82544: ++ e1000_init_function_pointers_82543(hw); ++ break; ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ e1000_init_function_pointers_82540(hw); ++ break; ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ e1000_init_function_pointers_82541(hw); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ e1000_init_function_pointers_82571(hw); ++ break; ++ case e1000_80003es2lan: ++ e1000_init_function_pointers_80003es2lan(hw); ++ break; ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ e1000_init_function_pointers_ich8lan(hw); ++ break; ++ default: ++ DEBUGOUT("Hardware not supported\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ break; ++ } ++ ++ /* ++ * Initialize the rest of the function pointers. These require some ++ * register reads/writes in some cases. ++ */ ++ if (!(ret_val) && init_device) { ++ ret_val = e1000_init_mac_params(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_init_nvm_params(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_init_phy_params(hw); ++ if (ret_val) ++ goto out; ++ ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_remove_device - Free device specific structure ++ * @hw: pointer to the HW structure ++ * ++ * If a device specific structure was allocated, this function will ++ * free it. This is a function pointer entry point called by drivers. ++ **/ ++void e1000_remove_device(struct e1000_hw *hw) ++{ ++ if (hw->func.remove_device) ++ hw->func.remove_device(hw); ++} ++ ++/** ++ * e1000_get_bus_info - Obtain bus information for adapter ++ * @hw: pointer to the HW structure ++ * ++ * This will obtain information about the HW bus for which the ++ * adaper is attached and stores it in the hw structure. This is a ++ * function pointer entry point called by drivers. ++ **/ ++s32 e1000_get_bus_info(struct e1000_hw *hw) ++{ ++ if (hw->func.get_bus_info) ++ return hw->func.get_bus_info(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_clear_vfta - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * This clears the VLAN filter table on the adapter. This is a function ++ * pointer entry point called by drivers. ++ **/ ++void e1000_clear_vfta(struct e1000_hw *hw) ++{ ++ if (hw->func.clear_vfta) ++ hw->func.clear_vfta (hw); ++} ++ ++/** ++ * e1000_write_vfta - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: the 32-bit offset in which to write the value to. ++ * @value: the 32-bit value to write at location offset. ++ * ++ * This writes a 32-bit value to a 32-bit offset in the VLAN filter ++ * table. This is a function pointer entry point called by drivers. ++ **/ ++void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ if (hw->func.write_vfta) ++ hw->func.write_vfta(hw, offset, value); ++} ++ ++/** ++ * e1000_update_mc_addr_list - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * @rar_used_count: the first RAR register free to program ++ * @rar_count: total number of supported Receive Address Registers ++ * ++ * Updates the Receive Address Registers and Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. 
++ * The parameter rar_count will usually be hw->mac.rar_entry_count ++ * unless there are workarounds that change this. Currently no func pointer ++ * exists and all implementations are handled in the generic version of this ++ * function. ++ **/ ++void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, ++ u32 mc_addr_count, u32 rar_used_count, ++ u32 rar_count) ++{ ++ if (hw->func.update_mc_addr_list) ++ hw->func.update_mc_addr_list(hw, ++ mc_addr_list, ++ mc_addr_count, ++ rar_used_count, ++ rar_count); ++} ++ ++/** ++ * e1000_force_mac_fc - Force MAC flow control ++ * @hw: pointer to the HW structure ++ * ++ * Force the MAC's flow control settings. Currently no func pointer exists ++ * and all implementations are handled in the generic version of this ++ * function. ++ **/ ++s32 e1000_force_mac_fc(struct e1000_hw *hw) ++{ ++ return e1000_force_mac_fc_generic(hw); ++} ++ ++/** ++ * e1000_check_for_link - Check/Store link connection ++ * @hw: pointer to the HW structure ++ * ++ * This checks the link condition of the adapter and stores the ++ * results in the hw->mac structure. This is a function pointer entry ++ * point called by drivers. ++ **/ ++s32 e1000_check_for_link(struct e1000_hw *hw) ++{ ++ if (hw->func.check_for_link) ++ return hw->func.check_for_link(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_check_mng_mode - Check management mode ++ * @hw: pointer to the HW structure ++ * ++ * This checks if the adapter has manageability enabled. ++ * This is a function pointer entry point called by drivers. ++ **/ ++bool e1000_check_mng_mode(struct e1000_hw *hw) ++{ ++ if (hw->func.check_mng_mode) ++ return hw->func.check_mng_mode(hw); ++ ++ return FALSE; ++} ++ ++/** ++ * e1000_mng_write_dhcp_info - Writes DHCP info to host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface ++ * @length: size of the buffer ++ * ++ * Writes the DHCP information to the host interface. ++ **/ ++s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length) ++{ ++ return e1000_mng_write_dhcp_info_generic(hw, buffer, length); ++} ++ ++/** ++ * e1000_reset_hw - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a function pointer ++ * entry point called by drivers. ++ **/ ++s32 e1000_reset_hw(struct e1000_hw *hw) ++{ ++ if (hw->func.reset_hw) ++ return hw->func.reset_hw(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_init_hw - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. This is a function ++ * pointer entry point called by drivers. ++ **/ ++s32 e1000_init_hw(struct e1000_hw *hw) ++{ ++ if (hw->func.init_hw) ++ return hw->func.init_hw(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_setup_link - Configures link and flow control ++ * @hw: pointer to the HW structure ++ * ++ * This configures link and flow control settings for the adapter. This ++ * is a function pointer entry point called by drivers. While modules can ++ * also call this, they probably call their own version of this function. 
++ **/ ++s32 e1000_setup_link(struct e1000_hw *hw) ++{ ++ if (hw->func.setup_link) ++ return hw->func.setup_link(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_get_speed_and_duplex - Returns current speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to a 16-bit value to store the speed ++ * @duplex: pointer to a 16-bit value to store the duplex. ++ * ++ * This returns the speed and duplex of the adapter in the two 'out' ++ * variables passed in. This is a function pointer entry point called ++ * by drivers. ++ **/ ++s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex) ++{ ++ if (hw->func.get_link_up_info) ++ return hw->func.get_link_up_info(hw, speed, duplex); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_setup_led - Configures SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This prepares the SW controllable LED for use and saves the current state ++ * of the LED so it can be later restored. This is a function pointer entry ++ * point called by drivers. ++ **/ ++s32 e1000_setup_led(struct e1000_hw *hw) ++{ ++ if (hw->func.setup_led) ++ return hw->func.setup_led(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_cleanup_led - Restores SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This restores the SW controllable LED to the value saved off by ++ * e1000_setup_led. This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_cleanup_led(struct e1000_hw *hw) ++{ ++ if (hw->func.cleanup_led) ++ return hw->func.cleanup_led(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_blink_led - Blink SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This starts the adapter LED blinking. Request the LED to be setup first ++ * and cleaned up after. This is a function pointer entry point called by ++ * drivers. ++ **/ ++s32 e1000_blink_led(struct e1000_hw *hw) ++{ ++ if (hw->func.blink_led) ++ return hw->func.blink_led(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_on - Turn on SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED on. This is a function pointer entry point ++ * called by drivers. ++ **/ ++s32 e1000_led_on(struct e1000_hw *hw) ++{ ++ if (hw->func.led_on) ++ return hw->func.led_on(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_off - Turn off SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED off. This is a function pointer entry point ++ * called by drivers. ++ **/ ++s32 e1000_led_off(struct e1000_hw *hw) ++{ ++ if (hw->func.led_off) ++ return hw->func.led_off(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_reset_adaptive - Reset adaptive IFS ++ * @hw: pointer to the HW structure ++ * ++ * Resets the adaptive IFS. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. ++ **/ ++void e1000_reset_adaptive(struct e1000_hw *hw) ++{ ++ e1000_reset_adaptive_generic(hw); ++} ++ ++/** ++ * e1000_update_adaptive - Update adaptive IFS ++ * @hw: pointer to the HW structure ++ * ++ * Updates adapter IFS. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. 
++ **/ ++void e1000_update_adaptive(struct e1000_hw *hw) ++{ ++ return; // TODO ++ e1000_update_adaptive_generic(hw); ++} ++ ++/** ++ * e1000_disable_pcie_master - Disable PCI-Express master access ++ * @hw: pointer to the HW structure ++ * ++ * Disables PCI-Express master access and verifies there are no pending ++ * requests. Currently no func pointer exists and all implementations are ++ * handled in the generic version of this function. ++ **/ ++s32 e1000_disable_pcie_master(struct e1000_hw *hw) ++{ ++ return e1000_disable_pcie_master_generic(hw); ++} ++ ++/** ++ * e1000_config_collision_dist - Configure collision distance ++ * @hw: pointer to the HW structure ++ * ++ * Configures the collision distance to the default value and is used ++ * during link setup. ++ **/ ++void e1000_config_collision_dist(struct e1000_hw *hw) ++{ ++ if (hw->func.config_collision_dist) ++ hw->func.config_collision_dist(hw); ++} ++ ++/** ++ * e1000_rar_set - Sets a receive address register ++ * @hw: pointer to the HW structure ++ * @addr: address to set the RAR to ++ * @index: the RAR to set ++ * ++ * Sets a Receive Address Register (RAR) to the specified address. ++ **/ ++void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) ++{ ++ if (hw->func.rar_set) ++ hw->func.rar_set(hw, addr, index); ++} ++ ++/** ++ * e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state ++ * @hw: pointer to the HW structure ++ * ++ * Ensures that the MDI/MDIX SW state is valid. ++ **/ ++s32 e1000_validate_mdi_setting(struct e1000_hw *hw) ++{ ++ if (hw->func.validate_mdi_setting) ++ return hw->func.validate_mdi_setting(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_mta_set - Sets multicast table bit ++ * @hw: pointer to the HW structure ++ * @hash_value: Multicast hash value. ++ * ++ * This sets the bit in the multicast table corresponding to the ++ * hash value. This is a function pointer entry point called by drivers. ++ **/ ++void e1000_mta_set(struct e1000_hw *hw, u32 hash_value) ++{ ++ if (hw->func.mta_set) ++ hw->func.mta_set(hw, hash_value); ++} ++ ++/** ++ * e1000_hash_mc_addr - Determines address location in multicast table ++ * @hw: pointer to the HW structure ++ * @mc_addr: Multicast address to hash. ++ * ++ * This hashes an address to determine its location in the multicast ++ * table. Currently no func pointer exists and all implementations ++ * are handled in the generic version of this function. ++ **/ ++u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) ++{ ++ return e1000_hash_mc_addr_generic(hw, mc_addr); ++} ++ ++/** ++ * e1000_enable_tx_pkt_filtering - Enable packet filtering on TX ++ * @hw: pointer to the HW structure ++ * ++ * Enables packet filtering on transmit packets if manageability is enabled ++ * and host interface is enabled. ++ * Currently no func pointer exists and all implementations are handled in the ++ * generic version of this function. ++ **/ ++bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw) ++{ ++ return e1000_enable_tx_pkt_filtering_generic(hw); ++} ++ ++/** ++ * e1000_mng_host_if_write - Writes to the manageability host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface buffer ++ * @length: size of the buffer ++ * @offset: location in the buffer to write to ++ * @sum: sum of the data (not checksum) ++ * ++ * This function writes the buffer content at the offset given on the host if. ++ * It also does alignment considerations to do the writes in most efficient ++ * way. 
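/*
 * Illustrative sketch only -- not part of the upstream patch. The doc
 * comment above describes pushing an arbitrary byte buffer through a 32-bit
 * register window at a byte offset, which forces the copy to be split into a
 * leading partial dword, a run of whole dwords, and a trailing partial
 * dword, while keeping a running byte sum for the command header. The
 * "register file" below is a plain array standing in for
 * E1000_WRITE_REG_ARRAY_DWORD(); all sizes and names are invented, and the
 * real generic implementation later in the patch differs in detail.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_HOST_IF_DWORDS 16

static uint32_t demo_host_if[DEMO_HOST_IF_DWORDS];	/* stand-in window */

static void demo_host_if_write(const uint8_t *buf, uint16_t len,
			       uint16_t byte_offset, uint8_t *sum)
{
	uint16_t dw = byte_offset >> 2;		/* dword index */
	uint16_t head = byte_offset & 0x3;	/* unaligned lead-in bytes */
	uint32_t dword;
	uint16_t i;

	while (len) {
		uint16_t chunk = 4 - head;

		if (chunk > len)
			chunk = len;

		/* read-modify-write so untouched bytes in the dword survive */
		dword = demo_host_if[dw];
		for (i = 0; i < chunk; i++) {
			((uint8_t *)&dword)[head + i] = *buf;
			*sum += *buf++;
		}
		demo_host_if[dw++] = dword;

		len -= chunk;
		head = 0;
	}
}

int main(void)
{
	uint8_t payload[] = "host interface payload";
	uint8_t sum = 0;

	demo_host_if_write(payload, sizeof(payload), 3, &sum);
	printf("byte sum = 0x%02x\n", sum);
	return 0;
}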
Also fills up the sum of the buffer in *buffer parameter. ++ **/ ++s32 e1000_mng_host_if_write(struct e1000_hw * hw, u8 *buffer, u16 length, ++ u16 offset, u8 *sum) ++{ ++ if (hw->func.mng_host_if_write) ++ return hw->func.mng_host_if_write(hw, buffer, length, offset, ++ sum); ++ ++ return E1000_NOT_IMPLEMENTED; ++} ++ ++/** ++ * e1000_mng_write_cmd_header - Writes manageability command header ++ * @hw: pointer to the HW structure ++ * @hdr: pointer to the host interface command header ++ * ++ * Writes the command header after does the checksum calculation. ++ **/ ++s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, ++ struct e1000_host_mng_command_header *hdr) ++{ ++ if (hw->func.mng_write_cmd_header) ++ return hw->func.mng_write_cmd_header(hw, hdr); ++ ++ return E1000_NOT_IMPLEMENTED; ++} ++ ++/** ++ * e1000_mng_enable_host_if - Checks host interface is enabled ++ * @hw: pointer to the HW structure ++ * ++ * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND ++ * ++ * This function checks whether the HOST IF is enabled for command operaton ++ * and also checks whether the previous command is completed. It busy waits ++ * in case of previous command is not completed. ++ **/ ++s32 e1000_mng_enable_host_if(struct e1000_hw * hw) ++{ ++ if (hw->func.mng_enable_host_if) ++ return hw->func.mng_enable_host_if(hw); ++ ++ return E1000_NOT_IMPLEMENTED; ++} ++ ++/** ++ * e1000_wait_autoneg - Waits for autonegotiation completion ++ * @hw: pointer to the HW structure ++ * ++ * Waits for autoneg to complete. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. ++ **/ ++s32 e1000_wait_autoneg(struct e1000_hw *hw) ++{ ++ if (hw->func.wait_autoneg) ++ return hw->func.wait_autoneg(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_check_reset_block - Verifies PHY can be reset ++ * @hw: pointer to the HW structure ++ * ++ * Checks if the PHY is in a state that can be reset or if manageability ++ * has it tied up. This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_check_reset_block(struct e1000_hw *hw) ++{ ++ if (hw->func.check_reset_block) ++ return hw->func.check_reset_block(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_read_phy_reg - Reads PHY register ++ * @hw: pointer to the HW structure ++ * @offset: the register to read ++ * @data: the buffer to store the 16-bit read. ++ * ++ * Reads the PHY register and returns the value in data. ++ * This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ if (hw->func.read_phy_reg) ++ return hw->func.read_phy_reg(hw, offset, data); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_write_phy_reg - Writes PHY register ++ * @hw: pointer to the HW structure ++ * @offset: the register to write ++ * @data: the value to write. ++ * ++ * Writes the PHY register at offset with the value in data. ++ * This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ if (hw->func.write_phy_reg) ++ return hw->func.write_phy_reg(hw, offset, data); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_read_kmrn_reg - Reads register using Kumeran interface ++ * @hw: pointer to the HW structure ++ * @offset: the register to read ++ * @data: the location to store the 16-bit value read. ++ * ++ * Reads a register out of the Kumeran interface. 
Currently no func pointer ++ * exists and all implementations are handled in the generic version of ++ * this function. ++ **/ ++s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return e1000_read_kmrn_reg_generic(hw, offset, data); ++} ++ ++/** ++ * e1000_write_kmrn_reg - Writes register using Kumeran interface ++ * @hw: pointer to the HW structure ++ * @offset: the register to write ++ * @data: the value to write. ++ * ++ * Writes a register to the Kumeran interface. Currently no func pointer ++ * exists and all implementations are handled in the generic version of ++ * this function. ++ **/ ++s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return e1000_write_kmrn_reg_generic(hw, offset, data); ++} ++ ++/** ++ * e1000_get_cable_length - Retrieves cable length estimation ++ * @hw: pointer to the HW structure ++ * ++ * This function estimates the cable length and stores them in ++ * hw->phy.min_length and hw->phy.max_length. This is a function pointer ++ * entry point called by drivers. ++ **/ ++s32 e1000_get_cable_length(struct e1000_hw *hw) ++{ ++ if (hw->func.get_cable_length) ++ return hw->func.get_cable_length(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_get_phy_info - Retrieves PHY information from registers ++ * @hw: pointer to the HW structure ++ * ++ * This function gets some information from various PHY registers and ++ * populates hw->phy values with it. This is a function pointer entry ++ * point called by drivers. ++ **/ ++s32 e1000_get_phy_info(struct e1000_hw *hw) ++{ ++ if (hw->func.get_phy_info) ++ return hw->func.get_phy_info(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_phy_hw_reset - Hard PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Performs a hard PHY reset. This is a function pointer entry point called ++ * by drivers. ++ **/ ++s32 e1000_phy_hw_reset(struct e1000_hw *hw) ++{ ++ if (hw->func.reset_phy) ++ return hw->func.reset_phy(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_phy_commit - Soft PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Performs a soft PHY reset on those that apply. This is a function pointer ++ * entry point called by drivers. ++ **/ ++s32 e1000_phy_commit(struct e1000_hw *hw) ++{ ++ if (hw->func.commit_phy) ++ return hw->func.commit_phy(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_set_d3_lplu_state - Sets low power link up state for D0 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D0 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D0 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. This is a function pointer entry point called by drivers. 
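/*
 * Illustrative usage only -- not part of the upstream patch. The comment
 * above describes LPLU as a power-saving mode for idle/Dx states that should
 * stay off while the link is actively used, so that SmartSpeed can keep
 * performance up. A caller could express that policy roughly as below;
 * demo_set_power_profile() and its flags are hypothetical, the policy split
 * is an assumption, and the snippet relies on the driver's own headers.
 */
#include "e1000_api.h"

static void demo_set_power_profile(struct e1000_hw *hw,
				   bool entering_suspend, bool runtime_idle)
{
	if (entering_suspend) {
		/* Dx: power conservation matters most, keep LPLU on. */
		(void)e1000_set_d3_lplu_state(hw, TRUE);
		return;
	}

	/*
	 * D0: enable LPLU only while idle; while traffic is flowing, leave
	 * it off so SmartSpeed stays available.
	 */
	(void)e1000_set_d0_lplu_state(hw, runtime_idle);
}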
++ **/ ++s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active) ++{ ++ if (hw->func.set_d0_lplu_state) ++ return hw->func.set_d0_lplu_state(hw, active); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_set_d3_lplu_state - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) ++{ ++ if (hw->func.set_d3_lplu_state) ++ return hw->func.set_d3_lplu_state(hw, active); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_read_mac_addr - Reads MAC address ++ * @hw: pointer to the HW structure ++ * ++ * Reads the MAC address out of the adapter and stores it in the HW structure. ++ * Currently no func pointer exists and all implementations are handled in the ++ * generic version of this function. ++ **/ ++s32 e1000_read_mac_addr(struct e1000_hw *hw) ++{ ++ if (hw->func.read_mac_addr) ++ return hw->func.read_mac_addr(hw); ++ ++ return e1000_read_mac_addr_generic(hw); ++} ++ ++/** ++ * e1000_read_pba_num - Read device part number ++ * @hw: pointer to the HW structure ++ * @pba_num: pointer to device part number ++ * ++ * Reads the product board assembly (PBA) number from the EEPROM and stores ++ * the value in pba_num. ++ * Currently no func pointer exists and all implementations are handled in the ++ * generic version of this function. ++ **/ ++s32 e1000_read_pba_num(struct e1000_hw *hw, u32 *pba_num) ++{ ++ return e1000_read_pba_num_generic(hw, pba_num); ++} ++ ++/** ++ * e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum ++ * @hw: pointer to the HW structure ++ * ++ * Validates the NVM checksum is correct. This is a function pointer entry ++ * point called by drivers. ++ **/ ++s32 e1000_validate_nvm_checksum(struct e1000_hw *hw) ++{ ++ if (hw->func.validate_nvm) ++ return hw->func.validate_nvm(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the NVM checksum. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. ++ **/ ++s32 e1000_update_nvm_checksum(struct e1000_hw *hw) ++{ ++ if (hw->func.update_nvm) ++ return hw->func.update_nvm(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_reload_nvm - Reloads EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the ++ * extended control register. ++ **/ ++void e1000_reload_nvm(struct e1000_hw *hw) ++{ ++ if (hw->func.reload_nvm) ++ hw->func.reload_nvm(hw); ++} ++ ++/** ++ * e1000_read_nvm - Reads NVM (EEPROM) ++ * @hw: pointer to the HW structure ++ * @offset: the word offset to read ++ * @words: number of 16-bit words to read ++ * @data: pointer to the properly sized buffer for the data. ++ * ++ * Reads 16-bit chunks of data from the NVM (EEPROM). This is a function ++ * pointer entry point called by drivers. 
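/*
 * Illustrative sketch only -- not part of the upstream patch.
 * e1000_validate_nvm_checksum() above just defers to a per-device hook; the
 * usual scheme for this kind of word-based EEPROM checksum is "the 16-bit
 * sum of every word in the checksummed region, including the stored checksum
 * word, equals a fixed signature". The region size (DEMO_NVM_WORDS) and the
 * signature (DEMO_NVM_SUM) below are placeholders for the sketch, not values
 * taken from the patch.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_NVM_WORDS 64
#define DEMO_NVM_SUM   0xBABA	/* placeholder signature */

static int demo_validate_nvm_checksum(const uint16_t *nvm)
{
	uint16_t sum = 0;
	int i;

	for (i = 0; i < DEMO_NVM_WORDS; i++)
		sum += nvm[i];		/* last word is the stored checksum */

	return (sum == DEMO_NVM_SUM) ? 0 : -1;
}

static void demo_update_nvm_checksum(uint16_t *nvm)
{
	uint16_t sum = 0;
	int i;

	for (i = 0; i < DEMO_NVM_WORDS - 1; i++)
		sum += nvm[i];

	/* store whatever word makes the full sum hit the signature */
	nvm[DEMO_NVM_WORDS - 1] = (uint16_t)(DEMO_NVM_SUM - sum);
}

int main(void)
{
	uint16_t nvm[DEMO_NVM_WORDS] = { 0x1234, 0x5678, 0x9abc };

	demo_update_nvm_checksum(nvm);
	printf("checksum ok: %s\n",
	       demo_validate_nvm_checksum(nvm) == 0 ? "yes" : "no");
	return 0;
}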
++ **/ ++s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ if (hw->func.read_nvm) ++ return hw->func.read_nvm(hw, offset, words, data); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_write_nvm - Writes to NVM (EEPROM) ++ * @hw: pointer to the HW structure ++ * @offset: the word offset to read ++ * @words: number of 16-bit words to write ++ * @data: pointer to the properly sized buffer for the data. ++ * ++ * Writes 16-bit chunks of data to the NVM (EEPROM). This is a function ++ * pointer entry point called by drivers. ++ **/ ++s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ if (hw->func.write_nvm) ++ return hw->func.write_nvm(hw, offset, words, data); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_write_8bit_ctrl_reg - Writes 8bit Control register ++ * @hw: pointer to the HW structure ++ * @reg: 32bit register offset ++ * @offset: the register to write ++ * @data: the value to write. ++ * ++ * Writes the PHY register at offset with the value in data. ++ * This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, ++ u8 data) ++{ ++ return e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data); ++} ++ ++/** ++ * e1000_power_up_phy - Restores link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * The phy may be powered down to save power, to turn off link when the ++ * driver is unloaded, or wake on lan is not enabled (among others). ++ **/ ++void e1000_power_up_phy(struct e1000_hw *hw) ++{ ++ if (hw->func.power_up_phy) ++ hw->func.power_up_phy(hw); ++ ++ e1000_setup_link(hw); ++} ++ ++/** ++ * e1000_power_down_phy - Power down PHY ++ * @hw: pointer to the HW structure ++ * ++ * The phy may be powered down to save power, to turn off link when the ++ * driver is unloaded, or wake on lan is not enabled (among others). ++ **/ ++void e1000_power_down_phy(struct e1000_hw *hw) ++{ ++ if (hw->func.power_down_phy) ++ hw->func.power_down_phy(hw); ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_manage.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_manage.c 2021-04-07 16:01:27.710633476 +0800 +@@ -0,0 +1,384 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000_api.h" ++#include "e1000_manage.h" ++ ++static u8 e1000_calculate_checksum(u8 *buffer, u32 length); ++ ++/** ++ * e1000_calculate_checksum - Calculate checksum for buffer ++ * @buffer: pointer to EEPROM ++ * @length: size of EEPROM to calculate a checksum for ++ * ++ * Calculates the checksum for some buffer on a specified length. The ++ * checksum calculated is returned. ++ **/ ++static u8 e1000_calculate_checksum(u8 *buffer, u32 length) ++{ ++ u32 i; ++ u8 sum = 0; ++ ++ DEBUGFUNC("e1000_calculate_checksum"); ++ ++ if (!buffer) ++ return 0; ++ ++ for (i = 0; i < length; i++) ++ sum += buffer[i]; ++ ++ return (u8) (0 - sum); ++} ++ ++/** ++ * e1000_mng_enable_host_if_generic - Checks host interface is enabled ++ * @hw: pointer to the HW structure ++ * ++ * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND ++ * ++ * This function checks whether the HOST IF is enabled for command operaton ++ * and also checks whether the previous command is completed. It busy waits ++ * in case of previous command is not completed. ++ **/ ++s32 e1000_mng_enable_host_if_generic(struct e1000_hw * hw) ++{ ++ u32 hicr; ++ s32 ret_val = E1000_SUCCESS; ++ u8 i; ++ ++ DEBUGFUNC("e1000_mng_enable_host_if_generic"); ++ ++ /* Check that the host interface is enabled. */ ++ hicr = E1000_READ_REG(hw, E1000_HICR); ++ if ((hicr & E1000_HICR_EN) == 0) { ++ DEBUGOUT("E1000_HOST_EN bit disabled.\n"); ++ ret_val = -E1000_ERR_HOST_INTERFACE_COMMAND; ++ goto out; ++ } ++ /* check the previous command is completed */ ++ for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) { ++ hicr = E1000_READ_REG(hw, E1000_HICR); ++ if (!(hicr & E1000_HICR_C)) ++ break; ++ msec_delay_irq(1); ++ } ++ ++ if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) { ++ DEBUGOUT("Previous command timeout failed .\n"); ++ ret_val = -E1000_ERR_HOST_INTERFACE_COMMAND; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_mng_mode_generic - Generic check managament mode ++ * @hw: pointer to the HW structure ++ * ++ * Reads the firmware semaphore register and returns true (>0) if ++ * manageability is enabled, else false (0). ++ **/ ++bool e1000_check_mng_mode_generic(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ DEBUGFUNC("e1000_check_mng_mode_generic"); ++ ++ fwsm = E1000_READ_REG(hw, E1000_FWSM); ++ ++ return ((fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); ++} ++ ++/** ++ * e1000_enable_tx_pkt_filtering_generic - Enable packet filtering on TX ++ * @hw: pointer to the HW structure ++ * ++ * Enables packet filtering on transmit packets if manageability is enabled ++ * and host interface is enabled. ++ **/ ++bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw) ++{ ++ struct e1000_host_mng_dhcp_cookie *hdr = &hw->mng_cookie; ++ u32 *buffer = (u32 *)&hw->mng_cookie; ++ u32 offset; ++ s32 ret_val, hdr_csum, csum; ++ u8 i, len; ++ bool tx_filter = TRUE; ++ ++ DEBUGFUNC("e1000_enable_tx_pkt_filtering_generic"); ++ ++ /* No manageability, no filtering */ ++ if (!e1000_check_mng_mode(hw)) { ++ tx_filter = FALSE; ++ goto out; ++ } ++ ++ /* ++ * If we can't read from the host interface for whatever ++ * reason, disable filtering. ++ */ ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val != E1000_SUCCESS) { ++ tx_filter = FALSE; ++ goto out; ++ } ++ ++ /* Read in the header. Length and offset are in dwords. 
*/ ++ len = E1000_MNG_DHCP_COOKIE_LENGTH >> 2; ++ offset = E1000_MNG_DHCP_COOKIE_OFFSET >> 2; ++ for (i = 0; i < len; i++) { ++ *(buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw, ++ E1000_HOST_IF, ++ offset + i); ++ } ++ hdr_csum = hdr->checksum; ++ hdr->checksum = 0; ++ csum = e1000_calculate_checksum((u8 *)hdr, ++ E1000_MNG_DHCP_COOKIE_LENGTH); ++ /* ++ * If either the checksums or signature don't match, then ++ * the cookie area isn't considered valid, in which case we ++ * take the safe route of assuming Tx filtering is enabled. ++ */ ++ if (hdr_csum != csum) ++ goto out; ++ if (hdr->signature != E1000_IAMT_SIGNATURE) ++ goto out; ++ ++ /* Cookie area is valid, make the final check for filtering. */ ++ if (!(hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING)) ++ tx_filter = FALSE; ++ ++out: ++ hw->mac.tx_pkt_filtering = tx_filter; ++ return tx_filter; ++} ++ ++/** ++ * e1000_mng_write_dhcp_info_generic - Writes DHCP info to host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface ++ * @length: size of the buffer ++ * ++ * Writes the DHCP information to the host interface. ++ **/ ++s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw * hw, u8 *buffer, ++ u16 length) ++{ ++ struct e1000_host_mng_command_header hdr; ++ s32 ret_val; ++ u32 hicr; ++ ++ DEBUGFUNC("e1000_mng_write_dhcp_info_generic"); ++ ++ hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD; ++ hdr.command_length = length; ++ hdr.reserved1 = 0; ++ hdr.reserved2 = 0; ++ hdr.checksum = 0; ++ ++ /* Enable the host interface */ ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Populate the host interface with the contents of "buffer". */ ++ ret_val = e1000_mng_host_if_write(hw, buffer, length, ++ sizeof(hdr), &(hdr.checksum)); ++ if (ret_val) ++ goto out; ++ ++ /* Write the manageability command header */ ++ ret_val = e1000_mng_write_cmd_header(hw, &hdr); ++ if (ret_val) ++ goto out; ++ ++ /* Tell the ARC a new command is pending. */ ++ hicr = E1000_READ_REG(hw, E1000_HICR); ++ E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_mng_write_cmd_header_generic - Writes manageability command header ++ * @hw: pointer to the HW structure ++ * @hdr: pointer to the host interface command header ++ * ++ * Writes the command header after does the checksum calculation. ++ **/ ++s32 e1000_mng_write_cmd_header_generic(struct e1000_hw * hw, ++ struct e1000_host_mng_command_header * hdr) ++{ ++ u16 i, length = sizeof(struct e1000_host_mng_command_header); ++ ++ DEBUGFUNC("e1000_mng_write_cmd_header_generic"); ++ ++ /* Write the whole command header structure with new checksum. */ ++ ++ hdr->checksum = e1000_calculate_checksum((u8 *)hdr, length); ++ ++ length >>= 2; ++ /* Write the relevant command block into the ram area. */ ++ for (i = 0; i < length; i++) { ++ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i, ++ *((u32 *) hdr + i)); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_mng_host_if_write_generic - Write to the manageability host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface buffer ++ * @length: size of the buffer ++ * @offset: location in the buffer to write to ++ * @sum: sum of the data (not checksum) ++ * ++ * This function writes the buffer content at the offset given on the host if. ++ * It also does alignment considerations to do the writes in most efficient ++ * way. Also fills up the sum of the buffer in *buffer parameter. 
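/*
 * Illustrative sketch only -- not part of the upstream patch.
 * e1000_calculate_checksum() earlier in this file returns (u8)(0 - sum),
 * i.e. the two's complement of the byte sum, so a block whose checksum field
 * was produced that way sums back to the signature value. The DHCP-cookie
 * check above relies on exactly that: save the stored checksum, clear the
 * field, recompute, compare. Standalone demo with an invented cookie layout.
 */
#include <stdint.h>
#include <stdio.h>

struct demo_cookie {
	uint8_t signature;
	uint8_t status;
	uint8_t reserved;
	uint8_t checksum;
};

static uint8_t demo_calculate_checksum(const uint8_t *buf, uint32_t len)
{
	uint8_t sum = 0;
	uint32_t i;

	for (i = 0; i < len; i++)
		sum += buf[i];

	return (uint8_t)(0 - sum);	/* same complement trick as the driver */
}

int main(void)
{
	struct demo_cookie cookie = { .signature = 0x5A, .status = 0x01 };
	uint8_t stored, recomputed;

	/* Producer side: compute the checksum over the zeroed field. */
	cookie.checksum = 0;
	cookie.checksum = demo_calculate_checksum((uint8_t *)&cookie,
						  sizeof(cookie));

	/* Consumer side: save, clear, recompute, compare. */
	stored = cookie.checksum;
	cookie.checksum = 0;
	recomputed = demo_calculate_checksum((uint8_t *)&cookie,
					     sizeof(cookie));

	printf("cookie %s\n", stored == recomputed ? "valid" : "corrupt");
	return 0;
}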
++ **/ ++s32 e1000_mng_host_if_write_generic(struct e1000_hw * hw, u8 *buffer, ++ u16 length, u16 offset, u8 *sum) ++{ ++ u8 *tmp; ++ u8 *bufptr = buffer; ++ u32 data = 0; ++ s32 ret_val = E1000_SUCCESS; ++ u16 remaining, i, j, prev_bytes; ++ ++ DEBUGFUNC("e1000_mng_host_if_write_generic"); ++ ++ /* sum = only sum of the data and it is not checksum */ ++ ++ if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH) { ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ tmp = (u8 *)&data; ++ prev_bytes = offset & 0x3; ++ offset >>= 2; ++ ++ if (prev_bytes) { ++ data = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset); ++ for (j = prev_bytes; j < sizeof(u32); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset, data); ++ length -= j - prev_bytes; ++ offset++; ++ } ++ ++ remaining = length & 0x3; ++ length -= remaining; ++ ++ /* Calculate length in DWORDs */ ++ length >>= 2; ++ ++ /* ++ * The device driver writes the relevant command block into the ++ * ram area. ++ */ ++ for (i = 0; i < length; i++) { ++ for (j = 0; j < sizeof(u32); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ ++ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i, data); ++ } ++ if (remaining) { ++ for (j = 0; j < sizeof(u32); j++) { ++ if (j < remaining) ++ *(tmp + j) = *bufptr++; ++ else ++ *(tmp + j) = 0; ++ ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i, data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_enable_mng_pass_thru - Enable processing of ARP's ++ * @hw: pointer to the HW structure ++ * ++ * Verifies the hardware needs to allow ARPs to be processed by the host. ++ **/ ++bool e1000_enable_mng_pass_thru(struct e1000_hw *hw) ++{ ++ u32 manc; ++ u32 fwsm, factps; ++ bool ret_val = FALSE; ++ ++ DEBUGFUNC("e1000_enable_mng_pass_thru"); ++ ++ if (!hw->mac.asf_firmware_present) ++ goto out; ++ ++ manc = E1000_READ_REG(hw, E1000_MANC); ++ ++ if (!(manc & E1000_MANC_RCV_TCO_EN) || ++ !(manc & E1000_MANC_EN_MAC_ADDR_FILTER)) ++ goto out; ++ ++ if (hw->mac.arc_subsystem_valid) { ++ fwsm = E1000_READ_REG(hw, E1000_FWSM); ++ factps = E1000_READ_REG(hw, E1000_FACTPS); ++ ++ if (!(factps & E1000_FACTPS_MNGCG) && ++ ((fwsm & E1000_FWSM_MODE_MASK) == ++ (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT))) { ++ ret_val = TRUE; ++ goto out; ++ } ++ } else { ++ if ((manc & E1000_MANC_SMBUS_EN) && ++ !(manc & E1000_MANC_ASF_EN)) { ++ ret_val = TRUE; ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82543.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82543.h 2021-04-07 16:01:27.705633483 +0800 +@@ -0,0 +1,44 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_82543_H_ ++#define _E1000_82543_H_ ++ ++#define PHY_PREAMBLE 0xFFFFFFFF ++#define PHY_PREAMBLE_SIZE 32 ++#define PHY_SOF 0x1 ++#define PHY_OP_READ 0x2 ++#define PHY_OP_WRITE 0x1 ++#define PHY_TURNAROUND 0x2 ++ ++#define TBI_COMPAT_ENABLED 0x1 /* Global "knob" for the workaround */ ++/* If TBI_COMPAT_ENABLED, then this is the current state (on/off) */ ++#define TBI_SBP_ENABLED 0x2 ++ ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_phy.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_phy.h 2021-04-07 16:01:27.700633490 +0800 +@@ -0,0 +1,168 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_PHY_H_ ++#define _E1000_PHY_H_ ++ ++typedef enum { ++ e1000_ms_hw_default = 0, ++ e1000_ms_force_master, ++ e1000_ms_force_slave, ++ e1000_ms_auto ++} e1000_ms_type; ++ ++typedef enum { ++ e1000_smart_speed_default = 0, ++ e1000_smart_speed_on, ++ e1000_smart_speed_off ++} e1000_smart_speed; ++ ++s32 e1000_check_downshift_generic(struct e1000_hw *hw); ++s32 e1000_check_polarity_m88(struct e1000_hw *hw); ++s32 e1000_check_polarity_igp(struct e1000_hw *hw); ++s32 e1000_check_reset_block_generic(struct e1000_hw *hw); ++s32 e1000_copper_link_autoneg(struct e1000_hw *hw); ++s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw); ++s32 e1000_copper_link_setup_igp(struct e1000_hw *hw); ++s32 e1000_copper_link_setup_m88(struct e1000_hw *hw); ++s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw); ++s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw); ++s32 e1000_get_cable_length_m88(struct e1000_hw *hw); ++s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw); ++s32 e1000_get_cfg_done_generic(struct e1000_hw *hw); ++s32 e1000_get_phy_id(struct e1000_hw *hw); ++s32 e1000_get_phy_info_igp(struct e1000_hw *hw); ++s32 e1000_get_phy_info_m88(struct e1000_hw *hw); ++s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw); ++void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl); ++s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw); ++s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw); ++s32 e1000_phy_setup_autoneg(struct e1000_hw *hw); ++s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active); ++s32 e1000_setup_copper_link_generic(struct e1000_hw *hw); ++s32 e1000_wait_autoneg_generic(struct e1000_hw *hw); ++s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data); ++s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data); ++s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data); ++s32 e1000_phy_reset_dsp(struct e1000_hw *hw); ++s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success); ++s32 e1000_phy_init_script_igp3(struct e1000_hw *hw); ++e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id); ++void e1000_power_up_phy_copper(struct e1000_hw *hw); ++void e1000_power_down_phy_copper(struct e1000_hw *hw); ++s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); ++ ++#define E1000_MAX_PHY_ADDR 4 ++ ++/* IGP01E1000 Specific Registers */ ++#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* Port Config */ ++#define IGP01E1000_PHY_PORT_STATUS 0x11 /* Status */ ++#define IGP01E1000_PHY_PORT_CTRL 0x12 /* Control */ ++#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health */ ++#define IGP01E1000_GMII_FIFO 0x14 /* GMII FIFO */ ++#define IGP01E1000_PHY_CHANNEL_QUALITY 0x15 /* PHY Channel Quality */ ++#define IGP02E1000_PHY_POWER_MGMT 0x19 /* Power Management */ ++#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* Page Select */ ++#define BM_PHY_PAGE_SELECT 22 /* Page Select for BM */ ++#define IGP_PAGE_SHIFT 5 ++#define PHY_REG_MASK 0x1F ++ ++ ++#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4 ++#define 
IGP01E1000_PHY_POLARITY_MASK 0x0078 ++ ++#define IGP01E1000_PSCR_AUTO_MDIX 0x1000 ++#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0=MDI, 1=MDIX */ ++ ++#define IGP01E1000_PSCFR_SMART_SPEED 0x0080 ++ ++/* Enable flexible speed on link-up */ ++#define IGP01E1000_GMII_FLEX_SPD 0x0010 ++#define IGP01E1000_GMII_SPD 0x0020 /* Enable SPD */ ++ ++#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */ ++#define IGP02E1000_PM_D0_LPLU 0x0002 /* For D0a states */ ++#define IGP02E1000_PM_D3_LPLU 0x0004 /* For all other states */ ++ ++#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 ++ ++#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 ++#define IGP01E1000_PSSR_MDIX 0x0008 ++#define IGP01E1000_PSSR_SPEED_MASK 0xC000 ++#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 ++ ++#define IGP02E1000_PHY_CHANNEL_NUM 4 ++#define IGP02E1000_PHY_AGC_A 0x11B1 ++#define IGP02E1000_PHY_AGC_B 0x12B1 ++#define IGP02E1000_PHY_AGC_C 0x14B1 ++#define IGP02E1000_PHY_AGC_D 0x18B1 ++ ++#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Course - 15:13, Fine - 12:9 */ ++#define IGP02E1000_AGC_LENGTH_MASK 0x7F ++#define IGP02E1000_AGC_RANGE 15 ++ ++#define IGP03E1000_PHY_MISC_CTRL 0x1B ++#define IGP03E1000_PHY_MISC_DUPLEX_MANUAL_SET 0x1000 /* Manually Set Duplex */ ++ ++#define E1000_CABLE_LENGTH_UNDEFINED 0xFF ++ ++#define E1000_KMRNCTRLSTA_OFFSET 0x001F0000 ++#define E1000_KMRNCTRLSTA_OFFSET_SHIFT 16 ++#define E1000_KMRNCTRLSTA_REN 0x00200000 ++#define E1000_KMRNCTRLSTA_DIAG_OFFSET 0x3 /* Kumeran Diagnostic */ ++#define E1000_KMRNCTRLSTA_DIAG_NELPBK 0x1000 /* Nearend Loopback mode */ ++ ++#define IFE_PHY_EXTENDED_STATUS_CONTROL 0x10 ++#define IFE_PHY_SPECIAL_CONTROL 0x11 /* 100BaseTx PHY Special Control */ ++#define IFE_PHY_SPECIAL_CONTROL_LED 0x1B /* PHY Special and LED Control */ ++#define IFE_PHY_MDIX_CONTROL 0x1C /* MDI/MDI-X Control */ ++ ++/* IFE PHY Extended Status Control */ ++#define IFE_PESC_POLARITY_REVERSED 0x0100 ++ ++/* IFE PHY Special Control */ ++#define IFE_PSC_AUTO_POLARITY_DISABLE 0x0010 ++#define IFE_PSC_FORCE_POLARITY 0x0020 ++#define IFE_PSC_DISABLE_DYNAMIC_POWER_DOWN 0x0100 ++ ++/* IFE PHY Special Control and LED Control */ ++#define IFE_PSCL_PROBE_MODE 0x0020 ++#define IFE_PSCL_PROBE_LEDS_OFF 0x0006 /* Force LEDs 0 and 2 off */ ++#define IFE_PSCL_PROBE_LEDS_ON 0x0007 /* Force LEDs 0 and 2 on */ ++ ++/* IFE PHY MDIX Control */ ++#define IFE_PMC_MDIX_STATUS 0x0020 /* 1=MDI-X, 0=MDI */ ++#define IFE_PMC_FORCE_MDIX 0x0040 /* 1=force MDI-X, 0=force MDI */ ++#define IFE_PMC_AUTO_MDIX 0x0080 /* 1=enable auto MDI/MDI-X, 0=disable */ ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_nvm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_nvm.h 2021-04-07 16:01:27.696633496 +0800 +@@ -0,0 +1,61 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_NVM_H_ ++#define _E1000_NVM_H_ ++ ++s32 e1000_acquire_nvm_generic(struct e1000_hw *hw); ++ ++s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg); ++s32 e1000_read_mac_addr_generic(struct e1000_hw *hw); ++s32 e1000_read_pba_num_generic(struct e1000_hw *hw, u32 *pba_num); ++s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++s32 e1000_read_nvm_microwire(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data); ++s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data); ++s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw); ++s32 e1000_write_nvm_eewr(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++s32 e1000_write_nvm_microwire(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data); ++s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw); ++void e1000_stop_nvm(struct e1000_hw *hw); ++void e1000_release_nvm_generic(struct e1000_hw *hw); ++void e1000_reload_nvm_generic(struct e1000_hw *hw); ++ ++/* Function pointers */ ++s32 e1000_acquire_nvm(struct e1000_hw *hw); ++void e1000_release_nvm(struct e1000_hw *hw); ++ ++#define E1000_STM_OPCODE 0xDB00 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82571.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82571.h 2021-04-07 16:01:27.691633503 +0800 +@@ -0,0 +1,40 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_82571_H_ ++#define _E1000_82571_H_ ++ ++#define ID_LED_RESERVED_F746 0xF746 ++#define ID_LED_DEFAULT_82573 ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_OFF1_ON2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++ ++#define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_80003es2lan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_80003es2lan.c 2021-04-07 16:01:27.686633510 +0800 +@@ -0,0 +1,1401 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_80003es2lan ++ */ ++ ++#include "e1000_api.h" ++#include "e1000_80003es2lan.h" ++ ++static s32 e1000_init_phy_params_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw); ++static void e1000_release_phy_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw); ++static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, ++ u16 *data); ++static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, ++ u16 data); ++static s32 e1000_write_nvm_80003es2lan(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_get_cfg_done_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex); ++static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); ++static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex); ++static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw); ++static s32 
e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw); ++static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw); ++static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); ++static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw); ++ ++/* ++ * A table for the GG82563 cable length where the range is defined ++ * with a lower bound at "index" and the upper bound at ++ * "index + 5". ++ */ ++static const u16 e1000_gg82563_cable_length_table[] = ++ { 0, 60, 115, 150, 150, 60, 115, 150, 180, 180, 0xFF }; ++#define GG82563_CABLE_LENGTH_TABLE_SIZE \ ++ (sizeof(e1000_gg82563_cable_length_table) / \ ++ sizeof(e1000_gg82563_cable_length_table[0])) ++ ++/** ++ * e1000_init_phy_params_80003es2lan - Init ESB2 PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_phy_params_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_phy_params_80003es2lan"); ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ goto out; ++ } else { ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper_80003es2lan; ++ } ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 100; ++ phy->type = e1000_phy_gg82563; ++ ++ func->acquire_phy = e1000_acquire_phy_80003es2lan; ++ func->check_polarity = e1000_check_polarity_m88; ++ func->check_reset_block = e1000_check_reset_block_generic; ++ func->commit_phy = e1000_phy_sw_reset_generic; ++ func->get_cfg_done = e1000_get_cfg_done_80003es2lan; ++ func->get_phy_info = e1000_get_phy_info_m88; ++ func->release_phy = e1000_release_phy_80003es2lan; ++ func->reset_phy = e1000_phy_hw_reset_generic; ++ func->set_d3_lplu_state = e1000_set_d3_lplu_state_generic; ++ ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_80003es2lan; ++ func->get_cable_length = e1000_get_cable_length_80003es2lan; ++ func->read_phy_reg = e1000_read_phy_reg_gg82563_80003es2lan; ++ func->write_phy_reg = e1000_write_phy_reg_gg82563_80003es2lan; ++ ++ /* This can only be done after all function pointers are setup. */ ++ ret_val = e1000_get_phy_id(hw); ++ ++ /* Verify phy id */ ++ if (phy->id != GG82563_E_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_80003es2lan - Init ESB2 NVM func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ u16 size; ++ ++ DEBUGFUNC("e1000_init_nvm_params_80003es2lan"); ++ ++ nvm->opcode_bits = 8; ++ nvm->delay_usec = 1; ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->page_size = 32; ++ nvm->address_bits = 16; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->page_size = 8; ++ nvm->address_bits = 8; ++ break; ++ default: ++ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; ++ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 
16 : 8; ++ break; ++ } ++ ++ nvm->type = e1000_nvm_eeprom_spi; ++ ++ size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ ++ /* ++ * Added to a constant, "size" becomes the left-shift value ++ * for setting word_size. ++ */ ++ size += NVM_WORD_SIZE_BASE_SHIFT; ++ ++ /* EEPROM access above 16k is unsupported */ ++ if (size > 14) ++ size = 14; ++ nvm->word_size = 1 << size; ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_nvm_80003es2lan; ++ func->read_nvm = e1000_read_nvm_eerd; ++ func->release_nvm = e1000_release_nvm_80003es2lan; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->valid_led_default = e1000_valid_led_default_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ func->write_nvm = e1000_write_nvm_80003es2lan; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_mac_params_80003es2lan - Init ESB2 MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_mac_params_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_mac_params_80003es2lan"); ++ ++ /* Set media type */ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ /* Set if part includes ASF firmware */ ++ mac->asf_firmware_present = TRUE; ++ /* Set if manageability features are enabled. */ ++ mac->arc_subsystem_valid = ++ (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK) ++ ? TRUE : FALSE; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_pcie_generic; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_80003es2lan; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_80003es2lan; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_generic; ++ /* physical interface link setup */ ++ func->setup_physical_interface = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? 
e1000_setup_copper_link_80003es2lan ++ : e1000_setup_fiber_serdes_link_generic; ++ /* check for link */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ func->check_for_link = e1000_check_for_copper_link_generic; ++ break; ++ case e1000_media_type_fiber: ++ func->check_for_link = e1000_check_for_fiber_link_generic; ++ break; ++ case e1000_media_type_internal_serdes: ++ func->check_for_link = e1000_check_for_serdes_link_generic; ++ break; ++ default: ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ break; ++ } ++ /* check management mode */ ++ func->check_mng_mode = e1000_check_mng_mode_generic; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_generic; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* read mac address */ ++ func->read_mac_addr = e1000_read_mac_addr_80003es2lan; ++ /* blink LED */ ++ func->blink_led = e1000_blink_led_generic; ++ /* setup LED */ ++ func->setup_led = e1000_setup_led_generic; ++ /* cleanup LED */ ++ func->cleanup_led = e1000_cleanup_led_generic; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_generic; ++ func->led_off = e1000_led_off_generic; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_80003es2lan; ++ /* link info */ ++ func->get_link_up_info = e1000_get_link_up_info_80003es2lan; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_80003es2lan - Init ESB2 func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. ++ **/ ++void e1000_init_function_pointers_80003es2lan(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_80003es2lan"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_80003es2lan; ++ hw->func.init_nvm_params = e1000_init_nvm_params_80003es2lan; ++ hw->func.init_phy_params = e1000_init_phy_params_80003es2lan; ++} ++ ++/** ++ * e1000_acquire_phy_80003es2lan - Acquire rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * A wrapper to acquire access rights to the correct PHY. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ DEBUGFUNC("e1000_acquire_phy_80003es2lan"); ++ ++ mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; ++ mask |= E1000_SWFW_CSR_SM; ++ ++ return e1000_acquire_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_release_phy_80003es2lan - Release rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * A wrapper to release access rights to the correct PHY. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static void e1000_release_phy_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ DEBUGFUNC("e1000_release_phy_80003es2lan"); ++ ++ mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; ++ mask |= E1000_SWFW_CSR_SM; ++ ++ e1000_release_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_acquire_nvm_80003es2lan - Acquire rights to access NVM ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the semaphore to access the EEPROM. This is a function ++ * pointer entry point called by the api module. 
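/*
 * Illustrative usage only -- not part of the upstream patch.
 * e1000_init_function_pointers_80003es2lan() above only installs the three
 * init_*_params hooks; something in the api layer must then invoke them so
 * each can fill in its slice of hw->func. The explicit sequence below is a
 * simplified assumption about that flow, demo_setup_80003es2lan() is a
 * hypothetical helper, and the snippet relies on the driver's own headers.
 */
#include "e1000_api.h"

static s32 demo_setup_80003es2lan(struct e1000_hw *hw)
{
	s32 ret_val;

	/* Stage 1: family-specific code installs the param initializers. */
	e1000_init_function_pointers_80003es2lan(hw);

	/* Stage 2: run them; each populates further hw->func entries. */
	ret_val = hw->func.init_mac_params(hw);
	if (ret_val)
		return ret_val;

	ret_val = hw->func.init_nvm_params(hw);
	if (ret_val)
		return ret_val;

	return hw->func.init_phy_params(hw);
}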
++ **/ ++static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_acquire_nvm_80003es2lan"); ++ ++ ret_val = e1000_acquire_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_acquire_nvm_generic(hw); ++ ++ if (ret_val) ++ e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_release_nvm_80003es2lan - Relinquish rights to access NVM ++ * @hw: pointer to the HW structure ++ * ++ * Release the semaphore used to access the EEPROM. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_release_nvm_80003es2lan"); ++ ++ e1000_release_nvm_generic(hw); ++ e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++} ++ ++/** ++ * e1000_acquire_swfw_sync_80003es2lan - Acquire SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask ++ * will also specify which port we're acquiring the lock for. ++ **/ ++static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ u32 swmask = mask; ++ u32 fwmask = mask << 16; ++ s32 ret_val = E1000_SUCCESS; ++ s32 i = 0, timeout = 200; ++ ++ DEBUGFUNC("e1000_acquire_swfw_sync_80003es2lan"); ++ ++ while (i < timeout) { ++ if (e1000_get_hw_semaphore_generic(hw)) { ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); ++ if (!(swfw_sync & (fwmask | swmask))) ++ break; ++ ++ /* ++ * Firmware currently using resource (fwmask) ++ * or other software thread using resource (swmask) ++ */ ++ e1000_put_hw_semaphore_generic(hw); ++ msec_delay_irq(5); ++ i++; ++ } ++ ++ if (i == timeout) { ++ DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync |= swmask; ++ E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); ++ ++ e1000_put_hw_semaphore_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_release_swfw_sync_80003es2lan - Release SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Release the SW/FW semaphore used to access the PHY or NVM. The mask ++ * will also specify which port we're releasing the lock for. ++ **/ ++static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ ++ DEBUGFUNC("e1000_release_swfw_sync_80003es2lan"); ++ ++ while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS); ++ /* Empty */ ++ ++ swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); ++ swfw_sync &= ~mask; ++ E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); ++ ++ e1000_put_hw_semaphore_generic(hw); ++} ++ ++/** ++ * e1000_read_phy_reg_gg82563_80003es2lan - Read GG82563 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @data: pointer to the data returned from the operation ++ * ++ * Read the GG82563 PHY register. This is a function pointer entry ++ * point called by the api module. 
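/*
 * Illustrative sketch only -- not part of the upstream patch. The helpers
 * above follow a strict bracket: acquire the shared SW/FW resource, do the
 * register work, and release the resource on every exit path, including the
 * error paths. Below is a compact standalone rendering of that discipline
 * using the same goto-out idiom as the driver; demo_acquire(),
 * demo_release() and the register stubs are invented stand-ins.
 */
#include <stdio.h>

#define DEMO_SUCCESS   0
#define DEMO_ERR_SWFW  13

static int demo_acquire(void)
{
	/* the real code polls SW_FW_SYNC and fails with -DEMO_ERR_SWFW on timeout */
	return DEMO_SUCCESS;
}

static void demo_release(void) { }

static int demo_page_select(unsigned page)
{
	(void)page;
	return DEMO_SUCCESS;
}

static int demo_read_reg(unsigned reg, unsigned short *val)
{
	(void)reg;
	*val = 0x1234;
	return DEMO_SUCCESS;
}

static int demo_read_paged_reg(unsigned page, unsigned reg, unsigned short *val)
{
	int ret_val;

	ret_val = demo_acquire();
	if (ret_val)
		goto out;		/* nothing held yet, just bail */

	ret_val = demo_page_select(page);
	if (ret_val)
		goto release;		/* resource held: must release */

	ret_val = demo_read_reg(reg, val);

release:
	demo_release();
out:
	return ret_val;
}

int main(void)
{
	unsigned short val = 0;

	if (demo_read_paged_reg(0, 21, &val) == DEMO_SUCCESS)
		printf("reg = 0x%04x\n", val);
	return 0;
}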
++ **/ ++static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ u32 page_select; ++ u16 temp; ++ ++ DEBUGFUNC("e1000_read_phy_reg_gg82563_80003es2lan"); ++ ++ ret_val = e1000_acquire_phy_80003es2lan(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Select Configuration Page */ ++ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ page_select = GG82563_PHY_PAGE_SELECT; ++ } else { ++ /* ++ * Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ page_select = GG82563_PHY_PAGE_SELECT_ALT; ++ } ++ ++ temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ++ ret_val = e1000_write_phy_reg_mdic(hw, page_select, temp); ++ if (ret_val) { ++ e1000_release_phy_80003es2lan(hw); ++ goto out; ++ } ++ ++ /* ++ * The "ready" bit in the MDIC register may be incorrectly set ++ * before the device has completed the "Page Select" MDI ++ * transaction. So we wait 200us after each MDI command... ++ */ ++ usec_delay(200); ++ ++ /* ...and verify the command was successful. */ ++ ret_val = e1000_read_phy_reg_mdic(hw, page_select, &temp); ++ ++ if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { ++ ret_val = -E1000_ERR_PHY; ++ e1000_release_phy_80003es2lan(hw); ++ goto out; ++ } ++ ++ usec_delay(200); ++ ++ ret_val = e1000_read_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ usec_delay(200); ++ e1000_release_phy_80003es2lan(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_gg82563_80003es2lan - Write GG82563 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @data: value to write to the register ++ * ++ * Write to the GG82563 PHY register. This is a function pointer entry ++ * point called by the api module. ++ **/ ++static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, u16 data) ++{ ++ s32 ret_val; ++ u32 page_select; ++ u16 temp; ++ ++ DEBUGFUNC("e1000_write_phy_reg_gg82563_80003es2lan"); ++ ++ ret_val = e1000_acquire_phy_80003es2lan(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Select Configuration Page */ ++ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ page_select = GG82563_PHY_PAGE_SELECT; ++ } else { ++ /* ++ * Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ page_select = GG82563_PHY_PAGE_SELECT_ALT; ++ } ++ ++ temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ++ ret_val = e1000_write_phy_reg_mdic(hw, page_select, temp); ++ if (ret_val) { ++ e1000_release_phy_80003es2lan(hw); ++ goto out; ++ } ++ ++ ++ /* ++ * The "ready" bit in the MDIC register may be incorrectly set ++ * before the device has completed the "Page Select" MDI ++ * transaction. So we wait 200us after each MDI command... ++ */ ++ usec_delay(200); ++ ++ /* ...and verify the command was successful. */ ++ ret_val = e1000_read_phy_reg_mdic(hw, page_select, &temp); ++ ++ if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { ++ ret_val = -E1000_ERR_PHY; ++ e1000_release_phy_80003es2lan(hw); ++ goto out; ++ } ++ ++ usec_delay(200); ++ ++ ret_val = e1000_write_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ usec_delay(200); ++ e1000_release_phy_80003es2lan(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_80003es2lan - Write to ESB2 NVM ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @words: number of words to write ++ * @data: buffer of data to write to the NVM ++ * ++ * Write "words" of data to the ESB2 NVM. 
This is a function ++ * pointer entry point called by the api module. ++ **/ ++static s32 e1000_write_nvm_80003es2lan(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data) ++{ ++ DEBUGFUNC("e1000_write_nvm_80003es2lan"); ++ ++ return e1000_write_nvm_spi(hw, offset, words, data); ++} ++ ++/** ++ * e1000_get_cfg_done_80003es2lan - Wait for configuration to complete ++ * @hw: pointer to the HW structure ++ * ++ * Wait a specific amount of time for manageability processes to complete. ++ * This is a function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_get_cfg_done_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 timeout = PHY_CFG_TIMEOUT; ++ s32 ret_val = E1000_SUCCESS; ++ u32 mask = E1000_NVM_CFG_DONE_PORT_0; ++ ++ DEBUGFUNC("e1000_get_cfg_done_80003es2lan"); ++ ++ if (hw->bus.func == 1) ++ mask = E1000_NVM_CFG_DONE_PORT_1; ++ ++ while (timeout) { ++ if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask) ++ break; ++ msec_delay(1); ++ timeout--; ++ } ++ if (!timeout) { ++ DEBUGOUT("MNG configuration cycle has not completed.\n"); ++ ret_val = -E1000_ERR_RESET; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_80003es2lan - Force PHY speed and duplex ++ * @hw: pointer to the HW structure ++ * ++ * Force the speed and duplex settings onto the PHY. This is a ++ * function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_80003es2lan"); ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~GG82563_PSCR_CROSSOVER_MODE_AUTO; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT1("GG82563 PSCR: %X\n", phy_data); ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ e1000_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ /* Reset the phy to commit changes. */ ++ phy_data |= MII_CR_RESET; ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ usec_delay(1); ++ ++ if (hw->phy.autoneg_wait_to_complete) { ++ DEBUGOUT("Waiting for forced speed/duplex link " ++ "on GG82563 phy.\n"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ /* ++ * We didn't get link. ++ * Reset the DSP and cross our fingers. ++ */ ++ ret_val = e1000_phy_reset_dsp_generic(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Try once more */ ++ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Resetting the phy means we need to verify the TX_CLK corresponds ++ * to the link speed. 10Mbps -> 2.5MHz, else 25MHz. ++ */ ++ phy_data &= ~GG82563_MSCR_TX_CLK_MASK; ++ if (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED) ++ phy_data |= GG82563_MSCR_TX_CLK_10MBPS_2_5; ++ else ++ phy_data |= GG82563_MSCR_TX_CLK_100MBPS_25; ++ ++ /* ++ * In addition, we must re-enable CRS on Tx for both half and full ++ * duplex. 
++ */ ++ phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_80003es2lan - Set approximate cable length ++ * @hw: pointer to the HW structure ++ * ++ * Find the approximate cable length as measured by the GG82563 PHY. ++ * This is a function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, index; ++ ++ DEBUGFUNC("e1000_get_cable_length_80003es2lan"); ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_DSP_DISTANCE, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = phy_data & GG82563_DSPD_CABLE_LENGTH; ++ phy->min_cable_length = e1000_gg82563_cable_length_table[index]; ++ phy->max_cable_length = e1000_gg82563_cable_length_table[index+5]; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_link_up_info_80003es2lan - Report speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to speed buffer ++ * @duplex: pointer to duplex buffer ++ * ++ * Retrieve the current speed and duplex configuration. ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_get_link_up_info_80003es2lan"); ++ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ ret_val = e1000_get_speed_and_duplex_copper_generic(hw, ++ speed, ++ duplex); ++ if (ret_val) ++ goto out; ++ if (*speed == SPEED_1000) ++ ret_val = e1000_cfg_kmrn_1000_80003es2lan(hw); ++ else ++ ret_val = e1000_cfg_kmrn_10_100_80003es2lan(hw, ++ *duplex); ++ } else { ++ ret_val = e1000_get_speed_and_duplex_fiber_serdes_generic(hw, ++ speed, ++ duplex); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_80003es2lan - Reset the ESB2 controller ++ * @hw: pointer to the HW structure ++ * ++ * Perform a global reset to the ESB2 controller. ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 ctrl, icr; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_reset_hw_80003es2lan"); ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000_disable_pcie_master_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("PCI-E Master disable polling has failed.\n"); ++ } ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGOUT("Issuing a global reset to MAC\n"); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ ++ ret_val = e1000_get_auto_rd_done_generic(hw); ++ if (ret_val) ++ /* We don't want to continue accessing MAC registers. */ ++ goto out; ++ ++ /* Clear any pending interrupt events. 
*/ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ ret_val = e1000_check_alt_mac_addr_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_80003es2lan - Initialize the ESB2 controller ++ * @hw: pointer to the HW structure ++ * ++ * Initialize the hw bits, LED, VFTA, MTA, link and hw counters. ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 reg_data; ++ s32 ret_val; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_hw_80003es2lan"); ++ ++ e1000_initialize_hw_bits_80003es2lan(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000_id_led_init_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Disabling VLAN filtering */ ++ DEBUGOUT("Initializing the IEEE VLAN\n"); ++ e1000_clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ /* Set the transmit descriptor write-back policy */ ++ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg_data); ++ ++ /* ...for both queues. */ ++ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg_data); ++ ++ /* Enable retransmit on late collisions */ ++ reg_data = E1000_READ_REG(hw, E1000_TCTL); ++ reg_data |= E1000_TCTL_RTLC; ++ E1000_WRITE_REG(hw, E1000_TCTL, reg_data); ++ ++ /* Configure Gigabit Carry Extend Padding */ ++ reg_data = E1000_READ_REG(hw, E1000_TCTL_EXT); ++ reg_data &= ~E1000_TCTL_EXT_GCEX_MASK; ++ reg_data |= DEFAULT_TCTL_EXT_GCEX_80003ES2LAN; ++ E1000_WRITE_REG(hw, E1000_TCTL_EXT, reg_data); ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ reg_data = E1000_READ_REG(hw, E1000_TIPG); ++ reg_data &= ~E1000_TIPG_IPGT_MASK; ++ reg_data |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; ++ E1000_WRITE_REG(hw, E1000_TIPG, reg_data); ++ ++ reg_data = E1000_READ_REG_ARRAY(hw, E1000_FFLT, 0x0001); ++ reg_data &= ~0x00100000; ++ E1000_WRITE_REG_ARRAY(hw, E1000_FFLT, 0x0001, reg_data); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_initialize_hw_bits_80003es2lan - Init hw bits of ESB2 ++ * @hw: pointer to the HW structure ++ * ++ * Initializes required hardware-dependent bits needed for normal operation. 
++ **/ ++static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ DEBUGFUNC("e1000_initialize_hw_bits_80003es2lan"); ++ ++ if (hw->mac.disable_hw_init_bits) ++ goto out; ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(0)); ++ reg &= ~(0xF << 27); /* 30:27 */ ++ if (hw->phy.media_type != e1000_media_type_copper) ++ reg &= ~(1 << 20); ++ E1000_WRITE_REG(hw, E1000_TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(1)); ++ if (E1000_READ_REG(hw, E1000_TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ E1000_WRITE_REG(hw, E1000_TARC(1), reg); ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_copper_link_setup_gg82563_80003es2lan - Configure GG82563 Link ++ * @hw: pointer to the HW structure ++ * ++ * Setup some GG82563 PHY registers for obtaining link ++ **/ ++static s32 e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u32 ctrl_ext; ++ u16 data; ++ ++ DEBUGFUNC("e1000_copper_link_setup_gg82563_80003es2lan"); ++ ++ if (!phy->reset_disable) { ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ /* Use 25MHz for both link down and 1000Base-T for Tx clock. */ ++ data |= GG82563_MSCR_TX_CLK_1000MBPS_25; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_SPEC_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~GG82563_PSCR_CROSSOVER_MODE_MASK; ++ ++ switch (phy->mdix) { ++ case 1: ++ data |= GG82563_PSCR_CROSSOVER_MODE_MDI; ++ break; ++ case 2: ++ data |= GG82563_PSCR_CROSSOVER_MODE_MDIX; ++ break; ++ case 0: ++ default: ++ data |= GG82563_PSCR_CROSSOVER_MODE_AUTO; ++ break; ++ } ++ ++ /* ++ * Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ data &= ~GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ if (phy->disable_polarity_correction) ++ data |= GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL, data); ++ if (ret_val) ++ goto out; ++ ++ /* SW Reset the PHY so all changes take effect */ ++ ret_val = e1000_phy_commit(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Resetting the PHY\n"); ++ goto out; ++ } ++ ++ } ++ ++ /* Bypass Rx and Tx FIFO's */ ++ ret_val = e1000_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL, ++ E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS | ++ E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE, ++ &data); ++ if (ret_val) ++ goto out; ++ data |= E1000_KMRNCTRLSTA_OPMODE_E_IDLE; ++ ret_val = e1000_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE, ++ data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = 
e1000_read_phy_reg(hw, GG82563_PHY_SPEC_CTRL_2, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~GG82563_PSCR2_REVERSE_AUTO_NEG; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL_2, data); ++ if (ret_val) ++ goto out; ++ ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ ctrl_ext &= ~(E1000_CTRL_EXT_LINK_MODE_MASK); ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Do not init these registers when the HW is in IAMT mode, since the ++ * firmware will have already initialized them. We only initialize ++ * them if the HW is not in IAMT mode. ++ */ ++ if (!(e1000_check_mng_mode(hw))) { ++ /* Enable Electrical Idle on the PHY */ ++ data |= GG82563_PMCR_ENABLE_ELECTRICAL_IDLE; ++ ret_val = e1000_write_phy_reg(hw, ++ GG82563_PHY_PWR_MGMT_CTRL, ++ data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, ++ GG82563_PHY_KMRN_MODE_CTRL, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1000_write_phy_reg(hw, ++ GG82563_PHY_KMRN_MODE_CTRL, ++ data); ++ ++ if (ret_val) ++ goto out; ++ } ++ ++ /* ++ * Workaround: Disable padding in Kumeran interface in the MAC ++ * and in the PHY to avoid CRC errors. ++ */ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_INBAND_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data |= GG82563_ICR_DIS_PADDING; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_INBAND_CTRL, data); ++ if (ret_val) ++ goto out; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_80003es2lan - Setup Copper Link for ESB2 ++ * @hw: pointer to the HW structure ++ * ++ * Essentially a wrapper for setting up all things "copper" related. ++ * This is a function pointer entry point called by the mac module. ++ **/ ++static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u16 reg_data; ++ ++ DEBUGFUNC("e1000_setup_copper_link_80003es2lan"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* ++ * Set the mac to wait the maximum time between each ++ * iteration and increase the max iterations when ++ * polling the phy; this fixes erroneous timeouts at 10Mbps. ++ */ ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 4), 0xFFFF); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_read_kmrn_reg(hw, GG82563_REG(0x34, 9), ®_data); ++ if (ret_val) ++ goto out; ++ reg_data |= 0x3F; ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 9), reg_data); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, ++ ®_data); ++ if (ret_val) ++ goto out; ++ reg_data |= E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING; ++ ret_val = e1000_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, ++ reg_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_copper_link_setup_gg82563_80003es2lan(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_setup_copper_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_cfg_kmrn_10_100_80003es2lan - Apply "quirks" for 10/100 operation ++ * @hw: pointer to the HW structure ++ * @duplex: current duplex setting ++ * ++ * Configure the KMRN interface by applying last minute quirks for ++ * 10/100 operation. 
++ **/ ++static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u32 tipg; ++ u32 i = 0; ++ u16 reg_data, reg_data2; ++ ++ DEBUGFUNC("e1000_configure_kmrn_for_10_100"); ++ ++ reg_data = E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT; ++ ret_val = e1000_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ goto out; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = E1000_READ_REG(hw, E1000_TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_TIPG_IPGT_10_100_80003ES2LAN; ++ E1000_WRITE_REG(hw, E1000_TIPG, tipg); ++ ++ ++ do { ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ ®_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ ®_data2); ++ if (ret_val) ++ goto out; ++ i++; ++ } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); ++ ++ if (duplex == HALF_DUPLEX) ++ reg_data |= GG82563_KMCR_PASS_FALSE_CARRIER; ++ else ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_cfg_kmrn_1000_80003es2lan - Apply "quirks" for gigabit operation ++ * @hw: pointer to the HW structure ++ * ++ * Configure the KMRN interface by applying last minute quirks for ++ * gigabit operation. ++ **/ ++static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 reg_data, reg_data2; ++ u32 tipg; ++ u32 i = 0; ++ ++ DEBUGFUNC("e1000_configure_kmrn_for_1000"); ++ ++ reg_data = E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT; ++ ret_val = e1000_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ goto out; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = E1000_READ_REG(hw, E1000_TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; ++ E1000_WRITE_REG(hw, E1000_TIPG, tipg); ++ ++ ++ do { ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ ®_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ ®_data2); ++ if (ret_val) ++ goto out; ++ i++; ++ } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); ++ ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_mac_addr_80003es2lan - Read device MAC address ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_read_mac_addr_80003es2lan"); ++ if (e1000_check_alt_mac_addr_generic(hw)) ++ ret_val = e1000_read_mac_addr_generic(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_80003es2lan - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. 
++ **/ ++static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(e1000_check_mng_mode(hw) || e1000_check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_80003es2lan - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. ++ **/ ++static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_80003es2lan"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++ ++ temp = E1000_READ_REG(hw, E1000_MGTPRC); ++ temp = E1000_READ_REG(hw, E1000_MGTPDC); ++ temp = E1000_READ_REG(hw, E1000_MGTPTC); ++ ++ temp = E1000_READ_REG(hw, E1000_IAC); ++ temp = E1000_READ_REG(hw, E1000_ICRXOC); ++ ++ temp = E1000_READ_REG(hw, E1000_ICRXPTC); ++ temp = E1000_READ_REG(hw, E1000_ICRXATC); ++ temp = E1000_READ_REG(hw, E1000_ICTXPTC); ++ temp = E1000_READ_REG(hw, E1000_ICTXATC); ++ temp = E1000_READ_REG(hw, E1000_ICTXQEC); ++ temp = E1000_READ_REG(hw, E1000_ICTXQMTC); ++ temp = E1000_READ_REG(hw, E1000_ICRXDMTC); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_nvm.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_nvm.c 2021-04-07 16:01:27.682633516 +0800 +@@ -0,0 +1,893 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000_api.h" ++#include "e1000_nvm.h" ++ ++/** ++ * e1000_raise_eec_clk - Raise EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Enable/Raise the EEPROM clock bit. ++ **/ ++static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd | E1000_EECD_SK; ++ E1000_WRITE_REG(hw, E1000_EECD, *eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->nvm.delay_usec); ++} ++ ++/** ++ * e1000_lower_eec_clk - Lower EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Clear/Lower the EEPROM clock bit. ++ **/ ++static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd & ~E1000_EECD_SK; ++ E1000_WRITE_REG(hw, E1000_EECD, *eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->nvm.delay_usec); ++} ++ ++/** ++ * e1000_shift_out_eec_bits - Shift data bits our to the EEPROM ++ * @hw: pointer to the HW structure ++ * @data: data to send to the EEPROM ++ * @count: number of bits to shift out ++ * ++ * We need to shift 'count' bits out to the EEPROM. So, the value in the ++ * "data" parameter will be shifted out to the EEPROM one bit at a time. ++ * In order to do this, "data" must be broken down into bits. ++ **/ ++static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ u32 mask; ++ ++ DEBUGFUNC("e1000_shift_out_eec_bits"); ++ ++ mask = 0x01 << (count - 1); ++ if (nvm->type == e1000_nvm_eeprom_microwire) ++ eecd &= ~E1000_EECD_DO; ++ else if (nvm->type == e1000_nvm_eeprom_spi) ++ eecd |= E1000_EECD_DO; ++ ++ do { ++ eecd &= ~E1000_EECD_DI; ++ ++ if (data & mask) ++ eecd |= E1000_EECD_DI; ++ ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(nvm->delay_usec); ++ ++ e1000_raise_eec_clk(hw, &eecd); ++ e1000_lower_eec_clk(hw, &eecd); ++ ++ mask >>= 1; ++ } while (mask); ++ ++ eecd &= ~E1000_EECD_DI; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++} ++ ++/** ++ * e1000_shift_in_eec_bits - Shift data bits in from the EEPROM ++ * @hw: pointer to the HW structure ++ * @count: number of bits to shift in ++ * ++ * In order to read a register from the EEPROM, we need to shift 'count' bits ++ * in from the EEPROM. Bits are "shifted in" by raising the clock input to ++ * the EEPROM (setting the SK bit), and then reading the value of the data out ++ * "DO" bit. During this "shifting in" process the data in "DI" bit should ++ * always be clear. ++ **/ ++static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count) ++{ ++ u32 eecd; ++ u32 i; ++ u16 data; ++ ++ DEBUGFUNC("e1000_shift_in_eec_bits"); ++ ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ ++ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); ++ data = 0; ++ ++ for (i = 0; i < count; i++) { ++ data <<= 1; ++ e1000_raise_eec_clk(hw, &eecd); ++ ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ ++ eecd &= ~E1000_EECD_DI; ++ if (eecd & E1000_EECD_DO) ++ data |= 1; ++ ++ e1000_lower_eec_clk(hw, &eecd); ++ } ++ ++ return data; ++} ++ ++/** ++ * e1000_poll_eerd_eewr_done - Poll for EEPROM read/write completion ++ * @hw: pointer to the HW structure ++ * @ee_reg: EEPROM flag for polling ++ * ++ * Polls the EEPROM status bit for either read or write completion based ++ * upon the value of 'ee_reg'. 
++ **/ ++s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg) ++{ ++ u32 attempts = 100000; ++ u32 i, reg = 0; ++ s32 ret_val = -E1000_ERR_NVM; ++ ++ DEBUGFUNC("e1000_poll_eerd_eewr_done"); ++ ++ for (i = 0; i < attempts; i++) { ++ if (ee_reg == E1000_NVM_POLL_READ) ++ reg = E1000_READ_REG(hw, E1000_EERD); ++ else ++ reg = E1000_READ_REG(hw, E1000_EEWR); ++ ++ if (reg & E1000_NVM_RW_REG_DONE) { ++ ret_val = E1000_SUCCESS; ++ break; ++ } ++ ++ usec_delay(5); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_acquire_nvm_generic - Generic request for access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Set the EEPROM access request bit and wait for EEPROM access grant bit. ++ * Return successful if access grant bit set, else clear the request for ++ * EEPROM access and return -E1000_ERR_NVM (-1). ++ **/ ++s32 e1000_acquire_nvm_generic(struct e1000_hw *hw) ++{ ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ s32 timeout = E1000_NVM_GRANT_ATTEMPTS; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_acquire_nvm_generic"); ++ ++ E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_REQ); ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ ++ while (timeout) { ++ if (eecd & E1000_EECD_GNT) ++ break; ++ usec_delay(5); ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ eecd &= ~E1000_EECD_REQ; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ DEBUGOUT("Could not acquire NVM grant\n"); ++ ret_val = -E1000_ERR_NVM; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_standby_nvm - Return EEPROM to standby state ++ * @hw: pointer to the HW structure ++ * ++ * Return the EEPROM to a standby state. ++ **/ ++static void e1000_standby_nvm(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ ++ DEBUGFUNC("e1000_standby_nvm"); ++ ++ if (nvm->type == e1000_nvm_eeprom_microwire) { ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(nvm->delay_usec); ++ ++ e1000_raise_eec_clk(hw, &eecd); ++ ++ /* Select EEPROM */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(nvm->delay_usec); ++ ++ e1000_lower_eec_clk(hw, &eecd); ++ } else if (nvm->type == e1000_nvm_eeprom_spi) { ++ /* Toggle CS to flush commands */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(nvm->delay_usec); ++ eecd &= ~E1000_EECD_CS; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(nvm->delay_usec); ++ } ++} ++ ++/** ++ * e1000_stop_nvm - Terminate EEPROM command ++ * @hw: pointer to the HW structure ++ * ++ * Terminates the current command by inverting the EEPROM's chip select pin. ++ **/ ++void e1000_stop_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ DEBUGFUNC("e1000_stop_nvm"); ++ ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ if (hw->nvm.type == e1000_nvm_eeprom_spi) { ++ /* Pull CS high */ ++ eecd |= E1000_EECD_CS; ++ e1000_lower_eec_clk(hw, &eecd); ++ } else if (hw->nvm.type == e1000_nvm_eeprom_microwire) { ++ /* CS on Microcwire is active-high */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_DI); ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ e1000_raise_eec_clk(hw, &eecd); ++ e1000_lower_eec_clk(hw, &eecd); ++ } ++} ++ ++/** ++ * e1000_release_nvm_generic - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit. 
++ **/ ++void e1000_release_nvm_generic(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ DEBUGFUNC("e1000_release_nvm_generic"); ++ ++ e1000_stop_nvm(hw); ++ ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ eecd &= ~E1000_EECD_REQ; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++} ++ ++/** ++ * e1000_ready_nvm_eeprom - Prepares EEPROM for read/write ++ * @hw: pointer to the HW structure ++ * ++ * Setups the EEPROM for reading and writing. ++ **/ ++static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ s32 ret_val = E1000_SUCCESS; ++ u16 timeout = 0; ++ u8 spi_stat_reg; ++ ++ DEBUGFUNC("e1000_ready_nvm_eeprom"); ++ ++ if (nvm->type == e1000_nvm_eeprom_microwire) { ++ /* Clear SK and DI */ ++ eecd &= ~(E1000_EECD_DI | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ /* Set CS */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ } else if (nvm->type == e1000_nvm_eeprom_spi) { ++ /* Clear SK and CS */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ usec_delay(1); ++ timeout = NVM_MAX_RETRY_SPI; ++ ++ /* ++ * Read "Status Register" repeatedly until the LSB is cleared. ++ * The EEPROM will signal that the command has been completed ++ * by clearing bit 0 of the internal status register. If it's ++ * not cleared within 'timeout', then error out. ++ */ ++ while (timeout) { ++ e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI, ++ hw->nvm.opcode_bits); ++ spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8); ++ if (!(spi_stat_reg & NVM_STATUS_RDY_SPI)) ++ break; ++ ++ usec_delay(5); ++ e1000_standby_nvm(hw); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("SPI NVM Status error\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_nvm_spi - Read EEPROM's using SPI ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM. ++ **/ ++s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i = 0; ++ s32 ret_val; ++ u16 word_in; ++ u8 read_opcode = NVM_READ_OPCODE_SPI; ++ ++ DEBUGFUNC("e1000_read_nvm_spi"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_ready_nvm_eeprom(hw); ++ if (ret_val) ++ goto release; ++ ++ e1000_standby_nvm(hw); ++ ++ if ((nvm->address_bits == 8) && (offset >= 128)) ++ read_opcode |= NVM_A8_OPCODE_SPI; ++ ++ /* Send the READ command (opcode + addr) */ ++ e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); ++ e1000_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits); ++ ++ /* ++ * Read the data. SPI NVMs increment the address with each byte ++ * read and will roll over if reading beyond the end. 
This allows ++ * us to read the whole NVM from any offset ++ */ ++ for (i = 0; i < words; i++) { ++ word_in = e1000_shift_in_eec_bits(hw, 16); ++ data[i] = (word_in >> 8) | (word_in << 8); ++ } ++ ++release: ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_nvm_microwire - Reads EEPROM's using microwire ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM. ++ **/ ++s32 e1000_read_nvm_microwire(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i = 0; ++ s32 ret_val; ++ u8 read_opcode = NVM_READ_OPCODE_MICROWIRE; ++ ++ DEBUGFUNC("e1000_read_nvm_microwire"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_ready_nvm_eeprom(hw); ++ if (ret_val) ++ goto release; ++ ++ for (i = 0; i < words; i++) { ++ /* Send the READ command (opcode + addr) */ ++ e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); ++ e1000_shift_out_eec_bits(hw, (u16)(offset + i), ++ nvm->address_bits); ++ ++ /* ++ * Read the data. For microwire, each word requires the ++ * overhead of setup and tear-down. ++ */ ++ data[i] = e1000_shift_in_eec_bits(hw, 16); ++ e1000_standby_nvm(hw); ++ } ++ ++release: ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_nvm_eerd - Reads EEPROM using EERD register ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM using the EERD register. ++ **/ ++s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, eerd = 0; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_read_nvm_eerd"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * too many words for the offset, and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) + ++ E1000_NVM_RW_REG_START; ++ ++ E1000_WRITE_REG(hw, E1000_EERD, eerd); ++ ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ); ++ if (ret_val) ++ break; ++ ++ data[i] = (E1000_READ_REG(hw, E1000_EERD) >> ++ E1000_NVM_RW_REG_DATA); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_spi - Write to EEPROM using SPI ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * Writes data to EEPROM at offset using SPI interface. ++ * ++ * If e1000_update_nvm_checksum is not called after this function , the ++ * EEPROM will most likley contain an invalid checksum. 
++ **/ ++s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val; ++ u16 widx = 0; ++ ++ DEBUGFUNC("e1000_write_nvm_spi"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ msec_delay(10); ++ ++ while (widx < words) { ++ u8 write_opcode = NVM_WRITE_OPCODE_SPI; ++ ++ ret_val = e1000_ready_nvm_eeprom(hw); ++ if (ret_val) ++ goto release; ++ ++ e1000_standby_nvm(hw); ++ ++ /* Send the WRITE ENABLE command (8 bit opcode) */ ++ e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI, ++ nvm->opcode_bits); ++ ++ e1000_standby_nvm(hw); ++ ++ /* ++ * Some SPI eeproms use the 8th address bit embedded in the ++ * opcode ++ */ ++ if ((nvm->address_bits == 8) && (offset >= 128)) ++ write_opcode |= NVM_A8_OPCODE_SPI; ++ ++ /* Send the Write command (8-bit opcode + addr) */ ++ e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits); ++ e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2), ++ nvm->address_bits); ++ ++ /* Loop to allow for up to whole page write of eeprom */ ++ while (widx < words) { ++ u16 word_out = data[widx]; ++ word_out = (word_out >> 8) | (word_out << 8); ++ e1000_shift_out_eec_bits(hw, word_out, 16); ++ widx++; ++ ++ if ((((offset + widx) * 2) % nvm->page_size) == 0) { ++ e1000_standby_nvm(hw); ++ break; ++ } ++ } ++ } ++ ++ msec_delay(10); ++release: ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_microwire - Writes EEPROM using microwire ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * Writes data to EEPROM at offset using microwire interface. ++ * ++ * If e1000_update_nvm_checksum is not called after this function , the ++ * EEPROM will most likley contain an invalid checksum. ++ **/ ++s32 e1000_write_nvm_microwire(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val; ++ u32 eecd; ++ u16 words_written = 0; ++ u16 widx = 0; ++ ++ DEBUGFUNC("e1000_write_nvm_microwire"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. 
++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_ready_nvm_eeprom(hw); ++ if (ret_val) ++ goto release; ++ ++ e1000_shift_out_eec_bits(hw, NVM_EWEN_OPCODE_MICROWIRE, ++ (u16)(nvm->opcode_bits + 2)); ++ ++ e1000_shift_out_eec_bits(hw, 0, (u16)(nvm->address_bits - 2)); ++ ++ e1000_standby_nvm(hw); ++ ++ while (words_written < words) { ++ e1000_shift_out_eec_bits(hw, NVM_WRITE_OPCODE_MICROWIRE, ++ nvm->opcode_bits); ++ ++ e1000_shift_out_eec_bits(hw, (u16)(offset + words_written), ++ nvm->address_bits); ++ ++ e1000_shift_out_eec_bits(hw, data[words_written], 16); ++ ++ e1000_standby_nvm(hw); ++ ++ for (widx = 0; widx < 200; widx++) { ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ if (eecd & E1000_EECD_DO) ++ break; ++ usec_delay(50); ++ } ++ ++ if (widx == 200) { ++ DEBUGOUT("NVM Write did not complete\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto release; ++ } ++ ++ e1000_standby_nvm(hw); ++ ++ words_written++; ++ } ++ ++ e1000_shift_out_eec_bits(hw, NVM_EWDS_OPCODE_MICROWIRE, ++ (u16)(nvm->opcode_bits + 2)); ++ ++ e1000_shift_out_eec_bits(hw, 0, (u16)(nvm->address_bits - 2)); ++ ++release: ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_pba_num_generic - Read device part number ++ * @hw: pointer to the HW structure ++ * @pba_num: pointer to device part number ++ * ++ * Reads the product board assembly (PBA) number from the EEPROM and stores ++ * the value in pba_num. ++ **/ ++s32 e1000_read_pba_num_generic(struct e1000_hw *hw, u32 *pba_num) ++{ ++ s32 ret_val; ++ u16 nvm_data; ++ ++ DEBUGFUNC("e1000_read_pba_num_generic"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ *pba_num = (u32)(nvm_data << 16); ++ ++ ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_1, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ *pba_num |= nvm_data; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_mac_addr_generic - Read device MAC address ++ * @hw: pointer to the HW structure ++ * ++ * Reads the device MAC address from the EEPROM and stores the value. ++ * Since devices with two ports use the same EEPROM, we increment the ++ * last bit in the MAC address for the second port. ++ **/ ++s32 e1000_read_mac_addr_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 offset, nvm_data, i; ++ ++ DEBUGFUNC("e1000_read_mac_addr"); ++ ++ for (i = 0; i < ETH_ADDR_LEN; i += 2) { ++ offset = i >> 1; ++ ret_val = e1000_read_nvm(hw, offset, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ hw->mac.perm_addr[i] = (u8)(nvm_data & 0xFF); ++ hw->mac.perm_addr[i+1] = (u8)(nvm_data >> 8); ++ } ++ ++ /* Flip last bit of mac address if we're on second port */ ++ if (hw->bus.func == E1000_FUNC_1) ++ hw->mac.perm_addr[5] ^= 1; ++ ++ for (i = 0; i < ETH_ADDR_LEN; i++) ++ hw->mac.addr[i] = hw->mac.perm_addr[i]; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_validate_nvm_checksum_generic - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. 
++ **/ ++s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ DEBUGFUNC("e1000_validate_nvm_checksum_generic"); ++ ++ for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { ++ ret_val = e1000_read_nvm(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ ++ if (checksum != (u16) NVM_SUM) { ++ DEBUGOUT("NVM Checksum Invalid\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_update_nvm_checksum_generic - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ DEBUGFUNC("e1000_update_nvm_checksum"); ++ ++ for (i = 0; i < NVM_CHECKSUM_REG; i++) { ++ ret_val = e1000_read_nvm(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error while updating checksum.\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ checksum = (u16) NVM_SUM - checksum; ++ ret_val = e1000_write_nvm(hw, NVM_CHECKSUM_REG, 1, &checksum); ++ if (ret_val) { ++ DEBUGOUT("NVM Write Error while updating checksum.\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_reload_nvm_generic - Reloads EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the ++ * extended control register. ++ **/ ++void e1000_reload_nvm_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl_ext; ++ ++ DEBUGFUNC("e1000_reload_nvm_generic"); ++ ++ usec_delay(10); ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/* Function pointers local to this file and not intended for public use */ ++ ++/** ++ * e1000_acquire_nvm - Acquire exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * For those silicon families which have implemented a NVM acquire function, ++ * run the defined function else return success. ++ **/ ++s32 e1000_acquire_nvm(struct e1000_hw *hw) ++{ ++ if (hw->func.acquire_nvm) ++ return hw->func.acquire_nvm(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_release_nvm - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * For those silicon families which have implemented a NVM release function, ++ * run the defined fucntion else return success. ++ **/ ++void e1000_release_nvm(struct e1000_hw *hw) ++{ ++ if (hw->func.release_nvm) ++ hw->func.release_nvm(hw); ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82541.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82541.c 2021-04-07 16:01:27.677633523 +0800 +@@ -0,0 +1,1328 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. 
++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_82541 ++ * e1000_82547 ++ * e1000_82541_rev_2 ++ * e1000_82547_rev_2 ++ */ ++ ++#include "e1000_api.h" ++#include "e1000_82541.h" ++ ++static s32 e1000_init_phy_params_82541(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_82541(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_82541(struct e1000_hw *hw); ++static s32 e1000_reset_hw_82541(struct e1000_hw *hw); ++static s32 e1000_init_hw_82541(struct e1000_hw *hw); ++static s32 e1000_get_link_up_info_82541(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex); ++static s32 e1000_phy_hw_reset_82541(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_82541(struct e1000_hw *hw); ++static s32 e1000_check_for_link_82541(struct e1000_hw *hw); ++static s32 e1000_get_cable_length_igp_82541(struct e1000_hw *hw); ++static s32 e1000_set_d3_lplu_state_82541(struct e1000_hw *hw, ++ bool active); ++static s32 e1000_setup_led_82541(struct e1000_hw *hw); ++static s32 e1000_cleanup_led_82541(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_82541(struct e1000_hw *hw); ++static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw, ++ bool link_up); ++static s32 e1000_phy_init_script_82541(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_82541(struct e1000_hw *hw); ++ ++static const u16 e1000_igp_cable_length_table[] = ++ { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 25, 25, 25, ++ 25, 25, 25, 25, 30, 30, 30, 30, 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 50, 50, 50, 50, 50, 50, 50, 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 70, 70, 70, 70, 70, 70, 80, 80, 80, 80, 80, 80, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120}; ++#define IGP01E1000_AGC_LENGTH_TABLE_SIZE \ ++ (sizeof(e1000_igp_cable_length_table) / \ ++ sizeof(e1000_igp_cable_length_table[0])) ++ ++struct e1000_dev_spec_82541 { ++ e1000_dsp_config dsp_config; ++ e1000_ffe_config ffe_config; ++ u16 spd_default; ++ bool phy_init_script; ++}; ++ ++/** ++ * e1000_init_phy_params_82541 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_init_phy_params_82541(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_phy_params_82541"); ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 10000; ++ phy->type = e1000_phy_igp; ++ ++ /* Function Pointers */ ++ func->check_polarity = e1000_check_polarity_igp; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_igp; ++ func->get_cable_length = e1000_get_cable_length_igp_82541; ++ func->get_cfg_done = e1000_get_cfg_done_generic; ++ func->get_phy_info = e1000_get_phy_info_igp; ++ func->read_phy_reg = e1000_read_phy_reg_igp; ++ func->reset_phy = e1000_phy_hw_reset_82541; ++ func->set_d3_lplu_state = e1000_set_d3_lplu_state_82541; ++ func->write_phy_reg = e1000_write_phy_reg_igp; ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper_82541; ++ ++ ret_val = e1000_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Verify phy id */ ++ if (phy->id != IGP01E1000_I_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82541 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_82541(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ u16 size; ++ ++ DEBUGFUNC("e1000_init_nvm_params_82541"); ++ ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->type = e1000_nvm_eeprom_spi; ++ eecd |= E1000_EECD_ADDR_BITS; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->type = e1000_nvm_eeprom_spi; ++ eecd &= ~E1000_EECD_ADDR_BITS; ++ break; ++ case e1000_nvm_override_microwire_large: ++ nvm->type = e1000_nvm_eeprom_microwire; ++ eecd |= E1000_EECD_SIZE; ++ break; ++ case e1000_nvm_override_microwire_small: ++ nvm->type = e1000_nvm_eeprom_microwire; ++ eecd &= ~E1000_EECD_SIZE; ++ break; ++ default: ++ nvm->type = eecd & E1000_EECD_TYPE ++ ? e1000_nvm_eeprom_spi ++ : e1000_nvm_eeprom_microwire; ++ break; ++ } ++ ++ if (nvm->type == e1000_nvm_eeprom_spi) { ++ nvm->address_bits = (eecd & E1000_EECD_ADDR_BITS) ++ ? 16 : 8; ++ nvm->delay_usec = 1; ++ nvm->opcode_bits = 8; ++ nvm->page_size = (eecd & E1000_EECD_ADDR_BITS) ++ ? 32 : 8; ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_nvm_generic; ++ func->read_nvm = e1000_read_nvm_spi; ++ func->release_nvm = e1000_release_nvm_generic; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->valid_led_default = e1000_valid_led_default_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ func->write_nvm = e1000_write_nvm_spi; ++ ++ /* ++ * nvm->word_size must be discovered after the pointers ++ * are set so we can verify the size from the nvm image ++ * itself. Temporarily set it to a dummy value so the ++ * read will work. ++ */ ++ nvm->word_size = 64; ++ ret_val = e1000_read_nvm(hw, NVM_CFG, 1, &size); ++ if (ret_val) ++ goto out; ++ size = (size & NVM_SIZE_MASK) >> NVM_SIZE_SHIFT; ++ /* ++ * if size != 0, it can be added to a constant and become ++ * the left-shift value to set the word_size. Otherwise, ++ * word_size stays at 64. 
++ */ ++ if (size) { ++ size += NVM_WORD_SIZE_BASE_SHIFT_82541; ++ nvm->word_size = 1 << size; ++ } ++ } else { ++ nvm->address_bits = (eecd & E1000_EECD_ADDR_BITS) ++ ? 8 : 6; ++ nvm->delay_usec = 50; ++ nvm->opcode_bits = 3; ++ nvm->word_size = (eecd & E1000_EECD_ADDR_BITS) ++ ? 256 : 64; ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_nvm_generic; ++ func->read_nvm = e1000_read_nvm_microwire; ++ func->release_nvm = e1000_release_nvm_generic; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->valid_led_default = e1000_valid_led_default_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ func->write_nvm = e1000_write_nvm_microwire; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_mac_params_82541 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_mac_params_82541(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_init_mac_params_82541"); ++ ++ /* Set media type */ ++ hw->phy.media_type = e1000_media_type_copper; ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ /* Set if part includes ASF firmware */ ++ mac->asf_firmware_present = TRUE; ++ ++ /* Function Pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_pci_generic; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_82541; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_82541; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_generic; ++ /* physical interface link setup */ ++ func->setup_physical_interface = e1000_setup_copper_link_82541; ++ /* check for link */ ++ func->check_for_link = e1000_check_for_link_82541; ++ /* link info */ ++ func->get_link_up_info = e1000_get_link_up_info_82541; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_generic; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* setup LED */ ++ func->setup_led = e1000_setup_led_82541; ++ /* cleanup LED */ ++ func->cleanup_led = e1000_cleanup_led_82541; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_generic; ++ func->led_off = e1000_led_off_generic; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_82541; ++ ++ hw->dev_spec_size = sizeof(struct e1000_dev_spec_82541); ++ ++ /* Device-specific structure allocation */ ++ ret_val = e1000_alloc_zeroed_dev_spec_struct(hw, hw->dev_spec_size); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_82541 - Init func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. 
++ **/ ++void e1000_init_function_pointers_82541(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_82541"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_82541; ++ hw->func.init_nvm_params = e1000_init_nvm_params_82541; ++ hw->func.init_phy_params = e1000_init_phy_params_82541; ++} ++ ++/** ++ * e1000_reset_hw_82541 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_reset_hw_82541(struct e1000_hw *hw) ++{ ++ u32 ledctl, ctrl, icr, manc; ++ ++ DEBUGFUNC("e1000_reset_hw_82541"); ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* ++ * Delay to allow any outstanding PCI transactions to complete ++ * before resetting the device. ++ */ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* Must reset the Phy before resetting the MAC */ ++ if ((hw->mac.type == e1000_82541) || (hw->mac.type == e1000_82547)) { ++ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl | E1000_CTRL_PHY_RST)); ++ msec_delay(5); ++ } ++ ++ DEBUGOUT("Issuing a global reset to 82541/82547 MAC\n"); ++ switch (hw->mac.type) { ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ /* ++ * These controllers can't ack the 64-bit write when ++ * issuing the reset, so we use IO-mapping as a ++ * workaround to issue the reset. ++ */ ++ E1000_WRITE_REG_IO(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ break; ++ default: ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ break; ++ } ++ ++ /* Wait for NVM reload */ ++ msec_delay(20); ++ ++ /* Disable HW ARPs on ASF enabled adapters */ ++ manc = E1000_READ_REG(hw, E1000_MANC); ++ manc &= ~E1000_MANC_ARP_EN; ++ E1000_WRITE_REG(hw, E1000_MANC, manc); ++ ++ if ((hw->mac.type == e1000_82541) || (hw->mac.type == e1000_82547)) { ++ e1000_phy_init_script_82541(hw); ++ ++ /* Configure activity LED after Phy reset */ ++ ledctl = E1000_READ_REG(hw, E1000_LEDCTL); ++ ledctl &= IGP_ACTIVITY_LED_MASK; ++ ledctl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); ++ } ++ ++ /* Once again, mask the interrupts */ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); ++ ++ /* Clear any pending interrupt events. */ ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_hw_82541 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_hw_82541(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 i, txdctl; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_init_hw_82541"); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000_id_led_init_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Disabling VLAN filtering */ ++ DEBUGOUT("Initializing the IEEE VLAN\n"); ++ e1000_clear_vfta(hw); ++ ++ /* Setup the receive address. 
*/ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ /* ++ * Avoid back to back register writes by adding the register ++ * read (flush). This is to protect against some strange ++ * bridge configurations that may issue Memory Write Block ++ * (MWB) to our register space. ++ */ ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ txdctl = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), txdctl); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82541(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_link_up_info_82541 - Report speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to speed buffer ++ * @duplex: pointer to duplex buffer ++ * ++ * Retrieve the current speed and duplex configuration. ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_get_link_up_info_82541(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_get_link_up_info_82541"); ++ ++ ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, duplex); ++ if (ret_val) ++ goto out; ++ ++ if (!phy->speed_downgraded) ++ goto out; ++ ++ /* ++ * IGP01 PHY may advertise full duplex operation after speed ++ * downgrade even if it is operating at half duplex. ++ * Here we set the duplex settings to match the duplex in the ++ * link partner's capabilities. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!(data & NWAY_ER_LP_NWAY_CAPS)) { ++ *duplex = HALF_DUPLEX; ++ } else { ++ ret_val = e1000_read_phy_reg(hw, PHY_LP_ABILITY, &data); ++ if (ret_val) ++ goto out; ++ ++ if (*speed == SPEED_100) { ++ if (!(data & NWAY_LPAR_100TX_FD_CAPS)) ++ *duplex = HALF_DUPLEX; ++ } else if (*speed == SPEED_10) { ++ if (!(data & NWAY_LPAR_10T_FD_CAPS)) ++ *duplex = HALF_DUPLEX; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_hw_reset_82541 - PHY hardware reset ++ * @hw: pointer to the HW structure ++ * ++ * Verify the reset block is not blocking us from resetting. Acquire ++ * semaphore (if necessary) and read/set/write the device control reset ++ * bit in the PHY. Wait the appropriate delay time for the device to ++ * reset and relase the semaphore (if necessary). ++ * This is a function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_phy_hw_reset_82541(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u32 ledctl; ++ ++ DEBUGFUNC("e1000_phy_hw_reset_82541"); ++ ++ ret_val = e1000_phy_hw_reset_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ e1000_phy_init_script_82541(hw); ++ ++ if ((hw->mac.type == e1000_82541) || (hw->mac.type == e1000_82547)) { ++ /* Configure activity LED after PHY reset */ ++ ledctl = E1000_READ_REG(hw, E1000_LEDCTL); ++ ledctl &= IGP_ACTIVITY_LED_MASK; ++ ledctl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_82541 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Calls the appropriate function to configure the link for auto-neg or forced ++ * speed and duplex. Then we check for link, once link is established calls ++ * to configure collision distance and flow control are called. If link is ++ * not established, we return -E1000_ERR_PHY (-2). This is a function ++ * pointer entry point called by the api module. ++ **/ ++static s32 e1000_setup_copper_link_82541(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_dev_spec_82541 *dev_spec; ++ s32 ret_val; ++ u32 ctrl, ledctl; ++ ++ DEBUGFUNC("e1000_setup_copper_link_82541"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ hw->phy.reset_disable = FALSE; ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ /* Earlier revs of the IGP phy require us to force MDI. */ ++ if (hw->mac.type == e1000_82541 || hw->mac.type == e1000_82547) { ++ dev_spec->dsp_config = e1000_dsp_config_disabled; ++ phy->mdix = 1; ++ } else { ++ dev_spec->dsp_config = e1000_dsp_config_enabled; ++ } ++ ++ ret_val = e1000_copper_link_setup_igp(hw); ++ if (ret_val) ++ goto out; ++ ++ if (hw->mac.autoneg) { ++ if (dev_spec->ffe_config == e1000_ffe_config_active) ++ dev_spec->ffe_config = e1000_ffe_config_enabled; ++ } ++ ++ /* Configure activity LED after Phy reset */ ++ ledctl = E1000_READ_REG(hw, E1000_LEDCTL); ++ ledctl &= IGP_ACTIVITY_LED_MASK; ++ ledctl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); ++ ++ ret_val = e1000_setup_copper_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_for_link_82541 - Check/Store link connection ++ * @hw: pointer to the HW structure ++ * ++ * This checks the link condition of the adapter and stores the ++ * results in the hw->mac structure. This is a function pointer entry ++ * point called by the api module. ++ **/ ++static s32 e1000_check_for_link_82541(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ bool link; ++ ++ DEBUGFUNC("e1000_check_for_link_82541"); ++ ++ /* ++ * We only want to go out to the PHY registers to see if Auto-Neg ++ * has completed and/or if our link status has changed. The ++ * get_link_status flag is set upon receiving a Link Status ++ * Change or Rx Sequence Error interrupt. ++ */ ++ if (!mac->get_link_status) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* ++ * First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. 
++ */ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ ret_val = e1000_config_dsp_after_link_change_82541(hw, FALSE); ++ goto out; /* No link detected */ ++ } ++ ++ mac->get_link_status = FALSE; ++ ++ /* ++ * Check if there was DownShift, must be checked ++ * immediately after link-up ++ */ ++ e1000_check_downshift_generic(hw); ++ ++ /* ++ * If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1000_config_dsp_after_link_change_82541(hw, TRUE); ++ ++ /* ++ * Auto-Neg is enabled. Auto Speed Detection takes care ++ * of MAC speed/duplex configuration. So we only need to ++ * configure Collision Distance in the MAC. ++ */ ++ e1000_config_collision_dist_generic(hw); ++ ++ /* ++ * Configure Flow Control now that Auto-Neg has completed. ++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_config_dsp_after_link_change_82541 - Config DSP after link ++ * @hw: pointer to the HW structure ++ * @link_up: boolean flag for link up status ++ * ++ * Return E1000_ERR_PHY when failing to read/write the PHY, else E1000_SUCCESS ++ * at any other case. ++ * ++ * 82541_rev_2 & 82547_rev_2 have the capability to configure the DSP when a ++ * gigabit link is achieved to improve link quality. ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw, ++ bool link_up) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_dev_spec_82541 *dev_spec; ++ s32 ret_val; ++ u32 idle_errs = 0; ++ u16 phy_data, phy_saved_data, speed, duplex, i; ++ u16 ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_20; ++ u16 dsp_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = ++ {IGP01E1000_PHY_AGC_PARAM_A, ++ IGP01E1000_PHY_AGC_PARAM_B, ++ IGP01E1000_PHY_AGC_PARAM_C, ++ IGP01E1000_PHY_AGC_PARAM_D}; ++ ++ DEBUGFUNC("e1000_config_dsp_after_link_change_82541"); ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ if (link_up) { ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ goto out; ++ } ++ ++ if (speed != SPEED_1000) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ ret_val = e1000_get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ if ((dev_spec->dsp_config == e1000_dsp_config_enabled) && ++ phy->min_cable_length >= 50) { ++ ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, ++ dsp_reg_array[i], ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ dsp_reg_array[i], ++ phy_data); ++ if (ret_val) ++ goto out; ++ } ++ dev_spec->dsp_config = e1000_dsp_config_activated; ++ } ++ ++ if ((dev_spec->ffe_config != e1000_ffe_config_enabled) || ++ (phy->min_cable_length >= 50)) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* clear previous idle error counts */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ for (i = 0; i < ffe_idle_err_timeout; i++) { ++ usec_delay(1000); ++ 
ret_val = e1000_read_phy_reg(hw, ++ PHY_1000T_STATUS, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ idle_errs += (phy_data & SR_1000T_IDLE_ERROR_CNT); ++ if (idle_errs > SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT) { ++ dev_spec->ffe_config = e1000_ffe_config_active; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_DSP_FFE, ++ IGP01E1000_PHY_DSP_FFE_CM_CP); ++ if (ret_val) ++ goto out; ++ break; ++ } ++ ++ if (idle_errs) ++ ffe_idle_err_timeout = ++ FFE_IDLE_ERR_COUNT_TIMEOUT_100; ++ } ++ } else { ++ if (dev_spec->dsp_config == e1000_dsp_config_activated) { ++ /* ++ * Save off the current value of register 0x2F5B ++ * to be restored at the end of the routines. ++ */ ++ ret_val = e1000_read_phy_reg(hw, ++ 0x2F5B, ++ &phy_saved_data); ++ if (ret_val) ++ goto out; ++ ++ /* Disable the PHY transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ if (ret_val) ++ goto out; ++ ++ msec_delay_irq(20); ++ ++ ret_val = e1000_write_phy_reg(hw, ++ 0x0000, ++ IGP01E1000_IEEE_FORCE_GIG); ++ if (ret_val) ++ goto out; ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, ++ dsp_reg_array[i], ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; ++ phy_data |= IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ dsp_reg_array[i], ++ phy_data); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_write_phy_reg(hw, ++ 0x0000, ++ IGP01E1000_IEEE_RESTART_AUTONEG); ++ if (ret_val) ++ goto out; ++ ++ msec_delay_irq(20); ++ ++ /* Now enable the transmitter */ ++ ret_val = e1000_write_phy_reg(hw, ++ 0x2F5B, ++ phy_saved_data); ++ if (ret_val) ++ goto out; ++ ++ dev_spec->dsp_config = e1000_dsp_config_enabled; ++ } ++ ++ if (dev_spec->ffe_config != e1000_ffe_config_active) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* ++ * Save off the current value of register 0x2F5B ++ * to be restored at the end of the routines. ++ */ ++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); ++ if (ret_val) ++ goto out; ++ ++ /* Disable the PHY transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ if (ret_val) ++ goto out; ++ ++ msec_delay_irq(20); ++ ++ ret_val = e1000_write_phy_reg(hw, ++ 0x0000, ++ IGP01E1000_IEEE_FORCE_GIG); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_DSP_FFE, ++ IGP01E1000_PHY_DSP_FFE_DEFAULT); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ 0x0000, ++ IGP01E1000_IEEE_RESTART_AUTONEG); ++ if (ret_val) ++ goto out; ++ ++ msec_delay_irq(20); ++ ++ /* Now enable the transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); ++ ++ if (ret_val) ++ goto out; ++ ++ dev_spec->ffe_config = e1000_ffe_config_enabled; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_igp_82541 - Determine cable length for igp PHY ++ * @hw: pointer to the HW structure ++ * ++ * The automatic gain control (agc) normalizes the amplitude of the ++ * received signal, adjusting for the attenuation produced by the ++ * cable. By reading the AGC registers, which reperesent the ++ * cobination of course and fine gain value, the value can be put ++ * into a lookup table to obtain the approximate cable length ++ * for each channel. This is a function pointer entry point called by the ++ * api module. 
++ **/ ++static s32 e1000_get_cable_length_igp_82541(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i, data; ++ u16 cur_agc_value, agc_value = 0; ++ u16 min_agc_value = IGP01E1000_AGC_LENGTH_TABLE_SIZE; ++ u16 agc_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = ++ {IGP01E1000_PHY_AGC_A, ++ IGP01E1000_PHY_AGC_B, ++ IGP01E1000_PHY_AGC_C, ++ IGP01E1000_PHY_AGC_D}; ++ ++ DEBUGFUNC("e1000_get_cable_length_igp_82541"); ++ ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &data); ++ if (ret_val) ++ goto out; ++ ++ cur_agc_value = data >> IGP01E1000_AGC_LENGTH_SHIFT; ++ ++ /* Bounds checking */ ++ if ((cur_agc_value >= IGP01E1000_AGC_LENGTH_TABLE_SIZE - 1) || ++ (cur_agc_value == 0)) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ agc_value += cur_agc_value; ++ ++ if (min_agc_value > cur_agc_value) ++ min_agc_value = cur_agc_value; ++ } ++ ++ /* Remove the minimal AGC result for length < 50m */ ++ if (agc_value < IGP01E1000_PHY_CHANNEL_NUM * 50) { ++ agc_value -= min_agc_value; ++ /* Average the three remaining channels for the length. */ ++ agc_value /= (IGP01E1000_PHY_CHANNEL_NUM - 1); ++ } else { ++ /* Average the channels for the length. */ ++ agc_value /= IGP01E1000_PHY_CHANNEL_NUM; ++ } ++ ++ phy->min_cable_length = (e1000_igp_cable_length_table[agc_value] > ++ IGP01E1000_AGC_RANGE) ++ ? (e1000_igp_cable_length_table[agc_value] - ++ IGP01E1000_AGC_RANGE) ++ : 0; ++ phy->max_cable_length = e1000_igp_cable_length_table[agc_value] + ++ IGP01E1000_AGC_RANGE; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_d3_lplu_state_82541 - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. This is a function pointer entry point called by the ++ * api module. ++ **/ ++static s32 e1000_set_d3_lplu_state_82541(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_set_d3_lplu_state_82541"); ++ ++ switch (hw->mac.type) { ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ break; ++ default: ++ ret_val = e1000_set_d3_lplu_state_generic(hw, active); ++ goto out; ++ break; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!active) { ++ data &= ~IGP01E1000_GMII_FLEX_SPD; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. 
++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= IGP01E1000_GMII_FLEX_SPD; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, data); ++ if (ret_val) ++ goto out; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_led_82541 - Configures SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This prepares the SW controllable LED for use and saves the current state ++ * of the LED so it can be later restored. This is a function pointer entry ++ * point called by the api module. ++ **/ ++static s32 e1000_setup_led_82541(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82541 *dev_spec; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_setup_led_82541"); ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_GMII_FIFO, ++ &dev_spec->spd_default); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_GMII_FIFO, ++ (u16)(dev_spec->spd_default & ++ ~IGP01E1000_GMII_SPD)); ++ if (ret_val) ++ goto out; ++ ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_cleanup_led_82541 - Set LED config to default operation ++ * @hw: pointer to the HW structure ++ * ++ * Remove the current LED configuration and set the LED configuration ++ * to the default value, saved from the EEPROM. This is a function pointer ++ * entry point called by the api module. ++ **/ ++static s32 e1000_cleanup_led_82541(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82541 *dev_spec; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_cleanup_led_82541"); ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_GMII_FIFO, ++ dev_spec->spd_default); ++ if (ret_val) ++ goto out; ++ ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_init_script_82541 - Initialize GbE PHY ++ * @hw: pointer to the HW structure ++ * ++ * Initializes the IGP PHY. 
++ **/ ++static s32 e1000_phy_init_script_82541(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82541 *dev_spec; ++ u32 ret_val; ++ u16 phy_saved_data; ++ ++ DEBUGFUNC("e1000_phy_init_script_82541"); ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ if (!dev_spec->phy_init_script) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* Delay after phy reset to enable NVM configuration to load */ ++ msec_delay(20); ++ ++ /* ++ * Save off the current value of register 0x2F5B to be restored at ++ * the end of this routine. ++ */ ++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); ++ ++ /* Disabled the PHY transmitter */ ++ e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ ++ msec_delay(20); ++ ++ e1000_write_phy_reg(hw, 0x0000, 0x0140); ++ ++ msec_delay(5); ++ ++ switch (hw->mac.type) { ++ case e1000_82541: ++ case e1000_82547: ++ e1000_write_phy_reg(hw, 0x1F95, 0x0001); ++ ++ e1000_write_phy_reg(hw, 0x1F71, 0xBD21); ++ ++ e1000_write_phy_reg(hw, 0x1F79, 0x0018); ++ ++ e1000_write_phy_reg(hw, 0x1F30, 0x1600); ++ ++ e1000_write_phy_reg(hw, 0x1F31, 0x0014); ++ ++ e1000_write_phy_reg(hw, 0x1F32, 0x161C); ++ ++ e1000_write_phy_reg(hw, 0x1F94, 0x0003); ++ ++ e1000_write_phy_reg(hw, 0x1F96, 0x003F); ++ ++ e1000_write_phy_reg(hw, 0x2010, 0x0008); ++ break; ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ e1000_write_phy_reg(hw, 0x1F73, 0x0099); ++ break; ++ default: ++ break; ++ } ++ ++ e1000_write_phy_reg(hw, 0x0000, 0x3300); ++ ++ msec_delay(20); ++ ++ /* Now enable the transmitter */ ++ e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); ++ ++ if (hw->mac.type == e1000_82547) { ++ u16 fused, fine, coarse; ++ ++ /* Move to analog registers page */ ++ e1000_read_phy_reg(hw, ++ IGP01E1000_ANALOG_SPARE_FUSE_STATUS, ++ &fused); ++ ++ if (!(fused & IGP01E1000_ANALOG_SPARE_FUSE_ENABLED)) { ++ e1000_read_phy_reg(hw, ++ IGP01E1000_ANALOG_FUSE_STATUS, ++ &fused); ++ ++ fine = fused & IGP01E1000_ANALOG_FUSE_FINE_MASK; ++ coarse = fused & IGP01E1000_ANALOG_FUSE_COARSE_MASK; ++ ++ if (coarse > IGP01E1000_ANALOG_FUSE_COARSE_THRESH) { ++ coarse -= IGP01E1000_ANALOG_FUSE_COARSE_10; ++ fine -= IGP01E1000_ANALOG_FUSE_FINE_1; ++ } else if (coarse == ++ IGP01E1000_ANALOG_FUSE_COARSE_THRESH) ++ fine -= IGP01E1000_ANALOG_FUSE_FINE_10; ++ ++ fused = (fused & IGP01E1000_ANALOG_FUSE_POLY_MASK) | ++ (fine & IGP01E1000_ANALOG_FUSE_FINE_MASK) | ++ (coarse & IGP01E1000_ANALOG_FUSE_COARSE_MASK); ++ ++ e1000_write_phy_reg(hw, ++ IGP01E1000_ANALOG_FUSE_CONTROL, ++ fused); ++ e1000_write_phy_reg(hw, ++ IGP01E1000_ANALOG_FUSE_BYPASS, ++ IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL); ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_script_state_82541 - Enable/Disable PHY init script ++ * @hw: pointer to the HW structure ++ * @state: boolean value used to enable/disable PHY init script ++ * ++ * Allows the driver to enable/disable the PHY init script, if the PHY is an ++ * IGP PHY. This is a function pointer entry point called by the api module. 
++ **/ ++void e1000_init_script_state_82541(struct e1000_hw *hw, bool state) ++{ ++ struct e1000_dev_spec_82541 *dev_spec; ++ ++ DEBUGFUNC("e1000_init_script_state_82541"); ++ ++ if (hw->phy.type != e1000_phy_igp) { ++ DEBUGOUT("Initialization script not necessary.\n"); ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ goto out; ++ } ++ ++ dev_spec->phy_init_script = state; ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_power_down_phy_copper_82541 - Remove link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_82541(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_SMBUS_EN)) ++ e1000_power_down_phy_copper(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82541 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. ++ **/ ++static void e1000_clear_hw_cntrs_82541(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_82541"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++ ++ temp = E1000_READ_REG(hw, E1000_MGTPRC); ++ temp = E1000_READ_REG(hw, E1000_MGTPDC); ++ temp = E1000_READ_REG(hw, E1000_MGTPTC); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_phy.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_phy.c 2021-04-07 16:01:27.672633530 +0800 +@@ -0,0 +1,2106 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". 
++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000_api.h" ++#include "e1000_phy.h" ++ ++static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw); ++static void e1000_release_phy(struct e1000_hw *hw); ++static s32 e1000_acquire_phy(struct e1000_hw *hw); ++ ++/* Cable length tables */ ++static const u16 e1000_m88_cable_length_table[] = ++ { 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED }; ++#define M88E1000_CABLE_LENGTH_TABLE_SIZE \ ++ (sizeof(e1000_m88_cable_length_table) / \ ++ sizeof(e1000_m88_cable_length_table[0])) ++ ++static const u16 e1000_igp_2_cable_length_table[] = ++ { 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, ++ 0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, ++ 6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, ++ 21, 26, 31, 35, 40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, ++ 40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, ++ 60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, ++ 83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124, ++ 104, 109, 114, 118, 121, 124}; ++#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \ ++ (sizeof(e1000_igp_2_cable_length_table) / \ ++ sizeof(e1000_igp_2_cable_length_table[0])) ++ ++/** ++ * e1000_check_reset_block_generic - Check if PHY reset is blocked ++ * @hw: pointer to the HW structure ++ * ++ * Read the PHY management control register and check whether a PHY reset ++ * is blocked. If a reset is not blocked return E1000_SUCCESS, otherwise ++ * return E1000_BLK_PHY_RESET (12). ++ **/ ++s32 e1000_check_reset_block_generic(struct e1000_hw *hw) ++{ ++ u32 manc; ++ ++ DEBUGFUNC("e1000_check_reset_block"); ++ ++ manc = E1000_READ_REG(hw, E1000_MANC); ++ ++ return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? ++ E1000_BLK_PHY_RESET : E1000_SUCCESS; ++} ++ ++/** ++ * e1000_get_phy_id - Retrieve the PHY ID and revision ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY registers and stores the PHY ID and possibly the PHY ++ * revision in the hardware structure. ++ **/ ++s32 e1000_get_phy_id(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ u16 phy_id; ++ ++ DEBUGFUNC("e1000_get_phy_id"); ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_ID1, &phy_id); ++ if (ret_val) ++ goto out; ++ ++ phy->id = (u32)(phy_id << 16); ++ usec_delay(20); ++ ret_val = e1000_read_phy_reg(hw, PHY_ID2, &phy_id); ++ if (ret_val) ++ goto out; ++ ++ phy->id |= (u32)(phy_id & PHY_REVISION_MASK); ++ phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_reset_dsp_generic - Reset PHY DSP ++ * @hw: pointer to the HW structure ++ * ++ * Reset the digital signal processor. ++ **/ ++s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_phy_reset_dsp_generic"); ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_phy_reg_mdic - Read MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the MDI control regsiter in the PHY at offset and stores the ++ * information read to data. 
++ **/ ++s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_read_phy_reg_mdic"); ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* ++ * Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = ((offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_READ)); ++ ++ E1000_WRITE_REG(hw, E1000_MDIC, mdic); ++ ++ /* ++ * Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ usec_delay(50); ++ mdic = E1000_READ_REG(hw, E1000_MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ DEBUGOUT("MDI Read did not complete\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ DEBUGOUT("MDI Error\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ *data = (u16) mdic; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_mdic - Write MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write to register at offset ++ * ++ * Writes data to MDI control register in the PHY at offset. ++ **/ ++s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_write_phy_reg_mdic"); ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* ++ * Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = (((u32)data) | ++ (offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_WRITE)); ++ ++ E1000_WRITE_REG(hw, E1000_MDIC, mdic); ++ ++ /* ++ * Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ usec_delay(50); ++ mdic = E1000_READ_REG(hw, E1000_MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ DEBUGOUT("MDI Write did not complete\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ DEBUGOUT("MDI Error\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_phy_reg_m88 - Read m88 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. 
++ **/ ++s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_read_phy_reg_m88"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_m88 - Write m88 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_write_phy_reg_m88"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_phy_reg_igp - Read igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. ++ **/ ++s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_read_phy_reg_igp"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = e1000_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) { ++ e1000_release_phy(hw); ++ goto out; ++ } ++ } ++ ++ ret_val = e1000_read_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_igp - Write igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_write_phy_reg_igp"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = e1000_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) { ++ e1000_release_phy(hw); ++ goto out; ++ } ++ } ++ ++ ret_val = e1000_write_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_kmrn_reg_generic - Read kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary. Then reads the PHY register at offset ++ * using the kumeran interface. The information retrieved is stored in data. ++ * Release any acquired semaphores before exiting. 
++ **/ ++s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_read_kmrn_reg_generic"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; ++ E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); ++ ++ usec_delay(2); ++ ++ kmrnctrlsta = E1000_READ_REG(hw, E1000_KMRNCTRLSTA); ++ *data = (u16)kmrnctrlsta; ++ ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_kmrn_reg_generic - Write kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary. Then write the data to PHY register ++ * at the offset using the kumeran interface. Release any acquired semaphores ++ * before exiting. ++ **/ ++s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_write_kmrn_reg_generic"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | data; ++ E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); ++ ++ usec_delay(2); ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_copper_link_setup_m88 - Setup m88 PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock ++ * and downshift values are set also. ++ **/ ++s32 e1000_copper_link_setup_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ ++ DEBUGFUNC("e1000_copper_link_setup_m88"); ++ ++ if (phy->reset_disable) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* Enable CRS on TX. This must be set for half-duplex operation. */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ++ /* ++ * Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ++ switch (phy->mdix) { ++ case 1: ++ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; ++ break; ++ case 2: ++ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; ++ break; ++ case 3: ++ phy_data |= M88E1000_PSCR_AUTO_X_1000T; ++ break; ++ case 0: ++ default: ++ phy_data |= M88E1000_PSCR_AUTO_X_MODE; ++ break; ++ } ++ ++ /* ++ * Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; ++ if (phy->disable_polarity_correction == 1) ++ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ if (phy->revision < E1000_REVISION_4) { ++ /* ++ * Force TX_CLK in the Extended PHY Specific Control Register ++ * to 25MHz clock. 
++ */ ++ ret_val = e1000_read_phy_reg(hw, ++ M88E1000_EXT_PHY_SPEC_CTRL, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ++ if ((phy->revision == E1000_REVISION_2) && ++ (phy->id == M88E1111_I_PHY_ID)) { ++ /* 82573L PHY - set the downshift counter to 5x. */ ++ phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK; ++ phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; ++ } else { ++ /* Configure Master and Slave downshift values */ ++ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); ++ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); ++ } ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_EXT_PHY_SPEC_CTRL, ++ phy_data); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Commit the changes. */ ++ ret_val = e1000_phy_commit(hw); ++ if (ret_val) { ++ DEBUGOUT("Error committing the PHY changes\n"); ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_copper_link_setup_igp - Setup igp PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for ++ * igp PHY's. ++ **/ ++s32 e1000_copper_link_setup_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_copper_link_setup_igp"); ++ ++ if (phy->reset_disable) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) { ++ DEBUGOUT("Error resetting the PHY.\n"); ++ goto out; ++ } ++ ++ /* Wait 15ms for MAC to configure PHY from NVM settings. */ ++ msec_delay(15); ++ ++ /* ++ * The NVM settings will configure LPLU in D3 for ++ * non-IGP1 PHYs. ++ */ ++ if (phy->type == e1000_phy_igp) { ++ /* disable lplu d3 during driver init */ ++ ret_val = e1000_set_d3_lplu_state(hw, FALSE); ++ if (ret_val) { ++ DEBUGOUT("Error Disabling LPLU D3\n"); ++ goto out; ++ } ++ } ++ ++ /* disable lplu d0 during driver init */ ++ ret_val = e1000_set_d0_lplu_state(hw, FALSE); ++ if (ret_val) { ++ DEBUGOUT("Error Disabling LPLU D0\n"); ++ goto out; ++ } ++ /* Configure mdi-mdix settings */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ ++ switch (phy->mdix) { ++ case 1: ++ data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 2: ++ data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 0: ++ default: ++ data |= IGP01E1000_PSCR_AUTO_MDIX; ++ break; ++ } ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, data); ++ if (ret_val) ++ goto out; ++ ++ /* set auto-master slave resolution settings */ ++ if (hw->mac.autoneg) { ++ /* ++ * when autonegotiation advertisement is only 1000Mbps then we ++ * should disable SmartSpeed and enable Auto MasterSlave ++ * resolution as hardware default. 
++ */ ++ if (phy->autoneg_advertised == ADVERTISE_1000_FULL) { ++ /* Disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* Set auto Master/Slave resolution process */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~CR_1000T_MS_ENABLE; ++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, data); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ /* load defaults for future use */ ++ phy->original_ms_type = (data & CR_1000T_MS_ENABLE) ? ++ ((data & CR_1000T_MS_VALUE) ? ++ e1000_ms_force_master : ++ e1000_ms_force_slave) : ++ e1000_ms_auto; ++ ++ switch (phy->ms_type) { ++ case e1000_ms_force_master: ++ data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_force_slave: ++ data |= CR_1000T_MS_ENABLE; ++ data &= ~(CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_auto: ++ data &= ~CR_1000T_MS_ENABLE; ++ default: ++ break; ++ } ++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, data); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_copper_link_autoneg - Setup/Enable autoneg for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Performs initial bounds checking on autoneg advertisement parameter, then ++ * configure to advertise the full capability. Setup the PHY to autoneg ++ * and restart the negotiation process between the link partner. If ++ * autoneg_wait_to_complete, then wait for autoneg to complete before exiting. ++ **/ ++s32 e1000_copper_link_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_ctrl; ++ ++ DEBUGFUNC("e1000_copper_link_autoneg"); ++ ++ /* ++ * Perform some bounds checking on the autoneg advertisement ++ * parameter. ++ */ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* ++ * If autoneg_advertised is zero, we assume it was not defaulted ++ * by the calling code so we set to advertise full capability. ++ */ ++ if (phy->autoneg_advertised == 0) ++ phy->autoneg_advertised = phy->autoneg_mask; ++ ++ DEBUGOUT("Reconfiguring auto-neg advertisement params\n"); ++ ret_val = e1000_phy_setup_autoneg(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Setting up Auto-Negotiation\n"); ++ goto out; ++ } ++ DEBUGOUT("Restarting Auto-Neg\n"); ++ ++ /* ++ * Restart auto-negotiation by setting the Auto Neg Enable bit and ++ * the Auto Neg Restart bit in the PHY control register. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Does the user want to wait for Auto-Neg to complete here, or ++ * check at a later time (for example, callback routine). 
++ */ ++ if (phy->autoneg_wait_to_complete) { ++ ret_val = e1000_wait_autoneg(hw); ++ if (ret_val) { ++ DEBUGOUT("Error while waiting for " ++ "autoneg to complete\n"); ++ goto out; ++ } ++ } ++ ++ hw->mac.get_link_status = TRUE; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_setup_autoneg - Configure PHY for auto-negotiation ++ * @hw: pointer to the HW structure ++ * ++ * Reads the MII auto-neg advertisement register and/or the 1000T control ++ * register and if the PHY is already setup for auto-negotiation, then ++ * return successful. Otherwise, setup advertisement and flow control to ++ * the appropriate values for the wanted auto-negotiation. ++ **/ ++s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 mii_autoneg_adv_reg; ++ u16 mii_1000t_ctrl_reg = 0; ++ ++ DEBUGFUNC("e1000_phy_setup_autoneg"); ++ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* Read the MII Auto-Neg Advertisement Register (Address 4). */ ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); ++ if (ret_val) ++ goto out; ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) { ++ /* Read the MII 1000Base-T Control Register (Address 9). */ ++ ret_val = e1000_read_phy_reg(hw, ++ PHY_1000T_CTRL, ++ &mii_1000t_ctrl_reg); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* ++ * Need to parse both autoneg_advertised and fc and set up ++ * the appropriate PHY registers. First we will parse for ++ * autoneg_advertised software override. Since we can advertise ++ * a plethora of combinations, we need to check each bit ++ * individually. ++ */ ++ ++ /* ++ * First we clear all the 10/100 mb speed bits in the Auto-Neg ++ * Advertisement Register (Address 4) and the 1000 mb speed bits in ++ * the 1000Base-T Control Register (Address 9). ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | ++ NWAY_AR_100TX_HD_CAPS | ++ NWAY_AR_10T_FD_CAPS | ++ NWAY_AR_10T_HD_CAPS); ++ mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); ++ ++ DEBUGOUT1("autoneg_advertised %x\n", phy->autoneg_advertised); ++ ++ /* Do we want to advertise 10 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_HALF) { ++ DEBUGOUT("Advertise 10mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 10 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_FULL) { ++ DEBUGOUT("Advertise 10mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_100_HALF) { ++ DEBUGOUT("Advertise 100mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_100_FULL) { ++ DEBUGOUT("Advertise 100mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; ++ } ++ ++ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_HALF) { ++ DEBUGOUT("Advertise 1000mb Half duplex request denied!\n"); ++ } ++ ++ /* Do we want to advertise 1000 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { ++ DEBUGOUT("Advertise 1000mb Full duplex\n"); ++ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; ++ } ++ ++ /* ++ * Check for a software override of the flow control settings, and ++ * setup the PHY advertisement registers accordingly. 
If ++ * auto-negotiation is enabled, then software will have to set the ++ * "PAUSE" bits to the correct value in the Auto-Negotiation ++ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- ++ * negotiation. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * but we do not support receiving pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) are enabled. ++ * other: No software override. The flow control configuration ++ * in the EEPROM is used. ++ */ ++ switch (hw->fc.type) { ++ case e1000_fc_none: ++ /* ++ * Flow control (Rx & Tx) is completely disabled by a ++ * software over-ride. ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_rx_pause: ++ /* ++ * Rx Flow control is enabled, and Tx Flow control is ++ * disabled, by a software over-ride. ++ * ++ * Since there really isn't a way to advertise that we are ++ * capable of Rx Pause ONLY, we will advertise that we ++ * support both symmetric and asymmetric Rx PAUSE. Later ++ * (in e1000_config_fc_after_link_up) we will disable the ++ * hw's ability to send PAUSE frames. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_tx_pause: ++ /* ++ * Tx Flow control is enabled, and Rx Flow control is ++ * disabled, by a software over-ride. ++ */ ++ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; ++ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; ++ break; ++ case e1000_fc_full: ++ /* ++ * Flow control (both Rx and Tx) is enabled by a software ++ * over-ride. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) { ++ ret_val = e1000_write_phy_reg(hw, ++ PHY_1000T_CTRL, ++ mii_1000t_ctrl_reg); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_generic - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Calls the appropriate function to configure the link for auto-neg or forced ++ * speed and duplex. Then we check for link, once link is established calls ++ * to configure collision distance and flow control are called. If link is ++ * not established, we return -E1000_ERR_PHY (-2). ++ **/ ++s32 e1000_setup_copper_link_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ bool link; ++ ++ DEBUGFUNC("e1000_setup_copper_link_generic"); ++ ++ if (hw->mac.autoneg) { ++ /* ++ * Setup autoneg and flow control advertisement and perform ++ * autonegotiation. ++ */ ++ ret_val = e1000_copper_link_autoneg(hw); ++ if (ret_val) ++ goto out; ++ } else { ++ /* ++ * PHY will be set to 10H, 10F, 100H or 100F ++ * depending on user settings. ++ */ ++ DEBUGOUT("Forcing Speed and Duplex\n"); ++ ret_val = e1000_phy_force_speed_duplex(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Forcing Speed and Duplex\n"); ++ goto out; ++ } ++ } ++ ++ /* ++ * Check link status. Wait up to 100 microseconds for link to become ++ * valid. 
++ */ ++ ret_val = e1000_phy_has_link_generic(hw, ++ COPPER_LINK_UP_LIMIT, ++ 10, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (link) { ++ DEBUGOUT("Valid link established!!!\n"); ++ e1000_config_collision_dist_generic(hw); ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ } else { ++ DEBUGOUT("Unable to establish link!!!\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Waits for link and returns ++ * successful if link up is successful, else -E1000_ERR_PHY (-2). ++ **/ ++s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_igp"); ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ e1000_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. IGP requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT1("IGP PSCR: %X\n", phy_data); ++ ++ usec_delay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ DEBUGOUT("Waiting for forced speed/duplex link on IGP phy.\n"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ DEBUGOUT("Link taking longer than expected.\n"); ++ } ++ ++ /* Try once more */ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Resets the PHY to commit the ++ * changes. If time expires while waiting for link up, we reset the DSP. ++ * After reset, TX_CLK and CRS on Tx must be set. Return successful upon ++ * successful completion, else return corresponding error code. ++ **/ ++s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_m88"); ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data); ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ e1000_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ /* Reset the phy to commit changes. 
*/ ++ phy_data |= MII_CR_RESET; ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ usec_delay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ DEBUGOUT("Waiting for forced speed/duplex link on M88 phy.\n"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ /* ++ * We didn't get link. ++ * Reset the DSP and cross our fingers. ++ */ ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_PHY_PAGE_SELECT, ++ 0x001d); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_phy_reset_dsp_generic(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Try once more */ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Resetting the phy means we need to re-force TX_CLK in the ++ * Extended PHY Specific Control Register to 25MHz clock from ++ * the reset value of 2.5MHz. ++ */ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * In addition, we must re-enable CRS on Tx for both half and full ++ * duplex. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex ++ * @hw: pointer to the HW structure ++ * @phy_ctrl: pointer to current value of PHY_CONTROL ++ * ++ * Forces speed and duplex on the PHY by doing the following: disable flow ++ * control, force speed/duplex on the MAC, disable auto speed detection, ++ * disable auto-negotiation, configure duplex, configure speed, configure ++ * the collision distance, write configuration to CTRL register. The ++ * caller must write to the PHY_CONTROL register for these settings to ++ * take affect. ++ **/ ++void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 ctrl; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_setup"); ++ ++ /* Turn off flow control when forcing speed/duplex */ ++ hw->fc.type = e1000_fc_none; ++ ++ /* Force speed/duplex on the mac */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~E1000_CTRL_SPD_SEL; ++ ++ /* Disable Auto Speed Detection */ ++ ctrl &= ~E1000_CTRL_ASDE; ++ ++ /* Disable autoneg on the phy */ ++ *phy_ctrl &= ~MII_CR_AUTO_NEG_EN; ++ ++ /* Forcing Full or Half Duplex? */ ++ if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) { ++ ctrl &= ~E1000_CTRL_FD; ++ *phy_ctrl &= ~MII_CR_FULL_DUPLEX; ++ DEBUGOUT("Half Duplex\n"); ++ } else { ++ ctrl |= E1000_CTRL_FD; ++ *phy_ctrl |= MII_CR_FULL_DUPLEX; ++ DEBUGOUT("Full Duplex\n"); ++ } ++ ++ /* Forcing 10mb or 100mb? 
*/ ++ if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) { ++ ctrl |= E1000_CTRL_SPD_100; ++ *phy_ctrl |= MII_CR_SPEED_100; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10); ++ DEBUGOUT("Forcing 100mb\n"); ++ } else { ++ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ *phy_ctrl |= MII_CR_SPEED_10; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); ++ DEBUGOUT("Forcing 10mb\n"); ++ } ++ ++ e1000_config_collision_dist_generic(hw); ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++} ++ ++/** ++ * e1000_set_d3_lplu_state_generic - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. ++ **/ ++s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_set_d3_lplu_state_generic"); ++ ++ ret_val = e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!active) { ++ data &= ~IGP02E1000_PM_D3_LPLU; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= IGP02E1000_PM_D3_LPLU; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_downshift_generic - Checks whether a downshift in speed occured ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * A downshift is detected by querying the PHY link health. 
++ **/ ++s32 e1000_check_downshift_generic(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, offset, mask; ++ ++ DEBUGFUNC("e1000_check_downshift_generic"); ++ ++ switch (phy->type) { ++ case e1000_phy_m88: ++ case e1000_phy_gg82563: ++ offset = M88E1000_PHY_SPEC_STATUS; ++ mask = M88E1000_PSSR_DOWNSHIFT; ++ break; ++ case e1000_phy_igp_2: ++ case e1000_phy_igp: ++ case e1000_phy_igp_3: ++ offset = IGP01E1000_PHY_LINK_HEALTH; ++ mask = IGP01E1000_PLHR_SS_DOWNGRADE; ++ break; ++ default: ++ /* speed downshift not supported */ ++ phy->speed_downgraded = FALSE; ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, offset, &phy_data); ++ ++ if (!ret_val) ++ phy->speed_downgraded = (phy_data & mask) ? TRUE : FALSE; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_m88 - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY specific status register. ++ **/ ++s32 e1000_check_polarity_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_check_polarity_m88"); ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & M88E1000_PSSR_REV_POLARITY) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_igp - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY port status register, and the ++ * current speed (since there is no polarity at 100Mbps). ++ **/ ++s32 e1000_check_polarity_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data, offset, mask; ++ ++ DEBUGFUNC("e1000_check_polarity_igp"); ++ ++ /* ++ * Polarity is determined based on the speed of ++ * our connection. ++ */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ offset = IGP01E1000_PHY_PCS_INIT_REG; ++ mask = IGP01E1000_PHY_POLARITY_MASK; ++ } else { ++ /* ++ * This really only applies to 10Mbps since ++ * there is no polarity for 100Mbps (always 0). ++ */ ++ offset = IGP01E1000_PHY_PORT_STATUS; ++ mask = IGP01E1000_PSSR_POLARITY_REVERSED; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, offset, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & mask) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_wait_autoneg_generic - Wait for auto-neg compeletion ++ * @hw: pointer to the HW structure ++ * ++ * Waits for auto-negotiation to complete or for the auto-negotiation time ++ * limit to expire, which ever happens first. ++ **/ ++s32 e1000_wait_autoneg_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 i, phy_status; ++ ++ DEBUGFUNC("e1000_wait_autoneg_generic"); ++ ++ /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. 
*/ ++ for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) { ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_AUTONEG_COMPLETE) ++ break; ++ msec_delay(100); ++ } ++ ++ /* ++ * PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation ++ * has completed. ++ */ ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_has_link_generic - Polls PHY for link ++ * @hw: pointer to the HW structure ++ * @iterations: number of times to poll for link ++ * @usec_interval: delay between polling attempts ++ * @success: pointer to whether polling was successful or not ++ * ++ * Polls the PHY status register for link, 'iterations' number of times. ++ **/ ++s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 i, phy_status; ++ ++ DEBUGFUNC("e1000_phy_has_link_generic"); ++ ++ for (i = 0; i < iterations; i++) { ++ /* ++ * Some PHYs require the PHY_STATUS register to be read ++ * twice due to the link bit being sticky. No harm doing ++ * it across the board. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_LINK_STATUS) ++ break; ++ if (usec_interval >= 1000) ++ msec_delay_irq(usec_interval/1000); ++ else ++ usec_delay(usec_interval); ++ } ++ ++ *success = (i < iterations) ? TRUE : FALSE; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_m88 - Determine cable length for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY specific status register to retrieve the cable length ++ * information. The cable length is determined by averaging the minimum and ++ * maximum values to get the "average" cable length. The m88 PHY has four ++ * possible cable length values, which are: ++ * Register Value Cable Length ++ * 0 < 50 meters ++ * 1 50 - 80 meters ++ * 2 80 - 110 meters ++ * 3 110 - 140 meters ++ * 4 > 140 meters ++ **/ ++s32 e1000_get_cable_length_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, index; ++ ++ DEBUGFUNC("e1000_get_cable_length_m88"); ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ phy->min_cable_length = e1000_m88_cable_length_table[index]; ++ phy->max_cable_length = e1000_m88_cable_length_table[index+1]; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_igp_2 - Determine cable length for igp2 PHY ++ * @hw: pointer to the HW structure ++ * ++ * The automatic gain control (agc) normalizes the amplitude of the ++ * received signal, adjusting for the attenuation produced by the ++ * cable. By reading the AGC registers, which reperesent the ++ * cobination of course and fine gain value, the value can be put ++ * into a lookup table to obtain the approximate cable length ++ * for each channel. 
++ **/ ++s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ u16 phy_data, i, agc_value = 0; ++ u16 cur_agc_index, max_agc_index = 0; ++ u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; ++ u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = ++ {IGP02E1000_PHY_AGC_A, ++ IGP02E1000_PHY_AGC_B, ++ IGP02E1000_PHY_AGC_C, ++ IGP02E1000_PHY_AGC_D}; ++ ++ DEBUGFUNC("e1000_get_cable_length_igp_2"); ++ ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Getting bits 15:9, which represent the combination of ++ * course and fine gain values. The result is a number ++ * that can be put into the lookup table to obtain the ++ * approximate cable length. ++ */ ++ cur_agc_index = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & ++ IGP02E1000_AGC_LENGTH_MASK; ++ ++ /* Array index bound check. */ ++ if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) || ++ (cur_agc_index == 0)) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ /* Remove min & max AGC values from calculation. */ ++ if (e1000_igp_2_cable_length_table[min_agc_index] > ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ min_agc_index = cur_agc_index; ++ if (e1000_igp_2_cable_length_table[max_agc_index] < ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ max_agc_index = cur_agc_index; ++ ++ agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; ++ } ++ ++ agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + ++ e1000_igp_2_cable_length_table[max_agc_index]); ++ agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); ++ ++ /* Calculate cable length with the error range of +/- 10 meters. */ ++ phy->min_cable_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ? ++ (agc_value - IGP02E1000_AGC_RANGE) : 0; ++ phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_m88 - Retrieve PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Valid for only copper links. Read the PHY status register (sticky read) ++ * to verify that link is up. Read the PHY special control register to ++ * determine the polarity and 10base-T extended distance. Read the PHY ++ * special status register to determine MDI/MDIx and current speed. If ++ * speed is 1000, then determine cable length, local and remote receiver. ++ **/ ++s32 e1000_get_phy_info_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ DEBUGFUNC("e1000_get_phy_info_m88"); ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ DEBUGOUT("Phy info is only valid for copper media\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ DEBUGOUT("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->polarity_correction = (phy_data & M88E1000_PSCR_POLARITY_REVERSAL) ++ ? 
TRUE ++ : FALSE; ++ ++ ret_val = e1000_check_polarity_m88(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (phy_data & M88E1000_PSSR_MDIX) ? TRUE : FALSE; ++ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { ++ ret_val = e1000_get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ /* Set values to "undefined" */ ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_igp - Retrieve igp PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Read PHY status to determine if link is up. If link is up, then ++ * set/determine 10base-T extended distance and polarity correction. Read ++ * PHY port status to determine MDI/MDIx and speed. Based on the speed, ++ * determine on the cable length, local and remote receiver. ++ **/ ++s32 e1000_get_phy_info_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ DEBUGFUNC("e1000_get_phy_info_igp"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ DEBUGOUT("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ phy->polarity_correction = TRUE; ++ ++ ret_val = e1000_check_polarity_igp(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & IGP01E1000_PSSR_MDIX) ? TRUE : FALSE; ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ ret_val = e1000_get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_sw_reset_generic - PHY software reset ++ * @hw: pointer to the HW structure ++ * ++ * Does a software reset of the PHY by reading the PHY control register and ++ * setting/write the control register reset bit to the PHY. 
++ **/ ++s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_ctrl; ++ ++ DEBUGFUNC("e1000_phy_sw_reset_generic"); ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ phy_ctrl |= MII_CR_RESET; ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ usec_delay(1); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_hw_reset_generic - PHY hardware reset ++ * @hw: pointer to the HW structure ++ * ++ * Verify the reset block is not blocking us from resetting. Acquire ++ * semaphore (if necessary) and read/set/write the device control reset ++ * bit in the PHY. Wait the appropriate delay time for the device to ++ * reset and relase the semaphore (if necessary). ++ **/ ++s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u32 ctrl; ++ ++ DEBUGFUNC("e1000_phy_hw_reset_generic"); ++ ++ ret_val = e1000_check_reset_block(hw); ++ if (ret_val) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PHY_RST); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(phy->reset_delay_us); ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(150); ++ ++ e1000_release_phy(hw); ++ ++ ret_val = e1000_get_phy_cfg_done(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cfg_done_generic - Generic configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Generic function to wait 10 milli-seconds for configuration to complete ++ * and return success. ++ **/ ++s32 e1000_get_cfg_done_generic(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_get_cfg_done_generic"); ++ ++ msec_delay_irq(10); ++ ++ return E1000_SUCCESS; ++} ++ ++/* Internal function pointers */ ++ ++/** ++ * e1000_get_phy_cfg_done - Generic PHY configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Return success if silicon family did not implement a family specific ++ * get_cfg_done function. ++ **/ ++static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw) ++{ ++ if (hw->func.get_cfg_done) ++ return hw->func.get_cfg_done(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_release_phy - Generic release PHY ++ * @hw: pointer to the HW structure ++ * ++ * Return if silicon family does not require a semaphore when accessing the ++ * PHY. ++ **/ ++static void e1000_release_phy(struct e1000_hw *hw) ++{ ++ if (hw->func.release_phy) ++ hw->func.release_phy(hw); ++} ++ ++/** ++ * e1000_acquire_phy - Generic acquire PHY ++ * @hw: pointer to the HW structure ++ * ++ * Return success if silicon family does not require a semaphore when ++ * accessing the PHY. ++ **/ ++static s32 e1000_acquire_phy(struct e1000_hw *hw) ++{ ++ if (hw->func.acquire_phy) ++ return hw->func.acquire_phy(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex - Generic force PHY speed/duplex ++ * @hw: pointer to the HW structure ++ * ++ * When the silicon family has not implemented a forced speed/duplex ++ * function for the PHY, simply return E1000_SUCCESS. 
++ **/ ++s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw) ++{ ++ if (hw->func.force_speed_duplex) ++ return hw->func.force_speed_duplex(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_phy_init_script_igp3 - Inits the IGP3 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Initializes a Intel Gigabit PHY3 when an EEPROM is not present. ++ **/ ++s32 e1000_phy_init_script_igp3(struct e1000_hw *hw) ++{ ++ DEBUGOUT("Running IGP 3 PHY init script\n"); ++ ++ /* PHY init IGP 3 */ ++ /* Enable rise/fall, 10-mode work in class-A */ ++ e1000_write_phy_reg(hw, 0x2F5B, 0x9018); ++ /* Remove all caps from Replica path filter */ ++ e1000_write_phy_reg(hw, 0x2F52, 0x0000); ++ /* Bias trimming for ADC, AFE and Driver (Default) */ ++ e1000_write_phy_reg(hw, 0x2FB1, 0x8B24); ++ /* Increase Hybrid poly bias */ ++ e1000_write_phy_reg(hw, 0x2FB2, 0xF8F0); ++ /* Add 4% to Tx amplitude in Giga mode */ ++ e1000_write_phy_reg(hw, 0x2010, 0x10B0); ++ /* Disable trimming (TTT) */ ++ e1000_write_phy_reg(hw, 0x2011, 0x0000); ++ /* Poly DC correction to 94.6% + 2% for all channels */ ++ e1000_write_phy_reg(hw, 0x20DD, 0x249A); ++ /* ABS DC correction to 95.9% */ ++ e1000_write_phy_reg(hw, 0x20DE, 0x00D3); ++ /* BG temp curve trim */ ++ e1000_write_phy_reg(hw, 0x28B4, 0x04CE); ++ /* Increasing ADC OPAMP stage 1 currents to max */ ++ e1000_write_phy_reg(hw, 0x2F70, 0x29E4); ++ /* Force 1000 ( required for enabling PHY regs configuration) */ ++ e1000_write_phy_reg(hw, 0x0000, 0x0140); ++ /* Set upd_freq to 6 */ ++ e1000_write_phy_reg(hw, 0x1F30, 0x1606); ++ /* Disable NPDFE */ ++ e1000_write_phy_reg(hw, 0x1F31, 0xB814); ++ /* Disable adaptive fixed FFE (Default) */ ++ e1000_write_phy_reg(hw, 0x1F35, 0x002A); ++ /* Enable FFE hysteresis */ ++ e1000_write_phy_reg(hw, 0x1F3E, 0x0067); ++ /* Fixed FFE for short cable lengths */ ++ e1000_write_phy_reg(hw, 0x1F54, 0x0065); ++ /* Fixed FFE for medium cable lengths */ ++ e1000_write_phy_reg(hw, 0x1F55, 0x002A); ++ /* Fixed FFE for long cable lengths */ ++ e1000_write_phy_reg(hw, 0x1F56, 0x002A); ++ /* Enable Adaptive Clip Threshold */ ++ e1000_write_phy_reg(hw, 0x1F72, 0x3FB0); ++ /* AHT reset limit to 1 */ ++ e1000_write_phy_reg(hw, 0x1F76, 0xC0FF); ++ /* Set AHT master delay to 127 msec */ ++ e1000_write_phy_reg(hw, 0x1F77, 0x1DEC); ++ /* Set scan bits for AHT */ ++ e1000_write_phy_reg(hw, 0x1F78, 0xF9EF); ++ /* Set AHT Preset bits */ ++ e1000_write_phy_reg(hw, 0x1F79, 0x0210); ++ /* Change integ_factor of channel A to 3 */ ++ e1000_write_phy_reg(hw, 0x1895, 0x0003); ++ /* Change prop_factor of channels BCD to 8 */ ++ e1000_write_phy_reg(hw, 0x1796, 0x0008); ++ /* Change cg_icount + enable integbp for channels BCD */ ++ e1000_write_phy_reg(hw, 0x1798, 0xD008); ++ /* ++ * Change cg_icount + enable integbp + change prop_factor_master ++ * to 8 for channel A ++ */ ++ e1000_write_phy_reg(hw, 0x1898, 0xD918); ++ /* Disable AHT in Slave mode on channel A */ ++ e1000_write_phy_reg(hw, 0x187A, 0x0800); ++ /* ++ * Enable LPLU and disable AN to 1000 in non-D0a states, ++ * Enable SPD+B2B ++ */ ++ e1000_write_phy_reg(hw, 0x0019, 0x008D); ++ /* Enable restart AN on an1000_dis change */ ++ e1000_write_phy_reg(hw, 0x001B, 0x2080); ++ /* Enable wh_fifo read clock in 10/100 modes */ ++ e1000_write_phy_reg(hw, 0x0014, 0x0045); ++ /* Restart AN, Speed selection is 1000 */ ++ e1000_write_phy_reg(hw, 0x0000, 0x1340); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_get_phy_type_from_id - Get PHY type from id ++ * @phy_id: phy_id read from the phy ++ * ++ * Returns the phy type 
from the id. ++ **/ ++e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id) ++{ ++ e1000_phy_type phy_type = e1000_phy_unknown; ++ ++ switch (phy_id) { ++ case M88E1000_I_PHY_ID: ++ case M88E1000_E_PHY_ID: ++ case M88E1111_I_PHY_ID: ++ case M88E1011_I_PHY_ID: ++ phy_type = e1000_phy_m88; ++ break; ++ case IGP01E1000_I_PHY_ID: /* IGP 1 & 2 share this */ ++ phy_type = e1000_phy_igp_2; ++ break; ++ case GG82563_E_PHY_ID: ++ phy_type = e1000_phy_gg82563; ++ break; ++ case IGP03E1000_E_PHY_ID: ++ phy_type = e1000_phy_igp_3; ++ break; ++ case IFE_E_PHY_ID: ++ case IFE_PLUS_E_PHY_ID: ++ case IFE_C_E_PHY_ID: ++ phy_type = e1000_phy_ife; ++ break; ++ default: ++ phy_type = e1000_phy_unknown; ++ break; ++ } ++ return phy_type; ++} ++ ++/** ++ * e1000_power_up_phy_copper - Restore copper link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, restore the link to previous ++ * settings. ++ **/ ++void e1000_power_up_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ e1000_read_phy_reg(hw, PHY_CONTROL, &mii_reg); ++ mii_reg &= ~MII_CR_POWER_DOWN; ++ e1000_write_phy_reg(hw, PHY_CONTROL, mii_reg); ++} ++ ++/** ++ * e1000_power_down_phy_copper - Restore copper link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, restore the link to previous ++ * settings. ++ **/ ++void e1000_power_down_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ e1000_read_phy_reg(hw, PHY_CONTROL, &mii_reg); ++ mii_reg |= MII_CR_POWER_DOWN; ++ e1000_write_phy_reg(hw, PHY_CONTROL, mii_reg); ++ msec_delay(1); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_osdep.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_osdep.h 2021-04-07 16:01:27.667633538 +0800 +@@ -0,0 +1,124 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++ ++/* glue for the OS-dependent part of e1000 ++ * includes register access macros ++ */ ++ ++#ifndef _E1000_OSDEP_H_ ++#define _E1000_OSDEP_H_ ++ ++#include ++#include ++#include ++#include ++ ++#include "kcompat.h" ++ ++#define usec_delay(x) udelay(x) ++#ifndef msec_delay ++#define msec_delay(x) do { if(in_interrupt()) { \ ++ /* Don't sleep in interrupt context! */ \ ++ BUG(); \ ++ } else { \ ++ msleep(x); \ ++ } } while (0) ++ ++/* Some workarounds require millisecond delays and are run during interrupt ++ * context. Most notably, when establishing link, the phy may need tweaking ++ * but cannot process phy register reads/writes faster than millisecond ++ * intervals...and we establish link due to a "link status change" interrupt. ++ */ ++#define msec_delay_irq(x) mdelay(x) ++#endif ++ ++#define PCI_COMMAND_REGISTER PCI_COMMAND ++#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE ++#define ETH_ADDR_LEN ETH_ALEN ++ ++#ifdef __BIG_ENDIAN ++#define E1000_BIG_ENDIAN __BIG_ENDIAN ++#endif ++ ++ ++#define DEBUGOUT(S) ++#define DEBUGOUT1(S, A...) ++ ++#define DEBUGFUNC(F) DEBUGOUT(F "\n") ++#define DEBUGOUT2 DEBUGOUT1 ++#define DEBUGOUT3 DEBUGOUT2 ++#define DEBUGOUT7 DEBUGOUT3 ++ ++#define E1000_REGISTER(a, reg) (((a)->mac.type >= e1000_82543) \ ++ ? reg \ ++ : e1000_translate_register_82542(reg)) ++ ++#define E1000_WRITE_REG(a, reg, value) ( \ ++ writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg)))) ++ ++#define E1000_READ_REG(a, reg) (readl((a)->hw_addr + E1000_REGISTER(a, reg))) ++ ++#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \ ++ writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2)))) ++ ++#define E1000_READ_REG_ARRAY(a, reg, offset) ( \ ++ readl((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2))) ++ ++#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY ++#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY ++ ++#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \ ++ writew((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1)))) ++ ++#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \ ++ readw((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1))) ++ ++#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \ ++ writeb((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + (offset)))) ++ ++#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \ ++ readb((a)->hw_addr + E1000_REGISTER(a, reg) + (offset))) ++ ++#define E1000_WRITE_REG_IO(a, reg, offset) do { \ ++ outl(reg, ((a)->io_base)); \ ++ outl(offset, ((a)->io_base + 4)); } while(0) ++ ++#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS) ++ ++#define E1000_WRITE_FLASH_REG(a, reg, value) ( \ ++ writel((value), ((a)->flash_address + reg))) ++ ++#define E1000_WRITE_FLASH_REG16(a, reg, value) ( \ ++ writew((value), ((a)->flash_address + reg))) ++ ++#define E1000_READ_FLASH_REG(a, reg) (readl((a)->flash_address + reg)) ++ ++#define E1000_READ_FLASH_REG16(a, reg) (readw((a)->flash_address + reg)) ++ ++#endif /* _E1000_OSDEP_H_ */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_mac.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_mac.c 2021-04-07 16:01:27.663633543 +0800 +@@ -0,0 +1,2039 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel 
Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000_api.h" ++#include "e1000_mac.h" ++ ++/** ++ * e1000_remove_device_generic - Free device specific structure ++ * @hw: pointer to the HW structure ++ * ++ * If a device specific structure was allocated, this function will ++ * free it. ++ **/ ++void e1000_remove_device_generic(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_remove_device_generic"); ++ ++ /* Freeing the dev_spec member of e1000_hw structure */ ++ e1000_free_dev_spec_struct(hw); ++} ++ ++/** ++ * e1000_get_bus_info_pci_generic - Get PCI(x) bus information ++ * @hw: pointer to the HW structure ++ * ++ * Determines and stores the system bus information for a particular ++ * network interface. The following bus information is determined and stored: ++ * bus speed, bus width, type (PCI/PCIx), and PCI(-x) function. ++ **/ ++s32 e1000_get_bus_info_pci_generic(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ u32 status = E1000_READ_REG(hw, E1000_STATUS); ++ s32 ret_val = E1000_SUCCESS; ++ u16 pci_header_type; ++ ++ DEBUGFUNC("e1000_get_bus_info_pci_generic"); ++ ++ /* PCI or PCI-X? */ ++ bus->type = (status & E1000_STATUS_PCIX_MODE) ++ ? e1000_bus_type_pcix ++ : e1000_bus_type_pci; ++ ++ /* Bus speed */ ++ if (bus->type == e1000_bus_type_pci) { ++ bus->speed = (status & E1000_STATUS_PCI66) ++ ? e1000_bus_speed_66 ++ : e1000_bus_speed_33; ++ } else { ++ switch (status & E1000_STATUS_PCIX_SPEED) { ++ case E1000_STATUS_PCIX_SPEED_66: ++ bus->speed = e1000_bus_speed_66; ++ break; ++ case E1000_STATUS_PCIX_SPEED_100: ++ bus->speed = e1000_bus_speed_100; ++ break; ++ case E1000_STATUS_PCIX_SPEED_133: ++ bus->speed = e1000_bus_speed_133; ++ break; ++ default: ++ bus->speed = e1000_bus_speed_reserved; ++ break; ++ } ++ } ++ ++ /* Bus width */ ++ bus->width = (status & E1000_STATUS_BUS64) ++ ? e1000_bus_width_64 ++ : e1000_bus_width_32; ++ ++ /* Which PCI(-X) function? */ ++ e1000_read_pci_cfg(hw, PCI_HEADER_TYPE_REGISTER, &pci_header_type); ++ if (pci_header_type & PCI_HEADER_TYPE_MULTIFUNC) ++ bus->func = (status & E1000_STATUS_FUNC_MASK) ++ >> E1000_STATUS_FUNC_SHIFT; ++ else ++ bus->func = 0; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_bus_info_pcie_generic - Get PCIe bus information ++ * @hw: pointer to the HW structure ++ * ++ * Determines and stores the system bus information for a particular ++ * network interface. The following bus information is determined and stored: ++ * bus speed, bus width, type (PCIe), and PCIe function. 
++ **/ ++s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ s32 ret_val; ++ u32 status; ++ u16 pcie_link_status, pci_header_type; ++ ++ DEBUGFUNC("e1000_get_bus_info_pcie_generic"); ++ ++ bus->type = e1000_bus_type_pci_express; ++ bus->speed = e1000_bus_speed_2500; ++ ++ ret_val = e1000_read_pcie_cap_reg(hw, ++ PCIE_LINK_STATUS, ++ &pcie_link_status); ++ if (ret_val) ++ bus->width = e1000_bus_width_unknown; ++ else ++ bus->width = (e1000_bus_width)((pcie_link_status & ++ PCIE_LINK_WIDTH_MASK) >> ++ PCIE_LINK_WIDTH_SHIFT); ++ ++ e1000_read_pci_cfg(hw, PCI_HEADER_TYPE_REGISTER, &pci_header_type); ++ if (pci_header_type & PCI_HEADER_TYPE_MULTIFUNC) { ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ bus->func = (status & E1000_STATUS_FUNC_MASK) ++ >> E1000_STATUS_FUNC_SHIFT; ++ } else { ++ bus->func = 0; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_clear_vfta_generic - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++void e1000_clear_vfta_generic(struct e1000_hw *hw) ++{ ++ u32 offset; ++ ++ DEBUGFUNC("e1000_clear_vfta_generic"); ++ ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/** ++ * e1000_write_vfta_generic - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: register offset in VLAN filter table ++ * @value: register value written to VLAN filter table ++ * ++ * Writes value at the given offset in the register array which stores ++ * the VLAN filter table. ++ **/ ++void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ DEBUGFUNC("e1000_write_vfta_generic"); ++ ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/** ++ * e1000_init_rx_addrs_generic - Initialize receive address's ++ * @hw: pointer to the HW structure ++ * @rar_count: receive address registers ++ * ++ * Setups the receive address registers by setting the base receive address ++ * register to the devices MAC address and clearing all the other receive ++ * address registers to 0. ++ **/ ++void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count) ++{ ++ u32 i; ++ ++ DEBUGFUNC("e1000_init_rx_addrs_generic"); ++ ++ /* Setup the receive address */ ++ DEBUGOUT("Programming MAC Address into RAR[0]\n"); ++ ++ e1000_rar_set_generic(hw, hw->mac.addr, 0); ++ ++ /* Zero out the other (rar_entry_count - 1) receive addresses */ ++ DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1); ++ for (i = 1; i < rar_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, (i << 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, ((i << 1) + 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/** ++ * e1000_check_alt_mac_addr_generic - Check for alternate MAC addr ++ * @hw: pointer to the HW structure ++ * ++ * Checks the nvm for an alternate MAC address. An alternate MAC address ++ * can be setup by pre-boot software and must be treated like a permanent ++ * address and must override the actual permanent MAC address. If an ++ * alternate MAC address is found it is saved in the hw struct and ++ * programmed into RAR0 and the function returns success, otherwise the ++ * function returns an error. 
++ **/ ++s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw) ++{ ++ u32 i; ++ s32 ret_val = E1000_SUCCESS; ++ u16 offset, nvm_alt_mac_addr_offset, nvm_data; ++ u8 alt_mac_addr[ETH_ADDR_LEN]; ++ ++ DEBUGFUNC("e1000_check_alt_mac_addr_generic"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_ALT_MAC_ADDR_PTR, 1, ++ &nvm_alt_mac_addr_offset); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (nvm_alt_mac_addr_offset == 0xFFFF) { ++ ret_val = -(E1000_NOT_IMPLEMENTED); ++ goto out; ++ } ++ ++ if (hw->bus.func == E1000_FUNC_1) ++ nvm_alt_mac_addr_offset += ETH_ADDR_LEN/sizeof(u16); ++ ++ for (i = 0; i < ETH_ADDR_LEN; i += 2) { ++ offset = nvm_alt_mac_addr_offset + (i >> 1); ++ ret_val = e1000_read_nvm(hw, offset, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ alt_mac_addr[i] = (u8)(nvm_data & 0xFF); ++ alt_mac_addr[i + 1] = (u8)(nvm_data >> 8); ++ } ++ ++ /* if multicast bit is set, the alternate address will not be used */ ++ if (alt_mac_addr[0] & 0x01) { ++ ret_val = -(E1000_NOT_IMPLEMENTED); ++ goto out; ++ } ++ ++ for (i = 0; i < ETH_ADDR_LEN; i++) ++ hw->mac.addr[i] = hw->mac.perm_addr[i] = alt_mac_addr[i]; ++ ++ e1000_rar_set(hw, hw->mac.perm_addr, 0); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_rar_set_generic - Set receive address register ++ * @hw: pointer to the HW structure ++ * @addr: pointer to the receive address ++ * @index: receive address array register ++ * ++ * Sets the receive address array register at index to the address passed ++ * in by addr. ++ **/ ++void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index) ++{ ++ u32 rar_low, rar_high; ++ ++ DEBUGFUNC("e1000_rar_set_generic"); ++ ++ /* ++ * HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((u32) addr[0] | ++ ((u32) addr[1] << 8) | ++ ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); ++ ++ rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); ++ ++ /* If MAC address zero, no need to set the AV bit */ ++ if (rar_low || rar_high) { ++ if (!hw->mac.disable_av) ++ rar_high |= E1000_RAH_AV; ++ } ++ ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, (index << 1), rar_low); ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, ((index << 1) + 1), rar_high); ++} ++ ++/** ++ * e1000_mta_set_generic - Set multicast filter table address ++ * @hw: pointer to the HW structure ++ * @hash_value: determines the MTA register and bit to set ++ * ++ * The multicast table address is a register array of 32-bit registers. ++ * The hash_value is used to determine what register the bit is in, the ++ * current value is read, the new bit is OR'd in and the new value is ++ * written back into the register. ++ **/ ++void e1000_mta_set_generic(struct e1000_hw *hw, u32 hash_value) ++{ ++ u32 hash_bit, hash_reg, mta; ++ ++ DEBUGFUNC("e1000_mta_set_generic"); ++ /* ++ * The MTA is a register array of 32-bit registers. It is ++ * treated like an array of (32*mta_reg_count) bits. We want to ++ * set bit BitArray[hash_value]. So we figure out what register ++ * the bit is in, read it, OR in the new bit, then write ++ * back the new value. The (hw->mac.mta_reg_count - 1) serves as a ++ * mask to bits 31:5 of the hash value which gives us the ++ * register we're modifying. The hash bit within that register ++ * is determined by the lower 5 bits of the hash value. 
++ */ ++ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); ++ hash_bit = hash_value & 0x1F; ++ ++ mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg); ++ ++ mta |= (1 << hash_bit); ++ ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/** ++ * e1000_update_mc_addr_list_generic - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * @rar_used_count: the first RAR register free to program ++ * @rar_count: total number of supported Receive Address Registers ++ * ++ * Updates the Receive Address Registers and Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. ++ * The parameter rar_count will usually be hw->mac.rar_entry_count ++ * unless there are workarounds that change this. ++ **/ ++void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count, ++ u32 rar_used_count, u32 rar_count) ++{ ++ u32 hash_value; ++ u32 i; ++ ++ DEBUGFUNC("e1000_update_mc_addr_list_generic"); ++ ++ /* ++ * Load the first set of multicast addresses into the exact ++ * filters (RAR). If there are not enough to fill the RAR ++ * array, clear the filters. ++ */ ++ for (i = rar_used_count; i < rar_count; i++) { ++ if (mc_addr_count) { ++ e1000_rar_set(hw, mc_addr_list, i); ++ mc_addr_count--; ++ mc_addr_list += ETH_ADDR_LEN; ++ } else { ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, i << 1, 0); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, (i << 1) + 1, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ } ++ ++ /* Clear the old settings from the MTA */ ++ DEBUGOUT("Clearing MTA\n"); ++ for (i = 0; i < hw->mac.mta_reg_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Load any remaining multicast addresses into the hash table. */ ++ for (; mc_addr_count > 0; mc_addr_count--) { ++ hash_value = e1000_hash_mc_addr(hw, mc_addr_list); ++ DEBUGOUT1("Hash value = 0x%03X\n", hash_value); ++ e1000_mta_set(hw, hash_value); ++ mc_addr_list += ETH_ADDR_LEN; ++ } ++} ++ ++/** ++ * e1000_hash_mc_addr_generic - Generate a multicast hash value ++ * @hw: pointer to the HW structure ++ * @mc_addr: pointer to a multicast address ++ * ++ * Generates a multicast address hash value which is used to determine ++ * the multicast filter table array address and new table value. See ++ * e1000_mta_set_generic() ++ **/ ++u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr) ++{ ++ u32 hash_value, hash_mask; ++ u8 bit_shift = 0; ++ ++ DEBUGFUNC("e1000_hash_mc_addr_generic"); ++ ++ /* Register count multiplied by bits per register */ ++ hash_mask = (hw->mac.mta_reg_count * 32) - 1; ++ ++ /* ++ * For a mc_filter_type of 0, bit_shift is the number of left-shifts ++ * where 0xFF would still fall within the hash mask. ++ */ ++ while (hash_mask >> bit_shift != 0xFF) ++ bit_shift++; ++ ++ /* ++ * The portion of the address that is used for the hash table ++ * is determined by the mc_filter_type setting. ++ * The algorithm is such that there is a total of 8 bits of shifting. ++ * The bit_shift for a mc_filter_type of 0 represents the number of ++ * left-shifts where the MSB of mc_addr[5] would still fall within ++ * the hash_mask. Case 0 does this exactly. Since there are a total ++ * of 8 bits of shifting, then mc_addr[4] will shift right the ++ * remaining number of bits. Thus 8 - bit_shift. 
The rest of the ++ * cases are a variation of this algorithm...essentially raising the ++ * number of bits to shift mc_addr[5] left, while still keeping the ++ * 8-bit shifting total. ++ * ++ * For example, given the following Destination MAC Address and an ++ * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask), ++ * we can see that the bit_shift for case 0 is 4. These are the hash ++ * values resulting from each mc_filter_type... ++ * [0] [1] [2] [3] [4] [5] ++ * 01 AA 00 12 34 56 ++ * LSB MSB ++ * ++ * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 ++ * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 ++ * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 ++ * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 ++ */ ++ switch (hw->mac.mc_filter_type) { ++ default: ++ case 0: ++ break; ++ case 1: ++ bit_shift += 1; ++ break; ++ case 2: ++ bit_shift += 2; ++ break; ++ case 3: ++ bit_shift += 4; ++ break; ++ } ++ ++ hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | ++ (((u16) mc_addr[5]) << bit_shift))); ++ ++ return hash_value; ++} ++ ++/** ++ * e1000_pcix_mmrbc_workaround_generic - Fix incorrect MMRBC value ++ * @hw: pointer to the HW structure ++ * ++ * In certain situations, a system BIOS may report that the PCIx maximum ++ * memory read byte count (MMRBC) value is higher than than the actual ++ * value. We check the PCIx command regsiter with the current PCIx status ++ * regsiter. ++ **/ ++void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw) ++{ ++ u16 cmd_mmrbc; ++ u16 pcix_cmd; ++ u16 pcix_stat_hi_word; ++ u16 stat_mmrbc; ++ ++ DEBUGFUNC("e1000_pcix_mmrbc_workaround_generic"); ++ ++ /* Workaround for PCI-X issue when BIOS sets MMRBC incorrectly */ ++ if (hw->bus.type != e1000_bus_type_pcix) ++ return; ++ ++ e1000_read_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd); ++ e1000_read_pci_cfg(hw, PCIX_STATUS_REGISTER_HI, &pcix_stat_hi_word); ++ cmd_mmrbc = (pcix_cmd & PCIX_COMMAND_MMRBC_MASK) >> ++ PCIX_COMMAND_MMRBC_SHIFT; ++ stat_mmrbc = (pcix_stat_hi_word & PCIX_STATUS_HI_MMRBC_MASK) >> ++ PCIX_STATUS_HI_MMRBC_SHIFT; ++ if (stat_mmrbc == PCIX_STATUS_HI_MMRBC_4K) ++ stat_mmrbc = PCIX_STATUS_HI_MMRBC_2K; ++ if (cmd_mmrbc > stat_mmrbc) { ++ pcix_cmd &= ~PCIX_COMMAND_MMRBC_MASK; ++ pcix_cmd |= stat_mmrbc << PCIX_COMMAND_MMRBC_SHIFT; ++ e1000_write_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd); ++ } ++} ++ ++/** ++ * e1000_clear_hw_cntrs_base_generic - Clear base hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the base hardware counters by reading the counter registers. 
++ **/ ++void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_base_generic"); ++ ++ temp = E1000_READ_REG(hw, E1000_CRCERRS); ++ temp = E1000_READ_REG(hw, E1000_SYMERRS); ++ temp = E1000_READ_REG(hw, E1000_MPC); ++ temp = E1000_READ_REG(hw, E1000_SCC); ++ temp = E1000_READ_REG(hw, E1000_ECOL); ++ temp = E1000_READ_REG(hw, E1000_MCC); ++ temp = E1000_READ_REG(hw, E1000_LATECOL); ++ temp = E1000_READ_REG(hw, E1000_COLC); ++ temp = E1000_READ_REG(hw, E1000_DC); ++ temp = E1000_READ_REG(hw, E1000_SEC); ++ temp = E1000_READ_REG(hw, E1000_RLEC); ++ temp = E1000_READ_REG(hw, E1000_XONRXC); ++ temp = E1000_READ_REG(hw, E1000_XONTXC); ++ temp = E1000_READ_REG(hw, E1000_XOFFRXC); ++ temp = E1000_READ_REG(hw, E1000_XOFFTXC); ++ temp = E1000_READ_REG(hw, E1000_FCRUC); ++ temp = E1000_READ_REG(hw, E1000_GPRC); ++ temp = E1000_READ_REG(hw, E1000_BPRC); ++ temp = E1000_READ_REG(hw, E1000_MPRC); ++ temp = E1000_READ_REG(hw, E1000_GPTC); ++ temp = E1000_READ_REG(hw, E1000_GORCL); ++ temp = E1000_READ_REG(hw, E1000_GORCH); ++ temp = E1000_READ_REG(hw, E1000_GOTCL); ++ temp = E1000_READ_REG(hw, E1000_GOTCH); ++ temp = E1000_READ_REG(hw, E1000_RNBC); ++ temp = E1000_READ_REG(hw, E1000_RUC); ++ temp = E1000_READ_REG(hw, E1000_RFC); ++ temp = E1000_READ_REG(hw, E1000_ROC); ++ temp = E1000_READ_REG(hw, E1000_RJC); ++ temp = E1000_READ_REG(hw, E1000_TORL); ++ temp = E1000_READ_REG(hw, E1000_TORH); ++ temp = E1000_READ_REG(hw, E1000_TOTL); ++ temp = E1000_READ_REG(hw, E1000_TOTH); ++ temp = E1000_READ_REG(hw, E1000_TPR); ++ temp = E1000_READ_REG(hw, E1000_TPT); ++ temp = E1000_READ_REG(hw, E1000_MPTC); ++ temp = E1000_READ_REG(hw, E1000_BPTC); ++} ++ ++/** ++ * e1000_check_for_copper_link_generic - Check for link (Copper) ++ * @hw: pointer to the HW structure ++ * ++ * Checks to see of the link status of the hardware has changed. If a ++ * change in link status has been detected, then we read the PHY registers ++ * to get the current speed/duplex if link exists. ++ **/ ++s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ bool link; ++ ++ DEBUGFUNC("e1000_check_for_copper_link"); ++ ++ /* ++ * We only want to go out to the PHY registers to see if Auto-Neg ++ * has completed and/or if our link status has changed. The ++ * get_link_status flag is set upon receiving a Link Status ++ * Change or Rx Sequence Error interrupt. ++ */ ++ if (!mac->get_link_status) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* ++ * First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. ++ */ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ goto out; /* No link detected */ ++ ++ mac->get_link_status = FALSE; ++ ++ /* ++ * Check if there was DownShift, must be checked ++ * immediately after link-up ++ */ ++ e1000_check_downshift_generic(hw); ++ ++ /* ++ * If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* ++ * Auto-Neg is enabled. Auto Speed Detection takes care ++ * of MAC speed/duplex configuration. So we only need to ++ * configure Collision Distance in the MAC. ++ */ ++ e1000_config_collision_dist_generic(hw); ++ ++ /* ++ * Configure Flow Control now that Auto-Neg has completed. 
++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_for_fiber_link_generic - Check for link (Fiber) ++ * @hw: pointer to the HW structure ++ * ++ * Checks for link up on the hardware. If link is not up and we have ++ * a signal, then we need to force link up. ++ **/ ++s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw; ++ u32 ctrl; ++ u32 status; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_check_for_fiber_link_generic"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ rxcw = E1000_READ_REG(hw, E1000_RXCW); ++ ++ /* ++ * If we don't have link (auto-negotiation failed or link partner ++ * cannot auto-negotiate), the cable is plugged in (we have signal), ++ * and our link partner is not trying to auto-negotiate with us (we ++ * are receiving idles or data), we need to force link up. We also ++ * need to give auto-negotiation time to complete, in case the cable ++ * was just plugged in. The autoneg_failed flag does this. ++ */ ++ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ ++ if ((ctrl & E1000_CTRL_SWDPIN1) && (!(status & E1000_STATUS_LU)) && ++ (!(rxcw & E1000_RXCW_C))) { ++ if (mac->autoneg_failed == 0) { ++ mac->autoneg_failed = 1; ++ goto out; ++ } ++ DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. */ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ goto out; ++ } ++ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ /* ++ * If we are forcing link and we are receiving /C/ ordered ++ * sets, re-enable auto-negotiation in the TXCW register ++ * and disable forced link in the Device Control register ++ * in an attempt to auto-negotiate with our link partner. ++ */ ++ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\n"); ++ E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); ++ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ mac->serdes_has_link = TRUE; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_for_serdes_link_generic - Check for link (Serdes) ++ * @hw: pointer to the HW structure ++ * ++ * Checks for link up on the hardware. If link is not up and we have ++ * a signal, then we need to force link up. 
++ **/ ++s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw; ++ u32 ctrl; ++ u32 status; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_check_for_serdes_link_generic"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ rxcw = E1000_READ_REG(hw, E1000_RXCW); ++ ++ /* ++ * If we don't have link (auto-negotiation failed or link partner ++ * cannot auto-negotiate), and our link partner is not trying to ++ * auto-negotiate with us (we are receiving idles or data), ++ * we need to force link up. We also need to give auto-negotiation ++ * time to complete. ++ */ ++ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ ++ if ((!(status & E1000_STATUS_LU)) && (!(rxcw & E1000_RXCW_C))) { ++ if (mac->autoneg_failed == 0) { ++ mac->autoneg_failed = 1; ++ goto out; ++ } ++ DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. */ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ goto out; ++ } ++ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ /* ++ * If we are forcing link and we are receiving /C/ ordered ++ * sets, re-enable auto-negotiation in the TXCW register ++ * and disable forced link in the Device Control register ++ * in an attempt to auto-negotiate with our link partner. ++ */ ++ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\n"); ++ E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); ++ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ mac->serdes_has_link = TRUE; ++ } else if (!(E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW))) { ++ /* ++ * If we force link for non-auto-negotiation switch, check ++ * link status based on MAC synchronization for internal ++ * serdes media type. ++ */ ++ /* SYNCH bit and IV bit are sticky. */ ++ usec_delay(10); ++ if (E1000_RXCW_SYNCH & E1000_READ_REG(hw, E1000_RXCW)) { ++ if (!(rxcw & E1000_RXCW_IV)) { ++ mac->serdes_has_link = TRUE; ++ DEBUGOUT("SERDES: Link is up.\n"); ++ } ++ } else { ++ mac->serdes_has_link = FALSE; ++ DEBUGOUT("SERDES: Link is down.\n"); ++ } ++ } ++ ++ if (E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW)) { ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ mac->serdes_has_link = (status & E1000_STATUS_LU) ++ ? TRUE ++ : FALSE; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_link_generic - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++s32 e1000_setup_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_link_generic"); ++ ++ /* ++ * In the case of the phy reset being blocked, we already have a link. ++ * We do not need to set it up again. 
++ */ ++ if (e1000_check_reset_block(hw)) ++ goto out; ++ ++ /* ++ * If flow control is set to default, set flow control based on ++ * the EEPROM flow control settings. ++ */ ++ if (hw->fc.type == e1000_fc_default) { ++ ret_val = e1000_set_default_fc_generic(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* ++ * We want to save off the original Flow Control configuration just ++ * in case we get disconnected and then reconnected into a different ++ * hub or switch with different Flow Control capabilities. ++ */ ++ hw->fc.original_type = hw->fc.type; ++ ++ DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc.type); ++ ++ /* Call the necessary media_type subroutine to configure the link. */ ++ ret_val = func->setup_physical_interface(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Initialize the flow control address, type, and PAUSE timer ++ * registers to their default values. This is done even if flow ++ * control is disabled, because it does not hurt anything to ++ * initialize these registers. ++ */ ++ DEBUGOUT("Initializing the Flow Control address, type and timer regs\n"); ++ E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE); ++ E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); ++ E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); ++ ++ E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); ++ ++ ret_val = e1000_set_fc_watermarks_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_fiber_serdes_link_generic - Setup link for fiber/serdes ++ * @hw: pointer to the HW structure ++ * ++ * Configures collision distance and flow control for fiber and serdes ++ * links. Upon successful setup, poll for link. ++ **/ ++s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_fiber_serdes_link_generic"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* Take the link out of reset */ ++ ctrl &= ~E1000_CTRL_LRST; ++ ++ e1000_config_collision_dist_generic(hw); ++ ++ ret_val = e1000_commit_fc_settings_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Since auto-negotiation is enabled, take the link out of reset (the ++ * link will be in reset, because we previously reset the chip). This ++ * will restart auto-negotiation. If auto-negotiation is successful ++ * then the link-up status bit will be set and the flow control enable ++ * bits (RFCE and TFCE) will be set according to their negotiated value. ++ */ ++ DEBUGOUT("Auto-negotiation enabled\n"); ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(1); ++ ++ /* ++ * For these adapters, the SW defineable pin 1 is set when the optics ++ * detect a signal. If we have a signal, then poll for a "Link-Up" ++ * indication. ++ */ ++ if (hw->phy.media_type == e1000_media_type_internal_serdes || ++ (E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) { ++ ret_val = e1000_poll_fiber_serdes_link_generic(hw); ++ } else { ++ DEBUGOUT("No signal detected\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_config_collision_dist_generic - Configure collision distance ++ * @hw: pointer to the HW structure ++ * ++ * Configures the collision distance to the default value and is used ++ * during link setup. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. 
++ **/ ++void e1000_config_collision_dist_generic(struct e1000_hw *hw) ++{ ++ u32 tctl; ++ ++ DEBUGFUNC("e1000_config_collision_dist_generic"); ++ ++ tctl = E1000_READ_REG(hw, E1000_TCTL); ++ ++ tctl &= ~E1000_TCTL_COLD; ++ tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; ++ ++ E1000_WRITE_REG(hw, E1000_TCTL, tctl); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/** ++ * e1000_poll_fiber_serdes_link_generic - Poll for link up ++ * @hw: pointer to the HW structure ++ * ++ * Polls for link up by reading the status register, if link fails to come ++ * up with auto-negotiation, then the link is forced if a signal is detected. ++ **/ ++s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 i, status; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_poll_fiber_serdes_link_generic"); ++ ++ /* ++ * If we have a signal (the cable is plugged in, or assumed true for ++ * serdes media) then poll for a "Link-Up" indication in the Device ++ * Status Register. Time-out if a link isn't seen in 500 milliseconds ++ * seconds (Auto-negotiation should complete in less than 500 ++ * milliseconds even if the other end is doing it in SW). ++ */ ++ for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { ++ msec_delay(10); ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ if (status & E1000_STATUS_LU) ++ break; ++ } ++ if (i == FIBER_LINK_UP_LIMIT) { ++ DEBUGOUT("Never got a valid link from auto-neg!!!\n"); ++ mac->autoneg_failed = 1; ++ /* ++ * AutoNeg failed to achieve a link, so we'll call ++ * mac->check_for_link. This routine will force the ++ * link up if we detect a signal. This will allow us to ++ * communicate with non-autonegotiating link partners. ++ */ ++ ret_val = e1000_check_for_link(hw); ++ if (ret_val) { ++ DEBUGOUT("Error while checking for link\n"); ++ goto out; ++ } ++ mac->autoneg_failed = 0; ++ } else { ++ mac->autoneg_failed = 0; ++ DEBUGOUT("Valid Link Found\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_commit_fc_settings_generic - Configure flow control ++ * @hw: pointer to the HW structure ++ * ++ * Write the flow control settings to the Transmit Config Word Register (TXCW) ++ * base on the flow control settings in e1000_mac_info. ++ **/ ++s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 txcw; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_commit_fc_settings_generic"); ++ ++ /* ++ * Check for a software override of the flow control settings, and ++ * setup the device accordingly. If auto-negotiation is enabled, then ++ * software will have to set the "PAUSE" bits to the correct value in ++ * the Transmit Config Word Register (TXCW) and re-start auto- ++ * negotiation. However, if auto-negotiation is disabled, then ++ * software will have to manually configure the two flow control enable ++ * bits in the CTRL register. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames, ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames but we ++ * do not support receiving pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) are enabled. ++ */ ++ switch (hw->fc.type) { ++ case e1000_fc_none: ++ /* Flow control completely disabled by a software over-ride. 
*/ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); ++ break; ++ case e1000_fc_rx_pause: ++ /* ++ * Rx Flow control is enabled and Tx Flow control is disabled ++ * by a software over-ride. Since there really isn't a way to ++ * advertise that we are capable of Rx Pause ONLY, we will ++ * advertise that we support both symmetric and asymmetric RX ++ * PAUSE. Later, we will disable the adapter's ability to send ++ * PAUSE frames. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ case e1000_fc_tx_pause: ++ /* ++ * Tx Flow control is enabled, and Rx Flow control is disabled, ++ * by a software over-ride. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); ++ break; ++ case e1000_fc_full: ++ /* ++ * Flow control (both Rx and Tx) is enabled by a software ++ * over-ride. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ break; ++ } ++ ++ E1000_WRITE_REG(hw, E1000_TXCW, txcw); ++ mac->txcw = txcw; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_fc_watermarks_generic - Set flow control high/low watermarks ++ * @hw: pointer to the HW structure ++ * ++ * Sets the flow control high/low threshold (watermark) registers. If ++ * flow control XON frame transmission is enabled, then set XON frame ++ * tansmission as well. ++ **/ ++s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u32 fcrtl = 0, fcrth = 0; ++ ++ DEBUGFUNC("e1000_set_fc_watermarks_generic"); ++ ++ /* ++ * Set the flow control receive threshold registers. Normally, ++ * these registers will be set to a default threshold that may be ++ * adjusted later by the driver's runtime code. However, if the ++ * ability to transmit pause frames is not enabled, then these ++ * registers will be set to 0. ++ */ ++ if (hw->fc.type & e1000_fc_tx_pause) { ++ /* ++ * We need to set up the Receive Threshold high and low water ++ * marks as well as (optionally) enabling the transmission of ++ * XON frames. ++ */ ++ fcrtl = hw->fc.low_water; ++ if (hw->fc.send_xon) ++ fcrtl |= E1000_FCRTL_XONE; ++ ++ fcrth = hw->fc.high_water; ++ } ++ E1000_WRITE_REG(hw, E1000_FCRTL, fcrtl); ++ E1000_WRITE_REG(hw, E1000_FCRTH, fcrth); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_set_default_fc_generic - Set flow control default values ++ * @hw: pointer to the HW structure ++ * ++ * Read the EEPROM for the default values for flow control and store the ++ * values. ++ **/ ++s32 e1000_set_default_fc_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 nvm_data; ++ ++ DEBUGFUNC("e1000_set_default_fc_generic"); ++ ++ /* ++ * Read and store word 0x0F of the EEPROM. This word contains bits ++ * that determine the hardware's default PAUSE (flow control) mode, ++ * a bit that determines whether the HW defaults to enabling or ++ * disabling auto-negotiation, and the direction of the ++ * SW defined pins. If there is no SW over-ride of the flow ++ * control setting, then the variable hw->fc will ++ * be initialized based on a value in the EEPROM. 
++ */ ++ ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data); ++ ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == 0) ++ hw->fc.type = e1000_fc_none; ++ else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == ++ NVM_WORD0F_ASM_DIR) ++ hw->fc.type = e1000_fc_tx_pause; ++ else ++ hw->fc.type = e1000_fc_full; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_force_mac_fc_generic - Force the MAC's flow control settings ++ * @hw: pointer to the HW structure ++ * ++ * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the ++ * device control register to reflect the adapter settings. TFCE and RFCE ++ * need to be explicitly set by software when a copper PHY is used because ++ * autonegotiation is managed by the PHY rather than the MAC. Software must ++ * also configure these bits when link is forced on a fiber connection. ++ **/ ++s32 e1000_force_mac_fc_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_force_mac_fc_generic"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* ++ * Because we didn't get link via the internal auto-negotiation ++ * mechanism (we either forced link or we got link via PHY ++ * auto-neg), we have to manually enable/disable transmit an ++ * receive flow control. ++ * ++ * The "Case" statement below enables/disable flow control ++ * according to the "hw->fc.type" parameter. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause ++ * frames but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * frames but we do not receive pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) is enabled. ++ * other: No other values should be possible at this point. ++ */ ++ DEBUGOUT1("hw->fc.type = %u\n", hw->fc.type); ++ ++ switch (hw->fc.type) { ++ case e1000_fc_none: ++ ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); ++ break; ++ case e1000_fc_rx_pause: ++ ctrl &= (~E1000_CTRL_TFCE); ++ ctrl |= E1000_CTRL_RFCE; ++ break; ++ case e1000_fc_tx_pause: ++ ctrl &= (~E1000_CTRL_RFCE); ++ ctrl |= E1000_CTRL_TFCE; ++ break; ++ case e1000_fc_full: ++ ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_config_fc_after_link_up_generic - Configures flow control after link ++ * @hw: pointer to the HW structure ++ * ++ * Checks the status of auto-negotiation after link up to ensure that the ++ * speed and duplex were not forced. If the link needed to be forced, then ++ * flow control needs to be forced also. If auto-negotiation is enabled ++ * and did not fail, then we configure flow control based on our link ++ * partner. ++ **/ ++s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val = E1000_SUCCESS; ++ u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; ++ u16 speed, duplex; ++ ++ DEBUGFUNC("e1000_config_fc_after_link_up_generic"); ++ ++ /* ++ * Check for the case where we have fiber media and auto-neg failed ++ * so we had to force link. In this case, we need to force the ++ * configuration of the MAC to match the "fc" parameter. 
++ */ ++ if (mac->autoneg_failed) { ++ if (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes) ++ ret_val = e1000_force_mac_fc_generic(hw); ++ } else { ++ if (hw->phy.media_type == e1000_media_type_copper) ++ ret_val = e1000_force_mac_fc_generic(hw); ++ } ++ ++ if (ret_val) { ++ DEBUGOUT("Error forcing flow control settings\n"); ++ goto out; ++ } ++ ++ /* ++ * Check for the case where we have copper media and auto-neg is ++ * enabled. In this case, we need to check and see if Auto-Neg ++ * has completed, and if so, how the PHY and link partner has ++ * flow control configured. ++ */ ++ if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { ++ /* ++ * Read the MII Status Register and check to see if AutoNeg ++ * has completed. We read this twice because this reg has ++ * some "sticky" (latched) bits. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ++ if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { ++ DEBUGOUT("Copper PHY and Auto Neg " ++ "has not completed.\n"); ++ goto out; ++ } ++ ++ /* ++ * The AutoNeg process has completed, so we now need to ++ * read both the Auto Negotiation Advertisement ++ * Register (Address 4) and the Auto_Negotiation Base ++ * Page Ability Register (Address 5) to determine how ++ * flow control was negotiated. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, ++ &mii_nway_adv_reg); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_read_phy_reg(hw, PHY_LP_ABILITY, ++ &mii_nway_lp_ability_reg); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Two bits in the Auto Negotiation Advertisement Register ++ * (Address 4) and two bits in the Auto Negotiation Base ++ * Page Ability Register (Address 5) determine flow control ++ * for both the PHY and the link partner. The following ++ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, ++ * 1999, describes these PAUSE resolution bits and how flow ++ * control is determined based upon these settings. ++ * NOTE: DC = Don't Care ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 0 | DC | DC | e1000_fc_none ++ * 0 | 1 | 0 | DC | e1000_fc_none ++ * 0 | 1 | 1 | 0 | e1000_fc_none ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * 1 | 0 | 0 | DC | e1000_fc_none ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * 1 | 1 | 0 | 0 | e1000_fc_none ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ * Are both PAUSE bits set to 1? If so, this implies ++ * Symmetric Flow Control is enabled at both ends. The ++ * ASM_DIR bits are irrelevant per the spec. ++ * ++ * For Symmetric Flow Control: ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | DC | 1 | DC | E1000_fc_full ++ * ++ */ ++ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { ++ /* ++ * Now we need to check if the user selected Rx ONLY ++ * of pause frames. In this case, we had to advertise ++ * FULL flow control because we could not advertise RX ++ * ONLY. Hence, we must now check to see if we need to ++ * turn OFF the TRANSMISSION of PAUSE frames. 
++ */ ++ if (hw->fc.original_type == e1000_fc_full) { ++ hw->fc.type = e1000_fc_full; ++ DEBUGOUT("Flow Control = FULL.\r\n"); ++ } else { ++ hw->fc.type = e1000_fc_rx_pause; ++ DEBUGOUT("Flow Control = " ++ "RX PAUSE frames only.\r\n"); ++ } ++ } ++ /* ++ * For receiving PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ */ ++ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.type = e1000_fc_tx_pause; ++ DEBUGOUT("Flow Control = TX PAUSE frames only.\r\n"); ++ } ++ /* ++ * For transmitting PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ */ ++ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.type = e1000_fc_rx_pause; ++ DEBUGOUT("Flow Control = RX PAUSE frames only.\r\n"); ++ } else { ++ /* ++ * Per the IEEE spec, at this point flow control ++ * should be disabled. ++ */ ++ hw->fc.type = e1000_fc_none; ++ DEBUGOUT("Flow Control = NONE.\r\n"); ++ } ++ ++ /* ++ * Now we need to do one last check... If we auto- ++ * negotiated to HALF DUPLEX, flow control should not be ++ * enabled per IEEE 802.3 spec. ++ */ ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ goto out; ++ } ++ ++ if (duplex == HALF_DUPLEX) ++ hw->fc.type = e1000_fc_none; ++ ++ /* ++ * Now we call a subroutine to actually force the MAC ++ * controller to use the correct flow control settings. ++ */ ++ ret_val = e1000_force_mac_fc_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error forcing flow control settings\n"); ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_speed_and_duplex_copper_generic - Retreive current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Read the status register for the current speed/duplex and store the current ++ * speed and duplex for copper connections. ++ **/ ++s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ u32 status; ++ ++ DEBUGFUNC("e1000_get_speed_and_duplex_copper_generic"); ++ ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ if (status & E1000_STATUS_SPEED_1000) { ++ *speed = SPEED_1000; ++ DEBUGOUT("1000 Mbs, "); ++ } else if (status & E1000_STATUS_SPEED_100) { ++ *speed = SPEED_100; ++ DEBUGOUT("100 Mbs, "); ++ } else { ++ *speed = SPEED_10; ++ DEBUGOUT("10 Mbs, "); ++ } ++ ++ if (status & E1000_STATUS_FD) { ++ *duplex = FULL_DUPLEX; ++ DEBUGOUT("Full Duplex\n"); ++ } else { ++ *duplex = HALF_DUPLEX; ++ DEBUGOUT("Half Duplex\n"); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_get_speed_and_duplex_fiber_generic - Retreive current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Sets the speed and duplex to gigabit full duplex (the only possible option) ++ * for fiber/serdes links. 
++ **/ ++s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, ++ u16 *speed, u16 *duplex) ++{ ++ DEBUGFUNC("e1000_get_speed_and_duplex_fiber_serdes_generic"); ++ ++ *speed = SPEED_1000; ++ *duplex = FULL_DUPLEX; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_get_hw_semaphore_generic - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ **/ ++s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 ret_val = E1000_SUCCESS; ++ s32 timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ DEBUGFUNC("e1000_get_hw_semaphore_generic"); ++ ++ /* Get the SW semaphore */ ++ while (i < timeout) { ++ swsm = E1000_READ_REG(hw, E1000_SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ usec_delay(50); ++ i++; ++ } ++ ++ if (i == timeout) { ++ DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ /* Get the FW semaphore. */ ++ for (i = 0; i < timeout; i++) { ++ swsm = E1000_READ_REG(hw, E1000_SWSM); ++ E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ usec_delay(50); ++ } ++ ++ if (i == timeout) { ++ /* Release semaphores */ ++ e1000_put_hw_semaphore_generic(hw); ++ DEBUGOUT("Driver can't access the NVM\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_put_hw_semaphore_generic - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ **/ ++void e1000_put_hw_semaphore_generic(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ ++ DEBUGFUNC("e1000_put_hw_semaphore_generic"); ++ ++ swsm = E1000_READ_REG(hw, E1000_SWSM); ++ ++ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); ++ ++ E1000_WRITE_REG(hw, E1000_SWSM, swsm); ++} ++ ++/** ++ * e1000_get_auto_rd_done_generic - Check for auto read completion ++ * @hw: pointer to the HW structure ++ * ++ * Check EEPROM for Auto Read done bit. ++ **/ ++s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw) ++{ ++ s32 i = 0; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_get_auto_rd_done_generic"); ++ ++ while (i < AUTO_READ_DONE_TIMEOUT) { ++ if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_AUTO_RD) ++ break; ++ msec_delay(1); ++ i++; ++ } ++ ++ if (i == AUTO_READ_DONE_TIMEOUT) { ++ DEBUGOUT("Auto read by HW from NVM has not completed.\n"); ++ ret_val = -E1000_ERR_RESET; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_valid_led_default_generic - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. 
++ **/ ++s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_valid_led_default_generic"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_id_led_init_generic - ++ * @hw: pointer to the HW structure ++ * ++ **/ ++s32 e1000_id_led_init_generic(struct e1000_hw * hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ const u32 ledctl_mask = 0x000000FF; ++ const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; ++ const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; ++ u16 data, i, temp; ++ const u16 led_mask = 0x0F; ++ ++ DEBUGFUNC("e1000_id_led_init_generic"); ++ ++ ret_val = hw->func.valid_led_default(hw, &data); ++ if (ret_val) ++ goto out; ++ ++ mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL); ++ mac->ledctl_mode1 = mac->ledctl_default; ++ mac->ledctl_mode2 = mac->ledctl_default; ++ ++ for (i = 0; i < 4; i++) { ++ temp = (data >> (i << 2)) & led_mask; ++ switch (temp) { ++ case ID_LED_ON1_DEF2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_ON1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_OFF1_DEF2: ++ case ID_LED_OFF1_ON2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ switch (temp) { ++ case ID_LED_DEF1_ON2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_OFF1_ON2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_DEF1_OFF2: ++ case ID_LED_ON1_OFF2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_led_generic - Configures SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This prepares the SW controllable LED for use and saves the current state ++ * of the LED so it can be later restored. ++ **/ ++s32 e1000_setup_led_generic(struct e1000_hw *hw) ++{ ++ u32 ledctl; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_led_generic"); ++ ++ if (hw->func.setup_led != e1000_setup_led_generic) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ if (hw->phy.media_type == e1000_media_type_fiber) { ++ ledctl = E1000_READ_REG(hw, E1000_LEDCTL); ++ hw->mac.ledctl_default = ledctl; ++ /* Turn off LED0 */ ++ ledctl &= ~(E1000_LEDCTL_LED0_IVRT | ++ E1000_LEDCTL_LED0_BLINK | ++ E1000_LEDCTL_LED0_MODE_MASK); ++ ledctl |= (E1000_LEDCTL_MODE_LED_OFF << ++ E1000_LEDCTL_LED0_MODE_SHIFT); ++ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); ++ } else if (hw->phy.media_type == e1000_media_type_copper) { ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_cleanup_led_generic - Set LED config to default operation ++ * @hw: pointer to the HW structure ++ * ++ * Remove the current LED configuration and set the LED configuration ++ * to the default value, saved from the EEPROM. 
++ **/ ++s32 e1000_cleanup_led_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_cleanup_led_generic"); ++ ++ if (hw->func.cleanup_led != e1000_cleanup_led_generic) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_blink_led_generic - Blink LED ++ * @hw: pointer to the HW structure ++ * ++ * Blink the led's which are set to be on. ++ **/ ++s32 e1000_blink_led_generic(struct e1000_hw *hw) ++{ ++ u32 ledctl_blink = 0; ++ u32 i; ++ ++ DEBUGFUNC("e1000_blink_led_generic"); ++ ++ if (hw->phy.media_type == e1000_media_type_fiber) { ++ /* always blink LED0 for PCI-E fiber */ ++ ledctl_blink = E1000_LEDCTL_LED0_BLINK | ++ (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); ++ } else { ++ /* ++ * set the blink bit for each LED that's "on" (0x0E) ++ * in ledctl_mode2 ++ */ ++ ledctl_blink = hw->mac.ledctl_mode2; ++ for (i = 0; i < 4; i++) ++ if (((hw->mac.ledctl_mode2 >> (i * 8)) & 0xFF) == ++ E1000_LEDCTL_MODE_LED_ON) ++ ledctl_blink |= (E1000_LEDCTL_LED0_BLINK << ++ (i * 8)); ++ } ++ ++ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl_blink); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_on_generic - Turn LED on ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED on. ++ **/ ++s32 e1000_led_on_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ DEBUGFUNC("e1000_led_on_generic"); ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_fiber: ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ break; ++ case e1000_media_type_copper: ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2); ++ break; ++ default: ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_off_generic - Turn LED off ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED off. ++ **/ ++s32 e1000_led_off_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ DEBUGFUNC("e1000_led_off_generic"); ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_fiber: ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ break; ++ case e1000_media_type_copper: ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); ++ break; ++ default: ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_set_pcie_no_snoop_generic - Set PCI-express capabilities ++ * @hw: pointer to the HW structure ++ * @no_snoop: bitmap of snoop events ++ * ++ * Set the PCI-express register to snoop for events enabled in 'no_snoop'. ++ **/ ++void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop) ++{ ++ u32 gcr; ++ ++ DEBUGFUNC("e1000_set_pcie_no_snoop_generic"); ++ ++ if (hw->bus.type != e1000_bus_type_pci_express) ++ goto out; ++ ++ if (no_snoop) { ++ gcr = E1000_READ_REG(hw, E1000_GCR); ++ gcr &= ~(PCIE_NO_SNOOP_ALL); ++ gcr |= no_snoop; ++ E1000_WRITE_REG(hw, E1000_GCR, gcr); ++ } ++out: ++ return; ++} ++ ++/** ++ * e1000_disable_pcie_master_generic - Disables PCI-express master access ++ * @hw: pointer to the HW structure ++ * ++ * Returns 0 (E1000_SUCCESS) if successful, else returns -10 ++ * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not casued ++ * the master requests to be disabled. ++ * ++ * Disables PCI-Express master access and verifies there are no pending ++ * requests. 
++ **/ ++s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 timeout = MASTER_DISABLE_TIMEOUT; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_disable_pcie_master_generic"); ++ ++ if (hw->bus.type != e1000_bus_type_pci_express) ++ goto out; ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ while (timeout) { ++ if (!(E1000_READ_REG(hw, E1000_STATUS) & ++ E1000_STATUS_GIO_MASTER_ENABLE)) ++ break; ++ usec_delay(100); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("Master requests are pending.\n"); ++ ret_val = -E1000_ERR_MASTER_REQUESTS_PENDING; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_adaptive_generic - Reset Adaptive Interframe Spacing ++ * @hw: pointer to the HW structure ++ * ++ * Reset the Adaptive Interframe Spacing throttle to default values. ++ **/ ++void e1000_reset_adaptive_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ DEBUGFUNC("e1000_reset_adaptive_generic"); ++ ++ if (!mac->adaptive_ifs) { ++ DEBUGOUT("Not in Adaptive IFS mode!\n"); ++ goto out; ++ } ++ ++ if (!mac->ifs_params_forced) { ++ mac->current_ifs_val = 0; ++ mac->ifs_min_val = IFS_MIN; ++ mac->ifs_max_val = IFS_MAX; ++ mac->ifs_step_size = IFS_STEP; ++ mac->ifs_ratio = IFS_RATIO; ++ } ++ ++ mac->in_ifs_mode = FALSE; ++ E1000_WRITE_REG(hw, E1000_AIT, 0); ++out: ++ return; ++} ++ ++/** ++ * e1000_update_adaptive_generic - Update Adaptive Interframe Spacing ++ * @hw: pointer to the HW structure ++ * ++ * Update the Adaptive Interframe Spacing Throttle value based on the ++ * time between transmitted packets and time between collisions. ++ **/ ++void e1000_update_adaptive_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ DEBUGFUNC("e1000_update_adaptive_generic"); ++ ++ if (!mac->adaptive_ifs) { ++ DEBUGOUT("Not in Adaptive IFS mode!\n"); ++ goto out; ++ } ++ ++ if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) { ++ if (mac->tx_packet_delta > MIN_NUM_XMITS) { ++ mac->in_ifs_mode = TRUE; ++ if (mac->current_ifs_val < mac->ifs_max_val) { ++ if (!mac->current_ifs_val) ++ mac->current_ifs_val = mac->ifs_min_val; ++ else ++ mac->current_ifs_val += ++ mac->ifs_step_size; ++ E1000_WRITE_REG(hw, E1000_AIT, mac->current_ifs_val); ++ } ++ } ++ } else { ++ if (mac->in_ifs_mode && ++ (mac->tx_packet_delta <= MIN_NUM_XMITS)) { ++ mac->current_ifs_val = 0; ++ mac->in_ifs_mode = FALSE; ++ E1000_WRITE_REG(hw, E1000_AIT, 0); ++ } ++ } ++out: ++ return; ++} ++ ++/** ++ * e1000_validate_mdi_setting_generic - Verify MDI/MDIx settings ++ * @hw: pointer to the HW structure ++ * ++ * Verify that when not using auto-negotitation that MDI/MDIx is correctly ++ * set, which is forced to MDI mode only. ++ **/ ++s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_validate_mdi_setting_generic"); ++ ++ if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) { ++ DEBUGOUT("Invalid MDI setting detected\n"); ++ hw->phy.mdix = 1; ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_8bit_ctrl_reg_generic - Write a 8bit CTRL register ++ * @hw: pointer to the HW structure ++ * @reg: 32bit register offset such as E1000_SCTL ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes an address/data control type register. 
There are several of these ++ * and they all have the format address << 8 | data and bit 31 is polled for ++ * completion. ++ **/ ++s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, ++ u32 offset, u8 data) ++{ ++ u32 i, regvalue = 0; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_write_8bit_ctrl_reg_generic"); ++ ++ /* Set up the address and data */ ++ regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT); ++ E1000_WRITE_REG(hw, reg, regvalue); ++ ++ /* Poll the ready bit to see if the MDI read completed */ ++ for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) { ++ usec_delay(5); ++ regvalue = E1000_READ_REG(hw, reg); ++ if (regvalue & E1000_GEN_CTL_READY) ++ break; ++ } ++ if (!(regvalue & E1000_GEN_CTL_READY)) { ++ DEBUGOUT1("Reg %08x did not indicate ready\n", reg); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_api.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_api.h 2021-04-07 16:01:27.658633550 +0800 +@@ -0,0 +1,166 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_API_H_ ++#define _E1000_API_H_ ++ ++#include "e1000_hw.h" ++ ++extern void e1000_init_function_pointers_82542(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_82543(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_82540(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_82571(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_82541(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_80003es2lan(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_ich8lan(struct e1000_hw *hw); ++ ++s32 e1000_set_mac_type(struct e1000_hw *hw); ++s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device); ++s32 e1000_init_mac_params(struct e1000_hw *hw); ++s32 e1000_init_nvm_params(struct e1000_hw *hw); ++s32 e1000_init_phy_params(struct e1000_hw *hw); ++void e1000_remove_device(struct e1000_hw *hw); ++s32 e1000_get_bus_info(struct e1000_hw *hw); ++void e1000_clear_vfta(struct e1000_hw *hw); ++void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value); ++s32 e1000_force_mac_fc(struct e1000_hw *hw); ++s32 e1000_check_for_link(struct e1000_hw *hw); ++s32 e1000_reset_hw(struct e1000_hw *hw); ++s32 e1000_init_hw(struct e1000_hw *hw); ++s32 e1000_setup_link(struct e1000_hw *hw); ++s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex); ++s32 e1000_disable_pcie_master(struct e1000_hw *hw); ++void e1000_config_collision_dist(struct e1000_hw *hw); ++void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index); ++void e1000_mta_set(struct e1000_hw *hw, u32 hash_value); ++u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr); ++void e1000_update_mc_addr_list(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count, ++ u32 rar_used_count, u32 rar_count); ++s32 e1000_setup_led(struct e1000_hw *hw); ++s32 e1000_cleanup_led(struct e1000_hw *hw); ++s32 e1000_check_reset_block(struct e1000_hw *hw); ++s32 e1000_blink_led(struct e1000_hw *hw); ++s32 e1000_led_on(struct e1000_hw *hw); ++s32 e1000_led_off(struct e1000_hw *hw); ++void e1000_reset_adaptive(struct e1000_hw *hw); ++void e1000_update_adaptive(struct e1000_hw *hw); ++s32 e1000_get_cable_length(struct e1000_hw *hw); ++s32 e1000_validate_mdi_setting(struct e1000_hw *hw); ++s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data); ++s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, ++ u32 offset, u8 data); ++s32 e1000_get_phy_info(struct e1000_hw *hw); ++s32 e1000_phy_hw_reset(struct e1000_hw *hw); ++s32 e1000_phy_commit(struct e1000_hw *hw); ++void e1000_power_up_phy(struct e1000_hw *hw); ++void e1000_power_down_phy(struct e1000_hw *hw); ++s32 e1000_read_mac_addr(struct e1000_hw *hw); ++s32 e1000_read_pba_num(struct e1000_hw *hw, u32 *part_num); ++void e1000_reload_nvm(struct e1000_hw *hw); ++s32 e1000_update_nvm_checksum(struct e1000_hw *hw); ++s32 e1000_validate_nvm_checksum(struct e1000_hw *hw); ++s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data); ++s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data); ++s32 e1000_wait_autoneg(struct e1000_hw *hw); ++s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active); 
++s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active); ++bool e1000_check_mng_mode(struct e1000_hw *hw); ++bool e1000_enable_mng_pass_thru(struct e1000_hw *hw); ++bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw); ++s32 e1000_mng_enable_host_if(struct e1000_hw *hw); ++s32 e1000_mng_host_if_write(struct e1000_hw *hw, ++ u8 *buffer, u16 length, u16 offset, u8 *sum); ++s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, ++ struct e1000_host_mng_command_header *hdr); ++s32 e1000_mng_write_dhcp_info(struct e1000_hw * hw, ++ u8 *buffer, u16 length); ++void e1000_tbi_adjust_stats_82543(struct e1000_hw *hw, ++ struct e1000_hw_stats *stats, ++ u32 frame_len, u8 *mac_addr, ++ u32 max_frame_size); ++void e1000_set_tbi_compatibility_82543(struct e1000_hw *hw, ++ bool state); ++bool e1000_tbi_sbp_enabled_82543(struct e1000_hw *hw); ++u32 e1000_translate_register_82542(u32 reg); ++void e1000_init_script_state_82541(struct e1000_hw *hw, bool state); ++bool e1000_get_laa_state_82571(struct e1000_hw *hw); ++void e1000_set_laa_state_82571(struct e1000_hw *hw, bool state); ++void e1000_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, ++ bool state); ++void e1000_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw); ++void e1000_gig_downshift_workaround_ich8lan(struct e1000_hw *hw); ++ ++ ++/* ++ * TBI_ACCEPT macro definition: ++ * ++ * This macro requires: ++ * adapter = a pointer to struct e1000_hw ++ * status = the 8 bit status field of the Rx descriptor with EOP set ++ * error = the 8 bit error field of the Rx descriptor with EOP set ++ * length = the sum of all the length fields of the Rx descriptors that ++ * make up the current frame ++ * last_byte = the last byte of the frame DMAed by the hardware ++ * max_frame_length = the maximum frame length we want to accept. ++ * min_frame_length = the minimum frame length we want to accept. ++ * ++ * This macro is a conditional that should be used in the interrupt ++ * handler's Rx processing routine when RxErrors have been detected. ++ * ++ * Typical use: ++ * ... ++ * if (TBI_ACCEPT) { ++ * accept_frame = TRUE; ++ * e1000_tbi_adjust_stats(adapter, MacAddress); ++ * frame_length--; ++ * } else { ++ * accept_frame = FALSE; ++ * } ++ * ... ++ */ ++ ++/* The carrier extension symbol, as received by the NIC. */ ++#define CARRIER_EXTENSION 0x0F ++ ++#define TBI_ACCEPT(a, status, errors, length, last_byte, min_frame_size, max_frame_size) \ ++ (e1000_tbi_sbp_enabled_82543(a) && \ ++ (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \ ++ ((last_byte) == CARRIER_EXTENSION) && \ ++ (((status) & E1000_RXD_STAT_VP) ? \ ++ (((length) > (min_frame_size - VLAN_TAG_SIZE)) && \ ++ ((length) <= (max_frame_size + 1))) : \ ++ (((length) > min_frame_size) && \ ++ ((length) <= (max_frame_size + VLAN_TAG_SIZE + 1))))) ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_ethtool.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_ethtool.c 2021-04-07 16:01:27.653633557 +0800 +@@ -0,0 +1,2205 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. 
++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* ethtool support for e1000 */ ++ ++#include ++ ++#ifdef SIOCETHTOOL ++#include ++ ++#include "e1000.h" ++#include "e1000_82541.h" ++#ifdef NETIF_F_HW_VLAN_TX ++#include ++#endif ++ ++#ifdef ETHTOOL_OPS_COMPAT ++#include "kcompat_ethtool.c" ++#endif ++ ++#ifdef ETHTOOL_GSTATS ++struct e1000_stats { ++ char stat_string[ETH_GSTRING_LEN]; ++ int sizeof_stat; ++ int stat_offset; ++}; ++ ++#define E1000_STAT(m) sizeof(((struct e1000_adapter *)0)->m), \ ++ offsetof(struct e1000_adapter, m) ++static const struct e1000_stats e1000_gstrings_stats[] = { ++ { "rx_packets", E1000_STAT(stats.gprc) }, ++ { "tx_packets", E1000_STAT(stats.gptc) }, ++ { "rx_bytes", E1000_STAT(stats.gorc) }, ++ { "tx_bytes", E1000_STAT(stats.gotc) }, ++ { "rx_broadcast", E1000_STAT(stats.bprc) }, ++ { "tx_broadcast", E1000_STAT(stats.bptc) }, ++ { "rx_multicast", E1000_STAT(stats.mprc) }, ++ { "tx_multicast", E1000_STAT(stats.mptc) }, ++ { "rx_errors", E1000_STAT(net_stats.rx_errors) }, ++ { "tx_errors", E1000_STAT(net_stats.tx_errors) }, ++ { "tx_dropped", E1000_STAT(net_stats.tx_dropped) }, ++ { "multicast", E1000_STAT(stats.mprc) }, ++ { "collisions", E1000_STAT(stats.colc) }, ++ { "rx_length_errors", E1000_STAT(net_stats.rx_length_errors) }, ++ { "rx_over_errors", E1000_STAT(net_stats.rx_over_errors) }, ++ { "rx_crc_errors", E1000_STAT(stats.crcerrs) }, ++ { "rx_frame_errors", E1000_STAT(net_stats.rx_frame_errors) }, ++ { "rx_no_buffer_count", E1000_STAT(stats.rnbc) }, ++ { "rx_missed_errors", E1000_STAT(stats.mpc) }, ++ { "tx_aborted_errors", E1000_STAT(stats.ecol) }, ++ { "tx_carrier_errors", E1000_STAT(stats.tncrs) }, ++ { "tx_fifo_errors", E1000_STAT(net_stats.tx_fifo_errors) }, ++ { "tx_heartbeat_errors", E1000_STAT(net_stats.tx_heartbeat_errors) }, ++ { "tx_window_errors", E1000_STAT(stats.latecol) }, ++ { "tx_abort_late_coll", E1000_STAT(stats.latecol) }, ++ { "tx_deferred_ok", E1000_STAT(stats.dc) }, ++ { "tx_single_coll_ok", E1000_STAT(stats.scc) }, ++ { "tx_multi_coll_ok", E1000_STAT(stats.mcc) }, ++ { "tx_timeout_count", E1000_STAT(tx_timeout_count) }, ++ { "tx_restart_queue", E1000_STAT(restart_queue) }, ++ { "rx_long_length_errors", E1000_STAT(stats.roc) }, ++ { "rx_short_length_errors", E1000_STAT(stats.ruc) }, ++ { "rx_align_errors", E1000_STAT(stats.algnerrc) }, ++ { "tx_tcp_seg_good", E1000_STAT(stats.tsctc) }, ++ { "tx_tcp_seg_failed", E1000_STAT(stats.tsctfc) }, ++ { "rx_flow_control_xon", E1000_STAT(stats.xonrxc) }, ++ { "rx_flow_control_xoff", E1000_STAT(stats.xoffrxc) }, ++ { "tx_flow_control_xon", E1000_STAT(stats.xontxc) }, ++ { "tx_flow_control_xoff", E1000_STAT(stats.xofftxc) }, ++ { "rx_long_byte_count", E1000_STAT(stats.gorc) }, ++ { "rx_csum_offload_good", E1000_STAT(hw_csum_good) }, ++ { 
"rx_csum_offload_errors", E1000_STAT(hw_csum_err) }, ++ { "rx_header_split", E1000_STAT(rx_hdr_split) }, ++ { "alloc_rx_buff_failed", E1000_STAT(alloc_rx_buff_failed) }, ++ { "tx_smbus", E1000_STAT(stats.mgptc) }, ++ { "rx_smbus", E1000_STAT(stats.mgprc) }, ++ { "dropped_smbus", E1000_STAT(stats.mgpdc) }, ++}; ++ ++#ifdef CONFIG_E1000_MQ ++#define E1000_QUEUE_STATS_LEN \ ++ ((((((struct e1000_adapter *)netdev->priv)->num_rx_queues > 1) ? \ ++ ((struct e1000_adapter *)netdev->priv)->num_rx_queues : 0 ) + \ ++ (((((struct e1000_adapter *)netdev->priv)->num_tx_queues > 1) ? \ ++ ((struct e1000_adapter *)netdev->priv)->num_tx_queues : 0 ))) * \ ++ (sizeof(struct e1000_queue_stats) / sizeof(u64))) ++#else ++#define E1000_QUEUE_STATS_LEN 0 ++#endif ++#define E1000_GLOBAL_STATS_LEN \ ++ sizeof(e1000_gstrings_stats) / sizeof(struct e1000_stats) ++#define E1000_STATS_LEN (E1000_GLOBAL_STATS_LEN + E1000_QUEUE_STATS_LEN) ++#endif /* ETHTOOL_GSTATS */ ++#ifdef ETHTOOL_TEST ++static const char e1000_gstrings_test[][ETH_GSTRING_LEN] = { ++ "Register test (offline)", "Eeprom test (offline)", ++ "Interrupt test (offline)", "Loopback test (offline)", ++ "Link test (on/offline)" ++}; ++#define E1000_TEST_LEN sizeof(e1000_gstrings_test) / ETH_GSTRING_LEN ++#endif /* ETHTOOL_TEST */ ++ ++static int e1000_get_settings(struct net_device *netdev, ++ struct ethtool_cmd *ecmd) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 status; ++ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ ++ ecmd->supported = (SUPPORTED_10baseT_Half | ++ SUPPORTED_10baseT_Full | ++ SUPPORTED_100baseT_Half | ++ SUPPORTED_100baseT_Full | ++ SUPPORTED_1000baseT_Full| ++ SUPPORTED_Autoneg | ++ SUPPORTED_TP); ++ if (hw->phy.type == e1000_phy_ife) ++ ecmd->supported &= ~SUPPORTED_1000baseT_Full; ++ ecmd->advertising = ADVERTISED_TP; ++ ++ if (hw->mac.autoneg == 1) { ++ ecmd->advertising |= ADVERTISED_Autoneg; ++ /* the e1000 autoneg seems to match ethtool nicely */ ++ ecmd->advertising |= hw->phy.autoneg_advertised; ++ } ++ ++ ecmd->port = PORT_TP; ++ ecmd->phy_address = hw->phy.addr; ++ ++ if (hw->mac.type == e1000_82543) ++ ecmd->transceiver = XCVR_EXTERNAL; ++ else ++ ecmd->transceiver = XCVR_INTERNAL; ++ ++ } else { ++ ecmd->supported = (SUPPORTED_1000baseT_Full | ++ SUPPORTED_FIBRE | ++ SUPPORTED_Autoneg); ++ ++ ecmd->advertising = (ADVERTISED_1000baseT_Full | ++ ADVERTISED_FIBRE | ++ ADVERTISED_Autoneg); ++ ++ ecmd->port = PORT_FIBRE; ++ ++ if (hw->mac.type >= e1000_82545) ++ ecmd->transceiver = XCVR_INTERNAL; ++ else ++ ecmd->transceiver = XCVR_EXTERNAL; ++ } ++ ++ status = E1000_READ_REG(&adapter->hw, E1000_STATUS); ++ ++ if (status & E1000_STATUS_LU) { ++ ++ if ((status & E1000_STATUS_SPEED_1000) || ++ hw->phy.media_type != e1000_media_type_copper) ++ ecmd->speed = SPEED_1000; ++ else if (status & E1000_STATUS_SPEED_100) ++ ecmd->speed = SPEED_100; ++ else ++ ecmd->speed = SPEED_10; ++ ++ if ((status & E1000_STATUS_FD) || ++ hw->phy.media_type != e1000_media_type_copper) ++ ecmd->duplex = DUPLEX_FULL; ++ else ++ ecmd->duplex = DUPLEX_HALF; ++ } else { ++ ecmd->speed = -1; ++ ecmd->duplex = -1; ++ } ++ ++ ecmd->autoneg = ((hw->phy.media_type == e1000_media_type_fiber) || ++ hw->mac.autoneg) ? 
AUTONEG_ENABLE : AUTONEG_DISABLE; ++ return 0; ++} ++ ++static int e1000_set_settings(struct net_device *netdev, ++ struct ethtool_cmd *ecmd) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* When SoL/IDER sessions are active, autoneg/speed/duplex ++ * cannot be changed */ ++ if (e1000_check_reset_block(hw)) { ++ DPRINTK(DRV, ERR, "Cannot change link characteristics " ++ "when SoL/IDER is active.\n"); ++ return -EINVAL; ++ } ++ ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ msleep(1); ++ ++ if (ecmd->autoneg == AUTONEG_ENABLE) { ++ hw->mac.autoneg = 1; ++ if (hw->phy.media_type == e1000_media_type_fiber) ++ hw->phy.autoneg_advertised = ADVERTISED_1000baseT_Full | ++ ADVERTISED_FIBRE | ++ ADVERTISED_Autoneg; ++ else ++ hw->phy.autoneg_advertised = ecmd->advertising | ++ ADVERTISED_TP | ++ ADVERTISED_Autoneg; ++ ecmd->advertising = hw->phy.autoneg_advertised; ++ if (adapter->fc_autoneg) ++ hw->fc.original_type = e1000_fc_default; ++ } else { ++ if (e1000_set_spd_dplx(adapter, ecmd->speed + ecmd->duplex)) { ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ return -EINVAL; ++ } ++ } ++ ++ /* reset the link */ ++ ++ if (netif_running(adapter->netdev)) { ++ e1000_down(adapter); ++ e1000_up(adapter); ++ } else { ++ e1000_reset(adapter); ++ } ++ ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ return 0; ++} ++ ++static void e1000_get_pauseparam(struct net_device *netdev, ++ struct ethtool_pauseparam *pause) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ pause->autoneg = ++ (adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE); ++ ++ if (hw->fc.type == e1000_fc_rx_pause) ++ pause->rx_pause = 1; ++ else if (hw->fc.type == e1000_fc_tx_pause) ++ pause->tx_pause = 1; ++ else if (hw->fc.type == e1000_fc_full) { ++ pause->rx_pause = 1; ++ pause->tx_pause = 1; ++ } ++} ++ ++static int e1000_set_pauseparam(struct net_device *netdev, ++ struct ethtool_pauseparam *pause) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ int retval = 0; ++ ++ adapter->fc_autoneg = pause->autoneg; ++ ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ msleep(1); ++ ++ if (pause->rx_pause && pause->tx_pause) ++ hw->fc.type = e1000_fc_full; ++ else if (pause->rx_pause && !pause->tx_pause) ++ hw->fc.type = e1000_fc_rx_pause; ++ else if (!pause->rx_pause && pause->tx_pause) ++ hw->fc.type = e1000_fc_tx_pause; ++ else if (!pause->rx_pause && !pause->tx_pause) ++ hw->fc.type = e1000_fc_none; ++ ++ hw->fc.original_type = hw->fc.type; ++ ++ if (adapter->fc_autoneg == AUTONEG_ENABLE) { ++ hw->fc.type = e1000_fc_default; ++ if (netif_running(adapter->netdev)) { ++ e1000_down(adapter); ++ e1000_up(adapter); ++ } else { ++ e1000_reset(adapter); ++ } ++ } else { ++ retval = ((hw->phy.media_type == e1000_media_type_fiber) ? 
++ e1000_setup_link(hw) : e1000_force_mac_fc(hw)); ++ } ++ ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ return retval; ++} ++ ++static u32 e1000_get_rx_csum(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ return adapter->rx_csum; ++} ++ ++static int e1000_set_rx_csum(struct net_device *netdev, u32 data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ adapter->rx_csum = data; ++ ++ if (netif_running(netdev)) ++ e1000_reinit_locked(adapter); ++ else ++ e1000_reset(adapter); ++ return 0; ++} ++ ++static u32 e1000_get_tx_csum(struct net_device *netdev) ++{ ++ return (netdev->features & NETIF_F_HW_CSUM) != 0; ++} ++ ++static int e1000_set_tx_csum(struct net_device *netdev, u32 data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ if (adapter->hw.mac.type < e1000_82543) { ++ if (!data) ++ return -EINVAL; ++ return 0; ++ } ++ ++ if (data) ++ netdev->features |= NETIF_F_HW_CSUM; ++ else ++ netdev->features &= ~NETIF_F_HW_CSUM; ++ ++ return 0; ++} ++ ++#ifdef NETIF_F_TSO ++static int e1000_set_tso(struct net_device *netdev, u32 data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ int i; ++ struct net_device *v_netdev; ++ if (!(adapter->flags & E1000_FLAG_HAS_TSO)) ++ return data ? -EINVAL : 0; ++ ++ if (data) { ++ netdev->features |= NETIF_F_TSO; ++#ifdef NETIF_F_TSO6 ++ if (adapter->flags & E1000_FLAG_HAS_TSO6) ++ netdev->features |= NETIF_F_TSO6; ++#endif ++ } else { ++ netdev->features &= ~NETIF_F_TSO; ++#ifdef NETIF_F_TSO6 ++ if (adapter->flags & E1000_FLAG_HAS_TSO6) ++ netdev->features &= ~NETIF_F_TSO6; ++#endif ++#ifdef NETIF_F_HW_VLAN_TX ++ /* disable TSO on all VLANs if they're present */ ++ if (!adapter->vlgrp) ++ goto tso_out; ++ for (i = 0; i < VLAN_N_VID; i++) { ++ v_netdev = vlan_group_get_device(adapter->vlgrp, i); ++ if (!v_netdev) ++ continue; ++ ++ v_netdev->features &= ~NETIF_F_TSO; ++#ifdef NETIF_F_TSO6 ++ if (adapter->flags & E1000_FLAG_HAS_TSO6) ++ v_netdev->features &= ~NETIF_F_TSO6; ++#endif ++ vlan_group_set_device(adapter->vlgrp, i, v_netdev); ++ } ++#endif ++ } ++ ++tso_out: ++ DPRINTK(PROBE, INFO, "TSO is %s\n", data ? 
"Enabled" : "Disabled"); ++ adapter->flags |= E1000_FLAG_TSO_FORCE; ++ return 0; ++} ++#endif /* NETIF_F_TSO */ ++ ++static u32 e1000_get_msglevel(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ return adapter->msg_enable; ++} ++ ++static void e1000_set_msglevel(struct net_device *netdev, u32 data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ adapter->msg_enable = data; ++} ++ ++static int e1000_get_regs_len(struct net_device *netdev) ++{ ++#define E1000_REGS_LEN 32 ++ return E1000_REGS_LEN * sizeof(u32); ++} ++ ++static void e1000_get_regs(struct net_device *netdev, ++ struct ethtool_regs *regs, void *p) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 *regs_buff = p; ++ u16 phy_data; ++ ++ memset(p, 0, E1000_REGS_LEN * sizeof(u32)); ++ ++ regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id; ++ ++ regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); ++ regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); ++ ++ regs_buff[2] = E1000_READ_REG(hw, E1000_RCTL); ++ regs_buff[3] = E1000_READ_REG(hw, E1000_RDLEN(0)); ++ regs_buff[4] = E1000_READ_REG(hw, E1000_RDH(0)); ++ regs_buff[5] = E1000_READ_REG(hw, E1000_RDT(0)); ++ regs_buff[6] = E1000_READ_REG(hw, E1000_RDTR); ++ ++ regs_buff[7] = E1000_READ_REG(hw, E1000_TCTL); ++ regs_buff[8] = E1000_READ_REG(hw, E1000_TDLEN(0)); ++ regs_buff[9] = E1000_READ_REG(hw, E1000_TDH(0)); ++ regs_buff[10] = E1000_READ_REG(hw, E1000_TDT(0)); ++ regs_buff[11] = E1000_READ_REG(hw, E1000_TIDV); ++ ++ regs_buff[12] = adapter->hw.phy.type; /* PHY type (IGP=1, M88=0) */ ++ if (hw->phy.type == e1000_phy_igp) { ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, ++ IGP01E1000_PHY_AGC_A); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_A & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[13] = (u32)phy_data; /* cable length */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, ++ IGP01E1000_PHY_AGC_B); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_B & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[14] = (u32)phy_data; /* cable length */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, ++ IGP01E1000_PHY_AGC_C); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_C & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[15] = (u32)phy_data; /* cable length */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, ++ IGP01E1000_PHY_AGC_D); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_D & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[16] = (u32)phy_data; /* cable length */ ++ regs_buff[17] = 0; /* extended 10bt distance (not needed) */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, 0x0); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[18] = (u32)phy_data; /* cable polarity */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, ++ IGP01E1000_PHY_PCS_INIT_REG); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_PCS_INIT_REG & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[19] = (u32)phy_data; /* cable polarity */ ++ regs_buff[20] = 0; /* polarity correction enabled (always) */ ++ regs_buff[22] = 0; /* phy receive errors (unavailable) */ ++ regs_buff[23] = regs_buff[18]; /* mdix mode */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, 0x0); ++ } else { ++ e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ regs_buff[13] = (u32)phy_data; /* cable length */ ++ regs_buff[14] = 0; /* Dummy (to align w/ IGP phy reg dump) */ ++ regs_buff[15] = 
0; /* Dummy (to align w/ IGP phy reg dump) */ ++ regs_buff[16] = 0; /* Dummy (to align w/ IGP phy reg dump) */ ++ e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ regs_buff[17] = (u32)phy_data; /* extended 10bt distance */ ++ regs_buff[18] = regs_buff[13]; /* cable polarity */ ++ regs_buff[19] = 0; /* Dummy (to align w/ IGP phy reg dump) */ ++ regs_buff[20] = regs_buff[17]; /* polarity correction */ ++ /* phy receive errors */ ++ regs_buff[22] = adapter->phy_stats.receive_errors; ++ regs_buff[23] = regs_buff[13]; /* mdix mode */ ++ } ++ regs_buff[21] = adapter->phy_stats.idle_errors; /* phy idle errors */ ++ e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); ++ regs_buff[24] = (u32)phy_data; /* phy local receiver status */ ++ regs_buff[25] = regs_buff[24]; /* phy remote receiver status */ ++ if (hw->mac.type >= e1000_82540 && ++ hw->mac.type < e1000_82571 && ++ hw->phy.media_type == e1000_media_type_copper) { ++ regs_buff[26] = E1000_READ_REG(hw, E1000_MANC); ++ } ++} ++ ++static int e1000_get_eeprom_len(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ return adapter->hw.nvm.word_size * 2; ++} ++ ++static int e1000_get_eeprom(struct net_device *netdev, ++ struct ethtool_eeprom *eeprom, u8 *bytes) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u16 *eeprom_buff; ++ int first_word, last_word; ++ int ret_val = 0; ++ u16 i; ++ ++ if (eeprom->len == 0) ++ return -EINVAL; ++ ++ eeprom->magic = hw->vendor_id | (hw->device_id << 16); ++ ++ first_word = eeprom->offset >> 1; ++ last_word = (eeprom->offset + eeprom->len - 1) >> 1; ++ ++ eeprom_buff = kmalloc(sizeof(u16) * ++ (last_word - first_word + 1), GFP_KERNEL); ++ if (!eeprom_buff) ++ return -ENOMEM; ++ ++ if (hw->nvm.type == e1000_nvm_eeprom_spi) ++ ret_val = e1000_read_nvm(hw, first_word, ++ last_word - first_word + 1, ++ eeprom_buff); ++ else { ++ for (i = 0; i < last_word - first_word + 1; i++) ++ if ((ret_val = e1000_read_nvm(hw, first_word + i, 1, ++ &eeprom_buff[i]))) ++ break; ++ } ++ ++ /* Device's eeprom is always little-endian, word addressable */ ++ for (i = 0; i < last_word - first_word + 1; i++) ++ le16_to_cpus(&eeprom_buff[i]); ++ ++ memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), ++ eeprom->len); ++ kfree(eeprom_buff); ++ ++ return ret_val; ++} ++ ++static int e1000_set_eeprom(struct net_device *netdev, ++ struct ethtool_eeprom *eeprom, u8 *bytes) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u16 *eeprom_buff; ++ void *ptr; ++ int max_len, first_word, last_word, ret_val = 0; ++ u16 i; ++ ++ if (eeprom->len == 0) ++ return -EOPNOTSUPP; ++ ++ if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16))) ++ return -EFAULT; ++ ++ max_len = hw->nvm.word_size * 2; ++ ++ first_word = eeprom->offset >> 1; ++ last_word = (eeprom->offset + eeprom->len - 1) >> 1; ++ eeprom_buff = kmalloc(max_len, GFP_KERNEL); ++ if (!eeprom_buff) ++ return -ENOMEM; ++ ++ ptr = (void *)eeprom_buff; ++ ++ if (eeprom->offset & 1) { ++ /* need read/modify/write of first changed EEPROM word */ ++ /* only the second byte of the word is being modified */ ++ ret_val = e1000_read_nvm(hw, first_word, 1, ++ &eeprom_buff[0]); ++ ptr++; ++ } ++ if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) { ++ /* need read/modify/write of last changed EEPROM word */ ++ /* only the first byte of the word is being modified */ ++ ret_val = e1000_read_nvm(hw, last_word, 1, ++ &eeprom_buff[last_word - 
first_word]); ++ } ++ ++ /* Device's eeprom is always little-endian, word addressable */ ++ for (i = 0; i < last_word - first_word + 1; i++) ++ le16_to_cpus(&eeprom_buff[i]); ++ ++ memcpy(ptr, bytes, eeprom->len); ++ ++ for (i = 0; i < last_word - first_word + 1; i++) ++ eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]); ++ ++ ret_val = e1000_write_nvm(hw, first_word, ++ last_word - first_word + 1, eeprom_buff); ++ ++ /* Update the checksum over the first part of the EEPROM if needed ++ * and flush shadow RAM for 82573 controllers */ ++ if ((ret_val == 0) && ((first_word <= NVM_CHECKSUM_REG) || ++ (hw->mac.type == e1000_82573))) ++ e1000_update_nvm_checksum(hw); ++ ++ kfree(eeprom_buff); ++ return ret_val; ++} ++ ++static void e1000_get_drvinfo(struct net_device *netdev, ++ struct ethtool_drvinfo *drvinfo) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ char firmware_version[32]; ++ u16 eeprom_data; ++ ++ strncpy(drvinfo->driver, e1000_driver_name, 32); ++ strncpy(drvinfo->version, e1000_driver_version, 32); ++ ++ /* EEPROM image version # is reported as firmware version # for ++ * 8257{1|2|3} controllers */ ++ e1000_read_nvm(&adapter->hw, 5, 1, &eeprom_data); ++ switch (adapter->hw.mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ sprintf(firmware_version, "%d.%d-%d", ++ (eeprom_data & 0xF000) >> 12, ++ (eeprom_data & 0x0FF0) >> 4, ++ eeprom_data & 0x000F); ++ break; ++ default: ++ sprintf(firmware_version, "N/A"); ++ } ++ ++ strncpy(drvinfo->fw_version, firmware_version, 32); ++ strncpy(drvinfo->bus_info, pci_name(adapter->pdev), 32); ++ drvinfo->n_stats = E1000_STATS_LEN; ++ drvinfo->testinfo_len = E1000_TEST_LEN; ++ drvinfo->regdump_len = e1000_get_regs_len(netdev); ++ drvinfo->eedump_len = e1000_get_eeprom_len(netdev); ++} ++ ++static void e1000_get_ringparam(struct net_device *netdev, ++ struct ethtool_ringparam *ring) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ e1000_mac_type mac_type = adapter->hw.mac.type; ++ struct e1000_tx_ring *tx_ring = adapter->tx_ring; ++ struct e1000_rx_ring *rx_ring = adapter->rx_ring; ++ ++ ring->rx_max_pending = (mac_type < e1000_82544) ? E1000_MAX_RXD : ++ E1000_MAX_82544_RXD; ++ ring->tx_max_pending = (mac_type < e1000_82544) ? 
E1000_MAX_TXD : ++ E1000_MAX_82544_TXD; ++ ring->rx_mini_max_pending = 0; ++ ring->rx_jumbo_max_pending = 0; ++ ring->rx_pending = rx_ring->count; ++ ring->tx_pending = tx_ring->count; ++ ring->rx_mini_pending = 0; ++ ring->rx_jumbo_pending = 0; ++} ++ ++static int e1000_set_ringparam(struct net_device *netdev, ++ struct ethtool_ringparam *ring) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ e1000_mac_type mac_type = adapter->hw.mac.type; ++ struct e1000_tx_ring *tx_ring, *tx_old; ++ struct e1000_rx_ring *rx_ring, *rx_old; ++ int i, err, tx_ring_size, rx_ring_size; ++ ++ if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) ++ return -EINVAL; ++ ++ tx_ring_size = sizeof(struct e1000_tx_ring) * adapter->num_tx_queues; ++ rx_ring_size = sizeof(struct e1000_rx_ring) * adapter->num_rx_queues; ++ ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ msleep(1); ++ ++ if (netif_running(adapter->netdev)) ++ e1000_down(adapter); ++ ++ tx_old = adapter->tx_ring; ++ rx_old = adapter->rx_ring; ++ ++ err = -ENOMEM; ++ tx_ring = kzalloc(tx_ring_size, GFP_KERNEL); ++ if (!tx_ring) ++ goto err_alloc_tx; ++ /* use a memcpy to save any previously configured ++ * items like napi structs from having to be ++ * reinitialized */ ++ memcpy(tx_ring, tx_old, tx_ring_size); ++ ++ rx_ring = kzalloc(rx_ring_size, GFP_KERNEL); ++ if (!rx_ring) ++ goto err_alloc_rx; ++ memcpy(rx_ring, rx_old, rx_ring_size); ++ ++ adapter->tx_ring = tx_ring; ++ adapter->rx_ring = rx_ring; ++ ++ rx_ring->count = max(ring->rx_pending,(u32)E1000_MIN_RXD); ++ rx_ring->count = min(rx_ring->count,(u32)(mac_type < e1000_82544 ? ++ E1000_MAX_RXD : E1000_MAX_82544_RXD)); ++ rx_ring->count = ALIGN(rx_ring->count, REQ_RX_DESCRIPTOR_MULTIPLE); ++ ++ tx_ring->count = max(ring->tx_pending,(u32)E1000_MIN_TXD); ++ tx_ring->count = min(tx_ring->count,(u32)(mac_type < e1000_82544 ? 
++ E1000_MAX_TXD : E1000_MAX_82544_TXD)); ++ tx_ring->count = ALIGN(tx_ring->count, REQ_TX_DESCRIPTOR_MULTIPLE); ++ ++ /* overwrite the counts with the new values */ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ tx_ring[i].count = tx_ring->count; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ rx_ring[i].count = rx_ring->count; ++ ++ if (netif_running(adapter->netdev)) { ++ /* Try to get new resources before deleting old */ ++ if ((err = e1000_setup_all_rx_resources(adapter))) ++ goto err_setup_rx; ++ if ((err = e1000_setup_all_tx_resources(adapter))) ++ goto err_setup_tx; ++ ++ /* restore the old in order to free it, ++ * then add in the new */ ++ adapter->rx_ring = rx_old; ++ adapter->tx_ring = tx_old; ++ e1000_free_all_rx_resources(adapter); ++ e1000_free_all_tx_resources(adapter); ++ kfree(tx_old); ++ kfree(rx_old); ++ adapter->rx_ring = rx_ring; ++ adapter->tx_ring = tx_ring; ++ if ((err = e1000_up(adapter))) ++ goto err_setup; ++ } ++ ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ return 0; ++err_setup_tx: ++ e1000_free_all_rx_resources(adapter); ++err_setup_rx: ++ adapter->rx_ring = rx_old; ++ adapter->tx_ring = tx_old; ++ kfree(rx_ring); ++err_alloc_rx: ++ kfree(tx_ring); ++err_alloc_tx: ++ e1000_up(adapter); ++err_setup: ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ return err; ++} ++ ++static bool reg_pattern_test(struct e1000_adapter *adapter, u64 *data, ++ int reg, int offset, u32 mask, u32 write) ++{ \ ++ u32 pat, val; ++ static const u32 test[] = ++ {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; ++ for (pat = 0; pat < ARRAY_SIZE(test); pat++) { ++ E1000_WRITE_REG_ARRAY(&adapter->hw, reg, offset, ++ (test[pat] & write)); ++ val = E1000_READ_REG_ARRAY(&adapter->hw, reg, offset); ++ if (val != (test[pat] & write & mask)) { ++ DPRINTK(DRV, ERR, "pattern test reg %04X failed: got " ++ "0x%08X expected 0x%08X\n", ++ E1000_REGISTER(&adapter->hw, reg) + offset, ++ val, (test[pat] & write & mask)); ++ *data = E1000_REGISTER(&adapter->hw, reg); ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++static bool reg_set_and_check(struct e1000_adapter *adapter, u64 *data, ++ int reg, u32 mask, u32 write) ++{ ++ u32 val; ++ E1000_WRITE_REG(&adapter->hw, reg, write & mask); ++ val = E1000_READ_REG(&adapter->hw, reg); ++ if ((write & mask) != (val & mask)) { ++ DPRINTK(DRV, ERR, "set/check reg %04X test failed: got 0x%08X" ++ "expected 0x%08X\n", reg, (val & mask), (write & mask)); ++ *data = E1000_REGISTER(&adapter->hw, reg); ++ return 1; ++ } ++ return 0; ++} ++#define REG_PATTERN_TEST_ARRAY(reg, offset, mask, write) \ ++ do { \ ++ if (reg_pattern_test(adapter, data, reg, offset, mask, write)) \ ++ return 1; \ ++ } while (0) ++#define REG_PATTERN_TEST(reg, mask, write) \ ++ REG_PATTERN_TEST_ARRAY(reg, 0, mask, write) ++ ++#define REG_SET_AND_CHECK(reg, mask, write) \ ++ do { \ ++ if (reg_set_and_check(adapter, data, reg, mask, write)) \ ++ return 1; \ ++ } while (0) ++ ++static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) ++{ ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ u32 value, before, after; ++ u32 i, toggle; ++ ++ /* The status register is Read Only, so a write should fail. ++ * Some bits that get toggled are ignored. 
++ */ ++ switch (mac->type) { ++ /* there are several bits on newer hardware that are r/w */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ toggle = 0x7FFFF3FF; ++ break; ++ case e1000_82573: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ toggle = 0x7FFFF033; ++ break; ++ default: ++ toggle = 0xFFFFF833; ++ break; ++ } ++ ++ before = E1000_READ_REG(&adapter->hw, E1000_STATUS); ++ value = (E1000_READ_REG(&adapter->hw, E1000_STATUS) & toggle); ++ E1000_WRITE_REG(&adapter->hw, E1000_STATUS, toggle); ++ after = E1000_READ_REG(&adapter->hw, E1000_STATUS) & toggle; ++ if (value != after) { ++ DPRINTK(DRV, ERR, "failed STATUS register test got: " ++ "0x%08X expected: 0x%08X\n", after, value); ++ *data = 1; ++ return 1; ++ } ++ /* restore previous status */ ++ E1000_WRITE_REG(&adapter->hw, E1000_STATUS, before); ++ ++ if ((mac->type != e1000_ich8lan) && ++ (mac->type != e1000_ich9lan)) { ++ REG_PATTERN_TEST(E1000_FCAL, 0xFFFFFFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_FCAH, 0x0000FFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_FCT, 0x0000FFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_VET, 0x0000FFFF, 0xFFFFFFFF); ++ } ++ ++ REG_PATTERN_TEST(E1000_RDTR, 0x0000FFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_RDBAH(0), 0xFFFFFFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_RDLEN(0), 0x000FFF80, 0x000FFFFF); ++ REG_PATTERN_TEST(E1000_RDH(0), 0x0000FFFF, 0x0000FFFF); ++ REG_PATTERN_TEST(E1000_RDT(0), 0x0000FFFF, 0x0000FFFF); ++ REG_PATTERN_TEST(E1000_FCRTH, 0x0000FFF8, 0x0000FFF8); ++ REG_PATTERN_TEST(E1000_FCTTV, 0x0000FFFF, 0x0000FFFF); ++ REG_PATTERN_TEST(E1000_TIPG, 0x3FFFFFFF, 0x3FFFFFFF); ++ REG_PATTERN_TEST(E1000_TDBAH(0), 0xFFFFFFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_TDLEN(0), 0x000FFF80, 0x000FFFFF); ++ ++ REG_SET_AND_CHECK(E1000_RCTL, 0xFFFFFFFF, 0x00000000); ++ ++ before = (((mac->type == e1000_ich8lan) || ++ (mac->type == e1000_ich9lan)) ? 
0x06C3B33E : 0x06DFB3FE); ++ REG_SET_AND_CHECK(E1000_RCTL, before, 0x003FFFFB); ++ REG_SET_AND_CHECK(E1000_TCTL, 0xFFFFFFFF, 0x00000000); ++ ++ if (mac->type >= e1000_82543) { ++ ++ REG_SET_AND_CHECK(E1000_RCTL, before, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_RDBAL(0), 0xFFFFFFF0, 0xFFFFFFFF); ++ if ((mac->type != e1000_ich8lan) && ++ (mac->type != e1000_ich9lan)) ++ REG_PATTERN_TEST(E1000_TXCW, 0xC000FFFF, 0x0000FFFF); ++ REG_PATTERN_TEST(E1000_TDBAL(0), 0xFFFFFFF0, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_TIDV, 0x0000FFFF, 0x0000FFFF); ++ for (i = 0; i < mac->rar_entry_count; i++) { ++ REG_PATTERN_TEST_ARRAY(E1000_RA, ((i << 1) + 1), ++ 0x8003FFFF, 0xFFFFFFFF); ++ } ++ ++ } else { ++ ++ REG_SET_AND_CHECK(E1000_RCTL, 0xFFFFFFFF, 0x01FFFFFF); ++ REG_PATTERN_TEST(E1000_RDBAL(0), 0xFFFFF000, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_TXCW, 0x0000FFFF, 0x0000FFFF); ++ REG_PATTERN_TEST(E1000_TDBAL(0), 0xFFFFF000, 0xFFFFFFFF); ++ ++ } ++ ++ for (i = 0; i < mac->mta_reg_count; i++) ++ REG_PATTERN_TEST_ARRAY(E1000_MTA, i, 0xFFFFFFFF, 0xFFFFFFFF); ++ ++ *data = 0; ++ return 0; ++} ++ ++static int e1000_eeprom_test(struct e1000_adapter *adapter, u64 *data) ++{ ++ u16 temp; ++ u16 checksum = 0; ++ u16 i; ++ ++ *data = 0; ++ /* Read and add up the contents of the EEPROM */ ++ for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { ++ if ((e1000_read_nvm(&adapter->hw, i, 1, &temp)) < 0) { ++ *data = 1; ++ break; ++ } ++ checksum += temp; ++ } ++ ++ /* If Checksum is not Correct return error else test passed */ ++ if ((checksum != (u16) NVM_SUM) && !(*data)) ++ *data = 2; ++ ++ return *data; ++} ++ ++static irqreturn_t e1000_test_intr(int irq, void *data) ++{ ++ struct net_device *netdev = (struct net_device *) data; ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ adapter->test_icr |= E1000_READ_REG(&adapter->hw, E1000_ICR); ++ ++ return IRQ_HANDLED; ++} ++ ++static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) ++{ ++ struct net_device *netdev = adapter->netdev; ++ u32 mask, i=0, shared_int = TRUE; ++ u32 irq = adapter->pdev->irq; ++ ++ *data = 0; ++ ++ /* NOTE: we don't test MSI interrupts here, yet */ ++ /* Hook up test interrupt handler just for this test */ ++ if (!request_irq(irq, &e1000_test_intr, IRQF_PROBE_SHARED, netdev->name, ++ netdev)) ++ shared_int = FALSE; ++ else if (request_irq(irq, &e1000_test_intr, IRQF_SHARED, ++ netdev->name, netdev)) { ++ *data = 1; ++ return -1; ++ } ++ DPRINTK(HW, INFO, "testing %s interrupt\n", ++ (shared_int ? "shared" : "unshared")); ++ ++ /* Disable all the interrupts */ ++ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xFFFFFFFF); ++ msleep(10); ++ ++ /* Test each interrupt */ ++ for (; i < 10; i++) { ++ ++ if (((adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) && i == 8) ++ continue; ++ ++ /* Interrupt to test */ ++ mask = 1 << i; ++ ++ if (!shared_int) { ++ /* Disable the interrupt to be reported in ++ * the cause register and then force the same ++ * interrupt and see if one gets posted. If ++ * an interrupt was posted to the bus, the ++ * test failed. ++ */ ++ adapter->test_icr = 0; ++ E1000_WRITE_REG(&adapter->hw, E1000_IMC, mask); ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, mask); ++ msleep(10); ++ ++ if (adapter->test_icr & mask) { ++ *data = 3; ++ break; ++ } ++ } ++ ++ /* Enable the interrupt to be reported in ++ * the cause register and then force the same ++ * interrupt and see if one gets posted. If ++ * an interrupt was not posted to the bus, the ++ * test failed. 
++ */ ++ adapter->test_icr = 0; ++ E1000_WRITE_REG(&adapter->hw, E1000_IMS, mask); ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, mask); ++ msleep(10); ++ ++ if (!(adapter->test_icr & mask)) { ++ *data = 4; ++ break; ++ } ++ ++ if (!shared_int) { ++ /* Disable the other interrupts to be reported in ++ * the cause register and then force the other ++ * interrupts and see if any get posted. If ++ * an interrupt was posted to the bus, the ++ * test failed. ++ */ ++ adapter->test_icr = 0; ++ E1000_WRITE_REG(&adapter->hw, E1000_IMC, ++ ~mask & 0x00007FFF); ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, ++ ~mask & 0x00007FFF); ++ msleep(10); ++ ++ if (adapter->test_icr) { ++ *data = 5; ++ break; ++ } ++ } ++ } ++ ++ /* Disable all the interrupts */ ++ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xFFFFFFFF); ++ msleep(10); ++ ++ /* Unhook test interrupt handler */ ++ free_irq(irq, netdev); ++ ++ return *data; ++} ++ ++static void e1000_free_desc_rings(struct e1000_adapter *adapter) ++{ ++ struct e1000_tx_ring *tx_ring = &adapter->test_tx_ring; ++ struct e1000_rx_ring *rx_ring = &adapter->test_rx_ring; ++ struct pci_dev *pdev = adapter->pdev; ++ int i; ++ ++ if (tx_ring->desc && tx_ring->buffer_info) { ++ for (i = 0; i < tx_ring->count; i++) { ++ if (tx_ring->buffer_info[i].dma) ++ pci_unmap_single(pdev, tx_ring->buffer_info[i].dma, ++ tx_ring->buffer_info[i].length, ++ PCI_DMA_TODEVICE); ++ if (tx_ring->buffer_info[i].skb) ++ dev_kfree_skb(tx_ring->buffer_info[i].skb); ++ } ++ } ++ ++ if (rx_ring->desc && rx_ring->buffer_info) { ++ for (i = 0; i < rx_ring->count; i++) { ++ if (rx_ring->buffer_info[i].dma) ++ pci_unmap_single(pdev, rx_ring->buffer_info[i].dma, ++ E1000_RXBUFFER_2048, ++ PCI_DMA_FROMDEVICE); ++ if (rx_ring->buffer_info[i].skb) ++ dev_kfree_skb(rx_ring->buffer_info[i].skb); ++ } ++ } ++ ++ if (tx_ring->desc) { ++ pci_free_consistent(pdev, tx_ring->size, tx_ring->desc, tx_ring->dma); ++ tx_ring->desc = NULL; ++ } ++ if (rx_ring->desc) { ++ pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, rx_ring->dma); ++ rx_ring->desc = NULL; ++ } ++ ++ kfree(tx_ring->buffer_info); ++ tx_ring->buffer_info = NULL; ++ kfree(rx_ring->buffer_info); ++ rx_ring->buffer_info = NULL; ++ ++ return; ++} ++ ++static int e1000_setup_desc_rings(struct e1000_adapter *adapter) ++{ ++ struct e1000_tx_ring *tx_ring = &adapter->test_tx_ring; ++ struct e1000_rx_ring *rx_ring = &adapter->test_rx_ring; ++ struct pci_dev *pdev = adapter->pdev; ++ u32 rctl; ++ int i, ret_val; ++ ++ /* Setup Tx descriptor ring and Tx buffers */ ++ ++ if (!tx_ring->count) ++ tx_ring->count = E1000_DEFAULT_TXD; ++ ++ if (!(tx_ring->buffer_info = kcalloc(tx_ring->count, ++ sizeof(struct e1000_buffer), ++ GFP_KERNEL))) { ++ ret_val = 1; ++ goto err_nomem; ++ } ++ ++ tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc); ++ tx_ring->size = ALIGN(tx_ring->size, 4096); ++ if (!(tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size, ++ &tx_ring->dma))) { ++ ret_val = 2; ++ goto err_nomem; ++ } ++ tx_ring->next_to_use = tx_ring->next_to_clean = 0; ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(0), ++ ((u64) tx_ring->dma & 0x00000000FFFFFFFF)); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(0), ((u64) tx_ring->dma >> 32)); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(0), ++ tx_ring->count * sizeof(struct e1000_tx_desc)); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDH(0), 0); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0); ++ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, ++ E1000_TCTL_MULR | ++ E1000_TCTL_PSP | E1000_TCTL_EN | ++ 
E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT | ++ E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT); ++ ++ for (i = 0; i < tx_ring->count; i++) { ++ struct e1000_tx_desc *tx_desc = E1000_TX_DESC(*tx_ring, i); ++ struct sk_buff *skb; ++ unsigned int size = 1024; ++ ++ if (!(skb = alloc_skb(size, GFP_KERNEL))) { ++ ret_val = 3; ++ goto err_nomem; ++ } ++ skb_put(skb, size); ++ tx_ring->buffer_info[i].skb = skb; ++ tx_ring->buffer_info[i].length = skb->len; ++ tx_ring->buffer_info[i].dma = ++ pci_map_single(pdev, skb->data, skb->len, ++ PCI_DMA_TODEVICE); ++ tx_desc->buffer_addr = cpu_to_le64(tx_ring->buffer_info[i].dma); ++ tx_desc->lower.data = cpu_to_le32(skb->len); ++ tx_desc->lower.data |= cpu_to_le32(E1000_TXD_CMD_EOP | ++ E1000_TXD_CMD_IFCS); ++ if (adapter->hw.mac.type < e1000_82543) ++ tx_desc->lower.data |= E1000_TXD_CMD_RPS; ++ else ++ tx_desc->lower.data |= E1000_TXD_CMD_RS; ++ ++ tx_desc->upper.data = 0; ++ } ++ ++ /* Setup Rx descriptor ring and Rx buffers */ ++ ++ if (!rx_ring->count) ++ rx_ring->count = E1000_DEFAULT_RXD; ++ ++ if (!(rx_ring->buffer_info = kcalloc(rx_ring->count, ++ sizeof(struct e1000_rx_buffer), ++ GFP_KERNEL))) { ++ ret_val = 4; ++ goto err_nomem; ++ } ++ ++ rx_ring->size = rx_ring->count * sizeof(struct e1000_rx_desc); ++ if (!(rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size, ++ &rx_ring->dma))) { ++ ret_val = 5; ++ goto err_nomem; ++ } ++ rx_ring->next_to_use = rx_ring->next_to_clean = 0; ++ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(0), ++ ((u64) rx_ring->dma & 0xFFFFFFFF)); ++ E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(0), ((u64) rx_ring->dma >> 32)); ++ E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(0), rx_ring->size); ++ E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0); ++ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), 0); ++ rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_SZ_2048 | ++ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | ++ (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ ++ for (i = 0; i < rx_ring->count; i++) { ++ struct e1000_rx_desc *rx_desc = E1000_RX_DESC(*rx_ring, i); ++ struct sk_buff *skb; ++ ++ if (!(skb = alloc_skb(E1000_RXBUFFER_2048 + NET_IP_ALIGN, ++ GFP_KERNEL))) { ++ ret_val = 6; ++ goto err_nomem; ++ } ++ skb_reserve(skb, NET_IP_ALIGN); ++ rx_ring->buffer_info[i].skb = skb; ++ rx_ring->buffer_info[i].dma = ++ pci_map_single(pdev, skb->data, E1000_RXBUFFER_2048, ++ PCI_DMA_FROMDEVICE); ++ rx_desc->buffer_addr = cpu_to_le64(rx_ring->buffer_info[i].dma); ++ memset(skb->data, 0x00, skb->len); ++ } ++ ++ return 0; ++ ++err_nomem: ++ e1000_free_desc_rings(adapter); ++ return ret_val; ++} ++ ++static void e1000_phy_disable_receiver(struct e1000_adapter *adapter) ++{ ++ /* Write out to PHY registers 29 and 30 to disable the Receiver. */ ++ e1000_write_phy_reg(&adapter->hw, 29, 0x001F); ++ e1000_write_phy_reg(&adapter->hw, 30, 0x8FFC); ++ e1000_write_phy_reg(&adapter->hw, 29, 0x001A); ++ e1000_write_phy_reg(&adapter->hw, 30, 0x8FF0); ++} ++ ++static void e1000_phy_reset_clk_and_crs(struct e1000_adapter *adapter) ++{ ++ u16 phy_reg; ++ ++ /* Because we reset the PHY above, we need to re-force TX_CLK in the ++ * Extended PHY Specific Control Register to 25MHz clock. This ++ * value defaults back to a 2.5MHz clock when the PHY is reset. 
++ */ ++ e1000_read_phy_reg(&adapter->hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_reg); ++ phy_reg |= M88E1000_EPSCR_TX_CLK_25; ++ e1000_write_phy_reg(&adapter->hw, ++ M88E1000_EXT_PHY_SPEC_CTRL, phy_reg); ++ ++ /* In addition, because of the s/w reset above, we need to enable ++ * CRS on TX. This must be set for both full and half duplex ++ * operation. ++ */ ++ e1000_read_phy_reg(&adapter->hw, M88E1000_PHY_SPEC_CTRL, &phy_reg); ++ phy_reg |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ e1000_write_phy_reg(&adapter->hw, ++ M88E1000_PHY_SPEC_CTRL, phy_reg); ++} ++ ++static int e1000_nonintegrated_phy_loopback(struct e1000_adapter *adapter) ++{ ++ u32 ctrl_reg; ++ u16 phy_reg; ++ ++ /* Setup the Device Control Register for PHY loopback test. */ ++ ++ ctrl_reg = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ ctrl_reg |= (E1000_CTRL_ILOS | /* Invert Loss-Of-Signal */ ++ E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */ ++ E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */ ++ E1000_CTRL_SPD_1000 | /* Force Speed to 1000 */ ++ E1000_CTRL_FD); /* Force Duplex to FULL */ ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl_reg); ++ ++ /* Read the PHY Specific Control Register (0x10) */ ++ e1000_read_phy_reg(&adapter->hw, M88E1000_PHY_SPEC_CTRL, &phy_reg); ++ ++ /* Clear Auto-Crossover bits in PHY Specific Control Register ++ * (bits 6:5). ++ */ ++ phy_reg &= ~M88E1000_PSCR_AUTO_X_MODE; ++ e1000_write_phy_reg(&adapter->hw, M88E1000_PHY_SPEC_CTRL, phy_reg); ++ ++ /* Perform software reset on the PHY */ ++ e1000_phy_commit(&adapter->hw); ++ ++ /* Have to setup TX_CLK and TX_CRS after software reset */ ++ e1000_phy_reset_clk_and_crs(adapter); ++ ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, 0x8100); ++ ++ /* Wait for reset to complete. */ ++ udelay(500); ++ ++ /* Have to setup TX_CLK and TX_CRS after software reset */ ++ e1000_phy_reset_clk_and_crs(adapter); ++ ++ /* Write out to PHY registers 29 and 30 to disable the Receiver. */ ++ e1000_phy_disable_receiver(adapter); ++ ++ /* Set the loopback bit in the PHY control register. */ ++ e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_reg); ++ phy_reg |= MII_CR_LOOPBACK; ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_reg); ++ ++ /* Setup TX_CLK and TX_CRS one more time. */ ++ e1000_phy_reset_clk_and_crs(adapter); ++ ++ /* Check Phy Configuration */ ++ e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_reg); ++ if (phy_reg != 0x4100) ++ return 9; ++ ++ e1000_read_phy_reg(&adapter->hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_reg); ++ if (phy_reg != 0x0070) ++ return 10; ++ ++ e1000_read_phy_reg(&adapter->hw, 29, &phy_reg); ++ if (phy_reg != 0x001A) ++ return 11; ++ ++ return 0; ++} ++ ++static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter) ++{ ++ u32 ctrl_reg = 0; ++ u32 stat_reg = 0; ++ ++ adapter->hw.mac.autoneg = FALSE; ++ ++ if (adapter->hw.phy.type == e1000_phy_m88) { ++ /* Auto-MDI/MDIX Off */ ++ e1000_write_phy_reg(&adapter->hw, ++ M88E1000_PHY_SPEC_CTRL, 0x0808); ++ /* reset to update Auto-MDI/MDIX */ ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, 0x9140); ++ /* autoneg off */ ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, 0x8140); ++ } else if (adapter->hw.phy.type == e1000_phy_gg82563) ++ e1000_write_phy_reg(&adapter->hw, ++ GG82563_PHY_KMRN_MODE_CTRL, ++ 0x1CC); ++ ++ ctrl_reg = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ ++ if (adapter->hw.phy.type == e1000_phy_ife) { ++ /* force 100, set loopback */ ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, 0x6100); ++ ++ /* Now set up the MAC to the same speed/duplex as the PHY. 
*/ ++ ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */ ++ ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */ ++ E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */ ++ E1000_CTRL_SPD_100 |/* Force Speed to 100 */ ++ E1000_CTRL_FD); /* Force Duplex to FULL */ ++ } else { ++ /* force 1000, set loopback */ ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, 0x4140); ++ ++ /* Now set up the MAC to the same speed/duplex as the PHY. */ ++ ctrl_reg = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */ ++ ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */ ++ E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */ ++ E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */ ++ E1000_CTRL_FD); /* Force Duplex to FULL */ ++ } ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper && ++ adapter->hw.phy.type == e1000_phy_m88) { ++ ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */ ++ } else { ++ /* Set the ILOS bit on the fiber Nic if half duplex link is ++ * detected. */ ++ stat_reg = E1000_READ_REG(&adapter->hw, E1000_STATUS); ++ if ((stat_reg & E1000_STATUS_FD) == 0) ++ ctrl_reg |= (E1000_CTRL_ILOS | E1000_CTRL_SLU); ++ } ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl_reg); ++ ++ /* Disable the receiver on the PHY so when a cable is plugged in, the ++ * PHY does not begin to autoneg when a cable is reconnected to the NIC. ++ */ ++ if (adapter->hw.phy.type == e1000_phy_m88) ++ e1000_phy_disable_receiver(adapter); ++ ++ udelay(500); ++ ++ return 0; ++} ++ ++static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ int link = 0; ++ ++ /* special requirements for 82571/82572 fiber adapters */ ++ ++ /* jump through hoops to make sure link is up because serdes ++ * link is hardwired up */ ++ ctrl |= E1000_CTRL_SLU; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* disable autoneg */ ++ ctrl = E1000_READ_REG(hw, E1000_TXCW); ++ ctrl &= ~(1 << 31); ++ E1000_WRITE_REG(hw, E1000_TXCW, ctrl); ++ ++ link = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); ++ ++ if (!link) { ++ /* set invert loss of signal */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_ILOS; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ } ++ ++ /* special write to serdes control register to enable SerDes analog ++ * loopback */ ++#define E1000_SERDES_LB_ON 0x410 ++ E1000_WRITE_REG(hw, E1000_SCTL, E1000_SERDES_LB_ON); ++ msleep(10); ++ ++ return 0; ++} ++ ++static int e1000_set_phy_loopback(struct e1000_adapter *adapter) ++{ ++ u16 phy_reg = 0; ++ u16 count = 0; ++ ++ switch (adapter->hw.mac.type) { ++ case e1000_82543: ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ /* Attempt to setup Loopback mode on Non-integrated PHY. ++ * Some PHY registers get corrupted at random, so ++ * attempt this 10 times. 
++ */ ++ while (e1000_nonintegrated_phy_loopback(adapter) && ++ count++ < 10); ++ if (count < 11) ++ return 0; ++ } ++ break; ++ ++ case e1000_82544: ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ return e1000_integrated_phy_loopback(adapter); ++ break; ++ ++ default: ++ /* Default PHY loopback work is to read the MII ++ * control register and assert bit 14 (loopback mode). ++ */ ++ e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_reg); ++ phy_reg |= MII_CR_LOOPBACK; ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_reg); ++ return 0; ++ break; ++ } ++ ++ return 8; ++} ++ ++/* only call this for fiber/serdes connections to es2lan */ ++static int e1000_set_es2lan_mac_loopback(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrlext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* save CTRL_EXT to restore later, reuse an empty variable (unused ++ on mac_type 80003es2lan) */ ++ adapter->tx_fifo_head = ctrlext; ++ ++ /* clear the serdes mode bits, putting the device into mac loopback */ ++ ctrlext &= ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrlext); ++ ++ /* force speed to 1000/FD, link up */ ++ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX | ++ E1000_CTRL_SPD_1000 | E1000_CTRL_FD); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* set mac loopback */ ++ ctrl = E1000_READ_REG(hw, E1000_RCTL); ++ ctrl |= E1000_RCTL_LBM_MAC; ++ E1000_WRITE_REG(hw, E1000_RCTL, ctrl); ++ ++ /* set testing mode parameters (no need to reset later) */ ++#define KMRNCTRLSTA_OPMODE (0x1F << 16) ++#define KMRNCTRLSTA_OPMODE_1GB_FD_GMII 0x0582 ++ E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, ++ (KMRNCTRLSTA_OPMODE | KMRNCTRLSTA_OPMODE_1GB_FD_GMII)); ++ ++ return 0; ++} ++ ++static int e1000_setup_loopback_test(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ ++ if (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes) { ++ switch (hw->mac.type) { ++ case e1000_80003es2lan: ++ return e1000_set_es2lan_mac_loopback(adapter); ++ break; ++ case e1000_82545: ++ case e1000_82546: ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ return e1000_set_phy_loopback(adapter); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ return e1000_set_82571_fiber_loopback(adapter); ++ break; ++ default: ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ rctl |= E1000_RCTL_LBM_TCVR; ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++ return 0; ++ } ++ } else if (hw->phy.media_type == e1000_media_type_copper) ++ return e1000_set_phy_loopback(adapter); ++ ++ return 7; ++} ++ ++static void e1000_loopback_cleanup(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ u16 phy_reg; ++ ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++ ++ switch (hw->mac.type) { ++ case e1000_80003es2lan: ++ if (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes) { ++ /* restore CTRL_EXT, stealing space from 
tx_fifo_head */ ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, adapter->tx_fifo_head); ++ adapter->tx_fifo_head = 0; ++ } ++ /* fall through */ ++ case e1000_82571: ++ case e1000_82572: ++ if (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes) { ++#define E1000_SERDES_LB_OFF 0x400 ++ E1000_WRITE_REG(hw, E1000_SCTL, E1000_SERDES_LB_OFF); ++ msleep(10); ++ break; ++ } ++ /* Fall Through */ ++ case e1000_82545: ++ case e1000_82546: ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ default: ++ hw->mac.autoneg = TRUE; ++ if (hw->phy.type == e1000_phy_gg82563) ++ e1000_write_phy_reg(hw, ++ GG82563_PHY_KMRN_MODE_CTRL, ++ 0x180); ++ e1000_read_phy_reg(hw, PHY_CONTROL, &phy_reg); ++ if (phy_reg & MII_CR_LOOPBACK) { ++ phy_reg &= ~MII_CR_LOOPBACK; ++ e1000_write_phy_reg(hw, PHY_CONTROL, phy_reg); ++ e1000_phy_commit(hw); ++ } ++ break; ++ } ++} ++ ++static void e1000_create_lbtest_frame(struct sk_buff *skb, ++ unsigned int frame_size) ++{ ++ memset(skb->data, 0xFF, frame_size); ++ frame_size &= ~1; ++ memset(&skb->data[frame_size / 2], 0xAA, frame_size / 2 - 1); ++ memset(&skb->data[frame_size / 2 + 10], 0xBE, 1); ++ memset(&skb->data[frame_size / 2 + 12], 0xAF, 1); ++} ++ ++static int e1000_check_lbtest_frame(struct sk_buff *skb, unsigned int frame_size) ++{ ++ frame_size &= ~1; ++ if (*(skb->data + 3) == 0xFF) { ++ if ((*(skb->data + frame_size / 2 + 10) == 0xBE) && ++ (*(skb->data + frame_size / 2 + 12) == 0xAF)) { ++ return 0; ++ } ++ } ++ return 13; ++} ++ ++static int e1000_run_loopback_test(struct e1000_adapter *adapter) ++{ ++ struct e1000_tx_ring *tx_ring = &adapter->test_tx_ring; ++ struct e1000_rx_ring *rx_ring = &adapter->test_rx_ring; ++ struct pci_dev *pdev = adapter->pdev; ++ int i, j, k, l, lc, good_cnt, ret_val=0; ++ unsigned long time; ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rx_ring->count - 1); ++ ++ /* Calculate the loop count based on the largest descriptor ring ++ * The idea is to wrap the largest ring a number of times using 64 ++ * send/receive pairs during each loop ++ */ ++ ++ if (rx_ring->count <= tx_ring->count) ++ lc = ((tx_ring->count / 64) * 2) + 1; ++ else ++ lc = ((rx_ring->count / 64) * 2) + 1; ++ ++ k = l = 0; ++ for (j = 0; j <= lc; j++) { /* loop count loop */ ++ for (i = 0; i < 64; i++) { /* send the packets */ ++ e1000_create_lbtest_frame(tx_ring->buffer_info[k].skb, ++ 1024); ++ pci_dma_sync_single_for_device(pdev, ++ tx_ring->buffer_info[k].dma, ++ tx_ring->buffer_info[k].length, ++ PCI_DMA_TODEVICE); ++ if (unlikely(++k == tx_ring->count)) k = 0; ++ } ++ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), k); ++ msleep(200); ++ time = jiffies; /* set the start time for the receive */ ++ good_cnt = 0; ++ do { /* receive the sent packets */ ++ pci_dma_sync_single_for_cpu(pdev, ++ rx_ring->buffer_info[l].dma, ++ E1000_RXBUFFER_2048, ++ PCI_DMA_FROMDEVICE); ++ ++ ret_val = e1000_check_lbtest_frame( ++ rx_ring->buffer_info[l].skb, ++ 1024); ++ if (!ret_val) ++ good_cnt++; ++ if (unlikely(++l == rx_ring->count)) l = 0; ++ /* time + 20 msecs (200 msecs on 2.4) is more than ++ * enough time to complete the receives, if it's ++ * exceeded, break and error off ++ */ ++ } while (good_cnt < 64 && jiffies < (time + 20)); ++ if (good_cnt != 64) { ++ ret_val = 13; /* ret_val is the same as mis-compare */ ++ break; ++ } ++ if (jiffies >= (time + 20)) { ++ ret_val = 14; /* error code for time out error */ ++ break; ++ } ++ } /* end loop count loop */ ++ return ret_val; ++} ++ ++static int e1000_loopback_test(struct 
e1000_adapter *adapter, u64 *data) ++{ ++ /* PHY loopback cannot be performed if SoL/IDER ++ * sessions are active */ ++ if (e1000_check_reset_block(&adapter->hw)) { ++ DPRINTK(DRV, ERR, "Cannot do PHY loopback test " ++ "when SoL/IDER is active.\n"); ++ *data = 0; ++ goto out; ++ } ++ ++ if ((*data = e1000_setup_desc_rings(adapter))) ++ goto out; ++ if ((*data = e1000_setup_loopback_test(adapter))) ++ goto err_loopback; ++ *data = e1000_run_loopback_test(adapter); ++ e1000_loopback_cleanup(adapter); ++ ++err_loopback: ++ e1000_free_desc_rings(adapter); ++out: ++ return *data; ++} ++ ++static int e1000_link_test(struct e1000_adapter *adapter, u64 *data) ++{ ++ *data = 0; ++ if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ++ int i = 0; ++ adapter->hw.mac.serdes_has_link = FALSE; ++ ++ /* On some blade server designs, link establishment ++ * could take as long as 2-3 minutes */ ++ do { ++ e1000_check_for_link(&adapter->hw); ++ if (adapter->hw.mac.serdes_has_link == TRUE) ++ return *data; ++ msleep(20); ++ } while (i++ < 3750); ++ ++ *data = 1; ++ } else { ++ e1000_check_for_link(&adapter->hw); ++ if (adapter->hw.mac.autoneg) ++ msleep(4000); ++ ++ if (!(E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { ++ *data = 1; ++ } ++ } ++ return *data; ++} ++ ++static int e1000_diag_test_count(struct net_device *netdev) ++{ ++ return E1000_TEST_LEN; ++} ++ ++static void e1000_diag_test(struct net_device *netdev, ++ struct ethtool_test *eth_test, u64 *data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ u16 autoneg_advertised; ++ u8 forced_speed_duplex, autoneg; ++ bool if_running = netif_running(netdev); ++ ++ set_bit(__E1000_TESTING, &adapter->state); ++ if (eth_test->flags == ETH_TEST_FL_OFFLINE) { ++ /* Offline tests */ ++ ++ /* save speed, duplex, autoneg settings */ ++ autoneg_advertised = adapter->hw.phy.autoneg_advertised; ++ forced_speed_duplex = adapter->hw.mac.forced_speed_duplex; ++ autoneg = adapter->hw.mac.autoneg; ++ ++ DPRINTK(HW, INFO, "offline testing starting\n"); ++ ++ /* Link test performed before hardware reset so autoneg doesn't ++ * interfere with test result */ ++ if (e1000_link_test(adapter, &data[4])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ if (if_running) ++ /* indicate we're in test mode */ ++ dev_close(netdev); ++ else ++ e1000_reset(adapter); ++ ++ if (e1000_reg_test(adapter, &data[0])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ e1000_reset(adapter); ++ if (e1000_eeprom_test(adapter, &data[1])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ e1000_reset(adapter); ++ if (e1000_intr_test(adapter, &data[2])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ e1000_reset(adapter); ++ /* make sure the phy is powered up */ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ e1000_power_up_phy(&adapter->hw); ++ e1000_setup_link(&adapter->hw); ++ } ++ if (e1000_loopback_test(adapter, &data[3])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ /* restore speed, duplex, autoneg settings */ ++ adapter->hw.phy.autoneg_advertised = autoneg_advertised; ++ adapter->hw.mac.forced_speed_duplex = forced_speed_duplex; ++ adapter->hw.mac.autoneg = autoneg; ++ ++ /* force this routine to wait until autoneg complete/timeout */ ++ adapter->hw.phy.autoneg_wait_to_complete = TRUE; ++ e1000_reset(adapter); ++ adapter->hw.phy.autoneg_wait_to_complete = FALSE; ++ ++ clear_bit(__E1000_TESTING, &adapter->state); ++ if (if_running) ++ dev_open(netdev); ++ } else { ++ DPRINTK(HW, INFO, "online testing starting\n"); ++ /* Online 
tests */ ++ if (e1000_link_test(adapter, &data[4])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ /* Online tests aren't run; pass by default */ ++ data[0] = 0; ++ data[1] = 0; ++ data[2] = 0; ++ data[3] = 0; ++ ++ clear_bit(__E1000_TESTING, &adapter->state); ++ } ++ msleep_interruptible(4 * 1000); ++} ++ ++static int e1000_wol_exclusion(struct e1000_adapter *adapter, ++ struct ethtool_wolinfo *wol) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ int retval = 1; /* fail by default */ ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82542: ++ case E1000_DEV_ID_82543GC_FIBER: ++ case E1000_DEV_ID_82543GC_COPPER: ++ case E1000_DEV_ID_82544EI_FIBER: ++ case E1000_DEV_ID_82546EB_QUAD_COPPER: ++ case E1000_DEV_ID_82545EM_FIBER: ++ case E1000_DEV_ID_82545EM_COPPER: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER: ++ case E1000_DEV_ID_82546GB_PCIE: ++ case E1000_DEV_ID_82571EB_SERDES_QUAD: ++ /* these don't support WoL at all */ ++ wol->supported = 0; ++ break; ++ case E1000_DEV_ID_82546EB_FIBER: ++ case E1000_DEV_ID_82546GB_FIBER: ++ case E1000_DEV_ID_82571EB_FIBER: ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82571EB_COPPER: ++ /* Wake events not supported on port B */ ++ if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1) { ++ wol->supported = 0; ++ break; ++ } ++ /* return success for non excluded adapter ports */ ++ retval = 0; ++ break; ++ case E1000_DEV_ID_82571EB_QUAD_COPPER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: ++ case E1000_DEV_ID_82571PT_QUAD_COPPER: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ /* quad port adapters only support WoL on port A */ ++ if (!(adapter->flags & E1000_FLAG_QUAD_PORT_A)) { ++ wol->supported = 0; ++ break; ++ } ++ /* return success for non excluded adapter ports */ ++ retval = 0; ++ break; ++ default: ++ /* dual port cards only support WoL on port A from now on ++ * unless it was enabled in the eeprom for port B ++ * so exclude FUNC_1 ports from having WoL enabled */ ++ if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1 && ++ !adapter->eeprom_wol) { ++ wol->supported = 0; ++ break; ++ } ++ ++ retval = 0; ++ } ++ ++ return retval; ++} ++ ++static void e1000_get_wol(struct net_device *netdev, ++ struct ethtool_wolinfo *wol) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ wol->supported = WAKE_UCAST | WAKE_MCAST | ++ WAKE_BCAST | WAKE_MAGIC; ++ wol->wolopts = 0; ++ ++ /* this function will set ->supported = 0 and return 1 if wol is not ++ * supported by this hardware */ ++ if (e1000_wol_exclusion(adapter, wol)) ++ return; ++ ++ /* apply any specific unsupported masks here */ ++ switch (adapter->hw.device_id) { ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ /* KSP3 does not support UCAST wake-ups */ ++ wol->supported &= ~WAKE_UCAST; ++ ++ if (adapter->wol & E1000_WUFC_EX) ++ DPRINTK(DRV, ERR, "Interface does not support " ++ "directed (unicast) frame wake-up packets\n"); ++ break; ++ default: ++ break; ++ } ++ ++ if (adapter->wol & E1000_WUFC_EX) ++ wol->wolopts |= WAKE_UCAST; ++ if (adapter->wol & E1000_WUFC_MC) ++ wol->wolopts |= WAKE_MCAST; ++ if (adapter->wol & E1000_WUFC_BC) ++ wol->wolopts |= WAKE_BCAST; ++ if (adapter->wol & E1000_WUFC_MAG) ++ wol->wolopts |= WAKE_MAGIC; ++ ++ return; ++} ++ ++static int e1000_set_wol(struct net_device *netdev, ++ struct ethtool_wolinfo *wol) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE)) ++ return -EOPNOTSUPP; 
++ ++ if (e1000_wol_exclusion(adapter, wol)) ++ return wol->wolopts ? -EOPNOTSUPP : 0; ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ if (wol->wolopts & WAKE_UCAST) { ++ DPRINTK(DRV, ERR, "Interface does not support " ++ "directed (unicast) frame wake-up packets\n"); ++ return -EOPNOTSUPP; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ /* these settings will always override what we currently have */ ++ adapter->wol = 0; ++ ++ if (wol->wolopts & WAKE_UCAST) ++ adapter->wol |= E1000_WUFC_EX; ++ if (wol->wolopts & WAKE_MCAST) ++ adapter->wol |= E1000_WUFC_MC; ++ if (wol->wolopts & WAKE_BCAST) ++ adapter->wol |= E1000_WUFC_BC; ++ if (wol->wolopts & WAKE_MAGIC) ++ adapter->wol |= E1000_WUFC_MAG; ++ ++ return 0; ++} ++ ++/* toggle LED 4 times per second = 2 "blinks" per second */ ++#define E1000_ID_INTERVAL (HZ/4) ++ ++/* bit defines for adapter->led_status */ ++#define E1000_LED_ON 0 ++ ++static void e1000_led_blink_callback(unsigned long data) ++{ ++ struct e1000_adapter *adapter = (struct e1000_adapter *) data; ++ ++ if (test_and_change_bit(E1000_LED_ON, &adapter->led_status)) ++ e1000_led_off(&adapter->hw); ++ else ++ e1000_led_on(&adapter->hw); ++ ++ mod_timer(&adapter->blink_timer, jiffies + E1000_ID_INTERVAL); ++} ++ ++static int e1000_phys_id(struct net_device *netdev, u32 data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ if (!data) ++ data = INT_MAX; ++ ++ if (adapter->hw.mac.type < e1000_82571) { ++ if (!adapter->blink_timer.function) { ++ init_timer(&adapter->blink_timer); ++ adapter->blink_timer.function = e1000_led_blink_callback; ++ adapter->blink_timer.data = (unsigned long) adapter; ++ } ++ e1000_setup_led(&adapter->hw); ++ mod_timer(&adapter->blink_timer, jiffies); ++ msleep_interruptible(data * 1000); ++ del_timer_sync(&adapter->blink_timer); ++ } else if (adapter->hw.phy.type == e1000_phy_ife) { ++ if (!adapter->blink_timer.function) { ++ init_timer(&adapter->blink_timer); ++ adapter->blink_timer.function = e1000_led_blink_callback; ++ adapter->blink_timer.data = (unsigned long) adapter; ++ } ++ mod_timer(&adapter->blink_timer, jiffies); ++ msleep_interruptible(data * 1000); ++ del_timer_sync(&adapter->blink_timer); ++ e1000_write_phy_reg(&(adapter->hw), IFE_PHY_SPECIAL_CONTROL_LED, 0); ++ } else { ++ e1000_blink_led(&adapter->hw); ++ msleep_interruptible(data * 1000); ++ } ++ ++ e1000_led_off(&adapter->hw); ++ clear_bit(E1000_LED_ON, &adapter->led_status); ++ e1000_cleanup_led(&adapter->hw); ++ ++ return 0; ++} ++ ++static int e1000_get_coalesce(struct net_device *netdev, ++ struct ethtool_coalesce *ec) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ if (adapter->itr_setting <= 3) ++ ec->rx_coalesce_usecs = adapter->itr_setting; ++ else ++ ec->rx_coalesce_usecs = 1000000 / adapter->itr_setting; ++ ++ return 0; ++} ++ ++static int e1000_set_coalesce(struct net_device *netdev, ++ struct ethtool_coalesce *ec) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ if ((ec->rx_coalesce_usecs > E1000_MAX_ITR_USECS) || ++ ((ec->rx_coalesce_usecs > 3) && ++ (ec->rx_coalesce_usecs < E1000_MIN_ITR_USECS)) || ++ (ec->rx_coalesce_usecs == 2)) ++ return -EINVAL; ++ ++ if (!(adapter->flags & E1000_FLAG_HAS_INTR_MODERATION)) ++ return -ENOTSUPP; ++ ++ if (ec->rx_coalesce_usecs <= 3) { ++ adapter->itr = 20000; ++ adapter->itr_setting = ec->rx_coalesce_usecs; ++ } else { ++ adapter->itr = (1000000 / ec->rx_coalesce_usecs); ++ adapter->itr_setting = adapter->itr & ~3; ++ } ++ ++ if (adapter->itr_setting != 0) 
++ E1000_WRITE_REG(&adapter->hw, E1000_ITR, ++ 1000000000 / (adapter->itr * 256)); ++ else ++ E1000_WRITE_REG(&adapter->hw, E1000_ITR, 0); ++ ++ return 0; ++} ++ ++static int e1000_nway_reset(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ if (netif_running(netdev)) ++ e1000_reinit_locked(adapter); ++ return 0; ++} ++ ++static int e1000_get_stats_count(struct net_device *netdev) ++{ ++ return E1000_STATS_LEN; ++} ++ ++static void e1000_get_ethtool_stats(struct net_device *netdev, ++ struct ethtool_stats *stats, u64 *data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++#ifdef CONFIG_E1000_MQ ++ u64 *queue_stat; ++ int stat_count = sizeof(struct e1000_queue_stats) / sizeof(u64); ++ int j, k; ++#endif ++ int i; ++ ++ e1000_update_stats(adapter); ++ for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) { ++ char *p = (char *)adapter+e1000_gstrings_stats[i].stat_offset; ++ data[i] = (e1000_gstrings_stats[i].sizeof_stat == ++ sizeof(u64)) ? *(u64 *)p : *(u32 *)p; ++ } ++#ifdef CONFIG_E1000_MQ ++ if (adapter->num_tx_queues > 1) { ++ for (j = 0; j < adapter->num_tx_queues; j++) { ++ queue_stat = (u64 *)&adapter->tx_ring[j].tx_stats; ++ for (k = 0; k < stat_count; k++) ++ data[i + k] = queue_stat[k]; ++ i += k; ++ } ++ } ++ if (adapter->num_rx_queues > 1) { ++ for (j = 0; j < adapter->num_rx_queues; j++) { ++ queue_stat = (u64 *)&adapter->rx_ring[j].rx_stats; ++ for (k = 0; k < stat_count; k++) ++ data[i + k] = queue_stat[k]; ++ i += k; ++ } ++ } ++#endif ++/* BUG_ON(i != E1000_STATS_LEN); */ ++} ++ ++static void e1000_get_strings(struct net_device *netdev, u32 stringset, ++ u8 *data) ++{ ++#ifdef CONFIG_E1000_MQ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++#endif ++ u8 *p = data; ++ int i; ++ ++ switch (stringset) { ++ case ETH_SS_TEST: ++ memcpy(data, *e1000_gstrings_test, ++ E1000_TEST_LEN*ETH_GSTRING_LEN); ++ break; ++ case ETH_SS_STATS: ++ for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) { ++ memcpy(p, e1000_gstrings_stats[i].stat_string, ++ ETH_GSTRING_LEN); ++ p += ETH_GSTRING_LEN; ++ } ++#ifdef CONFIG_E1000_MQ ++ if (adapter->num_tx_queues > 1) { ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ sprintf(p, "tx_queue_%u_packets", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "tx_queue_%u_bytes", i); ++ p += ETH_GSTRING_LEN; ++ } ++ } ++ if (adapter->num_rx_queues > 1) { ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ sprintf(p, "rx_queue_%u_packets", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "rx_queue_%u_bytes", i); ++ p += ETH_GSTRING_LEN; ++ } ++ } ++#endif ++/* BUG_ON(p - data != E1000_STATS_LEN * ETH_GSTRING_LEN); */ ++ break; ++ } ++} ++ ++static struct ethtool_ops e1000_ethtool_ops = { ++ .get_settings = e1000_get_settings, ++ .set_settings = e1000_set_settings, ++ .get_drvinfo = e1000_get_drvinfo, ++ .get_regs_len = e1000_get_regs_len, ++ .get_regs = e1000_get_regs, ++ .get_wol = e1000_get_wol, ++ .set_wol = e1000_set_wol, ++ .get_msglevel = e1000_get_msglevel, ++ .set_msglevel = e1000_set_msglevel, ++ .nway_reset = e1000_nway_reset, ++ .get_link = ethtool_op_get_link, ++ .get_eeprom_len = e1000_get_eeprom_len, ++ .get_eeprom = e1000_get_eeprom, ++ .set_eeprom = e1000_set_eeprom, ++ .get_ringparam = e1000_get_ringparam, ++ .set_ringparam = e1000_set_ringparam, ++ .get_pauseparam = e1000_get_pauseparam, ++ .set_pauseparam = e1000_set_pauseparam, ++ .get_rx_csum = e1000_get_rx_csum, ++ .set_rx_csum = e1000_set_rx_csum, ++ .get_tx_csum = e1000_get_tx_csum, ++ .set_tx_csum = e1000_set_tx_csum, ++ .get_sg = ethtool_op_get_sg, 
++ .set_sg = ethtool_op_set_sg, ++#ifdef NETIF_F_TSO ++ .get_tso = ethtool_op_get_tso, ++ .set_tso = e1000_set_tso, ++#endif ++ .self_test_count = e1000_diag_test_count, ++ .self_test = e1000_diag_test, ++ .get_strings = e1000_get_strings, ++ .phys_id = e1000_phys_id, ++ .get_stats_count = e1000_get_stats_count, ++ .get_ethtool_stats = e1000_get_ethtool_stats, ++#ifdef ETHTOOL_GPERMADDR ++ .get_perm_addr = ethtool_op_get_perm_addr, ++#endif ++ .get_coalesce = e1000_get_coalesce, ++ .set_coalesce = e1000_set_coalesce, ++}; ++ ++void e1000_set_ethtool_ops(struct net_device *netdev) ++{ ++ SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops); ++} ++#endif /* SIOCETHTOOL */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_defines.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_defines.h 2021-04-07 16:01:27.648633565 +0800 +@@ -0,0 +1,1397 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_DEFINES_H_ ++#define _E1000_DEFINES_H_ ++ ++/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ ++#define REQ_TX_DESCRIPTOR_MULTIPLE 8 ++#define REQ_RX_DESCRIPTOR_MULTIPLE 8 ++ ++/* Definitions for power management and wakeup registers */ ++/* Wake Up Control */ ++#define E1000_WUC_APME 0x00000001 /* APM Enable */ ++#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ ++#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */ ++#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */ ++#define E1000_WUC_LSCWE 0x00000010 /* Link Status wake up enable */ ++#define E1000_WUC_LSCWO 0x00000020 /* Link Status wake up override */ ++#define E1000_WUC_SPM 0x80000000 /* Enable SPM */ ++#define E1000_WUC_PHY_WAKE 0x00000100 /* if PHY supports wakeup */ ++ ++/* Wake Up Filter Control */ ++#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ ++#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ ++#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ ++#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ ++#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ ++#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ ++#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ ++#define E1000_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */ ++#define E1000_WUFC_IGNORE_TCO 0x00008000 /* Ignore WakeOn TCO packets */ ++#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ ++#define E1000_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */ ++#define E1000_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */ ++#define E1000_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */ ++#define E1000_WUFC_ALL_FILTERS 0x000F00FF /* Mask for all wakeup filters */ ++#define E1000_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */ ++#define E1000_WUFC_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */ ++ ++/* Wake Up Status */ ++#define E1000_WUS_LNKC E1000_WUFC_LNKC ++#define E1000_WUS_MAG E1000_WUFC_MAG ++#define E1000_WUS_EX E1000_WUFC_EX ++#define E1000_WUS_MC E1000_WUFC_MC ++#define E1000_WUS_BC E1000_WUFC_BC ++#define E1000_WUS_ARP E1000_WUFC_ARP ++#define E1000_WUS_IPV4 E1000_WUFC_IPV4 ++#define E1000_WUS_IPV6 E1000_WUFC_IPV6 ++#define E1000_WUS_FLX0 E1000_WUFC_FLX0 ++#define E1000_WUS_FLX1 E1000_WUFC_FLX1 ++#define E1000_WUS_FLX2 E1000_WUFC_FLX2 ++#define E1000_WUS_FLX3 E1000_WUFC_FLX3 ++#define E1000_WUS_FLX_FILTERS E1000_WUFC_FLX_FILTERS ++ ++/* Wake Up Packet Length */ ++#define E1000_WUPL_LENGTH_MASK 0x0FFF /* Only the lower 12 bits are valid */ ++ ++/* Four Flexible Filters are supported */ ++#define E1000_FLEXIBLE_FILTER_COUNT_MAX 4 ++ ++/* Each Flexible Filter is at most 128 (0x80) bytes in length */ ++#define E1000_FLEXIBLE_FILTER_SIZE_MAX 128 ++ ++#define E1000_FFLT_SIZE E1000_FLEXIBLE_FILTER_COUNT_MAX ++#define E1000_FFMT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX ++#define E1000_FFVT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX ++ ++/* Extended Device Control */ ++#define E1000_CTRL_EXT_GPI0_EN 0x00000001 /* Maps SDP4 to GPI0 */ ++#define E1000_CTRL_EXT_GPI1_EN 0x00000002 /* Maps SDP5 to GPI1 */ ++#define E1000_CTRL_EXT_PHYINT_EN E1000_CTRL_EXT_GPI1_EN ++#define E1000_CTRL_EXT_GPI2_EN 0x00000004 /* Maps SDP6 to GPI2 */ ++#define E1000_CTRL_EXT_GPI3_EN 0x00000008 /* Maps SDP7 to GPI3 */ 
++/* Reserved (bits 4,5) in >= 82575 */ ++#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* Value of SW Defineable Pin 4 */ ++#define E1000_CTRL_EXT_SDP5_DATA 0x00000020 /* Value of SW Defineable Pin 5 */ ++#define E1000_CTRL_EXT_PHY_INT E1000_CTRL_EXT_SDP5_DATA ++#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* Value of SW Defineable Pin 6 */ ++#define E1000_CTRL_EXT_SDP7_DATA 0x00000080 /* Value of SW Defineable Pin 7 */ ++/* SDP 4/5 (bits 8,9) are reserved in >= 82575 */ ++#define E1000_CTRL_EXT_SDP4_DIR 0x00000100 /* Direction of SDP4 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP5_DIR 0x00000200 /* Direction of SDP5 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP7_DIR 0x00000800 /* Direction of SDP7 0=in 1=out */ ++#define E1000_CTRL_EXT_ASDCHK 0x00001000 /* Initiate an ASD sequence */ ++#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ ++#define E1000_CTRL_EXT_IPS 0x00004000 /* Invert Power State */ ++#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ ++#define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ ++#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 ++#define E1000_CTRL_EXT_LINK_MODE_TBI 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_KMRN 0x00000000 ++#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_PCIX_SERDES 0x00800000 ++#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 ++#define E1000_CTRL_EXT_EIAME 0x01000000 ++#define E1000_CTRL_EXT_IRCA 0x00000001 ++#define E1000_CTRL_EXT_WR_WMARK_MASK 0x03000000 ++#define E1000_CTRL_EXT_WR_WMARK_256 0x00000000 ++#define E1000_CTRL_EXT_WR_WMARK_320 0x01000000 ++#define E1000_CTRL_EXT_WR_WMARK_384 0x02000000 ++#define E1000_CTRL_EXT_WR_WMARK_448 0x03000000 ++#define E1000_CTRL_EXT_CANC 0x04000000 /* Interrupt delay cancellation */ ++#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ ++/* IAME enable bit (27) was removed in >= 82575 */ ++#define E1000_CTRL_EXT_IAME 0x08000000 /* Interrupt acknowledge Auto-mask */ ++#define E1000_CTRL_EXT_INT_TIMER_CLR 0x20000000 /* Clear Interrupt timers after IMS clear */ ++#define E1000_CRTL_EXT_PB_PAREN 0x01000000 /* packet buffer parity error detection enabled */ ++#define E1000_CTRL_EXT_DF_PAREN 0x02000000 /* descriptor FIFO parity error detection enable */ ++#define E1000_CTRL_EXT_GHOST_PAREN 0x40000000 ++#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ ++#define E1000_I2CCMD_REG_ADDR_SHIFT 16 ++#define E1000_I2CCMD_REG_ADDR 0x00FF0000 ++#define E1000_I2CCMD_PHY_ADDR_SHIFT 24 ++#define E1000_I2CCMD_PHY_ADDR 0x07000000 ++#define E1000_I2CCMD_OPCODE_READ 0x08000000 ++#define E1000_I2CCMD_OPCODE_WRITE 0x00000000 ++#define E1000_I2CCMD_RESET 0x10000000 ++#define E1000_I2CCMD_READY 0x20000000 ++#define E1000_I2CCMD_INTERRUPT_ENA 0x40000000 ++#define E1000_I2CCMD_ERROR 0x80000000 ++#define E1000_MAX_SGMII_PHY_REG_ADDR 255 ++#define E1000_I2CCMD_PHY_TIMEOUT 200 ++ ++/* Receive Decriptor bit definitions */ ++#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ ++#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ ++#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ ++#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ ++#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum caculated */ ++#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ ++#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ ++#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter 
*/ ++#define E1000_RXD_STAT_CRCV 0x100 /* Speculative CRC Valid */ ++#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */ ++#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ ++#define E1000_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ ++#define E1000_RXD_STAT_ACK 0x8000 /* ACK Packet indication */ ++#define E1000_RXD_ERR_CE 0x01 /* CRC Error */ ++#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ ++#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ ++#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ ++#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ ++#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ ++#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ ++#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ ++#define E1000_RXD_SPC_PRI_MASK 0xE000 /* Priority is in upper 3 bits */ ++#define E1000_RXD_SPC_PRI_SHIFT 13 ++#define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */ ++#define E1000_RXD_SPC_CFI_SHIFT 12 ++ ++#define E1000_RXDEXT_STATERR_CE 0x01000000 ++#define E1000_RXDEXT_STATERR_SE 0x02000000 ++#define E1000_RXDEXT_STATERR_SEQ 0x04000000 ++#define E1000_RXDEXT_STATERR_CXE 0x10000000 ++#define E1000_RXDEXT_STATERR_TCPE 0x20000000 ++#define E1000_RXDEXT_STATERR_IPE 0x40000000 ++#define E1000_RXDEXT_STATERR_RXE 0x80000000 ++ ++/* mask to determine if packets should be dropped due to frame errors */ ++#define E1000_RXD_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXD_ERR_CE | \ ++ E1000_RXD_ERR_SE | \ ++ E1000_RXD_ERR_SEQ | \ ++ E1000_RXD_ERR_CXE | \ ++ E1000_RXD_ERR_RXE) ++ ++/* Same mask, but for extended and packet split descriptors */ ++#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXDEXT_STATERR_CE | \ ++ E1000_RXDEXT_STATERR_SE | \ ++ E1000_RXDEXT_STATERR_SEQ | \ ++ E1000_RXDEXT_STATERR_CXE | \ ++ E1000_RXDEXT_STATERR_RXE) ++ ++#define E1000_MRQC_ENABLE_MASK 0x00000007 ++#define E1000_MRQC_ENABLE_RSS_2Q 0x00000001 ++#define E1000_MRQC_ENABLE_RSS_INT 0x00000004 ++#define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000 ++#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 ++#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 ++#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 ++#define E1000_MRQC_RSS_FIELD_IPV6_EX 0x00080000 ++#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000 ++#define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 ++ ++#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 ++#define E1000_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF ++ ++/* Management Control */ ++#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ ++#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ ++#define E1000_MANC_R_ON_FORCE 0x00000004 /* Reset on Force TCO - RO */ ++#define E1000_MANC_RMCP_EN 0x00000100 /* Enable RCMP 026Fh Filtering */ ++#define E1000_MANC_0298_EN 0x00000200 /* Enable RCMP 0298h Filtering */ ++#define E1000_MANC_IPV4_EN 0x00000400 /* Enable IPv4 */ ++#define E1000_MANC_IPV6_EN 0x00000800 /* Enable IPv6 */ ++#define E1000_MANC_SNAP_EN 0x00001000 /* Accept LLC/SNAP */ ++#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ ++/* Enable Neighbor Discovery Filtering */ ++#define E1000_MANC_NEIGHBOR_EN 0x00004000 ++#define E1000_MANC_ARP_RES_EN 0x00008000 /* Enable ARP response Filtering */ ++#define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */ ++#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ ++#define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled */ ++#define E1000_MANC_RCV_ALL 0x00080000 /* Receive All Enabled */ ++#define E1000_MANC_BLK_PHY_RST_ON_IDE 
0x00040000 /* Block phy resets */ ++/* Enable MAC address filtering */ ++#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 ++/* Enable MNG packets to host memory */ ++#define E1000_MANC_EN_MNG2HOST 0x00200000 ++/* Enable IP address filtering */ ++#define E1000_MANC_EN_IP_ADDR_FILTER 0x00400000 ++#define E1000_MANC_EN_XSUM_FILTER 0x00800000 /* Enable checksum filtering */ ++#define E1000_MANC_BR_EN 0x01000000 /* Enable broadcast filtering */ ++#define E1000_MANC_SMB_REQ 0x01000000 /* SMBus Request */ ++#define E1000_MANC_SMB_GNT 0x02000000 /* SMBus Grant */ ++#define E1000_MANC_SMB_CLK_IN 0x04000000 /* SMBus Clock In */ ++#define E1000_MANC_SMB_DATA_IN 0x08000000 /* SMBus Data In */ ++#define E1000_MANC_SMB_DATA_OUT 0x10000000 /* SMBus Data Out */ ++#define E1000_MANC_SMB_CLK_OUT 0x20000000 /* SMBus Clock Out */ ++ ++#define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */ ++#define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */ ++ ++/* Receive Control */ ++#define E1000_RCTL_RST 0x00000001 /* Software reset */ ++#define E1000_RCTL_EN 0x00000002 /* enable */ ++#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ ++#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ ++#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ ++#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ ++#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ ++#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ ++#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */ ++#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ ++#define E1000_RCTL_DTYP_MASK 0x00000C00 /* Descriptor type mask */ ++#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ ++#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */ ++#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */ ++#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */ ++#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ ++#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */ ++#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */ ++#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */ ++#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ ++#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */ ++#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ ++#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */ ++#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */ ++#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */ ++#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ ++#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */ ++#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */ ++#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */ ++#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ ++#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ ++#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ ++#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ ++#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ ++#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ ++#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ ++#define E1000_RCTL_FLXBUF_MASK 0x78000000 /* Flexible buffer size */ ++#define 
E1000_RCTL_FLXBUF_SHIFT 27 /* Flexible buffer shift */ ++ ++/* ++ * Use byte values for the following shift parameters ++ * Usage: ++ * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & ++ * E1000_PSRCTL_BSIZE0_MASK) | ++ * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & ++ * E1000_PSRCTL_BSIZE1_MASK) | ++ * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & ++ * E1000_PSRCTL_BSIZE2_MASK) | ++ * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; ++ * E1000_PSRCTL_BSIZE3_MASK)) ++ * where value0 = [128..16256], default=256 ++ * value1 = [1024..64512], default=4096 ++ * value2 = [0..64512], default=4096 ++ * value3 = [0..64512], default=0 ++ */ ++ ++#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F ++#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 ++#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 ++#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 ++ ++#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ ++#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ ++#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ ++#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ ++ ++/* SWFW_SYNC Definitions */ ++#define E1000_SWFW_EEP_SM 0x1 ++#define E1000_SWFW_PHY0_SM 0x2 ++#define E1000_SWFW_PHY1_SM 0x4 ++#define E1000_SWFW_CSR_SM 0x8 ++ ++/* FACTPS Definitions */ ++#define E1000_FACTPS_LFS 0x40000000 /* LAN Function Select */ ++/* Device Control */ ++#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ ++#define E1000_CTRL_BEM 0x00000002 /* Endian Mode.0=little,1=big */ ++#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */ ++#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master requests */ ++#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ ++#define E1000_CTRL_TME 0x00000010 /* Test mode. 
0=normal,1=test */ ++#define E1000_CTRL_SLE 0x00000020 /* Serial Link on 0=dis,1=en */ ++#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ ++#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ ++#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ ++#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ ++#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ ++#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ ++#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ ++#define E1000_CTRL_BEM32 0x00000400 /* Big Endian 32 mode */ ++#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ ++#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ ++#define E1000_CTRL_D_UD_EN 0x00002000 /* Dock/Undock enable */ ++#define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */ ++#define E1000_CTRL_FORCE_PHY_RESET 0x00008000 /* Reset both PHY ports, through PHYRST_N pin */ ++#define E1000_CTRL_EXT_LINK_EN 0x00010000 /* enable link status from external LINK_0 and LINK_1 pins */ ++#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ ++#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ ++#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ ++#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ ++#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ ++#define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */ ++#define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */ ++#define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */ ++#define E1000_CTRL_RST 0x04000000 /* Global reset */ ++#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ ++#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ ++#define E1000_CTRL_RTE 0x20000000 /* Routing tag enable */ ++#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ ++#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ ++#define E1000_CTRL_SW2FW_INT 0x02000000 /* Initiate an interrupt to manageability engine */ ++#define E1000_CTRL_I2C_ENA 0x02000000 /* I2C enable */ ++ ++/* Bit definitions for the Management Data IO (MDIO) and Management Data ++ * Clock (MDC) pins in the Device Control Register. 
++ */ ++#define E1000_CTRL_PHY_RESET_DIR E1000_CTRL_SWDPIO0 ++#define E1000_CTRL_PHY_RESET E1000_CTRL_SWDPIN0 ++#define E1000_CTRL_MDIO_DIR E1000_CTRL_SWDPIO2 ++#define E1000_CTRL_MDIO E1000_CTRL_SWDPIN2 ++#define E1000_CTRL_MDC_DIR E1000_CTRL_SWDPIO3 ++#define E1000_CTRL_MDC E1000_CTRL_SWDPIN3 ++#define E1000_CTRL_PHY_RESET_DIR4 E1000_CTRL_EXT_SDP4_DIR ++#define E1000_CTRL_PHY_RESET4 E1000_CTRL_EXT_SDP4_DATA ++ ++#define E1000_CONNSW_ENRGSRC 0x4 ++#define E1000_PCS_LCTL_FLV_LINK_UP 1 ++#define E1000_PCS_LCTL_FSV_10 0 ++#define E1000_PCS_LCTL_FSV_100 2 ++#define E1000_PCS_LCTL_FSV_1000 4 ++#define E1000_PCS_LCTL_FDV_FULL 8 ++#define E1000_PCS_LCTL_FSD 0x10 ++#define E1000_PCS_LCTL_FORCE_LINK 0x20 ++#define E1000_PCS_LCTL_LOW_LINK_LATCH 0x40 ++#define E1000_PCS_LCTL_AN_ENABLE 0x10000 ++#define E1000_PCS_LCTL_AN_RESTART 0x20000 ++#define E1000_PCS_LCTL_AN_TIMEOUT 0x40000 ++#define E1000_PCS_LCTL_AN_SGMII_BYPASS 0x80000 ++#define E1000_PCS_LCTL_AN_SGMII_TRIGGER 0x100000 ++#define E1000_PCS_LCTL_FAST_LINK_TIMER 0x1000000 ++#define E1000_PCS_LCTL_LINK_OK_FIX 0x2000000 ++#define E1000_PCS_LCTL_CRS_ON_NI 0x4000000 ++#define E1000_ENABLE_SERDES_LOOPBACK 0x0410 ++ ++#define E1000_PCS_LSTS_LINK_OK 1 ++#define E1000_PCS_LSTS_SPEED_10 0 ++#define E1000_PCS_LSTS_SPEED_100 2 ++#define E1000_PCS_LSTS_SPEED_1000 4 ++#define E1000_PCS_LSTS_DUPLEX_FULL 8 ++#define E1000_PCS_LSTS_SYNK_OK 0x10 ++#define E1000_PCS_LSTS_AN_COMPLETE 0x10000 ++#define E1000_PCS_LSTS_AN_PAGE_RX 0x20000 ++#define E1000_PCS_LSTS_AN_TIMED_OUT 0x40000 ++#define E1000_PCS_LSTS_AN_REMOTE_FAULT 0x80000 ++#define E1000_PCS_LSTS_AN_ERROR_RWS 0x100000 ++ ++/* Device Status */ ++#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ ++#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ ++#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ ++#define E1000_STATUS_FUNC_SHIFT 2 ++#define E1000_STATUS_FUNC_0 0x00000000 /* Function 0 */ ++#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ ++#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ ++#define E1000_STATUS_TBIMODE 0x00000020 /* TBI mode */ ++#define E1000_STATUS_SPEED_MASK 0x000000C0 ++#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ ++#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ ++#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ ++#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion by NVM */ ++#define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */ ++#define E1000_STATUS_DOCK_CI 0x00000800 /* Change in Dock/Undock state. Clear on write '0'. */ ++#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. 
*/ ++#define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */ ++#define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */ ++#define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */ ++#define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */ ++#define E1000_STATUS_PCIX_SPEED 0x0000C000 /* PCI-X bus speed */ ++#define E1000_STATUS_BMC_SKU_0 0x00100000 /* BMC USB redirect disabled */ ++#define E1000_STATUS_BMC_SKU_1 0x00200000 /* BMC SRAM disabled */ ++#define E1000_STATUS_BMC_SKU_2 0x00400000 /* BMC SDRAM disabled */ ++#define E1000_STATUS_BMC_CRYPTO 0x00800000 /* BMC crypto disabled */ ++#define E1000_STATUS_BMC_LITE 0x01000000 /* BMC external code execution disabled */ ++#define E1000_STATUS_RGMII_ENABLE 0x02000000 /* RGMII disabled */ ++#define E1000_STATUS_FUSE_8 0x04000000 ++#define E1000_STATUS_FUSE_9 0x08000000 ++#define E1000_STATUS_SERDES0_DIS 0x10000000 /* SERDES disabled on port 0 */ ++#define E1000_STATUS_SERDES1_DIS 0x20000000 /* SERDES disabled on port 1 */ ++ ++/* Constants used to intrepret the masked PCI-X bus speed. */ ++#define E1000_STATUS_PCIX_SPEED_66 0x00000000 /* PCI-X bus speed 50-66 MHz */ ++#define E1000_STATUS_PCIX_SPEED_100 0x00004000 /* PCI-X bus speed 66-100 MHz */ ++#define E1000_STATUS_PCIX_SPEED_133 0x00008000 /* PCI-X bus speed 100-133 MHz */ ++ ++#define SPEED_10 10 ++#define SPEED_100 100 ++#define SPEED_1000 1000 ++#define HALF_DUPLEX 1 ++#define FULL_DUPLEX 2 ++ ++#define PHY_FORCE_TIME 20 ++ ++#define ADVERTISE_10_HALF 0x0001 ++#define ADVERTISE_10_FULL 0x0002 ++#define ADVERTISE_100_HALF 0x0004 ++#define ADVERTISE_100_FULL 0x0008 ++#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ ++#define ADVERTISE_1000_FULL 0x0020 ++ ++/* 1000/H is not supported, nor spec-compliant. */ ++#define E1000_ALL_SPEED_DUPLEX ( ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ++ ADVERTISE_1000_FULL) ++#define E1000_ALL_NOT_GIG ( ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL) ++#define E1000_ALL_FULL_DUPLEX (ADVERTISE_10_FULL | ADVERTISE_100_FULL | \ ++ ADVERTISE_1000_FULL) ++#define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF) ++ ++#define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX ++ ++/* LED Control */ ++#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F ++#define E1000_LEDCTL_LED0_MODE_SHIFT 0 ++#define E1000_LEDCTL_LED0_BLINK_RATE 0x00000020 ++#define E1000_LEDCTL_LED0_IVRT 0x00000040 ++#define E1000_LEDCTL_LED0_BLINK 0x00000080 ++#define E1000_LEDCTL_LED1_MODE_MASK 0x00000F00 ++#define E1000_LEDCTL_LED1_MODE_SHIFT 8 ++#define E1000_LEDCTL_LED1_BLINK_RATE 0x00002000 ++#define E1000_LEDCTL_LED1_IVRT 0x00004000 ++#define E1000_LEDCTL_LED1_BLINK 0x00008000 ++#define E1000_LEDCTL_LED2_MODE_MASK 0x000F0000 ++#define E1000_LEDCTL_LED2_MODE_SHIFT 16 ++#define E1000_LEDCTL_LED2_BLINK_RATE 0x00200000 ++#define E1000_LEDCTL_LED2_IVRT 0x00400000 ++#define E1000_LEDCTL_LED2_BLINK 0x00800000 ++#define E1000_LEDCTL_LED3_MODE_MASK 0x0F000000 ++#define E1000_LEDCTL_LED3_MODE_SHIFT 24 ++#define E1000_LEDCTL_LED3_BLINK_RATE 0x20000000 ++#define E1000_LEDCTL_LED3_IVRT 0x40000000 ++#define E1000_LEDCTL_LED3_BLINK 0x80000000 ++ ++#define E1000_LEDCTL_MODE_LINK_10_1000 0x0 ++#define E1000_LEDCTL_MODE_LINK_100_1000 0x1 ++#define E1000_LEDCTL_MODE_LINK_UP 0x2 ++#define E1000_LEDCTL_MODE_ACTIVITY 0x3 ++#define 
E1000_LEDCTL_MODE_LINK_ACTIVITY 0x4 ++#define E1000_LEDCTL_MODE_LINK_10 0x5 ++#define E1000_LEDCTL_MODE_LINK_100 0x6 ++#define E1000_LEDCTL_MODE_LINK_1000 0x7 ++#define E1000_LEDCTL_MODE_PCIX_MODE 0x8 ++#define E1000_LEDCTL_MODE_FULL_DUPLEX 0x9 ++#define E1000_LEDCTL_MODE_COLLISION 0xA ++#define E1000_LEDCTL_MODE_BUS_SPEED 0xB ++#define E1000_LEDCTL_MODE_BUS_SIZE 0xC ++#define E1000_LEDCTL_MODE_PAUSED 0xD ++#define E1000_LEDCTL_MODE_LED_ON 0xE ++#define E1000_LEDCTL_MODE_LED_OFF 0xF ++ ++/* Transmit Descriptor bit definitions */ ++#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ ++#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */ ++#define E1000_TXD_POPTS_SHIFT 8 /* POPTS shift */ ++#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ ++#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ ++#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ ++#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ ++#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ ++#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ ++#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ ++#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ ++#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ ++#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ ++#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ ++#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ ++#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ ++#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ ++#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ ++#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ ++/* Extended desc bits for Linksec and timesync */ ++ ++/* Transmit Control */ ++#define E1000_TCTL_RST 0x00000001 /* software reset */ ++#define E1000_TCTL_EN 0x00000002 /* enable tx */ ++#define E1000_TCTL_BCE 0x00000004 /* busy check enable */ ++#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ ++#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ ++#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ ++#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */ ++#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */ ++#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ ++#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ ++#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ ++ ++/* Transmit Arbitration Count */ ++#define E1000_TARC0_ENABLE 0x00000400 /* Enable Tx Queue 0 */ ++ ++/* SerDes Control */ ++#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 ++ ++/* Receive Checksum Control */ ++#define E1000_RXCSUM_PCSS_MASK 0x000000FF /* Packet Checksum Start */ ++#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */ ++#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ ++#define E1000_RXCSUM_IPV6OFL 0x00000400 /* IPv6 checksum offload */ ++#define E1000_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ ++#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ ++#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ ++ ++/* Header split receive */ ++#define E1000_RFCTL_ISCSI_DIS 0x00000001 ++#define E1000_RFCTL_ISCSI_DWC_MASK 0x0000003E ++#define E1000_RFCTL_ISCSI_DWC_SHIFT 1 ++#define E1000_RFCTL_NFSW_DIS 0x00000040 ++#define E1000_RFCTL_NFSR_DIS 
0x00000080 ++#define E1000_RFCTL_NFS_VER_MASK 0x00000300 ++#define E1000_RFCTL_NFS_VER_SHIFT 8 ++#define E1000_RFCTL_IPV6_DIS 0x00000400 ++#define E1000_RFCTL_IPV6_XSUM_DIS 0x00000800 ++#define E1000_RFCTL_ACK_DIS 0x00001000 ++#define E1000_RFCTL_ACKD_DIS 0x00002000 ++#define E1000_RFCTL_IPFRSP_DIS 0x00004000 ++#define E1000_RFCTL_EXTEN 0x00008000 ++#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 ++#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 ++ ++/* Collision related configuration parameters */ ++#define E1000_COLLISION_THRESHOLD 15 ++#define E1000_CT_SHIFT 4 ++#define E1000_COLLISION_DISTANCE 63 ++#define E1000_COLD_SHIFT 12 ++ ++/* Default values for the transmit IPG register */ ++#define DEFAULT_82542_TIPG_IPGT 10 ++#define DEFAULT_82543_TIPG_IPGT_FIBER 9 ++#define DEFAULT_82543_TIPG_IPGT_COPPER 8 ++ ++#define E1000_TIPG_IPGT_MASK 0x000003FF ++#define E1000_TIPG_IPGR1_MASK 0x000FFC00 ++#define E1000_TIPG_IPGR2_MASK 0x3FF00000 ++ ++#define DEFAULT_82542_TIPG_IPGR1 2 ++#define DEFAULT_82543_TIPG_IPGR1 8 ++#define E1000_TIPG_IPGR1_SHIFT 10 ++ ++#define DEFAULT_82542_TIPG_IPGR2 10 ++#define DEFAULT_82543_TIPG_IPGR2 6 ++#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7 ++#define E1000_TIPG_IPGR2_SHIFT 20 ++ ++/* Ethertype field values */ ++#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ ++ ++#define ETHERNET_FCS_SIZE 4 ++#define MAX_JUMBO_FRAME_SIZE 0x3F00 ++ ++/* Extended Configuration Control and Size */ ++#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020 ++#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001 ++#define E1000_EXTCNF_CTRL_SWFLAG 0x00000020 ++#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK 0x00FF0000 ++#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT 16 ++#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK 0x0FFF0000 ++#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT 16 ++ ++#define E1000_PHY_CTRL_SPD_EN 0x00000001 ++#define E1000_PHY_CTRL_D0A_LPLU 0x00000002 ++#define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004 ++#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008 ++#define E1000_PHY_CTRL_GBE_DISABLE 0x00000040 ++ ++#define E1000_KABGTXD_BGSQLBIAS 0x00050000 ++ ++/* PBA constants */ ++#define E1000_PBA_8K 0x0008 /* 8KB */ ++#define E1000_PBA_12K 0x000C /* 12KB */ ++#define E1000_PBA_16K 0x0010 /* 16KB */ ++#define E1000_PBA_20K 0x0014 ++#define E1000_PBA_22K 0x0016 ++#define E1000_PBA_24K 0x0018 ++#define E1000_PBA_30K 0x001E ++#define E1000_PBA_32K 0x0020 ++#define E1000_PBA_34K 0x0022 ++#define E1000_PBA_38K 0x0026 ++#define E1000_PBA_40K 0x0028 ++#define E1000_PBA_48K 0x0030 /* 48KB */ ++#define E1000_PBA_64K 0x0040 /* 64KB */ ++ ++#define E1000_PBS_16K E1000_PBA_16K ++#define E1000_PBS_24K E1000_PBA_24K ++ ++#define IFS_MAX 80 ++#define IFS_MIN 40 ++#define IFS_RATIO 4 ++#define IFS_STEP 10 ++#define MIN_NUM_XMITS 1000 ++ ++/* SW Semaphore Register */ ++#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ ++#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ ++#define E1000_SWSM_WMNG 0x00000004 /* Wake MNG Clock */ ++#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */ ++ ++/* Interrupt Cause Read */ ++#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ ++#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ ++#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ ++#define E1000_ICR_RXSEQ 0x00000008 /* rx sequence error */ ++#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. 
threshold (0) */ ++#define E1000_ICR_RXO 0x00000040 /* rx overrun */ ++#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ ++#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */ ++#define E1000_ICR_RXCFG 0x00000400 /* Rx /c/ ordered set */ ++#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ ++#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */ ++#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */ ++#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */ ++#define E1000_ICR_TXD_LOW 0x00008000 ++#define E1000_ICR_SRPD 0x00010000 ++#define E1000_ICR_ACK 0x00020000 /* Receive Ack frame */ ++#define E1000_ICR_MNG 0x00040000 /* Manageability event */ ++#define E1000_ICR_DOCK 0x00080000 /* Dock/Undock */ ++#define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit asserted, the driver should claim the interrupt */ ++#define E1000_ICR_RXD_FIFO_PAR0 0x00100000 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_ICR_TXD_FIFO_PAR0 0x00200000 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_ICR_HOST_ARB_PAR 0x00400000 /* host arb read buffer parity error */ ++#define E1000_ICR_PB_PAR 0x00800000 /* packet buffer parity error */ ++#define E1000_ICR_RXD_FIFO_PAR1 0x01000000 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_ICR_TXD_FIFO_PAR1 0x02000000 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_ICR_ALL_PARITY 0x03F00000 /* all parity error bits */ ++#define E1000_ICR_DSW 0x00000020 /* FW changed the status of DISSW bit in the FWSM */ ++#define E1000_ICR_PHYINT 0x00001000 /* LAN connected device generates an interrupt */ ++#define E1000_ICR_EPRST 0x00100000 /* ME handware reset occurs */ ++ ++/* Extended Interrupt Cause Read */ ++#define E1000_EICR_RX_QUEUE0 0x00000001 /* Rx Queue 0 Interrupt */ ++#define E1000_EICR_RX_QUEUE1 0x00000002 /* Rx Queue 1 Interrupt */ ++#define E1000_EICR_RX_QUEUE2 0x00000004 /* Rx Queue 2 Interrupt */ ++#define E1000_EICR_RX_QUEUE3 0x00000008 /* Rx Queue 3 Interrupt */ ++#define E1000_EICR_TX_QUEUE0 0x00000100 /* Tx Queue 0 Interrupt */ ++#define E1000_EICR_TX_QUEUE1 0x00000200 /* Tx Queue 1 Interrupt */ ++#define E1000_EICR_TX_QUEUE2 0x00000400 /* Tx Queue 2 Interrupt */ ++#define E1000_EICR_TX_QUEUE3 0x00000800 /* Tx Queue 3 Interrupt */ ++#define E1000_EICR_TCP_TIMER 0x40000000 /* TCP Timer */ ++#define E1000_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ ++/* TCP Timer */ ++#define E1000_TCPTIMER_KS 0x00000100 /* KickStart */ ++#define E1000_TCPTIMER_COUNT_ENABLE 0x00000200 /* Count Enable */ ++#define E1000_TCPTIMER_COUNT_FINISH 0x00000400 /* Count finish */ ++#define E1000_TCPTIMER_LOOP 0x00000800 /* Loop */ ++ ++/* ++ * This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. Each bit is documented below: ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ */ ++#define POLL_IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ) ++ ++/* ++ * This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. 
Each bit is documented below: ++ * o RXT0 = Receiver Timer Interrupt (ring 0) ++ * o TXDW = Transmit Descriptor Written Back ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ * o LSC = Link Status Change ++ */ ++#define IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXT0 | \ ++ E1000_IMS_TXDW | \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ | \ ++ E1000_IMS_LSC) ++ ++/* Interrupt Mask Set */ ++#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ ++#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ ++#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */ ++#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */ ++#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* Rx /c/ ordered set */ ++#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ ++#define E1000_IMS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ ++#define E1000_IMS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ ++#define E1000_IMS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ ++#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW ++#define E1000_IMS_SRPD E1000_ICR_SRPD ++#define E1000_IMS_ACK E1000_ICR_ACK /* Receive Ack frame */ ++#define E1000_IMS_MNG E1000_ICR_MNG /* Manageability event */ ++#define E1000_IMS_DOCK E1000_ICR_DOCK /* Dock/Undock */ ++#define E1000_IMS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_IMS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_IMS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ ++#define E1000_IMS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ ++#define E1000_IMS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_IMS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_IMS_DSW E1000_ICR_DSW ++#define E1000_IMS_PHYINT E1000_ICR_PHYINT ++#define E1000_IMS_EPRST E1000_ICR_EPRST ++ ++/* Extended Interrupt Mask Set */ ++#define E1000_EIMS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ ++#define E1000_EIMS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ ++#define E1000_EIMS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ ++#define E1000_EIMS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ ++#define E1000_EIMS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ ++#define E1000_EIMS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ ++#define E1000_EIMS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ ++#define E1000_EIMS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ ++#define E1000_EIMS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ ++#define E1000_EIMS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ ++ ++/* Interrupt Cause Set */ ++#define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_ICS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ ++#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ ++#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */ ++#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */ ++#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* Rx /c/ ordered set */ ++#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ ++#define E1000_ICS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ ++#define E1000_ICS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ ++#define E1000_ICS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ ++#define E1000_ICS_TXD_LOW E1000_ICR_TXD_LOW ++#define E1000_ICS_SRPD E1000_ICR_SRPD ++#define E1000_ICS_ACK E1000_ICR_ACK /* Receive Ack frame */ ++#define E1000_ICS_MNG E1000_ICR_MNG /* Manageability event */ ++#define E1000_ICS_DOCK E1000_ICR_DOCK /* Dock/Undock */ ++#define E1000_ICS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_ICS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_ICS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ ++#define E1000_ICS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ ++#define E1000_ICS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_ICS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_ICS_DSW E1000_ICR_DSW ++#define E1000_ICS_PHYINT E1000_ICR_PHYINT ++#define E1000_ICS_EPRST E1000_ICR_EPRST ++ ++/* Extended Interrupt Cause Set */ ++#define E1000_EICS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ ++#define E1000_EICS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ ++#define E1000_EICS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ ++#define E1000_EICS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ ++#define E1000_EICS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ ++#define E1000_EICS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ ++#define E1000_EICS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ ++#define E1000_EICS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ ++#define E1000_EICS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ ++#define E1000_EICS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ ++ ++/* Transmit Descriptor Control */ ++#define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ ++#define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */ ++#define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */ ++#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */ ++#define E1000_TXDCTL_LWTHRESH 0xFE000000 /* TXDCTL Low Threshold */ ++#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ ++#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */ ++/* Enable the counting of descriptors still to be processed. */ ++#define E1000_TXDCTL_COUNT_DESC 0x00400000 ++ ++/* Flow Control Constants */ ++#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 ++#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 ++#define FLOW_CONTROL_TYPE 0x8808 ++ ++/* 802.1q VLAN Packet Size */ ++#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMA'd) */ ++#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ ++ ++/* Receive Address */ ++/* ++ * Number of high/low register pairs in the RAR. The RAR (Receive Address ++ * Registers) holds the directed and multicast addresses that we monitor. ++ * Technically, we have 16 spots. 
However, we reserve one of these spots ++ * (RAR[15]) for our directed address used by controllers with ++ * manageability enabled, allowing us room for 15 multicast addresses. ++ */ ++#define E1000_RAR_ENTRIES 15 ++#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ ++ ++/* Error Codes */ ++#define E1000_SUCCESS 0 ++#define E1000_ERR_NVM 1 ++#define E1000_ERR_PHY 2 ++#define E1000_ERR_CONFIG 3 ++#define E1000_ERR_PARAM 4 ++#define E1000_ERR_MAC_INIT 5 ++#define E1000_ERR_PHY_TYPE 6 ++#define E1000_ERR_RESET 9 ++#define E1000_ERR_MASTER_REQUESTS_PENDING 10 ++#define E1000_ERR_HOST_INTERFACE_COMMAND 11 ++#define E1000_BLK_PHY_RESET 12 ++#define E1000_ERR_SWFW_SYNC 13 ++#define E1000_NOT_IMPLEMENTED 14 ++ ++/* Loop limit on how long we wait for auto-negotiation to complete */ ++#define FIBER_LINK_UP_LIMIT 50 ++#define COPPER_LINK_UP_LIMIT 10 ++#define PHY_AUTO_NEG_LIMIT 45 ++#define PHY_FORCE_LIMIT 20 ++/* Number of 100 microseconds we wait for PCI Express master disable */ ++#define MASTER_DISABLE_TIMEOUT 800 ++/* Number of milliseconds we wait for PHY configuration done after MAC reset */ ++#define PHY_CFG_TIMEOUT 100 ++/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */ ++#define MDIO_OWNERSHIP_TIMEOUT 10 ++/* Number of milliseconds for NVM auto read done after MAC reset. */ ++#define AUTO_READ_DONE_TIMEOUT 10 ++ ++/* Flow Control */ ++#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */ ++#define E1000_FCRTH_XFCE 0x80000000 /* External Flow Control Enable */ ++#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */ ++#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ ++ ++/* Transmit Configuration Word */ ++#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */ ++#define E1000_TXCW_HD 0x00000040 /* TXCW half duplex */ ++#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ ++#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ ++#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */ ++#define E1000_TXCW_RF 0x00003000 /* TXCW remote fault */ ++#define E1000_TXCW_NP 0x00008000 /* TXCW next page */ ++#define E1000_TXCW_CW 0x0000ffff /* TxConfigWord mask */ ++#define E1000_TXCW_TXC 0x40000000 /* Transmit Config control */ ++#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */ ++ ++/* Receive Configuration Word */ ++#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */ ++#define E1000_RXCW_NC 0x04000000 /* Receive config no carrier */ ++#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */ ++#define E1000_RXCW_CC 0x10000000 /* Receive config change */ ++#define E1000_RXCW_C 0x20000000 /* Receive config */ ++#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ ++#define E1000_RXCW_ANC 0x80000000 /* Auto-neg complete */ ++ ++/* PCI Express Control */ ++#define E1000_GCR_RXD_NO_SNOOP 0x00000001 ++#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 ++#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 ++#define E1000_GCR_TXD_NO_SNOOP 0x00000008 ++#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 ++#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 ++ ++#define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ ++ E1000_GCR_RXDSCW_NO_SNOOP | \ ++ E1000_GCR_RXDSCR_NO_SNOOP | \ ++ E1000_GCR_TXD_NO_SNOOP | \ ++ E1000_GCR_TXDSCW_NO_SNOOP | \ ++ E1000_GCR_TXDSCR_NO_SNOOP) ++ ++/* PHY Control Register */ ++#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ ++#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ ++#define MII_CR_FULL_DUPLEX 0x0100 /* FDX 
=1, half duplex =0 */ ++#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ ++#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ ++#define MII_CR_POWER_DOWN 0x0800 /* Power down */ ++#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ ++#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ ++#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ ++#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ ++#define MII_CR_SPEED_1000 0x0040 ++#define MII_CR_SPEED_100 0x2000 ++#define MII_CR_SPEED_10 0x0000 ++ ++/* PHY Status Register */ ++#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ ++#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ ++#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ ++#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ ++#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ ++#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ ++#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ ++#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ ++#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ ++#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ ++#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ ++#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ ++#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ ++#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ ++#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ ++ ++/* Autoneg Advertisement Register */ ++#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */ ++#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ ++#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ ++#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ ++#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ ++#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */ ++#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ ++#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ ++#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */ ++#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */ ++ ++/* Link Partner Ability Register (Base Page) */ ++#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */ ++#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP is 10T Half Duplex Capable */ ++#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP is 10T Full Duplex Capable */ ++#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP is 100TX Half Duplex Capable */ ++#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP is 100TX Full Duplex Capable */ ++#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */ ++#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ ++#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ ++#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP has detected Remote Fault */ ++#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP has rx'd link code word */ ++#define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */ ++ ++/* Autoneg Expansion Register */ ++#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */ ++#define NWAY_ER_PAGE_RXD 0x0002 /* LP is 10T Half Duplex Capable */ ++#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* LP is 10T Full Duplex Capable */ ++#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP is 100TX Half Duplex Capable */ ++#define NWAY_ER_PAR_DETECT_FAULT 0x0010 
/* LP is 100TX Full Duplex Capable */ ++ ++/* 1000BASE-T Control Register */ ++#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */ ++#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ ++#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ ++#define CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port */ ++ /* 0=DTE device */ ++#define CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */ ++ /* 0=Configure PHY as Slave */ ++#define CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */ ++ /* 0=Automatic Master/Slave config */ ++#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */ ++#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */ ++#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */ ++#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */ ++#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */ ++ ++/* 1000BASE-T Status Register */ ++#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle errors since last read */ ++#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asymmetric pause direction bit */ ++#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */ ++#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */ ++#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ ++#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ ++#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local Tx is Master, 0=Slave */ ++#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */ ++ ++#define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT 5 ++ ++/* PHY 1000 MII Register/Bit Definitions */ ++/* PHY Registers defined by IEEE */ ++#define PHY_CONTROL 0x00 /* Control Register */ ++#define PHY_STATUS 0x01 /* Status Regiser */ ++#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ ++#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ ++#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ ++#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ ++#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ ++#define PHY_NEXT_PAGE_TX 0x07 /* Next Page Tx */ ++#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */ ++#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ ++#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ ++#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ ++ ++/* NVM Control */ ++#define E1000_EECD_SK 0x00000001 /* NVM Clock */ ++#define E1000_EECD_CS 0x00000002 /* NVM Chip Select */ ++#define E1000_EECD_DI 0x00000004 /* NVM Data In */ ++#define E1000_EECD_DO 0x00000008 /* NVM Data Out */ ++#define E1000_EECD_FWE_MASK 0x00000030 ++#define E1000_EECD_FWE_DIS 0x00000010 /* Disable FLASH writes */ ++#define E1000_EECD_FWE_EN 0x00000020 /* Enable FLASH writes */ ++#define E1000_EECD_FWE_SHIFT 4 ++#define E1000_EECD_REQ 0x00000040 /* NVM Access Request */ ++#define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */ ++#define E1000_EECD_PRES 0x00000100 /* NVM Present */ ++#define E1000_EECD_SIZE 0x00000200 /* NVM Size (0=64 word 1=256 word) */ ++/* NVM Addressing bits based on type 0=small, 1=large */ ++#define E1000_EECD_ADDR_BITS 0x00000400 ++#define E1000_EECD_TYPE 0x00002000 /* NVM Type (1-SPI, 0-Microwire) */ ++#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ ++#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ ++#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ ++#define E1000_EECD_SIZE_EX_SHIFT 11 ++#define E1000_EECD_NVADDS 0x00018000 /* NVM Address Size */ 
++#define E1000_EECD_SELSHAD 0x00020000 /* Select Shadow RAM */ ++#define E1000_EECD_INITSRAM 0x00040000 /* Initialize Shadow RAM */ ++#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ ++#define E1000_EECD_AUPDEN 0x00100000 /* Enable Autonomous FLASH update */ ++#define E1000_EECD_SHADV 0x00200000 /* Shadow RAM Data Valid */ ++#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ ++#define E1000_EECD_SECVAL_SHIFT 22 ++ ++#define E1000_NVM_SWDPIN0 0x0001 /* SWDPIN 0 NVM Value */ ++#define E1000_NVM_LED_LOGIC 0x0020 /* Led Logic Word */ ++#define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM read/write registers */ ++#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ ++#define E1000_NVM_RW_REG_START 1 /* Start operation */ ++#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ ++#define E1000_NVM_POLL_WRITE 1 /* Flag for polling for write complete */ ++#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */ ++#define E1000_FLASH_UPDATES 2000 ++ ++/* NVM Word Offsets */ ++#define NVM_COMPAT 0x0003 ++#define NVM_ID_LED_SETTINGS 0x0004 ++#define NVM_VERSION 0x0005 ++#define NVM_SERDES_AMPLITUDE 0x0006 /* For SERDES output amplitude adjustment. */ ++#define NVM_PHY_CLASS_WORD 0x0007 ++#define NVM_INIT_CONTROL1_REG 0x000A ++#define NVM_INIT_CONTROL2_REG 0x000F ++#define NVM_SWDEF_PINS_CTRL_PORT_1 0x0010 ++#define NVM_INIT_CONTROL3_PORT_B 0x0014 ++#define NVM_INIT_3GIO_3 0x001A ++#define NVM_SWDEF_PINS_CTRL_PORT_0 0x0020 ++#define NVM_INIT_CONTROL3_PORT_A 0x0024 ++#define NVM_CFG 0x0012 ++#define NVM_FLASH_VERSION 0x0032 ++#define NVM_ALT_MAC_ADDR_PTR 0x0037 ++#define NVM_CHECKSUM_REG 0x003F ++ ++#define E1000_NVM_CFG_DONE_PORT_0 0x40000 /* MNG config cycle done */ ++#define E1000_NVM_CFG_DONE_PORT_1 0x80000 /* ...for second port */ ++ ++/* Mask bits for fields in Word 0x0f of the NVM */ ++#define NVM_WORD0F_PAUSE_MASK 0x3000 ++#define NVM_WORD0F_PAUSE 0x1000 ++#define NVM_WORD0F_ASM_DIR 0x2000 ++#define NVM_WORD0F_ANE 0x0800 ++#define NVM_WORD0F_SWPDIO_EXT_MASK 0x00F0 ++#define NVM_WORD0F_LPLU 0x0001 ++ ++/* Mask bits for fields in Word 0x1a of the NVM */ ++#define NVM_WORD1A_ASPM_MASK 0x000C ++ ++/* For checksumming, the sum of all words in the NVM should equal 0xBABA. 
*/ ++#define NVM_SUM 0xBABA ++ ++#define NVM_MAC_ADDR_OFFSET 0 ++#define NVM_PBA_OFFSET_0 8 ++#define NVM_PBA_OFFSET_1 9 ++#define NVM_RESERVED_WORD 0xFFFF ++#define NVM_PHY_CLASS_A 0x8000 ++#define NVM_SERDES_AMPLITUDE_MASK 0x000F ++#define NVM_SIZE_MASK 0x1C00 ++#define NVM_SIZE_SHIFT 10 ++#define NVM_WORD_SIZE_BASE_SHIFT 6 ++#define NVM_SWDPIO_EXT_SHIFT 4 ++ ++/* NVM Commands - Microwire */ ++#define NVM_READ_OPCODE_MICROWIRE 0x6 /* NVM read opcode */ ++#define NVM_WRITE_OPCODE_MICROWIRE 0x5 /* NVM write opcode */ ++#define NVM_ERASE_OPCODE_MICROWIRE 0x7 /* NVM erase opcode */ ++#define NVM_EWEN_OPCODE_MICROWIRE 0x13 /* NVM erase/write enable */ ++#define NVM_EWDS_OPCODE_MICROWIRE 0x10 /* NVM erast/write disable */ ++ ++/* NVM Commands - SPI */ ++#define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ ++#define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */ ++#define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */ ++#define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ ++#define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */ ++#define NVM_WRDI_OPCODE_SPI 0x04 /* NVM reset Write Enable latch */ ++#define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */ ++#define NVM_WRSR_OPCODE_SPI 0x01 /* NVM write Status register */ ++ ++/* SPI NVM Status Register */ ++#define NVM_STATUS_RDY_SPI 0x01 ++#define NVM_STATUS_WEN_SPI 0x02 ++#define NVM_STATUS_BP0_SPI 0x04 ++#define NVM_STATUS_BP1_SPI 0x08 ++#define NVM_STATUS_WPEN_SPI 0x80 ++ ++/* Word definitions for ID LED Settings */ ++#define ID_LED_RESERVED_0000 0x0000 ++#define ID_LED_RESERVED_FFFF 0xFFFF ++#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ ++ (ID_LED_OFF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++#define ID_LED_DEF1_DEF2 0x1 ++#define ID_LED_DEF1_ON2 0x2 ++#define ID_LED_DEF1_OFF2 0x3 ++#define ID_LED_ON1_DEF2 0x4 ++#define ID_LED_ON1_ON2 0x5 ++#define ID_LED_ON1_OFF2 0x6 ++#define ID_LED_OFF1_DEF2 0x7 ++#define ID_LED_OFF1_ON2 0x8 ++#define ID_LED_OFF1_OFF2 0x9 ++ ++#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF ++#define IGP_ACTIVITY_LED_ENABLE 0x0300 ++#define IGP_LED3_MODE 0x07000000 ++ ++/* PCI/PCI-X/PCI-EX Config space */ ++#define PCIX_COMMAND_REGISTER 0xE6 ++#define PCIX_STATUS_REGISTER_LO 0xE8 ++#define PCIX_STATUS_REGISTER_HI 0xEA ++#define PCI_HEADER_TYPE_REGISTER 0x0E ++#define PCIE_LINK_STATUS 0x12 ++ ++#define PCIX_COMMAND_MMRBC_MASK 0x000C ++#define PCIX_COMMAND_MMRBC_SHIFT 0x2 ++#define PCIX_STATUS_HI_MMRBC_MASK 0x0060 ++#define PCIX_STATUS_HI_MMRBC_SHIFT 0x5 ++#define PCIX_STATUS_HI_MMRBC_4K 0x3 ++#define PCIX_STATUS_HI_MMRBC_2K 0x2 ++#define PCIX_STATUS_LO_FUNC_MASK 0x7 ++#define PCI_HEADER_TYPE_MULTIFUNC 0x80 ++#define PCIE_LINK_WIDTH_MASK 0x3F0 ++#define PCIE_LINK_WIDTH_SHIFT 4 ++ ++#ifndef ETH_ADDR_LEN ++#define ETH_ADDR_LEN 6 ++#endif ++ ++#define PHY_REVISION_MASK 0xFFFFFFF0 ++#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ ++#define MAX_PHY_MULTI_PAGE_REG 0xF ++ ++/* Bit definitions for valid PHY IDs. 
*/ ++/* ++ * I = Integrated ++ * E = External ++ */ ++#define M88E1000_E_PHY_ID 0x01410C50 ++#define M88E1000_I_PHY_ID 0x01410C30 ++#define M88E1011_I_PHY_ID 0x01410C20 ++#define IGP01E1000_I_PHY_ID 0x02A80380 ++#define M88E1011_I_REV_4 0x04 ++#define M88E1111_I_PHY_ID 0x01410CC0 ++#define GG82563_E_PHY_ID 0x01410CA0 ++#define IGP03E1000_E_PHY_ID 0x02A80390 ++#define IFE_E_PHY_ID 0x02A80330 ++#define IFE_PLUS_E_PHY_ID 0x02A80320 ++#define IFE_C_E_PHY_ID 0x02A80310 ++#define M88_VENDOR 0x0141 ++ ++/* M88E1000 Specific Registers */ ++#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ ++#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */ ++#define M88E1000_INT_ENABLE 0x12 /* Interrupt Enable Register */ ++#define M88E1000_INT_STATUS 0x13 /* Interrupt Status Register */ ++#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */ ++#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */ ++ ++#define M88E1000_PHY_EXT_CTRL 0x1A /* PHY extend control register */ ++#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for page number setting */ ++#define M88E1000_PHY_GEN_CONTROL 0x1E /* Its meaning depends on reg 29 */ ++#define M88E1000_PHY_VCO_REG_BIT8 0x100 /* Bits 8 & 11 are adjusted for */ ++#define M88E1000_PHY_VCO_REG_BIT11 0x800 /* improved BER performance */ ++ ++/* M88E1000 PHY Specific Control Register */ ++#define M88E1000_PSCR_JABBER_DISABLE 0x0001 /* 1=Jabber Function disabled */ ++#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */ ++#define M88E1000_PSCR_SQE_TEST 0x0004 /* 1=SQE Test enabled */ ++/* 1=CLK125 low, 0=CLK125 toggling */ ++#define M88E1000_PSCR_CLK125_DISABLE 0x0010 ++#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 /* MDI Crossover Mode bits 6:5 */ ++ /* Manual MDI configuration */ ++#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ ++/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */ ++#define M88E1000_PSCR_AUTO_X_1000T 0x0040 ++/* Auto crossover enabled all speeds */ ++#define M88E1000_PSCR_AUTO_X_MODE 0x0060 ++/* ++ * 1=Enable Extended 10BASE-T distance (Lower 10BASE-T Rx Threshold ++ * 0=Normal 10BASE-T Rx Threshold ++ */ ++#define M88E1000_PSCR_EN_10BT_EXT_DIST 0x0080 ++/* 1=5-bit interface in 100BASE-TX, 0=MII interface in 100BASE-TX */ ++#define M88E1000_PSCR_MII_5BIT_ENABLE 0x0100 ++#define M88E1000_PSCR_SCRAMBLER_DISABLE 0x0200 /* 1=Scrambler disable */ ++#define M88E1000_PSCR_FORCE_LINK_GOOD 0x0400 /* 1=Force link good */ ++#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */ ++ ++/* M88E1000 PHY Specific Status Register */ ++#define M88E1000_PSSR_JABBER 0x0001 /* 1=Jabber */ ++#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ ++#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ ++#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ ++/* ++ * 0 = <50M ++ * 1 = 50-80M ++ * 2 = 80-110M ++ * 3 = 110-140M ++ * 4 = >140M ++ */ ++#define M88E1000_PSSR_CABLE_LENGTH 0x0380 ++#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */ ++#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */ ++#define M88E1000_PSSR_PAGE_RCVD 0x1000 /* 1=Page received */ ++#define M88E1000_PSSR_DPLX 0x2000 /* 1=Duplex 0=Half Duplex */ ++#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ ++#define M88E1000_PSSR_10MBS 0x0000 /* 00=10Mbs */ ++#define M88E1000_PSSR_100MBS 0x4000 /* 01=100Mbs */ ++#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ ++ ++#define 
M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 ++ ++/* M88E1000 Extended PHY Specific Control Register */ ++#define M88E1000_EPSCR_FIBER_LOOPBACK 0x4000 /* 1=Fiber loopback */ ++/* ++ * 1 = Lost lock detect enabled. ++ * Will assert lost lock and bring ++ * link down if idle not seen ++ * within 1ms in 1000BASE-T ++ */ ++#define M88E1000_EPSCR_DOWN_NO_IDLE 0x8000 ++/* ++ * Number of times we will attempt to autonegotiate before downshifting if we ++ * are the master ++ */ ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_2X 0x0400 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_3X 0x0800 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_4X 0x0C00 ++/* ++ * Number of times we will attempt to autonegotiate before downshifting if we ++ * are the slave ++ */ ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_DIS 0x0000 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_2X 0x0200 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_3X 0x0300 ++#define M88E1000_EPSCR_TX_CLK_2_5 0x0060 /* 2.5 MHz TX_CLK */ ++#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ ++#define M88E1000_EPSCR_TX_CLK_0 0x0000 /* NO TX_CLK */ ++ ++/* M88EC018 Rev 2 specific DownShift settings */ ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_1X 0x0000 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_2X 0x0200 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_3X 0x0400 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_4X 0x0600 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_6X 0x0A00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_7X 0x0C00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_8X 0x0E00 ++ ++/* ++ * Bits... ++ * 15-5: page ++ * 4-0: register offset ++ */ ++#define GG82563_PAGE_SHIFT 5 ++#define GG82563_REG(page, reg) \ ++ (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) ++#define GG82563_MIN_ALT_REG 30 ++ ++/* GG82563 Specific Registers */ ++#define GG82563_PHY_SPEC_CTRL \ ++ GG82563_REG(0, 16) /* PHY Specific Control */ ++#define GG82563_PHY_SPEC_STATUS \ ++ GG82563_REG(0, 17) /* PHY Specific Status */ ++#define GG82563_PHY_INT_ENABLE \ ++ GG82563_REG(0, 18) /* Interrupt Enable */ ++#define GG82563_PHY_SPEC_STATUS_2 \ ++ GG82563_REG(0, 19) /* PHY Specific Status 2 */ ++#define GG82563_PHY_RX_ERR_CNTR \ ++ GG82563_REG(0, 21) /* Receive Error Counter */ ++#define GG82563_PHY_PAGE_SELECT \ ++ GG82563_REG(0, 22) /* Page Select */ ++#define GG82563_PHY_SPEC_CTRL_2 \ ++ GG82563_REG(0, 26) /* PHY Specific Control 2 */ ++#define GG82563_PHY_PAGE_SELECT_ALT \ ++ GG82563_REG(0, 29) /* Alternate Page Select */ ++#define GG82563_PHY_TEST_CLK_CTRL \ ++ GG82563_REG(0, 30) /* Test Clock Control (use reg. 
29 to select) */ ++ ++#define GG82563_PHY_MAC_SPEC_CTRL \ ++ GG82563_REG(2, 21) /* MAC Specific Control Register */ ++#define GG82563_PHY_MAC_SPEC_CTRL_2 \ ++ GG82563_REG(2, 26) /* MAC Specific Control 2 */ ++ ++#define GG82563_PHY_DSP_DISTANCE \ ++ GG82563_REG(5, 26) /* DSP Distance */ ++ ++/* Page 193 - Port Control Registers */ ++#define GG82563_PHY_KMRN_MODE_CTRL \ ++ GG82563_REG(193, 16) /* Kumeran Mode Control */ ++#define GG82563_PHY_PORT_RESET \ ++ GG82563_REG(193, 17) /* Port Reset */ ++#define GG82563_PHY_REVISION_ID \ ++ GG82563_REG(193, 18) /* Revision ID */ ++#define GG82563_PHY_DEVICE_ID \ ++ GG82563_REG(193, 19) /* Device ID */ ++#define GG82563_PHY_PWR_MGMT_CTRL \ ++ GG82563_REG(193, 20) /* Power Management Control */ ++#define GG82563_PHY_RATE_ADAPT_CTRL \ ++ GG82563_REG(193, 25) /* Rate Adaptation Control */ ++ ++/* Page 194 - KMRN Registers */ ++#define GG82563_PHY_KMRN_FIFO_CTRL_STAT \ ++ GG82563_REG(194, 16) /* FIFO's Control/Status */ ++#define GG82563_PHY_KMRN_CTRL \ ++ GG82563_REG(194, 17) /* Control */ ++#define GG82563_PHY_INBAND_CTRL \ ++ GG82563_REG(194, 18) /* Inband Control */ ++#define GG82563_PHY_KMRN_DIAGNOSTIC \ ++ GG82563_REG(194, 19) /* Diagnostic */ ++#define GG82563_PHY_ACK_TIMEOUTS \ ++ GG82563_REG(194, 20) /* Acknowledge Timeouts */ ++#define GG82563_PHY_ADV_ABILITY \ ++ GG82563_REG(194, 21) /* Advertised Ability */ ++#define GG82563_PHY_LINK_PARTNER_ADV_ABILITY \ ++ GG82563_REG(194, 23) /* Link Partner Advertised Ability */ ++#define GG82563_PHY_ADV_NEXT_PAGE \ ++ GG82563_REG(194, 24) /* Advertised Next Page */ ++#define GG82563_PHY_LINK_PARTNER_ADV_NEXT_PAGE \ ++ GG82563_REG(194, 25) /* Link Partner Advertised Next page */ ++#define GG82563_PHY_KMRN_MISC \ ++ GG82563_REG(194, 26) /* Misc. */ ++ ++/* MDI Control */ ++#define E1000_MDIC_DATA_MASK 0x0000FFFF ++#define E1000_MDIC_REG_MASK 0x001F0000 ++#define E1000_MDIC_REG_SHIFT 16 ++#define E1000_MDIC_PHY_MASK 0x03E00000 ++#define E1000_MDIC_PHY_SHIFT 21 ++#define E1000_MDIC_OP_WRITE 0x04000000 ++#define E1000_MDIC_OP_READ 0x08000000 ++#define E1000_MDIC_READY 0x10000000 ++#define E1000_MDIC_INT_EN 0x20000000 ++#define E1000_MDIC_ERROR 0x40000000 ++ ++/* SerDes Control */ ++#define E1000_GEN_CTL_READY 0x80000000 ++#define E1000_GEN_CTL_ADDRESS_SHIFT 8 ++#define E1000_GEN_POLL_TIMEOUT 640 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82542.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82542.c 2021-04-07 16:01:27.643633572 +0800 +@@ -0,0 +1,543 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". 
++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_82542 (rev 1 & 2) ++ */ ++ ++#include "e1000_api.h" ++ ++static s32 e1000_init_phy_params_82542(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_82542(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_82542(struct e1000_hw *hw); ++static s32 e1000_get_bus_info_82542(struct e1000_hw *hw); ++static s32 e1000_reset_hw_82542(struct e1000_hw *hw); ++static s32 e1000_init_hw_82542(struct e1000_hw *hw); ++static s32 e1000_setup_link_82542(struct e1000_hw *hw); ++static s32 e1000_led_on_82542(struct e1000_hw *hw); ++static s32 e1000_led_off_82542(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_82542(struct e1000_hw *hw); ++ ++struct e1000_dev_spec_82542 { ++ bool dma_fairness; ++}; ++ ++/** ++ * e1000_init_phy_params_82542 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_phy_params_82542(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_phy_params_82542"); ++ ++ phy->type = e1000_phy_none; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82542 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_82542(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ ++ DEBUGFUNC("e1000_init_nvm_params_82542"); ++ ++ nvm->address_bits = 6; ++ nvm->delay_usec = 50; ++ nvm->opcode_bits = 3; ++ nvm->type = e1000_nvm_eeprom_microwire; ++ nvm->word_size = 64; ++ ++ /* Function Pointers */ ++ func->read_nvm = e1000_read_nvm_microwire; ++ func->release_nvm = e1000_stop_nvm; ++ func->write_nvm = e1000_write_nvm_microwire; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_mac_params_82542 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_init_mac_params_82542(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_mac_params_82542"); ++ ++ /* Set media type */ ++ hw->phy.media_type = e1000_media_type_fiber; ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_82542; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_82542; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_82542; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_82542; ++ /* phy/fiber/serdes setup */ ++ func->setup_physical_interface = e1000_setup_fiber_serdes_link_generic; ++ /* check for link */ ++ func->check_for_link = e1000_check_for_fiber_link_generic; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_generic; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_82542; ++ func->led_off = e1000_led_off_82542; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_82542; ++ /* link info */ ++ func->get_link_up_info = e1000_get_speed_and_duplex_fiber_serdes_generic; ++ ++ hw->dev_spec_size = sizeof(struct e1000_dev_spec_82542); ++ ++ /* Device-specific structure allocation */ ++ ret_val = e1000_alloc_zeroed_dev_spec_struct(hw, hw->dev_spec_size); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_82542 - Init func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. ++ **/ ++void e1000_init_function_pointers_82542(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_82542"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_82542; ++ hw->func.init_nvm_params = e1000_init_nvm_params_82542; ++ hw->func.init_phy_params = e1000_init_phy_params_82542; ++} ++ ++/** ++ * e1000_get_bus_info_82542 - Obtain bus information for adapter ++ * @hw: pointer to the HW structure ++ * ++ * This will obtain information about the HW bus for which the ++ * adaper is attached and stores it in the hw structure. This is a function ++ * pointer entry point called by the api module. ++ **/ ++static s32 e1000_get_bus_info_82542(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_get_bus_info_82542"); ++ ++ hw->bus.type = e1000_bus_type_pci; ++ hw->bus.speed = e1000_bus_speed_unknown; ++ hw->bus.width = e1000_bus_width_unknown; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_reset_hw_82542 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_reset_hw_82542(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ s32 ret_val = E1000_SUCCESS; ++ u32 ctrl, icr; ++ ++ DEBUGFUNC("e1000_reset_hw_82542"); ++ ++ if (hw->revision_id == E1000_REVISION_2) { ++ DEBUGOUT("Disabling MWI on 82542 rev 2\n"); ++ e1000_pci_clear_mwi(hw); ++ } ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* ++ * Delay to allow any outstanding PCI transactions to complete before ++ * resetting the device ++ */ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGOUT("Issuing a global reset to 82542/82543 MAC\n"); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ ++ e1000_reload_nvm(hw); ++ msec_delay(2); ++ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ if (hw->revision_id == E1000_REVISION_2) { ++ if (bus->pci_cmd_word & CMD_MEM_WRT_INVALIDATE) ++ e1000_pci_set_mwi(hw); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_82542 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_hw_82542(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_dev_spec_82542 *dev_spec; ++ s32 ret_val = E1000_SUCCESS; ++ u32 ctrl; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_hw_82542"); ++ ++ dev_spec = (struct e1000_dev_spec_82542 *)hw->dev_spec; ++ ++ /* Disabling VLAN filtering */ ++ E1000_WRITE_REG(hw, E1000_VET, 0); ++ e1000_clear_vfta(hw); ++ ++ /* For 82542 (rev 2.0), disable MWI and put the receiver into reset */ ++ if (hw->revision_id == E1000_REVISION_2) { ++ DEBUGOUT("Disabling MWI on 82542 rev 2.0\n"); ++ e1000_pci_clear_mwi(hw); ++ E1000_WRITE_REG(hw, E1000_RCTL, E1000_RCTL_RST); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(5); ++ } ++ ++ /* Setup the receive address. */ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* For 82542 (rev 2.0), take the receiver out of reset and enable MWI */ ++ if (hw->revision_id == E1000_REVISION_2) { ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(1); ++ if (hw->bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) ++ e1000_pci_set_mwi(hw); ++ } ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* ++ * Set the PCI priority bit correctly in the CTRL register. This ++ * determines if the adapter gives priority to receives, or if it ++ * gives equal priority to transmits and receives. ++ */ ++ if (dev_spec->dma_fairness) { ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PRIOR); ++ } ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link_82542(hw); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82542(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_link_82542 - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. 
Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. This is a function ++ * pointer entry point called by the api module. ++ **/ ++static s32 e1000_setup_link_82542(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_link_82542"); ++ ++ ret_val = e1000_set_default_fc_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ hw->fc.type &= ~e1000_fc_tx_pause; ++ ++ if (mac->report_tx_early == 1) ++ hw->fc.type &= ~e1000_fc_rx_pause; ++ ++ /* ++ * We want to save off the original Flow Control configuration just in ++ * case we get disconnected and then reconnected into a different hub ++ * or switch with different Flow Control capabilities. ++ */ ++ hw->fc.original_type = hw->fc.type; ++ ++ DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc.type); ++ ++ /* Call the necessary subroutine to configure the link. */ ++ ret_val = func->setup_physical_interface(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Initialize the flow control address, type, and PAUSE timer ++ * registers to their default values. This is done even if flow ++ * control is disabled, because it does not hurt anything to ++ * initialize these registers. ++ */ ++ DEBUGOUT("Initializing Flow Control address, type and timer regs\n"); ++ ++ E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); ++ E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); ++ E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE); ++ ++ E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); ++ ++ ret_val = e1000_set_fc_watermarks_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_led_on_82542 - Turn on SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED on. This is a function pointer entry point ++ * called by the api module. ++ **/ ++static s32 e1000_led_on_82542(struct e1000_hw *hw) ++{ ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGFUNC("e1000_led_on_82542"); ++ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_off_82542 - Turn off SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED off. This is a function pointer entry point ++ * called by the api module. ++ **/ ++static s32 e1000_led_off_82542(struct e1000_hw *hw) ++{ ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGFUNC("e1000_led_off_82542"); ++ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_translate_register_82542 - Translate the proper regiser offset ++ * @reg: e1000 register to be read ++ * ++ * Registers in 82542 are located in different offsets than other adapters ++ * even though they function in the same manner. This function takes in ++ * the name of the register to read and returns the correct offset for ++ * 82542 silicon. ++ **/ ++u32 e1000_translate_register_82542(u32 reg) ++{ ++ /* ++ * Some of the 82542 registers are located at different ++ * offsets than they are in newer adapters. ++ * Despite the difference in location, the registers ++ * function in the same manner. 
++ */ ++ switch (reg) { ++ case E1000_RA: ++ reg = 0x00040; ++ break; ++ case E1000_RDTR: ++ reg = 0x00108; ++ break; ++ case E1000_RDBAL(0): ++ reg = 0x00110; ++ break; ++ case E1000_RDBAH(0): ++ reg = 0x00114; ++ break; ++ case E1000_RDLEN(0): ++ reg = 0x00118; ++ break; ++ case E1000_RDH(0): ++ reg = 0x00120; ++ break; ++ case E1000_RDT(0): ++ reg = 0x00128; ++ break; ++ case E1000_RDBAL(1): ++ reg = 0x00138; ++ break; ++ case E1000_RDBAH(1): ++ reg = 0x0013C; ++ break; ++ case E1000_RDLEN(1): ++ reg = 0x00140; ++ break; ++ case E1000_RDH(1): ++ reg = 0x00148; ++ break; ++ case E1000_RDT(1): ++ reg = 0x00150; ++ break; ++ case E1000_FCRTH: ++ reg = 0x00160; ++ break; ++ case E1000_FCRTL: ++ reg = 0x00168; ++ break; ++ case E1000_MTA: ++ reg = 0x00200; ++ break; ++ case E1000_TDBAL(0): ++ reg = 0x00420; ++ break; ++ case E1000_TDBAH(0): ++ reg = 0x00424; ++ break; ++ case E1000_TDLEN(0): ++ reg = 0x00428; ++ break; ++ case E1000_TDH(0): ++ reg = 0x00430; ++ break; ++ case E1000_TDT(0): ++ reg = 0x00438; ++ break; ++ case E1000_TIDV: ++ reg = 0x00440; ++ break; ++ case E1000_VFTA: ++ reg = 0x00600; ++ break; ++ case E1000_TDFH: ++ reg = 0x08010; ++ break; ++ case E1000_TDFT: ++ reg = 0x08018; ++ break; ++ default: ++ break; ++ } ++ ++ return reg; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82542 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. ++ **/ ++static void e1000_clear_hw_cntrs_82542(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_82542"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82543.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82543.c 2021-04-07 16:01:27.638633579 +0800 +@@ -0,0 +1,1654 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_82543 ++ * e1000_82544 ++ */ ++ ++#include "e1000_api.h" ++#include "e1000_82543.h" ++ ++static s32 e1000_init_phy_params_82543(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_82543(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_82543(struct e1000_hw *hw); ++static s32 e1000_read_phy_reg_82543(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++static s32 e1000_write_phy_reg_82543(struct e1000_hw *hw, u32 offset, ++ u16 data); ++static s32 e1000_phy_force_speed_duplex_82543(struct e1000_hw *hw); ++static s32 e1000_phy_hw_reset_82543(struct e1000_hw *hw); ++static s32 e1000_reset_hw_82543(struct e1000_hw *hw); ++static s32 e1000_init_hw_82543(struct e1000_hw *hw); ++static s32 e1000_setup_link_82543(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_82543(struct e1000_hw *hw); ++static s32 e1000_setup_fiber_link_82543(struct e1000_hw *hw); ++static s32 e1000_check_for_copper_link_82543(struct e1000_hw *hw); ++static s32 e1000_check_for_fiber_link_82543(struct e1000_hw *hw); ++static s32 e1000_led_on_82543(struct e1000_hw *hw); ++static s32 e1000_led_off_82543(struct e1000_hw *hw); ++static void e1000_write_vfta_82543(struct e1000_hw *hw, u32 offset, ++ u32 value); ++static void e1000_mta_set_82543(struct e1000_hw *hw, u32 hash_value); ++static void e1000_clear_hw_cntrs_82543(struct e1000_hw *hw); ++static s32 e1000_config_mac_to_phy_82543(struct e1000_hw *hw); ++static bool e1000_init_phy_disabled_82543(struct e1000_hw *hw); ++static void e1000_lower_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl); ++static s32 e1000_polarity_reversal_workaround_82543(struct e1000_hw *hw); ++static void e1000_raise_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl); ++static u16 e1000_shift_in_mdi_bits_82543(struct e1000_hw *hw); ++static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data, ++ u16 count); ++static bool e1000_tbi_compatibility_enabled_82543(struct e1000_hw *hw); ++static void e1000_set_tbi_sbp_82543(struct e1000_hw *hw, bool state); ++ ++struct e1000_dev_spec_82543 { ++ u32 tbi_compatibility; ++ bool dma_fairness; ++ bool init_phy_disabled; ++}; ++ ++/** ++ * e1000_init_phy_params_82543 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_phy_params_82543(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_phy_params_82543"); ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ goto out; ++ } else { ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper; ++ } ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 10000; ++ phy->type = e1000_phy_m88; ++ ++ /* Function Pointers */ ++ func->check_polarity = e1000_check_polarity_m88; ++ func->commit_phy = e1000_phy_sw_reset_generic; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_82543; ++ func->get_cable_length = e1000_get_cable_length_m88; ++ func->get_cfg_done = e1000_get_cfg_done_generic; ++ func->read_phy_reg = (hw->mac.type == e1000_82543) ++ ? e1000_read_phy_reg_82543 ++ : e1000_read_phy_reg_m88; ++ func->reset_phy = (hw->mac.type == e1000_82543) ++ ? 
e1000_phy_hw_reset_82543 ++ : e1000_phy_hw_reset_generic; ++ func->write_phy_reg = (hw->mac.type == e1000_82543) ++ ? e1000_write_phy_reg_82543 ++ : e1000_write_phy_reg_m88; ++ func->get_phy_info = e1000_get_phy_info_m88; ++ ++ /* ++ * The external PHY of the 82543 can be in a funky state. ++ * Resetting helps us read the PHY registers for acquiring ++ * the PHY ID. ++ */ ++ if (!e1000_init_phy_disabled_82543(hw)) { ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) { ++ DEBUGOUT("Resetting PHY during init failed.\n"); ++ goto out; ++ } ++ msec_delay(20); ++ } ++ ++ ret_val = e1000_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Verify phy id */ ++ switch (hw->mac.type) { ++ case e1000_82543: ++ if (phy->id != M88E1000_E_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ break; ++ case e1000_82544: ++ if (phy->id != M88E1000_I_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ break; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82543 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_82543(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ ++ DEBUGFUNC("e1000_init_nvm_params_82543"); ++ ++ nvm->type = e1000_nvm_eeprom_microwire; ++ nvm->word_size = 64; ++ nvm->delay_usec = 50; ++ nvm->address_bits = 6; ++ nvm->opcode_bits = 3; ++ ++ /* Function Pointers */ ++ func->read_nvm = e1000_read_nvm_microwire; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->valid_led_default = e1000_valid_led_default_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ func->write_nvm = e1000_write_nvm_microwire; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_mac_params_82543 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_mac_params_82543(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_init_mac_params_82543"); ++ ++ /* Set media type */ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82543GC_FIBER: ++ case E1000_DEV_ID_82544EI_FIBER: ++ hw->phy.media_type = e1000_media_type_fiber; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_pci_generic; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_82543; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_82543; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_82543; ++ /* physical interface setup */ ++ func->setup_physical_interface = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? e1000_setup_copper_link_82543 ++ : e1000_setup_fiber_link_82543; ++ /* check for link */ ++ func->check_for_link = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? e1000_check_for_copper_link_82543 ++ : e1000_check_for_fiber_link_82543; ++ /* link info */ ++ func->get_link_up_info = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? 
e1000_get_speed_and_duplex_copper_generic ++ : e1000_get_speed_and_duplex_fiber_serdes_generic; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_82543; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_82543; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_82543; ++ func->led_off = e1000_led_off_82543; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_82543; ++ ++ hw->dev_spec_size = sizeof(struct e1000_dev_spec_82543); ++ ++ /* Device-specific structure allocation */ ++ ret_val = e1000_alloc_zeroed_dev_spec_struct(hw, hw->dev_spec_size); ++ if (ret_val) ++ goto out; ++ ++ /* Set tbi compatibility */ ++ if ((hw->mac.type != e1000_82543) || ++ (hw->phy.media_type == e1000_media_type_fiber)) ++ e1000_set_tbi_compatibility_82543(hw, FALSE); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_82543 - Init func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. ++ **/ ++void e1000_init_function_pointers_82543(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_82543"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_82543; ++ hw->func.init_nvm_params = e1000_init_nvm_params_82543; ++ hw->func.init_phy_params = e1000_init_phy_params_82543; ++} ++ ++/** ++ * e1000_tbi_compatibility_enabled_82543 - Returns TBI compat status ++ * @hw: pointer to the HW structure ++ * ++ * Returns the curent status of 10-bit Interface (TBI) compatibility ++ * (enabled/disabled). ++ **/ ++static bool e1000_tbi_compatibility_enabled_82543(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82543 *dev_spec; ++ bool state = FALSE; ++ ++ DEBUGFUNC("e1000_tbi_compatibility_enabled_82543"); ++ ++ if (hw->mac.type != e1000_82543) { ++ DEBUGOUT("TBI compatibility workaround for 82543 only.\n"); ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ goto out; ++ } ++ ++ state = (dev_spec->tbi_compatibility & TBI_COMPAT_ENABLED) ++ ? TRUE : FALSE; ++ ++out: ++ return state; ++} ++ ++/** ++ * e1000_set_tbi_compatibility_82543 - Set TBI compatibility ++ * @hw: pointer to the HW structure ++ * @state: enable/disable TBI compatibility ++ * ++ * Enables or disabled 10-bit Interface (TBI) compatibility. ++ **/ ++void e1000_set_tbi_compatibility_82543(struct e1000_hw *hw, bool state) ++{ ++ struct e1000_dev_spec_82543 *dev_spec; ++ ++ DEBUGFUNC("e1000_set_tbi_compatibility_82543"); ++ ++ if (hw->mac.type != e1000_82543) { ++ DEBUGOUT("TBI compatibility workaround for 82543 only.\n"); ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ goto out; ++ } ++ ++ if (state) ++ dev_spec->tbi_compatibility |= TBI_COMPAT_ENABLED; ++ else ++ dev_spec->tbi_compatibility &= ~TBI_COMPAT_ENABLED; ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_tbi_sbp_enabled_82543 - Returns TBI SBP status ++ * @hw: pointer to the HW structure ++ * ++ * Returns the curent status of 10-bit Interface (TBI) store bad packet (SBP) ++ * (enabled/disabled). 
++ **/ ++bool e1000_tbi_sbp_enabled_82543(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82543 *dev_spec; ++ bool state = FALSE; ++ ++ DEBUGFUNC("e1000_tbi_sbp_enabled_82543"); ++ ++ if (hw->mac.type != e1000_82543) { ++ DEBUGOUT("TBI compatibility workaround for 82543 only.\n"); ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ goto out; ++ } ++ ++ state = (dev_spec->tbi_compatibility & TBI_SBP_ENABLED) ++ ? TRUE : FALSE; ++ ++out: ++ return state; ++} ++ ++/** ++ * e1000_set_tbi_sbp_82543 - Set TBI SBP ++ * @hw: pointer to the HW structure ++ * @state: enable/disable TBI store bad packet ++ * ++ * Enables or disabled 10-bit Interface (TBI) store bad packet (SBP). ++ **/ ++static void e1000_set_tbi_sbp_82543(struct e1000_hw *hw, bool state) ++{ ++ struct e1000_dev_spec_82543 *dev_spec; ++ ++ DEBUGFUNC("e1000_set_tbi_sbp_82543"); ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (state && e1000_tbi_compatibility_enabled_82543(hw)) ++ dev_spec->tbi_compatibility |= TBI_SBP_ENABLED; ++ else ++ dev_spec->tbi_compatibility &= ~TBI_SBP_ENABLED; ++ ++ return; ++} ++ ++/** ++ * e1000_init_phy_disabled_82543 - Returns init PHY status ++ * @hw: pointer to the HW structure ++ * ++ * Returns the current status of whether PHY initialization is disabled. ++ * True if PHY initialization is disabled else false. ++ **/ ++static bool e1000_init_phy_disabled_82543(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82543 *dev_spec; ++ bool ret_val; ++ ++ DEBUGFUNC("e1000_init_phy_disabled_82543"); ++ ++ if (hw->mac.type != e1000_82543) { ++ ret_val = FALSE; ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = FALSE; ++ goto out; ++ } ++ ++ ret_val = dev_spec->init_phy_disabled; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_tbi_adjust_stats_82543 - Adjust stats when TBI enabled ++ * @hw: pointer to the HW structure ++ * @stats: Struct containing statistic register values ++ * @frame_len: The length of the frame in question ++ * @mac_addr: The Ethernet destination address of the frame in question ++ * @max_frame_size: The maximum frame size ++ * ++ * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT ++ **/ ++void e1000_tbi_adjust_stats_82543(struct e1000_hw *hw, ++ struct e1000_hw_stats *stats, u32 frame_len, ++ u8 *mac_addr, u32 max_frame_size) ++{ ++ if (!(e1000_tbi_sbp_enabled_82543(hw))) ++ goto out; ++ ++ /* First adjust the frame length. */ ++ frame_len--; ++ /* ++ * We need to adjust the statistics counters, since the hardware ++ * counters overcount this packet as a CRC error and undercount ++ * the packet as a good packet ++ */ ++ /* This packet should not be counted as a CRC error. */ ++ stats->crcerrs--; ++ /* This packet does count as a Good Packet Received. */ ++ stats->gprc++; ++ ++ /* Adjust the Good Octets received counters */ ++ stats->gorc += frame_len; ++ ++ /* ++ * Is this a broadcast or multicast? Check broadcast first, ++ * since the test for a multicast frame will test positive on ++ * a broadcast frame. ++ */ ++ if ((mac_addr[0] == 0xff) && (mac_addr[1] == 0xff)) ++ /* Broadcast packet */ ++ stats->bprc++; ++ else if (*mac_addr & 0x01) ++ /* Multicast packet */ ++ stats->mprc++; ++ ++ /* ++ * In this case, the hardware has overcounted the number of ++ * oversize frames. 
++ */ ++ if ((frame_len == max_frame_size) && (stats->roc > 0)) ++ stats->roc--; ++ ++ /* ++ * Adjust the bin counters when the extra byte put the frame in the ++ * wrong bin. Remember that the frame_len was adjusted above. ++ */ ++ if (frame_len == 64) { ++ stats->prc64++; ++ stats->prc127--; ++ } else if (frame_len == 127) { ++ stats->prc127++; ++ stats->prc255--; ++ } else if (frame_len == 255) { ++ stats->prc255++; ++ stats->prc511--; ++ } else if (frame_len == 511) { ++ stats->prc511++; ++ stats->prc1023--; ++ } else if (frame_len == 1023) { ++ stats->prc1023++; ++ stats->prc1522--; ++ } else if (frame_len == 1522) { ++ stats->prc1522++; ++ } ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_read_phy_reg_82543 - Read PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY at offset and stores the information read to data. ++ **/ ++static s32 e1000_read_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ u32 mdic; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_read_phy_reg_82543"); ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* ++ * We must first send a preamble through the MDIO pin to signal the ++ * beginning of an MII instruction. This is done by sending 32 ++ * consecutive "1" bits. ++ */ ++ e1000_shift_out_mdi_bits_82543(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); ++ ++ /* ++ * Now combine the next few fields that are required for a read ++ * operation. We use this method instead of calling the ++ * e1000_shift_out_mdi_bits routine five different times. The format ++ * of an MII read instruction consists of a shift out of 14 bits and ++ * is defined as follows: ++ * ++ * followed by a shift in of 18 bits. This first two bits shifted in ++ * are TurnAround bits used to avoid contention on the MDIO pin when a ++ * READ operation is performed. These two bits are thrown away ++ * followed by a shift in of 16 bits which contains the desired data. ++ */ ++ mdic = (offset | (hw->phy.addr << 5) | ++ (PHY_OP_READ << 10) | (PHY_SOF << 12)); ++ ++ e1000_shift_out_mdi_bits_82543(hw, mdic, 14); ++ ++ /* ++ * Now that we've shifted out the read command to the MII, we need to ++ * "shift in" the 16-bit value (18 total bits) of the requested PHY ++ * register address. ++ */ ++ *data = e1000_shift_in_mdi_bits_82543(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_82543 - Write PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be written ++ * @data: pointer to the data to be written at offset ++ * ++ * Writes data to the PHY at offset. ++ **/ ++static s32 e1000_write_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ u32 mdic; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_write_phy_reg_82543"); ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* ++ * We'll need to use the SW defined pins to shift the write command ++ * out to the PHY. We first send a preamble to the PHY to signal the ++ * beginning of the MII instruction. This is done by sending 32 ++ * consecutive "1" bits. ++ */ ++ e1000_shift_out_mdi_bits_82543(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); ++ ++ /* ++ * Now combine the remaining required fields that will indicate a ++ * write operation. 
We use this method instead of calling the ++ * e1000_shift_out_mdi_bits routine for each field in the command. The ++ * format of a MII write instruction is as follows: ++ * . ++ */ ++ mdic = ((PHY_TURNAROUND) | (offset << 2) | (hw->phy.addr << 7) | ++ (PHY_OP_WRITE << 12) | (PHY_SOF << 14)); ++ mdic <<= 16; ++ mdic |= (u32) data; ++ ++ e1000_shift_out_mdi_bits_82543(hw, mdic, 32); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_raise_mdi_clk_82543 - Raise Management Data Input clock ++ * @hw: pointer to the HW structure ++ * @ctrl: pointer to the control register ++ * ++ * Raise the management data input clock by setting the MDC bit in the control ++ * register. ++ **/ ++static void e1000_raise_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl) ++{ ++ /* ++ * Raise the clock input to the Management Data Clock (by setting the ++ * MDC bit), and then delay a sufficient amount of time. ++ */ ++ E1000_WRITE_REG(hw, E1000_CTRL, (*ctrl | E1000_CTRL_MDC)); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(10); ++} ++ ++/** ++ * e1000_lower_mdi_clk_82543 - Lower Management Data Input clock ++ * @hw: pointer to the HW structure ++ * @ctrl: pointer to the control register ++ * ++ * Lower the management data input clock by clearing the MDC bit in the ++ * control register. ++ **/ ++static void e1000_lower_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl) ++{ ++ /* ++ * Lower the clock input to the Management Data Clock (by clearing the ++ * MDC bit), and then delay a sufficient amount of time. ++ */ ++ E1000_WRITE_REG(hw, E1000_CTRL, (*ctrl & ~E1000_CTRL_MDC)); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(10); ++} ++ ++/** ++ * e1000_shift_out_mdi_bits_82543 - Shift data bits our to the PHY ++ * @hw: pointer to the HW structure ++ * @data: data to send to the PHY ++ * @count: number of bits to shift out ++ * ++ * We need to shift 'count' bits out to the PHY. So, the value in the ++ * "data" parameter will be shifted out to the PHY one bit at a time. ++ * In order to do this, "data" must be broken down into bits. ++ **/ ++static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data, ++ u16 count) ++{ ++ u32 ctrl, mask; ++ ++ /* ++ * We need to shift "count" number of bits out to the PHY. So, the ++ * value in the "data" parameter will be shifted out to the PHY one ++ * bit at a time. In order to do this, "data" must be broken down ++ * into bits. ++ */ ++ mask = 0x01; ++ mask <<= (count -1); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* Set MDIO_DIR and MDC_DIR direction bits to be used as output pins. */ ++ ctrl |= (E1000_CTRL_MDIO_DIR | E1000_CTRL_MDC_DIR); ++ ++ while (mask) { ++ /* ++ * A "1" is shifted out to the PHY by setting the MDIO bit to ++ * "1" and then raising and lowering the Management Data Clock. ++ * A "0" is shifted out to the PHY by setting the MDIO bit to ++ * "0" and then raising and lowering the clock. ++ */ ++ if (data & mask) ctrl |= E1000_CTRL_MDIO; ++ else ctrl &= ~E1000_CTRL_MDIO; ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(10); ++ ++ e1000_raise_mdi_clk_82543(hw, &ctrl); ++ e1000_lower_mdi_clk_82543(hw, &ctrl); ++ ++ mask >>= 1; ++ } ++} ++ ++/** ++ * e1000_shift_in_mdi_bits_82543 - Shift data bits in from the PHY ++ * @hw: pointer to the HW structure ++ * ++ * In order to read a register from the PHY, we need to shift 18 bits ++ * in from the PHY. Bits are "shifted in" by raising the clock input to ++ * the PHY (setting the MDC bit), and then reading the value of the data out ++ * MDIO bit. 
++ **/ ++static u16 e1000_shift_in_mdi_bits_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ u16 data = 0; ++ u8 i; ++ ++ /* ++ * In order to read a register from the PHY, we need to shift in a ++ * total of 18 bits from the PHY. The first two bit (turnaround) ++ * times are used to avoid contention on the MDIO pin when a read ++ * operation is performed. These two bits are ignored by us and ++ * thrown away. Bits are "shifted in" by raising the input to the ++ * Management Data Clock (setting the MDC bit) and then reading the ++ * value of the MDIO bit. ++ */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* ++ * Clear MDIO_DIR (SWDPIO1) to indicate this bit is to be used as ++ * input. ++ */ ++ ctrl &= ~E1000_CTRL_MDIO_DIR; ++ ctrl &= ~E1000_CTRL_MDIO; ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* ++ * Raise and lower the clock before reading in the data. This accounts ++ * for the turnaround bits. The first clock occurred when we clocked ++ * out the last bit of the Register Address. ++ */ ++ e1000_raise_mdi_clk_82543(hw, &ctrl); ++ e1000_lower_mdi_clk_82543(hw, &ctrl); ++ ++ for (data = 0, i = 0; i < 16; i++) { ++ data <<= 1; ++ e1000_raise_mdi_clk_82543(hw, &ctrl); ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ /* Check to see if we shifted in a "1". */ ++ if (ctrl & E1000_CTRL_MDIO) ++ data |= 1; ++ e1000_lower_mdi_clk_82543(hw, &ctrl); ++ } ++ ++ e1000_raise_mdi_clk_82543(hw, &ctrl); ++ e1000_lower_mdi_clk_82543(hw, &ctrl); ++ ++ return data; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_82543 - Force speed/duplex for PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the function to force speed and duplex for the m88 PHY, and ++ * if the PHY is not auto-negotiating and the speed is forced to 10Mbit, ++ * then call the function for polarity reversal workaround. ++ **/ ++static s32 e1000_phy_force_speed_duplex_82543(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_82543"); ++ ++ ret_val = e1000_phy_force_speed_duplex_m88(hw); ++ if (ret_val) ++ goto out; ++ ++ if (!hw->mac.autoneg && ++ (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED)) ++ ret_val = e1000_polarity_reversal_workaround_82543(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_polarity_reversal_workaround_82543 - Workaround polarity reversal ++ * @hw: pointer to the HW structure ++ * ++ * When forcing link to 10 Full or 10 Half, the PHY can reverse the polarity ++ * inadvertantly. To workaround the issue, we disable the transmitter on ++ * the PHY until we have established the link partner's link parameters. ++ **/ ++static s32 e1000_polarity_reversal_workaround_82543(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 mii_status_reg; ++ u16 i; ++ bool link; ++ ++ /* Polarity reversal workaround for forced 10F/10H links. */ ++ ++ /* Disable the transmitter on the PHY */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFFF); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * This loop will early-out if the NO link condition has been met. ++ * In other words, DO NOT use e1000_phy_has_link_generic() here. ++ */ ++ for (i = PHY_FORCE_TIME; i > 0; i--) { ++ /* ++ * Read the MII Status Register and wait for Link Status bit ++ * to be clear. 
++ */ ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ++ if ((mii_status_reg & ~MII_SR_LINK_STATUS) == 0) ++ break; ++ msec_delay_irq(100); ++ } ++ ++ /* Recommended delay time after link has been lost */ ++ msec_delay_irq(1000); ++ ++ /* Now we will re-enable the transmitter on the PHY */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); ++ if (ret_val) ++ goto out; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFF0); ++ if (ret_val) ++ goto out; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFF00); ++ if (ret_val) ++ goto out; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x0000); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Read the MII Status Register and wait for Link Status bit ++ * to be set. ++ */ ++ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_TIME, 100000, &link); ++ if (ret_val) ++ goto out; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_hw_reset_82543 - PHY hardware reset ++ * @hw: pointer to the HW structure ++ * ++ * Sets the PHY_RESET_DIR bit in the extended device control register ++ * to put the PHY into a reset and waits for completion. Once the reset ++ * has been accomplished, clear the PHY_RESET_DIR bit to take the PHY out ++ * of reset. This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_phy_hw_reset_82543(struct e1000_hw *hw) ++{ ++ struct e1000_functions *func = &hw->func; ++ u32 ctrl_ext; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_phy_hw_reset_82543"); ++ ++ /* ++ * Read the Extended Device Control Register, assert the PHY_RESET_DIR ++ * bit to put the PHY into reset... ++ */ ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_SDP4_DIR; ++ ctrl_ext &= ~E1000_CTRL_EXT_SDP4_DATA; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ ++ msec_delay(10); ++ ++ /* ...then take it out of reset. */ ++ ctrl_ext |= E1000_CTRL_EXT_SDP4_DATA; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(150); ++ ++ ret_val = func->get_cfg_done(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_82543 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_reset_hw_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl, icr; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_reset_hw_82543"); ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ e1000_set_tbi_sbp_82543(hw, FALSE); ++ ++ /* ++ * Delay to allow any outstanding PCI transactions to complete before ++ * resetting the device ++ */ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGOUT("Issuing a global reset to 82543/82544 MAC\n"); ++ if (hw->mac.type == e1000_82543) { ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ } else { ++ /* ++ * The 82544 can't ACK the 64-bit write when issuing the ++ * reset, so use IO-mapping as a workaround. 
++ */ ++ E1000_WRITE_REG_IO(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ } ++ ++ /* ++ * After MAC reset, force reload of NVM to restore power-on ++ * settings to device. ++ */ ++ e1000_reload_nvm(hw); ++ msec_delay(2); ++ ++ /* Masking off and clearing any pending interrupts */ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_82543 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. ++ **/ ++static s32 e1000_init_hw_82543(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_dev_spec_82543 *dev_spec; ++ u32 ctrl; ++ s32 ret_val; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_hw_82543"); ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* Disabling VLAN filtering */ ++ E1000_WRITE_REG(hw, E1000_VET, 0); ++ e1000_clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* ++ * Set the PCI priority bit correctly in the CTRL register. This ++ * determines if the adapter gives priority to receives, or if it ++ * gives equal priority to transmits and receives. ++ */ ++ if (hw->mac.type == e1000_82543 && dev_spec->dma_fairness) { ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PRIOR); ++ } ++ ++ e1000_pcix_mmrbc_workaround_generic(hw); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82543(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_link_82543 - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Read the EEPROM to determine the initial polarity value and write the ++ * extended device control register with the information before calling ++ * the generic setup link function, which does the following: ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++static s32 e1000_setup_link_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl_ext; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_setup_link_82543"); ++ ++ /* ++ * Take the 4 bits from NVM word 0xF that determine the initial ++ * polarity value for the SW controlled pins, and setup the ++ * Extended Device Control reg with that info. ++ * This is needed because one of the SW controlled pins is used for ++ * signal detection. So this should be done before phy setup. 
++ */ ++ if (hw->mac.type == e1000_82543) { ++ ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ctrl_ext = ((data & NVM_WORD0F_SWPDIO_EXT_MASK) << ++ NVM_SWDPIO_EXT_SHIFT); ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ } ++ ++ ret_val = e1000_setup_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_82543 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Configures the link for auto-neg or forced speed and duplex. Then we check ++ * for link, once link is established calls to configure collision distance ++ * and flow control are called. ++ **/ ++static s32 e1000_setup_copper_link_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ bool link; ++ ++ DEBUGFUNC("e1000_setup_copper_link_82543"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL) | E1000_CTRL_SLU; ++ /* ++ * With 82543, we need to force speed and duplex on the MAC ++ * equal to what the PHY speed and duplex configuration is. ++ * In addition, we need to perform a hardware reset on the ++ * PHY to take it out of reset. ++ */ ++ if (hw->mac.type == e1000_82543) { ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) ++ goto out; ++ hw->phy.reset_disable = FALSE; ++ } else { ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ } ++ ++ /* Set MDI/MDI-X, Polarity Reversal, and downshift settings */ ++ ret_val = e1000_copper_link_setup_m88(hw); ++ if (ret_val) ++ goto out; ++ ++ if (hw->mac.autoneg) { ++ /* ++ * Setup autoneg and flow control advertisement and perform ++ * autonegotiation. ++ */ ++ ret_val = e1000_copper_link_autoneg(hw); ++ if (ret_val) ++ goto out; ++ } else { ++ /* ++ * PHY will be set to 10H, 10F, 100H or 100F ++ * depending on user settings. ++ */ ++ DEBUGOUT("Forcing Speed and Duplex\n"); ++ ret_val = e1000_phy_force_speed_duplex_82543(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Forcing Speed and Duplex\n"); ++ goto out; ++ } ++ } ++ ++ /* ++ * Check link status. Wait up to 100 microseconds for link to become ++ * valid. ++ */ ++ ret_val = e1000_phy_has_link_generic(hw, ++ COPPER_LINK_UP_LIMIT, ++ 10, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ ++ if (link) { ++ DEBUGOUT("Valid link established!!!\n"); ++ /* Config the MAC and PHY after link is up */ ++ if (hw->mac.type == e1000_82544) { ++ e1000_config_collision_dist_generic(hw); ++ } else { ++ ret_val = e1000_config_mac_to_phy_82543(hw); ++ if (ret_val) ++ goto out; ++ } ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ } else { ++ DEBUGOUT("Unable to establish link!!!\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_fiber_link_82543 - Setup link for fiber ++ * @hw: pointer to the HW structure ++ * ++ * Configures collision distance and flow control for fiber links. Upon ++ * successful setup, poll for link. 
++ **/ ++static s32 e1000_setup_fiber_link_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_setup_fiber_link_82543"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* Take the link out of reset */ ++ ctrl &= ~E1000_CTRL_LRST; ++ ++ e1000_config_collision_dist_generic(hw); ++ ++ ret_val = e1000_commit_fc_settings_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT("Auto-negotiation enabled\n"); ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(1); ++ ++ /* ++ * For these adapters, the SW defineable pin 1 is cleared when the ++ * optics detect a signal. If we have a signal, then poll for a ++ * "Link-Up" indication. ++ */ ++ if (!(E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) { ++ ret_val = e1000_poll_fiber_serdes_link_generic(hw); ++ } else { ++ DEBUGOUT("No signal detected\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_for_copper_link_82543 - Check for link (Copper) ++ * @hw: pointer to the HW structure ++ * ++ * Checks the phy for link, if link exists, do the following: ++ * - check for downshift ++ * - do polarity workaround (if necessary) ++ * - configure collision distance ++ * - configure flow control after link up ++ * - configure tbi compatibility ++ **/ ++static s32 e1000_check_for_copper_link_82543(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 icr, rctl; ++ s32 ret_val; ++ u16 speed, duplex; ++ bool link; ++ ++ DEBUGFUNC("e1000_check_for_copper_link_82543"); ++ ++ if (!mac->get_link_status) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ goto out; /* No link detected */ ++ ++ mac->get_link_status = FALSE; ++ ++ e1000_check_downshift_generic(hw); ++ ++ /* ++ * If we are forcing speed/duplex, then we can return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ /* ++ * If speed and duplex are forced to 10H or 10F, then we will ++ * implement the polarity reversal workaround. We disable ++ * interrupts first, and upon returning, place the devices ++ * interrupt state to its previous value except for the link ++ * status change interrupt which will happened due to the ++ * execution of this workaround. ++ */ ++ if (mac->forced_speed_duplex & E1000_ALL_10_SPEED) { ++ E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); ++ ret_val = e1000_polarity_reversal_workaround_82543(hw); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ E1000_WRITE_REG(hw, E1000_ICS, (icr & ~E1000_ICS_LSC)); ++ E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK); ++ } ++ ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* ++ * We have a M88E1000 PHY and Auto-Neg is enabled. If we ++ * have Si on board that is 82544 or newer, Auto ++ * Speed Detection takes care of MAC speed/duplex ++ * configuration. So we only need to configure Collision ++ * Distance in the MAC. Otherwise, we need to force ++ * speed/duplex on the MAC to the current PHY speed/duplex ++ * settings. ++ */ ++ if (mac->type == e1000_82544) ++ e1000_config_collision_dist_generic(hw); ++ else { ++ ret_val = e1000_config_mac_to_phy_82543(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring MAC to PHY settings\n"); ++ goto out; ++ } ++ } ++ ++ /* ++ * Configure Flow Control now that Auto-Neg has completed. ++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. 
++ */ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ } ++ ++ /* ++ * At this point we know that we are on copper and we have ++ * auto-negotiated link. These are conditions for checking the link ++ * partner capability register. We use the link speed to determine if ++ * TBI compatibility needs to be turned on or off. If the link is not ++ * at gigabit speed, then TBI compatibility is not needed. If we are ++ * at gigabit speed, we turn on TBI compatibility. ++ */ ++ if (e1000_tbi_compatibility_enabled_82543(hw)) { ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ return ret_val; ++ } ++ if (speed != SPEED_1000) { ++ /* ++ * If link speed is not set to gigabit speed, ++ * we do not need to enable TBI compatibility. ++ */ ++ if (e1000_tbi_sbp_enabled_82543(hw)) { ++ /* ++ * If we previously were in the mode, ++ * turn it off. ++ */ ++ e1000_set_tbi_sbp_82543(hw, FALSE); ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ rctl &= ~E1000_RCTL_SBP; ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++ } ++ } else { ++ /* ++ * If TBI compatibility is was previously off, ++ * turn it on. For compatibility with a TBI link ++ * partner, we will store bad packets. Some ++ * frames have an additional byte on the end and ++ * will look like CRC errors to to the hardware. ++ */ ++ if (!e1000_tbi_sbp_enabled_82543(hw)) { ++ e1000_set_tbi_sbp_82543(hw, TRUE); ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ rctl |= E1000_RCTL_SBP; ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++ } ++ } ++ } ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_for_fiber_link_82543 - Check for link (Fiber) ++ * @hw: pointer to the HW structure ++ * ++ * Checks for link up on the hardware. If link is not up and we have ++ * a signal, then we need to force link up. ++ **/ ++static s32 e1000_check_for_fiber_link_82543(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw, ctrl, status; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_check_for_fiber_link_82543"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ rxcw = E1000_READ_REG(hw, E1000_RXCW); ++ ++ /* ++ * If we don't have link (auto-negotiation failed or link partner ++ * cannot auto-negotiate), the cable is plugged in (we have signal), ++ * and our link partner is not trying to auto-negotiate with us (we ++ * are receiving idles or data), we need to force link up. We also ++ * need to give auto-negotiation time to complete, in case the cable ++ * was just plugged in. The autoneg_failed flag does this. ++ */ ++ /* (ctrl & E1000_CTRL_SWDPIN1) == 0 == have signal */ ++ if ((!(ctrl & E1000_CTRL_SWDPIN1)) && ++ (!(status & E1000_STATUS_LU)) && ++ (!(rxcw & E1000_RXCW_C))) { ++ if (mac->autoneg_failed == 0) { ++ mac->autoneg_failed = 1; ++ ret_val = 0; ++ goto out; ++ } ++ DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. 
*/ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ goto out; ++ } ++ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ /* ++ * If we are forcing link and we are receiving /C/ ordered ++ * sets, re-enable auto-negotiation in the TXCW register ++ * and disable forced link in the Device Control register ++ * in an attempt to auto-negotiate with our link partner. ++ */ ++ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\n"); ++ E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); ++ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ mac->serdes_has_link = TRUE; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_config_mac_to_phy_82543 - Configure MAC to PHY settings ++ * @hw: pointer to the HW structure ++ * ++ * For the 82543 silicon, we need to set the MAC to match the settings ++ * of the PHY, even if the PHY is auto-negotiating. ++ **/ ++static s32 e1000_config_mac_to_phy_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u16 phy_data; ++ ++ DEBUGFUNC("e1000_config_mac_to_phy_82543"); ++ ++ /* Set the bits to force speed and duplex */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS); ++ ++ /* ++ * Set up duplex in the Device Control and Transmit Control ++ * registers depending on negotiated values. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ ctrl &= ~E1000_CTRL_FD; ++ if (phy_data & M88E1000_PSSR_DPLX) ++ ctrl |= E1000_CTRL_FD; ++ ++ e1000_config_collision_dist_generic(hw); ++ ++ /* ++ * Set up speed in the Device Control register depending on ++ * negotiated values. ++ */ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) ++ ctrl |= E1000_CTRL_SPD_1000; ++ else if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS) ++ ctrl |= E1000_CTRL_SPD_100; ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_vfta_82543 - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: the 32-bit offset in which to write the value to. ++ * @value: the 32-bit value to write at location offset. ++ * ++ * This writes a 32-bit value to a 32-bit offset in the VLAN filter ++ * table. ++ **/ ++static void e1000_write_vfta_82543(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ u32 temp; ++ ++ DEBUGFUNC("e1000_write_vfta_82543"); ++ ++ if ((hw->mac.type == e1000_82544) && (offset & 1)) { ++ temp = E1000_READ_REG_ARRAY(hw, E1000_VFTA, offset - 1); ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset - 1, temp); ++ E1000_WRITE_FLUSH(hw); ++ } else { ++ e1000_write_vfta_generic(hw, offset, value); ++ } ++} ++ ++/** ++ * e1000_mta_set_82543 - Set multicast filter table address ++ * @hw: pointer to the HW structure ++ * @hash_value: determines the MTA register and bit to set ++ * ++ * The multicast table address is a register array of 32-bit registers. ++ * The hash_value is used to determine what register the bit is in, the ++ * current value is read, the new bit is OR'd in and the new value is ++ * written back into the register. 
++ **/ ++static void e1000_mta_set_82543(struct e1000_hw *hw, u32 hash_value) ++{ ++ u32 hash_bit, hash_reg, mta, temp; ++ ++ DEBUGFUNC("e1000_mta_set_82543"); ++ ++ hash_reg = (hash_value >> 5); ++ ++ /* ++ * If we are on an 82544 and we are trying to write an odd offset ++ * in the MTA, save off the previous entry before writing and ++ * restore the old value after writing. ++ */ ++ if ((hw->mac.type == e1000_82544) && (hash_reg & 1)) { ++ hash_reg &= (hw->mac.mta_reg_count - 1); ++ hash_bit = hash_value & 0x1F; ++ mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg); ++ mta |= (1 << hash_bit); ++ temp = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg - 1); ++ ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg - 1, temp); ++ E1000_WRITE_FLUSH(hw); ++ } else { ++ e1000_mta_set_generic(hw, hash_value); ++ } ++} ++ ++/** ++ * e1000_led_on_82543 - Turn on SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED on. This is a function pointer entry point ++ * called by the api module. ++ **/ ++static s32 e1000_led_on_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGFUNC("e1000_led_on_82543"); ++ ++ if (hw->mac.type == e1000_82544 && ++ hw->phy.media_type == e1000_media_type_copper) { ++ /* Clear SW-defineable Pin 0 to turn on the LED */ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else { ++ /* Fiber 82544 and all 82543 use this method */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_off_82543 - Turn off SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED off. This is a function pointer entry point ++ * called by the api module. ++ **/ ++static s32 e1000_led_off_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGFUNC("e1000_led_off_82543"); ++ ++ if (hw->mac.type == e1000_82544 && ++ hw->phy.media_type == e1000_media_type_copper) { ++ /* Set SW-defineable Pin 0 to turn off the LED */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else { ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82543 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. 
++ **/ ++static void e1000_clear_hw_cntrs_82543(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_82543"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/kcompat.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/kcompat.h 2021-04-07 16:01:27.633633586 +0800 +@@ -0,0 +1,603 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _KCOMPAT_H_ ++#define _KCOMPAT_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++/* NAPI enable/disable flags here */ ++ ++ ++#ifdef _E1000_H_ ++#ifdef CONFIG_E1000_NAPI ++#define NAPI ++#endif ++#ifdef E1000_NAPI ++#undef NAPI ++#define NAPI ++#endif ++#ifdef E1000_NO_NAPI ++#undef NAPI ++#endif ++#endif ++ ++#ifdef _IGB_H_ ++#define NAPI ++#endif ++ ++#ifdef _IXGB_H_ ++#ifdef CONFIG_IXGB_NAPI ++#define NAPI ++#endif ++#ifdef IXGB_NAPI ++#undef NAPI ++#define NAPI ++#endif ++#ifdef IXGB_NO_NAPI ++#undef NAPI ++#endif ++#endif ++ ++ ++#ifdef DRIVER_E1000 ++#define adapter_struct e1000_adapter ++#endif ++ ++ ++// RTNET settings ++#ifdef NAPI ++#undef NAPI ++#endif ++ ++#undef NETIF_F_TSO ++#undef NETIF_F_HW_VLAN_TX ++#undef CONFIG_NET_POLL_CONTROLLER ++#ifdef ETHTOOL_GPERMADDR ++#undef ETHTOOL_GPERMADDR ++#endif ++ ++ ++/* and finally set defines so that the code sees the changes */ ++#ifdef NAPI ++#ifndef CONFIG_E1000_NAPI ++#define CONFIG_E1000_NAPI ++#endif ++#ifndef CONFIG_IXGB_NAPI ++#define CONFIG_IXGB_NAPI ++#endif ++#else ++#undef CONFIG_E1000_NAPI ++#undef CONFIG_IXGB_NAPI ++#endif ++ ++/* packet split disable/enable */ ++#ifdef DISABLE_PACKET_SPLIT ++#undef CONFIG_E1000_DISABLE_PACKET_SPLIT ++#define CONFIG_E1000_DISABLE_PACKET_SPLIT ++#endif ++ ++/* MSI compatibility code for all kernels and drivers */ ++#ifdef DISABLE_PCI_MSI ++#undef CONFIG_PCI_MSI ++#endif ++ ++#ifdef DISABLE_PM ++#undef CONFIG_PM ++#endif ++ ++#ifdef DISABLE_NET_POLL_CONTROLLER ++#undef CONFIG_NET_POLL_CONTROLLER ++#endif ++ ++#ifndef PMSG_SUSPEND ++#define PMSG_SUSPEND 3 ++#endif ++ ++/* generic boolean compatibility */ ++#undef TRUE ++#undef FALSE ++#define TRUE true ++#define FALSE false ++#ifdef GCC_VERSION ++#if ( GCC_VERSION < 3000 ) ++#define _Bool char ++#endif ++#endif ++#ifndef bool ++#define bool _Bool ++#define true 1 ++#define false 0 ++#endif ++ ++ ++#ifndef module_param ++#define module_param(v,t,p) MODULE_PARM(v, "i"); ++#endif ++ ++#ifndef DMA_64BIT_MASK ++#define DMA_64BIT_MASK 0xffffffffffffffffULL ++#endif ++ ++#ifndef DMA_32BIT_MASK ++#define DMA_32BIT_MASK 0x00000000ffffffffULL ++#endif ++ ++#ifndef PCI_CAP_ID_EXP ++#define PCI_CAP_ID_EXP 0x10 ++#endif ++ ++#ifndef mmiowb ++#ifdef CONFIG_IA64 ++#define mmiowb() asm volatile ("mf.a" ::: "memory") ++#else ++#define mmiowb() ++#endif ++#endif ++ ++#ifndef SET_NETDEV_DEV ++#define SET_NETDEV_DEV(net, pdev) ++#endif ++ ++#ifndef HAVE_FREE_NETDEV ++#define free_netdev(x) kfree(x) ++#endif ++ ++#ifdef HAVE_POLL_CONTROLLER ++#define CONFIG_NET_POLL_CONTROLLER ++#endif ++ ++#ifndef NETDEV_TX_OK ++#define NETDEV_TX_OK 0 ++#endif ++ ++#ifndef NETDEV_TX_BUSY ++#define NETDEV_TX_BUSY 1 ++#endif ++ ++#ifndef NETDEV_TX_LOCKED ++#define NETDEV_TX_LOCKED -1 ++#endif ++ ++#ifndef SKB_DATAREF_SHIFT ++/* if we do not have the infrastructure to detect if skb_header is cloned ++ just return false in all cases */ ++#define skb_header_cloned(x) 0 ++#endif ++ ++#ifndef NETIF_F_GSO ++#define gso_size tso_size ++#define gso_segs tso_segs ++#endif ++ ++#ifndef CHECKSUM_PARTIAL ++#define CHECKSUM_PARTIAL CHECKSUM_HW ++#define CHECKSUM_COMPLETE CHECKSUM_HW ++#endif ++ ++#ifndef __read_mostly ++#define __read_mostly ++#endif ++ ++#ifndef MII_RESV1 
++#define MII_RESV1 0x17 /* Reserved... */ ++#endif ++ ++#ifndef unlikely ++#define unlikely(_x) _x ++#define likely(_x) _x ++#endif ++ ++#ifndef WARN_ON ++#define WARN_ON(x) ++#endif ++ ++#ifndef PCI_DEVICE ++#define PCI_DEVICE(vend,dev) \ ++ .vendor = (vend), .device = (dev), \ ++ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID ++#endif ++ ++#ifndef num_online_cpus ++#define num_online_cpus() smp_num_cpus ++#endif ++ ++#ifndef _LINUX_RANDOM_H ++#include ++#endif ++ ++#ifndef DECLARE_BITMAP ++#ifndef BITS_TO_LONGS ++#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) ++#endif ++#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)] ++#endif ++ ++#ifndef VLAN_HLEN ++#define VLAN_HLEN 4 ++#endif ++ ++#ifndef VLAN_ETH_HLEN ++#define VLAN_ETH_HLEN 18 ++#endif ++ ++#ifndef VLAN_ETH_FRAME_LEN ++#define VLAN_ETH_FRAME_LEN 1518 ++#endif ++ ++ ++/*****************************************************************************/ ++/* Installations with ethtool version without eeprom, adapter id, or statistics ++ * support */ ++ ++#ifndef ETH_GSTRING_LEN ++#define ETH_GSTRING_LEN 32 ++#endif ++ ++#ifndef ETHTOOL_GSTATS ++#define ETHTOOL_GSTATS 0x1d ++#undef ethtool_drvinfo ++#define ethtool_drvinfo k_ethtool_drvinfo ++struct k_ethtool_drvinfo { ++ u32 cmd; ++ char driver[32]; ++ char version[32]; ++ char fw_version[32]; ++ char bus_info[32]; ++ char reserved1[32]; ++ char reserved2[16]; ++ u32 n_stats; ++ u32 testinfo_len; ++ u32 eedump_len; ++ u32 regdump_len; ++}; ++ ++struct ethtool_stats { ++ u32 cmd; ++ u32 n_stats; ++ u64 data[0]; ++}; ++#endif /* ETHTOOL_GSTATS */ ++ ++#ifndef ETHTOOL_PHYS_ID ++#define ETHTOOL_PHYS_ID 0x1c ++#endif /* ETHTOOL_PHYS_ID */ ++ ++#ifndef ETHTOOL_GSTRINGS ++#define ETHTOOL_GSTRINGS 0x1b ++enum ethtool_stringset { ++ ETH_SS_TEST = 0, ++ ETH_SS_STATS, ++}; ++struct ethtool_gstrings { ++ u32 cmd; /* ETHTOOL_GSTRINGS */ ++ u32 string_set; /* string set id e.c. ETH_SS_TEST, etc*/ ++ u32 len; /* number of strings in the string set */ ++ u8 data[0]; ++}; ++#endif /* ETHTOOL_GSTRINGS */ ++ ++#ifndef ETHTOOL_TEST ++#define ETHTOOL_TEST 0x1a ++enum ethtool_test_flags { ++ ETH_TEST_FL_OFFLINE = (1 << 0), ++ ETH_TEST_FL_FAILED = (1 << 1), ++}; ++struct ethtool_test { ++ u32 cmd; ++ u32 flags; ++ u32 reserved; ++ u32 len; ++ u64 data[0]; ++}; ++#endif /* ETHTOOL_TEST */ ++ ++#ifndef ETHTOOL_GEEPROM ++#define ETHTOOL_GEEPROM 0xb ++#undef ETHTOOL_GREGS ++struct ethtool_eeprom { ++ u32 cmd; ++ u32 magic; ++ u32 offset; ++ u32 len; ++ u8 data[0]; ++}; ++ ++struct ethtool_value { ++ u32 cmd; ++ u32 data; ++}; ++#endif /* ETHTOOL_GEEPROM */ ++ ++#ifndef ETHTOOL_GLINK ++#define ETHTOOL_GLINK 0xa ++#endif /* ETHTOOL_GLINK */ ++ ++#ifndef ETHTOOL_GREGS ++#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */ ++#define ethtool_regs _kc_ethtool_regs ++/* for passing big chunks of data */ ++struct _kc_ethtool_regs { ++ u32 cmd; ++ u32 version; /* driver-specific, indicates different chips/revs */ ++ u32 len; /* bytes */ ++ u8 data[0]; ++}; ++#endif /* ETHTOOL_GREGS */ ++ ++#ifndef ETHTOOL_GMSGLVL ++#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ ++#endif ++#ifndef ETHTOOL_SMSGLVL ++#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. 
*/ ++#endif ++#ifndef ETHTOOL_NWAY_RST ++#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */ ++#endif ++#ifndef ETHTOOL_GLINK ++#define ETHTOOL_GLINK 0x0000000a /* Get link status */ ++#endif ++#ifndef ETHTOOL_GEEPROM ++#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ ++#endif ++#ifndef ETHTOOL_SEEPROM ++#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */ ++#endif ++#ifndef ETHTOOL_GCOALESCE ++#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ ++/* for configuring coalescing parameters of chip */ ++#define ethtool_coalesce _kc_ethtool_coalesce ++struct _kc_ethtool_coalesce { ++ u32 cmd; /* ETHTOOL_{G,S}COALESCE */ ++ ++ /* How many usecs to delay an RX interrupt after ++ * a packet arrives. If 0, only rx_max_coalesced_frames ++ * is used. ++ */ ++ u32 rx_coalesce_usecs; ++ ++ /* How many packets to delay an RX interrupt after ++ * a packet arrives. If 0, only rx_coalesce_usecs is ++ * used. It is illegal to set both usecs and max frames ++ * to zero as this would cause RX interrupts to never be ++ * generated. ++ */ ++ u32 rx_max_coalesced_frames; ++ ++ /* Same as above two parameters, except that these values ++ * apply while an IRQ is being serviced by the host. Not ++ * all cards support this feature and the values are ignored ++ * in that case. ++ */ ++ u32 rx_coalesce_usecs_irq; ++ u32 rx_max_coalesced_frames_irq; ++ ++ /* How many usecs to delay a TX interrupt after ++ * a packet is sent. If 0, only tx_max_coalesced_frames ++ * is used. ++ */ ++ u32 tx_coalesce_usecs; ++ ++ /* How many packets to delay a TX interrupt after ++ * a packet is sent. If 0, only tx_coalesce_usecs is ++ * used. It is illegal to set both usecs and max frames ++ * to zero as this would cause TX interrupts to never be ++ * generated. ++ */ ++ u32 tx_max_coalesced_frames; ++ ++ /* Same as above two parameters, except that these values ++ * apply while an IRQ is being serviced by the host. Not ++ * all cards support this feature and the values are ignored ++ * in that case. ++ */ ++ u32 tx_coalesce_usecs_irq; ++ u32 tx_max_coalesced_frames_irq; ++ ++ /* How many usecs to delay in-memory statistics ++ * block updates. Some drivers do not have an in-memory ++ * statistic block, and in such cases this value is ignored. ++ * This value must not be zero. ++ */ ++ u32 stats_block_coalesce_usecs; ++ ++ /* Adaptive RX/TX coalescing is an algorithm implemented by ++ * some drivers to improve latency under low packet rates and ++ * improve throughput under high packet rates. Some drivers ++ * only implement one of RX or TX adaptive coalescing. Anything ++ * not implemented by the driver causes these values to be ++ * silently ignored. ++ */ ++ u32 use_adaptive_rx_coalesce; ++ u32 use_adaptive_tx_coalesce; ++ ++ /* When the packet rate (measured in packets per second) ++ * is below pkt_rate_low, the {rx,tx}_*_low parameters are ++ * used. ++ */ ++ u32 pkt_rate_low; ++ u32 rx_coalesce_usecs_low; ++ u32 rx_max_coalesced_frames_low; ++ u32 tx_coalesce_usecs_low; ++ u32 tx_max_coalesced_frames_low; ++ ++ /* When the packet rate is below pkt_rate_high but above ++ * pkt_rate_low (both measured in packets per second) the ++ * normal {rx,tx}_* coalescing parameters are used. ++ */ ++ ++ /* When the packet rate is (measured in packets per second) ++ * is above pkt_rate_high, the {rx,tx}_*_high parameters are ++ * used. 
++ */ ++ u32 pkt_rate_high; ++ u32 rx_coalesce_usecs_high; ++ u32 rx_max_coalesced_frames_high; ++ u32 tx_coalesce_usecs_high; ++ u32 tx_max_coalesced_frames_high; ++ ++ /* How often to do adaptive coalescing packet rate sampling, ++ * measured in seconds. Must not be zero. ++ */ ++ u32 rate_sample_interval; ++}; ++#endif /* ETHTOOL_GCOALESCE */ ++ ++#ifndef ETHTOOL_SCOALESCE ++#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ ++#endif ++#ifndef ETHTOOL_GRINGPARAM ++#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ ++/* for configuring RX/TX ring parameters */ ++#define ethtool_ringparam _kc_ethtool_ringparam ++struct _kc_ethtool_ringparam { ++ u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */ ++ ++ /* Read only attributes. These indicate the maximum number ++ * of pending RX/TX ring entries the driver will allow the ++ * user to set. ++ */ ++ u32 rx_max_pending; ++ u32 rx_mini_max_pending; ++ u32 rx_jumbo_max_pending; ++ u32 tx_max_pending; ++ ++ /* Values changeable by the user. The valid values are ++ * in the range 1 to the "*_max_pending" counterpart above. ++ */ ++ u32 rx_pending; ++ u32 rx_mini_pending; ++ u32 rx_jumbo_pending; ++ u32 tx_pending; ++}; ++#endif /* ETHTOOL_GRINGPARAM */ ++ ++#ifndef ETHTOOL_SRINGPARAM ++#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */ ++#endif ++#ifndef ETHTOOL_GPAUSEPARAM ++#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ ++/* for configuring link flow control parameters */ ++#define ethtool_pauseparam _kc_ethtool_pauseparam ++struct _kc_ethtool_pauseparam { ++ u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ ++ ++ /* If the link is being auto-negotiated (via ethtool_cmd.autoneg ++ * being true) the user may set 'autoneg' here non-zero to have the ++ * pause parameters be auto-negotiated too. In such a case, the ++ * {rx,tx}_pause values below determine what capabilities are ++ * advertised. ++ * ++ * If 'autoneg' is zero or the link is not being auto-negotiated, ++ * then {rx,tx}_pause force the driver to use/not-use pause ++ * flow control. ++ */ ++ u32 autoneg; ++ u32 rx_pause; ++ u32 tx_pause; ++}; ++#endif /* ETHTOOL_GPAUSEPARAM */ ++ ++#ifndef ETHTOOL_SPAUSEPARAM ++#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ ++#endif ++#ifndef ETHTOOL_GRXCSUM ++#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_SRXCSUM ++#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_GTXCSUM ++#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_STXCSUM ++#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_GSG ++#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable ++ * (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_SSG ++#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable ++ * (ethtool_value). */ ++#endif ++#ifndef ETHTOOL_TEST ++#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. 
*/ ++#endif ++#ifndef ETHTOOL_GSTRINGS ++#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ ++#endif ++#ifndef ETHTOOL_PHYS_ID ++#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ ++#endif ++#ifndef ETHTOOL_GSTATS ++#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ ++#endif ++#ifndef ETHTOOL_GTSO ++#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_STSO ++#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ ++#endif ++ ++#ifndef ETHTOOL_BUSINFO_LEN ++#define ETHTOOL_BUSINFO_LEN 32 ++#endif ++ ++#ifndef HAVE_PCI_SET_MWI ++#define pci_set_mwi(X) pci_write_config_word(X, \ ++ PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \ ++ PCI_COMMAND_INVALIDATE); ++#define pci_clear_mwi(X) pci_write_config_word(X, \ ++ PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \ ++ ~PCI_COMMAND_INVALIDATE); ++#endif ++ ++ ++#undef HAVE_PCI_ERS ++ ++#endif /* _KCOMPAT_H_ */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82571.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82571.c 2021-04-07 16:01:27.629633592 +0800 +@@ -0,0 +1,1430 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_82571 ++ * e1000_82572 ++ * e1000_82573 ++ * e1000_82574 ++ */ ++ ++#include "e1000_api.h" ++#include "e1000_82571.h" ++ ++static s32 e1000_init_phy_params_82571(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_82571(struct e1000_hw *hw); ++static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw); ++static void e1000_release_nvm_82571(struct e1000_hw *hw); ++static s32 e1000_write_nvm_82571(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_update_nvm_checksum_82571(struct e1000_hw *hw); ++static s32 e1000_validate_nvm_checksum_82571(struct e1000_hw *hw); ++static s32 e1000_get_cfg_done_82571(struct e1000_hw *hw); ++static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, ++ bool active); ++static s32 e1000_reset_hw_82571(struct e1000_hw *hw); ++static s32 e1000_init_hw_82571(struct e1000_hw *hw); ++static void e1000_clear_vfta_82571(struct e1000_hw *hw); ++static void e1000_update_mc_addr_list_82571(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count, ++ u32 rar_used_count, u32 rar_count); ++static s32 e1000_setup_link_82571(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw); ++static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw); ++static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data); ++static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw); ++static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw); ++static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw); ++static s32 e1000_get_phy_id_82571(struct e1000_hw *hw); ++static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw); ++static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw); ++static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_read_mac_addr_82571(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw); ++ ++struct e1000_dev_spec_82571 { ++ bool laa_is_present; ++}; ++ ++/** ++ * e1000_init_phy_params_82571 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_init_phy_params_82571(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_phy_params_82571"); ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ goto out; ++ } ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 100; ++ ++ func->acquire_phy = e1000_get_hw_semaphore_82571; ++ func->check_polarity = e1000_check_polarity_igp; ++ func->check_reset_block = e1000_check_reset_block_generic; ++ func->release_phy = e1000_put_hw_semaphore_82571; ++ func->reset_phy = e1000_phy_hw_reset_generic; ++ func->set_d0_lplu_state = e1000_set_d0_lplu_state_82571; ++ func->set_d3_lplu_state = e1000_set_d3_lplu_state_generic; ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper_82571; ++ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ phy->type = e1000_phy_igp_2; ++ func->get_cfg_done = e1000_get_cfg_done_82571; ++ func->get_phy_info = e1000_get_phy_info_igp; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_igp; ++ func->get_cable_length = e1000_get_cable_length_igp_2; ++ func->read_phy_reg = e1000_read_phy_reg_igp; ++ func->write_phy_reg = e1000_write_phy_reg_igp; ++ ++ /* This uses above function pointers */ ++ ret_val = e1000_get_phy_id_82571(hw); ++ ++ /* Verify PHY ID */ ++ if (phy->id != IGP01E1000_I_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ break; ++ case e1000_82573: ++ phy->type = e1000_phy_m88; ++ func->get_cfg_done = e1000_get_cfg_done_generic; ++ func->get_phy_info = e1000_get_phy_info_m88; ++ func->commit_phy = e1000_phy_sw_reset_generic; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_m88; ++ func->get_cable_length = e1000_get_cable_length_m88; ++ func->read_phy_reg = e1000_read_phy_reg_m88; ++ func->write_phy_reg = e1000_write_phy_reg_m88; ++ ++ /* This uses above function pointers */ ++ ret_val = e1000_get_phy_id_82571(hw); ++ ++ /* Verify PHY ID */ ++ if (phy->id != M88E1111_I_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ DEBUGOUT1("PHY ID unknown: type = 0x%08x\n", phy->id); ++ goto out; ++ } ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ break; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82571 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ u16 size; ++ ++ DEBUGFUNC("e1000_init_nvm_params_82571"); ++ ++ nvm->opcode_bits = 8; ++ nvm->delay_usec = 1; ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->page_size = 32; ++ nvm->address_bits = 16; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->page_size = 8; ++ nvm->address_bits = 8; ++ break; ++ default: ++ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; ++ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8; ++ break; ++ } ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ if (((eecd >> 15) & 0x3) == 0x3) { ++ nvm->type = e1000_nvm_flash_hw; ++ nvm->word_size = 2048; ++ /* ++ * Autonomous Flash update bit must be cleared due ++ * to Flash update issue. 
++ */ ++ eecd &= ~E1000_EECD_AUPDEN; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ break; ++ } ++ /* Fall Through */ ++ default: ++ nvm->type = e1000_nvm_eeprom_spi; ++ size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ /* ++ * Added to a constant, "size" becomes the left-shift value ++ * for setting word_size. ++ */ ++ size += NVM_WORD_SIZE_BASE_SHIFT; ++ ++ /* EEPROM access above 16k is unsupported */ ++ if (size > 14) ++ size = 14; ++ nvm->word_size = 1 << size; ++ break; ++ } ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_nvm_82571; ++ func->read_nvm = (hw->mac.type == e1000_82573) ++ ? e1000_read_nvm_eerd ++ : e1000_read_nvm_spi; ++ func->release_nvm = e1000_release_nvm_82571; ++ func->update_nvm = e1000_update_nvm_checksum_82571; ++ func->validate_nvm = e1000_validate_nvm_checksum_82571; ++ func->valid_led_default = e1000_valid_led_default_82571; ++ func->write_nvm = e1000_write_nvm_82571; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_mac_params_82571 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_mac_params_82571(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_mac_params_82571"); ++ ++ /* Set media type */ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82571EB_FIBER: ++ case E1000_DEV_ID_82572EI_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ hw->phy.media_type = e1000_media_type_fiber; ++ break; ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82571EB_SERDES_DUAL: ++ case E1000_DEV_ID_82571EB_SERDES_QUAD: ++ case E1000_DEV_ID_82572EI_SERDES: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ /* Set if part includes ASF firmware */ ++ mac->asf_firmware_present = TRUE; ++ /* Set if manageability features are enabled. */ ++ mac->arc_subsystem_valid = ++ (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK) ++ ? TRUE : FALSE; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_pcie_generic; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_82571; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_82571; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_82571; ++ /* physical interface link setup */ ++ func->setup_physical_interface = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? 
e1000_setup_copper_link_82571 ++ : e1000_setup_fiber_serdes_link_82571; ++ /* check for link */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ func->check_for_link = e1000_check_for_copper_link_generic; ++ break; ++ case e1000_media_type_fiber: ++ func->check_for_link = e1000_check_for_fiber_link_generic; ++ break; ++ case e1000_media_type_internal_serdes: ++ func->check_for_link = e1000_check_for_serdes_link_generic; ++ break; ++ default: ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ break; ++ } ++ /* check management mode */ ++ func->check_mng_mode = e1000_check_mng_mode_generic; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_82571; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_generic; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_82571; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* read mac address */ ++ func->read_mac_addr = e1000_read_mac_addr_82571; ++ /* blink LED */ ++ func->blink_led = e1000_blink_led_generic; ++ /* setup LED */ ++ func->setup_led = e1000_setup_led_generic; ++ /* cleanup LED */ ++ func->cleanup_led = e1000_cleanup_led_generic; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_generic; ++ func->led_off = e1000_led_off_generic; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_82571; ++ /* link info */ ++ func->get_link_up_info = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? e1000_get_speed_and_duplex_copper_generic ++ : e1000_get_speed_and_duplex_fiber_serdes_generic; ++ ++ hw->dev_spec_size = sizeof(struct e1000_dev_spec_82571); ++ ++ /* Device-specific structure allocation */ ++ ret_val = e1000_alloc_zeroed_dev_spec_struct(hw, hw->dev_spec_size); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_82571 - Init func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. ++ **/ ++void e1000_init_function_pointers_82571(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_82571"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_82571; ++ hw->func.init_nvm_params = e1000_init_nvm_params_82571; ++ hw->func.init_phy_params = e1000_init_phy_params_82571; ++} ++ ++/** ++ * e1000_get_phy_id_82571 - Retrieve the PHY ID and revision ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY registers and stores the PHY ID and possibly the PHY ++ * revision in the hardware structure. ++ **/ ++static s32 e1000_get_phy_id_82571(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_get_phy_id_82571"); ++ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ /* ++ * The 82571 firmware may still be configuring the PHY. ++ * In this case, we cannot access the PHY until the ++ * configuration is done. So we explicitly set the ++ * PHY ID. 
++ */ ++ phy->id = IGP01E1000_I_PHY_ID; ++ break; ++ case e1000_82573: ++ ret_val = e1000_get_phy_id(hw); ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_hw_semaphore_82571 - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ **/ ++static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 ret_val = E1000_SUCCESS; ++ s32 timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ DEBUGFUNC("e1000_get_hw_semaphore_82571"); ++ ++ /* Get the FW semaphore. */ ++ for (i = 0; i < timeout; i++) { ++ swsm = E1000_READ_REG(hw, E1000_SWSM); ++ E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ usec_delay(50); ++ } ++ ++ if (i == timeout) { ++ /* Release semaphores */ ++ e1000_put_hw_semaphore_generic(hw); ++ DEBUGOUT("Driver can't access the NVM\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_put_hw_semaphore_82571 - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ **/ ++static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ ++ DEBUGFUNC("e1000_put_hw_semaphore_82571"); ++ ++ swsm = E1000_READ_REG(hw, E1000_SWSM); ++ ++ swsm &= ~E1000_SWSM_SWESMBI; ++ ++ E1000_WRITE_REG(hw, E1000_SWSM, swsm); ++} ++ ++/** ++ * e1000_acquire_nvm_82571 - Request for access to the EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * To gain access to the EEPROM, first we must obtain a hardware semaphore. ++ * Then for non-82573 hardware, set the EEPROM access request bit and wait ++ * for EEPROM access grant bit. If the access grant bit is not set, release ++ * hardware semaphore. ++ **/ ++static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_acquire_nvm_82571"); ++ ++ ret_val = e1000_get_hw_semaphore_82571(hw); ++ if (ret_val) ++ goto out; ++ ++ if (hw->mac.type != e1000_82573) ++ ret_val = e1000_acquire_nvm_generic(hw); ++ ++ if (ret_val) ++ e1000_put_hw_semaphore_82571(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_release_nvm_82571 - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit. ++ **/ ++static void e1000_release_nvm_82571(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_release_nvm_82571"); ++ ++ e1000_release_nvm_generic(hw); ++ e1000_put_hw_semaphore_82571(hw); ++} ++ ++/** ++ * e1000_write_nvm_82571 - Write to EEPROM using appropriate interface ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * For non-82573 silicon, write data to EEPROM at offset using SPI interface. ++ * ++ * If e1000_update_nvm_checksum is not called after this function, the ++ * EEPROM will most likley contain an invalid checksum. 
++ **/ ++static s32 e1000_write_nvm_82571(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_write_nvm_82571"); ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ ret_val = e1000_write_nvm_eewr_82571(hw, offset, words, data); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ ret_val = e1000_write_nvm_spi(hw, offset, words, data); ++ break; ++ default: ++ ret_val = -E1000_ERR_NVM; ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_update_nvm_checksum_82571 - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++static s32 e1000_update_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ s32 ret_val; ++ u16 i; ++ ++ DEBUGFUNC("e1000_update_nvm_checksum_82571"); ++ ++ ret_val = e1000_update_nvm_checksum_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * If our nvm is an EEPROM, then we're done ++ * otherwise, commit the checksum to the flash NVM. ++ */ ++ if (hw->nvm.type != e1000_nvm_flash_hw) ++ goto out; ++ ++ /* Check for pending operations. */ ++ for (i = 0; i < E1000_FLASH_UPDATES; i++) { ++ msec_delay(1); ++ if ((E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_FLUPD) == 0) ++ break; ++ } ++ ++ if (i == E1000_FLASH_UPDATES) { ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ /* Reset the firmware if using STM opcode. */ ++ if ((E1000_READ_REG(hw, E1000_FLOP) & 0xFF00) == E1000_STM_OPCODE) { ++ /* ++ * The enabling of and the actual reset must be done ++ * in two write cycles. ++ */ ++ E1000_WRITE_REG(hw, E1000_HICR, E1000_HICR_FW_RESET_ENABLE); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG(hw, E1000_HICR, E1000_HICR_FW_RESET); ++ } ++ ++ /* Commit the write to flash */ ++ eecd = E1000_READ_REG(hw, E1000_EECD) | E1000_EECD_FLUPD; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ ++ for (i = 0; i < E1000_FLASH_UPDATES; i++) { ++ msec_delay(1); ++ if ((E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_FLUPD) == 0) ++ break; ++ } ++ ++ if (i == E1000_FLASH_UPDATES) { ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_validate_nvm_checksum_82571 - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. ++ **/ ++static s32 e1000_validate_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_validate_nvm_checksum_82571"); ++ ++ if (hw->nvm.type == e1000_nvm_flash_hw) ++ e1000_fix_nvm_checksum_82571(hw); ++ ++ return e1000_validate_nvm_checksum_generic(hw); ++} ++ ++/** ++ * e1000_write_nvm_eewr_82571 - Write to EEPROM for 82573 silicon ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * After checking for invalid values, poll the EEPROM to ensure the previous ++ * command has completed before trying to write the next word. After write ++ * poll for completion. ++ * ++ * If e1000_update_nvm_checksum is not called after this function, the ++ * EEPROM will most likley contain an invalid checksum. 
++ **/ ++static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, eewr = 0; ++ s32 ret_val = 0; ++ ++ DEBUGFUNC("e1000_write_nvm_eewr_82571"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eewr = (data[i] << E1000_NVM_RW_REG_DATA) | ++ ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | ++ E1000_NVM_RW_REG_START; ++ ++ ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE); ++ if (ret_val) ++ break; ++ ++ E1000_WRITE_REG(hw, E1000_EEWR, eewr); ++ ++ ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE); ++ if (ret_val) ++ break; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cfg_done_82571 - Poll for configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Reads the management control register for the config done bit to be set. ++ **/ ++static s32 e1000_get_cfg_done_82571(struct e1000_hw *hw) ++{ ++ s32 timeout = PHY_CFG_TIMEOUT; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_get_cfg_done_82571"); ++ ++ while (timeout) { ++ if (E1000_READ_REG(hw, E1000_EEMNGCTL) & E1000_NVM_CFG_DONE_PORT_0) ++ break; ++ msec_delay(1); ++ timeout--; ++ } ++ if (!timeout) { ++ DEBUGOUT("MNG configuration cycle has not completed.\n"); ++ ret_val = -E1000_ERR_RESET; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_d0_lplu_state_82571 - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: TRUE to enable LPLU, FALSE to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When activating LPLU ++ * this function also disables smart speed and vice versa. LPLU will not be ++ * activated unless the device autonegotiation advertisement meets standards ++ * of either 10 or 10/100 or 10/100/1000 at all duplexes. This is a function ++ * pointer entry point only called by PHY setup routines. ++ **/ ++static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_set_d0_lplu_state_82571"); ++ ++ ret_val = e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ goto out; ++ ++ if (active) { ++ data |= IGP02E1000_PM_D0_LPLU; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else { ++ data &= ~IGP02E1000_PM_D0_LPLU; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP02E1000_PHY_POWER_MGMT, ++ data); ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. 
++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_82571 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_reset_hw_82571(struct e1000_hw *hw) ++{ ++ u32 ctrl, extcnf_ctrl, ctrl_ext, icr; ++ s32 ret_val; ++ u16 i = 0; ++ ++ DEBUGFUNC("e1000_reset_hw_82571"); ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000_disable_pcie_master_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("PCI-E Master disable polling has failed.\n"); ++ } ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ msec_delay(10); ++ ++ /* ++ * Must acquire the MDIO ownership before MAC reset. ++ * Ownership defaults to firmware after a reset. ++ */ ++ if (hw->mac.type == e1000_82573) { ++ extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ++ do { ++ E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); ++ extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP) ++ break; ++ ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ++ msec_delay(2); ++ i++; ++ } while (i < MDIO_OWNERSHIP_TIMEOUT); ++ } ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGOUT("Issuing a global reset to MAC\n"); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ ++ if (hw->nvm.type == e1000_nvm_flash_hw) { ++ usec_delay(10); ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ ret_val = e1000_get_auto_rd_done_generic(hw); ++ if (ret_val) ++ /* We don't want to continue accessing MAC registers. */ ++ goto out; ++ ++ /* ++ * Phy configuration from NVM just starts after EECD_AUTO_RD is set. ++ * Need to wait for Phy configuration completion before accessing ++ * NVM and Phy. ++ */ ++ if (hw->mac.type == e1000_82573) ++ msec_delay(25); ++ ++ /* Clear any pending interrupt events. */ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ if (!(e1000_check_alt_mac_addr_generic(hw))) ++ e1000_set_laa_state_82571(hw, TRUE); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_82571 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. 
++ **/ ++static s32 e1000_init_hw_82571(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 reg_data; ++ s32 ret_val; ++ u16 i, rar_count = mac->rar_entry_count; ++ ++ DEBUGFUNC("e1000_init_hw_82571"); ++ ++ e1000_initialize_hw_bits_82571(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000_id_led_init_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Disabling VLAN filtering */ ++ DEBUGOUT("Initializing the IEEE VLAN\n"); ++ e1000_clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ /* ++ * If, however, a locally administered address was assigned to the ++ * 82571, we must reserve a RAR for it to work around an issue where ++ * resetting one port will reload the MAC on the other port. ++ */ ++ if (e1000_get_laa_state_82571(hw)) ++ rar_count--; ++ e1000_init_rx_addrs_generic(hw, rar_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ /* Set the transmit descriptor write-back policy */ ++ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | ++ E1000_TXDCTL_COUNT_DESC; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg_data); ++ ++ /* ...for both queues. */ ++ if (mac->type != e1000_82573) { ++ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | ++ E1000_TXDCTL_COUNT_DESC; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg_data); ++ } else { ++ e1000_enable_tx_pkt_filtering(hw); ++ reg_data = E1000_READ_REG(hw, E1000_GCR); ++ reg_data |= E1000_GCR_L1_ACT_WITHOUT_L0S_RX; ++ E1000_WRITE_REG(hw, E1000_GCR, reg_data); ++ } ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82571(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_initialize_hw_bits_82571 - Initialize hardware-dependent bits ++ * @hw: pointer to the HW structure ++ * ++ * Initializes required hardware-dependent bits needed for normal operation. 
++ **/ ++static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ DEBUGFUNC("e1000_initialize_hw_bits_82571"); ++ ++ if (hw->mac.disable_hw_init_bits) ++ goto out; ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(0)); ++ reg &= ~(0xF << 27); /* 30:27 */ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ reg |= (1 << 23) | (1 << 24) | (1 << 25) | (1 << 26); ++ break; ++ default: ++ break; ++ } ++ E1000_WRITE_REG(hw, E1000_TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(1)); ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ reg &= ~((1 << 29) | (1 << 30)); ++ reg |= (1 << 22) | (1 << 24) | (1 << 25) | (1 << 26); ++ if (E1000_READ_REG(hw, E1000_TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ E1000_WRITE_REG(hw, E1000_TARC(1), reg); ++ break; ++ default: ++ break; ++ } ++ ++ /* Device Control */ ++ if (hw->mac.type == e1000_82573) { ++ reg = E1000_READ_REG(hw, E1000_CTRL); ++ reg &= ~(1 << 29); ++ E1000_WRITE_REG(hw, E1000_CTRL, reg); ++ } ++ ++ /* Extended Device Control */ ++ if (hw->mac.type == e1000_82573) { ++ reg = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ reg &= ~(1 << 23); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); ++ } ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_clear_vfta_82571 - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++static void e1000_clear_vfta_82571(struct e1000_hw *hw) ++{ ++ u32 offset; ++ u32 vfta_value = 0; ++ u32 vfta_offset = 0; ++ u32 vfta_bit_in_reg = 0; ++ ++ DEBUGFUNC("e1000_clear_vfta_82571"); ++ ++ if (hw->mac.type == e1000_82573) { ++ if (hw->mng_cookie.vlan_id != 0) { ++ /* ++ * The VFTA is a 4096b bit-field, each identifying ++ * a single VLAN ID. The following operations ++ * determine which 32b entry (i.e. offset) into the ++ * array we want to set the VLAN ID (i.e. bit) of ++ * the manageability unit. ++ */ ++ vfta_offset = (hw->mng_cookie.vlan_id >> ++ E1000_VFTA_ENTRY_SHIFT) & ++ E1000_VFTA_ENTRY_MASK; ++ vfta_bit_in_reg = 1 << (hw->mng_cookie.vlan_id & ++ E1000_VFTA_ENTRY_BIT_SHIFT_MASK); ++ } ++ } ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ /* ++ * If the offset we want to clear is the same offset of the ++ * manageability VLAN ID, then clear all bits except that of ++ * the manageability unit. ++ */ ++ vfta_value = (offset == vfta_offset) ? vfta_bit_in_reg : 0; ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, vfta_value); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/** ++ * e1000_update_mc_addr_list_82571 - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * @rar_used_count: the first RAR register free to program ++ * @rar_count: total number of supported Receive Address Registers ++ * ++ * Updates the Receive Address Registers and Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. 
++ * The parameter rar_count will usually be hw->mac.rar_entry_count ++ * unless there are workarounds that change this. ++ **/ ++static void e1000_update_mc_addr_list_82571(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count, ++ u32 rar_used_count, u32 rar_count) ++{ ++ DEBUGFUNC("e1000_update_mc_addr_list_82571"); ++ ++ if (e1000_get_laa_state_82571(hw)) ++ rar_count--; ++ ++ e1000_update_mc_addr_list_generic(hw, mc_addr_list, mc_addr_count, ++ rar_used_count, rar_count); ++} ++ ++/** ++ * e1000_setup_link_82571 - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++static s32 e1000_setup_link_82571(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_setup_link_82571"); ++ ++ /* ++ * 82573 does not have a word in the NVM to determine ++ * the default flow control setting, so we explicitly ++ * set it to full. ++ */ ++ if (hw->mac.type == e1000_82573) ++ hw->fc.type = e1000_fc_full; ++ ++ return e1000_setup_link_generic(hw); ++} ++ ++/** ++ * e1000_setup_copper_link_82571 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Configures the link for auto-neg or forced speed and duplex. Then we check ++ * for link, once link is established calls to configure collision distance ++ * and flow control are called. ++ **/ ++static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw) ++{ ++ u32 ctrl, led_ctrl; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_setup_copper_link_82571"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ switch (hw->phy.type) { ++ case e1000_phy_m88: ++ ret_val = e1000_copper_link_setup_m88(hw); ++ break; ++ case e1000_phy_igp_2: ++ ret_val = e1000_copper_link_setup_igp(hw); ++ /* Setup activity LED */ ++ led_ctrl = E1000_READ_REG(hw, E1000_LEDCTL); ++ led_ctrl &= IGP_ACTIVITY_LED_MASK; ++ led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, E1000_LEDCTL, led_ctrl); ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ break; ++ } ++ ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_setup_copper_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_fiber_serdes_link_82571 - Setup link for fiber/serdes ++ * @hw: pointer to the HW structure ++ * ++ * Configures collision distance and flow control for fiber and serdes links. ++ * Upon successful setup, poll for link. ++ **/ ++static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_setup_fiber_serdes_link_82571"); ++ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ /* ++ * If SerDes loopback mode is entered, there is no form ++ * of reset to take the adapter out of that mode. So we ++ * have to explicitly take the adapter out of loopback ++ * mode. This prevents drivers from twidling their thumbs ++ * if another tool failed to take it out of loopback mode. 
++ */ ++ E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); ++ break; ++ default: ++ break; ++ } ++ ++ return e1000_setup_fiber_serdes_link_generic(hw); ++} ++ ++/** ++ * e1000_valid_led_default_82571 - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. ++ **/ ++static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_valid_led_default_82571"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (hw->mac.type == e1000_82573 && ++ *data == ID_LED_RESERVED_F746) ++ *data = ID_LED_DEFAULT_82573; ++ else if (*data == ID_LED_RESERVED_0000 || ++ *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT; ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_laa_state_82571 - Get locally administered address state ++ * @hw: pointer to the HW structure ++ * ++ * Retrieve and return the current locally administed address state. ++ **/ ++bool e1000_get_laa_state_82571(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82571 *dev_spec; ++ bool state = FALSE; ++ ++ DEBUGFUNC("e1000_get_laa_state_82571"); ++ ++ if (hw->mac.type != e1000_82571) ++ goto out; ++ ++ dev_spec = (struct e1000_dev_spec_82571 *)hw->dev_spec; ++ ++ state = dev_spec->laa_is_present; ++ ++out: ++ return state; ++} ++ ++/** ++ * e1000_set_laa_state_82571 - Set locally administered address state ++ * @hw: pointer to the HW structure ++ * @state: enable/disable locally administered address ++ * ++ * Enable/Disable the current locally administed address state. ++ **/ ++void e1000_set_laa_state_82571(struct e1000_hw *hw, bool state) ++{ ++ struct e1000_dev_spec_82571 *dev_spec; ++ ++ DEBUGFUNC("e1000_set_laa_state_82571"); ++ ++ if (hw->mac.type != e1000_82571) ++ goto out; ++ ++ dev_spec = (struct e1000_dev_spec_82571 *)hw->dev_spec; ++ ++ dev_spec->laa_is_present = state; ++ ++ /* If workaround is activated... */ ++ if (state) { ++ /* ++ * Hold a copy of the LAA in RAR[14] This is done so that ++ * between the time RAR[0] gets clobbered and the time it ++ * gets fixed, the actual LAA is in one of the RARs and no ++ * incoming packets directed to this port are dropped. ++ * Eventually the LAA will be in RAR[0] and RAR[14]. ++ */ ++ e1000_rar_set_generic(hw, hw->mac.addr, ++ hw->mac.rar_entry_count - 1); ++ } ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_fix_nvm_checksum_82571 - Fix EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Verifies that the EEPROM has completed the update. After updating the ++ * EEPROM, we need to check bit 15 in work 0x23 for the checksum fix. If ++ * the checksum fix is not implemented, we need to set the bit and update ++ * the checksum. Otherwise, if bit 15 is set and the checksum is incorrect, ++ * we need to return bad checksum. ++ **/ ++static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val = E1000_SUCCESS; ++ u16 data; ++ ++ DEBUGFUNC("e1000_fix_nvm_checksum_82571"); ++ ++ if (nvm->type != e1000_nvm_flash_hw) ++ goto out; ++ ++ /* ++ * Check bit 4 of word 10h. If it is 0, firmware is done updating ++ * 10h-12h. Checksum may need to be fixed. 
++ */ ++ ret_val = e1000_read_nvm(hw, 0x10, 1, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!(data & 0x10)) { ++ /* ++ * Read 0x23 and check bit 15. This bit is a 1 ++ * when the checksum has already been fixed. If ++ * the checksum is still wrong and this bit is a ++ * 1, we need to return bad checksum. Otherwise, ++ * we need to set this bit to a 1 and update the ++ * checksum. ++ */ ++ ret_val = e1000_read_nvm(hw, 0x23, 1, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!(data & 0x8000)) { ++ data |= 0x8000; ++ ret_val = e1000_write_nvm(hw, 0x23, 1, &data); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_update_nvm_checksum(hw); ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_mac_addr_82571 - Read device MAC address ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_read_mac_addr_82571(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_read_mac_addr_82571"); ++ if (e1000_check_alt_mac_addr_generic(hw)) ++ ret_val = e1000_read_mac_addr_generic(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_82571 - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(e1000_check_mng_mode(hw) || e1000_check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82571 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. 
++ **/ ++static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_82571"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++ ++ temp = E1000_READ_REG(hw, E1000_MGTPRC); ++ temp = E1000_READ_REG(hw, E1000_MGTPDC); ++ temp = E1000_READ_REG(hw, E1000_MGTPTC); ++ ++ temp = E1000_READ_REG(hw, E1000_IAC); ++ temp = E1000_READ_REG(hw, E1000_ICRXOC); ++ ++ temp = E1000_READ_REG(hw, E1000_ICRXPTC); ++ temp = E1000_READ_REG(hw, E1000_ICRXATC); ++ temp = E1000_READ_REG(hw, E1000_ICTXPTC); ++ temp = E1000_READ_REG(hw, E1000_ICTXATC); ++ temp = E1000_READ_REG(hw, E1000_ICTXQEC); ++ temp = E1000_READ_REG(hw, E1000_ICTXQMTC); ++ temp = E1000_READ_REG(hw, E1000_ICRXDMTC); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_param.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_param.c 2021-04-07 16:01:27.624633599 +0800 +@@ -0,0 +1,894 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++ ++#include ++ ++#include "e1000.h" ++ ++/* This is the only thing that needs to be changed to adjust the ++ * maximum number of ports that the driver can manage. ++ */ ++ ++#define E1000_MAX_NIC 32 ++ ++#define OPTION_UNSET -1 ++#define OPTION_DISABLED 0 ++#define OPTION_ENABLED 1 ++ ++/* All parameters are treated the same, as an integer array of values. ++ * This macro just reduces the need to repeat the same declaration code ++ * over and over (plus this helps to avoid typo bugs). ++ */ ++ ++#define E1000_PARAM_INIT { [0 ... 
E1000_MAX_NIC] = OPTION_UNSET } ++#ifndef module_param_array ++/* Module Parameters are always initialized to -1, so that the driver ++ * can tell the difference between no user specified value or the ++ * user asking for the default value. ++ * The true default values are loaded in when e1000_check_options is called. ++ * ++ * This is a GCC extension to ANSI C. ++ * See the item "Labeled Elements in Initializers" in the section ++ * "Extensions to the C Language Family" of the GCC documentation. ++ */ ++ ++#define E1000_PARAM(X, desc) \ ++ static const int X[E1000_MAX_NIC+1] = E1000_PARAM_INIT; \ ++ MODULE_PARM(X, "1-" __MODULE_STRING(E1000_MAX_NIC) "i"); \ ++ MODULE_PARM_DESC(X, desc); ++#else ++#define E1000_PARAM(X, desc) \ ++ static int X[E1000_MAX_NIC+1] = E1000_PARAM_INIT; \ ++ static unsigned int num_##X = 0; \ ++ module_param_array_named(X, X, int, &num_##X, 0); \ ++ MODULE_PARM_DESC(X, desc); ++#endif ++ ++/* Transmit Descriptor Count ++ * ++ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers ++ * Valid Range: 80-4096 for 82544 and newer ++ * ++ * Default Value: 256 ++ */ ++E1000_PARAM(TxDescriptors, "Number of transmit descriptors"); ++ ++/* Receive Descriptor Count ++ * ++ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers ++ * Valid Range: 80-4096 for 82544 and newer ++ * ++ * Default Value: 256 ++ */ ++E1000_PARAM(RxDescriptors, "Number of receive descriptors"); ++ ++/* User Specified Speed Override ++ * ++ * Valid Range: 0, 10, 100, 1000 ++ * - 0 - auto-negotiate at all supported speeds ++ * - 10 - only link at 10 Mbps ++ * - 100 - only link at 100 Mbps ++ * - 1000 - only link at 1000 Mbps ++ * ++ * Default Value: 0 ++ */ ++E1000_PARAM(Speed, "Speed setting"); ++ ++/* User Specified Duplex Override ++ * ++ * Valid Range: 0-2 ++ * - 0 - auto-negotiate for duplex ++ * - 1 - only link at half duplex ++ * - 2 - only link at full duplex ++ * ++ * Default Value: 0 ++ */ ++E1000_PARAM(Duplex, "Duplex setting"); ++ ++/* Auto-negotiation Advertisement Override ++ * ++ * Valid Range: 0x01-0x0F, 0x20-0x2F (copper); 0x20 (fiber) ++ * ++ * The AutoNeg value is a bit mask describing which speed and duplex ++ * combinations should be advertised during auto-negotiation. 
++ * The supported speed and duplex modes are listed below ++ * ++ * Bit 7 6 5 4 3 2 1 0 ++ * Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10 ++ * Duplex Full Full Half Full Half ++ * ++ * Default Value: 0x2F (copper); 0x20 (fiber) ++ */ ++E1000_PARAM(AutoNeg, "Advertised auto-negotiation setting"); ++#define AUTONEG_ADV_DEFAULT 0x2F ++#define AUTONEG_ADV_MASK 0x2F ++ ++/* User Specified Flow Control Override ++ * ++ * Valid Range: 0-3 ++ * - 0 - No Flow Control ++ * - 1 - Rx only, respond to PAUSE frames but do not generate them ++ * - 2 - Tx only, generate PAUSE frames but ignore them on receive ++ * - 3 - Full Flow Control Support ++ * ++ * Default Value: Read flow control settings from the EEPROM ++ */ ++E1000_PARAM(FlowControl, "Flow Control setting"); ++#define FLOW_CONTROL_DEFAULT FLOW_CONTROL_FULL ++ ++/* XsumRX - Receive Checksum Offload Enable/Disable ++ * ++ * Valid Range: 0, 1 ++ * - 0 - disables all checksum offload ++ * - 1 - enables receive IP/TCP/UDP checksum offload ++ * on 82543 and newer -based NICs ++ * ++ * Default Value: 1 ++ */ ++E1000_PARAM(XsumRX, "Disable or enable Receive Checksum offload"); ++ ++/* Transmit Interrupt Delay in units of 1.024 microseconds ++ * Tx interrupt delay needs to typically be set to something non zero ++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(TxIntDelay, "Transmit Interrupt Delay"); ++#define DEFAULT_TIDV 0 ++#define MAX_TXDELAY 0xFFFF ++#define MIN_TXDELAY 0 ++ ++/* Transmit Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay"); ++#define DEFAULT_TADV 0 ++#define MAX_TXABSDELAY 0xFFFF ++#define MIN_TXABSDELAY 0 ++ ++/* Receive Interrupt Delay in units of 1.024 microseconds ++ * hardware will likely hang if you set this to anything but zero. 
++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(RxIntDelay, "Receive Interrupt Delay"); ++#define DEFAULT_RDTR 0 ++#define MAX_RXDELAY 0xFFFF ++#define MIN_RXDELAY 0 ++ ++/* Receive Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay"); ++#define DEFAULT_RADV 0 ++#define MAX_RXABSDELAY 0xFFFF ++#define MIN_RXABSDELAY 0 ++ ++/* Interrupt Throttle Rate (interrupts/sec) ++ * ++ * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative) ++ */ ++E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate"); ++#define DEFAULT_ITR 0 ++#define MAX_ITR 100000 ++#define MIN_ITR 100 ++ ++/* Enable Smart Power Down of the PHY ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 0 (disabled) ++ */ ++E1000_PARAM(SmartPowerDownEnable, "Enable PHY smart power down"); ++ ++/* Enable Kumeran Lock Loss workaround ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 1 (enabled) ++ */ ++E1000_PARAM(KumeranLockLoss, "Enable Kumeran lock loss workaround"); ++ ++ ++struct e1000_option { ++ enum { enable_option, range_option, list_option } type; ++ const char *name; ++ const char *err; ++ int def; ++ union { ++ struct { /* range_option info */ ++ int min; ++ int max; ++ } r; ++ struct { /* list_option info */ ++ int nr; ++ struct e1000_opt_list { int i; char *str; } *p; ++ } l; ++ } arg; ++}; ++ ++static int e1000_validate_option(unsigned int *value, ++ const struct e1000_option *opt, ++ struct e1000_adapter *adapter) ++{ ++ if (*value == OPTION_UNSET) { ++ *value = opt->def; ++ return 0; ++ } ++ ++ switch (opt->type) { ++ case enable_option: ++ switch (*value) { ++ case OPTION_ENABLED: ++ DPRINTK(PROBE, INFO, "%s Enabled\n", opt->name); ++ return 0; ++ case OPTION_DISABLED: ++ DPRINTK(PROBE, INFO, "%s Disabled\n", opt->name); ++ return 0; ++ } ++ break; ++ case range_option: ++ if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) { ++ DPRINTK(PROBE, INFO, ++ "%s set to %i\n", opt->name, *value); ++ return 0; ++ } ++ break; ++ case list_option: { ++ int i; ++ struct e1000_opt_list *ent; ++ ++ for (i = 0; i < opt->arg.l.nr; i++) { ++ ent = &opt->arg.l.p[i]; ++ if (*value == ent->i) { ++ if (ent->str[0] != '\0') ++ DPRINTK(PROBE, INFO, "%s\n", ent->str); ++ return 0; ++ } ++ } ++ } ++ break; ++ default: ++ BUG(); ++ } ++ ++ DPRINTK(PROBE, INFO, "Invalid %s value specified (%i) %s\n", ++ opt->name, *value, opt->err); ++ *value = opt->def; ++ return -1; ++} ++ ++static void e1000_check_fiber_options(struct e1000_adapter *adapter); ++static void e1000_check_copper_options(struct e1000_adapter *adapter); ++ ++/** ++ * e1000_check_options - Range Checking for Command Line Parameters ++ * @adapter: board private structure ++ * ++ * This routine checks all command line parameters for valid user ++ * input. If an invalid value is given, or if no user specified ++ * value exists, a default value is used. The final value is stored ++ * in a variable in the adapter structure. 
++ **/ ++void e1000_check_options(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ int bd = adapter->bd_number; ++ if (bd >= E1000_MAX_NIC) { ++ DPRINTK(PROBE, NOTICE, ++ "Warning: no configuration for board #%i\n", bd); ++ DPRINTK(PROBE, NOTICE, "Using defaults for all values\n"); ++#ifndef module_param_array ++ bd = E1000_MAX_NIC; ++#endif ++ } ++ ++ { /* Transmit Descriptor Count */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Descriptors", ++ .err = "using default of " ++ __MODULE_STRING(E1000_DEFAULT_TXD), ++ .def = E1000_DEFAULT_TXD, ++ .arg = { .r = { .min = E1000_MIN_TXD }} ++ }; ++ struct e1000_tx_ring *tx_ring = adapter->tx_ring; ++ int i; ++ opt.arg.r.max = hw->mac.type < e1000_82544 ? ++ E1000_MAX_TXD : E1000_MAX_82544_TXD; ++ ++#ifdef module_param_array ++ if (num_TxDescriptors > bd) { ++#endif ++ tx_ring->count = TxDescriptors[bd]; ++ e1000_validate_option(&tx_ring->count, &opt, adapter); ++ tx_ring->count = ALIGN(tx_ring->count, ++ REQ_TX_DESCRIPTOR_MULTIPLE); ++#ifdef module_param_array ++ } else { ++ tx_ring->count = opt.def; ++ } ++#endif ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ tx_ring[i].count = tx_ring->count; ++ } ++ { /* Receive Descriptor Count */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Descriptors", ++ .err = "using default of " ++ __MODULE_STRING(E1000_DEFAULT_RXD), ++ .def = E1000_DEFAULT_RXD, ++ .arg = { .r = { .min = E1000_MIN_RXD }} ++ }; ++ struct e1000_rx_ring *rx_ring = adapter->rx_ring; ++ int i; ++ opt.arg.r.max = hw->mac.type < e1000_82544 ? E1000_MAX_RXD : ++ E1000_MAX_82544_RXD; ++ ++#ifdef module_param_array ++ if (num_RxDescriptors > bd) { ++#endif ++ rx_ring->count = RxDescriptors[bd]; ++ e1000_validate_option(&rx_ring->count, &opt, adapter); ++ rx_ring->count = ALIGN(rx_ring->count, ++ REQ_RX_DESCRIPTOR_MULTIPLE); ++#ifdef module_param_array ++ } else { ++ rx_ring->count = opt.def; ++ } ++#endif ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ rx_ring[i].count = rx_ring->count; ++ } ++ { /* Checksum Offload Enable/Disable */ ++ struct e1000_option opt = { ++ .type = enable_option, ++ .name = "Checksum Offload", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_XsumRX > bd) { ++#endif ++ unsigned int rx_csum = XsumRX[bd]; ++ e1000_validate_option(&rx_csum, &opt, adapter); ++ adapter->rx_csum = rx_csum; ++#ifdef module_param_array ++ } else { ++ adapter->rx_csum = opt.def; ++ } ++#endif ++ } ++ { /* Flow Control */ ++ ++ struct e1000_opt_list fc_list[] = ++ {{ e1000_fc_none, "Flow Control Disabled" }, ++ { e1000_fc_rx_pause,"Flow Control Receive Only" }, ++ { e1000_fc_tx_pause,"Flow Control Transmit Only" }, ++ { e1000_fc_full, "Flow Control Enabled" }, ++ { e1000_fc_default, "Flow Control Hardware Default" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Flow Control", ++ .err = "reading default settings from EEPROM", ++ .def = e1000_fc_default, ++ .arg = { .l = { .nr = ARRAY_SIZE(fc_list), ++ .p = fc_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_FlowControl > bd) { ++#endif ++ unsigned int fc = FlowControl[bd]; ++ e1000_validate_option(&fc, &opt, adapter); ++ hw->fc.original_type = fc; ++ hw->fc.type = fc; ++#ifdef module_param_array ++ } else { ++ hw->fc.original_type = opt.def; ++ hw->fc.type = opt.def; ++ } ++#endif ++ } ++ { /* Transmit Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Interrupt 
Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_TIDV), ++ .def = DEFAULT_TIDV, ++ .arg = { .r = { .min = MIN_TXDELAY, ++ .max = MAX_TXDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_TxIntDelay > bd) { ++#endif ++ adapter->tx_int_delay = TxIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->tx_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Transmit Absolute Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Absolute Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_TADV), ++ .def = DEFAULT_TADV, ++ .arg = { .r = { .min = MIN_TXABSDELAY, ++ .max = MAX_TXABSDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_TxAbsIntDelay > bd) { ++#endif ++ adapter->tx_abs_int_delay = TxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_abs_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->tx_abs_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Receive Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_RDTR), ++ .def = DEFAULT_RDTR, ++ .arg = { .r = { .min = MIN_RXDELAY, ++ .max = MAX_RXDELAY }} ++ }; ++ ++ /* modify min and default if 82573 for slow ping w/a, ++ * a value greater than 8 needs to be set for RDTR */ ++ ++#ifdef module_param_array ++ if (num_RxIntDelay > bd) { ++#endif ++ adapter->rx_int_delay = RxIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->rx_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Receive Absolute Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Absolute Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_RADV), ++ .def = DEFAULT_RADV, ++ .arg = { .r = { .min = MIN_RXABSDELAY, ++ .max = MAX_RXABSDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_RxAbsIntDelay > bd) { ++#endif ++ adapter->rx_abs_int_delay = RxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_abs_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->rx_abs_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Interrupt Throttling Rate */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Interrupt Throttling Rate (ints/sec)", ++ .err = "using default of " __MODULE_STRING(DEFAULT_ITR), ++ .def = DEFAULT_ITR, ++ .arg = { .r = { .min = MIN_ITR, ++ .max = MAX_ITR }} ++ }; ++ ++#ifdef module_param_array ++ if (num_InterruptThrottleRate > bd) { ++#endif ++ adapter->itr = InterruptThrottleRate[bd]; ++ switch (adapter->itr) { ++ case 0: ++ DPRINTK(PROBE, INFO, "%s turned off\n", ++ opt.name); ++ break; ++ case 1: ++ DPRINTK(PROBE, INFO, "%s set to dynamic mode\n", ++ opt.name); ++ adapter->itr_setting = adapter->itr; ++ adapter->itr = 20000; ++ break; ++ case 3: ++ DPRINTK(PROBE, INFO, ++ "%s set to dynamic conservative mode\n", ++ opt.name); ++ adapter->itr_setting = adapter->itr; ++ adapter->itr = 20000; ++ break; ++ default: ++ e1000_validate_option(&adapter->itr, &opt, ++ adapter); ++ /* save the setting, because the dynamic bits change itr */ ++ /* clear the lower two bits because they are ++ * used as control */ ++ adapter->itr_setting = adapter->itr & ~3; ++ break; ++ } ++#ifdef module_param_array ++ } else { ++ adapter->itr_setting = opt.def; ++ adapter->itr = 20000; ++ } ++#endif ++ } ++ { /* 
Smart Power Down */ ++ struct e1000_option opt = { ++ .type = enable_option, ++ .name = "PHY Smart Power Down", ++ .err = "defaulting to Disabled", ++ .def = OPTION_DISABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_SmartPowerDownEnable > bd) { ++#endif ++ unsigned int spd = SmartPowerDownEnable[bd]; ++ e1000_validate_option(&spd, &opt, adapter); ++ adapter->flags |= spd ? E1000_FLAG_SMART_POWER_DOWN : 0; ++#ifdef module_param_array ++ } else { ++ adapter->flags &= ~E1000_FLAG_SMART_POWER_DOWN; ++ } ++#endif ++ } ++ { /* Kumeran Lock Loss Workaround */ ++ struct e1000_option opt = { ++ .type = enable_option, ++ .name = "Kumeran Lock Loss Workaround", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_KumeranLockLoss > bd) { ++#endif ++ unsigned int kmrn_lock_loss = KumeranLockLoss[bd]; ++ e1000_validate_option(&kmrn_lock_loss, &opt, adapter); ++ if (hw->mac.type == e1000_ich8lan) ++ e1000_set_kmrn_lock_loss_workaround_ich8lan(hw, ++ kmrn_lock_loss); ++#ifdef module_param_array ++ } else { ++ if (hw->mac.type == e1000_ich8lan) ++ e1000_set_kmrn_lock_loss_workaround_ich8lan(hw, ++ opt.def); ++ } ++#endif ++ } ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_fiber: ++ case e1000_media_type_internal_serdes: ++ e1000_check_fiber_options(adapter); ++ break; ++ case e1000_media_type_copper: ++ e1000_check_copper_options(adapter); ++ break; ++ default: ++ BUG(); ++ } ++ ++} ++ ++/** ++ * e1000_check_fiber_options - Range Checking for Link Options, Fiber Version ++ * @adapter: board private structure ++ * ++ * Handles speed and duplex options on fiber adapters ++ **/ ++static void e1000_check_fiber_options(struct e1000_adapter *adapter) ++{ ++ int bd = adapter->bd_number; ++#ifndef module_param_array ++ bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd; ++ if ((Speed[bd] != OPTION_UNSET)) { ++#else ++ if (num_Speed > bd) { ++#endif ++ DPRINTK(PROBE, INFO, "Speed not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++ ++#ifndef module_param_array ++ if ((Duplex[bd] != OPTION_UNSET)) { ++#else ++ if (num_Duplex > bd) { ++#endif ++ DPRINTK(PROBE, INFO, "Duplex not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++ ++#ifndef module_param_array ++ if ((AutoNeg[bd] != OPTION_UNSET) && (AutoNeg[bd] != 0x20)) { ++#else ++ if ((num_AutoNeg > bd) && (AutoNeg[bd] != 0x20)) { ++#endif ++ DPRINTK(PROBE, INFO, "AutoNeg other than 1000/Full is " ++ "not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++} ++ ++/** ++ * e1000_check_copper_options - Range Checking for Link Options, Copper Version ++ * @adapter: board private structure ++ * ++ * Handles speed and duplex options on copper adapters ++ **/ ++static void e1000_check_copper_options(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ unsigned int speed, dplx, an; ++ int bd = adapter->bd_number; ++#ifndef module_param_array ++ bd = bd > E1000_MAX_NIC ? 
E1000_MAX_NIC : bd; ++#endif ++ ++ { /* Speed */ ++ struct e1000_opt_list speed_list[] = {{ 0, "" }, ++ { SPEED_10, "" }, ++ { SPEED_100, "" }, ++ { SPEED_1000, "" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Speed", ++ .err = "parameter ignored", ++ .def = 0, ++ .arg = { .l = { .nr = ARRAY_SIZE(speed_list), ++ .p = speed_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_Speed > bd) { ++#endif ++ speed = Speed[bd]; ++ e1000_validate_option(&speed, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ speed = opt.def; ++ } ++#endif ++ } ++ { /* Duplex */ ++ struct e1000_opt_list dplx_list[] = {{ 0, "" }, ++ { HALF_DUPLEX, "" }, ++ { FULL_DUPLEX, "" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Duplex", ++ .err = "parameter ignored", ++ .def = 0, ++ .arg = { .l = { .nr = ARRAY_SIZE(dplx_list), ++ .p = dplx_list }} ++ }; ++ ++ if (e1000_check_reset_block(hw)) { ++ DPRINTK(PROBE, INFO, ++ "Link active due to SoL/IDER Session. " ++ "Speed/Duplex/AutoNeg parameter ignored.\n"); ++ return; ++ } ++#ifdef module_param_array ++ if (num_Duplex > bd) { ++#endif ++ dplx = Duplex[bd]; ++ e1000_validate_option(&dplx, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ dplx = opt.def; ++ } ++#endif ++ } ++ ++#ifdef module_param_array ++ if ((num_AutoNeg > bd) && (speed != 0 || dplx != 0)) { ++#else ++ if (AutoNeg[bd] != OPTION_UNSET && (speed != 0 || dplx != 0)) { ++#endif ++ DPRINTK(PROBE, INFO, ++ "AutoNeg specified along with Speed or Duplex, " ++ "parameter ignored\n"); ++ hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; ++ } else { /* Autoneg */ ++ struct e1000_opt_list an_list[] = ++ #define AA "AutoNeg advertising " ++ {{ 0x01, AA "10/HD" }, ++ { 0x02, AA "10/FD" }, ++ { 0x03, AA "10/FD, 10/HD" }, ++ { 0x04, AA "100/HD" }, ++ { 0x05, AA "100/HD, 10/HD" }, ++ { 0x06, AA "100/HD, 10/FD" }, ++ { 0x07, AA "100/HD, 10/FD, 10/HD" }, ++ { 0x08, AA "100/FD" }, ++ { 0x09, AA "100/FD, 10/HD" }, ++ { 0x0a, AA "100/FD, 10/FD" }, ++ { 0x0b, AA "100/FD, 10/FD, 10/HD" }, ++ { 0x0c, AA "100/FD, 100/HD" }, ++ { 0x0d, AA "100/FD, 100/HD, 10/HD" }, ++ { 0x0e, AA "100/FD, 100/HD, 10/FD" }, ++ { 0x0f, AA "100/FD, 100/HD, 10/FD, 10/HD" }, ++ { 0x20, AA "1000/FD" }, ++ { 0x21, AA "1000/FD, 10/HD" }, ++ { 0x22, AA "1000/FD, 10/FD" }, ++ { 0x23, AA "1000/FD, 10/FD, 10/HD" }, ++ { 0x24, AA "1000/FD, 100/HD" }, ++ { 0x25, AA "1000/FD, 100/HD, 10/HD" }, ++ { 0x26, AA "1000/FD, 100/HD, 10/FD" }, ++ { 0x27, AA "1000/FD, 100/HD, 10/FD, 10/HD" }, ++ { 0x28, AA "1000/FD, 100/FD" }, ++ { 0x29, AA "1000/FD, 100/FD, 10/HD" }, ++ { 0x2a, AA "1000/FD, 100/FD, 10/FD" }, ++ { 0x2b, AA "1000/FD, 100/FD, 10/FD, 10/HD" }, ++ { 0x2c, AA "1000/FD, 100/FD, 100/HD" }, ++ { 0x2d, AA "1000/FD, 100/FD, 100/HD, 10/HD" }, ++ { 0x2e, AA "1000/FD, 100/FD, 100/HD, 10/FD" }, ++ { 0x2f, AA "1000/FD, 100/FD, 100/HD, 10/FD, 10/HD" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "AutoNeg", ++ .err = "parameter ignored", ++ .def = AUTONEG_ADV_DEFAULT, ++ .arg = { .l = { .nr = ARRAY_SIZE(an_list), ++ .p = an_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_AutoNeg > bd) { ++#endif ++ an = AutoNeg[bd]; ++ e1000_validate_option(&an, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ an = opt.def; ++ } ++#endif ++ hw->phy.autoneg_advertised = an; ++ } ++ ++ switch (speed + dplx) { ++ case 0: ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++#ifdef module_param_array ++ if ((num_Speed > bd) && (speed != 0 || dplx != 0)) ++#else ++ if (Speed[bd] != 
OPTION_UNSET || Duplex[bd] != OPTION_UNSET) ++#endif ++ DPRINTK(PROBE, INFO, ++ "Speed and duplex autonegotiation enabled\n"); ++ break; ++ case HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Half Duplex specified without Speed\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "Half Duplex only\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++ hw->phy.autoneg_advertised = ADVERTISE_10_HALF | ++ ADVERTISE_100_HALF; ++ break; ++ case FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Full Duplex specified without Speed\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "Full Duplex only\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++ hw->phy.autoneg_advertised = ADVERTISE_10_FULL | ++ ADVERTISE_100_FULL | ++ ADVERTISE_1000_FULL; ++ break; ++ case SPEED_10: ++ DPRINTK(PROBE, INFO, "10 Mbps Speed specified " ++ "without Duplex\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at 10 Mbps only\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++ hw->phy.autoneg_advertised = ADVERTISE_10_HALF | ++ ADVERTISE_10_FULL; ++ break; ++ case SPEED_10 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 10 Mbps Half Duplex\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = FALSE; ++ hw->mac.forced_speed_duplex = ADVERTISE_10_HALF; ++ hw->phy.autoneg_advertised = 0; ++ break; ++ case SPEED_10 + FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 10 Mbps Full Duplex\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = FALSE; ++ hw->mac.forced_speed_duplex = ADVERTISE_10_FULL; ++ hw->phy.autoneg_advertised = 0; ++ break; ++ case SPEED_100: ++ DPRINTK(PROBE, INFO, "100 Mbps Speed specified " ++ "without Duplex\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "100 Mbps only\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++ hw->phy.autoneg_advertised = ADVERTISE_100_HALF | ++ ADVERTISE_100_FULL; ++ break; ++ case SPEED_100 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 100 Mbps Half Duplex\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = FALSE; ++ hw->mac.forced_speed_duplex = ADVERTISE_100_HALF; ++ hw->phy.autoneg_advertised = 0; ++ break; ++ case SPEED_100 + FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 100 Mbps Full Duplex\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = FALSE; ++ hw->mac.forced_speed_duplex = ADVERTISE_100_FULL; ++ hw->phy.autoneg_advertised = 0; ++ break; ++ case SPEED_1000: ++ DPRINTK(PROBE, INFO, "1000 Mbps Speed specified without " ++ "Duplex\n"); ++ goto full_duplex_only; ++ case SPEED_1000 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, ++ "Half Duplex is not supported at 1000 Mbps\n"); ++ /* fall through */ ++ case SPEED_1000 + FULL_DUPLEX: ++full_duplex_only: ++ DPRINTK(PROBE, INFO, ++ "Using Autonegotiation at 1000 Mbps Full Duplex only\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++ hw->phy.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ default: ++ BUG(); ++ } ++ ++ /* Speed, AutoNeg and MDI/MDI-X must all play nice */ ++ if (e1000_validate_mdi_setting(&(adapter->hw)) < 0) { ++ DPRINTK(PROBE, INFO, ++ "Speed, AutoNeg and MDI-X specifications are " ++ "incompatible. Setting MDI-X to a compatible value.\n"); ++ } ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82540.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82540.c 2021-04-07 16:01:27.619633606 +0800 +@@ -0,0 +1,680 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. 
++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_82540 ++ * e1000_82545 ++ * e1000_82546 ++ * e1000_82545_rev_3 ++ * e1000_82546_rev_3 ++ */ ++ ++#include "e1000_api.h" ++ ++static s32 e1000_init_phy_params_82540(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_82540(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_82540(struct e1000_hw *hw); ++static s32 e1000_adjust_serdes_amplitude_82540(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_82540(struct e1000_hw *hw); ++static s32 e1000_init_hw_82540(struct e1000_hw *hw); ++static s32 e1000_reset_hw_82540(struct e1000_hw *hw); ++static s32 e1000_set_phy_mode_82540(struct e1000_hw *hw); ++static s32 e1000_set_vco_speed_82540(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_82540(struct e1000_hw *hw); ++static s32 e1000_setup_fiber_serdes_link_82540(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_82540(struct e1000_hw *hw); ++ ++/** ++ * e1000_init_phy_params_82540 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_phy_params_82540(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 10000; ++ phy->type = e1000_phy_m88; ++ ++ /* Function Pointers */ ++ func->check_polarity = e1000_check_polarity_m88; ++ func->commit_phy = e1000_phy_sw_reset_generic; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_m88; ++ func->get_cable_length = e1000_get_cable_length_m88; ++ func->get_cfg_done = e1000_get_cfg_done_generic; ++ func->read_phy_reg = e1000_read_phy_reg_m88; ++ func->reset_phy = e1000_phy_hw_reset_generic; ++ func->write_phy_reg = e1000_write_phy_reg_m88; ++ func->get_phy_info = e1000_get_phy_info_m88; ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper_82540; ++ ++ ret_val = e1000_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Verify phy id */ ++ switch (hw->mac.type) { ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ if (phy->id == M88E1011_I_PHY_ID) ++ break; ++ /* Fall Through */ ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ break; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82540 - Init NVM func ptrs. 
++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_82540(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ ++ DEBUGFUNC("e1000_init_nvm_params_82540"); ++ ++ nvm->type = e1000_nvm_eeprom_microwire; ++ nvm->delay_usec = 50; ++ nvm->opcode_bits = 3; ++ switch (nvm->override) { ++ case e1000_nvm_override_microwire_large: ++ nvm->address_bits = 8; ++ nvm->word_size = 256; ++ break; ++ case e1000_nvm_override_microwire_small: ++ nvm->address_bits = 6; ++ nvm->word_size = 64; ++ break; ++ default: ++ nvm->address_bits = eecd & E1000_EECD_SIZE ? 8 : 6; ++ nvm->word_size = eecd & E1000_EECD_SIZE ? 256 : 64; ++ break; ++ } ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_nvm_generic; ++ func->read_nvm = e1000_read_nvm_microwire; ++ func->release_nvm = e1000_release_nvm_generic; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->valid_led_default = e1000_valid_led_default_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ func->write_nvm = e1000_write_nvm_microwire; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_mac_params_82540 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_mac_params_82540(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_mac_params_82540"); ++ ++ /* Set media type */ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82545EM_FIBER: ++ case E1000_DEV_ID_82545GM_FIBER: ++ case E1000_DEV_ID_82546EB_FIBER: ++ case E1000_DEV_ID_82546GB_FIBER: ++ hw->phy.media_type = e1000_media_type_fiber; ++ break; ++ case E1000_DEV_ID_82545GM_SERDES: ++ case E1000_DEV_ID_82546GB_SERDES: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_pci_generic; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_82540; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_82540; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_generic; ++ /* physical interface setup */ ++ func->setup_physical_interface = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? e1000_setup_copper_link_82540 ++ : e1000_setup_fiber_serdes_link_82540; ++ /* check for link */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ func->check_for_link = e1000_check_for_copper_link_generic; ++ break; ++ case e1000_media_type_fiber: ++ func->check_for_link = e1000_check_for_fiber_link_generic; ++ break; ++ case e1000_media_type_internal_serdes: ++ func->check_for_link = e1000_check_for_serdes_link_generic; ++ break; ++ default: ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ break; ++ } ++ /* link info */ ++ func->get_link_up_info = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? 
e1000_get_speed_and_duplex_copper_generic ++ : e1000_get_speed_and_duplex_fiber_serdes_generic; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_generic; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* setup LED */ ++ func->setup_led = e1000_setup_led_generic; ++ /* cleanup LED */ ++ func->cleanup_led = e1000_cleanup_led_generic; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_generic; ++ func->led_off = e1000_led_off_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_82540; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_82540 - Init func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. ++ **/ ++void e1000_init_function_pointers_82540(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_82540"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_82540; ++ hw->func.init_nvm_params = e1000_init_nvm_params_82540; ++ hw->func.init_phy_params = e1000_init_phy_params_82540; ++} ++ ++/** ++ * e1000_reset_hw_82540 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_reset_hw_82540(struct e1000_hw *hw) ++{ ++ u32 ctrl, icr, manc; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_reset_hw_82540"); ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* ++ * Delay to allow any outstanding PCI transactions to complete ++ * before resetting the device. ++ */ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGOUT("Issuing a global reset to 82540/82545/82546 MAC\n"); ++ switch (hw->mac.type) { ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ E1000_WRITE_REG(hw, E1000_CTRL_DUP, ctrl | E1000_CTRL_RST); ++ break; ++ default: ++ /* ++ * These controllers can't ack the 64-bit write when ++ * issuing the reset, so we use IO-mapping as a ++ * workaround to issue the reset. ++ */ ++ E1000_WRITE_REG_IO(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ break; ++ } ++ ++ /* Wait for EEPROM reload */ ++ msec_delay(5); ++ ++ /* Disable HW ARPs on ASF enabled adapters */ ++ manc = E1000_READ_REG(hw, E1000_MANC); ++ manc &= ~E1000_MANC_ARP_EN; ++ E1000_WRITE_REG(hw, E1000_MANC, manc); ++ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_82540 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. This is a ++ * function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_init_hw_82540(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 txdctl, ctrl_ext; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_hw_82540"); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000_id_led_init_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Disabling VLAN filtering */ ++ DEBUGOUT("Initializing the IEEE VLAN\n"); ++ if (mac->type < e1000_82545_rev_3) ++ E1000_WRITE_REG(hw, E1000_VET, 0); ++ ++ e1000_clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ /* ++ * Avoid back to back register writes by adding the register ++ * read (flush). This is to protect against some strange ++ * bridge configurations that may issue Memory Write Block ++ * (MWB) to our register space. The *_rev_3 hardware at ++ * least doesn't respond correctly to every other dword in an ++ * MWB to our register space. ++ */ ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ if (mac->type < e1000_82545_rev_3) ++ e1000_pcix_mmrbc_workaround_generic(hw); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ txdctl = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), txdctl); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82540(hw); ++ ++ if ((hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER) || ++ (hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3)) { ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ /* ++ * Relaxed ordering must be disabled to avoid a parity ++ * error crash in a PCI slot. ++ */ ++ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_82540 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Calls the appropriate function to configure the link for auto-neg or forced ++ * speed and duplex. Then we check for link, once link is established calls ++ * to configure collision distance and flow control are called. If link is ++ * not established, we return -E1000_ERR_PHY (-2). This is a function ++ * pointer entry point called by the api module. 
++ **/ ++static s32 e1000_setup_copper_link_82540(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ u16 data; ++ ++ DEBUGFUNC("e1000_setup_copper_link_82540"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ ret_val = e1000_set_phy_mode_82540(hw); ++ if (ret_val) ++ goto out; ++ ++ if (hw->mac.type == e1000_82545_rev_3 || ++ hw->mac.type == e1000_82546_rev_3) { ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &data); ++ if (ret_val) ++ goto out; ++ data |= 0x00000008; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, data); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_copper_link_setup_m88(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_setup_copper_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_fiber_serdes_link_82540 - Setup link for fiber/serdes ++ * @hw: pointer to the HW structure ++ * ++ * Set the output amplitude to the value in the EEPROM and adjust the VCO ++ * speed to improve Bit Error Rate (BER) performance. Configures collision ++ * distance and flow control for fiber and serdes links. Upon successful ++ * setup, poll for link. This is a function pointer entry point called by ++ * the api module. ++ **/ ++static s32 e1000_setup_fiber_serdes_link_82540(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_fiber_serdes_link_82540"); ++ ++ switch (mac->type) { ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ if (hw->phy.media_type == e1000_media_type_internal_serdes) { ++ /* ++ * If we're on serdes media, adjust the output ++ * amplitude to value set in the EEPROM. ++ */ ++ ret_val = e1000_adjust_serdes_amplitude_82540(hw); ++ if (ret_val) ++ goto out; ++ } ++ /* Adjust VCO speed to improve BER performance */ ++ ret_val = e1000_set_vco_speed_82540(hw); ++ if (ret_val) ++ goto out; ++ default: ++ break; ++ } ++ ++ ret_val = e1000_setup_fiber_serdes_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_adjust_serdes_amplitude_82540 - Adjust amplitude based on EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Adjust the SERDES ouput amplitude based on the EEPROM settings. ++ **/ ++static s32 e1000_adjust_serdes_amplitude_82540(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 nvm_data; ++ ++ DEBUGFUNC("e1000_adjust_serdes_amplitude_82540"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_SERDES_AMPLITUDE, 1, &nvm_data); ++ if (ret_val) ++ goto out; ++ ++ if (nvm_data != NVM_RESERVED_WORD) { ++ /* Adjust serdes output amplitude only. */ ++ nvm_data &= NVM_SERDES_AMPLITUDE_MASK; ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_PHY_EXT_CTRL, ++ nvm_data); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_vco_speed_82540 - Set VCO speed for better performance ++ * @hw: pointer to the HW structure ++ * ++ * Set the VCO speed to improve Bit Error Rate (BER) performance. 
++ **/ ++static s32 e1000_set_vco_speed_82540(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 default_page = 0; ++ u16 phy_data; ++ ++ DEBUGFUNC("e1000_set_vco_speed_82540"); ++ ++ /* Set PHY register 30, page 5, bit 8 to 0 */ ++ ++ ret_val = e1000_read_phy_reg(hw, ++ M88E1000_PHY_PAGE_SELECT, ++ &default_page); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0005); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~M88E1000_PHY_VCO_REG_BIT8; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Set PHY register 30, page 4, bit 11 to 1 */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0004); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_PHY_VCO_REG_BIT11; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, ++ default_page); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_phy_mode_82540 - Set PHY to class A mode ++ * @hw: pointer to the HW structure ++ * ++ * Sets the PHY to class A mode and assumes the following operations will ++ * follow to enable the new class mode: ++ * 1. Do a PHY soft reset. ++ * 2. Restart auto-negotiation or force link. ++ **/ ++static s32 e1000_set_phy_mode_82540(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ u16 nvm_data; ++ ++ DEBUGFUNC("e1000_set_phy_mode_82540"); ++ ++ if (hw->mac.type != e1000_82545_rev_3) ++ goto out; ++ ++ ret_val = e1000_read_nvm(hw, NVM_PHY_CLASS_WORD, 1, &nvm_data); ++ if (ret_val) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ if ((nvm_data != NVM_RESERVED_WORD) && (nvm_data & NVM_PHY_CLASS_A)) { ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, ++ 0x000B); ++ if (ret_val) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_PHY_GEN_CONTROL, ++ 0x8104); ++ if (ret_val) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ phy->reset_disable = FALSE; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_82540 - Remove link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_82540(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_SMBUS_EN)) ++ e1000_power_down_phy_copper(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82540 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. 
++ **/ ++static void e1000_clear_hw_cntrs_82540(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_82540"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++ ++ temp = E1000_READ_REG(hw, E1000_MGTPRC); ++ temp = E1000_READ_REG(hw, E1000_MGTPDC); ++ temp = E1000_READ_REG(hw, E1000_MGTPTC); ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/rt2x00core.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/rt2x00core.c 2021-04-07 16:01:27.614633613 +0800 +@@ -0,0 +1,444 @@ ++/* rt2x00core.c ++ * ++ * Copyright (C) 2004 - 2005 rt2x00-2.0.0-b3 SourceForge Project ++ * ++ * 2006 rtnet adaption by Daniel Gregorek ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the ++ * Free Software Foundation, Inc., ++ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ * Module: rt2x00core ++ * Abstract: rt2x00 core routines. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "rt2x00.h" ++ ++#ifdef DRV_NAME ++#undef DRV_NAME ++#define DRV_NAME "rt_rt2x00core" ++#endif /* DRV_NAME */ ++ ++static int rt2x00_radio_on(struct _rt2x00_core *core); ++static int rt2x00_radio_off(struct _rt2x00_core *core); ++ ++static int cards[MAX_UNITS] = { [0 ...(MAX_UNITS - 1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++ ++/* ++ * Writes the pending configuration to the device ++ */ ++static void rt2x00_update_config(struct _rt2x00_core *core) ++{ ++ u16 update_flags = 0x0000; ++ ++ if (!test_bit(DEVICE_ENABLED, &core->flags) && ++ !test_bit(DEVICE_RADIO_ON, &core->flags)) ++ return; ++ ++ if (test_and_set_bit(DEVICE_CONFIG_UPDATE, &core->flags)) ++ return; ++ ++ update_flags = core->config.update_flags; ++ core->config.update_flags = 0; ++ ++ if (likely(update_flags)) ++ core->handler->dev_update_config(core, update_flags); ++ ++ clear_bit(DEVICE_CONFIG_UPDATE, &core->flags); ++} ++ ++/* ++ * Radio control. 
++ */ ++static int rt2x00_radio_on(struct _rt2x00_core *core) ++{ ++ int status = 0x00000000; ++ ++ if (test_bit(DEVICE_RADIO_ON, &core->flags)) { ++ WARNING("Radio already on.\n"); ++ return -ENOTCONN; ++ } ++ ++ status = core->handler->dev_radio_on(core); ++ if (status) ++ return status; ++ ++ set_bit(DEVICE_RADIO_ON, &core->flags); ++ ++ return 0; ++} ++ ++static int rt2x00_radio_off(struct _rt2x00_core *core) ++{ ++ if (!test_and_clear_bit(DEVICE_RADIO_ON, &core->flags)) { ++ WARNING("Radio already off.\n"); ++ return -ENOTCONN; ++ } ++ ++ core->handler->dev_radio_off(core); ++ ++ return 0; ++} ++ ++/* ++ * user space io handler ++ */ ++static int rt2x00_ioctl(struct rtnet_device *rtnet_dev, struct ifreq *ifr, ++ int request) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rt2x00_core *core = rtwlan_priv(rtwlan_dev); ++ struct rtwlan_cmd *cmd; ++ u8 rate, dsss_rate, ofdm_rate; ++ u32 address, value; ++ ++ cmd = (struct rtwlan_cmd *)ifr->ifr_data; ++ ++ switch (request) { ++ case IOC_RTWLAN_IFINFO: ++ cmd->args.info.bitrate = core->config.bitrate; ++ cmd->args.info.channel = core->config.channel; ++ cmd->args.info.retry = core->config.short_retry; ++ cmd->args.info.txpower = core->config.txpower; ++ cmd->args.info.bbpsens = core->config.bbpsens; ++ cmd->args.info.mode = core->rtwlan_dev->mode; ++ cmd->args.info.rx_packets = core->rtwlan_dev->stats.rx_packets; ++ cmd->args.info.tx_packets = core->rtwlan_dev->stats.tx_packets; ++ cmd->args.info.tx_retry = core->rtwlan_dev->stats.tx_retry; ++ cmd->args.info.autoresponder = ++ core->config.config_flags & CONFIG_AUTORESP ? 1 : 0; ++ cmd->args.info.dropbcast = ++ core->config.config_flags & CONFIG_DROP_BCAST ? 1 : 0; ++ cmd->args.info.dropmcast = ++ core->config.config_flags & CONFIG_DROP_MCAST ? 
1 : 0; ++ DEBUG("rtwlan_dev->mode=%d\n", rtwlan_dev->mode); ++ break; ++ case IOC_RTWLAN_BITRATE: ++ rate = cmd->args.set.bitrate; ++ ofdm_rate = ieee80211_is_ofdm_rate(rate); ++ dsss_rate = ieee80211_is_dsss_rate(rate); ++ DEBUG("bitrate=%d\n", rate); ++ if (!(dsss_rate ^ ofdm_rate)) ++ NOTICE("Rate %d is not DSSS and not OFDM.\n", rate); ++ core->config.bitrate = rate; ++ core->config.update_flags |= UPDATE_BITRATE; ++ break; ++ case IOC_RTWLAN_CHANNEL: ++ DEBUG("channel=%d\n", cmd->args.set.channel); ++ core->config.channel = cmd->args.set.channel; ++ core->config.update_flags |= UPDATE_CHANNEL; ++ break; ++ case IOC_RTWLAN_RETRY: ++ core->config.short_retry = cmd->args.set.retry; ++ core->config.update_flags |= UPDATE_RETRY; ++ break; ++ case IOC_RTWLAN_TXPOWER: ++ core->config.txpower = cmd->args.set.txpower; ++ core->config.update_flags |= UPDATE_TXPOWER; ++ break; ++ case IOC_RTWLAN_AUTORESP: ++ if (cmd->args.set.autoresponder) ++ core->config.config_flags |= CONFIG_AUTORESP; ++ else ++ core->config.config_flags &= ~CONFIG_AUTORESP; ++ core->config.update_flags |= UPDATE_AUTORESP; ++ break; ++ case IOC_RTWLAN_DROPBCAST: ++ if (cmd->args.set.dropbcast) ++ core->config.config_flags |= CONFIG_DROP_BCAST; ++ else ++ core->config.config_flags &= ~CONFIG_DROP_BCAST; ++ core->config.update_flags |= UPDATE_PACKET_FILTER; ++ break; ++ case IOC_RTWLAN_DROPMCAST: ++ if (cmd->args.set.dropmcast) ++ core->config.config_flags |= CONFIG_DROP_MCAST; ++ else ++ core->config.config_flags &= ~CONFIG_DROP_MCAST; ++ core->config.update_flags |= UPDATE_PACKET_FILTER; ++ break; ++ case IOC_RTWLAN_TXMODE: ++ core->rtwlan_dev->mode = cmd->args.set.mode; ++ break; ++ case IOC_RTWLAN_BBPSENS: ++ value = cmd->args.set.bbpsens; ++ if (value < 0) ++ value = 0; ++ if (value > 127) ++ value = 127; ++ core->config.bbpsens = value; ++ core->config.update_flags |= UPDATE_BBPSENS; ++ break; ++ case IOC_RTWLAN_REGREAD: ++ case IOC_RTWLAN_BBPREAD: ++ address = cmd->args.reg.address; ++ core->handler->dev_register_access(core, request, address, ++ &value); ++ cmd->args.reg.value = value; ++ break; ++ case IOC_RTWLAN_REGWRITE: ++ case IOC_RTWLAN_BBPWRITE: ++ address = cmd->args.reg.address; ++ value = cmd->args.reg.value; ++ core->handler->dev_register_access(core, request, address, ++ &value); ++ break; ++ default: ++ ERROR("Unknown request!\n"); ++ return -1; ++ } ++ ++ if (request != IOC_RTWLAN_IFINFO) ++ rt2x00_update_config(core); ++ ++ return 0; ++} ++ ++/* ++ * TX/RX related routines. 
++ */ ++static int rt2x00_start_xmit(struct rtskb *rtskb, ++ struct rtnet_device *rtnet_dev) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rt2x00_core *core = rtwlan_priv(rtwlan_dev); ++ u16 xmit_flags = 0x0000; ++ u8 rate = 0x00; ++ ++ if (unlikely(rtskb)) { ++ rate = core->config.bitrate; ++ if (ieee80211_is_ofdm_rate(rate)) ++ xmit_flags |= XMIT_OFDM; ++ ++ /* Check if the packet should be acknowledged */ ++ if (core->rtwlan_dev->mode == RTWLAN_TXMODE_ACK) ++ xmit_flags |= XMIT_ACK; ++ ++ if (core->handler->dev_xmit_packet(core, rtskb, rate, ++ xmit_flags)) ++ ERROR("Packet dropped !"); ++ ++ dev_kfree_rtskb(rtskb); ++ } ++ ++ return 0; ++} ++ ++/*** ++ * rt2x00_open ++ * @rtdev ++ */ ++static int rt2x00_open(struct rtnet_device *rtnet_dev) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rt2x00_core *core = rtwlan_priv(rtwlan_dev); ++ int status = 0x00000000; ++ ++ DEBUG("Start.\n"); ++ ++ if (test_and_set_bit(DEVICE_ENABLED, &core->flags)) { ++ ERROR("device already enabled.\n"); ++ return -EBUSY; ++ } ++ ++ /* ++ * Start rtnet interface. ++ */ ++ rt_stack_connect(rtnet_dev, &STACK_manager); ++ ++ status = rt2x00_radio_on(core); ++ if (status) { ++ clear_bit(DEVICE_ENABLED, &core->flags); ++ ERROR("Couldn't activate radio.\n"); ++ return status; ++ } ++ ++ core->config.led_status = 1; ++ core->config.update_flags |= UPDATE_LED_STATUS; ++ rt2x00_update_config(core); ++ ++ rtnetif_start_queue(rtnet_dev); ++ ++ DEBUG("Exit success.\n"); ++ ++ return 0; ++} ++ ++/*** ++ * rt2x00_close ++ * @rtdev ++ */ ++static int rt2x00_close(struct rtnet_device *rtnet_dev) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rt2x00_core *core = rtwlan_priv(rtwlan_dev); ++ ++ DEBUG("Start.\n"); ++ ++ if (!test_and_clear_bit(DEVICE_ENABLED, &core->flags)) { ++ ERROR("device already disabled.\n"); ++ return -EBUSY; ++ } ++ ++ rt2x00_radio_off(core); ++ ++ rtnetif_stop_queue(rtnet_dev); ++ rt_stack_disconnect(rtnet_dev); ++ ++ return 0; ++} ++ ++/* ++ * Initialization handlers. 
++ */ ++static void rt2x00_init_config(struct _rt2x00_core *core) ++{ ++ DEBUG("Start.\n"); ++ ++ memset(&core->config.bssid, '\0', sizeof(core->config.bssid)); ++ ++ core->config.channel = 1; ++ core->config.bitrate = capabilities.bitrate[0]; ++ core->config.bbpsens = 50; ++ core->config.config_flags = 0; ++ core->config.config_flags |= ++ CONFIG_DROP_BCAST | CONFIG_DROP_MCAST | CONFIG_AUTORESP; ++ core->config.short_retry = 4; ++ core->config.long_retry = 7; ++ core->config.txpower = 100; ++ core->config.plcp = 48; ++ core->config.sifs = 10; ++ core->config.slot_time = 20; ++ core->rtwlan_dev->mode = RTWLAN_TXMODE_RAW; ++ core->config.update_flags = UPDATE_ALL_CONFIG; ++} ++ ++struct rtnet_device *rt2x00_core_probe(struct _rt2x00_dev_handler *handler, ++ void *priv, u32 sizeof_dev) ++{ ++ struct rtnet_device *rtnet_dev = NULL; ++ struct _rt2x00_core *core = NULL; ++ struct rtwlan_device *rtwlan_dev = NULL; ++ static int cards_found = -1; ++ int err; ++ ++ DEBUG("Start.\n"); ++ ++ cards_found++; ++ if (cards[cards_found] == 0) ++ goto exit; ++ ++ rtnet_dev = ++ rtwlan_alloc_dev(sizeof_dev + sizeof(*core), RX_ENTRIES * 2); ++ if (!rtnet_dev) ++ goto exit; ++ ++ rt_rtdev_connect(rtnet_dev, &RTDEV_manager); ++ rtnet_dev->vers = RTDEV_VERS_2_0; ++ ++ rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ memset(rtwlan_dev, 0x00, sizeof(*rtwlan_dev)); ++ ++ core = rtwlan_priv(rtwlan_dev); ++ memset(core, 0x00, sizeof(*core)); ++ ++ core->rtwlan_dev = rtwlan_dev; ++ core->handler = handler; ++ core->priv = (void *)core + sizeof(*core); ++ core->rtnet_dev = rtnet_dev; ++ ++ /* Set configuration default values. */ ++ rt2x00_init_config(core); ++ ++ if (core->handler->dev_probe && core->handler->dev_probe(core, priv)) { ++ ERROR("device probe failed.\n"); ++ goto exit; ++ } ++ INFO("Device " MAC_FMT " detected.\n", MAC_ARG(rtnet_dev->dev_addr)); ++ ++ rtwlan_dev->hard_start_xmit = rt2x00_start_xmit; ++ ++ rtnet_dev->open = &rt2x00_open; ++ rtnet_dev->stop = &rt2x00_close; ++ rtnet_dev->do_ioctl = &rt2x00_ioctl; ++ rtnet_dev->hard_header = &rt_eth_header; ++ ++ if ((err = rt_register_rtnetdev(rtnet_dev)) != 0) { ++ rtdev_free(rtnet_dev); ++ ERROR("rtnet_device registration failed.\n"); ++ printk("err=%d\n", err); ++ goto exit_dev_remove; ++ } ++ ++ set_bit(DEVICE_AWAKE, &core->flags); ++ ++ return rtnet_dev; ++ ++exit_dev_remove: ++ if (core->handler->dev_remove) ++ core->handler->dev_remove(core); ++ ++exit: ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(rt2x00_core_probe); ++ ++void rt2x00_core_remove(struct rtnet_device *rtnet_dev) ++{ ++ rt_unregister_rtnetdev(rtnet_dev); ++ rt_rtdev_disconnect(rtnet_dev); ++ ++ rtdev_free(rtnet_dev); ++} ++EXPORT_SYMBOL_GPL(rt2x00_core_remove); ++ ++/* ++ * RT2x00 core module information. 
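++ * The core module itself only logs loading and unloading; devices are bound
++ * by a chipset module (e.g. rt_rt2500pci) through the exported
++ * rt2x00_core_probe()/rt2x00_core_remove().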
++ */ ++static char version[] = DRV_NAME " - " DRV_VERSION; ++ ++MODULE_AUTHOR(DRV_AUTHOR); ++MODULE_DESCRIPTION("RTnet rt2500 PCI WLAN driver (Core Module)"); ++MODULE_LICENSE("GPL"); ++ ++static int __init rt2x00_core_init(void) ++{ ++ printk(KERN_INFO "Loading module: %s\n", version); ++ return 0; ++} ++ ++static void __exit rt2x00_core_exit(void) ++{ ++ printk(KERN_INFO "Unloading module: %s\n", version); ++} ++ ++module_init(rt2x00_core_init); ++module_exit(rt2x00_core_exit); +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/rt2500pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/rt2500pci.c 2021-04-07 16:01:27.610633619 +0800 +@@ -0,0 +1,1274 @@ ++/* rt2500pci.c ++ * ++ * Copyright (C) 2004 - 2005 rt2x00-2.0.0-b3 SourceForge Project ++ * ++ * 2006 rtnet adaption by Daniel Gregorek ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the ++ * Free Software Foundation, Inc., ++ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ * Module: rt_rt2500pci ++ * Abstract: rt2500pci device specific routines. ++ * Supported chipsets: RT2560. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "rt2x00.h" ++#include "rt2500pci.h" ++ ++#include ++ ++#ifdef DRV_NAME ++#undef DRV_NAME ++#define DRV_NAME "rt_rt2500pci" ++#endif /* DRV_NAME */ ++ ++/* handler for direct register access from core module */ ++static int rt2x00_dev_register_access(struct _rt2x00_core *core, int request, ++ u32 address, u32 *value) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ u8 u8_value; ++ ++ switch (request) { ++ case IOC_RTWLAN_REGREAD: ++ rt2x00_register_read(rt2x00pci, address, value); ++ break; ++ case IOC_RTWLAN_REGWRITE: ++ rt2x00_register_write(rt2x00pci, address, *value); ++ break; ++ case IOC_RTWLAN_BBPREAD: ++ rt2x00_bbp_regread(rt2x00pci, address, &u8_value); ++ *value = u8_value; ++ break; ++ case IOC_RTWLAN_BBPWRITE: ++ rt2x00_bbp_regwrite(rt2x00pci, address, *value); ++ break; ++ default: ++ return -1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Interrupt routines. ++ * rt2x00_interrupt_txdone processes all transmitted packetss results. ++ * rt2x00_interrupt_rxdone processes all received rx packets. 
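++ * Both handlers are called from rt2x00_interrupt() with rt2x00pci->lock held
++ * and walk their ring until they reach a descriptor that is still owned by
++ * the NIC (or, for TX, no longer marked valid).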
++ */ ++static void rt2x00_interrupt_txdone(struct _data_ring *ring) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(ring->core->rtnet_dev); ++ struct _txd *txd = NULL; ++ u8 tx_result = 0x00; ++ /* u8 retry_count = 0x00; */ ++ ++ do { ++ txd = DESC_ADDR_DONE(ring); ++ ++ if (rt2x00_get_field32(txd->word0, TXD_W0_OWNER_NIC) || ++ !rt2x00_get_field32(txd->word0, TXD_W0_VALID)) ++ break; ++ ++ if (ring->ring_type == RING_TX) { ++ tx_result = ++ rt2x00_get_field32(txd->word0, TXD_W0_RESULT); ++ /* retry_count = rt2x00_get_field32(txd->word0, TXD_W0_RETRY_COUNT); */ ++ ++ switch (tx_result) { ++ case TX_SUCCESS: ++ rtwlan_dev->stats.tx_packets++; ++ break; ++ case TX_SUCCESS_RETRY: ++ rtwlan_dev->stats.tx_retry++; ++ break; ++ case TX_FAIL_RETRY: ++ DEBUG("TX_FAIL_RETRY.\n"); ++ break; ++ case TX_FAIL_INVALID: ++ DEBUG("TX_FAIL_INVALID.\n"); ++ break; ++ case TX_FAIL_OTHER: ++ DEBUG("TX_FAIL_OTHER.\n"); ++ break; ++ default: ++ DEBUG("Unknown tx result.\n"); ++ } ++ } ++ ++ rt2x00_set_field32(&txd->word0, TXD_W0_VALID, 0); ++ ++ rt2x00_ring_index_done_inc(ring); ++ } while (!rt2x00_ring_empty(ring)); ++} ++ ++static void rt2x00_interrupt_rxdone(struct _data_ring *ring, ++ nanosecs_abs_t *time_stamp) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(ring->core); ++ struct rtnet_device *rtnet_dev = ring->core->rtnet_dev; ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rxd *rxd = NULL; ++ struct rtskb *rtskb; ++ void *data = NULL; ++ u16 size = 0x0000; ++ /* u16 rssi = 0x0000; */ ++ ++ while (1) { ++ rxd = DESC_ADDR(ring); ++ data = DATA_ADDR(ring); ++ ++ if (rt2x00_get_field32(rxd->word0, RXD_W0_OWNER_NIC)) ++ break; ++ ++ size = rt2x00_get_field32(rxd->word0, RXD_W0_DATABYTE_COUNT); ++ /* rssi = rt2x00_get_field32(rxd->word2, RXD_W2_RSSI); */ ++ ++ /* prepare rtskb */ ++ rtskb = rtnetdev_alloc_rtskb(rtnet_dev, size + NET_IP_ALIGN); ++ if (!rtskb) { ++ ERROR("Couldn't allocate rtskb, packet dropped.\n"); ++ break; ++ } ++ rtskb->time_stamp = *time_stamp; ++ rtskb_reserve(rtskb, NET_IP_ALIGN); ++ ++ memcpy(rtskb->data, data, size); ++ rtskb_put(rtskb, size); ++ ++ /* give incoming frame to rtwlan stack */ ++ rtwlan_rx(rtskb, rtnet_dev); ++ ++ rtwlan_dev->stats.rx_packets++; ++ ++ rt2x00_set_field32(&rxd->word0, RXD_W0_OWNER_NIC, 1); ++ rt2x00_ring_index_inc(&rt2x00pci->rx); ++ } ++} ++ ++int rt2x00_interrupt(rtdm_irq_t *irq_handle) ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ struct rtnet_device *rtnet_dev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rt2x00_core *core = rtwlan_priv(rtwlan_dev); ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ unsigned int old_packet_cnt = rtwlan_dev->stats.rx_packets; ++ u32 reg = 0x00000000; ++ ++ rtdm_lock_get(&rt2x00pci->lock); ++ ++ rt2x00_register_read(rt2x00pci, CSR7, ®); ++ rt2x00_register_write(rt2x00pci, CSR7, reg); ++ ++ if (!reg) { ++ rtdm_lock_put(&rt2x00pci->lock); ++ return RTDM_IRQ_NONE; ++ } ++ ++ if (rt2x00_get_field32( ++ reg, ++ CSR7_TBCN_EXPIRE)) /* Beacon timer expired interrupt. */ ++ DEBUG("Beacon timer expired.\n"); ++ if (rt2x00_get_field32(reg, CSR7_RXDONE)) /* Rx ring done interrupt. */ ++ rt2x00_interrupt_rxdone(&rt2x00pci->rx, &time_stamp); ++ if (rt2x00_get_field32( ++ reg, ++ CSR7_TXDONE_ATIMRING)) /* Atim ring transmit done interrupt. */ ++ DEBUG("AtimTxDone.\n"); ++ if (rt2x00_get_field32( ++ reg, ++ CSR7_TXDONE_PRIORING)) /* Priority ring transmit done interrupt. 
*/ ++ DEBUG("PrioTxDone.\n"); ++ if (rt2x00_get_field32( ++ reg, ++ CSR7_TXDONE_TXRING)) /* Tx ring transmit done interrupt. */ ++ rt2x00_interrupt_txdone(&rt2x00pci->tx); ++ ++ rtdm_lock_put(&rt2x00pci->lock); ++ ++ if (old_packet_cnt != rtwlan_dev->stats.rx_packets) ++ rt_mark_stack_mgr(rtnet_dev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++void rt2x00_init_eeprom(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0x00000000; ++ u16 eeprom = 0x0000; ++ ++ /* ++ * 1 - Detect EEPROM width. ++ */ ++ rt2x00_register_read(rt2x00pci, CSR21, ®); ++ rt2x00pci->eeprom_width = rt2x00_get_field32(reg, CSR21_TYPE_93C46) ? ++ EEPROM_WIDTH_93c46 : ++ EEPROM_WIDTH_93c66; ++ ++ /* ++ * 2 - Identify rf chipset. ++ */ ++ eeprom = rt2x00_eeprom_read_word(rt2x00pci, EEPROM_ANTENNA); ++ set_chip(&rt2x00pci->chip, RT2560, ++ rt2x00_get_field16(eeprom, EEPROM_ANTENNA_RF_TYPE)); ++ ++ /* ++ * 3 - Identify default antenna configuration. ++ */ ++ config->antenna_tx = ++ rt2x00_get_field16(eeprom, EEPROM_ANTENNA_TX_DEFAULT); ++ config->antenna_rx = ++ rt2x00_get_field16(eeprom, EEPROM_ANTENNA_RX_DEFAULT); ++ ++ DEBUG("antenna_tx=%d antenna_rx=%d\n", config->antenna_tx, ++ config->antenna_rx); ++ ++ /* ++ * 4 - Read BBP data from EEPROM and store in private structure. ++ */ ++ memset(&rt2x00pci->eeprom, 0x00, sizeof(rt2x00pci->eeprom)); ++ for (eeprom = 0; eeprom < EEPROM_BBP_SIZE; eeprom++) ++ rt2x00pci->eeprom[eeprom] = rt2x00_eeprom_read_word( ++ rt2x00pci, EEPROM_BBP_START + eeprom); ++} ++ ++void rt2x00_dev_read_mac(struct _rt2x00_pci *rt2x00pci, ++ struct rtnet_device *rtnet_dev) ++{ ++ u32 reg[2]; ++ ++ memset(®, 0x00, sizeof(reg)); ++ ++ rt2x00_register_multiread(rt2x00pci, CSR3, ®[0], sizeof(reg)); ++ ++ rtnet_dev->dev_addr[0] = rt2x00_get_field32(reg[0], CSR3_BYTE0); ++ rtnet_dev->dev_addr[1] = rt2x00_get_field32(reg[0], CSR3_BYTE1); ++ rtnet_dev->dev_addr[2] = rt2x00_get_field32(reg[0], CSR3_BYTE2); ++ rtnet_dev->dev_addr[3] = rt2x00_get_field32(reg[0], CSR3_BYTE3); ++ rtnet_dev->dev_addr[4] = rt2x00_get_field32(reg[1], CSR4_BYTE4); ++ rtnet_dev->dev_addr[5] = rt2x00_get_field32(reg[1], CSR4_BYTE5); ++ ++ rtnet_dev->addr_len = 6; ++} ++ ++int rt2x00_dev_probe(struct _rt2x00_core *core, void *priv) ++{ ++ struct pci_dev *pci_dev = (struct pci_dev *)priv; ++ struct _rt2x00_pci *rt2x00pci = core->priv; ++ ++ memset(rt2x00pci, 0x00, sizeof(*rt2x00pci)); ++ ++ if (unlikely(!pci_dev)) { ++ ERROR("invalid priv pointer.\n"); ++ return -ENODEV; ++ } ++ rt2x00pci->pci_dev = pci_dev; ++ ++ rt2x00pci->rx.data_addr = NULL; ++ rt2x00pci->tx.data_addr = NULL; ++ ++ rt2x00pci->csr_addr = ioremap(pci_resource_start(pci_dev, 0), ++ pci_resource_len(pci_dev, 0)); ++ if (!rt2x00pci->csr_addr) { ++ ERROR("ioremap failed.\n"); ++ return -ENOMEM; ++ } ++ ++ rt2x00_init_eeprom(rt2x00pci, &core->config); ++ rt2x00_dev_read_mac(rt2x00pci, core->rtnet_dev); ++ ++ return 0; ++} ++ ++int rt2x00_dev_remove(struct _rt2x00_core *core) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ ++ if (rt2x00pci->csr_addr) { ++ iounmap(rt2x00pci->csr_addr); ++ rt2x00pci->csr_addr = NULL; ++ } ++ ++ return 0; ++} ++ ++/* ++ * rt2x00_clear_ring ++ * During the initialization some of the descriptor variables are filled in. ++ * The default value of the owner variable is different between the types of the descriptor, ++ * DMA ring entries that receive packets are owned by the device untill a packet is received. 
++ * DMA ring entries that are used to transmit a packet are owned by the module untill the device, ++ * for these rings the valid bit is set to 0 to indicate it is ready for use. ++ * should transmit the packet that particular DMA ring entry. ++ * The BUFFER_ADDRESS variable is used to link a descriptor to a packet data block. ++ */ ++static void rt2x00_clear_ring(struct _rt2x00_pci *rt2x00pci, ++ struct _data_ring *ring) ++{ ++ struct _rxd *rxd = NULL; ++ struct _txd *txd = NULL; ++ dma_addr_t data_dma = ++ ring->data_dma + (ring->max_entries * ring->desc_size); ++ u8 counter = 0x00; ++ ++ memset(ring->data_addr, 0x00, ring->mem_size); ++ ++ for (; counter < ring->max_entries; counter++) { ++ if (ring->ring_type == RING_RX) { ++ rxd = (struct _rxd *)__DESC_ADDR(ring, counter); ++ ++ rt2x00_set_field32(&rxd->word1, RXD_W1_BUFFER_ADDRESS, ++ data_dma); ++ rt2x00_set_field32(&rxd->word0, RXD_W0_OWNER_NIC, 1); ++ } else { ++ txd = (struct _txd *)__DESC_ADDR(ring, counter); ++ ++ rt2x00_set_field32(&txd->word1, TXD_W1_BUFFER_ADDRESS, ++ data_dma); ++ rt2x00_set_field32(&txd->word0, TXD_W0_VALID, 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_OWNER_NIC, 0); ++ } ++ ++ data_dma += ring->entry_size; ++ } ++ ++ rt2x00_ring_clear_index(ring); ++} ++ ++/* ++ * rt2x00_init_ring_register ++ * The registers should be updated with the descriptor size and the ++ * number of entries of each ring. ++ * The address of the first entry of the descriptor ring is written to the register ++ * corresponding to the ring. ++ */ ++static void rt2x00_init_ring_register(struct _rt2x00_pci *rt2x00pci) ++{ ++ u32 reg = 0x00000000; ++ ++ /* Initialize ring register for RX/TX */ ++ ++ rt2x00_set_field32(®, TXCSR2_TXD_SIZE, rt2x00pci->tx.desc_size); ++ rt2x00_set_field32(®, TXCSR2_NUM_TXD, rt2x00pci->tx.max_entries); ++ rt2x00_register_write(rt2x00pci, TXCSR2, reg); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, TXCSR3_TX_RING_REGISTER, ++ rt2x00pci->tx.data_dma); ++ rt2x00_register_write(rt2x00pci, TXCSR3, reg); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, RXCSR1_RXD_SIZE, rt2x00pci->rx.desc_size); ++ rt2x00_set_field32(®, RXCSR1_NUM_RXD, rt2x00pci->rx.max_entries); ++ rt2x00_register_write(rt2x00pci, RXCSR1, reg); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, RXCSR2_RX_RING_REGISTER, ++ rt2x00pci->rx.data_dma); ++ rt2x00_register_write(rt2x00pci, RXCSR2, reg); ++} ++ ++static int rt2x00_init_registers(struct _rt2x00_pci *rt2x00pci) ++{ ++ u32 reg = 0x00000000; ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_register_write(rt2x00pci, PWRCSR0, cpu_to_le32(0x3f3b3100)); ++ ++ rt2x00_register_write(rt2x00pci, PSCSR0, cpu_to_le32(0x00020002)); ++ rt2x00_register_write(rt2x00pci, PSCSR1, cpu_to_le32(0x00000002)); ++ rt2x00_register_write(rt2x00pci, PSCSR2, cpu_to_le32(0x00020002)); ++ rt2x00_register_write(rt2x00pci, PSCSR3, cpu_to_le32(0x00000002)); ++ ++ rt2x00_register_read(rt2x00pci, TIMECSR, ®); ++ rt2x00_set_field32(®, TIMECSR_US_COUNT, 33); ++ rt2x00_set_field32(®, TIMECSR_US_64_COUNT, 63); ++ rt2x00_set_field32(®, TIMECSR_BEACON_EXPECT, 0); ++ rt2x00_register_write(rt2x00pci, TIMECSR, reg); ++ ++ rt2x00_register_read(rt2x00pci, CSR9, ®); ++ rt2x00_set_field32(®, CSR9_MAX_FRAME_UNIT, ++ (rt2x00pci->rx.entry_size / 128)); ++ rt2x00_register_write(rt2x00pci, CSR9, reg); ++ ++ rt2x00_register_write(rt2x00pci, CNT3, cpu_to_le32(0x3f080000)); ++ ++ rt2x00_register_read(rt2x00pci, RXCSR0, ®); ++ rt2x00_set_field32(®, RXCSR0_DISABLE_RX, 0); ++ rt2x00_set_field32(®, RXCSR0_DROP_CONTROL, 0); ++ rt2x00_register_write(rt2x00pci, 
RXCSR0, reg); ++ ++ rt2x00_register_write(rt2x00pci, MACCSR0, cpu_to_le32(0x00213223)); ++ ++ rt2x00_register_read(rt2x00pci, MACCSR1, ®); ++ rt2x00_set_field32(®, MACCSR1_AUTO_TXBBP, 1); ++ rt2x00_set_field32(®, MACCSR1_AUTO_RXBBP, 1); ++ rt2x00_register_write(rt2x00pci, MACCSR1, reg); ++ ++ rt2x00_register_read(rt2x00pci, MACCSR2, ®); ++ rt2x00_set_field32(®, MACCSR2_DELAY, 64); ++ rt2x00_register_write(rt2x00pci, MACCSR2, reg); ++ ++ rt2x00_register_read(rt2x00pci, RXCSR3, ®); ++ rt2x00_set_field32(®, RXCSR3_BBP_ID0, 47); /* Signal. */ ++ rt2x00_set_field32(®, RXCSR3_BBP_ID0_VALID, 1); ++ rt2x00_set_field32(®, RXCSR3_BBP_ID1, 51); /* Rssi. */ ++ rt2x00_set_field32(®, RXCSR3_BBP_ID1_VALID, 1); ++ rt2x00_set_field32(®, RXCSR3_BBP_ID2, 42); /* OFDM Rate. */ ++ rt2x00_set_field32(®, RXCSR3_BBP_ID2_VALID, 1); ++ rt2x00_set_field32(®, RXCSR3_BBP_ID3, 51); /* OFDM. */ ++ rt2x00_set_field32(®, RXCSR3_BBP_ID3_VALID, 1); ++ rt2x00_register_write(rt2x00pci, RXCSR3, reg); ++ ++ rt2x00_register_read(rt2x00pci, RALINKCSR, ®); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_DATA0, 17); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_ID0, 26); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_VALID0, 1); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_DATA1, 0); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_ID1, 26); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_VALID1, 1); ++ rt2x00_register_write(rt2x00pci, RALINKCSR, reg); ++ ++ rt2x00_register_write(rt2x00pci, BBPCSR1, cpu_to_le32(0x82188200)); ++ ++ rt2x00_register_write(rt2x00pci, TXACKCSR0, cpu_to_le32(0x00000020)); ++ ++ rt2x00_register_write(rt2x00pci, ARTCSR0, cpu_to_le32(0x7038140a)); ++ rt2x00_register_write(rt2x00pci, ARTCSR1, cpu_to_le32(0x1d21252d)); ++ rt2x00_register_write(rt2x00pci, ARTCSR2, cpu_to_le32(0x1919191d)); ++ ++ /* disable Beacon timer */ ++ rt2x00_register_write(rt2x00pci, CSR14, 0x0); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, LEDCSR_ON_PERIOD, 30); ++ rt2x00_set_field32(®, LEDCSR_OFF_PERIOD, 70); ++ rt2x00_set_field32(®, LEDCSR_LINK, 0); ++ rt2x00_set_field32(®, LEDCSR_ACTIVITY, 0); ++ rt2x00_register_write(rt2x00pci, LEDCSR, reg); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, CSR1_SOFT_RESET, 1); ++ rt2x00_register_write(rt2x00pci, CSR1, reg); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, CSR1_HOST_READY, 1); ++ rt2x00_register_write(rt2x00pci, CSR1, reg); ++ ++ /* ++ * We must clear the FCS and FIFI error count. ++ * These registers are cleared on read, so we may pass a useless variable to store the value. 
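++ * CNT0 holds the FCS error count and CNT4 the Rx FIFO overflow count
++ * (see rt2500pci.h).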
++ */ ++ rt2x00_register_read(rt2x00pci, CNT0, ®); ++ rt2x00_register_read(rt2x00pci, CNT4, ®); ++ ++ return 0; ++} ++ ++static void rt2x00_init_write_mac(struct _rt2x00_pci *rt2x00pci, ++ struct rtnet_device *rtnet_dev) ++{ ++ u32 reg[2]; ++ ++ memset(®, 0x00, sizeof(reg)); ++ ++ rt2x00_set_field32(®[0], CSR3_BYTE0, rtnet_dev->dev_addr[0]); ++ rt2x00_set_field32(®[0], CSR3_BYTE1, rtnet_dev->dev_addr[1]); ++ rt2x00_set_field32(®[0], CSR3_BYTE2, rtnet_dev->dev_addr[2]); ++ rt2x00_set_field32(®[0], CSR3_BYTE3, rtnet_dev->dev_addr[3]); ++ rt2x00_set_field32(®[1], CSR4_BYTE4, rtnet_dev->dev_addr[4]); ++ rt2x00_set_field32(®[1], CSR4_BYTE5, rtnet_dev->dev_addr[5]); ++ ++ rt2x00_register_multiwrite(rt2x00pci, CSR3, ®[0], sizeof(reg)); ++} ++ ++static int rt2x00_init_bbp(struct _rt2x00_pci *rt2x00pci) ++{ ++ u8 reg_id = 0x00; ++ u8 value = 0x00; ++ u8 counter = 0x00; ++ ++ for (counter = 0x00; counter < REGISTER_BUSY_COUNT; counter++) { ++ rt2x00_bbp_regread(rt2x00pci, 0x00, &value); ++ if ((value != 0xff) && (value != 0x00)) ++ goto continue_csr_init; ++ NOTICE("Waiting for BBP register.\n"); ++ } ++ ++ ERROR("hardware problem, BBP register access failed, aborting.\n"); ++ return -EACCES; ++ ++continue_csr_init: ++ rt2x00_bbp_regwrite(rt2x00pci, 3, 0x02); ++ rt2x00_bbp_regwrite(rt2x00pci, 4, 0x19); ++ rt2x00_bbp_regwrite(rt2x00pci, 14, 0x1c); ++ rt2x00_bbp_regwrite(rt2x00pci, 15, 0x30); ++ rt2x00_bbp_regwrite(rt2x00pci, 16, 0xac); ++ rt2x00_bbp_regwrite(rt2x00pci, 17, 0x48); ++ rt2x00_bbp_regwrite(rt2x00pci, 18, 0x18); ++ rt2x00_bbp_regwrite(rt2x00pci, 19, 0xff); ++ rt2x00_bbp_regwrite(rt2x00pci, 20, 0x1e); ++ rt2x00_bbp_regwrite(rt2x00pci, 21, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 22, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 23, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 24, 0x70); ++ rt2x00_bbp_regwrite(rt2x00pci, 25, 0x40); ++ rt2x00_bbp_regwrite(rt2x00pci, 26, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 27, 0x23); ++ rt2x00_bbp_regwrite(rt2x00pci, 30, 0x10); ++ rt2x00_bbp_regwrite(rt2x00pci, 31, 0x2b); ++ rt2x00_bbp_regwrite(rt2x00pci, 32, 0xb9); ++ rt2x00_bbp_regwrite(rt2x00pci, 34, 0x12); ++ rt2x00_bbp_regwrite(rt2x00pci, 35, 0x50); ++ rt2x00_bbp_regwrite(rt2x00pci, 39, 0xc4); ++ rt2x00_bbp_regwrite(rt2x00pci, 40, 0x02); ++ rt2x00_bbp_regwrite(rt2x00pci, 41, 0x60); ++ rt2x00_bbp_regwrite(rt2x00pci, 53, 0x10); ++ rt2x00_bbp_regwrite(rt2x00pci, 54, 0x18); ++ rt2x00_bbp_regwrite(rt2x00pci, 56, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 57, 0x10); ++ rt2x00_bbp_regwrite(rt2x00pci, 58, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 61, 0x6d); ++ rt2x00_bbp_regwrite(rt2x00pci, 62, 0x10); ++ ++ DEBUG("Start reading EEPROM contents...\n"); ++ for (counter = 0; counter < EEPROM_BBP_SIZE; counter++) { ++ if (rt2x00pci->eeprom[counter] != 0xffff && ++ rt2x00pci->eeprom[counter] != 0x0000) { ++ reg_id = rt2x00_get_field16(rt2x00pci->eeprom[counter], ++ EEPROM_BBP_REG_ID); ++ value = rt2x00_get_field16(rt2x00pci->eeprom[counter], ++ EEPROM_BBP_VALUE); ++ DEBUG("BBP reg_id: 0x%02x, value: 0x%02x.\n", reg_id, ++ value); ++ rt2x00_bbp_regwrite(rt2x00pci, reg_id, value); ++ } ++ } ++ DEBUG("...End of EEPROM contents.\n"); ++ ++ return 0; ++} ++ ++/* ++ * Device radio routines. ++ * When the radio is switched on or off, the TX and RX ++ * should always be reset using the TXCSR0 and RXCSR0 registers. ++ * The radio itself is switched on and off using the PWRCSR0 register. 
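++ * rt2x00_dev_radio_on() additionally allocates the DMA rings and requests
++ * the RTDM interrupt; rt2x00_dev_radio_off() frees both again.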
++ */ ++ ++static int rt2x00_dev_radio_on(struct _rt2x00_core *core) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ u32 reg = 0x00000000; ++ int retval; ++ ++ if (rt2x00_pci_alloc_rings(core)) ++ goto exit_fail; ++ ++ rt2x00_clear_ring(rt2x00pci, &rt2x00pci->rx); ++ rt2x00_clear_ring(rt2x00pci, &rt2x00pci->tx); ++ ++ rt2x00_init_ring_register(rt2x00pci); ++ ++ if (rt2x00_init_registers(rt2x00pci)) ++ goto exit_fail; ++ ++ rt2x00_init_write_mac(rt2x00pci, core->rtnet_dev); ++ ++ if (rt2x00_init_bbp(rt2x00pci)) ++ goto exit_fail; ++ ++ /* ++ * Clear interrupts. ++ */ ++ rt2x00_register_read(rt2x00pci, CSR7, ®); ++ rt2x00_register_write(rt2x00pci, CSR7, reg); ++ ++ /* Register rtdm-irq */ ++ retval = rtdm_irq_request(&rt2x00pci->irq_handle, core->rtnet_dev->irq, ++ rt2x00_interrupt, 0, core->rtnet_dev->name, ++ core->rtnet_dev); ++ ++ /* ++ * Enable interrupts. ++ */ ++ rt2x00_register_read(rt2x00pci, CSR8, ®); ++ rt2x00_set_field32(®, CSR8_TBCN_EXPIRE, 0); ++ rt2x00_set_field32(®, CSR8_TXDONE_TXRING, 0); ++ rt2x00_set_field32(®, CSR8_TXDONE_ATIMRING, 0); ++ rt2x00_set_field32(®, CSR8_TXDONE_PRIORING, 0); ++ rt2x00_set_field32(®, CSR8_RXDONE, 0); ++ rt2x00_register_write(rt2x00pci, CSR8, reg); ++ ++ return 0; ++ ++exit_fail: ++ rt2x00_pci_free_rings(core); ++ ++ return -ENOMEM; ++} ++ ++static int rt2x00_dev_radio_off(struct _rt2x00_core *core) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ u32 reg = 0x00000000; ++ int retval = 0; ++ ++ rt2x00_register_write(rt2x00pci, PWRCSR0, cpu_to_le32(0x00000000)); ++ ++ rt2x00_register_read(rt2x00pci, TXCSR0, ®); ++ rt2x00_set_field32(®, TXCSR0_ABORT, 1); ++ rt2x00_register_write(rt2x00pci, TXCSR0, reg); ++ ++ rt2x00_register_read(rt2x00pci, RXCSR0, ®); ++ rt2x00_set_field32(®, RXCSR0_DISABLE_RX, 1); ++ rt2x00_register_write(rt2x00pci, RXCSR0, reg); ++ ++ rt2x00_register_read(rt2x00pci, LEDCSR, ®); ++ rt2x00_set_field32(®, LEDCSR_LINK, 0); ++ rt2x00_register_write(rt2x00pci, LEDCSR, reg); ++ ++ rt2x00_register_read(rt2x00pci, CSR8, ®); ++ rt2x00_set_field32(®, CSR8_TBCN_EXPIRE, 1); ++ rt2x00_set_field32(®, CSR8_TXDONE_TXRING, 1); ++ rt2x00_set_field32(®, CSR8_TXDONE_ATIMRING, 1); ++ rt2x00_set_field32(®, CSR8_TXDONE_PRIORING, 1); ++ rt2x00_set_field32(®, CSR8_RXDONE, 1); ++ rt2x00_register_write(rt2x00pci, CSR8, reg); ++ ++ rt2x00_pci_free_rings(core); ++ ++ if ((retval = rtdm_irq_free(&rt2x00pci->irq_handle)) != 0) ++ ERROR("rtdm_irq_free=%d\n", retval); ++ ++ rt_stack_disconnect(core->rtnet_dev); ++ ++ return retval; ++} ++ ++/* ++ * Configuration handlers. 
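++ * Each handler below programs one aspect of the device; they are dispatched
++ * from rt2x00_dev_update_config() according to the UPDATE_* bits in the
++ * update_flags argument.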
++ */ ++ ++static void rt2x00_dev_update_autoresp(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0; ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_register_read(rt2x00pci, TXCSR1, ®); ++ ++ if (config->config_flags & CONFIG_AUTORESP) ++ rt2x00_set_field32(®, TXCSR1_AUTORESPONDER, 1); ++ else ++ rt2x00_set_field32(®, TXCSR1_AUTORESPONDER, 0); ++ ++ rt2x00_register_write(rt2x00pci, TXCSR1, reg); ++} ++ ++static void rt2x00_dev_update_bbpsens(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ rt2x00_bbp_regwrite(rt2x00pci, 0x11, config->bbpsens); ++} ++ ++static void rt2x00_dev_update_bssid(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg[2]; ++ ++ memset(®, 0x00, sizeof(reg)); ++ ++ rt2x00_set_field32(®[0], CSR5_BYTE0, config->bssid[0]); ++ rt2x00_set_field32(®[0], CSR5_BYTE1, config->bssid[1]); ++ rt2x00_set_field32(®[0], CSR5_BYTE2, config->bssid[2]); ++ rt2x00_set_field32(®[0], CSR5_BYTE3, config->bssid[3]); ++ rt2x00_set_field32(®[1], CSR6_BYTE4, config->bssid[4]); ++ rt2x00_set_field32(®[1], CSR6_BYTE5, config->bssid[5]); ++ ++ rt2x00_register_multiwrite(rt2x00pci, CSR5, ®[0], sizeof(reg)); ++} ++ ++static void rt2x00_dev_update_packet_filter(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0x00000000; ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_register_read(rt2x00pci, RXCSR0, ®); ++ ++ rt2x00_set_field32(®, RXCSR0_DROP_TODS, 0); ++ rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, 1); ++ rt2x00_set_field32(®, RXCSR0_DROP_CRC, 1); ++ rt2x00_set_field32(®, RXCSR0_DROP_PHYSICAL, 1); ++ rt2x00_set_field32(®, RXCSR0_DROP_CONTROL, 1); ++ rt2x00_set_field32(®, RXCSR0_DROP_VERSION_ERROR, 1); ++ rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, 1); ++ ++ /* ++ * This looks like a bug, but for an unknown reason the register seems to swap the bits !!! ++ */ ++ if (config->config_flags & CONFIG_DROP_BCAST) ++ rt2x00_set_field32(®, RXCSR0_DROP_MCAST, 1); ++ else ++ rt2x00_set_field32(®, RXCSR0_DROP_MCAST, 0); ++ ++ if (config->config_flags & CONFIG_DROP_MCAST) ++ rt2x00_set_field32(®, RXCSR0_DROP_BCAST, 1); ++ else ++ rt2x00_set_field32(®, RXCSR0_DROP_BCAST, 0); ++ ++ rt2x00_register_write(rt2x00pci, RXCSR0, reg); ++} ++ ++static void rt2x00_dev_update_channel(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u8 txpower = rt2x00_get_txpower(&rt2x00pci->chip, config->txpower); ++ u32 reg = 0x00000000; ++ ++ if (rt2x00_get_rf_value(&rt2x00pci->chip, config->channel, ++ &rt2x00pci->channel)) { ++ ERROR("RF values for chip %04x and channel %d not found.\n", ++ rt2x00_get_rf(&rt2x00pci->chip), config->channel); ++ return; ++ } ++ ++ /* ++ * Set TXpower. ++ */ ++ rt2x00_set_field32(&rt2x00pci->channel.rf3, RF3_TXPOWER, txpower); ++ ++ /* ++ * For RT2525 we should first set the channel to half band higher. ++ */ ++ if (rt2x00_rf(&rt2x00pci->chip, RF2525)) { ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf1); ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf2 + ++ cpu_to_le32(0x00000020)); ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf3); ++ if (rt2x00pci->channel.rf4) ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf4); ++ } ++ ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf1); ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf2); ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf3); ++ if (rt2x00pci->channel.rf4) ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf4); ++ ++ /* ++ * Channel 14 requires the Japan filter bit to be set. 
++ */ ++ rt2x00_bbp_regwrite(rt2x00pci, 70, ++ (config->channel == 14) ? 0x4e : 0x46); ++ ++ msleep(1); ++ ++ /* ++ * Clear false CRC during channel switch. ++ */ ++ rt2x00_register_read(rt2x00pci, CNT0, ®); ++ ++ DEBUG("Switching to channel %d. RF1: 0x%08x, RF2: 0x%08x, RF3: 0x%08x, RF4: 0x%08x.\n", ++ config->channel, rt2x00pci->channel.rf1, rt2x00pci->channel.rf2, ++ rt2x00pci->channel.rf3, rt2x00pci->channel.rf4); ++} ++ ++static void rt2x00_dev_update_rate(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 value = 0x00000000; ++ u32 reg = 0x00000000; ++ u8 counter = 0x00; ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_register_read(rt2x00pci, TXCSR1, ®); ++ ++ value = config->sifs + (2 * config->slot_time) + config->plcp + ++ get_preamble(config) + ++ get_duration(ACK_SIZE, capabilities.bitrate[0]); ++ rt2x00_set_field32(®, TXCSR1_ACK_TIMEOUT, value); ++ ++ value = config->sifs + config->plcp + get_preamble(config) + ++ get_duration(ACK_SIZE, capabilities.bitrate[0]); ++ rt2x00_set_field32(®, TXCSR1_ACK_CONSUME_TIME, value); ++ ++ rt2x00_set_field32(®, TXCSR1_TSF_OFFSET, 0x18); ++ rt2x00_set_field32(®, TXCSR1_AUTORESPONDER, 1); ++ ++ rt2x00_register_write(rt2x00pci, TXCSR1, reg); ++ ++ reg = 0x00000000; ++ for (counter = 0; counter < 12; counter++) { ++ reg |= cpu_to_le32(0x00000001 << counter); ++ if (capabilities.bitrate[counter] == config->bitrate) ++ break; ++ } ++ ++ rt2x00_register_write(rt2x00pci, ARCSR1, reg); ++} ++ ++static void rt2x00_dev_update_txpower(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u8 txpower = rt2x00_get_txpower(&rt2x00pci->chip, config->txpower); ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_set_field32(&rt2x00pci->channel.rf3, RF3_TXPOWER, txpower); ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf3); ++} ++ ++static void rt2x00_dev_update_antenna(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg; ++ u8 reg_rx; ++ u8 reg_tx; ++ ++ rt2x00_register_read(rt2x00pci, BBPCSR1, ®); ++ rt2x00_bbp_regread(rt2x00pci, 14, ®_rx); ++ rt2x00_bbp_regread(rt2x00pci, 2, ®_tx); ++ ++ /* TX antenna select */ ++ if (config->antenna_tx == 1) { ++ /* Antenna A */ ++ reg_tx = (reg_tx & 0xfc) | 0x00; ++ reg = (reg & 0xfffcfffc) | 0x00; ++ } else if (config->antenna_tx == 2) { ++ /* Antenna B */ ++ reg_tx = (reg_tx & 0xfc) | 0x02; ++ reg = (reg & 0xfffcfffc) | 0x00020002; ++ } else { ++ /* Diversity */ ++ reg_tx = (reg_tx & 0xfc) | 0x02; ++ reg = (reg & 0xfffcfffc) | 0x00020002; ++ } ++ ++ /* RX antenna select */ ++ if (config->antenna_rx == 1) ++ reg_rx = (reg_rx & 0xfc) | 0x00; ++ else if (config->antenna_rx == 2) ++ reg_rx = (reg_rx & 0xfc) | 0x02; ++ else ++ reg_rx = (reg_rx & 0xfc) | 0x02; ++ ++ /* ++ * RT2525E and RT5222 need to flip I/Q ++ */ ++ if (rt2x00_rf(&rt2x00pci->chip, RF5222)) { ++ reg |= 0x00040004; ++ reg_tx |= 0x04; ++ } else if (rt2x00_rf(&rt2x00pci->chip, RF2525E)) { ++ reg |= 0x00040004; ++ reg_tx |= 0x04; ++ reg_rx |= 0xfb; ++ } ++ ++ rt2x00_register_write(rt2x00pci, BBPCSR1, reg); ++ rt2x00_bbp_regwrite(rt2x00pci, 14, reg_rx); ++ rt2x00_bbp_regwrite(rt2x00pci, 2, reg_tx); ++} ++ ++static void rt2x00_dev_update_duration(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0x00000000; ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_register_read(rt2x00pci, CSR11, ®); ++ rt2x00_set_field32(®, CSR11_CWMIN, 5); /* 2^5 = 32. */ ++ rt2x00_set_field32(®, CSR11_CWMAX, 10); /* 2^10 = 1024. 
*/ ++ rt2x00_set_field32(®, CSR11_SLOT_TIME, config->slot_time); ++ rt2x00_set_field32(®, CSR11_CW_SELECT, 1); ++ rt2x00_register_write(rt2x00pci, CSR11, reg); ++ ++ rt2x00_register_read(rt2x00pci, CSR18, ®); ++ rt2x00_set_field32(®, CSR18_SIFS, config->sifs); ++ rt2x00_set_field32(®, CSR18_PIFS, config->sifs + config->slot_time); ++ rt2x00_register_write(rt2x00pci, CSR18, reg); ++ ++ rt2x00_register_read(rt2x00pci, CSR19, ®); ++ rt2x00_set_field32(®, CSR19_DIFS, ++ config->sifs + (2 * config->slot_time)); ++ rt2x00_set_field32(®, CSR19_EIFS, ++ config->sifs + ++ get_duration((IEEE80211_HEADER + ACK_SIZE), ++ capabilities.bitrate[0])); ++ rt2x00_register_write(rt2x00pci, CSR19, reg); ++} ++ ++static void rt2x00_dev_update_retry(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0x00000000; ++ ++ rt2x00_register_read(rt2x00pci, CSR11, ®); ++ rt2x00_set_field32(®, CSR11_LONG_RETRY, config->long_retry); ++ rt2x00_set_field32(®, CSR11_SHORT_RETRY, config->short_retry); ++ rt2x00_register_write(rt2x00pci, CSR11, reg); ++} ++ ++static void rt2x00_dev_update_preamble(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg[4]; ++ u32 preamble = 0x00000000; ++ ++ memset(®, 0x00, sizeof(reg)); ++ ++ reg[0] = cpu_to_le32(0x00700400 | preamble); /* ARCSR2 */ ++ reg[1] = cpu_to_le32(0x00380401 | preamble); /* ARCSR3 */ ++ reg[2] = cpu_to_le32(0x00150402 | preamble); /* ARCSR4 */ ++ reg[3] = cpu_to_le32(0x000b8403 | preamble); /* ARCSR5 */ ++ ++ rt2x00_register_multiwrite(rt2x00pci, ARCSR2, ®[0], sizeof(reg)); ++} ++ ++static void rt2x00_dev_update_led(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0x00000000; ++ ++ rt2x00_register_read(rt2x00pci, LEDCSR, ®); ++ rt2x00_set_field32(®, LEDCSR_LINK, config->led_status ? 1 : 0); ++ rt2x00_register_write(rt2x00pci, LEDCSR, reg); ++} ++ ++static int rt2x00_dev_update_config(struct _rt2x00_core *core, u16 update_flags) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ ++ DEBUG("Start.\n"); ++ ++ if (update_flags & UPDATE_BSSID) ++ rt2x00_dev_update_bssid(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_PACKET_FILTER) ++ rt2x00_dev_update_packet_filter(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_CHANNEL) ++ rt2x00_dev_update_channel(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_BITRATE) ++ rt2x00_dev_update_rate(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_TXPOWER) ++ rt2x00_dev_update_txpower(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_ANTENNA) ++ rt2x00_dev_update_antenna(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_DURATION) ++ rt2x00_dev_update_duration(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_RETRY) ++ rt2x00_dev_update_retry(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_PREAMBLE) ++ rt2x00_dev_update_preamble(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_LED_STATUS) ++ rt2x00_dev_update_led(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_AUTORESP) ++ rt2x00_dev_update_autoresp(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_BBPSENS) ++ rt2x00_dev_update_bbpsens(rt2x00pci, &core->config); ++ ++ DEBUG("Exit.\n"); ++ ++ return 0; ++} ++ ++/* ++ * Transmission routines. ++ * rt2x00_write_tx_desc will write the txd descriptor. ++ * rt2x00_dev_xmit_packet will copy the packets to the appropriate DMA ring. ++ */ ++ ++/* ++ * PLCP_SIGNAL, PLCP_SERVICE, PLCP_LENGTH_LOW and PLCP_LENGTH_HIGH are BBP registers. 
++ * For RT2460 devices we need, besides the value we want to write, ++ * also set the busy bit (0x8000) and the register number (0x0f00). ++ * The value we want to write is stored in 0x00ff. ++ * For PLCP_SIGNAL we can optionally enable SHORT_PREAMBLE. ++ * For PLCP_SERVICE we can set the length extension bit according to ++ * 802.11b standard 18.2.3.5. ++ */ ++static void rt2x00_write_tx_desc(struct _rt2x00_pci *rt2x00pci, ++ struct _txd *txd, u32 packet_size, u16 rate, ++ u16 xmit_flags) ++{ ++ u32 residual = 0x00000000; ++ u32 duration = 0x00000000; ++ u16 signal = 0x0000; ++ u16 service = 0x0000; ++ u16 length_low = 0x0000; ++ u16 length_high = 0x0000; ++ ++ rt2x00_set_field32(&txd->word0, TXD_W0_VALID, 1); ++ rt2x00_set_field32(&txd->word0, TXD_W0_DATABYTE_COUNT, packet_size); ++ rt2x00_set_field32(&txd->word0, TXD_W0_ACK, ++ (xmit_flags & XMIT_ACK) ? 1 : 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_RETRY_MODE, ++ (xmit_flags & XMIT_LONG_RETRY) ? 1 : 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_TIMESTAMP, ++ (xmit_flags & XMIT_TIMESTAMP) ? 1 : 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_MORE_FRAG, ++ (xmit_flags & XMIT_MORE_FRAGS) ? 1 : 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_MORE_FRAG, ++ (xmit_flags & XMIT_RTS) ? 1 : 0); ++ rt2x00_set_field32(&txd->word10, TXD_W10_RTS, ++ (xmit_flags & XMIT_RTS) ? 1 : 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_OFDM, ++ (xmit_flags & XMIT_OFDM) ? 1 : 0); ++ ++ packet_size += 4; ++ ++ if (xmit_flags & XMIT_OFDM) { ++ /* ++ * convert length to microseconds. ++ */ ++ length_high = (packet_size >> 6) & 0x3f; ++ length_low = (packet_size & 0x3f); ++ } else { ++ residual = get_duration_res(packet_size, rate); ++ duration = get_duration(packet_size, rate); ++ ++ if (residual != 0) ++ duration++; ++ ++ length_high = duration >> 8; ++ length_low = duration & 0xff; ++ } ++ ++ signal |= 0x8500 | rt2x00_get_plcp(rate); ++ if (xmit_flags & XMIT_SHORT_PREAMBLE) ++ signal |= 0x0008; ++ ++ service |= 0x0600 | 0x0004; ++ if (residual <= (8 % 11)) ++ service |= 0x0080; ++ ++ rt2x00_set_field32(&txd->word3, TXD_W3_PLCP_SIGNAL, signal); ++ rt2x00_set_field32(&txd->word3, TXD_W3_PLCP_SERVICE, service); ++ rt2x00_set_field32(&txd->word3, TXD_W3_PLCP_LENGTH_LOW, length_low); ++ rt2x00_set_field32(&txd->word3, TXD_W3_PLCP_LENGTH_HIGH, length_high); ++ ++ /* set XMIT_IFS to XMIT_IFS_NONE */ ++ rt2x00_set_field32(&txd->word0, TXD_W0_IFS, XMIT_IFS_NONE); ++ ++ /* highest priority */ ++ rt2x00_set_field32(&txd->word2, TXD_W2_CWMIN, 1); ++ rt2x00_set_field32(&txd->word2, TXD_W2_CWMAX, 2); ++ rt2x00_set_field32(&txd->word2, TXD_W2_AIFS, 1); ++ ++ /* ++ * set this last, after this the device can start transmitting the packet. 
++ */ ++ rt2x00_set_field32(&txd->word0, TXD_W0_OWNER_NIC, 1); ++} ++ ++static int rt2x00_dev_xmit_packet(struct _rt2x00_core *core, ++ struct rtskb *rtskb, u16 rate, u16 xmit_flags) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ struct _data_ring *ring = NULL; ++ struct _txd *txd = NULL; ++ void *data = NULL; ++ u32 reg = 0x00000000; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&rt2x00pci->lock, context); ++ ++ /* load tx-control register */ ++ rt2x00_register_read(rt2x00pci, TXCSR0, ®); ++ ++ /* select tx-descriptor ring and prepare xmit */ ++ ring = &rt2x00pci->tx; ++ rt2x00_set_field32(®, TXCSR0_KICK_TX, 1); ++ ++ txd = DESC_ADDR(ring); ++ data = DATA_ADDR(ring); ++ ++ if (rt2x00_get_field32(txd->word0, TXD_W0_OWNER_NIC) || ++ rt2x00_get_field32(txd->word0, TXD_W0_VALID)) { ++ rtdm_lock_put_irqrestore(&rt2x00pci->lock, context); ++ return -ENOMEM; ++ } ++ ++ /* get and patch time stamp just before the transmission */ ++ if (rtskb->xmit_stamp) ++ *rtskb->xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *rtskb->xmit_stamp); ++ ++ /* copy rtskb to dma */ ++ memcpy(data, rtskb->data, rtskb->len); ++ ++ rt2x00_write_tx_desc(rt2x00pci, txd, rtskb->len, rate, xmit_flags); ++ rt2x00_ring_index_inc(ring); ++ ++ /* let the device do the rest ... */ ++ rt2x00_register_write(rt2x00pci, TXCSR0, reg); ++ ++ rtdm_lock_put_irqrestore(&rt2x00pci->lock, context); ++ ++ return 0; ++} ++ ++/* ++ * PCI device handlers for usage by core module. ++ */ ++static struct _rt2x00_dev_handler rt2x00_pci_handler = { ++ ++ .dev_module = THIS_MODULE, ++ .dev_probe = rt2x00_dev_probe, ++ .dev_remove = rt2x00_dev_remove, ++ .dev_radio_on = rt2x00_dev_radio_on, ++ .dev_radio_off = rt2x00_dev_radio_off, ++ .dev_update_config = rt2x00_dev_update_config, ++ .dev_register_access = rt2x00_dev_register_access, ++ .dev_xmit_packet = rt2x00_dev_xmit_packet, ++}; ++ ++int rt2x00_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) ++{ ++ struct rtnet_device *rtnet_dev = NULL; ++ int status = 0x00000000; ++ ++ DEBUG("start.\n"); ++ ++ if (id->driver_data != RT2560) { ++ ERROR("detected device not supported.\n"); ++ status = -ENODEV; ++ goto exit; ++ } ++ ++ if (pci_enable_device(pci_dev)) { ++ ERROR("enable device failed.\n"); ++ status = -EIO; ++ goto exit; ++ } ++ ++ pci_set_master(pci_dev); ++ ++ if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64)) && ++ pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) { ++ ERROR("PCI DMA not supported\n"); ++ status = -EIO; ++ goto exit_disable_device; ++ } ++ ++ if (pci_request_regions(pci_dev, pci_name(pci_dev))) { ++ ERROR("PCI request regions failed.\n"); ++ status = -EBUSY; ++ goto exit_disable_device; ++ } ++ INFO("pci_dev->irq=%d\n", pci_dev->irq); ++ ++ rtnet_dev = rt2x00_core_probe(&rt2x00_pci_handler, pci_dev, ++ sizeof(struct _rt2x00_pci)); ++ ++ if (!rtnet_dev) { ++ ERROR("rtnet_device allocation failed.\n"); ++ status = -ENOMEM; ++ goto exit_release_regions; ++ } ++ ++ rtnet_dev->irq = pci_dev->irq; ++ ++ pci_set_drvdata(pci_dev, rtnet_dev); ++ ++ return 0; ++ ++exit_release_regions: ++ pci_release_regions(pci_dev); ++ ++exit_disable_device: ++ if (status != -EBUSY) ++ pci_disable_device(pci_dev); ++ ++exit: ++ return status; ++} ++ ++static void rt2x00_pci_remove(struct pci_dev *pci_dev) ++{ ++ struct rtnet_device *rtnet_dev = pci_get_drvdata(pci_dev); ++ ++ rt2x00_core_remove(rtnet_dev); ++ pci_set_drvdata(pci_dev, NULL); ++ pci_release_regions(pci_dev); ++ pci_disable_device(pci_dev); ++} ++ ++/* ++ * RT2500 PCI module information. 
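++ * The PCI ID table matches only the RT2560 (PCI 0x1814:0x0201);
++ * rt2x00_pci_probe() rejects any other driver_data value.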
++ */ ++char version[] = DRV_NAME " - " DRV_VERSION; ++ ++struct pci_device_id rt2x00_device_pci_tbl[] = { ++ { PCI_DEVICE(0x1814, 0x0201), ++ .driver_data = RT2560 }, /* Ralink 802.11g */ ++ { ++ 0, ++ } ++}; ++ ++MODULE_AUTHOR(DRV_AUTHOR); ++MODULE_DESCRIPTION("RTnet rt2500 PCI WLAN driver (PCI Module)"); ++MODULE_LICENSE("GPL"); ++ ++struct pci_driver rt2x00_pci_driver = { ++ .name = DRV_NAME, ++ .id_table = rt2x00_device_pci_tbl, ++ .probe = rt2x00_pci_probe, ++ .remove = rt2x00_pci_remove, ++}; ++ ++static int __init rt2x00_pci_init(void) ++{ ++ rtdm_printk(KERN_INFO "Loading module: %s\n", version); ++ return pci_register_driver(&rt2x00_pci_driver); ++} ++ ++static void __exit rt2x00_pci_exit(void) ++{ ++ rtdm_printk(KERN_INFO "Unloading module: %s\n", version); ++ pci_unregister_driver(&rt2x00_pci_driver); ++} ++ ++module_init(rt2x00_pci_init); ++module_exit(rt2x00_pci_exit); +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/Makefile 2021-04-07 16:01:27.605633626 +0800 +@@ -0,0 +1,6 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_RT2500) += rt_rt2x00core.o rt_rt2500pci.o ++ ++rt_rt2x00core-y := rt2x00core.o ++rt_rt2500pci-y := rt2500pci.o +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/rt2x00.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/rt2x00.h 2021-04-07 16:01:27.600633633 +0800 +@@ -0,0 +1,649 @@ ++/* rt2x00.h ++ * ++ * Copyright (C) 2004 - 2005 rt2x00-2.0.0-b3 SourceForge Project ++ * ++ * 2006 rtnet adaption by Daniel Gregorek ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the ++ * Free Software Foundation, Inc., ++ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ Module: rt2x00 ++ Abstract: rt2x00 global information. ++ Supported chipsets: RT2560 ++*/ ++ ++#ifndef RT2X00_H ++#define RT2X00_H ++ ++#include ++#include ++ ++#include ++#include ++ ++#define MAX_UNITS 2 ++ ++/* ++ * Module information. ++ */ ++#define DRV_NAME "rt2x00" ++#define DRV_VERSION "0.1" ++#define DRV_AUTHOR "Daniel Gregorek " ++//#define CONFIG_RT2X00_DEBUG ++ ++/* ++ * Debug defines. ++ * The debug variable will be exported by the device specific module. ++ * For this reason this variable must be set to extern to make it accessible ++ * to the core module as well. ++ */ ++#ifdef CONFIG_RT2X00_DEBUG ++extern int rt2x00_debug_level; ++#define DEBUG_PRINTK(__message...) \ ++ do { \ ++ rtdm_printk(__message); \ ++ } while (0) ++#else /* CONFIG_RT2X00_DEBUG */ ++#define DEBUG_PRINTK(__message...) \ ++ do { \ ++ } while (0) ++#endif /* CONFIG_RT2X00_DEBUG */ ++ ++/* ++ * Various debug levels. ++ * PANIC and ERROR indicates serious problems within the module, ++ * these should never be ignored and thus we will always print the message. 
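++ * Only DEBUG() is compiled out when CONFIG_RT2X00_DEBUG is not set; all
++ * other levels always go through rtdm_printk().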
++ */ ++#define PANIC(__message, __args...) \ ++ rtdm_printk(KERN_PANIC DRV_NAME "->%s: Panic - " __message, \ ++ __FUNCTION__, ##__args); ++#define ERROR(__message, __args...) \ ++ rtdm_printk(KERN_ERR DRV_NAME "->%s: Error - " __message, \ ++ __FUNCTION__, ##__args); ++#define WARNING(__message, __args...) \ ++ rtdm_printk(KERN_WARNING DRV_NAME "->%s: Warning - " __message, \ ++ __FUNCTION__, ##__args); ++#define NOTICE(__message, __args...) \ ++ rtdm_printk(KERN_NOTICE DRV_NAME "->%s: Notice - " __message, \ ++ __FUNCTION__, ##__args); ++#define INFO(__message, __args...) \ ++ rtdm_printk(KERN_INFO DRV_NAME "->%s: Info - " __message, \ ++ __FUNCTION__, ##__args); ++#define DEBUG(__message, __args...) \ ++ DEBUG_PRINTK(KERN_DEBUG DRV_NAME "->%s: Debug - " __message, \ ++ __FUNCTION__, ##__args); ++ ++/* ++ * RT2x00 ring types. ++ */ ++ ++/* ++ * Ring names. ++ */ ++#define RING_RX 0x01 /* Ring used for receiving packets. */ ++#define RING_TX 0x02 /* Ring used for transmitting normal packets. */ ++ ++/* ++ * Ring sizes. ++ */ ++#define DATA_FRAME_SIZE 2432 ++#define MGMT_FRAME_SIZE 256 ++ ++/* ++ * RT2x00 xmit flags. ++ */ ++#define XMIT_IFS_SIFS 0x0001 ++#define XMIT_IFS_BACKOFF 0x0002 ++#define XMIT_IFS_NEW_BACKOFF 0x0004 ++#define XMIT_IFS_NONE 0x0008 ++#define XMIT_NEW_SEQUENCE 0x0010 ++#define XMIT_ACK 0x0020 ++#define XMIT_TIMESTAMP 0x0040 ++#define XMIT_RTS 0x0080 ++#define XMIT_OFDM 0x0100 ++#define XMIT_LONG_RETRY 0x0200 ++#define XMIT_MORE_FRAGS 0x0400 ++#define XMIT_SHORT_PREAMBLE 0x0800 ++#define XMIT_START 0x1000 ++ ++/* ++ * RT2x00 Statistics flags. ++ */ ++#define STATS_TX_RESULT 0x01 ++#define STATS_TX_RETRY_COUNT 0x02 ++#define STATS_RX_CRC 0x10 ++#define STATS_RX_PHYSICAL 0x20 ++#define STATS_RX_QUALITY 0x40 ++#define STATS_RX_DROP 0x80 ++ ++/* ++ * TX result flags. ++ */ ++#define TX_SUCCESS 0 ++#define TX_SUCCESS_RETRY 1 ++#define TX_FAIL_RETRY 2 ++#define TX_FAIL_INVALID 3 ++#define TX_FAIL_OTHER 4 ++ ++/* ++ * Channel type defines. ++ */ ++#define CHANNEL_OFDM 0x01 ++#define CHANNEL_UNII_LOW 0x02 ++#define CHANNEL_HIPERLAN2 0x04 ++#define CHANNEL_UNII_HIGH 0x08 ++ ++#define CHANNEL_OFDM_MIN 1 ++#define CHANNEL_OFDM_MAX 14 ++#define CHANNEL_UNII_LOW_MIN 36 ++#define CHANNEL_UNII_LOW_MAX 64 ++#define CHANNEL_HIPERLAN2_MIN 100 ++#define CHANNEL_HIPERLAN2_MAX 140 ++#define CHANNEL_UNII_HIGH_MIN 149 ++#define CHANNEL_UNII_HIGH_MAX 161 ++ ++/* ++ * Device 802.11abg capabilities. ++ */ ++static struct _rt2x00_capabilities { ++ u8 txpower[6]; ++ u8 bitrate[12]; ++} __attribute__ ((packed)) capabilities = { ++ /* ++ * tx-power. ++ */ ++ .txpower = { ++ 3, 12, 25, 50, 75, 100, ++ }, ++ ++ /* ++ * Bitrates ++ */ ++ .bitrate = { ++ 2, 4, 11, 22, /* CCK. */ ++ 12, 18, 24, 36, 48, 72, 96, 108, /* OFDM. */ ++ }, ++}; ++ ++struct _rt2x00_config { ++ u8 config_flags; ++#define CONFIG_DROP_BCAST 0x0001 ++#define CONFIG_DROP_MCAST 0x0002 ++#define CONFIG_AUTORESP 0x0004 ++ ++ u8 antenna_tx; ++ u8 antenna_rx; ++ ++ u8 bssid[ETH_ALEN]; ++ u8 short_retry; ++ u8 long_retry; ++ ++ u8 channel; ++ u8 bitrate; /* 0.5Mbit/sec */ ++ u8 txpower; /* % */ ++ ++ u8 bbpsens; ++ ++ /* ++ * LED status ++ */ ++ u8 led_status; ++ ++ u16 __pad2; /* For alignment only. */ ++ ++ /* ++ * Duration values in us. ++ */ ++ u8 plcp; ++ u8 sifs; ++ u8 slot_time; ++ ++ /* ++ * Configuration values that have to be updated to device. 
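++ * update_flags is a bitmask built from the UPDATE_* values below and is
++ * consumed by the device handler's dev_update_config().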
++ */ ++ u16 update_flags; ++#define UPDATE_ALL_CONFIG 0xffff ++#define UPDATE_BSSID 0x0001 ++#define UPDATE_PACKET_FILTER 0x0002 ++#define UPDATE_CHANNEL 0x0004 ++#define UPDATE_BITRATE 0x0008 ++#define UPDATE_RETRY 0x0010 ++#define UPDATE_TXPOWER 0x0020 ++#define UPDATE_ANTENNA 0x0040 ++#define UPDATE_DURATION 0x0080 ++#define UPDATE_PREAMBLE 0x0100 ++#define UPDATE_AUTORESP 0x0200 ++#define UPDATE_LED_STATUS 0x0400 ++#define UPDATE_BBPSENS 0x0800 ++ ++} __attribute__((packed)); ++ ++struct _rt2x00_core { ++ /* ++ * RT2x00 device status flags (atomic read/write access). ++ */ ++ unsigned long flags; ++ ++#define DEVICE_ENABLED 0 /* Device has been opened. */ ++#define DEVICE_AWAKE 1 /* Device is not suspended. */ ++#define DEVICE_RADIO_ON 2 /* Device antenna is enabled. */ ++#define DEVICE_CONFIG_UPDATE 3 /* Device is updating configuration. */ ++ ++ /* ++ * Device handler. ++ */ ++ struct _rt2x00_dev_handler *handler; ++ ++ /* ++ * RTnet device we belong to. ++ */ ++ struct rtnet_device *rtnet_dev; ++ ++ /* ++ * RTwlan stack structure. ++ */ ++ struct rtwlan_device *rtwlan_dev; ++ ++ /* ++ * Device configuration. ++ */ ++ struct _rt2x00_config config; ++ ++ void *priv; ++ ++} __attribute__((packed)); ++ ++/* ++ * Device specific handlers. ++ */ ++struct _rt2x00_dev_handler { ++ /* ++ * Device specific module. ++ */ ++ struct module *dev_module; ++ ++ /* ++ * Initialization handlers. ++ */ ++ int (*dev_probe)(struct _rt2x00_core *core, void *priv); ++ int (*dev_remove)(struct _rt2x00_core *core); ++ ++ /* ++ * Radio control. ++ */ ++ int (*dev_radio_on)(struct _rt2x00_core *core); ++ int (*dev_radio_off)(struct _rt2x00_core *core); ++ ++ /* ++ * Configuration handlers. ++ */ ++ int (*dev_update_config)(struct _rt2x00_core *core, u16 update_flags); ++ ++ /* ++ * xmit handler. ++ */ ++ int (*dev_xmit_packet)(struct _rt2x00_core *core, struct rtskb *rtskb, ++ u16 rate, u16 xmit_flags); ++ ++ /* ++ * Handler for direct access to register from core. ++ */ ++ int (*dev_register_access)(struct _rt2x00_core *core, int request, ++ u32 address, u32 *value); ++ ++} __attribute__((packed)); ++ ++static inline void *rt2x00_priv(const struct _rt2x00_core *core) ++{ ++ return core->priv; ++} ++ ++/* ++ * Duration calculations ++ * The rate variable passed is: 2 * real_rate (in Mb/s). ++ * Therefore length has to be multiplied with 8 to convert bytes to bits and mulltiply the length ++ * with 2 to compensate for the difference between real_rate and the rate variable. ++ */ ++#define ACK_SIZE 14 ++#define IEEE80211_HEADER 24 ++ ++static inline u16 get_duration(const unsigned int size, const u8 rate) ++{ ++ return ((size * 8 * 2) / rate); ++} ++ ++static inline u16 get_duration_res(const unsigned int size, const u8 rate) ++{ ++ return ((size * 8 * 2) % rate); ++} ++ ++static inline u16 get_preamble(const struct _rt2x00_config *config) ++{ ++ return 144; ++} ++ ++/* ++ * Register handlers. ++ * We store the position of a register field inside a field structure, ++ * This will simplify the process of setting and reading a certain field ++ * inside the register. 
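++ *
++ * For example, FIELD32(8, 0x0000ff00) describes bits 8..15 of a register:
++ * rt2x00_set_field32(&reg, FIELD32(8, 0x0000ff00), 0x12) places 0x12 there,
++ * and rt2x00_get_field32(reg, FIELD32(8, 0x0000ff00)) reads it back.
++ * The *_nb variants do the same without the little-endian conversion.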
++ */ ++struct _rt2x00_field16 { ++ u16 bit_offset; ++ u16 bit_mask; ++} __attribute__((packed)); ++ ++struct _rt2x00_field32 { ++ u32 bit_offset; ++ u32 bit_mask; ++} __attribute__((packed)); ++ ++#define FIELD16(__offset, __mask) \ ++ ((struct _rt2x00_field16){ (__offset), (__mask) }) ++#define FIELD32(__offset, __mask) \ ++ ((struct _rt2x00_field32){ (__offset), (__mask) }) ++ ++static inline void rt2x00_set_field32(u32 *reg, ++ const struct _rt2x00_field32 field, ++ const u32 value) ++{ ++ *reg &= cpu_to_le32(~(field.bit_mask)); ++ *reg |= cpu_to_le32((value << field.bit_offset) & field.bit_mask); ++} ++ ++static inline void rt2x00_set_field32_nb(u32 *reg, ++ const struct _rt2x00_field32 field, ++ const u32 value) ++{ ++ *reg &= ~(field.bit_mask); ++ *reg |= (value << field.bit_offset) & field.bit_mask; ++} ++ ++static inline u32 rt2x00_get_field32(const u32 reg, ++ const struct _rt2x00_field32 field) ++{ ++ return (le32_to_cpu(reg) & field.bit_mask) >> field.bit_offset; ++} ++ ++static inline u32 rt2x00_get_field32_nb(const u32 reg, ++ const struct _rt2x00_field32 field) ++{ ++ return (reg & field.bit_mask) >> field.bit_offset; ++} ++ ++static inline void rt2x00_set_field16(u16 *reg, ++ const struct _rt2x00_field16 field, ++ const u16 value) ++{ ++ *reg &= cpu_to_le16(~(field.bit_mask)); ++ *reg |= cpu_to_le16((value << field.bit_offset) & field.bit_mask); ++} ++ ++static inline void rt2x00_set_field16_nb(u16 *reg, ++ const struct _rt2x00_field16 field, ++ const u16 value) ++{ ++ *reg &= ~(field.bit_mask); ++ *reg |= (value << field.bit_offset) & field.bit_mask; ++} ++ ++static inline u16 rt2x00_get_field16(const u16 reg, ++ const struct _rt2x00_field16 field) ++{ ++ return (le16_to_cpu(reg) & field.bit_mask) >> field.bit_offset; ++} ++ ++static inline u16 rt2x00_get_field16_nb(const u16 reg, ++ const struct _rt2x00_field16 field) ++{ ++ return (reg & field.bit_mask) >> field.bit_offset; ++} ++ ++/* ++ * rf register sructure for channel selection. ++ */ ++struct _rf_channel { ++ u32 rf1; ++ u32 rf2; ++ u32 rf3; ++ u32 rf4; ++} __attribute__((packed)); ++ ++/* ++ * Chipset identification ++ * The chipset on the device is composed of a RT and RF chip. ++ * The chipset combination is important for determining device capabilities. ++ */ ++struct _rt2x00_chip { ++ u16 rt; ++ u16 rf; ++} __attribute__((packed)); ++ ++/* ++ * Set chipset data. ++ * Some rf values for RT2400 devices are equal to rf values for RT2500 devices. ++ * To prevent problems, all rf values will be masked to clearly seperate each chipset. ++ */ ++static inline void set_chip(struct _rt2x00_chip *chipset, const u16 rt, ++ const u16 rf) ++{ ++ INFO("Chipset detected - rt: %04x, rf: %04x.\n", rt, rf); ++ ++ chipset->rt = rt; ++ chipset->rf = rf | (chipset->rt & 0xff00); ++} ++ ++static inline char rt2x00_rt(const struct _rt2x00_chip *chipset, const u16 chip) ++{ ++ return (chipset->rt == chip); ++} ++ ++static inline char rt2x00_rf(const struct _rt2x00_chip *chipset, const u16 chip) ++{ ++ return (chipset->rf == chip); ++} ++ ++static inline u16 rt2x00_get_rf(const struct _rt2x00_chip *chipset) ++{ ++ return chipset->rf; ++} ++ ++/* ++ * _data_ring ++ * Data rings are used by the device to send and receive packets. ++ * The data_addr is the base address of the data memory. ++ * Device specifice information is pointed to by the priv pointer. ++ * The index values may only be changed with the functions ring_index_inc() ++ * and ring_index_done_inc(). ++ */ ++struct _data_ring { ++ /* ++ * Base address of packet ring. 
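++ * data_dma is the bus address handed to the device (via TXCSR3/RXCSR2),
++ * data_addr the CPU mapping of the same memory.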
++ */ ++ dma_addr_t data_dma; ++ void *data_addr; ++ ++ /* ++ * Private device specific data. ++ */ ++ void *priv; ++ struct _rt2x00_core *core; ++ ++ /* ++ * Current index values. ++ */ ++ u8 index; ++ u8 index_done; ++ ++ /* ++ * Ring type set with RING_* define. ++ */ ++ u8 ring_type; ++ ++ /* ++ * Number of entries in this ring. ++ */ ++ u8 max_entries; ++ ++ /* ++ * Size of packet and descriptor in bytes. ++ */ ++ u16 entry_size; ++ u16 desc_size; ++ ++ /* ++ * Total allocated memory size. ++ */ ++ u32 mem_size; ++} __attribute__((packed)); ++ ++/* ++ * Number of entries in a packet ring. ++ */ ++#define RX_ENTRIES 8 ++#define TX_ENTRIES 8 ++#define ATIM_ENTRIES 1 ++#define PRIO_ENTRIES 2 ++#define BEACON_ENTRIES 1 ++ ++/* ++ * Initialization and cleanup routines. ++ */ ++static inline void rt2x00_init_ring(struct _rt2x00_core *core, ++ struct _data_ring *ring, const u8 ring_type, ++ const u16 max_entries, const u16 entry_size, ++ const u16 desc_size) ++{ ++ ring->core = core; ++ ring->index = 0; ++ ring->index_done = 0; ++ ring->ring_type = ring_type; ++ ring->max_entries = max_entries; ++ ring->entry_size = entry_size; ++ ring->desc_size = desc_size; ++ ring->mem_size = ++ ring->max_entries * (ring->desc_size + ring->entry_size); ++} ++ ++static inline void rt2x00_deinit_ring(struct _data_ring *ring) ++{ ++ ring->core = NULL; ++ ring->index = 0; ++ ring->index_done = 0; ++ ring->ring_type = 0; ++ ring->max_entries = 0; ++ ring->entry_size = 0; ++ ring->desc_size = 0; ++ ring->mem_size = 0; ++} ++ ++/* ++ * Ring index manipulation functions. ++ */ ++static inline void rt2x00_ring_index_inc(struct _data_ring *ring) ++{ ++ ring->index = (++ring->index < ring->max_entries) ? ring->index : 0; ++} ++ ++static inline void rt2x00_ring_index_done_inc(struct _data_ring *ring) ++{ ++ ring->index_done = ++ (++ring->index_done < ring->max_entries) ? ring->index_done : 0; ++} ++ ++static inline void rt2x00_ring_clear_index(struct _data_ring *ring) ++{ ++ ring->index = 0; ++ ring->index_done = 0; ++} ++ ++static inline u8 rt2x00_ring_empty(struct _data_ring *ring) ++{ ++ return ring->index_done == ring->index; ++} ++ ++static inline u8 rt2x00_ring_free_entries(struct _data_ring *ring) ++{ ++ if (ring->index >= ring->index_done) ++ return ring->max_entries - (ring->index - ring->index_done); ++ else ++ return ring->index_done - ring->index; ++} ++ ++/* ++ * Return PLCP value matching the rate. ++ * PLCP values according to ieee802.11a-1999 p.14. ++ */ ++static inline u8 rt2x00_get_plcp(const u8 rate) ++{ ++ u8 counter = 0x00; ++ u8 plcp[12] = { ++ 0x00, 0x01, 0x02, 0x03, /* CCK. */ ++ 0x0b, 0x0f, 0x0a, 0x0e, 0x09, 0x0d, 0x08, 0x0c, /* OFDM. */ ++ }; ++ ++ for (; counter < 12; counter++) { ++ if (capabilities.bitrate[counter] == rate) ++ return plcp[counter]; ++ } ++ ++ return 0xff; ++} ++ ++#define OFDM_CHANNEL(__channel) \ ++ ((__channel) >= CHANNEL_OFDM_MIN && (__channel) <= CHANNEL_OFDM_MAX) ++#define UNII_LOW_CHANNEL(__channel) \ ++ ((__channel) >= CHANNEL_UNII_LOW_MIN && \ ++ (__channel) <= CHANNEL_UNII_LOW_MAX) ++#define HIPERLAN2_CHANNEL(__channel) \ ++ ((__channel) >= CHANNEL_HIPERLAN2_MIN && \ ++ (__channel) <= CHANNEL_HIPERLAN2_MAX) ++#define UNII_HIGH_CHANNEL(__channel) \ ++ ((__channel) >= CHANNEL_UNII_HIGH_MIN && \ ++ (__channel) <= CHANNEL_UNII_HIGH_MAX) ++ ++/* ++ * Return the index value of the channel starting from the first channel of the range. ++ * Where range can be OFDM, UNII (low), HiperLAN2 or UNII (high). 
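++ * For example, OFDM channel 5 maps to index 4, UNII (low) channel 40 maps
++ * to index 1, and a 5GHz channel that is not a multiple of 4 yields -EINVAL.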
++ */ ++static inline int rt2x00_get_channel_index(const u8 channel) ++{ ++ if (OFDM_CHANNEL(channel)) ++ return (channel - 1); ++ ++ if (channel % 4) ++ return -EINVAL; ++ ++ if (UNII_LOW_CHANNEL(channel)) ++ return ((channel - CHANNEL_UNII_LOW_MIN) / 4); ++ else if (HIPERLAN2_CHANNEL(channel)) ++ return ((channel - CHANNEL_HIPERLAN2_MIN) / 4); ++ else if (UNII_HIGH_CHANNEL(channel)) ++ return ((channel - CHANNEL_UNII_HIGH_MIN) / 4); ++ return -EINVAL; ++} ++ ++/* ++ * RT2x00 core module functions that can be used in the device specific modules. ++ */ ++extern struct rtnet_device * ++rt2x00_core_probe(struct _rt2x00_dev_handler *handler, void *priv, ++ u32 sizeof_dev); ++extern void rt2x00_core_remove(struct rtnet_device *rtnet_dev); ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/Kconfig 2021-04-07 16:01:27.595633640 +0800 +@@ -0,0 +1,4 @@ ++config XENO_DRIVERS_NET_DRV_RT2500 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Ralink 2500 WLAN" ++ select XENO_DRIVERS_NET_RTWLAN +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/rt2500pci.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/rt2500pci.h 2021-04-07 16:01:27.591633646 +0800 +@@ -0,0 +1,1498 @@ ++/* rt2500pci.h ++ * ++ * Copyright (C) 2004 - 2005 rt2x00-2.0.0-b3 SourceForge Project ++ * ++ * 2006 rtnet adaption by Daniel Gregorek ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the ++ * Free Software Foundation, Inc., ++ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ * Module: rt2500pci ++ * Abstract: Data structures and registers for the rt2500pci module. ++ * Supported chipsets: RT2560. ++ */ ++ ++#ifndef RT2500PCI_H ++#define RT2500PCI_H ++ ++/* ++ * RT chip defines ++ */ ++#define RT2560 0x0201 ++ ++/* ++ * RF chip defines ++ */ ++#define RF2522 0x0200 ++#define RF2523 0x0201 ++#define RF2524 0x0202 ++#define RF2525 0x0203 ++#define RF2525E 0x0204 ++#define RF5222 0x0210 ++ ++/* ++ * Control/Status Registers(CSR). ++ */ ++#define CSR0 0x0000 /* ASIC revision number. */ ++#define CSR1 0x0004 /* System control register. */ ++#define CSR2 0x0008 /* System admin status register (invalid). */ ++#define CSR3 0x000c /* STA MAC address register 0. */ ++#define CSR4 0x0010 /* STA MAC address register 1. */ ++#define CSR5 0x0014 /* BSSID register 0. */ ++#define CSR6 0x0018 /* BSSID register 1. */ ++#define CSR7 0x001c /* Interrupt source register. */ ++#define CSR8 0x0020 /* Interrupt mask register. */ ++#define CSR9 0x0024 /* Maximum frame length register. */ ++#define SECCSR0 0x0028 /* WEP control register. */ ++#define CSR11 0x002c /* Back-off control register. */ ++#define CSR12 0x0030 /* Synchronization configuration register 0. */ ++#define CSR13 0x0034 /* Synchronization configuration register 1. 
*/ ++#define CSR14 0x0038 /* Synchronization control register. */ ++#define CSR15 0x003c /* Synchronization status register. */ ++#define CSR16 0x0040 /* TSF timer register 0. */ ++#define CSR17 0x0044 /* TSF timer register 1. */ ++#define CSR18 0x0048 /* IFS timer register 0. */ ++#define CSR19 0x004c /* IFS timer register 1. */ ++#define CSR20 0x0050 /* WakeUp register. */ ++#define CSR21 0x0054 /* EEPROM control register. */ ++#define CSR22 0x0058 /* CFP Control Register. */ ++ ++/* ++ * Transmit related CSRs. ++ */ ++#define TXCSR0 0x0060 /* TX control register. */ ++#define TXCSR1 0x0064 /* TX configuration register. */ ++#define TXCSR2 0x0068 /* TX descriptor configuratioon register. */ ++#define TXCSR3 0x006c /* TX Ring Base address register. */ ++#define TXCSR4 0x0070 /* TX Atim Ring Base address register. */ ++#define TXCSR5 0x0074 /* TX Prio Ring Base address register. */ ++#define TXCSR6 0x0078 /* Beacon base address. */ ++#define TXCSR7 0x007c /* AutoResponder Control Register. */ ++#define TXCSR8 0x0098 /* CCK TX BBP registers. */ ++#define TXCSR9 0x0094 /* OFDM TX BBP registers. */ ++ ++/* ++ * Receive related CSRs. ++ */ ++#define RXCSR0 0x0080 /* RX control register. */ ++#define RXCSR1 0x0084 /* RX descriptor configuration register. */ ++#define RXCSR2 0x0088 /* RX Ring base address register. */ ++#define RXCSR3 0x0090 /* BBP ID register 0 */ ++#define ARCSR1 0x009c /* Auto Responder PLCP config register 1. */ ++ ++/* ++ * PCI control CSRs. ++ */ ++#define PCICSR 0x008c /* PCI control register. */ ++ ++/* ++ * Statistic Register. ++ */ ++#define CNT0 0x00a0 /* FCS error count. */ ++#define TIMECSR2 0x00a8 ++#define CNT1 0x00ac /* PLCP error count. */ ++#define CNT2 0x00b0 /* long error count. */ ++#define TIMECSR3 0x00b4 ++#define CNT3 0x00b8 /* CCA false alarm count. */ ++#define CNT4 0x00bc /* Rx FIFO overflow count. */ ++#define CNT5 0x00c0 /* Tx FIFO underrun count. */ ++ ++/* ++ * Baseband Control Register. ++ */ ++#define PWRCSR0 0x00c4 /* Power mode configuration. */ ++#define PSCSR0 0x00c8 /* Power state transition time. */ ++#define PSCSR1 0x00cc /* Power state transition time. */ ++#define PSCSR2 0x00d0 /* Power state transition time. */ ++#define PSCSR3 0x00d4 /* Power state transition time. */ ++#define PWRCSR1 0x00d8 /* Manual power control / status. */ ++#define TIMECSR 0x00dc /* Timer control. */ ++#define MACCSR0 0x00e0 /* MAC configuration. */ ++#define MACCSR1 0x00e4 /* MAC configuration. */ ++#define RALINKCSR 0x00e8 /* Ralink Auto-reset register. */ ++#define BCNCSR 0x00ec /* Beacon interval control register. */ ++ ++/* ++ * BBP / RF / IF Control Register. ++ */ ++#define BBPCSR 0x00f0 /* BBP serial control. */ ++#define RFCSR 0x00f4 /* RF serial control. */ ++#define LEDCSR 0x00f8 /* LED control register */ ++ ++#define SECCSR3 0x00fc /* AES control register. */ ++ ++/* ++ * ASIC pointer information. ++ */ ++#define RXPTR 0x0100 /* Current RX ring address. */ ++#define TXPTR 0x0104 /* Current Tx ring address. */ ++#define PRIPTR 0x0108 /* Current Priority ring address. */ ++#define ATIMPTR 0x010c /* Current ATIM ring address. */ ++ ++#define TXACKCSR0 0x0110 /* TX ACK timeout. */ ++#define ACKCNT0 0x0114 /* TX ACK timeout count. */ ++#define ACKCNT1 0x0118 /* RX ACK timeout count. */ ++ ++/* ++ * GPIO and others. ++ */ ++#define GPIOCSR 0x0120 /* GPIO. */ ++#define FIFOCSR0 0x0128 /* TX FIFO pointer. */ ++#define FIFOCSR1 0x012c /* RX FIFO pointer. */ ++#define BCNCSR1 0x0130 /* Tx BEACON offset time, unit: 1 usec. 
*/ ++#define MACCSR2 0x0134 /* TX_PE to RX_PE delay time, unit: 1 PCI clock cycle. */ ++#define TESTCSR 0x0138 /* TEST mode selection register. */ ++#define ARCSR2 0x013c /* 1 Mbps ACK/CTS PLCP. */ ++#define ARCSR3 0x0140 /* 2 Mbps ACK/CTS PLCP. */ ++#define ARCSR4 0x0144 /* 5.5 Mbps ACK/CTS PLCP. */ ++#define ARCSR5 0x0148 /* 11 Mbps ACK/CTS PLCP. */ ++#define ARTCSR0 0x014c /* ACK/CTS payload consumed time for 1/2/5.5/11 mbps. */ ++#define ARTCSR1 \ ++ 0x0150 /* OFDM ACK/CTS payload consumed time for 6/9/12/18 mbps. */ ++#define ARTCSR2 \ ++ 0x0154 /* OFDM ACK/CTS payload consumed time for 24/36/48/54 mbps. */ ++#define SECCSR1 0x0158 /* WEP control register. */ ++#define BBPCSR1 0x015c /* BBP TX configuration. */ ++#define DBANDCSR0 0x0160 /* Dual band configuration register 0. */ ++#define DBANDCSR1 0x0164 /* Dual band configuration register 1. */ ++#define BBPPCSR 0x0168 /* BBP Pin control register. */ ++#define DBGSEL0 0x016c /* MAC special debug mode selection register 0. */ ++#define DBGSEL1 0x0170 /* MAC special debug mode selection register 1. */ ++#define BISTCSR 0x0174 /* BBP BIST register. */ ++#define MCAST0 0x0178 /* multicast filter register 0. */ ++#define MCAST1 0x017c /* multicast filter register 1. */ ++#define UARTCSR0 0x0180 /* UART1 TX register. */ ++#define UARTCSR1 0x0184 /* UART1 RX register. */ ++#define UARTCSR3 0x0188 /* UART1 frame control register. */ ++#define UARTCSR4 0x018c /* UART1 buffer control register. */ ++#define UART2CSR0 0x0190 /* UART2 TX register. */ ++#define UART2CSR1 0x0194 /* UART2 RX register. */ ++#define UART2CSR3 0x0198 /* UART2 frame control register. */ ++#define UART2CSR4 0x019c /* UART2 buffer control register. */ ++ ++/* ++ * EEPROM addresses ++ */ ++#define EEPROM_ANTENNA 0x10 ++#define EEPROM_GEOGRAPHY 0x12 ++#define EEPROM_BBP_START 0x13 ++#define EEPROM_BBP_END 0x22 ++ ++#define EEPROM_BBP_SIZE 16 ++ ++/* ++ * CSR Registers. ++ * Some values are set in TU, whereas 1 TU == 1024 us. ++ */ ++ ++/* ++ * CSR1: System control register. ++ */ ++#define CSR1_SOFT_RESET \ ++ FIELD32(0, 0x00000001) /* Software reset, 1: reset, 0: normal. */ ++#define CSR1_BBP_RESET \ ++ FIELD32(1, 0x00000002) /* Hardware reset, 1: reset, 0, release. */ ++#define CSR1_HOST_READY \ ++ FIELD32(2, 0x00000004) /* Host ready after initialization. */ ++ ++/* ++ * CSR3: STA MAC address register 0. ++ */ ++#define CSR3_BYTE0 FIELD32(0, 0x000000ff) /* MAC address byte 0. */ ++#define CSR3_BYTE1 FIELD32(8, 0x0000ff00) /* MAC address byte 1. */ ++#define CSR3_BYTE2 FIELD32(16, 0x00ff0000) /* MAC address byte 2. */ ++#define CSR3_BYTE3 FIELD32(24, 0xff000000) /* MAC address byte 3. */ ++ ++/* ++ * CSR4: STA MAC address register 1. ++ */ ++#define CSR4_BYTE4 FIELD32(0, 0x000000ff) /* MAC address byte 4. */ ++#define CSR4_BYTE5 FIELD32(8, 0x0000ff00) /* MAC address byte 5. */ ++ ++/* ++ * CSR5: BSSID register 0. ++ */ ++#define CSR5_BYTE0 FIELD32(0, 0x000000ff) /* BSSID address byte 0. */ ++#define CSR5_BYTE1 FIELD32(8, 0x0000ff00) /* BSSID address byte 1. */ ++#define CSR5_BYTE2 FIELD32(16, 0x00ff0000) /* BSSID address byte 2. */ ++#define CSR5_BYTE3 FIELD32(24, 0xff000000) /* BSSID address byte 3. */ ++ ++/* ++ * CSR6: BSSID register 1. ++ */ ++#define CSR6_BYTE4 FIELD32(0, 0x000000ff) /* BSSID address byte 4. */ ++#define CSR6_BYTE5 FIELD32(8, 0x0000ff00) /* BSSID address byte 5. */ ++ ++/* ++ * CSR7: Interrupt source register. ++ * Write 1 to clear. ++ */ ++#define CSR7_TBCN_EXPIRE \ ++ FIELD32(0, 0x00000001) /* beacon timer expired interrupt. 
*/ ++#define CSR7_TWAKE_EXPIRE \ ++ FIELD32(1, 0x00000002) /* wakeup timer expired interrupt. */ ++#define CSR7_TATIMW_EXPIRE \ ++ FIELD32(2, 0x00000004) /* timer of atim window expired interrupt. */ ++#define CSR7_TXDONE_TXRING \ ++ FIELD32(3, 0x00000008) /* tx ring transmit done interrupt. */ ++#define CSR7_TXDONE_ATIMRING \ ++ FIELD32(4, 0x00000010) /* atim ring transmit done interrupt. */ ++#define CSR7_TXDONE_PRIORING \ ++ FIELD32(5, 0x00000020) /* priority ring transmit done interrupt. */ ++#define CSR7_RXDONE FIELD32(6, 0x00000040) /* receive done interrupt. */ ++#define CSR7_DECRYPTION_DONE \ ++ FIELD32(7, 0x00000080) /* Decryption done interrupt. */ ++#define CSR7_ENCRYPTION_DONE \ ++ FIELD32(8, 0x00000100) /* Encryption done interrupt. */ ++#define CSR7_UART1_TX_TRESHOLD \ ++ FIELD32(9, 0x00000200) /* UART1 TX reaches threshold. */ ++#define CSR7_UART1_RX_TRESHOLD \ ++ FIELD32(10, 0x00000400) /* UART1 RX reaches threshold. */ ++#define CSR7_UART1_IDLE_TRESHOLD \ ++ FIELD32(11, 0x00000800) /* UART1 IDLE over threshold. */ ++#define CSR7_UART1_TX_BUFF_ERROR \ ++ FIELD32(12, 0x00001000) /* UART1 TX buffer error. */ ++#define CSR7_UART1_RX_BUFF_ERROR \ ++ FIELD32(13, 0x00002000) /* UART1 RX buffer error. */ ++#define CSR7_UART2_TX_TRESHOLD \ ++ FIELD32(14, 0x00004000) /* UART2 TX reaches threshold. */ ++#define CSR7_UART2_RX_TRESHOLD \ ++ FIELD32(15, 0x00008000) /* UART2 RX reaches threshold. */ ++#define CSR7_UART2_IDLE_TRESHOLD \ ++ FIELD32(16, 0x00010000) /* UART2 IDLE over threshold. */ ++#define CSR7_UART2_TX_BUFF_ERROR \ ++ FIELD32(17, 0x00020000) /* UART2 TX buffer error. */ ++#define CSR7_UART2_RX_BUFF_ERROR \ ++ FIELD32(18, 0x00040000) /* UART2 RX buffer error. */ ++#define CSR7_TIMER_CSR3_EXPIRE \ ++ FIELD32(19, \ ++ 0x00080000) /* TIMECSR3 timer expired (802.1H quiet period). */ ++ ++/* ++ * CSR8: Interrupt mask register. ++ * Write 1 to mask interrupt. ++ */ ++#define CSR8_TBCN_EXPIRE \ ++ FIELD32(0, 0x00000001) /* beacon timer expired interrupt. */ ++#define CSR8_TWAKE_EXPIRE \ ++ FIELD32(1, 0x00000002) /* wakeup timer expired interrupt. */ ++#define CSR8_TATIMW_EXPIRE \ ++ FIELD32(2, 0x00000004) /* timer of atim window expired interrupt. */ ++#define CSR8_TXDONE_TXRING \ ++ FIELD32(3, 0x00000008) /* tx ring transmit done interrupt. */ ++#define CSR8_TXDONE_ATIMRING \ ++ FIELD32(4, 0x00000010) /* atim ring transmit done interrupt. */ ++#define CSR8_TXDONE_PRIORING \ ++ FIELD32(5, 0x00000020) /* priority ring transmit done interrupt. */ ++#define CSR8_RXDONE FIELD32(6, 0x00000040) /* receive done interrupt. */ ++#define CSR8_DECRYPTION_DONE \ ++ FIELD32(7, 0x00000080) /* Decryption done interrupt. */ ++#define CSR8_ENCRYPTION_DONE \ ++ FIELD32(8, 0x00000100) /* Encryption done interrupt. */ ++#define CSR8_UART1_TX_TRESHOLD \ ++ FIELD32(9, 0x00000200) /* UART1 TX reaches threshold. */ ++#define CSR8_UART1_RX_TRESHOLD \ ++ FIELD32(10, 0x00000400) /* UART1 RX reaches threshold. */ ++#define CSR8_UART1_IDLE_TRESHOLD \ ++ FIELD32(11, 0x00000800) /* UART1 IDLE over threshold. */ ++#define CSR8_UART1_TX_BUFF_ERROR \ ++ FIELD32(12, 0x00001000) /* UART1 TX buffer error. */ ++#define CSR8_UART1_RX_BUFF_ERROR \ ++ FIELD32(13, 0x00002000) /* UART1 RX buffer error. */ ++#define CSR8_UART2_TX_TRESHOLD \ ++ FIELD32(14, 0x00004000) /* UART2 TX reaches threshold. */ ++#define CSR8_UART2_RX_TRESHOLD \ ++ FIELD32(15, 0x00008000) /* UART2 RX reaches threshold. */ ++#define CSR8_UART2_IDLE_TRESHOLD \ ++ FIELD32(16, 0x00010000) /* UART2 IDLE over threshold. 
*/ ++#define CSR8_UART2_TX_BUFF_ERROR \ ++ FIELD32(17, 0x00020000) /* UART2 TX buffer error. */ ++#define CSR8_UART2_RX_BUFF_ERROR \ ++ FIELD32(18, 0x00040000) /* UART2 RX buffer error. */ ++#define CSR8_TIMER_CSR3_EXPIRE \ ++ FIELD32(19, \ ++ 0x00080000) /* TIMECSR3 timer expired (802.1H quiet period). */ ++ ++/* ++ * CSR9: Maximum frame length register. ++ */ ++#define CSR9_MAX_FRAME_UNIT \ ++ FIELD32(7, \ ++ 0x00000f80) /* maximum frame length in 128b unit, default: 12. */ ++ ++/* ++ * SECCSR0: WEP control register. ++ */ ++#define SECCSR0_KICK_DECRYPT \ ++ FIELD32(0, 0x00000001) /* Kick decryption engine, self-clear. */ ++#define SECCSR0_ONE_SHOT \ ++ FIELD32(1, 0x00000002) /* 0: ring mode, 1: One shot only mode. */ ++#define SECCSR0_DESC_ADDRESS \ ++ FIELD32(2, 0xfffffffc) /* Descriptor physical address of frame. */ ++ ++/* ++ * CSR11: Back-off control register. ++ */ ++#define CSR11_CWMIN \ ++ FIELD32(0, 0x0000000f) /* CWmin. Default cwmin is 31 (2^5 - 1). */ ++#define CSR11_CWMAX \ ++ FIELD32(4, 0x000000f0) /* CWmax. Default cwmax is 1023 (2^10 - 1). */ ++#define CSR11_SLOT_TIME \ ++ FIELD32(8, 0x00001f00) /* slot time, default is 20us for 802.11b */ ++#define CSR11_CW_SELECT \ ++ FIELD32(13, \ ++ 0x00002000) /* CWmin/CWmax selection, 1: Register, 0: TXD. */ ++#define CSR11_LONG_RETRY FIELD32(16, 0x00ff0000) /* long retry count. */ ++#define CSR11_SHORT_RETRY FIELD32(24, 0xff000000) /* short retry count. */ ++ ++/* ++ * CSR12: Synchronization configuration register 0. ++ * All units in 1/16 TU. ++ */ ++#define CSR12_BEACON_INTERVAL \ ++ FIELD32(0, 0x0000ffff) /* beacon interval, default is 100 TU. */ ++#define CSR12_CFPMAX_DURATION \ ++ FIELD32(16, 0xffff0000) /* cfp maximum duration, default is 100 TU. */ ++ ++/* ++ * CSR13: Synchronization configuration register 1. ++ * All units in 1/16 TU. ++ */ ++#define CSR13_ATIMW_DURATION FIELD32(0, 0x0000ffff) /* atim window duration. */ ++#define CSR13_CFP_PERIOD \ ++ FIELD32(16, 0x00ff0000) /* cfp period, default is 0 TU. */ ++ ++/* ++ * CSR14: Synchronization control register. ++ */ ++#define CSR14_TSF_COUNT FIELD32(0, 0x00000001) /* enable tsf auto counting. */ ++#define CSR14_TSF_SYNC \ ++ FIELD32(1, \ ++ 0x00000006) /* tsf sync, 0: disable, 1: infra, 2: ad-hoc mode. */ ++#define CSR14_TBCN FIELD32(3, 0x00000008) /* enable tbcn with reload value. */ ++#define CSR14_TCFP \ ++ FIELD32(4, 0x00000010) /* enable tcfp & cfp / cp switching. */ ++#define CSR14_TATIMW \ ++ FIELD32(5, 0x00000020) /* enable tatimw & atim window switching. */ ++#define CSR14_BEACON_GEN FIELD32(6, 0x00000040) /* enable beacon generator. */ ++#define CSR14_CFP_COUNT_PRELOAD \ ++ FIELD32(8, 0x0000ff00) /* cfp count preload value. */ ++#define CSR14_TBCM_PRELOAD \ ++ FIELD32(16, 0xffff0000) /* tbcn preload value in units of 64us. */ ++ ++/* ++ * CSR15: Synchronization status register. ++ */ ++#define CSR15_CFP \ ++ FIELD32(0, 0x00000001) /* ASIC is in contention-free period. */ ++#define CSR15_ATIMW FIELD32(1, 0x00000002) /* ASIC is in ATIM window. */ ++#define CSR15_BEACON_SENT FIELD32(2, 0x00000004) /* Beacon is send. */ ++ ++/* ++ * CSR16: TSF timer register 0. ++ */ ++#define CSR16_LOW_TSFTIMER FIELD32(0, 0xffffffff) ++ ++/* ++ * CSR17: TSF timer register 1. ++ */ ++#define CSR17_HIGH_TSFTIMER FIELD32(0, 0xffffffff) ++ ++/* ++ * CSR18: IFS timer register 0. ++ */ ++#define CSR18_SIFS FIELD32(0, 0x000001ff) /* sifs, default is 10 us. */ ++#define CSR18_PIFS FIELD32(16, 0x01f00000) /* pifs, default is 30 us. */ ++ ++/* ++ * CSR19: IFS timer register 1. 
++ */ ++#define CSR19_DIFS FIELD32(0, 0x0000ffff) /* difs, default is 50 us. */ ++#define CSR19_EIFS FIELD32(16, 0xffff0000) /* eifs, default is 364 us. */ ++ ++/* ++ * CSR20: Wakeup timer register. ++ */ ++#define CSR20_DELAY_AFTER_TBCN \ ++ FIELD32(0, \ ++ 0x0000ffff) /* delay after tbcn expired in units of 1/16 TU. */ ++#define CSR20_TBCN_BEFORE_WAKEUP \ ++ FIELD32(16, 0x00ff0000) /* number of beacon before wakeup. */ ++#define CSR20_AUTOWAKE \ ++ FIELD32(24, 0x01000000) /* enable auto wakeup / sleep mechanism. */ ++ ++/* ++ * CSR21: EEPROM control register. ++ */ ++#define CSR21_RELOAD \ ++ FIELD32(0, 0x00000001) /* Write 1 to reload eeprom content. */ ++#define CSR21_EEPROM_DATA_CLOCK FIELD32(1, 0x00000002) ++#define CSR21_EEPROM_CHIP_SELECT FIELD32(2, 0x00000004) ++#define CSR21_EEPROM_DATA_IN FIELD32(3, 0x00000008) ++#define CSR21_EEPROM_DATA_OUT FIELD32(4, 0x00000010) ++#define CSR21_TYPE_93C46 FIELD32(5, 0x00000020) /* 1: 93c46, 0:93c66. */ ++ ++/* ++ * CSR22: CFP control register. ++ */ ++#define CSR22_CFP_DURATION_REMAIN \ ++ FIELD32(0, 0x0000ffff) /* cfp duration remain, in units of TU. */ ++#define CSR22_RELOAD_CFP_DURATION \ ++ FIELD32(16, 0x00010000) /* Write 1 to reload cfp duration remain. */ ++ ++/* ++ * TX / RX Registers. ++ * Some values are set in TU, whereas 1 TU == 1024 us. ++ */ ++ ++/* ++ * TXCSR0: TX Control Register. ++ */ ++#define TXCSR0_KICK_TX FIELD32(0, 0x00000001) /* kick tx ring. */ ++#define TXCSR0_KICK_ATIM FIELD32(1, 0x00000002) /* kick atim ring. */ ++#define TXCSR0_KICK_PRIO FIELD32(2, 0x00000004) /* kick priority ring. */ ++#define TXCSR0_ABORT \ ++ FIELD32(3, 0x00000008) /* abort all transmit related ring operation. */ ++ ++/* ++ * TXCSR1: TX Configuration Register. ++ */ ++#define TXCSR1_ACK_TIMEOUT \ ++ FIELD32(0, \ ++ 0x000001ff) /* ack timeout, default = sifs + 2*slottime + acktime @ 1mbps. */ ++#define TXCSR1_ACK_CONSUME_TIME \ ++ FIELD32(9, \ ++ 0x0003fe00) /* ack consume time, default = sifs + acktime @ 1mbps. */ ++#define TXCSR1_TSF_OFFSET FIELD32(18, 0x00fc0000) /* insert tsf offset. */ ++#define TXCSR1_AUTORESPONDER \ ++ FIELD32(24, \ ++ 0x01000000) /* enable auto responder which include ack & cts. */ ++ ++/* ++ * TXCSR2: Tx descriptor configuration register. ++ */ ++#define TXCSR2_TXD_SIZE \ ++ FIELD32(0, 0x000000ff) /* tx descriptor size, default is 48. */ ++#define TXCSR2_NUM_TXD FIELD32(8, 0x0000ff00) /* number of txd in ring. */ ++#define TXCSR2_NUM_ATIM FIELD32(16, 0x00ff0000) /* number of atim in ring. */ ++#define TXCSR2_NUM_PRIO \ ++ FIELD32(24, 0xff000000) /* number of priority in ring. */ ++ ++/* ++ * TXCSR3: TX Ring Base address register. ++ */ ++#define TXCSR3_TX_RING_REGISTER FIELD32(0, 0xffffffff) ++ ++/* ++ * TXCSR4: TX Atim Ring Base address register. ++ */ ++#define TXCSR4_ATIM_RING_REGISTER FIELD32(0, 0xffffffff) ++ ++/* ++ * TXCSR5: TX Prio Ring Base address register. ++ */ ++#define TXCSR5_PRIO_RING_REGISTER FIELD32(0, 0xffffffff) ++ ++/* ++ * TXCSR6: Beacon Base address register. ++ */ ++#define TXCSR6_BEACON_REGISTER FIELD32(0, 0xffffffff) ++ ++/* ++ * TXCSR7: Auto responder control register. ++ */ ++#define TXCSR7_AR_POWERMANAGEMENT \ ++ FIELD32(0, 0x00000001) /* auto responder power management bit. */ ++ ++/* ++ * TXCSR8: CCK Tx BBP register. ++ */ ++#define TXCSR8_CCK_SIGNAL \ ++ FIELD32(0, 0x000000ff) /* BBP rate field address for CCK. */ ++#define TXCSR8_CCK_SERVICE \ ++ FIELD32(8, 0x0000ff00) /* BBP service field address for CCK. 
*/ ++#define TXCSR8_CCK_LENGTH_LOW \ ++ FIELD32(16, 0x00ff0000) /* BBP length low byte address for CCK. */ ++#define TXCSR8_CCK_LENGTH_HIGH \ ++ FIELD32(24, 0xff000000) /* BBP length high byte address for CCK. */ ++ ++/* ++ * TXCSR9: OFDM TX BBP registers ++ */ ++#define TXCSR9_OFDM_RATE \ ++ FIELD32(0, 0x000000ff) /* BBP rate field address for OFDM. */ ++#define TXCSR9_OFDM_SERVICE \ ++ FIELD32(8, 0x0000ff00) /* BBP service field address for OFDM. */ ++#define TXCSR9_OFDM_LENGTH_LOW \ ++ FIELD32(16, 0x00ff0000) /* BBP length low byte address for OFDM. */ ++#define TXCSR9_OFDM_LENGTH_HIGH \ ++ FIELD32(24, 0xff000000) /* BBP length high byte address for OFDM. */ ++ ++/* ++ * RXCSR0: RX Control Register. ++ */ ++#define RXCSR0_DISABLE_RX FIELD32(0, 0x00000001) /* disable rx engine. */ ++#define RXCSR0_DROP_CRC FIELD32(1, 0x00000002) /* drop crc error. */ ++#define RXCSR0_DROP_PHYSICAL FIELD32(2, 0x00000004) /* drop physical error. */ ++#define RXCSR0_DROP_CONTROL FIELD32(3, 0x00000008) /* drop control frame. */ ++#define RXCSR0_DROP_NOT_TO_ME \ ++ FIELD32(4, 0x00000010) /* drop not to me unicast frame. */ ++#define RXCSR0_DROP_TODS \ ++ FIELD32(5, 0x00000020) /* drop frame tods bit is true. */ ++#define RXCSR0_DROP_VERSION_ERROR \ ++ FIELD32(6, 0x00000040) /* drop version error frame. */ ++#define RXCSR0_PASS_CRC \ ++ FIELD32(7, 0x00000080) /* pass all packets with crc attached. */ ++#define RXCSR0_PASS_PLCP \ ++ FIELD32(8, \ ++ 0x00000100) /* Pass all packets with 4 bytes PLCP attached. */ ++#define RXCSR0_DROP_MCAST FIELD32(9, 0x00000200) /* Drop multicast frames. */ ++#define RXCSR0_DROP_BCAST FIELD32(10, 0x00000400) /* Drop broadcast frames. */ ++#define RXCSR0_ENABLE_QOS \ ++ FIELD32(11, 0x00000800) /* Accept QOS data frame and parse QOS field. */ ++ ++/* ++ * RXCSR1: RX descriptor configuration register. ++ */ ++#define RXCSR1_RXD_SIZE \ ++ FIELD32(0, 0x000000ff) /* rx descriptor size, default is 32b. */ ++#define RXCSR1_NUM_RXD FIELD32(8, 0x0000ff00) /* number of rxd in ring. */ ++ ++/* ++ * RXCSR2: RX Ring base address register. ++ */ ++#define RXCSR2_RX_RING_REGISTER FIELD32(0, 0xffffffff) ++ ++/* ++ * RXCSR3: BBP ID register for Rx operation. ++ */ ++#define RXCSR3_BBP_ID0 FIELD32(0, 0x0000007f) /* bbp register 0 id. */ ++#define RXCSR3_BBP_ID0_VALID \ ++ FIELD32(7, 0x00000080) /* bbp register 0 id is valid or not. */ ++#define RXCSR3_BBP_ID1 FIELD32(8, 0x00007f00) /* bbp register 1 id. */ ++#define RXCSR3_BBP_ID1_VALID \ ++ FIELD32(15, 0x00008000) /* bbp register 1 id is valid or not. */ ++#define RXCSR3_BBP_ID2 FIELD32(16, 0x007f0000) /* bbp register 2 id. */ ++#define RXCSR3_BBP_ID2_VALID \ ++ FIELD32(23, 0x00800000) /* bbp register 2 id is valid or not. */ ++#define RXCSR3_BBP_ID3 FIELD32(24, 0x7f000000) /* bbp register 3 id. */ ++#define RXCSR3_BBP_ID3_VALID \ ++ FIELD32(31, 0x80000000) /* bbp register 3 id is valid or not. */ ++ ++/* ++ * ARCSR1: Auto Responder PLCP config register 1. ++ */ ++#define ARCSR1_AR_BBP_DATA2 \ ++ FIELD32(0, 0x000000ff) /* Auto responder BBP register 2 data. */ ++#define ARCSR1_AR_BBP_ID2 \ ++ FIELD32(8, 0x0000ff00) /* Auto responder BBP register 2 Id. */ ++#define ARCSR1_AR_BBP_DATA3 \ ++ FIELD32(16, 0x00ff0000) /* Auto responder BBP register 3 data. */ ++#define ARCSR1_AR_BBP_ID3 \ ++ FIELD32(24, 0xff000000) /* Auto responder BBP register 3 Id. */ ++ ++/* ++ * Miscellaneous Registers. ++ * Some values are set in TU, whereas 1 TU == 1024 us. ++ */ ++ ++/* ++ * PCISR: PCI control register. 
++ */ ++#define PCICSR_BIG_ENDIAN \ ++ FIELD32(0, 0x00000001) /* 1: big endian, 0: little endian. */ ++#define PCICSR_RX_TRESHOLD \ ++ FIELD32(1, 0x00000006) /* rx threshold in dw to start pci access */ ++/* 0: 16dw (default), 1: 8dw, 2: 4dw, 3: 32dw. */ ++#define PCICSR_TX_TRESHOLD \ ++ FIELD32(3, 0x00000018) /* tx threshold in dw to start pci access */ ++/* 0: 0dw (default), 1: 1dw, 2: 4dw, 3: forward. */ ++#define PCICSR_BURST_LENTH FIELD32(5, 0x00000060) /* pci burst length */ ++/* 0: 4dw (default, 1: 8dw, 2: 16dw, 3:32dw. */ ++#define PCICSR_ENABLE_CLK FIELD32(7, 0x00000080) /* enable clk_run, */ ++/* pci clock can't going down to non-operational. */ ++#define PCICSR_READ_MULTIPLE \ ++ FIELD32(8, 0x00000100) /* Enable memory read multiple. */ ++#define PCICSR_WRITE_INVALID \ ++ FIELD32(9, 0x00000200) /* Enable memory write & invalid. */ ++ ++/* ++ * PWRCSR1: Manual power control / status register. ++ * state: 0 deep_sleep, 1: sleep, 2: standby, 3: awake. ++ */ ++#define PWRCSR1_SET_STATE \ ++ FIELD32(0, \ ++ 0x00000001) /* set state. Write 1 to trigger, self cleared. */ ++#define PWRCSR1_BBP_DESIRE_STATE FIELD32(1, 0x00000006) /* BBP desired state. */ ++#define PWRCSR1_RF_DESIRE_STATE FIELD32(3, 0x00000018) /* RF desired state. */ ++#define PWRCSR1_BBP_CURR_STATE FIELD32(5, 0x00000060) /* BBP current state. */ ++#define PWRCSR1_RF_CURR_STATE FIELD32(7, 0x00000180) /* RF current state. */ ++#define PWRCSR1_PUT_TO_SLEEP \ ++ FIELD32(9, \ ++ 0x00000200) /* put to sleep. Write 1 to trigger, self cleared. */ ++ ++/* ++ * TIMECSR: Timer control register. ++ */ ++#define TIMECSR_US_COUNT \ ++ FIELD32(0, 0x000000ff) /* 1 us timer count in units of clock cycles. */ ++#define TIMECSR_US_64_COUNT \ ++ FIELD32(8, 0x0000ff00) /* 64 us timer count in units of 1 us timer. */ ++#define TIMECSR_BEACON_EXPECT \ ++ FIELD32(16, 0x00070000) /* Beacon expect window. */ ++ ++/* ++ * MACCSR1: MAC configuration register 1. ++ */ ++#define MACCSR1_KICK_RX \ ++ FIELD32(0, 0x00000001) /* kick one-shot rx in one-shot rx mode. */ ++#define MACCSR1_ONESHOT_RXMODE \ ++ FIELD32(1, 0x00000002) /* enable one-shot rx mode for debugging. */ ++#define MACCSR1_BBPRX_RESET_MODE \ ++ FIELD32(2, 0x00000004) /* ralink bbp rx reset mode. */ ++#define MACCSR1_AUTO_TXBBP \ ++ FIELD32(3, 0x00000008) /* auto tx logic access bbp control register. */ ++#define MACCSR1_AUTO_RXBBP \ ++ FIELD32(4, 0x00000010) /* auto rx logic access bbp control register. */ ++#define MACCSR1_LOOPBACK FIELD32(5, 0x00000060) /* loopback mode. */ ++/* 0: normal, 1: internal, 2: external, 3:rsvd. */ ++#define MACCSR1_INTERSIL_IF \ ++ FIELD32(7, 0x00000080) /* intersil if calibration pin. */ ++ ++/* ++ * RALINKCSR: Ralink Rx auto-reset BBCR. ++ */ ++#define RALINKCSR_AR_BBP_DATA0 \ ++ FIELD32(0, 0x000000ff) /* auto reset bbp register 0 data. */ ++#define RALINKCSR_AR_BBP_ID0 \ ++ FIELD32(8, 0x00007f00) /* auto reset bbp register 0 id. */ ++#define RALINKCSR_AR_BBP_VALID0 \ ++ FIELD32(15, 0x00008000) /* auto reset bbp register 0 valid. */ ++#define RALINKCSR_AR_BBP_DATA1 \ ++ FIELD32(16, 0x00ff0000) /* auto reset bbp register 1 data. */ ++#define RALINKCSR_AR_BBP_ID1 \ ++ FIELD32(24, 0x7f000000) /* auto reset bbp register 1 id. */ ++#define RALINKCSR_AR_BBP_VALID1 \ ++ FIELD32(31, 0x80000000) /* auto reset bbp register 1 valid. */ ++ ++/* ++ * BCNCSR: Beacon interval control register. ++ */ ++#define BCNCSR_CHANGE \ ++ FIELD32(0, 0x00000001) /* write one to change beacon interval. */ ++#define BCNCSR_DELTATIME FIELD32(1, 0x0000001e) /* the delta time value. 
*/ ++#define BCNCSR_NUM_BEACON \ ++ FIELD32(5, 0x00001fe0) /* number of beacon according to mode. */ ++#define BCNCSR_MODE FIELD32(13, 0x00006000) /* please refer to asic specs. */ ++#define BCNCSR_PLUS \ ++ FIELD32(15, 0x00008000) /* plus or minus delta time value. */ ++ ++/* ++ * BBPCSR: BBP serial control register. ++ */ ++#define BBPCSR_VALUE \ ++ FIELD32(0, 0x000000ff) /* register value to program into bbp. */ ++#define BBPCSR_REGNUM FIELD32(8, 0x00007f00) /* selected bbp register. */ ++#define BBPCSR_BUSY \ ++ FIELD32(15, 0x00008000) /* 1: asic is busy execute bbp programming. */ ++#define BBPCSR_WRITE_CONTROL \ ++ FIELD32(16, 0x00010000) /* 1: write bbp, 0: read bbp. */ ++ ++/* ++ * RFCSR: RF serial control register. ++ */ ++#define RFCSR_VALUE \ ++ FIELD32(0, 0x00ffffff) /* register value + id to program into rf/if. */ ++#define RFCSR_NUMBER_OF_BITS \ ++ FIELD32(24, \ ++ 0x1f000000) /* number of bits used in value (i:20, rfmd:22). */ ++#define RFCSR_IF_SELECT \ ++ FIELD32(29, 0x20000000) /* chip to program: 0: rf, 1: if. */ ++#define RFCSR_PLL_LD FIELD32(30, 0x40000000) /* rf pll_ld status. */ ++#define RFCSR_BUSY \ ++ FIELD32(31, 0x80000000) /* 1: asic is busy execute rf programming. */ ++ ++/* ++ * LEDCSR: LED control register. ++ */ ++#define LEDCSR_ON_PERIOD FIELD32(0, 0x000000ff) /* on period, default 70ms. */ ++#define LEDCSR_OFF_PERIOD FIELD32(8, 0x0000ff00) /* off period, default 30ms. */ ++#define LEDCSR_LINK FIELD32(16, 0x00010000) /* 0: linkoff, 1: linkup. */ ++#define LEDCSR_ACTIVITY FIELD32(17, 0x00020000) /* 0: idle, 1: active. */ ++#define LEDCSR_LINK_POLARITY \ ++ FIELD32(18, 0x00040000) /* 0: active low, 1: active high. */ ++#define LEDCSR_ACTIVITY_POLARITY \ ++ FIELD32(19, 0x00080000) /* 0: active low, 1: active high. */ ++#define LEDCSR_LED_DEFAULT \ ++ FIELD32(20, 0x00100000) /* LED state for "enable" 0: ON, 1: OFF. */ ++ ++/* ++ * GPIOCSR: GPIO control register. ++ */ ++#define GPIOCSR_BIT0 FIELD32(0, 0x00000001) ++#define GPIOCSR_BIT1 FIELD32(1, 0x00000002) ++#define GPIOCSR_BIT2 FIELD32(2, 0x00000004) ++#define GPIOCSR_BIT3 FIELD32(3, 0x00000008) ++#define GPIOCSR_BIT4 FIELD32(4, 0x00000010) ++#define GPIOCSR_BIT5 FIELD32(5, 0x00000020) ++#define GPIOCSR_BIT6 FIELD32(6, 0x00000040) ++#define GPIOCSR_BIT7 FIELD32(7, 0x00000080) ++#define GPIOCSR_DIR0 FIELD32(8, 0x00000100) ++#define GPIOCSR_DIR1 FIELD32(9, 0x00000200) ++#define GPIOCSR_DIR2 FIELD32(10, 0x00000400) ++#define GPIOCSR_DIR3 FIELD32(11, 0x00000800) ++#define GPIOCSR_DIR4 FIELD32(12, 0x00001000) ++#define GPIOCSR_DIR5 FIELD32(13, 0x00002000) ++#define GPIOCSR_DIR6 FIELD32(14, 0x00004000) ++#define GPIOCSR_DIR7 FIELD32(15, 0x00008000) ++ ++/* ++ * BCNCSR1: Tx BEACON offset time control register. ++ */ ++#define BCNCSR1_PRELOAD \ ++ FIELD32(0, 0x0000ffff) /* beacon timer offset in units of usec. */ ++#define BCNCSR1_BEACON_CWMIN FIELD32(16, 0x000f0000) /* 2^CwMin. */ ++ ++/* ++ * MACCSR2: TX_PE to RX_PE turn-around time control register ++ */ ++#define MACCSR2_DELAY \ ++ FIELD32(0, \ ++ 0x000000ff) /* RX_PE low width, in units of pci clock cycle. */ ++ ++/* ++ * SECCSR1_RT2509: WEP control register ++ */ ++#define SECCSR1_KICK_ENCRYPT \ ++ FIELD32(0, 0x00000001) /* Kick encryption engine, self-clear. */ ++#define SECCSR1_ONE_SHOT \ ++ FIELD32(1, 0x00000002) /* 0: ring mode, 1: One shot only mode. */ ++#define SECCSR1_DESC_ADDRESS \ ++ FIELD32(2, 0xfffffffc) /* Descriptor physical address of frame. 
*/ ++ ++/* ++ * RF registers ++ */ ++#define RF1_TUNER FIELD32(17, 0x00020000) ++#define RF3_TUNER FIELD32(8, 0x00000100) ++#define RF3_TXPOWER FIELD32(9, 0x00003e00) ++ ++/* ++ * EEPROM content format. ++ * The wordsize of the EEPROM is 16 bits. ++ */ ++ ++/* ++ * EEPROM operation defines. ++ */ ++#define EEPROM_WIDTH_93c46 6 ++#define EEPROM_WIDTH_93c66 8 ++#define EEPROM_WRITE_OPCODE 0x05 ++#define EEPROM_READ_OPCODE 0x06 ++ ++/* ++ * EEPROM antenna. ++ */ ++#define EEPROM_ANTENNA_NUM FIELD16(0, 0x0003) /* Number of antenna's. */ ++#define EEPROM_ANTENNA_TX_DEFAULT \ ++ FIELD16(2, 0x000c) /* Default antenna 0: diversity, 1: A, 2: B. */ ++#define EEPROM_ANTENNA_RX_DEFAULT \ ++ FIELD16(4, 0x0030) /* Default antenna 0: diversity, 1: A, 2: B. */ ++#define EEPROM_ANTENNA_LED_MODE \ ++ FIELD16(6, 0x01c0) /* 0: default, 1: TX/RX activity, */ ++/* 2: Single LED (ignore link), 3: reserved. */ ++#define EEPROM_ANTENNA_DYN_TXAGC \ ++ FIELD16(9, 0x0200) /* Dynamic TX AGC control. */ ++#define EEPROM_ANTENNA_HARDWARE_RADIO \ ++ FIELD16(10, 0x0400) /* 1: Hardware controlled radio. Read GPIO0. */ ++#define EEPROM_ANTENNA_RF_TYPE \ ++ FIELD16(11, 0xf800) /* rf_type of this adapter. */ ++ ++/* ++ * EEPROM geography. ++ */ ++#define EEPROM_GEOGRAPHY_GEO \ ++ FIELD16(8, 0x0f00) /* Default geography setting for device. */ ++ ++/* ++ * EEPROM NIC config. ++ */ ++#define EEPROM_NIC_CARDBUS_ACCEL FIELD16(0, 0x0001) /* 0: enable, 1: disable. */ ++#define EEPROM_NIC_DYN_BBP_TUNE FIELD16(1, 0x0002) /* 0: enable, 1: disable. */ ++#define EEPROM_NIC_CCK_TX_POWER \ ++ FIELD16(2, 0x000c) /* CCK TX power compensation. */ ++ ++/* ++ * EEPROM TX power. ++ */ ++#define EEPROM_TX_POWER1 FIELD16(0, 0x00ff) ++#define EEPROM_TX_POWER2 FIELD16(8, 0xff00) ++ ++/* ++ * EEPROM BBP. ++ */ ++#define EEPROM_BBP_VALUE FIELD16(0, 0x00ff) ++#define EEPROM_BBP_REG_ID FIELD16(8, 0xff00) ++ ++/* ++ * EEPROM VERSION. ++ */ ++#define EEPROM_VERSION_FAE FIELD16(0, 0x00ff) /* FAE release number. */ ++#define EEPROM_VERSION FIELD16(8, 0xff00) ++ ++/* ++ * DMA ring defines and data structures. ++ */ ++ ++/* ++ * Size of a single descriptor. ++ */ ++#define SIZE_DESCRIPTOR 48 ++ ++/* ++ * TX descriptor format for TX, PRIO, ATIM and Beacon Ring. ++ */ ++struct _txd { ++ u32 word0; ++#define TXD_W0_OWNER_NIC FIELD32(0, 0x00000001) ++#define TXD_W0_VALID FIELD32(1, 0x00000002) ++#define TXD_W0_RESULT FIELD32(2, 0x0000001c) /* Set by device. */ ++#define TXD_W0_RETRY_COUNT FIELD32(5, 0x000000e0) /* Set by device. */ ++#define TXD_W0_MORE_FRAG FIELD32(8, 0x00000100) /* Set by device. 
*/ ++#define TXD_W0_ACK FIELD32(9, 0x00000200) ++#define TXD_W0_TIMESTAMP FIELD32(10, 0x00000400) ++#define TXD_W0_OFDM FIELD32(11, 0x00000800) ++#define TXD_W0_CIPHER_OWNER FIELD32(12, 0x00001000) ++#define TXD_W0_IFS FIELD32(13, 0x00006000) ++#define TXD_W0_RETRY_MODE FIELD32(15, 0x00008000) ++#define TXD_W0_DATABYTE_COUNT FIELD32(16, 0x0fff0000) ++#define TXD_W0_CIPHER_ALG FIELD32(29, 0xe0000000) ++ ++ u32 word1; ++#define TXD_W1_BUFFER_ADDRESS FIELD32(0, 0xffffffff) ++ ++ u32 word2; ++#define TXD_W2_IV_OFFSET FIELD32(0, 0x0000003f) ++#define TXD_W2_AIFS FIELD32(6, 0x000000c0) ++#define TXD_W2_CWMIN FIELD32(8, 0x00000f00) ++#define TXD_W2_CWMAX FIELD32(12, 0x0000f000) ++ ++ u32 word3; ++#define TXD_W3_PLCP_SIGNAL FIELD32(0, 0x000000ff) ++#define TXD_W3_PLCP_SERVICE FIELD32(8, 0x0000ff00) ++#define TXD_W3_PLCP_LENGTH_LOW FIELD32(16, 0x00ff0000) ++#define TXD_W3_PLCP_LENGTH_HIGH FIELD32(24, 0xff000000) ++ ++ u32 word4; ++#define TXD_W4_IV FIELD32(0, 0xffffffff) ++ ++ u32 word5; ++#define TXD_W5_EIV FIELD32(0, 0xffffffff) ++ ++ u32 word6; ++#define TXD_W6_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word7; ++#define TXD_W7_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word8; ++#define TXD_W8_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word9; ++#define TXD_W9_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word10; ++#define TXD_W10_RTS FIELD32(0, 0x00000001) ++#define TXD_W10_TX_RATE FIELD32(0, 0x000000fe) /* For module only. */ ++} __attribute__((packed)); ++ ++/* ++ * RX descriptor format for RX Ring. ++ */ ++struct _rxd { ++ u32 word0; ++#define RXD_W0_OWNER_NIC FIELD32(0, 0x00000001) ++#define RXD_W0_UNICAST_TO_ME FIELD32(1, 0x00000002) ++#define RXD_W0_MULTICAST FIELD32(2, 0x00000004) ++#define RXD_W0_BROADCAST FIELD32(3, 0x00000008) ++#define RXD_W0_MY_BSS FIELD32(4, 0x00000010) ++#define RXD_W0_CRC FIELD32(5, 0x00000020) ++#define RXD_W0_OFDM FIELD32(6, 0x00000040) ++#define RXD_W0_PHYSICAL_ERROR FIELD32(7, 0x00000080) ++#define RXD_W0_CIPHER_OWNER FIELD32(8, 0x00000100) ++#define RXD_W0_ICV_ERROR FIELD32(9, 0x00000200) ++#define RXD_W0_IV_OFFSET FIELD32(10, 0x0000fc00) ++#define RXD_W0_DATABYTE_COUNT FIELD32(16, 0x0fff0000) ++#define RXD_W0_CIPHER_ALG FIELD32(29, 0xe0000000) ++ ++ u32 word1; ++#define RXD_W1_BUFFER_ADDRESS FIELD32(0, 0xffffffff) ++ ++ u32 word2; ++#define RXD_W2_BBR0 FIELD32(0, 0x000000ff) ++#define RXD_W2_RSSI FIELD32(8, 0x0000ff00) ++#define RXD_W2_TA FIELD32(16, 0xffff0000) ++ ++ u32 word3; ++#define RXD_W3_TA FIELD32(0, 0xffffffff) ++ ++ u32 word4; ++#define RXD_W4_IV FIELD32(0, 0xffffffff) ++ ++ u32 word5; ++#define RXD_W5_EIV FIELD32(0, 0xffffffff) ++ ++ u32 word6; ++#define RXD_W6_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word7; ++#define RXD_W7_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word8; ++#define RXD_W8_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word9; ++#define RXD_W9_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word10; ++#define RXD_W10_DROP FIELD32(0, 0x00000001) ++} __attribute__((packed)); ++ ++/* ++ * _rt2x00_pci ++ * This is the main structure which contains all variables required to communicate with the PCI device. ++ */ ++struct _rt2x00_pci { ++ /* ++ * PCI device structure. ++ */ ++ struct pci_dev *pci_dev; ++ ++ /* ++ * Chipset identification. ++ */ ++ struct _rt2x00_chip chip; ++ ++ /* ++ * csr_addr ++ * Base address of device registers, all exact register addresses are calculated from this address. ++ */ ++ void __iomem *csr_addr; ++ ++ /* ++ * RF register values for current channel. ++ */ ++ struct _rf_channel channel; ++ ++ /* ++ * EEPROM bus width. 
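++	 * Number of address bits shifted out per EEPROM read:
++	 * EEPROM_WIDTH_93c46 (6) or EEPROM_WIDTH_93c66 (8).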
++ */ ++ u8 eeprom_width; ++ ++ u8 __pad; /* For alignment only. */ ++ ++ /* ++ * EEPROM BBP data. ++ */ ++ u16 eeprom[EEPROM_BBP_SIZE]; ++ ++ /* ++ * DMA packet ring. ++ */ ++ struct _data_ring rx; ++ struct _data_ring tx; ++ ++ rtdm_irq_t irq_handle; ++ rtdm_lock_t lock; ++ ++} __attribute__((packed)); ++ ++static int rt2x00_get_rf_value(const struct _rt2x00_chip *chip, ++ const u8 channel, struct _rf_channel *rf_reg) ++{ ++ int index = 0x00; ++ ++ index = rt2x00_get_channel_index(channel); ++ if (index < 0) ++ return -EINVAL; ++ ++ memset(rf_reg, 0x00, sizeof(*rf_reg)); ++ ++ if (rt2x00_rf(chip, RF2522)) { ++ rf_reg->rf1 = 0x00002050; ++ rf_reg->rf3 = 0x00000101; ++ goto update_rf2_1; ++ } ++ if (rt2x00_rf(chip, RF2523)) { ++ rf_reg->rf1 = 0x00022010; ++ rf_reg->rf3 = 0x000e0111; ++ rf_reg->rf4 = 0x00000a1b; ++ goto update_rf2_2; ++ } ++ if (rt2x00_rf(chip, RF2524)) { ++ rf_reg->rf1 = 0x00032020; ++ rf_reg->rf3 = 0x00000101; ++ rf_reg->rf4 = 0x00000a1b; ++ goto update_rf2_2; ++ } ++ if (rt2x00_rf(chip, RF2525)) { ++ rf_reg->rf1 = 0x00022020; ++ rf_reg->rf2 = 0x00080000; ++ rf_reg->rf3 = 0x00060111; ++ rf_reg->rf4 = 0x00000a1b; ++ goto update_rf2_2; ++ } ++ if (rt2x00_rf(chip, RF2525E)) { ++ rf_reg->rf2 = 0x00080000; ++ rf_reg->rf3 = 0x00060111; ++ goto update_rf2_3; ++ } ++ if (rt2x00_rf(chip, RF5222)) { ++ rf_reg->rf3 = 0x00000101; ++ goto update_rf2_3; ++ } ++ ++ return -EINVAL; ++ ++update_rf2_1: /* RF2522. */ ++ rf_reg->rf2 = 0x000c1fda + (index * 0x14); ++ if (channel == 14) ++ rf_reg->rf2 += 0x0000001c; ++ goto exit; ++ ++update_rf2_2: /* RF2523, RF2524, RF2525. */ ++ rf_reg->rf2 |= 0x00000c9e + (index * 0x04); ++ if (rf_reg->rf2 & 0x00000040) ++ rf_reg->rf2 += 0x00000040; ++ if (channel == 14) { ++ rf_reg->rf2 += 0x08; ++ rf_reg->rf4 &= ~0x00000018; ++ } ++ goto exit; ++ ++update_rf2_3: /* RF2525E, RF5222. */ ++ if (OFDM_CHANNEL(channel)) { ++ rf_reg->rf1 = 0x00022020; ++ rf_reg->rf2 |= 0x00001136 + (index * 0x04); ++ if (rf_reg->rf2 & 0x00000040) ++ rf_reg->rf2 += 0x00000040; ++ if (channel == 14) { ++ rf_reg->rf2 += 0x04; ++ rf_reg->rf4 = 0x00000a1b; ++ } else { ++ rf_reg->rf4 = 0x00000a0b; ++ } ++ } else if (UNII_LOW_CHANNEL(channel)) { ++ rf_reg->rf1 = 0x00022010; ++ rf_reg->rf2 = 0x00018896 + (index * 0x04); ++ rf_reg->rf4 = 0x00000a1f; ++ } else if (HIPERLAN2_CHANNEL(channel)) { ++ rf_reg->rf1 = 0x00022010; ++ rf_reg->rf2 = 0x00008802 + (index * 0x04); ++ rf_reg->rf4 = 0x00000a0f; ++ } else if (UNII_HIGH_CHANNEL(channel)) { ++ rf_reg->rf1 = 0x00022020; ++ rf_reg->rf2 = 0x000090a6 + (index * 0x08); ++ rf_reg->rf4 = 0x00000a07; ++ } ++ ++exit: ++ rf_reg->rf1 = cpu_to_le32(rf_reg->rf1); ++ rf_reg->rf2 = cpu_to_le32(rf_reg->rf2); ++ rf_reg->rf3 = cpu_to_le32(rf_reg->rf3); ++ rf_reg->rf4 = cpu_to_le32(rf_reg->rf4); ++ ++ return 0; ++} ++ ++/* ++ * Get txpower value in dBm mathing the requested percentage. ++ */ ++static inline u8 rt2x00_get_txpower(const struct _rt2x00_chip *chip, ++ const u8 tx_power) ++{ ++ return tx_power / 100 * 31; ++ ++ /* ++ if(tx_power <= 3) ++ return 19; ++ else if(tx_power <= 12) ++ return 22; ++ else if(tx_power <= 25) ++ return 25; ++ else if(tx_power <= 50) ++ return 28; ++ else if(tx_power <= 75) ++ return 30; ++ else if(tx_power <= 100) ++ return 31; ++ ++ ERROR("Invalid tx_power.\n"); ++ return 31; ++ */ ++} ++ ++/* ++ * Ring handlers. 
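++ * Each ring is a single coherent DMA allocation holding all descriptors first
++ * and the data buffers behind them (see the DESC_BASE / DATA_BASE macros
++ * below); rt2x00_pci_free_ring() hands the block back and resets the ring.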
++ */ ++static inline int ++rt2x00_pci_alloc_ring(struct _rt2x00_core *core, struct _data_ring *ring, ++ const u8 ring_type, const u16 max_entries, ++ const u16 entry_size, const u16 desc_size) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ ++ rt2x00_init_ring(core, ring, ring_type, max_entries, entry_size, ++ desc_size); ++ ++ ring->data_addr = ++ dma_alloc_coherent(&rt2x00pci->pci_dev->dev, ring->mem_size, ++ &ring->data_dma, GFP_KERNEL); ++ if (!ring->data_addr) ++ return -ENOMEM; ++ ++ memset(ring->data_addr, 0x00, ring->mem_size); ++ ++ return 0; ++} ++ ++static int rt2x00_pci_alloc_rings(struct _rt2x00_core *core) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ ++ if (rt2x00_pci_alloc_ring(core, &rt2x00pci->rx, RING_RX, RX_ENTRIES, ++ DATA_FRAME_SIZE, SIZE_DESCRIPTOR) || ++ rt2x00_pci_alloc_ring(core, &rt2x00pci->tx, RING_TX, TX_ENTRIES, ++ DATA_FRAME_SIZE, SIZE_DESCRIPTOR)) { ++ ERROR("DMA allocation failed.\n"); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static inline void rt2x00_pci_free_ring(struct _data_ring *ring) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(ring->core); ++ ++ if (ring->data_addr) ++ dma_free_coherent(&rt2x00pci->pci_dev->dev, ring->mem_size, ++ ring->data_addr, ring->data_dma); ++ ring->data_addr = NULL; ++ ++ rt2x00_deinit_ring(ring); ++} ++ ++static void rt2x00_pci_free_rings(struct _rt2x00_core *core) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ ++ rt2x00_pci_free_ring(&rt2x00pci->rx); ++ rt2x00_pci_free_ring(&rt2x00pci->tx); ++} ++ ++/* ++ * Macro's for calculating exact position in data ring. ++ */ ++#define DESC_BASE(__ring) ((void *)((__ring)->data_addr)) ++#define DATA_BASE(__ring) \ ++ ((void *)(DESC_BASE(__ring) + \ ++ ((__ring)->max_entries * (__ring)->desc_size))) ++ ++#define __DESC_ADDR(__ring, __index) \ ++ ((void *)(DESC_BASE(__ring) + ((__index) * (__ring)->desc_size))) ++#define __DATA_ADDR(__ring, __index) \ ++ ((void *)(DATA_BASE(__ring) + ((__index) * (__ring)->entry_size))) ++ ++#define DESC_ADDR(__ring) (__DESC_ADDR(__ring, (__ring)->index)) ++#define DESC_ADDR_DONE(__ring) (__DESC_ADDR(__ring, (__ring)->index_done)) ++ ++#define DATA_ADDR(__ring) (__DATA_ADDR(__ring, (__ring)->index)) ++#define DATA_ADDR_DONE(__ring) (__DATA_ADDR(__ring, (__ring)->index_done)) ++ ++/* ++ * Register access. ++ * All access to the registers will go through rt2x00_register_read and rt2x00_register_write. ++ * BBP and RF register require indirect register access through the register BBPCSR and RFCSR. ++ * The indirect register access work with busy bits, and a read or write function call can fail. ++ * Specific fields within a register can be accessed using the set and get field routines, ++ * these function will handle the requirement of little_endian and big_endian conversions. ++ */ ++#define REGISTER_BUSY_COUNT \ ++ 10 /* Number of retries before failing access BBP & RF indirect register */ ++#define REGISTER_BUSY_DELAY \ ++ 100 /* Delay between each register access retry. 
(us) */ ++ ++static void rt2x00_register_read(const struct _rt2x00_pci *rt2x00pci, ++ const unsigned long offset, u32 *value) ++{ ++ *value = readl((void *)(rt2x00pci->csr_addr + offset)); ++} ++ ++static void rt2x00_register_multiread(const struct _rt2x00_pci *rt2x00pci, ++ const unsigned long offset, u32 *value, ++ const u16 length) ++{ ++ memcpy_fromio((void *)value, (void *)(rt2x00pci->csr_addr + offset), ++ length); ++} ++ ++static void rt2x00_register_write(const struct _rt2x00_pci *rt2x00pci, ++ const unsigned long offset, const u32 value) ++{ ++ writel(value, (void *)(rt2x00pci->csr_addr + offset)); ++} ++ ++static void rt2x00_register_multiwrite(const struct _rt2x00_pci *rt2x00pci, ++ const unsigned long offset, u32 *value, ++ const u16 length) ++{ ++ memcpy_toio((void *)(rt2x00pci->csr_addr + offset), (void *)value, ++ length); ++} ++ ++static void rt2x00_bbp_regwrite(const struct _rt2x00_pci *rt2x00pci, ++ const u8 reg_id, const u8 value) ++{ ++ u32 reg = 0x00000000; ++ u8 counter = 0x00; ++ ++ for (counter = 0x00; counter < REGISTER_BUSY_COUNT; counter++) { ++ rt2x00_register_read(rt2x00pci, BBPCSR, ®); ++ if (!rt2x00_get_field32(reg, BBPCSR_BUSY)) ++ goto bbp_write; ++ udelay(REGISTER_BUSY_DELAY); ++ } ++ ++ ERROR("BBPCSR register busy. Write failed\n"); ++ return; ++ ++bbp_write: ++ reg = 0x00000000; ++ rt2x00_set_field32(®, BBPCSR_VALUE, value); ++ rt2x00_set_field32(®, BBPCSR_REGNUM, reg_id); ++ rt2x00_set_field32(®, BBPCSR_BUSY, 1); ++ rt2x00_set_field32(®, BBPCSR_WRITE_CONTROL, 1); ++ ++ rt2x00_register_write(rt2x00pci, BBPCSR, reg); ++} ++ ++static void rt2x00_bbp_regread(const struct _rt2x00_pci *rt2x00pci, ++ const u8 reg_id, u8 *value) ++{ ++ u32 reg = 0x00000000; ++ u8 counter = 0x00; ++ ++ /* ++ * We first have to acquire the requested BBP register, ++ * so we write the register id into the BBP register first. ++ */ ++ rt2x00_set_field32(®, BBPCSR_REGNUM, reg_id); ++ rt2x00_set_field32(®, BBPCSR_BUSY, 1); ++ rt2x00_set_field32(®, BBPCSR_WRITE_CONTROL, 0); ++ ++ rt2x00_register_write(rt2x00pci, BBPCSR, reg); ++ ++ for (counter = 0x00; counter < REGISTER_BUSY_COUNT; counter++) { ++ rt2x00_register_read(rt2x00pci, BBPCSR, ®); ++ if (!rt2x00_get_field32(reg, BBPCSR_BUSY)) { ++ *value = rt2x00_get_field32(reg, BBPCSR_VALUE); ++ return; ++ } ++ udelay(REGISTER_BUSY_DELAY); ++ } ++ ++ ERROR("BBPCSR register busy. Read failed\n"); ++ *value = 0xff; ++} ++ ++static void rt2x00_rf_regwrite(const struct _rt2x00_pci *rt2x00pci, ++ const u32 value) ++{ ++ u32 reg = 0x00000000; ++ u8 counter = 0x00; ++ ++ for (counter = 0x00; counter < REGISTER_BUSY_COUNT; counter++) { ++ rt2x00_register_read(rt2x00pci, RFCSR, ®); ++ if (!rt2x00_get_field32(reg, RFCSR_BUSY)) ++ goto rf_write; ++ udelay(REGISTER_BUSY_DELAY); ++ } ++ ++ ERROR("RFCSR register busy. Write failed\n"); ++ return; ++ ++rf_write: ++ reg = value; ++ rt2x00_set_field32(®, RFCSR_NUMBER_OF_BITS, 20); ++ rt2x00_set_field32(®, RFCSR_IF_SELECT, 0); ++ rt2x00_set_field32(®, RFCSR_BUSY, 1); ++ ++ // printk(KERN_INFO "DEBUG: %s:%d: reg=%x\n", __FILE__, __LINE__, reg); ++ ++ rt2x00_register_write(rt2x00pci, RFCSR, reg); ++} ++ ++/* ++ * EEPROM access. ++ * The EEPROM is being accessed by word index. ++ * rt2x00_eeprom_read_word is the main access function that can be called by ++ * the rest of the module. It will take the index number of the eeprom word ++ * and the bus width. 
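++ * The helpers below bit-bang the serial protocol through CSR21: raise chip
++ * select, clock out the 3-bit read opcode and the word address, then clock
++ * the 16 data bits back in and drop chip select again.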
++ */ ++static inline void rt2x00_eeprom_pulse_high(const struct _rt2x00_pci *rt2x00pci, ++ u32 *flags) ++{ ++ rt2x00_set_field32(flags, CSR21_EEPROM_DATA_CLOCK, 1); ++ rt2x00_register_write(rt2x00pci, CSR21, *flags); ++ udelay(1); ++} ++ ++static inline void rt2x00_eeprom_pulse_low(const struct _rt2x00_pci *rt2x00pci, ++ u32 *flags) ++{ ++ rt2x00_set_field32(flags, CSR21_EEPROM_DATA_CLOCK, 0); ++ rt2x00_register_write(rt2x00pci, CSR21, *flags); ++ udelay(1); ++} ++ ++static void rt2x00_eeprom_shift_out_bits(const struct _rt2x00_pci *rt2x00pci, ++ const u16 data, const u16 count) ++{ ++ u32 flags = 0x00000000; ++ u32 mask = 0x0001 << (count - 1); ++ ++ rt2x00_register_read(rt2x00pci, CSR21, &flags); ++ ++ /* ++ * Clear data flags. ++ */ ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_OUT, 0); ++ ++ /* ++ * Start writing all bits. ++ */ ++ do { ++ /* ++ * Only set the data_in flag when we are at the correct bit. ++ */ ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, ++ (data & mask) ? 1 : 0); ++ ++ rt2x00_register_write(rt2x00pci, CSR21, flags); ++ ++ rt2x00_eeprom_pulse_high(rt2x00pci, &flags); ++ rt2x00_eeprom_pulse_low(rt2x00pci, &flags); ++ ++ /* ++ * Shift to next bit. ++ */ ++ mask >>= 1; ++ } while (mask); ++ ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ rt2x00_register_write(rt2x00pci, CSR21, flags); ++} ++ ++static void rt2x00_eeprom_shift_in_bits(const struct _rt2x00_pci *rt2x00pci, ++ u16 *data) ++{ ++ u32 flags = 0x00000000; ++ u8 counter = 0x00; ++ ++ rt2x00_register_read(rt2x00pci, CSR21, &flags); ++ ++ /* ++ * Clear data flags. ++ */ ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_OUT, 0); ++ ++ /* ++ * Start reading all 16 bits. ++ */ ++ for (counter = 0; counter < 16; counter++) { ++ /* ++ * Shift to the next bit. ++ */ ++ *data <<= 1; ++ ++ rt2x00_eeprom_pulse_high(rt2x00pci, &flags); ++ ++ rt2x00_register_read(rt2x00pci, CSR21, &flags); ++ ++ /* ++ * Clear data_in flag and set the data bit to 1 when the data_out flag is set. ++ */ ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ if (rt2x00_get_field32(flags, CSR21_EEPROM_DATA_OUT)) ++ *data |= 1; ++ ++ rt2x00_eeprom_pulse_low(rt2x00pci, &flags); ++ } ++} ++ ++static u16 rt2x00_eeprom_read_word(const struct _rt2x00_pci *rt2x00pci, ++ const u8 word) ++{ ++ u32 flags = 0x00000000; ++ u16 data = 0x0000; ++ ++ /* ++ * Clear all flags, and enable chip select. ++ */ ++ rt2x00_register_read(rt2x00pci, CSR21, &flags); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_OUT, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_CLOCK, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_CHIP_SELECT, 1); ++ rt2x00_register_write(rt2x00pci, CSR21, flags); ++ ++ /* ++ * kick a pulse. ++ */ ++ rt2x00_eeprom_pulse_high(rt2x00pci, &flags); ++ rt2x00_eeprom_pulse_low(rt2x00pci, &flags); ++ ++ /* ++ * Select the read opcode and bus_width. ++ */ ++ rt2x00_eeprom_shift_out_bits(rt2x00pci, EEPROM_READ_OPCODE, 3); ++ rt2x00_eeprom_shift_out_bits(rt2x00pci, word, rt2x00pci->eeprom_width); ++ ++ rt2x00_eeprom_shift_in_bits(rt2x00pci, &data); ++ ++ /* ++ * Clear chip_select and data_in flags. ++ */ ++ rt2x00_register_read(rt2x00pci, CSR21, &flags); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_CHIP_SELECT, 0); ++ rt2x00_register_write(rt2x00pci, CSR21, flags); ++ ++ /* ++ * kick a pulse. 
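++	 * One extra clock cycle with chip select deasserted terminates the
++	 * read before the word is returned.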
++ */ ++ rt2x00_eeprom_pulse_high(rt2x00pci, &flags); ++ rt2x00_eeprom_pulse_low(rt2x00pci, &flags); ++ ++ return data; ++} ++ ++#endif /* RT2500PCI_H */ +--- linux/drivers/xenomai/net/drivers/experimental/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/Makefile 2021-04-07 16:01:27.586633653 +0800 +@@ -0,0 +1,9 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_RT2500) += rt2500/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000_NEW) += e1000/ ++ ++obj-$(CONFIG_RTNET_DRV_3C59X) += rt_3c59x.o ++ ++rt_3c59x-y := 3c59x.o +--- linux/drivers/xenomai/net/drivers/experimental/3c59x.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/3c59x.c 2021-04-07 16:01:27.581633660 +0800 +@@ -0,0 +1,2749 @@ ++#warning ********************************************************************* ++#warning This driver is probably not real-time safe! Under certain conditions ++#warning it can cause interrupt locks of up to 1 second (issue_and_wait). We ++#warning need a rewrite of critical parts, but we are lacking the knowledge ++#warning about the hardware details (e.g. how long does a normal delay take => ++#warning apply this value and throw an error message on timeouts). ++#warning ********************************************************************* ++ ++/* EtherLinkXL.c: A 3Com EtherLink PCI III/XL ethernet driver for linux / RTnet. */ ++/* ++ RTnet porting 2002 by Mathias Koehrer (mathias_koehrer@yahoo.de) ++ -- Support only for PCI boards, EISA stuff ignored... ++ ++ Originally written 1996-1999 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ This driver is for the 3Com "Vortex" and "Boomerang" series ethercards. ++ Members of the series include Fast EtherLink 3c590/3c592/3c595/3c597 ++ and the EtherLink XL 3c900 and 3c905 cards. ++ ++ Problem reports and questions should be directed to ++ vortex@scyld.com ++ ++ The author may be reached as becker@scyld.com, or C/O ++ Scyld Computing Corporation ++ 410 Severn Ave., Suite 210 ++ Annapolis MD 21403 ++ ++ Linux Kernel Additions: ++ ++ 0.99H+lk0.9 - David S. Miller - softnet, PCI DMA updates ++ 0.99H+lk1.0 - Jeff Garzik ++ Remove compatibility defines for kernel versions < 2.2.x. ++ Update for new 2.3.x module interface ++ LK1.1.2 (March 19, 2000) ++ * New PCI interface (jgarzik) ++ ++ LK1.1.3 25 April 2000, Andrew Morton ++ - Merged with 3c575_cb.c ++ - Don't set RxComplete in boomerang interrupt enable reg ++ - spinlock in vortex_timer to protect mdio functions ++ - disable local interrupts around call to vortex_interrupt in ++ vortex_tx_timeout() (So vortex_interrupt can use spin_lock()) ++ - Select window 3 in vortex_timer()'s write to Wn3_MAC_Ctrl ++ - In vortex_start_xmit(), move the lock to _after_ we've altered ++ vp->cur_tx and vp->tx_full. This defeats the race between ++ vortex_start_xmit() and vortex_interrupt which was identified ++ by Bogdan Costescu. ++ - Merged back support for six new cards from various sources ++ - Set vortex_have_pci if pci_module_init returns zero (fixes cardbus ++ insertion oops) ++ - Tell it that 3c905C has NWAY for 100bT autoneg ++ - Fix handling of SetStatusEnd in 'Too much work..' code, as ++ per 2.3.99's 3c575_cb (Dave Hinds). ++ - Split ISR into two for vortex & boomerang ++ - Fix MOD_INC/DEC races ++ - Handle resource allocation failures. 
++ - Fix 3CCFE575CT LED polarity ++ - Make tx_interrupt_mitigation the default ++ ++ LK1.1.4 25 April 2000, Andrew Morton ++ - Add extra TxReset to vortex_up() to fix 575_cb hotplug initialisation probs. ++ - Put vortex_info_tbl into __devinitdata ++ - In the vortex_error StatsFull HACK, disable stats in vp->intr_enable as well ++ as in the hardware. ++ - Increased the loop counter in issue_and_wait from 2,000 to 4,000. ++ ++ LK1.1.5 28 April 2000, andrewm ++ - Added powerpc defines (John Daniel said these work...) ++ - Some extra diagnostics ++ - In vortex_error(), reset the Tx on maxCollisions. Otherwise most ++ chips usually get a Tx timeout. ++ - Added extra_reset module parm ++ - Replaced some inline timer manip with mod_timer ++ (Franois romieu ) ++ - In vortex_up(), don't make Wn3_config initialisation dependent upon has_nway ++ (this came across from 3c575_cb). ++ ++ LK1.1.6 06 Jun 2000, andrewm ++ - Backed out the PPC defines. ++ - Use del_timer_sync(), mod_timer(). ++ - Fix wrapped ulong comparison in boomerang_rx() ++ - Add IS_TORNADO, use it to suppress 3c905C checksum error msg ++ (Donald Becker, I Lee Hetherington ) ++ - Replace union wn3_config with BFINS/BFEXT manipulation for ++ sparc64 (Pete Zaitcev, Peter Jones) ++ - In vortex_error, do_tx_reset and vortex_tx_timeout(Vortex): ++ do a netif_wake_queue() to better recover from errors. (Anders Pedersen, ++ Donald Becker) ++ - Print a warning on out-of-memory (rate limited to 1 per 10 secs) ++ - Added two more Cardbus 575 NICs: 5b57 and 6564 (Paul Wagland) ++ ++ LK1.1.7 2 Jul 2000 andrewm ++ - Better handling of shared IRQs ++ - Reset the transmitter on a Tx reclaim error ++ - Fixed crash under OOM during vortex_open() (Mark Hemment) ++ - Fix Rx cessation problem during OOM (help from Mark Hemment) ++ - The spinlocks around the mdio access were blocking interrupts for 300uS. ++ Fix all this to use spin_lock_bh() within mdio_read/write ++ - Only write to TxFreeThreshold if it's a boomerang - other NICs don't ++ have one. ++ - Added 802.3x MAC-layer flow control support ++ ++ LK1.1.8 13 Aug 2000 andrewm ++ - Ignore request_region() return value - already reserved if Cardbus. ++ - Merged some additional Cardbus flags from Don's 0.99Qk ++ - Some fixes for 3c556 (Fred Maciel) ++ - Fix for EISA initialisation (Jan Rekorajski) ++ - Renamed MII_XCVR_PWR and EEPROM_230 to align with 3c575_cb and D. Becker's drivers ++ - Fixed MII_XCVR_PWR for 3CCFE575CT ++ - Added INVERT_LED_PWR, used it. ++ - Backed out the extra_reset stuff ++ ++ LK1.1.9 12 Sep 2000 andrewm ++ - Backed out the tx_reset_resume flags. It was a no-op. ++ - In vortex_error, don't reset the Tx on txReclaim errors ++ - In vortex_error, don't reset the Tx on maxCollisions errors. ++ Hence backed out all the DownListPtr logic here. ++ - In vortex_error, give Tornado cards a partial TxReset on ++ maxCollisions (David Hinds). Defined MAX_COLLISION_RESET for this. ++ - Redid some driver flags and device names based on pcmcia_cs-3.1.20. ++ - Fixed a bug where, if vp->tx_full is set when the interface ++ is downed, it remains set when the interface is upped. Bad ++ things happen. 
++ ++ LK1.1.10 17 Sep 2000 andrewm ++ - Added EEPROM_8BIT for 3c555 (Fred Maciel) ++ - Added experimental support for the 3c556B Laptop Hurricane (Louis Gerbarg) ++ - Add HAS_NWAY to "3c900 Cyclone 10Mbps TPO" ++ ++ LK1.1.11 13 Nov 2000 andrewm ++ - Dump MOD_INC/DEC_USE_COUNT, use SET_MODULE_OWNER ++ ++ LK1.1.12 1 Jan 2001 andrewm (2.4.0-pre1) ++ - Call pci_enable_device before we request our IRQ (Tobias Ringstrom) ++ - Add 3c590 PCI latency timer hack to vortex_probe1 (from 0.99Ra) ++ - Added extended issue_and_wait for the 3c905CX. ++ - Look for an MII on PHY index 24 first (3c905CX oddity). ++ - Add HAS_NWAY to 3cSOHO100-TX (Brett Frankenberger) ++ - Don't free skbs we don't own on oom path in vortex_open(). ++ ++ LK1.1.13 27 Jan 2001 ++ - Added explicit `medialock' flag so we can truly ++ lock the media type down with `options'. ++ - "check ioremap return and some tidbits" (Arnaldo Carvalho de Melo ) ++ - Added and used EEPROM_NORESET for 3c556B PM resumes. ++ - Fixed leakage of vp->rx_ring. ++ - Break out separate HAS_HWCKSM device capability flag. ++ - Kill vp->tx_full (ANK) ++ - Merge zerocopy fragment handling (ANK?) ++ ++ LK1.1.14 15 Feb 2001 ++ - Enable WOL. Can be turned on with `enable_wol' module option. ++ - EISA and PCI initialisation fixes (jgarzik, Manfred Spraul) ++ - If a device's internalconfig register reports it has NWAY, ++ use it, even if autoselect is enabled. ++ ++ LK1.1.15 6 June 2001 akpm ++ - Prevent double counting of received bytes (Lars Christensen) ++ - Add ethtool support (jgarzik) ++ - Add module parm descriptions (Andrzej M. Krzysztofowicz) ++ - Implemented alloc_etherdev() API ++ - Special-case the 'Tx error 82' message. ++ ++ LK1.1.16 18 July 2001 akpm ++ - Make NETIF_F_SG dependent upon nr_free_highpages(), not on CONFIG_HIGHMEM ++ - Lessen verbosity of bootup messages ++ - Fix WOL - use new PM API functions. ++ - Use netif_running() instead of vp->open in suspend/resume. ++ - Don't reset the interface logic on open/close/rmmod. It upsets ++ autonegotiation, and hence DHCP (from 0.99T). ++ - Back out EEPROM_NORESET flag because of the above (we do it for all ++ NICs). ++ - Correct 3c982 identification string ++ - Rename wait_for_completion() to issue_and_wait() to avoid completion.h ++ clash. ++ ++ - See http://www.uow.edu.au/~andrewm/linux/#3c59x-2.3 for more details. ++ - Also see Documentation/networking/vortex.txt ++*/ ++ ++/* ++ * FIXME: This driver _could_ support MTU changing, but doesn't. See Don's hamachi.c implementation ++ * as well as other drivers ++ * ++ * NOTE: If you make 'vortex_debug' a constant (#define vortex_debug 0) the driver shrinks by 2k ++ * due to dead code elimination. There will be some performance benefits from this due to ++ * elimination of all the tests and reduced cache footprint. ++ */ ++ ++ ++#define DRV_NAME "3c59x" ++#define DRV_VERSION "LK1.1.16" ++#define DRV_RELDATE "19 July 2001" ++ ++ ++ ++/* A few values that may be tweaked. */ ++/* Keep the ring sizes a power of two for efficiency. */ ++#define TX_RING_SIZE 16 ++#define RX_RING_SIZE 8 /*** RTnet ***/ ++#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/ ++ ++/* "Knobs" that adjust features and parameters. */ ++/* Set the copy breakpoint for the copy-only-tiny-frames scheme. ++ Setting to > 1512 effectively disables this feature. */ ++/*** RTnet ***/ ++/*** RTnet ***/ ++/* Allow setting MTU to a larger size, bypassing the normal ethernet setup. */ ++static const int mtu = 1500; ++/* Maximum events (Rx packets, etc.) to handle at each interrupt. 
*/ ++static int max_interrupt_work = 32; ++/* Tx timeout interval (millisecs) */ ++// *** RTnet *** ++//static int watchdog = 5000; ++// *** RTnet *** ++ ++/* Allow aggregation of Tx interrupts. Saves CPU load at the cost ++ * of possible Tx stalls if the system is blocking interrupts ++ * somewhere else. Undefine this to disable. ++ */ ++#define tx_interrupt_mitigation 1 ++ ++/* Put out somewhat more debugging messages. (0: no msg, 1 minimal .. 6). */ ++#define vortex_debug debug ++#ifdef VORTEX_DEBUG ++static int vortex_debug = VORTEX_DEBUG; ++#else ++static int vortex_debug = 1; ++#endif ++ ++#ifndef __OPTIMIZE__ ++#error You must compile this file with the correct options! ++#error See the last lines of the source file. ++#error You must compile this driver with "-O". ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* For NR_IRQS only. */ ++#include ++#include ++ ++// *** RTnet *** ++#include ++ ++static int cards = INT_MAX; ++module_param(cards, int, 0444); ++MODULE_PARM_DESC(cards, "number of cards to be supported"); ++// *** RTnet *** ++ ++/* Kernel compatibility defines, some common to David Hinds' PCMCIA package. ++ This is only in the support-all-kernels source code. */ ++ ++#define RUN_AT(x) (jiffies + (x)) ++ ++#include ++ ++// *** RTnet - no power management *** ++#undef pci_set_power_state ++#define pci_set_power_state null_set_power_state ++static inline int null_set_power_state(struct pci_dev *dev, int state) ++{ ++ return 0; ++} ++// *** RTnet *** ++ ++ ++static char version[] = ++ DRV_NAME " for RTnet : Donald Becker and others. www.scyld.com/network/vortex.html\n"; ++ ++MODULE_AUTHOR("Donald Becker "); ++MODULE_DESCRIPTION("3Com 3c59x/3c9xx ethernet driver for RTnet " ++ DRV_VERSION " " DRV_RELDATE); ++MODULE_LICENSE("GPL"); ++ ++/* Operational parameter that usually are not changed. */ ++ ++/* The Vortex size is twice that of the original EtherLinkIII series: the ++ runtime register window, window 1, is now always mapped in. ++ The Boomerang size is twice as large as the Vortex -- it has additional ++ bus master control registers. */ ++#define VORTEX_TOTAL_SIZE 0x20 ++#define BOOMERANG_TOTAL_SIZE 0x40 ++ ++/* Set iff a MII transceiver on any interface requires mdio preamble. ++ This only set with the original DP83840 on older 3c905 boards, so the extra ++ code size of a per-interface flag is not worthwhile. */ ++static char mii_preamble_required; ++ ++#define PFX DRV_NAME ": " ++ ++ ++ ++/* ++ Theory of Operation ++ ++ I. Board Compatibility ++ ++ This device driver is designed for the 3Com FastEtherLink and FastEtherLink ++ XL, 3Com's PCI to 10/100baseT adapters. It also works with the 10Mbs ++ versions of the FastEtherLink cards. The supported product IDs are ++ 3c590, 3c592, 3c595, 3c597, 3c900, 3c905 ++ ++ The related ISA 3c515 is supported with a separate driver, 3c515.c, included ++ with the kernel source or available from ++ cesdis.gsfc.nasa.gov:/pub/linux/drivers/3c515.html ++ ++ II. Board-specific settings ++ ++ PCI bus devices are configured by the system at boot time, so no jumpers ++ need to be set on the board. The system BIOS should be set to assign the ++ PCI INTA signal to an otherwise unused system IRQ line. ++ ++ The EEPROM settings for media type and forced-full-duplex are observed. 
++ The EEPROM media type should be left at the default "autoselect" unless using ++ 10base2 or AUI connections which cannot be reliably detected. ++ ++ III. Driver operation ++ ++ The 3c59x series use an interface that's very similar to the previous 3c5x9 ++ series. The primary interface is two programmed-I/O FIFOs, with an ++ alternate single-contiguous-region bus-master transfer (see next). ++ ++ The 3c900 "Boomerang" series uses a full-bus-master interface with separate ++ lists of transmit and receive descriptors, similar to the AMD LANCE/PCnet, ++ DEC Tulip and Intel Speedo3. The first chip version retains a compatible ++ programmed-I/O interface that has been removed in 'B' and subsequent board ++ revisions. ++ ++ One extension that is advertised in a very large font is that the adapters ++ are capable of being bus masters. On the Vortex chip this capability was ++ only for a single contiguous region making it far less useful than the full ++ bus master capability. There is a significant performance impact of taking ++ an extra interrupt or polling for the completion of each transfer, as well ++ as difficulty sharing the single transfer engine between the transmit and ++ receive threads. Using DMA transfers is a win only with large blocks or ++ with the flawed versions of the Intel Orion motherboard PCI controller. ++ ++ The Boomerang chip's full-bus-master interface is useful, and has the ++ currently-unused advantages over other similar chips that queued transmit ++ packets may be reordered and receive buffer groups are associated with a ++ single frame. ++ ++ With full-bus-master support, this driver uses a "RX_COPYBREAK" scheme. ++ Rather than a fixed intermediate receive buffer, this scheme allocates ++ full-sized skbuffs as receive buffers. The value RX_COPYBREAK is used as ++ the copying breakpoint: it is chosen to trade-off the memory wasted by ++ passing the full-sized skbuff to the queue layer for all frames vs. the ++ copying cost of copying a frame to a correctly-sized skbuff. ++ ++ IIIC. Synchronization ++ The driver runs as two independent, single-threaded flows of control. One ++ is the send-packet routine, which enforces single-threaded use by the ++ dev->tbusy flag. The other thread is the interrupt handler, which is single ++ threaded by the hardware and other software. ++ ++ IV. Notes ++ ++ Thanks to Cameron Spitzer and Terry Murphy of 3Com for providing development ++ 3c590, 3c595, and 3c900 boards. ++ The name "Vortex" is the internal 3Com project name for the PCI ASIC, and ++ the EISA version is called "Demon". According to Terry these names come ++ from rides at the local amusement park. ++ ++ The new chips support both ethernet (1.5K) and FDDI (4.5K) packet sizes! ++ This driver only supports ethernet packets because of the skbuff allocation ++ limit of 4K. ++*/ ++ ++/* This table drives the PCI probe routines. It's mostly boilerplate in all ++ of the drivers, and will likely be provided by some future kernel. 
++*/ ++enum pci_flags_bit { ++ PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4, ++ PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, ++}; ++ ++enum { IS_VORTEX=1, IS_BOOMERANG=2, IS_CYCLONE=4, IS_TORNADO=8, ++ EEPROM_8BIT=0x10, /* AKPM: Uses 0x230 as the base bitmaps for EEPROM reads */ ++ HAS_PWR_CTRL=0x20, HAS_MII=0x40, HAS_NWAY=0x80, HAS_CB_FNS=0x100, ++ INVERT_MII_PWR=0x200, INVERT_LED_PWR=0x400, MAX_COLLISION_RESET=0x800, ++ EEPROM_OFFSET=0x1000, HAS_HWCKSM=0x2000 }; ++ ++enum vortex_chips { ++ CH_3C590 = 0, ++ CH_3C592, ++ CH_3C597, ++ CH_3C595_1, ++ CH_3C595_2, ++ ++ CH_3C595_3, ++ CH_3C900_1, ++ CH_3C900_2, ++ CH_3C900_3, ++ CH_3C900_4, ++ ++ CH_3C900_5, ++ CH_3C900B_FL, ++ CH_3C905_1, ++ CH_3C905_2, ++ CH_3C905B_1, ++ ++ CH_3C905B_2, ++ CH_3C905B_FX, ++ CH_3C905C, ++ CH_3C980, ++ CH_3C9805, ++ ++ CH_3CSOHO100_TX, ++ CH_3C555, ++ CH_3C556, ++ CH_3C556B, ++ CH_3C575, ++ ++ CH_3C575_1, ++ CH_3CCFE575, ++ CH_3CCFE575CT, ++ CH_3CCFE656, ++ CH_3CCFEM656, ++ ++ CH_3CCFEM656_1, ++ CH_3C450, ++}; ++ ++ ++/* note: this array directly indexed by above enums, and MUST ++ * be kept in sync with both the enums above, and the PCI device ++ * table below ++ */ ++static struct vortex_chip_info { ++ const char *name; ++ int flags; ++ int drv_flags; ++ int io_size; ++} vortex_info_tbl[] = { ++#define EISA_TBL_OFFSET 0 /* Offset of this entry for vortex_eisa_init */ ++ {"3c590 Vortex 10Mbps", ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ {"3c592 EISA 10Mbps Demon/Vortex", /* AKPM: from Don's 3c59x_cb.c 0.49H */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ {"3c597 EISA Fast Demon/Vortex", /* AKPM: from Don's 3c59x_cb.c 0.49H */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ {"3c595 Vortex 100baseTx", ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ {"3c595 Vortex 100baseT4", ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ ++ {"3c595 Vortex 100base-MII", ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ {"3c900 Boomerang 10baseT", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, }, ++ {"3c900 Boomerang 10Mbps Combo", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, }, ++ {"3c900 Cyclone 10Mbps TPO", /* AKPM: from Don's 0.99M */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, ++ {"3c900 Cyclone 10Mbps Combo", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ ++ {"3c900 Cyclone 10Mbps TPC", /* AKPM: from Don's 0.99M */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ {"3c900B-FL Cyclone 10base-FL", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ {"3c905 Boomerang 100baseTx", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, }, ++ {"3c905 Boomerang 100baseT4", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, }, ++ {"3c905B Cyclone 100baseTx", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, ++ ++ {"3c905B Cyclone 10/100/BNC", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, ++ {"3c905B-FX Cyclone 100baseFx", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ {"3c905C Tornado", ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, }, ++ {"3c980 Cyclone", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ {"3c982 Dual Port Server Cyclone", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ ++ {"3cSOHO100-TX Hurricane", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, ++ {"3c555 Laptop Hurricane", ++ PCI_USES_IO|PCI_USES_MASTER, 
IS_CYCLONE|EEPROM_8BIT|HAS_HWCKSM, 128, }, ++ {"3c556 Laptop Tornado", ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_8BIT|HAS_CB_FNS|INVERT_MII_PWR| ++ HAS_HWCKSM, 128, }, ++ {"3c556B Laptop Hurricane", ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_OFFSET|HAS_CB_FNS|INVERT_MII_PWR| ++ HAS_HWCKSM, 128, }, ++ {"3c575 [Megahertz] 10/100 LAN CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, }, ++ ++ {"3c575 Boomerang CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, }, ++ {"3CCFE575BT Cyclone CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT| ++ INVERT_LED_PWR|HAS_HWCKSM, 128, }, ++ {"3CCFE575CT Tornado CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| ++ MAX_COLLISION_RESET|HAS_HWCKSM, 128, }, ++ {"3CCFE656 Cyclone CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| ++ INVERT_LED_PWR|HAS_HWCKSM, 128, }, ++ {"3CCFEM656B Cyclone+Winmodem CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| ++ INVERT_LED_PWR|HAS_HWCKSM, 128, }, ++ ++ {"3CXFEM656C Tornado+Winmodem CardBus", /* From pcmcia-cs-3.1.5 */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| ++ MAX_COLLISION_RESET|HAS_HWCKSM, 128, }, ++ {"3c450 HomePNA Tornado", /* AKPM: from Don's 0.99Q */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, }, ++ {0,}, /* 0 terminated list. */ ++}; ++ ++ ++static struct pci_device_id vortex_pci_tbl[] = { ++ { 0x10B7, 0x5900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C590 }, ++ { 0x10B7, 0x5920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C592 }, ++ { 0x10B7, 0x5970, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C597 }, ++ { 0x10B7, 0x5950, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_1 }, ++ { 0x10B7, 0x5951, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_2 }, ++ ++ { 0x10B7, 0x5952, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_3 }, ++ { 0x10B7, 0x9000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_1 }, ++ { 0x10B7, 0x9001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_2 }, ++ { 0x10B7, 0x9004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_3 }, ++ { 0x10B7, 0x9005, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_4 }, ++ ++ { 0x10B7, 0x9006, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_5 }, ++ { 0x10B7, 0x900A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900B_FL }, ++ { 0x10B7, 0x9050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_1 }, ++ { 0x10B7, 0x9051, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_2 }, ++ { 0x10B7, 0x9055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_1 }, ++ ++ { 0x10B7, 0x9058, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_2 }, ++ { 0x10B7, 0x905A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_FX }, ++ { 0x10B7, 0x9200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905C }, ++ { 0x10B7, 0x9800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C980 }, ++ { 0x10B7, 0x9805, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C9805 }, ++ ++ { 0x10B7, 0x7646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CSOHO100_TX }, ++ { 0x10B7, 0x5055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C555 }, ++ { 0x10B7, 0x6055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556 }, ++ { 0x10B7, 0x6056, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556B }, ++ { 0x10B7, 0x5b57, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575 }, ++ ++ { 0x10B7, 0x5057, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575_1 }, ++ { 0x10B7, 0x5157, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575 }, ++ { 0x10B7, 0x5257, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575CT }, ++ { 0x10B7, 0x6560, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE656 }, ++ { 0x10B7, 
0x6562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656 }, ++ ++ { 0x10B7, 0x6564, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656_1 }, ++ { 0x10B7, 0x4500, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C450 }, ++ {0,} /* 0 terminated list. */ ++}; ++MODULE_DEVICE_TABLE(pci, vortex_pci_tbl); ++ ++ ++/* Operational definitions. ++ These are not used by other compilation units and thus are not ++ exported in a ".h" file. ++ ++ First the windows. There are eight register windows, with the command ++ and status registers available in each. ++*/ ++#define EL3WINDOW(win_num) outw(SelectWindow + (win_num), ioaddr + EL3_CMD) ++#define EL3_CMD 0x0e ++#define EL3_STATUS 0x0e ++ ++/* The top five bits written to EL3_CMD are a command, the lower ++ 11 bits are the parameter, if applicable. ++ Note that 11 parameters bits was fine for ethernet, but the new chip ++ can handle FDDI length frames (~4500 octets) and now parameters count ++ 32-bit 'Dwords' rather than octets. */ ++ ++enum vortex_cmd { ++ TotalReset = 0<<11, SelectWindow = 1<<11, StartCoax = 2<<11, ++ RxDisable = 3<<11, RxEnable = 4<<11, RxReset = 5<<11, ++ UpStall = 6<<11, UpUnstall = (6<<11)+1, ++ DownStall = (6<<11)+2, DownUnstall = (6<<11)+3, ++ RxDiscard = 8<<11, TxEnable = 9<<11, TxDisable = 10<<11, TxReset = 11<<11, ++ FakeIntr = 12<<11, AckIntr = 13<<11, SetIntrEnb = 14<<11, ++ SetStatusEnb = 15<<11, SetRxFilter = 16<<11, SetRxThreshold = 17<<11, ++ SetTxThreshold = 18<<11, SetTxStart = 19<<11, ++ StartDMAUp = 20<<11, StartDMADown = (20<<11)+1, StatsEnable = 21<<11, ++ StatsDisable = 22<<11, StopCoax = 23<<11, SetFilterBit = 25<<11,}; ++ ++/* The SetRxFilter command accepts the following classes: */ ++enum RxFilter { ++ RxStation = 1, RxMulticast = 2, RxBroadcast = 4, RxProm = 8 }; ++ ++/* Bits in the general status register. */ ++enum vortex_status { ++ IntLatch = 0x0001, HostError = 0x0002, TxComplete = 0x0004, ++ TxAvailable = 0x0008, RxComplete = 0x0010, RxEarly = 0x0020, ++ IntReq = 0x0040, StatsFull = 0x0080, ++ DMADone = 1<<8, DownComplete = 1<<9, UpComplete = 1<<10, ++ DMAInProgress = 1<<11, /* DMA controller is still busy.*/ ++ CmdInProgress = 1<<12, /* EL3_CMD is still busy.*/ ++}; ++ ++/* Register window 1 offsets, the window used in normal operation. ++ On the Vortex this window is always mapped at offsets 0x10-0x1f. */ ++enum Window1 { ++ TX_FIFO = 0x10, RX_FIFO = 0x10, RxErrors = 0x14, ++ RxStatus = 0x18, Timer=0x1A, TxStatus = 0x1B, ++ TxFree = 0x1C, /* Remaining free bytes in Tx buffer. */ ++}; ++enum Window0 { ++ Wn0EepromCmd = 10, /* Window 0: EEPROM command register. */ ++ Wn0EepromData = 12, /* Window 0: EEPROM results register. */ ++ IntrStatus=0x0E, /* Valid in all windows. */ ++}; ++enum Win0_EEPROM_bits { ++ EEPROM_Read = 0x80, EEPROM_WRITE = 0x40, EEPROM_ERASE = 0xC0, ++ EEPROM_EWENB = 0x30, /* Enable erasing/writing for 10 msec. */ ++ EEPROM_EWDIS = 0x00, /* Disable EWENB before 10 msec timeout. */ ++}; ++/* EEPROM locations. */ ++enum eeprom_offset { ++ PhysAddr01=0, PhysAddr23=1, PhysAddr45=2, ModelID=3, ++ EtherLink3ID=7, IFXcvrIO=8, IRQLine=9, ++ NodeAddr01=10, NodeAddr23=11, NodeAddr45=12, ++ DriverTune=13, Checksum=15}; ++ ++enum Window2 { /* Window 2. */ ++ Wn2_ResetOptions=12, ++}; ++enum Window3 { /* Window 3: MAC/config bits. 
*/ ++ Wn3_Config=0, Wn3_MAC_Ctrl=6, Wn3_Options=8, ++}; ++ ++#define BFEXT(value, offset, bitcount) \ ++ ((((unsigned long)(value)) >> (offset)) & ((1 << (bitcount)) - 1)) ++ ++#define BFINS(lhs, rhs, offset, bitcount) \ ++ (((lhs) & ~((((1 << (bitcount)) - 1)) << (offset))) | \ ++ (((rhs) & ((1 << (bitcount)) - 1)) << (offset))) ++ ++#define RAM_SIZE(v) BFEXT(v, 0, 3) ++#define RAM_WIDTH(v) BFEXT(v, 3, 1) ++#define RAM_SPEED(v) BFEXT(v, 4, 2) ++#define ROM_SIZE(v) BFEXT(v, 6, 2) ++#define RAM_SPLIT(v) BFEXT(v, 16, 2) ++#define XCVR(v) BFEXT(v, 20, 4) ++#define AUTOSELECT(v) BFEXT(v, 24, 1) ++ ++enum Window4 { /* Window 4: Xcvr/media bits. */ ++ Wn4_FIFODiag = 4, Wn4_NetDiag = 6, Wn4_PhysicalMgmt=8, Wn4_Media = 10, ++}; ++enum Win4_Media_bits { ++ Media_SQE = 0x0008, /* Enable SQE error counting for AUI. */ ++ Media_10TP = 0x00C0, /* Enable link beat and jabber for 10baseT. */ ++ Media_Lnk = 0x0080, /* Enable just link beat for 100TX/100FX. */ ++ Media_LnkBeat = 0x0800, ++}; ++enum Window7 { /* Window 7: Bus Master control. */ ++ Wn7_MasterAddr = 0, Wn7_MasterLen = 6, Wn7_MasterStatus = 12, ++}; ++/* Boomerang bus master control registers. */ ++enum MasterCtrl { ++ PktStatus = 0x20, DownListPtr = 0x24, FragAddr = 0x28, FragLen = 0x2c, ++ TxFreeThreshold = 0x2f, UpPktStatus = 0x30, UpListPtr = 0x38, ++}; ++ ++/* The Rx and Tx descriptor lists. ++ Caution Alpha hackers: these types are 32 bits! Note also the 8 byte ++ alignment contraint on tx_ring[] and rx_ring[]. */ ++#define LAST_FRAG 0x80000000 /* Last Addr/Len pair in descriptor. */ ++#define DN_COMPLETE 0x00010000 /* This packet has been downloaded */ ++struct boom_rx_desc { ++ u32 next; /* Last entry points to 0. */ ++ s32 status; ++ u32 addr; /* Up to 63 addr/len pairs possible. */ ++ s32 length; /* Set LAST_FRAG to indicate last pair. */ ++}; ++/* Values for the Rx status entry. */ ++enum rx_desc_status { ++ RxDComplete=0x00008000, RxDError=0x4000, ++ /* See boomerang_rx() for actual error bits */ ++ IPChksumErr=1<<25, TCPChksumErr=1<<26, UDPChksumErr=1<<27, ++ IPChksumValid=1<<29, TCPChksumValid=1<<30, UDPChksumValid=1<<31, ++}; ++ ++// *** RTnet *** ++//#ifdef MAX_SKB_FRAGS ++//#define DO_ZEROCOPY 1 ++//#else ++#define DO_ZEROCOPY 0 ++//#endif ++ ++struct boom_tx_desc { ++ u32 next; /* Last entry points to 0. */ ++ s32 status; /* bits 0:12 length, others see below. */ ++#if DO_ZEROCOPY ++ struct { ++ u32 addr; ++ s32 length; ++ } frag[1+MAX_SKB_FRAGS]; ++#else ++ u32 addr; ++ s32 length; ++#endif ++}; ++ ++/* Values for the Tx status entry. */ ++enum tx_desc_status { ++ CRCDisable=0x2000, TxDComplete=0x8000, ++ AddIPChksum=0x02000000, AddTCPChksum=0x04000000, AddUDPChksum=0x08000000, ++ TxIntrUploaded=0x80000000, /* IRQ when in FIFO, but maybe not sent. */ ++}; ++ ++/* Chip features we care about in vp->capabilities, read from the EEPROM. */ ++enum ChipCaps { CapBusMaster=0x20, CapPwrMgmt=0x2000 }; ++ ++struct vortex_private { ++ /* The Rx and Tx rings should be quad-word-aligned. */ ++ struct boom_rx_desc* rx_ring; ++ struct boom_tx_desc* tx_ring; ++ dma_addr_t rx_ring_dma; ++ dma_addr_t tx_ring_dma; ++ /* The addresses of transmit- and receive-in-place skbuffs. */ ++ ++ // *** RTnet *** ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; ++ // *** RTnet *** ++ ++ struct rtnet_device *next_module; /* NULL if PCI device */ ++ unsigned int cur_rx, cur_tx; /* The next free ring entry */ ++ unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. 
*/ ++ struct net_device_stats stats; ++ struct rtskb *tx_skb; /* Packet being eaten by bus master ctrl. */ ++ dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA. */ ++ ++ /* PCI configuration space information. */ ++ struct pci_dev *pdev; ++ char *cb_fn_base; /* CardBus function status addr space. */ ++ ++ /* Some values here only for performance evaluation and path-coverage */ ++ int rx_nocopy, rx_copy, queued_packet, rx_csumhits; ++ int card_idx; ++ ++ /* The remainder are related to chip state, mostly media selection. */ ++ struct timer_list timer; /* Media selection timer. */ ++ struct timer_list rx_oom_timer; /* Rx skb allocation retry timer */ ++ int options; /* User-settable misc. driver options. */ ++ unsigned int media_override:4, /* Passed-in media type. */ ++ default_media:4, /* Read from the EEPROM/Wn3_Config. */ ++ full_duplex:1, force_fd:1, autoselect:1, ++ bus_master:1, /* Vortex can only do a fragment bus-m. */ ++ full_bus_master_tx:1, full_bus_master_rx:2, /* Boomerang */ ++ flow_ctrl:1, /* Use 802.3x flow control (PAUSE only) */ ++ partner_flow_ctrl:1, /* Partner supports flow control */ ++ has_nway:1, ++ enable_wol:1, /* Wake-on-LAN is enabled */ ++ pm_state_valid:1, /* power_state[] has sane contents */ ++ open:1, ++ medialock:1, ++ must_free_region:1; /* Flag: if zero, Cardbus owns the I/O region */ ++ int drv_flags; ++ u16 status_enable; ++ u16 intr_enable; ++ u16 available_media; /* From Wn3_Options. */ ++ u16 capabilities, info1, info2; /* Various, from EEPROM. */ ++ u16 advertising; /* NWay media advertisement */ ++ unsigned char phys[2]; /* MII device addresses. */ ++ u16 deferred; /* Resend these interrupts when we ++ * bale from the ISR */ ++ u16 io_size; /* Size of PCI region (for release_region) */ ++ rtdm_lock_t lock; /* Serialise access to device & its vortex_private */ ++ spinlock_t mdio_lock; /* Serialise access to mdio hardware */ ++ u32 power_state[16]; ++ rtdm_irq_t irq_handle; ++}; ++ ++/* The action to take with a media selection timer tick. ++ Note that we deviate from the 3Com order by checking 10base2 before AUI. ++*/ ++enum xcvr_types { ++ XCVR_10baseT=0, XCVR_AUI, XCVR_10baseTOnly, XCVR_10base2, XCVR_100baseTx, ++ XCVR_100baseFx, XCVR_MII=6, XCVR_NWAY=8, XCVR_ExtMII=9, XCVR_Default=10, ++}; ++ ++static struct media_table { ++ char *name; ++ unsigned int media_bits:16, /* Bits to set in Wn4_Media register. */ ++ mask:8, /* The transceiver-present bit in Wn3_Config.*/ ++ next:8; /* The media type to try next. */ ++ int wait; /* Time before we check media status. 
*/ ++} media_tbl[] = { ++ { "10baseT", Media_10TP,0x08, XCVR_10base2, (14*HZ)/10}, ++ { "10Mbs AUI", Media_SQE, 0x20, XCVR_Default, (1*HZ)/10}, ++ { "undefined", 0, 0x80, XCVR_10baseT, 10000}, ++ { "10base2", 0, 0x10, XCVR_AUI, (1*HZ)/10}, ++ { "100baseTX", Media_Lnk, 0x02, XCVR_100baseFx, (14*HZ)/10}, ++ { "100baseFX", Media_Lnk, 0x04, XCVR_MII, (14*HZ)/10}, ++ { "MII", 0, 0x41, XCVR_10baseT, 3*HZ }, ++ { "undefined", 0, 0x01, XCVR_10baseT, 10000}, ++ { "Autonegotiate", 0, 0x41, XCVR_10baseT, 3*HZ}, ++ { "MII-External", 0, 0x41, XCVR_10baseT, 3*HZ }, ++ { "Default", 0, 0xFF, XCVR_10baseT, 10000}, ++}; ++ ++static int vortex_probe1(struct pci_dev *pdev, long ioaddr, int irq, ++ int chip_idx, int card_idx); ++static void vortex_up(struct rtnet_device *rtdev); ++static void vortex_down(struct rtnet_device *rtdev); ++static int vortex_open(struct rtnet_device *rtdev); ++static void mdio_sync(long ioaddr, int bits); ++static int mdio_read(struct rtnet_device *rtdev, int phy_id, int location); ++static void mdio_write(struct rtnet_device *vp, int phy_id, int location, int value); ++ ++// *** RTnet *** ++//static void vortex_timer(unsigned long arg); ++//static void rx_oom_timer(unsigned long arg); ++// *** RTnet *** ++ ++static int vortex_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static int boomerang_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static int vortex_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp); ++static int boomerang_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp); ++static int vortex_interrupt(rtdm_irq_t *irq_handle); ++static int boomerang_interrupt(rtdm_irq_t *irq_handle); ++static int vortex_close(struct rtnet_device *rtdev); ++static void dump_tx_ring(struct rtnet_device *rtdev); ++ ++static void update_stats(long ioaddr, struct rtnet_device *dev); ++static struct net_device_stats *vortex_get_stats(struct rtnet_device *rtdev); ++ ++static void set_rx_mode(struct rtnet_device *rtdev); ++ ++// *** RTnet *** ++//static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); ++//static void vortex_tx_timeout(struct net_device *dev); ++// *** RTnet *** ++ ++static void acpi_set_WOL(struct rtnet_device *rtdev); ++ ++/* This driver uses 'options' to pass the media type, full-duplex flag, etc. */ ++/* Option count limit only -- unlimited interfaces are supported. 
*/ ++#define MAX_UNITS 8 ++static int options[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1,}; ++static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int hw_checksums[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int flow_ctrl[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int enable_wol[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++ ++module_param(debug, int, 0444); ++module_param_array(options, int, NULL, 0444); ++module_param_array(full_duplex, int, NULL, 0444); ++module_param_array(hw_checksums, int, NULL, 0444); ++module_param_array(flow_ctrl, int, NULL, 0444); ++module_param_array(enable_wol, int, NULL, 0444); ++/*** RTnet *** ++ MODULE_PARM(rx_copybreak, "i"); ++ *** RTnet ***/ ++module_param(max_interrupt_work, int, 0444); ++/*** RTnet *** ++ MODULE_PARM(compaq_ioaddr, "i"); ++ MODULE_PARM(compaq_irq, "i"); ++ MODULE_PARM(compaq_device_id, "i"); ++ MODULE_PARM(watchdog, "i"); ++ *** RTnet ***/ ++MODULE_PARM_DESC(debug, "3c59x debug level (0-6)"); ++MODULE_PARM_DESC(options, "3c59x: Bits 0-3: media type, bit 4: bus mastering, bit 9: full duplex"); ++MODULE_PARM_DESC(full_duplex, "3c59x full duplex setting(s) (1)"); ++MODULE_PARM_DESC(hw_checksums, "3c59x Hardware checksum checking by adapter(s) (0-1)"); ++MODULE_PARM_DESC(flow_ctrl, "3c59x 802.3x flow control usage (PAUSE only) (0-1)"); ++MODULE_PARM_DESC(enable_wol, "3c59x: Turn on Wake-on-LAN for adapter(s) (0-1)"); ++/*** RTnet *** ++ MODULE_PARM_DESC(rx_copybreak, "3c59x copy breakpoint for copy-only-tiny-frames"); ++ *** RTnet ***/ ++MODULE_PARM_DESC(max_interrupt_work, "3c59x maximum events handled per interrupt"); ++/*** RTnet *** ++ MODULE_PARM_DESC(compaq_ioaddr, "3c59x PCI I/O base address (Compaq BIOS problem workaround)"); ++ MODULE_PARM_DESC(compaq_irq, "3c59x PCI IRQ number (Compaq BIOS problem workaround)"); ++ MODULE_PARM_DESC(compaq_device_id, "3c59x PCI device ID (Compaq BIOS problem workaround)"); ++ MODULE_PARM_DESC(watchdog, "3c59x transmit timeout in milliseconds"); ++ *** RTnet ***/ ++ ++/* #define dev_alloc_skb dev_alloc_skb_debug */ ++ ++/* A list of all installed Vortex EISA devices, for removing the driver module. */ ++static struct rtnet_device *root_vortex_eisa_dev; ++ ++/* Variables to work-around the Compaq PCI BIOS32 problem. */ ++// *** RTnet *** ++//static int compaq_ioaddr, compaq_irq, compaq_device_id = 0x5900; ++// *** RTnet *** ++ ++static int vortex_cards_found; ++ ++#ifdef CONFIG_PM ++ ++#endif /* CONFIG_PM */ ++ ++/* returns count found (>= 0), or negative on error */ ++ ++/* returns count (>= 0), or negative on error */ ++static int vortex_init_one (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ int rc; ++ ++ if( vortex_cards_found >= cards ) ++ return -ENODEV; ++ ++ /* wake up and enable device */ ++ if (pci_enable_device (pdev)) { ++ rc = -EIO; ++ } else { ++ rc = vortex_probe1 (pdev, pci_resource_start (pdev, 0), pdev->irq, ++ ent->driver_data, vortex_cards_found); ++ if (rc == 0) ++ vortex_cards_found++; ++ } ++ return rc; ++} ++ ++/* ++ * Start up the PCI device which is described by *pdev. ++ * Return 0 on success. 
++ * ++ * NOTE: pdev can be NULL, for the case of an EISA driver ++ */ ++static int vortex_probe1(struct pci_dev *pdev, ++ long ioaddr, int irq, ++ int chip_idx, int card_idx) ++{ ++ // *** RTnet *** ++ struct rtnet_device *rtdev = NULL; ++ // *** RTnet *** ++ ++ struct vortex_private *vp; ++ int option; ++ unsigned int eeprom[0x40], checksum = 0; /* EEPROM contents */ ++ int i, step; ++ static int printed_version; ++ int retval, print_info; ++ struct vortex_chip_info * const vci = &vortex_info_tbl[chip_idx]; ++ const char *print_name; ++ ++ ++ ++ if (!printed_version) { ++ printk (version); ++ printed_version = 1; ++ } ++ ++ print_name = pdev ? pci_name(pdev) : "3c59x"; ++ ++ // *** RTnet *** ++ rtdev = rt_alloc_etherdev(sizeof(*vp), RX_RING_SIZE * 2 + TX_RING_SIZE); ++ retval = -ENOMEM; ++ if (!rtdev) { ++ printk (KERN_ERR PFX "unable to allocate etherdev, aborting\n"); ++ goto out; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ memset(rtdev->priv, 0, sizeof(*vp)); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ // *** RTnet *** ++ ++ vp = rtdev->priv; ++ ++ /* The lower four bits are the media type. */ ++ if (rtdev->mem_start) { ++ /* ++ * The 'options' param is passed in as the third arg to the ++ * LILO 'ether=' argument for non-modular use ++ */ ++ option = rtdev->mem_start; ++ } ++ else if (card_idx < MAX_UNITS) ++ option = options[card_idx]; ++ else ++ option = -1; ++ ++ if (option > 0) { ++ if (option & 0x8000) ++ vortex_debug = 7; ++ if (option & 0x4000) ++ vortex_debug = 2; ++ if (option & 0x0400) ++ vp->enable_wol = 1; ++ } ++ ++ print_info = (vortex_debug > 1); ++ if (print_info) ++ printk (KERN_INFO "See Documentation/networking/vortex.txt\n"); ++ ++ printk(KERN_INFO "%s: 3Com %s %s at 0x%lx. Vers " DRV_VERSION "\n", ++ print_name, ++ pdev ? "PCI" : "EISA", ++ vci->name, ++ ioaddr); ++ ++ rtdev->base_addr = ioaddr; ++ rtdev->irq = irq; ++ rtdev->mtu = mtu; ++ vp->drv_flags = vci->drv_flags; ++ vp->has_nway = (vci->drv_flags & HAS_NWAY) ? 1 : 0; ++ vp->io_size = vci->io_size; ++ vp->card_idx = card_idx; ++ ++ /* module list only for EISA devices */ ++ if (pdev == NULL) { ++ vp->next_module = root_vortex_eisa_dev; ++ root_vortex_eisa_dev = rtdev; ++ } ++ ++ /* PCI-only startup logic */ ++ if (pdev) { ++ /* EISA resources already marked, so only PCI needs to do this here */ ++ /* Ignore return value, because Cardbus drivers already allocate for us */ ++ if (!request_region(ioaddr, vci->io_size, print_name)) ++ printk(KERN_INFO "rt_3c50x: request region failed\n"); ++ else ++ vp->must_free_region = 1; ++ ++ /* enable bus-mastering if necessary */ ++ if (vci->flags & PCI_USES_MASTER) ++ pci_set_master (pdev); ++ ++ if (vci->drv_flags & IS_VORTEX) { ++ u8 pci_latency; ++ u8 new_latency = 248; ++ ++ /* Check the PCI latency value. On the 3c590 series the latency timer ++ must be set to the maximum value to avoid data corruption that occurs ++ when the timer expires during a transfer. This bug exists the Vortex ++ chip only. */ ++ pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &pci_latency); ++ if (pci_latency < new_latency) { ++ printk(KERN_INFO "%s: Overriding PCI latency" ++ " timer (CFLT) setting of %d, new value is %d.\n", ++ print_name, pci_latency, new_latency); ++ pci_write_config_byte(pdev, PCI_LATENCY_TIMER, new_latency); ++ } ++ } ++ } ++ ++ rtdm_lock_init(&vp->lock); ++ spin_lock_init(&vp->mdio_lock); ++ vp->pdev = pdev; ++ ++ /* Makes sure rings are at least 16 byte aligned. 
*/ ++ vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE ++ + sizeof(struct boom_tx_desc) * TX_RING_SIZE, ++ &vp->rx_ring_dma); ++ retval = -ENOMEM; ++ if (vp->rx_ring == 0) ++ goto free_region; ++ ++ vp->tx_ring = (struct boom_tx_desc *)(vp->rx_ring + RX_RING_SIZE); ++ vp->tx_ring_dma = vp->rx_ring_dma + sizeof(struct boom_rx_desc) * RX_RING_SIZE; ++ ++ /* if we are a PCI driver, we store info in pdev->driver_data ++ * instead of a module list */ ++ if (pdev) ++ pci_set_drvdata(pdev, rtdev); ++ ++ vp->media_override = 7; ++ if (option >= 0) { ++ vp->media_override = ((option & 7) == 2) ? 0 : option & 15; ++ if (vp->media_override != 7) ++ vp->medialock = 1; ++ vp->full_duplex = (option & 0x200) ? 1 : 0; ++ vp->bus_master = (option & 16) ? 1 : 0; ++ } ++ ++ if (card_idx < MAX_UNITS) { ++ if (full_duplex[card_idx] > 0) ++ vp->full_duplex = 1; ++ if (flow_ctrl[card_idx] > 0) ++ vp->flow_ctrl = 1; ++ if (enable_wol[card_idx] > 0) ++ vp->enable_wol = 1; ++ } ++ ++ vp->force_fd = vp->full_duplex; ++ vp->options = option; ++ ++ /* Read the station address from the EEPROM. */ ++ EL3WINDOW(0); ++ { ++ int base; ++ ++ if (vci->drv_flags & EEPROM_8BIT) ++ base = 0x230; ++ else if (vci->drv_flags & EEPROM_OFFSET) ++ base = EEPROM_Read + 0x30; ++ else ++ base = EEPROM_Read; ++ ++ for (i = 0; i < 0x40; i++) { ++ int timer; ++ outw(base + i, ioaddr + Wn0EepromCmd); ++ /* Pause for at least 162 us. for the read to take place. */ ++ for (timer = 10; timer >= 0; timer--) { ++ udelay(162); ++ if ((inw(ioaddr + Wn0EepromCmd) & 0x8000) == 0) ++ break; ++ } ++ eeprom[i] = inw(ioaddr + Wn0EepromData); ++ } ++ } ++ for (i = 0; i < 0x18; i++) ++ checksum ^= eeprom[i]; ++ checksum = (checksum ^ (checksum >> 8)) & 0xff; ++ if (checksum != 0x00) { /* Grrr, needless incompatible change 3Com. */ ++ while (i < 0x21) ++ checksum ^= eeprom[i++]; ++ checksum = (checksum ^ (checksum >> 8)) & 0xff; ++ } ++ if ((checksum != 0x00) && !(vci->drv_flags & IS_TORNADO)) ++ printk(" ***INVALID CHECKSUM %4.4x*** ", checksum); ++ ++ for (i = 0; i < 3; i++) ++ ((u16 *)rtdev->dev_addr)[i] = htons(eeprom[i + 10]); ++ if (print_info) { ++ for (i = 0; i < 6; i++) ++ printk("%c%2.2x", i ? ':' : ' ', rtdev->dev_addr[i]); ++ } ++ EL3WINDOW(2); ++ for (i = 0; i < 6; i++) ++ outb(rtdev->dev_addr[i], ioaddr + i); ++ ++#ifdef __sparc__ ++ if (print_info) ++ printk(", IRQ %s\n", __irq_itoa(rtdev->irq)); ++#else ++ if (print_info) ++ printk(", IRQ %d\n", rtdev->irq); ++ /* Tell them about an invalid IRQ. */ ++ if (rtdev->irq <= 0 || rtdev->irq >= NR_IRQS) ++ printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! 
***\n", ++ rtdev->irq); ++#endif ++ ++ EL3WINDOW(4); ++ step = (inb(ioaddr + Wn4_NetDiag) & 0x1e) >> 1; ++ if (print_info) { ++ printk(KERN_INFO " product code %02x%02x rev %02x.%d date %02d-" ++ "%02d-%02d\n", eeprom[6]&0xff, eeprom[6]>>8, eeprom[0x14], ++ step, (eeprom[4]>>5) & 15, eeprom[4] & 31, eeprom[4]>>9); ++ } ++ ++ ++ if (pdev && vci->drv_flags & HAS_CB_FNS) { ++ unsigned long fn_st_addr; /* Cardbus function status space */ ++ unsigned short n; ++ ++ fn_st_addr = pci_resource_start (pdev, 2); ++ if (fn_st_addr) { ++ vp->cb_fn_base = ioremap(fn_st_addr, 128); ++ retval = -ENOMEM; ++ if (!vp->cb_fn_base) ++ goto free_ring; ++ } ++ if (print_info) { ++ printk(KERN_INFO "%s: CardBus functions mapped %8.8lx->%p\n", ++ print_name, fn_st_addr, vp->cb_fn_base); ++ } ++ EL3WINDOW(2); ++ ++ n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010; ++ if (vp->drv_flags & INVERT_LED_PWR) ++ n |= 0x10; ++ if (vp->drv_flags & INVERT_MII_PWR) ++ n |= 0x4000; ++ outw(n, ioaddr + Wn2_ResetOptions); ++ } ++ ++ /* Extract our information from the EEPROM data. */ ++ vp->info1 = eeprom[13]; ++ vp->info2 = eeprom[15]; ++ vp->capabilities = eeprom[16]; ++ ++ if (vp->info1 & 0x8000) { ++ vp->full_duplex = 1; ++ if (print_info) ++ printk(KERN_INFO "Full duplex capable\n"); ++ } ++ ++ { ++ static const char * ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; ++ unsigned int config; ++ EL3WINDOW(3); ++ vp->available_media = inw(ioaddr + Wn3_Options); ++ if ((vp->available_media & 0xff) == 0) /* Broken 3c916 */ ++ vp->available_media = 0x40; ++ config = inl(ioaddr + Wn3_Config); ++ if (print_info) { ++ printk(KERN_DEBUG " Internal config register is %4.4x, " ++ "transceivers %#x.\n", config, inw(ioaddr + Wn3_Options)); ++ printk(KERN_INFO " %dK %s-wide RAM %s Rx:Tx split, %s%s interface.\n", ++ 8 << RAM_SIZE(config), ++ RAM_WIDTH(config) ? "word" : "byte", ++ ram_split[RAM_SPLIT(config)], ++ AUTOSELECT(config) ? "autoselect/" : "", ++ XCVR(config) > XCVR_ExtMII ? "" : ++ media_tbl[XCVR(config)].name); ++ } ++ vp->default_media = XCVR(config); ++ if (vp->default_media == XCVR_NWAY) ++ vp->has_nway = 1; ++ vp->autoselect = AUTOSELECT(config); ++ } ++ ++ if (vp->media_override != 7) { ++ printk(KERN_INFO "%s: Media override to transceiver type %d (%s).\n", ++ print_name, vp->media_override, ++ media_tbl[vp->media_override].name); ++ rtdev->if_port = vp->media_override; ++ } else ++ rtdev->if_port = vp->default_media; ++ ++ if (rtdev->if_port == XCVR_MII || rtdev->if_port == XCVR_NWAY) { ++ int phy, phy_idx = 0; ++ EL3WINDOW(4); ++ mii_preamble_required++; ++ mii_preamble_required++; ++ mdio_read(rtdev, 24, 1); ++ for (phy = 0; phy < 32 && phy_idx < 1; phy++) { ++ int mii_status, phyx; ++ ++ /* ++ * For the 3c905CX we look at index 24 first, because it bogusly ++ * reports an external PHY at all indices ++ */ ++ if (phy == 0) ++ phyx = 24; ++ else if (phy <= 24) ++ phyx = phy - 1; ++ else ++ phyx = phy; ++ mii_status = mdio_read(rtdev, phyx, 1); ++ if (mii_status && mii_status != 0xffff) { ++ vp->phys[phy_idx++] = phyx; ++ if (print_info) { ++ printk(KERN_INFO " MII transceiver found at address %d," ++ " status %4x.\n", phyx, mii_status); ++ } ++ if ((mii_status & 0x0040) == 0) ++ mii_preamble_required++; ++ } ++ } ++ mii_preamble_required--; ++ if (phy_idx == 0) { ++ printk(KERN_WARNING" ***WARNING*** No MII transceivers found!\n"); ++ vp->phys[0] = 24; ++ } else { ++ vp->advertising = mdio_read(rtdev, vp->phys[0], 4); ++ if (vp->full_duplex) { ++ /* Only advertise the FD media types. 
*/ ++ vp->advertising &= ~0x02A0; ++ mdio_write(rtdev, vp->phys[0], 4, vp->advertising); ++ } ++ } ++ } ++ ++ if (vp->capabilities & CapBusMaster) { ++ vp->full_bus_master_tx = 1; ++ if (print_info) { ++ printk(KERN_INFO " Enabling bus-master transmits and %s receives.\n", ++ (vp->info2 & 1) ? "early" : "whole-frame" ); ++ } ++ vp->full_bus_master_rx = (vp->info2 & 1) ? 1 : 2; ++ vp->bus_master = 0; /* AKPM: vortex only */ ++ } ++ ++ // *** RTnet *** ++ /* The 3c59x-specific entries in the device structure. */ ++ rtdev->open = vortex_open; ++ if (vp->full_bus_master_tx) { ++ rtdev->hard_start_xmit = boomerang_start_xmit; ++ /* Actually, it still should work with iommu. */ ++ rtdev->features |= NETIF_F_SG; ++ if (((hw_checksums[card_idx] == -1) && (vp->drv_flags & HAS_HWCKSM)) || ++ (hw_checksums[card_idx] == 1)) { ++ rtdev->features |= NETIF_F_IP_CSUM; ++ } ++ } else { ++ rtdev->hard_start_xmit = vortex_start_xmit; ++ } ++ rtdev->get_stats = vortex_get_stats; ++ ++ if (print_info) { ++ printk(KERN_INFO "%s: scatter/gather %sabled. h/w checksums %sabled\n", ++ print_name, ++ (rtdev->features & NETIF_F_SG) ? "en":"dis", ++ (rtdev->features & NETIF_F_IP_CSUM) ? "en":"dis"); ++ } ++ ++ rtdev->stop = vortex_close; ++ retval = rt_register_rtnetdev(rtdev); ++ if (retval) { ++ printk(KERN_ERR "rt_3c59x: rtnet device registration failed %d\n",retval); ++ goto free_ring; ++ } ++ return 0; ++ ++ // *** RTnet *** ++ ++ free_ring: ++ pci_free_consistent(pdev, ++ sizeof(struct boom_rx_desc) * RX_RING_SIZE ++ + sizeof(struct boom_tx_desc) * TX_RING_SIZE, ++ vp->rx_ring, ++ vp->rx_ring_dma); ++ free_region: ++ if (vp->must_free_region) ++ release_region(ioaddr, vci->io_size); ++ rtdev_free (rtdev); ++ printk(KERN_ERR PFX "vortex_probe1 fails. Returns %d\n", retval); ++ out: ++ return retval; ++} ++ ++static void ++issue_and_wait(struct rtnet_device *rtdev, int cmd) ++{ ++ int i; ++ ++ outw(cmd, rtdev->base_addr + EL3_CMD); ++ for (i = 0; i < 2000; i++) { ++ if (!(inw(rtdev->base_addr + EL3_STATUS) & CmdInProgress)) ++ return; ++ } ++ ++ /* OK, that didn't work. Do it the slow way. One second */ ++ for (i = 0; i < 100000; i++) { ++ if (!(inw(rtdev->base_addr + EL3_STATUS) & CmdInProgress)) { ++ if (vortex_debug > 1) ++ rtdm_printk(KERN_INFO "%s: command 0x%04x took %d usecs\n", ++ rtdev->name, cmd, i * 10); ++ return; ++ } ++ udelay(10); ++ } ++ rtdm_printk(KERN_ERR "%s: command 0x%04x did not complete! Status=0x%x\n", ++ rtdev->name, cmd, inw(rtdev->base_addr + EL3_STATUS)); ++} ++ ++static void ++vortex_up(struct rtnet_device *rtdev) ++{ ++ long ioaddr = rtdev->base_addr; ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ unsigned int config; ++ int i; ++ ++ if (vp->pdev && vp->enable_wol) { ++ pci_set_power_state(vp->pdev, 0); /* Go active */ ++ pci_restore_state(vp->pdev, vp->power_state); ++ } ++ ++ /* Before initializing select the active media port. */ ++ EL3WINDOW(3); ++ config = inl(ioaddr + Wn3_Config); ++ ++ if (vp->media_override != 7) { ++ printk(KERN_INFO "%s: Media override to transceiver %d (%s).\n", ++ rtdev->name, vp->media_override, ++ media_tbl[vp->media_override].name); ++ rtdev->if_port = vp->media_override; ++ } else if (vp->autoselect) { ++ if (vp->has_nway) { ++ if (vortex_debug > 1) ++ printk(KERN_INFO "%s: using NWAY device table, not %d\n", ++ rtdev->name, rtdev->if_port); ++ rtdev->if_port = XCVR_NWAY; ++ } else { ++ /* Find first available media type, starting with 100baseTx. */ ++ rtdev->if_port = XCVR_100baseTx; ++ while (! 
(vp->available_media & media_tbl[rtdev->if_port].mask)) ++ rtdev->if_port = media_tbl[rtdev->if_port].next; ++ if (vortex_debug > 1) ++ printk(KERN_INFO "%s: first available media type: %s\n", ++ rtdev->name, media_tbl[rtdev->if_port].name); ++ } ++ } else { ++ rtdev->if_port = vp->default_media; ++ if (vortex_debug > 1) ++ printk(KERN_INFO "%s: using default media %s\n", ++ rtdev->name, media_tbl[rtdev->if_port].name); ++ } ++ ++ init_timer(&vp->timer); ++ vp->timer.expires = RUN_AT(media_tbl[rtdev->if_port].wait); ++ vp->timer.data = (unsigned long)rtdev; ++ // *** RTnet vp->timer.function = vortex_timer; /* timer handler */ ++ // *** RTnet add_timer(&vp->timer); ++ ++ init_timer(&vp->rx_oom_timer); ++ vp->rx_oom_timer.data = (unsigned long)rtdev; ++ // **** RTnet *** vp->rx_oom_timer.function = rx_oom_timer; ++ ++ if (vortex_debug > 1) ++ printk(KERN_DEBUG "%s: Initial media type %s.\n", ++ rtdev->name, media_tbl[rtdev->if_port].name); ++ ++ vp->full_duplex = vp->force_fd; ++ config = BFINS(config, rtdev->if_port, 20, 4); ++ if (vortex_debug > 6) ++ printk(KERN_DEBUG "vortex_up(): writing 0x%x to InternalConfig\n", config); ++ outl(config, ioaddr + Wn3_Config); ++ ++ if (rtdev->if_port == XCVR_MII || rtdev->if_port == XCVR_NWAY) { ++ int mii_reg1, mii_reg5; ++ EL3WINDOW(4); ++ /* Read BMSR (reg1) only to clear old status. */ ++ mii_reg1 = mdio_read(rtdev, vp->phys[0], 1); ++ mii_reg5 = mdio_read(rtdev, vp->phys[0], 5); ++ if (mii_reg5 == 0xffff || mii_reg5 == 0x0000) ++ ; /* No MII device or no link partner report */ ++ else if ((mii_reg5 & 0x0100) != 0 /* 100baseTx-FD */ ++ || (mii_reg5 & 0x00C0) == 0x0040) /* 10T-FD, but not 100-HD */ ++ vp->full_duplex = 1; ++ vp->partner_flow_ctrl = ((mii_reg5 & 0x0400) != 0); ++ if (vortex_debug > 1) ++ printk(KERN_INFO "%s: MII #%d status %4.4x, link partner capability %4.4x," ++ " info1 %04x, setting %s-duplex.\n", ++ rtdev->name, vp->phys[0], ++ mii_reg1, mii_reg5, ++ vp->info1, ((vp->info1 & 0x8000) || vp->full_duplex) ? "full" : "half"); ++ EL3WINDOW(3); ++ } ++ ++ /* Set the full-duplex bit. */ ++ outw( ((vp->info1 & 0x8000) || vp->full_duplex ? 0x20 : 0) | ++ (rtdev->mtu > 1500 ? 0x40 : 0) | ++ ((vp->full_duplex && vp->flow_ctrl && vp->partner_flow_ctrl) ? 0x100 : 0), ++ ioaddr + Wn3_MAC_Ctrl); ++ ++ if (vortex_debug > 1) { ++ printk(KERN_DEBUG "%s: vortex_up() InternalConfig %8.8x.\n", ++ rtdev->name, config); ++ } ++ ++ issue_and_wait(rtdev, TxReset); ++ /* ++ * Don't reset the PHY - that upsets autonegotiation during DHCP operations. ++ */ ++ issue_and_wait(rtdev, RxReset|0x04); ++ ++ outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD); ++ ++ if (vortex_debug > 1) { ++ EL3WINDOW(4); ++ printk(KERN_DEBUG "%s: vortex_up() irq %d media status %4.4x.\n", ++ rtdev->name, rtdev->irq, inw(ioaddr + Wn4_Media)); ++ } ++ ++ /* Set the station address and mask in window 2 each time opened. */ ++ EL3WINDOW(2); ++ for (i = 0; i < 6; i++) ++ outb(rtdev->dev_addr[i], ioaddr + i); ++ for (; i < 12; i+=2) ++ outw(0, ioaddr + i); ++ ++ if (vp->cb_fn_base) { ++ unsigned short n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010; ++ if (vp->drv_flags & INVERT_LED_PWR) ++ n |= 0x10; ++ if (vp->drv_flags & INVERT_MII_PWR) ++ n |= 0x4000; ++ outw(n, ioaddr + Wn2_ResetOptions); ++ } ++ ++ if (rtdev->if_port == XCVR_10base2) ++ /* Start the thinnet transceiver. 
We should really wait 50ms...*/ ++ outw(StartCoax, ioaddr + EL3_CMD); ++ if (rtdev->if_port != XCVR_NWAY) { ++ EL3WINDOW(4); ++ outw((inw(ioaddr + Wn4_Media) & ~(Media_10TP|Media_SQE)) | ++ media_tbl[rtdev->if_port].media_bits, ioaddr + Wn4_Media); ++ } ++ ++ /* Switch to the stats window, and clear all stats by reading. */ ++ outw(StatsDisable, ioaddr + EL3_CMD); ++ EL3WINDOW(6); ++ for (i = 0; i < 10; i++) ++ inb(ioaddr + i); ++ inw(ioaddr + 10); ++ inw(ioaddr + 12); ++ /* New: On the Vortex we must also clear the BadSSD counter. */ ++ EL3WINDOW(4); ++ inb(ioaddr + 12); ++ /* ..and on the Boomerang we enable the extra statistics bits. */ ++ outw(0x0040, ioaddr + Wn4_NetDiag); ++ ++ /* Switch to register set 7 for normal use. */ ++ EL3WINDOW(7); ++ ++ if (vp->full_bus_master_rx) { /* Boomerang bus master. */ ++ vp->cur_rx = vp->dirty_rx = 0; ++ /* Initialize the RxEarly register as recommended. */ ++ outw(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD); ++ outl(0x0020, ioaddr + PktStatus); ++ outl(vp->rx_ring_dma, ioaddr + UpListPtr); ++ } ++ if (vp->full_bus_master_tx) { /* Boomerang bus master Tx. */ ++ vp->cur_tx = vp->dirty_tx = 0; ++ if (vp->drv_flags & IS_BOOMERANG) ++ outb(PKT_BUF_SZ>>8, ioaddr + TxFreeThreshold); /* Room for a packet. */ ++ /* Clear the Rx, Tx rings. */ ++ for (i = 0; i < RX_RING_SIZE; i++) /* AKPM: this is done in vortex_open, too */ ++ vp->rx_ring[i].status = 0; ++ for (i = 0; i < TX_RING_SIZE; i++) ++ vp->tx_skbuff[i] = 0; ++ outl(0, ioaddr + DownListPtr); ++ } ++ /* Set receiver mode: presumably accept b-case and phys addr only. */ ++ set_rx_mode(rtdev); ++ outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */ ++ ++// issue_and_wait(dev, SetTxStart|0x07ff); ++ outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */ ++ outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */ ++ /* Allow status bits to be seen. */ ++ vp->status_enable = SetStatusEnb | HostError|IntReq|StatsFull|TxComplete| ++ (vp->full_bus_master_tx ? DownComplete : TxAvailable) | ++ (vp->full_bus_master_rx ? UpComplete : RxComplete) | ++ (vp->bus_master ? DMADone : 0); ++ vp->intr_enable = SetIntrEnb | IntLatch | TxAvailable | ++ (vp->full_bus_master_rx ? 0 : RxComplete) | ++ StatsFull | HostError | TxComplete | IntReq ++ | (vp->bus_master ? DMADone : 0) | UpComplete | DownComplete; ++ outw(vp->status_enable, ioaddr + EL3_CMD); ++ /* Ack all pending events, and set active indicator mask. */ ++ outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq, ++ ioaddr + EL3_CMD); ++ outw(vp->intr_enable, ioaddr + EL3_CMD); ++ if (vp->cb_fn_base) /* The PCMCIA people are idiots. */ ++ writel(0x8000, vp->cb_fn_base + 4); ++ rtnetif_start_queue (rtdev); ++} ++ ++static int ++vortex_open(struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ int i; ++ int retval; ++ ++ // *** RTnet *** ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ if ((retval = rtdm_irq_request(&vp->irq_handle, rtdev->irq, ++ (vp->full_bus_master_rx ? boomerang_interrupt : vortex_interrupt), ++ 0, "rt_3c59x", rtdev))) { ++ printk(KERN_ERR "%s: Could not reserve IRQ %d\n", rtdev->name, rtdev->irq); ++ goto out; ++ } ++ // *** RTnet *** ++ ++ if (vp->full_bus_master_rx) { /* Boomerang bus master. 
*/ ++ if (vortex_debug > 2) ++ printk(KERN_DEBUG "%s: Filling in the Rx ring.\n", rtdev->name); ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct rtskb *skb; // *** RTnet ++ vp->rx_ring[i].next = cpu_to_le32(vp->rx_ring_dma + sizeof(struct boom_rx_desc) * (i+1)); ++ vp->rx_ring[i].status = 0; /* Clear complete bit. */ ++ vp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ | LAST_FRAG); ++ skb = rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ); ++ vp->rx_skbuff[i] = skb; ++ if (skb == NULL) ++ break; /* Bad news! */ ++ // *** RTnet *** ++ rtskb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ ++ vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(vp->pdev, ++ skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); ++ // *** RTnet *** ++ } ++ if (i != RX_RING_SIZE) { ++ int j; ++ printk(KERN_EMERG "%s: no memory for rx ring\n", rtdev->name); ++ for (j = 0; j < i; j++) { ++ if (vp->rx_skbuff[j]) { ++ dev_kfree_rtskb(vp->rx_skbuff[j]); ++ vp->rx_skbuff[j] = 0; ++ } ++ } ++ retval = -ENOMEM; ++ goto out_free_irq; ++ } ++ /* Wrap the ring. */ ++ vp->rx_ring[i-1].next = cpu_to_le32(vp->rx_ring_dma); ++ } ++ ++ vortex_up(rtdev); ++ return 0; ++ ++ out_free_irq: ++ ++ // *** RTnet *** ++ if ( (i=rtdm_irq_free(&vp->irq_handle))<0 ) ++ return i; ++ rt_stack_disconnect(rtdev); ++ // *** RTnet *** ++ out: ++ if (vortex_debug > 1) ++ printk(KERN_ERR "%s: vortex_open() fails: returning %d\n", rtdev->name, retval); ++ return retval; ++} ++ ++/* ++ * Handle uncommon interrupt sources. This is a separate routine to minimize ++ * the cache impact. ++ */ ++static void ++vortex_error(struct rtnet_device *rtdev, int status, nanosecs_abs_t *time_stamp) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int do_tx_reset = 0, reset_mask = 0; ++ unsigned char tx_status = 0; ++ int packets=0; ++ ++ if (vortex_debug > 2) { ++ rtdm_printk(KERN_ERR "%s: vortex_error(), status=0x%x\n", rtdev->name, status); ++ } ++ ++ if (status & TxComplete) { /* Really "TxError" for us. */ ++ tx_status = inb(ioaddr + TxStatus); ++ /* Presumably a tx-timeout. We must merely re-enable. */ ++ if (vortex_debug > 2 ++ || (tx_status != 0x88 && vortex_debug > 0)) { ++ rtdm_printk(KERN_ERR "%s: Transmit error, Tx status register %2.2x.\n", ++ rtdev->name, tx_status); ++ if (tx_status == 0x82) { ++ rtdm_printk(KERN_ERR "Probably a duplex mismatch. See " ++ "Documentation/networking/vortex.txt\n"); ++ } ++ dump_tx_ring(rtdev); ++ } ++ if (tx_status & 0x14) vp->stats.tx_fifo_errors++; ++ if (tx_status & 0x38) vp->stats.tx_aborted_errors++; ++ outb(0, ioaddr + TxStatus); ++ if (tx_status & 0x30) { /* txJabber or txUnderrun */ ++ do_tx_reset = 1; ++ } else if ((tx_status & 0x08) && (vp->drv_flags & MAX_COLLISION_RESET)) { /* maxCollisions */ ++ do_tx_reset = 1; ++ reset_mask = 0x0108; /* Reset interface logic, but not download logic */ ++ } else { /* Merely re-enable the transmitter. */ ++ outw(TxEnable, ioaddr + EL3_CMD); ++ } ++ } ++ ++ if (status & RxEarly) { /* Rx early is unused. */ ++ vortex_rx(rtdev, &packets, time_stamp); ++ outw(AckIntr | RxEarly, ioaddr + EL3_CMD); ++ } ++ if (status & StatsFull) { /* Empty statistics. */ ++ static int DoneDidThat; ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: Updating stats.\n", rtdev->name); ++ // *** RTnet *** update_stats(ioaddr, dev); ++ /* HACK: Disable statistics as an interrupt source. */ ++ /* This occurs when we have the wrong media type! 
*/ ++ if (DoneDidThat == 0 && ++ inw(ioaddr + EL3_STATUS) & StatsFull) { ++ rtdm_printk(KERN_WARNING "%s: Updating statistics failed, disabling " ++ "stats as an interrupt source.\n", rtdev->name); ++ EL3WINDOW(5); ++ outw(SetIntrEnb | (inw(ioaddr + 10) & ~StatsFull), ioaddr + EL3_CMD); ++ vp->intr_enable &= ~StatsFull; ++ EL3WINDOW(7); ++ DoneDidThat++; ++ } ++ } ++ if (status & IntReq) { /* Restore all interrupt sources. */ ++ outw(vp->status_enable, ioaddr + EL3_CMD); ++ outw(vp->intr_enable, ioaddr + EL3_CMD); ++ } ++ if (status & HostError) { ++ u16 fifo_diag; ++ EL3WINDOW(4); ++ fifo_diag = inw(ioaddr + Wn4_FIFODiag); ++ rtdm_printk(KERN_ERR "%s: Host error, FIFO diagnostic register %4.4x.\n", ++ rtdev->name, fifo_diag); ++ /* Adapter failure requires Tx/Rx reset and reinit. */ ++ if (vp->full_bus_master_tx) { ++ int bus_status = inl(ioaddr + PktStatus); ++ /* 0x80000000 PCI master abort. */ ++ /* 0x40000000 PCI target abort. */ ++ if (vortex_debug) ++ rtdm_printk(KERN_ERR "%s: PCI bus error, bus status %8.8x\n", rtdev->name, bus_status); ++ ++ /* In this case, blow the card away */ ++ vortex_down(rtdev); ++ issue_and_wait(rtdev, TotalReset | 0xff); ++ vortex_up(rtdev); /* AKPM: bug. vortex_up() assumes that the rx ring is full. It may not be. */ ++ } else if (fifo_diag & 0x0400) ++ do_tx_reset = 1; ++ if (fifo_diag & 0x3000) { ++ /* Reset Rx fifo and upload logic */ ++ issue_and_wait(rtdev, RxReset|0x07); ++ /* Set the Rx filter to the current state. */ ++ set_rx_mode(rtdev); ++ outw(RxEnable, ioaddr + EL3_CMD); /* Re-enable the receiver. */ ++ outw(AckIntr | HostError, ioaddr + EL3_CMD); ++ } ++ } ++ ++ if (do_tx_reset) { ++ issue_and_wait(rtdev, TxReset|reset_mask); ++ outw(TxEnable, ioaddr + EL3_CMD); ++ if (!vp->full_bus_master_tx) ++ rtnetif_wake_queue(rtdev); ++ } ++} ++ ++static int ++vortex_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ rtdm_lockctx_t context; ++ ++ /* Put out the doubleword header... */ ++ outl(skb->len, ioaddr + TX_FIFO); ++ if (vp->bus_master) { ++ /* Set the bus-master controller to transfer the packet. */ ++ int len = (skb->len + 3) & ~3; ++ outl( vp->tx_skb_dma = pci_map_single(vp->pdev, skb->data, ++ len, PCI_DMA_TODEVICE), ++ ioaddr + Wn7_MasterAddr); ++ outw(len, ioaddr + Wn7_MasterLen); ++ vp->tx_skb = skb; ++ ++ rtdm_lock_irqsave(context); ++ if (unlikely(skb->xmit_stamp != NULL)) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++ outw(StartDMADown, ioaddr + EL3_CMD); ++ rtdm_lock_irqrestore(context); ++ ++ /* rtnetif_wake_queue() will be called at the DMADone interrupt. */ ++ } else { ++ rtdm_printk("rt_3x59x: UNSUPPORTED CODE PATH (device is lacking DMA support)!\n"); ++ /* ... and the packet rounded to a doubleword. */ ++ outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2); ++ dev_kfree_rtskb (skb); ++ if (inw(ioaddr + TxFree) > 1536) { ++ rtnetif_start_queue (rtdev); /* AKPM: redundant? */ ++ } else { ++ /* Interrupt us when the FIFO has room for max-sized packet. */ ++ rtnetif_stop_queue(rtdev); ++ outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD); ++ } ++ } ++ ++ //rtdev->trans_start = jiffies; ++ ++ /* Clear the Tx status stack. */ ++ { ++ int tx_status; ++ int i = 32; ++ ++ while (--i > 0 && (tx_status = inb(ioaddr + TxStatus)) > 0) { ++ if (tx_status & 0x3C) { /* A Tx-disabling error occurred. 
*/ ++ if (vortex_debug > 2) ++ printk(KERN_DEBUG "%s: Tx error, status %2.2x.\n", ++ rtdev->name, tx_status); ++ if (tx_status & 0x04) vp->stats.tx_fifo_errors++; ++ if (tx_status & 0x38) vp->stats.tx_aborted_errors++; ++ if (tx_status & 0x30) { ++ issue_and_wait(rtdev, TxReset); ++ } ++ outw(TxEnable, ioaddr + EL3_CMD); ++ } ++ outb(0x00, ioaddr + TxStatus); /* Pop the status stack. */ ++ } ++ } ++ return 0; ++} ++ ++static int ++boomerang_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ /* Calculate the next Tx descriptor entry. */ ++ int entry = vp->cur_tx % TX_RING_SIZE; ++ struct boom_tx_desc *prev_entry = &vp->tx_ring[(vp->cur_tx-1) % TX_RING_SIZE]; ++ rtdm_lockctx_t context; ++ ++ if (vortex_debug > 6) { ++ rtdm_printk(KERN_DEBUG "boomerang_start_xmit()\n"); ++ if (vortex_debug > 3) ++ rtdm_printk(KERN_DEBUG "%s: Trying to send a packet, Tx index %d.\n", ++ rtdev->name, vp->cur_tx); ++ } ++ ++ if (vp->cur_tx - vp->dirty_tx >= TX_RING_SIZE) { ++ if (vortex_debug > 0) ++ rtdm_printk(KERN_WARNING "%s: BUG! Tx Ring full, refusing to send buffer.\n", ++ rtdev->name); ++ rtnetif_stop_queue(rtdev); ++ return 1; ++ } ++ ++ vp->tx_skbuff[entry] = skb; ++ ++ vp->tx_ring[entry].next = 0; ++#if DO_ZEROCOPY ++ if (skb->ip_summed != CHECKSUM_HW) ++ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded); ++ else ++ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum); ++ ++ if (!skb_shinfo(skb)->nr_frags) { ++ { ++// int j; ++// for (j=0; jlen; j++) ++// { ++// rtdm_printk("%02x ", skb->data[j]); ++// } ++ ++ } ++ vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, ++ skb->data, skb->len, PCI_DMA_TODEVICE)); ++ vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len | LAST_FRAG); ++ } else { ++ int i; ++ ++ vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, ++ skb->data, skb->len, PCI_DMA_TODEVICE)); ++ vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len); ++ ++ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { ++ skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; ++ ++ vp->tx_ring[entry].frag[i+1].addr = ++ cpu_to_le32(pci_map_single(vp->pdev, // *** RTnet: page mapping correct? Or is this code never used? ++ (void*)page_address(frag->page) + frag->page_offset, ++ frag->size, PCI_DMA_TODEVICE)); ++ ++ if (i == skb_shinfo(skb)->nr_frags-1) ++ vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size|LAST_FRAG); ++ else ++ vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size); ++ } ++ } ++#else ++ vp->tx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, ++ skb->data, skb->len, PCI_DMA_TODEVICE)); ++ vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG); ++ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded); ++#endif ++ ++ // *** RTnet *** ++ rtdm_irq_disable(&vp->irq_handle); ++ rtdm_lock_get(&vp->lock); ++ // *** RTnet *** ++ ++ /* Wait for the stall to complete. 
*/ ++ issue_and_wait(rtdev, DownStall); ++ ++ rtdm_lock_irqsave(context); ++ if (unlikely(skb->xmit_stamp != NULL)) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ prev_entry->next = cpu_to_le32(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc)); ++ if (inl(ioaddr + DownListPtr) == 0) { ++ outl(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc), ioaddr + DownListPtr); ++ vp->queued_packet++; ++ } ++ ++ vp->cur_tx++; ++ if (vp->cur_tx - vp->dirty_tx > TX_RING_SIZE - 1) { ++ rtnetif_stop_queue (rtdev); ++ } else { /* Clear previous interrupt enable. */ ++#if defined(tx_interrupt_mitigation) ++ /* Dubious. If in boomeang_interrupt "faster" cyclone ifdef ++ * were selected, this would corrupt DN_COMPLETE. No? ++ */ ++ prev_entry->status &= cpu_to_le32(~TxIntrUploaded); ++#endif ++ } ++ outw(DownUnstall, ioaddr + EL3_CMD); ++ rtdm_lock_put_irqrestore(&vp->lock, context); ++ rtdm_irq_enable(&vp->irq_handle); ++ //rtdev->trans_start = jiffies; ++ return 0; ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. */ ++ ++/* ++ * This is the ISR for the vortex series chips. ++ * full_bus_master_tx == 0 && full_bus_master_rx == 0 ++ */ ++ ++static int vortex_interrupt(rtdm_irq_t *irq_handle) ++{ ++ // *** RTnet *** ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ // *** RTnet *** ++ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr; ++ int status; ++ int work_done = max_interrupt_work; ++ ++ ioaddr = rtdev->base_addr; ++ rtdm_lock_get(&vp->lock); ++ ++ status = inw(ioaddr + EL3_STATUS); ++ ++ if (vortex_debug > 6) ++ printk("vortex_interrupt(). status=0x%4x\n", status); ++ ++ if ((status & IntLatch) == 0) ++ goto handler_exit; /* No interrupt: shared IRQs cause this */ ++ ++ if (status & IntReq) { ++ status |= vp->deferred; ++ vp->deferred = 0; ++ } ++ ++ if (status == 0xffff) /* h/w no longer present (hotplug)? */ ++ goto handler_exit; ++ ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n", ++ rtdev->name, status, inb(ioaddr + Timer)); ++ ++ do { ++ if (vortex_debug > 5) ++ rtdm_printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n", ++ rtdev->name, status); ++ if (status & RxComplete) ++ vortex_rx(rtdev, &packets, &time_stamp); ++ ++ if (status & TxAvailable) { ++ if (vortex_debug > 5) ++ rtdm_printk(KERN_DEBUG " TX room bit was handled.\n"); ++ /* There's room in the FIFO for a full-sized packet. */ ++ outw(AckIntr | TxAvailable, ioaddr + EL3_CMD); ++ rtnetif_wake_queue (rtdev); ++ } ++ ++ if (status & DMADone) { ++ if (inw(ioaddr + Wn7_MasterStatus) & 0x1000) { ++ outw(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */ ++ pci_unmap_single(vp->pdev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE); ++ dev_kfree_rtskb(vp->tx_skb); /* Release the transferred buffer */ ++ if (inw(ioaddr + TxFree) > 1536) { ++ /* ++ * AKPM: FIXME: I don't think we need this. If the queue was stopped due to ++ * insufficient FIFO room, the TxAvailable test will succeed and call ++ * rtnetif_wake_queue() ++ */ ++ rtnetif_wake_queue(rtdev); ++ } else { /* Interrupt when FIFO has room for max-sized packet. */ ++ outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD); ++ rtnetif_stop_queue(rtdev); ++ } ++ } ++ } ++ /* Check for all uncommon interrupts at once. 
*/ ++ if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq)) { ++ if (status == 0xffff) ++ break; ++ vortex_error(rtdev, status, &time_stamp); ++ } ++ ++ if (--work_done < 0) { ++ rtdm_printk(KERN_WARNING "%s: Too much work in interrupt, status " ++ "%4.4x.\n", rtdev->name, status); ++ /* Disable all pending interrupts. */ ++ do { ++ vp->deferred |= status; ++ outw(SetStatusEnb | (~vp->deferred & vp->status_enable), ++ ioaddr + EL3_CMD); ++ outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD); ++ } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch); ++ /* The timer will reenable interrupts. */ ++ mod_timer(&vp->timer, jiffies + 1*HZ); ++ break; ++ } ++ /* Acknowledge the IRQ. */ ++ outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); ++ } while ((status = inw(ioaddr + EL3_STATUS)) & (IntLatch | RxComplete)); ++ ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n", ++ rtdev->name, status); ++ handler_exit: ++ rtdm_lock_put(&vp->lock); ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++/* ++ * This is the ISR for the boomerang series chips. ++ * full_bus_master_tx == 1 && full_bus_master_rx == 1 ++ */ ++ ++static int boomerang_interrupt(rtdm_irq_t *irq_handle) ++{ ++ // *** RTnet *** ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ // *** RTnet *** ++ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr; ++ int status; ++ int work_done = max_interrupt_work; ++ ++ ioaddr = rtdev->base_addr; ++ ++ /* ++ * It seems dopey to put the spinlock this early, but we could race against vortex_tx_timeout ++ * and boomerang_start_xmit ++ */ ++ rtdm_lock_get(&vp->lock); ++ ++ status = inw(ioaddr + EL3_STATUS); ++ ++ if (vortex_debug > 6) ++ rtdm_printk(KERN_DEBUG "boomerang_interrupt. status=0x%4x\n", status); ++ ++ if ((status & IntLatch) == 0) ++ goto handler_exit; /* No interrupt: shared IRQs can cause this */ ++ ++ if (status == 0xffff) { /* h/w no longer present (hotplug)? */ ++ if (vortex_debug > 1) ++ rtdm_printk(KERN_DEBUG "boomerang_interrupt(1): status = 0xffff\n"); ++ goto handler_exit; ++ } ++ ++ if (status & IntReq) { ++ status |= vp->deferred; ++ vp->deferred = 0; ++ } ++ ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n", ++ rtdev->name, status, inb(ioaddr + Timer)); ++ do { ++ if (vortex_debug > 5) ++ rtdm_printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n", ++ rtdev->name, status); ++ if (status & UpComplete) { ++ outw(AckIntr | UpComplete, ioaddr + EL3_CMD); ++ if (vortex_debug > 5) ++ rtdm_printk(KERN_DEBUG "boomerang_interrupt->boomerang_rx\n"); ++ boomerang_rx(rtdev, &packets, &time_stamp); ++ } ++ ++ if (status & DownComplete) { ++ unsigned int dirty_tx = vp->dirty_tx; ++ ++ outw(AckIntr | DownComplete, ioaddr + EL3_CMD); ++ while (vp->cur_tx - dirty_tx > 0) { ++ int entry = dirty_tx % TX_RING_SIZE; ++ if (inl(ioaddr + DownListPtr) == ++ vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc)) ++ break; /* It still hasn't been processed. 
*/ ++ ++ if (vp->tx_skbuff[entry]) { ++ struct rtskb *skb = vp->tx_skbuff[entry]; ++#if DO_ZEROCOPY ++ int i; ++ for (i=0; i<=skb_shinfo(skb)->nr_frags; i++) ++ pci_unmap_single(vp->pdev, ++ le32_to_cpu(vp->tx_ring[entry].frag[i].addr), ++ le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF, ++ PCI_DMA_TODEVICE); ++#else ++ pci_unmap_single(vp->pdev, ++ le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE); ++#endif ++ dev_kfree_rtskb(skb); ++ vp->tx_skbuff[entry] = 0; ++ } else { ++ rtdm_printk(KERN_DEBUG "boomerang_interrupt: no skb!\n"); ++ } ++ /* vp->stats.tx_packets++; Counted below. */ ++ dirty_tx++; ++ } ++ vp->dirty_tx = dirty_tx; ++ if (vp->cur_tx - dirty_tx <= TX_RING_SIZE - 1) { ++ if (vortex_debug > 6) ++ rtdm_printk(KERN_DEBUG "boomerang_interrupt: wake queue\n"); ++ rtnetif_wake_queue (rtdev); ++ } ++ } ++ ++ /* Check for all uncommon interrupts at once. */ ++ if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq)) ++ vortex_error(rtdev, status, &time_stamp); ++ ++ if (--work_done < 0) { ++ rtdm_printk(KERN_WARNING "%s: Too much work in interrupt, status " ++ "%4.4x.\n", rtdev->name, status); ++ /* Disable all pending interrupts. */ ++ do { ++ vp->deferred |= status; ++ outw(SetStatusEnb | (~vp->deferred & vp->status_enable), ++ ioaddr + EL3_CMD); ++ outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD); ++ } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch); ++ /* The timer will reenable interrupts. */ ++ mod_timer(&vp->timer, jiffies + 1*HZ); ++ break; ++ } ++ /* Acknowledge the IRQ. */ ++ outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); ++ if (vp->cb_fn_base) /* The PCMCIA people are idiots. */ ++ writel(0x8000, vp->cb_fn_base + 4); ++ ++ } while ((status = inw(ioaddr + EL3_STATUS)) & IntLatch); ++ ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n", ++ rtdev->name, status); ++ handler_exit: ++ rtdm_lock_put(&vp->lock); ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int vortex_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int i; ++ short rx_status; ++ ++ if (vortex_debug > 5) ++ printk(KERN_DEBUG "vortex_rx(): status %4.4x, rx_status %4.4x.\n", ++ inw(ioaddr+EL3_STATUS), inw(ioaddr+RxStatus)); ++ while ((rx_status = inw(ioaddr + RxStatus)) > 0) { ++ if (rx_status & 0x4000) { /* Error, update stats. */ ++ unsigned char rx_error = inb(ioaddr + RxErrors); ++ if (vortex_debug > 2) ++ printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error); ++ vp->stats.rx_errors++; ++ if (rx_error & 0x01) vp->stats.rx_over_errors++; ++ if (rx_error & 0x02) vp->stats.rx_length_errors++; ++ if (rx_error & 0x04) vp->stats.rx_frame_errors++; ++ if (rx_error & 0x08) vp->stats.rx_crc_errors++; ++ if (rx_error & 0x10) vp->stats.rx_length_errors++; ++ } else { ++ /* The packet length: up to 4.5K!. */ ++ int pkt_len = rx_status & 0x1fff; ++ struct rtskb *skb; ++ ++ skb = rtnetdev_alloc_rtskb(rtdev, pkt_len + 5); ++ if (vortex_debug > 4) ++ printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n", ++ pkt_len, rx_status); ++ if (skb != NULL) { ++ rtskb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ ++ /* 'skb_put()' points to the start of sk_buff data area. */ ++ if (vp->bus_master && ++ ! 
(inw(ioaddr + Wn7_MasterStatus) & 0x8000)) { ++ dma_addr_t dma = pci_map_single(vp->pdev, ++ rtskb_put(skb, pkt_len), ++ pkt_len, PCI_DMA_FROMDEVICE); ++ outl(dma, ioaddr + Wn7_MasterAddr); ++ outw((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen); ++ outw(StartDMAUp, ioaddr + EL3_CMD); ++ while (inw(ioaddr + Wn7_MasterStatus) & 0x8000) ++ ; ++ pci_unmap_single(vp->pdev, dma, pkt_len, PCI_DMA_FROMDEVICE); ++ } else { ++ insl(ioaddr + RX_FIFO, rtskb_put(skb, pkt_len), ++ (pkt_len + 3) >> 2); ++ } ++ outw(RxDiscard, ioaddr + EL3_CMD); /* Pop top Rx packet. */ ++ skb->protocol = rt_eth_type_trans(skb, rtdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ //rtdev->last_rx = jiffies; ++ vp->stats.rx_packets++; ++ (*packets)++; ++ ++ /* Wait a limited time to go to next packet. */ ++ for (i = 200; i >= 0; i--) ++ if ( ! (inw(ioaddr + EL3_STATUS) & CmdInProgress)) ++ break; ++ continue; ++ } else if (vortex_debug > 0) ++ printk(KERN_NOTICE "%s: No memory to allocate a sk_buff of " ++ "size %d.\n", rtdev->name, pkt_len); ++ } ++ vp->stats.rx_dropped++; ++ issue_and_wait(rtdev, RxDiscard); ++ } ++ ++ return 0; ++} ++ ++static int ++boomerang_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ int entry = vp->cur_rx % RX_RING_SIZE; ++ long ioaddr = rtdev->base_addr; ++ int rx_status; ++ int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx; ++ ++ ++ if (vortex_debug > 5) ++ rtdm_printk(KERN_DEBUG "boomerang_rx(): status %4.4x\n", inw(ioaddr+EL3_STATUS)); ++ ++ while ((rx_status = le32_to_cpu(vp->rx_ring[entry].status)) & RxDComplete){ ++ if (--rx_work_limit < 0) ++ break; ++ if (rx_status & RxDError) { /* Error, update stats. */ ++ unsigned char rx_error = rx_status >> 16; ++ if (vortex_debug > 2) ++ rtdm_printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error); ++ vp->stats.rx_errors++; ++ if (rx_error & 0x01) vp->stats.rx_over_errors++; ++ if (rx_error & 0x02) vp->stats.rx_length_errors++; ++ if (rx_error & 0x04) vp->stats.rx_frame_errors++; ++ if (rx_error & 0x08) vp->stats.rx_crc_errors++; ++ if (rx_error & 0x10) vp->stats.rx_length_errors++; ++ } else { ++ /* The packet length: up to 4.5K!. */ ++ int pkt_len = rx_status & 0x1fff; ++ struct rtskb *skb; ++ dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr); ++ ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n", ++ pkt_len, rx_status); ++ ++ /* Check if the packet is long enough to just accept without ++ copying to a properly sized skbuff. */ ++ { ++/*** RTnet ***/ ++ /* Pass up the skbuff already on the Rx ring. */ ++ skb = vp->rx_skbuff[entry]; ++ vp->rx_skbuff[entry] = NULL; ++ rtskb_put(skb, pkt_len); ++ pci_unmap_single(vp->pdev, dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ vp->rx_nocopy++; ++ } ++ skb->protocol = rt_eth_type_trans(skb, rtdev); ++ skb->time_stamp = *time_stamp; ++ { /* Use hardware checksum info. */ ++ int csum_bits = rx_status & 0xee000000; ++ if (csum_bits && ++ (csum_bits == (IPChksumValid | TCPChksumValid) || ++ csum_bits == (IPChksumValid | UDPChksumValid))) { ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ vp->rx_csumhits++; ++ } ++ } ++ rtnetif_rx(skb); ++ //rtdev->last_rx = jiffies; ++ vp->stats.rx_packets++; ++ (*packets)++; ++ } ++ entry = (++vp->cur_rx) % RX_RING_SIZE; ++ } ++ /* Refill the Rx ring buffers. 
*/ ++ for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) { ++ struct rtskb *skb; ++ entry = vp->dirty_rx % RX_RING_SIZE; ++ if (vp->rx_skbuff[entry] == NULL) { ++ skb = rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ); ++ if (skb == NULL) { ++ static unsigned long last_jif; ++ if ((jiffies - last_jif) > 10 * HZ) { ++ rtdm_printk(KERN_WARNING "%s: memory shortage\n", rtdev->name); ++ last_jif = jiffies; ++ } ++ if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) ++ { ++ // *** RTnet *** mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1)); ++ ; ++ } ++ break; /* Bad news! */ ++ } ++ rtskb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ ++ vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, ++ skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); ++ vp->rx_skbuff[entry] = skb; ++ } ++ vp->rx_ring[entry].status = 0; /* Clear complete bit. */ ++ outw(UpUnstall, ioaddr + EL3_CMD); ++ } ++ return 0; ++} ++ ++/* ++ * If we've hit a total OOM refilling the Rx ring we poll once a second ++ * for some memory. Otherwise there is no way to restart the rx process. ++ */ ++static void ++vortex_down(struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ ++ rtnetif_stop_queue (rtdev); ++ ++ del_timer_sync(&vp->rx_oom_timer); ++ del_timer_sync(&vp->timer); ++ ++ /* Turn off statistics ASAP. We update vp->stats below. */ ++ outw(StatsDisable, ioaddr + EL3_CMD); ++ ++ /* Disable the receiver and transmitter. */ ++ outw(RxDisable, ioaddr + EL3_CMD); ++ outw(TxDisable, ioaddr + EL3_CMD); ++ ++ if (rtdev->if_port == XCVR_10base2) ++ /* Turn off thinnet power. Green! */ ++ outw(StopCoax, ioaddr + EL3_CMD); ++ ++ outw(SetIntrEnb | 0x0000, ioaddr + EL3_CMD); ++ ++ // *** RTnet *** update_stats(ioaddr, dev); ++ if (vp->full_bus_master_rx) ++ outl(0, ioaddr + UpListPtr); ++ if (vp->full_bus_master_tx) ++ outl(0, ioaddr + DownListPtr); ++ ++ if (vp->pdev && vp->enable_wol) { ++ pci_save_state(vp->pdev, vp->power_state); ++ acpi_set_WOL(rtdev); ++ } ++} ++ ++static int ++vortex_close(struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int i; ++ ++ // rtnet_device is always present after vortex_open was called. ++ //if (netif_device_present(dev)) ++ // vortex_down(dev); ++ vortex_down(rtdev); ++ ++ if (vortex_debug > 1) { ++ printk(KERN_DEBUG"%s: vortex_close() status %4.4x, Tx status %2.2x.\n", ++ rtdev->name, inw(ioaddr + EL3_STATUS), inb(ioaddr + TxStatus)); ++ printk(KERN_DEBUG "%s: vortex close stats: rx_nocopy %d rx_copy %d" ++ " tx_queued %d Rx pre-checksummed %d.\n", ++ rtdev->name, vp->rx_nocopy, vp->rx_copy, vp->queued_packet, vp->rx_csumhits); ++ } ++ ++#if DO_ZEROCOPY ++ if ( vp->rx_csumhits && ++ ((vp->drv_flags & HAS_HWCKSM) == 0) && ++ (hw_checksums[vp->card_idx] == -1)) { ++ printk(KERN_WARNING "%s supports hardware checksums, and we're not using them!\n", rtdev->name); ++ printk(KERN_WARNING "Please see http://www.uow.edu.au/~andrewm/zerocopy.html\n"); ++ } ++#endif ++ ++ // *** RTnet *** ++ if ( (i=rtdm_irq_free(&vp->irq_handle))<0 ) ++ return i; ++ ++ rt_stack_disconnect(rtdev); ++ ++ // *** RTnet *** ++ ++ if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. 
*/ ++ for (i = 0; i < RX_RING_SIZE; i++) ++ if (vp->rx_skbuff[i]) { ++ pci_unmap_single( vp->pdev, le32_to_cpu(vp->rx_ring[i].addr), ++ PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ dev_kfree_rtskb(vp->rx_skbuff[i]); ++ vp->rx_skbuff[i] = 0; ++ } ++ } ++ if (vp->full_bus_master_tx) { /* Free Boomerang bus master Tx buffers. */ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ if (vp->tx_skbuff[i]) { ++ struct rtskb *skb = vp->tx_skbuff[i]; ++#if DO_ZEROCOPY ++ int k; ++ ++ for (k=0; k<=skb_shinfo(skb)->nr_frags; k++) ++ pci_unmap_single(vp->pdev, ++ le32_to_cpu(vp->tx_ring[i].frag[k].addr), ++ le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF, ++ PCI_DMA_TODEVICE); ++#else ++ pci_unmap_single(vp->pdev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE); ++#endif ++ dev_kfree_rtskb(skb); ++ vp->tx_skbuff[i] = 0; ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static void ++dump_tx_ring(struct rtnet_device *rtdev) ++{ ++ if (vortex_debug > 0) { ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ ++ if (vp->full_bus_master_tx) { ++ int i; ++ int stalled = inl(ioaddr + PktStatus) & 0x04; /* Possible racy. But it's only debug stuff */ ++ ++ rtdm_printk(KERN_ERR " Flags; bus-master %d, dirty %d(%d) current %d(%d)\n", ++ vp->full_bus_master_tx, ++ vp->dirty_tx, vp->dirty_tx % TX_RING_SIZE, ++ vp->cur_tx, vp->cur_tx % TX_RING_SIZE); ++ rtdm_printk(KERN_ERR " Transmit list %8.8x vs. %p.\n", ++ inl(ioaddr + DownListPtr), ++ &vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]); ++ issue_and_wait(rtdev, DownStall); ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ rtdm_printk(KERN_ERR " %d: @%p length %8.8x status %8.8x\n", i, ++ &vp->tx_ring[i], ++#if DO_ZEROCOPY ++ le32_to_cpu(vp->tx_ring[i].frag[0].length), ++#else ++ le32_to_cpu(vp->tx_ring[i].length), ++#endif ++ le32_to_cpu(vp->tx_ring[i].status)); ++ } ++ if (!stalled) ++ outw(DownUnstall, ioaddr + EL3_CMD); ++ } ++ } ++} ++ ++static struct net_device_stats *vortex_get_stats(struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ rtdm_lockctx_t flags; ++ ++ if (rtnetif_device_present(rtdev)) { /* AKPM: Used to be netif_running */ ++ rtdm_lock_get_irqsave (&vp->lock, flags); ++ update_stats(rtdev->base_addr, rtdev); ++ rtdm_lock_put_irqrestore (&vp->lock, flags); ++ } ++ return &vp->stats; ++} ++ ++/* Update statistics. ++ Unlike with the EL3 we need not worry about interrupts changing ++ the window setting from underneath us, but we must still guard ++ against a race condition with a StatsUpdate interrupt updating the ++ table. This is done by checking that the ASM (!) code generated uses ++ atomic updates with '+='. ++*/ ++static void update_stats(long ioaddr, struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ int old_window = inw(ioaddr + EL3_CMD); ++ ++ if (old_window == 0xffff) /* Chip suspended or ejected. */ ++ return; ++ /* Unlike the 3c5x9 we need not turn off stats updates while reading. */ ++ /* Switch to the stats window, and read everything. */ ++ EL3WINDOW(6); ++ vp->stats.tx_carrier_errors += inb(ioaddr + 0); ++ vp->stats.tx_heartbeat_errors += inb(ioaddr + 1); ++ /* Multiple collisions. 
*/ inb(ioaddr + 2); ++ vp->stats.collisions += inb(ioaddr + 3); ++ vp->stats.tx_window_errors += inb(ioaddr + 4); ++ vp->stats.rx_fifo_errors += inb(ioaddr + 5); ++ vp->stats.tx_packets += inb(ioaddr + 6); ++ vp->stats.tx_packets += (inb(ioaddr + 9)&0x30) << 4; ++ /* Rx packets */ inb(ioaddr + 7); /* Must read to clear */ ++ /* Tx deferrals */ inb(ioaddr + 8); ++ /* Don't bother with register 9, an extension of registers 6&7. ++ If we do use the 6&7 values the atomic update assumption above ++ is invalid. */ ++ vp->stats.rx_bytes += inw(ioaddr + 10); ++ vp->stats.tx_bytes += inw(ioaddr + 12); ++ /* New: On the Vortex we must also clear the BadSSD counter. */ ++ EL3WINDOW(4); ++ inb(ioaddr + 12); ++ ++ { ++ u8 up = inb(ioaddr + 13); ++ vp->stats.rx_bytes += (up & 0x0f) << 16; ++ vp->stats.tx_bytes += (up & 0xf0) << 12; ++ } ++ ++ EL3WINDOW(old_window >> 13); ++ return; ++} ++ ++/* Pre-Cyclone chips have no documented multicast filter, so the only ++ multicast setting is to receive all multicast frames. At least ++ the chip has a very clean way to set the mode, unlike many others. */ ++static void set_rx_mode(struct rtnet_device *rtdev) ++{ ++ long ioaddr = rtdev->base_addr; ++ int new_mode; ++ ++ if (rtdev->flags & IFF_PROMISC) { ++ if (vortex_debug > 0) ++ printk(KERN_NOTICE "%s: Setting promiscuous mode.\n", rtdev->name); ++ new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast|RxProm; ++ } else if (rtdev->flags & IFF_ALLMULTI) { ++ new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast; ++ } else ++ new_mode = SetRxFilter | RxStation | RxBroadcast; ++ ++ outw(new_mode, ioaddr + EL3_CMD); ++} ++ ++/* MII transceiver control section. ++ Read and write the MII registers using software-generated serial ++ MDIO protocol. See the MII specifications or DP83840A data sheet ++ for details. */ ++ ++/* The maximum data clock rate is 2.5 Mhz. The minimum timing is usually ++ met by back-to-back PCI I/O cycles, but we insert a delay to avoid ++ "overclocking" issues. */ ++#define mdio_delay() inl(mdio_addr) ++ ++#define MDIO_SHIFT_CLK 0x01 ++#define MDIO_DIR_WRITE 0x04 ++#define MDIO_DATA_WRITE0 (0x00 | MDIO_DIR_WRITE) ++#define MDIO_DATA_WRITE1 (0x02 | MDIO_DIR_WRITE) ++#define MDIO_DATA_READ 0x02 ++#define MDIO_ENB_IN 0x00 ++ ++/* Generate the preamble required for initial synchronization and ++ a few older transceivers. */ ++static void mdio_sync(long ioaddr, int bits) ++{ ++ long mdio_addr = ioaddr + Wn4_PhysicalMgmt; ++ ++ /* Establish sync by sending at least 32 logic ones. */ ++ while (-- bits >= 0) { ++ outw(MDIO_DATA_WRITE1, mdio_addr); ++ mdio_delay(); ++ outw(MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++} ++ ++static int mdio_read(struct rtnet_device *rtdev, int phy_id, int location) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ int i; ++ long ioaddr = rtdev->base_addr; ++ int read_cmd = (0xf6 << 10) | (phy_id << 5) | location; ++ unsigned int retval = 0; ++ long mdio_addr = ioaddr + Wn4_PhysicalMgmt; ++ ++ spin_lock_bh(&vp->mdio_lock); ++ ++ if (mii_preamble_required) ++ mdio_sync(ioaddr, 32); ++ ++ /* Shift the read command bits out. */ ++ for (i = 14; i >= 0; i--) { ++ int dataval = (read_cmd&(1< 0; i--) { ++ outw(MDIO_ENB_IN, mdio_addr); ++ mdio_delay(); ++ retval = (retval << 1) | ((inw(mdio_addr) & MDIO_DATA_READ) ? 1 : 0); ++ outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ spin_unlock_bh(&vp->mdio_lock); ++ return retval & 0x20000 ? 
0xffff : retval>>1 & 0xffff; ++} ++ ++static void mdio_write(struct rtnet_device *rtdev, int phy_id, int location, int value) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int write_cmd = 0x50020000 | (phy_id << 23) | (location << 18) | value; ++ long mdio_addr = ioaddr + Wn4_PhysicalMgmt; ++ int i; ++ ++ spin_lock_bh(&vp->mdio_lock); ++ ++ if (mii_preamble_required) ++ mdio_sync(ioaddr, 32); ++ ++ /* Shift the command bits out. */ ++ for (i = 31; i >= 0; i--) { ++ int dataval = (write_cmd&(1<= 0; i--) { ++ outw(MDIO_ENB_IN, mdio_addr); ++ mdio_delay(); ++ outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ spin_unlock_bh(&vp->mdio_lock); ++ return; ++} ++ ++/* ACPI: Advanced Configuration and Power Interface. */ ++/* Set Wake-On-LAN mode and put the board into D3 (power-down) state. */ ++static void acpi_set_WOL(struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ ++ /* Power up on: 1==Downloaded Filter, 2==Magic Packets, 4==Link Status. */ ++ EL3WINDOW(7); ++ outw(2, ioaddr + 0x0c); ++ /* The RxFilter must accept the WOL frames. */ ++ outw(SetRxFilter|RxStation|RxMulticast|RxBroadcast, ioaddr + EL3_CMD); ++ outw(RxEnable, ioaddr + EL3_CMD); ++ ++ /* Change the power state to D3; RxEnable doesn't take effect. */ ++ pci_enable_wake(vp->pdev, 0, 1); ++ pci_set_power_state(vp->pdev, 3); ++} ++ ++ ++static void vortex_remove_one (struct pci_dev *pdev) ++{ ++ struct vortex_private *vp; ++ // *** RTnet *** ++ struct rtnet_device *rtdev = pci_get_drvdata (pdev); ++ ++ ++ ++ if (!rtdev) { ++ printk("vortex_remove_one called for EISA device!\n"); ++ BUG(); ++ } ++ ++ vp = rtdev->priv; ++ ++ /* AKPM: FIXME: we should have ++ * if (vp->cb_fn_base) iounmap(vp->cb_fn_base); ++ * here ++ */ ++ rt_unregister_rtnetdev(rtdev); ++ /* Should really use issue_and_wait() here */ ++ outw(TotalReset|0x14, rtdev->base_addr + EL3_CMD); ++ ++ if (vp->pdev && vp->enable_wol) { ++ pci_set_power_state(vp->pdev, 0); /* Go active */ ++ if (vp->pm_state_valid) ++ pci_restore_state(vp->pdev, vp->power_state); ++ } ++ ++ pci_free_consistent(pdev, ++ sizeof(struct boom_rx_desc) * RX_RING_SIZE ++ + sizeof(struct boom_tx_desc) * TX_RING_SIZE, ++ vp->rx_ring, ++ vp->rx_ring_dma); ++ if (vp->must_free_region) ++ release_region(rtdev->base_addr, vp->io_size); ++ // *** RTnet *** ++ rtdev_free(rtdev); ++ // *** RTnet *** ++} ++ ++ ++static struct pci_driver vortex_driver = { ++ name: "3c59x_rt", ++ probe: vortex_init_one, ++ remove: vortex_remove_one, ++ id_table: vortex_pci_tbl, ++#ifdef CONFIG_PM ++ suspend: NULL, ++ resume: NULL, ++#endif ++}; ++ ++ ++static int vortex_have_pci; ++ ++ ++static int __init vortex_init (void) ++{ ++ int pci_rc; ++ ++ pci_rc = pci_register_driver(&vortex_driver); ++ ++ if (pci_rc == 0) ++ vortex_have_pci = 1; ++ ++ return (vortex_have_pci) ? 
0 : -ENODEV; ++} ++ ++ ++static void __exit vortex_cleanup (void) ++{ ++ if (vortex_have_pci) ++ pci_unregister_driver (&vortex_driver); ++} ++ ++module_init(vortex_init); ++module_exit(vortex_cleanup); +--- linux/drivers/xenomai/net/drivers/experimental/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/Kconfig 2021-04-07 16:01:27.576633667 +0800 +@@ -0,0 +1,17 @@ ++config XENO_DRIVERS_NET_EXP_DRIVERS ++ depends on XENO_DRIVERS_NET && PCI ++ bool "Experimental Drivers" ++ ++if XENO_DRIVERS_NET_EXP_DRIVERS ++ ++config XENO_DRIVERS_NET_DRV_3C59X ++ depends on PCI ++ tristate "3Com 59x" ++ ++config XENO_DRIVERS_NET_DRV_E1000_NEW ++ depends on PCI ++ tristate "New Intel(R) PRO/1000 (Gigabit)" ++ ++source "drivers/xenomai/net/drivers/experimental/rt2500/Kconfig" ++ ++endif +--- linux/drivers/xenomai/net/drivers/mpc52xx_fec/rt_mpc52xx_fec.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc52xx_fec/rt_mpc52xx_fec.h 2021-04-07 16:01:27.571633675 +0800 +@@ -0,0 +1,428 @@ ++/* ++ * arch/ppc/5xxx_io/fec.h ++ * ++ * Header file for the MPC5xxx Fast Ethernet Controller driver ++ * ++ * Author: Dale Farnsworth ++ * ++ * Copyright 2003 MontaVista Software ++ * ++ * 2003 (c) MontaVista, Software, Inc. This file is licensed under the terms ++ * of the GNU General Public License version 2. This program is licensed ++ * "as is" without any warranty of any kind, whether express or implied. ++ */ ++ ++#ifndef __RT_MPC52XX_FEC_H_ ++#define __RT_MPC52XX_FEC_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Define board specific options */ ++#define CONFIG_XENO_DRIVERS_NET_USE_MDIO ++#define CONFIG_XENO_DRIVERS_NET_FEC_GENERIC_PHY ++#define CONFIG_XENO_DRIVERS_NET_FEC_LXT971 ++#undef CONFIG_XENO_DRIVERS_NET_FEC_DP83847 ++ ++/* Tunable constants */ ++#define MPC5xxx_FEC_RECV_BUFFER_SIZE 1518 /* max receive packet size */ ++#define MPC5xxx_FEC_RECV_BUFFER_SIZE_BC 2048 /* max receive packet size */ ++#define MPC5xxx_FEC_TBD_NUM 256 /* max transmit packets */ ++#define MPC5xxx_FEC_RBD_NUM 256 /* max receive packets */ ++ ++struct mpc5xxx_fec { ++ volatile u32 fec_id; /* FEC + 0x000 */ ++ volatile u32 ievent; /* FEC + 0x004 */ ++ volatile u32 imask; /* FEC + 0x008 */ ++ ++ volatile u32 reserved0[1]; /* FEC + 0x00C */ ++ volatile u32 r_des_active; /* FEC + 0x010 */ ++ volatile u32 x_des_active; /* FEC + 0x014 */ ++ volatile u32 r_des_active_cl; /* FEC + 0x018 */ ++ volatile u32 x_des_active_cl; /* FEC + 0x01C */ ++ volatile u32 ivent_set; /* FEC + 0x020 */ ++ volatile u32 ecntrl; /* FEC + 0x024 */ ++ ++ volatile u32 reserved1[6]; /* FEC + 0x028-03C */ ++ volatile u32 mii_data; /* FEC + 0x040 */ ++ volatile u32 mii_speed; /* FEC + 0x044 */ ++ volatile u32 mii_status; /* FEC + 0x048 */ ++ ++ volatile u32 reserved2[5]; /* FEC + 0x04C-05C */ ++ volatile u32 mib_data; /* FEC + 0x060 */ ++ volatile u32 mib_control; /* FEC + 0x064 */ ++ ++ volatile u32 reserved3[6]; /* FEC + 0x068-7C */ ++ volatile u32 r_activate; /* FEC + 0x080 */ ++ volatile u32 r_cntrl; /* FEC + 0x084 */ ++ volatile u32 r_hash; /* FEC + 0x088 */ ++ volatile u32 r_data; /* FEC + 0x08C */ ++ volatile u32 ar_done; /* FEC + 0x090 */ ++ volatile u32 r_test; /* FEC + 0x094 */ ++ volatile u32 r_mib; /* FEC + 0x098 */ ++ volatile u32 r_da_low; /* FEC + 0x09C */ ++ volatile u32 r_da_high; /* FEC + 0x0A0 */ ++ ++ volatile u32 reserved4[7]; /* FEC + 0x0A4-0BC */ ++ volatile u32 x_activate; /* FEC + 0x0C0 */ ++ volatile u32 x_cntrl; /* 
FEC + 0x0C4 */ ++ volatile u32 backoff; /* FEC + 0x0C8 */ ++ volatile u32 x_data; /* FEC + 0x0CC */ ++ volatile u32 x_status; /* FEC + 0x0D0 */ ++ volatile u32 x_mib; /* FEC + 0x0D4 */ ++ volatile u32 x_test; /* FEC + 0x0D8 */ ++ volatile u32 fdxfc_da1; /* FEC + 0x0DC */ ++ volatile u32 fdxfc_da2; /* FEC + 0x0E0 */ ++ volatile u32 paddr1; /* FEC + 0x0E4 */ ++ volatile u32 paddr2; /* FEC + 0x0E8 */ ++ volatile u32 op_pause; /* FEC + 0x0EC */ ++ ++ volatile u32 reserved5[4]; /* FEC + 0x0F0-0FC */ ++ volatile u32 instr_reg; /* FEC + 0x100 */ ++ volatile u32 context_reg; /* FEC + 0x104 */ ++ volatile u32 test_cntrl; /* FEC + 0x108 */ ++ volatile u32 acc_reg; /* FEC + 0x10C */ ++ volatile u32 ones; /* FEC + 0x110 */ ++ volatile u32 zeros; /* FEC + 0x114 */ ++ volatile u32 iaddr1; /* FEC + 0x118 */ ++ volatile u32 iaddr2; /* FEC + 0x11C */ ++ volatile u32 gaddr1; /* FEC + 0x120 */ ++ volatile u32 gaddr2; /* FEC + 0x124 */ ++ volatile u32 random; /* FEC + 0x128 */ ++ volatile u32 rand1; /* FEC + 0x12C */ ++ volatile u32 tmp; /* FEC + 0x130 */ ++ ++ volatile u32 reserved6[3]; /* FEC + 0x134-13C */ ++ volatile u32 fifo_id; /* FEC + 0x140 */ ++ volatile u32 x_wmrk; /* FEC + 0x144 */ ++ volatile u32 fcntrl; /* FEC + 0x148 */ ++ volatile u32 r_bound; /* FEC + 0x14C */ ++ volatile u32 r_fstart; /* FEC + 0x150 */ ++ volatile u32 r_count; /* FEC + 0x154 */ ++ volatile u32 r_lag; /* FEC + 0x158 */ ++ volatile u32 r_read; /* FEC + 0x15C */ ++ volatile u32 r_write; /* FEC + 0x160 */ ++ volatile u32 x_count; /* FEC + 0x164 */ ++ volatile u32 x_lag; /* FEC + 0x168 */ ++ volatile u32 x_retry; /* FEC + 0x16C */ ++ volatile u32 x_write; /* FEC + 0x170 */ ++ volatile u32 x_read; /* FEC + 0x174 */ ++ ++ volatile u32 reserved7[2]; /* FEC + 0x178-17C */ ++ volatile u32 fm_cntrl; /* FEC + 0x180 */ ++ volatile u32 rfifo_data; /* FEC + 0x184 */ ++ volatile u32 rfifo_status; /* FEC + 0x188 */ ++ volatile u32 rfifo_cntrl; /* FEC + 0x18C */ ++ volatile u32 rfifo_lrf_ptr; /* FEC + 0x190 */ ++ volatile u32 rfifo_lwf_ptr; /* FEC + 0x194 */ ++ volatile u32 rfifo_alarm; /* FEC + 0x198 */ ++ volatile u32 rfifo_rdptr; /* FEC + 0x19C */ ++ volatile u32 rfifo_wrptr; /* FEC + 0x1A0 */ ++ volatile u32 tfifo_data; /* FEC + 0x1A4 */ ++ volatile u32 tfifo_status; /* FEC + 0x1A8 */ ++ volatile u32 tfifo_cntrl; /* FEC + 0x1AC */ ++ volatile u32 tfifo_lrf_ptr; /* FEC + 0x1B0 */ ++ volatile u32 tfifo_lwf_ptr; /* FEC + 0x1B4 */ ++ volatile u32 tfifo_alarm; /* FEC + 0x1B8 */ ++ volatile u32 tfifo_rdptr; /* FEC + 0x1BC */ ++ volatile u32 tfifo_wrptr; /* FEC + 0x1C0 */ ++ ++ volatile u32 reset_cntrl; /* FEC + 0x1C4 */ ++ volatile u32 xmit_fsm; /* FEC + 0x1C8 */ ++ ++ volatile u32 reserved8[3]; /* FEC + 0x1CC-1D4 */ ++ volatile u32 rdes_data0; /* FEC + 0x1D8 */ ++ volatile u32 rdes_data1; /* FEC + 0x1DC */ ++ volatile u32 r_length; /* FEC + 0x1E0 */ ++ volatile u32 x_length; /* FEC + 0x1E4 */ ++ volatile u32 x_addr; /* FEC + 0x1E8 */ ++ volatile u32 cdes_data; /* FEC + 0x1EC */ ++ volatile u32 status; /* FEC + 0x1F0 */ ++ volatile u32 dma_control; /* FEC + 0x1F4 */ ++ volatile u32 des_cmnd; /* FEC + 0x1F8 */ ++ volatile u32 data; /* FEC + 0x1FC */ ++ ++ volatile u32 rmon_t_drop; /* FEC + 0x200 */ ++ volatile u32 rmon_t_packets; /* FEC + 0x204 */ ++ volatile u32 rmon_t_bc_pkt; /* FEC + 0x208 */ ++ volatile u32 rmon_t_mc_pkt; /* FEC + 0x20C */ ++ volatile u32 rmon_t_crc_align; /* FEC + 0x210 */ ++ volatile u32 rmon_t_undersize; /* FEC + 0x214 */ ++ volatile u32 rmon_t_oversize; /* FEC + 0x218 */ ++ volatile u32 rmon_t_frag; /* FEC + 0x21C */ ++ 
volatile u32 rmon_t_jab; /* FEC + 0x220 */ ++ volatile u32 rmon_t_col; /* FEC + 0x224 */ ++ volatile u32 rmon_t_p64; /* FEC + 0x228 */ ++ volatile u32 rmon_t_p65to127; /* FEC + 0x22C */ ++ volatile u32 rmon_t_p128to255; /* FEC + 0x230 */ ++ volatile u32 rmon_t_p256to511; /* FEC + 0x234 */ ++ volatile u32 rmon_t_p512to1023; /* FEC + 0x238 */ ++ volatile u32 rmon_t_p1024to2047; /* FEC + 0x23C */ ++ volatile u32 rmon_t_p_gte2048; /* FEC + 0x240 */ ++ volatile u32 rmon_t_octets; /* FEC + 0x244 */ ++ volatile u32 ieee_t_drop; /* FEC + 0x248 */ ++ volatile u32 ieee_t_frame_ok; /* FEC + 0x24C */ ++ volatile u32 ieee_t_1col; /* FEC + 0x250 */ ++ volatile u32 ieee_t_mcol; /* FEC + 0x254 */ ++ volatile u32 ieee_t_def; /* FEC + 0x258 */ ++ volatile u32 ieee_t_lcol; /* FEC + 0x25C */ ++ volatile u32 ieee_t_excol; /* FEC + 0x260 */ ++ volatile u32 ieee_t_macerr; /* FEC + 0x264 */ ++ volatile u32 ieee_t_cserr; /* FEC + 0x268 */ ++ volatile u32 ieee_t_sqe; /* FEC + 0x26C */ ++ volatile u32 t_fdxfc; /* FEC + 0x270 */ ++ volatile u32 ieee_t_octets_ok; /* FEC + 0x274 */ ++ ++ volatile u32 reserved9[2]; /* FEC + 0x278-27C */ ++ volatile u32 rmon_r_drop; /* FEC + 0x280 */ ++ volatile u32 rmon_r_packets; /* FEC + 0x284 */ ++ volatile u32 rmon_r_bc_pkt; /* FEC + 0x288 */ ++ volatile u32 rmon_r_mc_pkt; /* FEC + 0x28C */ ++ volatile u32 rmon_r_crc_align; /* FEC + 0x290 */ ++ volatile u32 rmon_r_undersize; /* FEC + 0x294 */ ++ volatile u32 rmon_r_oversize; /* FEC + 0x298 */ ++ volatile u32 rmon_r_frag; /* FEC + 0x29C */ ++ volatile u32 rmon_r_jab; /* FEC + 0x2A0 */ ++ ++ volatile u32 rmon_r_resvd_0; /* FEC + 0x2A4 */ ++ ++ volatile u32 rmon_r_p64; /* FEC + 0x2A8 */ ++ volatile u32 rmon_r_p65to127; /* FEC + 0x2AC */ ++ volatile u32 rmon_r_p128to255; /* FEC + 0x2B0 */ ++ volatile u32 rmon_r_p256to511; /* FEC + 0x2B4 */ ++ volatile u32 rmon_r_p512to1023; /* FEC + 0x2B8 */ ++ volatile u32 rmon_r_p1024to2047; /* FEC + 0x2BC */ ++ volatile u32 rmon_r_p_gte2048; /* FEC + 0x2C0 */ ++ volatile u32 rmon_r_octets; /* FEC + 0x2C4 */ ++ volatile u32 ieee_r_drop; /* FEC + 0x2C8 */ ++ volatile u32 ieee_r_frame_ok; /* FEC + 0x2CC */ ++ volatile u32 ieee_r_crc; /* FEC + 0x2D0 */ ++ volatile u32 ieee_r_align; /* FEC + 0x2D4 */ ++ volatile u32 r_macerr; /* FEC + 0x2D8 */ ++ volatile u32 r_fdxfc; /* FEC + 0x2DC */ ++ volatile u32 ieee_r_octets_ok; /* FEC + 0x2E0 */ ++ ++ volatile u32 reserved10[6]; /* FEC + 0x2E4-2FC */ ++ ++ volatile u32 reserved11[64]; /* FEC + 0x300-3FF */ ++}; ++ ++#define MPC5xxx_FEC_MIB_DISABLE 0x80000000 ++ ++#define MPC5xxx_FEC_IEVENT_HBERR 0x80000000 ++#define MPC5xxx_FEC_IEVENT_BABR 0x40000000 ++#define MPC5xxx_FEC_IEVENT_BABT 0x20000000 ++#define MPC5xxx_FEC_IEVENT_GRA 0x10000000 ++#define MPC5xxx_FEC_IEVENT_TFINT 0x08000000 ++#define MPC5xxx_FEC_IEVENT_MII 0x00800000 ++#define MPC5xxx_FEC_IEVENT_LATE_COL 0x00200000 ++#define MPC5xxx_FEC_IEVENT_COL_RETRY_LIM 0x00100000 ++#define MPC5xxx_FEC_IEVENT_XFIFO_UN 0x00080000 ++#define MPC5xxx_FEC_IEVENT_XFIFO_ERROR 0x00040000 ++#define MPC5xxx_FEC_IEVENT_RFIFO_ERROR 0x00020000 ++ ++#define MPC5xxx_FEC_IMASK_HBERR 0x80000000 ++#define MPC5xxx_FEC_IMASK_BABR 0x40000000 ++#define MPC5xxx_FEC_IMASK_BABT 0x20000000 ++#define MPC5xxx_FEC_IMASK_GRA 0x10000000 ++#define MPC5xxx_FEC_IMASK_MII 0x00800000 ++#define MPC5xxx_FEC_IMASK_LATE_COL 0x00200000 ++#define MPC5xxx_FEC_IMASK_COL_RETRY_LIM 0x00100000 ++#define MPC5xxx_FEC_IMASK_XFIFO_UN 0x00080000 ++#define MPC5xxx_FEC_IMASK_XFIFO_ERROR 0x00040000 ++#define MPC5xxx_FEC_IMASK_RFIFO_ERROR 0x00020000 ++ ++#define 
MPC5xxx_FEC_RCNTRL_MAX_FL_SHIFT 16 ++#define MPC5xxx_FEC_RCNTRL_LOOP 0x01 ++#define MPC5xxx_FEC_RCNTRL_DRT 0x02 ++#define MPC5xxx_FEC_RCNTRL_MII_MODE 0x04 ++#define MPC5xxx_FEC_RCNTRL_PROM 0x08 ++#define MPC5xxx_FEC_RCNTRL_BC_REJ 0x10 ++#define MPC5xxx_FEC_RCNTRL_FCE 0x20 ++ ++#define MPC5xxx_FEC_TCNTRL_GTS 0x00000001 ++#define MPC5xxx_FEC_TCNTRL_HBC 0x00000002 ++#define MPC5xxx_FEC_TCNTRL_FDEN 0x00000004 ++#define MPC5xxx_FEC_TCNTRL_TFC_PAUSE 0x00000008 ++#define MPC5xxx_FEC_TCNTRL_RFC_PAUSE 0x00000010 ++ ++#define MPC5xxx_FEC_ECNTRL_RESET 0x00000001 ++#define MPC5xxx_FEC_ECNTRL_ETHER_EN 0x00000002 ++ ++#define MPC5xxx_FEC_RESET_DELAY 50 /* uS */ ++ ++ ++/* Receive & Transmit Buffer Descriptor definitions */ ++struct mpc5xxx_fec_bd { ++ volatile u32 status; ++ volatile u32 data; ++}; ++ ++/* Receive data buffer format */ ++struct mpc5xxx_rbuf { ++ u8 data[MPC5xxx_FEC_RECV_BUFFER_SIZE_BC]; ++}; ++ ++struct fec_queue { ++ volatile struct mpc5xxx_fec_bd *bd_base; ++ struct rtskb **skb_base; ++ u16 last_index; ++ u16 start_index; ++ u16 finish_index; ++}; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++#define MII_ADVERTISE_HALF (ADVERTISE_100HALF | ADVERTISE_10HALF | \ ++ ADVERTISE_CSMA) ++ ++#define MII_ADVERTISE_ALL (ADVERTISE_100FULL | ADVERTISE_10FULL | \ ++ MII_ADVERTISE_HALF) ++#ifdef PHY_INTERRUPT ++#define MII_ADVERTISE_DEFAULT MII_ADVERTISE_ALL ++#else ++#define MII_ADVERTISE_DEFAULT MII_ADVERTISE_HALF ++#endif ++ ++typedef struct { ++ uint mii_data; ++ void (*funct)(uint mii_reg, struct rtnet_device *dev, uint data); ++} phy_cmd_t; ++ ++typedef struct { ++ uint id; ++ char *name; ++ ++ const phy_cmd_t *config; ++ const phy_cmd_t *startup; ++ const phy_cmd_t *ack_int; ++ const phy_cmd_t *shutdown; ++} phy_info_t; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++struct mpc5xxx_fec_priv { ++ int full_duplex; ++ int tx_full; ++ int r_tasknum; ++ int t_tasknum; ++ int r_irq; ++ int t_irq; ++ rtdm_irq_t irq_handle; ++ rtdm_irq_t r_irq_handle; ++ rtdm_irq_t t_irq_handle; ++ u32 last_transmit_time; ++ u32 last_receive_time; ++ struct mpc5xxx_fec *fec; ++ struct mpc5xxx_sram_fec *sram; ++ struct mpc5xxx_gpio *gpio; ++ struct mpc5xxx_sdma *sdma; ++ struct fec_queue r_queue; ++ struct rtskb *rskb[MPC5xxx_FEC_RBD_NUM]; ++ struct fec_queue t_queue; ++ struct rtskb *tskb[MPC5xxx_FEC_TBD_NUM]; ++ rtdm_lock_t lock; ++ unsigned long open_time; ++ struct net_device_stats stats; ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ uint phy_id; ++ uint phy_id_done; ++ uint phy_status; ++ uint phy_speed; ++ phy_info_t *phy; ++ struct tq_struct phy_task; ++ volatile uint sequence_done; ++ uint link; ++ uint phy_addr; ++ ++ struct tq_struct link_up_task; ++ int duplex_change; ++ int link_up; ++ ++ struct timer_list phy_timer_list; ++ u16 old_status; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++}; ++ ++struct mpc5xxx_sram_fec { ++ volatile struct mpc5xxx_fec_bd tbd[MPC5xxx_FEC_TBD_NUM]; ++ volatile struct mpc5xxx_fec_bd rbd[MPC5xxx_FEC_RBD_NUM]; ++}; ++ ++#define MPC5xxx_FEC_RBD_READY 0x40000000 ++#define MPC5xxx_FEC_RBD_RFD 0x08000000 /* receive frame done */ ++ ++#define MPC5xxx_FEC_RBD_INIT MPC5xxx_FEC_RBD_READY ++ ++#define MPC5xxx_FEC_TBD_READY 0x40000000 ++#define MPC5xxx_FEC_TBD_TFD 0x08000000 /* transmit frame done */ ++#define MPC5xxx_FEC_TBD_INT 0x04000000 /* Interrupt */ ++ ++#define MPC5xxx_FEC_TBD_INIT (MPC5xxx_FEC_TBD_INT | MPC5xxx_FEC_TBD_TFD | \ ++ MPC5xxx_FEC_TBD_READY) ++ ++ ++ ++/* MII-related definitions */ ++#define MPC5xxx_FEC_MII_DATA_ST 0x40000000 /* Start frame */ ++#define 
MPC5xxx_FEC_MII_DATA_OP_RD 0x20000000 /* Perform read */ ++#define MPC5xxx_FEC_MII_DATA_OP_WR 0x10000000 /* Perform write */ ++#define MPC5xxx_FEC_MII_DATA_PA_MSK 0x0f800000 /* PHY Address mask */ ++#define MPC5xxx_FEC_MII_DATA_RA_MSK 0x007c0000 /* PHY Register mask */ ++#define MPC5xxx_FEC_MII_DATA_TA 0x00020000 /* Turnaround */ ++#define MPC5xxx_FEC_MII_DATA_DATAMSK 0x00000fff /* PHY data mask */ ++ ++#define MPC5xxx_FEC_MII_DATA_RA_SHIFT 0x12 /* MII reg addr bits */ ++#define MPC5xxx_FEC_MII_DATA_PA_SHIFT 0x17 /* MII PHY addr bits */ ++ ++#define MPC5xxx_FEC_MII_SPEED (5 * 2) ++ ++const char mpc5xxx_fec_name[] = "eth0"; ++ ++struct mibCounters { ++ unsigned int byteReceived; ++ unsigned int byteSent; ++ unsigned int framesReceived; ++ unsigned int framesSent; ++ unsigned int totalByteReceived; ++ unsigned int totalFramesReceived; ++ unsigned int broadcastFramesReceived; ++ unsigned int multicastFramesReceived; ++ unsigned int cRCError; ++ unsigned int oversizeFrames; ++ unsigned int fragments; ++ unsigned int jabber; ++ unsigned int collision; ++ unsigned int lateCollision; ++ unsigned int frames64; ++ unsigned int frames65_127; ++ unsigned int frames128_255; ++ unsigned int frames256_511; ++ unsigned int frames512_1023; ++ unsigned int frames1024_MaxSize; ++ unsigned int macRxError; ++ unsigned int droppedFrames; ++ unsigned int outMulticastFrames; ++ unsigned int outBroadcastFrames; ++ unsigned int undersizeFrames; ++}; ++ ++#define MPC5xxx_FEC_WATCHDOG_TIMEOUT ((400*HZ)/1000) ++ ++ ++#define MPC5xxx_FEC_FRAME_LAST 0x08000000 /* Last */ ++#define MPC5xxx_FEC_FRAME_M 0x01000000 /* M? */ ++#define MPC5xxx_FEC_FRAME_BC 0x00800000 /* Broadcast */ ++#define MPC5xxx_FEC_FRAME_MC 0x00400000 /* Multicast */ ++#define MPC5xxx_FEC_FRAME_LG 0x00200000 /* Length error */ ++#define MPC5xxx_FEC_FRAME_NO 0x00100000 /* Non-octet aligned frame error */ ++#define MPC5xxx_FEC_FRAME_CR 0x00040000 /* CRC frame error */ ++#define MPC5xxx_FEC_FRAME_OV 0x00020000 /* Overrun error */ ++#define MPC5xxx_FEC_FRAME_TR 0x00010000 /* Truncated error */ ++ ++ ++ ++#endif /* __RT_MPC52XX_FEC_H_ */ +--- linux/drivers/xenomai/net/drivers/mpc52xx_fec/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc52xx_fec/Makefile 2021-04-07 16:01:27.566633682 +0800 +@@ -0,0 +1,5 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_MPC52XX_FEC) += rt_mpc52xx_fec.o ++ ++rt_mpc52xx_fec-y := mpc52xx_fec.o +--- linux/drivers/xenomai/net/drivers/mpc52xx_fec/mpc52xx_fec.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc52xx_fec/mpc52xx_fec.c 2021-04-07 16:01:27.562633688 +0800 +@@ -0,0 +1,1985 @@ ++/* ++ * arch/ppc/5xxx_io/fec.c ++ * ++ * Driver for the MPC5200 Fast Ethernet Controller ++ * Support for MPC5100 FEC has been removed, contact the author if you need it ++ * ++ * Author: Dale Farnsworth ++ * ++ * 2003 (c) MontaVista, Software, Inc. This file is licensed under the terms ++ * of the GNU General Public License version 2. This program is licensed ++ * "as is" without any warranty of any kind, whether express or implied. ++ * ++ * Ported to RTnet from "linuxppc_2_4_devel/arch/ppc/5xxx_io/fec.c". 
++ * Copyright (c) 2008 Wolfgang Grandegger ++ */ ++ ++/* #define PARANOID_CHECKS*/ ++/* #define MUST_ALIGN_TRANSMIT_DATA*/ ++#define MUST_UNALIGN_RECEIVE_DATA ++/* #define EXIT_ISR_AT_MEMORY_SQUEEZE*/ ++/* #define DISPLAY_WARNINGS*/ ++ ++#ifdef ORIGINAL_CODE ++static const char *version = "fec.c v0.2\n"; ++#endif /* ORIGINAL_CODE */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "rt_mpc52xx_fec.h" ++#ifdef CONFIG_UBOOT ++#include ++#endif ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FASTROUTE ++#error "Fast Routing on MPC5200 ethernet not supported" ++#endif ++ ++MODULE_AUTHOR("Maintainer: Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTnet driver for MPC52xx FEC"); ++MODULE_LICENSE("GPL"); ++ ++static unsigned int rx_pool_size = 0; ++MODULE_PARM(rx_pool_size, "i"); ++MODULE_PARM_DESC(rx_pool_size, "Receive buffer pool size"); ++ ++#define printk(fmt,args...) rtdm_printk (fmt ,##args) ++ ++static struct rtnet_device *mpc5xxx_fec_dev; ++static int mpc5xxx_fec_interrupt(rtdm_irq_t *irq_handle); ++static int mpc5xxx_fec_receive_interrupt(rtdm_irq_t *irq_handle); ++static int mpc5xxx_fec_transmit_interrupt(rtdm_irq_t *irq_handle); ++static struct net_device_stats *mpc5xxx_fec_get_stats(struct rtnet_device *dev); ++#ifdef ORIGINAL_CODE ++static void mpc5xxx_fec_set_multicast_list(struct rtnet_device *dev); ++#endif /* ORIGINAL_CODE */ ++static void mpc5xxx_fec_reinit(struct rtnet_device* dev); ++static int mpc5xxx_fec_setup(struct rtnet_device *dev, int reinit); ++static int mpc5xxx_fec_cleanup(struct rtnet_device *dev, int reinit); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static void mpc5xxx_fec_mii(struct rtnet_device *dev); ++#ifdef ORIGINAL_CODE ++static int mpc5xxx_fec_ioctl(struct rtnet_device *, struct ifreq *rq, int cmd); ++static int mpc5xxx_netdev_ethtool_ioctl(struct rtnet_device *dev, void *useraddr); ++#endif /* ORIGINAL_CODE */ ++static void mdio_timer_callback(unsigned long data); ++static void mii_display_status(struct rtnet_device *dev); ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET ++static void mpc5xxx_mdio_callback(uint regval, struct rtnet_device *dev, uint data); ++static int mpc5xxx_mdio_read(struct rtnet_device *dev, int phy_id, int location); ++#endif ++ ++static void mpc5xxx_fec_update_stat(struct rtnet_device *); ++ ++/* MII processing. We keep this as simple as possible. Requests are ++ * placed on the list (if there is room). When the request is finished ++ * by the MII, an optional function may be called. ++ */ ++typedef struct mii_list { ++ uint mii_regval; ++ void (*mii_func)(uint val, struct rtnet_device *dev, uint data); ++ struct mii_list *mii_next; ++ uint mii_data; ++} mii_list_t; ++ ++#define NMII 20 ++mii_list_t mii_cmds[NMII]; ++mii_list_t *mii_free; ++mii_list_t *mii_head; ++mii_list_t *mii_tail; ++ ++typedef struct mdio_read_data { ++ u16 regval; ++ struct task_struct *sleeping_task; ++} mdio_read_data_t; ++ ++static int mii_queue(struct rtnet_device *dev, int request, ++ void (*func)(uint, struct rtnet_device *, uint), uint data); ++ ++/* Make MII read/write commands for the FEC. 
++ * */ ++#define mk_mii_read(REG) (0x60020000 | ((REG & 0x1f) << 18)) ++#define mk_mii_write(REG, VAL) (0x50020000 | ((REG & 0x1f) << 18) | \ ++ (VAL & 0xffff)) ++#define mk_mii_end 0 ++ ++/* Register definitions for the PHY. ++*/ ++ ++#define MII_REG_CR 0 /* Control Register */ ++#define MII_REG_SR 1 /* Status Register */ ++#define MII_REG_PHYIR1 2 /* PHY Identification Register 1 */ ++#define MII_REG_PHYIR2 3 /* PHY Identification Register 2 */ ++#define MII_REG_ANAR 4 /* A-N Advertisement Register */ ++#define MII_REG_ANLPAR 5 /* A-N Link Partner Ability Register */ ++#define MII_REG_ANER 6 /* A-N Expansion Register */ ++#define MII_REG_ANNPTR 7 /* A-N Next Page Transmit Register */ ++#define MII_REG_ANLPRNPR 8 /* A-N Link Partner Received Next Page Reg. */ ++ ++/* values for phy_status */ ++ ++#define PHY_CONF_ANE 0x0001 /* 1 auto-negotiation enabled */ ++#define PHY_CONF_LOOP 0x0002 /* 1 loopback mode enabled */ ++#define PHY_CONF_SPMASK 0x00f0 /* mask for speed */ ++#define PHY_CONF_10HDX 0x0010 /* 10 Mbit half duplex supported */ ++#define PHY_CONF_10FDX 0x0020 /* 10 Mbit full duplex supported */ ++#define PHY_CONF_100HDX 0x0040 /* 100 Mbit half duplex supported */ ++#define PHY_CONF_100FDX 0x0080 /* 100 Mbit full duplex supported */ ++ ++#define PHY_STAT_LINK 0x0100 /* 1 up - 0 down */ ++#define PHY_STAT_FAULT 0x0200 /* 1 remote fault */ ++#define PHY_STAT_ANC 0x0400 /* 1 auto-negotiation complete */ ++#define PHY_STAT_SPMASK 0xf000 /* mask for speed */ ++#define PHY_STAT_10HDX 0x1000 /* 10 Mbit half duplex selected */ ++#define PHY_STAT_10FDX 0x2000 /* 10 Mbit full duplex selected */ ++#define PHY_STAT_100HDX 0x4000 /* 100 Mbit half duplex selected */ ++#define PHY_STAT_100FDX 0x8000 /* 100 Mbit full duplex selected */ ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++u8 mpc5xxx_fec_mac_addr[6]; ++u8 null_mac[6]; ++ ++#ifdef ORIGINAL_CODE ++static void mpc5xxx_fec_tx_timeout(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ ++ priv->stats.tx_errors++; ++ ++ if (!priv->tx_full) ++ rtnetif_wake_queue(dev); ++} ++#endif /* ORIGINAL_CODE */ ++ ++static void ++mpc5xxx_fec_set_paddr(struct rtnet_device *dev, u8 *mac) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ ++ out_be32(&fec->paddr1, (mac[0]<<24) | (mac[1]<<16) ++ | (mac[2]<<8) | (mac[3]<<0)); ++ out_be32(&fec->paddr2, (mac[4]<<24) | (mac[5]<<16) | 0x8808); ++} ++ ++#ifdef ORIGINAL_CODE ++static int ++mpc5xxx_fec_set_mac_address(struct rtnet_device *dev, void *addr) ++{ ++ struct sockaddr *sock = (struct sockaddr *)addr; ++ ++ mpc5xxx_fec_set_paddr(dev, sock->sa_data); ++ return 0; ++} ++#endif /* ORIGINAL_CODE */ ++ ++/* This function is called to start or restart the FEC during a link ++ * change. This happens on fifo errors or when switching between half ++ * and full duplex. ++ */ ++static void ++mpc5xxx_fec_restart(struct rtnet_device *dev, int duplex) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ u32 rcntrl; ++ u32 tcntrl; ++ int i; ++ ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("mpc5xxx_fec_restart\n"); ++#endif ++ out_be32(&fec->rfifo_status, in_be32(&fec->rfifo_status) & 0x700000); ++ out_be32(&fec->tfifo_status, in_be32(&fec->tfifo_status) & 0x700000); ++ out_be32(&fec->reset_cntrl, 0x1000000); ++ ++ /* Whack a reset. We should wait for this. 
*/ ++ out_be32(&fec->ecntrl, MPC5xxx_FEC_ECNTRL_RESET); ++ for (i = 0; i < MPC5xxx_FEC_RESET_DELAY; ++i) { ++ if ((in_be32(&fec->ecntrl) & MPC5xxx_FEC_ECNTRL_RESET) == 0) ++ break; ++ udelay(1); ++ } ++ if (i == MPC5xxx_FEC_RESET_DELAY) ++ printk ("FEC Reset timeout!\n"); ++ ++ /* Set station address. */ ++ out_be32(&fec->paddr1, *(u32 *)&dev->dev_addr[0]); ++ out_be32(&fec->paddr2, ++ ((*(u16 *)&dev->dev_addr[4]) << 16) | 0x8808); ++ ++#ifdef ORIGINAL_CODE ++ mpc5xxx_fec_set_multicast_list(dev); ++#endif /* ORIGINAL_CODE */ ++ ++ rcntrl = MPC5xxx_FEC_RECV_BUFFER_SIZE << 16; /* max frame length */ ++ rcntrl |= MPC5xxx_FEC_RCNTRL_FCE; ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ rcntrl |= MPC5xxx_FEC_RCNTRL_MII_MODE; ++#endif ++ if (duplex) ++ tcntrl = MPC5xxx_FEC_TCNTRL_FDEN; /* FD enable */ ++ else { ++ rcntrl |= MPC5xxx_FEC_RCNTRL_DRT; ++ tcntrl = 0; ++ } ++ out_be32(&fec->r_cntrl, rcntrl); ++ out_be32(&fec->x_cntrl, tcntrl); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Set MII speed. */ ++ out_be32(&fec->mii_speed, priv->phy_speed); ++#endif ++ ++ priv->full_duplex = duplex; ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ priv->duplex_change = 0; ++#endif ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("%s: duplex set to %d\n", dev->name, priv->full_duplex); ++#endif ++ ++ /* Clear any outstanding interrupt. */ ++ out_be32(&fec->ievent, 0xffffffff); /* clear intr events */ ++ ++ /* Enable interrupts we wish to service. ++ */ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ out_be32(&fec->imask, 0xf0fe0000); /* enable all intr but tfint */ ++#else ++ out_be32(&fec->imask, 0xf07e0000); /* enable all intr but tfint */ ++#endif ++ ++ /* And last, enable the transmit and receive processing. ++ */ ++ out_be32(&fec->ecntrl, MPC5xxx_FEC_ECNTRL_ETHER_EN); ++ out_be32(&fec->r_des_active, 0x01000000); ++ ++ /* The tx ring is no longer full. */ ++ if (priv->tx_full) ++ { ++ priv->tx_full = 0; ++ rtnetif_wake_queue(dev); ++ } ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static void ++mpc5xxx_fec_mii(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ mii_list_t *mip; ++ uint mii_reg; ++ ++ mii_reg = in_be32(&fec->mii_data); ++ ++ if ((mip = mii_head) == NULL) { ++ printk("MII and no head!\n"); ++ return; ++ } ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("mpc5xxx_fec_mii %08x %08x %08x\n", ++ mii_reg, (u32)mip->mii_func, mip->mii_data); ++#endif ++ ++ if (mip->mii_func != NULL) ++ (*(mip->mii_func))(mii_reg, dev, mip->mii_data); ++ ++ mii_head = mip->mii_next; ++ mip->mii_next = mii_free; ++ mii_free = mip; ++ ++ if ((mip = mii_head) != NULL) ++ out_be32(&fec->mii_data, mip->mii_regval); ++} ++ ++static int ++mii_queue(struct rtnet_device *dev, int regval, void (*func)(uint, struct rtnet_device *, uint), uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ rtdm_lockctx_t context; ++ mii_list_t *mip; ++ int retval; ++ ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("mii_queue: %08x %08x %08x\n", regval, (u32)func, data); ++#endif ++ ++ /* Add PHY address to register command. 
++ */ ++ regval |= priv->phy_addr << 23; ++ ++ retval = 0; ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ ++ if ((mip = mii_free) != NULL) { ++ mii_free = mip->mii_next; ++ mip->mii_regval = regval; ++ mip->mii_func = func; ++ mip->mii_next = NULL; ++ mip->mii_data = data; ++ if (mii_head) { ++ mii_tail->mii_next = mip; ++ mii_tail = mip; ++ } else { ++ mii_head = mii_tail = mip; ++ out_be32(&fec->mii_data, regval); ++ } ++ } else ++ retval = 1; ++ ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ return retval; ++} ++ ++static void mii_do_cmd(struct rtnet_device *dev, const phy_cmd_t *c) ++{ ++ int k; ++ ++ if (!c) ++ return; ++ ++ for (k = 0; (c+k)->mii_data != mk_mii_end; k++) ++ mii_queue(dev, (c+k)->mii_data, (c+k)->funct, 0); ++} ++ ++static void mii_parse_sr(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ s &= ~(PHY_STAT_LINK | PHY_STAT_FAULT | PHY_STAT_ANC); ++ ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_LINK; ++ if (mii_reg & 0x0010) ++ s |= PHY_STAT_FAULT; ++ if (mii_reg & 0x0020) ++ s |= PHY_STAT_ANC; ++ ++ priv->phy_status = s; ++ priv->link = (s & PHY_STAT_LINK) ? 1 : 0; ++} ++ ++static void mii_parse_cr(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ s &= ~(PHY_CONF_ANE | PHY_CONF_LOOP); ++ ++ if (mii_reg & 0x1000) ++ s |= PHY_CONF_ANE; ++ if (mii_reg & 0x4000) ++ s |= PHY_CONF_LOOP; ++ ++ priv->phy_status = s; ++} ++ ++static void mii_parse_anar(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ s &= ~(PHY_CONF_SPMASK); ++ ++ if (mii_reg & 0x0020) ++ s |= PHY_CONF_10HDX; ++ if (mii_reg & 0x0040) ++ s |= PHY_CONF_10FDX; ++ if (mii_reg & 0x0080) ++ s |= PHY_CONF_100HDX; ++ if (mii_reg & 0x0100) ++ s |= PHY_CONF_100FDX; ++ ++ priv->phy_status = s; ++} ++ ++/* ------------------------------------------------------------------------- */ ++/* Generic PHY support. Should work for all PHYs, but does not support link ++ * change interrupts. ++ */ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_GENERIC_PHY ++ ++static phy_info_t phy_info_generic = { ++ 0x00000000, /* 0-->match any PHY */ ++ "GENERIC", ++ ++ (const phy_cmd_t []) { /* config */ ++ /* advertise only half-duplex capabilities */ ++ { mk_mii_write(MII_ADVERTISE, MII_ADVERTISE_HALF), ++ mii_parse_anar }, ++ ++ /* enable auto-negotiation */ ++ { mk_mii_write(MII_BMCR, BMCR_ANENABLE), mii_parse_cr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup */ ++ /* restart auto-negotiation */ ++ { mk_mii_write(MII_BMCR, (BMCR_ANENABLE | BMCR_ANRESTART)), ++ NULL }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* We don't actually use the ack_int table with a generic ++ * PHY, but putting a reference to mii_parse_sr here keeps ++ * us from getting a compiler warning about unused static ++ * functions in the case where we only compile in generic ++ * PHY support. 
++ */ ++ { mk_mii_read(MII_BMSR), mii_parse_sr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown */ ++ { mk_mii_end, } ++ }, ++}; ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_GENERIC_PHY */ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Level one LXT971 is used on some of my custom boards */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_LXT971 ++ ++/* register definitions for the 971 */ ++ ++#define MII_LXT971_PCR 16 /* Port Control Register */ ++#define MII_LXT971_SR2 17 /* Status Register 2 */ ++#define MII_LXT971_IER 18 /* Interrupt Enable Register */ ++#define MII_LXT971_ISR 19 /* Interrupt Status Register */ ++#define MII_LXT971_LCR 20 /* LED Control Register */ ++#define MII_LXT971_TCR 30 /* Transmit Control Register */ ++ ++static void mii_parse_lxt971_sr2(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x4000) { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ else { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ if (mii_reg & 0x0008) ++ s |= PHY_STAT_FAULT; ++ ++ /* Record the new full_duplex value only if the link is up ++ * (so we don't bother restarting the driver on duplex ++ * changes when the link is down). ++ */ ++ if (priv->link) { ++ int prev_duplex = priv->full_duplex; ++ priv->full_duplex = ((mii_reg & 0x0200) != 0); ++ if (priv->full_duplex != prev_duplex) { ++ /* trigger a restart with changed duplex */ ++ priv->duplex_change = 1; ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("%s: duplex change: %s\n", ++ dev->name, priv->full_duplex ? "full" : "half"); ++#endif ++ } ++ } ++ priv->phy_status = s; ++} ++ ++static phy_info_t phy_info_lxt971 = { ++ 0x0001378e, ++ "LXT971", ++ ++ (const phy_cmd_t []) { /* config */ ++#ifdef MPC5100_FIX10HDX ++ { mk_mii_write(MII_REG_ANAR, 0x021), NULL }, /* 10 Mbps, HD */ ++#else ++/* { mk_mii_write(MII_REG_ANAR, 0x0A1), NULL }, *//* 10/100, HD */ ++ { mk_mii_write(MII_REG_ANAR, 0x01E1), NULL }, /* 10/100, FD */ ++#endif ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x00f2), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ ++ /* Somehow does the 971 tell me that the link is down ++ * the first read after power-up. 
++ * read here to get a valid value in ack_int */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++#if defined(CONFIG_UC101) ++ { mk_mii_write(MII_LXT971_LCR, 0x4122), NULL }, /* LED settings */ ++#endif ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_LXT971_SR2), mii_parse_lxt971_sr2 }, ++ ++ /* we only need to read ISR to acknowledge */ ++ ++ { mk_mii_read(MII_LXT971_ISR), NULL }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_LXT971 */ ++ ++/* ----------------------------------------------------------------- */ ++/* The National Semiconductor DP83847 is used on a INKA 4X0 board */ ++/* ----------------------------------------------------------------- */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_DP83847 ++ ++/* Register definitions */ ++#define MII_DP83847_PHYSTS 0x10 /* PHY Status Register */ ++ ++static void mii_parse_dp83847_physts(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x2) { ++ if (mii_reg & 0x4) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ else { ++ if (mii_reg & 0x4) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ if (mii_reg & 0x40) ++ s |= PHY_STAT_FAULT; ++ ++ priv->full_duplex = ((mii_reg & 0x4) != 0); ++ ++ priv->phy_status = s; ++} ++ ++static phy_info_t phy_info_dp83847 = { ++ 0x020005c3, ++ "DP83847", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_write(MII_REG_ANAR, 0x01E1), NULL }, /* Auto-Negociation Register Control set to */ ++ /* auto-negociate 10/100MBps, Half/Full duplex */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup */ ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* Enable and Restart Auto-Negotiation */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83847_PHYSTS), mii_parse_dp83847_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83847_PHYSTS), mii_parse_dp83847_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_end, } ++ } ++}; ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_DP83847 */ ++ ++static phy_info_t *phy_info[] = { ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_LXT971 ++ &phy_info_lxt971, ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_LXT971 */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_DP83847 ++ &phy_info_dp83847, ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_DP83847 */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_GENERIC_PHY ++ /* Generic PHY support. This must be the last PHY in the table. ++ * It will be used to support any PHY that doesn't match a previous ++ * entry in the table. 
++ */ ++ &phy_info_generic, ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_GENERIC_PHY */ ++ ++ NULL ++}; ++ ++static void mii_display_config(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ printk("%s: config: auto-negotiation ", dev->name); ++ ++ if (s & PHY_CONF_ANE) ++ printk("on"); ++ else ++ printk("off"); ++ ++ if (s & PHY_CONF_100FDX) ++ printk(", 100FDX"); ++ if (s & PHY_CONF_100HDX) ++ printk(", 100HDX"); ++ if (s & PHY_CONF_10FDX) ++ printk(", 10FDX"); ++ if (s & PHY_CONF_10HDX) ++ printk(", 10HDX"); ++ if (!(s & PHY_CONF_SPMASK)) ++ printk(", No speed/duplex selected?"); ++ ++ if (s & PHY_CONF_LOOP) ++ printk(", loopback enabled"); ++ ++ printk(".\n"); ++ ++ priv->sequence_done = 1; ++} ++ ++static void mii_queue_config(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ ++ priv->phy_task.routine = (void *)mii_display_config; ++ priv->phy_task.data = dev; ++ schedule_task(&priv->phy_task); ++} ++ ++ ++phy_cmd_t phy_cmd_config[] = { { mk_mii_read(MII_REG_CR), mii_queue_config }, ++ { mk_mii_end, } }; ++ ++ ++/* Read remainder of PHY ID. ++*/ ++static void ++mii_discover_phy3(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ int i; ++ ++ priv->phy_id |= (mii_reg & 0xffff); ++ ++ for (i = 0; phy_info[i]; i++) { ++ if (phy_info[i]->id == (priv->phy_id >> 4) || !phy_info[i]->id) ++ break; ++ if (phy_info[i]->id == 0) /* check generic entry */ ++ break; ++ } ++ ++ if (!phy_info[i]) ++ panic("%s: PHY id 0x%08x is not supported!\n", ++ dev->name, priv->phy_id); ++ ++ priv->phy = phy_info[i]; ++ priv->phy_id_done = 1; ++ ++ printk("%s: Phy @ 0x%x, type %s (0x%08x)\n", ++ dev->name, priv->phy_addr, priv->phy->name, priv->phy_id); ++#if defined(CONFIG_UC101) ++ mii_do_cmd(dev, priv->phy->startup); ++#endif ++} ++ ++/* Scan all of the MII PHY addresses looking for someone to respond ++ * with a valid ID. This usually happens quickly. ++ */ ++static void ++mii_discover_phy(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint phytype; ++ ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("mii_discover_phy\n"); ++#endif ++ ++ if ((phytype = (mii_reg & 0xffff)) != 0xffff) { ++ /* Got first part of ID, now get remainder. ++ */ ++ priv->phy_id = phytype << 16; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR2), mii_discover_phy3, 0); ++ } else { ++ priv->phy_addr++; ++ if (priv->phy_addr < 32) ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), ++ mii_discover_phy, 0); ++ else ++ printk("fec: No PHY device found.\n"); ++ } ++} ++ ++static void ++mpc5xxx_fec_link_up(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)(dev->priv); ++ ++ printk("mpc5xxx_fec_link_up: link_up=%d\n", priv->link_up); ++#ifdef ORIGINAL_CODE ++ priv->link_up = 0; ++#endif /* ORIGINAL_CODE */ ++ mii_display_status(dev); ++ if (priv->duplex_change) { ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("%s: restarting with %s duplex...\n", ++ dev->name, priv->full_duplex ? "full" : "half"); ++#endif ++ mpc5xxx_fec_restart(dev, priv->full_duplex); ++ } ++} ++ ++/* ++ * Execute the ack_int command set and schedules next timer call back. 
++ */ ++static void mdio_timer_callback(unsigned long data) ++{ ++ struct rtnet_device *dev = (struct rtnet_device *)data; ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)(dev->priv); ++ mii_do_cmd(dev, priv->phy->ack_int); ++ ++ if (priv->link_up) { ++#ifdef ORIGINAL_CODE ++ priv->link_up_task.routine = (void *)mpc5xxx_fec_link_up; ++ priv->link_up_task.data = dev; ++ schedule_task(&priv->link_up_task); ++#else ++ mpc5xxx_fec_link_up(dev); ++ return; ++#endif /* ORIGINAL_CODE */ ++ } ++ /* Reschedule in 1 second */ ++ priv->phy_timer_list.expires = jiffies + (1000 * HZ / 1000); ++ add_timer(&priv->phy_timer_list); ++} ++ ++/* ++ * Displays the current status of the PHY. ++ */ ++static void mii_display_status(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = dev->priv; ++ uint s = priv->phy_status; ++ ++ printk("%s: status: ", dev->name); ++ ++ if (!priv->link) { ++ printk("link down"); ++ } else { ++ printk("link up"); ++ ++ switch(s & PHY_STAT_SPMASK) { ++ case PHY_STAT_100FDX: printk(", 100 Mbps Full Duplex"); break; ++ case PHY_STAT_100HDX: printk(", 100 Mbps Half Duplex"); break; ++ case PHY_STAT_10FDX: printk(", 10 Mbps Full Duplex"); break; ++ case PHY_STAT_10HDX: printk(", 10 Mbps Half Duplex"); break; ++ default: ++ printk(", Unknown speed/duplex"); ++ } ++ ++ if (s & PHY_STAT_ANC) ++ printk(", auto-negotiation complete"); ++ } ++ ++ if (s & PHY_STAT_FAULT) ++ printk(", remote fault"); ++ ++ printk(".\n"); ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ ++#define RFIFO_DATA 0xf0003184 ++#define TFIFO_DATA 0xf00031a4 ++ ++/* ++ * Initialize FEC receive task. ++ * Returns task number of FEC receive task. ++ * Returns -1 on failure ++ */ ++int ++mpc5xxx_fec_rx_task_setup(int num_bufs, int maxbufsize) ++{ ++ static TaskSetupParamSet_t params; ++ int tasknum; ++ ++ params.NumBD = num_bufs; ++ params.Size.MaxBuf = maxbufsize; ++ params.StartAddrSrc = RFIFO_DATA; ++ params.IncrSrc = 0; ++ params.SzSrc = 4; ++ params.IncrDst = 4; ++ params.SzDst = 4; ++ ++ tasknum = TaskSetup(TASK_FEC_RX, ¶ms); ++ ++ /* clear pending interrupt bits */ ++ TaskIntClear(tasknum); ++ ++ return tasknum; ++} ++ ++/* ++ * Initialize FEC transmit task. ++ * Returns task number of FEC transmit task. 
++ * Returns -1 on failure ++ */ ++int ++mpc5xxx_fec_tx_task_setup(int num_bufs) ++{ ++ static TaskSetupParamSet_t params; ++ int tasknum; ++ ++ params.NumBD = num_bufs; ++ params.IncrSrc = 4; ++ params.SzSrc = 4; ++ params.StartAddrDst = TFIFO_DATA; ++ params.IncrDst = 0; ++ params.SzDst = 4; ++ ++ tasknum = TaskSetup(TASK_FEC_TX, ¶ms); ++ ++ /* clear pending interrupt bits */ ++ TaskIntClear(tasknum); ++ ++ return tasknum; ++} ++ ++ ++ ++#ifdef PARANOID_CHECKS ++static volatile int tx_fifo_cnt, tx_fifo_ipos, tx_fifo_opos; ++static volatile int rx_fifo_opos; ++#endif ++ ++static struct rtskb *tx_fifo_skb[MPC5xxx_FEC_TBD_NUM]; ++static struct rtskb *rx_fifo_skb[MPC5xxx_FEC_RBD_NUM]; ++static BDIdx mpc5xxx_bdi_tx = 0; ++ ++ ++static int ++mpc5xxx_fec_setup(struct rtnet_device *dev, int reinit) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_xlb *xlb = (struct mpc5xxx_xlb *)MPC5xxx_XLB; ++ struct rtskb *skb; ++ int i; ++ struct mpc5xxx_rbuf *rbuf; ++ struct mpc5xxx_fec *fec = priv->fec; ++ u32 u32_value; ++ u16 u16_value; ++ ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("mpc5xxx_fec_setup\n"); ++#endif ++ ++ mpc5xxx_fec_set_paddr(dev, dev->dev_addr); ++ ++ /* ++ * Initialize receive queue ++ */ ++ priv->r_tasknum = mpc5xxx_fec_rx_task_setup(MPC5xxx_FEC_RBD_NUM, ++ MPC5xxx_FEC_RECV_BUFFER_SIZE_BC); ++ TaskBDReset(priv->r_tasknum); ++ for(i=0;idata; ++ } ++ bdi_a = TaskBDAssign(priv->r_tasknum, ++ (void*)virt_to_phys((void *)&rbuf->data), ++ 0, sizeof *rbuf, MPC5xxx_FEC_RBD_INIT); ++ if(bdi_a<0) ++ panic("mpc5xxx_fec_setup: error while TaskBDAssign, err=%i\n",(int)bdi_a); ++ } ++#ifdef PARANOID_CHECKS ++ rx_fifo_opos = 0; ++#endif ++ ++ /* ++ * Initialize transmit queue ++ */ ++ if(!reinit) { ++ priv->t_tasknum = mpc5xxx_fec_tx_task_setup(MPC5xxx_FEC_TBD_NUM); ++ TaskBDReset(priv->t_tasknum); ++ mpc5xxx_bdi_tx = 0; ++ for(i=0;isequence_done) { ++ if (!priv->phy) { ++ printk("mpc5xxx_fec_setup: PHY not configured\n"); ++ return -ENODEV; /* No PHY we understand */ ++ } ++ ++ mii_do_cmd(dev, priv->phy->config); ++ mii_do_cmd(dev, phy_cmd_config); /* display configuration */ ++ while(!priv->sequence_done) ++ schedule(); ++ ++ mii_do_cmd(dev, priv->phy->startup); ++ } ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ dev->irq = MPC5xxx_FEC_IRQ; ++ priv->r_irq = MPC5xxx_SDMA_IRQ_BASE + priv->r_tasknum; ++ priv->t_irq = MPC5xxx_SDMA_IRQ_BASE + priv->t_tasknum; ++ ++ if ((i = rtdm_irq_request(&priv->irq_handle, dev->irq, ++ mpc5xxx_fec_interrupt, 0, ++ "rteth_err", dev))) { ++ printk(KERN_ERR "FEC interrupt allocation failed\n"); ++ return i; ++ } ++ ++ if ((i = rtdm_irq_request(&priv->r_irq_handle, priv->r_irq, ++ mpc5xxx_fec_receive_interrupt, 0, ++ "rteth_recv", dev))) { ++ printk(KERN_ERR "FEC receive task interrupt allocation failed\n"); ++ return i; ++ } ++ ++ if ((i = rtdm_irq_request(&priv->t_irq_handle, priv->t_irq, ++ mpc5xxx_fec_transmit_interrupt, 0, ++ "rteth_xmit", dev))) { ++ printk(KERN_ERR "FEC transmit task interrupt allocation failed\n"); ++ return i; ++ } ++ ++ rt_stack_connect(dev, &STACK_manager); ++ ++ u32_value = in_be32(&priv->gpio->port_config); ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ u32_value |= 0x00050000; /* 100MBit with MD */ ++#else ++ u32_value |= 0x00020000; /* 10MBit with 7-wire */ ++#endif ++ out_be32(&priv->gpio->port_config, u32_value); ++ ++ } ++ ++ out_be32(&fec->op_pause, 0x00010020); /* change to 0xffff0020 ??? 
*/ ++ out_be32(&fec->rfifo_cntrl, 0x0f240000); ++ out_be32(&fec->rfifo_alarm, 0x0000030c); ++ out_be32(&fec->tfifo_cntrl, 0x0f240000); ++ out_be32(&fec->tfifo_alarm, 0x00000100); ++ out_be32(&fec->x_wmrk, 0x3); /* xmit fifo watermark = 256 */ ++ out_be32(&fec->xmit_fsm, 0x03000000); /* enable crc generation */ ++ out_be32(&fec->iaddr1, 0x00000000); /* No individual filter */ ++ out_be32(&fec->iaddr2, 0x00000000); /* No individual filter */ ++ ++#ifdef CONFIG_MPC5200 ++ /* Disable COMM Bus Prefetch */ ++ u16_value = in_be16(&priv->sdma->PtdCntrl); ++ u16_value |= 1; ++ out_be16(&priv->sdma->PtdCntrl, u16_value); ++ ++ /* Disable (or enable?) BestComm XLB address snooping */ ++ out_be32(&xlb->config, in_be32(&xlb->config) | MPC5200B_XLB_CONF_BSDIS); ++#endif ++ ++ if(!reinit) { ++#if !defined(CONFIG_XENO_DRIVERS_NET_USE_MDIO) ++ mpc5xxx_fec_restart (dev, 0); /* always use half duplex mode only */ ++#else ++#ifdef CONFIG_UBOOT ++ extern unsigned char __res[]; ++ bd_t *bd = (bd_t *)__res; ++#define MPC5xxx_IPBFREQ bd->bi_ipbfreq ++#else ++#define MPC5xxx_IPBFREQ CONFIG_PPC_5xxx_IPBFREQ ++#endif ++ ++ for (i=0; iphy_speed = (((MPC5xxx_IPBFREQ >> 20) / 5) << 1); ++ ++ /*mpc5xxx_fec_restart (dev, 0);*/ /* half duplex, negotiate speed */ ++ mpc5xxx_fec_restart (dev, 1); /* full duplex, negotiate speed */ ++ ++ /* Queue up command to detect the PHY and initialize the ++ * remainder of the interface. ++ */ ++ priv->phy_id_done = 0; ++ priv->phy_addr = 0; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), mii_discover_phy, 0); ++ ++ priv->old_status = 0; ++ ++ /* ++ * Read MIB counters in order to reset them, ++ * then zero all the stats fields in memory ++ */ ++ mpc5xxx_fec_update_stat(dev); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ if (reinit) { ++ if (!priv->sequence_done) { ++ if (!priv->phy) { ++ printk("mpc5xxx_fec_open: PHY not configured\n"); ++ return -ENODEV; /* No PHY we understand */ ++ } ++ ++ mii_do_cmd(dev, priv->phy->config); ++ mii_do_cmd(dev, phy_cmd_config); /* display configuration */ ++ while(!priv->sequence_done) ++ schedule(); ++ ++ mii_do_cmd(dev, priv->phy->startup); ++ ++ /* ++ * Currently, MII link interrupts are not supported, ++ * so start the 100 msec timer to monitor the link up event. ++ */ ++ init_timer(&priv->phy_timer_list); ++ ++ priv->phy_timer_list.expires = jiffies + (100 * HZ / 1000); ++ priv->phy_timer_list.data = (unsigned long)dev; ++ priv->phy_timer_list.function = mdio_timer_callback; ++ add_timer(&priv->phy_timer_list); ++ ++ printk("%s: Waiting for the link to be up...\n", dev->name); ++ while (priv->link == 0) { ++ schedule(); ++ } ++ mii_display_status(dev); ++ if (priv->full_duplex == 0) { /* FD is not negotiated, restart the fec in HD */ ++ mpc5xxx_fec_restart(dev, 0); ++ } ++ } ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++#endif ++ } ++ else { ++ mpc5xxx_fec_restart (dev, 0); ++ } ++ ++ rtnetif_start_queue(dev); ++ ++ TaskStart(priv->r_tasknum, TASK_AUTOSTART_ENABLE, ++ priv->r_tasknum, TASK_INTERRUPT_ENABLE); ++ ++ if(reinit) { ++ TaskStart(priv->t_tasknum, TASK_AUTOSTART_ENABLE, ++ priv->t_tasknum, TASK_INTERRUPT_ENABLE); ++ } ++ ++ return 0; ++ ++eagain: ++ printk("mpc5xxx_fec_setup: failed\n"); ++ for (i=0; ir_tasknum); ++ ++ return -EAGAIN; ++} ++ ++static int ++mpc5xxx_fec_open(struct rtnet_device *dev) ++{ ++ return mpc5xxx_fec_setup(dev,0); ++} ++ ++/* This will only be invoked if your driver is _not_ in XOFF state. 
++ * What this means is that you need not check it, and that this ++ * invariant will hold if you make sure that the netif_*_queue() ++ * calls are done at the proper times. ++ */ ++static int ++mpc5xxx_fec_hard_start_xmit(struct rtskb *skb, struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ rtdm_lockctx_t context; ++ int pad; ++ short length; ++ BDIdx bdi_a; ++ ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("mpc5xxx_fec_hard_start_xmit:\n"); ++ printk("dev %08x, priv %08x, skb %08x\n", ++ (u32)dev, (u32)priv, (u32)skb); ++#endif ++#if MPC5xxx_FEC_DEBUG > 0 ++ if (fec_start_status(&priv->t_queue) & MPC5xxx_FEC_TBD_TFD) ++ panic("MPC5xxx transmit queue overrun\n"); ++#endif ++ ++ length = skb->len; ++#ifdef MUST_ALIGN_TRANSMIT_DATA ++ pad = (int)skb->data & 3; ++ if (pad) { ++ void *old_data = skb->data; ++ rtskb_push(skb, pad); ++ memcpy(skb->data, old_data, length); ++ rtskb_trim(skb, length); ++ } ++#endif ++ /* Zero out up to the minimum length ethernet packet size, ++ * so we don't inadvertently expose sensitive data ++ */ ++ pad = ETH_ZLEN - skb->len; ++ if (pad > 0) { ++ skb = rtskb_padto(skb, ETH_ZLEN); ++ if (skb == 0) { ++ printk("rtskb_padto failed\n"); ++ return 0; ++ } ++ length += pad; ++ } ++ ++ flush_dcache_range((u32)skb->data, (u32)skb->data + length); ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ ++ bdi_a = TaskBDAssign(priv->t_tasknum,(void*)virt_to_phys((void *)skb->data), ++ NULL,length,MPC5xxx_FEC_TBD_INIT); ++ ++#ifdef PARANOID_CHECKS ++ /* check for other errors during assignment*/ ++ if((bdi_a<0)||(bdi_a>=MPC5xxx_FEC_TBD_NUM)) ++ panic("mpc5xxx_fec_hard_start_xmit: error while TaskBDAssign, err=%i\n",(int)bdi_a); ++ ++ /* sanity check: bdi must always equal tx_fifo_ipos*/ ++ if(bdi_a!=tx_fifo_ipos) ++ panic("bdi_a!=tx_fifo_ipos: %i, %i\n",(int)bdi_a,tx_fifo_ipos); ++ ++ tx_fifo_cnt++; ++ tx_fifo_ipos++; ++ if(tx_fifo_ipos==MPC5xxx_FEC_TBD_NUM) tx_fifo_ipos=0; ++ ++ /* check number of BDs in use*/ ++ if(TaskBDInUse(priv->t_tasknum)!=tx_fifo_cnt) ++ panic("TaskBDInUse != tx_fifo_cnt: %i %i\n",TaskBDInUse(priv->t_tasknum),tx_fifo_cnt); ++#endif ++ ++ tx_fifo_skb[bdi_a]=skb; ++ ++#ifdef ORIGINAL_CODE ++ dev->trans_start = jiffies; ++#endif /* ORIGINAL_CODE */ ++ ++ /* Get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ TaskStart(priv->t_tasknum, TASK_AUTOSTART_ENABLE, priv->t_tasknum, TASK_INTERRUPT_ENABLE); ++ ++ if(TaskBDInUse(priv->t_tasknum)==MPC5xxx_FEC_TBD_NUM) { ++ priv->tx_full = 1; ++ rtnetif_stop_queue(dev); ++ } ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ return 0; ++} ++ ++/* This handles SDMA transmit task interrupts ++ */ ++static int ++mpc5xxx_fec_transmit_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *dev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ BDIdx bdi_r; ++ ++ rtdm_lock_get(&priv->lock); ++ ++ while(TaskBDInUse(priv->t_tasknum)) { ++ ++ /* relase BD*/ ++ bdi_r = TaskBDRelease(priv->t_tasknum); ++ ++ /* we are done if we can't release any more BDs*/ ++ if(bdi_r==TASK_ERR_BD_BUSY) break; ++ /* if(bdi_r<0) break;*/ ++ ++#ifdef PARANOID_CHECKS ++ /* check for other errors during release*/ ++ if((bdi_r<0)||(bdi_r>=MPC5xxx_FEC_TBD_NUM)) ++ panic("mpc5xxx_fec_transmit_interrupt: error while TaskBDRelease, err=%i\n",(int)bdi_r); ++ ++ tx_fifo_cnt--; ++ tx_fifo_opos++; ++ 
if(tx_fifo_opos==MPC5xxx_FEC_TBD_NUM) tx_fifo_opos=0; ++ ++ /* sanity check: bdi_r must always equal tx_fifo_opos*/ ++ if(bdi_r!=tx_fifo_opos) { ++ panic("bdi_r!=tx_fifo_opos: %i, %i\n",(int)bdi_r,tx_fifo_opos); ++ } ++ ++ /* check number of BDs in use*/ ++ if(TaskBDInUse(priv->t_tasknum)!=tx_fifo_cnt) ++ panic("TaskBDInUse != tx_fifo_cnt: %i %i\n",TaskBDInUse(priv->t_tasknum),tx_fifo_cnt); ++#endif ++ ++ if((tx_fifo_skb[mpc5xxx_bdi_tx])==0) ++ panic("skb confusion in tx\n"); ++ ++ dev_kfree_rtskb(tx_fifo_skb[mpc5xxx_bdi_tx]); ++ tx_fifo_skb[mpc5xxx_bdi_tx]=0; ++ ++ mpc5xxx_bdi_tx = bdi_r; ++ ++ if(TaskBDInUse(priv->t_tasknum)tx_full = 0; ++ ++ } ++ ++ if (rtnetif_queue_stopped(dev) && !priv->tx_full) ++ rtnetif_wake_queue(dev); ++ ++ rtdm_lock_put(&priv->lock); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static BDIdx mpc5xxx_bdi_rx = 0; ++ ++static int ++mpc5xxx_fec_receive_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *dev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct rtskb *skb; ++ struct rtskb *nskb; ++ struct mpc5xxx_rbuf *rbuf; ++ struct mpc5xxx_rbuf *nrbuf; ++ u32 status; ++ int length; ++ BDIdx bdi_a, bdi_r; ++ int discard = 0; ++ int dropped = 0; ++ int packets = 0; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ while(1) { ++ ++ /* release BD*/ ++ bdi_r = TaskBDRelease(priv->r_tasknum); ++ ++ /* we are done if we can't release any more BDs*/ ++ if(bdi_r==TASK_ERR_BD_BUSY) break; ++ ++#ifdef PARANOID_CHECKS ++ /* check for other errors during release*/ ++ if((bdi_r<0)||(bdi_r>=MPC5xxx_FEC_RBD_NUM)) ++ panic("mpc5xxx_fec_receive_interrupt: error while TaskBDRelease, err=%i\n",(int)bdi_r); ++ ++ rx_fifo_opos++; ++ if(rx_fifo_opos==MPC5xxx_FEC_RBD_NUM) rx_fifo_opos=0; ++ ++ if(bdi_r != rx_fifo_opos) ++ panic("bdi_r != rx_fifo_opos: %i, %i\n",bdi_r, rx_fifo_opos); ++#endif ++ ++ /* get BD status in order to determine length*/ ++ status = TaskGetBD(priv->r_tasknum,mpc5xxx_bdi_rx)->Status; ++ ++ /* determine packet length and pointer to socket buffer / actual data*/ ++ skb = rx_fifo_skb[mpc5xxx_bdi_rx]; ++ length = (status & 0xffff) - 4; ++ rbuf = (struct mpc5xxx_rbuf *)skb->data; ++ ++#ifndef EXIT_ISR_AT_MEMORY_SQUEEZE ++ /* in case of a memory squeeze, we just drop all packets, because*/ ++ /* subsequent allocations will also fail.*/ ++ if(discard!=3) { ++#endif ++ ++ /* check for frame errors*/ ++ if(status&0x00370000) { ++ /* frame error, drop */ ++#ifdef DISPLAY_WARNINGS ++ if(status&MPC5xxx_FEC_FRAME_LG) ++ printk("%s: Frame length error, dropping packet (status=0x%08x)\n",dev->name,status); ++ if(status&MPC5xxx_FEC_FRAME_NO) ++ printk("%s: Non-octet aligned frame error, dropping packet (status=0x%08x)\n",dev->name,status); ++ if(status&MPC5xxx_FEC_FRAME_CR) ++ printk("%s: Frame CRC error, dropping packet (status=0x%08x)\n",dev->name,status); ++ if(status&MPC5xxx_FEC_FRAME_OV) ++ printk("%s: FIFO overrun error, dropping packet (status=0x%08x)\n",dev->name,status); ++ if(status&MPC5xxx_FEC_FRAME_TR) ++ printk("%s: Frame truncated error, dropping packet (status=0x%08x)\n",dev->name,status); ++#endif ++ discard=1; ++ } ++ else if (length>(MPC5xxx_FEC_RECV_BUFFER_SIZE-4)) { ++ /* packet too big, drop */ ++#ifdef DISPLAY_WARNINGS ++ printk("%s: Frame too big, dropping packet (length=%i)\n",dev->name,length); ++#endif ++ discard=2; ++ } ++ else { ++ /* allocate replacement skb */ ++ nskb = dev_alloc_rtskb(sizeof *nrbuf, dev); ++ if (nskb == NULL) { ++ /* memory squeeze, drop */ 
++ discard=3; ++ dropped++; ++ } ++ else { ++ discard=0; ++ } ++ } ++ ++#ifndef EXIT_ISR_AT_MEMORY_SQUEEZE ++ } ++ else { ++ dropped++; ++ } ++#endif ++ ++ if (discard) { ++ priv->stats.rx_dropped++; ++ nrbuf = (struct mpc5xxx_rbuf *)skb->data; ++ } ++ else { ++#ifdef MUST_UNALIGN_RECEIVE_DATA ++ rtskb_reserve(nskb,2); ++#endif ++ nrbuf = (struct mpc5xxx_rbuf *)rtskb_put(nskb, sizeof *nrbuf); ++ ++ /* only invalidate the number of bytes in dcache actually received*/ ++#ifdef MUST_UNALIGN_RECEIVE_DATA ++ invalidate_dcache_range((u32)rbuf - 2, (u32)rbuf + length); ++#else ++ invalidate_dcache_range((u32)rbuf, (u32)rbuf + length); ++#endif ++ rtskb_trim(skb, length); ++ skb->protocol = rt_eth_type_trans(skb, dev); ++ skb->time_stamp = time_stamp; ++ rtnetif_rx(skb); ++ packets++; ++#ifdef ORIGINAL_CODE ++ dev->last_rx = jiffies; ++#endif /* ORIGINAL_CODE */ ++ rx_fifo_skb[mpc5xxx_bdi_rx] = nskb; ++ } ++ ++ /* Assign new socket buffer to BD*/ ++ bdi_a = TaskBDAssign(priv->r_tasknum, (void*)virt_to_phys((void *)&nrbuf->data), ++ 0, sizeof *nrbuf, MPC5xxx_FEC_RBD_INIT); ++ ++#ifdef PARANOID_CHECKS ++ /* check for errors during assignment*/ ++ if((bdi_a<0)||(bdi_r>=MPC5xxx_FEC_RBD_NUM)) ++ panic("mpc5xxx_fec_receive_interrupt: error while TaskBDAssign, err=%i\n",(int)bdi_a); ++ ++ /* check if Assign/Release sequence numbers are ok*/ ++ if(((bdi_a+1)%MPC5xxx_FEC_RBD_NUM) != bdi_r) ++ panic("bdi_a+1 != bdi_r: %i %i\n",(int)((bdi_a+1)%MPC5xxx_FEC_RBD_NUM),(int)bdi_r); ++#endif ++ ++ mpc5xxx_bdi_rx = bdi_r; ++ ++#ifdef EXIT_ISR_AT_MEMORY_SQUEEZE ++ /* if we couldn't get memory for a new socket buffer, then it doesn't*/ ++ /* make sense to proceed.*/ ++ if (discard==3) ++ break; ++#endif ++ ++ } ++ ++#ifdef DISPLAY_WARNINGS ++ if(dropped) { ++ printk("%s: Memory squeeze, dropped %i packets\n",dev->name,dropped); ++ } ++#endif ++ TaskStart(priv->r_tasknum, TASK_AUTOSTART_ENABLE, priv->r_tasknum, TASK_INTERRUPT_ENABLE); ++ ++ if (packets > 0) ++ rt_mark_stack_mgr(dev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++ ++static void ++mpc5xxx_fec_reinit(struct rtnet_device *dev) ++{ ++ int retval; ++ printk("mpc5xxx_fec_reinit\n"); ++ mpc5xxx_fec_cleanup(dev,1); ++ retval=mpc5xxx_fec_setup(dev,1); ++ if(retval) panic("reinit failed\n"); ++} ++ ++ ++static int ++mpc5xxx_fec_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *dev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ int ievent; ++ ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("mpc5xxx_fec_interrupt:\n"); ++#endif ++ ++ ievent = in_be32(&fec->ievent); ++ out_be32(&fec->ievent, ievent); /* clear pending events */ ++ ++ if (ievent & (MPC5xxx_FEC_IEVENT_RFIFO_ERROR | ++ MPC5xxx_FEC_IEVENT_XFIFO_ERROR)) { ++ if (ievent & MPC5xxx_FEC_IEVENT_RFIFO_ERROR) ++ printk(KERN_WARNING "MPC5xxx_FEC_IEVENT_RFIFO_ERROR\n"); ++ if (ievent & MPC5xxx_FEC_IEVENT_XFIFO_ERROR) ++ printk(KERN_WARNING "MPC5xxx_FEC_IEVENT_XFIFO_ERROR\n"); ++ mpc5xxx_fec_reinit(dev); ++ } ++ else if (ievent & MPC5xxx_FEC_IEVENT_MII) { ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ mpc5xxx_fec_mii(dev); ++#else ++ printk("%s[%d] %s: unexpected MPC5xxx_FEC_IEVENT_MII\n", ++ __FILE__, __LINE__, __FUNCTION__); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ } ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int ++mpc5xxx_fec_cleanup(struct rtnet_device *dev, int reinit) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; 
++ unsigned long timeout; ++ int i; ++ ++ priv->open_time = 0; ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ priv->sequence_done = 0; ++#endif ++ ++ rtnetif_stop_queue(dev); ++ ++ /* Wait for rx queue to drain */ ++ if(!reinit) { ++ timeout = jiffies + 2*HZ; ++ while (TaskBDInUse(priv->t_tasknum) && (jiffies < timeout)) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(HZ/10); ++ } ++ } ++ ++ /* Disable FEC interrupts */ ++ out_be32(&fec->imask, 0x0); ++ ++ /* Stop FEC */ ++ out_be32(&fec->ecntrl, in_be32(&fec->ecntrl) & ~0x2); ++ ++ /* Disable the rx and tx queues. */ ++ TaskStop(priv->r_tasknum); ++ TaskStop(priv->t_tasknum); ++ ++ /* Release irqs */ ++ if(!reinit) { ++ rtdm_irq_disable(&priv->irq_handle); ++ rtdm_irq_disable(&priv->r_irq_handle); ++ rtdm_irq_disable(&priv->t_irq_handle); ++ rtdm_irq_free(&priv->irq_handle); ++ rtdm_irq_free(&priv->r_irq_handle); ++ rtdm_irq_free(&priv->t_irq_handle); ++ rt_stack_disconnect(dev); ++ } ++ ++ /* Free rx Buffers */ ++ if(!reinit) { ++ for (i=0; ipriv; ++ struct net_device_stats *stats = &priv->stats; ++ struct mpc5xxx_fec *fec = priv->fec; ++ ++ stats->rx_bytes = in_be32(&fec->rmon_r_octets); ++ stats->rx_packets = in_be32(&fec->rmon_r_packets); ++ stats->rx_errors = stats->rx_packets - ( ++ in_be32(&fec->ieee_r_frame_ok) + ++ in_be32(&fec->rmon_r_mc_pkt)); ++ stats->tx_bytes = in_be32(&fec->rmon_t_octets); ++ stats->tx_packets = in_be32(&fec->rmon_t_packets); ++ stats->tx_errors = stats->tx_packets - ( ++ in_be32(&fec->ieee_t_frame_ok) + ++ in_be32(&fec->rmon_t_col) + ++ in_be32(&fec->ieee_t_1col) + ++ in_be32(&fec->ieee_t_mcol) + ++ in_be32(&fec->ieee_t_def)); ++ stats->multicast = in_be32(&fec->rmon_r_mc_pkt); ++ stats->collisions = in_be32(&fec->rmon_t_col); ++ ++ /* detailed rx_errors: */ ++ stats->rx_length_errors = in_be32(&fec->rmon_r_undersize) ++ + in_be32(&fec->rmon_r_oversize) ++ + in_be32(&fec->rmon_r_frag) ++ + in_be32(&fec->rmon_r_jab); ++ stats->rx_over_errors = in_be32(&fec->r_macerr); ++ stats->rx_crc_errors = in_be32(&fec->ieee_r_crc); ++ stats->rx_frame_errors = in_be32(&fec->ieee_r_align); ++ stats->rx_fifo_errors = in_be32(&fec->rmon_r_drop); ++ stats->rx_missed_errors = in_be32(&fec->rmon_r_drop); ++ ++ /* detailed tx_errors: */ ++ stats->tx_aborted_errors = 0; ++ stats->tx_carrier_errors = in_be32(&fec->ieee_t_cserr); ++ stats->tx_fifo_errors = in_be32(&fec->rmon_t_drop) + ++ in_be32(&fec->ieee_t_macerr); ++ stats->tx_heartbeat_errors = in_be32(&fec->ieee_t_sqe); ++ stats->tx_window_errors = in_be32(&fec->ieee_t_lcol); ++ ++ return stats; ++} ++ ++static void ++mpc5xxx_fec_update_stat(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct net_device_stats *stats = &priv->stats; ++ struct mpc5xxx_fec *fec = priv->fec; ++ ++ out_be32(&fec->mib_control, MPC5xxx_FEC_MIB_DISABLE); ++ memset_io(&fec->rmon_t_drop, 0, ++ (u32)&fec->reserved10 - (u32)&fec->rmon_t_drop); ++ out_be32(&fec->mib_control, 0); ++ memset(stats, 0, sizeof *stats); ++ mpc5xxx_fec_get_stats(dev); ++} ++ ++#ifdef ORIGINAL_CODE ++/* ++ * Set or clear the multicast filter for this adaptor. 
++ */ ++static void ++mpc5xxx_fec_set_multicast_list(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ u32 u32_value; ++ ++ if (dev->flags & IFF_PROMISC) { ++ printk("%s: Promiscuous mode enabled.\n", dev->name); ++ u32_value = in_be32(&fec->r_cntrl); ++ u32_value |= MPC5xxx_FEC_RCNTRL_PROM; ++ out_be32(&fec->r_cntrl, u32_value); ++ } ++ else if (dev->flags & IFF_ALLMULTI) { ++ u32_value = in_be32(&fec->r_cntrl); ++ u32_value &= ~MPC5xxx_FEC_RCNTRL_PROM; ++ out_be32(&fec->r_cntrl, u32_value); ++ out_be32(&fec->gaddr1, 0xffffffff); ++ out_be32(&fec->gaddr2, 0xffffffff); ++ } ++ else { ++ u32 crc; ++ int i; ++ struct dev_mc_list *dmi; ++ u32 gaddr1 = 0x00000000; ++ u32 gaddr2 = 0x00000000; ++ ++ dmi = dev->mc_list; ++ for (i=0; imc_count; i++) { ++ crc = ether_crc_le(6, dmi->dmi_addr) >> 26; ++ if (crc >= 32) ++ gaddr1 |= 1 << (crc-32); ++ else ++ gaddr2 |= 1 << crc; ++ dmi = dmi->next; ++ } ++ out_be32(&fec->gaddr1, gaddr1); ++ out_be32(&fec->gaddr2, gaddr2); ++ } ++} ++#endif /* ORIGINAL_CODE */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET ++static void mpc5xxx_mdio_callback(uint regval, struct rtnet_device *dev, uint data) ++{ ++ mdio_read_data_t* mrd = (mdio_read_data_t *)data; ++ mrd->regval = 0xFFFF & regval; ++ wake_up_process(mrd->sleeping_task); ++} ++ ++static int mpc5xxx_mdio_read(struct rtnet_device *dev, int phy_id, int location) ++{ ++ uint retval; ++ mdio_read_data_t* mrd = (mdio_read_data_t *)kmalloc(sizeof(*mrd), ++ GFP_KERNEL); ++ ++ mrd->sleeping_task = current; ++ set_current_state(TASK_INTERRUPTIBLE); ++ mii_queue(dev, mk_mii_read(location), ++ mpc5xxx_mdio_callback, (unsigned int) mrd); ++ schedule(); ++ ++ retval = mrd->regval; ++ ++ kfree(mrd); ++ ++ return retval; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET_XXX ++static void mpc5xxx_mdio_write(struct rtnet_device *dev, int phy_id, int location, int value) ++{ ++ mii_queue(dev, mk_mii_write(location, value), NULL, 0); ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++#ifdef ORIGINAL_CODE ++static int ++mpc5xxx_netdev_ethtool_ioctl(struct rtnet_device *dev, void *useraddr) ++{ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET_XXX ++ struct mpc5xxx_fec_priv *private = (struct mpc5xxx_fec_priv *)dev->priv; ++#endif ++ u32 ethcmd; ++ ++ if (copy_from_user(ðcmd, useraddr, sizeof ethcmd)) ++ return -EFAULT; ++ ++ switch (ethcmd) { ++ ++ /* Get driver info */ ++ case ETHTOOL_GDRVINFO:{ ++ struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO }; ++ strncpy(info.driver, "gt64260", ++ sizeof info.driver - 1); ++ strncpy(info.version, version, ++ sizeof info.version - 1); ++ if (copy_to_user(useraddr, &info, sizeof info)) ++ return -EFAULT; ++ return 0; ++ } ++ /* get settings */ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET_XXX ++ case ETHTOOL_GSET:{ ++ struct ethtool_cmd ecmd = { ETHTOOL_GSET }; ++ spin_lock_irq(&private->lock); ++ mii_ethtool_gset(&private->mii_if, &ecmd); ++ spin_unlock_irq(&private->lock); ++ if (copy_to_user(useraddr, &ecmd, sizeof ecmd)) ++ return -EFAULT; ++ return 0; ++ } ++ /* set settings */ ++ case ETHTOOL_SSET:{ ++ int r; ++ struct ethtool_cmd ecmd; ++ if (copy_from_user(&ecmd, useraddr, sizeof ecmd)) ++ return -EFAULT; ++ spin_lock_irq(&private->lock); ++ r = mii_ethtool_sset(&private->mii_if, &ecmd); ++ 
spin_unlock_irq(&private->lock); ++ return r; ++ } ++ /* restart autonegotiation */ ++ case ETHTOOL_NWAY_RST:{ ++ return mii_nway_restart(&private->mii_if); ++ } ++ /* get link status */ ++ case ETHTOOL_GLINK:{ ++ struct ethtool_value edata = { ETHTOOL_GLINK }; ++ edata.data = mii_link_ok(&private->mii_if); ++ if (copy_to_user(useraddr, &edata, sizeof edata)) ++ return -EFAULT; ++ return 0; ++ } ++#endif ++ /* get message-level */ ++ case ETHTOOL_GMSGLVL:{ ++ struct ethtool_value edata = { ETHTOOL_GMSGLVL }; ++ edata.data = 0; /* XXX */ ++ if (copy_to_user(useraddr, &edata, sizeof edata)) ++ return -EFAULT; ++ return 0; ++ } ++ /* set message-level */ ++ case ETHTOOL_SMSGLVL:{ ++ struct ethtool_value edata; ++ if (copy_from_user(&edata, useraddr, sizeof edata)) ++ return -EFAULT; ++/* debug = edata.data; *//* XXX */ ++ return 0; ++ } ++ } ++ return -EOPNOTSUPP; ++} ++ ++static int ++mpc5xxx_fec_ioctl(struct rtnet_device *dev, struct ifreq *rq, int cmd) ++{ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET_XXX ++ struct mii_ioctl_data *data = (struct mii_ioctl_data *) &rq->ifr_data; ++ int phy = dev->base_addr & 0x1f; ++#endif ++ int retval; ++ ++ switch (cmd) { ++ case SIOCETHTOOL: ++ retval = mpc5xxx_netdev_ethtool_ioctl( ++ dev, (void *) rq->ifr_data); ++ break; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET_XXX ++ case SIOCGMIIPHY: /* Get address of MII PHY in use. */ ++ case SIOCDEVPRIVATE: /* for binary compat, remove in 2.5 */ ++ data->phy_id = phy; ++ /* Fall through */ ++ ++ case SIOCGMIIREG: /* Read MII PHY register. */ ++ case SIOCDEVPRIVATE + 1: /* for binary compat, remove in 2.5 */ ++ data->val_out = ++ mpc5xxx_mdio_read(dev, data->phy_id&0x1f, ++ data->reg_num&0x1f); ++ retval = 0; ++ break; ++ ++ case SIOCSMIIREG: /* Write MII PHY register. 
*/ ++ case SIOCDEVPRIVATE + 2: /* for binary compat, remove in 2.5 */ ++ if (!capable(CAP_NET_ADMIN)) { ++ retval = -EPERM; ++ } else { ++ mpc5xxx_mdio_write(dev, data->phy_id & 0x1f, ++ data->reg_num & 0x1f, data->val_in); ++ retval = 0; ++ } ++ break; ++#endif ++ ++ default: ++ retval = -EOPNOTSUPP; ++ break; ++ } ++ return retval; ++} ++ ++static void __init ++mpc5xxx_fec_str2mac(char *str, unsigned char *mac) ++{ ++ int i; ++ u64 val64; ++ ++ val64 = simple_strtoull(str, NULL, 16); ++ ++ for (i = 0; i < 6; i++) ++ mac[5-i] = val64 >> (i*8); ++} ++ ++static int __init ++mpc5xxx_fec_mac_setup(char *mac_address) ++{ ++ mpc5xxx_fec_str2mac(mac_address, mpc5xxx_fec_mac_addr); ++ return 0; ++} ++ ++__setup("mpc5xxx_mac=", mpc5xxx_fec_mac_setup); ++#endif /* ORIGINAL_CODE */ ++ ++static int __init ++mpc5xxx_fec_init(void) ++{ ++ struct mpc5xxx_fec *fec; ++ struct rtnet_device *dev; ++ struct mpc5xxx_fec_priv *priv; ++ int err = 0; ++ ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("mpc5xxx_fec_init\n"); ++#endif ++ ++ if (!rx_pool_size) ++ rx_pool_size = MPC5xxx_FEC_RBD_NUM * 2; ++ ++ dev = rt_alloc_etherdev(sizeof(*priv), rx_pool_size + MPC5xxx_FEC_TBD_NUM); ++ if (!dev) ++ return -EIO; ++ rtdev_alloc_name(dev, "rteth%d"); ++ memset(dev->priv, 0, sizeof(*priv)); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ ++ ++ mpc5xxx_fec_dev = dev; ++ priv = (struct mpc5xxx_fec_priv *)dev->priv; ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("fec_priv %08x\n", (u32)priv); ++#endif ++ priv->fec = fec = (struct mpc5xxx_fec *)MPC5xxx_FEC; ++ priv->gpio = (struct mpc5xxx_gpio *)MPC5xxx_GPIO; ++ priv->sdma = (struct mpc5xxx_sdma *)MPC5xxx_SDMA; ++ ++ rtdm_lock_init(&priv->lock); ++ dev->open = mpc5xxx_fec_open; ++ dev->stop = mpc5xxx_fec_close; ++ dev->hard_start_xmit = mpc5xxx_fec_hard_start_xmit; ++ //FIXME dev->hard_header = &rt_eth_header; ++ dev->get_stats = mpc5xxx_fec_get_stats; ++#ifdef ORIGINAL_CODE ++ dev->do_ioctl = mpc5xxx_fec_ioctl; ++ dev->set_mac_address = mpc5xxx_fec_set_mac_address; ++ dev->set_multicast_list = mpc5xxx_fec_set_multicast_list; ++ ++ dev->tx_timeout = mpc5xxx_fec_tx_timeout; ++ dev->watchdog_timeo = MPC5xxx_FEC_WATCHDOG_TIMEOUT; ++#endif /* ORIGINAL_CODE */ ++ dev->flags &= ~IFF_RUNNING; ++ ++ if ((err = rt_register_rtnetdev(dev))) ++ goto abort; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FASTROUTE ++ dev->accept_fastpath = mpc5xxx_fec_accept_fastpath; ++#endif ++ if (memcmp(mpc5xxx_fec_mac_addr, null_mac, 6) != 0) ++ memcpy(dev->dev_addr, mpc5xxx_fec_mac_addr, 6); ++ else { ++ *(u32 *)&dev->dev_addr[0] = in_be32(&fec->paddr1); ++ *(u16 *)&dev->dev_addr[4] = in_be16((u16*)&fec->paddr2); ++ } ++ ++ /* ++ * Read MIB counters in order to reset them, ++ * then zero all the stats fields in memory ++ */ ++ mpc5xxx_fec_update_stat(dev); ++ ++ return 0; ++ ++abort: ++ rtdev_free(dev); ++ ++ return err; ++} ++ ++static void __exit ++mpc5xxx_fec_uninit(void) ++{ ++ struct rtnet_device *dev = mpc5xxx_fec_dev; ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ ++ rt_stack_disconnect(dev); ++ rt_unregister_rtnetdev(dev); ++ rt_rtdev_disconnect(dev); ++ printk("%s: unloaded\n", dev->name); ++ rtdev_free(dev); ++ dev->priv = NULL; ++} ++ ++static int __init ++mpc5xxx_fec_module_init(void) ++{ ++ return mpc5xxx_fec_init(); ++} ++ ++static void __exit ++mpc5xxx_fec_module_exit(void) ++{ ++ mpc5xxx_fec_uninit(); ++} ++ ++module_init(mpc5xxx_fec_module_init); ++module_exit(mpc5xxx_fec_module_exit); +--- linux/drivers/xenomai/net/drivers/loopback.c 1970-01-01 
08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/loopback.c 2021-04-07 16:01:27.557633695 +0800 +@@ -0,0 +1,139 @@ ++/* loopback.c ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * extended by Jose Carlos Billalabeitia and Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++ ++MODULE_AUTHOR("Maintainer: Jan Kiszka "); ++MODULE_DESCRIPTION("RTnet loopback driver"); ++MODULE_LICENSE("GPL"); ++ ++static struct rtnet_device *rt_loopback_dev; ++ ++/*** ++ * rt_loopback_open ++ * @rtdev ++ */ ++static int rt_loopback_open(struct rtnet_device *rtdev) ++{ ++ rt_stack_connect(rtdev, &STACK_manager); ++ rtnetif_start_queue(rtdev); ++ ++ return 0; ++} ++ ++/*** ++ * rt_loopback_close ++ * @rtdev ++ */ ++static int rt_loopback_close(struct rtnet_device *rtdev) ++{ ++ rtnetif_stop_queue(rtdev); ++ rt_stack_disconnect(rtdev); ++ ++ return 0; ++} ++ ++/*** ++ * rt_loopback_xmit - begin packet transmission ++ * @skb: packet to be sent ++ * @dev: network device to which packet is sent ++ * ++ */ ++static int rt_loopback_xmit(struct rtskb *rtskb, struct rtnet_device *rtdev) ++{ ++ /* write transmission stamp - in case any protocol ever gets the idea to ++ ask the lookback device for this service... 
*/ ++ if (rtskb->xmit_stamp) ++ *rtskb->xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *rtskb->xmit_stamp); ++ ++ /* make sure that critical fields are re-intialised */ ++ rtskb->chain_end = rtskb; ++ ++ /* parse the Ethernet header as usual */ ++ rtskb->protocol = rt_eth_type_trans(rtskb, rtdev); ++ ++ rt_stack_deliver(rtskb); ++ ++ return 0; ++} ++ ++/*** ++ * loopback_init ++ */ ++static int __init loopback_init(void) ++{ ++ int err; ++ struct rtnet_device *rtdev; ++ ++ pr_info("initializing loopback interface...\n"); ++ ++ if ((rtdev = rt_alloc_etherdev(0, 1)) == NULL) ++ return -ENODEV; ++ ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ ++ strcpy(rtdev->name, "rtlo"); ++ ++ rtdev->vers = RTDEV_VERS_2_0; ++ rtdev->open = &rt_loopback_open; ++ rtdev->stop = &rt_loopback_close; ++ rtdev->hard_start_xmit = &rt_loopback_xmit; ++ rtdev->flags |= IFF_LOOPBACK; ++ rtdev->flags &= ~IFF_BROADCAST; ++ rtdev->features |= NETIF_F_LLTX; ++ ++ if ((err = rt_register_rtnetdev(rtdev)) != 0) { ++ rtdev_free(rtdev); ++ return err; ++ } ++ ++ rt_loopback_dev = rtdev; ++ ++ return 0; ++} ++ ++/*** ++ * loopback_cleanup ++ */ ++static void __exit loopback_cleanup(void) ++{ ++ struct rtnet_device *rtdev = rt_loopback_dev; ++ ++ pr_info("removing loopback interface...\n"); ++ ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ ++ rtdev_free(rtdev); ++} ++ ++module_init(loopback_init); ++module_exit(loopback_cleanup); +--- linux/drivers/xenomai/net/drivers/8139too.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/8139too.c 2021-04-07 16:01:27.545633712 +0800 +@@ -0,0 +1,1727 @@ ++/*** ++ * rt_8139too.c - Realtime driver for ++ * for more information, look to end of file or '8139too.c' ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++ /* ++ * This Version was modified by Fabian Koch ++ * It includes a different implementation of the 'cards' module parameter ++ * we are using an array of integers to determine which cards to use ++ * for RTnet (e.g. cards=0,1,0) ++ * ++ * Thanks to Jan Kiszka for this idea ++ */ ++ ++#define DRV_NAME "rt_8139too" ++#define DRV_VERSION "0.9.24-rt0.7" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* *** RTnet *** */ ++#include ++ ++#define MAX_UNITS 8 ++#define DEFAULT_RX_POOL_SIZE 16 ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++static int media[MAX_UNITS] = { [0 ... 
(MAX_UNITS-1)] = -1 }; ++static unsigned int rx_pool_size = DEFAULT_RX_POOL_SIZE; ++module_param_array(cards, int, NULL, 0444); ++module_param_array(media, int, NULL, 0444); ++module_param(rx_pool_size, uint, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++MODULE_PARM_DESC(media, "8139too: Bits 4+9: force full duplex, bit 5: 100Mbps"); ++MODULE_PARM_DESC(rx_pool_size, "number of receive buffers"); ++ ++/* *** RTnet *** */ ++ ++ ++#define RTL8139_DRIVER_NAME DRV_NAME " Fast Ethernet driver " DRV_VERSION ++#define PFX DRV_NAME ": " ++ ++/* enable PIO instead of MMIO, if CONFIG_8139TOO_PIO is selected */ ++/* *** RTnet *** ++#ifdef CONFIG_8139TOO_PIO ++#define USE_IO_OPS 1 ++#endif ++ *** RTnet *** */ ++ ++/* Size of the in-memory receive ring. */ ++#define RX_BUF_LEN_IDX 2 /* 0==8K, 1==16K, 2==32K, 3==64K */ ++#define RX_BUF_LEN (8192 << RX_BUF_LEN_IDX) ++#define RX_BUF_PAD 16 ++#define RX_BUF_WRAP_PAD 2048 /* spare padding to handle lack of packet wrap */ ++#define RX_BUF_TOT_LEN (RX_BUF_LEN + RX_BUF_PAD + RX_BUF_WRAP_PAD) ++ ++/* Number of Tx descriptor registers. */ ++#define NUM_TX_DESC 4 ++ ++/* max supported ethernet frame size -- must be at least (rtdev->mtu+14+4).*/ ++#define MAX_ETH_FRAME_SIZE 1536 ++ ++/* Size of the Tx bounce buffers -- must be at least (rtdev->mtu+14+4). */ ++#define TX_BUF_SIZE MAX_ETH_FRAME_SIZE ++#define TX_BUF_TOT_LEN (TX_BUF_SIZE * NUM_TX_DESC) ++ ++/* PCI Tuning Parameters ++ Threshold is bytes transferred to chip before transmission starts. */ ++#define TX_FIFO_THRESH 256 /* In bytes, rounded down to 32 byte units. */ ++ ++/* The following settings are log_2(bytes)-4: 0 == 16 bytes .. 6==1024, 7==end of packet. */ ++#define RX_FIFO_THRESH 7 /* Rx buffer level before first PCI xfer. */ ++#define RX_DMA_BURST 7 /* Maximum PCI burst, '6' is 1024 */ ++#define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */ ++#define TX_RETRY 8 /* 0-15. retries = 16 + (TX_RETRY * 16) */ ++ ++/* Operational parameters that usually are not changed. */ ++/* Time in jiffies before concluding the transmitter is hung. 
*/ ++#define TX_TIMEOUT (6*HZ) ++ ++ ++enum { ++ HAS_MII_XCVR = 0x010000, ++ HAS_CHIP_XCVR = 0x020000, ++ HAS_LNK_CHNG = 0x040000, ++}; ++ ++#define RTL_MIN_IO_SIZE 0x80 ++#define RTL8139B_IO_SIZE 256 ++ ++#define RTL8129_CAPS HAS_MII_XCVR ++#define RTL8139_CAPS HAS_CHIP_XCVR|HAS_LNK_CHNG ++ ++typedef enum { ++ RTL8139 = 0, ++ RTL8139_CB, ++ SMC1211TX, ++ /*MPX5030,*/ ++ DELTA8139, ++ ADDTRON8139, ++ DFE538TX, ++ DFE690TXD, ++ FE2000VX, ++ ALLIED8139, ++ RTL8129, ++} board_t; ++ ++ ++/* indexed by board_t, above */ ++static struct { ++ const char *name; ++ u32 hw_flags; ++} board_info[] = { ++ { "RealTek RTL8139", RTL8139_CAPS }, ++ { "RealTek RTL8129", RTL8129_CAPS }, ++}; ++ ++ ++static struct pci_device_id rtl8139_pci_tbl[] = { ++ {0x10ec, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x10ec, 0x8138, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1113, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1500, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x4033, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1186, 0x1300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1186, 0x1340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x13d1, 0xab06, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1259, 0xa117, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1259, 0xa11e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x14ea, 0xab06, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x14ea, 0xab07, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x11db, 0x1234, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1432, 0x9130, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x02ac, 0x1012, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x018a, 0x0106, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x126c, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1743, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x021b, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ ++#ifdef CONFIG_SH_SECUREEDGE5410 ++ /* Bogus 8139 silicon reports 8129 without external PROM :-( */ ++ {0x10ec, 0x8129, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++#endif ++#ifdef CONFIG_8139TOO_8129 ++ {0x10ec, 0x8129, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8129 }, ++#endif ++ ++ /* some crazy cards report invalid vendor ids like ++ * 0x0001 here. The other ids are valid and constant, ++ * so we simply don't match on the main vendor id. ++ */ ++ {PCI_ANY_ID, 0x8139, 0x10ec, 0x8139, 0, 0, RTL8139 }, ++ {PCI_ANY_ID, 0x8139, 0x1186, 0x1300, 0, 0, RTL8139 }, ++ {PCI_ANY_ID, 0x8139, 0x13d1, 0xab06, 0, 0, RTL8139 }, ++ ++ {0,} ++}; ++MODULE_DEVICE_TABLE (pci, rtl8139_pci_tbl); ++ ++/* The rest of these values should never change. */ ++ ++/* Symbolic offsets to registers. */ ++enum RTL8139_registers { ++ MAC0 = 0, /* Ethernet hardware address. */ ++ MAR0 = 8, /* Multicast filter. */ ++ TxStatus0 = 0x10, /* Transmit status (Four 32bit registers). */ ++ TxAddr0 = 0x20, /* Tx descriptors (also four 32bit). */ ++ RxBuf = 0x30, ++ ChipCmd = 0x37, ++ RxBufPtr = 0x38, ++ RxBufAddr = 0x3A, ++ IntrMask = 0x3C, ++ IntrStatus = 0x3E, ++ TxConfig = 0x40, ++ ChipVersion = 0x43, ++ RxConfig = 0x44, ++ Timer = 0x48, /* A general-purpose counter. */ ++ RxMissed = 0x4C, /* 24 bits valid, write clears. 
*/ ++ Cfg9346 = 0x50, ++ Config0 = 0x51, ++ Config1 = 0x52, ++ FlashReg = 0x54, ++ MediaStatus = 0x58, ++ Config3 = 0x59, ++ Config4 = 0x5A, /* absent on RTL-8139A */ ++ HltClk = 0x5B, ++ MultiIntr = 0x5C, ++ TxSummary = 0x60, ++ BasicModeCtrl = 0x62, ++ BasicModeStatus = 0x64, ++ NWayAdvert = 0x66, ++ NWayLPAR = 0x68, ++ NWayExpansion = 0x6A, ++ /* Undocumented registers, but required for proper operation. */ ++ FIFOTMS = 0x70, /* FIFO Control and test. */ ++ CSCR = 0x74, /* Chip Status and Configuration Register. */ ++ PARA78 = 0x78, ++ PARA7c = 0x7c, /* Magic transceiver parameter register. */ ++ Config5 = 0xD8, /* absent on RTL-8139A */ ++}; ++ ++enum ClearBitMasks { ++ MultiIntrClear = 0xF000, ++ ChipCmdClear = 0xE2, ++ Config1Clear = (1<<7)|(1<<6)|(1<<3)|(1<<2)|(1<<1), ++}; ++ ++enum ChipCmdBits { ++ CmdReset = 0x10, ++ CmdRxEnb = 0x08, ++ CmdTxEnb = 0x04, ++ RxBufEmpty = 0x01, ++}; ++ ++/* Interrupt register bits, using my own meaningful names. */ ++enum IntrStatusBits { ++ PCIErr = 0x8000, ++ PCSTimeout = 0x4000, ++ RxFIFOOver = 0x40, ++ RxUnderrun = 0x20, ++ RxOverflow = 0x10, ++ TxErr = 0x08, ++ TxOK = 0x04, ++ RxErr = 0x02, ++ RxOK = 0x01, ++ ++ RxAckBits = RxFIFOOver | RxOverflow | RxOK, ++}; ++ ++enum TxStatusBits { ++ TxHostOwns = 0x2000, ++ TxUnderrun = 0x4000, ++ TxStatOK = 0x8000, ++ TxOutOfWindow = 0x20000000, ++ TxAborted = 0x40000000, ++ TxCarrierLost = 0x80000000, ++}; ++enum RxStatusBits { ++ RxMulticast = 0x8000, ++ RxPhysical = 0x4000, ++ RxBroadcast = 0x2000, ++ RxBadSymbol = 0x0020, ++ RxRunt = 0x0010, ++ RxTooLong = 0x0008, ++ RxCRCErr = 0x0004, ++ RxBadAlign = 0x0002, ++ RxStatusOK = 0x0001, ++}; ++ ++/* Bits in RxConfig. */ ++enum rx_mode_bits { ++ AcceptErr = 0x20, ++ AcceptRunt = 0x10, ++ AcceptBroadcast = 0x08, ++ AcceptMulticast = 0x04, ++ AcceptMyPhys = 0x02, ++ AcceptAllPhys = 0x01, ++}; ++ ++/* Bits in TxConfig. */ ++enum tx_config_bits { ++ ++ /* Interframe Gap Time. 
Only TxIFG96 doesn't violate IEEE 802.3 */ ++ TxIFGShift = 24, ++ TxIFG84 = (0 << TxIFGShift), /* 8.4us / 840ns (10 / 100Mbps) */ ++ TxIFG88 = (1 << TxIFGShift), /* 8.8us / 880ns (10 / 100Mbps) */ ++ TxIFG92 = (2 << TxIFGShift), /* 9.2us / 920ns (10 / 100Mbps) */ ++ TxIFG96 = (3 << TxIFGShift), /* 9.6us / 960ns (10 / 100Mbps) */ ++ ++ TxLoopBack = (1 << 18) | (1 << 17), /* enable loopback test mode */ ++ TxCRC = (1 << 16), /* DISABLE appending CRC to end of Tx packets */ ++ TxClearAbt = (1 << 0), /* Clear abort (WO) */ ++ TxDMAShift = 8, /* DMA burst value (0-7) is shifted this many bits */ ++ TxRetryShift = 4, /* TXRR value (0-15) is shifted this many bits */ ++ ++ TxVersionMask = 0x7C800000, /* mask out version bits 30-26, 23 */ ++}; ++ ++/* Bits in Config1 */ ++enum Config1Bits { ++ Cfg1_PM_Enable = 0x01, ++ Cfg1_VPD_Enable = 0x02, ++ Cfg1_PIO = 0x04, ++ Cfg1_MMIO = 0x08, ++ LWAKE = 0x10, /* not on 8139, 8139A */ ++ Cfg1_Driver_Load = 0x20, ++ Cfg1_LED0 = 0x40, ++ Cfg1_LED1 = 0x80, ++ SLEEP = (1 << 1), /* only on 8139, 8139A */ ++ PWRDN = (1 << 0), /* only on 8139, 8139A */ ++}; ++ ++/* Bits in Config3 */ ++enum Config3Bits { ++ Cfg3_FBtBEn = (1 << 0), /* 1 = Fast Back to Back */ ++ Cfg3_FuncRegEn = (1 << 1), /* 1 = enable CardBus Function registers */ ++ Cfg3_CLKRUN_En = (1 << 2), /* 1 = enable CLKRUN */ ++ Cfg3_CardB_En = (1 << 3), /* 1 = enable CardBus registers */ ++ Cfg3_LinkUp = (1 << 4), /* 1 = wake up on link up */ ++ Cfg3_Magic = (1 << 5), /* 1 = wake up on Magic Packet (tm) */ ++ Cfg3_PARM_En = (1 << 6), /* 0 = software can set twister parameters */ ++ Cfg3_GNTSel = (1 << 7), /* 1 = delay 1 clock from PCI GNT signal */ ++}; ++ ++/* Bits in Config4 */ ++enum Config4Bits { ++ LWPTN = (1 << 2), /* not on 8139, 8139A */ ++}; ++ ++/* Bits in Config5 */ ++enum Config5Bits { ++ Cfg5_PME_STS = (1 << 0), /* 1 = PCI reset resets PME_Status */ ++ Cfg5_LANWake = (1 << 1), /* 1 = enable LANWake signal */ ++ Cfg5_LDPS = (1 << 2), /* 0 = save power when link is down */ ++ Cfg5_FIFOAddrPtr = (1 << 3), /* Realtek internal SRAM testing */ ++ Cfg5_UWF = (1 << 4), /* 1 = accept unicast wakeup frame */ ++ Cfg5_MWF = (1 << 5), /* 1 = accept multicast wakeup frame */ ++ Cfg5_BWF = (1 << 6), /* 1 = accept broadcast wakeup frame */ ++}; ++ ++enum RxConfigBits { ++ /* rx fifo threshold */ ++ RxCfgFIFOShift = 13, ++ RxCfgFIFONone = (7 << RxCfgFIFOShift), ++ ++ /* Max DMA burst */ ++ RxCfgDMAShift = 8, ++ RxCfgDMAUnlimited = (7 << RxCfgDMAShift), ++ ++ /* rx ring buffer length */ ++ RxCfgRcv8K = 0, ++ RxCfgRcv16K = (1 << 11), ++ RxCfgRcv32K = (1 << 12), ++ RxCfgRcv64K = (1 << 11) | (1 << 12), ++ ++ /* Disable packet wrap at end of Rx buffer */ ++ RxNoWrap = (1 << 7), ++}; ++ ++ ++/* Twister tuning parameters from RealTek. ++ Completely undocumented, but required to tune bad links. 
*/ ++enum CSCRBits { ++ CSCR_LinkOKBit = 0x0400, ++ CSCR_LinkChangeBit = 0x0800, ++ CSCR_LinkStatusBits = 0x0f000, ++ CSCR_LinkDownOffCmd = 0x003c0, ++ CSCR_LinkDownCmd = 0x0f3c0, ++}; ++ ++ ++enum Cfg9346Bits { ++ Cfg9346_Lock = 0x00, ++ Cfg9346_Unlock = 0xC0, ++}; ++ ++ ++#define PARA78_default 0x78fa8388 ++#define PARA7c_default 0xcb38de43 /* param[0][3] */ ++#define PARA7c_xxx 0xcb38de43 ++/*static const unsigned long param[4][4] = { ++ {0xcb39de43, 0xcb39ce43, 0xfb38de03, 0xcb38de43}, ++ {0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83}, ++ {0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83}, ++ {0xbb39de43, 0xbb39ce43, 0xbb39ce83, 0xbb39ce83} ++};*/ ++ ++typedef enum { ++ CH_8139 = 0, ++ CH_8139_K, ++ CH_8139A, ++ CH_8139B, ++ CH_8130, ++ CH_8139C, ++} chip_t; ++ ++enum chip_flags { ++ HasHltClk = (1 << 0), ++ HasLWake = (1 << 1), ++}; ++ ++ ++/* directly indexed by chip_t, above */ ++const static struct { ++ const char *name; ++ u8 version; /* from RTL8139C docs */ ++ u32 flags; ++} rtl_chip_info[] = { ++ { "RTL-8139", ++ 0x40, ++ HasHltClk, ++ }, ++ ++ { "RTL-8139 rev K", ++ 0x60, ++ HasHltClk, ++ }, ++ ++ { "RTL-8139A", ++ 0x70, ++ HasHltClk, /* XXX undocumented? */ ++ }, ++ ++ { "RTL-8139A rev G", ++ 0x72, ++ HasHltClk, /* XXX undocumented? */ ++ }, ++ ++ { "RTL-8139B", ++ 0x78, ++ HasLWake, ++ }, ++ ++ { "RTL-8130", ++ 0x7C, ++ HasLWake, ++ }, ++ ++ { "RTL-8139C", ++ 0x74, ++ HasLWake, ++ }, ++ ++ { "RTL-8100", ++ 0x7A, ++ HasLWake, ++ }, ++ ++ { "RTL-8100B/8139D", ++ 0x75, ++ HasHltClk /* XXX undocumented? */ ++ | HasLWake, ++ }, ++ ++ { "RTL-8101", ++ 0x77, ++ HasLWake, ++ }, ++}; ++ ++struct rtl_extra_stats { ++ unsigned long early_rx; ++ unsigned long tx_buf_mapped; ++ unsigned long tx_timeouts; ++ unsigned long rx_lost_in_ring; ++}; ++ ++struct rtl8139_private { ++ void *mmio_addr; ++ int drv_flags; ++ struct pci_dev *pci_dev; ++ struct net_device_stats stats; ++ unsigned char *rx_ring; ++ unsigned int cur_rx; /* Index into the Rx buffer of next Rx pkt. */ ++ unsigned int tx_flag; ++ unsigned long cur_tx; ++ unsigned long dirty_tx; ++ unsigned char *tx_buf[NUM_TX_DESC]; /* Tx bounce buffers */ ++ unsigned char *tx_bufs; /* Tx bounce buffer region. */ ++ dma_addr_t rx_ring_dma; ++ dma_addr_t tx_bufs_dma; ++ signed char phys[4]; /* MII device addresses. */ ++ char twistie, twist_row, twist_col; /* Twister tune state. */ ++ unsigned int default_port:4; /* Last rtdev->if_port value. */ ++ unsigned int medialock:1; /* Don't sense media type. 
*/ ++ rtdm_lock_t lock; ++ chip_t chipset; ++ pid_t thr_pid; ++ u32 rx_config; ++ struct rtl_extra_stats xstats; ++ int time_to_die; ++ struct mii_if_info mii; ++ rtdm_irq_t irq_handle; ++}; ++ ++MODULE_AUTHOR ("Jeff Garzik "); ++MODULE_DESCRIPTION ("RealTek RTL-8139 Fast Ethernet driver"); ++MODULE_LICENSE("GPL"); ++ ++static int read_eeprom (void *ioaddr, int location, int addr_len); ++static int mdio_read (struct rtnet_device *rtdev, int phy_id, int location); ++static void mdio_write (struct rtnet_device *rtdev, int phy_id, int location, int val); ++ ++ ++static int rtl8139_open (struct rtnet_device *rtdev); ++static int rtl8139_close (struct rtnet_device *rtdev); ++static int rtl8139_interrupt (rtdm_irq_t *irq_handle); ++static int rtl8139_start_xmit (struct rtskb *skb, struct rtnet_device *rtdev); ++ ++static int rtl8139_ioctl(struct rtnet_device *, struct ifreq *rq, int cmd); ++static struct net_device_stats *rtl8139_get_stats(struct rtnet_device*rtdev); ++ ++static void rtl8139_init_ring (struct rtnet_device *rtdev); ++static void rtl8139_set_rx_mode (struct rtnet_device *rtdev); ++static void __set_rx_mode (struct rtnet_device *rtdev); ++static void rtl8139_hw_start (struct rtnet_device *rtdev); ++ ++#ifdef USE_IO_OPS ++ ++#define RTL_R8(reg) inb (((unsigned long)ioaddr) + (reg)) ++#define RTL_R16(reg) inw (((unsigned long)ioaddr) + (reg)) ++#define RTL_R32(reg) inl (((unsigned long)ioaddr) + (reg)) ++#define RTL_W8(reg, val8) outb ((val8), ((unsigned long)ioaddr) + (reg)) ++#define RTL_W16(reg, val16) outw ((val16), ((unsigned long)ioaddr) + (reg)) ++#define RTL_W32(reg, val32) outl ((val32), ((unsigned long)ioaddr) + (reg)) ++#define RTL_W8_F RTL_W8 ++#define RTL_W16_F RTL_W16 ++#define RTL_W32_F RTL_W32 ++#undef readb ++#undef readw ++#undef readl ++#undef writeb ++#undef writew ++#undef writel ++#define readb(addr) inb((unsigned long)(addr)) ++#define readw(addr) inw((unsigned long)(addr)) ++#define readl(addr) inl((unsigned long)(addr)) ++#define writeb(val,addr) outb((val),(unsigned long)(addr)) ++#define writew(val,addr) outw((val),(unsigned long)(addr)) ++#define writel(val,addr) outl((val),(unsigned long)(addr)) ++ ++#else ++ ++/* write MMIO register, with flush */ ++/* Flush avoids rtl8139 bug w/ posted MMIO writes */ ++#define RTL_W8_F(reg, val8) do { writeb ((val8), ioaddr + (reg)); readb (ioaddr + (reg)); } while (0) ++#define RTL_W16_F(reg, val16) do { writew ((val16), ioaddr + (reg)); readw (ioaddr + (reg)); } while (0) ++#define RTL_W32_F(reg, val32) do { writel ((val32), ioaddr + (reg)); readl (ioaddr + (reg)); } while (0) ++ ++ ++#define MMIO_FLUSH_AUDIT_COMPLETE 1 ++#if MMIO_FLUSH_AUDIT_COMPLETE ++ ++/* write MMIO register */ ++#define RTL_W8(reg, val8) writeb ((val8), ioaddr + (reg)) ++#define RTL_W16(reg, val16) writew ((val16), ioaddr + (reg)) ++#define RTL_W32(reg, val32) writel ((val32), ioaddr + (reg)) ++ ++#else ++ ++/* write MMIO register, then flush */ ++#define RTL_W8 RTL_W8_F ++#define RTL_W16 RTL_W16_F ++#define RTL_W32 RTL_W32_F ++ ++#endif /* MMIO_FLUSH_AUDIT_COMPLETE */ ++ ++/* read MMIO register */ ++#define RTL_R8(reg) readb (ioaddr + (reg)) ++#define RTL_R16(reg) readw (ioaddr + (reg)) ++#define RTL_R32(reg) readl (ioaddr + (reg)) ++ ++#endif /* USE_IO_OPS */ ++ ++ ++static const u16 rtl8139_intr_mask = ++ PCIErr | PCSTimeout | RxUnderrun | RxOverflow | RxFIFOOver | ++ TxErr | TxOK | RxErr | RxOK; ++ ++static const unsigned int rtl8139_rx_config = ++ RxCfgRcv32K | RxNoWrap | ++ (RX_FIFO_THRESH << RxCfgFIFOShift) | ++ (RX_DMA_BURST << 
RxCfgDMAShift); ++ ++static const unsigned int rtl8139_tx_config = ++ TxIFG96 | (TX_DMA_BURST << TxDMAShift) | (TX_RETRY << TxRetryShift); ++ ++ ++ ++ ++static void rtl8139_chip_reset (void *ioaddr) ++{ ++ int i; ++ ++ /* Soft reset the chip. */ ++ RTL_W8 (ChipCmd, CmdReset); ++ ++ /* Check that the chip has finished the reset. */ ++ for (i = 1000; i > 0; i--) { ++ barrier(); ++ if ((RTL_R8 (ChipCmd) & CmdReset) == 0) ++ break; ++ udelay (10); ++ } ++} ++ ++ ++static int rtl8139_init_board (struct pci_dev *pdev, ++ struct rtnet_device **dev_out) ++{ ++ void *ioaddr; ++ struct rtnet_device *rtdev; ++ struct rtl8139_private *tp; ++ u8 tmp8; ++ int rc; ++ unsigned int i; ++#ifdef USE_IO_OPS ++ u32 pio_start, pio_end, pio_flags, pio_len; ++#endif ++ unsigned long mmio_start, mmio_flags, mmio_len; ++ u32 tmp; ++ ++ ++ *dev_out = NULL; ++ ++ /* dev and rtdev->priv zeroed in alloc_etherdev */ ++ rtdev=rt_alloc_etherdev(sizeof (struct rtl8139_private), ++ rx_pool_size + NUM_TX_DESC); ++ if (rtdev==NULL) { ++ rtdm_printk (KERN_ERR PFX "%s: Unable to alloc new net device\n", pci_name(pdev)); ++ return -ENOMEM; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ ++ rtdev->vers = RTDEV_VERS_2_0; ++ rtdev->sysbind = &pdev->dev; ++ tp = rtdev->priv; ++ tp->pci_dev = pdev; ++ ++ /* enable device (incl. PCI PM wakeup and hotplug setup) */ ++ rc = pci_enable_device (pdev); ++ if (rc) ++ goto err_out; ++ ++ rc = pci_request_regions (pdev, "rtnet8139too"); ++ if (rc) ++ goto err_out; ++ ++ /* enable PCI bus-mastering */ ++ pci_set_master (pdev); ++ ++ mmio_start = pci_resource_start (pdev, 1); ++ mmio_flags = pci_resource_flags (pdev, 1); ++ mmio_len = pci_resource_len (pdev, 1); ++ ++ /* set this immediately, we need to know before ++ * we talk to the chip directly */ ++#ifdef USE_IO_OPS ++ pio_start = pci_resource_start (pdev, 0); ++ pio_end = pci_resource_end (pdev, 0); ++ pio_flags = pci_resource_flags (pdev, 0); ++ pio_len = pci_resource_len (pdev, 0); ++ ++ /* make sure PCI base addr 0 is PIO */ ++ if (!(pio_flags & IORESOURCE_IO)) { ++ rtdm_printk (KERN_ERR PFX "%s: region #0 not a PIO resource, aborting\n", pci_name(pdev)); ++ rc = -ENODEV; ++ goto err_out; ++ } ++ /* check for weird/broken PCI region reporting */ ++ if (pio_len < RTL_MIN_IO_SIZE) { ++ rtdm_printk (KERN_ERR PFX "%s: Invalid PCI I/O region size(s), aborting\n", pci_name(pdev)); ++ rc = -ENODEV; ++ goto err_out; ++ } ++#else ++ /* make sure PCI base addr 1 is MMIO */ ++ if (!(mmio_flags & IORESOURCE_MEM)) { ++ rtdm_printk(KERN_ERR PFX "%s: region #1 not an MMIO resource, aborting\n", pci_name(pdev)); ++ rc = -ENODEV; ++ goto err_out; ++ } ++ if (mmio_len < RTL_MIN_IO_SIZE) { ++ rtdm_printk(KERN_ERR PFX "%s: Invalid PCI mem region size(s), aborting\n", pci_name(pdev)); ++ rc = -ENODEV; ++ goto err_out; ++ } ++#endif ++ ++#ifdef USE_IO_OPS ++ ioaddr = (void *) pio_start; ++ rtdev->base_addr = pio_start; ++ tp->mmio_addr = ioaddr; ++#else ++ /* ioremap MMIO region */ ++ ioaddr = ioremap (mmio_start, mmio_len); ++ if (ioaddr == NULL) { ++ rtdm_printk(KERN_ERR PFX "%s: cannot remap MMIO, aborting\n", pci_name(pdev)); ++ rc = -EIO; ++ goto err_out; ++ } ++ rtdev->base_addr = (long) ioaddr; ++ tp->mmio_addr = ioaddr; ++#endif /* USE_IO_OPS */ ++ ++ /* Bring old chips out of low-power mode. 
*/ ++ RTL_W8 (HltClk, 'R'); ++ ++ /* check for missing/broken hardware */ ++ if (RTL_R32 (TxConfig) == 0xFFFFFFFF) { ++ rtdm_printk(KERN_ERR PFX "%s: Chip not responding, ignoring board\n", pci_name(pdev)); ++ rc = -EIO; ++ goto err_out; ++ } ++ ++ /* identify chip attached to board */ ++ tmp = RTL_R8 (ChipVersion); ++ for (i = 0; i < ARRAY_SIZE (rtl_chip_info); i++) ++ if (tmp == rtl_chip_info[i].version) { ++ tp->chipset = i; ++ goto match; ++ } ++ ++ rtdm_printk("rt8139too: unknown chip version, assuming RTL-8139\n"); ++ rtdm_printk("rt8139too: TxConfig = 0x%08x\n", RTL_R32 (TxConfig)); ++ ++ tp->chipset = 0; ++ ++match: ++ if (tp->chipset >= CH_8139B) { ++ u8 new_tmp8 = tmp8 = RTL_R8 (Config1); ++ if ((rtl_chip_info[tp->chipset].flags & HasLWake) && ++ (tmp8 & LWAKE)) ++ new_tmp8 &= ~LWAKE; ++ new_tmp8 |= Cfg1_PM_Enable; ++ if (new_tmp8 != tmp8) { ++ RTL_W8 (Cfg9346, Cfg9346_Unlock); ++ RTL_W8 (Config1, tmp8); ++ RTL_W8 (Cfg9346, Cfg9346_Lock); ++ } ++ if (rtl_chip_info[tp->chipset].flags & HasLWake) { ++ tmp8 = RTL_R8 (Config4); ++ if (tmp8 & LWPTN) { ++ RTL_W8 (Cfg9346, Cfg9346_Unlock); ++ RTL_W8 (Config4, tmp8 & ~LWPTN); ++ RTL_W8 (Cfg9346, Cfg9346_Lock); ++ } ++ } ++ } else { ++ tmp8 = RTL_R8 (Config1); ++ tmp8 &= ~(SLEEP | PWRDN); ++ RTL_W8 (Config1, tmp8); ++ } ++ ++ rtl8139_chip_reset (ioaddr); ++ ++ *dev_out = rtdev; ++ return 0; ++ ++err_out: ++#ifndef USE_IO_OPS ++ if (tp->mmio_addr) iounmap (tp->mmio_addr); ++#endif /* !USE_IO_OPS */ ++ /* it's ok to call this even if we have no regions to free */ ++ pci_release_regions (pdev); ++ rtdev_free(rtdev); ++ pci_set_drvdata (pdev, NULL); ++ ++ return rc; ++} ++ ++ ++ ++ ++static int rtl8139_init_one (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtnet_device *rtdev = NULL; ++ struct rtl8139_private *tp; ++ int i, addr_len; ++ int option; ++ void *ioaddr; ++ static int board_idx = -1; ++ ++ board_idx++; ++ ++ if( cards[board_idx] == 0) ++ return -ENODEV; ++ ++ /* when we're built into the kernel, the driver version message ++ * is only printed if at least one 8139 board has been found ++ */ ++#ifndef MODULE ++ { ++ static int printed_version; ++ if (!printed_version++) ++ rtdm_printk (KERN_INFO RTL8139_DRIVER_NAME "\n"); ++ } ++#endif ++ ++ if ((i=rtl8139_init_board (pdev, &rtdev)) < 0) ++ return i; ++ ++ ++ tp = rtdev->priv; ++ ioaddr = tp->mmio_addr; ++ ++ addr_len = read_eeprom (ioaddr, 0, 8) == 0x8129 ? 8 : 6; ++ for (i = 0; i < 3; i++) ++ ((u16 *) (rtdev->dev_addr))[i] = ++ le16_to_cpu (read_eeprom (ioaddr, i + 7, addr_len)); ++ ++ /* The Rtl8139-specific entries in the device structure. */ ++ rtdev->open = rtl8139_open; ++ rtdev->stop = rtl8139_close; ++ rtdev->hard_header = &rt_eth_header; ++ rtdev->hard_start_xmit = rtl8139_start_xmit; ++ rtdev->do_ioctl = rtl8139_ioctl; ++ rtdev->get_stats = rtl8139_get_stats; ++ ++ /*rtdev->set_multicast_list = rtl8139_set_rx_mode; */ ++ rtdev->features |= NETIF_F_SG|NETIF_F_HW_CSUM; ++ ++ rtdev->irq = pdev->irq; ++ ++ /* rtdev->priv/tp zeroed and aligned in init_etherdev */ ++ tp = rtdev->priv; ++ ++ /* note: tp->chipset set in rtl8139_init_board */ ++ tp->drv_flags = board_info[ent->driver_data].hw_flags; ++ tp->mmio_addr = ioaddr; ++ rtdm_lock_init (&tp->lock); ++ ++ if ( (i=rt_register_rtnetdev(rtdev)) ) ++ goto err_out; ++ ++ pci_set_drvdata (pdev, rtdev); ++ ++ tp->phys[0] = 32; ++ ++ /* The lower four bits are the media type. */ ++ option = (board_idx >= MAX_UNITS) ? 0 : media[board_idx]; ++ if (option > 0) { ++ tp->mii.full_duplex = (option & 0x210) ? 
1 : 0; ++ tp->default_port = option & 0xFF; ++ if (tp->default_port) ++ tp->medialock = 1; ++ } ++ if (tp->default_port) { ++ rtdm_printk(KERN_INFO " Forcing %dMbps %s-duplex operation.\n", ++ (option & 0x20 ? 100 : 10), ++ (option & 0x10 ? "full" : "half")); ++ mdio_write(rtdev, tp->phys[0], 0, ++ ((option & 0x20) ? 0x2000 : 0) | /* 100Mbps? */ ++ ((option & 0x10) ? 0x0100 : 0)); /* Full duplex? */ ++ } ++ ++ ++ /* Put the chip into low-power mode. */ ++ if (rtl_chip_info[tp->chipset].flags & HasHltClk) ++ RTL_W8 (HltClk, 'H'); /* 'R' would leave the clock running. */ ++ ++ return 0; ++ ++ ++err_out: ++#ifndef USE_IO_OPS ++ if (tp->mmio_addr) iounmap (tp->mmio_addr); ++#endif /* !USE_IO_OPS */ ++ /* it's ok to call this even if we have no regions to free */ ++ pci_release_regions (pdev); ++ rtdev_free(rtdev); ++ pci_set_drvdata (pdev, NULL); ++ ++ return i; ++} ++ ++ ++static void rtl8139_remove_one (struct pci_dev *pdev) ++{ ++ struct rtnet_device *rtdev = pci_get_drvdata(pdev); ++ ++#ifndef USE_IO_OPS ++ struct rtl8139_private *tp = rtdev->priv; ++ ++ if (tp->mmio_addr) ++ iounmap (tp->mmio_addr); ++#endif /* !USE_IO_OPS */ ++ ++ /* it's ok to call this even if we have no regions to free */ ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ ++ pci_release_regions(pdev); ++ pci_set_drvdata(pdev, NULL); ++ ++ rtdev_free(rtdev); ++} ++ ++ ++/* Serial EEPROM section. */ ++ ++/* EEPROM_Ctrl bits. */ ++#define EE_SHIFT_CLK 0x04 /* EEPROM shift clock. */ ++#define EE_CS 0x08 /* EEPROM chip select. */ ++#define EE_DATA_WRITE 0x02 /* EEPROM chip data in. */ ++#define EE_WRITE_0 0x00 ++#define EE_WRITE_1 0x02 ++#define EE_DATA_READ 0x01 /* EEPROM chip data out. */ ++#define EE_ENB (0x80 | EE_CS) ++ ++/* Delay between EEPROM clock transitions. ++ No extra delay is needed with 33Mhz PCI, but 66Mhz may change this. ++ */ ++ ++#define eeprom_delay() readl(ee_addr) ++ ++/* The EEPROM commands include the alway-set leading bit. */ ++#define EE_WRITE_CMD (5) ++#define EE_READ_CMD (6) ++#define EE_ERASE_CMD (7) ++ ++static int read_eeprom (void *ioaddr, int location, int addr_len) ++{ ++ int i; ++ unsigned retval = 0; ++ void *ee_addr = ioaddr + Cfg9346; ++ int read_cmd = location | (EE_READ_CMD << addr_len); ++ ++ writeb (EE_ENB & ~EE_CS, ee_addr); ++ writeb (EE_ENB, ee_addr); ++ eeprom_delay (); ++ ++ /* Shift the read command bits out. */ ++ for (i = 4 + addr_len; i >= 0; i--) { ++ int dataval = (read_cmd & (1 << i)) ? EE_DATA_WRITE : 0; ++ writeb (EE_ENB | dataval, ee_addr); ++ eeprom_delay (); ++ writeb (EE_ENB | dataval | EE_SHIFT_CLK, ee_addr); ++ eeprom_delay (); ++ } ++ writeb (EE_ENB, ee_addr); ++ eeprom_delay (); ++ ++ for (i = 16; i > 0; i--) { ++ writeb (EE_ENB | EE_SHIFT_CLK, ee_addr); ++ eeprom_delay (); ++ retval = ++ (retval << 1) | ((readb (ee_addr) & EE_DATA_READ) ? 1 : ++ 0); ++ writeb (EE_ENB, ee_addr); ++ eeprom_delay (); ++ } ++ ++ /* Terminate the EEPROM access. */ ++ writeb (~EE_CS, ee_addr); ++ eeprom_delay (); ++ ++ return retval; ++} ++ ++/* MII serial management: mostly bogus for now. */ ++/* Read and write the MII management registers using software-generated ++ serial MDIO protocol. ++ The maximum data clock rate is 2.5 Mhz. The minimum timing is usually ++ met by back-to-back PCI I/O cycles, but we insert a delay to avoid ++ "overclocking" issues. 
*/ ++#define MDIO_DIR 0x80 ++#define MDIO_DATA_OUT 0x04 ++#define MDIO_DATA_IN 0x02 ++#define MDIO_CLK 0x01 ++#define MDIO_WRITE0 (MDIO_DIR) ++#define MDIO_WRITE1 (MDIO_DIR | MDIO_DATA_OUT) ++ ++#define mdio_delay(mdio_addr) readb(mdio_addr) ++ ++ ++ ++static char mii_2_8139_map[8] = { ++ BasicModeCtrl, ++ BasicModeStatus, ++ 0, ++ 0, ++ NWayAdvert, ++ NWayLPAR, ++ NWayExpansion, ++ 0 ++}; ++ ++#ifdef CONFIG_8139TOO_8129 ++/* Syncronize the MII management interface by shifting 32 one bits out. */ ++static void mdio_sync (void *mdio_addr) ++{ ++ int i; ++ ++ for (i = 32; i >= 0; i--) { ++ writeb (MDIO_WRITE1, mdio_addr); ++ mdio_delay (mdio_addr); ++ writeb (MDIO_WRITE1 | MDIO_CLK, mdio_addr); ++ mdio_delay (mdio_addr); ++ } ++} ++#endif ++ ++ ++static int mdio_read (struct rtnet_device *rtdev, int phy_id, int location) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ int retval = 0; ++#ifdef CONFIG_8139TOO_8129 ++ void *mdio_addr = tp->mmio_addr + Config4; ++ int mii_cmd = (0xf6 << 10) | (phy_id << 5) | location; ++ int i; ++#endif ++ ++ if (phy_id > 31) { /* Really a 8139. Use internal registers. */ ++ return location < 8 && mii_2_8139_map[location] ? ++ readw (tp->mmio_addr + mii_2_8139_map[location]) : 0; ++ } ++ ++#ifdef CONFIG_8139TOO_8129 ++ mdio_sync (mdio_addr); ++ /* Shift the read command bits out. */ ++ for (i = 15; i >= 0; i--) { ++ int dataval = (mii_cmd & (1 << i)) ? MDIO_DATA_OUT : 0; ++ ++ writeb (MDIO_DIR | dataval, mdio_addr); ++ mdio_delay (mdio_addr); ++ writeb (MDIO_DIR | dataval | MDIO_CLK, mdio_addr); ++ mdio_delay (mdio_addr); ++ } ++ ++ /* Read the two transition, 16 data, and wire-idle bits. */ ++ for (i = 19; i > 0; i--) { ++ writeb (0, mdio_addr); ++ mdio_delay (mdio_addr); ++ retval = (retval << 1) | ((readb (mdio_addr) & MDIO_DATA_IN) ? 1 : 0); ++ writeb (MDIO_CLK, mdio_addr); ++ mdio_delay (mdio_addr); ++ } ++#endif ++ ++ return (retval >> 1) & 0xffff; ++} ++ ++ ++static void mdio_write (struct rtnet_device *rtdev, int phy_id, int location, ++ int value) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++#ifdef CONFIG_8139TOO_8129 ++ void *mdio_addr = tp->mmio_addr + Config4; ++ int mii_cmd = (0x5002 << 16) | (phy_id << 23) | (location << 18) | value; ++ int i; ++#endif ++ ++ if (phy_id > 31) { /* Really a 8139. Use internal registers. */ ++ void *ioaddr = tp->mmio_addr; ++ if (location == 0) { ++ RTL_W8 (Cfg9346, Cfg9346_Unlock); ++ RTL_W16 (BasicModeCtrl, value); ++ RTL_W8 (Cfg9346, Cfg9346_Lock); ++ } else if (location < 8 && mii_2_8139_map[location]) ++ RTL_W16 (mii_2_8139_map[location], value); ++ return; ++ } ++ ++#ifdef CONFIG_8139TOO_8129 ++ mdio_sync (mdio_addr); ++ ++ /* Shift the command bits out. */ ++ for (i = 31; i >= 0; i--) { ++ int dataval = ++ (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0; ++ writeb (dataval, mdio_addr); ++ mdio_delay (mdio_addr); ++ writeb (dataval | MDIO_CLK, mdio_addr); ++ mdio_delay (mdio_addr); ++ } ++ /* Clear out extra bits. 
*/ ++ for (i = 2; i > 0; i--) { ++ writeb (0, mdio_addr); ++ mdio_delay (mdio_addr); ++ writeb (MDIO_CLK, mdio_addr); ++ mdio_delay (mdio_addr); ++ } ++#endif ++} ++ ++static int rtl8139_open (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ int retval; ++ ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ retval = rtdm_irq_request(&tp->irq_handle, rtdev->irq, ++ rtl8139_interrupt, RTDM_IRQTYPE_SHARED, ++ rtdev->name, rtdev); ++ if (retval) ++ return retval; ++ ++ tp->tx_bufs = pci_alloc_consistent(tp->pci_dev, TX_BUF_TOT_LEN, &tp->tx_bufs_dma); ++ tp->rx_ring = pci_alloc_consistent(tp->pci_dev, RX_BUF_TOT_LEN, &tp->rx_ring_dma); ++ ++ if (tp->tx_bufs == NULL || tp->rx_ring == NULL) { ++ rtdm_irq_free(&tp->irq_handle); ++ if (tp->tx_bufs) ++ pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN, tp->tx_bufs, tp->tx_bufs_dma); ++ if (tp->rx_ring) ++ pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN, tp->rx_ring, tp->rx_ring_dma); ++ ++ return -ENOMEM; ++ } ++ /* FIXME: create wrapper for duplex_lock vs. force_media ++ tp->mii.full_duplex = tp->mii.duplex_lock; */ ++ tp->tx_flag = (TX_FIFO_THRESH << 11) & 0x003f0000; ++ tp->twistie = 1; ++ tp->time_to_die = 0; ++ ++ rtl8139_init_ring (rtdev); ++ rtl8139_hw_start (rtdev); ++ ++ return 0; ++} ++ ++ ++static void rtl_check_media (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ u16 mii_lpa; ++ ++ if (tp->phys[0] < 0) ++ return; ++ ++ mii_lpa = mdio_read(rtdev, tp->phys[0], MII_LPA); ++ if (mii_lpa == 0xffff) ++ return; ++ ++ tp->mii.full_duplex = (mii_lpa & LPA_100FULL) == LPA_100FULL || ++ (mii_lpa & 0x00C0) == LPA_10FULL; ++} ++ ++ ++/* Start the hardware at open or resume. */ ++static void rtl8139_hw_start (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ void *ioaddr = tp->mmio_addr; ++ u32 i; ++ u8 tmp; ++ ++ /* Bring old chips out of low-power mode. */ ++ if (rtl_chip_info[tp->chipset].flags & HasHltClk) ++ RTL_W8 (HltClk, 'R'); ++ ++ rtl8139_chip_reset(ioaddr); ++ ++ /* unlock Config[01234] and BMCR register writes */ ++ RTL_W8_F (Cfg9346, Cfg9346_Unlock); ++ /* Restore our idea of the MAC address. */ ++ RTL_W32_F (MAC0 + 0, cpu_to_le32 (*(u32 *) (rtdev->dev_addr + 0))); ++ RTL_W32_F (MAC0 + 4, cpu_to_le32 (*(u32 *) (rtdev->dev_addr + 4))); ++ ++ tp->cur_rx = 0; ++ ++ /* init Rx ring buffer DMA address */ ++ RTL_W32_F (RxBuf, tp->rx_ring_dma); ++ ++ /* Must enable Tx/Rx before setting transfer thresholds! */ ++ RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb); ++ ++ tp->rx_config = rtl8139_rx_config | AcceptBroadcast | AcceptMyPhys; ++ RTL_W32 (RxConfig, tp->rx_config); ++ ++ /* Check this value: the documentation for IFG contradicts ifself. */ ++ RTL_W32 (TxConfig, rtl8139_tx_config); ++ ++ rtl_check_media (rtdev); ++ ++ if (tp->chipset >= CH_8139B) { ++ /* Disable magic packet scanning, which is enabled ++ * when PM is enabled in Config1. It can be reenabled ++ * via ETHTOOL_SWOL if desired. 
*/ ++ RTL_W8 (Config3, RTL_R8 (Config3) & ~Cfg3_Magic); ++ } ++ ++ /* Lock Config[01234] and BMCR register writes */ ++ RTL_W8 (Cfg9346, Cfg9346_Lock); ++ ++ /* init Tx buffer DMA addresses */ ++ for (i = 0; i < NUM_TX_DESC; i++) ++ RTL_W32_F (TxAddr0 + (i * 4), tp->tx_bufs_dma + (tp->tx_buf[i] - tp->tx_bufs)); ++ ++ RTL_W32 (RxMissed, 0); ++ ++ rtl8139_set_rx_mode (rtdev); ++ ++ /* no early-rx interrupts */ ++ RTL_W16 (MultiIntr, RTL_R16 (MultiIntr) & MultiIntrClear); ++ ++ /* make sure RxTx has started */ ++ tmp = RTL_R8 (ChipCmd); ++ if ((!(tmp & CmdRxEnb)) || (!(tmp & CmdTxEnb))) ++ RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb); ++ ++ /* Enable all known interrupts by setting the interrupt mask. */ ++ RTL_W16 (IntrMask, rtl8139_intr_mask); ++ ++ rtnetif_start_queue (rtdev); ++} ++ ++ ++/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ ++static void rtl8139_init_ring (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ int i; ++ ++ tp->cur_rx = 0; ++ tp->cur_tx = 0; ++ tp->dirty_tx = 0; ++ ++ for (i = 0; i < NUM_TX_DESC; i++) ++ tp->tx_buf[i] = &tp->tx_bufs[i * TX_BUF_SIZE]; ++} ++ ++ ++static void rtl8139_tx_clear (struct rtl8139_private *tp) ++{ ++ tp->cur_tx = 0; ++ tp->dirty_tx = 0; ++ ++ /* XXX account for unsent Tx packets in tp->stats.tx_dropped */ ++} ++ ++ ++ ++static int rtl8139_start_xmit (struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ ++ void *ioaddr = tp->mmio_addr; ++ unsigned int entry; ++ unsigned int len = skb->len; ++ rtdm_lockctx_t context; ++ ++ /* Calculate the next Tx descriptor entry. */ ++ entry = tp->cur_tx % NUM_TX_DESC; ++ ++ if (likely(len < TX_BUF_SIZE)) { ++ if (unlikely(skb->xmit_stamp != NULL)) { ++ rtdm_lock_irqsave(context); ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++ /* typically, we are only copying a few bytes here */ ++ rtskb_copy_and_csum_dev(skb, tp->tx_buf[entry]); ++ } else { ++ /* copy larger packets outside the lock */ ++ rtskb_copy_and_csum_dev(skb, tp->tx_buf[entry]); ++ rtdm_lock_irqsave(context); ++ } ++ } else { ++ dev_kfree_rtskb(skb); ++ tp->stats.tx_dropped++; ++ return 0; ++ } ++ ++ ++ /* Note: the chip doesn't have auto-pad! 
*/ ++ rtdm_lock_get(&tp->lock); ++ RTL_W32_F (TxStatus0 + (entry * sizeof (u32)), tp->tx_flag | max(len, (unsigned int)ETH_ZLEN)); ++ tp->cur_tx++; ++ wmb(); ++ if ((tp->cur_tx - NUM_TX_DESC) == tp->dirty_tx) ++ rtnetif_stop_queue (rtdev); ++ rtdm_lock_put_irqrestore(&tp->lock, context); ++ ++ dev_kfree_rtskb(skb); ++ ++#ifdef DEBUG ++ rtdm_printk ("%s: Queued Tx packet size %u to slot %d.\n", rtdev->name, len, entry); ++#endif ++ return 0; ++} ++ ++static int rtl8139_ioctl(struct rtnet_device *rtdev, struct ifreq *ifr, int cmd) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ void *ioaddr = tp->mmio_addr; ++ int nReturn = 0; ++ struct ethtool_value *value; ++ ++ switch (cmd) { ++ case SIOCETHTOOL: ++ /* TODO: user-safe parameter access, most probably one layer higher */ ++ value = (struct ethtool_value *)ifr->ifr_data; ++ if (value->cmd == ETHTOOL_GLINK) ++ { ++ if (RTL_R16(CSCR) & CSCR_LinkOKBit) ++ value->data = 1; ++ else ++ value->data = 0; ++ } ++ break; ++ ++ default: ++ nReturn = -EOPNOTSUPP; ++ break; ++ } ++ return nReturn; ++} ++ ++static struct net_device_stats *rtl8139_get_stats(struct rtnet_device*rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ return &tp->stats; ++} ++ ++static void rtl8139_tx_interrupt (struct rtnet_device *rtdev, ++ struct rtl8139_private *tp, ++ void *ioaddr) ++{ ++ unsigned long dirty_tx, tx_left; ++ ++ dirty_tx = tp->dirty_tx; ++ tx_left = tp->cur_tx - dirty_tx; ++ ++ while (tx_left > 0) { ++ int entry = dirty_tx % NUM_TX_DESC; ++ int txstatus; ++ ++ txstatus = RTL_R32 (TxStatus0 + (entry * sizeof (u32))); ++ ++ if (!(txstatus & (TxStatOK | TxUnderrun | TxAborted))) ++ break; /* It still hasn't been Txed */ ++ ++ /* Note: TxCarrierLost is always asserted at 100mbps. */ ++ if (txstatus & (TxOutOfWindow | TxAborted)) { ++ /* There was an major error, log it. */ ++ rtdm_printk("%s: Transmit error, Tx status %8.8x.\n", ++ rtdev->name, txstatus); ++ tp->stats.tx_errors++; ++ if (txstatus & TxAborted) { ++ tp->stats.tx_aborted_errors++; ++ RTL_W32 (TxConfig, TxClearAbt); ++ RTL_W16 (IntrStatus, TxErr); ++ wmb(); ++ } ++ if (txstatus & TxCarrierLost) ++ tp->stats.tx_carrier_errors++; ++ if (txstatus & TxOutOfWindow) ++ tp->stats.tx_window_errors++; ++#ifdef ETHER_STATS ++ if ((txstatus & 0x0f000000) == 0x0f000000) ++ tp->stats.collisions16++; ++#endif ++ } else { ++ if (txstatus & TxUnderrun) { ++ /* Add 64 to the Tx FIFO threshold. */ ++ if (tp->tx_flag < 0x00300000) ++ tp->tx_flag += 0x00020000; ++ tp->stats.tx_fifo_errors++; ++ } ++ tp->stats.collisions += (txstatus >> 24) & 15; ++ tp->stats.tx_bytes += txstatus & 0x7ff; ++ tp->stats.tx_packets++; ++ } ++ ++ dirty_tx++; ++ tx_left--; ++ } ++ ++ /* only wake the queue if we did work, and the queue is stopped */ ++ if (tp->dirty_tx != dirty_tx) { ++ tp->dirty_tx = dirty_tx; ++ mb(); ++ if (rtnetif_queue_stopped (rtdev)) ++ rtnetif_wake_queue (rtdev); ++ } ++} ++ ++ ++/* TODO: clean this up! Rx reset need not be this intensive */ ++static void rtl8139_rx_err ++(u32 rx_status, struct rtnet_device *rtdev, struct rtl8139_private *tp, void *ioaddr) ++{ ++/* u8 tmp8; ++#ifndef CONFIG_8139_NEW_RX_RESET ++ int tmp_work; ++#endif */ ++ ++ /* RTnet-TODO: We really need an error manager to handle such issues... 
*/ ++ rtdm_printk("%s: FATAL - Ethernet frame had errors, status %8.8x.\n", ++ rtdev->name, rx_status); ++} ++ ++ ++static void rtl8139_rx_interrupt (struct rtnet_device *rtdev, ++ struct rtl8139_private *tp, void *ioaddr, ++ nanosecs_abs_t *time_stamp) ++{ ++ unsigned char *rx_ring; ++ u16 cur_rx; ++ ++ rx_ring = tp->rx_ring; ++ cur_rx = tp->cur_rx; ++ ++ while ((RTL_R8 (ChipCmd) & RxBufEmpty) == 0) { ++ int ring_offset = cur_rx % RX_BUF_LEN; ++ u32 rx_status; ++ unsigned int rx_size; ++ unsigned int pkt_size; ++ struct rtskb *skb; ++ ++ rmb(); ++ ++ /* read size+status of next frame from DMA ring buffer */ ++ rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset)); ++ rx_size = rx_status >> 16; ++ pkt_size = rx_size - 4; ++ ++ /* Packet copy from FIFO still in progress. ++ * Theoretically, this should never happen ++ * since EarlyRx is disabled. ++ */ ++ if (rx_size == 0xfff0) { ++ tp->xstats.early_rx++; ++ break; ++ } ++ ++ /* If Rx err or invalid rx_size/rx_status received ++ * (which happens if we get lost in the ring), ++ * Rx process gets reset, so we abort any further ++ * Rx processing. ++ */ ++ if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) || ++ (rx_size < 8) || ++ (!(rx_status & RxStatusOK))) { ++ rtl8139_rx_err (rx_status, rtdev, tp, ioaddr); ++ return; ++ } ++ ++ /* Malloc up new buffer, compatible with net-2e. */ ++ /* Omit the four octet CRC from the length. */ ++ ++ /* TODO: consider allocating skb's outside of ++ * interrupt context, both to speed interrupt processing, ++ * and also to reduce the chances of having to ++ * drop packets here under memory pressure. ++ */ ++ ++ skb = rtnetdev_alloc_rtskb(rtdev, pkt_size + 2); ++ if (skb) { ++ skb->time_stamp = *time_stamp; ++ rtskb_reserve (skb, 2); /* 16 byte align the IP fields. */ ++ ++ ++ /* eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0); */ ++ memcpy (skb->data, &rx_ring[ring_offset + 4], pkt_size); ++ rtskb_put (skb, pkt_size); ++ skb->protocol = rt_eth_type_trans (skb, rtdev); ++ rtnetif_rx (skb); ++ tp->stats.rx_bytes += pkt_size; ++ tp->stats.rx_packets++; ++ } else { ++ rtdm_printk (KERN_WARNING"%s: Memory squeeze, dropping packet.\n", rtdev->name); ++ tp->stats.rx_dropped++; ++ } ++ ++ cur_rx = (cur_rx + rx_size + 4 + 3) & ~3; ++ RTL_W16 (RxBufPtr, cur_rx - 16); ++ ++ if (RTL_R16 (IntrStatus) & RxAckBits) ++ RTL_W16_F (IntrStatus, RxAckBits); ++ } ++ ++ tp->cur_rx = cur_rx; ++} ++ ++ ++static void rtl8139_weird_interrupt (struct rtnet_device *rtdev, ++ struct rtl8139_private *tp, ++ void *ioaddr, ++ int status, int link_changed) ++{ ++ rtdm_printk ("%s: Abnormal interrupt, status %8.8x.\n", ++ rtdev->name, status); ++ ++ /* Update the error count. */ ++ tp->stats.rx_missed_errors += RTL_R32 (RxMissed); ++ RTL_W32 (RxMissed, 0); ++ ++ if ((status & RxUnderrun) && link_changed && (tp->drv_flags & HAS_LNK_CHNG)) { ++ /* Really link-change on new chips. */ ++ status &= ~RxUnderrun; ++ } ++ ++ /* XXX along with rtl8139_rx_err, are we double-counting errors? 
*/ ++ if (status & ++ (RxUnderrun | RxOverflow | RxErr | RxFIFOOver)) ++ tp->stats.rx_errors++; ++ ++ if (status & PCSTimeout) ++ tp->stats.rx_length_errors++; ++ ++ if (status & (RxUnderrun | RxFIFOOver)) ++ tp->stats.rx_fifo_errors++; ++ ++ if (status & PCIErr) { ++ u16 pci_cmd_status; ++ pci_read_config_word (tp->pci_dev, PCI_STATUS, &pci_cmd_status); ++ pci_write_config_word (tp->pci_dev, PCI_STATUS, pci_cmd_status); ++ ++ rtdm_printk (KERN_ERR "%s: PCI Bus error %4.4x.\n", rtdev->name, pci_cmd_status); ++ } ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. */ ++static int rtl8139_interrupt(rtdm_irq_t *irq_handle) ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct rtl8139_private *tp = rtdev->priv; ++ void *ioaddr = tp->mmio_addr; ++ int ackstat; ++ int status; ++ int link_changed = 0; /* avoid bogus "uninit" warning */ ++ int saved_status = 0; ++ int ret = RTDM_IRQ_NONE; ++ ++ rtdm_lock_get(&tp->lock); ++ ++ status = RTL_R16(IntrStatus); ++ ++ /* h/w no longer present (hotplug?) or major error, bail */ ++ if (unlikely(status == 0xFFFF) || unlikely(!(status & rtl8139_intr_mask))) ++ goto out; ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ /* close possible race with dev_close */ ++ if (unlikely(!rtnetif_running(rtdev))) { ++ RTL_W16(IntrMask, 0); ++ goto out; ++ } ++ ++ /* Acknowledge all of the current interrupt sources ASAP, but ++ first get an additional status bit from CSCR. */ ++ if (unlikely(status & RxUnderrun)) ++ link_changed = RTL_R16(CSCR) & CSCR_LinkChangeBit; ++ ++ /* The chip takes special action when we clear RxAckBits, ++ * so we clear them later in rtl8139_rx_interrupt ++ */ ++ ackstat = status & ~(RxAckBits | TxErr); ++ if (ackstat) ++ RTL_W16(IntrStatus, ackstat); ++ ++ if (status & RxAckBits) { ++ saved_status |= RxAckBits; ++ rtl8139_rx_interrupt(rtdev, tp, ioaddr, &time_stamp); ++ } ++ ++ /* Check uncommon events with one test. */ ++ if (unlikely(status & (PCIErr | PCSTimeout | RxUnderrun | RxErr))) ++ rtl8139_weird_interrupt(rtdev, tp, ioaddr, status, link_changed); ++ ++ if (status & (TxOK |TxErr)) { ++ rtl8139_tx_interrupt(rtdev, tp, ioaddr); ++ if (status & TxErr) { ++ RTL_W16(IntrStatus, TxErr); ++ saved_status |= TxErr; ++ } ++ } ++ out: ++ rtdm_lock_put(&tp->lock); ++ ++ if (saved_status & RxAckBits) ++ rt_mark_stack_mgr(rtdev); ++ ++ if (saved_status & TxErr) ++ rtnetif_err_tx(rtdev); ++ ++ return ret; ++} ++ ++ ++static int rtl8139_close (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ void *ioaddr = tp->mmio_addr; ++ rtdm_lockctx_t context; ++ ++ printk ("%s: Shutting down ethercard, status was 0x%4.4x.\n", rtdev->name, RTL_R16 (IntrStatus)); ++ ++ rtnetif_stop_queue (rtdev); ++ ++ rtdm_lock_get_irqsave (&tp->lock, context); ++ /* Stop the chip's Tx and Rx DMA processes. */ ++ RTL_W8 (ChipCmd, 0); ++ /* Disable interrupts by clearing the interrupt mask. */ ++ RTL_W16 (IntrMask, 0); ++ /* Update the error counts. */ ++ tp->stats.rx_missed_errors += RTL_R32 (RxMissed); ++ RTL_W32 (RxMissed, 0); ++ rtdm_lock_put_irqrestore (&tp->lock, context); ++ ++ rtdm_irq_free(&tp->irq_handle); ++ ++ rt_stack_disconnect(rtdev); ++ ++ rtl8139_tx_clear (tp); ++ ++ pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN, tp->rx_ring, tp->rx_ring_dma); ++ pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN, tp->tx_bufs, tp->tx_bufs_dma); ++ tp->rx_ring = NULL; ++ tp->tx_bufs = NULL; ++ ++ /* Green! 
Put the chip in low-power mode. */ ++ RTL_W8 (Cfg9346, Cfg9346_Unlock); ++ ++ if (rtl_chip_info[tp->chipset].flags & HasHltClk) ++ RTL_W8 (HltClk, 'H'); /* 'R' would leave the clock running. */ ++ ++ return 0; ++} ++ ++ ++ ++/* Set or clear the multicast filter for this adaptor. ++ This routine is not state sensitive and need not be SMP locked. */ ++static void __set_rx_mode (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ void *ioaddr = tp->mmio_addr; ++ u32 mc_filter[2]; /* Multicast hash filter */ ++ int rx_mode; ++ u32 tmp; ++ ++#ifdef DEBUG ++ rtdm_printk ("%s: rtl8139_set_rx_mode(%4.4x) done -- Rx config %8.8lx.\n", ++ rtdev->name, rtdev->flags, RTL_R32 (RxConfig)); ++#endif ++ ++ /* Note: do not reorder, GCC is clever about common statements. */ ++ if (rtdev->flags & IFF_PROMISC) { ++ /* Unconditionally log net taps. */ ++ /*printk (KERN_NOTICE "%s: Promiscuous mode enabled.\n", rtdev->name);*/ ++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys | AcceptAllPhys; ++ mc_filter[1] = mc_filter[0] = 0xffffffff; ++ } else if (rtdev->flags & IFF_ALLMULTI) { ++ /* Too many to filter perfectly -- accept all multicasts. */ ++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; ++ mc_filter[1] = mc_filter[0] = 0xffffffff; ++ } else { ++ rx_mode = AcceptBroadcast | AcceptMyPhys; ++ mc_filter[1] = mc_filter[0] = 0; ++ } ++ ++ /* We can safely update without stopping the chip. */ ++ tmp = rtl8139_rx_config | rx_mode; ++ if (tp->rx_config != tmp) { ++ RTL_W32_F (RxConfig, tmp); ++ tp->rx_config = tmp; ++ } ++ RTL_W32_F (MAR0 + 0, mc_filter[0]); ++ RTL_W32_F (MAR0 + 4, mc_filter[1]); ++} ++ ++static void rtl8139_set_rx_mode (struct rtnet_device *rtdev) ++{ ++ rtdm_lockctx_t context; ++ struct rtl8139_private *tp = rtdev->priv; ++ ++ rtdm_lock_get_irqsave (&tp->lock, context); ++ __set_rx_mode(rtdev); ++ rtdm_lock_put_irqrestore (&tp->lock, context); ++} ++ ++static struct pci_driver rtl8139_pci_driver = { ++ name: DRV_NAME, ++ id_table: rtl8139_pci_tbl, ++ probe: rtl8139_init_one, ++ remove: rtl8139_remove_one, ++ suspend: NULL, ++ resume: NULL, ++}; ++ ++ ++static int __init rtl8139_init_module (void) ++{ ++ /* when we're a module, we always print a version message, ++ * even if no 8139 board is found. ++ */ ++ ++#ifdef MODULE ++ printk (KERN_INFO RTL8139_DRIVER_NAME "\n"); ++#endif ++ ++ return pci_register_driver (&rtl8139_pci_driver); ++} ++ ++ ++static void __exit rtl8139_cleanup_module (void) ++{ ++ pci_unregister_driver (&rtl8139_pci_driver); ++} ++ ++ ++module_init(rtl8139_init_module); ++module_exit(rtl8139_cleanup_module); +--- linux/drivers/xenomai/net/drivers/igb/e1000_i210.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_i210.h 2021-04-07 16:01:27.533633729 +0800 +@@ -0,0 +1,93 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . 
++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_I210_H_ ++#define _E1000_I210_H_ ++ ++s32 igb_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask); ++void igb_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask); ++s32 igb_valid_led_default_i210(struct e1000_hw *hw, u16 *data); ++s32 igb_read_invm_version(struct e1000_hw *hw, ++ struct e1000_fw_version *invm_ver); ++s32 igb_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data); ++s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data); ++s32 igb_init_nvm_params_i210(struct e1000_hw *hw); ++bool igb_get_flash_presence_i210(struct e1000_hw *hw); ++s32 igb_pll_workaround_i210(struct e1000_hw *hw); ++ ++#define E1000_STM_OPCODE 0xDB00 ++#define E1000_EEPROM_FLASH_SIZE_WORD 0x11 ++ ++#define INVM_DWORD_TO_RECORD_TYPE(invm_dword) \ ++ (u8)((invm_dword) & 0x7) ++#define INVM_DWORD_TO_WORD_ADDRESS(invm_dword) \ ++ (u8)(((invm_dword) & 0x0000FE00) >> 9) ++#define INVM_DWORD_TO_WORD_DATA(invm_dword) \ ++ (u16)(((invm_dword) & 0xFFFF0000) >> 16) ++ ++enum E1000_INVM_STRUCTURE_TYPE { ++ E1000_INVM_UNINITIALIZED_STRUCTURE = 0x00, ++ E1000_INVM_WORD_AUTOLOAD_STRUCTURE = 0x01, ++ E1000_INVM_CSR_AUTOLOAD_STRUCTURE = 0x02, ++ E1000_INVM_PHY_REGISTER_AUTOLOAD_STRUCTURE = 0x03, ++ E1000_INVM_RSA_KEY_SHA256_STRUCTURE = 0x04, ++ E1000_INVM_INVALIDATED_STRUCTURE = 0x0F, ++}; ++ ++#define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS 8 ++#define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS 1 ++#define E1000_INVM_ULT_BYTES_SIZE 8 ++#define E1000_INVM_RECORD_SIZE_IN_BYTES 4 ++#define E1000_INVM_VER_FIELD_ONE 0x1FF8 ++#define E1000_INVM_VER_FIELD_TWO 0x7FE000 ++#define E1000_INVM_IMGTYPE_FIELD 0x1F800000 ++ ++#define E1000_INVM_MAJOR_MASK 0x3F0 ++#define E1000_INVM_MINOR_MASK 0xF ++#define E1000_INVM_MAJOR_SHIFT 4 ++ ++#define ID_LED_DEFAULT_I210 ((ID_LED_OFF1_ON2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_OFF1_OFF2)) ++#define ID_LED_DEFAULT_I210_SERDES ((ID_LED_DEF1_DEF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_OFF1_ON2)) ++ ++/* NVM offset defaults for i211 device */ ++#define NVM_INIT_CTRL_2_DEFAULT_I211 0X7243 ++#define NVM_INIT_CTRL_4_DEFAULT_I211 0x00C1 ++#define NVM_LED_1_CFG_DEFAULT_I211 0x0184 ++#define NVM_LED_0_2_CFG_DEFAULT_I211 0x200C ++ ++/* PLL Defines */ ++#define E1000_PCI_PMCSR 0x44 ++#define E1000_PCI_PMCSR_D3 0x03 ++#define E1000_MAX_PLL_TRIES 5 ++#define E1000_PHY_PLL_UNCONF 0xFF ++#define E1000_PHY_PLL_FREQ_PAGE 0xFC0000 ++#define E1000_PHY_PLL_FREQ_REG 0x000E ++#define E1000_INVM_DEFAULT_AL 0x202F ++#define E1000_INVM_AUTOLOAD 0x0A ++#define E1000_INVM_PLL_WO_VAL 0x0010 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_regs.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_regs.h 2021-04-07 16:01:27.528633736 +0800 +@@ -0,0 +1,427 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. 
++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_REGS_H_ ++#define _E1000_REGS_H_ ++ ++#define E1000_CTRL 0x00000 /* Device Control - RW */ ++#define E1000_STATUS 0x00008 /* Device Status - RO */ ++#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ ++#define E1000_EERD 0x00014 /* EEPROM Read - RW */ ++#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ ++#define E1000_MDIC 0x00020 /* MDI Control - RW */ ++#define E1000_MDICNFG 0x00E04 /* MDI Config - RW */ ++#define E1000_SCTL 0x00024 /* SerDes Control - RW */ ++#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ ++#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ ++#define E1000_FCT 0x00030 /* Flow Control Type - RW */ ++#define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ ++#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ ++#define E1000_TSSDP 0x0003C /* Time Sync SDP Configuration Register - RW */ ++#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ ++#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ ++#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ ++#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ ++#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ ++#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ ++#define E1000_RCTL 0x00100 /* RX Control - RW */ ++#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ ++#define E1000_TXCW 0x00178 /* TX Configuration Word - RW */ ++#define E1000_EICR 0x01580 /* Ext. Interrupt Cause Read - R/clr */ ++#define E1000_EITR(_n) (0x01680 + (0x4 * (_n))) ++#define E1000_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */ ++#define E1000_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ ++#define E1000_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */ ++#define E1000_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */ ++#define E1000_EIAM 0x01530 /* Ext. 
Interrupt Ack Auto Clear Mask - RW */ ++#define E1000_GPIE 0x01514 /* General Purpose Interrupt Enable - RW */ ++#define E1000_IVAR0 0x01700 /* Interrupt Vector Allocation (array) - RW */ ++#define E1000_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */ ++#define E1000_TCTL 0x00400 /* TX Control - RW */ ++#define E1000_TCTL_EXT 0x00404 /* Extended TX Control - RW */ ++#define E1000_TIPG 0x00410 /* TX Inter-packet gap -RW */ ++#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ ++#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ ++#define E1000_LEDMUX 0x08130 /* LED MUX Control */ ++#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ ++#define E1000_PBS 0x01008 /* Packet Buffer Size */ ++#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ ++#define E1000_EEARBC_I210 0x12024 /* EEPROM Auto Read Bus Control */ ++#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ ++#define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */ ++#define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */ ++#define E1000_TCPTIMER 0x0104C /* TCP Timer - RW */ ++#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ ++#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ ++#define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */ ++#define E1000_I2CPARAMS 0x0102C /* SFPI2C Parameters Register - RW */ ++#define E1000_I2CBB_EN 0x00000100 /* I2C - Bit Bang Enable */ ++#define E1000_I2C_CLK_OUT 0x00000200 /* I2C- Clock */ ++#define E1000_I2C_DATA_OUT 0x00000400 /* I2C- Data Out */ ++#define E1000_I2C_DATA_OE_N 0x00000800 /* I2C- Data Output Enable */ ++#define E1000_I2C_DATA_IN 0x00001000 /* I2C- Data In */ ++#define E1000_I2C_CLK_OE_N 0x00002000 /* I2C- Clock Output Enable */ ++#define E1000_I2C_CLK_IN 0x00004000 /* I2C- Clock In */ ++#define E1000_MPHY_ADDR_CTRL 0x0024 /* GbE MPHY Address Control */ ++#define E1000_MPHY_DATA 0x0E10 /* GBE MPHY Data */ ++#define E1000_MPHY_STAT 0x0E0C /* GBE MPHY Statistics */ ++ ++/* IEEE 1588 TIMESYNCH */ ++#define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */ ++#define E1000_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ ++#define E1000_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */ ++#define E1000_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */ ++#define E1000_RXSTMPH 0x0B628 /* Rx timestamp High - RO */ ++#define E1000_RXSATRL 0x0B62C /* Rx timestamp attribute low - RO */ ++#define E1000_RXSATRH 0x0B630 /* Rx timestamp attribute high - RO */ ++#define E1000_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ ++#define E1000_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ ++#define E1000_SYSTIML 0x0B600 /* System time register Low - RO */ ++#define E1000_SYSTIMH 0x0B604 /* System time register High - RO */ ++#define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ ++#define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ ++#define E1000_TRGTTIML0 0x0B644 /* Target Time Register 0 Low - RW */ ++#define E1000_TRGTTIMH0 0x0B648 /* Target Time Register 0 High - RW */ ++#define E1000_TRGTTIML1 0x0B64C /* Target Time Register 1 Low - RW */ ++#define E1000_TRGTTIMH1 0x0B650 /* Target Time Register 1 High - RW */ ++#define E1000_AUXSTMPL0 0x0B65C /* Auxiliary Time Stamp 0 Register Low - RO */ ++#define E1000_AUXSTMPH0 0x0B660 /* Auxiliary Time Stamp 0 Register High - RO */ ++#define E1000_AUXSTMPL1 0x0B664 /* Auxiliary Time Stamp 1 Register Low - RO */ ++#define E1000_AUXSTMPH1 0x0B668 /* Auxiliary Time Stamp 1 
Register High - RO */ ++#define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */ ++#define E1000_TSICR 0x0B66C /* Interrupt Cause Register */ ++#define E1000_TSIM 0x0B674 /* Interrupt Mask Register */ ++ ++/* Filtering Registers */ ++#define E1000_SAQF(_n) (0x5980 + 4 * (_n)) ++#define E1000_DAQF(_n) (0x59A0 + 4 * (_n)) ++#define E1000_SPQF(_n) (0x59C0 + 4 * (_n)) ++#define E1000_FTQF(_n) (0x59E0 + 4 * (_n)) ++#define E1000_SAQF0 E1000_SAQF(0) ++#define E1000_DAQF0 E1000_DAQF(0) ++#define E1000_SPQF0 E1000_SPQF(0) ++#define E1000_FTQF0 E1000_FTQF(0) ++#define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */ ++#define E1000_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */ ++ ++#define E1000_RQDPC(_n) (0x0C030 + ((_n) * 0x40)) ++ ++/* DMA Coalescing registers */ ++#define E1000_DMACR 0x02508 /* Control Register */ ++#define E1000_DMCTXTH 0x03550 /* Transmit Threshold */ ++#define E1000_DMCTLX 0x02514 /* Time to Lx Request */ ++#define E1000_DMCRTRH 0x05DD0 /* Receive Packet Rate Threshold */ ++#define E1000_DMCCNT 0x05DD4 /* Current Rx Count */ ++#define E1000_FCRTC 0x02170 /* Flow Control Rx high watermark */ ++#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ ++ ++/* TX Rate Limit Registers */ ++#define E1000_RTTDQSEL 0x3604 /* Tx Desc Plane Queue Select - WO */ ++#define E1000_RTTBCNRM 0x3690 /* Tx BCN Rate-scheduler MMW */ ++#define E1000_RTTBCNRC 0x36B0 /* Tx BCN Rate-Scheduler Config - WO */ ++ ++/* Split and Replication RX Control - RW */ ++#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ ++ ++/* Thermal sensor configuration and status registers */ ++#define E1000_THMJT 0x08100 /* Junction Temperature */ ++#define E1000_THLOWTC 0x08104 /* Low Threshold Control */ ++#define E1000_THMIDTC 0x08108 /* Mid Threshold Control */ ++#define E1000_THHIGHTC 0x0810C /* High Threshold Control */ ++#define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ ++ ++/* Convenience macros ++ * ++ * Note: "_n" is the queue number of the register to be written to. ++ * ++ * Example usage: ++ * E1000_RDBAL_REG(current_rx_queue) ++ */ ++#define E1000_RDBAL(_n) ((_n) < 4 ? (0x02800 + ((_n) * 0x100)) \ ++ : (0x0C000 + ((_n) * 0x40))) ++#define E1000_RDBAH(_n) ((_n) < 4 ? (0x02804 + ((_n) * 0x100)) \ ++ : (0x0C004 + ((_n) * 0x40))) ++#define E1000_RDLEN(_n) ((_n) < 4 ? (0x02808 + ((_n) * 0x100)) \ ++ : (0x0C008 + ((_n) * 0x40))) ++#define E1000_SRRCTL(_n) ((_n) < 4 ? (0x0280C + ((_n) * 0x100)) \ ++ : (0x0C00C + ((_n) * 0x40))) ++#define E1000_RDH(_n) ((_n) < 4 ? (0x02810 + ((_n) * 0x100)) \ ++ : (0x0C010 + ((_n) * 0x40))) ++#define E1000_RDT(_n) ((_n) < 4 ? (0x02818 + ((_n) * 0x100)) \ ++ : (0x0C018 + ((_n) * 0x40))) ++#define E1000_RXDCTL(_n) ((_n) < 4 ? (0x02828 + ((_n) * 0x100)) \ ++ : (0x0C028 + ((_n) * 0x40))) ++#define E1000_TDBAL(_n) ((_n) < 4 ? (0x03800 + ((_n) * 0x100)) \ ++ : (0x0E000 + ((_n) * 0x40))) ++#define E1000_TDBAH(_n) ((_n) < 4 ? (0x03804 + ((_n) * 0x100)) \ ++ : (0x0E004 + ((_n) * 0x40))) ++#define E1000_TDLEN(_n) ((_n) < 4 ? (0x03808 + ((_n) * 0x100)) \ ++ : (0x0E008 + ((_n) * 0x40))) ++#define E1000_TDH(_n) ((_n) < 4 ? (0x03810 + ((_n) * 0x100)) \ ++ : (0x0E010 + ((_n) * 0x40))) ++#define E1000_TDT(_n) ((_n) < 4 ? (0x03818 + ((_n) * 0x100)) \ ++ : (0x0E018 + ((_n) * 0x40))) ++#define E1000_TXDCTL(_n) ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) \ ++ : (0x0E028 + ((_n) * 0x40))) ++#define E1000_RXCTL(_n) ((_n) < 4 ? 
(0x02814 + ((_n) * 0x100)) : \ ++ (0x0C014 + ((_n) * 0x40))) ++#define E1000_DCA_RXCTRL(_n) E1000_RXCTL(_n) ++#define E1000_TXCTL(_n) ((_n) < 4 ? (0x03814 + ((_n) * 0x100)) : \ ++ (0x0E014 + ((_n) * 0x40))) ++#define E1000_DCA_TXCTRL(_n) E1000_TXCTL(_n) ++#define E1000_TDWBAL(_n) ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) \ ++ : (0x0E038 + ((_n) * 0x40))) ++#define E1000_TDWBAH(_n) ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) \ ++ : (0x0E03C + ((_n) * 0x40))) ++ ++#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ ++#define E1000_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */ ++ ++#define E1000_TDFH 0x03410 /* TX Data FIFO Head - RW */ ++#define E1000_TDFT 0x03418 /* TX Data FIFO Tail - RW */ ++#define E1000_TDFHS 0x03420 /* TX Data FIFO Head Saved - RW */ ++#define E1000_TDFPC 0x03430 /* TX Data FIFO Packet Count - RW */ ++#define E1000_DTXCTL 0x03590 /* DMA TX Control - RW */ ++#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ ++#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ ++#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ ++#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ ++#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ ++#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ ++#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ ++#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ ++#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ ++#define E1000_COLC 0x04028 /* Collision Count - R/clr */ ++#define E1000_DC 0x04030 /* Defer Count - R/clr */ ++#define E1000_TNCRS 0x04034 /* TX-No CRS - R/clr */ ++#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ ++#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ ++#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ ++#define E1000_XONRXC 0x04048 /* XON RX Count - R/clr */ ++#define E1000_XONTXC 0x0404C /* XON TX Count - R/clr */ ++#define E1000_XOFFRXC 0x04050 /* XOFF RX Count - R/clr */ ++#define E1000_XOFFTXC 0x04054 /* XOFF TX Count - R/clr */ ++#define E1000_FCRUC 0x04058 /* Flow Control RX Unsupported Count- R/clr */ ++#define E1000_PRC64 0x0405C /* Packets RX (64 bytes) - R/clr */ ++#define E1000_PRC127 0x04060 /* Packets RX (65-127 bytes) - R/clr */ ++#define E1000_PRC255 0x04064 /* Packets RX (128-255 bytes) - R/clr */ ++#define E1000_PRC511 0x04068 /* Packets RX (255-511 bytes) - R/clr */ ++#define E1000_PRC1023 0x0406C /* Packets RX (512-1023 bytes) - R/clr */ ++#define E1000_PRC1522 0x04070 /* Packets RX (1024-1522 bytes) - R/clr */ ++#define E1000_GPRC 0x04074 /* Good Packets RX Count - R/clr */ ++#define E1000_BPRC 0x04078 /* Broadcast Packets RX Count - R/clr */ ++#define E1000_MPRC 0x0407C /* Multicast Packets RX Count - R/clr */ ++#define E1000_GPTC 0x04080 /* Good Packets TX Count - R/clr */ ++#define E1000_GORCL 0x04088 /* Good Octets RX Count Low - R/clr */ ++#define E1000_GORCH 0x0408C /* Good Octets RX Count High - R/clr */ ++#define E1000_GOTCL 0x04090 /* Good Octets TX Count Low - R/clr */ ++#define E1000_GOTCH 0x04094 /* Good Octets TX Count High - R/clr */ ++#define E1000_RNBC 0x040A0 /* RX No Buffers Count - R/clr */ ++#define E1000_RUC 0x040A4 /* RX Undersize Count - R/clr */ ++#define E1000_RFC 0x040A8 /* RX Fragment Count - R/clr */ ++#define E1000_ROC 0x040AC /* RX Oversize Count - R/clr */ ++#define E1000_RJC 0x040B0 /* RX Jabber Count - R/clr */ ++#define E1000_MGTPRC 0x040B4 /* Management Packets RX Count - R/clr */ ++#define E1000_MGTPDC 0x040B8 
/* Management Packets Dropped Count - R/clr */ ++#define E1000_MGTPTC 0x040BC /* Management Packets TX Count - R/clr */ ++#define E1000_TORL 0x040C0 /* Total Octets RX Low - R/clr */ ++#define E1000_TORH 0x040C4 /* Total Octets RX High - R/clr */ ++#define E1000_TOTL 0x040C8 /* Total Octets TX Low - R/clr */ ++#define E1000_TOTH 0x040CC /* Total Octets TX High - R/clr */ ++#define E1000_TPR 0x040D0 /* Total Packets RX - R/clr */ ++#define E1000_TPT 0x040D4 /* Total Packets TX - R/clr */ ++#define E1000_PTC64 0x040D8 /* Packets TX (64 bytes) - R/clr */ ++#define E1000_PTC127 0x040DC /* Packets TX (65-127 bytes) - R/clr */ ++#define E1000_PTC255 0x040E0 /* Packets TX (128-255 bytes) - R/clr */ ++#define E1000_PTC511 0x040E4 /* Packets TX (256-511 bytes) - R/clr */ ++#define E1000_PTC1023 0x040E8 /* Packets TX (512-1023 bytes) - R/clr */ ++#define E1000_PTC1522 0x040EC /* Packets TX (1024-1522 Bytes) - R/clr */ ++#define E1000_MPTC 0x040F0 /* Multicast Packets TX Count - R/clr */ ++#define E1000_BPTC 0x040F4 /* Broadcast Packets TX Count - R/clr */ ++#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context TX - R/clr */ ++#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context TX Fail - R/clr */ ++#define E1000_IAC 0x04100 /* Interrupt Assertion Count */ ++/* Interrupt Cause Rx Packet Timer Expire Count */ ++#define E1000_ICRXPTC 0x04104 ++/* Interrupt Cause Rx Absolute Timer Expire Count */ ++#define E1000_ICRXATC 0x04108 ++/* Interrupt Cause Tx Packet Timer Expire Count */ ++#define E1000_ICTXPTC 0x0410C ++/* Interrupt Cause Tx Absolute Timer Expire Count */ ++#define E1000_ICTXATC 0x04110 ++/* Interrupt Cause Tx Queue Empty Count */ ++#define E1000_ICTXQEC 0x04118 ++/* Interrupt Cause Tx Queue Minimum Threshold Count */ ++#define E1000_ICTXQMTC 0x0411C ++/* Interrupt Cause Rx Descriptor Minimum Threshold Count */ ++#define E1000_ICRXDMTC 0x04120 ++#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ ++#define E1000_PCS_CFG0 0x04200 /* PCS Configuration 0 - RW */ ++#define E1000_PCS_LCTL 0x04208 /* PCS Link Control - RW */ ++#define E1000_PCS_LSTAT 0x0420C /* PCS Link Status - RO */ ++#define E1000_CBTMPC 0x0402C /* Circuit Breaker TX Packet Count */ ++#define E1000_HTDPMC 0x0403C /* Host Transmit Discarded Packets */ ++#define E1000_CBRMPC 0x040FC /* Circuit Breaker RX Packet Count */ ++#define E1000_RPTHC 0x04104 /* Rx Packets To Host */ ++#define E1000_HGPTC 0x04118 /* Host Good Packets TX Count */ ++#define E1000_HTCBDPC 0x04124 /* Host TX Circuit Breaker Dropped Count */ ++#define E1000_HGORCL 0x04128 /* Host Good Octets Received Count Low */ ++#define E1000_HGORCH 0x0412C /* Host Good Octets Received Count High */ ++#define E1000_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ ++#define E1000_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ ++#define E1000_LENERRS 0x04138 /* Length Errors Count */ ++#define E1000_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */ ++#define E1000_PCS_ANADV 0x04218 /* AN advertisement - RW */ ++#define E1000_PCS_LPAB 0x0421C /* Link Partner Ability - RW */ ++#define E1000_PCS_NPTX 0x04220 /* AN Next Page Transmit - RW */ ++#define E1000_PCS_LPABNP 0x04224 /* Link Partner Ability Next Page - RW */ ++#define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */ ++#define E1000_RLPML 0x05004 /* RX Long Packet Max Length */ ++#define E1000_RFCTL 0x05008 /* Receive Filter Control*/ ++#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ ++#define E1000_RA 0x05400 /* Receive Address - RW Array */ 
++#define E1000_RA2 0x054E0 /* 2nd half of Rx address array - RW Array */ ++#define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4)) ++#define E1000_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ ++ (0x054E0 + ((_i - 16) * 8))) ++#define E1000_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \ ++ (0x054E4 + ((_i - 16) * 8))) ++#define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8)) ++#define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4)) ++#define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) ++#define E1000_FFMT_REG(_i) (0x09000 + ((_i) * 8)) ++#define E1000_FFVT_REG(_i) (0x09800 + ((_i) * 8)) ++#define E1000_FFLT_REG(_i) (0x05F00 + ((_i) * 8)) ++#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ ++#define E1000_VT_CTL 0x0581C /* VMDq Control - RW */ ++#define E1000_WUC 0x05800 /* Wakeup Control - RW */ ++#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ ++#define E1000_WUS 0x05810 /* Wakeup Status - RO */ ++#define E1000_MANC 0x05820 /* Management Control - RW */ ++#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ ++#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ ++ ++#define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ ++#define E1000_CCMCTL 0x05B48 /* CCM Control Register */ ++#define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */ ++#define E1000_SCCTL 0x05B4C /* PCIc PLL Configuration Register */ ++#define E1000_GCR 0x05B00 /* PCI-Ex Control */ ++#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ ++#define E1000_SWSM 0x05B50 /* SW Semaphore */ ++#define E1000_FWSM 0x05B54 /* FW Semaphore */ ++#define E1000_DCA_CTRL 0x05B74 /* DCA Control - RW */ ++ ++/* RSS registers */ ++#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ ++#define E1000_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ ++#define E1000_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate Interrupt Ext*/ ++#define E1000_IMIRVP 0x05AC0 /* Immediate Interrupt RX VLAN Priority - RW */ ++/* MSI-X Allocation Register (_i) - RW */ ++#define E1000_MSIXBM(_i) (0x01600 + ((_i) * 4)) ++/* Redirection Table - RW Array */ ++#define E1000_RETA(_i) (0x05C00 + ((_i) * 4)) ++#define E1000_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* RSS Random Key - RW Array */ ++ ++/* VT Registers */ ++#define E1000_MBVFICR 0x00C80 /* Mailbox VF Cause - RWC */ ++#define E1000_MBVFIMR 0x00C84 /* Mailbox VF int Mask - RW */ ++#define E1000_VFLRE 0x00C88 /* VF Register Events - RWC */ ++#define E1000_VFRE 0x00C8C /* VF Receive Enables */ ++#define E1000_VFTE 0x00C90 /* VF Transmit Enables */ ++#define E1000_QDE 0x02408 /* Queue Drop Enable - RW */ ++#define E1000_DTXSWC 0x03500 /* DMA Tx Switch Control - RW */ ++#define E1000_WVBR 0x03554 /* VM Wrong Behavior - RWS */ ++#define E1000_RPLOLR 0x05AF0 /* Replication Offload - RW */ ++#define E1000_UTA 0x0A000 /* Unicast Table Array - RW */ ++#define E1000_IOVTCL 0x05BBC /* IOV Control Register */ ++#define E1000_TXSWC 0x05ACC /* Tx Switch Control */ ++#define E1000_LVMMC 0x03548 /* Last VM Misbehavior cause */ ++/* These act per VF so an array friendly macro is used */ ++#define E1000_P2VMAILBOX(_n) (0x00C00 + (4 * (_n))) ++#define E1000_VMBMEM(_n) (0x00800 + (64 * (_n))) ++#define E1000_VMOLR(_n) (0x05AD0 + (4 * (_n))) ++#define E1000_DVMOLR(_n) (0x0C038 + (64 * (_n))) ++#define E1000_VLVF(_n) (0x05D00 + (4 * (_n))) /* VLAN VM Filter */ ++#define E1000_VMVIR(_n) (0x03700 + (4 * (_n))) ++ ++struct e1000_hw; ++ ++u32 igb_rd32(struct e1000_hw *hw, u32 reg); ++ ++/* write operations, indexed using DWORDS */ ++#define 
wr32(reg, val) \ ++do { \ ++ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ ++ if (!E1000_REMOVED(hw_addr)) \ ++ writel((val), &hw_addr[(reg)]); \ ++} while (0) ++ ++#define rd32(reg) (igb_rd32(hw, reg)) ++ ++#define wrfl() ((void)rd32(E1000_STATUS)) ++ ++#define array_wr32(reg, offset, value) \ ++ wr32((reg) + ((offset) << 2), (value)) ++ ++#define array_rd32(reg, offset) \ ++ (readl(hw->hw_addr + reg + ((offset) << 2))) ++ ++/* DMA Coalescing registers */ ++#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ ++ ++/* Energy Efficient Ethernet "EEE" register */ ++#define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */ ++#define E1000_EEER 0x0E30 /* Energy Efficient Ethernet */ ++#define E1000_EEE_SU 0X0E34 /* EEE Setup */ ++#define E1000_EMIADD 0x10 /* Extended Memory Indirect Address */ ++#define E1000_EMIDATA 0x11 /* Extended Memory Indirect Data */ ++#define E1000_MMDAC 13 /* MMD Access Control */ ++#define E1000_MMDAAD 14 /* MMD Access Address/Data */ ++ ++/* Thermal Sensor Register */ ++#define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ ++ ++/* OS2BMC Registers */ ++#define E1000_B2OSPC 0x08FE0 /* BMC2OS packets sent by BMC */ ++#define E1000_B2OGPRC 0x04158 /* BMC2OS packets received by host */ ++#define E1000_O2BGPTC 0x08FE4 /* OS2BMC packets received by BMC */ ++#define E1000_O2BSPC 0x0415C /* OS2BMC packets transmitted by host */ ++ ++#define E1000_SRWR 0x12018 /* Shadow Ram Write Register - RW */ ++#define E1000_I210_FLMNGCTL 0x12038 ++#define E1000_I210_FLMNGDATA 0x1203C ++#define E1000_I210_FLMNGCNT 0x12040 ++ ++#define E1000_I210_FLSWCTL 0x12048 ++#define E1000_I210_FLSWDATA 0x1204C ++#define E1000_I210_FLSWCNT 0x12050 ++ ++#define E1000_I210_FLA 0x1201C ++ ++#define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n)) ++#define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */ ++ ++#define E1000_REMOVED(h) unlikely(!(h)) ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_mac.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_mac.h 2021-04-07 16:01:27.523633743 +0800 +@@ -0,0 +1,88 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_MAC_H_ ++#define _E1000_MAC_H_ ++ ++#include "e1000_hw.h" ++ ++#include "e1000_phy.h" ++#include "e1000_nvm.h" ++#include "e1000_defines.h" ++#include "e1000_i210.h" ++ ++/* Functions that should not be called directly from drivers but can be used ++ * by other files in this 'shared code' ++ */ ++s32 igb_blink_led(struct e1000_hw *hw); ++s32 igb_check_for_copper_link(struct e1000_hw *hw); ++s32 igb_cleanup_led(struct e1000_hw *hw); ++s32 igb_config_fc_after_link_up(struct e1000_hw *hw); ++s32 igb_disable_pcie_master(struct e1000_hw *hw); ++s32 igb_force_mac_fc(struct e1000_hw *hw); ++s32 igb_get_auto_rd_done(struct e1000_hw *hw); ++s32 igb_get_bus_info_pcie(struct e1000_hw *hw); ++s32 igb_get_hw_semaphore(struct e1000_hw *hw); ++s32 igb_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex); ++s32 igb_id_led_init(struct e1000_hw *hw); ++s32 igb_led_off(struct e1000_hw *hw); ++void igb_update_mc_addr_list(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count); ++s32 igb_setup_link(struct e1000_hw *hw); ++s32 igb_validate_mdi_setting(struct e1000_hw *hw); ++s32 igb_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, ++ u32 offset, u8 data); ++ ++void igb_clear_hw_cntrs_base(struct e1000_hw *hw); ++void igb_clear_vfta(struct e1000_hw *hw); ++void igb_clear_vfta_i350(struct e1000_hw *hw); ++s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add); ++void igb_config_collision_dist(struct e1000_hw *hw); ++void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count); ++void igb_mta_set(struct e1000_hw *hw, u32 hash_value); ++void igb_put_hw_semaphore(struct e1000_hw *hw); ++void igb_rar_set(struct e1000_hw *hw, u8 *addr, u32 index); ++s32 igb_check_alt_mac_addr(struct e1000_hw *hw); ++ ++bool igb_enable_mng_pass_thru(struct e1000_hw *hw); ++ ++enum e1000_mng_mode { ++ e1000_mng_mode_none = 0, ++ e1000_mng_mode_asf, ++ e1000_mng_mode_pt, ++ e1000_mng_mode_ipmi, ++ e1000_mng_mode_host_if_only ++}; ++ ++#define E1000_FACTPS_MNGCG 0x20000000 ++ ++#define E1000_FWSM_MODE_MASK 0xE ++#define E1000_FWSM_MODE_SHIFT 1 ++ ++#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 ++ ++void e1000_init_function_pointers_82575(struct e1000_hw *hw); ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/igb.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/igb.h 2021-04-07 16:01:27.519633749 +0800 +@@ -0,0 +1,557 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++/* Linux PRO/1000 Ethernet Driver main header file */ ++ ++#ifndef _IGB_H_ ++#define _IGB_H_ ++ ++#include "e1000_mac.h" ++#include "e1000_82575.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++struct igb_adapter; ++ ++#define E1000_PCS_CFG_IGN_SD 1 ++ ++/* Interrupt defines */ ++#define IGB_START_ITR 648 /* ~6000 ints/sec */ ++#define IGB_4K_ITR 980 ++#define IGB_20K_ITR 196 ++#define IGB_70K_ITR 56 ++ ++/* TX/RX descriptor defines */ ++#define IGB_DEFAULT_TXD 256 ++#define IGB_DEFAULT_TX_WORK 128 ++#define IGB_MIN_TXD 80 ++#define IGB_MAX_TXD 4096 ++ ++#define IGB_DEFAULT_RXD 256 ++#define IGB_MIN_RXD 80 ++#define IGB_MAX_RXD 4096 ++ ++#define IGB_DEFAULT_ITR 3 /* dynamic */ ++#define IGB_MAX_ITR_USECS 10000 ++#define IGB_MIN_ITR_USECS 10 ++#define NON_Q_VECTORS 1 ++#define MAX_Q_VECTORS 8 ++#define MAX_MSIX_ENTRIES 10 ++ ++/* Transmit and receive queues */ ++#define IGB_MAX_RX_QUEUES 8 ++#define IGB_MAX_RX_QUEUES_82575 4 ++#define IGB_MAX_RX_QUEUES_I211 2 ++#define IGB_MAX_TX_QUEUES 8 ++#define IGB_MAX_VF_MC_ENTRIES 30 ++#define IGB_MAX_VF_FUNCTIONS 8 ++#define IGB_MAX_VFTA_ENTRIES 128 ++#define IGB_82576_VF_DEV_ID 0x10CA ++#define IGB_I350_VF_DEV_ID 0x1520 ++ ++/* NVM version defines */ ++#define IGB_MAJOR_MASK 0xF000 ++#define IGB_MINOR_MASK 0x0FF0 ++#define IGB_BUILD_MASK 0x000F ++#define IGB_COMB_VER_MASK 0x00FF ++#define IGB_MAJOR_SHIFT 12 ++#define IGB_MINOR_SHIFT 4 ++#define IGB_COMB_VER_SHFT 8 ++#define IGB_NVM_VER_INVALID 0xFFFF ++#define IGB_ETRACK_SHIFT 16 ++#define NVM_ETRACK_WORD 0x0042 ++#define NVM_COMB_VER_OFF 0x0083 ++#define NVM_COMB_VER_PTR 0x003d ++ ++struct vf_data_storage { ++ unsigned char vf_mac_addresses[ETH_ALEN]; ++ u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES]; ++ u16 num_vf_mc_hashes; ++ u16 vlans_enabled; ++ u32 flags; ++ unsigned long last_nack; ++ u16 pf_vlan; /* When set, guest VLAN config not allowed. */ ++ u16 pf_qos; ++ u16 tx_rate; ++ bool spoofchk_enabled; ++}; ++ ++#define IGB_VF_FLAG_CTS 0x00000001 /* VF is clear to send data */ ++#define IGB_VF_FLAG_UNI_PROMISC 0x00000002 /* VF has unicast promisc */ ++#define IGB_VF_FLAG_MULTI_PROMISC 0x00000004 /* VF has multicast promisc */ ++#define IGB_VF_FLAG_PF_SET_MAC 0x00000008 /* PF has set MAC address */ ++ ++/* RX descriptor control thresholds. ++ * PTHRESH - MAC will consider prefetch if it has fewer than this number of ++ * descriptors available in its onboard memory. ++ * Setting this to 0 disables RX descriptor prefetch. ++ * HTHRESH - MAC will only prefetch if there are at least this many descriptors ++ * available in host memory. ++ * If PTHRESH is 0, this should also be 0. ++ * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back ++ * descriptors until either it has this many to write back, or the ++ * ITR timer expires. ++ */ ++#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : 8) ++#define IGB_RX_HTHRESH 8 ++#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) ++#define IGB_TX_HTHRESH 1 ++#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ ++ (adapter->flags & IGB_FLAG_HAS_MSIX)) ? 1 : 4) ++#define IGB_TX_WTHRESH ((hw->mac.type == e1000_82576 && \ ++ (adapter->flags & IGB_FLAG_HAS_MSIX)) ? 
1 : 16) ++ ++/* this is the size past which hardware will drop packets when setting LPE=0 */ ++#define MAXIMUM_ETHERNET_VLAN_SIZE 1522 ++ ++/* Supported Rx Buffer Sizes */ ++#define IGB_RXBUFFER_256 256 ++#define IGB_RXBUFFER_2048 2048 ++#define IGB_RX_HDR_LEN IGB_RXBUFFER_256 ++#define IGB_RX_BUFSZ IGB_RXBUFFER_2048 ++ ++/* How many Rx Buffers do we bundle into one write to the hardware ? */ ++#define IGB_RX_BUFFER_WRITE 16 /* Must be power of 2 */ ++ ++#define AUTO_ALL_MODES 0 ++#define IGB_EEPROM_APME 0x0400 ++ ++#ifndef IGB_MASTER_SLAVE ++/* Switch to override PHY master/slave setting */ ++#define IGB_MASTER_SLAVE e1000_ms_hw_default ++#endif ++ ++#define IGB_MNG_VLAN_NONE -1 ++ ++enum igb_tx_flags { ++ /* cmd_type flags */ ++ IGB_TX_FLAGS_VLAN = 0x01, ++ IGB_TX_FLAGS_TSO = 0x02, ++ IGB_TX_FLAGS_TSTAMP = 0x04, ++ ++ /* olinfo flags */ ++ IGB_TX_FLAGS_IPV4 = 0x10, ++ IGB_TX_FLAGS_CSUM = 0x20, ++}; ++ ++/* VLAN info */ ++#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 ++#define IGB_TX_FLAGS_VLAN_SHIFT 16 ++ ++/* The largest size we can write to the descriptor is 65535. In order to ++ * maintain a power of two alignment we have to limit ourselves to 32K. ++ */ ++#define IGB_MAX_TXD_PWR 15 ++#define IGB_MAX_DATA_PER_TXD (1 << IGB_MAX_TXD_PWR) ++ ++/* Tx Descriptors needed, worst case */ ++#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD) ++#define DESC_NEEDED (MAX_SKB_FRAGS + 4) ++ ++/* EEPROM byte offsets */ ++#define IGB_SFF_8472_SWAP 0x5C ++#define IGB_SFF_8472_COMP 0x5E ++ ++/* Bitmasks */ ++#define IGB_SFF_ADDRESSING_MODE 0x4 ++#define IGB_SFF_8472_UNSUP 0x00 ++ ++/* wrapper around a pointer to a socket buffer, ++ * so a DMA handle can be stored along with the buffer ++ */ ++struct igb_tx_buffer { ++ union e1000_adv_tx_desc *next_to_watch; ++ unsigned long time_stamp; ++ struct rtskb *skb; ++ unsigned int bytecount; ++ u16 gso_segs; ++ __be16 protocol; ++ ++ u32 tx_flags; ++}; ++ ++struct igb_rx_buffer { ++ dma_addr_t dma; ++ struct rtskb *skb; ++}; ++ ++struct igb_tx_queue_stats { ++ u64 packets; ++ u64 bytes; ++ u64 restart_queue; ++ u64 restart_queue2; ++}; ++ ++struct igb_rx_queue_stats { ++ u64 packets; ++ u64 bytes; ++ u64 drops; ++ u64 csum_err; ++ u64 alloc_failed; ++}; ++ ++struct igb_ring_container { ++ struct igb_ring *ring; /* pointer to linked list of rings */ ++ unsigned int total_bytes; /* total bytes processed this int */ ++ unsigned int total_packets; /* total packets processed this int */ ++ u16 work_limit; /* total work allowed per interrupt */ ++ u8 count; /* total number of rings in vector */ ++ u8 itr; /* current ITR setting for ring */ ++}; ++ ++struct igb_ring { ++ struct igb_q_vector *q_vector; /* backlink to q_vector */ ++ struct rtnet_device *netdev; /* back pointer to net_device */ ++ struct device *dev; /* device pointer for dma mapping */ ++ union { /* array of buffer info structs */ ++ struct igb_tx_buffer *tx_buffer_info; ++ struct igb_rx_buffer *rx_buffer_info; ++ }; ++ void *desc; /* descriptor ring memory */ ++ unsigned long flags; /* ring specific flags */ ++ void __iomem *tail; /* pointer to ring tail register */ ++ dma_addr_t dma; /* phys address of the ring */ ++ unsigned int size; /* length of desc. ring in bytes */ ++ ++ u16 count; /* number of desc. 
in the ring */ ++ u8 queue_index; /* logical index of the ring*/ ++ u8 reg_idx; /* physical index of the ring */ ++ ++ /* everything past this point are written often */ ++ u16 next_to_clean; ++ u16 next_to_use; ++ u16 next_to_alloc; ++ ++ union { ++ /* TX */ ++ struct { ++ struct igb_tx_queue_stats tx_stats; ++ }; ++ /* RX */ ++ struct { ++ struct igb_rx_queue_stats rx_stats; ++ u16 rx_buffer_len; ++ }; ++ }; ++} ____cacheline_internodealigned_in_smp; ++ ++struct igb_q_vector { ++ struct igb_adapter *adapter; /* backlink */ ++ int cpu; /* CPU for DCA */ ++ u32 eims_value; /* EIMS mask value */ ++ ++ u16 itr_val; ++ u8 set_itr; ++ void __iomem *itr_register; ++ ++ struct igb_ring_container rx, tx; ++ ++ struct rcu_head rcu; /* to avoid race with update stats on free */ ++ char name[IFNAMSIZ + 9]; ++ ++ /* for dynamic allocation of rings associated with this q_vector */ ++ struct igb_ring ring[0] ____cacheline_internodealigned_in_smp; ++}; ++ ++enum e1000_ring_flags_t { ++ IGB_RING_FLAG_RX_SCTP_CSUM, ++ IGB_RING_FLAG_RX_LB_VLAN_BSWAP, ++ IGB_RING_FLAG_TX_CTX_IDX, ++ IGB_RING_FLAG_TX_DETECT_HANG ++}; ++ ++#define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS) ++ ++#define IGB_RX_DESC(R, i) \ ++ (&(((union e1000_adv_rx_desc *)((R)->desc))[i])) ++#define IGB_TX_DESC(R, i) \ ++ (&(((union e1000_adv_tx_desc *)((R)->desc))[i])) ++#define IGB_TX_CTXTDESC(R, i) \ ++ (&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i])) ++ ++/* igb_test_staterr - tests bits within Rx descriptor status and error fields */ ++static inline __le32 igb_test_staterr(union e1000_adv_rx_desc *rx_desc, ++ const u32 stat_err_bits) ++{ ++ return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); ++} ++ ++/* igb_desc_unused - calculate if we have unused descriptors */ ++static inline int igb_desc_unused(struct igb_ring *ring) ++{ ++ if (ring->next_to_clean > ring->next_to_use) ++ return ring->next_to_clean - ring->next_to_use - 1; ++ ++ return ring->count + ring->next_to_clean - ring->next_to_use - 1; ++} ++ ++#ifdef CONFIG_IGB_HWMON ++ ++#define IGB_HWMON_TYPE_LOC 0 ++#define IGB_HWMON_TYPE_TEMP 1 ++#define IGB_HWMON_TYPE_CAUTION 2 ++#define IGB_HWMON_TYPE_MAX 3 ++ ++struct hwmon_attr { ++ struct device_attribute dev_attr; ++ struct e1000_hw *hw; ++ struct e1000_thermal_diode_data *sensor; ++ char name[12]; ++ }; ++ ++struct hwmon_buff { ++ struct attribute_group group; ++ const struct attribute_group *groups[2]; ++ struct attribute *attrs[E1000_MAX_SENSORS * 4 + 1]; ++ struct hwmon_attr hwmon_list[E1000_MAX_SENSORS * 4]; ++ unsigned int n_hwmon; ++ }; ++#endif ++ ++#define IGB_N_EXTTS 2 ++#define IGB_N_PEROUT 2 ++#define IGB_N_SDP 4 ++#define IGB_RETA_SIZE 128 ++ ++/* board specific private data structure */ ++struct igb_adapter { ++ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; ++ ++ struct rtnet_device *netdev; ++ ++ unsigned long state; ++ unsigned int flags; ++ ++ unsigned int num_q_vectors; ++ struct msix_entry msix_entries[MAX_MSIX_ENTRIES]; ++ rtdm_irq_t msix_irq_handle[MAX_MSIX_ENTRIES]; ++ rtdm_irq_t irq_handle; ++ rtdm_nrtsig_t watchdog_nrtsig; ++ spinlock_t stats64_lock; ++ ++ /* Interrupt Throttle Rate */ ++ u32 rx_itr_setting; ++ u32 tx_itr_setting; ++ u16 tx_itr; ++ u16 rx_itr; ++ ++ /* TX */ ++ u16 tx_work_limit; ++ u32 tx_timeout_count; ++ int num_tx_queues; ++ struct igb_ring *tx_ring[16]; ++ ++ /* RX */ ++ int num_rx_queues; ++ struct igb_ring *rx_ring[16]; ++ ++ u32 max_frame_size; ++ u32 min_frame_size; ++ ++ struct timer_list watchdog_timer; ++ struct timer_list 
phy_info_timer; ++ ++ u16 mng_vlan_id; ++ u32 bd_number; ++ u32 wol; ++ u32 en_mng_pt; ++ u16 link_speed; ++ u16 link_duplex; ++ ++ struct work_struct reset_task; ++ struct work_struct watchdog_task; ++ bool fc_autoneg; ++ u8 tx_timeout_factor; ++ struct timer_list blink_timer; ++ unsigned long led_status; ++ ++ /* OS defined structs */ ++ struct pci_dev *pdev; ++ ++ struct net_device_stats net_stats; ++ ++ /* structs defined in e1000_hw.h */ ++ struct e1000_hw hw; ++ struct e1000_hw_stats stats; ++ struct e1000_phy_info phy_info; ++ ++ u32 test_icr; ++ struct igb_ring test_tx_ring; ++ struct igb_ring test_rx_ring; ++ ++ struct igb_q_vector *q_vector[MAX_Q_VECTORS]; ++ u32 eims_enable_mask; ++ u32 eims_other; ++ ++ /* to not mess up cache alignment, always add to the bottom */ ++ u16 tx_ring_count; ++ u16 rx_ring_count; ++ int vf_rate_link_speed; ++ u32 rss_queues; ++ u32 wvbr; ++ u32 *shadow_vfta; ++ ++ unsigned long last_rx_timestamp; ++ ++ char fw_version[32]; ++#ifdef CONFIG_IGB_HWMON ++ struct hwmon_buff *igb_hwmon_buff; ++ bool ets; ++#endif ++ struct i2c_algo_bit_data i2c_algo; ++ struct i2c_adapter i2c_adap; ++ struct i2c_client *i2c_client; ++ u32 rss_indir_tbl_init; ++ u8 rss_indir_tbl[IGB_RETA_SIZE]; ++ ++ unsigned long link_check_timeout; ++ int copper_tries; ++ struct e1000_info ei; ++ u16 eee_advert; ++}; ++ ++#define IGB_FLAG_HAS_MSI (1 << 0) ++#define IGB_FLAG_DCA_ENABLED (1 << 1) ++#define IGB_FLAG_QUAD_PORT_A (1 << 2) ++#define IGB_FLAG_QUEUE_PAIRS (1 << 3) ++#define IGB_FLAG_DMAC (1 << 4) ++#define IGB_FLAG_PTP (1 << 5) ++#define IGB_FLAG_RSS_FIELD_IPV4_UDP (1 << 6) ++#define IGB_FLAG_RSS_FIELD_IPV6_UDP (1 << 7) ++#define IGB_FLAG_WOL_SUPPORTED (1 << 8) ++#define IGB_FLAG_NEED_LINK_UPDATE (1 << 9) ++#define IGB_FLAG_MEDIA_RESET (1 << 10) ++#define IGB_FLAG_MAS_CAPABLE (1 << 11) ++#define IGB_FLAG_MAS_ENABLE (1 << 12) ++#define IGB_FLAG_HAS_MSIX (1 << 13) ++#define IGB_FLAG_EEE (1 << 14) ++ ++/* Media Auto Sense */ ++#define IGB_MAS_ENABLE_0 0X0001 ++#define IGB_MAS_ENABLE_1 0X0002 ++#define IGB_MAS_ENABLE_2 0X0004 ++#define IGB_MAS_ENABLE_3 0X0008 ++ ++/* DMA Coalescing defines */ ++#define IGB_MIN_TXPBSIZE 20408 ++#define IGB_TX_BUF_4096 4096 ++#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coal Flush */ ++ ++#define IGB_82576_TSYNC_SHIFT 19 ++#define IGB_TS_HDR_LEN 16 ++enum e1000_state_t { ++ __IGB_TESTING, ++ __IGB_RESETTING, ++ __IGB_DOWN, ++ __IGB_PTP_TX_IN_PROGRESS, ++}; ++ ++enum igb_boards { ++ board_82575, ++}; ++ ++extern char igb_driver_name[]; ++extern char igb_driver_version[]; ++ ++int igb_up(struct igb_adapter *); ++void igb_down(struct igb_adapter *); ++void igb_reinit_locked(struct igb_adapter *); ++void igb_reset(struct igb_adapter *); ++int igb_reinit_queues(struct igb_adapter *); ++void igb_write_rss_indir_tbl(struct igb_adapter *); ++int igb_set_spd_dplx(struct igb_adapter *, u32, u8); ++int igb_setup_tx_resources(struct igb_ring *); ++int igb_setup_rx_resources(struct igb_ring *); ++void igb_free_tx_resources(struct igb_ring *); ++void igb_free_rx_resources(struct igb_ring *); ++void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *); ++void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *); ++void igb_setup_tctl(struct igb_adapter *); ++void igb_setup_rctl(struct igb_adapter *); ++netdev_tx_t igb_xmit_frame_ring(struct rtskb *, struct igb_ring *); ++void igb_unmap_and_free_tx_resource(struct igb_ring *, struct igb_tx_buffer *); ++void igb_alloc_rx_buffers(struct igb_ring *, u16); ++void igb_update_stats(struct 
igb_adapter *); ++bool igb_has_link(struct igb_adapter *adapter); ++void igb_set_ethtool_ops(struct rtnet_device *); ++void igb_power_up_link(struct igb_adapter *); ++void igb_set_fw_version(struct igb_adapter *); ++void igb_ptp_init(struct igb_adapter *adapter); ++void igb_ptp_stop(struct igb_adapter *adapter); ++void igb_ptp_reset(struct igb_adapter *adapter); ++void igb_ptp_rx_hang(struct igb_adapter *adapter); ++void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct rtskb *skb); ++void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, unsigned char *va, ++ struct rtskb *skb); ++int igb_ptp_set_ts_config(struct rtnet_device *netdev, struct ifreq *ifr); ++int igb_ptp_get_ts_config(struct rtnet_device *netdev, struct ifreq *ifr); ++#ifdef CONFIG_IGB_HWMON ++void igb_sysfs_exit(struct igb_adapter *adapter); ++int igb_sysfs_init(struct igb_adapter *adapter); ++#endif ++static inline s32 igb_reset_phy(struct e1000_hw *hw) ++{ ++ if (hw->phy.ops.reset) ++ return hw->phy.ops.reset(hw); ++ ++ return 0; ++} ++ ++static inline s32 igb_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ if (hw->phy.ops.read_reg) ++ return hw->phy.ops.read_reg(hw, offset, data); ++ ++ return 0; ++} ++ ++static inline s32 igb_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ if (hw->phy.ops.write_reg) ++ return hw->phy.ops.write_reg(hw, offset, data); ++ ++ return 0; ++} ++ ++static inline s32 igb_get_phy_info(struct e1000_hw *hw) ++{ ++ if (hw->phy.ops.get_phy_info) ++ return hw->phy.ops.get_phy_info(hw); ++ ++ return 0; ++} ++ ++static inline struct rtnet_device *txring_txq(const struct igb_ring *tx_ring) ++{ ++ return tx_ring->netdev; ++} ++ ++#endif /* _IGB_H_ */ +--- linux/drivers/xenomai/net/drivers/igb/e1000_hw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_hw.h 2021-04-07 16:01:27.514633756 +0800 +@@ -0,0 +1,570 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_HW_H_ ++#define _E1000_HW_H_ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "e1000_regs.h" ++#include "e1000_defines.h" ++ ++struct e1000_hw; ++ ++#define E1000_DEV_ID_82576 0x10C9 ++#define E1000_DEV_ID_82576_FIBER 0x10E6 ++#define E1000_DEV_ID_82576_SERDES 0x10E7 ++#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 ++#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 ++#define E1000_DEV_ID_82576_NS 0x150A ++#define E1000_DEV_ID_82576_NS_SERDES 0x1518 ++#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D ++#define E1000_DEV_ID_82575EB_COPPER 0x10A7 ++#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 ++#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 ++#define E1000_DEV_ID_82580_COPPER 0x150E ++#define E1000_DEV_ID_82580_FIBER 0x150F ++#define E1000_DEV_ID_82580_SERDES 0x1510 ++#define E1000_DEV_ID_82580_SGMII 0x1511 ++#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 ++#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 ++#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 ++#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A ++#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C ++#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 ++#define E1000_DEV_ID_I350_COPPER 0x1521 ++#define E1000_DEV_ID_I350_FIBER 0x1522 ++#define E1000_DEV_ID_I350_SERDES 0x1523 ++#define E1000_DEV_ID_I350_SGMII 0x1524 ++#define E1000_DEV_ID_I210_COPPER 0x1533 ++#define E1000_DEV_ID_I210_FIBER 0x1536 ++#define E1000_DEV_ID_I210_SERDES 0x1537 ++#define E1000_DEV_ID_I210_SGMII 0x1538 ++#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B ++#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C ++#define E1000_DEV_ID_I211_COPPER 0x1539 ++#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 ++#define E1000_DEV_ID_I354_SGMII 0x1F41 ++#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 ++ ++#define E1000_REVISION_2 2 ++#define E1000_REVISION_4 4 ++ ++#define E1000_FUNC_0 0 ++#define E1000_FUNC_1 1 ++#define E1000_FUNC_2 2 ++#define E1000_FUNC_3 3 ++ ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2 6 ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3 9 ++ ++enum e1000_mac_type { ++ e1000_undefined = 0, ++ e1000_82575, ++ e1000_82576, ++ e1000_82580, ++ e1000_i350, ++ e1000_i354, ++ e1000_i210, ++ e1000_i211, ++ e1000_num_macs /* List is 1-based, so subtract 1 for true count. 
*/ ++}; ++ ++enum e1000_media_type { ++ e1000_media_type_unknown = 0, ++ e1000_media_type_copper = 1, ++ e1000_media_type_fiber = 2, ++ e1000_media_type_internal_serdes = 3, ++ e1000_num_media_types ++}; ++ ++enum e1000_nvm_type { ++ e1000_nvm_unknown = 0, ++ e1000_nvm_none, ++ e1000_nvm_eeprom_spi, ++ e1000_nvm_flash_hw, ++ e1000_nvm_invm, ++ e1000_nvm_flash_sw ++}; ++ ++enum e1000_nvm_override { ++ e1000_nvm_override_none = 0, ++ e1000_nvm_override_spi_small, ++ e1000_nvm_override_spi_large, ++}; ++ ++enum e1000_phy_type { ++ e1000_phy_unknown = 0, ++ e1000_phy_none, ++ e1000_phy_m88, ++ e1000_phy_igp, ++ e1000_phy_igp_2, ++ e1000_phy_gg82563, ++ e1000_phy_igp_3, ++ e1000_phy_ife, ++ e1000_phy_82580, ++ e1000_phy_i210, ++}; ++ ++enum e1000_bus_type { ++ e1000_bus_type_unknown = 0, ++ e1000_bus_type_pci, ++ e1000_bus_type_pcix, ++ e1000_bus_type_pci_express, ++ e1000_bus_type_reserved ++}; ++ ++enum e1000_bus_speed { ++ e1000_bus_speed_unknown = 0, ++ e1000_bus_speed_33, ++ e1000_bus_speed_66, ++ e1000_bus_speed_100, ++ e1000_bus_speed_120, ++ e1000_bus_speed_133, ++ e1000_bus_speed_2500, ++ e1000_bus_speed_5000, ++ e1000_bus_speed_reserved ++}; ++ ++enum e1000_bus_width { ++ e1000_bus_width_unknown = 0, ++ e1000_bus_width_pcie_x1, ++ e1000_bus_width_pcie_x2, ++ e1000_bus_width_pcie_x4 = 4, ++ e1000_bus_width_pcie_x8 = 8, ++ e1000_bus_width_32, ++ e1000_bus_width_64, ++ e1000_bus_width_reserved ++}; ++ ++enum e1000_1000t_rx_status { ++ e1000_1000t_rx_status_not_ok = 0, ++ e1000_1000t_rx_status_ok, ++ e1000_1000t_rx_status_undefined = 0xFF ++}; ++ ++enum e1000_rev_polarity { ++ e1000_rev_polarity_normal = 0, ++ e1000_rev_polarity_reversed, ++ e1000_rev_polarity_undefined = 0xFF ++}; ++ ++enum e1000_fc_mode { ++ e1000_fc_none = 0, ++ e1000_fc_rx_pause, ++ e1000_fc_tx_pause, ++ e1000_fc_full, ++ e1000_fc_default = 0xFF ++}; ++ ++/* Statistics counters collected by the MAC */ ++struct e1000_hw_stats { ++ u64 crcerrs; ++ u64 algnerrc; ++ u64 symerrs; ++ u64 rxerrc; ++ u64 mpc; ++ u64 scc; ++ u64 ecol; ++ u64 mcc; ++ u64 latecol; ++ u64 colc; ++ u64 dc; ++ u64 tncrs; ++ u64 sec; ++ u64 cexterr; ++ u64 rlec; ++ u64 xonrxc; ++ u64 xontxc; ++ u64 xoffrxc; ++ u64 xofftxc; ++ u64 fcruc; ++ u64 prc64; ++ u64 prc127; ++ u64 prc255; ++ u64 prc511; ++ u64 prc1023; ++ u64 prc1522; ++ u64 gprc; ++ u64 bprc; ++ u64 mprc; ++ u64 gptc; ++ u64 gorc; ++ u64 gotc; ++ u64 rnbc; ++ u64 ruc; ++ u64 rfc; ++ u64 roc; ++ u64 rjc; ++ u64 mgprc; ++ u64 mgpdc; ++ u64 mgptc; ++ u64 tor; ++ u64 tot; ++ u64 tpr; ++ u64 tpt; ++ u64 ptc64; ++ u64 ptc127; ++ u64 ptc255; ++ u64 ptc511; ++ u64 ptc1023; ++ u64 ptc1522; ++ u64 mptc; ++ u64 bptc; ++ u64 tsctc; ++ u64 tsctfc; ++ u64 iac; ++ u64 icrxptc; ++ u64 icrxatc; ++ u64 ictxptc; ++ u64 ictxatc; ++ u64 ictxqec; ++ u64 ictxqmtc; ++ u64 icrxdmtc; ++ u64 icrxoc; ++ u64 cbtmpc; ++ u64 htdpmc; ++ u64 cbrdpc; ++ u64 cbrmpc; ++ u64 rpthc; ++ u64 hgptc; ++ u64 htcbdpc; ++ u64 hgorc; ++ u64 hgotc; ++ u64 lenerrs; ++ u64 scvpc; ++ u64 hrmpc; ++ u64 doosync; ++ u64 o2bgptc; ++ u64 o2bspc; ++ u64 b2ospc; ++ u64 b2ogprc; ++}; ++ ++struct e1000_host_mng_dhcp_cookie { ++ u32 signature; ++ u8 status; ++ u8 reserved0; ++ u16 vlan_id; ++ u32 reserved1; ++ u16 reserved2; ++ u8 reserved3; ++ u8 checksum; ++}; ++ ++/* Host Interface "Rev 1" */ ++struct e1000_host_command_header { ++ u8 command_id; ++ u8 command_length; ++ u8 command_options; ++ u8 checksum; ++}; ++ ++#define E1000_HI_MAX_DATA_LENGTH 252 ++struct e1000_host_command_info { ++ struct e1000_host_command_header command_header; ++ u8 
command_data[E1000_HI_MAX_DATA_LENGTH]; ++}; ++ ++/* Host Interface "Rev 2" */ ++struct e1000_host_mng_command_header { ++ u8 command_id; ++ u8 checksum; ++ u16 reserved1; ++ u16 reserved2; ++ u16 command_length; ++}; ++ ++#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 ++struct e1000_host_mng_command_info { ++ struct e1000_host_mng_command_header command_header; ++ u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; ++}; ++ ++#include "e1000_mac.h" ++#include "e1000_phy.h" ++#include "e1000_nvm.h" ++#include "e1000_mbx.h" ++ ++struct e1000_mac_operations { ++ s32 (*check_for_link)(struct e1000_hw *); ++ s32 (*reset_hw)(struct e1000_hw *); ++ s32 (*init_hw)(struct e1000_hw *); ++ bool (*check_mng_mode)(struct e1000_hw *); ++ s32 (*setup_physical_interface)(struct e1000_hw *); ++ void (*rar_set)(struct e1000_hw *, u8 *, u32); ++ s32 (*read_mac_addr)(struct e1000_hw *); ++ s32 (*get_speed_and_duplex)(struct e1000_hw *, u16 *, u16 *); ++ s32 (*acquire_swfw_sync)(struct e1000_hw *, u16); ++ void (*release_swfw_sync)(struct e1000_hw *, u16); ++#ifdef CONFIG_IGB_HWMON ++ s32 (*get_thermal_sensor_data)(struct e1000_hw *); ++ s32 (*init_thermal_sensor_thresh)(struct e1000_hw *); ++#endif ++ ++}; ++ ++struct e1000_phy_operations { ++ s32 (*acquire)(struct e1000_hw *); ++ s32 (*check_polarity)(struct e1000_hw *); ++ s32 (*check_reset_block)(struct e1000_hw *); ++ s32 (*force_speed_duplex)(struct e1000_hw *); ++ s32 (*get_cfg_done)(struct e1000_hw *hw); ++ s32 (*get_cable_length)(struct e1000_hw *); ++ s32 (*get_phy_info)(struct e1000_hw *); ++ s32 (*read_reg)(struct e1000_hw *, u32, u16 *); ++ void (*release)(struct e1000_hw *); ++ s32 (*reset)(struct e1000_hw *); ++ s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); ++ s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); ++ s32 (*write_reg)(struct e1000_hw *, u32, u16); ++ s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *); ++ s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8); ++}; ++ ++struct e1000_nvm_operations { ++ s32 (*acquire)(struct e1000_hw *); ++ s32 (*read)(struct e1000_hw *, u16, u16, u16 *); ++ void (*release)(struct e1000_hw *); ++ s32 (*write)(struct e1000_hw *, u16, u16, u16 *); ++ s32 (*update)(struct e1000_hw *); ++ s32 (*validate)(struct e1000_hw *); ++ s32 (*valid_led_default)(struct e1000_hw *, u16 *); ++}; ++ ++#define E1000_MAX_SENSORS 3 ++ ++struct e1000_thermal_diode_data { ++ u8 location; ++ u8 temp; ++ u8 caution_thresh; ++ u8 max_op_thresh; ++}; ++ ++struct e1000_thermal_sensor_data { ++ struct e1000_thermal_diode_data sensor[E1000_MAX_SENSORS]; ++}; ++ ++struct e1000_info { ++ s32 (*get_invariants)(struct e1000_hw *); ++ struct e1000_mac_operations *mac_ops; ++ struct e1000_phy_operations *phy_ops; ++ struct e1000_nvm_operations *nvm_ops; ++}; ++ ++extern const struct e1000_info e1000_82575_info; ++ ++struct e1000_mac_info { ++ struct e1000_mac_operations ops; ++ ++ u8 addr[6]; ++ u8 perm_addr[6]; ++ ++ enum e1000_mac_type type; ++ ++ u32 ledctl_default; ++ u32 ledctl_mode1; ++ u32 ledctl_mode2; ++ u32 mc_filter_type; ++ u32 txcw; ++ ++ u16 mta_reg_count; ++ u16 uta_reg_count; ++ ++ /* Maximum size of the MTA register table in all supported adapters */ ++ #define MAX_MTA_REG 128 ++ u32 mta_shadow[MAX_MTA_REG]; ++ u16 rar_entry_count; ++ ++ u8 forced_speed_duplex; ++ ++ bool adaptive_ifs; ++ bool arc_subsystem_valid; ++ bool asf_firmware_present; ++ bool autoneg; ++ bool autoneg_failed; ++ bool disable_hw_init_bits; ++ bool get_link_status; ++ bool ifs_params_forced; ++ bool in_ifs_mode; ++ bool report_tx_early; ++ bool 
serdes_has_link; ++ bool tx_pkt_filtering; ++ struct e1000_thermal_sensor_data thermal_sensor_data; ++}; ++ ++struct e1000_phy_info { ++ struct e1000_phy_operations ops; ++ ++ enum e1000_phy_type type; ++ ++ enum e1000_1000t_rx_status local_rx; ++ enum e1000_1000t_rx_status remote_rx; ++ enum e1000_ms_type ms_type; ++ enum e1000_ms_type original_ms_type; ++ enum e1000_rev_polarity cable_polarity; ++ enum e1000_smart_speed smart_speed; ++ ++ u32 addr; ++ u32 id; ++ u32 reset_delay_us; /* in usec */ ++ u32 revision; ++ ++ enum e1000_media_type media_type; ++ ++ u16 autoneg_advertised; ++ u16 autoneg_mask; ++ u16 cable_length; ++ u16 max_cable_length; ++ u16 min_cable_length; ++ ++ u8 mdix; ++ ++ bool disable_polarity_correction; ++ bool is_mdix; ++ bool polarity_correction; ++ bool reset_disable; ++ bool speed_downgraded; ++ bool autoneg_wait_to_complete; ++}; ++ ++struct e1000_nvm_info { ++ struct e1000_nvm_operations ops; ++ enum e1000_nvm_type type; ++ enum e1000_nvm_override override; ++ ++ u32 flash_bank_size; ++ u32 flash_base_addr; ++ ++ u16 word_size; ++ u16 delay_usec; ++ u16 address_bits; ++ u16 opcode_bits; ++ u16 page_size; ++}; ++ ++struct e1000_bus_info { ++ enum e1000_bus_type type; ++ enum e1000_bus_speed speed; ++ enum e1000_bus_width width; ++ ++ u32 snoop; ++ ++ u16 func; ++ u16 pci_cmd_word; ++}; ++ ++struct e1000_fc_info { ++ u32 high_water; /* Flow control high-water mark */ ++ u32 low_water; /* Flow control low-water mark */ ++ u16 pause_time; /* Flow control pause timer */ ++ bool send_xon; /* Flow control send XON */ ++ bool strict_ieee; /* Strict IEEE mode */ ++ enum e1000_fc_mode current_mode; /* Type of flow control */ ++ enum e1000_fc_mode requested_mode; ++}; ++ ++struct e1000_mbx_operations { ++ s32 (*init_params)(struct e1000_hw *hw); ++ s32 (*read)(struct e1000_hw *, u32 *, u16, u16); ++ s32 (*write)(struct e1000_hw *, u32 *, u16, u16); ++ s32 (*read_posted)(struct e1000_hw *, u32 *, u16, u16); ++ s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16); ++ s32 (*check_for_msg)(struct e1000_hw *, u16); ++ s32 (*check_for_ack)(struct e1000_hw *, u16); ++ s32 (*check_for_rst)(struct e1000_hw *, u16); ++}; ++ ++struct e1000_mbx_stats { ++ u32 msgs_tx; ++ u32 msgs_rx; ++ ++ u32 acks; ++ u32 reqs; ++ u32 rsts; ++}; ++ ++struct e1000_mbx_info { ++ struct e1000_mbx_operations ops; ++ struct e1000_mbx_stats stats; ++ u32 timeout; ++ u32 usec_delay; ++ u16 size; ++}; ++ ++struct e1000_dev_spec_82575 { ++ bool sgmii_active; ++ bool global_device_reset; ++ bool eee_disable; ++ bool clear_semaphore_once; ++ struct e1000_sfp_flags eth_flags; ++ bool module_plugged; ++ u8 media_port; ++ bool media_changed; ++ bool mas_capable; ++}; ++ ++struct e1000_hw { ++ void *back; ++ ++ u8 __iomem *hw_addr; ++ u8 __iomem *flash_address; ++ unsigned long io_base; ++ ++ struct e1000_mac_info mac; ++ struct e1000_fc_info fc; ++ struct e1000_phy_info phy; ++ struct e1000_nvm_info nvm; ++ struct e1000_bus_info bus; ++ struct e1000_mbx_info mbx; ++ struct e1000_host_mng_dhcp_cookie mng_cookie; ++ ++ union { ++ struct e1000_dev_spec_82575 _82575; ++ } dev_spec; ++ ++ u16 device_id; ++ u16 subsystem_vendor_id; ++ u16 subsystem_device_id; ++ u16 vendor_id; ++ ++ u8 revision_id; ++}; ++ ++struct rtnet_device *igb_get_hw_dev(struct e1000_hw *hw); ++#define hw_dbg(format, arg...) 
\ ++ rtdev_dbg(igb_get_hw_dev(hw), format, ##arg) ++ ++/* These functions must be implemented by drivers */ ++s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); ++s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); ++ ++void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); ++void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); ++#endif /* _E1000_HW_H_ */ +--- linux/drivers/xenomai/net/drivers/igb/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/Makefile 2021-04-07 16:01:27.509633763 +0800 +@@ -0,0 +1,13 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_IGB) += rt_igb.o ++ ++rt_igb-y := \ ++ e1000_82575.o \ ++ e1000_i210.o \ ++ e1000_mac.o \ ++ e1000_mbx.o \ ++ e1000_nvm.o \ ++ e1000_phy.o \ ++ igb_hwmon.o \ ++ igb_main.o +--- linux/drivers/xenomai/net/drivers/igb/igb_hwmon.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/igb_hwmon.c 2021-04-07 16:01:27.505633769 +0800 +@@ -0,0 +1,249 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#include "igb.h" ++#include "e1000_82575.h" ++#include "e1000_hw.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_IGB_HWMON ++static struct i2c_board_info i350_sensor_info = { ++ I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)), ++}; ++ ++/* hwmon callback functions */ ++static ssize_t igb_hwmon_show_location(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, ++ dev_attr); ++ return sprintf(buf, "loc%u\n", ++ igb_attr->sensor->location); ++} ++ ++static ssize_t igb_hwmon_show_temp(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, ++ dev_attr); ++ unsigned int value; ++ ++ /* reset the temp field */ ++ igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw); ++ ++ value = igb_attr->sensor->temp; ++ ++ /* display millidegree */ ++ value *= 1000; ++ ++ return sprintf(buf, "%u\n", value); ++} ++ ++static ssize_t igb_hwmon_show_cautionthresh(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, ++ dev_attr); ++ unsigned int value = igb_attr->sensor->caution_thresh; ++ ++ /* display millidegree */ ++ value *= 1000; ++ ++ return sprintf(buf, "%u\n", value); ++} ++ ++static ssize_t igb_hwmon_show_maxopthresh(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, ++ dev_attr); ++ unsigned int value = igb_attr->sensor->max_op_thresh; ++ ++ /* display millidegree */ ++ value *= 1000; ++ ++ return sprintf(buf, "%u\n", value); ++} ++ ++/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file. ++ * @ adapter: pointer to the adapter structure ++ * @ offset: offset in the eeprom sensor data table ++ * @ type: type of sensor data to display ++ * ++ * For each file we want in hwmon's sysfs interface we need a device_attribute ++ * This is included in our hwmon_attr struct that contains the references to ++ * the data structures we need to get the data to display. 
++ */ ++static int igb_add_hwmon_attr(struct igb_adapter *adapter, ++ unsigned int offset, int type) ++{ ++ int rc; ++ unsigned int n_attr; ++ struct hwmon_attr *igb_attr; ++ ++ n_attr = adapter->igb_hwmon_buff->n_hwmon; ++ igb_attr = &adapter->igb_hwmon_buff->hwmon_list[n_attr]; ++ ++ switch (type) { ++ case IGB_HWMON_TYPE_LOC: ++ igb_attr->dev_attr.show = igb_hwmon_show_location; ++ snprintf(igb_attr->name, sizeof(igb_attr->name), ++ "temp%u_label", offset + 1); ++ break; ++ case IGB_HWMON_TYPE_TEMP: ++ igb_attr->dev_attr.show = igb_hwmon_show_temp; ++ snprintf(igb_attr->name, sizeof(igb_attr->name), ++ "temp%u_input", offset + 1); ++ break; ++ case IGB_HWMON_TYPE_CAUTION: ++ igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh; ++ snprintf(igb_attr->name, sizeof(igb_attr->name), ++ "temp%u_max", offset + 1); ++ break; ++ case IGB_HWMON_TYPE_MAX: ++ igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh; ++ snprintf(igb_attr->name, sizeof(igb_attr->name), ++ "temp%u_crit", offset + 1); ++ break; ++ default: ++ rc = -EPERM; ++ return rc; ++ } ++ ++ /* These always the same regardless of type */ ++ igb_attr->sensor = ++ &adapter->hw.mac.thermal_sensor_data.sensor[offset]; ++ igb_attr->hw = &adapter->hw; ++ igb_attr->dev_attr.store = NULL; ++ igb_attr->dev_attr.attr.mode = S_IRUGO; ++ igb_attr->dev_attr.attr.name = igb_attr->name; ++ sysfs_attr_init(&igb_attr->dev_attr.attr); ++ ++ adapter->igb_hwmon_buff->attrs[n_attr] = &igb_attr->dev_attr.attr; ++ ++ ++adapter->igb_hwmon_buff->n_hwmon; ++ ++ return 0; ++} ++ ++static void igb_sysfs_del_adapter(struct igb_adapter *adapter) ++{ ++} ++ ++/* called from igb_main.c */ ++void igb_sysfs_exit(struct igb_adapter *adapter) ++{ ++ igb_sysfs_del_adapter(adapter); ++} ++ ++/* called from igb_main.c */ ++int igb_sysfs_init(struct igb_adapter *adapter) ++{ ++ struct hwmon_buff *igb_hwmon; ++ struct i2c_client *client; ++ struct device *hwmon_dev; ++ unsigned int i; ++ int rc = 0; ++ ++ /* If this method isn't defined we don't support thermals */ ++ if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL) ++ goto exit; ++ ++ /* Don't create thermal hwmon interface if no sensors present */ ++ rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw)); ++ if (rc) ++ goto exit; ++ ++ igb_hwmon = devm_kzalloc(&adapter->pdev->dev, sizeof(*igb_hwmon), ++ GFP_KERNEL); ++ if (!igb_hwmon) { ++ rc = -ENOMEM; ++ goto exit; ++ } ++ adapter->igb_hwmon_buff = igb_hwmon; ++ ++ for (i = 0; i < E1000_MAX_SENSORS; i++) { ++ ++ /* Only create hwmon sysfs entries for sensors that have ++ * meaningful data. 
++ */ ++ if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0) ++ continue; ++ ++ /* Bail if any hwmon attr struct fails to initialize */ ++ rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION); ++ if (rc) ++ goto exit; ++ rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC); ++ if (rc) ++ goto exit; ++ rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP); ++ if (rc) ++ goto exit; ++ rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX); ++ if (rc) ++ goto exit; ++ } ++ ++ /* init i2c_client */ ++ client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info); ++ if (client == NULL) { ++ dev_info(&adapter->pdev->dev, ++ "Failed to create new i2c device.\n"); ++ rc = -ENODEV; ++ goto exit; ++ } ++ adapter->i2c_client = client; ++ ++ igb_hwmon->groups[0] = &igb_hwmon->group; ++ igb_hwmon->group.attrs = igb_hwmon->attrs; ++ ++ hwmon_dev = devm_hwmon_device_register_with_groups(&adapter->pdev->dev, ++ client->name, ++ igb_hwmon, ++ igb_hwmon->groups); ++ if (IS_ERR(hwmon_dev)) { ++ rc = PTR_ERR(hwmon_dev); ++ goto err; ++ } ++ ++ goto exit; ++ ++err: ++ igb_sysfs_del_adapter(adapter); ++exit: ++ return rc; ++} ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_82575.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_82575.h 2021-04-07 16:01:27.500633776 +0800 +@@ -0,0 +1,280 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_82575_H_ ++#define _E1000_82575_H_ ++ ++void igb_shutdown_serdes_link_82575(struct e1000_hw *hw); ++void igb_power_up_serdes_link_82575(struct e1000_hw *hw); ++void igb_power_down_phy_copper_82575(struct e1000_hw *hw); ++void igb_rx_fifo_flush_82575(struct e1000_hw *hw); ++s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, ++ u8 *data); ++s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, ++ u8 data); ++ ++#define ID_LED_DEFAULT_82575_SERDES ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_DEF1_DEF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_OFF1_ON2)) ++ ++#define E1000_RAR_ENTRIES_82575 16 ++#define E1000_RAR_ENTRIES_82576 24 ++#define E1000_RAR_ENTRIES_82580 24 ++#define E1000_RAR_ENTRIES_I350 32 ++ ++#define E1000_SW_SYNCH_MB 0x00000100 ++#define E1000_STAT_DEV_RST_SET 0x00100000 ++#define E1000_CTRL_DEV_RST 0x20000000 ++ ++/* SRRCTL bit definitions */ ++#define E1000_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */ ++#define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ ++#define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 ++#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 ++#define E1000_SRRCTL_DROP_EN 0x80000000 ++#define E1000_SRRCTL_TIMESTAMP 0x40000000 ++ ++ ++#define E1000_MRQC_ENABLE_RSS_4Q 0x00000002 ++#define E1000_MRQC_ENABLE_VMDQ 0x00000003 ++#define E1000_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 ++#define E1000_MRQC_ENABLE_VMDQ_RSS_2Q 0x00000005 ++#define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 ++#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 ++ ++#define E1000_EICR_TX_QUEUE ( \ ++ E1000_EICR_TX_QUEUE0 | \ ++ E1000_EICR_TX_QUEUE1 | \ ++ E1000_EICR_TX_QUEUE2 | \ ++ E1000_EICR_TX_QUEUE3) ++ ++#define E1000_EICR_RX_QUEUE ( \ ++ E1000_EICR_RX_QUEUE0 | \ ++ E1000_EICR_RX_QUEUE1 | \ ++ E1000_EICR_RX_QUEUE2 | \ ++ E1000_EICR_RX_QUEUE3) ++ ++/* Immediate Interrupt Rx (A.K.A. 
Low Latency Interrupt) */ ++#define E1000_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ ++#define E1000_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of ctrl bits */ ++ ++/* Receive Descriptor - Advanced */ ++union e1000_adv_rx_desc { ++ struct { ++ __le64 pkt_addr; /* Packet buffer address */ ++ __le64 hdr_addr; /* Header buffer address */ ++ } read; ++ struct { ++ struct { ++ struct { ++ __le16 pkt_info; /* RSS type, Packet type */ ++ __le16 hdr_info; /* Split Head, buf len */ ++ } lo_dword; ++ union { ++ __le32 rss; /* RSS Hash */ ++ struct { ++ __le16 ip_id; /* IP id */ ++ __le16 csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ __le32 status_error; /* ext status/error */ ++ __le16 length; /* Packet length */ ++ __le16 vlan; /* VLAN tag */ ++ } upper; ++ } wb; /* writeback */ ++}; ++ ++#define E1000_RXDADV_HDRBUFLEN_MASK 0x7FE0 ++#define E1000_RXDADV_HDRBUFLEN_SHIFT 5 ++#define E1000_RXDADV_STAT_TS 0x10000 /* Pkt was time stamped */ ++#define E1000_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ ++ ++/* Transmit Descriptor - Advanced */ ++union e1000_adv_tx_desc { ++ struct { ++ __le64 buffer_addr; /* Address of descriptor's data buf */ ++ __le32 cmd_type_len; ++ __le32 olinfo_status; ++ } read; ++ struct { ++ __le64 rsvd; /* Reserved */ ++ __le32 nxtseq_seed; ++ __le32 status; ++ } wb; ++}; ++ ++/* Adv Transmit Descriptor Config Masks */ ++#define E1000_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */ ++#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ ++#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ ++#define E1000_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_ADVTXD_DCMD_RS 0x08000000 /* Report Status */ ++#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ ++#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ ++#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ ++#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ ++ ++/* Context descriptors */ ++struct e1000_adv_tx_context_desc { ++ __le32 vlan_macip_lens; ++ __le32 seqnum_seed; ++ __le32 type_tucmd_mlhl; ++ __le32 mss_l4len_idx; ++}; ++ ++#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ ++#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ ++#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ ++#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */ ++/* IPSec Encrypt Enable for ESP */ ++#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ ++#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ ++/* Adv ctxt IPSec SA IDX mask */ ++/* Adv ctxt IPSec ESP len mask */ ++ ++/* Additional Transmit Descriptor Control definitions */ ++#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */ ++/* Tx Queue Arbitration Priority 0=low, 1=high */ ++ ++/* Additional Receive Descriptor Control definitions */ ++#define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Rx Queue */ ++ ++/* Direct Cache Access (DCA) definitions */ ++#define E1000_DCA_CTRL_DCA_MODE_DISABLE 0x01 /* DCA Disable */ ++#define E1000_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */ ++ ++#define E1000_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */ ++#define E1000_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* DCA Rx Desc enable */ ++#define E1000_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* DCA Rx Desc header enable */ 
++#define E1000_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* DCA Rx Desc payload enable */ ++#define E1000_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx rd Desc Relax Order */ ++ ++#define E1000_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */ ++#define E1000_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ ++#define E1000_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ ++#define E1000_DCA_TXCTRL_TX_WB_RO_EN (1 << 11) /* Tx Desc writeback RO bit */ ++#define E1000_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ ++ ++/* Additional DCA related definitions, note change in position of CPUID */ ++#define E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */ ++#define E1000_DCA_RXCTRL_CPUID_MASK_82576 0xFF000000 /* Rx CPUID Mask */ ++#define E1000_DCA_TXCTRL_CPUID_SHIFT 24 /* Tx CPUID now in the last byte */ ++#define E1000_DCA_RXCTRL_CPUID_SHIFT 24 /* Rx CPUID now in the last byte */ ++ ++/* ETQF register bit definitions */ ++#define E1000_ETQF_FILTER_ENABLE (1 << 26) ++#define E1000_ETQF_1588 (1 << 30) ++ ++/* FTQF register bit definitions */ ++#define E1000_FTQF_VF_BP 0x00008000 ++#define E1000_FTQF_1588_TIME_STAMP 0x08000000 ++#define E1000_FTQF_MASK 0xF0000000 ++#define E1000_FTQF_MASK_PROTO_BP 0x10000000 ++#define E1000_FTQF_MASK_SOURCE_PORT_BP 0x80000000 ++ ++#define E1000_NVM_APME_82575 0x0400 ++#define MAX_NUM_VFS 8 ++ ++#define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF /* Per VF MAC spoof control */ ++#define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 /* Per VF VLAN spoof control */ ++#define E1000_DTXSWC_LLE_MASK 0x00FF0000 /* Per VF Local LB enables */ ++#define E1000_DTXSWC_VLAN_SPOOF_SHIFT 8 ++#define E1000_DTXSWC_VMDQ_LOOPBACK_EN (1 << 31) /* global VF LB enable */ ++ ++/* Easy defines for setting default pool, would normally be left a zero */ ++#define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7 ++#define E1000_VT_CTL_DEFAULT_POOL_MASK (0x7 << E1000_VT_CTL_DEFAULT_POOL_SHIFT) ++ ++/* Other useful VMD_CTL register defines */ ++#define E1000_VT_CTL_IGNORE_MAC (1 << 28) ++#define E1000_VT_CTL_DISABLE_DEF_POOL (1 << 29) ++#define E1000_VT_CTL_VM_REPL_EN (1 << 30) ++ ++/* Per VM Offload register setup */ ++#define E1000_VMOLR_RLPML_MASK 0x00003FFF /* Long Packet Maximum Length mask */ ++#define E1000_VMOLR_LPE 0x00010000 /* Accept Long packet */ ++#define E1000_VMOLR_RSSE 0x00020000 /* Enable RSS */ ++#define E1000_VMOLR_AUPE 0x01000000 /* Accept untagged packets */ ++#define E1000_VMOLR_ROMPE 0x02000000 /* Accept overflow multicast */ ++#define E1000_VMOLR_ROPE 0x04000000 /* Accept overflow unicast */ ++#define E1000_VMOLR_BAM 0x08000000 /* Accept Broadcast packets */ ++#define E1000_VMOLR_MPME 0x10000000 /* Multicast promiscuous mode */ ++#define E1000_VMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ ++#define E1000_VMOLR_STRCRC 0x80000000 /* CRC stripping enable */ ++ ++#define E1000_DVMOLR_HIDEVLAN 0x20000000 /* Hide vlan enable */ ++#define E1000_DVMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ ++#define E1000_DVMOLR_STRCRC 0x80000000 /* CRC stripping enable */ ++ ++#define E1000_VLVF_ARRAY_SIZE 32 ++#define E1000_VLVF_VLANID_MASK 0x00000FFF ++#define E1000_VLVF_POOLSEL_SHIFT 12 ++#define E1000_VLVF_POOLSEL_MASK (0xFF << E1000_VLVF_POOLSEL_SHIFT) ++#define E1000_VLVF_LVLAN 0x00100000 ++#define E1000_VLVF_VLANID_ENABLE 0x80000000 ++ ++#define E1000_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */ ++#define E1000_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */ ++ ++#define E1000_IOVCTL 0x05BBC ++#define E1000_IOVCTL_REUSE_VFQ 
0x00000001 ++ ++#define E1000_RPLOLR_STRVLAN 0x40000000 ++#define E1000_RPLOLR_STRCRC 0x80000000 ++ ++#define E1000_DTXCTL_8023LL 0x0004 ++#define E1000_DTXCTL_VLAN_ADDED 0x0008 ++#define E1000_DTXCTL_OOS_ENABLE 0x0010 ++#define E1000_DTXCTL_MDP_EN 0x0020 ++#define E1000_DTXCTL_SPOOF_INT 0x0040 ++ ++#define E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT (1 << 14) ++ ++#define ALL_QUEUES 0xFFFF ++ ++/* RX packet buffer size defines */ ++#define E1000_RXPBS_SIZE_MASK_82576 0x0000007F ++void igb_vmdq_set_anti_spoofing_pf(struct e1000_hw *, bool, int); ++void igb_vmdq_set_loopback_pf(struct e1000_hw *, bool); ++void igb_vmdq_set_replication_pf(struct e1000_hw *, bool); ++u16 igb_rxpbs_adjust_82580(u32 data); ++s32 igb_read_emi_reg(struct e1000_hw *, u16 addr, u16 *data); ++s32 igb_set_eee_i350(struct e1000_hw *, bool adv1G, bool adv100M); ++s32 igb_set_eee_i354(struct e1000_hw *, bool adv1G, bool adv100M); ++s32 igb_get_eee_status_i354(struct e1000_hw *hw, bool *status); ++ ++#define E1000_I2C_THERMAL_SENSOR_ADDR 0xF8 ++#define E1000_EMC_INTERNAL_DATA 0x00 ++#define E1000_EMC_INTERNAL_THERM_LIMIT 0x20 ++#define E1000_EMC_DIODE1_DATA 0x01 ++#define E1000_EMC_DIODE1_THERM_LIMIT 0x19 ++#define E1000_EMC_DIODE2_DATA 0x23 ++#define E1000_EMC_DIODE2_THERM_LIMIT 0x1A ++#define E1000_EMC_DIODE3_DATA 0x2A ++#define E1000_EMC_DIODE3_THERM_LIMIT 0x30 ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_phy.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_phy.h 2021-04-07 16:01:27.496633782 +0800 +@@ -0,0 +1,175 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_PHY_H_ ++#define _E1000_PHY_H_ ++ ++enum e1000_ms_type { ++ e1000_ms_hw_default = 0, ++ e1000_ms_force_master, ++ e1000_ms_force_slave, ++ e1000_ms_auto ++}; ++ ++enum e1000_smart_speed { ++ e1000_smart_speed_default = 0, ++ e1000_smart_speed_on, ++ e1000_smart_speed_off ++}; ++ ++s32 igb_check_downshift(struct e1000_hw *hw); ++s32 igb_check_reset_block(struct e1000_hw *hw); ++s32 igb_copper_link_setup_igp(struct e1000_hw *hw); ++s32 igb_copper_link_setup_m88(struct e1000_hw *hw); ++s32 igb_copper_link_setup_m88_gen2(struct e1000_hw *hw); ++s32 igb_phy_force_speed_duplex_igp(struct e1000_hw *hw); ++s32 igb_phy_force_speed_duplex_m88(struct e1000_hw *hw); ++s32 igb_get_cable_length_m88(struct e1000_hw *hw); ++s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw); ++s32 igb_get_cable_length_igp_2(struct e1000_hw *hw); ++s32 igb_get_phy_id(struct e1000_hw *hw); ++s32 igb_get_phy_info_igp(struct e1000_hw *hw); ++s32 igb_get_phy_info_m88(struct e1000_hw *hw); ++s32 igb_phy_sw_reset(struct e1000_hw *hw); ++s32 igb_phy_hw_reset(struct e1000_hw *hw); ++s32 igb_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 igb_set_d3_lplu_state(struct e1000_hw *hw, bool active); ++s32 igb_setup_copper_link(struct e1000_hw *hw); ++s32 igb_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data); ++s32 igb_phy_has_link(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success); ++void igb_power_up_phy_copper(struct e1000_hw *hw); ++void igb_power_down_phy_copper(struct e1000_hw *hw); ++s32 igb_phy_init_script_igp3(struct e1000_hw *hw); ++s32 igb_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 igb_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); ++s32 igb_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 igb_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data); ++s32 igb_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data); ++s32 igb_copper_link_setup_82580(struct e1000_hw *hw); ++s32 igb_get_phy_info_82580(struct e1000_hw *hw); ++s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw); ++s32 igb_get_cable_length_82580(struct e1000_hw *hw); ++s32 igb_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 igb_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data); ++s32 igb_check_polarity_m88(struct e1000_hw *hw); ++ ++/* IGP01E1000 Specific Registers */ ++#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* Port Config */ ++#define IGP01E1000_PHY_PORT_STATUS 0x11 /* Status */ ++#define IGP01E1000_PHY_PORT_CTRL 0x12 /* Control */ ++#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health */ ++#define IGP02E1000_PHY_POWER_MGMT 0x19 /* Power Management */ ++#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* Page Select */ ++#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4 ++#define IGP01E1000_PHY_POLARITY_MASK 0x0078 ++#define IGP01E1000_PSCR_AUTO_MDIX 0x1000 ++#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0=MDI, 1=MDIX */ ++#define IGP01E1000_PSCFR_SMART_SPEED 0x0080 ++ ++#define I82580_ADDR_REG 16 ++#define I82580_CFG_REG 22 ++#define I82580_CFG_ASSERT_CRS_ON_TX (1 << 15) ++#define I82580_CFG_ENABLE_DOWNSHIFT (3 << 10) /* auto downshift 100/10 */ ++#define I82580_CTRL_REG 23 ++#define I82580_CTRL_DOWNSHIFT_MASK (7 << 10) ++ ++/* 82580 specific PHY registers */ ++#define I82580_PHY_CTRL_2 18 ++#define I82580_PHY_LBK_CTRL 19 ++#define I82580_PHY_STATUS_2 26 ++#define I82580_PHY_DIAG_STATUS 31 ++ ++/* I82580 PHY Status 2 */ ++#define 
I82580_PHY_STATUS2_REV_POLARITY 0x0400 ++#define I82580_PHY_STATUS2_MDIX 0x0800 ++#define I82580_PHY_STATUS2_SPEED_MASK 0x0300 ++#define I82580_PHY_STATUS2_SPEED_1000MBPS 0x0200 ++#define I82580_PHY_STATUS2_SPEED_100MBPS 0x0100 ++ ++/* I82580 PHY Control 2 */ ++#define I82580_PHY_CTRL2_MANUAL_MDIX 0x0200 ++#define I82580_PHY_CTRL2_AUTO_MDI_MDIX 0x0400 ++#define I82580_PHY_CTRL2_MDIX_CFG_MASK 0x0600 ++ ++/* I82580 PHY Diagnostics Status */ ++#define I82580_DSTATUS_CABLE_LENGTH 0x03FC ++#define I82580_DSTATUS_CABLE_LENGTH_SHIFT 2 ++ ++/* 82580 PHY Power Management */ ++#define E1000_82580_PHY_POWER_MGMT 0xE14 ++#define E1000_82580_PM_SPD 0x0001 /* Smart Power Down */ ++#define E1000_82580_PM_D0_LPLU 0x0002 /* For D0a states */ ++#define E1000_82580_PM_D3_LPLU 0x0004 /* For all other states */ ++#define E1000_82580_PM_GO_LINKD 0x0020 /* Go Link Disconnect */ ++ ++/* Enable flexible speed on link-up */ ++#define IGP02E1000_PM_D0_LPLU 0x0002 /* For D0a states */ ++#define IGP02E1000_PM_D3_LPLU 0x0004 /* For all other states */ ++#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 ++#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 ++#define IGP01E1000_PSSR_MDIX 0x0800 ++#define IGP01E1000_PSSR_SPEED_MASK 0xC000 ++#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 ++#define IGP02E1000_PHY_CHANNEL_NUM 4 ++#define IGP02E1000_PHY_AGC_A 0x11B1 ++#define IGP02E1000_PHY_AGC_B 0x12B1 ++#define IGP02E1000_PHY_AGC_C 0x14B1 ++#define IGP02E1000_PHY_AGC_D 0x18B1 ++#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Course - 15:13, Fine - 12:9 */ ++#define IGP02E1000_AGC_LENGTH_MASK 0x7F ++#define IGP02E1000_AGC_RANGE 15 ++ ++#define E1000_CABLE_LENGTH_UNDEFINED 0xFF ++ ++/* GS40G - I210 PHY defines */ ++#define GS40G_PAGE_SELECT 0x16 ++#define GS40G_PAGE_SHIFT 16 ++#define GS40G_OFFSET_MASK 0xFFFF ++#define GS40G_PAGE_2 0x20000 ++#define GS40G_MAC_REG2 0x15 ++#define GS40G_MAC_LB 0x4140 ++#define GS40G_MAC_SPEED_1G 0X0006 ++#define GS40G_COPPER_SPEC 0x0010 ++#define GS40G_LINE_LB 0x4000 ++ ++/* SFP modules ID memory locations */ ++#define E1000_SFF_IDENTIFIER_OFFSET 0x00 ++#define E1000_SFF_IDENTIFIER_SFF 0x02 ++#define E1000_SFF_IDENTIFIER_SFP 0x03 ++ ++#define E1000_SFF_ETH_FLAGS_OFFSET 0x06 ++/* Flags for SFP modules compatible with ETH up to 1Gb */ ++struct e1000_sfp_flags { ++ u8 e1000_base_sx:1; ++ u8 e1000_base_lx:1; ++ u8 e1000_base_cx:1; ++ u8 e1000_base_t:1; ++ u8 e100_base_lx:1; ++ u8 e100_base_fx:1; ++ u8 e10_base_bx10:1; ++ u8 e10_base_px:1; ++}; ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_nvm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_nvm.h 2021-04-07 16:01:27.491633789 +0800 +@@ -0,0 +1,57 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". 
++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_NVM_H_ ++#define _E1000_NVM_H_ ++ ++s32 igb_acquire_nvm(struct e1000_hw *hw); ++void igb_release_nvm(struct e1000_hw *hw); ++s32 igb_read_mac_addr(struct e1000_hw *hw); ++s32 igb_read_part_num(struct e1000_hw *hw, u32 *part_num); ++s32 igb_read_part_string(struct e1000_hw *hw, u8 *part_num, ++ u32 part_num_size); ++s32 igb_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++s32 igb_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++s32 igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++s32 igb_validate_nvm_checksum(struct e1000_hw *hw); ++s32 igb_update_nvm_checksum(struct e1000_hw *hw); ++ ++struct e1000_fw_version { ++ u32 etrack_id; ++ u16 eep_major; ++ u16 eep_minor; ++ u16 eep_build; ++ ++ u8 invm_major; ++ u8 invm_minor; ++ u8 invm_img_type; ++ ++ bool or_valid; ++ u16 or_major; ++ u16 or_build; ++ u16 or_patch; ++}; ++void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers); ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_82575.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_82575.c 2021-04-07 16:01:27.486633796 +0800 +@@ -0,0 +1,2889 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2015 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++/* e1000_82575 ++ * e1000_82576 ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++ ++#include "e1000_mac.h" ++#include "e1000_82575.h" ++#include "e1000_i210.h" ++ ++static s32 igb_get_invariants_82575(struct e1000_hw *); ++static s32 igb_acquire_phy_82575(struct e1000_hw *); ++static void igb_release_phy_82575(struct e1000_hw *); ++static s32 igb_acquire_nvm_82575(struct e1000_hw *); ++static void igb_release_nvm_82575(struct e1000_hw *); ++static s32 igb_check_for_link_82575(struct e1000_hw *); ++static s32 igb_get_cfg_done_82575(struct e1000_hw *); ++static s32 igb_init_hw_82575(struct e1000_hw *); ++static s32 igb_phy_hw_reset_sgmii_82575(struct e1000_hw *); ++static s32 igb_read_phy_reg_sgmii_82575(struct e1000_hw *, u32, u16 *); ++static s32 igb_read_phy_reg_82580(struct e1000_hw *, u32, u16 *); ++static s32 igb_write_phy_reg_82580(struct e1000_hw *, u32, u16); ++static s32 igb_reset_hw_82575(struct e1000_hw *); ++static s32 igb_reset_hw_82580(struct e1000_hw *); ++static s32 igb_set_d0_lplu_state_82575(struct e1000_hw *, bool); ++static s32 igb_set_d0_lplu_state_82580(struct e1000_hw *, bool); ++static s32 igb_set_d3_lplu_state_82580(struct e1000_hw *, bool); ++static s32 igb_setup_copper_link_82575(struct e1000_hw *); ++static s32 igb_setup_serdes_link_82575(struct e1000_hw *); ++static s32 igb_write_phy_reg_sgmii_82575(struct e1000_hw *, u32, u16); ++static void igb_clear_hw_cntrs_82575(struct e1000_hw *); ++static s32 igb_acquire_swfw_sync_82575(struct e1000_hw *, u16); ++static s32 igb_get_pcs_speed_and_duplex_82575(struct e1000_hw *, u16 *, ++ u16 *); ++static s32 igb_get_phy_id_82575(struct e1000_hw *); ++static void igb_release_swfw_sync_82575(struct e1000_hw *, u16); ++static bool igb_sgmii_active_82575(struct e1000_hw *); ++static s32 igb_reset_init_script_82575(struct e1000_hw *); ++static s32 igb_read_mac_addr_82575(struct e1000_hw *); ++static s32 igb_set_pcie_completion_timeout(struct e1000_hw *hw); ++static s32 igb_reset_mdicnfg_82580(struct e1000_hw *hw); ++static s32 igb_validate_nvm_checksum_82580(struct e1000_hw *hw); ++static s32 igb_update_nvm_checksum_82580(struct e1000_hw *hw); ++static s32 igb_validate_nvm_checksum_i350(struct e1000_hw *hw); ++static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw); ++static const u16 e1000_82580_rxpbs_table[] = { ++ 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 }; ++ ++/** ++ * igb_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO ++ * @hw: pointer to the HW structure ++ * ++ * Called to determine if the I2C pins are being used for I2C or as an ++ * external MDIO interface since the two options are mutually exclusive. ++ **/ ++static bool igb_sgmii_uses_mdio_82575(struct e1000_hw *hw) ++{ ++ u32 reg = 0; ++ bool ext_mdio = false; ++ ++ switch (hw->mac.type) { ++ case e1000_82575: ++ case e1000_82576: ++ reg = rd32(E1000_MDIC); ++ ext_mdio = !!(reg & E1000_MDIC_DEST); ++ break; ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ case e1000_i211: ++ reg = rd32(E1000_MDICNFG); ++ ext_mdio = !!(reg & E1000_MDICNFG_EXT_MDIO); ++ break; ++ default: ++ break; ++ } ++ return ext_mdio; ++} ++ ++/** ++ * igb_check_for_link_media_swap - Check which M88E1112 interface linked ++ * @hw: pointer to the HW structure ++ * ++ * Poll the M88E1112 interfaces to see which interface achieved link. 
++ */ ++static s32 igb_check_for_link_media_swap(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ u8 port = 0; ++ ++ /* Check the copper medium. */ ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if (data & E1000_M88E1112_STATUS_LINK) ++ port = E1000_MEDIA_PORT_COPPER; ++ ++ /* Check the other medium. */ ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); ++ if (ret_val) ++ return ret_val; ++ ++ /* reset page to 0 */ ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); ++ if (ret_val) ++ return ret_val; ++ ++ if (data & E1000_M88E1112_STATUS_LINK) ++ port = E1000_MEDIA_PORT_OTHER; ++ ++ /* Determine if a swap needs to happen. */ ++ if (port && (hw->dev_spec._82575.media_port != port)) { ++ hw->dev_spec._82575.media_port = port; ++ hw->dev_spec._82575.media_changed = true; ++ } else { ++ ret_val = igb_check_for_link_82575(hw); ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_init_phy_params_82575 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 igb_init_phy_params_82575(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u32 ctrl_ext; ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ goto out; ++ } ++ ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 100; ++ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ++ if (igb_sgmii_active_82575(hw)) { ++ phy->ops.reset = igb_phy_hw_reset_sgmii_82575; ++ ctrl_ext |= E1000_CTRL_I2C_ENA; ++ } else { ++ phy->ops.reset = igb_phy_hw_reset; ++ ctrl_ext &= ~E1000_CTRL_I2C_ENA; ++ } ++ ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ igb_reset_mdicnfg_82580(hw); ++ ++ if (igb_sgmii_active_82575(hw) && !igb_sgmii_uses_mdio_82575(hw)) { ++ phy->ops.read_reg = igb_read_phy_reg_sgmii_82575; ++ phy->ops.write_reg = igb_write_phy_reg_sgmii_82575; ++ } else { ++ switch (hw->mac.type) { ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ phy->ops.read_reg = igb_read_phy_reg_82580; ++ phy->ops.write_reg = igb_write_phy_reg_82580; ++ break; ++ case e1000_i210: ++ case e1000_i211: ++ phy->ops.read_reg = igb_read_phy_reg_gs40g; ++ phy->ops.write_reg = igb_write_phy_reg_gs40g; ++ break; ++ default: ++ phy->ops.read_reg = igb_read_phy_reg_igp; ++ phy->ops.write_reg = igb_write_phy_reg_igp; ++ } ++ } ++ ++ /* set lan id */ ++ hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >> ++ E1000_STATUS_FUNC_SHIFT; ++ ++ /* Set phy->phy_addr and phy->id. */ ++ ret_val = igb_get_phy_id_82575(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Verify phy id and set remaining function pointers */ ++ switch (phy->id) { ++ case M88E1543_E_PHY_ID: ++ case I347AT4_E_PHY_ID: ++ case M88E1112_E_PHY_ID: ++ case M88E1111_I_PHY_ID: ++ phy->type = e1000_phy_m88; ++ phy->ops.check_polarity = igb_check_polarity_m88; ++ phy->ops.get_phy_info = igb_get_phy_info_m88; ++ if (phy->id != M88E1111_I_PHY_ID) ++ phy->ops.get_cable_length = ++ igb_get_cable_length_m88_gen2; ++ else ++ phy->ops.get_cable_length = igb_get_cable_length_m88; ++ phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_m88; ++ /* Check if this PHY is confgured for media swap. 
*/ ++ if (phy->id == M88E1112_E_PHY_ID) { ++ u16 data; ++ ++ ret_val = phy->ops.write_reg(hw, ++ E1000_M88E1112_PAGE_ADDR, ++ 2); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, ++ E1000_M88E1112_MAC_CTRL_1, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >> ++ E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT; ++ if (data == E1000_M88E1112_AUTO_COPPER_SGMII || ++ data == E1000_M88E1112_AUTO_COPPER_BASEX) ++ hw->mac.ops.check_for_link = ++ igb_check_for_link_media_swap; ++ } ++ break; ++ case IGP03E1000_E_PHY_ID: ++ phy->type = e1000_phy_igp_3; ++ phy->ops.get_phy_info = igb_get_phy_info_igp; ++ phy->ops.get_cable_length = igb_get_cable_length_igp_2; ++ phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_igp; ++ phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82575; ++ phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state; ++ break; ++ case I82580_I_PHY_ID: ++ case I350_I_PHY_ID: ++ phy->type = e1000_phy_82580; ++ phy->ops.force_speed_duplex = ++ igb_phy_force_speed_duplex_82580; ++ phy->ops.get_cable_length = igb_get_cable_length_82580; ++ phy->ops.get_phy_info = igb_get_phy_info_82580; ++ phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580; ++ phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state_82580; ++ break; ++ case I210_I_PHY_ID: ++ phy->type = e1000_phy_i210; ++ phy->ops.check_polarity = igb_check_polarity_m88; ++ phy->ops.get_phy_info = igb_get_phy_info_m88; ++ phy->ops.get_cable_length = igb_get_cable_length_m88_gen2; ++ phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580; ++ phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state_82580; ++ phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_m88; ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_init_nvm_params_82575 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 igb_init_nvm_params_82575(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = rd32(E1000_EECD); ++ u16 size; ++ ++ size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ ++ /* Added to a constant, "size" becomes the left-shift value ++ * for setting word_size. ++ */ ++ size += NVM_WORD_SIZE_BASE_SHIFT; ++ ++ /* Just in case size is out of range, cap it to the largest ++ * EEPROM size supported ++ */ ++ if (size > 15) ++ size = 15; ++ ++ nvm->word_size = 1 << size; ++ nvm->opcode_bits = 8; ++ nvm->delay_usec = 1; ++ ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->page_size = 32; ++ nvm->address_bits = 16; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->page_size = 8; ++ nvm->address_bits = 8; ++ break; ++ default: ++ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; ++ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 
++ 16 : 8; ++ break; ++ } ++ if (nvm->word_size == (1 << 15)) ++ nvm->page_size = 128; ++ ++ nvm->type = e1000_nvm_eeprom_spi; ++ ++ /* NVM Function Pointers */ ++ nvm->ops.acquire = igb_acquire_nvm_82575; ++ nvm->ops.release = igb_release_nvm_82575; ++ nvm->ops.write = igb_write_nvm_spi; ++ nvm->ops.validate = igb_validate_nvm_checksum; ++ nvm->ops.update = igb_update_nvm_checksum; ++ if (nvm->word_size < (1 << 15)) ++ nvm->ops.read = igb_read_nvm_eerd; ++ else ++ nvm->ops.read = igb_read_nvm_spi; ++ ++ /* override generic family function pointers for specific descendants */ ++ switch (hw->mac.type) { ++ case e1000_82580: ++ nvm->ops.validate = igb_validate_nvm_checksum_82580; ++ nvm->ops.update = igb_update_nvm_checksum_82580; ++ break; ++ case e1000_i354: ++ case e1000_i350: ++ nvm->ops.validate = igb_validate_nvm_checksum_i350; ++ nvm->ops.update = igb_update_nvm_checksum_i350; ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_init_mac_params_82575 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 igb_init_mac_params_82575(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ switch (mac->type) { ++ case e1000_82576: ++ mac->rar_entry_count = E1000_RAR_ENTRIES_82576; ++ break; ++ case e1000_82580: ++ mac->rar_entry_count = E1000_RAR_ENTRIES_82580; ++ break; ++ case e1000_i350: ++ case e1000_i354: ++ mac->rar_entry_count = E1000_RAR_ENTRIES_I350; ++ break; ++ default: ++ mac->rar_entry_count = E1000_RAR_ENTRIES_82575; ++ break; ++ } ++ /* reset */ ++ if (mac->type >= e1000_82580) ++ mac->ops.reset_hw = igb_reset_hw_82580; ++ else ++ mac->ops.reset_hw = igb_reset_hw_82575; ++ ++ if (mac->type >= e1000_i210) { ++ mac->ops.acquire_swfw_sync = igb_acquire_swfw_sync_i210; ++ mac->ops.release_swfw_sync = igb_release_swfw_sync_i210; ++ ++ } else { ++ mac->ops.acquire_swfw_sync = igb_acquire_swfw_sync_82575; ++ mac->ops.release_swfw_sync = igb_release_swfw_sync_82575; ++ } ++ ++ /* Set if part includes ASF firmware */ ++ mac->asf_firmware_present = true; ++ /* Set if manageability features are enabled. */ ++ mac->arc_subsystem_valid = ++ (rd32(E1000_FWSM) & E1000_FWSM_MODE_MASK) ++ ? true : false; ++ /* enable EEE on i350 parts and later parts */ ++ if (mac->type >= e1000_i350) ++ dev_spec->eee_disable = false; ++ else ++ dev_spec->eee_disable = true; ++ /* Allow a single clear of the SW semaphore on I210 and newer */ ++ if (mac->type >= e1000_i210) ++ dev_spec->clear_semaphore_once = true; ++ /* physical interface link setup */ ++ mac->ops.setup_physical_interface = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? igb_setup_copper_link_82575 ++ : igb_setup_serdes_link_82575; ++ ++ if (mac->type == e1000_82580) { ++ switch (hw->device_id) { ++ /* feature not supported on these id's */ ++ case E1000_DEV_ID_DH89XXCC_SGMII: ++ case E1000_DEV_ID_DH89XXCC_SERDES: ++ case E1000_DEV_ID_DH89XXCC_BACKPLANE: ++ case E1000_DEV_ID_DH89XXCC_SFP: ++ break; ++ default: ++ hw->dev_spec._82575.mas_capable = true; ++ break; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * igb_set_sfp_media_type_82575 - derives SFP module media type. ++ * @hw: pointer to the HW structure ++ * ++ * The media type is chosen based on SFP module. ++ * compatibility flags retrieved from SFP ID EEPROM. 
++ **/ ++static s32 igb_set_sfp_media_type_82575(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_ERR_CONFIG; ++ u32 ctrl_ext = 0; ++ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; ++ struct e1000_sfp_flags *eth_flags = &dev_spec->eth_flags; ++ u8 tranceiver_type = 0; ++ s32 timeout = 3; ++ ++ /* Turn I2C interface ON and power on sfp cage */ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; ++ wr32(E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_I2C_ENA); ++ ++ wrfl(); ++ ++ /* Read SFP module data */ ++ while (timeout) { ++ ret_val = igb_read_sfp_data_byte(hw, ++ E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_IDENTIFIER_OFFSET), ++ &tranceiver_type); ++ if (ret_val == 0) ++ break; ++ msleep(100); ++ timeout--; ++ } ++ if (ret_val != 0) ++ goto out; ++ ++ ret_val = igb_read_sfp_data_byte(hw, ++ E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_ETH_FLAGS_OFFSET), ++ (u8 *)eth_flags); ++ if (ret_val != 0) ++ goto out; ++ ++ /* Check if there is some SFP module plugged and powered */ ++ if ((tranceiver_type == E1000_SFF_IDENTIFIER_SFP) || ++ (tranceiver_type == E1000_SFF_IDENTIFIER_SFF)) { ++ dev_spec->module_plugged = true; ++ if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) { ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ } else if (eth_flags->e100_base_fx) { ++ dev_spec->sgmii_active = true; ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ } else if (eth_flags->e1000_base_t) { ++ dev_spec->sgmii_active = true; ++ hw->phy.media_type = e1000_media_type_copper; ++ } else { ++ hw->phy.media_type = e1000_media_type_unknown; ++ hw_dbg("PHY module has not been recognized\n"); ++ goto out; ++ } ++ } else { ++ hw->phy.media_type = e1000_media_type_unknown; ++ } ++ ret_val = 0; ++out: ++ /* Restore I2C interface setting */ ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ return ret_val; ++} ++ ++static s32 igb_get_invariants_82575(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; ++ s32 ret_val; ++ u32 ctrl_ext = 0; ++ u32 link_mode = 0; ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82575EB_COPPER: ++ case E1000_DEV_ID_82575EB_FIBER_SERDES: ++ case E1000_DEV_ID_82575GB_QUAD_COPPER: ++ mac->type = e1000_82575; ++ break; ++ case E1000_DEV_ID_82576: ++ case E1000_DEV_ID_82576_NS: ++ case E1000_DEV_ID_82576_NS_SERDES: ++ case E1000_DEV_ID_82576_FIBER: ++ case E1000_DEV_ID_82576_SERDES: ++ case E1000_DEV_ID_82576_QUAD_COPPER: ++ case E1000_DEV_ID_82576_QUAD_COPPER_ET2: ++ case E1000_DEV_ID_82576_SERDES_QUAD: ++ mac->type = e1000_82576; ++ break; ++ case E1000_DEV_ID_82580_COPPER: ++ case E1000_DEV_ID_82580_FIBER: ++ case E1000_DEV_ID_82580_QUAD_FIBER: ++ case E1000_DEV_ID_82580_SERDES: ++ case E1000_DEV_ID_82580_SGMII: ++ case E1000_DEV_ID_82580_COPPER_DUAL: ++ case E1000_DEV_ID_DH89XXCC_SGMII: ++ case E1000_DEV_ID_DH89XXCC_SERDES: ++ case E1000_DEV_ID_DH89XXCC_BACKPLANE: ++ case E1000_DEV_ID_DH89XXCC_SFP: ++ mac->type = e1000_82580; ++ break; ++ case E1000_DEV_ID_I350_COPPER: ++ case E1000_DEV_ID_I350_FIBER: ++ case E1000_DEV_ID_I350_SERDES: ++ case E1000_DEV_ID_I350_SGMII: ++ mac->type = e1000_i350; ++ break; ++ case E1000_DEV_ID_I210_COPPER: ++ case E1000_DEV_ID_I210_FIBER: ++ case E1000_DEV_ID_I210_SERDES: ++ case E1000_DEV_ID_I210_SGMII: ++ case E1000_DEV_ID_I210_COPPER_FLASHLESS: ++ case E1000_DEV_ID_I210_SERDES_FLASHLESS: ++ mac->type = e1000_i210; ++ break; ++ case E1000_DEV_ID_I211_COPPER: ++ mac->type = e1000_i211; ++ break; ++ case E1000_DEV_ID_I354_BACKPLANE_1GBPS: ++ case 
E1000_DEV_ID_I354_SGMII: ++ case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS: ++ mac->type = e1000_i354; ++ break; ++ default: ++ return -E1000_ERR_MAC_INIT; ++ } ++ ++ /* Set media type */ ++ /* The 82575 uses bits 22:23 for link mode. The mode can be changed ++ * based on the EEPROM. We cannot rely upon device ID. There ++ * is no distinguishable difference between fiber and internal ++ * SerDes mode on the 82575. There can be an external PHY attached ++ * on the SGMII interface. For this, we'll set sgmii_active to true. ++ */ ++ hw->phy.media_type = e1000_media_type_copper; ++ dev_spec->sgmii_active = false; ++ dev_spec->module_plugged = false; ++ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ++ link_mode = ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK; ++ switch (link_mode) { ++ case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ case E1000_CTRL_EXT_LINK_MODE_SGMII: ++ /* Get phy control interface type set (MDIO vs. I2C)*/ ++ if (igb_sgmii_uses_mdio_82575(hw)) { ++ hw->phy.media_type = e1000_media_type_copper; ++ dev_spec->sgmii_active = true; ++ break; ++ } ++ /* fall through for I2C based SGMII */ ++ case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES: ++ /* read media type from SFP EEPROM */ ++ ret_val = igb_set_sfp_media_type_82575(hw); ++ if ((ret_val != 0) || ++ (hw->phy.media_type == e1000_media_type_unknown)) { ++ /* If media type was not identified then return media ++ * type defined by the CTRL_EXT settings. ++ */ ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ ++ if (link_mode == E1000_CTRL_EXT_LINK_MODE_SGMII) { ++ hw->phy.media_type = e1000_media_type_copper; ++ dev_spec->sgmii_active = true; ++ } ++ ++ break; ++ } ++ ++ /* do not change link mode for 100BaseFX */ ++ if (dev_spec->eth_flags.e100_base_fx) ++ break; ++ ++ /* change current link mode setting */ ++ ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK; ++ ++ if (hw->phy.media_type == e1000_media_type_copper) ++ ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII; ++ else ++ ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; ++ ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ ++ break; ++ default: ++ break; ++ } ++ ++ /* mac initialization and operations */ ++ ret_val = igb_init_mac_params_82575(hw); ++ if (ret_val) ++ goto out; ++ ++ /* NVM initialization */ ++ ret_val = igb_init_nvm_params_82575(hw); ++ switch (hw->mac.type) { ++ case e1000_i210: ++ case e1000_i211: ++ ret_val = igb_init_nvm_params_i210(hw); ++ break; ++ default: ++ break; ++ } ++ ++ if (ret_val) ++ goto out; ++ ++ /* if part supports SR-IOV then initialize mailbox parameters */ ++ switch (mac->type) { ++ case e1000_82576: ++ case e1000_i350: ++ igb_init_mbx_params_pf(hw); ++ break; ++ default: ++ break; ++ } ++ ++ /* setup PHY parameters */ ++ ret_val = igb_init_phy_params_82575(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_acquire_phy_82575 - Acquire rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * Acquire access rights to the correct PHY. This is a ++ * function pointer entry point called by the api module. 
++ **/ ++static s32 igb_acquire_phy_82575(struct e1000_hw *hw) ++{ ++ u16 mask = E1000_SWFW_PHY0_SM; ++ ++ if (hw->bus.func == E1000_FUNC_1) ++ mask = E1000_SWFW_PHY1_SM; ++ else if (hw->bus.func == E1000_FUNC_2) ++ mask = E1000_SWFW_PHY2_SM; ++ else if (hw->bus.func == E1000_FUNC_3) ++ mask = E1000_SWFW_PHY3_SM; ++ ++ return hw->mac.ops.acquire_swfw_sync(hw, mask); ++} ++ ++/** ++ * igb_release_phy_82575 - Release rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * A wrapper to release access rights to the correct PHY. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static void igb_release_phy_82575(struct e1000_hw *hw) ++{ ++ u16 mask = E1000_SWFW_PHY0_SM; ++ ++ if (hw->bus.func == E1000_FUNC_1) ++ mask = E1000_SWFW_PHY1_SM; ++ else if (hw->bus.func == E1000_FUNC_2) ++ mask = E1000_SWFW_PHY2_SM; ++ else if (hw->bus.func == E1000_FUNC_3) ++ mask = E1000_SWFW_PHY3_SM; ++ ++ hw->mac.ops.release_swfw_sync(hw, mask); ++} ++ ++/** ++ * igb_read_phy_reg_sgmii_82575 - Read PHY register using sgmii ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY register at offset using the serial gigabit media independent ++ * interface and stores the retrieved information in data. ++ **/ ++static s32 igb_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, ++ u16 *data) ++{ ++ s32 ret_val = -E1000_ERR_PARAM; ++ ++ if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { ++ hw_dbg("PHY Address %u is out of range\n", offset); ++ goto out; ++ } ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_read_phy_reg_i2c(hw, offset, data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_phy_reg_sgmii_82575 - Write PHY register using sgmii ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes the data to PHY register at the offset using the serial gigabit ++ * media independent interface. ++ **/ ++static s32 igb_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, ++ u16 data) ++{ ++ s32 ret_val = -E1000_ERR_PARAM; ++ ++ ++ if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { ++ hw_dbg("PHY Address %d is out of range\n", offset); ++ goto out; ++ } ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_write_phy_reg_i2c(hw, offset, data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_phy_id_82575 - Retrieve PHY addr and id ++ * @hw: pointer to the HW structure ++ * ++ * Retrieves the PHY address and ID for both PHY's which do and do not use ++ * sgmi interface. ++ **/ ++static s32 igb_get_phy_id_82575(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_id; ++ u32 ctrl_ext; ++ u32 mdic; ++ ++ /* Extra read required for some PHY's on i354 */ ++ if (hw->mac.type == e1000_i354) ++ igb_get_phy_id(hw); ++ ++ /* For SGMII PHYs, we try the list of possible addresses until ++ * we find one that works. For non-SGMII PHYs ++ * (e.g. integrated copper PHYs), an address of 1 should ++ * work. The result of this function should mean phy->phy_addr ++ * and phy->id are set correctly. 
++ */ ++ if (!(igb_sgmii_active_82575(hw))) { ++ phy->addr = 1; ++ ret_val = igb_get_phy_id(hw); ++ goto out; ++ } ++ ++ if (igb_sgmii_uses_mdio_82575(hw)) { ++ switch (hw->mac.type) { ++ case e1000_82575: ++ case e1000_82576: ++ mdic = rd32(E1000_MDIC); ++ mdic &= E1000_MDIC_PHY_MASK; ++ phy->addr = mdic >> E1000_MDIC_PHY_SHIFT; ++ break; ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ case e1000_i211: ++ mdic = rd32(E1000_MDICNFG); ++ mdic &= E1000_MDICNFG_PHY_MASK; ++ phy->addr = mdic >> E1000_MDICNFG_PHY_SHIFT; ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ret_val = igb_get_phy_id(hw); ++ goto out; ++ } ++ ++ /* Power on sgmii phy if it is disabled */ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ wr32(E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_SDP3_DATA); ++ wrfl(); ++ msleep(300); ++ ++ /* The address field in the I2CCMD register is 3 bits and 0 is invalid. ++ * Therefore, we need to test 1-7 ++ */ ++ for (phy->addr = 1; phy->addr < 8; phy->addr++) { ++ ret_val = igb_read_phy_reg_sgmii_82575(hw, PHY_ID1, &phy_id); ++ if (ret_val == 0) { ++ hw_dbg("Vendor ID 0x%08X read at address %u\n", ++ phy_id, phy->addr); ++ /* At the time of this writing, The M88 part is ++ * the only supported SGMII PHY product. ++ */ ++ if (phy_id == M88_VENDOR) ++ break; ++ } else { ++ hw_dbg("PHY address %u was unreadable\n", phy->addr); ++ } ++ } ++ ++ /* A valid PHY type couldn't be found. */ ++ if (phy->addr == 8) { ++ phy->addr = 0; ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } else { ++ ret_val = igb_get_phy_id(hw); ++ } ++ ++ /* restore previous sfp cage power state */ ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_hw_reset_sgmii_82575 - Performs a PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Resets the PHY using the serial gigabit media independent interface. ++ **/ ++static s32 igb_phy_hw_reset_sgmii_82575(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ /* This isn't a true "hard" reset, but is the only reset ++ * available to us at this time. ++ */ ++ ++ hw_dbg("Soft resetting SGMII attached PHY...\n"); ++ ++ /* SFP documentation requires the following to configure the SPF module ++ * to work on SGMII. No further documentation is given. ++ */ ++ ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_phy_sw_reset(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_set_d0_lplu_state_82575 - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. 
++ **/ ++static s32 igb_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ goto out; ++ ++ if (active) { ++ data |= IGP02E1000_PM_D0_LPLU; ++ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else { ++ data &= ~IGP02E1000_PM_D0_LPLU; ++ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ data); ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = phy->ops.read_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = phy->ops.read_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, data); ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_set_d0_lplu_state_82580 - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. ++ **/ ++static s32 igb_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u16 data; ++ ++ data = rd32(E1000_82580_PHY_POWER_MGMT); ++ ++ if (active) { ++ data |= E1000_82580_PM_D0_LPLU; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ data &= ~E1000_82580_PM_SPD; ++ } else { ++ data &= ~E1000_82580_PM_D0_LPLU; ++ ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) ++ data |= E1000_82580_PM_SPD; ++ else if (phy->smart_speed == e1000_smart_speed_off) ++ data &= ~E1000_82580_PM_SPD; } ++ ++ wr32(E1000_82580_PHY_POWER_MGMT, data); ++ return 0; ++} ++ ++/** ++ * igb_set_d3_lplu_state_82580 - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. 
LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. ++ **/ ++static s32 igb_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u16 data; ++ ++ data = rd32(E1000_82580_PHY_POWER_MGMT); ++ ++ if (!active) { ++ data &= ~E1000_82580_PM_D3_LPLU; ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) ++ data |= E1000_82580_PM_SPD; ++ else if (phy->smart_speed == e1000_smart_speed_off) ++ data &= ~E1000_82580_PM_SPD; ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= E1000_82580_PM_D3_LPLU; ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ data &= ~E1000_82580_PM_SPD; ++ } ++ ++ wr32(E1000_82580_PHY_POWER_MGMT, data); ++ return 0; ++} ++ ++/** ++ * igb_acquire_nvm_82575 - Request for access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the necessary semaphores for exclusive access to the EEPROM. ++ * Set the EEPROM access request bit and wait for EEPROM access grant bit. ++ * Return successful if access grant bit set, else clear the request for ++ * EEPROM access and return -E1000_ERR_NVM (-1). ++ **/ ++static s32 igb_acquire_nvm_82575(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->mac.ops.acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_acquire_nvm(hw); ++ ++ if (ret_val) ++ hw->mac.ops.release_swfw_sync(hw, E1000_SWFW_EEP_SM); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_release_nvm_82575 - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit, ++ * then release the semaphores acquired. ++ **/ ++static void igb_release_nvm_82575(struct e1000_hw *hw) ++{ ++ igb_release_nvm(hw); ++ hw->mac.ops.release_swfw_sync(hw, E1000_SWFW_EEP_SM); ++} ++ ++/** ++ * igb_acquire_swfw_sync_82575 - Acquire SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask ++ * will also specify which port we're acquiring the lock for. 
++ **/ ++static s32 igb_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ u32 swmask = mask; ++ u32 fwmask = mask << 16; ++ s32 ret_val = 0; ++ s32 i = 0, timeout = 200; ++ ++ while (i < timeout) { ++ if (igb_get_hw_semaphore(hw)) { ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync = rd32(E1000_SW_FW_SYNC); ++ if (!(swfw_sync & (fwmask | swmask))) ++ break; ++ ++ /* Firmware currently using resource (fwmask) ++ * or other software thread using resource (swmask) ++ */ ++ igb_put_hw_semaphore(hw); ++ mdelay(5); ++ i++; ++ } ++ ++ if (i == timeout) { ++ hw_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n"); ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync |= swmask; ++ wr32(E1000_SW_FW_SYNC, swfw_sync); ++ ++ igb_put_hw_semaphore(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_release_swfw_sync_82575 - Release SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Release the SW/FW semaphore used to access the PHY or NVM. The mask ++ * will also specify which port we're releasing the lock for. ++ **/ ++static void igb_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ ++ while (igb_get_hw_semaphore(hw) != 0) ++ ; /* Empty */ ++ ++ swfw_sync = rd32(E1000_SW_FW_SYNC); ++ swfw_sync &= ~mask; ++ wr32(E1000_SW_FW_SYNC, swfw_sync); ++ ++ igb_put_hw_semaphore(hw); ++} ++ ++/** ++ * igb_get_cfg_done_82575 - Read config done bit ++ * @hw: pointer to the HW structure ++ * ++ * Read the management control register for the config done bit for ++ * completion status. NOTE: silicon which is EEPROM-less will fail trying ++ * to read the config done bit, so an error is *ONLY* logged and returns ++ * 0. If we were to return with error, EEPROM-less silicon ++ * would not be able to be reset or change link. ++ **/ ++static s32 igb_get_cfg_done_82575(struct e1000_hw *hw) ++{ ++ s32 timeout = PHY_CFG_TIMEOUT; ++ u32 mask = E1000_NVM_CFG_DONE_PORT_0; ++ ++ if (hw->bus.func == 1) ++ mask = E1000_NVM_CFG_DONE_PORT_1; ++ else if (hw->bus.func == E1000_FUNC_2) ++ mask = E1000_NVM_CFG_DONE_PORT_2; ++ else if (hw->bus.func == E1000_FUNC_3) ++ mask = E1000_NVM_CFG_DONE_PORT_3; ++ ++ while (timeout) { ++ if (rd32(E1000_EEMNGCTL) & mask) ++ break; ++ usleep_range(1000, 2000); ++ timeout--; ++ } ++ if (!timeout) ++ hw_dbg("MNG configuration cycle has not completed.\n"); ++ ++ /* If EEPROM is not marked present, init the PHY manually */ ++ if (((rd32(E1000_EECD) & E1000_EECD_PRES) == 0) && ++ (hw->phy.type == e1000_phy_igp_3)) ++ igb_phy_init_script_igp3(hw); ++ ++ return 0; ++} ++ ++/** ++ * igb_get_link_up_info_82575 - Get link speed/duplex info ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * This is a wrapper function, if using the serial gigabit media independent ++ * interface, use PCS to retrieve the link speed and duplex information. ++ * Otherwise, use the generic function to get the link speed and duplex info. 
++ **/ ++static s32 igb_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ s32 ret_val; ++ ++ if (hw->phy.media_type != e1000_media_type_copper) ++ ret_val = igb_get_pcs_speed_and_duplex_82575(hw, speed, ++ duplex); ++ else ++ ret_val = igb_get_speed_and_duplex_copper(hw, speed, ++ duplex); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_link_82575 - Check for link ++ * @hw: pointer to the HW structure ++ * ++ * If sgmii is enabled, then use the pcs register to determine link, otherwise ++ * use the generic interface for determining link. ++ **/ ++static s32 igb_check_for_link_82575(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 speed, duplex; ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ ret_val = igb_get_pcs_speed_and_duplex_82575(hw, &speed, ++ &duplex); ++ /* Use this flag to determine if link needs to be checked or ++ * not. If we have link clear the flag so that we do not ++ * continue to check for link. ++ */ ++ hw->mac.get_link_status = !hw->mac.serdes_has_link; ++ ++ /* Configure Flow Control now that Auto-Neg has completed. ++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = igb_config_fc_after_link_up(hw); ++ if (ret_val) ++ hw_dbg("Error configuring flow control\n"); ++ } else { ++ ret_val = igb_check_for_copper_link(hw); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_power_up_serdes_link_82575 - Power up the serdes link after shutdown ++ * @hw: pointer to the HW structure ++ **/ ++void igb_power_up_serdes_link_82575(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ ++ if ((hw->phy.media_type != e1000_media_type_internal_serdes) && ++ !igb_sgmii_active_82575(hw)) ++ return; ++ ++ /* Enable PCS to turn on link */ ++ reg = rd32(E1000_PCS_CFG0); ++ reg |= E1000_PCS_CFG_PCS_EN; ++ wr32(E1000_PCS_CFG0, reg); ++ ++ /* Power up the laser */ ++ reg = rd32(E1000_CTRL_EXT); ++ reg &= ~E1000_CTRL_EXT_SDP3_DATA; ++ wr32(E1000_CTRL_EXT, reg); ++ ++ /* flush the write to verify completion */ ++ wrfl(); ++ usleep_range(1000, 2000); ++} ++ ++/** ++ * igb_get_pcs_speed_and_duplex_82575 - Retrieve current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Using the physical coding sub-layer (PCS), retrieve the current speed and ++ * duplex, then store the values in the pointers provided. ++ **/ ++static s32 igb_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 pcs, status; ++ ++ /* Set up defaults for the return values of this function */ ++ mac->serdes_has_link = false; ++ *speed = 0; ++ *duplex = 0; ++ ++ /* Read the PCS Status register for link state. For non-copper mode, ++ * the status register is not accurate. The PCS status register is ++ * used instead. ++ */ ++ pcs = rd32(E1000_PCS_LSTAT); ++ ++ /* The link up bit determines when link is up on autoneg. The sync ok ++ * gets set once both sides sync up and agree upon link. 
Stable link ++ * can be determined by checking for both link up and link sync ok ++ */ ++ if ((pcs & E1000_PCS_LSTS_LINK_OK) && (pcs & E1000_PCS_LSTS_SYNK_OK)) { ++ mac->serdes_has_link = true; ++ ++ /* Detect and store PCS speed */ ++ if (pcs & E1000_PCS_LSTS_SPEED_1000) ++ *speed = SPEED_1000; ++ else if (pcs & E1000_PCS_LSTS_SPEED_100) ++ *speed = SPEED_100; ++ else ++ *speed = SPEED_10; ++ ++ /* Detect and store PCS duplex */ ++ if (pcs & E1000_PCS_LSTS_DUPLEX_FULL) ++ *duplex = FULL_DUPLEX; ++ else ++ *duplex = HALF_DUPLEX; ++ ++ /* Check if it is an I354 2.5Gb backplane connection. */ ++ if (mac->type == e1000_i354) { ++ status = rd32(E1000_STATUS); ++ if ((status & E1000_STATUS_2P5_SKU) && ++ !(status & E1000_STATUS_2P5_SKU_OVER)) { ++ *speed = SPEED_2500; ++ *duplex = FULL_DUPLEX; ++ hw_dbg("2500 Mbs, "); ++ hw_dbg("Full Duplex\n"); ++ } ++ } ++ ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_shutdown_serdes_link_82575 - Remove link during power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of fiber serdes, shut down optics and PCS on driver unload ++ * when management pass thru is not enabled. ++ **/ ++void igb_shutdown_serdes_link_82575(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ if (hw->phy.media_type != e1000_media_type_internal_serdes && ++ igb_sgmii_active_82575(hw)) ++ return; ++ ++ if (!igb_enable_mng_pass_thru(hw)) { ++ /* Disable PCS to turn off link */ ++ reg = rd32(E1000_PCS_CFG0); ++ reg &= ~E1000_PCS_CFG_PCS_EN; ++ wr32(E1000_PCS_CFG0, reg); ++ ++ /* shutdown the laser */ ++ reg = rd32(E1000_CTRL_EXT); ++ reg |= E1000_CTRL_EXT_SDP3_DATA; ++ wr32(E1000_CTRL_EXT, reg); ++ ++ /* flush the write to verify completion */ ++ wrfl(); ++ usleep_range(1000, 2000); ++ } ++} ++ ++/** ++ * igb_reset_hw_82575 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 igb_reset_hw_82575(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ ++ /* Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = igb_disable_pcie_master(hw); ++ if (ret_val) ++ hw_dbg("PCI-E Master disable polling has failed.\n"); ++ ++ /* set the completion timeout for interface */ ++ ret_val = igb_set_pcie_completion_timeout(hw); ++ if (ret_val) ++ hw_dbg("PCI-E Set completion timeout has failed.\n"); ++ ++ hw_dbg("Masking off all interrupts\n"); ++ wr32(E1000_IMC, 0xffffffff); ++ ++ wr32(E1000_RCTL, 0); ++ wr32(E1000_TCTL, E1000_TCTL_PSP); ++ wrfl(); ++ ++ usleep_range(10000, 20000); ++ ++ ctrl = rd32(E1000_CTRL); ++ ++ hw_dbg("Issuing a global reset to MAC\n"); ++ wr32(E1000_CTRL, ctrl | E1000_CTRL_RST); ++ ++ ret_val = igb_get_auto_rd_done(hw); ++ if (ret_val) { ++ /* When auto config read does not complete, do not ++ * return with an error. This can happen in situations ++ * where there is no eeprom and prevents getting link. ++ */ ++ hw_dbg("Auto Read Done did not complete\n"); ++ } ++ ++ /* If EEPROM is not present, run manual init scripts */ ++ if ((rd32(E1000_EECD) & E1000_EECD_PRES) == 0) ++ igb_reset_init_script_82575(hw); ++ ++ /* Clear any pending interrupt events. 
*/ ++ wr32(E1000_IMC, 0xffffffff); ++ rd32(E1000_ICR); ++ ++ /* Install any alternate MAC address into RAR0 */ ++ ret_val = igb_check_alt_mac_addr(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_init_hw_82575 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. ++ **/ ++static s32 igb_init_hw_82575(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ u16 i, rar_count = mac->rar_entry_count; ++ ++ if ((hw->mac.type >= e1000_i210) && ++ !(igb_get_flash_presence_i210(hw))) { ++ ret_val = igb_pll_workaround_i210(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Initialize identification LED */ ++ ret_val = igb_id_led_init(hw); ++ if (ret_val) { ++ hw_dbg("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Disabling VLAN filtering */ ++ hw_dbg("Initializing the IEEE VLAN\n"); ++ if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) ++ igb_clear_vfta_i350(hw); ++ else ++ igb_clear_vfta(hw); ++ ++ /* Setup the receive address */ ++ igb_init_rx_addrs(hw, rar_count); ++ ++ /* Zero out the Multicast HASH table */ ++ hw_dbg("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ array_wr32(E1000_MTA, i, 0); ++ ++ /* Zero out the Unicast HASH table */ ++ hw_dbg("Zeroing the UTA\n"); ++ for (i = 0; i < mac->uta_reg_count; i++) ++ array_wr32(E1000_UTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = igb_setup_link(hw); ++ ++ /* Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ igb_clear_hw_cntrs_82575(hw); ++ return ret_val; ++} ++ ++/** ++ * igb_setup_copper_link_82575 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Configures the link for auto-neg or forced speed and duplex. Then we check ++ * for link, once link is established calls to configure collision distance ++ * and flow control are called. 
++ **/ ++static s32 igb_setup_copper_link_82575(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u32 phpm_reg; ++ ++ ctrl = rd32(E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ wr32(E1000_CTRL, ctrl); ++ ++ /* Clear Go Link Disconnect bit on supported devices */ ++ switch (hw->mac.type) { ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i210: ++ case e1000_i211: ++ phpm_reg = rd32(E1000_82580_PHY_POWER_MGMT); ++ phpm_reg &= ~E1000_82580_PM_GO_LINKD; ++ wr32(E1000_82580_PHY_POWER_MGMT, phpm_reg); ++ break; ++ default: ++ break; ++ } ++ ++ ret_val = igb_setup_serdes_link_82575(hw); ++ if (ret_val) ++ goto out; ++ ++ if (igb_sgmii_active_82575(hw) && !hw->phy.reset_disable) { ++ /* allow time for SFP cage time to power up phy */ ++ msleep(300); ++ ++ ret_val = hw->phy.ops.reset(hw); ++ if (ret_val) { ++ hw_dbg("Error resetting the PHY.\n"); ++ goto out; ++ } ++ } ++ switch (hw->phy.type) { ++ case e1000_phy_i210: ++ case e1000_phy_m88: ++ switch (hw->phy.id) { ++ case I347AT4_E_PHY_ID: ++ case M88E1112_E_PHY_ID: ++ case M88E1543_E_PHY_ID: ++ case I210_I_PHY_ID: ++ ret_val = igb_copper_link_setup_m88_gen2(hw); ++ break; ++ default: ++ ret_val = igb_copper_link_setup_m88(hw); ++ break; ++ } ++ break; ++ case e1000_phy_igp_3: ++ ret_val = igb_copper_link_setup_igp(hw); ++ break; ++ case e1000_phy_82580: ++ ret_val = igb_copper_link_setup_82580(hw); ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ break; ++ } ++ ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_setup_copper_link(hw); ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_setup_serdes_link_82575 - Setup link for serdes ++ * @hw: pointer to the HW structure ++ * ++ * Configure the physical coding sub-layer (PCS) link. The PCS link is ++ * used on copper connections where the serialized gigabit media independent ++ * interface (sgmii), or serdes fiber is being used. Configures the link ++ * for auto-negotiation or forces speed/duplex. ++ **/ ++static s32 igb_setup_serdes_link_82575(struct e1000_hw *hw) ++{ ++ u32 ctrl_ext, ctrl_reg, reg, anadv_reg; ++ bool pcs_autoneg; ++ s32 ret_val = 0; ++ u16 data; ++ ++ if ((hw->phy.media_type != e1000_media_type_internal_serdes) && ++ !igb_sgmii_active_82575(hw)) ++ return ret_val; ++ ++ ++ /* On the 82575, SerDes loopback mode persists until it is ++ * explicitly turned off or a power cycle is performed. A read to ++ * the register does not indicate its status. Therefore, we ensure ++ * loopback mode is disabled during initialization. 
++ */ ++ wr32(E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); ++ ++ /* power on the sfp cage if present and turn on I2C */ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; ++ ctrl_ext |= E1000_CTRL_I2C_ENA; ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ ++ ctrl_reg = rd32(E1000_CTRL); ++ ctrl_reg |= E1000_CTRL_SLU; ++ ++ if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) { ++ /* set both sw defined pins */ ++ ctrl_reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1; ++ ++ /* Set switch control to serdes energy detect */ ++ reg = rd32(E1000_CONNSW); ++ reg |= E1000_CONNSW_ENRGSRC; ++ wr32(E1000_CONNSW, reg); ++ } ++ ++ reg = rd32(E1000_PCS_LCTL); ++ ++ /* default pcs_autoneg to the same setting as mac autoneg */ ++ pcs_autoneg = hw->mac.autoneg; ++ ++ switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) { ++ case E1000_CTRL_EXT_LINK_MODE_SGMII: ++ /* sgmii mode lets the phy handle forcing speed/duplex */ ++ pcs_autoneg = true; ++ /* autoneg time out should be disabled for SGMII mode */ ++ reg &= ~(E1000_PCS_LCTL_AN_TIMEOUT); ++ break; ++ case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: ++ /* disable PCS autoneg and support parallel detect only */ ++ pcs_autoneg = false; ++ default: ++ if (hw->mac.type == e1000_82575 || ++ hw->mac.type == e1000_82576) { ++ ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data); ++ if (ret_val) { ++ hw_dbg(KERN_DEBUG "NVM Read Error\n\n"); ++ return ret_val; ++ } ++ ++ if (data & E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT) ++ pcs_autoneg = false; ++ } ++ ++ /* non-SGMII modes only supports a speed of 1000/Full for the ++ * link so it is best to just force the MAC and let the pcs ++ * link either autoneg or be forced to 1000/Full ++ */ ++ ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD | ++ E1000_CTRL_FD | E1000_CTRL_FRCDPX; ++ ++ /* set speed of 1000/Full if speed/duplex is forced */ ++ reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL; ++ break; ++ } ++ ++ wr32(E1000_CTRL, ctrl_reg); ++ ++ /* New SerDes mode allows for forcing speed or autonegotiating speed ++ * at 1gb. Autoneg should be default set by most drivers. This is the ++ * mode that will be compatible with older link partners and switches. ++ * However, both are supported by the hardware and some drivers/tools. 
++ */ ++ reg &= ~(E1000_PCS_LCTL_AN_ENABLE | E1000_PCS_LCTL_FLV_LINK_UP | ++ E1000_PCS_LCTL_FSD | E1000_PCS_LCTL_FORCE_LINK); ++ ++ if (pcs_autoneg) { ++ /* Set PCS register for autoneg */ ++ reg |= E1000_PCS_LCTL_AN_ENABLE | /* Enable Autoneg */ ++ E1000_PCS_LCTL_AN_RESTART; /* Restart autoneg */ ++ ++ /* Disable force flow control for autoneg */ ++ reg &= ~E1000_PCS_LCTL_FORCE_FCTRL; ++ ++ /* Configure flow control advertisement for autoneg */ ++ anadv_reg = rd32(E1000_PCS_ANADV); ++ anadv_reg &= ~(E1000_TXCW_ASM_DIR | E1000_TXCW_PAUSE); ++ switch (hw->fc.requested_mode) { ++ case e1000_fc_full: ++ case e1000_fc_rx_pause: ++ anadv_reg |= E1000_TXCW_ASM_DIR; ++ anadv_reg |= E1000_TXCW_PAUSE; ++ break; ++ case e1000_fc_tx_pause: ++ anadv_reg |= E1000_TXCW_ASM_DIR; ++ break; ++ default: ++ break; ++ } ++ wr32(E1000_PCS_ANADV, anadv_reg); ++ ++ hw_dbg("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg); ++ } else { ++ /* Set PCS register for forced link */ ++ reg |= E1000_PCS_LCTL_FSD; /* Force Speed */ ++ ++ /* Force flow control for forced link */ ++ reg |= E1000_PCS_LCTL_FORCE_FCTRL; ++ ++ hw_dbg("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg); ++ } ++ ++ wr32(E1000_PCS_LCTL, reg); ++ ++ if (!pcs_autoneg && !igb_sgmii_active_82575(hw)) ++ igb_force_mac_fc(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_sgmii_active_82575 - Return sgmii state ++ * @hw: pointer to the HW structure ++ * ++ * 82575 silicon has a serialized gigabit media independent interface (sgmii) ++ * which can be enabled for use in the embedded applications. Simply ++ * return the current state of the sgmii interface. ++ **/ ++static bool igb_sgmii_active_82575(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; ++ return dev_spec->sgmii_active; ++} ++ ++/** ++ * igb_reset_init_script_82575 - Inits HW defaults after reset ++ * @hw: pointer to the HW structure ++ * ++ * Inits recommended HW defaults after a reset when there is no EEPROM ++ * detected. This is only for the 82575. ++ **/ ++static s32 igb_reset_init_script_82575(struct e1000_hw *hw) ++{ ++ if (hw->mac.type == e1000_82575) { ++ hw_dbg("Running reset init script for 82575\n"); ++ /* SerDes configuration via SERDESCTRL */ ++ igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x00, 0x0C); ++ igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x01, 0x78); ++ igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x1B, 0x23); ++ igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x23, 0x15); ++ ++ /* CCM configuration via CCMCTL register */ ++ igb_write_8bit_ctrl_reg(hw, E1000_CCMCTL, 0x14, 0x00); ++ igb_write_8bit_ctrl_reg(hw, E1000_CCMCTL, 0x10, 0x00); ++ ++ /* PCIe lanes configuration */ ++ igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x00, 0xEC); ++ igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x61, 0xDF); ++ igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x34, 0x05); ++ igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x2F, 0x81); ++ ++ /* PCIe PLL Configuration */ ++ igb_write_8bit_ctrl_reg(hw, E1000_SCCTL, 0x02, 0x47); ++ igb_write_8bit_ctrl_reg(hw, E1000_SCCTL, 0x14, 0x00); ++ igb_write_8bit_ctrl_reg(hw, E1000_SCCTL, 0x10, 0x00); ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_read_mac_addr_82575 - Read device MAC address ++ * @hw: pointer to the HW structure ++ **/ ++static s32 igb_read_mac_addr_82575(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ /* If there's an alternate MAC address place it in RAR0 ++ * so that it will override the Si installed default perm ++ * address. 
++ */ ++ ret_val = igb_check_alt_mac_addr(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_read_mac_addr(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_power_down_phy_copper_82575 - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++void igb_power_down_phy_copper_82575(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(igb_enable_mng_pass_thru(hw) || igb_check_reset_block(hw))) ++ igb_power_down_phy_copper(hw); ++} ++ ++/** ++ * igb_clear_hw_cntrs_82575 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. ++ **/ ++static void igb_clear_hw_cntrs_82575(struct e1000_hw *hw) ++{ ++ igb_clear_hw_cntrs_base(hw); ++ ++ rd32(E1000_PRC64); ++ rd32(E1000_PRC127); ++ rd32(E1000_PRC255); ++ rd32(E1000_PRC511); ++ rd32(E1000_PRC1023); ++ rd32(E1000_PRC1522); ++ rd32(E1000_PTC64); ++ rd32(E1000_PTC127); ++ rd32(E1000_PTC255); ++ rd32(E1000_PTC511); ++ rd32(E1000_PTC1023); ++ rd32(E1000_PTC1522); ++ ++ rd32(E1000_ALGNERRC); ++ rd32(E1000_RXERRC); ++ rd32(E1000_TNCRS); ++ rd32(E1000_CEXTERR); ++ rd32(E1000_TSCTC); ++ rd32(E1000_TSCTFC); ++ ++ rd32(E1000_MGTPRC); ++ rd32(E1000_MGTPDC); ++ rd32(E1000_MGTPTC); ++ ++ rd32(E1000_IAC); ++ rd32(E1000_ICRXOC); ++ ++ rd32(E1000_ICRXPTC); ++ rd32(E1000_ICRXATC); ++ rd32(E1000_ICTXPTC); ++ rd32(E1000_ICTXATC); ++ rd32(E1000_ICTXQEC); ++ rd32(E1000_ICTXQMTC); ++ rd32(E1000_ICRXDMTC); ++ ++ rd32(E1000_CBTMPC); ++ rd32(E1000_HTDPMC); ++ rd32(E1000_CBRMPC); ++ rd32(E1000_RPTHC); ++ rd32(E1000_HGPTC); ++ rd32(E1000_HTCBDPC); ++ rd32(E1000_HGORCL); ++ rd32(E1000_HGORCH); ++ rd32(E1000_HGOTCL); ++ rd32(E1000_HGOTCH); ++ rd32(E1000_LENERRS); ++ ++ /* This register should not be read in copper configurations */ ++ if (hw->phy.media_type == e1000_media_type_internal_serdes || ++ igb_sgmii_active_82575(hw)) ++ rd32(E1000_SCVPC); ++} ++ ++/** ++ * igb_rx_fifo_flush_82575 - Clean rx fifo after RX enable ++ * @hw: pointer to the HW structure ++ * ++ * After rx enable if manageability is enabled then there is likely some ++ * bad data at the start of the fifo and possibly in the DMA fifo. This ++ * function clears the fifos and flushes any packets that came in as rx was ++ * being enabled. ++ **/ ++void igb_rx_fifo_flush_82575(struct e1000_hw *hw) ++{ ++ u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled; ++ int i, ms_wait; ++ ++ /* disable IPv6 options as per hardware errata */ ++ rfctl = rd32(E1000_RFCTL); ++ rfctl |= E1000_RFCTL_IPV6_EX_DIS; ++ wr32(E1000_RFCTL, rfctl); ++ ++ if (hw->mac.type != e1000_82575 || ++ !(rd32(E1000_MANC) & E1000_MANC_RCV_TCO_EN)) ++ return; ++ ++ /* Disable all RX queues */ ++ for (i = 0; i < 4; i++) { ++ rxdctl[i] = rd32(E1000_RXDCTL(i)); ++ wr32(E1000_RXDCTL(i), ++ rxdctl[i] & ~E1000_RXDCTL_QUEUE_ENABLE); ++ } ++ /* Poll all queues to verify they have shut down */ ++ for (ms_wait = 0; ms_wait < 10; ms_wait++) { ++ usleep_range(1000, 2000); ++ rx_enabled = 0; ++ for (i = 0; i < 4; i++) ++ rx_enabled |= rd32(E1000_RXDCTL(i)); ++ if (!(rx_enabled & E1000_RXDCTL_QUEUE_ENABLE)) ++ break; ++ } ++ ++ if (ms_wait == 10) ++ hw_dbg("Queue disable timed out after 10ms\n"); ++ ++ /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all ++ * incoming packets are rejected. 
Set enable and wait 2ms so that ++ * any packet that was coming in as RCTL.EN was set is flushed ++ */ ++ wr32(E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF); ++ ++ rlpml = rd32(E1000_RLPML); ++ wr32(E1000_RLPML, 0); ++ ++ rctl = rd32(E1000_RCTL); ++ temp_rctl = rctl & ~(E1000_RCTL_EN | E1000_RCTL_SBP); ++ temp_rctl |= E1000_RCTL_LPE; ++ ++ wr32(E1000_RCTL, temp_rctl); ++ wr32(E1000_RCTL, temp_rctl | E1000_RCTL_EN); ++ wrfl(); ++ usleep_range(2000, 3000); ++ ++ /* Enable RX queues that were previously enabled and restore our ++ * previous state ++ */ ++ for (i = 0; i < 4; i++) ++ wr32(E1000_RXDCTL(i), rxdctl[i]); ++ wr32(E1000_RCTL, rctl); ++ wrfl(); ++ ++ wr32(E1000_RLPML, rlpml); ++ wr32(E1000_RFCTL, rfctl); ++ ++ /* Flush receive errors generated by workaround */ ++ rd32(E1000_ROC); ++ rd32(E1000_RNBC); ++ rd32(E1000_MPC); ++} ++ ++/** ++ * igb_set_pcie_completion_timeout - set pci-e completion timeout ++ * @hw: pointer to the HW structure ++ * ++ * The defaults for 82575 and 82576 should be in the range of 50us to 50ms, ++ * however the hardware default for these parts is 500us to 1ms which is less ++ * than the 10ms recommended by the pci-e spec. To address this we need to ++ * increase the value to either 10ms to 200ms for capability version 1 config, ++ * or 16ms to 55ms for version 2. ++ **/ ++static s32 igb_set_pcie_completion_timeout(struct e1000_hw *hw) ++{ ++ u32 gcr = rd32(E1000_GCR); ++ s32 ret_val = 0; ++ u16 pcie_devctl2; ++ ++ /* only take action if timeout value is defaulted to 0 */ ++ if (gcr & E1000_GCR_CMPL_TMOUT_MASK) ++ goto out; ++ ++ /* if capabilities version is type 1 we can write the ++ * timeout of 10ms to 200ms through the GCR register ++ */ ++ if (!(gcr & E1000_GCR_CAP_VER2)) { ++ gcr |= E1000_GCR_CMPL_TMOUT_10ms; ++ goto out; ++ } ++ ++ /* for version 2 capabilities we need to write the config space ++ * directly in order to set the completion timeout value for ++ * 16ms to 55ms ++ */ ++ ret_val = igb_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, ++ &pcie_devctl2); ++ if (ret_val) ++ goto out; ++ ++ pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; ++ ++ ret_val = igb_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, ++ &pcie_devctl2); ++out: ++ /* disable completion timeout resend */ ++ gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND; ++ ++ wr32(E1000_GCR, gcr); ++ return ret_val; ++} ++ ++/** ++ * igb_vmdq_set_anti_spoofing_pf - enable or disable anti-spoofing ++ * @hw: pointer to the hardware struct ++ * @enable: state to enter, either enabled or disabled ++ * @pf: Physical Function pool - do not set anti-spoofing for the PF ++ * ++ * enables/disables L2 switch anti-spoofing functionality. 
++ **/ ++void igb_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf) ++{ ++ u32 reg_val, reg_offset; ++ ++ switch (hw->mac.type) { ++ case e1000_82576: ++ reg_offset = E1000_DTXSWC; ++ break; ++ case e1000_i350: ++ case e1000_i354: ++ reg_offset = E1000_TXSWC; ++ break; ++ default: ++ return; ++ } ++ ++ reg_val = rd32(reg_offset); ++ if (enable) { ++ reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK | ++ E1000_DTXSWC_VLAN_SPOOF_MASK); ++ /* The PF can spoof - it has to in order to ++ * support emulation mode NICs ++ */ ++ reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS)); ++ } else { ++ reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK | ++ E1000_DTXSWC_VLAN_SPOOF_MASK); ++ } ++ wr32(reg_offset, reg_val); ++} ++ ++/** ++ * igb_vmdq_set_loopback_pf - enable or disable vmdq loopback ++ * @hw: pointer to the hardware struct ++ * @enable: state to enter, either enabled or disabled ++ * ++ * enables/disables L2 switch loopback functionality. ++ **/ ++void igb_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable) ++{ ++ u32 dtxswc; ++ ++ switch (hw->mac.type) { ++ case e1000_82576: ++ dtxswc = rd32(E1000_DTXSWC); ++ if (enable) ++ dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; ++ else ++ dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; ++ wr32(E1000_DTXSWC, dtxswc); ++ break; ++ case e1000_i354: ++ case e1000_i350: ++ dtxswc = rd32(E1000_TXSWC); ++ if (enable) ++ dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; ++ else ++ dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; ++ wr32(E1000_TXSWC, dtxswc); ++ break; ++ default: ++ /* Currently no other hardware supports loopback */ ++ break; ++ } ++ ++} ++ ++/** ++ * igb_vmdq_set_replication_pf - enable or disable vmdq replication ++ * @hw: pointer to the hardware struct ++ * @enable: state to enter, either enabled or disabled ++ * ++ * enables/disables replication of packets across multiple pools. ++ **/ ++void igb_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable) ++{ ++ u32 vt_ctl = rd32(E1000_VT_CTL); ++ ++ if (enable) ++ vt_ctl |= E1000_VT_CTL_VM_REPL_EN; ++ else ++ vt_ctl &= ~E1000_VT_CTL_VM_REPL_EN; ++ ++ wr32(E1000_VT_CTL, vt_ctl); ++} ++ ++/** ++ * igb_read_phy_reg_82580 - Read 82580 MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the MDI control register in the PHY at offset and stores the ++ * information read to data. ++ **/ ++static s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_read_phy_reg_mdic(hw, offset, data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_phy_reg_82580 - Write 82580 MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write to register at offset ++ * ++ * Writes data to MDI control register in the PHY at offset. ++ **/ ++static s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_write_phy_reg_mdic(hw, offset, data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits ++ * @hw: pointer to the HW structure ++ * ++ * This resets the the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on ++ * the values found in the EEPROM. 
This addresses an issue in which these ++ * bits are not restored from EEPROM after reset. ++ **/ ++static s32 igb_reset_mdicnfg_82580(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u32 mdicnfg; ++ u16 nvm_data = 0; ++ ++ if (hw->mac.type != e1000_82580) ++ goto out; ++ if (!igb_sgmii_active_82575(hw)) ++ goto out; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + ++ NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, ++ &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ mdicnfg = rd32(E1000_MDICNFG); ++ if (nvm_data & NVM_WORD24_EXT_MDIO) ++ mdicnfg |= E1000_MDICNFG_EXT_MDIO; ++ if (nvm_data & NVM_WORD24_COM_MDIO) ++ mdicnfg |= E1000_MDICNFG_COM_MDIO; ++ wr32(E1000_MDICNFG, mdicnfg); ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_reset_hw_82580 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets function or entire device (all ports, etc.) ++ * to a known state. ++ **/ ++static s32 igb_reset_hw_82580(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ /* BH SW mailbox bit in SW_FW_SYNC */ ++ u16 swmbsw_mask = E1000_SW_SYNCH_MB; ++ u32 ctrl; ++ bool global_device_reset = hw->dev_spec._82575.global_device_reset; ++ ++ hw->dev_spec._82575.global_device_reset = false; ++ ++ /* due to hw errata, global device reset doesn't always ++ * work on 82580 ++ */ ++ if (hw->mac.type == e1000_82580) ++ global_device_reset = false; ++ ++ /* Get current control state. */ ++ ctrl = rd32(E1000_CTRL); ++ ++ /* Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = igb_disable_pcie_master(hw); ++ if (ret_val) ++ hw_dbg("PCI-E Master disable polling has failed.\n"); ++ ++ hw_dbg("Masking off all interrupts\n"); ++ wr32(E1000_IMC, 0xffffffff); ++ wr32(E1000_RCTL, 0); ++ wr32(E1000_TCTL, E1000_TCTL_PSP); ++ wrfl(); ++ ++ usleep_range(10000, 11000); ++ ++ /* Determine whether or not a global dev reset is requested */ ++ if (global_device_reset && ++ hw->mac.ops.acquire_swfw_sync(hw, swmbsw_mask)) ++ global_device_reset = false; ++ ++ if (global_device_reset && ++ !(rd32(E1000_STATUS) & E1000_STAT_DEV_RST_SET)) ++ ctrl |= E1000_CTRL_DEV_RST; ++ else ++ ctrl |= E1000_CTRL_RST; ++ ++ wr32(E1000_CTRL, ctrl); ++ wrfl(); ++ ++ /* Add delay to insure DEV_RST has time to complete */ ++ if (global_device_reset) ++ usleep_range(5000, 6000); ++ ++ ret_val = igb_get_auto_rd_done(hw); ++ if (ret_val) { ++ /* When auto config read does not complete, do not ++ * return with an error. This can happen in situations ++ * where there is no eeprom and prevents getting link. ++ */ ++ hw_dbg("Auto Read Done did not complete\n"); ++ } ++ ++ /* clear global device reset status bit */ ++ wr32(E1000_STATUS, E1000_STAT_DEV_RST_SET); ++ ++ /* Clear any pending interrupt events. */ ++ wr32(E1000_IMC, 0xffffffff); ++ rd32(E1000_ICR); ++ ++ ret_val = igb_reset_mdicnfg_82580(hw); ++ if (ret_val) ++ hw_dbg("Could not reset MDICNFG based on EEPROM\n"); ++ ++ /* Install any alternate MAC address into RAR0 */ ++ ret_val = igb_check_alt_mac_addr(hw); ++ ++ /* Release semaphore */ ++ if (global_device_reset) ++ hw->mac.ops.release_swfw_sync(hw, swmbsw_mask); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_rxpbs_adjust_82580 - adjust RXPBS value to reflect actual RX PBA size ++ * @data: data received by reading RXPBS register ++ * ++ * The 82580 uses a table based approach for packet buffer allocation sizes. 
++ * This function converts the retrieved value into the correct table value ++ * 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 ++ * 0x0 36 72 144 1 2 4 8 16 ++ * 0x8 35 70 140 rsv rsv rsv rsv rsv ++ */ ++u16 igb_rxpbs_adjust_82580(u32 data) ++{ ++ u16 ret_val = 0; ++ ++ if (data < ARRAY_SIZE(e1000_82580_rxpbs_table)) ++ ret_val = e1000_82580_rxpbs_table[data]; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_validate_nvm_checksum_with_offset - Validate EEPROM ++ * checksum ++ * @hw: pointer to the HW structure ++ * @offset: offset in words of the checksum protected region ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. ++ **/ ++static s32 igb_validate_nvm_checksum_with_offset(struct e1000_hw *hw, ++ u16 offset) ++{ ++ s32 ret_val = 0; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = offset; i < ((NVM_CHECKSUM_REG + offset) + 1); i++) { ++ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ ++ if (checksum != (u16) NVM_SUM) { ++ hw_dbg("NVM Checksum Invalid\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_update_nvm_checksum_with_offset - Update EEPROM ++ * checksum ++ * @hw: pointer to the HW structure ++ * @offset: offset in words of the checksum protected region ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++static s32 igb_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) ++{ ++ s32 ret_val; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = offset; i < (NVM_CHECKSUM_REG + offset); i++) { ++ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error while updating checksum.\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ checksum = (u16) NVM_SUM - checksum; ++ ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1, ++ &checksum); ++ if (ret_val) ++ hw_dbg("NVM Write Error while updating checksum.\n"); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_validate_nvm_checksum_82580 - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM section checksum by reading/adding each word of ++ * the EEPROM and then verifies that the sum of the EEPROM is ++ * equal to 0xBABA. ++ **/ ++static s32 igb_validate_nvm_checksum_82580(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 eeprom_regions_count = 1; ++ u16 j, nvm_data; ++ u16 nvm_offset; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) { ++ /* if checksums compatibility bit is set validate checksums ++ * for all 4 ports. ++ */ ++ eeprom_regions_count = 4; ++ } ++ ++ for (j = 0; j < eeprom_regions_count; j++) { ++ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ++ ret_val = igb_validate_nvm_checksum_with_offset(hw, ++ nvm_offset); ++ if (ret_val != 0) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_update_nvm_checksum_82580 - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM section checksums for all 4 ports by reading/adding ++ * each word of the EEPROM up to the checksum. Then calculates the EEPROM ++ * checksum and writes the value to the EEPROM. 
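++ *
++ * As a worked example of the scheme described above: if the 16-bit sum of
++ * all words covered by one region's checksum comes to 0x1234, the checksum
++ * word written for that region is 0xBABA - 0x1234 = 0xA886, so that the
++ * complete sum reads back as NVM_SUM (0xBABA).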
++ **/ ++static s32 igb_update_nvm_checksum_82580(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 j, nvm_data; ++ u16 nvm_offset; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error while updating checksum compatibility bit.\n"); ++ goto out; ++ } ++ ++ if ((nvm_data & NVM_COMPATIBILITY_BIT_MASK) == 0) { ++ /* set compatibility bit to validate checksums appropriately */ ++ nvm_data = nvm_data | NVM_COMPATIBILITY_BIT_MASK; ++ ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1, ++ &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Write Error while updating checksum compatibility bit.\n"); ++ goto out; ++ } ++ } ++ ++ for (j = 0; j < 4; j++) { ++ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ++ ret_val = igb_update_nvm_checksum_with_offset(hw, nvm_offset); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_validate_nvm_checksum_i350 - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM section checksum by reading/adding each word of ++ * the EEPROM and then verifies that the sum of the EEPROM is ++ * equal to 0xBABA. ++ **/ ++static s32 igb_validate_nvm_checksum_i350(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 j; ++ u16 nvm_offset; ++ ++ for (j = 0; j < 4; j++) { ++ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ++ ret_val = igb_validate_nvm_checksum_with_offset(hw, ++ nvm_offset); ++ if (ret_val != 0) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_update_nvm_checksum_i350 - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM section checksums for all 4 ports by reading/adding ++ * each word of the EEPROM up to the checksum. Then calculates the EEPROM ++ * checksum and writes the value to the EEPROM. ++ **/ ++static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 j; ++ u16 nvm_offset; ++ ++ for (j = 0; j < 4; j++) { ++ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ++ ret_val = igb_update_nvm_checksum_with_offset(hw, nvm_offset); ++ if (ret_val != 0) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * __igb_access_emi_reg - Read/write EMI register ++ * @hw: pointer to the HW structure ++ * @addr: EMI address to program ++ * @data: pointer to value to read/write from/to the EMI address ++ * @read: boolean flag to indicate read or write ++ **/ ++static s32 __igb_access_emi_reg(struct e1000_hw *hw, u16 address, ++ u16 *data, bool read) ++{ ++ s32 ret_val = 0; ++ ++ ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address); ++ if (ret_val) ++ return ret_val; ++ ++ if (read) ++ ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data); ++ else ++ ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_read_emi_reg - Read Extended Management Interface register ++ * @hw: pointer to the HW structure ++ * @addr: EMI address to program ++ * @data: value to be read from the EMI address ++ **/ ++s32 igb_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data) ++{ ++ return __igb_access_emi_reg(hw, addr, data, true); ++} ++ ++/** ++ * igb_set_eee_i350 - Enable/disable EEE support ++ * @hw: pointer to the HW structure ++ * @adv1G: boolean flag enabling 1G EEE advertisement ++ * @adv100m: boolean flag enabling 100M EEE advertisement ++ * ++ * Enable/disable EEE based on setting in dev_spec structure. 
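++ *
++ * EEE here is IEEE 802.3az Energy Efficient Ethernet: when it is not
++ * disabled in dev_spec._82575, the 100M and/or 1G low-power-idle abilities
++ * selected by @adv100M and @adv1G are advertised via IPCNFG and the EEER
++ * Tx/Rx LPI enables are set; otherwise all of those bits are cleared.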
++ * ++ **/ ++s32 igb_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M) ++{ ++ u32 ipcnfg, eeer; ++ ++ if ((hw->mac.type < e1000_i350) || ++ (hw->phy.media_type != e1000_media_type_copper)) ++ goto out; ++ ipcnfg = rd32(E1000_IPCNFG); ++ eeer = rd32(E1000_EEER); ++ ++ /* enable or disable per user setting */ ++ if (!(hw->dev_spec._82575.eee_disable)) { ++ u32 eee_su = rd32(E1000_EEE_SU); ++ ++ if (adv100M) ++ ipcnfg |= E1000_IPCNFG_EEE_100M_AN; ++ else ++ ipcnfg &= ~E1000_IPCNFG_EEE_100M_AN; ++ ++ if (adv1G) ++ ipcnfg |= E1000_IPCNFG_EEE_1G_AN; ++ else ++ ipcnfg &= ~E1000_IPCNFG_EEE_1G_AN; ++ ++ eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | ++ E1000_EEER_LPI_FC); ++ ++ /* This bit should not be set in normal operation. */ ++ if (eee_su & E1000_EEE_SU_LPI_CLK_STP) ++ hw_dbg("LPI Clock Stop Bit should not be set!\n"); ++ ++ } else { ++ ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | ++ E1000_IPCNFG_EEE_100M_AN); ++ eeer &= ~(E1000_EEER_TX_LPI_EN | ++ E1000_EEER_RX_LPI_EN | ++ E1000_EEER_LPI_FC); ++ } ++ wr32(E1000_IPCNFG, ipcnfg); ++ wr32(E1000_EEER, eeer); ++ rd32(E1000_IPCNFG); ++ rd32(E1000_EEER); ++out: ++ ++ return 0; ++} ++ ++/** ++ * igb_set_eee_i354 - Enable/disable EEE support ++ * @hw: pointer to the HW structure ++ * @adv1G: boolean flag enabling 1G EEE advertisement ++ * @adv100m: boolean flag enabling 100M EEE advertisement ++ * ++ * Enable/disable EEE legacy mode based on setting in dev_spec structure. ++ * ++ **/ ++s32 igb_set_eee_i354(struct e1000_hw *hw, bool adv1G, bool adv100M) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_data; ++ ++ if ((hw->phy.media_type != e1000_media_type_copper) || ++ (phy->id != M88E1543_E_PHY_ID)) ++ goto out; ++ ++ if (!hw->dev_spec._82575.eee_disable) { ++ /* Switch to PHY page 18. */ ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 18); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, E1000_M88E1543_EEE_CTRL_1, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= E1000_M88E1543_EEE_CTRL_1_MS; ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_EEE_CTRL_1, ++ phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Return the PHY to page 0. */ ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0); ++ if (ret_val) ++ goto out; ++ ++ /* Turn on EEE advertisement. */ ++ ret_val = igb_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, ++ E1000_EEE_ADV_DEV_I354, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ if (adv100M) ++ phy_data |= E1000_EEE_ADV_100_SUPPORTED; ++ else ++ phy_data &= ~E1000_EEE_ADV_100_SUPPORTED; ++ ++ if (adv1G) ++ phy_data |= E1000_EEE_ADV_1000_SUPPORTED; ++ else ++ phy_data &= ~E1000_EEE_ADV_1000_SUPPORTED; ++ ++ ret_val = igb_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, ++ E1000_EEE_ADV_DEV_I354, ++ phy_data); ++ } else { ++ /* Turn off EEE advertisement. */ ++ ret_val = igb_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, ++ E1000_EEE_ADV_DEV_I354, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED | ++ E1000_EEE_ADV_1000_SUPPORTED); ++ ret_val = igb_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, ++ E1000_EEE_ADV_DEV_I354, ++ phy_data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_eee_status_i354 - Get EEE status ++ * @hw: pointer to the HW structure ++ * @status: EEE status ++ * ++ * Get EEE status by guessing based on whether Tx or Rx LPI indications have ++ * been received. 
++ **/ ++s32 igb_get_eee_status_i354(struct e1000_hw *hw, bool *status) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_data; ++ ++ /* Check if EEE is supported on this device. */ ++ if ((hw->phy.media_type != e1000_media_type_copper) || ++ (phy->id != M88E1543_E_PHY_ID)) ++ goto out; ++ ++ ret_val = igb_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354, ++ E1000_PCS_STATUS_DEV_I354, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ *status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD | ++ E1000_PCS_STATUS_RX_LPI_RCVD) ? true : false; ++ ++out: ++ return ret_val; ++} ++ ++static const u8 e1000_emc_temp_data[4] = { ++ E1000_EMC_INTERNAL_DATA, ++ E1000_EMC_DIODE1_DATA, ++ E1000_EMC_DIODE2_DATA, ++ E1000_EMC_DIODE3_DATA ++}; ++static const u8 e1000_emc_therm_limit[4] = { ++ E1000_EMC_INTERNAL_THERM_LIMIT, ++ E1000_EMC_DIODE1_THERM_LIMIT, ++ E1000_EMC_DIODE2_THERM_LIMIT, ++ E1000_EMC_DIODE3_THERM_LIMIT ++}; ++ ++#ifdef CONFIG_IGB_HWMON ++/** ++ * igb_get_thermal_sensor_data_generic - Gathers thermal sensor data ++ * @hw: pointer to hardware structure ++ * ++ * Updates the temperatures in mac.thermal_sensor_data ++ **/ ++static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) ++{ ++ u16 ets_offset; ++ u16 ets_cfg; ++ u16 ets_sensor; ++ u8 num_sensors; ++ u8 sensor_index; ++ u8 sensor_location; ++ u8 i; ++ struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; ++ ++ if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0)) ++ return E1000_NOT_IMPLEMENTED; ++ ++ data->sensor[0].temp = (rd32(E1000_THMJT) & 0xFF); ++ ++ /* Return the internal sensor only if ETS is unsupported */ ++ hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_offset); ++ if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) ++ return 0; ++ ++ hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg); ++ if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) ++ != NVM_ETS_TYPE_EMC) ++ return E1000_NOT_IMPLEMENTED; ++ ++ num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); ++ if (num_sensors > E1000_MAX_SENSORS) ++ num_sensors = E1000_MAX_SENSORS; ++ ++ for (i = 1; i < num_sensors; i++) { ++ hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor); ++ sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> ++ NVM_ETS_DATA_INDEX_SHIFT); ++ sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> ++ NVM_ETS_DATA_LOC_SHIFT); ++ ++ if (sensor_location != 0) ++ hw->phy.ops.read_i2c_byte(hw, ++ e1000_emc_temp_data[sensor_index], ++ E1000_I2C_THERMAL_SENSOR_ADDR, ++ &data->sensor[i].temp); ++ } ++ return 0; ++} ++ ++/** ++ * igb_init_thermal_sensor_thresh_generic - Sets thermal sensor thresholds ++ * @hw: pointer to hardware structure ++ * ++ * Sets the thermal sensor thresholds according to the NVM map ++ * and save off the threshold and location values into mac.thermal_sensor_data ++ **/ ++static s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw) ++{ ++ u16 ets_offset; ++ u16 ets_cfg; ++ u16 ets_sensor; ++ u8 low_thresh_delta; ++ u8 num_sensors; ++ u8 sensor_index; ++ u8 sensor_location; ++ u8 therm_limit; ++ u8 i; ++ struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; ++ ++ if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0)) ++ return E1000_NOT_IMPLEMENTED; ++ ++ memset(data, 0, sizeof(struct e1000_thermal_sensor_data)); ++ ++ data->sensor[0].location = 0x1; ++ data->sensor[0].caution_thresh = ++ (rd32(E1000_THHIGHTC) & 0xFF); ++ data->sensor[0].max_op_thresh = ++ (rd32(E1000_THLOWTC) & 0xFF); ++ ++ /* Return the internal sensor only if ETS is unsupported */ ++ 
hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_offset); ++ if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) ++ return 0; ++ ++ hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg); ++ if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) ++ != NVM_ETS_TYPE_EMC) ++ return E1000_NOT_IMPLEMENTED; ++ ++ low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >> ++ NVM_ETS_LTHRES_DELTA_SHIFT); ++ num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); ++ ++ for (i = 1; i <= num_sensors; i++) { ++ hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor); ++ sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> ++ NVM_ETS_DATA_INDEX_SHIFT); ++ sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> ++ NVM_ETS_DATA_LOC_SHIFT); ++ therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK; ++ ++ hw->phy.ops.write_i2c_byte(hw, ++ e1000_emc_therm_limit[sensor_index], ++ E1000_I2C_THERMAL_SENSOR_ADDR, ++ therm_limit); ++ ++ if ((i < E1000_MAX_SENSORS) && (sensor_location != 0)) { ++ data->sensor[i].location = sensor_location; ++ data->sensor[i].caution_thresh = therm_limit; ++ data->sensor[i].max_op_thresh = therm_limit - ++ low_thresh_delta; ++ } ++ } ++ return 0; ++} ++ ++#endif ++static struct e1000_mac_operations e1000_mac_ops_82575 = { ++ .init_hw = igb_init_hw_82575, ++ .check_for_link = igb_check_for_link_82575, ++ .rar_set = igb_rar_set, ++ .read_mac_addr = igb_read_mac_addr_82575, ++ .get_speed_and_duplex = igb_get_link_up_info_82575, ++#ifdef CONFIG_IGB_HWMON ++ .get_thermal_sensor_data = igb_get_thermal_sensor_data_generic, ++ .init_thermal_sensor_thresh = igb_init_thermal_sensor_thresh_generic, ++#endif ++}; ++ ++static struct e1000_phy_operations e1000_phy_ops_82575 = { ++ .acquire = igb_acquire_phy_82575, ++ .get_cfg_done = igb_get_cfg_done_82575, ++ .release = igb_release_phy_82575, ++ .write_i2c_byte = igb_write_i2c_byte, ++ .read_i2c_byte = igb_read_i2c_byte, ++}; ++ ++static struct e1000_nvm_operations e1000_nvm_ops_82575 = { ++ .acquire = igb_acquire_nvm_82575, ++ .read = igb_read_nvm_eerd, ++ .release = igb_release_nvm_82575, ++ .write = igb_write_nvm_spi, ++}; ++ ++const struct e1000_info e1000_82575_info = { ++ .get_invariants = igb_get_invariants_82575, ++ .mac_ops = &e1000_mac_ops_82575, ++ .phy_ops = &e1000_phy_ops_82575, ++ .nvm_ops = &e1000_nvm_ops_82575, ++}; ++ +--- linux/drivers/xenomai/net/drivers/igb/e1000_nvm.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_nvm.c 2021-04-07 16:01:27.481633803 +0800 +@@ -0,0 +1,803 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#include ++#include ++ ++#include "e1000_mac.h" ++#include "e1000_nvm.h" ++ ++/** ++ * igb_raise_eec_clk - Raise EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Enable/Raise the EEPROM clock bit. ++ **/ ++static void igb_raise_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd | E1000_EECD_SK; ++ wr32(E1000_EECD, *eecd); ++ wrfl(); ++ udelay(hw->nvm.delay_usec); ++} ++ ++/** ++ * igb_lower_eec_clk - Lower EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Clear/Lower the EEPROM clock bit. ++ **/ ++static void igb_lower_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd & ~E1000_EECD_SK; ++ wr32(E1000_EECD, *eecd); ++ wrfl(); ++ udelay(hw->nvm.delay_usec); ++} ++ ++/** ++ * igb_shift_out_eec_bits - Shift data bits our to the EEPROM ++ * @hw: pointer to the HW structure ++ * @data: data to send to the EEPROM ++ * @count: number of bits to shift out ++ * ++ * We need to shift 'count' bits out to the EEPROM. So, the value in the ++ * "data" parameter will be shifted out to the EEPROM one bit at a time. ++ * In order to do this, "data" must be broken down into bits. ++ **/ ++static void igb_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = rd32(E1000_EECD); ++ u32 mask; ++ ++ mask = 0x01 << (count - 1); ++ if (nvm->type == e1000_nvm_eeprom_spi) ++ eecd |= E1000_EECD_DO; ++ ++ do { ++ eecd &= ~E1000_EECD_DI; ++ ++ if (data & mask) ++ eecd |= E1000_EECD_DI; ++ ++ wr32(E1000_EECD, eecd); ++ wrfl(); ++ ++ udelay(nvm->delay_usec); ++ ++ igb_raise_eec_clk(hw, &eecd); ++ igb_lower_eec_clk(hw, &eecd); ++ ++ mask >>= 1; ++ } while (mask); ++ ++ eecd &= ~E1000_EECD_DI; ++ wr32(E1000_EECD, eecd); ++} ++ ++/** ++ * igb_shift_in_eec_bits - Shift data bits in from the EEPROM ++ * @hw: pointer to the HW structure ++ * @count: number of bits to shift in ++ * ++ * In order to read a register from the EEPROM, we need to shift 'count' bits ++ * in from the EEPROM. Bits are "shifted in" by raising the clock input to ++ * the EEPROM (setting the SK bit), and then reading the value of the data out ++ * "DO" bit. During this "shifting in" process the data in "DI" bit should ++ * always be clear. ++ **/ ++static u16 igb_shift_in_eec_bits(struct e1000_hw *hw, u16 count) ++{ ++ u32 eecd; ++ u32 i; ++ u16 data; ++ ++ eecd = rd32(E1000_EECD); ++ ++ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); ++ data = 0; ++ ++ for (i = 0; i < count; i++) { ++ data <<= 1; ++ igb_raise_eec_clk(hw, &eecd); ++ ++ eecd = rd32(E1000_EECD); ++ ++ eecd &= ~E1000_EECD_DI; ++ if (eecd & E1000_EECD_DO) ++ data |= 1; ++ ++ igb_lower_eec_clk(hw, &eecd); ++ } ++ ++ return data; ++} ++ ++/** ++ * igb_poll_eerd_eewr_done - Poll for EEPROM read/write completion ++ * @hw: pointer to the HW structure ++ * @ee_reg: EEPROM flag for polling ++ * ++ * Polls the EEPROM status bit for either read or write completion based ++ * upon the value of 'ee_reg'. 
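++ *
++ * With the values used below (100000 attempts spaced 5 usec apart) the
++ * poll gives up after roughly half a second if the DONE bit never sets,
++ * and -E1000_ERR_NVM is returned in that case.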
++ **/ ++static s32 igb_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg) ++{ ++ u32 attempts = 100000; ++ u32 i, reg = 0; ++ s32 ret_val = -E1000_ERR_NVM; ++ ++ for (i = 0; i < attempts; i++) { ++ if (ee_reg == E1000_NVM_POLL_READ) ++ reg = rd32(E1000_EERD); ++ else ++ reg = rd32(E1000_EEWR); ++ ++ if (reg & E1000_NVM_RW_REG_DONE) { ++ ret_val = 0; ++ break; ++ } ++ ++ udelay(5); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_acquire_nvm - Generic request for access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Set the EEPROM access request bit and wait for EEPROM access grant bit. ++ * Return successful if access grant bit set, else clear the request for ++ * EEPROM access and return -E1000_ERR_NVM (-1). ++ **/ ++s32 igb_acquire_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd = rd32(E1000_EECD); ++ s32 timeout = E1000_NVM_GRANT_ATTEMPTS; ++ s32 ret_val = 0; ++ ++ ++ wr32(E1000_EECD, eecd | E1000_EECD_REQ); ++ eecd = rd32(E1000_EECD); ++ ++ while (timeout) { ++ if (eecd & E1000_EECD_GNT) ++ break; ++ udelay(5); ++ eecd = rd32(E1000_EECD); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ eecd &= ~E1000_EECD_REQ; ++ wr32(E1000_EECD, eecd); ++ hw_dbg("Could not acquire NVM grant\n"); ++ ret_val = -E1000_ERR_NVM; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_standby_nvm - Return EEPROM to standby state ++ * @hw: pointer to the HW structure ++ * ++ * Return the EEPROM to a standby state. ++ **/ ++static void igb_standby_nvm(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = rd32(E1000_EECD); ++ ++ if (nvm->type == e1000_nvm_eeprom_spi) { ++ /* Toggle CS to flush commands */ ++ eecd |= E1000_EECD_CS; ++ wr32(E1000_EECD, eecd); ++ wrfl(); ++ udelay(nvm->delay_usec); ++ eecd &= ~E1000_EECD_CS; ++ wr32(E1000_EECD, eecd); ++ wrfl(); ++ udelay(nvm->delay_usec); ++ } ++} ++ ++/** ++ * e1000_stop_nvm - Terminate EEPROM command ++ * @hw: pointer to the HW structure ++ * ++ * Terminates the current command by inverting the EEPROM's chip select pin. ++ **/ ++static void e1000_stop_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ eecd = rd32(E1000_EECD); ++ if (hw->nvm.type == e1000_nvm_eeprom_spi) { ++ /* Pull CS high */ ++ eecd |= E1000_EECD_CS; ++ igb_lower_eec_clk(hw, &eecd); ++ } ++} ++ ++/** ++ * igb_release_nvm - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit. ++ **/ ++void igb_release_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ e1000_stop_nvm(hw); ++ ++ eecd = rd32(E1000_EECD); ++ eecd &= ~E1000_EECD_REQ; ++ wr32(E1000_EECD, eecd); ++} ++ ++/** ++ * igb_ready_nvm_eeprom - Prepares EEPROM for read/write ++ * @hw: pointer to the HW structure ++ * ++ * Setups the EEPROM for reading and writing. ++ **/ ++static s32 igb_ready_nvm_eeprom(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = rd32(E1000_EECD); ++ s32 ret_val = 0; ++ u16 timeout = 0; ++ u8 spi_stat_reg; ++ ++ ++ if (nvm->type == e1000_nvm_eeprom_spi) { ++ /* Clear SK and CS */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ wr32(E1000_EECD, eecd); ++ wrfl(); ++ udelay(1); ++ timeout = NVM_MAX_RETRY_SPI; ++ ++ /* Read "Status Register" repeatedly until the LSB is cleared. ++ * The EEPROM will signal that the command has been completed ++ * by clearing bit 0 of the internal status register. If it's ++ * not cleared within 'timeout', then error out. 
++ */ ++ while (timeout) { ++ igb_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI, ++ hw->nvm.opcode_bits); ++ spi_stat_reg = (u8)igb_shift_in_eec_bits(hw, 8); ++ if (!(spi_stat_reg & NVM_STATUS_RDY_SPI)) ++ break; ++ ++ udelay(5); ++ igb_standby_nvm(hw); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ hw_dbg("SPI NVM Status error\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_read_nvm_spi - Read EEPROM's using SPI ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM. ++ **/ ++s32 igb_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i = 0; ++ s32 ret_val; ++ u16 word_in; ++ u8 read_opcode = NVM_READ_OPCODE_SPI; ++ ++ /* A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ hw_dbg("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = nvm->ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_ready_nvm_eeprom(hw); ++ if (ret_val) ++ goto release; ++ ++ igb_standby_nvm(hw); ++ ++ if ((nvm->address_bits == 8) && (offset >= 128)) ++ read_opcode |= NVM_A8_OPCODE_SPI; ++ ++ /* Send the READ command (opcode + addr) */ ++ igb_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); ++ igb_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits); ++ ++ /* Read the data. SPI NVMs increment the address with each byte ++ * read and will roll over if reading beyond the end. This allows ++ * us to read the whole NVM from any offset ++ */ ++ for (i = 0; i < words; i++) { ++ word_in = igb_shift_in_eec_bits(hw, 16); ++ data[i] = (word_in >> 8) | (word_in << 8); ++ } ++ ++release: ++ nvm->ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_read_nvm_eerd - Reads EEPROM using EERD register ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM using the EERD register. ++ **/ ++s32 igb_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, eerd = 0; ++ s32 ret_val = 0; ++ ++ /* A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ hw_dbg("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) + ++ E1000_NVM_RW_REG_START; ++ ++ wr32(E1000_EERD, eerd); ++ ret_val = igb_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ); ++ if (ret_val) ++ break; ++ ++ data[i] = (rd32(E1000_EERD) >> ++ E1000_NVM_RW_REG_DATA); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_nvm_spi - Write to EEPROM using SPI ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * Writes data to EEPROM at offset using SPI interface. 
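++ *
++ * Each word offset is shifted out as a byte address (offset * 2), a WREN
++ * opcode is issued before every chunk, and the inner loop breaks on page
++ * boundaries so at most one EEPROM page is written per pass.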
++ * ++ * If e1000_update_nvm_checksum is not called after this function , the ++ * EEPROM will most likley contain an invalid checksum. ++ **/ ++s32 igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val = -E1000_ERR_NVM; ++ u16 widx = 0; ++ ++ /* A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ hw_dbg("nvm parameter(s) out of bounds\n"); ++ return ret_val; ++ } ++ ++ while (widx < words) { ++ u8 write_opcode = NVM_WRITE_OPCODE_SPI; ++ ++ ret_val = nvm->ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = igb_ready_nvm_eeprom(hw); ++ if (ret_val) { ++ nvm->ops.release(hw); ++ return ret_val; ++ } ++ ++ igb_standby_nvm(hw); ++ ++ /* Send the WRITE ENABLE command (8 bit opcode) */ ++ igb_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI, ++ nvm->opcode_bits); ++ ++ igb_standby_nvm(hw); ++ ++ /* Some SPI eeproms use the 8th address bit embedded in the ++ * opcode ++ */ ++ if ((nvm->address_bits == 8) && (offset >= 128)) ++ write_opcode |= NVM_A8_OPCODE_SPI; ++ ++ /* Send the Write command (8-bit opcode + addr) */ ++ igb_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits); ++ igb_shift_out_eec_bits(hw, (u16)((offset + widx) * 2), ++ nvm->address_bits); ++ ++ /* Loop to allow for up to whole page write of eeprom */ ++ while (widx < words) { ++ u16 word_out = data[widx]; ++ ++ word_out = (word_out >> 8) | (word_out << 8); ++ igb_shift_out_eec_bits(hw, word_out, 16); ++ widx++; ++ ++ if ((((offset + widx) * 2) % nvm->page_size) == 0) { ++ igb_standby_nvm(hw); ++ break; ++ } ++ } ++ usleep_range(1000, 2000); ++ nvm->ops.release(hw); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_read_part_string - Read device part number ++ * @hw: pointer to the HW structure ++ * @part_num: pointer to device part number ++ * @part_num_size: size of part number buffer ++ * ++ * Reads the product board assembly (PBA) number from the EEPROM and stores ++ * the value in part_num. 
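++ *
++ * Two layouts are handled: the legacy layout, where two EEPROM words are
++ * decoded into a hex string of the form "XXXXXX-0XX" (11 bytes including
++ * the terminator), and the string layout, where the first word holds the
++ * NVM_PBA_PTR_GUARD marker and the second word points to a length-prefixed
++ * string stored in the EEPROM.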
++ **/ ++s32 igb_read_part_string(struct e1000_hw *hw, u8 *part_num, u32 part_num_size) ++{ ++ s32 ret_val; ++ u16 nvm_data; ++ u16 pointer; ++ u16 offset; ++ u16 length; ++ ++ if (part_num == NULL) { ++ hw_dbg("PBA string buffer was null\n"); ++ ret_val = E1000_ERR_INVALID_ARGUMENT; ++ goto out; ++ } ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pointer); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ /* if nvm_data is not ptr guard the PBA must be in legacy format which ++ * means pointer is actually our second data word for the PBA number ++ * and we can decode it into an ascii string ++ */ ++ if (nvm_data != NVM_PBA_PTR_GUARD) { ++ hw_dbg("NVM PBA number is not stored as string\n"); ++ ++ /* we will need 11 characters to store the PBA */ ++ if (part_num_size < 11) { ++ hw_dbg("PBA string buffer too small\n"); ++ return E1000_ERR_NO_SPACE; ++ } ++ ++ /* extract hex string from data and pointer */ ++ part_num[0] = (nvm_data >> 12) & 0xF; ++ part_num[1] = (nvm_data >> 8) & 0xF; ++ part_num[2] = (nvm_data >> 4) & 0xF; ++ part_num[3] = nvm_data & 0xF; ++ part_num[4] = (pointer >> 12) & 0xF; ++ part_num[5] = (pointer >> 8) & 0xF; ++ part_num[6] = '-'; ++ part_num[7] = 0; ++ part_num[8] = (pointer >> 4) & 0xF; ++ part_num[9] = pointer & 0xF; ++ ++ /* put a null character on the end of our string */ ++ part_num[10] = '\0'; ++ ++ /* switch all the data but the '-' to hex char */ ++ for (offset = 0; offset < 10; offset++) { ++ if (part_num[offset] < 0xA) ++ part_num[offset] += '0'; ++ else if (part_num[offset] < 0x10) ++ part_num[offset] += 'A' - 0xA; ++ } ++ ++ goto out; ++ } ++ ++ ret_val = hw->nvm.ops.read(hw, pointer, 1, &length); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (length == 0xFFFF || length == 0) { ++ hw_dbg("NVM PBA number section invalid length\n"); ++ ret_val = E1000_ERR_NVM_PBA_SECTION; ++ goto out; ++ } ++ /* check if part_num buffer is big enough */ ++ if (part_num_size < (((u32)length * 2) - 1)) { ++ hw_dbg("PBA string buffer too small\n"); ++ ret_val = E1000_ERR_NO_SPACE; ++ goto out; ++ } ++ ++ /* trim pba length from start of string */ ++ pointer++; ++ length--; ++ ++ for (offset = 0; offset < length; offset++) { ++ ret_val = hw->nvm.ops.read(hw, pointer + offset, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ part_num[offset * 2] = (u8)(nvm_data >> 8); ++ part_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF); ++ } ++ part_num[offset * 2] = '\0'; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_read_mac_addr - Read device MAC address ++ * @hw: pointer to the HW structure ++ * ++ * Reads the device MAC address from the EEPROM and stores the value. ++ * Since devices with two ports use the same EEPROM, we increment the ++ * last bit in the MAC address for the second port. 
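++ *
++ * Note that the address is taken from the RAL(0)/RAH(0) receive address
++ * registers rather than from the EEPROM words directly: the low four bytes
++ * come from RAL(0) and the remaining two from RAH(0).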
++ **/ ++s32 igb_read_mac_addr(struct e1000_hw *hw) ++{ ++ u32 rar_high; ++ u32 rar_low; ++ u16 i; ++ ++ rar_high = rd32(E1000_RAH(0)); ++ rar_low = rd32(E1000_RAL(0)); ++ ++ for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++) ++ hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8)); ++ ++ for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++) ++ hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8)); ++ ++ for (i = 0; i < ETH_ALEN; i++) ++ hw->mac.addr[i] = hw->mac.perm_addr[i]; ++ ++ return 0; ++} ++ ++/** ++ * igb_validate_nvm_checksum - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. ++ **/ ++s32 igb_validate_nvm_checksum(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { ++ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ ++ if (checksum != (u16) NVM_SUM) { ++ hw_dbg("NVM Checksum Invalid\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_update_nvm_checksum - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++s32 igb_update_nvm_checksum(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = 0; i < NVM_CHECKSUM_REG; i++) { ++ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error while updating checksum.\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ checksum = (u16) NVM_SUM - checksum; ++ ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum); ++ if (ret_val) ++ hw_dbg("NVM Write Error while updating checksum.\n"); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_fw_version - Get firmware version information ++ * @hw: pointer to the HW structure ++ * @fw_vers: pointer to output structure ++ * ++ * unsupported MAC types will return all 0 version structure ++ **/ ++void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) ++{ ++ u16 eeprom_verh, eeprom_verl, etrack_test, fw_version; ++ u8 q, hval, rem, result; ++ u16 comb_verh, comb_verl, comb_offset; ++ ++ memset(fw_vers, 0, sizeof(struct e1000_fw_version)); ++ ++ /* basic eeprom version numbers and bits used vary by part and by tool ++ * used to create the nvm images. Check which data format we have. 
++ */ ++ hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test); ++ switch (hw->mac.type) { ++ case e1000_i211: ++ igb_read_invm_version(hw, fw_vers); ++ return; ++ case e1000_82575: ++ case e1000_82576: ++ case e1000_82580: ++ /* Use this format, unless EETRACK ID exists, ++ * then use alternate format ++ */ ++ if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) { ++ hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); ++ fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) ++ >> NVM_MAJOR_SHIFT; ++ fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK) ++ >> NVM_MINOR_SHIFT; ++ fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK); ++ goto etrack_id; ++ } ++ break; ++ case e1000_i210: ++ if (!(igb_get_flash_presence_i210(hw))) { ++ igb_read_invm_version(hw, fw_vers); ++ return; ++ } ++ /* fall through */ ++ case e1000_i350: ++ /* find combo image version */ ++ hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset); ++ if ((comb_offset != 0x0) && ++ (comb_offset != NVM_VER_INVALID)) { ++ ++ hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset ++ + 1), 1, &comb_verh); ++ hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset), ++ 1, &comb_verl); ++ ++ /* get Option Rom version if it exists and is valid */ ++ if ((comb_verh && comb_verl) && ++ ((comb_verh != NVM_VER_INVALID) && ++ (comb_verl != NVM_VER_INVALID))) { ++ ++ fw_vers->or_valid = true; ++ fw_vers->or_major = ++ comb_verl >> NVM_COMB_VER_SHFT; ++ fw_vers->or_build = ++ (comb_verl << NVM_COMB_VER_SHFT) ++ | (comb_verh >> NVM_COMB_VER_SHFT); ++ fw_vers->or_patch = ++ comb_verh & NVM_COMB_VER_MASK; ++ } ++ } ++ break; ++ default: ++ return; ++ } ++ hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); ++ fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) ++ >> NVM_MAJOR_SHIFT; ++ ++ /* check for old style version format in newer images*/ ++ if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) { ++ eeprom_verl = (fw_version & NVM_COMB_VER_MASK); ++ } else { ++ eeprom_verl = (fw_version & NVM_MINOR_MASK) ++ >> NVM_MINOR_SHIFT; ++ } ++ /* Convert minor value to hex before assigning to output struct ++ * Val to be converted will not be higher than 99, per tool output ++ */ ++ q = eeprom_verl / NVM_HEX_CONV; ++ hval = q * NVM_HEX_TENS; ++ rem = eeprom_verl % NVM_HEX_CONV; ++ result = hval + rem; ++ fw_vers->eep_minor = result; ++ ++etrack_id: ++ if ((etrack_test & NVM_MAJOR_MASK) == NVM_ETRACK_VALID) { ++ hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl); ++ hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh); ++ fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT) ++ | eeprom_verl; ++ } ++} +--- linux/drivers/xenomai/net/drivers/igb/e1000_mbx.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_mbx.h 2021-04-07 16:01:27.477633809 +0800 +@@ -0,0 +1,73 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . 
++ *
++ * The full GNU General Public License is included in this distribution in
++ * the file called "COPYING".
++ *
++ * Contact Information:
++ * e1000-devel Mailing List
++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
++ */
++
++#ifndef _E1000_MBX_H_
++#define _E1000_MBX_H_
++
++#include "e1000_hw.h"
++
++#define E1000_P2VMAILBOX_STS 0x00000001 /* Initiate message send to VF */
++#define E1000_P2VMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */
++#define E1000_P2VMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */
++#define E1000_P2VMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */
++#define E1000_P2VMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */
++
++#define E1000_MBVFICR_VFREQ_MASK 0x000000FF /* bits for VF messages */
++#define E1000_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */
++#define E1000_MBVFICR_VFACK_MASK 0x00FF0000 /* bits for VF acks */
++#define E1000_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */
++
++#define E1000_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */
++
++/* If it's an E1000_VF_* msg then it originates in the VF and is sent to the
++ * PF. The reverse is true if it is E1000_PF_*.
++ * Message ACK's are the value or'd with 0xF0000000
++ */
++/* Messages below or'd with this are the ACK */
++#define E1000_VT_MSGTYPE_ACK 0x80000000
++/* Messages below or'd with this are the NACK */
++#define E1000_VT_MSGTYPE_NACK 0x40000000
++/* Indicates that VF is still clear to send requests */
++#define E1000_VT_MSGTYPE_CTS 0x20000000
++#define E1000_VT_MSGINFO_SHIFT 16
++/* bits 23:16 are used for extra info for certain messages */
++#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT)
++
++#define E1000_VF_RESET 0x01 /* VF requests reset */
++#define E1000_VF_SET_MAC_ADDR 0x02 /* VF requests to set MAC addr */
++#define E1000_VF_SET_MULTICAST 0x03 /* VF requests to set MC addr */
++#define E1000_VF_SET_VLAN 0x04 /* VF requests to set VLAN */
++#define E1000_VF_SET_LPE 0x05 /* VF requests to set VMOLR.LPE */
++#define E1000_VF_SET_PROMISC 0x06 /* VF requests to clear VMOLR.ROPE/MPME */
++#define E1000_VF_SET_PROMISC_MULTICAST (0x02 << E1000_VT_MSGINFO_SHIFT)
++
++#define E1000_PF_CONTROL_MSG 0x0100 /* PF control message */
++
++s32 igb_read_mbx(struct e1000_hw *, u32 *, u16, u16);
++s32 igb_write_mbx(struct e1000_hw *, u32 *, u16, u16);
++s32 igb_check_for_msg(struct e1000_hw *, u16);
++s32 igb_check_for_ack(struct e1000_hw *, u16);
++s32 igb_check_for_rst(struct e1000_hw *, u16);
++s32 igb_init_mbx_params_pf(struct e1000_hw *);
++
++#endif /* _E1000_MBX_H_ */
+--- linux/drivers/xenomai/net/drivers/igb/e1000_phy.c	1970-01-01 08:00:00.000000000 +0800
++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_phy.c	2021-04-07 16:01:27.472633816 +0800
+@@ -0,0 +1,2512 @@
++/* Intel(R) Gigabit Ethernet Linux driver
++ * Copyright(c) 2007-2014 Intel Corporation.
++ * RTnet port 2009 Vladimir Zapolskiy
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, see <http://www.gnu.org/licenses/>.
++ *
++ * The full GNU General Public License is included in this distribution in
++ * the file called "COPYING".
++ *
++ * Contact Information:
++ * e1000-devel Mailing List
++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
++ */
++
++#include <linux/if_ether.h>
++#include <linux/delay.h>
++
++#include "e1000_mac.h"
++#include "e1000_phy.h"
++
++static s32 igb_phy_setup_autoneg(struct e1000_hw *hw);
++static void igb_phy_force_speed_duplex_setup(struct e1000_hw *hw,
++					     u16 *phy_ctrl);
++static s32 igb_wait_autoneg(struct e1000_hw *hw);
++static s32 igb_set_master_slave_mode(struct e1000_hw *hw);
++
++/* Cable length tables */
++static const u16 e1000_m88_cable_length_table[] = {
++	0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED };
++#define M88E1000_CABLE_LENGTH_TABLE_SIZE \
++	(sizeof(e1000_m88_cable_length_table) / \
++	 sizeof(e1000_m88_cable_length_table[0]))
++
++static const u16 e1000_igp_2_cable_length_table[] = {
++	0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21,
++	0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41,
++	6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61,
++	21, 26, 31, 35, 40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82,
++	40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104,
++	60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121,
++	83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124,
++	104, 109, 114, 118, 121, 124};
++#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \
++	(sizeof(e1000_igp_2_cable_length_table) / \
++	 sizeof(e1000_igp_2_cable_length_table[0]))
++
++/**
++ * igb_check_reset_block - Check if PHY reset is blocked
++ * @hw: pointer to the HW structure
++ *
++ * Read the PHY management control register and check whether a PHY reset
++ * is blocked. If a reset is not blocked return 0, otherwise
++ * return E1000_BLK_PHY_RESET (12).
++ **/
++s32 igb_check_reset_block(struct e1000_hw *hw)
++{
++	u32 manc;
++
++	manc = rd32(E1000_MANC);
++
++	return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? E1000_BLK_PHY_RESET : 0;
++}
++
++/**
++ * igb_get_phy_id - Retrieve the PHY ID and revision
++ * @hw: pointer to the HW structure
++ *
++ * Reads the PHY registers and stores the PHY ID and possibly the PHY
++ * revision in the hardware structure.
++ **/
++s32 igb_get_phy_id(struct e1000_hw *hw)
++{
++	struct e1000_phy_info *phy = &hw->phy;
++	s32 ret_val = 0;
++	u16 phy_id;
++
++	ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id);
++	if (ret_val)
++		goto out;
++
++	phy->id = (u32)(phy_id << 16);
++	udelay(20);
++	ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id);
++	if (ret_val)
++		goto out;
++
++	phy->id |= (u32)(phy_id & PHY_REVISION_MASK);
++	phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK);
++
++out:
++	return ret_val;
++}
++
++/**
++ * igb_phy_reset_dsp - Reset PHY DSP
++ * @hw: pointer to the HW structure
++ *
++ * Reset the digital signal processor.
++ **/
++static s32 igb_phy_reset_dsp(struct e1000_hw *hw)
++{
++	s32 ret_val = 0;
++
++	if (!(hw->phy.ops.write_reg))
++		goto out;
++
++	ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1);
++	if (ret_val)
++		goto out;
++
++	ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0);
++
++out:
++	return ret_val;
++}
++
++/**
++ * igb_read_phy_reg_mdic - Read MDI control register
++ * @hw: pointer to the HW structure
++ * @offset: register offset to be read
++ * @data: pointer to the read data
++ *
++ * Reads the MDI control register in the PHY at offset and stores the
++ * information read to data.
++ **/ ++s32 igb_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ s32 ret_val = 0; ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ hw_dbg("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = ((offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_READ)); ++ ++ wr32(E1000_MDIC, mdic); ++ ++ /* Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ udelay(50); ++ mdic = rd32(E1000_MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ hw_dbg("MDI Read did not complete\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ hw_dbg("MDI Error\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ *data = (u16) mdic; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_phy_reg_mdic - Write MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write to register at offset ++ * ++ * Writes data to MDI control register in the PHY at offset. ++ **/ ++s32 igb_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ s32 ret_val = 0; ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ hw_dbg("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = (((u32)data) | ++ (offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_WRITE)); ++ ++ wr32(E1000_MDIC, mdic); ++ ++ /* Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ udelay(50); ++ mdic = rd32(E1000_MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ hw_dbg("MDI Write did not complete\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ hw_dbg("MDI Error\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_read_phy_reg_i2c - Read PHY register using i2c ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY register at offset using the i2c interface and stores the ++ * retrieved information in data. ++ **/ ++s32 igb_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, i2ccmd = 0; ++ ++ /* Set up Op-code, Phy Address, and register address in the I2CCMD ++ * register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. 
++ */ ++ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | ++ (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | ++ (E1000_I2CCMD_OPCODE_READ)); ++ ++ wr32(E1000_I2CCMD, i2ccmd); ++ ++ /* Poll the ready bit to see if the I2C read completed */ ++ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { ++ udelay(50); ++ i2ccmd = rd32(E1000_I2CCMD); ++ if (i2ccmd & E1000_I2CCMD_READY) ++ break; ++ } ++ if (!(i2ccmd & E1000_I2CCMD_READY)) { ++ hw_dbg("I2CCMD Read did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (i2ccmd & E1000_I2CCMD_ERROR) { ++ hw_dbg("I2CCMD Error bit set\n"); ++ return -E1000_ERR_PHY; ++ } ++ ++ /* Need to byte-swap the 16-bit value. */ ++ *data = ((i2ccmd >> 8) & 0x00FF) | ((i2ccmd << 8) & 0xFF00); ++ ++ return 0; ++} ++ ++/** ++ * igb_write_phy_reg_i2c - Write PHY register using i2c ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes the data to PHY register at the offset using the i2c interface. ++ **/ ++s32 igb_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, i2ccmd = 0; ++ u16 phy_data_swapped; ++ ++ /* Prevent overwritting SFP I2C EEPROM which is at A0 address.*/ ++ if ((hw->phy.addr == 0) || (hw->phy.addr > 7)) { ++ hw_dbg("PHY I2C Address %d is out of range.\n", ++ hw->phy.addr); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ /* Swap the data bytes for the I2C interface */ ++ phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00); ++ ++ /* Set up Op-code, Phy Address, and register address in the I2CCMD ++ * register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | ++ (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | ++ E1000_I2CCMD_OPCODE_WRITE | ++ phy_data_swapped); ++ ++ wr32(E1000_I2CCMD, i2ccmd); ++ ++ /* Poll the ready bit to see if the I2C read completed */ ++ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { ++ udelay(50); ++ i2ccmd = rd32(E1000_I2CCMD); ++ if (i2ccmd & E1000_I2CCMD_READY) ++ break; ++ } ++ if (!(i2ccmd & E1000_I2CCMD_READY)) { ++ hw_dbg("I2CCMD Write did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (i2ccmd & E1000_I2CCMD_ERROR) { ++ hw_dbg("I2CCMD Error bit set\n"); ++ return -E1000_ERR_PHY; ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_read_sfp_data_byte - Reads SFP module data. ++ * @hw: pointer to the HW structure ++ * @offset: byte location offset to be read ++ * @data: read data buffer pointer ++ * ++ * Reads one byte from SFP module data stored ++ * in SFP resided EEPROM memory or SFP diagnostic area. ++ * Function should be called with ++ * E1000_I2CCMD_SFP_DATA_ADDR() for SFP module database access ++ * E1000_I2CCMD_SFP_DIAG_ADDR() for SFP diagnostics parameters ++ * access ++ **/ ++s32 igb_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data) ++{ ++ u32 i = 0; ++ u32 i2ccmd = 0; ++ u32 data_local = 0; ++ ++ if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) { ++ hw_dbg("I2CCMD command address exceeds upper limit\n"); ++ return -E1000_ERR_PHY; ++ } ++ ++ /* Set up Op-code, EEPROM Address,in the I2CCMD ++ * register. The MAC will take care of interfacing with the ++ * EEPROM to retrieve the desired data. 
++ */ ++ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | ++ E1000_I2CCMD_OPCODE_READ); ++ ++ wr32(E1000_I2CCMD, i2ccmd); ++ ++ /* Poll the ready bit to see if the I2C read completed */ ++ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { ++ udelay(50); ++ data_local = rd32(E1000_I2CCMD); ++ if (data_local & E1000_I2CCMD_READY) ++ break; ++ } ++ if (!(data_local & E1000_I2CCMD_READY)) { ++ hw_dbg("I2CCMD Read did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (data_local & E1000_I2CCMD_ERROR) { ++ hw_dbg("I2CCMD Error bit set\n"); ++ return -E1000_ERR_PHY; ++ } ++ *data = (u8) data_local & 0xFF; ++ ++ return 0; ++} ++ ++/** ++ * igb_read_phy_reg_igp - Read igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. ++ **/ ++s32 igb_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val = 0; ++ ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = igb_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) { ++ hw->phy.ops.release(hw); ++ goto out; ++ } ++ } ++ ++ ret_val = igb_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_phy_reg_igp - Write igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 igb_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val = 0; ++ ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = igb_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) { ++ hw->phy.ops.release(hw); ++ goto out; ++ } ++ } ++ ++ ret_val = igb_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_copper_link_setup_82580 - Setup 82580 PHY for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up Carrier-sense on Transmit and downshift values. ++ **/ ++s32 igb_copper_link_setup_82580(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ ++ if (phy->reset_disable) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ if (phy->type == e1000_phy_82580) { ++ ret_val = hw->phy.ops.reset(hw); ++ if (ret_val) { ++ hw_dbg("Error resetting the PHY.\n"); ++ goto out; ++ } ++ } ++ ++ /* Enable CRS on TX. This must be set for half-duplex operation. 
*/ ++ ret_val = phy->ops.read_reg(hw, I82580_CFG_REG, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= I82580_CFG_ASSERT_CRS_ON_TX; ++ ++ /* Enable downshift */ ++ phy_data |= I82580_CFG_ENABLE_DOWNSHIFT; ++ ++ ret_val = phy->ops.write_reg(hw, I82580_CFG_REG, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Set MDI/MDIX mode */ ++ ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data); ++ if (ret_val) ++ goto out; ++ phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK; ++ /* Options: ++ * 0 - Auto (default) ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ */ ++ switch (hw->phy.mdix) { ++ case 1: ++ break; ++ case 2: ++ phy_data |= I82580_PHY_CTRL2_MANUAL_MDIX; ++ break; ++ case 0: ++ default: ++ phy_data |= I82580_PHY_CTRL2_AUTO_MDI_MDIX; ++ break; ++ } ++ ret_val = hw->phy.ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_copper_link_setup_m88 - Setup m88 PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock ++ * and downshift values are set also. ++ **/ ++s32 igb_copper_link_setup_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ ++ if (phy->reset_disable) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ /* Enable CRS on TX. This must be set for half-duplex operation. */ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ++ /* Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ++ switch (phy->mdix) { ++ case 1: ++ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; ++ break; ++ case 2: ++ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; ++ break; ++ case 3: ++ phy_data |= M88E1000_PSCR_AUTO_X_1000T; ++ break; ++ case 0: ++ default: ++ phy_data |= M88E1000_PSCR_AUTO_X_MODE; ++ break; ++ } ++ ++ /* Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; ++ if (phy->disable_polarity_correction == 1) ++ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; ++ ++ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ if (phy->revision < E1000_REVISION_4) { ++ /* Force TX_CLK in the Extended PHY Specific Control Register ++ * to 25MHz clock. ++ */ ++ ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ++ if ((phy->revision == E1000_REVISION_2) && ++ (phy->id == M88E1111_I_PHY_ID)) { ++ /* 82573L PHY - set the downshift counter to 5x. */ ++ phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK; ++ phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; ++ } else { ++ /* Configure Master and Slave downshift values */ ++ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); ++ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); ++ } ++ ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, ++ phy_data); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Commit the changes. 
*/ ++ ret_val = igb_phy_sw_reset(hw); ++ if (ret_val) { ++ hw_dbg("Error committing the PHY changes\n"); ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_copper_link_setup_m88_gen2 - Setup m88 PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up MDI/MDI-X and polarity for i347-AT4, m88e1322 and m88e1112 PHY's. ++ * Also enables and sets the downshift parameters. ++ **/ ++s32 igb_copper_link_setup_m88_gen2(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ ++ if (phy->reset_disable) ++ return 0; ++ ++ /* Enable CRS on Tx. This must be set for half-duplex operation. */ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ++ switch (phy->mdix) { ++ case 1: ++ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; ++ break; ++ case 2: ++ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; ++ break; ++ case 3: ++ /* M88E1112 does not support this mode) */ ++ if (phy->id != M88E1112_E_PHY_ID) { ++ phy_data |= M88E1000_PSCR_AUTO_X_1000T; ++ break; ++ } ++ case 0: ++ default: ++ phy_data |= M88E1000_PSCR_AUTO_X_MODE; ++ break; ++ } ++ ++ /* Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; ++ if (phy->disable_polarity_correction == 1) ++ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; ++ ++ /* Enable downshift and setting it to X6 */ ++ if (phy->id == M88E1543_E_PHY_ID) { ++ phy_data &= ~I347AT4_PSCR_DOWNSHIFT_ENABLE; ++ ret_val = ++ phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = igb_phy_sw_reset(hw); ++ if (ret_val) { ++ hw_dbg("Error committing the PHY changes\n"); ++ return ret_val; ++ } ++ } ++ ++ phy_data &= ~I347AT4_PSCR_DOWNSHIFT_MASK; ++ phy_data |= I347AT4_PSCR_DOWNSHIFT_6X; ++ phy_data |= I347AT4_PSCR_DOWNSHIFT_ENABLE; ++ ++ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Commit the changes. */ ++ ret_val = igb_phy_sw_reset(hw); ++ if (ret_val) { ++ hw_dbg("Error committing the PHY changes\n"); ++ return ret_val; ++ } ++ ret_val = igb_set_master_slave_mode(hw); ++ if (ret_val) ++ return ret_val; ++ ++ return 0; ++} ++ ++/** ++ * igb_copper_link_setup_igp - Setup igp PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for ++ * igp PHY's. ++ **/ ++s32 igb_copper_link_setup_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ if (phy->reset_disable) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ ret_val = phy->ops.reset(hw); ++ if (ret_val) { ++ hw_dbg("Error resetting the PHY.\n"); ++ goto out; ++ } ++ ++ /* Wait 100ms for MAC to configure PHY from NVM settings, to avoid ++ * timeout issues when LFS is enabled. ++ */ ++ msleep(100); ++ ++ /* The NVM settings will configure LPLU in D3 for ++ * non-IGP1 PHYs. 
++ */ ++ if (phy->type == e1000_phy_igp) { ++ /* disable lplu d3 during driver init */ ++ if (phy->ops.set_d3_lplu_state) ++ ret_val = phy->ops.set_d3_lplu_state(hw, false); ++ if (ret_val) { ++ hw_dbg("Error Disabling LPLU D3\n"); ++ goto out; ++ } ++ } ++ ++ /* disable lplu d0 during driver init */ ++ ret_val = phy->ops.set_d0_lplu_state(hw, false); ++ if (ret_val) { ++ hw_dbg("Error Disabling LPLU D0\n"); ++ goto out; ++ } ++ /* Configure mdi-mdix settings */ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ ++ switch (phy->mdix) { ++ case 1: ++ data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 2: ++ data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 0: ++ default: ++ data |= IGP01E1000_PSCR_AUTO_MDIX; ++ break; ++ } ++ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, data); ++ if (ret_val) ++ goto out; ++ ++ /* set auto-master slave resolution settings */ ++ if (hw->mac.autoneg) { ++ /* when autonegotiation advertisement is only 1000Mbps then we ++ * should disable SmartSpeed and enable Auto MasterSlave ++ * resolution as hardware default. ++ */ ++ if (phy->autoneg_advertised == ADVERTISE_1000_FULL) { ++ /* Disable SmartSpeed */ ++ ret_val = phy->ops.read_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* Set auto Master/Slave resolution process */ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~CR_1000T_MS_ENABLE; ++ ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ /* load defaults for future use */ ++ phy->original_ms_type = (data & CR_1000T_MS_ENABLE) ? ++ ((data & CR_1000T_MS_VALUE) ? ++ e1000_ms_force_master : ++ e1000_ms_force_slave) : ++ e1000_ms_auto; ++ ++ switch (phy->ms_type) { ++ case e1000_ms_force_master: ++ data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_force_slave: ++ data |= CR_1000T_MS_ENABLE; ++ data &= ~(CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_auto: ++ data &= ~CR_1000T_MS_ENABLE; ++ default: ++ break; ++ } ++ ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_copper_link_autoneg - Setup/Enable autoneg for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Performs initial bounds checking on autoneg advertisement parameter, then ++ * configure to advertise the full capability. Setup the PHY to autoneg ++ * and restart the negotiation process between the link partner. If ++ * autoneg_wait_to_complete, then wait for autoneg to complete before exiting. ++ **/ ++static s32 igb_copper_link_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_ctrl; ++ ++ /* Perform some bounds checking on the autoneg advertisement ++ * parameter. ++ */ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* If autoneg_advertised is zero, we assume it was not defaulted ++ * by the calling code so we set to advertise full capability. 
++ */ ++ if (phy->autoneg_advertised == 0) ++ phy->autoneg_advertised = phy->autoneg_mask; ++ ++ hw_dbg("Reconfiguring auto-neg advertisement params\n"); ++ ret_val = igb_phy_setup_autoneg(hw); ++ if (ret_val) { ++ hw_dbg("Error Setting up Auto-Negotiation\n"); ++ goto out; ++ } ++ hw_dbg("Restarting Auto-Neg\n"); ++ ++ /* Restart auto-negotiation by setting the Auto Neg Enable bit and ++ * the Auto Neg Restart bit in the PHY control register. ++ */ ++ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); ++ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ /* Does the user want to wait for Auto-Neg to complete here, or ++ * check at a later time (for example, callback routine). ++ */ ++ if (phy->autoneg_wait_to_complete) { ++ ret_val = igb_wait_autoneg(hw); ++ if (ret_val) { ++ hw_dbg("Error while waiting for autoneg to complete\n"); ++ goto out; ++ } ++ } ++ ++ hw->mac.get_link_status = true; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_setup_autoneg - Configure PHY for auto-negotiation ++ * @hw: pointer to the HW structure ++ * ++ * Reads the MII auto-neg advertisement register and/or the 1000T control ++ * register and if the PHY is already setup for auto-negotiation, then ++ * return successful. Otherwise, setup advertisement and flow control to ++ * the appropriate values for the wanted auto-negotiation. ++ **/ ++static s32 igb_phy_setup_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 mii_autoneg_adv_reg; ++ u16 mii_1000t_ctrl_reg = 0; ++ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* Read the MII Auto-Neg Advertisement Register (Address 4). */ ++ ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); ++ if (ret_val) ++ goto out; ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) { ++ /* Read the MII 1000Base-T Control Register (Address 9). */ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, ++ &mii_1000t_ctrl_reg); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Need to parse both autoneg_advertised and fc and set up ++ * the appropriate PHY registers. First we will parse for ++ * autoneg_advertised software override. Since we can advertise ++ * a plethora of combinations, we need to check each bit ++ * individually. ++ */ ++ ++ /* First we clear all the 10/100 mb speed bits in the Auto-Neg ++ * Advertisement Register (Address 4) and the 1000 mb speed bits in ++ * the 1000Base-T Control Register (Address 9). ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | ++ NWAY_AR_100TX_HD_CAPS | ++ NWAY_AR_10T_FD_CAPS | ++ NWAY_AR_10T_HD_CAPS); ++ mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); ++ ++ hw_dbg("autoneg_advertised %x\n", phy->autoneg_advertised); ++ ++ /* Do we want to advertise 10 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_HALF) { ++ hw_dbg("Advertise 10mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 10 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_FULL) { ++ hw_dbg("Advertise 10mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_100_HALF) { ++ hw_dbg("Advertise 100mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Full Duplex? 
*/ ++ if (phy->autoneg_advertised & ADVERTISE_100_FULL) { ++ hw_dbg("Advertise 100mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; ++ } ++ ++ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_HALF) ++ hw_dbg("Advertise 1000mb Half duplex request denied!\n"); ++ ++ /* Do we want to advertise 1000 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { ++ hw_dbg("Advertise 1000mb Full duplex\n"); ++ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; ++ } ++ ++ /* Check for a software override of the flow control settings, and ++ * setup the PHY advertisement registers accordingly. If ++ * auto-negotiation is enabled, then software will have to set the ++ * "PAUSE" bits to the correct value in the Auto-Negotiation ++ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- ++ * negotiation. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * but we do not support receiving pause frames). ++ * 3: Both Rx and TX flow control (symmetric) are enabled. ++ * other: No software override. The flow control configuration ++ * in the EEPROM is used. ++ */ ++ switch (hw->fc.current_mode) { ++ case e1000_fc_none: ++ /* Flow control (RX & TX) is completely disabled by a ++ * software over-ride. ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_rx_pause: ++ /* RX Flow control is enabled, and TX Flow control is ++ * disabled, by a software over-ride. ++ * ++ * Since there really isn't a way to advertise that we are ++ * capable of RX Pause ONLY, we will advertise that we ++ * support both symmetric and asymmetric RX PAUSE. Later ++ * (in e1000_config_fc_after_link_up) we will disable the ++ * hw's ability to send PAUSE frames. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_tx_pause: ++ /* TX Flow control is enabled, and RX Flow control is ++ * disabled, by a software over-ride. ++ */ ++ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; ++ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; ++ break; ++ case e1000_fc_full: ++ /* Flow control (both RX and TX) is enabled by a software ++ * over-ride. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ default: ++ hw_dbg("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); ++ if (ret_val) ++ goto out; ++ ++ hw_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) { ++ ret_val = phy->ops.write_reg(hw, ++ PHY_1000T_CTRL, ++ mii_1000t_ctrl_reg); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_setup_copper_link - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Calls the appropriate function to configure the link for auto-neg or forced ++ * speed and duplex. Then we check for link, once link is established calls ++ * to configure collision distance and flow control are called. If link is ++ * not established, we return -E1000_ERR_PHY (-2). ++ **/ ++s32 igb_setup_copper_link(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ bool link; ++ ++ if (hw->mac.autoneg) { ++ /* Setup autoneg and flow control advertisement and perform ++ * autonegotiation. 
++ */ ++ ret_val = igb_copper_link_autoneg(hw); ++ if (ret_val) ++ goto out; ++ } else { ++ /* PHY will be set to 10H, 10F, 100H or 100F ++ * depending on user settings. ++ */ ++ hw_dbg("Forcing Speed and Duplex\n"); ++ ret_val = hw->phy.ops.force_speed_duplex(hw); ++ if (ret_val) { ++ hw_dbg("Error Forcing Speed and Duplex\n"); ++ goto out; ++ } ++ } ++ ++ /* Check link status. Wait up to 100 microseconds for link to become ++ * valid. ++ */ ++ ret_val = igb_phy_has_link(hw, COPPER_LINK_UP_LIMIT, 10, &link); ++ if (ret_val) ++ goto out; ++ ++ if (link) { ++ hw_dbg("Valid link established!!!\n"); ++ igb_config_collision_dist(hw); ++ ret_val = igb_config_fc_after_link_up(hw); ++ } else { ++ hw_dbg("Unable to establish link!!!\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Waits for link and returns ++ * successful if link up is successful, else -E1000_ERR_PHY (-2). ++ **/ ++s32 igb_phy_force_speed_duplex_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ igb_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Clear Auto-Crossover to force MDI manually. IGP requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ ++ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ hw_dbg("IGP PSCR: %X\n", phy_data); ++ ++ udelay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ hw_dbg("Waiting for forced speed/duplex link on IGP phy.\n"); ++ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 10000, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ hw_dbg("Link taking longer than expected.\n"); ++ ++ /* Try once more */ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 10000, &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Resets the PHY to commit the ++ * changes. If time expires while waiting for link up, we reset the DSP. ++ * After reset, TX_CLK and CRS on TX must be set. Return successful upon ++ * successful completion, else return corresponding error code. ++ **/ ++s32 igb_phy_force_speed_duplex_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ /* I210 and I211 devices support Auto-Crossover in forced operation. */ ++ if (phy->type != e1000_phy_i210) { ++ /* Clear Auto-Crossover to force MDI manually. M88E1000 ++ * requires MDI forced whenever speed and duplex are forced. 
++ */ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, ++ phy_data); ++ if (ret_val) ++ goto out; ++ ++ hw_dbg("M88E1000 PSCR: %X\n", phy_data); ++ } ++ ++ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ igb_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Reset the phy to commit changes. */ ++ ret_val = igb_phy_sw_reset(hw); ++ if (ret_val) ++ goto out; ++ ++ if (phy->autoneg_wait_to_complete) { ++ hw_dbg("Waiting for forced speed/duplex link on M88 phy.\n"); ++ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ bool reset_dsp = true; ++ ++ switch (hw->phy.id) { ++ case I347AT4_E_PHY_ID: ++ case M88E1112_E_PHY_ID: ++ case I210_I_PHY_ID: ++ reset_dsp = false; ++ break; ++ default: ++ if (hw->phy.type != e1000_phy_m88) ++ reset_dsp = false; ++ break; ++ } ++ if (!reset_dsp) ++ hw_dbg("Link taking longer than expected.\n"); ++ else { ++ /* We didn't get link. ++ * Reset the DSP and cross our fingers. ++ */ ++ ret_val = phy->ops.write_reg(hw, ++ M88E1000_PHY_PAGE_SELECT, ++ 0x001d); ++ if (ret_val) ++ goto out; ++ ret_val = igb_phy_reset_dsp(hw); ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++ /* Try once more */ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (hw->phy.type != e1000_phy_m88 || ++ hw->phy.id == I347AT4_E_PHY_ID || ++ hw->phy.id == M88E1112_E_PHY_ID || ++ hw->phy.id == I210_I_PHY_ID) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Resetting the phy means we need to re-force TX_CLK in the ++ * Extended PHY Specific Control Register to 25MHz clock from ++ * the reset value of 2.5MHz. ++ */ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* In addition, we must re-enable CRS on Tx for both half and full ++ * duplex. ++ */ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex ++ * @hw: pointer to the HW structure ++ * @phy_ctrl: pointer to current value of PHY_CONTROL ++ * ++ * Forces speed and duplex on the PHY by doing the following: disable flow ++ * control, force speed/duplex on the MAC, disable auto speed detection, ++ * disable auto-negotiation, configure duplex, configure speed, configure ++ * the collision distance, write configuration to CTRL register. The ++ * caller must write to the PHY_CONTROL register for these settings to ++ * take affect. 
++ **/ ++static void igb_phy_force_speed_duplex_setup(struct e1000_hw *hw, ++ u16 *phy_ctrl) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 ctrl; ++ ++ /* Turn off flow control when forcing speed/duplex */ ++ hw->fc.current_mode = e1000_fc_none; ++ ++ /* Force speed/duplex on the mac */ ++ ctrl = rd32(E1000_CTRL); ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~E1000_CTRL_SPD_SEL; ++ ++ /* Disable Auto Speed Detection */ ++ ctrl &= ~E1000_CTRL_ASDE; ++ ++ /* Disable autoneg on the phy */ ++ *phy_ctrl &= ~MII_CR_AUTO_NEG_EN; ++ ++ /* Forcing Full or Half Duplex? */ ++ if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) { ++ ctrl &= ~E1000_CTRL_FD; ++ *phy_ctrl &= ~MII_CR_FULL_DUPLEX; ++ hw_dbg("Half Duplex\n"); ++ } else { ++ ctrl |= E1000_CTRL_FD; ++ *phy_ctrl |= MII_CR_FULL_DUPLEX; ++ hw_dbg("Full Duplex\n"); ++ } ++ ++ /* Forcing 10mb or 100mb? */ ++ if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) { ++ ctrl |= E1000_CTRL_SPD_100; ++ *phy_ctrl |= MII_CR_SPEED_100; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10); ++ hw_dbg("Forcing 100mb\n"); ++ } else { ++ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ *phy_ctrl |= MII_CR_SPEED_10; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); ++ hw_dbg("Forcing 10mb\n"); ++ } ++ ++ igb_config_collision_dist(hw); ++ ++ wr32(E1000_CTRL, ctrl); ++} ++ ++/** ++ * igb_set_d3_lplu_state - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. ++ **/ ++s32 igb_set_d3_lplu_state(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 data; ++ ++ if (!(hw->phy.ops.read_reg)) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!active) { ++ data &= ~IGP02E1000_PM_D3_LPLU; ++ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. 
++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = phy->ops.read_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = phy->ops.read_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= IGP02E1000_PM_D3_LPLU; ++ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_check_downshift - Checks whether a downshift in speed occurred ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * A downshift is detected by querying the PHY link health. ++ **/ ++s32 igb_check_downshift(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, offset, mask; ++ ++ switch (phy->type) { ++ case e1000_phy_i210: ++ case e1000_phy_m88: ++ case e1000_phy_gg82563: ++ offset = M88E1000_PHY_SPEC_STATUS; ++ mask = M88E1000_PSSR_DOWNSHIFT; ++ break; ++ case e1000_phy_igp_2: ++ case e1000_phy_igp: ++ case e1000_phy_igp_3: ++ offset = IGP01E1000_PHY_LINK_HEALTH; ++ mask = IGP01E1000_PLHR_SS_DOWNGRADE; ++ break; ++ default: ++ /* speed downshift not supported */ ++ phy->speed_downgraded = false; ++ ret_val = 0; ++ goto out; ++ } ++ ++ ret_val = phy->ops.read_reg(hw, offset, &phy_data); ++ ++ if (!ret_val) ++ phy->speed_downgraded = (phy_data & mask) ? true : false; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_check_polarity_m88 - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY specific status register. ++ **/ ++s32 igb_check_polarity_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & M88E1000_PSSR_REV_POLARITY) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_polarity_igp - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY port status register, and the ++ * current speed (since there is no polarity at 100Mbps). ++ **/ ++static s32 igb_check_polarity_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data, offset, mask; ++ ++ /* Polarity is determined based on the speed of ++ * our connection. 
++ */ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ offset = IGP01E1000_PHY_PCS_INIT_REG; ++ mask = IGP01E1000_PHY_POLARITY_MASK; ++ } else { ++ /* This really only applies to 10Mbps since ++ * there is no polarity for 100Mbps (always 0). ++ */ ++ offset = IGP01E1000_PHY_PORT_STATUS; ++ mask = IGP01E1000_PSSR_POLARITY_REVERSED; ++ } ++ ++ ret_val = phy->ops.read_reg(hw, offset, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & mask) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_wait_autoneg - Wait for auto-neg completion ++ * @hw: pointer to the HW structure ++ * ++ * Waits for auto-negotiation to complete or for the auto-negotiation time ++ * limit to expire, which ever happens first. ++ **/ ++static s32 igb_wait_autoneg(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 i, phy_status; ++ ++ /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */ ++ for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) { ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_AUTONEG_COMPLETE) ++ break; ++ msleep(100); ++ } ++ ++ /* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation ++ * has completed. ++ */ ++ return ret_val; ++} ++ ++/** ++ * igb_phy_has_link - Polls PHY for link ++ * @hw: pointer to the HW structure ++ * @iterations: number of times to poll for link ++ * @usec_interval: delay between polling attempts ++ * @success: pointer to whether polling was successful or not ++ * ++ * Polls the PHY status register for link, 'iterations' number of times. ++ **/ ++s32 igb_phy_has_link(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success) ++{ ++ s32 ret_val = 0; ++ u16 i, phy_status; ++ ++ for (i = 0; i < iterations; i++) { ++ /* Some PHYs require the PHY_STATUS register to be read ++ * twice due to the link bit being sticky. No harm doing ++ * it across the board. ++ */ ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val && usec_interval > 0) { ++ /* If the first read fails, another entity may have ++ * ownership of the resources, wait and try again to ++ * see if they have relinquished the resources yet. ++ */ ++ if (usec_interval >= 1000) ++ mdelay(usec_interval/1000); ++ else ++ udelay(usec_interval); ++ } ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_LINK_STATUS) ++ break; ++ if (usec_interval >= 1000) ++ mdelay(usec_interval/1000); ++ else ++ udelay(usec_interval); ++ } ++ ++ *success = (i < iterations) ? true : false; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_get_cable_length_m88 - Determine cable length for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY specific status register to retrieve the cable length ++ * information. The cable length is determined by averaging the minimum and ++ * maximum values to get the "average" cable length. 
The m88 PHY has four ++ * possible cable length values, which are: ++ * Register Value Cable Length ++ * 0 < 50 meters ++ * 1 50 - 80 meters ++ * 2 80 - 110 meters ++ * 3 110 - 140 meters ++ * 4 > 140 meters ++ **/ ++s32 igb_get_cable_length_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, index; ++ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ phy->min_cable_length = e1000_m88_cable_length_table[index]; ++ phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, phy_data2, index, default_page, is_cm; ++ ++ switch (hw->phy.id) { ++ case I210_I_PHY_ID: ++ /* Get cable length from PHY Cable Diagnostics Control Reg */ ++ ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + ++ (I347AT4_PCDL + phy->addr), ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Check if the unit of cable length is meters or cm */ ++ ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + ++ I347AT4_PCDC, &phy_data2); ++ if (ret_val) ++ return ret_val; ++ ++ is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); ++ ++ /* Populate the phy structure with cable length in meters */ ++ phy->min_cable_length = phy_data / (is_cm ? 100 : 1); ++ phy->max_cable_length = phy_data / (is_cm ? 100 : 1); ++ phy->cable_length = phy_data / (is_cm ? 100 : 1); ++ break; ++ case M88E1543_E_PHY_ID: ++ case I347AT4_E_PHY_ID: ++ /* Remember the original page select and set it to 7 */ ++ ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, ++ &default_page); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07); ++ if (ret_val) ++ goto out; ++ ++ /* Get cable length from PHY Cable Diagnostics Control Reg */ ++ ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr), ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Check if the unit of cable length is meters or cm */ ++ ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2); ++ if (ret_val) ++ goto out; ++ ++ is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); ++ ++ /* Populate the phy structure with cable length in meters */ ++ phy->min_cable_length = phy_data / (is_cm ? 100 : 1); ++ phy->max_cable_length = phy_data / (is_cm ? 100 : 1); ++ phy->cable_length = phy_data / (is_cm ? 
100 : 1); ++ ++ /* Reset the page selec to its original value */ ++ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, ++ default_page); ++ if (ret_val) ++ goto out; ++ break; ++ case M88E1112_E_PHY_ID: ++ /* Remember the original page select and set it to 5 */ ++ ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, ++ &default_page); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x05); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, M88E1112_VCT_DSP_DISTANCE, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ phy->min_cable_length = e1000_m88_cable_length_table[index]; ++ phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; ++ ++ phy->cable_length = (phy->min_cable_length + ++ phy->max_cable_length) / 2; ++ ++ /* Reset the page select to its original value */ ++ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, ++ default_page); ++ if (ret_val) ++ goto out; ++ ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_cable_length_igp_2 - Determine cable length for igp2 PHY ++ * @hw: pointer to the HW structure ++ * ++ * The automatic gain control (agc) normalizes the amplitude of the ++ * received signal, adjusting for the attenuation produced by the ++ * cable. By reading the AGC registers, which represent the ++ * combination of coarse and fine gain value, the value can be put ++ * into a lookup table to obtain the approximate cable length ++ * for each channel. ++ **/ ++s32 igb_get_cable_length_igp_2(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_data, i, agc_value = 0; ++ u16 cur_agc_index, max_agc_index = 0; ++ u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; ++ static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = { ++ IGP02E1000_PHY_AGC_A, ++ IGP02E1000_PHY_AGC_B, ++ IGP02E1000_PHY_AGC_C, ++ IGP02E1000_PHY_AGC_D ++ }; ++ ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = phy->ops.read_reg(hw, agc_reg_array[i], &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Getting bits 15:9, which represent the combination of ++ * coarse and fine gain values. The result is a number ++ * that can be put into the lookup table to obtain the ++ * approximate cable length. ++ */ ++ cur_agc_index = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & ++ IGP02E1000_AGC_LENGTH_MASK; ++ ++ /* Array index bound check. */ ++ if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) || ++ (cur_agc_index == 0)) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ /* Remove min & max AGC values from calculation. */ ++ if (e1000_igp_2_cable_length_table[min_agc_index] > ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ min_agc_index = cur_agc_index; ++ if (e1000_igp_2_cable_length_table[max_agc_index] < ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ max_agc_index = cur_agc_index; ++ ++ agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; ++ } ++ ++ agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + ++ e1000_igp_2_cable_length_table[max_agc_index]); ++ agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); ++ ++ /* Calculate cable length with the error range of +/- 10 meters. 
*/ ++ phy->min_cable_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ? ++ (agc_value - IGP02E1000_AGC_RANGE) : 0; ++ phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_phy_info_m88 - Retrieve PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Valid for only copper links. Read the PHY status register (sticky read) ++ * to verify that link is up. Read the PHY special control register to ++ * determine the polarity and 10base-T extended distance. Read the PHY ++ * special status register to determine MDI/MDIx and current speed. If ++ * speed is 1000, then determine cable length, local and remote receiver. ++ **/ ++s32 igb_get_phy_info_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ if (phy->media_type != e1000_media_type_copper) { ++ hw_dbg("Phy info is only valid for copper media\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = igb_phy_has_link(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ hw_dbg("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->polarity_correction = (phy_data & M88E1000_PSCR_POLARITY_REVERSAL) ++ ? true : false; ++ ++ ret_val = igb_check_polarity_m88(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (phy_data & M88E1000_PSSR_MDIX) ? true : false; ++ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { ++ ret_val = phy->ops.get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ /* Set values to "undefined" */ ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_phy_info_igp - Retrieve igp PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Read PHY status to determine if link is up. If link is up, then ++ * set/determine 10base-T extended distance and polarity correction. Read ++ * PHY port status to determine MDI/MDIx and speed. Based on the speed, ++ * determine on the cable length, local and remote receiver. ++ **/ ++s32 igb_get_phy_info_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = igb_phy_has_link(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ hw_dbg("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ phy->polarity_correction = true; ++ ++ ret_val = igb_check_polarity_igp(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & IGP01E1000_PSSR_MDIX) ? 
true : false; ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ ret_val = phy->ops.get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_sw_reset - PHY software reset ++ * @hw: pointer to the HW structure ++ * ++ * Does a software reset of the PHY by reading the PHY control register and ++ * setting/write the control register reset bit to the PHY. ++ **/ ++s32 igb_phy_sw_reset(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 phy_ctrl; ++ ++ if (!(hw->phy.ops.read_reg)) ++ goto out; ++ ++ ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ phy_ctrl |= MII_CR_RESET; ++ ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ udelay(1); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_hw_reset - PHY hardware reset ++ * @hw: pointer to the HW structure ++ * ++ * Verify the reset block is not blocking us from resetting. Acquire ++ * semaphore (if necessary) and read/set/write the device control reset ++ * bit in the PHY. Wait the appropriate delay time for the device to ++ * reset and release the semaphore (if necessary). ++ **/ ++s32 igb_phy_hw_reset(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u32 ctrl; ++ ++ ret_val = igb_check_reset_block(hw); ++ if (ret_val) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ ret_val = phy->ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ctrl = rd32(E1000_CTRL); ++ wr32(E1000_CTRL, ctrl | E1000_CTRL_PHY_RST); ++ wrfl(); ++ ++ udelay(phy->reset_delay_us); ++ ++ wr32(E1000_CTRL, ctrl); ++ wrfl(); ++ ++ udelay(150); ++ ++ phy->ops.release(hw); ++ ++ ret_val = phy->ops.get_cfg_done(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_init_script_igp3 - Inits the IGP3 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Initializes a Intel Gigabit PHY3 when an EEPROM is not present. 
++ **/ ++s32 igb_phy_init_script_igp3(struct e1000_hw *hw) ++{ ++ hw_dbg("Running IGP 3 PHY init script\n"); ++ ++ /* PHY init IGP 3 */ ++ /* Enable rise/fall, 10-mode work in class-A */ ++ hw->phy.ops.write_reg(hw, 0x2F5B, 0x9018); ++ /* Remove all caps from Replica path filter */ ++ hw->phy.ops.write_reg(hw, 0x2F52, 0x0000); ++ /* Bias trimming for ADC, AFE and Driver (Default) */ ++ hw->phy.ops.write_reg(hw, 0x2FB1, 0x8B24); ++ /* Increase Hybrid poly bias */ ++ hw->phy.ops.write_reg(hw, 0x2FB2, 0xF8F0); ++ /* Add 4% to TX amplitude in Giga mode */ ++ hw->phy.ops.write_reg(hw, 0x2010, 0x10B0); ++ /* Disable trimming (TTT) */ ++ hw->phy.ops.write_reg(hw, 0x2011, 0x0000); ++ /* Poly DC correction to 94.6% + 2% for all channels */ ++ hw->phy.ops.write_reg(hw, 0x20DD, 0x249A); ++ /* ABS DC correction to 95.9% */ ++ hw->phy.ops.write_reg(hw, 0x20DE, 0x00D3); ++ /* BG temp curve trim */ ++ hw->phy.ops.write_reg(hw, 0x28B4, 0x04CE); ++ /* Increasing ADC OPAMP stage 1 currents to max */ ++ hw->phy.ops.write_reg(hw, 0x2F70, 0x29E4); ++ /* Force 1000 ( required for enabling PHY regs configuration) */ ++ hw->phy.ops.write_reg(hw, 0x0000, 0x0140); ++ /* Set upd_freq to 6 */ ++ hw->phy.ops.write_reg(hw, 0x1F30, 0x1606); ++ /* Disable NPDFE */ ++ hw->phy.ops.write_reg(hw, 0x1F31, 0xB814); ++ /* Disable adaptive fixed FFE (Default) */ ++ hw->phy.ops.write_reg(hw, 0x1F35, 0x002A); ++ /* Enable FFE hysteresis */ ++ hw->phy.ops.write_reg(hw, 0x1F3E, 0x0067); ++ /* Fixed FFE for short cable lengths */ ++ hw->phy.ops.write_reg(hw, 0x1F54, 0x0065); ++ /* Fixed FFE for medium cable lengths */ ++ hw->phy.ops.write_reg(hw, 0x1F55, 0x002A); ++ /* Fixed FFE for long cable lengths */ ++ hw->phy.ops.write_reg(hw, 0x1F56, 0x002A); ++ /* Enable Adaptive Clip Threshold */ ++ hw->phy.ops.write_reg(hw, 0x1F72, 0x3FB0); ++ /* AHT reset limit to 1 */ ++ hw->phy.ops.write_reg(hw, 0x1F76, 0xC0FF); ++ /* Set AHT master delay to 127 msec */ ++ hw->phy.ops.write_reg(hw, 0x1F77, 0x1DEC); ++ /* Set scan bits for AHT */ ++ hw->phy.ops.write_reg(hw, 0x1F78, 0xF9EF); ++ /* Set AHT Preset bits */ ++ hw->phy.ops.write_reg(hw, 0x1F79, 0x0210); ++ /* Change integ_factor of channel A to 3 */ ++ hw->phy.ops.write_reg(hw, 0x1895, 0x0003); ++ /* Change prop_factor of channels BCD to 8 */ ++ hw->phy.ops.write_reg(hw, 0x1796, 0x0008); ++ /* Change cg_icount + enable integbp for channels BCD */ ++ hw->phy.ops.write_reg(hw, 0x1798, 0xD008); ++ /* Change cg_icount + enable integbp + change prop_factor_master ++ * to 8 for channel A ++ */ ++ hw->phy.ops.write_reg(hw, 0x1898, 0xD918); ++ /* Disable AHT in Slave mode on channel A */ ++ hw->phy.ops.write_reg(hw, 0x187A, 0x0800); ++ /* Enable LPLU and disable AN to 1000 in non-D0a states, ++ * Enable SPD+B2B ++ */ ++ hw->phy.ops.write_reg(hw, 0x0019, 0x008D); ++ /* Enable restart AN on an1000_dis change */ ++ hw->phy.ops.write_reg(hw, 0x001B, 0x2080); ++ /* Enable wh_fifo read clock in 10/100 modes */ ++ hw->phy.ops.write_reg(hw, 0x0014, 0x0045); ++ /* Restart AN, Speed selection is 1000 */ ++ hw->phy.ops.write_reg(hw, 0x0000, 0x1340); ++ ++ return 0; ++} ++ ++/** ++ * igb_power_up_phy_copper - Restore copper link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, restore the link to previous settings. 
++ **/ ++void igb_power_up_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); ++ mii_reg &= ~MII_CR_POWER_DOWN; ++ hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); ++} ++ ++/** ++ * igb_power_down_phy_copper - Power down copper PHY ++ * @hw: pointer to the HW structure ++ * ++ * Power down PHY to save power when interface is down and wake on lan ++ * is not enabled. ++ **/ ++void igb_power_down_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); ++ mii_reg |= MII_CR_POWER_DOWN; ++ hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); ++ usleep_range(1000, 2000); ++} ++ ++/** ++ * igb_check_polarity_82580 - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY specific status register. ++ **/ ++static s32 igb_check_polarity_82580(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ++ ret_val = phy->ops.read_reg(hw, I82580_PHY_STATUS_2, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & I82580_PHY_STATUS2_REV_POLARITY) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_phy_force_speed_duplex_82580 - Force speed/duplex for I82580 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Waits for link and returns ++ * successful if link up is successful, else -E1000_ERR_PHY (-2). ++ **/ ++s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ igb_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Clear Auto-Crossover to force MDI manually. 82580 requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK; ++ ++ ret_val = phy->ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data); ++ if (ret_val) ++ goto out; ++ ++ hw_dbg("I82580_PHY_CTRL_2: %X\n", phy_data); ++ ++ udelay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ hw_dbg("Waiting for forced speed/duplex link on 82580 phy\n"); ++ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ hw_dbg("Link taking longer than expected.\n"); ++ ++ /* Try once more */ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_phy_info_82580 - Retrieve I82580 PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Read PHY status to determine if link is up. If link is up, then ++ * set/determine 10base-T extended distance and polarity correction. Read ++ * PHY port status to determine MDI/MDIx and speed. Based on the speed, ++ * determine on the cable length, local and remote receiver. 
++ **/ ++s32 igb_get_phy_info_82580(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = igb_phy_has_link(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ hw_dbg("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ phy->polarity_correction = true; ++ ++ ret_val = igb_check_polarity_82580(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, I82580_PHY_STATUS_2, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & I82580_PHY_STATUS2_MDIX) ? true : false; ++ ++ if ((data & I82580_PHY_STATUS2_SPEED_MASK) == ++ I82580_PHY_STATUS2_SPEED_1000MBPS) { ++ ret_val = hw->phy.ops.get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_cable_length_82580 - Determine cable length for 82580 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Reads the diagnostic status register and verifies result is valid before ++ * placing it in the phy_cable_length field. ++ **/ ++s32 igb_get_cable_length_82580(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, length; ++ ++ ret_val = phy->ops.read_reg(hw, I82580_PHY_DIAG_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ length = (phy_data & I82580_DSTATUS_CABLE_LENGTH) >> ++ I82580_DSTATUS_CABLE_LENGTH_SHIFT; ++ ++ if (length == E1000_CABLE_LENGTH_UNDEFINED) ++ ret_val = -E1000_ERR_PHY; ++ ++ phy->cable_length = length; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_phy_reg_gs40g - Write GS40G PHY register ++ * @hw: pointer to the HW structure ++ * @offset: lower half is register offset to write to ++ * upper half is page to use. ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 igb_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ u16 page = offset >> GS40G_PAGE_SHIFT; ++ ++ offset = offset & GS40G_OFFSET_MASK; ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = igb_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); ++ if (ret_val) ++ goto release; ++ ret_val = igb_write_phy_reg_mdic(hw, offset, data); ++ ++release: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * igb_read_phy_reg_gs40g - Read GS40G PHY register ++ * @hw: pointer to the HW structure ++ * @offset: lower half is register offset to read to ++ * upper half is page to use. ++ * @data: data to read at register offset ++ * ++ * Acquires semaphore, if necessary, then reads the data in the PHY register ++ * at the offset. Release any acquired semaphores before exiting. 
++ **/ ++s32 igb_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ u16 page = offset >> GS40G_PAGE_SHIFT; ++ ++ offset = offset & GS40G_OFFSET_MASK; ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = igb_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); ++ if (ret_val) ++ goto release; ++ ret_val = igb_read_phy_reg_mdic(hw, offset, data); ++ ++release: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * igb_set_master_slave_mode - Setup PHY for Master/slave mode ++ * @hw: pointer to the HW structure ++ * ++ * Sets up Master/slave mode ++ **/ ++static s32 igb_set_master_slave_mode(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_data; ++ ++ /* Resolve Master/Slave mode */ ++ ret_val = hw->phy.ops.read_reg(hw, PHY_1000T_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* load defaults for future use */ ++ hw->phy.original_ms_type = (phy_data & CR_1000T_MS_ENABLE) ? ++ ((phy_data & CR_1000T_MS_VALUE) ? ++ e1000_ms_force_master : ++ e1000_ms_force_slave) : e1000_ms_auto; ++ ++ switch (hw->phy.ms_type) { ++ case e1000_ms_force_master: ++ phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_force_slave: ++ phy_data |= CR_1000T_MS_ENABLE; ++ phy_data &= ~(CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_auto: ++ phy_data &= ~CR_1000T_MS_ENABLE; ++ /* fall-through */ ++ default: ++ break; ++ } ++ ++ return hw->phy.ops.write_reg(hw, PHY_1000T_CTRL, phy_data); ++} +--- linux/drivers/xenomai/net/drivers/igb/e1000_mac.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_mac.c 2021-04-07 16:01:27.467633823 +0800 +@@ -0,0 +1,1607 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "e1000_mac.h" ++ ++#include "igb.h" ++ ++static s32 igb_set_default_fc(struct e1000_hw *hw); ++static s32 igb_set_fc_watermarks(struct e1000_hw *hw); ++ ++/** ++ * igb_get_bus_info_pcie - Get PCIe bus information ++ * @hw: pointer to the HW structure ++ * ++ * Determines and stores the system bus information for a particular ++ * network interface. The following bus information is determined and stored: ++ * bus speed, bus width, type (PCIe), and PCIe function. 
++ **/ ++s32 igb_get_bus_info_pcie(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ s32 ret_val; ++ u32 reg; ++ u16 pcie_link_status; ++ ++ bus->type = e1000_bus_type_pci_express; ++ ++ ret_val = igb_read_pcie_cap_reg(hw, ++ PCI_EXP_LNKSTA, ++ &pcie_link_status); ++ if (ret_val) { ++ bus->width = e1000_bus_width_unknown; ++ bus->speed = e1000_bus_speed_unknown; ++ } else { ++ switch (pcie_link_status & PCI_EXP_LNKSTA_CLS) { ++ case PCI_EXP_LNKSTA_CLS_2_5GB: ++ bus->speed = e1000_bus_speed_2500; ++ break; ++ case PCI_EXP_LNKSTA_CLS_5_0GB: ++ bus->speed = e1000_bus_speed_5000; ++ break; ++ default: ++ bus->speed = e1000_bus_speed_unknown; ++ break; ++ } ++ ++ bus->width = (enum e1000_bus_width)((pcie_link_status & ++ PCI_EXP_LNKSTA_NLW) >> ++ PCI_EXP_LNKSTA_NLW_SHIFT); ++ } ++ ++ reg = rd32(E1000_STATUS); ++ bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; ++ ++ return 0; ++} ++ ++/** ++ * igb_clear_vfta - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++void igb_clear_vfta(struct e1000_hw *hw) ++{ ++ u32 offset; ++ ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ array_wr32(E1000_VFTA, offset, 0); ++ wrfl(); ++ } ++} ++ ++/** ++ * igb_write_vfta - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: register offset in VLAN filter table ++ * @value: register value written to VLAN filter table ++ * ++ * Writes value at the given offset in the register array which stores ++ * the VLAN filter table. ++ **/ ++static void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ array_wr32(E1000_VFTA, offset, value); ++ wrfl(); ++} ++ ++/* Due to a hw errata, if the host tries to configure the VFTA register ++ * while performing queries from the BMC or DMA, then the VFTA in some ++ * cases won't be written. ++ */ ++ ++/** ++ * igb_clear_vfta_i350 - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++void igb_clear_vfta_i350(struct e1000_hw *hw) ++{ ++ u32 offset; ++ int i; ++ ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ for (i = 0; i < 10; i++) ++ array_wr32(E1000_VFTA, offset, 0); ++ ++ wrfl(); ++ } ++} ++ ++/** ++ * igb_write_vfta_i350 - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: register offset in VLAN filter table ++ * @value: register value written to VLAN filter table ++ * ++ * Writes value at the given offset in the register array which stores ++ * the VLAN filter table. ++ **/ ++static void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ int i; ++ ++ for (i = 0; i < 10; i++) ++ array_wr32(E1000_VFTA, offset, value); ++ ++ wrfl(); ++} ++ ++/** ++ * igb_init_rx_addrs - Initialize receive address's ++ * @hw: pointer to the HW structure ++ * @rar_count: receive address registers ++ * ++ * Setups the receive address registers by setting the base receive address ++ * register to the devices MAC address and clearing all the other receive ++ * address registers to 0. 
++ **/ ++void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count) ++{ ++ u32 i; ++ u8 mac_addr[ETH_ALEN] = {0}; ++ ++ /* Setup the receive address */ ++ hw_dbg("Programming MAC Address into RAR[0]\n"); ++ ++ hw->mac.ops.rar_set(hw, hw->mac.addr, 0); ++ ++ /* Zero out the other (rar_entry_count - 1) receive addresses */ ++ hw_dbg("Clearing RAR[1-%u]\n", rar_count-1); ++ for (i = 1; i < rar_count; i++) ++ hw->mac.ops.rar_set(hw, mac_addr, i); ++} ++ ++/** ++ * igb_vfta_set - enable or disable vlan in VLAN filter table ++ * @hw: pointer to the HW structure ++ * @vid: VLAN id to add or remove ++ * @add: if true add filter, if false remove ++ * ++ * Sets or clears a bit in the VLAN filter table array based on VLAN id ++ * and if we are adding or removing the filter ++ **/ ++s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add) ++{ ++ u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK; ++ u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK); ++ u32 vfta; ++ struct igb_adapter *adapter = hw->back; ++ s32 ret_val = 0; ++ ++ vfta = adapter->shadow_vfta[index]; ++ ++ /* bit was set/cleared before we started */ ++ if ((!!(vfta & mask)) == add) { ++ ret_val = -E1000_ERR_CONFIG; ++ } else { ++ if (add) ++ vfta |= mask; ++ else ++ vfta &= ~mask; ++ } ++ if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) ++ igb_write_vfta_i350(hw, index, vfta); ++ else ++ igb_write_vfta(hw, index, vfta); ++ adapter->shadow_vfta[index] = vfta; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_alt_mac_addr - Check for alternate MAC addr ++ * @hw: pointer to the HW structure ++ * ++ * Checks the nvm for an alternate MAC address. An alternate MAC address ++ * can be setup by pre-boot software and must be treated like a permanent ++ * address and must override the actual permanent MAC address. If an ++ * alternate MAC address is found it is saved in the hw struct and ++ * programmed into RAR0 and the function returns success, otherwise the ++ * function returns an error. ++ **/ ++s32 igb_check_alt_mac_addr(struct e1000_hw *hw) ++{ ++ u32 i; ++ s32 ret_val = 0; ++ u16 offset, nvm_alt_mac_addr_offset, nvm_data; ++ u8 alt_mac_addr[ETH_ALEN]; ++ ++ /* Alternate MAC address is handled by the option ROM for 82580 ++ * and newer. SW support not required. 
++ */ ++ if (hw->mac.type >= e1000_82580) ++ goto out; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1, ++ &nvm_alt_mac_addr_offset); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if ((nvm_alt_mac_addr_offset == 0xFFFF) || ++ (nvm_alt_mac_addr_offset == 0x0000)) ++ /* There is no Alternate MAC Address */ ++ goto out; ++ ++ if (hw->bus.func == E1000_FUNC_1) ++ nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1; ++ if (hw->bus.func == E1000_FUNC_2) ++ nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN2; ++ ++ if (hw->bus.func == E1000_FUNC_3) ++ nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN3; ++ for (i = 0; i < ETH_ALEN; i += 2) { ++ offset = nvm_alt_mac_addr_offset + (i >> 1); ++ ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ alt_mac_addr[i] = (u8)(nvm_data & 0xFF); ++ alt_mac_addr[i + 1] = (u8)(nvm_data >> 8); ++ } ++ ++ /* if multicast bit is set, the alternate address will not be used */ ++ if (is_multicast_ether_addr(alt_mac_addr)) { ++ hw_dbg("Ignoring Alternate Mac Address with MC bit set\n"); ++ goto out; ++ } ++ ++ /* We have a valid alternate MAC address, and we want to treat it the ++ * same as the normal permanent MAC address stored by the HW into the ++ * RAR. Do this by mapping this address into RAR0. ++ */ ++ hw->mac.ops.rar_set(hw, alt_mac_addr, 0); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_rar_set - Set receive address register ++ * @hw: pointer to the HW structure ++ * @addr: pointer to the receive address ++ * @index: receive address array register ++ * ++ * Sets the receive address array register at index to the address passed ++ * in by addr. ++ **/ ++void igb_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) ++{ ++ u32 rar_low, rar_high; ++ ++ /* HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((u32) addr[0] | ++ ((u32) addr[1] << 8) | ++ ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); ++ ++ rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); ++ ++ /* If MAC address zero, no need to set the AV bit */ ++ if (rar_low || rar_high) ++ rar_high |= E1000_RAH_AV; ++ ++ /* Some bridges will combine consecutive 32-bit writes into ++ * a single burst write, which will malfunction on some parts. ++ * The flushes avoid this. ++ */ ++ wr32(E1000_RAL(index), rar_low); ++ wrfl(); ++ wr32(E1000_RAH(index), rar_high); ++ wrfl(); ++} ++ ++/** ++ * igb_mta_set - Set multicast filter table address ++ * @hw: pointer to the HW structure ++ * @hash_value: determines the MTA register and bit to set ++ * ++ * The multicast table address is a register array of 32-bit registers. ++ * The hash_value is used to determine what register the bit is in, the ++ * current value is read, the new bit is OR'd in and the new value is ++ * written back into the register. ++ **/ ++void igb_mta_set(struct e1000_hw *hw, u32 hash_value) ++{ ++ u32 hash_bit, hash_reg, mta; ++ ++ /* The MTA is a register array of 32-bit registers. It is ++ * treated like an array of (32*mta_reg_count) bits. We want to ++ * set bit BitArray[hash_value]. So we figure out what register ++ * the bit is in, read it, OR in the new bit, then write ++ * back the new value. The (hw->mac.mta_reg_count - 1) serves as a ++ * mask to bits 31:5 of the hash value which gives us the ++ * register we're modifying. The hash bit within that register ++ * is determined by the lower 5 bits of the hash value. 
++ */ ++ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); ++ hash_bit = hash_value & 0x1F; ++ ++ mta = array_rd32(E1000_MTA, hash_reg); ++ ++ mta |= (1 << hash_bit); ++ ++ array_wr32(E1000_MTA, hash_reg, mta); ++ wrfl(); ++} ++ ++/** ++ * igb_hash_mc_addr - Generate a multicast hash value ++ * @hw: pointer to the HW structure ++ * @mc_addr: pointer to a multicast address ++ * ++ * Generates a multicast address hash value which is used to determine ++ * the multicast filter table array address and new table value. See ++ * igb_mta_set() ++ **/ ++static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) ++{ ++ u32 hash_value, hash_mask; ++ u8 bit_shift = 0; ++ ++ /* Register count multiplied by bits per register */ ++ hash_mask = (hw->mac.mta_reg_count * 32) - 1; ++ ++ /* For a mc_filter_type of 0, bit_shift is the number of left-shifts ++ * where 0xFF would still fall within the hash mask. ++ */ ++ while (hash_mask >> bit_shift != 0xFF) ++ bit_shift++; ++ ++ /* The portion of the address that is used for the hash table ++ * is determined by the mc_filter_type setting. ++ * The algorithm is such that there is a total of 8 bits of shifting. ++ * The bit_shift for a mc_filter_type of 0 represents the number of ++ * left-shifts where the MSB of mc_addr[5] would still fall within ++ * the hash_mask. Case 0 does this exactly. Since there are a total ++ * of 8 bits of shifting, then mc_addr[4] will shift right the ++ * remaining number of bits. Thus 8 - bit_shift. The rest of the ++ * cases are a variation of this algorithm...essentially raising the ++ * number of bits to shift mc_addr[5] left, while still keeping the ++ * 8-bit shifting total. ++ * ++ * For example, given the following Destination MAC Address and an ++ * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask), ++ * we can see that the bit_shift for case 0 is 4. These are the hash ++ * values resulting from each mc_filter_type... ++ * [0] [1] [2] [3] [4] [5] ++ * 01 AA 00 12 34 56 ++ * LSB MSB ++ * ++ * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 ++ * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 ++ * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 ++ * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 ++ */ ++ switch (hw->mac.mc_filter_type) { ++ default: ++ case 0: ++ break; ++ case 1: ++ bit_shift += 1; ++ break; ++ case 2: ++ bit_shift += 2; ++ break; ++ case 3: ++ bit_shift += 4; ++ break; ++ } ++ ++ hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | ++ (((u16) mc_addr[5]) << bit_shift))); ++ ++ return hash_value; ++} ++ ++/** ++ * igb_update_mc_addr_list - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * ++ * Updates entire Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. 
++ **/ ++void igb_update_mc_addr_list(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count) ++{ ++ u32 hash_value, hash_bit, hash_reg; ++ int i; ++ ++ /* clear mta_shadow */ ++ memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); ++ ++ /* update mta_shadow from mc_addr_list */ ++ for (i = 0; (u32) i < mc_addr_count; i++) { ++ hash_value = igb_hash_mc_addr(hw, mc_addr_list); ++ ++ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); ++ hash_bit = hash_value & 0x1F; ++ ++ hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); ++ mc_addr_list += (ETH_ALEN); ++ } ++ ++ /* replace the entire MTA table */ ++ for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) ++ array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]); ++ wrfl(); ++} ++ ++/** ++ * igb_clear_hw_cntrs_base - Clear base hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the base hardware counters by reading the counter registers. ++ **/ ++void igb_clear_hw_cntrs_base(struct e1000_hw *hw) ++{ ++ rd32(E1000_CRCERRS); ++ rd32(E1000_SYMERRS); ++ rd32(E1000_MPC); ++ rd32(E1000_SCC); ++ rd32(E1000_ECOL); ++ rd32(E1000_MCC); ++ rd32(E1000_LATECOL); ++ rd32(E1000_COLC); ++ rd32(E1000_DC); ++ rd32(E1000_SEC); ++ rd32(E1000_RLEC); ++ rd32(E1000_XONRXC); ++ rd32(E1000_XONTXC); ++ rd32(E1000_XOFFRXC); ++ rd32(E1000_XOFFTXC); ++ rd32(E1000_FCRUC); ++ rd32(E1000_GPRC); ++ rd32(E1000_BPRC); ++ rd32(E1000_MPRC); ++ rd32(E1000_GPTC); ++ rd32(E1000_GORCL); ++ rd32(E1000_GORCH); ++ rd32(E1000_GOTCL); ++ rd32(E1000_GOTCH); ++ rd32(E1000_RNBC); ++ rd32(E1000_RUC); ++ rd32(E1000_RFC); ++ rd32(E1000_ROC); ++ rd32(E1000_RJC); ++ rd32(E1000_TORL); ++ rd32(E1000_TORH); ++ rd32(E1000_TOTL); ++ rd32(E1000_TOTH); ++ rd32(E1000_TPR); ++ rd32(E1000_TPT); ++ rd32(E1000_MPTC); ++ rd32(E1000_BPTC); ++} ++ ++/** ++ * igb_check_for_copper_link - Check for link (Copper) ++ * @hw: pointer to the HW structure ++ * ++ * Checks to see of the link status of the hardware has changed. If a ++ * change in link status has been detected, then we read the PHY registers ++ * to get the current speed/duplex if link exists. ++ **/ ++s32 igb_check_for_copper_link(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ bool link; ++ ++ /* We only want to go out to the PHY registers to see if Auto-Neg ++ * has completed and/or if our link status has changed. The ++ * get_link_status flag is set upon receiving a Link Status ++ * Change or Rx Sequence Error interrupt. ++ */ ++ if (!mac->get_link_status) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ /* First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. ++ */ ++ ret_val = igb_phy_has_link(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ goto out; /* No link detected */ ++ ++ mac->get_link_status = false; ++ ++ /* Check if there was DownShift, must be checked ++ * immediately after link-up ++ */ ++ igb_check_downshift(hw); ++ ++ /* If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* Auto-Neg is enabled. Auto Speed Detection takes care ++ * of MAC speed/duplex configuration. So we only need to ++ * configure Collision Distance in the MAC. ++ */ ++ igb_config_collision_dist(hw); ++ ++ /* Configure Flow Control now that Auto-Neg has completed. 
++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = igb_config_fc_after_link_up(hw); ++ if (ret_val) ++ hw_dbg("Error configuring flow control\n"); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_setup_link - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++s32 igb_setup_link(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ /* In the case of the phy reset being blocked, we already have a link. ++ * We do not need to set it up again. ++ */ ++ if (igb_check_reset_block(hw)) ++ goto out; ++ ++ /* If requested flow control is set to default, set flow control ++ * based on the EEPROM flow control settings. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_default) { ++ ret_val = igb_set_default_fc(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* We want to save off the original Flow Control configuration just ++ * in case we get disconnected and then reconnected into a different ++ * hub or switch with different Flow Control capabilities. ++ */ ++ hw->fc.current_mode = hw->fc.requested_mode; ++ ++ hw_dbg("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode); ++ ++ /* Call the necessary media_type subroutine to configure the link. */ ++ ret_val = hw->mac.ops.setup_physical_interface(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Initialize the flow control address, type, and PAUSE timer ++ * registers to their default values. This is done even if flow ++ * control is disabled, because it does not hurt anything to ++ * initialize these registers. ++ */ ++ hw_dbg("Initializing the Flow Control address, type and timer regs\n"); ++ wr32(E1000_FCT, FLOW_CONTROL_TYPE); ++ wr32(E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); ++ wr32(E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); ++ ++ wr32(E1000_FCTTV, hw->fc.pause_time); ++ ++ ret_val = igb_set_fc_watermarks(hw); ++ ++out: ++ ++ return ret_val; ++} ++ ++/** ++ * igb_config_collision_dist - Configure collision distance ++ * @hw: pointer to the HW structure ++ * ++ * Configures the collision distance to the default value and is used ++ * during link setup. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. ++ **/ ++void igb_config_collision_dist(struct e1000_hw *hw) ++{ ++ u32 tctl; ++ ++ tctl = rd32(E1000_TCTL); ++ ++ tctl &= ~E1000_TCTL_COLD; ++ tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; ++ ++ wr32(E1000_TCTL, tctl); ++ wrfl(); ++} ++ ++/** ++ * igb_set_fc_watermarks - Set flow control high/low watermarks ++ * @hw: pointer to the HW structure ++ * ++ * Sets the flow control high/low threshold (watermark) registers. If ++ * flow control XON frame transmission is enabled, then set XON frame ++ * tansmission as well. ++ **/ ++static s32 igb_set_fc_watermarks(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u32 fcrtl = 0, fcrth = 0; ++ ++ /* Set the flow control receive threshold registers. Normally, ++ * these registers will be set to a default threshold that may be ++ * adjusted later by the driver's runtime code. 
However, if the ++ * ability to transmit pause frames is not enabled, then these ++ * registers will be set to 0. ++ */ ++ if (hw->fc.current_mode & e1000_fc_tx_pause) { ++ /* We need to set up the Receive Threshold high and low water ++ * marks as well as (optionally) enabling the transmission of ++ * XON frames. ++ */ ++ fcrtl = hw->fc.low_water; ++ if (hw->fc.send_xon) ++ fcrtl |= E1000_FCRTL_XONE; ++ ++ fcrth = hw->fc.high_water; ++ } ++ wr32(E1000_FCRTL, fcrtl); ++ wr32(E1000_FCRTH, fcrth); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_set_default_fc - Set flow control default values ++ * @hw: pointer to the HW structure ++ * ++ * Read the EEPROM for the default values for flow control and store the ++ * values. ++ **/ ++static s32 igb_set_default_fc(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 lan_offset; ++ u16 nvm_data; ++ ++ /* Read and store word 0x0F of the EEPROM. This word contains bits ++ * that determine the hardware's default PAUSE (flow control) mode, ++ * a bit that determines whether the HW defaults to enabling or ++ * disabling auto-negotiation, and the direction of the ++ * SW defined pins. If there is no SW over-ride of the flow ++ * control setting, then the variable hw->fc will ++ * be initialized based on a value in the EEPROM. ++ */ ++ if (hw->mac.type == e1000_i350) { ++ lan_offset = NVM_82580_LAN_FUNC_OFFSET(hw->bus.func); ++ ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG ++ + lan_offset, 1, &nvm_data); ++ } else { ++ ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, ++ 1, &nvm_data); ++ } ++ ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == 0) ++ hw->fc.requested_mode = e1000_fc_none; ++ else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == ++ NVM_WORD0F_ASM_DIR) ++ hw->fc.requested_mode = e1000_fc_tx_pause; ++ else ++ hw->fc.requested_mode = e1000_fc_full; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_force_mac_fc - Force the MAC's flow control settings ++ * @hw: pointer to the HW structure ++ * ++ * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the ++ * device control register to reflect the adapter settings. TFCE and RFCE ++ * need to be explicitly set by software when a copper PHY is used because ++ * autonegotiation is managed by the PHY rather than the MAC. Software must ++ * also configure these bits when link is forced on a fiber connection. ++ **/ ++s32 igb_force_mac_fc(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val = 0; ++ ++ ctrl = rd32(E1000_CTRL); ++ ++ /* Because we didn't get link via the internal auto-negotiation ++ * mechanism (we either forced link or we got link via PHY ++ * auto-neg), we have to manually enable/disable transmit an ++ * receive flow control. ++ * ++ * The "Case" statement below enables/disable flow control ++ * according to the "hw->fc.current_mode" parameter. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause ++ * frames but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * frames but we do not receive pause frames). ++ * 3: Both Rx and TX flow control (symmetric) is enabled. ++ * other: No other values should be possible at this point. 
++ */ ++ hw_dbg("hw->fc.current_mode = %u\n", hw->fc.current_mode); ++ ++ switch (hw->fc.current_mode) { ++ case e1000_fc_none: ++ ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); ++ break; ++ case e1000_fc_rx_pause: ++ ctrl &= (~E1000_CTRL_TFCE); ++ ctrl |= E1000_CTRL_RFCE; ++ break; ++ case e1000_fc_tx_pause: ++ ctrl &= (~E1000_CTRL_RFCE); ++ ctrl |= E1000_CTRL_TFCE; ++ break; ++ case e1000_fc_full: ++ ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); ++ break; ++ default: ++ hw_dbg("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ wr32(E1000_CTRL, ctrl); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_config_fc_after_link_up - Configures flow control after link ++ * @hw: pointer to the HW structure ++ * ++ * Checks the status of auto-negotiation after link up to ensure that the ++ * speed and duplex were not forced. If the link needed to be forced, then ++ * flow control needs to be forced also. If auto-negotiation is enabled ++ * and did not fail, then we configure flow control based on our link ++ * partner. ++ **/ ++s32 igb_config_fc_after_link_up(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val = 0; ++ u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg; ++ u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; ++ u16 speed, duplex; ++ ++ /* Check for the case where we have fiber media and auto-neg failed ++ * so we had to force link. In this case, we need to force the ++ * configuration of the MAC to match the "fc" parameter. ++ */ ++ if (mac->autoneg_failed) { ++ if (hw->phy.media_type == e1000_media_type_internal_serdes) ++ ret_val = igb_force_mac_fc(hw); ++ } else { ++ if (hw->phy.media_type == e1000_media_type_copper) ++ ret_val = igb_force_mac_fc(hw); ++ } ++ ++ if (ret_val) { ++ hw_dbg("Error forcing flow control settings\n"); ++ goto out; ++ } ++ ++ /* Check for the case where we have copper media and auto-neg is ++ * enabled. In this case, we need to check and see if Auto-Neg ++ * has completed, and if so, how the PHY and link partner has ++ * flow control configured. ++ */ ++ if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { ++ /* Read the MII Status Register and check to see if AutoNeg ++ * has completed. We read this twice because this reg has ++ * some "sticky" (latched) bits. ++ */ ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, ++ &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, ++ &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ++ if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { ++ hw_dbg("Copper PHY and Auto Neg has not completed.\n"); ++ goto out; ++ } ++ ++ /* The AutoNeg process has completed, so we now need to ++ * read both the Auto Negotiation Advertisement ++ * Register (Address 4) and the Auto_Negotiation Base ++ * Page Ability Register (Address 5) to determine how ++ * flow control was negotiated. ++ */ ++ ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV, ++ &mii_nway_adv_reg); ++ if (ret_val) ++ goto out; ++ ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY, ++ &mii_nway_lp_ability_reg); ++ if (ret_val) ++ goto out; ++ ++ /* Two bits in the Auto Negotiation Advertisement Register ++ * (Address 4) and two bits in the Auto Negotiation Base ++ * Page Ability Register (Address 5) determine flow control ++ * for both the PHY and the link partner. 
The following ++ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, ++ * 1999, describes these PAUSE resolution bits and how flow ++ * control is determined based upon these settings. ++ * NOTE: DC = Don't Care ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 0 | DC | DC | e1000_fc_none ++ * 0 | 1 | 0 | DC | e1000_fc_none ++ * 0 | 1 | 1 | 0 | e1000_fc_none ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * 1 | 0 | 0 | DC | e1000_fc_none ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * 1 | 1 | 0 | 0 | e1000_fc_none ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ * Are both PAUSE bits set to 1? If so, this implies ++ * Symmetric Flow Control is enabled at both ends. The ++ * ASM_DIR bits are irrelevant per the spec. ++ * ++ * For Symmetric Flow Control: ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | DC | 1 | DC | E1000_fc_full ++ * ++ */ ++ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { ++ /* Now we need to check if the user selected RX ONLY ++ * of pause frames. In this case, we had to advertise ++ * FULL flow control because we could not advertise RX ++ * ONLY. Hence, we must now check to see if we need to ++ * turn OFF the TRANSMISSION of PAUSE frames. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_full) { ++ hw->fc.current_mode = e1000_fc_full; ++ hw_dbg("Flow Control = FULL.\n"); ++ } else { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ hw_dbg("Flow Control = RX PAUSE frames only.\n"); ++ } ++ } ++ /* For receiving PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ */ ++ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_tx_pause; ++ hw_dbg("Flow Control = TX PAUSE frames only.\n"); ++ } ++ /* For transmitting PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ */ ++ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ hw_dbg("Flow Control = RX PAUSE frames only.\n"); ++ } ++ /* Per the IEEE spec, at this point flow control should be ++ * disabled. However, we want to consider that we could ++ * be connected to a legacy switch that doesn't advertise ++ * desired flow control, but can be forced on the link ++ * partner. So if we advertised no flow control, that is ++ * what we will resolve to. If we advertised some kind of ++ * receive capability (Rx Pause Only or Full Flow Control) ++ * and the link partner advertised none, we will configure ++ * ourselves to enable Rx Flow Control only. We can do ++ * this safely for two reasons: If the link partner really ++ * didn't want flow control enabled, and we enable Rx, no ++ * harm done since we won't be receiving any PAUSE frames ++ * anyway. 
If the intent on the link partner was to have ++ * flow control enabled, then by us enabling RX only, we ++ * can at least receive pause frames and process them. ++ * This is a good idea because in most cases, since we are ++ * predominantly a server NIC, more times than not we will ++ * be asked to delay transmission of packets than asking ++ * our link partner to pause transmission of frames. ++ */ ++ else if ((hw->fc.requested_mode == e1000_fc_none) || ++ (hw->fc.requested_mode == e1000_fc_tx_pause) || ++ (hw->fc.strict_ieee)) { ++ hw->fc.current_mode = e1000_fc_none; ++ hw_dbg("Flow Control = NONE.\n"); ++ } else { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ hw_dbg("Flow Control = RX PAUSE frames only.\n"); ++ } ++ ++ /* Now we need to do one last check... If we auto- ++ * negotiated to HALF DUPLEX, flow control should not be ++ * enabled per IEEE 802.3 spec. ++ */ ++ ret_val = hw->mac.ops.get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ hw_dbg("Error getting link speed and duplex\n"); ++ goto out; ++ } ++ ++ if (duplex == HALF_DUPLEX) ++ hw->fc.current_mode = e1000_fc_none; ++ ++ /* Now we call a subroutine to actually force the MAC ++ * controller to use the correct flow control settings. ++ */ ++ ret_val = igb_force_mac_fc(hw); ++ if (ret_val) { ++ hw_dbg("Error forcing flow control settings\n"); ++ goto out; ++ } ++ } ++ /* Check for the case where we have SerDes media and auto-neg is ++ * enabled. In this case, we need to check and see if Auto-Neg ++ * has completed, and if so, how the PHY and link partner has ++ * flow control configured. ++ */ ++ if ((hw->phy.media_type == e1000_media_type_internal_serdes) ++ && mac->autoneg) { ++ /* Read the PCS_LSTS and check to see if AutoNeg ++ * has completed. ++ */ ++ pcs_status_reg = rd32(E1000_PCS_LSTAT); ++ ++ if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) { ++ hw_dbg("PCS Auto Neg has not completed.\n"); ++ return ret_val; ++ } ++ ++ /* The AutoNeg process has completed, so we now need to ++ * read both the Auto Negotiation Advertisement ++ * Register (PCS_ANADV) and the Auto_Negotiation Base ++ * Page Ability Register (PCS_LPAB) to determine how ++ * flow control was negotiated. ++ */ ++ pcs_adv_reg = rd32(E1000_PCS_ANADV); ++ pcs_lp_ability_reg = rd32(E1000_PCS_LPAB); ++ ++ /* Two bits in the Auto Negotiation Advertisement Register ++ * (PCS_ANADV) and two bits in the Auto Negotiation Base ++ * Page Ability Register (PCS_LPAB) determine flow control ++ * for both the PHY and the link partner. The following ++ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, ++ * 1999, describes these PAUSE resolution bits and how flow ++ * control is determined based upon these settings. ++ * NOTE: DC = Don't Care ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 0 | DC | DC | e1000_fc_none ++ * 0 | 1 | 0 | DC | e1000_fc_none ++ * 0 | 1 | 1 | 0 | e1000_fc_none ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * 1 | 0 | 0 | DC | e1000_fc_none ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * 1 | 1 | 0 | 0 | e1000_fc_none ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ * Are both PAUSE bits set to 1? If so, this implies ++ * Symmetric Flow Control is enabled at both ends. The ++ * ASM_DIR bits are irrelevant per the spec. 
++ * ++ * For Symmetric Flow Control: ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * ++ */ ++ if ((pcs_adv_reg & E1000_TXCW_PAUSE) && ++ (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) { ++ /* Now we need to check if the user selected Rx ONLY ++ * of pause frames. In this case, we had to advertise ++ * FULL flow control because we could not advertise Rx ++ * ONLY. Hence, we must now check to see if we need to ++ * turn OFF the TRANSMISSION of PAUSE frames. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_full) { ++ hw->fc.current_mode = e1000_fc_full; ++ hw_dbg("Flow Control = FULL.\n"); ++ } else { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ hw_dbg("Flow Control = Rx PAUSE frames only.\n"); ++ } ++ } ++ /* For receiving PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ */ ++ else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) && ++ (pcs_adv_reg & E1000_TXCW_ASM_DIR) && ++ (pcs_lp_ability_reg & E1000_TXCW_PAUSE) && ++ (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_tx_pause; ++ hw_dbg("Flow Control = Tx PAUSE frames only.\n"); ++ } ++ /* For transmitting PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ */ ++ else if ((pcs_adv_reg & E1000_TXCW_PAUSE) && ++ (pcs_adv_reg & E1000_TXCW_ASM_DIR) && ++ !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) && ++ (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ hw_dbg("Flow Control = Rx PAUSE frames only.\n"); ++ } else { ++ /* Per the IEEE spec, at this point flow control ++ * should be disabled. ++ */ ++ hw->fc.current_mode = e1000_fc_none; ++ hw_dbg("Flow Control = NONE.\n"); ++ } ++ ++ /* Now we call a subroutine to actually force the MAC ++ * controller to use the correct flow control settings. ++ */ ++ pcs_ctrl_reg = rd32(E1000_PCS_LCTL); ++ pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL; ++ wr32(E1000_PCS_LCTL, pcs_ctrl_reg); ++ ++ ret_val = igb_force_mac_fc(hw); ++ if (ret_val) { ++ hw_dbg("Error forcing flow control settings\n"); ++ return ret_val; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_speed_and_duplex_copper - Retrieve current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Read the status register for the current speed/duplex and store the current ++ * speed and duplex for copper connections. 
++ **/ ++s32 igb_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ u32 status; ++ ++ status = rd32(E1000_STATUS); ++ if (status & E1000_STATUS_SPEED_1000) { ++ *speed = SPEED_1000; ++ hw_dbg("1000 Mbs, "); ++ } else if (status & E1000_STATUS_SPEED_100) { ++ *speed = SPEED_100; ++ hw_dbg("100 Mbs, "); ++ } else { ++ *speed = SPEED_10; ++ hw_dbg("10 Mbs, "); ++ } ++ ++ if (status & E1000_STATUS_FD) { ++ *duplex = FULL_DUPLEX; ++ hw_dbg("Full Duplex\n"); ++ } else { ++ *duplex = HALF_DUPLEX; ++ hw_dbg("Half Duplex\n"); ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_get_hw_semaphore - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ **/ ++s32 igb_get_hw_semaphore(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 ret_val = 0; ++ s32 timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ /* Get the SW semaphore */ ++ while (i < timeout) { ++ swsm = rd32(E1000_SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ udelay(50); ++ i++; ++ } ++ ++ if (i == timeout) { ++ hw_dbg("Driver can't access device - SMBI bit is set.\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ /* Get the FW semaphore. */ ++ for (i = 0; i < timeout; i++) { ++ swsm = rd32(E1000_SWSM); ++ wr32(E1000_SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (rd32(E1000_SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ udelay(50); ++ } ++ ++ if (i == timeout) { ++ /* Release semaphores */ ++ igb_put_hw_semaphore(hw); ++ hw_dbg("Driver can't access the NVM\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_put_hw_semaphore - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ **/ ++void igb_put_hw_semaphore(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ ++ swsm = rd32(E1000_SWSM); ++ ++ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); ++ ++ wr32(E1000_SWSM, swsm); ++} ++ ++/** ++ * igb_get_auto_rd_done - Check for auto read completion ++ * @hw: pointer to the HW structure ++ * ++ * Check EEPROM for Auto Read done bit. ++ **/ ++s32 igb_get_auto_rd_done(struct e1000_hw *hw) ++{ ++ s32 i = 0; ++ s32 ret_val = 0; ++ ++ ++ while (i < AUTO_READ_DONE_TIMEOUT) { ++ if (rd32(E1000_EECD) & E1000_EECD_AUTO_RD) ++ break; ++ usleep_range(1000, 2000); ++ i++; ++ } ++ ++ if (i == AUTO_READ_DONE_TIMEOUT) { ++ hw_dbg("Auto read by HW from NVM has not completed.\n"); ++ ret_val = -E1000_ERR_RESET; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_valid_led_default - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. 
++ **/ ++static s32 igb_valid_led_default(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { ++ switch (hw->phy.media_type) { ++ case e1000_media_type_internal_serdes: ++ *data = ID_LED_DEFAULT_82575_SERDES; ++ break; ++ case e1000_media_type_copper: ++ default: ++ *data = ID_LED_DEFAULT; ++ break; ++ } ++ } ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_id_led_init - ++ * @hw: pointer to the HW structure ++ * ++ **/ ++s32 igb_id_led_init(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ const u32 ledctl_mask = 0x000000FF; ++ const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; ++ const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; ++ u16 data, i, temp; ++ const u16 led_mask = 0x0F; ++ ++ /* i210 and i211 devices have different LED mechanism */ ++ if ((hw->mac.type == e1000_i210) || ++ (hw->mac.type == e1000_i211)) ++ ret_val = igb_valid_led_default_i210(hw, &data); ++ else ++ ret_val = igb_valid_led_default(hw, &data); ++ ++ if (ret_val) ++ goto out; ++ ++ mac->ledctl_default = rd32(E1000_LEDCTL); ++ mac->ledctl_mode1 = mac->ledctl_default; ++ mac->ledctl_mode2 = mac->ledctl_default; ++ ++ for (i = 0; i < 4; i++) { ++ temp = (data >> (i << 2)) & led_mask; ++ switch (temp) { ++ case ID_LED_ON1_DEF2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_ON1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_OFF1_DEF2: ++ case ID_LED_OFF1_ON2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ switch (temp) { ++ case ID_LED_DEF1_ON2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_OFF1_ON2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_DEF1_OFF2: ++ case ID_LED_ON1_OFF2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_cleanup_led - Set LED config to default operation ++ * @hw: pointer to the HW structure ++ * ++ * Remove the current LED configuration and set the LED configuration ++ * to the default value, saved from the EEPROM. ++ **/ ++s32 igb_cleanup_led(struct e1000_hw *hw) ++{ ++ wr32(E1000_LEDCTL, hw->mac.ledctl_default); ++ return 0; ++} ++ ++/** ++ * igb_blink_led - Blink LED ++ * @hw: pointer to the HW structure ++ * ++ * Blink the led's which are set to be on. ++ **/ ++s32 igb_blink_led(struct e1000_hw *hw) ++{ ++ u32 ledctl_blink = 0; ++ u32 i; ++ ++ if (hw->phy.media_type == e1000_media_type_fiber) { ++ /* always blink LED0 for PCI-E fiber */ ++ ledctl_blink = E1000_LEDCTL_LED0_BLINK | ++ (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); ++ } else { ++ /* Set the blink bit for each LED that's "on" (0x0E) ++ * (or "off" if inverted) in ledctl_mode2. The blink ++ * logic in hardware only works when mode is set to "on" ++ * so it must be changed accordingly when the mode is ++ * "off" and inverted. 
++ */ ++ ledctl_blink = hw->mac.ledctl_mode2; ++ for (i = 0; i < 32; i += 8) { ++ u32 mode = (hw->mac.ledctl_mode2 >> i) & ++ E1000_LEDCTL_LED0_MODE_MASK; ++ u32 led_default = hw->mac.ledctl_default >> i; ++ ++ if ((!(led_default & E1000_LEDCTL_LED0_IVRT) && ++ (mode == E1000_LEDCTL_MODE_LED_ON)) || ++ ((led_default & E1000_LEDCTL_LED0_IVRT) && ++ (mode == E1000_LEDCTL_MODE_LED_OFF))) { ++ ledctl_blink &= ++ ~(E1000_LEDCTL_LED0_MODE_MASK << i); ++ ledctl_blink |= (E1000_LEDCTL_LED0_BLINK | ++ E1000_LEDCTL_MODE_LED_ON) << i; ++ } ++ } ++ } ++ ++ wr32(E1000_LEDCTL, ledctl_blink); ++ ++ return 0; ++} ++ ++/** ++ * igb_led_off - Turn LED off ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED off. ++ **/ ++s32 igb_led_off(struct e1000_hw *hw) ++{ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ wr32(E1000_LEDCTL, hw->mac.ledctl_mode1); ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_disable_pcie_master - Disables PCI-express master access ++ * @hw: pointer to the HW structure ++ * ++ * Returns 0 (0) if successful, else returns -10 ++ * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused ++ * the master requests to be disabled. ++ * ++ * Disables PCI-Express master access and verifies there are no pending ++ * requests. ++ **/ ++s32 igb_disable_pcie_master(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 timeout = MASTER_DISABLE_TIMEOUT; ++ s32 ret_val = 0; ++ ++ if (hw->bus.type != e1000_bus_type_pci_express) ++ goto out; ++ ++ ctrl = rd32(E1000_CTRL); ++ ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; ++ wr32(E1000_CTRL, ctrl); ++ ++ while (timeout) { ++ if (!(rd32(E1000_STATUS) & ++ E1000_STATUS_GIO_MASTER_ENABLE)) ++ break; ++ udelay(100); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ hw_dbg("Master requests are pending.\n"); ++ ret_val = -E1000_ERR_MASTER_REQUESTS_PENDING; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_validate_mdi_setting - Verify MDI/MDIx settings ++ * @hw: pointer to the HW structure ++ * ++ * Verify that when not using auto-negotitation that MDI/MDIx is correctly ++ * set, which is forced to MDI mode only. ++ **/ ++s32 igb_validate_mdi_setting(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ /* All MDI settings are supported on 82580 and newer. */ ++ if (hw->mac.type >= e1000_82580) ++ goto out; ++ ++ if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) { ++ hw_dbg("Invalid MDI setting detected\n"); ++ hw->phy.mdix = 1; ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_8bit_ctrl_reg - Write a 8bit CTRL register ++ * @hw: pointer to the HW structure ++ * @reg: 32bit register offset such as E1000_SCTL ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes an address/data control type register. There are several of these ++ * and they all have the format address << 8 | data and bit 31 is polled for ++ * completion. 
++ **/ ++s32 igb_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, ++ u32 offset, u8 data) ++{ ++ u32 i, regvalue = 0; ++ s32 ret_val = 0; ++ ++ /* Set up the address and data */ ++ regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT); ++ wr32(reg, regvalue); ++ ++ /* Poll the ready bit to see if the MDI read completed */ ++ for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) { ++ udelay(5); ++ regvalue = rd32(reg); ++ if (regvalue & E1000_GEN_CTL_READY) ++ break; ++ } ++ if (!(regvalue & E1000_GEN_CTL_READY)) { ++ hw_dbg("Reg %08x did not indicate ready\n", reg); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_enable_mng_pass_thru - Enable processing of ARP's ++ * @hw: pointer to the HW structure ++ * ++ * Verifies the hardware needs to leave interface enabled so that frames can ++ * be directed to and from the management interface. ++ **/ ++bool igb_enable_mng_pass_thru(struct e1000_hw *hw) ++{ ++ u32 manc; ++ u32 fwsm, factps; ++ bool ret_val = false; ++ ++ if (!hw->mac.asf_firmware_present) ++ goto out; ++ ++ manc = rd32(E1000_MANC); ++ ++ if (!(manc & E1000_MANC_RCV_TCO_EN)) ++ goto out; ++ ++ if (hw->mac.arc_subsystem_valid) { ++ fwsm = rd32(E1000_FWSM); ++ factps = rd32(E1000_FACTPS); ++ ++ if (!(factps & E1000_FACTPS_MNGCG) && ++ ((fwsm & E1000_FWSM_MODE_MASK) == ++ (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT))) { ++ ret_val = true; ++ goto out; ++ } ++ } else { ++ if ((manc & E1000_MANC_SMBUS_EN) && ++ !(manc & E1000_MANC_ASF_EN)) { ++ ret_val = true; ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} +--- linux/drivers/xenomai/net/drivers/igb/e1000_mbx.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_mbx.c 2021-04-07 16:01:27.462633830 +0800 +@@ -0,0 +1,443 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#include "e1000_mbx.h" ++ ++/** ++ * igb_read_mbx - Reads a message from the mailbox ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @mbx_id: id of mailbox to read ++ * ++ * returns SUCCESS if it successfully read message from buffer ++ **/ ++s32 igb_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ /* limit read to size of mailbox */ ++ if (size > mbx->size) ++ size = mbx->size; ++ ++ if (mbx->ops.read) ++ ret_val = mbx->ops.read(hw, msg, size, mbx_id); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_write_mbx - Write a message to the mailbox ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @mbx_id: id of mailbox to write ++ * ++ * returns SUCCESS if it successfully copied message into the buffer ++ **/ ++s32 igb_write_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = 0; ++ ++ if (size > mbx->size) ++ ret_val = -E1000_ERR_MBX; ++ ++ else if (mbx->ops.write) ++ ret_val = mbx->ops.write(hw, msg, size, mbx_id); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_msg - checks to see if someone sent us mail ++ * @hw: pointer to the HW structure ++ * @mbx_id: id of mailbox to check ++ * ++ * returns SUCCESS if the Status bit was found or else ERR_MBX ++ **/ ++s32 igb_check_for_msg(struct e1000_hw *hw, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (mbx->ops.check_for_msg) ++ ret_val = mbx->ops.check_for_msg(hw, mbx_id); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_ack - checks to see if someone sent us ACK ++ * @hw: pointer to the HW structure ++ * @mbx_id: id of mailbox to check ++ * ++ * returns SUCCESS if the Status bit was found or else ERR_MBX ++ **/ ++s32 igb_check_for_ack(struct e1000_hw *hw, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (mbx->ops.check_for_ack) ++ ret_val = mbx->ops.check_for_ack(hw, mbx_id); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_rst - checks to see if other side has reset ++ * @hw: pointer to the HW structure ++ * @mbx_id: id of mailbox to check ++ * ++ * returns SUCCESS if the Status bit was found or else ERR_MBX ++ **/ ++s32 igb_check_for_rst(struct e1000_hw *hw, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (mbx->ops.check_for_rst) ++ ret_val = mbx->ops.check_for_rst(hw, mbx_id); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_poll_for_msg - Wait for message notification ++ * @hw: pointer to the HW structure ++ * @mbx_id: id of mailbox to write ++ * ++ * returns SUCCESS if it successfully received a message notification ++ **/ ++static s32 igb_poll_for_msg(struct e1000_hw *hw, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ int countdown = mbx->timeout; ++ ++ if (!countdown || !mbx->ops.check_for_msg) ++ goto out; ++ ++ while (countdown && mbx->ops.check_for_msg(hw, mbx_id)) { ++ countdown--; ++ if (!countdown) ++ break; ++ udelay(mbx->usec_delay); ++ } ++ ++ /* if we failed, all future posted messages fail until reset */ ++ if (!countdown) ++ mbx->timeout = 0; ++out: ++ return countdown ? 
0 : -E1000_ERR_MBX; ++} ++ ++/** ++ * igb_poll_for_ack - Wait for message acknowledgement ++ * @hw: pointer to the HW structure ++ * @mbx_id: id of mailbox to write ++ * ++ * returns SUCCESS if it successfully received a message acknowledgement ++ **/ ++static s32 igb_poll_for_ack(struct e1000_hw *hw, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ int countdown = mbx->timeout; ++ ++ if (!countdown || !mbx->ops.check_for_ack) ++ goto out; ++ ++ while (countdown && mbx->ops.check_for_ack(hw, mbx_id)) { ++ countdown--; ++ if (!countdown) ++ break; ++ udelay(mbx->usec_delay); ++ } ++ ++ /* if we failed, all future posted messages fail until reset */ ++ if (!countdown) ++ mbx->timeout = 0; ++out: ++ return countdown ? 0 : -E1000_ERR_MBX; ++} ++ ++/** ++ * igb_read_posted_mbx - Wait for message notification and receive message ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @mbx_id: id of mailbox to write ++ * ++ * returns SUCCESS if it successfully received a message notification and ++ * copied it into the receive buffer. ++ **/ ++static s32 igb_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, ++ u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (!mbx->ops.read) ++ goto out; ++ ++ ret_val = igb_poll_for_msg(hw, mbx_id); ++ ++ if (!ret_val) ++ ret_val = mbx->ops.read(hw, msg, size, mbx_id); ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_posted_mbx - Write a message to the mailbox, wait for ack ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @mbx_id: id of mailbox to write ++ * ++ * returns SUCCESS if it successfully copied message into the buffer and ++ * received an ack to that message within delay * timeout period ++ **/ ++static s32 igb_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, ++ u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ /* exit if either we can't write or there isn't a defined timeout */ ++ if (!mbx->ops.write || !mbx->timeout) ++ goto out; ++ ++ /* send msg */ ++ ret_val = mbx->ops.write(hw, msg, size, mbx_id); ++ ++ /* if msg sent wait until we receive an ack */ ++ if (!ret_val) ++ ret_val = igb_poll_for_ack(hw, mbx_id); ++out: ++ return ret_val; ++} ++ ++static s32 igb_check_for_bit_pf(struct e1000_hw *hw, u32 mask) ++{ ++ u32 mbvficr = rd32(E1000_MBVFICR); ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (mbvficr & mask) { ++ ret_val = 0; ++ wr32(E1000_MBVFICR, mask); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_msg_pf - checks to see if the VF has sent mail ++ * @hw: pointer to the HW structure ++ * @vf_number: the VF index ++ * ++ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX ++ **/ ++static s32 igb_check_for_msg_pf(struct e1000_hw *hw, u16 vf_number) ++{ ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (!igb_check_for_bit_pf(hw, E1000_MBVFICR_VFREQ_VF1 << vf_number)) { ++ ret_val = 0; ++ hw->mbx.stats.reqs++; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_ack_pf - checks to see if the VF has ACKed ++ * @hw: pointer to the HW structure ++ * @vf_number: the VF index ++ * ++ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX ++ **/ ++static s32 igb_check_for_ack_pf(struct e1000_hw *hw, u16 vf_number) ++{ ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (!igb_check_for_bit_pf(hw, E1000_MBVFICR_VFACK_VF1 << vf_number)) { ++ ret_val = 0; ++ hw->mbx.stats.acks++; ++ } ++ ++ return ret_val; ++} 
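For orientation: the generic igb_read_mbx()/igb_write_mbx() wrappers above only do real work after igb_init_mbx_params_pf(), defined later in this same file, has populated hw->mbx.ops with the *_pf implementations. The following is a minimal, hypothetical PF-side sketch of that flow; it is illustrative only, not part of the patch, and the function name and the elided request handling are assumptions:

/* Illustrative sketch only -- not part of the patch above.  It shows how
 * the PF-side helpers are meant to be driven once igb_init_mbx_params_pf()
 * has filled in hw->mbx.ops.  The request handling is elided and the
 * function name is hypothetical.
 */
static void example_pf_service_vf_mailbox(struct e1000_hw *hw, u16 vf)
{
	u32 msg[E1000_VFMAILBOX_SIZE];

	/* 0 from igb_check_for_msg() means the VF has posted a request */
	if (igb_check_for_msg(hw, vf))
		return;

	/* fetch the request; igb_read_mbx() caps the copy at mbx->size */
	if (igb_read_mbx(hw, msg, E1000_VFMAILBOX_SIZE, vf))
		return;

	/* ... decode msg[0], build a reply in msg[] ... */

	/* post the reply; the VF is notified via the P2VMAILBOX STS bit */
	igb_write_mbx(hw, msg, 1, vf);
}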
++ ++/** ++ * igb_check_for_rst_pf - checks to see if the VF has reset ++ * @hw: pointer to the HW structure ++ * @vf_number: the VF index ++ * ++ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX ++ **/ ++static s32 igb_check_for_rst_pf(struct e1000_hw *hw, u16 vf_number) ++{ ++ u32 vflre = rd32(E1000_VFLRE); ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (vflre & (1 << vf_number)) { ++ ret_val = 0; ++ wr32(E1000_VFLRE, (1 << vf_number)); ++ hw->mbx.stats.rsts++; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_obtain_mbx_lock_pf - obtain mailbox lock ++ * @hw: pointer to the HW structure ++ * @vf_number: the VF index ++ * ++ * return SUCCESS if we obtained the mailbox lock ++ **/ ++static s32 igb_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) ++{ ++ s32 ret_val = -E1000_ERR_MBX; ++ u32 p2v_mailbox; ++ ++ /* Take ownership of the buffer */ ++ wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); ++ ++ /* reserve mailbox for vf use */ ++ p2v_mailbox = rd32(E1000_P2VMAILBOX(vf_number)); ++ if (p2v_mailbox & E1000_P2VMAILBOX_PFU) ++ ret_val = 0; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_write_mbx_pf - Places a message in the mailbox ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @vf_number: the VF index ++ * ++ * returns SUCCESS if it successfully copied message into the buffer ++ **/ ++static s32 igb_write_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, ++ u16 vf_number) ++{ ++ s32 ret_val; ++ u16 i; ++ ++ /* lock the mailbox to prevent pf/vf race condition */ ++ ret_val = igb_obtain_mbx_lock_pf(hw, vf_number); ++ if (ret_val) ++ goto out_no_write; ++ ++ /* flush msg and acks as we are overwriting the message buffer */ ++ igb_check_for_msg_pf(hw, vf_number); ++ igb_check_for_ack_pf(hw, vf_number); ++ ++ /* copy the caller specified message to the mailbox memory buffer */ ++ for (i = 0; i < size; i++) ++ array_wr32(E1000_VMBMEM(vf_number), i, msg[i]); ++ ++ /* Interrupt VF to tell it a message has been sent and release buffer*/ ++ wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_STS); ++ ++ /* update stats */ ++ hw->mbx.stats.msgs_tx++; ++ ++out_no_write: ++ return ret_val; ++ ++} ++ ++/** ++ * igb_read_mbx_pf - Read a message from the mailbox ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @vf_number: the VF index ++ * ++ * This function copies a message from the mailbox buffer to the caller's ++ * memory buffer. The presumption is that the caller knows that there was ++ * a message due to a VF request so no polling for message is needed. 
++ **/ ++static s32 igb_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, ++ u16 vf_number) ++{ ++ s32 ret_val; ++ u16 i; ++ ++ /* lock the mailbox to prevent pf/vf race condition */ ++ ret_val = igb_obtain_mbx_lock_pf(hw, vf_number); ++ if (ret_val) ++ goto out_no_read; ++ ++ /* copy the message to the mailbox memory buffer */ ++ for (i = 0; i < size; i++) ++ msg[i] = array_rd32(E1000_VMBMEM(vf_number), i); ++ ++ /* Acknowledge the message and release buffer */ ++ wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_ACK); ++ ++ /* update stats */ ++ hw->mbx.stats.msgs_rx++; ++ ++out_no_read: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_mbx_params_pf - set initial values for pf mailbox ++ * @hw: pointer to the HW structure ++ * ++ * Initializes the hw->mbx struct to correct values for pf mailbox ++ */ ++s32 igb_init_mbx_params_pf(struct e1000_hw *hw) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ ++ mbx->timeout = 0; ++ mbx->usec_delay = 0; ++ ++ mbx->size = E1000_VFMAILBOX_SIZE; ++ ++ mbx->ops.read = igb_read_mbx_pf; ++ mbx->ops.write = igb_write_mbx_pf; ++ mbx->ops.read_posted = igb_read_posted_mbx; ++ mbx->ops.write_posted = igb_write_posted_mbx; ++ mbx->ops.check_for_msg = igb_check_for_msg_pf; ++ mbx->ops.check_for_ack = igb_check_for_ack_pf; ++ mbx->ops.check_for_rst = igb_check_for_rst_pf; ++ ++ mbx->stats.msgs_tx = 0; ++ mbx->stats.msgs_rx = 0; ++ mbx->stats.reqs = 0; ++ mbx->stats.acks = 0; ++ mbx->stats.rsts = 0; ++ ++ return 0; ++} ++ +--- linux/drivers/xenomai/net/drivers/igb/e1000_i210.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_i210.c 2021-04-07 16:01:27.457633838 +0800 +@@ -0,0 +1,902 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++/* e1000_i210 ++ * e1000_i211 ++ */ ++ ++#include ++#include ++ ++#include "e1000_hw.h" ++#include "e1000_i210.h" ++ ++static s32 igb_update_flash_i210(struct e1000_hw *hw); ++ ++/** ++ * igb_get_hw_semaphore_i210 - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ */ ++static s32 igb_get_hw_semaphore_i210(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ /* Get the SW semaphore */ ++ while (i < timeout) { ++ swsm = rd32(E1000_SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ udelay(50); ++ i++; ++ } ++ ++ if (i == timeout) { ++ /* In rare circumstances, the SW semaphore may already be held ++ * unintentionally. Clear the semaphore once before giving up. 
++ */ ++ if (hw->dev_spec._82575.clear_semaphore_once) { ++ hw->dev_spec._82575.clear_semaphore_once = false; ++ igb_put_hw_semaphore(hw); ++ for (i = 0; i < timeout; i++) { ++ swsm = rd32(E1000_SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ udelay(50); ++ } ++ } ++ ++ /* If we do not have the semaphore here, we have to give up. */ ++ if (i == timeout) { ++ hw_dbg("Driver can't access device - SMBI bit is set.\n"); ++ return -E1000_ERR_NVM; ++ } ++ } ++ ++ /* Get the FW semaphore. */ ++ for (i = 0; i < timeout; i++) { ++ swsm = rd32(E1000_SWSM); ++ wr32(E1000_SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (rd32(E1000_SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ udelay(50); ++ } ++ ++ if (i == timeout) { ++ /* Release semaphores */ ++ igb_put_hw_semaphore(hw); ++ hw_dbg("Driver can't access the NVM\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_acquire_nvm_i210 - Request for access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the necessary semaphores for exclusive access to the EEPROM. ++ * Set the EEPROM access request bit and wait for EEPROM access grant bit. ++ * Return successful if access grant bit set, else clear the request for ++ * EEPROM access and return -E1000_ERR_NVM (-1). ++ **/ ++static s32 igb_acquire_nvm_i210(struct e1000_hw *hw) ++{ ++ return igb_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); ++} ++ ++/** ++ * igb_release_nvm_i210 - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit, ++ * then release the semaphores acquired. ++ **/ ++static void igb_release_nvm_i210(struct e1000_hw *hw) ++{ ++ igb_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); ++} ++ ++/** ++ * igb_acquire_swfw_sync_i210 - Acquire SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask ++ * will also specify which port we're acquiring the lock for. ++ **/ ++s32 igb_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ u32 swmask = mask; ++ u32 fwmask = mask << 16; ++ s32 ret_val = 0; ++ s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ ++ ++ while (i < timeout) { ++ if (igb_get_hw_semaphore_i210(hw)) { ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync = rd32(E1000_SW_FW_SYNC); ++ if (!(swfw_sync & (fwmask | swmask))) ++ break; ++ ++ /* Firmware currently using resource (fwmask) */ ++ igb_put_hw_semaphore(hw); ++ mdelay(5); ++ i++; ++ } ++ ++ if (i == timeout) { ++ hw_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n"); ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync |= swmask; ++ wr32(E1000_SW_FW_SYNC, swfw_sync); ++ ++ igb_put_hw_semaphore(hw); ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_release_swfw_sync_i210 - Release SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Release the SW/FW semaphore used to access the PHY or NVM. The mask ++ * will also specify which port we're releasing the lock for. 
++ **/ ++void igb_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ ++ while (igb_get_hw_semaphore_i210(hw)) ++ ; /* Empty */ ++ ++ swfw_sync = rd32(E1000_SW_FW_SYNC); ++ swfw_sync &= ~mask; ++ wr32(E1000_SW_FW_SYNC, swfw_sync); ++ ++ igb_put_hw_semaphore(hw); ++} ++ ++/** ++ * igb_read_nvm_srrd_i210 - Reads Shadow Ram using EERD register ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the Shadow Ram to read ++ * @words: number of words to read ++ * @data: word read from the Shadow Ram ++ * ++ * Reads a 16 bit word from the Shadow Ram using the EERD register. ++ * Uses necessary synchronization semaphores. ++ **/ ++static s32 igb_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ s32 status = 0; ++ u16 i, count; ++ ++ /* We cannot hold synchronization semaphores for too long, ++ * because of forceful takeover procedure. However it is more efficient ++ * to read in bursts than synchronizing access for each word. ++ */ ++ for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { ++ count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? ++ E1000_EERD_EEWR_MAX_COUNT : (words - i); ++ if (!(hw->nvm.ops.acquire(hw))) { ++ status = igb_read_nvm_eerd(hw, offset, count, ++ data + i); ++ hw->nvm.ops.release(hw); ++ } else { ++ status = E1000_ERR_SWFW_SYNC; ++ } ++ ++ if (status) ++ break; ++ } ++ ++ return status; ++} ++ ++/** ++ * igb_write_nvm_srwr - Write to Shadow Ram using EEWR ++ * @hw: pointer to the HW structure ++ * @offset: offset within the Shadow Ram to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the Shadow Ram ++ * ++ * Writes data to Shadow Ram at offset using EEWR register. ++ * ++ * If igb_update_nvm_checksum is not called after this function , the ++ * Shadow Ram will most likely contain an invalid checksum. ++ **/ ++static s32 igb_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, k, eewr = 0; ++ u32 attempts = 100000; ++ s32 ret_val = 0; ++ ++ /* A check for invalid values: offset too large, too many words, ++ * too many words for the offset, and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ hw_dbg("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eewr = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | ++ (data[i] << E1000_NVM_RW_REG_DATA) | ++ E1000_NVM_RW_REG_START; ++ ++ wr32(E1000_SRWR, eewr); ++ ++ for (k = 0; k < attempts; k++) { ++ if (E1000_NVM_RW_REG_DONE & ++ rd32(E1000_SRWR)) { ++ ret_val = 0; ++ break; ++ } ++ udelay(5); ++ } ++ ++ if (ret_val) { ++ hw_dbg("Shadow RAM write EEWR timed out\n"); ++ break; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_nvm_srwr_i210 - Write to Shadow RAM using EEWR ++ * @hw: pointer to the HW structure ++ * @offset: offset within the Shadow RAM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the Shadow RAM ++ * ++ * Writes data to Shadow RAM at offset using EEWR register. ++ * ++ * If e1000_update_nvm_checksum is not called after this function , the ++ * data will not be committed to FLASH and also Shadow RAM will most likely ++ * contain an invalid checksum. ++ * ++ * If error code is returned, data and Shadow RAM may be inconsistent - buffer ++ * partially written. 
++ **/ ++static s32 igb_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ s32 status = 0; ++ u16 i, count; ++ ++ /* We cannot hold synchronization semaphores for too long, ++ * because of forceful takeover procedure. However it is more efficient ++ * to write in bursts than synchronizing access for each word. ++ */ ++ for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { ++ count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? ++ E1000_EERD_EEWR_MAX_COUNT : (words - i); ++ if (!(hw->nvm.ops.acquire(hw))) { ++ status = igb_write_nvm_srwr(hw, offset, count, ++ data + i); ++ hw->nvm.ops.release(hw); ++ } else { ++ status = E1000_ERR_SWFW_SYNC; ++ } ++ ++ if (status) ++ break; ++ } ++ ++ return status; ++} ++ ++/** ++ * igb_read_invm_word_i210 - Reads OTP ++ * @hw: pointer to the HW structure ++ * @address: the word address (aka eeprom offset) to read ++ * @data: pointer to the data read ++ * ++ * Reads 16-bit words from the OTP. Return error when the word is not ++ * stored in OTP. ++ **/ ++static s32 igb_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data) ++{ ++ s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; ++ u32 invm_dword; ++ u16 i; ++ u8 record_type, word_address; ++ ++ for (i = 0; i < E1000_INVM_SIZE; i++) { ++ invm_dword = rd32(E1000_INVM_DATA_REG(i)); ++ /* Get record type */ ++ record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword); ++ if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE) ++ break; ++ if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE) ++ i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS; ++ if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE) ++ i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS; ++ if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) { ++ word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword); ++ if (word_address == address) { ++ *data = INVM_DWORD_TO_WORD_DATA(invm_dword); ++ hw_dbg("Read INVM Word 0x%02x = %x\n", ++ address, *data); ++ status = 0; ++ break; ++ } ++ } ++ } ++ if (status) ++ hw_dbg("Requested word 0x%02x not found in OTP\n", address); ++ return status; ++} ++ ++/** ++ * igb_read_invm_i210 - Read invm wrapper function for I210/I211 ++ * @hw: pointer to the HW structure ++ * @words: number of words to read ++ * @data: pointer to the data read ++ * ++ * Wrapper function to return data formerly found in the NVM. 
++ **/ ++static s32 igb_read_invm_i210(struct e1000_hw *hw, u16 offset, ++ u16 words __always_unused, u16 *data) ++{ ++ s32 ret_val = 0; ++ ++ /* Only the MAC addr is required to be present in the iNVM */ ++ switch (offset) { ++ case NVM_MAC_ADDR: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, &data[0]); ++ ret_val |= igb_read_invm_word_i210(hw, (u8)offset+1, ++ &data[1]); ++ ret_val |= igb_read_invm_word_i210(hw, (u8)offset+2, ++ &data[2]); ++ if (ret_val) ++ hw_dbg("MAC Addr not found in iNVM\n"); ++ break; ++ case NVM_INIT_CTRL_2: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data); ++ if (ret_val) { ++ *data = NVM_INIT_CTRL_2_DEFAULT_I211; ++ ret_val = 0; ++ } ++ break; ++ case NVM_INIT_CTRL_4: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data); ++ if (ret_val) { ++ *data = NVM_INIT_CTRL_4_DEFAULT_I211; ++ ret_val = 0; ++ } ++ break; ++ case NVM_LED_1_CFG: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data); ++ if (ret_val) { ++ *data = NVM_LED_1_CFG_DEFAULT_I211; ++ ret_val = 0; ++ } ++ break; ++ case NVM_LED_0_2_CFG: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data); ++ if (ret_val) { ++ *data = NVM_LED_0_2_CFG_DEFAULT_I211; ++ ret_val = 0; ++ } ++ break; ++ case NVM_ID_LED_SETTINGS: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data); ++ if (ret_val) { ++ *data = ID_LED_RESERVED_FFFF; ++ ret_val = 0; ++ } ++ break; ++ case NVM_SUB_DEV_ID: ++ *data = hw->subsystem_device_id; ++ break; ++ case NVM_SUB_VEN_ID: ++ *data = hw->subsystem_vendor_id; ++ break; ++ case NVM_DEV_ID: ++ *data = hw->device_id; ++ break; ++ case NVM_VEN_ID: ++ *data = hw->vendor_id; ++ break; ++ default: ++ hw_dbg("NVM word 0x%02x is not mapped.\n", offset); ++ *data = NVM_RESERVED_WORD; ++ break; ++ } ++ return ret_val; ++} ++ ++/** ++ * igb_read_invm_version - Reads iNVM version and image type ++ * @hw: pointer to the HW structure ++ * @invm_ver: version structure for the version read ++ * ++ * Reads iNVM version and image type. 
++ **/ ++s32 igb_read_invm_version(struct e1000_hw *hw, ++ struct e1000_fw_version *invm_ver) { ++ u32 *record = NULL; ++ u32 *next_record = NULL; ++ u32 i = 0; ++ u32 invm_dword = 0; ++ u32 invm_blocks = E1000_INVM_SIZE - (E1000_INVM_ULT_BYTES_SIZE / ++ E1000_INVM_RECORD_SIZE_IN_BYTES); ++ u32 buffer[E1000_INVM_SIZE]; ++ s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; ++ u16 version = 0; ++ ++ /* Read iNVM memory */ ++ for (i = 0; i < E1000_INVM_SIZE; i++) { ++ invm_dword = rd32(E1000_INVM_DATA_REG(i)); ++ buffer[i] = invm_dword; ++ } ++ ++ /* Read version number */ ++ for (i = 1; i < invm_blocks; i++) { ++ record = &buffer[invm_blocks - i]; ++ next_record = &buffer[invm_blocks - i + 1]; ++ ++ /* Check if we have first version location used */ ++ if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) { ++ version = 0; ++ status = 0; ++ break; ++ } ++ /* Check if we have second version location used */ ++ else if ((i == 1) && ++ ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) { ++ version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; ++ status = 0; ++ break; ++ } ++ /* Check if we have odd version location ++ * used and it is the last one used ++ */ ++ else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) && ++ ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) && ++ (i != 1))) { ++ version = (*next_record & E1000_INVM_VER_FIELD_TWO) ++ >> 13; ++ status = 0; ++ break; ++ } ++ /* Check if we have even version location ++ * used and it is the last one used ++ */ ++ else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) && ++ ((*record & 0x3) == 0)) { ++ version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; ++ status = 0; ++ break; ++ } ++ } ++ ++ if (!status) { ++ invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK) ++ >> E1000_INVM_MAJOR_SHIFT; ++ invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK; ++ } ++ /* Read Image Type */ ++ for (i = 1; i < invm_blocks; i++) { ++ record = &buffer[invm_blocks - i]; ++ next_record = &buffer[invm_blocks - i + 1]; ++ ++ /* Check if we have image type in first location used */ ++ if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) { ++ invm_ver->invm_img_type = 0; ++ status = 0; ++ break; ++ } ++ /* Check if we have image type in first location used */ ++ else if ((((*record & 0x3) == 0) && ++ ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) || ++ ((((*record & 0x3) != 0) && (i != 1)))) { ++ invm_ver->invm_img_type = ++ (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23; ++ status = 0; ++ break; ++ } ++ } ++ return status; ++} ++ ++/** ++ * igb_validate_nvm_checksum_i210 - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. ++ **/ ++static s32 igb_validate_nvm_checksum_i210(struct e1000_hw *hw) ++{ ++ s32 status = 0; ++ s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *); ++ ++ if (!(hw->nvm.ops.acquire(hw))) { ++ ++ /* Replace the read function with semaphore grabbing with ++ * the one that skips this for a while. ++ * We have semaphore taken already here. ++ */ ++ read_op_ptr = hw->nvm.ops.read; ++ hw->nvm.ops.read = igb_read_nvm_eerd; ++ ++ status = igb_validate_nvm_checksum(hw); ++ ++ /* Revert original read operation. 
*/ ++ hw->nvm.ops.read = read_op_ptr; ++ ++ hw->nvm.ops.release(hw); ++ } else { ++ status = E1000_ERR_SWFW_SYNC; ++ } ++ ++ return status; ++} ++ ++/** ++ * igb_update_nvm_checksum_i210 - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. Next commit EEPROM data onto the Flash. ++ **/ ++static s32 igb_update_nvm_checksum_i210(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ /* Read the first word from the EEPROM. If this times out or fails, do ++ * not continue or we could be in for a very long wait while every ++ * EEPROM read fails ++ */ ++ ret_val = igb_read_nvm_eerd(hw, 0, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("EEPROM read failed\n"); ++ goto out; ++ } ++ ++ if (!(hw->nvm.ops.acquire(hw))) { ++ /* Do not use hw->nvm.ops.write, hw->nvm.ops.read ++ * because we do not want to take the synchronization ++ * semaphores twice here. ++ */ ++ ++ for (i = 0; i < NVM_CHECKSUM_REG; i++) { ++ ret_val = igb_read_nvm_eerd(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ hw->nvm.ops.release(hw); ++ hw_dbg("NVM Read Error while updating checksum.\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ checksum = (u16) NVM_SUM - checksum; ++ ret_val = igb_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, ++ &checksum); ++ if (ret_val) { ++ hw->nvm.ops.release(hw); ++ hw_dbg("NVM Write Error while updating checksum.\n"); ++ goto out; ++ } ++ ++ hw->nvm.ops.release(hw); ++ ++ ret_val = igb_update_flash_i210(hw); ++ } else { ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ } ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_pool_flash_update_done_i210 - Pool FLUDONE status. ++ * @hw: pointer to the HW structure ++ * ++ **/ ++static s32 igb_pool_flash_update_done_i210(struct e1000_hw *hw) ++{ ++ s32 ret_val = -E1000_ERR_NVM; ++ u32 i, reg; ++ ++ for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) { ++ reg = rd32(E1000_EECD); ++ if (reg & E1000_EECD_FLUDONE_I210) { ++ ret_val = 0; ++ break; ++ } ++ udelay(5); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_get_flash_presence_i210 - Check if flash device is detected. ++ * @hw: pointer to the HW structure ++ * ++ **/ ++bool igb_get_flash_presence_i210(struct e1000_hw *hw) ++{ ++ u32 eec = 0; ++ bool ret_val = false; ++ ++ eec = rd32(E1000_EECD); ++ if (eec & E1000_EECD_FLASH_DETECTED_I210) ++ ret_val = true; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_update_flash_i210 - Commit EEPROM to the flash ++ * @hw: pointer to the HW structure ++ * ++ **/ ++static s32 igb_update_flash_i210(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u32 flup; ++ ++ ret_val = igb_pool_flash_update_done_i210(hw); ++ if (ret_val == -E1000_ERR_NVM) { ++ hw_dbg("Flash update time out\n"); ++ goto out; ++ } ++ ++ flup = rd32(E1000_EECD) | E1000_EECD_FLUPD_I210; ++ wr32(E1000_EECD, flup); ++ ++ ret_val = igb_pool_flash_update_done_i210(hw); ++ if (ret_val) ++ hw_dbg("Flash update complete\n"); ++ else ++ hw_dbg("Flash update time out\n"); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_valid_led_default_i210 - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. 
++ **/ ++s32 igb_valid_led_default_i210(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { ++ switch (hw->phy.media_type) { ++ case e1000_media_type_internal_serdes: ++ *data = ID_LED_DEFAULT_I210_SERDES; ++ break; ++ case e1000_media_type_copper: ++ default: ++ *data = ID_LED_DEFAULT_I210; ++ break; ++ } ++ } ++out: ++ return ret_val; ++} ++ ++/** ++ * __igb_access_xmdio_reg - Read/write XMDIO register ++ * @hw: pointer to the HW structure ++ * @address: XMDIO address to program ++ * @dev_addr: device address to program ++ * @data: pointer to value to read/write from/to the XMDIO address ++ * @read: boolean flag to indicate read or write ++ **/ ++static s32 __igb_access_xmdio_reg(struct e1000_hw *hw, u16 address, ++ u8 dev_addr, u16 *data, bool read) ++{ ++ s32 ret_val = 0; ++ ++ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA | ++ dev_addr); ++ if (ret_val) ++ return ret_val; ++ ++ if (read) ++ ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data); ++ else ++ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Recalibrate the device back to 0 */ ++ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0); ++ if (ret_val) ++ return ret_val; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_read_xmdio_reg - Read XMDIO register ++ * @hw: pointer to the HW structure ++ * @addr: XMDIO address to program ++ * @dev_addr: device address to program ++ * @data: value to be read from the EMI address ++ **/ ++s32 igb_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data) ++{ ++ return __igb_access_xmdio_reg(hw, addr, dev_addr, data, true); ++} ++ ++/** ++ * igb_write_xmdio_reg - Write XMDIO register ++ * @hw: pointer to the HW structure ++ * @addr: XMDIO address to program ++ * @dev_addr: device address to program ++ * @data: value to be written to the XMDIO address ++ **/ ++s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data) ++{ ++ return __igb_access_xmdio_reg(hw, addr, dev_addr, &data, false); ++} ++ ++/** ++ * igb_init_nvm_params_i210 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++s32 igb_init_nvm_params_i210(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ ++ nvm->ops.acquire = igb_acquire_nvm_i210; ++ nvm->ops.release = igb_release_nvm_i210; ++ nvm->ops.valid_led_default = igb_valid_led_default_i210; ++ ++ /* NVM Function Pointers */ ++ if (igb_get_flash_presence_i210(hw)) { ++ hw->nvm.type = e1000_nvm_flash_hw; ++ nvm->ops.read = igb_read_nvm_srrd_i210; ++ nvm->ops.write = igb_write_nvm_srwr_i210; ++ nvm->ops.validate = igb_validate_nvm_checksum_i210; ++ nvm->ops.update = igb_update_nvm_checksum_i210; ++ } else { ++ hw->nvm.type = e1000_nvm_invm; ++ nvm->ops.read = igb_read_invm_i210; ++ nvm->ops.write = NULL; ++ nvm->ops.validate = NULL; ++ nvm->ops.update = NULL; ++ } ++ return ret_val; ++} ++ ++/** ++ * igb_pll_workaround_i210 ++ * @hw: pointer to the HW structure ++ * ++ * Works around an errata in the PLL circuit where it occasionally ++ * provides the wrong clock frequency after power up. 
++ **/ ++s32 igb_pll_workaround_i210(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u32 wuc, mdicnfg, ctrl, ctrl_ext, reg_val; ++ u16 nvm_word, phy_word, pci_word, tmp_nvm; ++ int i; ++ ++ /* Get and set needed register values */ ++ wuc = rd32(E1000_WUC); ++ mdicnfg = rd32(E1000_MDICNFG); ++ reg_val = mdicnfg & ~E1000_MDICNFG_EXT_MDIO; ++ wr32(E1000_MDICNFG, reg_val); ++ ++ /* Get data from NVM, or set default */ ++ ret_val = igb_read_invm_word_i210(hw, E1000_INVM_AUTOLOAD, ++ &nvm_word); ++ if (ret_val) ++ nvm_word = E1000_INVM_DEFAULT_AL; ++ tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL; ++ for (i = 0; i < E1000_MAX_PLL_TRIES; i++) { ++ /* check current state directly from internal PHY */ ++ igb_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE | ++ E1000_PHY_PLL_FREQ_REG), &phy_word); ++ if ((phy_word & E1000_PHY_PLL_UNCONF) ++ != E1000_PHY_PLL_UNCONF) { ++ ret_val = 0; ++ break; ++ } else { ++ ret_val = -E1000_ERR_PHY; ++ } ++ /* directly reset the internal PHY */ ++ ctrl = rd32(E1000_CTRL); ++ wr32(E1000_CTRL, ctrl|E1000_CTRL_PHY_RST); ++ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ctrl_ext |= (E1000_CTRL_EXT_PHYPDEN | E1000_CTRL_EXT_SDLPE); ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ ++ wr32(E1000_WUC, 0); ++ reg_val = (E1000_INVM_AUTOLOAD << 4) | (tmp_nvm << 16); ++ wr32(E1000_EEARBC_I210, reg_val); ++ ++ igb_read_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); ++ pci_word |= E1000_PCI_PMCSR_D3; ++ igb_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); ++ usleep_range(1000, 2000); ++ pci_word &= ~E1000_PCI_PMCSR_D3; ++ igb_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); ++ reg_val = (E1000_INVM_AUTOLOAD << 4) | (nvm_word << 16); ++ wr32(E1000_EEARBC_I210, reg_val); ++ ++ /* restore WUC register */ ++ wr32(E1000_WUC, wuc); ++ } ++ /* restore MDICNFG setting */ ++ wr32(E1000_MDICNFG, mdicnfg); ++ return ret_val; ++} +--- linux/drivers/xenomai/net/drivers/igb/e1000_defines.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_defines.h 2021-04-07 16:01:27.452633845 +0800 +@@ -0,0 +1,1018 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_DEFINES_H_ ++#define _E1000_DEFINES_H_ ++ ++/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ ++#define REQ_TX_DESCRIPTOR_MULTIPLE 8 ++#define REQ_RX_DESCRIPTOR_MULTIPLE 8 ++ ++/* Definitions for power management and wakeup registers */ ++/* Wake Up Control */ ++#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ ++ ++/* Wake Up Filter Control */ ++#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ ++#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ ++#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ ++#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ ++#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ ++ ++/* Extended Device Control */ ++#define E1000_CTRL_EXT_SDP2_DATA 0x00000040 /* Value of SW Defineable Pin 2 */ ++#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Defineable Pin 3 */ ++#define E1000_CTRL_EXT_SDP2_DIR 0x00000400 /* SDP2 Data direction */ ++#define E1000_CTRL_EXT_SDP3_DIR 0x00000800 /* SDP3 Data direction */ ++ ++/* Physical Func Reset Done Indication */ ++#define E1000_CTRL_EXT_PFRSTD 0x00004000 ++#define E1000_CTRL_EXT_SDLPE 0X00040000 /* SerDes Low Power Enable */ ++#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000 ++#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 ++#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 ++#define E1000_CTRL_EXT_EIAME 0x01000000 ++#define E1000_CTRL_EXT_IRCA 0x00000001 ++/* Interrupt delay cancellation */ ++/* Driver loaded bit for FW */ ++#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 ++/* Interrupt acknowledge Auto-mask */ ++/* Clear Interrupt timers after IMS clear */ ++/* packet buffer parity error detection enabled */ ++/* descriptor FIFO parity error detection enable */ ++#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ ++#define E1000_CTRL_EXT_PHYPDEN 0x00100000 ++#define E1000_I2CCMD_REG_ADDR_SHIFT 16 ++#define E1000_I2CCMD_PHY_ADDR_SHIFT 24 ++#define E1000_I2CCMD_OPCODE_READ 0x08000000 ++#define E1000_I2CCMD_OPCODE_WRITE 0x00000000 ++#define E1000_I2CCMD_READY 0x20000000 ++#define E1000_I2CCMD_ERROR 0x80000000 ++#define E1000_I2CCMD_SFP_DATA_ADDR(a) (0x0000 + (a)) ++#define E1000_I2CCMD_SFP_DIAG_ADDR(a) (0x0100 + (a)) ++#define E1000_MAX_SGMII_PHY_REG_ADDR 255 ++#define E1000_I2CCMD_PHY_TIMEOUT 200 ++#define E1000_IVAR_VALID 0x80 ++#define E1000_GPIE_NSICR 0x00000001 ++#define E1000_GPIE_MSIX_MODE 0x00000010 ++#define E1000_GPIE_EIAME 0x40000000 ++#define E1000_GPIE_PBA 0x80000000 ++ ++/* Receive Descriptor bit definitions */ ++#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ ++#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ ++#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ ++#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ ++#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ ++#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ ++#define E1000_RXD_STAT_TS 0x10000 /* Pkt was time stamped */ ++ ++#define E1000_RXDEXT_STATERR_LB 0x00040000 ++#define E1000_RXDEXT_STATERR_CE 0x01000000 ++#define E1000_RXDEXT_STATERR_SE 0x02000000 ++#define E1000_RXDEXT_STATERR_SEQ 0x04000000 ++#define E1000_RXDEXT_STATERR_CXE 0x10000000 ++#define E1000_RXDEXT_STATERR_TCPE 0x20000000 ++#define E1000_RXDEXT_STATERR_IPE 0x40000000 ++#define E1000_RXDEXT_STATERR_RXE 0x80000000 ++ ++/* Same mask, but for 
extended and packet split descriptors */ ++#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXDEXT_STATERR_CE | \ ++ E1000_RXDEXT_STATERR_SE | \ ++ E1000_RXDEXT_STATERR_SEQ | \ ++ E1000_RXDEXT_STATERR_CXE | \ ++ E1000_RXDEXT_STATERR_RXE) ++ ++#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 ++#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 ++#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 ++#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000 ++#define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 ++ ++ ++/* Management Control */ ++#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ ++#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ ++#define E1000_MANC_EN_BMC2OS 0x10000000 /* OSBMC is Enabled or not */ ++/* Enable Neighbor Discovery Filtering */ ++#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ ++#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ ++/* Enable MAC address filtering */ ++#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 ++ ++/* Receive Control */ ++#define E1000_RCTL_EN 0x00000002 /* enable */ ++#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ ++#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ ++#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ ++#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ ++#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ ++#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ ++#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */ ++#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ ++#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ ++#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */ ++#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ ++#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ ++#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ ++#define E1000_RCTL_DPF 0x00400000 /* Discard Pause Frames */ ++#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ ++#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ ++ ++/* Use byte values for the following shift parameters ++ * Usage: ++ * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & ++ * E1000_PSRCTL_BSIZE0_MASK) | ++ * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & ++ * E1000_PSRCTL_BSIZE1_MASK) | ++ * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & ++ * E1000_PSRCTL_BSIZE2_MASK) | ++ * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; ++ * E1000_PSRCTL_BSIZE3_MASK)) ++ * where value0 = [128..16256], default=256 ++ * value1 = [1024..64512], default=4096 ++ * value2 = [0..64512], default=4096 ++ * value3 = [0..64512], default=0 ++ */ ++ ++#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F ++#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 ++#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 ++#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 ++ ++#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ ++#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ ++#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ ++#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ ++ ++/* SWFW_SYNC Definitions */ ++#define E1000_SWFW_EEP_SM 0x1 ++#define E1000_SWFW_PHY0_SM 0x2 ++#define E1000_SWFW_PHY1_SM 0x4 ++#define E1000_SWFW_PHY2_SM 0x20 ++#define E1000_SWFW_PHY3_SM 0x40 ++ ++/* FACTPS Definitions */ ++/* Device Control */ ++#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ ++#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks 
new Master requests */ ++#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ ++#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ ++#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ ++#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ ++#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ ++#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ ++#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ ++#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ ++#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ ++/* Defined polarity of Dock/Undock indication in SDP[0] */ ++/* Reset both PHY ports, through PHYRST_N pin */ ++/* enable link status from external LINK_0 and LINK_1 pins */ ++#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ ++#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ ++#define E1000_CTRL_SDP0_DIR 0x00400000 /* SDP0 Data direction */ ++#define E1000_CTRL_SDP1_DIR 0x00800000 /* SDP1 Data direction */ ++#define E1000_CTRL_RST 0x04000000 /* Global reset */ ++#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ ++#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ ++#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ ++#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ ++/* Initiate an interrupt to manageability engine */ ++#define E1000_CTRL_I2C_ENA 0x02000000 /* I2C enable */ ++ ++/* Bit definitions for the Management Data IO (MDIO) and Management Data ++ * Clock (MDC) pins in the Device Control Register. ++ */ ++ ++#define E1000_CONNSW_ENRGSRC 0x4 ++#define E1000_CONNSW_PHYSD 0x400 ++#define E1000_CONNSW_PHY_PDN 0x800 ++#define E1000_CONNSW_SERDESD 0x200 ++#define E1000_CONNSW_AUTOSENSE_CONF 0x2 ++#define E1000_CONNSW_AUTOSENSE_EN 0x1 ++#define E1000_PCS_CFG_PCS_EN 8 ++#define E1000_PCS_LCTL_FLV_LINK_UP 1 ++#define E1000_PCS_LCTL_FSV_100 2 ++#define E1000_PCS_LCTL_FSV_1000 4 ++#define E1000_PCS_LCTL_FDV_FULL 8 ++#define E1000_PCS_LCTL_FSD 0x10 ++#define E1000_PCS_LCTL_FORCE_LINK 0x20 ++#define E1000_PCS_LCTL_FORCE_FCTRL 0x80 ++#define E1000_PCS_LCTL_AN_ENABLE 0x10000 ++#define E1000_PCS_LCTL_AN_RESTART 0x20000 ++#define E1000_PCS_LCTL_AN_TIMEOUT 0x40000 ++#define E1000_ENABLE_SERDES_LOOPBACK 0x0410 ++ ++#define E1000_PCS_LSTS_LINK_OK 1 ++#define E1000_PCS_LSTS_SPEED_100 2 ++#define E1000_PCS_LSTS_SPEED_1000 4 ++#define E1000_PCS_LSTS_DUPLEX_FULL 8 ++#define E1000_PCS_LSTS_SYNK_OK 0x10 ++ ++/* Device Status */ ++#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ ++#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ ++#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ ++#define E1000_STATUS_FUNC_SHIFT 2 ++#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ ++#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ ++#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ ++#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ ++/* Change in Dock/Undock state. Clear on write '0'. */ ++/* Status of Master requests. */ ++#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 ++/* BMC external code execution disabled */ ++ ++#define E1000_STATUS_2P5_SKU 0x00001000 /* Val of 2.5GBE SKU strap */ ++#define E1000_STATUS_2P5_SKU_OVER 0x00002000 /* Val of 2.5GBE SKU Over */ ++/* Constants used to intrepret the masked PCI-X bus speed. 
*/ ++ ++#define SPEED_10 10 ++#define SPEED_100 100 ++#define SPEED_1000 1000 ++#define SPEED_2500 2500 ++#define HALF_DUPLEX 1 ++#define FULL_DUPLEX 2 ++ ++ ++#define ADVERTISE_10_HALF 0x0001 ++#define ADVERTISE_10_FULL 0x0002 ++#define ADVERTISE_100_HALF 0x0004 ++#define ADVERTISE_100_FULL 0x0008 ++#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ ++#define ADVERTISE_1000_FULL 0x0020 ++ ++/* 1000/H is not supported, nor spec-compliant. */ ++#define E1000_ALL_SPEED_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ++ ADVERTISE_1000_FULL) ++#define E1000_ALL_NOT_GIG (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL) ++#define E1000_ALL_FULL_DUPLEX (ADVERTISE_10_FULL | ADVERTISE_100_FULL | \ ++ ADVERTISE_1000_FULL) ++#define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF) ++ ++#define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX ++ ++/* LED Control */ ++#define E1000_LEDCTL_LED0_MODE_SHIFT 0 ++#define E1000_LEDCTL_LED0_BLINK 0x00000080 ++#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F ++#define E1000_LEDCTL_LED0_IVRT 0x00000040 ++ ++#define E1000_LEDCTL_MODE_LED_ON 0xE ++#define E1000_LEDCTL_MODE_LED_OFF 0xF ++ ++/* Transmit Descriptor bit definitions */ ++#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ ++#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ ++#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ ++#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ ++#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ ++/* Extended desc bits for Linksec and timesync */ ++ ++/* Transmit Control */ ++#define E1000_TCTL_EN 0x00000002 /* enable tx */ ++#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ ++#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ ++#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ ++#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ ++ ++/* DMA Coalescing register fields */ ++#define E1000_DMACR_DMACWT_MASK 0x00003FFF /* DMA Coal Watchdog Timer */ ++#define E1000_DMACR_DMACTHR_MASK 0x00FF0000 /* DMA Coal Rx Threshold */ ++#define E1000_DMACR_DMACTHR_SHIFT 16 ++#define E1000_DMACR_DMAC_LX_MASK 0x30000000 /* Lx when no PCIe trans */ ++#define E1000_DMACR_DMAC_LX_SHIFT 28 ++#define E1000_DMACR_DMAC_EN 0x80000000 /* Enable DMA Coalescing */ ++/* DMA Coalescing BMC-to-OS Watchdog Enable */ ++#define E1000_DMACR_DC_BMC2OSW_EN 0x00008000 ++ ++#define E1000_DMCTXTH_DMCTTHR_MASK 0x00000FFF /* DMA Coal Tx Threshold */ ++ ++#define E1000_DMCTLX_TTLX_MASK 0x00000FFF /* Time to LX request */ ++ ++#define E1000_DMCRTRH_UTRESH_MASK 0x0007FFFF /* Rx Traffic Rate Thresh */ ++#define E1000_DMCRTRH_LRPRCW 0x80000000 /* Rx pkt rate curr window */ ++ ++#define E1000_DMCCNT_CCOUNT_MASK 0x01FFFFFF /* DMA Coal Rx Current Cnt */ ++ ++#define E1000_FCRTC_RTH_COAL_MASK 0x0003FFF0 /* FC Rx Thresh High val */ ++#define E1000_FCRTC_RTH_COAL_SHIFT 4 ++#define E1000_PCIEMISC_LX_DECISION 0x00000080 /* Lx power decision */ ++ ++/* Timestamp in Rx buffer */ ++#define E1000_RXPBS_CFG_TS_EN 0x80000000 ++ ++#define I210_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ ++#define I210_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE 
default */ ++ ++/* SerDes Control */ ++#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 ++ ++/* Receive Checksum Control */ ++#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */ ++#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ ++#define E1000_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ ++#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ ++ ++/* Header split receive */ ++#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 ++#define E1000_RFCTL_LEF 0x00040000 ++ ++/* Collision related configuration parameters */ ++#define E1000_COLLISION_THRESHOLD 15 ++#define E1000_CT_SHIFT 4 ++#define E1000_COLLISION_DISTANCE 63 ++#define E1000_COLD_SHIFT 12 ++ ++/* Ethertype field values */ ++#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ ++ ++#define MAX_JUMBO_FRAME_SIZE 0x3F00 ++ ++/* PBA constants */ ++#define E1000_PBA_34K 0x0022 ++#define E1000_PBA_64K 0x0040 /* 64KB */ ++ ++/* SW Semaphore Register */ ++#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ ++#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ ++ ++/* Interrupt Cause Read */ ++#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ ++#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ ++#define E1000_ICR_RXSEQ 0x00000008 /* rx sequence error */ ++#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */ ++#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ ++#define E1000_ICR_VMMB 0x00000100 /* VM MB event */ ++#define E1000_ICR_TS 0x00080000 /* Time Sync Interrupt */ ++#define E1000_ICR_DRSTA 0x40000000 /* Device Reset Asserted */ ++/* If this bit asserted, the driver should claim the interrupt */ ++#define E1000_ICR_INT_ASSERTED 0x80000000 ++/* LAN connected device generates an interrupt */ ++#define E1000_ICR_DOUTSYNC 0x10000000 /* NIC DMA out of sync */ ++ ++/* Extended Interrupt Cause Read */ ++#define E1000_EICR_RX_QUEUE0 0x00000001 /* Rx Queue 0 Interrupt */ ++#define E1000_EICR_RX_QUEUE1 0x00000002 /* Rx Queue 1 Interrupt */ ++#define E1000_EICR_RX_QUEUE2 0x00000004 /* Rx Queue 2 Interrupt */ ++#define E1000_EICR_RX_QUEUE3 0x00000008 /* Rx Queue 3 Interrupt */ ++#define E1000_EICR_TX_QUEUE0 0x00000100 /* Tx Queue 0 Interrupt */ ++#define E1000_EICR_TX_QUEUE1 0x00000200 /* Tx Queue 1 Interrupt */ ++#define E1000_EICR_TX_QUEUE2 0x00000400 /* Tx Queue 2 Interrupt */ ++#define E1000_EICR_TX_QUEUE3 0x00000800 /* Tx Queue 3 Interrupt */ ++#define E1000_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ ++/* TCP Timer */ ++ ++/* This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. Each bit is documented below: ++ * o RXT0 = Receiver Timer Interrupt (ring 0) ++ * o TXDW = Transmit Descriptor Written Back ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ * o LSC = Link Status Change ++ */ ++#define IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXT0 | \ ++ E1000_IMS_TXDW | \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ | \ ++ E1000_IMS_LSC | \ ++ E1000_IMS_DOUTSYNC) ++ ++/* Interrupt Mask Set */ ++#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_IMS_VMMB E1000_ICR_VMMB /* Mail box activity */ ++#define E1000_IMS_TS E1000_ICR_TS /* Time Sync Interrupt */ ++#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ ++#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_IMS_DRSTA E1000_ICR_DRSTA /* Device Reset Asserted */ ++#define E1000_IMS_DOUTSYNC E1000_ICR_DOUTSYNC /* NIC DMA out of sync */ ++ ++/* Extended Interrupt Mask Set */ ++#define E1000_EIMS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ ++ ++/* Interrupt Cause Set */ ++#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ ++#define E1000_ICS_DRSTA E1000_ICR_DRSTA /* Device Reset Aserted */ ++ ++/* Extended Interrupt Cause Set */ ++/* E1000_EITR_CNT_IGNR is only for 82576 and newer */ ++#define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */ ++ ++ ++/* Transmit Descriptor Control */ ++/* Enable the counting of descriptors still to be processed. */ ++ ++/* Flow Control Constants */ ++#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 ++#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 ++#define FLOW_CONTROL_TYPE 0x8808 ++ ++/* Transmit Config Word */ ++#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ ++#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ ++ ++/* 802.1q VLAN Packet Size */ ++#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMA'd) */ ++#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ ++ ++/* Receive Address */ ++/* Number of high/low register pairs in the RAR. The RAR (Receive Address ++ * Registers) holds the directed and multicast addresses that we monitor. ++ * Technically, we have 16 spots. However, we reserve one of these spots ++ * (RAR[15]) for our directed address used by controllers with ++ * manageability enabled, allowing us room for 15 multicast addresses. ++ */ ++#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ ++#define E1000_RAL_MAC_ADDR_LEN 4 ++#define E1000_RAH_MAC_ADDR_LEN 2 ++#define E1000_RAH_POOL_MASK 0x03FC0000 ++#define E1000_RAH_POOL_1 0x00040000 ++ ++/* Error Codes */ ++#define E1000_ERR_NVM 1 ++#define E1000_ERR_PHY 2 ++#define E1000_ERR_CONFIG 3 ++#define E1000_ERR_PARAM 4 ++#define E1000_ERR_MAC_INIT 5 ++#define E1000_ERR_RESET 9 ++#define E1000_ERR_MASTER_REQUESTS_PENDING 10 ++#define E1000_BLK_PHY_RESET 12 ++#define E1000_ERR_SWFW_SYNC 13 ++#define E1000_NOT_IMPLEMENTED 14 ++#define E1000_ERR_MBX 15 ++#define E1000_ERR_INVALID_ARGUMENT 16 ++#define E1000_ERR_NO_SPACE 17 ++#define E1000_ERR_NVM_PBA_SECTION 18 ++#define E1000_ERR_INVM_VALUE_NOT_FOUND 19 ++#define E1000_ERR_I2C 20 ++ ++/* Loop limit on how long we wait for auto-negotiation to complete */ ++#define COPPER_LINK_UP_LIMIT 10 ++#define PHY_AUTO_NEG_LIMIT 45 ++#define PHY_FORCE_LIMIT 20 ++/* Number of 100 microseconds we wait for PCI Express master disable */ ++#define MASTER_DISABLE_TIMEOUT 800 ++/* Number of milliseconds we wait for PHY configuration done after MAC reset */ ++#define PHY_CFG_TIMEOUT 100 ++/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */ ++/* Number of milliseconds for NVM auto read done after MAC reset. 
*/ ++#define AUTO_READ_DONE_TIMEOUT 10 ++ ++/* Flow Control */ ++#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ ++ ++#define E1000_TSYNCTXCTL_VALID 0x00000001 /* tx timestamp valid */ ++#define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable tx timestampping */ ++ ++#define E1000_TSYNCRXCTL_VALID 0x00000001 /* rx timestamp valid */ ++#define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* rx type mask */ ++#define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00 ++#define E1000_TSYNCRXCTL_TYPE_L4_V1 0x02 ++#define E1000_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 ++#define E1000_TSYNCRXCTL_TYPE_ALL 0x08 ++#define E1000_TSYNCRXCTL_TYPE_EVENT_V2 0x0A ++#define E1000_TSYNCRXCTL_ENABLED 0x00000010 /* enable rx timestampping */ ++ ++#define E1000_TSYNCRXCFG_PTP_V1_CTRLT_MASK 0x000000FF ++#define E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE 0x00 ++#define E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE 0x01 ++#define E1000_TSYNCRXCFG_PTP_V1_FOLLOWUP_MESSAGE 0x02 ++#define E1000_TSYNCRXCFG_PTP_V1_DELAY_RESP_MESSAGE 0x03 ++#define E1000_TSYNCRXCFG_PTP_V1_MANAGEMENT_MESSAGE 0x04 ++ ++#define E1000_TSYNCRXCFG_PTP_V2_MSGID_MASK 0x00000F00 ++#define E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE 0x0000 ++#define E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE 0x0100 ++#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_REQ_MESSAGE 0x0200 ++#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_RESP_MESSAGE 0x0300 ++#define E1000_TSYNCRXCFG_PTP_V2_FOLLOWUP_MESSAGE 0x0800 ++#define E1000_TSYNCRXCFG_PTP_V2_DELAY_RESP_MESSAGE 0x0900 ++#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_FOLLOWUP_MESSAGE 0x0A00 ++#define E1000_TSYNCRXCFG_PTP_V2_ANNOUNCE_MESSAGE 0x0B00 ++#define E1000_TSYNCRXCFG_PTP_V2_SIGNALLING_MESSAGE 0x0C00 ++#define E1000_TSYNCRXCFG_PTP_V2_MANAGEMENT_MESSAGE 0x0D00 ++ ++#define E1000_TIMINCA_16NS_SHIFT 24 ++ ++/* Time Sync Interrupt Cause/Mask Register Bits */ ++ ++#define TSINTR_SYS_WRAP (1 << 0) /* SYSTIM Wrap around. */ ++#define TSINTR_TXTS (1 << 1) /* Transmit Timestamp. */ ++#define TSINTR_RXTS (1 << 2) /* Receive Timestamp. */ ++#define TSINTR_TT0 (1 << 3) /* Target Time 0 Trigger. */ ++#define TSINTR_TT1 (1 << 4) /* Target Time 1 Trigger. */ ++#define TSINTR_AUTT0 (1 << 5) /* Auxiliary Timestamp 0 Taken. */ ++#define TSINTR_AUTT1 (1 << 6) /* Auxiliary Timestamp 1 Taken. */ ++#define TSINTR_TADJ (1 << 7) /* Time Adjust Done. */ ++ ++#define TSYNC_INTERRUPTS TSINTR_TXTS ++#define E1000_TSICR_TXTS TSINTR_TXTS ++ ++/* TSAUXC Configuration Bits */ ++#define TSAUXC_EN_TT0 (1 << 0) /* Enable target time 0. */ ++#define TSAUXC_EN_TT1 (1 << 1) /* Enable target time 1. */ ++#define TSAUXC_EN_CLK0 (1 << 2) /* Enable Configurable Frequency Clock 0. */ ++#define TSAUXC_SAMP_AUT0 (1 << 3) /* Latch SYSTIML/H into AUXSTMPL/0. */ ++#define TSAUXC_ST0 (1 << 4) /* Start Clock 0 Toggle on Target Time 0. */ ++#define TSAUXC_EN_CLK1 (1 << 5) /* Enable Configurable Frequency Clock 1. */ ++#define TSAUXC_SAMP_AUT1 (1 << 6) /* Latch SYSTIML/H into AUXSTMPL/1. */ ++#define TSAUXC_ST1 (1 << 7) /* Start Clock 1 Toggle on Target Time 1. */ ++#define TSAUXC_EN_TS0 (1 << 8) /* Enable hardware timestamp 0. */ ++#define TSAUXC_AUTT0 (1 << 9) /* Auxiliary Timestamp Taken. */ ++#define TSAUXC_EN_TS1 (1 << 10) /* Enable hardware timestamp 0. */ ++#define TSAUXC_AUTT1 (1 << 11) /* Auxiliary Timestamp Taken. */ ++#define TSAUXC_PLSG (1 << 17) /* Generate a pulse. */ ++#define TSAUXC_DISABLE (1 << 31) /* Disable SYSTIM Count Operation. */ ++ ++/* SDP Configuration Bits */ ++#define AUX0_SEL_SDP0 (0 << 0) /* Assign SDP0 to auxiliary time stamp 0. 
*/ ++#define AUX0_SEL_SDP1 (1 << 0) /* Assign SDP1 to auxiliary time stamp 0. */ ++#define AUX0_SEL_SDP2 (2 << 0) /* Assign SDP2 to auxiliary time stamp 0. */ ++#define AUX0_SEL_SDP3 (3 << 0) /* Assign SDP3 to auxiliary time stamp 0. */ ++#define AUX0_TS_SDP_EN (1 << 2) /* Enable auxiliary time stamp trigger 0. */ ++#define AUX1_SEL_SDP0 (0 << 3) /* Assign SDP0 to auxiliary time stamp 1. */ ++#define AUX1_SEL_SDP1 (1 << 3) /* Assign SDP1 to auxiliary time stamp 1. */ ++#define AUX1_SEL_SDP2 (2 << 3) /* Assign SDP2 to auxiliary time stamp 1. */ ++#define AUX1_SEL_SDP3 (3 << 3) /* Assign SDP3 to auxiliary time stamp 1. */ ++#define AUX1_TS_SDP_EN (1 << 5) /* Enable auxiliary time stamp trigger 1. */ ++#define TS_SDP0_SEL_TT0 (0 << 6) /* Target time 0 is output on SDP0. */ ++#define TS_SDP0_SEL_TT1 (1 << 6) /* Target time 1 is output on SDP0. */ ++#define TS_SDP0_SEL_FC0 (2 << 6) /* Freq clock 0 is output on SDP0. */ ++#define TS_SDP0_SEL_FC1 (3 << 6) /* Freq clock 1 is output on SDP0. */ ++#define TS_SDP0_EN (1 << 8) /* SDP0 is assigned to Tsync. */ ++#define TS_SDP1_SEL_TT0 (0 << 9) /* Target time 0 is output on SDP1. */ ++#define TS_SDP1_SEL_TT1 (1 << 9) /* Target time 1 is output on SDP1. */ ++#define TS_SDP1_SEL_FC0 (2 << 9) /* Freq clock 0 is output on SDP1. */ ++#define TS_SDP1_SEL_FC1 (3 << 9) /* Freq clock 1 is output on SDP1. */ ++#define TS_SDP1_EN (1 << 11) /* SDP1 is assigned to Tsync. */ ++#define TS_SDP2_SEL_TT0 (0 << 12) /* Target time 0 is output on SDP2. */ ++#define TS_SDP2_SEL_TT1 (1 << 12) /* Target time 1 is output on SDP2. */ ++#define TS_SDP2_SEL_FC0 (2 << 12) /* Freq clock 0 is output on SDP2. */ ++#define TS_SDP2_SEL_FC1 (3 << 12) /* Freq clock 1 is output on SDP2. */ ++#define TS_SDP2_EN (1 << 14) /* SDP2 is assigned to Tsync. */ ++#define TS_SDP3_SEL_TT0 (0 << 15) /* Target time 0 is output on SDP3. */ ++#define TS_SDP3_SEL_TT1 (1 << 15) /* Target time 1 is output on SDP3. */ ++#define TS_SDP3_SEL_FC0 (2 << 15) /* Freq clock 0 is output on SDP3. */ ++#define TS_SDP3_SEL_FC1 (3 << 15) /* Freq clock 1 is output on SDP3. */ ++#define TS_SDP3_EN (1 << 17) /* SDP3 is assigned to Tsync. 
*/ ++ ++#define E1000_MDICNFG_EXT_MDIO 0x80000000 /* MDI ext/int destination */ ++#define E1000_MDICNFG_COM_MDIO 0x40000000 /* MDI shared w/ lan 0 */ ++#define E1000_MDICNFG_PHY_MASK 0x03E00000 ++#define E1000_MDICNFG_PHY_SHIFT 21 ++ ++#define E1000_MEDIA_PORT_COPPER 1 ++#define E1000_MEDIA_PORT_OTHER 2 ++#define E1000_M88E1112_AUTO_COPPER_SGMII 0x2 ++#define E1000_M88E1112_AUTO_COPPER_BASEX 0x3 ++#define E1000_M88E1112_STATUS_LINK 0x0004 /* Interface Link Bit */ ++#define E1000_M88E1112_MAC_CTRL_1 0x10 ++#define E1000_M88E1112_MAC_CTRL_1_MODE_MASK 0x0380 /* Mode Select */ ++#define E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT 7 ++#define E1000_M88E1112_PAGE_ADDR 0x16 ++#define E1000_M88E1112_STATUS 0x01 ++ ++/* PCI Express Control */ ++#define E1000_GCR_CMPL_TMOUT_MASK 0x0000F000 ++#define E1000_GCR_CMPL_TMOUT_10ms 0x00001000 ++#define E1000_GCR_CMPL_TMOUT_RESEND 0x00010000 ++#define E1000_GCR_CAP_VER2 0x00040000 ++ ++/* mPHY Address Control and Data Registers */ ++#define E1000_MPHY_ADDR_CTL 0x0024 /* mPHY Address Control Register */ ++#define E1000_MPHY_ADDR_CTL_OFFSET_MASK 0xFFFF0000 ++#define E1000_MPHY_DATA 0x0E10 /* mPHY Data Register */ ++ ++/* mPHY PCS CLK Register */ ++#define E1000_MPHY_PCS_CLK_REG_OFFSET 0x0004 /* mPHY PCS CLK AFE CSR Offset */ ++/* mPHY Near End Digital Loopback Override Bit */ ++#define E1000_MPHY_PCS_CLK_REG_DIGINELBEN 0x10 ++ ++#define E1000_PCS_LCTL_FORCE_FCTRL 0x80 ++#define E1000_PCS_LSTS_AN_COMPLETE 0x10000 ++ ++/* PHY Control Register */ ++#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ ++#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ ++#define MII_CR_POWER_DOWN 0x0800 /* Power down */ ++#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ ++#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ ++#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ ++#define MII_CR_SPEED_1000 0x0040 ++#define MII_CR_SPEED_100 0x2000 ++#define MII_CR_SPEED_10 0x0000 ++ ++/* PHY Status Register */ ++#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ ++#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ ++ ++/* Autoneg Advertisement Register */ ++#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ ++#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ ++#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ ++#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ ++#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ ++#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ ++ ++/* Link Partner Ability Register (Base Page) */ ++#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ ++#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ ++ ++/* Autoneg Expansion Register */ ++ ++/* 1000BASE-T Control Register */ ++#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ ++#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ ++#define CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */ ++ /* 0=Configure PHY as Slave */ ++#define CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */ ++ /* 0=Automatic Master/Slave config */ ++ ++/* 1000BASE-T Status Register */ ++#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ ++#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ ++ ++ ++/* PHY 1000 MII Register/Bit Definitions */ ++/* PHY Registers defined by IEEE */ ++#define PHY_CONTROL 0x00 /* Control Register */ ++#define PHY_STATUS 0x01 /* Status 
Register */ ++#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ ++#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ ++#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ ++#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ ++#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ ++#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ ++ ++/* NVM Control */ ++#define E1000_EECD_SK 0x00000001 /* NVM Clock */ ++#define E1000_EECD_CS 0x00000002 /* NVM Chip Select */ ++#define E1000_EECD_DI 0x00000004 /* NVM Data In */ ++#define E1000_EECD_DO 0x00000008 /* NVM Data Out */ ++#define E1000_EECD_REQ 0x00000040 /* NVM Access Request */ ++#define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */ ++#define E1000_EECD_PRES 0x00000100 /* NVM Present */ ++/* NVM Addressing bits based on type 0=small, 1=large */ ++#define E1000_EECD_ADDR_BITS 0x00000400 ++#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ ++#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ ++#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ ++#define E1000_EECD_SIZE_EX_SHIFT 11 ++#define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */ ++#define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done*/ ++#define E1000_EECD_FLASH_DETECTED_I210 0x00080000 /* FLASH detected */ ++#define E1000_FLUDONE_ATTEMPTS 20000 ++#define E1000_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */ ++#define E1000_I210_FIFO_SEL_RX 0x00 ++#define E1000_I210_FIFO_SEL_TX_QAV(_i) (0x02 + (_i)) ++#define E1000_I210_FIFO_SEL_TX_LEGACY E1000_I210_FIFO_SEL_TX_QAV(0) ++#define E1000_I210_FIFO_SEL_BMC2OS_TX 0x06 ++#define E1000_I210_FIFO_SEL_BMC2OS_RX 0x01 ++#define E1000_I210_FLASH_SECTOR_SIZE 0x1000 /* 4KB FLASH sector unit size */ ++/* Secure FLASH mode requires removing MSb */ ++#define E1000_I210_FW_PTR_MASK 0x7FFF ++/* Firmware code revision field word offset*/ ++#define E1000_I210_FW_VER_OFFSET 328 ++#define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */ ++#define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done*/ ++#define E1000_FLUDONE_ATTEMPTS 20000 ++#define E1000_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */ ++#define E1000_I210_FIFO_SEL_RX 0x00 ++#define E1000_I210_FIFO_SEL_TX_QAV(_i) (0x02 + (_i)) ++#define E1000_I210_FIFO_SEL_TX_LEGACY E1000_I210_FIFO_SEL_TX_QAV(0) ++#define E1000_I210_FIFO_SEL_BMC2OS_TX 0x06 ++#define E1000_I210_FIFO_SEL_BMC2OS_RX 0x01 ++ ++ ++/* Offset to data in NVM read/write registers */ ++#define E1000_NVM_RW_REG_DATA 16 ++#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ ++#define E1000_NVM_RW_REG_START 1 /* Start operation */ ++#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ ++#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */ ++ ++/* NVM Word Offsets */ ++#define NVM_COMPAT 0x0003 ++#define NVM_ID_LED_SETTINGS 0x0004 /* SERDES output amplitude */ ++#define NVM_VERSION 0x0005 ++#define NVM_INIT_CONTROL2_REG 0x000F ++#define NVM_INIT_CONTROL3_PORT_B 0x0014 ++#define NVM_INIT_CONTROL3_PORT_A 0x0024 ++#define NVM_ALT_MAC_ADDR_PTR 0x0037 ++#define NVM_CHECKSUM_REG 0x003F ++#define NVM_COMPATIBILITY_REG_3 0x0003 ++#define NVM_COMPATIBILITY_BIT_MASK 0x8000 ++#define NVM_MAC_ADDR 0x0000 ++#define NVM_SUB_DEV_ID 0x000B ++#define NVM_SUB_VEN_ID 0x000C ++#define NVM_DEV_ID 0x000D ++#define NVM_VEN_ID 0x000E ++#define NVM_INIT_CTRL_2 0x000F ++#define NVM_INIT_CTRL_4 0x0013 ++#define NVM_LED_1_CFG 0x001C ++#define NVM_LED_0_2_CFG 0x001F ++#define NVM_ETRACK_WORD 0x0042 ++#define NVM_ETRACK_HIWORD 
0x0043 ++#define NVM_COMB_VER_OFF 0x0083 ++#define NVM_COMB_VER_PTR 0x003d ++ ++/* NVM version defines */ ++#define NVM_MAJOR_MASK 0xF000 ++#define NVM_MINOR_MASK 0x0FF0 ++#define NVM_IMAGE_ID_MASK 0x000F ++#define NVM_COMB_VER_MASK 0x00FF ++#define NVM_MAJOR_SHIFT 12 ++#define NVM_MINOR_SHIFT 4 ++#define NVM_COMB_VER_SHFT 8 ++#define NVM_VER_INVALID 0xFFFF ++#define NVM_ETRACK_SHIFT 16 ++#define NVM_ETRACK_VALID 0x8000 ++#define NVM_NEW_DEC_MASK 0x0F00 ++#define NVM_HEX_CONV 16 ++#define NVM_HEX_TENS 10 ++ ++#define NVM_ETS_CFG 0x003E ++#define NVM_ETS_LTHRES_DELTA_MASK 0x07C0 ++#define NVM_ETS_LTHRES_DELTA_SHIFT 6 ++#define NVM_ETS_TYPE_MASK 0x0038 ++#define NVM_ETS_TYPE_SHIFT 3 ++#define NVM_ETS_TYPE_EMC 0x000 ++#define NVM_ETS_NUM_SENSORS_MASK 0x0007 ++#define NVM_ETS_DATA_LOC_MASK 0x3C00 ++#define NVM_ETS_DATA_LOC_SHIFT 10 ++#define NVM_ETS_DATA_INDEX_MASK 0x0300 ++#define NVM_ETS_DATA_INDEX_SHIFT 8 ++#define NVM_ETS_DATA_HTHRESH_MASK 0x00FF ++ ++#define E1000_NVM_CFG_DONE_PORT_0 0x040000 /* MNG config cycle done */ ++#define E1000_NVM_CFG_DONE_PORT_1 0x080000 /* ...for second port */ ++#define E1000_NVM_CFG_DONE_PORT_2 0x100000 /* ...for third port */ ++#define E1000_NVM_CFG_DONE_PORT_3 0x200000 /* ...for fourth port */ ++ ++#define NVM_82580_LAN_FUNC_OFFSET(a) (a ? (0x40 + (0x40 * a)) : 0) ++ ++/* Mask bits for fields in Word 0x24 of the NVM */ ++#define NVM_WORD24_COM_MDIO 0x0008 /* MDIO interface shared */ ++#define NVM_WORD24_EXT_MDIO 0x0004 /* MDIO accesses routed external */ ++ ++/* Mask bits for fields in Word 0x0f of the NVM */ ++#define NVM_WORD0F_PAUSE_MASK 0x3000 ++#define NVM_WORD0F_ASM_DIR 0x2000 ++ ++/* Mask bits for fields in Word 0x1a of the NVM */ ++ ++/* length of string needed to store part num */ ++#define E1000_PBANUM_LENGTH 11 ++ ++/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */ ++#define NVM_SUM 0xBABA ++ ++#define NVM_PBA_OFFSET_0 8 ++#define NVM_PBA_OFFSET_1 9 ++#define NVM_RESERVED_WORD 0xFFFF ++#define NVM_PBA_PTR_GUARD 0xFAFA ++#define NVM_WORD_SIZE_BASE_SHIFT 6 ++ ++/* NVM Commands - Microwire */ ++ ++/* NVM Commands - SPI */ ++#define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ ++#define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */ ++#define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */ ++#define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ ++#define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */ ++#define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */ ++ ++/* SPI NVM Status Register */ ++#define NVM_STATUS_RDY_SPI 0x01 ++ ++/* Word definitions for ID LED Settings */ ++#define ID_LED_RESERVED_0000 0x0000 ++#define ID_LED_RESERVED_FFFF 0xFFFF ++#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ ++ (ID_LED_OFF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++#define ID_LED_DEF1_DEF2 0x1 ++#define ID_LED_DEF1_ON2 0x2 ++#define ID_LED_DEF1_OFF2 0x3 ++#define ID_LED_ON1_DEF2 0x4 ++#define ID_LED_ON1_ON2 0x5 ++#define ID_LED_ON1_OFF2 0x6 ++#define ID_LED_OFF1_DEF2 0x7 ++#define ID_LED_OFF1_ON2 0x8 ++#define ID_LED_OFF1_OFF2 0x9 ++ ++#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF ++#define IGP_ACTIVITY_LED_ENABLE 0x0300 ++#define IGP_LED3_MODE 0x07000000 ++ ++/* PCI/PCI-X/PCI-EX Config space */ ++#define PCIE_DEVICE_CONTROL2 0x28 ++#define PCIE_DEVICE_CONTROL2_16ms 0x0005 ++ ++#define PHY_REVISION_MASK 0xFFFFFFF0 ++#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ ++#define MAX_PHY_MULTI_PAGE_REG 0xF ++ ++/* Bit definitions for valid PHY IDs. 
*/ ++/* I = Integrated ++ * E = External ++ */ ++#define M88E1111_I_PHY_ID 0x01410CC0 ++#define M88E1112_E_PHY_ID 0x01410C90 ++#define I347AT4_E_PHY_ID 0x01410DC0 ++#define IGP03E1000_E_PHY_ID 0x02A80390 ++#define I82580_I_PHY_ID 0x015403A0 ++#define I350_I_PHY_ID 0x015403B0 ++#define M88_VENDOR 0x0141 ++#define I210_I_PHY_ID 0x01410C00 ++#define M88E1543_E_PHY_ID 0x01410EA0 ++ ++/* M88E1000 Specific Registers */ ++#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ ++#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */ ++#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */ ++ ++#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for page number setting */ ++#define M88E1000_PHY_GEN_CONTROL 0x1E /* Its meaning depends on reg 29 */ ++ ++/* M88E1000 PHY Specific Control Register */ ++#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */ ++/* 1=CLK125 low, 0=CLK125 toggling */ ++#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 /* MDI Crossover Mode bits 6:5 */ ++ /* Manual MDI configuration */ ++#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ ++/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */ ++#define M88E1000_PSCR_AUTO_X_1000T 0x0040 ++/* Auto crossover enabled all speeds */ ++#define M88E1000_PSCR_AUTO_X_MODE 0x0060 ++/* 1=Enable Extended 10BASE-T distance (Lower 10BASE-T Rx Threshold ++ * 0=Normal 10BASE-T Rx Threshold ++ */ ++/* 1=5-bit interface in 100BASE-TX, 0=MII interface in 100BASE-TX */ ++#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */ ++ ++/* M88E1000 PHY Specific Status Register */ ++#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ ++#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ ++#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ ++/* 0 = <50M ++ * 1 = 50-80M ++ * 2 = 80-110M ++ * 3 = 110-140M ++ * 4 = >140M ++ */ ++#define M88E1000_PSSR_CABLE_LENGTH 0x0380 ++#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ ++#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ ++ ++#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 ++ ++/* M88E1000 Extended PHY Specific Control Register */ ++/* 1 = Lost lock detect enabled. 
++ * Will assert lost lock and bring ++ * link down if idle not seen ++ * within 1ms in 1000BASE-T ++ */ ++/* Number of times we will attempt to autonegotiate before downshifting if we ++ * are the master ++ */ ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 ++/* Number of times we will attempt to autonegotiate before downshifting if we ++ * are the slave ++ */ ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 ++#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ ++ ++/* Intel i347-AT4 Registers */ ++ ++#define I347AT4_PCDL 0x10 /* PHY Cable Diagnostics Length */ ++#define I347AT4_PCDC 0x15 /* PHY Cable Diagnostics Control */ ++#define I347AT4_PAGE_SELECT 0x16 ++ ++/* i347-AT4 Extended PHY Specific Control Register */ ++ ++/* Number of times we will attempt to autonegotiate before downshifting if we ++ * are the master ++ */ ++#define I347AT4_PSCR_DOWNSHIFT_ENABLE 0x0800 ++#define I347AT4_PSCR_DOWNSHIFT_MASK 0x7000 ++#define I347AT4_PSCR_DOWNSHIFT_1X 0x0000 ++#define I347AT4_PSCR_DOWNSHIFT_2X 0x1000 ++#define I347AT4_PSCR_DOWNSHIFT_3X 0x2000 ++#define I347AT4_PSCR_DOWNSHIFT_4X 0x3000 ++#define I347AT4_PSCR_DOWNSHIFT_5X 0x4000 ++#define I347AT4_PSCR_DOWNSHIFT_6X 0x5000 ++#define I347AT4_PSCR_DOWNSHIFT_7X 0x6000 ++#define I347AT4_PSCR_DOWNSHIFT_8X 0x7000 ++ ++/* i347-AT4 PHY Cable Diagnostics Control */ ++#define I347AT4_PCDC_CABLE_LENGTH_UNIT 0x0400 /* 0=cm 1=meters */ ++ ++/* Marvell 1112 only registers */ ++#define M88E1112_VCT_DSP_DISTANCE 0x001A ++ ++/* M88EC018 Rev 2 specific DownShift settings */ ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 ++ ++/* MDI Control */ ++#define E1000_MDIC_DATA_MASK 0x0000FFFF ++#define E1000_MDIC_REG_MASK 0x001F0000 ++#define E1000_MDIC_REG_SHIFT 16 ++#define E1000_MDIC_PHY_MASK 0x03E00000 ++#define E1000_MDIC_PHY_SHIFT 21 ++#define E1000_MDIC_OP_WRITE 0x04000000 ++#define E1000_MDIC_OP_READ 0x08000000 ++#define E1000_MDIC_READY 0x10000000 ++#define E1000_MDIC_INT_EN 0x20000000 ++#define E1000_MDIC_ERROR 0x40000000 ++#define E1000_MDIC_DEST 0x80000000 ++ ++/* Thermal Sensor */ ++#define E1000_THSTAT_PWR_DOWN 0x00000001 /* Power Down Event */ ++#define E1000_THSTAT_LINK_THROTTLE 0x00000002 /* Link Speed Throttle Event */ ++ ++/* Energy Efficient Ethernet */ ++#define E1000_IPCNFG_EEE_1G_AN 0x00000008 /* EEE Enable 1G AN */ ++#define E1000_IPCNFG_EEE_100M_AN 0x00000004 /* EEE Enable 100M AN */ ++#define E1000_EEER_TX_LPI_EN 0x00010000 /* EEE Tx LPI Enable */ ++#define E1000_EEER_RX_LPI_EN 0x00020000 /* EEE Rx LPI Enable */ ++#define E1000_EEER_FRC_AN 0x10000000 /* Enable EEE in loopback */ ++#define E1000_EEER_LPI_FC 0x00040000 /* EEE Enable on FC */ ++#define E1000_EEE_SU_LPI_CLK_STP 0X00800000 /* EEE LPI Clock Stop */ ++#define E1000_EEER_EEE_NEG 0x20000000 /* EEE capability nego */ ++#define E1000_EEE_LP_ADV_ADDR_I350 0x040F /* EEE LP Advertisement */ ++#define E1000_EEE_LP_ADV_DEV_I210 7 /* EEE LP Adv Device */ ++#define E1000_EEE_LP_ADV_ADDR_I210 61 /* EEE LP Adv Register */ ++#define E1000_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */ ++#define E1000_M88E1543_PAGE_ADDR 0x16 /* Page Offset Register */ ++#define E1000_M88E1543_EEE_CTRL_1 0x0 ++#define E1000_M88E1543_EEE_CTRL_1_MS 0x0001 /* EEE Master/Slave */ ++#define E1000_EEE_ADV_DEV_I354 7 ++#define E1000_EEE_ADV_ADDR_I354 60 ++#define E1000_EEE_ADV_100_SUPPORTED (1 << 1) /* 100BaseTx EEE Supported */ ++#define 
E1000_EEE_ADV_1000_SUPPORTED (1 << 2) /* 1000BaseT EEE Supported */ ++#define E1000_PCS_STATUS_DEV_I354 3 ++#define E1000_PCS_STATUS_ADDR_I354 1 ++#define E1000_PCS_STATUS_TX_LPI_IND 0x0200 /* Tx in LPI state */ ++#define E1000_PCS_STATUS_RX_LPI_RCVD 0x0400 ++#define E1000_PCS_STATUS_TX_LPI_RCVD 0x0800 ++ ++/* SerDes Control */ ++#define E1000_GEN_CTL_READY 0x80000000 ++#define E1000_GEN_CTL_ADDRESS_SHIFT 8 ++#define E1000_GEN_POLL_TIMEOUT 640 ++ ++#define E1000_VFTA_ENTRY_SHIFT 5 ++#define E1000_VFTA_ENTRY_MASK 0x7F ++#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F ++ ++/* DMA Coalescing register fields */ ++#define E1000_PCIEMISC_LX_DECISION 0x00000080 /* Lx power on DMA coal */ ++ ++/* Tx Rate-Scheduler Config fields */ ++#define E1000_RTTBCNRC_RS_ENA 0x80000000 ++#define E1000_RTTBCNRC_RF_DEC_MASK 0x00003FFF ++#define E1000_RTTBCNRC_RF_INT_SHIFT 14 ++#define E1000_RTTBCNRC_RF_INT_MASK \ ++ (E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT) ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/igb_main.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/igb_main.c 2021-04-07 16:01:27.448633850 +0800 +@@ -0,0 +1,5676 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2015 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * Copyright(c) 2015 Gilles Chanteperdrix ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "igb.h" ++ ++#include ++ ++// RTNET redefines ++#ifdef NETIF_F_TSO ++#undef NETIF_F_TSO ++#define NETIF_F_TSO 0 ++#endif ++ ++#ifdef NETIF_F_TSO6 ++#undef NETIF_F_TSO6 ++#define NETIF_F_TSO6 0 ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#undef NETIF_F_HW_VLAN_TX ++#define NETIF_F_HW_VLAN_TX 0 ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_RX ++#undef NETIF_F_HW_VLAN_RX ++#define NETIF_F_HW_VLAN_RX 0 ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_FILTER ++#undef NETIF_F_HW_VLAN_FILTER ++#define NETIF_F_HW_VLAN_FILTER 0 ++#endif ++ ++#ifdef IGB_MAX_TX_QUEUES ++#undef IGB_MAX_TX_QUEUES ++#define IGB_MAX_TX_QUEUES 1 ++#endif ++ ++#ifdef IGB_MAX_RX_QUEUES ++#undef IGB_MAX_RX_QUEUES ++#define IGB_MAX_RX_QUEUES 1 ++#endif ++ ++#ifdef CONFIG_IGB_NAPI ++#undef CONFIG_IGB_NAPI ++#endif ++ ++#ifdef IGB_HAVE_TX_TIMEOUT ++#undef IGB_HAVE_TX_TIMEOUT ++#endif ++ ++#ifdef ETHTOOL_GPERMADDR ++#undef ETHTOOL_GPERMADDR ++#endif ++ ++#ifdef CONFIG_PM ++#undef CONFIG_PM ++#endif ++ ++#ifdef CONFIG_NET_POLL_CONTROLLER ++#undef CONFIG_NET_POLL_CONTROLLER ++#endif ++ ++#ifdef MAX_SKB_FRAGS ++#undef MAX_SKB_FRAGS ++#define MAX_SKB_FRAGS 1 ++#endif ++ ++#ifdef IGB_FRAMES_SUPPORT ++#undef IGB_FRAMES_SUPPORT ++#endif ++ ++#define MAJ 5 ++#define MIN 2 ++#define BUILD 18 ++#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ ++__stringify(BUILD) "-k" ++char igb_driver_name[] = "rt_igb"; ++char igb_driver_version[] = DRV_VERSION; ++static const char igb_driver_string[] = ++ "Intel(R) Gigabit Ethernet Network Driver"; ++static const char igb_copyright[] = ++ "Copyright (c) 2007-2014 Intel Corporation."; ++ ++static const struct e1000_info *igb_info_tbl[] = { ++ [board_82575] = &e1000_82575_info, ++}; ++ ++#define MAX_UNITS 8 ++static int InterruptThrottle = 0; ++module_param(InterruptThrottle, uint, 0); ++MODULE_PARM_DESC(InterruptThrottle, "Throttle interrupts (boolean, false by default)"); ++ ++static const struct pci_device_id igb_pci_tbl[] = { ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 }, 
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 }, ++ /* required last entry */ ++ {0, } ++}; ++ ++MODULE_DEVICE_TABLE(pci, igb_pci_tbl); ++ ++static int igb_setup_all_tx_resources(struct igb_adapter *); ++static int igb_setup_all_rx_resources(struct igb_adapter *); ++static void igb_free_all_tx_resources(struct igb_adapter *); ++static void igb_free_all_rx_resources(struct igb_adapter *); ++static void igb_setup_mrqc(struct igb_adapter *); ++static int igb_probe(struct pci_dev *, const struct pci_device_id *); ++static void igb_remove(struct pci_dev *pdev); ++static int igb_sw_init(struct igb_adapter *); ++static int igb_open(struct rtnet_device *); ++static int igb_close(struct rtnet_device *); ++static void igb_configure(struct igb_adapter *); ++static void igb_configure_tx(struct igb_adapter *); ++static void igb_configure_rx(struct igb_adapter *); ++static void igb_clean_all_tx_rings(struct igb_adapter *); ++static void igb_clean_all_rx_rings(struct igb_adapter *); ++static void igb_clean_tx_ring(struct igb_ring *); ++static void igb_clean_rx_ring(struct igb_ring *); ++static void igb_set_rx_mode(struct rtnet_device *); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++static void igb_update_phy_info(struct timer_list *); ++static void igb_watchdog(struct timer_list *); ++#else ++static void igb_update_phy_info(unsigned long); ++static void igb_watchdog(unsigned long); ++#endif ++static void igb_watchdog_task(struct work_struct *); ++static netdev_tx_t igb_xmit_frame(struct rtskb *skb, struct rtnet_device *); ++static struct net_device_stats *igb_get_stats(struct rtnet_device *); ++static int igb_intr(rtdm_irq_t *irq_handle); ++static int igb_intr_msi(rtdm_irq_t *irq_handle); ++static void igb_nrtsig_watchdog(rtdm_nrtsig_t *sig, void *data); ++static irqreturn_t igb_msix_other(int irq, void *); ++static int igb_msix_ring(rtdm_irq_t *irq_handle); ++static void igb_poll(struct igb_q_vector *); ++static bool igb_clean_tx_irq(struct igb_q_vector *); ++static bool igb_clean_rx_irq(struct igb_q_vector *, int); ++static int igb_ioctl(struct rtnet_device *, struct ifreq *ifr, int cmd); ++static void igb_reset_task(struct work_struct *); ++static void igb_vlan_mode(struct rtnet_device *netdev, ++ netdev_features_t features); ++static int igb_vlan_rx_add_vid(struct rtnet_device *, __be16, u16); ++static void igb_restore_vlan(struct igb_adapter *); ++static void 
igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8); ++ ++#ifdef CONFIG_PM ++#ifdef CONFIG_PM_SLEEP ++static int igb_suspend(struct device *); ++#endif ++static int igb_resume(struct device *); ++static int igb_runtime_suspend(struct device *dev); ++static int igb_runtime_resume(struct device *dev); ++static int igb_runtime_idle(struct device *dev); ++static const struct dev_pm_ops igb_pm_ops = { ++ SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) ++ SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, ++ igb_runtime_idle) ++}; ++#endif ++static void igb_shutdown(struct pci_dev *); ++static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs); ++#ifdef CONFIG_NET_POLL_CONTROLLER ++/* for netdump / net console */ ++static void igb_netpoll(struct rtnet_device *); ++#endif ++ ++static pci_ers_result_t igb_io_error_detected(struct pci_dev *, ++ pci_channel_state_t); ++static pci_ers_result_t igb_io_slot_reset(struct pci_dev *); ++static void igb_io_resume(struct pci_dev *); ++ ++static const struct pci_error_handlers igb_err_handler = { ++ .error_detected = igb_io_error_detected, ++ .slot_reset = igb_io_slot_reset, ++ .resume = igb_io_resume, ++}; ++ ++static void igb_init_dmac(struct igb_adapter *adapter, u32 pba); ++ ++static struct pci_driver igb_driver = { ++ .name = igb_driver_name, ++ .id_table = igb_pci_tbl, ++ .probe = igb_probe, ++ .remove = igb_remove, ++#ifdef CONFIG_PM ++ .driver.pm = &igb_pm_ops, ++#endif ++ .shutdown = igb_shutdown, ++ .sriov_configure = igb_pci_sriov_configure, ++ .err_handler = &igb_err_handler ++}; ++ ++MODULE_AUTHOR("Intel Corporation, "); ++MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); ++MODULE_LICENSE("GPL"); ++MODULE_VERSION(DRV_VERSION); ++ ++#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) ++static int local_debug = -1; ++module_param_named(debug, local_debug, int, 0); ++MODULE_PARM_DESC(debug, "debug level (0=none,...,16=all)"); ++ ++struct igb_reg_info { ++ u32 ofs; ++ char *name; ++}; ++ ++static const struct igb_reg_info igb_reg_info_tbl[] = { ++ ++ /* General Registers */ ++ {E1000_CTRL, "CTRL"}, ++ {E1000_STATUS, "STATUS"}, ++ {E1000_CTRL_EXT, "CTRL_EXT"}, ++ ++ /* Interrupt Registers */ ++ {E1000_ICR, "ICR"}, ++ ++ /* RX Registers */ ++ {E1000_RCTL, "RCTL"}, ++ {E1000_RDLEN(0), "RDLEN"}, ++ {E1000_RDH(0), "RDH"}, ++ {E1000_RDT(0), "RDT"}, ++ {E1000_RXDCTL(0), "RXDCTL"}, ++ {E1000_RDBAL(0), "RDBAL"}, ++ {E1000_RDBAH(0), "RDBAH"}, ++ ++ /* TX Registers */ ++ {E1000_TCTL, "TCTL"}, ++ {E1000_TDBAL(0), "TDBAL"}, ++ {E1000_TDBAH(0), "TDBAH"}, ++ {E1000_TDLEN(0), "TDLEN"}, ++ {E1000_TDH(0), "TDH"}, ++ {E1000_TDT(0), "TDT"}, ++ {E1000_TXDCTL(0), "TXDCTL"}, ++ {E1000_TDFH, "TDFH"}, ++ {E1000_TDFT, "TDFT"}, ++ {E1000_TDFHS, "TDFHS"}, ++ {E1000_TDFPC, "TDFPC"}, ++ ++ /* List Terminator */ ++ {} ++}; ++ ++/* igb_regdump - register printout routine */ ++static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo) ++{ ++ int n = 0; ++ char rname[16]; ++ u32 regs[8]; ++ ++ switch (reginfo->ofs) { ++ case E1000_RDLEN(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDLEN(n)); ++ break; ++ case E1000_RDH(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDH(n)); ++ break; ++ case E1000_RDT(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDT(n)); ++ break; ++ case E1000_RXDCTL(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RXDCTL(n)); ++ break; ++ case E1000_RDBAL(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDBAL(n)); ++ break; ++ case E1000_RDBAH(0): ++ 
for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDBAH(n)); ++ break; ++ case E1000_TDBAL(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDBAL(n)); ++ break; ++ case E1000_TDBAH(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_TDBAH(n)); ++ break; ++ case E1000_TDLEN(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_TDLEN(n)); ++ break; ++ case E1000_TDH(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_TDH(n)); ++ break; ++ case E1000_TDT(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_TDT(n)); ++ break; ++ case E1000_TXDCTL(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_TXDCTL(n)); ++ break; ++ default: ++ pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs)); ++ return; ++ } ++ ++ snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]"); ++ pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1], ++ regs[2], regs[3]); ++} ++ ++/* igb_dump - Print registers, Tx-rings and Rx-rings */ ++static void igb_dump(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ struct igb_reg_info *reginfo; ++ struct igb_ring *tx_ring; ++ union e1000_adv_tx_desc *tx_desc; ++ struct my_u0 { u64 a; u64 b; } *u0; ++ struct igb_ring *rx_ring; ++ union e1000_adv_rx_desc *rx_desc; ++ u32 staterr; ++ u16 i, n; ++ ++ /* Print netdevice Info */ ++ if (netdev) { ++ dev_info(&adapter->pdev->dev, "Net device Info\n"); ++ pr_info("Device Name\n"); ++ pr_info("%s\n", netdev->name); ++ } ++ ++ /* Print Registers */ ++ dev_info(&adapter->pdev->dev, "Register Dump\n"); ++ pr_info(" Register Name Value\n"); ++ for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl; ++ reginfo->name; reginfo++) { ++ igb_regdump(hw, reginfo); ++ } ++ ++ /* Print TX Ring Summary */ ++ if (!netdev || !rtnetif_running(netdev)) ++ goto exit; ++ ++ dev_info(&adapter->pdev->dev, "TX Rings Summary\n"); ++ pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n"); ++ for (n = 0; n < adapter->num_tx_queues; n++) { ++ struct igb_tx_buffer *buffer_info; ++ tx_ring = adapter->tx_ring[n]; ++ buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; ++ pr_info(" %5d %5X %5X %p %016llX\n", ++ n, tx_ring->next_to_use, tx_ring->next_to_clean, ++ buffer_info->next_to_watch, ++ (u64)buffer_info->time_stamp); ++ } ++ ++ dev_info(&adapter->pdev->dev, "TX Rings Dump\n"); ++ ++ /* Transmit Descriptor Formats ++ * ++ * Advanced Transmit Descriptor ++ * +--------------------------------------------------------------+ ++ * 0 | Buffer Address [63:0] | ++ * +--------------------------------------------------------------+ ++ * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN | ++ * +--------------------------------------------------------------+ ++ * 63 46 45 40 39 38 36 35 32 31 24 15 0 ++ */ ++ ++ for (n = 0; n < adapter->num_tx_queues; n++) { ++ tx_ring = adapter->tx_ring[n]; ++ pr_info("------------------------------------\n"); ++ pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index); ++ pr_info("------------------------------------\n"); ++ pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] " ++ "[bi->dma ] leng ntw timestamp " ++ "bi->skb\n"); ++ ++ for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { ++ const char *next_desc; ++ struct igb_tx_buffer *buffer_info; ++ tx_desc = IGB_TX_DESC(tx_ring, i); ++ buffer_info = &tx_ring->tx_buffer_info[i]; ++ u0 = (struct my_u0 *)tx_desc; ++ if (i == tx_ring->next_to_use && ++ i == tx_ring->next_to_clean) ++ next_desc = " NTC/U"; ++ else if (i == tx_ring->next_to_use) ++ next_desc 
= " NTU"; ++ else if (i == tx_ring->next_to_clean) ++ next_desc = " NTC"; ++ else ++ next_desc = ""; ++ ++ pr_info("T [0x%03X] %016llX %016llX" ++ " %p %016llX %p%s\n", i, ++ le64_to_cpu(u0->a), ++ le64_to_cpu(u0->b), ++ buffer_info->next_to_watch, ++ (u64)buffer_info->time_stamp, ++ buffer_info->skb, next_desc); ++ ++ if (buffer_info->skb) ++ print_hex_dump(KERN_INFO, "", ++ DUMP_PREFIX_ADDRESS, ++ 16, 1, buffer_info->skb->data, ++ 14, ++ true); ++ } ++ } ++ ++ /* Print RX Rings Summary */ ++ dev_info(&adapter->pdev->dev, "RX Rings Summary\n"); ++ pr_info("Queue [NTU] [NTC]\n"); ++ for (n = 0; n < adapter->num_rx_queues; n++) { ++ rx_ring = adapter->rx_ring[n]; ++ pr_info(" %5d %5X %5X\n", ++ n, rx_ring->next_to_use, rx_ring->next_to_clean); ++ } ++ ++ /* Print RX Rings */ ++ dev_info(&adapter->pdev->dev, "RX Rings Dump\n"); ++ ++ /* Advanced Receive Descriptor (Read) Format ++ * 63 1 0 ++ * +-----------------------------------------------------+ ++ * 0 | Packet Buffer Address [63:1] |A0/NSE| ++ * +----------------------------------------------+------+ ++ * 8 | Header Buffer Address [63:1] | DD | ++ * +-----------------------------------------------------+ ++ * ++ * ++ * Advanced Receive Descriptor (Write-Back) Format ++ * ++ * 63 48 47 32 31 30 21 20 17 16 4 3 0 ++ * +------------------------------------------------------+ ++ * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS | ++ * | Checksum Ident | | | | Type | Type | ++ * +------------------------------------------------------+ ++ * 8 | VLAN Tag | Length | Extended Error | Extended Status | ++ * +------------------------------------------------------+ ++ * 63 48 47 32 31 20 19 0 ++ */ ++ ++ for (n = 0; n < adapter->num_rx_queues; n++) { ++ rx_ring = adapter->rx_ring[n]; ++ pr_info("------------------------------------\n"); ++ pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index); ++ pr_info("------------------------------------\n"); ++ pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] " ++ "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); ++ pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----" ++ "----------- [bi->skb] <-- Adv Rx Write-Back format\n"); ++ ++ for (i = 0; i < rx_ring->count; i++) { ++ const char *next_desc; ++ struct igb_rx_buffer *buffer_info; ++ buffer_info = &rx_ring->rx_buffer_info[i]; ++ rx_desc = IGB_RX_DESC(rx_ring, i); ++ u0 = (struct my_u0 *)rx_desc; ++ staterr = le32_to_cpu(rx_desc->wb.upper.status_error); ++ ++ if (i == rx_ring->next_to_use) ++ next_desc = " NTU"; ++ else if (i == rx_ring->next_to_clean) ++ next_desc = " NTC"; ++ else ++ next_desc = ""; ++ ++ if (staterr & E1000_RXD_STAT_DD) { ++ /* Descriptor Done */ ++ pr_info("%s[0x%03X] %016llX %016llX ---------------- %s\n", ++ "RWB", i, ++ le64_to_cpu(u0->a), ++ le64_to_cpu(u0->b), ++ next_desc); ++ } else { ++ pr_info("%s[0x%03X] %016llX %016llX %016llX %s\n", ++ "R ", i, ++ le64_to_cpu(u0->a), ++ le64_to_cpu(u0->b), ++ (u64)buffer_info->dma, ++ next_desc); ++ ++ } ++ } ++ } ++ ++exit: ++ return; ++} ++ ++/** ++ * igb_get_hw_dev - return device ++ * @hw: pointer to hardware structure ++ * ++ * used by hardware layer to print debugging information ++ **/ ++struct rtnet_device *igb_get_hw_dev(struct e1000_hw *hw) ++{ ++ struct igb_adapter *adapter = hw->back; ++ return adapter->netdev; ++} ++ ++/** ++ * igb_init_module - Driver Registration Routine ++ * ++ * igb_init_module is the first routine called when the driver is ++ * loaded. All it does is register with the PCI subsystem. 
++ **/ ++static int __init igb_init_module(void) ++{ ++ int ret; ++ ++ pr_info("%s - version %s\n", ++ igb_driver_string, igb_driver_version); ++ pr_info("%s\n", igb_copyright); ++ ++ ret = pci_register_driver(&igb_driver); ++ return ret; ++} ++ ++module_init(igb_init_module); ++ ++/** ++ * igb_exit_module - Driver Exit Cleanup Routine ++ * ++ * igb_exit_module is called just before the driver is removed ++ * from memory. ++ **/ ++static void __exit igb_exit_module(void) ++{ ++ pci_unregister_driver(&igb_driver); ++} ++ ++module_exit(igb_exit_module); ++ ++#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1)) ++/** ++ * igb_cache_ring_register - Descriptor ring to register mapping ++ * @adapter: board private structure to initialize ++ * ++ * Once we know the feature-set enabled for the device, we'll cache ++ * the register offset the descriptor ring is assigned to. ++ **/ ++static void igb_cache_ring_register(struct igb_adapter *adapter) ++{ ++ int i = 0, j = 0; ++ u32 rbase_offset = 0; ++ ++ switch (adapter->hw.mac.type) { ++ case e1000_82576: ++ /* The queues are allocated for virtualization such that VF 0 ++ * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc. ++ * In order to avoid collision we start at the first free queue ++ * and continue consuming queues in the same sequence ++ */ ++ /* Fall through */ ++ case e1000_82575: ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ case e1000_i211: ++ /* Fall through */ ++ default: ++ for (; i < adapter->num_rx_queues; i++) ++ adapter->rx_ring[i]->reg_idx = rbase_offset + i; ++ for (; j < adapter->num_tx_queues; j++) ++ adapter->tx_ring[j]->reg_idx = rbase_offset + j; ++ break; ++ } ++} ++ ++u32 igb_rd32(struct e1000_hw *hw, u32 reg) ++{ ++ struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw); ++ u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); ++ u32 value = 0; ++ ++ if (E1000_REMOVED(hw_addr)) ++ return ~value; ++ ++ value = readl(&hw_addr[reg]); ++ ++ /* reads should not return all F's */ ++ if (!(~value) && (!reg || !(~readl(hw_addr)))) { ++ struct rtnet_device *netdev = igb->netdev; ++ hw->hw_addr = NULL; ++ rtnetif_device_detach(netdev); ++ rtdev_err(netdev, "PCIe link lost, device now detached\n"); ++ } ++ ++ return value; ++} ++ ++/** ++ * igb_write_ivar - configure ivar for given MSI-X vector ++ * @hw: pointer to the HW structure ++ * @msix_vector: vector number we are allocating to a given ring ++ * @index: row index of IVAR register to write within IVAR table ++ * @offset: column offset of in IVAR, should be multiple of 8 ++ * ++ * This function is intended to handle the writing of the IVAR register ++ * for adapters 82576 and newer. The IVAR table consists of 2 columns, ++ * each containing an cause allocation for an Rx and Tx ring, and a ++ * variable number of rows depending on the number of queues supported. 
++ **/ ++static void igb_write_ivar(struct e1000_hw *hw, int msix_vector, ++ int index, int offset) ++{ ++ u32 ivar = array_rd32(E1000_IVAR0, index); ++ ++ /* clear any bits that are currently set */ ++ ivar &= ~((u32)0xFF << offset); ++ ++ /* write vector and valid bit */ ++ ivar |= (msix_vector | E1000_IVAR_VALID) << offset; ++ ++ array_wr32(E1000_IVAR0, index, ivar); ++} ++ ++#define IGB_N0_QUEUE -1 ++static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) ++{ ++ struct igb_adapter *adapter = q_vector->adapter; ++ struct e1000_hw *hw = &adapter->hw; ++ int rx_queue = IGB_N0_QUEUE; ++ int tx_queue = IGB_N0_QUEUE; ++ u32 msixbm = 0; ++ ++ if (q_vector->rx.ring) ++ rx_queue = q_vector->rx.ring->reg_idx; ++ if (q_vector->tx.ring) ++ tx_queue = q_vector->tx.ring->reg_idx; ++ ++ switch (hw->mac.type) { ++ case e1000_82575: ++ /* The 82575 assigns vectors using a bitmask, which matches the ++ * bitmask for the EICR/EIMS/EIMC registers. To assign one ++ * or more queues to a vector, we write the appropriate bits ++ * into the MSIXBM register for that vector. ++ */ ++ if (rx_queue > IGB_N0_QUEUE) ++ msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; ++ if (tx_queue > IGB_N0_QUEUE) ++ msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue; ++ if (!(adapter->flags & IGB_FLAG_HAS_MSIX) && msix_vector == 0) ++ msixbm |= E1000_EIMS_OTHER; ++ array_wr32(E1000_MSIXBM(0), msix_vector, msixbm); ++ q_vector->eims_value = msixbm; ++ break; ++ case e1000_82576: ++ /* 82576 uses a table that essentially consists of 2 columns ++ * with 8 rows. The ordering is column-major so we use the ++ * lower 3 bits as the row index, and the 4th bit as the ++ * column offset. ++ */ ++ if (rx_queue > IGB_N0_QUEUE) ++ igb_write_ivar(hw, msix_vector, ++ rx_queue & 0x7, ++ (rx_queue & 0x8) << 1); ++ if (tx_queue > IGB_N0_QUEUE) ++ igb_write_ivar(hw, msix_vector, ++ tx_queue & 0x7, ++ ((tx_queue & 0x8) << 1) + 8); ++ q_vector->eims_value = 1 << msix_vector; ++ break; ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ case e1000_i211: ++ /* On 82580 and newer adapters the scheme is similar to 82576 ++ * however instead of ordering column-major we have things ++ * ordered row-major. So we traverse the table by using ++ * bit 0 as the column offset, and the remaining bits as the ++ * row index. ++ */ ++ if (rx_queue > IGB_N0_QUEUE) ++ igb_write_ivar(hw, msix_vector, ++ rx_queue >> 1, ++ (rx_queue & 0x1) << 4); ++ if (tx_queue > IGB_N0_QUEUE) ++ igb_write_ivar(hw, msix_vector, ++ tx_queue >> 1, ++ ((tx_queue & 0x1) << 4) + 8); ++ q_vector->eims_value = 1 << msix_vector; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ ++ /* add q_vector eims value to global eims_enable_mask */ ++ adapter->eims_enable_mask |= q_vector->eims_value; ++ ++ /* configure q_vector to set itr on first interrupt */ ++ q_vector->set_itr = 1; ++} ++ ++/** ++ * igb_configure_msix - Configure MSI-X hardware ++ * @adapter: board private structure to initialize ++ * ++ * igb_configure_msix sets up the hardware to properly ++ * generate MSI-X interrupts. ++ **/ ++static void igb_configure_msix(struct igb_adapter *adapter) ++{ ++ u32 tmp; ++ int i, vector = 0; ++ struct e1000_hw *hw = &adapter->hw; ++ ++ adapter->eims_enable_mask = 0; ++ ++ /* set vector for other causes, i.e. link changes */ ++ switch (hw->mac.type) { ++ case e1000_82575: ++ tmp = rd32(E1000_CTRL_EXT); ++ /* enable MSI-X PBA support*/ ++ tmp |= E1000_CTRL_EXT_PBA_CLR; ++ ++ /* Auto-Mask interrupts upon ICR read. 
*/ ++ tmp |= E1000_CTRL_EXT_EIAME; ++ tmp |= E1000_CTRL_EXT_IRCA; ++ ++ wr32(E1000_CTRL_EXT, tmp); ++ ++ /* enable msix_other interrupt */ ++ array_wr32(E1000_MSIXBM(0), vector++, E1000_EIMS_OTHER); ++ adapter->eims_other = E1000_EIMS_OTHER; ++ ++ break; ++ ++ case e1000_82576: ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ case e1000_i211: ++ /* Turn on MSI-X capability first, or our settings ++ * won't stick. And it will take days to debug. ++ */ ++ wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE | ++ E1000_GPIE_PBA | E1000_GPIE_EIAME | ++ E1000_GPIE_NSICR); ++ ++ /* enable msix_other interrupt */ ++ adapter->eims_other = 1 << vector; ++ tmp = (vector++ | E1000_IVAR_VALID) << 8; ++ ++ wr32(E1000_IVAR_MISC, tmp); ++ break; ++ default: ++ /* do nothing, since nothing else supports MSI-X */ ++ break; ++ } /* switch (hw->mac.type) */ ++ ++ adapter->eims_enable_mask |= adapter->eims_other; ++ ++ for (i = 0; i < adapter->num_q_vectors; i++) ++ igb_assign_vector(adapter->q_vector[i], vector++); ++ ++ wrfl(); ++} ++ ++/** ++ * igb_request_msix - Initialize MSI-X interrupts ++ * @adapter: board private structure to initialize ++ * ++ * igb_request_msix allocates MSI-X vectors and requests interrupts from the ++ * kernel. ++ **/ ++static int igb_request_msix(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ int i, err = 0, vector = 0, free_vector = 0; ++ ++ err = request_irq(adapter->msix_entries[vector].vector, ++ igb_msix_other, 0, netdev->name, adapter); ++ if (err) ++ goto err_out; ++ ++ for (i = 0; i < adapter->num_q_vectors; i++) { ++ struct igb_q_vector *q_vector = adapter->q_vector[i]; ++ ++ vector++; ++ ++ q_vector->itr_register = hw->hw_addr + E1000_EITR(vector); ++ ++ if (q_vector->rx.ring && q_vector->tx.ring) ++ sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, ++ q_vector->rx.ring->queue_index); ++ else if (q_vector->tx.ring) ++ sprintf(q_vector->name, "%s-tx-%u", netdev->name, ++ q_vector->tx.ring->queue_index); ++ else if (q_vector->rx.ring) ++ sprintf(q_vector->name, "%s-rx-%u", netdev->name, ++ q_vector->rx.ring->queue_index); ++ else ++ sprintf(q_vector->name, "%s-unused", netdev->name); ++ ++ err = rtdm_irq_request(&adapter->msix_irq_handle[vector], ++ adapter->msix_entries[vector].vector, ++ igb_msix_ring, 0, q_vector->name, q_vector); ++ if (err) ++ goto err_free; ++ } ++ ++ igb_configure_msix(adapter); ++ return 0; ++ ++err_free: ++ /* free already assigned IRQs */ ++ free_irq(adapter->msix_entries[free_vector++].vector, adapter); ++ ++ vector--; ++ for (i = 0; i < vector; i++) ++ rtdm_irq_free(&adapter->msix_irq_handle[free_vector++]); ++err_out: ++ return err; ++} ++ ++/** ++ * igb_free_q_vector - Free memory allocated for specific interrupt vector ++ * @adapter: board private structure to initialize ++ * @v_idx: Index of vector to be freed ++ * ++ * This function frees the memory allocated to the q_vector. ++ **/ ++static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) ++{ ++ struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; ++ ++ adapter->q_vector[v_idx] = NULL; ++ ++ /* igb_get_stats64() might access the rings on this vector, ++ * we must wait a grace period before freeing it. 
++ */ ++ if (q_vector) ++ kfree_rcu(q_vector, rcu); ++} ++ ++/** ++ * igb_reset_q_vector - Reset config for interrupt vector ++ * @adapter: board private structure to initialize ++ * @v_idx: Index of vector to be reset ++ * ++ * If NAPI is enabled it will delete any references to the ++ * NAPI struct. This is preparation for igb_free_q_vector. ++ **/ ++static void igb_reset_q_vector(struct igb_adapter *adapter, int v_idx) ++{ ++ struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; ++ ++ /* Coming from igb_set_interrupt_capability, the vectors are not yet ++ * allocated. So, q_vector is NULL so we should stop here. ++ */ ++ if (!q_vector) ++ return; ++ ++ if (q_vector->tx.ring) ++ adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; ++ ++ if (q_vector->rx.ring) ++ adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; ++} ++ ++static void igb_reset_interrupt_capability(struct igb_adapter *adapter) ++{ ++ int v_idx = adapter->num_q_vectors; ++ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) ++ pci_disable_msix(adapter->pdev); ++ else if (adapter->flags & IGB_FLAG_HAS_MSI) ++ pci_disable_msi(adapter->pdev); ++ ++ while (v_idx--) ++ igb_reset_q_vector(adapter, v_idx); ++} ++ ++/** ++ * igb_free_q_vectors - Free memory allocated for interrupt vectors ++ * @adapter: board private structure to initialize ++ * ++ * This function frees the memory allocated to the q_vectors. In addition if ++ * NAPI is enabled it will delete any references to the NAPI struct prior ++ * to freeing the q_vector. ++ **/ ++static void igb_free_q_vectors(struct igb_adapter *adapter) ++{ ++ int v_idx = adapter->num_q_vectors; ++ ++ adapter->num_tx_queues = 0; ++ adapter->num_rx_queues = 0; ++ adapter->num_q_vectors = 0; ++ ++ while (v_idx--) { ++ igb_reset_q_vector(adapter, v_idx); ++ igb_free_q_vector(adapter, v_idx); ++ } ++} ++ ++/** ++ * igb_clear_interrupt_scheme - reset the device to a state of no interrupts ++ * @adapter: board private structure to initialize ++ * ++ * This function resets the device so that it has 0 Rx queues, Tx queues, and ++ * MSI-X interrupts allocated. ++ */ ++static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) ++{ ++ igb_free_q_vectors(adapter); ++ igb_reset_interrupt_capability(adapter); ++} ++ ++/** ++ * igb_set_interrupt_capability - set MSI or MSI-X if supported ++ * @adapter: board private structure to initialize ++ * @msix: boolean value of MSIX capability ++ * ++ * Attempt to configure interrupts using the best available ++ * capabilities of the hardware and kernel. ++ **/ ++static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) ++{ ++ int err; ++ int numvecs, i; ++ ++ if (!msix) ++ goto msi_only; ++ adapter->flags |= IGB_FLAG_HAS_MSIX; ++ ++ /* Number of supported queues. 
*/ ++ adapter->num_rx_queues = adapter->rss_queues; ++ adapter->num_tx_queues = adapter->rss_queues; ++ ++ /* start with one vector for every Rx queue */ ++ numvecs = adapter->num_rx_queues; ++ ++ /* if Tx handler is separate add 1 for every Tx queue */ ++ if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) ++ numvecs += adapter->num_tx_queues; ++ ++ /* store the number of vectors reserved for queues */ ++ adapter->num_q_vectors = numvecs; ++ ++ /* add 1 vector for link status interrupts */ ++ numvecs++; ++ for (i = 0; i < numvecs; i++) ++ adapter->msix_entries[i].entry = i; ++ ++ err = pci_enable_msix_range(adapter->pdev, ++ adapter->msix_entries, ++ numvecs, ++ numvecs); ++ if (err > 0) ++ return; ++ ++ igb_reset_interrupt_capability(adapter); ++ ++ /* If we can't do MSI-X, try MSI */ ++msi_only: ++ adapter->flags &= ~IGB_FLAG_HAS_MSIX; ++ adapter->rss_queues = 1; ++ adapter->flags |= IGB_FLAG_QUEUE_PAIRS; ++ adapter->num_rx_queues = 1; ++ adapter->num_tx_queues = 1; ++ adapter->num_q_vectors = 1; ++ if (!pci_enable_msi(adapter->pdev)) ++ adapter->flags |= IGB_FLAG_HAS_MSI; ++} ++ ++static void igb_add_ring(struct igb_ring *ring, ++ struct igb_ring_container *head) ++{ ++ head->ring = ring; ++ head->count++; ++} ++ ++/** ++ * igb_alloc_q_vector - Allocate memory for a single interrupt vector ++ * @adapter: board private structure to initialize ++ * @v_count: q_vectors allocated on adapter, used for ring interleaving ++ * @v_idx: index of vector in adapter struct ++ * @txr_count: total number of Tx rings to allocate ++ * @txr_idx: index of first Tx ring to allocate ++ * @rxr_count: total number of Rx rings to allocate ++ * @rxr_idx: index of first Rx ring to allocate ++ * ++ * We allocate one q_vector. If allocation fails we return -ENOMEM. ++ **/ ++static int igb_alloc_q_vector(struct igb_adapter *adapter, ++ int v_count, int v_idx, ++ int txr_count, int txr_idx, ++ int rxr_count, int rxr_idx) ++{ ++ struct igb_q_vector *q_vector; ++ struct igb_ring *ring; ++ int ring_count, size; ++ ++ /* igb only supports 1 Tx and/or 1 Rx queue per vector */ ++ if (txr_count > 1 || rxr_count > 1) ++ return -ENOMEM; ++ ++ ring_count = txr_count + rxr_count; ++ size = sizeof(struct igb_q_vector) + ++ (sizeof(struct igb_ring) * ring_count); ++ ++ /* allocate q_vector and rings */ ++ q_vector = adapter->q_vector[v_idx]; ++ if (!q_vector) ++ q_vector = kzalloc(size, GFP_KERNEL); ++ else ++ memset(q_vector, 0, size); ++ if (!q_vector) ++ return -ENOMEM; ++ ++ /* tie q_vector and adapter together */ ++ adapter->q_vector[v_idx] = q_vector; ++ q_vector->adapter = adapter; ++ ++ /* initialize work limits */ ++ q_vector->tx.work_limit = adapter->tx_work_limit; ++ ++ /* initialize ITR configuration */ ++ q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0); ++ q_vector->itr_val = IGB_START_ITR; ++ ++ /* initialize pointer to rings */ ++ ring = q_vector->ring; ++ ++ /* intialize ITR */ ++ if (rxr_count) { ++ /* rx or rx/tx vector */ ++ if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) ++ q_vector->itr_val = adapter->rx_itr_setting; ++ } else { ++ /* tx only vector */ ++ if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) ++ q_vector->itr_val = adapter->tx_itr_setting; ++ } ++ ++ if (txr_count) { ++ /* assign generic ring traits */ ++ ring->dev = &adapter->pdev->dev; ++ ring->netdev = adapter->netdev; ++ ++ /* configure backlink on ring */ ++ ring->q_vector = q_vector; ++ ++ /* update q_vector Tx values */ ++ igb_add_ring(ring, &q_vector->tx); ++ ++ /* For 82575, context index must be unique 
per ring. */ ++ if (adapter->hw.mac.type == e1000_82575) ++ set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); ++ ++ /* apply Tx specific ring traits */ ++ ring->count = adapter->tx_ring_count; ++ ring->queue_index = txr_idx; ++ ++ /* assign ring to adapter */ ++ adapter->tx_ring[txr_idx] = ring; ++ ++ /* push pointer to next ring */ ++ ring++; ++ } ++ ++ if (rxr_count) { ++ /* assign generic ring traits */ ++ ring->dev = &adapter->pdev->dev; ++ ring->netdev = adapter->netdev; ++ ++ /* configure backlink on ring */ ++ ring->q_vector = q_vector; ++ ++ /* update q_vector Rx values */ ++ igb_add_ring(ring, &q_vector->rx); ++ ++ /* set flag indicating ring supports SCTP checksum offload */ ++ if (adapter->hw.mac.type >= e1000_82576) ++ set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); ++ ++ /* On i350, i354, i210, and i211, loopback VLAN packets ++ * have the tag byte-swapped. ++ */ ++ if (adapter->hw.mac.type >= e1000_i350) ++ set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); ++ ++ /* apply Rx specific ring traits */ ++ ring->count = adapter->rx_ring_count; ++ ring->queue_index = rxr_idx; ++ ++ /* assign ring to adapter */ ++ adapter->rx_ring[rxr_idx] = ring; ++ } ++ ++ return 0; ++} ++ ++ ++/** ++ * igb_alloc_q_vectors - Allocate memory for interrupt vectors ++ * @adapter: board private structure to initialize ++ * ++ * We allocate one q_vector per queue interrupt. If allocation fails we ++ * return -ENOMEM. ++ **/ ++static int igb_alloc_q_vectors(struct igb_adapter *adapter) ++{ ++ int q_vectors = adapter->num_q_vectors; ++ int rxr_remaining = adapter->num_rx_queues; ++ int txr_remaining = adapter->num_tx_queues; ++ int rxr_idx = 0, txr_idx = 0, v_idx = 0; ++ int err; ++ ++ if (q_vectors >= (rxr_remaining + txr_remaining)) { ++ for (; rxr_remaining; v_idx++) { ++ err = igb_alloc_q_vector(adapter, q_vectors, v_idx, ++ 0, 0, 1, rxr_idx); ++ ++ if (err) ++ goto err_out; ++ ++ /* update counts and index */ ++ rxr_remaining--; ++ rxr_idx++; ++ } ++ } ++ ++ for (; v_idx < q_vectors; v_idx++) { ++ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); ++ int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); ++ ++ err = igb_alloc_q_vector(adapter, q_vectors, v_idx, ++ tqpv, txr_idx, rqpv, rxr_idx); ++ ++ if (err) ++ goto err_out; ++ ++ /* update counts and index */ ++ rxr_remaining -= rqpv; ++ txr_remaining -= tqpv; ++ rxr_idx++; ++ txr_idx++; ++ } ++ ++ return 0; ++ ++err_out: ++ adapter->num_tx_queues = 0; ++ adapter->num_rx_queues = 0; ++ adapter->num_q_vectors = 0; ++ ++ while (v_idx--) ++ igb_free_q_vector(adapter, v_idx); ++ ++ return -ENOMEM; ++} ++ ++/** ++ * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors ++ * @adapter: board private structure to initialize ++ * @msix: boolean value of MSIX capability ++ * ++ * This function initializes the interrupts and allocates all of the queues. 
++ **/ ++static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int err; ++ ++ igb_set_interrupt_capability(adapter, msix); ++ ++ err = igb_alloc_q_vectors(adapter); ++ if (err) { ++ dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); ++ goto err_alloc_q_vectors; ++ } ++ ++ igb_cache_ring_register(adapter); ++ ++ return 0; ++ ++err_alloc_q_vectors: ++ igb_reset_interrupt_capability(adapter); ++ return err; ++} ++ ++/** ++ * igb_request_irq - initialize interrupts ++ * @adapter: board private structure to initialize ++ * ++ * Attempts to configure interrupts using the best available ++ * capabilities of the hardware and kernel. ++ **/ ++static int igb_request_irq(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ int err = 0; ++ ++ rt_stack_connect(netdev, &STACK_manager); ++ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) { ++ err = igb_request_msix(adapter); ++ if (!err) ++ goto request_done; ++ /* fall back to MSI */ ++ igb_free_all_tx_resources(adapter); ++ igb_free_all_rx_resources(adapter); ++ ++ igb_clear_interrupt_scheme(adapter); ++ err = igb_init_interrupt_scheme(adapter, false); ++ if (err) ++ goto request_done; ++ ++ igb_setup_all_tx_resources(adapter); ++ igb_setup_all_rx_resources(adapter); ++ igb_configure(adapter); ++ } ++ ++ igb_assign_vector(adapter->q_vector[0], 0); ++ ++ if (adapter->flags & IGB_FLAG_HAS_MSI) { ++ err = rtdm_irq_request(&adapter->irq_handle, ++ pdev->irq, igb_intr_msi, 0, ++ netdev->name, adapter); ++ if (!err) ++ goto request_done; ++ ++ /* fall back to legacy interrupts */ ++ igb_reset_interrupt_capability(adapter); ++ adapter->flags &= ~IGB_FLAG_HAS_MSI; ++ } ++ ++ err = rtdm_irq_request(&adapter->irq_handle, ++ pdev->irq, igb_intr, IRQF_SHARED, ++ netdev->name, adapter); ++ ++ if (err) ++ dev_err(&pdev->dev, "Error %d getting interrupt\n", ++ err); ++ ++request_done: ++ return err; ++} ++ ++static void igb_free_irq(struct igb_adapter *adapter) ++{ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) { ++ int vector = 0, i; ++ ++ free_irq(adapter->msix_entries[vector++].vector, adapter); ++ ++ for (i = 0; i < adapter->num_q_vectors; i++) ++ rtdm_irq_free(&adapter->msix_irq_handle[vector++]); ++ } else { ++ rtdm_irq_free(&adapter->irq_handle); ++ } ++} ++ ++/** ++ * igb_irq_disable - Mask off interrupt generation on the NIC ++ * @adapter: board private structure ++ **/ ++static void igb_irq_disable(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* we need to be careful when disabling interrupts. 
The VFs are also ++ * mapped into these registers and so clearing the bits can cause ++ * issues on the VF drivers so we only need to clear what we set ++ */ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) { ++ u32 regval = rd32(E1000_EIAM); ++ ++ wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask); ++ wr32(E1000_EIMC, adapter->eims_enable_mask); ++ regval = rd32(E1000_EIAC); ++ wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask); ++ } ++ ++ wr32(E1000_IAM, 0); ++ wr32(E1000_IMC, ~0); ++ wrfl(); ++ ++ msleep(10); ++} ++ ++/** ++ * igb_irq_enable - Enable default interrupt generation settings ++ * @adapter: board private structure ++ **/ ++static void igb_irq_enable(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) { ++ u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA; ++ u32 regval = rd32(E1000_EIAC); ++ ++ wr32(E1000_EIAC, regval | adapter->eims_enable_mask); ++ regval = rd32(E1000_EIAM); ++ wr32(E1000_EIAM, regval | adapter->eims_enable_mask); ++ wr32(E1000_EIMS, adapter->eims_enable_mask); ++ wr32(E1000_IMS, ims); ++ } else { ++ wr32(E1000_IMS, IMS_ENABLE_MASK | ++ E1000_IMS_DRSTA); ++ wr32(E1000_IAM, IMS_ENABLE_MASK | ++ E1000_IMS_DRSTA); ++ } ++} ++ ++static void igb_update_mng_vlan(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u16 vid = adapter->hw.mng_cookie.vlan_id; ++ u16 old_vid = adapter->mng_vlan_id; ++ ++ if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { ++ /* add VID to filter table */ ++ igb_vfta_set(hw, vid, true); ++ adapter->mng_vlan_id = vid; ++ } else { ++ adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; ++ } ++ ++ if ((old_vid != (u16)IGB_MNG_VLAN_NONE) && ++ (vid != old_vid) && ++ !test_bit(old_vid, adapter->active_vlans)) { ++ /* remove VID from filter table */ ++ igb_vfta_set(hw, old_vid, false); ++ } ++} ++ ++/** ++ * igb_release_hw_control - release control of the h/w to f/w ++ * @adapter: address of board private structure ++ * ++ * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that the ++ * driver is no longer loaded. ++ **/ ++static void igb_release_hw_control(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl_ext; ++ ++ /* Let firmware take over control of h/w */ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ wr32(E1000_CTRL_EXT, ++ ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); ++} ++ ++/** ++ * igb_get_hw_control - get control of the h/w from f/w ++ * @adapter: address of board private structure ++ * ++ * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that ++ * the driver is loaded. 
++ **/ ++static void igb_get_hw_control(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl_ext; ++ ++ /* Let firmware know the driver has taken over */ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ wr32(E1000_CTRL_EXT, ++ ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); ++} ++ ++/** ++ * igb_configure - configure the hardware for RX and TX ++ * @adapter: private board structure ++ **/ ++static void igb_configure(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ int i; ++ ++ igb_get_hw_control(adapter); ++ igb_set_rx_mode(netdev); ++ ++ igb_restore_vlan(adapter); ++ ++ igb_setup_tctl(adapter); ++ igb_setup_mrqc(adapter); ++ igb_setup_rctl(adapter); ++ ++ igb_configure_tx(adapter); ++ igb_configure_rx(adapter); ++ ++ igb_rx_fifo_flush_82575(&adapter->hw); ++ ++ /* call igb_desc_unused which always leaves ++ * at least 1 descriptor unused to make sure ++ * next_to_use != next_to_clean ++ */ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ struct igb_ring *ring = adapter->rx_ring[i]; ++ igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); ++ } ++} ++ ++/** ++ * igb_power_up_link - Power up the phy/serdes link ++ * @adapter: address of board private structure ++ **/ ++void igb_power_up_link(struct igb_adapter *adapter) ++{ ++ igb_reset_phy(&adapter->hw); ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) ++ igb_power_up_phy_copper(&adapter->hw); ++ else ++ igb_power_up_serdes_link_82575(&adapter->hw); ++ ++ igb_setup_link(&adapter->hw); ++} ++ ++/** ++ * igb_power_down_link - Power down the phy/serdes link ++ * @adapter: address of board private structure ++ */ ++static void igb_power_down_link(struct igb_adapter *adapter) ++{ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) ++ igb_power_down_phy_copper_82575(&adapter->hw); ++ else ++ igb_shutdown_serdes_link_82575(&adapter->hw); ++} ++ ++/** ++ * Detect and switch function for Media Auto Sense ++ * @adapter: address of the board private structure ++ **/ ++static void igb_check_swap_media(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl_ext, connsw; ++ bool swap_now = false; ++ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ connsw = rd32(E1000_CONNSW); ++ ++ /* need to live swap if current media is copper and we have fiber/serdes ++ * to go to. 
++ */ ++ ++ if ((hw->phy.media_type == e1000_media_type_copper) && ++ (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) { ++ swap_now = true; ++ } else if (!(connsw & E1000_CONNSW_SERDESD)) { ++ /* copper signal takes time to appear */ ++ if (adapter->copper_tries < 4) { ++ adapter->copper_tries++; ++ connsw |= E1000_CONNSW_AUTOSENSE_CONF; ++ wr32(E1000_CONNSW, connsw); ++ return; ++ } else { ++ adapter->copper_tries = 0; ++ if ((connsw & E1000_CONNSW_PHYSD) && ++ (!(connsw & E1000_CONNSW_PHY_PDN))) { ++ swap_now = true; ++ connsw &= ~E1000_CONNSW_AUTOSENSE_CONF; ++ wr32(E1000_CONNSW, connsw); ++ } ++ } ++ } ++ ++ if (!swap_now) ++ return; ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ rtdev_info(adapter->netdev, ++ "MAS: changing media to fiber/serdes\n"); ++ ctrl_ext |= ++ E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; ++ adapter->flags |= IGB_FLAG_MEDIA_RESET; ++ adapter->copper_tries = 0; ++ break; ++ case e1000_media_type_internal_serdes: ++ case e1000_media_type_fiber: ++ rtdev_info(adapter->netdev, ++ "MAS: changing media to copper\n"); ++ ctrl_ext &= ++ ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; ++ adapter->flags |= IGB_FLAG_MEDIA_RESET; ++ break; ++ default: ++ /* shouldn't get here during regular operation */ ++ rtdev_err(adapter->netdev, ++ "AMS: Invalid media type found, returning\n"); ++ break; ++ } ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++} ++ ++/** ++ * igb_up - Open the interface and prepare it to handle traffic ++ * @adapter: board private structure ++ **/ ++int igb_up(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* hardware has been reset, we need to reload some things */ ++ igb_configure(adapter); ++ ++ clear_bit(__IGB_DOWN, &adapter->state); ++ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) ++ igb_configure_msix(adapter); ++ else ++ igb_assign_vector(adapter->q_vector[0], 0); ++ ++ /* Clear any pending interrupts. */ ++ rd32(E1000_ICR); ++ igb_irq_enable(adapter); ++ ++ rtnetif_start_queue(adapter->netdev); ++ ++ /* start the watchdog. 
*/ ++ hw->mac.get_link_status = 1; ++ schedule_work(&adapter->watchdog_task); ++ ++ if ((adapter->flags & IGB_FLAG_EEE) && ++ (!hw->dev_spec._82575.eee_disable)) ++ adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T; ++ ++ return 0; ++} ++ ++void igb_down(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 tctl, rctl; ++ ++ /* signal that we're down so the interrupt handler does not ++ * reschedule our watchdog timer ++ */ ++ set_bit(__IGB_DOWN, &adapter->state); ++ ++ /* disable receives in the hardware */ ++ rctl = rd32(E1000_RCTL); ++ wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ /* flush and sleep below */ ++ ++ rtnetif_stop_queue(netdev); ++ ++ /* disable transmits in the hardware */ ++ tctl = rd32(E1000_TCTL); ++ tctl &= ~E1000_TCTL_EN; ++ wr32(E1000_TCTL, tctl); ++ /* flush both disables and wait for them to finish */ ++ wrfl(); ++ usleep_range(10000, 11000); ++ ++ igb_irq_disable(adapter); ++ ++ adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; ++ ++ del_timer_sync(&adapter->watchdog_timer); ++ del_timer_sync(&adapter->phy_info_timer); ++ ++ /* record the stats before reset*/ ++ spin_lock(&adapter->stats64_lock); ++ igb_update_stats(adapter); ++ spin_unlock(&adapter->stats64_lock); ++ ++ rtnetif_carrier_off(netdev); ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ ++ if (!pci_channel_offline(adapter->pdev)) ++ igb_reset(adapter); ++ igb_clean_all_tx_rings(adapter); ++ igb_clean_all_rx_rings(adapter); ++} ++ ++void igb_reinit_locked(struct igb_adapter *adapter) ++{ ++ WARN_ON(in_interrupt()); ++ while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) ++ usleep_range(1000, 2000); ++ igb_down(adapter); ++ igb_up(adapter); ++ clear_bit(__IGB_RESETTING, &adapter->state); ++} ++ ++/** igb_enable_mas - Media Autosense re-enable after swap ++ * ++ * @adapter: adapter struct ++ **/ ++static void igb_enable_mas(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 connsw = rd32(E1000_CONNSW); ++ ++ /* configure for SerDes media detect */ ++ if ((hw->phy.media_type == e1000_media_type_copper) && ++ (!(connsw & E1000_CONNSW_SERDESD))) { ++ connsw |= E1000_CONNSW_ENRGSRC; ++ connsw |= E1000_CONNSW_AUTOSENSE_EN; ++ wr32(E1000_CONNSW, connsw); ++ wrfl(); ++ } ++} ++ ++void igb_reset(struct igb_adapter *adapter) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_fc_info *fc = &hw->fc; ++ u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm; ++ ++ /* Repartition Pba for greater than 9k mtu ++ * To take effect CTRL.RST is required. ++ */ ++ switch (mac->type) { ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_82580: ++ pba = rd32(E1000_RXPBS); ++ pba = igb_rxpbs_adjust_82580(pba); ++ break; ++ case e1000_82576: ++ pba = rd32(E1000_RXPBS); ++ pba &= E1000_RXPBS_SIZE_MASK_82576; ++ break; ++ case e1000_82575: ++ case e1000_i210: ++ case e1000_i211: ++ default: ++ pba = E1000_PBA_34K; ++ break; ++ } ++ ++ if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && ++ (mac->type < e1000_82576)) { ++ /* adjust PBA for jumbo frames */ ++ wr32(E1000_PBA, pba); ++ ++ /* To maintain wire speed transmits, the Tx FIFO should be ++ * large enough to accommodate two full transmit packets, ++ * rounded up to the next 1KB and expressed in KB. Likewise, ++ * the Rx FIFO should be large enough to accommodate at least ++ * one full receive packet and is similarly rounded up and ++ * expressed in KB. 
++ */ ++ pba = rd32(E1000_PBA); ++ /* upper 16 bits has Tx packet buffer allocation size in KB */ ++ tx_space = pba >> 16; ++ /* lower 16 bits has Rx packet buffer allocation size in KB */ ++ pba &= 0xffff; ++ /* the Tx fifo also stores 16 bytes of information about the Tx ++ * but don't include ethernet FCS because hardware appends it ++ */ ++ min_tx_space = (adapter->max_frame_size + ++ sizeof(union e1000_adv_tx_desc) - ++ ETH_FCS_LEN) * 2; ++ min_tx_space = ALIGN(min_tx_space, 1024); ++ min_tx_space >>= 10; ++ /* software strips receive CRC, so leave room for it */ ++ min_rx_space = adapter->max_frame_size; ++ min_rx_space = ALIGN(min_rx_space, 1024); ++ min_rx_space >>= 10; ++ ++ /* If current Tx allocation is less than the min Tx FIFO size, ++ * and the min Tx FIFO size is less than the current Rx FIFO ++ * allocation, take space away from current Rx allocation ++ */ ++ if (tx_space < min_tx_space && ++ ((min_tx_space - tx_space) < pba)) { ++ pba = pba - (min_tx_space - tx_space); ++ ++ /* if short on Rx space, Rx wins and must trump Tx ++ * adjustment ++ */ ++ if (pba < min_rx_space) ++ pba = min_rx_space; ++ } ++ wr32(E1000_PBA, pba); ++ } ++ ++ /* flow control settings */ ++ /* The high water mark must be low enough to fit one full frame ++ * (or the size used for early receive) above it in the Rx FIFO. ++ * Set it to the lower of: ++ * - 90% of the Rx FIFO size, or ++ * - the full Rx FIFO size minus one full frame ++ */ ++ hwm = min(((pba << 10) * 9 / 10), ++ ((pba << 10) - 2 * adapter->max_frame_size)); ++ ++ fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ ++ fc->low_water = fc->high_water - 16; ++ fc->pause_time = 0xFFFF; ++ fc->send_xon = 1; ++ fc->current_mode = fc->requested_mode; ++ ++ /* Allow time for pending master requests to run */ ++ hw->mac.ops.reset_hw(hw); ++ wr32(E1000_WUC, 0); ++ ++ if (adapter->flags & IGB_FLAG_MEDIA_RESET) { ++ /* need to resetup here after media swap */ ++ adapter->ei.get_invariants(hw); ++ adapter->flags &= ~IGB_FLAG_MEDIA_RESET; ++ } ++ if ((mac->type == e1000_82575) && ++ (adapter->flags & IGB_FLAG_MAS_ENABLE)) { ++ igb_enable_mas(adapter); ++ } ++ if (hw->mac.ops.init_hw(hw)) ++ dev_err(&pdev->dev, "Hardware Error\n"); ++ ++ /* Flow control settings reset on hardware reset, so guarantee flow ++ * control is off when forcing speed. ++ */ ++ if (!hw->mac.autoneg) ++ igb_force_mac_fc(hw); ++ ++ igb_init_dmac(adapter, pba); ++#ifdef CONFIG_IGB_HWMON ++ /* Re-initialize the thermal sensor on i350 devices. */ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) { ++ if (mac->type == e1000_i350 && hw->bus.func == 0) { ++ /* If present, re-initialize the external thermal sensor ++ * interface. 
++ */ ++ if (adapter->ets) ++ mac->ops.init_thermal_sensor_thresh(hw); ++ } ++ } ++#endif ++ /* Re-establish EEE setting */ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ switch (mac->type) { ++ case e1000_i350: ++ case e1000_i210: ++ case e1000_i211: ++ igb_set_eee_i350(hw, true, true); ++ break; ++ case e1000_i354: ++ igb_set_eee_i354(hw, true, true); ++ break; ++ default: ++ break; ++ } ++ } ++ if (!rtnetif_running(adapter->netdev)) ++ igb_power_down_link(adapter); ++ ++ igb_update_mng_vlan(adapter); ++ ++ /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ ++ wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE); ++ ++ igb_get_phy_info(hw); ++} ++ ++ ++/** ++ * igb_set_fw_version - Configure version string for ethtool ++ * @adapter: adapter struct ++ **/ ++void igb_set_fw_version(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_fw_version fw; ++ ++ igb_get_fw_version(hw, &fw); ++ ++ switch (hw->mac.type) { ++ case e1000_i210: ++ case e1000_i211: ++ if (!(igb_get_flash_presence_i210(hw))) { ++ snprintf(adapter->fw_version, ++ sizeof(adapter->fw_version), ++ "%2d.%2d-%d", ++ fw.invm_major, fw.invm_minor, ++ fw.invm_img_type); ++ break; ++ } ++ /* fall through */ ++ default: ++ /* if option is rom valid, display its version too */ ++ if (fw.or_valid) { ++ snprintf(adapter->fw_version, ++ sizeof(adapter->fw_version), ++ "%d.%d, 0x%08x, %d.%d.%d", ++ fw.eep_major, fw.eep_minor, fw.etrack_id, ++ fw.or_major, fw.or_build, fw.or_patch); ++ /* no option rom */ ++ } else if (fw.etrack_id != 0X0000) { ++ snprintf(adapter->fw_version, ++ sizeof(adapter->fw_version), ++ "%d.%d, 0x%08x", ++ fw.eep_major, fw.eep_minor, fw.etrack_id); ++ } else { ++ snprintf(adapter->fw_version, ++ sizeof(adapter->fw_version), ++ "%d.%d.%d", ++ fw.eep_major, fw.eep_minor, fw.eep_build); ++ } ++ break; ++ } ++} ++ ++/** ++ * igb_init_mas - init Media Autosense feature if enabled in the NVM ++ * ++ * @adapter: adapter struct ++ **/ ++static void igb_init_mas(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u16 eeprom_data; ++ ++ hw->nvm.ops.read(hw, NVM_COMPAT, 1, &eeprom_data); ++ switch (hw->bus.func) { ++ case E1000_FUNC_0: ++ if (eeprom_data & IGB_MAS_ENABLE_0) { ++ adapter->flags |= IGB_FLAG_MAS_ENABLE; ++ rtdev_info(adapter->netdev, ++ "MAS: Enabling Media Autosense for port %d\n", ++ hw->bus.func); ++ } ++ break; ++ case E1000_FUNC_1: ++ if (eeprom_data & IGB_MAS_ENABLE_1) { ++ adapter->flags |= IGB_FLAG_MAS_ENABLE; ++ rtdev_info(adapter->netdev, ++ "MAS: Enabling Media Autosense for port %d\n", ++ hw->bus.func); ++ } ++ break; ++ case E1000_FUNC_2: ++ if (eeprom_data & IGB_MAS_ENABLE_2) { ++ adapter->flags |= IGB_FLAG_MAS_ENABLE; ++ rtdev_info(adapter->netdev, ++ "MAS: Enabling Media Autosense for port %d\n", ++ hw->bus.func); ++ } ++ break; ++ case E1000_FUNC_3: ++ if (eeprom_data & IGB_MAS_ENABLE_3) { ++ adapter->flags |= IGB_FLAG_MAS_ENABLE; ++ rtdev_info(adapter->netdev, ++ "MAS: Enabling Media Autosense for port %d\n", ++ hw->bus.func); ++ } ++ break; ++ default: ++ /* Shouldn't get here */ ++ rtdev_err(adapter->netdev, ++ "MAS: Invalid port configuration, returning\n"); ++ break; ++ } ++} ++ ++static dma_addr_t igb_map_rtskb(struct rtnet_device *netdev, ++ struct rtskb *skb) ++{ ++ struct igb_adapter *adapter = netdev->priv; ++ struct device *dev = &adapter->pdev->dev; ++ dma_addr_t addr; ++ ++ addr = dma_map_single(dev, skb->buf_start, RTSKB_SIZE, ++ DMA_BIDIRECTIONAL); ++ if (dma_mapping_error(dev, addr)) { ++ dev_err(dev, "DMA map 
failed\n"); ++ return RTSKB_UNMAPPED; ++ } ++ return addr; ++} ++ ++static void igb_unmap_rtskb(struct rtnet_device *netdev, ++ struct rtskb *skb) ++{ ++ struct igb_adapter *adapter = netdev->priv; ++ struct device *dev = &adapter->pdev->dev; ++ ++ dma_unmap_single(dev, skb->buf_dma_addr, RTSKB_SIZE, ++ DMA_BIDIRECTIONAL); ++} ++ ++/** ++ * igb_probe - Device Initialization Routine ++ * @pdev: PCI device information struct ++ * @ent: entry in igb_pci_tbl ++ * ++ * Returns 0 on success, negative on failure ++ * ++ * igb_probe initializes an adapter identified by a pci_dev structure. ++ * The OS initialization, configuring of the adapter private structure, ++ * and a hardware reset occur. ++ **/ ++static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ++{ ++ struct rtnet_device *netdev; ++ struct igb_adapter *adapter; ++ struct e1000_hw *hw; ++ u16 eeprom_data = 0; ++ s32 ret_val; ++ static int global_quad_port_a; /* global quad port a indication */ ++ const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; ++ int err, pci_using_dac; ++ u8 part_str[E1000_PBANUM_LENGTH]; ++ ++ /* Catch broken hardware that put the wrong VF device ID in ++ * the PCIe SR-IOV capability. ++ */ ++ if (pdev->is_virtfn) { ++ WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n", ++ pci_name(pdev), pdev->vendor, pdev->device); ++ return -EINVAL; ++ } ++ ++ err = pci_enable_device_mem(pdev); ++ if (err) ++ return err; ++ ++ pci_using_dac = 0; ++ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); ++ if (!err) { ++ pci_using_dac = 1; ++ } else { ++ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); ++ if (err) { ++ dev_err(&pdev->dev, ++ "No usable DMA configuration, aborting\n"); ++ goto err_dma; ++ } ++ } ++ ++ err = pci_request_selected_regions(pdev, pci_select_bars(pdev, ++ IORESOURCE_MEM), ++ igb_driver_name); ++ if (err) ++ goto err_pci_reg; ++ ++ pci_enable_pcie_error_reporting(pdev); ++ ++ pci_set_master(pdev); ++ pci_save_state(pdev); ++ ++ err = -ENOMEM; ++ netdev = rt_alloc_etherdev(sizeof(*adapter), ++ 2 * IGB_DEFAULT_RXD + IGB_DEFAULT_TXD); ++ if (!netdev) ++ goto err_alloc_etherdev; ++ ++ rtdev_alloc_name(netdev, "rteth%d"); ++ rt_rtdev_connect(netdev, &RTDEV_manager); ++ ++ netdev->vers = RTDEV_VERS_2_0; ++ netdev->sysbind = &pdev->dev; ++ ++ pci_set_drvdata(pdev, netdev); ++ adapter = rtnetdev_priv(netdev); ++ adapter->netdev = netdev; ++ adapter->pdev = pdev; ++ hw = &adapter->hw; ++ hw->back = adapter; ++ ++ err = -EIO; ++ hw->hw_addr = pci_iomap(pdev, 0, 0); ++ if (!hw->hw_addr) ++ goto err_ioremap; ++ ++ netdev->open = igb_open; ++ netdev->stop = igb_close; ++ netdev->hard_start_xmit = igb_xmit_frame; ++ netdev->get_stats = igb_get_stats; ++ netdev->map_rtskb = igb_map_rtskb; ++ netdev->unmap_rtskb = igb_unmap_rtskb; ++ netdev->do_ioctl = igb_ioctl; ++#if 0 ++ netdev->set_multicast_list = igb_set_multi; ++ netdev->set_mac_address = igb_set_mac; ++ netdev->change_mtu = igb_change_mtu; ++ ++ // No ethtool support for now ++ igb_set_ethtool_ops(netdev); ++ netdev->watchdog_timeo = 5 * HZ; ++#endif ++ ++ strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); ++ ++ netdev->mem_start = pci_resource_start(pdev, 0); ++ netdev->mem_end = pci_resource_end(pdev, 0); ++ ++ /* PCI config space info */ ++ hw->vendor_id = pdev->vendor; ++ hw->device_id = pdev->device; ++ hw->revision_id = pdev->revision; ++ hw->subsystem_vendor_id = pdev->subsystem_vendor; ++ hw->subsystem_device_id = pdev->subsystem_device; ++ ++ /* Copy the default MAC, PHY and NVM 
function pointers */ ++ memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); ++ memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); ++ memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops)); ++ /* Initialize skew-specific constants */ ++ err = ei->get_invariants(hw); ++ if (err) ++ goto err_sw_init; ++ ++ /* setup the private structure */ ++ err = igb_sw_init(adapter); ++ if (err) ++ goto err_sw_init; ++ ++ igb_get_bus_info_pcie(hw); ++ ++ hw->phy.autoneg_wait_to_complete = false; ++ ++ /* Copper options */ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ hw->phy.mdix = AUTO_ALL_MODES; ++ hw->phy.disable_polarity_correction = false; ++ hw->phy.ms_type = e1000_ms_hw_default; ++ } ++ ++ if (igb_check_reset_block(hw)) ++ dev_info(&pdev->dev, ++ "PHY reset is blocked due to SOL/IDER session.\n"); ++ ++ /* features is initialized to 0 in allocation, it might have bits ++ * set by igb_sw_init so we should use an or instead of an ++ * assignment. ++ */ ++ netdev->features |= NETIF_F_SG | ++ NETIF_F_IP_CSUM | ++ NETIF_F_IPV6_CSUM | ++ NETIF_F_TSO | ++ NETIF_F_TSO6 | ++ NETIF_F_RXHASH | ++ NETIF_F_RXCSUM | ++ NETIF_F_HW_VLAN_CTAG_RX | ++ NETIF_F_HW_VLAN_CTAG_TX; ++ ++#if 0 ++ /* set this bit last since it cannot be part of hw_features */ ++ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ++#endif ++ ++ netdev->priv_flags |= IFF_SUPP_NOFCS; ++ ++ if (pci_using_dac) ++ netdev->features |= NETIF_F_HIGHDMA; ++ ++ netdev->priv_flags |= IFF_UNICAST_FLT; ++ ++ adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); ++ ++ /* before reading the NVM, reset the controller to put the device in a ++ * known good starting state ++ */ ++ hw->mac.ops.reset_hw(hw); ++ ++ /* make sure the NVM is good , i211/i210 parts can have special NVM ++ * that doesn't contain a checksum ++ */ ++ switch (hw->mac.type) { ++ case e1000_i210: ++ case e1000_i211: ++ if (igb_get_flash_presence_i210(hw)) { ++ if (hw->nvm.ops.validate(hw) < 0) { ++ dev_err(&pdev->dev, ++ "The NVM Checksum Is Not Valid\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ } ++ break; ++ default: ++ if (hw->nvm.ops.validate(hw) < 0) { ++ dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ break; ++ } ++ ++ /* copy the MAC address out of the NVM */ ++ if (hw->mac.ops.read_mac_addr(hw)) ++ dev_err(&pdev->dev, "NVM Read Error\n"); ++ ++ memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); ++ ++ if (!is_valid_ether_addr(netdev->dev_addr)) { ++ dev_err(&pdev->dev, "Invalid MAC Address\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ ++ /* get firmware version for ethtool -i */ ++ igb_set_fw_version(adapter); ++ ++ /* configure RXPBSIZE and TXPBSIZE */ ++ if (hw->mac.type == e1000_i210) { ++ wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT); ++ wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT); ++ } ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++ timer_setup(&adapter->watchdog_timer, igb_watchdog, 0); ++ timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0); ++#else /* < 4.14 */ ++ setup_timer(&adapter->watchdog_timer, igb_watchdog, ++ (unsigned long) adapter); ++ setup_timer(&adapter->phy_info_timer, igb_update_phy_info, ++ (unsigned long) adapter); ++#endif /* < 4.14 */ ++ ++ INIT_WORK(&adapter->reset_task, igb_reset_task); ++ INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); ++ rtdm_nrtsig_init(&adapter->watchdog_nrtsig, ++ igb_nrtsig_watchdog, adapter); ++ ++ /* Initialize link properties that are user-changeable */ ++ adapter->fc_autoneg = true; ++ hw->mac.autoneg = true; ++ 
hw->phy.autoneg_advertised = 0x2f; ++ ++ hw->fc.requested_mode = e1000_fc_default; ++ hw->fc.current_mode = e1000_fc_default; ++ ++ igb_validate_mdi_setting(hw); ++ ++ /* By default, support wake on port A */ ++ if (hw->bus.func == 0) ++ adapter->flags |= IGB_FLAG_WOL_SUPPORTED; ++ ++ /* Check the NVM for wake support on non-port A ports */ ++ if (hw->mac.type >= e1000_82580) ++ hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + ++ NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, ++ &eeprom_data); ++ else if (hw->bus.func == 1) ++ hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); ++ ++ if (eeprom_data & IGB_EEPROM_APME) ++ adapter->flags |= IGB_FLAG_WOL_SUPPORTED; ++ ++ /* now that we have the eeprom settings, apply the special cases where ++ * the eeprom may be wrong or the board simply won't support wake on ++ * lan on a particular port ++ */ ++ switch (pdev->device) { ++ case E1000_DEV_ID_82575GB_QUAD_COPPER: ++ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; ++ break; ++ case E1000_DEV_ID_82575EB_FIBER_SERDES: ++ case E1000_DEV_ID_82576_FIBER: ++ case E1000_DEV_ID_82576_SERDES: ++ /* Wake events only supported on port A for dual fiber ++ * regardless of eeprom setting ++ */ ++ if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1) ++ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; ++ break; ++ case E1000_DEV_ID_82576_QUAD_COPPER: ++ case E1000_DEV_ID_82576_QUAD_COPPER_ET2: ++ /* if quad port adapter, disable WoL on all but port A */ ++ if (global_quad_port_a != 0) ++ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; ++ else ++ adapter->flags |= IGB_FLAG_QUAD_PORT_A; ++ /* Reset for multiple quad port adapters */ ++ if (++global_quad_port_a == 4) ++ global_quad_port_a = 0; ++ break; ++ default: ++ /* If the device can't wake, don't set software support */ ++ if (!device_can_wakeup(&adapter->pdev->dev)) ++ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; ++ } ++ ++ /* initialize the wol settings based on the eeprom settings */ ++ if (adapter->flags & IGB_FLAG_WOL_SUPPORTED) ++ adapter->wol |= E1000_WUFC_MAG; ++ ++ /* Some vendors want WoL disabled by default, but still supported */ ++ if ((hw->mac.type == e1000_i350) && ++ (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) { ++ adapter->flags |= IGB_FLAG_WOL_SUPPORTED; ++ adapter->wol = 0; ++ } ++ ++ device_set_wakeup_enable(&adapter->pdev->dev, ++ adapter->flags & IGB_FLAG_WOL_SUPPORTED); ++ ++ /* reset the hardware with the new settings */ ++ igb_reset(adapter); ++ ++ /* let the f/w know that the h/w is now under the control of the ++ * driver. ++ */ ++ igb_get_hw_control(adapter); ++ ++ strcpy(netdev->name, "rteth%d"); ++ err = rt_register_rtnetdev(netdev); ++ if (err) ++ goto err_release_hw_control; ++ ++ /* carrier off reporting is important to ethtool even BEFORE open */ ++ rtnetif_carrier_off(netdev); ++ ++#ifdef CONFIG_IGB_HWMON ++ /* Initialize the thermal sensor on i350 devices. */ ++ if (hw->mac.type == e1000_i350 && hw->bus.func == 0) { ++ u16 ets_word; ++ ++ /* Read the NVM to determine if this i350 device supports an ++ * external thermal sensor. 
++ */ ++ hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_word); ++ if (ets_word != 0x0000 && ets_word != 0xFFFF) ++ adapter->ets = true; ++ else ++ adapter->ets = false; ++ if (igb_sysfs_init(adapter)) ++ dev_err(&pdev->dev, ++ "failed to allocate sysfs resources\n"); ++ } else { ++ adapter->ets = false; ++ } ++#endif ++ /* Check if Media Autosense is enabled */ ++ adapter->ei = *ei; ++ if (hw->dev_spec._82575.mas_capable) ++ igb_init_mas(adapter); ++ ++ dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); ++ /* print bus type/speed/width info, not applicable to i354 */ ++ if (hw->mac.type != e1000_i354) { ++ dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", ++ netdev->name, ++ ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : ++ (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" : ++ "unknown"), ++ ((hw->bus.width == e1000_bus_width_pcie_x4) ? ++ "Width x4" : ++ (hw->bus.width == e1000_bus_width_pcie_x2) ? ++ "Width x2" : ++ (hw->bus.width == e1000_bus_width_pcie_x1) ? ++ "Width x1" : "unknown"), netdev->dev_addr); ++ } ++ ++ if ((hw->mac.type >= e1000_i210 || ++ igb_get_flash_presence_i210(hw))) { ++ ret_val = igb_read_part_string(hw, part_str, ++ E1000_PBANUM_LENGTH); ++ } else { ++ ret_val = -E1000_ERR_INVM_VALUE_NOT_FOUND; ++ } ++ ++ if (ret_val) ++ strcpy(part_str, "Unknown"); ++ dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str); ++ dev_info(&pdev->dev, ++ "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", ++ (adapter->flags & IGB_FLAG_HAS_MSIX) ? "MSI-X" : ++ (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy", ++ adapter->num_rx_queues, adapter->num_tx_queues); ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ switch (hw->mac.type) { ++ case e1000_i350: ++ case e1000_i210: ++ case e1000_i211: ++ /* Enable EEE for internal copper PHY devices */ ++ err = igb_set_eee_i350(hw, true, true); ++ if ((!err) && ++ (!hw->dev_spec._82575.eee_disable)) { ++ adapter->eee_advert = ++ MDIO_EEE_100TX | MDIO_EEE_1000T; ++ adapter->flags |= IGB_FLAG_EEE; ++ } ++ break; ++ case e1000_i354: ++ if ((rd32(E1000_CTRL_EXT) & ++ E1000_CTRL_EXT_LINK_MODE_SGMII)) { ++ err = igb_set_eee_i354(hw, true, true); ++ if ((!err) && ++ (!hw->dev_spec._82575.eee_disable)) { ++ adapter->eee_advert = ++ MDIO_EEE_100TX | MDIO_EEE_1000T; ++ adapter->flags |= IGB_FLAG_EEE; ++ } ++ } ++ break; ++ default: ++ break; ++ } ++ } ++ pm_runtime_put_noidle(&pdev->dev); ++ return 0; ++ ++err_release_hw_control: ++ igb_release_hw_control(adapter); ++ memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap)); ++err_eeprom: ++ if (!igb_check_reset_block(hw)) ++ igb_reset_phy(hw); ++ ++ if (hw->flash_address) ++ iounmap(hw->flash_address); ++err_sw_init: ++ igb_clear_interrupt_scheme(adapter); ++ pci_iounmap(pdev, hw->hw_addr); ++err_ioremap: ++ rtdev_free(netdev); ++err_alloc_etherdev: ++ pci_release_selected_regions(pdev, ++ pci_select_bars(pdev, IORESOURCE_MEM)); ++err_pci_reg: ++err_dma: ++ pci_disable_device(pdev); ++ return err; ++} ++ ++/** ++ * igb_remove_i2c - Cleanup I2C interface ++ * @adapter: pointer to adapter structure ++ **/ ++static void igb_remove_i2c(struct igb_adapter *adapter) ++{ ++ /* free the adapter bus structure */ ++ i2c_del_adapter(&adapter->i2c_adap); ++} ++ ++/** ++ * igb_remove - Device Removal Routine ++ * @pdev: PCI device information struct ++ * ++ * igb_remove is called by the PCI subsystem to alert the driver ++ * that it should release a PCI device. 
The could be caused by a ++ * Hot-Plug event, or because the driver is going to be removed from ++ * memory. ++ **/ ++static void igb_remove(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ rtdev_down(netdev); ++ igb_down(adapter); ++ ++ pm_runtime_get_noresume(&pdev->dev); ++#ifdef CONFIG_IGB_HWMON ++ igb_sysfs_exit(adapter); ++#endif ++ igb_remove_i2c(adapter); ++ /* The watchdog timer may be rescheduled, so explicitly ++ * disable watchdog from being rescheduled. ++ */ ++ del_timer_sync(&adapter->watchdog_timer); ++ del_timer_sync(&adapter->phy_info_timer); ++ ++ cancel_work_sync(&adapter->reset_task); ++ cancel_work_sync(&adapter->watchdog_task); ++ ++ /* Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. ++ */ ++ igb_release_hw_control(adapter); ++ ++ rt_rtdev_disconnect(netdev); ++ rt_unregister_rtnetdev(netdev); ++ ++ igb_clear_interrupt_scheme(adapter); ++ ++ pci_iounmap(pdev, hw->hw_addr); ++ if (hw->flash_address) ++ iounmap(hw->flash_address); ++ pci_release_selected_regions(pdev, ++ pci_select_bars(pdev, IORESOURCE_MEM)); ++ ++ kfree(adapter->shadow_vfta); ++ rtdev_free(netdev); ++ ++ pci_disable_pcie_error_reporting(pdev); ++ ++ pci_disable_device(pdev); ++} ++ ++/** ++ * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space ++ * @adapter: board private structure to initialize ++ * ++ * This function initializes the vf specific data storage and then attempts to ++ * allocate the VFs. The reason for ordering it this way is because it is much ++ * mor expensive time wise to disable SR-IOV than it is to allocate and free ++ * the memory for the VFs. ++ **/ ++static void igb_probe_vfs(struct igb_adapter *adapter) ++{ ++} ++ ++static void igb_init_queue_configuration(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 max_rss_queues; ++ ++ max_rss_queues = 1; ++ adapter->rss_queues = max_rss_queues; ++ ++ /* Determine if we need to pair queues. */ ++ switch (hw->mac.type) { ++ case e1000_82575: ++ case e1000_i211: ++ /* Device supports enough interrupts without queue pairing. */ ++ break; ++ case e1000_82576: ++ /* If VFs are going to be allocated with RSS queues then we ++ * should pair the queues in order to conserve interrupts due ++ * to limited supply. ++ */ ++ /* fall through */ ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ default: ++ /* If rss_queues > half of max_rss_queues, pair the queues in ++ * order to conserve interrupts due to limited supply. ++ */ ++ if (adapter->rss_queues > (max_rss_queues / 2)) ++ adapter->flags |= IGB_FLAG_QUEUE_PAIRS; ++ break; ++ } ++} ++ ++/** ++ * igb_sw_init - Initialize general software structures (struct igb_adapter) ++ * @adapter: board private structure to initialize ++ * ++ * igb_sw_init initializes the Adapter private data structure. ++ * Fields are initialized based on PCI device information and ++ * OS network device settings (MTU size). 
++ **/ ++static int igb_sw_init(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ ++ pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); ++ ++ /* set default ring sizes */ ++ adapter->tx_ring_count = IGB_DEFAULT_TXD; ++ adapter->rx_ring_count = IGB_DEFAULT_RXD; ++ ++ /* set default ITR values */ ++ if (InterruptThrottle) { ++ adapter->rx_itr_setting = IGB_DEFAULT_ITR; ++ adapter->tx_itr_setting = IGB_DEFAULT_ITR; ++ } else { ++ adapter->rx_itr_setting = IGB_MIN_ITR_USECS; ++ adapter->tx_itr_setting = IGB_MIN_ITR_USECS; ++ } ++ ++ /* set default work limits */ ++ adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; ++ ++ adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + ++ VLAN_HLEN; ++ adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; ++ ++ spin_lock_init(&adapter->stats64_lock); ++ ++ igb_init_queue_configuration(adapter); ++ ++ /* Setup and initialize a copy of the hw vlan table array */ ++ adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), ++ GFP_ATOMIC); ++ ++ /* This call may decrease the number of queues */ ++ if (igb_init_interrupt_scheme(adapter, true)) { ++ dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); ++ return -ENOMEM; ++ } ++ ++ igb_probe_vfs(adapter); ++ ++ /* Explicitly disable IRQ since the NIC can be in any state. */ ++ igb_irq_disable(adapter); ++ ++ if (hw->mac.type >= e1000_i350) ++ adapter->flags &= ~IGB_FLAG_DMAC; ++ ++ set_bit(__IGB_DOWN, &adapter->state); ++ return 0; ++} ++ ++/** ++ * igb_open - Called when a network interface is made active ++ * @netdev: network interface device structure ++ * ++ * Returns 0 on success, negative value on failure ++ * ++ * The open entry point is called when a network interface is made ++ * active by the system (IFF_UP). At this point all resources needed ++ * for transmit and receive operations are allocated, the interrupt ++ * handler is registered with the OS, the watchdog timer is started, ++ * and the stack is notified that the interface is ready. ++ **/ ++static int __igb_open(struct rtnet_device *netdev, bool resuming) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ struct pci_dev *pdev = adapter->pdev; ++ int err; ++ ++ /* disallow open during test */ ++ if (test_bit(__IGB_TESTING, &adapter->state)) { ++ WARN_ON(resuming); ++ return -EBUSY; ++ } ++ ++ if (!resuming) ++ pm_runtime_get_sync(&pdev->dev); ++ ++ rtnetif_carrier_off(netdev); ++ ++ /* allocate transmit descriptors */ ++ err = igb_setup_all_tx_resources(adapter); ++ if (err) ++ goto err_setup_tx; ++ ++ /* allocate receive descriptors */ ++ err = igb_setup_all_rx_resources(adapter); ++ if (err) ++ goto err_setup_rx; ++ ++ igb_power_up_link(adapter); ++ ++ /* before we allocate an interrupt, we must be ready to handle it. ++ * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt ++ * as soon as we call pci_request_irq, so we have to setup our ++ * clean_rx handler before we do so. ++ */ ++ igb_configure(adapter); ++ ++ err = igb_request_irq(adapter); ++ if (err) ++ goto err_req_irq; ++ ++ /* From here on the code is the same as igb_up() */ ++ clear_bit(__IGB_DOWN, &adapter->state); ++ ++ /* Clear any pending interrupts. */ ++ rd32(E1000_ICR); ++ ++ igb_irq_enable(adapter); ++ ++ rtnetif_start_queue(netdev); ++ ++ if (!resuming) ++ pm_runtime_put(&pdev->dev); ++ ++ /* start the watchdog. 
*/ ++ hw->mac.get_link_status = 1; ++ schedule_work(&adapter->watchdog_task); ++ ++ return 0; ++ ++err_req_irq: ++ igb_release_hw_control(adapter); ++ igb_power_down_link(adapter); ++ igb_free_all_rx_resources(adapter); ++err_setup_rx: ++ igb_free_all_tx_resources(adapter); ++err_setup_tx: ++ igb_reset(adapter); ++ if (!resuming) ++ pm_runtime_put(&pdev->dev); ++ ++ return err; ++} ++ ++static int igb_open(struct rtnet_device *netdev) ++{ ++ return __igb_open(netdev, false); ++} ++ ++/** ++ * igb_close - Disables a network interface ++ * @netdev: network interface device structure ++ * ++ * Returns 0, this is not allowed to fail ++ * ++ * The close entry point is called when an interface is de-activated ++ * by the OS. The hardware is still under the driver's control, but ++ * needs to be disabled. A global MAC reset is issued to stop the ++ * hardware, and all transmit and receive resources are freed. ++ **/ ++static int __igb_close(struct rtnet_device *netdev, bool suspending) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct pci_dev *pdev = adapter->pdev; ++ ++ WARN_ON(test_bit(__IGB_RESETTING, &adapter->state)); ++ ++ if (!suspending) ++ pm_runtime_get_sync(&pdev->dev); ++ ++ igb_down(adapter); ++ igb_free_irq(adapter); ++ ++ rt_stack_disconnect(netdev); ++ ++ igb_free_all_tx_resources(adapter); ++ igb_free_all_rx_resources(adapter); ++ ++ if (!suspending) ++ pm_runtime_put_sync(&pdev->dev); ++ return 0; ++} ++ ++static int igb_close(struct rtnet_device *netdev) ++{ ++ return __igb_close(netdev, false); ++} ++ ++/** ++ * igb_setup_tx_resources - allocate Tx resources (Descriptors) ++ * @tx_ring: tx descriptor ring (for a specific queue) to setup ++ * ++ * Return 0 on success, negative on failure ++ **/ ++int igb_setup_tx_resources(struct igb_ring *tx_ring) ++{ ++ struct device *dev = tx_ring->dev; ++ int size; ++ ++ size = sizeof(struct igb_tx_buffer) * tx_ring->count; ++ ++ tx_ring->tx_buffer_info = vzalloc(size); ++ if (!tx_ring->tx_buffer_info) ++ goto err; ++ ++ /* round up to nearest 4K */ ++ tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); ++ tx_ring->size = ALIGN(tx_ring->size, 4096); ++ ++ tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, ++ &tx_ring->dma, GFP_KERNEL); ++ if (!tx_ring->desc) ++ goto err; ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ ++ return 0; ++ ++err: ++ vfree(tx_ring->tx_buffer_info); ++ tx_ring->tx_buffer_info = NULL; ++ dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n"); ++ return -ENOMEM; ++} ++ ++/** ++ * igb_setup_all_tx_resources - wrapper to allocate Tx resources ++ * (Descriptors) for all queues ++ * @adapter: board private structure ++ * ++ * Return 0 on success, negative on failure ++ **/ ++static int igb_setup_all_tx_resources(struct igb_adapter *adapter) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ err = igb_setup_tx_resources(adapter->tx_ring[i]); ++ if (err) { ++ dev_err(&pdev->dev, ++ "Allocation for Tx Queue %u failed\n", i); ++ for (i--; i >= 0; i--) ++ igb_free_tx_resources(adapter->tx_ring[i]); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * igb_setup_tctl - configure the transmit control registers ++ * @adapter: Board private structure ++ **/ ++void igb_setup_tctl(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 tctl; ++ ++ /* disable queue 0 which is enabled by default on 82575 and 82576 */ ++ wr32(E1000_TXDCTL(0), 0); ++ ++ /* Program the 
Transmit Control Register */ ++ tctl = rd32(E1000_TCTL); ++ tctl &= ~E1000_TCTL_CT; ++ tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | ++ (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); ++ ++ igb_config_collision_dist(hw); ++ ++ /* Enable transmits */ ++ tctl |= E1000_TCTL_EN; ++ ++ wr32(E1000_TCTL, tctl); ++} ++ ++/** ++ * igb_configure_tx_ring - Configure transmit ring after Reset ++ * @adapter: board private structure ++ * @ring: tx ring to configure ++ * ++ * Configure a transmit ring after a reset. ++ **/ ++void igb_configure_tx_ring(struct igb_adapter *adapter, ++ struct igb_ring *ring) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 txdctl = 0; ++ u64 tdba = ring->dma; ++ int reg_idx = ring->reg_idx; ++ ++ /* disable the queue */ ++ wr32(E1000_TXDCTL(reg_idx), 0); ++ wrfl(); ++ mdelay(10); ++ ++ wr32(E1000_TDLEN(reg_idx), ++ ring->count * sizeof(union e1000_adv_tx_desc)); ++ wr32(E1000_TDBAL(reg_idx), ++ tdba & 0x00000000ffffffffULL); ++ wr32(E1000_TDBAH(reg_idx), tdba >> 32); ++ ++ ring->tail = hw->hw_addr + E1000_TDT(reg_idx); ++ wr32(E1000_TDH(reg_idx), 0); ++ writel(0, ring->tail); ++ ++ txdctl |= IGB_TX_PTHRESH; ++ txdctl |= IGB_TX_HTHRESH << 8; ++ txdctl |= IGB_TX_WTHRESH << 16; ++ ++ txdctl |= E1000_TXDCTL_QUEUE_ENABLE; ++ wr32(E1000_TXDCTL(reg_idx), txdctl); ++} ++ ++/** ++ * igb_configure_tx - Configure transmit Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Tx unit of the MAC after a reset. ++ **/ ++static void igb_configure_tx(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ igb_configure_tx_ring(adapter, adapter->tx_ring[i]); ++} ++ ++/** ++ * igb_setup_rx_resources - allocate Rx resources (Descriptors) ++ * @rx_ring: Rx descriptor ring (for a specific queue) to setup ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++int igb_setup_rx_resources(struct igb_ring *rx_ring) ++{ ++ struct device *dev = rx_ring->dev; ++ int size; ++ ++ size = sizeof(struct igb_rx_buffer) * rx_ring->count; ++ ++ rx_ring->rx_buffer_info = vzalloc(size); ++ if (!rx_ring->rx_buffer_info) ++ goto err; ++ ++ /* Round up to nearest 4K */ ++ rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc); ++ rx_ring->size = ALIGN(rx_ring->size, 4096); ++ ++ rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, ++ &rx_ring->dma, GFP_KERNEL); ++ if (!rx_ring->desc) ++ goto err; ++ ++ rx_ring->next_to_alloc = 0; ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ ++ return 0; ++ ++err: ++ vfree(rx_ring->rx_buffer_info); ++ rx_ring->rx_buffer_info = NULL; ++ dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); ++ return -ENOMEM; ++} ++ ++/** ++ * igb_setup_all_rx_resources - wrapper to allocate Rx resources ++ * (Descriptors) for all queues ++ * @adapter: board private structure ++ * ++ * Return 0 on success, negative on failure ++ **/ ++static int igb_setup_all_rx_resources(struct igb_adapter *adapter) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ err = igb_setup_rx_resources(adapter->rx_ring[i]); ++ if (err) { ++ dev_err(&pdev->dev, ++ "Allocation for Rx Queue %u failed\n", i); ++ for (i--; i >= 0; i--) ++ igb_free_rx_resources(adapter->rx_ring[i]); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * igb_setup_mrqc - configure the multiple receive queue control registers ++ * @adapter: Board private structure ++ **/ ++static void igb_setup_mrqc(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = 
&adapter->hw; ++ u32 mrqc, rxcsum; ++ u32 j, num_rx_queues; ++ u32 rss_key[10]; ++ ++ get_random_bytes(rss_key, sizeof(rss_key)); ++ for (j = 0; j < 10; j++) ++ wr32(E1000_RSSRK(j), rss_key[j]); ++ ++ num_rx_queues = adapter->rss_queues; ++ ++ switch (hw->mac.type) { ++ case e1000_82576: ++ /* 82576 supports 2 RSS queues for SR-IOV */ ++ break; ++ default: ++ break; ++ } ++ ++ if (adapter->rss_indir_tbl_init != num_rx_queues) { ++ for (j = 0; j < IGB_RETA_SIZE; j++) ++ adapter->rss_indir_tbl[j] = ++ (j * num_rx_queues) / IGB_RETA_SIZE; ++ adapter->rss_indir_tbl_init = num_rx_queues; ++ } ++ ++ /* Disable raw packet checksumming so that RSS hash is placed in ++ * descriptor on writeback. No need to enable TCP/UDP/IP checksum ++ * offloads as they are enabled by default ++ */ ++ rxcsum = rd32(E1000_RXCSUM); ++ rxcsum |= E1000_RXCSUM_PCSD; ++ ++ if (adapter->hw.mac.type >= e1000_82576) ++ /* Enable Receive Checksum Offload for SCTP */ ++ rxcsum |= E1000_RXCSUM_CRCOFL; ++ ++ /* Don't need to set TUOFL or IPOFL, they default to 1 */ ++ wr32(E1000_RXCSUM, rxcsum); ++ ++ /* Generate RSS hash based on packet types, TCP/UDP ++ * port numbers and/or IPv4/v6 src and dst addresses ++ */ ++ mrqc = E1000_MRQC_RSS_FIELD_IPV4 | ++ E1000_MRQC_RSS_FIELD_IPV4_TCP | ++ E1000_MRQC_RSS_FIELD_IPV6 | ++ E1000_MRQC_RSS_FIELD_IPV6_TCP | ++ E1000_MRQC_RSS_FIELD_IPV6_TCP_EX; ++ ++ if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) ++ mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP; ++ if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) ++ mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP; ++ ++ /* If VMDq is enabled then we set the appropriate mode for that, else ++ * we default to RSS so that an RSS hash is calculated per packet even ++ * if we are only using one queue ++ */ ++ if (hw->mac.type != e1000_i211) ++ mrqc |= E1000_MRQC_ENABLE_RSS_4Q; ++ ++ wr32(E1000_MRQC, mrqc); ++} ++ ++/** ++ * igb_setup_rctl - configure the receive control registers ++ * @adapter: Board private structure ++ **/ ++void igb_setup_rctl(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ ++ rctl = rd32(E1000_RCTL); ++ ++ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); ++ rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); ++ ++ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | ++ (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); ++ ++ /* enable stripping of CRC. It's unlikely this will break BMC ++ * redirection as it did with e1000. Newer features require ++ * that the HW strips the CRC. ++ */ ++ rctl |= E1000_RCTL_SECRC; ++ ++ /* disable store bad packets and clear size bits. */ ++ rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); ++ ++ /* enable LPE to prevent packets larger than max_frame_size */ ++ rctl |= E1000_RCTL_LPE; ++ ++ /* disable queue 0 to prevent tail write w/o re-config */ ++ wr32(E1000_RXDCTL(0), 0); ++ ++ /* This is useful for sniffing bad packets. */ ++ if (adapter->netdev->features & NETIF_F_RXALL) { ++ /* UPE and MPE will be handled by normal PROMISC logic ++ * in e1000e_set_rx_mode ++ */ ++ rctl |= (E1000_RCTL_SBP | /* Receive bad packets */ ++ E1000_RCTL_BAM | /* RX All Bcast Pkts */ ++ E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ ++ ++ rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */ ++ E1000_RCTL_DPF | /* Allow filtered pause */ ++ E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */ ++ /* Do not mess with E1000_CTRL_VME, it affects transmit as well, ++ * and that breaks VLANs. 
++ */ ++ } ++ ++ wr32(E1000_RCTL, rctl); ++} ++ ++/** ++ * igb_rlpml_set - set maximum receive packet size ++ * @adapter: board private structure ++ * ++ * Configure maximum receivable packet size. ++ **/ ++static void igb_rlpml_set(struct igb_adapter *adapter) ++{ ++ u32 max_frame_size = adapter->max_frame_size; ++ struct e1000_hw *hw = &adapter->hw; ++ ++ wr32(E1000_RLPML, max_frame_size); ++} ++ ++static inline void igb_set_vmolr(struct igb_adapter *adapter, ++ int vfn, bool aupe) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 vmolr; ++ ++ /* This register exists only on 82576 and newer so if we are older then ++ * we should exit and do nothing ++ */ ++ if (hw->mac.type < e1000_82576) ++ return; ++ ++ vmolr = rd32(E1000_VMOLR(vfn)); ++ vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ ++ if (hw->mac.type == e1000_i350) { ++ u32 dvmolr; ++ ++ dvmolr = rd32(E1000_DVMOLR(vfn)); ++ dvmolr |= E1000_DVMOLR_STRVLAN; ++ wr32(E1000_DVMOLR(vfn), dvmolr); ++ } ++ if (aupe) ++ vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ ++ else ++ vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */ ++ ++ /* clear all bits that might not be set */ ++ vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE); ++ ++ if (adapter->rss_queues > 1) ++ vmolr |= E1000_VMOLR_RSSE; /* enable RSS */ ++ /* for VMDq only allow the VFs and pool 0 to accept broadcast and ++ * multicast packets ++ */ ++ vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ ++ ++ wr32(E1000_VMOLR(vfn), vmolr); ++} ++ ++/** ++ * igb_configure_rx_ring - Configure a receive ring after Reset ++ * @adapter: board private structure ++ * @ring: receive ring to be configured ++ * ++ * Configure the Rx unit of the MAC after a reset. ++ **/ ++void igb_configure_rx_ring(struct igb_adapter *adapter, ++ struct igb_ring *ring) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u64 rdba = ring->dma; ++ int reg_idx = ring->reg_idx; ++ u32 srrctl = 0, rxdctl = 0; ++ ++ ring->rx_buffer_len = max_t(u32, adapter->max_frame_size, ++ MAXIMUM_ETHERNET_VLAN_SIZE); ++ ++ /* disable the queue */ ++ wr32(E1000_RXDCTL(reg_idx), 0); ++ ++ /* Set DMA base address registers */ ++ wr32(E1000_RDBAL(reg_idx), ++ rdba & 0x00000000ffffffffULL); ++ wr32(E1000_RDBAH(reg_idx), rdba >> 32); ++ wr32(E1000_RDLEN(reg_idx), ++ ring->count * sizeof(union e1000_adv_rx_desc)); ++ ++ /* initialize head and tail */ ++ ring->tail = hw->hw_addr + E1000_RDT(reg_idx); ++ wr32(E1000_RDH(reg_idx), 0); ++ writel(0, ring->tail); ++ ++ /* set descriptor configuration */ ++ srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; ++ srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT; ++ srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; ++ if (hw->mac.type >= e1000_82580) ++ srrctl |= E1000_SRRCTL_TIMESTAMP; ++ /* Only set Drop Enable if we are supporting multiple queues */ ++ if (adapter->num_rx_queues > 1) ++ srrctl |= E1000_SRRCTL_DROP_EN; ++ ++ wr32(E1000_SRRCTL(reg_idx), srrctl); ++ ++ /* set filtering for VMDQ pools */ ++ igb_set_vmolr(adapter, reg_idx & 0x7, true); ++ ++ rxdctl |= IGB_RX_PTHRESH; ++ rxdctl |= IGB_RX_HTHRESH << 8; ++ rxdctl |= IGB_RX_WTHRESH << 16; ++ ++ /* enable receive descriptor fetching */ ++ rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; ++ wr32(E1000_RXDCTL(reg_idx), rxdctl); ++} ++ ++/** ++ * igb_configure_rx - Configure receive Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Rx unit of the MAC after a reset. 
++ **/ ++static void igb_configure_rx(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ /* set the correct pool for the PF default MAC address in entry 0 */ ++ igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, 0); ++ ++ /* Setup the HW Rx Head and Tail Descriptor Pointers and ++ * the Base and Length of the Rx Descriptor Ring ++ */ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ igb_configure_rx_ring(adapter, adapter->rx_ring[i]); ++} ++ ++/** ++ * igb_free_tx_resources - Free Tx Resources per Queue ++ * @tx_ring: Tx descriptor ring for a specific queue ++ * ++ * Free all transmit software resources ++ **/ ++void igb_free_tx_resources(struct igb_ring *tx_ring) ++{ ++ igb_clean_tx_ring(tx_ring); ++ ++ vfree(tx_ring->tx_buffer_info); ++ tx_ring->tx_buffer_info = NULL; ++ ++ /* if not set, then don't free */ ++ if (!tx_ring->desc) ++ return; ++ ++ dma_free_coherent(tx_ring->dev, tx_ring->size, ++ tx_ring->desc, tx_ring->dma); ++ ++ tx_ring->desc = NULL; ++} ++ ++/** ++ * igb_free_all_tx_resources - Free Tx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all transmit software resources ++ **/ ++static void igb_free_all_tx_resources(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ if (adapter->tx_ring[i]) ++ igb_free_tx_resources(adapter->tx_ring[i]); ++} ++ ++void igb_unmap_and_free_tx_resource(struct igb_ring *ring, ++ struct igb_tx_buffer *tx_buffer) ++{ ++ if (tx_buffer->skb) { ++ kfree_rtskb(tx_buffer->skb); ++ tx_buffer->skb = NULL; ++ } ++ tx_buffer->next_to_watch = NULL; ++ /* buffer_info must be completely set up in the transmit path */ ++} ++ ++/** ++ * igb_clean_tx_ring - Free Tx Buffers ++ * @tx_ring: ring to be cleaned ++ **/ ++static void igb_clean_tx_ring(struct igb_ring *tx_ring) ++{ ++ struct igb_tx_buffer *buffer_info; ++ unsigned long size; ++ u16 i; ++ ++ if (!tx_ring->tx_buffer_info) ++ return; ++ /* Free all the Tx ring sk_buffs */ ++ ++ for (i = 0; i < tx_ring->count; i++) { ++ buffer_info = &tx_ring->tx_buffer_info[i]; ++ igb_unmap_and_free_tx_resource(tx_ring, buffer_info); ++ } ++ ++ size = sizeof(struct igb_tx_buffer) * tx_ring->count; ++ memset(tx_ring->tx_buffer_info, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ memset(tx_ring->desc, 0, tx_ring->size); ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++} ++ ++/** ++ * igb_clean_all_tx_rings - Free Tx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++static void igb_clean_all_tx_rings(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ if (adapter->tx_ring[i]) ++ igb_clean_tx_ring(adapter->tx_ring[i]); ++} ++ ++/** ++ * igb_free_rx_resources - Free Rx Resources ++ * @rx_ring: ring to clean the resources from ++ * ++ * Free all receive software resources ++ **/ ++void igb_free_rx_resources(struct igb_ring *rx_ring) ++{ ++ igb_clean_rx_ring(rx_ring); ++ ++ vfree(rx_ring->rx_buffer_info); ++ rx_ring->rx_buffer_info = NULL; ++ ++ /* if not set, then don't free */ ++ if (!rx_ring->desc) ++ return; ++ ++ dma_free_coherent(rx_ring->dev, rx_ring->size, ++ rx_ring->desc, rx_ring->dma); ++ ++ rx_ring->desc = NULL; ++} ++ ++/** ++ * igb_free_all_rx_resources - Free Rx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all receive software resources ++ **/ ++static void igb_free_all_rx_resources(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ if (adapter->rx_ring[i]) ++ 
igb_free_rx_resources(adapter->rx_ring[i]); ++} ++ ++/** ++ * igb_clean_rx_ring - Free Rx Buffers per Queue ++ * @rx_ring: ring to free buffers from ++ **/ ++static void igb_clean_rx_ring(struct igb_ring *rx_ring) ++{ ++ unsigned long size; ++ u16 i; ++ ++ if (!rx_ring->rx_buffer_info) ++ return; ++ ++ /* Free all the Rx ring sk_buffs */ ++ for (i = 0; i < rx_ring->count; i++) { ++ struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; ++ ++ if (buffer_info->dma) ++ buffer_info->dma = 0; ++ ++ if (buffer_info->skb) { ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ } ++ ++ size = sizeof(struct igb_rx_buffer) * rx_ring->count; ++ memset(rx_ring->rx_buffer_info, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ memset(rx_ring->desc, 0, rx_ring->size); ++ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++} ++ ++/** ++ * igb_clean_all_rx_rings - Free Rx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++static void igb_clean_all_rx_rings(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ if (adapter->rx_ring[i]) ++ igb_clean_rx_ring(adapter->rx_ring[i]); ++} ++ ++/** ++ * igb_write_mc_addr_list - write multicast addresses to MTA ++ * @netdev: network interface device structure ++ * ++ * Writes multicast address list to the MTA hash table. ++ * Returns: -ENOMEM on failure ++ * 0 on no addresses written ++ * X on writing X addresses to MTA ++ **/ ++static int igb_write_mc_addr_list(struct rtnet_device *netdev) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++#if 0 ++ struct netdev_hw_addr *ha; ++ u8 *mta_list; ++ int i; ++ if (netdev_mc_empty(netdev)) { ++ /* nothing to program, so clear mc list */ ++ igb_update_mc_addr_list(hw, NULL, 0); ++ igb_restore_vf_multicasts(adapter); ++ return 0; ++ } ++ ++ mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC); ++ if (!mta_list) ++ return -ENOMEM; ++ ++ /* The shared function expects a packed array of only addresses. */ ++ i = 0; ++ netdev_for_each_mc_addr(ha, netdev) ++ memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); ++ ++ igb_update_mc_addr_list(hw, mta_list, i); ++ kfree(mta_list); ++ ++ return netdev_mc_count(netdev); ++#else ++ igb_update_mc_addr_list(hw, NULL, 0); ++ return 0; ++#endif ++} ++ ++/** ++ * igb_write_uc_addr_list - write unicast addresses to RAR table ++ * @netdev: network interface device structure ++ * ++ * Writes unicast address list to the RAR table. ++ * Returns: -ENOMEM on failure/insufficient address space ++ * 0 on no addresses written ++ * X on writing X addresses to the RAR table ++ **/ ++static int igb_write_uc_addr_list(struct rtnet_device *netdev) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ unsigned int vfn = 0; ++ unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1); ++ int count = 0; ++ ++ /* write the addresses in reverse order to avoid write combining */ ++ for (; rar_entries > 0 ; rar_entries--) { ++ wr32(E1000_RAH(rar_entries), 0); ++ wr32(E1000_RAL(rar_entries), 0); ++ } ++ wrfl(); ++ ++ return count; ++} ++ ++/** ++ * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set ++ * @netdev: network interface device structure ++ * ++ * The set_rx_mode entry point is called whenever the unicast or multicast ++ * address lists or the network interface flags are updated. 
This routine is ++ * responsible for configuring the hardware for proper unicast, multicast, ++ * promiscuous mode, and all-multi behavior. ++ **/ ++static void igb_set_rx_mode(struct rtnet_device *netdev) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ unsigned int vfn = 0; ++ u32 rctl, vmolr = 0; ++ int count; ++ ++ /* Check for Promiscuous and All Multicast modes */ ++ rctl = rd32(E1000_RCTL); ++ ++ /* clear the effected bits */ ++ rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); ++ ++ if (netdev->flags & IFF_PROMISC) { ++ rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); ++ vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); ++ } else { ++ if (netdev->flags & IFF_ALLMULTI) { ++ rctl |= E1000_RCTL_MPE; ++ vmolr |= E1000_VMOLR_MPME; ++ } else { ++ /* Write addresses to the MTA, if the attempt fails ++ * then we should just turn on promiscuous mode so ++ * that we can at least receive multicast traffic ++ */ ++ count = igb_write_mc_addr_list(netdev); ++ if (count < 0) { ++ rctl |= E1000_RCTL_MPE; ++ vmolr |= E1000_VMOLR_MPME; ++ } else if (count) { ++ vmolr |= E1000_VMOLR_ROMPE; ++ } ++ } ++ /* Write addresses to available RAR registers, if there is not ++ * sufficient space to store all the addresses then enable ++ * unicast promiscuous mode ++ */ ++ count = igb_write_uc_addr_list(netdev); ++ if (count < 0) { ++ rctl |= E1000_RCTL_UPE; ++ vmolr |= E1000_VMOLR_ROPE; ++ } ++ rctl |= E1000_RCTL_VFE; ++ } ++ wr32(E1000_RCTL, rctl); ++ ++ /* In order to support SR-IOV and eventually VMDq it is necessary to set ++ * the VMOLR to enable the appropriate modes. Without this workaround ++ * we will have issues with VLAN tag stripping not being done for frames ++ * that are only arriving because we are the default pool ++ */ ++ if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350)) ++ return; ++ ++ vmolr |= rd32(E1000_VMOLR(vfn)) & ++ ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); ++ wr32(E1000_VMOLR(vfn), vmolr); ++} ++ ++static void igb_check_wvbr(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 wvbr = 0; ++ ++ switch (hw->mac.type) { ++ case e1000_82576: ++ case e1000_i350: ++ wvbr = rd32(E1000_WVBR); ++ if (!wvbr) ++ return; ++ break; ++ default: ++ break; ++ } ++ ++ adapter->wvbr |= wvbr; ++} ++ ++#define IGB_STAGGERED_QUEUE_OFFSET 8 ++ ++/* Need to wait a few seconds after link up to get diagnostic information from ++ * the phy ++ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++static void igb_update_phy_info(struct timer_list *t) ++{ ++ struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer); ++#else /* < 4.14 */ ++static void igb_update_phy_info(unsigned long data) ++{ ++ struct igb_adapter *adapter = (struct igb_adapter *) data; ++#endif /* < 4.14 */ ++ igb_get_phy_info(&adapter->hw); ++} ++ ++/** ++ * igb_has_link - check shared code for link and determine up/down ++ * @adapter: pointer to driver private info ++ **/ ++bool igb_has_link(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ bool link_active = false; ++ ++ /* get_link_status is set on LSC (link status) interrupt or ++ * rx sequence error interrupt. 
get_link_status will stay ++ * false until the e1000_check_for_link establishes link ++ * for copper adapters ONLY ++ */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ if (!hw->mac.get_link_status) ++ return true; ++ case e1000_media_type_internal_serdes: ++ hw->mac.ops.check_for_link(hw); ++ link_active = !hw->mac.get_link_status; ++ break; ++ default: ++ case e1000_media_type_unknown: ++ break; ++ } ++ ++ if (((hw->mac.type == e1000_i210) || ++ (hw->mac.type == e1000_i211)) && ++ (hw->phy.id == I210_I_PHY_ID)) { ++ if (!rtnetif_carrier_ok(adapter->netdev)) { ++ adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; ++ } else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) { ++ adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE; ++ adapter->link_check_timeout = jiffies; ++ } ++ } ++ ++ return link_active; ++} ++ ++static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event) ++{ ++ bool ret = false; ++ u32 ctrl_ext, thstat; ++ ++ /* check for thermal sensor event on i350 copper only */ ++ if (hw->mac.type == e1000_i350) { ++ thstat = rd32(E1000_THSTAT); ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ++ if ((hw->phy.media_type == e1000_media_type_copper) && ++ !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) ++ ret = !!(thstat & event); ++ } ++ ++ return ret; ++} ++ ++/** ++ * igb_check_lvmmc - check for malformed packets received ++ * and indicated in LVMMC register ++ * @adapter: pointer to adapter ++ **/ ++static void igb_check_lvmmc(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 lvmmc; ++ ++ lvmmc = rd32(E1000_LVMMC); ++ if (lvmmc) { ++ if (unlikely(net_ratelimit())) { ++ rtdev_warn(adapter->netdev, ++ "malformed Tx packet detected and dropped, LVMMC:0x%08x\n", ++ lvmmc); ++ } ++ } ++} ++ ++/** ++ * igb_watchdog - Timer Call-back ++ * @data: pointer to adapter cast into an unsigned long ++ **/ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++static void igb_watchdog(struct timer_list *t) ++{ ++ struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer); ++#else /* < 4.14 */ ++static void igb_watchdog(unsigned long data) ++{ ++ struct igb_adapter *adapter = (struct igb_adapter *)data; ++#endif /* < 4.14 */ ++ /* Do the rest outside of interrupt context */ ++ schedule_work(&adapter->watchdog_task); ++} ++ ++static void igb_watchdog_task(struct work_struct *work) ++{ ++ struct igb_adapter *adapter = container_of(work, ++ struct igb_adapter, ++ watchdog_task); ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_phy_info *phy = &hw->phy; ++ struct rtnet_device *netdev = adapter->netdev; ++ u32 link; ++ int i; ++ u32 connsw; ++ ++ link = igb_has_link(adapter); ++ ++ if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) { ++ if (time_after(jiffies, (adapter->link_check_timeout + HZ))) ++ adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; ++ else ++ link = false; ++ } ++ ++ /* Force link down if we have fiber to swap to */ ++ if (adapter->flags & IGB_FLAG_MAS_ENABLE) { ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ connsw = rd32(E1000_CONNSW); ++ if (!(connsw & E1000_CONNSW_AUTOSENSE_EN)) ++ link = 0; ++ } ++ } ++ if (link) { ++ /* Perform a reset if the media type changed. */ ++ if (hw->dev_spec._82575.media_changed) { ++ hw->dev_spec._82575.media_changed = false; ++ adapter->flags |= IGB_FLAG_MEDIA_RESET; ++ igb_reset(adapter); ++ } ++ /* Cancel scheduled suspend requests. 
*/ ++ pm_runtime_resume(adapter->pdev->dev.parent); ++ ++ if (!rtnetif_carrier_ok(netdev)) { ++ u32 ctrl; ++ ++ hw->mac.ops.get_speed_and_duplex(hw, ++ &adapter->link_speed, ++ &adapter->link_duplex); ++ ++ ctrl = rd32(E1000_CTRL); ++ /* Links status message must follow this format */ ++ rtdev_info(netdev, ++ "igb: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", ++ netdev->name, ++ adapter->link_speed, ++ adapter->link_duplex == FULL_DUPLEX ? ++ "Full" : "Half", ++ (ctrl & E1000_CTRL_TFCE) && ++ (ctrl & E1000_CTRL_RFCE) ? "RX/TX" : ++ (ctrl & E1000_CTRL_RFCE) ? "RX" : ++ (ctrl & E1000_CTRL_TFCE) ? "TX" : "None"); ++ ++ /* disable EEE if enabled */ ++ if ((adapter->flags & IGB_FLAG_EEE) && ++ (adapter->link_duplex == HALF_DUPLEX)) { ++ dev_info(&adapter->pdev->dev, ++ "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex.\n"); ++ adapter->hw.dev_spec._82575.eee_disable = true; ++ adapter->flags &= ~IGB_FLAG_EEE; ++ } ++ ++ /* check if SmartSpeed worked */ ++ igb_check_downshift(hw); ++ if (phy->speed_downgraded) ++ rtdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n"); ++ ++ /* check for thermal sensor event */ ++ if (igb_thermal_sensor_event(hw, ++ E1000_THSTAT_LINK_THROTTLE)) ++ rtdev_info(netdev, "The network adapter link speed was downshifted because it overheated\n"); ++ ++ /* adjust timeout factor according to speed/duplex */ ++ adapter->tx_timeout_factor = 1; ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ adapter->tx_timeout_factor = 14; ++ break; ++ case SPEED_100: ++ /* maybe add some timeout factor ? */ ++ break; ++ } ++ ++ rtnetif_carrier_on(netdev); ++ ++ /* link state has changed, schedule phy info update */ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) ++ mod_timer(&adapter->phy_info_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++ } ++ } else { ++ if (rtnetif_carrier_ok(netdev)) { ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ ++ /* check for thermal sensor event */ ++ if (igb_thermal_sensor_event(hw, ++ E1000_THSTAT_PWR_DOWN)) { ++ rtdev_err(netdev, "The network adapter was stopped because it overheated\n"); ++ } ++ ++ /* Links status message must follow this format */ ++ rtdev_info(netdev, "igb: %s NIC Link is Down\n", ++ netdev->name); ++ rtnetif_carrier_off(netdev); ++ ++ /* link state has changed, schedule phy info update */ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) ++ mod_timer(&adapter->phy_info_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++ ++ /* link is down, time to check for alternate media */ ++ if (adapter->flags & IGB_FLAG_MAS_ENABLE) { ++ igb_check_swap_media(adapter); ++ if (adapter->flags & IGB_FLAG_MEDIA_RESET) { ++ schedule_work(&adapter->reset_task); ++ /* return immediately */ ++ return; ++ } ++ } ++ pm_schedule_suspend(adapter->pdev->dev.parent, ++ MSEC_PER_SEC * 5); ++ ++ /* also check for alternate media here */ ++ } else if (!rtnetif_carrier_ok(netdev) && ++ (adapter->flags & IGB_FLAG_MAS_ENABLE)) { ++ igb_check_swap_media(adapter); ++ if (adapter->flags & IGB_FLAG_MEDIA_RESET) { ++ schedule_work(&adapter->reset_task); ++ /* return immediately */ ++ return; ++ } ++ } ++ } ++ ++ spin_lock(&adapter->stats64_lock); ++ igb_update_stats(adapter); ++ spin_unlock(&adapter->stats64_lock); ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ struct igb_ring *tx_ring = adapter->tx_ring[i]; ++ if (!rtnetif_carrier_ok(netdev)) { ++ /* We've lost link, so the controller stops DMA, ++ * but we've got queued Tx work that's never going ++ * to get done, so reset controller to flush Tx. 
++ * (Do the reset outside of interrupt context). ++ */ ++ if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { ++ adapter->tx_timeout_count++; ++ schedule_work(&adapter->reset_task); ++ /* return immediately since reset is imminent */ ++ return; ++ } ++ } ++ ++ /* Force detection of hung controller every watchdog period */ ++ set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); ++ } ++ ++ /* Cause software interrupt to ensure Rx ring is cleaned */ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) { ++ u32 eics = 0; ++ ++ for (i = 0; i < adapter->num_q_vectors; i++) ++ eics |= adapter->q_vector[i]->eims_value; ++ wr32(E1000_EICS, eics); ++ } else { ++ wr32(E1000_ICS, E1000_ICS_RXDMT0); ++ } ++ ++ /* Check LVMMC register on i350/i354 only */ ++ if ((adapter->hw.mac.type == e1000_i350) || ++ (adapter->hw.mac.type == e1000_i354)) ++ igb_check_lvmmc(adapter); ++ ++ /* Reset the timer */ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) { ++ if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) ++ mod_timer(&adapter->watchdog_timer, ++ round_jiffies(jiffies + HZ)); ++ else ++ mod_timer(&adapter->watchdog_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++ } ++} ++ ++enum latency_range { ++ lowest_latency = 0, ++ low_latency = 1, ++ bulk_latency = 2, ++ latency_invalid = 255 ++}; ++ ++/** ++ * igb_update_ring_itr - update the dynamic ITR value based on packet size ++ * @q_vector: pointer to q_vector ++ * ++ * Stores a new ITR value based on strictly on packet size. This ++ * algorithm is less sophisticated than that used in igb_update_itr, ++ * due to the difficulty of synchronizing statistics across multiple ++ * receive rings. The divisors and thresholds used by this function ++ * were determined based on theoretical maximum wire speed and testing ++ * data, in order to minimize response time while increasing bulk ++ * throughput. ++ * This functionality is controlled by ethtool's coalescing settings. ++ * NOTE: This function is called only when operating in a multiqueue ++ * receive environment. ++ **/ ++static void igb_update_ring_itr(struct igb_q_vector *q_vector) ++{ ++ int new_val = q_vector->itr_val; ++ int avg_wire_size = 0; ++ struct igb_adapter *adapter = q_vector->adapter; ++ unsigned int packets; ++ ++ if (!InterruptThrottle) ++ return; ++ ++ /* For non-gigabit speeds, just fix the interrupt rate at 4000 ++ * ints/sec - ITR timer value of 120 ticks. 
++ */ ++ if (adapter->link_speed != SPEED_1000) { ++ new_val = IGB_4K_ITR; ++ goto set_itr_val; ++ } ++ ++ packets = q_vector->rx.total_packets; ++ if (packets) ++ avg_wire_size = q_vector->rx.total_bytes / packets; ++ ++ packets = q_vector->tx.total_packets; ++ if (packets) ++ avg_wire_size = max_t(u32, avg_wire_size, ++ q_vector->tx.total_bytes / packets); ++ ++ /* if avg_wire_size isn't set no work was done */ ++ if (!avg_wire_size) ++ goto clear_counts; ++ ++ /* Add 24 bytes to size to account for CRC, preamble, and gap */ ++ avg_wire_size += 24; ++ ++ /* Don't starve jumbo frames */ ++ avg_wire_size = min(avg_wire_size, 3000); ++ ++ /* Give a little boost to mid-size frames */ ++ if ((avg_wire_size > 300) && (avg_wire_size < 1200)) ++ new_val = avg_wire_size / 3; ++ else ++ new_val = avg_wire_size / 2; ++ ++ /* conservative mode (itr 3) eliminates the lowest_latency setting */ ++ if (new_val < IGB_20K_ITR && ++ ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || ++ (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) ++ new_val = IGB_20K_ITR; ++ ++set_itr_val: ++ if (new_val != q_vector->itr_val) { ++ q_vector->itr_val = new_val; ++ q_vector->set_itr = 1; ++ } ++clear_counts: ++ q_vector->rx.total_bytes = 0; ++ q_vector->rx.total_packets = 0; ++ q_vector->tx.total_bytes = 0; ++ q_vector->tx.total_packets = 0; ++} ++ ++/** ++ * igb_update_itr - update the dynamic ITR value based on statistics ++ * @q_vector: pointer to q_vector ++ * @ring_container: ring info to update the itr for ++ * ++ * Stores a new ITR value based on packets and byte ++ * counts during the last interrupt. The advantage of per interrupt ++ * computation is faster updates and more accurate ITR for the current ++ * traffic pattern. Constants in this function were computed ++ * based on theoretical maximum wire speed and thresholds were set based ++ * on testing data as well as attempting to minimize response time ++ * while increasing bulk throughput. ++ * This functionality is controlled by ethtool's coalescing settings. ++ * NOTE: These calculations are only valid when operating in a single- ++ * queue environment. 
++ **/ ++static void igb_update_itr(struct igb_q_vector *q_vector, ++ struct igb_ring_container *ring_container) ++{ ++ unsigned int packets = ring_container->total_packets; ++ unsigned int bytes = ring_container->total_bytes; ++ u8 itrval = ring_container->itr; ++ ++ /* no packets, exit with status unchanged */ ++ if (packets == 0) ++ return; ++ ++ switch (itrval) { ++ case lowest_latency: ++ /* handle TSO and jumbo frames */ ++ if (bytes/packets > 8000) ++ itrval = bulk_latency; ++ else if ((packets < 5) && (bytes > 512)) ++ itrval = low_latency; ++ break; ++ case low_latency: /* 50 usec aka 20000 ints/s */ ++ if (bytes > 10000) { ++ /* this if handles the TSO accounting */ ++ if (bytes/packets > 8000) ++ itrval = bulk_latency; ++ else if ((packets < 10) || ((bytes/packets) > 1200)) ++ itrval = bulk_latency; ++ else if ((packets > 35)) ++ itrval = lowest_latency; ++ } else if (bytes/packets > 2000) { ++ itrval = bulk_latency; ++ } else if (packets <= 2 && bytes < 512) { ++ itrval = lowest_latency; ++ } ++ break; ++ case bulk_latency: /* 250 usec aka 4000 ints/s */ ++ if (bytes > 25000) { ++ if (packets > 35) ++ itrval = low_latency; ++ } else if (bytes < 1500) { ++ itrval = low_latency; ++ } ++ break; ++ } ++ ++ /* clear work counters since we have the values we need */ ++ ring_container->total_bytes = 0; ++ ring_container->total_packets = 0; ++ ++ /* write updated itr to ring container */ ++ ring_container->itr = itrval; ++} ++ ++static void igb_set_itr(struct igb_q_vector *q_vector) ++{ ++ struct igb_adapter *adapter = q_vector->adapter; ++ u32 new_itr = q_vector->itr_val; ++ u8 current_itr = 0; ++ ++ if (!InterruptThrottle) ++ return; ++ ++ /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ ++ if (adapter->link_speed != SPEED_1000) { ++ current_itr = 0; ++ new_itr = IGB_4K_ITR; ++ goto set_itr_now; ++ } ++ ++ igb_update_itr(q_vector, &q_vector->tx); ++ igb_update_itr(q_vector, &q_vector->rx); ++ ++ current_itr = max(q_vector->rx.itr, q_vector->tx.itr); ++ ++ /* conservative mode (itr 3) eliminates the lowest_latency setting */ ++ if (current_itr == lowest_latency && ++ ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || ++ (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) ++ current_itr = low_latency; ++ ++ switch (current_itr) { ++ /* counts and packets in update_itr are dependent on these numbers */ ++ case lowest_latency: ++ new_itr = IGB_70K_ITR; /* 70,000 ints/sec */ ++ break; ++ case low_latency: ++ new_itr = IGB_20K_ITR; /* 20,000 ints/sec */ ++ break; ++ case bulk_latency: ++ new_itr = IGB_4K_ITR; /* 4,000 ints/sec */ ++ break; ++ default: ++ break; ++ } ++ ++set_itr_now: ++ if (new_itr != q_vector->itr_val) { ++ /* this attempts to bias the interrupt rate towards Bulk ++ * by adding intermediate steps when interrupt rate is ++ * increasing ++ */ ++ new_itr = new_itr > q_vector->itr_val ? ++ max((new_itr * q_vector->itr_val) / ++ (new_itr + (q_vector->itr_val >> 2)), ++ new_itr) : new_itr; ++ /* Don't write the value here; it resets the adapter's ++ * internal timer, and causes us to delay far longer than ++ * we should between interrupts. Instead, we write the ITR ++ * value at the beginning of the next interrupt so the timing ++ * ends up being correct. ++ */ ++ q_vector->itr_val = new_itr; ++ q_vector->set_itr = 1; ++ } ++} ++ ++ ++#define IGB_SET_FLAG(_input, _flag, _result) \ ++ ((_flag <= _result) ? 
\ ++ ((u32)(_input & _flag) * (_result / _flag)) : \ ++ ((u32)(_input & _flag) / (_flag / _result))) ++ ++static u32 igb_tx_cmd_type(struct rtskb *skb, u32 tx_flags) ++{ ++ /* set type for advanced descriptor with frame checksum insertion */ ++ u32 cmd_type = E1000_ADVTXD_DTYP_DATA | ++ E1000_ADVTXD_DCMD_DEXT | ++ E1000_ADVTXD_DCMD_IFCS; ++ ++ return cmd_type; ++} ++ ++static void igb_tx_olinfo_status(struct igb_ring *tx_ring, ++ union e1000_adv_tx_desc *tx_desc, ++ u32 tx_flags, unsigned int paylen) ++{ ++ u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT; ++ ++ /* 82575 requires a unique index per ring */ ++ if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) ++ olinfo_status |= tx_ring->reg_idx << 4; ++ ++ tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); ++} ++ ++static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) ++{ ++ struct rtnet_device *netdev = tx_ring->netdev; ++ ++ rtnetif_stop_queue(netdev); ++ ++ /* Herbert's original patch had: ++ * smp_mb__after_netif_stop_queue(); ++ * but since that doesn't exist yet, just open code it. ++ */ ++ smp_mb(); ++ ++ /* We need to check again in a case another CPU has just ++ * made room available. ++ */ ++ if (igb_desc_unused(tx_ring) < size) ++ return -EBUSY; ++ ++ /* A reprieve! */ ++ rtnetif_wake_queue(netdev); ++ ++ tx_ring->tx_stats.restart_queue2++; ++ ++ return 0; ++} ++ ++static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) ++{ ++ if (igb_desc_unused(tx_ring) >= size) ++ return 0; ++ return __igb_maybe_stop_tx(tx_ring, size); ++} ++ ++static void igb_tx_map(struct igb_ring *tx_ring, ++ struct igb_tx_buffer *first, ++ const u8 hdr_len) ++{ ++ struct rtskb *skb = first->skb; ++ struct igb_tx_buffer *tx_buffer; ++ union e1000_adv_tx_desc *tx_desc; ++ dma_addr_t dma; ++ unsigned int size; ++ u32 tx_flags = first->tx_flags; ++ u32 cmd_type = igb_tx_cmd_type(skb, tx_flags); ++ u16 i = tx_ring->next_to_use; ++ ++ /* first descriptor is also last, set RS and EOP bits */ ++ cmd_type |= IGB_TXD_DCMD; ++ tx_desc = IGB_TX_DESC(tx_ring, i); ++ ++ igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); ++ ++ size = skb->len; ++ ++ dma = rtskb_data_dma_addr(skb, 0); ++ ++ tx_buffer = first; ++ ++ tx_desc->read.buffer_addr = cpu_to_le64(dma); ++ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); ++ ++ /* set the timestamp */ ++ first->time_stamp = jiffies; ++ first->next_to_watch = tx_desc; ++ ++ i++; ++ tx_desc++; ++ if (i == tx_ring->count) { ++ tx_desc = IGB_TX_DESC(tx_ring, 0); ++ i = 0; ++ } ++ ++ /* Force memory writes to complete before letting h/w know there ++ * are new descriptors to fetch. (Only applicable for weak-ordered ++ * memory model archs, such as IA-64). ++ * ++ * We also need this memory barrier to make certain all of the ++ * status bits have been updated before next_to_watch is written. ++ */ ++ wmb(); ++ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ /* set next_to_watch value indicating a packet is present */ ++ tx_ring->next_to_use = i; ++ ++ /* Make sure there is space in the ring for the next send. 
*/ ++ igb_maybe_stop_tx(tx_ring, DESC_NEEDED); ++ ++ writel(i, tx_ring->tail); ++ ++ /* we need this if more than one processor can write to our tail ++ * at a time, it synchronizes IO on IA64/Altix systems ++ */ ++ mmiowb(); ++ ++ return; ++} ++ ++netdev_tx_t igb_xmit_frame_ring(struct rtskb *skb, ++ struct igb_ring *tx_ring) ++{ ++ struct igb_tx_buffer *first; ++ u32 tx_flags = 0; ++ u16 count = 2; ++ u8 hdr_len = 0; ++ ++ /* need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD, ++ * + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD, ++ * + 2 desc gap to keep tail from touching head, ++ * + 1 desc for context descriptor, ++ * otherwise try next time ++ */ ++ if (igb_maybe_stop_tx(tx_ring, count + 3)) { ++ /* this is a hard error */ ++ return NETDEV_TX_BUSY; ++ } ++ ++ if (skb->protocol == htons(ETH_P_IP)) ++ tx_flags |= IGB_TX_FLAGS_IPV4; ++ ++ /* record the location of the first descriptor for this packet */ ++ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; ++ first->skb = skb; ++ first->bytecount = skb->len; ++ first->gso_segs = 1; ++ ++ /* record initial flags and protocol */ ++ first->tx_flags = tx_flags; ++ first->protocol = skb->protocol; ++ ++ igb_tx_map(tx_ring, first, hdr_len); ++ ++ return NETDEV_TX_OK; ++} ++ ++static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, ++ struct rtskb *skb) ++{ ++ return adapter->tx_ring[0]; ++} ++ ++static netdev_tx_t igb_xmit_frame(struct rtskb *skb, ++ struct rtnet_device *netdev) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ ++ if (test_bit(__IGB_DOWN, &adapter->state)) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ if (skb->len <= 0) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ /* The minimum packet size with TCTL.PSP set is 17 so pad the skb ++ * in order to meet this minimum size requirement. ++ */ ++ if (skb->len < 17) { ++ skb = rtskb_padto(skb, 17); ++ if (!skb) ++ return NETDEV_TX_OK; ++ } ++ ++ return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); ++} ++ ++static void igb_reset_task(struct work_struct *work) ++{ ++ struct igb_adapter *adapter; ++ adapter = container_of(work, struct igb_adapter, reset_task); ++ ++ igb_dump(adapter); ++ rtdev_err(adapter->netdev, "Reset adapter\n"); ++ igb_reinit_locked(adapter); ++} ++ ++/** ++ * igb_get_stats - Get System Network Statistics ++ * @netdev: network interface device structure ++ * ++ * Returns the address of the device statistics structure. ++ * The statistics are actually updated from the timer callback. ++ **/ ++static struct net_device_stats * ++igb_get_stats(struct rtnet_device *netdev) ++{ ++ struct igb_adapter *adapter = netdev->priv; ++ ++ /* only return the current stats */ ++ return &adapter->net_stats; ++} ++ ++/** ++ * igb_update_stats - Update the board statistics counters ++ * @adapter: board private structure ++ **/ ++void igb_update_stats(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct pci_dev *pdev = adapter->pdev; ++ struct net_device_stats *net_stats; ++ u32 reg, mpc; ++ int i; ++ u64 bytes, packets; ++ ++ /* Prevent stats update while adapter is being reset, or if the pci ++ * connection is down. 
++ */ ++ if (adapter->link_speed == 0) ++ return; ++ if (pci_channel_offline(pdev)) ++ return; ++ ++ net_stats = &adapter->net_stats; ++ bytes = 0; ++ packets = 0; ++ ++ rcu_read_lock(); ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ struct igb_ring *ring = adapter->rx_ring[i]; ++ u32 rqdpc = rd32(E1000_RQDPC(i)); ++ if (hw->mac.type >= e1000_i210) ++ wr32(E1000_RQDPC(i), 0); ++ ++ if (rqdpc) { ++ ring->rx_stats.drops += rqdpc; ++ net_stats->rx_fifo_errors += rqdpc; ++ } ++ ++ bytes += ring->rx_stats.bytes; ++ packets += ring->rx_stats.packets; ++ } ++ ++ net_stats->rx_bytes = bytes; ++ net_stats->rx_packets = packets; ++ ++ bytes = 0; ++ packets = 0; ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ struct igb_ring *ring = adapter->tx_ring[i]; ++ bytes += ring->tx_stats.bytes; ++ packets += ring->tx_stats.packets; ++ } ++ net_stats->tx_bytes = bytes; ++ net_stats->tx_packets = packets; ++ rcu_read_unlock(); ++ ++ /* read stats registers */ ++ adapter->stats.crcerrs += rd32(E1000_CRCERRS); ++ adapter->stats.gprc += rd32(E1000_GPRC); ++ adapter->stats.gorc += rd32(E1000_GORCL); ++ rd32(E1000_GORCH); /* clear GORCL */ ++ adapter->stats.bprc += rd32(E1000_BPRC); ++ adapter->stats.mprc += rd32(E1000_MPRC); ++ adapter->stats.roc += rd32(E1000_ROC); ++ ++ adapter->stats.prc64 += rd32(E1000_PRC64); ++ adapter->stats.prc127 += rd32(E1000_PRC127); ++ adapter->stats.prc255 += rd32(E1000_PRC255); ++ adapter->stats.prc511 += rd32(E1000_PRC511); ++ adapter->stats.prc1023 += rd32(E1000_PRC1023); ++ adapter->stats.prc1522 += rd32(E1000_PRC1522); ++ adapter->stats.symerrs += rd32(E1000_SYMERRS); ++ adapter->stats.sec += rd32(E1000_SEC); ++ ++ mpc = rd32(E1000_MPC); ++ adapter->stats.mpc += mpc; ++ net_stats->rx_fifo_errors += mpc; ++ adapter->stats.scc += rd32(E1000_SCC); ++ adapter->stats.ecol += rd32(E1000_ECOL); ++ adapter->stats.mcc += rd32(E1000_MCC); ++ adapter->stats.latecol += rd32(E1000_LATECOL); ++ adapter->stats.dc += rd32(E1000_DC); ++ adapter->stats.rlec += rd32(E1000_RLEC); ++ adapter->stats.xonrxc += rd32(E1000_XONRXC); ++ adapter->stats.xontxc += rd32(E1000_XONTXC); ++ adapter->stats.xoffrxc += rd32(E1000_XOFFRXC); ++ adapter->stats.xofftxc += rd32(E1000_XOFFTXC); ++ adapter->stats.fcruc += rd32(E1000_FCRUC); ++ adapter->stats.gptc += rd32(E1000_GPTC); ++ adapter->stats.gotc += rd32(E1000_GOTCL); ++ rd32(E1000_GOTCH); /* clear GOTCL */ ++ adapter->stats.rnbc += rd32(E1000_RNBC); ++ adapter->stats.ruc += rd32(E1000_RUC); ++ adapter->stats.rfc += rd32(E1000_RFC); ++ adapter->stats.rjc += rd32(E1000_RJC); ++ adapter->stats.tor += rd32(E1000_TORH); ++ adapter->stats.tot += rd32(E1000_TOTH); ++ adapter->stats.tpr += rd32(E1000_TPR); ++ ++ adapter->stats.ptc64 += rd32(E1000_PTC64); ++ adapter->stats.ptc127 += rd32(E1000_PTC127); ++ adapter->stats.ptc255 += rd32(E1000_PTC255); ++ adapter->stats.ptc511 += rd32(E1000_PTC511); ++ adapter->stats.ptc1023 += rd32(E1000_PTC1023); ++ adapter->stats.ptc1522 += rd32(E1000_PTC1522); ++ ++ adapter->stats.mptc += rd32(E1000_MPTC); ++ adapter->stats.bptc += rd32(E1000_BPTC); ++ ++ adapter->stats.tpt += rd32(E1000_TPT); ++ adapter->stats.colc += rd32(E1000_COLC); ++ ++ adapter->stats.algnerrc += rd32(E1000_ALGNERRC); ++ /* read internal phy specific stats */ ++ reg = rd32(E1000_CTRL_EXT); ++ if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { ++ adapter->stats.rxerrc += rd32(E1000_RXERRC); ++ ++ /* this stat has invalid values on i210/i211 */ ++ if ((hw->mac.type != e1000_i210) && ++ (hw->mac.type != e1000_i211)) ++ adapter->stats.tncrs += rd32(E1000_TNCRS); 
++ } ++ ++ adapter->stats.tsctc += rd32(E1000_TSCTC); ++ adapter->stats.tsctfc += rd32(E1000_TSCTFC); ++ ++ adapter->stats.iac += rd32(E1000_IAC); ++ adapter->stats.icrxoc += rd32(E1000_ICRXOC); ++ adapter->stats.icrxptc += rd32(E1000_ICRXPTC); ++ adapter->stats.icrxatc += rd32(E1000_ICRXATC); ++ adapter->stats.ictxptc += rd32(E1000_ICTXPTC); ++ adapter->stats.ictxatc += rd32(E1000_ICTXATC); ++ adapter->stats.ictxqec += rd32(E1000_ICTXQEC); ++ adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC); ++ adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC); ++ ++ /* Fill out the OS statistics structure */ ++ net_stats->multicast = adapter->stats.mprc; ++ net_stats->collisions = adapter->stats.colc; ++ ++ /* Rx Errors */ ++ ++ /* RLEC on some newer hardware can be incorrect so build ++ * our own version based on RUC and ROC ++ */ ++ net_stats->rx_errors = adapter->stats.rxerrc + ++ adapter->stats.crcerrs + adapter->stats.algnerrc + ++ adapter->stats.ruc + adapter->stats.roc + ++ adapter->stats.cexterr; ++ net_stats->rx_length_errors = adapter->stats.ruc + ++ adapter->stats.roc; ++ net_stats->rx_crc_errors = adapter->stats.crcerrs; ++ net_stats->rx_frame_errors = adapter->stats.algnerrc; ++ net_stats->rx_missed_errors = adapter->stats.mpc; ++ ++ /* Tx Errors */ ++ net_stats->tx_errors = adapter->stats.ecol + ++ adapter->stats.latecol; ++ net_stats->tx_aborted_errors = adapter->stats.ecol; ++ net_stats->tx_window_errors = adapter->stats.latecol; ++ net_stats->tx_carrier_errors = adapter->stats.tncrs; ++ ++ /* Tx Dropped needs to be maintained elsewhere */ ++ ++ /* Management Stats */ ++ adapter->stats.mgptc += rd32(E1000_MGTPTC); ++ adapter->stats.mgprc += rd32(E1000_MGTPRC); ++ adapter->stats.mgpdc += rd32(E1000_MGTPDC); ++ ++ /* OS2BMC Stats */ ++ reg = rd32(E1000_MANC); ++ if (reg & E1000_MANC_EN_BMC2OS) { ++ adapter->stats.o2bgptc += rd32(E1000_O2BGPTC); ++ adapter->stats.o2bspc += rd32(E1000_O2BSPC); ++ adapter->stats.b2ospc += rd32(E1000_B2OSPC); ++ adapter->stats.b2ogprc += rd32(E1000_B2OGPRC); ++ } ++} ++ ++static void igb_nrtsig_watchdog(rtdm_nrtsig_t *sig, void *data) ++{ ++ struct igb_adapter *adapter = data; ++ mod_timer(&adapter->watchdog_timer, jiffies + 1); ++} ++ ++static void igb_other_handler(struct igb_adapter *adapter, u32 icr, bool root) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (icr & E1000_ICR_DRSTA) ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ ++ if (icr & E1000_ICR_DOUTSYNC) { ++ /* HW is reporting DMA is out of sync */ ++ adapter->stats.doosync++; ++ /* The DMA Out of Sync is also indication of a spoof event ++ * in IOV mode. Check the Wrong VM Behavior register to ++ * see if it is really a spoof event. 
++ */ ++ igb_check_wvbr(adapter); ++ } ++ ++ if (icr & E1000_ICR_LSC) { ++ hw->mac.get_link_status = 1; ++ /* guard against interrupt when we're going down */ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) { ++ if (root) ++ mod_timer(&adapter->watchdog_timer, ++ jiffies + 1); ++ else ++ rtdm_nrtsig_pend(&adapter->watchdog_nrtsig); ++ } ++ } ++} ++ ++static irqreturn_t igb_msix_other(int irq, void *data) ++{ ++ struct igb_adapter *adapter = data; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 icr = rd32(E1000_ICR); ++ /* reading ICR causes bit 31 of EICR to be cleared */ ++ ++ igb_other_handler(adapter, icr, true); ++ ++ wr32(E1000_EIMS, adapter->eims_other); ++ ++ return IRQ_HANDLED; ++} ++ ++static void igb_write_itr(struct igb_q_vector *q_vector) ++{ ++ struct igb_adapter *adapter = q_vector->adapter; ++ u32 itr_val = (q_vector->itr_val + 0x3) & 0x7FFC; ++ ++ if (!q_vector->set_itr) ++ return; ++ ++ if (!itr_val) ++ itr_val = 0x4; ++ ++ if (adapter->hw.mac.type == e1000_82575) ++ itr_val |= itr_val << 16; ++ else ++ itr_val |= E1000_EITR_CNT_IGNR; ++ ++ writel(itr_val, q_vector->itr_register); ++ q_vector->set_itr = 0; ++} ++ ++static int igb_msix_ring(rtdm_irq_t *ih) ++{ ++ struct igb_q_vector *q_vector = ++ rtdm_irq_get_arg(ih, struct igb_q_vector); ++ ++ /* Write the ITR value calculated from the previous interrupt. */ ++ igb_write_itr(q_vector); ++ ++ igb_poll(q_vector); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++ ++/** ++ * igb_intr_msi - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int igb_intr_msi(rtdm_irq_t *ih) ++{ ++ struct igb_adapter *adapter = ++ rtdm_irq_get_arg(ih, struct igb_adapter); ++ struct igb_q_vector *q_vector = adapter->q_vector[0]; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 icr = rd32(E1000_ICR); ++ ++ igb_write_itr(q_vector); ++ ++ igb_other_handler(adapter, icr, false); ++ ++ igb_poll(q_vector); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++/** ++ * igb_intr - Legacy Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int igb_intr(rtdm_irq_t *ih) ++{ ++ struct igb_adapter *adapter = ++ rtdm_irq_get_arg(ih, struct igb_adapter); ++ struct igb_q_vector *q_vector = adapter->q_vector[0]; ++ struct e1000_hw *hw = &adapter->hw; ++ /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. 
No ++ * need for the IMC write ++ */ ++ u32 icr = rd32(E1000_ICR); ++ ++ /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is ++ * not set, then the adapter didn't send an interrupt ++ */ ++ if (!(icr & E1000_ICR_INT_ASSERTED)) ++ return IRQ_NONE; ++ ++ igb_write_itr(q_vector); ++ ++ igb_other_handler(adapter, icr, false); ++ ++ igb_poll(q_vector); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static void igb_ring_irq_enable(struct igb_q_vector *q_vector) ++{ ++ struct igb_adapter *adapter = q_vector->adapter; ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || ++ (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { ++ if (adapter->num_q_vectors == 1) ++ igb_set_itr(q_vector); ++ else ++ igb_update_ring_itr(q_vector); ++ } ++ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) { ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) ++ wr32(E1000_EIMS, q_vector->eims_value); ++ else ++ igb_irq_enable(adapter); ++ } ++} ++ ++/** ++ * igb_poll - NAPI Rx polling callback ++ * @napi: napi polling structure ++ * @budget: count of how many packets we should handle ++ **/ ++static void igb_poll(struct igb_q_vector *q_vector) ++{ ++ if (q_vector->tx.ring) ++ igb_clean_tx_irq(q_vector); ++ ++ if (q_vector->rx.ring) ++ igb_clean_rx_irq(q_vector, 64); ++ ++ igb_ring_irq_enable(q_vector); ++} ++ ++/** ++ * igb_clean_tx_irq - Reclaim resources after transmit completes ++ * @q_vector: pointer to q_vector containing needed info ++ * ++ * returns true if ring is completely cleaned ++ **/ ++static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) ++{ ++ struct igb_adapter *adapter = q_vector->adapter; ++ struct igb_ring *tx_ring = q_vector->tx.ring; ++ struct igb_tx_buffer *tx_buffer; ++ union e1000_adv_tx_desc *tx_desc; ++ unsigned int total_bytes = 0, total_packets = 0; ++ unsigned int budget = q_vector->tx.work_limit; ++ unsigned int i = tx_ring->next_to_clean; ++ ++ if (test_bit(__IGB_DOWN, &adapter->state)) ++ return true; ++ ++ tx_buffer = &tx_ring->tx_buffer_info[i]; ++ tx_desc = IGB_TX_DESC(tx_ring, i); ++ i -= tx_ring->count; ++ ++ do { ++ union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; ++ ++ /* if next_to_watch is not set then there is no work pending */ ++ if (!eop_desc) ++ break; ++ ++ /* prevent any other reads prior to eop_desc */ ++ read_barrier_depends(); ++ ++ /* if DD is not set pending work has not been completed */ ++ if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) ++ break; ++ ++ /* clear next_to_watch to prevent false hangs */ ++ tx_buffer->next_to_watch = NULL; ++ ++ /* update the statistics for this packet */ ++ total_bytes += tx_buffer->bytecount; ++ total_packets += tx_buffer->gso_segs; ++ ++ /* free the skb */ ++ kfree_rtskb(tx_buffer->skb); ++ ++ /* clear tx_buffer data */ ++ tx_buffer->skb = NULL; ++ ++ /* clear last DMA location and unmap remaining buffers */ ++ while (tx_desc != eop_desc) { ++ tx_buffer++; ++ tx_desc++; ++ i++; ++ if (unlikely(!i)) { ++ i -= tx_ring->count; ++ tx_buffer = tx_ring->tx_buffer_info; ++ tx_desc = IGB_TX_DESC(tx_ring, 0); ++ } ++ } ++ ++ /* move us one more past the eop_desc for start of next pkt */ ++ tx_buffer++; ++ tx_desc++; ++ i++; ++ if (unlikely(!i)) { ++ i -= tx_ring->count; ++ tx_buffer = tx_ring->tx_buffer_info; ++ tx_desc = IGB_TX_DESC(tx_ring, 0); ++ } ++ ++ /* issue prefetch for next Tx descriptor */ ++ prefetch(tx_desc); ++ ++ /* update budget accounting */ ++ budget--; ++ } while (likely(budget)); ++ ++ i += tx_ring->count; ++ tx_ring->next_to_clean = i; 
++ tx_ring->tx_stats.bytes += total_bytes; ++ tx_ring->tx_stats.packets += total_packets; ++ q_vector->tx.total_bytes += total_bytes; ++ q_vector->tx.total_packets += total_packets; ++ ++ if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* Detect a transmit hang in hardware, this serializes the ++ * check with the clearing of time_stamp and movement of i ++ */ ++ clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); ++ if (tx_buffer->next_to_watch && ++ time_after(jiffies, tx_buffer->time_stamp + ++ (adapter->tx_timeout_factor * HZ)) && ++ !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) { ++ ++ /* detected Tx unit hang */ ++ dev_err(tx_ring->dev, ++ "Detected Tx Unit Hang\n" ++ " Tx Queue <%d>\n" ++ " TDH <%x>\n" ++ " TDT <%x>\n" ++ " next_to_use <%x>\n" ++ " next_to_clean <%x>\n" ++ "buffer_info[next_to_clean]\n" ++ " time_stamp <%lx>\n" ++ " next_to_watch <%p>\n" ++ " jiffies <%lx>\n" ++ " desc.status <%x>\n", ++ tx_ring->queue_index, ++ rd32(E1000_TDH(tx_ring->reg_idx)), ++ readl(tx_ring->tail), ++ tx_ring->next_to_use, ++ tx_ring->next_to_clean, ++ tx_buffer->time_stamp, ++ tx_buffer->next_to_watch, ++ jiffies, ++ tx_buffer->next_to_watch->wb.status); ++ rtnetif_stop_queue(tx_ring->netdev); ++ ++ /* we are about to reset, no point in enabling stuff */ ++ return true; ++ } ++ } ++ ++#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) ++ if (unlikely(total_packets && ++ rtnetif_carrier_ok(tx_ring->netdev) && ++ igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { ++ /* Make sure that anybody stopping the queue after this ++ * sees the new next_to_clean. ++ */ ++ smp_mb(); ++ if (rtnetif_queue_stopped(tx_ring->netdev) && ++ !(test_bit(__IGB_DOWN, &adapter->state))) { ++ rtnetif_wake_queue(tx_ring->netdev); ++ ++ tx_ring->tx_stats.restart_queue++; ++ } ++ } ++ ++ return !!budget; ++} ++ ++static struct rtskb *igb_fetch_rx_buffer(struct igb_ring *rx_ring, ++ union e1000_adv_rx_desc *rx_desc) ++{ ++ struct igb_rx_buffer *rx_buffer; ++ struct rtskb *skb; ++ ++ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; ++ skb = rx_buffer->skb; ++ prefetchw(skb->data); ++ ++ /* pull the header of the skb in */ ++ rtskb_put(skb, le16_to_cpu(rx_desc->wb.upper.length)); ++ rx_buffer->skb = NULL; ++ rx_buffer->dma = 0; ++ ++ return skb; ++} ++ ++static inline void igb_rx_checksum(struct igb_ring *ring, ++ union e1000_adv_rx_desc *rx_desc, ++ struct rtskb *skb) ++{ ++ skb->ip_summed = CHECKSUM_NONE; ++ ++ /* Ignore Checksum bit is set */ ++ if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM)) ++ return; ++ ++ /* Rx checksum disabled via ethtool */ ++ if (!(ring->netdev->features & NETIF_F_RXCSUM)) ++ return; ++ ++ /* TCP/UDP checksum error bit is set */ ++ if (igb_test_staterr(rx_desc, ++ E1000_RXDEXT_STATERR_TCPE | ++ E1000_RXDEXT_STATERR_IPE)) { ++ /* work around errata with sctp packets where the TCPE aka ++ * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) ++ * packets, (aka let the stack check the crc32c) ++ */ ++ if (!((skb->len == 60) && ++ test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) { ++ ring->rx_stats.csum_err++; ++ } ++ /* let the stack verify checksum errors */ ++ return; ++ } ++ /* It must be a TCP or UDP packet with a valid checksum */ ++ if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS | ++ E1000_RXD_STAT_UDPCS)) ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ ++ dev_dbg(ring->dev, "cksum success: bits %08X\n", ++ le32_to_cpu(rx_desc->wb.upper.status_error)); ++} ++ ++/** ++ * igb_is_non_eop - process handling of non-EOP 
buffers ++ * @rx_ring: Rx ring being processed ++ * @rx_desc: Rx descriptor for current buffer ++ * @skb: current socket buffer containing buffer in progress ++ * ++ * This function updates next to clean. If the buffer is an EOP buffer ++ * this function exits returning false, otherwise it will place the ++ * sk_buff in the next buffer to be chained and return true indicating ++ * that this is in fact a non-EOP buffer. ++ **/ ++static bool igb_is_non_eop(struct igb_ring *rx_ring, ++ union e1000_adv_rx_desc *rx_desc) ++{ ++ u32 ntc = rx_ring->next_to_clean + 1; ++ ++ /* fetch, update, and store next to clean */ ++ ntc = (ntc < rx_ring->count) ? ntc : 0; ++ rx_ring->next_to_clean = ntc; ++ ++ prefetch(IGB_RX_DESC(rx_ring, ntc)); ++ ++ if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))) ++ return false; ++ ++ return true; ++} ++ ++/** ++ * igb_cleanup_headers - Correct corrupted or empty headers ++ * @rx_ring: rx descriptor ring packet is being transacted on ++ * @rx_desc: pointer to the EOP Rx descriptor ++ * @skb: pointer to current skb being fixed ++ * ++ * Address the case where we are pulling data in on pages only ++ * and as such no data is present in the skb header. ++ * ++ * In addition if skb is not at least 60 bytes we need to pad it so that ++ * it is large enough to qualify as a valid Ethernet frame. ++ * ++ * Returns true if an error was encountered and skb was freed. ++ **/ ++static bool igb_cleanup_headers(struct igb_ring *rx_ring, ++ union e1000_adv_rx_desc *rx_desc, ++ struct rtskb *skb) ++{ ++ if (unlikely((igb_test_staterr(rx_desc, ++ E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { ++ struct rtnet_device *netdev = rx_ring->netdev; ++ if (!(netdev->features & NETIF_F_RXALL)) { ++ kfree_rtskb(skb); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/** ++ * igb_process_skb_fields - Populate skb header fields from Rx descriptor ++ * @rx_ring: rx descriptor ring packet is being transacted on ++ * @rx_desc: pointer to the EOP Rx descriptor ++ * @skb: pointer to current skb being populated ++ * ++ * This function checks the ring, descriptor, and packet information in ++ * order to populate the hash, checksum, VLAN, timestamp, protocol, and ++ * other fields within the skb. 
++ **/ ++static void igb_process_skb_fields(struct igb_ring *rx_ring, ++ union e1000_adv_rx_desc *rx_desc, ++ struct rtskb *skb) ++{ ++ igb_rx_checksum(rx_ring, rx_desc, skb); ++ ++ skb->protocol = rt_eth_type_trans(skb, rx_ring->netdev); ++} ++ ++static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) ++{ ++ struct igb_ring *rx_ring = q_vector->rx.ring; ++ unsigned int total_bytes = 0, total_packets = 0; ++ u16 cleaned_count = igb_desc_unused(rx_ring); ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtskb *skb; ++ ++ while (likely(total_packets < budget)) { ++ union e1000_adv_rx_desc *rx_desc; ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (cleaned_count >= IGB_RX_BUFFER_WRITE) { ++ igb_alloc_rx_buffers(rx_ring, cleaned_count); ++ cleaned_count = 0; ++ } ++ ++ rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); ++ ++ if (!rx_desc->wb.upper.status_error) ++ break; ++ ++ /* This memory barrier is needed to keep us from reading ++ * any other fields out of the rx_desc until we know the ++ * descriptor has been written back ++ */ ++ rmb(); ++ ++ /* retrieve a buffer from the ring */ ++ skb = igb_fetch_rx_buffer(rx_ring, rx_desc); ++ skb->time_stamp = time_stamp; ++ ++ cleaned_count++; ++ ++ /* fetch next buffer in frame if non-eop */ ++ if (igb_is_non_eop(rx_ring, rx_desc)) { ++ kfree_rtskb(skb); ++ continue; ++ } ++ ++ /* verify the packet layout is correct */ ++ if (igb_cleanup_headers(rx_ring, rx_desc, skb)) ++ continue; ++ ++ /* probably a little skewed due to removing CRC */ ++ total_bytes += skb->len; ++ ++ /* populate checksum, timestamp, VLAN, and protocol */ ++ igb_process_skb_fields(rx_ring, rx_desc, skb); ++ ++ rtnetif_rx(skb); ++ ++ /* reset skb pointer */ ++ skb = NULL; ++ ++ /* update budget accounting */ ++ total_packets++; ++ } ++ ++ rx_ring->rx_stats.packets += total_packets; ++ rx_ring->rx_stats.bytes += total_bytes; ++ q_vector->rx.total_packets += total_packets; ++ q_vector->rx.total_bytes += total_bytes; ++ ++ if (cleaned_count) ++ igb_alloc_rx_buffers(rx_ring, cleaned_count); ++ ++ if (total_packets) ++ rt_mark_stack_mgr(q_vector->adapter->netdev); ++ ++ return total_packets < budget; ++} ++ ++static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, ++ struct igb_rx_buffer *bi) ++{ ++ struct igb_adapter *adapter = rx_ring->q_vector->adapter; ++ struct rtskb *skb = bi->skb; ++ dma_addr_t dma = bi->dma; ++ ++ if (dma) ++ return true; ++ ++ if (likely(!skb)) { ++ skb = rtnetdev_alloc_rtskb(adapter->netdev, ++ rx_ring->rx_buffer_len + NET_IP_ALIGN); ++ if (!skb) { ++ rx_ring->rx_stats.alloc_failed++; ++ return false; ++ } ++ ++ rtskb_reserve(skb, NET_IP_ALIGN); ++ skb->rtdev = adapter->netdev; ++ ++ bi->skb = skb; ++ bi->dma = rtskb_data_dma_addr(skb, 0); ++ } ++ ++ return true; ++} ++ ++/** ++ * igb_alloc_rx_buffers - Replace used receive buffers; packet split ++ * @adapter: address of board private structure ++ **/ ++void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) ++{ ++ union e1000_adv_rx_desc *rx_desc; ++ struct igb_rx_buffer *bi; ++ u16 i = rx_ring->next_to_use; ++ ++ /* nothing to do */ ++ if (!cleaned_count) ++ return; ++ ++ rx_desc = IGB_RX_DESC(rx_ring, i); ++ bi = &rx_ring->rx_buffer_info[i]; ++ i -= rx_ring->count; ++ ++ do { ++ if (!igb_alloc_mapped_skb(rx_ring, bi)) ++ break; ++ ++ /* Refresh the desc even if buffer_addrs didn't change ++ * because each write-back erases this info. 
++ */ ++ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); ++ ++ rx_desc++; ++ bi++; ++ i++; ++ if (unlikely(!i)) { ++ rx_desc = IGB_RX_DESC(rx_ring, 0); ++ bi = rx_ring->rx_buffer_info; ++ i -= rx_ring->count; ++ } ++ ++ /* clear the status bits for the next_to_use descriptor */ ++ rx_desc->wb.upper.status_error = 0; ++ ++ cleaned_count--; ++ } while (cleaned_count); ++ ++ i += rx_ring->count; ++ ++ if (rx_ring->next_to_use != i) { ++ /* record the next descriptor to use */ ++ rx_ring->next_to_use = i; ++ ++ /* Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). ++ */ ++ wmb(); ++ writel(i, rx_ring->tail); ++ } ++} ++ ++/** ++ * igb_mii_ioctl - ++ * @netdev: ++ * @ifreq: ++ * @cmd: ++ **/ ++static int igb_mii_ioctl(struct rtnet_device *netdev, struct ifreq *ifr, int cmd) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct mii_ioctl_data *data = if_mii(ifr); ++ ++ if (adapter->hw.phy.media_type != e1000_media_type_copper) ++ return -EOPNOTSUPP; ++ ++ switch (cmd) { ++ case SIOCGMIIPHY: ++ data->phy_id = adapter->hw.phy.addr; ++ break; ++ case SIOCGMIIREG: ++ if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, ++ &data->val_out)) ++ return -EIO; ++ break; ++ case SIOCSMIIREG: ++ default: ++ return -EOPNOTSUPP; ++ } ++ return 0; ++} ++ ++/** ++ * igb_ioctl - ++ * @netdev: ++ * @ifreq: ++ * @cmd: ++ **/ ++static int igb_ioctl(struct rtnet_device *netdev, struct ifreq *ifr, int cmd) ++{ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ switch (cmd) { ++ case SIOCGMIIPHY: ++ case SIOCGMIIREG: ++ case SIOCSMIIREG: ++ return igb_mii_ioctl(netdev, ifr, cmd); ++ ++ default: ++ return -EOPNOTSUPP; ++ } ++} ++ ++void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct igb_adapter *adapter = hw->back; ++ ++ pci_read_config_word(adapter->pdev, reg, value); ++} ++ ++void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct igb_adapter *adapter = hw->back; ++ ++ pci_write_config_word(adapter->pdev, reg, *value); ++} ++ ++s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct igb_adapter *adapter = hw->back; ++ ++ if (pcie_capability_read_word(adapter->pdev, reg, value)) ++ return -E1000_ERR_CONFIG; ++ ++ return 0; ++} ++ ++s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct igb_adapter *adapter = hw->back; ++ ++ if (pcie_capability_write_word(adapter->pdev, reg, *value)) ++ return -E1000_ERR_CONFIG; ++ ++ return 0; ++} ++ ++static void igb_vlan_mode(struct rtnet_device *netdev, netdev_features_t features) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl; ++ ++ /* disable VLAN tag insert/strip */ ++ ctrl = rd32(E1000_CTRL); ++ ctrl &= ~E1000_CTRL_VME; ++ wr32(E1000_CTRL, ctrl); ++ ++ igb_rlpml_set(adapter); ++} ++ ++static int igb_vlan_rx_add_vid(struct rtnet_device *netdev, ++ __be16 proto, u16 vid) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* add the filter since PF can receive vlans w/o entry in vlvf */ ++ igb_vfta_set(hw, vid, true); ++ ++ set_bit(vid, adapter->active_vlans); ++ ++ return 0; ++} ++ ++static void igb_restore_vlan(struct igb_adapter *adapter) ++{ ++ u16 vid; ++ ++ igb_vlan_mode(adapter->netdev, adapter->netdev->features); ++ ++ for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) ++ 
igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); ++} ++ ++static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, ++ bool runtime) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl, rctl, status; ++ u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; ++#ifdef CONFIG_PM ++ int retval = 0; ++#endif ++ ++ rtnetif_device_detach(netdev); ++ ++ if (rtnetif_running(netdev)) ++ __igb_close(netdev, true); ++ ++ igb_clear_interrupt_scheme(adapter); ++ ++#ifdef CONFIG_PM ++ retval = pci_save_state(pdev); ++ if (retval) ++ return retval; ++#endif ++ ++ status = rd32(E1000_STATUS); ++ if (status & E1000_STATUS_LU) ++ wufc &= ~E1000_WUFC_LNKC; ++ ++ if (wufc) { ++ igb_setup_rctl(adapter); ++ igb_set_rx_mode(netdev); ++ ++ /* turn on all-multi mode if wake on multicast is enabled */ ++ if (wufc & E1000_WUFC_MC) { ++ rctl = rd32(E1000_RCTL); ++ rctl |= E1000_RCTL_MPE; ++ wr32(E1000_RCTL, rctl); ++ } ++ ++ ctrl = rd32(E1000_CTRL); ++ /* advertise wake from D3Cold */ ++ #define E1000_CTRL_ADVD3WUC 0x00100000 ++ /* phy power management enable */ ++ #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 ++ ctrl |= E1000_CTRL_ADVD3WUC; ++ wr32(E1000_CTRL, ctrl); ++ ++ /* Allow time for pending master requests to run */ ++ igb_disable_pcie_master(hw); ++ ++ wr32(E1000_WUC, E1000_WUC_PME_EN); ++ wr32(E1000_WUFC, wufc); ++ } else { ++ wr32(E1000_WUC, 0); ++ wr32(E1000_WUFC, 0); ++ } ++ ++ *enable_wake = wufc || adapter->en_mng_pt; ++ if (!*enable_wake) ++ igb_power_down_link(adapter); ++ else ++ igb_power_up_link(adapter); ++ ++ /* Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. ++ */ ++ igb_release_hw_control(adapter); ++ ++ pci_disable_device(pdev); ++ ++ return 0; ++} ++ ++#ifdef CONFIG_PM ++#ifdef CONFIG_PM_SLEEP ++static int igb_suspend(struct device *dev) ++{ ++ int retval; ++ bool wake; ++ struct pci_dev *pdev = to_pci_dev(dev); ++ ++ retval = __igb_shutdown(pdev, &wake, 0); ++ if (retval) ++ return retval; ++ ++ if (wake) { ++ pci_prepare_to_sleep(pdev); ++ } else { ++ pci_wake_from_d3(pdev, false); ++ pci_set_power_state(pdev, PCI_D3hot); ++ } ++ ++ return 0; ++} ++#endif /* CONFIG_PM_SLEEP */ ++ ++static int igb_resume(struct device *dev) ++{ ++ struct pci_dev *pdev = to_pci_dev(dev); ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 err; ++ ++ pci_set_power_state(pdev, PCI_D0); ++ pci_restore_state(pdev); ++ pci_save_state(pdev); ++ ++ if (!pci_device_is_present(pdev)) ++ return -ENODEV; ++ err = pci_enable_device_mem(pdev); ++ if (err) { ++ dev_err(&pdev->dev, ++ "igb: Cannot enable PCI device from suspend\n"); ++ return err; ++ } ++ pci_set_master(pdev); ++ ++ pci_enable_wake(pdev, PCI_D3hot, 0); ++ pci_enable_wake(pdev, PCI_D3cold, 0); ++ ++ if (igb_init_interrupt_scheme(adapter, true)) { ++ dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); ++ return -ENOMEM; ++ } ++ ++ igb_reset(adapter); ++ ++ /* let the f/w know that the h/w is now under the control of the ++ * driver. 
++ */ ++ igb_get_hw_control(adapter); ++ ++ wr32(E1000_WUS, ~0); ++ ++ if (netdev->flags & IFF_UP) { ++ rtnl_lock(); ++ err = __igb_open(netdev, true); ++ rtnl_unlock(); ++ if (err) ++ return err; ++ } ++ ++ rtnetif_device_attach(netdev); ++ return 0; ++} ++ ++static int igb_runtime_idle(struct device *dev) ++{ ++ struct pci_dev *pdev = to_pci_dev(dev); ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ ++ if (!igb_has_link(adapter)) ++ pm_schedule_suspend(dev, MSEC_PER_SEC * 5); ++ ++ return -EBUSY; ++} ++ ++static int igb_runtime_suspend(struct device *dev) ++{ ++ struct pci_dev *pdev = to_pci_dev(dev); ++ int retval; ++ bool wake; ++ ++ retval = __igb_shutdown(pdev, &wake, 1); ++ if (retval) ++ return retval; ++ ++ if (wake) { ++ pci_prepare_to_sleep(pdev); ++ } else { ++ pci_wake_from_d3(pdev, false); ++ pci_set_power_state(pdev, PCI_D3hot); ++ } ++ ++ return 0; ++} ++ ++static int igb_runtime_resume(struct device *dev) ++{ ++ return igb_resume(dev); ++} ++#endif /* CONFIG_PM */ ++ ++static void igb_shutdown(struct pci_dev *pdev) ++{ ++ bool wake; ++ ++ __igb_shutdown(pdev, &wake, 0); ++ ++ if (system_state == SYSTEM_POWER_OFF) { ++ pci_wake_from_d3(pdev, wake); ++ pci_set_power_state(pdev, PCI_D3hot); ++ } ++} ++ ++static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs) ++{ ++ return 0; ++} ++ ++/** ++ * igb_io_error_detected - called when PCI error is detected ++ * @pdev: Pointer to PCI device ++ * @state: The current pci connection state ++ * ++ * This function is called after a PCI bus error affecting ++ * this device has been detected. ++ **/ ++static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, ++ pci_channel_state_t state) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ ++ rtnetif_device_detach(netdev); ++ ++ if (state == pci_channel_io_perm_failure) ++ return PCI_ERS_RESULT_DISCONNECT; ++ ++ if (rtnetif_running(netdev)) ++ igb_down(adapter); ++ pci_disable_device(pdev); ++ ++ /* Request a slot slot reset. */ ++ return PCI_ERS_RESULT_NEED_RESET; ++} ++ ++/** ++ * igb_io_slot_reset - called after the pci bus has been reset. ++ * @pdev: Pointer to PCI device ++ * ++ * Restart the card from scratch, as if from a cold-boot. Implementation ++ * resembles the first-half of the igb_resume routine. ++ **/ ++static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ pci_ers_result_t result; ++ int err; ++ ++ if (pci_enable_device_mem(pdev)) { ++ dev_err(&pdev->dev, ++ "Cannot re-enable PCI device after reset.\n"); ++ result = PCI_ERS_RESULT_DISCONNECT; ++ } else { ++ pci_set_master(pdev); ++ pci_restore_state(pdev); ++ pci_save_state(pdev); ++ ++ pci_enable_wake(pdev, PCI_D3hot, 0); ++ pci_enable_wake(pdev, PCI_D3cold, 0); ++ ++ igb_reset(adapter); ++ wr32(E1000_WUS, ~0); ++ result = PCI_ERS_RESULT_RECOVERED; ++ } ++ ++ err = pci_cleanup_aer_uncorrect_error_status(pdev); ++ if (err) { ++ dev_err(&pdev->dev, ++ "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n", ++ err); ++ /* non-fatal, continue */ ++ } ++ ++ return result; ++} ++ ++/** ++ * igb_io_resume - called when traffic can start flowing again. ++ * @pdev: Pointer to PCI device ++ * ++ * This callback is called when the error recovery driver tells us that ++ * its OK to resume normal operation. 
Implementation resembles the ++ * second-half of the igb_resume routine. ++ */ ++static void igb_io_resume(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ ++ if (rtnetif_running(netdev)) { ++ if (igb_up(adapter)) { ++ dev_err(&pdev->dev, "igb_up failed after reset\n"); ++ return; ++ } ++ } ++ ++ rtnetif_device_attach(netdev); ++ ++ /* let the f/w know that the h/w is now under the control of the ++ * driver. ++ */ ++ igb_get_hw_control(adapter); ++} ++ ++static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, ++ u8 qsel) ++{ ++ u32 rar_low, rar_high; ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | ++ ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); ++ rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); ++ ++ /* Indicate to hardware the Address is Valid. */ ++ rar_high |= E1000_RAH_AV; ++ ++ if (hw->mac.type == e1000_82575) ++ rar_high |= E1000_RAH_POOL_1 * qsel; ++ else ++ rar_high |= E1000_RAH_POOL_1 << qsel; ++ ++ wr32(E1000_RAL(index), rar_low); ++ wrfl(); ++ wr32(E1000_RAH(index), rar_high); ++ wrfl(); ++} ++ ++static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 dmac_thr; ++ u16 hwm; ++ ++ if (hw->mac.type > e1000_82580) { ++ if (adapter->flags & IGB_FLAG_DMAC) { ++ u32 reg; ++ ++ /* force threshold to 0. */ ++ wr32(E1000_DMCTXTH, 0); ++ ++ /* DMA Coalescing high water mark needs to be greater ++ * than the Rx threshold. Set hwm to PBA - max frame ++ * size in 16B units, capping it at PBA - 6KB. ++ */ ++ hwm = 64 * pba - adapter->max_frame_size / 16; ++ if (hwm < 64 * (pba - 6)) ++ hwm = 64 * (pba - 6); ++ reg = rd32(E1000_FCRTC); ++ reg &= ~E1000_FCRTC_RTH_COAL_MASK; ++ reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) ++ & E1000_FCRTC_RTH_COAL_MASK); ++ wr32(E1000_FCRTC, reg); ++ ++ /* Set the DMA Coalescing Rx threshold to PBA - 2 * max ++ * frame size, capping it at PBA - 10KB. 
++ */ ++ dmac_thr = pba - adapter->max_frame_size / 512; ++ if (dmac_thr < pba - 10) ++ dmac_thr = pba - 10; ++ reg = rd32(E1000_DMACR); ++ reg &= ~E1000_DMACR_DMACTHR_MASK; ++ reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) ++ & E1000_DMACR_DMACTHR_MASK); ++ ++ /* transition to L0x or L1 if available..*/ ++ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); ++ ++ /* watchdog timer= +-1000 usec in 32usec intervals */ ++ reg |= (1000 >> 5); ++ ++ /* Disable BMC-to-OS Watchdog Enable */ ++ if (hw->mac.type != e1000_i354) ++ reg &= ~E1000_DMACR_DC_BMC2OSW_EN; ++ ++ wr32(E1000_DMACR, reg); ++ ++ /* no lower threshold to disable ++ * coalescing(smart fifb)-UTRESH=0 ++ */ ++ wr32(E1000_DMCRTRH, 0); ++ ++ reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4); ++ ++ wr32(E1000_DMCTLX, reg); ++ ++ /* free space in tx packet buffer to wake from ++ * DMA coal ++ */ ++ wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - ++ (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); ++ ++ /* make low power state decision controlled ++ * by DMA coal ++ */ ++ reg = rd32(E1000_PCIEMISC); ++ reg &= ~E1000_PCIEMISC_LX_DECISION; ++ wr32(E1000_PCIEMISC, reg); ++ } /* endif adapter->dmac is not disabled */ ++ } else if (hw->mac.type == e1000_82580) { ++ u32 reg = rd32(E1000_PCIEMISC); ++ ++ wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); ++ wr32(E1000_DMACR, 0); ++ } ++} ++ ++/** ++ * igb_read_i2c_byte - Reads 8 bit word over I2C ++ * @hw: pointer to hardware structure ++ * @byte_offset: byte offset to read ++ * @dev_addr: device address ++ * @data: value read ++ * ++ * Performs byte read operation over I2C interface at ++ * a specified device address. ++ **/ ++s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, ++ u8 dev_addr, u8 *data) ++{ ++ struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); ++ struct i2c_client *this_client = adapter->i2c_client; ++ s32 status; ++ u16 swfw_mask = 0; ++ ++ if (!this_client) ++ return E1000_ERR_I2C; ++ ++ swfw_mask = E1000_SWFW_PHY0_SM; ++ ++ if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) ++ return E1000_ERR_SWFW_SYNC; ++ ++ status = i2c_smbus_read_byte_data(this_client, byte_offset); ++ hw->mac.ops.release_swfw_sync(hw, swfw_mask); ++ ++ if (status < 0) ++ return E1000_ERR_I2C; ++ else { ++ *data = status; ++ return 0; ++ } ++} ++ ++/** ++ * igb_write_i2c_byte - Writes 8 bit word over I2C ++ * @hw: pointer to hardware structure ++ * @byte_offset: byte offset to write ++ * @dev_addr: device address ++ * @data: value to write ++ * ++ * Performs byte write operation over I2C interface at ++ * a specified device address. 
++ **/ ++s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, ++ u8 dev_addr, u8 data) ++{ ++ struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); ++ struct i2c_client *this_client = adapter->i2c_client; ++ s32 status; ++ u16 swfw_mask = E1000_SWFW_PHY0_SM; ++ ++ if (!this_client) ++ return E1000_ERR_I2C; ++ ++ if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) ++ return E1000_ERR_SWFW_SYNC; ++ status = i2c_smbus_write_byte_data(this_client, byte_offset, data); ++ hw->mac.ops.release_swfw_sync(hw, swfw_mask); ++ ++ if (status) ++ return E1000_ERR_I2C; ++ else ++ return 0; ++ ++} ++ ++int igb_reinit_queues(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ int err = 0; ++ ++ if (rtnetif_running(netdev)) ++ igb_close(netdev); ++ ++ igb_reset_interrupt_capability(adapter); ++ ++ if (igb_init_interrupt_scheme(adapter, true)) { ++ dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); ++ return -ENOMEM; ++ } ++ ++ if (rtnetif_running(netdev)) ++ err = igb_open(netdev); ++ ++ return err; ++} ++/* igb_main.c */ +--- linux/drivers/xenomai/net/drivers/e1000/e1000_main.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000_main.c 2021-04-07 16:01:27.442633859 +0800 +@@ -0,0 +1,3171 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000.h" ++ ++/* Change Log ++ * ++ * Port to rtnet (0.9.3) by Mathias Koehrer. Base version: e1000-7.1.9 ++ * 8-Aug-2006 ++ * ++ * 7.0.36 10-Mar-2006 ++ * o fixups for compilation issues on older kernels ++ * 7.0.35 3-Mar-2006 ++ * 7.0.34 ++ * o Major performance fixes by understanding relationship of rx_buffer_len ++ * to window size growth. _ps and legacy receive paths changed ++ * o merge with kernel changes ++ * o legacy receive path went back to single descriptor model for jumbos ++ * 7.0.33 3-Feb-2006 ++ * o Added another fix for the pass false carrier bit ++ * 7.0.32 24-Jan-2006 ++ * o Need to rebuild with noew version number for the pass false carrier ++ * fix in e1000_hw.c ++ * 7.0.30 18-Jan-2006 ++ * o fixup for tso workaround to disable it for pci-x ++ * o fix mem leak on 82542 ++ * o fixes for 10 Mb/s connections and incorrect stats ++ * 7.0.28 01/06/2006 ++ * o hardware workaround to only set "speed mode" bit for 1G link. 
++ * 7.0.26 12/23/2005 ++ * o wake on lan support modified for device ID 10B5 ++ * o fix dhcp + vlan issue not making it to the iAMT firmware ++ * 7.0.24 12/9/2005 ++ * o New hardware support for the Gigabit NIC embedded in the south bridge ++ * o Fixes to the recycling logic (skb->tail) from IBM LTC ++ * 6.3.7 11/18/2005 ++ * o Honor eeprom setting for enabling/disabling Wake On Lan ++ * 6.3.5 11/17/2005 ++ * o Fix memory leak in rx ring handling for PCI Express adapters ++ * 6.3.4 11/8/05 ++ * o Patch from Jesper Juhl to remove redundant NULL checks for kfree ++ * 6.3.2 9/20/05 ++ * o Render logic that sets/resets DRV_LOAD as inline functions to ++ * avoid code replication. If f/w is AMT then set DRV_LOAD only when ++ * network interface is open. ++ * o Handle DRV_LOAD set/reset in cases where AMT uses VLANs. ++ * o Adjust PBA partioning for Jumbo frames using MTU size and not ++ * rx_buffer_len ++ * 6.3.1 9/19/05 ++ * o Use adapter->tx_timeout_factor in Tx Hung Detect logic ++ * (e1000_clean_tx_irq) ++ * o Support for 8086:10B5 device (Quad Port) ++ */ ++ ++char e1000_driver_name[] = "rt_e1000"; ++static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver"; ++#ifndef CONFIG_E1000_NAPI ++#define DRIVERNAPI ++#else ++#define DRIVERNAPI "-NAPI" ++#endif ++#define DRV_VERSION "7.1.9"DRIVERNAPI ++char e1000_driver_version[] = DRV_VERSION; ++static char e1000_copyright[] = "Copyright (c) 1999-2006 Intel Corporation."; ++ ++/* e1000_pci_tbl - PCI Device ID Table ++ * ++ * Last entry must be all 0s ++ * ++ * Macro expands to... ++ * {PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)} ++ */ ++static struct pci_device_id e1000_pci_tbl[] = { ++ INTEL_E1000_ETHERNET_DEVICE(0x1000), ++ INTEL_E1000_ETHERNET_DEVICE(0x1001), ++ INTEL_E1000_ETHERNET_DEVICE(0x1004), ++ INTEL_E1000_ETHERNET_DEVICE(0x1008), ++ INTEL_E1000_ETHERNET_DEVICE(0x1009), ++ INTEL_E1000_ETHERNET_DEVICE(0x100C), ++ INTEL_E1000_ETHERNET_DEVICE(0x100D), ++ INTEL_E1000_ETHERNET_DEVICE(0x100E), ++ INTEL_E1000_ETHERNET_DEVICE(0x100F), ++ INTEL_E1000_ETHERNET_DEVICE(0x1010), ++ INTEL_E1000_ETHERNET_DEVICE(0x1011), ++ INTEL_E1000_ETHERNET_DEVICE(0x1012), ++ INTEL_E1000_ETHERNET_DEVICE(0x1013), ++ INTEL_E1000_ETHERNET_DEVICE(0x1014), ++ INTEL_E1000_ETHERNET_DEVICE(0x1015), ++ INTEL_E1000_ETHERNET_DEVICE(0x1016), ++ INTEL_E1000_ETHERNET_DEVICE(0x1017), ++ INTEL_E1000_ETHERNET_DEVICE(0x1018), ++ INTEL_E1000_ETHERNET_DEVICE(0x1019), ++ INTEL_E1000_ETHERNET_DEVICE(0x101A), ++ INTEL_E1000_ETHERNET_DEVICE(0x101D), ++ INTEL_E1000_ETHERNET_DEVICE(0x101E), ++ INTEL_E1000_ETHERNET_DEVICE(0x1026), ++ INTEL_E1000_ETHERNET_DEVICE(0x1027), ++ INTEL_E1000_ETHERNET_DEVICE(0x1028), ++ INTEL_E1000_ETHERNET_DEVICE(0x1049), ++ INTEL_E1000_ETHERNET_DEVICE(0x104A), ++ INTEL_E1000_ETHERNET_DEVICE(0x104B), ++ INTEL_E1000_ETHERNET_DEVICE(0x104C), ++ INTEL_E1000_ETHERNET_DEVICE(0x104D), ++ INTEL_E1000_ETHERNET_DEVICE(0x105E), ++ INTEL_E1000_ETHERNET_DEVICE(0x105F), ++ INTEL_E1000_ETHERNET_DEVICE(0x1060), ++ INTEL_E1000_ETHERNET_DEVICE(0x1075), ++ INTEL_E1000_ETHERNET_DEVICE(0x1076), ++ INTEL_E1000_ETHERNET_DEVICE(0x1077), ++ INTEL_E1000_ETHERNET_DEVICE(0x1078), ++ INTEL_E1000_ETHERNET_DEVICE(0x1079), ++ INTEL_E1000_ETHERNET_DEVICE(0x107A), ++ INTEL_E1000_ETHERNET_DEVICE(0x107B), ++ INTEL_E1000_ETHERNET_DEVICE(0x107C), ++ INTEL_E1000_ETHERNET_DEVICE(0x107D), ++ INTEL_E1000_ETHERNET_DEVICE(0x107E), ++ INTEL_E1000_ETHERNET_DEVICE(0x107F), ++ INTEL_E1000_ETHERNET_DEVICE(0x108A), ++ INTEL_E1000_ETHERNET_DEVICE(0x108B), ++ INTEL_E1000_ETHERNET_DEVICE(0x108C), ++ 
INTEL_E1000_ETHERNET_DEVICE(0x1096), ++ INTEL_E1000_ETHERNET_DEVICE(0x1098), ++ INTEL_E1000_ETHERNET_DEVICE(0x1099), ++ INTEL_E1000_ETHERNET_DEVICE(0x109A), ++ INTEL_E1000_ETHERNET_DEVICE(0x10A4), ++ INTEL_E1000_ETHERNET_DEVICE(0x10B5), ++ INTEL_E1000_ETHERNET_DEVICE(0x10B9), ++ INTEL_E1000_ETHERNET_DEVICE(0x10BA), ++ INTEL_E1000_ETHERNET_DEVICE(0x10BB), ++ INTEL_E1000_ETHERNET_DEVICE(0x10BC), ++ INTEL_E1000_ETHERNET_DEVICE(0x10C4), ++ INTEL_E1000_ETHERNET_DEVICE(0x10C5), ++ /* required last entry */ ++ {0,} ++}; ++ ++MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); ++ ++int e1000_up(struct e1000_adapter *adapter); ++void e1000_down(struct e1000_adapter *adapter); ++void e1000_reinit_locked(struct e1000_adapter *adapter); ++void e1000_reset(struct e1000_adapter *adapter); ++int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx); ++int e1000_setup_all_tx_resources(struct e1000_adapter *adapter); ++int e1000_setup_all_rx_resources(struct e1000_adapter *adapter); ++void e1000_free_all_tx_resources(struct e1000_adapter *adapter); ++void e1000_free_all_rx_resources(struct e1000_adapter *adapter); ++static int e1000_setup_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *txdr); ++static int e1000_setup_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rxdr); ++static void e1000_free_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static void e1000_free_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++ ++static int e1000_init_module(void); ++static void e1000_exit_module(void); ++static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent); ++static void e1000_remove(struct pci_dev *pdev); ++static int e1000_alloc_queues(struct e1000_adapter *adapter); ++static int e1000_sw_init(struct e1000_adapter *adapter); ++static int e1000_open(struct rtnet_device *netdev); ++static int e1000_close(struct rtnet_device *netdev); ++static void e1000_configure_tx(struct e1000_adapter *adapter); ++static void e1000_configure_rx(struct e1000_adapter *adapter); ++static void e1000_setup_rctl(struct e1000_adapter *adapter); ++static void e1000_clean_all_tx_rings(struct e1000_adapter *adapter); ++static void e1000_clean_all_rx_rings(struct e1000_adapter *adapter); ++static void e1000_clean_tx_ring(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static void e1000_clean_rx_ring(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++static void e1000_set_multi(struct rtnet_device *netdev); ++static void e1000_update_phy_info_task(struct work_struct *work); ++static void e1000_watchdog(struct work_struct *work); ++static void e1000_82547_tx_fifo_stall_task(struct work_struct *work); ++static int e1000_xmit_frame(struct rtskb *skb, struct rtnet_device *netdev); ++static int e1000_intr(rtdm_irq_t *irq_handle); ++static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp); ++static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++#ifdef SIOCGMIIPHY ++#endif ++void e1000_set_ethtool_ops(struct rtnet_device *netdev); ++#ifdef ETHTOOL_OPS_COMPAT ++extern int ethtool_ioctl(struct ifreq *ifr); ++#endif ++static void e1000_enter_82542_rst(struct e1000_adapter *adapter); ++static void e1000_leave_82542_rst(struct e1000_adapter 
*adapter); ++static void e1000_smartspeed(struct e1000_adapter *adapter); ++static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, ++ struct rtskb *skb); ++ ++ ++ ++ ++ ++/* Exported from other modules */ ++ ++extern void e1000_check_options(struct e1000_adapter *adapter); ++ ++static struct pci_driver e1000_driver = { ++ .name = e1000_driver_name, ++ .id_table = e1000_pci_tbl, ++ .probe = e1000_probe, ++ .remove = e1000_remove, ++}; ++ ++MODULE_AUTHOR("Intel Corporation, "); ++MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver for rtnet"); ++MODULE_LICENSE("GPL"); ++MODULE_VERSION(DRV_VERSION); ++ ++static int local_debug = NETIF_MSG_DRV | NETIF_MSG_PROBE; ++module_param_named(debug, local_debug, int, 0); ++MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); ++ ++ ++#define MAX_UNITS 8 ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (eg. 1,0,1)"); ++ ++ ++#define kmalloc(a,b) rtdm_malloc(a) ++#define vmalloc(a) rtdm_malloc(a) ++#define kfree(a) rtdm_free(a) ++#define vfree(a) rtdm_free(a) ++ ++ ++/** ++ * e1000_init_module - Driver Registration Routine ++ * ++ * e1000_init_module is the first routine called when the driver is ++ * loaded. All it does is register with the PCI subsystem. ++ **/ ++ ++static int __init ++e1000_init_module(void) ++{ ++ int ret; ++ printk(KERN_INFO "%s - version %s\n", ++ e1000_driver_string, e1000_driver_version); ++ ++ printk(KERN_INFO "%s\n", e1000_copyright); ++ ++ ret = pci_register_driver(&e1000_driver); ++ return ret; ++} ++ ++module_init(e1000_init_module); ++ ++/** ++ * e1000_exit_module - Driver Exit Cleanup Routine ++ * ++ * e1000_exit_module is called just before the driver is removed ++ * from memory. 
++ **/ ++ ++static void __exit ++e1000_exit_module(void) ++{ ++ pci_unregister_driver(&e1000_driver); ++} ++ ++module_exit(e1000_exit_module); ++ ++static int e1000_request_irq(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ int flags, err = 0; ++ ++ flags = RTDM_IRQTYPE_SHARED; ++#ifdef CONFIG_PCI_MSI ++ if (adapter->hw.mac_type > e1000_82547_rev_2) { ++ adapter->have_msi = TRUE; ++ if ((err = pci_enable_msi(adapter->pdev))) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate MSI interrupt Error: %d\n", err); ++ adapter->have_msi = FALSE; ++ } ++ } ++ if (adapter->have_msi) ++ flags = 0; ++#endif ++ rt_stack_connect(netdev, &STACK_manager); ++ if ((err = rtdm_irq_request(&adapter->irq_handle, adapter->pdev->irq, ++ e1000_intr, flags, netdev->name, netdev))) ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate interrupt Error: %d\n", err); ++ ++ return err; ++} ++ ++static void e1000_free_irq(struct e1000_adapter *adapter) ++{ ++ // struct rtnet_device *netdev = adapter->netdev; ++ ++ rtdm_irq_free(&adapter->irq_handle); ++ ++#ifdef CONFIG_PCI_MSI ++ if (adapter->have_msi) ++ pci_disable_msi(adapter->pdev); ++#endif ++} ++ ++/** ++ * e1000_irq_disable - Mask off interrupt generation on the NIC ++ * @adapter: board private structure ++ **/ ++ ++static void ++e1000_irq_disable(struct e1000_adapter *adapter) ++{ ++ atomic_inc(&adapter->irq_sem); ++ E1000_WRITE_REG(&adapter->hw, IMC, ~0); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ synchronize_irq(adapter->pdev->irq); ++} ++ ++/** ++ * e1000_irq_enable - Enable default interrupt generation settings ++ * @adapter: board private structure ++ **/ ++ ++static void ++e1000_irq_enable(struct e1000_adapter *adapter) ++{ ++ if (likely(atomic_dec_and_test(&adapter->irq_sem))) { ++ E1000_WRITE_REG(&adapter->hw, IMS, IMS_ENABLE_MASK); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ } ++} ++ ++/** ++ * e1000_release_hw_control - release control of the h/w to f/w ++ * @adapter: address of board private structure ++ * ++ * e1000_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that the ++ * driver is no longer loaded. For AMT version (only with 82573) i ++ * of the f/w this means that the netowrk i/f is closed. ++ * ++ **/ ++ ++static void ++e1000_release_hw_control(struct e1000_adapter *adapter) ++{ ++ uint32_t ctrl_ext; ++ uint32_t swsm; ++ uint32_t extcnf; ++ ++ /* Let firmware taken over control of h/w */ ++ switch (adapter->hw.mac_type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); ++ E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ++ ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); ++ break; ++ case e1000_82573: ++ swsm = E1000_READ_REG(&adapter->hw, SWSM); ++ E1000_WRITE_REG(&adapter->hw, SWSM, ++ swsm & ~E1000_SWSM_DRV_LOAD); ++ case e1000_ich8lan: ++ extcnf = E1000_READ_REG(&adapter->hw, CTRL_EXT); ++ E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ++ extcnf & ~E1000_CTRL_EXT_DRV_LOAD); ++ break; ++ default: ++ break; ++ } ++} ++ ++/** ++ * e1000_get_hw_control - get control of the h/w from f/w ++ * @adapter: address of board private structure ++ * ++ * e1000_get_hw_control sets {CTRL_EXT|FWSM}:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that ++ * the driver is loaded. For AMT version (only with 82573) ++ * of the f/w this means that the netowrk i/f is open. 
++ * ++ **/ ++ ++static void ++e1000_get_hw_control(struct e1000_adapter *adapter) ++{ ++ uint32_t ctrl_ext; ++ uint32_t swsm; ++ uint32_t extcnf; ++ /* Let firmware know the driver has taken over */ ++ switch (adapter->hw.mac_type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); ++ E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ++ ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); ++ break; ++ case e1000_82573: ++ swsm = E1000_READ_REG(&adapter->hw, SWSM); ++ E1000_WRITE_REG(&adapter->hw, SWSM, ++ swsm | E1000_SWSM_DRV_LOAD); ++ break; ++ case e1000_ich8lan: ++ extcnf = E1000_READ_REG(&adapter->hw, EXTCNF_CTRL); ++ E1000_WRITE_REG(&adapter->hw, EXTCNF_CTRL, ++ extcnf | E1000_EXTCNF_CTRL_SWFLAG); ++ break; ++ default: ++ break; ++ } ++} ++ ++int ++e1000_up(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ int i; ++ ++ /* hardware has been reset, we need to reload some things */ ++ ++ e1000_set_multi(netdev); ++ ++ ++ e1000_configure_tx(adapter); ++ e1000_setup_rctl(adapter); ++ e1000_configure_rx(adapter); ++ /* call E1000_DESC_UNUSED which always leaves ++ * at least 1 descriptor unused to make sure ++ * next_to_use != next_to_clean */ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ struct e1000_rx_ring *ring = &adapter->rx_ring[i]; ++ adapter->alloc_rx_buf(adapter, ring, ++ E1000_DESC_UNUSED(ring)); ++ } ++ ++ // TODO makoehre adapter->tx_queue_len = netdev->tx_queue_len; ++ ++ schedule_delayed_work(&adapter->watchdog_task, 1); ++ ++ e1000_irq_enable(adapter); ++ ++ return 0; ++} ++ ++/** ++ * e1000_power_up_phy - restore link in case the phy was powered down ++ * @adapter: address of board private structure ++ * ++ * The phy may be powered down to save power and turn off link when the ++ * driver is unloaded and wake on lan is not enabled (among others) ++ * *** this routine MUST be followed by a call to e1000_reset *** ++ * ++ **/ ++ ++static void e1000_power_up_phy(struct e1000_adapter *adapter) ++{ ++ uint16_t mii_reg = 0; ++ ++ /* Just clear the power down bit to wake the phy back up */ ++ if (adapter->hw.media_type == e1000_media_type_copper) { ++ /* according to the manual, the phy will retain its ++ * settings across a power-down/up cycle */ ++ e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg); ++ mii_reg &= ~MII_CR_POWER_DOWN; ++ e1000_write_phy_reg(&adapter->hw, PHY_CTRL, mii_reg); ++ } ++} ++ ++static void e1000_power_down_phy(struct e1000_adapter *adapter) ++{ ++ boolean_t mng_mode_enabled = (adapter->hw.mac_type >= e1000_82571) && ++ e1000_check_mng_mode(&adapter->hw); ++ /* Power down the PHY so no link is implied when interface is down * ++ * The PHY cannot be powered down if any of the following is TRUE * ++ * (a) WoL is enabled ++ * (b) AMT is active ++ * (c) SoL/IDER session is active */ ++ if (!adapter->wol && adapter->hw.mac_type >= e1000_82540 && ++ adapter->hw.mac_type != e1000_ich8lan && ++ adapter->hw.media_type == e1000_media_type_copper && ++ !(E1000_READ_REG(&adapter->hw, MANC) & E1000_MANC_SMBUS_EN) && ++ !mng_mode_enabled && ++ !e1000_check_phy_reset_block(&adapter->hw)) { ++ uint16_t mii_reg = 0; ++ e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg); ++ mii_reg |= MII_CR_POWER_DOWN; ++ e1000_write_phy_reg(&adapter->hw, PHY_CTRL, mii_reg); ++ mdelay(1); ++ } ++} ++ ++static void e1000_down_and_stop(struct e1000_adapter *adapter) ++{ ++ cancel_work_sync(&adapter->reset_task); ++ cancel_delayed_work_sync(&adapter->watchdog_task); ++ 
cancel_delayed_work_sync(&adapter->phy_info_task); ++ cancel_delayed_work_sync(&adapter->fifo_stall_task); ++} ++ ++void ++e1000_down(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ ++ e1000_irq_disable(adapter); ++ ++ e1000_down_and_stop(adapter); ++ ++ // TODO makoehre netdev->tx_queue_len = adapter->tx_queue_len; ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ rtnetif_carrier_off(netdev); ++ rtnetif_stop_queue(netdev); ++ ++ e1000_reset(adapter); ++ e1000_clean_all_tx_rings(adapter); ++ e1000_clean_all_rx_rings(adapter); ++} ++ ++void ++e1000_reinit_locked(struct e1000_adapter *adapter) ++{ ++ WARN_ON(in_interrupt()); ++ if (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) ++ msleep(1); ++ e1000_down(adapter); ++ e1000_up(adapter); ++ clear_bit(__E1000_RESETTING, &adapter->flags); ++} ++ ++void ++e1000_reset(struct e1000_adapter *adapter) ++{ ++ uint32_t pba; ++ uint16_t fc_high_water_mark = E1000_FC_HIGH_DIFF; ++ ++ /* Repartition Pba for greater than 9k mtu ++ * To take effect CTRL.RST is required. ++ */ ++ ++ switch (adapter->hw.mac_type) { ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ pba = E1000_PBA_30K; ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ pba = E1000_PBA_38K; ++ break; ++ case e1000_82573: ++ pba = E1000_PBA_12K; ++ break; ++ case e1000_ich8lan: ++ pba = E1000_PBA_8K; ++ break; ++ default: ++ pba = E1000_PBA_48K; ++ break; ++ } ++ ++ if ((adapter->hw.mac_type != e1000_82573) && ++ (adapter->netdev->mtu > E1000_RXBUFFER_8192)) ++ pba -= 8; /* allocate more FIFO for Tx */ ++ ++ ++ if (adapter->hw.mac_type == e1000_82547) { ++ adapter->tx_fifo_head = 0; ++ adapter->tx_head_addr = pba << E1000_TX_HEAD_ADDR_SHIFT; ++ adapter->tx_fifo_size = ++ (E1000_PBA_40K - pba) << E1000_PBA_BYTES_SHIFT; ++ atomic_set(&adapter->tx_fifo_stall, 0); ++ } ++ ++ E1000_WRITE_REG(&adapter->hw, PBA, pba); ++ ++ /* flow control settings */ ++ /* Set the FC high water mark to 90% of the FIFO size. ++ * Required to clear last 3 LSB */ ++ fc_high_water_mark = ((pba * 9216)/10) & 0xFFF8; ++ /* We can't use 90% on small FIFOs because the remainder ++ * would be less than 1 full frame. In this case, we size ++ * it to allow at least a full frame above the high water ++ * mark. 
*/ ++ if (pba < E1000_PBA_16K) ++ fc_high_water_mark = (pba * 1024) - 1600; ++ ++ adapter->hw.fc_high_water = fc_high_water_mark; ++ adapter->hw.fc_low_water = fc_high_water_mark - 8; ++ if (adapter->hw.mac_type == e1000_80003es2lan) ++ adapter->hw.fc_pause_time = 0xFFFF; ++ else ++ adapter->hw.fc_pause_time = E1000_FC_PAUSE_TIME; ++ adapter->hw.fc_send_xon = 1; ++ adapter->hw.fc = adapter->hw.original_fc; ++ ++ /* Allow time for pending master requests to run */ ++ e1000_reset_hw(&adapter->hw); ++ if (adapter->hw.mac_type >= e1000_82544) ++ E1000_WRITE_REG(&adapter->hw, WUC, 0); ++ if (e1000_init_hw(&adapter->hw)) ++ DPRINTK(PROBE, ERR, "Hardware Error\n"); ++ /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ ++ E1000_WRITE_REG(&adapter->hw, VET, ETHERNET_IEEE_VLAN_TYPE); ++ ++ E1000_WRITE_REG(&adapter->hw, AIT, 0); // Set adaptive interframe spacing to zero ++ ++ // e1000_reset_adaptive(&adapter->hw); ++ e1000_phy_get_info(&adapter->hw, &adapter->phy_info); ++ ++ if (!adapter->smart_power_down && ++ (adapter->hw.mac_type == e1000_82571 || ++ adapter->hw.mac_type == e1000_82572)) { ++ uint16_t phy_data = 0; ++ /* speed up time to link by disabling smart power down, ignore ++ * the return value of this function because there is nothing ++ * different we would do if it failed */ ++ e1000_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, ++ &phy_data); ++ phy_data &= ~IGP02E1000_PM_SPD; ++ e1000_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, ++ phy_data); ++ } ++ ++} ++ ++static void ++e1000_reset_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = ++ container_of(work, struct e1000_adapter, reset_task); ++ ++ e1000_reinit_locked(adapter); ++} ++ ++/** ++ * e1000_probe - Device Initialization Routine ++ * @pdev: PCI device information struct ++ * @ent: entry in e1000_pci_tbl ++ * ++ * Returns 0 on success, negative on failure ++ * ++ * e1000_probe initializes an adapter identified by a pci_dev structure. ++ * The OS initialization, configuring of the adapter private structure, ++ * and a hardware reset occur. 
++ **/ ++ ++static int e1000_probe(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtnet_device *netdev; ++ struct e1000_adapter *adapter; ++ unsigned long mmio_start, mmio_len; ++ unsigned long flash_start, flash_len; ++ ++ static int cards_found = 0; ++ static int e1000_ksp3_port_a = 0; /* global ksp3 port a indication */ ++ int i, err; ++ uint16_t eeprom_data; ++ uint16_t eeprom_apme_mask = E1000_EEPROM_APME; ++ ++ if (cards[cards_found++] == 0) ++ { ++ return -ENODEV; ++ } ++ ++ if ((err = pci_enable_device(pdev))) ++ return err; ++ ++ if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) || ++ (err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))) { ++ if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) && ++ (err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))) { ++ E1000_ERR("No usable DMA configuration, aborting\n"); ++ return err; ++ } ++ } ++ ++ if ((err = pci_request_regions(pdev, e1000_driver_name))) ++ return err; ++ ++ pci_set_master(pdev); ++ ++ netdev = rt_alloc_etherdev(sizeof(struct e1000_adapter), 48); ++ if (!netdev) { ++ err = -ENOMEM; ++ goto err_alloc_etherdev; ++ } ++ memset(netdev->priv, 0, sizeof(struct e1000_adapter)); ++ ++ rt_rtdev_connect(netdev, &RTDEV_manager); ++ ++ ++ // SET_NETDEV_DEV(netdev, &pdev->dev); ++ netdev->vers = RTDEV_VERS_2_0; ++ netdev->sysbind = &pdev->dev; ++ ++ pci_set_drvdata(pdev, netdev); ++ adapter = netdev->priv; ++ adapter->netdev = netdev; ++ adapter->pdev = pdev; ++ adapter->hw.back = adapter; ++ adapter->msg_enable = (1 << local_debug) - 1; ++ ++ mmio_start = pci_resource_start(pdev, BAR_0); ++ mmio_len = pci_resource_len(pdev, BAR_0); ++ ++ adapter->hw.hw_addr = ioremap(mmio_start, mmio_len); ++ if (!adapter->hw.hw_addr) { ++ err = -EIO; ++ goto err_ioremap; ++ } ++ ++ for (i = BAR_1; i <= BAR_5; i++) { ++ if (pci_resource_len(pdev, i) == 0) ++ continue; ++ if (pci_resource_flags(pdev, i) & IORESOURCE_IO) { ++ adapter->hw.io_base = pci_resource_start(pdev, i); ++ break; ++ } ++ } ++ ++ netdev->open = &e1000_open; ++ netdev->stop = &e1000_close; ++ netdev->hard_start_xmit = &e1000_xmit_frame; ++ // netdev->get_stats = &e1000_get_stats; ++ // netdev->set_multicast_list = &e1000_set_multi; ++ // netdev->set_mac_address = &e1000_set_mac; ++ // netdev->change_mtu = &e1000_change_mtu; ++ // netdev->do_ioctl = &e1000_ioctl; ++ // e1000_set_ethtool_ops(netdev); ++ strcpy(netdev->name, pci_name(pdev)); ++ ++ netdev->mem_start = mmio_start; ++ netdev->mem_end = mmio_start + mmio_len; ++ netdev->base_addr = adapter->hw.io_base; ++ ++ adapter->bd_number = cards_found - 1; ++ ++ /* setup the private structure */ ++ ++ if ((err = e1000_sw_init(adapter))) ++ goto err_sw_init; ++ ++ /* Flash BAR mapping must happen after e1000_sw_init ++ * because it depends on mac_type */ ++ if ((adapter->hw.mac_type == e1000_ich8lan) && ++ (pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { ++ flash_start = pci_resource_start(pdev, 1); ++ flash_len = pci_resource_len(pdev, 1); ++ adapter->hw.flash_address = ioremap(flash_start, flash_len); ++ if (!adapter->hw.flash_address) { ++ err = -EIO; ++ goto err_flashmap; ++ } ++ } ++ ++ if ((err = e1000_check_phy_reset_block(&adapter->hw))) ++ DPRINTK(PROBE, INFO, "PHY reset is blocked due to SOL/IDER session.\n"); ++ ++ /* if ksp3, indicate if it's port a being setup */ ++ if (pdev->device == E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 && ++ e1000_ksp3_port_a == 0) ++ adapter->ksp3_port_a = 1; ++ e1000_ksp3_port_a++; ++ /* Reset for multiple KP3 adapters */ ++ if (e1000_ksp3_port_a == 4) 
++ e1000_ksp3_port_a = 0; ++ ++ netdev->features |= NETIF_F_LLTX; ++ ++ adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw); ++ ++ /* initialize eeprom parameters */ ++ ++ if (e1000_init_eeprom_params(&adapter->hw)) { ++ E1000_ERR("EEPROM initialization failed\n"); ++ return -EIO; ++ } ++ ++ /* before reading the EEPROM, reset the controller to ++ * put the device in a known good starting state */ ++ ++ e1000_reset_hw(&adapter->hw); ++ ++ /* make sure the EEPROM is good */ ++ ++ if (e1000_validate_eeprom_checksum(&adapter->hw) < 0) { ++ DPRINTK(PROBE, ERR, "The EEPROM Checksum Is Not Valid\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ ++ /* copy the MAC address out of the EEPROM */ ++ ++ if (e1000_read_mac_addr(&adapter->hw)) ++ DPRINTK(PROBE, ERR, "EEPROM Read Error\n"); ++ memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len); ++#ifdef ETHTOOL_GPERMADDR ++ memcpy(netdev->perm_addr, adapter->hw.mac_addr, netdev->addr_len); ++ ++ if (!is_valid_ether_addr(netdev->perm_addr)) { ++#else ++ if (!is_valid_ether_addr(netdev->dev_addr)) { ++#endif ++ DPRINTK(PROBE, ERR, "Invalid MAC Address\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ ++ e1000_read_part_num(&adapter->hw, &(adapter->part_num)); ++ ++ e1000_get_bus_info(&adapter->hw); ++ ++ INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog); ++ INIT_DELAYED_WORK(&adapter->fifo_stall_task, ++ e1000_82547_tx_fifo_stall_task); ++ INIT_DELAYED_WORK(&adapter->phy_info_task, e1000_update_phy_info_task); ++ INIT_WORK(&adapter->reset_task, ++ (void (*)(struct work_struct *))e1000_reset_task); ++ ++ /* we're going to reset, so assume we have no link for now */ ++ ++ rtnetif_carrier_off(netdev); ++ rtnetif_stop_queue(netdev); ++ ++ e1000_check_options(adapter); ++ ++ /* Initial Wake on LAN setting ++ * If APM wake is enabled in the EEPROM, ++ * enable the ACPI Magic Packet filter ++ */ ++ ++ switch (adapter->hw.mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ break; ++ case e1000_82544: ++ e1000_read_eeprom(&adapter->hw, ++ EEPROM_INIT_CONTROL2_REG, 1, &eeprom_data); ++ eeprom_apme_mask = E1000_EEPROM_82544_APM; ++ break; ++ case e1000_ich8lan: ++ e1000_read_eeprom(&adapter->hw, ++ EEPROM_INIT_CONTROL1_REG, 1, &eeprom_data); ++ eeprom_apme_mask = E1000_EEPROM_ICH8_APME; ++ break; ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ case e1000_82571: ++ case e1000_80003es2lan: ++ if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_FUNC_1){ ++ e1000_read_eeprom(&adapter->hw, ++ EEPROM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); ++ break; ++ } ++ /* Fall Through */ ++ default: ++ e1000_read_eeprom(&adapter->hw, ++ EEPROM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); ++ break; ++ } ++ if (eeprom_data & eeprom_apme_mask) ++ adapter->wol |= E1000_WUFC_MAG; ++ ++ /* print bus type/speed/width info */ ++ { ++ struct e1000_hw *hw = &adapter->hw; ++ DPRINTK(PROBE, INFO, "(PCI%s:%s:%s) ", ++ ((hw->bus_type == e1000_bus_type_pcix) ? "-X" : ++ (hw->bus_type == e1000_bus_type_pci_express ? " Express":"")), ++ ((hw->bus_speed == e1000_bus_speed_2500) ? "2.5Gb/s" : ++ (hw->bus_speed == e1000_bus_speed_133) ? "133MHz" : ++ (hw->bus_speed == e1000_bus_speed_120) ? "120MHz" : ++ (hw->bus_speed == e1000_bus_speed_100) ? "100MHz" : ++ (hw->bus_speed == e1000_bus_speed_66) ? "66MHz" : "33MHz"), ++ ((hw->bus_width == e1000_bus_width_64) ? "64-bit" : ++ (hw->bus_width == e1000_bus_width_pciex_4) ? "Width x4" : ++ (hw->bus_width == e1000_bus_width_pciex_1) ? 
"Width x1" : ++ "32-bit")); ++ } ++ ++ printk(KERN_INFO "e1000: hw "); ++ for (i = 0; i < 6; i++) ++ printk(KERN_CONT "%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); ++ ++ /* reset the hardware with the new settings */ ++ e1000_reset(adapter); ++ ++ /* If the controller is 82573 and f/w is AMT, do not set ++ * DRV_LOAD until the interface is up. For all other cases, ++ * let the f/w know that the h/w is now under the control ++ * of the driver. */ ++ if (adapter->hw.mac_type != e1000_82573 || ++ !e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ strcpy(netdev->name, "rteth%d"); ++ if ((err = rt_register_rtnetdev(netdev))) ++ goto err_register; ++ ++ DPRINTK(PROBE, INFO, "Intel(R) PRO/1000 Network Connection\n"); ++ ++ return 0; ++ ++err_register: ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++err_flashmap: ++err_sw_init: ++err_eeprom: ++ iounmap(adapter->hw.hw_addr); ++err_ioremap: ++ rtdev_free(netdev); ++err_alloc_etherdev: ++ pci_release_regions(pdev); ++ return err; ++} ++ ++/** ++ * e1000_remove - Device Removal Routine ++ * @pdev: PCI device information struct ++ * ++ * e1000_remove is called by the PCI subsystem to alert the driver ++ * that it should release a PCI device. The could be caused by a ++ * Hot-Plug event, or because the driver is going to be removed from ++ * memory. ++ **/ ++ ++static void e1000_remove(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ uint32_t manc; ++ ++ e1000_down_and_stop(adapter); ++ ++ if (adapter->hw.mac_type >= e1000_82540 && ++ adapter->hw.mac_type != e1000_ich8lan && ++ adapter->hw.media_type == e1000_media_type_copper) { ++ manc = E1000_READ_REG(&adapter->hw, MANC); ++ if (manc & E1000_MANC_SMBUS_EN) { ++ manc |= E1000_MANC_ARP_EN; ++ E1000_WRITE_REG(&adapter->hw, MANC, manc); ++ } ++ } ++ ++ /* Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. */ ++ e1000_release_hw_control(adapter); ++ ++ rt_unregister_rtnetdev(netdev); ++ ++ if (!e1000_check_phy_reset_block(&adapter->hw)) ++ e1000_phy_hw_reset(&adapter->hw); ++ ++ kfree(adapter->tx_ring); ++ kfree(adapter->rx_ring); ++ ++ ++ iounmap(adapter->hw.hw_addr); ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++ pci_release_regions(pdev); ++ ++ rtdev_free(netdev); ++ ++ pci_disable_device(pdev); ++} ++ ++/** ++ * e1000_sw_init - Initialize general software structures (struct e1000_adapter) ++ * @adapter: board private structure to initialize ++ * ++ * e1000_sw_init initializes the Adapter private data structure. ++ * Fields are initialized based on PCI device information and ++ * OS network device settings (MTU size). 
++ **/ ++ ++static int e1000_sw_init(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ ++ /* PCI config space info */ ++ ++ hw->vendor_id = pdev->vendor; ++ hw->device_id = pdev->device; ++ hw->subsystem_vendor_id = pdev->subsystem_vendor; ++ hw->subsystem_id = pdev->subsystem_device; ++ ++ pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); ++ ++ pci_read_config_word(pdev, PCI_COMMAND, &hw->pci_cmd_word); ++ ++ adapter->rx_buffer_len = MAXIMUM_ETHERNET_FRAME_SIZE; ++ adapter->rx_ps_bsize0 = E1000_RXBUFFER_128; ++ hw->max_frame_size = netdev->mtu + ++ ENET_HEADER_SIZE + ETHERNET_FCS_SIZE; ++ hw->min_frame_size = MINIMUM_ETHERNET_FRAME_SIZE; ++ ++ /* identify the MAC */ ++ ++ if (e1000_set_mac_type(hw)) { ++ DPRINTK(PROBE, ERR, "Unknown MAC Type\n"); ++ return -EIO; ++ } ++ ++ switch (hw->mac_type) { ++ default: ++ break; ++ case e1000_82541: ++ case e1000_82547: ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ hw->phy_init_script = 1; ++ break; ++ } ++ ++ e1000_set_media_type(hw); ++ ++ hw->wait_autoneg_complete = FALSE; ++ hw->tbi_compatibility_en = TRUE; ++ hw->adaptive_ifs = FALSE; ++ ++ /* Copper options */ ++ ++ if (hw->media_type == e1000_media_type_copper) { ++ hw->mdix = AUTO_ALL_MODES; ++ hw->disable_polarity_correction = FALSE; ++ hw->master_slave = E1000_MASTER_SLAVE; ++ } ++ ++ adapter->num_tx_queues = 1; ++ adapter->num_rx_queues = 1; ++ ++ ++ if (e1000_alloc_queues(adapter)) { ++ DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n"); ++ return -ENOMEM; ++ } ++ ++ atomic_set(&adapter->irq_sem, 1); ++ ++ return 0; ++} ++ ++/** ++ * e1000_alloc_queues - Allocate memory for all rings ++ * @adapter: board private structure to initialize ++ * ++ * We allocate one ring per queue at run-time since we don't know the ++ * number of queues at compile-time. The polling_netdev array is ++ * intended for Multiqueue, but should work fine with a single queue. ++ **/ ++ ++static int e1000_alloc_queues(struct e1000_adapter *adapter) ++{ ++ int size; ++ ++ size = sizeof(struct e1000_tx_ring) * adapter->num_tx_queues; ++ adapter->tx_ring = kmalloc(size, GFP_KERNEL); ++ if (!adapter->tx_ring) ++ return -ENOMEM; ++ memset(adapter->tx_ring, 0, size); ++ ++ size = sizeof(struct e1000_rx_ring) * adapter->num_rx_queues; ++ adapter->rx_ring = kmalloc(size, GFP_KERNEL); ++ if (!adapter->rx_ring) { ++ kfree(adapter->tx_ring); ++ return -ENOMEM; ++ } ++ memset(adapter->rx_ring, 0, size); ++ ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_open - Called when a network interface is made active ++ * @netdev: network interface device structure ++ * ++ * Returns 0 on success, negative value on failure ++ * ++ * The open entry point is called when a network interface is made ++ * active by the system (IFF_UP). At this point all resources needed ++ * for transmit and receive operations are allocated, the interrupt ++ * handler is registered with the OS, the watchdog timer is started, ++ * and the stack is notified that the interface is ready. 
++ **/ ++ ++static int ++e1000_open(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ int err; ++ ++ /* disallow open during test */ ++ if (test_bit(__E1000_DRIVER_TESTING, &adapter->flags)) ++ return -EBUSY; ++ ++ /* allocate transmit descriptors */ ++ ++ if ((err = e1000_setup_all_tx_resources(adapter))) ++ goto err_setup_tx; ++ ++ /* allocate receive descriptors */ ++ ++ if ((err = e1000_setup_all_rx_resources(adapter))) ++ goto err_setup_rx; ++ ++ err = e1000_request_irq(adapter); ++ if (err) ++ goto err_up; ++ ++ e1000_power_up_phy(adapter); ++ ++ if ((err = e1000_up(adapter))) ++ goto err_up; ++ ++ /* If AMT is enabled, let the firmware know that the network ++ * interface is now open */ ++ if (adapter->hw.mac_type == e1000_82573 && ++ e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ /* Wait for the hardware to come up */ ++ msleep(3000); ++ ++ return E1000_SUCCESS; ++ ++err_up: ++ e1000_free_all_rx_resources(adapter); ++err_setup_rx: ++ e1000_free_all_tx_resources(adapter); ++err_setup_tx: ++ e1000_reset(adapter); ++ ++ return err; ++} ++ ++/** ++ * e1000_close - Disables a network interface ++ * @netdev: network interface device structure ++ * ++ * Returns 0, this is not allowed to fail ++ * ++ * The close entry point is called when an interface is de-activated ++ * by the OS. The hardware is still under the drivers control, but ++ * needs to be disabled. A global MAC reset is issued to stop the ++ * hardware, and all transmit and receive resources are freed. ++ **/ ++ ++static int ++e1000_close(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); ++ e1000_down(adapter); ++ e1000_power_down_phy(adapter); ++ e1000_free_irq(adapter); ++ ++ e1000_free_all_tx_resources(adapter); ++ e1000_free_all_rx_resources(adapter); ++ ++ ++ /* If AMT is enabled, let the firmware know that the network ++ * interface is now closed */ ++ if (adapter->hw.mac_type == e1000_82573 && ++ e1000_check_mng_mode(&adapter->hw)) ++ e1000_release_hw_control(adapter); ++ ++ return 0; ++} ++ ++/** ++ * e1000_check_64k_bound - check that memory doesn't cross 64kB boundary ++ * @adapter: address of board private structure ++ * @start: address of beginning of memory ++ * @len: length of memory ++ **/ ++static boolean_t ++e1000_check_64k_bound(struct e1000_adapter *adapter, ++ void *start, unsigned long len) ++{ ++ unsigned long begin = (unsigned long) start; ++ unsigned long end = begin + len; ++ ++ /* First rev 82545 and 82546 need to not allow any memory ++ * write location to cross 64k boundary due to errata 23 */ ++ if (adapter->hw.mac_type == e1000_82545 || ++ adapter->hw.mac_type == e1000_82546) { ++ return ((begin ^ (end - 1)) >> 16) != 0 ? 
FALSE : TRUE; ++ } ++ ++ return TRUE; ++} ++ ++/** ++ * e1000_setup_tx_resources - allocate Tx resources (Descriptors) ++ * @adapter: board private structure ++ * @txdr: tx descriptor ring (for a specific queue) to setup ++ * ++ * Return 0 on success, negative on failure ++ **/ ++ ++static int ++e1000_setup_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *txdr) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int size; ++ ++ size = sizeof(struct e1000_buffer) * txdr->count; ++ txdr->buffer_info = vmalloc(size); ++ if (!txdr->buffer_info) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the transmit descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(txdr->buffer_info, 0, size); ++ ++ /* round up to nearest 4K */ ++ ++ txdr->size = txdr->count * sizeof(struct e1000_tx_desc); ++ E1000_ROUNDUP(txdr->size, 4096); ++ ++ txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma); ++ if (!txdr->desc) { ++setup_tx_desc_die: ++ vfree(txdr->buffer_info); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the transmit descriptor ring\n"); ++ return -ENOMEM; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) { ++ void *olddesc = txdr->desc; ++ dma_addr_t olddma = txdr->dma; ++ DPRINTK(TX_ERR, ERR, "txdr align check failed: %u bytes " ++ "at %p\n", txdr->size, txdr->desc); ++ /* Try again, without freeing the previous */ ++ txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma); ++ /* Failed allocation, critical failure */ ++ if (!txdr->desc) { ++ pci_free_consistent(pdev, txdr->size, olddesc, olddma); ++ goto setup_tx_desc_die; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) { ++ /* give up */ ++ pci_free_consistent(pdev, txdr->size, txdr->desc, ++ txdr->dma); ++ pci_free_consistent(pdev, txdr->size, olddesc, olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate aligned memory " ++ "for the transmit descriptor ring\n"); ++ vfree(txdr->buffer_info); ++ return -ENOMEM; ++ } else { ++ /* Free old allocation, new allocation was successful */ ++ pci_free_consistent(pdev, txdr->size, olddesc, olddma); ++ } ++ } ++ memset(txdr->desc, 0, txdr->size); ++ ++ txdr->next_to_use = 0; ++ txdr->next_to_clean = 0; ++ rtdm_lock_init(&txdr->tx_lock); ++ ++ return 0; ++} ++ ++/** ++ * e1000_setup_all_tx_resources - wrapper to allocate Tx resources ++ * (Descriptors) for all queues ++ * @adapter: board private structure ++ * ++ * If this function returns with an error, then it's possible one or ++ * more of the rings is populated (while the rest are not). It is the ++ * callers duty to clean those orphaned rings. ++ * ++ * Return 0 on success, negative on failure ++ **/ ++ ++int ++e1000_setup_all_tx_resources(struct e1000_adapter *adapter) ++{ ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]); ++ if (err) { ++ DPRINTK(PROBE, ERR, ++ "Allocation for Tx Queue %u failed\n", i); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * e1000_configure_tx - Configure 8254x Transmit Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Tx unit of the MAC after a reset. 
++ **/ ++ ++static void ++e1000_configure_tx(struct e1000_adapter *adapter) ++{ ++ uint64_t tdba; ++ struct e1000_hw *hw = &adapter->hw; ++ uint32_t tdlen, tctl, tipg, tarc; ++ uint32_t ipgr1, ipgr2; ++ ++ /* Setup the HW Tx Head and Tail descriptor pointers */ ++ ++ switch (adapter->num_tx_queues) { ++ case 1: ++ default: ++ tdba = adapter->tx_ring[0].dma; ++ tdlen = adapter->tx_ring[0].count * ++ sizeof(struct e1000_tx_desc); ++ E1000_WRITE_REG(hw, TDLEN, tdlen); ++ E1000_WRITE_REG(hw, TDBAH, (tdba >> 32)); ++ E1000_WRITE_REG(hw, TDBAL, (tdba & 0x00000000ffffffffULL)); ++ E1000_WRITE_REG(hw, TDT, 0); ++ E1000_WRITE_REG(hw, TDH, 0); ++ adapter->tx_ring[0].tdh = E1000_TDH; ++ adapter->tx_ring[0].tdt = E1000_TDT; ++ break; ++ } ++ ++ /* Set the default values for the Tx Inter Packet Gap timer */ ++ ++ if (hw->media_type == e1000_media_type_fiber || ++ hw->media_type == e1000_media_type_internal_serdes) ++ tipg = DEFAULT_82543_TIPG_IPGT_FIBER; ++ else ++ tipg = DEFAULT_82543_TIPG_IPGT_COPPER; ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ tipg = DEFAULT_82542_TIPG_IPGT; ++ ipgr1 = DEFAULT_82542_TIPG_IPGR1; ++ ipgr2 = DEFAULT_82542_TIPG_IPGR2; ++ break; ++ case e1000_80003es2lan: ++ ipgr1 = DEFAULT_82543_TIPG_IPGR1; ++ ipgr2 = DEFAULT_80003ES2LAN_TIPG_IPGR2; ++ break; ++ default: ++ ipgr1 = DEFAULT_82543_TIPG_IPGR1; ++ ipgr2 = DEFAULT_82543_TIPG_IPGR2; ++ break; ++ } ++ tipg |= ipgr1 << E1000_TIPG_IPGR1_SHIFT; ++ tipg |= ipgr2 << E1000_TIPG_IPGR2_SHIFT; ++ E1000_WRITE_REG(hw, TIPG, tipg); ++ ++ /* Set the Tx Interrupt Delay register */ ++ ++ E1000_WRITE_REG(hw, TIDV, adapter->tx_int_delay); ++ if (hw->mac_type >= e1000_82540) ++ E1000_WRITE_REG(hw, TADV, adapter->tx_abs_int_delay); ++ ++ /* Program the Transmit Control Register */ ++ ++ tctl = E1000_READ_REG(hw, TCTL); ++ ++ tctl &= ~E1000_TCTL_CT; ++ tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | ++ (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); ++ ++#ifdef DISABLE_MULR ++ /* disable Multiple Reads for debugging */ ++ tctl &= ~E1000_TCTL_MULR; ++#endif ++ ++ if (hw->mac_type == e1000_82571 || hw->mac_type == e1000_82572) { ++ tarc = E1000_READ_REG(hw, TARC0); ++ tarc |= ((1 << 25) | (1 << 21)); ++ E1000_WRITE_REG(hw, TARC0, tarc); ++ tarc = E1000_READ_REG(hw, TARC1); ++ tarc |= (1 << 25); ++ if (tctl & E1000_TCTL_MULR) ++ tarc &= ~(1 << 28); ++ else ++ tarc |= (1 << 28); ++ E1000_WRITE_REG(hw, TARC1, tarc); ++ } else if (hw->mac_type == e1000_80003es2lan) { ++ tarc = E1000_READ_REG(hw, TARC0); ++ tarc |= 1; ++ if (hw->media_type == e1000_media_type_internal_serdes) ++ tarc |= (1 << 20); ++ E1000_WRITE_REG(hw, TARC0, tarc); ++ tarc = E1000_READ_REG(hw, TARC1); ++ tarc |= 1; ++ E1000_WRITE_REG(hw, TARC1, tarc); ++ } ++ ++ e1000_config_collision_dist(hw); ++ ++ /* Setup Transmit Descriptor Settings for eop descriptor */ ++ adapter->txd_cmd = E1000_TXD_CMD_IDE | E1000_TXD_CMD_EOP | ++ E1000_TXD_CMD_IFCS; ++ ++ if (hw->mac_type < e1000_82543) ++ adapter->txd_cmd |= E1000_TXD_CMD_RPS; ++ else ++ adapter->txd_cmd |= E1000_TXD_CMD_RS; ++ ++ /* Cache if we're 82544 running in PCI-X because we'll ++ * need this to apply a workaround later in the send path. 
*/ ++ if (hw->mac_type == e1000_82544 && ++ hw->bus_type == e1000_bus_type_pcix) ++ adapter->pcix_82544 = 1; ++ ++ E1000_WRITE_REG(hw, TCTL, tctl); ++ ++} ++ ++/** ++ * e1000_setup_rx_resources - allocate Rx resources (Descriptors) ++ * @adapter: board private structure ++ * @rxdr: rx descriptor ring (for a specific queue) to setup ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++ ++static int ++e1000_setup_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rxdr) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int size, desc_len; ++ ++ size = sizeof(struct e1000_buffer) * rxdr->count; ++ rxdr->buffer_info = vmalloc(size); ++ if (!rxdr->buffer_info) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(rxdr->buffer_info, 0, size); ++ ++ size = sizeof(struct e1000_ps_page) * rxdr->count; ++ rxdr->ps_page = kmalloc(size, GFP_KERNEL); ++ if (!rxdr->ps_page) { ++ vfree(rxdr->buffer_info); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(rxdr->ps_page, 0, size); ++ ++ size = sizeof(struct e1000_ps_page_dma) * rxdr->count; ++ rxdr->ps_page_dma = kmalloc(size, GFP_KERNEL); ++ if (!rxdr->ps_page_dma) { ++ vfree(rxdr->buffer_info); ++ kfree(rxdr->ps_page); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(rxdr->ps_page_dma, 0, size); ++ ++ if (adapter->hw.mac_type <= e1000_82547_rev_2) ++ desc_len = sizeof(struct e1000_rx_desc); ++ else ++ desc_len = sizeof(union e1000_rx_desc_packet_split); ++ ++ /* Round up to nearest 4K */ ++ ++ rxdr->size = rxdr->count * desc_len; ++ E1000_ROUNDUP(rxdr->size, 4096); ++ ++ rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma); ++ ++ if (!rxdr->desc) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++setup_rx_desc_die: ++ vfree(rxdr->buffer_info); ++ kfree(rxdr->ps_page); ++ kfree(rxdr->ps_page_dma); ++ return -ENOMEM; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, rxdr->desc, rxdr->size)) { ++ void *olddesc = rxdr->desc; ++ dma_addr_t olddma = rxdr->dma; ++ DPRINTK(RX_ERR, ERR, "rxdr align check failed: %u bytes " ++ "at %p\n", rxdr->size, rxdr->desc); ++ /* Try again, without freeing the previous */ ++ rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma); ++ /* Failed allocation, critical failure */ ++ if (!rxdr->desc) { ++ pci_free_consistent(pdev, rxdr->size, olddesc, olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory " ++ "for the receive descriptor ring\n"); ++ goto setup_rx_desc_die; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, rxdr->desc, rxdr->size)) { ++ /* give up */ ++ pci_free_consistent(pdev, rxdr->size, rxdr->desc, ++ rxdr->dma); ++ pci_free_consistent(pdev, rxdr->size, olddesc, olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate aligned memory " ++ "for the receive descriptor ring\n"); ++ goto setup_rx_desc_die; ++ } else { ++ /* Free old allocation, new allocation was successful */ ++ pci_free_consistent(pdev, rxdr->size, olddesc, olddma); ++ } ++ } ++ memset(rxdr->desc, 0, rxdr->size); ++ ++ rxdr->next_to_clean = 0; ++ rxdr->next_to_use = 0; ++ ++ return 0; ++} ++ ++/** ++ * e1000_setup_all_rx_resources - wrapper to allocate Rx resources ++ * (Descriptors) for all queues ++ * @adapter: board private structure ++ * ++ * If this function returns with an error, then it's 
possible one or ++ * more of the rings is populated (while the rest are not). It is the ++ * callers duty to clean those orphaned rings. ++ * ++ * Return 0 on success, negative on failure ++ **/ ++ ++int ++e1000_setup_all_rx_resources(struct e1000_adapter *adapter) ++{ ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ err = e1000_setup_rx_resources(adapter, &adapter->rx_ring[i]); ++ if (err) { ++ DPRINTK(PROBE, ERR, ++ "Allocation for Rx Queue %u failed\n", i); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * e1000_setup_rctl - configure the receive control registers ++ * @adapter: Board private structure ++ **/ ++#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \ ++ (((S) & (PAGE_SIZE - 1)) ? 1 : 0)) ++static void ++e1000_setup_rctl(struct e1000_adapter *adapter) ++{ ++ uint32_t rctl; ++#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT ++ uint32_t pages = 0; ++#endif ++ ++ rctl = E1000_READ_REG(&adapter->hw, RCTL); ++ ++ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); ++ ++ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | ++ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | ++ (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT); ++ ++ /* FIXME: disable the stripping of CRC because it breaks ++ * BMC firmware connected over SMBUS ++ if (adapter->hw.mac_type > e1000_82543) ++ rctl |= E1000_RCTL_SECRC; ++ */ ++ ++ if (adapter->hw.tbi_compatibility_on == 1) ++ rctl |= E1000_RCTL_SBP; ++ else ++ rctl &= ~E1000_RCTL_SBP; ++ ++ if (adapter->netdev->mtu <= ETH_DATA_LEN) ++ rctl &= ~E1000_RCTL_LPE; ++ else ++ rctl |= E1000_RCTL_LPE; ++ ++ /* Setup buffer sizes */ ++ rctl &= ~E1000_RCTL_SZ_4096; ++ rctl |= E1000_RCTL_BSEX; ++ switch (adapter->rx_buffer_len) { ++ case E1000_RXBUFFER_256: ++ rctl |= E1000_RCTL_SZ_256; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_512: ++ rctl |= E1000_RCTL_SZ_512; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_1024: ++ rctl |= E1000_RCTL_SZ_1024; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_2048: ++ default: ++ rctl |= E1000_RCTL_SZ_2048; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_4096: ++ rctl |= E1000_RCTL_SZ_4096; ++ break; ++ case E1000_RXBUFFER_8192: ++ rctl |= E1000_RCTL_SZ_8192; ++ break; ++ case E1000_RXBUFFER_16384: ++ rctl |= E1000_RCTL_SZ_16384; ++ break; ++ } ++ ++ adapter->rx_ps_pages = 0; ++ ++ E1000_WRITE_REG(&adapter->hw, RCTL, rctl); ++} ++ ++/** ++ * e1000_configure_rx - Configure 8254x Receive Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Rx unit of the MAC after a reset. 
++ **/ ++ ++static void ++e1000_configure_rx(struct e1000_adapter *adapter) ++{ ++ uint64_t rdba; ++ struct e1000_hw *hw = &adapter->hw; ++ uint32_t rdlen, rctl, rxcsum, ctrl_ext; ++ ++ { ++ rdlen = adapter->rx_ring[0].count * ++ sizeof(struct e1000_rx_desc); ++ adapter->clean_rx = NULL; /* unused */ ++ adapter->alloc_rx_buf = e1000_alloc_rx_buffers; ++ } ++ ++ /* disable receives while setting up the descriptors */ ++ rctl = E1000_READ_REG(hw, RCTL); ++ E1000_WRITE_REG(hw, RCTL, rctl & ~E1000_RCTL_EN); ++ ++ /* set the Receive Delay Timer Register */ ++ E1000_WRITE_REG(hw, RDTR, adapter->rx_int_delay); ++ ++ if (hw->mac_type >= e1000_82540) { ++ E1000_WRITE_REG(hw, RADV, adapter->rx_abs_int_delay); ++ if (adapter->itr > 1) ++ E1000_WRITE_REG(hw, ITR, ++ 1000000000 / (adapter->itr * 256)); ++ } ++ ++ if (hw->mac_type >= e1000_82571) { ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ /* Reset delay timers after every interrupt */ ++ ctrl_ext |= E1000_CTRL_EXT_INT_TIMER_CLR; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ E1000_WRITE_REG(hw, IAM, ~0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Setup the HW Rx Head and Tail Descriptor Pointers and ++ * the Base and Length of the Rx Descriptor Ring */ ++ switch (adapter->num_rx_queues) { ++ case 1: ++ default: ++ rdba = adapter->rx_ring[0].dma; ++ E1000_WRITE_REG(hw, RDLEN, rdlen); ++ E1000_WRITE_REG(hw, RDBAH, (rdba >> 32)); ++ E1000_WRITE_REG(hw, RDBAL, (rdba & 0x00000000ffffffffULL)); ++ E1000_WRITE_REG(hw, RDT, 0); ++ E1000_WRITE_REG(hw, RDH, 0); ++ adapter->rx_ring[0].rdh = E1000_RDH; ++ adapter->rx_ring[0].rdt = E1000_RDT; ++ break; ++ } ++ ++ /* Enable 82543 Receive Checksum Offload for TCP and UDP */ ++ if (hw->mac_type >= e1000_82543) { ++ rxcsum = E1000_READ_REG(hw, RXCSUM); ++ if (adapter->rx_csum == TRUE) { ++ rxcsum |= E1000_RXCSUM_TUOFL; ++ ++ } else { ++ rxcsum &= ~E1000_RXCSUM_TUOFL; ++ /* don't need to clear IPPCSE as it defaults to 0 */ ++ } ++ E1000_WRITE_REG(hw, RXCSUM, rxcsum); ++ } ++ ++ ++ /* Enable Receives */ ++ E1000_WRITE_REG(hw, RCTL, rctl); ++} ++ ++/** ++ * e1000_free_tx_resources - Free Tx Resources per Queue ++ * @adapter: board private structure ++ * @tx_ring: Tx descriptor ring for a specific queue ++ * ++ * Free all transmit software resources ++ **/ ++ ++static void ++e1000_free_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ ++ e1000_clean_tx_ring(adapter, tx_ring); ++ ++ vfree(tx_ring->buffer_info); ++ tx_ring->buffer_info = NULL; ++ ++ pci_free_consistent(pdev, tx_ring->size, tx_ring->desc, tx_ring->dma); ++ ++ tx_ring->desc = NULL; ++} ++ ++/** ++ * e1000_free_all_tx_resources - Free Tx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all transmit software resources ++ **/ ++ ++void ++e1000_free_all_tx_resources(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ e1000_free_tx_resources(adapter, &adapter->tx_ring[i]); ++} ++ ++static void ++e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter, ++ struct e1000_buffer *buffer_info) ++{ ++ if (buffer_info->dma) { ++ pci_unmap_page(adapter->pdev, ++ buffer_info->dma, ++ buffer_info->length, ++ PCI_DMA_TODEVICE); ++ } ++ if (buffer_info->skb) ++ kfree_rtskb(buffer_info->skb); ++ memset(buffer_info, 0, sizeof(struct e1000_buffer)); ++} ++ ++/** ++ * e1000_clean_tx_ring - Free Tx Buffers ++ * @adapter: board private structure ++ * @tx_ring: ring to be cleaned ++ **/ ++ ++static void 
++e1000_clean_tx_ring(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct e1000_buffer *buffer_info; ++ unsigned long size; ++ unsigned int i; ++ ++ /* Free all the Tx ring sk_buffs */ ++ ++ for (i = 0; i < tx_ring->count; i++) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ e1000_unmap_and_free_tx_resource(adapter, buffer_info); ++ } ++ ++ size = sizeof(struct e1000_buffer) * tx_ring->count; ++ memset(tx_ring->buffer_info, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ ++ memset(tx_ring->desc, 0, tx_ring->size); ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ tx_ring->last_tx_tso = 0; ++ ++ writel(0, adapter->hw.hw_addr + tx_ring->tdh); ++ writel(0, adapter->hw.hw_addr + tx_ring->tdt); ++} ++ ++/** ++ * e1000_clean_all_tx_rings - Free Tx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++ ++static void ++e1000_clean_all_tx_rings(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ e1000_clean_tx_ring(adapter, &adapter->tx_ring[i]); ++} ++ ++/** ++ * e1000_free_rx_resources - Free Rx Resources ++ * @adapter: board private structure ++ * @rx_ring: ring to clean the resources from ++ * ++ * Free all receive software resources ++ **/ ++ ++static void ++e1000_free_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ ++ e1000_clean_rx_ring(adapter, rx_ring); ++ ++ vfree(rx_ring->buffer_info); ++ rx_ring->buffer_info = NULL; ++ kfree(rx_ring->ps_page); ++ rx_ring->ps_page = NULL; ++ kfree(rx_ring->ps_page_dma); ++ rx_ring->ps_page_dma = NULL; ++ ++ pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, rx_ring->dma); ++ ++ rx_ring->desc = NULL; ++} ++ ++/** ++ * e1000_free_all_rx_resources - Free Rx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all receive software resources ++ **/ ++ ++void ++e1000_free_all_rx_resources(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ e1000_free_rx_resources(adapter, &adapter->rx_ring[i]); ++} ++ ++/** ++ * e1000_clean_rx_ring - Free Rx Buffers per Queue ++ * @adapter: board private structure ++ * @rx_ring: ring to free buffers from ++ **/ ++ ++static void ++e1000_clean_rx_ring(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring) ++{ ++ struct e1000_buffer *buffer_info; ++ struct pci_dev *pdev = adapter->pdev; ++ unsigned long size; ++ unsigned int i; ++ ++ /* Free all the Rx ring sk_buffs */ ++ for (i = 0; i < rx_ring->count; i++) { ++ buffer_info = &rx_ring->buffer_info[i]; ++ if (buffer_info->skb) { ++ pci_unmap_single(pdev, ++ buffer_info->dma, ++ buffer_info->length, ++ PCI_DMA_FROMDEVICE); ++ ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ } ++ ++ size = sizeof(struct e1000_buffer) * rx_ring->count; ++ memset(rx_ring->buffer_info, 0, size); ++ size = sizeof(struct e1000_ps_page) * rx_ring->count; ++ memset(rx_ring->ps_page, 0, size); ++ size = sizeof(struct e1000_ps_page_dma) * rx_ring->count; ++ memset(rx_ring->ps_page_dma, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ ++ memset(rx_ring->desc, 0, rx_ring->size); ++ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ ++ writel(0, adapter->hw.hw_addr + rx_ring->rdh); ++ writel(0, adapter->hw.hw_addr + rx_ring->rdt); ++} ++ ++/** ++ * e1000_clean_all_rx_rings - Free Rx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++ ++static void 
++e1000_clean_all_rx_rings(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ e1000_clean_rx_ring(adapter, &adapter->rx_ring[i]); ++} ++ ++/* The 82542 2.0 (revision 2) needs to have the receive unit in reset ++ * and memory write and invalidate disabled for certain operations ++ */ ++static void ++e1000_enter_82542_rst(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ uint32_t rctl; ++ ++ e1000_pci_clear_mwi(&adapter->hw); ++ ++ rctl = E1000_READ_REG(&adapter->hw, RCTL); ++ rctl |= E1000_RCTL_RST; ++ E1000_WRITE_REG(&adapter->hw, RCTL, rctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ mdelay(5); ++ ++ if (rtnetif_running(netdev)) ++ e1000_clean_all_rx_rings(adapter); ++} ++ ++static void ++e1000_leave_82542_rst(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ uint32_t rctl; ++ ++ rctl = E1000_READ_REG(&adapter->hw, RCTL); ++ rctl &= ~E1000_RCTL_RST; ++ E1000_WRITE_REG(&adapter->hw, RCTL, rctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ mdelay(5); ++ ++ if (adapter->hw.pci_cmd_word & PCI_COMMAND_INVALIDATE) ++ e1000_pci_set_mwi(&adapter->hw); ++ ++ if (rtnetif_running(netdev)) { ++ /* No need to loop, because 82542 supports only 1 queue */ ++ struct e1000_rx_ring *ring = &adapter->rx_ring[0]; ++ e1000_configure_rx(adapter); ++ adapter->alloc_rx_buf(adapter, ring, E1000_DESC_UNUSED(ring)); ++ } ++} ++ ++/** ++ * e1000_set_multi - Multicast and Promiscuous mode set ++ * @netdev: network interface device structure ++ * ++ * The set_multi entry point is called whenever the multicast address ++ * list or the network interface flags are updated. This routine is ++ * responsible for configuring the hardware for proper multicast, ++ * promiscuous mode, and all-multi behavior. ++ **/ ++ ++static void ++e1000_set_multi(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ uint32_t rctl; ++ int i, rar_entries = E1000_RAR_ENTRIES; ++ int mta_reg_count = (hw->mac_type == e1000_ich8lan) ? 
++ E1000_NUM_MTA_REGISTERS_ICH8LAN : ++ E1000_NUM_MTA_REGISTERS; ++ ++ if (adapter->hw.mac_type == e1000_ich8lan) ++ rar_entries = E1000_RAR_ENTRIES_ICH8LAN; ++ ++ /* reserve RAR[14] for LAA over-write work-around */ ++ if (adapter->hw.mac_type == e1000_82571) ++ rar_entries--; ++ ++ /* Check for Promiscuous and All Multicast modes */ ++ ++ rctl = E1000_READ_REG(hw, RCTL); ++ ++ if (netdev->flags & IFF_PROMISC) { ++ rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); ++ } else if (netdev->flags & IFF_ALLMULTI) { ++ rctl |= E1000_RCTL_MPE; ++ rctl &= ~E1000_RCTL_UPE; ++ } else { ++ rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE); ++ } ++ ++ E1000_WRITE_REG(hw, RCTL, rctl); ++ ++ /* 82542 2.0 needs to be in reset to write receive address registers */ ++ ++ if (hw->mac_type == e1000_82542_rev2_0) ++ e1000_enter_82542_rst(adapter); ++ ++ /* load the first 14 multicast address into the exact filters 1-14 ++ * RAR 0 is used for the station MAC adddress ++ * if there are not 14 addresses, go ahead and clear the filters ++ * -- with 82571 controllers only 0-13 entries are filled here ++ */ ++ ++ for (i = 1; i < rar_entries; i++) { ++ E1000_WRITE_REG_ARRAY(hw, RA, i << 1, 0); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1) + 1, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* clear the old settings from the multicast hash table */ ++ ++ for (i = 0; i < mta_reg_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ if (hw->mac_type == e1000_82542_rev2_0) ++ e1000_leave_82542_rst(adapter); ++} ++ ++/** ++ * e1000_update_phy_info_task - get phy info ++ * @work: work struct contained inside adapter struct ++ * ++ * Need to wait a few seconds after link up to get diagnostic information from ++ * the phy ++ */ ++static void e1000_update_phy_info_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, ++ phy_info_task.work); ++ e1000_phy_get_info(&adapter->hw, &adapter->phy_info); ++} ++ ++/** ++ * e1000_82547_tx_fifo_stall_task - task to complete work ++ * @work: work struct contained inside adapter struct ++ **/ ++ ++static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, ++ fifo_stall_task.work); ++ struct rtnet_device *netdev = adapter->netdev; ++ uint32_t tctl; ++ ++ if (atomic_read(&adapter->tx_fifo_stall)) { ++ if ((E1000_READ_REG(&adapter->hw, TDT) == ++ E1000_READ_REG(&adapter->hw, TDH)) && ++ (E1000_READ_REG(&adapter->hw, TDFT) == ++ E1000_READ_REG(&adapter->hw, TDFH)) && ++ (E1000_READ_REG(&adapter->hw, TDFTS) == ++ E1000_READ_REG(&adapter->hw, TDFHS))) { ++ tctl = E1000_READ_REG(&adapter->hw, TCTL); ++ E1000_WRITE_REG(&adapter->hw, TCTL, ++ tctl & ~E1000_TCTL_EN); ++ E1000_WRITE_REG(&adapter->hw, TDFT, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, TDFH, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, TDFTS, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, TDFHS, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, TCTL, tctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ ++ adapter->tx_fifo_head = 0; ++ atomic_set(&adapter->tx_fifo_stall, 0); ++ rtnetif_wake_queue(netdev); ++ } else { ++ schedule_delayed_work(&adapter->fifo_stall_task, 1); ++ } ++ } ++} ++ ++/** ++ * e1000_watchdog - work function ++ * @work: work struct contained inside adapter struct ++ **/ ++static void e1000_watchdog(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = 
container_of(work, ++ struct e1000_adapter, ++ watchdog_task.work); ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_tx_ring *txdr = adapter->tx_ring; ++ uint32_t link, tctl; ++ int32_t ret_val; ++ ++ ret_val = e1000_check_for_link(&adapter->hw); ++ if ((ret_val == E1000_ERR_PHY) && ++ (adapter->hw.phy_type == e1000_phy_igp_3) && ++ (E1000_READ_REG(&adapter->hw, CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { ++ /* See e1000_kumeran_lock_loss_workaround() */ ++ DPRINTK(LINK, INFO, ++ "Gigabit has been disabled, downgrading speed\n"); ++ } ++ if (adapter->hw.mac_type == e1000_82573) { ++ e1000_enable_tx_pkt_filtering(&adapter->hw); ++ } ++ ++ if ((adapter->hw.media_type == e1000_media_type_internal_serdes) && ++ !(E1000_READ_REG(&adapter->hw, TXCW) & E1000_TXCW_ANE)) ++ link = !adapter->hw.serdes_link_down; ++ else ++ link = E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU; ++ ++ if (link) { ++ if (!rtnetif_carrier_ok(netdev)) { ++ boolean_t txb2b = 1; ++ e1000_get_speed_and_duplex(&adapter->hw, ++ &adapter->link_speed, ++ &adapter->link_duplex); ++ ++ DPRINTK(LINK, INFO, "NIC Link is Up %d Mbps %s\n", ++ adapter->link_speed, ++ adapter->link_duplex == FULL_DUPLEX ? ++ "Full Duplex" : "Half Duplex"); ++ ++ /* tweak tx_queue_len according to speed/duplex ++ * and adjust the timeout factor */ ++ // TODO makoehre netdev->tx_queue_len = adapter->tx_queue_len; ++ adapter->tx_timeout_factor = 1; ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ txb2b = 0; ++ // TODO makoehre netdev->tx_queue_len = 10; ++ adapter->tx_timeout_factor = 8; ++ break; ++ case SPEED_100: ++ txb2b = 0; ++ // TODO makoehre netdev->tx_queue_len = 100; ++ /* maybe add some timeout factor ? */ ++ break; ++ } ++ ++ if ((adapter->hw.mac_type == e1000_82571 || ++ adapter->hw.mac_type == e1000_82572) && ++ txb2b == 0) { ++#define SPEED_MODE_BIT (1 << 21) ++ uint32_t tarc0; ++ tarc0 = E1000_READ_REG(&adapter->hw, TARC0); ++ tarc0 &= ~SPEED_MODE_BIT; ++ E1000_WRITE_REG(&adapter->hw, TARC0, tarc0); ++ } ++ ++ ++ /* enable transmits in the hardware, need to do this ++ * after setting TARC0 */ ++ tctl = E1000_READ_REG(&adapter->hw, TCTL); ++ tctl |= E1000_TCTL_EN; ++ E1000_WRITE_REG(&adapter->hw, TCTL, tctl); ++ ++ rtnetif_carrier_on(netdev); ++ rtnetif_wake_queue(netdev); ++ schedule_delayed_work(&adapter->phy_info_task, 2 * HZ); ++ adapter->smartspeed = 0; ++ } ++ } else { ++ if (rtnetif_carrier_ok(netdev)) { ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ DPRINTK(LINK, INFO, "NIC Link is Down\n"); ++ rtnetif_carrier_off(netdev); ++ rtnetif_stop_queue(netdev); ++ schedule_delayed_work(&adapter->phy_info_task, 2 * HZ); ++ ++ /* 80003ES2LAN workaround-- ++ * For packet buffer work-around on link down event; ++ * disable receives in the ISR and ++ * reset device here in the watchdog ++ */ ++ if (adapter->hw.mac_type == e1000_80003es2lan) ++ /* reset device */ ++ schedule_work(&adapter->reset_task); ++ } ++ ++ e1000_smartspeed(adapter); ++ } ++ ++ ++ adapter->hw.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; ++ adapter->tpt_old = adapter->stats.tpt; ++ adapter->hw.collision_delta = adapter->stats.colc - adapter->colc_old; ++ adapter->colc_old = adapter->stats.colc; ++ ++ adapter->gorcl = adapter->stats.gorcl - adapter->gorcl_old; ++ adapter->gorcl_old = adapter->stats.gorcl; ++ adapter->gotcl = adapter->stats.gotcl - adapter->gotcl_old; ++ adapter->gotcl_old = adapter->stats.gotcl; ++ ++ // e1000_update_adaptive(&adapter->hw); ++ ++ if (!rtnetif_carrier_ok(netdev)) { ++ if 
(E1000_DESC_UNUSED(txdr) + 1 < txdr->count) { ++ /* We've lost link, so the controller stops DMA, ++ * but we've got queued Tx work that's never going ++ * to get done, so reset controller to flush Tx. ++ * (Do the reset outside of interrupt context). */ ++ adapter->tx_timeout_count++; ++ schedule_work(&adapter->reset_task); ++ } ++ } ++ ++ /* Dynamic mode for Interrupt Throttle Rate (ITR) */ ++ if (adapter->hw.mac_type >= e1000_82540 && adapter->itr == 1) { ++ /* Symmetric Tx/Rx gets a reduced ITR=2000; Total ++ * asymmetrical Tx or Rx gets ITR=8000; everyone ++ * else is between 2000-8000. */ ++ uint32_t goc = (adapter->gotcl + adapter->gorcl) / 10000; ++ uint32_t dif = (adapter->gotcl > adapter->gorcl ? ++ adapter->gotcl - adapter->gorcl : ++ adapter->gorcl - adapter->gotcl) / 10000; ++ uint32_t itr = goc > 0 ? (dif * 6000 / goc + 2000) : 8000; ++ E1000_WRITE_REG(&adapter->hw, ITR, 1000000000 / (itr * 256)); ++ } ++ ++ /* Cause software interrupt to ensure rx ring is cleaned */ ++ E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_RXDMT0); ++ ++ /* Force detection of hung controller every watchdog period */ ++ adapter->detect_tx_hung = TRUE; ++ ++ /* With 82571 controllers, LAA may be overwritten due to controller ++ * reset from the other port. Set the appropriate LAA in RAR[0] */ ++ if (adapter->hw.mac_type == e1000_82571 && adapter->hw.laa_is_present) ++ e1000_rar_set(&adapter->hw, adapter->hw.mac_addr, 0); ++ ++ /* Reschedule the task */ ++ schedule_delayed_work(&adapter->watchdog_task, 2 * HZ); ++} ++ ++#define E1000_TX_FLAGS_CSUM 0x00000001 ++#define E1000_TX_FLAGS_VLAN 0x00000002 ++#define E1000_TX_FLAGS_TSO 0x00000004 ++#define E1000_TX_FLAGS_IPV4 0x00000008 ++#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 ++#define E1000_TX_FLAGS_VLAN_SHIFT 16 ++ ++ ++static boolean_t ++e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, ++ struct rtskb *skb) ++{ ++ struct e1000_context_desc *context_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i; ++ uint8_t css; ++ ++ if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { ++ css = skb->h.raw - skb->data; ++ ++ i = tx_ring->next_to_use; ++ buffer_info = &tx_ring->buffer_info[i]; ++ context_desc = E1000_CONTEXT_DESC(*tx_ring, i); ++ ++ context_desc->upper_setup.tcp_fields.tucss = css; ++ context_desc->upper_setup.tcp_fields.tucso = css + skb->csum; ++ context_desc->upper_setup.tcp_fields.tucse = 0; ++ context_desc->tcp_seg_setup.data = 0; ++ context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT); ++ ++ buffer_info->time_stamp = jiffies; ++ ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ tx_ring->next_to_use = i; ++ ++ return TRUE; ++ } ++ ++ return FALSE; ++} ++ ++#define E1000_MAX_TXD_PWR 12 ++#define E1000_MAX_DATA_PER_TXD (1<len; ++ unsigned int offset = 0, size, count = 0, i; ++ ++ i = tx_ring->next_to_use; ++ ++ while (len) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ size = min(len, max_per_txd); ++ /* work-around for errata 10 and it applies ++ * to all controllers in PCI-X mode ++ * The fix is to make sure that the first descriptor of a ++ * packet is smaller than 2048 - 16 - 16 (or 2016) bytes ++ */ ++ if (unlikely((adapter->hw.bus_type == e1000_bus_type_pcix) && ++ (size > 2015) && count == 0)) ++ size = 2015; ++ ++ /* Workaround for potential 82544 hang in PCI-X. Avoid ++ * terminating buffers within evenly-aligned dwords. 
*/ ++ if (unlikely(adapter->pcix_82544 && ++ !((unsigned long)(skb->data + offset + size - 1) & 4) && ++ size > 4)) ++ size -= 4; ++ ++ buffer_info->length = size; ++ buffer_info->dma = ++ pci_map_single(adapter->pdev, ++ skb->data + offset, ++ size, ++ PCI_DMA_TODEVICE); ++ buffer_info->time_stamp = jiffies; ++ ++ len -= size; ++ offset += size; ++ count++; ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++ ++ i = (i == 0) ? tx_ring->count - 1 : i - 1; ++ tx_ring->buffer_info[i].skb = skb; ++ tx_ring->buffer_info[first].next_to_watch = i; ++ ++ return count; ++} ++ ++static void ++e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, ++ int tx_flags, int count, nanosecs_abs_t *xmit_stamp) ++{ ++ struct e1000_tx_desc *tx_desc = NULL; ++ struct e1000_buffer *buffer_info; ++ uint32_t txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS; ++ unsigned int i; ++ ++ ++ if (likely(tx_flags & E1000_TX_FLAGS_CSUM)) { ++ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; ++ txd_upper |= E1000_TXD_POPTS_TXSM << 8; ++ } ++ ++ i = tx_ring->next_to_use; ++ ++ while (count--) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ tx_desc->lower.data = ++ cpu_to_le32(txd_lower | buffer_info->length); ++ tx_desc->upper.data = cpu_to_le32(txd_upper); ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++ tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); ++ ++ if (xmit_stamp) ++ *xmit_stamp = cpu_to_be64(rtdm_clock_read() + *xmit_stamp); ++ ++ /* Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). */ ++ wmb(); ++ ++ tx_ring->next_to_use = i; ++ writel(i, adapter->hw.hw_addr + tx_ring->tdt); ++} ++ ++/** ++ * 82547 workaround to avoid controller hang in half-duplex environment. ++ * The workaround is to avoid queuing a large packet that would span ++ * the internal Tx FIFO ring boundary by notifying the stack to resend ++ * the packet at a later time. This gives the Tx FIFO an opportunity to ++ * flush all packets. When that occurs, we reset the Tx FIFO pointers ++ * to the beginning of the Tx FIFO. 
++ **/ ++ ++#define E1000_FIFO_HDR 0x10 ++#define E1000_82547_PAD_LEN 0x3E0 ++ ++static int ++e1000_82547_fifo_workaround(struct e1000_adapter *adapter, struct rtskb *skb) ++{ ++ uint32_t fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head; ++ uint32_t skb_fifo_len = skb->len + E1000_FIFO_HDR; ++ ++ E1000_ROUNDUP(skb_fifo_len, E1000_FIFO_HDR); ++ ++ if (adapter->link_duplex != HALF_DUPLEX) ++ goto no_fifo_stall_required; ++ ++ if (atomic_read(&adapter->tx_fifo_stall)) ++ return 1; ++ ++ if (skb_fifo_len >= (E1000_82547_PAD_LEN + fifo_space)) { ++ atomic_set(&adapter->tx_fifo_stall, 1); ++ return 1; ++ } ++ ++no_fifo_stall_required: ++ adapter->tx_fifo_head += skb_fifo_len; ++ if (adapter->tx_fifo_head >= adapter->tx_fifo_size) ++ adapter->tx_fifo_head -= adapter->tx_fifo_size; ++ return 0; ++} ++ ++#define MINIMUM_DHCP_PACKET_SIZE 282 ++static int ++e1000_transfer_dhcp_info(struct e1000_adapter *adapter, struct rtskb *skb) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ uint16_t length, offset; ++ if (skb->len > MINIMUM_DHCP_PACKET_SIZE) { ++ struct ethhdr *eth = (struct ethhdr *) skb->data; ++ if ((htons(ETH_P_IP) == eth->h_proto)) { ++ const struct iphdr *ip = ++ (struct iphdr *)((uint8_t *)skb->data+14); ++ if (IPPROTO_UDP == ip->protocol) { ++ struct udphdr *udp = ++ (struct udphdr *)((uint8_t *)ip + ++ (ip->ihl << 2)); ++ if (ntohs(udp->dest) == 67) { ++ offset = (uint8_t *)udp + 8 - skb->data; ++ length = skb->len - offset; ++ ++ return e1000_mng_write_dhcp_info(hw, ++ (uint8_t *)udp + 8, ++ length); ++ } ++ } ++ } ++ } ++ return 0; ++} ++ ++#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 ) ++static int ++e1000_xmit_frame(struct rtskb *skb, struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_tx_ring *tx_ring; ++ unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD; ++ unsigned int max_txd_pwr = E1000_MAX_TXD_PWR; ++ unsigned int tx_flags = 0; ++ unsigned int len = skb->len; ++ rtdm_lockctx_t context; ++ unsigned int nr_frags = 0; ++ unsigned int mss = 0; ++ int count = 0; ++ ++ /* This goes back to the question of how to logically map a tx queue ++ * to a flow. Right now, performance is impacted slightly negatively ++ * if using multiple tx queues. If the stack breaks away from a ++ * single qdisc implementation, we can look at this again. 
*/ ++ tx_ring = adapter->tx_ring; ++ ++ if (unlikely(skb->len <= 0)) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) ++ count++; ++ ++ ++ count += TXD_USE_COUNT(len, max_txd_pwr); ++ ++ if (adapter->pcix_82544) ++ count++; ++ ++ /* work-around for errata 10 and it applies to all controllers ++ * in PCI-X mode, so add one more descriptor to the count ++ */ ++ if (unlikely((adapter->hw.bus_type == e1000_bus_type_pcix) && ++ (len > 2015))) ++ count++; ++ ++ ++ if (adapter->hw.tx_pkt_filtering && ++ (adapter->hw.mac_type == e1000_82573)) ++ e1000_transfer_dhcp_info(adapter, skb); ++ ++ rtdm_lock_get_irqsave(&tx_ring->tx_lock, context); ++ ++ /* need: count + 2 desc gap to keep tail from touching ++ * head, otherwise try next time */ ++ if (unlikely(E1000_DESC_UNUSED(tx_ring) < count + 2)) { ++ rtnetif_stop_queue(netdev); ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, context); ++ rtdm_printk("FATAL: rt_e1000 ran into tail close to head situation!\n"); ++ return NETDEV_TX_BUSY; ++ } ++ ++ if (unlikely(adapter->hw.mac_type == e1000_82547)) { ++ if (unlikely(e1000_82547_fifo_workaround(adapter, skb))) { ++ rtnetif_stop_queue(netdev); ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, context); ++ ++ /* FIXME: warn the user earlier, i.e. on startup if ++ half-duplex is detected! */ ++ rtdm_printk("FATAL: rt_e1000 ran into 82547 " ++ "controller bug!\n"); ++ return NETDEV_TX_BUSY; ++ } ++ } ++ ++ first = tx_ring->next_to_use; ++ ++ if (likely(e1000_tx_csum(adapter, tx_ring, skb))) ++ tx_flags |= E1000_TX_FLAGS_CSUM; ++ ++ e1000_tx_queue(adapter, tx_ring, tx_flags, ++ e1000_tx_map(adapter, tx_ring, skb, first, ++ max_per_txd, nr_frags, mss), ++ skb->xmit_stamp); ++ ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, context); ++ ++ return NETDEV_TX_OK; ++} ++ ++/** ++ * e1000_intr - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ * @pt_regs: CPU registers structure ++ **/ ++ ++static int ++e1000_intr(rtdm_irq_t *irq_handle) ++ /* int irq, void *data, struct pt_regs *regs) */ ++{ ++ ++ struct rtnet_device *netdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ uint32_t rctl, icr = E1000_READ_REG(hw, ICR); ++ int i; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ if (unlikely(!icr)) { ++ return RTDM_IRQ_NONE; /* Not our interrupt */ ++ } ++ if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) { ++ hw->get_link_status = 1; ++ /* 80003ES2LAN workaround-- ++ * For packet buffer work-around on link down event; ++ * disable receives here in the ISR and ++ * reset adapter in watchdog ++ */ ++ if (rtnetif_carrier_ok(netdev) && ++ (adapter->hw.mac_type == e1000_80003es2lan)) { ++ /* disable receives */ ++ rctl = E1000_READ_REG(hw, RCTL); ++ E1000_WRITE_REG(hw, RCTL, rctl & ~E1000_RCTL_EN); ++ } ++ /* FIXME: we need to handle this via some yet-to-be-invented ++ error manager (Linux botton-half and/or kthread) ++ mod_timer(&adapter->watchdog_timer, jiffies);*/ ++ } ++ ++ /* Writing IMC and IMS is needed for 82547. ++ * Due to Hub Link bus being occupied, an interrupt ++ * de-assertion message is not able to be sent. ++ * When an interrupt assertion message is generated later, ++ * two messages are re-ordered and sent out. ++ * That causes APIC to think 82547 is in de-assertion ++ * state, while 82547 is in assertion state, resulting ++ * in dead lock. Writing IMC forces 82547 into ++ * de-assertion state. 
++ */ ++ if (hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) { ++ atomic_inc(&adapter->irq_sem); ++ E1000_WRITE_REG(hw, IMC, ~0); ++ } ++ ++ adapter->data_received = 0; ++ ++ for (i = 0; i < E1000_MAX_INTR; i++) ++ if (unlikely(!e1000_clean_rx_irq(adapter, adapter->rx_ring, ++ &time_stamp) & ++ !e1000_clean_tx_irq(adapter, adapter->tx_ring))) ++ break; ++ ++ if (hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) ++ e1000_irq_enable(adapter); ++ ++ ++ if (adapter->data_received) ++ rt_mark_stack_mgr(netdev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++/** ++ * e1000_clean_tx_irq - Reclaim resources after transmit completes ++ * @adapter: board private structure ++ **/ ++ ++static boolean_t ++e1000_clean_tx_irq(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_tx_desc *tx_desc, *eop_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i, eop; ++ boolean_t cleaned = FALSE; ++ ++ i = tx_ring->next_to_clean; ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++ ++ while (eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) { ++ for (cleaned = FALSE; !cleaned; ) { ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ buffer_info = &tx_ring->buffer_info[i]; ++ cleaned = (i == eop); ++ ++ e1000_unmap_and_free_tx_resource(adapter, buffer_info); ++ memset(tx_desc, 0, sizeof(struct e1000_tx_desc)); ++ ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++ ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++ } ++ ++ tx_ring->next_to_clean = i; ++ ++#define TX_WAKE_THRESHOLD 32 ++ if (unlikely(cleaned && rtnetif_queue_stopped(netdev) && ++ rtnetif_carrier_ok(netdev))) { ++ rtdm_lock_get(&tx_ring->tx_lock); ++ if (rtnetif_queue_stopped(netdev) && ++ (E1000_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) ++ rtnetif_wake_queue(netdev); ++ rtdm_lock_put(&tx_ring->tx_lock); ++ } ++ ++ if (adapter->detect_tx_hung) { ++ /* Detect a transmit hang in hardware, this serializes the ++ * check with the clearing of time_stamp and movement of i */ ++ adapter->detect_tx_hung = FALSE; ++ if (tx_ring->buffer_info[eop].dma && ++ time_after(jiffies, tx_ring->buffer_info[eop].time_stamp + ++ (adapter->tx_timeout_factor * HZ)) ++ && !(E1000_READ_REG(&adapter->hw, STATUS) & ++ E1000_STATUS_TXOFF)) { ++ ++ /* detected Tx unit hang */ ++ DPRINTK(DRV, ERR, "Detected Tx Unit Hang\n" ++ " Tx Queue <%lu>\n" ++ " TDH <%x>\n" ++ " TDT <%x>\n" ++ " next_to_use <%x>\n" ++ " next_to_clean <%x>\n" ++ "buffer_info[next_to_clean]\n" ++ " time_stamp <%lx>\n" ++ " next_to_watch <%x>\n" ++ " jiffies <%lx>\n" ++ " next_to_watch.status <%x>\n", ++ (unsigned long)((tx_ring - adapter->tx_ring) / ++ sizeof(struct e1000_tx_ring)), ++ readl(adapter->hw.hw_addr + tx_ring->tdh), ++ readl(adapter->hw.hw_addr + tx_ring->tdt), ++ tx_ring->next_to_use, ++ tx_ring->next_to_clean, ++ tx_ring->buffer_info[eop].time_stamp, ++ eop, ++ jiffies, ++ eop_desc->upper.fields.status); ++ rtnetif_stop_queue(netdev); ++ } ++ } ++ return cleaned; ++} ++ ++/** ++ * e1000_rx_checksum - Receive Checksum Offload for 82543 ++ * @adapter: board private structure ++ * @status_err: receive descriptor status and error fields ++ * @csum: receive descriptor csum field ++ * @sk_buff: socket buffer with received data ++ **/ ++ ++static void ++e1000_rx_checksum(struct e1000_adapter *adapter, ++ uint32_t status_err, uint32_t csum, ++ struct rtskb *skb) ++{ ++ uint16_t status = 
(uint16_t)status_err; ++ uint8_t errors = (uint8_t)(status_err >> 24); ++ skb->ip_summed = CHECKSUM_NONE; ++ ++ /* 82543 or newer only */ ++ if (unlikely(adapter->hw.mac_type < e1000_82543)) return; ++ /* Ignore Checksum bit is set */ ++ if (unlikely(status & E1000_RXD_STAT_IXSM)) return; ++ /* TCP/UDP checksum error bit is set */ ++ if (unlikely(errors & E1000_RXD_ERR_TCPE)) { ++ /* let the stack verify checksum errors */ ++ adapter->hw_csum_err++; ++ return; ++ } ++ /* TCP/UDP Checksum has not been calculated */ ++ if (adapter->hw.mac_type <= e1000_82547_rev_2) { ++ if (!(status & E1000_RXD_STAT_TCPCS)) ++ return; ++ } else { ++ if (!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) ++ return; ++ } ++ /* It must be a TCP or UDP packet with a valid checksum */ ++ if (likely(status & E1000_RXD_STAT_TCPCS)) { ++ /* TCP checksum is good */ ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ } else if (adapter->hw.mac_type > e1000_82547_rev_2) { ++ /* IP fragment with UDP payload */ ++ /* Hardware complements the payload checksum, so we undo it ++ * and then put the value in host order for further stack use. ++ */ ++ csum = ntohl(csum ^ 0xFFFF); ++ skb->csum = csum; ++ skb->ip_summed = CHECKSUM_PARTIAL; ++ } ++ adapter->hw_csum_good++; ++} ++ ++/** ++ * e1000_clean_rx_irq - Send received data up the network stack; legacy ++ * @adapter: board private structure ++ **/ ++ ++static boolean_t ++e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc, *next_rxd; ++ struct e1000_buffer *buffer_info, *next_buffer; ++ uint32_t length; ++ uint8_t last_byte; ++ unsigned int i; ++ int cleaned_count = 0; ++ boolean_t cleaned = FALSE; ++ ++ i = rx_ring->next_to_clean; ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (rx_desc->status & E1000_RXD_STAT_DD) { ++ struct rtskb *skb, *next_skb; ++ u8 status; ++ ++ status = rx_desc->status; ++ skb = buffer_info->skb; ++ buffer_info->skb = NULL; ++ ++ prefetch(skb->data - NET_IP_ALIGN); ++ ++ if (++i == rx_ring->count) i = 0; ++ next_rxd = E1000_RX_DESC(*rx_ring, i); ++ prefetch(next_rxd); ++ ++ next_buffer = &rx_ring->buffer_info[i]; ++ next_skb = next_buffer->skb; ++ prefetch(next_skb->data - NET_IP_ALIGN); ++ ++ cleaned = TRUE; ++ cleaned_count++; ++ pci_unmap_single(pdev, ++ buffer_info->dma, ++ buffer_info->length, ++ PCI_DMA_FROMDEVICE); ++ ++ length = le16_to_cpu(rx_desc->length); ++ ++ if (unlikely(!(status & E1000_RXD_STAT_EOP))) { ++ /* All receives must fit into a single buffer */ ++ E1000_DBG("%s: Receive packet consumed multiple" ++ " buffers\n", netdev->name); ++ /* recycle */ ++ buffer_info->skb = skb; ++ goto next_desc; ++ } ++ ++ if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { ++ last_byte = *(skb->data + length - 1); ++ if (TBI_ACCEPT(&adapter->hw, status, ++ rx_desc->errors, length, last_byte)) { ++ length--; ++ } else { ++ /* recycle */ ++ buffer_info->skb = skb; ++ goto next_desc; ++ } ++ } ++ ++ /* code added for copybreak, this should improve ++ * performance for small packets with large amounts ++ * of reassembly being done in the stack */ ++ rtskb_put(skb, length); ++ ++ /* end copybreak code */ ++ ++ /* Receive Checksum Offload */ ++ e1000_rx_checksum(adapter, ++ (uint32_t)(status) | ++ ((uint32_t)(rx_desc->errors) << 24), ++ le16_to_cpu(rx_desc->csum), skb); ++ ++ skb->protocol = 
rt_eth_type_trans(skb, netdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ adapter->data_received = 1; // Set flag for the main interrupt routine ++ ++next_desc: ++ rx_desc->status = 0; ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) { ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ cleaned_count = 0; ++ } ++ ++ /* use prefetched values */ ++ rx_desc = next_rxd; ++ buffer_info = next_buffer; ++ } ++ rx_ring->next_to_clean = i; ++ ++ cleaned_count = E1000_DESC_UNUSED(rx_ring); ++ if (cleaned_count) ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ ++ return cleaned; ++} ++ ++/** ++ * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended ++ * @adapter: address of board private structure ++ **/ ++ ++static void ++e1000_alloc_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc; ++ struct e1000_buffer *buffer_info; ++ struct rtskb *skb; ++ unsigned int i; ++ unsigned int bufsz = adapter->rx_buffer_len + NET_IP_ALIGN; ++ ++ i = rx_ring->next_to_use; ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (cleaned_count--) { ++ if (!(skb = buffer_info->skb)) ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ else { ++ rtskb_trim(skb, 0); ++ goto map_skb; ++ } ++ ++ if (unlikely(!skb)) { ++ /* Better luck next round */ ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ struct rtskb *oldskb = skb; ++ DPRINTK(RX_ERR, ERR, "skb align check failed: %u bytes " ++ "at %p\n", bufsz, skb->data); ++ /* Try again, without freeing the previous */ ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ /* Failed allocation, critical failure */ ++ if (!skb) { ++ kfree_rtskb(oldskb); ++ break; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ /* give up */ ++ kfree_rtskb(skb); ++ kfree_rtskb(oldskb); ++ break; /* while !buffer_info->skb */ ++ } else { ++ /* Use new allocation */ ++ kfree_rtskb(oldskb); ++ } ++ } ++ /* Make buffer alignment 2 beyond a 16 byte boundary ++ * this will result in a 16 byte aligned IP header after ++ * the 14 byte MAC header is removed ++ */ ++ rtskb_reserve(skb, NET_IP_ALIGN); ++ ++ buffer_info->skb = skb; ++ buffer_info->length = adapter->rx_buffer_len; ++map_skb: ++ buffer_info->dma = pci_map_single(pdev, ++ skb->data, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, ++ (void *)(unsigned long)buffer_info->dma, ++ adapter->rx_buffer_len)) { ++ DPRINTK(RX_ERR, ERR, ++ "dma align check failed: %u bytes at %p\n", ++ adapter->rx_buffer_len, ++ (void *)(unsigned long)buffer_info->dma); ++ kfree_rtskb(skb); ++ buffer_info->skb = NULL; ++ ++ pci_unmap_single(pdev, buffer_info->dma, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++ ++ break; /* while !buffer_info->skb */ ++ } ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ ++ if (unlikely(++i == rx_ring->count)) ++ i = 0; ++ buffer_info = &rx_ring->buffer_info[i]; ++ } ++ ++ if (likely(rx_ring->next_to_use != i)) { ++ rx_ring->next_to_use = i; ++ if (unlikely(i-- == 0)) ++ i = (rx_ring->count - 1); ++ ++ /* Force memory writes to complete before 
letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). */ ++ wmb(); ++ writel(i, adapter->hw.hw_addr + rx_ring->rdt); ++ } ++} ++ ++ ++/** ++ * e1000_smartspeed - Workaround for SmartSpeed on 82541 and 82547 controllers. ++ * @adapter: ++ **/ ++ ++static void ++e1000_smartspeed(struct e1000_adapter *adapter) ++{ ++ uint16_t phy_status; ++ uint16_t phy_ctrl; ++ ++ if ((adapter->hw.phy_type != e1000_phy_igp) || !adapter->hw.autoneg || ++ !(adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL)) ++ return; ++ ++ if (adapter->smartspeed == 0) { ++ /* If Master/Slave config fault is asserted twice, ++ * we assume back-to-back */ ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_status); ++ if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_status); ++ if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_ctrl); ++ if (phy_ctrl & CR_1000T_MS_ENABLE) { ++ phy_ctrl &= ~CR_1000T_MS_ENABLE; ++ e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, ++ phy_ctrl); ++ adapter->smartspeed++; ++ if (!e1000_phy_setup_autoneg(&adapter->hw) && ++ !e1000_read_phy_reg(&adapter->hw, PHY_CTRL, ++ &phy_ctrl)) { ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | ++ MII_CR_RESTART_AUTO_NEG); ++ e1000_write_phy_reg(&adapter->hw, PHY_CTRL, ++ phy_ctrl); ++ } ++ } ++ return; ++ } else if (adapter->smartspeed == E1000_SMARTSPEED_DOWNSHIFT) { ++ /* If still no link, perhaps using 2/3 pair cable */ ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_ctrl); ++ phy_ctrl |= CR_1000T_MS_ENABLE; ++ e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_ctrl); ++ if (!e1000_phy_setup_autoneg(&adapter->hw) && ++ !e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_ctrl)) { ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | ++ MII_CR_RESTART_AUTO_NEG); ++ e1000_write_phy_reg(&adapter->hw, PHY_CTRL, phy_ctrl); ++ } ++ } ++ /* Restart process after E1000_SMARTSPEED_MAX iterations */ ++ if (adapter->smartspeed++ == E1000_SMARTSPEED_MAX) ++ adapter->smartspeed = 0; ++} ++ ++ ++ ++void ++e1000_pci_set_mwi(struct e1000_hw *hw) ++{ ++ struct e1000_adapter *adapter = hw->back; ++#ifdef HAVE_PCI_SET_MWI ++ int ret_val = pci_set_mwi(adapter->pdev); ++ ++ if (ret_val) ++ DPRINTK(PROBE, ERR, "Error in setting MWI\n"); ++#else ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, ++ adapter->hw.pci_cmd_word | ++ PCI_COMMAND_INVALIDATE); ++#endif ++} ++ ++void ++e1000_pci_clear_mwi(struct e1000_hw *hw) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++#ifdef HAVE_PCI_SET_MWI ++ pci_clear_mwi(adapter->pdev); ++#else ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, ++ adapter->hw.pci_cmd_word & ++ ~PCI_COMMAND_INVALIDATE); ++#endif ++} ++ ++void ++e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++ pci_read_config_word(adapter->pdev, reg, value); ++} ++ ++void ++e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++ pci_write_config_word(adapter->pdev, reg, *value); ++} ++ ++uint32_t ++e1000_io_read(struct e1000_hw *hw, unsigned long port) ++{ ++ return inl(port); ++} ++ ++void ++e1000_io_write(struct e1000_hw *hw, unsigned long port, uint32_t value) ++{ ++ outl(value, port); ++} ++ ++ ++int ++e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx) ++{ ++ adapter->hw.autoneg = 0; ++ ++ /* Fiber NICs only 
allow 1000 gbps Full duplex */ ++ if ((adapter->hw.media_type == e1000_media_type_fiber) && ++ spddplx != (SPEED_1000 + DUPLEX_FULL)) { ++ DPRINTK(PROBE, ERR, "Unsupported Speed/Duplex configuration\n"); ++ return -EINVAL; ++ } ++ ++ switch (spddplx) { ++ case SPEED_10 + DUPLEX_HALF: ++ adapter->hw.forced_speed_duplex = e1000_10_half; ++ break; ++ case SPEED_10 + DUPLEX_FULL: ++ adapter->hw.forced_speed_duplex = e1000_10_full; ++ break; ++ case SPEED_100 + DUPLEX_HALF: ++ adapter->hw.forced_speed_duplex = e1000_100_half; ++ break; ++ case SPEED_100 + DUPLEX_FULL: ++ adapter->hw.forced_speed_duplex = e1000_100_full; ++ break; ++ case SPEED_1000 + DUPLEX_FULL: ++ adapter->hw.autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ case SPEED_1000 + DUPLEX_HALF: /* not supported */ ++ default: ++ DPRINTK(PROBE, ERR, "Unsupported Speed/Duplex configuration\n"); ++ return -EINVAL; ++ } ++ return 0; ++} +--- linux/drivers/xenomai/net/drivers/e1000/e1000.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000.h 2021-04-07 16:01:27.437633866 +0800 +@@ -0,0 +1,391 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++ ++/* Linux PRO/1000 Ethernet Driver main header file */ ++ ++#ifndef _E1000_H_ ++#define _E1000_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef NETIF_F_ISO ++#undef NETIF_F_ISO ++#endif ++ ++#ifdef NETIF_F_TSO ++#include ++#endif ++#ifdef SIOCGMIIPHY ++#include ++#endif ++#ifdef SIOCETHTOOL ++#include ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#undef NETIF_F_HW_VLAN_TX ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#include ++#endif ++ ++// RTNET ++#include ++ ++ ++#define BAR_0 0 ++#define BAR_1 1 ++#define BAR_5 5 ++ ++#include "kcompat.h" ++#define INTEL_E1000_ETHERNET_DEVICE(device_id) {\ ++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)} ++ ++struct e1000_adapter; ++ ++#include "e1000_hw.h" ++ ++#ifdef DBG ++#define E1000_DBG(args...) printk(KERN_DEBUG "e1000: " args) ++#else ++#define E1000_DBG(args...) ++#endif ++ ++#define E1000_ERR(args...) 
printk(KERN_ERR "e1000: " args) ++ ++#define PFX "e1000: " ++#define DPRINTK(nlevel, klevel, fmt, args...) \ ++ (void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \ ++ printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \ ++ __FUNCTION__ , ## args)) ++ ++#define E1000_MAX_INTR 10 ++ ++/* TX/RX descriptor defines */ ++#define E1000_DEFAULT_TXD 256 ++#define E1000_MAX_TXD 256 ++#define E1000_MIN_TXD 80 ++#define E1000_MAX_82544_TXD 4096 ++ ++#define E1000_DEFAULT_RXD 256 ++#define E1000_MAX_RXD 256 ++#define E1000_MIN_RXD 80 ++#define E1000_MAX_82544_RXD 4096 ++ ++/* Supported Rx Buffer Sizes */ ++#define E1000_RXBUFFER_128 128 /* Used for packet split */ ++#define E1000_RXBUFFER_256 256 /* Used for packet split */ ++#define E1000_RXBUFFER_512 512 ++#define E1000_RXBUFFER_1024 1024 ++#define E1000_RXBUFFER_2048 2048 ++#define E1000_RXBUFFER_4096 4096 ++#define E1000_RXBUFFER_8192 8192 ++#define E1000_RXBUFFER_16384 16384 ++ ++/* SmartSpeed delimiters */ ++#define E1000_SMARTSPEED_DOWNSHIFT 3 ++#define E1000_SMARTSPEED_MAX 15 ++ ++/* Packet Buffer allocations */ ++#define E1000_PBA_BYTES_SHIFT 0xA ++#define E1000_TX_HEAD_ADDR_SHIFT 7 ++#define E1000_PBA_TX_MASK 0xFFFF0000 ++ ++/* Flow Control Watermarks */ ++#define E1000_FC_HIGH_DIFF 0x1638 /* High: 5688 bytes below Rx FIFO size */ ++#define E1000_FC_LOW_DIFF 0x1640 /* Low: 5696 bytes below Rx FIFO size */ ++ ++#define E1000_FC_PAUSE_TIME 0x0680 /* 858 usec */ ++ ++/* How many Tx Descriptors do we need to call netif_wake_queue ? */ ++#define E1000_TX_QUEUE_WAKE 16 ++/* How many Rx Buffers do we bundle into one write to the hardware ? */ ++#define E1000_RX_BUFFER_WRITE 16 /* Must be power of 2 */ ++ ++#define AUTO_ALL_MODES 0 ++#define E1000_EEPROM_82544_APM 0x0004 ++#define E1000_EEPROM_ICH8_APME 0x0004 ++#define E1000_EEPROM_APME 0x0400 ++ ++#ifndef E1000_MASTER_SLAVE ++/* Switch to override PHY master/slave setting */ ++#define E1000_MASTER_SLAVE e1000_ms_hw_default ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#define E1000_MNG_VLAN_NONE -1 ++#endif ++/* Number of packet split data buffers (not including the header buffer) */ ++#define PS_PAGE_BUFFERS MAX_PS_BUFFERS-1 ++ ++/* only works for sizes that are powers of 2 */ ++#define E1000_ROUNDUP(i, size) ((i) = (((i) + (size) - 1) & ~((size) - 1))) ++ ++/* wrapper around a pointer to a socket buffer, ++ * so a DMA handle can be stored along with the buffer */ ++struct e1000_buffer { ++ struct rtskb *skb; ++ dma_addr_t dma; ++ unsigned long time_stamp; ++ uint16_t length; ++ uint16_t next_to_watch; ++}; ++ ++ ++struct e1000_ps_page { struct page *ps_page[PS_PAGE_BUFFERS]; }; ++struct e1000_ps_page_dma { uint64_t ps_page_dma[PS_PAGE_BUFFERS]; }; ++ ++struct e1000_tx_ring { ++ /* pointer to the descriptor ring memory */ ++ void *desc; ++ /* physical address of the descriptor ring */ ++ dma_addr_t dma; ++ /* length of descriptor ring in bytes */ ++ unsigned int size; ++ /* number of descriptors in the ring */ ++ unsigned int count; ++ /* next descriptor to associate a buffer with */ ++ unsigned int next_to_use; ++ /* next descriptor to check for DD status bit */ ++ unsigned int next_to_clean; ++ /* array of buffer information structs */ ++ struct e1000_buffer *buffer_info; ++ ++ rtdm_lock_t tx_lock; ++ uint16_t tdh; ++ uint16_t tdt; ++ boolean_t last_tx_tso; ++}; ++ ++struct e1000_rx_ring { ++ /* pointer to the descriptor ring memory */ ++ void *desc; ++ /* physical address of the descriptor ring */ ++ dma_addr_t dma; ++ /* length of descriptor ring in bytes */ ++ unsigned int size; 
++ /* number of descriptors in the ring */ ++ unsigned int count; ++ /* next descriptor to associate a buffer with */ ++ unsigned int next_to_use; ++ /* next descriptor to check for DD status bit */ ++ unsigned int next_to_clean; ++ /* array of buffer information structs */ ++ struct e1000_buffer *buffer_info; ++ /* arrays of page information for packet split */ ++ struct e1000_ps_page *ps_page; ++ struct e1000_ps_page_dma *ps_page_dma; ++ ++ /* cpu for rx queue */ ++ int cpu; ++ ++ uint16_t rdh; ++ uint16_t rdt; ++}; ++ ++#define E1000_DESC_UNUSED(R) \ ++ ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ ++ (R)->next_to_clean - (R)->next_to_use - 1) ++ ++#define E1000_RX_DESC_PS(R, i) \ ++ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) ++#define E1000_RX_DESC_EXT(R, i) \ ++ (&(((union e1000_rx_desc_extended *)((R).desc))[i])) ++#define E1000_GET_DESC(R, i, type) (&(((struct type *)((R).desc))[i])) ++#define E1000_RX_DESC(R, i) E1000_GET_DESC(R, i, e1000_rx_desc) ++#define E1000_TX_DESC(R, i) E1000_GET_DESC(R, i, e1000_tx_desc) ++#define E1000_CONTEXT_DESC(R, i) E1000_GET_DESC(R, i, e1000_context_desc) ++ ++/* board specific private data structure */ ++ ++struct e1000_adapter { ++#ifdef NETIF_F_HW_VLAN_TX ++ struct vlan_group *vlgrp; ++ uint16_t mng_vlan_id; ++#endif ++ uint32_t bd_number; ++ uint32_t rx_buffer_len; ++ uint32_t part_num; ++ uint32_t wol; ++ uint32_t ksp3_port_a; ++ uint32_t smartspeed; ++ uint32_t en_mng_pt; ++ uint16_t link_speed; ++ uint16_t link_duplex; ++#ifdef CONFIG_E1000_NAPI ++ spinlock_t tx_queue_lock; ++#endif ++ atomic_t irq_sem; ++ struct work_struct reset_task; ++ uint8_t fc_autoneg; ++ ++#ifdef ETHTOOL_PHYS_ID ++ struct timer_list blink_timer; ++ unsigned long led_status; ++#endif ++ ++ /* TX */ ++ struct e1000_tx_ring *tx_ring; /* One per active queue */ ++ unsigned long tx_queue_len; ++ uint32_t txd_cmd; ++ uint32_t tx_int_delay; ++ uint32_t tx_abs_int_delay; ++ uint32_t gotcl; ++ uint64_t gotcl_old; ++ uint64_t tpt_old; ++ uint64_t colc_old; ++ uint32_t tx_timeout_count; ++ uint32_t tx_fifo_head; ++ uint32_t tx_head_addr; ++ uint32_t tx_fifo_size; ++ uint8_t tx_timeout_factor; ++ atomic_t tx_fifo_stall; ++ boolean_t pcix_82544; ++ boolean_t detect_tx_hung; ++ ++ /* RX */ ++#ifdef CONFIG_E1000_NAPI ++ boolean_t (*clean_rx) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do); ++#else ++ boolean_t (*clean_rx) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++#endif ++ void (*alloc_rx_buf) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++ struct e1000_rx_ring *rx_ring; /* One per active queue */ ++#ifdef CONFIG_E1000_NAPI ++ struct net_device *polling_netdev; /* One per active queue */ ++#endif ++ int num_tx_queues; ++ int num_rx_queues; ++ ++ uint64_t hw_csum_err; ++ uint64_t hw_csum_good; ++ uint64_t rx_hdr_split; ++ uint32_t alloc_rx_buff_failed; ++ uint32_t rx_int_delay; ++ uint32_t rx_abs_int_delay; ++ boolean_t rx_csum; ++ unsigned int rx_ps_pages; ++ uint32_t gorcl; ++ uint64_t gorcl_old; ++ uint16_t rx_ps_bsize0; ++ ++ /* Interrupt Throttle Rate */ ++ uint32_t itr; ++ ++ /* OS defined structs */ ++ struct rtnet_device *netdev; ++ struct pci_dev *pdev; ++ struct net_device_stats net_stats; ++ ++ rtdm_irq_t irq_handle; ++ boolean_t data_received; ++ ++ /* structs defined in e1000_hw.h */ ++ struct e1000_hw hw; ++ struct e1000_hw_stats stats; ++ struct e1000_phy_info phy_info; ++ struct e1000_phy_stats phy_stats; ++ 
++#ifdef ETHTOOL_TEST ++ uint32_t test_icr; ++ struct e1000_tx_ring test_tx_ring; ++ struct e1000_rx_ring test_rx_ring; ++#endif ++ ++#ifdef E1000_COUNT_ICR ++ uint64_t icr_txdw; ++ uint64_t icr_txqe; ++ uint64_t icr_lsc; ++ uint64_t icr_rxseq; ++ uint64_t icr_rxdmt; ++ uint64_t icr_rxo; ++ uint64_t icr_rxt; ++ uint64_t icr_mdac; ++ uint64_t icr_rxcfg; ++ uint64_t icr_gpi; ++#endif ++ ++ uint32_t *config_space; ++ int msg_enable; ++#ifdef CONFIG_PCI_MSI ++ boolean_t have_msi; ++#endif ++ /* to not mess up cache alignment, always add to the bottom */ ++#ifdef NETIF_F_TSO ++ boolean_t tso_force; ++#endif ++ boolean_t smart_power_down; /* phy smart power down */ ++ unsigned long flags; ++ ++ struct delayed_work watchdog_task; ++ struct delayed_work fifo_stall_task; ++ struct delayed_work phy_info_task; ++}; ++ ++enum e1000_state_t { ++ __E1000_DRIVER_TESTING, ++ __E1000_RESETTING, ++}; ++#endif /* _E1000_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000/e1000_hw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000_hw.h 2021-04-07 16:01:27.433633872 +0800 +@@ -0,0 +1,3454 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_hw.h ++ * Structures, enums, and macros for the MAC ++ */ ++ ++#ifndef _E1000_HW_H_ ++#define _E1000_HW_H_ ++ ++#include "e1000_osdep.h" ++ ++ ++/* Forward declarations of structures used by the shared code */ ++struct e1000_hw; ++struct e1000_hw_stats; ++ ++/* Enumerated types specific to the e1000 hardware */ ++/* Media Access Controlers */ ++typedef enum { ++ e1000_undefined = 0, ++ e1000_82542_rev2_0, ++ e1000_82542_rev2_1, ++ e1000_82543, ++ e1000_82544, ++ e1000_82540, ++ e1000_82545, ++ e1000_82545_rev_3, ++ e1000_82546, ++ e1000_82546_rev_3, ++ e1000_82541, ++ e1000_82541_rev_2, ++ e1000_82547, ++ e1000_82547_rev_2, ++ e1000_82571, ++ e1000_82572, ++ e1000_82573, ++ e1000_80003es2lan, ++ e1000_ich8lan, ++ e1000_num_macs ++} e1000_mac_type; ++ ++typedef enum { ++ e1000_eeprom_uninitialized = 0, ++ e1000_eeprom_spi, ++ e1000_eeprom_microwire, ++ e1000_eeprom_flash, ++ e1000_eeprom_ich8, ++ e1000_eeprom_none, /* No NVM support */ ++ e1000_num_eeprom_types ++} e1000_eeprom_type; ++ ++/* Media Types */ ++typedef enum { ++ e1000_media_type_copper = 0, ++ e1000_media_type_fiber = 1, ++ e1000_media_type_internal_serdes = 2, ++ e1000_num_media_types ++} e1000_media_type; ++ ++typedef enum { ++ e1000_10_half = 0, ++ e1000_10_full = 1, ++ e1000_100_half = 2, ++ e1000_100_full = 3 ++} e1000_speed_duplex_type; ++ ++/* Flow Control Settings */ ++typedef enum { ++ e1000_fc_none = 0, ++ e1000_fc_rx_pause = 1, ++ e1000_fc_tx_pause = 2, ++ e1000_fc_full = 3, ++ e1000_fc_default = 0xFF ++} e1000_fc_type; ++ ++struct e1000_shadow_ram { ++ uint16_t eeprom_word; ++ boolean_t modified; ++}; ++ ++/* PCI bus types */ ++typedef enum { ++ e1000_bus_type_unknown = 0, ++ e1000_bus_type_pci, ++ e1000_bus_type_pcix, ++ e1000_bus_type_pci_express, ++ e1000_bus_type_reserved ++} e1000_bus_type; ++ ++/* PCI bus speeds */ ++typedef enum { ++ e1000_bus_speed_unknown = 0, ++ e1000_bus_speed_33, ++ e1000_bus_speed_66, ++ e1000_bus_speed_100, ++ e1000_bus_speed_120, ++ e1000_bus_speed_133, ++ e1000_bus_speed_2500, ++ e1000_bus_speed_reserved ++} e1000_bus_speed; ++ ++/* PCI bus widths */ ++typedef enum { ++ e1000_bus_width_unknown = 0, ++ e1000_bus_width_32, ++ e1000_bus_width_64, ++ e1000_bus_width_pciex_1, ++ e1000_bus_width_pciex_2, ++ e1000_bus_width_pciex_4, ++ e1000_bus_width_reserved ++} e1000_bus_width; ++ ++/* PHY status info structure and supporting enums */ ++typedef enum { ++ e1000_cable_length_50 = 0, ++ e1000_cable_length_50_80, ++ e1000_cable_length_80_110, ++ e1000_cable_length_110_140, ++ e1000_cable_length_140, ++ e1000_cable_length_undefined = 0xFF ++} e1000_cable_length; ++ ++typedef enum { ++ e1000_gg_cable_length_60 = 0, ++ e1000_gg_cable_length_60_115 = 1, ++ e1000_gg_cable_length_115_150 = 2, ++ e1000_gg_cable_length_150 = 4 ++} e1000_gg_cable_length; ++ ++typedef enum { ++ e1000_igp_cable_length_10 = 10, ++ e1000_igp_cable_length_20 = 20, ++ e1000_igp_cable_length_30 = 30, ++ e1000_igp_cable_length_40 = 40, ++ e1000_igp_cable_length_50 = 50, ++ e1000_igp_cable_length_60 = 60, ++ e1000_igp_cable_length_70 = 70, ++ e1000_igp_cable_length_80 = 80, ++ e1000_igp_cable_length_90 = 90, ++ e1000_igp_cable_length_100 = 100, ++ e1000_igp_cable_length_110 = 110, ++ e1000_igp_cable_length_115 = 115, ++ e1000_igp_cable_length_120 = 120, ++ e1000_igp_cable_length_130 = 130, ++ e1000_igp_cable_length_140 = 140, ++ e1000_igp_cable_length_150 = 150, ++ 
e1000_igp_cable_length_160 = 160, ++ e1000_igp_cable_length_170 = 170, ++ e1000_igp_cable_length_180 = 180 ++} e1000_igp_cable_length; ++ ++typedef enum { ++ e1000_10bt_ext_dist_enable_normal = 0, ++ e1000_10bt_ext_dist_enable_lower, ++ e1000_10bt_ext_dist_enable_undefined = 0xFF ++} e1000_10bt_ext_dist_enable; ++ ++typedef enum { ++ e1000_rev_polarity_normal = 0, ++ e1000_rev_polarity_reversed, ++ e1000_rev_polarity_undefined = 0xFF ++} e1000_rev_polarity; ++ ++typedef enum { ++ e1000_downshift_normal = 0, ++ e1000_downshift_activated, ++ e1000_downshift_undefined = 0xFF ++} e1000_downshift; ++ ++typedef enum { ++ e1000_smart_speed_default = 0, ++ e1000_smart_speed_on, ++ e1000_smart_speed_off ++} e1000_smart_speed; ++ ++typedef enum { ++ e1000_polarity_reversal_enabled = 0, ++ e1000_polarity_reversal_disabled, ++ e1000_polarity_reversal_undefined = 0xFF ++} e1000_polarity_reversal; ++ ++typedef enum { ++ e1000_auto_x_mode_manual_mdi = 0, ++ e1000_auto_x_mode_manual_mdix, ++ e1000_auto_x_mode_auto1, ++ e1000_auto_x_mode_auto2, ++ e1000_auto_x_mode_undefined = 0xFF ++} e1000_auto_x_mode; ++ ++typedef enum { ++ e1000_1000t_rx_status_not_ok = 0, ++ e1000_1000t_rx_status_ok, ++ e1000_1000t_rx_status_undefined = 0xFF ++} e1000_1000t_rx_status; ++ ++typedef enum { ++ e1000_phy_m88 = 0, ++ e1000_phy_igp, ++ e1000_phy_igp_2, ++ e1000_phy_gg82563, ++ e1000_phy_igp_3, ++ e1000_phy_ife, ++ e1000_phy_undefined = 0xFF ++} e1000_phy_type; ++ ++typedef enum { ++ e1000_ms_hw_default = 0, ++ e1000_ms_force_master, ++ e1000_ms_force_slave, ++ e1000_ms_auto ++} e1000_ms_type; ++ ++typedef enum { ++ e1000_ffe_config_enabled = 0, ++ e1000_ffe_config_active, ++ e1000_ffe_config_blocked ++} e1000_ffe_config; ++ ++typedef enum { ++ e1000_dsp_config_disabled = 0, ++ e1000_dsp_config_enabled, ++ e1000_dsp_config_activated, ++ e1000_dsp_config_undefined = 0xFF ++} e1000_dsp_config; ++ ++struct e1000_phy_info { ++ e1000_cable_length cable_length; ++ e1000_10bt_ext_dist_enable extended_10bt_distance; ++ e1000_rev_polarity cable_polarity; ++ e1000_downshift downshift; ++ e1000_polarity_reversal polarity_correction; ++ e1000_auto_x_mode mdix_mode; ++ e1000_1000t_rx_status local_rx; ++ e1000_1000t_rx_status remote_rx; ++}; ++ ++struct e1000_phy_stats { ++ uint32_t idle_errors; ++ uint32_t receive_errors; ++}; ++ ++struct e1000_eeprom_info { ++ e1000_eeprom_type type; ++ uint16_t word_size; ++ uint16_t opcode_bits; ++ uint16_t address_bits; ++ uint16_t delay_usec; ++ uint16_t page_size; ++ boolean_t use_eerd; ++ boolean_t use_eewr; ++}; ++ ++/* Flex ASF Information */ ++#define E1000_HOST_IF_MAX_SIZE 2048 ++ ++typedef enum { ++ e1000_byte_align = 0, ++ e1000_word_align = 1, ++ e1000_dword_align = 2 ++} e1000_align_type; ++ ++ ++ ++/* Error Codes */ ++#define E1000_SUCCESS 0 ++#define E1000_ERR_EEPROM 1 ++#define E1000_ERR_PHY 2 ++#define E1000_ERR_CONFIG 3 ++#define E1000_ERR_PARAM 4 ++#define E1000_ERR_MAC_TYPE 5 ++#define E1000_ERR_PHY_TYPE 6 ++#define E1000_ERR_RESET 9 ++#define E1000_ERR_MASTER_REQUESTS_PENDING 10 ++#define E1000_ERR_HOST_INTERFACE_COMMAND 11 ++#define E1000_BLK_PHY_RESET 12 ++#define E1000_ERR_SWFW_SYNC 13 ++ ++/* Function prototypes */ ++/* Initialization */ ++int32_t e1000_reset_hw(struct e1000_hw *hw); ++int32_t e1000_init_hw(struct e1000_hw *hw); ++int32_t e1000_id_led_init(struct e1000_hw * hw); ++int32_t e1000_set_mac_type(struct e1000_hw *hw); ++void e1000_set_media_type(struct e1000_hw *hw); ++ ++/* Link Configuration */ ++int32_t e1000_setup_link(struct e1000_hw *hw); ++int32_t 
e1000_phy_setup_autoneg(struct e1000_hw *hw); ++void e1000_config_collision_dist(struct e1000_hw *hw); ++int32_t e1000_config_fc_after_link_up(struct e1000_hw *hw); ++int32_t e1000_check_for_link(struct e1000_hw *hw); ++int32_t e1000_get_speed_and_duplex(struct e1000_hw *hw, uint16_t * speed, uint16_t * duplex); ++int32_t e1000_wait_autoneg(struct e1000_hw *hw); ++int32_t e1000_force_mac_fc(struct e1000_hw *hw); ++ ++/* PHY */ ++int32_t e1000_read_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t *phy_data); ++int32_t e1000_write_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t data); ++int32_t e1000_phy_hw_reset(struct e1000_hw *hw); ++int32_t e1000_phy_reset(struct e1000_hw *hw); ++void e1000_phy_powerdown_workaround(struct e1000_hw *hw); ++int32_t e1000_kumeran_lock_loss_workaround(struct e1000_hw *hw); ++int32_t e1000_duplex_reversal(struct e1000_hw *hw); ++int32_t e1000_init_lcd_from_nvm_config_region(struct e1000_hw *hw, uint32_t cnf_base_addr, uint32_t cnf_size); ++int32_t e1000_init_lcd_from_nvm(struct e1000_hw *hw); ++int32_t e1000_detect_gig_phy(struct e1000_hw *hw); ++int32_t e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info); ++int32_t e1000_phy_m88_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info); ++int32_t e1000_phy_igp_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info); ++int32_t e1000_get_cable_length(struct e1000_hw *hw, uint16_t *min_length, uint16_t *max_length); ++int32_t e1000_check_polarity(struct e1000_hw *hw, uint16_t *polarity); ++int32_t e1000_check_downshift(struct e1000_hw *hw); ++int32_t e1000_validate_mdi_setting(struct e1000_hw *hw); ++int32_t e1000_read_kmrn_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t *data); ++int32_t e1000_write_kmrn_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t data); ++ ++/* EEPROM Functions */ ++int32_t e1000_init_eeprom_params(struct e1000_hw *hw); ++boolean_t e1000_is_onboard_nvm_eeprom(struct e1000_hw *hw); ++int32_t e1000_read_eeprom_eerd(struct e1000_hw *hw, uint16_t offset, uint16_t words, uint16_t *data); ++int32_t e1000_write_eeprom_eewr(struct e1000_hw *hw, uint16_t offset, uint16_t words, uint16_t *data); ++int32_t e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int eerd); ++ ++/* MNG HOST IF functions */ ++uint32_t e1000_enable_mng_pass_thru(struct e1000_hw *hw); ++ ++#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64 ++#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 /* Host Interface data length */ ++ ++#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10 /* Time in ms to process MNG command */ ++#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0 /* Cookie offset */ ++#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10 /* Cookie length */ ++#define E1000_MNG_IAMT_MODE 0x3 ++#define E1000_MNG_ICH_IAMT_MODE 0x2 ++#define E1000_IAMT_SIGNATURE 0x544D4149 /* Intel(R) Active Management Technology signature */ ++ ++#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING_SUPPORT 0x1 /* DHCP parsing enabled */ ++#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT 0x2 /* DHCP parsing enabled */ ++#define E1000_VFTA_ENTRY_SHIFT 0x5 ++#define E1000_VFTA_ENTRY_MASK 0x7F ++#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F ++ ++struct e1000_host_mng_command_header { ++ uint8_t command_id; ++ uint8_t checksum; ++ uint16_t reserved1; ++ uint16_t reserved2; ++ uint16_t command_length; ++}; ++ ++struct e1000_host_mng_command_info { ++ struct e1000_host_mng_command_header command_header; /* Command Head/Command Result Head has 4 bytes */ ++ uint8_t command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; /* Command data can length 0..0x658*/ 
++}; ++#ifdef E1000_BIG_ENDIAN ++struct e1000_host_mng_dhcp_cookie{ ++ uint32_t signature; ++ uint16_t vlan_id; ++ uint8_t reserved0; ++ uint8_t status; ++ uint32_t reserved1; ++ uint8_t checksum; ++ uint8_t reserved3; ++ uint16_t reserved2; ++}; ++#else ++struct e1000_host_mng_dhcp_cookie{ ++ uint32_t signature; ++ uint8_t status; ++ uint8_t reserved0; ++ uint16_t vlan_id; ++ uint32_t reserved1; ++ uint16_t reserved2; ++ uint8_t reserved3; ++ uint8_t checksum; ++}; ++#endif ++ ++int32_t e1000_mng_write_dhcp_info(struct e1000_hw *hw, uint8_t *buffer, ++ uint16_t length); ++boolean_t e1000_check_mng_mode(struct e1000_hw *hw); ++boolean_t e1000_enable_tx_pkt_filtering(struct e1000_hw *hw); ++int32_t e1000_mng_enable_host_if(struct e1000_hw *hw); ++int32_t e1000_mng_host_if_write(struct e1000_hw *hw, uint8_t *buffer, ++ uint16_t length, uint16_t offset, uint8_t *sum); ++int32_t e1000_mng_write_cmd_header(struct e1000_hw* hw, ++ struct e1000_host_mng_command_header* hdr); ++ ++int32_t e1000_mng_write_commit(struct e1000_hw *hw); ++ ++int32_t e1000_read_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t words, uint16_t *data); ++int32_t e1000_validate_eeprom_checksum(struct e1000_hw *hw); ++int32_t e1000_update_eeprom_checksum(struct e1000_hw *hw); ++int32_t e1000_write_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t words, uint16_t *data); ++int32_t e1000_read_part_num(struct e1000_hw *hw, uint32_t * part_num); ++int32_t e1000_read_mac_addr(struct e1000_hw * hw); ++int32_t e1000_swfw_sync_acquire(struct e1000_hw *hw, uint16_t mask); ++void e1000_swfw_sync_release(struct e1000_hw *hw, uint16_t mask); ++void e1000_release_software_flag(struct e1000_hw *hw); ++int32_t e1000_get_software_flag(struct e1000_hw *hw); ++ ++/* Filters (multicast, vlan, receive) */ ++void e1000_init_rx_addrs(struct e1000_hw *hw); ++void e1000_mc_addr_list_update(struct e1000_hw *hw, uint8_t * mc_addr_list, uint32_t mc_addr_count, uint32_t pad, uint32_t rar_used_count); ++uint32_t e1000_hash_mc_addr(struct e1000_hw *hw, uint8_t * mc_addr); ++void e1000_mta_set(struct e1000_hw *hw, uint32_t hash_value); ++void e1000_rar_set(struct e1000_hw *hw, uint8_t * mc_addr, uint32_t rar_index); ++void e1000_write_vfta(struct e1000_hw *hw, uint32_t offset, uint32_t value); ++void e1000_clear_vfta(struct e1000_hw *hw); ++ ++/* LED functions */ ++int32_t e1000_setup_led(struct e1000_hw *hw); ++int32_t e1000_cleanup_led(struct e1000_hw *hw); ++int32_t e1000_led_on(struct e1000_hw *hw); ++int32_t e1000_led_off(struct e1000_hw *hw); ++int32_t e1000_blink_led_start(struct e1000_hw *hw); ++ ++/* Adaptive IFS Functions */ ++ ++/* Everything else */ ++void e1000_clear_hw_cntrs(struct e1000_hw *hw); ++void e1000_reset_adaptive(struct e1000_hw *hw); ++void e1000_update_adaptive(struct e1000_hw *hw); ++void e1000_tbi_adjust_stats(struct e1000_hw *hw, struct e1000_hw_stats *stats, uint32_t frame_len, uint8_t * mac_addr); ++void e1000_get_bus_info(struct e1000_hw *hw); ++void e1000_pci_set_mwi(struct e1000_hw *hw); ++void e1000_pci_clear_mwi(struct e1000_hw *hw); ++void e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t * value); ++void e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t * value); ++/* Port I/O is only supported on 82544 and newer */ ++uint32_t e1000_io_read(struct e1000_hw *hw, unsigned long port); ++uint32_t e1000_read_reg_io(struct e1000_hw *hw, uint32_t offset); ++void e1000_io_write(struct e1000_hw *hw, unsigned long port, uint32_t value); ++void e1000_write_reg_io(struct e1000_hw *hw, uint32_t 
offset, uint32_t value); ++int32_t e1000_config_dsp_after_link_change(struct e1000_hw *hw, boolean_t link_up); ++int32_t e1000_set_d3_lplu_state(struct e1000_hw *hw, boolean_t active); ++int32_t e1000_set_d0_lplu_state(struct e1000_hw *hw, boolean_t active); ++void e1000_set_pci_express_master_disable(struct e1000_hw *hw); ++void e1000_enable_pciex_master(struct e1000_hw *hw); ++int32_t e1000_disable_pciex_master(struct e1000_hw *hw); ++int32_t e1000_get_auto_rd_done(struct e1000_hw *hw); ++int32_t e1000_get_phy_cfg_done(struct e1000_hw *hw); ++int32_t e1000_get_software_semaphore(struct e1000_hw *hw); ++void e1000_release_software_semaphore(struct e1000_hw *hw); ++int32_t e1000_check_phy_reset_block(struct e1000_hw *hw); ++int32_t e1000_get_hw_eeprom_semaphore(struct e1000_hw *hw); ++void e1000_put_hw_eeprom_semaphore(struct e1000_hw *hw); ++int32_t e1000_commit_shadow_ram(struct e1000_hw *hw); ++uint8_t e1000_arc_subsystem_valid(struct e1000_hw *hw); ++int32_t e1000_set_pci_ex_no_snoop(struct e1000_hw *hw, uint32_t no_snoop); ++ ++int32_t e1000_read_ich8_byte(struct e1000_hw *hw, uint32_t index, ++ uint8_t *data); ++int32_t e1000_verify_write_ich8_byte(struct e1000_hw *hw, uint32_t index, ++ uint8_t byte); ++int32_t e1000_write_ich8_byte(struct e1000_hw *hw, uint32_t index, ++ uint8_t byte); ++int32_t e1000_read_ich8_word(struct e1000_hw *hw, uint32_t index, ++ uint16_t *data); ++int32_t e1000_write_ich8_word(struct e1000_hw *hw, uint32_t index, ++ uint16_t word); ++int32_t e1000_read_ich8_data(struct e1000_hw *hw, uint32_t index, ++ uint32_t size, uint16_t *data); ++int32_t e1000_write_ich8_data(struct e1000_hw *hw, uint32_t index, ++ uint32_t size, uint16_t data); ++int32_t e1000_read_eeprom_ich8(struct e1000_hw *hw, uint16_t offset, ++ uint16_t words, uint16_t *data); ++int32_t e1000_write_eeprom_ich8(struct e1000_hw *hw, uint16_t offset, ++ uint16_t words, uint16_t *data); ++int32_t e1000_erase_ich8_4k_segment(struct e1000_hw *hw, uint32_t segment); ++int32_t e1000_ich8_cycle_init(struct e1000_hw *hw); ++int32_t e1000_ich8_flash_cycle(struct e1000_hw *hw, uint32_t timeout); ++int32_t e1000_phy_ife_get_info(struct e1000_hw *hw, ++ struct e1000_phy_info *phy_info); ++int32_t e1000_ife_disable_dynamic_power_down(struct e1000_hw *hw); ++int32_t e1000_ife_enable_dynamic_power_down(struct e1000_hw *hw); ++ ++#define E1000_READ_REG_IO(a, reg) \ ++ e1000_read_reg_io((a), E1000_##reg) ++#define E1000_WRITE_REG_IO(a, reg, val) \ ++ e1000_write_reg_io((a), E1000_##reg, val) ++ ++/* PCI Device IDs */ ++#define E1000_DEV_ID_82542 0x1000 ++#define E1000_DEV_ID_82543GC_FIBER 0x1001 ++#define E1000_DEV_ID_82543GC_COPPER 0x1004 ++#define E1000_DEV_ID_82544EI_COPPER 0x1008 ++#define E1000_DEV_ID_82544EI_FIBER 0x1009 ++#define E1000_DEV_ID_82544GC_COPPER 0x100C ++#define E1000_DEV_ID_82544GC_LOM 0x100D ++#define E1000_DEV_ID_82540EM 0x100E ++#define E1000_DEV_ID_82540EM_LOM 0x1015 ++#define E1000_DEV_ID_82540EP_LOM 0x1016 ++#define E1000_DEV_ID_82540EP 0x1017 ++#define E1000_DEV_ID_82540EP_LP 0x101E ++#define E1000_DEV_ID_82545EM_COPPER 0x100F ++#define E1000_DEV_ID_82545EM_FIBER 0x1011 ++#define E1000_DEV_ID_82545GM_COPPER 0x1026 ++#define E1000_DEV_ID_82545GM_FIBER 0x1027 ++#define E1000_DEV_ID_82545GM_SERDES 0x1028 ++#define E1000_DEV_ID_82546EB_COPPER 0x1010 ++#define E1000_DEV_ID_82546EB_FIBER 0x1012 ++#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D ++#define E1000_DEV_ID_82541EI 0x1013 ++#define E1000_DEV_ID_82541EI_MOBILE 0x1018 ++#define E1000_DEV_ID_82541ER_LOM 0x1014 ++#define 
E1000_DEV_ID_82541ER 0x1078 ++#define E1000_DEV_ID_82547GI 0x1075 ++#define E1000_DEV_ID_82541GI 0x1076 ++#define E1000_DEV_ID_82541GI_MOBILE 0x1077 ++#define E1000_DEV_ID_82541GI_LF 0x107C ++#define E1000_DEV_ID_82546GB_COPPER 0x1079 ++#define E1000_DEV_ID_82546GB_FIBER 0x107A ++#define E1000_DEV_ID_82546GB_SERDES 0x107B ++#define E1000_DEV_ID_82546GB_PCIE 0x108A ++#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 ++#define E1000_DEV_ID_82547EI 0x1019 ++#define E1000_DEV_ID_82547EI_MOBILE 0x101A ++#define E1000_DEV_ID_82571EB_COPPER 0x105E ++#define E1000_DEV_ID_82571EB_FIBER 0x105F ++#define E1000_DEV_ID_82571EB_SERDES 0x1060 ++#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 ++#define E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE 0x10BC ++#define E1000_DEV_ID_82572EI_COPPER 0x107D ++#define E1000_DEV_ID_82572EI_FIBER 0x107E ++#define E1000_DEV_ID_82572EI_SERDES 0x107F ++#define E1000_DEV_ID_82572EI 0x10B9 ++#define E1000_DEV_ID_82573E 0x108B ++#define E1000_DEV_ID_82573E_IAMT 0x108C ++#define E1000_DEV_ID_82573L 0x109A ++#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 ++#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 ++#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 ++#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA ++#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB ++ ++#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 ++#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A ++#define E1000_DEV_ID_ICH8_IGP_C 0x104B ++#define E1000_DEV_ID_ICH8_IFE 0x104C ++#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 ++#define E1000_DEV_ID_ICH8_IFE_G 0x10C5 ++#define E1000_DEV_ID_ICH8_IGP_M 0x104D ++ ++ ++#define NODE_ADDRESS_SIZE 6 ++#define ETH_LENGTH_OF_ADDRESS 6 ++ ++/* MAC decode size is 128K - This is the size of BAR0 */ ++#define MAC_DECODE_SIZE (128 * 1024) ++ ++#define E1000_82542_2_0_REV_ID 2 ++#define E1000_82542_2_1_REV_ID 3 ++#define E1000_REVISION_0 0 ++#define E1000_REVISION_1 1 ++#define E1000_REVISION_2 2 ++#define E1000_REVISION_3 3 ++ ++#define SPEED_10 10 ++#define SPEED_100 100 ++#define SPEED_1000 1000 ++#define HALF_DUPLEX 1 ++#define FULL_DUPLEX 2 ++ ++/* The sizes (in bytes) of a ethernet packet */ ++#define ENET_HEADER_SIZE 14 ++#define MAXIMUM_ETHERNET_FRAME_SIZE 1518 /* With FCS */ ++#define MINIMUM_ETHERNET_FRAME_SIZE 64 /* With FCS */ ++#define ETHERNET_FCS_SIZE 4 ++#define MAXIMUM_ETHERNET_PACKET_SIZE \ ++ (MAXIMUM_ETHERNET_FRAME_SIZE - ETHERNET_FCS_SIZE) ++#define MINIMUM_ETHERNET_PACKET_SIZE \ ++ (MINIMUM_ETHERNET_FRAME_SIZE - ETHERNET_FCS_SIZE) ++#define CRC_LENGTH ETHERNET_FCS_SIZE ++#define MAX_JUMBO_FRAME_SIZE 0x3F00 ++ ++ ++/* 802.1q VLAN Packet Sizes */ ++#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMAed) */ ++ ++/* Ethertype field values */ ++#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ ++#define ETHERNET_IP_TYPE 0x0800 /* IP packets */ ++#define ETHERNET_ARP_TYPE 0x0806 /* Address Resolution Protocol (ARP) */ ++ ++/* Packet Header defines */ ++#define IP_PROTOCOL_TCP 6 ++#define IP_PROTOCOL_UDP 0x11 ++ ++/* This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. Each bit is documented below: ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ */ ++#define POLL_IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ) ++ ++/* This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. 
Each bit is documented below: ++ * o RXT0 = Receiver Timer Interrupt (ring 0) ++ * o TXDW = Transmit Descriptor Written Back ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ * o LSC = Link Status Change ++ */ ++#define IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXT0 | \ ++ E1000_IMS_TXDW | \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ | \ ++ E1000_IMS_LSC) ++ ++/* Additional interrupts need to be handled for e1000_ich8lan: ++ DSW = The FW changed the status of the DISSW bit in FWSM ++ PHYINT = The LAN connected device generates an interrupt ++ EPRST = Manageability reset event */ ++#define IMS_ICH8LAN_ENABLE_MASK (\ ++ E1000_IMS_DSW | \ ++ E1000_IMS_PHYINT | \ ++ E1000_IMS_EPRST) ++ ++/* Number of high/low register pairs in the RAR. The RAR (Receive Address ++ * Registers) holds the directed and multicast addresses that we monitor. We ++ * reserve one of these spots for our directed address, allowing us room for ++ * E1000_RAR_ENTRIES - 1 multicast addresses. ++ */ ++#define E1000_RAR_ENTRIES 15 ++#define E1000_RAR_ENTRIES_ICH8LAN 7 ++ ++#define MIN_NUMBER_OF_DESCRIPTORS 8 ++#define MAX_NUMBER_OF_DESCRIPTORS 0xFFF8 ++ ++/* Receive Descriptor */ ++struct e1000_rx_desc { ++ uint64_t buffer_addr; /* Address of the descriptor's data buffer */ ++ uint16_t length; /* Length of data DMAed into data buffer */ ++ uint16_t csum; /* Packet checksum */ ++ uint8_t status; /* Descriptor status */ ++ uint8_t errors; /* Descriptor Errors */ ++ uint16_t special; ++}; ++ ++/* Receive Descriptor - Extended */ ++union e1000_rx_desc_extended { ++ struct { ++ uint64_t buffer_addr; ++ uint64_t reserved; ++ } read; ++ struct { ++ struct { ++ uint32_t mrq; /* Multiple Rx Queues */ ++ union { ++ uint32_t rss; /* RSS Hash */ ++ struct { ++ uint16_t ip_id; /* IP id */ ++ uint16_t csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ uint32_t status_error; /* ext status/error */ ++ uint16_t length; ++ uint16_t vlan; /* VLAN tag */ ++ } upper; ++ } wb; /* writeback */ ++}; ++ ++#define MAX_PS_BUFFERS 4 ++/* Receive Descriptor - Packet Split */ ++union e1000_rx_desc_packet_split { ++ struct { ++ /* one buffer for protocol header(s), three data buffers */ ++ uint64_t buffer_addr[MAX_PS_BUFFERS]; ++ } read; ++ struct { ++ struct { ++ uint32_t mrq; /* Multiple Rx Queues */ ++ union { ++ uint32_t rss; /* RSS Hash */ ++ struct { ++ uint16_t ip_id; /* IP id */ ++ uint16_t csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ uint32_t status_error; /* ext status/error */ ++ uint16_t length0; /* length of buffer 0 */ ++ uint16_t vlan; /* VLAN tag */ ++ } middle; ++ struct { ++ uint16_t header_status; ++ uint16_t length[3]; /* length of buffers 1-3 */ ++ } upper; ++ uint64_t reserved; ++ } wb; /* writeback */ ++}; ++ ++/* Receive Decriptor bit definitions */ ++#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ ++#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ ++#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ ++#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ ++#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum caculated */ ++#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ ++#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ ++#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */ ++#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */ ++#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ ++#define E1000_RXD_STAT_ACK 0x8000 /* ACK Packet indication */ ++#define 
E1000_RXD_ERR_CE 0x01 /* CRC Error */ ++#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ ++#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ ++#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ ++#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ ++#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ ++#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ ++#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ ++#define E1000_RXD_SPC_PRI_MASK 0xE000 /* Priority is in upper 3 bits */ ++#define E1000_RXD_SPC_PRI_SHIFT 13 ++#define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */ ++#define E1000_RXD_SPC_CFI_SHIFT 12 ++ ++#define E1000_RXDEXT_STATERR_CE 0x01000000 ++#define E1000_RXDEXT_STATERR_SE 0x02000000 ++#define E1000_RXDEXT_STATERR_SEQ 0x04000000 ++#define E1000_RXDEXT_STATERR_CXE 0x10000000 ++#define E1000_RXDEXT_STATERR_TCPE 0x20000000 ++#define E1000_RXDEXT_STATERR_IPE 0x40000000 ++#define E1000_RXDEXT_STATERR_RXE 0x80000000 ++ ++#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 ++#define E1000_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF ++ ++/* mask to determine if packets should be dropped due to frame errors */ ++#define E1000_RXD_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXD_ERR_CE | \ ++ E1000_RXD_ERR_SE | \ ++ E1000_RXD_ERR_SEQ | \ ++ E1000_RXD_ERR_CXE | \ ++ E1000_RXD_ERR_RXE) ++ ++ ++/* Same mask, but for extended and packet split descriptors */ ++#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXDEXT_STATERR_CE | \ ++ E1000_RXDEXT_STATERR_SE | \ ++ E1000_RXDEXT_STATERR_SEQ | \ ++ E1000_RXDEXT_STATERR_CXE | \ ++ E1000_RXDEXT_STATERR_RXE) ++ ++/* Transmit Descriptor */ ++struct e1000_tx_desc { ++ uint64_t buffer_addr; /* Address of the descriptor's data buffer */ ++ union { ++ uint32_t data; ++ struct { ++ uint16_t length; /* Data buffer length */ ++ uint8_t cso; /* Checksum offset */ ++ uint8_t cmd; /* Descriptor control */ ++ } flags; ++ } lower; ++ union { ++ uint32_t data; ++ struct { ++ uint8_t status; /* Descriptor status */ ++ uint8_t css; /* Checksum start */ ++ uint16_t special; ++ } fields; ++ } upper; ++}; ++ ++/* Transmit Descriptor bit definitions */ ++#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ ++#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */ ++#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ ++#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ ++#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ ++#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ ++#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ ++#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ ++#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ ++#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ ++#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ ++#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ ++#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ ++#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ ++#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ ++#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ ++#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ ++#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ ++ ++/* Offload Context Descriptor */ ++struct e1000_context_desc { ++ union { ++ uint32_t ip_config; ++ struct { ++ uint8_t ipcss; /* IP checksum start */ ++ uint8_t ipcso; 
/* IP checksum offset */ ++ uint16_t ipcse; /* IP checksum end */ ++ } ip_fields; ++ } lower_setup; ++ union { ++ uint32_t tcp_config; ++ struct { ++ uint8_t tucss; /* TCP checksum start */ ++ uint8_t tucso; /* TCP checksum offset */ ++ uint16_t tucse; /* TCP checksum end */ ++ } tcp_fields; ++ } upper_setup; ++ uint32_t cmd_and_length; /* */ ++ union { ++ uint32_t data; ++ struct { ++ uint8_t status; /* Descriptor status */ ++ uint8_t hdr_len; /* Header length */ ++ uint16_t mss; /* Maximum segment size */ ++ } fields; ++ } tcp_seg_setup; ++}; ++ ++/* Offload data descriptor */ ++struct e1000_data_desc { ++ uint64_t buffer_addr; /* Address of the descriptor's buffer address */ ++ union { ++ uint32_t data; ++ struct { ++ uint16_t length; /* Data buffer length */ ++ uint8_t typ_len_ext; /* */ ++ uint8_t cmd; /* */ ++ } flags; ++ } lower; ++ union { ++ uint32_t data; ++ struct { ++ uint8_t status; /* Descriptor status */ ++ uint8_t popts; /* Packet Options */ ++ uint16_t special; /* */ ++ } fields; ++ } upper; ++}; ++ ++/* Filters */ ++#define E1000_NUM_UNICAST 16 /* Unicast filter entries */ ++#define E1000_MC_TBL_SIZE 128 /* Multicast Filter Table (4096 bits) */ ++#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ ++ ++#define E1000_NUM_UNICAST_ICH8LAN 7 ++#define E1000_MC_TBL_SIZE_ICH8LAN 32 ++ ++ ++/* Receive Address Register */ ++struct e1000_rar { ++ volatile uint32_t low; /* receive address low */ ++ volatile uint32_t high; /* receive address high */ ++}; ++ ++/* Number of entries in the Multicast Table Array (MTA). */ ++#define E1000_NUM_MTA_REGISTERS 128 ++#define E1000_NUM_MTA_REGISTERS_ICH8LAN 32 ++ ++/* IPv4 Address Table Entry */ ++struct e1000_ipv4_at_entry { ++ volatile uint32_t ipv4_addr; /* IP Address (RW) */ ++ volatile uint32_t reserved; ++}; ++ ++/* Four wakeup IP addresses are supported */ ++#define E1000_WAKEUP_IP_ADDRESS_COUNT_MAX 4 ++#define E1000_IP4AT_SIZE E1000_WAKEUP_IP_ADDRESS_COUNT_MAX ++#define E1000_IP4AT_SIZE_ICH8LAN 3 ++#define E1000_IP6AT_SIZE 1 ++ ++/* IPv6 Address Table Entry */ ++struct e1000_ipv6_at_entry { ++ volatile uint8_t ipv6_addr[16]; ++}; ++ ++/* Flexible Filter Length Table Entry */ ++struct e1000_fflt_entry { ++ volatile uint32_t length; /* Flexible Filter Length (RW) */ ++ volatile uint32_t reserved; ++}; ++ ++/* Flexible Filter Mask Table Entry */ ++struct e1000_ffmt_entry { ++ volatile uint32_t mask; /* Flexible Filter Mask (RW) */ ++ volatile uint32_t reserved; ++}; ++ ++/* Flexible Filter Value Table Entry */ ++struct e1000_ffvt_entry { ++ volatile uint32_t value; /* Flexible Filter Value (RW) */ ++ volatile uint32_t reserved; ++}; ++ ++/* Four Flexible Filters are supported */ ++#define E1000_FLEXIBLE_FILTER_COUNT_MAX 4 ++ ++/* Each Flexible Filter is at most 128 (0x80) bytes in length */ ++#define E1000_FLEXIBLE_FILTER_SIZE_MAX 128 ++ ++#define E1000_FFLT_SIZE E1000_FLEXIBLE_FILTER_COUNT_MAX ++#define E1000_FFMT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX ++#define E1000_FFVT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX ++ ++#define E1000_DISABLE_SERDES_LOOPBACK 0x0400 ++ ++/* Register Set. (82543, 82544) ++ * ++ * Registers are defined to be 32 bits and should be accessed as 32 bit values. ++ * These registers are physically located on the NIC, but are mapped into the ++ * host memory address space. 
++ * ++ * RW - register is both readable and writable ++ * RO - register is read only ++ * WO - register is write only ++ * R/clr - register is read only and is cleared when read ++ * A - register array ++ */ ++#define E1000_CTRL 0x00000 /* Device Control - RW */ ++#define E1000_CTRL_DUP 0x00004 /* Device Control Duplicate (Shadow) - RW */ ++#define E1000_STATUS 0x00008 /* Device Status - RO */ ++#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ ++#define E1000_EERD 0x00014 /* EEPROM Read - RW */ ++#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ ++#define E1000_FLA 0x0001C /* Flash Access - RW */ ++#define E1000_MDIC 0x00020 /* MDI Control - RW */ ++#define E1000_SCTL 0x00024 /* SerDes Control - RW */ ++#define E1000_FEXTNVM 0x00028 /* Future Extended NVM register */ ++#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ ++#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ ++#define E1000_FCT 0x00030 /* Flow Control Type - RW */ ++#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ ++#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ ++#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ ++#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ ++#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ ++#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ ++#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ ++#define E1000_RCTL 0x00100 /* RX Control - RW */ ++#define E1000_RDTR1 0x02820 /* RX Delay Timer (1) - RW */ ++#define E1000_RDBAL1 0x02900 /* RX Descriptor Base Address Low (1) - RW */ ++#define E1000_RDBAH1 0x02904 /* RX Descriptor Base Address High (1) - RW */ ++#define E1000_RDLEN1 0x02908 /* RX Descriptor Length (1) - RW */ ++#define E1000_RDH1 0x02910 /* RX Descriptor Head (1) - RW */ ++#define E1000_RDT1 0x02918 /* RX Descriptor Tail (1) - RW */ ++#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ ++#define E1000_TXCW 0x00178 /* TX Configuration Word - RW */ ++#define E1000_RXCW 0x00180 /* RX Configuration Word - RO */ ++#define E1000_TCTL 0x00400 /* TX Control - RW */ ++#define E1000_TCTL_EXT 0x00404 /* Extended TX Control - RW */ ++#define E1000_TIPG 0x00410 /* TX Inter-packet gap -RW */ ++#define E1000_TBT 0x00448 /* TX Burst Timer - RW */ ++#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ ++#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ ++#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */ ++#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */ ++#define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */ ++#define FEXTNVM_SW_CONFIG 0x0001 ++#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ ++#define E1000_PBS 0x01008 /* Packet Buffer Size */ ++#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ ++#define E1000_FLASH_UPDATES 1000 ++#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ ++#define E1000_FLASHT 0x01028 /* FLASH Timer Register */ ++#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ ++#define E1000_FLSWCTL 0x01030 /* FLASH control register */ ++#define E1000_FLSWDATA 0x01034 /* FLASH data register */ ++#define E1000_FLSWCNT 0x01038 /* FLASH Access Counter */ ++#define E1000_FLOP 0x0103C /* FLASH Opcode Register */ ++#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ ++#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ ++#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ ++#define E1000_PSRCTL 0x02170 
/* Packet Split Receive Control - RW */ ++#define E1000_RDBAL 0x02800 /* RX Descriptor Base Address Low - RW */ ++#define E1000_RDBAH 0x02804 /* RX Descriptor Base Address High - RW */ ++#define E1000_RDLEN 0x02808 /* RX Descriptor Length - RW */ ++#define E1000_RDH 0x02810 /* RX Descriptor Head - RW */ ++#define E1000_RDT 0x02818 /* RX Descriptor Tail - RW */ ++#define E1000_RDTR 0x02820 /* RX Delay Timer - RW */ ++#define E1000_RDBAL0 E1000_RDBAL /* RX Desc Base Address Low (0) - RW */ ++#define E1000_RDBAH0 E1000_RDBAH /* RX Desc Base Address High (0) - RW */ ++#define E1000_RDLEN0 E1000_RDLEN /* RX Desc Length (0) - RW */ ++#define E1000_RDH0 E1000_RDH /* RX Desc Head (0) - RW */ ++#define E1000_RDT0 E1000_RDT /* RX Desc Tail (0) - RW */ ++#define E1000_RDTR0 E1000_RDTR /* RX Delay Timer (0) - RW */ ++#define E1000_RXDCTL 0x02828 /* RX Descriptor Control queue 0 - RW */ ++#define E1000_RXDCTL1 0x02928 /* RX Descriptor Control queue 1 - RW */ ++#define E1000_RADV 0x0282C /* RX Interrupt Absolute Delay Timer - RW */ ++#define E1000_RSRPD 0x02C00 /* RX Small Packet Detect - RW */ ++#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ ++#define E1000_TXDMAC 0x03000 /* TX DMA Control - RW */ ++#define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ ++#define E1000_TDFH 0x03410 /* TX Data FIFO Head - RW */ ++#define E1000_TDFT 0x03418 /* TX Data FIFO Tail - RW */ ++#define E1000_TDFHS 0x03420 /* TX Data FIFO Head Saved - RW */ ++#define E1000_TDFTS 0x03428 /* TX Data FIFO Tail Saved - RW */ ++#define E1000_TDFPC 0x03430 /* TX Data FIFO Packet Count - RW */ ++#define E1000_TDBAL 0x03800 /* TX Descriptor Base Address Low - RW */ ++#define E1000_TDBAH 0x03804 /* TX Descriptor Base Address High - RW */ ++#define E1000_TDLEN 0x03808 /* TX Descriptor Length - RW */ ++#define E1000_TDH 0x03810 /* TX Descriptor Head - RW */ ++#define E1000_TDT 0x03818 /* TX Descripotr Tail - RW */ ++#define E1000_TIDV 0x03820 /* TX Interrupt Delay Value - RW */ ++#define E1000_TXDCTL 0x03828 /* TX Descriptor Control - RW */ ++#define E1000_TADV 0x0382C /* TX Interrupt Absolute Delay Val - RW */ ++#define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */ ++#define E1000_TARC0 0x03840 /* TX Arbitration Count (0) */ ++#define E1000_TDBAL1 0x03900 /* TX Desc Base Address Low (1) - RW */ ++#define E1000_TDBAH1 0x03904 /* TX Desc Base Address High (1) - RW */ ++#define E1000_TDLEN1 0x03908 /* TX Desc Length (1) - RW */ ++#define E1000_TDH1 0x03910 /* TX Desc Head (1) - RW */ ++#define E1000_TDT1 0x03918 /* TX Desc Tail (1) - RW */ ++#define E1000_TXDCTL1 0x03928 /* TX Descriptor Control (1) - RW */ ++#define E1000_TARC1 0x03940 /* TX Arbitration Count (1) */ ++#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ ++#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ ++#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ ++#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ ++#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ ++#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ ++#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ ++#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ ++#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ ++#define E1000_COLC 0x04028 /* Collision Count - R/clr */ ++#define E1000_DC 0x04030 /* Defer Count - R/clr */ ++#define E1000_TNCRS 0x04034 /* TX-No CRS - R/clr */ ++#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ ++#define 
E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ ++#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ ++#define E1000_XONRXC 0x04048 /* XON RX Count - R/clr */ ++#define E1000_XONTXC 0x0404C /* XON TX Count - R/clr */ ++#define E1000_XOFFRXC 0x04050 /* XOFF RX Count - R/clr */ ++#define E1000_XOFFTXC 0x04054 /* XOFF TX Count - R/clr */ ++#define E1000_FCRUC 0x04058 /* Flow Control RX Unsupported Count- R/clr */ ++#define E1000_PRC64 0x0405C /* Packets RX (64 bytes) - R/clr */ ++#define E1000_PRC127 0x04060 /* Packets RX (65-127 bytes) - R/clr */ ++#define E1000_PRC255 0x04064 /* Packets RX (128-255 bytes) - R/clr */ ++#define E1000_PRC511 0x04068 /* Packets RX (255-511 bytes) - R/clr */ ++#define E1000_PRC1023 0x0406C /* Packets RX (512-1023 bytes) - R/clr */ ++#define E1000_PRC1522 0x04070 /* Packets RX (1024-1522 bytes) - R/clr */ ++#define E1000_GPRC 0x04074 /* Good Packets RX Count - R/clr */ ++#define E1000_BPRC 0x04078 /* Broadcast Packets RX Count - R/clr */ ++#define E1000_MPRC 0x0407C /* Multicast Packets RX Count - R/clr */ ++#define E1000_GPTC 0x04080 /* Good Packets TX Count - R/clr */ ++#define E1000_GORCL 0x04088 /* Good Octets RX Count Low - R/clr */ ++#define E1000_GORCH 0x0408C /* Good Octets RX Count High - R/clr */ ++#define E1000_GOTCL 0x04090 /* Good Octets TX Count Low - R/clr */ ++#define E1000_GOTCH 0x04094 /* Good Octets TX Count High - R/clr */ ++#define E1000_RNBC 0x040A0 /* RX No Buffers Count - R/clr */ ++#define E1000_RUC 0x040A4 /* RX Undersize Count - R/clr */ ++#define E1000_RFC 0x040A8 /* RX Fragment Count - R/clr */ ++#define E1000_ROC 0x040AC /* RX Oversize Count - R/clr */ ++#define E1000_RJC 0x040B0 /* RX Jabber Count - R/clr */ ++#define E1000_MGTPRC 0x040B4 /* Management Packets RX Count - R/clr */ ++#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ ++#define E1000_MGTPTC 0x040BC /* Management Packets TX Count - R/clr */ ++#define E1000_TORL 0x040C0 /* Total Octets RX Low - R/clr */ ++#define E1000_TORH 0x040C4 /* Total Octets RX High - R/clr */ ++#define E1000_TOTL 0x040C8 /* Total Octets TX Low - R/clr */ ++#define E1000_TOTH 0x040CC /* Total Octets TX High - R/clr */ ++#define E1000_TPR 0x040D0 /* Total Packets RX - R/clr */ ++#define E1000_TPT 0x040D4 /* Total Packets TX - R/clr */ ++#define E1000_PTC64 0x040D8 /* Packets TX (64 bytes) - R/clr */ ++#define E1000_PTC127 0x040DC /* Packets TX (65-127 bytes) - R/clr */ ++#define E1000_PTC255 0x040E0 /* Packets TX (128-255 bytes) - R/clr */ ++#define E1000_PTC511 0x040E4 /* Packets TX (256-511 bytes) - R/clr */ ++#define E1000_PTC1023 0x040E8 /* Packets TX (512-1023 bytes) - R/clr */ ++#define E1000_PTC1522 0x040EC /* Packets TX (1024-1522 Bytes) - R/clr */ ++#define E1000_MPTC 0x040F0 /* Multicast Packets TX Count - R/clr */ ++#define E1000_BPTC 0x040F4 /* Broadcast Packets TX Count - R/clr */ ++#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context TX - R/clr */ ++#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context TX Fail - R/clr */ ++#define E1000_IAC 0x04100 /* Interrupt Assertion Count */ ++#define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Packet Timer Expire Count */ ++#define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Absolute Timer Expire Count */ ++#define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Packet Timer Expire Count */ ++#define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Absolute Timer Expire Count */ ++#define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ ++#define E1000_ICTXQMTC 0x0411C /* 
Interrupt Cause Tx Queue Minimum Threshold Count */ ++#define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Descriptor Minimum Threshold Count */ ++#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ ++#define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */ ++#define E1000_RFCTL 0x05008 /* Receive Filter Control*/ ++#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ ++#define E1000_RA 0x05400 /* Receive Address - RW Array */ ++#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ ++#define E1000_WUC 0x05800 /* Wakeup Control - RW */ ++#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ ++#define E1000_WUS 0x05810 /* Wakeup Status - RO */ ++#define E1000_MANC 0x05820 /* Management Control - RW */ ++#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ ++#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */ ++#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ ++#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ ++#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ ++#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ ++#define E1000_HOST_IF 0x08800 /* Host Interface */ ++#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ ++#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ ++ ++#define E1000_KUMCTRLSTA 0x00034 /* MAC-PHY interface - RW */ ++#define E1000_MDPHYA 0x0003C /* PHY address - RW */ ++#define E1000_MANC2H 0x05860 /* Managment Control To Host - RW */ ++#define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ ++ ++#define E1000_GCR 0x05B00 /* PCI-Ex Control */ ++#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ ++#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ ++#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ ++#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ ++#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ ++#define E1000_SWSM 0x05B50 /* SW Semaphore */ ++#define E1000_FWSM 0x05B54 /* FW Semaphore */ ++#define E1000_FFLT_DBG 0x05F04 /* Debug Register */ ++#define E1000_HICR 0x08F00 /* Host Inteface Control */ ++ ++/* RSS registers */ ++#define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ ++#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ ++#define E1000_RETA 0x05C00 /* Redirection Table - RW Array */ ++#define E1000_RSSRK 0x05C80 /* RSS Random Key - RW Array */ ++#define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ ++#define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ ++/* Register Set (82542) ++ * ++ * Some of the 82542 registers are located at different offsets than they are ++ * in more current versions of the 8254x. Despite the difference in location, ++ * the registers function in the same manner. 
++ */ ++#define E1000_82542_CTRL E1000_CTRL ++#define E1000_82542_CTRL_DUP E1000_CTRL_DUP ++#define E1000_82542_STATUS E1000_STATUS ++#define E1000_82542_EECD E1000_EECD ++#define E1000_82542_EERD E1000_EERD ++#define E1000_82542_CTRL_EXT E1000_CTRL_EXT ++#define E1000_82542_FLA E1000_FLA ++#define E1000_82542_MDIC E1000_MDIC ++#define E1000_82542_SCTL E1000_SCTL ++#define E1000_82542_FEXTNVM E1000_FEXTNVM ++#define E1000_82542_FCAL E1000_FCAL ++#define E1000_82542_FCAH E1000_FCAH ++#define E1000_82542_FCT E1000_FCT ++#define E1000_82542_VET E1000_VET ++#define E1000_82542_RA 0x00040 ++#define E1000_82542_ICR E1000_ICR ++#define E1000_82542_ITR E1000_ITR ++#define E1000_82542_ICS E1000_ICS ++#define E1000_82542_IMS E1000_IMS ++#define E1000_82542_IMC E1000_IMC ++#define E1000_82542_RCTL E1000_RCTL ++#define E1000_82542_RDTR 0x00108 ++#define E1000_82542_RDBAL 0x00110 ++#define E1000_82542_RDBAH 0x00114 ++#define E1000_82542_RDLEN 0x00118 ++#define E1000_82542_RDH 0x00120 ++#define E1000_82542_RDT 0x00128 ++#define E1000_82542_RDTR0 E1000_82542_RDTR ++#define E1000_82542_RDBAL0 E1000_82542_RDBAL ++#define E1000_82542_RDBAH0 E1000_82542_RDBAH ++#define E1000_82542_RDLEN0 E1000_82542_RDLEN ++#define E1000_82542_RDH0 E1000_82542_RDH ++#define E1000_82542_RDT0 E1000_82542_RDT ++#define E1000_82542_SRRCTL(_n) (0x280C + ((_n) << 8)) /* Split and Replication ++ * RX Control - RW */ ++#define E1000_82542_DCA_RXCTRL(_n) (0x02814 + ((_n) << 8)) ++#define E1000_82542_RDBAH3 0x02B04 /* RX Desc Base High Queue 3 - RW */ ++#define E1000_82542_RDBAL3 0x02B00 /* RX Desc Low Queue 3 - RW */ ++#define E1000_82542_RDLEN3 0x02B08 /* RX Desc Length Queue 3 - RW */ ++#define E1000_82542_RDH3 0x02B10 /* RX Desc Head Queue 3 - RW */ ++#define E1000_82542_RDT3 0x02B18 /* RX Desc Tail Queue 3 - RW */ ++#define E1000_82542_RDBAL2 0x02A00 /* RX Desc Base Low Queue 2 - RW */ ++#define E1000_82542_RDBAH2 0x02A04 /* RX Desc Base High Queue 2 - RW */ ++#define E1000_82542_RDLEN2 0x02A08 /* RX Desc Length Queue 2 - RW */ ++#define E1000_82542_RDH2 0x02A10 /* RX Desc Head Queue 2 - RW */ ++#define E1000_82542_RDT2 0x02A18 /* RX Desc Tail Queue 2 - RW */ ++#define E1000_82542_RDTR1 0x00130 ++#define E1000_82542_RDBAL1 0x00138 ++#define E1000_82542_RDBAH1 0x0013C ++#define E1000_82542_RDLEN1 0x00140 ++#define E1000_82542_RDH1 0x00148 ++#define E1000_82542_RDT1 0x00150 ++#define E1000_82542_FCRTH 0x00160 ++#define E1000_82542_FCRTL 0x00168 ++#define E1000_82542_FCTTV E1000_FCTTV ++#define E1000_82542_TXCW E1000_TXCW ++#define E1000_82542_RXCW E1000_RXCW ++#define E1000_82542_MTA 0x00200 ++#define E1000_82542_TCTL E1000_TCTL ++#define E1000_82542_TCTL_EXT E1000_TCTL_EXT ++#define E1000_82542_TIPG E1000_TIPG ++#define E1000_82542_TDBAL 0x00420 ++#define E1000_82542_TDBAH 0x00424 ++#define E1000_82542_TDLEN 0x00428 ++#define E1000_82542_TDH 0x00430 ++#define E1000_82542_TDT 0x00438 ++#define E1000_82542_TIDV 0x00440 ++#define E1000_82542_TBT E1000_TBT ++#define E1000_82542_AIT E1000_AIT ++#define E1000_82542_VFTA 0x00600 ++#define E1000_82542_LEDCTL E1000_LEDCTL ++#define E1000_82542_PBA E1000_PBA ++#define E1000_82542_PBS E1000_PBS ++#define E1000_82542_EEMNGCTL E1000_EEMNGCTL ++#define E1000_82542_EEARBC E1000_EEARBC ++#define E1000_82542_FLASHT E1000_FLASHT ++#define E1000_82542_EEWR E1000_EEWR ++#define E1000_82542_FLSWCTL E1000_FLSWCTL ++#define E1000_82542_FLSWDATA E1000_FLSWDATA ++#define E1000_82542_FLSWCNT E1000_FLSWCNT ++#define E1000_82542_FLOP E1000_FLOP ++#define E1000_82542_EXTCNF_CTRL E1000_EXTCNF_CTRL ++#define 
E1000_82542_EXTCNF_SIZE E1000_EXTCNF_SIZE ++#define E1000_82542_PHY_CTRL E1000_PHY_CTRL ++#define E1000_82542_ERT E1000_ERT ++#define E1000_82542_RXDCTL E1000_RXDCTL ++#define E1000_82542_RXDCTL1 E1000_RXDCTL1 ++#define E1000_82542_RADV E1000_RADV ++#define E1000_82542_RSRPD E1000_RSRPD ++#define E1000_82542_TXDMAC E1000_TXDMAC ++#define E1000_82542_KABGTXD E1000_KABGTXD ++#define E1000_82542_TDFHS E1000_TDFHS ++#define E1000_82542_TDFTS E1000_TDFTS ++#define E1000_82542_TDFPC E1000_TDFPC ++#define E1000_82542_TXDCTL E1000_TXDCTL ++#define E1000_82542_TADV E1000_TADV ++#define E1000_82542_TSPMT E1000_TSPMT ++#define E1000_82542_CRCERRS E1000_CRCERRS ++#define E1000_82542_ALGNERRC E1000_ALGNERRC ++#define E1000_82542_SYMERRS E1000_SYMERRS ++#define E1000_82542_RXERRC E1000_RXERRC ++#define E1000_82542_MPC E1000_MPC ++#define E1000_82542_SCC E1000_SCC ++#define E1000_82542_ECOL E1000_ECOL ++#define E1000_82542_MCC E1000_MCC ++#define E1000_82542_LATECOL E1000_LATECOL ++#define E1000_82542_COLC E1000_COLC ++#define E1000_82542_DC E1000_DC ++#define E1000_82542_TNCRS E1000_TNCRS ++#define E1000_82542_SEC E1000_SEC ++#define E1000_82542_CEXTERR E1000_CEXTERR ++#define E1000_82542_RLEC E1000_RLEC ++#define E1000_82542_XONRXC E1000_XONRXC ++#define E1000_82542_XONTXC E1000_XONTXC ++#define E1000_82542_XOFFRXC E1000_XOFFRXC ++#define E1000_82542_XOFFTXC E1000_XOFFTXC ++#define E1000_82542_FCRUC E1000_FCRUC ++#define E1000_82542_PRC64 E1000_PRC64 ++#define E1000_82542_PRC127 E1000_PRC127 ++#define E1000_82542_PRC255 E1000_PRC255 ++#define E1000_82542_PRC511 E1000_PRC511 ++#define E1000_82542_PRC1023 E1000_PRC1023 ++#define E1000_82542_PRC1522 E1000_PRC1522 ++#define E1000_82542_GPRC E1000_GPRC ++#define E1000_82542_BPRC E1000_BPRC ++#define E1000_82542_MPRC E1000_MPRC ++#define E1000_82542_GPTC E1000_GPTC ++#define E1000_82542_GORCL E1000_GORCL ++#define E1000_82542_GORCH E1000_GORCH ++#define E1000_82542_GOTCL E1000_GOTCL ++#define E1000_82542_GOTCH E1000_GOTCH ++#define E1000_82542_RNBC E1000_RNBC ++#define E1000_82542_RUC E1000_RUC ++#define E1000_82542_RFC E1000_RFC ++#define E1000_82542_ROC E1000_ROC ++#define E1000_82542_RJC E1000_RJC ++#define E1000_82542_MGTPRC E1000_MGTPRC ++#define E1000_82542_MGTPDC E1000_MGTPDC ++#define E1000_82542_MGTPTC E1000_MGTPTC ++#define E1000_82542_TORL E1000_TORL ++#define E1000_82542_TORH E1000_TORH ++#define E1000_82542_TOTL E1000_TOTL ++#define E1000_82542_TOTH E1000_TOTH ++#define E1000_82542_TPR E1000_TPR ++#define E1000_82542_TPT E1000_TPT ++#define E1000_82542_PTC64 E1000_PTC64 ++#define E1000_82542_PTC127 E1000_PTC127 ++#define E1000_82542_PTC255 E1000_PTC255 ++#define E1000_82542_PTC511 E1000_PTC511 ++#define E1000_82542_PTC1023 E1000_PTC1023 ++#define E1000_82542_PTC1522 E1000_PTC1522 ++#define E1000_82542_MPTC E1000_MPTC ++#define E1000_82542_BPTC E1000_BPTC ++#define E1000_82542_TSCTC E1000_TSCTC ++#define E1000_82542_TSCTFC E1000_TSCTFC ++#define E1000_82542_RXCSUM E1000_RXCSUM ++#define E1000_82542_WUC E1000_WUC ++#define E1000_82542_WUFC E1000_WUFC ++#define E1000_82542_WUS E1000_WUS ++#define E1000_82542_MANC E1000_MANC ++#define E1000_82542_IPAV E1000_IPAV ++#define E1000_82542_IP4AT E1000_IP4AT ++#define E1000_82542_IP6AT E1000_IP6AT ++#define E1000_82542_WUPL E1000_WUPL ++#define E1000_82542_WUPM E1000_WUPM ++#define E1000_82542_FFLT E1000_FFLT ++#define E1000_82542_TDFH 0x08010 ++#define E1000_82542_TDFT 0x08018 ++#define E1000_82542_FFMT E1000_FFMT ++#define E1000_82542_FFVT E1000_FFVT ++#define E1000_82542_HOST_IF E1000_HOST_IF ++#define 
E1000_82542_IAM E1000_IAM ++#define E1000_82542_EEMNGCTL E1000_EEMNGCTL ++#define E1000_82542_PSRCTL E1000_PSRCTL ++#define E1000_82542_RAID E1000_RAID ++#define E1000_82542_TARC0 E1000_TARC0 ++#define E1000_82542_TDBAL1 E1000_TDBAL1 ++#define E1000_82542_TDBAH1 E1000_TDBAH1 ++#define E1000_82542_TDLEN1 E1000_TDLEN1 ++#define E1000_82542_TDH1 E1000_TDH1 ++#define E1000_82542_TDT1 E1000_TDT1 ++#define E1000_82542_TXDCTL1 E1000_TXDCTL1 ++#define E1000_82542_TARC1 E1000_TARC1 ++#define E1000_82542_RFCTL E1000_RFCTL ++#define E1000_82542_GCR E1000_GCR ++#define E1000_82542_GSCL_1 E1000_GSCL_1 ++#define E1000_82542_GSCL_2 E1000_GSCL_2 ++#define E1000_82542_GSCL_3 E1000_GSCL_3 ++#define E1000_82542_GSCL_4 E1000_GSCL_4 ++#define E1000_82542_FACTPS E1000_FACTPS ++#define E1000_82542_SWSM E1000_SWSM ++#define E1000_82542_FWSM E1000_FWSM ++#define E1000_82542_FFLT_DBG E1000_FFLT_DBG ++#define E1000_82542_IAC E1000_IAC ++#define E1000_82542_ICRXPTC E1000_ICRXPTC ++#define E1000_82542_ICRXATC E1000_ICRXATC ++#define E1000_82542_ICTXPTC E1000_ICTXPTC ++#define E1000_82542_ICTXATC E1000_ICTXATC ++#define E1000_82542_ICTXQEC E1000_ICTXQEC ++#define E1000_82542_ICTXQMTC E1000_ICTXQMTC ++#define E1000_82542_ICRXDMTC E1000_ICRXDMTC ++#define E1000_82542_ICRXOC E1000_ICRXOC ++#define E1000_82542_HICR E1000_HICR ++ ++#define E1000_82542_CPUVEC E1000_CPUVEC ++#define E1000_82542_MRQC E1000_MRQC ++#define E1000_82542_RETA E1000_RETA ++#define E1000_82542_RSSRK E1000_RSSRK ++#define E1000_82542_RSSIM E1000_RSSIM ++#define E1000_82542_RSSIR E1000_RSSIR ++#define E1000_82542_KUMCTRLSTA E1000_KUMCTRLSTA ++#define E1000_82542_SW_FW_SYNC E1000_SW_FW_SYNC ++ ++/* Statistics counters collected by the MAC */ ++struct e1000_hw_stats { ++ uint64_t crcerrs; ++ uint64_t algnerrc; ++ uint64_t symerrs; ++ uint64_t rxerrc; ++ uint64_t mpc; ++ uint64_t scc; ++ uint64_t ecol; ++ uint64_t mcc; ++ uint64_t latecol; ++ uint64_t colc; ++ uint64_t dc; ++ uint64_t tncrs; ++ uint64_t sec; ++ uint64_t cexterr; ++ uint64_t rlec; ++ uint64_t xonrxc; ++ uint64_t xontxc; ++ uint64_t xoffrxc; ++ uint64_t xofftxc; ++ uint64_t fcruc; ++ uint64_t prc64; ++ uint64_t prc127; ++ uint64_t prc255; ++ uint64_t prc511; ++ uint64_t prc1023; ++ uint64_t prc1522; ++ uint64_t gprc; ++ uint64_t bprc; ++ uint64_t mprc; ++ uint64_t gptc; ++ uint64_t gorcl; ++ uint64_t gorch; ++ uint64_t gotcl; ++ uint64_t gotch; ++ uint64_t rnbc; ++ uint64_t ruc; ++ uint64_t rfc; ++ uint64_t roc; ++ uint64_t rjc; ++ uint64_t mgprc; ++ uint64_t mgpdc; ++ uint64_t mgptc; ++ uint64_t torl; ++ uint64_t torh; ++ uint64_t totl; ++ uint64_t toth; ++ uint64_t tpr; ++ uint64_t tpt; ++ uint64_t ptc64; ++ uint64_t ptc127; ++ uint64_t ptc255; ++ uint64_t ptc511; ++ uint64_t ptc1023; ++ uint64_t ptc1522; ++ uint64_t mptc; ++ uint64_t bptc; ++ uint64_t tsctc; ++ uint64_t tsctfc; ++ uint64_t iac; ++ uint64_t icrxptc; ++ uint64_t icrxatc; ++ uint64_t ictxptc; ++ uint64_t ictxatc; ++ uint64_t ictxqec; ++ uint64_t ictxqmtc; ++ uint64_t icrxdmtc; ++ uint64_t icrxoc; ++}; ++ ++/* Structure containing variables used by the shared code (e1000_hw.c) */ ++struct e1000_hw { ++ uint8_t *hw_addr; ++ uint8_t *flash_address; ++ e1000_mac_type mac_type; ++ e1000_phy_type phy_type; ++ uint32_t phy_init_script; ++ e1000_media_type media_type; ++ void *back; ++ struct e1000_shadow_ram *eeprom_shadow_ram; ++ uint32_t flash_bank_size; ++ uint32_t flash_base_addr; ++ e1000_fc_type fc; ++ e1000_bus_speed bus_speed; ++ e1000_bus_width bus_width; ++ e1000_bus_type bus_type; ++ struct e1000_eeprom_info eeprom; ++ 
e1000_ms_type master_slave; ++ e1000_ms_type original_master_slave; ++ e1000_ffe_config ffe_config_state; ++ uint32_t asf_firmware_present; ++ uint32_t eeprom_semaphore_present; ++ uint32_t swfw_sync_present; ++ uint32_t swfwhw_semaphore_present; ++ unsigned long io_base; ++ uint32_t phy_id; ++ uint32_t phy_revision; ++ uint32_t phy_addr; ++ uint32_t original_fc; ++ uint32_t txcw; ++ uint32_t autoneg_failed; ++ uint32_t max_frame_size; ++ uint32_t min_frame_size; ++ uint32_t mc_filter_type; ++ uint32_t num_mc_addrs; ++ uint32_t collision_delta; ++ uint32_t tx_packet_delta; ++ uint32_t ledctl_default; ++ uint32_t ledctl_mode1; ++ uint32_t ledctl_mode2; ++ boolean_t tx_pkt_filtering; ++ struct e1000_host_mng_dhcp_cookie mng_cookie; ++ uint16_t phy_spd_default; ++ uint16_t autoneg_advertised; ++ uint16_t pci_cmd_word; ++ uint16_t fc_high_water; ++ uint16_t fc_low_water; ++ uint16_t fc_pause_time; ++ uint16_t current_ifs_val; ++ uint16_t ifs_min_val; ++ uint16_t ifs_max_val; ++ uint16_t ifs_step_size; ++ uint16_t ifs_ratio; ++ uint16_t device_id; ++ uint16_t vendor_id; ++ uint16_t subsystem_id; ++ uint16_t subsystem_vendor_id; ++ uint8_t revision_id; ++ uint8_t autoneg; ++ uint8_t mdix; ++ uint8_t forced_speed_duplex; ++ uint8_t wait_autoneg_complete; ++ uint8_t dma_fairness; ++ uint8_t mac_addr[NODE_ADDRESS_SIZE]; ++ uint8_t perm_mac_addr[NODE_ADDRESS_SIZE]; ++ boolean_t disable_polarity_correction; ++ boolean_t speed_downgraded; ++ e1000_smart_speed smart_speed; ++ e1000_dsp_config dsp_config_state; ++ boolean_t get_link_status; ++ boolean_t serdes_link_down; ++ boolean_t tbi_compatibility_en; ++ boolean_t tbi_compatibility_on; ++ boolean_t laa_is_present; ++ boolean_t phy_reset_disable; ++ boolean_t fc_send_xon; ++ boolean_t fc_strict_ieee; ++ boolean_t report_tx_early; ++ boolean_t adaptive_ifs; ++ boolean_t ifs_params_forced; ++ boolean_t in_ifs_mode; ++ boolean_t mng_reg_access_disabled; ++ boolean_t leave_av_bit_off; ++ boolean_t kmrn_lock_loss_workaround_disabled; ++}; ++ ++ ++#define E1000_EEPROM_SWDPIN0 0x0001 /* SWDPIN 0 EEPROM Value */ ++#define E1000_EEPROM_LED_LOGIC 0x0020 /* Led Logic Word */ ++#define E1000_EEPROM_RW_REG_DATA 16 /* Offset to data in EEPROM read/write registers */ ++#define E1000_EEPROM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ ++#define E1000_EEPROM_RW_REG_START 1 /* First bit for telling part to start operation */ ++#define E1000_EEPROM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ ++#define E1000_EEPROM_POLL_WRITE 1 /* Flag for polling for write complete */ ++#define E1000_EEPROM_POLL_READ 0 /* Flag for polling for read complete */ ++/* Register Bit Masks */ ++/* Device Control */ ++#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ ++#define E1000_CTRL_BEM 0x00000002 /* Endian Mode.0=little,1=big */ ++#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */ ++#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master requests */ ++#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ ++#define E1000_CTRL_TME 0x00000010 /* Test mode. 
0=normal,1=test */ ++#define E1000_CTRL_SLE 0x00000020 /* Serial Link on 0=dis,1=en */ ++#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ ++#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ ++#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ ++#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ ++#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ ++#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ ++#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ ++#define E1000_CTRL_BEM32 0x00000400 /* Big Endian 32 mode */ ++#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ ++#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ ++#define E1000_CTRL_D_UD_EN 0x00002000 /* Dock/Undock enable */ ++#define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */ ++#define E1000_CTRL_FORCE_PHY_RESET 0x00008000 /* Reset both PHY ports, through PHYRST_N pin */ ++#define E1000_CTRL_EXT_LINK_EN 0x00010000 /* enable link status from external LINK_0 and LINK_1 pins */ ++#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ ++#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ ++#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ ++#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ ++#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ ++#define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */ ++#define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */ ++#define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */ ++#define E1000_CTRL_RST 0x04000000 /* Global reset */ ++#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ ++#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ ++#define E1000_CTRL_RTE 0x20000000 /* Routing tag enable */ ++#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ ++#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ ++#define E1000_CTRL_SW2FW_INT 0x02000000 /* Initiate an interrupt to manageability engine */ ++ ++/* Device Status */ ++#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ ++#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ ++#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ ++#define E1000_STATUS_FUNC_SHIFT 2 ++#define E1000_STATUS_FUNC_0 0x00000000 /* Function 0 */ ++#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ ++#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ ++#define E1000_STATUS_TBIMODE 0x00000020 /* TBI mode */ ++#define E1000_STATUS_SPEED_MASK 0x000000C0 ++#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ ++#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ ++#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ ++#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion ++ by EEPROM/Flash */ ++#define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */ ++#define E1000_STATUS_DOCK_CI 0x00000800 /* Change in Dock/Undock state. Clear on write '0'. */ ++#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. 
*/ ++#define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */ ++#define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */ ++#define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */ ++#define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */ ++#define E1000_STATUS_PCIX_SPEED 0x0000C000 /* PCI-X bus speed */ ++#define E1000_STATUS_BMC_SKU_0 0x00100000 /* BMC USB redirect disabled */ ++#define E1000_STATUS_BMC_SKU_1 0x00200000 /* BMC SRAM disabled */ ++#define E1000_STATUS_BMC_SKU_2 0x00400000 /* BMC SDRAM disabled */ ++#define E1000_STATUS_BMC_CRYPTO 0x00800000 /* BMC crypto disabled */ ++#define E1000_STATUS_BMC_LITE 0x01000000 /* BMC external code execution disabled */ ++#define E1000_STATUS_RGMII_ENABLE 0x02000000 /* RGMII disabled */ ++#define E1000_STATUS_FUSE_8 0x04000000 ++#define E1000_STATUS_FUSE_9 0x08000000 ++#define E1000_STATUS_SERDES0_DIS 0x10000000 /* SERDES disabled on port 0 */ ++#define E1000_STATUS_SERDES1_DIS 0x20000000 /* SERDES disabled on port 1 */ ++ ++/* Constants used to intrepret the masked PCI-X bus speed. */ ++#define E1000_STATUS_PCIX_SPEED_66 0x00000000 /* PCI-X bus speed 50-66 MHz */ ++#define E1000_STATUS_PCIX_SPEED_100 0x00004000 /* PCI-X bus speed 66-100 MHz */ ++#define E1000_STATUS_PCIX_SPEED_133 0x00008000 /* PCI-X bus speed 100-133 MHz */ ++ ++/* EEPROM/Flash Control */ ++#define E1000_EECD_SK 0x00000001 /* EEPROM Clock */ ++#define E1000_EECD_CS 0x00000002 /* EEPROM Chip Select */ ++#define E1000_EECD_DI 0x00000004 /* EEPROM Data In */ ++#define E1000_EECD_DO 0x00000008 /* EEPROM Data Out */ ++#define E1000_EECD_FWE_MASK 0x00000030 ++#define E1000_EECD_FWE_DIS 0x00000010 /* Disable FLASH writes */ ++#define E1000_EECD_FWE_EN 0x00000020 /* Enable FLASH writes */ ++#define E1000_EECD_FWE_SHIFT 4 ++#define E1000_EECD_REQ 0x00000040 /* EEPROM Access Request */ ++#define E1000_EECD_GNT 0x00000080 /* EEPROM Access Grant */ ++#define E1000_EECD_PRES 0x00000100 /* EEPROM Present */ ++#define E1000_EECD_SIZE 0x00000200 /* EEPROM Size (0=64 word 1=256 word) */ ++#define E1000_EECD_ADDR_BITS 0x00000400 /* EEPROM Addressing bits based on type ++ * (0-small, 1-large) */ ++#define E1000_EECD_TYPE 0x00002000 /* EEPROM Type (1-SPI, 0-Microwire) */ ++#ifndef E1000_EEPROM_GRANT_ATTEMPTS ++#define E1000_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM # attempts to gain grant */ ++#endif ++#define E1000_EECD_AUTO_RD 0x00000200 /* EEPROM Auto Read done */ ++#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* EEprom Size */ ++#define E1000_EECD_SIZE_EX_SHIFT 11 ++#define E1000_EECD_NVADDS 0x00018000 /* NVM Address Size */ ++#define E1000_EECD_SELSHAD 0x00020000 /* Select Shadow RAM */ ++#define E1000_EECD_INITSRAM 0x00040000 /* Initialize Shadow RAM */ ++#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ ++#define E1000_EECD_AUPDEN 0x00100000 /* Enable Autonomous FLASH update */ ++#define E1000_EECD_SHADV 0x00200000 /* Shadow RAM Data Valid */ ++#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ ++#define E1000_EECD_SECVAL_SHIFT 22 ++#define E1000_STM_OPCODE 0xDB00 ++#define E1000_HICR_FW_RESET 0xC0 ++ ++#define E1000_SHADOW_RAM_WORDS 2048 ++#define E1000_ICH8_NVM_SIG_WORD 0x13 ++#define E1000_ICH8_NVM_SIG_MASK 0xC0 ++ ++/* EEPROM Read */ ++#define E1000_EERD_START 0x00000001 /* Start Read */ ++#define E1000_EERD_DONE 0x00000010 /* Read Done */ ++#define E1000_EERD_ADDR_SHIFT 8 ++#define E1000_EERD_ADDR_MASK 0x0000FF00 /* Read Address */ ++#define E1000_EERD_DATA_SHIFT 16 ++#define E1000_EERD_DATA_MASK 0xFFFF0000 /* Read Data */ ++ ++/* SPI EEPROM Status Register 
*/ ++#define EEPROM_STATUS_RDY_SPI 0x01 ++#define EEPROM_STATUS_WEN_SPI 0x02 ++#define EEPROM_STATUS_BP0_SPI 0x04 ++#define EEPROM_STATUS_BP1_SPI 0x08 ++#define EEPROM_STATUS_WPEN_SPI 0x80 ++ ++/* Extended Device Control */ ++#define E1000_CTRL_EXT_GPI0_EN 0x00000001 /* Maps SDP4 to GPI0 */ ++#define E1000_CTRL_EXT_GPI1_EN 0x00000002 /* Maps SDP5 to GPI1 */ ++#define E1000_CTRL_EXT_PHYINT_EN E1000_CTRL_EXT_GPI1_EN ++#define E1000_CTRL_EXT_GPI2_EN 0x00000004 /* Maps SDP6 to GPI2 */ ++#define E1000_CTRL_EXT_GPI3_EN 0x00000008 /* Maps SDP7 to GPI3 */ ++#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* Value of SW Defineable Pin 4 */ ++#define E1000_CTRL_EXT_SDP5_DATA 0x00000020 /* Value of SW Defineable Pin 5 */ ++#define E1000_CTRL_EXT_PHY_INT E1000_CTRL_EXT_SDP5_DATA ++#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* Value of SW Defineable Pin 6 */ ++#define E1000_CTRL_EXT_SDP7_DATA 0x00000080 /* Value of SW Defineable Pin 7 */ ++#define E1000_CTRL_EXT_SDP4_DIR 0x00000100 /* Direction of SDP4 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP5_DIR 0x00000200 /* Direction of SDP5 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP7_DIR 0x00000800 /* Direction of SDP7 0=in 1=out */ ++#define E1000_CTRL_EXT_ASDCHK 0x00001000 /* Initiate an ASD sequence */ ++#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ ++#define E1000_CTRL_EXT_IPS 0x00004000 /* Invert Power State */ ++#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ ++#define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ ++#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 ++#define E1000_CTRL_EXT_LINK_MODE_TBI 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_KMRN 0x00000000 ++#define E1000_CTRL_EXT_LINK_MODE_SERDES 0x00C00000 ++#define E1000_CTRL_EXT_WR_WMARK_MASK 0x03000000 ++#define E1000_CTRL_EXT_WR_WMARK_256 0x00000000 ++#define E1000_CTRL_EXT_WR_WMARK_320 0x01000000 ++#define E1000_CTRL_EXT_WR_WMARK_384 0x02000000 ++#define E1000_CTRL_EXT_WR_WMARK_448 0x03000000 ++#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ ++#define E1000_CTRL_EXT_IAME 0x08000000 /* Interrupt acknowledge Auto-mask */ ++#define E1000_CTRL_EXT_INT_TIMER_CLR 0x20000000 /* Clear Interrupt timers after IMS clear */ ++#define E1000_CRTL_EXT_PB_PAREN 0x01000000 /* packet buffer parity error detection enabled */ ++#define E1000_CTRL_EXT_DF_PAREN 0x02000000 /* descriptor FIFO parity error detection enable */ ++#define E1000_CTRL_EXT_GHOST_PAREN 0x40000000 ++ ++/* MDI Control */ ++#define E1000_MDIC_DATA_MASK 0x0000FFFF ++#define E1000_MDIC_REG_MASK 0x001F0000 ++#define E1000_MDIC_REG_SHIFT 16 ++#define E1000_MDIC_PHY_MASK 0x03E00000 ++#define E1000_MDIC_PHY_SHIFT 21 ++#define E1000_MDIC_OP_WRITE 0x04000000 ++#define E1000_MDIC_OP_READ 0x08000000 ++#define E1000_MDIC_READY 0x10000000 ++#define E1000_MDIC_INT_EN 0x20000000 ++#define E1000_MDIC_ERROR 0x40000000 ++ ++#define E1000_KUMCTRLSTA_MASK 0x0000FFFF ++#define E1000_KUMCTRLSTA_OFFSET 0x001F0000 ++#define E1000_KUMCTRLSTA_OFFSET_SHIFT 16 ++#define E1000_KUMCTRLSTA_REN 0x00200000 ++ ++#define E1000_KUMCTRLSTA_OFFSET_FIFO_CTRL 0x00000000 ++#define E1000_KUMCTRLSTA_OFFSET_CTRL 0x00000001 ++#define E1000_KUMCTRLSTA_OFFSET_INB_CTRL 0x00000002 ++#define E1000_KUMCTRLSTA_OFFSET_DIAG 0x00000003 ++#define E1000_KUMCTRLSTA_OFFSET_TIMEOUTS 0x00000004 ++#define E1000_KUMCTRLSTA_OFFSET_INB_PARAM 0x00000009 ++#define 
E1000_KUMCTRLSTA_OFFSET_HD_CTRL 0x00000010 ++#define E1000_KUMCTRLSTA_OFFSET_M2P_SERDES 0x0000001E ++#define E1000_KUMCTRLSTA_OFFSET_M2P_MODES 0x0000001F ++ ++/* FIFO Control */ ++#define E1000_KUMCTRLSTA_FIFO_CTRL_RX_BYPASS 0x00000008 ++#define E1000_KUMCTRLSTA_FIFO_CTRL_TX_BYPASS 0x00000800 ++ ++/* In-Band Control */ ++#define E1000_KUMCTRLSTA_INB_CTRL_LINK_STATUS_TX_TIMEOUT_DEFAULT 0x00000500 ++#define E1000_KUMCTRLSTA_INB_CTRL_DIS_PADDING 0x00000010 ++ ++/* Half-Duplex Control */ ++#define E1000_KUMCTRLSTA_HD_CTRL_10_100_DEFAULT 0x00000004 ++#define E1000_KUMCTRLSTA_HD_CTRL_1000_DEFAULT 0x00000000 ++ ++#define E1000_KUMCTRLSTA_OFFSET_K0S_CTRL 0x0000001E ++ ++#define E1000_KUMCTRLSTA_DIAG_FELPBK 0x2000 ++#define E1000_KUMCTRLSTA_DIAG_NELPBK 0x1000 ++ ++#define E1000_KUMCTRLSTA_K0S_100_EN 0x2000 ++#define E1000_KUMCTRLSTA_K0S_GBE_EN 0x1000 ++#define E1000_KUMCTRLSTA_K0S_ENTRY_LATENCY_MASK 0x0003 ++ ++#define E1000_KABGTXD_BGSQLBIAS 0x00050000 ++ ++#define E1000_PHY_CTRL_SPD_EN 0x00000001 ++#define E1000_PHY_CTRL_D0A_LPLU 0x00000002 ++#define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004 ++#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008 ++#define E1000_PHY_CTRL_GBE_DISABLE 0x00000040 ++#define E1000_PHY_CTRL_B2B_EN 0x00000080 ++ ++/* LED Control */ ++#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F ++#define E1000_LEDCTL_LED0_MODE_SHIFT 0 ++#define E1000_LEDCTL_LED0_BLINK_RATE 0x0000020 ++#define E1000_LEDCTL_LED0_IVRT 0x00000040 ++#define E1000_LEDCTL_LED0_BLINK 0x00000080 ++#define E1000_LEDCTL_LED1_MODE_MASK 0x00000F00 ++#define E1000_LEDCTL_LED1_MODE_SHIFT 8 ++#define E1000_LEDCTL_LED1_BLINK_RATE 0x0002000 ++#define E1000_LEDCTL_LED1_IVRT 0x00004000 ++#define E1000_LEDCTL_LED1_BLINK 0x00008000 ++#define E1000_LEDCTL_LED2_MODE_MASK 0x000F0000 ++#define E1000_LEDCTL_LED2_MODE_SHIFT 16 ++#define E1000_LEDCTL_LED2_BLINK_RATE 0x00200000 ++#define E1000_LEDCTL_LED2_IVRT 0x00400000 ++#define E1000_LEDCTL_LED2_BLINK 0x00800000 ++#define E1000_LEDCTL_LED3_MODE_MASK 0x0F000000 ++#define E1000_LEDCTL_LED3_MODE_SHIFT 24 ++#define E1000_LEDCTL_LED3_BLINK_RATE 0x20000000 ++#define E1000_LEDCTL_LED3_IVRT 0x40000000 ++#define E1000_LEDCTL_LED3_BLINK 0x80000000 ++ ++#define E1000_LEDCTL_MODE_LINK_10_1000 0x0 ++#define E1000_LEDCTL_MODE_LINK_100_1000 0x1 ++#define E1000_LEDCTL_MODE_LINK_UP 0x2 ++#define E1000_LEDCTL_MODE_ACTIVITY 0x3 ++#define E1000_LEDCTL_MODE_LINK_ACTIVITY 0x4 ++#define E1000_LEDCTL_MODE_LINK_10 0x5 ++#define E1000_LEDCTL_MODE_LINK_100 0x6 ++#define E1000_LEDCTL_MODE_LINK_1000 0x7 ++#define E1000_LEDCTL_MODE_PCIX_MODE 0x8 ++#define E1000_LEDCTL_MODE_FULL_DUPLEX 0x9 ++#define E1000_LEDCTL_MODE_COLLISION 0xA ++#define E1000_LEDCTL_MODE_BUS_SPEED 0xB ++#define E1000_LEDCTL_MODE_BUS_SIZE 0xC ++#define E1000_LEDCTL_MODE_PAUSED 0xD ++#define E1000_LEDCTL_MODE_LED_ON 0xE ++#define E1000_LEDCTL_MODE_LED_OFF 0xF ++ ++/* Receive Address */ ++#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ ++ ++/* Interrupt Cause Read */ ++#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ ++#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ ++#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ ++#define E1000_ICR_RXSEQ 0x00000008 /* rx sequence error */ ++#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. 
threshold (0) */ ++#define E1000_ICR_RXO 0x00000040 /* rx overrun */ ++#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ ++#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */ ++#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */ ++#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ ++#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */ ++#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */ ++#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */ ++#define E1000_ICR_TXD_LOW 0x00008000 ++#define E1000_ICR_SRPD 0x00010000 ++#define E1000_ICR_ACK 0x00020000 /* Receive Ack frame */ ++#define E1000_ICR_MNG 0x00040000 /* Manageability event */ ++#define E1000_ICR_DOCK 0x00080000 /* Dock/Undock */ ++#define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit asserted, the driver should claim the interrupt */ ++#define E1000_ICR_RXD_FIFO_PAR0 0x00100000 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_ICR_TXD_FIFO_PAR0 0x00200000 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_ICR_HOST_ARB_PAR 0x00400000 /* host arb read buffer parity error */ ++#define E1000_ICR_PB_PAR 0x00800000 /* packet buffer parity error */ ++#define E1000_ICR_RXD_FIFO_PAR1 0x01000000 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_ICR_TXD_FIFO_PAR1 0x02000000 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_ICR_ALL_PARITY 0x03F00000 /* all parity error bits */ ++#define E1000_ICR_DSW 0x00000020 /* FW changed the status of DISSW bit in the FWSM */ ++#define E1000_ICR_PHYINT 0x00001000 /* LAN connected device generates an interrupt */ ++#define E1000_ICR_EPRST 0x00100000 /* ME handware reset occurs */ ++ ++/* Interrupt Cause Set */ ++#define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_ICS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ ++#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ ++#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */ ++#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */ ++#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ ++#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ ++#define E1000_ICS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ ++#define E1000_ICS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ ++#define E1000_ICS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ ++#define E1000_ICS_TXD_LOW E1000_ICR_TXD_LOW ++#define E1000_ICS_SRPD E1000_ICR_SRPD ++#define E1000_ICS_ACK E1000_ICR_ACK /* Receive Ack frame */ ++#define E1000_ICS_MNG E1000_ICR_MNG /* Manageability event */ ++#define E1000_ICS_DOCK E1000_ICR_DOCK /* Dock/Undock */ ++#define E1000_ICS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_ICS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_ICS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ ++#define E1000_ICS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ ++#define E1000_ICS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_ICS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_ICS_DSW E1000_ICR_DSW ++#define E1000_ICS_PHYINT E1000_ICR_PHYINT ++#define E1000_ICS_EPRST E1000_ICR_EPRST ++ ++/* Interrupt Mask Set */ ++#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ ++#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ ++#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */ ++#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */ ++#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ ++#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ ++#define E1000_IMS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ ++#define E1000_IMS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ ++#define E1000_IMS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ ++#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW ++#define E1000_IMS_SRPD E1000_ICR_SRPD ++#define E1000_IMS_ACK E1000_ICR_ACK /* Receive Ack frame */ ++#define E1000_IMS_MNG E1000_ICR_MNG /* Manageability event */ ++#define E1000_IMS_DOCK E1000_ICR_DOCK /* Dock/Undock */ ++#define E1000_IMS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_IMS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_IMS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ ++#define E1000_IMS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ ++#define E1000_IMS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_IMS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_IMS_DSW E1000_ICR_DSW ++#define E1000_IMS_PHYINT E1000_ICR_PHYINT ++#define E1000_IMS_EPRST E1000_ICR_EPRST ++ ++/* Interrupt Mask Clear */ ++#define E1000_IMC_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_IMC_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ ++#define E1000_IMC_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_IMC_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ ++#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */ ++#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */ ++#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ ++#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ ++#define E1000_IMC_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ ++#define E1000_IMC_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ ++#define E1000_IMC_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ ++#define E1000_IMC_TXD_LOW E1000_ICR_TXD_LOW ++#define E1000_IMC_SRPD E1000_ICR_SRPD ++#define E1000_IMC_ACK E1000_ICR_ACK /* Receive Ack frame */ ++#define E1000_IMC_MNG E1000_ICR_MNG /* Manageability event */ ++#define E1000_IMC_DOCK E1000_ICR_DOCK /* Dock/Undock */ ++#define E1000_IMC_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_IMC_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_IMC_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ ++#define E1000_IMC_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ ++#define E1000_IMC_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_IMC_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_IMC_DSW E1000_ICR_DSW ++#define E1000_IMC_PHYINT E1000_ICR_PHYINT ++#define E1000_IMC_EPRST E1000_ICR_EPRST ++ ++/* Receive Control */ ++#define E1000_RCTL_RST 0x00000001 /* Software reset */ ++#define E1000_RCTL_EN 0x00000002 /* enable */ ++#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ ++#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ ++#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ ++#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ ++#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ ++#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ ++#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */ ++#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ ++#define E1000_RCTL_DTYP_MASK 0x00000C00 /* Descriptor type mask */ ++#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ ++#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */ ++#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */ ++#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */ ++#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ ++#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */ ++#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */ ++#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */ ++#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ ++#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */ ++#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ ++#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */ ++#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */ ++#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */ ++#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ ++#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */ ++#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */ ++#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */ ++#define 
E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ ++#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ ++#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ ++#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ ++#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ ++#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ ++#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ ++#define E1000_RCTL_FLXBUF_MASK 0x78000000 /* Flexible buffer size */ ++#define E1000_RCTL_FLXBUF_SHIFT 27 /* Flexible buffer shift */ ++ ++/* Use byte values for the following shift parameters ++ * Usage: ++ * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & ++ * E1000_PSRCTL_BSIZE0_MASK) | ++ * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & ++ * E1000_PSRCTL_BSIZE1_MASK) | ++ * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & ++ * E1000_PSRCTL_BSIZE2_MASK) | ++ * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; ++ * E1000_PSRCTL_BSIZE3_MASK)) ++ * where value0 = [128..16256], default=256 ++ * value1 = [1024..64512], default=4096 ++ * value2 = [0..64512], default=4096 ++ * value3 = [0..64512], default=0 ++ */ ++ ++#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F ++#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 ++#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 ++#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 ++ ++#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ ++#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ ++#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ ++#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ ++ ++/* SW_W_SYNC definitions */ ++#define E1000_SWFW_EEP_SM 0x0001 ++#define E1000_SWFW_PHY0_SM 0x0002 ++#define E1000_SWFW_PHY1_SM 0x0004 ++#define E1000_SWFW_MAC_CSR_SM 0x0008 ++ ++/* Receive Descriptor */ ++#define E1000_RDT_DELAY 0x0000ffff /* Delay timer (1=1024us) */ ++#define E1000_RDT_FPDB 0x80000000 /* Flush descriptor block */ ++#define E1000_RDLEN_LEN 0x0007ff80 /* descriptor length */ ++#define E1000_RDH_RDH 0x0000ffff /* receive descriptor head */ ++#define E1000_RDT_RDT 0x0000ffff /* receive descriptor tail */ ++ ++/* Flow Control */ ++#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */ ++#define E1000_FCRTH_XFCE 0x80000000 /* External Flow Control Enable */ ++#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */ ++#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ ++ ++/* Header split receive */ ++#define E1000_RFCTL_ISCSI_DIS 0x00000001 ++#define E1000_RFCTL_ISCSI_DWC_MASK 0x0000003E ++#define E1000_RFCTL_ISCSI_DWC_SHIFT 1 ++#define E1000_RFCTL_NFSW_DIS 0x00000040 ++#define E1000_RFCTL_NFSR_DIS 0x00000080 ++#define E1000_RFCTL_NFS_VER_MASK 0x00000300 ++#define E1000_RFCTL_NFS_VER_SHIFT 8 ++#define E1000_RFCTL_IPV6_DIS 0x00000400 ++#define E1000_RFCTL_IPV6_XSUM_DIS 0x00000800 ++#define E1000_RFCTL_ACK_DIS 0x00001000 ++#define E1000_RFCTL_ACKD_DIS 0x00002000 ++#define E1000_RFCTL_IPFRSP_DIS 0x00004000 ++#define E1000_RFCTL_EXTEN 0x00008000 ++#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 ++#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 ++ ++/* Receive Descriptor Control */ ++#define E1000_RXDCTL_PTHRESH 0x0000003F /* RXDCTL Prefetch Threshold */ ++#define E1000_RXDCTL_HTHRESH 0x00003F00 /* RXDCTL Host Threshold */ ++#define E1000_RXDCTL_WTHRESH 0x003F0000 /* RXDCTL Writeback Threshold */ ++#define E1000_RXDCTL_GRAN 0x01000000 /* RXDCTL Granularity */ ++ ++/* Transmit Descriptor Control */ ++#define E1000_TXDCTL_PTHRESH 0x000000FF 
/* TXDCTL Prefetch Threshold */ ++#define E1000_TXDCTL_HTHRESH 0x0000FF00 /* TXDCTL Host Threshold */ ++#define E1000_TXDCTL_WTHRESH 0x00FF0000 /* TXDCTL Writeback Threshold */ ++#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */ ++#define E1000_TXDCTL_LWTHRESH 0xFE000000 /* TXDCTL Low Threshold */ ++#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ ++#define E1000_TXDCTL_COUNT_DESC 0x00400000 /* Enable the counting of desc. ++ still to be processed. */ ++/* Transmit Configuration Word */ ++#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */ ++#define E1000_TXCW_HD 0x00000040 /* TXCW half duplex */ ++#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ ++#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ ++#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */ ++#define E1000_TXCW_RF 0x00003000 /* TXCW remote fault */ ++#define E1000_TXCW_NP 0x00008000 /* TXCW next page */ ++#define E1000_TXCW_CW 0x0000ffff /* TxConfigWord mask */ ++#define E1000_TXCW_TXC 0x40000000 /* Transmit Config control */ ++#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */ ++ ++/* Receive Configuration Word */ ++#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */ ++#define E1000_RXCW_NC 0x04000000 /* Receive config no carrier */ ++#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */ ++#define E1000_RXCW_CC 0x10000000 /* Receive config change */ ++#define E1000_RXCW_C 0x20000000 /* Receive config */ ++#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ ++#define E1000_RXCW_ANC 0x80000000 /* Auto-neg complete */ ++ ++/* Transmit Control */ ++#define E1000_TCTL_RST 0x00000001 /* software reset */ ++#define E1000_TCTL_EN 0x00000002 /* enable tx */ ++#define E1000_TCTL_BCE 0x00000004 /* busy check enable */ ++#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ ++#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ ++#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ ++#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */ ++#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */ ++#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ ++#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ ++#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ ++/* Extended Transmit Control */ ++#define E1000_TCTL_EXT_BST_MASK 0x000003FF /* Backoff Slot Time */ ++#define E1000_TCTL_EXT_GCEX_MASK 0x000FFC00 /* Gigabit Carry Extend Padding */ ++ ++#define DEFAULT_80003ES2LAN_TCTL_EXT_GCEX 0x00010000 ++ ++/* Receive Checksum Control */ ++#define E1000_RXCSUM_PCSS_MASK 0x000000FF /* Packet Checksum Start */ ++#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */ ++#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ ++#define E1000_RXCSUM_IPV6OFL 0x00000400 /* IPv6 checksum offload */ ++#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ ++#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ ++ ++/* Multiple Receive Queue Control */ ++#define E1000_MRQC_ENABLE_MASK 0x00000003 ++#define E1000_MRQC_ENABLE_RSS_2Q 0x00000001 ++#define E1000_MRQC_ENABLE_RSS_INT 0x00000004 ++#define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000 ++#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 ++#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 ++#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 ++#define E1000_MRQC_RSS_FIELD_IPV6_EX 0x00080000 ++#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000 ++#define 
E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 ++ ++/* Definitions for power management and wakeup registers */ ++/* Wake Up Control */ ++#define E1000_WUC_APME 0x00000001 /* APM Enable */ ++#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ ++#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */ ++#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */ ++#define E1000_WUC_SPM 0x80000000 /* Enable SPM */ ++ ++/* Wake Up Filter Control */ ++#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ ++#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ ++#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ ++#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ ++#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ ++#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ ++#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ ++#define E1000_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */ ++#define E1000_WUFC_IGNORE_TCO 0x00008000 /* Ignore WakeOn TCO packets */ ++#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ ++#define E1000_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */ ++#define E1000_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */ ++#define E1000_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */ ++#define E1000_WUFC_ALL_FILTERS 0x000F00FF /* Mask for all wakeup filters */ ++#define E1000_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */ ++#define E1000_WUFC_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */ ++ ++/* Wake Up Status */ ++#define E1000_WUS_LNKC 0x00000001 /* Link Status Changed */ ++#define E1000_WUS_MAG 0x00000002 /* Magic Packet Received */ ++#define E1000_WUS_EX 0x00000004 /* Directed Exact Received */ ++#define E1000_WUS_MC 0x00000008 /* Directed Multicast Received */ ++#define E1000_WUS_BC 0x00000010 /* Broadcast Received */ ++#define E1000_WUS_ARP 0x00000020 /* ARP Request Packet Received */ ++#define E1000_WUS_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Received */ ++#define E1000_WUS_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Received */ ++#define E1000_WUS_FLX0 0x00010000 /* Flexible Filter 0 Match */ ++#define E1000_WUS_FLX1 0x00020000 /* Flexible Filter 1 Match */ ++#define E1000_WUS_FLX2 0x00040000 /* Flexible Filter 2 Match */ ++#define E1000_WUS_FLX3 0x00080000 /* Flexible Filter 3 Match */ ++#define E1000_WUS_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */ ++ ++/* Management Control */ ++#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ ++#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ ++#define E1000_MANC_R_ON_FORCE 0x00000004 /* Reset on Force TCO - RO */ ++#define E1000_MANC_RMCP_EN 0x00000100 /* Enable RCMP 026Fh Filtering */ ++#define E1000_MANC_0298_EN 0x00000200 /* Enable RCMP 0298h Filtering */ ++#define E1000_MANC_IPV4_EN 0x00000400 /* Enable IPv4 */ ++#define E1000_MANC_IPV6_EN 0x00000800 /* Enable IPv6 */ ++#define E1000_MANC_SNAP_EN 0x00001000 /* Accept LLC/SNAP */ ++#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ ++#define E1000_MANC_NEIGHBOR_EN 0x00004000 /* Enable Neighbor Discovery ++ * Filtering */ ++#define E1000_MANC_ARP_RES_EN 0x00008000 /* Enable ARP response Filtering */ ++#define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */ ++#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ ++#define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled 
*/ ++#define E1000_MANC_RCV_ALL 0x00080000 /* Receive All Enabled */ ++#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ ++#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 /* Enable MAC address ++ * filtering */ ++#define E1000_MANC_EN_MNG2HOST 0x00200000 /* Enable MNG packets to host ++ * memory */ ++#define E1000_MANC_EN_IP_ADDR_FILTER 0x00400000 /* Enable IP address ++ * filtering */ ++#define E1000_MANC_EN_XSUM_FILTER 0x00800000 /* Enable checksum filtering */ ++#define E1000_MANC_BR_EN 0x01000000 /* Enable broadcast filtering */ ++#define E1000_MANC_SMB_REQ 0x01000000 /* SMBus Request */ ++#define E1000_MANC_SMB_GNT 0x02000000 /* SMBus Grant */ ++#define E1000_MANC_SMB_CLK_IN 0x04000000 /* SMBus Clock In */ ++#define E1000_MANC_SMB_DATA_IN 0x08000000 /* SMBus Data In */ ++#define E1000_MANC_SMB_DATA_OUT 0x10000000 /* SMBus Data Out */ ++#define E1000_MANC_SMB_CLK_OUT 0x20000000 /* SMBus Clock Out */ ++ ++#define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */ ++#define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */ ++ ++/* SW Semaphore Register */ ++#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ ++#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ ++#define E1000_SWSM_WMNG 0x00000004 /* Wake MNG Clock */ ++#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */ ++ ++/* FW Semaphore Register */ ++#define E1000_FWSM_MODE_MASK 0x0000000E /* FW mode */ ++#define E1000_FWSM_MODE_SHIFT 1 ++#define E1000_FWSM_FW_VALID 0x00008000 /* FW established a valid mode */ ++ ++#define E1000_FWSM_RSPCIPHY 0x00000040 /* Reset PHY on PCI reset */ ++#define E1000_FWSM_DISSW 0x10000000 /* FW disable SW Write Access */ ++#define E1000_FWSM_SKUSEL_MASK 0x60000000 /* LAN SKU select */ ++#define E1000_FWSM_SKUEL_SHIFT 29 ++#define E1000_FWSM_SKUSEL_EMB 0x0 /* Embedded SKU */ ++#define E1000_FWSM_SKUSEL_CONS 0x1 /* Consumer SKU */ ++#define E1000_FWSM_SKUSEL_PERF_100 0x2 /* Perf & Corp 10/100 SKU */ ++#define E1000_FWSM_SKUSEL_PERF_GBE 0x3 /* Perf & Copr GbE SKU */ ++ ++/* FFLT Debug Register */ ++#define E1000_FFLT_DBG_INVC 0x00100000 /* Invalid /C/ code handling */ ++ ++typedef enum { ++ e1000_mng_mode_none = 0, ++ e1000_mng_mode_asf, ++ e1000_mng_mode_pt, ++ e1000_mng_mode_ipmi, ++ e1000_mng_mode_host_interface_only ++} e1000_mng_mode; ++ ++/* Host Inteface Control Register */ ++#define E1000_HICR_EN 0x00000001 /* Enable Bit - RO */ ++#define E1000_HICR_C 0x00000002 /* Driver sets this bit when done ++ * to put command in RAM */ ++#define E1000_HICR_SV 0x00000004 /* Status Validity */ ++#define E1000_HICR_FWR 0x00000080 /* FW reset. 
Set by the Host */ ++ ++/* Host Interface Command Interface - Address range 0x8800-0x8EFF */ ++#define E1000_HI_MAX_DATA_LENGTH 252 /* Host Interface data length */ ++#define E1000_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Number of bytes in range */ ++#define E1000_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Number of dwords in range */ ++#define E1000_HI_COMMAND_TIMEOUT 500 /* Time in ms to process HI command */ ++ ++struct e1000_host_command_header { ++ uint8_t command_id; ++ uint8_t command_length; ++ uint8_t command_options; /* I/F bits for command, status for return */ ++ uint8_t checksum; ++}; ++struct e1000_host_command_info { ++ struct e1000_host_command_header command_header; /* Command Head/Command Result Head has 4 bytes */ ++ uint8_t command_data[E1000_HI_MAX_DATA_LENGTH]; /* Command data can length 0..252 */ ++}; ++ ++/* Host SMB register #0 */ ++#define E1000_HSMC0R_CLKIN 0x00000001 /* SMB Clock in */ ++#define E1000_HSMC0R_DATAIN 0x00000002 /* SMB Data in */ ++#define E1000_HSMC0R_DATAOUT 0x00000004 /* SMB Data out */ ++#define E1000_HSMC0R_CLKOUT 0x00000008 /* SMB Clock out */ ++ ++/* Host SMB register #1 */ ++#define E1000_HSMC1R_CLKIN E1000_HSMC0R_CLKIN ++#define E1000_HSMC1R_DATAIN E1000_HSMC0R_DATAIN ++#define E1000_HSMC1R_DATAOUT E1000_HSMC0R_DATAOUT ++#define E1000_HSMC1R_CLKOUT E1000_HSMC0R_CLKOUT ++ ++/* FW Status Register */ ++#define E1000_FWSTS_FWS_MASK 0x000000FF /* FW Status */ ++ ++/* Wake Up Packet Length */ ++#define E1000_WUPL_LENGTH_MASK 0x0FFF /* Only the lower 12 bits are valid */ ++ ++#define E1000_MDALIGN 4096 ++ ++/* PCI-Ex registers*/ ++ ++/* PCI-Ex Control Register */ ++#define E1000_GCR_RXD_NO_SNOOP 0x00000001 ++#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 ++#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 ++#define E1000_GCR_TXD_NO_SNOOP 0x00000008 ++#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 ++#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 ++ ++#define PCI_EX_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ ++ E1000_GCR_RXDSCW_NO_SNOOP | \ ++ E1000_GCR_RXDSCR_NO_SNOOP | \ ++ E1000_GCR_TXD_NO_SNOOP | \ ++ E1000_GCR_TXDSCW_NO_SNOOP | \ ++ E1000_GCR_TXDSCR_NO_SNOOP) ++ ++#define PCI_EX_82566_SNOOP_ALL PCI_EX_NO_SNOOP_ALL ++ ++#define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000 ++/* Function Active and Power State to MNG */ ++#define E1000_FACTPS_FUNC0_POWER_STATE_MASK 0x00000003 ++#define E1000_FACTPS_LAN0_VALID 0x00000004 ++#define E1000_FACTPS_FUNC0_AUX_EN 0x00000008 ++#define E1000_FACTPS_FUNC1_POWER_STATE_MASK 0x000000C0 ++#define E1000_FACTPS_FUNC1_POWER_STATE_SHIFT 6 ++#define E1000_FACTPS_LAN1_VALID 0x00000100 ++#define E1000_FACTPS_FUNC1_AUX_EN 0x00000200 ++#define E1000_FACTPS_FUNC2_POWER_STATE_MASK 0x00003000 ++#define E1000_FACTPS_FUNC2_POWER_STATE_SHIFT 12 ++#define E1000_FACTPS_IDE_ENABLE 0x00004000 ++#define E1000_FACTPS_FUNC2_AUX_EN 0x00008000 ++#define E1000_FACTPS_FUNC3_POWER_STATE_MASK 0x000C0000 ++#define E1000_FACTPS_FUNC3_POWER_STATE_SHIFT 18 ++#define E1000_FACTPS_SP_ENABLE 0x00100000 ++#define E1000_FACTPS_FUNC3_AUX_EN 0x00200000 ++#define E1000_FACTPS_FUNC4_POWER_STATE_MASK 0x03000000 ++#define E1000_FACTPS_FUNC4_POWER_STATE_SHIFT 24 ++#define E1000_FACTPS_IPMI_ENABLE 0x04000000 ++#define E1000_FACTPS_FUNC4_AUX_EN 0x08000000 ++#define E1000_FACTPS_MNGCG 0x20000000 ++#define E1000_FACTPS_LAN_FUNC_SEL 0x40000000 ++#define E1000_FACTPS_PM_STATE_CHANGED 0x80000000 ++ ++/* EEPROM Commands - Microwire */ ++#define EEPROM_READ_OPCODE_MICROWIRE 0x6 /* EEPROM read opcode */ ++#define EEPROM_WRITE_OPCODE_MICROWIRE 0x5 /* EEPROM write opcode */ ++#define 
EEPROM_ERASE_OPCODE_MICROWIRE 0x7 /* EEPROM erase opcode */ ++#define EEPROM_EWEN_OPCODE_MICROWIRE 0x13 /* EEPROM erase/write enable */ ++#define EEPROM_EWDS_OPCODE_MICROWIRE 0x10 /* EEPROM erast/write disable */ ++ ++/* EEPROM Commands - SPI */ ++#define EEPROM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ ++#define EEPROM_READ_OPCODE_SPI 0x03 /* EEPROM read opcode */ ++#define EEPROM_WRITE_OPCODE_SPI 0x02 /* EEPROM write opcode */ ++#define EEPROM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ ++#define EEPROM_WREN_OPCODE_SPI 0x06 /* EEPROM set Write Enable latch */ ++#define EEPROM_WRDI_OPCODE_SPI 0x04 /* EEPROM reset Write Enable latch */ ++#define EEPROM_RDSR_OPCODE_SPI 0x05 /* EEPROM read Status register */ ++#define EEPROM_WRSR_OPCODE_SPI 0x01 /* EEPROM write Status register */ ++#define EEPROM_ERASE4K_OPCODE_SPI 0x20 /* EEPROM ERASE 4KB */ ++#define EEPROM_ERASE64K_OPCODE_SPI 0xD8 /* EEPROM ERASE 64KB */ ++#define EEPROM_ERASE256_OPCODE_SPI 0xDB /* EEPROM ERASE 256B */ ++ ++/* EEPROM Size definitions */ ++#define EEPROM_WORD_SIZE_SHIFT 6 ++#define EEPROM_SIZE_SHIFT 10 ++#define EEPROM_SIZE_MASK 0x1C00 ++ ++/* EEPROM Word Offsets */ ++#define EEPROM_COMPAT 0x0003 ++#define EEPROM_ID_LED_SETTINGS 0x0004 ++#define EEPROM_VERSION 0x0005 ++#define EEPROM_SERDES_AMPLITUDE 0x0006 /* For SERDES output amplitude adjustment. */ ++#define EEPROM_PHY_CLASS_WORD 0x0007 ++#define EEPROM_INIT_CONTROL1_REG 0x000A ++#define EEPROM_INIT_CONTROL2_REG 0x000F ++#define EEPROM_SWDEF_PINS_CTRL_PORT_1 0x0010 ++#define EEPROM_INIT_CONTROL3_PORT_B 0x0014 ++#define EEPROM_INIT_3GIO_3 0x001A ++#define EEPROM_SWDEF_PINS_CTRL_PORT_0 0x0020 ++#define EEPROM_INIT_CONTROL3_PORT_A 0x0024 ++#define EEPROM_CFG 0x0012 ++#define EEPROM_FLASH_VERSION 0x0032 ++#define EEPROM_CHECKSUM_REG 0x003F ++ ++#define E1000_EEPROM_CFG_DONE 0x00040000 /* MNG config cycle done */ ++#define E1000_EEPROM_CFG_DONE_PORT_1 0x00080000 /* ...for second port */ ++ ++/* Word definitions for ID LED Settings */ ++#define ID_LED_RESERVED_0000 0x0000 ++#define ID_LED_RESERVED_FFFF 0xFFFF ++#define ID_LED_RESERVED_82573 0xF746 ++#define ID_LED_DEFAULT_82573 0x1811 ++#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ ++ (ID_LED_OFF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++#define ID_LED_DEFAULT_ICH8LAN ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_DEF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_ON2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++#define ID_LED_DEF1_DEF2 0x1 ++#define ID_LED_DEF1_ON2 0x2 ++#define ID_LED_DEF1_OFF2 0x3 ++#define ID_LED_ON1_DEF2 0x4 ++#define ID_LED_ON1_ON2 0x5 ++#define ID_LED_ON1_OFF2 0x6 ++#define ID_LED_OFF1_DEF2 0x7 ++#define ID_LED_OFF1_ON2 0x8 ++#define ID_LED_OFF1_OFF2 0x9 ++ ++#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF ++#define IGP_ACTIVITY_LED_ENABLE 0x0300 ++#define IGP_LED3_MODE 0x07000000 ++ ++ ++/* Mask bits for SERDES amplitude adjustment in Word 6 of the EEPROM */ ++#define EEPROM_SERDES_AMPLITUDE_MASK 0x000F ++ ++/* Mask bit for PHY class in Word 7 of the EEPROM */ ++#define EEPROM_PHY_CLASS_A 0x8000 ++ ++/* Mask bits for fields in Word 0x0a of the EEPROM */ ++#define EEPROM_WORD0A_ILOS 0x0010 ++#define EEPROM_WORD0A_SWDPIO 0x01E0 ++#define EEPROM_WORD0A_LRST 0x0200 ++#define EEPROM_WORD0A_FD 0x0400 ++#define EEPROM_WORD0A_66MHZ 0x0800 ++ ++/* Mask bits for fields in Word 0x0f of the EEPROM */ ++#define EEPROM_WORD0F_PAUSE_MASK 0x3000 ++#define EEPROM_WORD0F_PAUSE 0x1000 ++#define EEPROM_WORD0F_ASM_DIR 0x2000 ++#define EEPROM_WORD0F_ANE 0x0800 ++#define EEPROM_WORD0F_SWPDIO_EXT 
0x00F0 ++#define EEPROM_WORD0F_LPLU 0x0001 ++ ++/* Mask bits for fields in Word 0x10/0x20 of the EEPROM */ ++#define EEPROM_WORD1020_GIGA_DISABLE 0x0010 ++#define EEPROM_WORD1020_GIGA_DISABLE_NON_D0A 0x0008 ++ ++/* Mask bits for fields in Word 0x1a of the EEPROM */ ++#define EEPROM_WORD1A_ASPM_MASK 0x000C ++ ++/* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. */ ++#define EEPROM_SUM 0xBABA ++ ++/* EEPROM Map defines (WORD OFFSETS)*/ ++#define EEPROM_NODE_ADDRESS_BYTE_0 0 ++#define EEPROM_PBA_BYTE_1 8 ++ ++#define EEPROM_RESERVED_WORD 0xFFFF ++ ++/* EEPROM Map Sizes (Byte Counts) */ ++#define PBA_SIZE 4 ++ ++/* Collision related configuration parameters */ ++#define E1000_COLLISION_THRESHOLD 15 ++#define E1000_CT_SHIFT 4 ++/* Collision distance is a 0-based value that applies to ++ * half-duplex-capable hardware only. */ ++#define E1000_COLLISION_DISTANCE 63 ++#define E1000_COLLISION_DISTANCE_82542 64 ++#define E1000_FDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE ++#define E1000_HDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE ++#define E1000_COLD_SHIFT 12 ++ ++/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ ++#define REQ_TX_DESCRIPTOR_MULTIPLE 8 ++#define REQ_RX_DESCRIPTOR_MULTIPLE 8 ++ ++/* Default values for the transmit IPG register */ ++#define DEFAULT_82542_TIPG_IPGT 10 ++#define DEFAULT_82543_TIPG_IPGT_FIBER 9 ++#define DEFAULT_82543_TIPG_IPGT_COPPER 8 ++ ++#define E1000_TIPG_IPGT_MASK 0x000003FF ++#define E1000_TIPG_IPGR1_MASK 0x000FFC00 ++#define E1000_TIPG_IPGR2_MASK 0x3FF00000 ++ ++#define DEFAULT_82542_TIPG_IPGR1 2 ++#define DEFAULT_82543_TIPG_IPGR1 8 ++#define E1000_TIPG_IPGR1_SHIFT 10 ++ ++#define DEFAULT_82542_TIPG_IPGR2 10 ++#define DEFAULT_82543_TIPG_IPGR2 6 ++#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7 ++#define E1000_TIPG_IPGR2_SHIFT 20 ++ ++#define DEFAULT_80003ES2LAN_TIPG_IPGT_10_100 0x00000009 ++#define DEFAULT_80003ES2LAN_TIPG_IPGT_1000 0x00000008 ++#define E1000_TXDMAC_DPP 0x00000001 ++ ++/* Adaptive IFS defines */ ++#define TX_THRESHOLD_START 8 ++#define TX_THRESHOLD_INCREMENT 10 ++#define TX_THRESHOLD_DECREMENT 1 ++#define TX_THRESHOLD_STOP 190 ++#define TX_THRESHOLD_DISABLE 0 ++#define TX_THRESHOLD_TIMER_MS 10000 ++#define MIN_NUM_XMITS 1000 ++#define IFS_MAX 80 ++#define IFS_STEP 10 ++#define IFS_MIN 40 ++#define IFS_RATIO 4 ++ ++/* Extended Configuration Control and Size */ ++#define E1000_EXTCNF_CTRL_PCIE_WRITE_ENABLE 0x00000001 ++#define E1000_EXTCNF_CTRL_PHY_WRITE_ENABLE 0x00000002 ++#define E1000_EXTCNF_CTRL_D_UD_ENABLE 0x00000004 ++#define E1000_EXTCNF_CTRL_D_UD_LATENCY 0x00000008 ++#define E1000_EXTCNF_CTRL_D_UD_OWNER 0x00000010 ++#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020 ++#define E1000_EXTCNF_CTRL_MDIO_HW_OWNERSHIP 0x00000040 ++#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER 0x0FFF0000 ++ ++#define E1000_EXTCNF_SIZE_EXT_PHY_LENGTH 0x000000FF ++#define E1000_EXTCNF_SIZE_EXT_DOCK_LENGTH 0x0000FF00 ++#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH 0x00FF0000 ++#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001 ++#define E1000_EXTCNF_CTRL_SWFLAG 0x00000020 ++ ++/* PBA constants */ ++#define E1000_PBA_8K 0x0008 /* 8KB, default Rx allocation */ ++#define E1000_PBA_12K 0x000C /* 12KB, default Rx allocation */ ++#define E1000_PBA_16K 0x0010 /* 16KB, default TX allocation */ ++#define E1000_PBA_22K 0x0016 ++#define E1000_PBA_24K 0x0018 ++#define E1000_PBA_30K 0x001E ++#define E1000_PBA_32K 0x0020 ++#define E1000_PBA_34K 0x0022 ++#define E1000_PBA_38K 0x0026 ++#define E1000_PBA_40K 0x0028 ++#define E1000_PBA_48K 
0x0030 /* 48KB, default RX allocation */ ++ ++#define E1000_PBS_16K E1000_PBA_16K ++ ++/* Flow Control Constants */ ++#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 ++#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 ++#define FLOW_CONTROL_TYPE 0x8808 ++ ++/* The historical defaults for the flow control values are given below. */ ++#define FC_DEFAULT_HI_THRESH (0x8000) /* 32KB */ ++#define FC_DEFAULT_LO_THRESH (0x4000) /* 16KB */ ++#define FC_DEFAULT_TX_TIMER (0x100) /* ~130 us */ ++ ++/* PCIX Config space */ ++#define PCIX_COMMAND_REGISTER 0xE6 ++#define PCIX_STATUS_REGISTER_LO 0xE8 ++#define PCIX_STATUS_REGISTER_HI 0xEA ++ ++#define PCIX_COMMAND_MMRBC_MASK 0x000C ++#define PCIX_COMMAND_MMRBC_SHIFT 0x2 ++#define PCIX_STATUS_HI_MMRBC_MASK 0x0060 ++#define PCIX_STATUS_HI_MMRBC_SHIFT 0x5 ++#define PCIX_STATUS_HI_MMRBC_4K 0x3 ++#define PCIX_STATUS_HI_MMRBC_2K 0x2 ++ ++ ++/* Number of bits required to shift right the "pause" bits from the ++ * EEPROM (bits 13:12) to the "pause" (bits 8:7) field in the TXCW register. ++ */ ++#define PAUSE_SHIFT 5 ++ ++/* Number of bits required to shift left the "SWDPIO" bits from the ++ * EEPROM (bits 8:5) to the "SWDPIO" (bits 25:22) field in the CTRL register. ++ */ ++#define SWDPIO_SHIFT 17 ++ ++/* Number of bits required to shift left the "SWDPIO_EXT" bits from the ++ * EEPROM word F (bits 7:4) to the bits 11:8 of The Extended CTRL register. ++ */ ++#define SWDPIO__EXT_SHIFT 4 ++ ++/* Number of bits required to shift left the "ILOS" bit from the EEPROM ++ * (bit 4) to the "ILOS" (bit 7) field in the CTRL register. ++ */ ++#define ILOS_SHIFT 3 ++ ++ ++#define RECEIVE_BUFFER_ALIGN_SIZE (256) ++ ++/* Number of milliseconds we wait for auto-negotiation to complete */ ++#define LINK_UP_TIMEOUT 500 ++ ++/* Number of 100 microseconds we wait for PCI Express master disable */ ++#define MASTER_DISABLE_TIMEOUT 800 ++/* Number of milliseconds we wait for Eeprom auto read bit done after MAC reset */ ++#define AUTO_READ_DONE_TIMEOUT 10 ++/* Number of milliseconds we wait for PHY configuration done after MAC reset */ ++#define PHY_CFG_TIMEOUT 100 ++ ++#define E1000_TX_BUFFER_SIZE ((uint32_t)1514) ++ ++/* The carrier extension symbol, as received by the NIC. */ ++#define CARRIER_EXTENSION 0x0F ++ ++/* TBI_ACCEPT macro definition: ++ * ++ * This macro requires: ++ * adapter = a pointer to struct e1000_hw ++ * status = the 8 bit status field of the RX descriptor with EOP set ++ * error = the 8 bit error field of the RX descriptor with EOP set ++ * length = the sum of all the length fields of the RX descriptors that ++ * make up the current frame ++ * last_byte = the last byte of the frame DMAed by the hardware ++ * max_frame_length = the maximum frame length we want to accept. ++ * min_frame_length = the minimum frame length we want to accept. ++ * ++ * This macro is a conditional that should be used in the interrupt ++ * handler's Rx processing routine when RxErrors have been detected. ++ * ++ * Typical use: ++ * ... ++ * if (TBI_ACCEPT) { ++ * accept_frame = TRUE; ++ * e1000_tbi_adjust_stats(adapter, MacAddress); ++ * frame_length--; ++ * } else { ++ * accept_frame = FALSE; ++ * } ++ * ... ++ */ ++ ++#define TBI_ACCEPT(adapter, status, errors, length, last_byte) \ ++ ((adapter)->tbi_compatibility_on && \ ++ (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \ ++ ((last_byte) == CARRIER_EXTENSION) && \ ++ (((status) & E1000_RXD_STAT_VP) ? 
\ ++ (((length) > ((adapter)->min_frame_size - VLAN_TAG_SIZE)) && \ ++ ((length) <= ((adapter)->max_frame_size + 1))) : \ ++ (((length) > (adapter)->min_frame_size) && \ ++ ((length) <= ((adapter)->max_frame_size + VLAN_TAG_SIZE + 1))))) ++ ++ ++/* Structures, enums, and macros for the PHY */ ++ ++/* Bit definitions for the Management Data IO (MDIO) and Management Data ++ * Clock (MDC) pins in the Device Control Register. ++ */ ++#define E1000_CTRL_PHY_RESET_DIR E1000_CTRL_SWDPIO0 ++#define E1000_CTRL_PHY_RESET E1000_CTRL_SWDPIN0 ++#define E1000_CTRL_MDIO_DIR E1000_CTRL_SWDPIO2 ++#define E1000_CTRL_MDIO E1000_CTRL_SWDPIN2 ++#define E1000_CTRL_MDC_DIR E1000_CTRL_SWDPIO3 ++#define E1000_CTRL_MDC E1000_CTRL_SWDPIN3 ++#define E1000_CTRL_PHY_RESET_DIR4 E1000_CTRL_EXT_SDP4_DIR ++#define E1000_CTRL_PHY_RESET4 E1000_CTRL_EXT_SDP4_DATA ++ ++/* PHY 1000 MII Register/Bit Definitions */ ++/* PHY Registers defined by IEEE */ ++#define PHY_CTRL 0x00 /* Control Register */ ++#define PHY_STATUS 0x01 /* Status Regiser */ ++#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ ++#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ ++#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ ++#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ ++#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ ++#define PHY_NEXT_PAGE_TX 0x07 /* Next Page TX */ ++#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */ ++#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ ++#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ ++#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ ++ ++#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ ++#define MAX_PHY_MULTI_PAGE_REG 0xF /* Registers equal on all pages */ ++ ++/* M88E1000 Specific Registers */ ++#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ ++#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */ ++#define M88E1000_INT_ENABLE 0x12 /* Interrupt Enable Register */ ++#define M88E1000_INT_STATUS 0x13 /* Interrupt Status Register */ ++#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */ ++#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */ ++ ++#define M88E1000_PHY_EXT_CTRL 0x1A /* PHY extend control register */ ++#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for page number setting */ ++#define M88E1000_PHY_GEN_CONTROL 0x1E /* Its meaning depends on reg 29 */ ++#define M88E1000_PHY_VCO_REG_BIT8 0x100 /* Bits 8 & 11 are adjusted for */ ++#define M88E1000_PHY_VCO_REG_BIT11 0x800 /* improved BER performance */ ++ ++#define IGP01E1000_IEEE_REGS_PAGE 0x0000 ++#define IGP01E1000_IEEE_RESTART_AUTONEG 0x3300 ++#define IGP01E1000_IEEE_FORCE_GIGA 0x0140 ++ ++/* IGP01E1000 Specific Registers */ ++#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* PHY Specific Port Config Register */ ++#define IGP01E1000_PHY_PORT_STATUS 0x11 /* PHY Specific Status Register */ ++#define IGP01E1000_PHY_PORT_CTRL 0x12 /* PHY Specific Control Register */ ++#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health Register */ ++#define IGP01E1000_GMII_FIFO 0x14 /* GMII FIFO Register */ ++#define IGP01E1000_PHY_CHANNEL_QUALITY 0x15 /* PHY Channel Quality Register */ ++#define IGP02E1000_PHY_POWER_MGMT 0x19 ++#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* PHY Page Select Core Register */ ++ ++/* IGP01E1000 AGC Registers - stores the cable length values*/ ++#define IGP01E1000_PHY_AGC_A 0x1172 ++#define IGP01E1000_PHY_AGC_B 0x1272 ++#define IGP01E1000_PHY_AGC_C 0x1472 ++#define IGP01E1000_PHY_AGC_D 0x1872 ++ ++/* 
IGP02E1000 AGC Registers for cable length values */ ++#define IGP02E1000_PHY_AGC_A 0x11B1 ++#define IGP02E1000_PHY_AGC_B 0x12B1 ++#define IGP02E1000_PHY_AGC_C 0x14B1 ++#define IGP02E1000_PHY_AGC_D 0x18B1 ++ ++/* IGP01E1000 DSP Reset Register */ ++#define IGP01E1000_PHY_DSP_RESET 0x1F33 ++#define IGP01E1000_PHY_DSP_SET 0x1F71 ++#define IGP01E1000_PHY_DSP_FFE 0x1F35 ++ ++#define IGP01E1000_PHY_CHANNEL_NUM 4 ++#define IGP02E1000_PHY_CHANNEL_NUM 4 ++ ++#define IGP01E1000_PHY_AGC_PARAM_A 0x1171 ++#define IGP01E1000_PHY_AGC_PARAM_B 0x1271 ++#define IGP01E1000_PHY_AGC_PARAM_C 0x1471 ++#define IGP01E1000_PHY_AGC_PARAM_D 0x1871 ++ ++#define IGP01E1000_PHY_EDAC_MU_INDEX 0xC000 ++#define IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS 0x8000 ++ ++#define IGP01E1000_PHY_ANALOG_TX_STATE 0x2890 ++#define IGP01E1000_PHY_ANALOG_CLASS_A 0x2000 ++#define IGP01E1000_PHY_FORCE_ANALOG_ENABLE 0x0004 ++#define IGP01E1000_PHY_DSP_FFE_CM_CP 0x0069 ++ ++#define IGP01E1000_PHY_DSP_FFE_DEFAULT 0x002A ++/* IGP01E1000 PCS Initialization register - stores the polarity status when ++ * speed = 1000 Mbps. */ ++#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4 ++#define IGP01E1000_PHY_PCS_CTRL_REG 0x00B5 ++ ++#define IGP01E1000_ANALOG_REGS_PAGE 0x20C0 ++ ++/* Bits... ++ * 15-5: page ++ * 4-0: register offset ++ */ ++#define GG82563_PAGE_SHIFT 5 ++#define GG82563_REG(page, reg) \ ++ (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) ++#define GG82563_MIN_ALT_REG 30 ++ ++/* GG82563 Specific Registers */ ++#define GG82563_PHY_SPEC_CTRL \ ++ GG82563_REG(0, 16) /* PHY Specific Control */ ++#define GG82563_PHY_SPEC_STATUS \ ++ GG82563_REG(0, 17) /* PHY Specific Status */ ++#define GG82563_PHY_INT_ENABLE \ ++ GG82563_REG(0, 18) /* Interrupt Enable */ ++#define GG82563_PHY_SPEC_STATUS_2 \ ++ GG82563_REG(0, 19) /* PHY Specific Status 2 */ ++#define GG82563_PHY_RX_ERR_CNTR \ ++ GG82563_REG(0, 21) /* Receive Error Counter */ ++#define GG82563_PHY_PAGE_SELECT \ ++ GG82563_REG(0, 22) /* Page Select */ ++#define GG82563_PHY_SPEC_CTRL_2 \ ++ GG82563_REG(0, 26) /* PHY Specific Control 2 */ ++#define GG82563_PHY_PAGE_SELECT_ALT \ ++ GG82563_REG(0, 29) /* Alternate Page Select */ ++#define GG82563_PHY_TEST_CLK_CTRL \ ++ GG82563_REG(0, 30) /* Test Clock Control (use reg. 
29 to select) */ ++ ++#define GG82563_PHY_MAC_SPEC_CTRL \ ++ GG82563_REG(2, 21) /* MAC Specific Control Register */ ++#define GG82563_PHY_MAC_SPEC_CTRL_2 \ ++ GG82563_REG(2, 26) /* MAC Specific Control 2 */ ++ ++#define GG82563_PHY_DSP_DISTANCE \ ++ GG82563_REG(5, 26) /* DSP Distance */ ++ ++/* Page 193 - Port Control Registers */ ++#define GG82563_PHY_KMRN_MODE_CTRL \ ++ GG82563_REG(193, 16) /* Kumeran Mode Control */ ++#define GG82563_PHY_PORT_RESET \ ++ GG82563_REG(193, 17) /* Port Reset */ ++#define GG82563_PHY_REVISION_ID \ ++ GG82563_REG(193, 18) /* Revision ID */ ++#define GG82563_PHY_DEVICE_ID \ ++ GG82563_REG(193, 19) /* Device ID */ ++#define GG82563_PHY_PWR_MGMT_CTRL \ ++ GG82563_REG(193, 20) /* Power Management Control */ ++#define GG82563_PHY_RATE_ADAPT_CTRL \ ++ GG82563_REG(193, 25) /* Rate Adaptation Control */ ++ ++/* Page 194 - KMRN Registers */ ++#define GG82563_PHY_KMRN_FIFO_CTRL_STAT \ ++ GG82563_REG(194, 16) /* FIFO's Control/Status */ ++#define GG82563_PHY_KMRN_CTRL \ ++ GG82563_REG(194, 17) /* Control */ ++#define GG82563_PHY_INBAND_CTRL \ ++ GG82563_REG(194, 18) /* Inband Control */ ++#define GG82563_PHY_KMRN_DIAGNOSTIC \ ++ GG82563_REG(194, 19) /* Diagnostic */ ++#define GG82563_PHY_ACK_TIMEOUTS \ ++ GG82563_REG(194, 20) /* Acknowledge Timeouts */ ++#define GG82563_PHY_ADV_ABILITY \ ++ GG82563_REG(194, 21) /* Advertised Ability */ ++#define GG82563_PHY_LINK_PARTNER_ADV_ABILITY \ ++ GG82563_REG(194, 23) /* Link Partner Advertised Ability */ ++#define GG82563_PHY_ADV_NEXT_PAGE \ ++ GG82563_REG(194, 24) /* Advertised Next Page */ ++#define GG82563_PHY_LINK_PARTNER_ADV_NEXT_PAGE \ ++ GG82563_REG(194, 25) /* Link Partner Advertised Next page */ ++#define GG82563_PHY_KMRN_MISC \ ++ GG82563_REG(194, 26) /* Misc. */ ++ ++/* PHY Control Register */ ++#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ ++#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ ++#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ ++#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ ++#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ ++#define MII_CR_POWER_DOWN 0x0800 /* Power down */ ++#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ ++#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ ++#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ ++#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ ++ ++/* PHY Status Register */ ++#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ ++#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ ++#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ ++#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ ++#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ ++#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ ++#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ ++#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. 
status info in Reg 0x0F */ ++#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ ++#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ ++#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ ++#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ ++#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ ++#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ ++#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ ++ ++/* Autoneg Advertisement Register */ ++#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */ ++#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ ++#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ ++#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ ++#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ ++#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */ ++#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ ++#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ ++#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */ ++#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */ ++ ++/* Link Partner Ability Register (Base Page) */ ++#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */ ++#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP is 10T Half Duplex Capable */ ++#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP is 10T Full Duplex Capable */ ++#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP is 100TX Half Duplex Capable */ ++#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP is 100TX Full Duplex Capable */ ++#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */ ++#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ ++#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ ++#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP has detected Remote Fault */ ++#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP has rx'd link code word */ ++#define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */ ++ ++/* Autoneg Expansion Register */ ++#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */ ++#define NWAY_ER_PAGE_RXD 0x0002 /* LP is 10T Half Duplex Capable */ ++#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* LP is 10T Full Duplex Capable */ ++#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP is 100TX Half Duplex Capable */ ++#define NWAY_ER_PAR_DETECT_FAULT 0x0010 /* LP is 100TX Full Duplex Capable */ ++ ++/* Next Page TX Register */ ++#define NPTX_MSG_CODE_FIELD 0x0001 /* NP msg code or unformatted data */ ++#define NPTX_TOGGLE 0x0800 /* Toggles between exchanges ++ * of different NP ++ */ ++#define NPTX_ACKNOWLDGE2 0x1000 /* 1 = will comply with msg ++ * 0 = cannot comply with msg ++ */ ++#define NPTX_MSG_PAGE 0x2000 /* formatted(1)/unformatted(0) pg */ ++#define NPTX_NEXT_PAGE 0x8000 /* 1 = addition NP will follow ++ * 0 = sending last NP ++ */ ++ ++/* Link Partner Next Page Register */ ++#define LP_RNPR_MSG_CODE_FIELD 0x0001 /* NP msg code or unformatted data */ ++#define LP_RNPR_TOGGLE 0x0800 /* Toggles between exchanges ++ * of different NP ++ */ ++#define LP_RNPR_ACKNOWLDGE2 0x1000 /* 1 = will comply with msg ++ * 0 = cannot comply with msg ++ */ ++#define LP_RNPR_MSG_PAGE 0x2000 /* formatted(1)/unformatted(0) pg */ ++#define LP_RNPR_ACKNOWLDGE 0x4000 /* 1 = ACK / 0 = NO ACK */ ++#define LP_RNPR_NEXT_PAGE 0x8000 /* 1 = addition NP will follow ++ * 0 = sending last NP ++ */ ++ ++/* 1000BASE-T Control Register */ ++#define CR_1000T_ASYM_PAUSE 0x0080 
/* Advertise asymmetric pause bit */ ++#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ ++#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ ++#define CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port */ ++ /* 0=DTE device */ ++#define CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */ ++ /* 0=Configure PHY as Slave */ ++#define CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */ ++ /* 0=Automatic Master/Slave config */ ++#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */ ++#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */ ++#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */ ++#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */ ++#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */ ++ ++/* 1000BASE-T Status Register */ ++#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle errors since last read */ ++#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asymmetric pause direction bit */ ++#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */ ++#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */ ++#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ ++#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ ++#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local TX is Master, 0=Slave */ ++#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */ ++#define SR_1000T_REMOTE_RX_STATUS_SHIFT 12 ++#define SR_1000T_LOCAL_RX_STATUS_SHIFT 13 ++#define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT 5 ++#define FFE_IDLE_ERR_COUNT_TIMEOUT_20 20 ++#define FFE_IDLE_ERR_COUNT_TIMEOUT_100 100 ++ ++/* Extended Status Register */ ++#define IEEE_ESR_1000T_HD_CAPS 0x1000 /* 1000T HD capable */ ++#define IEEE_ESR_1000T_FD_CAPS 0x2000 /* 1000T FD capable */ ++#define IEEE_ESR_1000X_HD_CAPS 0x4000 /* 1000X HD capable */ ++#define IEEE_ESR_1000X_FD_CAPS 0x8000 /* 1000X FD capable */ ++ ++#define PHY_TX_POLARITY_MASK 0x0100 /* register 10h bit 8 (polarity bit) */ ++#define PHY_TX_NORMAL_POLARITY 0 /* register 10h bit 8 (normal polarity) */ ++ ++#define AUTO_POLARITY_DISABLE 0x0010 /* register 11h bit 4 */ ++ /* (0=enable, 1=disable) */ ++ ++/* M88E1000 PHY Specific Control Register */ ++#define M88E1000_PSCR_JABBER_DISABLE 0x0001 /* 1=Jabber Function disabled */ ++#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */ ++#define M88E1000_PSCR_SQE_TEST 0x0004 /* 1=SQE Test enabled */ ++#define M88E1000_PSCR_CLK125_DISABLE 0x0010 /* 1=CLK125 low, ++ * 0=CLK125 toggling ++ */ ++#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 /* MDI Crossover Mode bits 6:5 */ ++ /* Manual MDI configuration */ ++#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ ++#define M88E1000_PSCR_AUTO_X_1000T 0x0040 /* 1000BASE-T: Auto crossover, ++ * 100BASE-TX/10BASE-T: ++ * MDI Mode ++ */ ++#define M88E1000_PSCR_AUTO_X_MODE 0x0060 /* Auto crossover enabled ++ * all speeds. 
++ */ ++#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE 0x0080 ++ /* 1=Enable Extended 10BASE-T distance ++ * (Lower 10BASE-T RX Threshold) ++ * 0=Normal 10BASE-T RX Threshold */ ++#define M88E1000_PSCR_MII_5BIT_ENABLE 0x0100 ++ /* 1=5-Bit interface in 100BASE-TX ++ * 0=MII interface in 100BASE-TX */ ++#define M88E1000_PSCR_SCRAMBLER_DISABLE 0x0200 /* 1=Scrambler disable */ ++#define M88E1000_PSCR_FORCE_LINK_GOOD 0x0400 /* 1=Force link good */ ++#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */ ++ ++#define M88E1000_PSCR_POLARITY_REVERSAL_SHIFT 1 ++#define M88E1000_PSCR_AUTO_X_MODE_SHIFT 5 ++#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT 7 ++ ++/* M88E1000 PHY Specific Status Register */ ++#define M88E1000_PSSR_JABBER 0x0001 /* 1=Jabber */ ++#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ ++#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ ++#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ ++#define M88E1000_PSSR_CABLE_LENGTH 0x0380 /* 0=<50M;1=50-80M;2=80-110M; ++ * 3=110-140M;4=>140M */ ++#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */ ++#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */ ++#define M88E1000_PSSR_PAGE_RCVD 0x1000 /* 1=Page received */ ++#define M88E1000_PSSR_DPLX 0x2000 /* 1=Duplex 0=Half Duplex */ ++#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ ++#define M88E1000_PSSR_10MBS 0x0000 /* 00=10Mbs */ ++#define M88E1000_PSSR_100MBS 0x4000 /* 01=100Mbs */ ++#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ ++ ++#define M88E1000_PSSR_REV_POLARITY_SHIFT 1 ++#define M88E1000_PSSR_DOWNSHIFT_SHIFT 5 ++#define M88E1000_PSSR_MDIX_SHIFT 6 ++#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 ++ ++/* M88E1000 Extended PHY Specific Control Register */ ++#define M88E1000_EPSCR_FIBER_LOOPBACK 0x4000 /* 1=Fiber loopback */ ++#define M88E1000_EPSCR_DOWN_NO_IDLE 0x8000 /* 1=Lost lock detect enabled. 
++ * Will assert lost lock and bring ++ * link down if idle not seen ++ * within 1ms in 1000BASE-T ++ */ ++/* Number of times we will attempt to autonegotiate before downshifting if we ++ * are the master */ ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_2X 0x0400 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_3X 0x0800 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_4X 0x0C00 ++/* Number of times we will attempt to autonegotiate before downshifting if we ++ * are the slave */ ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_DIS 0x0000 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_2X 0x0200 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_3X 0x0300 ++#define M88E1000_EPSCR_TX_CLK_2_5 0x0060 /* 2.5 MHz TX_CLK */ ++#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ ++#define M88E1000_EPSCR_TX_CLK_0 0x0000 /* NO TX_CLK */ ++ ++/* M88EC018 Rev 2 specific DownShift settings */ ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_1X 0x0000 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_2X 0x0200 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_3X 0x0400 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_4X 0x0600 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_6X 0x0A00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_7X 0x0C00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_8X 0x0E00 ++ ++/* IGP01E1000 Specific Port Config Register - R/W */ ++#define IGP01E1000_PSCFR_AUTO_MDIX_PAR_DETECT 0x0010 ++#define IGP01E1000_PSCFR_PRE_EN 0x0020 ++#define IGP01E1000_PSCFR_SMART_SPEED 0x0080 ++#define IGP01E1000_PSCFR_DISABLE_TPLOOPBACK 0x0100 ++#define IGP01E1000_PSCFR_DISABLE_JABBER 0x0400 ++#define IGP01E1000_PSCFR_DISABLE_TRANSMIT 0x2000 ++ ++/* IGP01E1000 Specific Port Status Register - R/O */ ++#define IGP01E1000_PSSR_AUTONEG_FAILED 0x0001 /* RO LH SC */ ++#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 ++#define IGP01E1000_PSSR_CABLE_LENGTH 0x007C ++#define IGP01E1000_PSSR_FULL_DUPLEX 0x0200 ++#define IGP01E1000_PSSR_LINK_UP 0x0400 ++#define IGP01E1000_PSSR_MDIX 0x0800 ++#define IGP01E1000_PSSR_SPEED_MASK 0xC000 /* speed bits mask */ ++#define IGP01E1000_PSSR_SPEED_10MBPS 0x4000 ++#define IGP01E1000_PSSR_SPEED_100MBPS 0x8000 ++#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 ++#define IGP01E1000_PSSR_CABLE_LENGTH_SHIFT 0x0002 /* shift right 2 */ ++#define IGP01E1000_PSSR_MDIX_SHIFT 0x000B /* shift right 11 */ ++ ++/* IGP01E1000 Specific Port Control Register - R/W */ ++#define IGP01E1000_PSCR_TP_LOOPBACK 0x0010 ++#define IGP01E1000_PSCR_CORRECT_NC_SCMBLR 0x0200 ++#define IGP01E1000_PSCR_TEN_CRS_SELECT 0x0400 ++#define IGP01E1000_PSCR_FLIP_CHIP 0x0800 ++#define IGP01E1000_PSCR_AUTO_MDIX 0x1000 ++#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0-MDI, 1-MDIX */ ++ ++/* IGP01E1000 Specific Port Link Health Register */ ++#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 ++#define IGP01E1000_PLHR_GIG_SCRAMBLER_ERROR 0x4000 ++#define IGP01E1000_PLHR_MASTER_FAULT 0x2000 ++#define IGP01E1000_PLHR_MASTER_RESOLUTION 0x1000 ++#define IGP01E1000_PLHR_GIG_REM_RCVR_NOK 0x0800 /* LH */ ++#define IGP01E1000_PLHR_IDLE_ERROR_CNT_OFLOW 0x0400 /* LH */ ++#define IGP01E1000_PLHR_DATA_ERR_1 0x0200 /* LH */ ++#define IGP01E1000_PLHR_DATA_ERR_0 0x0100 ++#define IGP01E1000_PLHR_AUTONEG_FAULT 0x0040 ++#define IGP01E1000_PLHR_AUTONEG_ACTIVE 0x0010 ++#define IGP01E1000_PLHR_VALID_CHANNEL_D 
0x0008 ++#define IGP01E1000_PLHR_VALID_CHANNEL_C 0x0004 ++#define IGP01E1000_PLHR_VALID_CHANNEL_B 0x0002 ++#define IGP01E1000_PLHR_VALID_CHANNEL_A 0x0001 ++ ++/* IGP01E1000 Channel Quality Register */ ++#define IGP01E1000_MSE_CHANNEL_D 0x000F ++#define IGP01E1000_MSE_CHANNEL_C 0x00F0 ++#define IGP01E1000_MSE_CHANNEL_B 0x0F00 ++#define IGP01E1000_MSE_CHANNEL_A 0xF000 ++ ++#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */ ++#define IGP02E1000_PM_D3_LPLU 0x0004 /* Enable LPLU in non-D0a modes */ ++#define IGP02E1000_PM_D0_LPLU 0x0002 /* Enable LPLU in D0a mode */ ++ ++/* IGP01E1000 DSP reset macros */ ++#define DSP_RESET_ENABLE 0x0 ++#define DSP_RESET_DISABLE 0x2 ++#define E1000_MAX_DSP_RESETS 10 ++ ++/* IGP01E1000 & IGP02E1000 AGC Registers */ ++ ++#define IGP01E1000_AGC_LENGTH_SHIFT 7 /* Coarse - 13:11, Fine - 10:7 */ ++#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Coarse - 15:13, Fine - 12:9 */ ++ ++/* IGP02E1000 AGC Register Length 9-bit mask */ ++#define IGP02E1000_AGC_LENGTH_MASK 0x7F ++ ++/* 7 bits (3 Coarse + 4 Fine) --> 128 optional values */ ++#define IGP01E1000_AGC_LENGTH_TABLE_SIZE 128 ++#define IGP02E1000_AGC_LENGTH_TABLE_SIZE 113 ++ ++/* The precision error of the cable length is +/- 10 meters */ ++#define IGP01E1000_AGC_RANGE 10 ++#define IGP02E1000_AGC_RANGE 15 ++ ++/* IGP01E1000 PCS Initialization register */ ++/* bits 3:6 in the PCS registers stores the channels polarity */ ++#define IGP01E1000_PHY_POLARITY_MASK 0x0078 ++ ++/* IGP01E1000 GMII FIFO Register */ ++#define IGP01E1000_GMII_FLEX_SPD 0x10 /* Enable flexible speed ++ * on Link-Up */ ++#define IGP01E1000_GMII_SPD 0x20 /* Enable SPD */ ++ ++/* IGP01E1000 Analog Register */ ++#define IGP01E1000_ANALOG_SPARE_FUSE_STATUS 0x20D1 ++#define IGP01E1000_ANALOG_FUSE_STATUS 0x20D0 ++#define IGP01E1000_ANALOG_FUSE_CONTROL 0x20DC ++#define IGP01E1000_ANALOG_FUSE_BYPASS 0x20DE ++ ++#define IGP01E1000_ANALOG_FUSE_POLY_MASK 0xF000 ++#define IGP01E1000_ANALOG_FUSE_FINE_MASK 0x0F80 ++#define IGP01E1000_ANALOG_FUSE_COARSE_MASK 0x0070 ++#define IGP01E1000_ANALOG_SPARE_FUSE_ENABLED 0x0100 ++#define IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL 0x0002 ++ ++#define IGP01E1000_ANALOG_FUSE_COARSE_THRESH 0x0040 ++#define IGP01E1000_ANALOG_FUSE_COARSE_10 0x0010 ++#define IGP01E1000_ANALOG_FUSE_FINE_1 0x0080 ++#define IGP01E1000_ANALOG_FUSE_FINE_10 0x0500 ++ ++/* GG82563 PHY Specific Status Register (Page 0, Register 16 */ ++#define GG82563_PSCR_DISABLE_JABBER 0x0001 /* 1=Disable Jabber */ ++#define GG82563_PSCR_POLARITY_REVERSAL_DISABLE 0x0002 /* 1=Polarity Reversal Disabled */ ++#define GG82563_PSCR_POWER_DOWN 0x0004 /* 1=Power Down */ ++#define GG82563_PSCR_COPPER_TRANSMITER_DISABLE 0x0008 /* 1=Transmitter Disabled */ ++#define GG82563_PSCR_CROSSOVER_MODE_MASK 0x0060 ++#define GG82563_PSCR_CROSSOVER_MODE_MDI 0x0000 /* 00=Manual MDI configuration */ ++#define GG82563_PSCR_CROSSOVER_MODE_MDIX 0x0020 /* 01=Manual MDIX configuration */ ++#define GG82563_PSCR_CROSSOVER_MODE_AUTO 0x0060 /* 11=Automatic crossover */ ++#define GG82563_PSCR_ENALBE_EXTENDED_DISTANCE 0x0080 /* 1=Enable Extended Distance */ ++#define GG82563_PSCR_ENERGY_DETECT_MASK 0x0300 ++#define GG82563_PSCR_ENERGY_DETECT_OFF 0x0000 /* 00,01=Off */ ++#define GG82563_PSCR_ENERGY_DETECT_RX 0x0200 /* 10=Sense on Rx only (Energy Detect) */ ++#define GG82563_PSCR_ENERGY_DETECT_RX_TM 0x0300 /* 11=Sense and Tx NLP */ ++#define GG82563_PSCR_FORCE_LINK_GOOD 0x0400 /* 1=Force Link Good */ ++#define GG82563_PSCR_DOWNSHIFT_ENABLE 0x0800 /* 1=Enable Downshift */ ++#define 
GG82563_PSCR_DOWNSHIFT_COUNTER_MASK 0x7000 ++#define GG82563_PSCR_DOWNSHIFT_COUNTER_SHIFT 12 ++ ++/* PHY Specific Status Register (Page 0, Register 17) */ ++#define GG82563_PSSR_JABBER 0x0001 /* 1=Jabber */ ++#define GG82563_PSSR_POLARITY 0x0002 /* 1=Polarity Reversed */ ++#define GG82563_PSSR_LINK 0x0008 /* 1=Link is Up */ ++#define GG82563_PSSR_ENERGY_DETECT 0x0010 /* 1=Sleep, 0=Active */ ++#define GG82563_PSSR_DOWNSHIFT 0x0020 /* 1=Downshift */ ++#define GG82563_PSSR_CROSSOVER_STATUS 0x0040 /* 1=MDIX, 0=MDI */ ++#define GG82563_PSSR_RX_PAUSE_ENABLED 0x0100 /* 1=Receive Pause Enabled */ ++#define GG82563_PSSR_TX_PAUSE_ENABLED 0x0200 /* 1=Transmit Pause Enabled */ ++#define GG82563_PSSR_LINK_UP 0x0400 /* 1=Link Up */ ++#define GG82563_PSSR_SPEED_DUPLEX_RESOLVED 0x0800 /* 1=Resolved */ ++#define GG82563_PSSR_PAGE_RECEIVED 0x1000 /* 1=Page Received */ ++#define GG82563_PSSR_DUPLEX 0x2000 /* 1-Full-Duplex */ ++#define GG82563_PSSR_SPEED_MASK 0xC000 ++#define GG82563_PSSR_SPEED_10MBPS 0x0000 /* 00=10Mbps */ ++#define GG82563_PSSR_SPEED_100MBPS 0x4000 /* 01=100Mbps */ ++#define GG82563_PSSR_SPEED_1000MBPS 0x8000 /* 10=1000Mbps */ ++ ++/* PHY Specific Status Register 2 (Page 0, Register 19) */ ++#define GG82563_PSSR2_JABBER 0x0001 /* 1=Jabber */ ++#define GG82563_PSSR2_POLARITY_CHANGED 0x0002 /* 1=Polarity Changed */ ++#define GG82563_PSSR2_ENERGY_DETECT_CHANGED 0x0010 /* 1=Energy Detect Changed */ ++#define GG82563_PSSR2_DOWNSHIFT_INTERRUPT 0x0020 /* 1=Downshift Detected */ ++#define GG82563_PSSR2_MDI_CROSSOVER_CHANGE 0x0040 /* 1=Crossover Changed */ ++#define GG82563_PSSR2_FALSE_CARRIER 0x0100 /* 1=False Carrier */ ++#define GG82563_PSSR2_SYMBOL_ERROR 0x0200 /* 1=Symbol Error */ ++#define GG82563_PSSR2_LINK_STATUS_CHANGED 0x0400 /* 1=Link Status Changed */ ++#define GG82563_PSSR2_AUTO_NEG_COMPLETED 0x0800 /* 1=Auto-Neg Completed */ ++#define GG82563_PSSR2_PAGE_RECEIVED 0x1000 /* 1=Page Received */ ++#define GG82563_PSSR2_DUPLEX_CHANGED 0x2000 /* 1=Duplex Changed */ ++#define GG82563_PSSR2_SPEED_CHANGED 0x4000 /* 1=Speed Changed */ ++#define GG82563_PSSR2_AUTO_NEG_ERROR 0x8000 /* 1=Auto-Neg Error */ ++ ++/* PHY Specific Control Register 2 (Page 0, Register 26) */ ++#define GG82563_PSCR2_10BT_POLARITY_FORCE 0x0002 /* 1=Force Negative Polarity */ ++#define GG82563_PSCR2_1000MB_TEST_SELECT_MASK 0x000C ++#define GG82563_PSCR2_1000MB_TEST_SELECT_NORMAL 0x0000 /* 00,01=Normal Operation */ ++#define GG82563_PSCR2_1000MB_TEST_SELECT_112NS 0x0008 /* 10=Select 112ns Sequence */ ++#define GG82563_PSCR2_1000MB_TEST_SELECT_16NS 0x000C /* 11=Select 16ns Sequence */ ++#define GG82563_PSCR2_REVERSE_AUTO_NEG 0x2000 /* 1=Reverse Auto-Negotiation */ ++#define GG82563_PSCR2_1000BT_DISABLE 0x4000 /* 1=Disable 1000BASE-T */ ++#define GG82563_PSCR2_TRANSMITER_TYPE_MASK 0x8000 ++#define GG82563_PSCR2_TRANSMITTER_TYPE_CLASS_B 0x0000 /* 0=Class B */ ++#define GG82563_PSCR2_TRANSMITTER_TYPE_CLASS_A 0x8000 /* 1=Class A */ ++ ++/* MAC Specific Control Register (Page 2, Register 21) */ ++/* Tx clock speed for Link Down and 1000BASE-T for the following speeds */ ++#define GG82563_MSCR_TX_CLK_MASK 0x0007 ++#define GG82563_MSCR_TX_CLK_10MBPS_2_5MHZ 0x0004 ++#define GG82563_MSCR_TX_CLK_100MBPS_25MHZ 0x0005 ++#define GG82563_MSCR_TX_CLK_1000MBPS_2_5MHZ 0x0006 ++#define GG82563_MSCR_TX_CLK_1000MBPS_25MHZ 0x0007 ++ ++#define GG82563_MSCR_ASSERT_CRS_ON_TX 0x0010 /* 1=Assert */ ++ ++/* DSP Distance Register (Page 5, Register 26) */ ++#define GG82563_DSPD_CABLE_LENGTH 0x0007 /* 0 = <50M; ++ 1 = 50-80M; ++ 2 = 80-110M; ++ 3 = 
110-140M; ++ 4 = >140M */ ++ ++/* Kumeran Mode Control Register (Page 193, Register 16) */ ++#define GG82563_KMCR_PHY_LEDS_EN 0x0020 /* 1=PHY LEDs, 0=Kumeran Inband LEDs */ ++#define GG82563_KMCR_FORCE_LINK_UP 0x0040 /* 1=Force Link Up */ ++#define GG82563_KMCR_SUPPRESS_SGMII_EPD_EXT 0x0080 ++#define GG82563_KMCR_MDIO_BUS_SPEED_SELECT_MASK 0x0400 ++#define GG82563_KMCR_MDIO_BUS_SPEED_SELECT 0x0400 /* 1=6.25MHz, 0=0.8MHz */ ++#define GG82563_KMCR_PASS_FALSE_CARRIER 0x0800 ++ ++/* Power Management Control Register (Page 193, Register 20) */ ++#define GG82563_PMCR_ENABLE_ELECTRICAL_IDLE 0x0001 /* 1=Enalbe SERDES Electrical Idle */ ++#define GG82563_PMCR_DISABLE_PORT 0x0002 /* 1=Disable Port */ ++#define GG82563_PMCR_DISABLE_SERDES 0x0004 /* 1=Disable SERDES */ ++#define GG82563_PMCR_REVERSE_AUTO_NEG 0x0008 /* 1=Enable Reverse Auto-Negotiation */ ++#define GG82563_PMCR_DISABLE_1000_NON_D0 0x0010 /* 1=Disable 1000Mbps Auto-Neg in non D0 */ ++#define GG82563_PMCR_DISABLE_1000 0x0020 /* 1=Disable 1000Mbps Auto-Neg Always */ ++#define GG82563_PMCR_REVERSE_AUTO_NEG_D0A 0x0040 /* 1=Enable D0a Reverse Auto-Negotiation */ ++#define GG82563_PMCR_FORCE_POWER_STATE 0x0080 /* 1=Force Power State */ ++#define GG82563_PMCR_PROGRAMMED_POWER_STATE_MASK 0x0300 ++#define GG82563_PMCR_PROGRAMMED_POWER_STATE_DR 0x0000 /* 00=Dr */ ++#define GG82563_PMCR_PROGRAMMED_POWER_STATE_D0U 0x0100 /* 01=D0u */ ++#define GG82563_PMCR_PROGRAMMED_POWER_STATE_D0A 0x0200 /* 10=D0a */ ++#define GG82563_PMCR_PROGRAMMED_POWER_STATE_D3 0x0300 /* 11=D3 */ ++ ++/* In-Band Control Register (Page 194, Register 18) */ ++#define GG82563_ICR_DIS_PADDING 0x0010 /* Disable Padding Use */ ++ ++ ++/* Bit definitions for valid PHY IDs. */ ++/* I = Integrated ++ * E = External ++ */ ++#define M88E1000_E_PHY_ID 0x01410C50 ++#define M88E1000_I_PHY_ID 0x01410C30 ++#define M88E1011_I_PHY_ID 0x01410C20 ++#define IGP01E1000_I_PHY_ID 0x02A80380 ++#define M88E1000_12_PHY_ID M88E1000_E_PHY_ID ++#define M88E1000_14_PHY_ID M88E1000_E_PHY_ID ++#define M88E1011_I_REV_4 0x04 ++#define M88E1111_I_PHY_ID 0x01410CC0 ++#define L1LXT971A_PHY_ID 0x001378E0 ++#define GG82563_E_PHY_ID 0x01410CA0 ++ ++ ++/* Bits... 
++ * 15-5: page ++ * 4-0: register offset ++ */ ++#define PHY_PAGE_SHIFT 5 ++#define PHY_REG(page, reg) \ ++ (((page) << PHY_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) ++ ++#define IGP3_PHY_PORT_CTRL \ ++ PHY_REG(769, 17) /* Port General Configuration */ ++#define IGP3_PHY_RATE_ADAPT_CTRL \ ++ PHY_REG(769, 25) /* Rate Adapter Control Register */ ++ ++#define IGP3_KMRN_FIFO_CTRL_STATS \ ++ PHY_REG(770, 16) /* KMRN FIFO's control/status register */ ++#define IGP3_KMRN_POWER_MNG_CTRL \ ++ PHY_REG(770, 17) /* KMRN Power Management Control Register */ ++#define IGP3_KMRN_INBAND_CTRL \ ++ PHY_REG(770, 18) /* KMRN Inband Control Register */ ++#define IGP3_KMRN_DIAG \ ++ PHY_REG(770, 19) /* KMRN Diagnostic register */ ++#define IGP3_KMRN_DIAG_PCS_LOCK_LOSS 0x0002 /* RX PCS is not synced */ ++#define IGP3_KMRN_ACK_TIMEOUT \ ++ PHY_REG(770, 20) /* KMRN Acknowledge Timeouts register */ ++ ++#define IGP3_VR_CTRL \ ++ PHY_REG(776, 18) /* Voltage regulator control register */ ++#define IGP3_VR_CTRL_MODE_SHUT 0x0200 /* Enter powerdown, shutdown VRs */ ++ ++#define IGP3_CAPABILITY \ ++ PHY_REG(776, 19) /* IGP3 Capability Register */ ++ ++/* Capabilities for SKU Control */ ++#define IGP3_CAP_INITIATE_TEAM 0x0001 /* Able to initiate a team */ ++#define IGP3_CAP_WFM 0x0002 /* Support WoL and PXE */ ++#define IGP3_CAP_ASF 0x0004 /* Support ASF */ ++#define IGP3_CAP_LPLU 0x0008 /* Support Low Power Link Up */ ++#define IGP3_CAP_DC_AUTO_SPEED 0x0010 /* Support AC/DC Auto Link Speed */ ++#define IGP3_CAP_SPD 0x0020 /* Support Smart Power Down */ ++#define IGP3_CAP_MULT_QUEUE 0x0040 /* Support 2 tx & 2 rx queues */ ++#define IGP3_CAP_RSS 0x0080 /* Support RSS */ ++#define IGP3_CAP_8021PQ 0x0100 /* Support 802.1Q & 802.1p */ ++#define IGP3_CAP_AMT_CB 0x0200 /* Support active manageability and circuit breaker */ ++ ++#define IGP3_PPC_JORDAN_EN 0x0001 ++#define IGP3_PPC_JORDAN_GIGA_SPEED 0x0002 ++ ++#define IGP3_KMRN_PMC_EE_IDLE_LINK_DIS 0x0001 ++#define IGP3_KMRN_PMC_K0S_ENTRY_LATENCY_MASK 0x001E ++#define IGP3_KMRN_PMC_K0S_MODE1_EN_GIGA 0x0020 ++#define IGP3_KMRN_PMC_K0S_MODE1_EN_100 0x0040 ++ ++#define IGP3E1000_PHY_MISC_CTRL 0x1B /* Misc. 
Ctrl register */ ++#define IGP3_PHY_MISC_DUPLEX_MANUAL_SET 0x1000 /* Duplex Manual Set */ ++ ++#define IGP3_KMRN_EXT_CTRL PHY_REG(770, 18) ++#define IGP3_KMRN_EC_DIS_INBAND 0x0080 ++ ++#define IGP03E1000_E_PHY_ID 0x02A80390 ++#define IFE_E_PHY_ID 0x02A80330 /* 10/100 PHY */ ++#define IFE_PLUS_E_PHY_ID 0x02A80320 ++#define IFE_C_E_PHY_ID 0x02A80310 ++ ++#define IFE_PHY_EXTENDED_STATUS_CONTROL 0x10 /* 100BaseTx Extended Status, Control and Address */ ++#define IFE_PHY_SPECIAL_CONTROL 0x11 /* 100BaseTx PHY special control register */ ++#define IFE_PHY_RCV_FALSE_CARRIER 0x13 /* 100BaseTx Receive False Carrier Counter */ ++#define IFE_PHY_RCV_DISCONNECT 0x14 /* 100BaseTx Receive Disconnet Counter */ ++#define IFE_PHY_RCV_ERROT_FRAME 0x15 /* 100BaseTx Receive Error Frame Counter */ ++#define IFE_PHY_RCV_SYMBOL_ERR 0x16 /* Receive Symbol Error Counter */ ++#define IFE_PHY_PREM_EOF_ERR 0x17 /* 100BaseTx Receive Premature End Of Frame Error Counter */ ++#define IFE_PHY_RCV_EOF_ERR 0x18 /* 10BaseT Receive End Of Frame Error Counter */ ++#define IFE_PHY_TX_JABBER_DETECT 0x19 /* 10BaseT Transmit Jabber Detect Counter */ ++#define IFE_PHY_EQUALIZER 0x1A /* PHY Equalizer Control and Status */ ++#define IFE_PHY_SPECIAL_CONTROL_LED 0x1B /* PHY special control and LED configuration */ ++#define IFE_PHY_MDIX_CONTROL 0x1C /* MDI/MDI-X Control register */ ++#define IFE_PHY_HWI_CONTROL 0x1D /* Hardware Integrity Control (HWI) */ ++ ++#define IFE_PESC_REDUCED_POWER_DOWN_DISABLE 0x2000 /* Defaut 1 = Disable auto reduced power down */ ++#define IFE_PESC_100BTX_POWER_DOWN 0x0400 /* Indicates the power state of 100BASE-TX */ ++#define IFE_PESC_10BTX_POWER_DOWN 0x0200 /* Indicates the power state of 10BASE-T */ ++#define IFE_PESC_POLARITY_REVERSED 0x0100 /* Indicates 10BASE-T polarity */ ++#define IFE_PESC_PHY_ADDR_MASK 0x007C /* Bit 6:2 for sampled PHY address */ ++#define IFE_PESC_SPEED 0x0002 /* Auto-negotiation speed result 1=100Mbs, 0=10Mbs */ ++#define IFE_PESC_DUPLEX 0x0001 /* Auto-negotiation duplex result 1=Full, 0=Half */ ++#define IFE_PESC_POLARITY_REVERSED_SHIFT 8 ++ ++#define IFE_PSC_DISABLE_DYNAMIC_POWER_DOWN 0x0100 /* 1 = Dyanmic Power Down disabled */ ++#define IFE_PSC_FORCE_POLARITY 0x0020 /* 1=Reversed Polarity, 0=Normal */ ++#define IFE_PSC_AUTO_POLARITY_DISABLE 0x0010 /* 1=Auto Polarity Disabled, 0=Enabled */ ++#define IFE_PSC_JABBER_FUNC_DISABLE 0x0001 /* 1=Jabber Disabled, 0=Normal Jabber Operation */ ++#define IFE_PSC_FORCE_POLARITY_SHIFT 5 ++#define IFE_PSC_AUTO_POLARITY_DISABLE_SHIFT 4 ++ ++#define IFE_PMC_AUTO_MDIX 0x0080 /* 1=enable MDI/MDI-X feature, default 0=disabled */ ++#define IFE_PMC_FORCE_MDIX 0x0040 /* 1=force MDIX-X, 0=force MDI */ ++#define IFE_PMC_MDIX_STATUS 0x0020 /* 1=MDI-X, 0=MDI */ ++#define IFE_PMC_AUTO_MDIX_COMPLETE 0x0010 /* Resolution algorthm is completed */ ++#define IFE_PMC_MDIX_MODE_SHIFT 6 ++#define IFE_PHC_MDIX_RESET_ALL_MASK 0x0000 /* Disable auto MDI-X */ ++ ++#define IFE_PHC_HWI_ENABLE 0x8000 /* Enable the HWI feature */ ++#define IFE_PHC_ABILITY_CHECK 0x4000 /* 1= Test Passed, 0=failed */ ++#define IFE_PHC_TEST_EXEC 0x2000 /* PHY launch test pulses on the wire */ ++#define IFE_PHC_HIGHZ 0x0200 /* 1 = Open Circuit */ ++#define IFE_PHC_LOWZ 0x0400 /* 1 = Short Circuit */ ++#define IFE_PHC_LOW_HIGH_Z_MASK 0x0600 /* Mask for indication type of problem on the line */ ++#define IFE_PHC_DISTANCE_MASK 0x01FF /* Mask for distance to the cable problem, in 80cm granularity */ ++#define IFE_PHC_RESET_ALL_MASK 0x0000 /* Disable HWI */ ++#define IFE_PSCL_PROBE_MODE 0x0020 
/* LED Probe mode */ ++#define IFE_PSCL_PROBE_LEDS_OFF 0x0006 /* Force LEDs 0 and 2 off */ ++#define IFE_PSCL_PROBE_LEDS_ON 0x0007 /* Force LEDs 0 and 2 on */ ++ ++#define ICH8_FLASH_COMMAND_TIMEOUT 500 /* 500 ms , should be adjusted */ ++#define ICH8_FLASH_CYCLE_REPEAT_COUNT 10 /* 10 cycles , should be adjusted */ ++#define ICH8_FLASH_SEG_SIZE_256 256 ++#define ICH8_FLASH_SEG_SIZE_4K 4096 ++#define ICH8_FLASH_SEG_SIZE_64K 65536 ++ ++#define ICH8_CYCLE_READ 0x0 ++#define ICH8_CYCLE_RESERVED 0x1 ++#define ICH8_CYCLE_WRITE 0x2 ++#define ICH8_CYCLE_ERASE 0x3 ++ ++#define ICH8_FLASH_GFPREG 0x0000 ++#define ICH8_FLASH_HSFSTS 0x0004 ++#define ICH8_FLASH_HSFCTL 0x0006 ++#define ICH8_FLASH_FADDR 0x0008 ++#define ICH8_FLASH_FDATA0 0x0010 ++#define ICH8_FLASH_FRACC 0x0050 ++#define ICH8_FLASH_FREG0 0x0054 ++#define ICH8_FLASH_FREG1 0x0058 ++#define ICH8_FLASH_FREG2 0x005C ++#define ICH8_FLASH_FREG3 0x0060 ++#define ICH8_FLASH_FPR0 0x0074 ++#define ICH8_FLASH_FPR1 0x0078 ++#define ICH8_FLASH_SSFSTS 0x0090 ++#define ICH8_FLASH_SSFCTL 0x0092 ++#define ICH8_FLASH_PREOP 0x0094 ++#define ICH8_FLASH_OPTYPE 0x0096 ++#define ICH8_FLASH_OPMENU 0x0098 ++ ++#define ICH8_FLASH_REG_MAPSIZE 0x00A0 ++#define ICH8_FLASH_SECTOR_SIZE 4096 ++#define ICH8_GFPREG_BASE_MASK 0x1FFF ++#define ICH8_FLASH_LINEAR_ADDR_MASK 0x00FFFFFF ++ ++/* ICH8 GbE Flash Hardware Sequencing Flash Status Register bit breakdown */ ++/* Offset 04h HSFSTS */ ++union ich8_hws_flash_status { ++ struct ich8_hsfsts { ++#ifdef E1000_BIG_ENDIAN ++ uint16_t reserved2 :6; ++ uint16_t fldesvalid :1; ++ uint16_t flockdn :1; ++ uint16_t flcdone :1; ++ uint16_t flcerr :1; ++ uint16_t dael :1; ++ uint16_t berasesz :2; ++ uint16_t flcinprog :1; ++ uint16_t reserved1 :2; ++#else ++ uint16_t flcdone :1; /* bit 0 Flash Cycle Done */ ++ uint16_t flcerr :1; /* bit 1 Flash Cycle Error */ ++ uint16_t dael :1; /* bit 2 Direct Access error Log */ ++ uint16_t berasesz :2; /* bit 4:3 Block/Sector Erase Size */ ++ uint16_t flcinprog :1; /* bit 5 flash SPI cycle in Progress */ ++ uint16_t reserved1 :2; /* bit 13:6 Reserved */ ++ uint16_t reserved2 :6; /* bit 13:6 Reserved */ ++ uint16_t fldesvalid :1; /* bit 14 Flash Descriptor Valid */ ++ uint16_t flockdn :1; /* bit 15 Flash Configuration Lock-Down */ ++#endif ++ } hsf_status; ++ uint16_t regval; ++}; ++ ++/* ICH8 GbE Flash Hardware Sequencing Flash control Register bit breakdown */ ++/* Offset 06h FLCTL */ ++union ich8_hws_flash_ctrl { ++ struct ich8_hsflctl { ++#ifdef E1000_BIG_ENDIAN ++ uint16_t fldbcount :2; ++ uint16_t flockdn :6; ++ uint16_t flcgo :1; ++ uint16_t flcycle :2; ++ uint16_t reserved :5; ++#else ++ uint16_t flcgo :1; /* 0 Flash Cycle Go */ ++ uint16_t flcycle :2; /* 2:1 Flash Cycle */ ++ uint16_t reserved :5; /* 7:3 Reserved */ ++ uint16_t fldbcount :2; /* 9:8 Flash Data Byte Count */ ++ uint16_t flockdn :6; /* 15:10 Reserved */ ++#endif ++ } hsf_ctrl; ++ uint16_t regval; ++}; ++ ++/* ICH8 Flash Region Access Permissions */ ++union ich8_hws_flash_regacc { ++ struct ich8_flracc { ++#ifdef E1000_BIG_ENDIAN ++ uint32_t gmwag :8; ++ uint32_t gmrag :8; ++ uint32_t grwa :8; ++ uint32_t grra :8; ++#else ++ uint32_t grra :8; /* 0:7 GbE region Read Access */ ++ uint32_t grwa :8; /* 8:15 GbE region Write Access */ ++ uint32_t gmrag :8; /* 23:16 GbE Master Read Access Grant */ ++ uint32_t gmwag :8; /* 31:24 GbE Master Write Access Grant */ ++#endif ++ } hsf_flregacc; ++ uint16_t regval; ++}; ++ ++/* Miscellaneous PHY bit definitions. 
*/ ++#define PHY_PREAMBLE 0xFFFFFFFF ++#define PHY_SOF 0x01 ++#define PHY_OP_READ 0x02 ++#define PHY_OP_WRITE 0x01 ++#define PHY_TURNAROUND 0x02 ++#define PHY_PREAMBLE_SIZE 32 ++#define MII_CR_SPEED_1000 0x0040 ++#define MII_CR_SPEED_100 0x2000 ++#define MII_CR_SPEED_10 0x0000 ++#define E1000_PHY_ADDRESS 0x01 ++#define PHY_AUTO_NEG_TIME 45 /* 4.5 Seconds */ ++#define PHY_FORCE_TIME 20 /* 2.0 Seconds */ ++#define PHY_REVISION_MASK 0xFFFFFFF0 ++#define DEVICE_SPEED_MASK 0x00000300 /* Device Ctrl Reg Speed Mask */ ++#define REG4_SPEED_MASK 0x01E0 ++#define REG9_SPEED_MASK 0x0300 ++#define ADVERTISE_10_HALF 0x0001 ++#define ADVERTISE_10_FULL 0x0002 ++#define ADVERTISE_100_HALF 0x0004 ++#define ADVERTISE_100_FULL 0x0008 ++#define ADVERTISE_1000_HALF 0x0010 ++#define ADVERTISE_1000_FULL 0x0020 ++#define AUTONEG_ADVERTISE_SPEED_DEFAULT 0x002F /* Everything but 1000-Half */ ++#define AUTONEG_ADVERTISE_10_100_ALL 0x000F /* All 10/100 speeds*/ ++#define AUTONEG_ADVERTISE_10_ALL 0x0003 /* 10Mbps Full & Half speeds*/ ++ ++#endif /* _E1000_HW_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/Makefile 2021-04-07 16:01:27.428633879 +0800 +@@ -0,0 +1,8 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000) += rt_e1000.o ++ ++rt_e1000-y := \ ++ e1000_hw.o \ ++ e1000_main.o \ ++ e1000_param.o +--- linux/drivers/xenomai/net/drivers/e1000/e1000_osdep.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000_osdep.h 2021-04-07 16:01:27.423633886 +0800 +@@ -0,0 +1,148 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++ ++/* glue for the OS independent part of e1000 ++ * includes register access macros ++ */ ++ ++#ifndef _E1000_OSDEP_H_ ++#define _E1000_OSDEP_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "kcompat.h" ++ ++#define usec_delay(x) udelay(x) ++#ifndef msec_delay ++#define msec_delay(x) do { if(in_interrupt()) { \ ++ /* Don't mdelay in interrupt context! */ \ ++ BUG(); \ ++ } else { \ ++ msleep(x); \ ++ } } while (0) ++ ++/* Some workarounds require millisecond delays and are run during interrupt ++ * context. 
Most notably, when establishing link, the phy may need tweaking ++ * but cannot process phy register reads/writes faster than millisecond ++ * intervals...and we establish link due to a "link status change" interrupt. ++ */ ++#define msec_delay_irq(x) mdelay(x) ++#endif ++ ++#define PCI_COMMAND_REGISTER PCI_COMMAND ++#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE ++ ++typedef enum { ++#undef FALSE ++ FALSE = 0, ++#undef TRUE ++ TRUE = 1 ++} boolean_t; ++ ++#define MSGOUT(S, A, B) printk(KERN_DEBUG S "\n", A, B) ++ ++#ifdef DBG ++#define DEBUGOUT(S) printk(KERN_DEBUG S "\n") ++#define DEBUGOUT1(S, A...) printk(KERN_DEBUG S "\n", A) ++#else ++#define DEBUGOUT(S) ++#define DEBUGOUT1(S, A...) ++#endif ++ ++#define DEBUGFUNC(F) DEBUGOUT(F) ++#define DEBUGOUT2 DEBUGOUT1 ++#define DEBUGOUT3 DEBUGOUT2 ++#define DEBUGOUT7 DEBUGOUT3 ++ ++#ifdef __BIG_ENDIAN ++#define E1000_BIG_ENDIAN __BIG_ENDIAN ++#endif ++ ++#define E1000_WRITE_REG(a, reg, value) ( \ ++ writel((value), ((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg)))) ++ ++#define E1000_READ_REG(a, reg) ( \ ++ readl((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg))) ++ ++#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \ ++ writel((value), ((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ ((offset) << 2)))) ++ ++#define E1000_READ_REG_ARRAY(a, reg, offset) ( \ ++ readl((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ ((offset) << 2))) ++ ++#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY ++#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY ++ ++#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \ ++ writew((value), ((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ ((offset) << 1)))) ++ ++#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \ ++ readw((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ ((offset) << 1))) ++ ++#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \ ++ writeb((value), ((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ (offset)))) ++ ++#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \ ++ readb((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ (offset))) ++ ++#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, STATUS) ++ ++#define E1000_WRITE_ICH8_REG(a, reg, value) ( \ ++ writel((value), ((a)->flash_address + reg))) ++ ++#define E1000_READ_ICH8_REG(a, reg) ( \ ++ readl((a)->flash_address + reg)) ++ ++#define E1000_WRITE_ICH8_REG16(a, reg, value) ( \ ++ writew((value), ((a)->flash_address + reg))) ++ ++#define E1000_READ_ICH8_REG16(a, reg) ( \ ++ readw((a)->flash_address + reg)) ++ ++ ++#endif /* _E1000_OSDEP_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000/kcompat.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/kcompat.h 2021-04-07 16:01:27.418633893 +0800 +@@ -0,0 +1,446 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. 
++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _KCOMPAT_H_ ++#define _KCOMPAT_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#ifndef IRQ_HANDLED ++#define irqreturn_t void ++#define IRQ_HANDLED ++#define IRQ_NONE ++#endif ++ ++#ifndef SET_NETDEV_DEV ++#define SET_NETDEV_DEV(net, pdev) ++#endif ++ ++/* Useful settings for rtnet */ ++#undef MAX_SKB_FRAGS ++#undef NETIF_F_TSO ++#undef E1000_COUNT_ICR ++#undef NETIF_F_HW_VLAN_TX ++#undef CONFIG_NET_POLL_CONTROLLER ++#undef ETHTOOL_OPS_COMPAT ++#undef ETHTOOL_GPERMADDR ++ ++#ifndef HAVE_FREE_NETDEV ++#define free_netdev(x) kfree(x) ++#endif ++ ++#undef E1000_NAPI ++#undef CONFIG_E1000_NAPI ++ ++#undef CONFIG_E1000_DISABLE_PACKET_SPLIT ++#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1 ++ ++ ++#ifdef DISABLE_PCI_MSI ++#undef CONFIG_PCI_MSI ++#endif ++ ++#ifdef DISABLE_PM ++#undef CONFIG_PM ++#endif ++#undef CONFIG_PM ++ ++#ifndef module_param ++#define module_param(v,t,p) MODULE_PARM(v, "i"); ++#endif ++ ++#ifndef DMA_64BIT_MASK ++#define DMA_64BIT_MASK 0xffffffffffffffffULL ++#endif ++ ++#ifndef DMA_32BIT_MASK ++#define DMA_32BIT_MASK 0x00000000ffffffffULL ++#endif ++ ++/*****************************************************************************/ ++#ifndef unlikely ++#define unlikely(_x) _x ++#define likely(_x) _x ++#endif ++/*****************************************************************************/ ++ ++#ifndef PCI_DEVICE ++#define PCI_DEVICE(vend,dev) \ ++ .vendor = (vend), .device = (dev), \ ++ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID ++#endif ++ ++/*****************************************************************************/ ++/* Installations with ethtool version without eeprom, adapter id, or statistics ++ * support */ ++#ifndef ETHTOOL_GSTATS ++#define ETHTOOL_GSTATS 0x1d ++#undef ethtool_drvinfo ++#define ethtool_drvinfo k_ethtool_drvinfo ++struct k_ethtool_drvinfo { ++ uint32_t cmd; ++ char driver[32]; ++ char version[32]; ++ char fw_version[32]; ++ char bus_info[32]; ++ char reserved1[32]; ++ char reserved2[16]; ++ uint32_t n_stats; ++ uint32_t testinfo_len; ++ uint32_t eedump_len; ++ uint32_t regdump_len; ++}; ++ ++struct ethtool_stats { ++ uint32_t cmd; ++ uint32_t n_stats; ++ uint64_t data[0]; ++}; ++ ++#ifndef ETHTOOL_PHYS_ID ++#define ETHTOOL_PHYS_ID 0x1c ++#ifndef ETHTOOL_GSTRINGS ++#define ETHTOOL_GSTRINGS 0x1b ++enum ethtool_stringset { ++ ETH_SS_TEST = 0, ++ ETH_SS_STATS, ++}; ++struct ethtool_gstrings { ++ u32 cmd; /* ETHTOOL_GSTRINGS */ ++ u32 string_set; /* string set id e.c. 
ETH_SS_TEST, etc*/ ++ u32 len; /* number of strings in the string set */ ++ u8 data[0]; ++}; ++#ifndef ETHTOOL_TEST ++#define ETHTOOL_TEST 0x1a ++enum ethtool_test_flags { ++ ETH_TEST_FL_OFFLINE = (1 << 0), ++ ETH_TEST_FL_FAILED = (1 << 1), ++}; ++struct ethtool_test { ++ uint32_t cmd; ++ uint32_t flags; ++ uint32_t reserved; ++ uint32_t len; ++ uint64_t data[0]; ++}; ++#ifndef ETHTOOL_GEEPROM ++#define ETHTOOL_GEEPROM 0xb ++#undef ETHTOOL_GREGS ++struct ethtool_eeprom { ++ uint32_t cmd; ++ uint32_t magic; ++ uint32_t offset; ++ uint32_t len; ++ uint8_t data[0]; ++}; ++ ++struct ethtool_value { ++ uint32_t cmd; ++ uint32_t data; ++}; ++ ++#ifndef ETHTOOL_GLINK ++#define ETHTOOL_GLINK 0xa ++#endif /* Ethtool version without link support */ ++#endif /* Ethtool version without eeprom support */ ++#endif /* Ethtool version without test support */ ++#endif /* Ethtool version without strings support */ ++#endif /* Ethtool version wihtout adapter id support */ ++#endif /* Ethtool version without statistics support */ ++ ++#ifndef ETHTOOL_GREGS ++#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */ ++#define ethtool_regs _kc_ethtool_regs ++/* for passing big chunks of data */ ++struct _kc_ethtool_regs { ++ u32 cmd; ++ u32 version; /* driver-specific, indicates different chips/revs */ ++ u32 len; /* bytes */ ++ u8 data[0]; ++}; ++#endif ++#ifndef ETHTOOL_GMSGLVL ++#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ ++#endif ++#ifndef ETHTOOL_SMSGLVL ++#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */ ++#endif ++#ifndef ETHTOOL_NWAY_RST ++#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */ ++#endif ++#ifndef ETHTOOL_GLINK ++#define ETHTOOL_GLINK 0x0000000a /* Get link status */ ++#endif ++#ifndef ETHTOOL_GEEPROM ++#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ ++#endif ++#ifndef ETHTOOL_SEEPROM ++#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */ ++#endif ++#ifndef ETHTOOL_GCOALESCE ++#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ ++/* for configuring coalescing parameters of chip */ ++#define ethtool_coalesce _kc_ethtool_coalesce ++struct _kc_ethtool_coalesce { ++ u32 cmd; /* ETHTOOL_{G,S}COALESCE */ ++ ++ /* How many usecs to delay an RX interrupt after ++ * a packet arrives. If 0, only rx_max_coalesced_frames ++ * is used. ++ */ ++ u32 rx_coalesce_usecs; ++ ++ /* How many packets to delay an RX interrupt after ++ * a packet arrives. If 0, only rx_coalesce_usecs is ++ * used. It is illegal to set both usecs and max frames ++ * to zero as this would cause RX interrupts to never be ++ * generated. ++ */ ++ u32 rx_max_coalesced_frames; ++ ++ /* Same as above two parameters, except that these values ++ * apply while an IRQ is being serviced by the host. Not ++ * all cards support this feature and the values are ignored ++ * in that case. ++ */ ++ u32 rx_coalesce_usecs_irq; ++ u32 rx_max_coalesced_frames_irq; ++ ++ /* How many usecs to delay a TX interrupt after ++ * a packet is sent. If 0, only tx_max_coalesced_frames ++ * is used. ++ */ ++ u32 tx_coalesce_usecs; ++ ++ /* How many packets to delay a TX interrupt after ++ * a packet is sent. If 0, only tx_coalesce_usecs is ++ * used. It is illegal to set both usecs and max frames ++ * to zero as this would cause TX interrupts to never be ++ * generated. ++ */ ++ u32 tx_max_coalesced_frames; ++ ++ /* Same as above two parameters, except that these values ++ * apply while an IRQ is being serviced by the host. 
Not ++ * all cards support this feature and the values are ignored ++ * in that case. ++ */ ++ u32 tx_coalesce_usecs_irq; ++ u32 tx_max_coalesced_frames_irq; ++ ++ /* How many usecs to delay in-memory statistics ++ * block updates. Some drivers do not have an in-memory ++ * statistic block, and in such cases this value is ignored. ++ * This value must not be zero. ++ */ ++ u32 stats_block_coalesce_usecs; ++ ++ /* Adaptive RX/TX coalescing is an algorithm implemented by ++ * some drivers to improve latency under low packet rates and ++ * improve throughput under high packet rates. Some drivers ++ * only implement one of RX or TX adaptive coalescing. Anything ++ * not implemented by the driver causes these values to be ++ * silently ignored. ++ */ ++ u32 use_adaptive_rx_coalesce; ++ u32 use_adaptive_tx_coalesce; ++ ++ /* When the packet rate (measured in packets per second) ++ * is below pkt_rate_low, the {rx,tx}_*_low parameters are ++ * used. ++ */ ++ u32 pkt_rate_low; ++ u32 rx_coalesce_usecs_low; ++ u32 rx_max_coalesced_frames_low; ++ u32 tx_coalesce_usecs_low; ++ u32 tx_max_coalesced_frames_low; ++ ++ /* When the packet rate is below pkt_rate_high but above ++ * pkt_rate_low (both measured in packets per second) the ++ * normal {rx,tx}_* coalescing parameters are used. ++ */ ++ ++ /* When the packet rate is (measured in packets per second) ++ * is above pkt_rate_high, the {rx,tx}_*_high parameters are ++ * used. ++ */ ++ u32 pkt_rate_high; ++ u32 rx_coalesce_usecs_high; ++ u32 rx_max_coalesced_frames_high; ++ u32 tx_coalesce_usecs_high; ++ u32 tx_max_coalesced_frames_high; ++ ++ /* How often to do adaptive coalescing packet rate sampling, ++ * measured in seconds. Must not be zero. ++ */ ++ u32 rate_sample_interval; ++}; ++#endif ++#ifndef ETHTOOL_SCOALESCE ++#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ ++#endif ++#ifndef ETHTOOL_GRINGPARAM ++#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ ++/* for configuring RX/TX ring parameters */ ++#define ethtool_ringparam _kc_ethtool_ringparam ++struct _kc_ethtool_ringparam { ++ u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */ ++ ++ /* Read only attributes. These indicate the maximum number ++ * of pending RX/TX ring entries the driver will allow the ++ * user to set. ++ */ ++ u32 rx_max_pending; ++ u32 rx_mini_max_pending; ++ u32 rx_jumbo_max_pending; ++ u32 tx_max_pending; ++ ++ /* Values changeable by the user. The valid values are ++ * in the range 1 to the "*_max_pending" counterpart above. ++ */ ++ u32 rx_pending; ++ u32 rx_mini_pending; ++ u32 rx_jumbo_pending; ++ u32 tx_pending; ++}; ++#endif ++#ifndef ETHTOOL_SRINGPARAM ++#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */ ++#endif ++#ifndef ETHTOOL_GPAUSEPARAM ++#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ ++/* for configuring link flow control parameters */ ++#define ethtool_pauseparam _kc_ethtool_pauseparam ++struct _kc_ethtool_pauseparam { ++ u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ ++ ++ /* If the link is being auto-negotiated (via ethtool_cmd.autoneg ++ * being true) the user may set 'autonet' here non-zero to have the ++ * pause parameters be auto-negotiated too. In such a case, the ++ * {rx,tx}_pause values below determine what capabilities are ++ * advertised. ++ * ++ * If 'autoneg' is zero or the link is not being auto-negotiated, ++ * then {rx,tx}_pause force the driver to use/not-use pause ++ * flow control. 
++ */ ++ u32 autoneg; ++ u32 rx_pause; ++ u32 tx_pause; ++}; ++#endif ++#ifndef ETHTOOL_SPAUSEPARAM ++#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ ++#endif ++#ifndef ETHTOOL_GRXCSUM ++#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_SRXCSUM ++#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_GTXCSUM ++#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_STXCSUM ++#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_GSG ++#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable ++ * (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_SSG ++#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable ++ * (ethtool_value). */ ++#endif ++#ifndef ETHTOOL_TEST ++#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */ ++#endif ++#ifndef ETHTOOL_GSTRINGS ++#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ ++#endif ++#ifndef ETHTOOL_PHYS_ID ++#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ ++#endif ++#ifndef ETHTOOL_GSTATS ++#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ ++#endif ++#ifndef ETHTOOL_GTSO ++#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_STSO ++#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ ++#endif ++ ++#ifndef NET_IP_ALIGN ++#define NET_IP_ALIGN 2 ++#endif ++ ++#ifndef NETDEV_TX_OK ++#define NETDEV_TX_OK 0 /* driver took care of the packet */ ++#endif ++ ++#ifndef NETDEV_TX_BUSY ++#define NETDEV_TX_BUSY 1 /* driver tx path was busy */ ++#endif ++ ++#ifndef NETDEV_TX_LOCKED ++#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */ ++#endif ++ ++/* if we do not have the infrastructure to detect if skb_header is cloned * ++ * just return false in all cases */ ++#ifndef SKB_DATAREF_SHIFT ++#define skb_header_cloned(x) 0 ++#endif /* SKB_DATAREF_SHIFT not defined */ ++ ++#ifndef WARN_ON ++#define WARN_ON(x) ++#endif ++ ++#define USE_DRIVER_SHUTDOWN_HANDLER ++ ++#ifndef SA_PROBEIRQ ++#define SA_PROBEIRQ 0 ++#endif ++ ++#endif /* _KCOMPAT_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000/e1000_hw.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000_hw.c 2021-04-07 16:01:27.414633899 +0800 +@@ -0,0 +1,9092 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_hw.c ++ * Shared functions for accessing and configuring the MAC ++ */ ++ ++ ++#include "e1000_hw.h" ++ ++static int32_t e1000_set_phy_type(struct e1000_hw *hw); ++static void e1000_phy_init_script(struct e1000_hw *hw); ++static int32_t e1000_setup_copper_link(struct e1000_hw *hw); ++static int32_t e1000_setup_fiber_serdes_link(struct e1000_hw *hw); ++static int32_t e1000_adjust_serdes_amplitude(struct e1000_hw *hw); ++static int32_t e1000_phy_force_speed_duplex(struct e1000_hw *hw); ++static int32_t e1000_config_mac_to_phy(struct e1000_hw *hw); ++static void e1000_raise_mdi_clk(struct e1000_hw *hw, uint32_t *ctrl); ++static void e1000_lower_mdi_clk(struct e1000_hw *hw, uint32_t *ctrl); ++static void e1000_shift_out_mdi_bits(struct e1000_hw *hw, uint32_t data, ++ uint16_t count); ++static uint16_t e1000_shift_in_mdi_bits(struct e1000_hw *hw); ++static int32_t e1000_phy_reset_dsp(struct e1000_hw *hw); ++static int32_t e1000_write_eeprom_spi(struct e1000_hw *hw, uint16_t offset, ++ uint16_t words, uint16_t *data); ++static int32_t e1000_write_eeprom_microwire(struct e1000_hw *hw, ++ uint16_t offset, uint16_t words, ++ uint16_t *data); ++static int32_t e1000_spi_eeprom_ready(struct e1000_hw *hw); ++static void e1000_raise_ee_clk(struct e1000_hw *hw, uint32_t *eecd); ++static void e1000_lower_ee_clk(struct e1000_hw *hw, uint32_t *eecd); ++static void e1000_shift_out_ee_bits(struct e1000_hw *hw, uint16_t data, ++ uint16_t count); ++static int32_t e1000_write_phy_reg_ex(struct e1000_hw *hw, uint32_t reg_addr, ++ uint16_t phy_data); ++static int32_t e1000_read_phy_reg_ex(struct e1000_hw *hw,uint32_t reg_addr, ++ uint16_t *phy_data); ++static uint16_t e1000_shift_in_ee_bits(struct e1000_hw *hw, uint16_t count); ++static int32_t e1000_acquire_eeprom(struct e1000_hw *hw); ++static void e1000_release_eeprom(struct e1000_hw *hw); ++static void e1000_standby_eeprom(struct e1000_hw *hw); ++static int32_t e1000_set_vco_speed(struct e1000_hw *hw); ++static int32_t e1000_polarity_reversal_workaround(struct e1000_hw *hw); ++static int32_t e1000_set_phy_mode(struct e1000_hw *hw); ++static int32_t e1000_host_if_read_cookie(struct e1000_hw *hw, uint8_t *buffer); ++static uint8_t e1000_calculate_mng_checksum(char *buffer, uint32_t length); ++static int32_t e1000_configure_kmrn_for_10_100(struct e1000_hw *hw, ++ uint16_t duplex); ++static int32_t e1000_configure_kmrn_for_1000(struct e1000_hw *hw); ++ ++/* IGP cable length table */ ++static const ++uint16_t e1000_igp_cable_length_table[IGP01E1000_AGC_LENGTH_TABLE_SIZE] = ++ { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 25, 25, 25, ++ 25, 25, 25, 25, 30, 30, 30, 30, 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 50, 50, 50, 50, 50, 50, 50, 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 70, 70, 70, 70, 70, 70, 80, 80, 80, 80, 80, 80, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120}; ++ ++static const ++uint16_t e1000_igp_2_cable_length_table[IGP02E1000_AGC_LENGTH_TABLE_SIZE] = ++ { 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, ++ 0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, ++ 6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, ++ 21, 26, 31, 35, 
40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, ++ 40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, ++ 60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, ++ 83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124, ++ 104, 109, 114, 118, 121, 124}; ++ ++ ++/****************************************************************************** ++ * Set the phy type member in the hw struct. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_set_phy_type(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_set_phy_type"); ++ ++ if (hw->mac_type == e1000_undefined) ++ return -E1000_ERR_PHY_TYPE; ++ ++ switch (hw->phy_id) { ++ case M88E1000_E_PHY_ID: ++ case M88E1000_I_PHY_ID: ++ case M88E1011_I_PHY_ID: ++ case M88E1111_I_PHY_ID: ++ hw->phy_type = e1000_phy_m88; ++ break; ++ case IGP01E1000_I_PHY_ID: ++ if (hw->mac_type == e1000_82541 || ++ hw->mac_type == e1000_82541_rev_2 || ++ hw->mac_type == e1000_82547 || ++ hw->mac_type == e1000_82547_rev_2) { ++ hw->phy_type = e1000_phy_igp; ++ break; ++ } ++ case IGP03E1000_E_PHY_ID: ++ hw->phy_type = e1000_phy_igp_3; ++ break; ++ case IFE_E_PHY_ID: ++ case IFE_PLUS_E_PHY_ID: ++ case IFE_C_E_PHY_ID: ++ hw->phy_type = e1000_phy_ife; ++ break; ++ case GG82563_E_PHY_ID: ++ if (hw->mac_type == e1000_80003es2lan) { ++ hw->phy_type = e1000_phy_gg82563; ++ break; ++ } ++ /* Fall Through */ ++ default: ++ /* Should never have loaded on this device */ ++ hw->phy_type = e1000_phy_undefined; ++ return -E1000_ERR_PHY_TYPE; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/****************************************************************************** ++ * IGP phy init script - initializes the GbE PHY ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++static void ++e1000_phy_init_script(struct e1000_hw *hw) ++{ ++ uint32_t ret_val; ++ uint16_t phy_saved_data; ++ ++ DEBUGFUNC("e1000_phy_init_script"); ++ ++ if (hw->phy_init_script) { ++ msec_delay(20); ++ ++ /* Save off the current value of register 0x2F5B to be restored at ++ * the end of this routine. 
*/ ++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); ++ ++ /* Disabled the PHY transmitter */ ++ e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ ++ msec_delay(20); ++ ++ e1000_write_phy_reg(hw,0x0000,0x0140); ++ ++ msec_delay(5); ++ ++ switch (hw->mac_type) { ++ case e1000_82541: ++ case e1000_82547: ++ e1000_write_phy_reg(hw, 0x1F95, 0x0001); ++ ++ e1000_write_phy_reg(hw, 0x1F71, 0xBD21); ++ ++ e1000_write_phy_reg(hw, 0x1F79, 0x0018); ++ ++ e1000_write_phy_reg(hw, 0x1F30, 0x1600); ++ ++ e1000_write_phy_reg(hw, 0x1F31, 0x0014); ++ ++ e1000_write_phy_reg(hw, 0x1F32, 0x161C); ++ ++ e1000_write_phy_reg(hw, 0x1F94, 0x0003); ++ ++ e1000_write_phy_reg(hw, 0x1F96, 0x003F); ++ ++ e1000_write_phy_reg(hw, 0x2010, 0x0008); ++ break; ++ ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ e1000_write_phy_reg(hw, 0x1F73, 0x0099); ++ break; ++ default: ++ break; ++ } ++ ++ e1000_write_phy_reg(hw, 0x0000, 0x3300); ++ ++ msec_delay(20); ++ ++ /* Now enable the transmitter */ ++ e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); ++ ++ if (hw->mac_type == e1000_82547) { ++ uint16_t fused, fine, coarse; ++ ++ /* Move to analog registers page */ ++ e1000_read_phy_reg(hw, IGP01E1000_ANALOG_SPARE_FUSE_STATUS, &fused); ++ ++ if (!(fused & IGP01E1000_ANALOG_SPARE_FUSE_ENABLED)) { ++ e1000_read_phy_reg(hw, IGP01E1000_ANALOG_FUSE_STATUS, &fused); ++ ++ fine = fused & IGP01E1000_ANALOG_FUSE_FINE_MASK; ++ coarse = fused & IGP01E1000_ANALOG_FUSE_COARSE_MASK; ++ ++ if (coarse > IGP01E1000_ANALOG_FUSE_COARSE_THRESH) { ++ coarse -= IGP01E1000_ANALOG_FUSE_COARSE_10; ++ fine -= IGP01E1000_ANALOG_FUSE_FINE_1; ++ } else if (coarse == IGP01E1000_ANALOG_FUSE_COARSE_THRESH) ++ fine -= IGP01E1000_ANALOG_FUSE_FINE_10; ++ ++ fused = (fused & IGP01E1000_ANALOG_FUSE_POLY_MASK) | ++ (fine & IGP01E1000_ANALOG_FUSE_FINE_MASK) | ++ (coarse & IGP01E1000_ANALOG_FUSE_COARSE_MASK); ++ ++ e1000_write_phy_reg(hw, IGP01E1000_ANALOG_FUSE_CONTROL, fused); ++ e1000_write_phy_reg(hw, IGP01E1000_ANALOG_FUSE_BYPASS, ++ IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL); ++ } ++ } ++ } ++} ++ ++/****************************************************************************** ++ * Set the mac type member in the hw struct. 
++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_set_mac_type(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_set_mac_type"); ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82542: ++ switch (hw->revision_id) { ++ case E1000_82542_2_0_REV_ID: ++ hw->mac_type = e1000_82542_rev2_0; ++ break; ++ case E1000_82542_2_1_REV_ID: ++ hw->mac_type = e1000_82542_rev2_1; ++ break; ++ default: ++ /* Invalid 82542 revision ID */ ++ return -E1000_ERR_MAC_TYPE; ++ } ++ break; ++ case E1000_DEV_ID_82543GC_FIBER: ++ case E1000_DEV_ID_82543GC_COPPER: ++ hw->mac_type = e1000_82543; ++ break; ++ case E1000_DEV_ID_82544EI_COPPER: ++ case E1000_DEV_ID_82544EI_FIBER: ++ case E1000_DEV_ID_82544GC_COPPER: ++ case E1000_DEV_ID_82544GC_LOM: ++ hw->mac_type = e1000_82544; ++ break; ++ case E1000_DEV_ID_82540EM: ++ case E1000_DEV_ID_82540EM_LOM: ++ case E1000_DEV_ID_82540EP: ++ case E1000_DEV_ID_82540EP_LOM: ++ case E1000_DEV_ID_82540EP_LP: ++ hw->mac_type = e1000_82540; ++ break; ++ case E1000_DEV_ID_82545EM_COPPER: ++ case E1000_DEV_ID_82545EM_FIBER: ++ hw->mac_type = e1000_82545; ++ break; ++ case E1000_DEV_ID_82545GM_COPPER: ++ case E1000_DEV_ID_82545GM_FIBER: ++ case E1000_DEV_ID_82545GM_SERDES: ++ hw->mac_type = e1000_82545_rev_3; ++ break; ++ case E1000_DEV_ID_82546EB_COPPER: ++ case E1000_DEV_ID_82546EB_FIBER: ++ case E1000_DEV_ID_82546EB_QUAD_COPPER: ++ hw->mac_type = e1000_82546; ++ break; ++ case E1000_DEV_ID_82546GB_COPPER: ++ case E1000_DEV_ID_82546GB_FIBER: ++ case E1000_DEV_ID_82546GB_SERDES: ++ case E1000_DEV_ID_82546GB_PCIE: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ hw->mac_type = e1000_82546_rev_3; ++ break; ++ case E1000_DEV_ID_82541EI: ++ case E1000_DEV_ID_82541EI_MOBILE: ++ case E1000_DEV_ID_82541ER_LOM: ++ hw->mac_type = e1000_82541; ++ break; ++ case E1000_DEV_ID_82541ER: ++ case E1000_DEV_ID_82541GI: ++ case E1000_DEV_ID_82541GI_LF: ++ case E1000_DEV_ID_82541GI_MOBILE: ++ hw->mac_type = e1000_82541_rev_2; ++ break; ++ case E1000_DEV_ID_82547EI: ++ case E1000_DEV_ID_82547EI_MOBILE: ++ hw->mac_type = e1000_82547; ++ break; ++ case E1000_DEV_ID_82547GI: ++ hw->mac_type = e1000_82547_rev_2; ++ break; ++ case E1000_DEV_ID_82571EB_COPPER: ++ case E1000_DEV_ID_82571EB_FIBER: ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE: ++ hw->mac_type = e1000_82571; ++ break; ++ case E1000_DEV_ID_82572EI_COPPER: ++ case E1000_DEV_ID_82572EI_FIBER: ++ case E1000_DEV_ID_82572EI_SERDES: ++ case E1000_DEV_ID_82572EI: ++ hw->mac_type = e1000_82572; ++ break; ++ case E1000_DEV_ID_82573E: ++ case E1000_DEV_ID_82573E_IAMT: ++ case E1000_DEV_ID_82573L: ++ hw->mac_type = e1000_82573; ++ break; ++ case E1000_DEV_ID_80003ES2LAN_COPPER_SPT: ++ case E1000_DEV_ID_80003ES2LAN_SERDES_SPT: ++ case E1000_DEV_ID_80003ES2LAN_COPPER_DPT: ++ case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: ++ hw->mac_type = e1000_80003es2lan; ++ break; ++ case E1000_DEV_ID_ICH8_IGP_M_AMT: ++ case E1000_DEV_ID_ICH8_IGP_AMT: ++ case E1000_DEV_ID_ICH8_IGP_C: ++ case E1000_DEV_ID_ICH8_IFE: ++ case E1000_DEV_ID_ICH8_IFE_GT: ++ case E1000_DEV_ID_ICH8_IFE_G: ++ case E1000_DEV_ID_ICH8_IGP_M: ++ hw->mac_type = e1000_ich8lan; ++ break; ++ default: ++ /* Should never have loaded on this device */ ++ return -E1000_ERR_MAC_TYPE; ++ } ++ ++ switch (hw->mac_type) { ++ case e1000_ich8lan: ++ hw->swfwhw_semaphore_present = TRUE; ++ 
hw->asf_firmware_present = TRUE; ++ break; ++ case e1000_80003es2lan: ++ hw->swfw_sync_present = TRUE; ++ /* fall through */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ hw->eeprom_semaphore_present = TRUE; ++ /* fall through */ ++ case e1000_82541: ++ case e1000_82547: ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ hw->asf_firmware_present = TRUE; ++ break; ++ default: ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * Set media type and TBI compatibility. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * **************************************************************************/ ++void ++e1000_set_media_type(struct e1000_hw *hw) ++{ ++ uint32_t status; ++ ++ DEBUGFUNC("e1000_set_media_type"); ++ ++ if (hw->mac_type != e1000_82543) { ++ /* tbi_compatibility is only valid on 82543 */ ++ hw->tbi_compatibility_en = FALSE; ++ } ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82545GM_SERDES: ++ case E1000_DEV_ID_82546GB_SERDES: ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82572EI_SERDES: ++ case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: ++ hw->media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ hw->media_type = e1000_media_type_fiber; ++ break; ++ case e1000_ich8lan: ++ case e1000_82573: ++ /* The STATUS_TBIMODE bit is reserved or reused for the this ++ * device. ++ */ ++ hw->media_type = e1000_media_type_copper; ++ break; ++ default: ++ status = E1000_READ_REG(hw, STATUS); ++ if (status & E1000_STATUS_TBIMODE) { ++ hw->media_type = e1000_media_type_fiber; ++ /* tbi_compatibility not valid on fiber */ ++ hw->tbi_compatibility_en = FALSE; ++ } else { ++ hw->media_type = e1000_media_type_copper; ++ } ++ break; ++ } ++ } ++} ++ ++/****************************************************************************** ++ * Reset the transmit and receive units; mask and clear all interrupts. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_reset_hw(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ uint32_t ctrl_ext; ++ uint32_t icr; ++ uint32_t manc; ++ uint32_t led_ctrl; ++ uint32_t timeout; ++ uint32_t extcnf_ctrl; ++ int32_t ret_val; ++ ++ DEBUGFUNC("e1000_reset_hw"); ++ ++ /* For 82542 (rev 2.0), disable MWI before issuing a device reset */ ++ if (hw->mac_type == e1000_82542_rev2_0) { ++ DEBUGOUT("Disabling MWI on 82542 rev 2.0\n"); ++ e1000_pci_clear_mwi(hw); ++ } ++ ++ if (hw->bus_type == e1000_bus_type_pci_express) { ++ /* Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ if (e1000_disable_pciex_master(hw) != E1000_SUCCESS) { ++ DEBUGOUT("PCI-E Master disable polling has failed.\n"); ++ } ++ } ++ ++ /* Clear interrupt mask to stop board from generating interrupts */ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, IMC, 0xffffffff); ++ ++ /* Disable the Transmit and Receive units. Then delay to allow ++ * any pending transactions to complete before we hit the MAC with ++ * the global reset. ++ */ ++ E1000_WRITE_REG(hw, RCTL, 0); ++ E1000_WRITE_REG(hw, TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* The tbi_compatibility_on Flag must be cleared when Rctl is cleared. 
*/ ++ hw->tbi_compatibility_on = FALSE; ++ ++ /* Delay to allow any outstanding PCI transactions to complete before ++ * resetting the device ++ */ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ++ /* Must reset the PHY before resetting the MAC */ ++ if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { ++ E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_PHY_RST)); ++ msec_delay(5); ++ } ++ ++ /* Must acquire the MDIO ownership before MAC reset. ++ * Ownership defaults to firmware after a reset. */ ++ if (hw->mac_type == e1000_82573) { ++ timeout = 10; ++ ++ extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL); ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ++ do { ++ E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl); ++ extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL); ++ ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP) ++ break; ++ else ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ++ msec_delay(2); ++ timeout--; ++ } while (timeout); ++ } ++ ++ /* Workaround for ICH8 bit corruption issue in FIFO memory */ ++ if (hw->mac_type == e1000_ich8lan) { ++ /* Set Tx and Rx buffer allocation to 8k apiece. */ ++ E1000_WRITE_REG(hw, PBA, E1000_PBA_8K); ++ /* Set Packet Buffer Size to 16k. */ ++ E1000_WRITE_REG(hw, PBS, E1000_PBS_16K); ++ } ++ ++ /* Issue a global reset to the MAC. This will reset the chip's ++ * transmit, receive, DMA, and link units. It will not effect ++ * the current PCI configuration. The global reset bit is self- ++ * clearing, and should clear within a microsecond. ++ */ ++ DEBUGOUT("Issuing a global reset to MAC\n"); ++ ++ switch (hw->mac_type) { ++ case e1000_82544: ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82546: ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ /* These controllers can't ack the 64-bit write when issuing the ++ * reset, so use IO-mapping as a workaround to issue the reset */ ++ E1000_WRITE_REG_IO(hw, CTRL, (ctrl | E1000_CTRL_RST)); ++ break; ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ /* Reset is performed on a shadow of the control register */ ++ E1000_WRITE_REG(hw, CTRL_DUP, (ctrl | E1000_CTRL_RST)); ++ break; ++ case e1000_ich8lan: ++ if (!hw->phy_reset_disable && ++ e1000_check_phy_reset_block(hw) == E1000_SUCCESS) { ++ /* e1000_ich8lan PHY HW reset requires MAC CORE reset ++ * at the same time to make sure the interface between ++ * MAC and the external PHY is reset. ++ */ ++ ctrl |= E1000_CTRL_PHY_RST; ++ } ++ ++ e1000_get_software_flag(hw); ++ E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_RST)); ++ msec_delay(5); ++ break; ++ default: ++ E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_RST)); ++ break; ++ } ++ ++ /* After MAC reset, force reload of EEPROM to restore power-on settings to ++ * device. Later controllers reload the EEPROM automatically, so just wait ++ * for reload to complete. 
++ */ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ case e1000_82544: ++ /* Wait for reset to complete */ ++ usec_delay(10); ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ /* Wait for EEPROM reload */ ++ msec_delay(2); ++ break; ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ /* Wait for EEPROM reload */ ++ msec_delay(20); ++ break; ++ case e1000_82573: ++ if (e1000_is_onboard_nvm_eeprom(hw) == FALSE) { ++ usec_delay(10); ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ } ++ /* fall through */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_ich8lan: ++ case e1000_80003es2lan: ++ ret_val = e1000_get_auto_rd_done(hw); ++ if (ret_val) ++ /* We don't want to continue accessing MAC registers. */ ++ return ret_val; ++ break; ++ default: ++ /* Wait for EEPROM reload (it happens automatically) */ ++ msec_delay(5); ++ break; ++ } ++ ++ /* Disable HW ARPs */ ++ manc = E1000_READ_REG(hw, MANC); ++ manc &= ~(E1000_MANC_ARP_EN | E1000_MANC_ARP_RES_EN); ++ E1000_WRITE_REG(hw, MANC, manc); ++ ++ if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { ++ e1000_phy_init_script(hw); ++ ++ /* Configure activity LED after PHY reset */ ++ led_ctrl = E1000_READ_REG(hw, LEDCTL); ++ led_ctrl &= IGP_ACTIVITY_LED_MASK; ++ led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, LEDCTL, led_ctrl); ++ } ++ ++ /* Clear interrupt mask to stop board from generating interrupts */ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, IMC, 0xffffffff); ++ ++ /* Clear any pending interrupt events. */ ++ icr = E1000_READ_REG(hw, ICR); ++ ++ /* If MWI was previously enabled, reenable it. */ ++ if (hw->mac_type == e1000_82542_rev2_0) { ++ if (hw->pci_cmd_word & CMD_MEM_WRT_INVALIDATE) ++ e1000_pci_set_mwi(hw); ++ } ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ uint32_t kab = E1000_READ_REG(hw, KABGTXD); ++ kab |= E1000_KABGTXD_BGSQLBIAS; ++ E1000_WRITE_REG(hw, KABGTXD, kab); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Performs basic configuration of the adapter. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Assumes that the controller has previously been reset and is in a ++ * post-reset uninitialized state. Initializes the receive address registers, ++ * multicast table, and VLAN filter table. Calls routines to setup link ++ * configuration and flow control settings. Clears all on-chip counters. Leaves ++ * the transmit and receive units disabled and uninitialized. ++ *****************************************************************************/ ++int32_t ++e1000_init_hw(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ uint32_t i; ++ int32_t ret_val; ++ uint16_t pcix_cmd_word; ++ uint16_t pcix_stat_hi_word; ++ uint16_t cmd_mmrbc; ++ uint16_t stat_mmrbc; ++ uint32_t mta_size; ++ uint32_t reg_data; ++ uint32_t ctrl_ext; ++ ++ DEBUGFUNC("e1000_init_hw"); ++ ++ /* Initialize Identification LED */ ++ ret_val = e1000_id_led_init(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Initializing Identification LED\n"); ++ return ret_val; ++ } ++ ++ /* Set the media type and TBI compatibility */ ++ e1000_set_media_type(hw); ++ ++ /* Disabling VLAN filtering. 
*/ ++ DEBUGOUT("Initializing the IEEE VLAN\n"); ++ /* VET hardcoded to standard value and VFTA removed in ICH8 LAN */ ++ if (hw->mac_type != e1000_ich8lan) { ++ if (hw->mac_type < e1000_82545_rev_3) ++ E1000_WRITE_REG(hw, VET, 0); ++ e1000_clear_vfta(hw); ++ } ++ ++ /* For 82542 (rev 2.0), disable MWI and put the receiver into reset */ ++ if (hw->mac_type == e1000_82542_rev2_0) { ++ DEBUGOUT("Disabling MWI on 82542 rev 2.0\n"); ++ e1000_pci_clear_mwi(hw); ++ E1000_WRITE_REG(hw, RCTL, E1000_RCTL_RST); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(5); ++ } ++ ++ /* Setup the receive address. This involves initializing all of the Receive ++ * Address Registers (RARs 0 - 15). ++ */ ++ e1000_init_rx_addrs(hw); ++ ++ /* For 82542 (rev 2.0), take the receiver out of reset and enable MWI */ ++ if (hw->mac_type == e1000_82542_rev2_0) { ++ E1000_WRITE_REG(hw, RCTL, 0); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(1); ++ if (hw->pci_cmd_word & CMD_MEM_WRT_INVALIDATE) ++ e1000_pci_set_mwi(hw); ++ } ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ mta_size = E1000_MC_TBL_SIZE; ++ if (hw->mac_type == e1000_ich8lan) ++ mta_size = E1000_MC_TBL_SIZE_ICH8LAN; ++ for (i = 0; i < mta_size; i++) { ++ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0); ++ /* use write flush to prevent Memory Write Block (MWB) from ++ * occuring when accessing our register space */ ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Set the PCI priority bit correctly in the CTRL register. This ++ * determines if the adapter gives priority to receives, or if it ++ * gives equal priority to transmits and receives. Valid only on ++ * 82542 and 82543 silicon. ++ */ ++ if (hw->dma_fairness && hw->mac_type <= e1000_82543) { ++ ctrl = E1000_READ_REG(hw, CTRL); ++ E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PRIOR); ++ } ++ ++ switch (hw->mac_type) { ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ break; ++ default: ++ /* Workaround for PCI-X problem when BIOS sets MMRBC incorrectly. */ ++ if (hw->bus_type == e1000_bus_type_pcix) { ++ e1000_read_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd_word); ++ e1000_read_pci_cfg(hw, PCIX_STATUS_REGISTER_HI, ++ &pcix_stat_hi_word); ++ cmd_mmrbc = (pcix_cmd_word & PCIX_COMMAND_MMRBC_MASK) >> ++ PCIX_COMMAND_MMRBC_SHIFT; ++ stat_mmrbc = (pcix_stat_hi_word & PCIX_STATUS_HI_MMRBC_MASK) >> ++ PCIX_STATUS_HI_MMRBC_SHIFT; ++ if (stat_mmrbc == PCIX_STATUS_HI_MMRBC_4K) ++ stat_mmrbc = PCIX_STATUS_HI_MMRBC_2K; ++ if (cmd_mmrbc > stat_mmrbc) { ++ pcix_cmd_word &= ~PCIX_COMMAND_MMRBC_MASK; ++ pcix_cmd_word |= stat_mmrbc << PCIX_COMMAND_MMRBC_SHIFT; ++ e1000_write_pci_cfg(hw, PCIX_COMMAND_REGISTER, ++ &pcix_cmd_word); ++ } ++ } ++ break; ++ } ++ ++ /* More time needed for PHY to initialize */ ++ if (hw->mac_type == e1000_ich8lan) ++ msec_delay(15); ++ ++ /* Call a subroutine to configure the link and setup flow control. 
*/ ++ ret_val = e1000_setup_link(hw); ++ ++ /* Set the transmit descriptor write-back policy */ ++ if (hw->mac_type > e1000_82544) { ++ ctrl = E1000_READ_REG(hw, TXDCTL); ++ ctrl = (ctrl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB; ++ switch (hw->mac_type) { ++ default: ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_ich8lan: ++ case e1000_80003es2lan: ++ ctrl |= E1000_TXDCTL_COUNT_DESC; ++ break; ++ } ++ E1000_WRITE_REG(hw, TXDCTL, ctrl); ++ } ++ ++ if (hw->mac_type == e1000_82573) { ++ e1000_enable_tx_pkt_filtering(hw); ++ } ++ ++ switch (hw->mac_type) { ++ default: ++ break; ++ case e1000_80003es2lan: ++ /* Enable retransmit on late collisions */ ++ reg_data = E1000_READ_REG(hw, TCTL); ++ reg_data |= E1000_TCTL_RTLC; ++ E1000_WRITE_REG(hw, TCTL, reg_data); ++ ++ /* Configure Gigabit Carry Extend Padding */ ++ reg_data = E1000_READ_REG(hw, TCTL_EXT); ++ reg_data &= ~E1000_TCTL_EXT_GCEX_MASK; ++ reg_data |= DEFAULT_80003ES2LAN_TCTL_EXT_GCEX; ++ E1000_WRITE_REG(hw, TCTL_EXT, reg_data); ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ reg_data = E1000_READ_REG(hw, TIPG); ++ reg_data &= ~E1000_TIPG_IPGT_MASK; ++ reg_data |= DEFAULT_80003ES2LAN_TIPG_IPGT_1000; ++ E1000_WRITE_REG(hw, TIPG, reg_data); ++ ++ reg_data = E1000_READ_REG_ARRAY(hw, FFLT, 0x0001); ++ reg_data &= ~0x00100000; ++ E1000_WRITE_REG_ARRAY(hw, FFLT, 0x0001, reg_data); ++ /* Fall through */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_ich8lan: ++ ctrl = E1000_READ_REG(hw, TXDCTL1); ++ ctrl = (ctrl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB; ++ if (hw->mac_type >= e1000_82571) ++ ctrl |= E1000_TXDCTL_COUNT_DESC; ++ E1000_WRITE_REG(hw, TXDCTL1, ctrl); ++ break; ++ } ++ ++ ++ ++ if (hw->mac_type == e1000_82573) { ++ uint32_t gcr = E1000_READ_REG(hw, GCR); ++ gcr |= E1000_GCR_L1_ACT_WITHOUT_L0S_RX; ++ E1000_WRITE_REG(hw, GCR, gcr); ++ } ++ ++ /* Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs(hw); ++ ++ /* ICH8/Nahum No-snoop bits are opposite polarity. ++ * Set to snoop by default after reset. */ ++ if (hw->mac_type == e1000_ich8lan) ++ e1000_set_pci_ex_no_snoop(hw, PCI_EX_82566_SNOOP_ALL); ++ ++ if (hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER || ++ hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3) { ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ /* Relaxed ordering must be disabled to avoid a parity ++ * error crash in a PCI slot. */ ++ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ } ++ ++ return ret_val; ++} ++ ++/****************************************************************************** ++ * Adjust SERDES output amplitude based on EEPROM setting. ++ * ++ * hw - Struct containing variables accessed by shared code. 
++ *****************************************************************************/ ++static int32_t ++e1000_adjust_serdes_amplitude(struct e1000_hw *hw) ++{ ++ uint16_t eeprom_data; ++ int32_t ret_val; ++ ++ DEBUGFUNC("e1000_adjust_serdes_amplitude"); ++ ++ if (hw->media_type != e1000_media_type_internal_serdes) ++ return E1000_SUCCESS; ++ ++ switch (hw->mac_type) { ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ break; ++ default: ++ return E1000_SUCCESS; ++ } ++ ++ ret_val = e1000_read_eeprom(hw, EEPROM_SERDES_AMPLITUDE, 1, &eeprom_data); ++ if (ret_val) { ++ return ret_val; ++ } ++ ++ if (eeprom_data != EEPROM_RESERVED_WORD) { ++ /* Adjust SERDES output amplitude only. */ ++ eeprom_data &= EEPROM_SERDES_AMPLITUDE_MASK; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_EXT_CTRL, eeprom_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Configures flow control and link settings. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Determines which flow control settings to use. Calls the apropriate media- ++ * specific link configuration function. Configures the flow control settings. ++ * Assuming the adapter has a valid link partner, a valid link should be ++ * established. Assumes the hardware has previously been reset and the ++ * transmitter and receiver are not enabled. ++ *****************************************************************************/ ++int32_t ++e1000_setup_link(struct e1000_hw *hw) ++{ ++ uint32_t ctrl_ext; ++ int32_t ret_val; ++ uint16_t eeprom_data; ++ ++ DEBUGFUNC("e1000_setup_link"); ++ ++ /* In the case of the phy reset being blocked, we already have a link. ++ * We do not have to set it up again. */ ++ if (e1000_check_phy_reset_block(hw)) ++ return E1000_SUCCESS; ++ ++ /* Read and store word 0x0F of the EEPROM. This word contains bits ++ * that determine the hardware's default PAUSE (flow control) mode, ++ * a bit that determines whether the HW defaults to enabling or ++ * disabling auto-negotiation, and the direction of the ++ * SW defined pins. If there is no SW over-ride of the flow ++ * control setting, then the variable hw->fc will ++ * be initialized based on a value in the EEPROM. ++ */ ++ if (hw->fc == e1000_fc_default) { ++ switch (hw->mac_type) { ++ case e1000_ich8lan: ++ case e1000_82573: ++ hw->fc = e1000_fc_full; ++ break; ++ default: ++ ret_val = e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, ++ 1, &eeprom_data); ++ if (ret_val) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ if ((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) == 0) ++ hw->fc = e1000_fc_none; ++ else if ((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) == ++ EEPROM_WORD0F_ASM_DIR) ++ hw->fc = e1000_fc_tx_pause; ++ else ++ hw->fc = e1000_fc_full; ++ break; ++ } ++ } ++ ++ /* We want to save off the original Flow Control configuration just ++ * in case we get disconnected and then reconnected into a different ++ * hub or switch with different Flow Control capabilities. 
++ */ ++ if (hw->mac_type == e1000_82542_rev2_0) ++ hw->fc &= (~e1000_fc_tx_pause); ++ ++ if ((hw->mac_type < e1000_82543) && (hw->report_tx_early == 1)) ++ hw->fc &= (~e1000_fc_rx_pause); ++ ++ hw->original_fc = hw->fc; ++ ++ DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc); ++ ++ /* Take the 4 bits from EEPROM word 0x0F that determine the initial ++ * polarity value for the SW controlled pins, and setup the ++ * Extended Device Control reg with that info. ++ * This is needed because one of the SW controlled pins is used for ++ * signal detection. So this should be done before e1000_setup_pcs_link() ++ * or e1000_phy_setup() is called. ++ */ ++ if (hw->mac_type == e1000_82543) { ++ ret_val = e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, ++ 1, &eeprom_data); ++ if (ret_val) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ctrl_ext = ((eeprom_data & EEPROM_WORD0F_SWPDIO_EXT) << ++ SWDPIO__EXT_SHIFT); ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ } ++ ++ /* Call the necessary subroutine to configure the link. */ ++ ret_val = (hw->media_type == e1000_media_type_copper) ? ++ e1000_setup_copper_link(hw) : ++ e1000_setup_fiber_serdes_link(hw); ++ ++ /* Initialize the flow control address, type, and PAUSE timer ++ * registers to their default values. This is done even if flow ++ * control is disabled, because it does not hurt anything to ++ * initialize these registers. ++ */ ++ DEBUGOUT("Initializing the Flow Control address, type and timer regs\n"); ++ ++ /* FCAL/H and FCT are hardcoded to standard values in e1000_ich8lan. */ ++ if (hw->mac_type != e1000_ich8lan) { ++ E1000_WRITE_REG(hw, FCT, FLOW_CONTROL_TYPE); ++ E1000_WRITE_REG(hw, FCAH, FLOW_CONTROL_ADDRESS_HIGH); ++ E1000_WRITE_REG(hw, FCAL, FLOW_CONTROL_ADDRESS_LOW); ++ } ++ ++ E1000_WRITE_REG(hw, FCTTV, hw->fc_pause_time); ++ ++ /* Set the flow control receive threshold registers. Normally, ++ * these registers will be set to a default threshold that may be ++ * adjusted later by the driver's runtime code. However, if the ++ * ability to transmit pause frames in not enabled, then these ++ * registers will be set to 0. ++ */ ++ if (!(hw->fc & e1000_fc_tx_pause)) { ++ E1000_WRITE_REG(hw, FCRTL, 0); ++ E1000_WRITE_REG(hw, FCRTH, 0); ++ } else { ++ /* We need to set up the Receive Threshold high and low water marks ++ * as well as (optionally) enabling the transmission of XON frames. ++ */ ++ if (hw->fc_send_xon) { ++ E1000_WRITE_REG(hw, FCRTL, (hw->fc_low_water | E1000_FCRTL_XONE)); ++ E1000_WRITE_REG(hw, FCRTH, hw->fc_high_water); ++ } else { ++ E1000_WRITE_REG(hw, FCRTL, hw->fc_low_water); ++ E1000_WRITE_REG(hw, FCRTH, hw->fc_high_water); ++ } ++ } ++ return ret_val; ++} ++ ++/****************************************************************************** ++ * Sets up link for a fiber based or serdes based adapter ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Manipulates Physical Coding Sublayer functions in order to configure ++ * link. Assumes the hardware has been previously reset and the transmitter ++ * and receiver are not enabled. 
++ *****************************************************************************/ ++static int32_t ++e1000_setup_fiber_serdes_link(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ uint32_t status; ++ uint32_t txcw = 0; ++ uint32_t i; ++ uint32_t signal = 0; ++ int32_t ret_val; ++ ++ DEBUGFUNC("e1000_setup_fiber_serdes_link"); ++ ++ /* On 82571 and 82572 Fiber connections, SerDes loopback mode persists ++ * until explicitly turned off or a power cycle is performed. A read to ++ * the register does not indicate its status. Therefore, we ensure ++ * loopback mode is disabled during initialization. ++ */ ++ if (hw->mac_type == e1000_82571 || hw->mac_type == e1000_82572) ++ E1000_WRITE_REG(hw, SCTL, E1000_DISABLE_SERDES_LOOPBACK); ++ ++ /* On adapters with a MAC newer than 82544, SW Defineable pin 1 will be ++ * set when the optics detect a signal. On older adapters, it will be ++ * cleared when there is a signal. This applies to fiber media only. ++ * If we're on serdes media, adjust the output amplitude to value set in ++ * the EEPROM. ++ */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ if (hw->media_type == e1000_media_type_fiber) ++ signal = (hw->mac_type > e1000_82544) ? E1000_CTRL_SWDPIN1 : 0; ++ ++ ret_val = e1000_adjust_serdes_amplitude(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Take the link out of reset */ ++ ctrl &= ~(E1000_CTRL_LRST); ++ ++ /* Adjust VCO speed to improve BER performance */ ++ ret_val = e1000_set_vco_speed(hw); ++ if (ret_val) ++ return ret_val; ++ ++ e1000_config_collision_dist(hw); ++ ++ /* Check for a software override of the flow control settings, and setup ++ * the device accordingly. If auto-negotiation is enabled, then software ++ * will have to set the "PAUSE" bits to the correct value in the Tranmsit ++ * Config Word Register (TXCW) and re-start auto-negotiation. However, if ++ * auto-negotiation is disabled, then software will have to manually ++ * configure the two flow control enable bits in the CTRL register. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames, but ++ * not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames but we do ++ * not support receiving pause frames). ++ * 3: Both Rx and TX flow control (symmetric) are enabled. ++ */ ++ switch (hw->fc) { ++ case e1000_fc_none: ++ /* Flow control is completely disabled by a software over-ride. */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); ++ break; ++ case e1000_fc_rx_pause: ++ /* RX Flow control is enabled and TX Flow control is disabled by a ++ * software over-ride. Since there really isn't a way to advertise ++ * that we are capable of RX Pause ONLY, we will advertise that we ++ * support both symmetric and asymmetric RX PAUSE. Later, we will ++ * disable the adapter's ability to send PAUSE frames. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ case e1000_fc_tx_pause: ++ /* TX Flow control is enabled, and RX Flow control is disabled, by a ++ * software over-ride. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); ++ break; ++ case e1000_fc_full: ++ /* Flow control (both RX and TX) is enabled by a software over-ride. 
*/ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ return -E1000_ERR_CONFIG; ++ break; ++ } ++ ++ /* Since auto-negotiation is enabled, take the link out of reset (the link ++ * will be in reset, because we previously reset the chip). This will ++ * restart auto-negotiation. If auto-neogtiation is successful then the ++ * link-up status bit will be set and the flow control enable bits (RFCE ++ * and TFCE) will be set according to their negotiated value. ++ */ ++ DEBUGOUT("Auto-negotiation enabled\n"); ++ ++ E1000_WRITE_REG(hw, TXCW, txcw); ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ hw->txcw = txcw; ++ msec_delay(1); ++ ++ /* If we have a signal (the cable is plugged in) then poll for a "Link-Up" ++ * indication in the Device Status Register. Time-out if a link isn't ++ * seen in 500 milliseconds seconds (Auto-negotiation should complete in ++ * less than 500 milliseconds even if the other end is doing it in SW). ++ * For internal serdes, we just assume a signal is present, then poll. ++ */ ++ if (hw->media_type == e1000_media_type_internal_serdes || ++ (E1000_READ_REG(hw, CTRL) & E1000_CTRL_SWDPIN1) == signal) { ++ DEBUGOUT("Looking for Link\n"); ++ for (i = 0; i < (LINK_UP_TIMEOUT / 10); i++) { ++ msec_delay(10); ++ status = E1000_READ_REG(hw, STATUS); ++ if (status & E1000_STATUS_LU) break; ++ } ++ if (i == (LINK_UP_TIMEOUT / 10)) { ++ DEBUGOUT("Never got a valid link from auto-neg!!!\n"); ++ hw->autoneg_failed = 1; ++ /* AutoNeg failed to achieve a link, so we'll call ++ * e1000_check_for_link. This routine will force the link up if ++ * we detect a signal. This will allow us to communicate with ++ * non-autonegotiating link partners. ++ */ ++ ret_val = e1000_check_for_link(hw); ++ if (ret_val) { ++ DEBUGOUT("Error while checking for link\n"); ++ return ret_val; ++ } ++ hw->autoneg_failed = 0; ++ } else { ++ hw->autoneg_failed = 0; ++ DEBUGOUT("Valid Link Found\n"); ++ } ++ } else { ++ DEBUGOUT("No Signal Detected\n"); ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Make sure we have a valid PHY and change PHY mode before link setup. ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_copper_link_preconfig(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_copper_link_preconfig"); ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ /* With 82543, we need to force speed and duplex on the MAC equal to what ++ * the PHY speed and duplex configuration is. In addition, we need to ++ * perform a hardware reset on the PHY to take it out of reset. 
++ */ ++ if (hw->mac_type > e1000_82543) { ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ } else { ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX | E1000_CTRL_SLU); ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Make sure we have a valid PHY */ ++ ret_val = e1000_detect_gig_phy(hw); ++ if (ret_val) { ++ DEBUGOUT("Error, did not detect valid phy.\n"); ++ return ret_val; ++ } ++ DEBUGOUT1("Phy ID = %x \n", hw->phy_id); ++ ++ /* Set PHY to class A mode (if necessary) */ ++ ret_val = e1000_set_phy_mode(hw); ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->mac_type == e1000_82545_rev_3) || ++ (hw->mac_type == e1000_82546_rev_3)) { ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ phy_data |= 0x00000008; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ } ++ ++ if (hw->mac_type <= e1000_82543 || ++ hw->mac_type == e1000_82541 || hw->mac_type == e1000_82547 || ++ hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547_rev_2) ++ hw->phy_reset_disable = FALSE; ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/******************************************************************** ++* Copper link setup for e1000_phy_igp series. ++* ++* hw - Struct containing variables accessed by shared code ++*********************************************************************/ ++static int32_t ++e1000_copper_link_igp_setup(struct e1000_hw *hw) ++{ ++ uint32_t led_ctrl; ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_copper_link_igp_setup"); ++ ++ if (hw->phy_reset_disable) ++ return E1000_SUCCESS; ++ ++ ret_val = e1000_phy_reset(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Resetting the PHY\n"); ++ return ret_val; ++ } ++ ++ /* Wait 15ms for MAC to configure PHY from eeprom settings */ ++ msec_delay(15); ++ if (hw->mac_type != e1000_ich8lan) { ++ /* Configure activity LED after PHY reset */ ++ led_ctrl = E1000_READ_REG(hw, LEDCTL); ++ led_ctrl &= IGP_ACTIVITY_LED_MASK; ++ led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, LEDCTL, led_ctrl); ++ } ++ ++ /* disable lplu d3 during driver init */ ++ ret_val = e1000_set_d3_lplu_state(hw, FALSE); ++ if (ret_val) { ++ DEBUGOUT("Error Disabling LPLU D3\n"); ++ return ret_val; ++ } ++ ++ /* disable lplu d0 during driver init */ ++ ret_val = e1000_set_d0_lplu_state(hw, FALSE); ++ if (ret_val) { ++ DEBUGOUT("Error Disabling LPLU D0\n"); ++ return ret_val; ++ } ++ /* Configure mdi-mdix settings */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { ++ hw->dsp_config_state = e1000_dsp_config_disabled; ++ /* Force MDI for earlier revs of the IGP PHY */ ++ phy_data &= ~(IGP01E1000_PSCR_AUTO_MDIX | IGP01E1000_PSCR_FORCE_MDI_MDIX); ++ hw->mdix = 1; ++ ++ } else { ++ hw->dsp_config_state = e1000_dsp_config_enabled; ++ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ ++ switch (hw->mdix) { ++ case 1: ++ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 2: ++ phy_data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 0: ++ default: ++ phy_data |= IGP01E1000_PSCR_AUTO_MDIX; ++ break; ++ } ++ } ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* set auto-master slave resolution settings */ ++ if (hw->autoneg) { ++ e1000_ms_type phy_ms_setting = 
hw->master_slave; ++ ++ if (hw->ffe_config_state == e1000_ffe_config_active) ++ hw->ffe_config_state = e1000_ffe_config_enabled; ++ ++ if (hw->dsp_config_state == e1000_dsp_config_activated) ++ hw->dsp_config_state = e1000_dsp_config_enabled; ++ ++ /* when autonegotiation advertisment is only 1000Mbps then we ++ * should disable SmartSpeed and enable Auto MasterSlave ++ * resolution as hardware default. */ ++ if (hw->autoneg_advertised == ADVERTISE_1000_FULL) { ++ /* Disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ /* Set auto Master/Slave resolution process */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ phy_data &= ~CR_1000T_MS_ENABLE; ++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* load defaults for future use */ ++ hw->original_master_slave = (phy_data & CR_1000T_MS_ENABLE) ? ++ ((phy_data & CR_1000T_MS_VALUE) ? ++ e1000_ms_force_master : ++ e1000_ms_force_slave) : ++ e1000_ms_auto; ++ ++ switch (phy_ms_setting) { ++ case e1000_ms_force_master: ++ phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_force_slave: ++ phy_data |= CR_1000T_MS_ENABLE; ++ phy_data &= ~(CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_auto: ++ phy_data &= ~CR_1000T_MS_ENABLE; ++ default: ++ break; ++ } ++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/******************************************************************** ++* Copper link setup for e1000_phy_gg82563 series. ++* ++* hw - Struct containing variables accessed by shared code ++*********************************************************************/ ++static int32_t ++e1000_copper_link_ggp_setup(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ uint32_t reg_data; ++ ++ DEBUGFUNC("e1000_copper_link_ggp_setup"); ++ ++ if (!hw->phy_reset_disable) { ++ ++ /* Enable CRS on TX for half-duplex operation. 
*/ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ /* Use 25MHz for both link down and 1000BASE-T for Tx clock */ ++ phy_data |= GG82563_MSCR_TX_CLK_1000MBPS_25MHZ; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~GG82563_PSCR_CROSSOVER_MODE_MASK; ++ ++ switch (hw->mdix) { ++ case 1: ++ phy_data |= GG82563_PSCR_CROSSOVER_MODE_MDI; ++ break; ++ case 2: ++ phy_data |= GG82563_PSCR_CROSSOVER_MODE_MDIX; ++ break; ++ case 0: ++ default: ++ phy_data |= GG82563_PSCR_CROSSOVER_MODE_AUTO; ++ break; ++ } ++ ++ /* Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ if (hw->disable_polarity_correction == 1) ++ phy_data |= GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL, phy_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ /* SW Reset the PHY so all changes take effect */ ++ ret_val = e1000_phy_reset(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Resetting the PHY\n"); ++ return ret_val; ++ } ++ } /* phy_reset_disable */ ++ ++ if (hw->mac_type == e1000_80003es2lan) { ++ /* Bypass RX and TX FIFO's */ ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_FIFO_CTRL, ++ E1000_KUMCTRLSTA_FIFO_CTRL_RX_BYPASS | ++ E1000_KUMCTRLSTA_FIFO_CTRL_TX_BYPASS); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_SPEC_CTRL_2, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~GG82563_PSCR2_REVERSE_AUTO_NEG; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL_2, phy_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ reg_data = E1000_READ_REG(hw, CTRL_EXT); ++ reg_data &= ~(E1000_CTRL_EXT_LINK_MODE_MASK); ++ E1000_WRITE_REG(hw, CTRL_EXT, reg_data); ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Do not init these registers when the HW is in IAMT mode, since the ++ * firmware will have already initialized them. We only initialize ++ * them if the HW is not in IAMT mode. ++ */ ++ if (e1000_check_mng_mode(hw) == FALSE) { ++ /* Enable Electrical Idle on the PHY */ ++ phy_data |= GG82563_PMCR_ENABLE_ELECTRICAL_IDLE; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ phy_data); ++ ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Workaround: Disable padding in Kumeran interface in the MAC ++ * and in the PHY to avoid CRC errors. 
++ */ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_INBAND_CTRL, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ phy_data |= GG82563_ICR_DIS_PADDING; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_INBAND_CTRL, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/******************************************************************** ++* Copper link setup for e1000_phy_m88 series. ++* ++* hw - Struct containing variables accessed by shared code ++*********************************************************************/ ++static int32_t ++e1000_copper_link_mgp_setup(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_copper_link_mgp_setup"); ++ ++ if (hw->phy_reset_disable) ++ return E1000_SUCCESS; ++ ++ /* Enable CRS on TX. This must be set for half-duplex operation. */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ++ /* Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ++ switch (hw->mdix) { ++ case 1: ++ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; ++ break; ++ case 2: ++ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; ++ break; ++ case 3: ++ phy_data |= M88E1000_PSCR_AUTO_X_1000T; ++ break; ++ case 0: ++ default: ++ phy_data |= M88E1000_PSCR_AUTO_X_MODE; ++ break; ++ } ++ ++ /* Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; ++ if (hw->disable_polarity_correction == 1) ++ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if (hw->phy_revision < M88E1011_I_REV_4) { ++ /* Force TX_CLK in the Extended PHY Specific Control Register ++ * to 25MHz clock. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ++ if ((hw->phy_revision == E1000_REVISION_2) && ++ (hw->phy_id == M88E1111_I_PHY_ID)) { ++ /* Vidalia Phy, set the downshift counter to 5x */ ++ phy_data &= ~(M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK); ++ phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ /* Configure Master and Slave downshift values */ ++ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); ++ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ /* SW Reset the PHY so all changes take effect */ ++ ret_val = e1000_phy_reset(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Resetting the PHY\n"); ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/******************************************************************** ++* Setup auto-negotiation and flow control advertisements, ++* and then perform auto-negotiation. 
++* ++* hw - Struct containing variables accessed by shared code ++*********************************************************************/ ++static int32_t ++e1000_copper_link_autoneg(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_copper_link_autoneg"); ++ ++ /* Perform some bounds checking on the hw->autoneg_advertised ++ * parameter. If this variable is zero, then set it to the default. ++ */ ++ hw->autoneg_advertised &= AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ ++ /* If autoneg_advertised is zero, we assume it was not defaulted ++ * by the calling code so we set to advertise full capability. ++ */ ++ if (hw->autoneg_advertised == 0) ++ hw->autoneg_advertised = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ ++ /* IFE phy only supports 10/100 */ ++ if (hw->phy_type == e1000_phy_ife) ++ hw->autoneg_advertised &= AUTONEG_ADVERTISE_10_100_ALL; ++ ++ DEBUGOUT("Reconfiguring auto-neg advertisement params\n"); ++ ret_val = e1000_phy_setup_autoneg(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Setting up Auto-Negotiation\n"); ++ return ret_val; ++ } ++ DEBUGOUT("Restarting Auto-Neg\n"); ++ ++ /* Restart auto-negotiation by setting the Auto Neg Enable bit and ++ * the Auto Neg Restart bit in the PHY control register. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); ++ ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Does the user want to wait for Auto-Neg to complete here, or ++ * check at a later time (for example, callback routine). ++ */ ++ if (hw->wait_autoneg_complete) { ++ ret_val = e1000_wait_autoneg(hw); ++ if (ret_val) { ++ DEBUGOUT("Error while waiting for autoneg to complete\n"); ++ return ret_val; ++ } ++ } ++ ++ hw->get_link_status = TRUE; ++ ++ return E1000_SUCCESS; ++} ++ ++/******************************************************************** ++* Copper link setup for e1000_phy_ife (Fast Ethernet PHY) series. ++* ++* hw - Struct containing variables accessed by shared code ++*********************************************************************/ ++static int32_t ++e1000_copper_link_ife_setup(struct e1000_hw *hw) ++{ ++ if (hw->phy_reset_disable) ++ return E1000_SUCCESS; ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Config the MAC and the PHY after link is up. ++* 1) Set up the MAC to the current PHY speed/duplex ++* if we are on 82543. If we ++* are on newer silicon, we only need to configure ++* collision distance in the Transmit Control Register. ++* 2) Set up flow control on the MAC to that established with ++* the link partner. ++* 3) Config DSP to improve Gigabit link quality for some PHY revisions. 
++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_copper_link_postconfig(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ DEBUGFUNC("e1000_copper_link_postconfig"); ++ ++ if (hw->mac_type >= e1000_82544) { ++ e1000_config_collision_dist(hw); ++ } else { ++ ret_val = e1000_config_mac_to_phy(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring MAC to PHY settings\n"); ++ return ret_val; ++ } ++ } ++ ret_val = e1000_config_fc_after_link_up(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Configuring Flow Control\n"); ++ return ret_val; ++ } ++ ++ /* Config DSP to improve Giga link quality */ ++ if (hw->phy_type == e1000_phy_igp) { ++ ret_val = e1000_config_dsp_after_link_change(hw, TRUE); ++ if (ret_val) { ++ DEBUGOUT("Error Configuring DSP after link up\n"); ++ return ret_val; ++ } ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Detects which PHY is present and setup the speed and duplex ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_setup_copper_link(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t i; ++ uint16_t phy_data; ++ uint16_t reg_data; ++ ++ DEBUGFUNC("e1000_setup_copper_link"); ++ ++ switch (hw->mac_type) { ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ /* Set the mac to wait the maximum time between each ++ * iteration and increase the max iterations when ++ * polling the phy; this fixes erroneous timeouts at 10Mbps. */ ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 4), 0xFFFF); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_kmrn_reg(hw, GG82563_REG(0x34, 9), ®_data); ++ if (ret_val) ++ return ret_val; ++ reg_data |= 0x3F; ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 9), reg_data); ++ if (ret_val) ++ return ret_val; ++ default: ++ break; ++ } ++ ++ /* Check if it is a valid PHY and set PHY mode if necessary. */ ++ ret_val = e1000_copper_link_preconfig(hw); ++ if (ret_val) ++ return ret_val; ++ ++ switch (hw->mac_type) { ++ case e1000_80003es2lan: ++ /* Kumeran registers are written-only */ ++ reg_data = E1000_KUMCTRLSTA_INB_CTRL_LINK_STATUS_TX_TIMEOUT_DEFAULT; ++ reg_data |= E1000_KUMCTRLSTA_INB_CTRL_DIS_PADDING; ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_INB_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ break; ++ default: ++ break; ++ } ++ ++ if (hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) { ++ ret_val = e1000_copper_link_igp_setup(hw); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->phy_type == e1000_phy_m88) { ++ ret_val = e1000_copper_link_mgp_setup(hw); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->phy_type == e1000_phy_gg82563) { ++ ret_val = e1000_copper_link_ggp_setup(hw); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->phy_type == e1000_phy_ife) { ++ ret_val = e1000_copper_link_ife_setup(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if (hw->autoneg) { ++ /* Setup autoneg and flow control advertisement ++ * and perform autonegotiation */ ++ ret_val = e1000_copper_link_autoneg(hw); ++ if (ret_val) ++ return ret_val; ++ } else { ++ /* PHY will be set to 10H, 10F, 100H,or 100F ++ * depending on value from forced_speed_duplex. 
*/ ++ DEBUGOUT("Forcing speed and duplex\n"); ++ ret_val = e1000_phy_force_speed_duplex(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Forcing Speed and Duplex\n"); ++ return ret_val; ++ } ++ } ++ ++ /* Check link status. Wait up to 100 microseconds for link to become ++ * valid. ++ */ ++ for (i = 0; i < 10; i++) { ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if (phy_data & MII_SR_LINK_STATUS) { ++ /* Config the MAC and PHY after link is up */ ++ ret_val = e1000_copper_link_postconfig(hw); ++ if (ret_val) ++ return ret_val; ++ ++ DEBUGOUT("Valid link established!!!\n"); ++ return E1000_SUCCESS; ++ } ++ usec_delay(10); ++ } ++ ++ DEBUGOUT("Unable to establish link!!!\n"); ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Configure the MAC-to-PHY interface for 10/100Mbps ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_configure_kmrn_for_10_100(struct e1000_hw *hw, uint16_t duplex) ++{ ++ int32_t ret_val = E1000_SUCCESS; ++ uint32_t tipg; ++ uint16_t reg_data; ++ ++ DEBUGFUNC("e1000_configure_kmrn_for_10_100"); ++ ++ reg_data = E1000_KUMCTRLSTA_HD_CTRL_10_100_DEFAULT; ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = E1000_READ_REG(hw, TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_80003ES2LAN_TIPG_IPGT_10_100; ++ E1000_WRITE_REG(hw, TIPG, tipg); ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ if (duplex == HALF_DUPLEX) ++ reg_data |= GG82563_KMCR_PASS_FALSE_CARRIER; ++ else ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++ return ret_val; ++} ++ ++static int32_t ++e1000_configure_kmrn_for_1000(struct e1000_hw *hw) ++{ ++ int32_t ret_val = E1000_SUCCESS; ++ uint16_t reg_data; ++ uint32_t tipg; ++ ++ DEBUGFUNC("e1000_configure_kmrn_for_1000"); ++ ++ reg_data = E1000_KUMCTRLSTA_HD_CTRL_1000_DEFAULT; ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = E1000_READ_REG(hw, TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_80003ES2LAN_TIPG_IPGT_1000; ++ E1000_WRITE_REG(hw, TIPG, tipg); ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++ return ret_val; ++} ++ ++/****************************************************************************** ++* Configures PHY autoneg and flow control advertisement settings ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++int32_t ++e1000_phy_setup_autoneg(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t mii_autoneg_adv_reg; ++ uint16_t mii_1000t_ctrl_reg; ++ ++ DEBUGFUNC("e1000_phy_setup_autoneg"); ++ ++ /* Read the MII Auto-Neg Advertisement Register (Address 4). 
*/ ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (hw->phy_type != e1000_phy_ife) { ++ /* Read the MII 1000Base-T Control Register (Address 9). */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg); ++ if (ret_val) ++ return ret_val; ++ } else ++ mii_1000t_ctrl_reg=0; ++ ++ /* Need to parse both autoneg_advertised and fc and set up ++ * the appropriate PHY registers. First we will parse for ++ * autoneg_advertised software override. Since we can advertise ++ * a plethora of combinations, we need to check each bit ++ * individually. ++ */ ++ ++ /* First we clear all the 10/100 mb speed bits in the Auto-Neg ++ * Advertisement Register (Address 4) and the 1000 mb speed bits in ++ * the 1000Base-T Control Register (Address 9). ++ */ ++ mii_autoneg_adv_reg &= ~REG4_SPEED_MASK; ++ mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK; ++ ++ DEBUGOUT1("autoneg_advertised %x\n", hw->autoneg_advertised); ++ ++ /* Do we want to advertise 10 Mb Half Duplex? */ ++ if (hw->autoneg_advertised & ADVERTISE_10_HALF) { ++ DEBUGOUT("Advertise 10mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 10 Mb Full Duplex? */ ++ if (hw->autoneg_advertised & ADVERTISE_10_FULL) { ++ DEBUGOUT("Advertise 10mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Half Duplex? */ ++ if (hw->autoneg_advertised & ADVERTISE_100_HALF) { ++ DEBUGOUT("Advertise 100mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Full Duplex? */ ++ if (hw->autoneg_advertised & ADVERTISE_100_FULL) { ++ DEBUGOUT("Advertise 100mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; ++ } ++ ++ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ ++ if (hw->autoneg_advertised & ADVERTISE_1000_HALF) { ++ DEBUGOUT("Advertise 1000mb Half duplex requested, request denied!\n"); ++ } ++ ++ /* Do we want to advertise 1000 Mb Full Duplex? */ ++ if (hw->autoneg_advertised & ADVERTISE_1000_FULL) { ++ DEBUGOUT("Advertise 1000mb Full duplex\n"); ++ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; ++ if (hw->phy_type == e1000_phy_ife) { ++ DEBUGOUT("e1000_phy_ife is a 10/100 PHY. Gigabit speed is not supported.\n"); ++ } ++ } ++ ++ /* Check for a software override of the flow control settings, and ++ * setup the PHY advertisement registers accordingly. If ++ * auto-negotiation is enabled, then software will have to set the ++ * "PAUSE" bits to the correct value in the Auto-Negotiation ++ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-negotiation. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * but we do not support receiving pause frames). ++ * 3: Both Rx and TX flow control (symmetric) are enabled. ++ * other: No software override. The flow control configuration ++ * in the EEPROM is used. ++ */ ++ switch (hw->fc) { ++ case e1000_fc_none: /* 0 */ ++ /* Flow control (RX & TX) is completely disabled by a ++ * software over-ride. ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_rx_pause: /* 1 */ ++ /* RX Flow control is enabled, and TX Flow control is ++ * disabled, by a software over-ride. 
++ */ ++ /* Since there really isn't a way to advertise that we are ++ * capable of RX Pause ONLY, we will advertise that we ++ * support both symmetric and asymmetric RX PAUSE. Later ++ * (in e1000_config_fc_after_link_up) we will disable the ++ *hw's ability to send PAUSE frames. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_tx_pause: /* 2 */ ++ /* TX Flow control is enabled, and RX Flow control is ++ * disabled, by a software over-ride. ++ */ ++ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; ++ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; ++ break; ++ case e1000_fc_full: /* 3 */ ++ /* Flow control (both RX and TX) is enabled by a software ++ * over-ride. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ++ DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); ++ ++ if (hw->phy_type != e1000_phy_ife) { ++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Force PHY speed and duplex settings to hw->forced_speed_duplex ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_phy_force_speed_duplex(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ int32_t ret_val; ++ uint16_t mii_ctrl_reg; ++ uint16_t mii_status_reg; ++ uint16_t phy_data; ++ uint16_t i; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex"); ++ ++ /* Turn off Flow control if we are forcing speed and duplex. */ ++ hw->fc = e1000_fc_none; ++ ++ DEBUGOUT1("hw->fc = %d\n", hw->fc); ++ ++ /* Read the Device Control Register. */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ++ /* Set the bits to Force Speed and Duplex in the Device Ctrl Reg. */ ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~(DEVICE_SPEED_MASK); ++ ++ /* Clear the Auto Speed Detect Enable bit. */ ++ ctrl &= ~E1000_CTRL_ASDE; ++ ++ /* Read the MII Control Register. */ ++ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &mii_ctrl_reg); ++ if (ret_val) ++ return ret_val; ++ ++ /* We need to disable autoneg in order to force link and duplex. */ ++ ++ mii_ctrl_reg &= ~MII_CR_AUTO_NEG_EN; ++ ++ /* Are we forcing Full or Half Duplex? */ ++ if (hw->forced_speed_duplex == e1000_100_full || ++ hw->forced_speed_duplex == e1000_10_full) { ++ /* We want to force full duplex so we SET the full duplex bits in the ++ * Device and MII Control Registers. ++ */ ++ ctrl |= E1000_CTRL_FD; ++ mii_ctrl_reg |= MII_CR_FULL_DUPLEX; ++ DEBUGOUT("Full Duplex\n"); ++ } else { ++ /* We want to force half duplex so we CLEAR the full duplex bits in ++ * the Device and MII Control Registers. ++ */ ++ ctrl &= ~E1000_CTRL_FD; ++ mii_ctrl_reg &= ~MII_CR_FULL_DUPLEX; ++ DEBUGOUT("Half Duplex\n"); ++ } ++ ++ /* Are we forcing 100Mbps??? */ ++ if (hw->forced_speed_duplex == e1000_100_full || ++ hw->forced_speed_duplex == e1000_100_half) { ++ /* Set the 100Mb bit and turn off the 1000Mb and 10Mb bits. */ ++ ctrl |= E1000_CTRL_SPD_100; ++ mii_ctrl_reg |= MII_CR_SPEED_100; ++ mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10); ++ DEBUGOUT("Forcing 100mb "); ++ } else { ++ /* Set the 10Mb bit and turn off the 1000Mb and 100Mb bits. 
*/ ++ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ mii_ctrl_reg |= MII_CR_SPEED_10; ++ mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); ++ DEBUGOUT("Forcing 10mb "); ++ } ++ ++ e1000_config_collision_dist(hw); ++ ++ /* Write the configured values back to the Device Control Reg. */ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ ++ if ((hw->phy_type == e1000_phy_m88) || ++ (hw->phy_type == e1000_phy_gg82563)) { ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI ++ * forced whenever speed are duplex are forced. ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ DEBUGOUT1("M88E1000 PSCR: %x \n", phy_data); ++ ++ /* Need to reset the PHY or these changes will be ignored */ ++ mii_ctrl_reg |= MII_CR_RESET; ++ /* Disable MDI-X support for 10/100 */ ++ } else if (hw->phy_type == e1000_phy_ife) { ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_MDIX_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IFE_PMC_AUTO_MDIX; ++ phy_data &= ~IFE_PMC_FORCE_MDIX; ++ ++ ret_val = e1000_write_phy_reg(hw, IFE_PHY_MDIX_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ /* Clear Auto-Crossover to force MDI manually. IGP requires MDI ++ * forced whenever speed or duplex are forced. ++ */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Write back the modified PHY MII control register. */ ++ ret_val = e1000_write_phy_reg(hw, PHY_CTRL, mii_ctrl_reg); ++ if (ret_val) ++ return ret_val; ++ ++ usec_delay(1); ++ ++ /* The wait_autoneg_complete flag may be a little misleading here. ++ * Since we are forcing speed and duplex, Auto-Neg is not enabled. ++ * But we do want to delay for a period while forcing only so we ++ * don't generate false No Link messages. So we will wait here ++ * only if the user has set wait_autoneg_complete to 1, which is ++ * the default. ++ */ ++ if (hw->wait_autoneg_complete) { ++ /* We will wait for autoneg to complete. */ ++ DEBUGOUT("Waiting for forced speed/duplex link.\n"); ++ mii_status_reg = 0; ++ ++ /* We will wait for autoneg to complete or 4.5 seconds to expire. */ ++ for (i = PHY_FORCE_TIME; i > 0; i--) { ++ /* Read the MII Status Register and wait for Auto-Neg Complete bit ++ * to be set. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (mii_status_reg & MII_SR_LINK_STATUS) break; ++ msec_delay(100); ++ } ++ if ((i == 0) && ++ ((hw->phy_type == e1000_phy_m88) || ++ (hw->phy_type == e1000_phy_gg82563))) { ++ /* We didn't get link. Reset the DSP and wait again for link. */ ++ ret_val = e1000_phy_reset_dsp(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Resetting PHY DSP\n"); ++ return ret_val; ++ } ++ } ++ /* This loop will early-out if the link condition has been met. 
*/ ++ for (i = PHY_FORCE_TIME; i > 0; i--) { ++ if (mii_status_reg & MII_SR_LINK_STATUS) break; ++ msec_delay(100); ++ /* Read the MII Status Register and wait for Auto-Neg Complete bit ++ * to be set. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ if (hw->phy_type == e1000_phy_m88) { ++ /* Because we reset the PHY above, we need to re-force TX_CLK in the ++ * Extended PHY Specific Control Register to 25MHz clock. This value ++ * defaults back to a 2.5MHz clock when the PHY is reset. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* In addition, because of the s/w reset above, we need to enable CRS on ++ * TX. This must be set for both full and half duplex operation. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->mac_type == e1000_82544 || hw->mac_type == e1000_82543) && ++ (!hw->autoneg) && (hw->forced_speed_duplex == e1000_10_full || ++ hw->forced_speed_duplex == e1000_10_half)) { ++ ret_val = e1000_polarity_reversal_workaround(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ } else if (hw->phy_type == e1000_phy_gg82563) { ++ /* The TX_CLK of the Extended PHY Specific Control Register defaults ++ * to 2.5MHz on a reset. We need to re-force it back to 25MHz, if ++ * we're not in a forced 10/duplex configuration. */ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~GG82563_MSCR_TX_CLK_MASK; ++ if ((hw->forced_speed_duplex == e1000_10_full) || ++ (hw->forced_speed_duplex == e1000_10_half)) ++ phy_data |= GG82563_MSCR_TX_CLK_10MBPS_2_5MHZ; ++ else ++ phy_data |= GG82563_MSCR_TX_CLK_100MBPS_25MHZ; ++ ++ /* Also due to the reset, we need to enable CRS on Tx. */ ++ phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Sets the collision distance in the Transmit Control register ++* ++* hw - Struct containing variables accessed by shared code ++* ++* Link should have been established previously. Reads the speed and duplex ++* information from the Device Status register. 
++******************************************************************************/ ++void ++e1000_config_collision_dist(struct e1000_hw *hw) ++{ ++ uint32_t tctl, coll_dist; ++ ++ DEBUGFUNC("e1000_config_collision_dist"); ++ ++ if (hw->mac_type < e1000_82543) ++ coll_dist = E1000_COLLISION_DISTANCE_82542; ++ else ++ coll_dist = E1000_COLLISION_DISTANCE; ++ ++ tctl = E1000_READ_REG(hw, TCTL); ++ ++ tctl &= ~E1000_TCTL_COLD; ++ tctl |= coll_dist << E1000_COLD_SHIFT; ++ ++ E1000_WRITE_REG(hw, TCTL, tctl); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/****************************************************************************** ++* Sets MAC speed and duplex settings to reflect the those in the PHY ++* ++* hw - Struct containing variables accessed by shared code ++* mii_reg - data to write to the MII control register ++* ++* The contents of the PHY register containing the needed information need to ++* be passed in. ++******************************************************************************/ ++static int32_t ++e1000_config_mac_to_phy(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_config_mac_to_phy"); ++ ++ /* 82544 or newer MAC, Auto Speed Detection takes care of ++ * MAC speed/duplex configuration.*/ ++ if (hw->mac_type >= e1000_82544) ++ return E1000_SUCCESS; ++ ++ /* Read the Device Control Register and set the bits to Force Speed ++ * and Duplex. ++ */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS); ++ ++ /* Set up duplex in the Device Control and Transmit Control ++ * registers depending on negotiated values. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if (phy_data & M88E1000_PSSR_DPLX) ++ ctrl |= E1000_CTRL_FD; ++ else ++ ctrl &= ~E1000_CTRL_FD; ++ ++ e1000_config_collision_dist(hw); ++ ++ /* Set up speed in the Device Control register depending on ++ * negotiated values. ++ */ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) ++ ctrl |= E1000_CTRL_SPD_1000; ++ else if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS) ++ ctrl |= E1000_CTRL_SPD_100; ++ ++ /* Write the configured values back to the Device Control Reg. */ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Forces the MAC's flow control settings. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Sets the TFCE and RFCE bits in the device control register to reflect ++ * the adapter settings. TFCE and RFCE need to be explicitly set by ++ * software when a Copper PHY is used because autonegotiation is managed ++ * by the PHY rather than the MAC. Software must also configure these ++ * bits when link is forced on a fiber connection. ++ *****************************************************************************/ ++int32_t ++e1000_force_mac_fc(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ ++ DEBUGFUNC("e1000_force_mac_fc"); ++ ++ /* Get the current configuration of the Device Control Register */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ++ /* Because we didn't get link via the internal auto-negotiation ++ * mechanism (we either forced link or we got link via PHY ++ * auto-neg), we have to manually enable/disable transmit an ++ * receive flow control. ++ * ++ * The "Case" statement below enables/disable flow control ++ * according to the "hw->fc" parameter. 
++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause ++ * frames but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * frames but we do not receive pause frames). ++ * 3: Both Rx and TX flow control (symmetric) is enabled. ++ * other: No other values should be possible at this point. ++ */ ++ ++ switch (hw->fc) { ++ case e1000_fc_none: ++ ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); ++ break; ++ case e1000_fc_rx_pause: ++ ctrl &= (~E1000_CTRL_TFCE); ++ ctrl |= E1000_CTRL_RFCE; ++ break; ++ case e1000_fc_tx_pause: ++ ctrl &= (~E1000_CTRL_RFCE); ++ ctrl |= E1000_CTRL_TFCE; ++ break; ++ case e1000_fc_full: ++ ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ /* Disable TX Flow Control for 82542 (rev 2.0) */ ++ if (hw->mac_type == e1000_82542_rev2_0) ++ ctrl &= (~E1000_CTRL_TFCE); ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Configures flow control settings after link is established ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Should be called immediately after a valid link has been established. ++ * Forces MAC flow control settings if link was forced. When in MII/GMII mode ++ * and autonegotiation is enabled, the MAC flow control settings will be set ++ * based on the flow control negotiated by the PHY. In TBI mode, the TFCE ++ * and RFCE bits will be automaticaly set to the negotiated flow control mode. ++ *****************************************************************************/ ++int32_t ++e1000_config_fc_after_link_up(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t mii_status_reg; ++ uint16_t mii_nway_adv_reg; ++ uint16_t mii_nway_lp_ability_reg; ++ uint16_t speed; ++ uint16_t duplex; ++ ++ DEBUGFUNC("e1000_config_fc_after_link_up"); ++ ++ /* Check for the case where we have fiber media and auto-neg failed ++ * so we had to force link. In this case, we need to force the ++ * configuration of the MAC to match the "fc" parameter. ++ */ ++ if (((hw->media_type == e1000_media_type_fiber) && (hw->autoneg_failed)) || ++ ((hw->media_type == e1000_media_type_internal_serdes) && ++ (hw->autoneg_failed)) || ++ ((hw->media_type == e1000_media_type_copper) && (!hw->autoneg))) { ++ ret_val = e1000_force_mac_fc(hw); ++ if (ret_val) { ++ DEBUGOUT("Error forcing flow control settings\n"); ++ return ret_val; ++ } ++ } ++ ++ /* Check for the case where we have copper media and auto-neg is ++ * enabled. In this case, we need to check and see if Auto-Neg ++ * has completed, and if so, how the PHY and link partner has ++ * flow control configured. ++ */ ++ if ((hw->media_type == e1000_media_type_copper) && hw->autoneg) { ++ /* Read the MII Status Register and check to see if AutoNeg ++ * has completed. We read this twice because this reg has ++ * some "sticky" (latched) bits. 
++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (mii_status_reg & MII_SR_AUTONEG_COMPLETE) { ++ /* The AutoNeg process has completed, so we now need to ++ * read both the Auto Negotiation Advertisement Register ++ * (Address 4) and the Auto_Negotiation Base Page Ability ++ * Register (Address 5) to determine how flow control was ++ * negotiated. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, ++ &mii_nway_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_phy_reg(hw, PHY_LP_ABILITY, ++ &mii_nway_lp_ability_reg); ++ if (ret_val) ++ return ret_val; ++ ++ /* Two bits in the Auto Negotiation Advertisement Register ++ * (Address 4) and two bits in the Auto Negotiation Base ++ * Page Ability Register (Address 5) determine flow control ++ * for both the PHY and the link partner. The following ++ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, ++ * 1999, describes these PAUSE resolution bits and how flow ++ * control is determined based upon these settings. ++ * NOTE: DC = Don't Care ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 0 | DC | DC | e1000_fc_none ++ * 0 | 1 | 0 | DC | e1000_fc_none ++ * 0 | 1 | 1 | 0 | e1000_fc_none ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * 1 | 0 | 0 | DC | e1000_fc_none ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * 1 | 1 | 0 | 0 | e1000_fc_none ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ */ ++ /* Are both PAUSE bits set to 1? If so, this implies ++ * Symmetric Flow Control is enabled at both ends. The ++ * ASM_DIR bits are irrelevant per the spec. ++ * ++ * For Symmetric Flow Control: ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * ++ */ ++ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { ++ /* Now we need to check if the user selected RX ONLY ++ * of pause frames. In this case, we had to advertise ++ * FULL flow control because we could not advertise RX ++ * ONLY. Hence, we must now check to see if we need to ++ * turn OFF the TRANSMISSION of PAUSE frames. ++ */ ++ if (hw->original_fc == e1000_fc_full) { ++ hw->fc = e1000_fc_full; ++ DEBUGOUT("Flow Control = FULL.\n"); ++ } else { ++ hw->fc = e1000_fc_rx_pause; ++ DEBUGOUT("Flow Control = RX PAUSE frames only.\n"); ++ } ++ } ++ /* For receiving PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * ++ */ ++ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc = e1000_fc_tx_pause; ++ DEBUGOUT("Flow Control = TX PAUSE frames only.\n"); ++ } ++ /* For transmitting PAUSE frames ONLY. 
++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ */ ++ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc = e1000_fc_rx_pause; ++ DEBUGOUT("Flow Control = RX PAUSE frames only.\n"); ++ } ++ /* Per the IEEE spec, at this point flow control should be ++ * disabled. However, we want to consider that we could ++ * be connected to a legacy switch that doesn't advertise ++ * desired flow control, but can be forced on the link ++ * partner. So if we advertised no flow control, that is ++ * what we will resolve to. If we advertised some kind of ++ * receive capability (Rx Pause Only or Full Flow Control) ++ * and the link partner advertised none, we will configure ++ * ourselves to enable Rx Flow Control only. We can do ++ * this safely for two reasons: If the link partner really ++ * didn't want flow control enabled, and we enable Rx, no ++ * harm done since we won't be receiving any PAUSE frames ++ * anyway. If the intent on the link partner was to have ++ * flow control enabled, then by us enabling RX only, we ++ * can at least receive pause frames and process them. ++ * This is a good idea because in most cases, since we are ++ * predominantly a server NIC, more times than not we will ++ * be asked to delay transmission of packets than asking ++ * our link partner to pause transmission of frames. ++ */ ++ else if ((hw->original_fc == e1000_fc_none || ++ hw->original_fc == e1000_fc_tx_pause) || ++ hw->fc_strict_ieee) { ++ hw->fc = e1000_fc_none; ++ DEBUGOUT("Flow Control = NONE.\n"); ++ } else { ++ hw->fc = e1000_fc_rx_pause; ++ DEBUGOUT("Flow Control = RX PAUSE frames only.\n"); ++ } ++ ++ /* Now we need to do one last check... If we auto- ++ * negotiated to HALF DUPLEX, flow control should not be ++ * enabled per IEEE 802.3 spec. ++ */ ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ return ret_val; ++ } ++ ++ if (duplex == HALF_DUPLEX) ++ hw->fc = e1000_fc_none; ++ ++ /* Now we call a subroutine to actually force the MAC ++ * controller to use the correct flow control settings. ++ */ ++ ret_val = e1000_force_mac_fc(hw); ++ if (ret_val) { ++ DEBUGOUT("Error forcing flow control settings\n"); ++ return ret_val; ++ } ++ } else { ++ DEBUGOUT("Copper PHY and Auto Neg has not completed.\n"); ++ } ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Checks to see if the link status of the hardware has changed. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Called by any function that needs to check the link status of the adapter. ++ *****************************************************************************/ ++int32_t ++e1000_check_for_link(struct e1000_hw *hw) ++{ ++ uint32_t rxcw = 0; ++ uint32_t ctrl; ++ uint32_t status; ++ uint32_t rctl; ++ uint32_t icr; ++ uint32_t signal = 0; ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_check_for_link"); ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ status = E1000_READ_REG(hw, STATUS); ++ ++ /* On adapters with a MAC newer than 82544, SW Defineable pin 1 will be ++ * set when the optics detect a signal. 
On older adapters, it will be ++ * cleared when there is a signal. This applies to fiber media only. ++ */ ++ if ((hw->media_type == e1000_media_type_fiber) || ++ (hw->media_type == e1000_media_type_internal_serdes)) { ++ rxcw = E1000_READ_REG(hw, RXCW); ++ ++ if (hw->media_type == e1000_media_type_fiber) { ++ signal = (hw->mac_type > e1000_82544) ? E1000_CTRL_SWDPIN1 : 0; ++ if (status & E1000_STATUS_LU) ++ hw->get_link_status = FALSE; ++ } ++ } ++ ++ /* If we have a copper PHY then we only want to go out to the PHY ++ * registers to see if Auto-Neg has completed and/or if our link ++ * status has changed. The get_link_status flag will be set if we ++ * receive a Link Status Change interrupt or we have Rx Sequence ++ * Errors. ++ */ ++ if ((hw->media_type == e1000_media_type_copper) && hw->get_link_status) { ++ /* First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. ++ * Read the register twice since the link bit is sticky. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if (phy_data & MII_SR_LINK_STATUS) { ++ hw->get_link_status = FALSE; ++ /* Check if there was DownShift, must be checked immediately after ++ * link-up */ ++ e1000_check_downshift(hw); ++ ++ /* If we are on 82544 or 82543 silicon and speed/duplex ++ * are forced to 10H or 10F, then we will implement the polarity ++ * reversal workaround. We disable interrupts first, and upon ++ * returning, place the devices interrupt state to its previous ++ * value except for the link status change interrupt which will ++ * happen due to the execution of this workaround. ++ */ ++ ++ if ((hw->mac_type == e1000_82544 || hw->mac_type == e1000_82543) && ++ (!hw->autoneg) && ++ (hw->forced_speed_duplex == e1000_10_full || ++ hw->forced_speed_duplex == e1000_10_half)) { ++ E1000_WRITE_REG(hw, IMC, 0xffffffff); ++ ret_val = e1000_polarity_reversal_workaround(hw); ++ icr = E1000_READ_REG(hw, ICR); ++ E1000_WRITE_REG(hw, ICS, (icr & ~E1000_ICS_LSC)); ++ E1000_WRITE_REG(hw, IMS, IMS_ENABLE_MASK); ++ } ++ ++ } else { ++ /* No link detected */ ++ e1000_config_dsp_after_link_change(hw, FALSE); ++ return 0; ++ } ++ ++ /* If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!hw->autoneg) return -E1000_ERR_CONFIG; ++ ++ /* optimize the dsp settings for the igp phy */ ++ e1000_config_dsp_after_link_change(hw, TRUE); ++ ++ /* We have a M88E1000 PHY and Auto-Neg is enabled. If we ++ * have Si on board that is 82544 or newer, Auto ++ * Speed Detection takes care of MAC speed/duplex ++ * configuration. So we only need to configure Collision ++ * Distance in the MAC. Otherwise, we need to force ++ * speed/duplex on the MAC to the current PHY speed/duplex ++ * settings. ++ */ ++ if (hw->mac_type >= e1000_82544) ++ e1000_config_collision_dist(hw); ++ else { ++ ret_val = e1000_config_mac_to_phy(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring MAC to PHY settings\n"); ++ return ret_val; ++ } ++ } ++ ++ /* Configure Flow Control now that Auto-Neg has completed. First, we ++ * need to restore the desired flow control settings because we may ++ * have had to re-autoneg with a different link partner. 
++ */ ++ ret_val = e1000_config_fc_after_link_up(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ return ret_val; ++ } ++ ++ /* At this point we know that we are on copper and we have ++ * auto-negotiated link. These are conditions for checking the link ++ * partner capability register. We use the link speed to determine if ++ * TBI compatibility needs to be turned on or off. If the link is not ++ * at gigabit speed, then TBI compatibility is not needed. If we are ++ * at gigabit speed, we turn on TBI compatibility. ++ */ ++ if (hw->tbi_compatibility_en) { ++ uint16_t speed, duplex; ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ return ret_val; ++ } ++ if (speed != SPEED_1000) { ++ /* If link speed is not set to gigabit speed, we do not need ++ * to enable TBI compatibility. ++ */ ++ if (hw->tbi_compatibility_on) { ++ /* If we previously were in the mode, turn it off. */ ++ rctl = E1000_READ_REG(hw, RCTL); ++ rctl &= ~E1000_RCTL_SBP; ++ E1000_WRITE_REG(hw, RCTL, rctl); ++ hw->tbi_compatibility_on = FALSE; ++ } ++ } else { ++ /* If TBI compatibility is was previously off, turn it on. For ++ * compatibility with a TBI link partner, we will store bad ++ * packets. Some frames have an additional byte on the end and ++ * will look like CRC errors to to the hardware. ++ */ ++ if (!hw->tbi_compatibility_on) { ++ hw->tbi_compatibility_on = TRUE; ++ rctl = E1000_READ_REG(hw, RCTL); ++ rctl |= E1000_RCTL_SBP; ++ E1000_WRITE_REG(hw, RCTL, rctl); ++ } ++ } ++ } ++ } ++ /* If we don't have link (auto-negotiation failed or link partner cannot ++ * auto-negotiate), the cable is plugged in (we have signal), and our ++ * link partner is not trying to auto-negotiate with us (we are receiving ++ * idles or data), we need to force link up. We also need to give ++ * auto-negotiation time to complete, in case the cable was just plugged ++ * in. The autoneg_failed flag does this. ++ */ ++ else if ((((hw->media_type == e1000_media_type_fiber) && ++ ((ctrl & E1000_CTRL_SWDPIN1) == signal)) || ++ (hw->media_type == e1000_media_type_internal_serdes)) && ++ (!(status & E1000_STATUS_LU)) && ++ (!(rxcw & E1000_RXCW_C))) { ++ if (hw->autoneg_failed == 0) { ++ hw->autoneg_failed = 1; ++ return 0; ++ } ++ DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ E1000_WRITE_REG(hw, TXCW, (hw->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. */ ++ ret_val = e1000_config_fc_after_link_up(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ return ret_val; ++ } ++ } ++ /* If we are forcing link and we are receiving /C/ ordered sets, re-enable ++ * auto-negotiation in the TXCW register and disable forced link in the ++ * Device Control register in an attempt to auto-negotiate with our link ++ * partner. 
++ */ ++ else if (((hw->media_type == e1000_media_type_fiber) || ++ (hw->media_type == e1000_media_type_internal_serdes)) && ++ (ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\n"); ++ E1000_WRITE_REG(hw, TXCW, hw->txcw); ++ E1000_WRITE_REG(hw, CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ hw->serdes_link_down = FALSE; ++ } ++ /* If we force link for non-auto-negotiation switch, check link status ++ * based on MAC synchronization for internal serdes media type. ++ */ ++ else if ((hw->media_type == e1000_media_type_internal_serdes) && ++ !(E1000_TXCW_ANE & E1000_READ_REG(hw, TXCW))) { ++ /* SYNCH bit and IV bit are sticky. */ ++ usec_delay(10); ++ if (E1000_RXCW_SYNCH & E1000_READ_REG(hw, RXCW)) { ++ if (!(rxcw & E1000_RXCW_IV)) { ++ hw->serdes_link_down = FALSE; ++ DEBUGOUT("SERDES: Link is up.\n"); ++ } ++ } else { ++ hw->serdes_link_down = TRUE; ++ DEBUGOUT("SERDES: Link is down.\n"); ++ } ++ } ++ if ((hw->media_type == e1000_media_type_internal_serdes) && ++ (E1000_TXCW_ANE & E1000_READ_REG(hw, TXCW))) { ++ hw->serdes_link_down = !(E1000_STATUS_LU & E1000_READ_REG(hw, STATUS)); ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Detects the current speed and duplex settings of the hardware. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * speed - Speed of the connection ++ * duplex - Duplex setting of the connection ++ *****************************************************************************/ ++int32_t ++e1000_get_speed_and_duplex(struct e1000_hw *hw, ++ uint16_t *speed, ++ uint16_t *duplex) ++{ ++ uint32_t status; ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_get_speed_and_duplex"); ++ ++ if (hw->mac_type >= e1000_82543) { ++ status = E1000_READ_REG(hw, STATUS); ++ if (status & E1000_STATUS_SPEED_1000) { ++ *speed = SPEED_1000; ++ DEBUGOUT("1000 Mbs, "); ++ } else if (status & E1000_STATUS_SPEED_100) { ++ *speed = SPEED_100; ++ DEBUGOUT("100 Mbs, "); ++ } else { ++ *speed = SPEED_10; ++ DEBUGOUT("10 Mbs, "); ++ } ++ ++ if (status & E1000_STATUS_FD) { ++ *duplex = FULL_DUPLEX; ++ DEBUGOUT("Full Duplex\n"); ++ } else { ++ *duplex = HALF_DUPLEX; ++ DEBUGOUT(" Half Duplex\n"); ++ } ++ } else { ++ DEBUGOUT("1000 Mbs, Full Duplex\n"); ++ *speed = SPEED_1000; ++ *duplex = FULL_DUPLEX; ++ } ++ ++ /* IGP01 PHY may advertise full duplex operation after speed downgrade even ++ * if it is operating at half duplex. Here we set the duplex settings to ++ * match the duplex in the link partner's capabilities. 
++ */ ++ if (hw->phy_type == e1000_phy_igp && hw->speed_downgraded) { ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if (!(phy_data & NWAY_ER_LP_NWAY_CAPS)) ++ *duplex = HALF_DUPLEX; ++ else { ++ ret_val = e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy_data); ++ if (ret_val) ++ return ret_val; ++ if ((*speed == SPEED_100 && !(phy_data & NWAY_LPAR_100TX_FD_CAPS)) || ++ (*speed == SPEED_10 && !(phy_data & NWAY_LPAR_10T_FD_CAPS))) ++ *duplex = HALF_DUPLEX; ++ } ++ } ++ ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (hw->media_type == e1000_media_type_copper)) { ++ if (*speed == SPEED_1000) ++ ret_val = e1000_configure_kmrn_for_1000(hw); ++ else ++ ret_val = e1000_configure_kmrn_for_10_100(hw, *duplex); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if ((hw->phy_type == e1000_phy_igp_3) && (*speed == SPEED_1000)) { ++ ret_val = e1000_kumeran_lock_loss_workaround(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Blocks until autoneg completes or times out (~4.5 seconds) ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++int32_t ++e1000_wait_autoneg(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t i; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_wait_autoneg"); ++ DEBUGOUT("Waiting for Auto-Neg to complete.\n"); ++ ++ /* We will wait for autoneg to complete or 4.5 seconds to expire. */ ++ for (i = PHY_AUTO_NEG_TIME; i > 0; i--) { ++ /* Read the MII Status Register and wait for Auto-Neg ++ * Complete bit to be set. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ if (phy_data & MII_SR_AUTONEG_COMPLETE) { ++ return E1000_SUCCESS; ++ } ++ msec_delay(100); ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Raises the Management Data Clock ++* ++* hw - Struct containing variables accessed by shared code ++* ctrl - Device control register's current value ++******************************************************************************/ ++static void ++e1000_raise_mdi_clk(struct e1000_hw *hw, ++ uint32_t *ctrl) ++{ ++ /* Raise the clock input to the Management Data Clock (by setting the MDC ++ * bit), and then delay 10 microseconds. ++ */ ++ E1000_WRITE_REG(hw, CTRL, (*ctrl | E1000_CTRL_MDC)); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(10); ++} ++ ++/****************************************************************************** ++* Lowers the Management Data Clock ++* ++* hw - Struct containing variables accessed by shared code ++* ctrl - Device control register's current value ++******************************************************************************/ ++static void ++e1000_lower_mdi_clk(struct e1000_hw *hw, ++ uint32_t *ctrl) ++{ ++ /* Lower the clock input to the Management Data Clock (by clearing the MDC ++ * bit), and then delay 10 microseconds. 
++ */ ++ E1000_WRITE_REG(hw, CTRL, (*ctrl & ~E1000_CTRL_MDC)); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(10); ++} ++ ++/****************************************************************************** ++* Shifts data bits out to the PHY ++* ++* hw - Struct containing variables accessed by shared code ++* data - Data to send out to the PHY ++* count - Number of bits to shift out ++* ++* Bits are shifted out in MSB to LSB order. ++******************************************************************************/ ++static void ++e1000_shift_out_mdi_bits(struct e1000_hw *hw, ++ uint32_t data, ++ uint16_t count) ++{ ++ uint32_t ctrl; ++ uint32_t mask; ++ ++ /* We need to shift "count" number of bits out to the PHY. So, the value ++ * in the "data" parameter will be shifted out to the PHY one bit at a ++ * time. In order to do this, "data" must be broken down into bits. ++ */ ++ mask = 0x01; ++ mask <<= (count - 1); ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ++ /* Set MDIO_DIR and MDC_DIR direction bits to be used as output pins. */ ++ ctrl |= (E1000_CTRL_MDIO_DIR | E1000_CTRL_MDC_DIR); ++ ++ while (mask) { ++ /* A "1" is shifted out to the PHY by setting the MDIO bit to "1" and ++ * then raising and lowering the Management Data Clock. A "0" is ++ * shifted out to the PHY by setting the MDIO bit to "0" and then ++ * raising and lowering the clock. ++ */ ++ if (data & mask) ++ ctrl |= E1000_CTRL_MDIO; ++ else ++ ctrl &= ~E1000_CTRL_MDIO; ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(10); ++ ++ e1000_raise_mdi_clk(hw, &ctrl); ++ e1000_lower_mdi_clk(hw, &ctrl); ++ ++ mask = mask >> 1; ++ } ++} ++ ++/****************************************************************************** ++* Shifts data bits in from the PHY ++* ++* hw - Struct containing variables accessed by shared code ++* ++* Bits are shifted in in MSB to LSB order. ++******************************************************************************/ ++static uint16_t ++e1000_shift_in_mdi_bits(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ uint16_t data = 0; ++ uint8_t i; ++ ++ /* In order to read a register from the PHY, we need to shift in a total ++ * of 18 bits from the PHY. The first two bit (turnaround) times are used ++ * to avoid contention on the MDIO pin when a read operation is performed. ++ * These two bits are ignored by us and thrown away. Bits are "shifted in" ++ * by raising the input to the Management Data Clock (setting the MDC bit), ++ * and then reading the value of the MDIO bit. ++ */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ++ /* Clear MDIO_DIR (SWDPIO1) to indicate this bit is to be used as input. */ ++ ctrl &= ~E1000_CTRL_MDIO_DIR; ++ ctrl &= ~E1000_CTRL_MDIO; ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* Raise and Lower the clock before reading in the data. This accounts for ++ * the turnaround bits. The first clock occurred when we clocked out the ++ * last bit of the Register Address. ++ */ ++ e1000_raise_mdi_clk(hw, &ctrl); ++ e1000_lower_mdi_clk(hw, &ctrl); ++ ++ for (data = 0, i = 0; i < 16; i++) { ++ data = data << 1; ++ e1000_raise_mdi_clk(hw, &ctrl); ++ ctrl = E1000_READ_REG(hw, CTRL); ++ /* Check to see if we shifted in a "1". 
*/ ++ if (ctrl & E1000_CTRL_MDIO) ++ data |= 1; ++ e1000_lower_mdi_clk(hw, &ctrl); ++ } ++ ++ e1000_raise_mdi_clk(hw, &ctrl); ++ e1000_lower_mdi_clk(hw, &ctrl); ++ ++ return data; ++} ++ ++int32_t ++e1000_swfw_sync_acquire(struct e1000_hw *hw, uint16_t mask) ++{ ++ uint32_t swfw_sync = 0; ++ uint32_t swmask = mask; ++ uint32_t fwmask = mask << 16; ++ int32_t timeout = 200; ++ ++ DEBUGFUNC("e1000_swfw_sync_acquire"); ++ ++ if (hw->swfwhw_semaphore_present) ++ return e1000_get_software_flag(hw); ++ ++ if (!hw->swfw_sync_present) ++ return e1000_get_hw_eeprom_semaphore(hw); ++ ++ while (timeout) { ++ if (e1000_get_hw_eeprom_semaphore(hw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ swfw_sync = E1000_READ_REG(hw, SW_FW_SYNC); ++ if (!(swfw_sync & (fwmask | swmask))) { ++ break; ++ } ++ ++ /* firmware currently using resource (fwmask) */ ++ /* or other software thread currently using resource (swmask) */ ++ e1000_put_hw_eeprom_semaphore(hw); ++ msec_delay_irq(5); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); ++ return -E1000_ERR_SWFW_SYNC; ++ } ++ ++ swfw_sync |= swmask; ++ E1000_WRITE_REG(hw, SW_FW_SYNC, swfw_sync); ++ ++ e1000_put_hw_eeprom_semaphore(hw); ++ return E1000_SUCCESS; ++} ++ ++void ++e1000_swfw_sync_release(struct e1000_hw *hw, uint16_t mask) ++{ ++ uint32_t swfw_sync; ++ uint32_t swmask = mask; ++ ++ DEBUGFUNC("e1000_swfw_sync_release"); ++ ++ if (hw->swfwhw_semaphore_present) { ++ e1000_release_software_flag(hw); ++ return; ++ } ++ ++ if (!hw->swfw_sync_present) { ++ e1000_put_hw_eeprom_semaphore(hw); ++ return; ++ } ++ ++ /* if (e1000_get_hw_eeprom_semaphore(hw)) ++ * return -E1000_ERR_SWFW_SYNC; */ ++ while (e1000_get_hw_eeprom_semaphore(hw) != E1000_SUCCESS); ++ /* empty */ ++ ++ swfw_sync = E1000_READ_REG(hw, SW_FW_SYNC); ++ swfw_sync &= ~swmask; ++ E1000_WRITE_REG(hw, SW_FW_SYNC, swfw_sync); ++ ++ e1000_put_hw_eeprom_semaphore(hw); ++} ++ ++/***************************************************************************** ++* Reads the value from a PHY register, if the value is on a specific non zero ++* page, sets the page first. 
++* hw - Struct containing variables accessed by shared code ++* reg_addr - address of the PHY register to read ++******************************************************************************/ ++int32_t ++e1000_read_phy_reg(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t *phy_data) ++{ ++ uint32_t ret_val; ++ uint16_t swfw; ++ ++ DEBUGFUNC("e1000_read_phy_reg"); ++ ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { ++ swfw = E1000_SWFW_PHY1_SM; ++ } else { ++ swfw = E1000_SWFW_PHY0_SM; ++ } ++ if (e1000_swfw_sync_acquire(hw, swfw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ if ((hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) && ++ (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ++ ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, ++ (uint16_t)reg_addr); ++ if (ret_val) { ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++ } ++ } else if (hw->phy_type == e1000_phy_gg82563) { ++ if (((reg_addr & MAX_PHY_REG_ADDRESS) > MAX_PHY_MULTI_PAGE_REG) || ++ (hw->mac_type == e1000_80003es2lan)) { ++ /* Select Configuration Page */ ++ if ((reg_addr & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ ret_val = e1000_write_phy_reg_ex(hw, GG82563_PHY_PAGE_SELECT, ++ (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); ++ } else { ++ /* Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ ret_val = e1000_write_phy_reg_ex(hw, ++ GG82563_PHY_PAGE_SELECT_ALT, ++ (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); ++ } ++ ++ if (ret_val) { ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++ } ++ } ++ } ++ ++ ret_val = e1000_read_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr, ++ phy_data); ++ ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++} ++ ++int32_t ++e1000_read_phy_reg_ex(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t *phy_data) ++{ ++ uint32_t i; ++ uint32_t mdic = 0; ++ const uint32_t phy_addr = 1; ++ ++ DEBUGFUNC("e1000_read_phy_reg_ex"); ++ ++ if (reg_addr > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", reg_addr); ++ return -E1000_ERR_PARAM; ++ } ++ ++ if (hw->mac_type > e1000_82543) { ++ /* Set up Op-code, Phy Address, and register address in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) | ++ (phy_addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_READ)); ++ ++ E1000_WRITE_REG(hw, MDIC, mdic); ++ ++ /* Poll the ready bit to see if the MDI read completed */ ++ for (i = 0; i < 64; i++) { ++ usec_delay(50); ++ mdic = E1000_READ_REG(hw, MDIC); ++ if (mdic & E1000_MDIC_READY) break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ DEBUGOUT("MDI Read did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ DEBUGOUT("MDI Error\n"); ++ return -E1000_ERR_PHY; ++ } ++ *phy_data = (uint16_t) mdic; ++ } else { ++ /* We must first send a preamble through the MDIO pin to signal the ++ * beginning of an MII instruction. This is done by sending 32 ++ * consecutive "1" bits. ++ */ ++ e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); ++ ++ /* Now combine the next few fields that are required for a read ++ * operation. We use this method instead of calling the ++ * e1000_shift_out_mdi_bits routine five different times. 
The format of ++ * a MII read instruction consists of a shift out of 14 bits and is ++ * defined as follows: ++ * ++ * followed by a shift in of 18 bits. This first two bits shifted in ++ * are TurnAround bits used to avoid contention on the MDIO pin when a ++ * READ operation is performed. These two bits are thrown away ++ * followed by a shift in of 16 bits which contains the desired data. ++ */ ++ mdic = ((reg_addr) | (phy_addr << 5) | ++ (PHY_OP_READ << 10) | (PHY_SOF << 12)); ++ ++ e1000_shift_out_mdi_bits(hw, mdic, 14); ++ ++ /* Now that we've shifted out the read command to the MII, we need to ++ * "shift in" the 16-bit value (18 total bits) of the requested PHY ++ * register address. ++ */ ++ *phy_data = e1000_shift_in_mdi_bits(hw); ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Writes a value to a PHY register ++* ++* hw - Struct containing variables accessed by shared code ++* reg_addr - address of the PHY register to write ++* data - data to write to the PHY ++******************************************************************************/ ++int32_t ++e1000_write_phy_reg(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t phy_data) ++{ ++ uint32_t ret_val; ++ uint16_t swfw; ++ ++ DEBUGFUNC("e1000_write_phy_reg"); ++ ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { ++ swfw = E1000_SWFW_PHY1_SM; ++ } else { ++ swfw = E1000_SWFW_PHY0_SM; ++ } ++ if (e1000_swfw_sync_acquire(hw, swfw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ if ((hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) && ++ (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ++ ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, ++ (uint16_t)reg_addr); ++ if (ret_val) { ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++ } ++ } else if (hw->phy_type == e1000_phy_gg82563) { ++ if (((reg_addr & MAX_PHY_REG_ADDRESS) > MAX_PHY_MULTI_PAGE_REG) || ++ (hw->mac_type == e1000_80003es2lan)) { ++ /* Select Configuration Page */ ++ if ((reg_addr & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ ret_val = e1000_write_phy_reg_ex(hw, GG82563_PHY_PAGE_SELECT, ++ (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); ++ } else { ++ /* Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ ret_val = e1000_write_phy_reg_ex(hw, ++ GG82563_PHY_PAGE_SELECT_ALT, ++ (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); ++ } ++ ++ if (ret_val) { ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++ } ++ } ++ } ++ ++ ret_val = e1000_write_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr, ++ phy_data); ++ ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++} ++ ++int32_t ++e1000_write_phy_reg_ex(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t phy_data) ++{ ++ uint32_t i; ++ uint32_t mdic = 0; ++ const uint32_t phy_addr = 1; ++ ++ DEBUGFUNC("e1000_write_phy_reg_ex"); ++ ++ if (reg_addr > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", reg_addr); ++ return -E1000_ERR_PARAM; ++ } ++ ++ if (hw->mac_type > e1000_82543) { ++ /* Set up Op-code, Phy Address, register address, and data intended ++ * for the PHY register in the MDI Control register. The MAC will take ++ * care of interfacing with the PHY to send the desired data. 
++ */ ++ mdic = (((uint32_t) phy_data) | ++ (reg_addr << E1000_MDIC_REG_SHIFT) | ++ (phy_addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_WRITE)); ++ ++ E1000_WRITE_REG(hw, MDIC, mdic); ++ ++ /* Poll the ready bit to see if the MDI read completed */ ++ for (i = 0; i < 641; i++) { ++ usec_delay(5); ++ mdic = E1000_READ_REG(hw, MDIC); ++ if (mdic & E1000_MDIC_READY) break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ DEBUGOUT("MDI Write did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ } else { ++ /* We'll need to use the SW defined pins to shift the write command ++ * out to the PHY. We first send a preamble to the PHY to signal the ++ * beginning of the MII instruction. This is done by sending 32 ++ * consecutive "1" bits. ++ */ ++ e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); ++ ++ /* Now combine the remaining required fields that will indicate a ++ * write operation. We use this method instead of calling the ++ * e1000_shift_out_mdi_bits routine for each field in the command. The ++ * format of a MII write instruction is as follows: ++ * . ++ */ ++ mdic = ((PHY_TURNAROUND) | (reg_addr << 2) | (phy_addr << 7) | ++ (PHY_OP_WRITE << 12) | (PHY_SOF << 14)); ++ mdic <<= 16; ++ mdic |= (uint32_t) phy_data; ++ ++ e1000_shift_out_mdi_bits(hw, mdic, 32); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++int32_t ++e1000_read_kmrn_reg(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t *data) ++{ ++ uint32_t reg_val; ++ uint16_t swfw; ++ DEBUGFUNC("e1000_read_kmrn_reg"); ++ ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { ++ swfw = E1000_SWFW_PHY1_SM; ++ } else { ++ swfw = E1000_SWFW_PHY0_SM; ++ } ++ if (e1000_swfw_sync_acquire(hw, swfw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ /* Write register address */ ++ reg_val = ((reg_addr << E1000_KUMCTRLSTA_OFFSET_SHIFT) & ++ E1000_KUMCTRLSTA_OFFSET) | ++ E1000_KUMCTRLSTA_REN; ++ E1000_WRITE_REG(hw, KUMCTRLSTA, reg_val); ++ usec_delay(2); ++ ++ /* Read the data returned */ ++ reg_val = E1000_READ_REG(hw, KUMCTRLSTA); ++ *data = (uint16_t)reg_val; ++ ++ e1000_swfw_sync_release(hw, swfw); ++ return E1000_SUCCESS; ++} ++ ++int32_t ++e1000_write_kmrn_reg(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t data) ++{ ++ uint32_t reg_val; ++ uint16_t swfw; ++ DEBUGFUNC("e1000_write_kmrn_reg"); ++ ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { ++ swfw = E1000_SWFW_PHY1_SM; ++ } else { ++ swfw = E1000_SWFW_PHY0_SM; ++ } ++ if (e1000_swfw_sync_acquire(hw, swfw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ reg_val = ((reg_addr << E1000_KUMCTRLSTA_OFFSET_SHIFT) & ++ E1000_KUMCTRLSTA_OFFSET) | data; ++ E1000_WRITE_REG(hw, KUMCTRLSTA, reg_val); ++ usec_delay(2); ++ ++ e1000_swfw_sync_release(hw, swfw); ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Returns the PHY to the power-on reset state ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++int32_t ++e1000_phy_hw_reset(struct e1000_hw *hw) ++{ ++ uint32_t ctrl, ctrl_ext; ++ uint32_t led_ctrl; ++ int32_t ret_val; ++ uint16_t swfw; ++ ++ DEBUGFUNC("e1000_phy_hw_reset"); ++ ++ /* In the case of the phy reset being blocked, it's not an error, we ++ * simply return success without performing the reset. 
*/ ++ ret_val = e1000_check_phy_reset_block(hw); ++ if (ret_val) ++ return E1000_SUCCESS; ++ ++ DEBUGOUT("Resetting Phy...\n"); ++ ++ if (hw->mac_type > e1000_82543) { ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { ++ swfw = E1000_SWFW_PHY1_SM; ++ } else { ++ swfw = E1000_SWFW_PHY0_SM; ++ } ++ if (e1000_swfw_sync_acquire(hw, swfw)) { ++ e1000_release_software_semaphore(hw); ++ return -E1000_ERR_SWFW_SYNC; ++ } ++ /* Read the device control register and assert the E1000_CTRL_PHY_RST ++ * bit. Then, take it out of reset. ++ * For pre-e1000_82571 hardware, we delay for 10ms between the assert ++ * and deassert. For e1000_82571 hardware and later, we instead delay ++ * for 50us between and 10ms after the deassertion. ++ */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PHY_RST); ++ E1000_WRITE_FLUSH(hw); ++ ++ if (hw->mac_type < e1000_82571) ++ msec_delay(10); ++ else ++ usec_delay(100); ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ if (hw->mac_type >= e1000_82571) ++ msec_delay_irq(10); ++ e1000_swfw_sync_release(hw, swfw); ++ } else { ++ /* Read the Extended Device Control Register, assert the PHY_RESET_DIR ++ * bit to put the PHY into reset. Then, take it out of reset. ++ */ ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_SDP4_DIR; ++ ctrl_ext &= ~E1000_CTRL_EXT_SDP4_DATA; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(10); ++ ctrl_ext |= E1000_CTRL_EXT_SDP4_DATA; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ } ++ usec_delay(150); ++ ++ if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { ++ /* Configure activity LED after PHY reset */ ++ led_ctrl = E1000_READ_REG(hw, LEDCTL); ++ led_ctrl &= IGP_ACTIVITY_LED_MASK; ++ led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, LEDCTL, led_ctrl); ++ } ++ ++ /* Wait for FW to finish PHY configuration. */ ++ ret_val = e1000_get_phy_cfg_done(hw); ++ e1000_release_software_semaphore(hw); ++ ++ if ((hw->mac_type == e1000_ich8lan) && ++ (hw->phy_type == e1000_phy_igp_3)) { ++ ret_val = e1000_init_lcd_from_nvm(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ return ret_val; ++} ++ ++/****************************************************************************** ++* Resets the PHY ++* ++* hw - Struct containing variables accessed by shared code ++* ++* Sets bit 15 of the MII Control regiser ++******************************************************************************/ ++int32_t ++e1000_phy_reset(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_phy_reset"); ++ ++ /* In the case of the phy reset being blocked, it's not an error, we ++ * simply return success without performing the reset. 
*/ ++ ret_val = e1000_check_phy_reset_block(hw); ++ if (ret_val) ++ return E1000_SUCCESS; ++ ++ switch (hw->mac_type) { ++ case e1000_82541_rev_2: ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_ich8lan: ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) ++ return ret_val; ++ ++ break; ++ default: ++ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= MII_CR_RESET; ++ ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ usec_delay(1); ++ break; ++ } ++ ++ if (hw->phy_type == e1000_phy_igp || hw->phy_type == e1000_phy_igp_2) ++ e1000_phy_init_script(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Work-around for 82566 power-down: on D3 entry- ++* 1) disable gigabit link ++* 2) write VR power-down enable ++* 3) read it back ++* if successful continue, else issue LCD reset and repeat ++* ++* hw - struct containing variables accessed by shared code ++******************************************************************************/ ++void ++e1000_phy_powerdown_workaround(struct e1000_hw *hw) ++{ ++ int32_t reg; ++ uint16_t phy_data; ++ int32_t retry = 0; ++ ++ DEBUGFUNC("e1000_phy_powerdown_workaround"); ++ ++ if (hw->phy_type != e1000_phy_igp_3) ++ return; ++ ++ do { ++ /* Disable link */ ++ reg = E1000_READ_REG(hw, PHY_CTRL); ++ E1000_WRITE_REG(hw, PHY_CTRL, reg | E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ ++ /* Write VR power-down enable */ ++ e1000_read_phy_reg(hw, IGP3_VR_CTRL, &phy_data); ++ e1000_write_phy_reg(hw, IGP3_VR_CTRL, phy_data | ++ IGP3_VR_CTRL_MODE_SHUT); ++ ++ /* Read it back and test */ ++ e1000_read_phy_reg(hw, IGP3_VR_CTRL, &phy_data); ++ if ((phy_data & IGP3_VR_CTRL_MODE_SHUT) || retry) ++ break; ++ ++ /* Issue PHY reset and repeat at most one more time */ ++ reg = E1000_READ_REG(hw, CTRL); ++ E1000_WRITE_REG(hw, CTRL, reg | E1000_CTRL_PHY_RST); ++ retry++; ++ } while (retry); ++ ++ return; ++ ++} ++ ++/****************************************************************************** ++* Work-around for 82566 Kumeran PCS lock loss: ++* On link status change (i.e. PCI reset, speed change) and link is up and ++* speed is gigabit- ++* 0) if workaround is optionally disabled do nothing ++* 1) wait 1ms for Kumeran link to come up ++* 2) check Kumeran Diagnostic register PCS lock loss bit ++* 3) if not set the link is locked (all is good), otherwise... ++* 4) reset the PHY ++* 5) repeat up to 10 times ++* Note: this is only called for IGP3 copper when speed is 1gb. ++* ++* hw - struct containing variables accessed by shared code ++******************************************************************************/ ++int32_t ++e1000_kumeran_lock_loss_workaround(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ int32_t reg; ++ int32_t cnt; ++ uint16_t phy_data; ++ ++ if (hw->kmrn_lock_loss_workaround_disabled) ++ return E1000_SUCCESS; ++ ++ /* Make sure link is up before proceeding. If not just return. 
++ * Attempting this while link is negotiating fouls up link ++ * stability */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ ++ if (phy_data & MII_SR_LINK_STATUS) { ++ for (cnt = 0; cnt < 10; cnt++) { ++ /* read once to clear */ ++ ret_val = e1000_read_phy_reg(hw, IGP3_KMRN_DIAG, &phy_data); ++ if (ret_val) ++ return ret_val; ++ /* and again to get new status */ ++ ret_val = e1000_read_phy_reg(hw, IGP3_KMRN_DIAG, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* check for PCS lock */ ++ if (!(phy_data & IGP3_KMRN_DIAG_PCS_LOCK_LOSS)) ++ return E1000_SUCCESS; ++ ++ /* Issue PHY reset */ ++ e1000_phy_hw_reset(hw); ++ msec_delay_irq(5); ++ } ++ /* Disable GigE link negotiation */ ++ reg = E1000_READ_REG(hw, PHY_CTRL); ++ E1000_WRITE_REG(hw, PHY_CTRL, reg | E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ ++ /* unable to acquire PCS lock */ ++ return E1000_ERR_PHY; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Probes the expected PHY address for known PHY IDs ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++int32_t ++e1000_detect_gig_phy(struct e1000_hw *hw) ++{ ++ int32_t phy_init_status, ret_val; ++ uint16_t phy_id_high, phy_id_low; ++ boolean_t match = FALSE; ++ ++ DEBUGFUNC("e1000_detect_gig_phy"); ++ ++ /* The 82571 firmware may still be configuring the PHY. In this ++ * case, we cannot access the PHY until the configuration is done. So ++ * we explicitly set the PHY values. */ ++ if (hw->mac_type == e1000_82571 || ++ hw->mac_type == e1000_82572) { ++ hw->phy_id = IGP01E1000_I_PHY_ID; ++ hw->phy_type = e1000_phy_igp_2; ++ return E1000_SUCCESS; ++ } ++ ++ /* ESB-2 PHY reads require e1000_phy_gg82563 to be set because of a work- ++ * around that forces PHY page 0 to be set or the reads fail. The rest of ++ * the code in this routine uses e1000_read_phy_reg to read the PHY ID. ++ * So for ESB-2 we need to have this set so our reads won't fail. If the ++ * attached PHY is not a e1000_phy_gg82563, the routines below will figure ++ * this out as well. */ ++ if (hw->mac_type == e1000_80003es2lan) ++ hw->phy_type = e1000_phy_gg82563; ++ ++ /* Read the PHY ID Registers to identify which PHY is onboard. 
*/ ++ ret_val = e1000_read_phy_reg(hw, PHY_ID1, &phy_id_high); ++ if (ret_val) ++ return ret_val; ++ ++ hw->phy_id = (uint32_t) (phy_id_high << 16); ++ usec_delay(20); ++ ret_val = e1000_read_phy_reg(hw, PHY_ID2, &phy_id_low); ++ if (ret_val) ++ return ret_val; ++ ++ hw->phy_id |= (uint32_t) (phy_id_low & PHY_REVISION_MASK); ++ hw->phy_revision = (uint32_t) phy_id_low & ~PHY_REVISION_MASK; ++ ++ switch (hw->mac_type) { ++ case e1000_82543: ++ if (hw->phy_id == M88E1000_E_PHY_ID) match = TRUE; ++ break; ++ case e1000_82544: ++ if (hw->phy_id == M88E1000_I_PHY_ID) match = TRUE; ++ break; ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ if (hw->phy_id == M88E1011_I_PHY_ID) match = TRUE; ++ break; ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ if (hw->phy_id == IGP01E1000_I_PHY_ID) match = TRUE; ++ break; ++ case e1000_82573: ++ if (hw->phy_id == M88E1111_I_PHY_ID) match = TRUE; ++ break; ++ case e1000_80003es2lan: ++ if (hw->phy_id == GG82563_E_PHY_ID) match = TRUE; ++ break; ++ case e1000_ich8lan: ++ if (hw->phy_id == IGP03E1000_E_PHY_ID) match = TRUE; ++ if (hw->phy_id == IFE_E_PHY_ID) match = TRUE; ++ if (hw->phy_id == IFE_PLUS_E_PHY_ID) match = TRUE; ++ if (hw->phy_id == IFE_C_E_PHY_ID) match = TRUE; ++ break; ++ default: ++ DEBUGOUT1("Invalid MAC type %d\n", hw->mac_type); ++ return -E1000_ERR_CONFIG; ++ } ++ phy_init_status = e1000_set_phy_type(hw); ++ ++ if ((match) && (phy_init_status == E1000_SUCCESS)) { ++ DEBUGOUT1("PHY ID 0x%X detected\n", hw->phy_id); ++ return E1000_SUCCESS; ++ } ++ DEBUGOUT1("Invalid PHY ID 0x%X\n", hw->phy_id); ++ return -E1000_ERR_PHY; ++} ++ ++/****************************************************************************** ++* Resets the PHY's DSP ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_phy_reset_dsp(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ DEBUGFUNC("e1000_phy_reset_dsp"); ++ ++ do { ++ if (hw->phy_type != e1000_phy_gg82563) { ++ ret_val = e1000_write_phy_reg(hw, 29, 0x001d); ++ if (ret_val) break; ++ } ++ ret_val = e1000_write_phy_reg(hw, 30, 0x00c1); ++ if (ret_val) break; ++ ret_val = e1000_write_phy_reg(hw, 30, 0x0000); ++ if (ret_val) break; ++ ret_val = E1000_SUCCESS; ++ } while (0); ++ ++ return ret_val; ++} ++ ++/****************************************************************************** ++* Get PHY information from various PHY registers for igp PHY only. ++* ++* hw - Struct containing variables accessed by shared code ++* phy_info - PHY information structure ++******************************************************************************/ ++int32_t ++e1000_phy_igp_get_info(struct e1000_hw *hw, ++ struct e1000_phy_info *phy_info) ++{ ++ int32_t ret_val; ++ uint16_t phy_data, polarity, min_length, max_length, average; ++ ++ DEBUGFUNC("e1000_phy_igp_get_info"); ++ ++ /* The downshift status is checked only once, after link is established, ++ * and it stored in the hw->speed_downgraded parameter. */ ++ phy_info->downshift = (e1000_downshift)hw->speed_downgraded; ++ ++ /* IGP01E1000 does not need to support it. 
*/ ++ phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_normal; ++ ++ /* IGP01E1000 always correct polarity reversal */ ++ phy_info->polarity_correction = e1000_polarity_reversal_enabled; ++ ++ /* Check polarity status */ ++ ret_val = e1000_check_polarity(hw, &polarity); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->cable_polarity = polarity; ++ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->mdix_mode = (phy_data & IGP01E1000_PSSR_MDIX) >> ++ IGP01E1000_PSSR_MDIX_SHIFT; ++ ++ if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ /* Local/Remote Receiver Information are only valid at 1000 Mbps */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) >> ++ SR_1000T_LOCAL_RX_STATUS_SHIFT; ++ phy_info->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) >> ++ SR_1000T_REMOTE_RX_STATUS_SHIFT; ++ ++ /* Get cable length */ ++ ret_val = e1000_get_cable_length(hw, &min_length, &max_length); ++ if (ret_val) ++ return ret_val; ++ ++ /* Translate to old method */ ++ average = (max_length + min_length) / 2; ++ ++ if (average <= e1000_igp_cable_length_50) ++ phy_info->cable_length = e1000_cable_length_50; ++ else if (average <= e1000_igp_cable_length_80) ++ phy_info->cable_length = e1000_cable_length_50_80; ++ else if (average <= e1000_igp_cable_length_110) ++ phy_info->cable_length = e1000_cable_length_80_110; ++ else if (average <= e1000_igp_cable_length_140) ++ phy_info->cable_length = e1000_cable_length_110_140; ++ else ++ phy_info->cable_length = e1000_cable_length_140; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Get PHY information from various PHY registers for ife PHY only. ++* ++* hw - Struct containing variables accessed by shared code ++* phy_info - PHY information structure ++******************************************************************************/ ++int32_t ++e1000_phy_ife_get_info(struct e1000_hw *hw, ++ struct e1000_phy_info *phy_info) ++{ ++ int32_t ret_val; ++ uint16_t phy_data, polarity; ++ ++ DEBUGFUNC("e1000_phy_ife_get_info"); ++ ++ phy_info->downshift = (e1000_downshift)hw->speed_downgraded; ++ phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_normal; ++ ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ phy_info->polarity_correction = ++ (phy_data & IFE_PSC_AUTO_POLARITY_DISABLE) >> ++ IFE_PSC_AUTO_POLARITY_DISABLE_SHIFT; ++ ++ if (phy_info->polarity_correction == e1000_polarity_reversal_enabled) { ++ ret_val = e1000_check_polarity(hw, &polarity); ++ if (ret_val) ++ return ret_val; ++ } else { ++ /* Polarity is forced. */ ++ polarity = (phy_data & IFE_PSC_FORCE_POLARITY) >> ++ IFE_PSC_FORCE_POLARITY_SHIFT; ++ } ++ phy_info->cable_polarity = polarity; ++ ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_MDIX_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->mdix_mode = ++ (phy_data & (IFE_PMC_AUTO_MDIX | IFE_PMC_FORCE_MDIX)) >> ++ IFE_PMC_MDIX_MODE_SHIFT; ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Get PHY information from various PHY registers fot m88 PHY only. 
++* ++* hw - Struct containing variables accessed by shared code ++* phy_info - PHY information structure ++******************************************************************************/ ++int32_t ++e1000_phy_m88_get_info(struct e1000_hw *hw, ++ struct e1000_phy_info *phy_info) ++{ ++ int32_t ret_val; ++ uint16_t phy_data, polarity; ++ ++ DEBUGFUNC("e1000_phy_m88_get_info"); ++ ++ /* The downshift status is checked only once, after link is established, ++ * and it stored in the hw->speed_downgraded parameter. */ ++ phy_info->downshift = (e1000_downshift)hw->speed_downgraded; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->extended_10bt_distance = ++ (phy_data & M88E1000_PSCR_10BT_EXT_DIST_ENABLE) >> ++ M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT; ++ phy_info->polarity_correction = ++ (phy_data & M88E1000_PSCR_POLARITY_REVERSAL) >> ++ M88E1000_PSCR_POLARITY_REVERSAL_SHIFT; ++ ++ /* Check polarity status */ ++ ret_val = e1000_check_polarity(hw, &polarity); ++ if (ret_val) ++ return ret_val; ++ phy_info->cable_polarity = polarity; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->mdix_mode = (phy_data & M88E1000_PSSR_MDIX) >> ++ M88E1000_PSSR_MDIX_SHIFT; ++ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { ++ /* Cable Length Estimation and Local/Remote Receiver Information ++ * are only valid at 1000 Mbps. ++ */ ++ if (hw->phy_type != e1000_phy_gg82563) { ++ phy_info->cable_length = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT); ++ } else { ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_DSP_DISTANCE, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->cable_length = phy_data & GG82563_DSPD_CABLE_LENGTH; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) >> ++ SR_1000T_LOCAL_RX_STATUS_SHIFT; ++ ++ phy_info->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) >> ++ SR_1000T_REMOTE_RX_STATUS_SHIFT; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Get PHY information from various PHY registers ++* ++* hw - Struct containing variables accessed by shared code ++* phy_info - PHY information structure ++******************************************************************************/ ++int32_t ++e1000_phy_get_info(struct e1000_hw *hw, ++ struct e1000_phy_info *phy_info) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_phy_get_info"); ++ ++ phy_info->cable_length = e1000_cable_length_undefined; ++ phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_undefined; ++ phy_info->cable_polarity = e1000_rev_polarity_undefined; ++ phy_info->downshift = e1000_downshift_undefined; ++ phy_info->polarity_correction = e1000_polarity_reversal_undefined; ++ phy_info->mdix_mode = e1000_auto_x_mode_undefined; ++ phy_info->local_rx = e1000_1000t_rx_status_undefined; ++ phy_info->remote_rx = e1000_1000t_rx_status_undefined; ++ ++ if (hw->media_type != e1000_media_type_copper) { ++ DEBUGOUT("PHY info is only valid for copper media\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((phy_data & 
MII_SR_LINK_STATUS) != MII_SR_LINK_STATUS) { ++ DEBUGOUT("PHY info is only valid if link is up\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ if (hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) ++ return e1000_phy_igp_get_info(hw, phy_info); ++ else if (hw->phy_type == e1000_phy_ife) ++ return e1000_phy_ife_get_info(hw, phy_info); ++ else ++ return e1000_phy_m88_get_info(hw, phy_info); ++} ++ ++int32_t ++e1000_validate_mdi_setting(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_validate_mdi_settings"); ++ ++ if (!hw->autoneg && (hw->mdix == 0 || hw->mdix == 3)) { ++ DEBUGOUT("Invalid MDI setting detected\n"); ++ hw->mdix = 1; ++ return -E1000_ERR_CONFIG; ++ } ++ return E1000_SUCCESS; ++} ++ ++ ++/****************************************************************************** ++ * Sets up eeprom variables in the hw struct. Must be called after mac_type ++ * is configured. Additionally, if this is ICH8, the flash controller GbE ++ * registers must be mapped, or this will crash. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_init_eeprom_params(struct e1000_hw *hw) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t eecd = E1000_READ_REG(hw, EECD); ++ int32_t ret_val = E1000_SUCCESS; ++ uint16_t eeprom_size; ++ ++ DEBUGFUNC("e1000_init_eeprom_params"); ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ case e1000_82544: ++ eeprom->type = e1000_eeprom_microwire; ++ eeprom->word_size = 64; ++ eeprom->opcode_bits = 3; ++ eeprom->address_bits = 6; ++ eeprom->delay_usec = 50; ++ eeprom->use_eerd = FALSE; ++ eeprom->use_eewr = FALSE; ++ break; ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ eeprom->type = e1000_eeprom_microwire; ++ eeprom->opcode_bits = 3; ++ eeprom->delay_usec = 50; ++ if (eecd & E1000_EECD_SIZE) { ++ eeprom->word_size = 256; ++ eeprom->address_bits = 8; ++ } else { ++ eeprom->word_size = 64; ++ eeprom->address_bits = 6; ++ } ++ eeprom->use_eerd = FALSE; ++ eeprom->use_eewr = FALSE; ++ break; ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ if (eecd & E1000_EECD_TYPE) { ++ eeprom->type = e1000_eeprom_spi; ++ eeprom->opcode_bits = 8; ++ eeprom->delay_usec = 1; ++ if (eecd & E1000_EECD_ADDR_BITS) { ++ eeprom->page_size = 32; ++ eeprom->address_bits = 16; ++ } else { ++ eeprom->page_size = 8; ++ eeprom->address_bits = 8; ++ } ++ } else { ++ eeprom->type = e1000_eeprom_microwire; ++ eeprom->opcode_bits = 3; ++ eeprom->delay_usec = 50; ++ if (eecd & E1000_EECD_ADDR_BITS) { ++ eeprom->word_size = 256; ++ eeprom->address_bits = 8; ++ } else { ++ eeprom->word_size = 64; ++ eeprom->address_bits = 6; ++ } ++ } ++ eeprom->use_eerd = FALSE; ++ eeprom->use_eewr = FALSE; ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ eeprom->type = e1000_eeprom_spi; ++ eeprom->opcode_bits = 8; ++ eeprom->delay_usec = 1; ++ if (eecd & E1000_EECD_ADDR_BITS) { ++ eeprom->page_size = 32; ++ eeprom->address_bits = 16; ++ } else { ++ eeprom->page_size = 8; ++ eeprom->address_bits = 8; ++ } ++ eeprom->use_eerd = FALSE; ++ eeprom->use_eewr = FALSE; ++ break; ++ case e1000_82573: ++ eeprom->type = e1000_eeprom_spi; ++ eeprom->opcode_bits = 8; ++ eeprom->delay_usec = 1; ++ if (eecd & E1000_EECD_ADDR_BITS) { ++ eeprom->page_size = 32; ++ 
eeprom->address_bits = 16; ++ } else { ++ eeprom->page_size = 8; ++ eeprom->address_bits = 8; ++ } ++ eeprom->use_eerd = TRUE; ++ eeprom->use_eewr = TRUE; ++ if (e1000_is_onboard_nvm_eeprom(hw) == FALSE) { ++ eeprom->type = e1000_eeprom_flash; ++ eeprom->word_size = 2048; ++ ++ /* Ensure that the Autonomous FLASH update bit is cleared due to ++ * Flash update issue on parts which use a FLASH for NVM. */ ++ eecd &= ~E1000_EECD_AUPDEN; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ } ++ break; ++ case e1000_80003es2lan: ++ eeprom->type = e1000_eeprom_spi; ++ eeprom->opcode_bits = 8; ++ eeprom->delay_usec = 1; ++ if (eecd & E1000_EECD_ADDR_BITS) { ++ eeprom->page_size = 32; ++ eeprom->address_bits = 16; ++ } else { ++ eeprom->page_size = 8; ++ eeprom->address_bits = 8; ++ } ++ eeprom->use_eerd = TRUE; ++ eeprom->use_eewr = FALSE; ++ break; ++ case e1000_ich8lan: ++ { ++ int32_t i = 0; ++ uint32_t flash_size = E1000_READ_ICH8_REG(hw, ICH8_FLASH_GFPREG); ++ ++ eeprom->type = e1000_eeprom_ich8; ++ eeprom->use_eerd = FALSE; ++ eeprom->use_eewr = FALSE; ++ eeprom->word_size = E1000_SHADOW_RAM_WORDS; ++ ++ /* Zero the shadow RAM structure. But don't load it from NVM ++ * so as to save time for driver init */ ++ if (hw->eeprom_shadow_ram != NULL) { ++ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { ++ hw->eeprom_shadow_ram[i].modified = FALSE; ++ hw->eeprom_shadow_ram[i].eeprom_word = 0xFFFF; ++ } ++ } ++ ++ hw->flash_base_addr = (flash_size & ICH8_GFPREG_BASE_MASK) * ++ ICH8_FLASH_SECTOR_SIZE; ++ ++ hw->flash_bank_size = ((flash_size >> 16) & ICH8_GFPREG_BASE_MASK) + 1; ++ hw->flash_bank_size -= (flash_size & ICH8_GFPREG_BASE_MASK); ++ hw->flash_bank_size *= ICH8_FLASH_SECTOR_SIZE; ++ hw->flash_bank_size /= 2 * sizeof(uint16_t); ++ ++ break; ++ } ++ default: ++ break; ++ } ++ ++ if (eeprom->type == e1000_eeprom_spi) { ++ /* eeprom_size will be an enum [0..8] that maps to eeprom sizes 128B to ++ * 32KB (incremented by powers of 2). ++ */ ++ if (hw->mac_type <= e1000_82547_rev_2) { ++ /* Set to default value for initial eeprom read. */ ++ eeprom->word_size = 64; ++ ret_val = e1000_read_eeprom(hw, EEPROM_CFG, 1, &eeprom_size); ++ if (ret_val) ++ return ret_val; ++ eeprom_size = (eeprom_size & EEPROM_SIZE_MASK) >> EEPROM_SIZE_SHIFT; ++ /* 256B eeprom size was not supported in earlier hardware, so we ++ * bump eeprom_size up one to ensure that "1" (which maps to 256B) ++ * is never the result used in the shifting logic below. */ ++ if (eeprom_size) ++ eeprom_size++; ++ } else { ++ eeprom_size = (uint16_t)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ } ++ ++ eeprom->word_size = 1 << (eeprom_size + EEPROM_WORD_SIZE_SHIFT); ++ } ++ return ret_val; ++} ++ ++/****************************************************************************** ++ * Raises the EEPROM's clock input. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * eecd - EECD's current value ++ *****************************************************************************/ ++static void ++e1000_raise_ee_clk(struct e1000_hw *hw, ++ uint32_t *eecd) ++{ ++ /* Raise the clock input to the EEPROM (by setting the SK bit), and then ++ * wait microseconds. ++ */ ++ *eecd = *eecd | E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, *eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->eeprom.delay_usec); ++} ++ ++/****************************************************************************** ++ * Lowers the EEPROM's clock input. 
++ * ++ * hw - Struct containing variables accessed by shared code ++ * eecd - EECD's current value ++ *****************************************************************************/ ++static void ++e1000_lower_ee_clk(struct e1000_hw *hw, ++ uint32_t *eecd) ++{ ++ /* Lower the clock input to the EEPROM (by clearing the SK bit), and then ++ * wait 50 microseconds. ++ */ ++ *eecd = *eecd & ~E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, *eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->eeprom.delay_usec); ++} ++ ++/****************************************************************************** ++ * Shift data bits out to the EEPROM. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * data - data to send to the EEPROM ++ * count - number of bits to shift out ++ *****************************************************************************/ ++static void ++e1000_shift_out_ee_bits(struct e1000_hw *hw, ++ uint16_t data, ++ uint16_t count) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t eecd; ++ uint32_t mask; ++ ++ /* We need to shift "count" bits out to the EEPROM. So, value in the ++ * "data" parameter will be shifted out to the EEPROM one bit at a time. ++ * In order to do this, "data" must be broken down into bits. ++ */ ++ mask = 0x01 << (count - 1); ++ eecd = E1000_READ_REG(hw, EECD); ++ if (eeprom->type == e1000_eeprom_microwire) { ++ eecd &= ~E1000_EECD_DO; ++ } else if (eeprom->type == e1000_eeprom_spi) { ++ eecd |= E1000_EECD_DO; ++ } ++ do { ++ /* A "1" is shifted out to the EEPROM by setting bit "DI" to a "1", ++ * and then raising and then lowering the clock (the SK bit controls ++ * the clock input to the EEPROM). A "0" is shifted out to the EEPROM ++ * by setting "DI" to "0" and then raising and then lowering the clock. ++ */ ++ eecd &= ~E1000_EECD_DI; ++ ++ if (data & mask) ++ eecd |= E1000_EECD_DI; ++ ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(eeprom->delay_usec); ++ ++ e1000_raise_ee_clk(hw, &eecd); ++ e1000_lower_ee_clk(hw, &eecd); ++ ++ mask = mask >> 1; ++ ++ } while (mask); ++ ++ /* We leave the "DI" bit set to "0" when we leave this routine. */ ++ eecd &= ~E1000_EECD_DI; ++ E1000_WRITE_REG(hw, EECD, eecd); ++} ++ ++/****************************************************************************** ++ * Shift data bits in from the EEPROM ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++static uint16_t ++e1000_shift_in_ee_bits(struct e1000_hw *hw, ++ uint16_t count) ++{ ++ uint32_t eecd; ++ uint32_t i; ++ uint16_t data; ++ ++ /* In order to read a register from the EEPROM, we need to shift 'count' ++ * bits in from the EEPROM. Bits are "shifted in" by raising the clock ++ * input to the EEPROM (setting the SK bit), and then reading the value of ++ * the "DO" bit. During this "shifting in" process the "DI" bit should ++ * always be clear. 
++ */ ++ ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); ++ data = 0; ++ ++ for (i = 0; i < count; i++) { ++ data = data << 1; ++ e1000_raise_ee_clk(hw, &eecd); ++ ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ eecd &= ~(E1000_EECD_DI); ++ if (eecd & E1000_EECD_DO) ++ data |= 1; ++ ++ e1000_lower_ee_clk(hw, &eecd); ++ } ++ ++ return data; ++} ++ ++/****************************************************************************** ++ * Prepares EEPROM for access ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Lowers EEPROM clock. Clears input pin. Sets the chip select pin. This ++ * function should be called before issuing a command to the EEPROM. ++ *****************************************************************************/ ++static int32_t ++e1000_acquire_eeprom(struct e1000_hw *hw) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t eecd, i=0; ++ ++ DEBUGFUNC("e1000_acquire_eeprom"); ++ ++ if (e1000_swfw_sync_acquire(hw, E1000_SWFW_EEP_SM)) ++ return -E1000_ERR_SWFW_SYNC; ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ if (hw->mac_type != e1000_82573) { ++ /* Request EEPROM Access */ ++ if (hw->mac_type > e1000_82544) { ++ eecd |= E1000_EECD_REQ; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ eecd = E1000_READ_REG(hw, EECD); ++ while ((!(eecd & E1000_EECD_GNT)) && ++ (i < E1000_EEPROM_GRANT_ATTEMPTS)) { ++ i++; ++ usec_delay(5); ++ eecd = E1000_READ_REG(hw, EECD); ++ } ++ if (!(eecd & E1000_EECD_GNT)) { ++ eecd &= ~E1000_EECD_REQ; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ DEBUGOUT("Could not acquire EEPROM grant\n"); ++ e1000_swfw_sync_release(hw, E1000_SWFW_EEP_SM); ++ return -E1000_ERR_EEPROM; ++ } ++ } ++ } ++ ++ /* Setup EEPROM for Read/Write */ ++ ++ if (eeprom->type == e1000_eeprom_microwire) { ++ /* Clear SK and DI */ ++ eecd &= ~(E1000_EECD_DI | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, EECD, eecd); ++ ++ /* Set CS */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ } else if (eeprom->type == e1000_eeprom_spi) { ++ /* Clear SK and CS */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, EECD, eecd); ++ usec_delay(1); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Returns EEPROM to a "standby" state ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++static void ++e1000_standby_eeprom(struct e1000_hw *hw) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t eecd; ++ ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ if (eeprom->type == e1000_eeprom_microwire) { ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ ++ /* Clock high */ ++ eecd |= E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ ++ /* Select EEPROM */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ ++ /* Clock low */ ++ eecd &= ~E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ } else if (eeprom->type == e1000_eeprom_spi) { ++ /* Toggle CS to flush commands */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ eecd &= ~E1000_EECD_CS; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ 
E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ } ++} ++ ++/****************************************************************************** ++ * Terminates a command by inverting the EEPROM's chip select pin ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++static void ++e1000_release_eeprom(struct e1000_hw *hw) ++{ ++ uint32_t eecd; ++ ++ DEBUGFUNC("e1000_release_eeprom"); ++ ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ if (hw->eeprom.type == e1000_eeprom_spi) { ++ eecd |= E1000_EECD_CS; /* Pull CS high */ ++ eecd &= ~E1000_EECD_SK; /* Lower SCK */ ++ ++ E1000_WRITE_REG(hw, EECD, eecd); ++ ++ usec_delay(hw->eeprom.delay_usec); ++ } else if (hw->eeprom.type == e1000_eeprom_microwire) { ++ /* cleanup eeprom */ ++ ++ /* CS on Microwire is active-high */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_DI); ++ ++ E1000_WRITE_REG(hw, EECD, eecd); ++ ++ /* Rising edge of clock */ ++ eecd |= E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->eeprom.delay_usec); ++ ++ /* Falling edge of clock */ ++ eecd &= ~E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->eeprom.delay_usec); ++ } ++ ++ /* Stop requesting EEPROM access */ ++ if (hw->mac_type > e1000_82544) { ++ eecd &= ~E1000_EECD_REQ; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ } ++ ++ e1000_swfw_sync_release(hw, E1000_SWFW_EEP_SM); ++} ++ ++/****************************************************************************** ++ * Reads a 16 bit word from the EEPROM. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_spi_eeprom_ready(struct e1000_hw *hw) ++{ ++ uint16_t retry_count = 0; ++ uint8_t spi_stat_reg; ++ ++ DEBUGFUNC("e1000_spi_eeprom_ready"); ++ ++ /* Read "Status Register" repeatedly until the LSB is cleared. The ++ * EEPROM will signal that the command has been completed by clearing ++ * bit 0 of the internal status register. If it's not cleared within ++ * 5 milliseconds, then error out. ++ */ ++ retry_count = 0; ++ do { ++ e1000_shift_out_ee_bits(hw, EEPROM_RDSR_OPCODE_SPI, ++ hw->eeprom.opcode_bits); ++ spi_stat_reg = (uint8_t)e1000_shift_in_ee_bits(hw, 8); ++ if (!(spi_stat_reg & EEPROM_STATUS_RDY_SPI)) ++ break; ++ ++ usec_delay(5); ++ retry_count += 5; ++ ++ e1000_standby_eeprom(hw); ++ } while (retry_count < EEPROM_MAX_RETRY_SPI); ++ ++ /* ATMEL SPI write time could vary from 0-20mSec on 3.3V devices (and ++ * only 0-5mSec on 5V devices) ++ */ ++ if (retry_count >= EEPROM_MAX_RETRY_SPI) { ++ DEBUGOUT("SPI EEPROM Status error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Reads a 16 bit word from the EEPROM. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to read ++ * data - word read from the EEPROM ++ * words - number of words to read ++ *****************************************************************************/ ++int32_t ++e1000_read_eeprom(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t i = 0; ++ int32_t ret_val; ++ ++ DEBUGFUNC("e1000_read_eeprom"); ++ ++ /* A check for invalid values: offset too large, too many words, and not ++ * enough words. 
++ */ ++ if ((offset >= eeprom->word_size) || (words > eeprom->word_size - offset) || ++ (words == 0)) { ++ DEBUGOUT("\"words\" parameter out of bounds\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ /* FLASH reads without acquiring the semaphore are safe */ ++ if (e1000_is_onboard_nvm_eeprom(hw) == TRUE && ++ hw->eeprom.use_eerd == FALSE) { ++ switch (hw->mac_type) { ++ case e1000_80003es2lan: ++ break; ++ default: ++ /* Prepare the EEPROM for reading */ ++ if (e1000_acquire_eeprom(hw) != E1000_SUCCESS) ++ return -E1000_ERR_EEPROM; ++ break; ++ } ++ } ++ ++ if (eeprom->use_eerd == TRUE) { ++ ret_val = e1000_read_eeprom_eerd(hw, offset, words, data); ++ if ((e1000_is_onboard_nvm_eeprom(hw) == TRUE) || ++ (hw->mac_type != e1000_82573)) ++ e1000_release_eeprom(hw); ++ return ret_val; ++ } ++ ++ if (eeprom->type == e1000_eeprom_ich8) ++ return e1000_read_eeprom_ich8(hw, offset, words, data); ++ ++ if (eeprom->type == e1000_eeprom_spi) { ++ uint16_t word_in; ++ uint8_t read_opcode = EEPROM_READ_OPCODE_SPI; ++ ++ if (e1000_spi_eeprom_ready(hw)) { ++ e1000_release_eeprom(hw); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ e1000_standby_eeprom(hw); ++ ++ /* Some SPI eeproms use the 8th address bit embedded in the opcode */ ++ if ((eeprom->address_bits == 8) && (offset >= 128)) ++ read_opcode |= EEPROM_A8_OPCODE_SPI; ++ ++ /* Send the READ command (opcode + addr) */ ++ e1000_shift_out_ee_bits(hw, read_opcode, eeprom->opcode_bits); ++ e1000_shift_out_ee_bits(hw, (uint16_t)(offset*2), eeprom->address_bits); ++ ++ /* Read the data. The address of the eeprom internally increments with ++ * each byte (spi) being read, saving on the overhead of eeprom setup ++ * and tear-down. The address counter will roll over if reading beyond ++ * the size of the eeprom, thus allowing the entire memory to be read ++ * starting from any offset. */ ++ for (i = 0; i < words; i++) { ++ word_in = e1000_shift_in_ee_bits(hw, 16); ++ data[i] = (word_in >> 8) | (word_in << 8); ++ } ++ } else if (eeprom->type == e1000_eeprom_microwire) { ++ for (i = 0; i < words; i++) { ++ /* Send the READ command (opcode + addr) */ ++ e1000_shift_out_ee_bits(hw, EEPROM_READ_OPCODE_MICROWIRE, ++ eeprom->opcode_bits); ++ e1000_shift_out_ee_bits(hw, (uint16_t)(offset + i), ++ eeprom->address_bits); ++ ++ /* Read the data. For microwire, each word requires the overhead ++ * of eeprom setup and tear-down. */ ++ data[i] = e1000_shift_in_ee_bits(hw, 16); ++ e1000_standby_eeprom(hw); ++ } ++ } ++ ++ /* End this read operation */ ++ e1000_release_eeprom(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Reads a 16 bit word from the EEPROM using the EERD register. 
++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to read ++ * data - word read from the EEPROM ++ * words - number of words to read ++ *****************************************************************************/ ++int32_t ++e1000_read_eeprom_eerd(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ uint32_t i, eerd = 0; ++ int32_t error = 0; ++ ++ for (i = 0; i < words; i++) { ++ eerd = ((offset+i) << E1000_EEPROM_RW_ADDR_SHIFT) + ++ E1000_EEPROM_RW_REG_START; ++ ++ E1000_WRITE_REG(hw, EERD, eerd); ++ error = e1000_poll_eerd_eewr_done(hw, E1000_EEPROM_POLL_READ); ++ ++ if (error) { ++ break; ++ } ++ data[i] = (E1000_READ_REG(hw, EERD) >> E1000_EEPROM_RW_REG_DATA); ++ ++ } ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Writes a 16 bit word from the EEPROM using the EEWR register. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to read ++ * data - word read from the EEPROM ++ * words - number of words to read ++ *****************************************************************************/ ++int32_t ++e1000_write_eeprom_eewr(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ uint32_t register_value = 0; ++ uint32_t i = 0; ++ int32_t error = 0; ++ ++ if (e1000_swfw_sync_acquire(hw, E1000_SWFW_EEP_SM)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ for (i = 0; i < words; i++) { ++ register_value = (data[i] << E1000_EEPROM_RW_REG_DATA) | ++ ((offset+i) << E1000_EEPROM_RW_ADDR_SHIFT) | ++ E1000_EEPROM_RW_REG_START; ++ ++ error = e1000_poll_eerd_eewr_done(hw, E1000_EEPROM_POLL_WRITE); ++ if (error) { ++ break; ++ } ++ ++ E1000_WRITE_REG(hw, EEWR, register_value); ++ ++ error = e1000_poll_eerd_eewr_done(hw, E1000_EEPROM_POLL_WRITE); ++ ++ if (error) { ++ break; ++ } ++ } ++ ++ e1000_swfw_sync_release(hw, E1000_SWFW_EEP_SM); ++ return error; ++} ++ ++/****************************************************************************** ++ * Polls the status bit (bit 1) of the EERD to determine when the read is done. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int eerd) ++{ ++ uint32_t attempts = 100000; ++ uint32_t i, reg = 0; ++ int32_t done = E1000_ERR_EEPROM; ++ ++ for (i = 0; i < attempts; i++) { ++ if (eerd == E1000_EEPROM_POLL_READ) ++ reg = E1000_READ_REG(hw, EERD); ++ else ++ reg = E1000_READ_REG(hw, EEWR); ++ ++ if (reg & E1000_EEPROM_RW_REG_DONE) { ++ done = E1000_SUCCESS; ++ break; ++ } ++ usec_delay(5); ++ } ++ ++ return done; ++} ++ ++/*************************************************************************** ++* Description: Determines if the onboard NVM is FLASH or EEPROM. 
++* ++* hw - Struct containing variables accessed by shared code ++****************************************************************************/ ++boolean_t ++e1000_is_onboard_nvm_eeprom(struct e1000_hw *hw) ++{ ++ uint32_t eecd = 0; ++ ++ DEBUGFUNC("e1000_is_onboard_nvm_eeprom"); ++ ++ if (hw->mac_type == e1000_ich8lan) ++ return FALSE; ++ ++ if (hw->mac_type == e1000_82573) { ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ /* Isolate bits 15 & 16 */ ++ eecd = ((eecd >> 15) & 0x03); ++ ++ /* If both bits are set, device is Flash type */ ++ if (eecd == 0x03) { ++ return FALSE; ++ } ++ } ++ return TRUE; ++} ++ ++/****************************************************************************** ++ * Verifies that the EEPROM has a valid checksum ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Reads the first 64 16 bit words of the EEPROM and sums the values read. ++ * If the the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is ++ * valid. ++ *****************************************************************************/ ++int32_t ++e1000_validate_eeprom_checksum(struct e1000_hw *hw) ++{ ++ uint16_t checksum = 0; ++ uint16_t i, eeprom_data; ++ ++ DEBUGFUNC("e1000_validate_eeprom_checksum"); ++ ++ if ((hw->mac_type == e1000_82573) && ++ (e1000_is_onboard_nvm_eeprom(hw) == FALSE)) { ++ /* Check bit 4 of word 10h. If it is 0, firmware is done updating ++ * 10h-12h. Checksum may need to be fixed. */ ++ e1000_read_eeprom(hw, 0x10, 1, &eeprom_data); ++ if ((eeprom_data & 0x10) == 0) { ++ /* Read 0x23 and check bit 15. This bit is a 1 when the checksum ++ * has already been fixed. If the checksum is still wrong and this ++ * bit is a 1, we need to return bad checksum. Otherwise, we need ++ * to set this bit to a 1 and update the checksum. */ ++ e1000_read_eeprom(hw, 0x23, 1, &eeprom_data); ++ if ((eeprom_data & 0x8000) == 0) { ++ eeprom_data |= 0x8000; ++ e1000_write_eeprom(hw, 0x23, 1, &eeprom_data); ++ e1000_update_eeprom_checksum(hw); ++ } ++ } ++ } ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ /* Drivers must allocate the shadow ram structure for the ++ * EEPROM checksum to be updated. Otherwise, this bit as well ++ * as the checksum must both be set correctly for this ++ * validation to pass. ++ */ ++ e1000_read_eeprom(hw, 0x19, 1, &eeprom_data); ++ if ((eeprom_data & 0x40) == 0) { ++ eeprom_data |= 0x40; ++ e1000_write_eeprom(hw, 0x19, 1, &eeprom_data); ++ e1000_update_eeprom_checksum(hw); ++ } ++ } ++ ++ for (i = 0; i < (EEPROM_CHECKSUM_REG + 1); i++) { ++ if (e1000_read_eeprom(hw, i, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ checksum += eeprom_data; ++ } ++ ++ if (checksum == (uint16_t) EEPROM_SUM) ++ return E1000_SUCCESS; ++ else { ++ DEBUGOUT("EEPROM Checksum Invalid\n"); ++ return -E1000_ERR_EEPROM; ++ } ++} ++ ++/****************************************************************************** ++ * Calculates the EEPROM checksum and writes it to the EEPROM ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Sums the first 63 16 bit words of the EEPROM. Subtracts the sum from 0xBABA. ++ * Writes the difference to word offset 63 of the EEPROM. 
++ *****************************************************************************/ ++int32_t ++e1000_update_eeprom_checksum(struct e1000_hw *hw) ++{ ++ uint32_t ctrl_ext; ++ uint16_t checksum = 0; ++ uint16_t i, eeprom_data; ++ ++ DEBUGFUNC("e1000_update_eeprom_checksum"); ++ ++ for (i = 0; i < EEPROM_CHECKSUM_REG; i++) { ++ if (e1000_read_eeprom(hw, i, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ checksum += eeprom_data; ++ } ++ checksum = (uint16_t) EEPROM_SUM - checksum; ++ if (e1000_write_eeprom(hw, EEPROM_CHECKSUM_REG, 1, &checksum) < 0) { ++ DEBUGOUT("EEPROM Write Error\n"); ++ return -E1000_ERR_EEPROM; ++ } else if (hw->eeprom.type == e1000_eeprom_flash) { ++ e1000_commit_shadow_ram(hw); ++ } else if (hw->eeprom.type == e1000_eeprom_ich8) { ++ e1000_commit_shadow_ram(hw); ++ /* Reload the EEPROM, or else modifications will not appear ++ * until after next adapter reset. */ ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ msec_delay(10); ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Parent function for writing words to the different EEPROM types. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset within the EEPROM to be written to ++ * words - number of words to write ++ * data - 16 bit word to be written to the EEPROM ++ * ++ * If e1000_update_eeprom_checksum is not called after this function, the ++ * EEPROM will most likely contain an invalid checksum. ++ *****************************************************************************/ ++int32_t ++e1000_write_eeprom(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ int32_t status = 0; ++ ++ DEBUGFUNC("e1000_write_eeprom"); ++ ++ /* A check for invalid values: offset too large, too many words, and not ++ * enough words. ++ */ ++ if ((offset >= eeprom->word_size) || (words > eeprom->word_size - offset) || ++ (words == 0)) { ++ DEBUGOUT("\"words\" parameter out of bounds\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ /* 82573 writes only through eewr */ ++ if (eeprom->use_eewr == TRUE) ++ return e1000_write_eeprom_eewr(hw, offset, words, data); ++ ++ if (eeprom->type == e1000_eeprom_ich8) ++ return e1000_write_eeprom_ich8(hw, offset, words, data); ++ ++ /* Prepare the EEPROM for writing */ ++ if (e1000_acquire_eeprom(hw) != E1000_SUCCESS) ++ return -E1000_ERR_EEPROM; ++ ++ if (eeprom->type == e1000_eeprom_microwire) { ++ status = e1000_write_eeprom_microwire(hw, offset, words, data); ++ } else { ++ status = e1000_write_eeprom_spi(hw, offset, words, data); ++ msec_delay(10); ++ } ++ ++ /* Done with writing */ ++ e1000_release_eeprom(hw); ++ ++ return status; ++} ++ ++/****************************************************************************** ++ * Writes a 16 bit word to a given offset in an SPI EEPROM. 
++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset within the EEPROM to be written to ++ * words - number of words to write ++ * data - pointer to array of 8 bit words to be written to the EEPROM ++ * ++ *****************************************************************************/ ++int32_t ++e1000_write_eeprom_spi(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint16_t widx = 0; ++ ++ DEBUGFUNC("e1000_write_eeprom_spi"); ++ ++ while (widx < words) { ++ uint8_t write_opcode = EEPROM_WRITE_OPCODE_SPI; ++ ++ if (e1000_spi_eeprom_ready(hw)) return -E1000_ERR_EEPROM; ++ ++ e1000_standby_eeprom(hw); ++ ++ /* Send the WRITE ENABLE command (8 bit opcode ) */ ++ e1000_shift_out_ee_bits(hw, EEPROM_WREN_OPCODE_SPI, ++ eeprom->opcode_bits); ++ ++ e1000_standby_eeprom(hw); ++ ++ /* Some SPI eeproms use the 8th address bit embedded in the opcode */ ++ if ((eeprom->address_bits == 8) && (offset >= 128)) ++ write_opcode |= EEPROM_A8_OPCODE_SPI; ++ ++ /* Send the Write command (8-bit opcode + addr) */ ++ e1000_shift_out_ee_bits(hw, write_opcode, eeprom->opcode_bits); ++ ++ e1000_shift_out_ee_bits(hw, (uint16_t)((offset + widx)*2), ++ eeprom->address_bits); ++ ++ /* Send the data */ ++ ++ /* Loop to allow for up to whole page write (32 bytes) of eeprom */ ++ while (widx < words) { ++ uint16_t word_out = data[widx]; ++ word_out = (word_out >> 8) | (word_out << 8); ++ e1000_shift_out_ee_bits(hw, word_out, 16); ++ widx++; ++ ++ /* Some larger eeprom sizes are capable of a 32-byte PAGE WRITE ++ * operation, while the smaller eeproms are capable of an 8-byte ++ * PAGE WRITE operation. Break the inner loop to pass new address ++ */ ++ if ((((offset + widx)*2) % eeprom->page_size) == 0) { ++ e1000_standby_eeprom(hw); ++ break; ++ } ++ } ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Writes a 16 bit word to a given offset in a Microwire EEPROM. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset within the EEPROM to be written to ++ * words - number of words to write ++ * data - pointer to array of 16 bit words to be written to the EEPROM ++ * ++ *****************************************************************************/ ++int32_t ++e1000_write_eeprom_microwire(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t eecd; ++ uint16_t words_written = 0; ++ uint16_t i = 0; ++ ++ DEBUGFUNC("e1000_write_eeprom_microwire"); ++ ++ /* Send the write enable command to the EEPROM (3-bit opcode plus ++ * 6/8-bit dummy address beginning with 11). It's less work to include ++ * the 11 of the dummy address as part of the opcode than it is to shift ++ * it over the correct number of bits for the address. This puts the ++ * EEPROM into write/erase mode. 
++ */ ++ e1000_shift_out_ee_bits(hw, EEPROM_EWEN_OPCODE_MICROWIRE, ++ (uint16_t)(eeprom->opcode_bits + 2)); ++ ++ e1000_shift_out_ee_bits(hw, 0, (uint16_t)(eeprom->address_bits - 2)); ++ ++ /* Prepare the EEPROM */ ++ e1000_standby_eeprom(hw); ++ ++ while (words_written < words) { ++ /* Send the Write command (3-bit opcode + addr) */ ++ e1000_shift_out_ee_bits(hw, EEPROM_WRITE_OPCODE_MICROWIRE, ++ eeprom->opcode_bits); ++ ++ e1000_shift_out_ee_bits(hw, (uint16_t)(offset + words_written), ++ eeprom->address_bits); ++ ++ /* Send the data */ ++ e1000_shift_out_ee_bits(hw, data[words_written], 16); ++ ++ /* Toggle the CS line. This in effect tells the EEPROM to execute ++ * the previous command. ++ */ ++ e1000_standby_eeprom(hw); ++ ++ /* Read DO repeatedly until it is high (equal to '1'). The EEPROM will ++ * signal that the command has been completed by raising the DO signal. ++ * If DO does not go high in 10 milliseconds, then error out. ++ */ ++ for (i = 0; i < 200; i++) { ++ eecd = E1000_READ_REG(hw, EECD); ++ if (eecd & E1000_EECD_DO) break; ++ usec_delay(50); ++ } ++ if (i == 200) { ++ DEBUGOUT("EEPROM Write did not complete\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ /* Recover from write */ ++ e1000_standby_eeprom(hw); ++ ++ words_written++; ++ } ++ ++ /* Send the write disable command to the EEPROM (3-bit opcode plus ++ * 6/8-bit dummy address beginning with 10). It's less work to include ++ * the 10 of the dummy address as part of the opcode than it is to shift ++ * it over the correct number of bits for the address. This takes the ++ * EEPROM out of write/erase mode. ++ */ ++ e1000_shift_out_ee_bits(hw, EEPROM_EWDS_OPCODE_MICROWIRE, ++ (uint16_t)(eeprom->opcode_bits + 2)); ++ ++ e1000_shift_out_ee_bits(hw, 0, (uint16_t)(eeprom->address_bits - 2)); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Flushes the cached eeprom to NVM. This is done by saving the modified values ++ * in the eeprom cache and the non modified values in the currently active bank ++ * to the new bank. 
++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to read ++ * data - word read from the EEPROM ++ * words - number of words to read ++ *****************************************************************************/ ++int32_t ++e1000_commit_shadow_ram(struct e1000_hw *hw) ++{ ++ uint32_t attempts = 100000; ++ uint32_t eecd = 0; ++ uint32_t flop = 0; ++ uint32_t i = 0; ++ int32_t error = E1000_SUCCESS; ++ uint32_t old_bank_offset = 0; ++ uint32_t new_bank_offset = 0; ++ uint32_t sector_retries = 0; ++ uint8_t low_byte = 0; ++ uint8_t high_byte = 0; ++ uint8_t temp_byte = 0; ++ boolean_t sector_write_failed = FALSE; ++ ++ if (hw->mac_type == e1000_82573) { ++ /* The flop register will be used to determine if flash type is STM */ ++ flop = E1000_READ_REG(hw, FLOP); ++ for (i=0; i < attempts; i++) { ++ eecd = E1000_READ_REG(hw, EECD); ++ if ((eecd & E1000_EECD_FLUPD) == 0) { ++ break; ++ } ++ usec_delay(5); ++ } ++ ++ if (i == attempts) { ++ return -E1000_ERR_EEPROM; ++ } ++ ++ /* If STM opcode located in bits 15:8 of flop, reset firmware */ ++ if ((flop & 0xFF00) == E1000_STM_OPCODE) { ++ E1000_WRITE_REG(hw, HICR, E1000_HICR_FW_RESET); ++ } ++ ++ /* Perform the flash update */ ++ E1000_WRITE_REG(hw, EECD, eecd | E1000_EECD_FLUPD); ++ ++ for (i=0; i < attempts; i++) { ++ eecd = E1000_READ_REG(hw, EECD); ++ if ((eecd & E1000_EECD_FLUPD) == 0) { ++ break; ++ } ++ usec_delay(5); ++ } ++ ++ if (i == attempts) { ++ return -E1000_ERR_EEPROM; ++ } ++ } ++ ++ if (hw->mac_type == e1000_ich8lan && hw->eeprom_shadow_ram != NULL) { ++ /* We're writing to the opposite bank so if we're on bank 1, ++ * write to bank 0 etc. We also need to erase the segment that ++ * is going to be written */ ++ if (!(E1000_READ_REG(hw, EECD) & E1000_EECD_SEC1VAL)) { ++ new_bank_offset = hw->flash_bank_size * 2; ++ old_bank_offset = 0; ++ e1000_erase_ich8_4k_segment(hw, 1); ++ } else { ++ old_bank_offset = hw->flash_bank_size * 2; ++ new_bank_offset = 0; ++ e1000_erase_ich8_4k_segment(hw, 0); ++ } ++ ++ do { ++ sector_write_failed = FALSE; ++ /* Loop for every byte in the shadow RAM, ++ * which is in units of words. */ ++ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { ++ /* Determine whether to write the value stored ++ * in the other NVM bank or a modified value stored ++ * in the shadow RAM */ ++ if (hw->eeprom_shadow_ram[i].modified == TRUE) { ++ low_byte = (uint8_t)hw->eeprom_shadow_ram[i].eeprom_word; ++ e1000_read_ich8_byte(hw, (i << 1) + old_bank_offset, ++ &temp_byte); ++ usec_delay(100); ++ error = e1000_verify_write_ich8_byte(hw, ++ (i << 1) + new_bank_offset, ++ low_byte); ++ if (error != E1000_SUCCESS) ++ sector_write_failed = TRUE; ++ high_byte = ++ (uint8_t)(hw->eeprom_shadow_ram[i].eeprom_word >> 8); ++ e1000_read_ich8_byte(hw, (i << 1) + old_bank_offset + 1, ++ &temp_byte); ++ usec_delay(100); ++ } else { ++ e1000_read_ich8_byte(hw, (i << 1) + old_bank_offset, ++ &low_byte); ++ usec_delay(100); ++ error = e1000_verify_write_ich8_byte(hw, ++ (i << 1) + new_bank_offset, low_byte); ++ if (error != E1000_SUCCESS) ++ sector_write_failed = TRUE; ++ e1000_read_ich8_byte(hw, (i << 1) + old_bank_offset + 1, ++ &high_byte); ++ } ++ ++ /* If the word is 0x13, then make sure the signature bits ++ * (15:14) are 11b until the commit has completed. ++ * This will allow us to write 10b which indicates the ++ * signature is valid. 
We want to do this after the write ++ * has completed so that we don't mark the segment valid ++ * while the write is still in progress */ ++ if (i == E1000_ICH8_NVM_SIG_WORD) ++ high_byte = E1000_ICH8_NVM_SIG_MASK | high_byte; ++ ++ error = e1000_verify_write_ich8_byte(hw, ++ (i << 1) + new_bank_offset + 1, high_byte); ++ if (error != E1000_SUCCESS) ++ sector_write_failed = TRUE; ++ ++ if (sector_write_failed == FALSE) { ++ /* Clear the now not used entry in the cache */ ++ hw->eeprom_shadow_ram[i].modified = FALSE; ++ hw->eeprom_shadow_ram[i].eeprom_word = 0xFFFF; ++ } ++ } ++ ++ /* Don't bother writing the segment valid bits if sector ++ * programming failed. */ ++ if (sector_write_failed == FALSE) { ++ /* Finally validate the new segment by setting bit 15:14 ++ * to 10b in word 0x13 , this can be done without an ++ * erase as well since these bits are 11 to start with ++ * and we need to change bit 14 to 0b */ ++ e1000_read_ich8_byte(hw, ++ E1000_ICH8_NVM_SIG_WORD * 2 + 1 + new_bank_offset, ++ &high_byte); ++ high_byte &= 0xBF; ++ error = e1000_verify_write_ich8_byte(hw, ++ E1000_ICH8_NVM_SIG_WORD * 2 + 1 + new_bank_offset, ++ high_byte); ++ if (error != E1000_SUCCESS) ++ sector_write_failed = TRUE; ++ ++ /* And invalidate the previously valid segment by setting ++ * its signature word (0x13) high_byte to 0b. This can be ++ * done without an erase because flash erase sets all bits ++ * to 1's. We can write 1's to 0's without an erase */ ++ error = e1000_verify_write_ich8_byte(hw, ++ E1000_ICH8_NVM_SIG_WORD * 2 + 1 + old_bank_offset, ++ 0); ++ if (error != E1000_SUCCESS) ++ sector_write_failed = TRUE; ++ } ++ } while (++sector_retries < 10 && sector_write_failed == TRUE); ++ } ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Reads the adapter's part number from the EEPROM ++ * ++ * hw - Struct containing variables accessed by shared code ++ * part_num - Adapter's part number ++ *****************************************************************************/ ++int32_t ++e1000_read_part_num(struct e1000_hw *hw, ++ uint32_t *part_num) ++{ ++ uint16_t offset = EEPROM_PBA_BYTE_1; ++ uint16_t eeprom_data; ++ ++ DEBUGFUNC("e1000_read_part_num"); ++ ++ /* Get word 0 from EEPROM */ ++ if (e1000_read_eeprom(hw, offset, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ /* Save word 0 in upper half of part_num */ ++ *part_num = (uint32_t) (eeprom_data << 16); ++ ++ /* Get word 1 from EEPROM */ ++ if (e1000_read_eeprom(hw, ++offset, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ /* Save word 1 in lower half of part_num */ ++ *part_num |= eeprom_data; ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Reads the adapter's MAC address from the EEPROM and inverts the LSB for the ++ * second function of dual function devices ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_read_mac_addr(struct e1000_hw * hw) ++{ ++ uint16_t offset; ++ uint16_t eeprom_data, i; ++ ++ DEBUGFUNC("e1000_read_mac_addr"); ++ ++ for (i = 0; i < NODE_ADDRESS_SIZE; i += 2) { ++ offset = i >> 1; ++ if (e1000_read_eeprom(hw, offset, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ hw->perm_mac_addr[i] = (uint8_t) (eeprom_data & 0x00FF); ++ 
hw->perm_mac_addr[i+1] = (uint8_t) (eeprom_data >> 8); ++ } ++ ++ switch (hw->mac_type) { ++ default: ++ break; ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ case e1000_82571: ++ case e1000_80003es2lan: ++ if (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1) ++ hw->perm_mac_addr[5] ^= 0x01; ++ break; ++ } ++ ++ for (i = 0; i < NODE_ADDRESS_SIZE; i++) ++ hw->mac_addr[i] = hw->perm_mac_addr[i]; ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Initializes receive address filters. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Places the MAC address in receive address register 0 and clears the rest ++ * of the receive addresss registers. Clears the multicast table. Assumes ++ * the receiver is in reset when the routine is called. ++ *****************************************************************************/ ++void ++e1000_init_rx_addrs(struct e1000_hw *hw) ++{ ++ uint32_t i; ++ uint32_t rar_num; ++ ++ DEBUGFUNC("e1000_init_rx_addrs"); ++ ++ /* Setup the receive address. */ ++ DEBUGOUT("Programming MAC Address into RAR[0]\n"); ++ ++ e1000_rar_set(hw, hw->mac_addr, 0); ++ ++ rar_num = E1000_RAR_ENTRIES; ++ ++ /* Reserve a spot for the Locally Administered Address to work around ++ * an 82571 issue in which a reset on one port will reload the MAC on ++ * the other port. */ ++ if ((hw->mac_type == e1000_82571) && (hw->laa_is_present == TRUE)) ++ rar_num -= 1; ++ if (hw->mac_type == e1000_ich8lan) ++ rar_num = E1000_RAR_ENTRIES_ICH8LAN; ++ ++ /* Zero out the other 15 receive addresses. */ ++ DEBUGOUT("Clearing RAR[1-15]\n"); ++ for (i = 1; i < rar_num; i++) { ++ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/****************************************************************************** ++ * Updates the MAC's list of multicast addresses. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * mc_addr_list - the list of new multicast addresses ++ * mc_addr_count - number of addresses ++ * pad - number of bytes between addresses in the list ++ * rar_used_count - offset where to start adding mc addresses into the RAR's ++ * ++ * The given list replaces any existing list. Clears the last 15 receive ++ * address registers and the multicast table. Uses receive address registers ++ * for the first 15 multicast addresses, and hashes the rest into the ++ * multicast table. ++ *****************************************************************************/ ++void ++e1000_mc_addr_list_update(struct e1000_hw *hw, ++ uint8_t *mc_addr_list, ++ uint32_t mc_addr_count, ++ uint32_t pad, ++ uint32_t rar_used_count) ++{ ++ uint32_t hash_value; ++ uint32_t i; ++ uint32_t num_rar_entry; ++ uint32_t num_mta_entry; ++ ++ DEBUGFUNC("e1000_mc_addr_list_update"); ++ ++ /* Set the new number of MC addresses that we are being requested to use. */ ++ hw->num_mc_addrs = mc_addr_count; ++ ++ /* Clear RAR[1-15] */ ++ DEBUGOUT(" Clearing RAR[1-15]\n"); ++ num_rar_entry = E1000_RAR_ENTRIES; ++ if (hw->mac_type == e1000_ich8lan) ++ num_rar_entry = E1000_RAR_ENTRIES_ICH8LAN; ++ /* Reserve a spot for the Locally Administered Address to work around ++ * an 82571 issue in which a reset on one port will reload the MAC on ++ * the other port. 
*/ ++ if ((hw->mac_type == e1000_82571) && (hw->laa_is_present == TRUE)) ++ num_rar_entry -= 1; ++ ++ for (i = rar_used_count; i < num_rar_entry; i++) { ++ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Clear the MTA */ ++ DEBUGOUT(" Clearing MTA\n"); ++ num_mta_entry = E1000_NUM_MTA_REGISTERS; ++ if (hw->mac_type == e1000_ich8lan) ++ num_mta_entry = E1000_NUM_MTA_REGISTERS_ICH8LAN; ++ for (i = 0; i < num_mta_entry; i++) { ++ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Add the new addresses */ ++ for (i = 0; i < mc_addr_count; i++) { ++ DEBUGOUT(" Adding the multicast addresses:\n"); ++ DEBUGOUT7(" MC Addr #%d =%.2X %.2X %.2X %.2X %.2X %.2X\n", i, ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad)], ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 1], ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 2], ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 3], ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 4], ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 5]); ++ ++ hash_value = e1000_hash_mc_addr(hw, ++ mc_addr_list + ++ (i * (ETH_LENGTH_OF_ADDRESS + pad))); ++ ++ DEBUGOUT1(" Hash value = 0x%03X\n", hash_value); ++ ++ /* Place this multicast address in the RAR if there is room, * ++ * else put it in the MTA ++ */ ++ if (rar_used_count < num_rar_entry) { ++ e1000_rar_set(hw, ++ mc_addr_list + (i * (ETH_LENGTH_OF_ADDRESS + pad)), ++ rar_used_count); ++ rar_used_count++; ++ } else { ++ e1000_mta_set(hw, hash_value); ++ } ++ } ++ DEBUGOUT("MC Update Complete\n"); ++} ++ ++/****************************************************************************** ++ * Hashes an address to determine its location in the multicast table ++ * ++ * hw - Struct containing variables accessed by shared code ++ * mc_addr - the multicast address to hash ++ *****************************************************************************/ ++uint32_t ++e1000_hash_mc_addr(struct e1000_hw *hw, ++ uint8_t *mc_addr) ++{ ++ uint32_t hash_value = 0; ++ ++ /* The portion of the address that is used for the hash table is ++ * determined by the mc_filter_type setting. ++ */ ++ switch (hw->mc_filter_type) { ++ /* [0] [1] [2] [3] [4] [5] ++ * 01 AA 00 12 34 56 ++ * LSB MSB ++ */ ++ case 0: ++ if (hw->mac_type == e1000_ich8lan) { ++ /* [47:38] i.e. 0x158 for above example address */ ++ hash_value = ((mc_addr[4] >> 6) | (((uint16_t) mc_addr[5]) << 2)); ++ } else { ++ /* [47:36] i.e. 0x563 for above example address */ ++ hash_value = ((mc_addr[4] >> 4) | (((uint16_t) mc_addr[5]) << 4)); ++ } ++ break; ++ case 1: ++ if (hw->mac_type == e1000_ich8lan) { ++ /* [46:37] i.e. 0x2B1 for above example address */ ++ hash_value = ((mc_addr[4] >> 5) | (((uint16_t) mc_addr[5]) << 3)); ++ } else { ++ /* [46:35] i.e. 0xAC6 for above example address */ ++ hash_value = ((mc_addr[4] >> 3) | (((uint16_t) mc_addr[5]) << 5)); ++ } ++ break; ++ case 2: ++ if (hw->mac_type == e1000_ich8lan) { ++ /*[45:36] i.e. 0x163 for above example address */ ++ hash_value = ((mc_addr[4] >> 4) | (((uint16_t) mc_addr[5]) << 4)); ++ } else { ++ /* [45:34] i.e. 0x5D8 for above example address */ ++ hash_value = ((mc_addr[4] >> 2) | (((uint16_t) mc_addr[5]) << 6)); ++ } ++ break; ++ case 3: ++ if (hw->mac_type == e1000_ich8lan) { ++ /* [43:34] i.e. 0x18D for above example address */ ++ hash_value = ((mc_addr[4] >> 2) | (((uint16_t) mc_addr[5]) << 6)); ++ } else { ++ /* [43:32] i.e. 
0x634 for above example address */ ++ hash_value = ((mc_addr[4]) | (((uint16_t) mc_addr[5]) << 8)); ++ } ++ break; ++ } ++ ++ hash_value &= 0xFFF; ++ if (hw->mac_type == e1000_ich8lan) ++ hash_value &= 0x3FF; ++ ++ return hash_value; ++} ++ ++/****************************************************************************** ++ * Sets the bit in the multicast table corresponding to the hash value. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * hash_value - Multicast address hash value ++ *****************************************************************************/ ++void ++e1000_mta_set(struct e1000_hw *hw, ++ uint32_t hash_value) ++{ ++ uint32_t hash_bit, hash_reg; ++ uint32_t mta; ++ uint32_t temp; ++ ++ /* The MTA is a register array of 128 32-bit registers. ++ * It is treated like an array of 4096 bits. We want to set ++ * bit BitArray[hash_value]. So we figure out what register ++ * the bit is in, read it, OR in the new bit, then write ++ * back the new value. The register is determined by the ++ * upper 7 bits of the hash value and the bit within that ++ * register are determined by the lower 5 bits of the value. ++ */ ++ hash_reg = (hash_value >> 5) & 0x7F; ++ if (hw->mac_type == e1000_ich8lan) ++ hash_reg &= 0x1F; ++ hash_bit = hash_value & 0x1F; ++ ++ mta = E1000_READ_REG_ARRAY(hw, MTA, hash_reg); ++ ++ mta |= (1 << hash_bit); ++ ++ /* If we are on an 82544 and we are trying to write an odd offset ++ * in the MTA, save off the previous entry before writing and ++ * restore the old value after writing. ++ */ ++ if ((hw->mac_type == e1000_82544) && ((hash_reg & 0x1) == 1)) { ++ temp = E1000_READ_REG_ARRAY(hw, MTA, (hash_reg - 1)); ++ E1000_WRITE_REG_ARRAY(hw, MTA, hash_reg, mta); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, MTA, (hash_reg - 1), temp); ++ E1000_WRITE_FLUSH(hw); ++ } else { ++ E1000_WRITE_REG_ARRAY(hw, MTA, hash_reg, mta); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/****************************************************************************** ++ * Puts an ethernet address into a receive address register. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * addr - Address to put into receive address register ++ * index - Receive address register to write ++ *****************************************************************************/ ++void ++e1000_rar_set(struct e1000_hw *hw, ++ uint8_t *addr, ++ uint32_t index) ++{ ++ uint32_t rar_low, rar_high; ++ ++ /* HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((uint32_t) addr[0] | ++ ((uint32_t) addr[1] << 8) | ++ ((uint32_t) addr[2] << 16) | ((uint32_t) addr[3] << 24)); ++ rar_high = ((uint32_t) addr[4] | ((uint32_t) addr[5] << 8)); ++ ++ /* Disable Rx and flush all Rx frames before enabling RSS to avoid Rx ++ * unit hang. ++ * ++ * Description: ++ * If there are any Rx frames queued up or otherwise present in the HW ++ * before RSS is enabled, and then we enable RSS, the HW Rx unit will ++ * hang. To work around this issue, we have to disable receives and ++ * flush out all Rx frames before we enable RSS. To do so, we modify we ++ * redirect all Rx traffic to manageability and then reset the HW. ++ * This flushes away Rx frames, and (since the redirections to ++ * manageability persists across resets) keeps new ones from coming in ++ * while we work. Then, we clear the Address Valid AV bit for all MAC ++ * addresses and undo the re-direction to manageability. 
++ * Now, frames are coming in again, but the MAC won't accept them, so ++ * far so good. We now proceed to initialize RSS (if necessary) and ++ * configure the Rx unit. Last, we re-enable the AV bits and continue ++ * on our merry way. ++ */ ++ switch (hw->mac_type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ if (hw->leave_av_bit_off == TRUE) ++ break; ++ default: ++ /* Indicate to hardware the Address is Valid. */ ++ rar_high |= E1000_RAH_AV; ++ break; ++ } ++ ++ E1000_WRITE_REG_ARRAY(hw, RA, (index << 1), rar_low); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, RA, ((index << 1) + 1), rar_high); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/****************************************************************************** ++ * Writes a value to the specified offset in the VLAN filter table. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - Offset in VLAN filer table to write ++ * value - Value to write into VLAN filter table ++ *****************************************************************************/ ++void ++e1000_write_vfta(struct e1000_hw *hw, ++ uint32_t offset, ++ uint32_t value) ++{ ++ uint32_t temp; ++ ++ if (hw->mac_type == e1000_ich8lan) ++ return; ++ ++ if ((hw->mac_type == e1000_82544) && ((offset & 0x1) == 1)) { ++ temp = E1000_READ_REG_ARRAY(hw, VFTA, (offset - 1)); ++ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, VFTA, (offset - 1), temp); ++ E1000_WRITE_FLUSH(hw); ++ } else { ++ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/****************************************************************************** ++ * Clears the VLAN filer table ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++void ++e1000_clear_vfta(struct e1000_hw *hw) ++{ ++ uint32_t offset; ++ uint32_t vfta_value = 0; ++ uint32_t vfta_offset = 0; ++ uint32_t vfta_bit_in_reg = 0; ++ ++ if (hw->mac_type == e1000_ich8lan) ++ return; ++ ++ if (hw->mac_type == e1000_82573) { ++ if (hw->mng_cookie.vlan_id != 0) { ++ /* The VFTA is a 4096b bit-field, each identifying a single VLAN ++ * ID. The following operations determine which 32b entry ++ * (i.e. offset) into the array we want to set the VLAN ID ++ * (i.e. bit) of the manageability unit. */ ++ vfta_offset = (hw->mng_cookie.vlan_id >> ++ E1000_VFTA_ENTRY_SHIFT) & ++ E1000_VFTA_ENTRY_MASK; ++ vfta_bit_in_reg = 1 << (hw->mng_cookie.vlan_id & ++ E1000_VFTA_ENTRY_BIT_SHIFT_MASK); ++ } ++ } ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ /* If the offset we want to clear is the same offset of the ++ * manageability VLAN ID, then clear all bits except that of the ++ * manageability unit */ ++ vfta_value = (offset == vfta_offset) ? 
vfta_bit_in_reg : 0; ++ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, vfta_value); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++int32_t ++e1000_id_led_init(struct e1000_hw * hw) ++{ ++ uint32_t ledctl; ++ const uint32_t ledctl_mask = 0x000000FF; ++ const uint32_t ledctl_on = E1000_LEDCTL_MODE_LED_ON; ++ const uint32_t ledctl_off = E1000_LEDCTL_MODE_LED_OFF; ++ uint16_t eeprom_data, i, temp; ++ const uint16_t led_mask = 0x0F; ++ ++ DEBUGFUNC("e1000_id_led_init"); ++ ++ if (hw->mac_type < e1000_82540) { ++ /* Nothing to do */ ++ return E1000_SUCCESS; ++ } ++ ++ ledctl = E1000_READ_REG(hw, LEDCTL); ++ hw->ledctl_default = ledctl; ++ hw->ledctl_mode1 = hw->ledctl_default; ++ hw->ledctl_mode2 = hw->ledctl_default; ++ ++ if (e1000_read_eeprom(hw, EEPROM_ID_LED_SETTINGS, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ if ((hw->mac_type == e1000_82573) && ++ (eeprom_data == ID_LED_RESERVED_82573)) ++ eeprom_data = ID_LED_DEFAULT_82573; ++ else if ((eeprom_data == ID_LED_RESERVED_0000) || ++ (eeprom_data == ID_LED_RESERVED_FFFF)) { ++ if (hw->mac_type == e1000_ich8lan) ++ eeprom_data = ID_LED_DEFAULT_ICH8LAN; ++ else ++ eeprom_data = ID_LED_DEFAULT; ++ } ++ for (i = 0; i < 4; i++) { ++ temp = (eeprom_data >> (i << 2)) & led_mask; ++ switch (temp) { ++ case ID_LED_ON1_DEF2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_ON1_OFF2: ++ hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ hw->ledctl_mode1 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_OFF1_DEF2: ++ case ID_LED_OFF1_ON2: ++ case ID_LED_OFF1_OFF2: ++ hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ hw->ledctl_mode1 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ switch (temp) { ++ case ID_LED_DEF1_ON2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_OFF1_ON2: ++ hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ hw->ledctl_mode2 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_DEF1_OFF2: ++ case ID_LED_ON1_OFF2: ++ case ID_LED_OFF1_OFF2: ++ hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ hw->ledctl_mode2 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Prepares SW controlable LED for use and saves the current state of the LED. 
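++ * On 82541/82547 family parts this also turns off PHY Smart Power Down and
++ * saves the previous IGP01E1000_GMII_FIFO value in hw->phy_spd_default so
++ * that e1000_cleanup_led() can restore it later.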
++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_setup_led(struct e1000_hw *hw) ++{ ++ uint32_t ledctl; ++ int32_t ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_led"); ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ case e1000_82544: ++ /* No setup necessary */ ++ break; ++ case e1000_82541: ++ case e1000_82547: ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ /* Turn off PHY Smart Power Down (if enabled) */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, ++ &hw->phy_spd_default); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, ++ (uint16_t)(hw->phy_spd_default & ++ ~IGP01E1000_GMII_SPD)); ++ if (ret_val) ++ return ret_val; ++ /* Fall Through */ ++ default: ++ if (hw->media_type == e1000_media_type_fiber) { ++ ledctl = E1000_READ_REG(hw, LEDCTL); ++ /* Save current LEDCTL settings */ ++ hw->ledctl_default = ledctl; ++ /* Turn off LED0 */ ++ ledctl &= ~(E1000_LEDCTL_LED0_IVRT | ++ E1000_LEDCTL_LED0_BLINK | ++ E1000_LEDCTL_LED0_MODE_MASK); ++ ledctl |= (E1000_LEDCTL_MODE_LED_OFF << ++ E1000_LEDCTL_LED0_MODE_SHIFT); ++ E1000_WRITE_REG(hw, LEDCTL, ledctl); ++ } else if (hw->media_type == e1000_media_type_copper) ++ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode1); ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/****************************************************************************** ++ * Used on 82571 and later Si that has LED blink bits. ++ * Callers must use their own timer and should have already called ++ * e1000_id_led_init() ++ * Call e1000_cleanup led() to stop blinking ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_blink_led_start(struct e1000_hw *hw) ++{ ++ int16_t i; ++ uint32_t ledctl_blink = 0; ++ ++ DEBUGFUNC("e1000_id_led_blink_on"); ++ ++ if (hw->mac_type < e1000_82571) { ++ /* Nothing to do */ ++ return E1000_SUCCESS; ++ } ++ if (hw->media_type == e1000_media_type_fiber) { ++ /* always blink LED0 for PCI-E fiber */ ++ ledctl_blink = E1000_LEDCTL_LED0_BLINK | ++ (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); ++ } else { ++ /* set the blink bit for each LED that's "on" (0x0E) in ledctl_mode2 */ ++ ledctl_blink = hw->ledctl_mode2; ++ for (i=0; i < 4; i++) ++ if (((hw->ledctl_mode2 >> (i * 8)) & 0xFF) == ++ E1000_LEDCTL_MODE_LED_ON) ++ ledctl_blink |= (E1000_LEDCTL_LED0_BLINK << (i * 8)); ++ } ++ ++ E1000_WRITE_REG(hw, LEDCTL, ledctl_blink); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Restores the saved state of the SW controlable LED. 
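++ * 82541/82547 family parts get their saved IGP01E1000_GMII_FIFO value written
++ * back first; IFE PHYs clear IFE_PHY_SPECIAL_CONTROL_LED instead of restoring
++ * LEDCTL from hw->ledctl_default.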
++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_cleanup_led(struct e1000_hw *hw) ++{ ++ int32_t ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_cleanup_led"); ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ case e1000_82544: ++ /* No cleanup necessary */ ++ break; ++ case e1000_82541: ++ case e1000_82547: ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ /* Turn on PHY Smart Power Down (if previously enabled) */ ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, ++ hw->phy_spd_default); ++ if (ret_val) ++ return ret_val; ++ /* Fall Through */ ++ default: ++ if (hw->phy_type == e1000_phy_ife) { ++ e1000_write_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, 0); ++ break; ++ } ++ /* Restore LEDCTL settings */ ++ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_default); ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Turns on the software controllable LED ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_led_on(struct e1000_hw *hw) ++{ ++ uint32_t ctrl = E1000_READ_REG(hw, CTRL); ++ ++ DEBUGFUNC("e1000_led_on"); ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ /* Set SW Defineable Pin 0 to turn on the LED */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ break; ++ case e1000_82544: ++ if (hw->media_type == e1000_media_type_fiber) { ++ /* Set SW Defineable Pin 0 to turn on the LED */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else { ++ /* Clear SW Defineable Pin 0 to turn on the LED */ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } ++ break; ++ default: ++ if (hw->media_type == e1000_media_type_fiber) { ++ /* Clear SW Defineable Pin 0 to turn on the LED */ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else if (hw->phy_type == e1000_phy_ife) { ++ e1000_write_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_ON)); ++ } else if (hw->media_type == e1000_media_type_copper) { ++ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode2); ++ return E1000_SUCCESS; ++ } ++ break; ++ } ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Turns off the software controllable LED ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_led_off(struct e1000_hw *hw) ++{ ++ uint32_t ctrl = E1000_READ_REG(hw, CTRL); ++ ++ DEBUGFUNC("e1000_led_off"); ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ /* Clear SW Defineable Pin 0 to turn off the LED */ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ break; ++ case e1000_82544: ++ if (hw->media_type == e1000_media_type_fiber) { ++ /* Clear SW Defineable Pin 0 to turn off the LED */ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else { ++ /* Set SW Defineable Pin 0 to turn off the LED */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } ++ break; ++ default: ++ if (hw->media_type == 
e1000_media_type_fiber) { ++ /* Set SW Defineable Pin 0 to turn off the LED */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else if (hw->phy_type == e1000_phy_ife) { ++ e1000_write_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_OFF)); ++ } else if (hw->media_type == e1000_media_type_copper) { ++ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode1); ++ return E1000_SUCCESS; ++ } ++ break; ++ } ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Clears all hardware statistics counters. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++void ++e1000_clear_hw_cntrs(struct e1000_hw *hw) ++{ ++ volatile uint32_t temp; ++ ++ temp = E1000_READ_REG(hw, CRCERRS); ++ temp = E1000_READ_REG(hw, SYMERRS); ++ temp = E1000_READ_REG(hw, MPC); ++ temp = E1000_READ_REG(hw, SCC); ++ temp = E1000_READ_REG(hw, ECOL); ++ temp = E1000_READ_REG(hw, MCC); ++ temp = E1000_READ_REG(hw, LATECOL); ++ temp = E1000_READ_REG(hw, COLC); ++ temp = E1000_READ_REG(hw, DC); ++ temp = E1000_READ_REG(hw, SEC); ++ temp = E1000_READ_REG(hw, RLEC); ++ temp = E1000_READ_REG(hw, XONRXC); ++ temp = E1000_READ_REG(hw, XONTXC); ++ temp = E1000_READ_REG(hw, XOFFRXC); ++ temp = E1000_READ_REG(hw, XOFFTXC); ++ temp = E1000_READ_REG(hw, FCRUC); ++ ++ if (hw->mac_type != e1000_ich8lan) { ++ temp = E1000_READ_REG(hw, PRC64); ++ temp = E1000_READ_REG(hw, PRC127); ++ temp = E1000_READ_REG(hw, PRC255); ++ temp = E1000_READ_REG(hw, PRC511); ++ temp = E1000_READ_REG(hw, PRC1023); ++ temp = E1000_READ_REG(hw, PRC1522); ++ } ++ ++ temp = E1000_READ_REG(hw, GPRC); ++ temp = E1000_READ_REG(hw, BPRC); ++ temp = E1000_READ_REG(hw, MPRC); ++ temp = E1000_READ_REG(hw, GPTC); ++ temp = E1000_READ_REG(hw, GORCL); ++ temp = E1000_READ_REG(hw, GORCH); ++ temp = E1000_READ_REG(hw, GOTCL); ++ temp = E1000_READ_REG(hw, GOTCH); ++ temp = E1000_READ_REG(hw, RNBC); ++ temp = E1000_READ_REG(hw, RUC); ++ temp = E1000_READ_REG(hw, RFC); ++ temp = E1000_READ_REG(hw, ROC); ++ temp = E1000_READ_REG(hw, RJC); ++ temp = E1000_READ_REG(hw, TORL); ++ temp = E1000_READ_REG(hw, TORH); ++ temp = E1000_READ_REG(hw, TOTL); ++ temp = E1000_READ_REG(hw, TOTH); ++ temp = E1000_READ_REG(hw, TPR); ++ temp = E1000_READ_REG(hw, TPT); ++ ++ if (hw->mac_type != e1000_ich8lan) { ++ temp = E1000_READ_REG(hw, PTC64); ++ temp = E1000_READ_REG(hw, PTC127); ++ temp = E1000_READ_REG(hw, PTC255); ++ temp = E1000_READ_REG(hw, PTC511); ++ temp = E1000_READ_REG(hw, PTC1023); ++ temp = E1000_READ_REG(hw, PTC1522); ++ } ++ ++ temp = E1000_READ_REG(hw, MPTC); ++ temp = E1000_READ_REG(hw, BPTC); ++ ++ if (hw->mac_type < e1000_82543) return; ++ ++ temp = E1000_READ_REG(hw, ALGNERRC); ++ temp = E1000_READ_REG(hw, RXERRC); ++ temp = E1000_READ_REG(hw, TNCRS); ++ temp = E1000_READ_REG(hw, CEXTERR); ++ temp = E1000_READ_REG(hw, TSCTC); ++ temp = E1000_READ_REG(hw, TSCTFC); ++ ++ if (hw->mac_type <= e1000_82544) return; ++ ++ temp = E1000_READ_REG(hw, MGTPRC); ++ temp = E1000_READ_REG(hw, MGTPDC); ++ temp = E1000_READ_REG(hw, MGTPTC); ++ ++ if (hw->mac_type <= e1000_82547_rev_2) return; ++ ++ temp = E1000_READ_REG(hw, IAC); ++ temp = E1000_READ_REG(hw, ICRXOC); ++ ++ if (hw->mac_type == e1000_ich8lan) return; ++ ++ temp = E1000_READ_REG(hw, ICRXPTC); ++ temp = E1000_READ_REG(hw, ICRXATC); ++ temp = E1000_READ_REG(hw, ICTXPTC); ++ temp = 
E1000_READ_REG(hw, ICTXATC); ++ temp = E1000_READ_REG(hw, ICTXQEC); ++ temp = E1000_READ_REG(hw, ICTXQMTC); ++ temp = E1000_READ_REG(hw, ICRXDMTC); ++} ++ ++/****************************************************************************** ++ * Resets Adaptive IFS to its default state. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Call this after e1000_init_hw. You may override the IFS defaults by setting ++ * hw->ifs_params_forced to TRUE. However, you must initialize hw-> ++ * current_ifs_val, ifs_min_val, ifs_max_val, ifs_step_size, and ifs_ratio ++ * before calling this function. ++ *****************************************************************************/ ++void ++e1000_reset_adaptive(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_reset_adaptive"); ++ ++ if (hw->adaptive_ifs) { ++ if (!hw->ifs_params_forced) { ++ hw->current_ifs_val = 0; ++ hw->ifs_min_val = IFS_MIN; ++ hw->ifs_max_val = IFS_MAX; ++ hw->ifs_step_size = IFS_STEP; ++ hw->ifs_ratio = IFS_RATIO; ++ } ++ hw->in_ifs_mode = FALSE; ++ E1000_WRITE_REG(hw, AIT, 0); ++ } else { ++ DEBUGOUT("Not in Adaptive IFS mode!\n"); ++ } ++} ++ ++/****************************************************************************** ++ * Called during the callback/watchdog routine to update IFS value based on ++ * the ratio of transmits to collisions. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * tx_packets - Number of transmits since last callback ++ * total_collisions - Number of collisions since last callback ++ *****************************************************************************/ ++void ++e1000_update_adaptive(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_update_adaptive"); ++ ++ if (hw->adaptive_ifs) { ++ if ((hw->collision_delta * hw->ifs_ratio) > hw->tx_packet_delta) { ++ if (hw->tx_packet_delta > MIN_NUM_XMITS) { ++ hw->in_ifs_mode = TRUE; ++ if (hw->current_ifs_val < hw->ifs_max_val) { ++ if (hw->current_ifs_val == 0) ++ hw->current_ifs_val = hw->ifs_min_val; ++ else ++ hw->current_ifs_val += hw->ifs_step_size; ++ E1000_WRITE_REG(hw, AIT, hw->current_ifs_val); ++ } ++ } ++ } else { ++ if (hw->in_ifs_mode && (hw->tx_packet_delta <= MIN_NUM_XMITS)) { ++ hw->current_ifs_val = 0; ++ hw->in_ifs_mode = FALSE; ++ E1000_WRITE_REG(hw, AIT, 0); ++ } ++ } ++ } else { ++ DEBUGOUT("Not in Adaptive IFS mode!\n"); ++ } ++} ++ ++/****************************************************************************** ++ * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT ++ * ++ * hw - Struct containing variables accessed by shared code ++ * frame_len - The length of the frame in question ++ * mac_addr - The Ethernet destination address of the frame in question ++ *****************************************************************************/ ++void ++e1000_tbi_adjust_stats(struct e1000_hw *hw, ++ struct e1000_hw_stats *stats, ++ uint32_t frame_len, ++ uint8_t *mac_addr) ++{ ++ uint64_t carry_bit; ++ ++ /* First adjust the frame length. */ ++ frame_len--; ++ /* We need to adjust the statistics counters, since the hardware ++ * counters overcount this packet as a CRC error and undercount ++ * the packet as a good packet ++ */ ++ /* This packet should not be counted as a CRC error. */ ++ stats->crcerrs--; ++ /* This packet does count as a Good Packet Received. 
*/ ++ stats->gprc++; ++ ++ /* Adjust the Good Octets received counters */ ++ carry_bit = 0x80000000 & stats->gorcl; ++ stats->gorcl += frame_len; ++ /* If the high bit of Gorcl (the low 32 bits of the Good Octets ++ * Received Count) was one before the addition, ++ * AND it is zero after, then we lost the carry out, ++ * need to add one to Gorch (Good Octets Received Count High). ++ * This could be simplified if all environments supported ++ * 64-bit integers. ++ */ ++ if (carry_bit && ((stats->gorcl & 0x80000000) == 0)) ++ stats->gorch++; ++ /* Is this a broadcast or multicast? Check broadcast first, ++ * since the test for a multicast frame will test positive on ++ * a broadcast frame. ++ */ ++ if ((mac_addr[0] == (uint8_t) 0xff) && (mac_addr[1] == (uint8_t) 0xff)) ++ /* Broadcast packet */ ++ stats->bprc++; ++ else if (*mac_addr & 0x01) ++ /* Multicast packet */ ++ stats->mprc++; ++ ++ if (frame_len == hw->max_frame_size) { ++ /* In this case, the hardware has overcounted the number of ++ * oversize frames. ++ */ ++ if (stats->roc > 0) ++ stats->roc--; ++ } ++ ++ /* Adjust the bin counters when the extra byte put the frame in the ++ * wrong bin. Remember that the frame_len was adjusted above. ++ */ ++ if (frame_len == 64) { ++ stats->prc64++; ++ stats->prc127--; ++ } else if (frame_len == 127) { ++ stats->prc127++; ++ stats->prc255--; ++ } else if (frame_len == 255) { ++ stats->prc255++; ++ stats->prc511--; ++ } else if (frame_len == 511) { ++ stats->prc511++; ++ stats->prc1023--; ++ } else if (frame_len == 1023) { ++ stats->prc1023++; ++ stats->prc1522--; ++ } else if (frame_len == 1522) { ++ stats->prc1522++; ++ } ++} ++ ++/****************************************************************************** ++ * Gets the current PCI bus type, speed, and width of the hardware ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++void ++e1000_get_bus_info(struct e1000_hw *hw) ++{ ++ uint32_t status; ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ hw->bus_type = e1000_bus_type_unknown; ++ hw->bus_speed = e1000_bus_speed_unknown; ++ hw->bus_width = e1000_bus_width_unknown; ++ break; ++ case e1000_82572: ++ case e1000_82573: ++ hw->bus_type = e1000_bus_type_pci_express; ++ hw->bus_speed = e1000_bus_speed_2500; ++ hw->bus_width = e1000_bus_width_pciex_1; ++ break; ++ case e1000_82571: ++ case e1000_ich8lan: ++ case e1000_80003es2lan: ++ hw->bus_type = e1000_bus_type_pci_express; ++ hw->bus_speed = e1000_bus_speed_2500; ++ hw->bus_width = e1000_bus_width_pciex_4; ++ break; ++ default: ++ status = E1000_READ_REG(hw, STATUS); ++ hw->bus_type = (status & E1000_STATUS_PCIX_MODE) ? ++ e1000_bus_type_pcix : e1000_bus_type_pci; ++ ++ if (hw->device_id == E1000_DEV_ID_82546EB_QUAD_COPPER) { ++ hw->bus_speed = (hw->bus_type == e1000_bus_type_pci) ? ++ e1000_bus_speed_66 : e1000_bus_speed_120; ++ } else if (hw->bus_type == e1000_bus_type_pci) { ++ hw->bus_speed = (status & E1000_STATUS_PCI66) ? 
++ e1000_bus_speed_66 : e1000_bus_speed_33; ++ } else { ++ switch (status & E1000_STATUS_PCIX_SPEED) { ++ case E1000_STATUS_PCIX_SPEED_66: ++ hw->bus_speed = e1000_bus_speed_66; ++ break; ++ case E1000_STATUS_PCIX_SPEED_100: ++ hw->bus_speed = e1000_bus_speed_100; ++ break; ++ case E1000_STATUS_PCIX_SPEED_133: ++ hw->bus_speed = e1000_bus_speed_133; ++ break; ++ default: ++ hw->bus_speed = e1000_bus_speed_reserved; ++ break; ++ } ++ } ++ hw->bus_width = (status & E1000_STATUS_BUS64) ? ++ e1000_bus_width_64 : e1000_bus_width_32; ++ break; ++ } ++} ++/****************************************************************************** ++ * Reads a value from one of the devices registers using port I/O (as opposed ++ * memory mapped I/O). Only 82544 and newer devices support port I/O. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset to read from ++ *****************************************************************************/ ++uint32_t ++e1000_read_reg_io(struct e1000_hw *hw, ++ uint32_t offset) ++{ ++ unsigned long io_addr = hw->io_base; ++ unsigned long io_data = hw->io_base + 4; ++ ++ e1000_io_write(hw, io_addr, offset); ++ return e1000_io_read(hw, io_data); ++} ++ ++/****************************************************************************** ++ * Writes a value to one of the devices registers using port I/O (as opposed to ++ * memory mapped I/O). Only 82544 and newer devices support port I/O. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset to write to ++ * value - value to write ++ *****************************************************************************/ ++void ++e1000_write_reg_io(struct e1000_hw *hw, ++ uint32_t offset, ++ uint32_t value) ++{ ++ unsigned long io_addr = hw->io_base; ++ unsigned long io_data = hw->io_base + 4; ++ ++ e1000_io_write(hw, io_addr, offset); ++ e1000_io_write(hw, io_data, value); ++} ++ ++ ++/****************************************************************************** ++ * Estimates the cable length. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * min_length - The estimated minimum length ++ * max_length - The estimated maximum length ++ * ++ * returns: - E1000_ERR_XXX ++ * E1000_SUCCESS ++ * ++ * This function always returns a ranged length (minimum & maximum). ++ * So for M88 phy's, this function interprets the one value returned from the ++ * register to the minimum and maximum range. ++ * For IGP phy's, the function calculates the range by the AGC registers. 
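++ * For IGP01 the four per-channel AGC readings are averaged (the smallest
++ * channel is dropped for cables shorter than 50m) and the looked-up length is
++ * widened by +/- IGP01E1000_AGC_RANGE; IGP2/IGP3 averages the table values
++ * with the minimum and maximum channels excluded and uses IGP02E1000_AGC_RANGE.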
++ *****************************************************************************/ ++int32_t ++e1000_get_cable_length(struct e1000_hw *hw, ++ uint16_t *min_length, ++ uint16_t *max_length) ++{ ++ int32_t ret_val; ++ uint16_t agc_value = 0; ++ uint16_t i, phy_data; ++ uint16_t cable_length; ++ ++ DEBUGFUNC("e1000_get_cable_length"); ++ ++ *min_length = *max_length = 0; ++ ++ /* Use old method for Phy older than IGP */ ++ if (hw->phy_type == e1000_phy_m88) { ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ cable_length = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ ++ /* Convert the enum value to ranged values */ ++ switch (cable_length) { ++ case e1000_cable_length_50: ++ *min_length = 0; ++ *max_length = e1000_igp_cable_length_50; ++ break; ++ case e1000_cable_length_50_80: ++ *min_length = e1000_igp_cable_length_50; ++ *max_length = e1000_igp_cable_length_80; ++ break; ++ case e1000_cable_length_80_110: ++ *min_length = e1000_igp_cable_length_80; ++ *max_length = e1000_igp_cable_length_110; ++ break; ++ case e1000_cable_length_110_140: ++ *min_length = e1000_igp_cable_length_110; ++ *max_length = e1000_igp_cable_length_140; ++ break; ++ case e1000_cable_length_140: ++ *min_length = e1000_igp_cable_length_140; ++ *max_length = e1000_igp_cable_length_170; ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ } else if (hw->phy_type == e1000_phy_gg82563) { ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_DSP_DISTANCE, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ cable_length = phy_data & GG82563_DSPD_CABLE_LENGTH; ++ ++ switch (cable_length) { ++ case e1000_gg_cable_length_60: ++ *min_length = 0; ++ *max_length = e1000_igp_cable_length_60; ++ break; ++ case e1000_gg_cable_length_60_115: ++ *min_length = e1000_igp_cable_length_60; ++ *max_length = e1000_igp_cable_length_115; ++ break; ++ case e1000_gg_cable_length_115_150: ++ *min_length = e1000_igp_cable_length_115; ++ *max_length = e1000_igp_cable_length_150; ++ break; ++ case e1000_gg_cable_length_150: ++ *min_length = e1000_igp_cable_length_150; ++ *max_length = e1000_igp_cable_length_180; ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ } else if (hw->phy_type == e1000_phy_igp) { /* For IGP PHY */ ++ uint16_t cur_agc_value; ++ uint16_t min_agc_value = IGP01E1000_AGC_LENGTH_TABLE_SIZE; ++ uint16_t agc_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = ++ {IGP01E1000_PHY_AGC_A, ++ IGP01E1000_PHY_AGC_B, ++ IGP01E1000_PHY_AGC_C, ++ IGP01E1000_PHY_AGC_D}; ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ++ ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ cur_agc_value = phy_data >> IGP01E1000_AGC_LENGTH_SHIFT; ++ ++ /* Value bound check. */ ++ if ((cur_agc_value >= IGP01E1000_AGC_LENGTH_TABLE_SIZE - 1) || ++ (cur_agc_value == 0)) ++ return -E1000_ERR_PHY; ++ ++ agc_value += cur_agc_value; ++ ++ /* Update minimal AGC value. */ ++ if (min_agc_value > cur_agc_value) ++ min_agc_value = cur_agc_value; ++ } ++ ++ /* Remove the minimal AGC result for length < 50m */ ++ if (agc_value < IGP01E1000_PHY_CHANNEL_NUM * e1000_igp_cable_length_50) { ++ agc_value -= min_agc_value; ++ ++ /* Get the average length of the remaining 3 channels */ ++ agc_value /= (IGP01E1000_PHY_CHANNEL_NUM - 1); ++ } else { ++ /* Get the average length of all the 4 channels. 
*/ ++ agc_value /= IGP01E1000_PHY_CHANNEL_NUM; ++ } ++ ++ /* Set the range of the calculated length. */ ++ *min_length = ((e1000_igp_cable_length_table[agc_value] - ++ IGP01E1000_AGC_RANGE) > 0) ? ++ (e1000_igp_cable_length_table[agc_value] - ++ IGP01E1000_AGC_RANGE) : 0; ++ *max_length = e1000_igp_cable_length_table[agc_value] + ++ IGP01E1000_AGC_RANGE; ++ } else if (hw->phy_type == e1000_phy_igp_2 || ++ hw->phy_type == e1000_phy_igp_3) { ++ uint16_t cur_agc_index, max_agc_index = 0; ++ uint16_t min_agc_index = IGP02E1000_AGC_LENGTH_TABLE_SIZE - 1; ++ uint16_t agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = ++ {IGP02E1000_PHY_AGC_A, ++ IGP02E1000_PHY_AGC_B, ++ IGP02E1000_PHY_AGC_C, ++ IGP02E1000_PHY_AGC_D}; ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Getting bits 15:9, which represent the combination of course and ++ * fine gain values. The result is a number that can be put into ++ * the lookup table to obtain the approximate cable length. */ ++ cur_agc_index = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & ++ IGP02E1000_AGC_LENGTH_MASK; ++ ++ /* Array index bound check. */ ++ if ((cur_agc_index >= IGP02E1000_AGC_LENGTH_TABLE_SIZE) || ++ (cur_agc_index == 0)) ++ return -E1000_ERR_PHY; ++ ++ /* Remove min & max AGC values from calculation. */ ++ if (e1000_igp_2_cable_length_table[min_agc_index] > ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ min_agc_index = cur_agc_index; ++ if (e1000_igp_2_cable_length_table[max_agc_index] < ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ max_agc_index = cur_agc_index; ++ ++ agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; ++ } ++ ++ agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + ++ e1000_igp_2_cable_length_table[max_agc_index]); ++ agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); ++ ++ /* Calculate cable length with the error range of +/- 10 meters. */ ++ *min_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ? ++ (agc_value - IGP02E1000_AGC_RANGE) : 0; ++ *max_length = agc_value + IGP02E1000_AGC_RANGE; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Check the cable polarity ++ * ++ * hw - Struct containing variables accessed by shared code ++ * polarity - output parameter : 0 - Polarity is not reversed ++ * 1 - Polarity is reversed. ++ * ++ * returns: - E1000_ERR_XXX ++ * E1000_SUCCESS ++ * ++ * For phy's older then IGP, this function simply reads the polarity bit in the ++ * Phy Status register. For IGP phy's, this bit is valid only if link speed is ++ * 10 Mbps. If the link speed is 100 Mbps there is no polarity so this bit will ++ * return 0. If the link speed is 1000 Mbps the polarity status is in the ++ * IGP01E1000_PHY_PCS_INIT_REG. ++ *****************************************************************************/ ++int32_t ++e1000_check_polarity(struct e1000_hw *hw, ++ uint16_t *polarity) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_check_polarity"); ++ ++ if ((hw->phy_type == e1000_phy_m88) || ++ (hw->phy_type == e1000_phy_gg82563)) { ++ /* return the Polarity bit in the Status register. 
*/ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ *polarity = (phy_data & M88E1000_PSSR_REV_POLARITY) >> ++ M88E1000_PSSR_REV_POLARITY_SHIFT; ++ } else if (hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) { ++ /* Read the Status register to check the speed */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* If speed is 1000 Mbps, must read the IGP01E1000_PHY_PCS_INIT_REG to ++ * find the polarity status */ ++ if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ ++ /* Read the GIG initialization PCS register (0x00B4) */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PCS_INIT_REG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Check the polarity bits */ ++ *polarity = (phy_data & IGP01E1000_PHY_POLARITY_MASK) ? 1 : 0; ++ } else { ++ /* For 10 Mbps, read the polarity bit in the status register. (for ++ * 100 Mbps this bit is always 0) */ ++ *polarity = phy_data & IGP01E1000_PSSR_POLARITY_REVERSED; ++ } ++ } else if (hw->phy_type == e1000_phy_ife) { ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_EXTENDED_STATUS_CONTROL, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ *polarity = (phy_data & IFE_PESC_POLARITY_REVERSED) >> ++ IFE_PESC_POLARITY_REVERSED_SHIFT; ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Check if Downshift occured ++ * ++ * hw - Struct containing variables accessed by shared code ++ * downshift - output parameter : 0 - No Downshift ocured. ++ * 1 - Downshift ocured. ++ * ++ * returns: - E1000_ERR_XXX ++ * E1000_SUCCESS ++ * ++ * For phy's older then IGP, this function reads the Downshift bit in the Phy ++ * Specific Status register. For IGP phy's, it reads the Downgrade bit in the ++ * Link Health register. In IGP this bit is latched high, so the driver must ++ * read it immediately after link is established. ++ *****************************************************************************/ ++int32_t ++e1000_check_downshift(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_check_downshift"); ++ ++ if (hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_LINK_HEALTH, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ hw->speed_downgraded = (phy_data & IGP01E1000_PLHR_SS_DOWNGRADE) ? 1 : 0; ++ } else if ((hw->phy_type == e1000_phy_m88) || ++ (hw->phy_type == e1000_phy_gg82563)) { ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ hw->speed_downgraded = (phy_data & M88E1000_PSSR_DOWNSHIFT) >> ++ M88E1000_PSSR_DOWNSHIFT_SHIFT; ++ } else if (hw->phy_type == e1000_phy_ife) { ++ /* e1000_phy_ife supports 10/100 speed only */ ++ hw->speed_downgraded = FALSE; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * ++ * 82541_rev_2 & 82547_rev_2 have the capability to configure the DSP when a ++ * gigabit link is achieved to improve link quality. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_PHY if fail to read/write the PHY ++ * E1000_SUCCESS at any other case. 
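++ * Only IGP01 PHYs are handled: on a 1000 Mbps link with a cable of 50m or
++ * longer the EDAC MU index bits are cleared in the four AGC PARAM registers,
++ * and once the link drops the defaults are restored around a save/restore of
++ * PHY register 0x2F5B with the transmitter temporarily disabled.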
++ * ++ ****************************************************************************/ ++ ++int32_t ++e1000_config_dsp_after_link_change(struct e1000_hw *hw, ++ boolean_t link_up) ++{ ++ int32_t ret_val; ++ uint16_t phy_data, phy_saved_data, speed, duplex, i; ++ uint16_t dsp_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = ++ {IGP01E1000_PHY_AGC_PARAM_A, ++ IGP01E1000_PHY_AGC_PARAM_B, ++ IGP01E1000_PHY_AGC_PARAM_C, ++ IGP01E1000_PHY_AGC_PARAM_D}; ++ uint16_t min_length, max_length; ++ ++ DEBUGFUNC("e1000_config_dsp_after_link_change"); ++ ++ if (hw->phy_type != e1000_phy_igp) ++ return E1000_SUCCESS; ++ ++ if (link_up) { ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ return ret_val; ++ } ++ ++ if (speed == SPEED_1000) { ++ ++ ret_val = e1000_get_cable_length(hw, &min_length, &max_length); ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->dsp_config_state == e1000_dsp_config_enabled) && ++ min_length >= e1000_igp_cable_length_50) { ++ ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, dsp_reg_array[i], ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; ++ ++ ret_val = e1000_write_phy_reg(hw, dsp_reg_array[i], ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ hw->dsp_config_state = e1000_dsp_config_activated; ++ } ++ ++ if ((hw->ffe_config_state == e1000_ffe_config_enabled) && ++ (min_length < e1000_igp_cable_length_50)) { ++ ++ uint16_t ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_20; ++ uint32_t idle_errs = 0; ++ ++ /* clear previous idle error counts */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ for (i = 0; i < ffe_idle_err_timeout; i++) { ++ usec_delay(1000); ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ idle_errs += (phy_data & SR_1000T_IDLE_ERROR_CNT); ++ if (idle_errs > SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT) { ++ hw->ffe_config_state = e1000_ffe_config_active; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_DSP_FFE, ++ IGP01E1000_PHY_DSP_FFE_CM_CP); ++ if (ret_val) ++ return ret_val; ++ break; ++ } ++ ++ if (idle_errs) ++ ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_100; ++ } ++ } ++ } ++ } else { ++ if (hw->dsp_config_state == e1000_dsp_config_activated) { ++ /* Save off the current value of register 0x2F5B to be restored at ++ * the end of the routines. 
*/ ++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ /* Disable the PHY transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ ++ if (ret_val) ++ return ret_val; ++ ++ msec_delay_irq(20); ++ ++ ret_val = e1000_write_phy_reg(hw, 0x0000, ++ IGP01E1000_IEEE_FORCE_GIGA); ++ if (ret_val) ++ return ret_val; ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, dsp_reg_array[i], &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; ++ phy_data |= IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS; ++ ++ ret_val = e1000_write_phy_reg(hw,dsp_reg_array[i], phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ ret_val = e1000_write_phy_reg(hw, 0x0000, ++ IGP01E1000_IEEE_RESTART_AUTONEG); ++ if (ret_val) ++ return ret_val; ++ ++ msec_delay_irq(20); ++ ++ /* Now enable the transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ hw->dsp_config_state = e1000_dsp_config_enabled; ++ } ++ ++ if (hw->ffe_config_state == e1000_ffe_config_active) { ++ /* Save off the current value of register 0x2F5B to be restored at ++ * the end of the routines. */ ++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ /* Disable the PHY transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ ++ if (ret_val) ++ return ret_val; ++ ++ msec_delay_irq(20); ++ ++ ret_val = e1000_write_phy_reg(hw, 0x0000, ++ IGP01E1000_IEEE_FORCE_GIGA); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_DSP_FFE, ++ IGP01E1000_PHY_DSP_FFE_DEFAULT); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg(hw, 0x0000, ++ IGP01E1000_IEEE_RESTART_AUTONEG); ++ if (ret_val) ++ return ret_val; ++ ++ msec_delay_irq(20); ++ ++ /* Now enable the transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ hw->ffe_config_state = e1000_ffe_config_enabled; ++ } ++ } ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * Set PHY to class A mode ++ * Assumes the following operations will follow to enable the new class mode. ++ * 1. Do a PHY soft reset ++ * 2. Restart auto-negotiation or force link. ++ * ++ * hw - Struct containing variables accessed by shared code ++ ****************************************************************************/ ++static int32_t ++e1000_set_phy_mode(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t eeprom_data; ++ ++ DEBUGFUNC("e1000_set_phy_mode"); ++ ++ if ((hw->mac_type == e1000_82545_rev_3) && ++ (hw->media_type == e1000_media_type_copper)) { ++ ret_val = e1000_read_eeprom(hw, EEPROM_PHY_CLASS_WORD, 1, &eeprom_data); ++ if (ret_val) { ++ return ret_val; ++ } ++ ++ if ((eeprom_data != EEPROM_RESERVED_WORD) && ++ (eeprom_data & EEPROM_PHY_CLASS_A)) { ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x000B); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x8104); ++ if (ret_val) ++ return ret_val; ++ ++ hw->phy_reset_disable = FALSE; ++ } ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * ++ * This function sets the lplu state according to the active flag. When ++ * activating lplu this function also disables smart speed and vise versa. 
++ * lplu will not be activated unless the device autonegotiation advertisment ++ * meets standards of either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * hw: Struct containing variables accessed by shared code ++ * active - true to enable lplu false to disable lplu. ++ * ++ * returns: - E1000_ERR_PHY if fail to read/write the PHY ++ * E1000_SUCCESS at any other case. ++ * ++ ****************************************************************************/ ++ ++int32_t ++e1000_set_d3_lplu_state(struct e1000_hw *hw, ++ boolean_t active) ++{ ++ uint32_t phy_ctrl = 0; ++ int32_t ret_val; ++ uint16_t phy_data; ++ DEBUGFUNC("e1000_set_d3_lplu_state"); ++ ++ if (hw->phy_type != e1000_phy_igp && hw->phy_type != e1000_phy_igp_2 ++ && hw->phy_type != e1000_phy_igp_3) ++ return E1000_SUCCESS; ++ ++ /* During driver activity LPLU should not be used or it will attain link ++ * from the lowest speeds starting from 10Mbps. The capability is used for ++ * Dx transitions and states */ ++ if (hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547_rev_2) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, &phy_data); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->mac_type == e1000_ich8lan) { ++ /* MAC writes into PHY register based on the state transition ++ * and start auto-negotiation. SW driver can overwrite the settings ++ * in CSR PHY power control E1000_PHY_CTRL register. */ ++ phy_ctrl = E1000_READ_REG(hw, PHY_CTRL); ++ } else { ++ ret_val = e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if (!active) { ++ if (hw->mac_type == e1000_82541_rev_2 || ++ hw->mac_type == e1000_82547_rev_2) { ++ phy_data &= ~IGP01E1000_GMII_FLEX_SPD; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, phy_data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ if (hw->mac_type == e1000_ich8lan) { ++ phy_ctrl &= ~E1000_PHY_CTRL_NOND0A_LPLU; ++ E1000_WRITE_REG(hw, PHY_CTRL, phy_ctrl); ++ } else { ++ phy_data &= ~IGP02E1000_PM_D3_LPLU; ++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used during ++ * Dx states where the power conservation is most important. During ++ * driver activity we should enable SmartSpeed, so performance is ++ * maintained. 
*/ ++ if (hw->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ } else if ((hw->autoneg_advertised == AUTONEG_ADVERTISE_SPEED_DEFAULT) || ++ (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_ALL ) || ++ (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_100_ALL)) { ++ ++ if (hw->mac_type == e1000_82541_rev_2 || ++ hw->mac_type == e1000_82547_rev_2) { ++ phy_data |= IGP01E1000_GMII_FLEX_SPD; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, phy_data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ if (hw->mac_type == e1000_ich8lan) { ++ phy_ctrl |= E1000_PHY_CTRL_NOND0A_LPLU; ++ E1000_WRITE_REG(hw, PHY_CTRL, phy_ctrl); ++ } else { ++ phy_data |= IGP02E1000_PM_D3_LPLU; ++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ /* When LPLU is enabled we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ } ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * ++ * This function sets the lplu d0 state according to the active flag. When ++ * activating lplu this function also disables smart speed and vise versa. ++ * lplu will not be activated unless the device autonegotiation advertisment ++ * meets standards of either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * hw: Struct containing variables accessed by shared code ++ * active - true to enable lplu false to disable lplu. ++ * ++ * returns: - E1000_ERR_PHY if fail to read/write the PHY ++ * E1000_SUCCESS at any other case. ++ * ++ ****************************************************************************/ ++ ++int32_t ++e1000_set_d0_lplu_state(struct e1000_hw *hw, ++ boolean_t active) ++{ ++ uint32_t phy_ctrl = 0; ++ int32_t ret_val; ++ uint16_t phy_data; ++ DEBUGFUNC("e1000_set_d0_lplu_state"); ++ ++ if (hw->mac_type <= e1000_82547_rev_2) ++ return E1000_SUCCESS; ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ phy_ctrl = E1000_READ_REG(hw, PHY_CTRL); ++ } else { ++ ret_val = e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if (!active) { ++ if (hw->mac_type == e1000_ich8lan) { ++ phy_ctrl &= ~E1000_PHY_CTRL_D0A_LPLU; ++ E1000_WRITE_REG(hw, PHY_CTRL, phy_ctrl); ++ } else { ++ phy_data &= ~IGP02E1000_PM_D0_LPLU; ++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used during ++ * Dx states where the power conservation is most important. During ++ * driver activity we should enable SmartSpeed, so performance is ++ * maintained. 
*/ ++ if (hw->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ ++ } else { ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU; ++ E1000_WRITE_REG(hw, PHY_CTRL, phy_ctrl); ++ } else { ++ phy_data |= IGP02E1000_PM_D0_LPLU; ++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* When LPLU is enabled we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Change VCO speed register to improve Bit Error Rate performance of SERDES. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++static int32_t ++e1000_set_vco_speed(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t default_page = 0; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_set_vco_speed"); ++ ++ switch (hw->mac_type) { ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ break; ++ default: ++ return E1000_SUCCESS; ++ } ++ ++ /* Set PHY register 30, page 5, bit 8 to 0 */ ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, &default_page); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0005); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~M88E1000_PHY_VCO_REG_BIT8; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Set PHY register 30, page 4, bit 11 to 1 */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0004); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_PHY_VCO_REG_BIT11; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, default_page); ++ if (ret_val) ++ return ret_val; ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/***************************************************************************** ++ * This function reads the cookie from ARC ram. ++ * ++ * returns: - E1000_SUCCESS . 
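++ * The byte-based offset and length constants are converted to DWORD units
++ * (shifted right by 2) and the cookie is copied out of the HOST_IF register
++ * array one 32-bit word at a time.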
++ ****************************************************************************/ ++int32_t ++e1000_host_if_read_cookie(struct e1000_hw * hw, uint8_t *buffer) ++{ ++ uint8_t i; ++ uint32_t offset = E1000_MNG_DHCP_COOKIE_OFFSET; ++ uint8_t length = E1000_MNG_DHCP_COOKIE_LENGTH; ++ ++ length = (length >> 2); ++ offset = (offset >> 2); ++ ++ for (i = 0; i < length; i++) { ++ *((uint32_t *) buffer + i) = ++ E1000_READ_REG_ARRAY_DWORD(hw, HOST_IF, offset + i); ++ } ++ return E1000_SUCCESS; ++} ++ ++ ++/***************************************************************************** ++ * This function checks whether the HOST IF is enabled for command operaton ++ * and also checks whether the previous command is completed. ++ * It busy waits in case of previous command is not completed. ++ * ++ * returns: - E1000_ERR_HOST_INTERFACE_COMMAND in case if is not ready or ++ * timeout ++ * - E1000_SUCCESS for success. ++ ****************************************************************************/ ++int32_t ++e1000_mng_enable_host_if(struct e1000_hw * hw) ++{ ++ uint32_t hicr; ++ uint8_t i; ++ ++ /* Check that the host interface is enabled. */ ++ hicr = E1000_READ_REG(hw, HICR); ++ if ((hicr & E1000_HICR_EN) == 0) { ++ DEBUGOUT("E1000_HOST_EN bit disabled.\n"); ++ return -E1000_ERR_HOST_INTERFACE_COMMAND; ++ } ++ /* check the previous command is completed */ ++ for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) { ++ hicr = E1000_READ_REG(hw, HICR); ++ if (!(hicr & E1000_HICR_C)) ++ break; ++ msec_delay_irq(1); ++ } ++ ++ if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) { ++ DEBUGOUT("Previous command timeout failed .\n"); ++ return -E1000_ERR_HOST_INTERFACE_COMMAND; ++ } ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * This function writes the buffer content at the offset given on the host if. ++ * It also does alignment considerations to do the writes in most efficient way. ++ * Also fills up the sum of the buffer in *buffer parameter. ++ * ++ * returns - E1000_SUCCESS for success. ++ ****************************************************************************/ ++int32_t ++e1000_mng_host_if_write(struct e1000_hw * hw, uint8_t *buffer, ++ uint16_t length, uint16_t offset, uint8_t *sum) ++{ ++ uint8_t *tmp; ++ uint8_t *bufptr = buffer; ++ uint32_t data = 0; ++ uint16_t remaining, i, j, prev_bytes; ++ ++ /* sum = only sum of the data and it is not checksum */ ++ ++ if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH) { ++ return -E1000_ERR_PARAM; ++ } ++ ++ tmp = (uint8_t *)&data; ++ prev_bytes = offset & 0x3; ++ offset &= 0xFFFC; ++ offset >>= 2; ++ ++ if (prev_bytes) { ++ data = E1000_READ_REG_ARRAY_DWORD(hw, HOST_IF, offset); ++ for (j = prev_bytes; j < sizeof(uint32_t); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, offset, data); ++ length -= j - prev_bytes; ++ offset++; ++ } ++ ++ remaining = length & 0x3; ++ length -= remaining; ++ ++ /* Calculate length in DWORDs */ ++ length >>= 2; ++ ++ /* The device driver writes the relevant command block into the ++ * ram area. 
*/ ++ for (i = 0; i < length; i++) { ++ for (j = 0; j < sizeof(uint32_t); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ ++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, offset + i, data); ++ } ++ if (remaining) { ++ for (j = 0; j < sizeof(uint32_t); j++) { ++ if (j < remaining) ++ *(tmp + j) = *bufptr++; ++ else ++ *(tmp + j) = 0; ++ ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, offset + i, data); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/***************************************************************************** ++ * This function writes the command header after does the checksum calculation. ++ * ++ * returns - E1000_SUCCESS for success. ++ ****************************************************************************/ ++int32_t ++e1000_mng_write_cmd_header(struct e1000_hw * hw, ++ struct e1000_host_mng_command_header * hdr) ++{ ++ uint16_t i; ++ uint8_t sum; ++ uint8_t *buffer; ++ ++ /* Write the whole command header structure which includes sum of ++ * the buffer */ ++ ++ uint16_t length = sizeof(struct e1000_host_mng_command_header); ++ ++ sum = hdr->checksum; ++ hdr->checksum = 0; ++ ++ buffer = (uint8_t *) hdr; ++ i = length; ++ while (i--) ++ sum += buffer[i]; ++ ++ hdr->checksum = 0 - sum; ++ ++ length >>= 2; ++ /* The device driver writes the relevant command block into the ram area. */ ++ for (i = 0; i < length; i++) { ++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, i, *((uint32_t *) hdr + i)); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/***************************************************************************** ++ * This function indicates to ARC that a new command is pending which completes ++ * one write operation by the driver. ++ * ++ * returns - E1000_SUCCESS for success. ++ ****************************************************************************/ ++int32_t ++e1000_mng_write_commit(struct e1000_hw * hw) ++{ ++ uint32_t hicr; ++ ++ hicr = E1000_READ_REG(hw, HICR); ++ /* Setting this bit tells the ARC that a new command is pending. */ ++ E1000_WRITE_REG(hw, HICR, hicr | E1000_HICR_C); ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/***************************************************************************** ++ * This function checks the mode of the firmware. ++ * ++ * returns - TRUE when the mode is IAMT or FALSE. ++ ****************************************************************************/ ++boolean_t ++e1000_check_mng_mode(struct e1000_hw *hw) ++{ ++ uint32_t fwsm; ++ ++ fwsm = E1000_READ_REG(hw, FWSM); ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ if ((fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_MNG_ICH_IAMT_MODE << E1000_FWSM_MODE_SHIFT)) ++ return TRUE; ++ } else if ((fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)) ++ return TRUE; ++ ++ return FALSE; ++} ++ ++ ++/***************************************************************************** ++ * This function writes the dhcp info . 
++ ****************************************************************************/ ++int32_t ++e1000_mng_write_dhcp_info(struct e1000_hw * hw, uint8_t *buffer, ++ uint16_t length) ++{ ++ int32_t ret_val; ++ struct e1000_host_mng_command_header hdr; ++ ++ hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD; ++ hdr.command_length = length; ++ hdr.reserved1 = 0; ++ hdr.reserved2 = 0; ++ hdr.checksum = 0; ++ ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val == E1000_SUCCESS) { ++ ret_val = e1000_mng_host_if_write(hw, buffer, length, sizeof(hdr), ++ &(hdr.checksum)); ++ if (ret_val == E1000_SUCCESS) { ++ ret_val = e1000_mng_write_cmd_header(hw, &hdr); ++ if (ret_val == E1000_SUCCESS) ++ ret_val = e1000_mng_write_commit(hw); ++ } ++ } ++ return ret_val; ++} ++ ++ ++/***************************************************************************** ++ * This function calculates the checksum. ++ * ++ * returns - checksum of buffer contents. ++ ****************************************************************************/ ++uint8_t ++e1000_calculate_mng_checksum(char *buffer, uint32_t length) ++{ ++ uint8_t sum = 0; ++ uint32_t i; ++ ++ if (!buffer) ++ return 0; ++ ++ for (i=0; i < length; i++) ++ sum += buffer[i]; ++ ++ return (uint8_t) (0 - sum); ++} ++ ++/***************************************************************************** ++ * This function checks whether tx pkt filtering needs to be enabled or not. ++ * ++ * returns - TRUE for packet filtering or FALSE. ++ ****************************************************************************/ ++boolean_t ++e1000_enable_tx_pkt_filtering(struct e1000_hw *hw) ++{ ++ /* called in init as well as watchdog timer functions */ ++ ++ int32_t ret_val, checksum; ++ boolean_t tx_filter = FALSE; ++ struct e1000_host_mng_dhcp_cookie *hdr = &(hw->mng_cookie); ++ uint8_t *buffer = (uint8_t *) &(hw->mng_cookie); ++ ++ if (e1000_check_mng_mode(hw)) { ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val == E1000_SUCCESS) { ++ ret_val = e1000_host_if_read_cookie(hw, buffer); ++ if (ret_val == E1000_SUCCESS) { ++ checksum = hdr->checksum; ++ hdr->checksum = 0; ++ if ((hdr->signature == E1000_IAMT_SIGNATURE) && ++ checksum == e1000_calculate_mng_checksum((char *)buffer, ++ E1000_MNG_DHCP_COOKIE_LENGTH)) { ++ if (hdr->status & ++ E1000_MNG_DHCP_COOKIE_STATUS_PARSING_SUPPORT) ++ tx_filter = TRUE; ++ } else ++ tx_filter = TRUE; ++ } else ++ tx_filter = TRUE; ++ } ++ } ++ ++ hw->tx_pkt_filtering = tx_filter; ++ return tx_filter; ++} ++ ++/****************************************************************************** ++ * Verifies the hardware needs to allow ARPs to be processed by the host ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * returns: - TRUE/FALSE ++ * ++ *****************************************************************************/ ++uint32_t ++e1000_enable_mng_pass_thru(struct e1000_hw *hw) ++{ ++ uint32_t manc; ++ uint32_t fwsm, factps; ++ ++ if (hw->asf_firmware_present) { ++ manc = E1000_READ_REG(hw, MANC); ++ ++ if (!(manc & E1000_MANC_RCV_TCO_EN) || ++ !(manc & E1000_MANC_EN_MAC_ADDR_FILTER)) ++ return FALSE; ++ if (e1000_arc_subsystem_valid(hw) == TRUE) { ++ fwsm = E1000_READ_REG(hw, FWSM); ++ factps = E1000_READ_REG(hw, FACTPS); ++ ++ if (((fwsm & E1000_FWSM_MODE_MASK) == ++ (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT)) && ++ (factps & E1000_FACTPS_MNGCG)) ++ return TRUE; ++ } else ++ if ((manc & E1000_MANC_SMBUS_EN) && !(manc & E1000_MANC_ASF_EN)) ++ return TRUE; ++ } ++ return FALSE; ++} ++ ++static int32_t 
++e1000_polarity_reversal_workaround(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t mii_status_reg; ++ uint16_t i; ++ ++ /* Polarity reversal workaround for forced 10F/10H links. */ ++ ++ /* Disable the transmitter on the PHY */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFFF); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); ++ if (ret_val) ++ return ret_val; ++ ++ /* This loop will early-out if the NO link condition has been met. */ ++ for (i = PHY_FORCE_TIME; i > 0; i--) { ++ /* Read the MII Status Register and wait for Link Status bit ++ * to be clear. ++ */ ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if ((mii_status_reg & ~MII_SR_LINK_STATUS) == 0) break; ++ msec_delay_irq(100); ++ } ++ ++ /* Recommended delay time after link has been lost */ ++ msec_delay_irq(1000); ++ ++ /* Now we will re-enable th transmitter on the PHY */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); ++ if (ret_val) ++ return ret_val; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFF0); ++ if (ret_val) ++ return ret_val; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFF00); ++ if (ret_val) ++ return ret_val; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x0000); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); ++ if (ret_val) ++ return ret_val; ++ ++ /* This loop will early-out if the link condition has been met. */ ++ for (i = PHY_FORCE_TIME; i > 0; i--) { ++ /* Read the MII Status Register and wait for Link Status bit ++ * to be set. ++ */ ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (mii_status_reg & MII_SR_LINK_STATUS) break; ++ msec_delay_irq(100); ++ } ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * ++ * Disables PCI-Express master access. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - none. ++ * ++ ***************************************************************************/ ++void ++e1000_set_pci_express_master_disable(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ ++ DEBUGFUNC("e1000_set_pci_express_master_disable"); ++ ++ if (hw->bus_type != e1000_bus_type_pci_express) ++ return; ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++} ++ ++/*************************************************************************** ++ * ++ * Enables PCI-Express master access. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - none. 
++ * ++ ***************************************************************************/ ++void ++e1000_enable_pciex_master(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ ++ DEBUGFUNC("e1000_enable_pciex_master"); ++ ++ if (hw->bus_type != e1000_bus_type_pci_express) ++ return; ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ctrl &= ~E1000_CTRL_GIO_MASTER_DISABLE; ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++} ++ ++/******************************************************************************* ++ * ++ * Disables PCI-Express master access and verifies there are no pending requests ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_MASTER_REQUESTS_PENDING if master disable bit hasn't ++ * caused the master requests to be disabled. ++ * E1000_SUCCESS master requests disabled. ++ * ++ ******************************************************************************/ ++int32_t ++e1000_disable_pciex_master(struct e1000_hw *hw) ++{ ++ int32_t timeout = MASTER_DISABLE_TIMEOUT; /* 80ms */ ++ ++ DEBUGFUNC("e1000_disable_pciex_master"); ++ ++ if (hw->bus_type != e1000_bus_type_pci_express) ++ return E1000_SUCCESS; ++ ++ e1000_set_pci_express_master_disable(hw); ++ ++ while (timeout) { ++ if (!(E1000_READ_REG(hw, STATUS) & E1000_STATUS_GIO_MASTER_ENABLE)) ++ break; ++ else ++ usec_delay(100); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("Master requests are pending.\n"); ++ return -E1000_ERR_MASTER_REQUESTS_PENDING; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/******************************************************************************* ++ * ++ * Check for EEPROM Auto Read bit done. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_RESET if fail to reset MAC ++ * E1000_SUCCESS at any other case. ++ * ++ ******************************************************************************/ ++int32_t ++e1000_get_auto_rd_done(struct e1000_hw *hw) ++{ ++ int32_t timeout = AUTO_READ_DONE_TIMEOUT; ++ ++ DEBUGFUNC("e1000_get_auto_rd_done"); ++ ++ switch (hw->mac_type) { ++ default: ++ msec_delay(5); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ while (timeout) { ++ if (E1000_READ_REG(hw, EECD) & E1000_EECD_AUTO_RD) ++ break; ++ else msec_delay(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("Auto read by HW from EEPROM has not completed.\n"); ++ return -E1000_ERR_RESET; ++ } ++ break; ++ } ++ ++ /* PHY configuration from NVM just starts after EECD_AUTO_RD sets to high. ++ * Need to wait for PHY configuration completion before accessing NVM ++ * and PHY. */ ++ if (hw->mac_type == e1000_82573) ++ msec_delay(25); ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * Checks if the PHY configuration is done ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_RESET if fail to reset MAC ++ * E1000_SUCCESS at any other case. 
++ * ++ ***************************************************************************/ ++int32_t ++e1000_get_phy_cfg_done(struct e1000_hw *hw) ++{ ++ int32_t timeout = PHY_CFG_TIMEOUT; ++ uint32_t cfg_mask = E1000_EEPROM_CFG_DONE; ++ ++ DEBUGFUNC("e1000_get_phy_cfg_done"); ++ ++ switch (hw->mac_type) { ++ default: ++ msec_delay_irq(10); ++ break; ++ case e1000_80003es2lan: ++ /* Separate *_CFG_DONE_* bit for each port */ ++ if (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1) ++ cfg_mask = E1000_EEPROM_CFG_DONE_PORT_1; ++ /* Fall Through */ ++ case e1000_82571: ++ case e1000_82572: ++ while (timeout) { ++ if (E1000_READ_REG(hw, EEMNGCTL) & cfg_mask) ++ break; ++ else ++ msec_delay(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("MNG configuration cycle has not completed.\n"); ++ return -E1000_ERR_RESET; ++ } ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * ++ * Using the combination of SMBI and SWESMBI semaphore bits when resetting ++ * adapter or Eeprom access. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_EEPROM if fail to access EEPROM. ++ * E1000_SUCCESS at any other case. ++ * ++ ***************************************************************************/ ++int32_t ++e1000_get_hw_eeprom_semaphore(struct e1000_hw *hw) ++{ ++ int32_t timeout; ++ uint32_t swsm; ++ ++ DEBUGFUNC("e1000_get_hw_eeprom_semaphore"); ++ ++ if (!hw->eeprom_semaphore_present) ++ return E1000_SUCCESS; ++ ++ if (hw->mac_type == e1000_80003es2lan) { ++ /* Get the SW semaphore. */ ++ if (e1000_get_software_semaphore(hw) != E1000_SUCCESS) ++ return -E1000_ERR_EEPROM; ++ } ++ ++ /* Get the FW semaphore. */ ++ timeout = hw->eeprom.word_size + 1; ++ while (timeout) { ++ swsm = E1000_READ_REG(hw, SWSM); ++ swsm |= E1000_SWSM_SWESMBI; ++ E1000_WRITE_REG(hw, SWSM, swsm); ++ /* if we managed to set the bit we got the semaphore. */ ++ swsm = E1000_READ_REG(hw, SWSM); ++ if (swsm & E1000_SWSM_SWESMBI) ++ break; ++ ++ usec_delay(50); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ /* Release semaphores */ ++ e1000_put_hw_eeprom_semaphore(hw); ++ DEBUGOUT("Driver can't access the Eeprom - SWESMBI bit is set.\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * This function clears HW semaphore bits. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - None. ++ * ++ ***************************************************************************/ ++void ++e1000_put_hw_eeprom_semaphore(struct e1000_hw *hw) ++{ ++ uint32_t swsm; ++ ++ DEBUGFUNC("e1000_put_hw_eeprom_semaphore"); ++ ++ if (!hw->eeprom_semaphore_present) ++ return; ++ ++ swsm = E1000_READ_REG(hw, SWSM); ++ if (hw->mac_type == e1000_80003es2lan) { ++ /* Release both semaphores. */ ++ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); ++ } else ++ swsm &= ~(E1000_SWSM_SWESMBI); ++ E1000_WRITE_REG(hw, SWSM, swsm); ++} ++ ++/*************************************************************************** ++ * ++ * Obtaining software semaphore bit (SMBI) before resetting PHY. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_RESET if fail to obtain semaphore. ++ * E1000_SUCCESS at any other case. 
++ * ++ ***************************************************************************/ ++int32_t ++e1000_get_software_semaphore(struct e1000_hw *hw) ++{ ++ int32_t timeout = hw->eeprom.word_size + 1; ++ uint32_t swsm; ++ ++ DEBUGFUNC("e1000_get_software_semaphore"); ++ ++ if (hw->mac_type != e1000_80003es2lan) ++ return E1000_SUCCESS; ++ ++ while (timeout) { ++ swsm = E1000_READ_REG(hw, SWSM); ++ /* If SMBI bit cleared, it is now set and we hold the semaphore */ ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ msec_delay_irq(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); ++ return -E1000_ERR_RESET; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * ++ * Release semaphore bit (SMBI). ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ ***************************************************************************/ ++void ++e1000_release_software_semaphore(struct e1000_hw *hw) ++{ ++ uint32_t swsm; ++ ++ DEBUGFUNC("e1000_release_software_semaphore"); ++ ++ if (hw->mac_type != e1000_80003es2lan) ++ return; ++ ++ swsm = E1000_READ_REG(hw, SWSM); ++ /* Release the SW semaphores.*/ ++ swsm &= ~E1000_SWSM_SMBI; ++ E1000_WRITE_REG(hw, SWSM, swsm); ++} ++ ++/****************************************************************************** ++ * Checks if PHY reset is blocked due to SOL/IDER session, for example. ++ * Returning E1000_BLK_PHY_RESET isn't necessarily an error. But it's up to ++ * the caller to figure out how to deal with it. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_BLK_PHY_RESET ++ * E1000_SUCCESS ++ * ++ *****************************************************************************/ ++int32_t ++e1000_check_phy_reset_block(struct e1000_hw *hw) ++{ ++ uint32_t manc = 0; ++ uint32_t fwsm = 0; ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ fwsm = E1000_READ_REG(hw, FWSM); ++ return (fwsm & E1000_FWSM_RSPCIPHY) ? E1000_SUCCESS ++ : E1000_BLK_PHY_RESET; ++ } ++ ++ if (hw->mac_type > e1000_82547_rev_2) ++ manc = E1000_READ_REG(hw, MANC); ++ return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? ++ E1000_BLK_PHY_RESET : E1000_SUCCESS; ++} ++ ++uint8_t ++e1000_arc_subsystem_valid(struct e1000_hw *hw) ++{ ++ uint32_t fwsm; ++ ++ /* On 8257x silicon, registers in the range of 0x8800 - 0x8FFC ++ * may not be provided a DMA clock when no manageability features are ++ * enabled. We do not want to perform any reads/writes to these registers ++ * if this is the case. We read FWSM to determine the manageability mode. ++ */ ++ switch (hw->mac_type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_80003es2lan: ++ fwsm = E1000_READ_REG(hw, FWSM); ++ if ((fwsm & E1000_FWSM_MODE_MASK) != 0) ++ return TRUE; ++ break; ++ case e1000_ich8lan: ++ return TRUE; ++ default: ++ break; ++ } ++ return FALSE; ++} ++ ++ ++/****************************************************************************** ++ * Configure PCI-Ex no-snoop ++ * ++ * hw - Struct containing variables accessed by shared code. ++ * no_snoop - Bitmap of no-snoop events. 
++ * ++ * returns: E1000_SUCCESS ++ * ++ *****************************************************************************/ ++int32_t ++e1000_set_pci_ex_no_snoop(struct e1000_hw *hw, uint32_t no_snoop) ++{ ++ uint32_t gcr_reg = 0; ++ ++ DEBUGFUNC("e1000_set_pci_ex_no_snoop"); ++ ++ if (hw->bus_type == e1000_bus_type_unknown) ++ e1000_get_bus_info(hw); ++ ++ if (hw->bus_type != e1000_bus_type_pci_express) ++ return E1000_SUCCESS; ++ ++ if (no_snoop) { ++ gcr_reg = E1000_READ_REG(hw, GCR); ++ gcr_reg &= ~(PCI_EX_NO_SNOOP_ALL); ++ gcr_reg |= no_snoop; ++ E1000_WRITE_REG(hw, GCR, gcr_reg); ++ } ++ if (hw->mac_type == e1000_ich8lan) { ++ uint32_t ctrl_ext; ++ ++ E1000_WRITE_REG(hw, GCR, PCI_EX_82566_SNOOP_ALL); ++ ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * ++ * Get software semaphore FLAG bit (SWFLAG). ++ * SWFLAG is used to synchronize the access to all shared resource between ++ * SW, FW and HW. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ ***************************************************************************/ ++int32_t ++e1000_get_software_flag(struct e1000_hw *hw) ++{ ++ int32_t timeout = PHY_CFG_TIMEOUT; ++ uint32_t extcnf_ctrl; ++ ++ DEBUGFUNC("e1000_get_software_flag"); ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ while (timeout) { ++ extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL); ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG; ++ E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl); ++ ++ extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL); ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) ++ break; ++ msec_delay_irq(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("FW or HW locks the resource too long.\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * ++ * Release software semaphore FLAG bit (SWFLAG). ++ * SWFLAG is used to synchronize the access to all shared resource between ++ * SW, FW and HW. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ ***************************************************************************/ ++void ++e1000_release_software_flag(struct e1000_hw *hw) ++{ ++ uint32_t extcnf_ctrl; ++ ++ DEBUGFUNC("e1000_release_software_flag"); ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ extcnf_ctrl= E1000_READ_REG(hw, EXTCNF_CTRL); ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; ++ E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl); ++ } ++ ++ return; ++} ++ ++/*************************************************************************** ++ * ++ * Disable dynamic power down mode in ife PHY. ++ * It can be used to workaround band-gap problem. 
++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ ***************************************************************************/ ++int32_t ++e1000_ife_disable_dynamic_power_down(struct e1000_hw *hw) ++{ ++ uint16_t phy_data; ++ int32_t ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_ife_disable_dynamic_power_down"); ++ ++ if (hw->phy_type == e1000_phy_ife) { ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= IFE_PSC_DISABLE_DYNAMIC_POWER_DOWN; ++ ret_val = e1000_write_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, phy_data); ++ } ++ ++ return ret_val; ++} ++ ++/*************************************************************************** ++ * ++ * Enable dynamic power down mode in ife PHY. ++ * It can be used to workaround band-gap problem. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ ***************************************************************************/ ++int32_t ++e1000_ife_enable_dynamic_power_down(struct e1000_hw *hw) ++{ ++ uint16_t phy_data; ++ int32_t ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_ife_enable_dynamic_power_down"); ++ ++ if (hw->phy_type == e1000_phy_ife) { ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IFE_PSC_DISABLE_DYNAMIC_POWER_DOWN; ++ ret_val = e1000_write_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, phy_data); ++ } ++ ++ return ret_val; ++} ++ ++/****************************************************************************** ++ * Reads a 16 bit word or words from the EEPROM using the ICH8's flash access ++ * register. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to read ++ * data - word read from the EEPROM ++ * words - number of words to read ++ *****************************************************************************/ ++int32_t ++e1000_read_eeprom_ich8(struct e1000_hw *hw, uint16_t offset, uint16_t words, ++ uint16_t *data) ++{ ++ int32_t error = E1000_SUCCESS; ++ uint32_t flash_bank = 0; ++ uint32_t act_offset = 0; ++ uint32_t bank_offset = 0; ++ uint16_t word = 0; ++ uint16_t i = 0; ++ ++ /* We need to know which is the valid flash bank. In the event ++ * that we didn't allocate eeprom_shadow_ram, we may not be ++ * managing flash_bank. So it cannot be trusted and needs ++ * to be updated with each read. ++ */ ++ /* Value of bit 22 corresponds to the flash bank we're on. */ ++ flash_bank = (E1000_READ_REG(hw, EECD) & E1000_EECD_SEC1VAL) ? 1 : 0; ++ ++ /* Adjust offset appropriately if we're on bank 1 - adjust for word size */ ++ bank_offset = flash_bank * (hw->flash_bank_size * 2); ++ ++ error = e1000_get_software_flag(hw); ++ if (error != E1000_SUCCESS) ++ return error; ++ ++ for (i = 0; i < words; i++) { ++ if (hw->eeprom_shadow_ram != NULL && ++ hw->eeprom_shadow_ram[offset+i].modified == TRUE) { ++ data[i] = hw->eeprom_shadow_ram[offset+i].eeprom_word; ++ } else { ++ /* The NVM part needs a byte offset, hence * 2 */ ++ act_offset = bank_offset + ((offset + i) * 2); ++ error = e1000_read_ich8_word(hw, act_offset, &word); ++ if (error != E1000_SUCCESS) ++ break; ++ data[i] = word; ++ } ++ } ++ ++ e1000_release_software_flag(hw); ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Writes a 16 bit word or words to the EEPROM using the ICH8's flash access ++ * register. 
Actually, writes are written to the shadow ram cache in the hw ++ * structure hw->e1000_shadow_ram. e1000_commit_shadow_ram flushes this to ++ * the NVM, which occurs when the NVM checksum is updated. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to write ++ * words - number of words to write ++ * data - words to write to the EEPROM ++ *****************************************************************************/ ++int32_t ++e1000_write_eeprom_ich8(struct e1000_hw *hw, uint16_t offset, uint16_t words, ++ uint16_t *data) ++{ ++ uint32_t i = 0; ++ int32_t error = E1000_SUCCESS; ++ ++ error = e1000_get_software_flag(hw); ++ if (error != E1000_SUCCESS) ++ return error; ++ ++ /* A driver can write to the NVM only if it has eeprom_shadow_ram ++ * allocated. Subsequent reads to the modified words are read from ++ * this cached structure as well. Writes will only go into this ++ * cached structure unless it's followed by a call to ++ * e1000_update_eeprom_checksum() where it will commit the changes ++ * and clear the "modified" field. ++ */ ++ if (hw->eeprom_shadow_ram != NULL) { ++ for (i = 0; i < words; i++) { ++ if ((offset + i) < E1000_SHADOW_RAM_WORDS) { ++ hw->eeprom_shadow_ram[offset+i].modified = TRUE; ++ hw->eeprom_shadow_ram[offset+i].eeprom_word = data[i]; ++ } else { ++ error = -E1000_ERR_EEPROM; ++ break; ++ } ++ } ++ } else { ++ /* Drivers have the option to not allocate eeprom_shadow_ram as long ++ * as they don't perform any NVM writes. An attempt in doing so ++ * will result in this error. ++ */ ++ error = -E1000_ERR_EEPROM; ++ } ++ ++ e1000_release_software_flag(hw); ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * This function does initial flash setup so that a new read/write/erase cycle ++ * can be started. ++ * ++ * hw - The pointer to the hw structure ++ ****************************************************************************/ ++int32_t ++e1000_ich8_cycle_init(struct e1000_hw *hw) ++{ ++ union ich8_hws_flash_status hsfsts; ++ int32_t error = E1000_ERR_EEPROM; ++ int32_t i = 0; ++ ++ DEBUGFUNC("e1000_ich8_cycle_init"); ++ ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ ++ /* May be check the Flash Des Valid bit in Hw status */ ++ if (hsfsts.hsf_status.fldesvalid == 0) { ++ DEBUGOUT("Flash descriptor invalid. SW Sequencing must be used."); ++ return error; ++ } ++ ++ /* Clear FCERR in Hw status by writing 1 */ ++ /* Clear DAEL in Hw status by writing a 1 */ ++ hsfsts.hsf_status.flcerr = 1; ++ hsfsts.hsf_status.dael = 1; ++ ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFSTS, hsfsts.regval); ++ ++ /* Either we should have a hardware SPI cycle in progress bit to check ++ * against, in order to start a new cycle or FDONE bit should be changed ++ * in the hardware so that it is 1 after harware reset, which can then be ++ * used as an indication whether a cycle is in progress or has been ++ * completed .. we should also have some software semaphore mechanism to ++ * guard FDONE or the cycle in progress bit so that two threads access to ++ * those bits can be sequentiallized or a way so that 2 threads dont ++ * start the cycle at the same time */ ++ ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ /* There is no cycle running at present, so we can start a cycle */ ++ /* Begin by setting Flash Cycle Done. 
*/ ++ hsfsts.hsf_status.flcdone = 1; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFSTS, hsfsts.regval); ++ error = E1000_SUCCESS; ++ } else { ++ /* otherwise poll for sometime so the current cycle has a chance ++ * to end before giving up. */ ++ for (i = 0; i < ICH8_FLASH_COMMAND_TIMEOUT; i++) { ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ error = E1000_SUCCESS; ++ break; ++ } ++ usec_delay(1); ++ } ++ if (error == E1000_SUCCESS) { ++ /* Successful in waiting for previous cycle to timeout, ++ * now set the Flash Cycle Done. */ ++ hsfsts.hsf_status.flcdone = 1; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFSTS, hsfsts.regval); ++ } else { ++ DEBUGOUT("Flash controller busy, cannot get access"); ++ } ++ } ++ return error; ++} ++ ++/****************************************************************************** ++ * This function starts a flash cycle and waits for its completion ++ * ++ * hw - The pointer to the hw structure ++ ****************************************************************************/ ++int32_t ++e1000_ich8_flash_cycle(struct e1000_hw *hw, uint32_t timeout) ++{ ++ union ich8_hws_flash_ctrl hsflctl; ++ union ich8_hws_flash_status hsfsts; ++ int32_t error = E1000_ERR_EEPROM; ++ uint32_t i = 0; ++ ++ /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ ++ hsflctl.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcgo = 1; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* wait till FDONE bit is set to 1 */ ++ do { ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcdone == 1) ++ break; ++ usec_delay(1); ++ i++; ++ } while (i < timeout); ++ if (hsfsts.hsf_status.flcdone == 1 && hsfsts.hsf_status.flcerr == 0) { ++ error = E1000_SUCCESS; ++ } ++ return error; ++} ++ ++/****************************************************************************** ++ * Reads a byte or word from the NVM using the ICH8 flash access registers. ++ * ++ * hw - The pointer to the hw structure ++ * index - The index of the byte or word to read. ++ * size - Size of data to read, 1=byte 2=word ++ * data - Pointer to the word to store the value read. ++ *****************************************************************************/ ++int32_t ++e1000_read_ich8_data(struct e1000_hw *hw, uint32_t index, ++ uint32_t size, uint16_t* data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ uint32_t flash_linear_address; ++ uint32_t flash_data = 0; ++ int32_t error = -E1000_ERR_EEPROM; ++ int32_t count = 0; ++ ++ DEBUGFUNC("e1000_read_ich8_data"); ++ ++ if (size < 1 || size > 2 || data == 0x0 || ++ index > ICH8_FLASH_LINEAR_ADDR_MASK) ++ return error; ++ ++ flash_linear_address = (ICH8_FLASH_LINEAR_ADDR_MASK & index) + ++ hw->flash_base_addr; ++ ++ do { ++ usec_delay(1); ++ /* Steps */ ++ error = e1000_ich8_cycle_init(hw); ++ if (error != E1000_SUCCESS) ++ break; ++ ++ hsflctl.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. 
*/ ++ hsflctl.hsf_ctrl.fldbcount = size - 1; ++ hsflctl.hsf_ctrl.flcycle = ICH8_CYCLE_READ; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* Write the last 24 bits of index into Flash Linear address field in ++ * Flash Address */ ++ /* TODO: TBD maybe check the index against the size of flash */ ++ ++ E1000_WRITE_ICH8_REG(hw, ICH8_FLASH_FADDR, flash_linear_address); ++ ++ error = e1000_ich8_flash_cycle(hw, ICH8_FLASH_COMMAND_TIMEOUT); ++ ++ /* Check if FCERR is set to 1, if set to 1, clear it and try the whole ++ * sequence a few more times, else read in (shift in) the Flash Data0, ++ * the order is least significant byte first msb to lsb */ ++ if (error == E1000_SUCCESS) { ++ flash_data = E1000_READ_ICH8_REG(hw, ICH8_FLASH_FDATA0); ++ if (size == 1) { ++ *data = (uint8_t)(flash_data & 0x000000FF); ++ } else if (size == 2) { ++ *data = (uint16_t)(flash_data & 0x0000FFFF); ++ } ++ break; ++ } else { ++ /* If we've gotten here, then things are probably completely hosed, ++ * but if the error condition is detected, it won't hurt to give ++ * it another try...ICH8_FLASH_CYCLE_REPEAT_COUNT times. ++ */ ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* Repeat for some time before giving up. */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ DEBUGOUT("Timeout error - flash cycle did not complete."); ++ break; ++ } ++ } ++ } while (count++ < ICH8_FLASH_CYCLE_REPEAT_COUNT); ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Writes One /two bytes to the NVM using the ICH8 flash access registers. ++ * ++ * hw - The pointer to the hw structure ++ * index - The index of the byte/word to read. ++ * size - Size of data to read, 1=byte 2=word ++ * data - The byte(s) to write to the NVM. ++ *****************************************************************************/ ++int32_t ++e1000_write_ich8_data(struct e1000_hw *hw, uint32_t index, uint32_t size, ++ uint16_t data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ uint32_t flash_linear_address; ++ uint32_t flash_data = 0; ++ int32_t error = -E1000_ERR_EEPROM; ++ int32_t count = 0; ++ ++ DEBUGFUNC("e1000_write_ich8_data"); ++ ++ if (size < 1 || size > 2 || data > size * 0xff || ++ index > ICH8_FLASH_LINEAR_ADDR_MASK) ++ return error; ++ ++ flash_linear_address = (ICH8_FLASH_LINEAR_ADDR_MASK & index) + ++ hw->flash_base_addr; ++ ++ do { ++ usec_delay(1); ++ /* Steps */ ++ error = e1000_ich8_cycle_init(hw); ++ if (error != E1000_SUCCESS) ++ break; ++ ++ hsflctl.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. 
*/ ++ hsflctl.hsf_ctrl.fldbcount = size -1; ++ hsflctl.hsf_ctrl.flcycle = ICH8_CYCLE_WRITE; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* Write the last 24 bits of index into Flash Linear address field in ++ * Flash Address */ ++ E1000_WRITE_ICH8_REG(hw, ICH8_FLASH_FADDR, flash_linear_address); ++ ++ if (size == 1) ++ flash_data = (uint32_t)data & 0x00FF; ++ else ++ flash_data = (uint32_t)data; ++ ++ E1000_WRITE_ICH8_REG(hw, ICH8_FLASH_FDATA0, flash_data); ++ ++ /* check if FCERR is set to 1 , if set to 1, clear it and try the whole ++ * sequence a few more times else done */ ++ error = e1000_ich8_flash_cycle(hw, ICH8_FLASH_COMMAND_TIMEOUT); ++ if (error == E1000_SUCCESS) { ++ break; ++ } else { ++ /* If we're here, then things are most likely completely hosed, ++ * but if the error condition is detected, it won't hurt to give ++ * it another try...ICH8_FLASH_CYCLE_REPEAT_COUNT times. ++ */ ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* Repeat for some time before giving up. */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ DEBUGOUT("Timeout error - flash cycle did not complete."); ++ break; ++ } ++ } ++ } while (count++ < ICH8_FLASH_CYCLE_REPEAT_COUNT); ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Reads a single byte from the NVM using the ICH8 flash access registers. ++ * ++ * hw - pointer to e1000_hw structure ++ * index - The index of the byte to read. ++ * data - Pointer to a byte to store the value read. ++ *****************************************************************************/ ++int32_t ++e1000_read_ich8_byte(struct e1000_hw *hw, uint32_t index, uint8_t* data) ++{ ++ int32_t status = E1000_SUCCESS; ++ uint16_t word = 0; ++ ++ status = e1000_read_ich8_data(hw, index, 1, &word); ++ if (status == E1000_SUCCESS) { ++ *data = (uint8_t)word; ++ } ++ ++ return status; ++} ++ ++/****************************************************************************** ++ * Writes a single byte to the NVM using the ICH8 flash access registers. ++ * Performs verification by reading back the value and then going through ++ * a retry algorithm before giving up. ++ * ++ * hw - pointer to e1000_hw structure ++ * index - The index of the byte to write. ++ * byte - The byte to write to the NVM. ++ *****************************************************************************/ ++int32_t ++e1000_verify_write_ich8_byte(struct e1000_hw *hw, uint32_t index, uint8_t byte) ++{ ++ int32_t error = E1000_SUCCESS; ++ int32_t program_retries; ++ uint8_t temp_byte = 0; ++ ++ e1000_write_ich8_byte(hw, index, byte); ++ usec_delay(100); ++ ++ for (program_retries = 0; program_retries < 100; program_retries++) { ++ e1000_read_ich8_byte(hw, index, &temp_byte); ++ if (temp_byte == byte) ++ break; ++ usec_delay(10); ++ e1000_write_ich8_byte(hw, index, byte); ++ usec_delay(100); ++ } ++ if (program_retries == 100) ++ error = E1000_ERR_EEPROM; ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Writes a single byte to the NVM using the ICH8 flash access registers. ++ * ++ * hw - pointer to e1000_hw structure ++ * index - The index of the byte to read. ++ * data - The byte to write to the NVM. 
++ *****************************************************************************/ ++int32_t ++e1000_write_ich8_byte(struct e1000_hw *hw, uint32_t index, uint8_t data) ++{ ++ int32_t status = E1000_SUCCESS; ++ uint16_t word = (uint16_t)data; ++ ++ status = e1000_write_ich8_data(hw, index, 1, word); ++ ++ return status; ++} ++ ++/****************************************************************************** ++ * Reads a word from the NVM using the ICH8 flash access registers. ++ * ++ * hw - pointer to e1000_hw structure ++ * index - The starting byte index of the word to read. ++ * data - Pointer to a word to store the value read. ++ *****************************************************************************/ ++int32_t ++e1000_read_ich8_word(struct e1000_hw *hw, uint32_t index, uint16_t *data) ++{ ++ int32_t status = E1000_SUCCESS; ++ status = e1000_read_ich8_data(hw, index, 2, data); ++ return status; ++} ++ ++/****************************************************************************** ++ * Writes a word to the NVM using the ICH8 flash access registers. ++ * ++ * hw - pointer to e1000_hw structure ++ * index - The starting byte index of the word to read. ++ * data - The word to write to the NVM. ++ *****************************************************************************/ ++int32_t ++e1000_write_ich8_word(struct e1000_hw *hw, uint32_t index, uint16_t data) ++{ ++ int32_t status = E1000_SUCCESS; ++ status = e1000_write_ich8_data(hw, index, 2, data); ++ return status; ++} ++ ++/****************************************************************************** ++ * Erases the bank specified. Each bank is a 4k block. Segments are 0 based. ++ * segment N is 4096 * N + flash_reg_addr. ++ * ++ * hw - pointer to e1000_hw structure ++ * segment - 0 for first segment, 1 for second segment, etc. ++ *****************************************************************************/ ++int32_t ++e1000_erase_ich8_4k_segment(struct e1000_hw *hw, uint32_t segment) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ uint32_t flash_linear_address; ++ int32_t count = 0; ++ int32_t error = E1000_ERR_EEPROM; ++ int32_t iteration, seg_size; ++ int32_t sector_size; ++ int32_t j = 0; ++ int32_t error_flag = 0; ++ ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ ++ /* Determine HW Sector size: Read BERASE bits of Hw flash Status register */ ++ /* 00: The Hw sector is 256 bytes, hence we need to erase 16 ++ * consecutive sectors. The start index for the nth Hw sector can be ++ * calculated as = segment * 4096 + n * 256 ++ * 01: The Hw sector is 4K bytes, hence we need to erase 1 sector. 
++ * The start index for the nth Hw sector can be calculated ++ * as = segment * 4096 ++ * 10: Error condition ++ * 11: The Hw sector size is much bigger than the size asked to ++ * erase...error condition */ ++ if (hsfsts.hsf_status.berasesz == 0x0) { ++ /* Hw sector size 256 */ ++ sector_size = seg_size = ICH8_FLASH_SEG_SIZE_256; ++ iteration = ICH8_FLASH_SECTOR_SIZE / ICH8_FLASH_SEG_SIZE_256; ++ } else if (hsfsts.hsf_status.berasesz == 0x1) { ++ sector_size = seg_size = ICH8_FLASH_SEG_SIZE_4K; ++ iteration = 1; ++ } else if (hsfsts.hsf_status.berasesz == 0x3) { ++ sector_size = seg_size = ICH8_FLASH_SEG_SIZE_64K; ++ iteration = 1; ++ } else { ++ return error; ++ } ++ ++ for (j = 0; j < iteration ; j++) { ++ do { ++ count++; ++ /* Steps */ ++ error = e1000_ich8_cycle_init(hw); ++ if (error != E1000_SUCCESS) { ++ error_flag = 1; ++ break; ++ } ++ ++ /* Write a value 11 (block Erase) in Flash Cycle field in Hw flash ++ * Control */ ++ hsflctl.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcycle = ICH8_CYCLE_ERASE; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* Write the last 24 bits of an index within the block into Flash ++ * Linear address field in Flash Address. This probably needs to ++ * be calculated here based off the on-chip segment size and the ++ * software segment size assumed (4K) */ ++ /* TBD */ ++ flash_linear_address = segment * sector_size + j * seg_size; ++ flash_linear_address &= ICH8_FLASH_LINEAR_ADDR_MASK; ++ flash_linear_address += hw->flash_base_addr; ++ ++ E1000_WRITE_ICH8_REG(hw, ICH8_FLASH_FADDR, flash_linear_address); ++ ++ error = e1000_ich8_flash_cycle(hw, 1000000); ++ /* Check if FCERR is set to 1. If 1, clear it and try the whole ++ * sequence a few more times else Done */ ++ if (error == E1000_SUCCESS) { ++ break; ++ } else { ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* repeat for some time before giving up */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ error_flag = 1; ++ break; ++ } ++ } ++ } while ((count < ICH8_FLASH_CYCLE_REPEAT_COUNT) && !error_flag); ++ if (error_flag == 1) ++ break; ++ } ++ if (error_flag != 1) ++ error = E1000_SUCCESS; ++ return error; ++} ++ ++/****************************************************************************** ++ * ++ * Reverse duplex setting without breaking the link. 
++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ *****************************************************************************/ ++int32_t ++e1000_duplex_reversal(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ if (hw->phy_type != e1000_phy_igp_3) ++ return E1000_SUCCESS; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data ^= MII_CR_FULL_DUPLEX; ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, IGP3E1000_PHY_MISC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= IGP3_PHY_MISC_DUPLEX_MANUAL_SET; ++ ret_val = e1000_write_phy_reg(hw, IGP3E1000_PHY_MISC_CTRL, phy_data); ++ ++ return ret_val; ++} ++ ++int32_t ++e1000_init_lcd_from_nvm_config_region(struct e1000_hw *hw, ++ uint32_t cnf_base_addr, uint32_t cnf_size) ++{ ++ uint32_t ret_val = E1000_SUCCESS; ++ uint16_t word_addr, reg_data, reg_addr; ++ uint16_t i; ++ ++ /* cnf_base_addr is in DWORD */ ++ word_addr = (uint16_t)(cnf_base_addr << 1); ++ ++ /* cnf_size is returned in size of dwords */ ++ for (i = 0; i < cnf_size; i++) { ++ ret_val = e1000_read_eeprom(hw, (word_addr + i*2), 1, ®_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_eeprom(hw, (word_addr + i*2 + 1), 1, ®_addr); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_get_software_flag(hw); ++ if (ret_val != E1000_SUCCESS) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg_ex(hw, (uint32_t)reg_addr, reg_data); ++ ++ e1000_release_software_flag(hw); ++ } ++ ++ return ret_val; ++} ++ ++ ++int32_t ++e1000_init_lcd_from_nvm(struct e1000_hw *hw) ++{ ++ uint32_t reg_data, cnf_base_addr, cnf_size, ret_val, loop; ++ ++ if (hw->phy_type != e1000_phy_igp_3) ++ return E1000_SUCCESS; ++ ++ /* Check if SW needs configure the PHY */ ++ reg_data = E1000_READ_REG(hw, FEXTNVM); ++ if (!(reg_data & FEXTNVM_SW_CONFIG)) ++ return E1000_SUCCESS; ++ ++ /* Wait for basic configuration completes before proceeding*/ ++ loop = 0; ++ do { ++ reg_data = E1000_READ_REG(hw, STATUS) & E1000_STATUS_LAN_INIT_DONE; ++ usec_delay(100); ++ loop++; ++ } while ((!reg_data) && (loop < 50)); ++ ++ /* Clear the Init Done bit for the next init event */ ++ reg_data = E1000_READ_REG(hw, STATUS); ++ reg_data &= ~E1000_STATUS_LAN_INIT_DONE; ++ E1000_WRITE_REG(hw, STATUS, reg_data); ++ ++ /* Make sure HW does not configure LCD from PHY extended configuration ++ before SW configuration */ ++ reg_data = E1000_READ_REG(hw, EXTCNF_CTRL); ++ if ((reg_data & E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE) == 0x0000) { ++ reg_data = E1000_READ_REG(hw, EXTCNF_SIZE); ++ cnf_size = reg_data & E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH; ++ cnf_size >>= 16; ++ if (cnf_size) { ++ reg_data = E1000_READ_REG(hw, EXTCNF_CTRL); ++ cnf_base_addr = reg_data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER; ++ /* cnf_base_addr is in DWORD */ ++ cnf_base_addr >>= 16; ++ ++ /* Configure LCD from extended configuration region. */ ++ ret_val = e1000_init_lcd_from_nvm_config_region(hw, cnf_base_addr, ++ cnf_size); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++ ++ +--- linux/drivers/xenomai/net/drivers/e1000/e1000_param.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000_param.c 2021-04-07 16:01:27.381633946 +0800 +@@ -0,0 +1,906 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. 
All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000.h" ++ ++/* This is the only thing that needs to be changed to adjust the ++ * maximum number of ports that the driver can manage. ++ */ ++ ++#define E1000_MAX_NIC 32 ++ ++#define OPTION_UNSET -1 ++#define OPTION_DISABLED 0 ++#define OPTION_ENABLED 1 ++ ++/* All parameters are treated the same, as an integer array of values. ++ * This macro just reduces the need to repeat the same declaration code ++ * over and over (plus this helps to avoid typo bugs). ++ */ ++ ++#define E1000_PARAM_INIT { [0 ... E1000_MAX_NIC] = OPTION_UNSET } ++#ifndef module_param_array ++/* Module Parameters are always initialized to -1, so that the driver ++ * can tell the difference between no user specified value or the ++ * user asking for the default value. ++ * The true default values are loaded in when e1000_check_options is called. ++ * ++ * This is a GCC extension to ANSI C. ++ * See the item "Labeled Elements in Initializers" in the section ++ * "Extensions to the C Language Family" of the GCC documentation. 
++ */ ++ ++#define E1000_PARAM(X, desc) \ ++ static const int X[E1000_MAX_NIC+1] = E1000_PARAM_INIT; \ ++ MODULE_PARM(X, "1-" __MODULE_STRING(E1000_MAX_NIC) "i"); \ ++ MODULE_PARM_DESC(X, desc); ++#else ++#define E1000_PARAM(X, desc) \ ++ static int X[E1000_MAX_NIC+1] = E1000_PARAM_INIT; \ ++ static int num_##X = 0; \ ++ module_param_array_named(X, X, int, &num_##X, 0); \ ++ MODULE_PARM_DESC(X, desc); ++#endif ++ ++/* Transmit Descriptor Count ++ * ++ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers ++ * Valid Range: 80-4096 for 82544 and newer ++ * ++ * Default Value: 256 ++ */ ++ ++E1000_PARAM(TxDescriptors, "Number of transmit descriptors"); ++ ++/* Receive Descriptor Count ++ * ++ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers ++ * Valid Range: 80-4096 for 82544 and newer ++ * ++ * Default Value: 256 ++ */ ++ ++E1000_PARAM(RxDescriptors, "Number of receive descriptors"); ++ ++/* User Specified Speed Override ++ * ++ * Valid Range: 0, 10, 100, 1000 ++ * - 0 - auto-negotiate at all supported speeds ++ * - 10 - only link at 10 Mbps ++ * - 100 - only link at 100 Mbps ++ * - 1000 - only link at 1000 Mbps ++ * ++ * Default Value: 0 ++ */ ++ ++E1000_PARAM(Speed, "Speed setting"); ++ ++/* User Specified Duplex Override ++ * ++ * Valid Range: 0-2 ++ * - 0 - auto-negotiate for duplex ++ * - 1 - only link at half duplex ++ * - 2 - only link at full duplex ++ * ++ * Default Value: 0 ++ */ ++ ++E1000_PARAM(Duplex, "Duplex setting"); ++ ++/* Auto-negotiation Advertisement Override ++ * ++ * Valid Range: 0x01-0x0F, 0x20-0x2F (copper); 0x20 (fiber) ++ * ++ * The AutoNeg value is a bit mask describing which speed and duplex ++ * combinations should be advertised during auto-negotiation. ++ * The supported speed and duplex modes are listed below ++ * ++ * Bit 7 6 5 4 3 2 1 0 ++ * Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10 ++ * Duplex Full Full Half Full Half ++ * ++ * Default Value: 0x2F (copper); 0x20 (fiber) ++ */ ++ ++E1000_PARAM(AutoNeg, "Advertised auto-negotiation setting"); ++ ++/* User Specified Flow Control Override ++ * ++ * Valid Range: 0-3 ++ * - 0 - No Flow Control ++ * - 1 - Rx only, respond to PAUSE frames but do not generate them ++ * - 2 - Tx only, generate PAUSE frames but ignore them on receive ++ * - 3 - Full Flow Control Support ++ * ++ * Default Value: Read flow control settings from the EEPROM ++ */ ++ ++E1000_PARAM(FlowControl, "Flow Control setting"); ++ ++/* XsumRX - Receive Checksum Offload Enable/Disable ++ * ++ * Valid Range: 0, 1 ++ * - 0 - disables all checksum offload ++ * - 1 - enables receive IP/TCP/UDP checksum offload ++ * on 82543 and newer -based NICs ++ * ++ * Default Value: 1 ++ */ ++ ++E1000_PARAM(XsumRX, "Disable or enable Receive Checksum offload"); ++ ++/* Transmit Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 for rtnet ++ */ ++ ++E1000_PARAM(TxIntDelay, "Transmit Interrupt Delay"); ++ ++/* Transmit Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 ++ */ ++ ++E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay"); ++ ++/* Receive Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 ++ */ ++ ++E1000_PARAM(RxIntDelay, "Receive Interrupt Delay"); ++ ++/* Receive Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 for rtnet ++ */ ++ ++E1000_PARAM(RxAbsIntDelay, "Receive 
Absolute Interrupt Delay"); ++ ++/* Interrupt Throttle Rate (interrupts/sec) ++ * ++ * Valid Range: 100-100000 (0=off, 1=dynamic) ++ * ++ * Default Value: 0 for rtnet ++ */ ++ ++E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate"); ++ ++/* Enable Smart Power Down of the PHY ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 0 (disabled) ++ */ ++ ++E1000_PARAM(SmartPowerDownEnable, "Enable PHY smart power down"); ++ ++/* Enable Kumeran Lock Loss workaround ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 1 (enabled) ++ */ ++ ++E1000_PARAM(KumeranLockLoss, "Enable Kumeran lock loss workaround"); ++ ++#define AUTONEG_ADV_DEFAULT 0x2F ++#define AUTONEG_ADV_MASK 0x2F ++#define FLOW_CONTROL_DEFAULT FLOW_CONTROL_FULL ++ ++#define DEFAULT_RDTR 0 ++#define MAX_RXDELAY 0xFFFF ++#define MIN_RXDELAY 0 ++ ++#define DEFAULT_RADV 0 ++#define MAX_RXABSDELAY 0xFFFF ++#define MIN_RXABSDELAY 0 ++ ++#define DEFAULT_TIDV 0 ++#define MAX_TXDELAY 0xFFFF ++#define MIN_TXDELAY 0 ++ ++#define DEFAULT_TADV 0 ++#define MAX_TXABSDELAY 0xFFFF ++#define MIN_TXABSDELAY 0 ++ ++#define DEFAULT_ITR 0 ++#define MAX_ITR 100000 ++#define MIN_ITR 100 ++ ++struct e1000_option { ++ enum { enable_option, range_option, list_option } type; ++ char *name; ++ char *err; ++ int def; ++ union { ++ struct { /* range_option info */ ++ int min; ++ int max; ++ } r; ++ struct { /* list_option info */ ++ int nr; ++ struct e1000_opt_list { int i; char *str; } *p; ++ } l; ++ } arg; ++}; ++ ++static int e1000_validate_option(int *value, struct e1000_option *opt, ++ struct e1000_adapter *adapter) ++{ ++ if (*value == OPTION_UNSET) { ++ *value = opt->def; ++ return 0; ++ } ++ ++ switch (opt->type) { ++ case enable_option: ++ switch (*value) { ++ case OPTION_ENABLED: ++ DPRINTK(PROBE, INFO, "%s Enabled\n", opt->name); ++ return 0; ++ case OPTION_DISABLED: ++ DPRINTK(PROBE, INFO, "%s Disabled\n", opt->name); ++ return 0; ++ } ++ break; ++ case range_option: ++ if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) { ++ DPRINTK(PROBE, INFO, ++ "%s set to %i\n", opt->name, *value); ++ return 0; ++ } ++ break; ++ case list_option: { ++ int i; ++ struct e1000_opt_list *ent; ++ ++ for (i = 0; i < opt->arg.l.nr; i++) { ++ ent = &opt->arg.l.p[i]; ++ if (*value == ent->i) { ++ if (ent->str[0] != '\0') ++ DPRINTK(PROBE, INFO, "%s\n", ent->str); ++ return 0; ++ } ++ } ++ } ++ break; ++ default: ++ BUG(); ++ } ++ ++ DPRINTK(PROBE, INFO, "Invalid %s value specified (%i) %s\n", ++ opt->name, *value, opt->err); ++ *value = opt->def; ++ return -1; ++} ++ ++static void e1000_check_fiber_options(struct e1000_adapter *adapter); ++static void e1000_check_copper_options(struct e1000_adapter *adapter); ++ ++/** ++ * e1000_check_options - Range Checking for Command Line Parameters ++ * @adapter: board private structure ++ * ++ * This routine checks all command line parameters for valid user ++ * input. If an invalid value is given, or if no user specified ++ * value exists, a default value is used. The final value is stored ++ * in a variable in the adapter structure. 
++ **/ ++ ++void e1000_check_options(struct e1000_adapter *adapter) ++{ ++ int bd = adapter->bd_number; ++ if (bd >= E1000_MAX_NIC) { ++ DPRINTK(PROBE, NOTICE, ++ "Warning: no configuration for board #%i\n", bd); ++ DPRINTK(PROBE, NOTICE, "Using defaults for all values\n"); ++#ifndef module_param_array ++ bd = E1000_MAX_NIC; ++#endif ++ } ++ ++ { /* Transmit Descriptor Count */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Descriptors", ++ .err = "using default of " ++ __MODULE_STRING(E1000_DEFAULT_TXD), ++ .def = E1000_DEFAULT_TXD, ++ .arg = { .r = { .min = E1000_MIN_TXD }} ++ }; ++ struct e1000_tx_ring *tx_ring = adapter->tx_ring; ++ int i; ++ e1000_mac_type mac_type = adapter->hw.mac_type; ++ opt.arg.r.max = mac_type < e1000_82544 ? ++ E1000_MAX_TXD : E1000_MAX_82544_TXD; ++ ++#ifdef module_param_array ++ if (num_TxDescriptors > bd) { ++#endif ++ tx_ring->count = TxDescriptors[bd]; ++ e1000_validate_option(&tx_ring->count, &opt, adapter); ++ E1000_ROUNDUP(tx_ring->count, ++ REQ_TX_DESCRIPTOR_MULTIPLE); ++#ifdef module_param_array ++ } else { ++ tx_ring->count = opt.def; ++ } ++#endif ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ tx_ring[i].count = tx_ring->count; ++ } ++ { /* Receive Descriptor Count */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Descriptors", ++ .err = "using default of " ++ __MODULE_STRING(E1000_DEFAULT_RXD), ++ .def = E1000_DEFAULT_RXD, ++ .arg = { .r = { .min = E1000_MIN_RXD }} ++ }; ++ struct e1000_rx_ring *rx_ring = adapter->rx_ring; ++ int i; ++ e1000_mac_type mac_type = adapter->hw.mac_type; ++ opt.arg.r.max = mac_type < e1000_82544 ? E1000_MAX_RXD : ++ E1000_MAX_82544_RXD; ++ ++#ifdef module_param_array ++ if (num_RxDescriptors > bd) { ++#endif ++ rx_ring->count = RxDescriptors[bd]; ++ e1000_validate_option(&rx_ring->count, &opt, adapter); ++ E1000_ROUNDUP(rx_ring->count, ++ REQ_RX_DESCRIPTOR_MULTIPLE); ++#ifdef module_param_array ++ } else { ++ rx_ring->count = opt.def; ++ } ++#endif ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ rx_ring[i].count = rx_ring->count; ++ } ++ { /* Checksum Offload Enable/Disable */ ++ struct e1000_option opt = { ++ .type = enable_option, ++ .name = "Checksum Offload", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_XsumRX > bd) { ++#endif ++ int rx_csum = XsumRX[bd]; ++ e1000_validate_option(&rx_csum, &opt, adapter); ++ adapter->rx_csum = rx_csum; ++#ifdef module_param_array ++ } else { ++ adapter->rx_csum = opt.def; ++ } ++#endif ++ } ++ { /* Flow Control */ ++ ++ struct e1000_opt_list fc_list[] = ++ {{ e1000_fc_none, "Flow Control Disabled" }, ++ { e1000_fc_rx_pause,"Flow Control Receive Only" }, ++ { e1000_fc_tx_pause,"Flow Control Transmit Only" }, ++ { e1000_fc_full, "Flow Control Enabled" }, ++ { e1000_fc_default, "Flow Control Hardware Default" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Flow Control", ++ .err = "reading default settings from EEPROM", ++ .def = e1000_fc_default, ++ .arg = { .l = { .nr = ARRAY_SIZE(fc_list), ++ .p = fc_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_FlowControl > bd) { ++#endif ++ int fc = FlowControl[bd]; ++ e1000_validate_option(&fc, &opt, adapter); ++ adapter->hw.fc = adapter->hw.original_fc = fc; ++#ifdef module_param_array ++ } else { ++ adapter->hw.fc = adapter->hw.original_fc = opt.def; ++ } ++#endif ++ } ++ { /* Transmit Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit 
Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_TIDV), ++ .def = DEFAULT_TIDV, ++ .arg = { .r = { .min = MIN_TXDELAY, ++ .max = MAX_TXDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_TxIntDelay > bd) { ++#endif ++ adapter->tx_int_delay = TxIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->tx_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Transmit Absolute Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Absolute Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_TADV), ++ .def = DEFAULT_TADV, ++ .arg = { .r = { .min = MIN_TXABSDELAY, ++ .max = MAX_TXABSDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_TxAbsIntDelay > bd) { ++#endif ++ adapter->tx_abs_int_delay = TxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_abs_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->tx_abs_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Receive Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_RDTR), ++ .def = DEFAULT_RDTR, ++ .arg = { .r = { .min = MIN_RXDELAY, ++ .max = MAX_RXDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_RxIntDelay > bd) { ++#endif ++ adapter->rx_int_delay = RxIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->rx_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Receive Absolute Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Absolute Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_RADV), ++ .def = DEFAULT_RADV, ++ .arg = { .r = { .min = MIN_RXABSDELAY, ++ .max = MAX_RXABSDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_RxAbsIntDelay > bd) { ++#endif ++ adapter->rx_abs_int_delay = RxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_abs_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->rx_abs_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Interrupt Throttling Rate */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Interrupt Throttling Rate (ints/sec)", ++ .err = "using default of " __MODULE_STRING(DEFAULT_ITR), ++ .def = DEFAULT_ITR, ++ .arg = { .r = { .min = MIN_ITR, ++ .max = MAX_ITR }} ++ }; ++ ++#ifdef module_param_array ++ if (num_InterruptThrottleRate > bd) { ++#endif ++ adapter->itr = InterruptThrottleRate[bd]; ++ switch (adapter->itr) { ++ case 0: ++ DPRINTK(PROBE, INFO, "%s turned off\n", ++ opt.name); ++ break; ++ case 1: ++ DPRINTK(PROBE, INFO, "%s set to dynamic mode\n", ++ opt.name); ++ break; ++ default: ++ e1000_validate_option(&adapter->itr, &opt, ++ adapter); ++ break; ++ } ++#ifdef module_param_array ++ } else { ++ adapter->itr = opt.def; ++ } ++#endif ++ } ++ { /* Smart Power Down */ ++ struct e1000_option opt = { ++ .type = enable_option, ++ .name = "PHY Smart Power Down", ++ .err = "defaulting to Disabled", ++ .def = OPTION_DISABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_SmartPowerDownEnable > bd) { ++#endif ++ int spd = SmartPowerDownEnable[bd]; ++ e1000_validate_option(&spd, &opt, adapter); ++ adapter->smart_power_down = spd; ++#ifdef module_param_array ++ } else { ++ adapter->smart_power_down = opt.def; ++ } ++#endif ++ } ++ { /* Kumeran Lock Loss Workaround */ ++ struct e1000_option opt = { 
++ .type = enable_option, ++ .name = "Kumeran Lock Loss Workaround", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_KumeranLockLoss > bd) { ++#endif ++ int kmrn_lock_loss = KumeranLockLoss[bd]; ++ e1000_validate_option(&kmrn_lock_loss, &opt, adapter); ++ adapter->hw.kmrn_lock_loss_workaround_disabled = !kmrn_lock_loss; ++#ifdef module_param_array ++ } else { ++ adapter->hw.kmrn_lock_loss_workaround_disabled = !opt.def; ++ } ++#endif ++ } ++ ++ switch (adapter->hw.media_type) { ++ case e1000_media_type_fiber: ++ case e1000_media_type_internal_serdes: ++ e1000_check_fiber_options(adapter); ++ break; ++ case e1000_media_type_copper: ++ e1000_check_copper_options(adapter); ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++/** ++ * e1000_check_fiber_options - Range Checking for Link Options, Fiber Version ++ * @adapter: board private structure ++ * ++ * Handles speed and duplex options on fiber adapters ++ **/ ++ ++static void e1000_check_fiber_options(struct e1000_adapter *adapter) ++{ ++ int bd = adapter->bd_number; ++#ifndef module_param_array ++ bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd; ++ if ((Speed[bd] != OPTION_UNSET)) { ++#else ++ if (num_Speed > bd) { ++#endif ++ DPRINTK(PROBE, INFO, "Speed not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++ ++#ifndef module_param_array ++ if ((Duplex[bd] != OPTION_UNSET)) { ++#else ++ if (num_Duplex > bd) { ++#endif ++ DPRINTK(PROBE, INFO, "Duplex not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++ ++#ifndef module_param_array ++ if ((AutoNeg[bd] != OPTION_UNSET) && (AutoNeg[bd] != 0x20)) { ++#else ++ if ((num_AutoNeg > bd) && (AutoNeg[bd] != 0x20)) { ++#endif ++ DPRINTK(PROBE, INFO, "AutoNeg other than 1000/Full is " ++ "not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++} ++ ++/** ++ * e1000_check_copper_options - Range Checking for Link Options, Copper Version ++ * @adapter: board private structure ++ * ++ * Handles speed and duplex options on copper adapters ++ **/ ++ ++static void e1000_check_copper_options(struct e1000_adapter *adapter) ++{ ++ int speed, dplx, an; ++ int bd = adapter->bd_number; ++#ifndef module_param_array ++ bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd; ++#endif ++ ++ { /* Speed */ ++ struct e1000_opt_list speed_list[] = {{ 0, "" }, ++ { SPEED_10, "" }, ++ { SPEED_100, "" }, ++ { SPEED_1000, "" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Speed", ++ .err = "parameter ignored", ++ .def = 0, ++ .arg = { .l = { .nr = ARRAY_SIZE(speed_list), ++ .p = speed_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_Speed > bd) { ++#endif ++ speed = Speed[bd]; ++ e1000_validate_option(&speed, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ speed = opt.def; ++ } ++#endif ++ } ++ { /* Duplex */ ++ struct e1000_opt_list dplx_list[] = {{ 0, "" }, ++ { HALF_DUPLEX, "" }, ++ { FULL_DUPLEX, "" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Duplex", ++ .err = "parameter ignored", ++ .def = 0, ++ .arg = { .l = { .nr = ARRAY_SIZE(dplx_list), ++ .p = dplx_list }} ++ }; ++ ++ if (e1000_check_phy_reset_block(&adapter->hw)) { ++ DPRINTK(PROBE, INFO, ++ "Link active due to SoL/IDER Session. 
" ++ "Speed/Duplex/AutoNeg parameter ignored.\n"); ++ return; ++ } ++#ifdef module_param_array ++ if (num_Duplex > bd) { ++#endif ++ dplx = Duplex[bd]; ++ e1000_validate_option(&dplx, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ dplx = opt.def; ++ } ++#endif ++ } ++ ++#ifdef module_param_array ++ if ((num_AutoNeg > bd) && (speed != 0 || dplx != 0)) { ++#else ++ if (AutoNeg[bd] != OPTION_UNSET && (speed != 0 || dplx != 0)) { ++#endif ++ DPRINTK(PROBE, INFO, ++ "AutoNeg specified along with Speed or Duplex, " ++ "parameter ignored\n"); ++ adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT; ++ } else { /* Autoneg */ ++ struct e1000_opt_list an_list[] = ++ #define AA "AutoNeg advertising " ++ {{ 0x01, AA "10/HD" }, ++ { 0x02, AA "10/FD" }, ++ { 0x03, AA "10/FD, 10/HD" }, ++ { 0x04, AA "100/HD" }, ++ { 0x05, AA "100/HD, 10/HD" }, ++ { 0x06, AA "100/HD, 10/FD" }, ++ { 0x07, AA "100/HD, 10/FD, 10/HD" }, ++ { 0x08, AA "100/FD" }, ++ { 0x09, AA "100/FD, 10/HD" }, ++ { 0x0a, AA "100/FD, 10/FD" }, ++ { 0x0b, AA "100/FD, 10/FD, 10/HD" }, ++ { 0x0c, AA "100/FD, 100/HD" }, ++ { 0x0d, AA "100/FD, 100/HD, 10/HD" }, ++ { 0x0e, AA "100/FD, 100/HD, 10/FD" }, ++ { 0x0f, AA "100/FD, 100/HD, 10/FD, 10/HD" }, ++ { 0x20, AA "1000/FD" }, ++ { 0x21, AA "1000/FD, 10/HD" }, ++ { 0x22, AA "1000/FD, 10/FD" }, ++ { 0x23, AA "1000/FD, 10/FD, 10/HD" }, ++ { 0x24, AA "1000/FD, 100/HD" }, ++ { 0x25, AA "1000/FD, 100/HD, 10/HD" }, ++ { 0x26, AA "1000/FD, 100/HD, 10/FD" }, ++ { 0x27, AA "1000/FD, 100/HD, 10/FD, 10/HD" }, ++ { 0x28, AA "1000/FD, 100/FD" }, ++ { 0x29, AA "1000/FD, 100/FD, 10/HD" }, ++ { 0x2a, AA "1000/FD, 100/FD, 10/FD" }, ++ { 0x2b, AA "1000/FD, 100/FD, 10/FD, 10/HD" }, ++ { 0x2c, AA "1000/FD, 100/FD, 100/HD" }, ++ { 0x2d, AA "1000/FD, 100/FD, 100/HD, 10/HD" }, ++ { 0x2e, AA "1000/FD, 100/FD, 100/HD, 10/FD" }, ++ { 0x2f, AA "1000/FD, 100/FD, 100/HD, 10/FD, 10/HD" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "AutoNeg", ++ .err = "parameter ignored", ++ .def = AUTONEG_ADV_DEFAULT, ++ .arg = { .l = { .nr = ARRAY_SIZE(an_list), ++ .p = an_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_AutoNeg > bd) { ++#endif ++ an = AutoNeg[bd]; ++ e1000_validate_option(&an, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ an = opt.def; ++ } ++#endif ++ adapter->hw.autoneg_advertised = an; ++ } ++ ++ switch (speed + dplx) { ++ case 0: ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++#ifdef module_param_array ++ if ((num_Speed > bd) && (speed != 0 || dplx != 0)) ++#else ++ if (Speed[bd] != OPTION_UNSET || Duplex[bd] != OPTION_UNSET) ++#endif ++ DPRINTK(PROBE, INFO, ++ "Speed and duplex autonegotiation enabled\n"); ++ break; ++ case HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Half Duplex specified without Speed\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "Half Duplex only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_10_HALF | ++ ADVERTISE_100_HALF; ++ break; ++ case FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Full Duplex specified without Speed\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "Full Duplex only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_10_FULL | ++ ADVERTISE_100_FULL | ++ ADVERTISE_1000_FULL; ++ break; ++ case SPEED_10: ++ DPRINTK(PROBE, INFO, "10 Mbps Speed specified " ++ "without Duplex\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at 10 Mbps only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ 
adapter->hw.autoneg_advertised = ADVERTISE_10_HALF | ++ ADVERTISE_10_FULL; ++ break; ++ case SPEED_10 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 10 Mbps Half Duplex\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 0; ++ adapter->hw.forced_speed_duplex = e1000_10_half; ++ adapter->hw.autoneg_advertised = 0; ++ break; ++ case SPEED_10 + FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 10 Mbps Full Duplex\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 0; ++ adapter->hw.forced_speed_duplex = e1000_10_full; ++ adapter->hw.autoneg_advertised = 0; ++ break; ++ case SPEED_100: ++ DPRINTK(PROBE, INFO, "100 Mbps Speed specified " ++ "without Duplex\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "100 Mbps only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_100_HALF | ++ ADVERTISE_100_FULL; ++ break; ++ case SPEED_100 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 100 Mbps Half Duplex\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 0; ++ adapter->hw.forced_speed_duplex = e1000_100_half; ++ adapter->hw.autoneg_advertised = 0; ++ break; ++ case SPEED_100 + FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 100 Mbps Full Duplex\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 0; ++ adapter->hw.forced_speed_duplex = e1000_100_full; ++ adapter->hw.autoneg_advertised = 0; ++ break; ++ case SPEED_1000: ++ DPRINTK(PROBE, INFO, "1000 Mbps Speed specified without " ++ "Duplex\n"); ++ DPRINTK(PROBE, INFO, ++ "Using Autonegotiation at 1000 Mbps " ++ "Full Duplex only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ case SPEED_1000 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, ++ "Half Duplex is not supported at 1000 Mbps\n"); ++ DPRINTK(PROBE, INFO, ++ "Using Autonegotiation at 1000 Mbps " ++ "Full Duplex only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ case SPEED_1000 + FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, ++ "Using Autonegotiation at 1000 Mbps Full Duplex only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ default: ++ BUG(); ++ } ++ ++ /* Speed, AutoNeg and MDI/MDI-X must all play nice */ ++ if (e1000_validate_mdi_setting(&(adapter->hw)) < 0) { ++ DPRINTK(PROBE, INFO, ++ "Speed, AutoNeg and MDI-X specifications are " ++ "incompatible. Setting MDI-X to a compatible value.\n"); ++ } ++} ++ +--- linux/drivers/xenomai/net/drivers/natsemi.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/natsemi.c 2021-04-07 16:01:27.370633962 +0800 +@@ -0,0 +1,2094 @@ ++/* natsemi.c: A Linux PCI Ethernet driver for the NatSemi DP8381x series. */ ++/* ++ Written/copyright 1999-2001 by Donald Becker. ++ Portions copyright (c) 2001,2002 Sun Microsystems (thockin@sun.com) ++ Portions copyright 2001,2002 Manfred Spraul (manfred@colorfullife.com) ++ ++ This software may be used and distributed according to the terms of ++ the GNU General Public License (GPL), incorporated herein by reference. ++ Drivers based on or derived from this code fall under the GPL and must ++ retain the authorship, copyright and license notice. This file is not ++ a complete program and may only be used when the entire operating ++ system is licensed under the GPL. License for under other terms may be ++ available. Contact the original author for details. 
++ ++ The original author may be reached as becker@scyld.com, or at ++ Scyld Computing Corporation ++ 410 Severn Ave., Suite 210 ++ Annapolis MD 21403 ++ ++ Support information and updates available at ++ http://www.scyld.com/network/netsemi.html ++ ++ ++ Linux kernel modifications: ++ ++ Version 1.0.1: ++ - Spinlock fixes ++ - Bug fixes and better intr performance (Tjeerd) ++ Version 1.0.2: ++ - Now reads correct MAC address from eeprom ++ Version 1.0.3: ++ - Eliminate redundant priv->tx_full flag ++ - Call netif_start_queue from dev->tx_timeout ++ - wmb() in start_tx() to flush data ++ - Update Tx locking ++ - Clean up PCI enable (davej) ++ Version 1.0.4: ++ - Merge Donald Becker's natsemi.c version 1.07 ++ Version 1.0.5: ++ - { fill me in } ++ Version 1.0.6: ++ * ethtool support (jgarzik) ++ * Proper initialization of the card (which sometimes ++ fails to occur and leaves the card in a non-functional ++ state). (uzi) ++ ++ * Some documented register settings to optimize some ++ of the 100Mbit autodetection circuitry in rev C cards. (uzi) ++ ++ * Polling of the PHY intr for stuff like link state ++ change and auto- negotiation to finally work properly. (uzi) ++ ++ * One-liner removal of a duplicate declaration of ++ netdev_error(). (uzi) ++ ++ Version 1.0.7: (Manfred Spraul) ++ * pci dma ++ * SMP locking update ++ * full reset added into tx_timeout ++ * correct multicast hash generation (both big and little endian) ++ [copied from a natsemi driver version ++ from Myrio Corporation, Greg Smith] ++ * suspend/resume ++ ++ version 1.0.8 (Tim Hockin ) ++ * ETHTOOL_* support ++ * Wake on lan support (Erik Gilling) ++ * MXDMA fixes for serverworks ++ * EEPROM reload ++ ++ version 1.0.9 (Manfred Spraul) ++ * Main change: fix lack of synchronize ++ netif_close/netif_suspend against a last interrupt ++ or packet. ++ * do not enable superflous interrupts (e.g. the ++ drivers relies on TxDone - TxIntr not needed) ++ * wait that the hardware has really stopped in close ++ and suspend. ++ * workaround for the (at least) gcc-2.95.1 compiler ++ problem. Also simplifies the code a bit. ++ * disable_irq() in tx_timeout - needed to protect ++ against rx interrupts. ++ * stop the nic before switching into silent rx mode ++ for wol (required according to docu). ++ ++ version 1.0.10: ++ * use long for ee_addr (various) ++ * print pointers properly (DaveM) ++ * include asm/irq.h (?) 
++ ++ version 1.0.11: ++ * check and reset if PHY errors appear (Adrian Sun) ++ * WoL cleanup (Tim Hockin) ++ * Magic number cleanup (Tim Hockin) ++ * Don't reload EEPROM on every reset (Tim Hockin) ++ * Save and restore EEPROM state across reset (Tim Hockin) ++ * MDIO Cleanup (Tim Hockin) ++ * Reformat register offsets/bits (jgarzik) ++ ++ version 1.0.12: ++ * ETHTOOL_* further support (Tim Hockin) ++ ++ version 1.0.13: ++ * ETHTOOL_[G]EEPROM support (Tim Hockin) ++ ++ version 1.0.13: ++ * crc cleanup (Matt Domsch ) ++ ++ version 1.0.14: ++ * Cleanup some messages and autoneg in ethtool (Tim Hockin) ++ ++ version 1.0.15: ++ * Get rid of cable_magic flag ++ * use new (National provided) solution for cable magic issue ++ ++ version 1.0.16: ++ * call netdev_rx() for RxErrors (Manfred Spraul) ++ * formatting and cleanups ++ * change options and full_duplex arrays to be zero ++ initialized ++ * enable only the WoL and PHY interrupts in wol mode ++ ++ version 1.0.17: ++ * only do cable_magic on 83815 and early 83816 (Tim Hockin) ++ * create a function for rx refill (Manfred Spraul) ++ * combine drain_ring and init_ring (Manfred Spraul) ++ * oom handling (Manfred Spraul) ++ * hands_off instead of playing with netif_device_{de,a}ttach ++ (Manfred Spraul) ++ * be sure to write the MAC back to the chip (Manfred Spraul) ++ * lengthen EEPROM timeout, and always warn about timeouts ++ (Manfred Spraul) ++ * comments update (Manfred) ++ * do the right thing on a phy-reset (Manfred and Tim) ++ ++ TODO: ++ * big endian support with CFG:BEM instead of cpu_to_le32 ++ * support for an external PHY ++ * NAPI ++ ++ Ported to RTNET: December 2003, Erik Buit ++*/ ++ ++#if !defined(__OPTIMIZE__) ++#warning You must compile this file with the correct options! ++#warning See the last lines of the source file. ++#error You must compile this driver with "-O". ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* Processor type for cache alignment. */ ++#include ++#include ++#include ++ ++/*** RTnet ***/ ++#include ++ ++#define MAX_UNITS 8 /* More are supported, limit only on options */ ++#define DEFAULT_RX_POOL_SIZE 16 ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++/*** RTnet ***/ ++ ++#define DRV_NAME "natsemi-rt" ++#define DRV_VERSION "1.07+LK1.0.17-RTnet-0.2" ++#define DRV_RELDATE "Dec 16, 2003" ++ ++/* Updated to recommendations in pci-skeleton v2.03. */ ++ ++/* The user-configurable values. ++ These may be modified when a driver module is loaded.*/ ++ ++#define NATSEMI_DEF_MSG (NETIF_MSG_DRV | \ ++ NETIF_MSG_LINK | \ ++ NETIF_MSG_WOL | \ ++ NETIF_MSG_RX_ERR | \ ++ NETIF_MSG_TX_ERR) ++static int local_debug = -1; ++ ++/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ ++static int max_interrupt_work = 20; ++static int mtu; ++ ++/* Set the copy breakpoint for the copy-only-tiny-frames scheme. ++ Setting to > 1518 effectively disables this feature. */ ++/*** RTnet *** ++static int rx_copybreak; ++ *** RTnet ***/ ++ ++/* Used to pass the media type, etc. ++ Both 'options[]' and 'full_duplex[]' should exist for driver ++ interoperability. ++ The media type is usually passed in 'options[]'. 
++*/ ++static int options[MAX_UNITS]; ++static int full_duplex[MAX_UNITS]; ++ ++/* Operational parameters that are set at compile time. */ ++ ++/* Keep the ring sizes a power of two for compile efficiency. ++ The compiler will convert '%'<2^N> into a bit mask. ++ Making the Tx ring too large decreases the effectiveness of channel ++ bonding and packet priority. ++ There are no ill effects from too-large receive rings. */ ++#define TX_RING_SIZE 16 ++#define TX_QUEUE_LEN 10 /* Limit ring entries actually used, min 4. */ ++#define RX_RING_SIZE 8 /*** RTnet ***/ ++ ++/* Operational parameters that usually are not changed. */ ++/* Time in jiffies before concluding the transmitter is hung. */ ++#define TX_TIMEOUT (2*HZ) ++ ++#define NATSEMI_HW_TIMEOUT 400 ++#define NATSEMI_TIMER_FREQ 3*HZ ++#define NATSEMI_PG0_NREGS 64 ++#define NATSEMI_RFDR_NREGS 8 ++#define NATSEMI_PG1_NREGS 4 ++#define NATSEMI_NREGS (NATSEMI_PG0_NREGS + NATSEMI_RFDR_NREGS + \ ++ NATSEMI_PG1_NREGS) ++#define NATSEMI_REGS_VER 1 /* v1 added RFDR registers */ ++#define NATSEMI_REGS_SIZE (NATSEMI_NREGS * sizeof(u32)) ++#define NATSEMI_EEPROM_SIZE 24 /* 12 16-bit values */ ++ ++#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer. */ ++ ++/* These identify the driver base version and may not be removed. */ ++static char version[] = ++ KERN_INFO DRV_NAME " dp8381x driver, version " ++ DRV_VERSION ", " DRV_RELDATE "\n" ++ KERN_INFO " originally by Donald Becker \n" ++ KERN_INFO " http://www.scyld.com/network/natsemi.html\n" ++ KERN_INFO " 2.4.x kernel port by Jeff Garzik, Tjeerd Mulder\n" ++ KERN_INFO " RTnet port by Erik Buit\n"; ++ ++MODULE_AUTHOR("Erik Buit"); ++MODULE_DESCRIPTION("RTnet National Semiconductor DP8381x series PCI Ethernet driver"); ++MODULE_LICENSE("GPL"); ++ ++module_param(max_interrupt_work, int, 0444); ++module_param(mtu, int, 0444); ++module_param_named(debug, local_debug, int, 0444); ++/*** RTnet *** ++MODULE_PARM(rx_copybreak, "i"); ++ *** RTnet ***/ ++module_param_array(options, int, NULL, 0444); ++module_param_array(full_duplex, int, NULL, 0444); ++MODULE_PARM_DESC(max_interrupt_work, ++ "DP8381x maximum events handled per interrupt"); ++MODULE_PARM_DESC(mtu, "DP8381x MTU (all boards)"); ++MODULE_PARM_DESC(debug, "DP8381x default debug level"); ++/*** RTnet *** ++MODULE_PARM_DESC(rx_copybreak, ++ "DP8381x copy breakpoint for copy-only-tiny-frames"); ++ *** RTnet ***/ ++MODULE_PARM_DESC(options, "DP8381x: Bits 0-3: media type, bit 17: full duplex"); ++MODULE_PARM_DESC(full_duplex, "DP8381x full duplex setting(s) (1)"); ++ ++/* ++ Theory of Operation ++ ++I. Board Compatibility ++ ++This driver is designed for National Semiconductor DP83815 PCI Ethernet NIC. ++It also works with other chips in in the DP83810 series. ++ ++II. Board-specific settings ++ ++This driver requires the PCI interrupt line to be valid. ++It honors the EEPROM-set values. ++ ++III. Driver operation ++ ++IIIa. Ring buffers ++ ++This driver uses two statically allocated fixed-size descriptor lists ++formed into rings by a branch from the final descriptor to the beginning of ++the list. The ring sizes are set at compile time by RX/TX_RING_SIZE. ++The NatSemi design uses a 'next descriptor' pointer that the driver forms ++into a list. ++ ++IIIb/c. Transmit/Receive Structure ++ ++This driver uses a zero-copy receive and transmit scheme. ++The driver allocates full frame size skbuffs for the Rx ring buffers at ++open() time and passes the skb->data field to the chip as receive data ++buffers. 
When an incoming frame is less than RX_COPYBREAK bytes long, ++a fresh skbuff is allocated and the frame is copied to the new skbuff. ++When the incoming frame is larger, the skbuff is passed directly up the ++protocol stack. Buffers consumed this way are replaced by newly allocated ++skbuffs in a later phase of receives. ++ ++The RX_COPYBREAK value is chosen to trade-off the memory wasted by ++using a full-sized skbuff for small frames vs. the copying costs of larger ++frames. New boards are typically used in generously configured machines ++and the underfilled buffers have negligible impact compared to the benefit of ++a single allocation size, so the default value of zero results in never ++copying packets. When copying is done, the cost is usually mitigated by using ++a combined copy/checksum routine. Copying also preloads the cache, which is ++most useful with small frames. ++ ++A subtle aspect of the operation is that unaligned buffers are not permitted ++by the hardware. Thus the IP header at offset 14 in an ethernet frame isn't ++longword aligned for further processing. On copies frames are put into the ++skbuff at an offset of "+2", 16-byte aligning the IP header. ++ ++IIId. Synchronization ++ ++Most operations are synchronized on the np->lock irq spinlock, except the ++performance critical codepaths: ++ ++The rx process only runs in the interrupt handler. Access from outside ++the interrupt handler is only permitted after disable_irq(). ++ ++The rx process usually runs under the dev->xmit_lock. If np->intr_tx_reap ++is set, then access is permitted under spin_lock_irq(&np->lock). ++ ++Thus configuration functions that want to access everything must call ++ disable_irq(dev->irq); ++ spin_lock_bh(dev->xmit_lock); ++ spin_lock_irq(&np->lock); ++ ++IV. Notes ++ ++NatSemi PCI network controllers are very uncommon. ++ ++IVb. References ++ ++http://www.scyld.com/expert/100mbps.html ++http://www.scyld.com/expert/NWay.html ++Datasheet is available from: ++http://www.national.com/pf/DP/DP83815.html ++ ++IVc. Errata ++ ++None characterised. ++*/ ++ ++ ++ ++enum pcistuff { ++ PCI_USES_IO = 0x01, ++ PCI_USES_MEM = 0x02, ++ PCI_USES_MASTER = 0x04, ++ PCI_ADDR0 = 0x08, ++ PCI_ADDR1 = 0x10, ++}; ++ ++/* MMIO operations required */ ++#define PCI_IOTYPE (PCI_USES_MASTER | PCI_USES_MEM | PCI_ADDR1) ++ ++ ++/* array of board data directly indexed by pci_tbl[x].driver_data */ ++static struct { ++ const char *name; ++ unsigned long flags; ++} natsemi_pci_info[] = { ++ { "NatSemi DP8381[56]", PCI_IOTYPE }, ++}; ++ ++static struct pci_device_id natsemi_pci_tbl[] = { ++ { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_83815, PCI_ANY_ID, PCI_ANY_ID, }, ++ { 0, }, ++}; ++MODULE_DEVICE_TABLE(pci, natsemi_pci_tbl); ++ ++/* Offsets to the device registers. ++ Unlike software-only systems, device drivers interact with complex hardware. ++ It's not useful to define symbolic names for every register bit in the ++ device. 
++*/ ++enum register_offsets { ++ ChipCmd = 0x00, ++ ChipConfig = 0x04, ++ EECtrl = 0x08, ++ PCIBusCfg = 0x0C, ++ IntrStatus = 0x10, ++ IntrMask = 0x14, ++ IntrEnable = 0x18, ++ IntrHoldoff = 0x16, /* DP83816 only */ ++ TxRingPtr = 0x20, ++ TxConfig = 0x24, ++ RxRingPtr = 0x30, ++ RxConfig = 0x34, ++ ClkRun = 0x3C, ++ WOLCmd = 0x40, ++ PauseCmd = 0x44, ++ RxFilterAddr = 0x48, ++ RxFilterData = 0x4C, ++ BootRomAddr = 0x50, ++ BootRomData = 0x54, ++ SiliconRev = 0x58, ++ StatsCtrl = 0x5C, ++ StatsData = 0x60, ++ RxPktErrs = 0x60, ++ RxMissed = 0x68, ++ RxCRCErrs = 0x64, ++ BasicControl = 0x80, ++ BasicStatus = 0x84, ++ AnegAdv = 0x90, ++ AnegPeer = 0x94, ++ PhyStatus = 0xC0, ++ MIntrCtrl = 0xC4, ++ MIntrStatus = 0xC8, ++ PhyCtrl = 0xE4, ++ ++ /* These are from the spec, around page 78... on a separate table. ++ * The meaning of these registers depend on the value of PGSEL. */ ++ PGSEL = 0xCC, ++ PMDCSR = 0xE4, ++ TSTDAT = 0xFC, ++ DSPCFG = 0xF4, ++ SDCFG = 0xF8 ++}; ++/* the values for the 'magic' registers above (PGSEL=1) */ ++#define PMDCSR_VAL 0x189c /* enable preferred adaptation circuitry */ ++#define TSTDAT_VAL 0x0 ++#define DSPCFG_VAL 0x5040 ++#define SDCFG_VAL 0x008c /* set voltage thresholds for Signal Detect */ ++#define DSPCFG_LOCK 0x20 /* coefficient lock bit in DSPCFG */ ++#define TSTDAT_FIXED 0xe8 /* magic number for bad coefficients */ ++ ++/* misc PCI space registers */ ++enum pci_register_offsets { ++ PCIPM = 0x44, ++}; ++ ++enum ChipCmd_bits { ++ ChipReset = 0x100, ++ RxReset = 0x20, ++ TxReset = 0x10, ++ RxOff = 0x08, ++ RxOn = 0x04, ++ TxOff = 0x02, ++ TxOn = 0x01, ++}; ++ ++enum ChipConfig_bits { ++ CfgPhyDis = 0x200, ++ CfgPhyRst = 0x400, ++ CfgExtPhy = 0x1000, ++ CfgAnegEnable = 0x2000, ++ CfgAneg100 = 0x4000, ++ CfgAnegFull = 0x8000, ++ CfgAnegDone = 0x8000000, ++ CfgFullDuplex = 0x20000000, ++ CfgSpeed100 = 0x40000000, ++ CfgLink = 0x80000000, ++}; ++ ++enum EECtrl_bits { ++ EE_ShiftClk = 0x04, ++ EE_DataIn = 0x01, ++ EE_ChipSelect = 0x08, ++ EE_DataOut = 0x02, ++}; ++ ++enum PCIBusCfg_bits { ++ EepromReload = 0x4, ++}; ++ ++/* Bits in the interrupt status/mask registers. 
*/ ++enum IntrStatus_bits { ++ IntrRxDone = 0x0001, ++ IntrRxIntr = 0x0002, ++ IntrRxErr = 0x0004, ++ IntrRxEarly = 0x0008, ++ IntrRxIdle = 0x0010, ++ IntrRxOverrun = 0x0020, ++ IntrTxDone = 0x0040, ++ IntrTxIntr = 0x0080, ++ IntrTxErr = 0x0100, ++ IntrTxIdle = 0x0200, ++ IntrTxUnderrun = 0x0400, ++ StatsMax = 0x0800, ++ SWInt = 0x1000, ++ WOLPkt = 0x2000, ++ LinkChange = 0x4000, ++ IntrHighBits = 0x8000, ++ RxStatusFIFOOver = 0x10000, ++ IntrPCIErr = 0xf00000, ++ RxResetDone = 0x1000000, ++ TxResetDone = 0x2000000, ++ IntrAbnormalSummary = 0xCD20, ++}; ++ ++/* ++ * Default Interrupts: ++ * Rx OK, Rx Packet Error, Rx Overrun, ++ * Tx OK, Tx Packet Error, Tx Underrun, ++ * MIB Service, Phy Interrupt, High Bits, ++ * Rx Status FIFO overrun, ++ * Received Target Abort, Received Master Abort, ++ * Signalled System Error, Received Parity Error ++ */ ++#define DEFAULT_INTR 0x00f1cd65 ++ ++enum TxConfig_bits { ++ TxDrthMask = 0x3f, ++ TxFlthMask = 0x3f00, ++ TxMxdmaMask = 0x700000, ++ TxMxdma_512 = 0x0, ++ TxMxdma_4 = 0x100000, ++ TxMxdma_8 = 0x200000, ++ TxMxdma_16 = 0x300000, ++ TxMxdma_32 = 0x400000, ++ TxMxdma_64 = 0x500000, ++ TxMxdma_128 = 0x600000, ++ TxMxdma_256 = 0x700000, ++ TxCollRetry = 0x800000, ++ TxAutoPad = 0x10000000, ++ TxMacLoop = 0x20000000, ++ TxHeartIgn = 0x40000000, ++ TxCarrierIgn = 0x80000000 ++}; ++ ++enum RxConfig_bits { ++ RxDrthMask = 0x3e, ++ RxMxdmaMask = 0x700000, ++ RxMxdma_512 = 0x0, ++ RxMxdma_4 = 0x100000, ++ RxMxdma_8 = 0x200000, ++ RxMxdma_16 = 0x300000, ++ RxMxdma_32 = 0x400000, ++ RxMxdma_64 = 0x500000, ++ RxMxdma_128 = 0x600000, ++ RxMxdma_256 = 0x700000, ++ RxAcceptLong = 0x8000000, ++ RxAcceptTx = 0x10000000, ++ RxAcceptRunt = 0x40000000, ++ RxAcceptErr = 0x80000000 ++}; ++ ++enum ClkRun_bits { ++ PMEEnable = 0x100, ++ PMEStatus = 0x8000, ++}; ++ ++enum WolCmd_bits { ++ WakePhy = 0x1, ++ WakeUnicast = 0x2, ++ WakeMulticast = 0x4, ++ WakeBroadcast = 0x8, ++ WakeArp = 0x10, ++ WakePMatch0 = 0x20, ++ WakePMatch1 = 0x40, ++ WakePMatch2 = 0x80, ++ WakePMatch3 = 0x100, ++ WakeMagic = 0x200, ++ WakeMagicSecure = 0x400, ++ SecureHack = 0x100000, ++ WokePhy = 0x400000, ++ WokeUnicast = 0x800000, ++ WokeMulticast = 0x1000000, ++ WokeBroadcast = 0x2000000, ++ WokeArp = 0x4000000, ++ WokePMatch0 = 0x8000000, ++ WokePMatch1 = 0x10000000, ++ WokePMatch2 = 0x20000000, ++ WokePMatch3 = 0x40000000, ++ WokeMagic = 0x80000000, ++ WakeOptsSummary = 0x7ff ++}; ++ ++enum RxFilterAddr_bits { ++ RFCRAddressMask = 0x3ff, ++ AcceptMulticast = 0x00200000, ++ AcceptMyPhys = 0x08000000, ++ AcceptAllPhys = 0x10000000, ++ AcceptAllMulticast = 0x20000000, ++ AcceptBroadcast = 0x40000000, ++ RxFilterEnable = 0x80000000 ++}; ++ ++enum StatsCtrl_bits { ++ StatsWarn = 0x1, ++ StatsFreeze = 0x2, ++ StatsClear = 0x4, ++ StatsStrobe = 0x8, ++}; ++ ++enum MIntrCtrl_bits { ++ MICRIntEn = 0x2, ++}; ++ ++enum PhyCtrl_bits { ++ PhyAddrMask = 0xf, ++}; ++ ++/* values we might find in the silicon revision register */ ++#define SRR_DP83815_C 0x0302 ++#define SRR_DP83815_D 0x0403 ++#define SRR_DP83816_A4 0x0504 ++#define SRR_DP83816_A5 0x0505 ++ ++/* The Rx and Tx buffer descriptors. */ ++/* Note that using only 32 bit fields simplifies conversion to big-endian ++ architectures. 
*/ ++struct netdev_desc { ++ u32 next_desc; ++ s32 cmd_status; ++ u32 addr; ++ u32 software_use; ++}; ++ ++/* Bits in network_desc.status */ ++enum desc_status_bits { ++ DescOwn=0x80000000, DescMore=0x40000000, DescIntr=0x20000000, ++ DescNoCRC=0x10000000, DescPktOK=0x08000000, ++ DescSizeMask=0xfff, ++ ++ DescTxAbort=0x04000000, DescTxFIFO=0x02000000, ++ DescTxCarrier=0x01000000, DescTxDefer=0x00800000, ++ DescTxExcDefer=0x00400000, DescTxOOWCol=0x00200000, ++ DescTxExcColl=0x00100000, DescTxCollCount=0x000f0000, ++ ++ DescRxAbort=0x04000000, DescRxOver=0x02000000, ++ DescRxDest=0x01800000, DescRxLong=0x00400000, ++ DescRxRunt=0x00200000, DescRxInvalid=0x00100000, ++ DescRxCRC=0x00080000, DescRxAlign=0x00040000, ++ DescRxLoop=0x00020000, DesRxColl=0x00010000, ++}; ++ ++struct netdev_private { ++ /* Descriptor rings first for alignment */ ++ dma_addr_t ring_dma; ++ struct netdev_desc *rx_ring; ++ struct netdev_desc *tx_ring; ++ /* The addresses of receive-in-place skbuffs */ ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; /*** RTnet ***/ ++ dma_addr_t rx_dma[RX_RING_SIZE]; ++ /* address of a sent-in-place packet/buffer, for later free() */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; /*** RTnet ***/ ++ dma_addr_t tx_dma[TX_RING_SIZE]; ++ struct net_device_stats stats; ++ /* Media monitoring timer */ ++ struct timer_list timer; ++ /* Frequently used values: keep some adjacent for cache effect */ ++ struct pci_dev *pci_dev; ++ struct netdev_desc *rx_head_desc; ++ /* Producer/consumer ring indices */ ++ unsigned int cur_rx, dirty_rx; ++ unsigned int cur_tx, dirty_tx; ++ /* Based on MTU+slack. */ ++ unsigned int rx_buf_sz; ++ int oom; ++ /* Do not touch the nic registers */ ++ int hands_off; ++ /* These values are keep track of the transceiver/media in use */ ++ unsigned int full_duplex; ++ /* Rx filter */ ++ u32 cur_rx_mode; ++ u32 rx_filter[16]; ++ /* FIFO and PCI burst thresholds */ ++ u32 tx_config, rx_config; ++ /* original contents of ClkRun register */ ++ u32 SavedClkRun; ++ /* silicon revision */ ++ u32 srr; ++ /* expected DSPCFG value */ ++ u16 dspcfg; ++ /* MII transceiver section */ ++ u16 advertising; ++ unsigned int iosize; ++ rtdm_lock_t lock; ++ u32 msg_enable; ++ ++ rtdm_irq_t irq_handle; ++}; ++ ++static int eeprom_read(long ioaddr, int location); ++static int mdio_read(struct rtnet_device *dev, int phy_id, int reg); ++/*static void mdio_write(struct rtnet_device *dev, int phy_id, int reg, u16 data);*/ ++static void natsemi_reset(struct rtnet_device *dev); ++static void natsemi_reload_eeprom(struct rtnet_device *dev); ++static void natsemi_stop_rxtx(struct rtnet_device *dev); ++static int netdev_open(struct rtnet_device *dev); ++static void do_cable_magic(struct rtnet_device *dev); ++static void undo_cable_magic(struct rtnet_device *dev); ++static void check_link(struct rtnet_device *dev); ++/*static void netdev_timer(unsigned long data);*/ ++static void dump_ring(struct rtnet_device *dev); ++/*static void tx_timeout(struct rtnet_device *dev);*/ ++static int alloc_ring(struct rtnet_device *dev); ++static void refill_rx(struct rtnet_device *dev); ++static void init_ring(struct rtnet_device *dev); ++static void drain_tx(struct rtnet_device *dev); ++static void drain_ring(struct rtnet_device *dev); ++static void free_ring(struct rtnet_device *dev); ++/*static void reinit_ring(struct rtnet_device *dev);*/ ++static void init_registers(struct rtnet_device *dev); ++static int start_tx(struct rtskb *skb, struct rtnet_device *dev); ++static int intr_handler(rtdm_irq_t *irq_handle); ++static 
void netdev_error(struct rtnet_device *dev, int intr_status); ++static void netdev_rx(struct rtnet_device *dev, nanosecs_abs_t *time_stamp); ++static void netdev_tx_done(struct rtnet_device *dev); ++static void __set_rx_mode(struct rtnet_device *dev); ++/*static void set_rx_mode(struct rtnet_device *dev);*/ ++static void __get_stats(struct rtnet_device *rtdev); ++static struct net_device_stats *get_stats(struct rtnet_device *dev); ++/*static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); ++static int netdev_set_wol(struct rtnet_device *dev, u32 newval); ++static int netdev_get_wol(struct rtnet_device *dev, u32 *supported, u32 *cur); ++static int netdev_set_sopass(struct rtnet_device *dev, u8 *newval); ++static int netdev_get_sopass(struct rtnet_device *dev, u8 *data); ++static int netdev_get_ecmd(struct rtnet_device *dev, struct ethtool_cmd *ecmd); ++static int netdev_set_ecmd(struct rtnet_device *dev, struct ethtool_cmd *ecmd); ++static void enable_wol_mode(struct rtnet_device *dev, int enable_intr);*/ ++static int netdev_close(struct rtnet_device *dev); ++/*static int netdev_get_regs(struct rtnet_device *dev, u8 *buf); ++static int netdev_get_eeprom(struct rtnet_device *dev, u8 *buf);*/ ++ ++ ++static int natsemi_probe1 (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtnet_device *dev; /*** RTnet ***/ ++ struct netdev_private *np; ++ int i, option, irq, chip_idx = ent->driver_data; ++ static int find_cnt = -1; ++ unsigned long ioaddr, iosize; ++ const int pcibar = 1; /* PCI base address register */ ++ int prev_eedata; ++ u32 tmp; ++ ++/* when built into the kernel, we only print version if device is found */ ++#ifndef MODULE ++ static int printed_version; ++ if (!printed_version++) ++ rtdm_printk(version); ++#endif ++ ++ i = pci_enable_device(pdev); ++ if (i) return i; ++ ++ /* natsemi has a non-standard PM control register ++ * in PCI config space. Some boards apparently need ++ * to be brought to D0 in this manner. ++ */ ++ pci_read_config_dword(pdev, PCIPM, &tmp); ++ if (tmp & PCI_PM_CTRL_STATE_MASK) { ++ /* D0 state, disable PME assertion */ ++ u32 newtmp = tmp & ~PCI_PM_CTRL_STATE_MASK; ++ pci_write_config_dword(pdev, PCIPM, newtmp); ++ } ++ ++ find_cnt++; ++ ioaddr = pci_resource_start(pdev, pcibar); ++ iosize = pci_resource_len(pdev, pcibar); ++ irq = pdev->irq; ++ ++/*** RTnet ***/ ++ if (cards[find_cnt] == 0) ++ goto err_out; ++/*** RTnet ***/ ++ ++ if (natsemi_pci_info[chip_idx].flags & PCI_USES_MASTER) ++ pci_set_master(pdev); ++ ++/*** RTnet ***/ ++ dev = rt_alloc_etherdev(sizeof(struct netdev_private), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (dev == NULL) { ++ rtdm_printk(KERN_ERR "init_ethernet failed for card #%d\n", find_cnt); ++ goto err_out; ++ } ++ rtdev_alloc_name(dev, "rteth%d"); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ dev->sysbind = &pdev->dev; ++/*** RTnet ***/ ++ ++ i = pci_request_regions(pdev, dev->name); ++ if (i) { ++/*** RTnet ***/ ++ rt_rtdev_disconnect(dev); ++ rtdev_free(dev); ++/*** RTnet ***/ ++ return i; ++ } ++ ++ { ++ void *mmio = ioremap (ioaddr, iosize); ++ if (!mmio) { ++ pci_release_regions(pdev); ++/*** RTnet ***/ ++ rt_rtdev_disconnect(dev); ++ rtdev_free(dev); ++/*** RTnet ***/ ++ return -ENOMEM; ++ } ++ ioaddr = (unsigned long) mmio; ++ } ++ ++ /* Work around the dropped serial bit. 
*/ ++ prev_eedata = eeprom_read(ioaddr, 6); ++ for (i = 0; i < 3; i++) { ++ int eedata = eeprom_read(ioaddr, i + 7); ++ dev->dev_addr[i*2] = (eedata << 1) + (prev_eedata >> 15); ++ dev->dev_addr[i*2+1] = eedata >> 7; ++ prev_eedata = eedata; ++ } ++ ++ dev->base_addr = ioaddr; ++ dev->irq = irq; ++ ++ np = dev->priv; ++ ++ np->pci_dev = pdev; ++ pci_set_drvdata(pdev, dev); ++ np->iosize = iosize; ++ rtdm_lock_init(&np->lock); ++ np->msg_enable = (local_debug >= 0) ? (1<hands_off = 0; ++ ++ /* Reset the chip to erase previous misconfiguration. */ ++ natsemi_reload_eeprom(dev); ++ natsemi_reset(dev); ++ ++ option = find_cnt < MAX_UNITS ? options[find_cnt] : 0; ++ if (dev->mem_start) ++ option = dev->mem_start; ++ ++ /* The lower four bits are the media type. */ ++ if (option) { ++ if (option & 0x200) ++ np->full_duplex = 1; ++ if (option & 15) ++ rtdm_printk(KERN_INFO ++ "%s: ignoring user supplied media type %d", ++ dev->name, option & 15); ++ } ++ if (find_cnt < MAX_UNITS && full_duplex[find_cnt]) ++ np->full_duplex = 1; ++ ++ /* The chip-specific entries in the device structure. */ ++ dev->open = &netdev_open; ++ dev->hard_start_xmit = &start_tx; ++ dev->stop = &netdev_close; ++ dev->get_stats = &get_stats; ++/*** RTnet *** ++ dev->set_multicast_list = &set_rx_mode; ++ dev->do_ioctl = &netdev_ioctl; ++ dev->tx_timeout = &tx_timeout; ++ dev->watchdog_timeo = TX_TIMEOUT; ++ *** RTnet ***/ ++ ++ if (mtu) ++ dev->mtu = mtu; ++ ++/*** RTnet ***/ ++ i = rt_register_rtnetdev(dev); ++ if (i) { ++ goto err_out_unmap; ++ } ++/*** RTnet ***/ ++ ++ rtnetif_carrier_off(dev); ++ ++ if (netif_msg_drv(np)) { ++ rtdm_printk(KERN_INFO "%s: %s at %#08lx, ", ++ dev->name, natsemi_pci_info[chip_idx].name, ioaddr); ++ for (i = 0; i < ETH_ALEN-1; i++) ++ rtdm_printk("%02x:", dev->dev_addr[i]); ++ rtdm_printk("%02x, IRQ %d.\n", dev->dev_addr[i], irq); ++ } ++ ++ np->advertising = mdio_read(dev, 1, MII_ADVERTISE); ++ if ((readl((void *)(ioaddr + ChipConfig)) & 0xe000) != 0xe000 ++ && netif_msg_probe(np)) { ++ u32 chip_config = readl((void *)(ioaddr + ChipConfig)); ++ rtdm_printk(KERN_INFO "%s: Transceiver default autonegotiation %s " ++ "10%s %s duplex.\n", ++ dev->name, ++ chip_config & CfgAnegEnable ? ++ "enabled, advertise" : "disabled, force", ++ chip_config & CfgAneg100 ? "0" : "", ++ chip_config & CfgAnegFull ? "full" : "half"); ++ } ++ if (netif_msg_probe(np)) ++ rtdm_printk(KERN_INFO ++ "%s: Transceiver status %#04x advertising %#04x.\n", ++ dev->name, mdio_read(dev, 1, MII_BMSR), ++ np->advertising); ++ ++ /* save the silicon revision for later querying */ ++ np->srr = readl((void *)(ioaddr + SiliconRev)); ++ if (netif_msg_hw(np)) ++ rtdm_printk(KERN_INFO "%s: silicon revision %#04x.\n", ++ dev->name, np->srr); ++ ++ ++ return 0; ++ ++err_out_unmap: ++#ifdef USE_MEM ++ iounmap((void *)ioaddr); ++err_out_free_res: ++#endif ++ pci_release_regions(pdev); ++/*err_out_free_netdev:*/ ++/*** RTnet ***/ ++ rt_rtdev_disconnect(dev); ++ rtdev_free(dev); ++/*** RTnet ***/ ++err_out: ++ return -ENODEV; ++ ++} ++ ++ ++/* Read the EEPROM and MII Management Data I/O (MDIO) interfaces. ++ The EEPROM code is for the common 93c06/46 EEPROMs with 6 bit addresses. */ ++ ++/* Delay between EEPROM clock transitions. ++ No extra delay is needed with 33Mhz PCI, but future 66Mhz access may need ++ a delay. Note that pre-2.0.34 kernels had a cache-alignment bug that ++ made udelay() unreliable. ++ The old method of using an ISA access as a delay, __SLOW_DOWN_IO__, is ++ depricated. 
++*/ ++#define eeprom_delay(ee_addr) readl((void *)(ee_addr)) ++ ++#define EE_Write0 (EE_ChipSelect) ++#define EE_Write1 (EE_ChipSelect | EE_DataIn) ++ ++/* The EEPROM commands include the alway-set leading bit. */ ++enum EEPROM_Cmds { ++ EE_WriteCmd=(5 << 6), EE_ReadCmd=(6 << 6), EE_EraseCmd=(7 << 6), ++}; ++ ++static int eeprom_read(long addr, int location) ++{ ++ int i; ++ int retval = 0; ++ long ee_addr = addr + EECtrl; ++ int read_cmd = location | EE_ReadCmd; ++ writel(EE_Write0, (void *)ee_addr); ++ ++ /* Shift the read command bits out. */ ++ for (i = 10; i >= 0; i--) { ++ short dataval = (read_cmd & (1 << i)) ? EE_Write1 : EE_Write0; ++ writel(dataval, (void *)ee_addr); ++ eeprom_delay(ee_addr); ++ writel(dataval | EE_ShiftClk, (void *)ee_addr); ++ eeprom_delay(ee_addr); ++ } ++ writel(EE_ChipSelect, (void *)ee_addr); ++ eeprom_delay(ee_addr); ++ ++ for (i = 0; i < 16; i++) { ++ writel(EE_ChipSelect | EE_ShiftClk, (void *)ee_addr); ++ eeprom_delay(ee_addr); ++ retval |= (readl((void *)ee_addr) & EE_DataOut) ? 1 << i : 0; ++ writel(EE_ChipSelect, (void *)ee_addr); ++ eeprom_delay(ee_addr); ++ } ++ ++ /* Terminate the EEPROM access. */ ++ writel(EE_Write0, (void *)ee_addr); ++ writel(0, (void *)ee_addr); ++ return retval; ++} ++ ++/* MII transceiver control section. ++ * The 83815 series has an internal transceiver, and we present the ++ * management registers as if they were MII connected. */ ++ ++static int mdio_read(struct rtnet_device *dev, int phy_id, int reg) ++{ ++ if (phy_id == 1 && reg < 32) ++ return readl((void *)(dev->base_addr+BasicControl+(reg<<2)))&0xffff; ++ else ++ return 0xffff; ++} ++/*** RTnet ++static void mdio_write(struct rtnet_device *dev, int phy_id, int reg, u16 data) ++{ ++ struct netdev_private *np = dev->priv; ++ if (phy_id == 1 && reg < 32) { ++ writew(data, dev->base_addr+BasicControl+(reg<<2)); ++ switch (reg) { ++ case MII_ADVERTISE: np->advertising = data; break; ++ } ++ } ++} ++RTnet ***/ ++/* CFG bits [13:16] [18:23] */ ++#define CFG_RESET_SAVE 0xfde000 ++/* WCSR bits [0:4] [9:10] */ ++#define WCSR_RESET_SAVE 0x61f ++/* RFCR bits [20] [22] [27:31] */ ++#define RFCR_RESET_SAVE 0xf8500000; ++ ++static void natsemi_reset(struct rtnet_device *dev) ++{ ++ int i; ++ u32 cfg; ++ u32 wcsr; ++ u32 rfcr; ++ u16 pmatch[3]; ++ u16 sopass[3]; ++ struct netdev_private *np = dev->priv; ++ ++ /* ++ * Resetting the chip causes some registers to be lost. ++ * Natsemi suggests NOT reloading the EEPROM while live, so instead ++ * we save the state that would have been loaded from EEPROM ++ * on a normal power-up (see the spec EEPROM map). This assumes ++ * whoever calls this will follow up with init_registers() eventually. 
++ */ ++ ++ /* CFG */ ++ cfg = readl((void *)(dev->base_addr + ChipConfig)) & CFG_RESET_SAVE; ++ /* WCSR */ ++ wcsr = readl((void *)(dev->base_addr + WOLCmd)) & WCSR_RESET_SAVE; ++ /* RFCR */ ++ rfcr = readl((void *)(dev->base_addr + RxFilterAddr)) & RFCR_RESET_SAVE; ++ /* PMATCH */ ++ for (i = 0; i < 3; i++) { ++ writel(i*2, (void *)(dev->base_addr + RxFilterAddr)); ++ pmatch[i] = readw((void *)(dev->base_addr + RxFilterData)); ++ } ++ /* SOPAS */ ++ for (i = 0; i < 3; i++) { ++ writel(0xa+(i*2), (void *)(dev->base_addr + RxFilterAddr)); ++ sopass[i] = readw((void *)(dev->base_addr + RxFilterData)); ++ } ++ ++ /* now whack the chip */ ++ writel(ChipReset, (void *)(dev->base_addr + ChipCmd)); ++ for (i=0;ibase_addr + ChipCmd)) & ChipReset)) ++ break; ++ udelay(5); ++ } ++ if (i==NATSEMI_HW_TIMEOUT) { ++ rtdm_printk(KERN_WARNING "%s: reset did not complete in %d usec.\n", ++ dev->name, i*5); ++ } else if (netif_msg_hw(np)) { ++ rtdm_printk(KERN_DEBUG "%s: reset completed in %d usec.\n", ++ dev->name, i*5); ++ } ++ ++ /* restore CFG */ ++ cfg |= readl((void *)(dev->base_addr + ChipConfig)) & ~CFG_RESET_SAVE; ++ writel(cfg, (void *)(dev->base_addr + ChipConfig)); ++ /* restore WCSR */ ++ wcsr |= readl((void *)(dev->base_addr + WOLCmd)) & ~WCSR_RESET_SAVE; ++ writel(wcsr, (void *)(dev->base_addr + WOLCmd)); ++ /* read RFCR */ ++ rfcr |= readl((void *)(dev->base_addr + RxFilterAddr)) & ~RFCR_RESET_SAVE; ++ /* restore PMATCH */ ++ for (i = 0; i < 3; i++) { ++ writel(i*2, (void *)(dev->base_addr + RxFilterAddr)); ++ writew(pmatch[i], (void *)(dev->base_addr + RxFilterData)); ++ } ++ for (i = 0; i < 3; i++) { ++ writel(0xa+(i*2), (void *)(dev->base_addr + RxFilterAddr)); ++ writew(sopass[i], (void *)(dev->base_addr + RxFilterData)); ++ } ++ /* restore RFCR */ ++ writel(rfcr, (void *)(dev->base_addr + RxFilterAddr)); ++} ++ ++static void natsemi_reload_eeprom(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ writel(EepromReload, (void *)(dev->base_addr + PCIBusCfg)); ++ for (i=0;ibase_addr + PCIBusCfg)) & EepromReload)) ++ break; ++ } ++ if (i==NATSEMI_HW_TIMEOUT) { ++ rtdm_printk(KERN_WARNING "%s: EEPROM did not reload in %d usec.\n", ++ dev->name, i*50); ++ } else if (netif_msg_hw(np)) { ++ rtdm_printk(KERN_DEBUG "%s: EEPROM reloaded in %d usec.\n", ++ dev->name, i*50); ++ } ++} ++ ++static void natsemi_stop_rxtx(struct rtnet_device *dev) ++{ ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ writel(RxOff | TxOff, (void *)(ioaddr + ChipCmd)); ++ for(i=0;i< NATSEMI_HW_TIMEOUT;i++) { ++ if ((readl((void *)(ioaddr + ChipCmd)) & (TxOn|RxOn)) == 0) ++ break; ++ udelay(5); ++ } ++ if (i==NATSEMI_HW_TIMEOUT) { ++ rtdm_printk(KERN_WARNING "%s: Tx/Rx process did not stop in %d usec.\n", ++ dev->name, i*5); ++ } else if (netif_msg_hw(np)) { ++ rtdm_printk(KERN_DEBUG "%s: Tx/Rx process stopped in %d usec.\n", ++ dev->name, i*5); ++ } ++} ++ ++static int netdev_open(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ long ioaddr = dev->base_addr; ++ int i; ++ ++ /* Reset the chip, just in case. 
*/ ++ natsemi_reset(dev); ++ ++/*** RTnet ***/ ++ rt_stack_connect(dev, &STACK_manager); ++ i = rtdm_irq_request(&np->irq_handle, dev->irq, intr_handler, ++ RTDM_IRQTYPE_SHARED, "rt_natsemi", dev); ++/*** RTnet ***/ ++/* i = request_irq(dev->irq, &intr_handler, SA_SHIRQ, dev->name, dev);*/ ++ if (i) { ++ return i; ++ } ++ ++ if (netif_msg_ifup(np)) ++ rtdm_printk(KERN_DEBUG "%s: netdev_open() irq %d.\n", ++ dev->name, dev->irq); ++ i = alloc_ring(dev); ++ if (i < 0) { ++ rtdm_irq_free(&np->irq_handle); ++ return i; ++ } ++ init_ring(dev); ++ init_registers(dev); ++ /* now set the MAC address according to dev->dev_addr */ ++ for (i = 0; i < 3; i++) { ++ u16 mac = (dev->dev_addr[2*i+1]<<8) + dev->dev_addr[2*i]; ++ ++ writel(i*2, (void *)(ioaddr + RxFilterAddr)); ++ writew(mac, (void *)(ioaddr + RxFilterData)); ++ } ++ writel(np->cur_rx_mode, (void *)(ioaddr + RxFilterAddr)); ++ ++ rtnetif_start_queue(dev); /*** RTnet ***/ ++ ++ if (netif_msg_ifup(np)) ++ rtdm_printk(KERN_DEBUG "%s: Done netdev_open(), status: %#08x.\n", ++ dev->name, (int)readl((void *)(ioaddr + ChipCmd))); ++ ++/*** RTnet ***/ ++ /* Set the timer to check for link beat. */ ++/*** RTnet ***/ ++ ++ return 0; ++} ++ ++static void do_cable_magic(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ ++ if (np->srr >= SRR_DP83816_A5) ++ return; ++ ++ /* ++ * 100 MBit links with short cables can trip an issue with the chip. ++ * The problem manifests as lots of CRC errors and/or flickering ++ * activity LED while idle. This process is based on instructions ++ * from engineers at National. ++ */ ++ if (readl((void *)(dev->base_addr + ChipConfig)) & CfgSpeed100) { ++ u16 data; ++ ++ writew(1, (void *)(dev->base_addr + PGSEL)); ++ /* ++ * coefficient visibility should already be enabled via ++ * DSPCFG | 0x1000 ++ */ ++ data = readw((void *)(dev->base_addr + TSTDAT)) & 0xff; ++ /* ++ * the value must be negative, and within certain values ++ * (these values all come from National) ++ */ ++ if (!(data & 0x80) || ((data >= 0xd8) && (data <= 0xff))) { ++ struct netdev_private *np = dev->priv; ++ ++ /* the bug has been triggered - fix the coefficient */ ++ writew(TSTDAT_FIXED, (void *)(dev->base_addr + TSTDAT)); ++ /* lock the value */ ++ data = readw((void *)(dev->base_addr + DSPCFG)); ++ np->dspcfg = data | DSPCFG_LOCK; ++ writew(np->dspcfg, (void *)(dev->base_addr + DSPCFG)); ++ } ++ writew(0, (void *)(dev->base_addr + PGSEL)); ++ } ++} ++ ++static void undo_cable_magic(struct rtnet_device *dev) ++{ ++ u16 data; ++ struct netdev_private *np = dev->priv; ++ ++ if (np->srr >= SRR_DP83816_A5) ++ return; ++ ++ writew(1, (void *)(dev->base_addr + PGSEL)); ++ /* make sure the lock bit is clear */ ++ data = readw((void *)(dev->base_addr + DSPCFG)); ++ np->dspcfg = data & ~DSPCFG_LOCK; ++ writew(np->dspcfg, (void *)(dev->base_addr + DSPCFG)); ++ writew(0, (void *)(dev->base_addr + PGSEL)); ++} ++ ++static void check_link(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ long ioaddr = dev->base_addr; ++ int duplex; ++ int chipcfg = readl((void *)(ioaddr + ChipConfig)); ++ ++ if (!(chipcfg & CfgLink)) { ++ if (rtnetif_carrier_ok(dev)) { ++ if (netif_msg_link(np)) ++ rtdm_printk(KERN_NOTICE "%s: link down.\n", ++ dev->name); ++ rtnetif_carrier_off(dev); ++ undo_cable_magic(dev); ++ } ++ return; ++ } ++ if (!rtnetif_carrier_ok(dev)) { ++ if (netif_msg_link(np)) ++ rtdm_printk(KERN_NOTICE "%s: link up.\n", dev->name); ++ rtnetif_carrier_on(dev); ++ do_cable_magic(dev); ++ } ++ ++ duplex = 
np->full_duplex || (chipcfg & CfgFullDuplex ? 1 : 0); ++ ++ /* if duplex is set then bit 28 must be set, too */ ++ if (duplex ^ !!(np->rx_config & RxAcceptTx)) { ++ if (netif_msg_link(np)) ++ rtdm_printk(KERN_INFO ++ "%s: Setting %s-duplex based on negotiated " ++ "link capability.\n", dev->name, ++ duplex ? "full" : "half"); ++ if (duplex) { ++ np->rx_config |= RxAcceptTx; ++ np->tx_config |= TxCarrierIgn | TxHeartIgn; ++ } else { ++ np->rx_config &= ~RxAcceptTx; ++ np->tx_config &= ~(TxCarrierIgn | TxHeartIgn); ++ } ++ writel(np->tx_config, (void *)(ioaddr + TxConfig)); ++ writel(np->rx_config, (void *)(ioaddr + RxConfig)); ++ } ++} ++ ++static void init_registers(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ long ioaddr = dev->base_addr; ++ int i; ++ ++ for (i=0;ibase_addr + ChipConfig)) & CfgAnegDone) ++ break; ++ udelay(10); ++ } ++ if (i==NATSEMI_HW_TIMEOUT && netif_msg_link(np)) { ++ rtdm_printk(KERN_INFO ++ "%s: autonegotiation did not complete in %d usec.\n", ++ dev->name, i*10); ++ } ++ ++ /* On page 78 of the spec, they recommend some settings for "optimum ++ performance" to be done in sequence. These settings optimize some ++ of the 100Mbit autodetection circuitry. They say we only want to ++ do this for rev C of the chip, but engineers at NSC (Bradley ++ Kennedy) recommends always setting them. If you don't, you get ++ errors on some autonegotiations that make the device unusable. ++ */ ++ writew(1, (void *)(ioaddr + PGSEL)); ++ writew(PMDCSR_VAL, (void *)(ioaddr + PMDCSR)); ++ writew(TSTDAT_VAL, (void *)(ioaddr + TSTDAT)); ++ writew(DSPCFG_VAL, (void *)(ioaddr + DSPCFG)); ++ writew(SDCFG_VAL, (void *)(ioaddr + SDCFG)); ++ writew(0, (void *)(ioaddr + PGSEL)); ++ np->dspcfg = DSPCFG_VAL; ++ ++ /* Enable PHY Specific event based interrupts. Link state change ++ and Auto-Negotiation Completion are among the affected. ++ Read the intr status to clear it (needed for wake events). ++ */ ++ readw((void *)(ioaddr + MIntrStatus)); ++ writew(MICRIntEn, (void *)(ioaddr + MIntrCtrl)); ++ ++ /* clear any interrupts that are pending, such as wake events */ ++ readl((void *)(ioaddr + IntrStatus)); ++ ++ writel(np->ring_dma, (void *)(ioaddr + RxRingPtr)); ++ writel(np->ring_dma + RX_RING_SIZE * sizeof(struct netdev_desc), ++ (void *)(ioaddr + TxRingPtr)); ++ ++ /* Initialize other registers. ++ * Configure the PCI bus bursts and FIFO thresholds. ++ * Configure for standard, in-spec Ethernet. ++ * Start with half-duplex. check_link will update ++ * to the correct settings. ++ */ ++ ++ /* DRTH: 2: start tx if 64 bytes are in the fifo ++ * FLTH: 0x10: refill with next packet if 512 bytes are free ++ * MXDMA: 0: up to 256 byte bursts. ++ * MXDMA must be <= FLTH ++ * ECRETRY=1 ++ * ATP=1 ++ */ ++ np->tx_config = TxAutoPad | TxCollRetry | TxMxdma_256 | (0x1002); ++ writel(np->tx_config, (void *)(ioaddr + TxConfig)); ++ ++ /* DRTH 0x10: start copying to memory if 128 bytes are in the fifo ++ * MXDMA 0: up to 256 byte bursts ++ */ ++ np->rx_config = RxMxdma_256 | 0x20; ++ writel(np->rx_config, (void *)(ioaddr + RxConfig)); ++ ++ /* Disable PME: ++ * The PME bit is initialized from the EEPROM contents. ++ * PCI cards probably have PME disabled, but motherboard ++ * implementations may have PME set to enable WakeOnLan. ++ * With PME set the chip will scan incoming packets but ++ * nothing will be written to memory. 
*/ ++ np->SavedClkRun = readl((void *)(ioaddr + ClkRun)); ++ writel(np->SavedClkRun & ~PMEEnable, (void *)(ioaddr + ClkRun)); ++ if (np->SavedClkRun & PMEStatus && netif_msg_wol(np)) { ++ rtdm_printk(KERN_NOTICE "%s: Wake-up event %#08x\n", ++ dev->name, readl((void *)(ioaddr + WOLCmd))); ++ } ++ ++ check_link(dev); ++ __set_rx_mode(dev); ++ ++ /* Enable interrupts by setting the interrupt mask. */ ++ writel(DEFAULT_INTR, (void *)(ioaddr + IntrMask)); ++ writel(1, (void *)(ioaddr + IntrEnable)); ++ ++ writel(RxOn | TxOn, (void *)(ioaddr + ChipCmd)); ++ writel(StatsClear, (void *)(ioaddr + StatsCtrl)); /* Clear Stats */ ++} ++ ++/* ++ * netdev_timer: ++ * Purpose: ++ * 1) check for link changes. Usually they are handled by the MII interrupt ++ * but it doesn't hurt to check twice. ++ * 2) check for sudden death of the NIC: ++ * It seems that a reference set for this chip went out with incorrect info, ++ * and there exist boards that aren't quite right. An unexpected voltage ++ * drop can cause the PHY to get itself in a weird state (basically reset). ++ * NOTE: this only seems to affect revC chips. ++ * 3) check of death of the RX path due to OOM ++ */ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static void dump_ring(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ ++ if (netif_msg_pktdata(np)) { ++ int i; ++ rtdm_printk(KERN_DEBUG " Tx ring at %p:\n", np->tx_ring); ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ rtdm_printk(KERN_DEBUG " #%d desc. %#08x %#08x %#08x.\n", ++ i, np->tx_ring[i].next_desc, ++ np->tx_ring[i].cmd_status, ++ np->tx_ring[i].addr); ++ } ++ rtdm_printk(KERN_DEBUG " Rx ring %p:\n", np->rx_ring); ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ rtdm_printk(KERN_DEBUG " #%d desc. %#08x %#08x %#08x.\n", ++ i, np->rx_ring[i].next_desc, ++ np->rx_ring[i].cmd_status, ++ np->rx_ring[i].addr); ++ } ++ } ++} ++ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static int alloc_ring(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ np->rx_ring = pci_alloc_consistent(np->pci_dev, ++ sizeof(struct netdev_desc) * (RX_RING_SIZE+TX_RING_SIZE), ++ &np->ring_dma); ++ if (!np->rx_ring) ++ return -ENOMEM; ++ np->tx_ring = &np->rx_ring[RX_RING_SIZE]; ++ return 0; ++} ++ ++static void refill_rx(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ ++ /* Refill the Rx ring buffers. */ ++ for (; np->cur_rx - np->dirty_rx > 0; np->dirty_rx++) { ++ struct rtskb *skb; ++ int entry = np->dirty_rx % RX_RING_SIZE; ++ if (np->rx_skbuff[entry] == NULL) { ++ skb = rtnetdev_alloc_rtskb(dev, np->rx_buf_sz); ++ np->rx_skbuff[entry] = skb; ++ if (skb == NULL) ++ break; /* Better luck next round. */ ++ np->rx_dma[entry] = pci_map_single(np->pci_dev, ++ skb->data, np->rx_buf_sz, PCI_DMA_FROMDEVICE); ++ np->rx_ring[entry].addr = cpu_to_le32(np->rx_dma[entry]); ++ } ++ np->rx_ring[entry].cmd_status = cpu_to_le32(np->rx_buf_sz); ++ } ++ if (np->cur_rx - np->dirty_rx == RX_RING_SIZE) { ++ if (netif_msg_rx_err(np)) ++ rtdm_printk(KERN_WARNING "%s: going OOM.\n", dev->name); ++ np->oom = 1; ++ } ++} ++ ++/* Initialize the Rx and Tx rings, along with various 'dev' bits. 
*/ ++static void init_ring(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ /* 1) TX ring */ ++ np->dirty_tx = np->cur_tx = 0; ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ np->tx_skbuff[i] = NULL; ++ np->tx_ring[i].next_desc = cpu_to_le32(np->ring_dma ++ +sizeof(struct netdev_desc) ++ *((i+1)%TX_RING_SIZE+RX_RING_SIZE)); ++ np->tx_ring[i].cmd_status = 0; ++ } ++ ++ /* 2) RX ring */ ++ np->dirty_rx = 0; ++ np->cur_rx = RX_RING_SIZE; ++ np->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32); ++ np->oom = 0; ++ np->rx_head_desc = &np->rx_ring[0]; ++ ++ /* Please be carefull before changing this loop - at least gcc-2.95.1 ++ * miscompiles it otherwise. ++ */ ++ /* Initialize all Rx descriptors. */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ np->rx_ring[i].next_desc = cpu_to_le32(np->ring_dma ++ +sizeof(struct netdev_desc) ++ *((i+1)%RX_RING_SIZE)); ++ np->rx_ring[i].cmd_status = cpu_to_le32(DescOwn); ++ np->rx_skbuff[i] = NULL; ++ } ++ refill_rx(dev); ++ dump_ring(dev); ++} ++ ++static void drain_tx(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ if (np->tx_skbuff[i]) { ++ pci_unmap_single(np->pci_dev, ++ np->rx_dma[i], np->tx_skbuff[i]->len, ++ PCI_DMA_TODEVICE); ++ dev_kfree_rtskb(np->tx_skbuff[i]); ++ np->stats.tx_dropped++; ++ } ++ np->tx_skbuff[i] = NULL; ++ } ++} ++ ++static void drain_ring(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ /* Free all the skbuffs in the Rx queue. */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ np->rx_ring[i].cmd_status = 0; ++ np->rx_ring[i].addr = 0xBADF00D0; /* An invalid address. */ ++ if (np->rx_skbuff[i]) { ++ pci_unmap_single(np->pci_dev, ++ np->rx_dma[i], np->rx_skbuff[i]->len, ++ PCI_DMA_FROMDEVICE); ++ dev_kfree_rtskb(np->rx_skbuff[i]); ++ } ++ np->rx_skbuff[i] = NULL; ++ } ++ drain_tx(dev); ++} ++ ++static void free_ring(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ pci_free_consistent(np->pci_dev, ++ sizeof(struct netdev_desc) * (RX_RING_SIZE+TX_RING_SIZE), ++ np->rx_ring, np->ring_dma); ++} ++ ++static int start_tx(struct rtskb *skb, struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ unsigned entry; ++/*** RTnet ***/ ++ rtdm_lockctx_t context; ++/*** RTnet ***/ ++ ++ /* Note: Ordering is important here, set the field with the ++ "ownership" bit last, and only then increment cur_tx. */ ++ ++ /* Calculate the next Tx descriptor entry. */ ++ entry = np->cur_tx % TX_RING_SIZE; ++ ++ np->tx_skbuff[entry] = skb; ++ np->tx_dma[entry] = pci_map_single(np->pci_dev, ++ skb->data,skb->len, PCI_DMA_TODEVICE); ++ ++ np->tx_ring[entry].addr = cpu_to_le32(np->tx_dma[entry]); ++ ++/* spin_lock_irq(&np->lock);*/ ++/*** RTnet ***/ ++ rtdm_lock_get_irqsave(&np->lock, context); ++/*** RTnet ***/ ++ ++ if (!np->hands_off) { ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++ np->tx_ring[entry].cmd_status = cpu_to_le32(DescOwn | skb->len); ++ /* StrongARM: Explicitly cache flush np->tx_ring and ++ * skb->data,skb->len. */ ++ wmb(); ++ np->cur_tx++; ++ if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1) { ++ netdev_tx_done(dev); ++ if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1) ++ rtnetif_stop_queue(dev); ++ } ++ /* Wake the potentially-idle transmit channel. 
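++		 * The TxOn write below only nudges the DMA engine; correctness
++		 * relies on the ordering above: fill addr and cmd_status with
++		 * DescOwn set, issue wmb() so the descriptor is globally
++		 * visible, and only then hit ChipCmd.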
*/ ++ writel(TxOn, (void *)(dev->base_addr + ChipCmd)); ++ } else { ++ dev_kfree_rtskb(skb); /*** RTnet ***/ ++ np->stats.tx_dropped++; ++ } ++ ++/* spin_unlock_irq(&np->lock);*/ ++/*** RTnet ***/ ++ rtdm_lock_put_irqrestore(&np->lock, context); ++/*** RTnet ***/ ++ ++/* dev->trans_start = jiffies;*/ ++ ++ if (netif_msg_tx_queued(np)) { ++ rtdm_printk(KERN_DEBUG "%s: Transmit frame #%d queued in slot %d.\n", ++ dev->name, np->cur_tx, entry); ++ } ++ return 0; ++} ++ ++static void netdev_tx_done(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ ++ for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) { ++ int entry = np->dirty_tx % TX_RING_SIZE; ++ if (np->tx_ring[entry].cmd_status & cpu_to_le32(DescOwn)) ++ break; ++ if (netif_msg_tx_done(np)) ++ rtdm_printk(KERN_DEBUG ++ "%s: tx frame #%d finished, status %#08x.\n", ++ dev->name, np->dirty_tx, ++ le32_to_cpu(np->tx_ring[entry].cmd_status)); ++ if (np->tx_ring[entry].cmd_status & cpu_to_le32(DescPktOK)) { ++ np->stats.tx_packets++; ++ np->stats.tx_bytes += np->tx_skbuff[entry]->len; ++ } else { /* Various Tx errors */ ++ int tx_status = ++ le32_to_cpu(np->tx_ring[entry].cmd_status); ++ if (tx_status & (DescTxAbort|DescTxExcColl)) ++ np->stats.tx_aborted_errors++; ++ if (tx_status & DescTxFIFO) ++ np->stats.tx_fifo_errors++; ++ if (tx_status & DescTxCarrier) ++ np->stats.tx_carrier_errors++; ++ if (tx_status & DescTxOOWCol) ++ np->stats.tx_window_errors++; ++ np->stats.tx_errors++; ++ } ++ pci_unmap_single(np->pci_dev,np->tx_dma[entry], ++ np->tx_skbuff[entry]->len, ++ PCI_DMA_TODEVICE); ++ /* Free the original skb. */ ++ dev_kfree_rtskb(np->tx_skbuff[entry]); /*** RTnet ***/ ++/* dev_kfree_skb_irq(np->tx_skbuff[entry]);*/ ++ np->tx_skbuff[entry] = NULL; ++ } ++ if (rtnetif_queue_stopped(dev) ++ && np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) { ++ /* The ring is no longer full, wake queue. */ ++ rtnetif_wake_queue(dev); ++ } ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. */ ++static int intr_handler(rtdm_irq_t *irq_handle) ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); /*** RTnet ***/ ++ struct rtnet_device *dev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device); /*** RTnet ***/ ++ struct netdev_private *np = dev->priv; ++ unsigned int old_packet_cnt = np->stats.rx_packets; /*** RTnet ***/ ++ long ioaddr = dev->base_addr; ++ int boguscnt = max_interrupt_work; ++ int ret = RTDM_IRQ_NONE; ++ ++ if (np->hands_off) ++ return ret; ++ do { ++ /* Reading automatically acknowledges all int sources. */ ++ u32 intr_status = readl((void *)(ioaddr + IntrStatus)); ++ ++ if (netif_msg_intr(np)) ++ rtdm_printk(KERN_DEBUG ++ "%s: Interrupt, status %#08x, mask %#08x.\n", ++ dev->name, intr_status, ++ readl((void *)(ioaddr + IntrMask))); ++ ++ if (intr_status == 0) ++ break; ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ if (intr_status & ++ (IntrRxDone | IntrRxIntr | RxStatusFIFOOver | ++ IntrRxErr | IntrRxOverrun)) { ++ netdev_rx(dev, &time_stamp); ++ } ++ ++ if (intr_status & ++ (IntrTxDone | IntrTxIntr | IntrTxIdle | IntrTxErr)) { ++ rtdm_lock_get(&np->lock); ++ netdev_tx_done(dev); ++ rtdm_lock_put(&np->lock); ++ } ++ ++ /* Abnormal error summary/uncommon events handlers. 
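++		 * IntrAbnormalSummary groups the rare conditions (link change,
++		 * stats overflow, Tx underrun, wake-up packets, PCI errors);
++		 * they are handed to netdev_error() outside the Rx/Tx fast
++		 * path, and boguscnt bounds the work done in one interrupt.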
*/ ++ if (intr_status & IntrAbnormalSummary) ++ netdev_error(dev, intr_status); ++ ++ if (--boguscnt < 0) { ++ if (netif_msg_intr(np)) ++ rtdm_printk(KERN_WARNING ++ "%s: Too much work at interrupt, " ++ "status=%#08x.\n", ++ dev->name, intr_status); ++ break; ++ } ++ } while (1); ++ ++ if (netif_msg_intr(np)) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt.\n", dev->name); ++ ++/*** RTnet ***/ ++ if (old_packet_cnt != np->stats.rx_packets) ++ rt_mark_stack_mgr(dev); ++ return ret; ++} ++ ++/* This routine is logically part of the interrupt handler, but separated ++ for clarity and better register allocation. */ ++static void netdev_rx(struct rtnet_device *dev, nanosecs_abs_t *time_stamp) ++{ ++ struct netdev_private *np = dev->priv; ++ int entry = np->cur_rx % RX_RING_SIZE; ++ int boguscnt = np->dirty_rx + RX_RING_SIZE - np->cur_rx; ++ s32 desc_status = le32_to_cpu(np->rx_head_desc->cmd_status); ++ ++ /* If the driver owns the next entry it's a new packet. Send it up. */ ++ while (desc_status < 0) { /* e.g. & DescOwn */ ++ if (netif_msg_rx_status(np)) ++ rtdm_printk(KERN_DEBUG ++ " netdev_rx() entry %d status was %#08x.\n", ++ entry, desc_status); ++ if (--boguscnt < 0) ++ break; ++ if ((desc_status&(DescMore|DescPktOK|DescRxLong)) != DescPktOK){ ++ if (desc_status & DescMore) { ++ if (netif_msg_rx_err(np)) ++ rtdm_printk(KERN_WARNING ++ "%s: Oversized(?) Ethernet " ++ "frame spanned multiple " ++ "buffers, entry %#08x " ++ "status %#08x.\n", dev->name, ++ np->cur_rx, desc_status); ++ np->stats.rx_length_errors++; ++ } else { ++ /* There was an error. */ ++ np->stats.rx_errors++; ++ if (desc_status & (DescRxAbort|DescRxOver)) ++ np->stats.rx_over_errors++; ++ if (desc_status & (DescRxLong|DescRxRunt)) ++ np->stats.rx_length_errors++; ++ if (desc_status & (DescRxInvalid|DescRxAlign)) ++ np->stats.rx_frame_errors++; ++ if (desc_status & DescRxCRC) ++ np->stats.rx_crc_errors++; ++ } ++ } else { ++ struct rtskb *skb; ++ /* Omit CRC size. */ ++ int pkt_len = (desc_status & DescSizeMask) - 4; ++ /* Check if the packet is long enough to accept ++ * without copying to a minimally-sized skbuff. */ ++/*** RTnet ***/ ++ { ++ skb = np->rx_skbuff[entry]; ++ pci_unmap_single(np->pci_dev, np->rx_dma[entry], ++ np->rx_skbuff[entry]->len, ++ PCI_DMA_FROMDEVICE); ++ rtskb_put(skb, pkt_len); ++ np->rx_skbuff[entry] = NULL; ++ } ++/*** RTnet ***/ ++ skb->protocol = rt_eth_type_trans(skb, dev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ /*dev->last_rx = jiffies;*/ ++/*** RTnet ***/ ++ np->stats.rx_packets++; ++ np->stats.rx_bytes += pkt_len; ++ } ++ entry = (++np->cur_rx) % RX_RING_SIZE; ++ np->rx_head_desc = &np->rx_ring[entry]; ++ desc_status = le32_to_cpu(np->rx_head_desc->cmd_status); ++ } ++ refill_rx(dev); ++ ++ /* Restart Rx engine if stopped. 
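++	 * In the out-of-memory case nothing is restarted here: the stock
++	 * driver rearmed its recovery timer at this point, and the RTnet port
++	 * leaves that path disabled (see the commented-out mod_timer call).
++	 * In the normal case RxOn is simply rewritten to ChipCmd.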
*/ ++ if (np->oom) ++ ; ++/* mod_timer(&np->timer, jiffies + 1);*/ ++ else ++ writel(RxOn, (void *)(dev->base_addr + ChipCmd)); ++} ++ ++static void netdev_error(struct rtnet_device *dev, int intr_status) ++{ ++ struct netdev_private *np = dev->priv; ++ long ioaddr = dev->base_addr; ++ ++ rtdm_lock_get(&np->lock); ++ if (intr_status & LinkChange) { ++ u16 adv = mdio_read(dev, 1, MII_ADVERTISE); ++ u16 lpa = mdio_read(dev, 1, MII_LPA); ++ if (mdio_read(dev, 1, MII_BMCR) & BMCR_ANENABLE ++ && netif_msg_link(np)) { ++ rtdm_printk(KERN_INFO ++ "%s: Autonegotiation advertising" ++ " %#04x partner %#04x.\n", dev->name, ++ adv, lpa); ++ } ++ ++ /* read MII int status to clear the flag */ ++ readw((void *)(ioaddr + MIntrStatus)); ++ check_link(dev); ++ } ++ if (intr_status & StatsMax) { ++ __get_stats(dev); ++ } ++ if (intr_status & IntrTxUnderrun) { ++ if ((np->tx_config & TxDrthMask) < 62) ++ np->tx_config += 2; ++ if (netif_msg_tx_err(np)) ++ rtdm_printk(KERN_NOTICE ++ "%s: increased Tx threshold, txcfg %#08x.\n", ++ dev->name, np->tx_config); ++ writel(np->tx_config, (void *)(ioaddr + TxConfig)); ++ } ++ if (intr_status & WOLPkt && netif_msg_wol(np)) { ++ int wol_status = readl((void *)(ioaddr + WOLCmd)); ++ rtdm_printk(KERN_NOTICE "%s: Link wake-up event %#08x\n", ++ dev->name, wol_status); ++ } ++ if (intr_status & RxStatusFIFOOver) { ++ if (netif_msg_rx_err(np) && netif_msg_intr(np)) { ++ rtdm_printk(KERN_NOTICE "%s: Rx status FIFO overrun\n", ++ dev->name); ++ } ++ np->stats.rx_fifo_errors++; ++ } ++ /* Hmmmmm, it's not clear how to recover from PCI faults. */ ++ if (intr_status & IntrPCIErr) { ++ rtdm_printk(KERN_NOTICE "%s: PCI error %#08x\n", dev->name, ++ intr_status & IntrPCIErr); ++ np->stats.tx_fifo_errors++; ++ np->stats.rx_fifo_errors++; ++ } ++ rtdm_lock_put(&np->lock); ++} ++ ++static void __get_stats(struct rtnet_device *dev) ++{ ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ ++ /* The chip only need report frame silently dropped. */ ++ np->stats.rx_crc_errors += readl((void *)(ioaddr + RxCRCErrs)); ++ np->stats.rx_missed_errors += readl((void *)(ioaddr + RxMissed)); ++} ++ ++static struct net_device_stats *get_stats(struct rtnet_device *rtdev) ++{ ++ struct netdev_private *np = rtdev->priv; ++ rtdm_lockctx_t context; ++ ++ /* The chip only need report frame silently dropped. */ ++ rtdm_lock_get_irqsave(&np->lock, context); ++ if (rtnetif_running(rtdev) && !np->hands_off) ++ __get_stats(rtdev); ++ rtdm_lock_put_irqrestore(&np->lock, context); ++ ++ return &np->stats; ++} ++ ++#define HASH_TABLE 0x200 ++static void __set_rx_mode(struct rtnet_device *dev) ++{ ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ u8 mc_filter[64]; /* Multicast hash filter */ ++ u32 rx_mode; ++ ++ if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */ ++ /* Unconditionally log net taps. 
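++		 * The three branches below build rx_mode from RxFilterEnable
++		 * plus the acceptance bits (promiscuous adds AcceptAllPhys,
++		 * IFF_ALLMULTI adds AcceptAllMulticast); the multicast case
++		 * also clears the 64-byte hash table 16 bits at a time through
++		 * the RxFilterAddr/RxFilterData window at HASH_TABLE (0x200).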
*/ ++ rtdm_printk(KERN_NOTICE "%s: Promiscuous mode enabled.\n", ++ dev->name); ++ rx_mode = RxFilterEnable | AcceptBroadcast ++ | AcceptAllMulticast | AcceptAllPhys | AcceptMyPhys; ++ } else if (dev->flags & IFF_ALLMULTI) { ++ rx_mode = RxFilterEnable | AcceptBroadcast ++ | AcceptAllMulticast | AcceptMyPhys; ++ } else { ++ int i; ++ ++ memset(mc_filter, 0, sizeof(mc_filter)); ++ rx_mode = RxFilterEnable | AcceptBroadcast ++ | AcceptMulticast | AcceptMyPhys; ++ for (i = 0; i < 64; i += 2) { ++ writew(HASH_TABLE + i, (void *)(ioaddr + RxFilterAddr)); ++ writew((mc_filter[i+1]<<8) + mc_filter[i], ++ (void *)(ioaddr + RxFilterData)); ++ } ++ } ++ writel(rx_mode, (void *)(ioaddr + RxFilterAddr)); ++ np->cur_rx_mode = rx_mode; ++} ++/*** RTnet ++static void set_rx_mode(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ spin_lock_irq(&np->lock); ++ if (!np->hands_off) ++ __set_rx_mode(dev); ++ spin_unlock_irq(&np->lock); ++} ++RTnet ***/ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static void enable_wol_mode(struct rtnet_device *dev, int enable_intr) ++{ ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ ++ if (netif_msg_wol(np)) ++ rtdm_printk(KERN_INFO "%s: remaining active for wake-on-lan\n", ++ dev->name); ++ ++ /* For WOL we must restart the rx process in silent mode. ++ * Write NULL to the RxRingPtr. Only possible if ++ * rx process is stopped ++ */ ++ writel(0, (void *)(ioaddr + RxRingPtr)); ++ ++ /* read WoL status to clear */ ++ readl((void *)(ioaddr + WOLCmd)); ++ ++ /* PME on, clear status */ ++ writel(np->SavedClkRun | PMEEnable | PMEStatus, (void *)(ioaddr + ClkRun)); ++ ++ /* and restart the rx process */ ++ writel(RxOn, (void *)(ioaddr + ChipCmd)); ++ ++ if (enable_intr) { ++ /* enable the WOL interrupt. ++ * Could be used to send a netlink message. ++ */ ++ writel(WOLPkt | LinkChange, (void *)(ioaddr + IntrMask)); ++ writel(1, (void *)(ioaddr + IntrEnable)); ++ } ++} ++ ++static int netdev_close(struct rtnet_device *dev) ++{ ++ int i; ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ ++ if (netif_msg_ifdown(np)) ++ rtdm_printk(KERN_DEBUG ++ "%s: Shutting down ethercard, status was %#04x.\n", ++ dev->name, (int)readl((void *)(ioaddr + ChipCmd))); ++ if (netif_msg_pktdata(np)) ++ rtdm_printk(KERN_DEBUG ++ "%s: Queue pointers were Tx %d / %d, Rx %d / %d.\n", ++ dev->name, np->cur_tx, np->dirty_tx, ++ np->cur_rx, np->dirty_rx); ++ ++ /* ++ * FIXME: what if someone tries to close a device ++ * that is suspended? ++ * Should we reenable the nic to switch to ++ * the final WOL settings? ++ */ ++/*** RTnet *** ++ del_timer_sync(&np->timer); ++ *** RTnet ***/ ++/* disable_irq(dev->irq);*/ ++ rtdm_irq_disable(&np->irq_handle); ++ rtdm_lock_get(&np->lock); ++ /* Disable interrupts, and flush posted writes */ ++ writel(0, (void *)(ioaddr + IntrEnable)); ++ readl((void *)(ioaddr + IntrEnable)); ++ np->hands_off = 1; ++ rtdm_lock_put(&np->lock); ++ ++/*** RTnet ***/ ++ if ( (i=rtdm_irq_free(&np->irq_handle))<0 ) ++ return i; ++ ++ rt_stack_disconnect(dev); ++/*** RTnet ***/ ++ ++/* enable_irq(dev->irq);*/ ++ ++/* free_irq(dev->irq, dev);*/ ++ ++ /* Interrupt disabled, interrupt handler released, ++ * queue stopped, timer deleted, rtnl_lock held ++ * All async codepaths that access the driver are disabled. 
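++	 * That is why the code below may clear np->hands_off again and touch
++	 * the chip (freeze statistics, stop Rx/Tx, read the final counters)
++	 * without racing against the interrupt handler, which has already
++	 * been freed.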
++ */ ++ rtdm_lock_get(&np->lock); ++ np->hands_off = 0; ++ readl((void *)(ioaddr + IntrMask)); ++ readw((void *)(ioaddr + MIntrStatus)); ++ ++ /* Freeze Stats */ ++ writel(StatsFreeze, (void *)(ioaddr + StatsCtrl)); ++ ++ /* Stop the chip's Tx and Rx processes. */ ++ natsemi_stop_rxtx(dev); ++ ++ __get_stats(dev); ++ rtdm_lock_put(&np->lock); ++ ++ /* clear the carrier last - an interrupt could reenable it otherwise */ ++ rtnetif_carrier_off(dev); ++ rtnetif_stop_queue(dev); ++ ++ dump_ring(dev); ++ drain_ring(dev); ++ free_ring(dev); ++ ++ { ++ u32 wol = readl((void *)(ioaddr + WOLCmd)) & WakeOptsSummary; ++ if (wol) { ++ /* restart the NIC in WOL mode. ++ * The nic must be stopped for this. ++ */ ++ enable_wol_mode(dev, 0); ++ } else { ++ /* Restore PME enable bit unmolested */ ++ writel(np->SavedClkRun, (void *)(ioaddr + ClkRun)); ++ } ++ } ++ ++ return 0; ++} ++ ++ ++static void natsemi_remove1 (struct pci_dev *pdev) ++{ ++ ++ /*** RTnet ***/ ++ struct rtnet_device *dev = pci_get_drvdata(pdev); ++ ++ rt_unregister_rtnetdev(dev); ++ rt_rtdev_disconnect(dev); ++/*** RTnet ***/ ++ ++ pci_release_regions (pdev); ++ iounmap ((char *) dev->base_addr); ++ rtdev_free(dev); /*** RTnet ***/ ++ pci_set_drvdata(pdev, NULL); ++} ++ ++#ifdef CONFIG_PM ++ ++/* ++ * The ns83815 chip doesn't have explicit RxStop bits. ++ * Kicking the Rx or Tx process for a new packet reenables the Rx process ++ * of the nic, thus this function must be very careful: ++ * ++ * suspend/resume synchronization: ++ * entry points: ++ * netdev_open, netdev_close, netdev_ioctl, set_rx_mode, intr_handler, ++ * start_tx, tx_timeout ++ * ++ * No function accesses the hardware without checking np->hands_off. ++ * the check occurs under spin_lock_irq(&np->lock); ++ * exceptions: ++ * * netdev_ioctl: noncritical access. ++ * * netdev_open: cannot happen due to the device_detach ++ * * netdev_close: doesn't hurt. ++ * * netdev_timer: timer stopped by natsemi_suspend. ++ * * intr_handler: doesn't acquire the spinlock. suspend calls ++ * disable_irq() to enforce synchronization. ++ * ++ * Interrupts must be disabled, otherwise hands_off can cause irq storms. ++ */ ++ ++#endif /* CONFIG_PM */ ++ ++static struct pci_driver natsemi_driver = { ++ .name = DRV_NAME, ++ .id_table = natsemi_pci_tbl, ++ .probe = natsemi_probe1, ++ .remove = natsemi_remove1, ++/*#ifdef CONFIG_PM*/ ++}; ++ ++static int __init natsemi_init_mod (void) ++{ ++/* when a module, this is printed whether or not devices are found in probe */ ++#ifdef MODULE ++ rtdm_printk(version); ++#endif ++ ++ return pci_register_driver (&natsemi_driver); ++} ++ ++static void __exit natsemi_exit_mod (void) ++{ ++ pci_unregister_driver (&natsemi_driver); ++} ++ ++module_init(natsemi_init_mod); ++module_exit(natsemi_exit_mod); +--- linux/drivers/xenomai/net/drivers/macb.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/macb.c 2021-04-07 16:01:27.354633985 +0800 +@@ -0,0 +1,1821 @@ ++/* ++ * Cadence MACB/GEM Ethernet Controller driver ++ * ++ * Copyright (C) 2004-2006 Atmel Corporation ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * RTnet porting by Cristiano Mantovani & Stefano Banzi (Marposs SpA). 
++ * Copyright (C) 2014 Gilles Chanteperdrix ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include "rt_macb.h" ++ ++#define MACB_RX_BUFFER_SIZE 128 ++#define RX_BUFFER_MULTIPLE 64 /* bytes */ ++#define RX_RING_SIZE 512 /* must be power of 2 */ ++#define RX_RING_BYTES (sizeof(struct macb_dma_desc) * RX_RING_SIZE) ++ ++#define TX_RING_SIZE 128 /* must be power of 2 */ ++#define TX_RING_BYTES (sizeof(struct macb_dma_desc) * TX_RING_SIZE) ++ ++/* level of occupied TX descriptors under which we wake up TX process */ ++#define MACB_TX_WAKEUP_THRESH (3 * TX_RING_SIZE / 4) ++ ++#define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(RXUBR) \ ++ | MACB_BIT(ISR_ROVR)) ++#define MACB_TX_ERR_FLAGS (MACB_BIT(ISR_TUND) \ ++ | MACB_BIT(ISR_RLE) \ ++ | MACB_BIT(TXERR)) ++#define MACB_TX_INT_FLAGS (MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP)) ++ ++/* ++ * Graceful stop timeouts in us. We should allow up to ++ * 1 frame time (10 Mbits/s, full-duplex, ignoring collisions) ++ */ ++#define MACB_HALT_TIMEOUT 1230 ++ ++/* Ring buffer accessors */ ++static unsigned int macb_tx_ring_wrap(unsigned int index) ++{ ++ return index & (TX_RING_SIZE - 1); ++} ++ ++static struct macb_dma_desc *macb_tx_desc(struct macb *bp, unsigned int index) ++{ ++ return &bp->tx_ring[macb_tx_ring_wrap(index)]; ++} ++ ++static struct macb_tx_skb *macb_tx_skb(struct macb *bp, unsigned int index) ++{ ++ return &bp->tx_skb[macb_tx_ring_wrap(index)]; ++} ++ ++static unsigned int macb_rx_ring_wrap(unsigned int index) ++{ ++ return index & (RX_RING_SIZE - 1); ++} ++ ++static struct macb_dma_desc *macb_rx_desc(struct macb *bp, unsigned int index) ++{ ++ return &bp->rx_ring[macb_rx_ring_wrap(index)]; ++} ++ ++static void *macb_rx_buffer(struct macb *bp, unsigned int index) ++{ ++ return bp->rx_buffers + bp->rx_buffer_size * macb_rx_ring_wrap(index); ++} ++ ++void rtmacb_set_hwaddr(struct macb *bp) ++{ ++ u32 bottom; ++ u16 top; ++ ++ bottom = cpu_to_le32(*((u32 *)bp->dev->dev_addr)); ++ macb_or_gem_writel(bp, SA1B, bottom); ++ top = cpu_to_le16(*((u16 *)(bp->dev->dev_addr + 4))); ++ macb_or_gem_writel(bp, SA1T, top); ++ ++ /* Clear unused address register sets */ ++ macb_or_gem_writel(bp, SA2B, 0); ++ macb_or_gem_writel(bp, SA2T, 0); ++ macb_or_gem_writel(bp, SA3B, 0); ++ macb_or_gem_writel(bp, SA3T, 0); ++ macb_or_gem_writel(bp, SA4B, 0); ++ macb_or_gem_writel(bp, SA4T, 0); ++} ++EXPORT_SYMBOL_GPL(rtmacb_set_hwaddr); ++ ++void rtmacb_get_hwaddr(struct macb *bp) ++{ ++ struct macb_platform_data *pdata; ++ u32 bottom; ++ u16 top; ++ u8 addr[6]; ++ int i; ++ ++ pdata = dev_get_platdata(&bp->pdev->dev); ++ ++ /* Check all 4 address register for vaild address */ ++ for (i = 0; i < 4; i++) { ++ bottom = macb_or_gem_readl(bp, SA1B + i * 8); ++ top = macb_or_gem_readl(bp, SA1T + i * 8); ++ ++ if (pdata && pdata->rev_eth_addr) { ++ addr[5] = bottom & 0xff; ++ addr[4] = (bottom >> 8) & 0xff; ++ addr[3] = (bottom >> 16) & 0xff; ++ addr[2] = (bottom >> 24) & 0xff; ++ addr[1] = top & 0xff; ++ addr[0] = (top & 0xff00) >> 8; ++ } else { ++ addr[0] = bottom & 0xff; ++ addr[1] = (bottom >> 8) & 0xff; ++ addr[2] = (bottom >> 16) & 0xff; ++ addr[3] = (bottom >> 24) & 0xff; ++ addr[4] = top & 0xff; ++ addr[5] = (top >> 8) & 0xff; ++ } ++ ++ if (is_valid_ether_addr(addr)) { ++ 
memcpy(bp->dev->dev_addr, addr, sizeof(addr)); ++ return; ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(rtmacb_get_hwaddr); ++ ++static int macb_mdio_read(struct mii_bus *bus, int mii_id, int regnum) ++{ ++ struct macb *bp = bus->priv; ++ int value; ++ ++ macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF) ++ | MACB_BF(RW, MACB_MAN_READ) ++ | MACB_BF(PHYA, mii_id) ++ | MACB_BF(REGA, regnum) ++ | MACB_BF(CODE, MACB_MAN_CODE))); ++ ++ /* wait for end of transfer */ ++ while (!MACB_BFEXT(IDLE, macb_readl(bp, NSR))) ++ cpu_relax(); ++ ++ value = MACB_BFEXT(DATA, macb_readl(bp, MAN)); ++ ++ return value; ++} ++ ++static int macb_mdio_write(struct mii_bus *bus, int mii_id, int regnum, ++ u16 value) ++{ ++ struct macb *bp = bus->priv; ++ ++ macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF) ++ | MACB_BF(RW, MACB_MAN_WRITE) ++ | MACB_BF(PHYA, mii_id) ++ | MACB_BF(REGA, regnum) ++ | MACB_BF(CODE, MACB_MAN_CODE) ++ | MACB_BF(DATA, value))); ++ ++ /* wait for end of transfer */ ++ while (!MACB_BFEXT(IDLE, macb_readl(bp, NSR))) ++ cpu_relax(); ++ ++ return 0; ++} ++ ++/** ++ * macb_set_tx_clk() - Set a clock to a new frequency ++ * @clk Pointer to the clock to change ++ * @rate New frequency in Hz ++ * @dev Pointer to the struct rtnet_device ++ */ ++static void macb_set_tx_clk(struct clk *clk, int speed, struct rtnet_device *dev) ++{ ++ long ferr, rate, rate_rounded; ++ ++ switch (speed) { ++ case SPEED_10: ++ rate = 2500000; ++ break; ++ case SPEED_100: ++ rate = 25000000; ++ break; ++ case SPEED_1000: ++ rate = 125000000; ++ break; ++ default: ++ return; ++ } ++ ++ rate_rounded = clk_round_rate(clk, rate); ++ if (rate_rounded < 0) ++ return; ++ ++ /* RGMII allows 50 ppm frequency error. Test and warn if this limit ++ * is not satisfied. ++ */ ++ ferr = abs(rate_rounded - rate); ++ ferr = DIV_ROUND_UP(ferr, rate / 100000); ++ if (ferr > 5) ++ rtdev_warn(dev, "unable to generate target frequency: %ld Hz\n", ++ rate); ++ ++ if (clk_set_rate(clk, rate_rounded)) ++ rtdev_err(dev, "adjusting tx_clk failed.\n"); ++} ++ ++struct macb_dummy_netdev_priv { ++ struct rtnet_device *rtdev; ++}; ++ ++static void macb_handle_link_change(struct net_device *nrt_dev) ++{ ++ struct macb_dummy_netdev_priv *p = netdev_priv(nrt_dev); ++ struct rtnet_device *dev = p->rtdev; ++ struct macb *bp = rtnetdev_priv(dev); ++ struct phy_device *phydev = bp->phy_dev; ++ unsigned long flags; ++ ++ int status_change = 0; ++ ++ rtdm_lock_get_irqsave(&bp->lock, flags); ++ ++ if (phydev->link) { ++ if ((bp->speed != phydev->speed) || ++ (bp->duplex != phydev->duplex)) { ++ u32 reg; ++ ++ reg = macb_readl(bp, NCFGR); ++ reg &= ~(MACB_BIT(SPD) | MACB_BIT(FD)); ++ if (macb_is_gem(bp)) ++ reg &= ~GEM_BIT(GBE); ++ ++ if (phydev->duplex) ++ reg |= MACB_BIT(FD); ++ if (phydev->speed == SPEED_100) ++ reg |= MACB_BIT(SPD); ++ if (phydev->speed == SPEED_1000) ++ reg |= GEM_BIT(GBE); ++ ++ macb_or_gem_writel(bp, NCFGR, reg); ++ ++ bp->speed = phydev->speed; ++ bp->duplex = phydev->duplex; ++ status_change = 1; ++ } ++ } ++ ++ if (phydev->link != bp->link) { ++ if (!phydev->link) { ++ bp->speed = 0; ++ bp->duplex = -1; ++ } ++ bp->link = phydev->link; ++ ++ status_change = 1; ++ } ++ ++ rtdm_lock_put_irqrestore(&bp->lock, flags); ++ ++ if (!IS_ERR(bp->tx_clk)) ++ macb_set_tx_clk(bp->tx_clk, phydev->speed, dev); ++ ++ if (status_change) { ++ if (phydev->link) { ++ rtnetif_carrier_on(dev); ++ rtdev_info(dev, "link up (%d/%s)\n", ++ phydev->speed, ++ phydev->duplex == DUPLEX_FULL ? 
++ "Full" : "Half"); ++ } else { ++ rtnetif_carrier_off(dev); ++ rtdev_info(dev, "link down\n"); ++ } ++ } ++} ++ ++/* based on au1000_eth. c*/ ++static int macb_mii_probe(struct rtnet_device *dev) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ struct macb_dummy_netdev_priv *p; ++ struct macb_platform_data *pdata; ++ struct phy_device *phydev; ++ struct net_device *dummy; ++ int phy_irq; ++ int ret; ++ ++ phydev = phy_find_first(bp->mii_bus); ++ if (!phydev) { ++ rtdev_err(dev, "no PHY found\n"); ++ return -ENXIO; ++ } ++ ++ pdata = dev_get_platdata(&bp->pdev->dev); ++ if (pdata && gpio_is_valid(pdata->phy_irq_pin)) { ++ ret = devm_gpio_request(&bp->pdev->dev, pdata->phy_irq_pin, "phy int"); ++ if (!ret) { ++ phy_irq = gpio_to_irq(pdata->phy_irq_pin); ++ phydev->irq = (phy_irq < 0) ? PHY_POLL : phy_irq; ++ } ++ } ++ ++ dummy = alloc_etherdev(sizeof(*p)); ++ p = netdev_priv(dummy); ++ p->rtdev = dev; ++ bp->phy_phony_net_device = dummy; ++ ++ /* attach the mac to the phy */ ++ ret = phy_connect_direct(dummy, phydev, &macb_handle_link_change, ++ bp->phy_interface); ++ if (ret) { ++ rtdev_err(dev, "Could not attach to PHY\n"); ++ return ret; ++ } ++ ++ /* mask with MAC supported features */ ++ if (macb_is_gem(bp)) ++ phydev->supported &= PHY_GBIT_FEATURES; ++ else ++ phydev->supported &= PHY_BASIC_FEATURES; ++ ++ phydev->advertising = phydev->supported; ++ ++ bp->link = 0; ++ bp->speed = 0; ++ bp->duplex = -1; ++ bp->phy_dev = phydev; ++ ++ return 0; ++} ++ ++int rtmacb_mii_init(struct macb *bp) ++{ ++ struct macb_platform_data *pdata; ++ struct device_node *np; ++ int err = -ENXIO, i; ++ ++ /* Enable management port */ ++ macb_writel(bp, NCR, MACB_BIT(MPE)); ++ ++ bp->mii_bus = mdiobus_alloc(); ++ if (bp->mii_bus == NULL) { ++ err = -ENOMEM; ++ goto err_out; ++ } ++ ++ bp->mii_bus->name = "MACB_mii_bus"; ++ bp->mii_bus->read = &macb_mdio_read; ++ bp->mii_bus->write = &macb_mdio_write; ++ snprintf(bp->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", ++ bp->pdev->name, bp->pdev->id); ++ bp->mii_bus->priv = bp; ++ bp->mii_bus->parent = &bp->pdev->dev; ++ pdata = dev_get_platdata(&bp->pdev->dev); ++ ++ bp->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); ++ if (!bp->mii_bus->irq) { ++ err = -ENOMEM; ++ goto err_out_free_mdiobus; ++ } ++ ++ np = bp->pdev->dev.of_node; ++ if (np) { ++ /* try dt phy registration */ ++ err = of_mdiobus_register(bp->mii_bus, np); ++ ++ /* fallback to standard phy registration if no phy were ++ found during dt phy registration */ ++ if (!err && !phy_find_first(bp->mii_bus)) { ++ for (i = 0; i < PHY_MAX_ADDR; i++) { ++ struct phy_device *phydev; ++ ++ phydev = mdiobus_scan(bp->mii_bus, i); ++ if (IS_ERR(phydev)) { ++ err = PTR_ERR(phydev); ++ break; ++ } ++ } ++ ++ if (err) ++ goto err_out_unregister_bus; ++ } ++ } else { ++ for (i = 0; i < PHY_MAX_ADDR; i++) ++ bp->mii_bus->irq[i] = PHY_POLL; ++ ++ if (pdata) ++ bp->mii_bus->phy_mask = pdata->phy_mask; ++ ++ err = mdiobus_register(bp->mii_bus); ++ } ++ ++ if (err) ++ goto err_out_free_mdio_irq; ++ ++ err = macb_mii_probe(bp->dev); ++ if (err) ++ goto err_out_unregister_bus; ++ ++ return 0; ++ ++err_out_unregister_bus: ++ mdiobus_unregister(bp->mii_bus); ++err_out_free_mdio_irq: ++ kfree(bp->mii_bus->irq); ++err_out_free_mdiobus: ++ mdiobus_free(bp->mii_bus); ++err_out: ++ return err; ++} ++EXPORT_SYMBOL_GPL(rtmacb_mii_init); ++ ++static void macb_update_stats(struct macb *bp) ++{ ++ u32 __iomem *reg = bp->regs + MACB_PFR; ++ u32 *p = &bp->hw_stats.macb.rx_pause_frames; ++ u32 *end = 
&bp->hw_stats.macb.tx_pause_frames + 1; ++ ++ WARN_ON((unsigned long)(end - p - 1) != (MACB_TPF - MACB_PFR) / 4); ++ ++ for(; p < end; p++, reg++) ++ *p += __raw_readl(reg); ++} ++ ++static int macb_halt_tx(struct macb *bp) ++{ ++ unsigned long halt_time, timeout; ++ u32 status; ++ ++ macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(THALT)); ++ ++ timeout = jiffies + usecs_to_jiffies(MACB_HALT_TIMEOUT); ++ do { ++ halt_time = jiffies; ++ status = macb_readl(bp, TSR); ++ if (!(status & MACB_BIT(TGO))) ++ return 0; ++ ++ usleep_range(10, 250); ++ } while (time_before(halt_time, timeout)); ++ ++ return -ETIMEDOUT; ++} ++ ++static void macb_tx_error_task(struct work_struct *work) ++{ ++ struct macb *bp = container_of(work, struct macb, tx_error_task); ++ struct macb_tx_skb *tx_skb; ++ struct rtskb *skb; ++ unsigned int tail; ++ ++ rtdev_vdbg(bp->dev, "macb_tx_error_task: t = %u, h = %u\n", ++ bp->tx_tail, bp->tx_head); ++ ++ /* Make sure nobody is trying to queue up new packets */ ++ rtnetif_stop_queue(bp->dev); ++ ++ /* ++ * Stop transmission now ++ * (in case we have just queued new packets) ++ */ ++ if (macb_halt_tx(bp)) ++ /* Just complain for now, reinitializing TX path can be good */ ++ rtdev_err(bp->dev, "BUG: halt tx timed out\n"); ++ ++ /* No need for the lock here as nobody will interrupt us anymore */ ++ ++ /* ++ * Treat frames in TX queue including the ones that caused the error. ++ * Free transmit buffers in upper layer. ++ */ ++ for (tail = bp->tx_tail; tail != bp->tx_head; tail++) { ++ struct macb_dma_desc *desc; ++ u32 ctrl; ++ ++ desc = macb_tx_desc(bp, tail); ++ ctrl = desc->ctrl; ++ tx_skb = macb_tx_skb(bp, tail); ++ skb = tx_skb->skb; ++ ++ if (ctrl & MACB_BIT(TX_USED)) { ++ rtdev_vdbg(bp->dev, "txerr skb %u (data %p) TX complete\n", ++ macb_tx_ring_wrap(tail), skb->data); ++ bp->stats.tx_packets++; ++ bp->stats.tx_bytes += skb->len; ++ } else { ++ /* ++ * "Buffers exhausted mid-frame" errors may only happen ++ * if the driver is buggy, so complain loudly about those. ++ * Statistics are updated by hardware. 
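++			 * For such descriptors the code below forces TX_USED
++			 * back on so the ring entry is reclaimed; the rtskb is
++			 * then unmapped and freed exactly like a successfully
++			 * transmitted frame, only the accounting differs.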
++ */ ++ if (ctrl & MACB_BIT(TX_BUF_EXHAUSTED)) ++ rtdev_err(bp->dev, ++ "BUG: TX buffers exhausted mid-frame\n"); ++ ++ desc->ctrl = ctrl | MACB_BIT(TX_USED); ++ } ++ ++ dma_unmap_single(&bp->pdev->dev, tx_skb->mapping, skb->len, ++ DMA_TO_DEVICE); ++ tx_skb->skb = NULL; ++ dev_kfree_rtskb(skb); ++ } ++ ++ /* Make descriptor updates visible to hardware */ ++ wmb(); ++ ++ /* Reinitialize the TX desc queue */ ++ macb_writel(bp, TBQP, bp->tx_ring_dma); ++ /* Make TX ring reflect state of hardware */ ++ bp->tx_head = bp->tx_tail = 0; ++ ++ /* Now we are ready to start transmission again */ ++ rtnetif_wake_queue(bp->dev); ++ ++ /* Housework before enabling TX IRQ */ ++ macb_writel(bp, TSR, macb_readl(bp, TSR)); ++ macb_writel(bp, IER, MACB_TX_INT_FLAGS); ++} ++ ++static void macb_tx_interrupt(struct macb *bp) ++{ ++ unsigned int tail; ++ unsigned int head; ++ u32 status; ++ ++ status = macb_readl(bp, TSR); ++ macb_writel(bp, TSR, status); ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_BIT(TCOMP)); ++ ++ rtdev_vdbg(bp->dev, "macb_tx_interrupt status = 0x%03lx\n", ++ (unsigned long)status); ++ ++ head = bp->tx_head; ++ for (tail = bp->tx_tail; tail != head; tail++) { ++ struct macb_tx_skb *tx_skb; ++ struct rtskb *skb; ++ struct macb_dma_desc *desc; ++ u32 ctrl; ++ ++ desc = macb_tx_desc(bp, tail); ++ ++ /* Make hw descriptor updates visible to CPU */ ++ rmb(); ++ ++ ctrl = desc->ctrl; ++ ++ if (!(ctrl & MACB_BIT(TX_USED))) ++ break; ++ ++ tx_skb = macb_tx_skb(bp, tail); ++ skb = tx_skb->skb; ++ ++ rtdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n", ++ macb_tx_ring_wrap(tail), skb->data); ++ dma_unmap_single(&bp->pdev->dev, tx_skb->mapping, skb->len, ++ DMA_TO_DEVICE); ++ bp->stats.tx_packets++; ++ bp->stats.tx_bytes += skb->len; ++ tx_skb->skb = NULL; ++ dev_kfree_rtskb(skb); ++ } ++ ++ bp->tx_tail = tail; ++ if (rtnetif_queue_stopped(bp->dev) ++ && CIRC_CNT(bp->tx_head, bp->tx_tail, ++ TX_RING_SIZE) <= MACB_TX_WAKEUP_THRESH) ++ rtnetif_wake_queue(bp->dev); ++} ++ ++static void gem_rx_refill(struct macb *bp) ++{ ++ unsigned int entry; ++ struct rtskb *skb; ++ dma_addr_t paddr; ++ ++ while (CIRC_SPACE(bp->rx_prepared_head, bp->rx_tail, RX_RING_SIZE) > 0) { ++ entry = macb_rx_ring_wrap(bp->rx_prepared_head); ++ ++ /* Make hw descriptor updates visible to CPU */ ++ rmb(); ++ ++ bp->rx_prepared_head++; ++ ++ if (bp->rx_skbuff[entry] == NULL) { ++ /* allocate rtskb for this free entry in ring */ ++ skb = rtnetdev_alloc_rtskb(bp->dev, bp->rx_buffer_size); ++ if (unlikely(skb == NULL)) { ++ rtdev_err(bp->dev, ++ "Unable to allocate sk_buff\n"); ++ break; ++ } ++ ++ /* now fill corresponding descriptor entry */ ++ paddr = dma_map_single(&bp->pdev->dev, skb->data, ++ bp->rx_buffer_size, DMA_FROM_DEVICE); ++ if (dma_mapping_error(&bp->pdev->dev, paddr)) { ++ dev_kfree_rtskb(skb); ++ break; ++ } ++ ++ bp->rx_skbuff[entry] = skb; ++ ++ if (entry == RX_RING_SIZE - 1) ++ paddr |= MACB_BIT(RX_WRAP); ++ bp->rx_ring[entry].addr = paddr; ++ bp->rx_ring[entry].ctrl = 0; ++ ++ /* properly align Ethernet header */ ++ rtskb_reserve(skb, NET_IP_ALIGN); ++ } ++ } ++ ++ /* Make descriptor updates visible to hardware */ ++ wmb(); ++ ++ rtdev_vdbg(bp->dev, "rx ring: prepared head %d, tail %d\n", ++ bp->rx_prepared_head, bp->rx_tail); ++} ++ ++/* Mark DMA descriptors from begin up to and not including end as unused */ ++static void discard_partial_frame(struct macb *bp, unsigned int begin, ++ unsigned int end) ++{ ++ unsigned int frag; ++ ++ for (frag = begin; frag != end; frag++) { ++ 
struct macb_dma_desc *desc = macb_rx_desc(bp, frag); ++ desc->addr &= ~MACB_BIT(RX_USED); ++ } ++ ++ /* Make descriptor updates visible to hardware */ ++ wmb(); ++ ++ /* ++ * When this happens, the hardware stats registers for ++ * whatever caused this is updated, so we don't have to record ++ * anything. ++ */ ++} ++ ++static int gem_rx(struct macb *bp, int budget, nanosecs_abs_t *time_stamp) ++{ ++ unsigned int len; ++ unsigned int entry; ++ struct rtskb *skb; ++ struct macb_dma_desc *desc; ++ int count = 0, status; ++ ++ status = macb_readl(bp, RSR); ++ macb_writel(bp, RSR, status); ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_BIT(RCOMP)); ++ ++ while (count < budget) { ++ u32 addr, ctrl; ++ ++ entry = macb_rx_ring_wrap(bp->rx_tail); ++ desc = &bp->rx_ring[entry]; ++ ++ /* Make hw descriptor updates visible to CPU */ ++ rmb(); ++ ++ addr = desc->addr; ++ ctrl = desc->ctrl; ++ ++ if (!(addr & MACB_BIT(RX_USED))) ++ break; ++ ++ bp->rx_tail++; ++ count++; ++ ++ if (!(ctrl & MACB_BIT(RX_SOF) && ctrl & MACB_BIT(RX_EOF))) { ++ rtdev_err(bp->dev, ++ "not whole frame pointed by descriptor\n"); ++ bp->stats.rx_dropped++; ++ break; ++ } ++ skb = bp->rx_skbuff[entry]; ++ if (unlikely(!skb)) { ++ rtdev_err(bp->dev, ++ "inconsistent Rx descriptor chain\n"); ++ bp->stats.rx_dropped++; ++ break; ++ } ++ skb->time_stamp = *time_stamp; ++ /* now everything is ready for receiving packet */ ++ bp->rx_skbuff[entry] = NULL; ++ len = MACB_BFEXT(RX_FRMLEN, ctrl); ++ ++ rtdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len); ++ ++ rtskb_put(skb, len); ++ addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr)); ++ dma_unmap_single(&bp->pdev->dev, addr, ++ bp->rx_buffer_size, DMA_FROM_DEVICE); ++ ++ skb->protocol = rt_eth_type_trans(skb, bp->dev); ++ ++ bp->stats.rx_packets++; ++ bp->stats.rx_bytes += skb->len; ++ ++#if defined(DEBUG) && defined(VERBOSE_DEBUG) ++ rtdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n", ++ skb->len, skb->csum); ++ print_hex_dump(KERN_DEBUG, " mac: ", DUMP_PREFIX_ADDRESS, 16, 1, ++ skb->mac_header, 16, true); ++ print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_ADDRESS, 16, 1, ++ skb->data, 32, true); ++#endif ++ ++ rtnetif_rx(skb); ++ } ++ ++ gem_rx_refill(bp); ++ ++ return count; ++} ++ ++static int macb_rx_frame(struct macb *bp, unsigned int first_frag, ++ unsigned int last_frag, nanosecs_abs_t *time_stamp) ++{ ++ unsigned int len; ++ unsigned int frag; ++ unsigned int offset; ++ struct rtskb *skb; ++ struct macb_dma_desc *desc; ++ ++ desc = macb_rx_desc(bp, last_frag); ++ len = MACB_BFEXT(RX_FRMLEN, desc->ctrl); ++ ++ rtdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n", ++ macb_rx_ring_wrap(first_frag), ++ macb_rx_ring_wrap(last_frag), len); ++ ++ /* ++ * The ethernet header starts NET_IP_ALIGN bytes into the ++ * first buffer. Since the header is 14 bytes, this makes the ++ * payload word-aligned. ++ * ++ * Instead of calling skb_reserve(NET_IP_ALIGN), we just copy ++ * the two padding bytes into the skb so that we avoid hitting ++ * the slowpath in memcpy(), and pull them off afterwards. 
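++	 * The controller already stores the frame NET_IP_ALIGN (2) bytes into
++	 * its buffers because macb_init_hw() programs RBOF accordingly, so the
++	 * 14-byte Ethernet header ends at offset 16 and the IP header is
++	 * word-aligned. Copying the 2 padding bytes keeps both memcpy()
++	 * pointers aligned, and __rtskb_pull(skb, NET_IP_ALIGN) below drops
++	 * them again.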
++ */ ++ skb = rtnetdev_alloc_rtskb(bp->dev, len + NET_IP_ALIGN); ++ if (!skb) { ++ rtdev_notice(bp->dev, "Low memory, packet dropped.\n"); ++ bp->stats.rx_dropped++; ++ for (frag = first_frag; ; frag++) { ++ desc = macb_rx_desc(bp, frag); ++ desc->addr &= ~MACB_BIT(RX_USED); ++ if (frag == last_frag) ++ break; ++ } ++ ++ /* Make descriptor updates visible to hardware */ ++ wmb(); ++ ++ return 1; ++ } ++ ++ offset = 0; ++ len += NET_IP_ALIGN; ++ skb->time_stamp = *time_stamp; ++ rtskb_put(skb, len); ++ ++ for (frag = first_frag; ; frag++) { ++ unsigned int frag_len = bp->rx_buffer_size; ++ ++ if (offset + frag_len > len) { ++ BUG_ON(frag != last_frag); ++ frag_len = len - offset; ++ } ++ memcpy(skb->data + offset, macb_rx_buffer(bp, frag), frag_len); ++ offset += bp->rx_buffer_size; ++ desc = macb_rx_desc(bp, frag); ++ desc->addr &= ~MACB_BIT(RX_USED); ++ ++ if (frag == last_frag) ++ break; ++ } ++ ++ /* Make descriptor updates visible to hardware */ ++ wmb(); ++ ++ __rtskb_pull(skb, NET_IP_ALIGN); ++ skb->protocol = rt_eth_type_trans(skb, bp->dev); ++ ++ bp->stats.rx_packets++; ++ bp->stats.rx_bytes += skb->len; ++ rtdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n", ++ skb->len, skb->csum); ++ rtnetif_rx(skb); ++ ++ return 0; ++} ++ ++static int macb_rx(struct macb *bp, int budget, nanosecs_abs_t *time_stamp) ++{ ++ int received = 0; ++ unsigned int tail; ++ int first_frag = -1; ++ ++ for (tail = bp->rx_tail; budget > 0; tail++) { ++ struct macb_dma_desc *desc = macb_rx_desc(bp, tail); ++ u32 addr, ctrl; ++ ++ /* Make hw descriptor updates visible to CPU */ ++ rmb(); ++ ++ addr = desc->addr; ++ ctrl = desc->ctrl; ++ ++ if (!(addr & MACB_BIT(RX_USED))) ++ break; ++ ++ if (ctrl & MACB_BIT(RX_SOF)) { ++ if (first_frag != -1) ++ discard_partial_frame(bp, first_frag, tail); ++ first_frag = tail; ++ } ++ ++ if (ctrl & MACB_BIT(RX_EOF)) { ++ int dropped; ++ BUG_ON(first_frag == -1); ++ ++ dropped = macb_rx_frame(bp, first_frag, tail, time_stamp); ++ first_frag = -1; ++ if (!dropped) { ++ received++; ++ budget--; ++ } ++ } ++ } ++ ++ if (first_frag != -1) ++ bp->rx_tail = first_frag; ++ else ++ bp->rx_tail = tail; ++ ++ return received; ++} ++ ++static int macb_interrupt(rtdm_irq_t *irq_handle) ++{ ++ void *dev_id = rtdm_irq_get_arg(irq_handle, void); ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *dev = dev_id; ++ struct macb *bp = rtnetdev_priv(dev); ++ unsigned received = 0; ++ u32 status, ctrl; ++ ++ status = macb_readl(bp, ISR); ++ ++ if (unlikely(!status)) ++ return RTDM_IRQ_NONE; ++ ++ rtdm_lock_get(&bp->lock); ++ ++ while (status) { ++ /* close possible race with dev_close */ ++ if (unlikely(!rtnetif_running(dev))) { ++ macb_writel(bp, IDR, -1); ++ break; ++ } ++ ++ rtdev_vdbg(bp->dev, "isr = 0x%08lx\n", (unsigned long)status); ++ ++ if (status & MACB_BIT(RCOMP)) { ++ received += bp->macbgem_ops.mog_rx(bp, 100 - received, ++ &time_stamp); ++ } ++ ++ if (unlikely(status & (MACB_TX_ERR_FLAGS))) { ++ macb_writel(bp, IDR, MACB_TX_INT_FLAGS); ++ rtdm_schedule_nrt_work(&bp->tx_error_task); ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_TX_ERR_FLAGS); ++ ++ break; ++ } ++ ++ if (status & MACB_BIT(TCOMP)) ++ macb_tx_interrupt(bp); ++ ++ /* ++ * Link change detection isn't possible with RMII, so we'll ++ * add that if/when we get our hands on a full-blown MII PHY. 
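++		 * The RXUBR case below recovers from a receive used-bit-read
++		 * condition by toggling the receive enable bit (RE) in NCR off
++		 * and on, restarting the receiver without a full reset.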
++ */ ++ ++ if (status & MACB_BIT(RXUBR)) { ++ ctrl = macb_readl(bp, NCR); ++ macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE)); ++ macb_writel(bp, NCR, ctrl | MACB_BIT(RE)); ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_BIT(RXUBR)); ++ } ++ ++ if (status & MACB_BIT(ISR_ROVR)) { ++ /* We missed at least one packet */ ++ if (macb_is_gem(bp)) ++ bp->hw_stats.gem.rx_overruns++; ++ else ++ bp->hw_stats.macb.rx_overruns++; ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_BIT(ISR_ROVR)); ++ } ++ ++ if (status & MACB_BIT(HRESP)) { ++ /* ++ * TODO: Reset the hardware, and maybe move the ++ * rtdev_err to a lower-priority context as well ++ * (work queue?) ++ */ ++ rtdev_err(dev, "DMA bus error: HRESP not OK\n"); ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_BIT(HRESP)); ++ } ++ ++ status = macb_readl(bp, ISR); ++ } ++ ++ rtdm_lock_put(&bp->lock); ++ ++ if (received) ++ rt_mark_stack_mgr(dev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int macb_start_xmit(struct rtskb *skb, struct rtnet_device *dev) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ dma_addr_t mapping; ++ unsigned int len, entry; ++ struct macb_dma_desc *desc; ++ struct macb_tx_skb *tx_skb; ++ u32 ctrl; ++ unsigned long flags; ++ ++#if defined(DEBUG) && defined(VERBOSE_DEBUG) ++ rtdev_vdbg(bp->dev, ++ "start_xmit: len %u head %p data %p tail %p end %p\n", ++ skb->len, skb->head, skb->data, ++ rtskb_tail_pointer(skb), rtskb_end_pointer(skb)); ++ print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_OFFSET, 16, 1, ++ skb->data, 16, true); ++#endif ++ ++ len = skb->len; ++ rtdm_lock_get_irqsave(&bp->lock, flags); ++ ++ /* This is a hard error, log it. */ ++ if (CIRC_SPACE(bp->tx_head, bp->tx_tail, TX_RING_SIZE) < 1) { ++ rtnetif_stop_queue(dev); ++ rtdm_lock_put_irqrestore(&bp->lock, flags); ++ rtdev_err(bp->dev, "BUG! 
Tx Ring full when queue awake!\n"); ++ rtdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n", ++ bp->tx_head, bp->tx_tail); ++ return RTDEV_TX_BUSY; ++ } ++ ++ entry = macb_tx_ring_wrap(bp->tx_head); ++ rtdev_vdbg(bp->dev, "Allocated ring entry %u\n", entry); ++ mapping = dma_map_single(&bp->pdev->dev, skb->data, ++ len, DMA_TO_DEVICE); ++ if (dma_mapping_error(&bp->pdev->dev, mapping)) { ++ dev_kfree_rtskb(skb); ++ goto unlock; ++ } ++ ++ bp->tx_head++; ++ tx_skb = &bp->tx_skb[entry]; ++ tx_skb->skb = skb; ++ tx_skb->mapping = mapping; ++ rtdev_vdbg(bp->dev, "Mapped skb data %p to DMA addr %08lx\n", ++ skb->data, (unsigned long)mapping); ++ ++ ctrl = MACB_BF(TX_FRMLEN, len); ++ ctrl |= MACB_BIT(TX_LAST); ++ if (entry == (TX_RING_SIZE - 1)) ++ ctrl |= MACB_BIT(TX_WRAP); ++ ++ desc = &bp->tx_ring[entry]; ++ desc->addr = mapping; ++ desc->ctrl = ctrl; ++ ++ /* Make newly initialized descriptor visible to hardware */ ++ wmb(); ++ ++ rtskb_tx_timestamp(skb); ++ ++ macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); ++ ++ if (CIRC_SPACE(bp->tx_head, bp->tx_tail, TX_RING_SIZE) < 1) ++ rtnetif_stop_queue(dev); ++ ++unlock: ++ rtdm_lock_put_irqrestore(&bp->lock, flags); ++ ++ return RTDEV_TX_OK; ++} ++ ++static void macb_init_rx_buffer_size(struct macb *bp, size_t size) ++{ ++ if (!macb_is_gem(bp)) { ++ bp->rx_buffer_size = MACB_RX_BUFFER_SIZE; ++ } else { ++ bp->rx_buffer_size = size; ++ ++ if (bp->rx_buffer_size % RX_BUFFER_MULTIPLE) { ++ rtdev_dbg(bp->dev, ++ "RX buffer must be multiple of %d bytes, expanding\n", ++ RX_BUFFER_MULTIPLE); ++ bp->rx_buffer_size = ++ roundup(bp->rx_buffer_size, RX_BUFFER_MULTIPLE); ++ } ++ } ++ ++ rtdev_dbg(bp->dev, "mtu [%u] rx_buffer_size [%Zu]\n", ++ bp->dev->mtu, bp->rx_buffer_size); ++} ++ ++static void gem_free_rx_buffers(struct macb *bp) ++{ ++ struct rtskb *skb; ++ struct macb_dma_desc *desc; ++ dma_addr_t addr; ++ int i; ++ ++ if (!bp->rx_skbuff) ++ return; ++ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ skb = bp->rx_skbuff[i]; ++ ++ if (skb == NULL) ++ continue; ++ ++ desc = &bp->rx_ring[i]; ++ addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); ++ dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size, ++ DMA_FROM_DEVICE); ++ dev_kfree_rtskb(skb); ++ skb = NULL; ++ } ++ ++ kfree(bp->rx_skbuff); ++ bp->rx_skbuff = NULL; ++} ++ ++static void macb_free_rx_buffers(struct macb *bp) ++{ ++ if (bp->rx_buffers) { ++ dma_free_coherent(&bp->pdev->dev, ++ RX_RING_SIZE * bp->rx_buffer_size, ++ bp->rx_buffers, bp->rx_buffers_dma); ++ bp->rx_buffers = NULL; ++ } ++} ++ ++static void macb_free_consistent(struct macb *bp) ++{ ++ if (bp->tx_skb) { ++ kfree(bp->tx_skb); ++ bp->tx_skb = NULL; ++ } ++ bp->macbgem_ops.mog_free_rx_buffers(bp); ++ if (bp->rx_ring) { ++ dma_free_coherent(&bp->pdev->dev, RX_RING_BYTES, ++ bp->rx_ring, bp->rx_ring_dma); ++ bp->rx_ring = NULL; ++ } ++ if (bp->tx_ring) { ++ dma_free_coherent(&bp->pdev->dev, TX_RING_BYTES, ++ bp->tx_ring, bp->tx_ring_dma); ++ bp->tx_ring = NULL; ++ } ++} ++ ++static int gem_alloc_rx_buffers(struct macb *bp) ++{ ++ int size; ++ ++ size = RX_RING_SIZE * sizeof(struct rtskb *); ++ bp->rx_skbuff = kzalloc(size, GFP_KERNEL); ++ if (!bp->rx_skbuff) ++ return -ENOMEM; ++ else ++ rtdev_dbg(bp->dev, ++ "Allocated %d RX struct rtskb entries at %p\n", ++ RX_RING_SIZE, bp->rx_skbuff); ++ return 0; ++} ++ ++static int macb_alloc_rx_buffers(struct macb *bp) ++{ ++ int size; ++ ++ size = RX_RING_SIZE * bp->rx_buffer_size; ++ bp->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size, ++ &bp->rx_buffers_dma, 
GFP_KERNEL); ++ if (!bp->rx_buffers) ++ return -ENOMEM; ++ else ++ rtdev_dbg(bp->dev, ++ "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n", ++ size, (unsigned long)bp->rx_buffers_dma, bp->rx_buffers); ++ return 0; ++} ++ ++static int macb_alloc_consistent(struct macb *bp) ++{ ++ int size; ++ ++ size = TX_RING_SIZE * sizeof(struct macb_tx_skb); ++ bp->tx_skb = kmalloc(size, GFP_KERNEL); ++ if (!bp->tx_skb) ++ goto out_err; ++ ++ size = RX_RING_BYTES; ++ bp->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size, ++ &bp->rx_ring_dma, GFP_KERNEL); ++ if (!bp->rx_ring) ++ goto out_err; ++ rtdev_dbg(bp->dev, ++ "Allocated RX ring of %d bytes at %08lx (mapped %p)\n", ++ size, (unsigned long)bp->rx_ring_dma, bp->rx_ring); ++ ++ size = TX_RING_BYTES; ++ bp->tx_ring = dma_alloc_coherent(&bp->pdev->dev, size, ++ &bp->tx_ring_dma, GFP_KERNEL); ++ if (!bp->tx_ring) ++ goto out_err; ++ rtdev_dbg(bp->dev, ++ "Allocated TX ring of %d bytes at %08lx (mapped %p)\n", ++ size, (unsigned long)bp->tx_ring_dma, bp->tx_ring); ++ ++ if (bp->macbgem_ops.mog_alloc_rx_buffers(bp)) ++ goto out_err; ++ ++ return 0; ++ ++out_err: ++ macb_free_consistent(bp); ++ return -ENOMEM; ++} ++ ++static void gem_init_rings(struct macb *bp) ++{ ++ int i; ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ bp->tx_ring[i].addr = 0; ++ bp->tx_ring[i].ctrl = MACB_BIT(TX_USED); ++ } ++ bp->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); ++ ++ bp->rx_tail = bp->rx_prepared_head = bp->tx_head = bp->tx_tail = 0; ++ ++ gem_rx_refill(bp); ++} ++ ++static void macb_init_rings(struct macb *bp) ++{ ++ int i; ++ dma_addr_t addr; ++ ++ addr = bp->rx_buffers_dma; ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ bp->rx_ring[i].addr = addr; ++ bp->rx_ring[i].ctrl = 0; ++ addr += bp->rx_buffer_size; ++ } ++ bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP); ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ bp->tx_ring[i].addr = 0; ++ bp->tx_ring[i].ctrl = MACB_BIT(TX_USED); ++ } ++ bp->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); ++ ++ bp->rx_tail = bp->tx_head = bp->tx_tail = 0; ++} ++ ++static void macb_reset_hw(struct macb *bp) ++{ ++ /* ++ * Disable RX and TX (XXX: Should we halt the transmission ++ * more gracefully?) ++ */ ++ macb_writel(bp, NCR, 0); ++ ++ /* Clear the stats registers (XXX: Update stats first?) 
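++	 * The remaining reset steps follow the same pattern: write -1 to TSR
++	 * and RSR to clear all status flags, then IDR = -1 plus a dummy ISR
++	 * read to disable and flush any pending interrupt.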
*/ ++ macb_writel(bp, NCR, MACB_BIT(CLRSTAT)); ++ ++ /* Clear all status flags */ ++ macb_writel(bp, TSR, -1); ++ macb_writel(bp, RSR, -1); ++ ++ /* Disable all interrupts */ ++ macb_writel(bp, IDR, -1); ++ macb_readl(bp, ISR); ++} ++ ++static u32 gem_mdc_clk_div(struct macb *bp) ++{ ++ u32 config; ++ unsigned long pclk_hz = clk_get_rate(bp->pclk); ++ ++ if (pclk_hz <= 20000000) ++ config = GEM_BF(CLK, GEM_CLK_DIV8); ++ else if (pclk_hz <= 40000000) ++ config = GEM_BF(CLK, GEM_CLK_DIV16); ++ else if (pclk_hz <= 80000000) ++ config = GEM_BF(CLK, GEM_CLK_DIV32); ++ else if (pclk_hz <= 120000000) ++ config = GEM_BF(CLK, GEM_CLK_DIV48); ++ else if (pclk_hz <= 160000000) ++ config = GEM_BF(CLK, GEM_CLK_DIV64); ++ else ++ config = GEM_BF(CLK, GEM_CLK_DIV96); ++ ++ return config; ++} ++ ++static u32 macb_mdc_clk_div(struct macb *bp) ++{ ++ u32 config; ++ unsigned long pclk_hz; ++ ++ if (macb_is_gem(bp)) ++ return gem_mdc_clk_div(bp); ++ ++ pclk_hz = clk_get_rate(bp->pclk); ++ if (pclk_hz <= 20000000) ++ config = MACB_BF(CLK, MACB_CLK_DIV8); ++ else if (pclk_hz <= 40000000) ++ config = MACB_BF(CLK, MACB_CLK_DIV16); ++ else if (pclk_hz <= 80000000) ++ config = MACB_BF(CLK, MACB_CLK_DIV32); ++ else ++ config = MACB_BF(CLK, MACB_CLK_DIV64); ++ ++ return config; ++} ++ ++/* ++ * Get the DMA bus width field of the network configuration register that we ++ * should program. We find the width from decoding the design configuration ++ * register to find the maximum supported data bus width. ++ */ ++static u32 macb_dbw(struct macb *bp) ++{ ++ if (!macb_is_gem(bp)) ++ return 0; ++ ++ switch (GEM_BFEXT(DBWDEF, gem_readl(bp, DCFG1))) { ++ case 4: ++ return GEM_BF(DBW, GEM_DBW128); ++ case 2: ++ return GEM_BF(DBW, GEM_DBW64); ++ case 1: ++ default: ++ return GEM_BF(DBW, GEM_DBW32); ++ } ++} ++ ++/* ++ * Configure the receive DMA engine ++ * - use the correct receive buffer size ++ * - set the possibility to use INCR16 bursts ++ * (if not supported by FIFO, it will fallback to default) ++ * - set both rx/tx packet buffers to full memory size ++ * These are configurable parameters for GEM. 
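++ * These bullets map onto the DMACFG fields written below: RXBS carries the
++ * buffer size in RX_BUFFER_MULTIPLE (64 byte) units, FBLDO = 16 requests the
++ * INCR16 bursts, TXPBMS together with RXBMS = -1 selects the full-size
++ * packet buffers, and ENDIA is cleared.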
++ */ ++static void macb_configure_dma(struct macb *bp) ++{ ++ u32 dmacfg; ++ ++ if (macb_is_gem(bp)) { ++ dmacfg = gem_readl(bp, DMACFG) & ~GEM_BF(RXBS, -1L); ++ dmacfg |= GEM_BF(RXBS, bp->rx_buffer_size / RX_BUFFER_MULTIPLE); ++ dmacfg |= GEM_BF(FBLDO, 16); ++ dmacfg |= GEM_BIT(TXPBMS) | GEM_BF(RXBMS, -1L); ++ dmacfg &= ~GEM_BIT(ENDIA); ++ gem_writel(bp, DMACFG, dmacfg); ++ } ++} ++ ++/* ++ * Configure peripheral capacities according to integration options used ++ */ ++static void macb_configure_caps(struct macb *bp) ++{ ++ if (macb_is_gem(bp)) { ++ if (GEM_BFEXT(IRQCOR, gem_readl(bp, DCFG1)) == 0) ++ bp->caps |= MACB_CAPS_ISR_CLEAR_ON_WRITE; ++ } ++ rtdev_vdbg(bp->dev, "Capabilities : %X\n", bp->caps); ++} ++ ++static void macb_init_hw(struct macb *bp) ++{ ++ u32 config; ++ ++ macb_reset_hw(bp); ++ rtmacb_set_hwaddr(bp); ++ ++ config = macb_mdc_clk_div(bp); ++ config |= MACB_BF(RBOF, NET_IP_ALIGN); /* Make eth data aligned */ ++ config |= MACB_BIT(PAE); /* PAuse Enable */ ++ config |= MACB_BIT(DRFCS); /* Discard Rx FCS */ ++ if (bp->dev->flags & IFF_PROMISC) ++ config |= MACB_BIT(CAF); /* Copy All Frames */ ++ if (!(bp->dev->flags & IFF_BROADCAST)) ++ config |= MACB_BIT(NBC); /* No BroadCast */ ++ config |= macb_dbw(bp); ++ macb_writel(bp, NCFGR, config); ++ bp->speed = SPEED_10; ++ bp->duplex = DUPLEX_HALF; ++ ++ macb_configure_dma(bp); ++ macb_configure_caps(bp); ++ ++ /* Initialize TX and RX buffers */ ++ macb_writel(bp, RBQP, bp->rx_ring_dma); ++ macb_writel(bp, TBQP, bp->tx_ring_dma); ++ ++ /* Enable TX and RX */ ++ macb_writel(bp, NCR, MACB_BIT(RE) | MACB_BIT(TE) | MACB_BIT(MPE)); ++ ++ /* Enable interrupts */ ++ macb_writel(bp, IER, (MACB_RX_INT_FLAGS ++ | MACB_TX_INT_FLAGS ++ | MACB_BIT(HRESP))); ++ ++} ++ ++static int macb_open(struct rtnet_device *dev) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ size_t bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN; ++ int err; ++ ++ rt_stack_connect(dev, &STACK_manager); ++ ++ rtdev_dbg(bp->dev, "open\n"); ++ ++ /* carrier starts down */ ++ rtnetif_carrier_off(dev); ++ ++ /* if the phy is not yet register, retry later*/ ++ if (!bp->phy_dev) ++ return -EAGAIN; ++ ++ /* RX buffers initialization */ ++ macb_init_rx_buffer_size(bp, bufsz); ++ ++ err = macb_alloc_consistent(bp); ++ if (err) { ++ rtdev_err(dev, "Unable to allocate DMA memory (error %d)\n", ++ err); ++ return err; ++ } ++ ++ bp->macbgem_ops.mog_init_rings(bp); ++ macb_init_hw(bp); ++ ++ /* schedule a link state check */ ++ phy_start(bp->phy_dev); ++ ++ rtnetif_start_queue(dev); ++ ++ return 0; ++} ++ ++static int macb_close(struct rtnet_device *dev) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ unsigned long flags; ++ ++ rtnetif_stop_queue(dev); ++ ++ if (bp->phy_dev) ++ phy_stop(bp->phy_dev); ++ ++ rtdm_lock_get_irqsave(&bp->lock, flags); ++ macb_reset_hw(bp); ++ rtnetif_carrier_off(dev); ++ rtdm_lock_put_irqrestore(&bp->lock, flags); ++ ++ macb_free_consistent(bp); ++ ++ rt_stack_disconnect(dev); ++ ++ return 0; ++} ++ ++static void gem_update_stats(struct macb *bp) ++{ ++ u32 __iomem *reg = bp->regs + GEM_OTX; ++ u32 *p = &bp->hw_stats.gem.tx_octets_31_0; ++ u32 *end = &bp->hw_stats.gem.rx_udp_checksum_errors + 1; ++ ++ for (; p < end; p++, reg++) ++ *p += __raw_readl(reg); ++} ++ ++static struct net_device_stats *gem_get_stats(struct macb *bp) ++{ ++ struct gem_stats *hwstat = &bp->hw_stats.gem; ++ struct net_device_stats *nstat = &bp->stats; ++ ++ gem_update_stats(bp); ++ ++ nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors + ++ 
hwstat->rx_alignment_errors + ++ hwstat->rx_resource_errors + ++ hwstat->rx_overruns + ++ hwstat->rx_oversize_frames + ++ hwstat->rx_jabbers + ++ hwstat->rx_undersized_frames + ++ hwstat->rx_length_field_frame_errors); ++ nstat->tx_errors = (hwstat->tx_late_collisions + ++ hwstat->tx_excessive_collisions + ++ hwstat->tx_underrun + ++ hwstat->tx_carrier_sense_errors); ++ nstat->multicast = hwstat->rx_multicast_frames; ++ nstat->collisions = (hwstat->tx_single_collision_frames + ++ hwstat->tx_multiple_collision_frames + ++ hwstat->tx_excessive_collisions); ++ nstat->rx_length_errors = (hwstat->rx_oversize_frames + ++ hwstat->rx_jabbers + ++ hwstat->rx_undersized_frames + ++ hwstat->rx_length_field_frame_errors); ++ nstat->rx_over_errors = hwstat->rx_resource_errors; ++ nstat->rx_crc_errors = hwstat->rx_frame_check_sequence_errors; ++ nstat->rx_frame_errors = hwstat->rx_alignment_errors; ++ nstat->rx_fifo_errors = hwstat->rx_overruns; ++ nstat->tx_aborted_errors = hwstat->tx_excessive_collisions; ++ nstat->tx_carrier_errors = hwstat->tx_carrier_sense_errors; ++ nstat->tx_fifo_errors = hwstat->tx_underrun; ++ ++ return nstat; ++} ++ ++struct net_device_stats *rtmacb_get_stats(struct rtnet_device *dev) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ struct net_device_stats *nstat = &bp->stats; ++ struct macb_stats *hwstat = &bp->hw_stats.macb; ++ ++ if (macb_is_gem(bp)) ++ return gem_get_stats(bp); ++ ++ /* read stats from hardware */ ++ macb_update_stats(bp); ++ ++ /* Convert HW stats into netdevice stats */ ++ nstat->rx_errors = (hwstat->rx_fcs_errors + ++ hwstat->rx_align_errors + ++ hwstat->rx_resource_errors + ++ hwstat->rx_overruns + ++ hwstat->rx_oversize_pkts + ++ hwstat->rx_jabbers + ++ hwstat->rx_undersize_pkts + ++ hwstat->sqe_test_errors + ++ hwstat->rx_length_mismatch); ++ nstat->tx_errors = (hwstat->tx_late_cols + ++ hwstat->tx_excessive_cols + ++ hwstat->tx_underruns + ++ hwstat->tx_carrier_errors); ++ nstat->collisions = (hwstat->tx_single_cols + ++ hwstat->tx_multiple_cols + ++ hwstat->tx_excessive_cols); ++ nstat->rx_length_errors = (hwstat->rx_oversize_pkts + ++ hwstat->rx_jabbers + ++ hwstat->rx_undersize_pkts + ++ hwstat->rx_length_mismatch); ++ nstat->rx_over_errors = hwstat->rx_resource_errors + ++ hwstat->rx_overruns; ++ nstat->rx_crc_errors = hwstat->rx_fcs_errors; ++ nstat->rx_frame_errors = hwstat->rx_align_errors; ++ nstat->rx_fifo_errors = hwstat->rx_overruns; ++ /* XXX: What does "missed" mean? */ ++ nstat->tx_aborted_errors = hwstat->tx_excessive_cols; ++ nstat->tx_carrier_errors = hwstat->tx_carrier_errors; ++ nstat->tx_fifo_errors = hwstat->tx_underruns; ++ /* Don't know about heartbeat or window errors... 
*/ ++ ++ return nstat; ++} ++EXPORT_SYMBOL_GPL(rtmacb_get_stats); ++ ++int rtmacb_ioctl(struct rtnet_device *dev, unsigned cmd, void *rq) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ struct phy_device *phydev = bp->phy_dev; ++ ++ if (!rtnetif_running(dev)) ++ return -EINVAL; ++ ++ if (!phydev) ++ return -ENODEV; ++ ++ return phy_mii_ioctl(phydev, rq, cmd); ++} ++EXPORT_SYMBOL_GPL(rtmacb_ioctl); ++ ++#if defined(CONFIG_OF) ++static const struct of_device_id macb_dt_ids[] = { ++ { .compatible = "cdns,at32ap7000-macb" }, ++ { .compatible = "cdns,at91sam9260-macb" }, ++ { .compatible = "cdns,macb" }, ++ { .compatible = "cdns,pc302-gem" }, ++ { .compatible = "cdns,gem" }, ++ { .compatible = "atmel,sama5d3-gem" }, ++ { /* sentinel */ } ++}; ++MODULE_DEVICE_TABLE(of, macb_dt_ids); ++#endif ++ ++static int __init macb_probe(struct platform_device *pdev) ++{ ++ struct macb_platform_data *pdata; ++ struct resource *regs; ++ struct rtnet_device *dev; ++ struct macb *bp; ++ struct phy_device *phydev; ++ u32 config; ++ int err = -ENXIO; ++ struct pinctrl *pinctrl; ++ const char *mac; ++ ++ regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!regs) { ++ dev_err(&pdev->dev, "no mmio resource defined\n"); ++ goto err_out; ++ } ++ ++ pinctrl = devm_pinctrl_get_select_default(&pdev->dev); ++ if (IS_ERR(pinctrl)) { ++ err = PTR_ERR(pinctrl); ++ if (err == -EPROBE_DEFER) ++ goto err_out; ++ ++ dev_warn(&pdev->dev, "No pinctrl provided\n"); ++ } ++ ++ err = -ENOMEM; ++ dev = rt_alloc_etherdev(sizeof(*bp), RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (!dev) ++ goto err_out; ++ ++ rtdev_alloc_name(dev, "rteth%d"); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ dev->sysbind = &pdev->dev; ++ ++ /* TODO: Actually, we have some interesting features... 
*/ ++ dev->features |= 0; ++ ++ bp = rtnetdev_priv(dev); ++ bp->pdev = pdev; ++ bp->dev = dev; ++ ++ rtdm_lock_init(&bp->lock); ++ INIT_WORK(&bp->tx_error_task, macb_tx_error_task); ++ ++ bp->pclk = devm_clk_get(&pdev->dev, "pclk"); ++ if (IS_ERR(bp->pclk)) { ++ err = PTR_ERR(bp->pclk); ++ dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err); ++ goto err_out_free_dev; ++ } ++ ++ bp->hclk = devm_clk_get(&pdev->dev, "hclk"); ++ if (IS_ERR(bp->hclk)) { ++ err = PTR_ERR(bp->hclk); ++ dev_err(&pdev->dev, "failed to get hclk (%u)\n", err); ++ goto err_out_free_dev; ++ } ++ ++ bp->tx_clk = devm_clk_get(&pdev->dev, "tx_clk"); ++ ++ err = clk_prepare_enable(bp->pclk); ++ if (err) { ++ dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); ++ goto err_out_free_dev; ++ } ++ ++ err = clk_prepare_enable(bp->hclk); ++ if (err) { ++ dev_err(&pdev->dev, "failed to enable hclk (%u)\n", err); ++ goto err_out_disable_pclk; ++ } ++ ++ if (!IS_ERR(bp->tx_clk)) { ++ err = clk_prepare_enable(bp->tx_clk); ++ if (err) { ++ dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", ++ err); ++ goto err_out_disable_hclk; ++ } ++ } ++ ++ bp->regs = devm_ioremap(&pdev->dev, regs->start, resource_size(regs)); ++ if (!bp->regs) { ++ dev_err(&pdev->dev, "failed to map registers, aborting.\n"); ++ err = -ENOMEM; ++ goto err_out_disable_clocks; ++ } ++ ++ dev->irq = platform_get_irq(pdev, 0); ++ rt_stack_connect(dev, &STACK_manager); ++ ++ err = rtdm_irq_request(&bp->irq_handle, dev->irq, macb_interrupt, 0, ++ dev->name, dev); ++ if (err) { ++ dev_err(&pdev->dev, "Unable to request IRQ %d (error %d)\n", ++ dev->irq, err); ++ goto err_out_disable_clocks; ++ } ++ ++ dev->open = macb_open; ++ dev->stop = macb_close; ++ dev->hard_start_xmit = macb_start_xmit; ++ dev->do_ioctl = rtmacb_ioctl; ++ dev->get_stats = rtmacb_get_stats; ++ ++ dev->base_addr = regs->start; ++ ++ /* setup appropriated routines according to adapter type */ ++ if (macb_is_gem(bp)) { ++ bp->macbgem_ops.mog_alloc_rx_buffers = gem_alloc_rx_buffers; ++ bp->macbgem_ops.mog_free_rx_buffers = gem_free_rx_buffers; ++ bp->macbgem_ops.mog_init_rings = gem_init_rings; ++ bp->macbgem_ops.mog_rx = gem_rx; ++ } else { ++ bp->macbgem_ops.mog_alloc_rx_buffers = macb_alloc_rx_buffers; ++ bp->macbgem_ops.mog_free_rx_buffers = macb_free_rx_buffers; ++ bp->macbgem_ops.mog_init_rings = macb_init_rings; ++ bp->macbgem_ops.mog_rx = macb_rx; ++ } ++ ++ /* Set MII management clock divider */ ++ config = macb_mdc_clk_div(bp); ++ config |= macb_dbw(bp); ++ macb_writel(bp, NCFGR, config); ++ ++ mac = of_get_mac_address(pdev->dev.of_node); ++ if (mac) ++ memcpy(bp->dev->dev_addr, mac, ETH_ALEN); ++ else ++ rtmacb_get_hwaddr(bp); ++ ++ err = of_get_phy_mode(pdev->dev.of_node); ++ if (err < 0) { ++ pdata = dev_get_platdata(&pdev->dev); ++ if (pdata && pdata->is_rmii) ++ bp->phy_interface = PHY_INTERFACE_MODE_RMII; ++ else ++ bp->phy_interface = PHY_INTERFACE_MODE_MII; ++ } else { ++ bp->phy_interface = err; ++ } ++ ++ if (bp->phy_interface == PHY_INTERFACE_MODE_RGMII) ++ macb_or_gem_writel(bp, USRIO, GEM_BIT(RGMII)); ++ else if (bp->phy_interface == PHY_INTERFACE_MODE_RMII) ++#if defined(CONFIG_ARCH_AT91) ++ macb_or_gem_writel(bp, USRIO, (MACB_BIT(RMII) | ++ MACB_BIT(CLKEN))); ++#else ++ macb_or_gem_writel(bp, USRIO, 0); ++#endif ++ else ++#if defined(CONFIG_ARCH_AT91) ++ macb_or_gem_writel(bp, USRIO, MACB_BIT(CLKEN)); ++#else ++ macb_or_gem_writel(bp, USRIO, MACB_BIT(MII)); ++#endif ++ ++ err = rt_register_rtnetdev(dev); ++ if (err) { ++ dev_err(&pdev->dev, "Cannot register net 
device, aborting.\n"); ++ goto err_out_irq_free; ++ } ++ ++ err = rtmacb_mii_init(bp); ++ if (err) ++ goto err_out_unregister_netdev; ++ ++ platform_set_drvdata(pdev, dev); ++ ++ rtnetif_carrier_off(dev); ++ ++ rtdev_info(dev, "Cadence %s at 0x%08lx irq %d (%pM)\n", ++ macb_is_gem(bp) ? "GEM" : "MACB", dev->base_addr, ++ dev->irq, dev->dev_addr); ++ ++ phydev = bp->phy_dev; ++ rtdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", ++ phydev->drv->name, dev_name(&phydev->dev), phydev->irq); ++ ++ return 0; ++ ++err_out_unregister_netdev: ++ rt_unregister_rtnetdev(dev); ++err_out_irq_free: ++ rtdm_irq_free(&bp->irq_handle); ++err_out_disable_clocks: ++ if (!IS_ERR(bp->tx_clk)) ++ clk_disable_unprepare(bp->tx_clk); ++err_out_disable_hclk: ++ clk_disable_unprepare(bp->hclk); ++err_out_disable_pclk: ++ clk_disable_unprepare(bp->pclk); ++err_out_free_dev: ++ rtdev_free(dev); ++err_out: ++ return err; ++} ++ ++static int __exit macb_remove(struct platform_device *pdev) ++{ ++ struct rtnet_device *dev; ++ struct macb *bp; ++ ++ dev = platform_get_drvdata(pdev); ++ ++ if (dev) { ++ bp = rtnetdev_priv(dev); ++ if (bp->phy_dev) ++ phy_disconnect(bp->phy_dev); ++ mdiobus_unregister(bp->mii_bus); ++ if (bp->phy_phony_net_device) ++ free_netdev(bp->phy_phony_net_device); ++ kfree(bp->mii_bus->irq); ++ rt_rtdev_disconnect(dev); ++ rtdm_irq_free(&bp->irq_handle); ++ mdiobus_free(bp->mii_bus); ++ rt_unregister_rtnetdev(dev); ++ if (!IS_ERR(bp->tx_clk)) ++ clk_disable_unprepare(bp->tx_clk); ++ clk_disable_unprepare(bp->hclk); ++ clk_disable_unprepare(bp->pclk); ++ rtdev_free(dev); ++ } ++ ++ return 0; ++} ++ ++static struct platform_driver macb_driver = { ++ .remove = __exit_p(macb_remove), ++ .driver = { ++ .name = "macb", ++ .owner = THIS_MODULE, ++ .of_match_table = of_match_ptr(macb_dt_ids), ++ }, ++}; ++ ++static bool found; ++static int __init macb_driver_init(void) ++{ ++ found = platform_driver_probe(&macb_driver, macb_probe) == 0; ++ return 0; ++} ++module_init(macb_driver_init); ++ ++static void __exit macb_driver_exit(void) ++{ ++ if (found) ++ platform_driver_unregister(&macb_driver); ++} ++module_exit(macb_driver_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("Cadence MACB/GEM Ethernet driver"); ++MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); ++MODULE_ALIAS("platform:macb"); +--- linux/drivers/xenomai/net/drivers/eepro100.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/eepro100.c 2021-04-07 16:01:27.349633992 +0800 +@@ -0,0 +1,1845 @@ ++/* rtnet/drivers/eepro100-rt.c: An Intel i82557-559 Real-Time-Ethernet driver for Linux. */ ++/* ++ RTnet porting 2002 by Jan Kiszka ++ Originally written 1996-1999 by Donald Becker. ++ ++ The driver also contains updates by different kernel developers ++ (see incomplete list below). ++ Current maintainer is Andrey V. Savochkin . ++ Please use this email address and linux-kernel mailing list for bug reports. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ This driver is for the Intel EtherExpress Pro100 (Speedo3) design. ++ It should work with all i82557/558/559 boards. ++ ++ Version history: ++ 1998 Apr - 2000 Feb Andrey V. 
Savochkin ++ Serious fixes for multicast filter list setting, TX timeout routine; ++ RX ring refilling logic; other stuff ++ 2000 Feb Jeff Garzik ++ Convert to new PCI driver interface ++ 2000 Mar 24 Dragan Stancevic ++ Disabled FC and ER, to avoid lockups when when we get FCP interrupts. ++ 2000 Jul 17 Goutham Rao ++ PCI DMA API fixes, adding pci_dma_sync_single calls where neccesary ++ ++ 2002 May 16 Jan Kiszka ++ Ported to RTnet (RTAI version) ++*/ ++ ++static const char *version = ++"eepro100-rt.c:1.36-RTnet-0.8 2002-2006 Jan Kiszka \n" ++"eepro100-rt.c: based on eepro100.c 1.36 by D. Becker, A. V. Savochkin and others\n"; ++ ++/* A few user-configurable values that apply to all boards. ++ First set is undocumented and spelled per Intel recommendations. */ ++ ++static int txfifo = 8; /* Tx FIFO threshold in 4 byte units, 0-15 */ ++static int rxfifo = 8; /* Rx FIFO threshold, default 32 bytes. */ ++/* Tx/Rx DMA burst length, 0-127, 0 == no preemption, tx==128 -> disabled. */ ++static int txdmacount = 128; ++static int rxdmacount /* = 0 */; ++ ++/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ ++static int max_interrupt_work = 20; ++ ++/* Maximum number of multicast addresses to filter (vs. rx-all-multicast) */ ++static int multicast_filter_limit = 64; ++ ++/* 'options' is used to pass a transceiver override or full-duplex flag ++ e.g. "options=16" for FD, "options=32" for 100mbps-only. */ ++static int full_duplex[] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int options[] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int local_debug = -1; /* The debug level */ ++ ++/* A few values that may be tweaked. */ ++/* The ring sizes should be a power of two for efficiency. */ ++#define TX_RING_SIZE 32 ++#define RX_RING_SIZE 8 /* RX_RING_SIZE*2 rtskbs will be preallocated */ ++/* How much slots multicast filter setup may take. ++ Do not descrease without changing set_rx_mode() implementaion. */ ++#define TX_MULTICAST_SIZE 2 ++#define TX_MULTICAST_RESERV (TX_MULTICAST_SIZE*2) ++/* Actual number of TX packets queued, must be ++ <= TX_RING_SIZE-TX_MULTICAST_RESERV. */ ++#define TX_QUEUE_LIMIT (TX_RING_SIZE-TX_MULTICAST_RESERV) ++/* Hysteresis marking queue as no longer full. */ ++#define TX_QUEUE_UNFULL (TX_QUEUE_LIMIT-4) ++ ++/* Operational parameters that usually are not changed. */ ++ ++/* Time in jiffies before concluding the transmitter is hung. */ ++#define TX_TIMEOUT (2*HZ) ++/* Size of an pre-allocated Rx buffer: + slack.*/ ++#define PKT_BUF_SZ VLAN_ETH_FRAME_LEN ++ ++#if !defined(__OPTIMIZE__) || !defined(__KERNEL__) ++#warning You must compile this file with the correct options! ++#warning See the last lines of the source file. ++#error You must compile this driver with "-O". ++#endif ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++// *** RTnet *** ++#include ++#include ++ ++#define MAX_UNITS 8 ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 
1,0,1)"); ++// *** RTnet *** ++ ++MODULE_AUTHOR("Maintainer: Jan Kiszka "); ++MODULE_DESCRIPTION("Intel i82557/i82558/i82559 PCI EtherExpressPro driver"); ++MODULE_LICENSE("GPL"); ++module_param_named(debug, local_debug, int, 0444); ++module_param_array(options, int, NULL, 0444); ++module_param_array(full_duplex, int, NULL, 0444); ++module_param(txfifo, int, 0444); ++module_param(rxfifo, int, 0444); ++module_param(txdmacount, int, 0444); ++module_param(rxdmacount, int, 0444); ++module_param(max_interrupt_work, int, 0444); ++module_param(multicast_filter_limit, int, 0444); ++MODULE_PARM_DESC(debug, "eepro100 debug level (0-6)"); ++MODULE_PARM_DESC(options, "eepro100: Bits 0-3: tranceiver type, bit 4: full duplex, bit 5: 100Mbps"); ++MODULE_PARM_DESC(full_duplex, "eepro100 full duplex setting(s) (1)"); ++MODULE_PARM_DESC(txfifo, "eepro100 Tx FIFO threshold in 4 byte units, (0-15)"); ++MODULE_PARM_DESC(rxfifo, "eepro100 Rx FIFO threshold in 4 byte units, (0-15)"); ++MODULE_PARM_DESC(txdmaccount, "eepro100 Tx DMA burst length; 128 - disable (0-128)"); ++MODULE_PARM_DESC(rxdmaccount, "eepro100 Rx DMA burst length; 128 - disable (0-128)"); ++MODULE_PARM_DESC(max_interrupt_work, "eepro100 maximum events handled per interrupt"); ++MODULE_PARM_DESC(multicast_filter_limit, "eepro100 maximum number of filtered multicast addresses"); ++ ++#define RUN_AT(x) (jiffies + (x)) ++ ++// *** RTnet - no power management *** ++#undef pci_set_power_state ++#define pci_set_power_state null_set_power_state ++static inline int null_set_power_state(struct pci_dev *dev, int state) ++{ ++ return 0; ++} ++// *** RTnet *** ++ ++#define netdevice_start(dev) ++#define netdevice_stop(dev) ++#define netif_set_tx_timeout(dev, tf, tm) \ ++ do { \ ++ (dev)->tx_timeout = (tf); \ ++ (dev)->watchdog_timeo = (tm); \ ++ } while(0) ++ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG ++static int speedo_debug = 1; ++#else ++#define speedo_debug 0 ++#endif ++ ++/* ++ Theory of Operation ++ ++I. Board Compatibility ++ ++This device driver is designed for the Intel i82557 "Speedo3" chip, Intel's ++single-chip fast Ethernet controller for PCI, as used on the Intel ++EtherExpress Pro 100 adapter. ++ ++II. Board-specific settings ++ ++PCI bus devices are configured by the system at boot time, so no jumpers ++need to be set on the board. The system BIOS should be set to assign the ++PCI INTA signal to an otherwise unused system IRQ line. While it's ++possible to share PCI interrupt lines, it negatively impacts performance and ++only recent kernels support it. ++ ++III. Driver operation ++ ++IIIA. General ++The Speedo3 is very similar to other Intel network chips, that is to say ++"apparently designed on a different planet". This chips retains the complex ++Rx and Tx descriptors and multiple buffers pointers as previous chips, but ++also has simplified Tx and Rx buffer modes. This driver uses the "flexible" ++Tx mode, but in a simplified lower-overhead manner: it associates only a ++single buffer descriptor with each frame descriptor. ++ ++Despite the extra space overhead in each receive skbuff, the driver must use ++the simplified Rx buffer mode to assure that only a single data buffer is ++associated with each RxFD. The driver implements this by reserving space ++for the Rx descriptor at the head of each Rx skbuff. ++ ++The Speedo-3 has receive and command unit base addresses that are added to ++almost all descriptor pointers. The driver sets these to zero, so that all ++pointer fields are absolute addresses. 
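++
++For illustration, the Rx buffer layout that speedo_init_rx_ring() and
++speedo_rx_alloc() below set up inside each rtskb is:
++
++    [ 2 byte pad | struct RxFD | packet data, PKT_BUF_SZ bytes ]
++
++The rtskb is allocated with PKT_BUF_SZ + 2 + sizeof(struct RxFD) bytes, two
++bytes are reserved for IP header alignment, the RxFD is placed at the head of
++the remaining space, and the single DMA mapping covers the RxFD plus the data
++area, so the chip deposits the received frame directly behind its descriptor.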
++ ++The System Control Block (SCB) of some previous Intel chips exists on the ++chip in both PCI I/O and memory space. This driver uses the I/O space ++registers, but might switch to memory mapped mode to better support non-x86 ++processors. ++ ++IIIB. Transmit structure ++ ++The driver must use the complex Tx command+descriptor mode in order to ++have a indirect pointer to the skbuff data section. Each Tx command block ++(TxCB) is associated with two immediately appended Tx Buffer Descriptor ++(TxBD). A fixed ring of these TxCB+TxBD pairs are kept as part of the ++speedo_private data structure for each adapter instance. ++ ++The newer i82558 explicitly supports this structure, and can read the two ++TxBDs in the same PCI burst as the TxCB. ++ ++This ring structure is used for all normal transmit packets, but the ++transmit packet descriptors aren't long enough for most non-Tx commands such ++as CmdConfigure. This is complicated by the possibility that the chip has ++already loaded the link address in the previous descriptor. So for these ++commands we convert the next free descriptor on the ring to a NoOp, and point ++that descriptor's link to the complex command. ++ ++An additional complexity of these non-transmit commands are that they may be ++added asynchronous to the normal transmit queue, so we disable interrupts ++whenever the Tx descriptor ring is manipulated. ++ ++A notable aspect of these special configure commands is that they do ++work with the normal Tx ring entry scavenge method. The Tx ring scavenge ++is done at interrupt time using the 'dirty_tx' index, and checking for the ++command-complete bit. While the setup frames may have the NoOp command on the ++Tx ring marked as complete, but not have completed the setup command, this ++is not a problem. The tx_ring entry can be still safely reused, as the ++tx_skbuff[] entry is always empty for config_cmd and mc_setup frames. ++ ++Commands may have bits set e.g. CmdSuspend in the command word to either ++suspend or stop the transmit/command unit. This driver always flags the last ++command with CmdSuspend, erases the CmdSuspend in the previous command, and ++then issues a CU_RESUME. ++Note: Watch out for the potential race condition here: imagine ++ erasing the previous suspend ++ the chip processes the previous command ++ the chip processes the final command, and suspends ++ doing the CU_RESUME ++ the chip processes the next-yet-valid post-final-command. ++So blindly sending a CU_RESUME is only safe if we do it immediately after ++after erasing the previous CmdSuspend, without the possibility of an ++intervening delay. Thus the resume command is always within the ++interrupts-disabled region. This is a timing dependence, but handling this ++condition in a timing-independent way would considerably complicate the code. ++ ++Note: In previous generation Intel chips, restarting the command unit was a ++notoriously slow process. This is presumably no longer true. ++ ++IIIC. Receive structure ++ ++Because of the bus-master support on the Speedo3 this driver uses the new ++SKBUFF_RX_COPYBREAK scheme, rather than a fixed intermediate receive buffer. ++This scheme allocates full-sized skbuffs as receive buffers. The value ++SKBUFF_RX_COPYBREAK is used as the copying breakpoint: it is chosen to ++trade-off the memory wasted by passing the full-sized skbuff to the queue ++layer for all frames vs. the copying cost of copying a frame to a ++correctly-sized skbuff. ++ ++For small frames the copying cost is negligible (esp. 
considering that we ++are pre-loading the cache with immediately useful header information), so we ++allocate a new, minimally-sized skbuff. For large frames the copying cost ++is non-trivial, and the larger copy might flush the cache of useful data, so ++we pass up the skbuff the packet was received into. ++ ++IV. Notes ++ ++Thanks to Steve Williams of Intel for arranging the non-disclosure agreement ++that stated that I could disclose the information. But I still resent ++having to sign an Intel NDA when I'm helping Intel sell their own product! ++ ++*/ ++ ++static int speedo_found1(struct pci_dev *pdev, long ioaddr, int fnd_cnt, int acpi_idle_state); ++ ++enum pci_flags_bit { ++ PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4, ++ PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, ++}; ++ ++static inline unsigned int io_inw(unsigned long port) ++{ ++ return inw(port); ++} ++static inline void io_outw(unsigned int val, unsigned long port) ++{ ++ outw(val, port); ++} ++ ++#ifndef USE_IO ++/* Currently alpha headers define in/out macros. ++ Undefine them. 2000/03/30 SAW */ ++#undef inb ++#undef inw ++#undef inl ++#undef outb ++#undef outw ++#undef outl ++#define inb(addr) readb((void *)(addr)) ++#define inw(addr) readw((void *)(addr)) ++#define inl(addr) readl((void *)(addr)) ++#define outb(val, addr) writeb(val, (void *)(addr)) ++#define outw(val, addr) writew(val, (void *)(addr)) ++#define outl(val, addr) writel(val, (void *)(addr)) ++#endif ++ ++/* How to wait for the command unit to accept a command. ++ Typically this takes 0 ticks. */ ++static inline void wait_for_cmd_done(long cmd_ioaddr) ++{ ++ int wait = 1000; ++ do udelay(1) ; ++ while(inb(cmd_ioaddr) && --wait >= 0); ++#ifdef CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG ++ if (wait < 0) ++ printk(KERN_ALERT "eepro100: wait_for_cmd_done timeout!\n"); ++#endif ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_CMDSTATS ++static inline int rt_wait_for_cmd_done(long cmd_ioaddr, const char *cmd) ++{ ++ int wait = CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_CMDTIMEOUT; ++ rtmd_time_t t0, t1; ++ ++ t0 = rtdm_clock_read(); ++ while (inb(cmd_ioaddr) != 0) { ++ if (wait-- == 0) { ++ rtdm_printk(KERN_ALERT "eepro100: rt_wait_for_cmd_done(%s) " ++ "timeout!\n", cmd); ++ return 1; ++ } ++ rtdm_task_busy_sleep(1000); ++ } ++ return 0; ++} ++#else ++static inline int rt_wait_for_cmd_done(long cmd_ioaddr, const char *cmd) ++{ ++ int wait = CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_CMDTIMEOUT; ++ ++ while (inb(cmd_ioaddr) != 0) { ++ if (wait-- == 0) ++ return 1; ++ rtdm_task_busy_sleep(1000); ++ } ++ return 0; ++} ++#endif ++ ++/* Offsets to the various registers. ++ All accesses need not be longword aligned. */ ++enum speedo_offsets { ++ SCBStatus = 0, SCBCmd = 2, /* Rx/Command Unit command and status. */ ++ SCBPointer = 4, /* General purpose pointer. */ ++ SCBPort = 8, /* Misc. commands and operands. */ ++ SCBflash = 12, SCBeeprom = 14, /* EEPROM and flash memory control. */ ++ SCBCtrlMDI = 16, /* MDI interface control. */ ++ SCBEarlyRx = 20, /* Early receive byte count. */ ++}; ++/* Commands that can be put in a command list entry. */ ++enum commands { ++ CmdNOp = 0, CmdIASetup = 0x10000, CmdConfigure = 0x20000, ++ CmdMulticastList = 0x30000, CmdTx = 0x40000, CmdTDR = 0x50000, ++ CmdDump = 0x60000, CmdDiagnose = 0x70000, ++ CmdSuspend = 0x40000000, /* Suspend after completion. */ ++ CmdIntr = 0x20000000, /* Interrupt after completion. */ ++ CmdTxFlex = 0x00080000, /* Use "Flexible mode" for CmdTx command. 
*/ ++}; ++/* Clear CmdSuspend (1<<30) avoiding interference with the card access to the ++ status bits. Previous driver versions used separate 16 bit fields for ++ commands and statuses. --SAW ++ */ ++#if defined(__alpha__) ++# define clear_suspend(cmd) clear_bit(30, &(cmd)->cmd_status); ++#else ++# if defined(__LITTLE_ENDIAN) ++# define clear_suspend(cmd) ((__u16 *)&(cmd)->cmd_status)[1] &= ~0x4000 ++# elif defined(__BIG_ENDIAN) ++# define clear_suspend(cmd) ((__u16 *)&(cmd)->cmd_status)[1] &= ~0x0040 ++# else ++# error Unsupported byteorder ++# endif ++#endif ++ ++enum SCBCmdBits { ++ SCBMaskCmdDone=0x8000, SCBMaskRxDone=0x4000, SCBMaskCmdIdle=0x2000, ++ SCBMaskRxSuspend=0x1000, SCBMaskEarlyRx=0x0800, SCBMaskFlowCtl=0x0400, ++ SCBTriggerIntr=0x0200, SCBMaskAll=0x0100, ++ /* The rest are Rx and Tx commands. */ ++ CUStart=0x0010, CUResume=0x0020, CUStatsAddr=0x0040, CUShowStats=0x0050, ++ CUCmdBase=0x0060, /* CU Base address (set to zero) . */ ++ CUDumpStats=0x0070, /* Dump then reset stats counters. */ ++ RxStart=0x0001, RxResume=0x0002, RxAbort=0x0004, RxAddrLoad=0x0006, ++ RxResumeNoResources=0x0007, ++}; ++ ++enum SCBPort_cmds { ++ PortReset=0, PortSelfTest=1, PortPartialReset=2, PortDump=3, ++}; ++ ++/* The Speedo3 Rx and Tx frame/buffer descriptors. */ ++struct descriptor { /* A generic descriptor. */ ++ s32 cmd_status; /* All command and status fields. */ ++ u32 link; /* struct descriptor * */ ++ unsigned char params[0]; ++}; ++ ++/* The Speedo3 Rx and Tx buffer descriptors. */ ++struct RxFD { /* Receive frame descriptor. */ ++ s32 status; ++ u32 link; /* struct RxFD * */ ++ u32 rx_buf_addr; /* void * */ ++ u32 count; ++}; ++ ++/* Selected elements of the Tx/RxFD.status word. */ ++enum RxFD_bits { ++ RxComplete=0x8000, RxOK=0x2000, ++ RxErrCRC=0x0800, RxErrAlign=0x0400, RxErrTooBig=0x0200, RxErrSymbol=0x0010, ++ RxEth2Type=0x0020, RxNoMatch=0x0004, RxNoIAMatch=0x0002, ++ TxUnderrun=0x1000, StatusComplete=0x8000, ++}; ++ ++#define CONFIG_DATA_SIZE 22 ++struct TxFD { /* Transmit frame descriptor set. */ ++ s32 status; ++ u32 link; /* void * */ ++ u32 tx_desc_addr; /* Always points to the tx_buf_addr element. */ ++ s32 count; /* # of TBD (=1), Tx start thresh., etc. */ ++ /* This constitutes two "TBD" entries -- we only use one. */ ++#define TX_DESCR_BUF_OFFSET 16 ++ u32 tx_buf_addr0; /* void *, frame to be transmitted. */ ++ s32 tx_buf_size0; /* Length of Tx frame. */ ++ u32 tx_buf_addr1; /* void *, frame to be transmitted. */ ++ s32 tx_buf_size1; /* Length of Tx frame. */ ++ /* the structure must have space for at least CONFIG_DATA_SIZE starting ++ * from tx_desc_addr field */ ++}; ++ ++/* Multicast filter setting block. --SAW */ ++struct speedo_mc_block { ++ struct speedo_mc_block *next; ++ unsigned int tx; ++ dma_addr_t frame_dma; ++ unsigned int len; ++ struct descriptor frame __attribute__ ((__aligned__(16))); ++}; ++ ++/* Elements of the dump_statistics block. This block must be lword aligned. */ ++struct speedo_stats { ++ u32 tx_good_frames; ++ u32 tx_coll16_errs; ++ u32 tx_late_colls; ++ u32 tx_underruns; ++ u32 tx_lost_carrier; ++ u32 tx_deferred; ++ u32 tx_one_colls; ++ u32 tx_multi_colls; ++ u32 tx_total_colls; ++ u32 rx_good_frames; ++ u32 rx_crc_errs; ++ u32 rx_align_errs; ++ u32 rx_resource_errs; ++ u32 rx_overrun_errs; ++ u32 rx_colls_errs; ++ u32 rx_runt_errs; ++ u32 done_marker; ++}; ++ ++enum Rx_ring_state_bits { ++ RrNoMem=1, RrPostponed=2, RrNoResources=4, RrOOMReported=8, ++}; ++ ++/* Do not change the position (alignment) of the first few elements! 
++ The later elements are grouped for cache locality. ++ ++ Unfortunately, all the positions have been shifted since there. ++ A new re-alignment is required. 2000/03/06 SAW */ ++struct speedo_private { ++ struct TxFD *tx_ring; /* Commands (usually CmdTxPacket). */ ++ struct RxFD *rx_ringp[RX_RING_SIZE]; /* Rx descriptor, used as ring. */ ++ ++ // *** RTnet *** ++ /* The addresses of a Tx/Rx-in-place packets/buffers. */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; ++ // *** RTnet *** ++ ++ /* Mapped addresses of the rings. */ ++ dma_addr_t tx_ring_dma; ++#define TX_RING_ELEM_DMA(sp, n) ((sp)->tx_ring_dma + (n)*sizeof(struct TxFD)) ++ dma_addr_t rx_ring_dma[RX_RING_SIZE]; ++ struct descriptor *last_cmd; /* Last command sent. */ ++ unsigned int cur_tx, dirty_tx; /* The ring entries to be free()ed. */ ++ rtdm_lock_t lock; /* Group with Tx control cache line. */ ++ u32 tx_threshold; /* The value for txdesc.count. */ ++ struct RxFD *last_rxf; /* Last filled RX buffer. */ ++ dma_addr_t last_rxf_dma; ++ unsigned int cur_rx, dirty_rx; /* The next free ring entry */ ++ long last_rx_time; /* Last Rx, in jiffies, to handle Rx hang. */ ++ struct net_device_stats stats; ++ struct speedo_stats *lstats; ++ dma_addr_t lstats_dma; ++ int chip_id; ++ struct pci_dev *pdev; ++ struct speedo_mc_block *mc_setup_head;/* Multicast setup frame list head. */ ++ struct speedo_mc_block *mc_setup_tail;/* Multicast setup frame list tail. */ ++ long in_interrupt; /* Word-aligned rtdev->interrupt */ ++ unsigned char acpi_pwr; ++ signed char rx_mode; /* Current PROMISC/ALLMULTI setting. */ ++ unsigned int tx_full:1; /* The Tx queue is full. */ ++ unsigned int full_duplex:1; /* Full-duplex operation requested. */ ++ unsigned int flow_ctrl:1; /* Use 802.3x flow control. */ ++ unsigned int rx_bug:1; /* Work around receiver hang errata. */ ++ unsigned char default_port:8; /* Last rtdev->if_port value. */ ++ unsigned char rx_ring_state; /* RX ring status flags. */ ++ unsigned short phy[2]; /* PHY media interfaces available. */ ++ unsigned short advertising; /* Current PHY advertised caps. */ ++ unsigned short partner; /* Link partner caps. */ ++ rtdm_irq_t irq_handle; ++}; ++ ++/* The parameters for a CmdConfigure operation. ++ There are so many options that it would be difficult to document each bit. ++ We mostly use the default or recommended settings. */ ++static const char i82558_config_cmd[CONFIG_DATA_SIZE] = { ++ 22, 0x08, 0, 1, 0, 0, 0x22, 0x03, 1, /* 1=Use MII 0=Use AUI */ ++ 0, 0x2E, 0, 0x60, 0x08, 0x88, ++ 0x68, 0, 0x40, 0xf2, 0x84, /* Disable FC */ ++ 0x31, 0x05, }; ++ ++/* PHY media interface chips. 
*/ ++enum phy_chips { NonSuchPhy=0, I82553AB, I82553C, I82503, DP83840, S80C240, ++ S80C24, I82555, DP83840A=10, }; ++#define EE_READ_CMD (6) ++ ++static int eepro100_init_one(struct pci_dev *pdev, ++ const struct pci_device_id *ent); ++static void eepro100_remove_one (struct pci_dev *pdev); ++ ++static int do_eeprom_cmd(long ioaddr, int cmd, int cmd_len); ++static int mdio_read(long ioaddr, int phy_id, int location); ++static int speedo_open(struct rtnet_device *rtdev); ++static void speedo_resume(struct rtnet_device *rtdev); ++static void speedo_init_rx_ring(struct rtnet_device *rtdev); ++static int speedo_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static void speedo_refill_rx_buffers(struct rtnet_device *rtdev, int force); ++static int speedo_rx(struct rtnet_device *rtdev, int* packets, nanosecs_abs_t *time_stamp); ++static void speedo_tx_buffer_gc(struct rtnet_device *rtdev); ++static int speedo_interrupt(rtdm_irq_t *irq_handle); ++static int speedo_close(struct rtnet_device *rtdev); ++static void set_rx_mode(struct rtnet_device *rtdev); ++static void speedo_show_state(struct rtnet_device *rtdev); ++static struct net_device_stats *speedo_get_stats(struct rtnet_device *rtdev); ++ ++ ++static inline void speedo_write_flush(long ioaddr) ++{ ++ /* Flush previous PCI writes through intermediate bridges ++ * by doing a benign read */ ++ (void)readb((void *)(ioaddr + SCBStatus)); ++} ++ ++static int eepro100_init_one (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ unsigned long ioaddr; ++ int irq; ++ int acpi_idle_state = 0, pm; ++ static int cards_found = -1; ++ ++ static int did_version /* = 0 */; /* Already printed version info. */ ++ if (speedo_debug > 0 && did_version++ == 0) ++ printk(version); ++ ++ // *** RTnet *** ++ cards_found++; ++ if (cards[cards_found] == 0) ++ goto err_out_none; ++ // *** RTnet *** ++ ++ if (!request_region(pci_resource_start(pdev, 1), ++ pci_resource_len(pdev, 1), "eepro100")) { ++ printk (KERN_ERR "eepro100: cannot reserve I/O ports\n"); ++ goto err_out_none; ++ } ++ if (!request_mem_region(pci_resource_start(pdev, 0), ++ pci_resource_len(pdev, 0), "eepro100")) { ++ printk (KERN_ERR "eepro100: cannot reserve MMIO region\n"); ++ goto err_out_free_pio_region; ++ } ++ ++ irq = pdev->irq; ++#ifdef USE_IO ++ ioaddr = pci_resource_start(pdev, 1); ++ if (speedo_debug > 2) ++ printk("Found Intel i82557 PCI Speedo at I/O %#lx, IRQ %d.\n", ++ ioaddr, irq); ++#else ++ ioaddr = (unsigned long)ioremap(pci_resource_start(pdev, 0), ++ pci_resource_len(pdev, 0)); ++ if (!ioaddr) { ++ printk(KERN_ERR "eepro100: cannot remap MMIO region %llx @ %llx\n", ++ (unsigned long long)pci_resource_len(pdev, 0), ++ (unsigned long long)pci_resource_start(pdev, 0)); ++ goto err_out_free_mmio_region; ++ } ++ if (speedo_debug > 2) ++ printk("Found Intel i82557 PCI Speedo, MMIO at %#llx, IRQ %d.\n", ++ (unsigned long long)pci_resource_start(pdev, 0), irq); ++#endif ++ ++ /* save power state b4 pci_enable_device overwrites it */ ++ pm = pci_find_capability(pdev, PCI_CAP_ID_PM); ++ if (pm) { ++ u16 pwr_command; ++ pci_read_config_word(pdev, pm + PCI_PM_CTRL, &pwr_command); ++ acpi_idle_state = pwr_command & PCI_PM_CTRL_STATE_MASK; ++ } ++ ++ if (pci_enable_device(pdev)) ++ goto err_out_free_mmio_region; ++ ++ pci_set_master(pdev); ++ ++ if (speedo_found1(pdev, ioaddr, cards_found, acpi_idle_state) != 0) ++ goto err_out_iounmap; ++ ++ return 0; ++ ++err_out_iounmap: ; ++#ifndef USE_IO ++ iounmap ((void *)ioaddr); ++#endif ++err_out_free_mmio_region: ++ 
release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); ++err_out_free_pio_region: ++ release_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1)); ++err_out_none: ++ return -ENODEV; ++} ++ ++static int speedo_found1(struct pci_dev *pdev, ++ long ioaddr, int card_idx, int acpi_idle_state) ++{ ++ // *** RTnet *** ++ struct rtnet_device *rtdev = NULL; ++ // *** RTnet *** ++ ++ struct speedo_private *sp; ++ const char *product; ++ int i, option; ++ u16 eeprom[0x100]; ++ int size; ++ void *tx_ring_space; ++ dma_addr_t tx_ring_dma; ++ ++ size = TX_RING_SIZE * sizeof(struct TxFD) + sizeof(struct speedo_stats); ++ tx_ring_space = pci_alloc_consistent(pdev, size, &tx_ring_dma); ++ if (tx_ring_space == NULL) ++ return -1; ++ ++ // *** RTnet *** ++ rtdev = rt_alloc_etherdev(sizeof(struct speedo_private), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (rtdev == NULL) { ++ printk(KERN_ERR "eepro100: Could not allocate ethernet device.\n"); ++ pci_free_consistent(pdev, size, tx_ring_space, tx_ring_dma); ++ return -1; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ memset(rtdev->priv, 0, sizeof(struct speedo_private)); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ rtdev->sysbind = &pdev->dev; ++ // *** RTnet *** ++ ++ if (rtdev->mem_start > 0) ++ option = rtdev->mem_start; ++ else if (card_idx >= 0 && options[card_idx] >= 0) ++ option = options[card_idx]; ++ else ++ option = 0; ++ ++ /* Read the station address EEPROM before doing the reset. ++ Nominally his should even be done before accepting the device, but ++ then we wouldn't have a device name with which to report the error. ++ The size test is for 6 bit vs. 8 bit address serial EEPROMs. ++ */ ++ { ++ unsigned long iobase; ++ int read_cmd, ee_size; ++ u16 sum; ++ int j; ++ ++ /* Use IO only to avoid postponed writes and satisfy EEPROM timing ++ requirements. */ ++ iobase = pci_resource_start(pdev, 1); ++ if ((do_eeprom_cmd(iobase, EE_READ_CMD << 24, 27) & 0xffe0000) ++ == 0xffe0000) { ++ ee_size = 0x100; ++ read_cmd = EE_READ_CMD << 24; ++ } else { ++ ee_size = 0x40; ++ read_cmd = EE_READ_CMD << 22; ++ } ++ ++ for (j = 0, i = 0, sum = 0; i < ee_size; i++) { ++ u16 value = do_eeprom_cmd(iobase, read_cmd | (i << 16), 27); ++ eeprom[i] = value; ++ sum += value; ++ if (i < 3) { ++ rtdev->dev_addr[j++] = value; ++ rtdev->dev_addr[j++] = value >> 8; ++ } ++ } ++ if (sum != 0xBABA) ++ printk(KERN_WARNING "%s: Invalid EEPROM checksum %#4.4x, " ++ "check settings before activating this device!\n", ++ rtdev->name, sum); ++ /* Don't unregister_netdev(dev); as the EEPro may actually be ++ usable, especially if the MAC address is set later. ++ On the other hand, it may be unusable if MDI data is corrupted. */ ++ } ++ ++ /* Reset the chip: stop Tx and Rx processes and clear counters. ++ This takes less than 10usec and will easily finish before the next ++ action. */ ++ outl(PortReset, ioaddr + SCBPort); ++ inl(ioaddr + SCBPort); ++ udelay(10); ++ ++ if (eeprom[3] & 0x0100) ++ product = "OEM i82557/i82558 10/100 Ethernet"; ++ else ++ product = pci_name(pdev); ++ ++ printk(KERN_INFO "%s: %s, ", rtdev->name, product); ++ ++ for (i = 0; i < 5; i++) ++ printk("%2.2X:", rtdev->dev_addr[i]); ++ printk("%2.2X, ", rtdev->dev_addr[i]); ++#ifdef USE_IO ++ printk("I/O at %#3lx, ", ioaddr); ++#endif ++ printk("IRQ %d.\n", pdev->irq); ++ ++ outl(PortReset, ioaddr + SCBPort); ++ inl(ioaddr + SCBPort); ++ udelay(10); ++ ++ /* Return the chip to its original power state. 
*/ ++ pci_set_power_state(pdev, acpi_idle_state); ++ ++ rtdev->base_addr = ioaddr; ++ rtdev->irq = pdev->irq; ++ ++ sp = rtdev->priv; ++ sp->pdev = pdev; ++ sp->acpi_pwr = acpi_idle_state; ++ sp->tx_ring = tx_ring_space; ++ sp->tx_ring_dma = tx_ring_dma; ++ sp->lstats = (struct speedo_stats *)(sp->tx_ring + TX_RING_SIZE); ++ sp->lstats_dma = TX_RING_ELEM_DMA(sp, TX_RING_SIZE); ++ ++ sp->full_duplex = option >= 0 && (option & 0x10) ? 1 : 0; ++ if (card_idx >= 0) { ++ if (full_duplex[card_idx] >= 0) ++ sp->full_duplex = full_duplex[card_idx]; ++ } ++ sp->default_port = option >= 0 ? (option & 0x0f) : 0; ++ ++ sp->phy[0] = eeprom[6]; ++ sp->phy[1] = eeprom[7]; ++ sp->rx_bug = (eeprom[3] & 0x03) == 3 ? 0 : 1; ++ if (((pdev->device > 0x1030 && (pdev->device < 0x1039))) ++ || (pdev->device == 0x2449)) { ++ sp->chip_id = 1; ++ } ++ ++ if (sp->rx_bug) ++ printk(KERN_ERR " *** Receiver lock-up bug detected ***\n" ++ KERN_ERR " Your device may not work reliably!\n"); ++ ++ // *** RTnet *** ++ /* The Speedo-specific entries in the device structure. */ ++ rtdev->open = &speedo_open; ++ rtdev->hard_start_xmit = &speedo_start_xmit; ++ rtdev->stop = &speedo_close; ++ rtdev->hard_header = &rt_eth_header; ++ rtdev->get_stats = &speedo_get_stats; ++ //rtdev->do_ioctl = NULL; ++ ++ if ( (i=rt_register_rtnetdev(rtdev)) ) ++ { ++ pci_free_consistent(pdev, size, tx_ring_space, tx_ring_dma); ++ rtdev_free(rtdev); ++ return i; ++ } ++ ++ pci_set_drvdata (pdev, rtdev); ++ // *** RTnet *** ++ ++ return 0; ++} ++ ++/* Serial EEPROM section. ++ A "bit" grungy, but we work our way through bit-by-bit :->. */ ++/* EEPROM_Ctrl bits. */ ++#define EE_SHIFT_CLK 0x01 /* EEPROM shift clock. */ ++#define EE_CS 0x02 /* EEPROM chip select. */ ++#define EE_DATA_WRITE 0x04 /* EEPROM chip data in. */ ++#define EE_DATA_READ 0x08 /* EEPROM chip data out. */ ++#define EE_ENB (0x4800 | EE_CS) ++#define EE_WRITE_0 0x4802 ++#define EE_WRITE_1 0x4806 ++#define EE_OFFSET SCBeeprom ++ ++/* The fixes for the code were kindly provided by Dragan Stancevic ++ to strictly follow Intel specifications of EEPROM ++ access timing. ++ The publicly available sheet 64486302 (sec. 3.1) specifies 1us access ++ interval for serial EEPROM. However, it looks like that there is an ++ additional requirement dictating larger udelay's in the code below. ++ 2000/05/24 SAW */ ++static int do_eeprom_cmd(long ioaddr, int cmd, int cmd_len) ++{ ++ unsigned retval = 0; ++ long ee_addr = ioaddr + SCBeeprom; ++ ++ io_outw(EE_ENB, ee_addr); udelay(2); ++ io_outw(EE_ENB | EE_SHIFT_CLK, ee_addr); udelay(2); ++ ++ /* Shift the command bits out. */ ++ do { ++ short dataval = (cmd & (1 << cmd_len)) ? EE_WRITE_1 : EE_WRITE_0; ++ io_outw(dataval, ee_addr); udelay(2); ++ io_outw(dataval | EE_SHIFT_CLK, ee_addr); udelay(2); ++ retval = (retval << 1) | ((io_inw(ee_addr) & EE_DATA_READ) ? 1 : 0); ++ } while (--cmd_len >= 0); ++ io_outw(EE_ENB, ee_addr); udelay(2); ++ ++ /* Terminate the EEPROM access. */ ++ io_outw(EE_ENB & ~EE_CS, ee_addr); ++ return retval; ++} ++ ++static int mdio_read(long ioaddr, int phy_id, int location) ++{ ++ int val, boguscnt = 64*10; /* <64 usec. to complete, typ 27 ticks */ ++ outl(0x08000000 | (location<<16) | (phy_id<<21), ioaddr + SCBCtrlMDI); ++ do { ++ val = inl(ioaddr + SCBCtrlMDI); ++ if (--boguscnt < 0) { ++ printk(KERN_ERR " mdio_read() timed out with val = %8.8x.\n", val); ++ break; ++ } ++ } while (! 
(val & 0x10000000)); ++ return val & 0xffff; ++} ++ ++ ++static int ++speedo_open(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int retval; ++ ++ if (speedo_debug > 1) ++ printk(KERN_DEBUG "%s: speedo_open() irq %d.\n", rtdev->name, rtdev->irq); ++ ++ pci_set_power_state(sp->pdev, 0); ++ ++ /* Set up the Tx queue early.. */ ++ sp->cur_tx = 0; ++ sp->dirty_tx = 0; ++ sp->last_cmd = 0; ++ sp->tx_full = 0; ++ rtdm_lock_init(&sp->lock); ++ sp->in_interrupt = 0; ++ ++ // *** RTnet *** ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ retval = rtdm_irq_request(&sp->irq_handle, rtdev->irq, ++ speedo_interrupt, RTDM_IRQTYPE_SHARED, ++ "rt_eepro100", rtdev); ++ if (retval) { ++ return retval; ++ } ++ // *** RTnet *** ++ ++ rtdev->if_port = sp->default_port; ++ ++ speedo_init_rx_ring(rtdev); ++ ++ /* Fire up the hardware. */ ++ outw(SCBMaskAll, ioaddr + SCBCmd); ++ speedo_write_flush(ioaddr); ++ speedo_resume(rtdev); ++ ++ netdevice_start(rtdev); ++ rtnetif_start_queue(rtdev); ++ ++ /* Setup the chip and configure the multicast list. */ ++ sp->mc_setup_head = NULL; ++ sp->mc_setup_tail = NULL; ++ sp->flow_ctrl = sp->partner = 0; ++ sp->rx_mode = -1; /* Invalid -> always reset the mode. */ ++ set_rx_mode(rtdev); ++ if ((sp->phy[0] & 0x8000) == 0) ++ sp->advertising = mdio_read(ioaddr, sp->phy[0] & 0x1f, 4); ++ ++ if (mdio_read(ioaddr, sp->phy[0] & 0x1f, MII_BMSR) & BMSR_LSTATUS) ++ rtnetif_carrier_on(rtdev); ++ else ++ rtnetif_carrier_off(rtdev); ++ ++ if (speedo_debug > 2) { ++ printk(KERN_DEBUG "%s: Done speedo_open(), status %8.8x.\n", ++ rtdev->name, inw(ioaddr + SCBStatus)); ++ } ++ ++ /* No need to wait for the command unit to accept here. */ ++ if ((sp->phy[0] & 0x8000) == 0) ++ mdio_read(ioaddr, sp->phy[0] & 0x1f, 0); ++ ++ return 0; ++} ++ ++/* Start the chip hardware after a full reset. */ ++static void speedo_resume(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ ++ /* Start with a Tx threshold of 256 (0x..20.... 8 byte units). */ ++ sp->tx_threshold = 0x01208000; ++ ++ /* Set the segment registers to '0'. */ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outl(0, ioaddr + SCBPointer); ++ /* impose a delay to avoid a bug */ ++ inl(ioaddr + SCBPointer); ++ udelay(10); ++ outb(RxAddrLoad, ioaddr + SCBCmd); ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outb(CUCmdBase, ioaddr + SCBCmd); ++ ++ /* Load the statistics block and rx ring addresses. */ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outl(sp->lstats_dma, ioaddr + SCBPointer); ++ outb(CUStatsAddr, ioaddr + SCBCmd); ++ sp->lstats->done_marker = 0; ++ ++ if (sp->rx_ringp[sp->cur_rx % RX_RING_SIZE] == NULL) { ++ if (speedo_debug > 2) ++ printk(KERN_DEBUG "%s: NULL cur_rx in speedo_resume().\n", ++ rtdev->name); ++ } else { ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE], ++ ioaddr + SCBPointer); ++ outb(RxStart, ioaddr + SCBCmd); ++ } ++ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outb(CUDumpStats, ioaddr + SCBCmd); ++ udelay(30); ++ ++ /* Fill the first command with our physical address. */ ++ { ++ struct descriptor *ias_cmd; ++ ++ ias_cmd = ++ (struct descriptor *)&sp->tx_ring[sp->cur_tx++ % TX_RING_SIZE]; ++ /* Avoid a bug(?!) here by marking the command already completed. 
*/ ++ ias_cmd->cmd_status = cpu_to_le32((CmdSuspend | CmdIASetup) | 0xa000); ++ ias_cmd->link = ++ cpu_to_le32(TX_RING_ELEM_DMA(sp, sp->cur_tx % TX_RING_SIZE)); ++ memcpy(ias_cmd->params, rtdev->dev_addr, 6); ++ sp->last_cmd = ias_cmd; ++ } ++ ++ /* Start the chip's Tx process and unmask interrupts. */ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outl(TX_RING_ELEM_DMA(sp, sp->dirty_tx % TX_RING_SIZE), ++ ioaddr + SCBPointer); ++ /* We are not ACK-ing FCP and ER in the interrupt handler yet so they should ++ remain masked --Dragan */ ++ outw(CUStart | SCBMaskEarlyRx | SCBMaskFlowCtl, ioaddr + SCBCmd); ++} ++ ++static void speedo_show_state(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ unsigned int i; ++ ++ /* Print a few items for debugging. */ ++ if (speedo_debug > 0) { ++ printk(KERN_DEBUG "%s: Tx ring dump, Tx queue %u / %u:\n", rtdev->name, ++ sp->cur_tx, sp->dirty_tx); ++ for (i = 0; i < TX_RING_SIZE; i++) ++ printk(KERN_DEBUG "%s: %c%c%2d %8.8x.\n", rtdev->name, ++ i == sp->dirty_tx % TX_RING_SIZE ? '*' : ' ', ++ i == sp->cur_tx % TX_RING_SIZE ? '=' : ' ', ++ i, sp->tx_ring[i].status); ++ } ++ printk(KERN_DEBUG "%s: Printing Rx ring" ++ " (next to receive into %u, dirty index %u).\n", ++ rtdev->name, sp->cur_rx, sp->dirty_rx); ++ ++ for (i = 0; i < RX_RING_SIZE; i++) ++ printk(KERN_DEBUG "%s: %c%c%c%2d %8.8x.\n", rtdev->name, ++ sp->rx_ringp[i] == sp->last_rxf ? 'l' : ' ', ++ i == sp->dirty_rx % RX_RING_SIZE ? '*' : ' ', ++ i == sp->cur_rx % RX_RING_SIZE ? '=' : ' ', ++ i, (sp->rx_ringp[i] != NULL) ? ++ (unsigned)sp->rx_ringp[i]->status : 0); ++ ++ { ++ long ioaddr = rtdev->base_addr; ++ int phy_num = sp->phy[0] & 0x1f; ++ for (i = 0; i < 16; i++) { ++ /* FIXME: what does it mean? --SAW */ ++ if (i == 6) i = 21; ++ printk(KERN_DEBUG "%s: PHY index %d register %d is %4.4x.\n", ++ rtdev->name, phy_num, i, mdio_read(ioaddr, phy_num, i)); ++ } ++ } ++} ++ ++static struct net_device_stats *speedo_get_stats(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ return &sp->stats; ++} ++ ++/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ ++static void ++speedo_init_rx_ring(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ struct RxFD *rxf, *last_rxf = NULL; ++ dma_addr_t last_rxf_dma = 0 /* to shut up the compiler */; ++ int i; ++ ++ sp->cur_rx = 0; ++ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct rtskb *skb; ++ skb = rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ + 2 + sizeof(struct RxFD)); ++ sp->rx_skbuff[i] = skb; ++ if (skb == NULL) ++ break; /* OK. Just initially short of Rx bufs. */ ++ // *** RTnet *** ++ rtskb_reserve(skb, 2); /* IP header alignment */ ++ // *** RTnet *** ++ rxf = (struct RxFD *)skb->tail; ++ sp->rx_ringp[i] = rxf; ++ sp->rx_ring_dma[i] = ++ pci_map_single(sp->pdev, rxf, ++ PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_BIDIRECTIONAL); ++ rtskb_reserve(skb, sizeof(struct RxFD)); ++ if (last_rxf) { ++ last_rxf->link = cpu_to_le32(sp->rx_ring_dma[i]); ++ pci_dma_sync_single_for_device(sp->pdev, last_rxf_dma, ++ sizeof(struct RxFD), PCI_DMA_TODEVICE); ++ } ++ last_rxf = rxf; ++ last_rxf_dma = sp->rx_ring_dma[i]; ++ rxf->status = cpu_to_le32(0x00000001); /* '1' is flag value only. */ ++ rxf->link = 0; /* None yet. */ ++ /* This field unused by i82557. 
*/ ++ rxf->rx_buf_addr = 0xffffffff; ++ rxf->count = cpu_to_le32(PKT_BUF_SZ << 16); ++ pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[i], ++ sizeof(struct RxFD), PCI_DMA_TODEVICE); ++ } ++ sp->dirty_rx = (unsigned int)(i - RX_RING_SIZE); ++ /* Mark the last entry as end-of-list. */ ++ last_rxf->status = cpu_to_le32(0xC0000002); /* '2' is flag value only. */ ++ pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[RX_RING_SIZE-1], ++ sizeof(struct RxFD), PCI_DMA_TODEVICE); ++ sp->last_rxf = last_rxf; ++ sp->last_rxf_dma = last_rxf_dma; ++} ++ ++static int ++speedo_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int entry; ++ // *** RTnet *** ++ rtdm_lockctx_t context; ++ ++ /* Prevent interrupts from changing the Tx ring from underneath us. */ ++ rtdm_lock_get_irqsave(&sp->lock, context); ++ // *** RTnet *** ++ ++ /* Check if there are enough space. */ ++ if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) { ++ // *** RTnet *** ++ rtnetif_stop_queue(rtdev); ++ sp->tx_full = 1; ++ ++ rtdm_lock_put_irqrestore(&sp->lock, context); ++ ++ rtdm_printk(KERN_ERR "%s: incorrect tbusy state, fixed.\n", rtdev->name); ++ // *** RTnet *** ++ ++ return 1; ++ } ++ ++ /* Calculate the Tx descriptor entry. */ ++ entry = sp->cur_tx++ % TX_RING_SIZE; ++ ++ sp->tx_skbuff[entry] = skb; ++ sp->tx_ring[entry].status = ++ cpu_to_le32(CmdSuspend | CmdTx | CmdTxFlex); ++ if (!(entry & ((TX_RING_SIZE>>2)-1))) ++ sp->tx_ring[entry].status |= cpu_to_le32(CmdIntr); ++ sp->tx_ring[entry].link = ++ cpu_to_le32(TX_RING_ELEM_DMA(sp, sp->cur_tx % TX_RING_SIZE)); ++ sp->tx_ring[entry].tx_desc_addr = ++ cpu_to_le32(TX_RING_ELEM_DMA(sp, entry) + TX_DESCR_BUF_OFFSET); ++ /* The data region is always in one buffer descriptor. */ ++ sp->tx_ring[entry].count = cpu_to_le32(sp->tx_threshold); ++ sp->tx_ring[entry].tx_buf_addr0 = ++ cpu_to_le32(pci_map_single(sp->pdev, skb->data, ++ skb->len, PCI_DMA_TODEVICE)); ++ sp->tx_ring[entry].tx_buf_size0 = cpu_to_le32(skb->len); ++ ++// *** RTnet *** ++// Disabled to gain shorter worst-case execution times. ++// Hope this bug is not relevant for us ++ ++ /* Trigger the command unit resume. */ ++ if (rt_wait_for_cmd_done(ioaddr + SCBCmd, __FUNCTION__) != 0) { ++ rtdm_lock_put_irqrestore(&sp->lock, context); ++ ++ return 1; ++ } ++ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++// *** RTnet *** ++ ++ clear_suspend(sp->last_cmd); ++ /* We want the time window between clearing suspend flag on the previous ++ command and resuming CU to be as small as possible. ++ Interrupts in between are very undesired. --SAW */ ++ outb(CUResume, ioaddr + SCBCmd); ++ sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry]; ++ ++ /* Leave room for set_rx_mode(). If there is no more space than reserved ++ for multicast filter mark the ring as full. 
*/ ++ if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) { ++ rtnetif_stop_queue(rtdev); ++ sp->tx_full = 1; ++ } ++ ++ // *** RTnet *** ++ rtdm_lock_put_irqrestore(&sp->lock, context); ++ // *** RTnet *** ++ ++ return 0; ++} ++ ++static void speedo_tx_buffer_gc(struct rtnet_device *rtdev) ++{ ++ unsigned int dirty_tx; ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ ++ dirty_tx = sp->dirty_tx; ++ while ((int)(sp->cur_tx - dirty_tx) > 0) { ++ int entry = dirty_tx % TX_RING_SIZE; ++ int status = le32_to_cpu(sp->tx_ring[entry].status); ++ ++ if (speedo_debug > 5) ++ printk(KERN_DEBUG " scavenge candidate %d status %4.4x.\n", ++ entry, status); ++ if ((status & StatusComplete) == 0) ++ break; /* It still hasn't been processed. */ ++ if (status & TxUnderrun) ++ if (sp->tx_threshold < 0x01e08000) { ++ if (speedo_debug > 2) ++ printk(KERN_DEBUG "%s: TX underrun, threshold adjusted.\n", ++ rtdev->name); ++ sp->tx_threshold += 0x00040000; ++ } ++ /* Free the original skb. */ ++ if (sp->tx_skbuff[entry]) { ++ sp->stats.tx_packets++; /* Count only user packets. */ ++ sp->stats.tx_bytes += sp->tx_skbuff[entry]->len; ++ pci_unmap_single(sp->pdev, ++ le32_to_cpu(sp->tx_ring[entry].tx_buf_addr0), ++ sp->tx_skbuff[entry]->len, PCI_DMA_TODEVICE); ++ ++ // *** RTnet *** ++ dev_kfree_rtskb(sp->tx_skbuff[entry]); ++ // *** RTnet *** ++ ++ sp->tx_skbuff[entry] = 0; ++ } ++ dirty_tx++; ++ } ++ ++// *** RTnet *** ++// *** RTnet *** ++ ++ sp->dirty_tx = dirty_tx; ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. */ ++static int speedo_interrupt(rtdm_irq_t *irq_handle) ++{ ++ // *** RTnet *** ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *rtdev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ int ret = RTDM_IRQ_NONE; ++ // *** RTnet *** ++ ++ struct speedo_private *sp; ++ long ioaddr, boguscnt = max_interrupt_work; ++ unsigned short status; ++ ++ ++ ioaddr = rtdev->base_addr; ++ sp = (struct speedo_private *)rtdev->priv; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG ++ /* A lock to prevent simultaneous entry on SMP machines. */ ++ if (test_and_set_bit(0, (void*)&sp->in_interrupt)) { ++ rtdm_printk(KERN_ERR"%s: SMP simultaneous entry of an interrupt handler.\n", ++ rtdev->name); ++ sp->in_interrupt = 0; /* Avoid halting machine. */ ++ return ret; ++ } ++#endif ++ ++ do { ++ status = inw(ioaddr + SCBStatus); ++ /* Acknowledge all of the current interrupt sources ASAP. */ ++ /* Will change from 0xfc00 to 0xff00 when we start handling ++ FCP and ER interrupts --Dragan */ ++ outw(status & 0xfc00, ioaddr + SCBStatus); ++ speedo_write_flush(ioaddr); ++ ++ if (speedo_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: interrupt status=%#4.4x.\n", ++ rtdev->name, status); ++ ++ if ((status & 0xfc00) == 0) ++ break; ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ /* Always check if all rx buffers are allocated. --SAW */ ++ speedo_refill_rx_buffers(rtdev, 0); ++ ++ if ((status & 0x5000) || /* Packet received, or Rx error. */ ++ (sp->rx_ring_state&(RrNoMem|RrPostponed)) == RrPostponed) ++ /* Need to gather the postponed packet. */ ++ speedo_rx(rtdev, &packets, &time_stamp); ++ ++ if (status & 0x1000) { ++ rtdm_lock_get(&sp->lock); ++ if ((status & 0x003c) == 0x0028) { /* No more Rx buffers. 
*/ ++ struct RxFD *rxf; ++ rtdm_printk(KERN_WARNING "%s: card reports no RX buffers.\n", ++ rtdev->name); ++ rxf = sp->rx_ringp[sp->cur_rx % RX_RING_SIZE]; ++ if (rxf == NULL) { ++ if (speedo_debug > 2) ++ rtdm_printk(KERN_DEBUG ++ "%s: NULL cur_rx in speedo_interrupt().\n", ++ rtdev->name); ++ sp->rx_ring_state |= RrNoMem|RrNoResources; ++ } else if (rxf == sp->last_rxf) { ++ if (speedo_debug > 2) ++ rtdm_printk(KERN_DEBUG ++ "%s: cur_rx is last in speedo_interrupt().\n", ++ rtdev->name); ++ sp->rx_ring_state |= RrNoMem|RrNoResources; ++ } else ++ outb(RxResumeNoResources, ioaddr + SCBCmd); ++ } else if ((status & 0x003c) == 0x0008) { /* No resources. */ ++ struct RxFD *rxf; ++ rtdm_printk(KERN_WARNING "%s: card reports no resources.\n", ++ rtdev->name); ++ rxf = sp->rx_ringp[sp->cur_rx % RX_RING_SIZE]; ++ if (rxf == NULL) { ++ if (speedo_debug > 2) ++ rtdm_printk(KERN_DEBUG ++ "%s: NULL cur_rx in speedo_interrupt().\n", ++ rtdev->name); ++ sp->rx_ring_state |= RrNoMem|RrNoResources; ++ } else if (rxf == sp->last_rxf) { ++ if (speedo_debug > 2) ++ rtdm_printk(KERN_DEBUG ++ "%s: cur_rx is last in speedo_interrupt().\n", ++ rtdev->name); ++ sp->rx_ring_state |= RrNoMem|RrNoResources; ++ } else { ++ /* Restart the receiver. */ ++ outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE], ++ ioaddr + SCBPointer); ++ outb(RxStart, ioaddr + SCBCmd); ++ } ++ } ++ sp->stats.rx_errors++; ++ rtdm_lock_put(&sp->lock); ++ } ++ ++ if ((sp->rx_ring_state&(RrNoMem|RrNoResources)) == RrNoResources) { ++ rtdm_printk(KERN_WARNING ++ "%s: restart the receiver after a possible hang.\n", ++ rtdev->name); ++ rtdm_lock_get(&sp->lock); ++ /* Restart the receiver. ++ I'm not sure if it's always right to restart the receiver ++ here but I don't know another way to prevent receiver hangs. ++ 1999/12/25 SAW */ ++ outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE], ++ ioaddr + SCBPointer); ++ outb(RxStart, ioaddr + SCBCmd); ++ sp->rx_ring_state &= ~RrNoResources; ++ rtdm_lock_put(&sp->lock); ++ } ++ ++ /* User interrupt, Command/Tx unit interrupt or CU not active. */ ++ if (status & 0xA400) { ++ rtdm_lock_get(&sp->lock); ++ speedo_tx_buffer_gc(rtdev); ++ if (sp->tx_full ++ && (int)(sp->cur_tx - sp->dirty_tx) < TX_QUEUE_UNFULL) { ++ /* The ring is no longer full. */ ++ sp->tx_full = 0; ++ rtnetif_wake_queue(rtdev); /* Attention: under a spinlock. --SAW */ ++ } ++ rtdm_lock_put(&sp->lock); ++ } ++ ++ if (--boguscnt < 0) { ++ rtdm_printk(KERN_ERR "%s: Too much work at interrupt, status=0x%4.4x.\n", ++ rtdev->name, status); ++ /* Clear all interrupt sources. */ ++ /* Will change from 0xfc00 to 0xff00 when we start handling ++ FCP and ER interrupts --Dragan */ ++ outw(0xfc00, ioaddr + SCBStatus); ++ break; ++ } ++ } while (1); ++ ++ if (speedo_debug > 3) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n", ++ rtdev->name, inw(ioaddr + SCBStatus)); ++ ++ clear_bit(0, (void*)&sp->in_interrupt); ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ return ret; ++} ++ ++static inline struct RxFD *speedo_rx_alloc(struct rtnet_device *rtdev, int entry) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ struct RxFD *rxf; ++ struct rtskb *skb; ++ /* Get a fresh skbuff to replace the consumed one. 
*/ ++ skb = rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ + 2 + sizeof(struct RxFD)); ++ sp->rx_skbuff[entry] = skb; ++ if (skb == NULL) { ++ sp->rx_ringp[entry] = NULL; ++ return NULL; ++ } ++ rtskb_reserve(skb, 2); /* IP header alignment */ ++ rxf = sp->rx_ringp[entry] = (struct RxFD *)skb->tail; ++ sp->rx_ring_dma[entry] = ++ pci_map_single(sp->pdev, rxf, ++ PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); ++ rtskb_reserve(skb, sizeof(struct RxFD)); ++ rxf->rx_buf_addr = 0xffffffff; ++ pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[entry], ++ sizeof(struct RxFD), PCI_DMA_TODEVICE); ++ return rxf; ++} ++ ++static inline void speedo_rx_link(struct rtnet_device *rtdev, int entry, ++ struct RxFD *rxf, dma_addr_t rxf_dma) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ rxf->status = cpu_to_le32(0xC0000001); /* '1' for driver use only. */ ++ rxf->link = 0; /* None yet. */ ++ rxf->count = cpu_to_le32(PKT_BUF_SZ << 16); ++ sp->last_rxf->link = cpu_to_le32(rxf_dma); ++ sp->last_rxf->status &= cpu_to_le32(~0xC0000000); ++ pci_dma_sync_single_for_device(sp->pdev, sp->last_rxf_dma, ++ sizeof(struct RxFD), PCI_DMA_TODEVICE); ++ sp->last_rxf = rxf; ++ sp->last_rxf_dma = rxf_dma; ++} ++ ++static int speedo_refill_rx_buf(struct rtnet_device *rtdev, int force) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ int entry; ++ struct RxFD *rxf; ++ ++ entry = sp->dirty_rx % RX_RING_SIZE; ++ if (sp->rx_skbuff[entry] == NULL) { ++ rxf = speedo_rx_alloc(rtdev, entry); ++ if (rxf == NULL) { ++ unsigned int forw; ++ int forw_entry; ++ if (speedo_debug > 2 || !(sp->rx_ring_state & RrOOMReported)) { ++ // *** RTnet *** ++ rtdm_printk(KERN_WARNING "%s: can't fill rx buffer (force %d)!\n", ++ rtdev->name, force); ++ // *** RTnet *** ++ sp->rx_ring_state |= RrOOMReported; ++ } ++ if (!force) ++ return -1; /* Better luck next time! */ ++ /* Borrow an skb from one of next entries. */ ++ for (forw = sp->dirty_rx + 1; forw != sp->cur_rx; forw++) ++ if (sp->rx_skbuff[forw % RX_RING_SIZE] != NULL) ++ break; ++ if (forw == sp->cur_rx) ++ return -1; ++ forw_entry = forw % RX_RING_SIZE; ++ sp->rx_skbuff[entry] = sp->rx_skbuff[forw_entry]; ++ sp->rx_skbuff[forw_entry] = NULL; ++ rxf = sp->rx_ringp[forw_entry]; ++ sp->rx_ringp[forw_entry] = NULL; ++ sp->rx_ringp[entry] = rxf; ++ } ++ } else { ++ rxf = sp->rx_ringp[entry]; ++ } ++ speedo_rx_link(rtdev, entry, rxf, sp->rx_ring_dma[entry]); ++ sp->dirty_rx++; ++ sp->rx_ring_state &= ~(RrNoMem|RrOOMReported); /* Mark the progress. */ ++ return 0; ++} ++ ++static void speedo_refill_rx_buffers(struct rtnet_device *rtdev, int force) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ ++ /* Refill the RX ring. */ ++ while ((int)(sp->cur_rx - sp->dirty_rx) > 0 && ++ speedo_refill_rx_buf(rtdev, force) != -1); ++} ++ ++static int ++speedo_rx(struct rtnet_device *rtdev, int* packets, nanosecs_abs_t *time_stamp) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ int entry = sp->cur_rx % RX_RING_SIZE; ++ int rx_work_limit = sp->dirty_rx + RX_RING_SIZE - sp->cur_rx; ++ int alloc_ok = 1; ++ ++ if (speedo_debug > 4) ++ rtdm_printk(KERN_DEBUG " In speedo_rx().\n"); ++ /* If we own the next entry, it's a new packet. Send it up. 
*/ ++ while (sp->rx_ringp[entry] != NULL) { ++ int status; ++ int pkt_len; ++ ++ pci_dma_sync_single_for_cpu(sp->pdev, sp->rx_ring_dma[entry], ++ sizeof(struct RxFD), PCI_DMA_FROMDEVICE); ++ status = le32_to_cpu(sp->rx_ringp[entry]->status); ++ pkt_len = le32_to_cpu(sp->rx_ringp[entry]->count) & 0x3fff; ++ ++ if (!(status & RxComplete)) ++ break; ++ ++ if (--rx_work_limit < 0) ++ break; ++ ++ /* Check for a rare out-of-memory case: the current buffer is ++ the last buffer allocated in the RX ring. --SAW */ ++ if (sp->last_rxf == sp->rx_ringp[entry]) { ++ /* Postpone the packet. It'll be reaped at an interrupt when this ++ packet is no longer the last packet in the ring. */ ++ if (speedo_debug > 2) ++ rtdm_printk(KERN_DEBUG "%s: RX packet postponed!\n", ++ rtdev->name); ++ sp->rx_ring_state |= RrPostponed; ++ break; ++ } ++ ++ if (speedo_debug > 4) ++ rtdm_printk(KERN_DEBUG " speedo_rx() status %8.8x len %d.\n", status, ++ pkt_len); ++ if ((status & (RxErrTooBig|RxOK|0x0f90)) != RxOK) { ++ if (status & RxErrTooBig) ++ rtdm_printk(KERN_ERR "%s: Ethernet frame overran the Rx buffer, " ++ "status %8.8x!\n", rtdev->name, status); ++ else if (! (status & RxOK)) { ++ /* There was a fatal error. This *should* be impossible. */ ++ sp->stats.rx_errors++; ++ rtdm_printk(KERN_ERR "%s: Anomalous event in speedo_rx(), " ++ "status %8.8x.\n", ++ rtdev->name, status); ++ } ++ } else { ++ struct rtskb *skb; ++ ++// *** RTnet *** ++ { ++// *** RTnet *** ++ /* Pass up the already-filled skbuff. */ ++ skb = sp->rx_skbuff[entry]; ++ if (skb == NULL) { ++ rtdm_printk(KERN_ERR "%s: Inconsistent Rx descriptor chain.\n", ++ rtdev->name); ++ break; ++ } ++ sp->rx_skbuff[entry] = NULL; ++ rtskb_put(skb, pkt_len); ++ sp->rx_ringp[entry] = NULL; ++ pci_unmap_single(sp->pdev, sp->rx_ring_dma[entry], ++ PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); ++ } ++ skb->protocol = rt_eth_type_trans(skb, rtdev); ++ //rtmac ++ skb->time_stamp = *time_stamp; ++ //rtmac ++ rtnetif_rx(skb); ++ (*packets)++; ++ sp->stats.rx_packets++; ++ sp->stats.rx_bytes += pkt_len; ++ } ++ entry = (++sp->cur_rx) % RX_RING_SIZE; ++ sp->rx_ring_state &= ~RrPostponed; ++ /* Refill the recently taken buffers. ++ Do it one-by-one to handle traffic bursts better. */ ++ if (alloc_ok && speedo_refill_rx_buf(rtdev, 0) == -1) ++ alloc_ok = 0; ++ } ++ ++ /* Try hard to refill the recently taken buffers. */ ++ speedo_refill_rx_buffers(rtdev, 1); ++ ++ sp->last_rx_time = jiffies; ++ ++ return 0; ++} ++ ++static int ++speedo_close(struct rtnet_device *rtdev) ++{ ++ long ioaddr = rtdev->base_addr; ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ int i; ++ ++ netdevice_stop(rtdev); ++ rtnetif_stop_queue(rtdev); ++ ++ if (speedo_debug > 1) ++ printk(KERN_DEBUG "%s: Shutting down ethercard, status was %4.4x.\n", ++ rtdev->name, inw(ioaddr + SCBStatus)); ++ ++ /* Shutdown procedure according to Intel's e100 */ ++ outl(PortPartialReset, ioaddr + SCBPort); ++ speedo_write_flush(ioaddr); udelay(20); ++ ++ outl(PortReset, ioaddr + SCBPort); ++ speedo_write_flush(ioaddr); udelay(20); ++ ++ outw(SCBMaskAll, ioaddr + SCBCmd); ++ speedo_write_flush(ioaddr); ++ ++ // *** RTnet *** ++ if ( (i=rtdm_irq_free(&sp->irq_handle))<0 ) ++ return i; ++ ++ rt_stack_disconnect(rtdev); ++ ++ // *** RTnet *** ++ ++ /* Print a few items for debugging. */ ++ if (speedo_debug > 3) ++ speedo_show_state(rtdev); ++ ++ /* Free all the skbuffs in the Rx and Tx queues. 
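++	   Buffers still mapped for DMA are released with pci_unmap_single()
++	   before the corresponding rtskb is freed.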
*/ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct rtskb *skb = sp->rx_skbuff[i]; ++ sp->rx_skbuff[i] = 0; ++ /* Clear the Rx descriptors. */ ++ if (skb) { ++ pci_unmap_single(sp->pdev, ++ sp->rx_ring_dma[i], ++ PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); ++ dev_kfree_rtskb(skb); ++ } ++ } ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ struct rtskb *skb = sp->tx_skbuff[i]; ++ sp->tx_skbuff[i] = 0; ++ /* Clear the Tx descriptors. */ ++ if (skb) { ++ pci_unmap_single(sp->pdev, ++ le32_to_cpu(sp->tx_ring[i].tx_buf_addr0), ++ skb->len, PCI_DMA_TODEVICE); ++ ++ // *** RTnet *** ++ dev_kfree_rtskb(skb); ++ // *** RTnet *** ++ } ++ } ++ ++// *** RTnet *** ++// *** RTnet *** ++ ++ pci_set_power_state(sp->pdev, 2); ++ ++ return 0; ++} ++ ++ ++/* Set or clear the multicast filter for this adaptor. ++ This is very ugly with Intel chips -- we usually have to execute an ++ entire configuration command, plus process a multicast command. ++ This is complicated. We must put a large configuration command and ++ an arbitrarily-sized multicast command in the transmit list. ++ To minimize the disruption -- the previous command might have already ++ loaded the link -- we convert the current command block, normally a Tx ++ command, into a no-op and link it to the new command. ++*/ ++static void set_rx_mode(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ struct descriptor *last_cmd; ++ char new_rx_mode; ++ //unsigned long flags; ++ int entry/*, i*/; ++ ++ if (rtdev->flags & IFF_PROMISC) { /* Set promiscuous. */ ++ new_rx_mode = 3; ++ } else if (rtdev->flags & IFF_ALLMULTI) { ++ new_rx_mode = 1; ++ } else ++ new_rx_mode = 0; ++ ++ if (speedo_debug > 3) ++ printk(KERN_DEBUG "%s: set_rx_mode %d -> %d\n", rtdev->name, ++ sp->rx_mode, new_rx_mode); ++ ++ if ((int)(sp->cur_tx - sp->dirty_tx) > TX_RING_SIZE - TX_MULTICAST_SIZE) { ++ /* The Tx ring is full -- don't add anything! Hope the mode will be ++ * set again later. */ ++ sp->rx_mode = -1; ++ return; ++ } ++ ++ if (new_rx_mode != sp->rx_mode) { ++ u8 *config_cmd_data; ++ ++ //spin_lock_irqsave(&sp->lock, flags); --- disabled for now as it runs before irq handler is active ++ entry = sp->cur_tx++ % TX_RING_SIZE; ++ last_cmd = sp->last_cmd; ++ sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry]; ++ ++ sp->tx_skbuff[entry] = 0; /* Redundant. */ ++ sp->tx_ring[entry].status = cpu_to_le32(CmdSuspend | CmdConfigure); ++ sp->tx_ring[entry].link = ++ cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE)); ++ config_cmd_data = (void *)&sp->tx_ring[entry].tx_desc_addr; ++ /* Construct a full CmdConfig frame. */ ++ memcpy(config_cmd_data, i82558_config_cmd, CONFIG_DATA_SIZE); ++ config_cmd_data[1] = (txfifo << 4) | rxfifo; ++ config_cmd_data[4] = rxdmacount; ++ config_cmd_data[5] = txdmacount + 0x80; ++ config_cmd_data[15] |= (new_rx_mode & 2) ? 1 : 0; ++ /* 0x80 doesn't disable FC 0x84 does. ++ Disable Flow control since we are not ACK-ing any FC interrupts ++ for now. --Dragan */ ++ config_cmd_data[19] = 0x84; ++ config_cmd_data[19] |= sp->full_duplex ? 0x40 : 0; ++ config_cmd_data[21] = (new_rx_mode & 1) ? 0x0D : 0x05; ++ if (sp->phy[0] & 0x8000) { /* Use the AUI port instead. */ ++ config_cmd_data[15] |= 0x80; ++ config_cmd_data[8] = 0; ++ } ++ /* Trigger the command unit resume. 
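++	   The suspend bit of the previously queued command is cleared first
++	   so that the command unit walks on to the new configuration block
++	   once resumed.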
*/ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ clear_suspend(last_cmd); ++ outb(CUResume, ioaddr + SCBCmd); ++ if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) { ++ rtnetif_stop_queue(rtdev); ++ sp->tx_full = 1; ++ } ++ //spin_unlock_irqrestore(&sp->lock, flags); ++ } ++ ++ if (new_rx_mode == 0) { ++ /* The simple case of 0-3 multicast list entries occurs often, and ++ fits within one tx_ring[] entry. */ ++ /*struct dev_mc_list *mclist;*/ ++ u16 *setup_params/*, *eaddrs*/; ++ ++ //spin_lock_irqsave(&sp->lock, flags); --- disabled for now as it runs before irq handler is active ++ entry = sp->cur_tx++ % TX_RING_SIZE; ++ last_cmd = sp->last_cmd; ++ sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry]; ++ ++ sp->tx_skbuff[entry] = 0; ++ sp->tx_ring[entry].status = cpu_to_le32(CmdSuspend | CmdMulticastList); ++ sp->tx_ring[entry].link = ++ cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE)); ++ sp->tx_ring[entry].tx_desc_addr = 0; /* Really MC list count. */ ++ setup_params = (u16 *)&sp->tx_ring[entry].tx_desc_addr; ++ *setup_params++ = cpu_to_le16(0); /* mc_count */ ++// *** RTnet *** ++// *** RTnet *** ++ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ clear_suspend(last_cmd); ++ /* Immediately trigger the command unit resume. */ ++ outb(CUResume, ioaddr + SCBCmd); ++ ++ if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) { ++ rtnetif_stop_queue(rtdev); ++ sp->tx_full = 1; ++ } ++ //spin_unlock_irqrestore(&sp->lock, flags); ++// *** RTnet *** ++// *** RTnet *** ++ } ++ ++ sp->rx_mode = new_rx_mode; ++} ++ ++ ++static void eepro100_remove_one (struct pci_dev *pdev) ++{ ++ // *** RTnet *** ++ struct rtnet_device *rtdev = pci_get_drvdata (pdev); ++ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ // *** RTnet *** ++ ++ release_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1)); ++ release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); ++ ++#ifndef USE_IO ++ iounmap((char *)rtdev->base_addr); ++#endif ++ ++ pci_free_consistent(pdev, TX_RING_SIZE * sizeof(struct TxFD) ++ + sizeof(struct speedo_stats), ++ sp->tx_ring, sp->tx_ring_dma); ++ pci_disable_device(pdev); ++ ++ // *** RTnet *** ++ rtdev_free(rtdev); ++ // *** RTnet *** ++} ++ ++static struct pci_device_id eepro100_pci_tbl[] = { ++ { PCI_VENDOR_ID_INTEL, 0x1229, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1209, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1029, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1030, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1031, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1032, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1033, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1034, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1035, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1036, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1037, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1038, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1039, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x103A, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x103B, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x103C, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x103D, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x103E, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1092, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1227, 
PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1228, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x2449, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x2459, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x245D, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x27DC, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x5200, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x5201, PCI_ANY_ID, PCI_ANY_ID, }, ++ { 0,} ++}; ++MODULE_DEVICE_TABLE(pci, eepro100_pci_tbl); ++ ++static struct pci_driver eepro100_driver = { ++ name: "eepro100_rt", ++ id_table: eepro100_pci_tbl, ++ probe: eepro100_init_one, ++ remove: eepro100_remove_one, ++ suspend: NULL, ++ resume: NULL, ++}; ++ ++static int __init eepro100_init_module(void) ++{ ++#ifdef CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG ++ if (local_debug >= 0 && speedo_debug != local_debug) ++ printk(KERN_INFO "eepro100.c: Debug level is %d.\n", local_debug); ++ if (local_debug >= 0) ++ speedo_debug = local_debug; ++#else /* !CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG */ ++ local_debug = speedo_debug; /* touch debug variable */ ++#endif /* CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG */ ++ ++ return pci_register_driver(&eepro100_driver); ++} ++ ++static void __exit eepro100_cleanup_module(void) ++{ ++ pci_unregister_driver(&eepro100_driver); ++} ++ ++module_init(eepro100_init_module); ++module_exit(eepro100_cleanup_module); +--- linux/drivers/xenomai/net/drivers/smc91111.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/smc91111.c 2021-04-07 16:01:27.344633999 +0800 +@@ -0,0 +1,3531 @@ ++/*------------------------------------------------------------------------ ++ . smc91111.c ++ . This is a driver for SMSC's 91C111 single-chip Ethernet device. ++ . ++ . Copyright (C) 2001 Standard Microsystems Corporation (SMSC) ++ . Developed by Simple Network Magic Corporation (SNMC) ++ . Copyright (C) 1996 by Erik Stahlman (ES) ++ . ++ . This program is free software; you can redistribute it and/or modify ++ . it under the terms of the GNU General Public License as published by ++ . the Free Software Foundation; either version 2 of the License, or ++ . (at your option) any later version. ++ . ++ . This program is distributed in the hope that it will be useful, ++ . but WITHOUT ANY WARRANTY; without even the implied warranty of ++ . MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ . GNU General Public License for more details. ++ . ++ . You should have received a copy of the GNU General Public License ++ . along with this program; if not, write to the Free Software ++ . Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ . ++ . Information contained in this file was obtained from the LAN91C111 ++ . manual from SMC. To get a copy, if you really want one, you can find ++ . information under www.smsc.com. ++ . ++ . ++ . "Features" of the SMC chip: ++ . Integrated PHY/MAC for 10/100BaseT Operation ++ . Supports internal and external MII ++ . Integrated 8K packet memory ++ . EEPROM interface for configuration ++ . ++ . Arguments: ++ . io = for the base address ++ . irq = for the IRQ ++ . nowait = 0 for normal wait states, 1 eliminates additional wait states ++ . ++ . author: ++ . Erik Stahlman ( erik@vt.edu ) ++ . Daris A Nevil ( dnevil@snmc.com ) ++ . Pramod B Bhardwaj (pramod.bhardwaj@smsc.com) ++ . ++ . ++ . Hardware multicast code from Peter Cammaert ( pc@denkart.be ) ++ . ++ . Sources: ++ . o SMSC LAN91C111 databook (www.smsc.com) ++ . 
o smc9194.c by Erik Stahlman ++ . o skeleton.c by Donald Becker ( becker@cesdis.gsfc.nasa.gov ) ++ . ++ . History: ++ . 09/24/01 Pramod B Bhardwaj, Added the changes for Kernel 2.4 ++ . 08/21/01 Pramod B Bhardwaj Added support for RevB of LAN91C111 ++ . 04/25/01 Daris A Nevil Initial public release through SMSC ++ . 03/16/01 Daris A Nevil Modified smc9194.c for use with LAN91C111 ++ ++ Ported to RTnet: March 2004, Jan Kiszka ++ ----------------------------------------------------------------------------*/ ++ ++// Use power-down feature of the chip ++#define POWER_DOWN 1 ++ ++ ++static const char version[] = ++ "SMSC LAN91C111 Driver (v2.0-rt), RTnet version - Jan Kiszka (jan.kiszka@web.de)\n\n"; ++ ++#ifdef MODULE ++#include ++#include ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include //#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++//#include ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++#include ++#include ++#endif ++ ++#include ++ ++#include "rt_smc91111.h" ++/*------------------------------------------------------------------------ ++ . ++ . Configuration options, for the experienced user to change. ++ . ++ -------------------------------------------------------------------------*/ ++ ++/* ++ . Do you want to use 32 bit xfers? This should work on all chips, as ++ . the chipset is designed to accommodate them. ++*/ ++#define USE_32_BIT 1 ++ ++ ++/* ++ .the LAN91C111 can be at any of the following port addresses. To change, ++ .for a slightly different card, you can add it to the array. Keep in ++ .mind that the array must end in zero. ++*/ ++static unsigned int smc_portlist[] __initdata = ++ { 0x200, 0x220, 0x240, 0x260, 0x280, 0x2A0, 0x2C0, 0x2E0, ++ 0x300, 0x320, 0x340, 0x360, 0x380, 0x3A0, 0x3C0, 0x3E0, 0}; ++ ++ ++/* ++ . Wait time for memory to be free. This probably shouldn't be ++ . tuned that much, as waiting for this means nothing else happens ++ . in the system ++*/ ++#define MEMORY_WAIT_TIME 16 ++ ++ ++/* ++ . Timeout in us for waiting on the completion of a previous MMU command ++ . in smc_rcv(). ++*/ ++#define MMU_CMD_TIMEOUT 5 ++ ++ ++/* ++ . DEBUGGING LEVELS ++ . ++ . 0 for normal operation ++ . 1 for slightly more details ++ . >2 for various levels of increasingly useless information ++ . 2 for interrupt tracking, status flags ++ . 3 for packet info ++ . 4 for complete packet dumps ++*/ ++//#define SMC_DEBUG 3 // Must be defined in makefile ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2) ++#define PRINTK3(args...) rtdm_printk(args) ++#else ++#define PRINTK3(args...) ++#endif ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 1) ++#define PRINTK2(args...) rtdm_printk(args) ++#else ++#define PRINTK2(args...) ++#endif ++ ++#ifdef SMC_DEBUG ++#define PRINTK(args...) rtdm_printk(args) ++#else ++#define PRINTK(args...) ++#endif ++ ++ ++/*------------------------------------------------------------------------ ++ . ++ . The internal workings of the driver. If you are changing anything ++ . here with the SMC stuff, you should have the datasheet and know ++ . what you are doing. ++ . ++ -------------------------------------------------------------------------*/ ++#define CARDNAME "LAN91C111" ++ ++// Memory sizing constant ++#define LAN91C111_MEMORY_MULTIPLIER (1024*2) ++ ++/* store this information for the driver.. 
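++   (one instance per device, reachable through rtnet_device->priv;
++    smc_open() clears it again up to the IRQ handle before reuse)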
*/ ++struct smc_local { ++ ++// these are things that the kernel wants me to keep, so users ++ // can find out semi-useless statistics of how well the card is ++ // performing ++ struct net_device_stats stats; ++ ++ // If I have to wait until memory is available to send ++ // a packet, I will store the skbuff here, until I get the ++ // desired memory. Then, I'll send it out and free it. ++ struct rtskb * saved_skb; ++ ++ // This keeps track of how many packets that I have ++ // sent out. When an TX_EMPTY interrupt comes, I know ++ // that all of these have been sent. ++ int packets_waiting; ++ ++ // Set to true during the auto-negotiation sequence ++ int autoneg_active; ++ ++ // Address of our PHY port ++ word phyaddr; ++ ++ // Type of PHY ++ word phytype; ++ ++ // Last contents of PHY Register 18 ++ word lastPhy18; ++ ++ // Contains the current active transmission mode ++ word tcr_cur_mode; ++ ++ // Contains the current active receive mode ++ word rcr_cur_mode; ++ ++ // Contains the current active receive/phy mode ++ word rpc_cur_mode; ++ ++ /* => Pramod, Odd Byte issue */ ++ // Contains the Current ChipID ++ unsigned short ChipID; ++ ++ //Contains the Current ChipRevision ++ unsigned short ChipRev; ++ /* <= Pramod, Odd Byte issue */ ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ ++ // Root directory /proc/sys/dev ++ // Second entry must be null to terminate the table ++ ctl_table root_table[2]; ++ ++ // Directory for this device /proc/sys/dev/ethX ++ // Again the second entry must be zero to terminate ++ ctl_table eth_table[2]; ++ ++ // This is the parameters (file) table ++ ctl_table param_table[CTL_SMC_LAST_ENTRY]; ++ ++ // Saves the sysctl header returned by register_sysctl_table() ++ // we send this to unregister_sysctl_table() ++ struct ctl_table_header *sysctl_header; ++ ++ // Parameter variables (files) go here ++ char ctl_info[1024]; ++ int ctl_swfdup; ++ int ctl_ephloop; ++ int ctl_miiop; ++ int ctl_autoneg; ++ int ctl_rfduplx; ++ int ctl_rspeed; ++ int ctl_afduplx; ++ int ctl_aspeed; ++ int ctl_lnkfail; ++ int ctl_forcol; ++ int ctl_filtcar; ++ int ctl_freemem; ++ int ctl_totmem; ++ int ctl_leda; ++ int ctl_ledb; ++ int ctl_chiprev; ++#ifdef SMC_DEBUG ++ int ctl_reg_bsr; ++ int ctl_reg_tcr; ++ int ctl_reg_esr; ++ int ctl_reg_rcr; ++ int ctl_reg_ctrr; ++ int ctl_reg_mir; ++ int ctl_reg_rpcr; ++ int ctl_reg_cfgr; ++ int ctl_reg_bar; ++ int ctl_reg_iar0; ++ int ctl_reg_iar1; ++ int ctl_reg_iar2; ++ int ctl_reg_gpr; ++ int ctl_reg_ctlr; ++ int ctl_reg_mcr; ++ int ctl_reg_pnr; ++ int ctl_reg_fpr; ++ int ctl_reg_ptr; ++ int ctl_reg_dr; ++ int ctl_reg_isr; ++ int ctl_reg_mtr1; ++ int ctl_reg_mtr2; ++ int ctl_reg_mtr3; ++ int ctl_reg_mtr4; ++ int ctl_reg_miir; ++ int ctl_reg_revr; ++ int ctl_reg_ercvr; ++ int ctl_reg_extr; ++ int ctl_phy_ctrl; ++ int ctl_phy_stat; ++ int ctl_phy_id1; ++ int ctl_phy_id2; ++ int ctl_phy_adc; ++ int ctl_phy_remc; ++ int ctl_phy_cfg1; ++ int ctl_phy_cfg2; ++ int ctl_phy_int; ++ int ctl_phy_mask; ++#endif // SMC_DEBUG ++ ++#endif // CONFIG_SYSCTL ++ ++ rtdm_irq_t irq_handle; ++}; ++ ++ ++/*----------------------------------------------------------------- ++ . ++ . The driver can be entered at any of the following entry points. ++ . ++ .------------------------------------------------------------------ */ ++ ++/* ++ . This is called by register_netdev(). It is responsible for ++ . checking the portlist for the SMC9000 series chipset. If it finds ++ . one, then it will initialize the device, find the hardware information, ++ . 
and sets up the appropriate device parameters. ++ . NOTE: Interrupts are *OFF* when this procedure is called. ++ . ++ . NB:This shouldn't be static since it is referred to externally. ++*/ ++int smc_init(struct rtnet_device *dev); ++ ++/* ++ . This is called by unregister_netdev(). It is responsible for ++ . cleaning up before the driver is finally unregistered and discarded. ++*/ ++//void smc_destructor(struct net_device *dev); ++ ++/* ++ . The kernel calls this function when someone wants to use the net_device, ++ . typically 'ifconfig ethX up'. ++*/ ++static int smc_open(struct rtnet_device *dev); ++ ++/* ++ . This is called by the kernel to send a packet out into the net. it's ++ . responsible for doing a best-effort send, but if it's simply not possible ++ . to send it, the packet gets dropped. ++*/ ++//static void smc_timeout (struct net_device *dev);*/ ++/* ++ . This is called by the kernel in response to 'ifconfig ethX down'. It ++ . is responsible for cleaning up everything that the open routine ++ . does, and maybe putting the card into a powerdown state. ++*/ ++static int smc_close(struct rtnet_device *dev); ++ ++/* ++ . This routine allows the proc file system to query the driver's ++ . statistics. ++*/ ++static struct net_device_stats *smc_query_statistics(struct rtnet_device *rtdev); ++ ++/* ++ . Finally, a call to set promiscuous mode ( for TCPDUMP and related ++ . programs ) and multicast modes. ++*/ ++static void smc_set_multicast_list(struct rtnet_device *dev); ++ ++/* ++ . Configures the PHY through the MII Management interface ++*/ ++static void smc_phy_configure(struct rtnet_device* dev); ++ ++/*--------------------------------------------------------------- ++ . ++ . Interrupt level calls.. ++ . ++ ----------------------------------------------------------------*/ ++ ++/* ++ . Handles the actual interrupt ++*/ ++static int smc_interrupt(rtdm_irq_t *irq_handle); ++/* ++ . This is a separate procedure to handle the receipt of a packet, to ++ . leave the interrupt code looking slightly cleaner ++*/ ++inline static void smc_rcv( struct rtnet_device *dev ); ++/* ++ . This handles a TX interrupt, which is only called when an error ++ . relating to a packet is sent. ++*/ ++//inline static void smc_tx( struct net_device * dev ); ++ ++/* ++ . This handles interrupts generated from PHY register 18 ++*/ ++//static void smc_phy_interrupt(struct net_device* dev); ++ ++/* ++ ------------------------------------------------------------ ++ . ++ . Internal routines ++ . ++ ------------------------------------------------------------ ++*/ ++ ++/* ++ . Test if a given location contains a chip, trying to cause as ++ . little damage as possible if it's not a SMC chip. ++*/ ++static int smc_probe(struct rtnet_device *dev, int ioaddr); ++ ++/* ++ . A rather simple routine to print out a packet for debugging purposes. ++*/ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2) ++static void print_packet( byte *, int ); ++#endif ++ ++#define tx_done(dev) 1 ++ ++/* this is called to actually send the packet to the chip */ ++static void smc_hardware_send_packet( struct rtnet_device * dev ); ++ ++/* Since I am not sure if I will have enough room in the chip's ram ++ . to store the packet, I call this routine, which either sends it ++ . now, or generates an interrupt when the card is ready for the ++ . 
packet */ ++static int smc_wait_to_send_packet( struct rtskb * skb, struct rtnet_device *dev ); ++ ++/* this does a soft reset on the device */ ++static void smc_reset( struct rtnet_device* dev ); ++ ++/* Enable Interrupts, Receive, and Transmit */ ++static void smc_enable( struct rtnet_device *dev ); ++ ++/* this puts the device in an inactive state */ ++static void smc_shutdown( int ioaddr ); ++ ++#ifndef NO_AUTOPROBE ++/* This routine will find the IRQ of the driver if one is not ++ . specified in the input to the device. */ ++static int smc_findirq( int ioaddr ); ++#endif ++ ++/* Routines to Read and Write the PHY Registers across the ++ MII Management Interface ++*/ ++ ++static word smc_read_phy_register(int ioaddr, byte phyaddr, byte phyreg); ++static void smc_write_phy_register(int ioaddr, byte phyaddr, byte phyreg, word phydata); ++ ++/* Initilizes our device's sysctl proc filesystem */ ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++static void smc_sysctl_register(struct rtnet_device *); ++static void smc_sysctl_unregister(struct rtnet_device *); ++#endif /* CONFIG_SYSCTL */ ++ ++/* ++ . Function: smc_reset( struct device* dev ) ++ . Purpose: ++ . This sets the SMC91111 chip to its normal state, hopefully from whatever ++ . mess that any other DOS driver has put it in. ++ . ++ . Maybe I should reset more registers to defaults in here? SOFTRST should ++ . do that for me. ++ . ++ . Method: ++ . 1. send a SOFT RESET ++ . 2. wait for it to finish ++ . 3. enable autorelease mode ++ . 4. reset the memory management unit ++ . 5. clear all interrupts ++ . ++*/ ++static void smc_reset( struct rtnet_device* dev ) ++{ ++ //struct smc_local *lp = (struct smc_local *)dev->priv; ++ int ioaddr = dev->base_addr; ++ ++ PRINTK2("%s:smc_reset\n", dev->name); ++ ++ /* This resets the registers mostly to defaults, but doesn't ++ affect EEPROM. That seems unnecessary */ ++ SMC_SELECT_BANK( 0 ); ++ outw( RCR_SOFTRST, ioaddr + RCR_REG ); ++ ++ /* Setup the Configuration Register */ ++ /* This is necessary because the CONFIG_REG is not affected */ ++ /* by a soft reset */ ++ ++ SMC_SELECT_BANK( 1 ); ++ outw( CONFIG_DEFAULT, ioaddr + CONFIG_REG); ++ ++ /* Setup for fast accesses if requested */ ++ /* If the card/system can't handle it then there will */ ++ /* be no recovery except for a hard reset or power cycle */ ++ ++ if (dev->dma) ++ outw( inw( ioaddr + CONFIG_REG ) | CONFIG_NO_WAIT, ++ ioaddr + CONFIG_REG ); ++ ++#ifdef POWER_DOWN ++ /* Release from possible power-down state */ ++ /* Configuration register is not affected by Soft Reset */ ++ SMC_SELECT_BANK( 1 ); ++ outw( inw( ioaddr + CONFIG_REG ) | CONFIG_EPH_POWER_EN, ++ ioaddr + CONFIG_REG ); ++#endif ++ ++ SMC_SELECT_BANK( 0 ); ++ ++ /* this should pause enough for the chip to be happy */ ++ mdelay(10); ++ ++ /* Disable transmit and receive functionality */ ++ outw( RCR_CLEAR, ioaddr + RCR_REG ); ++ outw( TCR_CLEAR, ioaddr + TCR_REG ); ++ ++ /* set the control register to automatically ++ release successfully transmitted packets, to make the best ++ use out of our limited memory */ ++ SMC_SELECT_BANK( 1 ); ++ outw( inw( ioaddr + CTL_REG ) | CTL_AUTO_RELEASE , ioaddr + CTL_REG ); ++ ++ /* Reset the MMU */ ++ SMC_SELECT_BANK( 2 ); ++ outw( MC_RESET, ioaddr + MMU_CMD_REG ); ++ ++ /* Note: It doesn't seem that waiting for the MMU busy is needed here, ++ but this is a place where future chipsets _COULD_ break. Be wary ++ of issuing another MMU command right after this */ ++ ++ /* Disable all interrupts */ ++ outb( 0, ioaddr + IM_REG ); ++} ++ ++/* ++ . 
Function: smc_enable ++ . Purpose: let the chip talk to the outside work ++ . Method: ++ . 1. Enable the transmitter ++ . 2. Enable the receiver ++ . 3. Enable interrupts ++*/ ++static void smc_enable( struct rtnet_device *dev ) ++{ ++ unsigned short ioaddr = dev->base_addr; ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ ++ PRINTK2("%s:smc_enable\n", dev->name); ++ ++ SMC_SELECT_BANK( 0 ); ++ /* see the header file for options in TCR/RCR DEFAULT*/ ++ outw( lp->tcr_cur_mode, ioaddr + TCR_REG ); ++ outw( lp->rcr_cur_mode, ioaddr + RCR_REG ); ++ ++ /* now, enable interrupts */ ++ SMC_SELECT_BANK( 2 ); ++ outb( SMC_INTERRUPT_MASK, ioaddr + IM_REG ); ++} ++ ++/* ++ . Function: smc_shutdown ++ . Purpose: closes down the SMC91xxx chip. ++ . Method: ++ . 1. zero the interrupt mask ++ . 2. clear the enable receive flag ++ . 3. clear the enable xmit flags ++ . ++ . TODO: ++ . (1) maybe utilize power down mode. ++ . Why not yet? Because while the chip will go into power down mode, ++ . the manual says that it will wake up in response to any I/O requests ++ . in the register space. Empirical results do not show this working. ++*/ ++static void smc_shutdown( int ioaddr ) ++{ ++ PRINTK2("CARDNAME:smc_shutdown\n"); ++ ++ /* no more interrupts for me */ ++ SMC_SELECT_BANK( 2 ); ++ outb( 0, ioaddr + IM_REG ); ++ ++ /* and tell the card to stay away from that nasty outside world */ ++ SMC_SELECT_BANK( 0 ); ++ outb( RCR_CLEAR, ioaddr + RCR_REG ); ++ outb( TCR_CLEAR, ioaddr + TCR_REG ); ++ ++#ifdef POWER_DOWN ++ /* finally, shut the chip down */ ++ SMC_SELECT_BANK( 1 ); ++ outw( inw( ioaddr + CONFIG_REG ) & ~CONFIG_EPH_POWER_EN, ++ ioaddr + CONFIG_REG ); ++#endif ++} ++ ++ ++/* ++ . Function: smc_wait_to_send_packet( struct sk_buff * skb, struct device * ) ++ . Purpose: ++ . Attempt to allocate memory for a packet, if chip-memory is not ++ . available, then tell the card to generate an interrupt when it ++ . is available. ++ . ++ . Algorithm: ++ . ++ . o if the saved_skb is not currently null, then drop this packet ++ . on the floor. This should never happen, because of TBUSY. ++ . o if the saved_skb is null, then replace it with the current packet, ++ . o See if I can sending it now. ++ . o (NO): Enable interrupts and let the interrupt handler deal with it. ++ . o (YES):Send it now. ++*/ ++static int smc_wait_to_send_packet( struct rtskb * skb, struct rtnet_device * dev ) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ unsigned short ioaddr = dev->base_addr; ++ word length; ++ unsigned short numPages; ++ word time_out; ++ word status; ++ ++ PRINTK3("%s:smc_wait_to_send_packet\n", dev->name); ++ ++ rtnetif_stop_queue(dev); ++ ++ if ( lp->saved_skb) { ++ /* THIS SHOULD NEVER HAPPEN. */ ++ lp->stats.tx_aborted_errors++; ++ rtdm_printk("%s: Bad Craziness - sent packet while busy.\n", ++ dev->name); ++ return 1; ++ } ++ lp->saved_skb = skb; ++ ++ length = ETH_ZLEN < skb->len ? skb->len : ETH_ZLEN; ++ ++ ++ /* ++ ** The MMU wants the number of pages to be the number of 256 bytes ++ ** 'pages', minus 1 ( since a packet can't ever have 0 pages :) ) ++ ** ++ ** The 91C111 ignores the size bits, but the code is left intact ++ ** for backwards and future compatibility. ++ ** ++ ** Pkt size for allocating is data length +6 (for additional status ++ ** words, length and ctl!) ++ ** ++ ** If odd size then last byte is included in this header. ++ */ ++ numPages = ((length & 0xfffe) + 6); ++ numPages >>= 8; // Divide by 256 ++ ++ if (numPages > 7 ) { ++ rtdm_printk("%s: Far too big packet error. 
\n", dev->name); ++ /* freeing the packet is a good thing here... but should ++ . any packets of this size get down here? */ ++ kfree_rtskb(skb); ++ lp->saved_skb = NULL; ++ /* this IS an error, but, i don't want the skb saved */ ++ rtnetif_wake_queue(dev); ++ return 0; ++ } ++ /* either way, a packet is waiting now */ ++ lp->packets_waiting++; ++ ++ /* now, try to allocate the memory */ ++ SMC_SELECT_BANK( 2 ); ++ outw( MC_ALLOC | numPages, ioaddr + MMU_CMD_REG ); ++ /* ++ . Performance Hack ++ . ++ . wait a short amount of time.. if I can send a packet now, I send ++ . it now. Otherwise, I enable an interrupt and wait for one to be ++ . available. ++ . ++ . I could have handled this a slightly different way, by checking to ++ . see if any memory was available in the FREE MEMORY register. However, ++ . either way, I need to generate an allocation, and the allocation works ++ . no matter what, so I saw no point in checking free memory. ++ */ ++ time_out = MEMORY_WAIT_TIME; ++ do { ++ status = inb( ioaddr + INT_REG ); ++ if ( status & IM_ALLOC_INT ) { ++ /* acknowledge the interrupt */ ++ outb( IM_ALLOC_INT, ioaddr + INT_REG ); ++ break; ++ } ++ } while ( -- time_out ); ++ ++ if ( !time_out ) { ++ kfree_rtskb(skb); ++ lp->saved_skb = NULL; ++ rtnetif_wake_queue(dev); ++ ++ rtdm_printk("%s: ERROR: unable to allocate card memory for " ++ "packet transmission.\n", dev->name); ++ return 0; ++ } ++ /* or YES! I can send the packet now.. */ ++ smc_hardware_send_packet(dev); ++ rtnetif_wake_queue(dev); ++ return 0; ++} ++ ++/* ++ . Function: smc_hardware_send_packet(struct device * ) ++ . Purpose: ++ . This sends the actual packet to the SMC9xxx chip. ++ . ++ . Algorithm: ++ . First, see if a saved_skb is available. ++ . ( this should NOT be called if there is no 'saved_skb' ++ . Now, find the packet number that the chip allocated ++ . Point the data pointers at it in memory ++ . Set the length word in the chip's memory ++ . Dump the packet to chip memory ++ . Check if a last byte is needed ( odd length packet ) ++ . if so, set the control flag right ++ . Tell the card to send it ++ . Enable the transmit interrupt, so I know if it failed ++ . Free the kernel data if I actually sent it. ++*/ ++static void smc_hardware_send_packet( struct rtnet_device * dev ) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ byte packet_no; ++ struct rtskb * skb = lp->saved_skb; ++ word length; ++ unsigned short ioaddr; ++ void * buf; ++ rtdm_lockctx_t context; ++ ++ PRINTK3("%s:smc_hardware_send_packet\n", dev->name); ++ ++ ioaddr = dev->base_addr; ++ ++ if ( !skb ) { ++ PRINTK("%s: In XMIT with no packet to send \n", dev->name); ++ return; ++ } ++ length = ETH_ZLEN < skb->len ? skb->len : ETH_ZLEN; ++ buf = skb->data; ++ ++ /* If I get here, I _know_ there is a packet slot waiting for me */ ++ packet_no = inb( ioaddr + AR_REG ); ++ if ( packet_no & AR_FAILED ) { ++ /* or isn't there? BAD CHIP! */ ++ rtdm_printk(KERN_DEBUG "%s: Memory allocation failed. 
\n", ++ dev->name); ++ kfree_rtskb(skb); ++ lp->saved_skb = NULL; ++ rtnetif_wake_queue(dev); ++ return; ++ } ++ ++ /* we have a packet address, so tell the card to use it */ ++ outb( packet_no, ioaddr + PN_REG ); ++ ++ /* point to the beginning of the packet */ ++ outw( PTR_AUTOINC , ioaddr + PTR_REG ); ++ ++ PRINTK3("%s: Trying to xmit packet of length %x\n", ++ dev->name, length); ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2) ++ rtdm_printk("Transmitting Packet\n"); ++ print_packet( buf, length ); ++#endif ++ ++ /* send the packet length ( +6 for status, length and ctl byte ) ++ and the status word ( set to zeros ) */ ++#ifdef USE_32_BIT ++ outl( (length +6 ) << 16 , ioaddr + DATA_REG ); ++#else ++ outw( 0, ioaddr + DATA_REG ); ++ /* send the packet length ( +6 for status words, length, and ctl*/ ++ outb( (length+6) & 0xFF,ioaddr + DATA_REG ); ++ outb( (length+6) >> 8 , ioaddr + DATA_REG ); ++#endif ++ ++ /* send the actual data ++ . I _think_ it's faster to send the longs first, and then ++ . mop up by sending the last word. It depends heavily ++ . on alignment, at least on the 486. Maybe it would be ++ . a good idea to check which is optimal? But that could take ++ . almost as much time as is saved? ++ */ ++#ifdef USE_32_BIT ++ outsl(ioaddr + DATA_REG, buf, length >> 2 ); ++ if ( length & 0x2 ) ++ outw(*((word *)(buf + (length & 0xFFFFFFFC))),ioaddr +DATA_REG); ++#else ++ outsw(ioaddr + DATA_REG , buf, (length ) >> 1); ++#endif // USE_32_BIT ++ ++ /* Send the last byte, if there is one. */ ++ if ( (length & 1) == 0 ) { ++ outw( 0, ioaddr + DATA_REG ); ++ } else { ++ outb( ((char *)buf)[length -1 ], ioaddr + DATA_REG ); ++ outb( 0x20, ioaddr + DATA_REG); // Set odd bit in CONTROL BYTE ++ } ++ ++ rtdm_lock_irqsave(context); ++ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) { ++ nanosecs_abs_t xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ /* point to the patch address */ ++ outw(PTR_AUTOINC | ++ (4 + (char *)skb->xmit_stamp - (char *)skb->data), ++ ioaddr + PTR_REG); ++ /* we don't check alignments, we just write bytes */ ++ outsb(ioaddr + DATA_REG, (char *)&xmit_stamp, ++ sizeof(xmit_stamp)); ++ } ++ ++ /* enable the interrupts */ ++ SMC_ENABLE_INT( (IM_TX_INT | IM_TX_EMPTY_INT) ); ++ ++ /* and let the chipset deal with it */ ++ outw( MC_ENQUEUE , ioaddr + MMU_CMD_REG ); ++ ++ rtdm_lock_irqrestore(context); ++ ++ PRINTK2("%s: Sent packet of length %d \n", dev->name, length); ++ ++ lp->saved_skb = NULL; ++ kfree_rtskb(skb); ++ ++// dev->trans_start = jiffies; ++ ++ /* we can send another packet */ ++ rtnetif_wake_queue(dev); ++ ++ ++ return; ++} ++ ++/*------------------------------------------------------------------------- ++ | ++ | smc_init( struct device * dev ) ++ | Input parameters: ++ | dev->base_addr == 0, try to find all possible locations ++ | dev->base_addr == 1, return failure code ++ | dev->base_addr == 2, always allocate space, and return success ++ | dev->base_addr == this is the address to check ++ | ++ | Output: ++ | 0 --> there is a device ++ | anything else, error ++ | ++ --------------------------------------------------------------------------- ++*/ ++int __init smc_init(struct rtnet_device *dev) ++{ ++ int i; ++ int base_addr = dev ? 
dev->base_addr : 0; ++ ++ PRINTK2("CARDNAME:smc_init\n"); ++ ++ /* try a specific location */ ++ if (base_addr > 0x1ff) ++ return smc_probe(dev, base_addr); ++ else if ( 0 != base_addr ) ++ return -ENXIO; ++ ++ /* check every ethernet address */ ++ for (i = 0; smc_portlist[i]; i++) ++ if ( smc_probe(dev,smc_portlist[i]) ==0) ++ return 0; ++ ++ /* couldn't find anything */ ++ return -ENODEV; ++} ++ ++ ++#ifndef NO_AUTOPROBE ++/*---------------------------------------------------------------------- ++ . smc_findirq ++ . ++ . This routine has a simple purpose -- make the SMC chip generate an ++ . interrupt, so an auto-detect routine can detect it, and find the IRQ, ++ ------------------------------------------------------------------------ ++*/ ++int __init smc_findirq( int ioaddr ) ++{ ++ int timeout = 20; ++ unsigned long cookie; ++ ++ PRINTK2("CARDNAME:smc_findirq\n"); ++ ++ /* I have to do a STI() here, because this is called from ++ a routine that does an CLI during this process, making it ++ rather difficult to get interrupts for auto detection */ ++ local_irq_enable(); ++ ++ cookie = probe_irq_on(); ++ ++ /* ++ * What I try to do here is trigger an ALLOC_INT. This is done ++ * by allocating a small chunk of memory, which will give an interrupt ++ * when done. ++ */ ++ ++ ++ SMC_SELECT_BANK(2); ++ /* enable ALLOCation interrupts ONLY */ ++ outb( IM_ALLOC_INT, ioaddr + IM_REG ); ++ ++ /* ++ . Allocate 512 bytes of memory. Note that the chip was just ++ . reset so all the memory is available ++ */ ++ outw( MC_ALLOC | 1, ioaddr + MMU_CMD_REG ); ++ ++ /* ++ . Wait until positive that the interrupt has been generated ++ */ ++ while ( timeout ) { ++ byte int_status; ++ ++ int_status = inb( ioaddr + INT_REG ); ++ ++ if ( int_status & IM_ALLOC_INT ) ++ break; /* got the interrupt */ ++ timeout--; ++ } ++ ++ /* there is really nothing that I can do here if timeout fails, ++ as autoirq_report will return a 0 anyway, which is what I ++ want in this case. Plus, the clean up is needed in both ++ cases. */ ++ ++ /* DELAY HERE! ++ On a fast machine, the status might change before the interrupt ++ is given to the processor. This means that the interrupt was ++ never detected, and autoirq_report fails to report anything. ++ This should fix autoirq_* problems. ++ */ ++ mdelay(10); ++ ++ /* and disable all interrupts again */ ++ outb( 0, ioaddr + IM_REG ); ++ ++ /* clear hardware interrupts again, because that's how it ++ was when I was called... */ ++ local_irq_disable(); ++ ++ /* and return what I found */ ++ return probe_irq_off(cookie); ++} ++#endif ++ ++/*---------------------------------------------------------------------- ++ . Function: smc_probe( int ioaddr ) ++ . ++ . Purpose: ++ . Tests to see if a given ioaddr points to an SMC91111 chip. ++ . Returns a 0 on success ++ . ++ . Algorithm: ++ . (1) see if the high byte of BANK_SELECT is 0x33 ++ . (2) compare the ioaddr with the base register's address ++ . (3) see if I recognize the chip ID in the appropriate register ++ . ++ .--------------------------------------------------------------------- ++ */ ++/*--------------------------------------------------------------- ++ . Here I do typical initialization tasks. ++ . ++ . o Initialize the structure if needed ++ . o print out my vanity message if not done so already ++ . o print out what type of hardware is detected ++ . o print out the ethernet address ++ . o find the IRQ ++ . o set up my private data ++ . o configure the dev structure with my subroutines ++ . o actually GRAB the irq. ++ . 
o GRAB the region ++ .-----------------------------------------------------------------*/ ++ ++static int __init smc_probe(struct rtnet_device *dev, int ioaddr ) ++{ ++ int i, memory, retval; ++ static unsigned version_printed = 0; ++ unsigned int bank; ++ ++ const char *version_string; ++ ++ /*registers */ ++ word revision_register; ++ word base_address_register; ++ word memory_info_register; ++ /*=> Pramod */ ++ struct smc_local *lp; ++ /*<= Pramod */ ++ ++ PRINTK2("CARDNAME:smc_probe\n"); ++ ++ /* Grab the region so that no one else tries to probe our ioports. */ ++ if (!request_region(ioaddr, SMC_IO_EXTENT, dev->name)) return -EBUSY; ++ ++ /* First, see if the high byte is 0x33 */ ++ bank = inw( ioaddr + BANK_SELECT ); ++ if ( (bank & 0xFF00) != 0x3300 ) return -ENODEV; ++ ++ /* The above MIGHT indicate a device, but I need to write to further test this. */ ++ outw( 0x0, ioaddr + BANK_SELECT ); ++ bank = inw( ioaddr + BANK_SELECT ); ++ if ( (bank & 0xFF00 ) != 0x3300 ) ++ { ++ retval = -ENODEV; ++ goto err_out; ++ } ++ ++ /* well, we've already written once, so hopefully another time won't ++ hurt. This time, I need to switch the bank register to bank 1, ++ so I can access the base address register */ ++ SMC_SELECT_BANK(1); ++ base_address_register = inw( ioaddr + BASE_REG ); ++ if ( ioaddr != ( base_address_register >> 3 & 0x3E0 ) ) ++ { ++ printk("CARDNAME: IOADDR %x doesn't match configuration (%x)." ++ "Probably not a SMC chip\n", ++ ioaddr, base_address_register >> 3 & 0x3E0 ); ++ /* well, the base address register didn't match. Must not have ++ been a SMC chip after all. */ ++ retval = -ENODEV; ++ goto err_out; ++ } ++ ++ /* check if the revision register is something that I recognize. ++ These might need to be added to later, as future revisions ++ could be added. */ ++ SMC_SELECT_BANK(3); ++ revision_register = inw( ioaddr + REV_REG ); ++ if ( !chip_ids[ ( revision_register >> 4 ) & 0xF ] ) ++ { ++ /* I don't recognize this chip, so... */ ++ printk("CARDNAME: IO %x: Unrecognized revision register:" ++ " %x, Contact author. \n", ++ ioaddr, revision_register ); ++ retval = -ENODEV; ++ goto err_out; ++ } ++ ++ /* at this point I'll assume that the chip is an SMC9xxx. ++ It might be prudent to check a listing of MAC addresses ++ against the hardware address, or do some other tests. */ ++ ++ if (version_printed++ == 0) ++ printk("%s", version); ++ ++ /* fill in some of the fields */ ++ dev->base_addr = ioaddr; ++ ++ /* ++ . Get the MAC address ( bank 1, regs 4 - 9 ) ++ */ ++ SMC_SELECT_BANK( 1 ); ++ for ( i = 0; i < 6; i += 2 ) ++ { ++ word address; ++ ++ address = inw( ioaddr + ADDR0_REG + i ); ++ dev->dev_addr[ i + 1] = address >> 8; ++ dev->dev_addr[ i ] = address & 0xFF; ++ } ++ ++ /* get the memory information */ ++ ++ SMC_SELECT_BANK( 0 ); ++ memory_info_register = inw( ioaddr + MIR_REG ); ++ memory = memory_info_register & (word)0x00ff; ++ memory *= LAN91C111_MEMORY_MULTIPLIER; ++ ++ /* ++ Now, I want to find out more about the chip. This is sort of ++ redundant, but it's cleaner to have it in both, rather than having ++ one VERY long probe procedure. ++ */ ++ SMC_SELECT_BANK(3); ++ revision_register = inw( ioaddr + REV_REG ); ++ version_string = chip_ids[ ( revision_register >> 4 ) & 0xF ]; ++ if ( !version_string ) ++ { ++ /* I shouldn't get here because this call was done before.... */ ++ retval = -ENODEV; ++ goto err_out; ++ } ++ ++ /* now, reset the chip, and put it into a known state */ ++ smc_reset( dev ); ++ ++ /* ++ . 
If dev->irq is 0, then the device has to be banged on to see ++ . what the IRQ is. ++ . ++ . This banging doesn't always detect the IRQ, for unknown reasons. ++ . a workaround is to reset the chip and try again. ++ . ++ . Interestingly, the DOS packet driver *SETS* the IRQ on the card to ++ . be what is requested on the command line. I don't do that, mostly ++ . because the card that I have uses a non-standard method of accessing ++ . the IRQs, and because this _should_ work in most configurations. ++ . ++ . Specifying an IRQ is done with the assumption that the user knows ++ . what (s)he is doing. No checking is done!!!! ++ . ++ */ ++ if ( dev->irq < 2 ) { ++ int trials; ++ ++ trials = 3; ++ while ( trials-- ) { ++ dev->irq = smc_findirq( ioaddr ); ++ if ( dev->irq ) ++ break; ++ /* kick the card and try again */ ++ smc_reset( dev ); ++ } ++ } ++ if (dev->irq == 0 ) { ++ printk("%s: Couldn't autodetect your IRQ. Use irq=xx.\n", ++ dev->name); ++ retval = -ENODEV; ++ goto err_out; ++ } ++ ++ if (dev->irq == 2) { ++ /* Fixup for users that don't know that IRQ 2 is really IRQ 9, ++ * or don't know which one to set. ++ */ ++ dev->irq = 9; ++ } ++ ++ /* now, print out the card info, in a short format.. */ ++ ++ printk("%s: %s(rev:%d) at %#3x IRQ:%d MEMSIZE:%db NOWAIT:%d ", ++ dev->name, ++ version_string, revision_register & 0xF, ioaddr, dev->irq, ++ memory, dev->dma); ++ /* ++ . Print the Ethernet address ++ */ ++ printk("ADDR: "); ++ for (i = 0; i < 5; i++) ++ printk("%2.2x:", dev->dev_addr[i] ); ++ printk("%2.2x \n", dev->dev_addr[5] ); ++ ++ ++ /* Initialize the private structure. */ ++ /*if (dev->priv == NULL) { ++ dev->priv = kmalloc(sizeof(struct smc_local), GFP_KERNEL); ++ if (dev->priv == NULL) { ++ retval = -ENOMEM; ++ goto err_out; ++ } ++ }*/ ++ /* set the private data to zero by default */ ++ memset(dev->priv, 0, sizeof(struct smc_local)); ++ ++ /* Fill in the fields of the device structure with ethernet values. 
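++	   For RTnet the classic ether_setup() path is skipped:
++	   rt_stack_connect() attaches the device to the RTnet stack manager
++	   instead, and the IRQ is requested through rtdm_irq_request() below.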
*/ ++// ether_setup(dev); ++ ++ rt_stack_connect(dev, &STACK_manager); ++ ++ /* Grab the IRQ */ ++ retval = rtdm_irq_request(&((struct smc_local *)dev->priv)->irq_handle, ++ dev->irq, &smc_interrupt, 0, ++ "rt_smx91111", dev); ++ if (retval) { ++ printk("%s: unable to get IRQ %d (irqval=%d).\n", ++ dev->name, dev->irq, retval); ++ //kfree (dev->priv); ++ //dev->priv = NULL; ++ goto err_out; ++ } ++ ++ dev->open = smc_open; ++ dev->stop = smc_close; ++ dev->hard_start_xmit = smc_wait_to_send_packet; ++ dev->get_stats = smc_query_statistics; ++// dev->tx_timeout = smc_timeout; ++#ifdef HAVE_MULTICAST ++// dev->set_multicast_list = &smc_set_multicast_list; ++#endif ++ ++ /* => Store the ChipRevision and ChipID, to be used in resolving the Odd-Byte issue in RevB of LAN91C111; Pramod */ ++ SMC_SELECT_BANK(3); ++ revision_register = inw( ioaddr + REV_REG ); ++ lp = (struct smc_local *)dev->priv; ++ lp->ChipID = (revision_register >> 4) & 0xF; ++ lp->ChipRev = revision_register & 0xF; ++ ++ return 0; ++ ++err_out: ++ release_region (ioaddr, SMC_IO_EXTENT); ++ return retval; ++} ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2) ++static void print_packet( byte * buf, int length ) ++{ ++ int i; ++ int remainder; ++ int lines; ++ ++ rtdm_printk("Packet of length %d \n", length ); ++ ++#if SMC_DEBUG > 3 ++ lines = length / 16; ++ remainder = length % 16; ++ ++ for ( i = 0; i < lines ; i ++ ) { ++ int cur; ++ ++ for ( cur = 0; cur < 8; cur ++ ) { ++ byte a, b; ++ ++ a = *(buf ++ ); ++ b = *(buf ++ ); ++ rtdm_printk("%02x%02x ", a, b ); ++ } ++ rtdm_printk("\n"); ++ } ++ for ( i = 0; i < remainder/2 ; i++ ) { ++ byte a, b; ++ ++ a = *(buf ++ ); ++ b = *(buf ++ ); ++ rtdm_printk("%02x%02x ", a, b ); ++ } ++ rtdm_printk("\n"); ++#endif ++} ++#endif ++ ++ ++/* ++ * Open and Initialize the board ++ * ++ * Set up everything, reset the card, etc .. ++ * ++ */ ++static int smc_open(struct rtnet_device *dev) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ int ioaddr = dev->base_addr; ++ int i; /* used to set hw ethernet address */ ++ ++ PRINTK2("%s:smc_open\n", dev->name); ++ ++ /* clear out all the junk that was put here before... */ ++ memset(dev->priv, 0, (size_t)&((struct smc_local *)0)->irq_handle); ++ ++ rtnetif_start_queue(dev); ++ ++ // Setup the default Register Modes ++ lp->tcr_cur_mode = TCR_DEFAULT; ++ lp->rcr_cur_mode = RCR_DEFAULT; ++ lp->rpc_cur_mode = RPC_DEFAULT; ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ // Set default parameters (files) ++ lp->ctl_swfdup = 0; ++ lp->ctl_ephloop = 0; ++ lp->ctl_miiop = 0; ++ lp->ctl_autoneg = 1; ++ lp->ctl_rfduplx = 1; ++ lp->ctl_rspeed = 100; ++ lp->ctl_afduplx = 1; ++ lp->ctl_aspeed = 100; ++ lp->ctl_lnkfail = 1; ++ lp->ctl_forcol = 0; ++ lp->ctl_filtcar = 0; ++#endif /* CONFIG_SYSCTL */ ++ ++ /* reset the hardware */ ++ ++ smc_reset( dev ); ++ smc_enable( dev ); ++ ++ /* Configure the PHY */ ++ smc_phy_configure(dev); ++ ++ smc_set_multicast_list(dev); ++ ++ /* ++ According to Becker, I have to set the hardware address ++ at this point, because the (l)user can set it with an ++ ioctl. Easily done... ++ */ ++ SMC_SELECT_BANK( 1 ); ++ for ( i = 0; i < 6; i += 2 ) { ++ word address; ++ ++ address = dev->dev_addr[ i + 1 ] << 8 ; ++ address |= dev->dev_addr[ i ]; ++ outw( address, ioaddr + ADDR0_REG + i ); ++ } ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ smc_sysctl_register(dev); ++#endif /* CONFIG_SYSCTL */ ++ ++ rtnetif_start_queue(dev); ++ return 0; ++} ++ ++/*------------------------------------------------------------- ++ . ++ . 
smc_rcv - receive a packet from the card ++ . ++ . There is ( at least ) a packet waiting to be read from ++ . chip-memory. ++ . ++ . o Read the status ++ . o If an error, record it ++ . o otherwise, read in the packet ++ -------------------------------------------------------------- ++*/ ++static inline void smc_rcv(struct rtnet_device *dev) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ int ioaddr = dev->base_addr; ++ int packet_number; ++ word status; ++ word packet_length; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ int timeout; ++ ++ PRINTK3("%s:smc_rcv\n", dev->name); ++ ++ /* assume bank 2 */ ++ ++ packet_number = inw( ioaddr + RXFIFO_REG ); ++ ++ if ( packet_number & RXFIFO_REMPTY ) { ++ ++ /* we got called , but nothing was on the FIFO */ ++ PRINTK("%s: WARNING: smc_rcv with nothing on FIFO. \n", ++ dev->name); ++ /* don't need to restore anything */ ++ return; ++ } ++ ++ /* start reading from the start of the packet */ ++ outw( PTR_READ | PTR_RCV | PTR_AUTOINC, ioaddr + PTR_REG ); ++ inw( ioaddr + MMU_CMD_REG ); /* min delay to avoid errors... */ ++ ++ /* First two words are status and packet_length */ ++ status = inw( ioaddr + DATA_REG ); ++ packet_length = inw( ioaddr + DATA_REG ); ++ ++ packet_length &= 0x07ff; /* mask off top bits */ ++ ++ PRINTK2("RCV: STATUS %4x LENGTH %4x\n", status, packet_length ); ++ ++ if ( !(status & RS_ERRORS ) ){ ++ /* do stuff to make a new packet */ ++ struct rtskb * skb; ++ void * data; ++ ++ /* set multicast stats */ ++ if ( status & RS_MULTICAST ) ++ lp->stats.multicast++; ++ ++ // Allocate enough memory for entire receive frame, to be safe ++ skb = rtnetdev_alloc_rtskb(dev, packet_length); ++ ++ /* Adjust for having already read the first two words */ ++ packet_length -= 4; ++ ++ if ( skb == NULL ) { ++ rtdm_printk(KERN_NOTICE "%s: Low memory, packet dropped.\n", ++ dev->name); ++ lp->stats.rx_dropped++; ++ goto done; ++ } ++ ++ /* ++ ! This should work without alignment, but it could be ++ ! in the worse case ++ */ ++ /* TODO: Should I use 32bit alignment here ? */ ++ rtskb_reserve( skb, 2 ); /* 16 bit alignment */ ++ ++ /* => ++ ODD-BYTE ISSUE : The odd byte problem has been fixed in the LAN91C111 Rev B. ++ So we check if the Chip Revision, stored in smsc_local->ChipRev, is = 1. ++ If so then we increment the packet length only if RS_ODDFRAME is set. ++ If the Chip's revision is equal to 0, then we blindly increment the packet length ++ by 1, thus always assuming that the packet is odd length, leaving the higher layer ++ to decide the actual length. ++ -- Pramod ++ <= */ ++ if ((9 == lp->ChipID) && (1 == lp->ChipRev)) ++ { ++ if (status & RS_ODDFRAME) ++ data = rtskb_put( skb, packet_length + 1 ); ++ else ++ data = rtskb_put( skb, packet_length); ++ ++ } ++ else ++ { ++ // set odd length for bug in LAN91C111, REV A ++ // which never sets RS_ODDFRAME ++ data = rtskb_put( skb, packet_length + 1 ); ++ } ++ ++#ifdef USE_32_BIT ++ PRINTK3(" Reading %d dwords (and %d bytes) \n", ++ packet_length >> 2, packet_length & 3 ); ++ /* QUESTION: Like in the TX routine, do I want ++ to send the DWORDs or the bytes first, or some ++ mixture. 
A mixture might improve already slow PIO ++ performance */ ++ insl(ioaddr + DATA_REG , data, packet_length >> 2 ); ++ /* read the left over bytes */ ++ insb( ioaddr + DATA_REG, data + (packet_length & 0xFFFFFC), ++ packet_length & 0x3 ); ++#else ++ PRINTK3(" Reading %d words and %d byte(s) \n", ++ (packet_length >> 1 ), packet_length & 1 ); ++ insw(ioaddr + DATA_REG , data, packet_length >> 1); ++ ++#endif // USE_32_BIT ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2) ++ rtdm_printk("Receiving Packet\n"); ++ print_packet( data, packet_length ); ++#endif ++ ++ skb->protocol = rt_eth_type_trans(skb, dev ); ++ skb->time_stamp = time_stamp; ++ rtnetif_rx(skb); ++ lp->stats.rx_packets++; ++ } else { ++ /* error ... */ ++ lp->stats.rx_errors++; ++ ++ if ( status & RS_ALGNERR ) lp->stats.rx_frame_errors++; ++ if ( status & (RS_TOOSHORT | RS_TOOLONG ) ) ++ lp->stats.rx_length_errors++; ++ if ( status & RS_BADCRC) lp->stats.rx_crc_errors++; ++ } ++ ++ timeout = MMU_CMD_TIMEOUT; ++ while ( inw( ioaddr + MMU_CMD_REG ) & MC_BUSY ) { ++ rtdm_task_busy_sleep(1000); // Wait until not busy ++ if (--timeout == 0) { ++ rtdm_printk("%s: ERROR: timeout while waiting on MMU.\n", ++ dev->name); ++ break; ++ } ++ } ++done: ++ /* error or good, tell the card to get rid of this packet */ ++ outw( MC_RELEASE, ioaddr + MMU_CMD_REG ); ++ ++ return; ++} ++ ++/*-------------------------------------------------------------------- ++ . ++ . This is the main routine of the driver, to handle the net_device when ++ . it needs some attention. ++ . ++ . So: ++ . first, save state of the chipset ++ . branch off into routines to handle each case, and acknowledge ++ . each to the interrupt register ++ . and finally restore state. ++ . ++ ---------------------------------------------------------------------*/ ++static int smc_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *dev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int ioaddr = dev->base_addr; ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ ++ byte status; ++ word card_stats; ++ byte mask; ++ int timeout; ++ /* state registers */ ++ word saved_bank; ++ word saved_pointer; ++ ++ unsigned int old_packet_cnt = lp->stats.rx_packets; ++ ++ ++ ++ PRINTK3("%s: SMC interrupt started \n", dev->name); ++ ++/* if (dev == NULL) { ++ rtdm_printk(KERN_WARNING "%s: irq %d for unknown device.\n", ++ dev->name, irq); ++ return; ++ }*/ ++ ++/* will Linux let this happen ?? If not, this costs some speed ++ if ( dev->interrupt ) { ++ printk(KERN_WARNING "%s: interrupt inside interrupt.\n", ++ dev->name); ++ return; ++ } ++ ++ dev->interrupt = 1; */ ++ ++ saved_bank = inw( ioaddr + BANK_SELECT ); ++ ++ SMC_SELECT_BANK(2); ++ saved_pointer = inw( ioaddr + PTR_REG ); ++ ++ /* read the interrupt status register */ ++ mask = inb( ioaddr + IM_REG ); ++ ++ /* disable all interrupts */ ++ outb( 0, ioaddr + IM_REG ); ++ ++ /* ++ * The packet reception will take some time (up to several hundred us). ++ * Re-enable other irqs now so that no critical deadline will be missed. ++ */ ++ hard_local_irq_enable(); ++ ++ /* set a timeout value, so I don't stay here forever */ ++ timeout = 4; ++ ++ PRINTK2(KERN_WARNING "%s: MASK IS %x \n", dev->name, mask); ++ do { ++ /* read the status flag, and mask it */ ++ status = inb( ioaddr + INT_REG ) & mask; ++ if (!status ) ++ break; ++ ++ PRINTK3(KERN_WARNING "%s: Handling interrupt status %x \n", ++ dev->name, status); ++ ++ if (status & IM_RCV_INT) { ++ /* Got a packet(s). 
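++			   smc_rcv() copies the frame out of chip memory and
++			   queues it with rtnetif_rx(); the stack manager is
++			   kicked once at the end of this handler.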
*/ ++ PRINTK2(KERN_WARNING ++ "%s: Receive Interrupt\n", dev->name); ++ smc_rcv(dev); ++ } else if (status & IM_TX_INT ) { ++ rtdm_printk(KERN_ERR "%s: TX ERROR!\n", dev->name); ++ //smc_tx(dev); ++ // Acknowledge the interrupt ++ outb(IM_TX_INT, ioaddr + INT_REG ); ++ } else if (status & IM_TX_EMPTY_INT ) { ++ /* update stats */ ++ SMC_SELECT_BANK( 0 ); ++ card_stats = inw( ioaddr + COUNTER_REG ); ++ /* single collisions */ ++ lp->stats.collisions += card_stats & 0xF; ++ card_stats >>= 4; ++ /* multiple collisions */ ++ lp->stats.collisions += card_stats & 0xF; ++ ++ /* these are for when linux supports these statistics */ ++ SMC_SELECT_BANK( 2 ); ++ PRINTK2(KERN_WARNING "%s: TX_BUFFER_EMPTY handled\n", ++ dev->name); ++ // Acknowledge the interrupt ++ outb( IM_TX_EMPTY_INT, ioaddr + INT_REG ); ++ mask &= ~IM_TX_EMPTY_INT; ++ lp->stats.tx_packets += lp->packets_waiting; ++ lp->packets_waiting = 0; ++ ++ } else if (status & IM_ALLOC_INT ) { ++ PRINTK2(KERN_DEBUG "%s: Allocation interrupt \n", ++ dev->name); ++ /* clear this interrupt so it doesn't happen again */ ++ mask &= ~IM_ALLOC_INT; ++ ++ } else if (status & IM_RX_OVRN_INT ) { ++ lp->stats.rx_errors++; ++ lp->stats.rx_fifo_errors++; ++ // Acknowledge the interrupt ++ outb( IM_RX_OVRN_INT, ioaddr + INT_REG ); ++ } else if (status & IM_EPH_INT ) { ++ PRINTK("%s: UNSUPPORTED: EPH INTERRUPT \n", ++ dev->name); ++ } else if (status & IM_MDINT ) { ++ //smc_phy_interrupt(dev); ++ PRINTK("%s: UNSUPPORTED: MD INTERRUPT \n", ++ dev->name); ++ // Acknowledge the interrupt ++ outb(IM_MDINT, ioaddr + INT_REG ); ++ } else if (status & IM_ERCV_INT ) { ++ PRINTK("%s: UNSUPPORTED: ERCV INTERRUPT \n", ++ dev->name); ++ // Acknowledge the interrupt ++ outb( IM_ERCV_INT, ioaddr + INT_REG ); ++ } ++ } while ( timeout -- ); ++ ++ ++ /* restore register states */ ++ ++ SMC_SELECT_BANK( 2 ); ++ ++ outb( mask, ioaddr + IM_REG ); ++ ++ PRINTK3( KERN_WARNING "%s: MASK is now %x \n", dev->name, mask); ++ outw( saved_pointer, ioaddr + PTR_REG ); ++ ++ SMC_SELECT_BANK( saved_bank ); ++ ++ if (old_packet_cnt != lp->stats.rx_packets) ++ rt_mark_stack_mgr(dev); ++ ++ hard_local_irq_disable(); ++ ++ //dev->interrupt = 0; ++ PRINTK3("%s: Interrupt done\n", dev->name); ++ return RTDM_IRQ_HANDLED; ++} ++ ++ ++/*---------------------------------------------------- ++ . smc_close ++ . ++ . this makes the board clean up everything that it can ++ . and not talk to the outside world. Caused by ++ . an 'ifconfig ethX down' ++ . ++ -----------------------------------------------------*/ ++static int smc_close(struct rtnet_device *dev) ++{ ++ rtnetif_stop_queue(dev); ++ //dev->start = 0; ++ ++ PRINTK2("%s:smc_close\n", dev->name); ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ smc_sysctl_unregister(dev); ++#endif /* CONFIG_SYSCTL */ ++ ++ /* clear everything */ ++ smc_shutdown( dev->base_addr ); ++ ++ /* Update the statistics here. */ ++ ++ return 0; ++} ++ ++/*------------------------------------------------------------ ++ . Get the current statistics. ++ . This may be called with the card open or closed. ++ .-------------------------------------------------------------*/ ++static struct net_device_stats* smc_query_statistics(struct rtnet_device *rtdev) ++{ ++ struct smc_local *lp = (struct smc_local *)rtdev->priv; ++ ++ PRINTK2("%s:smc_query_statistics\n", rtdev->name); ++ ++ return &lp->stats; ++} ++ ++/*----------------------------------------------------------- ++ . smc_set_multicast_list ++ . ++ . This routine will, depending on the values passed to it, ++ . 
either make it accept multicast packets, go into ++ . promiscuous mode ( for TCPDUMP and cousins ) or accept ++ . a select set of multicast packets ++*/ ++static void smc_set_multicast_list(struct rtnet_device *dev) ++{ ++ short ioaddr = dev->base_addr; ++ ++ PRINTK2("%s:smc_set_multicast_list\n", dev->name); ++ ++ SMC_SELECT_BANK(0); ++ if ( dev->flags & IFF_PROMISC ) ++ { ++ PRINTK2("%s:smc_set_multicast_list:RCR_PRMS\n", dev->name); ++ outw( inw(ioaddr + RCR_REG ) | RCR_PRMS, ioaddr + RCR_REG ); ++ } ++ ++/* BUG? I never disable promiscuous mode if multicasting was turned on. ++ Now, I turn off promiscuous mode, but I don't do anything to multicasting ++ when promiscuous mode is turned on. ++*/ ++ ++ /* Here, I am setting this to accept all multicast packets. ++ I don't need to zero the multicast table, because the flag is ++ checked before the table is ++ */ ++ else if (dev->flags & IFF_ALLMULTI) ++ { ++ outw( inw(ioaddr + RCR_REG ) | RCR_ALMUL, ioaddr + RCR_REG ); ++ PRINTK2("%s:smc_set_multicast_list:RCR_ALMUL\n", dev->name); ++ } ++ ++ else { ++ PRINTK2("%s:smc_set_multicast_list:~(RCR_PRMS|RCR_ALMUL)\n", ++ dev->name); ++ outw( inw( ioaddr + RCR_REG ) & ~(RCR_PRMS | RCR_ALMUL), ++ ioaddr + RCR_REG ); ++ ++ /* ++ since I'm disabling all multicast entirely, I need to ++ clear the multicast list ++ */ ++ SMC_SELECT_BANK( 3 ); ++ outw( 0, ioaddr + MCAST_REG1 ); ++ outw( 0, ioaddr + MCAST_REG2 ); ++ outw( 0, ioaddr + MCAST_REG3 ); ++ outw( 0, ioaddr + MCAST_REG4 ); ++ } ++} ++ ++#ifdef MODULE ++ ++static struct rtnet_device *devSMC91111; ++int io = 0; ++int irq = 0; ++int nowait = 0; ++ ++module_param(io, int, 0444); ++module_param(irq, int, 0444); ++module_param(nowait, int, 0444); ++ ++/*------------------------------------------------------------ ++ . Module initialization function ++ .-------------------------------------------------------------*/ ++int __init init_module(void) ++{ ++ int result; ++ ++ PRINTK2("CARDNAME:init_module\n"); ++ if (io == 0) ++ printk(KERN_WARNING ++ CARDNAME": You shouldn't use auto-probing with insmod!\n" ); ++ ++ devSMC91111 = rt_alloc_etherdev(sizeof(struct smc_local), 4 * 2 + 1); ++ if (devSMC91111 == NULL) { ++ printk (KERN_ERR "init_ethernet failed\n"); ++ return -ENODEV; ++ } ++ rtdev_alloc_name(devSMC91111, "rteth%d"); ++ rt_rtdev_connect(devSMC91111, &RTDEV_manager); ++ devSMC91111->vers = RTDEV_VERS_2_0; ++ ++ /* copy the parameters from insmod into the device structure */ ++ devSMC91111->base_addr = io; ++ devSMC91111->irq = irq; ++ devSMC91111->dma = nowait; // Use DMA field for nowait ++ if ((result = smc_init(devSMC91111)) != 0) ++ return result; ++ ++ if ((result = rt_register_rtnetdev(devSMC91111)) != 0) { ++ rt_rtdev_disconnect(devSMC91111); ++ release_region(devSMC91111->base_addr, SMC_IO_EXTENT); ++ ++ rtdm_irq_free(&((struct smc_local *)devSMC91111)->irq_handle); ++ ++ rtdev_free(devSMC91111); ++ ++ return result; ++ } ++ ++ return 0; ++} ++ ++/*------------------------------------------------------------ ++ . Cleanup when module is removed with rmmod ++ .-------------------------------------------------------------*/ ++void __exit cleanup_module(void) ++{ ++ /* No need to check MOD_IN_USE, as sys_delete_module() checks. 
*/ ++ rt_unregister_rtnetdev(devSMC91111); ++ rt_rtdev_disconnect(devSMC91111); ++ ++ release_region(devSMC91111->base_addr, SMC_IO_EXTENT); ++ ++ if (devSMC91111->priv) { ++ rtdm_irq_free(&((struct smc_local *)devSMC91111->priv)->irq_handle); ++ } ++ ++ rtdev_free(devSMC91111); ++} ++ ++#endif /* MODULE */ ++ ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ ++ ++/*------------------------------------------------------------ ++ . Modify a bit in the LAN91C111 register set ++ .-------------------------------------------------------------*/ ++static word smc_modify_regbit(int bank, int ioaddr, int reg, ++ unsigned int bit, int val) ++{ ++ word regval; ++ ++ SMC_SELECT_BANK( bank ); ++ ++ regval = inw( ioaddr+reg ); ++ if (val) ++ regval |= bit; ++ else ++ regval &= ~bit; ++ ++ outw( regval, ioaddr ); ++ return(regval); ++} ++ ++ ++/*------------------------------------------------------------ ++ . Retrieve a bit in the LAN91C111 register set ++ .-------------------------------------------------------------*/ ++static int smc_get_regbit(int bank, int ioaddr, int reg, unsigned int bit) ++{ ++ SMC_SELECT_BANK( bank ); ++ if ( inw( ioaddr+reg ) & bit) ++ return(1); ++ else ++ return(0); ++} ++ ++ ++/*------------------------------------------------------------ ++ . Modify a LAN91C111 register (word access only) ++ .-------------------------------------------------------------*/ ++static void smc_modify_reg(int bank, int ioaddr, int reg, word val) ++{ ++ SMC_SELECT_BANK( bank ); ++ outw( val, ioaddr+reg ); ++} ++ ++ ++/*------------------------------------------------------------ ++ . Retrieve a LAN91C111 register (word access only) ++ .-------------------------------------------------------------*/ ++static int smc_get_reg(int bank, int ioaddr, int reg) ++{ ++ SMC_SELECT_BANK( bank ); ++ return(inw( ioaddr+reg )); ++} ++ ++ ++static const char smc_info_string[] = ++"\n" ++"info Provides this information blurb\n" ++"swver Prints the software version information of this driver\n" ++"autoneg Auto-negotiate Mode = 1\n" ++"rspeed Requested Speed, 100=100Mbps, 10=10Mpbs\n" ++"rfduplx Requested Full Duplex Operation\n" ++"aspeed Actual Speed, 100=100Mbps, 10=10Mpbs\n" ++"afduplx Actual Full Duplex Operation\n" ++"lnkfail PHY Link Failure when 1\n" ++"miiop External MII when 1, Internal PHY when 0\n" ++"swfdup Switched Full Duplex Mode (allowed only in MII operation)\n" ++"ephloop EPH Block Loopback\n" ++"forcol Force a collision\n" ++"filtcar Filter leading edge of carrier sense for 12 bit times\n" ++"freemem Free buffer memory in bytes\n" ++"totmem Total buffer memory in bytes\n" ++"leda Output of LED-A (green)\n" ++"ledb Output of LED-B (yellow)\n" ++"chiprev Revision ID of the LAN91C111 chip\n" ++""; ++ ++/*------------------------------------------------------------ ++ . 
Sysctl handler for all integer parameters ++ .-------------------------------------------------------------*/ ++static int smc_sysctl_handler(ctl_table *ctl, int write, struct file * filp, ++ void *buffer, size_t *lenp, loff_t *ppos) ++{ ++ struct rtnet_device *dev = (struct rtnet_device*)ctl->extra1; ++ struct smc_local *lp = (struct smc_local *)ctl->extra2; ++ int ioaddr = dev->base_addr; ++ int *valp = ctl->data; ++ int val; ++ int ret; ++ ++ // Update parameters from the real registers ++ switch (ctl->ctl_name) ++ { ++ case CTL_SMC_FORCOL: ++ *valp = smc_get_regbit(0, ioaddr, TCR_REG, TCR_FORCOL); ++ break; ++ ++ case CTL_SMC_FREEMEM: ++ *valp = ( (word)smc_get_reg(0, ioaddr, MIR_REG) >> 8 ) ++ * LAN91C111_MEMORY_MULTIPLIER; ++ break; ++ ++ ++ case CTL_SMC_TOTMEM: ++ *valp = ( smc_get_reg(0, ioaddr, MIR_REG) & (word)0x00ff ) ++ * LAN91C111_MEMORY_MULTIPLIER; ++ break; ++ ++ case CTL_SMC_CHIPREV: ++ *valp = smc_get_reg(3, ioaddr, REV_REG); ++ break; ++ ++ case CTL_SMC_AFDUPLX: ++ *valp = (lp->lastPhy18 & PHY_INT_DPLXDET) ? 1 : 0; ++ break; ++ ++ case CTL_SMC_ASPEED: ++ *valp = (lp->lastPhy18 & PHY_INT_SPDDET) ? 100 : 10; ++ break; ++ ++ case CTL_SMC_LNKFAIL: ++ *valp = (lp->lastPhy18 & PHY_INT_LNKFAIL) ? 1 : 0; ++ break; ++ ++ case CTL_SMC_LEDA: ++ *valp = (lp->rpc_cur_mode >> RPC_LSXA_SHFT) & (word)0x0007; ++ break; ++ ++ case CTL_SMC_LEDB: ++ *valp = (lp->rpc_cur_mode >> RPC_LSXB_SHFT) & (word)0x0007; ++ break; ++ ++ case CTL_SMC_MIIOP: ++ *valp = smc_get_regbit(1, ioaddr, CONFIG_REG, CONFIG_EXT_PHY); ++ break; ++ ++#ifdef SMC_DEBUG ++ case CTL_SMC_REG_BSR: // Bank Select ++ *valp = smc_get_reg(0, ioaddr, BSR_REG); ++ break; ++ ++ case CTL_SMC_REG_TCR: // Transmit Control ++ *valp = smc_get_reg(0, ioaddr, TCR_REG); ++ break; ++ ++ case CTL_SMC_REG_ESR: // EPH Status ++ *valp = smc_get_reg(0, ioaddr, EPH_STATUS_REG); ++ break; ++ ++ case CTL_SMC_REG_RCR: // Receive Control ++ *valp = smc_get_reg(0, ioaddr, RCR_REG); ++ break; ++ ++ case CTL_SMC_REG_CTRR: // Counter ++ *valp = smc_get_reg(0, ioaddr, COUNTER_REG); ++ break; ++ ++ case CTL_SMC_REG_MIR: // Memory Information ++ *valp = smc_get_reg(0, ioaddr, MIR_REG); ++ break; ++ ++ case CTL_SMC_REG_RPCR: // Receive/Phy Control ++ *valp = smc_get_reg(0, ioaddr, RPC_REG); ++ break; ++ ++ case CTL_SMC_REG_CFGR: // Configuration ++ *valp = smc_get_reg(1, ioaddr, CONFIG_REG); ++ break; ++ ++ case CTL_SMC_REG_BAR: // Base Address ++ *valp = smc_get_reg(1, ioaddr, BASE_REG); ++ break; ++ ++ case CTL_SMC_REG_IAR0: // Individual Address ++ *valp = smc_get_reg(1, ioaddr, ADDR0_REG); ++ break; ++ ++ case CTL_SMC_REG_IAR1: // Individual Address ++ *valp = smc_get_reg(1, ioaddr, ADDR1_REG); ++ break; ++ ++ case CTL_SMC_REG_IAR2: // Individual Address ++ *valp = smc_get_reg(1, ioaddr, ADDR2_REG); ++ break; ++ ++ case CTL_SMC_REG_GPR: // General Purpose ++ *valp = smc_get_reg(1, ioaddr, GP_REG); ++ break; ++ ++ case CTL_SMC_REG_CTLR: // Control ++ *valp = smc_get_reg(1, ioaddr, CTL_REG); ++ break; ++ ++ case CTL_SMC_REG_MCR: // MMU Command ++ *valp = smc_get_reg(2, ioaddr, MMU_CMD_REG); ++ break; ++ ++ case CTL_SMC_REG_PNR: // Packet Number ++ *valp = smc_get_reg(2, ioaddr, PN_REG); ++ break; ++ ++ case CTL_SMC_REG_FPR: // Allocation Result/FIFO Ports ++ *valp = smc_get_reg(2, ioaddr, RXFIFO_REG); ++ break; ++ ++ case CTL_SMC_REG_PTR: // Pointer ++ *valp = smc_get_reg(2, ioaddr, PTR_REG); ++ break; ++ ++ case CTL_SMC_REG_DR: // Data ++ *valp = smc_get_reg(2, ioaddr, DATA_REG); ++ break; ++ ++ case CTL_SMC_REG_ISR: // Interrupt Status/Mask ++ *valp = 
smc_get_reg(2, ioaddr, INT_REG); ++ break; ++ ++ case CTL_SMC_REG_MTR1: // Multicast Table Entry 1 ++ *valp = smc_get_reg(3, ioaddr, MCAST_REG1); ++ break; ++ ++ case CTL_SMC_REG_MTR2: // Multicast Table Entry 2 ++ *valp = smc_get_reg(3, ioaddr, MCAST_REG2); ++ break; ++ ++ case CTL_SMC_REG_MTR3: // Multicast Table Entry 3 ++ *valp = smc_get_reg(3, ioaddr, MCAST_REG3); ++ break; ++ ++ case CTL_SMC_REG_MTR4: // Multicast Table Entry 4 ++ *valp = smc_get_reg(3, ioaddr, MCAST_REG4); ++ break; ++ ++ case CTL_SMC_REG_MIIR: // Management Interface ++ *valp = smc_get_reg(3, ioaddr, MII_REG); ++ break; ++ ++ case CTL_SMC_REG_REVR: // Revision ++ *valp = smc_get_reg(3, ioaddr, REV_REG); ++ break; ++ ++ case CTL_SMC_REG_ERCVR: // Early RCV ++ *valp = smc_get_reg(3, ioaddr, ERCV_REG); ++ break; ++ ++ case CTL_SMC_REG_EXTR: // External ++ *valp = smc_get_reg(7, ioaddr, EXT_REG); ++ break; ++ ++ case CTL_SMC_PHY_CTRL: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_CNTL_REG); ++ break; ++ ++ case CTL_SMC_PHY_STAT: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_STAT_REG); ++ break; ++ ++ case CTL_SMC_PHY_ID1: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_ID1_REG); ++ break; ++ ++ case CTL_SMC_PHY_ID2: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_ID2_REG); ++ break; ++ ++ case CTL_SMC_PHY_ADC: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_AD_REG); ++ break; ++ ++ case CTL_SMC_PHY_REMC: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_RMT_REG); ++ break; ++ ++ case CTL_SMC_PHY_CFG1: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_CFG1_REG); ++ break; ++ ++ case CTL_SMC_PHY_CFG2: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_CFG2_REG); ++ break; ++ ++ case CTL_SMC_PHY_INT: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_INT_REG); ++ break; ++ ++ case CTL_SMC_PHY_MASK: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_MASK_REG); ++ break; ++ ++#endif // SMC_DEBUG ++ ++ default: ++ // Just ignore unsupported parameters ++ break; ++ } ++ ++ // Save old state ++ val = *valp; ++ ++ // Perform the generic integer operation ++ if ((ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos)) != 0) ++ return(ret); ++ ++ // Write changes out to the registers ++ if (write && *valp != val) { ++ ++ val = *valp; ++ switch (ctl->ctl_name) { ++ ++ case CTL_SMC_SWFDUP: ++ if (val) ++ lp->tcr_cur_mode |= TCR_SWFDUP; ++ else ++ lp->tcr_cur_mode &= ~TCR_SWFDUP; ++ ++ smc_modify_regbit(0, ioaddr, TCR_REG, TCR_SWFDUP, val); ++ break; ++ ++ case CTL_SMC_EPHLOOP: ++ if (val) ++ lp->tcr_cur_mode |= TCR_EPH_LOOP; ++ else ++ lp->tcr_cur_mode &= ~TCR_EPH_LOOP; ++ ++ smc_modify_regbit(0, ioaddr, TCR_REG, TCR_EPH_LOOP, val); ++ break; ++ ++ case CTL_SMC_FORCOL: ++ if (val) ++ lp->tcr_cur_mode |= TCR_FORCOL; ++ else ++ lp->tcr_cur_mode &= ~TCR_FORCOL; ++ ++ // Update the EPH block ++ smc_modify_regbit(0, ioaddr, TCR_REG, TCR_FORCOL, val); ++ break; ++ ++ case CTL_SMC_FILTCAR: ++ if (val) ++ lp->rcr_cur_mode |= RCR_FILT_CAR; ++ else ++ lp->rcr_cur_mode &= ~RCR_FILT_CAR; ++ ++ // Update the EPH block ++ smc_modify_regbit(0, ioaddr, RCR_REG, RCR_FILT_CAR, val); ++ break; ++ ++ case CTL_SMC_RFDUPLX: ++ // Disallow changes if in auto-negotiation mode ++ if (lp->ctl_autoneg) ++ break; ++ ++ if (val) ++ { ++ lp->rpc_cur_mode |= RPC_DPLX; ++ } ++ else ++ { ++ lp->rpc_cur_mode &= ~RPC_DPLX; ++ } ++ ++ // Reconfigure the PHY ++ smc_phy_configure(dev); ++ ++ break; ++ ++ case CTL_SMC_RSPEED: ++ // Disallow changes 
if in auto-negotiation mode ++ if (lp->ctl_autoneg) ++ break; ++ ++ if (val > 10) ++ lp->rpc_cur_mode |= RPC_SPEED; ++ else ++ lp->rpc_cur_mode &= ~RPC_SPEED; ++ ++ // Reconfigure the PHY ++ smc_phy_configure(dev); ++ ++ break; ++ ++ case CTL_SMC_AUTONEG: ++ if (val) ++ lp->rpc_cur_mode |= RPC_ANEG; ++ else ++ lp->rpc_cur_mode &= ~RPC_ANEG; ++ ++ // Reconfigure the PHY ++ smc_phy_configure(dev); ++ ++ break; ++ ++ case CTL_SMC_LEDA: ++ val &= 0x07; // Restrict to 3 ls bits ++ lp->rpc_cur_mode &= ~(word)(0x07<rpc_cur_mode |= (word)(val<rpc_cur_mode); ++ break; ++ ++ case CTL_SMC_LEDB: ++ val &= 0x07; // Restrict to 3 ls bits ++ lp->rpc_cur_mode &= ~(word)(0x07<rpc_cur_mode |= (word)(val<rpc_cur_mode); ++ break; ++ ++ case CTL_SMC_MIIOP: ++ // Update the Internal PHY block ++ smc_modify_regbit(1, ioaddr, CONFIG_REG, ++ CONFIG_EXT_PHY, val); ++ break; ++ ++#ifdef SMC_DEBUG ++ case CTL_SMC_REG_BSR: // Bank Select ++ smc_modify_reg(0, ioaddr, BSR_REG, val); ++ break; ++ ++ case CTL_SMC_REG_TCR: // Transmit Control ++ smc_modify_reg(0, ioaddr, TCR_REG, val); ++ break; ++ ++ case CTL_SMC_REG_ESR: // EPH Status ++ smc_modify_reg(0, ioaddr, EPH_STATUS_REG, val); ++ break; ++ ++ case CTL_SMC_REG_RCR: // Receive Control ++ smc_modify_reg(0, ioaddr, RCR_REG, val); ++ break; ++ ++ case CTL_SMC_REG_CTRR: // Counter ++ smc_modify_reg(0, ioaddr, COUNTER_REG, val); ++ break; ++ ++ case CTL_SMC_REG_MIR: // Memory Information ++ smc_modify_reg(0, ioaddr, MIR_REG, val); ++ break; ++ ++ case CTL_SMC_REG_RPCR: // Receive/Phy Control ++ smc_modify_reg(0, ioaddr, RPC_REG, val); ++ break; ++ ++ case CTL_SMC_REG_CFGR: // Configuration ++ smc_modify_reg(1, ioaddr, CONFIG_REG, val); ++ break; ++ ++ case CTL_SMC_REG_BAR: // Base Address ++ smc_modify_reg(1, ioaddr, BASE_REG, val); ++ break; ++ ++ case CTL_SMC_REG_IAR0: // Individual Address ++ smc_modify_reg(1, ioaddr, ADDR0_REG, val); ++ break; ++ ++ case CTL_SMC_REG_IAR1: // Individual Address ++ smc_modify_reg(1, ioaddr, ADDR1_REG, val); ++ break; ++ ++ case CTL_SMC_REG_IAR2: // Individual Address ++ smc_modify_reg(1, ioaddr, ADDR2_REG, val); ++ break; ++ ++ case CTL_SMC_REG_GPR: // General Purpose ++ smc_modify_reg(1, ioaddr, GP_REG, val); ++ break; ++ ++ case CTL_SMC_REG_CTLR: // Control ++ smc_modify_reg(1, ioaddr, CTL_REG, val); ++ break; ++ ++ case CTL_SMC_REG_MCR: // MMU Command ++ smc_modify_reg(2, ioaddr, MMU_CMD_REG, val); ++ break; ++ ++ case CTL_SMC_REG_PNR: // Packet Number ++ smc_modify_reg(2, ioaddr, PN_REG, val); ++ break; ++ ++ case CTL_SMC_REG_FPR: // Allocation Result/FIFO Ports ++ smc_modify_reg(2, ioaddr, RXFIFO_REG, val); ++ break; ++ ++ case CTL_SMC_REG_PTR: // Pointer ++ smc_modify_reg(2, ioaddr, PTR_REG, val); ++ break; ++ ++ case CTL_SMC_REG_DR: // Data ++ smc_modify_reg(2, ioaddr, DATA_REG, val); ++ break; ++ ++ case CTL_SMC_REG_ISR: // Interrupt Status/Mask ++ smc_modify_reg(2, ioaddr, INT_REG, val); ++ break; ++ ++ case CTL_SMC_REG_MTR1: // Multicast Table Entry 1 ++ smc_modify_reg(3, ioaddr, MCAST_REG1, val); ++ break; ++ ++ case CTL_SMC_REG_MTR2: // Multicast Table Entry 2 ++ smc_modify_reg(3, ioaddr, MCAST_REG2, val); ++ break; ++ ++ case CTL_SMC_REG_MTR3: // Multicast Table Entry 3 ++ smc_modify_reg(3, ioaddr, MCAST_REG3, val); ++ break; ++ ++ case CTL_SMC_REG_MTR4: // Multicast Table Entry 4 ++ smc_modify_reg(3, ioaddr, MCAST_REG4, val); ++ break; ++ ++ case CTL_SMC_REG_MIIR: // Management Interface ++ smc_modify_reg(3, ioaddr, MII_REG, val); ++ break; ++ ++ case CTL_SMC_REG_REVR: // Revision ++ smc_modify_reg(3, ioaddr, REV_REG, 
val); ++ break; ++ ++ case CTL_SMC_REG_ERCVR: // Early RCV ++ smc_modify_reg(3, ioaddr, ERCV_REG, val); ++ break; ++ ++ case CTL_SMC_REG_EXTR: // External ++ smc_modify_reg(7, ioaddr, EXT_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_CTRL: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_CNTL_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_STAT: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_STAT_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_ID1: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_ID1_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_ID2: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_ID2_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_ADC: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_AD_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_REMC: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_RMT_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_CFG1: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_CFG1_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_CFG2: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_CFG2_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_INT: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_INT_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_MASK: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_MASK_REG, val); ++ break; ++ ++#endif // SMC_DEBUG ++ ++ default: ++ // Just ignore unsupported parameters ++ break; ++ } // end switch ++ ++ } // end if ++ ++ return ret; ++} ++ ++/*------------------------------------------------------------ ++ . Sysctl registration function for all parameters (files) ++ .-------------------------------------------------------------*/ ++static void smc_sysctl_register(struct rtnet_device *dev) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ static int ctl_name = CTL_SMC; ++ ctl_table* ct; ++ int i; ++ ++ // Make sure the ctl_tables start out as all zeros ++ memset(lp->root_table, 0, sizeof lp->root_table); ++ memset(lp->eth_table, 0, sizeof lp->eth_table); ++ memset(lp->param_table, 0, sizeof lp->param_table); ++ ++ // Initialize the root table ++ ct = lp->root_table; ++ ct->ctl_name = CTL_DEV; ++ ct->procname = "dev"; ++ ct->maxlen = 0; ++ ct->mode = 0555; ++ ct->child = lp->eth_table; ++ // remaining fields are zero ++ ++ // Initialize the ethX table (this device's table) ++ ct = lp->eth_table; ++ ct->ctl_name = ctl_name++; // Must be unique ++ ct->procname = dev->name; ++ ct->maxlen = 0; ++ ct->mode = 0555; ++ ct->child = lp->param_table; ++ // remaining fields are zero ++ ++ // Initialize the parameter (files) table ++ // Make sure the last entry remains null ++ ct = lp->param_table; ++ for (i = 0; i < (CTL_SMC_LAST_ENTRY-1); ++i) ++ { ++ // Initialize fields common to all table entries ++ ct[i].proc_handler = smc_sysctl_handler; ++ ct[i].extra1 = (void*)dev; // Save our device pointer ++ ct[i].extra2 = (void*)lp; // Save our smc_local data pointer ++ } ++ ++ // INFO - this is our only string parameter ++ i = 0; ++ ct[i].proc_handler = proc_dostring; // use default handler ++ ct[i].ctl_name = CTL_SMC_INFO; ++ ct[i].procname = "info"; ++ ct[i].data = (void*)smc_info_string; ++ ct[i].maxlen = sizeof smc_info_string; ++ ct[i].mode = 0444; // Read only ++ ++ // SWVER ++ ++i; ++ ct[i].proc_handler = proc_dostring; // use default handler ++ ct[i].ctl_name = CTL_SMC_SWVER; ++ ct[i].procname = "swver"; ++ ct[i].data = (void*)version; ++ ct[i].maxlen = sizeof version; ++ ct[i].mode = 0444; // Read only ++ ++ // SWFDUP ++ ++i; ++ ct[i].ctl_name = CTL_SMC_SWFDUP; ++ ct[i].procname = "swfdup"; ++ ct[i].data 
= (void*)&(lp->ctl_swfdup); ++ ct[i].maxlen = sizeof lp->ctl_swfdup; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // EPHLOOP ++ ++i; ++ ct[i].ctl_name = CTL_SMC_EPHLOOP; ++ ct[i].procname = "ephloop"; ++ ct[i].data = (void*)&(lp->ctl_ephloop); ++ ct[i].maxlen = sizeof lp->ctl_ephloop; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // MIIOP ++ ++i; ++ ct[i].ctl_name = CTL_SMC_MIIOP; ++ ct[i].procname = "miiop"; ++ ct[i].data = (void*)&(lp->ctl_miiop); ++ ct[i].maxlen = sizeof lp->ctl_miiop; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // AUTONEG ++ ++i; ++ ct[i].ctl_name = CTL_SMC_AUTONEG; ++ ct[i].procname = "autoneg"; ++ ct[i].data = (void*)&(lp->ctl_autoneg); ++ ct[i].maxlen = sizeof lp->ctl_autoneg; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // RFDUPLX ++ ++i; ++ ct[i].ctl_name = CTL_SMC_RFDUPLX; ++ ct[i].procname = "rfduplx"; ++ ct[i].data = (void*)&(lp->ctl_rfduplx); ++ ct[i].maxlen = sizeof lp->ctl_rfduplx; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // RSPEED ++ ++i; ++ ct[i].ctl_name = CTL_SMC_RSPEED; ++ ct[i].procname = "rspeed"; ++ ct[i].data = (void*)&(lp->ctl_rspeed); ++ ct[i].maxlen = sizeof lp->ctl_rspeed; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // AFDUPLX ++ ++i; ++ ct[i].ctl_name = CTL_SMC_AFDUPLX; ++ ct[i].procname = "afduplx"; ++ ct[i].data = (void*)&(lp->ctl_afduplx); ++ ct[i].maxlen = sizeof lp->ctl_afduplx; ++ ct[i].mode = 0444; // Read only ++ ++ // ASPEED ++ ++i; ++ ct[i].ctl_name = CTL_SMC_ASPEED; ++ ct[i].procname = "aspeed"; ++ ct[i].data = (void*)&(lp->ctl_aspeed); ++ ct[i].maxlen = sizeof lp->ctl_aspeed; ++ ct[i].mode = 0444; // Read only ++ ++ // LNKFAIL ++ ++i; ++ ct[i].ctl_name = CTL_SMC_LNKFAIL; ++ ct[i].procname = "lnkfail"; ++ ct[i].data = (void*)&(lp->ctl_lnkfail); ++ ct[i].maxlen = sizeof lp->ctl_lnkfail; ++ ct[i].mode = 0444; // Read only ++ ++ // FORCOL ++ ++i; ++ ct[i].ctl_name = CTL_SMC_FORCOL; ++ ct[i].procname = "forcol"; ++ ct[i].data = (void*)&(lp->ctl_forcol); ++ ct[i].maxlen = sizeof lp->ctl_forcol; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // FILTCAR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_FILTCAR; ++ ct[i].procname = "filtcar"; ++ ct[i].data = (void*)&(lp->ctl_filtcar); ++ ct[i].maxlen = sizeof lp->ctl_filtcar; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // FREEMEM ++ ++i; ++ ct[i].ctl_name = CTL_SMC_FREEMEM; ++ ct[i].procname = "freemem"; ++ ct[i].data = (void*)&(lp->ctl_freemem); ++ ct[i].maxlen = sizeof lp->ctl_freemem; ++ ct[i].mode = 0444; // Read only ++ ++ // TOTMEM ++ ++i; ++ ct[i].ctl_name = CTL_SMC_TOTMEM; ++ ct[i].procname = "totmem"; ++ ct[i].data = (void*)&(lp->ctl_totmem); ++ ct[i].maxlen = sizeof lp->ctl_totmem; ++ ct[i].mode = 0444; // Read only ++ ++ // LEDA ++ ++i; ++ ct[i].ctl_name = CTL_SMC_LEDA; ++ ct[i].procname = "leda"; ++ ct[i].data = (void*)&(lp->ctl_leda); ++ ct[i].maxlen = sizeof lp->ctl_leda; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // LEDB ++ ++i; ++ ct[i].ctl_name = CTL_SMC_LEDB; ++ ct[i].procname = "ledb"; ++ ct[i].data = (void*)&(lp->ctl_ledb); ++ ct[i].maxlen = sizeof lp->ctl_ledb; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // CHIPREV ++ ++i; ++ ct[i].ctl_name = CTL_SMC_CHIPREV; ++ ct[i].procname = "chiprev"; ++ ct[i].data = (void*)&(lp->ctl_chiprev); ++ ct[i].maxlen = sizeof lp->ctl_chiprev; ++ ct[i].mode = 0444; // Read only ++ ++#ifdef SMC_DEBUG ++ // REG_BSR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_BSR; ++ ct[i].procname = "reg_bsr"; ++ ct[i].data = 
(void*)&(lp->ctl_reg_bsr); ++ ct[i].maxlen = sizeof lp->ctl_reg_bsr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_TCR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_TCR; ++ ct[i].procname = "reg_tcr"; ++ ct[i].data = (void*)&(lp->ctl_reg_tcr); ++ ct[i].maxlen = sizeof lp->ctl_reg_tcr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_ESR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_ESR; ++ ct[i].procname = "reg_esr"; ++ ct[i].data = (void*)&(lp->ctl_reg_esr); ++ ct[i].maxlen = sizeof lp->ctl_reg_esr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_RCR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_RCR; ++ ct[i].procname = "reg_rcr"; ++ ct[i].data = (void*)&(lp->ctl_reg_rcr); ++ ct[i].maxlen = sizeof lp->ctl_reg_rcr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_CTRR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_CTRR; ++ ct[i].procname = "reg_ctrr"; ++ ct[i].data = (void*)&(lp->ctl_reg_ctrr); ++ ct[i].maxlen = sizeof lp->ctl_reg_ctrr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MIR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MIR; ++ ct[i].procname = "reg_mir"; ++ ct[i].data = (void*)&(lp->ctl_reg_mir); ++ ct[i].maxlen = sizeof lp->ctl_reg_mir; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_RPCR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_RPCR; ++ ct[i].procname = "reg_rpcr"; ++ ct[i].data = (void*)&(lp->ctl_reg_rpcr); ++ ct[i].maxlen = sizeof lp->ctl_reg_rpcr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_CFGR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_CFGR; ++ ct[i].procname = "reg_cfgr"; ++ ct[i].data = (void*)&(lp->ctl_reg_cfgr); ++ ct[i].maxlen = sizeof lp->ctl_reg_cfgr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_BAR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_BAR; ++ ct[i].procname = "reg_bar"; ++ ct[i].data = (void*)&(lp->ctl_reg_bar); ++ ct[i].maxlen = sizeof lp->ctl_reg_bar; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_IAR0 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_IAR0; ++ ct[i].procname = "reg_iar0"; ++ ct[i].data = (void*)&(lp->ctl_reg_iar0); ++ ct[i].maxlen = sizeof lp->ctl_reg_iar0; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_IAR1 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_IAR1; ++ ct[i].procname = "reg_iar1"; ++ ct[i].data = (void*)&(lp->ctl_reg_iar1); ++ ct[i].maxlen = sizeof lp->ctl_reg_iar1; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_IAR2 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_IAR2; ++ ct[i].procname = "reg_iar2"; ++ ct[i].data = (void*)&(lp->ctl_reg_iar2); ++ ct[i].maxlen = sizeof lp->ctl_reg_iar2; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_GPR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_GPR; ++ ct[i].procname = "reg_gpr"; ++ ct[i].data = (void*)&(lp->ctl_reg_gpr); ++ ct[i].maxlen = sizeof lp->ctl_reg_gpr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_CTLR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_CTLR; ++ ct[i].procname = "reg_ctlr"; ++ ct[i].data = (void*)&(lp->ctl_reg_ctlr); ++ ct[i].maxlen = sizeof lp->ctl_reg_ctlr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MCR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MCR; ++ ct[i].procname = "reg_mcr"; ++ ct[i].data = (void*)&(lp->ctl_reg_mcr); ++ ct[i].maxlen = sizeof lp->ctl_reg_mcr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_PNR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_PNR; ++ ct[i].procname = "reg_pnr"; ++ ct[i].data = (void*)&(lp->ctl_reg_pnr); ++ ct[i].maxlen = sizeof lp->ctl_reg_pnr; 
++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_FPR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_FPR; ++ ct[i].procname = "reg_fpr"; ++ ct[i].data = (void*)&(lp->ctl_reg_fpr); ++ ct[i].maxlen = sizeof lp->ctl_reg_fpr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_PTR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_PTR; ++ ct[i].procname = "reg_ptr"; ++ ct[i].data = (void*)&(lp->ctl_reg_ptr); ++ ct[i].maxlen = sizeof lp->ctl_reg_ptr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_DR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_DR; ++ ct[i].procname = "reg_dr"; ++ ct[i].data = (void*)&(lp->ctl_reg_dr); ++ ct[i].maxlen = sizeof lp->ctl_reg_dr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_ISR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_ISR; ++ ct[i].procname = "reg_isr"; ++ ct[i].data = (void*)&(lp->ctl_reg_isr); ++ ct[i].maxlen = sizeof lp->ctl_reg_isr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MTR1 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MTR1; ++ ct[i].procname = "reg_mtr1"; ++ ct[i].data = (void*)&(lp->ctl_reg_mtr1); ++ ct[i].maxlen = sizeof lp->ctl_reg_mtr1; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MTR2 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MTR2; ++ ct[i].procname = "reg_mtr2"; ++ ct[i].data = (void*)&(lp->ctl_reg_mtr2); ++ ct[i].maxlen = sizeof lp->ctl_reg_mtr2; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MTR3 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MTR3; ++ ct[i].procname = "reg_mtr3"; ++ ct[i].data = (void*)&(lp->ctl_reg_mtr3); ++ ct[i].maxlen = sizeof lp->ctl_reg_mtr3; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MTR4 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MTR4; ++ ct[i].procname = "reg_mtr4"; ++ ct[i].data = (void*)&(lp->ctl_reg_mtr4); ++ ct[i].maxlen = sizeof lp->ctl_reg_mtr4; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MIIR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MIIR; ++ ct[i].procname = "reg_miir"; ++ ct[i].data = (void*)&(lp->ctl_reg_miir); ++ ct[i].maxlen = sizeof lp->ctl_reg_miir; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_REVR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_REVR; ++ ct[i].procname = "reg_revr"; ++ ct[i].data = (void*)&(lp->ctl_reg_revr); ++ ct[i].maxlen = sizeof lp->ctl_reg_revr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_ERCVR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_ERCVR; ++ ct[i].procname = "reg_ercvr"; ++ ct[i].data = (void*)&(lp->ctl_reg_ercvr); ++ ct[i].maxlen = sizeof lp->ctl_reg_ercvr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_EXTR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_EXTR; ++ ct[i].procname = "reg_extr"; ++ ct[i].data = (void*)&(lp->ctl_reg_extr); ++ ct[i].maxlen = sizeof lp->ctl_reg_extr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Control ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_CTRL; ++ ct[i].procname = "phy_ctrl"; ++ ct[i].data = (void*)&(lp->ctl_phy_ctrl); ++ ct[i].maxlen = sizeof lp->ctl_phy_ctrl; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Status ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_STAT; ++ ct[i].procname = "phy_stat"; ++ ct[i].data = (void*)&(lp->ctl_phy_stat); ++ ct[i].maxlen = sizeof lp->ctl_phy_stat; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY ID1 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_ID1; ++ ct[i].procname = "phy_id1"; ++ ct[i].data = (void*)&(lp->ctl_phy_id1); ++ ct[i].maxlen = sizeof lp->ctl_phy_id1; ++ ct[i].mode = 0644; // Read by all, write by 
root ++ ++ // PHY ID2 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_ID2; ++ ct[i].procname = "phy_id2"; ++ ct[i].data = (void*)&(lp->ctl_phy_id2); ++ ct[i].maxlen = sizeof lp->ctl_phy_id2; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Advertise Capabilities ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_ADC; ++ ct[i].procname = "phy_adc"; ++ ct[i].data = (void*)&(lp->ctl_phy_adc); ++ ct[i].maxlen = sizeof lp->ctl_phy_adc; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Remote Capabilities ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_REMC; ++ ct[i].procname = "phy_remc"; ++ ct[i].data = (void*)&(lp->ctl_phy_remc); ++ ct[i].maxlen = sizeof lp->ctl_phy_remc; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Configuration 1 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_CFG1; ++ ct[i].procname = "phy_cfg1"; ++ ct[i].data = (void*)&(lp->ctl_phy_cfg1); ++ ct[i].maxlen = sizeof lp->ctl_phy_cfg1; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Configuration 2 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_CFG2; ++ ct[i].procname = "phy_cfg2"; ++ ct[i].data = (void*)&(lp->ctl_phy_cfg2); ++ ct[i].maxlen = sizeof lp->ctl_phy_cfg2; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Interrupt/Status Output ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_INT; ++ ct[i].procname = "phy_int"; ++ ct[i].data = (void*)&(lp->ctl_phy_int); ++ ct[i].maxlen = sizeof lp->ctl_phy_int; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Interrupt/Status Mask ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_MASK; ++ ct[i].procname = "phy_mask"; ++ ct[i].data = (void*)&(lp->ctl_phy_mask); ++ ct[i].maxlen = sizeof lp->ctl_phy_mask; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++#endif // SMC_DEBUG ++ ++ // Register /proc/sys/dev/ethX ++ lp->sysctl_header = register_sysctl_table(lp->root_table, 1); ++} ++ ++ ++/*------------------------------------------------------------ ++ . Sysctl unregistration when driver is closed ++ .-------------------------------------------------------------*/ ++static void smc_sysctl_unregister(struct rtnet_device *dev) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ ++ unregister_sysctl_table(lp->sysctl_header); ++} ++ ++#endif /* endif CONFIG_SYSCTL */ ++ ++ ++//---PHY CONTROL AND CONFIGURATION----------------------------------------- ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2 ) ++ ++/*------------------------------------------------------------ ++ . Debugging function for viewing MII Management serial bitstream ++ .-------------------------------------------------------------*/ ++static void smc_dump_mii_stream(byte* bits, int size) ++{ ++ int i; ++ ++ printk("BIT#:"); ++ for (i = 0; i < size; ++i) ++ { ++ printk("%d", i%10); ++ } ++ ++ printk("\nMDOE:"); ++ for (i = 0; i < size; ++i) ++ { ++ if (bits[i] & MII_MDOE) ++ printk("1"); ++ else ++ printk("0"); ++ } ++ ++ printk("\nMDO :"); ++ for (i = 0; i < size; ++i) ++ { ++ if (bits[i] & MII_MDO) ++ printk("1"); ++ else ++ printk("0"); ++ } ++ ++ printk("\nMDI :"); ++ for (i = 0; i < size; ++i) ++ { ++ if (bits[i] & MII_MDI) ++ printk("1"); ++ else ++ printk("0"); ++ } ++ ++ printk("\n"); ++} ++#endif ++ ++/*------------------------------------------------------------ ++ . 
Reads a register from the MII Management serial interface ++ .-------------------------------------------------------------*/ ++static word smc_read_phy_register(int ioaddr, byte phyaddr, byte phyreg) ++{ ++ int oldBank; ++ int i; ++ byte mask; ++ word mii_reg; ++ byte bits[64]; ++ int clk_idx = 0; ++ int input_idx; ++ word phydata; ++ ++ // 32 consecutive ones on MDO to establish sync ++ for (i = 0; i < 32; ++i) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ ++ // Start code <01> ++ bits[clk_idx++] = MII_MDOE; ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ ++ // Read command <10> ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Output the PHY address, msb first ++ mask = (byte)0x10; ++ for (i = 0; i < 5; ++i) ++ { ++ if (phyaddr & mask) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ else ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Shift to next lowest bit ++ mask >>= 1; ++ } ++ ++ // Output the phy register number, msb first ++ mask = (byte)0x10; ++ for (i = 0; i < 5; ++i) ++ { ++ if (phyreg & mask) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ else ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Shift to next lowest bit ++ mask >>= 1; ++ } ++ ++ // Tristate and turnaround (2 bit times) ++ bits[clk_idx++] = 0; ++ //bits[clk_idx++] = 0; ++ ++ // Input starts at this bit time ++ input_idx = clk_idx; ++ ++ // Will input 16 bits ++ for (i = 0; i < 16; ++i) ++ bits[clk_idx++] = 0; ++ ++ // Final clock bit ++ bits[clk_idx++] = 0; ++ ++ // Save the current bank ++ oldBank = inw( ioaddr+BANK_SELECT ); ++ ++ // Select bank 3 ++ SMC_SELECT_BANK( 3 ); ++ ++ // Get the current MII register value ++ mii_reg = inw( ioaddr+MII_REG ); ++ ++ // Turn off all MII Interface bits ++ mii_reg &= ~(MII_MDOE|MII_MCLK|MII_MDI|MII_MDO); ++ ++ // Clock all 64 cycles ++ for (i = 0; i < sizeof bits; ++i) ++ { ++ // Clock Low - output data ++ outw( mii_reg | bits[i], ioaddr+MII_REG ); ++ udelay(50); ++ ++ ++ // Clock Hi - input data ++ outw( mii_reg | bits[i] | MII_MCLK, ioaddr+MII_REG ); ++ udelay(50); ++ bits[i] |= inw( ioaddr+MII_REG ) & MII_MDI; ++ } ++ ++ // Return to idle state ++ // Set clock to low, data to low, and output tristated ++ outw( mii_reg, ioaddr+MII_REG ); ++ udelay(50); ++ ++ // Restore original bank select ++ SMC_SELECT_BANK( oldBank ); ++ ++ // Recover input data ++ phydata = 0; ++ for (i = 0; i < 16; ++i) ++ { ++ phydata <<= 1; ++ ++ if (bits[input_idx++] & MII_MDI) ++ phydata |= 0x0001; ++ } ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2 ) ++ printk("smc_read_phy_register(): phyaddr=%x,phyreg=%x,phydata=%x\n", ++ phyaddr, phyreg, phydata); ++ smc_dump_mii_stream(bits, sizeof bits); ++#endif ++ ++ return(phydata); ++} ++ ++ ++/*------------------------------------------------------------ ++ . 
Writes a register to the MII Management serial interface ++ .-------------------------------------------------------------*/ ++static void smc_write_phy_register(int ioaddr, ++ byte phyaddr, byte phyreg, word phydata) ++{ ++ int oldBank; ++ int i; ++ word mask; ++ word mii_reg; ++ byte bits[65]; ++ int clk_idx = 0; ++ ++ // 32 consecutive ones on MDO to establish sync ++ for (i = 0; i < 32; ++i) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ ++ // Start code <01> ++ bits[clk_idx++] = MII_MDOE; ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ ++ // Write command <01> ++ bits[clk_idx++] = MII_MDOE; ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ ++ // Output the PHY address, msb first ++ mask = (byte)0x10; ++ for (i = 0; i < 5; ++i) ++ { ++ if (phyaddr & mask) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ else ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Shift to next lowest bit ++ mask >>= 1; ++ } ++ ++ // Output the phy register number, msb first ++ mask = (byte)0x10; ++ for (i = 0; i < 5; ++i) ++ { ++ if (phyreg & mask) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ else ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Shift to next lowest bit ++ mask >>= 1; ++ } ++ ++ // Tristate and turnaround (2 bit times) ++ bits[clk_idx++] = 0; ++ bits[clk_idx++] = 0; ++ ++ // Write out 16 bits of data, msb first ++ mask = 0x8000; ++ for (i = 0; i < 16; ++i) ++ { ++ if (phydata & mask) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ else ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Shift to next lowest bit ++ mask >>= 1; ++ } ++ ++ // Final clock bit (tristate) ++ bits[clk_idx++] = 0; ++ ++ // Save the current bank ++ oldBank = inw( ioaddr+BANK_SELECT ); ++ ++ // Select bank 3 ++ SMC_SELECT_BANK( 3 ); ++ ++ // Get the current MII register value ++ mii_reg = inw( ioaddr+MII_REG ); ++ ++ // Turn off all MII Interface bits ++ mii_reg &= ~(MII_MDOE|MII_MCLK|MII_MDI|MII_MDO); ++ ++ // Clock all cycles ++ for (i = 0; i < sizeof bits; ++i) ++ { ++ // Clock Low - output data ++ outw( mii_reg | bits[i], ioaddr+MII_REG ); ++ udelay(50); ++ ++ ++ // Clock Hi - input data ++ outw( mii_reg | bits[i] | MII_MCLK, ioaddr+MII_REG ); ++ udelay(50); ++ bits[i] |= inw( ioaddr+MII_REG ) & MII_MDI; ++ } ++ ++ // Return to idle state ++ // Set clock to low, data to low, and output tristated ++ outw( mii_reg, ioaddr+MII_REG ); ++ udelay(50); ++ ++ // Restore original bank select ++ SMC_SELECT_BANK( oldBank ); ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2 ) ++ printk("smc_write_phy_register(): phyaddr=%x,phyreg=%x,phydata=%x\n", ++ phyaddr, phyreg, phydata); ++ smc_dump_mii_stream(bits, sizeof bits); ++#endif ++} ++ ++ ++/*------------------------------------------------------------ ++ . 
Finds and reports the PHY address ++ .-------------------------------------------------------------*/ ++static int smc_detect_phy(struct rtnet_device* dev) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ int ioaddr = dev->base_addr; ++ word phy_id1; ++ word phy_id2; ++ int phyaddr; ++ int found = 0; ++ ++ PRINTK3("%s:smc_detect_phy()\n", dev->name); ++ ++ // Scan all 32 PHY addresses if necessary ++ for (phyaddr = 0; phyaddr < 32; ++phyaddr) ++ { ++ // Read the PHY identifiers ++ phy_id1 = smc_read_phy_register(ioaddr, phyaddr, PHY_ID1_REG); ++ phy_id2 = smc_read_phy_register(ioaddr, phyaddr, PHY_ID2_REG); ++ ++ PRINTK3("%s: phy_id1=%x, phy_id2=%x\n", ++ dev->name, phy_id1, phy_id2); ++ ++ // Make sure it is a valid identifier ++ if ((phy_id2 > 0x0000) && (phy_id2 < 0xffff) && ++ (phy_id1 > 0x0000) && (phy_id1 < 0xffff)) ++ { ++ if ((phy_id1 != 0x8000) && (phy_id2 != 0x8000)) ++ { ++ // Save the PHY's address ++ lp->phyaddr = phyaddr; ++ found = 1; ++ break; ++ } ++ } ++ } ++ ++ if (!found) ++ { ++ PRINTK("%s: No PHY found\n", dev->name); ++ return(0); ++ } ++ ++ // Set the PHY type ++ if ( (phy_id1 == 0x0016) && ((phy_id2 & 0xFFF0) == 0xF840 ) ) ++ { ++ lp->phytype = PHY_LAN83C183; ++ PRINTK("%s: PHY=LAN83C183 (LAN91C111 Internal)\n", dev->name); ++ } ++ ++ if ( (phy_id1 == 0x0282) && ((phy_id2 & 0xFFF0) == 0x1C50) ) ++ { ++ lp->phytype = PHY_LAN83C180; ++ PRINTK("%s: PHY=LAN83C180\n", dev->name); ++ } ++ ++ return(1); ++} ++ ++/*------------------------------------------------------------ ++ . Waits the specified number of milliseconds - kernel friendly ++ .-------------------------------------------------------------*/ ++static void smc_wait_ms(unsigned int ms) ++{ ++ ++ if (!in_interrupt()) ++ { ++ current->state = TASK_UNINTERRUPTIBLE; ++ schedule_timeout(1 + ms * HZ / 1000); ++ } ++ else ++ { ++ current->state = TASK_INTERRUPTIBLE; ++ schedule_timeout(1 + ms * HZ / 1000); ++ current->state = TASK_RUNNING; ++ } ++} ++ ++/*------------------------------------------------------------ ++ . Sets the PHY to a configuration as determined by the user ++ .-------------------------------------------------------------*/ ++#ifdef DISABLED____CONFIG_SYSCTL ++static int smc_phy_fixed(struct rtnet_device* dev) ++{ ++ int ioaddr = dev->base_addr; ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ byte phyaddr = lp->phyaddr; ++ word my_fixed_caps; ++ word cfg1; ++ ++ PRINTK3("%s:smc_phy_fixed()\n", dev->name); ++ ++ // Enter Link Disable state ++ cfg1 = smc_read_phy_register(ioaddr, phyaddr, PHY_CFG1_REG); ++ cfg1 |= PHY_CFG1_LNKDIS; ++ smc_write_phy_register(ioaddr, phyaddr, PHY_CFG1_REG, cfg1); ++ ++ // Set our fixed capabilities ++ // Disable auto-negotiation ++ my_fixed_caps = 0; ++ ++ if (lp->ctl_rfduplx) ++ my_fixed_caps |= PHY_CNTL_DPLX; ++ ++ if (lp->ctl_rspeed == 100) ++ my_fixed_caps |= PHY_CNTL_SPEED; ++ ++ // Write our capabilities to the phy control register ++ smc_write_phy_register(ioaddr, phyaddr, PHY_CNTL_REG, my_fixed_caps); ++ ++ // Re-Configure the Receive/Phy Control register ++ outw( lp->rpc_cur_mode, ioaddr + RPC_REG ); ++ ++ // Success ++ return(1); ++} ++#endif // CONFIG_SYSCTL ++ ++ ++/*------------------------------------------------------------ ++ . Configures the specified PHY using Autonegotiation. Calls ++ . smc_phy_fixed() if the user has requested a certain config. 
++ .-------------------------------------------------------------*/ ++static void smc_phy_configure(struct rtnet_device* dev) ++{ ++ int ioaddr = dev->base_addr; ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ int timeout; ++ byte phyaddr; ++ word my_phy_caps; // My PHY capabilities ++ word my_ad_caps; // My Advertised capabilities ++ word status; ++ int failed = 0; ++ ++ PRINTK3("%s:smc_program_phy()\n", dev->name); ++ ++ // Set the blocking flag ++ lp->autoneg_active = 1; ++ ++ // Find the address and type of our phy ++ if (!smc_detect_phy(dev)) ++ { ++ goto smc_phy_configure_exit; ++ } ++ ++ // Get the detected phy address ++ phyaddr = lp->phyaddr; ++ ++ // Reset the PHY, setting all other bits to zero ++ smc_write_phy_register(ioaddr, phyaddr, PHY_CNTL_REG, PHY_CNTL_RST); ++ ++ // Wait for the reset to complete, or time out ++ timeout = 6; // Wait up to 3 seconds ++ while (timeout--) ++ { ++ if (!(smc_read_phy_register(ioaddr, phyaddr, PHY_CNTL_REG) ++ & PHY_CNTL_RST)) ++ { ++ // reset complete ++ break; ++ } ++ ++ smc_wait_ms(500); // wait 500 millisecs ++ if (signal_pending(current)) // Exit anyway if signaled ++ { ++ PRINTK2("%s:PHY reset interrupted by signal\n", ++ dev->name); ++ timeout = 0; ++ break; ++ } ++ } ++ ++ if (timeout < 1) ++ { ++ PRINTK2("%s:PHY reset timed out\n", dev->name); ++ goto smc_phy_configure_exit; ++ } ++ ++ // Read PHY Register 18, Status Output ++ lp->lastPhy18 = smc_read_phy_register(ioaddr, phyaddr, PHY_INT_REG); ++ ++ // Enable PHY Interrupts (for register 18) ++ // Interrupts listed here are disabled ++ smc_write_phy_register(ioaddr, phyaddr, PHY_MASK_REG, ++ PHY_INT_LOSSSYNC | PHY_INT_CWRD | PHY_INT_SSD | ++ PHY_INT_ESD | PHY_INT_RPOL | PHY_INT_JAB | ++ PHY_INT_SPDDET | PHY_INT_DPLXDET); ++ ++ /* Configure the Receive/Phy Control register */ ++ SMC_SELECT_BANK( 0 ); ++ outw( lp->rpc_cur_mode, ioaddr + RPC_REG ); ++ ++ // Copy our capabilities from PHY_STAT_REG to PHY_AD_REG ++ my_phy_caps = smc_read_phy_register(ioaddr, phyaddr, PHY_STAT_REG); ++ my_ad_caps = PHY_AD_CSMA; // I am CSMA capable ++ ++ if (my_phy_caps & PHY_STAT_CAP_T4) ++ my_ad_caps |= PHY_AD_T4; ++ ++ if (my_phy_caps & PHY_STAT_CAP_TXF) ++ my_ad_caps |= PHY_AD_TX_FDX; ++ ++ if (my_phy_caps & PHY_STAT_CAP_TXH) ++ my_ad_caps |= PHY_AD_TX_HDX; ++ ++ if (my_phy_caps & PHY_STAT_CAP_TF) ++ my_ad_caps |= PHY_AD_10_FDX; ++ ++ if (my_phy_caps & PHY_STAT_CAP_TH) ++ my_ad_caps |= PHY_AD_10_HDX; ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ // Disable capabilities not selected by our user ++ if (lp->ctl_rspeed != 100) ++ { ++ my_ad_caps &= ~(PHY_AD_T4|PHY_AD_TX_FDX|PHY_AD_TX_HDX); ++ } ++ ++ if (!lp->ctl_rfduplx) ++ { ++ my_ad_caps &= ~(PHY_AD_TX_FDX|PHY_AD_10_FDX); ++ } ++#endif // CONFIG_SYSCTL ++ ++ // Update our Auto-Neg Advertisement Register ++ smc_write_phy_register(ioaddr, phyaddr, PHY_AD_REG, my_ad_caps); ++ ++ PRINTK2("%s:phy caps=%x\n", dev->name, my_phy_caps); ++ PRINTK2("%s:phy advertised caps=%x\n", dev->name, my_ad_caps); ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ // If the user requested no auto neg, then go set his request ++ if (!(lp->ctl_autoneg)) ++ { ++ smc_phy_fixed(dev); ++ goto smc_phy_configure_exit; ++ } ++#endif // CONFIG_SYSCTL ++ ++ // Restart auto-negotiation process in order to advertise my caps ++ smc_write_phy_register( ioaddr, phyaddr, PHY_CNTL_REG, ++ PHY_CNTL_ANEG_EN | PHY_CNTL_ANEG_RST ); ++ ++ // Wait for the auto-negotiation to complete. This may take from ++ // 2 to 3 seconds. 
++ // Wait for the reset to complete, or time out ++ timeout = 20-1; // Wait up to 10 seconds ++ do ++ { ++ status = smc_read_phy_register(ioaddr, phyaddr, PHY_STAT_REG); ++ if (status & PHY_STAT_ANEG_ACK) ++ { ++ // auto-negotiate complete ++ break; ++ } ++ ++ smc_wait_ms(500); // wait 500 millisecs ++ if (signal_pending(current)) // Exit anyway if signaled ++ { ++ printk(KERN_DEBUG ++ "%s:PHY auto-negotiate interrupted by signal\n", ++ dev->name); ++ timeout = 0; ++ break; ++ } ++ ++ // Restart auto-negotiation if remote fault ++ if (status & PHY_STAT_REM_FLT) ++ { ++ PRINTK2("%s:PHY remote fault detected\n", dev->name); ++ ++ // Restart auto-negotiation ++ PRINTK2("%s:PHY restarting auto-negotiation\n", ++ dev->name); ++ smc_write_phy_register( ioaddr, phyaddr, PHY_CNTL_REG, ++ PHY_CNTL_ANEG_EN | PHY_CNTL_ANEG_RST | ++ PHY_CNTL_SPEED | PHY_CNTL_DPLX); ++ } ++ } ++ while (timeout--); ++ ++ if (timeout < 1) ++ { ++ printk(KERN_DEBUG "%s:PHY auto-negotiate timed out\n", ++ dev->name); ++ PRINTK2("%s:PHY auto-negotiate timed out\n", dev->name); ++ failed = 1; ++ } ++ ++ // Fail if we detected an auto-negotiate remote fault ++ if (status & PHY_STAT_REM_FLT) ++ { ++ printk(KERN_DEBUG "%s:PHY remote fault detected\n", dev->name); ++ PRINTK2("%s:PHY remote fault detected\n", dev->name); ++ failed = 1; ++ } ++ ++ // The smc_phy_interrupt() routine will be called to update lastPhy18 ++ ++ // Set our sysctl parameters to match auto-negotiation results ++ if ( lp->lastPhy18 & PHY_INT_SPDDET ) ++ { ++ PRINTK2("%s:PHY 100BaseT\n", dev->name); ++ lp->rpc_cur_mode |= RPC_SPEED; ++ } ++ else ++ { ++ PRINTK2("%s:PHY 10BaseT\n", dev->name); ++ lp->rpc_cur_mode &= ~RPC_SPEED; ++ } ++ ++ if ( lp->lastPhy18 & PHY_INT_DPLXDET ) ++ { ++ PRINTK2("%s:PHY Full Duplex\n", dev->name); ++ lp->rpc_cur_mode |= RPC_DPLX; ++ } ++ else ++ { ++ PRINTK2("%s:PHY Half Duplex\n", dev->name); ++ lp->rpc_cur_mode &= ~RPC_DPLX; ++ } ++ ++ // Re-Configure the Receive/Phy Control register ++ outw( lp->rpc_cur_mode, ioaddr + RPC_REG ); ++ ++ smc_phy_configure_exit: ++ ++ // Exit auto-negotiation ++ lp->autoneg_active = 0; ++} ++ ++ ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/drivers/tulip/eeprom.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/eeprom.c 2021-04-07 16:01:27.339634006 +0800 +@@ -0,0 +1,321 @@ ++/* ++ drivers/net/tulip/eeprom.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#include "tulip.h" ++#include ++#include ++ ++ ++ ++/* Serial EEPROM section. */ ++/* The main routine to parse the very complicated SROM structure. ++ Search www.digital.com for "21X4 SROM" to get details. ++ This code is very complex, and will require changes to support ++ additional cards, so I'll be verbose about what is going on. ++ */ ++ ++/* Known cards that have old-style EEPROMs. 
*/ ++static struct eeprom_fixup eeprom_fixups[] = { ++ {"Asante", 0, 0, 0x94, {0x1e00, 0x0000, 0x0800, 0x0100, 0x018c, ++ 0x0000, 0x0000, 0xe078, 0x0001, 0x0050, 0x0018 }}, ++ {"SMC9332DST", 0, 0, 0xC0, { 0x1e00, 0x0000, 0x0800, 0x041f, ++ 0x0000, 0x009E, /* 10baseT */ ++ 0x0004, 0x009E, /* 10baseT-FD */ ++ 0x0903, 0x006D, /* 100baseTx */ ++ 0x0905, 0x006D, /* 100baseTx-FD */ }}, ++ {"Cogent EM100", 0, 0, 0x92, { 0x1e00, 0x0000, 0x0800, 0x063f, ++ 0x0107, 0x8021, /* 100baseFx */ ++ 0x0108, 0x8021, /* 100baseFx-FD */ ++ 0x0100, 0x009E, /* 10baseT */ ++ 0x0104, 0x009E, /* 10baseT-FD */ ++ 0x0103, 0x006D, /* 100baseTx */ ++ 0x0105, 0x006D, /* 100baseTx-FD */ }}, ++ {"Maxtech NX-110", 0, 0, 0xE8, { 0x1e00, 0x0000, 0x0800, 0x0513, ++ 0x1001, 0x009E, /* 10base2, CSR12 0x10*/ ++ 0x0000, 0x009E, /* 10baseT */ ++ 0x0004, 0x009E, /* 10baseT-FD */ ++ 0x0303, 0x006D, /* 100baseTx, CSR12 0x03 */ ++ 0x0305, 0x006D, /* 100baseTx-FD CSR12 0x03 */}}, ++ {"Accton EN1207", 0, 0, 0xE8, { 0x1e00, 0x0000, 0x0800, 0x051F, ++ 0x1B01, 0x0000, /* 10base2, CSR12 0x1B */ ++ 0x0B00, 0x009E, /* 10baseT, CSR12 0x0B */ ++ 0x0B04, 0x009E, /* 10baseT-FD,CSR12 0x0B */ ++ 0x1B03, 0x006D, /* 100baseTx, CSR12 0x1B */ ++ 0x1B05, 0x006D, /* 100baseTx-FD CSR12 0x1B */ ++ }}, ++ {"NetWinder", 0x00, 0x10, 0x57, ++ /* Default media = MII ++ * MII block, reset sequence (3) = 0x0821 0x0000 0x0001, capabilities 0x01e1 ++ */ ++ { 0x1e00, 0x0000, 0x000b, 0x8f01, 0x0103, 0x0300, 0x0821, 0x000, 0x0001, 0x0000, 0x01e1 } ++ }, ++ {0, 0, 0, 0, {}}}; ++ ++ ++static const char *block_name[] = { ++ "21140 non-MII", ++ "21140 MII PHY", ++ "21142 Serial PHY", ++ "21142 MII PHY", ++ "21143 SYM PHY", ++ "21143 reset method" ++}; ++ ++ ++void tulip_parse_eeprom(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ /* The last media info list parsed, for multiport boards. */ ++ static struct mediatable *last_mediatable; ++ static unsigned char *last_ee_data; ++ static int controller_index; ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ unsigned char *ee_data = tp->eeprom; ++ int i; ++ ++ tp->mtable = 0; ++ /* Detect an old-style (SA only) EEPROM layout: ++ memcmp(eedata, eedata+16, 8). */ ++ for (i = 0; i < 8; i ++) ++ if (ee_data[i] != ee_data[16+i]) ++ break; ++ if (i >= 8) { ++ if (ee_data[0] == 0xff) { ++ if (last_mediatable) { ++ controller_index++; ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Controller %d of multiport board.\n", ++ rtdev->name, controller_index); ++ tp->mtable = last_mediatable; ++ ee_data = last_ee_data; ++ goto subsequent_board; ++ } else ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Missing EEPROM, this interface may " ++ "not work correctly!\n", ++ rtdev->name); ++ return; ++ } ++ /* Do a fix-up based on the vendor half of the station address prefix. */ ++ for (i = 0; eeprom_fixups[i].name; i++) { ++ if (rtdev->dev_addr[0] == eeprom_fixups[i].addr0 ++ && rtdev->dev_addr[1] == eeprom_fixups[i].addr1 ++ && rtdev->dev_addr[2] == eeprom_fixups[i].addr2) { ++ if (rtdev->dev_addr[2] == 0xE8 && ee_data[0x1a] == 0x55) ++ i++; /* An Accton EN1207, not an outlaw Maxtech. */ ++ memcpy(ee_data + 26, eeprom_fixups[i].newtable, ++ sizeof(eeprom_fixups[i].newtable)); ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Old format EEPROM on '%s' board. Using" ++ " substitute media control info.\n", ++ rtdev->name, eeprom_fixups[i].name); ++ break; ++ } ++ } ++ if (eeprom_fixups[i].name == NULL) { /* No fixup found. 
*/ ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Old style EEPROM with no media selection " ++ "information.\n", ++ rtdev->name); ++ return; ++ } ++ } ++ ++ controller_index = 0; ++ if (ee_data[19] > 1) { /* Multiport board. */ ++ last_ee_data = ee_data; ++ } ++subsequent_board: ++ ++ if (ee_data[27] == 0) { /* No valid media table. */ ++ } else if (tp->chip_id == DC21041) { ++ unsigned char *p = (void *)ee_data + ee_data[27 + controller_index*3]; ++ int media = get_u16(p); ++ int count = p[2]; ++ p += 3; ++ ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: 21041 Media table, default media %4.4x (%s).\n", ++ rtdev->name, media, ++ media & 0x0800 ? "Autosense" : medianame[media & MEDIA_MASK]); ++ for (i = 0; i < count; i++) { ++ unsigned char media_block = *p++; ++ int media_code = media_block & MEDIA_MASK; ++ if (media_block & 0x40) ++ p += 6; ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: 21041 media #%d, %s.\n", ++ rtdev->name, media_code, medianame[media_code]); ++ } ++ } else { ++ unsigned char *p = (void *)ee_data + ee_data[27]; ++ unsigned char csr12dir = 0; ++ int count, new_advertise = 0; ++ struct mediatable *mtable; ++ u16 media = get_u16(p); ++ ++ p += 2; ++ if (tp->flags & CSR12_IN_SROM) ++ csr12dir = *p++; ++ count = *p++; ++ ++ /* there is no phy information, don't even try to build mtable */ ++ if (count == 0) { ++ if (tulip_debug > 0) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: no phy info, aborting mtable build\n", rtdev->name); ++ return; ++ } ++ ++ mtable = (struct mediatable *) ++ kmalloc(sizeof(struct mediatable) + count*sizeof(struct medialeaf), GFP_KERNEL); ++ ++ if (mtable == NULL) ++ return; /* Horrible, impossible failure. */ ++ last_mediatable = tp->mtable = mtable; ++ mtable->defaultmedia = media; ++ mtable->leafcount = count; ++ mtable->csr12dir = csr12dir; ++ mtable->has_nonmii = mtable->has_mii = mtable->has_reset = 0; ++ mtable->csr15dir = mtable->csr15val = 0; ++ ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: EEPROM default media type %s.\n", rtdev->name, ++ media & 0x0800 ? "Autosense" : medianame[media & MEDIA_MASK]); ++ for (i = 0; i < count; i++) { ++ struct medialeaf *leaf = &mtable->mleaf[i]; ++ ++ if ((p[0] & 0x80) == 0) { /* 21140 Compact block. */ ++ leaf->type = 0; ++ leaf->media = p[0] & 0x3f; ++ leaf->leafdata = p; ++ if ((p[2] & 0x61) == 0x01) /* Bogus, but Znyx boards do it. 
*/ ++ mtable->has_mii = 1; ++ p += 4; ++ } else { ++ leaf->type = p[1]; ++ if (p[1] == 0x05) { ++ mtable->has_reset = i; ++ leaf->media = p[2] & 0x0f; ++ } else if (tp->chip_id == DM910X && p[1] == 0x80) { ++ /* Hack to ignore Davicom delay period block */ ++ mtable->leafcount--; ++ count--; ++ i--; ++ leaf->leafdata = p + 2; ++ p += (p[0] & 0x3f) + 1; ++ continue; ++ } else if (p[1] & 1) { ++ int gpr_len, reset_len; ++ ++ mtable->has_mii = 1; ++ leaf->media = 11; ++ gpr_len=p[3]*2; ++ reset_len=p[4+gpr_len]*2; ++ new_advertise |= get_u16(&p[7+gpr_len+reset_len]); ++ } else { ++ mtable->has_nonmii = 1; ++ leaf->media = p[2] & MEDIA_MASK; ++ /* Davicom's media number for 100BaseTX is strange */ ++ if (tp->chip_id == DM910X && leaf->media == 1) ++ leaf->media = 3; ++ switch (leaf->media) { ++ case 0: new_advertise |= 0x0020; break; ++ case 4: new_advertise |= 0x0040; break; ++ case 3: new_advertise |= 0x0080; break; ++ case 5: new_advertise |= 0x0100; break; ++ case 6: new_advertise |= 0x0200; break; ++ } ++ if (p[1] == 2 && leaf->media == 0) { ++ if (p[2] & 0x40) { ++ u32 base15 = get_unaligned((u16*)&p[7]); ++ mtable->csr15dir = ++ (get_unaligned((u16*)&p[9])<<16) + base15; ++ mtable->csr15val = ++ (get_unaligned((u16*)&p[11])<<16) + base15; ++ } else { ++ mtable->csr15dir = get_unaligned((u16*)&p[3])<<16; ++ mtable->csr15val = get_unaligned((u16*)&p[5])<<16; ++ } ++ } ++ } ++ leaf->leafdata = p + 2; ++ p += (p[0] & 0x3f) + 1; ++ } ++ if (tulip_debug > 1 && leaf->media == 11) { ++ unsigned char *bp = leaf->leafdata; ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: MII interface PHY %d, setup/reset " ++ "sequences %d/%d long, capabilities %2.2x %2.2x.\n", ++ rtdev->name, bp[0], bp[1], bp[2 + bp[1]*2], ++ bp[5 + bp[2 + bp[1]*2]*2], bp[4 + bp[2 + bp[1]*2]*2]); ++ } ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Index #%d - Media %s (#%d) described " ++ "by a %s (%d) block.\n", ++ rtdev->name, i, medianame[leaf->media & 15], leaf->media, ++ leaf->type < ARRAY_SIZE(block_name) ? block_name[leaf->type] : "", ++ leaf->type); ++ } ++ if (new_advertise) ++ tp->sym_advertise = new_advertise; ++ } ++} ++/* Reading a serial EEPROM is a "bit" grungy, but we work our way through:->.*/ ++ ++/* EEPROM_Ctrl bits. */ ++#define EE_SHIFT_CLK 0x02 /* EEPROM shift clock. */ ++#define EE_CS 0x01 /* EEPROM chip select. */ ++#define EE_DATA_WRITE 0x04 /* Data from the Tulip to EEPROM. */ ++#define EE_WRITE_0 0x01 ++#define EE_WRITE_1 0x05 ++#define EE_DATA_READ 0x08 /* Data from the EEPROM chip. */ ++#define EE_ENB (0x4800 | EE_CS) ++ ++/* Delay between EEPROM clock transitions. ++ Even at 33Mhz current PCI implementations don't overrun the EEPROM clock. ++ We add a bus turn-around to insure that this remains true. */ ++#define eeprom_delay() inl(ee_addr) ++ ++/* The EEPROM commands include the alway-set leading bit. */ ++#define EE_READ_CMD (6) ++ ++/* Note: this routine returns extra data bits for size detection. */ ++int tulip_read_eeprom(long ioaddr, int location, int addr_len) ++{ ++ int i; ++ unsigned retval = 0; ++ long ee_addr = ioaddr + CSR9; ++ int read_cmd = location | (EE_READ_CMD << addr_len); ++ ++ outl(EE_ENB & ~EE_CS, ee_addr); ++ outl(EE_ENB, ee_addr); ++ ++ /* Shift the read command bits out. */ ++ for (i = 4 + addr_len; i >= 0; i--) { ++ short dataval = (read_cmd & (1 << i)) ? EE_DATA_WRITE : 0; ++ outl(EE_ENB | dataval, ee_addr); ++ eeprom_delay(); ++ outl(EE_ENB | dataval | EE_SHIFT_CLK, ee_addr); ++ eeprom_delay(); ++ retval = (retval << 1) | ((inl(ee_addr) & EE_DATA_READ) ? 
1 : 0); ++ } ++ outl(EE_ENB, ee_addr); ++ eeprom_delay(); ++ ++ for (i = 16; i > 0; i--) { ++ outl(EE_ENB | EE_SHIFT_CLK, ee_addr); ++ eeprom_delay(); ++ retval = (retval << 1) | ((inl(ee_addr) & EE_DATA_READ) ? 1 : 0); ++ outl(EE_ENB, ee_addr); ++ eeprom_delay(); ++ } ++ ++ /* Terminate the EEPROM access. */ ++ outl(EE_ENB & ~EE_CS, ee_addr); ++ return retval; ++} ++ +--- linux/drivers/xenomai/net/drivers/tulip/tulip_core.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/tulip_core.c 2021-04-07 16:01:27.334634013 +0800 +@@ -0,0 +1,1403 @@ ++/* tulip_core.c: A DEC 21x4x-family ethernet driver for Linux. */ ++ ++/* ++ Maintained by Jeff Garzik ++ Copyright 2000-2002 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#define DRV_NAME "tulip-rt" ++#define DRV_VERSION "0.9.15-pre11-rt" ++#define DRV_RELDATE "May 11, 2002" ++ ++#include ++#include "tulip.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __sparc__ ++#include ++#endif ++ ++#include ++ ++static char version[] = ++ "Linux Tulip driver version " DRV_VERSION " (" DRV_RELDATE ")\n"; ++ ++ ++/* A few user-configurable values. */ ++ ++/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ ++static unsigned int max_interrupt_work = 25; ++ ++#define MAX_UNITS 8 ++/* Used to pass the full-duplex flag, etc. */ ++static int full_duplex[MAX_UNITS]; ++static int options[MAX_UNITS]; ++static int mtu[MAX_UNITS]; /* Jumbo MTU for interfaces. */ ++ ++/* The possible media types that can be set in options[] are: */ ++const char * const medianame[32] = { ++ "10baseT", "10base2", "AUI", "100baseTx", ++ "10baseT-FDX", "100baseTx-FDX", "100baseT4", "100baseFx", ++ "100baseFx-FDX", "MII 10baseT", "MII 10baseT-FDX", "MII", ++ "10baseT(forced)", "MII 100baseTx", "MII 100baseTx-FDX", "MII 100baseT4", ++ "MII 100baseFx-HDX", "MII 100baseFx-FDX", "Home-PNA 1Mbps", "Invalid-19", ++ "","","","", "","","","", "","","","Transceiver reset", ++}; ++ ++/* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */ ++#if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \ ++ || defined(__sparc_) || defined(__ia64__) \ ++ || defined(__sh__) || defined(__mips__) ++static int rx_copybreak = 1518; ++#else ++static int rx_copybreak = 100; ++#endif ++ ++/* ++ Set the bus performance register. ++ Typical: Set 16 longword cache alignment, no burst limit. ++ Cache alignment bits 15:14 Burst length 13:8 ++ 0000 No alignment 0x00000000 unlimited 0800 8 longwords ++ 4000 8 longwords 0100 1 longword 1000 16 longwords ++ 8000 16 longwords 0200 2 longwords 2000 32 longwords ++ C000 32 longwords 0400 4 longwords ++ Warning: many older 486 systems are broken and require setting 0x00A04800 ++ 8 longword cache alignment, 8 longword burst. ++ ToDo: Non-Intel setting could be better. 
++*/ ++ ++#if defined(__alpha__) || defined(__ia64__) || defined(__x86_64__) ++static int csr0 = 0x01A00000 | 0xE000; ++#elif defined(__i386__) || defined(__powerpc__) ++static int csr0 = 0x01A00000 | 0x8000; ++#elif defined(__sparc__) || defined(__hppa__) ++/* The UltraSparc PCI controllers will disconnect at every 64-byte ++ * crossing anyways so it makes no sense to tell Tulip to burst ++ * any more than that. ++ */ ++static int csr0 = 0x01A00000 | 0x9000; ++#elif defined(__arm__) || defined(__sh__) ++static int csr0 = 0x01A00000 | 0x4800; ++#elif defined(__mips__) ++static int csr0 = 0x00200000 | 0x4000; ++#else ++#warning Processor architecture undefined! ++static int csr0 = 0x00A00000 | 0x4800; ++#endif ++ ++/* Operational parameters that usually are not changed. */ ++/* Time in jiffies before concluding the transmitter is hung. */ ++#define TX_TIMEOUT (4*HZ) ++ ++ ++MODULE_AUTHOR("The Linux Kernel Team"); ++MODULE_DESCRIPTION("Digital 21*4* Tulip ethernet driver"); ++MODULE_LICENSE("GPL"); ++module_param(tulip_debug, int, 0444); ++module_param(max_interrupt_work, int, 0444); ++/*MODULE_PARM(rx_copybreak, "i");*/ ++module_param(csr0, int, 0444); ++module_param_array(options, int, NULL, 0444); ++module_param_array(full_duplex, int, NULL, 0444); ++ ++#define PFX DRV_NAME ": " ++ ++#ifdef TULIP_DEBUG ++int tulip_debug = TULIP_DEBUG; ++#else ++int tulip_debug = 1; ++#endif ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++ ++ ++ ++/* ++ * This table use during operation for capabilities and media timer. ++ * ++ * It is indexed via the values in 'enum chips' ++ */ ++ ++struct tulip_chip_table tulip_tbl[] = { ++ /* DC21040 */ ++ { "Digital DC21040 Tulip", 128, 0x0001ebef, 0 }, ++ ++ /* DC21041 */ ++ { "Digital DC21041 Tulip", 128, 0x0001ebef, ++ HAS_MEDIA_TABLE | HAS_NWAY }, ++ ++ /* DC21140 */ ++ { "Digital DS21140 Tulip", 128, 0x0001ebef, ++ HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | HAS_PCI_MWI }, ++ ++ /* DC21142, DC21143 */ ++ { "Digital DS21143 Tulip", 128, 0x0801fbff, ++ HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI | HAS_NWAY ++ | HAS_INTR_MITIGATION | HAS_PCI_MWI }, ++ ++ /* LC82C168 */ ++ { "Lite-On 82c168 PNIC", 256, 0x0001fbef, ++ HAS_MII | HAS_PNICNWAY }, ++ ++ /* MX98713 */ ++ { "Macronix 98713 PMAC", 128, 0x0001ebef, ++ HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM }, ++ ++ /* MX98715 */ ++ { "Macronix 98715 PMAC", 256, 0x0001ebef, ++ HAS_MEDIA_TABLE }, ++ ++ /* MX98725 */ ++ { "Macronix 98725 PMAC", 256, 0x0001ebef, ++ HAS_MEDIA_TABLE }, ++ ++ /* AX88140 */ ++ { "ASIX AX88140", 128, 0x0001fbff, ++ HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | MC_HASH_ONLY | IS_ASIX }, ++ ++ /* PNIC2 */ ++ { "Lite-On PNIC-II", 256, 0x0801fbff, ++ HAS_MII | HAS_NWAY | HAS_8023X | HAS_PCI_MWI }, ++ ++ /* COMET */ ++ { "ADMtek Comet", 256, 0x0001abef, ++ MC_HASH_ONLY | COMET_MAC_ADDR }, ++ ++ /* COMPEX9881 */ ++ { "Compex 9881 PMAC", 128, 0x0001ebef, ++ HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM }, ++ ++ /* I21145 */ ++ { "Intel DS21145 Tulip", 128, 0x0801fbff, ++ HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI ++ | HAS_NWAY | HAS_PCI_MWI }, ++ ++ /* DM910X */ ++ { "Davicom DM9102/DM9102A", 128, 0x0001ebef, ++ HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | HAS_ACPI }, ++}; ++ ++ ++static struct pci_device_id tulip_pci_tbl[] = { ++ { 0x1011, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21040 }, ++ { 0x1011, 0x0014, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21041 }, 
++ { 0x1011, 0x0009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21140 }, ++ { 0x1011, 0x0019, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21143 }, ++ { 0x11AD, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, LC82C168 }, ++ { 0x10d9, 0x0512, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98713 }, ++ { 0x10d9, 0x0531, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98715 }, ++/* { 0x10d9, 0x0531, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98725 },*/ ++ { 0x125B, 0x1400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AX88140 }, ++ { 0x11AD, 0xc115, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PNIC2 }, ++ { 0x1317, 0x0981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x1317, 0x0985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x1317, 0x1985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x1317, 0x9511, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x13D1, 0xAB02, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x13D1, 0xAB03, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x13D1, 0xAB08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x104A, 0x0981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x104A, 0x2774, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x11F6, 0x9881, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMPEX9881 }, ++ { 0x8086, 0x0039, PCI_ANY_ID, PCI_ANY_ID, 0, 0, I21145 }, ++ { 0x1282, 0x9100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X }, ++ { 0x1282, 0x9102, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X }, ++ { 0x1113, 0x1216, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x1113, 0x1217, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98715 }, ++ { 0x1113, 0x9511, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { } /* terminate list */ ++}; ++MODULE_DEVICE_TABLE(pci, tulip_pci_tbl); ++ ++ ++/* A full-duplex map for media types. */ ++const char tulip_media_cap[32] = ++{0,0,0,16, 3,19,16,24, 27,4,7,5, 0,20,23,20, 28,31,0,0, }; ++u8 t21040_csr13[] = {2,0x0C,8,4, 4,0,0,0, 0,0,0,0, 4,0,0,0}; ++ ++/* 21041 transceiver register settings: 10-T, 10-2, AUI, 10-T, 10T-FD*/ ++u16 t21041_csr13[] = { ++ csr13_mask_10bt, /* 10-T */ ++ csr13_mask_auibnc, /* 10-2 */ ++ csr13_mask_auibnc, /* AUI */ ++ csr13_mask_10bt, /* 10-T */ ++ csr13_mask_10bt, /* 10T-FD */ ++}; ++u16 t21041_csr14[] = { 0xFFFF, 0xF7FD, 0xF7FD, 0x7F3F, 0x7F3D, }; ++u16 t21041_csr15[] = { 0x0008, 0x0006, 0x000E, 0x0008, 0x0008, }; ++ ++ ++static void tulip_init_ring(/*RTnet*/struct rtnet_device *rtdev); ++static int tulip_start_xmit(struct /*RTnet*/rtskb *skb, /*RTnet*/struct rtnet_device *rtdev); ++static int tulip_open(/*RTnet*/struct rtnet_device *rtdev); ++static int tulip_close(/*RTnet*/struct rtnet_device *rtdev); ++static void tulip_up(/*RTnet*/struct rtnet_device *rtdev); ++static void tulip_down(/*RTnet*/struct rtnet_device *rtdev); ++static struct net_device_stats *tulip_get_stats(struct rtnet_device *rtdev); ++//static void set_rx_mode(struct net_device *dev); ++ ++ ++static void tulip_set_power_state (struct tulip_private *tp, ++ int sleep, int snooze) ++{ ++ if (tp->flags & HAS_ACPI) { ++ u32 tmp, newtmp; ++ pci_read_config_dword (tp->pdev, CFDD, &tmp); ++ newtmp = tmp & ~(CFDD_Sleep | CFDD_Snooze); ++ if (sleep) ++ newtmp |= CFDD_Sleep; ++ else if (snooze) ++ newtmp |= CFDD_Snooze; ++ if (tmp != newtmp) ++ pci_write_config_dword (tp->pdev, CFDD, newtmp); ++ } ++ ++} ++ ++static void tulip_up(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int i; ++ ++ /* Wake the chip from sleep/snooze mode. */ ++ tulip_set_power_state (tp, 0, 0); ++ ++ /* On some chip revs we must set the MII/SYM port before the reset!? 
*/ ++ if (tp->mii_cnt || (tp->mtable && tp->mtable->has_mii)) ++ outl(0x00040000, ioaddr + CSR6); ++ ++ /* Reset the chip, holding bit 0 set at least 50 PCI cycles. */ ++ outl(0x00000001, ioaddr + CSR0); ++ udelay(100); ++ ++ /* Deassert reset. ++ Wait the specified 50 PCI cycles after a reset by initializing ++ Tx and Rx queues and the address filter list. */ ++ outl(tp->csr0, ioaddr + CSR0); ++ udelay(100); ++ ++ if (tulip_debug > 1) ++ printk(KERN_DEBUG "%s: tulip_up(), irq==%d.\n", rtdev->name, rtdev->irq); ++ ++ outl(tp->rx_ring_dma, ioaddr + CSR3); ++ outl(tp->tx_ring_dma, ioaddr + CSR4); ++ tp->cur_rx = tp->cur_tx = 0; ++ tp->dirty_rx = tp->dirty_tx = 0; ++ ++ if (tp->flags & MC_HASH_ONLY) { ++ u32 addr_low = cpu_to_le32(get_unaligned((u32 *)rtdev->dev_addr)); ++ u32 addr_high = cpu_to_le32(get_unaligned((u16 *)(rtdev->dev_addr+4))); ++ if (tp->chip_id == AX88140) { ++ outl(0, ioaddr + CSR13); ++ outl(addr_low, ioaddr + CSR14); ++ outl(1, ioaddr + CSR13); ++ outl(addr_high, ioaddr + CSR14); ++ } else if (tp->flags & COMET_MAC_ADDR) { ++ outl(addr_low, ioaddr + 0xA4); ++ outl(addr_high, ioaddr + 0xA8); ++ outl(0, ioaddr + 0xAC); ++ outl(0, ioaddr + 0xB0); ++ } ++ } else { ++ /* This is set_rx_mode(), but without starting the transmitter. */ ++ u16 *eaddrs = (u16 *)rtdev->dev_addr; ++ u16 *setup_frm = &tp->setup_frame[15*6]; ++ dma_addr_t mapping; ++ ++ /* 21140 bug: you must add the broadcast address. */ ++ memset(tp->setup_frame, 0xff, sizeof(tp->setup_frame)); ++ /* Fill the final entry of the table with our physical address. */ ++ *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0]; ++ *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1]; ++ *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2]; ++ ++ mapping = pci_map_single(tp->pdev, tp->setup_frame, ++ sizeof(tp->setup_frame), ++ PCI_DMA_TODEVICE); ++ tp->tx_buffers[tp->cur_tx].skb = NULL; ++ tp->tx_buffers[tp->cur_tx].mapping = mapping; ++ ++ /* Put the setup frame on the Tx list. */ ++ tp->tx_ring[tp->cur_tx].length = cpu_to_le32(0x08000000 | 192); ++ tp->tx_ring[tp->cur_tx].buffer1 = cpu_to_le32(mapping); ++ tp->tx_ring[tp->cur_tx].status = cpu_to_le32(DescOwned); ++ ++ tp->cur_tx++; ++ } ++ ++ tp->saved_if_port = rtdev->if_port; ++ if (rtdev->if_port == 0) ++ rtdev->if_port = tp->default_port; ++ ++ /* Allow selecting a default media. */ ++ i = 0; ++ if (tp->mtable == NULL) ++ goto media_picked; ++ if (rtdev->if_port) { ++ int looking_for = tulip_media_cap[rtdev->if_port] & MediaIsMII ? 11 : ++ (rtdev->if_port == 12 ? 0 : rtdev->if_port); ++ for (i = 0; i < tp->mtable->leafcount; i++) ++ if (tp->mtable->mleaf[i].media == looking_for) { ++ printk(KERN_INFO "%s: Using user-specified media %s.\n", ++ rtdev->name, medianame[rtdev->if_port]); ++ goto media_picked; ++ } ++ } ++ if ((tp->mtable->defaultmedia & 0x0800) == 0) { ++ int looking_for = tp->mtable->defaultmedia & MEDIA_MASK; ++ for (i = 0; i < tp->mtable->leafcount; i++) ++ if (tp->mtable->mleaf[i].media == looking_for) { ++ printk(KERN_INFO "%s: Using EEPROM-set media %s.\n", ++ rtdev->name, medianame[looking_for]); ++ goto media_picked; ++ } ++ } ++ /* Start sensing first non-full-duplex media. */ ++ for (i = tp->mtable->leafcount - 1; ++ (tulip_media_cap[tp->mtable->mleaf[i].media] & MediaAlwaysFD) && i > 0; i--) ++ ; ++media_picked: ++ ++ tp->csr6 = 0; ++ tp->cur_index = i; ++ tp->nwayset = 0; ++ ++ if (rtdev->if_port) { ++ if (tp->chip_id == DC21143 && ++ (tulip_media_cap[rtdev->if_port] & MediaIsMII)) { ++ /* We must reset the media CSRs when we force-select MII mode. 
*/ ++ outl(0x0000, ioaddr + CSR13); ++ outl(0x0000, ioaddr + CSR14); ++ outl(0x0008, ioaddr + CSR15); ++ } ++ tulip_select_media(rtdev, 1); ++ } else if (tp->chip_id == DC21041) { ++ rtdev->if_port = 0; ++ tp->nway = tp->mediasense = 1; ++ tp->nwayset = tp->lpar = 0; ++ outl(0x00000000, ioaddr + CSR13); ++ outl(0xFFFFFFFF, ioaddr + CSR14); ++ outl(0x00000008, ioaddr + CSR15); /* Listen on AUI also. */ ++ tp->csr6 = 0x80020000; ++ if (tp->sym_advertise & 0x0040) ++ tp->csr6 |= FullDuplex; ++ outl(tp->csr6, ioaddr + CSR6); ++ outl(0x0000EF01, ioaddr + CSR13); ++ ++ } else if (tp->chip_id == DC21142) { ++ if (tp->mii_cnt) { ++ tulip_select_media(rtdev, 1); ++ if (tulip_debug > 1) ++ printk(KERN_INFO "%s: Using MII transceiver %d, status %4.4x.\n", ++ rtdev->name, tp->phys[0], tulip_mdio_read(rtdev, tp->phys[0], 1)); ++ outl(csr6_mask_defstate, ioaddr + CSR6); ++ tp->csr6 = csr6_mask_hdcap; ++ rtdev->if_port = 11; ++ outl(0x0000, ioaddr + CSR13); ++ outl(0x0000, ioaddr + CSR14); ++ } else ++ t21142_start_nway(rtdev); ++ } else if (tp->chip_id == PNIC2) { ++ /* for initial startup advertise 10/100 Full and Half */ ++ tp->sym_advertise = 0x01E0; ++ /* enable autonegotiate end interrupt */ ++ outl(inl(ioaddr+CSR5)| 0x00008010, ioaddr + CSR5); ++ outl(inl(ioaddr+CSR7)| 0x00008010, ioaddr + CSR7); ++ pnic2_start_nway(rtdev); ++ } else if (tp->chip_id == LC82C168 && ! tp->medialock) { ++ if (tp->mii_cnt) { ++ rtdev->if_port = 11; ++ tp->csr6 = 0x814C0000 | (tp->full_duplex ? 0x0200 : 0); ++ outl(0x0001, ioaddr + CSR15); ++ } else if (inl(ioaddr + CSR5) & TPLnkPass) ++ pnic_do_nway(rtdev); ++ else { ++ /* Start with 10mbps to do autonegotiation. */ ++ outl(0x32, ioaddr + CSR12); ++ tp->csr6 = 0x00420000; ++ outl(0x0001B078, ioaddr + 0xB8); ++ outl(0x0201B078, ioaddr + 0xB8); ++ } ++ } else if ((tp->chip_id == MX98713 || tp->chip_id == COMPEX9881) ++ && ! tp->medialock) { ++ rtdev->if_port = 0; ++ tp->csr6 = 0x01880000 | (tp->full_duplex ? 0x0200 : 0); ++ outl(0x0f370000 | inw(ioaddr + 0x80), ioaddr + 0x80); ++ } else if (tp->chip_id == MX98715 || tp->chip_id == MX98725) { ++ /* Provided by BOLO, Macronix - 12/10/1998. */ ++ rtdev->if_port = 0; ++ tp->csr6 = 0x01a80200; ++ outl(0x0f370000 | inw(ioaddr + 0x80), ioaddr + 0x80); ++ outl(0x11000 | inw(ioaddr + 0xa0), ioaddr + 0xa0); ++ } else if (tp->chip_id == COMET) { ++ /* Enable automatic Tx underrun recovery. */ ++ outl(inl(ioaddr + 0x88) | 1, ioaddr + 0x88); ++ rtdev->if_port = tp->mii_cnt ? 11 : 0; ++ tp->csr6 = 0x00040000; ++ } else if (tp->chip_id == AX88140) { ++ tp->csr6 = tp->mii_cnt ? 0x00040100 : 0x00000100; ++ } else ++ tulip_select_media(rtdev, 1); ++ ++ /* Start the chip's Tx to process setup frame. */ ++ tulip_stop_rxtx(tp); ++ barrier(); ++ udelay(5); ++ outl(tp->csr6 | TxOn, ioaddr + CSR6); ++ ++ /* Enable interrupts by setting the interrupt mask. 
*/ ++ outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR5); ++ outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7); ++ tulip_start_rxtx(tp); ++ outl(0, ioaddr + CSR2); /* Rx poll demand */ ++ ++ if (tulip_debug > 2) { ++ printk(KERN_DEBUG "%s: Done tulip_up(), CSR0 %8.8x, CSR5 %8.8x CSR6 %8.8x.\n", ++ rtdev->name, inl(ioaddr + CSR0), inl(ioaddr + CSR5), ++ inl(ioaddr + CSR6)); ++ } ++} ++ ++ ++static int ++tulip_open(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int retval; ++ ++ if ((retval = /*RTnet*/rtdm_irq_request(&tp->irq_handle, rtdev->irq, ++ tulip_interrupt, 0, "rt_tulip", ++ rtdev))) { ++ printk("%s: Unable to install ISR for IRQ %d\n", ++ rtdev->name,rtdev->irq); ++ return retval; ++ } ++ ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ tulip_init_ring (rtdev); ++ ++ tulip_up (rtdev); ++ ++ rtnetif_start_queue (rtdev); ++ ++ return 0; ++} ++ ++/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ ++static void tulip_init_ring(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int i; ++ ++ tp->susp_rx = 0; ++ tp->ttimer = 0; ++ tp->nir = 0; ++ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ tp->rx_ring[i].status = 0x00000000; ++ tp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ); ++ tp->rx_ring[i].buffer2 = cpu_to_le32(tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * (i + 1)); ++ tp->rx_buffers[i].skb = NULL; ++ tp->rx_buffers[i].mapping = 0; ++ } ++ /* Mark the last entry as wrapping the ring. */ ++ tp->rx_ring[i-1].length = cpu_to_le32(PKT_BUF_SZ | DESC_RING_WRAP); ++ tp->rx_ring[i-1].buffer2 = cpu_to_le32(tp->rx_ring_dma); ++ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ dma_addr_t mapping; ++ ++ /* Note the receive buffer must be longword aligned. ++ dev_alloc_skb() provides 16 byte alignment. But do *not* ++ use skb_reserve() to align the IP header! */ ++ struct /*RTnet*/rtskb *skb = /*RTnet*/rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ); ++ tp->rx_buffers[i].skb = skb; ++ if (skb == NULL) ++ break; ++ mapping = pci_map_single(tp->pdev, skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ tp->rx_buffers[i].mapping = mapping; ++ tp->rx_ring[i].status = cpu_to_le32(DescOwned); /* Owned by Tulip chip */ ++ tp->rx_ring[i].buffer1 = cpu_to_le32(mapping); ++ } ++ tp->dirty_rx = (unsigned int)(i - RX_RING_SIZE); ++ ++ /* The Tx buffer descriptor is filled in as needed, but we ++ do need to clear the ownership bit. */ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ tp->tx_buffers[i].skb = NULL; ++ tp->tx_buffers[i].mapping = 0; ++ tp->tx_ring[i].status = 0x00000000; ++ tp->tx_ring[i].buffer2 = cpu_to_le32(tp->tx_ring_dma + sizeof(struct tulip_tx_desc) * (i + 1)); ++ } ++ tp->tx_ring[i-1].buffer2 = cpu_to_le32(tp->tx_ring_dma); ++} ++ ++static int ++tulip_start_xmit(struct /*RTnet*/rtskb *skb, /*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int entry; ++ u32 flag; ++ dma_addr_t mapping; ++ /*RTnet*/ ++ rtdm_lockctx_t context; ++ ++ ++ rtdm_lock_get_irqsave(&tp->lock, context); ++ ++ /* TODO: move to rtdev_xmit, use queue */ ++ if (rtnetif_queue_stopped(rtdev)) { ++ dev_kfree_rtskb(skb); ++ tp->stats.tx_dropped++; ++ ++ rtdm_lock_put_irqrestore(&tp->lock, context); ++ return 0; ++ } ++ /*RTnet*/ ++ ++ /* Calculate the next Tx descriptor entry. 
*/ ++ entry = tp->cur_tx % TX_RING_SIZE; ++ ++ tp->tx_buffers[entry].skb = skb; ++ mapping = pci_map_single(tp->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); ++ tp->tx_buffers[entry].mapping = mapping; ++ tp->tx_ring[entry].buffer1 = cpu_to_le32(mapping); ++ ++ if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE/2) {/* Typical path */ ++ flag = 0x60000000; /* No interrupt */ ++ } else if (tp->cur_tx - tp->dirty_tx == TX_RING_SIZE/2) { ++ flag = 0xe0000000; /* Tx-done intr. */ ++ } else if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE - 2) { ++ flag = 0x60000000; /* No Tx-done intr. */ ++ } else { /* Leave room for set_rx_mode() to fill entries. */ ++ flag = 0xe0000000; /* Tx-done intr. */ ++ rtnetif_stop_queue(rtdev); ++ } ++ if (entry == TX_RING_SIZE-1) ++ flag = 0xe0000000 | DESC_RING_WRAP; ++ ++ tp->tx_ring[entry].length = cpu_to_le32(skb->len | flag); ++ /* if we were using Transmit Automatic Polling, we would need a ++ * wmb() here. */ ++ tp->tx_ring[entry].status = cpu_to_le32(DescOwned); ++ ++ /*RTnet*/ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ /*RTnet*/ ++ ++ wmb(); ++ ++ tp->cur_tx++; ++ ++ /* Trigger an immediate transmit demand. */ ++ outl(0, rtdev->base_addr + CSR1); ++ ++ /*RTnet*/ ++ rtdm_lock_put_irqrestore(&tp->lock, context); ++ /*RTnet*/ ++ ++ return 0; ++} ++ ++static void tulip_clean_tx_ring(struct tulip_private *tp) ++{ ++ unsigned int dirty_tx; ++ ++ for (dirty_tx = tp->dirty_tx ; tp->cur_tx - dirty_tx > 0; ++ dirty_tx++) { ++ int entry = dirty_tx % TX_RING_SIZE; ++ int status = le32_to_cpu(tp->tx_ring[entry].status); ++ ++ if (status < 0) { ++ tp->stats.tx_errors++; /* It wasn't Txed */ ++ tp->tx_ring[entry].status = 0; ++ } ++ ++ /* Check for Tx filter setup frames. */ ++ if (tp->tx_buffers[entry].skb == NULL) { ++ /* test because dummy frames not mapped */ ++ if (tp->tx_buffers[entry].mapping) ++ pci_unmap_single(tp->pdev, ++ tp->tx_buffers[entry].mapping, ++ sizeof(tp->setup_frame), ++ PCI_DMA_TODEVICE); ++ continue; ++ } ++ ++ pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping, ++ tp->tx_buffers[entry].skb->len, ++ PCI_DMA_TODEVICE); ++ ++ /* Free the original skb. */ ++ /*RTnet*/dev_kfree_rtskb(tp->tx_buffers[entry].skb); ++ tp->tx_buffers[entry].skb = NULL; ++ tp->tx_buffers[entry].mapping = 0; ++ } ++} ++ ++static struct net_device_stats *tulip_get_stats(struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *) rtdev->priv; ++ return &tp->stats; ++} ++ ++static void tulip_down (/*RTnet*/struct rtnet_device *rtdev) ++{ ++ long ioaddr = rtdev->base_addr; ++ struct tulip_private *tp = (struct tulip_private *) rtdev->priv; ++ ++ rtdm_irq_disable(&tp->irq_handle); ++ rtdm_lock_get(&tp->lock); /* sync with IRQ handler on other cpu -JK- */ ++ ++ /* Disable interrupts by clearing the interrupt mask. */ ++ outl (0x00000000, ioaddr + CSR7); ++ ++ /* Stop the Tx and Rx processes. */ ++ tulip_stop_rxtx(tp); ++ ++ /* prepare receive buffers */ ++ tulip_refill_rx(rtdev); ++ ++ /* release any unconsumed transmit buffers */ ++ tulip_clean_tx_ring(tp); ++ ++ /* 21040 -- Leave the card in 10baseT state. 
*/ ++ if (tp->chip_id == DC21040) ++ outl (0x00000004, ioaddr + CSR13); ++ ++ if (inl (ioaddr + CSR6) != 0xffffffff) ++ tp->stats.rx_missed_errors += inl (ioaddr + CSR8) & 0xffff; ++ ++ rtdm_lock_put(&tp->lock); ++ rtdm_irq_enable(&tp->irq_handle); ++ ++ rtdev->if_port = tp->saved_if_port; ++ ++ /* Leave the driver in snooze, not sleep, mode. */ ++ tulip_set_power_state (tp, 0, 1); ++} ++ ++ ++static int tulip_close (/*RTnet*/struct rtnet_device *rtdev) ++{ ++ long ioaddr = rtdev->base_addr; ++ struct tulip_private *tp = (struct tulip_private *) rtdev->priv; ++ int i; ++ ++ rtnetif_stop_queue (rtdev); ++ ++ tulip_down (rtdev); ++ ++ if (tulip_debug > 1) ++ printk(KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n", ++ rtdev->name, inl (ioaddr + CSR5)); ++ ++ rtdm_irq_free(&tp->irq_handle); ++ ++ /* Free all the skbuffs in the Rx queue. */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct /*RTnet*/rtskb *skb = tp->rx_buffers[i].skb; ++ dma_addr_t mapping = tp->rx_buffers[i].mapping; ++ ++ tp->rx_buffers[i].skb = NULL; ++ tp->rx_buffers[i].mapping = 0; ++ ++ tp->rx_ring[i].status = 0; /* Not owned by Tulip chip. */ ++ tp->rx_ring[i].length = 0; ++ tp->rx_ring[i].buffer1 = 0xBADF00D0; /* An invalid address. */ ++ if (skb) { ++ pci_unmap_single(tp->pdev, mapping, PKT_BUF_SZ, ++ PCI_DMA_FROMDEVICE); ++ /*RTnet*/dev_kfree_rtskb (skb); ++ } ++ } ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ struct /*RTnet*/rtskb *skb = tp->tx_buffers[i].skb; ++ ++ if (skb != NULL) { ++ pci_unmap_single(tp->pdev, tp->tx_buffers[i].mapping, ++ skb->len, PCI_DMA_TODEVICE); ++ /*RTnet*/dev_kfree_rtskb (skb); ++ } ++ tp->tx_buffers[i].skb = NULL; ++ tp->tx_buffers[i].mapping = 0; ++ } ++ ++ rt_stack_disconnect(rtdev); ++ ++ return 0; ++} ++ ++#ifdef XXX_CONFIG_TULIP_MWI ++static void tulip_mwi_config (struct pci_dev *pdev, ++ struct net_device *dev) ++{ ++ struct tulip_private *tp = rtdev->priv; ++ u8 cache; ++ u16 pci_command; ++ u32 csr0; ++ ++ if (tulip_debug > 3) ++ printk(KERN_DEBUG "%s: tulip_mwi_config()\n", pci_name(pdev)); ++ ++ tp->csr0 = csr0 = 0; ++ ++ /* if we have any cache line size at all, we can do MRM */ ++ csr0 |= MRM; ++ ++ /* ...and barring hardware bugs, MWI */ ++ if (!(tp->chip_id == DC21143 && tp->revision == 65)) ++ csr0 |= MWI; ++ ++ /* set or disable MWI in the standard PCI command bit. 
++ * Check for the case where mwi is desired but not available ++ */ ++ if (csr0 & MWI) pci_set_mwi(pdev); ++ else pci_clear_mwi(pdev); ++ ++ /* read result from hardware (in case bit refused to enable) */ ++ pci_read_config_word(pdev, PCI_COMMAND, &pci_command); ++ if ((csr0 & MWI) && (!(pci_command & PCI_COMMAND_INVALIDATE))) ++ csr0 &= ~MWI; ++ ++ /* if cache line size hardwired to zero, no MWI */ ++ pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache); ++ if ((csr0 & MWI) && (cache == 0)) { ++ csr0 &= ~MWI; ++ pci_clear_mwi(pdev); ++ } ++ ++ /* assign per-cacheline-size cache alignment and ++ * burst length values ++ */ ++ switch (cache) { ++ case 8: ++ csr0 |= MRL | (1 << CALShift) | (16 << BurstLenShift); ++ break; ++ case 16: ++ csr0 |= MRL | (2 << CALShift) | (16 << BurstLenShift); ++ break; ++ case 32: ++ csr0 |= MRL | (3 << CALShift) | (32 << BurstLenShift); ++ break; ++ default: ++ cache = 0; ++ break; ++ } ++ ++ /* if we have a good cache line size, we by now have a good ++ * csr0, so save it and exit ++ */ ++ if (cache) ++ goto out; ++ ++ /* we don't have a good csr0 or cache line size, disable MWI */ ++ if (csr0 & MWI) { ++ pci_clear_mwi(pdev); ++ csr0 &= ~MWI; ++ } ++ ++ /* sane defaults for burst length and cache alignment ++ * originally from de4x5 driver ++ */ ++ csr0 |= (8 << BurstLenShift) | (1 << CALShift); ++ ++out: ++ tp->csr0 = csr0; ++ if (tulip_debug > 2) ++ printk(KERN_DEBUG "%s: MWI config cacheline=%d, csr0=%08x\n", ++ pci_name(pdev), cache, csr0); ++} ++#endif ++ ++ ++static int tulip_init_one (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct tulip_private *tp; ++ /* See note below on the multiport cards. */ ++ static unsigned char last_phys_addr[6] = {0x00, 'L', 'i', 'n', 'u', 'x'}; ++ static struct pci_device_id early_486_chipsets[] = { ++ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82424) }, ++ { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_496) }, ++ { }, ++ }; ++#if defined(__i386__) ++ static int last_irq; ++#endif ++ u8 chip_rev; ++ unsigned int i, irq; ++ unsigned short sum; ++ u8 ee_data[EEPROM_SIZE]; ++ /*RTnet*/struct rtnet_device *rtdev; ++ long ioaddr; ++ static int board_idx = -1; ++ int chip_idx = ent->driver_data; ++ unsigned int t2104x_mode = 0; ++ unsigned int eeprom_missing = 0; ++ ++#ifndef MODULE ++ static int did_version; /* Already printed version info. */ ++ if (tulip_debug > 0 && did_version++ == 0) ++ printk(KERN_INFO "%s", version); ++#endif ++ ++ board_idx++; ++ ++ if (cards[board_idx] == 0) ++ return -ENODEV; ++ ++ /* ++ * Lan media wire a tulip chip to a wan interface. Needs a very ++ * different driver (lmc driver) ++ */ ++ ++ if (pdev->subsystem_vendor == PCI_VENDOR_ID_LMC) { ++ printk(KERN_ERR PFX "skipping LMC card.\n"); ++ return -ENODEV; ++ } ++ ++ /* ++ * Early DM9100's need software CRC and the DMFE driver ++ */ ++ ++ if (pdev->vendor == 0x1282 && pdev->device == 0x9100) ++ { ++ u32 dev_rev; ++ /* Read Chip revision */ ++ pci_read_config_dword(pdev, PCI_REVISION_ID, &dev_rev); ++ if(dev_rev < 0x02000030) ++ { ++ printk(KERN_ERR PFX "skipping early DM9100 with Crc bug (use dmfe)\n"); ++ return -ENODEV; ++ } ++ } ++ ++ /* ++ * Looks for early PCI chipsets where people report hangs ++ * without the workarounds being on. ++ */ ++ ++ /* 1. Intel Saturn. Switch to 8 long words burst, 8 long word cache ++ aligned. Aries might need this too. The Saturn errata are not ++ pretty reading but thankfully it's an old 486 chipset. ++ ++ 2. The dreaded SiS496 486 chipset. 
Same workaround as Intel ++ Saturn. ++ */ ++ ++ if (pci_dev_present(early_486_chipsets)) ++ csr0 = MRL | MRM | (8 << BurstLenShift) | (1 << CALShift); ++ ++ /* bugfix: the ASIX must have a burst limit or horrible things happen. */ ++ if (chip_idx == AX88140) { ++ if ((csr0 & 0x3f00) == 0) ++ csr0 |= 0x2000; ++ } ++ ++ /* PNIC doesn't have MWI/MRL/MRM... */ ++ if (chip_idx == LC82C168) ++ csr0 &= ~0xfff10000; /* zero reserved bits 31:20, 16 */ ++ ++ /* DM9102A has troubles with MRM & clear reserved bits 24:22, 20, 16, 7:1 */ ++ if (pdev->vendor == 0x1282 && pdev->device == 0x9102) ++ csr0 &= ~0x01f100ff; ++ ++#if defined(__sparc__) ++ /* DM9102A needs 32-dword alignment/burst length on sparc - chip bug? */ ++ if (pdev->vendor == 0x1282 && pdev->device == 0x9102) ++ csr0 = (csr0 & ~0xff00) | 0xe000; ++#endif ++ ++ /* ++ * And back to business ++ */ ++ ++ i = pci_enable_device(pdev); ++ if (i) { ++ printk(KERN_ERR PFX ++ "Cannot enable tulip board #%d, aborting\n", ++ board_idx); ++ return i; ++ } ++ ++ ioaddr = pci_resource_start (pdev, 0); ++ irq = pdev->irq; ++ ++ /* alloc_etherdev ensures aligned and zeroed private structures */ ++ rtdev = /*RTnet*/rt_alloc_etherdev (sizeof (*tp), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (!rtdev) { ++ printk(KERN_ERR PFX "ether device alloc failed, aborting\n"); ++ return -ENOMEM; ++ } ++ //rtdev_alloc_name(rtdev, "eth%d");//Done by register_rtdev() ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ rtdev->sysbind = &pdev->dev; ++ ++ if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) { ++ printk(KERN_ERR PFX "%s: I/O region (0x%llx@0x%llx) too small, " ++ "aborting\n", pci_name(pdev), ++ (unsigned long long)pci_resource_len (pdev, 0), ++ (unsigned long long)pci_resource_start (pdev, 0)); ++ goto err_out_free_netdev; ++ } ++ ++ /* grab all resources from both PIO and MMIO regions, as we ++ * don't want anyone else messing around with our hardware */ ++ if (pci_request_regions (pdev, "tulip")) ++ goto err_out_free_netdev; ++ ++#ifndef USE_IO_OPS ++ ioaddr = (unsigned long) ioremap (pci_resource_start (pdev, 1), ++ tulip_tbl[chip_idx].io_size); ++ if (!ioaddr) ++ goto err_out_free_res; ++#endif ++ ++ pci_read_config_byte (pdev, PCI_REVISION_ID, &chip_rev); ++ ++ /* ++ * initialize private data structure 'tp' ++ * it is zeroed and aligned in alloc_etherdev ++ */ ++ tp = rtdev->priv; ++ ++ tp->rx_ring = pci_alloc_consistent(pdev, ++ sizeof(struct tulip_rx_desc) * RX_RING_SIZE + ++ sizeof(struct tulip_tx_desc) * TX_RING_SIZE, ++ &tp->rx_ring_dma); ++ if (!tp->rx_ring) ++ goto err_out_mtable; ++ tp->tx_ring = (struct tulip_tx_desc *)(tp->rx_ring + RX_RING_SIZE); ++ tp->tx_ring_dma = tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * RX_RING_SIZE; ++ ++ tp->chip_id = chip_idx; ++ tp->flags = tulip_tbl[chip_idx].flags; ++ tp->pdev = pdev; ++ tp->base_addr = ioaddr; ++ tp->revision = chip_rev; ++ tp->csr0 = csr0; ++ rtdm_lock_init(&tp->lock); ++ spin_lock_init(&tp->mii_lock); ++ ++ rtdev->base_addr = ioaddr; ++ rtdev->irq = irq; ++ ++#ifdef XXX_CONFIG_TULIP_MWI ++ if (!force_csr0 && (tp->flags & HAS_PCI_MWI)) ++ tulip_mwi_config (pdev, rtdev); ++#else ++ /* MWI is broken for DC21143 rev 65... */ ++ if (chip_idx == DC21143 && chip_rev == 65) ++ tp->csr0 &= ~MWI; ++#endif ++ ++ /* Stop the chip's Tx and Rx processes. */ ++ tulip_stop_rxtx(tp); ++ ++ pci_set_master(pdev); ++ ++ /* Clear the missed-packet counter. 
*/ ++ inl(ioaddr + CSR8); ++ ++ if (chip_idx == DC21041) { ++ if (inl(ioaddr + CSR9) & 0x8000) { ++ chip_idx = DC21040; ++ t2104x_mode = 1; ++ } else { ++ t2104x_mode = 2; ++ } ++ } ++ ++ /* The station address ROM is read byte serially. The register must ++ be polled, waiting for the value to be read bit serially from the ++ EEPROM. ++ */ ++ sum = 0; ++ if (chip_idx == DC21040) { ++ outl(0, ioaddr + CSR9); /* Reset the pointer with a dummy write. */ ++ for (i = 0; i < 6; i++) { ++ int value, boguscnt = 100000; ++ do ++ value = inl(ioaddr + CSR9); ++ while (value < 0 && --boguscnt > 0); ++ rtdev->dev_addr[i] = value; ++ sum += value & 0xff; ++ } ++ } else if (chip_idx == LC82C168) { ++ for (i = 0; i < 3; i++) { ++ int value, boguscnt = 100000; ++ outl(0x600 | i, ioaddr + 0x98); ++ do ++ value = inl(ioaddr + CSR9); ++ while (value < 0 && --boguscnt > 0); ++ put_unaligned(le16_to_cpu(value), ((u16*)rtdev->dev_addr) + i); ++ sum += value & 0xffff; ++ } ++ } else if (chip_idx == COMET) { ++ /* No need to read the EEPROM. */ ++ put_unaligned(inl(ioaddr + 0xA4), (u32 *)rtdev->dev_addr); ++ put_unaligned(inl(ioaddr + 0xA8), (u16 *)(rtdev->dev_addr + 4)); ++ for (i = 0; i < 6; i ++) ++ sum += rtdev->dev_addr[i]; ++ } else { ++ /* A serial EEPROM interface, we read now and sort it out later. */ ++ int sa_offset = 0; ++ int ee_addr_size = tulip_read_eeprom(ioaddr, 0xff, 8) & 0x40000 ? 8 : 6; ++ ++ for (i = 0; i < sizeof(ee_data)/2; i++) ++ ((u16 *)ee_data)[i] = ++ le16_to_cpu(tulip_read_eeprom(ioaddr, i, ee_addr_size)); ++ ++ /* DEC now has a specification (see Notes) but early board makers ++ just put the address in the first EEPROM locations. */ ++ /* This does memcmp(eedata, eedata+16, 8) */ ++ for (i = 0; i < 8; i ++) ++ if (ee_data[i] != ee_data[16+i]) ++ sa_offset = 20; ++ if (ee_data[0] == 0xff && ee_data[1] == 0xff && ee_data[2] == 0) ++ sa_offset = 2; /* Grrr, damn Matrox boards. */ ++#ifdef CONFIG_DDB5476 ++ if ((pdev->bus->number == 0) && (PCI_SLOT(pdev->devfn) == 6)) { ++ /* DDB5476 MAC address in first EEPROM locations. */ ++ sa_offset = 0; ++ /* No media table either */ ++ tp->flags &= ~HAS_MEDIA_TABLE; ++ } ++#endif ++#ifdef CONFIG_DDB5477 ++ if ((pdev->bus->number == 0) && (PCI_SLOT(pdev->devfn) == 4)) { ++ /* DDB5477 MAC address in first EEPROM locations. */ ++ sa_offset = 0; ++ /* No media table either */ ++ tp->flags &= ~HAS_MEDIA_TABLE; ++ } ++#endif ++#ifdef CONFIG_MIPS_COBALT ++ if ((pdev->bus->number == 0) && ++ ((PCI_SLOT(pdev->devfn) == 7) || ++ (PCI_SLOT(pdev->devfn) == 12))) { ++ /* Cobalt MAC address in first EEPROM locations. */ ++ sa_offset = 0; ++ /* No media table either */ ++ tp->flags &= ~HAS_MEDIA_TABLE; ++ } ++#endif ++ for (i = 0; i < 6; i ++) { ++ rtdev->dev_addr[i] = ee_data[i + sa_offset]; ++ sum += ee_data[i + sa_offset]; ++ } ++ } ++ /* Lite-On boards have the address byte-swapped. */ ++ if ((rtdev->dev_addr[0] == 0xA0 || rtdev->dev_addr[0] == 0xC0) ++ && rtdev->dev_addr[1] == 0x00) ++ for (i = 0; i < 6; i+=2) { ++ char tmp = rtdev->dev_addr[i]; ++ rtdev->dev_addr[i] = rtdev->dev_addr[i+1]; ++ rtdev->dev_addr[i+1] = tmp; ++ } ++ /* On the Zynx 315 Etherarray and other multiport boards only the ++ first Tulip has an EEPROM. ++ On Sparc systems the mac address is held in the OBP property ++ "local-mac-address". ++ The addresses of the subsequent ports are derived from the first. ++ Many PCI BIOSes also incorrectly report the IRQ line, so we correct ++ that here as well. 
*/ ++ if (sum == 0 || sum == 6*0xff) { ++#if defined(__sparc__) ++ struct pcidev_cookie *pcp = pdev->sysdata; ++#endif ++ eeprom_missing = 1; ++ for (i = 0; i < 5; i++) ++ rtdev->dev_addr[i] = last_phys_addr[i]; ++ rtdev->dev_addr[i] = last_phys_addr[i] + 1; ++#if defined(__sparc__) ++ if ((pcp != NULL) && prom_getproplen(pcp->prom_node, ++ "local-mac-address") == 6) { ++ prom_getproperty(pcp->prom_node, "local-mac-address", ++ rtdev->dev_addr, 6); ++ } ++#endif ++#if defined(__i386__) /* Patch up x86 BIOS bug. */ ++ if (last_irq) ++ irq = last_irq; ++#endif ++ } ++ ++ for (i = 0; i < 6; i++) ++ last_phys_addr[i] = rtdev->dev_addr[i]; ++#if defined(__i386__) ++ last_irq = irq; ++#endif ++ ++ /* The lower four bits are the media type. */ ++ if (board_idx >= 0 && board_idx < MAX_UNITS) { ++ /* Somehow required for this RTnet version, don't ask me why... */ ++ if (!options[board_idx]) ++ tp->default_port = 11; /*MII*/ ++ /*RTnet*/ ++ ++ if (options[board_idx] & MEDIA_MASK) ++ tp->default_port = options[board_idx] & MEDIA_MASK; ++ if ((options[board_idx] & FullDuplex) || full_duplex[board_idx] > 0) ++ tp->full_duplex = 1; ++ if (mtu[board_idx] > 0) ++ rtdev->mtu = mtu[board_idx]; ++ } ++ if (rtdev->mem_start & MEDIA_MASK) ++ tp->default_port = rtdev->mem_start & MEDIA_MASK; ++ if (tp->default_port) { ++ printk(KERN_INFO "tulip%d: Transceiver selection forced to %s.\n", ++ board_idx, medianame[tp->default_port & MEDIA_MASK]); ++ tp->medialock = 1; ++ if (tulip_media_cap[tp->default_port] & MediaAlwaysFD) ++ tp->full_duplex = 1; ++ } ++ if (tp->full_duplex) ++ tp->full_duplex_lock = 1; ++ ++ if (tulip_media_cap[tp->default_port] & MediaIsMII) { ++ u16 media2advert[] = { 0x20, 0x40, 0x03e0, 0x60, 0x80, 0x100, 0x200 }; ++ tp->mii_advertise = media2advert[tp->default_port - 9]; ++ tp->mii_advertise |= (tp->flags & HAS_8023X); /* Matching bits! */ ++ } ++ ++ if (tp->flags & HAS_MEDIA_TABLE) { ++ memcpy(tp->eeprom, ee_data, sizeof(tp->eeprom)); ++ ++ sprintf(rtdev->name, "tulip%d", board_idx); /* hack */ ++ tulip_parse_eeprom(rtdev); ++ strcpy(rtdev->name, "rteth%d"); /* un-hack */ ++ } ++ ++ if ((tp->flags & ALWAYS_CHECK_MII) || ++ (tp->mtable && tp->mtable->has_mii) || ++ ( ! tp->mtable && (tp->flags & HAS_MII))) { ++ if (tp->mtable && tp->mtable->has_mii) { ++ for (i = 0; i < tp->mtable->leafcount; i++) ++ if (tp->mtable->mleaf[i].media == 11) { ++ tp->cur_index = i; ++ tp->saved_if_port = rtdev->if_port; ++ tulip_select_media(rtdev, 2); ++ rtdev->if_port = tp->saved_if_port; ++ break; ++ } ++ } ++ ++ /* Find the connected MII xcvrs. ++ Doing this in open() would allow detecting external xcvrs ++ later, but takes much time. */ ++ tulip_find_mii (rtdev, board_idx); ++ } ++ ++ rtdev->open = tulip_open; ++ rtdev->stop = tulip_close; ++ rtdev->hard_header = rt_eth_header; ++ rtdev->hard_start_xmit = tulip_start_xmit; ++ rtdev->get_stats = tulip_get_stats; ++ ++ if (/*RTnet*/rt_register_rtnetdev(rtdev)) { ++ goto err_out_free_ring; ++ } ++ ++ printk(KERN_INFO "%s: %s rev %d at %#3lx,", ++ rtdev->name, tulip_tbl[chip_idx].chip_name, chip_rev, ioaddr); ++ pci_set_drvdata(pdev, rtdev); ++ ++ if (t2104x_mode == 1) ++ printk(" 21040 compatible mode,"); ++ else if (t2104x_mode == 2) ++ printk(" 21041 mode,"); ++ if (eeprom_missing) ++ printk(" EEPROM not present,"); ++ for (i = 0; i < 6; i++) ++ printk("%c%2.2X", i ? 
':' : ' ', rtdev->dev_addr[i]); ++ printk(", IRQ %d.\n", irq); ++ ++/*RTnet ++ if (tp->chip_id == PNIC2) ++ tp->link_change = pnic2_lnk_change; ++ else if ((tp->flags & HAS_NWAY) || tp->chip_id == DC21041) ++ tp->link_change = t21142_lnk_change; ++ else if (tp->flags & HAS_PNICNWAY) ++ tp->link_change = pnic_lnk_change; ++ *RTnet*/ ++ tp->link_change = NULL; ++ ++ /* Reset the xcvr interface and turn on heartbeat. */ ++ switch (chip_idx) { ++ case DC21041: ++ if (tp->sym_advertise == 0) ++ tp->sym_advertise = 0x0061; ++ outl(0x00000000, ioaddr + CSR13); ++ outl(0xFFFFFFFF, ioaddr + CSR14); ++ outl(0x00000008, ioaddr + CSR15); /* Listen on AUI also. */ ++ outl(inl(ioaddr + CSR6) | csr6_fd, ioaddr + CSR6); ++ outl(0x0000EF01, ioaddr + CSR13); ++ break; ++ case DC21040: ++ outl(0x00000000, ioaddr + CSR13); ++ outl(0x00000004, ioaddr + CSR13); ++ break; ++ case DC21140: ++ case DM910X: ++ default: ++ if (tp->mtable) ++ outl(tp->mtable->csr12dir | 0x100, ioaddr + CSR12); ++ break; ++ case DC21142: ++ if (tp->mii_cnt || tulip_media_cap[rtdev->if_port] & MediaIsMII) { ++ outl(csr6_mask_defstate, ioaddr + CSR6); ++ outl(0x0000, ioaddr + CSR13); ++ outl(0x0000, ioaddr + CSR14); ++ outl(csr6_mask_hdcap, ioaddr + CSR6); ++ } else ++ t21142_start_nway(rtdev); ++ break; ++ case PNIC2: ++ /* just do a reset for sanity sake */ ++ outl(0x0000, ioaddr + CSR13); ++ outl(0x0000, ioaddr + CSR14); ++ break; ++ case LC82C168: ++ if ( ! tp->mii_cnt) { ++ tp->nway = 1; ++ tp->nwayset = 0; ++ outl(csr6_ttm | csr6_ca, ioaddr + CSR6); ++ outl(0x30, ioaddr + CSR12); ++ outl(0x0001F078, ioaddr + CSR6); ++ outl(0x0201F078, ioaddr + CSR6); /* Turn on autonegotiation. */ ++ } ++ break; ++ case MX98713: ++ case COMPEX9881: ++ outl(0x00000000, ioaddr + CSR6); ++ outl(0x000711C0, ioaddr + CSR14); /* Turn on NWay. */ ++ outl(0x00000001, ioaddr + CSR13); ++ break; ++ case MX98715: ++ case MX98725: ++ outl(0x01a80000, ioaddr + CSR6); ++ outl(0xFFFFFFFF, ioaddr + CSR14); ++ outl(0x00001000, ioaddr + CSR12); ++ break; ++ case COMET: ++ /* No initialization necessary. 
*/ ++ break; ++ } ++ ++ /* put the chip in snooze mode until opened */ ++ tulip_set_power_state (tp, 0, 1); ++ ++ return 0; ++ ++err_out_free_ring: ++ pci_free_consistent (pdev, ++ sizeof (struct tulip_rx_desc) * RX_RING_SIZE + ++ sizeof (struct tulip_tx_desc) * TX_RING_SIZE, ++ tp->rx_ring, tp->rx_ring_dma); ++ ++err_out_mtable: ++ if (tp->mtable) ++ kfree (tp->mtable); ++#ifndef USE_IO_OPS ++ iounmap((void *)ioaddr); ++ ++err_out_free_res: ++#endif ++ pci_release_regions (pdev); ++ ++err_out_free_netdev: ++ /*RTnet*/rtdev_free (rtdev); ++ return -ENODEV; ++} ++ ++ ++static void tulip_remove_one (struct pci_dev *pdev) ++{ ++ struct rtnet_device *rtdev = (struct rtnet_device *) pci_get_drvdata (pdev); ++ struct tulip_private *tp; ++ ++ if (!rtdev || !rtdev->priv) ++ return; ++ ++ tp = rtdev->priv; ++ pci_free_consistent (pdev, ++ sizeof (struct tulip_rx_desc) * RX_RING_SIZE + ++ sizeof (struct tulip_tx_desc) * TX_RING_SIZE, ++ tp->rx_ring, tp->rx_ring_dma); ++ rt_unregister_rtnetdev (rtdev); ++ if (tp->mtable) ++ kfree (tp->mtable); ++#ifndef USE_IO_OPS ++ iounmap((void *)rtdev->base_addr); ++#endif ++ /*RTnet*/ ++ rt_rtdev_disconnect(rtdev); ++ rtdev_free (rtdev); ++ /*RTnet*/ ++ pci_release_regions (pdev); ++ pci_set_drvdata (pdev, NULL); ++ ++ /* pci_power_off (pdev, -1); */ ++} ++ ++ ++static struct pci_driver tulip_driver = { ++ name: DRV_NAME, ++ id_table: tulip_pci_tbl, ++ probe: tulip_init_one, ++ remove: tulip_remove_one, ++}; ++ ++ ++static int __init tulip_init (void) ++{ ++#ifdef MODULE ++ printk(KERN_INFO "%s", version); ++#endif ++ ++ /* copy module parms into globals */ ++ tulip_rx_copybreak = rx_copybreak; ++ tulip_max_interrupt_work = max_interrupt_work; ++ ++ /* probe for and init boards */ ++ return pci_register_driver (&tulip_driver); ++} ++ ++ ++static void __exit tulip_cleanup (void) ++{ ++ pci_unregister_driver (&tulip_driver); ++} ++ ++ ++module_init(tulip_init); ++module_exit(tulip_cleanup); +--- linux/drivers/xenomai/net/drivers/tulip/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/Makefile 2021-04-07 16:01:27.329634020 +0800 +@@ -0,0 +1,12 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_TULIP) += rt_tulip.o ++ ++rt_tulip-y := \ ++ tulip_core.o \ ++ eeprom.o \ ++ interrupt.o \ ++ media.o \ ++ 21142.o \ ++ pnic.o \ ++ pnic2.o +--- linux/drivers/xenomai/net/drivers/tulip/21142.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/21142.c 2021-04-07 16:01:27.324634028 +0800 +@@ -0,0 +1,51 @@ ++/* ++ drivers/net/tulip/21142.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. 
++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#include "tulip.h" ++#include ++#include ++ ++u16 t21142_csr14[] = { 0xFFFF, 0x0705, 0x0705, 0x0000, 0x7F3D, }; ++ ++ ++void t21142_start_nway(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int csr14 = ((tp->sym_advertise & 0x0780) << 9) | ++ ((tp->sym_advertise & 0x0020) << 1) | 0xffbf; ++ ++ rtdev->if_port = 0; ++ tp->nway = tp->mediasense = 1; ++ tp->nwayset = tp->lpar = 0; ++ if (tulip_debug > 1) ++ printk(KERN_DEBUG "%s: Restarting 21143 autonegotiation, csr14=%8.8x.\n", ++ rtdev->name, csr14); ++ outl(0x0001, ioaddr + CSR13); ++ udelay(100); ++ outl(csr14, ioaddr + CSR14); ++ tp->csr6 = 0x82420000 | (tp->sym_advertise & 0x0040 ? FullDuplex : 0); ++ outl(tp->csr6, ioaddr + CSR6); ++ if (tp->mtable && tp->mtable->csr15dir) { ++ outl(tp->mtable->csr15dir, ioaddr + CSR15); ++ outl(tp->mtable->csr15val, ioaddr + CSR15); ++ } else ++ outw(0x0008, ioaddr + CSR15); ++ outl(0x1301, ioaddr + CSR12); /* Trigger NWAY. */ ++} ++ ++ +--- linux/drivers/xenomai/net/drivers/tulip/interrupt.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/interrupt.c 2021-04-07 16:01:27.319634035 +0800 +@@ -0,0 +1,391 @@ ++/* ++ drivers/net/tulip/interrupt.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#include "tulip.h" ++#include ++#include ++ ++ ++int tulip_rx_copybreak; ++unsigned int tulip_max_interrupt_work; ++ ++#ifdef CONFIG_NET_HW_FLOWCONTROL ++ ++#define MIT_SIZE 15 ++unsigned int mit_table[MIT_SIZE+1] = ++{ ++ /* CRS11 21143 hardware Mitigation Control Interrupt ++ We use only RX mitigation we other techniques for ++ TX intr. mitigation. ++ ++ 31 Cycle Size (timer control) ++ 30:27 TX timer in 16 * Cycle size ++ 26:24 TX No pkts before Int. ++ 23:20 RX timer in Cycle size ++ 19:17 RX No pkts before Int. ++ 16 Continues Mode (CM) ++ */ ++ ++ 0x0, /* IM disabled */ ++ 0x80150000, /* RX time = 1, RX pkts = 2, CM = 1 */ ++ 0x80150000, ++ 0x80270000, ++ 0x80370000, ++ 0x80490000, ++ 0x80590000, ++ 0x80690000, ++ 0x807B0000, ++ 0x808B0000, ++ 0x809D0000, ++ 0x80AD0000, ++ 0x80BD0000, ++ 0x80CF0000, ++ 0x80DF0000, ++// 0x80FF0000 /* RX time = 16, RX pkts = 7, CM = 1 */ ++ 0x80F10000 /* RX time = 16, RX pkts = 0, CM = 1 */ ++}; ++#endif ++ ++ ++int tulip_refill_rx(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int entry; ++ int refilled = 0; ++ ++ /* Refill the Rx ring buffers. 
*/ ++ for (; tp->cur_rx - tp->dirty_rx > 0; tp->dirty_rx++) { ++ entry = tp->dirty_rx % RX_RING_SIZE; ++ if (tp->rx_buffers[entry].skb == NULL) { ++ struct /*RTnet*/rtskb *skb; ++ dma_addr_t mapping; ++ ++ skb = tp->rx_buffers[entry].skb = /*RTnet*/rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ); ++ if (skb == NULL) ++ break; ++ ++ mapping = pci_map_single(tp->pdev, skb->tail, PKT_BUF_SZ, ++ PCI_DMA_FROMDEVICE); ++ tp->rx_buffers[entry].mapping = mapping; ++ ++ tp->rx_ring[entry].buffer1 = cpu_to_le32(mapping); ++ refilled++; ++ } ++ tp->rx_ring[entry].status = cpu_to_le32(DescOwned); ++ } ++ if(tp->chip_id == LC82C168) { ++ if(((inl(rtdev->base_addr + CSR5)>>17)&0x07) == 4) { ++ /* Rx stopped due to out of buffers, ++ * restart it ++ */ ++ outl(0x01, rtdev->base_addr + CSR2); ++ } ++ } ++ return refilled; ++} ++ ++ ++static int tulip_rx(/*RTnet*/struct rtnet_device *rtdev, nanosecs_abs_t *time_stamp) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int entry = tp->cur_rx % RX_RING_SIZE; ++ int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx; ++ int received = 0; ++ ++ if (tulip_debug > 4) ++ /*RTnet*/rtdm_printk(KERN_DEBUG " In tulip_rx(), entry %d %8.8x.\n", entry, ++ tp->rx_ring[entry].status); ++ /* If we own the next entry, it is a new packet. Send it up. */ ++ while ( ! (tp->rx_ring[entry].status & cpu_to_le32(DescOwned))) { ++ s32 status = le32_to_cpu(tp->rx_ring[entry].status); ++ ++ if (tulip_debug > 5) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: In tulip_rx(), entry %d %8.8x.\n", ++ rtdev->name, entry, status); ++ if (--rx_work_limit < 0) ++ break; ++ if ((status & 0x38008300) != 0x0300) { ++ if ((status & 0x38000300) != 0x0300) { ++ /* Ingore earlier buffers. */ ++ if ((status & 0xffff) != 0x7fff) { ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: Oversized Ethernet frame " ++ "spanned multiple buffers, status %8.8x!\n", ++ rtdev->name, status); ++ tp->stats.rx_length_errors++; ++ } ++ } else if (status & RxDescFatalErr) { ++ /* There was a fatal error. */ ++ if (tulip_debug > 2) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Receive error, Rx status %8.8x.\n", ++ rtdev->name, status); ++ tp->stats.rx_errors++; /* end of a packet.*/ ++ if (status & 0x0890) tp->stats.rx_length_errors++; ++ if (status & 0x0004) tp->stats.rx_frame_errors++; ++ if (status & 0x0002) tp->stats.rx_crc_errors++; ++ if (status & 0x0001) tp->stats.rx_fifo_errors++; ++ } ++ } else { ++ /* Omit the four octet CRC from the length. */ ++ short pkt_len = ((status >> 16) & 0x7ff) - 4; ++ struct /*RTnet*/rtskb *skb; ++ ++#ifndef final_version ++ if (pkt_len > 1518) { ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: Bogus packet size of %d (%#x).\n", ++ rtdev->name, pkt_len, pkt_len); ++ pkt_len = 1518; ++ tp->stats.rx_length_errors++; ++ } ++#endif ++ ++ { ++ unsigned char *temp = /*RTnet*/rtskb_put(skb = tp->rx_buffers[entry].skb, pkt_len); ++ ++#ifndef final_version ++ if (tp->rx_buffers[entry].mapping != ++ le32_to_cpu(tp->rx_ring[entry].buffer1)) { ++ /*RTnet*/rtdm_printk(KERN_ERR "%s: Internal fault: The skbuff addresses " ++ "do not match in tulip_rx: %08x vs. %08llx ? 
/ %p.\n", ++ rtdev->name, ++ le32_to_cpu(tp->rx_ring[entry].buffer1), ++ (unsigned long long)tp->rx_buffers[entry].mapping, ++ temp);/*RTnet*/ ++ } ++#endif ++ ++ pci_unmap_single(tp->pdev, tp->rx_buffers[entry].mapping, ++ PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ ++ tp->rx_buffers[entry].skb = NULL; ++ tp->rx_buffers[entry].mapping = 0; ++ } ++ skb->protocol = /*RTnet*/rt_eth_type_trans(skb, rtdev); ++ skb->time_stamp = *time_stamp; ++ /*RTnet*/rtnetif_rx(skb); ++ ++ tp->stats.rx_packets++; ++ tp->stats.rx_bytes += pkt_len; ++ } ++ received++; ++ entry = (++tp->cur_rx) % RX_RING_SIZE; ++ } ++ return received; ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. */ ++int tulip_interrupt(rtdm_irq_t *irq_handle) ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read();/*RTnet*/ ++ struct rtnet_device *rtdev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device);/*RTnet*/ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ unsigned int csr5; ++ int entry; ++ int missed; ++ int rx = 0; ++ int tx = 0; ++ int oi = 0; ++ int maxrx = RX_RING_SIZE; ++ int maxtx = TX_RING_SIZE; ++ int maxoi = TX_RING_SIZE; ++ unsigned int work_count = tulip_max_interrupt_work; ++ ++ /* Let's see whether the interrupt really is for us */ ++ csr5 = inl(ioaddr + CSR5); ++ ++ if ((csr5 & (NormalIntr|AbnormalIntr)) == 0) { ++ rtdm_printk("%s: unexpected IRQ!\n",rtdev->name); ++ return RTDM_IRQ_NONE; ++ } ++ ++ tp->nir++; ++ ++ do { ++ /* Acknowledge all of the current interrupt sources ASAP. */ ++ outl(csr5 & 0x0001ffff, ioaddr + CSR5); ++ ++ if (tulip_debug > 4) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: interrupt csr5=%#8.8x new csr5=%#8.8x.\n", ++ rtdev->name, csr5, inl(rtdev->base_addr + CSR5)); ++ ++ if (csr5 & (RxIntr | RxNoBuf)) { ++ rx += tulip_rx(rtdev, &time_stamp); ++ tulip_refill_rx(rtdev); ++ } ++ ++ if (csr5 & (TxNoBuf | TxDied | TxIntr | TimerInt)) { ++ unsigned int dirty_tx; ++ ++ rtdm_lock_get(&tp->lock); ++ ++ for (dirty_tx = tp->dirty_tx; tp->cur_tx - dirty_tx > 0; ++ dirty_tx++) { ++ int entry = dirty_tx % TX_RING_SIZE; ++ int status = le32_to_cpu(tp->tx_ring[entry].status); ++ ++ if (status < 0) ++ break; /* It still has not been Txed */ ++ ++ /* Check for Rx filter setup frames. */ ++ if (tp->tx_buffers[entry].skb == NULL) { ++ /* test because dummy frames not mapped */ ++ if (tp->tx_buffers[entry].mapping) ++ pci_unmap_single(tp->pdev, ++ tp->tx_buffers[entry].mapping, ++ sizeof(tp->setup_frame), ++ PCI_DMA_TODEVICE); ++ continue; ++ } ++ ++ if (status & 0x8000) { ++ /* There was an major error, log it. */ ++#ifndef final_version ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n", ++ rtdev->name, status); ++#endif ++ tp->stats.tx_errors++; ++ if (status & 0x4104) tp->stats.tx_aborted_errors++; ++ if (status & 0x0C00) tp->stats.tx_carrier_errors++; ++ if (status & 0x0200) tp->stats.tx_window_errors++; ++ if (status & 0x0002) tp->stats.tx_fifo_errors++; ++ if ((status & 0x0080) && tp->full_duplex == 0) ++ tp->stats.tx_heartbeat_errors++; ++ } else { ++ tp->stats.tx_bytes += ++ tp->tx_buffers[entry].skb->len; ++ tp->stats.collisions += (status >> 3) & 15; ++ tp->stats.tx_packets++; ++ } ++ ++ pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping, ++ tp->tx_buffers[entry].skb->len, ++ PCI_DMA_TODEVICE); ++ ++ /* Free the original skb. 
*/ ++ /*RTnet*/dev_kfree_rtskb(tp->tx_buffers[entry].skb); ++ tp->tx_buffers[entry].skb = NULL; ++ tp->tx_buffers[entry].mapping = 0; ++ tx++; ++ rtnetif_tx(rtdev); ++ } ++ ++#ifndef final_version ++ if (tp->cur_tx - dirty_tx > TX_RING_SIZE) { ++ /*RTnet*/rtdm_printk(KERN_ERR "%s: Out-of-sync dirty pointer, %d vs. %d.\n", ++ rtdev->name, dirty_tx, tp->cur_tx); ++ dirty_tx += TX_RING_SIZE; ++ } ++#endif ++ ++ if (tp->cur_tx - dirty_tx < TX_RING_SIZE - 2) ++ /*RTnet*/rtnetif_wake_queue(rtdev); ++ ++ tp->dirty_tx = dirty_tx; ++ if (csr5 & TxDied) { ++ if (tulip_debug > 2) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: The transmitter stopped." ++ " CSR5 is %x, CSR6 %x, new CSR6 %x.\n", ++ rtdev->name, csr5, inl(ioaddr + CSR6), tp->csr6); ++ tulip_restart_rxtx(tp); ++ } ++ rtdm_lock_put(&tp->lock); ++ } ++ ++ /* Log errors. */ ++ if (csr5 & AbnormalIntr) { /* Abnormal error summary bit. */ ++ if (csr5 == 0xffffffff) ++ break; ++ /*RTnet*/rtdm_printk(KERN_ERR "%s: Error detected, " ++ "device may not work any more (csr5=%08x)!\n", rtdev->name, csr5); ++ /* Clear all error sources, included undocumented ones! */ ++ outl(0x0800f7ba, ioaddr + CSR5); ++ oi++; ++ } ++ if (csr5 & TimerInt) { ++ ++ if (tulip_debug > 2) ++ /*RTnet*/rtdm_printk(KERN_ERR "%s: Re-enabling interrupts, %8.8x.\n", ++ rtdev->name, csr5); ++ outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7); ++ tp->ttimer = 0; ++ oi++; ++ } ++ if (tx > maxtx || rx > maxrx || oi > maxoi) { ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: Too much work during an interrupt, " ++ "csr5=0x%8.8x. (%lu) (%d,%d,%d)\n", rtdev->name, csr5, tp->nir, tx, rx, oi); ++ ++ /* Acknowledge all interrupt sources. */ ++ outl(0x8001ffff, ioaddr + CSR5); ++ if (tp->flags & HAS_INTR_MITIGATION) { ++ /* Josip Loncaric at ICASE did extensive experimentation ++ to develop a good interrupt mitigation setting.*/ ++ outl(0x8b240000, ioaddr + CSR11); ++ } else if (tp->chip_id == LC82C168) { ++ /* the LC82C168 doesn't have a hw timer.*/ ++ outl(0x00, ioaddr + CSR7); ++ } else { ++ /* Mask all interrupting sources, set timer to ++ re-enable. */ ++ } ++ break; ++ } ++ ++ work_count--; ++ if (work_count == 0) ++ break; ++ ++ csr5 = inl(ioaddr + CSR5); ++ } while ((csr5 & (NormalIntr|AbnormalIntr)) != 0); ++ ++ tulip_refill_rx(rtdev); ++ ++ /* check if the card is in suspend mode */ ++ entry = tp->dirty_rx % RX_RING_SIZE; ++ if (tp->rx_buffers[entry].skb == NULL) { ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: in rx suspend mode: (%lu) (tp->cur_rx = %u, ttimer = %d, rx = %d) go/stay in suspend mode\n", rtdev->name, tp->nir, tp->cur_rx, tp->ttimer, rx); ++ if (tp->chip_id == LC82C168) ++ outl(0x00, ioaddr + CSR7); ++ else { ++ if (tp->ttimer == 0 || (inl(ioaddr + CSR11) & 0xffff) == 0) { ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: in rx suspend mode: (%lu) set timer\n", rtdev->name, tp->nir); ++ outl(tulip_tbl[tp->chip_id].valid_intrs | TimerInt, ++ ioaddr + CSR7); ++ outl(TimerInt, ioaddr + CSR5); ++ outl(12, ioaddr + CSR11); ++ tp->ttimer = 1; ++ } ++ } ++ } ++ ++ if ((missed = inl(ioaddr + CSR8) & 0x1ffff)) { ++ tp->stats.rx_dropped += missed & 0x10000 ? 
0x10000 : missed; ++ } ++ ++ if (tulip_debug > 4) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: exiting interrupt, csr5=%#4.4x.\n", ++ rtdev->name, inl(ioaddr + CSR5)); ++ if (rx) ++ rt_mark_stack_mgr(rtdev); ++ return RTDM_IRQ_HANDLED; ++} +--- linux/drivers/xenomai/net/drivers/tulip/tulip.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/tulip.h 2021-04-07 16:01:27.314634042 +0800 +@@ -0,0 +1,490 @@ ++/* ++ drivers/net/tulip/tulip.h ++ ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#ifndef __NET_TULIP_H__ ++#define __NET_TULIP_H__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++ ++ ++/* undefine, or define to various debugging levels (>4 == obscene levels) */ ++#define TULIP_DEBUG 1 ++ ++/* undefine USE_IO_OPS for MMIO, define for PIO */ ++#ifdef CONFIG_TULIP_MMIO ++# undef USE_IO_OPS ++#else ++# define USE_IO_OPS 1 ++#endif ++ ++ ++ ++struct tulip_chip_table { ++ char *chip_name; ++ unsigned int io_size; ++ int valid_intrs; /* CSR7 interrupt enable settings */ ++ int flags; ++}; ++ ++ ++enum tbl_flag { ++ HAS_MII = 0x0001, ++ HAS_MEDIA_TABLE = 0x0002, ++ CSR12_IN_SROM = 0x0004, ++ ALWAYS_CHECK_MII = 0x0008, ++ HAS_ACPI = 0x0010, ++ MC_HASH_ONLY = 0x0020, /* Hash-only multicast filter. */ ++ HAS_PNICNWAY = 0x0080, ++ HAS_NWAY = 0x0040, /* Uses internal NWay xcvr. */ ++ HAS_INTR_MITIGATION = 0x0100, ++ IS_ASIX = 0x0200, ++ HAS_8023X = 0x0400, ++ COMET_MAC_ADDR = 0x0800, ++ HAS_PCI_MWI = 0x1000, ++}; ++ ++ ++/* chip types. careful! order is VERY IMPORTANT here, as these ++ * are used throughout the driver as indices into arrays */ ++/* Note 21142 == 21143. */ ++enum chips { ++ DC21040 = 0, ++ DC21041 = 1, ++ DC21140 = 2, ++ DC21142 = 3, DC21143 = 3, ++ LC82C168, ++ MX98713, ++ MX98715, ++ MX98725, ++ AX88140, ++ PNIC2, ++ COMET, ++ COMPEX9881, ++ I21145, ++ DM910X, ++}; ++ ++ ++enum MediaIs { ++ MediaIsFD = 1, ++ MediaAlwaysFD = 2, ++ MediaIsMII = 4, ++ MediaIsFx = 8, ++ MediaIs100 = 16 ++}; ++ ++ ++/* Offsets to the Command and Status Registers, "CSRs". All accesses ++ must be longword instructions and quadword aligned. */ ++enum tulip_offsets { ++ CSR0 = 0, ++ CSR1 = 0x08, ++ CSR2 = 0x10, ++ CSR3 = 0x18, ++ CSR4 = 0x20, ++ CSR5 = 0x28, ++ CSR6 = 0x30, ++ CSR7 = 0x38, ++ CSR8 = 0x40, ++ CSR9 = 0x48, ++ CSR10 = 0x50, ++ CSR11 = 0x58, ++ CSR12 = 0x60, ++ CSR13 = 0x68, ++ CSR14 = 0x70, ++ CSR15 = 0x78, ++}; ++ ++/* register offset and bits for CFDD PCI config reg */ ++enum pci_cfg_driver_reg { ++ CFDD = 0x40, ++ CFDD_Sleep = (1 << 31), ++ CFDD_Snooze = (1 << 30), ++}; ++ ++ ++/* The bits in the CSR5 status registers, mostly interrupt sources. 
*/ ++enum status_bits { ++ TimerInt = 0x800, ++ SytemError = 0x2000, ++ TPLnkFail = 0x1000, ++ TPLnkPass = 0x10, ++ NormalIntr = 0x10000, ++ AbnormalIntr = 0x8000, ++ RxJabber = 0x200, ++ RxDied = 0x100, ++ RxNoBuf = 0x80, ++ RxIntr = 0x40, ++ TxFIFOUnderflow = 0x20, ++ TxJabber = 0x08, ++ TxNoBuf = 0x04, ++ TxDied = 0x02, ++ TxIntr = 0x01, ++}; ++ ++ ++enum tulip_mode_bits { ++ TxThreshold = (1 << 22), ++ FullDuplex = (1 << 9), ++ TxOn = 0x2000, ++ AcceptBroadcast = 0x0100, ++ AcceptAllMulticast = 0x0080, ++ AcceptAllPhys = 0x0040, ++ AcceptRunt = 0x0008, ++ RxOn = 0x0002, ++ RxTx = (TxOn | RxOn), ++}; ++ ++ ++enum tulip_busconfig_bits { ++ MWI = (1 << 24), ++ MRL = (1 << 23), ++ MRM = (1 << 21), ++ CALShift = 14, ++ BurstLenShift = 8, ++}; ++ ++ ++/* The Tulip Rx and Tx buffer descriptors. */ ++struct tulip_rx_desc { ++ s32 status; ++ s32 length; ++ u32 buffer1; ++ u32 buffer2; ++}; ++ ++ ++struct tulip_tx_desc { ++ s32 status; ++ s32 length; ++ u32 buffer1; ++ u32 buffer2; /* We use only buffer 1. */ ++}; ++ ++ ++enum desc_status_bits { ++ DescOwned = 0x80000000, ++ RxDescFatalErr = 0x8000, ++ RxWholePkt = 0x0300, ++}; ++ ++ ++enum t21041_csr13_bits { ++ csr13_eng = (0xEF0<<4), /* for eng. purposes only, hardcode at EF0h */ ++ csr13_aui = (1<<3), /* clear to force 10bT, set to force AUI/BNC */ ++ csr13_cac = (1<<2), /* CSR13/14/15 autoconfiguration */ ++ csr13_srl = (1<<0), /* When reset, resets all SIA functions, machines */ ++ ++ csr13_mask_auibnc = (csr13_eng | csr13_aui | csr13_srl), ++ csr13_mask_10bt = (csr13_eng | csr13_srl), ++}; ++ ++enum t21143_csr6_bits { ++ csr6_sc = (1<<31), ++ csr6_ra = (1<<30), ++ csr6_ign_dest_msb = (1<<26), ++ csr6_mbo = (1<<25), ++ csr6_scr = (1<<24), /* scramble mode flag: can't be set */ ++ csr6_pcs = (1<<23), /* Enables PCS functions (symbol mode requires csr6_ps be set) default is set */ ++ csr6_ttm = (1<<22), /* Transmit Threshold Mode, set for 10baseT, 0 for 100BaseTX */ ++ csr6_sf = (1<<21), /* Store and forward. If set ignores TR bits */ ++ csr6_hbd = (1<<19), /* Heart beat disable. Disables SQE function in 10baseT */ ++ csr6_ps = (1<<18), /* Port Select. 0 (defualt) = 10baseT, 1 = 100baseTX: can't be set */ ++ csr6_ca = (1<<17), /* Collision Offset Enable. If set uses special algorithm in low collision situations */ ++ csr6_trh = (1<<15), /* Transmit Threshold high bit */ ++ csr6_trl = (1<<14), /* Transmit Threshold low bit */ ++ ++ /*************************************************************** ++ * This table shows transmit threshold values based on media * ++ * and these two registers (from PNIC1 & 2 docs) Note: this is * ++ * all meaningless if sf is set. 
* ++ ***************************************************************/ ++ ++ /*********************************** ++ * (trh,trl) * 100BaseTX * 10BaseT * ++ *********************************** ++ * (0,0) * 128 * 72 * ++ * (0,1) * 256 * 96 * ++ * (1,0) * 512 * 128 * ++ * (1,1) * 1024 * 160 * ++ ***********************************/ ++ ++ csr6_fc = (1<<12), /* Forces a collision in next transmission (for testing in loopback mode) */ ++ csr6_om_int_loop = (1<<10), /* internal (FIFO) loopback flag */ ++ csr6_om_ext_loop = (1<<11), /* external (PMD) loopback flag */ ++ /* set both and you get (PHY) loopback */ ++ csr6_fd = (1<<9), /* Full duplex mode, disables hearbeat, no loopback */ ++ csr6_pm = (1<<7), /* Pass All Multicast */ ++ csr6_pr = (1<<6), /* Promiscuous mode */ ++ csr6_sb = (1<<5), /* Start(1)/Stop(0) backoff counter */ ++ csr6_if = (1<<4), /* Inverse Filtering, rejects only addresses in address table: can't be set */ ++ csr6_pb = (1<<3), /* Pass Bad Frames, (1) causes even bad frames to be passed on */ ++ csr6_ho = (1<<2), /* Hash-only filtering mode: can't be set */ ++ csr6_hp = (1<<0), /* Hash/Perfect Receive Filtering Mode: can't be set */ ++ ++ csr6_mask_capture = (csr6_sc | csr6_ca), ++ csr6_mask_defstate = (csr6_mask_capture | csr6_mbo), ++ csr6_mask_hdcap = (csr6_mask_defstate | csr6_hbd | csr6_ps), ++ csr6_mask_hdcaptt = (csr6_mask_hdcap | csr6_trh | csr6_trl), ++ csr6_mask_fullcap = (csr6_mask_hdcaptt | csr6_fd), ++ csr6_mask_fullpromisc = (csr6_pr | csr6_pm), ++ csr6_mask_filters = (csr6_hp | csr6_ho | csr6_if), ++ csr6_mask_100bt = (csr6_scr | csr6_pcs | csr6_hbd), ++}; ++ ++ ++/* Keep the ring sizes a power of two for efficiency. ++ Making the Tx ring too large decreases the effectiveness of channel ++ bonding and packet priority. ++ There are no ill effects from too-large receive rings. */ ++#define TX_RING_SIZE 16 ++#define RX_RING_SIZE 8 /* RTnet: RX_RING_SIZE*2 rtskbs will be preallocated */ ++ ++#define MEDIA_MASK 31 ++ ++#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer. */ ++ ++#define TULIP_MIN_CACHE_LINE 8 /* in units of 32-bit words */ ++ ++#if defined(__sparc__) || defined(__hppa__) ++/* The UltraSparc PCI controllers will disconnect at every 64-byte ++ * crossing anyways so it makes no sense to tell Tulip to burst ++ * any more than that. ++ */ ++#define TULIP_MAX_CACHE_LINE 16 /* in units of 32-bit words */ ++#else ++#define TULIP_MAX_CACHE_LINE 32 /* in units of 32-bit words */ ++#endif ++ ++ ++/* Ring-wrap flag in length field, use for last ring entry. ++ 0x01000000 means chain on buffer2 address, ++ 0x02000000 means use the ring start address in CSR2/3. ++ Note: Some work-alike chips do not function correctly in chained mode. ++ The ASIX chip works only in chained mode. ++ Thus we indicates ring mode, but always write the 'next' field for ++ chained mode as well. ++*/ ++#define DESC_RING_WRAP 0x02000000 ++ ++ ++#define EEPROM_SIZE 128 /* 2 << EEPROM_ADDRLEN */ ++ ++ ++#define RUN_AT(x) (jiffies + (x)) ++ ++#if defined(__i386__) /* AKA get_unaligned() */ ++#define get_u16(ptr) (*(u16 *)(ptr)) ++#else ++#define get_u16(ptr) (((u8*)(ptr))[0] + (((u8*)(ptr))[1]<<8)) ++#endif ++ ++struct medialeaf { ++ u8 type; ++ u8 media; ++ unsigned char *leafdata; ++}; ++ ++ ++struct mediatable { ++ u16 defaultmedia; ++ u8 leafcount; ++ u8 csr12dir; /* General purpose pin directions. */ ++ unsigned has_mii:1; ++ unsigned has_nonmii:1; ++ unsigned has_reset:6; ++ u32 csr15dir; ++ u32 csr15val; /* 21143 NWay setting. 
*/ ++ struct medialeaf mleaf[0]; ++}; ++ ++ ++struct mediainfo { ++ struct mediainfo *next; ++ int info_type; ++ int index; ++ unsigned char *info; ++}; ++ ++struct ring_info { ++ struct /*RTnet*/rtskb *skb; ++ dma_addr_t mapping; ++}; ++ ++ ++struct tulip_private { ++ const char *product_name; ++ /*RTnet*/struct rtnet_device *next_module; ++ struct tulip_rx_desc *rx_ring; ++ struct tulip_tx_desc *tx_ring; ++ dma_addr_t rx_ring_dma; ++ dma_addr_t tx_ring_dma; ++ /* The saved address of a sent-in-place packet/buffer, for skfree(). */ ++ struct ring_info tx_buffers[TX_RING_SIZE]; ++ /* The addresses of receive-in-place skbuffs. */ ++ struct ring_info rx_buffers[RX_RING_SIZE]; ++ u16 setup_frame[96]; /* Pseudo-Tx frame to init address table. */ ++ int chip_id; ++ int revision; ++ int flags; ++ struct net_device_stats stats; ++ u32 mc_filter[2]; ++ /*RTnet*/rtdm_lock_t lock; ++ spinlock_t mii_lock; ++ unsigned int cur_rx, cur_tx; /* The next free ring entry */ ++ unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ ++ ++#ifdef CONFIG_NET_HW_FLOWCONTROL ++#define RX_A_NBF_STOP 0xffffff3f /* To disable RX and RX-NOBUF ints. */ ++ int fc_bit; ++ int mit_sel; ++ int mit_change; /* Signal for Interrupt Mitigtion */ ++#endif ++ unsigned int full_duplex:1; /* Full-duplex operation requested. */ ++ unsigned int full_duplex_lock:1; ++ unsigned int fake_addr:1; /* Multiport board faked address. */ ++ unsigned int default_port:4; /* Last dev->if_port value. */ ++ unsigned int media2:4; /* Secondary monitored media port. */ ++ unsigned int medialock:1; /* Don't sense media type. */ ++ unsigned int mediasense:1; /* Media sensing in progress. */ ++ unsigned int nway:1, nwayset:1; /* 21143 internal NWay. */ ++ unsigned int csr0; /* CSR0 setting. */ ++ unsigned int csr6; /* Current CSR6 control settings. */ ++ unsigned char eeprom[EEPROM_SIZE]; /* Serial EEPROM contents. */ ++ void (*link_change) (/*RTnet*/struct rtnet_device *rtdev, int csr5); ++ u16 sym_advertise, mii_advertise; /* NWay capabilities advertised. */ ++ u16 lpar; /* 21143 Link partner ability. */ ++ u16 advertising[4]; ++ signed char phys[4], mii_cnt; /* MII device addresses. */ ++ struct mediatable *mtable; ++ int cur_index; /* Current media index. */ ++ int saved_if_port; ++ struct pci_dev *pdev; ++ int ttimer; ++ int susp_rx; ++ unsigned long nir; ++ unsigned long base_addr; ++ int pad0, pad1; /* Used for 8-byte alignment */ ++ rtdm_irq_t irq_handle; ++}; ++ ++ ++struct eeprom_fixup { ++ char *name; ++ unsigned char addr0; ++ unsigned char addr1; ++ unsigned char addr2; ++ u16 newtable[32]; /* Max length below. 
*/ ++}; ++ ++ ++/* 21142.c */ ++extern u16 t21142_csr14[]; ++void t21142_start_nway(/*RTnet*/struct rtnet_device *rtdev); ++void t21142_lnk_change(/*RTnet*/struct rtnet_device *rtdev, int csr5); ++ ++ ++/* PNIC2.c */ ++void pnic2_lnk_change(/*RTnet*/struct rtnet_device *rtdev, int csr5); ++void pnic2_start_nway(/*RTnet*/struct rtnet_device *rtdev); ++void pnic2_lnk_change(/*RTnet*/struct rtnet_device *rtdev, int csr5); ++ ++/* eeprom.c */ ++void tulip_parse_eeprom(struct rtnet_device *rtdev); ++int tulip_read_eeprom(long ioaddr, int location, int addr_len); ++ ++/* interrupt.c */ ++extern unsigned int tulip_max_interrupt_work; ++extern int tulip_rx_copybreak; ++int tulip_interrupt(rtdm_irq_t *irq_handle); ++int tulip_refill_rx(/*RTnet*/struct rtnet_device *rtdev); ++ ++/* media.c */ ++int tulip_mdio_read(struct rtnet_device *dev, int phy_id, int location); ++void tulip_mdio_write(struct rtnet_device *dev, int phy_id, int location, int value); ++void tulip_select_media(struct rtnet_device *dev, int startup); ++int tulip_check_duplex(struct rtnet_device *dev); ++void tulip_find_mii (struct rtnet_device *dev, int board_idx); ++ ++/* pnic.c */ ++void pnic_do_nway(/*RTnet*/struct rtnet_device *rtdev); ++void pnic_lnk_change(/*RTnet*/struct rtnet_device *rtdev, int csr5); ++ ++/* tulip_core.c */ ++extern int tulip_debug; ++extern const char * const medianame[]; ++extern const char tulip_media_cap[]; ++extern struct tulip_chip_table tulip_tbl[]; ++extern u8 t21040_csr13[]; ++extern u16 t21041_csr13[]; ++extern u16 t21041_csr14[]; ++extern u16 t21041_csr15[]; ++ ++#ifndef USE_IO_OPS ++#undef inb ++#undef inw ++#undef inl ++#undef outb ++#undef outw ++#undef outl ++#define inb(addr) readb((void*)(addr)) ++#define inw(addr) readw((void*)(addr)) ++#define inl(addr) readl((void*)(addr)) ++#define outb(val,addr) writeb((val), (void*)(addr)) ++#define outw(val,addr) writew((val), (void*)(addr)) ++#define outl(val,addr) writel((val), (void*)(addr)) ++#endif /* !USE_IO_OPS */ ++ ++ ++ ++static inline void tulip_start_rxtx(struct tulip_private *tp) ++{ ++ long ioaddr = tp->base_addr; ++ outl(tp->csr6 | RxTx, ioaddr + CSR6); ++ barrier(); ++ (void) inl(ioaddr + CSR6); /* mmio sync */ ++} ++ ++static inline void tulip_stop_rxtx(struct tulip_private *tp) ++{ ++ long ioaddr = tp->base_addr; ++ u32 csr6 = inl(ioaddr + CSR6); ++ ++ if (csr6 & RxTx) { ++ outl(csr6 & ~RxTx, ioaddr + CSR6); ++ barrier(); ++ (void) inl(ioaddr + CSR6); /* mmio sync */ ++ } ++} ++ ++static inline void tulip_restart_rxtx(struct tulip_private *tp) ++{ ++ tulip_stop_rxtx(tp); ++ rtdm_task_busy_sleep(5); ++ tulip_start_rxtx(tp); ++} ++ ++#endif /* __NET_TULIP_H__ */ +--- linux/drivers/xenomai/net/drivers/tulip/media.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/media.c 2021-04-07 16:01:27.309634049 +0800 +@@ -0,0 +1,567 @@ ++/* ++ drivers/net/tulip/media.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. 
++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#include ++#include ++#include ++#include ++#include "tulip.h" ++ ++ ++/* This is a mysterious value that can be written to CSR11 in the 21040 (only) ++ to support a pre-NWay full-duplex signaling mechanism using short frames. ++ No one knows what it should be, but if left at its default value some ++ 10base2(!) packets trigger a full-duplex-request interrupt. */ ++#define FULL_DUPLEX_MAGIC 0x6969 ++ ++/* The maximum data clock rate is 2.5 Mhz. The minimum timing is usually ++ met by back-to-back PCI I/O cycles, but we insert a delay to avoid ++ "overclocking" issues or future 66Mhz PCI. */ ++#define mdio_delay() inl(mdio_addr) ++ ++/* Read and write the MII registers using software-generated serial ++ MDIO protocol. It is just different enough from the EEPROM protocol ++ to not share code. The maxium data clock rate is 2.5 Mhz. */ ++#define MDIO_SHIFT_CLK 0x10000 ++#define MDIO_DATA_WRITE0 0x00000 ++#define MDIO_DATA_WRITE1 0x20000 ++#define MDIO_ENB 0x00000 /* Ignore the 0x02000 databook setting. */ ++#define MDIO_ENB_IN 0x40000 ++#define MDIO_DATA_READ 0x80000 ++ ++static const unsigned char comet_miireg2offset[32] = { ++ 0xB4, 0xB8, 0xBC, 0xC0, 0xC4, 0xC8, 0xCC, 0, 0,0,0,0, 0,0,0,0, ++ 0,0xD0,0,0, 0,0,0,0, 0,0,0,0, 0, 0xD4, 0xD8, 0xDC, }; ++ ++ ++/* MII transceiver control section. ++ Read and write the MII registers using software-generated serial ++ MDIO protocol. See the MII specifications or DP83840A data sheet ++ for details. */ ++ ++int tulip_mdio_read(struct rtnet_device *rtdev, int phy_id, int location) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int i; ++ int read_cmd = (0xf6 << 10) | ((phy_id & 0x1f) << 5) | location; ++ int retval = 0; ++ long ioaddr = rtdev->base_addr; ++ long mdio_addr = ioaddr + CSR9; ++ unsigned long flags; ++ ++ if (location & ~0x1f) ++ return 0xffff; ++ ++ if (tp->chip_id == COMET && phy_id == 30) { ++ if (comet_miireg2offset[location]) ++ return inl(ioaddr + comet_miireg2offset[location]); ++ return 0xffff; ++ } ++ ++ spin_lock_irqsave(&tp->mii_lock, flags); ++ if (tp->chip_id == LC82C168) { ++ int i = 1000; ++ outl(0x60020000 + (phy_id<<23) + (location<<18), ioaddr + 0xA0); ++ inl(ioaddr + 0xA0); ++ inl(ioaddr + 0xA0); ++ while (--i > 0) { ++ barrier(); ++ if ( ! ((retval = inl(ioaddr + 0xA0)) & 0x80000000)) ++ break; ++ } ++ spin_unlock_irqrestore(&tp->mii_lock, flags); ++ return retval & 0xffff; ++ } ++ ++ /* Establish sync by sending at least 32 logic ones. */ ++ for (i = 32; i >= 0; i--) { ++ outl(MDIO_ENB | MDIO_DATA_WRITE1, mdio_addr); ++ mdio_delay(); ++ outl(MDIO_ENB | MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ /* Shift the read command bits out. */ ++ for (i = 15; i >= 0; i--) { ++ int dataval = (read_cmd & (1 << i)) ? MDIO_DATA_WRITE1 : 0; ++ ++ outl(MDIO_ENB | dataval, mdio_addr); ++ mdio_delay(); ++ outl(MDIO_ENB | dataval | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ /* Read the two transition, 16 data, and wire-idle bits. */ ++ for (i = 19; i > 0; i--) { ++ outl(MDIO_ENB_IN, mdio_addr); ++ mdio_delay(); ++ retval = (retval << 1) | ((inl(mdio_addr) & MDIO_DATA_READ) ? 
1 : 0); ++ outl(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ ++ spin_unlock_irqrestore(&tp->mii_lock, flags); ++ return (retval>>1) & 0xffff; ++} ++ ++void tulip_mdio_write(struct rtnet_device *rtdev, int phy_id, int location, int val) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int i; ++ int cmd = (0x5002 << 16) | ((phy_id & 0x1f) << 23) | (location<<18) | (val & 0xffff); ++ long ioaddr = rtdev->base_addr; ++ long mdio_addr = ioaddr + CSR9; ++ unsigned long flags; ++ ++ if (location & ~0x1f) ++ return; ++ ++ if (tp->chip_id == COMET && phy_id == 30) { ++ if (comet_miireg2offset[location]) ++ outl(val, ioaddr + comet_miireg2offset[location]); ++ return; ++ } ++ ++ spin_lock_irqsave(&tp->mii_lock, flags); ++ if (tp->chip_id == LC82C168) { ++ int i = 1000; ++ outl(cmd, ioaddr + 0xA0); ++ do { ++ barrier(); ++ if ( ! (inl(ioaddr + 0xA0) & 0x80000000)) ++ break; ++ } while (--i > 0); ++ spin_unlock_irqrestore(&tp->mii_lock, flags); ++ return; ++ } ++ ++ /* Establish sync by sending 32 logic ones. */ ++ for (i = 32; i >= 0; i--) { ++ outl(MDIO_ENB | MDIO_DATA_WRITE1, mdio_addr); ++ mdio_delay(); ++ outl(MDIO_ENB | MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ /* Shift the command bits out. */ ++ for (i = 31; i >= 0; i--) { ++ int dataval = (cmd & (1 << i)) ? MDIO_DATA_WRITE1 : 0; ++ outl(MDIO_ENB | dataval, mdio_addr); ++ mdio_delay(); ++ outl(MDIO_ENB | dataval | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ /* Clear out extra bits. */ ++ for (i = 2; i > 0; i--) { ++ outl(MDIO_ENB_IN, mdio_addr); ++ mdio_delay(); ++ outl(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ ++ spin_unlock_irqrestore(&tp->mii_lock, flags); ++} ++ ++ ++/* Set up the transceiver control registers for the selected media type. */ ++void tulip_select_media(struct rtnet_device *rtdev, int startup) ++{ ++ long ioaddr = rtdev->base_addr; ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ struct mediatable *mtable = tp->mtable; ++ u32 new_csr6; ++ int i; ++ ++ if (mtable) { ++ struct medialeaf *mleaf = &mtable->mleaf[tp->cur_index]; ++ unsigned char *p = mleaf->leafdata; ++ switch (mleaf->type) { ++ case 0: /* 21140 non-MII xcvr. */ ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Using a 21140 non-MII transceiver" ++ " with control setting %2.2x.\n", ++ rtdev->name, p[1]); ++ rtdev->if_port = p[0]; ++ if (startup) ++ outl(mtable->csr12dir | 0x100, ioaddr + CSR12); ++ outl(p[1], ioaddr + CSR12); ++ new_csr6 = 0x02000000 | ((p[2] & 0x71) << 18); ++ break; ++ case 2: case 4: { ++ u16 setup[5]; ++ u32 csr13val, csr14val, csr15dir, csr15val; ++ for (i = 0; i < 5; i++) ++ setup[i] = get_u16(&p[i*2 + 1]); ++ ++ rtdev->if_port = p[0] & MEDIA_MASK; ++ if (tulip_media_cap[rtdev->if_port] & MediaAlwaysFD) ++ tp->full_duplex = 1; ++ ++ if (startup && mtable->has_reset) { ++ struct medialeaf *rleaf = &mtable->mleaf[mtable->has_reset]; ++ unsigned char *rst = rleaf->leafdata; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Resetting the transceiver.\n", ++ rtdev->name); ++ for (i = 0; i < rst[0]; i++) ++ outl(get_u16(rst + 1 + (i<<1)) << 16, ioaddr + CSR15); ++ } ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: 21143 non-MII %s transceiver control " ++ "%4.4x/%4.4x.\n", ++ rtdev->name, medianame[rtdev->if_port], setup[0], setup[1]); ++ if (p[0] & 0x40) { /* SIA (CSR13-15) setup values are provided. 
*/ ++ csr13val = setup[0]; ++ csr14val = setup[1]; ++ csr15dir = (setup[3]<<16) | setup[2]; ++ csr15val = (setup[4]<<16) | setup[2]; ++ outl(0, ioaddr + CSR13); ++ outl(csr14val, ioaddr + CSR14); ++ outl(csr15dir, ioaddr + CSR15); /* Direction */ ++ outl(csr15val, ioaddr + CSR15); /* Data */ ++ outl(csr13val, ioaddr + CSR13); ++ } else { ++ csr13val = 1; ++ csr14val = 0; ++ csr15dir = (setup[0]<<16) | 0x0008; ++ csr15val = (setup[1]<<16) | 0x0008; ++ if (rtdev->if_port <= 4) ++ csr14val = t21142_csr14[rtdev->if_port]; ++ if (startup) { ++ outl(0, ioaddr + CSR13); ++ outl(csr14val, ioaddr + CSR14); ++ } ++ outl(csr15dir, ioaddr + CSR15); /* Direction */ ++ outl(csr15val, ioaddr + CSR15); /* Data */ ++ if (startup) outl(csr13val, ioaddr + CSR13); ++ } ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Setting CSR15 to %8.8x/%8.8x.\n", ++ rtdev->name, csr15dir, csr15val); ++ if (mleaf->type == 4) ++ new_csr6 = 0x82020000 | ((setup[2] & 0x71) << 18); ++ else ++ new_csr6 = 0x82420000; ++ break; ++ } ++ case 1: case 3: { ++ int phy_num = p[0]; ++ int init_length = p[1]; ++ u16 *misc_info, tmp_info; ++ ++ rtdev->if_port = 11; ++ new_csr6 = 0x020E0000; ++ if (mleaf->type == 3) { /* 21142 */ ++ u16 *init_sequence = (u16*)(p+2); ++ u16 *reset_sequence = &((u16*)(p+3))[init_length]; ++ int reset_length = p[2 + init_length*2]; ++ misc_info = reset_sequence + reset_length; ++ if (startup) ++ for (i = 0; i < reset_length; i++) ++ outl(get_u16(&reset_sequence[i]) << 16, ioaddr + CSR15); ++ for (i = 0; i < init_length; i++) ++ outl(get_u16(&init_sequence[i]) << 16, ioaddr + CSR15); ++ } else { ++ u8 *init_sequence = p + 2; ++ u8 *reset_sequence = p + 3 + init_length; ++ int reset_length = p[2 + init_length]; ++ misc_info = (u16*)(reset_sequence + reset_length); ++ if (startup) { ++ outl(mtable->csr12dir | 0x100, ioaddr + CSR12); ++ for (i = 0; i < reset_length; i++) ++ outl(reset_sequence[i], ioaddr + CSR12); ++ } ++ for (i = 0; i < init_length; i++) ++ outl(init_sequence[i], ioaddr + CSR12); ++ } ++ tmp_info = get_u16(&misc_info[1]); ++ if (tmp_info) ++ tp->advertising[phy_num] = tmp_info | 1; ++ if (tmp_info && startup < 2) { ++ if (tp->mii_advertise == 0) ++ tp->mii_advertise = tp->advertising[phy_num]; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Advertising %4.4x on MII %d.\n", ++ rtdev->name, tp->mii_advertise, tp->phys[phy_num]); ++ tulip_mdio_write(rtdev, tp->phys[phy_num], 4, tp->mii_advertise); ++ } ++ break; ++ } ++ case 5: case 6: { ++ u16 setup[5]; ++ ++ new_csr6 = 0; /* FIXME */ ++ ++ for (i = 0; i < 5; i++) ++ setup[i] = get_u16(&p[i*2 + 1]); ++ ++ if (startup && mtable->has_reset) { ++ struct medialeaf *rleaf = &mtable->mleaf[mtable->has_reset]; ++ unsigned char *rst = rleaf->leafdata; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Resetting the transceiver.\n", ++ rtdev->name); ++ for (i = 0; i < rst[0]; i++) ++ outl(get_u16(rst + 1 + (i<<1)) << 16, ioaddr + CSR15); ++ } ++ ++ break; ++ } ++ default: ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Invalid media table selection %d.\n", ++ rtdev->name, mleaf->type); ++ new_csr6 = 0x020E0000; ++ } ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Using media type %s, CSR12 is %2.2x.\n", ++ rtdev->name, medianame[rtdev->if_port], ++ inl(ioaddr + CSR12) & 0xff); ++ } else if (tp->chip_id == DC21041) { ++ int port = rtdev->if_port <= 4 ? 
rtdev->if_port : 0; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: 21041 using media %s, CSR12 is %4.4x.\n", ++ rtdev->name, medianame[port == 3 ? 12: port], ++ inl(ioaddr + CSR12)); ++ outl(0x00000000, ioaddr + CSR13); /* Reset the serial interface */ ++ outl(t21041_csr14[port], ioaddr + CSR14); ++ outl(t21041_csr15[port], ioaddr + CSR15); ++ outl(t21041_csr13[port], ioaddr + CSR13); ++ new_csr6 = 0x80020000; ++ } else if (tp->chip_id == LC82C168) { ++ if (startup && ! tp->medialock) ++ rtdev->if_port = tp->mii_cnt ? 11 : 0; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: PNIC PHY status is %3.3x, media %s.\n", ++ rtdev->name, inl(ioaddr + 0xB8), medianame[rtdev->if_port]); ++ if (tp->mii_cnt) { ++ new_csr6 = 0x810C0000; ++ outl(0x0001, ioaddr + CSR15); ++ outl(0x0201B07A, ioaddr + 0xB8); ++ } else if (startup) { ++ /* Start with 10mbps to do autonegotiation. */ ++ outl(0x32, ioaddr + CSR12); ++ new_csr6 = 0x00420000; ++ outl(0x0001B078, ioaddr + 0xB8); ++ outl(0x0201B078, ioaddr + 0xB8); ++ } else if (rtdev->if_port == 3 || rtdev->if_port == 5) { ++ outl(0x33, ioaddr + CSR12); ++ new_csr6 = 0x01860000; ++ /* Trigger autonegotiation. */ ++ outl(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8); ++ } else { ++ outl(0x32, ioaddr + CSR12); ++ new_csr6 = 0x00420000; ++ outl(0x1F078, ioaddr + 0xB8); ++ } ++ } else if (tp->chip_id == DC21040) { /* 21040 */ ++ /* Turn on the xcvr interface. */ ++ int csr12 = inl(ioaddr + CSR12); ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: 21040 media type is %s, CSR12 is %2.2x.\n", ++ rtdev->name, medianame[rtdev->if_port], csr12); ++ if (tulip_media_cap[rtdev->if_port] & MediaAlwaysFD) ++ tp->full_duplex = 1; ++ new_csr6 = 0x20000; ++ /* Set the full duplux match frame. */ ++ outl(FULL_DUPLEX_MAGIC, ioaddr + CSR11); ++ outl(0x00000000, ioaddr + CSR13); /* Reset the serial interface */ ++ if (t21040_csr13[rtdev->if_port] & 8) { ++ outl(0x0705, ioaddr + CSR14); ++ outl(0x0006, ioaddr + CSR15); ++ } else { ++ outl(0xffff, ioaddr + CSR14); ++ outl(0x0000, ioaddr + CSR15); ++ } ++ outl(0x8f01 | t21040_csr13[rtdev->if_port], ioaddr + CSR13); ++ } else { /* Unknown chip type with no media table. */ ++ if (tp->default_port == 0) ++ rtdev->if_port = tp->mii_cnt ? 11 : 3; ++ if (tulip_media_cap[rtdev->if_port] & MediaIsMII) { ++ new_csr6 = 0x020E0000; ++ } else if (tulip_media_cap[rtdev->if_port] & MediaIsFx) { ++ new_csr6 = 0x02860000; ++ } else ++ new_csr6 = 0x03860000; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: No media description table, assuming " ++ "%s transceiver, CSR12 %2.2x.\n", ++ rtdev->name, medianame[rtdev->if_port], ++ inl(ioaddr + CSR12)); ++ } ++ ++ tp->csr6 = new_csr6 | (tp->csr6 & 0xfdff) | (tp->full_duplex ? 0x0200 : 0); ++ ++ mdelay(1); ++ ++ return; ++} ++ ++/* ++ Check the MII negotiated duplex and change the CSR6 setting if ++ required. ++ Return 0 if everything is OK. ++ Return < 0 if the transceiver is missing or has no link beat. 
++ */ ++int tulip_check_duplex(struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = rtdev->priv; ++ unsigned int bmsr, lpa, negotiated, new_csr6; ++ ++ bmsr = tulip_mdio_read(rtdev, tp->phys[0], MII_BMSR); ++ lpa = tulip_mdio_read(rtdev, tp->phys[0], MII_LPA); ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: MII status %4.4x, Link partner report " ++ "%4.4x.\n", rtdev->name, bmsr, lpa); ++ if (bmsr == 0xffff) ++ return -2; ++ if ((bmsr & BMSR_LSTATUS) == 0) { ++ int new_bmsr = tulip_mdio_read(rtdev, tp->phys[0], MII_BMSR); ++ if ((new_bmsr & BMSR_LSTATUS) == 0) { ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: No link beat on the MII interface," ++ " status %4.4x.\n", rtdev->name, new_bmsr); ++ return -1; ++ } ++ } ++ negotiated = lpa & tp->advertising[0]; ++ tp->full_duplex = mii_duplex(tp->full_duplex_lock, negotiated); ++ ++ new_csr6 = tp->csr6; ++ ++ if (negotiated & LPA_100) new_csr6 &= ~TxThreshold; ++ else new_csr6 |= TxThreshold; ++ if (tp->full_duplex) new_csr6 |= FullDuplex; ++ else new_csr6 &= ~FullDuplex; ++ ++ if (new_csr6 != tp->csr6) { ++ tp->csr6 = new_csr6; ++ tulip_restart_rxtx(tp); ++ ++ if (tulip_debug > 0) ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Setting %s-duplex based on MII" ++ "#%d link partner capability of %4.4x.\n", ++ rtdev->name, tp->full_duplex ? "full" : "half", ++ tp->phys[0], lpa); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++void tulip_find_mii (struct rtnet_device *rtdev, int board_idx) ++{ ++ struct tulip_private *tp = rtdev->priv; ++ int phyn, phy_idx = 0; ++ int mii_reg0; ++ int mii_advert; ++ unsigned int to_advert, new_bmcr, ane_switch; ++ ++ /* Find the connected MII xcvrs. ++ Doing this in open() would allow detecting external xcvrs later, ++ but takes much time. */ ++ for (phyn = 1; phyn <= 32 && phy_idx < sizeof (tp->phys); phyn++) { ++ int phy = phyn & 0x1f; ++ int mii_status = tulip_mdio_read (rtdev, phy, MII_BMSR); ++ if ((mii_status & 0x8301) == 0x8001 || ++ ((mii_status & BMSR_100BASE4) == 0 ++ && (mii_status & 0x7800) != 0)) { ++ /* preserve Becker logic, gain indentation level */ ++ } else { ++ continue; ++ } ++ ++ mii_reg0 = tulip_mdio_read (rtdev, phy, MII_BMCR); ++ mii_advert = tulip_mdio_read (rtdev, phy, MII_ADVERTISE); ++ ane_switch = 0; ++ ++ /* if not advertising at all, gen an ++ * advertising value from the capability ++ * bits in BMSR ++ */ ++ if ((mii_advert & ADVERTISE_ALL) == 0) { ++ unsigned int tmpadv = tulip_mdio_read (rtdev, phy, MII_BMSR); ++ mii_advert = ((tmpadv >> 6) & 0x3e0) | 1; ++ } ++ ++ if (tp->mii_advertise) { ++ tp->advertising[phy_idx] = ++ to_advert = tp->mii_advertise; ++ } else if (tp->advertising[phy_idx]) { ++ to_advert = tp->advertising[phy_idx]; ++ } else { ++ tp->advertising[phy_idx] = ++ tp->mii_advertise = ++ to_advert = mii_advert; ++ } ++ ++ tp->phys[phy_idx++] = phy; ++ ++ /*RTnet*/rtdm_printk(KERN_INFO "tulip%d: MII transceiver #%d " ++ "config %4.4x status %4.4x advertising %4.4x.\n", ++ board_idx, phy, mii_reg0, mii_status, mii_advert); ++ ++ /* Fixup for DLink with miswired PHY. */ ++ if (mii_advert != to_advert) { ++ /*RTnet*/rtdm_printk(KERN_DEBUG "tulip%d: Advertising %4.4x on PHY %d," ++ " previously advertising %4.4x.\n", ++ board_idx, to_advert, phy, mii_advert); ++ tulip_mdio_write (rtdev, phy, 4, to_advert); ++ } ++ ++ /* Enable autonegotiation: some boards default to off. 
*/ ++ if (tp->default_port == 0) { ++ new_bmcr = mii_reg0 | BMCR_ANENABLE; ++ if (new_bmcr != mii_reg0) { ++ new_bmcr |= BMCR_ANRESTART; ++ ane_switch = 1; ++ } ++ } ++ /* ...or disable nway, if forcing media */ ++ else { ++ new_bmcr = mii_reg0 & ~BMCR_ANENABLE; ++ if (new_bmcr != mii_reg0) ++ ane_switch = 1; ++ } ++ ++ /* clear out bits we never want at this point */ ++ new_bmcr &= ~(BMCR_CTST | BMCR_FULLDPLX | BMCR_ISOLATE | ++ BMCR_PDOWN | BMCR_SPEED100 | BMCR_LOOPBACK | ++ BMCR_RESET); ++ ++ if (tp->full_duplex) ++ new_bmcr |= BMCR_FULLDPLX; ++ if (tulip_media_cap[tp->default_port] & MediaIs100) ++ new_bmcr |= BMCR_SPEED100; ++ ++ if (new_bmcr != mii_reg0) { ++ /* some phys need the ANE switch to ++ * happen before forced media settings ++ * will "take." However, we write the ++ * same value twice in order not to ++ * confuse the sane phys. ++ */ ++ if (ane_switch) { ++ tulip_mdio_write (rtdev, phy, MII_BMCR, new_bmcr); ++ udelay (10); ++ } ++ tulip_mdio_write (rtdev, phy, MII_BMCR, new_bmcr); ++ } ++ } ++ tp->mii_cnt = phy_idx; ++ if (tp->mtable && tp->mtable->has_mii && phy_idx == 0) { ++ /*RTnet*/rtdm_printk(KERN_INFO "tulip%d: ***WARNING***: No MII transceiver found!\n", ++ board_idx); ++ tp->phys[0] = 1; ++ } ++} +--- linux/drivers/xenomai/net/drivers/tulip/pnic.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/pnic.c 2021-04-07 16:01:27.304634056 +0800 +@@ -0,0 +1,53 @@ ++/* ++ drivers/net/tulip/pnic.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#include ++#include "tulip.h" ++ ++ ++void pnic_do_nway(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ u32 phy_reg = inl(ioaddr + 0xB8); ++ u32 new_csr6 = tp->csr6 & ~0x40C40200; ++ ++ if (phy_reg & 0x78000000) { /* Ignore baseT4 */ ++ if (phy_reg & 0x20000000) rtdev->if_port = 5; ++ else if (phy_reg & 0x40000000) rtdev->if_port = 3; ++ else if (phy_reg & 0x10000000) rtdev->if_port = 4; ++ else if (phy_reg & 0x08000000) rtdev->if_port = 0; ++ tp->nwayset = 1; ++ new_csr6 = (rtdev->if_port & 1) ? 0x01860000 : 0x00420000; ++ outl(0x32 | (rtdev->if_port & 1), ioaddr + CSR12); ++ if (rtdev->if_port & 1) ++ outl(0x1F868, ioaddr + 0xB8); ++ if (phy_reg & 0x30000000) { ++ tp->full_duplex = 1; ++ new_csr6 |= 0x00000200; ++ } ++ if (tulip_debug > 1) ++ /*RTnet*/printk(KERN_DEBUG "%s: PNIC autonegotiated status %8.8x, %s.\n", ++ rtdev->name, phy_reg, medianame[rtdev->if_port]); ++ if (tp->csr6 != new_csr6) { ++ tp->csr6 = new_csr6; ++ /* Restart Tx */ ++ tulip_restart_rxtx(tp); ++ } ++ } ++} ++ +--- linux/drivers/xenomai/net/drivers/tulip/pnic2.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/pnic2.c 2021-04-07 16:01:27.299634063 +0800 +@@ -0,0 +1,158 @@ ++/* ++ drivers/net/tulip/pnic2.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ Modified to hep support PNIC_II by Kevin B. 
Hendricks ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++ ++/* Understanding the PNIC_II - everything is this file is based ++ * on the PNIC_II_PDF datasheet which is sorely lacking in detail ++ * ++ * As I understand things, here are the registers and bits that ++ * explain the masks and constants used in this file that are ++ * either different from the 21142/3 or important for basic operation. ++ * ++ * ++ * CSR 6 (mask = 0xfe3bd1fd of bits not to change) ++ * ----- ++ * Bit 24 - SCR ++ * Bit 23 - PCS ++ * Bit 22 - TTM (Trasmit Threshold Mode) ++ * Bit 18 - Port Select ++ * Bit 13 - Start - 1, Stop - 0 Transmissions ++ * Bit 11:10 - Loop Back Operation Mode ++ * Bit 9 - Full Duplex mode (Advertise 10BaseT-FD is CSR14<7> is set) ++ * Bit 1 - Start - 1, Stop - 0 Receive ++ * ++ * ++ * CSR 14 (mask = 0xfff0ee39 of bits not to change) ++ * ------ ++ * Bit 19 - PAUSE-Pause ++ * Bit 18 - Advertise T4 ++ * Bit 17 - Advertise 100baseTx-FD ++ * Bit 16 - Advertise 100baseTx-HD ++ * Bit 12 - LTE - Link Test Enable ++ * Bit 7 - ANE - Auto Negotiate Enable ++ * Bit 6 - HDE - Advertise 10baseT-HD ++ * Bit 2 - Reset to Power down - kept as 1 for normal operation ++ * Bit 1 - Loop Back enable for 10baseT MCC ++ * ++ * ++ * CSR 12 ++ * ------ ++ * Bit 25 - Partner can do T4 ++ * Bit 24 - Partner can do 100baseTx-FD ++ * Bit 23 - Partner can do 100baseTx-HD ++ * Bit 22 - Partner can do 10baseT-FD ++ * Bit 21 - Partner can do 10baseT-HD ++ * Bit 15 - LPN is 1 if all above bits are valid other wise 0 ++ * Bit 14:12 - autonegotiation state (write 001 to start autonegotiate) ++ * Bit 3 - Autopolarity state ++ * Bit 2 - LS10B - link state of 10baseT 0 - good, 1 - failed ++ * Bit 1 - LS100B - link state of 100baseT 0 - good, 1- faild ++ * ++ * ++ * Data Port Selection Info ++ *------------------------- ++ * ++ * CSR14<7> CSR6<18> CSR6<22> CSR6<23> CSR6<24> MODE/PORT ++ * 1 0 0 (X) 0 (X) 1 NWAY ++ * 0 0 1 0 (X) 0 10baseT ++ * 0 1 0 1 1 (X) 100baseT ++ * ++ * ++ */ ++ ++ ++ ++#include "tulip.h" ++#include ++#include ++ ++ ++void pnic2_start_nway(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int csr14; ++ int csr12; ++ ++ /* set up what to advertise during the negotiation */ ++ ++ /* load in csr14 and mask off bits not to touch ++ * comment at top of file explains mask value ++ */ ++ csr14 = (inl(ioaddr + CSR14) & 0xfff0ee39); ++ ++ /* bit 17 - advetise 100baseTx-FD */ ++ if (tp->sym_advertise & 0x0100) csr14 |= 0x00020000; ++ ++ /* bit 16 - advertise 100baseTx-HD */ ++ if (tp->sym_advertise & 0x0080) csr14 |= 0x00010000; ++ ++ /* bit 6 - advertise 10baseT-HD */ ++ if (tp->sym_advertise & 0x0020) csr14 |= 0x00000040; ++ ++ /* Now set bit 12 Link Test Enable, Bit 7 Autonegotiation Enable ++ * and bit 0 Don't PowerDown 10baseT ++ */ ++ csr14 |= 0x00001184; ++ ++ if (tulip_debug > 1) ++ printk(KERN_DEBUG "%s: Restarting PNIC2 autonegotiation, " ++ "csr14=%8.8x.\n", rtdev->name, csr14); ++ ++ /* tell pnic2_lnk_change we are doing an nway negotiation */ ++ rtdev->if_port = 0; ++ tp->nway = tp->mediasense = 1; ++ tp->nwayset = tp->lpar = 0; ++ ++ /* now we have to set up csr6 for NWAY state */ 
++ ++ tp->csr6 = inl(ioaddr + CSR6); ++ if (tulip_debug > 1) ++ printk(KERN_DEBUG "%s: On Entry to Nway, " ++ "csr6=%8.8x.\n", rtdev->name, tp->csr6); ++ ++ /* mask off any bits not to touch ++ * comment at top of file explains mask value ++ */ ++ tp->csr6 = tp->csr6 & 0xfe3bd1fd; ++ ++ /* don't forget that bit 9 is also used for advertising */ ++ /* advertise 10baseT-FD for the negotiation (bit 9) */ ++ if (tp->sym_advertise & 0x0040) tp->csr6 |= 0x00000200; ++ ++ /* set bit 24 for nway negotiation mode ... ++ * see Data Port Selection comment at top of file ++ * and "Stop" - reset both Transmit (bit 13) and Receive (bit 1) ++ */ ++ tp->csr6 |= 0x01000000; ++ outl(csr14, ioaddr + CSR14); ++ outl(tp->csr6, ioaddr + CSR6); ++ udelay(100); ++ ++ /* all set up so now force the negotiation to begin */ ++ ++ /* read in current values and mask off all but the ++ * Autonegotiation bits 14:12. Writing a 001 to those bits ++ * should start the autonegotiation ++ */ ++ csr12 = (inl(ioaddr + CSR12) & 0xffff8fff); ++ csr12 |= 0x1000; ++ outl(csr12, ioaddr + CSR12); ++} ++ ++ +--- linux/drivers/xenomai/net/drivers/rt_at91_ether.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/rt_at91_ether.h 2021-04-07 16:01:27.294634070 +0800 +@@ -0,0 +1,109 @@ ++/* ++ * Ethernet driver for the Atmel AT91RM9200 (Thunder) ++ * ++ * Copyright (C) SAN People (Pty) Ltd ++ * ++ * Based on an earlier Atmel EMAC macrocell driver by Atmel and Lineo Inc. ++ * Initial version by Rick Bronson. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++ ++#ifndef AT91_ETHERNET ++#define AT91_ETHERNET ++ ++#include ++#include ++ ++/* Davicom 9161 PHY */ ++#define MII_DM9161_ID 0x0181b880 ++#define MII_DM9161A_ID 0x0181b8a0 ++ ++/* Davicom specific registers */ ++#define MII_DSCR_REG 16 ++#define MII_DSCSR_REG 17 ++#define MII_DSINTR_REG 21 ++ ++/* Intel LXT971A PHY */ ++#define MII_LXT971A_ID 0x001378E0 ++ ++/* Intel specific registers */ ++#define MII_ISINTE_REG 18 ++#define MII_ISINTS_REG 19 ++#define MII_LEDCTRL_REG 20 ++ ++/* Realtek RTL8201 PHY */ ++#define MII_RTL8201_ID 0x00008200 ++ ++/* Broadcom BCM5221 PHY */ ++#define MII_BCM5221_ID 0x004061e0 ++ ++/* Broadcom specific registers */ ++#define MII_BCMINTR_REG 26 ++ ++/* National Semiconductor DP83847 */ ++#define MII_DP83847_ID 0x20005c30 ++ ++/* Altima AC101L PHY */ ++#define MII_AC101L_ID 0x00225520 ++ ++/* Micrel KS8721 PHY */ ++#define MII_KS8721_ID 0x00221610 ++ ++/* ........................................................................ 
*/ ++ ++#define MAX_RBUFF_SZ 0x600 /* 1518 rounded up */ ++#define MAX_RX_DESCR 9 /* max number of receive buffers */ ++ ++#define EMAC_DESC_DONE 0x00000001 /* bit for if DMA is done */ ++#define EMAC_DESC_WRAP 0x00000002 /* bit for wrap */ ++ ++#define EMAC_BROADCAST 0x80000000 /* broadcast address */ ++#define EMAC_MULTICAST 0x40000000 /* multicast address */ ++#define EMAC_UNICAST 0x20000000 /* unicast address */ ++ ++struct rbf_t ++{ ++ unsigned int addr; ++ unsigned long size; ++}; ++ ++struct recv_desc_bufs ++{ ++ struct rbf_t descriptors[MAX_RX_DESCR]; /* must be on sizeof (rbf_t) boundary */ ++ char recv_buf[MAX_RX_DESCR][MAX_RBUFF_SZ]; /* must be on long boundary */ ++}; ++ ++struct at91_private ++{ ++ struct net_device_stats stats; ++ struct mii_if_info mii; /* ethtool support */ ++ struct at91_eth_data board_data; /* board-specific configuration */ ++ struct clk *ether_clk; /* clock */ ++ ++ /* PHY */ ++ unsigned long phy_type; /* type of PHY (PHY_ID) */ ++ rtdm_lock_t lock; /* lock for MDI interface */ ++ short phy_media; /* media interface type */ ++ unsigned short phy_address; /* 5-bit MDI address of PHY (0..31) */ ++ struct timer_list check_timer; /* Poll link status */ ++ ++ /* Transmit */ ++ struct rtskb *skb; /* holds skb until xmit interrupt completes */ ++ dma_addr_t skb_physaddr; /* phys addr from pci_map_single */ ++ int skb_length; /* saved skb length for pci_unmap_single */ ++ ++ /* Receive */ ++ int rxBuffIndex; /* index into receive descriptor list */ ++ struct recv_desc_bufs *dlist; /* descriptor list address */ ++ struct recv_desc_bufs *dlist_phys; /* descriptor list physical address */ ++ ++ /* RT Net */ ++ rtdm_irq_t irq_handle; ++ rtdm_irq_t phy_irq_handle; ++}; ++ ++#endif +--- linux/drivers/xenomai/net/drivers/pcnet32.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/pcnet32.c 2021-04-07 16:01:27.289634078 +0800 +@@ -0,0 +1,1652 @@ ++/* pcnet32.c: An AMD PCnet32 ethernet driver for linux. */ ++/* ++ * Copyright 1996-1999 Thomas Bogendoerfer ++ * ++ * Derived from the lance driver written 1993,1994,1995 by Donald Becker. ++ * ++ * Copyright 1993 United States Government as represented by the ++ * Director, National Security Agency. ++ * ++ * This software may be used and distributed according to the terms ++ * of the GNU General Public License, incorporated herein by reference. ++ * ++ * This driver is for PCnet32 and PCnetPCI based ethercards ++ */ ++/************************************************************************** ++ * 23 Oct, 2000. ++ * Fixed a few bugs, related to running the controller in 32bit mode. ++ * ++ * Carsten Langgaard, carstenl@mips.com ++ * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. 
++ * ++ * Ported to RTnet: September 2003, Jan Kiszka ++ *************************************************************************/ ++ ++#define DRV_NAME "pcnet32-rt" ++#define DRV_VERSION "1.27a-RTnet-0.2" ++#define DRV_RELDATE "2003-09-24" ++#define PFX DRV_NAME ": " ++ ++static const char *version = ++ DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " Jan.Kiszka@web.de\n"; ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++/*** RTnet ***/ ++#include ++ ++#define MAX_UNITS 8 /* More are supported, limit only on options */ ++#define DEFAULT_RX_POOL_SIZE 16 ++ ++static int cards[MAX_UNITS] = { [0 ...(MAX_UNITS - 1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++/*** RTnet ***/ ++ ++/* ++ * PCI device identifiers for "new style" Linux PCI Device Drivers ++ */ ++static struct pci_device_id pcnet32_pci_tbl[] = { ++ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE_HOME, PCI_ANY_ID, ++ PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE, PCI_ANY_ID, PCI_ANY_ID, 0, ++ 0, 0 }, ++ { ++ 0, ++ } ++}; ++ ++MODULE_DEVICE_TABLE(pci, pcnet32_pci_tbl); ++ ++static int cards_found = -1; ++static int pcnet32_have_pci; ++ ++/* ++ * VLB I/O addresses ++ */ ++static unsigned int pcnet32_portlist[] = { 0x300, 0x320, 0x340, 0x360, 0 }; ++ ++static int pcnet32_debug = 1; ++static int tx_start = ++ 1; /* Mapping -- 0:20, 1:64, 2:128, 3:~220 (depends on chip vers) */ ++static int pcnet32vlb; /* check for VLB cards ? */ ++ ++static struct rtnet_device *pcnet32_dev; /*** RTnet ***/ ++ ++static int max_interrupt_work = 80; ++/*** RTnet *** ++static int rx_copybreak = 200; ++ *** RTnet ***/ ++ ++#define PCNET32_PORT_AUI 0x00 ++#define PCNET32_PORT_10BT 0x01 ++#define PCNET32_PORT_GPSI 0x02 ++#define PCNET32_PORT_MII 0x03 ++ ++#define PCNET32_PORT_PORTSEL 0x03 ++#define PCNET32_PORT_ASEL 0x04 ++#define PCNET32_PORT_100 0x40 ++#define PCNET32_PORT_FD 0x80 ++ ++#define PCNET32_DMA_MASK 0xffffffff ++ ++/* ++ * table to translate option values from tulip ++ * to internal options ++ */ ++static unsigned char options_mapping[] = { ++ PCNET32_PORT_ASEL, /* 0 Auto-select */ ++ PCNET32_PORT_AUI, /* 1 BNC/AUI */ ++ PCNET32_PORT_AUI, /* 2 AUI/BNC */ ++ PCNET32_PORT_ASEL, /* 3 not supported */ ++ PCNET32_PORT_10BT | PCNET32_PORT_FD, /* 4 10baseT-FD */ ++ PCNET32_PORT_ASEL, /* 5 not supported */ ++ PCNET32_PORT_ASEL, /* 6 not supported */ ++ PCNET32_PORT_ASEL, /* 7 not supported */ ++ PCNET32_PORT_ASEL, /* 8 not supported */ ++ PCNET32_PORT_MII, /* 9 MII 10baseT */ ++ PCNET32_PORT_MII | PCNET32_PORT_FD, /* 10 MII 10baseT-FD */ ++ PCNET32_PORT_MII, /* 11 MII (autosel) */ ++ PCNET32_PORT_10BT, /* 12 10BaseT */ ++ PCNET32_PORT_MII | PCNET32_PORT_100, /* 13 MII 100BaseTx */ ++ PCNET32_PORT_MII | PCNET32_PORT_100 | ++ PCNET32_PORT_FD, /* 14 MII 100BaseTx-FD */ ++ PCNET32_PORT_ASEL /* 15 not supported */ ++}; ++ ++static int options[MAX_UNITS]; ++static int full_duplex[MAX_UNITS]; ++ ++/* ++ * Theory of Operation ++ * ++ * This driver uses the same software structure as the normal lance ++ * driver. So look for a verbose description in lance.c. The differences ++ * to the normal lance driver is the use of the 32bit mode of PCnet32 ++ * and PCnetPCI chips. 
Because these chips are 32bit chips, there is no ++ * 16MB limitation and we don't need bounce buffers. ++ */ ++ ++/* ++ * History: ++ * v0.01: Initial version ++ * only tested on Alpha Noname Board ++ * v0.02: changed IRQ handling for new interrupt scheme (dev_id) ++ * tested on a ASUS SP3G ++ * v0.10: fixed an odd problem with the 79C974 in a Compaq Deskpro XL ++ * looks like the 974 doesn't like stopping and restarting in a ++ * short period of time; now we do a reinit of the lance; the ++ * bug was triggered by doing ifconfig eth0 broadcast ++ * and hangs the machine (thanks to Klaus Liedl for debugging) ++ * v0.12: by suggestion from Donald Becker: Renamed driver to pcnet32, ++ * made it standalone (no need for lance.c) ++ * v0.13: added additional PCI detecting for special PCI devices (Compaq) ++ * v0.14: stripped down additional PCI probe (thanks to David C Niemi ++ * and sveneric@xs4all.nl for testing this on their Compaq boxes) ++ * v0.15: added 79C965 (VLB) probe ++ * added interrupt sharing for PCI chips ++ * v0.16: fixed set_multicast_list on Alpha machines ++ * v0.17: removed hack from dev.c; now pcnet32 uses ethif_probe in Space.c ++ * v0.19: changed setting of autoselect bit ++ * v0.20: removed additional Compaq PCI probe; there is now a working one ++ * in arch/i386/bios32.c ++ * v0.21: added endian conversion for ppc, from work by cort@cs.nmt.edu ++ * v0.22: added printing of status to ring dump ++ * v0.23: changed enet_statistics to net_devive_stats ++ * v0.90: added multicast filter ++ * added module support ++ * changed irq probe to new style ++ * added PCnetFast chip id ++ * added fix for receive stalls with Intel saturn chipsets ++ * added in-place rx skbs like in the tulip driver ++ * minor cleanups ++ * v0.91: added PCnetFast+ chip id ++ * back port to 2.0.x ++ * v1.00: added some stuff from Donald Becker's 2.0.34 version ++ * added support for byte counters in net_dev_stats ++ * v1.01: do ring dumps, only when debugging the driver ++ * increased the transmit timeout ++ * v1.02: fixed memory leak in pcnet32_init_ring() ++ * v1.10: workaround for stopped transmitter ++ * added port selection for modules ++ * detect special T1/E1 WAN card and setup port selection ++ * v1.11: fixed wrong checking of Tx errors ++ * v1.20: added check of return value kmalloc (cpeterso@cs.washington.edu) ++ * added save original kmalloc addr for freeing (mcr@solidum.com) ++ * added support for PCnetHome chip (joe@MIT.EDU) ++ * rewritten PCI card detection ++ * added dwio mode to get driver working on some PPC machines ++ * v1.21: added mii selection and mii ioctl ++ * v1.22: changed pci scanning code to make PPC people happy ++ * fixed switching to 32bit mode in pcnet32_open() (thanks ++ * to Michael Richard for noticing this one) ++ * added sub vendor/device id matching (thanks again to ++ * Michael Richard ) ++ * added chip id for 79c973/975 (thanks to Zach Brown ) ++ * v1.23 fixed small bug, when manual selecting MII speed/duplex ++ * v1.24 Applied Thomas' patch to use TxStartPoint and thus decrease TxFIFO ++ * underflows. Added tx_start_pt module parameter. Increased ++ * TX_RING_SIZE from 16 to 32. Added #ifdef'd code to use DXSUFLO ++ * for FAST[+] chipsets. ++ * v1.24ac Added SMP spinlocking - Alan Cox ++ * v1.25kf Added No Interrupt on successful Tx for some Tx's ++ * v1.26 Converted to pci_alloc_consistent, Jamey Hicks / George France ++ * ++ * - Fixed a few bugs, related to running the controller in 32bit mode. ++ * 23 Oct, 2000. 
Carsten Langgaard, carstenl@mips.com ++ * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. ++ * v1.26p Fix oops on rmmod+insmod; plug i/o resource leak - Paul Gortmaker ++ * v1.27 improved CSR/PROM address detection, lots of cleanups, ++ * new pcnet32vlb module option, HP-PARISC support, ++ * added module parameter descriptions, ++ * initial ethtool support - Helge Deller ++ * v1.27a Sun Feb 10 2002 Go Taniguchi ++ * use alloc_etherdev and register_netdev ++ * fix pci probe not increment cards_found ++ * FD auto negotiate error workaround for xSeries250 ++ * clean up and using new mii module ++ */ ++ ++/* ++ * Set the number of Tx and Rx buffers, using Log_2(# buffers). ++ * Reasonable default values are 4 Tx buffers, and 16 Rx buffers. ++ * That translates to 2 (4 == 2^^2) and 4 (16 == 2^^4). ++ */ ++#ifndef PCNET32_LOG_TX_BUFFERS ++#define PCNET32_LOG_TX_BUFFERS 4 ++#define PCNET32_LOG_RX_BUFFERS 3 /*** RTnet ***/ ++#endif ++ ++#define TX_RING_SIZE (1 << (PCNET32_LOG_TX_BUFFERS)) ++#define TX_RING_MOD_MASK (TX_RING_SIZE - 1) ++#define TX_RING_LEN_BITS ((PCNET32_LOG_TX_BUFFERS) << 12) ++ ++#define RX_RING_SIZE (1 << (PCNET32_LOG_RX_BUFFERS)) ++#define RX_RING_MOD_MASK (RX_RING_SIZE - 1) ++#define RX_RING_LEN_BITS ((PCNET32_LOG_RX_BUFFERS) << 4) ++ ++#define PKT_BUF_SZ 1544 ++ ++/* Offsets from base I/O address. */ ++#define PCNET32_WIO_RDP 0x10 ++#define PCNET32_WIO_RAP 0x12 ++#define PCNET32_WIO_RESET 0x14 ++#define PCNET32_WIO_BDP 0x16 ++ ++#define PCNET32_DWIO_RDP 0x10 ++#define PCNET32_DWIO_RAP 0x14 ++#define PCNET32_DWIO_RESET 0x18 ++#define PCNET32_DWIO_BDP 0x1C ++ ++#define PCNET32_TOTAL_SIZE 0x20 ++ ++/* The PCNET32 Rx and Tx ring descriptors. */ ++struct pcnet32_rx_head { ++ u32 base; ++ s16 buf_length; ++ s16 status; ++ u32 msg_length; ++ u32 reserved; ++}; ++ ++struct pcnet32_tx_head { ++ u32 base; ++ s16 length; ++ s16 status; ++ u32 misc; ++ u32 reserved; ++}; ++ ++/* The PCNET32 32-Bit initialization block, described in databook. */ ++struct pcnet32_init_block { ++ u16 mode; ++ u16 tlen_rlen; ++ u8 phys_addr[6]; ++ u16 reserved; ++ u32 filter[2]; ++ /* Receive and transmit ring base, along with extra bits. */ ++ u32 rx_ring; ++ u32 tx_ring; ++}; ++ ++/* PCnet32 access functions */ ++struct pcnet32_access { ++ u16 (*read_csr)(unsigned long, int); ++ void (*write_csr)(unsigned long, int, u16); ++ u16 (*read_bcr)(unsigned long, int); ++ void (*write_bcr)(unsigned long, int, u16); ++ u16 (*read_rap)(unsigned long); ++ void (*write_rap)(unsigned long, u16); ++ void (*reset)(unsigned long); ++}; ++ ++/* ++ * The first three fields of pcnet32_private are read by the ethernet device ++ * so we allocate the structure should be allocated by pci_alloc_consistent(). ++ */ ++struct pcnet32_private { ++ /* The Tx and Rx ring entries must be aligned on 16-byte boundaries in 32bit mode. */ ++ struct pcnet32_rx_head rx_ring[RX_RING_SIZE]; ++ struct pcnet32_tx_head tx_ring[TX_RING_SIZE]; ++ struct pcnet32_init_block init_block; ++ dma_addr_t dma_addr; /* DMA address of beginning of this object, ++ returned by pci_alloc_consistent */ ++ struct pci_dev ++ *pci_dev; /* Pointer to the associated pci device structure */ ++ const char *name; ++ /* The saved address of a sent-in-place packet/buffer, for skfree(). 
*/ ++ /*** RTnet ***/ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; ++ /*** RTnet ***/ ++ dma_addr_t tx_dma_addr[TX_RING_SIZE]; ++ dma_addr_t rx_dma_addr[RX_RING_SIZE]; ++ struct pcnet32_access a; ++ rtdm_lock_t lock; /* Guard lock */ ++ unsigned int cur_rx, cur_tx; /* The next free ring entry */ ++ unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ ++ struct net_device_stats stats; ++ char tx_full; ++ int options; ++ int shared_irq : 1, /* shared irq possible */ ++ ltint : 1, /* enable TxDone-intr inhibitor */ ++ dxsuflo : 1, /* disable transmit stop on uflo */ ++ mii : 1; /* mii port available */ ++ struct rtnet_device *next; /*** RTnet ***/ ++ struct mii_if_info mii_if; ++ rtdm_irq_t irq_handle; ++}; ++ ++static void pcnet32_probe_vlbus(void); ++static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *); ++static int pcnet32_probe1(unsigned long, unsigned int, int, struct pci_dev *); ++/*** RTnet ***/ ++static int pcnet32_open(struct rtnet_device *); ++static int pcnet32_init_ring(struct rtnet_device *); ++static int pcnet32_start_xmit(struct rtskb *, struct rtnet_device *); ++static int pcnet32_rx(struct rtnet_device *, nanosecs_abs_t *time_stamp); ++//static void pcnet32_tx_timeout (struct net_device *dev); ++static int pcnet32_interrupt(rtdm_irq_t *irq_handle); ++static int pcnet32_close(struct rtnet_device *); ++static struct net_device_stats *pcnet32_get_stats(struct rtnet_device *); ++//static void pcnet32_set_multicast_list(struct net_device *); ++//static int pcnet32_ioctl(struct net_device *, struct ifreq *, int); ++//static int mdio_read(struct net_device *dev, int phy_id, int reg_num); ++//static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val); ++/*** RTnet ***/ ++ ++enum pci_flags_bit { ++ PCI_USES_IO = 1, ++ PCI_USES_MEM = 2, ++ PCI_USES_MASTER = 4, ++ PCI_ADDR0 = 0x10 << 0, ++ PCI_ADDR1 = 0x10 << 1, ++ PCI_ADDR2 = 0x10 << 2, ++ PCI_ADDR3 = 0x10 << 3, ++}; ++ ++static u16 pcnet32_wio_read_csr(unsigned long addr, int index) ++{ ++ outw(index, addr + PCNET32_WIO_RAP); ++ return inw(addr + PCNET32_WIO_RDP); ++} ++ ++static void pcnet32_wio_write_csr(unsigned long addr, int index, u16 val) ++{ ++ outw(index, addr + PCNET32_WIO_RAP); ++ outw(val, addr + PCNET32_WIO_RDP); ++} ++ ++static u16 pcnet32_wio_read_bcr(unsigned long addr, int index) ++{ ++ outw(index, addr + PCNET32_WIO_RAP); ++ return inw(addr + PCNET32_WIO_BDP); ++} ++ ++static void pcnet32_wio_write_bcr(unsigned long addr, int index, u16 val) ++{ ++ outw(index, addr + PCNET32_WIO_RAP); ++ outw(val, addr + PCNET32_WIO_BDP); ++} ++ ++static u16 pcnet32_wio_read_rap(unsigned long addr) ++{ ++ return inw(addr + PCNET32_WIO_RAP); ++} ++ ++static void pcnet32_wio_write_rap(unsigned long addr, u16 val) ++{ ++ outw(val, addr + PCNET32_WIO_RAP); ++} ++ ++static void pcnet32_wio_reset(unsigned long addr) ++{ ++ inw(addr + PCNET32_WIO_RESET); ++} ++ ++static int pcnet32_wio_check(unsigned long addr) ++{ ++ outw(88, addr + PCNET32_WIO_RAP); ++ return (inw(addr + PCNET32_WIO_RAP) == 88); ++} ++ ++static struct pcnet32_access pcnet32_wio = { ++ read_csr: pcnet32_wio_read_csr, ++ write_csr: pcnet32_wio_write_csr, ++ read_bcr: pcnet32_wio_read_bcr, ++ write_bcr: pcnet32_wio_write_bcr, ++ read_rap: pcnet32_wio_read_rap, ++ write_rap: pcnet32_wio_write_rap, ++ reset: pcnet32_wio_reset ++}; ++ ++static u16 pcnet32_dwio_read_csr(unsigned long addr, int index) ++{ ++ outl(index, addr + PCNET32_DWIO_RAP); ++ return (inl(addr + 
PCNET32_DWIO_RDP) & 0xffff); ++} ++ ++static void pcnet32_dwio_write_csr(unsigned long addr, int index, u16 val) ++{ ++ outl(index, addr + PCNET32_DWIO_RAP); ++ outl(val, addr + PCNET32_DWIO_RDP); ++} ++ ++static u16 pcnet32_dwio_read_bcr(unsigned long addr, int index) ++{ ++ outl(index, addr + PCNET32_DWIO_RAP); ++ return (inl(addr + PCNET32_DWIO_BDP) & 0xffff); ++} ++ ++static void pcnet32_dwio_write_bcr(unsigned long addr, int index, u16 val) ++{ ++ outl(index, addr + PCNET32_DWIO_RAP); ++ outl(val, addr + PCNET32_DWIO_BDP); ++} ++ ++static u16 pcnet32_dwio_read_rap(unsigned long addr) ++{ ++ return (inl(addr + PCNET32_DWIO_RAP) & 0xffff); ++} ++ ++static void pcnet32_dwio_write_rap(unsigned long addr, u16 val) ++{ ++ outl(val, addr + PCNET32_DWIO_RAP); ++} ++ ++static void pcnet32_dwio_reset(unsigned long addr) ++{ ++ inl(addr + PCNET32_DWIO_RESET); ++} ++ ++static int pcnet32_dwio_check(unsigned long addr) ++{ ++ outl(88, addr + PCNET32_DWIO_RAP); ++ return ((inl(addr + PCNET32_DWIO_RAP) & 0xffff) == 88); ++} ++ ++static struct pcnet32_access pcnet32_dwio = { ++ read_csr: pcnet32_dwio_read_csr, ++ write_csr: pcnet32_dwio_write_csr, ++ read_bcr: pcnet32_dwio_read_bcr, ++ write_bcr: pcnet32_dwio_write_bcr, ++ read_rap: pcnet32_dwio_read_rap, ++ write_rap: pcnet32_dwio_write_rap, ++ reset: pcnet32_dwio_reset ++}; ++ ++/* only probes for non-PCI devices, the rest are handled by ++ * pci_register_driver via pcnet32_probe_pci */ ++ ++static void pcnet32_probe_vlbus(void) ++{ ++ unsigned int *port, ioaddr; ++ ++ /* search for PCnet32 VLB cards at known addresses */ ++ for (port = pcnet32_portlist; (ioaddr = *port); port++) { ++ if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, ++ "pcnet32_probe_vlbus")) { ++ /* check if there is really a pcnet chip on that ioaddr */ ++ if ((inb(ioaddr + 14) == 0x57) && ++ (inb(ioaddr + 15) == 0x57)) { ++ pcnet32_probe1(ioaddr, 0, 0, NULL); ++ } else { ++ release_region(ioaddr, PCNET32_TOTAL_SIZE); ++ } ++ } ++ } ++} ++ ++static int pcnet32_probe_pci(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ unsigned long ioaddr; ++ int err; ++ ++ err = pci_enable_device(pdev); ++ if (err < 0) { ++ printk(KERN_ERR PFX "failed to enable device -- err=%d\n", err); ++ return err; ++ } ++ pci_set_master(pdev); ++ ++ ioaddr = pci_resource_start(pdev, 0); ++ if (!ioaddr) { ++ printk(KERN_ERR PFX "card has no PCI IO resources, aborting\n"); ++ return -ENODEV; ++ } ++ ++ if (!dma_supported(&pdev->dev, PCNET32_DMA_MASK)) { ++ printk(KERN_ERR PFX ++ "architecture does not support 32bit PCI busmaster DMA\n"); ++ return -ENODEV; ++ } ++ ++ return pcnet32_probe1(ioaddr, pdev->irq, 1, pdev); ++} ++ ++/* pcnet32_probe1 ++ * Called from both pcnet32_probe_vlbus and pcnet_probe_pci. ++ * pdev will be NULL when called from pcnet32_probe_vlbus. 
++ */ ++static int pcnet32_probe1(unsigned long ioaddr, unsigned int irq_line, ++ int shared, struct pci_dev *pdev) ++{ ++ struct pcnet32_private *lp; ++ dma_addr_t lp_dma_addr; ++ int i, media; ++ int fdx, mii, fset, dxsuflo, ltint; ++ int chip_version; ++ char *chipname; ++ struct rtnet_device *dev; /*** RTnet ***/ ++ struct pcnet32_access *a = NULL; ++ u8 promaddr[6]; ++ ++ // *** RTnet *** ++ cards_found++; ++ if (cards[cards_found] == 0) ++ return -ENODEV; ++ // *** RTnet *** ++ ++ /* reset the chip */ ++ pcnet32_wio_reset(ioaddr); ++ ++ /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */ ++ if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && pcnet32_wio_check(ioaddr)) { ++ a = &pcnet32_wio; ++ } else { ++ pcnet32_dwio_reset(ioaddr); ++ if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 && ++ pcnet32_dwio_check(ioaddr)) { ++ a = &pcnet32_dwio; ++ } else ++ return -ENODEV; ++ } ++ ++ chip_version = ++ a->read_csr(ioaddr, 88) | (a->read_csr(ioaddr, 89) << 16); ++ if (pcnet32_debug > 2) ++ printk(KERN_INFO " PCnet chip version is %#x.\n", ++ chip_version); ++ if ((chip_version & 0xfff) != 0x003) ++ return -ENODEV; ++ ++ /* initialize variables */ ++ fdx = mii = fset = dxsuflo = ltint = 0; ++ chip_version = (chip_version >> 12) & 0xffff; ++ ++ switch (chip_version) { ++ case 0x2420: ++ chipname = "PCnet/PCI 79C970"; /* PCI */ ++ break; ++ case 0x2430: ++ if (shared) ++ chipname = ++ "PCnet/PCI 79C970"; /* 970 gives the wrong chip id back */ ++ else ++ chipname = "PCnet/32 79C965"; /* 486/VL bus */ ++ break; ++ case 0x2621: ++ chipname = "PCnet/PCI II 79C970A"; /* PCI */ ++ fdx = 1; ++ break; ++ case 0x2623: ++ chipname = "PCnet/FAST 79C971"; /* PCI */ ++ fdx = 1; ++ mii = 1; ++ fset = 1; ++ ltint = 1; ++ break; ++ case 0x2624: ++ chipname = "PCnet/FAST+ 79C972"; /* PCI */ ++ fdx = 1; ++ mii = 1; ++ fset = 1; ++ break; ++ case 0x2625: ++ chipname = "PCnet/FAST III 79C973"; /* PCI */ ++ fdx = 1; ++ mii = 1; ++ break; ++ case 0x2626: ++ chipname = "PCnet/Home 79C978"; /* PCI */ ++ fdx = 1; ++ /* ++ * This is based on specs published at www.amd.com. This section ++ * assumes that a card with a 79C978 wants to go into 1Mb HomePNA ++ * mode. The 79C978 can also go into standard ethernet, and there ++ * probably should be some sort of module option to select the ++ * mode by which the card should operate ++ */ ++ /* switch to home wiring mode */ ++ media = a->read_bcr(ioaddr, 49); ++ if (pcnet32_debug > 2) ++ printk(KERN_DEBUG PFX "media reset to %#x.\n", media); ++ a->write_bcr(ioaddr, 49, media); ++ break; ++ case 0x2627: ++ chipname = "PCnet/FAST III 79C975"; /* PCI */ ++ fdx = 1; ++ mii = 1; ++ break; ++ default: ++ printk(KERN_INFO PFX "PCnet version %#x, no PCnet32 chip.\n", ++ chip_version); ++ return -ENODEV; ++ } ++ ++ /* ++ * On selected chips turn on the BCR18:NOUFLO bit. This stops transmit ++ * starting until the packet is loaded. Strike one for reliability, lose ++ * one for latency - although on PCI this isnt a big loss. Older chips ++ * have FIFO's smaller than a packet, so you can't do this. 
++ */ ++ ++ if (fset) { ++ a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0800)); ++ a->write_csr(ioaddr, 80, ++ (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00); ++ dxsuflo = 1; ++ ltint = 1; ++ } ++ ++ /*** RTnet ***/ ++ dev = rt_alloc_etherdev(0, RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (dev == NULL) ++ return -ENOMEM; ++ rtdev_alloc_name(dev, "rteth%d"); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ dev->sysbind = &pdev->dev; ++ /*** RTnet ***/ ++ ++ printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); ++ ++ /* In most chips, after a chip reset, the ethernet address is read from the ++ * station address PROM at the base address and programmed into the ++ * "Physical Address Registers" CSR12-14. ++ * As a precautionary measure, we read the PROM values and complain if ++ * they disagree with the CSRs. Either way, we use the CSR values, and ++ * double check that they are valid. ++ */ ++ for (i = 0; i < 3; i++) { ++ unsigned int val; ++ val = a->read_csr(ioaddr, i + 12) & 0x0ffff; ++ /* There may be endianness issues here. */ ++ dev->dev_addr[2 * i] = val & 0x0ff; ++ dev->dev_addr[2 * i + 1] = (val >> 8) & 0x0ff; ++ } ++ ++ /* read PROM address and compare with CSR address */ ++ for (i = 0; i < 6; i++) ++ promaddr[i] = inb(ioaddr + i); ++ ++ if (memcmp(promaddr, dev->dev_addr, 6) || ++ !is_valid_ether_addr(dev->dev_addr)) { ++#ifndef __powerpc__ ++ if (is_valid_ether_addr(promaddr)) { ++#else ++ if (!is_valid_ether_addr(dev->dev_addr) && ++ is_valid_ether_addr(promaddr)) { ++#endif ++ printk(" warning: CSR address invalid,\n"); ++ printk(KERN_INFO " using instead PROM address of"); ++ memcpy(dev->dev_addr, promaddr, 6); ++ } ++ } ++ ++ /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */ ++ if (!is_valid_ether_addr(dev->dev_addr)) ++ memset(dev->dev_addr, 0, sizeof(dev->dev_addr)); ++ ++ for (i = 0; i < 6; i++) ++ printk(" %2.2x", dev->dev_addr[i]); ++ ++ if (((chip_version + 1) & 0xfffe) == ++ 0x2624) { /* Version 0x2623 or 0x2624 */ ++ i = a->read_csr(ioaddr, 80) & 0x0C00; /* Check tx_start_pt */ ++ printk("\n" KERN_INFO " tx_start_pt(0x%04x):", i); ++ switch (i >> 10) { ++ case 0: ++ printk(" 20 bytes,"); ++ break; ++ case 1: ++ printk(" 64 bytes,"); ++ break; ++ case 2: ++ printk(" 128 bytes,"); ++ break; ++ case 3: ++ printk("~220 bytes,"); ++ break; ++ } ++ i = a->read_bcr(ioaddr, 18); /* Check Burst/Bus control */ ++ printk(" BCR18(%x):", i & 0xffff); ++ if (i & (1 << 5)) ++ printk("BurstWrEn "); ++ if (i & (1 << 6)) ++ printk("BurstRdEn "); ++ if (i & (1 << 7)) ++ printk("DWordIO "); ++ if (i & (1 << 11)) ++ printk("NoUFlow "); ++ i = a->read_bcr(ioaddr, 25); ++ printk("\n" KERN_INFO " SRAMSIZE=0x%04x,", i << 8); ++ i = a->read_bcr(ioaddr, 26); ++ printk(" SRAM_BND=0x%04x,", i << 8); ++ i = a->read_bcr(ioaddr, 27); ++ if (i & (1 << 14)) ++ printk("LowLatRx"); ++ } ++ ++ dev->base_addr = ioaddr; ++ if (request_region(ioaddr, PCNET32_TOTAL_SIZE, chipname) == NULL) ++ return -EBUSY; ++ ++ /* pci_alloc_consistent returns page-aligned memory, so we do not have to check the alignment */ ++ if ((lp = pci_alloc_consistent(pdev, sizeof(*lp), &lp_dma_addr)) == ++ NULL) { ++ release_region(ioaddr, PCNET32_TOTAL_SIZE); ++ return -ENOMEM; ++ } ++ ++ memset(lp, 0, sizeof(*lp)); ++ lp->dma_addr = lp_dma_addr; ++ lp->pci_dev = pdev; ++ ++ rtdm_lock_init(&lp->lock); ++ ++ dev->priv = lp; ++ lp->name = chipname; ++ lp->shared_irq = shared; ++ lp->mii_if.full_duplex = fdx; ++ lp->dxsuflo = dxsuflo; ++ lp->ltint = ltint; ++ lp->mii = mii; ++ if 
((cards_found >= MAX_UNITS) || ++ (options[cards_found] > (int)sizeof(options_mapping))) ++ lp->options = PCNET32_PORT_ASEL; ++ else ++ lp->options = options_mapping[options[cards_found]]; ++ /*** RTnet *** ++ lp->mii_if.dev = dev; ++ lp->mii_if.mdio_read = mdio_read; ++ lp->mii_if.mdio_write = mdio_write; ++ *** RTnet ***/ ++ ++ if (fdx && !(lp->options & PCNET32_PORT_ASEL) && ++ ((cards_found >= MAX_UNITS) || full_duplex[cards_found])) ++ lp->options |= PCNET32_PORT_FD; ++ ++ if (!a) { ++ printk(KERN_ERR PFX "No access methods\n"); ++ pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); ++ release_region(ioaddr, PCNET32_TOTAL_SIZE); ++ return -ENODEV; ++ } ++ lp->a = *a; ++ ++ /* detect special T1/E1 WAN card by checking for MAC address */ ++ if (dev->dev_addr[0] == 0x00 && dev->dev_addr[1] == 0xe0 && ++ dev->dev_addr[2] == 0x75) ++ lp->options = PCNET32_PORT_FD | PCNET32_PORT_GPSI; ++ ++ lp->init_block.mode = le16_to_cpu(0x0003); /* Disable Rx and Tx. */ ++ lp->init_block.tlen_rlen = ++ le16_to_cpu(TX_RING_LEN_BITS | RX_RING_LEN_BITS); ++ for (i = 0; i < 6; i++) ++ lp->init_block.phys_addr[i] = dev->dev_addr[i]; ++ lp->init_block.filter[0] = 0x00000000; ++ lp->init_block.filter[1] = 0x00000000; ++ lp->init_block.rx_ring = (u32)le32_to_cpu( ++ lp->dma_addr + offsetof(struct pcnet32_private, rx_ring)); ++ lp->init_block.tx_ring = (u32)le32_to_cpu( ++ lp->dma_addr + offsetof(struct pcnet32_private, tx_ring)); ++ ++ /* switch pcnet32 to 32bit mode */ ++ a->write_bcr(ioaddr, 20, 2); ++ ++ a->write_csr( ++ ioaddr, 1, ++ (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) & ++ 0xffff); ++ a->write_csr( ++ ioaddr, 2, ++ (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) >> ++ 16); ++ ++ if (irq_line) { ++ dev->irq = irq_line; ++ } ++ ++ if (dev->irq >= 2) ++ printk(" assigned IRQ %d.\n", dev->irq); ++ else { ++ unsigned long irq_mask = probe_irq_on(); ++ ++ /* ++ * To auto-IRQ we enable the initialization-done and DMA error ++ * interrupts. For ISA boards we get a DMA error, but VLB and PCI ++ * boards will work. ++ */ ++ /* Trigger an initialization just for the interrupt. */ ++ a->write_csr(ioaddr, 0, 0x41); ++ mdelay(1); ++ ++ dev->irq = probe_irq_off(irq_mask); ++ if (dev->irq) ++ printk(", probed IRQ %d.\n", dev->irq); ++ else { ++ printk(", failed to detect IRQ line.\n"); ++ pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, ++ lp->dma_addr); ++ release_region(ioaddr, PCNET32_TOTAL_SIZE); ++ return -ENODEV; ++ } ++ } ++ ++ /* The PCNET32-specific entries in the device structure. */ ++ dev->open = &pcnet32_open; ++ dev->hard_start_xmit = &pcnet32_start_xmit; ++ dev->stop = &pcnet32_close; ++ dev->get_stats = &pcnet32_get_stats; ++ /*** RTnet *** ++ dev->set_multicast_list = &pcnet32_set_multicast_list; ++ dev->do_ioctl = &pcnet32_ioctl; ++ dev->tx_timeout = pcnet32_tx_timeout; ++ dev->watchdog_timeo = (5*HZ); ++ *** RTnet ***/ ++ ++ lp->next = pcnet32_dev; ++ pcnet32_dev = dev; ++ ++ /* Fill in the generic fields of the device structure. 
*/ ++ /*** RTnet ***/ ++ if ((i = rt_register_rtnetdev(dev))) { ++ pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); ++ release_region(ioaddr, PCNET32_TOTAL_SIZE); ++ rtdev_free(dev); ++ return i; ++ } ++ /*** RTnet ***/ ++ ++ printk(KERN_INFO "%s: registered as %s\n", dev->name, lp->name); ++ return 0; ++} ++ ++static int pcnet32_open(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct pcnet32_private *lp = dev->priv; ++ unsigned long ioaddr = dev->base_addr; ++ u16 val; ++ int i; ++ ++ /*** RTnet ***/ ++ if (dev->irq == 0) ++ return -EAGAIN; ++ ++ rt_stack_connect(dev, &STACK_manager); ++ ++ i = rtdm_irq_request(&lp->irq_handle, dev->irq, pcnet32_interrupt, ++ RTDM_IRQTYPE_SHARED, "rt_pcnet32", dev); ++ if (i) ++ return i; ++ /*** RTnet ***/ ++ ++ /* Check for a valid station address */ ++ if (!is_valid_ether_addr(dev->dev_addr)) ++ return -EINVAL; ++ ++ /* Reset the PCNET32 */ ++ lp->a.reset(ioaddr); ++ ++ /* switch pcnet32 to 32bit mode */ ++ lp->a.write_bcr(ioaddr, 20, 2); ++ ++ if (pcnet32_debug > 1) ++ printk(KERN_DEBUG ++ "%s: pcnet32_open() irq %d tx/rx rings %#x/%#x init %#x.\n", ++ dev->name, dev->irq, ++ (u32)(lp->dma_addr + ++ offsetof(struct pcnet32_private, tx_ring)), ++ (u32)(lp->dma_addr + ++ offsetof(struct pcnet32_private, rx_ring)), ++ (u32)(lp->dma_addr + ++ offsetof(struct pcnet32_private, init_block))); ++ ++ /* set/reset autoselect bit */ ++ val = lp->a.read_bcr(ioaddr, 2) & ~2; ++ if (lp->options & PCNET32_PORT_ASEL) ++ val |= 2; ++ lp->a.write_bcr(ioaddr, 2, val); ++ ++ /* handle full duplex setting */ ++ if (lp->mii_if.full_duplex) { ++ val = lp->a.read_bcr(ioaddr, 9) & ~3; ++ if (lp->options & PCNET32_PORT_FD) { ++ val |= 1; ++ if (lp->options == (PCNET32_PORT_FD | PCNET32_PORT_AUI)) ++ val |= 2; ++ } else if (lp->options & PCNET32_PORT_ASEL) { ++ /* workaround of xSeries250, turn on for 79C975 only */ ++ i = ((lp->a.read_csr(ioaddr, 88) | ++ (lp->a.read_csr(ioaddr, 89) << 16)) >> ++ 12) & ++ 0xffff; ++ if (i == 0x2627) ++ val |= 3; ++ } ++ lp->a.write_bcr(ioaddr, 9, val); ++ } ++ ++ /* set/reset GPSI bit in test register */ ++ val = lp->a.read_csr(ioaddr, 124) & ~0x10; ++ if ((lp->options & PCNET32_PORT_PORTSEL) == PCNET32_PORT_GPSI) ++ val |= 0x10; ++ lp->a.write_csr(ioaddr, 124, val); ++ ++ if (lp->mii && !(lp->options & PCNET32_PORT_ASEL)) { ++ val = lp->a.read_bcr(ioaddr, 32) & ++ ~0x38; /* disable Auto Negotiation, set 10Mpbs, HD */ ++ if (lp->options & PCNET32_PORT_FD) ++ val |= 0x10; ++ if (lp->options & PCNET32_PORT_100) ++ val |= 0x08; ++ lp->a.write_bcr(ioaddr, 32, val); ++ } else { ++ if (lp->options & ++ PCNET32_PORT_ASEL) { /* enable auto negotiate, setup, disable fd */ ++ val = lp->a.read_bcr(ioaddr, 32) & ~0x98; ++ val |= 0x20; ++ lp->a.write_bcr(ioaddr, 32, val); ++ } ++ } ++ ++#ifdef DO_DXSUFLO ++ if (lp->dxsuflo) { /* Disable transmit stop on underflow */ ++ val = lp->a.read_csr(ioaddr, 3); ++ val |= 0x40; ++ lp->a.write_csr(ioaddr, 3, val); ++ } ++#endif ++ ++ if (lp->ltint) { /* Enable TxDone-intr inhibitor */ ++ val = lp->a.read_csr(ioaddr, 5); ++ val |= (1 << 14); ++ lp->a.write_csr(ioaddr, 5, val); ++ } ++ ++ lp->init_block.mode = ++ le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); ++ lp->init_block.filter[0] = 0x00000000; ++ lp->init_block.filter[1] = 0x00000000; ++ if (pcnet32_init_ring(dev)) ++ return -ENOMEM; ++ ++ /* Re-initialize the PCNET32, and start it when done. 
*/ ++ lp->a.write_csr( ++ ioaddr, 1, ++ (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) & ++ 0xffff); ++ lp->a.write_csr( ++ ioaddr, 2, ++ (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) >> ++ 16); ++ ++ lp->a.write_csr(ioaddr, 4, 0x0915); ++ lp->a.write_csr(ioaddr, 0, 0x0001); ++ ++ rtnetif_start_queue(dev); /*** RTnet ***/ ++ ++ i = 0; ++ while (i++ < 100) ++ if (lp->a.read_csr(ioaddr, 0) & 0x0100) ++ break; ++ /* ++ * We used to clear the InitDone bit, 0x0100, here but Mark Stockton ++ * reports that doing so triggers a bug in the '974. ++ */ ++ lp->a.write_csr(ioaddr, 0, 0x0042); ++ ++ if (pcnet32_debug > 2) ++ printk(KERN_DEBUG ++ "%s: pcnet32 open after %d ticks, init block %#x csr0 %4.4x.\n", ++ dev->name, i, ++ (u32)(lp->dma_addr + ++ offsetof(struct pcnet32_private, init_block)), ++ lp->a.read_csr(ioaddr, 0)); ++ ++ return 0; /* Always succeed */ ++} ++ ++/* ++ * The LANCE has been halted for one reason or another (busmaster memory ++ * arbitration error, Tx FIFO underflow, driver stopped it to reconfigure, ++ * etc.). Modern LANCE variants always reload their ring-buffer ++ * configuration when restarted, so we must reinitialize our ring ++ * context before restarting. As part of this reinitialization, ++ * find all packets still on the Tx ring and pretend that they had been ++ * sent (in effect, drop the packets on the floor) - the higher-level ++ * protocols will time out and retransmit. It'd be better to shuffle ++ * these skbs to a temp list and then actually re-Tx them after ++ * restarting the chip, but I'm too lazy to do so right now. dplatt@3do.com ++ */ ++ ++/*** RTnet *** ++static void ++pcnet32_purge_tx_ring(struct net_device *dev) ++{ ++ struct pcnet32_private *lp = dev->priv; ++ int i; ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ if (lp->tx_skbuff[i]) { ++ pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE); ++ dev_kfree_skb(lp->tx_skbuff[i]); ++ lp->tx_skbuff[i] = NULL; ++ lp->tx_dma_addr[i] = 0; ++ } ++ } ++} ++ *** RTnet ***/ ++ ++/* Initialize the PCNET32 Rx and Tx rings. */ ++static int pcnet32_init_ring(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct pcnet32_private *lp = dev->priv; ++ int i; ++ ++ lp->tx_full = 0; ++ lp->cur_rx = lp->cur_tx = 0; ++ lp->dirty_rx = lp->dirty_tx = 0; ++ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct rtskb *rx_skbuff = lp->rx_skbuff[i]; /*** RTnet ***/ ++ if (rx_skbuff == NULL) { ++ if (!(rx_skbuff = lp->rx_skbuff[i] = ++ rtnetdev_alloc_rtskb( ++ dev, ++ PKT_BUF_SZ))) { /*** RTnet ***/ ++ /* there is not much, we can do at this point */ ++ printk(KERN_ERR ++ "%s: pcnet32_init_ring rtnetdev_alloc_rtskb failed.\n", ++ dev->name); ++ return -1; ++ } ++ rtskb_reserve(rx_skbuff, 2); /*** RTnet ***/ ++ } ++ lp->rx_dma_addr[i] = ++ pci_map_single(lp->pci_dev, rx_skbuff->tail, ++ rx_skbuff->len, PCI_DMA_FROMDEVICE); ++ lp->rx_ring[i].base = (u32)le32_to_cpu(lp->rx_dma_addr[i]); ++ lp->rx_ring[i].buf_length = le16_to_cpu(-PKT_BUF_SZ); ++ lp->rx_ring[i].status = le16_to_cpu(0x8000); ++ } ++ /* The Tx buffer address is filled in as needed, but we do need to clear ++ the upper ownership bit. 
*/ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ lp->tx_ring[i].base = 0; ++ lp->tx_ring[i].status = 0; ++ lp->tx_dma_addr[i] = 0; ++ } ++ ++ lp->init_block.tlen_rlen = ++ le16_to_cpu(TX_RING_LEN_BITS | RX_RING_LEN_BITS); ++ for (i = 0; i < 6; i++) ++ lp->init_block.phys_addr[i] = dev->dev_addr[i]; ++ lp->init_block.rx_ring = (u32)le32_to_cpu( ++ lp->dma_addr + offsetof(struct pcnet32_private, rx_ring)); ++ lp->init_block.tx_ring = (u32)le32_to_cpu( ++ lp->dma_addr + offsetof(struct pcnet32_private, tx_ring)); ++ return 0; ++} ++ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static int pcnet32_start_xmit(struct rtskb *skb, ++ struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct pcnet32_private *lp = dev->priv; ++ unsigned long ioaddr = dev->base_addr; ++ u16 status; ++ int entry; ++ rtdm_lockctx_t context; ++ ++ if (pcnet32_debug > 3) { ++ rtdm_printk(KERN_DEBUG ++ "%s: pcnet32_start_xmit() called, csr0 %4.4x.\n", ++ dev->name, lp->a.read_csr(ioaddr, 0)); ++ } ++ ++ /*** RTnet ***/ ++ rtdm_lock_get_irqsave(&lp->lock, context); ++ /*** RTnet ***/ ++ ++ /* Default status -- will not enable Successful-TxDone ++ * interrupt when that option is available to us. ++ */ ++ status = 0x8300; ++ if ((lp->ltint) && ((lp->cur_tx - lp->dirty_tx == TX_RING_SIZE / 2) || ++ (lp->cur_tx - lp->dirty_tx >= TX_RING_SIZE - 2))) { ++ /* Enable Successful-TxDone interrupt if we have ++ * 1/2 of, or nearly all of, our ring buffer Tx'd ++ * but not yet cleaned up. Thus, most of the time, ++ * we will not enable Successful-TxDone interrupts. ++ */ ++ status = 0x9300; ++ } ++ ++ /* Fill in a Tx ring entry */ ++ ++ /* Mask to ring buffer boundary. */ ++ entry = lp->cur_tx & TX_RING_MOD_MASK; ++ ++ /* Caution: the write order is important here, set the base address ++ with the "ownership" bits last. */ ++ ++ lp->tx_ring[entry].length = le16_to_cpu(-skb->len); ++ ++ lp->tx_ring[entry].misc = 0x00000000; ++ ++ lp->tx_skbuff[entry] = skb; ++ lp->tx_dma_addr[entry] = pci_map_single(lp->pci_dev, skb->data, ++ skb->len, PCI_DMA_TODEVICE); ++ lp->tx_ring[entry].base = (u32)le32_to_cpu(lp->tx_dma_addr[entry]); ++ ++ /*** RTnet ***/ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ /*** RTnet ***/ ++ ++ wmb(); ++ lp->tx_ring[entry].status = le16_to_cpu(status); ++ ++ lp->cur_tx++; ++ lp->stats.tx_bytes += skb->len; ++ ++ /* Trigger an immediate send poll. */ ++ lp->a.write_csr(ioaddr, 0, 0x0048); ++ ++ //dev->trans_start = jiffies; /*** RTnet ***/ ++ ++ if (lp->tx_ring[(entry + 1) & TX_RING_MOD_MASK].base == 0) ++ rtnetif_start_queue(dev); /*** RTnet ***/ ++ else { ++ lp->tx_full = 1; ++ rtnetif_stop_queue(dev); /*** RTnet ***/ ++ } ++ /*** RTnet ***/ ++ rtdm_lock_put_irqrestore(&lp->lock, context); ++ /*** RTnet ***/ ++ return 0; ++} ++ ++/* The PCNET32 interrupt handler. 
*/ ++static int pcnet32_interrupt(rtdm_irq_t *irq_handle) /*** RTnet ***/ ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); /*** RTnet ***/ ++ struct rtnet_device *dev = rtdm_irq_get_arg( ++ irq_handle, struct rtnet_device); /*** RTnet ***/ ++ struct pcnet32_private *lp; ++ unsigned long ioaddr; ++ u16 csr0, rap; ++ int boguscnt = max_interrupt_work; ++ int must_restart; ++ unsigned int old_packet_cnt; /*** RTnet ***/ ++ int ret = RTDM_IRQ_NONE; ++ ++ /*** RTnet *** ++ if (!dev) { ++ rtdm_printk (KERN_DEBUG "%s(): irq %d for unknown device\n", ++ __FUNCTION__, irq); ++ return; ++ } ++ *** RTnet ***/ ++ ++ ioaddr = dev->base_addr; ++ lp = dev->priv; ++ old_packet_cnt = lp->stats.rx_packets; /*** RTnet ***/ ++ ++ rtdm_lock_get(&lp->lock); /*** RTnet ***/ ++ ++ rap = lp->a.read_rap(ioaddr); ++ while ((csr0 = lp->a.read_csr(ioaddr, 0)) & 0x8600 && --boguscnt >= 0) { ++ /* Acknowledge all of the current interrupt sources ASAP. */ ++ lp->a.write_csr(ioaddr, 0, csr0 & ~0x004f); ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ must_restart = 0; ++ ++ if (pcnet32_debug > 5) ++ rtdm_printk( ++ KERN_DEBUG ++ "%s: interrupt csr0=%#2.2x new csr=%#2.2x.\n", ++ dev->name, csr0, lp->a.read_csr(ioaddr, 0)); ++ ++ if (csr0 & 0x0400) /* Rx interrupt */ ++ pcnet32_rx(dev, &time_stamp); ++ ++ if (csr0 & 0x0200) { /* Tx-done interrupt */ ++ unsigned int dirty_tx = lp->dirty_tx; ++ ++ while (dirty_tx < lp->cur_tx) { ++ int entry = dirty_tx & TX_RING_MOD_MASK; ++ int status = (short)le16_to_cpu( ++ lp->tx_ring[entry].status); ++ ++ if (status < 0) ++ break; /* It still hasn't been Txed */ ++ ++ lp->tx_ring[entry].base = 0; ++ ++ if (status & 0x4000) { ++ /* There was an major error, log it. */ ++ int err_status = le32_to_cpu( ++ lp->tx_ring[entry].misc); ++ lp->stats.tx_errors++; ++ if (err_status & 0x04000000) ++ lp->stats.tx_aborted_errors++; ++ if (err_status & 0x08000000) ++ lp->stats.tx_carrier_errors++; ++ if (err_status & 0x10000000) ++ lp->stats.tx_window_errors++; ++#ifndef DO_DXSUFLO ++ if (err_status & 0x40000000) { ++ lp->stats.tx_fifo_errors++; ++ /* Ackk! On FIFO errors the Tx unit is turned off! */ ++ /* Remove this verbosity later! */ ++ rtdm_printk( ++ KERN_ERR ++ "%s: Tx FIFO error! CSR0=%4.4x\n", ++ dev->name, csr0); ++ must_restart = 1; ++ } ++#else ++ if (err_status & 0x40000000) { ++ lp->stats.tx_fifo_errors++; ++ if (!lp->dxsuflo) { /* If controller doesn't recover ... */ ++ /* Ackk! On FIFO errors the Tx unit is turned off! */ ++ /* Remove this verbosity later! */ ++ rtdm_printk( ++ KERN_ERR ++ "%s: Tx FIFO error! CSR0=%4.4x\n", ++ dev->name, ++ csr0); ++ must_restart = 1; ++ } ++ } ++#endif ++ } else { ++ if (status & 0x1800) ++ lp->stats.collisions++; ++ lp->stats.tx_packets++; ++ } ++ ++ /* We must free the original skb */ ++ if (lp->tx_skbuff[entry]) { ++ pci_unmap_single( ++ lp->pci_dev, ++ lp->tx_dma_addr[entry], ++ lp->tx_skbuff[entry]->len, ++ PCI_DMA_TODEVICE); ++ dev_kfree_rtskb( ++ lp->tx_skbuff[entry]); /*** RTnet ***/ ++ lp->tx_skbuff[entry] = 0; ++ lp->tx_dma_addr[entry] = 0; ++ } ++ dirty_tx++; ++ } ++ ++ if (lp->cur_tx - dirty_tx >= TX_RING_SIZE) { ++ rtdm_printk( ++ KERN_ERR ++ "%s: out-of-sync dirty pointer, %d vs. %d, full=%d.\n", ++ dev->name, dirty_tx, lp->cur_tx, ++ lp->tx_full); ++ dirty_tx += TX_RING_SIZE; ++ } ++ ++ if (lp->tx_full && ++ rtnetif_queue_stopped(dev) && /*** RTnet ***/ ++ dirty_tx > lp->cur_tx - TX_RING_SIZE + 2) { ++ /* The ring is no longer full, clear tbusy. 
*/ ++ lp->tx_full = 0; ++ rtnetif_wake_queue(dev); /*** RTnet ***/ ++ } ++ lp->dirty_tx = dirty_tx; ++ } ++ ++ /* Log misc errors. */ ++ if (csr0 & 0x4000) ++ lp->stats.tx_errors++; /* Tx babble. */ ++ if (csr0 & 0x1000) { ++ /* ++ * this happens when our receive ring is full. This shouldn't ++ * be a problem as we will see normal rx interrupts for the frames ++ * in the receive ring. But there are some PCI chipsets (I can reproduce ++ * this on SP3G with Intel saturn chipset) which have sometimes problems ++ * and will fill up the receive ring with error descriptors. In this ++ * situation we don't get a rx interrupt, but a missed frame interrupt sooner ++ * or later. So we try to clean up our receive ring here. ++ */ ++ pcnet32_rx(dev, &time_stamp); ++ lp->stats.rx_errors++; /* Missed a Rx frame. */ ++ } ++ if (csr0 & 0x0800) { ++ rtdm_printk( ++ KERN_ERR ++ "%s: Bus master arbitration failure, status %4.4x.\n", ++ dev->name, csr0); ++ /* unlike for the lance, there is no restart needed */ ++ } ++ ++ /*** RTnet ***/ ++ /*** RTnet ***/ ++ } ++ ++ /* Clear any other interrupt, and set interrupt enable. */ ++ lp->a.write_csr(ioaddr, 0, 0x7940); ++ lp->a.write_rap(ioaddr, rap); ++ ++ if (pcnet32_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt, csr0=%#4.4x.\n", ++ dev->name, lp->a.read_csr(ioaddr, 0)); ++ ++ /*** RTnet ***/ ++ rtdm_lock_put(&lp->lock); ++ ++ if (old_packet_cnt != lp->stats.rx_packets) ++ rt_mark_stack_mgr(dev); ++ ++ return ret; ++ /*** RTnet ***/ ++} ++ ++static int pcnet32_rx(struct rtnet_device *dev, ++ nanosecs_abs_t *time_stamp) /*** RTnet ***/ ++{ ++ struct pcnet32_private *lp = dev->priv; ++ int entry = lp->cur_rx & RX_RING_MOD_MASK; ++ ++ /* If we own the next entry, it's a new packet. Send it up. */ ++ while ((short)le16_to_cpu(lp->rx_ring[entry].status) >= 0) { ++ int status = (short)le16_to_cpu(lp->rx_ring[entry].status) >> 8; ++ ++ if (status != 0x03) { /* There was an error. */ ++ /* ++ * There is a tricky error noted by John Murphy, ++ * to Russ Nelson: Even with full-sized ++ * buffers it's possible for a jabber packet to use two ++ * buffers, with only the last correctly noting the error. ++ */ ++ if (status & ++ 0x01) /* Only count a general error at the */ ++ lp->stats.rx_errors++; /* end of a packet.*/ ++ if (status & 0x20) ++ lp->stats.rx_frame_errors++; ++ if (status & 0x10) ++ lp->stats.rx_over_errors++; ++ if (status & 0x08) ++ lp->stats.rx_crc_errors++; ++ if (status & 0x04) ++ lp->stats.rx_fifo_errors++; ++ lp->rx_ring[entry].status &= le16_to_cpu(0x03ff); ++ } else { ++ /* Malloc up new buffer, compatible with net-2e. 
*/ ++ short pkt_len = ++ (le32_to_cpu(lp->rx_ring[entry].msg_length) & ++ 0xfff) - ++ 4; ++ struct rtskb *skb; /*** RTnet ***/ ++ ++ if (pkt_len < 60) { ++ rtdm_printk(KERN_ERR "%s: Runt packet!\n", ++ dev->name); ++ lp->stats.rx_errors++; ++ } else { ++ /*** RTnet ***/ ++ /*int rx_in_place = 0;*/ ++ ++ /*if (pkt_len > rx_copybreak)*/ { ++ struct rtskb *newskb; ++ ++ if ((newskb = rtnetdev_alloc_rtskb( ++ dev, PKT_BUF_SZ))) { ++ rtskb_reserve(newskb, 2); ++ skb = lp->rx_skbuff[entry]; ++ pci_unmap_single( ++ lp->pci_dev, ++ lp->rx_dma_addr[entry], ++ skb->len, ++ PCI_DMA_FROMDEVICE); ++ rtskb_put(skb, pkt_len); ++ lp->rx_skbuff[entry] = newskb; ++ lp->rx_dma_addr ++ [entry] = pci_map_single( ++ lp->pci_dev, ++ newskb->tail, ++ newskb->len, ++ PCI_DMA_FROMDEVICE); ++ lp->rx_ring[entry] ++ .base = le32_to_cpu( ++ lp->rx_dma_addr[entry]); ++ /*rx_in_place = 1;*/ ++ } else ++ skb = NULL; ++ } /*else { ++ skb = dev_alloc_skb(pkt_len+2); ++ }*/ ++ /*** RTnet ***/ ++ ++ if (skb == NULL) { ++ int i; ++ rtdm_printk( ++ KERN_ERR ++ "%s: Memory squeeze, deferring packet.\n", ++ dev->name); ++ for (i = 0; i < RX_RING_SIZE; i++) ++ if ((short)le16_to_cpu( ++ lp->rx_ring[(entry + ++ i) & ++ RX_RING_MOD_MASK] ++ .status) < ++ 0) ++ break; ++ ++ if (i > RX_RING_SIZE - 2) { ++ lp->stats.rx_dropped++; ++ lp->rx_ring[entry].status |= ++ le16_to_cpu(0x8000); ++ lp->cur_rx++; ++ } ++ break; ++ } ++ /*** RTnet ***/ ++ lp->stats.rx_bytes += skb->len; ++ skb->protocol = rt_eth_type_trans(skb, dev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ ///dev->last_rx = jiffies; ++ /*** RTnet ***/ ++ lp->stats.rx_packets++; ++ } ++ } ++ /* ++ * The docs say that the buffer length isn't touched, but Andrew Boyd ++ * of QNX reports that some revs of the 79C965 clear it. ++ */ ++ lp->rx_ring[entry].buf_length = le16_to_cpu(-PKT_BUF_SZ); ++ lp->rx_ring[entry].status |= le16_to_cpu(0x8000); ++ entry = (++lp->cur_rx) & RX_RING_MOD_MASK; ++ } ++ ++ return 0; ++} ++ ++static int pcnet32_close(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ unsigned long ioaddr = dev->base_addr; ++ struct pcnet32_private *lp = dev->priv; ++ int i; ++ ++ rtnetif_stop_queue(dev); /*** RTnet ***/ ++ ++ lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112); ++ ++ if (pcnet32_debug > 1) ++ printk(KERN_DEBUG ++ "%s: Shutting down ethercard, status was %2.2x.\n", ++ dev->name, lp->a.read_csr(ioaddr, 0)); ++ ++ /* We stop the PCNET32 here -- it occasionally polls memory if we don't. 
*/ ++ lp->a.write_csr(ioaddr, 0, 0x0004); ++ ++ /* ++ * Switch back to 16bit mode to avoid problems with dumb ++ * DOS packet driver after a warm reboot ++ */ ++ lp->a.write_bcr(ioaddr, 20, 4); ++ ++ /*** RTnet ***/ ++ if ((i = rtdm_irq_free(&lp->irq_handle)) < 0) ++ return i; ++ ++ rt_stack_disconnect(dev); ++ /*** RTnet ***/ ++ ++ /* free all allocated skbuffs */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ lp->rx_ring[i].status = 0; ++ if (lp->rx_skbuff[i]) { ++ pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], ++ lp->rx_skbuff[i]->len, ++ PCI_DMA_FROMDEVICE); ++ dev_kfree_rtskb(lp->rx_skbuff[i]); /*** RTnet ***/ ++ } ++ lp->rx_skbuff[i] = NULL; ++ lp->rx_dma_addr[i] = 0; ++ } ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ if (lp->tx_skbuff[i]) { ++ pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], ++ lp->tx_skbuff[i]->len, ++ PCI_DMA_TODEVICE); ++ dev_kfree_rtskb(lp->tx_skbuff[i]); /*** RTnet ***/ ++ } ++ lp->tx_skbuff[i] = NULL; ++ lp->tx_dma_addr[i] = 0; ++ } ++ ++ return 0; ++} ++ ++/*** RTnet ***/ ++static struct net_device_stats *pcnet32_get_stats(struct rtnet_device *rtdev) ++{ ++ struct pcnet32_private *lp = rtdev->priv; ++ unsigned long ioaddr = rtdev->base_addr; ++ rtdm_lockctx_t context; ++ u16 saved_addr; ++ ++ rtdm_lock_get_irqsave(&lp->lock, context); ++ saved_addr = lp->a.read_rap(ioaddr); ++ lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112); ++ lp->a.write_rap(ioaddr, saved_addr); ++ rtdm_lock_put_irqrestore(&lp->lock, context); ++ ++ return &lp->stats; ++} ++ ++/*** RTnet ***/ ++ ++static struct pci_driver pcnet32_driver = { ++ name: DRV_NAME, ++ probe: pcnet32_probe_pci, ++ id_table: pcnet32_pci_tbl, ++}; ++ ++/* An additional parameter that may be passed in... */ ++static int local_debug = -1; ++static int tx_start_pt = -1; ++ ++module_param_named(debug, local_debug, int, 0444); ++MODULE_PARM_DESC(debug, DRV_NAME " debug level (0-6)"); ++module_param(max_interrupt_work, int, 0444); ++MODULE_PARM_DESC(max_interrupt_work, ++ DRV_NAME " maximum events handled per interrupt"); ++/*** RTnet *** ++MODULE_PARM(rx_copybreak, "i"); ++MODULE_PARM_DESC(rx_copybreak, DRV_NAME " copy breakpoint for copy-only-tiny-frames"); ++ *** RTnet ***/ ++module_param(tx_start_pt, int, 0444); ++MODULE_PARM_DESC(tx_start_pt, DRV_NAME " transmit start point (0-3)"); ++module_param(pcnet32vlb, int, 0444); ++MODULE_PARM_DESC(pcnet32vlb, DRV_NAME " Vesa local bus (VLB) support (0/1)"); ++module_param_array(options, int, NULL, 0444); ++MODULE_PARM_DESC(options, DRV_NAME " initial option setting(s) (0-15)"); ++module_param_array(full_duplex, int, NULL, 0444); ++MODULE_PARM_DESC(full_duplex, DRV_NAME " full duplex setting(s) (1)"); ++ ++MODULE_AUTHOR("Jan Kiszka"); ++MODULE_DESCRIPTION("RTnet Driver for PCnet32 and PCnetPCI based ethercards"); ++MODULE_LICENSE("GPL"); ++ ++static int __init pcnet32_init_module(void) ++{ ++ printk(KERN_INFO "%s", version); ++ ++ if (local_debug > 0) ++ pcnet32_debug = local_debug; ++ ++ if ((tx_start_pt >= 0) && (tx_start_pt <= 3)) ++ tx_start = tx_start_pt; ++ ++ /* find the PCI devices */ ++ if (!pci_register_driver(&pcnet32_driver)) ++ pcnet32_have_pci = 1; ++ ++ /* should we find any remaining VLbus devices ? */ ++ if (pcnet32vlb) ++ pcnet32_probe_vlbus(); ++ ++ if (cards_found) ++ printk(KERN_INFO PFX "%d cards_found.\n", cards_found); ++ ++ return (pcnet32_have_pci + cards_found) ? 
0 : -ENODEV; ++} ++ ++static void __exit pcnet32_cleanup_module(void) ++{ ++ struct rtnet_device *next_dev; /*** RTnet ***/ ++ ++ /* No need to check MOD_IN_USE, as sys_delete_module() checks. */ ++ while (pcnet32_dev) { ++ struct pcnet32_private *lp = pcnet32_dev->priv; ++ next_dev = lp->next; ++ /*** RTnet ***/ ++ rt_unregister_rtnetdev(pcnet32_dev); ++ rt_rtdev_disconnect(pcnet32_dev); ++ /*** RTnet ***/ ++ release_region(pcnet32_dev->base_addr, PCNET32_TOTAL_SIZE); ++ pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); ++ /*** RTnet ***/ ++ rtdev_free(pcnet32_dev); ++ /*** RTnet ***/ ++ pcnet32_dev = next_dev; ++ } ++ ++ if (pcnet32_have_pci) ++ pci_unregister_driver(&pcnet32_driver); ++} ++ ++module_init(pcnet32_init_module); ++module_exit(pcnet32_cleanup_module); +--- linux/drivers/xenomai/net/drivers/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/Makefile 2021-04-07 16:01:27.284634085 +0800 +@@ -0,0 +1,69 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_EXP_DRIVERS) += experimental/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000) += e1000/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000E) += e1000e/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_MPC52XX_FEC) += mpc52xx_fec/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_TULIP) += tulip/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_IGB) += igb/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_8139) += rt_8139too.o ++ ++rt_8139too-y := 8139too.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_AT91_ETHER) += rt_at91_ether.o ++ ++rt_at91_ether-y := at91_ether.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100) += rt_eepro100.o ++ ++rt_eepro100-y := eepro100.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK) += rt_loopback.o ++ ++rt_loopback-y := loopback.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_FCC_ENET) += rt_mpc8260_fcc_enet.o ++ ++rt_mpc8260_fcc_enet-y := mpc8260_fcc_enet.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_SCC_ENET) += rt_mpc8xx_enet.o ++ ++rt_mpc8xx_enet-y := mpc8xx_enet.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_FEC_ENET) += rt_mpc8xx_fec.o ++ ++rt_mpc8xx_fec-y := mpc8xx_fec.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_FEC) += rt_fec.o ++ ++rt_fec-y := fec.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_NATSEMI) += rt_natsemi.o ++ ++rt_natsemi-y := natsemi.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_PCNET32) += rt_pcnet32.o ++ ++rt_pcnet32-y := pcnet32.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_SMC91111) += rt_smc91111.o ++ ++rt_smc91111-y := smc91111.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_MACB) += rt_macb.o ++ ++rt_macb-y := macb.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_VIA_RHINE) += rt_via-rhine.o ++ ++rt_via-rhine-y := via-rhine.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_R8169) += rt_r8169.o ++ ++rt_r8169-y := r8169.o +--- linux/drivers/xenomai/net/drivers/rt_eth1394.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/rt_eth1394.h 2021-04-07 16:01:27.279634092 +0800 +@@ -0,0 +1,240 @@ ++/* ++ * eth1394.h -- Driver for Ethernet emulation over FireWire, (adapted from Linux1394) ++ * working under RTnet. ++ * ++ * Copyright (C) 2005 Zhang Yuchen ++ * ++ * Mainly based on work by Emanuel Pirker and Andreas E. Bombe ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __ETH1394_H ++#define __ETH1394_H ++ ++#include ++#include ++#include ++#include ++ ++ ++/* Register for incoming packets. This is 4096 bytes, which supports up to ++ * S3200 (per Table 16-3 of IEEE 1394b-2002). */ ++#define ETHER1394_REGION_ADDR_LEN 4096 ++#define ETHER1394_REGION_ADDR 0xfffff0200000ULL ++#define ETHER1394_REGION_ADDR_END (ETHER1394_REGION_ADDR + ETHER1394_REGION_ADDR_LEN) ++ ++/* GASP identifier numbers for IPv4 over IEEE 1394 */ ++#define ETHER1394_GASP_SPECIFIER_ID 0x00005E ++#define ETHER1394_GASP_SPECIFIER_ID_HI ((ETHER1394_GASP_SPECIFIER_ID >> 8) & 0xffff) ++#define ETHER1394_GASP_SPECIFIER_ID_LO (ETHER1394_GASP_SPECIFIER_ID & 0xff) ++#define ETHER1394_GASP_VERSION 1 ++ ++#define ETHER1394_GASP_OVERHEAD (2 * sizeof(quadlet_t)) /* GASP header overhead */ ++ ++#define ETHER1394_GASP_BUFFERS 16 ++ ++#define ETH1394_BC_CHANNEL 31 ++ ++#define ALL_NODES 0x003f //stolen from ieee1394_types.h ++/* Node set == 64 */ ++#define NODE_SET (ALL_NODES + 1) ++ ++enum eth1394_bc_states { ETHER1394_BC_CLOSED, ETHER1394_BC_OPENED, ++ ETHER1394_BC_CHECK, ETHER1394_BC_ERROR, ++ ETHER1394_BC_RUNNING, ++ ETHER1394_BC_STOPPED }; ++ ++#define TX_RING_SIZE 32 ++#define RX_RING_SIZE 8 /* RX_RING_SIZE*2 rtskbs will be preallocated */ ++ ++struct pdg_list { ++ struct list_head list; /* partial datagram list per node */ ++ unsigned int sz; /* partial datagram list size per node */ ++ rtdm_lock_t lock; /* partial datagram lock */ ++}; ++ ++/* IP1394 headers */ ++#include ++ ++/* Unfragmented */ ++#if defined __BIG_ENDIAN_BITFIELD ++struct eth1394_uf_hdr { ++ u16 lf:2; ++ u16 res:14; ++ u16 ether_type; /* Ethernet packet type */ ++} __attribute__((packed)); ++#elif defined __LITTLE_ENDIAN_BITFIELD ++struct eth1394_uf_hdr { ++ u16 res:14; ++ u16 lf:2; ++ u16 ether_type; ++} __attribute__((packed)); ++#else ++#error Unknown bit field type ++#endif ++ ++/* End of IP1394 headers */ ++ ++/* Fragment types */ ++#define ETH1394_HDR_LF_UF 0 /* unfragmented */ ++#define ETH1394_HDR_LF_FF 1 /* first fragment */ ++#define ETH1394_HDR_LF_LF 2 /* last fragment */ ++#define ETH1394_HDR_LF_IF 3 /* interior fragment */ ++ ++#define IP1394_HW_ADDR_LEN 2 /* In RFC, the value is 16; here use the value for modified spec */ ++ ++/* Our arp packet (ARPHRD_IEEE1394) */ ++struct eth1394_arp { ++ u16 hw_type; /* 0x0018 */ ++ u16 proto_type; /* 0x0080 */ ++ u8 hw_addr_len; /* 2 */ ++ u8 ip_addr_len; /* 4 */ ++ u16 opcode; /* ARP Opcode: 1 for req, 2 for resp */ ++ /* Above is exactly the same format as struct arphdr */ ++ ++ unsigned char s_uniq_id[ETH_ALEN]; /* Sender's node id padded with zeros */ ++ u8 max_rec; /* Sender's max packet size */ ++ u8 sspd; /* Sender's max speed */ ++ u32 sip; /* Sender's IP Address */ ++ u32 tip; /* IP Address of requested hw addr */ ++}; ++ ++ ++/* Network timeout */ ++#define ETHER1394_TIMEOUT 100000 ++ ++/* First fragment */ ++#if defined __BIG_ENDIAN_BITFIELD ++struct eth1394_ff_hdr { ++ u16 lf:2; ++ u16 res1:2; ++ u16 dg_size:12; /* Datagram size */ ++ u16 ether_type; /* Ethernet packet type */ 
++ u16 dgl; /* Datagram label */ ++ u16 res2; ++} __attribute__((packed)); ++#elif defined __LITTLE_ENDIAN_BITFIELD ++struct eth1394_ff_hdr { ++ u16 dg_size:12; ++ u16 res1:2; ++ u16 lf:2; ++ u16 ether_type; ++ u16 dgl; ++ u16 res2; ++} __attribute__((packed)); ++#else ++#error Unknown bit field type ++#endif ++ ++/* XXX: Subsequent fragments, including last */ ++#if defined __BIG_ENDIAN_BITFIELD ++struct eth1394_sf_hdr { ++ u16 lf:2; ++ u16 res1:2; ++ u16 dg_size:12; /* Datagram size */ ++ u16 res2:4; ++ u16 fg_off:12; /* Fragment offset */ ++ u16 dgl; /* Datagram label */ ++ u16 res3; ++} __attribute__((packed)); ++#elif defined __LITTLE_ENDIAN_BITFIELD ++struct eth1394_sf_hdr { ++ u16 dg_size:12; ++ u16 res1:2; ++ u16 lf:2; ++ u16 fg_off:12; ++ u16 res2:4; ++ u16 dgl; ++ u16 res3; ++} __attribute__((packed)); ++#else ++#error Unknown bit field type ++#endif ++ ++#if defined __BIG_ENDIAN_BITFIELD ++struct eth1394_common_hdr { ++ u16 lf:2; ++ u16 pad1:14; ++} __attribute__((packed)); ++#elif defined __LITTLE_ENDIAN_BITFIELD ++struct eth1394_common_hdr { ++ u16 pad1:14; ++ u16 lf:2; ++} __attribute__((packed)); ++#else ++#error Unknown bit field type ++#endif ++ ++struct eth1394_hdr_words { ++ u16 word1; ++ u16 word2; ++ u16 word3; ++ u16 word4; ++}; ++ ++union eth1394_hdr { ++ struct eth1394_common_hdr common; ++ struct eth1394_uf_hdr uf; ++ struct eth1394_ff_hdr ff; ++ struct eth1394_sf_hdr sf; ++ struct eth1394_hdr_words words; ++}; ++ ++typedef enum {ETH1394_GASP, ETH1394_WRREQ} eth1394_tx_type; ++ ++/* This is our task struct. It's used for the packet complete callback. */ ++struct packet_task { ++ struct list_head lh; ++ struct rtskb *skb; ++ int outstanding_pkts; ++ eth1394_tx_type tx_type; ++ int max_payload; ++ struct hpsb_packet *packet; ++ struct eth1394_priv *priv; ++ union eth1394_hdr hdr; ++ u64 addr; ++ u16 dest_node; ++ unsigned int priority; //the priority mapped to priority on 1394 transaction ++}; ++ ++/* Private structure for our ethernet driver */ ++struct eth1394_priv { ++ struct net_device_stats stats; /* Device stats */ ++ struct hpsb_host *host; /* The card for this dev */ ++ u16 maxpayload[NODE_SET]; /* Max payload per node */ ++ unsigned char sspd[NODE_SET]; /* Max speed per node */ ++ rtdm_lock_t lock; /* Private lock */ ++ int broadcast_channel; /* Async stream Broadcast Channel */ ++ enum eth1394_bc_states bc_state; /* broadcast channel state */ ++ struct hpsb_iso *iso; ++ struct pdg_list pdg[ALL_NODES]; /* partial RX datagram lists */ ++ int dgl[NODE_SET]; /* Outgoing datagram label per node */ ++ ++ /* The addresses of a Tx/Rx-in-place packets/buffers. */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; ++ struct packet_task ptask_list[20]; //the list of pre-allocated ptask structure ++}; ++ ++ ++ ++struct host_info { ++ struct hpsb_host *host; ++ struct rtnet_device *dev; ++}; ++ ++ ++#endif /* __ETH1394_H */ +--- linux/drivers/xenomai/net/drivers/via-rhine.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/via-rhine.c 2021-04-07 16:01:27.274634099 +0800 +@@ -0,0 +1,1818 @@ ++/* via-rhine.c: A Linux Ethernet device driver for VIA Rhine family chips. */ ++/* ++ Written 1998-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms of ++ the GNU General Public License (GPL), incorporated herein by reference. ++ Drivers based on or derived from this code fall under the GPL and must ++ retain the authorship, copyright and license notice. 
This file is not ++ a complete program and may only be used when the entire operating ++ system is licensed under the GPL. ++ ++ This driver is designed for the VIA VT86C100A Rhine-I. ++ It also works with the 6102 Rhine-II, and 6105/6105M Rhine-III. ++ ++ The author may be reached as becker@scyld.com, or C/O ++ Scyld Computing Corporation ++ 410 Severn Ave., Suite 210 ++ Annapolis MD 21403 ++ ++ ++ This driver contains some changes from the original Donald Becker ++ version. He may or may not be interested in bug reports on this ++ code. You can find his versions at: ++ http://www.scyld.com/network/via-rhine.html ++ ++ ++ Linux kernel version history: ++ ++ LK1.1.0: ++ - Jeff Garzik: softnet 'n stuff ++ ++ LK1.1.1: ++ - Justin Guyett: softnet and locking fixes ++ - Jeff Garzik: use PCI interface ++ ++ LK1.1.2: ++ - Urban Widmark: minor cleanups, merges from Becker 1.03a/1.04 versions ++ ++ LK1.1.3: ++ - Urban Widmark: use PCI DMA interface (with thanks to the eepro100.c ++ code) update "Theory of Operation" with ++ softnet/locking changes ++ - Dave Miller: PCI DMA and endian fixups ++ - Jeff Garzik: MOD_xxx race fixes, updated PCI resource allocation ++ ++ LK1.1.4: ++ - Urban Widmark: fix gcc 2.95.2 problem and ++ remove writel's to fixed address 0x7c ++ ++ LK1.1.5: ++ - Urban Widmark: mdio locking, bounce buffer changes ++ merges from Beckers 1.05 version ++ added netif_running_on/off support ++ ++ LK1.1.6: ++ - Urban Widmark: merges from Beckers 1.08b version (VT6102 + mdio) ++ set netif_running_on/off on startup, del_timer_sync ++ ++ LK1.1.7: ++ - Manfred Spraul: added reset into tx_timeout ++ ++ LK1.1.9: ++ - Urban Widmark: merges from Beckers 1.10 version ++ (media selection + eeprom reload) ++ - David Vrabel: merges from D-Link "1.11" version ++ (disable WOL and PME on startup) ++ ++ LK1.1.10: ++ - Manfred Spraul: use "singlecopy" for unaligned buffers ++ don't allocate bounce buffers for !ReqTxAlign cards ++ ++ LK1.1.11: ++ - David Woodhouse: Set dev->base_addr before the first time we call ++ wait_for_reset(). It's a lot happier that way. ++ Free np->tx_bufs only if we actually allocated it. ++ ++ LK1.1.12: ++ - Martin Eriksson: Allow Memory-Mapped IO to be enabled. 
++ ++ LK1.1.13 (jgarzik): ++ - Add ethtool support ++ - Replace some MII-related magic numbers with constants ++ ++ LK1.1.14 (Ivan G.): ++ - fixes comments for Rhine-III ++ - removes W_MAX_TIMEOUT (unused) ++ - adds HasDavicomPhy for Rhine-I (basis: linuxfet driver; my card ++ is R-I and has Davicom chip, flag is referenced in kernel driver) ++ - sends chip_id as a parameter to wait_for_reset since np is not ++ initialized on first call ++ - changes mmio "else if (chip_id==VT6102)" to "else" so it will work ++ for Rhine-III's (documentation says same bit is correct) ++ - transmit frame queue message is off by one - fixed ++ - adds IntrNormalSummary to "Something Wicked" exclusion list ++ so normal interrupts will not trigger the message (src: Donald Becker) ++ (Roger Luethi) ++ - show confused chip where to continue after Tx error ++ - location of collision counter is chip specific ++ - allow selecting backoff algorithm (module parameter) ++ ++ LK1.1.15 (jgarzik): ++ - Use new MII lib helper generic_mii_ioctl ++ ++ LK1.1.16 (Roger Luethi) ++ - Etherleak fix ++ - Handle Tx buffer underrun ++ - Fix bugs in full duplex handling ++ - New reset code uses "force reset" cmd on Rhine-II ++ - Various clean ups ++ ++ LK1.1.17 (Roger Luethi) ++ - Fix race in via_rhine_start_tx() ++ - On errors, wait for Tx engine to turn off before scavenging ++ - Handle Tx descriptor write-back race on Rhine-II ++ - Force flushing for PCI posted writes ++ - More reset code changes ++ ++ Ported to RTnet: October 2003, Jan Kiszka ++*/ ++ ++#define DRV_NAME "via-rhine-rt" ++#define DRV_VERSION "1.1.17-RTnet-0.1" ++#define DRV_RELDATE "2003-10-05" ++ ++ ++/* A few user-configurable values. ++ These may be modified when a driver module is loaded. */ ++ ++static int local_debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */ ++static int max_interrupt_work = 20; ++ ++/* Set the copy breakpoint for the copy-only-tiny-frames scheme. ++ Setting to > 1518 effectively disables this feature. */ ++/*** RTnet *** ++static int rx_copybreak; ++ *** RTnet ***/ ++ ++/* Select a backoff algorithm (Ethernet capture effect) */ ++static int backoff; ++ ++/* Used to pass the media type, etc. ++ Both 'options[]' and 'full_duplex[]' should exist for driver ++ interoperability. ++ The media type is usually passed in 'options[]'. ++ The default is autonegotation for speed and duplex. ++ This should rarely be overridden. ++ Use option values 0x10/0x20 for 10Mbps, 0x100,0x200 for 100Mbps. ++ Use option values 0x10 and 0x100 for forcing half duplex fixed speed. ++ Use option values 0x20 and 0x200 for forcing full duplex operation. ++*/ ++#define MAX_UNITS 8 /* More are supported, limit only on options */ ++static int options[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++ ++/* Maximum number of multicast addresses to filter (vs. rx-all-multicast). ++ The Rhine has a 64 element 8390-like hash table. */ ++static const int multicast_filter_limit = 32; ++ ++ ++/* Operational parameters that are set at compile time. */ ++ ++/* Keep the ring sizes a power of two for compile efficiency. ++ The compiler will convert '%'<2^N> into a bit mask. ++ Making the Tx ring too large decreases the effectiveness of channel ++ bonding and packet priority. ++ There are no ill effects from too-large receive rings. */ ++#define TX_RING_SIZE 16 ++#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. 
*/ ++#define RX_RING_SIZE 8 /*** RTnet ***/ ++ ++ ++/* Operational parameters that usually are not changed. */ ++ ++/* Time in jiffies before concluding the transmitter is hung. */ ++#define TX_TIMEOUT (2*HZ) ++ ++#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/ ++ ++#if !defined(__OPTIMIZE__) || !defined(__KERNEL__) ++#warning You must compile this file with the correct options! ++#warning See the last lines of the source file. ++#error You must compile this driver with "-O". ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* Processor type for cache alignment. */ ++#include ++#include ++#include ++ ++/*** RTnet ***/ ++#include ++ ++#define DEFAULT_RX_POOL_SIZE 16 ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++/*** RTnet ***/ ++ ++/* These identify the driver base version and may not be removed. */ ++static char version[] = ++KERN_INFO DRV_NAME ".c:" DRV_VERSION " " DRV_RELDATE " Jan.Kiszka@web.de\n"; ++ ++static char shortname[] = DRV_NAME; ++ ++ ++/* This driver was written to use PCI memory space, however most versions ++ of the Rhine only work correctly with I/O space accesses. */ ++/*#ifdef CONFIG_VIA_RHINE_MMIO ++#define USE_MEM ++#else*/ ++#define USE_IO ++#undef readb ++#undef readw ++#undef readl ++#undef writeb ++#undef writew ++#undef writel ++#define readb(addr) inb((unsigned long)(addr)) ++#define readw(addr) inw((unsigned long)(addr)) ++#define readl(addr) inl((unsigned long)(addr)) ++#define writeb(val,addr) outb((val),(unsigned long)(addr)) ++#define writew(val,addr) outw((val),(unsigned long)(addr)) ++#define writel(val,addr) outl((val),(unsigned long)(addr)) ++/*#endif*/ ++ ++MODULE_AUTHOR("Jan Kiszka"); ++MODULE_DESCRIPTION("RTnet VIA Rhine PCI Fast Ethernet driver"); ++MODULE_LICENSE("GPL"); ++ ++module_param(max_interrupt_work, int, 0444); ++module_param_named(debug, local_debug, int, 0444); ++/*** RTnet *** ++MODULE_PARM(rx_copybreak, "i"); ++ *** RTnet ***/ ++module_param(backoff, int, 0444); ++module_param_array(options, int, NULL, 0444); ++module_param_array(full_duplex, int, NULL, 0444); ++MODULE_PARM_DESC(max_interrupt_work, "VIA Rhine maximum events handled per interrupt"); ++MODULE_PARM_DESC(debug, "VIA Rhine debug level (0-7)"); ++/*** RTnet *** ++MODULE_PARM_DESC(rx_copybreak, "VIA Rhine copy breakpoint for copy-only-tiny-frames"); ++ *** RTnet ***/ ++MODULE_PARM_DESC(backoff, "VIA Rhine: Bits 0-3: backoff algorithm"); ++MODULE_PARM_DESC(options, "VIA Rhine: Bits 0-3: media type, bit 17: full duplex"); ++MODULE_PARM_DESC(full_duplex, "VIA Rhine full duplex setting(s) (1)"); ++ ++/* ++ Theory of Operation ++ ++I. Board Compatibility ++ ++This driver is designed for the VIA 86c100A Rhine-II PCI Fast Ethernet ++controller. ++ ++II. Board-specific settings ++ ++Boards with this chip are functional only in a bus-master PCI slot. ++ ++Many operational settings are loaded from the EEPROM to the Config word at ++offset 0x78. For most of these settings, this driver assumes that they are ++correct. ++If this driver is compiled to use PCI memory space operations the EEPROM ++must be configured to enable memory ops. ++ ++III. Driver operation ++ ++IIIa. 
Ring buffers ++ ++This driver uses two statically allocated fixed-size descriptor lists ++formed into rings by a branch from the final descriptor to the beginning of ++the list. The ring sizes are set at compile time by RX/TX_RING_SIZE. ++ ++IIIb/c. Transmit/Receive Structure ++ ++This driver attempts to use a zero-copy receive and transmit scheme. ++ ++Alas, all data buffers are required to start on a 32 bit boundary, so ++the driver must often copy transmit packets into bounce buffers. ++ ++The driver allocates full frame size skbuffs for the Rx ring buffers at ++open() time and passes the skb->data field to the chip as receive data ++buffers. When an incoming frame is less than RX_COPYBREAK bytes long, ++a fresh skbuff is allocated and the frame is copied to the new skbuff. ++When the incoming frame is larger, the skbuff is passed directly up the ++protocol stack. Buffers consumed this way are replaced by newly allocated ++skbuffs in the last phase of via_rhine_rx(). ++ ++The RX_COPYBREAK value is chosen to trade-off the memory wasted by ++using a full-sized skbuff for small frames vs. the copying costs of larger ++frames. New boards are typically used in generously configured machines ++and the underfilled buffers have negligible impact compared to the benefit of ++a single allocation size, so the default value of zero results in never ++copying packets. When copying is done, the cost is usually mitigated by using ++a combined copy/checksum routine. Copying also preloads the cache, which is ++most useful with small frames. ++ ++Since the VIA chips are only able to transfer data to buffers on 32 bit ++boundaries, the IP header at offset 14 in an ethernet frame isn't ++longword aligned for further processing. Copying these unaligned buffers ++has the beneficial effect of 16-byte aligning the IP header. ++ ++IIId. Synchronization ++ ++The driver runs as two independent, single-threaded flows of control. One ++is the send-packet routine, which enforces single-threaded use by the ++dev->priv->lock spinlock. The other thread is the interrupt handler, which ++is single threaded by the hardware and interrupt handling software. ++ ++The send packet thread has partial control over the Tx ring. It locks the ++dev->priv->lock whenever it's queuing a Tx packet. If the next slot in the ring ++is not available it stops the transmit queue by calling netif_stop_queue. ++ ++The interrupt handler has exclusive control over the Rx ring and records stats ++from the Tx ring. After reaping the stats, it marks the Tx queue entry as ++empty by incrementing the dirty_tx mark. If at least half of the entries in ++the Rx ring are available the transmit queue is woken up if it was stopped. ++ ++IV. Notes ++ ++IVb. References ++ ++Preliminary VT86C100A manual from http://www.via.com.tw/ ++http://www.scyld.com/expert/100mbps.html ++http://www.scyld.com/expert/NWay.html ++ftp://ftp.via.com.tw/public/lan/Products/NIC/VT86C100A/Datasheet/VT86C100A03.pdf ++ftp://ftp.via.com.tw/public/lan/Products/NIC/VT6102/Datasheet/VT6102_021.PDF ++ ++ ++IVc. Errata ++ ++The VT86C100A manual is not reliable information. ++The 3043 chip does not handle unaligned transmit or receive buffers, resulting ++in significant performance degradation for bounce buffer copies on transmit ++and unaligned IP headers on receive. ++The chip does not pad to minimum transmit length. ++ ++*/ ++ ++ ++/* This table drives the PCI probe routines. It's mostly boilerplate in all ++ of the drivers, and will likely be provided by some future kernel. 
++ Note the matching code -- the first table entry matchs all 56** cards but ++ second only the 1234 card. ++*/ ++ ++enum pci_flags_bit { ++ PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4, ++ PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, ++}; ++ ++enum via_rhine_chips { ++ VT86C100A = 0, ++ VT6102, ++ VT6105, ++ VT6105M ++}; ++ ++struct via_rhine_chip_info { ++ const char *name; ++ u16 pci_flags; ++ int io_size; ++ int drv_flags; ++}; ++ ++ ++enum chip_capability_flags { ++ CanHaveMII=1, HasESIPhy=2, HasDavicomPhy=4, ++ ReqTxAlign=0x10, HasWOL=0x20, }; ++ ++#ifdef USE_MEM ++#define RHINE_IOTYPE (PCI_USES_MEM | PCI_USES_MASTER | PCI_ADDR1) ++#else ++#define RHINE_IOTYPE (PCI_USES_IO | PCI_USES_MASTER | PCI_ADDR0) ++#endif ++/* Beware of PCI posted writes */ ++#define IOSYNC do { readb((void *)dev->base_addr + StationAddr); } while (0) ++ ++/* directly indexed by enum via_rhine_chips, above */ ++static struct via_rhine_chip_info via_rhine_chip_info[] = ++{ ++ { "VIA VT86C100A Rhine", RHINE_IOTYPE, 128, ++ CanHaveMII | ReqTxAlign | HasDavicomPhy }, ++ { "VIA VT6102 Rhine-II", RHINE_IOTYPE, 256, ++ CanHaveMII | HasWOL }, ++ { "VIA VT6105 Rhine-III", RHINE_IOTYPE, 256, ++ CanHaveMII | HasWOL }, ++ { "VIA VT6105M Rhine-III", RHINE_IOTYPE, 256, ++ CanHaveMII | HasWOL }, ++}; ++ ++static struct pci_device_id via_rhine_pci_tbl[] = ++{ ++ {0x1106, 0x3043, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VT86C100A}, ++ {0x1106, 0x3065, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VT6102}, ++ {0x1106, 0x3106, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VT6105}, ++ {0x1106, 0x3053, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VT6105M}, ++ {0,} /* terminate list */ ++}; ++MODULE_DEVICE_TABLE(pci, via_rhine_pci_tbl); ++ ++ ++/* Offsets to the device registers. */ ++enum register_offsets { ++ StationAddr=0x00, RxConfig=0x06, TxConfig=0x07, ChipCmd=0x08, ++ IntrStatus=0x0C, IntrEnable=0x0E, ++ MulticastFilter0=0x10, MulticastFilter1=0x14, ++ RxRingPtr=0x18, TxRingPtr=0x1C, GFIFOTest=0x54, ++ MIIPhyAddr=0x6C, MIIStatus=0x6D, PCIBusConfig=0x6E, ++ MIICmd=0x70, MIIRegAddr=0x71, MIIData=0x72, MACRegEEcsr=0x74, ++ ConfigA=0x78, ConfigB=0x79, ConfigC=0x7A, ConfigD=0x7B, ++ RxMissed=0x7C, RxCRCErrs=0x7E, MiscCmd=0x81, ++ StickyHW=0x83, IntrStatus2=0x84, WOLcrClr=0xA4, WOLcgClr=0xA7, ++ PwrcsrClr=0xAC, ++}; ++ ++/* Bits in ConfigD */ ++enum backoff_bits { ++ BackOptional=0x01, BackModify=0x02, ++ BackCaptureEffect=0x04, BackRandom=0x08 ++}; ++ ++#ifdef USE_MEM ++/* Registers we check that mmio and reg are the same. */ ++int mmio_verify_registers[] = { ++ RxConfig, TxConfig, IntrEnable, ConfigA, ConfigB, ConfigC, ConfigD, ++ 0 ++}; ++#endif ++ ++/* Bits in the interrupt status/mask registers. */ ++enum intr_status_bits { ++ IntrRxDone=0x0001, IntrRxErr=0x0004, IntrRxEmpty=0x0020, ++ IntrTxDone=0x0002, IntrTxError=0x0008, IntrTxUnderrun=0x0210, ++ IntrPCIErr=0x0040, ++ IntrStatsMax=0x0080, IntrRxEarly=0x0100, ++ IntrRxOverflow=0x0400, IntrRxDropped=0x0800, IntrRxNoBuf=0x1000, ++ IntrTxAborted=0x2000, IntrLinkChange=0x4000, ++ IntrRxWakeUp=0x8000, ++ IntrNormalSummary=0x0003, IntrAbnormalSummary=0xC260, ++ IntrTxDescRace=0x080000, /* mapped from IntrStatus2 */ ++ IntrTxErrSummary=0x082218, ++}; ++ ++/* The Rx and Tx buffer descriptors. 
*/ ++struct rx_desc { ++ s32 rx_status; ++ u32 desc_length; /* Chain flag, Buffer/frame length */ ++ u32 addr; ++ u32 next_desc; ++}; ++struct tx_desc { ++ s32 tx_status; ++ u32 desc_length; /* Chain flag, Tx Config, Frame length */ ++ u32 addr; ++ u32 next_desc; ++}; ++ ++/* Initial value for tx_desc.desc_length, Buffer size goes to bits 0-10 */ ++#define TXDESC 0x00e08000 ++ ++enum rx_status_bits { ++ RxOK=0x8000, RxWholePkt=0x0300, RxErr=0x008F ++}; ++ ++/* Bits in *_desc.*_status */ ++enum desc_status_bits { ++ DescOwn=0x80000000 ++}; ++ ++/* Bits in ChipCmd. */ ++enum chip_cmd_bits { ++ CmdInit=0x0001, CmdStart=0x0002, CmdStop=0x0004, CmdRxOn=0x0008, ++ CmdTxOn=0x0010, CmdTxDemand=0x0020, CmdRxDemand=0x0040, ++ CmdEarlyRx=0x0100, CmdEarlyTx=0x0200, CmdFDuplex=0x0400, ++ CmdNoTxPoll=0x0800, CmdReset=0x8000, ++}; ++ ++#define MAX_MII_CNT 4 ++struct netdev_private { ++ /* Descriptor rings */ ++ struct rx_desc *rx_ring; ++ struct tx_desc *tx_ring; ++ dma_addr_t rx_ring_dma; ++ dma_addr_t tx_ring_dma; ++ ++ /* The addresses of receive-in-place skbuffs. */ ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; /*** RTnet ***/ ++ dma_addr_t rx_skbuff_dma[RX_RING_SIZE]; ++ ++ /* The saved address of a sent-in-place packet/buffer, for later free(). */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; /*** RTnet ***/ ++ dma_addr_t tx_skbuff_dma[TX_RING_SIZE]; ++ ++ /* Tx bounce buffers */ ++ unsigned char *tx_buf[TX_RING_SIZE]; ++ unsigned char *tx_bufs; ++ dma_addr_t tx_bufs_dma; ++ ++ struct pci_dev *pdev; ++ struct net_device_stats stats; ++ struct timer_list timer; /* Media monitoring timer. */ ++ rtdm_lock_t lock; ++ ++ /* Frequently used values: keep some adjacent for cache effect. */ ++ int chip_id, drv_flags; ++ struct rx_desc *rx_head_desc; ++ unsigned int cur_rx, dirty_rx; /* Producer/consumer ring indices */ ++ unsigned int cur_tx, dirty_tx; ++ unsigned int rx_buf_sz; /* Based on MTU+slack. */ ++ u16 chip_cmd; /* Current setting for ChipCmd */ ++ ++ /* These values are keep track of the transceiver/media in use. */ ++ unsigned int default_port:4; /* Last dev->if_port value. */ ++ u8 tx_thresh, rx_thresh; ++ ++ /* MII transceiver section. */ ++ unsigned char phys[MAX_MII_CNT]; /* MII device addresses. */ ++ unsigned int mii_cnt; /* number of MIIs found, but only the first one is used */ ++ u16 mii_status; /* last read MII status */ ++ struct mii_if_info mii_if; ++ unsigned int mii_if_force_media; /*** RTnet, support for older kernels (e.g. 
2.4.19) ***/ ++ ++ rtdm_irq_t irq_handle; ++}; ++ ++/*** RTnet ***/ ++static int mdio_read(struct rtnet_device *dev, int phy_id, int location); ++static void mdio_write(struct rtnet_device *dev, int phy_id, int location, int value); ++static int via_rhine_open(struct rtnet_device *dev); ++static void via_rhine_check_duplex(struct rtnet_device *dev); ++/*static void via_rhine_timer(unsigned long data); ++static void via_rhine_tx_timeout(struct net_device *dev);*/ ++static int via_rhine_start_tx(struct rtskb *skb, struct rtnet_device *dev); ++static int via_rhine_interrupt(rtdm_irq_t *irq_handle); ++static void via_rhine_tx(struct rtnet_device *dev); ++static void via_rhine_rx(struct rtnet_device *dev, nanosecs_abs_t *time_stamp); ++static void via_rhine_error(struct rtnet_device *dev, int intr_status); ++static void via_rhine_set_rx_mode(struct rtnet_device *dev); ++static struct net_device_stats *via_rhine_get_stats(struct rtnet_device *rtdev); ++/*static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);*/ ++static int via_rhine_close(struct rtnet_device *dev); ++/*** RTnet ***/ ++ ++static inline u32 get_intr_status(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ void *ioaddr = (void *)dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ u32 intr_status; ++ ++ intr_status = readw(ioaddr + IntrStatus); ++ /* On Rhine-II, Bit 3 indicates Tx descriptor write-back race. */ ++ if (np->chip_id == VT6102) ++ intr_status |= readb(ioaddr + IntrStatus2) << 16; ++ return intr_status; ++} ++ ++static void wait_for_reset(struct rtnet_device *dev, int chip_id, char *name) /*** RTnet ***/ ++{ ++ void *ioaddr = (void *)dev->base_addr; ++ int boguscnt = 20; ++ ++ IOSYNC; ++ ++ if (readw(ioaddr + ChipCmd) & CmdReset) { ++ printk(KERN_INFO "%s: Reset not complete yet. " ++ "Trying harder.\n", name); ++ ++ /* Rhine-II needs to be forced sometimes */ ++ if (chip_id == VT6102) ++ writeb(0x40, ioaddr + MiscCmd); ++ ++ /* VT86C100A may need long delay after reset (dlink) */ ++ /* Seen on Rhine-II as well (rl) */ ++ while ((readw(ioaddr + ChipCmd) & CmdReset) && --boguscnt) ++ udelay(5); ++ ++ } ++ ++ if (local_debug > 1) ++ printk(KERN_INFO "%s: Reset %s.\n", name, ++ boguscnt ? "succeeded" : "failed"); ++} ++ ++#ifdef USE_MEM ++static void enable_mmio(long ioaddr, int chip_id) ++{ ++ int n; ++ if (chip_id == VT86C100A) { ++ /* More recent docs say that this bit is reserved ... */ ++ n = inb(ioaddr + ConfigA) | 0x20; ++ outb(n, ioaddr + ConfigA); ++ } else { ++ n = inb(ioaddr + ConfigD) | 0x80; ++ outb(n, ioaddr + ConfigD); ++ } ++} ++#endif ++ ++static void reload_eeprom(long ioaddr) ++{ ++ int i; ++ outb(0x20, ioaddr + MACRegEEcsr); ++ /* Typically 2 cycles to reload. */ ++ for (i = 0; i < 150; i++) ++ if (! (inb(ioaddr + MACRegEEcsr) & 0x20)) ++ break; ++} ++ ++static int via_rhine_init_one (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtnet_device *dev; /*** RTnet ***/ ++ struct netdev_private *np; ++ int i, option; ++ int chip_id = (int) ent->driver_data; ++ static int card_idx = -1; ++ void *ioaddr; ++ long memaddr; ++ unsigned int io_size; ++ int pci_flags; ++#ifdef USE_MEM ++ long ioaddr0; ++#endif ++ ++/* when built into the kernel, we only print version if device is found */ ++#ifndef MODULE ++ static int printed_version; ++ if (!printed_version++) ++ printk(version); ++#endif ++ ++ card_idx++; ++ option = card_idx < MAX_UNITS ? 
options[card_idx] : 0; ++ io_size = via_rhine_chip_info[chip_id].io_size; ++ pci_flags = via_rhine_chip_info[chip_id].pci_flags; ++ ++/*** RTnet ***/ ++ if (cards[card_idx] == 0) ++ goto err_out; ++/*** RTnet ***/ ++ ++ if (pci_enable_device (pdev)) ++ goto err_out; ++ ++ /* this should always be supported */ ++ if (pci_set_dma_mask(pdev, 0xffffffff)) { ++ printk(KERN_ERR "32-bit PCI DMA addresses not supported by the card!?\n"); ++ goto err_out; ++ } ++ ++ /* sanity check */ ++ if ((pci_resource_len (pdev, 0) < io_size) || ++ (pci_resource_len (pdev, 1) < io_size)) { ++ printk (KERN_ERR "Insufficient PCI resources, aborting\n"); ++ goto err_out; ++ } ++ ++ ioaddr = (void *)pci_resource_start (pdev, 0); ++ memaddr = pci_resource_start (pdev, 1); ++ ++ if (pci_flags & PCI_USES_MASTER) ++ pci_set_master (pdev); ++ ++/*** RTnet ***/ ++ dev = rt_alloc_etherdev(sizeof(struct netdev_private), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (dev == NULL) { ++ printk (KERN_ERR "init_ethernet failed for card #%d\n", card_idx); ++ goto err_out; ++ } ++ rtdev_alloc_name(dev, "rteth%d"); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ dev->sysbind = &pdev->dev; ++/*** RTnet ***/ ++ ++ if (pci_request_regions(pdev, shortname)) ++ goto err_out_free_netdev; ++ ++#ifdef USE_MEM ++ ioaddr0 = (long)ioaddr; ++ enable_mmio(ioaddr0, chip_id); ++ ++ ioaddr = ioremap (memaddr, io_size); ++ if (!ioaddr) { ++ printk (KERN_ERR "ioremap failed for device %s, region 0x%X @ 0x%lX\n", ++ pci_name(pdev), io_size, memaddr); ++ goto err_out_free_res; ++ } ++ ++ /* Check that selected MMIO registers match the PIO ones */ ++ i = 0; ++ while (mmio_verify_registers[i]) { ++ int reg = mmio_verify_registers[i++]; ++ unsigned char a = inb(ioaddr0+reg); ++ unsigned char b = readb(ioaddr+reg); ++ if (a != b) { ++ printk (KERN_ERR "MMIO do not match PIO [%02x] (%02x != %02x)\n", ++ reg, a, b); ++ goto err_out_unmap; ++ } ++ } ++#endif ++ ++ /* D-Link provided reset code (with comment additions) */ ++ if (via_rhine_chip_info[chip_id].drv_flags & HasWOL) { ++ unsigned char byOrgValue; ++ ++ /* clear sticky bit before reset & read ethernet address */ ++ byOrgValue = readb(ioaddr + StickyHW); ++ byOrgValue = byOrgValue & 0xFC; ++ writeb(byOrgValue, ioaddr + StickyHW); ++ ++ /* (bits written are cleared?) */ ++ /* disable force PME-enable */ ++ writeb(0x80, ioaddr + WOLcgClr); ++ /* disable power-event config bit */ ++ writeb(0xFF, ioaddr + WOLcrClr); ++ /* clear power status (undocumented in vt6102 docs?) */ ++ writeb(0xFF, ioaddr + PwrcsrClr); ++ } ++ ++ /* Reset the chip to erase previous misconfiguration. */ ++ writew(CmdReset, ioaddr + ChipCmd); ++ ++ dev->base_addr = (long)ioaddr; ++ wait_for_reset(dev, chip_id, shortname); ++ ++ /* Reload the station address from the EEPROM. */ ++#ifdef USE_IO ++ reload_eeprom((long)ioaddr); ++#else ++ reload_eeprom(ioaddr0); ++ /* Reloading from eeprom overwrites cfgA-D, so we must re-enable MMIO. ++ If reload_eeprom() was done first this could be avoided, but it is ++ not known if that still works with the "win98-reboot" problem. */ ++ enable_mmio(ioaddr0, chip_id); ++#endif ++ ++ for (i = 0; i < 6; i++) ++ dev->dev_addr[i] = readb(ioaddr + StationAddr + i); ++ ++ if (!is_valid_ether_addr(dev->dev_addr)) { ++ printk(KERN_ERR "Invalid MAC address for card #%d\n", card_idx); ++ goto err_out_unmap; ++ } ++ ++ if (chip_id == VT6102) { ++ /* ++ * for 3065D, EEPROM reloaded will cause bit 0 in MAC_REG_CFGA ++ * turned on. it makes MAC receive magic packet ++ * automatically. 
So, we turn it off. (D-Link) ++ */ ++ writeb(readb(ioaddr + ConfigA) & 0xFE, ioaddr + ConfigA); ++ } ++ ++ /* Select backoff algorithm */ ++ if (backoff) ++ writeb(readb(ioaddr + ConfigD) & (0xF0 | backoff), ++ ioaddr + ConfigD); ++ ++ dev->irq = pdev->irq; ++ ++ np = dev->priv; ++ rtdm_lock_init (&np->lock); ++ np->chip_id = chip_id; ++ np->drv_flags = via_rhine_chip_info[chip_id].drv_flags; ++ np->pdev = pdev; ++/*** RTnet *** ++ np->mii_if.dev = dev; ++ np->mii_if.mdio_read = mdio_read; ++ np->mii_if.mdio_write = mdio_write; ++ np->mii_if.phy_id_mask = 0x1f; ++ np->mii_if.reg_num_mask = 0x1f; ++ *** RTnet ***/ ++ ++ if (dev->mem_start) ++ option = dev->mem_start; ++ ++ /* The chip-specific entries in the device structure. */ ++ dev->open = via_rhine_open; ++ dev->hard_start_xmit = via_rhine_start_tx; ++ dev->stop = via_rhine_close; ++ dev->get_stats = via_rhine_get_stats; ++/*** RTnet *** ++ dev->set_multicast_list = via_rhine_set_rx_mode; ++ dev->do_ioctl = netdev_ioctl; ++ dev->tx_timeout = via_rhine_tx_timeout; ++ dev->watchdog_timeo = TX_TIMEOUT; ++ *** RTnet ***/ ++ if (np->drv_flags & ReqTxAlign) ++ dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM; ++ ++ /* dev->name not defined before register_netdev()! */ ++/*** RTnet ***/ ++ i = rt_register_rtnetdev(dev); ++ if (i) { ++ goto err_out_unmap; ++ } ++/*** RTnet ***/ ++ ++ /* The lower four bits are the media type. */ ++ if (option > 0) { ++ if (option & 0x220) ++ np->mii_if.full_duplex = 1; ++ np->default_port = option & 15; ++ } ++ if (card_idx < MAX_UNITS && full_duplex[card_idx] > 0) ++ np->mii_if.full_duplex = 1; ++ ++ if (np->mii_if.full_duplex) { ++ printk(KERN_INFO "%s: Set to forced full duplex, autonegotiation" ++ " disabled.\n", dev->name); ++ np->mii_if_force_media = 1; /*** RTnet ***/ ++ } ++ ++ printk(KERN_INFO "%s: %s at 0x%lx, ", ++ dev->name, via_rhine_chip_info[chip_id].name, ++ (pci_flags & PCI_USES_IO) ? (long)ioaddr : memaddr); ++ ++ for (i = 0; i < 5; i++) ++ printk("%2.2x:", dev->dev_addr[i]); ++ printk("%2.2x, IRQ %d.\n", dev->dev_addr[i], pdev->irq); ++ ++ pci_set_drvdata(pdev, dev); ++ ++ if (np->drv_flags & CanHaveMII) { ++ int phy, phy_idx = 0; ++ np->phys[0] = 1; /* Standard for this chip. */ ++ for (phy = 1; phy < 32 && phy_idx < MAX_MII_CNT; phy++) { ++ int mii_status = mdio_read(dev, phy, 1); ++ if (mii_status != 0xffff && mii_status != 0x0000) { ++ np->phys[phy_idx++] = phy; ++ np->mii_if.advertising = mdio_read(dev, phy, 4); ++ printk(KERN_INFO "%s: MII PHY found at address %d, status " ++ "0x%4.4x advertising %4.4x Link %4.4x.\n", ++ dev->name, phy, mii_status, np->mii_if.advertising, ++ mdio_read(dev, phy, 5)); ++ ++ /* set IFF_RUNNING */ ++ if (mii_status & BMSR_LSTATUS) ++ rtnetif_carrier_on(dev); /*** RTnet ***/ ++ else ++ rtnetif_carrier_off(dev); /*** RTnet ***/ ++ } ++ } ++ np->mii_cnt = phy_idx; ++ np->mii_if.phy_id = np->phys[0]; ++ } ++ ++ /* Allow forcing the media type. */ ++ if (option > 0) { ++ if (option & 0x220) ++ np->mii_if.full_duplex = 1; ++ np->default_port = option & 0x3ff; ++ if (np->default_port & 0x330) { ++ /* FIXME: shouldn't someone check this variable? */ ++ /* np->medialock = 1; */ ++ printk(KERN_INFO " Forcing %dMbs %s-duplex operation.\n", ++ (option & 0x300 ? 100 : 10), ++ (option & 0x220 ? "full" : "half")); ++ if (np->mii_cnt) ++ mdio_write(dev, np->phys[0], MII_BMCR, ++ ((option & 0x300) ? 0x2000 : 0) | /* 100mbps? */ ++ ((option & 0x220) ? 0x0100 : 0)); /* Full duplex? 
*/ ++ } ++ } ++ ++ return 0; ++ ++err_out_unmap: ++#ifdef USE_MEM ++ iounmap((void *)ioaddr); ++err_out_free_res: ++#endif ++ pci_release_regions(pdev); ++err_out_free_netdev: ++/*** RTnet ***/ ++ rt_rtdev_disconnect(dev); ++ rtdev_free(dev); ++/*** RTnet ***/ ++err_out: ++ return -ENODEV; ++} ++ ++static int alloc_ring(struct rtnet_device* dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ring; ++ dma_addr_t ring_dma; ++ ++ ring = pci_alloc_consistent(np->pdev, ++ RX_RING_SIZE * sizeof(struct rx_desc) + ++ TX_RING_SIZE * sizeof(struct tx_desc), ++ &ring_dma); ++ if (!ring) { ++ printk(KERN_ERR "Could not allocate DMA memory.\n"); ++ return -ENOMEM; ++ } ++ if (np->drv_flags & ReqTxAlign) { ++ np->tx_bufs = pci_alloc_consistent(np->pdev, PKT_BUF_SZ * TX_RING_SIZE, ++ &np->tx_bufs_dma); ++ if (np->tx_bufs == NULL) { ++ pci_free_consistent(np->pdev, ++ RX_RING_SIZE * sizeof(struct rx_desc) + ++ TX_RING_SIZE * sizeof(struct tx_desc), ++ ring, ring_dma); ++ return -ENOMEM; ++ } ++ } ++ ++ np->rx_ring = ring; ++ np->tx_ring = ring + RX_RING_SIZE * sizeof(struct rx_desc); ++ np->rx_ring_dma = ring_dma; ++ np->tx_ring_dma = ring_dma + RX_RING_SIZE * sizeof(struct rx_desc); ++ ++ return 0; ++} ++ ++void free_ring(struct rtnet_device* dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ ++ pci_free_consistent(np->pdev, ++ RX_RING_SIZE * sizeof(struct rx_desc) + ++ TX_RING_SIZE * sizeof(struct tx_desc), ++ np->rx_ring, np->rx_ring_dma); ++ np->tx_ring = NULL; ++ ++ if (np->tx_bufs) ++ pci_free_consistent(np->pdev, PKT_BUF_SZ * TX_RING_SIZE, ++ np->tx_bufs, np->tx_bufs_dma); ++ ++ np->tx_bufs = NULL; ++ ++} ++ ++static void alloc_rbufs(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ dma_addr_t next; ++ int i; ++ ++ np->dirty_rx = np->cur_rx = 0; ++ ++ np->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32); ++ np->rx_head_desc = &np->rx_ring[0]; ++ next = np->rx_ring_dma; ++ ++ /* Init the ring entries */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ np->rx_ring[i].rx_status = 0; ++ np->rx_ring[i].desc_length = cpu_to_le32(np->rx_buf_sz); ++ next += sizeof(struct rx_desc); ++ np->rx_ring[i].next_desc = cpu_to_le32(next); ++ np->rx_skbuff[i] = 0; ++ } ++ /* Mark the last entry as wrapping the ring. */ ++ np->rx_ring[i-1].next_desc = cpu_to_le32(np->rx_ring_dma); ++ ++ /* Fill in the Rx buffers. Handle allocation failure gracefully. */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct rtskb *skb = rtnetdev_alloc_rtskb(dev, np->rx_buf_sz); /*** RTnet ***/ ++ np->rx_skbuff[i] = skb; ++ if (skb == NULL) ++ break; ++ np->rx_skbuff_dma[i] = ++ pci_map_single(np->pdev, skb->tail, np->rx_buf_sz, ++ PCI_DMA_FROMDEVICE); ++ ++ np->rx_ring[i].addr = cpu_to_le32(np->rx_skbuff_dma[i]); ++ np->rx_ring[i].rx_status = cpu_to_le32(DescOwn); ++ } ++ np->dirty_rx = (unsigned int)(i - RX_RING_SIZE); ++} ++ ++static void free_rbufs(struct rtnet_device* dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ /* Free all the skbuffs in the Rx queue. */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ np->rx_ring[i].rx_status = 0; ++ np->rx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. 
*/ ++ if (np->rx_skbuff[i]) { ++ pci_unmap_single(np->pdev, ++ np->rx_skbuff_dma[i], ++ np->rx_buf_sz, PCI_DMA_FROMDEVICE); ++ dev_kfree_rtskb(np->rx_skbuff[i]); /*** RTnet ***/ ++ } ++ np->rx_skbuff[i] = 0; ++ } ++} ++ ++static void alloc_tbufs(struct rtnet_device* dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ dma_addr_t next; ++ int i; ++ ++ np->dirty_tx = np->cur_tx = 0; ++ next = np->tx_ring_dma; ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ np->tx_skbuff[i] = 0; ++ np->tx_ring[i].tx_status = 0; ++ np->tx_ring[i].desc_length = cpu_to_le32(TXDESC); ++ next += sizeof(struct tx_desc); ++ np->tx_ring[i].next_desc = cpu_to_le32(next); ++ np->tx_buf[i] = &np->tx_bufs[i * PKT_BUF_SZ]; ++ } ++ np->tx_ring[i-1].next_desc = cpu_to_le32(np->tx_ring_dma); ++ ++} ++ ++static void free_tbufs(struct rtnet_device* dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ np->tx_ring[i].tx_status = 0; ++ np->tx_ring[i].desc_length = cpu_to_le32(TXDESC); ++ np->tx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */ ++ if (np->tx_skbuff[i]) { ++ if (np->tx_skbuff_dma[i]) { ++ pci_unmap_single(np->pdev, ++ np->tx_skbuff_dma[i], ++ np->tx_skbuff[i]->len, PCI_DMA_TODEVICE); ++ } ++ dev_kfree_rtskb(np->tx_skbuff[i]); /*** RTnet ***/ ++ } ++ np->tx_skbuff[i] = 0; ++ np->tx_buf[i] = 0; ++ } ++} ++ ++static void init_registers(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ int i; ++ ++ for (i = 0; i < 6; i++) ++ writeb(dev->dev_addr[i], ioaddr + StationAddr + i); ++ ++ /* Initialize other registers. */ ++ writew(0x0006, ioaddr + PCIBusConfig); /* Tune configuration??? */ ++ /* Configure initial FIFO thresholds. */ ++ writeb(0x20, ioaddr + TxConfig); ++ np->tx_thresh = 0x20; ++ np->rx_thresh = 0x60; /* Written in via_rhine_set_rx_mode(). */ ++ np->mii_if.full_duplex = 0; ++ ++ if (dev->if_port == 0) ++ dev->if_port = np->default_port; ++ ++ writel(np->rx_ring_dma, ioaddr + RxRingPtr); ++ writel(np->tx_ring_dma, ioaddr + TxRingPtr); ++ ++ via_rhine_set_rx_mode(dev); ++ ++ /* Enable interrupts by setting the interrupt mask. */ ++ writew(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow | ++ IntrRxDropped | IntrRxNoBuf | IntrTxAborted | ++ IntrTxDone | IntrTxError | IntrTxUnderrun | ++ IntrPCIErr | IntrStatsMax | IntrLinkChange, ++ ioaddr + IntrEnable); ++ ++ np->chip_cmd = CmdStart|CmdTxOn|CmdRxOn|CmdNoTxPoll; ++ if (np->mii_if_force_media) /*** RTnet ***/ ++ np->chip_cmd |= CmdFDuplex; ++ writew(np->chip_cmd, ioaddr + ChipCmd); ++ ++ via_rhine_check_duplex(dev); ++ ++ /* The LED outputs of various MII xcvrs should be configured. */ ++ /* For NS or Mison phys, turn on bit 1 in register 0x17 */ ++ /* For ESI phys, turn on bit 7 in register 0x17. */ ++ mdio_write(dev, np->phys[0], 0x17, mdio_read(dev, np->phys[0], 0x17) | ++ (np->drv_flags & HasESIPhy) ? 0x0080 : 0x0001); ++} ++/* Read and write over the MII Management Data I/O (MDIO) interface. */ ++ ++static int mdio_read(struct rtnet_device *dev, int phy_id, int regnum) /*** RTnet ***/ ++{ ++ void *ioaddr = (void *)dev->base_addr; ++ int boguscnt = 1024; ++ ++ /* Wait for a previous command to complete. 
*/ ++ while ((readb(ioaddr + MIICmd) & 0x60) && --boguscnt > 0) ++ ; ++ writeb(0x00, ioaddr + MIICmd); ++ writeb(phy_id, ioaddr + MIIPhyAddr); ++ writeb(regnum, ioaddr + MIIRegAddr); ++ writeb(0x40, ioaddr + MIICmd); /* Trigger read */ ++ boguscnt = 1024; ++ while ((readb(ioaddr + MIICmd) & 0x40) && --boguscnt > 0) ++ ; ++ return readw(ioaddr + MIIData); ++} ++ ++static void mdio_write(struct rtnet_device *dev, int phy_id, int regnum, int value) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ int boguscnt = 1024; ++ ++ if (phy_id == np->phys[0]) { ++ switch (regnum) { ++ case MII_BMCR: /* Is user forcing speed/duplex? */ ++ if (value & 0x9000) /* Autonegotiation. */ ++ np->mii_if_force_media = 0; /*** RTnet ***/ ++ else ++ np->mii_if.full_duplex = (value & 0x0100) ? 1 : 0; ++ break; ++ case MII_ADVERTISE: ++ np->mii_if.advertising = value; ++ break; ++ } ++ } ++ ++ /* Wait for a previous command to complete. */ ++ while ((readb(ioaddr + MIICmd) & 0x60) && --boguscnt > 0) ++ ; ++ writeb(0x00, ioaddr + MIICmd); ++ writeb(phy_id, ioaddr + MIIPhyAddr); ++ writeb(regnum, ioaddr + MIIRegAddr); ++ writew(value, ioaddr + MIIData); ++ writeb(0x20, ioaddr + MIICmd); /* Trigger write. */ ++} ++ ++ ++static int via_rhine_open(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ int i; ++ ++ /* Reset the chip. */ ++ writew(CmdReset, ioaddr + ChipCmd); ++ ++/*** RTnet ***/ ++ rt_stack_connect(dev, &STACK_manager); ++ i = rtdm_irq_request(&np->irq_handle, dev->irq, via_rhine_interrupt, ++ RTDM_IRQTYPE_SHARED, "rt_via-rhine", dev); ++/*** RTnet ***/ ++ if (i) { ++ return i; ++ } ++ ++ if (local_debug > 1) ++ printk(KERN_DEBUG "%s: via_rhine_open() irq %d.\n", ++ dev->name, np->pdev->irq); ++ ++ i = alloc_ring(dev); ++ if (i) { ++ return i; ++ } ++ alloc_rbufs(dev); ++ alloc_tbufs(dev); ++ wait_for_reset(dev, np->chip_id, dev->name); ++ init_registers(dev); ++ if (local_debug > 2) ++ printk(KERN_DEBUG "%s: Done via_rhine_open(), status %4.4x " ++ "MII status: %4.4x.\n", ++ dev->name, readw(ioaddr + ChipCmd), ++ mdio_read(dev, np->phys[0], MII_BMSR)); ++ ++ rtnetif_start_queue(dev); /*** RTnet ***/ ++ ++/*** RTnet ***/ ++ /* Set the timer to check for link beat. */ ++/*** RTnet ***/ ++ ++ return 0; ++} ++ ++static void via_rhine_check_duplex(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ int mii_lpa = mdio_read(dev, np->phys[0], MII_LPA); ++ int negotiated = mii_lpa & np->mii_if.advertising; ++ int duplex; ++ ++ if (np->mii_if_force_media || mii_lpa == 0xffff) /*** RTnet ***/ ++ return; ++ duplex = (negotiated & 0x0100) || (negotiated & 0x01C0) == 0x0040; ++ if (np->mii_if.full_duplex != duplex) { ++ np->mii_if.full_duplex = duplex; ++ if (local_debug) ++ printk(KERN_INFO "%s: Setting %s-duplex based on MII #%d link" ++ " partner capability of %4.4x.\n", dev->name, ++ duplex ? 
"full" : "half", np->phys[0], mii_lpa); ++ if (duplex) ++ np->chip_cmd |= CmdFDuplex; ++ else ++ np->chip_cmd &= ~CmdFDuplex; ++ writew(np->chip_cmd, ioaddr + ChipCmd); ++ } ++} ++ ++ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static int via_rhine_start_tx(struct rtskb *skb, struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ unsigned entry; ++ u32 intr_status; ++/*** RTnet ***/ ++ rtdm_lockctx_t context; ++/*** RTnet ***/ ++ ++ /* Caution: the write order is important here, set the field ++ with the "ownership" bits last. */ ++ ++ /* Calculate the next Tx descriptor entry. */ ++ entry = np->cur_tx % TX_RING_SIZE; ++ ++ if (skb->len < ETH_ZLEN) { ++ skb = rtskb_padto(skb, ETH_ZLEN); ++ if(skb == NULL) ++ return 0; ++ } ++ ++ np->tx_skbuff[entry] = skb; ++ ++ if ((np->drv_flags & ReqTxAlign) && ++ (((long)skb->data & 3) || /*** RTnet skb_shinfo(skb)->nr_frags != 0 || RTnet ***/ skb->ip_summed == CHECKSUM_PARTIAL) ++ ) { ++ /* Must use alignment buffer. */ ++ if (skb->len > PKT_BUF_SZ) { ++ /* packet too long, drop it */ ++ dev_kfree_rtskb(skb); /*** RTnet ***/ ++ np->tx_skbuff[entry] = NULL; ++ np->stats.tx_dropped++; ++ return 0; ++ } ++ ++/*** RTnet ***/ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) { ++ rtdm_lock_get_irqsave(&np->lock, context); ++ ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++ ++ rtskb_copy_and_csum_dev(skb, np->tx_buf[entry]); ++ } else { ++ /* no need to block the interrupts during copy */ ++ rtskb_copy_and_csum_dev(skb, np->tx_buf[entry]); ++ ++ rtdm_lock_get_irqsave(&np->lock, context); ++ } ++/*** RTnet ***/ ++ ++ np->tx_skbuff_dma[entry] = 0; ++ np->tx_ring[entry].addr = cpu_to_le32(np->tx_bufs_dma + ++ (np->tx_buf[entry] - np->tx_bufs)); ++ } else { ++ np->tx_skbuff_dma[entry] = ++ pci_map_single(np->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); ++ np->tx_ring[entry].addr = cpu_to_le32(np->tx_skbuff_dma[entry]); ++ ++/*** RTnet ***/ ++ rtdm_lock_get_irqsave(&np->lock, context); ++ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++/*** RTnet ***/ ++ } ++ ++ np->tx_ring[entry].desc_length = ++ cpu_to_le32(TXDESC | (skb->len >= ETH_ZLEN ? skb->len : ETH_ZLEN)); ++ ++ wmb(); ++ np->tx_ring[entry].tx_status = cpu_to_le32(DescOwn); ++ wmb(); ++ ++ np->cur_tx++; ++ ++ /* Non-x86 Todo: explicitly flush cache lines here. */ ++ ++ /* ++ * Wake the potentially-idle transmit channel unless errors are ++ * pending (the ISR must sort them out first). ++ */ ++ intr_status = get_intr_status(dev); ++ if ((intr_status & IntrTxErrSummary) == 0) { ++ writew(CmdTxDemand | np->chip_cmd, (void *)dev->base_addr + ChipCmd); ++ } ++ IOSYNC; ++ ++ if (np->cur_tx == np->dirty_tx + TX_QUEUE_LEN) ++ rtnetif_stop_queue(dev); /*** RTnet ***/ ++ ++ /*dev->trans_start = jiffies; *** RTnet ***/ ++ ++/*** RTnet ***/ ++ rtdm_lock_put_irqrestore(&np->lock, context); ++/*** RTnet ***/ ++ ++ if (local_debug > 4) { ++ rtdm_printk(KERN_DEBUG "%s: Transmit frame #%d queued in slot %d.\n", /*** RTnet ***/ ++ dev->name, np->cur_tx-1, entry); ++ } ++ return 0; ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. 
*/ ++static int via_rhine_interrupt(rtdm_irq_t *irq_handle) /*** RTnet ***/ ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); /*** RTnet ***/ ++ struct rtnet_device *dev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device); /*** RTnet ***/ ++ long ioaddr; ++ u32 intr_status; ++ int boguscnt = max_interrupt_work; ++ struct netdev_private *np = dev->priv; /*** RTnet ***/ ++ unsigned int old_packet_cnt = np->stats.rx_packets; /*** RTnet ***/ ++ int ret = RTDM_IRQ_NONE; ++ ++ ioaddr = dev->base_addr; ++ ++ while ((intr_status = get_intr_status(dev))) { ++ /* Acknowledge all of the current interrupt sources ASAP. */ ++ if (intr_status & IntrTxDescRace) ++ writeb(0x08, (void *)ioaddr + IntrStatus2); ++ writew(intr_status & 0xffff, (void *)ioaddr + IntrStatus); ++ IOSYNC; ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ if (local_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: Interrupt, status %8.8x.\n", /*** RTnet ***/ ++ dev->name, intr_status); ++ ++ if (intr_status & (IntrRxDone | IntrRxErr | IntrRxDropped | ++ IntrRxWakeUp | IntrRxEmpty | IntrRxNoBuf)) ++ via_rhine_rx(dev, &time_stamp); ++ ++ if (intr_status & (IntrTxErrSummary | IntrTxDone)) { ++ if (intr_status & IntrTxErrSummary) { ++/*** RTnet ***/ ++ rtdm_printk(KERN_ERR "%s: via_rhine_interrupt(), Transmissions error\n", dev->name); ++/*** RTnet ***/ ++ } ++ via_rhine_tx(dev); ++ } ++ ++ /* Abnormal error summary/uncommon events handlers. */ ++ if (intr_status & (IntrPCIErr | IntrLinkChange | ++ IntrStatsMax | IntrTxError | IntrTxAborted | ++ IntrTxUnderrun | IntrTxDescRace)) ++ via_rhine_error(dev, intr_status); ++ ++ if (--boguscnt < 0) { ++ rtdm_printk(KERN_WARNING "%s: Too much work at interrupt, " /*** RTnet ***/ ++ "status=%#8.8x.\n", ++ dev->name, intr_status); ++ break; ++ } ++ } ++ ++ if (local_debug > 3) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt, status=%8.8x.\n", /*** RTnet ***/ ++ dev->name, readw((void *)ioaddr + IntrStatus)); ++ ++/*** RTnet ***/ ++ if (old_packet_cnt != np->stats.rx_packets) ++ rt_mark_stack_mgr(dev); ++ return ret; ++} ++ ++/* This routine is logically part of the interrupt handler, but isolated ++ for clarity. */ ++static void via_rhine_tx(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ int txstatus = 0, entry = np->dirty_tx % TX_RING_SIZE; ++ ++ rtdm_lock_get(&np->lock); /*** RTnet ***/ ++ ++ /* find and cleanup dirty tx descriptors */ ++ while (np->dirty_tx != np->cur_tx) { ++ txstatus = le32_to_cpu(np->tx_ring[entry].tx_status); ++ if (local_debug > 6) ++ rtdm_printk(KERN_DEBUG " Tx scavenge %d status %8.8x.\n", /*** RTnet ***/ ++ entry, txstatus); ++ if (txstatus & DescOwn) ++ break; ++ if (txstatus & 0x8000) { ++ if (local_debug > 1) ++ rtdm_printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n", /*** RTnet ***/ ++ dev->name, txstatus); ++ np->stats.tx_errors++; ++ if (txstatus & 0x0400) np->stats.tx_carrier_errors++; ++ if (txstatus & 0x0200) np->stats.tx_window_errors++; ++ if (txstatus & 0x0100) np->stats.tx_aborted_errors++; ++ if (txstatus & 0x0080) np->stats.tx_heartbeat_errors++; ++ if (((np->chip_id == VT86C100A) && txstatus & 0x0002) || ++ (txstatus & 0x0800) || (txstatus & 0x1000)) { ++ np->stats.tx_fifo_errors++; ++ np->tx_ring[entry].tx_status = cpu_to_le32(DescOwn); ++ break; /* Keep the skb - we try again */ ++ } ++ /* Transmitter restarted in 'abnormal' handler. 
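The handler above follows the usual RTDM interrupt pattern: the line is attached with rtdm_irq_request() in via_rhine_open(), the handler recovers its device with rtdm_irq_get_arg(), and it reports RTDM_IRQ_HANDLED or RTDM_IRQ_NONE so that RTDM_IRQTYPE_SHARED lines keep working. A reduced skeleton of that pattern is sketched below; the demo_* names and the pending-check hook are placeholders, and the include path assumes the Xenomai 3 RTDM driver API.

/*
 * Minimal RTDM interrupt skeleton (illustrative sketch only; demo_* names are
 * placeholders). It mirrors the calls used by this driver: rtdm_irq_request(),
 * rtdm_irq_free(), rtdm_irq_get_arg(), and the RTDM_IRQ_HANDLED/RTDM_IRQ_NONE
 * return codes.
 */
#include <rtdm/driver.h>        /* Xenomai 3 RTDM driver API (assumed path) */

struct demo_dev {
    rtdm_irq_t irq_handle;
    int (*hw_irq_pending)(struct demo_dev *dev);   /* placeholder hardware check */
};

static int demo_isr(rtdm_irq_t *irq_handle)
{
    struct demo_dev *dev = rtdm_irq_get_arg(irq_handle, struct demo_dev);

    /* On a shared line, first check whether this device raised the IRQ. */
    if (!dev->hw_irq_pending(dev))
        return RTDM_IRQ_NONE;

    /* ... acknowledge the interrupt and service RX/TX here ... */

    return RTDM_IRQ_HANDLED;
}

static int demo_attach(struct demo_dev *dev, unsigned int irq)
{
    /* Same flags and argument order as the rtdm_irq_request() call in open(). */
    return rtdm_irq_request(&dev->irq_handle, irq, demo_isr,
                            RTDM_IRQTYPE_SHARED, "demo-nic", dev);
}

static void demo_detach(struct demo_dev *dev)
{
    rtdm_irq_free(&dev->irq_handle);
}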
*/ ++ } else { ++ if (np->chip_id == VT86C100A) ++ np->stats.collisions += (txstatus >> 3) & 0x0F; ++ else ++ np->stats.collisions += txstatus & 0x0F; ++ if (local_debug > 6) ++ rtdm_printk(KERN_DEBUG "collisions: %1.1x:%1.1x\n", /*** RTnet ***/ ++ (txstatus >> 3) & 0xF, ++ txstatus & 0xF); ++ np->stats.tx_bytes += np->tx_skbuff[entry]->len; ++ np->stats.tx_packets++; ++ } ++ /* Free the original skb. */ ++ if (np->tx_skbuff_dma[entry]) { ++ pci_unmap_single(np->pdev, ++ np->tx_skbuff_dma[entry], ++ np->tx_skbuff[entry]->len, PCI_DMA_TODEVICE); ++ } ++ dev_kfree_rtskb(np->tx_skbuff[entry]); /*** RTnet ***/ ++ np->tx_skbuff[entry] = NULL; ++ entry = (++np->dirty_tx) % TX_RING_SIZE; ++ } ++ if ((np->cur_tx - np->dirty_tx) < TX_QUEUE_LEN - 4) ++ rtnetif_wake_queue (dev); /*** RTnet ***/ ++ ++ rtdm_lock_put(&np->lock); /*** RTnet ***/ ++} ++ ++/* This routine is logically part of the interrupt handler, but isolated ++ for clarity and better register allocation. */ ++static void via_rhine_rx(struct rtnet_device *dev, nanosecs_abs_t *time_stamp) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ int entry = np->cur_rx % RX_RING_SIZE; ++ int boguscnt = np->dirty_rx + RX_RING_SIZE - np->cur_rx; ++ ++ if (local_debug > 4) { ++ rtdm_printk(KERN_DEBUG "%s: via_rhine_rx(), entry %d status %8.8x.\n", /*** RTnet ***/ ++ dev->name, entry, le32_to_cpu(np->rx_head_desc->rx_status)); ++ } ++ ++ /* If EOP is set on the next entry, it's a new packet. Send it up. */ ++ while ( ! (np->rx_head_desc->rx_status & cpu_to_le32(DescOwn))) { ++ struct rx_desc *desc = np->rx_head_desc; ++ u32 desc_status = le32_to_cpu(desc->rx_status); ++ int data_size = desc_status >> 16; ++ ++ if (local_debug > 4) ++ rtdm_printk(KERN_DEBUG " via_rhine_rx() status is %8.8x.\n", /*** RTnet ***/ ++ desc_status); ++ if (--boguscnt < 0) ++ break; ++ if ( (desc_status & (RxWholePkt | RxErr)) != RxWholePkt) { ++ if ((desc_status & RxWholePkt) != RxWholePkt) { ++ rtdm_printk(KERN_WARNING "%s: Oversized Ethernet frame spanned " /*** RTnet ***/ ++ "multiple buffers, entry %#x length %d status %8.8x!\n", ++ dev->name, entry, data_size, desc_status); ++ rtdm_printk(KERN_WARNING "%s: Oversized Ethernet frame %p vs %p.\n", /*** RTnet ***/ ++ dev->name, np->rx_head_desc, &np->rx_ring[entry]); ++ np->stats.rx_length_errors++; ++ } else if (desc_status & RxErr) { ++ /* There was a error. */ ++ if (local_debug > 2) ++ rtdm_printk(KERN_DEBUG " via_rhine_rx() Rx error was %8.8x.\n", /*** RTnet ***/ ++ desc_status); ++ np->stats.rx_errors++; ++ if (desc_status & 0x0030) np->stats.rx_length_errors++; ++ if (desc_status & 0x0048) np->stats.rx_fifo_errors++; ++ if (desc_status & 0x0004) np->stats.rx_frame_errors++; ++ if (desc_status & 0x0002) ++ /* RTnet: this is only updated in the interrupt handler */ ++ np->stats.rx_crc_errors++; ++ } ++ } else { ++ struct rtskb *skb; /*** RTnet ***/ ++ /* Length should omit the CRC */ ++ int pkt_len = data_size - 4; ++ ++ /* Check if the packet is long enough to accept without copying ++ to a minimally-sized skbuff. 
*/ ++/*** RTnet ***/ ++ { ++/*** RTnet ***/ ++ skb = np->rx_skbuff[entry]; ++ if (skb == NULL) { ++ rtdm_printk(KERN_ERR "%s: Inconsistent Rx descriptor chain.\n", /*** RTnet ***/ ++ dev->name); ++ break; ++ } ++ np->rx_skbuff[entry] = NULL; ++ rtskb_put(skb, pkt_len); /*** RTnet ***/ ++ pci_unmap_single(np->pdev, np->rx_skbuff_dma[entry], ++ np->rx_buf_sz, PCI_DMA_FROMDEVICE); ++ } ++/*** RTnet ***/ ++ skb->protocol = rt_eth_type_trans(skb, dev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ /*dev->last_rx = jiffies;*/ ++/*** RTnet ***/ ++ np->stats.rx_bytes += pkt_len; ++ np->stats.rx_packets++; ++ } ++ entry = (++np->cur_rx) % RX_RING_SIZE; ++ np->rx_head_desc = &np->rx_ring[entry]; ++ } ++ ++ /* Refill the Rx ring buffers. */ ++ for (; np->cur_rx - np->dirty_rx > 0; np->dirty_rx++) { ++ struct rtskb *skb; /*** RTnet ***/ ++ entry = np->dirty_rx % RX_RING_SIZE; ++ if (np->rx_skbuff[entry] == NULL) { ++ skb = rtnetdev_alloc_rtskb(dev, np->rx_buf_sz); /*** RTnet ***/ ++ np->rx_skbuff[entry] = skb; ++ if (skb == NULL) ++ break; /* Better luck next round. */ ++ np->rx_skbuff_dma[entry] = ++ pci_map_single(np->pdev, skb->tail, np->rx_buf_sz, ++ PCI_DMA_FROMDEVICE); ++ np->rx_ring[entry].addr = cpu_to_le32(np->rx_skbuff_dma[entry]); ++ } ++ np->rx_ring[entry].rx_status = cpu_to_le32(DescOwn); ++ } ++ ++ /* Pre-emptively restart Rx engine. */ ++ writew(readw((void *)dev->base_addr + ChipCmd) | CmdRxOn | CmdRxDemand, ++ (void *)dev->base_addr + ChipCmd); ++} ++ ++/* Clears the "tally counters" for CRC errors and missed frames(?). ++ It has been reported that some chips need a write of 0 to clear ++ these, for others the counters are set to 1 when written to and ++ instead cleared when read. So we clear them both ways ... */ ++static inline void clear_tally_counters(void *ioaddr) ++{ ++ writel(0, ioaddr + RxMissed); ++ readw(ioaddr + RxCRCErrs); ++ readw(ioaddr + RxMissed); ++} ++ ++static void via_rhine_restart_tx(struct rtnet_device *dev) { /*** RTnet ***/ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ int entry = np->dirty_tx % TX_RING_SIZE; ++ u32 intr_status; ++ ++ /* ++ * If new errors occured, we need to sort them out before doing Tx. ++ * In that case the ISR will be back here RSN anyway. ++ */ ++ intr_status = get_intr_status(dev); ++ ++ if ((intr_status & IntrTxErrSummary) == 0) { ++ ++ /* We know better than the chip where it should continue. */ ++ writel(np->tx_ring_dma + entry * sizeof(struct tx_desc), ++ ioaddr + TxRingPtr); ++ ++ writew(CmdTxDemand | np->chip_cmd, ioaddr + ChipCmd); ++ IOSYNC; ++ } ++ else { ++ /* This should never happen */ ++ if (local_debug > 1) ++ rtdm_printk(KERN_WARNING "%s: via_rhine_restart_tx() " /*** RTnet ***/ ++ "Another error occured %8.8x.\n", ++ dev->name, intr_status); ++ } ++ ++} ++ ++static void via_rhine_error(struct rtnet_device *dev, int intr_status) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ ++ rtdm_lock_get(&np->lock); /*** RTnet ***/ ++ ++ if (intr_status & (IntrLinkChange)) { ++ if (readb(ioaddr + MIIStatus) & 0x02) { ++ /* Link failed, restart autonegotiation. 
*/ ++ if (np->drv_flags & HasDavicomPhy) ++ mdio_write(dev, np->phys[0], MII_BMCR, 0x3300); ++ } else ++ via_rhine_check_duplex(dev); ++ if (local_debug) ++ rtdm_printk(KERN_ERR "%s: MII status changed: Autonegotiation " /*** RTnet ***/ ++ "advertising %4.4x partner %4.4x.\n", dev->name, ++ mdio_read(dev, np->phys[0], MII_ADVERTISE), ++ mdio_read(dev, np->phys[0], MII_LPA)); ++ } ++ if (intr_status & IntrStatsMax) { ++ np->stats.rx_crc_errors += readw(ioaddr + RxCRCErrs); ++ np->stats.rx_missed_errors += readw(ioaddr + RxMissed); ++ clear_tally_counters(ioaddr); ++ } ++ if (intr_status & IntrTxAborted) { ++ if (local_debug > 1) ++ rtdm_printk(KERN_INFO "%s: Abort %8.8x, frame dropped.\n", /*** RTnet ***/ ++ dev->name, intr_status); ++ } ++ if (intr_status & IntrTxUnderrun) { ++ if (np->tx_thresh < 0xE0) ++ writeb(np->tx_thresh += 0x20, ioaddr + TxConfig); ++ if (local_debug > 1) ++ rtdm_printk(KERN_INFO "%s: Transmitter underrun, Tx " /*** RTnet ***/ ++ "threshold now %2.2x.\n", ++ dev->name, np->tx_thresh); ++ } ++ if (intr_status & IntrTxDescRace) { ++ if (local_debug > 2) ++ rtdm_printk(KERN_INFO "%s: Tx descriptor write-back race.\n", /*** RTnet ***/ ++ dev->name); ++ } ++ if ((intr_status & IntrTxError) && ~( IntrTxAborted | IntrTxUnderrun | ++ IntrTxDescRace )) { ++ if (np->tx_thresh < 0xE0) { ++ writeb(np->tx_thresh += 0x20, ioaddr + TxConfig); ++ } ++ if (local_debug > 1) ++ rtdm_printk(KERN_INFO "%s: Unspecified error. Tx " /*** RTnet ***/ ++ "threshold now %2.2x.\n", ++ dev->name, np->tx_thresh); ++ } ++ if (intr_status & ( IntrTxAborted | IntrTxUnderrun | IntrTxDescRace | ++ IntrTxError )) ++ via_rhine_restart_tx(dev); ++ ++ if (intr_status & ~( IntrLinkChange | IntrStatsMax | IntrTxUnderrun | ++ IntrTxError | IntrTxAborted | IntrNormalSummary | ++ IntrTxDescRace )) { ++ if (local_debug > 1) ++ rtdm_printk(KERN_ERR "%s: Something Wicked happened! %8.8x.\n", /*** RTnet ***/ ++ dev->name, intr_status); ++ } ++ ++ rtdm_lock_put(&np->lock); /*** RTnet ***/ ++} ++ ++static struct net_device_stats *via_rhine_get_stats(struct rtnet_device *rtdev) ++{ ++ struct netdev_private *np = rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&np->lock, context); ++ np->stats.rx_crc_errors += readw(ioaddr + RxCRCErrs); ++ np->stats.rx_missed_errors += readw(ioaddr + RxMissed); ++ clear_tally_counters((void *)ioaddr); ++ rtdm_lock_put_irqrestore(&np->lock, context); ++ ++ return &np->stats; ++} ++ ++static void via_rhine_set_rx_mode(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ u32 mc_filter[2]; /* Multicast hash filter */ ++ u8 rx_mode; /* Note: 0x02=accept runt, 0x01=accept errs */ ++ ++ if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */ ++ /* Unconditionally log net taps. */ ++ printk(KERN_NOTICE "%s: Promiscuous mode enabled.\n", dev->name); ++ rx_mode = 0x1C; ++ writel(0xffffffff, (void *)ioaddr + MulticastFilter0); ++ writel(0xffffffff, (void *)ioaddr + MulticastFilter1); ++ } else if (dev->flags & IFF_ALLMULTI) { ++ /* Too many to match, or accept all multicasts. 
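The error and statistics paths above share one locking convention around the per-device rtdm_lock_t: rtdm_lock_get_irqsave()/rtdm_lock_put_irqrestore() when entered from task context, so the real-time interrupt cannot preempt the critical section, and the plain rtdm_lock_get()/rtdm_lock_put() pair when already running inside the interrupt handler. A condensed sketch of that convention, using placeholder names, is shown below.

/* Condensed sketch of the locking convention used above (placeholder names). */
#include <rtdm/driver.h>        /* rtdm_lock_t, rtdm_lockctx_t (assumed path) */

struct demo_priv {
    rtdm_lock_t lock;
    unsigned long rx_packets;   /* example of state shared with the ISR */
};

static void demo_init(struct demo_priv *p)
{
    rtdm_lock_init(&p->lock);   /* same initialiser used in via_rhine_init_one() */
}

/* Task context: interrupts must be masked while the lock is held. */
static unsigned long demo_read_stats(struct demo_priv *p)
{
    rtdm_lockctx_t context;
    unsigned long val;

    rtdm_lock_get_irqsave(&p->lock, context);
    val = p->rx_packets;
    rtdm_lock_put_irqrestore(&p->lock, context);
    return val;
}

/* Interrupt context: interrupts are already off, so the plain form is enough. */
static void demo_isr_update(struct demo_priv *p)
{
    rtdm_lock_get(&p->lock);
    p->rx_packets++;
    rtdm_lock_put(&p->lock);
}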
*/ ++ writel(0xffffffff, (void *)ioaddr + MulticastFilter0); ++ writel(0xffffffff, (void *)ioaddr + MulticastFilter1); ++ rx_mode = 0x0C; ++ } else { ++ memset(mc_filter, 0, sizeof(mc_filter)); ++ writel(mc_filter[0], (void *)ioaddr + MulticastFilter0); ++ writel(mc_filter[1], (void *)ioaddr + MulticastFilter1); ++ rx_mode = 0x0C; ++ } ++ writeb(np->rx_thresh | rx_mode, (void *)ioaddr + RxConfig); ++} ++ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static int via_rhine_close(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ int i; /*** RTnet ***/ ++ rtdm_lockctx_t context; ++ ++/*** RTnet *** ++ del_timer_sync(&np->timer); ++ *** RTnet ***/ ++ ++ rtdm_lock_get_irqsave(&np->lock, context); /*** RTnet ***/ ++ ++ rtnetif_stop_queue(dev); /*** RTnet ***/ ++ ++ if (local_debug > 1) ++ rtdm_printk(KERN_DEBUG "%s: Shutting down ethercard, status was %4.4x.\n", /*** RTnet ***/ ++ dev->name, readw((void *)ioaddr + ChipCmd)); ++ ++ /* Switch to loopback mode to avoid hardware races. */ ++ writeb(np->tx_thresh | 0x02, (void *)ioaddr + TxConfig); ++ ++ /* Disable interrupts by clearing the interrupt mask. */ ++ writew(0x0000, (void *)ioaddr + IntrEnable); ++ ++ /* Stop the chip's Tx and Rx processes. */ ++ writew(CmdStop, (void *)ioaddr + ChipCmd); ++ ++ rtdm_lock_put_irqrestore(&np->lock, context); /*** RTnet ***/ ++ ++/*** RTnet ***/ ++ if ( (i=rtdm_irq_free(&np->irq_handle))<0 ) ++ return i; ++ ++ rt_stack_disconnect(dev); ++/*** RTnet ***/ ++ ++ free_rbufs(dev); ++ free_tbufs(dev); ++ free_ring(dev); ++ ++ return 0; ++} ++ ++ ++static void via_rhine_remove_one (struct pci_dev *pdev) ++{ ++ /*** RTnet ***/ ++ struct rtnet_device *dev = pci_get_drvdata(pdev); ++ ++ rt_unregister_rtnetdev(dev); ++ rt_rtdev_disconnect(dev); ++/*** RTnet ***/ ++ ++ pci_release_regions(pdev); ++ ++#ifdef USE_MEM ++ iounmap((char *)(dev->base_addr)); ++#endif ++ ++ rtdev_free(dev); /*** RTnet ***/ ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++ ++static struct pci_driver via_rhine_driver = { ++ .name = DRV_NAME, ++ .id_table = via_rhine_pci_tbl, ++ .probe = via_rhine_init_one, ++ .remove = via_rhine_remove_one, ++}; ++ ++ ++static int __init via_rhine_init (void) ++{ ++/* when a module, this is printed whether or not devices are found in probe */ ++#ifdef MODULE ++ printk(version); ++#endif ++ return pci_register_driver (&via_rhine_driver); ++} ++ ++ ++static void __exit via_rhine_cleanup (void) ++{ ++ pci_unregister_driver (&via_rhine_driver); ++} ++ ++ ++module_init(via_rhine_init); ++module_exit(via_rhine_cleanup); ++ ++ ++/* ++ * Local variables: ++ * compile-command: "gcc -DMODULE -D__KERNEL__ -I/usr/src/linux/net/inet -Wall -Wstrict-prototypes -O6 -c via-rhine.c `[ -f /usr/include/linux/modversions.h ] && echo -DMODVERSIONS`" ++ * c-indent-level: 4 ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * End: ++ */ +--- linux/drivers/xenomai/net/drivers/mpc8xx_enet.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc8xx_enet.c 2021-04-07 16:01:27.269634106 +0800 +@@ -0,0 +1,1073 @@ ++/* ++ * BK Id: SCCS/s.enet.c 1.24 01/19/02 03:07:14 dan ++ */ ++/* ++ * Ethernet driver for Motorola MPC8xx. ++ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net) ++ * ++ * I copied the basic skeleton from the lance driver, because I did not ++ * know how to write the Linux driver, but I did know how the LANCE worked. 
++ * ++ * This version of the driver is somewhat selectable for the different ++ * processor/board combinations. It works for the boards I know about ++ * now, and should be easily modified to include others. Some of the ++ * configuration information is contained in and the ++ * remainder is here. ++ * ++ * Buffer descriptors are kept in the CPM dual port RAM, and the frame ++ * buffers are in the host memory. ++ * ++ * Right now, I am very watseful with the buffers. I allocate memory ++ * pages and then divide them into 2K frame buffers. This way I know I ++ * have buffers large enough to hold one frame within one buffer descriptor. ++ * Once I get this working, I will use 64 or 128 byte CPM buffers, which ++ * will be much more memory efficient and will easily handle lots of ++ * small packets. ++ * ++ * Ported to RTnet. ++ * Copyright (c) 2003 Wolfgang Grandegger (wg@denx.de) ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++MODULE_AUTHOR("Maintainer: Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTnet MPC8xx SCC Ethernet driver"); ++MODULE_LICENSE("GPL"); ++ ++static unsigned int rx_pool_size = 0; ++MODULE_PARM(rx_pool_size, "i"); ++MODULE_PARM_DESC(rx_pool_size, "Receive buffer pool size"); ++ ++static unsigned int rtnet_scc = 1; /* SCC1 */ ++MODULE_PARM(rtnet_scc, "i"); ++MODULE_PARM_DESC(rtnet_scc, "SCCx port for RTnet, x=1..3 (default=1)"); ++ ++#define RT_DEBUG(fmt,args...) ++ ++/* ++ * Theory of Operation ++ * ++ * The MPC8xx CPM performs the Ethernet processing on SCC1. It can use ++ * an aribtrary number of buffers on byte boundaries, but must have at ++ * least two receive buffers to prevent constant overrun conditions. ++ * ++ * The buffer descriptors are allocated from the CPM dual port memory ++ * with the data buffers allocated from host memory, just like all other ++ * serial communication protocols. The host memory buffers are allocated ++ * from the free page pool, and then divided into smaller receive and ++ * transmit buffers. The size of the buffers should be a power of two, ++ * since that nicely divides the page. This creates a ring buffer ++ * structure similar to the LANCE and other controllers. ++ * ++ * Like the LANCE driver: ++ * The driver runs as two independent, single-threaded flows of control. One ++ * is the send-packet routine, which enforces single-threaded use by the ++ * cep->tx_busy flag. The other thread is the interrupt handler, which is ++ * single threaded by the hardware and other software. ++ * ++ * The send packet thread has partial control over the Tx ring and the ++ * 'cep->tx_busy' flag. It sets the tx_busy flag whenever it's queuing a Tx ++ * packet. If the next queue slot is empty, it clears the tx_busy flag when ++ * finished otherwise it sets the 'lp->tx_full' flag. ++ * ++ * The MBX has a control register external to the MPC8xx that has some ++ * control of the Ethernet interface. Information is in the manual for ++ * your board. ++ * ++ * The RPX boards have an external control/status register. Consult the ++ * programming documents for details unique to your board. ++ * ++ * For the TQM8xx(L) modules, there is no control register interface. ++ * All functions are directly controlled using I/O pins. See . 
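As the comment above notes, the CPM keeps its buffer descriptors as a ring in dual-port RAM; unlike the Rhine driver earlier in this patch, which indexes its rings with free-running counters, this driver advances a descriptor pointer and relies on a WRAP bit in the last entry's status word. A minimal stand-alone sketch of that traversal, with simplified stand-in types and a stand-in wrap flag, follows.

/* Illustrative sketch (simplified stand-in types): walking a buffer-descriptor
 * ring by pointer and wrapping when the WRAP status bit of the last entry is
 * seen -- the scheme scc_enet_start_xmit()/scc_enet_rx() use below. */
#include <stdio.h>

#define BD_WRAP  0x2000   /* stand-in for BD_ENET_TX_WRAP / BD_ENET_RX_WRAP */
#define RING_LEN 8

struct demo_bd { unsigned short sc; unsigned short datlen; unsigned long bufaddr; };

static struct demo_bd ring[RING_LEN];

/* Advance to the next descriptor, honouring the wrap bit. */
static struct demo_bd *next_bd(struct demo_bd *bdp)
{
    if (bdp->sc & BD_WRAP)
        return &ring[0];
    return bdp + 1;
}

int main(void)
{
    struct demo_bd *bdp = &ring[0];
    int i;

    ring[RING_LEN - 1].sc |= BD_WRAP;   /* mark the last entry, as the init code does */

    /* Walk twice around the ring to show the wrap in action. */
    for (i = 0; i < 2 * RING_LEN; i++)
        bdp = next_bd(bdp);

    printf("back at entry %ld\n", (long)(bdp - ring));   /* prints 0 */
    return 0;
}

The transmit and receive loops below apply exactly this test to BD_ENET_TX_WRAP and BD_ENET_RX_WRAP.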
++ */ ++ ++/* The transmitter timeout ++ */ ++#define TX_TIMEOUT (2*HZ) ++ ++/* The number of Tx and Rx buffers. These are allocated from the page ++ * pool. The code may assume these are power of two, so it is best ++ * to keep them that size. ++ * We don't need to allocate pages for the transmitter. We just use ++ * the skbuffer directly. ++ */ ++#define CPM_ENET_RX_PAGES 4 ++#define CPM_ENET_RX_FRSIZE 2048 ++#define CPM_ENET_RX_FRPPG (PAGE_SIZE / CPM_ENET_RX_FRSIZE) ++#define RX_RING_SIZE (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES) ++#define TX_RING_SIZE 8 /* Must be power of two */ ++#define TX_RING_MOD_MASK 7 /* for this to work */ ++ ++/* The CPM stores dest/src/type, data, and checksum for receive packets. ++ */ ++#define PKT_MAXBUF_SIZE 1518 ++#define PKT_MINBUF_SIZE 64 ++#define PKT_MAXBLR_SIZE 1520 ++ ++/* The CPM buffer descriptors track the ring buffers. The rx_bd_base and ++ * tx_bd_base always point to the base of the buffer descriptors. The ++ * cur_rx and cur_tx point to the currently available buffer. ++ * The dirty_tx tracks the current buffer that is being sent by the ++ * controller. The cur_tx and dirty_tx are equal under both completely ++ * empty and completely full conditions. The empty/ready indicator in ++ * the buffer descriptor determines the actual condition. ++ */ ++struct scc_enet_private { ++ /* The addresses of a Tx/Rx-in-place packets/buffers. */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ ushort skb_cur; ++ ushort skb_dirty; ++ ++ /* CPM dual port RAM relative addresses. ++ */ ++ cbd_t *rx_bd_base; /* Address of Rx and Tx buffers. */ ++ cbd_t *tx_bd_base; ++ cbd_t *cur_rx, *cur_tx; /* The next free ring entry */ ++ cbd_t *dirty_tx; /* The ring entries to be free()ed. */ ++ scc_t *sccp; ++ ++ /* Virtual addresses for the receive buffers because we can't ++ * do a __va() on them anymore. ++ */ ++ unsigned char *rx_vaddr[RX_RING_SIZE]; ++ struct net_device_stats stats; ++ uint tx_full; ++ rtdm_lock_t lock; ++ rtdm_irq_t irq_handle; ++}; ++ ++static int scc_enet_open(struct rtnet_device *rtdev); ++static int scc_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static int scc_enet_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp); ++static int scc_enet_interrupt(rtdm_irq_t *irq_handle); ++static int scc_enet_close(struct rtnet_device *rtdev); ++ ++static struct net_device_stats *scc_enet_get_stats(struct rtnet_device *rtdev); ++#ifdef ORIGINAL_VERSION ++static void set_multicast_list(struct net_device *dev); ++#endif ++ ++#ifndef ORIGINAL_VERSION ++static struct rtnet_device *rtdev_root = NULL; ++#endif ++ ++/* Typically, 860(T) boards use SCC1 for Ethernet, and other 8xx boards ++ * use SCC2. Some even may use SCC3. ++ * This is easily extended if necessary. ++ * These values are set when the driver is initialized. ++ */ ++static int CPM_CR_ENET; ++static int PROFF_ENET; ++static int SCC_ENET; ++static int CPMVEC_ENET; ++ ++static int ++scc_enet_open(struct rtnet_device *rtdev) ++{ ++ /* I should reset the ring buffers here, but I don't yet know ++ * a simple way to do that. 
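The "must be power of two" note on TX_RING_SIZE above exists because the skb indices are wrapped with a bitmask rather than a division: for a power-of-two size N, i & (N - 1) equals i % N. A small stand-alone check of that identity, reusing the values from the defines above, is sketched here.

/* Stand-alone check: masking with TX_RING_MOD_MASK (7) matches a modulo by
 * TX_RING_SIZE (8) only because 8 is a power of two. */
#include <assert.h>
#include <stdio.h>

#define TX_RING_SIZE     8
#define TX_RING_MOD_MASK 7

int main(void)
{
    unsigned int i;

    for (i = 0; i < 100; i++)
        assert((i & TX_RING_MOD_MASK) == (i % TX_RING_SIZE));

    /* e.g. the skb_cur update in scc_enet_start_xmit(): (7 + 1) & 7 == 0 */
    printf("(7 + 1) & TX_RING_MOD_MASK = %u\n", (7 + 1) & TX_RING_MOD_MASK);
    return 0;
}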
++ */ ++ rtnetif_start_queue(rtdev); ++ ++ return 0; /* Always succeed */ ++} ++ ++static int ++scc_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct scc_enet_private *cep = (struct scc_enet_private *)rtdev->priv; ++ volatile cbd_t *bdp; ++ rtdm_lockctx_t context; ++ ++ ++ RT_DEBUG(__FUNCTION__": ...\n"); ++ ++ /* Fill in a Tx ring entry */ ++ bdp = cep->cur_tx; ++ ++#ifndef final_version ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ /* Ooops. All transmit buffers are full. Bail out. ++ * This should not happen, since cep->tx_busy should be set. ++ */ ++ rtdm_printk("%s: tx queue full!.\n", rtdev->name); ++ return 1; ++ } ++#endif ++ ++ /* Clear all of the status flags. ++ */ ++ bdp->cbd_sc &= ~BD_ENET_TX_STATS; ++ ++ /* If the frame is short, tell CPM to pad it. ++ */ ++ if (skb->len <= ETH_ZLEN) ++ bdp->cbd_sc |= BD_ENET_TX_PAD; ++ else ++ bdp->cbd_sc &= ~BD_ENET_TX_PAD; ++ ++ /* Set buffer length and buffer pointer. ++ */ ++ bdp->cbd_datlen = skb->len; ++ bdp->cbd_bufaddr = __pa(skb->data); ++ ++ /* Save skb pointer. ++ */ ++ cep->tx_skbuff[cep->skb_cur] = skb; ++ ++ cep->stats.tx_bytes += skb->len; ++ cep->skb_cur = (cep->skb_cur+1) & TX_RING_MOD_MASK; ++ ++ /* Prevent interrupts from changing the Tx ring from underneath us. */ ++ // *** RTnet *** ++ rtdm_lock_get_irqsave(&cep->lock, context); ++ ++ /* Get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ /* Push the data cache so the CPM does not get stale memory ++ * data. ++ */ ++ flush_dcache_range((unsigned long)(skb->data), ++ (unsigned long)(skb->data + skb->len)); ++ ++ ++ /* Send it on its way. Tell CPM its ready, interrupt when done, ++ * its the last BD of the frame, and to put the CRC on the end. ++ */ ++ bdp->cbd_sc |= (BD_ENET_TX_READY | BD_ENET_TX_INTR | BD_ENET_TX_LAST | BD_ENET_TX_TC); ++ ++ /* If this was the last BD in the ring, start at the beginning again. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) ++ bdp = cep->tx_bd_base; ++ else ++ bdp++; ++ ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ rtnetif_stop_queue(rtdev); ++ cep->tx_full = 1; ++ } ++ ++ cep->cur_tx = (cbd_t *)bdp; ++ ++ // *** RTnet *** ++ rtdm_lock_put_irqrestore(&cep->lock, context); ++ ++ return 0; ++} ++ ++#ifdef ORIGINAL_VERSION ++static void ++scc_enet_timeout(struct net_device *dev) ++{ ++ struct scc_enet_private *cep = (struct scc_enet_private *)dev->priv; ++ ++ printk("%s: transmit timed out.\n", dev->name); ++ cep->stats.tx_errors++; ++#ifndef final_version ++ { ++ int i; ++ cbd_t *bdp; ++ printk(" Ring data dump: cur_tx %p%s cur_rx %p.\n", ++ cep->cur_tx, cep->tx_full ? " (full)" : "", ++ cep->cur_rx); ++ bdp = cep->tx_bd_base; ++ for (i = 0 ; i < TX_RING_SIZE; i++, bdp++) ++ printk("%04x %04x %08x\n", ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ bdp = cep->rx_bd_base; ++ for (i = 0 ; i < RX_RING_SIZE; i++, bdp++) ++ printk("%04x %04x %08x\n", ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ } ++#endif ++ if (!cep->tx_full) ++ netif_wake_queue(dev); ++} ++#endif /* ORIGINAL_VERSION */ ++ ++/* The interrupt handler. ++ * This is called from the CPM handler, not the MPC core interrupt. 
++ */ ++static int scc_enet_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ struct scc_enet_private *cep; ++ volatile cbd_t *bdp; ++ ushort int_events; ++ int must_restart; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ ++ cep = (struct scc_enet_private *)rtdev->priv; ++ ++ /* Get the interrupt events that caused us to be here. ++ */ ++ int_events = cep->sccp->scc_scce; ++ cep->sccp->scc_scce = int_events; ++ must_restart = 0; ++ ++ /* Handle receive event in its own function. ++ */ ++ if (int_events & SCCE_ENET_RXF) { ++ scc_enet_rx(rtdev, &packets, &time_stamp); ++ } ++ ++ /* Check for a transmit error. The manual is a little unclear ++ * about this, so the debug code until I get it figured out. It ++ * appears that if TXE is set, then TXB is not set. However, ++ * if carrier sense is lost during frame transmission, the TXE ++ * bit is set, "and continues the buffer transmission normally." ++ * I don't know if "normally" implies TXB is set when the buffer ++ * descriptor is closed.....trial and error :-). ++ */ ++ ++ /* Transmit OK, or non-fatal error. Update the buffer descriptors. ++ */ ++ if (int_events & (SCCE_ENET_TXE | SCCE_ENET_TXB)) { ++ rtdm_lock_get(&cep->lock); ++ bdp = cep->dirty_tx; ++ while ((bdp->cbd_sc&BD_ENET_TX_READY)==0) { ++ RT_DEBUG(__FUNCTION__": Tx ok\n"); ++ if ((bdp==cep->cur_tx) && (cep->tx_full == 0)) ++ break; ++ ++ if (bdp->cbd_sc & BD_ENET_TX_HB) /* No heartbeat */ ++ cep->stats.tx_heartbeat_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_LC) /* Late collision */ ++ cep->stats.tx_window_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_RL) /* Retrans limit */ ++ cep->stats.tx_aborted_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_UN) /* Underrun */ ++ cep->stats.tx_fifo_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_CSL) /* Carrier lost */ ++ cep->stats.tx_carrier_errors++; ++ ++ ++ /* No heartbeat or Lost carrier are not really bad errors. ++ * The others require a restart transmit command. ++ */ ++ if (bdp->cbd_sc & ++ (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) { ++ must_restart = 1; ++ cep->stats.tx_errors++; ++ } ++ ++ cep->stats.tx_packets++; ++ ++ /* Deferred means some collisions occurred during transmit, ++ * but we eventually sent the packet OK. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_DEF) ++ cep->stats.collisions++; ++ ++ /* Free the sk buffer associated with this last transmit. ++ */ ++ dev_kfree_rtskb(cep->tx_skbuff[cep->skb_dirty]); ++ cep->skb_dirty = (cep->skb_dirty + 1) & TX_RING_MOD_MASK; ++ ++ /* Update pointer to next buffer descriptor to be transmitted. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) ++ bdp = cep->tx_bd_base; ++ else ++ bdp++; ++ ++ /* I don't know if we can be held off from processing these ++ * interrupts for more than one frame time. I really hope ++ * not. In such a case, we would now want to check the ++ * currently available BD (cur_tx) and determine if any ++ * buffers between the dirty_tx and cur_tx have also been ++ * sent. We would want to process anything in between that ++ * does not have BD_ENET_TX_READY set. ++ */ ++ ++ /* Since we have freed up a buffer, the ring is no longer ++ * full. ++ */ ++ if (cep->tx_full) { ++ cep->tx_full = 0; ++ if (rtnetif_queue_stopped(rtdev)) ++ rtnetif_wake_queue(rtdev); ++ } ++ ++ cep->dirty_tx = (cbd_t *)bdp; ++ } ++ ++ if (must_restart) { ++ volatile cpm8xx_t *cp; ++ ++ /* Some transmit errors cause the transmitter to shut ++ * down. We now issue a restart transmit. 
Since the ++ * errors close the BD and update the pointers, the restart ++ * _should_ pick up without having to reset any of our ++ * pointers either. ++ */ ++ cp = cpmp; ++ cp->cp_cpcr = ++ mk_cr_cmd(CPM_CR_ENET, CPM_CR_RESTART_TX) | CPM_CR_FLG; ++ while (cp->cp_cpcr & CPM_CR_FLG); ++ } ++ rtdm_lock_put(&cep->lock); ++ } ++ ++ /* Check for receive busy, i.e. packets coming but no place to ++ * put them. This "can't happen" because the receive interrupt ++ * is tossing previous frames. ++ */ ++ if (int_events & SCCE_ENET_BSY) { ++ cep->stats.rx_dropped++; ++ rtdm_printk("CPM ENET: BSY can't happen.\n"); ++ } ++ ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++/* During a receive, the cur_rx points to the current incoming buffer. ++ * When we update through the ring, if the next incoming buffer has ++ * not been given to the system, we just set the empty indicator, ++ * effectively tossing the packet. ++ */ ++static int ++scc_enet_rx(struct rtnet_device *rtdev, int* packets, nanosecs_abs_t *time_stamp) ++{ ++ struct scc_enet_private *cep; ++ volatile cbd_t *bdp; ++ ushort pkt_len; ++ struct rtskb *skb; ++ ++ RT_DEBUG(__FUNCTION__": ...\n"); ++ ++ cep = (struct scc_enet_private *)rtdev->priv; ++ ++ /* First, grab all of the stats for the incoming packet. ++ * These get messed up if we get called due to a busy condition. ++ */ ++ bdp = cep->cur_rx; ++ ++ for (;;) { ++ ++ if (bdp->cbd_sc & BD_ENET_RX_EMPTY) ++ break; ++ ++#ifndef final_version ++ /* Since we have allocated space to hold a complete frame, both ++ * the first and last indicators should be set. ++ */ ++ if ((bdp->cbd_sc & (BD_ENET_RX_FIRST | BD_ENET_RX_LAST)) != ++ (BD_ENET_RX_FIRST | BD_ENET_RX_LAST)) ++ rtdm_printk("CPM ENET: rcv is not first+last\n"); ++#endif ++ ++ /* Frame too long or too short. ++ */ ++ if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) ++ cep->stats.rx_length_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_NO) /* Frame alignment */ ++ cep->stats.rx_frame_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_CR) /* CRC Error */ ++ cep->stats.rx_crc_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_OV) /* FIFO overrun */ ++ cep->stats.rx_crc_errors++; ++ ++ /* Report late collisions as a frame error. ++ * On this error, the BD is closed, but we don't know what we ++ * have in the buffer. So, just drop this frame on the floor. ++ */ ++ if (bdp->cbd_sc & BD_ENET_RX_CL) { ++ cep->stats.rx_frame_errors++; ++ } ++ else { ++ ++ /* Process the incoming frame. ++ */ ++ cep->stats.rx_packets++; ++ pkt_len = bdp->cbd_datlen; ++ cep->stats.rx_bytes += pkt_len; ++ ++ /* This does 16 byte alignment, much more than we need. ++ * The packet length includes FCS, but we don't want to ++ * include that when passing upstream as it messes up ++ * bridging applications. ++ */ ++ skb = rtnetdev_alloc_rtskb(rtdev, pkt_len-4); ++ if (skb == NULL) { ++ rtdm_printk("%s: Memory squeeze, dropping packet.\n", rtdev->name); ++ cep->stats.rx_dropped++; ++ } ++ else { ++ rtskb_put(skb,pkt_len-4); /* Make room */ ++ memcpy(skb->data, ++ cep->rx_vaddr[bdp - cep->rx_bd_base], ++ pkt_len-4); ++ skb->protocol=rt_eth_type_trans(skb,rtdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ (*packets)++; ++ } ++ } ++ ++ /* Clear the status flags for this buffer. ++ */ ++ bdp->cbd_sc &= ~BD_ENET_RX_STATS; ++ ++ /* Mark the buffer empty. ++ */ ++ bdp->cbd_sc |= BD_ENET_RX_EMPTY; ++ ++ /* Update BD pointer to next entry. 
++ */ ++ if (bdp->cbd_sc & BD_ENET_RX_WRAP) ++ bdp = cep->rx_bd_base; ++ else ++ bdp++; ++ ++ } ++ cep->cur_rx = (cbd_t *)bdp; ++ ++ return 0; ++} ++ ++static int ++scc_enet_close(struct rtnet_device *rtdev) ++{ ++ /* Don't know what to do yet. ++ */ ++ rtnetif_stop_queue(rtdev); ++ ++ return 0; ++} ++ ++static struct net_device_stats *scc_enet_get_stats(struct rtnet_device *rtdev) ++{ ++ struct scc_enet_private *cep = (struct scc_enet_private *)rtdev->priv; ++ ++ return &cep->stats; ++} ++ ++#ifdef ORIGINAL_VERSION ++/* Set or clear the multicast filter for this adaptor. ++ * Skeleton taken from sunlance driver. ++ * The CPM Ethernet implementation allows Multicast as well as individual ++ * MAC address filtering. Some of the drivers check to make sure it is ++ * a group multicast address, and discard those that are not. I guess I ++ * will do the same for now, but just remove the test if you want ++ * individual filtering as well (do the upper net layers want or support ++ * this kind of feature?). ++ */ ++ ++static void set_multicast_list(struct net_device *dev) ++{ ++ struct scc_enet_private *cep; ++ struct dev_mc_list *dmi; ++ u_char *mcptr, *tdptr; ++ volatile scc_enet_t *ep; ++ int i, j; ++ cep = (struct scc_enet_private *)dev->priv; ++ ++ /* Get pointer to SCC area in parameter RAM. ++ */ ++ ep = (scc_enet_t *)dev->base_addr; ++ ++ if (dev->flags&IFF_PROMISC) { ++ ++ /* Log any net taps. */ ++ printk("%s: Promiscuous mode enabled.\n", dev->name); ++ cep->sccp->scc_pmsr |= SCC_PMSR_PRO; ++ } else { ++ ++ cep->sccp->scc_pmsr &= ~SCC_PMSR_PRO; ++ ++ if (dev->flags & IFF_ALLMULTI) { ++ /* Catch all multicast addresses, so set the ++ * filter to all 1's. ++ */ ++ ep->sen_gaddr1 = 0xffff; ++ ep->sen_gaddr2 = 0xffff; ++ ep->sen_gaddr3 = 0xffff; ++ ep->sen_gaddr4 = 0xffff; ++ } ++ else { ++ /* Clear filter and add the addresses in the list. ++ */ ++ ep->sen_gaddr1 = 0; ++ ep->sen_gaddr2 = 0; ++ ep->sen_gaddr3 = 0; ++ ep->sen_gaddr4 = 0; ++ ++ dmi = dev->mc_list; ++ ++ for (i=0; imc_count; i++) { ++ ++ /* Only support group multicast for now. ++ */ ++ if (!(dmi->dmi_addr[0] & 1)) ++ continue; ++ ++ /* The address in dmi_addr is LSB first, ++ * and taddr is MSB first. We have to ++ * copy bytes MSB first from dmi_addr. ++ */ ++ mcptr = (u_char *)dmi->dmi_addr + 5; ++ tdptr = (u_char *)&ep->sen_taddrh; ++ for (j=0; j<6; j++) ++ *tdptr++ = *mcptr--; ++ ++ /* Ask CPM to run CRC and set bit in ++ * filter mask. ++ */ ++ cpmp->cp_cpcr = mk_cr_cmd(CPM_CR_ENET, CPM_CR_SET_GADDR) | CPM_CR_FLG; ++ /* this delay is necessary here -- Cort */ ++ udelay(10); ++ while (cpmp->cp_cpcr & CPM_CR_FLG); ++ } ++ } ++ } ++} ++#endif /* ORIGINAL_VERSION */ ++ ++/* Initialize the CPM Ethernet on SCC. If EPPC-Bug loaded us, or performed ++ * some other network I/O, a whole bunch of this has already been set up. ++ * It is no big deal if we do it again, we just have to disable the ++ * transmit and receive to make sure we don't catch the CPM with some ++ * inconsistent control information. 
++ */ ++int __init scc_enet_init(void) ++{ ++ struct rtnet_device *rtdev = NULL; ++ struct scc_enet_private *cep; ++ int i, j, k; ++ unsigned char *eap, *ba; ++ dma_addr_t mem_addr; ++ bd_t *bd; ++ volatile cbd_t *bdp; ++ volatile cpm8xx_t *cp; ++ volatile scc_t *sccp; ++ volatile scc_enet_t *ep; ++ volatile immap_t *immap; ++ ++ cp = cpmp; /* Get pointer to Communication Processor */ ++ ++ immap = (immap_t *)(mfspr(IMMR) & 0xFFFF0000); /* and to internal registers */ ++ ++ bd = (bd_t *)__res; ++ ++ /* Configure the SCC parameters (this has formerly be done ++ * by macro definitions). ++ */ ++ switch (rtnet_scc) { ++ case 3: ++ CPM_CR_ENET = CPM_CR_CH_SCC3; ++ PROFF_ENET = PROFF_SCC3; ++ SCC_ENET = 2; /* Index, not number! */ ++ CPMVEC_ENET = CPMVEC_SCC3; ++ break; ++ case 2: ++ CPM_CR_ENET = CPM_CR_CH_SCC2; ++ PROFF_ENET = PROFF_SCC2; ++ SCC_ENET = 1; /* Index, not number! */ ++ CPMVEC_ENET = CPMVEC_SCC2; ++ break; ++ case 1: ++ CPM_CR_ENET = CPM_CR_CH_SCC1; ++ PROFF_ENET = PROFF_SCC1; ++ SCC_ENET = 0; /* Index, not number! */ ++ CPMVEC_ENET = CPMVEC_SCC1; ++ break; ++ default: ++ printk(KERN_ERR "enet: SCC%d doesn't exit (check rtnet_scc)\n", rtnet_scc); ++ return -1; ++ } ++ ++ /* Allocate some private information and create an Ethernet device instance. ++ */ ++ if (!rx_pool_size) ++ rx_pool_size = RX_RING_SIZE * 2; ++ rtdev = rtdev_root = rt_alloc_etherdev(sizeof(struct scc_enet_private), ++ rx_pool_size + TX_RING_SIZE); ++ if (rtdev == NULL) { ++ printk(KERN_ERR "enet: Could not allocate ethernet device.\n"); ++ return -1; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ ++ cep = (struct scc_enet_private *)rtdev->priv; ++ rtdm_lock_init(&cep->lock); ++ ++ /* Get pointer to SCC area in parameter RAM. ++ */ ++ ep = (scc_enet_t *)(&cp->cp_dparam[PROFF_ENET]); ++ ++ /* And another to the SCC register area. ++ */ ++ sccp = (volatile scc_t *)(&cp->cp_scc[SCC_ENET]); ++ cep->sccp = (scc_t *)sccp; /* Keep the pointer handy */ ++ ++ /* Disable receive and transmit in case EPPC-Bug started it. ++ */ ++ sccp->scc_gsmrl &= ~(SCC_GSMRL_ENR | SCC_GSMRL_ENT); ++ ++ /* Cookbook style from the MPC860 manual..... ++ * Not all of this is necessary if EPPC-Bug has initialized ++ * the network. ++ * So far we are lucky, all board configurations use the same ++ * pins, or at least the same I/O Port for these functions..... ++ * It can't last though...... ++ */ ++ ++#if (defined(PA_ENET_RXD) && defined(PA_ENET_TXD)) ++ /* Configure port A pins for Txd and Rxd. ++ */ ++ immap->im_ioport.iop_papar |= (PA_ENET_RXD | PA_ENET_TXD); ++ immap->im_ioport.iop_padir &= ~(PA_ENET_RXD | PA_ENET_TXD); ++ immap->im_ioport.iop_paodr &= ~PA_ENET_TXD; ++#elif (defined(PB_ENET_RXD) && defined(PB_ENET_TXD)) ++ /* Configure port B pins for Txd and Rxd. ++ */ ++ immap->im_cpm.cp_pbpar |= (PB_ENET_RXD | PB_ENET_TXD); ++ immap->im_cpm.cp_pbdir &= ~(PB_ENET_RXD | PB_ENET_TXD); ++ immap->im_cpm.cp_pbodr &= ~PB_ENET_TXD; ++#else ++#error Exactly ONE pair of PA_ENET_[RT]XD, PB_ENET_[RT]XD must be defined ++#endif ++ ++#if defined(PC_ENET_LBK) ++ /* Configure port C pins to disable External Loopback ++ */ ++ immap->im_ioport.iop_pcpar &= ~PC_ENET_LBK; ++ immap->im_ioport.iop_pcdir |= PC_ENET_LBK; ++ immap->im_ioport.iop_pcso &= ~PC_ENET_LBK; ++ immap->im_ioport.iop_pcdat &= ~PC_ENET_LBK; /* Disable Loopback */ ++#endif /* PC_ENET_LBK */ ++ ++ /* Configure port C pins to enable CLSN and RENA. 
++ */ ++ immap->im_ioport.iop_pcpar &= ~(PC_ENET_CLSN | PC_ENET_RENA); ++ immap->im_ioport.iop_pcdir &= ~(PC_ENET_CLSN | PC_ENET_RENA); ++ immap->im_ioport.iop_pcso |= (PC_ENET_CLSN | PC_ENET_RENA); ++ ++ /* Configure port A for TCLK and RCLK. ++ */ ++ immap->im_ioport.iop_papar |= (PA_ENET_TCLK | PA_ENET_RCLK); ++ immap->im_ioport.iop_padir &= ~(PA_ENET_TCLK | PA_ENET_RCLK); ++ ++ /* Configure Serial Interface clock routing. ++ * First, clear all SCC bits to zero, then set the ones we want. ++ */ ++ cp->cp_sicr &= ~SICR_ENET_MASK; ++ cp->cp_sicr |= SICR_ENET_CLKRT; ++ ++ /* Manual says set SDDR, but I can't find anything with that ++ * name. I think it is a misprint, and should be SDCR. This ++ * has already been set by the communication processor initialization. ++ */ ++ ++ /* Allocate space for the buffer descriptors in the DP ram. ++ * These are relative offsets in the DP ram address space. ++ * Initialize base addresses for the buffer descriptors. ++ */ ++ i = m8xx_cpm_dpalloc(sizeof(cbd_t) * RX_RING_SIZE); ++ ep->sen_genscc.scc_rbase = i; ++ cep->rx_bd_base = (cbd_t *)&cp->cp_dpmem[i]; ++ ++ i = m8xx_cpm_dpalloc(sizeof(cbd_t) * TX_RING_SIZE); ++ ep->sen_genscc.scc_tbase = i; ++ cep->tx_bd_base = (cbd_t *)&cp->cp_dpmem[i]; ++ ++ cep->dirty_tx = cep->cur_tx = cep->tx_bd_base; ++ cep->cur_rx = cep->rx_bd_base; ++ ++ /* Issue init Rx BD command for SCC. ++ * Manual says to perform an Init Rx parameters here. We have ++ * to perform both Rx and Tx because the SCC may have been ++ * already running. ++ * In addition, we have to do it later because we don't yet have ++ * all of the BD control/status set properly. ++ cp->cp_cpcr = mk_cr_cmd(CPM_CR_ENET, CPM_CR_INIT_RX) | CPM_CR_FLG; ++ while (cp->cp_cpcr & CPM_CR_FLG); ++ */ ++ ++ /* Initialize function code registers for big-endian. ++ */ ++ ep->sen_genscc.scc_rfcr = SCC_EB; ++ ep->sen_genscc.scc_tfcr = SCC_EB; ++ ++ /* Set maximum bytes per receive buffer. ++ * This appears to be an Ethernet frame size, not the buffer ++ * fragment size. It must be a multiple of four. ++ */ ++ ep->sen_genscc.scc_mrblr = PKT_MAXBLR_SIZE; ++ ++ /* Set CRC preset and mask. ++ */ ++ ep->sen_cpres = 0xffffffff; ++ ep->sen_cmask = 0xdebb20e3; ++ ++ ep->sen_crcec = 0; /* CRC Error counter */ ++ ep->sen_alec = 0; /* alignment error counter */ ++ ep->sen_disfc = 0; /* discard frame counter */ ++ ++ ep->sen_pads = 0x8888; /* Tx short frame pad character */ ++ ep->sen_retlim = 15; /* Retry limit threshold */ ++ ++ ep->sen_maxflr = PKT_MAXBUF_SIZE; /* maximum frame length register */ ++ ep->sen_minflr = PKT_MINBUF_SIZE; /* minimum frame length register */ ++ ++ ep->sen_maxd1 = PKT_MAXBLR_SIZE; /* maximum DMA1 length */ ++ ep->sen_maxd2 = PKT_MAXBLR_SIZE; /* maximum DMA2 length */ ++ ++ /* Clear hash tables. ++ */ ++ ep->sen_gaddr1 = 0; ++ ep->sen_gaddr2 = 0; ++ ep->sen_gaddr3 = 0; ++ ep->sen_gaddr4 = 0; ++ ep->sen_iaddr1 = 0; ++ ep->sen_iaddr2 = 0; ++ ep->sen_iaddr3 = 0; ++ ep->sen_iaddr4 = 0; ++ ++ /* Set Ethernet station address. ++ */ ++ eap = (unsigned char *)&(ep->sen_paddrh); ++#ifdef CONFIG_FEC_ENET ++ /* We need a second MAC address if FEC is used by Linux */ ++ for (i=5; i>=0; i--) ++ *eap++ = rtdev->dev_addr[i] = (bd->bi_enetaddr[i] | ++ (i==3 ? 
0x80 : 0)); ++#else ++ for (i=5; i>=0; i--) ++ *eap++ = rtdev->dev_addr[i] = bd->bi_enetaddr[i]; ++#endif ++ ++ ep->sen_pper = 0; /* 'cause the book says so */ ++ ep->sen_taddrl = 0; /* temp address (LSB) */ ++ ep->sen_taddrm = 0; ++ ep->sen_taddrh = 0; /* temp address (MSB) */ ++ ++ /* Now allocate the host memory pages and initialize the ++ * buffer descriptors. ++ */ ++ bdp = cep->tx_bd_base; ++ for (i=0; icbd_sc = 0; ++ bdp->cbd_bufaddr = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ bdp = cep->rx_bd_base; ++ k = 0; ++ for (i=0; icbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR; ++ bdp->cbd_bufaddr = mem_addr; ++ cep->rx_vaddr[k++] = ba; ++ mem_addr += CPM_ENET_RX_FRSIZE; ++ ba += CPM_ENET_RX_FRSIZE; ++ bdp++; ++ } ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* Let's re-initialize the channel now. We have to do it later ++ * than the manual describes because we have just now finished ++ * the BD initialization. ++ */ ++ cp->cp_cpcr = mk_cr_cmd(CPM_CR_ENET, CPM_CR_INIT_TRX) | CPM_CR_FLG; ++ while (cp->cp_cpcr & CPM_CR_FLG); ++ ++ cep->skb_cur = cep->skb_dirty = 0; ++ ++ sccp->scc_scce = 0xffff; /* Clear any pending events */ ++ ++ /* Enable interrupts for transmit error, complete frame ++ * received, and any transmit buffer we have also set the ++ * interrupt flag. ++ */ ++ sccp->scc_sccm = (SCCE_ENET_TXE | SCCE_ENET_RXF | SCCE_ENET_TXB); ++ ++ /* Install our interrupt handler. ++ */ ++ rtdev->irq = CPM_IRQ_OFFSET + CPMVEC_ENET; ++ rt_stack_connect(rtdev, &STACK_manager); ++ if ((i = rtdm_irq_request(&cep->irq_handle, rtdev->irq, ++ scc_enet_interrupt, 0, "rt_mpc8xx_enet", rtdev))) { ++ printk(KERN_ERR "Couldn't request IRQ %d\n", rtdev->irq); ++ rtdev_free(rtdev); ++ return i; ++ } ++ ++ ++ /* Set GSMR_H to enable all normal operating modes. ++ * Set GSMR_L to enable Ethernet to MC68160. ++ */ ++ sccp->scc_gsmrh = 0; ++ sccp->scc_gsmrl = (SCC_GSMRL_TCI | SCC_GSMRL_TPL_48 | SCC_GSMRL_TPP_10 | SCC_GSMRL_MODE_ENET); ++ ++ /* Set sync/delimiters. ++ */ ++ sccp->scc_dsr = 0xd555; ++ ++ /* Set processing mode. Use Ethernet CRC, catch broadcast, and ++ * start frame search 22 bit times after RENA. ++ */ ++ sccp->scc_pmsr = (SCC_PMSR_ENCRC | SCC_PMSR_NIB22); ++ ++ /* It is now OK to enable the Ethernet transmitter. ++ * Unfortunately, there are board implementation differences here. ++ */ ++#if (!defined (PB_ENET_TENA) && defined (PC_ENET_TENA)) ++ immap->im_ioport.iop_pcpar |= PC_ENET_TENA; ++ immap->im_ioport.iop_pcdir &= ~PC_ENET_TENA; ++#elif ( defined (PB_ENET_TENA) && !defined (PC_ENET_TENA)) ++ cp->cp_pbpar |= PB_ENET_TENA; ++ cp->cp_pbdir |= PB_ENET_TENA; ++#else ++#error Configuration Error: define exactly ONE of PB_ENET_TENA, PC_ENET_TENA ++#endif ++ ++#if defined(CONFIG_RPXLITE) || defined(CONFIG_RPXCLASSIC) ++ /* And while we are here, set the configuration to enable ethernet. ++ */ ++ *((volatile uint *)RPX_CSR_ADDR) &= ~BCSR0_ETHLPBK; ++ *((volatile uint *)RPX_CSR_ADDR) |= ++ (BCSR0_ETHEN | BCSR0_COLTESTDIS | BCSR0_FULLDPLXDIS); ++#endif ++ ++#ifdef CONFIG_BSEIP ++ /* BSE uses port B and C for PHY control. 
++ */ ++ cp->cp_pbpar &= ~(PB_BSE_POWERUP | PB_BSE_FDXDIS); ++ cp->cp_pbdir |= (PB_BSE_POWERUP | PB_BSE_FDXDIS); ++ cp->cp_pbdat |= (PB_BSE_POWERUP | PB_BSE_FDXDIS); ++ ++ immap->im_ioport.iop_pcpar &= ~PC_BSE_LOOPBACK; ++ immap->im_ioport.iop_pcdir |= PC_BSE_LOOPBACK; ++ immap->im_ioport.iop_pcso &= ~PC_BSE_LOOPBACK; ++ immap->im_ioport.iop_pcdat &= ~PC_BSE_LOOPBACK; ++#endif ++ ++#ifdef CONFIG_FADS ++ cp->cp_pbpar |= PB_ENET_TENA; ++ cp->cp_pbdir |= PB_ENET_TENA; ++ ++ /* Enable the EEST PHY. ++ */ ++ *((volatile uint *)BCSR1) &= ~BCSR1_ETHEN; ++#endif ++ ++ rtdev->base_addr = (unsigned long)ep; ++ ++ /* The CPM Ethernet specific entries in the device structure. */ ++ rtdev->open = scc_enet_open; ++ rtdev->hard_start_xmit = scc_enet_start_xmit; ++ rtdev->stop = scc_enet_close; ++ rtdev->hard_header = &rt_eth_header; ++ rtdev->get_stats = scc_enet_get_stats; ++ ++ if (!rx_pool_size) ++ rx_pool_size = RX_RING_SIZE * 2; ++ ++ if ((i = rt_register_rtnetdev(rtdev))) { ++ printk(KERN_ERR "Couldn't register rtdev\n"); ++ rtdm_irq_disable(&cep->irq_handle); ++ rtdm_irq_free(&cep->irq_handle); ++ rtdev_free(rtdev); ++ return i; ++ } ++ ++ /* And last, enable the transmit and receive processing. ++ */ ++ sccp->scc_gsmrl |= (SCC_GSMRL_ENR | SCC_GSMRL_ENT); ++ ++ printk("%s: CPM ENET Version 0.2 on SCC%d, irq %d, addr %02x:%02x:%02x:%02x:%02x:%02x\n", ++ rtdev->name, SCC_ENET+1, rtdev->irq, ++ rtdev->dev_addr[0], rtdev->dev_addr[1], rtdev->dev_addr[2], ++ rtdev->dev_addr[3], rtdev->dev_addr[4], rtdev->dev_addr[5]); ++ ++ return 0; ++} ++ ++static void __exit scc_enet_cleanup(void) ++{ ++ struct rtnet_device *rtdev = rtdev_root; ++ struct scc_enet_private *cep = (struct scc_enet_private *)rtdev->priv; ++ volatile cpm8xx_t *cp = cpmp; ++ volatile scc_enet_t *ep; ++ ++ if (rtdev) { ++ rtdm_irq_disable(&cep->irq_handle); ++ rtdm_irq_free(&cep->irq_handle); ++ ++ ep = (scc_enet_t *)(&cp->cp_dparam[PROFF_ENET]); ++ m8xx_cpm_dpfree(ep->sen_genscc.scc_rbase); ++ m8xx_cpm_dpfree(ep->sen_genscc.scc_tbase); ++ ++ rt_stack_disconnect(rtdev); ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ ++ printk("%s: unloaded\n", rtdev->name); ++ rtdev_free(rtdev); ++ rtdev_root = NULL; ++ } ++} ++ ++module_init(scc_enet_init); ++module_exit(scc_enet_cleanup); +--- linux/drivers/xenomai/net/drivers/mpc8xx_fec.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc8xx_fec.c 2021-04-07 16:01:27.264634113 +0800 +@@ -0,0 +1,2341 @@ ++/* ++ * BK Id: SCCS/s.fec.c 1.30 09/11/02 14:55:08 paulus ++ */ ++/* ++ * Fast Ethernet Controller (FEC) driver for Motorola MPC8xx. ++ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net) ++ * ++ * This version of the driver is specific to the FADS implementation, ++ * since the board contains control registers external to the processor ++ * for the control of the LevelOne LXT970 transceiver. The MPC860T manual ++ * describes connections using the internal parallel port I/O, which ++ * is basically all of Port D. ++ * ++ * Includes support for the following PHYs: QS6612, LXT970, LXT971/2. ++ * ++ * Right now, I am very wasteful with the buffers. I allocate memory ++ * pages and then divide them into 2K frame buffers. This way I know I ++ * have buffers large enough to hold one frame within one buffer descriptor. ++ * Once I get this working, I will use 64 or 128 byte CPM buffers, which ++ * will be much more memory efficient and will easily handle lots of ++ * small packets. ++ * ++ * Much better multiple PHY support by Magnus Damm. 
++ * Copyright (c) 2000 Ericsson Radio Systems AB. ++ * ++ * Make use of MII for PHY control configurable. ++ * Some fixes. ++ * Copyright (c) 2000-2002 Wolfgang Denk, DENX Software Engineering. ++ * ++ * Fixes for tx_full condition and relink when using MII. ++ * Support for AMD AM79C874 added. ++ * Thomas Lange, thomas@corelatus.com ++ * ++ * Added code for Multicast support, Frederic Goddeeris, Paul Geerinckx ++ * Copyright (c) 2002 Siemens Atea ++ * ++ * Ported to RTnet from "linuxppc_2_4_devel/arch/ppc/8xx_io/fec.c". ++ * Copyright (c) 2003 Wolfgang Grandegger (wg@denx.de) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++#error "MDIO for PHY configuration is not yet supported!" ++#endif ++ ++#include ++ ++MODULE_AUTHOR("Maintainer: Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTnet driver for the MPC8xx FEC Ethernet"); ++MODULE_LICENSE("GPL"); ++ ++static unsigned int rx_pool_size = 0; ++MODULE_PARM(rx_pool_size, "i"); ++MODULE_PARM_DESC(rx_pool_size, "Receive buffer pool size"); ++ ++#define RT_DEBUG(fmt,args...) ++ ++/* multicast support ++ */ ++/* #define DEBUG_MULTICAST */ ++ ++/* CRC polynomium used by the FEC for the multicast group filtering ++ */ ++#define FEC_CRC_POLY 0x04C11DB7 ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++/* Forward declarations of some structures to support different PHYs ++*/ ++ ++typedef struct { ++ uint mii_data; ++ void (*funct)(uint mii_reg, struct net_device *dev, uint data); ++} phy_cmd_t; ++ ++typedef struct { ++ uint id; ++ char *name; ++ ++ const phy_cmd_t *config; ++ const phy_cmd_t *startup; ++ const phy_cmd_t *ack_int; ++ const phy_cmd_t *shutdown; ++} phy_info_t; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++/* The number of Tx and Rx buffers. These are allocated from the page ++ * pool. The code may assume these are power of two, so it is best ++ * to keep them that size. ++ * We don't need to allocate pages for the transmitter. We just use ++ * the skbuffer directly. ++ */ ++#define FEC_ENET_RX_PAGES 4 ++#define FEC_ENET_RX_FRSIZE 2048 ++#define FEC_ENET_RX_FRPPG (PAGE_SIZE / FEC_ENET_RX_FRSIZE) ++#define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES) ++#define TX_RING_SIZE 8 /* Must be power of two */ ++#define TX_RING_MOD_MASK 7 /* for this to work */ ++ ++/* Interrupt events/masks. 
++*/ ++#define FEC_ENET_HBERR ((uint)0x80000000) /* Heartbeat error */ ++#define FEC_ENET_BABR ((uint)0x40000000) /* Babbling receiver */ ++#define FEC_ENET_BABT ((uint)0x20000000) /* Babbling transmitter */ ++#define FEC_ENET_GRA ((uint)0x10000000) /* Graceful stop complete */ ++#define FEC_ENET_TXF ((uint)0x08000000) /* Full frame transmitted */ ++#define FEC_ENET_TXB ((uint)0x04000000) /* A buffer was transmitted */ ++#define FEC_ENET_RXF ((uint)0x02000000) /* Full frame received */ ++#define FEC_ENET_RXB ((uint)0x01000000) /* A buffer was received */ ++#define FEC_ENET_MII ((uint)0x00800000) /* MII interrupt */ ++#define FEC_ENET_EBERR ((uint)0x00400000) /* SDMA bus error */ ++ ++/* ++*/ ++#define FEC_ECNTRL_PINMUX 0x00000004 ++#define FEC_ECNTRL_ETHER_EN 0x00000002 ++#define FEC_ECNTRL_RESET 0x00000001 ++ ++#define FEC_RCNTRL_BC_REJ 0x00000010 ++#define FEC_RCNTRL_PROM 0x00000008 ++#define FEC_RCNTRL_MII_MODE 0x00000004 ++#define FEC_RCNTRL_DRT 0x00000002 ++#define FEC_RCNTRL_LOOP 0x00000001 ++ ++#define FEC_TCNTRL_FDEN 0x00000004 ++#define FEC_TCNTRL_HBC 0x00000002 ++#define FEC_TCNTRL_GTS 0x00000001 ++ ++/* Delay to wait for FEC reset command to complete (in us) ++*/ ++#define FEC_RESET_DELAY 50 ++ ++/* The FEC stores dest/src/type, data, and checksum for receive packets. ++ */ ++#define PKT_MAXBUF_SIZE 1518 ++#define PKT_MINBUF_SIZE 64 ++#define PKT_MAXBLR_SIZE 1520 ++ ++/* The FEC buffer descriptors track the ring buffers. The rx_bd_base and ++ * tx_bd_base always point to the base of the buffer descriptors. The ++ * cur_rx and cur_tx point to the currently available buffer. ++ * The dirty_tx tracks the current buffer that is being sent by the ++ * controller. The cur_tx and dirty_tx are equal under both completely ++ * empty and completely full conditions. The empty/ready indicator in ++ * the buffer descriptor determines the actual condition. ++ */ ++struct fec_enet_private { ++ /* The addresses of a Tx/Rx-in-place packets/buffers. */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ ushort skb_cur; ++ ushort skb_dirty; ++ ++ /* CPM dual port RAM relative addresses. ++ */ ++ cbd_t *rx_bd_base; /* Address of Rx and Tx buffers. */ ++ cbd_t *tx_bd_base; ++ cbd_t *cur_rx, *cur_tx; /* The next free ring entry */ ++ cbd_t *dirty_tx; /* The ring entries to be free()ed. */ ++ ++ /* Virtual addresses for the receive buffers because we can't ++ * do a __va() on them anymore. 
++ */ ++ unsigned char *rx_vaddr[RX_RING_SIZE]; ++ ++ struct net_device_stats stats; ++ uint tx_full; ++ rtdm_lock_t lock; ++ rtdm_irq_t irq_handle; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ uint phy_id; ++ uint phy_id_done; ++ uint phy_status; ++ uint phy_speed; ++ phy_info_t *phy; ++ struct tq_struct phy_task; ++ ++ uint sequence_done; ++ ++ uint phy_addr; ++ ++ struct timer_list phy_timer_list; ++ u16 old_status; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ int link; ++ int old_link; ++ int full_duplex; ++ ++}; ++ ++static int fec_enet_open(struct rtnet_device *rtev); ++static int fec_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static void fec_enet_tx(struct rtnet_device *rtdev); ++static void fec_enet_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp); ++static int fec_enet_interrupt(rtdm_irq_t *irq_handle); ++static int fec_enet_close(struct rtnet_device *dev); ++static void fec_restart(struct rtnet_device *rtdev, int duplex); ++static void fec_stop(struct rtnet_device *rtdev); ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static void fec_enet_mii(struct net_device *dev); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++static struct net_device_stats *fec_enet_get_stats(struct rtnet_device *rtdev); ++#ifdef ORIGINAL_VERSION ++static void set_multicast_list(struct net_device *dev); ++#endif /* ORIGINAL_VERSION */ ++ ++static struct rtnet_device *rtdev_root = NULL; /* for cleanup */ ++ ++static ushort my_enet_addr[3]; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static int fec_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); ++static int netdev_ethtool_ioctl(struct net_device *dev, void *useraddr); ++ ++static void mdio_callback(uint regval, struct net_device *dev, uint data); ++static int mdio_read(struct net_device *dev, int phy_id, int location); ++ ++#if defined(CONFIG_FEC_DP83846A) ++static void mdio_timer_callback(unsigned long data); ++#endif /* CONFIG_FEC_DP83846A */ ++ ++/* MII processing. We keep this as simple as possible. Requests are ++ * placed on the list (if there is room). When the request is finished ++ * by the MII, an optional function may be called. ++ */ ++typedef struct mii_list { ++ uint mii_regval; ++ void (*mii_func)(uint val, struct net_device *dev, uint data); ++ struct mii_list *mii_next; ++ uint mii_data; ++} mii_list_t; ++ ++#define NMII 20 ++mii_list_t mii_cmds[NMII]; ++mii_list_t *mii_free; ++mii_list_t *mii_head; ++mii_list_t *mii_tail; ++ ++typedef struct mdio_read_data { ++ u16 regval; ++ struct task_struct *sleeping_task; ++} mdio_read_data_t; ++ ++static int mii_queue(struct net_device *dev, int request, ++ void (*func)(uint, struct net_device *, uint), uint data); ++static void mii_queue_relink(uint mii_reg, struct net_device *dev, uint data); ++ ++/* Make MII read/write commands for the FEC. ++*/ ++#define mk_mii_read(REG) (0x60020000 | ((REG & 0x1f) << 18)) ++#define mk_mii_write(REG, VAL) (0x50020000 | ((REG & 0x1f) << 18) | \ ++ (VAL & 0xffff)) ++#define mk_mii_end 0 ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++/* Transmitter timeout. ++*/ ++#define TX_TIMEOUT (2*HZ) ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++/* Register definitions for the PHY. 
++*/ ++ ++#define MII_REG_CR 0 /* Control Register */ ++#define MII_REG_SR 1 /* Status Register */ ++#define MII_REG_PHYIR1 2 /* PHY Identification Register 1 */ ++#define MII_REG_PHYIR2 3 /* PHY Identification Register 2 */ ++#define MII_REG_ANAR 4 /* A-N Advertisement Register */ ++#define MII_REG_ANLPAR 5 /* A-N Link Partner Ability Register */ ++#define MII_REG_ANER 6 /* A-N Expansion Register */ ++#define MII_REG_ANNPTR 7 /* A-N Next Page Transmit Register */ ++#define MII_REG_ANLPRNPR 8 /* A-N Link Partner Received Next Page Reg. */ ++ ++/* values for phy_status */ ++ ++#define PHY_CONF_ANE 0x0001 /* 1 auto-negotiation enabled */ ++#define PHY_CONF_LOOP 0x0002 /* 1 loopback mode enabled */ ++#define PHY_CONF_SPMASK 0x00f0 /* mask for speed */ ++#define PHY_CONF_10HDX 0x0010 /* 10 Mbit half duplex supported */ ++#define PHY_CONF_10FDX 0x0020 /* 10 Mbit full duplex supported */ ++#define PHY_CONF_100HDX 0x0040 /* 100 Mbit half duplex supported */ ++#define PHY_CONF_100FDX 0x0080 /* 100 Mbit full duplex supported */ ++ ++#define PHY_STAT_LINK 0x0100 /* 1 up - 0 down */ ++#define PHY_STAT_FAULT 0x0200 /* 1 remote fault */ ++#define PHY_STAT_ANC 0x0400 /* 1 auto-negotiation complete */ ++#define PHY_STAT_SPMASK 0xf000 /* mask for speed */ ++#define PHY_STAT_10HDX 0x1000 /* 10 Mbit half duplex selected */ ++#define PHY_STAT_10FDX 0x2000 /* 10 Mbit full duplex selected */ ++#define PHY_STAT_100HDX 0x4000 /* 100 Mbit half duplex selected */ ++#define PHY_STAT_100FDX 0x8000 /* 100 Mbit full duplex selected */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ ++static int ++fec_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct fec_enet_private *fep; ++ volatile fec_t *fecp; ++ volatile cbd_t *bdp; ++ rtdm_lockctx_t context; ++ ++ ++ RT_DEBUG(__FUNCTION__": ...\n"); ++ ++ fep = rtdev->priv; ++ fecp = (volatile fec_t*)rtdev->base_addr; ++ ++ if (!fep->link) { ++ /* Link is down or autonegotiation is in progress. */ ++ return 1; ++ } ++ ++ /* Fill in a Tx ring entry */ ++ bdp = fep->cur_tx; ++ ++#ifndef final_version ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ /* Ooops. All transmit buffers are full. Bail out. ++ * This should not happen, since dev->tbusy should be set. ++ */ ++ rtdm_printk("%s: tx queue full!.\n", rtdev->name); ++ return 1; ++ } ++#endif ++ ++ /* Clear all of the status flags. ++ */ ++ bdp->cbd_sc &= ~BD_ENET_TX_STATS; ++ ++ /* Set buffer length and buffer pointer. ++ */ ++ bdp->cbd_bufaddr = __pa(skb->data); ++ bdp->cbd_datlen = skb->len; ++ ++ /* Save skb pointer. ++ */ ++ fep->tx_skbuff[fep->skb_cur] = skb; ++ ++ fep->stats.tx_bytes += skb->len; ++ fep->skb_cur = (fep->skb_cur+1) & TX_RING_MOD_MASK; ++ ++ rtdm_lock_get_irqsave(&fep->lock, context); ++ ++ /* Get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ /* Push the data cache so the CPM does not get stale memory ++ * data. ++ */ ++ flush_dcache_range((unsigned long)skb->data, ++ (unsigned long)skb->data + skb->len); ++ ++ /* Send it on its way. Tell FEC its ready, interrupt when done, ++ * its the last BD of the frame, and to put the CRC on the end. ++ */ ++ ++ bdp->cbd_sc |= (BD_ENET_TX_READY | BD_ENET_TX_INTR ++ | BD_ENET_TX_LAST | BD_ENET_TX_TC); ++ ++ //rtdev->trans_start = jiffies; ++ ++ /* Trigger transmission start */ ++ fecp->fec_x_des_active = 0x01000000; ++ ++ /* If this was the last BD in the ring, start at the beginning again. 
++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) { ++ bdp = fep->tx_bd_base; ++ } else { ++ bdp++; ++ } ++ ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ rtnetif_stop_queue(rtdev); ++ fep->tx_full = 1; ++ } ++ ++ fep->cur_tx = (cbd_t *)bdp; ++ ++ rtdm_lock_put_irqrestore(&fep->lock, context); ++ ++ return 0; ++} ++ ++#ifdef ORIGINAL_VERSION ++static void ++fec_timeout(struct net_device *dev) ++{ ++ struct fec_enet_private *fep = rtdev->priv; ++ ++ if (fep->link || fep->old_link) { ++ /* Link status changed - print timeout message */ ++ printk("%s: transmit timed out.\n", rtdev->name); ++ } ++ ++ fep->stats.tx_errors++; ++#ifndef final_version ++ if (fep->link) { ++ int i; ++ cbd_t *bdp; ++ ++ printk ("Ring data dump: " ++ "cur_tx %p%s dirty_tx %p cur_rx %p\n", ++ fep->cur_tx, ++ fep->tx_full ? " (full)" : "", ++ fep->dirty_tx, ++ fep->cur_rx); ++ ++ bdp = fep->tx_bd_base; ++ printk(" tx: %u buffers\n", TX_RING_SIZE); ++ for (i = 0 ; i < TX_RING_SIZE; i++) { ++ printk(" %08x: %04x %04x %08x\n", ++ (uint) bdp, ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ bdp++; ++ } ++ ++ bdp = fep->rx_bd_base; ++ printk(" rx: %lu buffers\n", RX_RING_SIZE); ++ for (i = 0 ; i < RX_RING_SIZE; i++) { ++ printk(" %08x: %04x %04x %08x\n", ++ (uint) bdp, ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ bdp++; ++ } ++ } ++#endif ++ if (!fep->tx_full) { ++ netif_wake_queue(dev); ++ } ++} ++#endif /* ORIGINAL_VERSION */ ++ ++/* The interrupt handler. ++ * This is called from the MPC core interrupt. ++ */ ++static int fec_enet_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ volatile fec_t *fecp; ++ uint int_events; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ ++ fecp = (volatile fec_t*)rtdev->base_addr; ++ ++ /* Get the interrupt events that caused us to be here. ++ */ ++ while ((int_events = fecp->fec_ievent) != 0) { ++ fecp->fec_ievent = int_events; ++ if ((int_events & (FEC_ENET_HBERR | FEC_ENET_BABR | ++ FEC_ENET_BABT | FEC_ENET_EBERR)) != 0) { ++ rtdm_printk("FEC ERROR %x\n", int_events); ++ } ++ ++ /* Handle receive event in its own function. ++ */ ++ if (int_events & FEC_ENET_RXF) { ++ fec_enet_rx(rtdev, &packets, &time_stamp); ++ } ++ ++ /* Transmit OK, or non-fatal error. Update the buffer ++ descriptors. FEC handles all errors, we just discover ++ them as part of the transmit process. ++ */ ++ if (int_events & FEC_ENET_TXF) { ++ fec_enet_tx(rtdev); ++ } ++ ++ if (int_events & FEC_ENET_MII) { ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ fec_enet_mii(dev); ++#else ++ rtdm_printk("%s[%d] %s: unexpected FEC_ENET_MII event\n", ++ __FILE__,__LINE__,__FUNCTION__); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ } ++ ++ } ++ ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++ ++static void ++fec_enet_tx(struct rtnet_device *rtdev) ++{ ++ struct rtskb *skb; ++ struct fec_enet_private *fep = rtdev->priv; ++ volatile cbd_t *bdp; ++ rtdm_lock_get(&fep->lock); ++ bdp = fep->dirty_tx; ++ ++ while ((bdp->cbd_sc&BD_ENET_TX_READY) == 0) { ++ if (bdp == fep->cur_tx && fep->tx_full == 0) break; ++ ++ skb = fep->tx_skbuff[fep->skb_dirty]; ++ /* Check for errors. 
*/ ++ if (bdp->cbd_sc & (BD_ENET_TX_HB | BD_ENET_TX_LC | ++ BD_ENET_TX_RL | BD_ENET_TX_UN | ++ BD_ENET_TX_CSL)) { ++ fep->stats.tx_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_HB) /* No heartbeat */ ++ fep->stats.tx_heartbeat_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_LC) /* Late collision */ ++ fep->stats.tx_window_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_RL) /* Retrans limit */ ++ fep->stats.tx_aborted_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_UN) /* Underrun */ ++ fep->stats.tx_fifo_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_CSL) /* Carrier lost */ ++ fep->stats.tx_carrier_errors++; ++ } else { ++ fep->stats.tx_packets++; ++ } ++ ++#ifndef final_version ++ if (bdp->cbd_sc & BD_ENET_TX_READY) ++ rtdm_printk("HEY! Enet xmit interrupt and TX_READY.\n"); ++#endif ++ /* Deferred means some collisions occurred during transmit, ++ * but we eventually sent the packet OK. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_DEF) ++ fep->stats.collisions++; ++ ++ /* Free the sk buffer associated with this last transmit. ++ */ ++ dev_kfree_rtskb(skb); ++ fep->tx_skbuff[fep->skb_dirty] = NULL; ++ fep->skb_dirty = (fep->skb_dirty + 1) & TX_RING_MOD_MASK; ++ ++ /* Update pointer to next buffer descriptor to be transmitted. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) ++ bdp = fep->tx_bd_base; ++ else ++ bdp++; ++ ++ /* Since we have freed up a buffer, the ring is no longer ++ * full. ++ */ ++ if (fep->tx_full) { ++ fep->tx_full = 0; ++ if (rtnetif_queue_stopped(rtdev)) ++ rtnetif_wake_queue(rtdev); ++ } ++ } ++ fep->dirty_tx = (cbd_t *)bdp; ++ rtdm_lock_put(&fep->lock); ++} ++ ++ ++/* During a receive, the cur_rx points to the current incoming buffer. ++ * When we update through the ring, if the next incoming buffer has ++ * not been given to the system, we just set the empty indicator, ++ * effectively tossing the packet. ++ */ ++static void ++fec_enet_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp) ++{ ++ struct fec_enet_private *fep; ++ volatile fec_t *fecp; ++ volatile cbd_t *bdp; ++ struct rtskb *skb; ++ ushort pkt_len; ++ __u8 *data; ++ ++ fep = rtdev->priv; ++ fecp = (volatile fec_t*)rtdev->base_addr; ++ ++ /* First, grab all of the stats for the incoming packet. ++ * These get messed up if we get called due to a busy condition. ++ */ ++ bdp = fep->cur_rx; ++ ++while (!(bdp->cbd_sc & BD_ENET_RX_EMPTY)) { ++ ++#ifndef final_version ++ /* Since we have allocated space to hold a complete frame, ++ * the last indicator should be set. ++ */ ++ if ((bdp->cbd_sc & BD_ENET_RX_LAST) == 0) ++ rtdm_printk("FEC ENET: rcv is not +last\n"); ++#endif ++ ++ /* Check for errors. */ ++ if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | ++ BD_ENET_RX_CR | BD_ENET_RX_OV)) { ++ fep->stats.rx_errors++; ++ if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) { ++ /* Frame too long or too short. */ ++ fep->stats.rx_length_errors++; ++ } ++ if (bdp->cbd_sc & BD_ENET_RX_NO) /* Frame alignment */ ++ fep->stats.rx_frame_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_CR) /* CRC Error */ ++ fep->stats.rx_crc_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_OV) /* FIFO overrun */ ++ fep->stats.rx_crc_errors++; ++ } ++ ++ /* Report late collisions as a frame error. ++ * On this error, the BD is closed, but we don't know what we ++ * have in the buffer. So, just drop this frame on the floor. ++ */ ++ if (bdp->cbd_sc & BD_ENET_RX_CL) { ++ fep->stats.rx_errors++; ++ fep->stats.rx_frame_errors++; ++ goto rx_processing_done; ++ } ++ ++ /* Process the incoming frame. 
++ */ ++ fep->stats.rx_packets++; ++ pkt_len = bdp->cbd_datlen; ++ fep->stats.rx_bytes += pkt_len; ++ data = fep->rx_vaddr[bdp - fep->rx_bd_base]; ++ ++ /* This does 16 byte alignment, exactly what we need. ++ * The packet length includes FCS, but we don't want to ++ * include that when passing upstream as it messes up ++ * bridging applications. ++ */ ++ skb = rtnetdev_alloc_rtskb(rtdev, pkt_len-4); ++ ++ if (skb == NULL) { ++ rtdm_printk("%s: Memory squeeze, dropping packet.\n", rtdev->name); ++ fep->stats.rx_dropped++; ++ } else { ++ rtskb_put(skb,pkt_len-4); /* Make room */ ++ memcpy(skb->data, data, pkt_len-4); ++ skb->protocol=rt_eth_type_trans(skb,rtdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ (*packets)++; ++ } ++rx_processing_done: ++ ++ /* Clear the status flags for this buffer. ++ */ ++ bdp->cbd_sc &= ~BD_ENET_RX_STATS; ++ ++ /* Mark the buffer empty. ++ */ ++ bdp->cbd_sc |= BD_ENET_RX_EMPTY; ++ ++ /* Update BD pointer to next entry. ++ */ ++ if (bdp->cbd_sc & BD_ENET_RX_WRAP) ++ bdp = fep->rx_bd_base; ++ else ++ bdp++; ++ ++ /* Doing this here will keep the FEC running while we process ++ * incoming frames. On a heavily loaded network, we should be ++ * able to keep up at the expense of system resources. ++ */ ++ fecp->fec_r_des_active = 0x01000000; ++ } /* while (!(bdp->cbd_sc & BD_ENET_RX_EMPTY)) */ ++ fep->cur_rx = (cbd_t *)bdp; ++ ++} ++ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static void ++fec_enet_mii(struct net_device *dev) ++{ ++ struct fec_enet_private *fep; ++ volatile fec_t *ep; ++ mii_list_t *mip; ++ uint mii_reg; ++ ++ fep = (struct fec_enet_private *)dev->priv; ++ ep = &(((immap_t *)IMAP_ADDR)->im_cpm.cp_fec); ++ mii_reg = ep->fec_mii_data; ++ ++ if ((mip = mii_head) == NULL) { ++ printk("MII and no head!\n"); ++ return; ++ } ++ ++ if (mip->mii_func != NULL) ++ (*(mip->mii_func))(mii_reg, dev, mip->mii_data); ++ ++ mii_head = mip->mii_next; ++ mip->mii_next = mii_free; ++ mii_free = mip; ++ ++ if ((mip = mii_head) != NULL) { ++ ep->fec_mii_data = mip->mii_regval; ++ } ++} ++ ++static int ++mii_queue(struct net_device *dev, int regval, void (*func)(uint, struct net_device *, uint), uint data) ++{ ++ struct fec_enet_private *fep; ++ unsigned long flags; ++ mii_list_t *mip; ++ int retval; ++ ++ /* Add PHY address to register command. ++ */ ++ fep = dev->priv; ++ regval |= fep->phy_addr << 23; ++ ++ retval = 0; ++ ++ save_flags(flags); ++ cli(); ++ ++ if ((mip = mii_free) != NULL) { ++ mii_free = mip->mii_next; ++ mip->mii_regval = regval; ++ mip->mii_func = func; ++ mip->mii_next = NULL; ++ mip->mii_data = data; ++ if (mii_head) { ++ mii_tail->mii_next = mip; ++ mii_tail = mip; ++ } else { ++ mii_head = mii_tail = mip; ++ (&(((immap_t *)IMAP_ADDR)->im_cpm.cp_fec))->fec_mii_data = regval; ++ } ++ } else { ++ retval = 1; ++ } ++ ++ restore_flags(flags); ++ ++ return(retval); ++} ++ ++static void mii_do_cmd(struct net_device *dev, const phy_cmd_t *c) ++{ ++ int k; ++ ++ if(!c) ++ return; ++ ++ for(k = 0; (c+k)->mii_data != mk_mii_end; k++) ++ mii_queue(dev, (c+k)->mii_data, (c+k)->funct, 0); ++} ++ ++static void mii_parse_sr(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_LINK | PHY_STAT_FAULT | PHY_STAT_ANC); ++ ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_LINK; ++ if (mii_reg & 0x0010) ++ s |= PHY_STAT_FAULT; ++ if (mii_reg & 0x0020) ++ s |= PHY_STAT_ANC; ++ ++ fep->phy_status = s; ++ fep->link = (s & PHY_STAT_LINK) ? 
1 : 0; ++} ++ ++static void mii_parse_cr(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_CONF_ANE | PHY_CONF_LOOP); ++ ++ if (mii_reg & 0x1000) ++ s |= PHY_CONF_ANE; ++ if (mii_reg & 0x4000) ++ s |= PHY_CONF_LOOP; ++ ++ fep->phy_status = s; ++} ++ ++static void mii_parse_anar(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_CONF_SPMASK); ++ ++ if (mii_reg & 0x0020) ++ s |= PHY_CONF_10HDX; ++ if (mii_reg & 0x0040) ++ s |= PHY_CONF_10FDX; ++ if (mii_reg & 0x0080) ++ s |= PHY_CONF_100HDX; ++ if (mii_reg & 0x0100) ++ s |= PHY_CONF_100FDX; ++ ++ fep->phy_status = s; ++} ++ ++/* ------------------------------------------------------------------------- */ ++/* The Level one LXT970 is used by many boards */ ++ ++#ifdef CONFIG_FEC_LXT970 ++ ++#define MII_LXT970_MIRROR 16 /* Mirror register */ ++#define MII_LXT970_IER 17 /* Interrupt Enable Register */ ++#define MII_LXT970_ISR 18 /* Interrupt Status Register */ ++#define MII_LXT970_CONFIG 19 /* Configuration Register */ ++#define MII_LXT970_CSR 20 /* Chip Status Register */ ++ ++static void mii_parse_lxt970_csr(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x0800) { ++ if (mii_reg & 0x1000) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ else { ++ if (mii_reg & 0x1000) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_lxt970 = { ++ 0x07810000, ++ "LXT970", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_LXT970_IER, 0x0002), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* read SR and ISR to acknowledge */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_LXT970_ISR), NULL }, ++ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_LXT970_CSR), mii_parse_lxt970_csr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_LXT970_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_LXT970 */ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Level one LXT971 is used on some of my custom boards */ ++ ++#ifdef CONFIG_FEC_LXT971 ++ ++/* register definitions for the 971 */ ++ ++#define MII_LXT971_PCR 16 /* Port Control Register */ ++#define MII_LXT971_SR2 17 /* Status Register 2 */ ++#define MII_LXT971_IER 18 /* Interrupt Enable Register */ ++#define MII_LXT971_ISR 19 /* Interrupt Status Register */ ++#define MII_LXT971_LCR 20 /* LED Control Register */ ++#define MII_LXT971_TCR 30 /* Transmit Control Register */ ++ ++/* ++ * I had some nice ideas of running the MDIO faster... ++ * The 971 should support 8MHz and I tried it, but things acted really ++ * weird, so 2.5 MHz ought to be enough for anyone... 
++ */ ++ ++static void mii_parse_lxt971_sr2(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x4000) { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ else { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ if (mii_reg & 0x0008) ++ s |= PHY_STAT_FAULT; ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_lxt971 = { ++ 0x0001378e, ++ "LXT971", ++ ++ (const phy_cmd_t []) { /* config */ ++// { mk_mii_write(MII_REG_ANAR, 0x021), NULL }, /* 10 Mbps, HD */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x00f2), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ ++ /* Somehow does the 971 tell me that the link is down ++ * the first read after power-up. ++ * read here to get a valid value in ack_int */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_LXT971_SR2), mii_parse_lxt971_sr2 }, ++ ++ /* we only need to read ISR to acknowledge */ ++ ++ { mk_mii_read(MII_LXT971_ISR), NULL }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_LXT971 */ ++ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Quality Semiconductor QS6612 is used on the RPX CLLF */ ++ ++#ifdef CONFIG_FEC_QS6612 ++ ++/* register definitions */ ++ ++#define MII_QS6612_MCR 17 /* Mode Control Register */ ++#define MII_QS6612_FTR 27 /* Factory Test Register */ ++#define MII_QS6612_MCO 28 /* Misc. Control Register */ ++#define MII_QS6612_ISR 29 /* Interrupt Source Register */ ++#define MII_QS6612_IMR 30 /* Interrupt Mask Register */ ++#define MII_QS6612_PCR 31 /* 100BaseTx PHY Control Reg. */ ++ ++static void mii_parse_qs6612_pcr(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ switch((mii_reg >> 2) & 7) { ++ case 1: s |= PHY_STAT_10HDX; break; ++ case 2: s |= PHY_STAT_100HDX; break; ++ case 5: s |= PHY_STAT_10FDX; break; ++ case 6: s |= PHY_STAT_100FDX; break; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_qs6612 = { ++ 0x00181440, ++ "QS6612", ++ ++ (const phy_cmd_t []) { /* config */ ++// { mk_mii_write(MII_REG_ANAR, 0x061), NULL }, /* 10 Mbps */ ++ ++ /* The PHY powers up isolated on the RPX, ++ * so send a command to allow operation. 
++ */ ++ ++ { mk_mii_write(MII_QS6612_PCR, 0x0dc0), NULL }, ++ ++ /* parse cr and anar to get some info */ ++ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_QS6612_IMR, 0x003a), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ ++ /* we need to read ISR, SR and ANER to acknowledge */ ++ ++ { mk_mii_read(MII_QS6612_ISR), NULL }, ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_ANER), NULL }, ++ ++ /* read pcr to get info */ ++ ++ { mk_mii_read(MII_QS6612_PCR), mii_parse_qs6612_pcr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_QS6612_IMR, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_QS6612 */ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Advanced Micro Devices AM79C874 is used on the ICU862 */ ++ ++#ifdef CONFIG_FEC_AM79C874 ++ ++/* register definitions for the 79C874 */ ++ ++#define MII_AM79C874_MFR 16 /* Miscellaneous Features Register */ ++#define MII_AM79C874_ICSR 17 /* Interrupt Control/Status Register */ ++#define MII_AM79C874_DR 18 /* Diagnostic Register */ ++#define MII_AM79C874_PMLR 19 /* Power Management & Loopback Register */ ++#define MII_AM79C874_MCR 21 /* Mode Control Register */ ++#define MII_AM79C874_DC 23 /* Disconnect Counter */ ++#define MII_AM79C874_REC 24 /* Receiver Error Counter */ ++ ++static void mii_parse_amd79c874_dr(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ /* Register 18: Bit 10 is data rate, 11 is Duplex */ ++ switch ((mii_reg >> 10) & 3) { ++ case 0: s |= PHY_STAT_10HDX; break; ++ case 1: s |= PHY_STAT_100HDX; break; ++ case 2: s |= PHY_STAT_10FDX; break; ++ case 3: s |= PHY_STAT_100FDX; break; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_amd79c874 = { ++ 0x00022561, ++ "AM79C874", ++ ++ (const phy_cmd_t []) { /* config */ ++// { mk_mii_write(MII_REG_ANAR, 0x021), NULL }, /* 10 Mbps, HD */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_AM79C874_ICSR, 0xff00), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_AM79C874_DR), mii_parse_amd79c874_dr }, ++ ++ /* we only need to read ICSR to acknowledge */ ++ ++ { mk_mii_read(MII_AM79C874_ICSR), NULL }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_AM79C874_ICSR, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_AM79C874 */ ++ ++/* -------------------------------------------------------------------- */ ++/* The National Semiconductor DP83843BVJE is used on a Mediatrix board */ ++/* -------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_FEC_DP83843 ++ ++/* Register definitions */ ++#define MII_DP83843_PHYSTS 0x10 /* PHY Status Register */ ++#define MII_DP83843_MIPSCR 0x11 /* Specific Status 
Register */ ++#define MII_DP83843_MIPGSR 0x12 /* Generic Status Register */ ++ ++static void mii_parse_dp83843_physts(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x0002) ++ { ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ else ++ { ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_dp83843 = { ++ 0x020005c1, ++ "DP83843BVJE", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_write(MII_REG_ANAR, 0x01E1), NULL }, /* Auto-Negociation Register Control set to */ ++ /* auto-negociate 10/100MBps, Half/Full duplex */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup */ ++ { mk_mii_write(MII_DP83843_MIPSCR, 0x0002), NULL }, /* Enable interrupts */ ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* Enable and Restart Auto-Negotiation */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83843_PHYSTS), mii_parse_dp83843_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ { mk_mii_read(MII_DP83843_MIPGSR), NULL }, /* Acknowledge interrupts */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, /* Find out the current status */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83843_PHYSTS), mii_parse_dp83843_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_end, } ++ } ++}; ++ ++#endif /* CONFIG_FEC_DP83843 */ ++ ++ ++/* ----------------------------------------------------------------- */ ++/* The National Semiconductor DP83846A is used on a Mediatrix board */ ++/* ----------------------------------------------------------------- */ ++ ++#ifdef CONFIG_FEC_DP83846A ++ ++/* Register definitions */ ++#define MII_DP83846A_PHYSTS 0x10 /* PHY Status Register */ ++ ++static void mii_parse_dp83846a_physts(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = (struct fec_enet_private *)dev->priv; ++ uint s = fep->phy_status; ++ int link_change_mask; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x0002) { ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ else { ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ ++ fep->phy_status = s; ++ ++ link_change_mask = PHY_STAT_LINK | PHY_STAT_10FDX | PHY_STAT_10HDX | PHY_STAT_100FDX | PHY_STAT_100HDX; ++ if(fep->old_status != (link_change_mask & s)) ++ { ++ fep->old_status = (link_change_mask & s); ++ mii_queue_relink(mii_reg, dev, 0); ++ } ++} ++ ++static phy_info_t phy_info_dp83846a = { ++ 0x020005c2, ++ "DP83846A", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_write(MII_REG_ANAR, 0x01E1), NULL }, /* Auto-Negociation Register Control set to */ ++ /* auto-negociate 10/100MBps, Half/Full duplex */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup */ ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* Enable and Restart Auto-Negotiation */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83846A_PHYSTS), 
mii_parse_dp83846a_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83846A_PHYSTS), mii_parse_dp83846a_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_end, } ++ } ++}; ++ ++#endif /* CONFIG_FEC_DP83846A */ ++ ++ ++static phy_info_t *phy_info[] = { ++ ++#ifdef CONFIG_FEC_LXT970 ++ &phy_info_lxt970, ++#endif /* CONFIG_FEC_LXT970 */ ++ ++#ifdef CONFIG_FEC_LXT971 ++ &phy_info_lxt971, ++#endif /* CONFIG_FEC_LXT971 */ ++ ++#ifdef CONFIG_FEC_QS6612 ++ &phy_info_qs6612, ++#endif /* CONFIG_FEC_QS6612 */ ++ ++#ifdef CONFIG_FEC_AM79C874 ++ &phy_info_amd79c874, ++#endif /* CONFIG_FEC_AM79C874 */ ++ ++#ifdef CONFIG_FEC_DP83843 ++ &phy_info_dp83843, ++#endif /* CONFIG_FEC_DP83843 */ ++ ++#ifdef CONFIG_FEC_DP83846A ++ &phy_info_dp83846a, ++#endif /* CONFIG_FEC_DP83846A */ ++ ++ NULL ++}; ++ ++static void mii_display_status(struct net_device *dev) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ if (!fep->link && !fep->old_link) { ++ /* Link is still down - don't print anything */ ++ return; ++ } ++ ++ printk("%s: status: ", dev->name); ++ ++ if (!fep->link) { ++ printk("link down"); ++ } else { ++ printk("link up"); ++ ++ switch(s & PHY_STAT_SPMASK) { ++ case PHY_STAT_100FDX: printk(", 100 Mbps Full Duplex"); break; ++ case PHY_STAT_100HDX: printk(", 100 Mbps Half Duplex"); break; ++ case PHY_STAT_10FDX: printk(", 10 Mbps Full Duplex"); break; ++ case PHY_STAT_10HDX: printk(", 10 Mbps Half Duplex"); break; ++ default: ++ printk(", Unknown speed/duplex"); ++ } ++ ++ if (s & PHY_STAT_ANC) ++ printk(", auto-negotiation complete"); ++ } ++ ++ if (s & PHY_STAT_FAULT) ++ printk(", remote fault"); ++ ++ printk(".\n"); ++} ++ ++static void mii_display_config(struct net_device *dev) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ printk("%s: config: auto-negotiation ", dev->name); ++ ++ if (s & PHY_CONF_ANE) ++ printk("on"); ++ else ++ printk("off"); ++ ++ if (s & PHY_CONF_100FDX) ++ printk(", 100FDX"); ++ if (s & PHY_CONF_100HDX) ++ printk(", 100HDX"); ++ if (s & PHY_CONF_10FDX) ++ printk(", 10FDX"); ++ if (s & PHY_CONF_10HDX) ++ printk(", 10HDX"); ++ if (!(s & PHY_CONF_SPMASK)) ++ printk(", No speed/duplex selected?"); ++ ++ if (s & PHY_CONF_LOOP) ++ printk(", loopback enabled"); ++ ++ printk(".\n"); ++ ++ fep->sequence_done = 1; ++} ++ ++static void mii_relink(struct net_device *dev) ++{ ++ struct fec_enet_private *fep = dev->priv; ++ int duplex; ++ ++ fep->link = (fep->phy_status & PHY_STAT_LINK) ? 
1 : 0; ++ mii_display_status(dev); ++ fep->old_link = fep->link; ++ ++ if (fep->link) { ++ duplex = 0; ++ if (fep->phy_status ++ & (PHY_STAT_100FDX | PHY_STAT_10FDX)) ++ duplex = 1; ++ fec_restart(dev, duplex); ++ ++ if (netif_queue_stopped(dev)) { ++ netif_wake_queue(dev); ++ } ++ } else { ++ netif_stop_queue(dev); ++ fec_stop(dev); ++ } ++} ++ ++static void mii_queue_relink(uint mii_reg, struct net_device *dev, uint data) ++{ ++ struct fec_enet_private *fep = dev->priv; ++ ++ fep->phy_task.routine = (void *)mii_relink; ++ fep->phy_task.data = dev; ++ schedule_task(&fep->phy_task); ++} ++ ++static void mii_queue_config(uint mii_reg, struct net_device *dev, uint data) ++{ ++ struct fec_enet_private *fep = dev->priv; ++ ++ fep->phy_task.routine = (void *)mii_display_config; ++ fep->phy_task.data = dev; ++ schedule_task(&fep->phy_task); ++} ++ ++ ++ ++phy_cmd_t phy_cmd_relink[] = { { mk_mii_read(MII_REG_CR), mii_queue_relink }, ++ { mk_mii_end, } }; ++phy_cmd_t phy_cmd_config[] = { { mk_mii_read(MII_REG_CR), mii_queue_config }, ++ { mk_mii_end, } }; ++ ++ ++ ++/* Read remainder of PHY ID. ++*/ ++static void ++mii_discover_phy3(uint mii_reg, struct net_device *dev, uint data) ++{ ++ struct fec_enet_private *fep; ++ int i; ++ ++ fep = dev->priv; ++ fep->phy_id |= (mii_reg & 0xffff); ++ ++ for(i = 0; phy_info[i]; i++) ++ if(phy_info[i]->id == (fep->phy_id >> 4)) ++ break; ++ ++ if(!phy_info[i]) ++ panic("%s: PHY id 0x%08x is not supported!\n", ++ dev->name, fep->phy_id); ++ ++ fep->phy = phy_info[i]; ++ fep->phy_id_done = 1; ++ ++ printk("%s: Phy @ 0x%x, type %s (0x%08x)\n", ++ dev->name, fep->phy_addr, fep->phy->name, fep->phy_id); ++} ++ ++/* Scan all of the MII PHY addresses looking for someone to respond ++ * with a valid ID. This usually happens quickly. ++ */ ++static void ++mii_discover_phy(uint mii_reg, struct net_device *dev, uint data) ++{ ++ struct fec_enet_private *fep; ++ uint phytype; ++ ++ fep = dev->priv; ++ ++ if ((phytype = (mii_reg & 0xffff)) != 0xffff) { ++ ++ /* Got first part of ID, now get remainder. ++ */ ++ fep->phy_id = phytype << 16; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR2), mii_discover_phy3, 0); ++ } else { ++ fep->phy_addr++; ++ if (fep->phy_addr < 32) { ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), ++ mii_discover_phy, 0); ++ } else { ++ printk("fec: No PHY device found.\n"); ++ } ++ } ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++/* This interrupt occurs when the PHY detects a link change. ++*/ ++static void ++#ifdef CONFIG_RPXCLASSIC ++mii_link_interrupt(void *dev_id) ++#else ++mii_link_interrupt(int irq, void * dev_id, struct pt_regs * regs) ++#endif ++{ ++ struct net_device *dev = dev_id; ++ struct fec_enet_private *fep = dev->priv; ++ volatile immap_t *immap = (immap_t *)IMAP_ADDR; ++ volatile fec_t *fecp = &(immap->im_cpm.cp_fec); ++ unsigned int ecntrl = fecp->fec_ecntrl; ++ ++ /* ++ * Acknowledge the interrupt if possible. If we have not ++ * found the PHY yet we can't process or acknowledge the ++ * interrupt now. Instead we ignore this interrupt for now, ++ * which we can do since it is edge triggered. It will be ++ * acknowledged later by fec_enet_open(). 
++ */ ++ if (fep->phy) { ++ /* ++ * We need the FEC enabled to access the MII ++ */ ++ if ((ecntrl & FEC_ECNTRL_ETHER_EN) == 0) { ++ fecp->fec_ecntrl |= FEC_ECNTRL_ETHER_EN; ++ } ++ ++ mii_do_cmd(dev, fep->phy->ack_int); ++ mii_do_cmd(dev, phy_cmd_relink); /* restart and display status */ ++ ++ if ((ecntrl & FEC_ECNTRL_ETHER_EN) == 0) { ++ fecp->fec_ecntrl = ecntrl; /* restore old settings */ ++ } ++ } ++ ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++static int ++fec_enet_open(struct rtnet_device *rtdev) ++{ ++ struct fec_enet_private *fep = rtdev->priv; ++ ++ /* I should reset the ring buffers here, but I don't yet know ++ * a simple way to do that. ++ */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ fep->sequence_done = 0; ++ fep->link = 0; ++ ++ if (fep->phy) { ++ mii_do_cmd(dev, fep->phy->config); ++ mii_do_cmd(dev, phy_cmd_config); /* display configuration */ ++ while(!fep->sequence_done) ++ schedule(); ++ ++ mii_do_cmd(dev, fep->phy->startup); ++ ++#if defined(CONFIG_XENO_DRIVERS_NET_USE_MDIO) && defined(CONFIG_FEC_DP83846A) ++ if(fep->phy == &phy_info_dp83846a) ++ { ++ /* Initializing timers ++ */ ++ init_timer( &fep->phy_timer_list ); ++ ++ /* Starting timer for periodic link status check ++ * After 100 milli-seconds, mdio_timer_callback function is called. ++ */ ++ fep->phy_timer_list.expires = jiffies + (100 * HZ / 1000); ++ fep->phy_timer_list.data = (unsigned long)dev; ++ fep->phy_timer_list.function = mdio_timer_callback; ++ add_timer( &fep->phy_timer_list ); ++ } ++ ++#if defined(CONFIG_IP_PNP) ++ rtdm_printk("%s: Waiting for the link to be up...\n", rtdev->name); ++ ++ while(fep->link == 0 || ((((volatile fec_t*)rtdev->base_addr)->fec_ecntrl & FEC_ECNTRL_ETHER_EN) == 0)) ++ { ++ schedule(); ++ } ++#endif /* CONFIG_IP_PNP */ ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO && CONFIG_FEC_DP83846A */ ++ ++ netif_start_queue(dev); ++ return 0; /* Success */ ++ } ++ return -ENODEV; /* No PHY we understand */ ++#else /* !CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ fep->link = 1; ++ rtnetif_start_queue(rtdev); ++ ++ return 0; /* Success */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++} ++ ++static int ++fec_enet_close(struct rtnet_device *rtdev) ++{ ++ /* Don't know what to do yet. ++ */ ++ rtnetif_stop_queue(rtdev); ++ ++ fec_stop(rtdev); ++ ++ return 0; ++} ++ ++static struct net_device_stats *fec_enet_get_stats(struct rtnet_device *rtdev) ++{ ++ struct fec_enet_private *fep = (struct fec_enet_private *)rtdev->priv; ++ ++ return &fep->stats; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ ++#if defined(CONFIG_FEC_DP83846A) ++/* Execute the ack_int command set and schedules next timer call back. */ ++static void mdio_timer_callback(unsigned long data) ++{ ++ struct net_device *dev = (struct net_device *)data; ++ struct fec_enet_private *fep = (struct fec_enet_private *)(dev->priv); ++ mii_do_cmd(dev, fep->phy->ack_int); ++ ++ if(fep->link == 0) ++ { ++ fep->phy_timer_list.expires = jiffies + (100 * HZ / 1000); /* Sleep for 100ms */ ++ } ++ else ++ { ++ fep->phy_timer_list.expires = jiffies + (1 * HZ); /* Sleep for 1 sec. 
*/ ++ } ++ add_timer( &fep->phy_timer_list ); ++} ++#endif /* CONFIG_FEC_DP83846A */ ++ ++static void mdio_callback(uint regval, struct net_device *dev, uint data) ++{ ++ mdio_read_data_t* mrd = (mdio_read_data_t *)data; ++ mrd->regval = 0xFFFF & regval; ++ wake_up_process(mrd->sleeping_task); ++} ++ ++static int mdio_read(struct net_device *dev, int phy_id, int location) ++{ ++ uint retval; ++ mdio_read_data_t* mrd = (mdio_read_data_t *)kmalloc(sizeof(*mrd), GFP_KERNEL); ++ ++ mrd->sleeping_task = current; ++ set_current_state(TASK_INTERRUPTIBLE); ++ mii_queue(dev, mk_mii_read(location), mdio_callback, (unsigned int) mrd); ++ schedule(); ++ ++ retval = mrd->regval; ++ ++ kfree(mrd); ++ ++ return retval; ++} ++ ++void mdio_write(struct net_device *dev, int phy_id, int location, int value) ++{ ++ mii_queue(dev, mk_mii_write(location, value), NULL, 0); ++} ++ ++static int fec_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) ++{ ++ struct fec_enet_private *cep = (struct fec_enet_private *)dev->priv; ++ struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data; ++ ++ int phy = cep->phy_addr & 0x1f; ++ int retval; ++ ++ if (data == NULL) ++ { ++ retval = -EINVAL; ++ } ++ else ++ { ++ switch(cmd) ++ { ++ case SIOCETHTOOL: ++ return netdev_ethtool_ioctl(dev, (void*)rq->ifr_data); ++ break; ++ ++ case SIOCGMIIPHY: /* Get address of MII PHY in use. */ ++ case SIOCDEVPRIVATE: /* for binary compat, remove in 2.5 */ ++ data->phy_id = phy; ++ ++ case SIOCGMIIREG: /* Read MII PHY register. */ ++ case SIOCDEVPRIVATE+1: /* for binary compat, remove in 2.5 */ ++ data->val_out = mdio_read(dev, data->phy_id & 0x1f, data->reg_num & 0x1f); ++ retval = 0; ++ break; ++ ++ case SIOCSMIIREG: /* Write MII PHY register. */ ++ case SIOCDEVPRIVATE+2: /* for binary compat, remove in 2.5 */ ++ if (!capable(CAP_NET_ADMIN)) ++ { ++ retval = -EPERM; ++ } ++ else ++ { ++ mdio_write(dev, data->phy_id & 0x1f, data->reg_num & 0x1f, data->val_in); ++ retval = 0; ++ } ++ break; ++ ++ default: ++ retval = -EOPNOTSUPP; ++ break; ++ } ++ } ++ return retval; ++} ++ ++ ++static int netdev_ethtool_ioctl (struct net_device *dev, void *useraddr) ++{ ++ u32 ethcmd; ++ ++ /* dev_ioctl() in ../../net/core/dev.c has already checked ++ capable(CAP_NET_ADMIN), so don't bother with that here. */ ++ ++ if (copy_from_user (ðcmd, useraddr, sizeof (ethcmd))) ++ return -EFAULT; ++ ++ switch (ethcmd) { ++ case ETHTOOL_GDRVINFO: ++ { ++ struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO }; ++ strcpy (info.driver, dev->name); ++ strcpy (info.version, "0.3"); ++ strcpy (info.bus_info, ""); ++ if (copy_to_user (useraddr, &info, sizeof (info))) ++ return -EFAULT; ++ return 0; ++ } ++ default: ++ break; ++ } ++ ++ return -EOPNOTSUPP; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ ++#ifdef ORIGINAL_VERSION ++ ++/* Returns the CRC needed when filling in the hash table for ++ * multicast group filtering ++ * pAddr must point to a MAC address (6 bytes) ++ */ ++static u32 fec_mulicast_calc_crc(char *pAddr) ++{ ++ u8 byte; ++ int byte_count; ++ int bit_count; ++ u32 crc = 0xffffffff; ++ u8 msb; ++ ++ for (byte_count=0; byte_count<6; byte_count++) { ++ byte = pAddr[byte_count]; ++ for (bit_count=0; bit_count<8; bit_count++) { ++ msb = crc >> 31; ++ crc <<= 1; ++ if (msb ^ (byte & 0x1)) { ++ crc ^= FEC_CRC_POLY; ++ } ++ byte >>= 1; ++ } ++ } ++ return (crc); ++} ++ ++/* Set or clear the multicast filter for this adaptor. ++ * Skeleton taken from sunlance driver. 
++ * The CPM Ethernet implementation allows Multicast as well as individual ++ * MAC address filtering. Some of the drivers check to make sure it is ++ * a group multicast address, and discard those that are not. I guess I ++ * will do the same for now, but just remove the test if you want ++ * individual filtering as well (do the upper net layers want or support ++ * this kind of feature?). ++ */ ++ ++static void set_multicast_list(struct net_device *dev) ++{ ++ struct fec_enet_private *fep; ++ volatile fec_t *ep; ++ ++ fep = (struct fec_enet_private *)dev->priv; ++ ep = &(((immap_t *)IMAP_ADDR)->im_cpm.cp_fec); ++ ++ if (dev->flags&IFF_PROMISC) { ++ ++ /* Log any net taps. */ ++ printk("%s: Promiscuous mode enabled.\n", dev->name); ++ ep->fec_r_cntrl |= FEC_RCNTRL_PROM; ++ } else { ++ ++ ep->fec_r_cntrl &= ~FEC_RCNTRL_PROM; ++ ++ if (dev->flags & IFF_ALLMULTI) { ++ /* Catch all multicast addresses, so set the ++ * filter to all 1's. ++ */ ++ ep->fec_hash_table_high = 0xffffffff; ++ ep->fec_hash_table_low = 0xffffffff; ++ } else { ++ struct dev_mc_list *pmc = dev->mc_list; ++ ++ /* Clear Hash-Table ++ */ ++ ep->fec_hash_table_high = 0; ++ ep->fec_hash_table_low = 0; ++ ++ /* Now populate the hash table ++ */ ++#ifdef DEBUG_MULTICAST ++ if (pmc) { ++ printk ("%s: Recalculating hash-table:\n", ++ dev->name); ++ printk (" MAC Address high low\n"); ++ } ++#endif ++ ++ while (pmc) { ++ u32 crc; ++ int temp; ++ u32 csrVal; ++ int hash_index; ++ ++ crc = fec_mulicast_calc_crc(pmc->dmi_addr); ++ temp = (crc & 0x3f) >> 1; ++ hash_index = ((temp & 0x01) << 4) | ++ ((temp & 0x02) << 2) | ++ ((temp & 0x04)) | ++ ((temp & 0x08) >> 2) | ++ ((temp & 0x10) >> 4); ++ csrVal = (1 << hash_index); ++ if (crc & 1) { ++ ep->fec_hash_table_high |= csrVal; ++ } ++ else { ++ ep->fec_hash_table_low |= csrVal; ++ } ++#ifdef DEBUG_MULTICAST ++ printk (" %02x:%02x:%02x:%02x:%02x:%02x %08x %08x\n", ++ (int)pmc->dmi_addr[0], ++ (int)pmc->dmi_addr[1], ++ (int)pmc->dmi_addr[2], ++ (int)pmc->dmi_addr[3], ++ (int)pmc->dmi_addr[4], ++ (int)pmc->dmi_addr[5], ++ ep->fec_hash_table_high, ++ ep->fec_hash_table_low ++ ); ++#endif ++ pmc = pmc->next; ++ } ++ } ++ } ++} ++#endif /* ORIGINAL_VERSION */ ++ ++/* Initialize the FEC Ethernet on 860T. ++ */ ++int __init fec_enet_init(void) ++{ ++ struct rtnet_device *rtdev = NULL; ++ struct fec_enet_private *fep; ++ int i, j, k; ++ unsigned char *eap, *iap, *ba; ++ unsigned long mem_addr; ++ volatile cbd_t *bdp; ++ cbd_t *cbd_base; ++ volatile immap_t *immap; ++ volatile fec_t *fecp; ++ bd_t *bd; ++ ++ immap = (immap_t *)IMAP_ADDR; /* pointer to internal registers */ ++ ++ bd = (bd_t *)__res; ++ ++ if (!rx_pool_size) ++ rx_pool_size = RX_RING_SIZE * 2; ++ ++ rtdev = rtdev_root = rt_alloc_etherdev(sizeof(struct fec_enet_private), ++ rx_pool_size + TX_RING_SIZE); ++ if (rtdev == NULL) { ++ printk(KERN_ERR "enet: Could not allocate ethernet device.\n"); ++ return -1; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ ++ fep = (struct fec_enet_private *)rtdev->priv; ++ fecp = &(immap->im_cpm.cp_fec); ++ ++ /* Whack a reset. We should wait for this. ++ */ ++ fecp->fec_ecntrl = FEC_ECNTRL_PINMUX | FEC_ECNTRL_RESET; ++ for (i = 0; ++ (fecp->fec_ecntrl & FEC_ECNTRL_RESET) && (i < FEC_RESET_DELAY); ++ ++i) { ++ udelay(1); ++ } ++ if (i == FEC_RESET_DELAY) { ++ printk ("FEC Reset timeout!\n"); ++ } ++ ++ /* Set the Ethernet address. If using multiple Enets on the 8xx, ++ * this needs some work to get unique addresses. 
++ */ ++ eap = (unsigned char *)my_enet_addr; ++ iap = bd->bi_enetaddr; ++ ++#if defined(CONFIG_SCC_ENET) && !defined(ORIGINAL_VERSION) ++ /* ++ * If a board has Ethernet configured both on a SCC and the ++ * FEC, it needs (at least) 2 MAC addresses (we know that Sun ++ * disagrees, but anyway). For the FEC port, we create ++ * another address by setting one of the address bits above ++ * something that would have (up to now) been allocated. ++ */ ++ { ++ unsigned char tmpaddr[6]; ++ for (i=0; i<6; i++) ++ tmpaddr[i] = *iap++; ++ tmpaddr[3] |= 0x80; ++ iap = tmpaddr; ++ } ++#endif ++ ++ for (i=0; i<6; i++) { ++ rtdev->dev_addr[i] = *eap++ = *iap++; ++ } ++ ++ /* Allocate memory for buffer descriptors. ++ */ ++ if (((RX_RING_SIZE + TX_RING_SIZE) * sizeof(cbd_t)) > PAGE_SIZE) { ++ printk("FEC init error. Need more space.\n"); ++ printk("FEC initialization failed.\n"); ++ return 1; ++ } ++ cbd_base = (cbd_t *)consistent_alloc(GFP_KERNEL, PAGE_SIZE, (void *)&mem_addr); ++ ++ /* Set receive and transmit descriptor base. ++ */ ++ fep->rx_bd_base = cbd_base; ++ fep->tx_bd_base = cbd_base + RX_RING_SIZE; ++ ++ fep->skb_cur = fep->skb_dirty = 0; ++ ++ /* Initialize the receive buffer descriptors. ++ */ ++ bdp = fep->rx_bd_base; ++ k = 0; ++ for (i=0; icbd_sc = BD_ENET_RX_EMPTY; ++ bdp->cbd_bufaddr = mem_addr; ++ fep->rx_vaddr[k++] = ba; ++ mem_addr += FEC_ENET_RX_FRSIZE; ++ ba += FEC_ENET_RX_FRSIZE; ++ bdp++; ++ } ++ } ++ ++ rtdm_lock_init(&fep->lock); ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* Install our interrupt handler. ++ */ ++ rt_stack_connect(rtdev, &STACK_manager); ++ if ((i = rtdm_irq_request(&fep->irq_handle, FEC_INTERRUPT, ++ fec_enet_interrupt, 0, "rt_mpc8xx_fec", rtdev))) { ++ printk(KERN_ERR "Couldn't request IRQ %d\n", rtdev->irq); ++ rtdev_free(rtdev); ++ return i; ++ } ++ ++ rtdev->base_addr = (unsigned long)fecp; ++ ++#ifdef CONFIG_RPXCLASSIC ++/* If MDIO is disabled the PHY should not be allowed to ++ * generate interrupts telling us to read the PHY. ++ */ ++# ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Make Port C, bit 15 an input that causes interrupts. ++ */ ++ immap->im_ioport.iop_pcpar &= ~0x0001; ++ immap->im_ioport.iop_pcdir &= ~0x0001; ++ immap->im_ioport.iop_pcso &= ~0x0001; ++ immap->im_ioport.iop_pcint |= 0x0001; ++ cpm_install_handler(CPMVEC_PIO_PC15, mii_link_interrupt, dev); ++# endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ /* Make LEDS reflect Link status. ++ */ ++ *((uint *) RPX_CSR_ADDR) &= ~BCSR2_FETHLEDMODE; ++#endif /* CONFIG_RPXCLASSIC */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++# ifndef PHY_INTERRUPT ++# error Want to use MII, but PHY_INTERRUPT not defined! ++# endif ++ ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_siel |= ++ (0x80000000 >> PHY_INTERRUPT); ++ ++ if (request_8xxirq(PHY_INTERRUPT, mii_link_interrupt, 0, "mii", dev) != 0) ++ panic("Could not allocate MII IRQ!"); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ rtdev->base_addr = (unsigned long)fecp; ++ ++ /* The FEC Ethernet specific entries in the device structure. 
*/ ++ rtdev->open = fec_enet_open; ++ rtdev->hard_start_xmit = fec_enet_start_xmit; ++ rtdev->stop = fec_enet_close; ++ rtdev->hard_header = &rt_eth_header; ++ rtdev->get_stats = fec_enet_get_stats; ++ ++ if ((i = rt_register_rtnetdev(rtdev))) { ++ rtdm_irq_disable(&fep->irq_handle); ++ rtdm_irq_free(&fep->irq_handle); ++ rtdev_free(rtdev); ++ return i; ++ } ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ dev->do_ioctl = fec_enet_ioctl; ++ ++ for (i=0; iim_ioport.iop_pdpar = 0x1fff; ++ ++#else /* CONFIG_ICU862 */ ++ /* Configure port A for MII. ++ */ ++ ++ /* Has Utopia been configured? */ ++ if (immap->im_ioport.iop_pdpar & (0x8000 >> 1)) { ++ /* ++ * YES - Use MUXED mode for UTOPIA bus. ++ * This frees Port A for use by MII (see 862UM table 41-6). ++ */ ++ immap->im_ioport.utmode &= ~0x80; ++ } else { ++ /* ++ * NO - set SPLIT mode for UTOPIA bus. ++ * ++ * This doesn't really effect UTOPIA (which isn't ++ * enabled anyway) but just tells the 862 ++ * to use port A for MII (see 862UM table 41-6). ++ */ ++ immap->im_ioport.utmode |= 0x80; ++ } ++ ++# ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Now configure MII_MDC pin */ ++ immap->im_ioport.iop_pdpar |= (0x8000 >> 8); ++# endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++#endif /* CONFIG_ICU862 */ ++ ++ /* Bits moved from Rev. D onward. ++ */ ++ if ((mfspr(IMMR) & 0xffff) < 0x0501) ++ immap->im_ioport.iop_pddir = 0x1c58; /* Pre rev. D */ ++ else ++ immap->im_ioport.iop_pddir = 0x1fff; /* Rev. D and later */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Set MII speed to 2.5 MHz ++ */ ++ fecp->fec_mii_speed = fep->phy_speed = ++ ((((bd->bi_intfreq + 4999999) / 2500000) / 2 ) & 0x3F ) << 1; ++#else ++ fecp->fec_mii_speed = 0; /* turn off MDIO */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++#ifndef ORIGINAL_VERSION ++ printk("%s: FEC ENET Version 0.3, irq %d, addr %02x:%02x:%02x:%02x:%02x:%02x\n", ++ rtdev->name, FEC_INTERRUPT, ++ rtdev->dev_addr[0], rtdev->dev_addr[1], rtdev->dev_addr[2], ++ rtdev->dev_addr[3], rtdev->dev_addr[4], rtdev->dev_addr[5]); ++#else ++ printk ("%s: FEC ENET Version 0.3, FEC irq %d" ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ ", with MDIO" ++#endif ++#ifdef PHY_INTERRUPT ++ ", MII irq %d" ++#endif ++ ", addr ", ++ dev->name, FEC_INTERRUPT ++#ifdef PHY_INTERRUPT ++ , PHY_INTERRUPT ++#endif ++ ); ++ for (i=0; i<6; i++) ++ printk("%02x%c", rtdev->dev_addr[i], (i==5) ? '\n' : ':'); ++#endif /* ORIGINAL_VERSION */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO /* start in full duplex mode, and negotiate speed */ ++ fec_restart (dev, 1); ++#else /* always use half duplex mode only */ ++ fec_restart (rtdev, 0); ++#endif ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Queue up command to detect the PHY and initialize the ++ * remainder of the interface. ++ */ ++ fep->phy_id_done = 0; ++ fep->phy_addr = 0; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), mii_discover_phy, 0); ++ ++ fep->old_status = 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ return 0; ++} ++ ++/* This function is called to start or restart the FEC during a link ++ * change. This only happens when switching between half and full ++ * duplex. ++ */ ++static void ++fec_restart(struct rtnet_device *rtdev, int duplex) ++{ ++ struct fec_enet_private *fep; ++ int i; ++ volatile cbd_t *bdp; ++ volatile immap_t *immap; ++ volatile fec_t *fecp; ++ ++ immap = (immap_t *)IMAP_ADDR; /* pointer to internal registers */ ++ ++ fecp = &(immap->im_cpm.cp_fec); ++ ++ fep = rtdev->priv; ++ ++ /* Whack a reset. We should wait for this. 
++ */ ++ fecp->fec_ecntrl = FEC_ECNTRL_PINMUX | FEC_ECNTRL_RESET; ++ for (i = 0; ++ (fecp->fec_ecntrl & FEC_ECNTRL_RESET) && (i < FEC_RESET_DELAY); ++ ++i) { ++ udelay(1); ++ } ++ if (i == FEC_RESET_DELAY) { ++ printk ("FEC Reset timeout!\n"); ++ } ++ ++ /* Set station address. ++ */ ++ fecp->fec_addr_low = (my_enet_addr[0] << 16) | my_enet_addr[1]; ++ fecp->fec_addr_high = my_enet_addr[2]; ++ ++ /* Reset all multicast. ++ */ ++ fecp->fec_hash_table_high = 0; ++ fecp->fec_hash_table_low = 0; ++ ++ /* Set maximum receive buffer size. ++ */ ++ fecp->fec_r_buff_size = PKT_MAXBLR_SIZE; ++ fecp->fec_r_hash = PKT_MAXBUF_SIZE; ++ ++ /* Set receive and transmit descriptor base. ++ */ ++ fecp->fec_r_des_start = iopa((uint)(fep->rx_bd_base)); ++ fecp->fec_x_des_start = iopa((uint)(fep->tx_bd_base)); ++ ++ fep->dirty_tx = fep->cur_tx = fep->tx_bd_base; ++ fep->cur_rx = fep->rx_bd_base; ++ ++ /* Reset SKB transmit buffers. ++ */ ++ fep->skb_cur = fep->skb_dirty = 0; ++ for (i=0; i<=TX_RING_MOD_MASK; i++) { ++ if (fep->tx_skbuff[i] != NULL) { ++ dev_kfree_rtskb(fep->tx_skbuff[i]); ++ fep->tx_skbuff[i] = NULL; ++ } ++ } ++ ++ /* Initialize the receive buffer descriptors. ++ */ ++ bdp = fep->rx_bd_base; ++ for (i=0; icbd_sc = BD_ENET_RX_EMPTY; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* ...and the same for transmmit. ++ */ ++ bdp = fep->tx_bd_base; ++ for (i=0; icbd_sc = 0; ++ bdp->cbd_bufaddr = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* Enable MII mode. ++ */ ++ if (duplex) { ++ fecp->fec_r_cntrl = FEC_RCNTRL_MII_MODE; /* MII enable */ ++ fecp->fec_x_cntrl = FEC_TCNTRL_FDEN; /* FD enable */ ++ } ++ else { ++ fecp->fec_r_cntrl = FEC_RCNTRL_MII_MODE | FEC_RCNTRL_DRT; ++ fecp->fec_x_cntrl = 0; ++ } ++ ++ fep->full_duplex = duplex; ++ ++ /* Enable big endian and don't care about SDMA FC. ++ */ ++ fecp->fec_fun_code = 0x78000000; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Set MII speed. ++ */ ++ fecp->fec_mii_speed = fep->phy_speed; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ /* Clear any outstanding interrupt. ++ */ ++ fecp->fec_ievent = 0xffc0; ++ ++ fecp->fec_ivec = (FEC_INTERRUPT/2) << 29; ++ ++ /* Enable interrupts we wish to service. ++ */ ++ fecp->fec_imask = ( FEC_ENET_TXF | FEC_ENET_TXB | ++ FEC_ENET_RXF | FEC_ENET_RXB | FEC_ENET_MII ); ++ ++ /* And last, enable the transmit and receive processing. ++ */ ++ fecp->fec_ecntrl = FEC_ECNTRL_PINMUX | FEC_ECNTRL_ETHER_EN; ++ fecp->fec_r_des_active = 0x01000000; ++ ++ /* The tx ring is no longer full. */ ++ if(fep->tx_full) ++ { ++ fep->tx_full = 0; ++ rtnetif_wake_queue(rtdev); ++ } ++} ++ ++static void ++fec_stop(struct rtnet_device *rtdev) ++{ ++ volatile immap_t *immap; ++ volatile fec_t *fecp; ++ int i; ++ struct fec_enet_private *fep; ++ ++ immap = (immap_t *)IMAP_ADDR; /* pointer to internal registers */ ++ ++ fecp = &(immap->im_cpm.cp_fec); ++ ++ if ((fecp->fec_ecntrl & FEC_ECNTRL_ETHER_EN) == 0) ++ return; /* already down */ ++ ++ fep = rtdev->priv; ++ ++ ++ fecp->fec_x_cntrl = 0x01; /* Graceful transmit stop */ ++ ++ for (i = 0; ++ ((fecp->fec_ievent & 0x10000000) == 0) && (i < FEC_RESET_DELAY); ++ ++i) { ++ udelay(1); ++ } ++ if (i == FEC_RESET_DELAY) { ++ printk ("FEC timeout on graceful transmit stop\n"); ++ } ++ ++ /* Clear outstanding MII command interrupts. 
++ */ ++ fecp->fec_ievent = FEC_ENET_MII; ++ ++ /* Enable MII command finished interrupt ++ */ ++ fecp->fec_ivec = (FEC_INTERRUPT/2) << 29; ++ fecp->fec_imask = FEC_ENET_MII; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Set MII speed. ++ */ ++ fecp->fec_mii_speed = fep->phy_speed; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ /* Disable FEC ++ */ ++ fecp->fec_ecntrl &= ~(FEC_ECNTRL_ETHER_EN); ++} ++ ++static void __exit fec_enet_cleanup(void) ++{ ++ struct rtnet_device *rtdev = rtdev_root; ++ struct fec_enet_private *fep = rtdev->priv; ++ ++ if (rtdev) { ++ rtdm_irq_disable(&fep->irq_handle); ++ rtdm_irq_free(&fep->irq_handle); ++ ++ consistent_free(fep->rx_bd_base); ++ ++ rt_stack_disconnect(rtdev); ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ ++ printk("%s: unloaded\n", rtdev->name); ++ rtdev_free(rtdev); ++ rtdev_root = NULL; ++ } ++} ++ ++module_init(fec_enet_init); ++module_exit(fec_enet_cleanup); +--- linux/drivers/xenomai/net/drivers/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/Kconfig 2021-04-07 16:01:27.258634122 +0800 +@@ -0,0 +1,138 @@ ++menu "Drivers" ++ depends on XENO_DRIVERS_NET ++ ++comment "Common PCI Drivers" ++ depends on PCI ++ ++config XENO_DRIVERS_NET_DRV_PCNET32 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "AMD PCnet32" ++ ++ ++config XENO_DRIVERS_NET_DRV_TULIP ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "DEC Tulip" ++ ++ ++config XENO_DRIVERS_NET_DRV_EEPRO100 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Intel EtherExpress PRO/100" ++ default y ++ ++config XENO_DRIVERS_NET_DRV_EEPRO100_CMDTIMEOUT ++ depends on XENO_DRIVERS_NET && PCI ++ int "Command Timeout" ++ depends on XENO_DRIVERS_NET_DRV_EEPRO100 ++ default 20 ++ ---help--- ++ Timeout in microseconds of transmission or configuration commands that ++ are issued in real-time contexts. ++ ++config XENO_DRIVERS_NET_DRV_EEPRO100_DBG ++ depends on XENO_DRIVERS_NET && PCI ++ bool "Enable debugging and instrumentation" ++ depends on XENO_DRIVERS_NET_DRV_EEPRO100 ++ ---help--- ++ This option switches on internal debugging code of the EEPRO/100 driver. ++ It also enables the collection of worst-case command delays in real-time ++ contexts in order to reduce the command timeout (which, effectively, will ++ also reduce the worst-case transmission latency). 
++ ++ ++config XENO_DRIVERS_NET_DRV_E1000 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Intel(R) PRO/1000 (Gigabit)" ++ default y ++ ++config XENO_DRIVERS_NET_DRV_E1000E ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "New Intel(R) PRO/1000 PCIe (Gigabit)" ++ ++ ++config XENO_DRIVERS_NET_DRV_NATSEMI ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "NatSemi" ++ ++ ++config XENO_DRIVERS_NET_DRV_8139 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Realtek 8139" ++ default y ++ ++ ++config XENO_DRIVERS_NET_DRV_VIA_RHINE ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "VIA Rhine" ++ ++ ++config XENO_DRIVERS_NET_DRV_IGB ++ select I2C ++ select I2C_ALGOBIT ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Intel(R) 82575 (Gigabit)" ++ ++ ++config XENO_DRIVERS_NET_DRV_R8169 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Realtek 8169 (Gigabit)" ++ ++ ++if PPC ++ ++comment "Embedded MPC Drivers" ++ depends on XENO_DRIVERS_NET ++ ++config XENO_DRIVERS_NET_DRV_FCC_ENET ++ depends on XENO_DRIVERS_NET ++ tristate "MPC8260 FCC Ethernet" ++ ++ ++config XENO_DRIVERS_NET_DRV_FEC_ENET ++ depends on XENO_DRIVERS_NET ++ tristate "MPC8xx FEC Ethernet" ++ ++ ++config XENO_DRIVERS_NET_DRV_SCC_ENET ++ depends on XENO_DRIVERS_NET ++ tristate "MPC8xx SCC Ethernet" ++ ++ ++config XENO_DRIVERS_NET_DRV_MPC52XX_FEC ++ depends on XENO_DRIVERS_NET ++ tristate "MPC52xx FEC Ethernet" ++ ++endif ++ ++ ++comment "Misc Drivers" ++ ++config XENO_DRIVERS_NET_DRV_LOOPBACK ++ depends on XENO_DRIVERS_NET ++ tristate "Loopback" ++ default y ++ ++ ++config XENO_DRIVERS_NET_DRV_SMC91111 ++ depends on XENO_DRIVERS_NET ++ tristate "SMSC LAN91C111" ++ ++if ARM ++ ++config XENO_DRIVERS_NET_DRV_AT91_ETHER ++ depends on XENO_DRIVERS_NET && SOC_AT91RM9200 ++ select XENO_DRIVERS_NET_DRV_MACB ++ tristate "AT91RM9200 Board Ethernet Driver" ++ ++config XENO_DRIVERS_NET_DRV_MACB ++ depends on XENO_DRIVERS_NET ++ select AT91_PROGRAMMABLE_CLOCKS if ARCH_AT91 ++ tristate "Cadence MACB/GEM devices" ++ ---help--- ++ Driver for internal MAC-controller on AT91SAM926x microcontrollers. ++ Porting by Cristiano Mantovani and Stefano Banzi (Marposs SpA). ++ ++endif ++ ++source "drivers/xenomai/net/drivers/experimental/Kconfig" ++ ++endmenu +--- linux/drivers/xenomai/net/drivers/fec.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/fec.c 2021-04-07 16:01:27.253634129 +0800 +@@ -0,0 +1,1859 @@ ++/* ++ * Fast Ethernet Controller (FEC) driver for Motorola MPC8xx. ++ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net) ++ * ++ * Right now, I am very wasteful with the buffers. I allocate memory ++ * pages and then divide them into 2K frame buffers. This way I know I ++ * have buffers large enough to hold one frame within one buffer descriptor. ++ * Once I get this working, I will use 64 or 128 byte CPM buffers, which ++ * will be much more memory efficient and will easily handle lots of ++ * small packets. ++ * ++ * Much better multiple PHY support by Magnus Damm. ++ * Copyright (c) 2000 Ericsson Radio Systems AB. ++ * ++ * Support for FEC controller of ColdFire processors. ++ * Copyright (c) 2001-2005 Greg Ungerer (gerg@snapgear.com) ++ * ++ * Bug fixes and cleanup by Philippe De Muyter (phdm@macqel.be) ++ * Copyright (c) 2004-2006 Macq Electronique SA. ++ * ++ * Copyright (C) 2010-2011 Freescale Semiconductor, Inc. 
++ * ++ * Ported from v3.5 Linux drivers/net/ethernet/freescale/fec.[ch] ++ * (git tag v3.5-709-ga6be1fc) ++ * ++ * Copyright (c) 2012 Wolfgang Grandegger ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#ifndef CONFIG_ARM ++#include ++#include ++#endif ++ ++/* RTnet */ ++#include ++#include ++ ++/* RTnet */ ++#include "rt_fec.h" ++ ++MODULE_AUTHOR("Maintainer: Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTnet driver for the FEC Ethernet"); ++MODULE_LICENSE("GPL"); ++ ++#if defined(CONFIG_ARM) ++#define FEC_ALIGNMENT 0xf ++#else ++#define FEC_ALIGNMENT 0x3 ++#endif ++ ++#define DRIVER_NAME "rt_fec" ++ ++/* Controller is ENET-MAC */ ++#define FEC_QUIRK_ENET_MAC (1 << 0) ++/* Controller needs driver to swap frame */ ++#define FEC_QUIRK_SWAP_FRAME (1 << 1) ++/* Controller uses gasket */ ++#define FEC_QUIRK_USE_GASKET (1 << 2) ++/* Controller has GBIT support */ ++#define FEC_QUIRK_HAS_GBIT (1 << 3) ++ ++static struct platform_device_id fec_devtype[] = { ++ { ++ .name = "fec", ++/* For legacy not devicetree based support */ ++#if defined(CONFIG_SOC_IMX6Q) ++ .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT, ++#elif defined(CONFIG_SOC_IMX28) ++ .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME, ++#elif defined(CONFIG_SOC_IMX25) ++ .driver_data = FEC_QUIRK_USE_GASKET, ++#else ++ /* keep it for coldfire */ ++ .driver_data = 0, ++#endif ++ }, { ++ .name = "imx25-fec", ++ .driver_data = FEC_QUIRK_USE_GASKET, ++ }, { ++ .name = "imx27-fec", ++ .driver_data = 0, ++ }, { ++ .name = "imx28-fec", ++ .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME, ++ }, { ++ .name = "imx6q-fec", ++ .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT, ++ }, { ++ /* sentinel */ ++ } ++}; ++MODULE_DEVICE_TABLE(platform, fec_devtype); ++ ++enum imx_fec_type { ++ IMX25_FEC = 1, /* runs on i.mx25/50/53 */ ++ IMX27_FEC, /* runs on i.mx27/35/51 */ ++ IMX28_FEC, ++ IMX6Q_FEC, ++}; ++ ++static const struct of_device_id fec_dt_ids[] = { ++ { .compatible = "fsl,imx25-fec", .data = &fec_devtype[IMX25_FEC], }, ++ { .compatible = "fsl,imx27-fec", .data = &fec_devtype[IMX27_FEC], }, ++ { .compatible = "fsl,imx28-fec", .data = &fec_devtype[IMX28_FEC], }, ++ { .compatible = "fsl,imx6q-fec", .data = &fec_devtype[IMX6Q_FEC], }, ++ { /* sentinel */ } ++}; ++MODULE_DEVICE_TABLE(of, fec_dt_ids); ++ ++static unsigned char macaddr[ETH_ALEN]; ++module_param_array(macaddr, byte, NULL, 0); ++MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); ++ ++#if defined(CONFIG_M5272) ++/* ++ * Some hardware gets it MAC address out of local flash memory. ++ * if this is non-zero then assume it is the address to get MAC from. ++ */ ++#if defined(CONFIG_NETtel) ++#define FEC_FLASHMAC 0xf0006006 ++#elif defined(CONFIG_GILBARCONAP) || defined(CONFIG_SCALES) ++#define FEC_FLASHMAC 0xf0006000 ++#elif defined(CONFIG_CANCam) ++#define FEC_FLASHMAC 0xf0020000 ++#elif defined (CONFIG_M5272C3) ++#define FEC_FLASHMAC (0xffe04000 + 4) ++#elif defined(CONFIG_MOD5272) ++#define FEC_FLASHMAC 0xffc0406b ++#else ++#define FEC_FLASHMAC 0 ++#endif ++#endif /* CONFIG_M5272 */ ++ ++/* The number of Tx and Rx buffers. These are allocated from the page ++ * pool. The code may assume these are power of two, so it it best ++ * to keep them that size. 
++ * We don't need to allocate pages for the transmitter. We just use ++ * the skbuffer directly. ++ */ ++#define FEC_ENET_RX_PAGES 8 ++#define FEC_ENET_RX_FRSIZE RTSKB_SIZE /* Maximum size for RTnet */ ++#define FEC_ENET_RX_FRPPG (PAGE_SIZE / FEC_ENET_RX_FRSIZE) ++#define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES) ++#define FEC_ENET_TX_FRSIZE 2048 ++#define FEC_ENET_TX_FRPPG (PAGE_SIZE / FEC_ENET_TX_FRSIZE) ++#define TX_RING_SIZE 16 /* Must be power of two */ ++#define TX_RING_MOD_MASK 15 /* for this to work */ ++ ++#if (((RX_RING_SIZE + TX_RING_SIZE) * 8) > PAGE_SIZE) ++#error "FEC: descriptor ring size constants too large" ++#endif ++ ++/* Interrupt events/masks. */ ++#define FEC_ENET_HBERR ((uint)0x80000000) /* Heartbeat error */ ++#define FEC_ENET_BABR ((uint)0x40000000) /* Babbling receiver */ ++#define FEC_ENET_BABT ((uint)0x20000000) /* Babbling transmitter */ ++#define FEC_ENET_GRA ((uint)0x10000000) /* Graceful stop complete */ ++#define FEC_ENET_TXF ((uint)0x08000000) /* Full frame transmitted */ ++#define FEC_ENET_TXB ((uint)0x04000000) /* A buffer was transmitted */ ++#define FEC_ENET_RXF ((uint)0x02000000) /* Full frame received */ ++#define FEC_ENET_RXB ((uint)0x01000000) /* A buffer was received */ ++#define FEC_ENET_MII ((uint)0x00800000) /* MII interrupt */ ++#define FEC_ENET_EBERR ((uint)0x00400000) /* SDMA bus error */ ++ ++#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII) ++ ++/* The FEC stores dest/src/type, data, and checksum for receive packets. ++ */ ++#define PKT_MAXBUF_SIZE 1518 ++#define PKT_MINBUF_SIZE 64 ++#define PKT_MAXBLR_SIZE 1520 ++ ++/* This device has up to three irqs on some platforms */ ++#define FEC_IRQ_NUM 3 ++ ++/* ++ * The 5270/5271/5280/5282/532x RX control register also contains maximum frame ++ * size bits. Other FEC hardware does not, so we need to take that into ++ * account when setting it. ++ */ ++#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ ++ defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) ++#define OPT_FRAME_SIZE (PKT_MAXBUF_SIZE << 16) ++#else ++#define OPT_FRAME_SIZE 0 ++#endif ++ ++static unsigned int rx_pool_size = 2 * RX_RING_SIZE; ++module_param(rx_pool_size, int, 0444); ++MODULE_PARM_DESC(rx_pool_size, "Receive buffer pool size"); ++ ++#ifndef rtnetdev_priv ++#define rtnetdev_priv(ndev) (ndev)->priv ++#endif ++ ++/* The FEC buffer descriptors track the ring buffers. The rx_bd_base and ++ * tx_bd_base always point to the base of the buffer descriptors. The ++ * cur_rx and cur_tx point to the currently available buffer. ++ * The dirty_tx tracks the current buffer that is being sent by the ++ * controller. The cur_tx and dirty_tx are equal under both completely ++ * empty and completely full conditions. The empty/ready indicator in ++ * the buffer descriptor determines the actual condition. ++ */ ++struct fec_enet_private { ++ /* Hardware registers of the FEC device */ ++ void __iomem *hwp; ++ ++ struct net_device *netdev; /* linux netdev needed for phy handling */ ++ ++ struct clk *clk_ipg; ++ struct clk *clk_ahb; ++ ++ /* The saved address of a sent-in-place packet/buffer, for skfree(). 
*/ ++ unsigned char *tx_bounce[TX_RING_SIZE]; ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; ++ ushort skb_cur; ++ ushort skb_dirty; ++ ++ /* CPM dual port RAM relative addresses */ ++ dma_addr_t bd_dma; ++ /* Address of Rx and Tx buffers */ ++ struct bufdesc *rx_bd_base; ++ struct bufdesc *tx_bd_base; ++ /* The next free ring entry */ ++ struct bufdesc *cur_rx, *cur_tx; ++ /* The ring entries to be free()ed */ ++ struct bufdesc *dirty_tx; ++ ++ uint tx_full; ++ /* hold while accessing the HW like ringbuffer for tx/rx but not MAC */ ++ rtdm_lock_t hw_lock; ++ ++ struct platform_device *pdev; ++ ++ int opened; ++ int dev_id; ++ ++ /* Phylib and MDIO interface */ ++ struct mii_bus *mii_bus; ++ struct phy_device *phy_dev; ++ int mii_timeout; ++ uint phy_speed; ++ phy_interface_t phy_interface; ++ int link; ++ int full_duplex; ++ struct completion mdio_done; ++ int irq[FEC_IRQ_NUM]; ++ ++ /* RTnet */ ++ struct device *dev; ++ rtdm_irq_t irq_handle[3]; ++ rtdm_nrtsig_t mdio_done_sig; ++ struct net_device_stats stats; ++}; ++ ++/* For phy handling */ ++struct fec_enet_netdev_priv { ++ struct rtnet_device *rtdev; ++}; ++ ++/* FEC MII MMFR bits definition */ ++#define FEC_MMFR_ST (1 << 30) ++#define FEC_MMFR_OP_READ (2 << 28) ++#define FEC_MMFR_OP_WRITE (1 << 28) ++#define FEC_MMFR_PA(v) ((v & 0x1f) << 23) ++#define FEC_MMFR_RA(v) ((v & 0x1f) << 18) ++#define FEC_MMFR_TA (2 << 16) ++#define FEC_MMFR_DATA(v) (v & 0xffff) ++ ++#define FEC_MII_TIMEOUT 30000 /* us */ ++ ++/* Transmitter timeout */ ++#define TX_TIMEOUT (2 * HZ) ++ ++static int mii_cnt; ++ ++static void *swap_buffer(void *bufaddr, int len) ++{ ++ int i; ++ unsigned int *buf = bufaddr; ++ ++ for (i = 0; i < (len + 3) / 4; i++, buf++) ++ *buf = cpu_to_be32(*buf); ++ ++ return bufaddr; ++} ++ ++static int ++fec_enet_start_xmit(struct rtskb *skb, struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ struct bufdesc *bdp; ++ void *bufaddr; ++ unsigned short status; ++ unsigned long context; ++ ++ if (!fep->link) { ++ /* Link is down or autonegotiation is in progress. */ ++ printk("%s: tx link down!.\n", ndev->name); ++ rtnetif_stop_queue(ndev); ++ return 1; /* RTnet: will call kfree_rtskb() */ ++ } ++ ++ rtdm_lock_get_irqsave(&fep->hw_lock, context); ++ ++ /* RTnet */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++ ++ /* Fill in a Tx ring entry */ ++ bdp = fep->cur_tx; ++ ++ status = bdp->cbd_sc; ++ ++ if (status & BD_ENET_TX_READY) { ++ /* Ooops. All transmit buffers are full. Bail out. ++ * This should not happen, since ndev->tbusy should be set. ++ */ ++ printk("%s: tx queue full!.\n", ndev->name); ++ rtdm_lock_put_irqrestore(&fep->hw_lock, context); ++ return 1; /* RTnet: will call kfree_rtskb() */ ++ } ++ ++ /* Clear all of the status flags */ ++ status &= ~BD_ENET_TX_STATS; ++ ++ /* Set buffer length and buffer pointer */ ++ bufaddr = skb->data; ++ bdp->cbd_datlen = skb->len; ++ ++ /* ++ * On some FEC implementations data must be aligned on ++ * 4-byte boundaries. Use bounce buffers to copy data ++ * and get it aligned. Ugh. 
++ */ ++ if (((unsigned long) bufaddr) & FEC_ALIGNMENT) { ++ unsigned int index; ++ index = bdp - fep->tx_bd_base; ++ memcpy(fep->tx_bounce[index], skb->data, skb->len); ++ bufaddr = fep->tx_bounce[index]; ++ } ++ ++ /* ++ * Some design made an incorrect assumption on endian mode of ++ * the system that it's running on. As the result, driver has to ++ * swap every frame going to and coming from the controller. ++ */ ++ if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) ++ swap_buffer(bufaddr, skb->len); ++ ++ /* Save skb pointer */ ++ fep->tx_skbuff[fep->skb_cur] = skb; ++ ++ fep->stats.tx_bytes += skb->len; ++ fep->skb_cur = (fep->skb_cur+1) & TX_RING_MOD_MASK; ++ ++ /* Push the data cache so the CPM does not get stale memory ++ * data. ++ */ ++ bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr, ++ FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE); ++ ++ /* Send it on its way. Tell FEC it's ready, interrupt when done, ++ * it's the last BD of the frame, and to put the CRC on the end. ++ */ ++ status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR ++ | BD_ENET_TX_LAST | BD_ENET_TX_TC); ++ bdp->cbd_sc = status; ++ ++ /* Trigger transmission start */ ++ writel(0, fep->hwp + FEC_X_DES_ACTIVE); ++ ++ /* If this was the last BD in the ring, start at the beginning again. */ ++ if (status & BD_ENET_TX_WRAP) ++ bdp = fep->tx_bd_base; ++ else ++ bdp++; ++ ++ if (bdp == fep->dirty_tx) { ++ fep->tx_full = 1; ++ rtnetif_stop_queue(ndev); ++ } ++ ++ fep->cur_tx = bdp; ++ ++ rtdm_lock_put_irqrestore(&fep->hw_lock, context); ++ ++ return NETDEV_TX_OK; ++} ++ ++/* This function is called to start or restart the FEC during a link ++ * change. This only happens when switching between half and full ++ * duplex. ++ */ ++static void ++fec_restart(struct rtnet_device *ndev, int duplex) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ int i; ++ u32 temp_mac[2]; ++ u32 rcntl = OPT_FRAME_SIZE | 0x04; ++ u32 ecntl = 0x2; /* ETHEREN */ ++ ++ /* Whack a reset. We should wait for this. */ ++ writel(1, fep->hwp + FEC_ECNTRL); ++ udelay(10); ++ ++ /* ++ * enet-mac reset will reset mac address registers too, ++ * so need to reconfigure it. ++ */ ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { ++ memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN); ++ writel(cpu_to_be32(temp_mac[0]), fep->hwp + FEC_ADDR_LOW); ++ writel(cpu_to_be32(temp_mac[1]), fep->hwp + FEC_ADDR_HIGH); ++ } ++ ++ /* Clear any outstanding interrupt. */ ++ writel(0xffc00000, fep->hwp + FEC_IEVENT); ++ ++ /* Reset all multicast. */ ++ writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); ++ writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW); ++#ifndef CONFIG_M5272 ++ writel(0, fep->hwp + FEC_HASH_TABLE_HIGH); ++ writel(0, fep->hwp + FEC_HASH_TABLE_LOW); ++#endif ++ ++ /* Set maximum receive buffer size. */ ++ writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE); ++ ++ /* Set receive and transmit descriptor base. */ ++ writel(fep->bd_dma, fep->hwp + FEC_R_DES_START); ++ writel((unsigned long)fep->bd_dma + sizeof(struct bufdesc) * RX_RING_SIZE, ++ fep->hwp + FEC_X_DES_START); ++ ++ fep->dirty_tx = fep->cur_tx = fep->tx_bd_base; ++ fep->cur_rx = fep->rx_bd_base; ++ ++ /* Reset SKB transmit buffers. 
*/ ++ fep->skb_cur = fep->skb_dirty = 0; ++ for (i = 0; i <= TX_RING_MOD_MASK; i++) { ++ if (fep->tx_skbuff[i]) { ++ dev_kfree_rtskb(fep->tx_skbuff[i]); ++ fep->tx_skbuff[i] = NULL; ++ } ++ } ++ ++ /* Enable MII mode */ ++ if (duplex) { ++ /* FD enable */ ++ writel(0x04, fep->hwp + FEC_X_CNTRL); ++ } else { ++ /* No Rcv on Xmit */ ++ rcntl |= 0x02; ++ writel(0x0, fep->hwp + FEC_X_CNTRL); ++ } ++ ++ fep->full_duplex = duplex; ++ ++ /* Set MII speed */ ++ writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); ++ ++ /* ++ * The phy interface and speed need to get configured ++ * differently on enet-mac. ++ */ ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { ++ /* Enable flow control and length check */ ++ rcntl |= 0x40000000 | 0x00000020; ++ ++ /* RGMII, RMII or MII */ ++ if (fep->phy_interface == PHY_INTERFACE_MODE_RGMII) ++ rcntl |= (1 << 6); ++ else if (fep->phy_interface == PHY_INTERFACE_MODE_RMII) ++ rcntl |= (1 << 8); ++ else ++ rcntl &= ~(1 << 8); ++ ++ /* 1G, 100M or 10M */ ++ if (fep->phy_dev) { ++ if (fep->phy_dev->speed == SPEED_1000) ++ ecntl |= (1 << 5); ++ else if (fep->phy_dev->speed == SPEED_100) ++ rcntl &= ~(1 << 9); ++ else ++ rcntl |= (1 << 9); ++ } ++ } else { ++#ifdef FEC_MIIGSK_ENR ++ if (id_entry->driver_data & FEC_QUIRK_USE_GASKET) { ++ u32 cfgr; ++ /* disable the gasket and wait */ ++ writel(0, fep->hwp + FEC_MIIGSK_ENR); ++ while (readl(fep->hwp + FEC_MIIGSK_ENR) & 4) ++ udelay(1); ++ ++ /* ++ * configure the gasket: ++ * RMII, 50 MHz, no loopback, no echo ++ * MII, 25 MHz, no loopback, no echo ++ */ ++ cfgr = (fep->phy_interface == PHY_INTERFACE_MODE_RMII) ++ ? BM_MIIGSK_CFGR_RMII : BM_MIIGSK_CFGR_MII; ++ if (fep->phy_dev && fep->phy_dev->speed == SPEED_10) ++ cfgr |= BM_MIIGSK_CFGR_FRCONT_10M; ++ writel(cfgr, fep->hwp + FEC_MIIGSK_CFGR); ++ ++ /* re-enable the gasket */ ++ writel(2, fep->hwp + FEC_MIIGSK_ENR); ++ } ++#endif ++ } ++ writel(rcntl, fep->hwp + FEC_R_CNTRL); ++ ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { ++ /* enable ENET endian swap */ ++ ecntl |= (1 << 8); ++ /* enable ENET store and forward mode */ ++ writel(1 << 8, fep->hwp + FEC_X_WMRK); ++ } ++ ++ /* And last, enable the transmit and receive processing */ ++ writel(ecntl, fep->hwp + FEC_ECNTRL); ++ writel(0, fep->hwp + FEC_R_DES_ACTIVE); ++ ++ /* Enable interrupts we wish to service */ ++ writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); ++} ++ ++static void ++fec_stop(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ u32 rmii_mode = readl(fep->hwp + FEC_R_CNTRL) & (1 << 8); ++ ++ /* We cannot expect a graceful transmit stop without link !!! */ ++ if (fep->link) { ++ writel(1, fep->hwp + FEC_X_CNTRL); /* Graceful transmit stop */ ++ udelay(10); ++ if (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_GRA)) ++ printk("fec_stop : Graceful transmit stop did not complete !\n"); ++ } ++ ++ /* Whack a reset. We should wait for this. 
*/ ++ writel(1, fep->hwp + FEC_ECNTRL); ++ udelay(10); ++ writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); ++ writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); ++ ++ /* We have to keep ENET enabled to have MII interrupt stay working */ ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { ++ writel(2, fep->hwp + FEC_ECNTRL); ++ writel(rmii_mode, fep->hwp + FEC_R_CNTRL); ++ } ++} ++ ++static void ++fec_enet_tx(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep; ++ struct bufdesc *bdp; ++ unsigned short status; ++ struct rtskb *skb; ++ ++ fep = rtnetdev_priv(ndev); ++ rtdm_lock_get(&fep->hw_lock); ++ bdp = fep->dirty_tx; ++ ++ while (((status = bdp->cbd_sc) & BD_ENET_TX_READY) == 0) { ++ if (bdp == fep->cur_tx && fep->tx_full == 0) ++ break; ++ ++ dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, ++ FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE); ++ bdp->cbd_bufaddr = 0; ++ ++ skb = fep->tx_skbuff[fep->skb_dirty]; ++ /* Check for errors. */ ++ if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | ++ BD_ENET_TX_RL | BD_ENET_TX_UN | ++ BD_ENET_TX_CSL)) { ++ fep->stats.tx_errors++; ++ if (status & BD_ENET_TX_HB) /* No heartbeat */ ++ fep->stats.tx_heartbeat_errors++; ++ if (status & BD_ENET_TX_LC) /* Late collision */ ++ fep->stats.tx_window_errors++; ++ if (status & BD_ENET_TX_RL) /* Retrans limit */ ++ fep->stats.tx_aborted_errors++; ++ if (status & BD_ENET_TX_UN) /* Underrun */ ++ fep->stats.tx_fifo_errors++; ++ if (status & BD_ENET_TX_CSL) /* Carrier lost */ ++ fep->stats.tx_carrier_errors++; ++ } else { ++ fep->stats.tx_packets++; ++ } ++ ++ if (status & BD_ENET_TX_READY) ++ printk("HEY! Enet xmit interrupt and TX_READY.\n"); ++ ++ /* Deferred means some collisions occurred during transmit, ++ * but we eventually sent the packet OK. ++ */ ++ if (status & BD_ENET_TX_DEF) ++ fep->stats.collisions++; ++ ++ /* Free the sk buffer associated with this last transmit */ ++ dev_kfree_rtskb(skb); /* RTnet */ ++ fep->tx_skbuff[fep->skb_dirty] = NULL; ++ fep->skb_dirty = (fep->skb_dirty + 1) & TX_RING_MOD_MASK; ++ ++ /* Update pointer to next buffer descriptor to be transmitted */ ++ if (status & BD_ENET_TX_WRAP) ++ bdp = fep->tx_bd_base; ++ else ++ bdp++; ++ ++ /* Since we have freed up a buffer, the ring is no longer full ++ */ ++ if (fep->tx_full) { ++ fep->tx_full = 0; ++ if (rtnetif_queue_stopped(ndev)) ++ rtnetif_wake_queue(ndev); ++ } ++ } ++ fep->dirty_tx = bdp; ++ rtdm_lock_put(&fep->hw_lock); ++} ++ ++ ++/* During a receive, the cur_rx points to the current incoming buffer. ++ * When we update through the ring, if the next incoming buffer has ++ * not been given to the system, we just set the empty indicator, ++ * effectively tossing the packet. ++ */ ++static void ++fec_enet_rx(struct rtnet_device *ndev, int *packets, nanosecs_abs_t *time_stamp) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ struct bufdesc *bdp; ++ unsigned short status; ++ struct rtskb *skb; ++ ushort pkt_len; ++ __u8 *data; ++ ++#ifdef CONFIG_M532x ++ flush_cache_all(); ++#endif ++ rtdm_lock_get(&fep->hw_lock); ++ ++ /* First, grab all of the stats for the incoming packet. ++ * These get messed up if we get called due to a busy condition. ++ */ ++ bdp = fep->cur_rx; ++ ++ while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) { ++ ++ /* Since we have allocated space to hold a complete frame, ++ * the last indicator should be set. 
++ */ ++ if ((status & BD_ENET_RX_LAST) == 0) ++ printk("FEC ENET: rcv is not +last\n"); ++ ++ if (!fep->opened) ++ goto rx_processing_done; ++ ++ /* Check for errors. */ ++ if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | ++ BD_ENET_RX_CR | BD_ENET_RX_OV)) { ++ fep->stats.rx_errors++; ++ if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH)) { ++ /* Frame too long or too short. */ ++ fep->stats.rx_length_errors++; ++ } ++ if (status & BD_ENET_RX_NO) /* Frame alignment */ ++ fep->stats.rx_frame_errors++; ++ if (status & BD_ENET_RX_CR) /* CRC Error */ ++ fep->stats.rx_crc_errors++; ++ if (status & BD_ENET_RX_OV) /* FIFO overrun */ ++ fep->stats.rx_fifo_errors++; ++ } ++ ++ /* Report late collisions as a frame error. ++ * On this error, the BD is closed, but we don't know what we ++ * have in the buffer. So, just drop this frame on the floor. ++ */ ++ if (status & BD_ENET_RX_CL) { ++ fep->stats.rx_errors++; ++ fep->stats.rx_frame_errors++; ++ goto rx_processing_done; ++ } ++ ++ /* Process the incoming frame. */ ++ fep->stats.rx_packets++; ++ pkt_len = bdp->cbd_datlen; ++ fep->stats.rx_bytes += pkt_len; ++ data = (__u8*)__va(bdp->cbd_bufaddr); ++ ++ dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, ++ FEC_ENET_TX_FRSIZE, DMA_FROM_DEVICE); ++ ++ if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) ++ swap_buffer(data, pkt_len); ++ ++ /* This does 16 byte alignment, exactly what we need. ++ * The packet length includes FCS, but we don't want to ++ * include that when passing upstream as it messes up ++ * bridging applications. ++ */ ++ skb = rtnetdev_alloc_rtskb(ndev, pkt_len - 4 + NET_IP_ALIGN); /* RTnet */ ++ ++ if (unlikely(!skb)) { ++ printk("%s: Memory squeeze, dropping packet.\n", ++ ndev->name); ++ fep->stats.rx_dropped++; ++ } else { ++ rtskb_reserve(skb, NET_IP_ALIGN); ++ rtskb_put(skb, pkt_len - 4); /* Make room */ ++ memcpy(skb->data, data, pkt_len - 4); ++ skb->protocol = rt_eth_type_trans(skb, ndev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ (*packets)++; /* RTnet */ ++ } ++ ++ bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data, ++ FEC_ENET_TX_FRSIZE, DMA_FROM_DEVICE); ++rx_processing_done: ++ /* Clear the status flags for this buffer */ ++ status &= ~BD_ENET_RX_STATS; ++ ++ /* Mark the buffer empty */ ++ status |= BD_ENET_RX_EMPTY; ++ bdp->cbd_sc = status; ++ ++ /* Update BD pointer to next entry */ ++ if (status & BD_ENET_RX_WRAP) ++ bdp = fep->rx_bd_base; ++ else ++ bdp++; ++ /* Doing this here will keep the FEC running while we process ++ * incoming frames. On a heavily loaded network, we should be ++ * able to keep up at the expense of system resources. ++ */ ++ writel(0, fep->hwp + FEC_R_DES_ACTIVE); ++ } ++ fep->cur_rx = bdp; ++ ++ rtdm_lock_put(&fep->hw_lock); ++} ++ ++static int ++fec_enet_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *ndev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device); /* RTnet */ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ uint int_events; ++ irqreturn_t ret = RTDM_IRQ_NONE; ++ /* RTnet */ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ int packets = 0; ++ ++ do { ++ int_events = readl(fep->hwp + FEC_IEVENT); ++ writel(int_events, fep->hwp + FEC_IEVENT); ++ ++ if (int_events & FEC_ENET_RXF) { ++ ret = RTDM_IRQ_HANDLED; ++ fec_enet_rx(ndev, &packets, &time_stamp); ++ } ++ ++ /* Transmit OK, or non-fatal error. Update the buffer ++ * descriptors. FEC handles all errors, we just discover ++ * them as part of the transmit process. 
++ */ ++ if (int_events & FEC_ENET_TXF) { ++ ret = RTDM_IRQ_HANDLED; ++ fec_enet_tx(ndev); ++ } ++ ++ if (int_events & FEC_ENET_MII) { ++ ret = RTDM_IRQ_HANDLED; ++ rtdm_nrtsig_pend(&fep->mdio_done_sig); ++ } ++ } while (int_events); ++ ++ if (packets > 0) ++ rt_mark_stack_mgr(ndev); ++ ++ return ret; ++} ++ ++ ++ ++/* ------------------------------------------------------------------------- */ ++static void __inline__ fec_get_mac(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct fec_platform_data *pdata = fep->pdev->dev.platform_data; ++ unsigned char *iap, tmpaddr[ETH_ALEN]; ++ ++ /* ++ * try to get mac address in following order: ++ * ++ * 1) module parameter via kernel command line in form ++ * fec.macaddr=0x00,0x04,0x9f,0x01,0x30,0xe0 ++ */ ++ iap = macaddr; ++ ++#ifdef CONFIG_OF ++ /* ++ * 2) from device tree data ++ */ ++ if (!is_valid_ether_addr(iap)) { ++ struct device_node *np = fep->pdev->dev.of_node; ++ if (np) { ++ const char *mac = of_get_mac_address(np); ++ if (mac) ++ iap = (unsigned char *) mac; ++ } ++ } ++#endif ++ ++ /* ++ * 3) from flash or fuse (via platform data) ++ */ ++ if (!is_valid_ether_addr(iap)) { ++#ifdef CONFIG_M5272 ++ if (FEC_FLASHMAC) ++ iap = (unsigned char *)FEC_FLASHMAC; ++#else ++ if (pdata) ++ iap = (unsigned char *)&pdata->mac; ++#endif ++ } ++ ++ /* ++ * 4) FEC mac registers set by bootloader ++ */ ++ if (!is_valid_ether_addr(iap)) { ++ *((unsigned long *) &tmpaddr[0]) = ++ be32_to_cpu(readl(fep->hwp + FEC_ADDR_LOW)); ++ *((unsigned short *) &tmpaddr[4]) = ++ be16_to_cpu(readl(fep->hwp + FEC_ADDR_HIGH) >> 16); ++ iap = &tmpaddr[0]; ++ } ++ ++ memcpy(ndev->dev_addr, iap, ETH_ALEN); ++ ++ /* Adjust MAC if using macaddr */ ++ if (iap == macaddr) ++ ndev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->dev_id; ++} ++ ++/* ------------------------------------------------------------------------- */ ++ ++/* ++ * Phy section ++ */ ++static void fec_enet_mdio_done(rtdm_nrtsig_t *nrt_sig, void* data) ++{ ++ struct fec_enet_private *fep = data; ++ ++ complete(&fep->mdio_done); ++} ++ ++static void fec_enet_adjust_link(struct net_device *netdev) ++{ ++ struct fec_enet_netdev_priv *npriv = netdev_priv(netdev); ++ struct rtnet_device *ndev = npriv->rtdev; ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct phy_device *phy_dev = fep->phy_dev; ++ unsigned long context; ++ ++ int status_change = 0; ++ ++ rtdm_lock_get_irqsave(&fep->hw_lock, context); ++ ++ /* Prevent a state halted on mii error */ ++ if (fep->mii_timeout && phy_dev->state == PHY_HALTED) { ++ phy_dev->state = PHY_RESUMING; ++ goto spin_unlock; ++ } ++ ++ /* Duplex link change */ ++ if (phy_dev->link) { ++ if (fep->full_duplex != phy_dev->duplex) { ++ fec_restart(ndev, phy_dev->duplex); ++ /* prevent unnecessary second fec_restart() below */ ++ fep->link = phy_dev->link; ++ status_change = 1; ++ } ++ } ++ ++ /* Link on or off change */ ++ if (phy_dev->link != fep->link) { ++ fep->link = phy_dev->link; ++ if (phy_dev->link) ++ fec_restart(ndev, phy_dev->duplex); ++ else ++ fec_stop(ndev); ++ status_change = 1; ++ } ++ ++spin_unlock: ++ rtdm_lock_put_irqrestore(&fep->hw_lock, context); ++ ++ if (status_change) ++ phy_print_status(phy_dev); ++} ++ ++static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) ++{ ++ struct fec_enet_private *fep = bus->priv; ++ unsigned long time_left; ++ ++ fep->mii_timeout = 0; ++ init_completion(&fep->mdio_done); ++ ++ /* start a read op */ ++ writel(FEC_MMFR_ST | FEC_MMFR_OP_READ | ++ 
FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) | ++ FEC_MMFR_TA, fep->hwp + FEC_MII_DATA); ++ ++ /* wait for end of transfer */ ++ time_left = wait_for_completion_timeout(&fep->mdio_done, ++ usecs_to_jiffies(FEC_MII_TIMEOUT)); ++ if (time_left == 0) { ++ fep->mii_timeout = 1; ++ printk(KERN_ERR "FEC: MDIO read timeout\n"); ++ return -ETIMEDOUT; ++ } ++ ++ /* return value */ ++ return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); ++} ++ ++static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, ++ u16 value) ++{ ++ struct fec_enet_private *fep = bus->priv; ++ unsigned long time_left; ++ ++ fep->mii_timeout = 0; ++ init_completion(&fep->mdio_done); ++ ++ /* start a write op */ ++ writel(FEC_MMFR_ST | FEC_MMFR_OP_WRITE | ++ FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) | ++ FEC_MMFR_TA | FEC_MMFR_DATA(value), ++ fep->hwp + FEC_MII_DATA); ++ ++ /* wait for end of transfer */ ++ time_left = wait_for_completion_timeout(&fep->mdio_done, ++ usecs_to_jiffies(FEC_MII_TIMEOUT)); ++ if (time_left == 0) { ++ fep->mii_timeout = 1; ++ printk(KERN_ERR "FEC: MDIO write timeout\n"); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++static int fec_enet_mdio_reset(struct mii_bus *bus) ++{ ++ return 0; ++} ++ ++static int fec_enet_mii_probe(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ struct phy_device *phy_dev = NULL; ++ char mdio_bus_id[MII_BUS_ID_SIZE]; ++ char phy_name[MII_BUS_ID_SIZE + 3]; ++ int phy_id; ++ int dev_id = fep->dev_id; ++ ++ fep->phy_dev = NULL; ++ ++ /* check for attached phy */ ++ for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) { ++ if ((fep->mii_bus->phy_mask & (1 << phy_id))) ++ continue; ++ if (fep->mii_bus->phy_map[phy_id] == NULL) ++ continue; ++ if (fep->mii_bus->phy_map[phy_id]->phy_id == 0) ++ continue; ++ if (dev_id--) ++ continue; ++ strncpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE); ++ break; ++ } ++ ++ if (phy_id >= PHY_MAX_ADDR) { ++ printk(KERN_INFO ++ "%s: no PHY, assuming direct connection to switch\n", ++ ndev->name); ++ strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE); ++ phy_id = 0; ++ } ++ ++ snprintf(phy_name, sizeof(phy_name), PHY_ID_FMT, mdio_bus_id, phy_id); ++ /* attach the mac to the phy using the dummy linux netdev */ ++ phy_dev = phy_connect(fep->netdev, phy_name, &fec_enet_adjust_link, 0, ++ fep->phy_interface); ++ if (IS_ERR(phy_dev)) { ++ printk(KERN_ERR "%s: could not attach to PHY\n", ndev->name); ++ return PTR_ERR(phy_dev); ++ } ++ ++ /* mask with MAC supported features */ ++ if (id_entry->driver_data & FEC_QUIRK_HAS_GBIT) ++ phy_dev->supported &= PHY_GBIT_FEATURES; ++ else ++ phy_dev->supported &= PHY_BASIC_FEATURES; ++ ++ phy_dev->advertising = phy_dev->supported; ++ ++ fep->phy_dev = phy_dev; ++ fep->link = 0; ++ fep->full_duplex = 0; ++ ++ printk(KERN_INFO ++ "%s: Freescale FEC PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", ++ ndev->name, ++ fep->phy_dev->drv->name, dev_name(&fep->phy_dev->dev), ++ fep->phy_dev->irq); ++ ++ return 0; ++} ++ ++static int fec_enet_mii_init(struct platform_device *pdev) ++{ ++ static struct mii_bus *fec0_mii_bus; ++ struct rtnet_device *ndev = platform_get_drvdata(pdev); ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ int err = -ENXIO, i; ++ ++ /* ++ * The dual fec interfaces are not equivalent with enet-mac. 
++ * Here are the differences: ++ * ++ * - fec0 supports MII & RMII modes while fec1 only supports RMII ++ * - fec0 acts as the 1588 time master while fec1 is slave ++ * - external phys can only be configured by fec0 ++ * ++ * That is to say fec1 can not work independently. It only works ++ * when fec0 is working. The reason behind this design is that the ++ * second interface is added primarily for Switch mode. ++ * ++ * Because of the last point above, both phys are attached on fec0 ++ * mdio interface in board design, and need to be configured by ++ * fec0 mii_bus. ++ */ ++ if ((id_entry->driver_data & FEC_QUIRK_ENET_MAC) && fep->dev_id > 0) { ++ /* fec1 uses fec0 mii_bus */ ++ if (mii_cnt && fec0_mii_bus) { ++ fep->mii_bus = fec0_mii_bus; ++ mii_cnt++; ++ return 0; ++ } ++ return -ENOENT; ++ } ++ ++ fep->mii_timeout = 0; ++ ++ /* ++ * Set MII speed to 2.5 MHz (= clk_get_rate() / 2 * phy_speed) ++ * ++ * The formula for FEC MDC is 'ref_freq / (MII_SPEED x 2)' while ++ * for ENET-MAC is 'ref_freq / ((MII_SPEED + 1) x 2)'. The i.MX28 ++ * Reference Manual has an error on this, and gets fixed on i.MX6Q ++ * document. ++ */ ++ fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ahb), 5000000); ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) ++ fep->phy_speed--; ++ fep->phy_speed <<= 1; ++ writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); ++ ++ fep->mii_bus = mdiobus_alloc(); ++ if (fep->mii_bus == NULL) { ++ err = -ENOMEM; ++ goto err_out; ++ } ++ ++ fep->mii_bus->name = "fec_enet_mii_bus"; ++ fep->mii_bus->read = fec_enet_mdio_read; ++ fep->mii_bus->write = fec_enet_mdio_write; ++ fep->mii_bus->reset = fec_enet_mdio_reset; ++ snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", ++ pdev->name, fep->dev_id + 1); ++ fep->mii_bus->priv = fep; ++ fep->mii_bus->parent = &pdev->dev; ++ ++ fep->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); ++ if (!fep->mii_bus->irq) { ++ err = -ENOMEM; ++ goto err_out_free_mdiobus; ++ } ++ ++ for (i = 0; i < PHY_MAX_ADDR; i++) ++ fep->mii_bus->irq[i] = PHY_POLL; ++ ++ rtdm_nrtsig_init(&fep->mdio_done_sig, fec_enet_mdio_done, fep); ++ ++ if (mdiobus_register(fep->mii_bus)) ++ goto err_out_destroy_nrt; ++ ++ mii_cnt++; ++ ++ /* save fec0 mii_bus */ ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) ++ fec0_mii_bus = fep->mii_bus; ++ ++ return 0; ++ ++err_out_destroy_nrt: ++ rtdm_nrtsig_destroy(&fep->mdio_done_sig); ++ kfree(fep->mii_bus->irq); ++err_out_free_mdiobus: ++ mdiobus_free(fep->mii_bus); ++err_out: ++ return err; ++} ++ ++static void fec_enet_mii_remove(struct fec_enet_private *fep) ++{ ++ if (--mii_cnt == 0) { ++ mdiobus_unregister(fep->mii_bus); ++ kfree(fep->mii_bus->irq); ++ mdiobus_free(fep->mii_bus); ++ } ++ rtdm_nrtsig_destroy(&fep->mdio_done_sig); ++} ++ ++static int ++fec_enet_ioctl(struct rtnet_device *ndev, unsigned int request, void *arg) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct phy_device *phydev = fep->phy_dev; ++ struct ifreq *ifr = arg; ++ struct ethtool_value *value; ++ struct ethtool_cmd cmd; ++ int err = 0; ++ ++ if (!rtnetif_running(ndev)) ++ return -EINVAL; ++ ++ if (!phydev) ++ return -ENODEV; ++ ++ switch (request) { ++ case SIOCETHTOOL: ++ value = (struct ethtool_value *)ifr->ifr_data; ++ switch (value->cmd) { ++ case ETHTOOL_GLINK: ++ value->data = fep->link; ++ if (copy_to_user(&value->data, &fep->link, ++ sizeof(value->data))) ++ err = -EFAULT; ++ break; ++ case ETHTOOL_GSET: ++ memset(&cmd, 0, sizeof(cmd)); ++ cmd.cmd = ETHTOOL_GSET; ++ err = phy_ethtool_gset(phydev, &cmd); ++ if 
(err) ++ break; ++ if (copy_to_user(ifr->ifr_data, &cmd, sizeof(cmd))) ++ err = -EFAULT; ++ break; ++ case ETHTOOL_SSET: ++ if (copy_from_user(&cmd, ifr->ifr_data, sizeof(cmd))) ++ err = -EFAULT; ++ else ++ err = phy_ethtool_sset(phydev, &cmd); ++ break; ++ } ++ break; ++ default: ++ err = -EOPNOTSUPP; ++ break; ++ } ++ ++ return err; ++} ++ ++static void fec_enet_free_buffers(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ int i; ++ struct rtskb *skb; ++ struct bufdesc *bdp; ++ ++ bdp = fep->rx_bd_base; ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ skb = fep->rx_skbuff[i]; ++ ++ if (bdp->cbd_bufaddr) ++ dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, ++ FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE); ++ if (skb) ++ dev_kfree_rtskb(skb); /* RTnet */ ++ bdp++; ++ } ++ ++ bdp = fep->tx_bd_base; ++ for (i = 0; i < TX_RING_SIZE; i++) ++ kfree(fep->tx_bounce[i]); ++} ++ ++static int fec_enet_alloc_buffers(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ int i; ++ struct rtskb *skb; ++ struct bufdesc *bdp; ++ ++ bdp = fep->rx_bd_base; ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ skb = rtnetdev_alloc_rtskb(netdev, FEC_ENET_RX_FRSIZE); /* RTnet */ ++ if (!skb) { ++ fec_enet_free_buffers(ndev); ++ return -ENOMEM; ++ } ++ fep->rx_skbuff[i] = skb; ++ ++ bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, skb->data, ++ FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE); ++ bdp->cbd_sc = BD_ENET_RX_EMPTY; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ bdp = fep->tx_bd_base; ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ fep->tx_bounce[i] = kmalloc(FEC_ENET_TX_FRSIZE, GFP_KERNEL); ++ ++ bdp->cbd_sc = 0; ++ bdp->cbd_bufaddr = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ return 0; ++} ++ ++static int ++fec_enet_open(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ int ret; ++ ++ /* I should reset the ring buffers here, but I don't yet know ++ * a simple way to do that. ++ */ ++ ++ ret = fec_enet_alloc_buffers(ndev); ++ if (ret) ++ return ret; ++ ++ /* RTnet */ ++ rt_stack_connect(ndev, &STACK_manager); ++ ++ /* Probe and connect to PHY when open the interface */ ++ ret = fec_enet_mii_probe(ndev); ++ if (ret) { ++ fec_enet_free_buffers(ndev); ++ return ret; ++ } ++ phy_start(fep->phy_dev); ++ rtnetif_carrier_on(ndev); ++ rtnetif_start_queue(ndev); ++ fep->opened = 1; ++ return 0; ++} ++ ++static int ++fec_enet_close(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ ++ /* Don't know what to do yet. */ ++ fep->opened = 0; ++ rtnetif_stop_queue(ndev); ++ fec_stop(ndev); ++ ++ if (fep->phy_dev) { ++ phy_stop(fep->phy_dev); ++ phy_disconnect(fep->phy_dev); ++ } ++ ++ fec_enet_free_buffers(ndev); ++ ++ /* RTnet */ ++ rt_stack_disconnect(ndev); ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_MULTICAST ++/* Set or clear the multicast filter for this adaptor. ++ * Skeleton taken from sunlance driver. ++ * The CPM Ethernet implementation allows Multicast as well as individual ++ * MAC address filtering. Some of the drivers check to make sure it is ++ * a group multicast address, and discard those that are not. I guess I ++ * will do the same for now, but just remove the test if you want ++ * individual filtering as well (do the upper net layers want or support ++ * this kind of feature?). 
++ */ ++ ++#define HASH_BITS 6 /* #bits in hash */ ++#define CRC32_POLY 0xEDB88320 ++ ++static void set_multicast_list(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct netdev_hw_addr *ha; ++ unsigned int i, bit, data, crc, tmp; ++ unsigned char hash; ++ ++ if (ndev->flags & IFF_PROMISC) { ++ tmp = readl(fep->hwp + FEC_R_CNTRL); ++ tmp |= 0x8; ++ writel(tmp, fep->hwp + FEC_R_CNTRL); ++ return; ++ } ++ ++ tmp = readl(fep->hwp + FEC_R_CNTRL); ++ tmp &= ~0x8; ++ writel(tmp, fep->hwp + FEC_R_CNTRL); ++ ++ if (ndev->flags & IFF_ALLMULTI) { ++ /* Catch all multicast addresses, so set the ++ * filter to all 1's ++ */ ++ writel(0xffffffff, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); ++ writel(0xffffffff, fep->hwp + FEC_GRP_HASH_TABLE_LOW); ++ ++ return; ++ } ++ ++ /* Clear filter and add the addresses in hash register ++ */ ++ writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); ++ writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW); ++ ++ rtnetdev_for_each_mc_addr(ha, ndev) { ++ /* calculate crc32 value of mac address */ ++ crc = 0xffffffff; ++ ++ for (i = 0; i < ndev->addr_len; i++) { ++ data = ha->addr[i]; ++ for (bit = 0; bit < 8; bit++, data >>= 1) { ++ crc = (crc >> 1) ^ ++ (((crc ^ data) & 1) ? CRC32_POLY : 0); ++ } ++ } ++ ++ /* only upper 6 bits (HASH_BITS) are used ++ * which point to specific bit in he hash registers ++ */ ++ hash = (crc >> (32 - HASH_BITS)) & 0x3f; ++ ++ if (hash > 31) { ++ tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH); ++ tmp |= 1 << (hash - 32); ++ writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); ++ } else { ++ tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_LOW); ++ tmp |= 1 << hash; ++ writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_LOW); ++ } ++ } ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_MULTICAST */ ++ ++#ifdef ORIGINAL_CODE ++/* Set a MAC change in hardware. 
*/ ++static int ++fec_set_mac_address(struct rtnet_device *ndev, void *p) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct sockaddr *addr = p; ++ ++ if (!is_valid_ether_addr(addr->sa_data)) ++ return -EADDRNOTAVAIL; ++ ++ memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len); ++ ++ writel(ndev->dev_addr[3] | (ndev->dev_addr[2] << 8) | ++ (ndev->dev_addr[1] << 16) | (ndev->dev_addr[0] << 24), ++ fep->hwp + FEC_ADDR_LOW); ++ writel((ndev->dev_addr[5] << 16) | (ndev->dev_addr[4] << 24), ++ fep->hwp + FEC_ADDR_HIGH); ++ return 0; ++} ++ ++#ifdef CONFIG_NET_POLL_CONTROLLER ++/* ++ * fec_poll_controller: FEC Poll controller function ++ * @dev: The FEC network adapter ++ * ++ * Polled functionality used by netconsole and others in non interrupt mode ++ * ++ */ ++void fec_poll_controller(struct rtnet_device *dev) ++{ ++ int i; ++ struct fec_enet_private *fep = rtnetdev_priv(dev); ++ ++ for (i = 0; i < FEC_IRQ_NUM; i++) { ++ if (fep->irq[i] > 0) { ++ disable_irq(fep->irq[i]); ++ fec_enet_interrupt(fep->irq[i], dev); ++ enable_irq(fep->irq[i]); ++ } ++ } ++} ++#endif /* ORIGINAL_CODE */ ++ ++static const struct rtnet_device_ops fec_netdev_ops = { ++ .ndo_open = fec_enet_open, ++ .ndo_stop = fec_enet_close, ++ .ndo_start_xmit = fec_enet_start_xmit, ++ .ndo_set_rx_mode = set_multicast_list, ++ .ndo_change_mtu = eth_change_mtu, ++ .ndo_validate_addr = eth_validate_addr, ++ .ndo_tx_timeout = fec_timeout, ++ .ndo_set_mac_address = fec_set_mac_address, ++#ifdef CONFIG_NET_POLL_CONTROLLER ++ .ndo_poll_controller = fec_poll_controller, ++#endif ++}; ++#endif ++ ++/* RTnet: get statistics */ ++static struct net_device_stats *fec_get_stats(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ return &fep->stats; ++} ++ ++ /* ++ * XXX: We need to clean up on failure exits here. ++ * ++ */ ++static int fec_enet_init(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct bufdesc *cbd_base; ++ struct bufdesc *bdp; ++ int i; ++ ++ /* Allocate memory for buffer descriptors. */ ++ cbd_base = dma_alloc_coherent(NULL, PAGE_SIZE, &fep->bd_dma, ++ GFP_KERNEL); ++ if (!cbd_base) { ++ printk("FEC: allocate descriptor memory failed?\n"); ++ return -ENOMEM; ++ } ++ ++ rtdm_lock_init(&fep->hw_lock); ++ ++ /* Get the Ethernet address */ ++ fec_get_mac(ndev); ++ ++ /* Set receive and transmit descriptor base. */ ++ fep->rx_bd_base = cbd_base; ++ fep->tx_bd_base = cbd_base + RX_RING_SIZE; ++ ++ /* RTnet: specific entries in the device structure */ ++ ndev->open = fec_enet_open; ++ ndev->stop = fec_enet_close; ++ ndev->hard_start_xmit = fec_enet_start_xmit; ++ ndev->get_stats = fec_get_stats; ++ ndev->do_ioctl = fec_enet_ioctl; ++#ifdef CONFIG_XENO_DRIVERS_NET_MULTICAST ++ ndev->set_multicast_list = &set_multicast_list; ++#endif ++ ++ /* Initialize the receive buffer descriptors. */ ++ bdp = fep->rx_bd_base; ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ ++ /* Initialize the BD for every fragment in the page. */ ++ bdp->cbd_sc = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* ...and the same for transmit */ ++ bdp = fep->tx_bd_base; ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ ++ /* Initialize the BD for every fragment in the page. 
*/ ++ bdp->cbd_sc = 0; ++ bdp->cbd_bufaddr = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ fec_restart(ndev, 0); ++ ++ return 0; ++} ++ ++#ifdef CONFIG_OF ++static int fec_get_phy_mode_dt(struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; ++ ++ if (np) ++ return of_get_phy_mode(np); ++ ++ return -ENODEV; ++} ++ ++static void fec_reset_phy(struct platform_device *pdev) ++{ ++ int err, phy_reset; ++ struct device_node *np = pdev->dev.of_node; ++ ++ if (!np) ++ return; ++ ++ phy_reset = of_get_named_gpio(np, "phy-reset-gpios", 0); ++ err = gpio_request_one(phy_reset, GPIOF_OUT_INIT_LOW, "phy-reset"); ++ if (err) { ++ pr_debug("FEC: failed to get gpio phy-reset: %d\n", err); ++ return; ++ } ++ msleep(1); ++ gpio_set_value(phy_reset, 1); ++} ++#else /* CONFIG_OF */ ++static inline int fec_get_phy_mode_dt(struct platform_device *pdev) ++{ ++ return -ENODEV; ++} ++ ++static inline void fec_reset_phy(struct platform_device *pdev) ++{ ++ /* ++ * In case of platform probe, the reset has been done ++ * by machine code. ++ */ ++} ++#endif /* CONFIG_OF */ ++ ++static int fec_probe(struct platform_device *pdev) ++{ ++ struct fec_enet_netdev_priv *npriv; ++ struct fec_enet_private *fep; ++ struct fec_platform_data *pdata; ++ struct rtnet_device *ndev; ++ int i, irq, ret = 0; ++ struct resource *r; ++ const struct of_device_id *of_id; ++ static int dev_id; ++ struct pinctrl *pinctrl; ++ ++ of_id = of_match_device(fec_dt_ids, &pdev->dev); ++ if (of_id) ++ pdev->id_entry = of_id->data; ++ ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!r) ++ return -ENXIO; ++ ++ r = request_mem_region(r->start, resource_size(r), pdev->name); ++ if (!r) ++ return -EBUSY; ++ ++ /* Init network device */ ++ ndev = rt_alloc_etherdev(sizeof(struct fec_enet_private), ++ rx_pool_size + TX_RING_SIZE); ++ if (!ndev) { ++ ret = -ENOMEM; ++ goto failed_alloc_etherdev; ++ } ++ ++ /* RTnet */ ++ rtdev_alloc_name(ndev, "rteth%d"); ++ rt_rtdev_connect(ndev, &RTDEV_manager); ++ ndev->vers = RTDEV_VERS_2_0; ++ ndev->sysbind = &pdev->dev; ++ ++ /* setup board info structure */ ++ fep = rtnetdev_priv(ndev); ++ memset(fep, 0, sizeof(*fep)); ++ ++ /* RTnet: allocate dummy linux netdev structure for phy handling */ ++ fep->netdev = alloc_etherdev(sizeof(struct fec_enet_netdev_priv)); ++ if (!fep->netdev) ++ goto failed_alloc_netdev; ++ SET_NETDEV_DEV(fep->netdev, &pdev->dev); ++ npriv = netdev_priv(fep->netdev); ++ npriv->rtdev = ndev; ++ ++ fep->hwp = ioremap(r->start, resource_size(r)); ++ fep->pdev = pdev; ++ fep->dev_id = dev_id++; ++ ++ if (!fep->hwp) { ++ ret = -ENOMEM; ++ goto failed_ioremap; ++ } ++ ++ platform_set_drvdata(pdev, ndev); ++ ++ ret = fec_get_phy_mode_dt(pdev); ++ if (ret < 0) { ++ pdata = pdev->dev.platform_data; ++ if (pdata) ++ fep->phy_interface = pdata->phy; ++ else ++ fep->phy_interface = PHY_INTERFACE_MODE_MII; ++ } else { ++ fep->phy_interface = ret; ++ } ++ ++ fec_reset_phy(pdev); ++ ++ for (i = 0; i < FEC_IRQ_NUM; i++) { ++ irq = platform_get_irq(pdev, i); ++ if (irq < 0) { ++ if (i) ++ break; ++ ret = irq; ++ goto failed_irq; ++ } ++ ret = rtdm_irq_request(&fep->irq_handle[i], irq, ++ fec_enet_interrupt, 0, ndev->name, ndev); ++ if (ret) { ++ while (--i >= 0) { ++ irq = platform_get_irq(pdev, i); ++ rtdm_irq_free(&fep->irq_handle[i]); ++ } ++ goto failed_irq; ++ } ++ } ++ ++ pinctrl = devm_pinctrl_get_select_default(&pdev->dev); ++ if (IS_ERR(pinctrl)) { ++ ret = PTR_ERR(pinctrl); ++ goto failed_pin; ++ } ++ ++ 
fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); ++ if (IS_ERR(fep->clk_ipg)) { ++ ret = PTR_ERR(fep->clk_ipg); ++ goto failed_clk; ++ } ++ ++ fep->clk_ahb = devm_clk_get(&pdev->dev, "ahb"); ++ if (IS_ERR(fep->clk_ahb)) { ++ ret = PTR_ERR(fep->clk_ahb); ++ goto failed_clk; ++ } ++ ++ clk_prepare_enable(fep->clk_ahb); ++ clk_prepare_enable(fep->clk_ipg); ++ ++ ret = fec_enet_init(ndev); ++ if (ret) ++ goto failed_init; ++ ++ ret = fec_enet_mii_init(pdev); ++ if (ret) ++ goto failed_mii_init; ++ ++ /* Carrier starts down, phylib will bring it up */ ++ rtnetif_carrier_off(ndev); ++ ++ /* RTnet: register the network interface */ ++ ret = rt_register_rtnetdev(ndev); ++ if (ret) ++ goto failed_register; ++ ++ return 0; ++ ++failed_register: ++ fec_enet_mii_remove(fep); ++failed_mii_init: ++failed_init: ++ clk_disable_unprepare(fep->clk_ahb); ++ clk_disable_unprepare(fep->clk_ipg); ++failed_pin: ++failed_clk: ++ for (i = 0; i < FEC_IRQ_NUM; i++) { ++ irq = platform_get_irq(pdev, i); ++ if (irq > 0) ++ rtdm_irq_free(&fep->irq_handle[i]); ++ } ++failed_irq: ++ iounmap(fep->hwp); ++failed_ioremap: ++ free_netdev(fep->netdev); ++failed_alloc_netdev: ++ rtdev_free(ndev); /* RTnet */ ++failed_alloc_etherdev: ++ release_mem_region(r->start, resource_size(r)); ++ ++ return ret; ++} ++ ++static int fec_drv_remove(struct platform_device *pdev) ++{ ++ struct rtnet_device *ndev = platform_get_drvdata(pdev); ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct resource *r; ++ int i; ++ ++ /* RTnet */ ++ rt_unregister_rtnetdev(ndev); ++ rt_rtdev_disconnect(ndev); ++ ++ fec_enet_mii_remove(fep); ++ for (i = 0; i < FEC_IRQ_NUM; i++) { ++ int irq = platform_get_irq(pdev, i); ++ if (irq > 0) ++ rtdm_irq_free(&fep->irq_handle[i]); ++ } ++ ++ clk_disable_unprepare(fep->clk_ahb); ++ clk_disable_unprepare(fep->clk_ipg); ++ iounmap(fep->hwp); ++ ++ /* RTnet */ ++ free_netdev(fep->netdev); ++ rtdev_free(ndev); ++ ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ BUG_ON(!r); ++ release_mem_region(r->start, resource_size(r)); ++ ++ platform_set_drvdata(pdev, NULL); ++ ++ return 0; ++} ++ ++#ifdef CONFIG_PM ++static int ++fec_suspend(struct device *dev) ++{ ++ struct rtnet_device *ndev = dev_get_drvdata(dev); ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ ++ if (rtnetif_running(ndev)) { ++ fec_stop(ndev); ++ rtnetif_device_detach(ndev); ++ } ++ clk_disable_unprepare(fep->clk_ahb); ++ clk_disable_unprepare(fep->clk_ipg); ++ return 0; ++} ++ ++static int ++fec_resume(struct device *dev) ++{ ++ struct rtnet_device *ndev = dev_get_drvdata(dev); ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ ++ clk_prepare_enable(fep->clk_ahb); ++ clk_prepare_enable(fep->clk_ipg); ++ if (rtnetif_running(ndev)) { ++ fec_restart(ndev, fep->full_duplex); ++ rtnetif_device_attach(ndev); ++ } ++ ++ return 0; ++} ++ ++static const struct dev_pm_ops fec_pm_ops = { ++ .suspend = fec_suspend, ++ .resume = fec_resume, ++ .freeze = fec_suspend, ++ .thaw = fec_resume, ++ .poweroff = fec_suspend, ++ .restore = fec_resume, ++}; ++#endif ++ ++static struct platform_driver fec_driver = { ++ .driver = { ++ .name = DRIVER_NAME, ++ .owner = THIS_MODULE, ++#ifdef CONFIG_PM ++ .pm = &fec_pm_ops, ++#endif ++ .of_match_table = fec_dt_ids, ++ }, ++ .id_table = fec_devtype, ++ .probe = fec_probe, ++ .remove = fec_drv_remove, ++}; ++ ++module_platform_driver(fec_driver); +--- linux/drivers/xenomai/net/drivers/at91_ether.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/at91_ether.c 2021-04-07 
16:01:27.248634136 +0800 +@@ -0,0 +1,453 @@ ++/* ++ * Ethernet driver for the Atmel AT91RM9200 (Thunder) ++ * ++ * Copyright (C) 2003 SAN People (Pty) Ltd ++ * ++ * Based on an earlier Atmel EMAC macrocell driver by Atmel and Lineo Inc. ++ * Initial version by Rick Bronson 01/11/2003 ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * RTnet port: ++ * Copyright (C) 2014 Gilles Chanteperdrix ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include "rt_macb.h" ++ ++/* 1518 rounded up */ ++#define MAX_RBUFF_SZ 0x600 ++/* max number of receive buffers */ ++#define MAX_RX_DESCR 9 ++ ++/* Initialize and start the Receiver and Transmit subsystems */ ++static int at91ether_start(struct rtnet_device *dev) ++{ ++ struct macb *lp = rtnetdev_priv(dev); ++ dma_addr_t addr; ++ u32 ctl; ++ int i; ++ ++ lp->rx_ring = dma_alloc_coherent(&lp->pdev->dev, ++ (MAX_RX_DESCR * ++ sizeof(struct macb_dma_desc)), ++ &lp->rx_ring_dma, GFP_KERNEL); ++ if (!lp->rx_ring) ++ return -ENOMEM; ++ ++ lp->rx_buffers = dma_alloc_coherent(&lp->pdev->dev, ++ MAX_RX_DESCR * MAX_RBUFF_SZ, ++ &lp->rx_buffers_dma, GFP_KERNEL); ++ if (!lp->rx_buffers) { ++ dma_free_coherent(&lp->pdev->dev, ++ MAX_RX_DESCR * sizeof(struct macb_dma_desc), ++ lp->rx_ring, lp->rx_ring_dma); ++ lp->rx_ring = NULL; ++ return -ENOMEM; ++ } ++ ++ addr = lp->rx_buffers_dma; ++ for (i = 0; i < MAX_RX_DESCR; i++) { ++ lp->rx_ring[i].addr = addr; ++ lp->rx_ring[i].ctrl = 0; ++ addr += MAX_RBUFF_SZ; ++ } ++ ++ /* Set the Wrap bit on the last descriptor */ ++ lp->rx_ring[MAX_RX_DESCR - 1].addr |= MACB_BIT(RX_WRAP); ++ ++ /* Reset buffer index */ ++ lp->rx_tail = 0; ++ ++ /* Program address of descriptor list in Rx Buffer Queue register */ ++ macb_writel(lp, RBQP, lp->rx_ring_dma); ++ ++ /* Enable Receive and Transmit */ ++ ctl = macb_readl(lp, NCR); ++ macb_writel(lp, NCR, ctl | MACB_BIT(RE) | MACB_BIT(TE)); ++ ++ return 0; ++} ++ ++/* Open the ethernet interface */ ++static int at91ether_open(struct rtnet_device *dev) ++{ ++ struct macb *lp = rtnetdev_priv(dev); ++ u32 ctl; ++ int ret; ++ ++ rt_stack_connect(dev, &STACK_manager); ++ ++ /* Clear internal statistics */ ++ ctl = macb_readl(lp, NCR); ++ macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT)); ++ ++ rtmacb_set_hwaddr(lp); ++ ++ ret = at91ether_start(dev); ++ if (ret) ++ return ret; ++ ++ /* Enable MAC interrupts */ ++ macb_writel(lp, IER, MACB_BIT(RCOMP) | ++ MACB_BIT(RXUBR) | ++ MACB_BIT(ISR_TUND) | ++ MACB_BIT(ISR_RLE) | ++ MACB_BIT(TCOMP) | ++ MACB_BIT(ISR_ROVR) | ++ MACB_BIT(HRESP)); ++ ++ /* schedule a link state check */ ++ phy_start(lp->phy_dev); ++ ++ rtnetif_start_queue(dev); ++ ++ return 0; ++} ++ ++/* Close the interface */ ++static int at91ether_close(struct rtnet_device *dev) ++{ ++ struct macb *lp = rtnetdev_priv(dev); ++ u32 ctl; ++ ++ /* Disable Receiver and Transmitter */ ++ ctl = macb_readl(lp, NCR); ++ macb_writel(lp, NCR, ctl & ~(MACB_BIT(TE) | MACB_BIT(RE))); ++ ++ /* Disable MAC interrupts */ ++ macb_writel(lp, IDR, MACB_BIT(RCOMP) | ++ MACB_BIT(RXUBR) | ++ MACB_BIT(ISR_TUND) | ++ MACB_BIT(ISR_RLE) | ++ MACB_BIT(TCOMP) | ++ MACB_BIT(ISR_ROVR) | ++ MACB_BIT(HRESP)); ++ ++ 
rtnetif_stop_queue(dev); ++ ++ dma_free_coherent(&lp->pdev->dev, ++ MAX_RX_DESCR * sizeof(struct macb_dma_desc), ++ lp->rx_ring, lp->rx_ring_dma); ++ lp->rx_ring = NULL; ++ ++ dma_free_coherent(&lp->pdev->dev, ++ MAX_RX_DESCR * MAX_RBUFF_SZ, ++ lp->rx_buffers, lp->rx_buffers_dma); ++ lp->rx_buffers = NULL; ++ ++ rt_stack_disconnect(dev); ++ ++ return 0; ++} ++ ++/* Transmit packet */ ++static int at91ether_start_xmit(struct rtskb *skb, struct rtnet_device *dev) ++{ ++ struct macb *lp = rtnetdev_priv(dev); ++ ++ if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) { ++ rtnetif_stop_queue(dev); ++ ++ /* Store packet information (to free when Tx completed) */ ++ lp->skb = skb; ++ lp->skb_length = skb->len; ++ lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len, ++ DMA_TO_DEVICE); ++ ++ /* Set address of the data in the Transmit Address register */ ++ macb_writel(lp, TAR, lp->skb_physaddr); ++ /* Set length of the packet in the Transmit Control register */ ++ macb_writel(lp, TCR, skb->len); ++ ++ } else { ++ rtdev_err(dev, "%s called, but device is busy!\n", __func__); ++ return RTDEV_TX_BUSY; ++ } ++ ++ return RTDEV_TX_OK; ++} ++ ++/* Extract received frame from buffer descriptors and sent to upper layers. ++ * (Called from interrupt context) ++ */ ++static bool at91ether_rx(struct rtnet_device *dev, nanosecs_abs_t *time_stamp) ++{ ++ struct macb *lp = rtnetdev_priv(dev); ++ unsigned char *p_recv; ++ struct rtskb *skb; ++ unsigned int pktlen; ++ bool ret = false; ++ ++ while (lp->rx_ring[lp->rx_tail].addr & MACB_BIT(RX_USED)) { ++ p_recv = lp->rx_buffers + lp->rx_tail * MAX_RBUFF_SZ; ++ pktlen = MACB_BF(RX_FRMLEN, lp->rx_ring[lp->rx_tail].ctrl); ++ skb = rtnetdev_alloc_rtskb(dev, pktlen + 2); ++ if (skb) { ++ rtskb_reserve(skb, 2); ++ memcpy(rtskb_put(skb, pktlen), p_recv, pktlen); ++ ++ skb->protocol = rt_eth_type_trans(skb, dev); ++ lp->stats.rx_packets++; ++ lp->stats.rx_bytes += pktlen; ++ ret = true; ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ } else { ++ lp->stats.rx_dropped++; ++ } ++ ++ if (lp->rx_ring[lp->rx_tail].ctrl & MACB_BIT(RX_MHASH_MATCH)) ++ lp->stats.multicast++; ++ ++ /* reset ownership bit */ ++ lp->rx_ring[lp->rx_tail].addr &= ~MACB_BIT(RX_USED); ++ ++ /* wrap after last buffer */ ++ if (lp->rx_tail == MAX_RX_DESCR - 1) ++ lp->rx_tail = 0; ++ else ++ lp->rx_tail++; ++ } ++ ++ return ret; ++} ++ ++/* MAC interrupt handler */ ++static int at91ether_interrupt(rtdm_irq_t *irq_handle) ++{ ++ void *dev_id = rtdm_irq_get_arg(irq_handle, void); ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *dev = dev_id; ++ struct macb *lp = rtnetdev_priv(dev); ++ u32 intstatus, ctl; ++ ++ /* MAC Interrupt Status register indicates what interrupts are pending. ++ * It is automatically cleared once read. 
++ */ ++ intstatus = macb_readl(lp, ISR); ++ ++ /* Receive complete */ ++ if ((intstatus & MACB_BIT(RCOMP)) && at91ether_rx(dev, &time_stamp)) ++ rt_mark_stack_mgr(dev); ++ ++ /* Transmit complete */ ++ if (intstatus & MACB_BIT(TCOMP)) { ++ /* The TCOM bit is set even if the transmission failed */ ++ if (intstatus & (MACB_BIT(ISR_TUND) | MACB_BIT(ISR_RLE))) ++ lp->stats.tx_errors++; ++ ++ if (lp->skb) { ++ dev_kfree_rtskb(lp->skb); ++ lp->skb = NULL; ++ dma_unmap_single(NULL, lp->skb_physaddr, lp->skb_length, DMA_TO_DEVICE); ++ lp->stats.tx_packets++; ++ lp->stats.tx_bytes += lp->skb_length; ++ } ++ rtnetif_wake_queue(dev); ++ } ++ ++ /* Work-around for EMAC Errata section 41.3.1 */ ++ if (intstatus & MACB_BIT(RXUBR)) { ++ ctl = macb_readl(lp, NCR); ++ macb_writel(lp, NCR, ctl & ~MACB_BIT(RE)); ++ macb_writel(lp, NCR, ctl | MACB_BIT(RE)); ++ } ++ ++ if (intstatus & MACB_BIT(ISR_ROVR)) ++ rtdev_err(dev, "ROVR error\n"); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++#if defined(CONFIG_OF) ++static const struct of_device_id at91ether_dt_ids[] = { ++ { .compatible = "cdns,at91rm9200-emac" }, ++ { .compatible = "cdns,emac" }, ++ { /* sentinel */ } ++}; ++MODULE_DEVICE_TABLE(of, at91ether_dt_ids); ++#endif ++ ++/* Detect MAC & PHY and perform ethernet interface initialization */ ++static int __init at91ether_probe(struct platform_device *pdev) ++{ ++ struct macb_platform_data *board_data = dev_get_platdata(&pdev->dev); ++ struct resource *regs; ++ struct rtnet_device *dev; ++ struct phy_device *phydev; ++ struct macb *lp; ++ int res; ++ u32 reg; ++ const char *mac; ++ ++ regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!regs) ++ return -ENOENT; ++ ++ dev = rt_alloc_etherdev(sizeof(struct macb), MAX_RX_DESCR * 2 + 2); ++ if (!dev) ++ return -ENOMEM; ++ ++ rtdev_alloc_name(dev, "rteth%d"); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ dev->sysbind = &pdev->dev; ++ ++ lp = rtnetdev_priv(dev); ++ lp->pdev = pdev; ++ lp->dev = dev; ++ rtdm_lock_init(&lp->lock); ++ ++ /* physical base address */ ++ dev->base_addr = regs->start; ++ lp->regs = devm_ioremap(&pdev->dev, regs->start, resource_size(regs)); ++ if (!lp->regs) { ++ res = -ENOMEM; ++ goto err_free_dev; ++ } ++ ++ /* Clock */ ++ lp->pclk = devm_clk_get(&pdev->dev, "ether_clk"); ++ if (IS_ERR(lp->pclk)) { ++ res = PTR_ERR(lp->pclk); ++ goto err_free_dev; ++ } ++ clk_enable(lp->pclk); ++ ++ lp->hclk = ERR_PTR(-ENOENT); ++ lp->tx_clk = ERR_PTR(-ENOENT); ++ ++ /* Install the interrupt handler */ ++ dev->irq = platform_get_irq(pdev, 0); ++ res = rtdm_irq_request(&lp->irq_handle, dev->irq, at91ether_interrupt, 0, dev->name, dev); ++ if (res) ++ goto err_disable_clock; ++ ++ dev->open = at91ether_open; ++ dev->stop = at91ether_close; ++ dev->hard_start_xmit = at91ether_start_xmit; ++ dev->do_ioctl = rtmacb_ioctl; ++ dev->get_stats = rtmacb_get_stats; ++ ++ platform_set_drvdata(pdev, dev); ++ ++ mac = of_get_mac_address(pdev->dev.of_node); ++ if (mac) ++ memcpy(lp->dev->dev_addr, mac, ETH_ALEN); ++ else ++ rtmacb_get_hwaddr(lp); ++ ++ res = of_get_phy_mode(pdev->dev.of_node); ++ if (res < 0) { ++ if (board_data && board_data->is_rmii) ++ lp->phy_interface = PHY_INTERFACE_MODE_RMII; ++ else ++ lp->phy_interface = PHY_INTERFACE_MODE_MII; ++ } else { ++ lp->phy_interface = res; ++ } ++ ++ macb_writel(lp, NCR, 0); ++ ++ reg = MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG); ++ if (lp->phy_interface == PHY_INTERFACE_MODE_RMII) ++ reg |= MACB_BIT(RM9200_RMII); ++ ++ macb_writel(lp, NCFGR, reg); ++ ++ /* Register the network 
interface */ ++ res = rt_register_rtnetdev(dev); ++ if (res) ++ goto err_irq_free; ++ ++ res = rtmacb_mii_init(lp); ++ if (res) ++ goto err_out_unregister_netdev; ++ ++ /* will be enabled in open() */ ++ rtnetif_carrier_off(dev); ++ ++ phydev = lp->phy_dev; ++ rtdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", ++ phydev->drv->name, dev_name(&phydev->dev), ++ phydev->irq); ++ ++ /* Display ethernet banner */ ++ rtdev_info(dev, "AT91 ethernet at 0x%08lx int=%d (%pM)\n", ++ dev->base_addr, dev->irq, dev->dev_addr); ++ ++ return 0; ++ ++err_out_unregister_netdev: ++ rt_unregister_rtnetdev(dev); ++err_irq_free: ++ rtdm_irq_free(&lp->irq_handle); ++err_disable_clock: ++ clk_disable(lp->pclk); ++err_free_dev: ++ rtdev_free(dev); ++ return res; ++} ++ ++static int at91ether_remove(struct platform_device *pdev) ++{ ++ struct rtnet_device *dev = platform_get_drvdata(pdev); ++ struct macb *lp = rtnetdev_priv(dev); ++ ++ if (lp->phy_dev) ++ phy_disconnect(lp->phy_dev); ++ ++ mdiobus_unregister(lp->mii_bus); ++ if (lp->phy_phony_net_device) ++ free_netdev(lp->phy_phony_net_device); ++ kfree(lp->mii_bus->irq); ++ rt_rtdev_disconnect(dev); ++ rtdm_irq_free(&lp->irq_handle); ++ mdiobus_free(lp->mii_bus); ++ rt_unregister_rtnetdev(dev); ++ clk_disable(lp->pclk); ++ rtdev_free(dev); ++ ++ return 0; ++} ++ ++static struct platform_driver at91ether_driver = { ++ .remove = at91ether_remove, ++ .driver = { ++ .name = "at91_ether", ++ .owner = THIS_MODULE, ++ .of_match_table = of_match_ptr(at91ether_dt_ids), ++ }, ++}; ++ ++module_platform_driver_probe(at91ether_driver, at91ether_probe); ++ ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("AT91RM9200 EMAC Ethernet driver"); ++MODULE_AUTHOR("Andrew Victor"); ++MODULE_ALIAS("platform:at91_ether"); +--- linux/drivers/xenomai/net/drivers/mpc8260_fcc_enet.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc8260_fcc_enet.c 2021-04-07 16:01:27.244634142 +0800 +@@ -0,0 +1,2235 @@ ++/* ++ * Fast Ethernet Controller (FCC) driver for Motorola MPC8260. ++ * Copyright (c) 2000 MontaVista Software, Inc. Dan Malek (dmalek@jlc.net) ++ * ++ * This version of the driver is a combination of the 8xx fec and ++ * 8260 SCC Ethernet drivers. This version has some additional ++ * configuration options, which should probably be moved out of ++ * here. This driver currently works for the EST SBC8260, ++ * SBS Diablo/BCM, Embedded Planet RPX6, TQM8260, and others. ++ * ++ * Right now, I am very watseful with the buffers. I allocate memory ++ * pages and then divide them into 2K frame buffers. This way I know I ++ * have buffers large enough to hold one frame within one buffer descriptor. ++ * Once I get this working, I will use 64 or 128 byte CPM buffers, which ++ * will be much more memory efficient and will easily handle lots of ++ * small packets. Since this is a cache coherent processor and CPM, ++ * I could also preallocate SKB's and use them directly on the interface. ++ * ++ * Ported to RTnet from "linuxppc_2_4_devel/arch/ppc/8260_io/fcc_enet.c". ++ * Copyright (c) 2003 Wolfgang Grandegger (wg@denx.de) ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++#error "MDIO for PHY configuration is not yet supported!" 
++#endif ++ ++#include ++ ++MODULE_AUTHOR("Maintainer: Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTnet driver for the MPC8260 FCC Ethernet"); ++MODULE_LICENSE("GPL"); ++ ++static unsigned int rx_pool_size = 0; ++MODULE_PARM(rx_pool_size, "i"); ++MODULE_PARM_DESC(rx_pool_size, "Receive buffer pool size"); ++ ++static unsigned int rtnet_fcc = 1; ++MODULE_PARM(rtnet_fcc, "i"); ++MODULE_PARM_DESC(rtnet_fcc, "FCCx port for RTnet (default=1)"); ++ ++#define RT_DEBUG(fmt,args...) ++ ++/* The transmitter timeout ++ */ ++#define TX_TIMEOUT (2*HZ) ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++/* Forward declarations of some structures to support different PHYs */ ++ ++typedef struct { ++ uint mii_data; ++ void (*funct)(uint mii_reg, struct net_device *dev); ++} phy_cmd_t; ++ ++typedef struct { ++ uint id; ++ char *name; ++ ++ const phy_cmd_t *config; ++ const phy_cmd_t *startup; ++ const phy_cmd_t *ack_int; ++ const phy_cmd_t *shutdown; ++} phy_info_t; ++ ++/* Register definitions for the PHY. */ ++ ++#define MII_REG_CR 0 /* Control Register */ ++#define MII_REG_SR 1 /* Status Register */ ++#define MII_REG_PHYIR1 2 /* PHY Identification Register 1 */ ++#define MII_REG_PHYIR2 3 /* PHY Identification Register 2 */ ++#define MII_REG_ANAR 4 /* A-N Advertisement Register */ ++#define MII_REG_ANLPAR 5 /* A-N Link Partner Ability Register */ ++#define MII_REG_ANER 6 /* A-N Expansion Register */ ++#define MII_REG_ANNPTR 7 /* A-N Next Page Transmit Register */ ++#define MII_REG_ANLPRNPR 8 /* A-N Link Partner Received Next Page Reg. */ ++ ++/* values for phy_status */ ++ ++#define PHY_CONF_ANE 0x0001 /* 1 auto-negotiation enabled */ ++#define PHY_CONF_LOOP 0x0002 /* 1 loopback mode enabled */ ++#define PHY_CONF_SPMASK 0x00f0 /* mask for speed */ ++#define PHY_CONF_10HDX 0x0010 /* 10 Mbit half duplex supported */ ++#define PHY_CONF_10FDX 0x0020 /* 10 Mbit full duplex supported */ ++#define PHY_CONF_100HDX 0x0040 /* 100 Mbit half duplex supported */ ++#define PHY_CONF_100FDX 0x0080 /* 100 Mbit full duplex supported */ ++ ++#define PHY_STAT_LINK 0x0100 /* 1 up - 0 down */ ++#define PHY_STAT_FAULT 0x0200 /* 1 remote fault */ ++#define PHY_STAT_ANC 0x0400 /* 1 auto-negotiation complete */ ++#define PHY_STAT_SPMASK 0xf000 /* mask for speed */ ++#define PHY_STAT_10HDX 0x1000 /* 10 Mbit half duplex selected */ ++#define PHY_STAT_10FDX 0x2000 /* 10 Mbit full duplex selected */ ++#define PHY_STAT_100HDX 0x4000 /* 100 Mbit half duplex selected */ ++#define PHY_STAT_100FDX 0x8000 /* 100 Mbit full duplex selected */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++/* The number of Tx and Rx buffers. These are allocated from the page ++ * pool. The code may assume these are power of two, so it is best ++ * to keep them that size. ++ * We don't need to allocate pages for the transmitter. We just use ++ * the skbuffer directly. ++ */ ++#define FCC_ENET_RX_PAGES 16 ++#define FCC_ENET_RX_FRSIZE 2048 ++#define FCC_ENET_RX_FRPPG (PAGE_SIZE / FCC_ENET_RX_FRSIZE) ++#define RX_RING_SIZE (FCC_ENET_RX_FRPPG * FCC_ENET_RX_PAGES) ++#define TX_RING_SIZE 16 /* Must be power of two */ ++#define TX_RING_MOD_MASK 15 /* for this to work */ ++ ++/* The FCC stores dest/src/type, data, and checksum for receive packets. ++ */ ++#define PKT_MAXBUF_SIZE 1518 ++#define PKT_MINBUF_SIZE 64 ++ ++/* Maximum input DMA size. Must be a should(?) be a multiple of 4. ++*/ ++#define PKT_MAXDMA_SIZE 1520 ++ ++/* Maximum input buffer size. Must be a multiple of 32. 
++*/ ++#define PKT_MAXBLR_SIZE 1536 ++ ++static int fcc_enet_open(struct rtnet_device *rtev); ++static int fcc_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static int fcc_enet_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp); ++static int fcc_enet_interrupt(rtdm_irq_t *irq_handle); ++static int fcc_enet_close(struct rtnet_device *dev); ++ ++static struct net_device_stats *fcc_enet_get_stats(struct rtnet_device *rtdev); ++#ifdef ORIGINAL_VERSION ++static void set_multicast_list(struct net_device *dev); ++static int fcc_enet_set_mac_address(struct net_device *dev, void *addr); ++#endif /* ORIGINAL_VERSION */ ++ ++static void fcc_restart(struct rtnet_device *rtdev, int duplex); ++ ++/* These will be configurable for the FCC choice. ++ * Multiple ports can be configured. There is little choice among the ++ * I/O pins to the PHY, except the clocks. We will need some board ++ * dependent clock selection. ++ * Why in the hell did I put these inside #ifdef's? I dunno, maybe to ++ * help show what pins are used for each device. ++ */ ++ ++/* I/O Pin assignment for FCC1. I don't yet know the best way to do this, ++ * but there is little variation among the choices. ++ */ ++#define PA1_COL ((uint)0x00000001) ++#define PA1_CRS ((uint)0x00000002) ++#define PA1_TXER ((uint)0x00000004) ++#define PA1_TXEN ((uint)0x00000008) ++#define PA1_RXDV ((uint)0x00000010) ++#define PA1_RXER ((uint)0x00000020) ++#define PA1_TXDAT ((uint)0x00003c00) ++#define PA1_RXDAT ((uint)0x0003c000) ++#define PA1_PSORA0 (PA1_RXDAT | PA1_TXDAT) ++#define PA1_PSORA1 (PA1_COL | PA1_CRS | PA1_TXER | PA1_TXEN | \ ++ PA1_RXDV | PA1_RXER) ++#define PA1_DIRA0 (PA1_RXDAT | PA1_CRS | PA1_COL | PA1_RXER | PA1_RXDV) ++#define PA1_DIRA1 (PA1_TXDAT | PA1_TXEN | PA1_TXER) ++ ++/* CLK12 is receive, CLK11 is transmit. These are board specific. ++*/ ++#define PC_F1RXCLK ((uint)0x00000800) ++#define PC_F1TXCLK ((uint)0x00000400) ++#if defined(CONFIG_PM826) ++#ifndef CONFIG_RTAI_RTNET_DB_CR826_J30x_ON ++#define CMX1_CLK_ROUTE ((uint)0x35000000) ++#define CMX1_CLK_MASK ((uint)0x7f000000) ++#else ++#define CMX1_CLK_ROUTE ((uint)0x37000000) ++#define CMX1_CLK_MASK ((uint)0x7f000000) ++#endif ++#elif defined(CONFIG_CPU86) ++#define CMX1_CLK_ROUTE ((uint)0x37000000) ++#define CMX1_CLK_MASK ((uint)0x7f000000) ++#else ++#define CMX1_CLK_ROUTE ((uint)0x3e000000) ++#define CMX1_CLK_MASK ((uint)0xff000000) ++#endif /* CONFIG_PM826 */ ++ ++/* I/O Pin assignment for FCC2. I don't yet know the best way to do this, ++ * but there is little variation among the choices. ++ */ ++#define PB2_TXER ((uint)0x00000001) ++#define PB2_RXDV ((uint)0x00000002) ++#define PB2_TXEN ((uint)0x00000004) ++#define PB2_RXER ((uint)0x00000008) ++#define PB2_COL ((uint)0x00000010) ++#define PB2_CRS ((uint)0x00000020) ++#define PB2_TXDAT ((uint)0x000003c0) ++#define PB2_RXDAT ((uint)0x00003c00) ++#define PB2_PSORB0 (PB2_RXDAT | PB2_TXDAT | PB2_CRS | PB2_COL | \ ++ PB2_RXER | PB2_RXDV | PB2_TXER) ++#define PB2_PSORB1 (PB2_TXEN) ++#define PB2_DIRB0 (PB2_RXDAT | PB2_CRS | PB2_COL | PB2_RXER | PB2_RXDV) ++#define PB2_DIRB1 (PB2_TXDAT | PB2_TXEN | PB2_TXER) ++ ++/* CLK13 is receive, CLK14 is transmit. These are board dependent. ++*/ ++#define PC_F2RXCLK ((uint)0x00001000) ++#define PC_F2TXCLK ((uint)0x00002000) ++#define CMX2_CLK_ROUTE ((uint)0x00250000) ++#define CMX2_CLK_MASK ((uint)0x00ff0000) ++ ++/* I/O Pin assignment for FCC3. I don't yet know the best way to do this, ++ * but there is little variation among the choices. 
++ */ ++#define PB3_RXDV ((uint)0x00004000) ++#define PB3_RXER ((uint)0x00008000) ++#define PB3_TXER ((uint)0x00010000) ++#define PB3_TXEN ((uint)0x00020000) ++#define PB3_COL ((uint)0x00040000) ++#define PB3_CRS ((uint)0x00080000) ++#define PB3_TXDAT ((uint)0x0f000000) ++#define PB3_RXDAT ((uint)0x00f00000) ++#define PB3_PSORB0 (PB3_RXDAT | PB3_TXDAT | PB3_CRS | PB3_COL | \ ++ PB3_RXER | PB3_RXDV | PB3_TXER | PB3_TXEN) ++#define PB3_PSORB1 (0) ++#define PB3_DIRB0 (PB3_RXDAT | PB3_CRS | PB3_COL | PB3_RXER | PB3_RXDV) ++#define PB3_DIRB1 (PB3_TXDAT | PB3_TXEN | PB3_TXER) ++ ++/* CLK15 is receive, CLK16 is transmit. These are board dependent. ++*/ ++#ifdef CONFIG_IPHASE4539 ++#define PC_F3RXCLK ((uint)0x00002000) /* CLK 14 is receive */ ++#define PC_F3TXCLK ((uint)0x00008000) /* CLK 16 is transmit */ ++#define CMX3_CLK_ROUTE ((uint)0x00002f00) ++#define CMX3_CLK_MASK ((uint)0x00007f00) ++#else ++#define PC_F3RXCLK ((uint)0x00004000) ++#define PC_F3TXCLK ((uint)0x00008000) ++#define CMX3_CLK_ROUTE ((uint)0x00003700) ++#define CMX3_CLK_MASK ((uint)0x0000ff00) ++#endif ++ ++/* MII status/control serial interface. ++*/ ++#define IOP_PORT_OFF(f) ((uint)(&((iop8260_t *)0)->iop_p##f)) ++#define IOP_PORT(x) IOP_PORT_OFF(dir##x) ++ ++#define IOP_DIR(b,p) *((uint*)((void*)(b)+(p)+(IOP_PORT_OFF(dira)-IOP_PORT_OFF(dira)))) ++#define IOP_PAR(b,p) *((uint*)((void*)(b)+(p)+(IOP_PORT_OFF(para)-IOP_PORT_OFF(dira)))) ++#define IOP_SOR(b,p) *((uint*)((void*)(b)+(p)+(IOP_PORT_OFF(sora)-IOP_PORT_OFF(dira)))) ++#define IOP_ODR(b,p) *((uint*)((void*)(b)+(p)+(IOP_PORT_OFF(odra)-IOP_PORT_OFF(dira)))) ++#define IOP_DAT(b,p) *((uint*)((void*)(b)+(p)+(IOP_PORT_OFF(data)-IOP_PORT_OFF(dira)))) ++ ++#if defined(CONFIG_TQM8260) ++/* TQM8260 has MDIO and MDCK on PC30 and PC31 respectively */ ++#define MII_MDIO ((uint)0x00000002) ++#define MII_MDCK ((uint)0x00000001) ++#elif defined (CONFIG_PM826) ++#ifndef CONFIG_RTAI_RTNET_DB_CR826_J30x_ON ++#define MII_MDIO ((uint)0x00000080) /* MDIO on PC24 */ ++#define MII_MDCK ((uint)0x00000100) /* MDCK on PC23 */ ++#else ++#define MII_MDIO ((uint)0x00000100) /* MDIO on PA23 */ ++#define MII_MDCK ((uint)0x00000200) /* MDCK on PA22 */ ++#define MII_PORT IOP_PORT(a) ++#endif /* CONFIG_RTAI_RTNET_DB_CR826_J30x_ON */ ++#elif defined (CONFIG_IPHASE4539) ++#define MII_MDIO ((uint)0x00000080) /* MDIO on PC24 */ ++#define MII_MDCK ((uint)0x00000100) /* MDCK on PC23 */ ++#else ++#define MII_MDIO ((uint)0x00000004) ++#define MII_MDCK ((uint)0x00000100) ++#endif ++ ++# if defined(CONFIG_TQM8260) ++#define MII_MDIO2 MII_MDIO ++#define MII_MDCK2 MII_MDCK ++#elif defined(CONFIG_EST8260) || defined(CONFIG_ADS8260) ++#define MII_MDIO2 ((uint)0x00400000) ++#define MII_MDCK2 ((uint)0x00200000) ++#elif defined(CONFIG_PM826) ++#define MII_MDIO2 ((uint)0x00000040) /* MDIO on PA25 */ ++#define MII_MDCK2 ((uint)0x00000080) /* MDCK on PA24 */ ++#define MII_PORT2 IOP_PORT(a) ++#else ++#define MII_MDIO2 ((uint)0x00000002) ++#define MII_MDCK2 ((uint)0x00000080) ++#endif ++ ++# if defined(CONFIG_TQM8260) ++#define MII_MDIO3 MII_MDIO ++#define MII_MDCK3 MII_MDCK ++#else ++#define MII_MDIO3 ((uint)0x00000001) ++#define MII_MDCK3 ((uint)0x00000040) ++#endif ++ ++#ifndef MII_PORT ++#define MII_PORT IOP_PORT(c) ++#endif ++ ++#ifndef MII_PORT2 ++#define MII_PORT2 IOP_PORT(c) ++#endif ++ ++#ifndef MII_PORT3 ++#define MII_PORT3 IOP_PORT(c) ++#endif ++ ++/* A table of information for supporting FCCs. This does two things. ++ * First, we know how many FCCs we have and they are always externally ++ * numbered from zero. 
Second, it holds control register and I/O ++ * information that could be different among board designs. ++ */ ++typedef struct fcc_info { ++ uint fc_fccnum; ++ uint fc_cpmblock; ++ uint fc_cpmpage; ++ uint fc_proff; ++ uint fc_interrupt; ++ uint fc_trxclocks; ++ uint fc_clockroute; ++ uint fc_clockmask; ++ uint fc_mdio; ++ uint fc_mdck; ++ uint fc_port; ++ struct rtnet_device *rtdev; ++} fcc_info_t; ++ ++static fcc_info_t fcc_ports[] = { ++ { 0, CPM_CR_FCC1_SBLOCK, CPM_CR_FCC1_PAGE, PROFF_FCC1, SIU_INT_FCC1, ++ (PC_F1RXCLK | PC_F1TXCLK), CMX1_CLK_ROUTE, CMX1_CLK_MASK, ++ MII_MDIO, MII_MDCK, MII_PORT }, ++ { 1, CPM_CR_FCC2_SBLOCK, CPM_CR_FCC2_PAGE, PROFF_FCC2, SIU_INT_FCC2, ++ (PC_F2RXCLK | PC_F2TXCLK), CMX2_CLK_ROUTE, CMX2_CLK_MASK, ++ MII_MDIO2, MII_MDCK2, MII_PORT2 }, ++ { 2, CPM_CR_FCC3_SBLOCK, CPM_CR_FCC3_PAGE, PROFF_FCC3, SIU_INT_FCC3, ++ (PC_F3RXCLK | PC_F3TXCLK), CMX3_CLK_ROUTE, CMX3_CLK_MASK, ++ MII_MDIO3, MII_MDCK3, MII_PORT3 }, ++}; ++ ++/* The FCC buffer descriptors track the ring buffers. The rx_bd_base and ++ * tx_bd_base always point to the base of the buffer descriptors. The ++ * cur_rx and cur_tx point to the currently available buffer. ++ * The dirty_tx tracks the current buffer that is being sent by the ++ * controller. The cur_tx and dirty_tx are equal under both completely ++ * empty and completely full conditions. The empty/ready indicator in ++ * the buffer descriptor determines the actual condition. ++ */ ++struct fcc_enet_private { ++ /* The addresses of a Tx/Rx-in-place packets/buffers. */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ ushort skb_cur; ++ ushort skb_dirty; ++ ++ /* CPM dual port RAM relative addresses. ++ */ ++ cbd_t *rx_bd_base; /* Address of Rx and Tx buffers. */ ++ cbd_t *tx_bd_base; ++ cbd_t *cur_rx, *cur_tx; /* The next free ring entry */ ++ cbd_t *dirty_tx; /* The ring entries to be free()ed. */ ++ volatile fcc_t *fccp; ++ volatile fcc_enet_t *ep; ++ struct net_device_stats stats; ++ uint tx_full; ++ rtdm_lock_t lock; ++ rtdm_irq_t irq_handle; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ uint phy_id; ++ uint phy_id_done; ++ uint phy_status; ++ phy_info_t *phy; ++ struct tq_struct phy_task; ++ ++ uint sequence_done; ++ ++ uint phy_addr; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ int link; ++ int old_link; ++ int full_duplex; ++ ++ fcc_info_t *fip; ++}; ++ ++static void init_fcc_shutdown(fcc_info_t *fip, struct fcc_enet_private *cep, ++ volatile immap_t *immap); ++static void init_fcc_startup(fcc_info_t *fip, struct rtnet_device *rtdev); ++static void init_fcc_ioports(fcc_info_t *fip, volatile iop8260_t *io, ++ volatile immap_t *immap); ++static void init_fcc_param(fcc_info_t *fip, struct rtnet_device *rtdev, ++ volatile immap_t *immap); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static int mii_queue(struct net_device *dev, int request, void (*func)(uint, struct net_device *)); ++static uint mii_send_receive(fcc_info_t *fip, uint cmd); ++ ++static void fcc_stop(struct net_device *dev); ++ ++/* Make MII read/write commands for the FCC. 
++*/ ++#define mk_mii_read(REG) (0x60020000 | ((REG & 0x1f) << 18)) ++#define mk_mii_write(REG, VAL) (0x50020000 | ((REG & 0x1f) << 18) | \ ++ (VAL & 0xffff)) ++#define mk_mii_end 0 ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ ++static int ++fcc_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct fcc_enet_private *cep = (struct fcc_enet_private *)rtdev->priv; ++ volatile cbd_t *bdp; ++ rtdm_lockctx_t context; ++ ++ RT_DEBUG(__FUNCTION__": ...\n"); ++ ++ if (!cep->link) { ++ /* Link is down or autonegotiation is in progress. */ ++ return 1; ++ } ++ ++ /* Fill in a Tx ring entry */ ++ bdp = cep->cur_tx; ++ ++#ifndef final_version ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ /* Ooops. All transmit buffers are full. Bail out. ++ * This should not happen, since cep->tx_full should be set. ++ */ ++ rtdm_printk("%s: tx queue full!.\n", rtdev->name); ++ return 1; ++ } ++#endif ++ ++ /* Clear all of the status flags. */ ++ bdp->cbd_sc &= ~BD_ENET_TX_STATS; ++ ++ /* If the frame is short, tell CPM to pad it. */ ++ if (skb->len <= ETH_ZLEN) ++ bdp->cbd_sc |= BD_ENET_TX_PAD; ++ else ++ bdp->cbd_sc &= ~BD_ENET_TX_PAD; ++ ++ /* Set buffer length and buffer pointer. */ ++ bdp->cbd_datlen = skb->len; ++ bdp->cbd_bufaddr = __pa(skb->data); ++ ++ /* Save skb pointer. */ ++ cep->tx_skbuff[cep->skb_cur] = skb; ++ ++ cep->stats.tx_bytes += skb->len; ++ cep->skb_cur = (cep->skb_cur+1) & TX_RING_MOD_MASK; ++ ++ rtdm_lock_get_irqsave(&cep->lock, context); ++ ++ /* Get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ /* Send it on its way. Tell CPM its ready, interrupt when done, ++ * its the last BD of the frame, and to put the CRC on the end. ++ */ ++ bdp->cbd_sc |= (BD_ENET_TX_READY | BD_ENET_TX_INTR | BD_ENET_TX_LAST | BD_ENET_TX_TC); ++ ++#ifdef ORIGINAL_VERSION ++ dev->trans_start = jiffies; ++#endif ++ ++ /* If this was the last BD in the ring, start at the beginning again. */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) ++ bdp = cep->tx_bd_base; ++ else ++ bdp++; ++ ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ rtnetif_stop_queue(rtdev); ++ cep->tx_full = 1; ++ } ++ ++ cep->cur_tx = (cbd_t *)bdp; ++ ++ rtdm_lock_put_irqrestore(&cep->lock, context); ++ ++ return 0; ++} ++ ++ ++#ifdef ORIGINAL_VERSION ++static void ++fcc_enet_timeout(struct net_device *dev) ++{ ++ struct fcc_enet_private *cep = (struct fcc_enet_private *)dev->priv; ++ ++ printk("%s: transmit timed out.\n", dev->name); ++ cep->stats.tx_errors++; ++#ifndef final_version ++ { ++ int i; ++ cbd_t *bdp; ++ printk(" Ring data dump: cur_tx %p%s cur_rx %p.\n", ++ cep->cur_tx, cep->tx_full ? " (full)" : "", ++ cep->cur_rx); ++ bdp = cep->tx_bd_base; ++ printk(" Tx @base %p :\n", bdp); ++ for (i = 0 ; i < TX_RING_SIZE; i++, bdp++) ++ printk("%04x %04x %08x\n", ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ bdp = cep->rx_bd_base; ++ printk(" Rx @base %p :\n", bdp); ++ for (i = 0 ; i < RX_RING_SIZE; i++, bdp++) ++ printk("%04x %04x %08x\n", ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ } ++#endif ++ if (!cep->tx_full) ++ netif_wake_queue(dev); ++} ++#endif /* ORIGINAL_VERSION */ ++ ++/* The interrupt handler. 
*/ ++static int fcc_enet_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ struct fcc_enet_private *cep; ++ volatile cbd_t *bdp; ++ ushort int_events; ++ int must_restart; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ ++ /* Get the interrupt events that caused us to be here. ++ */ ++ int_events = cep->fccp->fcc_fcce; ++ cep->fccp->fcc_fcce = int_events; ++ must_restart = 0; ++ ++ /* Handle receive event in its own function. ++ */ ++ if (int_events & FCC_ENET_RXF) { ++ fcc_enet_rx(rtdev, &packets, &time_stamp); ++ } ++ ++ /* Check for a transmit error. The manual is a little unclear ++ * about this, so the debug code until I get it figured out. It ++ * appears that if TXE is set, then TXB is not set. However, ++ * if carrier sense is lost during frame transmission, the TXE ++ * bit is set, "and continues the buffer transmission normally." ++ * I don't know if "normally" implies TXB is set when the buffer ++ * descriptor is closed.....trial and error :-). ++ */ ++ ++ /* Transmit OK, or non-fatal error. Update the buffer descriptors. ++ */ ++ if (int_events & (FCC_ENET_TXE | FCC_ENET_TXB)) { ++ rtdm_lock_get(&cep->lock); ++ bdp = cep->dirty_tx; ++ while ((bdp->cbd_sc&BD_ENET_TX_READY)==0) { ++ if ((bdp==cep->cur_tx) && (cep->tx_full == 0)) ++ break; ++ ++ if (bdp->cbd_sc & BD_ENET_TX_HB) /* No heartbeat */ ++ cep->stats.tx_heartbeat_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_LC) /* Late collision */ ++ cep->stats.tx_window_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_RL) /* Retrans limit */ ++ cep->stats.tx_aborted_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_UN) /* Underrun */ ++ cep->stats.tx_fifo_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_CSL) /* Carrier lost */ ++ cep->stats.tx_carrier_errors++; ++ ++ ++ /* No heartbeat or Lost carrier are not really bad errors. ++ * The others require a restart transmit command. ++ */ ++ if (bdp->cbd_sc & ++ (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) { ++ must_restart = 1; ++ cep->stats.tx_errors++; ++ } ++ ++ cep->stats.tx_packets++; ++ ++ /* Deferred means some collisions occurred during transmit, ++ * but we eventually sent the packet OK. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_DEF) ++ cep->stats.collisions++; ++ ++ /* Free the sk buffer associated with this last transmit. */ ++ dev_kfree_rtskb(cep->tx_skbuff[cep->skb_dirty]); ++ cep->skb_dirty = (cep->skb_dirty + 1) & TX_RING_MOD_MASK; ++ ++ /* Update pointer to next buffer descriptor to be transmitted. */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) ++ bdp = cep->tx_bd_base; ++ else ++ bdp++; ++ ++ /* I don't know if we can be held off from processing these ++ * interrupts for more than one frame time. I really hope ++ * not. In such a case, we would now want to check the ++ * currently available BD (cur_tx) and determine if any ++ * buffers between the dirty_tx and cur_tx have also been ++ * sent. We would want to process anything in between that ++ * does not have BD_ENET_TX_READY set. ++ */ ++ ++ /* Since we have freed up a buffer, the ring is no longer ++ * full. ++ */ ++ if (cep->tx_full) { ++ cep->tx_full = 0; ++ if (rtnetif_queue_stopped(rtdev)) ++ rtnetif_wake_queue(rtdev); ++ } ++ ++ cep->dirty_tx = (cbd_t *)bdp; ++ } ++ ++ if (must_restart) { ++ volatile cpm8260_t *cp; ++ ++ /* Some transmit errors cause the transmitter to shut ++ * down. We now issue a restart transmit. 
Since the ++ * errors close the BD and update the pointers, the restart ++ * _should_ pick up without having to reset any of our ++ * pointers either. Also, To workaround 8260 device erratum ++ * CPM37, we must disable and then re-enable the transmitter ++ * following a Late Collision, Underrun, or Retry Limit error. ++ */ ++ cep->fccp->fcc_gfmr &= ~FCC_GFMR_ENT; ++#ifdef ORIGINAL_VERSION ++ udelay(10); /* wait a few microseconds just on principle */ ++#endif ++ cep->fccp->fcc_gfmr |= FCC_GFMR_ENT; ++ ++ cp = cpmp; ++ cp->cp_cpcr = ++ mk_cr_cmd(cep->fip->fc_cpmpage, cep->fip->fc_cpmblock, ++ 0x0c, CPM_CR_RESTART_TX) | CPM_CR_FLG; ++ while (cp->cp_cpcr & CPM_CR_FLG); // looks suspicious - how long may it take? ++ } ++ rtdm_lock_put(&cep->lock); ++ } ++ ++ /* Check for receive busy, i.e. packets coming but no place to ++ * put them. ++ */ ++ if (int_events & FCC_ENET_BSY) { ++ cep->stats.rx_dropped++; ++ } ++ ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++/* During a receive, the cur_rx points to the current incoming buffer. ++ * When we update through the ring, if the next incoming buffer has ++ * not been given to the system, we just set the empty indicator, ++ * effectively tossing the packet. ++ */ ++static int ++fcc_enet_rx(struct rtnet_device *rtdev, int* packets, nanosecs_abs_t *time_stamp) ++{ ++ struct fcc_enet_private *cep; ++ volatile cbd_t *bdp; ++ struct rtskb *skb; ++ ushort pkt_len; ++ ++ RT_DEBUG(__FUNCTION__": ...\n"); ++ ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ ++ /* First, grab all of the stats for the incoming packet. ++ * These get messed up if we get called due to a busy condition. ++ */ ++ bdp = cep->cur_rx; ++ ++for (;;) { ++ if (bdp->cbd_sc & BD_ENET_RX_EMPTY) ++ break; ++ ++#ifndef final_version ++ /* Since we have allocated space to hold a complete frame, both ++ * the first and last indicators should be set. ++ */ ++ if ((bdp->cbd_sc & (BD_ENET_RX_FIRST | BD_ENET_RX_LAST)) != ++ (BD_ENET_RX_FIRST | BD_ENET_RX_LAST)) ++ rtdm_printk("CPM ENET: rcv is not first+last\n"); ++#endif ++ ++ /* Frame too long or too short. */ ++ if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) ++ cep->stats.rx_length_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_NO) /* Frame alignment */ ++ cep->stats.rx_frame_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_CR) /* CRC Error */ ++ cep->stats.rx_crc_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_OV) /* FIFO overrun */ ++ cep->stats.rx_crc_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_CL) /* Late Collision */ ++ cep->stats.rx_frame_errors++; ++ ++ if (!(bdp->cbd_sc & ++ (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | BD_ENET_RX_CR ++ | BD_ENET_RX_OV | BD_ENET_RX_CL))) ++ { ++ /* Process the incoming frame. */ ++ cep->stats.rx_packets++; ++ ++ /* Remove the FCS from the packet length. */ ++ pkt_len = bdp->cbd_datlen - 4; ++ cep->stats.rx_bytes += pkt_len; ++ ++ /* This does 16 byte alignment, much more than we need. */ ++ skb = rtnetdev_alloc_rtskb(rtdev, pkt_len); ++ ++ if (skb == NULL) { ++ rtdm_printk("%s: Memory squeeze, dropping packet.\n", rtdev->name); ++ cep->stats.rx_dropped++; ++ } ++ else { ++ rtskb_put(skb,pkt_len); /* Make room */ ++ memcpy(skb->data, ++ (unsigned char *)__va(bdp->cbd_bufaddr), ++ pkt_len); ++ skb->protocol=rt_eth_type_trans(skb,rtdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ (*packets)++; ++ } ++ } ++ ++ /* Clear the status flags for this buffer. */ ++ bdp->cbd_sc &= ~BD_ENET_RX_STATS; ++ ++ /* Mark the buffer empty. 
*/ ++ bdp->cbd_sc |= BD_ENET_RX_EMPTY; ++ ++ /* Update BD pointer to next entry. */ ++ if (bdp->cbd_sc & BD_ENET_RX_WRAP) ++ bdp = cep->rx_bd_base; ++ else ++ bdp++; ++ ++ } ++ cep->cur_rx = (cbd_t *)bdp; ++ ++ return 0; ++} ++ ++static int ++fcc_enet_close(struct rtnet_device *rtdev) ++{ ++ /* Don't know what to do yet. */ ++ rtnetif_stop_queue(rtdev); ++ ++ return 0; ++} ++ ++static struct net_device_stats *fcc_enet_get_stats(struct rtnet_device *rtdev) ++{ ++ struct fcc_enet_private *cep = (struct fcc_enet_private *)rtdev->priv; ++ ++ return &cep->stats; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ ++/* NOTE: Most of the following comes from the FEC driver for 860. The ++ * overall structure of MII code has been retained (as it's proved stable ++ * and well-tested), but actual transfer requests are processed "at once" ++ * instead of being queued (there's no interrupt-driven MII transfer ++ * mechanism, one has to toggle the data/clock bits manually). ++ */ ++static int ++mii_queue(struct net_device *dev, int regval, void (*func)(uint, struct net_device *)) ++{ ++ struct fcc_enet_private *fep; ++ int retval, tmp; ++ ++ /* Add PHY address to register command. */ ++ fep = dev->priv; ++ regval |= fep->phy_addr << 23; ++ ++ retval = 0; ++ ++ tmp = mii_send_receive(fep->fip, regval); ++ if (func) ++ func(tmp, dev); ++ ++ return retval; ++} ++ ++static void mii_do_cmd(struct net_device *dev, const phy_cmd_t *c) ++{ ++ int k; ++ ++ if(!c) ++ return; ++ ++ for(k = 0; (c+k)->mii_data != mk_mii_end; k++) ++ mii_queue(dev, (c+k)->mii_data, (c+k)->funct); ++} ++ ++static void mii_parse_sr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_LINK | PHY_STAT_FAULT | PHY_STAT_ANC); ++ ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_LINK; ++ if (mii_reg & 0x0010) ++ s |= PHY_STAT_FAULT; ++ if (mii_reg & 0x0020) ++ s |= PHY_STAT_ANC; ++ ++ fep->phy_status = s; ++ fep->link = (s & PHY_STAT_LINK) ? 
1 : 0; ++} ++ ++static void mii_parse_cr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_CONF_ANE | PHY_CONF_LOOP); ++ ++ if (mii_reg & 0x1000) ++ s |= PHY_CONF_ANE; ++ if (mii_reg & 0x4000) ++ s |= PHY_CONF_LOOP; ++ ++ fep->phy_status = s; ++} ++ ++static void mii_parse_anar(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_CONF_SPMASK); ++ ++ if (mii_reg & 0x0020) ++ s |= PHY_CONF_10HDX; ++ if (mii_reg & 0x0040) ++ s |= PHY_CONF_10FDX; ++ if (mii_reg & 0x0080) ++ s |= PHY_CONF_100HDX; ++ if (mii_reg & 0x00100) ++ s |= PHY_CONF_100FDX; ++ ++ fep->phy_status = s; ++} ++ ++/* Some boards don't have the MDIRQ line connected (PM826 is such a board) */ ++ ++static void mii_waitfor_anc(uint mii_reg, struct net_device *dev) ++{ ++ struct fcc_enet_private *fep; ++ int regval; ++ int i; ++ ++ fep = dev->priv; ++ regval = mk_mii_read(MII_REG_SR) | (fep->phy_addr << 23); ++ ++ for (i = 0; i < 1000; i++) ++ { ++ if (mii_send_receive(fep->fip, regval) & 0x20) ++ return; ++ udelay(10000); ++ } ++ ++ printk("%s: autonegotiation timeout\n", dev->name); ++} ++ ++/* ------------------------------------------------------------------------- */ ++/* The Level one LXT970 is used by many boards */ ++ ++#ifdef CONFIG_FCC_LXT970 ++ ++#define MII_LXT970_MIRROR 16 /* Mirror register */ ++#define MII_LXT970_IER 17 /* Interrupt Enable Register */ ++#define MII_LXT970_ISR 18 /* Interrupt Status Register */ ++#define MII_LXT970_CONFIG 19 /* Configuration Register */ ++#define MII_LXT970_CSR 20 /* Chip Status Register */ ++ ++static void mii_parse_lxt970_csr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x0800) { ++ if (mii_reg & 0x1000) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } else { ++ if (mii_reg & 0x1000) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_lxt970 = { ++ 0x07810000, ++ "LXT970", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_LXT970_IER, 0x0002), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* read SR and ISR to acknowledge */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_LXT970_ISR), NULL }, ++ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_LXT970_CSR), mii_parse_lxt970_csr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_LXT970_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_LXT970 */ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Level one LXT971 is used on some of my custom boards */ ++ ++#ifdef CONFIG_FCC_LXT971 ++ ++/* register definitions for the 971 */ ++ ++#define MII_LXT971_PCR 16 /* Port Control Register */ ++#define MII_LXT971_SR2 17 /* Status Register 2 */ ++#define MII_LXT971_IER 18 /* Interrupt Enable Register */ ++#define MII_LXT971_ISR 19 /* Interrupt Status Register */ ++#define MII_LXT971_LCR 20 /* LED Control 
Register */ ++#define MII_LXT971_TCR 30 /* Transmit Control Register */ ++ ++/* ++ * I had some nice ideas of running the MDIO faster... ++ * The 971 should support 8MHz and I tried it, but things acted really ++ * weird, so 2.5 MHz ought to be enough for anyone... ++ */ ++ ++static void mii_parse_lxt971_sr2(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x4000) { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } else { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ if (mii_reg & 0x0008) ++ s |= PHY_STAT_FAULT; ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_lxt971 = { ++ 0x0001378e, ++ "LXT971", ++ ++ (const phy_cmd_t []) { /* config */ ++// { mk_mii_write(MII_REG_ANAR, 0x021), NULL }, /* 10 Mbps, HD */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x00f2), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ ++ /* Somehow does the 971 tell me that the link is down ++ * the first read after power-up. ++ * read here to get a valid value in ack_int */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++#ifdef CONFIG_PM826 ++ { mk_mii_read(MII_REG_SR), mii_waitfor_anc }, ++#endif ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_LXT971_SR2), mii_parse_lxt971_sr2 }, ++ ++ /* we only need to read ISR to acknowledge */ ++ ++ { mk_mii_read(MII_LXT971_ISR), NULL }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_LXT971 */ ++ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Quality Semiconductor QS6612 is used on the RPX CLLF */ ++ ++#ifdef CONFIG_FCC_QS6612 ++ ++/* register definitions */ ++ ++#define MII_QS6612_MCR 17 /* Mode Control Register */ ++#define MII_QS6612_FTR 27 /* Factory Test Register */ ++#define MII_QS6612_MCO 28 /* Misc. Control Register */ ++#define MII_QS6612_ISR 29 /* Interrupt Source Register */ ++#define MII_QS6612_IMR 30 /* Interrupt Mask Register */ ++#define MII_QS6612_PCR 31 /* 100BaseTx PHY Control Reg. */ ++ ++static void mii_parse_qs6612_pcr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ switch((mii_reg >> 2) & 7) { ++ case 1: s |= PHY_STAT_10HDX; break; ++ case 2: s |= PHY_STAT_100HDX; break; ++ case 5: s |= PHY_STAT_10FDX; break; ++ case 6: s |= PHY_STAT_100FDX; break; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_qs6612 = { ++ 0x00181440, ++ "QS6612", ++ ++ (const phy_cmd_t []) { /* config */ ++// { mk_mii_write(MII_REG_ANAR, 0x061), NULL }, /* 10 Mbps */ ++ ++ /* The PHY powers up isolated on the RPX, ++ * so send a command to allow operation. 
++ */ ++ ++ { mk_mii_write(MII_QS6612_PCR, 0x0dc0), NULL }, ++ ++ /* parse cr and anar to get some info */ ++ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_QS6612_IMR, 0x003a), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ ++ /* we need to read ISR, SR and ANER to acknowledge */ ++ ++ { mk_mii_read(MII_QS6612_ISR), NULL }, ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_ANER), NULL }, ++ ++ /* read pcr to get info */ ++ ++ { mk_mii_read(MII_QS6612_PCR), mii_parse_qs6612_pcr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_QS6612_IMR, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++ ++#endif /* CONFIG_FCC_QS6612 */ ++ ++/* ------------------------------------------------------------------------- */ ++/* The AMD Am79C873 PHY is on PM826 */ ++ ++#ifdef CONFIG_FCC_AMD79C873 ++ ++#define MII_79C873_IER 17 /* Interrupt Enable Register */ ++#define MII_79C873_DR 18 /* Diagnostic Register */ ++ ++static void mii_parse_79c873_cr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x2000) { ++ if (mii_reg & 0x0100) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } else { ++ if (mii_reg & 0x0100) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_79c873 = { ++ 0x00181b80, ++ "AMD79C873", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup */ ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++#ifdef CONFIG_PM826 ++ { mk_mii_read(MII_REG_SR), mii_waitfor_anc }, ++#endif ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* read SR twice: to acknowledge and to get link status */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ ++ /* find out the current link parameters */ ++ ++ { mk_mii_read(MII_REG_CR), mii_parse_79c873_cr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_79C873_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FCC_AMD79C873 */ ++ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Davicom DM9131 is used on the HYMOD board */ ++ ++#ifdef CONFIG_FCC_DM9131 ++ ++/* register definitions */ ++ ++#define MII_DM9131_ACR 16 /* Aux. Config Register */ ++#define MII_DM9131_ACSR 17 /* Aux. Config/Status Register */ ++#define MII_DM9131_10TCSR 18 /* 10BaseT Config/Status Reg. */ ++#define MII_DM9131_INTR 21 /* Interrupt Register */ ++#define MII_DM9131_RECR 22 /* Receive Error Counter Reg. 
*/ ++#define MII_DM9131_DISCR 23 /* Disconnect Counter Register */ ++ ++static void mii_parse_dm9131_acsr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ switch ((mii_reg >> 12) & 0xf) { ++ case 1: s |= PHY_STAT_10HDX; break; ++ case 2: s |= PHY_STAT_10FDX; break; ++ case 4: s |= PHY_STAT_100HDX; break; ++ case 8: s |= PHY_STAT_100FDX; break; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_dm9131 = { ++ 0x00181b80, ++ "DM9131", ++ ++ (const phy_cmd_t []) { /* config */ ++ /* parse cr and anar to get some info */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_DM9131_INTR, 0x0002), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ ++ /* we need to read INTR, SR and ANER to acknowledge */ ++ ++ { mk_mii_read(MII_DM9131_INTR), NULL }, ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_ANER), NULL }, ++ ++ /* read acsr to get info */ ++ ++ { mk_mii_read(MII_DM9131_ACSR), mii_parse_dm9131_acsr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_DM9131_INTR, 0x0f00), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++ ++#endif /* CONFIG_FEC_DM9131 */ ++ ++ ++static phy_info_t *phy_info[] = { ++ ++#ifdef CONFIG_FCC_LXT970 ++ &phy_info_lxt970, ++#endif /* CONFIG_FCC_LXT970 */ ++ ++#ifdef CONFIG_FCC_LXT971 ++ &phy_info_lxt971, ++#endif /* CONFIG_FCC_LXT971 */ ++ ++#ifdef CONFIG_FCC_QS6612 ++ &phy_info_qs6612, ++#endif /* CONFIG_FCC_QS6612 */ ++ ++#ifdef CONFIG_FCC_DM9131 ++ &phy_info_dm9131, ++#endif /* CONFIG_FCC_DM9131 */ ++ ++#ifdef CONFIG_FCC_AMD79C873 ++ &phy_info_79c873, ++#endif /* CONFIG_FCC_AMD79C873 */ ++ ++ NULL ++}; ++ ++static void mii_display_status(struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ if (!fep->link && !fep->old_link) { ++ /* Link is still down - don't print anything */ ++ return; ++ } ++ ++ printk("%s: status: ", dev->name); ++ ++ if (!fep->link) { ++ printk("link down"); ++ } else { ++ printk("link up"); ++ ++ switch(s & PHY_STAT_SPMASK) { ++ case PHY_STAT_100FDX: printk(", 100 Mbps Full Duplex"); break; ++ case PHY_STAT_100HDX: printk(", 100 Mbps Half Duplex"); break; ++ case PHY_STAT_10FDX: printk(", 10 Mbps Full Duplex"); break; ++ case PHY_STAT_10HDX: printk(", 10 Mbps Half Duplex"); break; ++ default: ++ printk(", Unknown speed/duplex"); ++ } ++ ++ if (s & PHY_STAT_ANC) ++ printk(", auto-negotiation complete"); ++ } ++ ++ if (s & PHY_STAT_FAULT) ++ printk(", remote fault"); ++ ++ printk(".\n"); ++} ++ ++static void mii_display_config(struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ printk("%s: config: auto-negotiation ", dev->name); ++ ++ if (s & PHY_CONF_ANE) ++ printk("on"); ++ else ++ printk("off"); ++ ++ if (s & PHY_CONF_100FDX) ++ printk(", 100FDX"); ++ if (s & PHY_CONF_100HDX) ++ printk(", 100HDX"); ++ if (s & PHY_CONF_10FDX) ++ printk(", 10FDX"); ++ if (s & PHY_CONF_10HDX) ++ printk(", 10HDX"); ++ if (!(s & PHY_CONF_SPMASK)) ++ printk(", No speed/duplex selected?"); ++ ++ if (s & PHY_CONF_LOOP) ++ printk(", loopback enabled"); ++ ++ printk(".\n"); ++ ++ fep->sequence_done = 1; 
++} ++ ++static void mii_relink(struct net_device *dev) ++{ ++ struct fcc_enet_private *fep = dev->priv; ++ int duplex; ++ ++ fep->link = (fep->phy_status & PHY_STAT_LINK) ? 1 : 0; ++ mii_display_status(dev); ++ fep->old_link = fep->link; ++ ++ if (fep->link) { ++ duplex = 0; ++ if (fep->phy_status ++ & (PHY_STAT_100FDX | PHY_STAT_10FDX)) ++ duplex = 1; ++ fcc_restart(dev, duplex); ++ } else { ++ fcc_stop(dev); ++ } ++} ++ ++static void mii_queue_relink(uint mii_reg, struct net_device *dev) ++{ ++ struct fcc_enet_private *fep = dev->priv; ++ ++ fep->phy_task.routine = (void *)mii_relink; ++ fep->phy_task.data = dev; ++ schedule_task(&fep->phy_task); ++} ++ ++static void mii_queue_config(uint mii_reg, struct net_device *dev) ++{ ++ struct fcc_enet_private *fep = dev->priv; ++ ++ fep->phy_task.routine = (void *)mii_display_config; ++ fep->phy_task.data = dev; ++ schedule_task(&fep->phy_task); ++} ++ ++ ++ ++phy_cmd_t phy_cmd_relink[] = { { mk_mii_read(MII_REG_CR), mii_queue_relink }, ++ { mk_mii_end, } }; ++phy_cmd_t phy_cmd_config[] = { { mk_mii_read(MII_REG_CR), mii_queue_config }, ++ { mk_mii_end, } }; ++ ++ ++/* Read remainder of PHY ID. ++*/ ++static void ++mii_discover_phy3(uint mii_reg, struct net_device *dev) ++{ ++ struct fcc_enet_private *fep; ++ int i; ++ ++ fep = dev->priv; ++ fep->phy_id |= (mii_reg & 0xffff); ++ ++ for(i = 0; phy_info[i]; i++) ++ if(phy_info[i]->id == (fep->phy_id >> 4)) ++ break; ++ ++ if(!phy_info[i]) ++ panic("%s: PHY id 0x%08x is not supported!\n", ++ dev->name, fep->phy_id); ++ ++ fep->phy = phy_info[i]; ++ ++ printk("%s: Phy @ 0x%x, type %s (0x%08x)\n", ++ dev->name, fep->phy_addr, fep->phy->name, fep->phy_id); ++} ++ ++/* Scan all of the MII PHY addresses looking for someone to respond ++ * with a valid ID. This usually happens quickly. ++ */ ++static void ++mii_discover_phy(uint mii_reg, struct net_device *dev) ++{ ++ struct fcc_enet_private *fep; ++ uint phytype; ++ ++ fep = dev->priv; ++ ++ if ((phytype = (mii_reg & 0xfff)) != 0xfff && phytype != 0) { ++ ++ /* Got first part of ID, now get remainder. */ ++ fep->phy_id = phytype << 16; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR2), mii_discover_phy3); ++ } else { ++ fep->phy_addr++; ++ if (fep->phy_addr < 32) { ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), ++ mii_discover_phy); ++ } else { ++ printk("FCC: No PHY device found.\n"); ++ } ++ } ++} ++ ++/* This interrupt occurs when the PHY detects a link change. */ ++#if !defined (CONFIG_PM826) ++static void ++mii_link_interrupt(int irq, void * dev_id, struct pt_regs * regs) ++{ ++ struct net_device *dev = dev_id; ++ struct fcc_enet_private *fep = dev->priv; ++ ++ mii_do_cmd(dev, fep->phy->ack_int); ++ mii_do_cmd(dev, phy_cmd_relink); /* restart and display status */ ++} ++#endif /* !CONFIG_PM826 */ ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++#ifdef ORIGINAL_VERSION ++/* Set or clear the multicast filter for this adaptor. ++ * Skeleton taken from sunlance driver. ++ * The CPM Ethernet implementation allows Multicast as well as individual ++ * MAC address filtering. Some of the drivers check to make sure it is ++ * a group multicast address, and discard those that are not. I guess I ++ * will do the same for now, but just remove the test if you want ++ * individual filtering as well (do the upper net layers want or support ++ * this kind of feature?). 
++ */ ++static void ++set_multicast_list(struct net_device *dev) ++{ ++ struct fcc_enet_private *cep; ++ struct dev_mc_list *dmi; ++ u_char *mcptr, *tdptr; ++ volatile fcc_enet_t *ep; ++ int i, j; ++ ++ cep = (struct fcc_enet_private *)dev->priv; ++ ++return; ++ /* Get pointer to FCC area in parameter RAM. ++ */ ++ ep = (fcc_enet_t *)dev->base_addr; ++ ++ if (dev->flags&IFF_PROMISC) { ++ ++ /* Log any net taps. */ ++ printk("%s: Promiscuous mode enabled.\n", dev->name); ++ cep->fccp->fcc_fpsmr |= FCC_PSMR_PRO; ++ } else { ++ ++ cep->fccp->fcc_fpsmr &= ~FCC_PSMR_PRO; ++ ++ if (dev->flags & IFF_ALLMULTI) { ++ /* Catch all multicast addresses, so set the ++ * filter to all 1's. ++ */ ++ ep->fen_gaddrh = 0xffffffff; ++ ep->fen_gaddrl = 0xffffffff; ++ } ++ else { ++ /* Clear filter and add the addresses in the list. ++ */ ++ ep->fen_gaddrh = 0; ++ ep->fen_gaddrl = 0; ++ ++ dmi = dev->mc_list; ++ ++ for (i=0; imc_count; i++) { ++ ++ /* Only support group multicast for now. ++ */ ++ if (!(dmi->dmi_addr[0] & 1)) ++ continue; ++ ++ /* The address in dmi_addr is LSB first, ++ * and taddr is MSB first. We have to ++ * copy bytes MSB first from dmi_addr. ++ */ ++ mcptr = (u_char *)dmi->dmi_addr + 5; ++ tdptr = (u_char *)&ep->fen_taddrh; ++ for (j=0; j<6; j++) ++ *tdptr++ = *mcptr--; ++ ++ /* Ask CPM to run CRC and set bit in ++ * filter mask. ++ */ ++ cpmp->cp_cpcr = mk_cr_cmd(cep->fip->fc_cpmpage, ++ cep->fip->fc_cpmblock, 0x0c, ++ CPM_CR_SET_GADDR) | CPM_CR_FLG; ++ udelay(10); ++ while (cpmp->cp_cpcr & CPM_CR_FLG); ++ } ++ } ++ } ++} ++ ++ ++/* Set the individual MAC address. ++ */ ++int fcc_enet_set_mac_address(struct net_device *dev, void *p) ++{ ++ struct sockaddr *addr= (struct sockaddr *) p; ++ struct fcc_enet_private *cep; ++ volatile fcc_enet_t *ep; ++ unsigned char *eap; ++ int i; ++ ++ cep = (struct fcc_enet_private *)(dev->priv); ++ ep = cep->ep; ++ ++ if (netif_running(dev)) ++ return -EBUSY; ++ ++ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); ++ ++ eap = (unsigned char *) &(ep->fen_paddrh); ++ for (i=5; i>=0; i--) ++ *eap++ = addr->sa_data[i]; ++ ++ return 0; ++} ++#endif /* ORIGINAL_VERSION */ ++ ++ ++/* Initialize the CPM Ethernet on FCC. ++ */ ++int __init fec_enet_init(void) ++{ ++ struct rtnet_device *rtdev = NULL; ++ struct fcc_enet_private *cep; ++ fcc_info_t *fip; ++ int i, np; ++ volatile immap_t *immap; ++ volatile iop8260_t *io; ++ ++ immap = (immap_t *)IMAP_ADDR; /* and to internal registers */ ++ io = &immap->im_ioport; ++ ++ for (np = 0, fip = fcc_ports; ++ np < sizeof(fcc_ports) / sizeof(fcc_info_t); ++ np++, fip++) { ++ ++ /* Skip FCC ports not used for RTnet. ++ */ ++ if (np != rtnet_fcc - 1) continue; ++ ++ /* Allocate some private information and create an Ethernet device instance. ++ */ ++ if (!rx_pool_size) ++ rx_pool_size = RX_RING_SIZE * 2; ++ ++ rtdev = rt_alloc_etherdev(sizeof(struct fcc_enet_private), ++ rx_pool_size + TX_RING_SIZE); ++ if (rtdev == NULL) { ++ printk(KERN_ERR "fcc_enet: Could not allocate ethernet device.\n"); ++ return -1; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ rtdm_lock_init(&cep->lock); ++ cep->fip = fip; ++ fip->rtdev = rtdev; /* need for cleanup */ ++ ++ init_fcc_shutdown(fip, cep, immap); ++ init_fcc_ioports(fip, io, immap); ++ init_fcc_param(fip, rtdev, immap); ++ ++ rtdev->base_addr = (unsigned long)(cep->ep); ++ ++ /* The CPM Ethernet specific entries in the device ++ * structure. 
++ */ ++ rtdev->open = fcc_enet_open; ++ rtdev->hard_start_xmit = fcc_enet_start_xmit; ++ rtdev->stop = fcc_enet_close; ++ rtdev->hard_header = &rt_eth_header; ++ rtdev->get_stats = fcc_enet_get_stats; ++ ++ if ((i = rt_register_rtnetdev(rtdev))) { ++ rtdm_irq_disable(&cep->irq_handle); ++ rtdm_irq_free(&cep->irq_handle); ++ rtdev_free(rtdev); ++ return i; ++ } ++ init_fcc_startup(fip, rtdev); ++ ++ printk("%s: FCC%d ENET Version 0.4, %02x:%02x:%02x:%02x:%02x:%02x\n", ++ rtdev->name, fip->fc_fccnum + 1, ++ rtdev->dev_addr[0], rtdev->dev_addr[1], rtdev->dev_addr[2], ++ rtdev->dev_addr[3], rtdev->dev_addr[4], rtdev->dev_addr[5]); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Queue up command to detect the PHY and initialize the ++ * remainder of the interface. ++ */ ++ cep->phy_addr = 0; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), mii_discover_phy); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ } ++ ++ return 0; ++} ++ ++/* Make sure the device is shut down during initialization. ++*/ ++static void __init ++init_fcc_shutdown(fcc_info_t *fip, struct fcc_enet_private *cep, ++ volatile immap_t *immap) ++{ ++ volatile fcc_enet_t *ep; ++ volatile fcc_t *fccp; ++ ++ /* Get pointer to FCC area in parameter RAM. ++ */ ++ ep = (fcc_enet_t *)(&immap->im_dprambase[fip->fc_proff]); ++ ++ /* And another to the FCC register area. ++ */ ++ fccp = (volatile fcc_t *)(&immap->im_fcc[fip->fc_fccnum]); ++ cep->fccp = fccp; /* Keep the pointers handy */ ++ cep->ep = ep; ++ ++ /* Disable receive and transmit in case someone left it running. ++ */ ++ fccp->fcc_gfmr &= ~(FCC_GFMR_ENR | FCC_GFMR_ENT); ++} ++ ++/* Initialize the I/O pins for the FCC Ethernet. ++*/ ++static void __init ++init_fcc_ioports(fcc_info_t *fip, volatile iop8260_t *io, ++ volatile immap_t *immap) ++{ ++ ++ /* FCC1 pins are on port A/C. FCC2/3 are port B/C. ++ */ ++ if (fip->fc_proff == PROFF_FCC1) { ++ /* Configure port A and C pins for FCC1 Ethernet. ++ */ ++ io->iop_pdira &= ~PA1_DIRA0; ++ io->iop_pdira |= PA1_DIRA1; ++ io->iop_psora &= ~PA1_PSORA0; ++ io->iop_psora |= PA1_PSORA1; ++ io->iop_ppara |= (PA1_DIRA0 | PA1_DIRA1); ++ } ++ if (fip->fc_proff == PROFF_FCC2) { ++ /* Configure port B and C pins for FCC Ethernet. ++ */ ++ io->iop_pdirb &= ~PB2_DIRB0; ++ io->iop_pdirb |= PB2_DIRB1; ++ io->iop_psorb &= ~PB2_PSORB0; ++ io->iop_psorb |= PB2_PSORB1; ++ io->iop_pparb |= (PB2_DIRB0 | PB2_DIRB1); ++ } ++ if (fip->fc_proff == PROFF_FCC3) { ++ /* Configure port B and C pins for FCC Ethernet. ++ */ ++ io->iop_pdirb &= ~PB3_DIRB0; ++ io->iop_pdirb |= PB3_DIRB1; ++ io->iop_psorb &= ~PB3_PSORB0; ++ io->iop_psorb |= PB3_PSORB1; ++ io->iop_pparb |= (PB3_DIRB0 | PB3_DIRB1); ++ } ++ ++ /* Port C has clocks...... ++ */ ++ io->iop_psorc &= ~(fip->fc_trxclocks); ++ io->iop_pdirc &= ~(fip->fc_trxclocks); ++ io->iop_pparc |= fip->fc_trxclocks; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* ....and the MII serial clock/data. ++ */ ++#ifndef CONFIG_PM826 ++ IOP_DAT(io,fip->fc_port) |= (fip->fc_mdio | fip->fc_mdck); ++ IOP_ODR(io,fip->fc_port) &= ~(fip->fc_mdio | fip->fc_mdck); ++#endif /* CONFIG_PM826 */ ++ IOP_DIR(io,fip->fc_port) |= (fip->fc_mdio | fip->fc_mdck); ++ IOP_PAR(io,fip->fc_port) &= ~(fip->fc_mdio | fip->fc_mdck); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ /* Configure Serial Interface clock routing. ++ * First, clear all FCC bits to zero, ++ * then set the ones we want. 
++ */ ++ immap->im_cpmux.cmx_fcr &= ~(fip->fc_clockmask); ++ immap->im_cpmux.cmx_fcr |= fip->fc_clockroute; ++} ++ ++static void __init ++init_fcc_param(fcc_info_t *fip, struct rtnet_device *rtdev, ++ volatile immap_t *immap) ++{ ++ unsigned char *eap; ++ unsigned long mem_addr; ++ bd_t *bd; ++ int i, j; ++ struct fcc_enet_private *cep; ++ volatile fcc_enet_t *ep; ++ volatile cbd_t *bdp; ++ volatile cpm8260_t *cp; ++ ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ ep = cep->ep; ++ cp = cpmp; ++ ++ bd = (bd_t *)__res; ++ ++ /* Zero the whole thing.....I must have missed some individually. ++ * It works when I do this. ++ */ ++ memset((char *)ep, 0, sizeof(fcc_enet_t)); ++ ++ /* Allocate space for the buffer descriptors in the DP ram. ++ * These are relative offsets in the DP ram address space. ++ * Initialize base addresses for the buffer descriptors. ++ */ ++ cep->rx_bd_base = (cbd_t *)m8260_cpm_hostalloc(sizeof(cbd_t) * RX_RING_SIZE, 8); ++ ep->fen_genfcc.fcc_rbase = __pa(cep->rx_bd_base); ++ cep->tx_bd_base = (cbd_t *)m8260_cpm_hostalloc(sizeof(cbd_t) * TX_RING_SIZE, 8); ++ ep->fen_genfcc.fcc_tbase = __pa(cep->tx_bd_base); ++ ++ cep->dirty_tx = cep->cur_tx = cep->tx_bd_base; ++ cep->cur_rx = cep->rx_bd_base; ++ ++ ep->fen_genfcc.fcc_rstate = (CPMFCR_GBL | CPMFCR_EB) << 24; ++ ep->fen_genfcc.fcc_tstate = (CPMFCR_GBL | CPMFCR_EB) << 24; ++ ++ /* Set maximum bytes per receive buffer. ++ * It must be a multiple of 32. ++ */ ++ ep->fen_genfcc.fcc_mrblr = PKT_MAXBLR_SIZE; ++ ++ /* Allocate space in the reserved FCC area of DPRAM for the ++ * internal buffers. No one uses this space (yet), so we ++ * can do this. Later, we will add resource management for ++ * this area. ++ */ ++ mem_addr = CPM_FCC_SPECIAL_BASE + (fip->fc_fccnum * 128); ++ ep->fen_genfcc.fcc_riptr = mem_addr; ++ ep->fen_genfcc.fcc_tiptr = mem_addr+32; ++ ep->fen_padptr = mem_addr+64; ++ memset((char *)(&(immap->im_dprambase[(mem_addr+64)])), 0x88, 32); ++ ++ ep->fen_genfcc.fcc_rbptr = 0; ++ ep->fen_genfcc.fcc_tbptr = 0; ++ ep->fen_genfcc.fcc_rcrc = 0; ++ ep->fen_genfcc.fcc_tcrc = 0; ++ ep->fen_genfcc.fcc_res1 = 0; ++ ep->fen_genfcc.fcc_res2 = 0; ++ ++ ep->fen_camptr = 0; /* CAM isn't used in this driver */ ++ ++ /* Set CRC preset and mask. ++ */ ++ ep->fen_cmask = 0xdebb20e3; ++ ep->fen_cpres = 0xffffffff; ++ ++ ep->fen_crcec = 0; /* CRC Error counter */ ++ ep->fen_alec = 0; /* alignment error counter */ ++ ep->fen_disfc = 0; /* discard frame counter */ ++ ep->fen_retlim = 15; /* Retry limit threshold */ ++ ep->fen_pper = 0; /* Normal persistence */ ++ ++ /* Clear hash filter tables. ++ */ ++ ep->fen_gaddrh = 0; ++ ep->fen_gaddrl = 0; ++ ep->fen_iaddrh = 0; ++ ep->fen_iaddrl = 0; ++ ++ /* Clear the Out-of-sequence TxBD. ++ */ ++ ep->fen_tfcstat = 0; ++ ep->fen_tfclen = 0; ++ ep->fen_tfcptr = 0; ++ ++ ep->fen_mflr = PKT_MAXBUF_SIZE; /* maximum frame length register */ ++ ep->fen_minflr = PKT_MINBUF_SIZE; /* minimum frame length register */ ++ ++ /* Set Ethernet station address. ++ * ++ * This is supplied in the board information structure, so we ++ * copy that into the controller. 
++ */ ++ eap = (unsigned char *)&(ep->fen_paddrh); ++#if defined(CONFIG_CPU86) || defined(CONFIG_TQM8260) ++ /* ++ * TQM8260 and CPU86 use sequential MAC addresses ++ */ ++ *eap++ = rtdev->dev_addr[5] = bd->bi_enetaddr[5] + fip->fc_fccnum; ++ for (i=4; i>=0; i--) { ++ *eap++ = rtdev->dev_addr[i] = bd->bi_enetaddr[i]; ++ } ++#elif defined(CONFIG_PM826) ++ *eap++ = rtdev->dev_addr[5] = bd->bi_enetaddr[5] + fip->fc_fccnum + 1; ++ for (i=4; i>=0; i--) { ++ *eap++ = rtdev->dev_addr[i] = bd->bi_enetaddr[i]; ++ } ++#else ++ /* ++ * So, far we have only been given one Ethernet address. We make ++ * it unique by toggling selected bits in the upper byte of the ++ * non-static part of the address (for the second and third ports, ++ * the first port uses the address supplied as is). ++ */ ++ for (i=5; i>=0; i--) { ++ if (i == 3 && fip->fc_fccnum != 0) { ++ rtdev->dev_addr[i] = bd->bi_enetaddr[i]; ++ rtdev->dev_addr[i] ^= (1 << (7 - fip->fc_fccnum)); ++ *eap++ = dev->dev_addr[i]; ++ } ++ else { ++ *eap++ = dev->dev_addr[i] = bd->bi_enetaddr[i]; ++ } ++ } ++#endif ++ ++ ep->fen_taddrh = 0; ++ ep->fen_taddrm = 0; ++ ep->fen_taddrl = 0; ++ ++ ep->fen_maxd1 = PKT_MAXDMA_SIZE; /* maximum DMA1 length */ ++ ep->fen_maxd2 = PKT_MAXDMA_SIZE; /* maximum DMA2 length */ ++ ++ /* Clear stat counters, in case we ever enable RMON. ++ */ ++ ep->fen_octc = 0; ++ ep->fen_colc = 0; ++ ep->fen_broc = 0; ++ ep->fen_mulc = 0; ++ ep->fen_uspc = 0; ++ ep->fen_frgc = 0; ++ ep->fen_ospc = 0; ++ ep->fen_jbrc = 0; ++ ep->fen_p64c = 0; ++ ep->fen_p65c = 0; ++ ep->fen_p128c = 0; ++ ep->fen_p256c = 0; ++ ep->fen_p512c = 0; ++ ep->fen_p1024c = 0; ++ ++ ep->fen_rfthr = 0; /* Suggested by manual */ ++ ep->fen_rfcnt = 0; ++ ep->fen_cftype = 0; ++ ++ /* Now allocate the host memory pages and initialize the ++ * buffer descriptors. ++ */ ++ bdp = cep->tx_bd_base; ++ for (i=0; icbd_sc = 0; ++ bdp->cbd_datlen = 0; ++ bdp->cbd_bufaddr = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ bdp = cep->rx_bd_base; ++ for (i=0; icbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR; ++ bdp->cbd_datlen = 0; ++ bdp->cbd_bufaddr = __pa(mem_addr); ++ mem_addr += FCC_ENET_RX_FRSIZE; ++ bdp++; ++ } ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* Let's re-initialize the channel now. We have to do it later ++ * than the manual describes because we have just now finished ++ * the BD initialization. ++ */ ++ cp->cp_cpcr = mk_cr_cmd(fip->fc_cpmpage, fip->fc_cpmblock, 0x0c, ++ CPM_CR_INIT_TRX) | CPM_CR_FLG; ++ while (cp->cp_cpcr & CPM_CR_FLG); ++ ++ cep->skb_cur = cep->skb_dirty = 0; ++} ++ ++/* Let 'er rip. ++*/ ++static void __init ++init_fcc_startup(fcc_info_t *fip, struct rtnet_device *rtdev) ++{ ++ volatile fcc_t *fccp; ++ struct fcc_enet_private *cep; ++ ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ fccp = cep->fccp; ++ ++ fccp->fcc_fcce = 0xffff; /* Clear any pending events */ ++ ++ /* Enable interrupts for transmit error, complete frame ++ * received, and any transmit buffer we have also set the ++ * interrupt flag. ++ */ ++ fccp->fcc_fccm = (FCC_ENET_TXE | FCC_ENET_RXF | FCC_ENET_TXB); ++ ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ /* Install our interrupt handler. 
++ */ ++ if (rtdm_irq_request(&cep->irq_handle, fip->fc_interrupt, ++ fcc_enet_interrupt, 0, "rt_mpc8260_fcc_enet", rtdev)) { ++ printk(KERN_ERR "Couldn't request IRQ %d\n", rtdev->irq); ++ rtdev_free(rtdev); ++ return; ++ } ++ ++ ++#if defined (CONFIG_XENO_DRIVERS_NET_USE_MDIO) && !defined (CONFIG_PM826) ++# ifndef PHY_INTERRUPT ++# error Want to use MDIO, but PHY_INTERRUPT not defined! ++# endif ++ if (request_8xxirq(PHY_INTERRUPT, mii_link_interrupt, 0, ++ "mii", dev) < 0) ++ printk("Can't get MII IRQ %d\n", PHY_INTERRUPT); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO, CONFIG_PM826 */ ++ ++ /* Set GFMR to enable Ethernet operating mode. ++ */ ++#ifndef CONFIG_EST8260 ++ fccp->fcc_gfmr = (FCC_GFMR_TCI | FCC_GFMR_MODE_ENET); ++#else ++ fccp->fcc_gfmr = FCC_GFMR_MODE_ENET; ++#endif ++ ++ /* Set sync/delimiters. ++ */ ++ fccp->fcc_fdsr = 0xd555; ++ ++ /* Set protocol specific processing mode for Ethernet. ++ * This has to be adjusted for Full Duplex operation after we can ++ * determine how to detect that. ++ */ ++ fccp->fcc_fpsmr = FCC_PSMR_ENCRC; ++ ++#ifdef CONFIG_ADS8260 ++ /* Enable the PHY. ++ */ ++ ads_csr_addr[1] |= BCSR1_FETH_RST; /* Remove reset */ ++ ads_csr_addr[1] &= ~BCSR1_FETHIEN; /* Enable */ ++#endif ++ ++#if defined(CONFIG_XENO_DRIVERS_NET_USE_MDIO) || defined(CONFIG_TQM8260) ++ /* start in full duplex mode, and negotiate speed */ ++ fcc_restart (rtdev, 1); ++#else ++ /* start in half duplex mode */ ++ fcc_restart (rtdev, 0); ++#endif ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++/* MII command/status interface. ++ * I'm not going to describe all of the details. You can find the ++ * protocol definition in many other places, including the data sheet ++ * of most PHY parts. ++ * I wonder what "they" were thinking (maybe weren't) when they leave ++ * the I2C in the CPM but I have to toggle these bits...... ++ * ++ * Timing is a critical, especially on faster CPU's ... ++ */ ++#define MDIO_DELAY 5 ++ ++#define FCC_MDIO(bit) do { \ ++ udelay(MDIO_DELAY); \ ++ if (bit) \ ++ IOP_DAT(io,fip->fc_port) |= fip->fc_mdio; \ ++ else \ ++ IOP_DAT(io,fip->fc_port) &= ~fip->fc_mdio; \ ++} while(0) ++ ++#define FCC_MDC(bit) do { \ ++ udelay(MDIO_DELAY); \ ++ if (bit) \ ++ IOP_DAT(io,fip->fc_port) |= fip->fc_mdck; \ ++ else \ ++ IOP_DAT(io,fip->fc_port) &= ~fip->fc_mdck; \ ++} while(0) ++ ++static uint ++mii_send_receive(fcc_info_t *fip, uint cmd) ++{ ++ uint retval; ++ int read_op, i, off; ++ volatile immap_t *immap; ++ volatile iop8260_t *io; ++ ++ immap = (immap_t *)IMAP_ADDR; ++ io = &immap->im_ioport; ++ ++ IOP_DIR(io,fip->fc_port) |= (fip->fc_mdio | fip->fc_mdck); ++ ++ read_op = ((cmd & 0xf0000000) == 0x60000000); ++ ++ /* Write preamble ++ */ ++ for (i = 0; i < 32; i++) ++ { ++ FCC_MDC(0); ++ FCC_MDIO(1); ++ FCC_MDC(1); ++ } ++ ++ /* Write data ++ */ ++ for (i = 0, off = 31; i < (read_op ? 
14 : 32); i++, --off) ++ { ++ FCC_MDC(0); ++ FCC_MDIO((cmd >> off) & 0x00000001); ++ FCC_MDC(1); ++ } ++ ++ retval = cmd; ++ ++ if (read_op) ++ { ++ retval >>= 16; ++ ++ FCC_MDC(0); ++ IOP_DIR(io,fip->fc_port) &= ~fip->fc_mdio; ++ FCC_MDC(1); ++ FCC_MDC(0); ++ ++ for (i = 0, off = 15; i < 16; i++, off--) ++ { ++ FCC_MDC(1); ++ udelay(MDIO_DELAY); ++ retval <<= 1; ++ if (IOP_DAT(io,fip->fc_port) & fip->fc_mdio) ++ retval++; ++ FCC_MDC(0); ++ } ++ } ++ ++ IOP_DIR(io,fip->fc_port) |= (fip->fc_mdio | fip->fc_mdck); ++ ++ for (i = 0; i < 32; i++) ++ { ++ FCC_MDC(0); ++ FCC_MDIO(1); ++ FCC_MDC(1); ++ } ++ ++ return retval; ++} ++ ++static void ++fcc_stop(struct net_device *dev) ++{ ++ volatile fcc_t *fccp; ++ struct fcc_enet_private *fcp; ++ ++ fcp = (struct fcc_enet_private *)(dev->priv); ++ fccp = fcp->fccp; ++ ++ /* Disable transmit/receive */ ++ fccp->fcc_gfmr &= ~(FCC_GFMR_ENR | FCC_GFMR_ENT); ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++static void ++fcc_restart(struct rtnet_device *rtdev, int duplex) ++{ ++ volatile fcc_t *fccp; ++ struct fcc_enet_private *fcp; ++ ++ fcp = (struct fcc_enet_private *)rtdev->priv; ++ fccp = fcp->fccp; ++ ++ if (duplex) ++ fccp->fcc_fpsmr |= (FCC_PSMR_FDE | FCC_PSMR_LPB); ++ else ++ fccp->fcc_fpsmr &= ~(FCC_PSMR_FDE | FCC_PSMR_LPB); ++ ++ /* Enable transmit/receive */ ++ fccp->fcc_gfmr |= FCC_GFMR_ENR | FCC_GFMR_ENT; ++} ++ ++static int ++fcc_enet_open(struct rtnet_device *rtdev) ++{ ++ struct fcc_enet_private *fep = rtdev->priv; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ fep->sequence_done = 0; ++ fep->link = 0; ++ ++ if (fep->phy) { ++ mii_do_cmd(dev, fep->phy->ack_int); ++ mii_do_cmd(dev, fep->phy->config); ++ mii_do_cmd(dev, phy_cmd_config); /* display configuration */ ++ while(!fep->sequence_done) ++ schedule(); ++ ++ mii_do_cmd(dev, fep->phy->startup); ++#ifdef CONFIG_PM826 ++ /* Read the autonegotiation results */ ++ mii_do_cmd(dev, fep->phy->ack_int); ++ mii_do_cmd(dev, phy_cmd_relink); ++#endif /* CONFIG_PM826 */ ++ rtnetif_start_queue(rtdev); ++ return 0; /* Success */ ++ } ++ return -ENODEV; /* No PHY we understand */ ++#else ++ fep->link = 1; ++ rtnetif_start_queue(rtdev); ++ return 0; /* Always succeed */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++} ++ ++static void __exit fcc_enet_cleanup(void) ++{ ++ struct rtnet_device *rtdev; ++ volatile immap_t *immap = (immap_t *)IMAP_ADDR; ++ struct fcc_enet_private *cep; ++ fcc_info_t *fip; ++ int np; ++ ++ for (np = 0, fip = fcc_ports; ++ np < sizeof(fcc_ports) / sizeof(fcc_info_t); ++ np++, fip++) { ++ ++ /* Skip FCC ports not used for RTnet. */ ++ if (np != rtnet_fcc - 1) continue; ++ ++ rtdev = fip->rtdev; ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ ++ rtdm_irq_disable(&cep->irq_handle); ++ rtdm_irq_free(&cep->irq_handle); ++ ++ init_fcc_shutdown(fip, cep, immap); ++ printk("%s: cleanup incomplete (m8260_cpm_dpfree does not exit)!\n", ++ rtdev->name); ++ rt_stack_disconnect(rtdev); ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ ++ printk("%s: unloaded\n", rtdev->name); ++ rtdev_free(rtdev); ++ fip++; ++ } ++} ++ ++module_init(fec_enet_init); ++module_exit(fcc_enet_cleanup); +--- linux/drivers/xenomai/net/drivers/r8169.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/r8169.c 2021-04-07 16:01:27.238634150 +0800 +@@ -0,0 +1,2046 @@ ++/* ++========================================================================= ++ r8169.c: A RealTek RTL8169s/8110s Gigabit Ethernet driver for Linux kernel 2.4.x. 
++ -------------------------------------------------------------------- ++ ++ History: ++ Feb 4 2002 - created initially by ShuChen . ++ May 20 2002 - Add link status force-mode and TBI mode support. ++========================================================================= ++ ++RTL8169_VERSION "1.1" <2002/10/4> ++ ++ The bit4:0 of MII register 4 is called "selector field", and have to be ++ 00001b to indicate support of IEEE std 802.3 during NWay process of ++ exchanging Link Code Word (FLP). ++ ++RTL8169_VERSION "1.2" <2003/6/17> ++ Update driver module name. ++ Modify ISR. ++ Add chip mcfg. ++ ++RTL8169_VERSION "1.3" <2003/6/20> ++ Add chip pcfg. ++ Add priv->phy_timer_t, rtl8169_phy_timer_t_handler() ++ Add rtl8169_hw_PHY_config() ++ Add rtl8169_hw_PHY_reset() ++ ++RTL8169_VERSION "1.4" <2003/7/14> ++ Add tx_bytes, rx_bytes. ++ ++RTL8169_VERSION "1.5" <2003/7/18> ++ Set 0x0000 to PHY at offset 0x0b. ++ Modify chip mcfg, pcfg ++ Force media for multiple card. ++RTL8169_VERSION "1.6" <2003/8/25> ++ Modify receive data buffer. ++ ++RTL8169_VERSION "1.7" <2003/9/18> ++ Add Jumbo Frame support. ++ ++RTL8169_VERSION "1.8" <2003/10/21> ++ Performance and CPU Utilizaion Enhancement. ++ ++RTL8169_VERSION "1.9" <2003/12/29> ++ Enable Tx/Rx flow control. ++ ++RTL8169_VERSION "2.0" <2004/03/26> ++ Beta version. ++ Support for linux 2.6.x ++ ++RTL8169_VERSION "2.1" <2004/07/05> ++ Modify parameters. ++ ++RTL8169_VERSION "2.2" <2004/08/09> ++ Add.pci_dma_sync_single. ++ Add pci_alloc_consistent()/pci_free_consistent(). ++ Revise parameters. ++ Recognize our interrupt for linux 2.6.x. ++*/ ++ ++/* ++ * Ported to RTnet by Klaus Keppler ++ * All RTnet porting stuff may be used and distributed according to the ++ * terms of the GNU General Public License (GPL). ++ * ++ * Version 2.2-04 <2005/08/22> ++ * Initial release of this driver, based on RTL8169 driver v2.2 ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include /*** RTnet ***/ ++ ++#define RTL8169_VERSION "2.2-04" ++#define MODULENAME "rt_r8169" ++#define RTL8169_DRIVER_NAME MODULENAME " RTnet Gigabit Ethernet driver " RTL8169_VERSION ++#define PFX MODULENAME ": " ++ ++//#define RTL8169_DEBUG ++#undef RTL8169_JUMBO_FRAME_SUPPORT /*** RTnet: no not enable! ***/ ++#undef RTL8169_HW_FLOW_CONTROL_SUPPORT ++ ++ ++#undef RTL8169_IOCTL_SUPPORT /*** RTnet: do not enable! ***/ ++#undef RTL8169_DYNAMIC_CONTROL ++#undef RTL8169_USE_IO ++ ++ ++#ifdef RTL8169_DEBUG ++ #define assert(expr) \ ++ if(!(expr)) { printk( "Assertion failed! %s,%s,%s,line=%d\n", #expr,__FILE__,__FUNCTION__,__LINE__); } ++ /*** RTnet / : rt_assert must be used instead of assert() within interrupt context! ***/ ++ #define rt_assert(expr) \ ++ if(!(expr)) { rtdm_printk( "Assertion failed! %s,%s,%s,line=%d\n", #expr,__FILE__,__FUNCTION__,__LINE__); } ++ /*** RTnet / : RT_DBG_PRINT must be used instead of DBG_PRINT() within interrupt context! ***/ ++ #define DBG_PRINT( fmt, args...) printk("r8169: " fmt, ## args); ++ #define RT_DBG_PRINT( fmt, args...) rtdm_printk("r8169: " fmt, ## args); ++#else ++ #define assert(expr) do {} while (0) ++ #define rt_assert(expr) do {} while (0) ++ #define DBG_PRINT( fmt, args...) ; ++ #define RT_DBG_PRINT( fmt, args...) ; ++#endif // end of #ifdef RTL8169_DEBUG ++ ++/* media options */ ++#define MAX_UNITS 8 ++static int media[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++ ++/*** RTnet ***/ ++static int cards[MAX_UNITS] = { [0 ... 
(MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++/*** /RTnet ***/ ++ ++/* Enable debugging output */ ++#define DEBUG_RX_SYNC 1 ++#define DEBUG_RX_OTHER 2 ++#define DEBUG_TX_SYNC 4 ++#define DEBUG_TX_OTHER 8 ++#define DEBUG_RUN 16 ++static int local_debug = -1; ++static int r8169_debug = -1; ++module_param_named(debug, local_debug, int, 0444); ++MODULE_PARM_DESC(debug, MODULENAME " debug level (bit mask, see docs!)"); ++ ++ ++/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ ++static int max_interrupt_work = 20; ++ ++/* MAC address length*/ ++#define MAC_ADDR_LEN 6 ++ ++#define RX_FIFO_THRESH 7 /* 7 means NO threshold, Rx buffer level before first PCI xfer. */ ++#define RX_DMA_BURST 7 /* Maximum PCI burst, '6' is 1024 */ ++#define TX_DMA_BURST 7 /* Maximum PCI burst, '6' is 1024 */ ++#define ETTh 0x3F /* 0x3F means NO threshold */ ++ ++#define ETH_HDR_LEN 14 ++#define DEFAULT_MTU 1500 ++#define DEFAULT_RX_BUF_LEN 1536 ++ ++ ++#ifdef RTL8169_JUMBO_FRAME_SUPPORT ++#define MAX_JUMBO_FRAME_MTU ( 10000 ) ++#define MAX_RX_SKBDATA_SIZE ( MAX_JUMBO_FRAME_MTU + ETH_HDR_LEN ) ++#else ++#define MAX_RX_SKBDATA_SIZE 1600 ++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ ++ ++#define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */ ++ ++//#define NUM_TX_DESC 64 /* Number of Tx descriptor registers*/ ++//#define NUM_RX_DESC 64 /* Number of Rx descriptor registers*/ ++ ++#define TX_RING_SIZE 16 /*** RTnet ***/ ++#define NUM_TX_DESC TX_RING_SIZE /* Number of Tx descriptor registers*/ /*** RTnet ***/ ++#define RX_RING_SIZE 8 /*** RTnet ***/ ++#define NUM_RX_DESC RX_RING_SIZE /* Number of Rx descriptor registers*/ /*** RTnet ***/ ++ ++#define RTL_MIN_IO_SIZE 0x80 ++#define TX_TIMEOUT (6*HZ) ++//#define RTL8169_TIMER_EXPIRE_TIME 100 //100 /*** RTnet ***/ ++ ++ ++#ifdef RTL8169_USE_IO ++#define RTL_W8(reg, val8) outb ((val8), ioaddr + (reg)) ++#define RTL_W16(reg, val16) outw ((val16), ioaddr + (reg)) ++#define RTL_W32(reg, val32) outl ((val32), ioaddr + (reg)) ++#define RTL_R8(reg) inb (ioaddr + (reg)) ++#define RTL_R16(reg) inw (ioaddr + (reg)) ++#define RTL_R32(reg) ((unsigned long) inl (ioaddr + (reg))) ++#else ++/* write/read MMIO register */ ++#define RTL_W8(reg, val8) writeb ((val8), (void *)ioaddr + (reg)) ++#define RTL_W16(reg, val16) writew ((val16), (void *)ioaddr + (reg)) ++#define RTL_W32(reg, val32) writel ((val32), (void *)ioaddr + (reg)) ++#define RTL_R8(reg) readb ((void *)ioaddr + (reg)) ++#define RTL_R16(reg) readw ((void *)ioaddr + (reg)) ++#define RTL_R32(reg) ((unsigned long) readl ((void *)ioaddr + (reg))) ++#endif ++ ++#define MCFG_METHOD_1 0x01 ++#define MCFG_METHOD_2 0x02 ++#define MCFG_METHOD_3 0x03 ++#define MCFG_METHOD_4 0x04 ++ ++#define PCFG_METHOD_1 0x01 //PHY Reg 0x03 bit0-3 == 0x0000 ++#define PCFG_METHOD_2 0x02 //PHY Reg 0x03 bit0-3 == 0x0001 ++#define PCFG_METHOD_3 0x03 //PHY Reg 0x03 bit0-3 == 0x0002 ++ ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++#include "r8169_callback.h" ++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL ++ ++ ++const static struct { ++ const char *name; ++ u8 mcfg; /* depend on RTL8169 docs */ ++ u32 RxConfigMask; /* should clear the bits supported by this chip */ ++} rtl_chip_info[] = { ++ { "RTL8169", MCFG_METHOD_1, 0xff7e1880 }, ++ { "RTL8169s/8110s", MCFG_METHOD_2, 0xff7e1880 }, ++ { "RTL8169s/8110s", MCFG_METHOD_3, 0xff7e1880 }, ++}; ++ ++ ++static struct pci_device_id rtl8169_pci_tbl[] = { ++ { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 
0x8136), 0, 0, 2 }, ++ { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, 1 }, ++ { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, 1 }, ++ { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4300), 0, 0, 1 }, /* D-Link DGE-528T */ ++ {0,}, ++}; ++ ++ ++MODULE_DEVICE_TABLE (pci, rtl8169_pci_tbl); ++ ++ ++enum RTL8169_registers { ++ MAC0 = 0x0, ++ MAR0 = 0x8, ++ TxDescStartAddr = 0x20, ++ TxHDescStartAddr= 0x28, ++ FLASH = 0x30, ++ ERSR = 0x36, ++ ChipCmd = 0x37, ++ TxPoll = 0x38, ++ IntrMask = 0x3C, ++ IntrStatus = 0x3E, ++ TxConfig = 0x40, ++ RxConfig = 0x44, ++ RxMissed = 0x4C, ++ Cfg9346 = 0x50, ++ Config0 = 0x51, ++ Config1 = 0x52, ++ Config2 = 0x53, ++ Config3 = 0x54, ++ Config4 = 0x55, ++ Config5 = 0x56, ++ MultiIntr = 0x5C, ++ PHYAR = 0x60, ++ TBICSR = 0x64, ++ TBI_ANAR = 0x68, ++ TBI_LPAR = 0x6A, ++ PHYstatus = 0x6C, ++ RxMaxSize = 0xDA, ++ CPlusCmd = 0xE0, ++ RxDescStartAddr = 0xE4, ++ ETThReg = 0xEC, ++ FuncEvent = 0xF0, ++ FuncEventMask = 0xF4, ++ FuncPresetState = 0xF8, ++ FuncForceEvent = 0xFC, ++}; ++ ++enum RTL8169_register_content { ++ /*InterruptStatusBits*/ ++ SYSErr = 0x8000, ++ PCSTimeout = 0x4000, ++ SWInt = 0x0100, ++ TxDescUnavail = 0x80, ++ RxFIFOOver = 0x40, ++ LinkChg = 0x20, ++ RxOverflow = 0x10, ++ TxErr = 0x08, ++ TxOK = 0x04, ++ RxErr = 0x02, ++ RxOK = 0x01, ++ ++ /*RxStatusDesc*/ ++ RxRES = 0x00200000, ++ RxCRC = 0x00080000, ++ RxRUNT= 0x00100000, ++ RxRWT = 0x00400000, ++ ++ /*ChipCmdBits*/ ++ CmdReset = 0x10, ++ CmdRxEnb = 0x08, ++ CmdTxEnb = 0x04, ++ RxBufEmpty = 0x01, ++ ++ /*Cfg9346Bits*/ ++ Cfg9346_Lock = 0x00, ++ Cfg9346_Unlock = 0xC0, ++ ++ /*rx_mode_bits*/ ++ AcceptErr = 0x20, ++ AcceptRunt = 0x10, ++ AcceptBroadcast = 0x08, ++ AcceptMulticast = 0x04, ++ AcceptMyPhys = 0x02, ++ AcceptAllPhys = 0x01, ++ ++ /*RxConfigBits*/ ++ RxCfgFIFOShift = 13, ++ RxCfgDMAShift = 8, ++ ++ /*TxConfigBits*/ ++ TxInterFrameGapShift = 24, ++ TxDMAShift = 8, ++ ++ /* Config2 register */ ++ MSIEnable = (1 << 5), ++ ++ /*rtl8169_PHYstatus*/ ++ TBI_Enable = 0x80, ++ TxFlowCtrl = 0x40, ++ RxFlowCtrl = 0x20, ++ _1000bpsF = 0x10, ++ _100bps = 0x08, ++ _10bps = 0x04, ++ LinkStatus = 0x02, ++ FullDup = 0x01, ++ ++ /*GIGABIT_PHY_registers*/ ++ PHY_CTRL_REG = 0, ++ PHY_STAT_REG = 1, ++ PHY_AUTO_NEGO_REG = 4, ++ PHY_1000_CTRL_REG = 9, ++ ++ /*GIGABIT_PHY_REG_BIT*/ ++ PHY_Restart_Auto_Nego = 0x0200, ++ PHY_Enable_Auto_Nego = 0x1000, ++ ++ //PHY_STAT_REG = 1; ++ PHY_Auto_Neco_Comp = 0x0020, ++ ++ //PHY_AUTO_NEGO_REG = 4; ++ PHY_Cap_10_Half = 0x0020, ++ PHY_Cap_10_Full = 0x0040, ++ PHY_Cap_100_Half = 0x0080, ++ PHY_Cap_100_Full = 0x0100, ++ ++ //PHY_1000_CTRL_REG = 9; ++ PHY_Cap_1000_Full = 0x0200, ++ PHY_Cap_1000_Half = 0x0100, ++ ++ PHY_Cap_PAUSE = 0x0400, ++ PHY_Cap_ASYM_PAUSE = 0x0800, ++ ++ PHY_Cap_Null = 0x0, ++ ++ /*_MediaType*/ ++ _10_Half = 0x01, ++ _10_Full = 0x02, ++ _100_Half = 0x04, ++ _100_Full = 0x08, ++ _1000_Full = 0x10, ++ ++ /*_TBICSRBit*/ ++ TBILinkOK = 0x02000000, ++}; ++ ++ ++ ++enum _DescStatusBit { ++ OWNbit = 0x80000000, ++ EORbit = 0x40000000, ++ FSbit = 0x20000000, ++ LSbit = 0x10000000, ++}; ++ ++ ++struct TxDesc { ++ u32 status; ++ u32 vlan_tag; ++ u32 buf_addr; ++ u32 buf_Haddr; ++}; ++ ++struct RxDesc { ++ u32 status; ++ u32 vlan_tag; ++ u32 buf_addr; ++ u32 buf_Haddr; ++}; ++ ++ ++typedef struct timer_list rt_timer_t; ++ ++enum rtl8169_features { ++ RTL_FEATURE_WOL = (1 << 0), ++ RTL_FEATURE_MSI = (1 << 1), ++ RTL_FEATURE_GMII = (1 << 2), ++}; ++ ++ ++struct rtl8169_private { ++ unsigned long ioaddr; /* memory map physical address*/ ++ struct pci_dev *pci_dev; /* 
Index of PCI device */ ++ struct net_device_stats stats; /* statistics of net device */ ++ rtdm_lock_t lock; /* spin lock flag */ /*** RTnet ***/ ++ int chipset; ++ int mcfg; ++ int pcfg; ++/* rt_timer_t r8169_timer; */ /*** RTnet ***/ ++/* unsigned long expire_time; */ /*** RTnet ***/ ++ ++ unsigned long phy_link_down_cnt; ++ unsigned long cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */ ++ unsigned long cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */ ++ unsigned long dirty_tx; ++ struct TxDesc *TxDescArray; /* Index of 256-alignment Tx Descriptor buffer */ ++ struct RxDesc *RxDescArray; /* Index of 256-alignment Rx Descriptor buffer */ ++ struct rtskb *Tx_skbuff[NUM_TX_DESC];/* Index of Transmit data buffer */ /*** RTnet ***/ ++ struct rtskb *Rx_skbuff[NUM_RX_DESC];/* Receive data buffer */ /*** RTnet ***/ ++ unsigned char drvinit_fail; ++ ++ dma_addr_t txdesc_array_dma_addr[NUM_TX_DESC]; ++ dma_addr_t rxdesc_array_dma_addr[NUM_RX_DESC]; ++ dma_addr_t rx_skbuff_dma_addr[NUM_RX_DESC]; ++ ++ void *txdesc_space; ++ dma_addr_t txdesc_phy_dma_addr; ++ int sizeof_txdesc_space; ++ ++ void *rxdesc_space; ++ dma_addr_t rxdesc_phy_dma_addr; ++ int sizeof_rxdesc_space; ++ ++ int curr_mtu_size; ++ int tx_pkt_len; ++ int rx_pkt_len; ++ ++ int hw_rx_pkt_len; ++ ++ int rx_buf_size; /*** RTnet / ***/ ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++ struct r8169_cb_t rt; ++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL ++ ++ unsigned char linkstatus; ++ rtdm_irq_t irq_handle; /*** RTnet ***/ ++ ++ unsigned features; ++}; ++ ++ ++MODULE_AUTHOR ("Realtek, modified for RTnet by Klaus.Keppler@gmx.de"); ++MODULE_DESCRIPTION ("RealTek RTL-8169 Gigabit Ethernet driver"); ++module_param_array(media, int, NULL, 0444); ++MODULE_LICENSE("GPL"); ++ ++ ++static int rtl8169_open (struct rtnet_device *rtdev); ++static int rtl8169_start_xmit (struct rtskb *skb, struct rtnet_device *rtdev); ++ ++static int rtl8169_interrupt(rtdm_irq_t *irq_handle); ++ ++static void rtl8169_init_ring (struct rtnet_device *rtdev); ++static void rtl8169_hw_start (struct rtnet_device *rtdev); ++static int rtl8169_close (struct rtnet_device *rtdev); ++static inline u32 ether_crc (int length, unsigned char *data); ++static void rtl8169_set_rx_mode (struct rtnet_device *rtdev); ++/* static void rtl8169_tx_timeout (struct net_device *dev); */ /*** RTnet ***/ ++static struct net_device_stats *rtl8169_get_stats(struct rtnet_device *netdev); ++ ++#ifdef RTL8169_JUMBO_FRAME_SUPPORT ++static int rtl8169_change_mtu(struct net_device *dev, int new_mtu); ++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ ++static void rtl8169_hw_PHY_config (struct rtnet_device *rtdev); ++/* static void rtl8169_hw_PHY_reset(struct net_device *dev); */ /*** RTnet ***/ ++static const u16 rtl8169_intr_mask = LinkChg | RxOverflow | RxFIFOOver | TxErr | TxOK | RxErr | RxOK | SYSErr; /*** added SYSErr ***/ ++static const unsigned int rtl8169_rx_config = (RX_FIFO_THRESH << RxCfgFIFOShift) | (RX_DMA_BURST << RxCfgDMAShift) | 0x0000000E; ++ ++/*** these functions are backported from Linux-2.6.12's r8169.c driver ***/ ++static void rtl8169_irq_mask_and_ack(unsigned long ioaddr); ++/* static void rtl8169_asic_down(unsigned long ioaddr); */ /*** RTnet ***/ ++static void rtl8169_pcierr_interrupt(struct rtnet_device *rtdev); ++ ++#define RTL8169_WRITE_GMII_REG_BIT( ioaddr, reg, bitnum, bitval )\ ++{ \ ++ int val; \ ++ if( bitval == 1 ){ val = ( RTL8169_READ_GMII_REG( ioaddr, reg ) | (bitval< 0 ; i -- ){ ++ // Check if the RTL8169 has completed writing to the 
specified MII register ++ if( ! (RTL_R32(PHYAR)&0x80000000) ){ ++ break; ++ } ++ else{ ++ udelay(100); ++ }// end of if( ! (RTL_R32(PHYAR)&0x80000000) ) ++ }// end of for() loop ++} ++//================================================================= ++int RTL8169_READ_GMII_REG( unsigned long ioaddr, int RegAddr ) ++{ ++ int i, value = -1; ++ ++ RTL_W32 ( PHYAR, 0x0 | (RegAddr&0xFF)<<16 ); ++ udelay(1000); ++ ++ for( i = 2000; i > 0 ; i -- ){ ++ // Check if the RTL8169 has completed retrieving data from the specified MII register ++ if( RTL_R32(PHYAR) & 0x80000000 ){ ++ value = (int)( RTL_R32(PHYAR)&0xFFFF ); ++ break; ++ } ++ else{ ++ udelay(100); ++ }// end of if( RTL_R32(PHYAR) & 0x80000000 ) ++ }// end of for() loop ++ return value; ++} ++ ++ ++#ifdef RTL8169_IOCTL_SUPPORT ++#include "r8169_ioctl.c" ++#endif //end #ifdef RTL8169_IOCTL_SUPPORT ++ ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++#include "r8169_callback.c" ++#endif ++ ++ ++ ++//====================================================================================================== ++//====================================================================================================== ++static int rtl8169_init_board ( struct pci_dev *pdev, struct rtnet_device **dev_out, unsigned long *ioaddr_out, int region) ++{ ++ unsigned long ioaddr = 0; ++ struct rtnet_device *rtdev; ++ struct rtl8169_private *priv; ++ int rc, i; ++ unsigned long mmio_start, mmio_end, mmio_flags, mmio_len; ++ ++ ++ assert (pdev != NULL); ++ assert (ioaddr_out != NULL); ++ ++ *ioaddr_out = 0; ++ *dev_out = NULL; ++ ++ /*** RTnet ***/ ++ rtdev = rt_alloc_etherdev(sizeof(struct rtl8169_private), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (rtdev == NULL) { ++ printk (KERN_ERR PFX "unable to alloc new ethernet\n"); ++ return -ENOMEM; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ rtdev->sysbind = &pdev->dev; ++ /*** /RTnet ***/ ++ ++ priv = rtdev->priv; ++ ++ /* disable ASPM completely as that cause random device stop working ++ * problems as well as full system hangs for some PCIe devices users */ ++ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 | ++ PCIE_LINK_STATE_CLKPM); ++ ++ // enable device (incl. PCI PM wakeup and hotplug setup) ++ rc = pci_enable_device (pdev); ++ if (rc) ++ goto err_out; ++ ++ if (pci_set_mwi(pdev) < 0) ++ printk("R8169: Mem-Wr-Inval unavailable\n"); ++ ++ mmio_start = pci_resource_start (pdev, region); ++ mmio_end = pci_resource_end (pdev, region); ++ mmio_flags = pci_resource_flags (pdev, region); ++ mmio_len = pci_resource_len (pdev, region); ++ ++ // make sure PCI base addr 1 is MMIO ++ if (!(mmio_flags & IORESOURCE_MEM)) { ++ printk (KERN_ERR PFX "region #%d not an MMIO resource, aborting\n", region); ++ rc = -ENODEV; ++ goto err_out; ++ } ++ ++ // check for weird/broken PCI region reporting ++ if ( mmio_len < RTL_MIN_IO_SIZE ) { ++ printk (KERN_ERR PFX "Invalid PCI region size(s), aborting\n"); ++ rc = -ENODEV; ++ goto err_out; ++ } ++ ++ ++ rc = pci_request_regions (pdev, rtdev->name); ++ if (rc) ++ goto err_out; ++ ++ // enable PCI bus-mastering ++ pci_set_master (pdev); ++ ++#ifdef RTL8169_USE_IO ++ ioaddr = pci_resource_start(pdev, 0); ++#else ++ // ioremap MMIO region ++ ioaddr = (unsigned long)ioremap (mmio_start, mmio_len); ++ if (ioaddr == 0) { ++ printk (KERN_ERR PFX "cannot remap MMIO, aborting\n"); ++ rc = -EIO; ++ goto err_out_free_res; ++ } ++#endif ++ ++ // Soft reset the chip. 
++ RTL_W8 ( ChipCmd, CmdReset); ++ ++ // Check that the chip has finished the reset. ++ for (i = 1000; i > 0; i--){ ++ if ( (RTL_R8(ChipCmd) & CmdReset) == 0){ ++ break; ++ } ++ else{ ++ udelay (10); ++ } ++ } ++ ++ { ++ u8 cfg2 = RTL_R8(Config2) & ~MSIEnable; ++ if (region) { ++ if (pci_enable_msi(pdev)) ++ printk("R8169: no MSI, Back to INTx.\n"); ++ else { ++ cfg2 |= MSIEnable; ++ priv->features |= RTL_FEATURE_MSI; ++ } ++ } ++ RTL_W8(Config2, cfg2); ++ } ++ ++ // identify config method ++ { ++ unsigned long val32 = (RTL_R32(TxConfig)&0x7c800000); ++ ++ if( val32 == (0x1<<28) ){ ++ priv->mcfg = MCFG_METHOD_4; ++ } ++ else if( val32 == (0x1<<26) ){ ++ priv->mcfg = MCFG_METHOD_3; ++ } ++ else if( val32 == (0x1<<23) ){ ++ priv->mcfg = MCFG_METHOD_2; ++ } ++ else if( val32 == 0x00000000 ){ ++ priv->mcfg = MCFG_METHOD_1; ++ } ++ else{ ++ priv->mcfg = MCFG_METHOD_1; ++ } ++ } ++ ++ { ++ unsigned char val8 = (unsigned char)(RTL8169_READ_GMII_REG(ioaddr,3)&0x000f); ++ if( val8 == 0x00 ){ ++ priv->pcfg = PCFG_METHOD_1; ++ } ++ else if( val8 == 0x01 ){ ++ priv->pcfg = PCFG_METHOD_2; ++ } ++ else if( val8 == 0x02 ){ ++ priv->pcfg = PCFG_METHOD_3; ++ } ++ else{ ++ priv->pcfg = PCFG_METHOD_3; ++ } ++ } ++ ++ ++ for (i = ARRAY_SIZE (rtl_chip_info) - 1; i >= 0; i--){ ++ if (priv->mcfg == rtl_chip_info[i].mcfg) { ++ priv->chipset = i; ++ goto match; ++ } ++ } ++ ++ //if unknown chip, assume array element #0, original RTL-8169 in this case ++ printk (KERN_DEBUG PFX "PCI device %s: unknown chip version, assuming RTL-8169\n", pci_name(pdev)); ++ priv->chipset = 0; ++ ++match: ++ *ioaddr_out = ioaddr; ++ *dev_out = rtdev; ++ return 0; ++ ++#ifndef RTL8169_USE_IO ++err_out_free_res: ++#endif ++ pci_release_regions (pdev); /*** moved outside of #ifdev ***/ ++ ++err_out: ++ /*** RTnet ***/ ++ rt_rtdev_disconnect(rtdev); ++ rtdev_free(rtdev); ++ /*** /RTnet ***/ ++ return rc; ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static int rtl8169_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) ++{ ++ struct rtnet_device *rtdev = NULL; /*** RTnet ***/ ++ struct rtl8169_private *priv = NULL; ++ unsigned long ioaddr = 0; ++ static int board_idx = -1; ++ int region = ent->driver_data; ++ int i; ++ int option = -1, Cap10_100 = 0, Cap1000 = 0; ++ ++ ++ assert (pdev != NULL); ++ assert (ent != NULL); ++ ++ board_idx++; ++ ++ /*** RTnet ***/ ++ if (board_idx >= MAX_UNITS) { ++ return -ENODEV; ++ } ++ if (cards[board_idx] == 0) ++ return -ENODEV; ++ /*** RTnet ***/ ++ ++ i = rtl8169_init_board (pdev, &rtdev, &ioaddr, region); ++ if (i < 0) { ++ return i; ++ } ++ ++ priv = rtdev->priv; ++ ++ assert (ioaddr != 0); ++ assert (rtdev != NULL); ++ assert (priv != NULL); ++ ++ // Get MAC address // ++ for (i = 0; i < MAC_ADDR_LEN ; i++){ ++ rtdev->dev_addr[i] = RTL_R8( MAC0 + i ); ++ } ++ ++ rtdev->open = rtl8169_open; ++ rtdev->hard_start_xmit = rtl8169_start_xmit; ++ rtdev->get_stats = rtl8169_get_stats; ++ rtdev->stop = rtl8169_close; ++ /* dev->tx_timeout = rtl8169_tx_timeout; */ /*** RTnet ***/ ++ /* dev->set_multicast_list = rtl8169_set_rx_mode; */ /*** RTnet ***/ ++ /* dev->watchdog_timeo = TX_TIMEOUT; */ /*** RTnet ***/ ++ rtdev->irq = pdev->irq; ++ rtdev->base_addr = (unsigned long) ioaddr; ++ ++#ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ rtdev->change_mtu = rtl8169_change_mtu; ++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ ++#ifdef RTL8169_IOCTL_SUPPORT ++ rtdev->do_ioctl = rtl8169_ioctl; ++#endif //end #ifdef 
RTL8169_IOCTL_SUPPORT ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++ priv->rt.dev = rtdev; ++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL ++ ++ priv = rtdev->priv; // private data // ++ priv->pci_dev = pdev; ++ priv->ioaddr = ioaddr; ++ ++//#ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ priv->curr_mtu_size = rtdev->mtu; ++ priv->tx_pkt_len = rtdev->mtu + ETH_HDR_LEN; ++ priv->rx_pkt_len = rtdev->mtu + ETH_HDR_LEN; ++ priv->hw_rx_pkt_len = priv->rx_pkt_len + 8; ++//#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ ++ DBG_PRINT("-------------------------- \n"); ++ DBG_PRINT("dev->mtu = %d \n", rtdev->mtu); ++ DBG_PRINT("priv->curr_mtu_size = %d \n", priv->curr_mtu_size); ++ DBG_PRINT("priv->tx_pkt_len = %d \n", priv->tx_pkt_len); ++ DBG_PRINT("priv->rx_pkt_len = %d \n", priv->rx_pkt_len); ++ DBG_PRINT("priv->hw_rx_pkt_len = %d \n", priv->hw_rx_pkt_len); ++ DBG_PRINT("-------------------------- \n"); ++ ++ rtdm_lock_init(&priv->lock); /*** RTnet ***/ ++ ++ /*** RTnet ***/ ++ if (rt_register_rtnetdev(rtdev) < 0) { ++ /* clean up... */ ++ pci_release_regions (pdev); ++ rt_rtdev_disconnect(rtdev); ++ rtdev_free(rtdev); ++ return -ENODEV; ++ } ++ /*** /RTnet ***/ ++ ++ pci_set_drvdata(pdev, rtdev); // pdev->driver_data = data; ++ ++ printk (KERN_DEBUG "%s: Identified chip type is '%s'.\n", rtdev->name, rtl_chip_info[priv->chipset].name); ++ printk (KERN_INFO "%s: %s at 0x%lx, " ++ "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, " ++ "IRQ %d\n", ++ rtdev->name, ++ RTL8169_DRIVER_NAME, ++ rtdev->base_addr, ++ rtdev->dev_addr[0], rtdev->dev_addr[1], ++ rtdev->dev_addr[2], rtdev->dev_addr[3], ++ rtdev->dev_addr[4], rtdev->dev_addr[5], ++ rtdev->irq); ++ ++ // Config PHY ++ rtl8169_hw_PHY_config(rtdev); ++ ++ DBG_PRINT("Set MAC Reg C+CR Offset 0x82h = 0x01h\n"); ++ RTL_W8( 0x82, 0x01 ); ++ ++ if( priv->mcfg < MCFG_METHOD_3 ){ ++ DBG_PRINT("Set PCI Latency=0x40\n"); ++ pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x40); ++ } ++ ++ if( priv->mcfg == MCFG_METHOD_2 ){ ++ DBG_PRINT("Set MAC Reg C+CR Offset 0x82h = 0x01h\n"); ++ RTL_W8( 0x82, 0x01 ); ++ DBG_PRINT("Set PHY Reg 0x0bh = 0x00h\n"); ++ RTL8169_WRITE_GMII_REG( ioaddr, 0x0b, 0x0000 ); //w 0x0b 15 0 0 ++ } ++ ++ // if TBI is not endbled ++ if( !(RTL_R8(PHYstatus) & TBI_Enable) ){ ++ int val = RTL8169_READ_GMII_REG( ioaddr, PHY_AUTO_NEGO_REG ); ++ ++#ifdef RTL8169_HW_FLOW_CONTROL_SUPPORT ++ val |= PHY_Cap_PAUSE | PHY_Cap_ASYM_PAUSE ; ++#endif //end #define RTL8169_HW_FLOW_CONTROL_SUPPORT ++ ++ option = (board_idx >= MAX_UNITS) ? 0 : media[board_idx]; ++ // Force RTL8169 in 10/100/1000 Full/Half mode. ++ if( option > 0 ){ ++ printk(KERN_INFO "%s: Force-mode Enabled. 
\n", rtdev->name); ++ Cap10_100 = 0; ++ Cap1000 = 0; ++ switch( option ){ ++ case _10_Half: ++ Cap10_100 = PHY_Cap_10_Half; ++ Cap1000 = PHY_Cap_Null; ++ break; ++ case _10_Full: ++ Cap10_100 = PHY_Cap_10_Full | PHY_Cap_10_Half; ++ Cap1000 = PHY_Cap_Null; ++ break; ++ case _100_Half: ++ Cap10_100 = PHY_Cap_100_Half | PHY_Cap_10_Full | PHY_Cap_10_Half; ++ Cap1000 = PHY_Cap_Null; ++ break; ++ case _100_Full: ++ Cap10_100 = PHY_Cap_100_Full | PHY_Cap_100_Half | PHY_Cap_10_Full | PHY_Cap_10_Half; ++ Cap1000 = PHY_Cap_Null; ++ break; ++ case _1000_Full: ++ Cap10_100 = PHY_Cap_100_Full | PHY_Cap_100_Half | PHY_Cap_10_Full | PHY_Cap_10_Half; ++ Cap1000 = PHY_Cap_1000_Full; ++ break; ++ default: ++ break; ++ } ++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_AUTO_NEGO_REG, Cap10_100 | ( val&0xC1F ) ); //leave PHY_AUTO_NEGO_REG bit4:0 unchanged ++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_1000_CTRL_REG, Cap1000 ); ++ } ++ else{ ++ printk(KERN_INFO "%s: Auto-negotiation Enabled.\n", rtdev->name); ++ ++ // enable 10/100 Full/Half Mode, leave PHY_AUTO_NEGO_REG bit4:0 unchanged ++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_AUTO_NEGO_REG, ++ PHY_Cap_10_Half | PHY_Cap_10_Full | PHY_Cap_100_Half | PHY_Cap_100_Full | ( val&0xC1F ) ); ++ ++ // enable 1000 Full Mode ++// RTL8169_WRITE_GMII_REG( ioaddr, PHY_1000_CTRL_REG, PHY_Cap_1000_Full ); ++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_1000_CTRL_REG, PHY_Cap_1000_Full | PHY_Cap_1000_Half); //rtl8168 ++ ++ }// end of if( option > 0 ) ++ ++ // Enable auto-negotiation and restart auto-nigotiation ++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_CTRL_REG, PHY_Enable_Auto_Nego | PHY_Restart_Auto_Nego ); ++ udelay(100); ++ ++ // wait for auto-negotiation process ++ for( i = 10000; i > 0; i-- ){ ++ //check if auto-negotiation complete ++ if( RTL8169_READ_GMII_REG(ioaddr, PHY_STAT_REG) & PHY_Auto_Neco_Comp ){ ++ udelay(100); ++ option = RTL_R8(PHYstatus); ++ if( option & _1000bpsF ){ ++ printk(KERN_INFO "%s: 1000Mbps Full-duplex operation.\n", rtdev->name); ++ } ++ else{ ++ printk(KERN_INFO "%s: %sMbps %s-duplex operation.\n", rtdev->name, ++ (option & _100bps) ? "100" : "10", (option & FullDup) ? "Full" : "Half" ); ++ } ++ break; ++ } ++ else{ ++ udelay(100); ++ }// end of if( RTL8169_READ_GMII_REG(ioaddr, 1) & 0x20 ) ++ }// end for-loop to wait for auto-negotiation process ++ ++ option = RTL_R8(PHYstatus); ++ if( option & _1000bpsF ){ ++ priv->linkstatus = _1000_Full; ++ } ++ else{ ++ if(option & _100bps){ ++ priv->linkstatus = (option & FullDup) ? _100_Full : _100_Half; ++ } ++ else{ ++ priv->linkstatus = (option & FullDup) ? _10_Full : _10_Half; ++ } ++ } ++ DBG_PRINT("priv->linkstatus = 0x%02x\n", priv->linkstatus); ++ ++ }// end of TBI is not enabled ++ else{ ++ udelay(100); ++ DBG_PRINT("1000Mbps Full-duplex operation, TBI Link %s!\n",(RTL_R32(TBICSR) & TBILinkOK) ? 
"OK" : "Failed" ); ++ ++ }// end of TBI is not enabled ++ ++ return 0; ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_remove_one (struct pci_dev *pdev) ++{ ++ struct rtnet_device *rtdev = pci_get_drvdata(pdev); ++ struct rtl8169_private *priv = rtdev->priv;; ++ ++ assert (rtdev != NULL); ++ ++ /*** RTnet ***/ ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ /*** /RTnet ***/ ++ ++ if (priv->features & RTL_FEATURE_MSI) ++ pci_disable_msi(pdev); ++ ++#ifdef RTL8169_USE_IO ++#else ++ iounmap ((void *)(rtdev->base_addr)); ++#endif ++ pci_release_regions(pdev); ++ ++ rtdev_free(rtdev); /*** RTnet ***/ ++ ++ pci_disable_device(pdev); /*** Disable device now :-) ***/ ++ ++ pci_set_drvdata(pdev, NULL); ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static int rtl8169_open (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ struct pci_dev *pdev = priv->pci_dev; ++ int retval; ++// u8 diff; ++// u32 TxPhyAddr, RxPhyAddr; ++ ++ if( priv->drvinit_fail == 1 ){ ++ printk("%s: Gigabit driver open failed.\n", rtdev->name ); ++ return -ENOMEM; ++ } ++ ++ /*** RTnet ***/ ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ retval = rtdm_irq_request(&priv->irq_handle, rtdev->irq, rtl8169_interrupt, 0, "rt_r8169", rtdev); ++ /*** /RTnet ***/ ++ ++ // retval = request_irq (dev->irq, rtl8169_interrupt, SA_SHIRQ, dev->name, dev); ++ if (retval) { ++ return retval; ++ } ++ ++ ++ //2004-05-11 ++ // Allocate tx/rx descriptor space ++ priv->sizeof_txdesc_space = NUM_TX_DESC * sizeof(struct TxDesc)+256; ++ priv->txdesc_space = pci_alloc_consistent( pdev, priv->sizeof_txdesc_space, &priv->txdesc_phy_dma_addr ); ++ if( priv->txdesc_space == NULL ){ ++ printk("%s: Gigabit driver alloc txdesc_space failed.\n", rtdev->name ); ++ return -ENOMEM; ++ } ++ priv->sizeof_rxdesc_space = NUM_RX_DESC * sizeof(struct RxDesc)+256; ++ priv->rxdesc_space = pci_alloc_consistent( pdev, priv->sizeof_rxdesc_space, &priv->rxdesc_phy_dma_addr ); ++ if( priv->rxdesc_space == NULL ){ ++ printk("%s: Gigabit driver alloc rxdesc_space failed.\n", rtdev->name ); ++ return -ENOMEM; ++ } ++ ++ if(priv->txdesc_phy_dma_addr & 0xff){ ++ printk("%s: Gigabit driver txdesc_phy_dma_addr is not 256-bytes-aligned.\n", rtdev->name ); ++ } ++ if(priv->rxdesc_phy_dma_addr & 0xff){ ++ printk("%s: Gigabit driver rxdesc_phy_dma_addr is not 256-bytes-aligned.\n", rtdev->name ); ++ } ++ // Set tx/rx descriptor space ++ priv->TxDescArray = (struct TxDesc *)priv->txdesc_space; ++ priv->RxDescArray = (struct RxDesc *)priv->rxdesc_space; ++ ++ { ++ int i; ++ struct rtskb *skb = NULL; /*** RTnet ***/ ++ priv->rx_buf_size = (rtdev->mtu <= 1500 ? DEFAULT_RX_BUF_LEN : rtdev->mtu + 32); /*** RTnet / ***/ ++ ++ for(i=0;i ***/ ++ skb = rtnetdev_alloc_rtskb(rtdev, priv->rx_buf_size); /*** RTnet ***/; ++ if( skb != NULL ) { ++ rtskb_reserve (skb, 2); // 16 byte align the IP fields. 
// ++ priv->Rx_skbuff[i] = skb; ++ } ++ else{ ++ printk("%s: Gigabit driver failed to allocate skbuff.\n", rtdev->name); ++ priv->drvinit_fail = 1; ++ } ++ } ++ } ++ ++ ++ ////////////////////////////////////////////////////////////////////////////// ++ rtl8169_init_ring(rtdev); ++ rtl8169_hw_start(rtdev); ++ ++ // ------------------------------------------------------ ++ ++ //DBG_PRINT("%s: %s() alloc_rxskb_cnt = %d\n", dev->name, __FUNCTION__, alloc_rxskb_cnt ); /*** won't work anymore... ***/ ++ ++ return 0; ++ ++}//end of rtl8169_open (struct net_device *dev) ++ ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_hw_PHY_config (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ void *ioaddr = (void*)priv->ioaddr; ++ ++ DBG_PRINT("priv->mcfg=%d, priv->pcfg=%d\n",priv->mcfg,priv->pcfg); ++ ++ if( priv->mcfg == MCFG_METHOD_4 ){ ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0001 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1b, 0x841e ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x0e, 0x7bfb ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x09, 0x273a ); ++ ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0002 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x90D0 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0000 ); ++ }else if((priv->mcfg == MCFG_METHOD_2)||(priv->mcfg == MCFG_METHOD_3)){ ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0001 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x15, 0x1000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x18, 0x65C7 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0x00A1 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0x0008 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x1020 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0x1000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0800 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x7000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xFF41 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDE60 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x0140 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0x0077 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x7800 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x7000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xA000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xDF01 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDF20 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0xFF95 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0xFA00 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xA800 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xA000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xB000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xFF41 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDE20 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x0140 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0x00BB ); ++ 
RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xB800 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xB000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xF000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xDF01 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDF20 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0xFF95 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0xBF00 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xF800 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xF000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x0B, 0x0000 ); ++ } ++ else{ ++ DBG_PRINT("priv->mcfg=%d. Discard hw PHY config.\n",priv->mcfg); ++ } ++} ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_hw_start (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ unsigned long ioaddr = priv->ioaddr; ++ u32 i; ++ ++ ++ /* Soft reset the chip. */ ++ RTL_W8 ( ChipCmd, CmdReset); ++ ++ /* Check that the chip has finished the reset. */ ++ for (i = 1000; i > 0; i--){ ++ if ((RTL_R8( ChipCmd ) & CmdReset) == 0) break; ++ else udelay (10); ++ } ++ ++ RTL_W8 ( Cfg9346, Cfg9346_Unlock); ++ RTL_W8 ( ChipCmd, CmdTxEnb | CmdRxEnb); ++ RTL_W8 ( ETThReg, ETTh); ++ ++ // For gigabit rtl8169 ++ RTL_W16 ( RxMaxSize, (unsigned short)priv->hw_rx_pkt_len ); ++ ++ // Set Rx Config register ++ i = rtl8169_rx_config | ( RTL_R32( RxConfig ) & rtl_chip_info[priv->chipset].RxConfigMask); ++ RTL_W32 ( RxConfig, i); ++ ++ ++ /* Set DMA burst size and Interframe Gap Time */ ++ RTL_W32 ( TxConfig, (TX_DMA_BURST << TxDMAShift) | (InterFrameGap << TxInterFrameGapShift) ); ++ ++ ++ ++ RTL_W16( CPlusCmd, RTL_R16(CPlusCmd) ); ++ ++ if( priv->mcfg == MCFG_METHOD_2 || ++ priv->mcfg == MCFG_METHOD_3) ++ { ++ RTL_W16( CPlusCmd, (RTL_R16(CPlusCmd)|(1<<14)|(1<<3)) ); ++ DBG_PRINT("Set MAC Reg C+CR Offset 0xE0: bit-3 and bit-14\n"); ++ } ++ else ++ { ++ RTL_W16( CPlusCmd, (RTL_R16(CPlusCmd)|(1<<3)) ); ++ DBG_PRINT("Set MAC Reg C+CR Offset 0xE0: bit-3.\n"); ++ } ++ ++ { ++ //RTL_W16(0xE2, 0x1517); ++ //RTL_W16(0xE2, 0x152a); ++ //RTL_W16(0xE2, 0x282a); ++ RTL_W16(0xE2, 0x0000); /* 0xE2 = IntrMitigate */ ++ } ++ ++ priv->cur_rx = 0; ++ ++ RTL_W32 ( TxDescStartAddr, priv->txdesc_phy_dma_addr); ++ RTL_W32 ( TxDescStartAddr + 4, 0x00); ++ RTL_W32 ( RxDescStartAddr, priv->rxdesc_phy_dma_addr); ++ RTL_W32 ( RxDescStartAddr + 4, 0x00); ++ ++ RTL_W8 ( Cfg9346, Cfg9346_Lock ); ++ udelay (10); ++ ++ RTL_W32 ( RxMissed, 0 ); ++ ++ rtl8169_set_rx_mode (rtdev); ++ ++ /* no early-rx interrupts */ ++ RTL_W16 ( MultiIntr, RTL_R16(MultiIntr) & 0xF000); ++ ++ /* enable all known interrupts by setting the interrupt mask */ ++ RTL_W16 ( IntrMask, rtl8169_intr_mask); ++ ++ rtnetif_start_queue (rtdev); /*** RTnet ***/ ++ ++}//end of rtl8169_hw_start (struct net_device *dev) ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_init_ring (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ struct pci_dev *pdev = priv->pci_dev; ++ int i; ++ struct rtskb *skb; ++ ++ ++ priv->cur_rx = 0; ++ priv->cur_tx = 0; ++ priv->dirty_tx = 0; ++ memset(priv->TxDescArray, 0x0, 
NUM_TX_DESC*sizeof(struct TxDesc)); ++ memset(priv->RxDescArray, 0x0, NUM_RX_DESC*sizeof(struct RxDesc)); ++ ++ ++ for (i=0 ; iTx_skbuff[i]=NULL; ++ priv->txdesc_array_dma_addr[i] = pci_map_single(pdev, &priv->TxDescArray[i], sizeof(struct TxDesc), PCI_DMA_TODEVICE); ++ } ++ ++ for (i=0; i RxDescArray[i].status = cpu_to_le32((OWNbit | EORbit) | (unsigned long)priv->hw_rx_pkt_len); ++ } ++ else{ ++ priv->RxDescArray[i].status = cpu_to_le32(OWNbit | (unsigned long)priv->hw_rx_pkt_len); ++ } ++ ++ {//----------------------------------------------------------------------- ++ skb = priv->Rx_skbuff[i]; ++ priv->rx_skbuff_dma_addr[i] = pci_map_single(pdev, skb->data, priv->rx_buf_size /* MAX_RX_SKBDATA_SIZE */, PCI_DMA_FROMDEVICE); /*** ***/ ++ ++ if( skb != NULL ){ ++ priv->RxDescArray[i].buf_addr = cpu_to_le32(priv->rx_skbuff_dma_addr[i]); ++ priv->RxDescArray[i].buf_Haddr = 0; ++ } ++ else{ ++ DBG_PRINT("%s: %s() Rx_skbuff == NULL\n", rtdev->name, __FUNCTION__); ++ priv->drvinit_fail = 1; ++ } ++ }//----------------------------------------------------------------------- ++ priv->rxdesc_array_dma_addr[i] = pci_map_single(pdev, &priv->RxDescArray[i], sizeof(struct RxDesc), PCI_DMA_TODEVICE); ++ pci_dma_sync_single_for_device(pdev, priv->rxdesc_array_dma_addr[i], sizeof(struct RxDesc), PCI_DMA_TODEVICE); ++ } ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_tx_clear (struct rtl8169_private *priv) ++{ ++ int i; ++ ++ priv->cur_tx = 0; ++ for ( i = 0 ; i < NUM_TX_DESC ; i++ ){ ++ if ( priv->Tx_skbuff[i] != NULL ) { ++ dev_kfree_rtskb ( priv->Tx_skbuff[i] ); ++ priv->Tx_skbuff[i] = NULL; ++ priv->stats.tx_dropped++; ++ } ++ } ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static int rtl8169_start_xmit (struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ unsigned long ioaddr = priv->ioaddr; ++ struct pci_dev *pdev = priv->pci_dev; ++ int entry = priv->cur_tx % NUM_TX_DESC; ++ // int buf_len = 60; ++ dma_addr_t txbuf_dma_addr; ++ rtdm_lockctx_t context; /*** RTnet ***/ ++ u32 status, len; /* */ ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); /*** RTnet ***/ ++ ++ status = le32_to_cpu(priv->TxDescArray[entry].status); ++ ++ if( (status & OWNbit)==0 ){ ++ ++ priv->Tx_skbuff[entry] = skb; ++ ++ len = skb->len; ++ if (len < ETH_ZLEN) { ++ skb = rtskb_padto(skb, ETH_ZLEN); ++ if (skb == NULL) { ++ /* Error... */ ++ rtdm_printk("%s: Error -- rtskb_padto returned NULL; out of memory?\n", rtdev->name); ++ } ++ len = ETH_ZLEN; ++ } ++ ++ txbuf_dma_addr = pci_map_single(pdev, skb->data, len, PCI_DMA_TODEVICE); ++ ++ priv->TxDescArray[entry].buf_addr = cpu_to_le32(txbuf_dma_addr); ++ ++ /* print TX frame debug informations? */ ++ while (r8169_debug & (DEBUG_TX_SYNC | DEBUG_TX_OTHER)) { ++ unsigned short proto = 0; ++ ++ /* get ethernet protocol id */ ++ if (skb->len < 14) break; /* packet too small! */ ++ if (skb->len > 12) proto = be16_to_cpu(*((unsigned short *)(skb->data + 12))); ++ ++ if (proto == 0x9021 && !(r8169_debug & DEBUG_TX_SYNC)) { ++ /* don't show TDMA Sync frames for better debugging, so look at RTmac frame type... */ ++ unsigned short type; ++ ++ if (skb->len < 16) break; /* packet too small! 
*/ ++ type = be16_to_cpu(*((unsigned short *)(skb->data + 14))); ++ ++ if (type == 0x0001) { ++ /* TDMA-Frame; get Message ID */ ++ unsigned short tdma_version; ++ ++ if (skb->len < 20) break; /* packet too small! */ ++ tdma_version = be16_to_cpu(*((unsigned short *)(skb->data + 18))); ++ ++ if (tdma_version == 0x0201) { ++ unsigned short tdma_id; ++ ++ if (skb->len < 22) break; /* packet too small! */ ++ tdma_id = be16_to_cpu(*((unsigned short *)(skb->data + 20))); ++ ++ if (tdma_id == 0x0000 && !(r8169_debug & DEBUG_TX_SYNC)) { ++ /* TDMA sync frame found, but not allowed to print it */ ++ break; ++ } ++ } ++ } ++ ++ } ++ ++ /* print frame informations */ ++ RT_DBG_PRINT("%s: TX len = %d, skb->len = %d, eth_proto=%04x\n", __FUNCTION__, len, skb->len, proto); ++ ++ break; /* leave loop */ ++ } ++ ++ if( len > priv->tx_pkt_len ){ ++ rtdm_printk("%s: Error -- Tx packet size(%d) > mtu(%d)+14\n", rtdev->name, len, rtdev->mtu); ++ len = priv->tx_pkt_len; ++ } ++ ++ /*** RTnet ***/ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ /*** /RTnet ***/ ++ ++ if( entry != (NUM_TX_DESC-1) ){ ++ status = (OWNbit | FSbit | LSbit) | len; ++ } ++ else{ ++ status = (OWNbit | EORbit | FSbit | LSbit) | len; ++ } ++ priv->TxDescArray[entry].status = cpu_to_le32(status); ++ ++ pci_dma_sync_single_for_device(pdev, priv->txdesc_array_dma_addr[entry], sizeof(struct TxDesc), PCI_DMA_TODEVICE); ++ ++ RTL_W8 ( TxPoll, 0x40); //set polling bit ++ ++ //rtdev->trans_start = jiffies; ++ ++ priv->stats.tx_bytes += len; ++ priv->cur_tx++; ++ }//end of if( (priv->TxDescArray[entry].status & 0x80000000)==0 ) ++ ++ rtdm_lock_put_irqrestore(&priv->lock, context); /*** RTnet ***/ ++ ++ if ( (priv->cur_tx - NUM_TX_DESC) == priv->dirty_tx ){ ++ if (r8169_debug & DEBUG_RUN) rtdm_printk(KERN_DEBUG "%s: stopping rtnetif queue", __FUNCTION__); ++ rtnetif_stop_queue (rtdev); ++ } ++ else{ ++ if (rtnetif_queue_stopped (rtdev)){ ++ if (r8169_debug & DEBUG_RUN) rtdm_printk(KERN_DEBUG "%s: waking rtnetif queue", __FUNCTION__); ++ rtnetif_wake_queue (rtdev); ++ } ++ } ++ ++ return 0; ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++/* This routine is logically part of the interrupt handler, but isolated ++ for clarity. 
*/ ++static void rtl8169_tx_interrupt (struct rtnet_device *rtdev, struct rtl8169_private *priv, unsigned long ioaddr) ++{ ++ unsigned long dirty_tx, tx_left=0; ++ //int entry = priv->cur_tx % NUM_TX_DESC; /* */ ++ int txloop_cnt = 0; ++ ++ rt_assert (rtdev != NULL); ++ rt_assert (priv != NULL); ++ rt_assert (ioaddr != 0); ++ ++ rtdm_lock_get(&priv->lock); /*** RTnet ***/ ++ ++ dirty_tx = priv->dirty_tx; ++ smp_rmb(); /*** ***/ ++ tx_left = priv->cur_tx - dirty_tx; ++ ++ while( (tx_left > 0) && (txloop_cnt < max_interrupt_work) ){ ++ unsigned int entry = dirty_tx % NUM_TX_DESC; /* */ ++ if( (le32_to_cpu(priv->TxDescArray[entry].status) & OWNbit) == 0 ){ ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++ r8169_callback_tx(&(priv->rt), 1, priv->Tx_skbuff[dirty_tx % NUM_TX_DESC]->len); ++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL ++ ++ if (priv->txdesc_array_dma_addr[entry]) ++ pci_unmap_single(priv->pci_dev, priv->txdesc_array_dma_addr[entry], priv->Tx_skbuff[entry]->len, PCI_DMA_TODEVICE); /*** ##KK## ***/ ++ dev_kfree_rtskb( priv->Tx_skbuff[entry] ); /*** RTnet; previously: dev_kfree_skb_irq() - luckily we're within an IRQ ***/ ++ priv->Tx_skbuff[entry] = NULL; ++ priv->stats.tx_packets++; ++ dirty_tx++; ++ tx_left--; ++ entry++; ++ } ++ txloop_cnt ++; ++ } ++ ++ if (priv->dirty_tx != dirty_tx) { ++ priv->dirty_tx = dirty_tx; ++ smp_wmb(); /*** ***/ ++ if (rtnetif_queue_stopped (rtdev)) ++ rtnetif_wake_queue (rtdev); ++ } ++ ++ rtdm_lock_put(&priv->lock); /*** RTnet ***/ ++ ++} ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++/* This routine is logically part of the interrupt handler, but isolated ++ for clarity. */ ++static void rtl8169_rx_interrupt (struct rtnet_device *rtdev, struct rtl8169_private *priv, unsigned long ioaddr, nanosecs_abs_t *time_stamp) ++{ ++ struct pci_dev *pdev = priv->pci_dev; ++ int cur_rx; ++ int pkt_size = 0 ; ++ int rxdesc_cnt = 0; ++ /* int ret; */ /*** RTnet ***/ ++ struct rtskb *n_skb = NULL; ++ struct rtskb *cur_skb; ++ struct rtskb *rx_skb; ++ struct RxDesc *rxdesc; ++ ++ rt_assert (rtdev != NULL); ++ rt_assert (priv != NULL); ++ rt_assert (ioaddr != 0); ++ ++ ++ cur_rx = priv->cur_rx; ++ ++ rxdesc = &priv->RxDescArray[cur_rx]; ++ pci_dma_sync_single_for_cpu(pdev, priv->rxdesc_array_dma_addr[cur_rx], sizeof(struct RxDesc), PCI_DMA_FROMDEVICE); ++ ++ while ( ((le32_to_cpu(rxdesc->status) & OWNbit)== 0) && (rxdesc_cnt < max_interrupt_work) ){ ++ ++ rxdesc_cnt++; ++ ++ if( le32_to_cpu(rxdesc->status) & RxRES ){ ++ rtdm_printk(KERN_INFO "%s: Rx ERROR!!!\n", rtdev->name); ++ priv->stats.rx_errors++; ++ if ( le32_to_cpu(rxdesc->status) & (RxRWT|RxRUNT) ) ++ priv->stats.rx_length_errors++; ++ if ( le32_to_cpu(rxdesc->status) & RxCRC) { ++ /* in the rt_via-rhine.c there's a lock around the incrementation... 
we'll do that also here */ ++ rtdm_lock_get(&priv->lock); /*** RTnet ***/ ++ priv->stats.rx_crc_errors++; ++ rtdm_lock_put(&priv->lock); /*** RTnet ***/ ++ } ++ } ++ else{ ++ pkt_size=(int)(le32_to_cpu(rxdesc->status) & 0x00001FFF)-4; ++ ++ if( pkt_size > priv->rx_pkt_len ){ ++ rtdm_printk("%s: Error -- Rx packet size(%d) > mtu(%d)+14\n", rtdev->name, pkt_size, rtdev->mtu); ++ pkt_size = priv->rx_pkt_len; ++ } ++ ++ {// ----------------------------------------------------- ++ rx_skb = priv->Rx_skbuff[cur_rx]; ++ // n_skb = RTL8169_ALLOC_RXSKB(MAX_RX_SKBDATA_SIZE); /*** ***/ ++ n_skb = rtnetdev_alloc_rtskb(rtdev, priv->rx_buf_size); /*** RTnet ***/ ++ if( n_skb != NULL ) { ++ rtskb_reserve (n_skb, 2); // 16 byte align the IP fields. // ++ ++ // Indicate rx_skb ++ if( rx_skb != NULL ){ ++ pci_dma_sync_single_for_cpu(pdev, priv->rx_skbuff_dma_addr[cur_rx], sizeof(struct RxDesc), PCI_DMA_FROMDEVICE); ++ ++ rtskb_put ( rx_skb, pkt_size ); ++ rx_skb->protocol = rt_eth_type_trans ( rx_skb, rtdev ); ++ rx_skb->time_stamp = *time_stamp; /*** RTnet ***/ ++ //ret = RTL8169_NETIF_RX (rx_skb); ++ rtnetif_rx(rx_skb); /*** RTnet ***/ ++ ++// dev->last_rx = jiffies; ++ priv->stats.rx_bytes += pkt_size; ++ priv->stats.rx_packets++; ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++ r8169_callback_rx( &(priv->rt), 1, pkt_size); ++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL ++ ++ }//end if( rx_skb != NULL ) ++ ++ priv->Rx_skbuff[cur_rx] = n_skb; ++ } ++ else{ ++ RT_DBG_PRINT("%s: Allocate n_skb failed! (priv->rx_buf_size = %d)\n",__FUNCTION__, priv->rx_buf_size ); ++ priv->Rx_skbuff[cur_rx] = rx_skb; ++ } ++ ++ ++ // Update rx descriptor ++ if( cur_rx == (NUM_RX_DESC-1) ){ ++ priv->RxDescArray[cur_rx].status = cpu_to_le32((OWNbit | EORbit) | (unsigned long)priv->hw_rx_pkt_len); ++ } ++ else{ ++ priv->RxDescArray[cur_rx].status = cpu_to_le32(OWNbit | (unsigned long)priv->hw_rx_pkt_len); ++ } ++ ++ cur_skb = priv->Rx_skbuff[cur_rx]; ++ ++ if( cur_skb != NULL ){ ++ priv->rx_skbuff_dma_addr[cur_rx] = pci_map_single(pdev, cur_skb->data, priv->rx_buf_size /* MAX_RX_SKBDATA_SIZE */, PCI_DMA_FROMDEVICE); ++ rxdesc->buf_addr = cpu_to_le32(priv->rx_skbuff_dma_addr[cur_rx]); ++ } ++ else{ ++ RT_DBG_PRINT("%s: %s() cur_skb == NULL\n", rtdev->name, __FUNCTION__); ++ } ++ ++ }//------------------------------------------------------------ ++ ++ }// end of if( priv->RxDescArray[cur_rx].status & RxRES ) ++ ++ cur_rx = (cur_rx +1) % NUM_RX_DESC; ++ rxdesc = &priv->RxDescArray[cur_rx]; ++ pci_dma_sync_single_for_cpu(pdev, priv->rxdesc_array_dma_addr[cur_rx], sizeof(struct RxDesc), PCI_DMA_FROMDEVICE); ++ ++ }// end of while ( (priv->RxDescArray[cur_rx].status & 0x80000000)== 0) ++ ++ if( rxdesc_cnt >= max_interrupt_work ){ ++ RT_DBG_PRINT("%s: Too much work at Rx interrupt.\n", rtdev->name); ++ } ++ ++ priv->cur_rx = cur_rx; ++} ++ ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++/* The interrupt handler does all of the Rx thread work and cleans up after the Tx thread. 
*/ ++static int rtl8169_interrupt(rtdm_irq_t *irq_handle) ++{ ++ /* struct net_device *dev = (struct net_device *) dev_instance; */ /*** RTnet ***/ ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); /*** RTnet ***/ ++ struct rtl8169_private *priv = rtdev->priv; ++ int boguscnt = max_interrupt_work; ++ unsigned long ioaddr = priv->ioaddr; ++ int status = 0; ++ unsigned int old_packet_cnt = priv->stats.rx_packets; /*** RTnet ***/ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); /*** RTnet ***/ ++ ++ int interrupt_handled = RTDM_IRQ_NONE; /*** ***/ ++ ++ do { ++ status = RTL_R16(IntrStatus); /* read interrupt status */ ++ ++ if ((status == 0xFFFF) || (!status)) { ++ break; /* hotplug/major error/no more work/shared irq */ ++ } ++ ++ ++ interrupt_handled = RTDM_IRQ_HANDLED; ++ ++/* if (unlikely(!rtnetif_running(rtdev))) { ++ rtl8169_asic_down(ioaddr); ++ goto out; ++ } ++*/ ++ ++ /* Acknowledge interrupts */ ++ RTL_W16(IntrStatus, 0xffff); ++ ++ if (!(status & rtl8169_intr_mask)) { ++ break; ++ } ++ ++ if (unlikely(status & SYSErr)) { ++ RT_DBG_PRINT("PCI error...!? %i\n", __LINE__); ++ rtl8169_pcierr_interrupt(rtdev); ++ break; ++ } ++ ++ /*** RTnet / (Linux-2.6.12-Backport) ***/ ++ if (unlikely(status & LinkChg)) { ++ rtdm_lock_get(&priv->lock); ++ if (RTL_R8(PHYstatus) & LinkStatus) /*** only supporting XMII, not yet TBI ***/ ++ rtnetif_carrier_on(rtdev); ++ else ++ rtnetif_carrier_off(rtdev); ++ rtdm_lock_put(&priv->lock); ++ } ++ ++ // Rx interrupt ++ if (status & (RxOK | RxOverflow | RxFIFOOver)) { ++ rtl8169_rx_interrupt (rtdev, priv, ioaddr, &time_stamp); ++ } ++ ++ // Tx interrupt ++ if (status & (TxOK | TxErr)) { ++ rtl8169_tx_interrupt (rtdev, priv, ioaddr); ++ } ++ ++ boguscnt--; ++ } while (boguscnt > 0); ++ ++ if (boguscnt <= 0) { ++ rtdm_printk(KERN_WARNING "%s: Too much work at interrupt!\n", rtdev->name); ++ RTL_W16( IntrStatus, 0xffff); /* Clear all interrupt sources */ ++ } ++ ++//out: ++ ++ if (old_packet_cnt != priv->stats.rx_packets) ++ rt_mark_stack_mgr(rtdev); ++ return interrupt_handled; ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static int rtl8169_close (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ unsigned long ioaddr = priv->ioaddr; ++ int i; ++ rtdm_lockctx_t context; /*** RTnet, for rtdm_lock_get_irqsave ***/ ++ ++ // ----------------------------------------- ++ /* rtl8169_delete_timer( &(priv->r8169_timer) ); */ /*** RTnet ***/ ++ ++ ++ rtdm_lock_get_irqsave (&priv->lock, context); /*** RTnet ***/ ++ ++ rtnetif_stop_queue (rtdev); /*** RTnet / : moved behind spin_lock! ***/ ++ ++ /* Stop the chip's Tx and Rx processes. */ ++ RTL_W8 ( ChipCmd, 0x00); ++ ++ /* Disable interrupts by clearing the interrupt mask. */ ++ RTL_W16 ( IntrMask, 0x0000); ++ ++ /* Update the error counts. 
*/ ++ priv->stats.rx_missed_errors += RTL_R32(RxMissed); ++ RTL_W32( RxMissed, 0); ++ ++ rtdm_lock_put_irqrestore(&priv->lock, context); /*** RTnet ***/ ++ ++ /*** RTnet ***/ ++ if ( (i=rtdm_irq_free(&priv->irq_handle))<0 ) ++ return i; ++ ++ rt_stack_disconnect(rtdev); ++ /*** /RTnet ***/ ++ ++ rtl8169_tx_clear (priv); ++ ++ //2004-05-11 ++ if(priv->txdesc_space != NULL){ ++ pci_free_consistent( ++ priv->pci_dev, ++ priv->sizeof_txdesc_space, ++ priv->txdesc_space, ++ priv->txdesc_phy_dma_addr ++ ); ++ priv->txdesc_space = NULL; ++ } ++ ++ if(priv->rxdesc_space != NULL){ ++ pci_free_consistent( ++ priv->pci_dev, ++ priv->sizeof_rxdesc_space, ++ priv->rxdesc_space, ++ priv->rxdesc_phy_dma_addr ++ ); ++ priv->rxdesc_space = NULL; ++ } ++ ++ priv->TxDescArray = NULL; ++ priv->RxDescArray = NULL; ++ ++ {//----------------------------------------------------------------------------- ++ for(i=0;iRx_skbuff[i] != NULL ) { ++ //RTL8169_FREE_RXSKB ( priv->Rx_skbuff[i] ); /*** ***/ ++ dev_kfree_rtskb(priv->Rx_skbuff[i]); /*** RTnet ***/ ++ } ++ } ++ }//----------------------------------------------------------------------------- ++ ++ //DBG_PRINT("%s: %s() alloc_rxskb_cnt = %d\n", dev->name, __FUNCTION__, alloc_rxskb_cnt ); /*** won't work anymore ***/ ++ ++ return 0; ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static unsigned const ethernet_polynomial = 0x04c11db7U; ++static inline u32 ether_crc (int length, unsigned char *data) ++{ ++ int crc = -1; ++ ++ while (--length >= 0) { ++ unsigned char current_octet = *data++; ++ int bit; ++ for (bit = 0; bit < 8; bit++, current_octet >>= 1) ++ crc = (crc << 1) ^ ((crc < 0) ^ (current_octet & 1) ? ethernet_polynomial : 0); ++ } ++ ++ return crc; ++} ++ ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_set_rx_mode (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ unsigned long ioaddr = priv->ioaddr; ++ rtdm_lockctx_t context; ++ u32 mc_filter[2]; /* Multicast hash filter */ ++ int rx_mode; ++ u32 tmp=0; ++ ++ ++ if (rtdev->flags & IFF_PROMISC) { ++ /* Unconditionally log net taps. */ ++ printk (KERN_NOTICE "%s: Promiscuous mode enabled.\n", rtdev->name); ++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys | AcceptAllPhys; ++ mc_filter[1] = mc_filter[0] = 0xffffffff; ++ } else if (rtdev->flags & IFF_ALLMULTI) { ++ /* Too many to filter perfectly -- accept all multicasts. 
*/ ++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; ++ mc_filter[1] = mc_filter[0] = 0xffffffff; ++ } else { ++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; ++ mc_filter[1] = mc_filter[0] = 0; ++ } ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); /*** RTnet ***/ ++ ++ tmp = rtl8169_rx_config | rx_mode | (RTL_R32(RxConfig) & rtl_chip_info[priv->chipset].RxConfigMask); ++ ++ RTL_W32 ( RxConfig, tmp); ++ RTL_W32 ( MAR0 + 0, mc_filter[0]); ++ RTL_W32 ( MAR0 + 4, mc_filter[1]); ++ ++ rtdm_lock_put_irqrestore(&priv->lock, context); /*** RTnet ***/ ++ ++}//end of rtl8169_set_rx_mode (struct net_device *dev) ++ ++ ++ ++ ++ ++ ++ ++//================================================================================ ++static struct net_device_stats *rtl8169_get_stats(struct rtnet_device *rtdev) ++ ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ ++ return &priv->stats; ++} ++ ++ ++ ++ ++ ++ ++ ++//================================================================================ ++static struct pci_driver rtl8169_pci_driver = { ++ name: MODULENAME, ++ id_table: rtl8169_pci_tbl, ++ probe: rtl8169_init_one, ++ remove: rtl8169_remove_one, ++ suspend: NULL, ++ resume: NULL, ++}; ++ ++ ++ ++ ++ ++//====================================================================================================== ++static int __init rtl8169_init_module (void) ++{ ++ /* Enable debugging output... */ ++ if (local_debug > 0) { ++ r8169_debug = local_debug; ++ } ++ if (r8169_debug & DEBUG_RUN) printk("Initializing " MODULENAME " driver"); ++ return pci_register_driver (&rtl8169_pci_driver); ++} ++ ++ ++ ++ ++//====================================================================================================== ++static void __exit rtl8169_cleanup_module (void) ++{ ++ pci_unregister_driver (&rtl8169_pci_driver); ++} ++ ++ ++#ifdef RTL8169_JUMBO_FRAME_SUPPORT ++static int rtl8169_change_mtu(struct net_device *dev, int new_mtu) ++{ ++ struct rtl8169_private *priv = dev->priv; ++ unsigned long ioaddr = priv->ioaddr; ++ ++ if( new_mtu > MAX_JUMBO_FRAME_MTU ){ ++ printk("%s: Error -- new_mtu(%d) > MAX_JUMBO_FRAME_MTU(%d).\n", dev->name, new_mtu, MAX_JUMBO_FRAME_MTU); ++ return -1; ++ } ++ ++ dev->mtu = new_mtu; ++ ++ priv->curr_mtu_size = new_mtu; ++ priv->tx_pkt_len = new_mtu + ETH_HDR_LEN; ++ priv->rx_pkt_len = new_mtu + ETH_HDR_LEN; ++ priv->hw_rx_pkt_len = priv->rx_pkt_len + 8; ++ ++ RTL_W8 ( Cfg9346, Cfg9346_Unlock); ++ RTL_W16 ( RxMaxSize, (unsigned short)priv->hw_rx_pkt_len ); ++ RTL_W8 ( Cfg9346, Cfg9346_Lock); ++ ++ DBG_PRINT("-------------------------- \n"); ++ DBG_PRINT("dev->mtu = %d \n", dev->mtu); ++ DBG_PRINT("priv->curr_mtu_size = %d \n", priv->curr_mtu_size); ++ DBG_PRINT("priv->rx_pkt_len = %d \n", priv->rx_pkt_len); ++ DBG_PRINT("priv->tx_pkt_len = %d \n", priv->tx_pkt_len); ++ DBG_PRINT("RTL_W16( RxMaxSize, %d )\n", priv->hw_rx_pkt_len); ++ DBG_PRINT("-------------------------- \n"); ++ ++ rtl8169_close (dev); ++ rtl8169_open (dev); ++ ++ return 0; ++} ++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ ++ ++ ++/*** these functions are backported from Linux-2.6.12's r8169.c driver ***/ ++static void rtl8169_irq_mask_and_ack(unsigned long ioaddr) ++{ ++ RTL_W16(IntrMask, 0x0000); ++ ++ RTL_W16(IntrStatus, 0xffff); ++} ++ ++static void rtl8169_pcierr_interrupt(struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ struct pci_dev *pdev = priv->pci_dev; ++ unsigned long ioaddr = priv->ioaddr; ++ u16 pci_status, pci_cmd; ++ ++ pci_read_config_word(pdev, 
PCI_COMMAND, &pci_cmd); ++ pci_read_config_word(pdev, PCI_STATUS, &pci_status); ++ ++ rtdm_printk(KERN_ERR PFX "%s: PCI error (cmd = 0x%04x, status = 0x%04x).\n", ++ rtdev->name, pci_cmd, pci_status); ++ ++ /* ++ * The recovery sequence below admits a very elaborated explanation: ++ * - it seems to work; ++ * - I did not see what else could be done. ++ * ++ * Feel free to adjust to your needs. ++ */ ++ pci_write_config_word(pdev, PCI_COMMAND, ++ pci_cmd | PCI_COMMAND_SERR | PCI_COMMAND_PARITY); ++ ++ pci_write_config_word(pdev, PCI_STATUS, ++ pci_status & (PCI_STATUS_DETECTED_PARITY | ++ PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_REC_MASTER_ABORT | ++ PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_SIG_TARGET_ABORT)); ++ ++ /* The infamous DAC f*ckup only happens at boot time */ ++ /*** *** ++ if ((priv->cp_cmd & PCIDAC) && !priv->dirty_rx && !priv->cur_rx) { ++ rtdm_printk(KERN_INFO PFX "%s: disabling PCI DAC.\n", rtdev->name); ++ priv->cp_cmd &= ~PCIDAC; ++ RTL_W16(CPlusCmd, priv->cp_cmd); ++ rtdev->features &= ~NETIF_F_HIGHDMA; ++ rtl8169_schedule_work(rtdev, rtl8169_reinit_task); ++ } ++ *** /RTnet ***/ ++ ++ /* Disable interrupts */ ++ rtl8169_irq_mask_and_ack(ioaddr); ++ ++ /* Reset the chipset */ ++ RTL_W8(ChipCmd, CmdReset); ++ ++ /* PCI commit */ ++ RTL_R8(ChipCmd); ++ ++} ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++module_init(rtl8169_init_module); ++module_exit(rtl8169_cleanup_module); +--- linux/drivers/xenomai/net/drivers/rt_smc91111.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/rt_smc91111.h 2021-04-07 16:01:27.234634156 +0800 +@@ -0,0 +1,566 @@ ++/*------------------------------------------------------------------------ ++ . smc91111.h - macros for the LAN91C111 Ethernet Driver ++ . ++ . Copyright (C) 2001 Standard Microsystems Corporation (SMSC) ++ . Developed by Simple Network Magic Corporation (SNMC) ++ . Copyright (C) 1996 by Erik Stahlman (ES) ++ . ++ . This program is free software; you can redistribute it and/or modify ++ . it under the terms of the GNU General Public License as published by ++ . the Free Software Foundation; either version 2 of the License, or ++ . (at your option) any later version. ++ . ++ . This program is distributed in the hope that it will be useful, ++ . but WITHOUT ANY WARRANTY; without even the implied warranty of ++ . MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ . GNU General Public License for more details. ++ . ++ . You should have received a copy of the GNU General Public License ++ . along with this program; if not, write to the Free Software ++ . Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ . ++ . This file contains register information and access macros for ++ . the LAN91C111 single chip ethernet controller. It is a modified ++ . version of the smc9194.h file. ++ . ++ . Information contained in this file was obtained from the LAN91C111 ++ . manual from SMC. To get a copy, if you really want one, you can find ++ . information under www.smsc.com. ++ . ++ . Authors ++ . Erik Stahlman ( erik@vt.edu ) ++ . Daris A Nevil ( dnevil@snmc.com ) ++ . ++ . History ++ . 03/16/01 Daris A Nevil Modified for use with LAN91C111 device ++ . 
++ ---------------------------------------------------------------------------*/ ++#ifndef _SMC91111_H_ ++#define _SMC91111_H_ ++ ++/* I want some simple types */ ++ ++typedef unsigned char byte; ++typedef unsigned short word; ++typedef unsigned long int dword; ++ ++ ++/* Because of bank switching, the LAN91xxx uses only 16 I/O ports */ ++ ++#define SMC_IO_EXTENT 16 ++ ++ ++/*--------------------------------------------------------------- ++ . ++ . A description of the SMSC registers is probably in order here, ++ . although for details, the SMC datasheet is invaluable. ++ . ++ . Basically, the chip has 4 banks of registers ( 0 to 3 ), which ++ . are accessed by writing a number into the BANK_SELECT register ++ . ( I also use a SMC_SELECT_BANK macro for this ). ++ . ++ . The banks are configured so that for most purposes, bank 2 is all ++ . that is needed for simple run time tasks. ++ -----------------------------------------------------------------------*/ ++ ++/* ++ . Bank Select Register: ++ . ++ . yyyy yyyy 0000 00xx ++ . xx = bank number ++ . yyyy yyyy = 0x33, for identification purposes. ++*/ ++#define BANK_SELECT 14 ++ ++// Transmit Control Register ++/* BANK 0 */ ++#define TCR_REG 0x0000 // transmit control register ++#define TCR_ENABLE 0x0001 // When 1 we can transmit ++#define TCR_LOOP 0x0002 // Controls output pin LBK ++#define TCR_FORCOL 0x0004 // When 1 will force a collision ++#define TCR_PAD_EN 0x0080 // When 1 will pad tx frames < 64 bytes w/0 ++#define TCR_NOCRC 0x0100 // When 1 will not append CRC to tx frames ++#define TCR_MON_CSN 0x0400 // When 1 tx monitors carrier ++#define TCR_FDUPLX 0x0800 // When 1 enables full duplex operation ++#define TCR_STP_SQET 0x1000 // When 1 stops tx if Signal Quality Error ++#define TCR_EPH_LOOP 0x2000 // When 1 enables EPH block loopback ++#define TCR_SWFDUP 0x8000 // When 1 enables Switched Full Duplex mode ++ ++#define TCR_CLEAR 0 /* do NOTHING */ ++/* the default settings for the TCR register : */ ++/* QUESTION: do I want to enable padding of short packets ? 
*/ ++#define TCR_DEFAULT TCR_ENABLE ++ ++ ++// EPH Status Register ++/* BANK 0 */ ++#define EPH_STATUS_REG 0x0002 ++#define ES_TX_SUC 0x0001 // Last TX was successful ++#define ES_SNGL_COL 0x0002 // Single collision detected for last tx ++#define ES_MUL_COL 0x0004 // Multiple collisions detected for last tx ++#define ES_LTX_MULT 0x0008 // Last tx was a multicast ++#define ES_16COL 0x0010 // 16 Collisions Reached ++#define ES_SQET 0x0020 // Signal Quality Error Test ++#define ES_LTXBRD 0x0040 // Last tx was a broadcast ++#define ES_TXDEFR 0x0080 // Transmit Deferred ++#define ES_LATCOL 0x0200 // Late collision detected on last tx ++#define ES_LOSTCARR 0x0400 // Lost Carrier Sense ++#define ES_EXC_DEF 0x0800 // Excessive Deferral ++#define ES_CTR_ROL 0x1000 // Counter Roll Over indication ++#define ES_LINK_OK 0x4000 // Driven by inverted value of nLNK pin ++#define ES_TXUNRN 0x8000 // Tx Underrun ++ ++ ++// Receive Control Register ++/* BANK 0 */ ++#define RCR_REG 0x0004 ++#define RCR_RX_ABORT 0x0001 // Set if a rx frame was aborted ++#define RCR_PRMS 0x0002 // Enable promiscuous mode ++#define RCR_ALMUL 0x0004 // When set accepts all multicast frames ++#define RCR_RXEN 0x0100 // IFF this is set, we can receive packets ++#define RCR_STRIP_CRC 0x0200 // When set strips CRC from rx packets ++#define RCR_ABORT_ENB 0x0200 // When set will abort rx on collision ++#define RCR_FILT_CAR 0x0400 // When set filters leading 12 bit s of carrier ++#define RCR_SOFTRST 0x8000 // resets the chip ++ ++/* the normal settings for the RCR register : */ ++#define RCR_DEFAULT (RCR_STRIP_CRC | RCR_RXEN) ++#define RCR_CLEAR 0x0 // set it to a base state ++ ++// Counter Register ++/* BANK 0 */ ++#define COUNTER_REG 0x0006 ++ ++// Memory Information Register ++/* BANK 0 */ ++#define MIR_REG 0x0008 ++ ++// Receive/Phy Control Register ++/* BANK 0 */ ++#define RPC_REG 0x000A ++#define RPC_SPEED 0x2000 // When 1 PHY is in 100Mbps mode. ++#define RPC_DPLX 0x1000 // When 1 PHY is in Full-Duplex Mode ++#define RPC_ANEG 0x0800 // When 1 PHY is in Auto-Negotiate Mode ++#define RPC_LSXA_SHFT 5 // Bits to shift LS2A,LS1A,LS0A to lsb ++#define RPC_LSXB_SHFT 2 // Bits to get LS2B,LS1B,LS0B to lsb ++#define RPC_LED_100_10 (0x00) // LED = 100Mbps OR's with 10Mbps link detect ++#define RPC_LED_RES (0x01) // LED = Reserved ++#define RPC_LED_10 (0x02) // LED = 10Mbps link detect ++#define RPC_LED_FD (0x03) // LED = Full Duplex Mode ++#define RPC_LED_TX_RX (0x04) // LED = TX or RX packet occurred ++#define RPC_LED_100 (0x05) // LED = 100Mbps link dectect ++#define RPC_LED_TX (0x06) // LED = TX packet occurred ++#define RPC_LED_RX (0x07) // LED = RX packet occurred ++#define RPC_DEFAULT (RPC_ANEG | (RPC_LED_100 << RPC_LSXA_SHFT) | (RPC_LED_FD << RPC_LSXB_SHFT) | RPC_SPEED | RPC_DPLX) ++ ++/* Bank 0 0x000C is reserved */ ++ ++// Bank Select Register ++/* All Banks */ ++#define BSR_REG 0x000E ++ ++ ++// Configuration Reg ++/* BANK 1 */ ++#define CONFIG_REG 0x0000 ++#define CONFIG_EXT_PHY 0x0200 // 1=external MII, 0=internal Phy ++#define CONFIG_GPCNTRL 0x0400 // Inverse value drives pin nCNTRL ++#define CONFIG_NO_WAIT 0x1000 // When 1 no extra wait states on ISA bus ++#define CONFIG_EPH_POWER_EN 0x8000 // When 0 EPH is placed into low power mode. 
++ ++// Default is powered-up, Internal Phy, Wait States, and pin nCNTRL=low ++#define CONFIG_DEFAULT (CONFIG_EPH_POWER_EN) ++ ++ ++// Base Address Register ++/* BANK 1 */ ++#define BASE_REG 0x0002 ++ ++ ++// Individual Address Registers ++/* BANK 1 */ ++#define ADDR0_REG 0x0004 ++#define ADDR1_REG 0x0006 ++#define ADDR2_REG 0x0008 ++ ++ ++// General Purpose Register ++/* BANK 1 */ ++#define GP_REG 0x000A ++ ++ ++// Control Register ++/* BANK 1 */ ++#define CTL_REG 0x000C ++#define CTL_RCV_BAD 0x4000 // When 1 bad CRC packets are received ++#define CTL_AUTO_RELEASE 0x0800 // When 1 tx pages are released automatically ++#define CTL_LE_ENABLE 0x0080 // When 1 enables Link Error interrupt ++#define CTL_CR_ENABLE 0x0040 // When 1 enables Counter Rollover interrupt ++#define CTL_TE_ENABLE 0x0020 // When 1 enables Transmit Error interrupt ++#define CTL_EEPROM_SELECT 0x0004 // Controls EEPROM reload & store ++#define CTL_RELOAD 0x0002 // When set reads EEPROM into registers ++#define CTL_STORE 0x0001 // When set stores registers into EEPROM ++ ++ ++// MMU Command Register ++/* BANK 2 */ ++#define MMU_CMD_REG 0x0000 ++#define MC_BUSY 1 // When 1 the last release has not completed ++#define MC_NOP (0<<5) // No Op ++#define MC_ALLOC (1<<5) // OR with number of 256 byte packets ++#define MC_RESET (2<<5) // Reset MMU to initial state ++#define MC_REMOVE (3<<5) // Remove the current rx packet ++#define MC_RELEASE (4<<5) // Remove and release the current rx packet ++#define MC_FREEPKT (5<<5) // Release packet in PNR register ++#define MC_ENQUEUE (6<<5) // Enqueue the packet for transmit ++#define MC_RSTTXFIFO (7<<5) // Reset the TX FIFOs ++ ++ ++// Packet Number Register ++/* BANK 2 */ ++#define PN_REG 0x0002 ++ ++ ++// Allocation Result Register ++/* BANK 2 */ ++#define AR_REG 0x0003 ++#define AR_FAILED 0x80 // Alocation Failed ++ ++ ++// RX FIFO Ports Register ++/* BANK 2 */ ++#define RXFIFO_REG 0x0004 // Must be read as a word ++#define RXFIFO_REMPTY 0x8000 // RX FIFO Empty ++ ++ ++// TX FIFO Ports Register ++/* BANK 2 */ ++#define TXFIFO_REG RXFIFO_REG // Must be read as a word ++#define TXFIFO_TEMPTY 0x80 // TX FIFO Empty ++ ++ ++// Pointer Register ++/* BANK 2 */ ++#define PTR_REG 0x0006 ++#define PTR_RCV 0x8000 // 1=Receive area, 0=Transmit area ++#define PTR_AUTOINC 0x4000 // Auto increment the pointer on each access ++#define PTR_READ 0x2000 // When 1 the operation is a read ++ ++ ++// Data Register ++/* BANK 2 */ ++#define DATA_REG 0x0008 ++ ++ ++// Interrupt Status/Acknowledge Register ++/* BANK 2 */ ++#define INT_REG 0x000C ++ ++ ++// Interrupt Mask Register ++/* BANK 2 */ ++#define IM_REG 0x000D ++#define IM_MDINT 0x80 // PHY MI Register 18 Interrupt ++#define IM_ERCV_INT 0x40 // Early Receive Interrupt ++#define IM_EPH_INT 0x20 // Set by Etheret Protocol Handler section ++#define IM_RX_OVRN_INT 0x10 // Set by Receiver Overruns ++#define IM_ALLOC_INT 0x08 // Set when allocation request is completed ++#define IM_TX_EMPTY_INT 0x04 // Set if the TX FIFO goes empty ++#define IM_TX_INT 0x02 // Transmit Interrrupt ++#define IM_RCV_INT 0x01 // Receive Interrupt ++ ++ ++// Multicast Table Registers ++/* BANK 3 */ ++#define MCAST_REG1 0x0000 ++#define MCAST_REG2 0x0002 ++#define MCAST_REG3 0x0004 ++#define MCAST_REG4 0x0006 ++ ++ ++// Management Interface Register (MII) ++/* BANK 3 */ ++#define MII_REG 0x0008 ++#define MII_MSK_CRS100 0x4000 // Disables CRS100 detection during tx half dup ++#define MII_MDOE 0x0008 // MII Output Enable ++#define MII_MCLK 0x0004 // MII Clock, pin MDCLK ++#define 
MII_MDI 0x0002 // MII Input, pin MDI ++#define MII_MDO 0x0001 // MII Output, pin MDO ++ ++ ++// Revision Register ++/* BANK 3 */ ++#define REV_REG 0x000A /* ( hi: chip id low: rev # ) */ ++ ++ ++// Early RCV Register ++/* BANK 3 */ ++/* this is NOT on SMC9192 */ ++#define ERCV_REG 0x000C ++#define ERCV_RCV_DISCRD 0x0080 // When 1 discards a packet being received ++#define ERCV_THRESHOLD 0x001F // ERCV Threshold Mask ++ ++// External Register ++/* BANK 7 */ ++#define EXT_REG 0x0000 ++ ++ ++#define CHIP_9192 3 ++#define CHIP_9194 4 ++#define CHIP_9195 5 ++#define CHIP_9196 6 ++#define CHIP_91100 7 ++#define CHIP_91100FD 8 ++#define CHIP_91111FD 9 ++ ++static const char * chip_ids[ 15 ] = { ++ NULL, NULL, NULL, ++ /* 3 */ "SMC91C90/91C92", ++ /* 4 */ "SMC91C94", ++ /* 5 */ "SMC91C95", ++ /* 6 */ "SMC91C96", ++ /* 7 */ "SMC91C100", ++ /* 8 */ "SMC91C100FD", ++ /* 9 */ "SMC91C11xFD", ++ NULL, NULL, ++ NULL, NULL, NULL}; ++ ++/* ++ . Transmit status bits ++*/ ++#define TS_SUCCESS 0x0001 ++#define TS_LOSTCAR 0x0400 ++#define TS_LATCOL 0x0200 ++#define TS_16COL 0x0010 ++ ++/* ++ . Receive status bits ++*/ ++#define RS_ALGNERR 0x8000 ++#define RS_BRODCAST 0x4000 ++#define RS_BADCRC 0x2000 ++#define RS_ODDFRAME 0x1000 // bug: the LAN91C111 never sets this on receive ++#define RS_TOOLONG 0x0800 ++#define RS_TOOSHORT 0x0400 ++#define RS_MULTICAST 0x0001 ++#define RS_ERRORS (RS_ALGNERR | RS_BADCRC | RS_TOOLONG | RS_TOOSHORT) ++ ++ ++// PHY Types ++enum { ++ PHY_LAN83C183 = 1, // LAN91C111 Internal PHY ++ PHY_LAN83C180 ++}; ++ ++ ++// PHY Register Addresses (LAN91C111 Internal PHY) ++ ++// PHY Control Register ++#define PHY_CNTL_REG 0x00 ++#define PHY_CNTL_RST 0x8000 // 1=PHY Reset ++#define PHY_CNTL_LPBK 0x4000 // 1=PHY Loopback ++#define PHY_CNTL_SPEED 0x2000 // 1=100Mbps, 0=10Mpbs ++#define PHY_CNTL_ANEG_EN 0x1000 // 1=Enable Auto negotiation ++#define PHY_CNTL_PDN 0x0800 // 1=PHY Power Down mode ++#define PHY_CNTL_MII_DIS 0x0400 // 1=MII 4 bit interface disabled ++#define PHY_CNTL_ANEG_RST 0x0200 // 1=Reset Auto negotiate ++#define PHY_CNTL_DPLX 0x0100 // 1=Full Duplex, 0=Half Duplex ++#define PHY_CNTL_COLTST 0x0080 // 1= MII Colision Test ++ ++// PHY Status Register ++#define PHY_STAT_REG 0x01 ++#define PHY_STAT_CAP_T4 0x8000 // 1=100Base-T4 capable ++#define PHY_STAT_CAP_TXF 0x4000 // 1=100Base-X full duplex capable ++#define PHY_STAT_CAP_TXH 0x2000 // 1=100Base-X half duplex capable ++#define PHY_STAT_CAP_TF 0x1000 // 1=10Mbps full duplex capable ++#define PHY_STAT_CAP_TH 0x0800 // 1=10Mbps half duplex capable ++#define PHY_STAT_CAP_SUPR 0x0040 // 1=recv mgmt frames with not preamble ++#define PHY_STAT_ANEG_ACK 0x0020 // 1=ANEG has completed ++#define PHY_STAT_REM_FLT 0x0010 // 1=Remote Fault detected ++#define PHY_STAT_CAP_ANEG 0x0008 // 1=Auto negotiate capable ++#define PHY_STAT_LINK 0x0004 // 1=valid link ++#define PHY_STAT_JAB 0x0002 // 1=10Mbps jabber condition ++#define PHY_STAT_EXREG 0x0001 // 1=extended registers implemented ++ ++// PHY Identifier Registers ++#define PHY_ID1_REG 0x02 // PHY Identifier 1 ++#define PHY_ID2_REG 0x03 // PHY Identifier 2 ++ ++// PHY Auto-Negotiation Advertisement Register ++#define PHY_AD_REG 0x04 ++#define PHY_AD_NP 0x8000 // 1=PHY requests exchange of Next Page ++#define PHY_AD_ACK 0x4000 // 1=got link code word from remote ++#define PHY_AD_RF 0x2000 // 1=advertise remote fault ++#define PHY_AD_T4 0x0200 // 1=PHY is capable of 100Base-T4 ++#define PHY_AD_TX_FDX 0x0100 // 1=PHY is capable of 100Base-TX FDPLX ++#define PHY_AD_TX_HDX 0x0080 // 1=PHY is capable 
of 100Base-TX HDPLX ++#define PHY_AD_10_FDX 0x0040 // 1=PHY is capable of 10Base-T FDPLX ++#define PHY_AD_10_HDX 0x0020 // 1=PHY is capable of 10Base-T HDPLX ++#define PHY_AD_CSMA 0x0001 // 1=PHY is capable of 802.3 CMSA ++ ++// PHY Auto-negotiation Remote End Capability Register ++#define PHY_RMT_REG 0x05 ++// Uses same bit definitions as PHY_AD_REG ++ ++// PHY Configuration Register 1 ++#define PHY_CFG1_REG 0x10 ++#define PHY_CFG1_LNKDIS 0x8000 // 1=Rx Link Detect Function disabled ++#define PHY_CFG1_XMTDIS 0x4000 // 1=TP Transmitter Disabled ++#define PHY_CFG1_XMTPDN 0x2000 // 1=TP Transmitter Powered Down ++#define PHY_CFG1_BYPSCR 0x0400 // 1=Bypass scrambler/descrambler ++#define PHY_CFG1_UNSCDS 0x0200 // 1=Unscramble Idle Reception Disable ++#define PHY_CFG1_EQLZR 0x0100 // 1=Rx Equalizer Disabled ++#define PHY_CFG1_CABLE 0x0080 // 1=STP(150ohm), 0=UTP(100ohm) ++#define PHY_CFG1_RLVL0 0x0040 // 1=Rx Squelch level reduced by 4.5db ++#define PHY_CFG1_TLVL_SHIFT 2 // Transmit Output Level Adjust ++#define PHY_CFG1_TLVL_MASK 0x003C ++#define PHY_CFG1_TRF_MASK 0x0003 // Transmitter Rise/Fall time ++ ++ ++// PHY Configuration Register 2 ++#define PHY_CFG2_REG 0x11 ++#define PHY_CFG2_APOLDIS 0x0020 // 1=Auto Polarity Correction disabled ++#define PHY_CFG2_JABDIS 0x0010 // 1=Jabber disabled ++#define PHY_CFG2_MREG 0x0008 // 1=Multiple register access (MII mgt) ++#define PHY_CFG2_INTMDIO 0x0004 // 1=Interrupt signaled with MDIO pulseo ++ ++// PHY Status Output (and Interrupt status) Register ++#define PHY_INT_REG 0x12 // Status Output (Interrupt Status) ++#define PHY_INT_INT 0x8000 // 1=bits have changed since last read ++#define PHY_INT_LNKFAIL 0x4000 // 1=Link Not detected ++#define PHY_INT_LOSSSYNC 0x2000 // 1=Descrambler has lost sync ++#define PHY_INT_CWRD 0x1000 // 1=Invalid 4B5B code detected on rx ++#define PHY_INT_SSD 0x0800 // 1=No Start Of Stream detected on rx ++#define PHY_INT_ESD 0x0400 // 1=No End Of Stream detected on rx ++#define PHY_INT_RPOL 0x0200 // 1=Reverse Polarity detected ++#define PHY_INT_JAB 0x0100 // 1=Jabber detected ++#define PHY_INT_SPDDET 0x0080 // 1=100Base-TX mode, 0=10Base-T mode ++#define PHY_INT_DPLXDET 0x0040 // 1=Device in Full Duplex ++ ++// PHY Interrupt/Status Mask Register ++#define PHY_MASK_REG 0x13 // Interrupt Mask ++// Uses the same bit definitions as PHY_INT_REG ++ ++ ++ ++/*------------------------------------------------------------------------- ++ . I define some macros to make it easier to do somewhat common ++ . or slightly complicated, repeated tasks. ++ --------------------------------------------------------------------------*/ ++ ++/* select a register bank, 0 to 3 */ ++ ++#define SMC_SELECT_BANK(x) { outw( x, ioaddr + BANK_SELECT ); } ++ ++/* this enables an interrupt in the interrupt mask register */ ++#define SMC_ENABLE_INT(x) {\ ++ unsigned char mask;\ ++ SMC_SELECT_BANK(2);\ ++ mask = inb( ioaddr + IM_REG );\ ++ mask |= (x);\ ++ outb( mask, ioaddr + IM_REG ); \ ++} ++ ++/* this disables an interrupt from the interrupt mask register */ ++ ++#define SMC_DISABLE_INT(x) {\ ++ unsigned char mask;\ ++ SMC_SELECT_BANK(2);\ ++ mask = inb( ioaddr + IM_REG );\ ++ mask &= ~(x);\ ++ outb( mask, ioaddr + IM_REG ); \ ++} ++ ++/*---------------------------------------------------------------------- ++ . Define the interrupts that I want to receive from the card ++ . ++ . I want: ++ . IM_EPH_INT, for nasty errors ++ . IM_RCV_INT, for happy received packets ++ . IM_RX_OVRN_INT, because I have to kick the receiver ++ . 
IM_MDINT, for PHY Register 18 Status Changes ++ --------------------------------------------------------------------------*/ ++#define SMC_INTERRUPT_MASK (IM_EPH_INT | IM_RX_OVRN_INT | IM_RCV_INT | \ ++ IM_MDINT) ++ ++ ++#ifdef CONFIG_SYSCTL ++ ++ ++/* ++ * Declarations for the sysctl interface, which allows users the ability to ++ * control the finer aspects of the LAN91C111 chip. Since the smc ++ * module currently registers its sysctl table dynamically, the sysctl path ++ * for module FOO is /proc/sys/dev/ethX/FOO ++ */ ++#define CTL_SMC (CTL_BUS+1389) // arbitrary and hopefully unused ++ ++enum { ++ CTL_SMC_INFO = 1, // Sysctl files information ++ CTL_SMC_SWVER, // Driver Software Version Info ++ CTL_SMC_SWFDUP, // Switched Full Duplex Mode ++ CTL_SMC_EPHLOOP, // EPH Block Internal Loopback ++ CTL_SMC_MIIOP, // MII Operation ++ CTL_SMC_AUTONEG, // Auto-negotiate Mode ++ CTL_SMC_RFDUPLX, // Request Full Duplex Mode ++ CTL_SMC_RSPEED, // Request Speed Selection ++ CTL_SMC_AFDUPLX, // Actual Full Duplex Mode ++ CTL_SMC_ASPEED, // Actual Speed Selection ++ CTL_SMC_LNKFAIL, // Link Failed ++ CTL_SMC_FORCOL, // Force a Collision ++ CTL_SMC_FILTCAR, // Filter Carrier ++ CTL_SMC_FREEMEM, // Free Buffer Memory ++ CTL_SMC_TOTMEM, // Total Buffer Memory ++ CTL_SMC_LEDA, // Output of LED-A ++ CTL_SMC_LEDB, // Output of LED-B ++ CTL_SMC_CHIPREV, // LAN91C111 Chip Revision ID ++#ifdef SMC_DEBUG ++ // Register access for debugging ++ CTL_SMC_REG_BSR, // Bank Select ++ CTL_SMC_REG_TCR, // Transmit Control ++ CTL_SMC_REG_ESR, // EPH Status ++ CTL_SMC_REG_RCR, // Receive Control ++ CTL_SMC_REG_CTRR, // Counter ++ CTL_SMC_REG_MIR, // Memory Information ++ CTL_SMC_REG_RPCR, // Receive/Phy Control ++ CTL_SMC_REG_CFGR, // Configuration ++ CTL_SMC_REG_BAR, // Base Address ++ CTL_SMC_REG_IAR0, // Individual Address 0 ++ CTL_SMC_REG_IAR1, // Individual Address 1 ++ CTL_SMC_REG_IAR2, // Individual Address 2 ++ CTL_SMC_REG_GPR, // General Purpose ++ CTL_SMC_REG_CTLR, // Control ++ CTL_SMC_REG_MCR, // MMU Command ++ CTL_SMC_REG_PNR, // Packet Number ++ CTL_SMC_REG_FPR, // FIFO Ports ++ CTL_SMC_REG_PTR, // Pointer ++ CTL_SMC_REG_DR, // Data ++ CTL_SMC_REG_ISR, // Interrupt Status ++ CTL_SMC_REG_MTR1, // Multicast Table Entry 1 ++ CTL_SMC_REG_MTR2, // Multicast Table Entry 2 ++ CTL_SMC_REG_MTR3, // Multicast Table Entry 3 ++ CTL_SMC_REG_MTR4, // Multicast Table Entry 4 ++ CTL_SMC_REG_MIIR, // Management Interface ++ CTL_SMC_REG_REVR, // Revision ++ CTL_SMC_REG_ERCVR, // Early RCV ++ CTL_SMC_REG_EXTR, // External ++ CTL_SMC_PHY_CTRL, // PHY Control ++ CTL_SMC_PHY_STAT, // PHY Status ++ CTL_SMC_PHY_ID1, // PHY ID1 ++ CTL_SMC_PHY_ID2, // PHY ID2 ++ CTL_SMC_PHY_ADC, // PHY Advertise Capability ++ CTL_SMC_PHY_REMC, // PHY Advertise Capability ++ CTL_SMC_PHY_CFG1, // PHY Configuration 1 ++ CTL_SMC_PHY_CFG2, // PHY Configuration 2 ++ CTL_SMC_PHY_INT, // PHY Interrupt/Status Output ++ CTL_SMC_PHY_MASK, // PHY Interrupt/Status Mask ++#endif ++ // --------------------------------------------------- ++ CTL_SMC_LAST_ENTRY // Add new entries above the line ++}; ++ ++#endif // CONFIG_SYSCTL ++ ++#endif /* _SMC_91111_H_ */ ++ ++ +--- linux/drivers/xenomai/net/drivers/e1000e/phy.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/phy.c 2021-04-07 16:01:27.229634163 +0800 +@@ -0,0 +1,3381 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. 
++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include ++ ++#include "e1000.h" ++ ++static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw); ++static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw); ++static s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active); ++static s32 e1000_wait_autoneg(struct e1000_hw *hw); ++static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg); ++static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset, ++ u16 *data, bool read, bool page_set); ++static u32 e1000_get_phy_addr_for_hv_page(u32 page); ++static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset, ++ u16 *data, bool read); ++ ++/* Cable length tables */ ++static const u16 e1000_m88_cable_length_table[] = { ++ 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED }; ++#define M88E1000_CABLE_LENGTH_TABLE_SIZE \ ++ ARRAY_SIZE(e1000_m88_cable_length_table) ++ ++static const u16 e1000_igp_2_cable_length_table[] = { ++ 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, 0, 0, 0, 3, ++ 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, 6, 10, 14, 18, 22, ++ 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, 21, 26, 31, 35, 40, ++ 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, 40, 45, 51, 56, 61, ++ 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, 60, 66, 72, 77, 82, ++ 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, 83, 89, 95, ++ 100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121, ++ 124}; ++#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \ ++ ARRAY_SIZE(e1000_igp_2_cable_length_table) ++ ++#define BM_PHY_REG_PAGE(offset) \ ++ ((u16)(((offset) >> PHY_PAGE_SHIFT) & 0xFFFF)) ++#define BM_PHY_REG_NUM(offset) \ ++ ((u16)(((offset) & MAX_PHY_REG_ADDRESS) |\ ++ (((offset) >> (PHY_UPPER_SHIFT - PHY_PAGE_SHIFT)) &\ ++ ~MAX_PHY_REG_ADDRESS))) ++ ++#define HV_INTC_FC_PAGE_START 768 ++#define I82578_ADDR_REG 29 ++#define I82577_ADDR_REG 16 ++#define I82577_CFG_REG 22 ++#define I82577_CFG_ASSERT_CRS_ON_TX (1 << 15) ++#define I82577_CFG_ENABLE_DOWNSHIFT (3 << 10) /* auto downshift 100/10 */ ++#define I82577_CTRL_REG 23 ++ ++/* 82577 specific PHY registers */ ++#define I82577_PHY_CTRL_2 18 ++#define I82577_PHY_STATUS_2 26 ++#define I82577_PHY_DIAG_STATUS 31 ++ ++/* I82577 PHY Status 2 */ ++#define I82577_PHY_STATUS2_REV_POLARITY 0x0400 ++#define I82577_PHY_STATUS2_MDIX 0x0800 ++#define I82577_PHY_STATUS2_SPEED_MASK 0x0300 ++#define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200 ++ ++/* I82577 PHY Control 2 */ ++#define I82577_PHY_CTRL2_AUTO_MDIX 0x0400 ++#define I82577_PHY_CTRL2_FORCE_MDI_MDIX 0x0200 ++ ++/* I82577 PHY Diagnostics 
Status */ ++#define I82577_DSTATUS_CABLE_LENGTH 0x03FC ++#define I82577_DSTATUS_CABLE_LENGTH_SHIFT 2 ++ ++/* BM PHY Copper Specific Control 1 */ ++#define BM_CS_CTRL1 16 ++ ++#define HV_MUX_DATA_CTRL PHY_REG(776, 16) ++#define HV_MUX_DATA_CTRL_GEN_TO_MAC 0x0400 ++#define HV_MUX_DATA_CTRL_FORCE_SPEED 0x0004 ++ ++/** ++ * e1000e_check_reset_block_generic - Check if PHY reset is blocked ++ * @hw: pointer to the HW structure ++ * ++ * Read the PHY management control register and check whether a PHY reset ++ * is blocked. If a reset is not blocked return 0, otherwise ++ * return E1000_BLK_PHY_RESET (12). ++ **/ ++s32 e1000e_check_reset_block_generic(struct e1000_hw *hw) ++{ ++ u32 manc; ++ ++ manc = er32(MANC); ++ ++ return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? ++ E1000_BLK_PHY_RESET : 0; ++} ++ ++/** ++ * e1000e_get_phy_id - Retrieve the PHY ID and revision ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY registers and stores the PHY ID and possibly the PHY ++ * revision in the hardware structure. ++ **/ ++s32 e1000e_get_phy_id(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_id; ++ u16 retry_count = 0; ++ ++ if (!(phy->ops.read_reg)) ++ goto out; ++ ++ while (retry_count < 2) { ++ ret_val = e1e_rphy(hw, PHY_ID1, &phy_id); ++ if (ret_val) ++ goto out; ++ ++ phy->id = (u32)(phy_id << 16); ++ udelay(20); ++ ret_val = e1e_rphy(hw, PHY_ID2, &phy_id); ++ if (ret_val) ++ goto out; ++ ++ phy->id |= (u32)(phy_id & PHY_REVISION_MASK); ++ phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); ++ ++ if (phy->id != 0 && phy->id != PHY_REVISION_MASK) ++ goto out; ++ ++ retry_count++; ++ } ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_reset_dsp - Reset PHY DSP ++ * @hw: pointer to the HW structure ++ * ++ * Reset the digital signal processor. ++ **/ ++s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ ret_val = e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0xC1); ++ if (ret_val) ++ return ret_val; ++ ++ return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0); ++} ++ ++/** ++ * e1000e_read_phy_reg_mdic - Read MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the MDI control register in the PHY at offset and stores the ++ * information read to data. ++ **/ ++s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ e_dbg("PHY Address %d is out of range\n", offset); ++ return -E1000_ERR_PARAM; ++ } ++ ++ /* ++ * Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. 
++ */ ++ mdic = ((offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_READ)); ++ ++ ew32(MDIC, mdic); ++ ++ /* ++ * Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ udelay(50); ++ mdic = er32(MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ e_dbg("MDI Read did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ e_dbg("MDI Error\n"); ++ return -E1000_ERR_PHY; ++ } ++ *data = (u16) mdic; ++ ++ /* ++ * Allow some time after each MDIC transaction to avoid ++ * reading duplicate data in the next MDIC transaction. ++ */ ++ if (hw->mac.type == e1000_pch2lan) ++ udelay(100); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_write_phy_reg_mdic - Write MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write to register at offset ++ * ++ * Writes data to MDI control register in the PHY at offset. ++ **/ ++s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ e_dbg("PHY Address %d is out of range\n", offset); ++ return -E1000_ERR_PARAM; ++ } ++ ++ /* ++ * Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = (((u32)data) | ++ (offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_WRITE)); ++ ++ ew32(MDIC, mdic); ++ ++ /* ++ * Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ udelay(50); ++ mdic = er32(MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ e_dbg("MDI Write did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ e_dbg("MDI Error\n"); ++ return -E1000_ERR_PHY; ++ } ++ ++ /* ++ * Allow some time after each MDIC transaction to avoid ++ * reading duplicate data in the next MDIC transaction. ++ */ ++ if (hw->mac.type == e1000_pch2lan) ++ udelay(100); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_read_phy_reg_m88 - Read m88 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. ++ **/ ++s32 e1000e_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ hw->phy.ops.release(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_write_phy_reg_m88 - Write m88 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. 
++ **/ ++s32 e1000e_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ hw->phy.ops.release(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * @brief Set page as on IGP-like PHY(s) ++ * @param hw pointer to the HW structure ++ * @param page page to set (shifted left when necessary) ++ * ++ * Sets PHY page required for PHY register access. Assumes semaphore is ++ * already acquired. Note, this function sets phy.addr to 1 so the caller ++ * must set it appropriately (if necessary) after this function returns. ++ */ ++s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page) ++{ ++ e_dbg("Setting page 0x%x\n", page); ++ ++ hw->phy.addr = 1; ++ ++ return e1000e_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, page); ++} ++ ++/** ++ * __e1000e_read_phy_reg_igp - Read igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and stores the retrieved information in data. Release any acquired ++ * semaphores before exiting. ++ **/ ++static s32 __e1000e_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data, ++ bool locked) ++{ ++ s32 ret_val = 0; ++ ++ if (!locked) { ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = e1000e_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) ++ goto release; ++ } ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++release: ++ if (!locked) ++ hw->phy.ops.release(hw); ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_read_phy_reg_igp - Read igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore then reads the PHY register at offset and stores the ++ * retrieved information in data. ++ * Release the acquired semaphore before exiting. ++ **/ ++s32 e1000e_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000e_read_phy_reg_igp(hw, offset, data, false); ++} ++ ++/** ++ * e1000e_read_phy_reg_igp_locked - Read igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY register at offset and stores the retrieved information ++ * in data. Assumes semaphore already acquired. ++ **/ ++s32 e1000e_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000e_read_phy_reg_igp(hw, offset, data, true); ++} ++ ++/** ++ * e1000e_write_phy_reg_igp - Write igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. 
++ **/ ++static s32 __e1000e_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data, ++ bool locked) ++{ ++ s32 ret_val = 0; ++ ++ if (!locked) { ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = e1000e_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) ++ goto release; ++ } ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++release: ++ if (!locked) ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_write_phy_reg_igp - Write igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 e1000e_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000e_write_phy_reg_igp(hw, offset, data, false); ++} ++ ++/** ++ * e1000e_write_phy_reg_igp_locked - Write igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes the data to PHY register at the offset. ++ * Assumes semaphore already acquired. ++ **/ ++s32 e1000e_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000e_write_phy_reg_igp(hw, offset, data, true); ++} ++ ++/** ++ * __e1000_read_kmrn_reg - Read kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary. Then reads the PHY register at offset ++ * using the kumeran interface. The information retrieved is stored in data. ++ * Release any acquired semaphores before exiting. ++ **/ ++static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data, ++ bool locked) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val = 0; ++ ++ if (!locked) { ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; ++ ew32(KMRNCTRLSTA, kmrnctrlsta); ++ e1e_flush(); ++ ++ udelay(2); ++ ++ kmrnctrlsta = er32(KMRNCTRLSTA); ++ *data = (u16)kmrnctrlsta; ++ ++ if (!locked) ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_read_kmrn_reg - Read kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore then reads the PHY register at offset using the ++ * kumeran interface. The information retrieved is stored in data. ++ * Release the acquired semaphore before exiting. ++ **/ ++s32 e1000e_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000_read_kmrn_reg(hw, offset, data, false); ++} ++ ++/** ++ * e1000e_read_kmrn_reg_locked - Read kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY register at offset using the kumeran interface. The ++ * information retrieved is stored in data. ++ * Assumes semaphore already acquired. 
++ **/ ++s32 e1000e_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000_read_kmrn_reg(hw, offset, data, true); ++} ++ ++/** ++ * __e1000_write_kmrn_reg - Write kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary. Then write the data to PHY register ++ * at the offset using the kumeran interface. Release any acquired semaphores ++ * before exiting. ++ **/ ++static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data, ++ bool locked) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val = 0; ++ ++ if (!locked) { ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | data; ++ ew32(KMRNCTRLSTA, kmrnctrlsta); ++ e1e_flush(); ++ ++ udelay(2); ++ ++ if (!locked) ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_write_kmrn_reg - Write kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore then writes the data to the PHY register at the offset ++ * using the kumeran interface. Release the acquired semaphore before exiting. ++ **/ ++s32 e1000e_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000_write_kmrn_reg(hw, offset, data, false); ++} ++ ++/** ++ * e1000e_write_kmrn_reg_locked - Write kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Write the data to PHY register at the offset using the kumeran interface. ++ * Assumes semaphore already acquired. ++ **/ ++s32 e1000e_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000_write_kmrn_reg(hw, offset, data, true); ++} ++ ++/** ++ * e1000_copper_link_setup_82577 - Setup 82577 PHY for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up Carrier-sense on Transmit and downshift values. ++ **/ ++s32 e1000_copper_link_setup_82577(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_data; ++ ++ /* Enable CRS on Tx. This must be set for half-duplex operation. */ ++ ret_val = e1e_rphy(hw, I82577_CFG_REG, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= I82577_CFG_ASSERT_CRS_ON_TX; ++ ++ /* Enable downshift */ ++ phy_data |= I82577_CFG_ENABLE_DOWNSHIFT; ++ ++ ret_val = e1e_wphy(hw, I82577_CFG_REG, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_copper_link_setup_m88 - Setup m88 PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock ++ * and downshift values are set also. ++ **/ ++s32 e1000e_copper_link_setup_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ ++ /* Enable CRS on Tx. This must be set for half-duplex operation. 
*/ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* For BM PHY this bit is downshift enable */ ++ if (phy->type != e1000_phy_bm) ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ++ /* ++ * Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ++ switch (phy->mdix) { ++ case 1: ++ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; ++ break; ++ case 2: ++ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; ++ break; ++ case 3: ++ phy_data |= M88E1000_PSCR_AUTO_X_1000T; ++ break; ++ case 0: ++ default: ++ phy_data |= M88E1000_PSCR_AUTO_X_MODE; ++ break; ++ } ++ ++ /* ++ * Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; ++ if (phy->disable_polarity_correction == 1) ++ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; ++ ++ /* Enable downshift on BM (disabled by default) */ ++ if (phy->type == e1000_phy_bm) ++ phy_data |= BME1000_PSCR_ENABLE_DOWNSHIFT; ++ ++ ret_val = e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((phy->type == e1000_phy_m88) && ++ (phy->revision < E1000_REVISION_4) && ++ (phy->id != BME1000_E_PHY_ID_R2)) { ++ /* ++ * Force TX_CLK in the Extended PHY Specific Control Register ++ * to 25MHz clock. ++ */ ++ ret_val = e1e_rphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ++ if ((phy->revision == 2) && ++ (phy->id == M88E1111_I_PHY_ID)) { ++ /* 82573L PHY - set the downshift counter to 5x. */ ++ phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK; ++ phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; ++ } else { ++ /* Configure Master and Slave downshift values */ ++ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); ++ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); ++ } ++ ret_val = e1e_wphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if ((phy->type == e1000_phy_bm) && (phy->id == BME1000_E_PHY_ID_R2)) { ++ /* Set PHY page 0, register 29 to 0x0003 */ ++ ret_val = e1e_wphy(hw, 29, 0x0003); ++ if (ret_val) ++ return ret_val; ++ ++ /* Set PHY page 0, register 30 to 0x0000 */ ++ ret_val = e1e_wphy(hw, 30, 0x0000); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Commit the changes. */ ++ ret_val = e1000e_commit_phy(hw); ++ if (ret_val) { ++ e_dbg("Error committing the PHY changes\n"); ++ return ret_val; ++ } ++ ++ if (phy->type == e1000_phy_82578) { ++ ret_val = e1e_rphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* 82578 PHY - set the downshift count to 1x. */ ++ phy_data |= I82578_EPSCR_DOWNSHIFT_ENABLE; ++ phy_data &= ~I82578_EPSCR_DOWNSHIFT_COUNTER_MASK; ++ ret_val = e1e_wphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_copper_link_setup_igp - Setup igp PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for ++ * igp PHY's. 
++ **/ ++s32 e1000e_copper_link_setup_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) { ++ e_dbg("Error resetting the PHY.\n"); ++ return ret_val; ++ } ++ ++ /* ++ * Wait 100ms for MAC to configure PHY from NVM settings, to avoid ++ * timeout issues when LFS is enabled. ++ */ ++ msleep(100); ++ ++ /* disable lplu d0 during driver init */ ++ ret_val = e1000_set_d0_lplu_state(hw, false); ++ if (ret_val) { ++ e_dbg("Error Disabling LPLU D0\n"); ++ return ret_val; ++ } ++ /* Configure mdi-mdix settings */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ ++ switch (phy->mdix) { ++ case 1: ++ data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 2: ++ data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 0: ++ default: ++ data |= IGP01E1000_PSCR_AUTO_MDIX; ++ break; ++ } ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ ++ /* set auto-master slave resolution settings */ ++ if (hw->mac.autoneg) { ++ /* ++ * when autonegotiation advertisement is only 1000Mbps then we ++ * should disable SmartSpeed and enable Auto MasterSlave ++ * resolution as hardware default. ++ */ ++ if (phy->autoneg_advertised == ADVERTISE_1000_FULL) { ++ /* Disable SmartSpeed */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Set auto Master/Slave resolution process */ ++ ret_val = e1e_rphy(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~CR_1000T_MS_ENABLE; ++ ret_val = e1e_wphy(hw, PHY_1000T_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ ret_val = e1e_rphy(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ /* load defaults for future use */ ++ phy->original_ms_type = (data & CR_1000T_MS_ENABLE) ? ++ ((data & CR_1000T_MS_VALUE) ? ++ e1000_ms_force_master : ++ e1000_ms_force_slave) : ++ e1000_ms_auto; ++ ++ switch (phy->ms_type) { ++ case e1000_ms_force_master: ++ data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_force_slave: ++ data |= CR_1000T_MS_ENABLE; ++ data &= ~(CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_auto: ++ data &= ~CR_1000T_MS_ENABLE; ++ default: ++ break; ++ } ++ ret_val = e1e_wphy(hw, PHY_1000T_CTRL, data); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_setup_autoneg - Configure PHY for auto-negotiation ++ * @hw: pointer to the HW structure ++ * ++ * Reads the MII auto-neg advertisement register and/or the 1000T control ++ * register and if the PHY is already setup for auto-negotiation, then ++ * return successful. Otherwise, setup advertisement and flow control to ++ * the appropriate values for the wanted auto-negotiation. ++ **/ ++static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 mii_autoneg_adv_reg; ++ u16 mii_1000t_ctrl_reg = 0; ++ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* Read the MII Auto-Neg Advertisement Register (Address 4). */ ++ ret_val = e1e_rphy(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) { ++ /* Read the MII 1000Base-T Control Register (Address 9). 
*/ ++ ret_val = e1e_rphy(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* ++ * Need to parse both autoneg_advertised and fc and set up ++ * the appropriate PHY registers. First we will parse for ++ * autoneg_advertised software override. Since we can advertise ++ * a plethora of combinations, we need to check each bit ++ * individually. ++ */ ++ ++ /* ++ * First we clear all the 10/100 mb speed bits in the Auto-Neg ++ * Advertisement Register (Address 4) and the 1000 mb speed bits in ++ * the 1000Base-T Control Register (Address 9). ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | ++ NWAY_AR_100TX_HD_CAPS | ++ NWAY_AR_10T_FD_CAPS | ++ NWAY_AR_10T_HD_CAPS); ++ mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); ++ ++ e_dbg("autoneg_advertised %x\n", phy->autoneg_advertised); ++ ++ /* Do we want to advertise 10 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_HALF) { ++ e_dbg("Advertise 10mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 10 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_FULL) { ++ e_dbg("Advertise 10mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_100_HALF) { ++ e_dbg("Advertise 100mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_100_FULL) { ++ e_dbg("Advertise 100mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; ++ } ++ ++ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_HALF) ++ e_dbg("Advertise 1000mb Half duplex request denied!\n"); ++ ++ /* Do we want to advertise 1000 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { ++ e_dbg("Advertise 1000mb Full duplex\n"); ++ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; ++ } ++ ++ /* ++ * Check for a software override of the flow control settings, and ++ * setup the PHY advertisement registers accordingly. If ++ * auto-negotiation is enabled, then software will have to set the ++ * "PAUSE" bits to the correct value in the Auto-Negotiation ++ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- ++ * negotiation. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * but we do not support receiving pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) are enabled. ++ * other: No software override. The flow control configuration ++ * in the EEPROM is used. ++ */ ++ switch (hw->fc.current_mode) { ++ case e1000_fc_none: ++ /* ++ * Flow control (Rx & Tx) is completely disabled by a ++ * software over-ride. ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_rx_pause: ++ /* ++ * Rx Flow control is enabled, and Tx Flow control is ++ * disabled, by a software over-ride. ++ * ++ * Since there really isn't a way to advertise that we are ++ * capable of Rx Pause ONLY, we will advertise that we ++ * support both symmetric and asymmetric Rx PAUSE. Later ++ * (in e1000e_config_fc_after_link_up) we will disable the ++ * hw's ability to send PAUSE frames. 
++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_tx_pause: ++ /* ++ * Tx Flow control is enabled, and Rx Flow control is ++ * disabled, by a software over-ride. ++ */ ++ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; ++ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; ++ break; ++ case e1000_fc_full: ++ /* ++ * Flow control (both Rx and Tx) is enabled by a software ++ * over-ride. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ default: ++ e_dbg("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ return ret_val; ++ } ++ ++ ret_val = e1e_wphy(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ++ e_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) ++ ret_val = e1e_wphy(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_copper_link_autoneg - Setup/Enable autoneg for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Performs initial bounds checking on autoneg advertisement parameter, then ++ * configure to advertise the full capability. Setup the PHY to autoneg ++ * and restart the negotiation process between the link partner. If ++ * autoneg_wait_to_complete, then wait for autoneg to complete before exiting. ++ **/ ++static s32 e1000_copper_link_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_ctrl; ++ ++ /* ++ * Perform some bounds checking on the autoneg advertisement ++ * parameter. ++ */ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* ++ * If autoneg_advertised is zero, we assume it was not defaulted ++ * by the calling code so we set to advertise full capability. ++ */ ++ if (phy->autoneg_advertised == 0) ++ phy->autoneg_advertised = phy->autoneg_mask; ++ ++ e_dbg("Reconfiguring auto-neg advertisement params\n"); ++ ret_val = e1000_phy_setup_autoneg(hw); ++ if (ret_val) { ++ e_dbg("Error Setting up Auto-Negotiation\n"); ++ return ret_val; ++ } ++ e_dbg("Restarting Auto-Neg\n"); ++ ++ /* ++ * Restart auto-negotiation by setting the Auto Neg Enable bit and ++ * the Auto Neg Restart bit in the PHY control register. ++ */ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ return ret_val; ++ ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Does the user want to wait for Auto-Neg to complete here, or ++ * check at a later time (for example, callback routine). ++ */ ++ if (phy->autoneg_wait_to_complete) { ++ ret_val = e1000_wait_autoneg(hw); ++ if (ret_val) { ++ e_dbg("Error while waiting for " ++ "autoneg to complete\n"); ++ return ret_val; ++ } ++ } ++ ++ hw->mac.get_link_status = 1; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_setup_copper_link - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Calls the appropriate function to configure the link for auto-neg or forced ++ * speed and duplex. Then we check for link, once link is established calls ++ * to configure collision distance and flow control are called. If link is ++ * not established, we return -E1000_ERR_PHY (-2). ++ **/ ++s32 e1000e_setup_copper_link(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ bool link; ++ ++ if (hw->mac.autoneg) { ++ /* ++ * Setup autoneg and flow control advertisement and perform ++ * autonegotiation. 
++ */ ++ ret_val = e1000_copper_link_autoneg(hw); ++ if (ret_val) ++ return ret_val; ++ } else { ++ /* ++ * PHY will be set to 10H, 10F, 100H or 100F ++ * depending on user settings. ++ */ ++ e_dbg("Forcing Speed and Duplex\n"); ++ ret_val = e1000_phy_force_speed_duplex(hw); ++ if (ret_val) { ++ e_dbg("Error Forcing Speed and Duplex\n"); ++ return ret_val; ++ } ++ } ++ ++ /* ++ * Check link status. Wait up to 100 microseconds for link to become ++ * valid. ++ */ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ COPPER_LINK_UP_LIMIT, ++ 10, ++ &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (link) { ++ e_dbg("Valid link established!!!\n"); ++ e1000e_config_collision_dist(hw); ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ } else { ++ e_dbg("Unable to establish link!!!\n"); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Waits for link and returns ++ * successful if link up is successful, else -E1000_ERR_PHY (-2). ++ **/ ++s32 e1000e_phy_force_speed_duplex_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e1000e_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. IGP requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e_dbg("IGP PSCR: %X\n", phy_data); ++ ++ udelay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ e_dbg("Waiting for forced speed/duplex link on IGP phy.\n"); ++ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) ++ e_dbg("Link taking longer than expected.\n"); ++ ++ /* Try once more */ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Resets the PHY to commit the ++ * changes. If time expires while waiting for link up, we reset the DSP. ++ * After reset, TX_CLK and CRS on Tx must be set. Return successful upon ++ * successful completion, else return corresponding error code. ++ **/ ++s32 e1000e_phy_force_speed_duplex_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI ++ * forced whenever speed and duplex are forced. 
++ */ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ret_val = e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e_dbg("M88E1000 PSCR: %X\n", phy_data); ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e1000e_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Reset the phy to commit changes. */ ++ ret_val = e1000e_commit_phy(hw); ++ if (ret_val) ++ return ret_val; ++ ++ if (phy->autoneg_wait_to_complete) { ++ e_dbg("Waiting for forced speed/duplex link on M88 phy.\n"); ++ ++ ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) { ++ if (hw->phy.type != e1000_phy_m88) { ++ e_dbg("Link taking longer than expected.\n"); ++ } else { ++ /* ++ * We didn't get link. ++ * Reset the DSP and cross our fingers. ++ */ ++ ret_val = e1e_wphy(hw, M88E1000_PHY_PAGE_SELECT, ++ 0x001d); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000e_phy_reset_dsp(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ /* Try once more */ ++ ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if (hw->phy.type != e1000_phy_m88) ++ return 0; ++ ++ ret_val = e1e_rphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Resetting the phy means we need to re-force TX_CLK in the ++ * Extended PHY Specific Control Register to 25MHz clock from ++ * the reset value of 2.5MHz. ++ */ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ret_val = e1e_wphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * In addition, we must re-enable CRS on Tx for both half and full ++ * duplex. ++ */ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ret_val = e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex ++ * @hw: pointer to the HW structure ++ * ++ * Forces the speed and duplex settings of the PHY. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. 
++ **/ ++s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ e1000e_phy_force_speed_duplex_setup(hw, &data); ++ ++ ret_val = e1e_wphy(hw, PHY_CONTROL, data); ++ if (ret_val) ++ goto out; ++ ++ /* Disable MDI-X support for 10/100 */ ++ ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IFE_PMC_AUTO_MDIX; ++ data &= ~IFE_PMC_FORCE_MDIX; ++ ++ ret_val = e1e_wphy(hw, IFE_PHY_MDIX_CONTROL, data); ++ if (ret_val) ++ goto out; ++ ++ e_dbg("IFE PMC: %X\n", data); ++ ++ udelay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ e_dbg("Waiting for forced speed/duplex link on IFE phy.\n"); ++ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ e_dbg("Link taking longer than expected.\n"); ++ ++ /* Try once more */ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex ++ * @hw: pointer to the HW structure ++ * @phy_ctrl: pointer to current value of PHY_CONTROL ++ * ++ * Forces speed and duplex on the PHY by doing the following: disable flow ++ * control, force speed/duplex on the MAC, disable auto speed detection, ++ * disable auto-negotiation, configure duplex, configure speed, configure ++ * the collision distance, write configuration to CTRL register. The ++ * caller must write to the PHY_CONTROL register for these settings to ++ * take affect. ++ **/ ++void e1000e_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 ctrl; ++ ++ /* Turn off flow control when forcing speed/duplex */ ++ hw->fc.current_mode = e1000_fc_none; ++ ++ /* Force speed/duplex on the mac */ ++ ctrl = er32(CTRL); ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~E1000_CTRL_SPD_SEL; ++ ++ /* Disable Auto Speed Detection */ ++ ctrl &= ~E1000_CTRL_ASDE; ++ ++ /* Disable autoneg on the phy */ ++ *phy_ctrl &= ~MII_CR_AUTO_NEG_EN; ++ ++ /* Forcing Full or Half Duplex? */ ++ if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) { ++ ctrl &= ~E1000_CTRL_FD; ++ *phy_ctrl &= ~MII_CR_FULL_DUPLEX; ++ e_dbg("Half Duplex\n"); ++ } else { ++ ctrl |= E1000_CTRL_FD; ++ *phy_ctrl |= MII_CR_FULL_DUPLEX; ++ e_dbg("Full Duplex\n"); ++ } ++ ++ /* Forcing 10mb or 100mb? */ ++ if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) { ++ ctrl |= E1000_CTRL_SPD_100; ++ *phy_ctrl |= MII_CR_SPEED_100; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10); ++ e_dbg("Forcing 100mb\n"); ++ } else { ++ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ *phy_ctrl |= MII_CR_SPEED_10; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); ++ e_dbg("Forcing 10mb\n"); ++ } ++ ++ e1000e_config_collision_dist(hw); ++ ++ ew32(CTRL, ctrl); ++} ++ ++/** ++ * e1000e_set_d3_lplu_state - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. 
LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. ++ **/ ++s32 e1000e_set_d3_lplu_state(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if (!active) { ++ data &= ~IGP02E1000_PM_D3_LPLU; ++ ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data); ++ if (ret_val) ++ return ret_val; ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= IGP02E1000_PM_D3_LPLU; ++ ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data); ++ if (ret_val) ++ return ret_val; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_check_downshift - Checks whether a downshift in speed occurred ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * A downshift is detected by querying the PHY link health. ++ **/ ++s32 e1000e_check_downshift(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, offset, mask; ++ ++ switch (phy->type) { ++ case e1000_phy_m88: ++ case e1000_phy_gg82563: ++ case e1000_phy_bm: ++ case e1000_phy_82578: ++ offset = M88E1000_PHY_SPEC_STATUS; ++ mask = M88E1000_PSSR_DOWNSHIFT; ++ break; ++ case e1000_phy_igp_2: ++ case e1000_phy_igp_3: ++ offset = IGP01E1000_PHY_LINK_HEALTH; ++ mask = IGP01E1000_PLHR_SS_DOWNGRADE; ++ break; ++ default: ++ /* speed downshift not supported */ ++ phy->speed_downgraded = false; ++ return 0; ++ } ++ ++ ret_val = e1e_rphy(hw, offset, &phy_data); ++ ++ if (!ret_val) ++ phy->speed_downgraded = (phy_data & mask); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_m88 - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY specific status register. ++ **/ ++s32 e1000_check_polarity_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_STATUS, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & M88E1000_PSSR_REV_POLARITY) ++ ? 
e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_igp - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY port status register, and the ++ * current speed (since there is no polarity at 100Mbps). ++ **/ ++s32 e1000_check_polarity_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data, offset, mask; ++ ++ /* ++ * Polarity is determined based on the speed of ++ * our connection. ++ */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ offset = IGP01E1000_PHY_PCS_INIT_REG; ++ mask = IGP01E1000_PHY_POLARITY_MASK; ++ } else { ++ /* ++ * This really only applies to 10Mbps since ++ * there is no polarity for 100Mbps (always 0). ++ */ ++ offset = IGP01E1000_PHY_PORT_STATUS; ++ mask = IGP01E1000_PSSR_POLARITY_REVERSED; ++ } ++ ++ ret_val = e1e_rphy(hw, offset, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & mask) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_ife - Check cable polarity for IFE PHY ++ * @hw: pointer to the HW structure ++ * ++ * Polarity is determined on the polarity reversal feature being enabled. ++ **/ ++s32 e1000_check_polarity_ife(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, offset, mask; ++ ++ /* ++ * Polarity is determined based on the reversal feature being enabled. ++ */ ++ if (phy->polarity_correction) { ++ offset = IFE_PHY_EXTENDED_STATUS_CONTROL; ++ mask = IFE_PESC_POLARITY_REVERSED; ++ } else { ++ offset = IFE_PHY_SPECIAL_CONTROL; ++ mask = IFE_PSC_FORCE_POLARITY; ++ } ++ ++ ret_val = e1e_rphy(hw, offset, &phy_data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (phy_data & mask) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_wait_autoneg - Wait for auto-neg completion ++ * @hw: pointer to the HW structure ++ * ++ * Waits for auto-negotiation to complete or for the auto-negotiation time ++ * limit to expire, which ever happens first. ++ **/ ++static s32 e1000_wait_autoneg(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 i, phy_status; ++ ++ /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */ ++ for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) { ++ ret_val = e1e_rphy(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ ret_val = e1e_rphy(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_AUTONEG_COMPLETE) ++ break; ++ msleep(100); ++ } ++ ++ /* ++ * PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation ++ * has completed. ++ */ ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_has_link_generic - Polls PHY for link ++ * @hw: pointer to the HW structure ++ * @iterations: number of times to poll for link ++ * @usec_interval: delay between polling attempts ++ * @success: pointer to whether polling was successful or not ++ * ++ * Polls the PHY status register for link, 'iterations' number of times. 
++ **/ ++s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success) ++{ ++ s32 ret_val = 0; ++ u16 i, phy_status; ++ ++ for (i = 0; i < iterations; i++) { ++ /* ++ * Some PHYs require the PHY_STATUS register to be read ++ * twice due to the link bit being sticky. No harm doing ++ * it across the board. ++ */ ++ ret_val = e1e_rphy(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ /* ++ * If the first read fails, another entity may have ++ * ownership of the resources, wait and try again to ++ * see if they have relinquished the resources yet. ++ */ ++ udelay(usec_interval); ++ ret_val = e1e_rphy(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_LINK_STATUS) ++ break; ++ if (usec_interval >= 1000) ++ mdelay(usec_interval/1000); ++ else ++ udelay(usec_interval); ++ } ++ ++ *success = (i < iterations); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_get_cable_length_m88 - Determine cable length for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY specific status register to retrieve the cable length ++ * information. The cable length is determined by averaging the minimum and ++ * maximum values to get the "average" cable length. The m88 PHY has four ++ * possible cable length values, which are: ++ * Register Value Cable Length ++ * 0 < 50 meters ++ * 1 50 - 80 meters ++ * 2 80 - 110 meters ++ * 3 110 - 140 meters ++ * 4 > 140 meters ++ **/ ++s32 e1000e_get_cable_length_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, index; ++ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ phy->min_cable_length = e1000_m88_cable_length_table[index]; ++ phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_get_cable_length_igp_2 - Determine cable length for igp2 PHY ++ * @hw: pointer to the HW structure ++ * ++ * The automatic gain control (agc) normalizes the amplitude of the ++ * received signal, adjusting for the attenuation produced by the ++ * cable. By reading the AGC registers, which represent the ++ * combination of coarse and fine gain value, the value can be put ++ * into a lookup table to obtain the approximate cable length ++ * for each channel. ++ **/ ++s32 e1000e_get_cable_length_igp_2(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, i, agc_value = 0; ++ u16 cur_agc_index, max_agc_index = 0; ++ u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; ++ static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = { ++ IGP02E1000_PHY_AGC_A, ++ IGP02E1000_PHY_AGC_B, ++ IGP02E1000_PHY_AGC_C, ++ IGP02E1000_PHY_AGC_D ++ }; ++ ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1e_rphy(hw, agc_reg_array[i], &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Getting bits 15:9, which represent the combination of ++ * coarse and fine gain values. The result is a number ++ * that can be put into the lookup table to obtain the ++ * approximate cable length. 
++ */ ++ cur_agc_index = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & ++ IGP02E1000_AGC_LENGTH_MASK; ++ ++ /* Array index bound check. */ ++ if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) || ++ (cur_agc_index == 0)) ++ return -E1000_ERR_PHY; ++ ++ /* Remove min & max AGC values from calculation. */ ++ if (e1000_igp_2_cable_length_table[min_agc_index] > ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ min_agc_index = cur_agc_index; ++ if (e1000_igp_2_cable_length_table[max_agc_index] < ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ max_agc_index = cur_agc_index; ++ ++ agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; ++ } ++ ++ agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + ++ e1000_igp_2_cable_length_table[max_agc_index]); ++ agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); ++ ++ /* Calculate cable length with the error range of +/- 10 meters. */ ++ phy->min_cable_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ? ++ (agc_value - IGP02E1000_AGC_RANGE) : 0; ++ phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_get_phy_info_m88 - Retrieve PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Valid for only copper links. Read the PHY status register (sticky read) ++ * to verify that link is up. Read the PHY special control register to ++ * determine the polarity and 10base-T extended distance. Read the PHY ++ * special status register to determine MDI/MDIx and current speed. If ++ * speed is 1000, then determine cable length, local and remote receiver. ++ **/ ++s32 e1000e_get_phy_info_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ if (phy->media_type != e1000_media_type_copper) { ++ e_dbg("Phy info is only valid for copper media\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) { ++ e_dbg("Phy info is only valid if link is up\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy->polarity_correction = (phy_data & ++ M88E1000_PSCR_POLARITY_REVERSAL); ++ ++ ret_val = e1000_check_polarity_m88(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy->is_mdix = (phy_data & M88E1000_PSSR_MDIX); ++ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { ++ ret_val = e1000_get_cable_length(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ /* Set values to "undefined" */ ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_get_phy_info_igp - Retrieve igp PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Read PHY status to determine if link is up. 
If link is up, then ++ * set/determine 10base-T extended distance and polarity correction. Read ++ * PHY port status to determine MDI/MDIx and speed. Based on the speed, ++ * determine on the cable length, local and remote receiver. ++ **/ ++s32 e1000e_get_phy_info_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) { ++ e_dbg("Phy info is only valid if link is up\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ phy->polarity_correction = true; ++ ++ ret_val = e1000_check_polarity_igp(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ return ret_val; ++ ++ phy->is_mdix = (data & IGP01E1000_PSSR_MDIX); ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ ret_val = e1000_get_cable_length(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, PHY_1000T_STATUS, &data); ++ if (ret_val) ++ return ret_val; ++ ++ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_ife - Retrieves various IFE PHY states ++ * @hw: pointer to the HW structure ++ * ++ * Populates "phy" structure with various feature states. ++ **/ ++s32 e1000_get_phy_info_ife(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ e_dbg("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1e_rphy(hw, IFE_PHY_SPECIAL_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ phy->polarity_correction = (data & IFE_PSC_AUTO_POLARITY_DISABLE) ++ ? false : true; ++ ++ if (phy->polarity_correction) { ++ ret_val = e1000_check_polarity_ife(hw); ++ if (ret_val) ++ goto out; ++ } else { ++ /* Polarity is forced */ ++ phy->cable_polarity = (data & IFE_PSC_FORCE_POLARITY) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ } ++ ++ ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & IFE_PMC_MDIX_STATUS) ? true : false; ++ ++ /* The following parameters are undefined for 10/100 operation. */ ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_sw_reset - PHY software reset ++ * @hw: pointer to the HW structure ++ * ++ * Does a software reset of the PHY by reading the PHY control register and ++ * setting/write the control register reset bit to the PHY. 
++ **/ ++s32 e1000e_phy_sw_reset(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_ctrl; ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ return ret_val; ++ ++ phy_ctrl |= MII_CR_RESET; ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ return ret_val; ++ ++ udelay(1); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_hw_reset_generic - PHY hardware reset ++ * @hw: pointer to the HW structure ++ * ++ * Verify the reset block is not blocking us from resetting. Acquire ++ * semaphore (if necessary) and read/set/write the device control reset ++ * bit in the PHY. Wait the appropriate delay time for the device to ++ * reset and release the semaphore (if necessary). ++ **/ ++s32 e1000e_phy_hw_reset_generic(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u32 ctrl; ++ ++ ret_val = e1000_check_reset_block(hw); ++ if (ret_val) ++ return 0; ++ ++ ret_val = phy->ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ctrl = er32(CTRL); ++ ew32(CTRL, ctrl | E1000_CTRL_PHY_RST); ++ e1e_flush(); ++ ++ udelay(phy->reset_delay_us); ++ ++ ew32(CTRL, ctrl); ++ e1e_flush(); ++ ++ udelay(150); ++ ++ phy->ops.release(hw); ++ ++ return e1000_get_phy_cfg_done(hw); ++} ++ ++/** ++ * e1000e_get_cfg_done - Generic configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Generic function to wait 10 milli-seconds for configuration to complete ++ * and return success. ++ **/ ++s32 e1000e_get_cfg_done(struct e1000_hw *hw) ++{ ++ mdelay(10); ++ return 0; ++} ++ ++/** ++ * e1000e_phy_init_script_igp3 - Inits the IGP3 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Initializes a Intel Gigabit PHY3 when an EEPROM is not present. ++ **/ ++s32 e1000e_phy_init_script_igp3(struct e1000_hw *hw) ++{ ++ e_dbg("Running IGP 3 PHY init script\n"); ++ ++ /* PHY init IGP 3 */ ++ /* Enable rise/fall, 10-mode work in class-A */ ++ e1e_wphy(hw, 0x2F5B, 0x9018); ++ /* Remove all caps from Replica path filter */ ++ e1e_wphy(hw, 0x2F52, 0x0000); ++ /* Bias trimming for ADC, AFE and Driver (Default) */ ++ e1e_wphy(hw, 0x2FB1, 0x8B24); ++ /* Increase Hybrid poly bias */ ++ e1e_wphy(hw, 0x2FB2, 0xF8F0); ++ /* Add 4% to Tx amplitude in Gig mode */ ++ e1e_wphy(hw, 0x2010, 0x10B0); ++ /* Disable trimming (TTT) */ ++ e1e_wphy(hw, 0x2011, 0x0000); ++ /* Poly DC correction to 94.6% + 2% for all channels */ ++ e1e_wphy(hw, 0x20DD, 0x249A); ++ /* ABS DC correction to 95.9% */ ++ e1e_wphy(hw, 0x20DE, 0x00D3); ++ /* BG temp curve trim */ ++ e1e_wphy(hw, 0x28B4, 0x04CE); ++ /* Increasing ADC OPAMP stage 1 currents to max */ ++ e1e_wphy(hw, 0x2F70, 0x29E4); ++ /* Force 1000 ( required for enabling PHY regs configuration) */ ++ e1e_wphy(hw, 0x0000, 0x0140); ++ /* Set upd_freq to 6 */ ++ e1e_wphy(hw, 0x1F30, 0x1606); ++ /* Disable NPDFE */ ++ e1e_wphy(hw, 0x1F31, 0xB814); ++ /* Disable adaptive fixed FFE (Default) */ ++ e1e_wphy(hw, 0x1F35, 0x002A); ++ /* Enable FFE hysteresis */ ++ e1e_wphy(hw, 0x1F3E, 0x0067); ++ /* Fixed FFE for short cable lengths */ ++ e1e_wphy(hw, 0x1F54, 0x0065); ++ /* Fixed FFE for medium cable lengths */ ++ e1e_wphy(hw, 0x1F55, 0x002A); ++ /* Fixed FFE for long cable lengths */ ++ e1e_wphy(hw, 0x1F56, 0x002A); ++ /* Enable Adaptive Clip Threshold */ ++ e1e_wphy(hw, 0x1F72, 0x3FB0); ++ /* AHT reset limit to 1 */ ++ e1e_wphy(hw, 0x1F76, 0xC0FF); ++ /* Set AHT master delay to 127 msec */ ++ e1e_wphy(hw, 0x1F77, 0x1DEC); ++ /* Set scan bits for AHT */ ++ e1e_wphy(hw, 0x1F78, 0xF9EF); ++ /* Set AHT Preset bits */ ++ e1e_wphy(hw, 0x1F79, 
0x0210); ++ /* Change integ_factor of channel A to 3 */ ++ e1e_wphy(hw, 0x1895, 0x0003); ++ /* Change prop_factor of channels BCD to 8 */ ++ e1e_wphy(hw, 0x1796, 0x0008); ++ /* Change cg_icount + enable integbp for channels BCD */ ++ e1e_wphy(hw, 0x1798, 0xD008); ++ /* ++ * Change cg_icount + enable integbp + change prop_factor_master ++ * to 8 for channel A ++ */ ++ e1e_wphy(hw, 0x1898, 0xD918); ++ /* Disable AHT in Slave mode on channel A */ ++ e1e_wphy(hw, 0x187A, 0x0800); ++ /* ++ * Enable LPLU and disable AN to 1000 in non-D0a states, ++ * Enable SPD+B2B ++ */ ++ e1e_wphy(hw, 0x0019, 0x008D); ++ /* Enable restart AN on an1000_dis change */ ++ e1e_wphy(hw, 0x001B, 0x2080); ++ /* Enable wh_fifo read clock in 10/100 modes */ ++ e1e_wphy(hw, 0x0014, 0x0045); ++ /* Restart AN, Speed selection is 1000 */ ++ e1e_wphy(hw, 0x0000, 0x1340); ++ ++ return 0; ++} ++ ++/* Internal function pointers */ ++ ++/** ++ * e1000_get_phy_cfg_done - Generic PHY configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Return success if silicon family did not implement a family specific ++ * get_cfg_done function. ++ **/ ++static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw) ++{ ++ if (hw->phy.ops.get_cfg_done) ++ return hw->phy.ops.get_cfg_done(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex - Generic force PHY speed/duplex ++ * @hw: pointer to the HW structure ++ * ++ * When the silicon family has not implemented a forced speed/duplex ++ * function for the PHY, simply return 0. ++ **/ ++static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw) ++{ ++ if (hw->phy.ops.force_speed_duplex) ++ return hw->phy.ops.force_speed_duplex(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_get_phy_type_from_id - Get PHY type from id ++ * @phy_id: phy_id read from the phy ++ * ++ * Returns the phy type from the id. ++ **/ ++enum e1000_phy_type e1000e_get_phy_type_from_id(u32 phy_id) ++{ ++ enum e1000_phy_type phy_type = e1000_phy_unknown; ++ ++ switch (phy_id) { ++ case M88E1000_I_PHY_ID: ++ case M88E1000_E_PHY_ID: ++ case M88E1111_I_PHY_ID: ++ case M88E1011_I_PHY_ID: ++ phy_type = e1000_phy_m88; ++ break; ++ case IGP01E1000_I_PHY_ID: /* IGP 1 & 2 share this */ ++ phy_type = e1000_phy_igp_2; ++ break; ++ case GG82563_E_PHY_ID: ++ phy_type = e1000_phy_gg82563; ++ break; ++ case IGP03E1000_E_PHY_ID: ++ phy_type = e1000_phy_igp_3; ++ break; ++ case IFE_E_PHY_ID: ++ case IFE_PLUS_E_PHY_ID: ++ case IFE_C_E_PHY_ID: ++ phy_type = e1000_phy_ife; ++ break; ++ case BME1000_E_PHY_ID: ++ case BME1000_E_PHY_ID_R2: ++ phy_type = e1000_phy_bm; ++ break; ++ case I82578_E_PHY_ID: ++ phy_type = e1000_phy_82578; ++ break; ++ case I82577_E_PHY_ID: ++ phy_type = e1000_phy_82577; ++ break; ++ case I82579_E_PHY_ID: ++ phy_type = e1000_phy_82579; ++ break; ++ case I217_E_PHY_ID: ++ phy_type = e1000_phy_i217; ++ break; ++ default: ++ phy_type = e1000_phy_unknown; ++ break; ++ } ++ return phy_type; ++} ++ ++/** ++ * e1000e_determine_phy_address - Determines PHY address. ++ * @hw: pointer to the HW structure ++ * ++ * This uses a trial and error method to loop through possible PHY ++ * addresses. It tests each by reading the PHY ID registers and ++ * checking for a match. 
++ **/ ++s32 e1000e_determine_phy_address(struct e1000_hw *hw) ++{ ++ s32 ret_val = -E1000_ERR_PHY_TYPE; ++ u32 phy_addr = 0; ++ u32 i; ++ enum e1000_phy_type phy_type = e1000_phy_unknown; ++ ++ hw->phy.id = phy_type; ++ ++ for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) { ++ hw->phy.addr = phy_addr; ++ i = 0; ++ ++ do { ++ e1000e_get_phy_id(hw); ++ phy_type = e1000e_get_phy_type_from_id(hw->phy.id); ++ ++ /* ++ * If phy_type is valid, break - we found our ++ * PHY address ++ */ ++ if (phy_type != e1000_phy_unknown) { ++ ret_val = 0; ++ goto out; ++ } ++ usleep_range(1000, 2000); ++ i++; ++ } while (i < 10); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * @brief Retrieve PHY page address ++ * @param page page to access ++ * ++ * @return PHY address for the page requested. ++ **/ ++static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg) ++{ ++ u32 phy_addr = 2; ++ ++ if ((page >= 768) || (page == 0 && reg == 25) || (reg == 31)) ++ phy_addr = 1; ++ ++ return phy_addr; ++} ++ ++/** ++ * e1000e_write_phy_reg_bm - Write BM PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ u32 page = offset >> IGP_PAGE_SHIFT; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data, ++ false, false); ++ goto out; ++ } ++ ++ hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset); ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ u32 page_shift, page_select; ++ ++ /* ++ * Page select is register 31 for phy address 1 and 22 for ++ * phy address 2 and 3. Page select is shifted only for ++ * phy address 1. ++ */ ++ if (hw->phy.addr == 1) { ++ page_shift = IGP_PAGE_SHIFT; ++ page_select = IGP01E1000_PHY_PAGE_SELECT; ++ } else { ++ page_shift = 0; ++ page_select = BM_PHY_PAGE_SELECT; ++ } ++ ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, page_select, ++ (page << page_shift)); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++out: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * e1000e_read_phy_reg_bm - Read BM PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. 
++ **/ ++s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ u32 page = offset >> IGP_PAGE_SHIFT; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data, ++ true, false); ++ goto out; ++ } ++ ++ hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset); ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ u32 page_shift, page_select; ++ ++ /* ++ * Page select is register 31 for phy address 1 and 22 for ++ * phy address 2 and 3. Page select is shifted only for ++ * phy address 1. ++ */ ++ if (hw->phy.addr == 1) { ++ page_shift = IGP_PAGE_SHIFT; ++ page_select = IGP01E1000_PHY_PAGE_SELECT; ++ } else { ++ page_shift = 0; ++ page_select = BM_PHY_PAGE_SELECT; ++ } ++ ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, page_select, ++ (page << page_shift)); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++out: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * e1000e_read_phy_reg_bm2 - Read BM PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. ++ **/ ++s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ u16 page = (u16)(offset >> IGP_PAGE_SHIFT); ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data, ++ true, false); ++ goto out; ++ } ++ ++ hw->phy.addr = 1; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_PHY_PAGE_SELECT, ++ page); ++ ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++out: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * e1000e_write_phy_reg_bm2 - Write BM PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. 
++ **/ ++s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ u16 page = (u16)(offset >> IGP_PAGE_SHIFT); ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data, ++ false, false); ++ goto out; ++ } ++ ++ hw->phy.addr = 1; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_PHY_PAGE_SELECT, ++ page); ++ ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++out: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * e1000_enable_phy_wakeup_reg_access_bm - enable access to BM wakeup registers ++ * @hw: pointer to the HW structure ++ * @phy_reg: pointer to store original contents of BM_WUC_ENABLE_REG ++ * ++ * Assumes semaphore already acquired and phy_reg points to a valid memory ++ * address to store contents of the BM_WUC_ENABLE_REG register. ++ **/ ++s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg) ++{ ++ s32 ret_val; ++ u16 temp; ++ ++ /* All page select, port ctrl and wakeup registers use phy address 1 */ ++ hw->phy.addr = 1; ++ ++ /* Select Port Control Registers page */ ++ ret_val = e1000_set_page_igp(hw, (BM_PORT_CTRL_PAGE << IGP_PAGE_SHIFT)); ++ if (ret_val) { ++ e_dbg("Could not set Port Control page\n"); ++ goto out; ++ } ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, phy_reg); ++ if (ret_val) { ++ e_dbg("Could not read PHY register %d.%d\n", ++ BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG); ++ goto out; ++ } ++ ++ /* ++ * Enable both PHY wakeup mode and Wakeup register page writes. ++ * Prevent a power state change by disabling ME and Host PHY wakeup. ++ */ ++ temp = *phy_reg; ++ temp |= BM_WUC_ENABLE_BIT; ++ temp &= ~(BM_WUC_ME_WU_BIT | BM_WUC_HOST_WU_BIT); ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, temp); ++ if (ret_val) { ++ e_dbg("Could not write PHY register %d.%d\n", ++ BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG); ++ goto out; ++ } ++ ++ /* Select Host Wakeup Registers page */ ++ ret_val = e1000_set_page_igp(hw, (BM_WUC_PAGE << IGP_PAGE_SHIFT)); ++ ++ /* caller now able to write registers on the Wakeup registers page */ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_disable_phy_wakeup_reg_access_bm - disable access to BM wakeup regs ++ * @hw: pointer to the HW structure ++ * @phy_reg: pointer to original contents of BM_WUC_ENABLE_REG ++ * ++ * Restore BM_WUC_ENABLE_REG to its original value. ++ * ++ * Assumes semaphore already acquired and *phy_reg is the contents of the ++ * BM_WUC_ENABLE_REG before register(s) on BM_WUC_PAGE were accessed by ++ * caller. 
++ **/ ++s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg) ++{ ++ s32 ret_val = 0; ++ ++ /* Select Port Control Registers page */ ++ ret_val = e1000_set_page_igp(hw, (BM_PORT_CTRL_PAGE << IGP_PAGE_SHIFT)); ++ if (ret_val) { ++ e_dbg("Could not set Port Control page\n"); ++ goto out; ++ } ++ ++ /* Restore 769.17 to its original value */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, *phy_reg); ++ if (ret_val) ++ e_dbg("Could not restore PHY register %d.%d\n", ++ BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG); ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_access_phy_wakeup_reg_bm - Read/write BM PHY wakeup register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read or written ++ * @data: pointer to the data to read or write ++ * @read: determines if operation is read or write ++ * @page_set: BM_WUC_PAGE already set and access enabled ++ * ++ * Read the PHY register at offset and store the retrieved information in ++ * data, or write data to PHY register at offset. Note the procedure to ++ * access the PHY wakeup registers is different than reading the other PHY ++ * registers. It works as such: ++ * 1) Set 769.17.2 (page 769, register 17, bit 2) = 1 ++ * 2) Set page to 800 for host (801 if we were manageability) ++ * 3) Write the address using the address opcode (0x11) ++ * 4) Read or write the data using the data opcode (0x12) ++ * 5) Restore 769.17.2 to its original value ++ * ++ * Steps 1 and 2 are done by e1000_enable_phy_wakeup_reg_access_bm() and ++ * step 5 is done by e1000_disable_phy_wakeup_reg_access_bm(). ++ * ++ * Assumes semaphore is already acquired. When page_set==true, assumes ++ * the PHY page is set to BM_WUC_PAGE (i.e. a function in the call stack ++ * is responsible for calls to e1000_[enable|disable]_phy_wakeup_reg_bm()). 
++ **/ ++static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset, ++ u16 *data, bool read, bool page_set) ++{ ++ s32 ret_val; ++ u16 reg = BM_PHY_REG_NUM(offset); ++ u16 page = BM_PHY_REG_PAGE(offset); ++ u16 phy_reg = 0; ++ ++ /* Gig must be disabled for MDIO accesses to Host Wakeup reg page */ ++ if ((hw->mac.type == e1000_pchlan) && ++ (!(er32(PHY_CTRL) & E1000_PHY_CTRL_GBE_DISABLE))) ++ e_dbg("Attempting to access page %d while gig enabled.\n", ++ page); ++ ++ if (!page_set) { ++ /* Enable access to PHY wakeup registers */ ++ ret_val = e1000_enable_phy_wakeup_reg_access_bm(hw, &phy_reg); ++ if (ret_val) { ++ e_dbg("Could not enable PHY wakeup reg access\n"); ++ goto out; ++ } ++ } ++ ++ e_dbg("Accessing PHY page %d reg 0x%x\n", page, reg); ++ ++ /* Write the Wakeup register page offset value using opcode 0x11 */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ADDRESS_OPCODE, reg); ++ if (ret_val) { ++ e_dbg("Could not write address opcode to page %d\n", page); ++ goto out; ++ } ++ ++ if (read) { ++ /* Read the Wakeup register page value using opcode 0x12 */ ++ ret_val = e1000e_read_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE, ++ data); ++ } else { ++ /* Write the Wakeup register page value using opcode 0x12 */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE, ++ *data); ++ } ++ ++ if (ret_val) { ++ e_dbg("Could not access PHY reg %d.%d\n", page, reg); ++ goto out; ++ } ++ ++ if (!page_set) ++ ret_val = e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_power_up_phy_copper - Restore copper link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, restore the link to previous ++ * settings. ++ **/ ++void e1000_power_up_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ e1e_rphy(hw, PHY_CONTROL, &mii_reg); ++ mii_reg &= ~MII_CR_POWER_DOWN; ++ e1e_wphy(hw, PHY_CONTROL, mii_reg); ++} ++ ++/** ++ * e1000_power_down_phy_copper - Restore copper link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, restore the link to previous ++ * settings. ++ **/ ++void e1000_power_down_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ e1e_rphy(hw, PHY_CONTROL, &mii_reg); ++ mii_reg |= MII_CR_POWER_DOWN; ++ e1e_wphy(hw, PHY_CONTROL, mii_reg); ++ usleep_range(1000, 2000); ++} ++ ++/** ++ * e1000e_commit_phy - Soft PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Performs a soft PHY reset on those that apply. This is a function pointer ++ * entry point called by drivers. ++ **/ ++s32 e1000e_commit_phy(struct e1000_hw *hw) ++{ ++ if (hw->phy.ops.commit) ++ return hw->phy.ops.commit(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_d0_lplu_state - Sets low power link up state for D0 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D0 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D0 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. 
LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. This is a function pointer entry point called by drivers. ++ **/ ++static s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active) ++{ ++ if (hw->phy.ops.set_d0_lplu_state) ++ return hw->phy.ops.set_d0_lplu_state(hw, active); ++ ++ return 0; ++} ++ ++/** ++ * __e1000_read_phy_reg_hv - Read HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and stores the retrieved information in data. Release any acquired ++ * semaphore before exiting. ++ **/ ++static s32 __e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data, ++ bool locked, bool page_set) ++{ ++ s32 ret_val; ++ u16 page = BM_PHY_REG_PAGE(offset); ++ u16 reg = BM_PHY_REG_NUM(offset); ++ u32 phy_addr = hw->phy.addr = e1000_get_phy_addr_for_hv_page(page); ++ ++ if (!locked) { ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data, ++ true, page_set); ++ goto out; ++ } ++ ++ if (page > 0 && page < HV_INTC_FC_PAGE_START) { ++ ret_val = e1000_access_phy_debug_regs_hv(hw, offset, ++ data, true); ++ goto out; ++ } ++ ++ if (!page_set) { ++ if (page == HV_INTC_FC_PAGE_START) ++ page = 0; ++ ++ if (reg > MAX_PHY_MULTI_PAGE_REG) { ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000_set_page_igp(hw, ++ (page << IGP_PAGE_SHIFT)); ++ ++ hw->phy.addr = phy_addr; ++ ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++ e_dbg("reading PHY page %d (or 0x%x shifted) reg 0x%x\n", page, ++ page << IGP_PAGE_SHIFT, reg); ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & reg, ++ data); ++out: ++ if (!locked) ++ hw->phy.ops.release(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_phy_reg_hv - Read HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore then reads the PHY register at offset and stores ++ * the retrieved information in data. Release the acquired semaphore ++ * before exiting. ++ **/ ++s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000_read_phy_reg_hv(hw, offset, data, false, false); ++} ++ ++/** ++ * e1000_read_phy_reg_hv_locked - Read HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY register at offset and stores the retrieved information ++ * in data. Assumes semaphore already acquired. ++ **/ ++s32 e1000_read_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000_read_phy_reg_hv(hw, offset, data, true, false); ++} ++ ++/** ++ * e1000_read_phy_reg_page_hv - Read HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Reads the PHY register at offset and stores the retrieved information ++ * in data. Assumes semaphore already acquired and page already set. 
++ **/ ++s32 e1000_read_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000_read_phy_reg_hv(hw, offset, data, true, true); ++} ++ ++/** ++ * __e1000_write_phy_reg_hv - Write HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++static s32 __e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data, ++ bool locked, bool page_set) ++{ ++ s32 ret_val; ++ u16 page = BM_PHY_REG_PAGE(offset); ++ u16 reg = BM_PHY_REG_NUM(offset); ++ u32 phy_addr = hw->phy.addr = e1000_get_phy_addr_for_hv_page(page); ++ ++ if (!locked) { ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data, ++ false, page_set); ++ goto out; ++ } ++ ++ if (page > 0 && page < HV_INTC_FC_PAGE_START) { ++ ret_val = e1000_access_phy_debug_regs_hv(hw, offset, ++ &data, false); ++ goto out; ++ } ++ ++ if (!page_set) { ++ if (page == HV_INTC_FC_PAGE_START) ++ page = 0; ++ ++ /* ++ * Workaround MDIO accesses being disabled after entering IEEE ++ * Power Down (when bit 11 of the PHY Control register is set) ++ */ ++ if ((hw->phy.type == e1000_phy_82578) && ++ (hw->phy.revision >= 1) && ++ (hw->phy.addr == 2) && ++ ((MAX_PHY_REG_ADDRESS & reg) == 0) && (data & (1 << 11))) { ++ u16 data2 = 0x7EFF; ++ ret_val = e1000_access_phy_debug_regs_hv(hw, ++ (1 << 6) | 0x3, ++ &data2, false); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (reg > MAX_PHY_MULTI_PAGE_REG) { ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000_set_page_igp(hw, ++ (page << IGP_PAGE_SHIFT)); ++ ++ hw->phy.addr = phy_addr; ++ ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++ e_dbg("writing PHY page %d (or 0x%x shifted) reg 0x%x\n", page, ++ page << IGP_PAGE_SHIFT, reg); ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & reg, ++ data); ++ ++out: ++ if (!locked) ++ hw->phy.ops.release(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_hv - Write HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore then writes the data to PHY register at the offset. ++ * Release the acquired semaphores before exiting. ++ **/ ++s32 e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000_write_phy_reg_hv(hw, offset, data, false, false); ++} ++ ++/** ++ * e1000_write_phy_reg_hv_locked - Write HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes the data to PHY register at the offset. Assumes semaphore ++ * already acquired. ++ **/ ++s32 e1000_write_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000_write_phy_reg_hv(hw, offset, data, true, false); ++} ++ ++/** ++ * e1000_write_phy_reg_page_hv - Write HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes the data to PHY register at the offset. Assumes semaphore ++ * already acquired and page already set. 
++ **/ ++s32 e1000_write_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000_write_phy_reg_hv(hw, offset, data, true, true); ++} ++ ++/** ++ * @brief Get PHY address based on page ++ * @param page page to be accessed ++ * @return PHY address ++ */ ++static u32 e1000_get_phy_addr_for_hv_page(u32 page) ++{ ++ u32 phy_addr = 2; ++ ++ if (page >= HV_INTC_FC_PAGE_START) ++ phy_addr = 1; ++ ++ return phy_addr; ++} ++ ++/** ++ * e1000_access_phy_debug_regs_hv - Read HV PHY vendor specific high registers ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read or written ++ * @data: pointer to the data to be read or written ++ * @read: determines if operation is read or write ++ * ++ * Reads the PHY register at offset and stores the retreived information ++ * in data. Assumes semaphore already acquired. Note that the procedure ++ * to access these regs uses the address port and data port to read/write. ++ * These accesses done with PHY address 2 and without using pages. ++ **/ ++static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset, ++ u16 *data, bool read) ++{ ++ s32 ret_val; ++ u32 addr_reg = 0; ++ u32 data_reg = 0; ++ ++ /* This takes care of the difference with desktop vs mobile phy */ ++ addr_reg = (hw->phy.type == e1000_phy_82578) ? ++ I82578_ADDR_REG : I82577_ADDR_REG; ++ data_reg = addr_reg + 1; ++ ++ /* All operations in this function are phy address 2 */ ++ hw->phy.addr = 2; ++ ++ /* masking with 0x3F to remove the page from offset */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, addr_reg, (u16)offset & 0x3F); ++ if (ret_val) { ++ e_dbg("Could not write the Address Offset port register\n"); ++ goto out; ++ } ++ ++ /* Read or write the data value next */ ++ if (read) ++ ret_val = e1000e_read_phy_reg_mdic(hw, data_reg, data); ++ else ++ ret_val = e1000e_write_phy_reg_mdic(hw, data_reg, *data); ++ ++ if (ret_val) { ++ e_dbg("Could not access the Data port register\n"); ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_link_stall_workaround_hv - Si workaround ++ * @hw: pointer to the HW structure ++ * ++ * This function works around a Si bug where the link partner can get ++ * a link up indication before the PHY does. If small packets are sent ++ * by the link partner they can be placed in the packet buffer without ++ * being properly accounted for by the PHY and will stall preventing ++ * further packets from being received. The workaround is to clear the ++ * packet buffer after the PHY detects link up. ++ **/ ++s32 e1000_link_stall_workaround_hv(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 data; ++ ++ if (hw->phy.type != e1000_phy_82578) ++ goto out; ++ ++ /* Do not apply workaround if in PHY loopback bit 14 set */ ++ e1e_rphy(hw, PHY_CONTROL, &data); ++ if (data & PHY_CONTROL_LB) ++ goto out; ++ ++ /* check if link is up and at 1Gbps */ ++ ret_val = e1e_rphy(hw, BM_CS_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= BM_CS_STATUS_LINK_UP | ++ BM_CS_STATUS_RESOLVED | ++ BM_CS_STATUS_SPEED_MASK; ++ ++ if (data != (BM_CS_STATUS_LINK_UP | ++ BM_CS_STATUS_RESOLVED | ++ BM_CS_STATUS_SPEED_1000)) ++ goto out; ++ ++ mdelay(200); ++ ++ /* flush the packets in the fifo buffer */ ++ ret_val = e1e_wphy(hw, HV_MUX_DATA_CTRL, HV_MUX_DATA_CTRL_GEN_TO_MAC | ++ HV_MUX_DATA_CTRL_FORCE_SPEED); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1e_wphy(hw, HV_MUX_DATA_CTRL, HV_MUX_DATA_CTRL_GEN_TO_MAC); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_82577 - Checks the polarity. 
++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY specific status register. ++ **/ ++s32 e1000_check_polarity_82577(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, I82577_PHY_STATUS_2, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & I82577_PHY_STATUS2_REV_POLARITY) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_82577 - Force speed/duplex for I82577 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. ++ **/ ++s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ e1000e_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ udelay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ e_dbg("Waiting for forced speed/duplex link on 82577 phy\n"); ++ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ e_dbg("Link taking longer than expected.\n"); ++ ++ /* Try once more */ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_82577 - Retrieve I82577 PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Read PHY status to determine if link is up. If link is up, then ++ * set/determine 10base-T extended distance and polarity correction. Read ++ * PHY port status to determine MDI/MDIx and speed. Based on the speed, ++ * determine on the cable length, local and remote receiver. ++ **/ ++s32 e1000_get_phy_info_82577(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ e_dbg("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ phy->polarity_correction = true; ++ ++ ret_val = e1000_check_polarity_82577(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1e_rphy(hw, I82577_PHY_STATUS_2, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & I82577_PHY_STATUS2_MDIX) ? true : false; ++ ++ if ((data & I82577_PHY_STATUS2_SPEED_MASK) == ++ I82577_PHY_STATUS2_SPEED_1000MBPS) { ++ ret_val = hw->phy.ops.get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1e_rphy(hw, PHY_1000T_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ++ ? 
e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_82577 - Determine cable length for 82577 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Reads the diagnostic status register and verifies result is valid before ++ * placing it in the phy_cable_length field. ++ **/ ++s32 e1000_get_cable_length_82577(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, length; ++ ++ ret_val = e1e_rphy(hw, I82577_PHY_DIAG_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ length = (phy_data & I82577_DSTATUS_CABLE_LENGTH) >> ++ I82577_DSTATUS_CABLE_LENGTH_SHIFT; ++ ++ if (length == E1000_CABLE_LENGTH_UNDEFINED) ++ ret_val = -E1000_ERR_PHY; ++ ++ phy->cable_length = length; ++ ++out: ++ return ret_val; ++} +--- linux/drivers/xenomai/net/drivers/e1000e/e1000.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/e1000.h 2021-04-07 16:01:27.224634170 +0800 +@@ -0,0 +1,764 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* Linux PRO/1000 Ethernet Driver main header file */ ++ ++#ifndef _E1000_H_ ++#define _E1000_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "hw.h" ++ ++struct e1000_info; ++ ++#define e_dbg(format, arg...) \ ++ pr_debug(format, ## arg) ++#define e_err(format, arg...) \ ++ pr_err(format, ## arg) ++#define e_info(format, arg...) \ ++ pr_info(format, ## arg) ++#define e_warn(format, arg...) \ ++ pr_warn(format, ## arg) ++#define e_notice(format, arg...) 
\ ++ pr_notice(format, ## arg) ++ ++ ++/* Interrupt modes, as used by the IntMode parameter */ ++#define E1000E_INT_MODE_LEGACY 0 ++#define E1000E_INT_MODE_MSI 1 ++#define E1000E_INT_MODE_MSIX 2 ++ ++/* Tx/Rx descriptor defines */ ++#define E1000_DEFAULT_TXD 256 ++#define E1000_MAX_TXD 4096 ++#define E1000_MIN_TXD 64 ++ ++#define E1000_DEFAULT_RXD 256 ++#define E1000_MAX_RXD 4096 ++#define E1000_MIN_RXD 64 ++ ++#define E1000_MIN_ITR_USECS 10 /* 100000 irq/sec */ ++#define E1000_MAX_ITR_USECS 10000 /* 100 irq/sec */ ++ ++/* Early Receive defines */ ++#define E1000_ERT_2048 0x100 ++ ++#define E1000_FC_PAUSE_TIME 0x0680 /* 858 usec */ ++ ++/* How many Tx Descriptors do we need to call netif_wake_queue ? */ ++/* How many Rx Buffers do we bundle into one write to the hardware ? */ ++#define E1000_RX_BUFFER_WRITE 16 /* Must be power of 2 */ ++ ++#define AUTO_ALL_MODES 0 ++#define E1000_EEPROM_APME 0x0400 ++ ++#define E1000_MNG_VLAN_NONE (-1) ++ ++/* Number of packet split data buffers (not including the header buffer) */ ++#define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1) ++ ++#define DEFAULT_JUMBO 9234 ++ ++/* BM/HV Specific Registers */ ++#define BM_PORT_CTRL_PAGE 769 ++ ++#define PHY_UPPER_SHIFT 21 ++#define BM_PHY_REG(page, reg) \ ++ (((reg) & MAX_PHY_REG_ADDRESS) |\ ++ (((page) & 0xFFFF) << PHY_PAGE_SHIFT) |\ ++ (((reg) & ~MAX_PHY_REG_ADDRESS) << (PHY_UPPER_SHIFT - PHY_PAGE_SHIFT))) ++ ++/* PHY Wakeup Registers and defines */ ++#define BM_PORT_GEN_CFG PHY_REG(BM_PORT_CTRL_PAGE, 17) ++#define BM_RCTL PHY_REG(BM_WUC_PAGE, 0) ++#define BM_WUC PHY_REG(BM_WUC_PAGE, 1) ++#define BM_WUFC PHY_REG(BM_WUC_PAGE, 2) ++#define BM_WUS PHY_REG(BM_WUC_PAGE, 3) ++#define BM_RAR_L(_i) (BM_PHY_REG(BM_WUC_PAGE, 16 + ((_i) << 2))) ++#define BM_RAR_M(_i) (BM_PHY_REG(BM_WUC_PAGE, 17 + ((_i) << 2))) ++#define BM_RAR_H(_i) (BM_PHY_REG(BM_WUC_PAGE, 18 + ((_i) << 2))) ++#define BM_RAR_CTRL(_i) (BM_PHY_REG(BM_WUC_PAGE, 19 + ((_i) << 2))) ++#define BM_MTA(_i) (BM_PHY_REG(BM_WUC_PAGE, 128 + ((_i) << 1))) ++ ++#define BM_RCTL_UPE 0x0001 /* Unicast Promiscuous Mode */ ++#define BM_RCTL_MPE 0x0002 /* Multicast Promiscuous Mode */ ++#define BM_RCTL_MO_SHIFT 3 /* Multicast Offset Shift */ ++#define BM_RCTL_MO_MASK (3 << 3) /* Multicast Offset Mask */ ++#define BM_RCTL_BAM 0x0020 /* Broadcast Accept Mode */ ++#define BM_RCTL_PMCF 0x0040 /* Pass MAC Control Frames */ ++#define BM_RCTL_RFCE 0x0080 /* Rx Flow Control Enable */ ++ ++#define HV_STATS_PAGE 778 ++#define HV_SCC_UPPER PHY_REG(HV_STATS_PAGE, 16) /* Single Collision Count */ ++#define HV_SCC_LOWER PHY_REG(HV_STATS_PAGE, 17) ++#define HV_ECOL_UPPER PHY_REG(HV_STATS_PAGE, 18) /* Excessive Coll. Count */ ++#define HV_ECOL_LOWER PHY_REG(HV_STATS_PAGE, 19) ++#define HV_MCC_UPPER PHY_REG(HV_STATS_PAGE, 20) /* Multiple Coll. 
Count */ ++#define HV_MCC_LOWER PHY_REG(HV_STATS_PAGE, 21) ++#define HV_LATECOL_UPPER PHY_REG(HV_STATS_PAGE, 23) /* Late Collision Count */ ++#define HV_LATECOL_LOWER PHY_REG(HV_STATS_PAGE, 24) ++#define HV_COLC_UPPER PHY_REG(HV_STATS_PAGE, 25) /* Collision Count */ ++#define HV_COLC_LOWER PHY_REG(HV_STATS_PAGE, 26) ++#define HV_DC_UPPER PHY_REG(HV_STATS_PAGE, 27) /* Defer Count */ ++#define HV_DC_LOWER PHY_REG(HV_STATS_PAGE, 28) ++#define HV_TNCRS_UPPER PHY_REG(HV_STATS_PAGE, 29) /* Transmit with no CRS */ ++#define HV_TNCRS_LOWER PHY_REG(HV_STATS_PAGE, 30) ++ ++#define E1000_FCRTV_PCH 0x05F40 /* PCH Flow Control Refresh Timer Value */ ++ ++/* BM PHY Copper Specific Status */ ++#define BM_CS_STATUS 17 ++#define BM_CS_STATUS_LINK_UP 0x0400 ++#define BM_CS_STATUS_RESOLVED 0x0800 ++#define BM_CS_STATUS_SPEED_MASK 0xC000 ++#define BM_CS_STATUS_SPEED_1000 0x8000 ++ ++/* 82577 Mobile Phy Status Register */ ++#define HV_M_STATUS 26 ++#define HV_M_STATUS_AUTONEG_COMPLETE 0x1000 ++#define HV_M_STATUS_SPEED_MASK 0x0300 ++#define HV_M_STATUS_SPEED_1000 0x0200 ++#define HV_M_STATUS_LINK_UP 0x0040 ++ ++#define E1000_ICH_FWSM_PCIM2PCI 0x01000000 /* ME PCIm-to-PCI active */ ++#define E1000_ICH_FWSM_PCIM2PCI_COUNT 2000 ++ ++/* Time to wait before putting the device into D3 if there's no link (in ms). */ ++#define LINK_TIMEOUT 100 ++ ++#define DEFAULT_RDTR 0 ++#define DEFAULT_RADV 8 ++#define BURST_RDTR 0x20 ++#define BURST_RADV 0x20 ++ ++/* ++ * in the case of WTHRESH, it appears at least the 82571/2 hardware ++ * writes back 4 descriptors when WTHRESH=5, and 3 descriptors when ++ * WTHRESH=4, and since we want 64 bytes at a time written back, set ++ * it to 5 ++ */ ++#define E1000_TXDCTL_DMA_BURST_ENABLE \ ++ (E1000_TXDCTL_GRAN | /* set descriptor granularity */ \ ++ E1000_TXDCTL_COUNT_DESC | \ ++ (5 << 16) | /* wthresh must be +1 more than desired */\ ++ (1 << 8) | /* hthresh */ \ ++ 0x1f) /* pthresh */ ++ ++#define E1000_RXDCTL_DMA_BURST_ENABLE \ ++ (0x01000000 | /* set descriptor granularity */ \ ++ (4 << 16) | /* set writeback threshold */ \ ++ (4 << 8) | /* set prefetch threshold */ \ ++ 0x20) /* set hthresh */ ++ ++#define E1000_TIDV_FPD (1 << 31) ++#define E1000_RDTR_FPD (1 << 31) ++ ++enum e1000_boards { ++ board_82571, ++ board_82572, ++ board_82573, ++ board_82574, ++ board_82583, ++ board_80003es2lan, ++ board_ich8lan, ++ board_ich9lan, ++ board_ich10lan, ++ board_pchlan, ++ board_pch2lan, ++ board_pch_lpt, ++}; ++ ++struct e1000_ps_page { ++ struct page *page; ++ u64 dma; /* must be u64 - written to hw */ ++}; ++ ++/* ++ * wrappers around a pointer to a socket buffer, ++ * so a DMA handle can be stored along with the buffer ++ */ ++struct e1000_buffer { ++ dma_addr_t dma; ++ struct rtskb *skb; ++ union { ++ /* Tx */ ++ struct { ++ unsigned long time_stamp; ++ u16 length; ++ u16 next_to_watch; ++ unsigned int segs; ++ unsigned int bytecount; ++ u16 mapped_as_page; ++ }; ++ /* Rx */ ++ struct { ++ /* arrays of page information for packet split */ ++ struct e1000_ps_page *ps_pages; ++ struct page *page; ++ }; ++ }; ++}; ++ ++struct e1000_ring { ++ void *desc; /* pointer to ring memory */ ++ dma_addr_t dma; /* phys address of ring */ ++ unsigned int size; /* length of ring in bytes */ ++ unsigned int count; /* number of desc. 
in ring */ ++ ++ u16 next_to_use; ++ u16 next_to_clean; ++ ++ u16 head; ++ u16 tail; ++ ++ /* array of buffer information structs */ ++ struct e1000_buffer *buffer_info; ++ ++ char name[IFNAMSIZ + 5]; ++ u32 ims_val; ++ u32 itr_val; ++ u16 itr_register; ++ int set_itr; ++ ++ struct rtskb *rx_skb_top; ++ ++ rtdm_lock_t lock; ++}; ++ ++/* PHY register snapshot values */ ++struct e1000_phy_regs { ++ u16 bmcr; /* basic mode control register */ ++ u16 bmsr; /* basic mode status register */ ++ u16 advertise; /* auto-negotiation advertisement */ ++ u16 lpa; /* link partner ability register */ ++ u16 expansion; /* auto-negotiation expansion reg */ ++ u16 ctrl1000; /* 1000BASE-T control register */ ++ u16 stat1000; /* 1000BASE-T status register */ ++ u16 estatus; /* extended status register */ ++}; ++ ++/* board specific private data structure */ ++struct e1000_adapter { ++ struct timer_list watchdog_timer; ++ struct timer_list phy_info_timer; ++ struct timer_list blink_timer; ++ ++ struct work_struct reset_task; ++ struct work_struct watchdog_task; ++ ++ const struct e1000_info *ei; ++ ++ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; ++ u32 bd_number; ++ u32 rx_buffer_len; ++ u16 mng_vlan_id; ++ u16 link_speed; ++ u16 link_duplex; ++ u16 eeprom_vers; ++ ++ /* track device up/down/testing state */ ++ unsigned long state; ++ ++ /* Interrupt Throttle Rate */ ++ u32 itr; ++ u32 itr_setting; ++ u16 tx_itr; ++ u16 rx_itr; ++ ++ /* ++ * Tx ++ */ ++ struct e1000_ring *tx_ring /* One per active queue */ ++ ____cacheline_aligned_in_smp; ++ ++ struct napi_struct napi; ++ ++ unsigned int restart_queue; ++ u32 txd_cmd; ++ ++ bool detect_tx_hung; ++ u8 tx_timeout_factor; ++ ++ u32 tx_int_delay; ++ u32 tx_abs_int_delay; ++ ++ unsigned int total_tx_bytes; ++ unsigned int total_tx_packets; ++ unsigned int total_rx_bytes; ++ unsigned int total_rx_packets; ++ ++ /* Tx stats */ ++ u64 tpt_old; ++ u64 colc_old; ++ u32 gotc; ++ u64 gotc_old; ++ u32 tx_timeout_count; ++ u32 tx_fifo_head; ++ u32 tx_head_addr; ++ u32 tx_fifo_size; ++ u32 tx_dma_failed; ++ ++ /* ++ * Rx ++ */ ++ bool (*clean_rx) (struct e1000_adapter *adapter, ++ nanosecs_abs_t *time_stamp) ++ ____cacheline_aligned_in_smp; ++ void (*alloc_rx_buf) (struct e1000_adapter *adapter, ++ int cleaned_count, gfp_t gfp); ++ struct e1000_ring *rx_ring; ++ ++ u32 rx_int_delay; ++ u32 rx_abs_int_delay; ++ ++ /* Rx stats */ ++ u64 hw_csum_err; ++ u64 hw_csum_good; ++ u64 rx_hdr_split; ++ u32 gorc; ++ u64 gorc_old; ++ u32 alloc_rx_buff_failed; ++ u32 rx_dma_failed; ++ ++ unsigned int rx_ps_pages; ++ u16 rx_ps_bsize0; ++ u32 max_frame_size; ++ u32 min_frame_size; ++ ++ /* OS defined structs */ ++ struct rtnet_device *netdev; ++ struct pci_dev *pdev; ++ ++ rtdm_irq_t irq_handle; ++ rtdm_irq_t rx_irq_handle; ++ rtdm_irq_t tx_irq_handle; ++ rtdm_nrtsig_t mod_timer_sig; ++ rtdm_nrtsig_t downshift_sig; ++ ++ /* structs defined in e1000_hw.h */ ++ struct e1000_hw hw; ++ ++ spinlock_t stats64_lock; ++ struct e1000_hw_stats stats; ++ struct e1000_phy_info phy_info; ++ struct e1000_phy_stats phy_stats; ++ ++ /* Snapshot of PHY registers */ ++ struct e1000_phy_regs phy_regs; ++ ++ struct e1000_ring test_tx_ring; ++ struct e1000_ring test_rx_ring; ++ u32 test_icr; ++ ++ u32 msg_enable; ++ unsigned int num_vectors; ++ struct msix_entry *msix_entries; ++ int int_mode; ++ u32 eiac_mask; ++ ++ u32 eeprom_wol; ++ u32 wol; ++ u32 pba; ++ u32 max_hw_frame_size; ++ ++ bool fc_autoneg; ++ ++ unsigned int flags; ++ unsigned int flags2; ++ struct work_struct downshift_task; ++ struct 
work_struct update_phy_task; ++ struct work_struct print_hang_task; ++ ++ bool idle_check; ++ int phy_hang_count; ++}; ++ ++struct e1000_info { ++ enum e1000_mac_type mac; ++ unsigned int flags; ++ unsigned int flags2; ++ u32 pba; ++ u32 max_hw_frame_size; ++ s32 (*get_variants)(struct e1000_adapter *); ++ const struct e1000_mac_operations *mac_ops; ++ const struct e1000_phy_operations *phy_ops; ++ const struct e1000_nvm_operations *nvm_ops; ++}; ++ ++/* hardware capability, feature, and workaround flags */ ++#define FLAG_HAS_AMT (1 << 0) ++#define FLAG_HAS_FLASH (1 << 1) ++#define FLAG_HAS_HW_VLAN_FILTER (1 << 2) ++#define FLAG_HAS_WOL (1 << 3) ++#define FLAG_HAS_ERT (1 << 4) ++#define FLAG_HAS_CTRLEXT_ON_LOAD (1 << 5) ++#define FLAG_HAS_SWSM_ON_LOAD (1 << 6) ++#define FLAG_HAS_JUMBO_FRAMES (1 << 7) ++#define FLAG_READ_ONLY_NVM (1 << 8) ++#define FLAG_IS_ICH (1 << 9) ++#define FLAG_HAS_MSIX (1 << 10) ++#define FLAG_HAS_SMART_POWER_DOWN (1 << 11) ++#define FLAG_IS_QUAD_PORT_A (1 << 12) ++#define FLAG_IS_QUAD_PORT (1 << 13) ++#define FLAG_TIPG_MEDIUM_FOR_80003ESLAN (1 << 14) ++#define FLAG_APME_IN_WUC (1 << 15) ++#define FLAG_APME_IN_CTRL3 (1 << 16) ++#define FLAG_APME_CHECK_PORT_B (1 << 17) ++#define FLAG_DISABLE_FC_PAUSE_TIME (1 << 18) ++#define FLAG_NO_WAKE_UCAST (1 << 19) ++#define FLAG_MNG_PT_ENABLED (1 << 20) ++#define FLAG_RESET_OVERWRITES_LAA (1 << 21) ++#define FLAG_TARC_SPEED_MODE_BIT (1 << 22) ++#define FLAG_TARC_SET_BIT_ZERO (1 << 23) ++#define FLAG_RX_NEEDS_RESTART (1 << 24) ++#define FLAG_LSC_GIG_SPEED_DROP (1 << 25) ++#define FLAG_SMART_POWER_DOWN (1 << 26) ++#define FLAG_MSI_ENABLED (1 << 27) ++/* reserved (1 << 28) */ ++#define FLAG_TSO_FORCE (1 << 29) ++#define FLAG_RX_RESTART_NOW (1 << 30) ++#define FLAG_MSI_TEST_FAILED (1 << 31) ++ ++#define FLAG2_CRC_STRIPPING (1 << 0) ++#define FLAG2_HAS_PHY_WAKEUP (1 << 1) ++#define FLAG2_IS_DISCARDING (1 << 2) ++#define FLAG2_DISABLE_ASPM_L1 (1 << 3) ++#define FLAG2_HAS_PHY_STATS (1 << 4) ++#define FLAG2_HAS_EEE (1 << 5) ++#define FLAG2_DMA_BURST (1 << 6) ++#define FLAG2_DISABLE_ASPM_L0S (1 << 7) ++#define FLAG2_DISABLE_AIM (1 << 8) ++#define FLAG2_CHECK_PHY_HANG (1 << 9) ++#define FLAG2_NO_DISABLE_RX (1 << 10) ++#define FLAG2_PCIM2PCI_ARBITER_WA (1 << 11) ++ ++#define E1000_RX_DESC_PS(R, i) \ ++ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) ++#define E1000_RX_DESC_EXT(R, i) \ ++ (&(((union e1000_rx_desc_extended *)((R).desc))[i])) ++#define E1000_GET_DESC(R, i, type) (&(((struct type *)((R).desc))[i])) ++#define E1000_TX_DESC(R, i) E1000_GET_DESC(R, i, e1000_tx_desc) ++#define E1000_CONTEXT_DESC(R, i) E1000_GET_DESC(R, i, e1000_context_desc) ++ ++enum e1000_state_t { ++ __E1000_TESTING, ++ __E1000_RESETTING, ++ __E1000_ACCESS_SHARED_RESOURCE, ++ __E1000_DOWN ++}; ++ ++enum latency_range { ++ lowest_latency = 0, ++ low_latency = 1, ++ bulk_latency = 2, ++ latency_invalid = 255 ++}; ++ ++extern char e1000e_driver_name[]; ++extern const char e1000e_driver_version[]; ++ ++extern void e1000e_check_options(struct e1000_adapter *adapter); ++extern void e1000e_set_ethtool_ops(struct net_device *netdev); ++ ++extern int e1000e_up(struct e1000_adapter *adapter); ++extern void e1000e_down(struct e1000_adapter *adapter); ++extern void e1000e_reinit_locked(struct e1000_adapter *adapter); ++extern void e1000e_reset(struct e1000_adapter *adapter); ++extern void e1000e_power_up_phy(struct e1000_adapter *adapter); ++extern int e1000e_setup_rx_resources(struct e1000_adapter *adapter); ++extern int e1000e_setup_tx_resources(struct 
e1000_adapter *adapter); ++extern void e1000e_free_rx_resources(struct e1000_adapter *adapter); ++extern void e1000e_free_tx_resources(struct e1000_adapter *adapter); ++extern struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, ++ struct rtnl_link_stats64 ++ *stats); ++extern void e1000e_set_interrupt_capability(struct e1000_adapter *adapter); ++extern void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter); ++extern void e1000e_get_hw_control(struct e1000_adapter *adapter); ++extern void e1000e_release_hw_control(struct e1000_adapter *adapter); ++ ++extern unsigned int copybreak; ++ ++extern char *e1000e_get_hw_dev_name(struct e1000_hw *hw); ++ ++extern const struct e1000_info e1000_82571_info; ++extern const struct e1000_info e1000_82572_info; ++extern const struct e1000_info e1000_82573_info; ++extern const struct e1000_info e1000_82574_info; ++extern const struct e1000_info e1000_82583_info; ++extern const struct e1000_info e1000_ich8_info; ++extern const struct e1000_info e1000_ich9_info; ++extern const struct e1000_info e1000_ich10_info; ++extern const struct e1000_info e1000_pch_info; ++extern const struct e1000_info e1000_pch2_info; ++extern const struct e1000_info e1000_pch_lpt_info; ++extern const struct e1000_info e1000_es2_info; ++ ++extern s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, ++ u32 pba_num_size); ++ ++extern s32 e1000e_commit_phy(struct e1000_hw *hw); ++ ++extern bool e1000e_enable_mng_pass_thru(struct e1000_hw *hw); ++ ++extern bool e1000e_get_laa_state_82571(struct e1000_hw *hw); ++extern void e1000e_set_laa_state_82571(struct e1000_hw *hw, bool state); ++ ++extern void e1000e_write_protect_nvm_ich8lan(struct e1000_hw *hw); ++extern void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, ++ bool state); ++extern void e1000e_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw); ++extern void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw); ++extern void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw); ++extern void e1000_resume_workarounds_pchlan(struct e1000_hw *hw); ++extern s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable); ++extern s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable); ++extern void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw); ++ ++extern s32 e1000e_check_for_copper_link(struct e1000_hw *hw); ++extern s32 e1000e_check_for_fiber_link(struct e1000_hw *hw); ++extern s32 e1000e_check_for_serdes_link(struct e1000_hw *hw); ++extern s32 e1000e_setup_led_generic(struct e1000_hw *hw); ++extern s32 e1000e_cleanup_led_generic(struct e1000_hw *hw); ++extern s32 e1000e_led_on_generic(struct e1000_hw *hw); ++extern s32 e1000e_led_off_generic(struct e1000_hw *hw); ++extern s32 e1000e_get_bus_info_pcie(struct e1000_hw *hw); ++extern void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw); ++extern void e1000_set_lan_id_single_port(struct e1000_hw *hw); ++extern s32 e1000e_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed, u16 *duplex); ++extern s32 e1000e_get_speed_and_duplex_fiber_serdes(struct e1000_hw *hw, u16 *speed, u16 *duplex); ++extern s32 e1000e_disable_pcie_master(struct e1000_hw *hw); ++extern s32 e1000e_get_auto_rd_done(struct e1000_hw *hw); ++extern s32 e1000e_id_led_init(struct e1000_hw *hw); ++extern void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw); ++extern s32 e1000e_setup_fiber_serdes_link(struct e1000_hw *hw); ++extern s32 e1000e_copper_link_setup_m88(struct e1000_hw *hw); ++extern 
s32 e1000e_copper_link_setup_igp(struct e1000_hw *hw); ++extern s32 e1000e_setup_link(struct e1000_hw *hw); ++extern void e1000_clear_vfta_generic(struct e1000_hw *hw); ++extern void e1000e_init_rx_addrs(struct e1000_hw *hw, u16 rar_count); ++extern void e1000e_update_mc_addr_list_generic(struct e1000_hw *hw, ++ u8 *mc_addr_list, ++ u32 mc_addr_count); ++extern void e1000e_rar_set(struct e1000_hw *hw, u8 *addr, u32 index); ++extern s32 e1000e_set_fc_watermarks(struct e1000_hw *hw); ++extern void e1000e_set_pcie_no_snoop(struct e1000_hw *hw, u32 no_snoop); ++extern s32 e1000e_get_hw_semaphore(struct e1000_hw *hw); ++extern s32 e1000e_valid_led_default(struct e1000_hw *hw, u16 *data); ++extern void e1000e_config_collision_dist(struct e1000_hw *hw); ++extern s32 e1000e_config_fc_after_link_up(struct e1000_hw *hw); ++extern s32 e1000e_force_mac_fc(struct e1000_hw *hw); ++extern s32 e1000e_blink_led_generic(struct e1000_hw *hw); ++extern void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); ++extern s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); ++extern void e1000e_reset_adaptive(struct e1000_hw *hw); ++extern void e1000e_update_adaptive(struct e1000_hw *hw); ++ ++extern s32 e1000e_setup_copper_link(struct e1000_hw *hw); ++extern s32 e1000e_get_phy_id(struct e1000_hw *hw); ++extern void e1000e_put_hw_semaphore(struct e1000_hw *hw); ++extern s32 e1000e_check_reset_block_generic(struct e1000_hw *hw); ++extern s32 e1000e_phy_force_speed_duplex_igp(struct e1000_hw *hw); ++extern s32 e1000e_get_cable_length_igp_2(struct e1000_hw *hw); ++extern s32 e1000e_get_phy_info_igp(struct e1000_hw *hw); ++extern s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page); ++extern s32 e1000e_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000e_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++extern s32 e1000e_phy_hw_reset_generic(struct e1000_hw *hw); ++extern s32 e1000e_set_d3_lplu_state(struct e1000_hw *hw, bool active); ++extern s32 e1000e_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 e1000e_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, ++ u16 data); ++extern s32 e1000e_phy_sw_reset(struct e1000_hw *hw); ++extern s32 e1000e_phy_force_speed_duplex_m88(struct e1000_hw *hw); ++extern s32 e1000e_get_cfg_done(struct e1000_hw *hw); ++extern s32 e1000e_get_cable_length_m88(struct e1000_hw *hw); ++extern s32 e1000e_get_phy_info_m88(struct e1000_hw *hw); ++extern s32 e1000e_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000e_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 e1000e_phy_init_script_igp3(struct e1000_hw *hw); ++extern enum e1000_phy_type e1000e_get_phy_type_from_id(u32 phy_id); ++extern s32 e1000e_determine_phy_address(struct e1000_hw *hw); ++extern s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, ++ u16 *phy_reg); ++extern s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, ++ u16 *phy_reg); ++extern s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data); ++extern void e1000e_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl); ++extern s32 e1000e_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 
e1000e_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, ++ u16 data); ++extern s32 e1000e_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000e_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++extern s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success); ++extern s32 e1000e_phy_reset_dsp(struct e1000_hw *hw); ++extern void e1000_power_up_phy_copper(struct e1000_hw *hw); ++extern void e1000_power_down_phy_copper(struct e1000_hw *hw); ++extern s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 e1000e_check_downshift(struct e1000_hw *hw); ++extern s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000_read_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++extern s32 e1000_read_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++extern s32 e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 e1000_write_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, ++ u16 data); ++extern s32 e1000_write_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, ++ u16 data); ++extern s32 e1000_link_stall_workaround_hv(struct e1000_hw *hw); ++extern s32 e1000_copper_link_setup_82577(struct e1000_hw *hw); ++extern s32 e1000_check_polarity_82577(struct e1000_hw *hw); ++extern s32 e1000_get_phy_info_82577(struct e1000_hw *hw); ++extern s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw); ++extern s32 e1000_get_cable_length_82577(struct e1000_hw *hw); ++ ++extern s32 e1000_check_polarity_m88(struct e1000_hw *hw); ++extern s32 e1000_get_phy_info_ife(struct e1000_hw *hw); ++extern s32 e1000_check_polarity_ife(struct e1000_hw *hw); ++extern s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw); ++extern s32 e1000_check_polarity_igp(struct e1000_hw *hw); ++extern bool e1000_check_phy_82574(struct e1000_hw *hw); ++ ++static inline s32 e1000_phy_hw_reset(struct e1000_hw *hw) ++{ ++ return hw->phy.ops.reset(hw); ++} ++ ++static inline s32 e1000_check_reset_block(struct e1000_hw *hw) ++{ ++ return hw->phy.ops.check_reset_block(hw); ++} ++ ++static inline s32 e1e_rphy(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return hw->phy.ops.read_reg(hw, offset, data); ++} ++ ++static inline s32 e1e_rphy_locked(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return hw->phy.ops.read_reg_locked(hw, offset, data); ++} ++ ++static inline s32 e1e_wphy(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return hw->phy.ops.write_reg(hw, offset, data); ++} ++ ++static inline s32 e1e_wphy_locked(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return hw->phy.ops.write_reg_locked(hw, offset, data); ++} ++ ++static inline s32 e1000_get_cable_length(struct e1000_hw *hw) ++{ ++ return hw->phy.ops.get_cable_length(hw); ++} ++ ++extern s32 e1000e_acquire_nvm(struct e1000_hw *hw); ++extern s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++extern s32 e1000e_update_nvm_checksum_generic(struct e1000_hw *hw); ++extern s32 e1000e_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg); ++extern s32 e1000e_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++extern s32 e1000e_validate_nvm_checksum_generic(struct e1000_hw *hw); ++extern void e1000e_release_nvm(struct e1000_hw *hw); ++extern void e1000e_reload_nvm(struct e1000_hw *hw); ++extern s32 e1000_read_mac_addr_generic(struct 
e1000_hw *hw); ++ ++static inline s32 e1000e_read_mac_addr(struct e1000_hw *hw) ++{ ++ if (hw->mac.ops.read_mac_addr) ++ return hw->mac.ops.read_mac_addr(hw); ++ ++ return e1000_read_mac_addr_generic(hw); ++} ++ ++static inline s32 e1000_validate_nvm_checksum(struct e1000_hw *hw) ++{ ++ return hw->nvm.ops.validate(hw); ++} ++ ++static inline s32 e1000e_update_nvm_checksum(struct e1000_hw *hw) ++{ ++ return hw->nvm.ops.update(hw); ++} ++ ++static inline s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ return hw->nvm.ops.read(hw, offset, words, data); ++} ++ ++static inline s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ return hw->nvm.ops.write(hw, offset, words, data); ++} ++ ++static inline s32 e1000_get_phy_info(struct e1000_hw *hw) ++{ ++ return hw->phy.ops.get_info(hw); ++} ++ ++static inline s32 e1000e_check_mng_mode(struct e1000_hw *hw) ++{ ++ return hw->mac.ops.check_mng_mode(hw); ++} ++ ++extern bool e1000e_check_mng_mode_generic(struct e1000_hw *hw); ++extern bool e1000e_enable_tx_pkt_filtering(struct e1000_hw *hw); ++extern s32 e1000e_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length); ++ ++static inline u32 __er32(struct e1000_hw *hw, unsigned long reg) ++{ ++ return readl(hw->hw_addr + reg); ++} ++ ++static inline void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val) ++{ ++ writel(val, hw->hw_addr + reg); ++} ++ ++#endif /* _E1000_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000e/82571.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/82571.c 2021-04-07 16:01:27.219634178 +0800 +@@ -0,0 +1,2112 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* ++ * 82571EB Gigabit Ethernet Controller ++ * 82571EB Gigabit Ethernet Controller (Copper) ++ * 82571EB Gigabit Ethernet Controller (Fiber) ++ * 82571EB Dual Port Gigabit Mezzanine Adapter ++ * 82571EB Quad Port Gigabit Mezzanine Adapter ++ * 82571PT Gigabit PT Quad Port Server ExpressModule ++ * 82572EI Gigabit Ethernet Controller (Copper) ++ * 82572EI Gigabit Ethernet Controller (Fiber) ++ * 82572EI Gigabit Ethernet Controller ++ * 82573V Gigabit Ethernet Controller (Copper) ++ * 82573E Gigabit Ethernet Controller (Copper) ++ * 82573L Gigabit Ethernet Controller ++ * 82574L Gigabit Network Connection ++ * 82583V Gigabit Network Connection ++ */ ++ ++#include "e1000.h" ++ ++#define ID_LED_RESERVED_F746 0xF746 ++#define ID_LED_DEFAULT_82573 ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_OFF1_ON2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++ ++#define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000 ++#define AN_RETRY_COUNT 5 /* Autoneg Retry Count value */ ++#define E1000_BASE1000T_STATUS 10 ++#define E1000_IDLE_ERROR_COUNT_MASK 0xFF ++#define E1000_RECEIVE_ERROR_COUNTER 21 ++#define E1000_RECEIVE_ERROR_MAX 0xFFFF ++ ++#define E1000_NVM_INIT_CTRL2_MNGM 0x6000 /* Manageability Operation Mode mask */ ++ ++static s32 e1000_get_phy_id_82571(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw); ++static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw); ++static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw); ++static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw); ++static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw); ++static s32 e1000_setup_link_82571(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw); ++static void e1000_clear_vfta_82571(struct e1000_hw *hw); ++static bool e1000_check_mng_mode_82574(struct e1000_hw *hw); ++static s32 e1000_led_on_82574(struct e1000_hw *hw); ++static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw); ++static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw); ++static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw); ++static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw); ++static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active); ++static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active); ++ ++/** ++ * e1000_init_phy_params_82571 - Init PHY func ptrs. 
++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_phy_params_82571(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ return 0; ++ } ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 100; ++ ++ phy->ops.power_up = e1000_power_up_phy_copper; ++ phy->ops.power_down = e1000_power_down_phy_copper_82571; ++ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ phy->type = e1000_phy_igp_2; ++ break; ++ case e1000_82573: ++ phy->type = e1000_phy_m88; ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ phy->type = e1000_phy_bm; ++ phy->ops.acquire = e1000_get_hw_semaphore_82574; ++ phy->ops.release = e1000_put_hw_semaphore_82574; ++ phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82574; ++ phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82574; ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ ++ /* This can only be done after all function pointers are setup. */ ++ ret_val = e1000_get_phy_id_82571(hw); ++ if (ret_val) { ++ e_dbg("Error getting PHY ID\n"); ++ return ret_val; ++ } ++ ++ /* Verify phy id */ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ if (phy->id != IGP01E1000_I_PHY_ID) ++ ret_val = -E1000_ERR_PHY; ++ break; ++ case e1000_82573: ++ if (phy->id != M88E1111_I_PHY_ID) ++ ret_val = -E1000_ERR_PHY; ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ if (phy->id != BME1000_E_PHY_ID_R2) ++ ret_val = -E1000_ERR_PHY; ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ break; ++ } ++ ++ if (ret_val) ++ e_dbg("PHY ID unknown: type = 0x%08x\n", phy->id); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82571 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = er32(EECD); ++ u16 size; ++ ++ nvm->opcode_bits = 8; ++ nvm->delay_usec = 1; ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->page_size = 32; ++ nvm->address_bits = 16; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->page_size = 8; ++ nvm->address_bits = 8; ++ break; ++ default: ++ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; ++ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8; ++ break; ++ } ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ if (((eecd >> 15) & 0x3) == 0x3) { ++ nvm->type = e1000_nvm_flash_hw; ++ nvm->word_size = 2048; ++ /* ++ * Autonomous Flash update bit must be cleared due ++ * to Flash update issue. ++ */ ++ eecd &= ~E1000_EECD_AUPDEN; ++ ew32(EECD, eecd); ++ break; ++ } ++ /* Fall Through */ ++ default: ++ nvm->type = e1000_nvm_eeprom_spi; ++ size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ /* ++ * Added to a constant, "size" becomes the left-shift value ++ * for setting word_size. ++ */ ++ size += NVM_WORD_SIZE_BASE_SHIFT; ++ ++ /* EEPROM access above 16k is unsupported */ ++ if (size > 14) ++ size = 14; ++ nvm->word_size = 1 << size; ++ break; ++ } ++ ++ /* Function Pointers */ ++ switch (hw->mac.type) { ++ case e1000_82574: ++ case e1000_82583: ++ nvm->ops.acquire = e1000_get_hw_semaphore_82574; ++ nvm->ops.release = e1000_put_hw_semaphore_82574; ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_init_mac_params_82571 - Init MAC func ptrs. 
++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_mac_params_82571(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_mac_operations *func = &mac->ops; ++ u32 swsm = 0; ++ u32 swsm2 = 0; ++ bool force_clear_smbi = false; ++ ++ /* Set media type */ ++ switch (adapter->pdev->device) { ++ case E1000_DEV_ID_82571EB_FIBER: ++ case E1000_DEV_ID_82572EI_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ hw->phy.media_type = e1000_media_type_fiber; ++ break; ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82572EI_SERDES: ++ case E1000_DEV_ID_82571EB_SERDES_DUAL: ++ case E1000_DEV_ID_82571EB_SERDES_QUAD: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ /* Adaptive IFS supported */ ++ mac->adaptive_ifs = true; ++ ++ /* check for link */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ func->setup_physical_interface = e1000_setup_copper_link_82571; ++ func->check_for_link = e1000e_check_for_copper_link; ++ func->get_link_up_info = e1000e_get_speed_and_duplex_copper; ++ break; ++ case e1000_media_type_fiber: ++ func->setup_physical_interface = ++ e1000_setup_fiber_serdes_link_82571; ++ func->check_for_link = e1000e_check_for_fiber_link; ++ func->get_link_up_info = ++ e1000e_get_speed_and_duplex_fiber_serdes; ++ break; ++ case e1000_media_type_internal_serdes: ++ func->setup_physical_interface = ++ e1000_setup_fiber_serdes_link_82571; ++ func->check_for_link = e1000_check_for_serdes_link_82571; ++ func->get_link_up_info = ++ e1000e_get_speed_and_duplex_fiber_serdes; ++ break; ++ default: ++ return -E1000_ERR_CONFIG; ++ break; ++ } ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ func->set_lan_id = e1000_set_lan_id_single_port; ++ func->check_mng_mode = e1000e_check_mng_mode_generic; ++ func->led_on = e1000e_led_on_generic; ++ func->blink_led = e1000e_blink_led_generic; ++ ++ /* FWSM register */ ++ mac->has_fwsm = true; ++ /* ++ * ARC supported; valid only if manageability features are ++ * enabled. ++ */ ++ mac->arc_subsystem_valid = ++ (er32(FWSM) & E1000_FWSM_MODE_MASK) ++ ? true : false; ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ func->set_lan_id = e1000_set_lan_id_single_port; ++ func->check_mng_mode = e1000_check_mng_mode_82574; ++ func->led_on = e1000_led_on_82574; ++ break; ++ default: ++ func->check_mng_mode = e1000e_check_mng_mode_generic; ++ func->led_on = e1000e_led_on_generic; ++ func->blink_led = e1000e_blink_led_generic; ++ ++ /* FWSM register */ ++ mac->has_fwsm = true; ++ break; ++ } ++ ++ /* ++ * Ensure that the inter-port SWSM.SMBI lock bit is clear before ++ * first NVM or PHY access. This should be done for single-port ++ * devices, and for one port only on dual-port devices so that ++ * for those devices we can still use the SMBI lock to synchronize ++ * inter-port accesses to the PHY & NVM. 
++ */ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ swsm2 = er32(SWSM2); ++ ++ if (!(swsm2 & E1000_SWSM2_LOCK)) { ++ /* Only do this for the first interface on this card */ ++ ew32(SWSM2, ++ swsm2 | E1000_SWSM2_LOCK); ++ force_clear_smbi = true; ++ } else ++ force_clear_smbi = false; ++ break; ++ default: ++ force_clear_smbi = true; ++ break; ++ } ++ ++ if (force_clear_smbi) { ++ /* Make sure SWSM.SMBI is clear */ ++ swsm = er32(SWSM); ++ if (swsm & E1000_SWSM_SMBI) { ++ /* This bit should not be set on a first interface, and ++ * indicates that the bootagent or EFI code has ++ * improperly left this bit enabled ++ */ ++ e_dbg("Please update your 82571 Bootagent\n"); ++ } ++ ew32(SWSM, swsm & ~E1000_SWSM_SMBI); ++ } ++ ++ /* ++ * Initialize device specific counter of SMBI acquisition ++ * timeouts. ++ */ ++ hw->dev_spec.e82571.smb_counter = 0; ++ ++ return 0; ++} ++ ++static s32 e1000_get_variants_82571(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ static int global_quad_port_a; /* global port a indication */ ++ struct pci_dev *pdev = adapter->pdev; ++ int is_port_b = er32(STATUS) & E1000_STATUS_FUNC_1; ++ s32 rc; ++ ++ rc = e1000_init_mac_params_82571(adapter); ++ if (rc) ++ return rc; ++ ++ rc = e1000_init_nvm_params_82571(hw); ++ if (rc) ++ return rc; ++ ++ rc = e1000_init_phy_params_82571(hw); ++ if (rc) ++ return rc; ++ ++ /* tag quad port adapters first, it's used below */ ++ switch (pdev->device) { ++ case E1000_DEV_ID_82571EB_QUAD_COPPER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: ++ case E1000_DEV_ID_82571PT_QUAD_COPPER: ++ adapter->flags |= FLAG_IS_QUAD_PORT; ++ /* mark the first port */ ++ if (global_quad_port_a == 0) ++ adapter->flags |= FLAG_IS_QUAD_PORT_A; ++ /* Reset for multiple quad port adapters */ ++ global_quad_port_a++; ++ if (global_quad_port_a == 4) ++ global_quad_port_a = 0; ++ break; ++ default: ++ break; ++ } ++ ++ switch (adapter->hw.mac.type) { ++ case e1000_82571: ++ /* these dual ports don't have WoL on port B at all */ ++ if (((pdev->device == E1000_DEV_ID_82571EB_FIBER) || ++ (pdev->device == E1000_DEV_ID_82571EB_SERDES) || ++ (pdev->device == E1000_DEV_ID_82571EB_COPPER)) && ++ (is_port_b)) ++ adapter->flags &= ~FLAG_HAS_WOL; ++ /* quad ports only support WoL on port A */ ++ if (adapter->flags & FLAG_IS_QUAD_PORT && ++ (!(adapter->flags & FLAG_IS_QUAD_PORT_A))) ++ adapter->flags &= ~FLAG_HAS_WOL; ++ /* Does not support WoL on any port */ ++ if (pdev->device == E1000_DEV_ID_82571EB_SERDES_QUAD) ++ adapter->flags &= ~FLAG_HAS_WOL; ++ break; ++ case e1000_82573: ++ if (pdev->device == E1000_DEV_ID_82573L) { ++ adapter->flags |= FLAG_HAS_JUMBO_FRAMES; ++ adapter->max_hw_frame_size = DEFAULT_JUMBO; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_get_phy_id_82571 - Retrieve the PHY ID and revision ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY registers and stores the PHY ID and possibly the PHY ++ * revision in the hardware structure. ++ **/ ++static s32 e1000_get_phy_id_82571(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_id = 0; ++ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ /* ++ * The 82571 firmware may still be configuring the PHY. ++ * In this case, we cannot access the PHY until the ++ * configuration is done. So we explicitly set the ++ * PHY ID. 
++ */ ++ phy->id = IGP01E1000_I_PHY_ID; ++ break; ++ case e1000_82573: ++ return e1000e_get_phy_id(hw); ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ ret_val = e1e_rphy(hw, PHY_ID1, &phy_id); ++ if (ret_val) ++ return ret_val; ++ ++ phy->id = (u32)(phy_id << 16); ++ udelay(20); ++ ret_val = e1e_rphy(hw, PHY_ID2, &phy_id); ++ if (ret_val) ++ return ret_val; ++ ++ phy->id |= (u32)(phy_id); ++ phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_get_hw_semaphore_82571 - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ **/ ++static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 sw_timeout = hw->nvm.word_size + 1; ++ s32 fw_timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ /* ++ * If we have timedout 3 times on trying to acquire ++ * the inter-port SMBI semaphore, there is old code ++ * operating on the other port, and it is not ++ * releasing SMBI. Modify the number of times that ++ * we try for the semaphore to interwork with this ++ * older code. ++ */ ++ if (hw->dev_spec.e82571.smb_counter > 2) ++ sw_timeout = 1; ++ ++ /* Get the SW semaphore */ ++ while (i < sw_timeout) { ++ swsm = er32(SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ udelay(50); ++ i++; ++ } ++ ++ if (i == sw_timeout) { ++ e_dbg("Driver can't access device - SMBI bit is set.\n"); ++ hw->dev_spec.e82571.smb_counter++; ++ } ++ /* Get the FW semaphore. */ ++ for (i = 0; i < fw_timeout; i++) { ++ swsm = er32(SWSM); ++ ew32(SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (er32(SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ udelay(50); ++ } ++ ++ if (i == fw_timeout) { ++ /* Release semaphores */ ++ e1000_put_hw_semaphore_82571(hw); ++ e_dbg("Driver can't access the NVM\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_put_hw_semaphore_82571 - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ **/ ++static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ ++ swsm = er32(SWSM); ++ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); ++ ew32(SWSM, swsm); ++} ++/** ++ * e1000_get_hw_semaphore_82573 - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore during reset. ++ * ++ **/ ++static s32 e1000_get_hw_semaphore_82573(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl; ++ s32 ret_val = 0; ++ s32 i = 0; ++ ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ do { ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP) ++ break; ++ ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ++ usleep_range(2000, 4000); ++ i++; ++ } while (i < MDIO_OWNERSHIP_TIMEOUT); ++ ++ if (i == MDIO_OWNERSHIP_TIMEOUT) { ++ /* Release semaphores */ ++ e1000_put_hw_semaphore_82573(hw); ++ e_dbg("Driver can't access the PHY\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_put_hw_semaphore_82573 - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used during reset. 
++ * ++ **/ ++static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl; ++ ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++} ++ ++static DEFINE_MUTEX(swflag_mutex); ++ ++/** ++ * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM. ++ * ++ **/ ++static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ mutex_lock(&swflag_mutex); ++ ret_val = e1000_get_hw_semaphore_82573(hw); ++ if (ret_val) ++ mutex_unlock(&swflag_mutex); ++ return ret_val; ++} ++ ++/** ++ * e1000_put_hw_semaphore_82574 - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ * ++ **/ ++static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw) ++{ ++ e1000_put_hw_semaphore_82573(hw); ++ mutex_unlock(&swflag_mutex); ++} ++ ++/** ++ * e1000_set_d0_lplu_state_82574 - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. ++ * LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. ++ **/ ++static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active) ++{ ++ u16 data = er32(POEMB); ++ ++ if (active) ++ data |= E1000_PHY_CTRL_D0A_LPLU; ++ else ++ data &= ~E1000_PHY_CTRL_D0A_LPLU; ++ ++ ew32(POEMB, data); ++ return 0; ++} ++ ++/** ++ * e1000_set_d3_lplu_state_82574 - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * when active is true, else clear lplu for D3. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. ++ **/ ++static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active) ++{ ++ u16 data = er32(POEMB); ++ ++ if (!active) { ++ data &= ~E1000_PHY_CTRL_NOND0A_LPLU; ++ } else if ((hw->phy.autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (hw->phy.autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (hw->phy.autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= E1000_PHY_CTRL_NOND0A_LPLU; ++ } ++ ++ ew32(POEMB, data); ++ return 0; ++} ++ ++/** ++ * e1000_acquire_nvm_82571 - Request for access to the EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * To gain access to the EEPROM, first we must obtain a hardware semaphore. ++ * Then for non-82573 hardware, set the EEPROM access request bit and wait ++ * for EEPROM access grant bit. If the access grant bit is not set, release ++ * hardware semaphore. 
++ **/ ++static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000_get_hw_semaphore_82571(hw); ++ if (ret_val) ++ return ret_val; ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ break; ++ default: ++ ret_val = e1000e_acquire_nvm(hw); ++ break; ++ } ++ ++ if (ret_val) ++ e1000_put_hw_semaphore_82571(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_release_nvm_82571 - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit. ++ **/ ++static void e1000_release_nvm_82571(struct e1000_hw *hw) ++{ ++ e1000e_release_nvm(hw); ++ e1000_put_hw_semaphore_82571(hw); ++} ++ ++/** ++ * e1000_write_nvm_82571 - Write to EEPROM using appropriate interface ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * For non-82573 silicon, write data to EEPROM at offset using SPI interface. ++ * ++ * If e1000e_update_nvm_checksum is not called after this function, the ++ * EEPROM will most likely contain an invalid checksum. ++ **/ ++static s32 e1000_write_nvm_82571(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ s32 ret_val; ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ ret_val = e1000_write_nvm_eewr_82571(hw, offset, words, data); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ ret_val = e1000e_write_nvm_spi(hw, offset, words, data); ++ break; ++ default: ++ ret_val = -E1000_ERR_NVM; ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_update_nvm_checksum_82571 - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++static s32 e1000_update_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ s32 ret_val; ++ u16 i; ++ ++ ret_val = e1000e_update_nvm_checksum_generic(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * If our nvm is an EEPROM, then we're done ++ * otherwise, commit the checksum to the flash NVM. ++ */ ++ if (hw->nvm.type != e1000_nvm_flash_hw) ++ return ret_val; ++ ++ /* Check for pending operations. */ ++ for (i = 0; i < E1000_FLASH_UPDATES; i++) { ++ usleep_range(1000, 2000); ++ if ((er32(EECD) & E1000_EECD_FLUPD) == 0) ++ break; ++ } ++ ++ if (i == E1000_FLASH_UPDATES) ++ return -E1000_ERR_NVM; ++ ++ /* Reset the firmware if using STM opcode. */ ++ if ((er32(FLOP) & 0xFF00) == E1000_STM_OPCODE) { ++ /* ++ * The enabling of and the actual reset must be done ++ * in two write cycles. ++ */ ++ ew32(HICR, E1000_HICR_FW_RESET_ENABLE); ++ e1e_flush(); ++ ew32(HICR, E1000_HICR_FW_RESET); ++ } ++ ++ /* Commit the write to flash */ ++ eecd = er32(EECD) | E1000_EECD_FLUPD; ++ ew32(EECD, eecd); ++ ++ for (i = 0; i < E1000_FLASH_UPDATES; i++) { ++ usleep_range(1000, 2000); ++ if ((er32(EECD) & E1000_EECD_FLUPD) == 0) ++ break; ++ } ++ ++ if (i == E1000_FLASH_UPDATES) ++ return -E1000_ERR_NVM; ++ ++ return 0; ++} ++ ++/** ++ * e1000_validate_nvm_checksum_82571 - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. 
++ **/ ++static s32 e1000_validate_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ if (hw->nvm.type == e1000_nvm_flash_hw) ++ e1000_fix_nvm_checksum_82571(hw); ++ ++ return e1000e_validate_nvm_checksum_generic(hw); ++} ++ ++/** ++ * e1000_write_nvm_eewr_82571 - Write to EEPROM for 82573 silicon ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * After checking for invalid values, poll the EEPROM to ensure the previous ++ * command has completed before trying to write the next word. After write ++ * poll for completion. ++ * ++ * If e1000e_update_nvm_checksum is not called after this function, the ++ * EEPROM will most likely contain an invalid checksum. ++ **/ ++static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, eewr = 0; ++ s32 ret_val = 0; ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ e_dbg("nvm parameter(s) out of bounds\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eewr = (data[i] << E1000_NVM_RW_REG_DATA) | ++ ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | ++ E1000_NVM_RW_REG_START; ++ ++ ret_val = e1000e_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE); ++ if (ret_val) ++ break; ++ ++ ew32(EEWR, eewr); ++ ++ ret_val = e1000e_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE); ++ if (ret_val) ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cfg_done_82571 - Poll for configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Reads the management control register for the config done bit to be set. ++ **/ ++static s32 e1000_get_cfg_done_82571(struct e1000_hw *hw) ++{ ++ s32 timeout = PHY_CFG_TIMEOUT; ++ ++ while (timeout) { ++ if (er32(EEMNGCTL) & ++ E1000_NVM_CFG_DONE_PORT_0) ++ break; ++ usleep_range(1000, 2000); ++ timeout--; ++ } ++ if (!timeout) { ++ e_dbg("MNG configuration cycle has not completed.\n"); ++ return -E1000_ERR_RESET; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_d0_lplu_state_82571 - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When activating LPLU ++ * this function also disables smart speed and vice versa. LPLU will not be ++ * activated unless the device autonegotiation advertisement meets standards ++ * of either 10 or 10/100 or 10/100/1000 at all duplexes. This is a function ++ * pointer entry point only called by PHY setup routines. 
++ **/ ++static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if (active) { ++ data |= IGP02E1000_PM_D0_LPLU; ++ ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data); ++ if (ret_val) ++ return ret_val; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data); ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ data &= ~IGP02E1000_PM_D0_LPLU; ++ ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data); ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_reset_hw_82571 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. ++ **/ ++static s32 e1000_reset_hw_82571(struct e1000_hw *hw) ++{ ++ u32 ctrl, ctrl_ext; ++ s32 ret_val; ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000e_disable_pcie_master(hw); ++ if (ret_val) ++ e_dbg("PCI-E Master disable polling has failed.\n"); ++ ++ e_dbg("Masking off all interrupts\n"); ++ ew32(IMC, 0xffffffff); ++ ++ ew32(RCTL, 0); ++ ew32(TCTL, E1000_TCTL_PSP); ++ e1e_flush(); ++ ++ usleep_range(10000, 20000); ++ ++ /* ++ * Must acquire the MDIO ownership before MAC reset. ++ * Ownership defaults to firmware after a reset. ++ */ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ ret_val = e1000_get_hw_semaphore_82573(hw); ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ ret_val = e1000_get_hw_semaphore_82574(hw); ++ break; ++ default: ++ break; ++ } ++ if (ret_val) ++ e_dbg("Cannot acquire MDIO ownership\n"); ++ ++ ctrl = er32(CTRL); ++ ++ e_dbg("Issuing a global reset to MAC\n"); ++ ew32(CTRL, ctrl | E1000_CTRL_RST); ++ ++ /* Must release MDIO ownership and mutex after MAC reset. */ ++ switch (hw->mac.type) { ++ case e1000_82574: ++ case e1000_82583: ++ e1000_put_hw_semaphore_82574(hw); ++ break; ++ default: ++ break; ++ } ++ ++ if (hw->nvm.type == e1000_nvm_flash_hw) { ++ udelay(10); ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ ew32(CTRL_EXT, ctrl_ext); ++ e1e_flush(); ++ } ++ ++ ret_val = e1000e_get_auto_rd_done(hw); ++ if (ret_val) ++ /* We don't want to continue accessing MAC registers. */ ++ return ret_val; ++ ++ /* ++ * Phy configuration from NVM just starts after EECD_AUTO_RD is set. ++ * Need to wait for Phy configuration completion before accessing ++ * NVM and Phy. 
++ */ ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ msleep(25); ++ break; ++ default: ++ break; ++ } ++ ++ /* Clear any pending interrupt events. */ ++ ew32(IMC, 0xffffffff); ++ er32(ICR); ++ ++ if (hw->mac.type == e1000_82571) { ++ /* Install any alternate MAC address into RAR0 */ ++ ret_val = e1000_check_alt_mac_addr_generic(hw); ++ if (ret_val) ++ return ret_val; ++ ++ e1000e_set_laa_state_82571(hw, true); ++ } ++ ++ /* Reinitialize the 82571 serdes link state machine */ ++ if (hw->phy.media_type == e1000_media_type_internal_serdes) ++ hw->mac.serdes_link_state = e1000_serdes_link_down; ++ ++ return 0; ++} ++ ++/** ++ * e1000_init_hw_82571 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. ++ **/ ++static s32 e1000_init_hw_82571(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 reg_data; ++ s32 ret_val; ++ u16 i, rar_count = mac->rar_entry_count; ++ ++ e1000_initialize_hw_bits_82571(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000e_id_led_init(hw); ++ if (ret_val) ++ e_dbg("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ ++ /* Disabling VLAN filtering */ ++ e_dbg("Initializing the IEEE VLAN\n"); ++ mac->ops.clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ /* ++ * If, however, a locally administered address was assigned to the ++ * 82571, we must reserve a RAR for it to work around an issue where ++ * resetting one port will reload the MAC on the other port. ++ */ ++ if (e1000e_get_laa_state_82571(hw)) ++ rar_count--; ++ e1000e_init_rx_addrs(hw, rar_count); ++ ++ /* Zero out the Multicast HASH table */ ++ e_dbg("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link_82571(hw); ++ ++ /* Set the transmit descriptor write-back policy */ ++ reg_data = er32(TXDCTL(0)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | ++ E1000_TXDCTL_COUNT_DESC; ++ ew32(TXDCTL(0), reg_data); ++ ++ /* ...for both queues. */ ++ switch (mac->type) { ++ case e1000_82573: ++ e1000e_enable_tx_pkt_filtering(hw); ++ /* fall through */ ++ case e1000_82574: ++ case e1000_82583: ++ reg_data = er32(GCR); ++ reg_data |= E1000_GCR_L1_ACT_WITHOUT_L0S_RX; ++ ew32(GCR, reg_data); ++ break; ++ default: ++ reg_data = er32(TXDCTL(1)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | ++ E1000_TXDCTL_COUNT_DESC; ++ ew32(TXDCTL(1), reg_data); ++ break; ++ } ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82571(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_initialize_hw_bits_82571 - Initialize hardware-dependent bits ++ * @hw: pointer to the HW structure ++ * ++ * Initializes required hardware-dependent bits needed for normal operation. 
++ **/ ++static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = er32(TXDCTL(0)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = er32(TXDCTL(1)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = er32(TARC(0)); ++ reg &= ~(0xF << 27); /* 30:27 */ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ reg |= (1 << 23) | (1 << 24) | (1 << 25) | (1 << 26); ++ break; ++ default: ++ break; ++ } ++ ew32(TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = er32(TARC(1)); ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ reg &= ~((1 << 29) | (1 << 30)); ++ reg |= (1 << 22) | (1 << 24) | (1 << 25) | (1 << 26); ++ if (er32(TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ ew32(TARC(1), reg); ++ break; ++ default: ++ break; ++ } ++ ++ /* Device Control */ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ reg = er32(CTRL); ++ reg &= ~(1 << 29); ++ ew32(CTRL, reg); ++ break; ++ default: ++ break; ++ } ++ ++ /* Extended Device Control */ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ reg = er32(CTRL_EXT); ++ reg &= ~(1 << 23); ++ reg |= (1 << 22); ++ ew32(CTRL_EXT, reg); ++ break; ++ default: ++ break; ++ } ++ ++ if (hw->mac.type == e1000_82571) { ++ reg = er32(PBA_ECC); ++ reg |= E1000_PBA_ECC_CORR_EN; ++ ew32(PBA_ECC, reg); ++ } ++ /* ++ * Workaround for hardware errata. ++ * Ensure that DMA Dynamic Clock gating is disabled on 82571 and 82572 ++ */ ++ ++ if ((hw->mac.type == e1000_82571) || ++ (hw->mac.type == e1000_82572)) { ++ reg = er32(CTRL_EXT); ++ reg &= ~E1000_CTRL_EXT_DMA_DYN_CLK_EN; ++ ew32(CTRL_EXT, reg); ++ } ++ ++ ++ /* PCI-Ex Control Registers */ ++ switch (hw->mac.type) { ++ case e1000_82574: ++ case e1000_82583: ++ reg = er32(GCR); ++ reg |= (1 << 22); ++ ew32(GCR, reg); ++ ++ /* ++ * Workaround for hardware errata. ++ * apply workaround for hardware errata documented in errata ++ * docs Fixes issue where some error prone or unreliable PCIe ++ * completions are occurring, particularly with ASPM enabled. ++ * Without fix, issue can cause Tx timeouts. ++ */ ++ reg = er32(GCR2); ++ reg |= 1; ++ ew32(GCR2, reg); ++ break; ++ default: ++ break; ++ } ++} ++ ++/** ++ * e1000_clear_vfta_82571 - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++static void e1000_clear_vfta_82571(struct e1000_hw *hw) ++{ ++ u32 offset; ++ u32 vfta_value = 0; ++ u32 vfta_offset = 0; ++ u32 vfta_bit_in_reg = 0; ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ if (hw->mng_cookie.vlan_id != 0) { ++ /* ++ * The VFTA is a 4096b bit-field, each identifying ++ * a single VLAN ID. The following operations ++ * determine which 32b entry (i.e. offset) into the ++ * array we want to set the VLAN ID (i.e. bit) of ++ * the manageability unit. 
++ */ ++ vfta_offset = (hw->mng_cookie.vlan_id >> ++ E1000_VFTA_ENTRY_SHIFT) & ++ E1000_VFTA_ENTRY_MASK; ++ vfta_bit_in_reg = 1 << (hw->mng_cookie.vlan_id & ++ E1000_VFTA_ENTRY_BIT_SHIFT_MASK); ++ } ++ break; ++ default: ++ break; ++ } ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ /* ++ * If the offset we want to clear is the same offset of the ++ * manageability VLAN ID, then clear all bits except that of ++ * the manageability unit. ++ */ ++ vfta_value = (offset == vfta_offset) ? vfta_bit_in_reg : 0; ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, vfta_value); ++ e1e_flush(); ++ } ++} ++ ++/** ++ * e1000_check_mng_mode_82574 - Check manageability is enabled ++ * @hw: pointer to the HW structure ++ * ++ * Reads the NVM Initialization Control Word 2 and returns true ++ * (>0) if any manageability is enabled, else false (0). ++ **/ ++static bool e1000_check_mng_mode_82574(struct e1000_hw *hw) ++{ ++ u16 data; ++ ++ e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &data); ++ return (data & E1000_NVM_INIT_CTRL2_MNGM) != 0; ++} ++ ++/** ++ * e1000_led_on_82574 - Turn LED on ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED on. ++ **/ ++static s32 e1000_led_on_82574(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ u32 i; ++ ++ ctrl = hw->mac.ledctl_mode2; ++ if (!(E1000_STATUS_LU & er32(STATUS))) { ++ /* ++ * If no link, then turn LED on by setting the invert bit ++ * for each LED that's "on" (0x0E) in ledctl_mode2. ++ */ ++ for (i = 0; i < 4; i++) ++ if (((hw->mac.ledctl_mode2 >> (i * 8)) & 0xFF) == ++ E1000_LEDCTL_MODE_LED_ON) ++ ctrl |= (E1000_LEDCTL_LED0_IVRT << (i * 8)); ++ } ++ ew32(LEDCTL, ctrl); ++ ++ return 0; ++} ++ ++/** ++ * e1000_check_phy_82574 - check 82574 phy hung state ++ * @hw: pointer to the HW structure ++ * ++ * Returns whether phy is hung or not ++ **/ ++bool e1000_check_phy_82574(struct e1000_hw *hw) ++{ ++ u16 status_1kbt = 0; ++ u16 receive_errors = 0; ++ bool phy_hung = false; ++ s32 ret_val = 0; ++ ++ /* ++ * Read PHY Receive Error counter first, if its is max - all F's then ++ * read the Base1000T status register If both are max then PHY is hung. ++ */ ++ ret_val = e1e_rphy(hw, E1000_RECEIVE_ERROR_COUNTER, &receive_errors); ++ ++ if (ret_val) ++ goto out; ++ if (receive_errors == E1000_RECEIVE_ERROR_MAX) { ++ ret_val = e1e_rphy(hw, E1000_BASE1000T_STATUS, &status_1kbt); ++ if (ret_val) ++ goto out; ++ if ((status_1kbt & E1000_IDLE_ERROR_COUNT_MASK) == ++ E1000_IDLE_ERROR_COUNT_MASK) ++ phy_hung = true; ++ } ++out: ++ return phy_hung; ++} ++ ++/** ++ * e1000_setup_link_82571 - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++static s32 e1000_setup_link_82571(struct e1000_hw *hw) ++{ ++ /* ++ * 82573 does not have a word in the NVM to determine ++ * the default flow control setting, so we explicitly ++ * set it to full. 
++ */ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ if (hw->fc.requested_mode == e1000_fc_default) ++ hw->fc.requested_mode = e1000_fc_full; ++ break; ++ default: ++ break; ++ } ++ ++ return e1000e_setup_link(hw); ++} ++ ++/** ++ * e1000_setup_copper_link_82571 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Configures the link for auto-neg or forced speed and duplex. Then we check ++ * for link, once link is established calls to configure collision distance ++ * and flow control are called. ++ **/ ++static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ew32(CTRL, ctrl); ++ ++ switch (hw->phy.type) { ++ case e1000_phy_m88: ++ case e1000_phy_bm: ++ ret_val = e1000e_copper_link_setup_m88(hw); ++ break; ++ case e1000_phy_igp_2: ++ ret_val = e1000e_copper_link_setup_igp(hw); ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000e_setup_copper_link(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_fiber_serdes_link_82571 - Setup link for fiber/serdes ++ * @hw: pointer to the HW structure ++ * ++ * Configures collision distance and flow control for fiber and serdes links. ++ * Upon successful setup, poll for link. ++ **/ ++static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw) ++{ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ /* ++ * If SerDes loopback mode is entered, there is no form ++ * of reset to take the adapter out of that mode. So we ++ * have to explicitly take the adapter out of loopback ++ * mode. This prevents drivers from twiddling their thumbs ++ * if another tool failed to take it out of loopback mode. ++ */ ++ ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); ++ break; ++ default: ++ break; ++ } ++ ++ return e1000e_setup_fiber_serdes_link(hw); ++} ++ ++/** ++ * e1000_check_for_serdes_link_82571 - Check for link (Serdes) ++ * @hw: pointer to the HW structure ++ * ++ * Reports the link state as up or down. ++ * ++ * If autonegotiation is supported by the link partner, the link state is ++ * determined by the result of autonegotiation. This is the most likely case. ++ * If autonegotiation is not supported by the link partner, and the link ++ * has a valid signal, force the link up. ++ * ++ * The link state is represented internally here by 4 states: ++ * ++ * 1) down ++ * 2) autoneg_progress ++ * 3) autoneg_complete (the link successfully autonegotiated) ++ * 4) forced_up (the link has been forced up, it did not autonegotiate) ++ * ++ **/ ++static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw; ++ u32 ctrl; ++ u32 status; ++ u32 txcw; ++ u32 i; ++ s32 ret_val = 0; ++ ++ ctrl = er32(CTRL); ++ status = er32(STATUS); ++ rxcw = er32(RXCW); ++ ++ if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) { ++ ++ /* Receiver is synchronized with no invalid bits. 
*/ ++ switch (mac->serdes_link_state) { ++ case e1000_serdes_link_autoneg_complete: ++ if (!(status & E1000_STATUS_LU)) { ++ /* ++ * We have lost link, retry autoneg before ++ * reporting link failure ++ */ ++ mac->serdes_link_state = ++ e1000_serdes_link_autoneg_progress; ++ mac->serdes_has_link = false; ++ e_dbg("AN_UP -> AN_PROG\n"); ++ } else { ++ mac->serdes_has_link = true; ++ } ++ break; ++ ++ case e1000_serdes_link_forced_up: ++ /* ++ * If we are receiving /C/ ordered sets, re-enable ++ * auto-negotiation in the TXCW register and disable ++ * forced link in the Device Control register in an ++ * attempt to auto-negotiate with our link partner. ++ * If the partner code word is null, stop forcing ++ * and restart auto negotiation. ++ */ ++ if ((rxcw & E1000_RXCW_C) || !(rxcw & E1000_RXCW_CW)) { ++ /* Enable autoneg, and unforce link up */ ++ ew32(TXCW, mac->txcw); ++ ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ mac->serdes_link_state = ++ e1000_serdes_link_autoneg_progress; ++ mac->serdes_has_link = false; ++ e_dbg("FORCED_UP -> AN_PROG\n"); ++ } else { ++ mac->serdes_has_link = true; ++ } ++ break; ++ ++ case e1000_serdes_link_autoneg_progress: ++ if (rxcw & E1000_RXCW_C) { ++ /* ++ * We received /C/ ordered sets, meaning the ++ * link partner has autonegotiated, and we can ++ * trust the Link Up (LU) status bit. ++ */ ++ if (status & E1000_STATUS_LU) { ++ mac->serdes_link_state = ++ e1000_serdes_link_autoneg_complete; ++ e_dbg("AN_PROG -> AN_UP\n"); ++ mac->serdes_has_link = true; ++ } else { ++ /* Autoneg completed, but failed. */ ++ mac->serdes_link_state = ++ e1000_serdes_link_down; ++ e_dbg("AN_PROG -> DOWN\n"); ++ } ++ } else { ++ /* ++ * The link partner did not autoneg. ++ * Force link up and full duplex, and change ++ * state to forced. ++ */ ++ ew32(TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ ew32(CTRL, ctrl); ++ ++ /* Configure Flow Control after link up. */ ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ if (ret_val) { ++ e_dbg("Error config flow control\n"); ++ break; ++ } ++ mac->serdes_link_state = ++ e1000_serdes_link_forced_up; ++ mac->serdes_has_link = true; ++ e_dbg("AN_PROG -> FORCED_UP\n"); ++ } ++ break; ++ ++ case e1000_serdes_link_down: ++ default: ++ /* ++ * The link was down but the receiver has now gained ++ * valid sync, so lets see if we can bring the link ++ * up. 
++ */ ++ ew32(TXCW, mac->txcw); ++ ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ mac->serdes_link_state = ++ e1000_serdes_link_autoneg_progress; ++ mac->serdes_has_link = false; ++ e_dbg("DOWN -> AN_PROG\n"); ++ break; ++ } ++ } else { ++ if (!(rxcw & E1000_RXCW_SYNCH)) { ++ mac->serdes_has_link = false; ++ mac->serdes_link_state = e1000_serdes_link_down; ++ e_dbg("ANYSTATE -> DOWN\n"); ++ } else { ++ /* ++ * Check several times, if Sync and Config ++ * both are consistently 1 then simply ignore ++ * the Invalid bit and restart Autoneg ++ */ ++ for (i = 0; i < AN_RETRY_COUNT; i++) { ++ udelay(10); ++ rxcw = er32(RXCW); ++ if ((rxcw & E1000_RXCW_IV) && ++ !((rxcw & E1000_RXCW_SYNCH) && ++ (rxcw & E1000_RXCW_C))) { ++ mac->serdes_has_link = false; ++ mac->serdes_link_state = ++ e1000_serdes_link_down; ++ e_dbg("ANYSTATE -> DOWN\n"); ++ break; ++ } ++ } ++ ++ if (i == AN_RETRY_COUNT) { ++ txcw = er32(TXCW); ++ txcw |= E1000_TXCW_ANE; ++ ew32(TXCW, txcw); ++ mac->serdes_link_state = ++ e1000_serdes_link_autoneg_progress; ++ mac->serdes_has_link = false; ++ e_dbg("ANYSTATE -> AN_PROG\n"); ++ } ++ } ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_valid_led_default_82571 - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. ++ **/ ++static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ return ret_val; ++ } ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ if (*data == ID_LED_RESERVED_F746) ++ *data = ID_LED_DEFAULT_82573; ++ break; ++ default: ++ if (*data == ID_LED_RESERVED_0000 || ++ *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT; ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_get_laa_state_82571 - Get locally administered address state ++ * @hw: pointer to the HW structure ++ * ++ * Retrieve and return the current locally administered address state. ++ **/ ++bool e1000e_get_laa_state_82571(struct e1000_hw *hw) ++{ ++ if (hw->mac.type != e1000_82571) ++ return false; ++ ++ return hw->dev_spec.e82571.laa_is_present; ++} ++ ++/** ++ * e1000e_set_laa_state_82571 - Set locally administered address state ++ * @hw: pointer to the HW structure ++ * @state: enable/disable locally administered address ++ * ++ * Enable/Disable the current locally administered address state. ++ **/ ++void e1000e_set_laa_state_82571(struct e1000_hw *hw, bool state) ++{ ++ if (hw->mac.type != e1000_82571) ++ return; ++ ++ hw->dev_spec.e82571.laa_is_present = state; ++ ++ /* If workaround is activated... */ ++ if (state) ++ /* ++ * Hold a copy of the LAA in RAR[14] This is done so that ++ * between the time RAR[0] gets clobbered and the time it ++ * gets fixed, the actual LAA is in one of the RARs and no ++ * incoming packets directed to this port are dropped. ++ * Eventually the LAA will be in RAR[0] and RAR[14]. ++ */ ++ e1000e_rar_set(hw, hw->mac.addr, hw->mac.rar_entry_count - 1); ++} ++ ++/** ++ * e1000_fix_nvm_checksum_82571 - Fix EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Verifies that the EEPROM has completed the update. After updating the ++ * EEPROM, we need to check bit 15 in work 0x23 for the checksum fix. 
If ++ * the checksum fix is not implemented, we need to set the bit and update ++ * the checksum. Otherwise, if bit 15 is set and the checksum is incorrect, ++ * we need to return bad checksum. ++ **/ ++static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val; ++ u16 data; ++ ++ if (nvm->type != e1000_nvm_flash_hw) ++ return 0; ++ ++ /* ++ * Check bit 4 of word 10h. If it is 0, firmware is done updating ++ * 10h-12h. Checksum may need to be fixed. ++ */ ++ ret_val = e1000_read_nvm(hw, 0x10, 1, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if (!(data & 0x10)) { ++ /* ++ * Read 0x23 and check bit 15. This bit is a 1 ++ * when the checksum has already been fixed. If ++ * the checksum is still wrong and this bit is a ++ * 1, we need to return bad checksum. Otherwise, ++ * we need to set this bit to a 1 and update the ++ * checksum. ++ */ ++ ret_val = e1000_read_nvm(hw, 0x23, 1, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if (!(data & 0x8000)) { ++ data |= 0x8000; ++ ret_val = e1000_write_nvm(hw, 0x23, 1, &data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000e_update_nvm_checksum(hw); ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_read_mac_addr_82571 - Read device MAC address ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_read_mac_addr_82571(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ if (hw->mac.type == e1000_82571) { ++ /* ++ * If there's an alternate MAC address place it in RAR0 ++ * so that it will override the Si installed default perm ++ * address. ++ */ ++ ret_val = e1000_check_alt_mac_addr_generic(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_read_mac_addr_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_82571 - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ if (!(phy->ops.check_reset_block)) ++ return; ++ ++ /* If the management interface is not enabled, then power down */ ++ if (!(mac->ops.check_mng_mode(hw) || phy->ops.check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82571 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. 
++ **/ ++static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw) ++{ ++ e1000e_clear_hw_cntrs_base(hw); ++ ++ er32(PRC64); ++ er32(PRC127); ++ er32(PRC255); ++ er32(PRC511); ++ er32(PRC1023); ++ er32(PRC1522); ++ er32(PTC64); ++ er32(PTC127); ++ er32(PTC255); ++ er32(PTC511); ++ er32(PTC1023); ++ er32(PTC1522); ++ ++ er32(ALGNERRC); ++ er32(RXERRC); ++ er32(TNCRS); ++ er32(CEXTERR); ++ er32(TSCTC); ++ er32(TSCTFC); ++ ++ er32(MGTPRC); ++ er32(MGTPDC); ++ er32(MGTPTC); ++ ++ er32(IAC); ++ er32(ICRXOC); ++ ++ er32(ICRXPTC); ++ er32(ICRXATC); ++ er32(ICTXPTC); ++ er32(ICTXATC); ++ er32(ICTXQEC); ++ er32(ICTXQMTC); ++ er32(ICRXDMTC); ++} ++ ++static const struct e1000_mac_operations e82571_mac_ops = { ++ /* .check_mng_mode: mac type dependent */ ++ /* .check_for_link: media type dependent */ ++ .id_led_init = e1000e_id_led_init, ++ .cleanup_led = e1000e_cleanup_led_generic, ++ .clear_hw_cntrs = e1000_clear_hw_cntrs_82571, ++ .get_bus_info = e1000e_get_bus_info_pcie, ++ .set_lan_id = e1000_set_lan_id_multi_port_pcie, ++ /* .get_link_up_info: media type dependent */ ++ /* .led_on: mac type dependent */ ++ .led_off = e1000e_led_off_generic, ++ .update_mc_addr_list = e1000e_update_mc_addr_list_generic, ++ .write_vfta = e1000_write_vfta_generic, ++ .clear_vfta = e1000_clear_vfta_82571, ++ .reset_hw = e1000_reset_hw_82571, ++ .init_hw = e1000_init_hw_82571, ++ .setup_link = e1000_setup_link_82571, ++ /* .setup_physical_interface: media type dependent */ ++ .setup_led = e1000e_setup_led_generic, ++ .read_mac_addr = e1000_read_mac_addr_82571, ++}; ++ ++static const struct e1000_phy_operations e82_phy_ops_igp = { ++ .acquire = e1000_get_hw_semaphore_82571, ++ .check_polarity = e1000_check_polarity_igp, ++ .check_reset_block = e1000e_check_reset_block_generic, ++ .commit = NULL, ++ .force_speed_duplex = e1000e_phy_force_speed_duplex_igp, ++ .get_cfg_done = e1000_get_cfg_done_82571, ++ .get_cable_length = e1000e_get_cable_length_igp_2, ++ .get_info = e1000e_get_phy_info_igp, ++ .read_reg = e1000e_read_phy_reg_igp, ++ .release = e1000_put_hw_semaphore_82571, ++ .reset = e1000e_phy_hw_reset_generic, ++ .set_d0_lplu_state = e1000_set_d0_lplu_state_82571, ++ .set_d3_lplu_state = e1000e_set_d3_lplu_state, ++ .write_reg = e1000e_write_phy_reg_igp, ++ .cfg_on_link_up = NULL, ++}; ++ ++static const struct e1000_phy_operations e82_phy_ops_m88 = { ++ .acquire = e1000_get_hw_semaphore_82571, ++ .check_polarity = e1000_check_polarity_m88, ++ .check_reset_block = e1000e_check_reset_block_generic, ++ .commit = e1000e_phy_sw_reset, ++ .force_speed_duplex = e1000e_phy_force_speed_duplex_m88, ++ .get_cfg_done = e1000e_get_cfg_done, ++ .get_cable_length = e1000e_get_cable_length_m88, ++ .get_info = e1000e_get_phy_info_m88, ++ .read_reg = e1000e_read_phy_reg_m88, ++ .release = e1000_put_hw_semaphore_82571, ++ .reset = e1000e_phy_hw_reset_generic, ++ .set_d0_lplu_state = e1000_set_d0_lplu_state_82571, ++ .set_d3_lplu_state = e1000e_set_d3_lplu_state, ++ .write_reg = e1000e_write_phy_reg_m88, ++ .cfg_on_link_up = NULL, ++}; ++ ++static const struct e1000_phy_operations e82_phy_ops_bm = { ++ .acquire = e1000_get_hw_semaphore_82571, ++ .check_polarity = e1000_check_polarity_m88, ++ .check_reset_block = e1000e_check_reset_block_generic, ++ .commit = e1000e_phy_sw_reset, ++ .force_speed_duplex = e1000e_phy_force_speed_duplex_m88, ++ .get_cfg_done = e1000e_get_cfg_done, ++ .get_cable_length = e1000e_get_cable_length_m88, ++ .get_info = e1000e_get_phy_info_m88, ++ .read_reg = e1000e_read_phy_reg_bm2, ++ .release = 
e1000_put_hw_semaphore_82571, ++ .reset = e1000e_phy_hw_reset_generic, ++ .set_d0_lplu_state = e1000_set_d0_lplu_state_82571, ++ .set_d3_lplu_state = e1000e_set_d3_lplu_state, ++ .write_reg = e1000e_write_phy_reg_bm2, ++ .cfg_on_link_up = NULL, ++}; ++ ++static const struct e1000_nvm_operations e82571_nvm_ops = { ++ .acquire = e1000_acquire_nvm_82571, ++ .read = e1000e_read_nvm_eerd, ++ .release = e1000_release_nvm_82571, ++ .update = e1000_update_nvm_checksum_82571, ++ .valid_led_default = e1000_valid_led_default_82571, ++ .validate = e1000_validate_nvm_checksum_82571, ++ .write = e1000_write_nvm_82571, ++}; ++ ++const struct e1000_info e1000_82571_info = { ++ .mac = e1000_82571, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_SMART_POWER_DOWN ++ | FLAG_RESET_OVERWRITES_LAA /* errata */ ++ | FLAG_TARC_SPEED_MODE_BIT /* errata */ ++ | FLAG_APME_CHECK_PORT_B, ++ .flags2 = FLAG2_DISABLE_ASPM_L1 /* errata 13 */ ++ | FLAG2_DMA_BURST, ++ .pba = 38, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_82571, ++ .mac_ops = &e82571_mac_ops, ++ .phy_ops = &e82_phy_ops_igp, ++ .nvm_ops = &e82571_nvm_ops, ++}; ++ ++const struct e1000_info e1000_82572_info = { ++ .mac = e1000_82572, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_TARC_SPEED_MODE_BIT, /* errata */ ++ .flags2 = FLAG2_DISABLE_ASPM_L1 /* errata 13 */ ++ | FLAG2_DMA_BURST, ++ .pba = 38, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_82571, ++ .mac_ops = &e82571_mac_ops, ++ .phy_ops = &e82_phy_ops_igp, ++ .nvm_ops = &e82571_nvm_ops, ++}; ++ ++const struct e1000_info e1000_82573_info = { ++ .mac = e1000_82573, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_SMART_POWER_DOWN ++ | FLAG_HAS_AMT ++ | FLAG_HAS_SWSM_ON_LOAD, ++ .flags2 = FLAG2_DISABLE_ASPM_L1 ++ | FLAG2_DISABLE_ASPM_L0S, ++ .pba = 20, ++ .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, ++ .get_variants = e1000_get_variants_82571, ++ .mac_ops = &e82571_mac_ops, ++ .phy_ops = &e82_phy_ops_m88, ++ .nvm_ops = &e82571_nvm_ops, ++}; ++ ++const struct e1000_info e1000_82574_info = { ++ .mac = e1000_82574, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_MSIX ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_SMART_POWER_DOWN ++ | FLAG_HAS_AMT ++ | FLAG_HAS_CTRLEXT_ON_LOAD, ++ .flags2 = FLAG2_CHECK_PHY_HANG ++ | FLAG2_DISABLE_ASPM_L0S ++ | FLAG2_NO_DISABLE_RX, ++ .pba = 32, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_82571, ++ .mac_ops = &e82571_mac_ops, ++ .phy_ops = &e82_phy_ops_bm, ++ .nvm_ops = &e82571_nvm_ops, ++}; ++ ++const struct e1000_info e1000_82583_info = { ++ .mac = e1000_82583, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_SMART_POWER_DOWN ++ | FLAG_HAS_AMT ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_HAS_CTRLEXT_ON_LOAD, ++ .flags2 = FLAG2_DISABLE_ASPM_L0S ++ | FLAG2_NO_DISABLE_RX, ++ .pba = 32, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_82571, ++ .mac_ops = &e82571_mac_ops, ++ .phy_ops = &e82_phy_ops_bm, ++ .nvm_ops = &e82571_nvm_ops, ++}; ++ +--- linux/drivers/xenomai/net/drivers/e1000e/lib.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/lib.c 2021-04-07 16:01:27.214634185 +0800 +@@ -0,0 +1,2693 @@ 
++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000.h" ++ ++enum e1000_mng_mode { ++ e1000_mng_mode_none = 0, ++ e1000_mng_mode_asf, ++ e1000_mng_mode_pt, ++ e1000_mng_mode_ipmi, ++ e1000_mng_mode_host_if_only ++}; ++ ++#define E1000_FACTPS_MNGCG 0x20000000 ++ ++/* Intel(R) Active Management Technology signature */ ++#define E1000_IAMT_SIGNATURE 0x544D4149 ++ ++/** ++ * e1000e_get_bus_info_pcie - Get PCIe bus information ++ * @hw: pointer to the HW structure ++ * ++ * Determines and stores the system bus information for a particular ++ * network interface. The following bus information is determined and stored: ++ * bus speed, bus width, type (PCIe), and PCIe function. ++ **/ ++s32 e1000e_get_bus_info_pcie(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_bus_info *bus = &hw->bus; ++ struct e1000_adapter *adapter = hw->adapter; ++ u16 pcie_link_status, cap_offset; ++ ++ cap_offset = pci_pcie_cap(adapter->pdev); ++ if (!cap_offset) { ++ bus->width = e1000_bus_width_unknown; ++ } else { ++ pci_read_config_word(adapter->pdev, ++ cap_offset + PCIE_LINK_STATUS, ++ &pcie_link_status); ++ bus->width = (enum e1000_bus_width)((pcie_link_status & ++ PCIE_LINK_WIDTH_MASK) >> ++ PCIE_LINK_WIDTH_SHIFT); ++ } ++ ++ mac->ops.set_lan_id(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices ++ * ++ * @hw: pointer to the HW structure ++ * ++ * Determines the LAN function id by reading memory-mapped registers ++ * and swaps the port value if requested. ++ **/ ++void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ u32 reg; ++ ++ /* ++ * The status register reports the correct function number ++ * for the device regardless of function swap state. ++ */ ++ reg = er32(STATUS); ++ bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; ++} ++ ++/** ++ * e1000_set_lan_id_single_port - Set LAN id for a single port device ++ * @hw: pointer to the HW structure ++ * ++ * Sets the LAN function id to zero for a single port device. 
++ **/ ++void e1000_set_lan_id_single_port(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ ++ bus->func = 0; ++} ++ ++/** ++ * e1000_clear_vfta_generic - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++void e1000_clear_vfta_generic(struct e1000_hw *hw) ++{ ++ u32 offset; ++ ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); ++ e1e_flush(); ++ } ++} ++ ++/** ++ * e1000_write_vfta_generic - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: register offset in VLAN filter table ++ * @value: register value written to VLAN filter table ++ * ++ * Writes value at the given offset in the register array which stores ++ * the VLAN filter table. ++ **/ ++void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); ++ e1e_flush(); ++} ++ ++/** ++ * e1000e_init_rx_addrs - Initialize receive address's ++ * @hw: pointer to the HW structure ++ * @rar_count: receive address registers ++ * ++ * Setup the receive address registers by setting the base receive address ++ * register to the devices MAC address and clearing all the other receive ++ * address registers to 0. ++ **/ ++void e1000e_init_rx_addrs(struct e1000_hw *hw, u16 rar_count) ++{ ++ u32 i; ++ u8 mac_addr[ETH_ALEN] = {0}; ++ ++ /* Setup the receive address */ ++ e_dbg("Programming MAC Address into RAR[0]\n"); ++ ++ e1000e_rar_set(hw, hw->mac.addr, 0); ++ ++ /* Zero out the other (rar_entry_count - 1) receive addresses */ ++ e_dbg("Clearing RAR[1-%u]\n", rar_count-1); ++ for (i = 1; i < rar_count; i++) ++ e1000e_rar_set(hw, mac_addr, i); ++} ++ ++/** ++ * e1000_check_alt_mac_addr_generic - Check for alternate MAC addr ++ * @hw: pointer to the HW structure ++ * ++ * Checks the nvm for an alternate MAC address. An alternate MAC address ++ * can be setup by pre-boot software and must be treated like a permanent ++ * address and must override the actual permanent MAC address. If an ++ * alternate MAC address is found it is programmed into RAR0, replacing ++ * the permanent address that was installed into RAR0 by the Si on reset. ++ * This function will return SUCCESS unless it encounters an error while ++ * reading the EEPROM. ++ **/ ++s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw) ++{ ++ u32 i; ++ s32 ret_val = 0; ++ u16 offset, nvm_alt_mac_addr_offset, nvm_data; ++ u8 alt_mac_addr[ETH_ALEN]; ++ ++ ret_val = e1000_read_nvm(hw, NVM_COMPAT, 1, &nvm_data); ++ if (ret_val) ++ goto out; ++ ++ /* Check for LOM (vs. 
NIC) or one of two valid mezzanine cards */ ++ if (!((nvm_data & NVM_COMPAT_LOM) || ++ (hw->adapter->pdev->device == E1000_DEV_ID_82571EB_SERDES_DUAL) || ++ (hw->adapter->pdev->device == E1000_DEV_ID_82571EB_SERDES_QUAD) || ++ (hw->adapter->pdev->device == E1000_DEV_ID_82571EB_SERDES))) ++ goto out; ++ ++ ret_val = e1000_read_nvm(hw, NVM_ALT_MAC_ADDR_PTR, 1, ++ &nvm_alt_mac_addr_offset); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if ((nvm_alt_mac_addr_offset == 0xFFFF) || ++ (nvm_alt_mac_addr_offset == 0x0000)) ++ /* There is no Alternate MAC Address */ ++ goto out; ++ ++ if (hw->bus.func == E1000_FUNC_1) ++ nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1; ++ for (i = 0; i < ETH_ALEN; i += 2) { ++ offset = nvm_alt_mac_addr_offset + (i >> 1); ++ ret_val = e1000_read_nvm(hw, offset, 1, &nvm_data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ alt_mac_addr[i] = (u8)(nvm_data & 0xFF); ++ alt_mac_addr[i + 1] = (u8)(nvm_data >> 8); ++ } ++ ++ /* if multicast bit is set, the alternate address will not be used */ ++ if (is_multicast_ether_addr(alt_mac_addr)) { ++ e_dbg("Ignoring Alternate Mac Address with MC bit set\n"); ++ goto out; ++ } ++ ++ /* ++ * We have a valid alternate MAC address, and we want to treat it the ++ * same as the normal permanent MAC address stored by the HW into the ++ * RAR. Do this by mapping this address into RAR0. ++ */ ++ e1000e_rar_set(hw, alt_mac_addr, 0); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_rar_set - Set receive address register ++ * @hw: pointer to the HW structure ++ * @addr: pointer to the receive address ++ * @index: receive address array register ++ * ++ * Sets the receive address array register at index to the address passed ++ * in by addr. ++ **/ ++void e1000e_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) ++{ ++ u32 rar_low, rar_high; ++ ++ /* ++ * HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((u32) addr[0] | ++ ((u32) addr[1] << 8) | ++ ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); ++ ++ rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); ++ ++ /* If MAC address zero, no need to set the AV bit */ ++ if (rar_low || rar_high) ++ rar_high |= E1000_RAH_AV; ++ ++ /* ++ * Some bridges will combine consecutive 32-bit writes into ++ * a single burst write, which will malfunction on some parts. ++ * The flushes avoid this. ++ */ ++ ew32(RAL(index), rar_low); ++ e1e_flush(); ++ ew32(RAH(index), rar_high); ++ e1e_flush(); ++} ++ ++/** ++ * e1000_hash_mc_addr - Generate a multicast hash value ++ * @hw: pointer to the HW structure ++ * @mc_addr: pointer to a multicast address ++ * ++ * Generates a multicast address hash value which is used to determine ++ * the multicast filter table array address and new table value. See ++ * e1000_mta_set_generic() ++ **/ ++static u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) ++{ ++ u32 hash_value, hash_mask; ++ u8 bit_shift = 0; ++ ++ /* Register count multiplied by bits per register */ ++ hash_mask = (hw->mac.mta_reg_count * 32) - 1; ++ ++ /* ++ * For a mc_filter_type of 0, bit_shift is the number of left-shifts ++ * where 0xFF would still fall within the hash mask. ++ */ ++ while (hash_mask >> bit_shift != 0xFF) ++ bit_shift++; ++ ++ /* ++ * The portion of the address that is used for the hash table ++ * is determined by the mc_filter_type setting. ++ * The algorithm is such that there is a total of 8 bits of shifting. 
++ * The bit_shift for a mc_filter_type of 0 represents the number of ++ * left-shifts where the MSB of mc_addr[5] would still fall within ++ * the hash_mask. Case 0 does this exactly. Since there are a total ++ * of 8 bits of shifting, then mc_addr[4] will shift right the ++ * remaining number of bits. Thus 8 - bit_shift. The rest of the ++ * cases are a variation of this algorithm...essentially raising the ++ * number of bits to shift mc_addr[5] left, while still keeping the ++ * 8-bit shifting total. ++ * ++ * For example, given the following Destination MAC Address and an ++ * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask), ++ * we can see that the bit_shift for case 0 is 4. These are the hash ++ * values resulting from each mc_filter_type... ++ * [0] [1] [2] [3] [4] [5] ++ * 01 AA 00 12 34 56 ++ * LSB MSB ++ * ++ * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 ++ * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 ++ * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 ++ * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 ++ */ ++ switch (hw->mac.mc_filter_type) { ++ default: ++ case 0: ++ break; ++ case 1: ++ bit_shift += 1; ++ break; ++ case 2: ++ bit_shift += 2; ++ break; ++ case 3: ++ bit_shift += 4; ++ break; ++ } ++ ++ hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | ++ (((u16) mc_addr[5]) << bit_shift))); ++ ++ return hash_value; ++} ++ ++/** ++ * e1000e_update_mc_addr_list_generic - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * ++ * Updates entire Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. ++ **/ ++void e1000e_update_mc_addr_list_generic(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count) ++{ ++ u32 hash_value, hash_bit, hash_reg; ++ int i; ++ ++ /* clear mta_shadow */ ++ memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); ++ ++ /* update mta_shadow from mc_addr_list */ ++ for (i = 0; (u32) i < mc_addr_count; i++) { ++ hash_value = e1000_hash_mc_addr(hw, mc_addr_list); ++ ++ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); ++ hash_bit = hash_value & 0x1F; ++ ++ hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); ++ mc_addr_list += (ETH_ALEN); ++ } ++ ++ /* replace the entire MTA table */ ++ for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]); ++ e1e_flush(); ++} ++ ++/** ++ * e1000e_clear_hw_cntrs_base - Clear base hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the base hardware counters by reading the counter registers. 
++ **/ ++void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw) ++{ ++ er32(CRCERRS); ++ er32(SYMERRS); ++ er32(MPC); ++ er32(SCC); ++ er32(ECOL); ++ er32(MCC); ++ er32(LATECOL); ++ er32(COLC); ++ er32(DC); ++ er32(SEC); ++ er32(RLEC); ++ er32(XONRXC); ++ er32(XONTXC); ++ er32(XOFFRXC); ++ er32(XOFFTXC); ++ er32(FCRUC); ++ er32(GPRC); ++ er32(BPRC); ++ er32(MPRC); ++ er32(GPTC); ++ er32(GORCL); ++ er32(GORCH); ++ er32(GOTCL); ++ er32(GOTCH); ++ er32(RNBC); ++ er32(RUC); ++ er32(RFC); ++ er32(ROC); ++ er32(RJC); ++ er32(TORL); ++ er32(TORH); ++ er32(TOTL); ++ er32(TOTH); ++ er32(TPR); ++ er32(TPT); ++ er32(MPTC); ++ er32(BPTC); ++} ++ ++/** ++ * e1000e_check_for_copper_link - Check for link (Copper) ++ * @hw: pointer to the HW structure ++ * ++ * Checks to see of the link status of the hardware has changed. If a ++ * change in link status has been detected, then we read the PHY registers ++ * to get the current speed/duplex if link exists. ++ **/ ++s32 e1000e_check_for_copper_link(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ bool link; ++ ++ /* ++ * We only want to go out to the PHY registers to see if Auto-Neg ++ * has completed and/or if our link status has changed. The ++ * get_link_status flag is set upon receiving a Link Status ++ * Change or Rx Sequence Error interrupt. ++ */ ++ if (!mac->get_link_status) ++ return 0; ++ ++ /* ++ * First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. ++ */ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) ++ return ret_val; /* No link detected */ ++ ++ mac->get_link_status = false; ++ ++ /* ++ * Check if there was DownShift, must be checked ++ * immediately after link-up ++ */ ++ e1000e_check_downshift(hw); ++ ++ /* ++ * If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ ret_val = -E1000_ERR_CONFIG; ++ return ret_val; ++ } ++ ++ /* ++ * Auto-Neg is enabled. Auto Speed Detection takes care ++ * of MAC speed/duplex configuration. So we only need to ++ * configure Collision Distance in the MAC. ++ */ ++ e1000e_config_collision_dist(hw); ++ ++ /* ++ * Configure Flow Control now that Auto-Neg has completed. ++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ if (ret_val) ++ e_dbg("Error configuring flow control\n"); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_check_for_fiber_link - Check for link (Fiber) ++ * @hw: pointer to the HW structure ++ * ++ * Checks for link up on the hardware. If link is not up and we have ++ * a signal, then we need to force link up. ++ **/ ++s32 e1000e_check_for_fiber_link(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw; ++ u32 ctrl; ++ u32 status; ++ s32 ret_val; ++ ++ ctrl = er32(CTRL); ++ status = er32(STATUS); ++ rxcw = er32(RXCW); ++ ++ /* ++ * If we don't have link (auto-negotiation failed or link partner ++ * cannot auto-negotiate), the cable is plugged in (we have signal), ++ * and our link partner is not trying to auto-negotiate with us (we ++ * are receiving idles or data), we need to force link up. We also ++ * need to give auto-negotiation time to complete, in case the cable ++ * was just plugged in. The autoneg_failed flag does this. 
++ */ ++ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ ++ if ((ctrl & E1000_CTRL_SWDPIN1) && (!(status & E1000_STATUS_LU)) && ++ (!(rxcw & E1000_RXCW_C))) { ++ if (mac->autoneg_failed == 0) { ++ mac->autoneg_failed = 1; ++ return 0; ++ } ++ e_dbg("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ ew32(TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = er32(CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ ew32(CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. */ ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ if (ret_val) { ++ e_dbg("Error configuring flow control\n"); ++ return ret_val; ++ } ++ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ /* ++ * If we are forcing link and we are receiving /C/ ordered ++ * sets, re-enable auto-negotiation in the TXCW register ++ * and disable forced link in the Device Control register ++ * in an attempt to auto-negotiate with our link partner. ++ */ ++ e_dbg("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); ++ ew32(TXCW, mac->txcw); ++ ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ mac->serdes_has_link = true; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_check_for_serdes_link - Check for link (Serdes) ++ * @hw: pointer to the HW structure ++ * ++ * Checks for link up on the hardware. If link is not up and we have ++ * a signal, then we need to force link up. ++ **/ ++s32 e1000e_check_for_serdes_link(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw; ++ u32 ctrl; ++ u32 status; ++ s32 ret_val; ++ ++ ctrl = er32(CTRL); ++ status = er32(STATUS); ++ rxcw = er32(RXCW); ++ ++ /* ++ * If we don't have link (auto-negotiation failed or link partner ++ * cannot auto-negotiate), and our link partner is not trying to ++ * auto-negotiate with us (we are receiving idles or data), ++ * we need to force link up. We also need to give auto-negotiation ++ * time to complete. ++ */ ++ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ ++ if ((!(status & E1000_STATUS_LU)) && (!(rxcw & E1000_RXCW_C))) { ++ if (mac->autoneg_failed == 0) { ++ mac->autoneg_failed = 1; ++ return 0; ++ } ++ e_dbg("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ ew32(TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = er32(CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ ew32(CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. */ ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ if (ret_val) { ++ e_dbg("Error configuring flow control\n"); ++ return ret_val; ++ } ++ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ /* ++ * If we are forcing link and we are receiving /C/ ordered ++ * sets, re-enable auto-negotiation in the TXCW register ++ * and disable forced link in the Device Control register ++ * in an attempt to auto-negotiate with our link partner. ++ */ ++ e_dbg("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); ++ ew32(TXCW, mac->txcw); ++ ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ mac->serdes_has_link = true; ++ } else if (!(E1000_TXCW_ANE & er32(TXCW))) { ++ /* ++ * If we force link for non-auto-negotiation switch, check ++ * link status based on MAC synchronization for internal ++ * serdes media type. ++ */ ++ /* SYNCH bit and IV bit are sticky. 
*/ ++ udelay(10); ++ rxcw = er32(RXCW); ++ if (rxcw & E1000_RXCW_SYNCH) { ++ if (!(rxcw & E1000_RXCW_IV)) { ++ mac->serdes_has_link = true; ++ e_dbg("SERDES: Link up - forced.\n"); ++ } ++ } else { ++ mac->serdes_has_link = false; ++ e_dbg("SERDES: Link down - force failed.\n"); ++ } ++ } ++ ++ if (E1000_TXCW_ANE & er32(TXCW)) { ++ status = er32(STATUS); ++ if (status & E1000_STATUS_LU) { ++ /* SYNCH bit and IV bit are sticky, so reread rxcw. */ ++ udelay(10); ++ rxcw = er32(RXCW); ++ if (rxcw & E1000_RXCW_SYNCH) { ++ if (!(rxcw & E1000_RXCW_IV)) { ++ mac->serdes_has_link = true; ++ e_dbg("SERDES: Link up - autoneg " ++ "completed successfully.\n"); ++ } else { ++ mac->serdes_has_link = false; ++ e_dbg("SERDES: Link down - invalid" ++ "codewords detected in autoneg.\n"); ++ } ++ } else { ++ mac->serdes_has_link = false; ++ e_dbg("SERDES: Link down - no sync.\n"); ++ } ++ } else { ++ mac->serdes_has_link = false; ++ e_dbg("SERDES: Link down - autoneg failed\n"); ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_default_fc_generic - Set flow control default values ++ * @hw: pointer to the HW structure ++ * ++ * Read the EEPROM for the default values for flow control and store the ++ * values. ++ **/ ++static s32 e1000_set_default_fc_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 nvm_data; ++ ++ /* ++ * Read and store word 0x0F of the EEPROM. This word contains bits ++ * that determine the hardware's default PAUSE (flow control) mode, ++ * a bit that determines whether the HW defaults to enabling or ++ * disabling auto-negotiation, and the direction of the ++ * SW defined pins. If there is no SW over-ride of the flow ++ * control setting, then the variable hw->fc will ++ * be initialized based on a value in the EEPROM. ++ */ ++ ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data); ++ ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ return ret_val; ++ } ++ ++ if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == 0) ++ hw->fc.requested_mode = e1000_fc_none; ++ else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == ++ NVM_WORD0F_ASM_DIR) ++ hw->fc.requested_mode = e1000_fc_tx_pause; ++ else ++ hw->fc.requested_mode = e1000_fc_full; ++ ++ return 0; ++} ++ ++/** ++ * e1000e_setup_link - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++s32 e1000e_setup_link(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ ++ /* ++ * In the case of the phy reset being blocked, we already have a link. ++ * We do not need to set it up again. ++ */ ++ if (e1000_check_reset_block(hw)) ++ return 0; ++ ++ /* ++ * If requested flow control is set to default, set flow control ++ * based on the EEPROM flow control settings. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_default) { ++ ret_val = e1000_set_default_fc_generic(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* ++ * Save off the requested flow control mode for use later. Depending ++ * on the link partner's capabilities, we may or may not use this mode. 
++ */ ++ hw->fc.current_mode = hw->fc.requested_mode; ++ ++ e_dbg("After fix-ups FlowControl is now = %x\n", ++ hw->fc.current_mode); ++ ++ /* Call the necessary media_type subroutine to configure the link. */ ++ ret_val = mac->ops.setup_physical_interface(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Initialize the flow control address, type, and PAUSE timer ++ * registers to their default values. This is done even if flow ++ * control is disabled, because it does not hurt anything to ++ * initialize these registers. ++ */ ++ e_dbg("Initializing the Flow Control address, type and timer regs\n"); ++ ew32(FCT, FLOW_CONTROL_TYPE); ++ ew32(FCAH, FLOW_CONTROL_ADDRESS_HIGH); ++ ew32(FCAL, FLOW_CONTROL_ADDRESS_LOW); ++ ++ ew32(FCTTV, hw->fc.pause_time); ++ ++ return e1000e_set_fc_watermarks(hw); ++} ++ ++/** ++ * e1000_commit_fc_settings_generic - Configure flow control ++ * @hw: pointer to the HW structure ++ * ++ * Write the flow control settings to the Transmit Config Word Register (TXCW) ++ * base on the flow control settings in e1000_mac_info. ++ **/ ++static s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 txcw; ++ ++ /* ++ * Check for a software override of the flow control settings, and ++ * setup the device accordingly. If auto-negotiation is enabled, then ++ * software will have to set the "PAUSE" bits to the correct value in ++ * the Transmit Config Word Register (TXCW) and re-start auto- ++ * negotiation. However, if auto-negotiation is disabled, then ++ * software will have to manually configure the two flow control enable ++ * bits in the CTRL register. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames, ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames but we ++ * do not support receiving pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) are enabled. ++ */ ++ switch (hw->fc.current_mode) { ++ case e1000_fc_none: ++ /* Flow control completely disabled by a software over-ride. */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); ++ break; ++ case e1000_fc_rx_pause: ++ /* ++ * Rx Flow control is enabled and Tx Flow control is disabled ++ * by a software over-ride. Since there really isn't a way to ++ * advertise that we are capable of Rx Pause ONLY, we will ++ * advertise that we support both symmetric and asymmetric Rx ++ * PAUSE. Later, we will disable the adapter's ability to send ++ * PAUSE frames. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ case e1000_fc_tx_pause: ++ /* ++ * Tx Flow control is enabled, and Rx Flow control is disabled, ++ * by a software over-ride. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); ++ break; ++ case e1000_fc_full: ++ /* ++ * Flow control (both Rx and Tx) is enabled by a software ++ * over-ride. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ default: ++ e_dbg("Flow control param set incorrectly\n"); ++ return -E1000_ERR_CONFIG; ++ break; ++ } ++ ++ ew32(TXCW, txcw); ++ mac->txcw = txcw; ++ ++ return 0; ++} ++ ++/** ++ * e1000_poll_fiber_serdes_link_generic - Poll for link up ++ * @hw: pointer to the HW structure ++ * ++ * Polls for link up by reading the status register, if link fails to come ++ * up with auto-negotiation, then the link is forced if a signal is detected. 
++ **/ ++static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 i, status; ++ s32 ret_val; ++ ++ /* ++ * If we have a signal (the cable is plugged in, or assumed true for ++ * serdes media) then poll for a "Link-Up" indication in the Device ++ * Status Register. Time-out if a link isn't seen in 500 milliseconds ++ * seconds (Auto-negotiation should complete in less than 500 ++ * milliseconds even if the other end is doing it in SW). ++ */ ++ for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { ++ usleep_range(10000, 20000); ++ status = er32(STATUS); ++ if (status & E1000_STATUS_LU) ++ break; ++ } ++ if (i == FIBER_LINK_UP_LIMIT) { ++ e_dbg("Never got a valid link from auto-neg!!!\n"); ++ mac->autoneg_failed = 1; ++ /* ++ * AutoNeg failed to achieve a link, so we'll call ++ * mac->check_for_link. This routine will force the ++ * link up if we detect a signal. This will allow us to ++ * communicate with non-autonegotiating link partners. ++ */ ++ ret_val = mac->ops.check_for_link(hw); ++ if (ret_val) { ++ e_dbg("Error while checking for link\n"); ++ return ret_val; ++ } ++ mac->autoneg_failed = 0; ++ } else { ++ mac->autoneg_failed = 0; ++ e_dbg("Valid Link Found\n"); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_setup_fiber_serdes_link - Setup link for fiber/serdes ++ * @hw: pointer to the HW structure ++ * ++ * Configures collision distance and flow control for fiber and serdes ++ * links. Upon successful setup, poll for link. ++ **/ ++s32 e1000e_setup_fiber_serdes_link(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ ++ ctrl = er32(CTRL); ++ ++ /* Take the link out of reset */ ++ ctrl &= ~E1000_CTRL_LRST; ++ ++ e1000e_config_collision_dist(hw); ++ ++ ret_val = e1000_commit_fc_settings_generic(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Since auto-negotiation is enabled, take the link out of reset (the ++ * link will be in reset, because we previously reset the chip). This ++ * will restart auto-negotiation. If auto-negotiation is successful ++ * then the link-up status bit will be set and the flow control enable ++ * bits (RFCE and TFCE) will be set according to their negotiated value. ++ */ ++ e_dbg("Auto-negotiation enabled\n"); ++ ++ ew32(CTRL, ctrl); ++ e1e_flush(); ++ usleep_range(1000, 2000); ++ ++ /* ++ * For these adapters, the SW definable pin 1 is set when the optics ++ * detect a signal. If we have a signal, then poll for a "Link-Up" ++ * indication. ++ */ ++ if (hw->phy.media_type == e1000_media_type_internal_serdes || ++ (er32(CTRL) & E1000_CTRL_SWDPIN1)) { ++ ret_val = e1000_poll_fiber_serdes_link_generic(hw); ++ } else { ++ e_dbg("No signal detected\n"); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_config_collision_dist - Configure collision distance ++ * @hw: pointer to the HW structure ++ * ++ * Configures the collision distance to the default value and is used ++ * during link setup. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. ++ **/ ++void e1000e_config_collision_dist(struct e1000_hw *hw) ++{ ++ u32 tctl; ++ ++ tctl = er32(TCTL); ++ ++ tctl &= ~E1000_TCTL_COLD; ++ tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; ++ ++ ew32(TCTL, tctl); ++ e1e_flush(); ++} ++ ++/** ++ * e1000e_set_fc_watermarks - Set flow control high/low watermarks ++ * @hw: pointer to the HW structure ++ * ++ * Sets the flow control high/low threshold (watermark) registers. 
If ++ * flow control XON frame transmission is enabled, then set XON frame ++ * transmission as well. ++ **/ ++s32 e1000e_set_fc_watermarks(struct e1000_hw *hw) ++{ ++ u32 fcrtl = 0, fcrth = 0; ++ ++ /* ++ * Set the flow control receive threshold registers. Normally, ++ * these registers will be set to a default threshold that may be ++ * adjusted later by the driver's runtime code. However, if the ++ * ability to transmit pause frames is not enabled, then these ++ * registers will be set to 0. ++ */ ++ if (hw->fc.current_mode & e1000_fc_tx_pause) { ++ /* ++ * We need to set up the Receive Threshold high and low water ++ * marks as well as (optionally) enabling the transmission of ++ * XON frames. ++ */ ++ fcrtl = hw->fc.low_water; ++ fcrtl |= E1000_FCRTL_XONE; ++ fcrth = hw->fc.high_water; ++ } ++ ew32(FCRTL, fcrtl); ++ ew32(FCRTH, fcrth); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_force_mac_fc - Force the MAC's flow control settings ++ * @hw: pointer to the HW structure ++ * ++ * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the ++ * device control register to reflect the adapter settings. TFCE and RFCE ++ * need to be explicitly set by software when a copper PHY is used because ++ * autonegotiation is managed by the PHY rather than the MAC. Software must ++ * also configure these bits when link is forced on a fiber connection. ++ **/ ++s32 e1000e_force_mac_fc(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ ctrl = er32(CTRL); ++ ++ /* ++ * Because we didn't get link via the internal auto-negotiation ++ * mechanism (we either forced link or we got link via PHY ++ * auto-neg), we have to manually enable/disable transmit an ++ * receive flow control. ++ * ++ * The "Case" statement below enables/disable flow control ++ * according to the "hw->fc.current_mode" parameter. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause ++ * frames but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * frames but we do not receive pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) is enabled. ++ * other: No other values should be possible at this point. ++ */ ++ e_dbg("hw->fc.current_mode = %u\n", hw->fc.current_mode); ++ ++ switch (hw->fc.current_mode) { ++ case e1000_fc_none: ++ ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); ++ break; ++ case e1000_fc_rx_pause: ++ ctrl &= (~E1000_CTRL_TFCE); ++ ctrl |= E1000_CTRL_RFCE; ++ break; ++ case e1000_fc_tx_pause: ++ ctrl &= (~E1000_CTRL_RFCE); ++ ctrl |= E1000_CTRL_TFCE; ++ break; ++ case e1000_fc_full: ++ ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); ++ break; ++ default: ++ e_dbg("Flow control param set incorrectly\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ ew32(CTRL, ctrl); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_config_fc_after_link_up - Configures flow control after link ++ * @hw: pointer to the HW structure ++ * ++ * Checks the status of auto-negotiation after link up to ensure that the ++ * speed and duplex were not forced. If the link needed to be forced, then ++ * flow control needs to be forced also. If auto-negotiation is enabled ++ * and did not fail, then we configure flow control based on our link ++ * partner. 
++ **/ ++s32 e1000e_config_fc_after_link_up(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val = 0; ++ u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; ++ u16 speed, duplex; ++ ++ /* ++ * Check for the case where we have fiber media and auto-neg failed ++ * so we had to force link. In this case, we need to force the ++ * configuration of the MAC to match the "fc" parameter. ++ */ ++ if (mac->autoneg_failed) { ++ if (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes) ++ ret_val = e1000e_force_mac_fc(hw); ++ } else { ++ if (hw->phy.media_type == e1000_media_type_copper) ++ ret_val = e1000e_force_mac_fc(hw); ++ } ++ ++ if (ret_val) { ++ e_dbg("Error forcing flow control settings\n"); ++ return ret_val; ++ } ++ ++ /* ++ * Check for the case where we have copper media and auto-neg is ++ * enabled. In this case, we need to check and see if Auto-Neg ++ * has completed, and if so, how the PHY and link partner has ++ * flow control configured. ++ */ ++ if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { ++ /* ++ * Read the MII Status Register and check to see if AutoNeg ++ * has completed. We read this twice because this reg has ++ * some "sticky" (latched) bits. ++ */ ++ ret_val = e1e_rphy(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1e_rphy(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { ++ e_dbg("Copper PHY and Auto Neg " ++ "has not completed.\n"); ++ return ret_val; ++ } ++ ++ /* ++ * The AutoNeg process has completed, so we now need to ++ * read both the Auto Negotiation Advertisement ++ * Register (Address 4) and the Auto_Negotiation Base ++ * Page Ability Register (Address 5) to determine how ++ * flow control was negotiated. ++ */ ++ ret_val = e1e_rphy(hw, PHY_AUTONEG_ADV, &mii_nway_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ret_val = ++ e1e_rphy(hw, PHY_LP_ABILITY, &mii_nway_lp_ability_reg); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Two bits in the Auto Negotiation Advertisement Register ++ * (Address 4) and two bits in the Auto Negotiation Base ++ * Page Ability Register (Address 5) determine flow control ++ * for both the PHY and the link partner. The following ++ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, ++ * 1999, describes these PAUSE resolution bits and how flow ++ * control is determined based upon these settings. ++ * NOTE: DC = Don't Care ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 0 | DC | DC | e1000_fc_none ++ * 0 | 1 | 0 | DC | e1000_fc_none ++ * 0 | 1 | 1 | 0 | e1000_fc_none ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * 1 | 0 | 0 | DC | e1000_fc_none ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * 1 | 1 | 0 | 0 | e1000_fc_none ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ * Are both PAUSE bits set to 1? If so, this implies ++ * Symmetric Flow Control is enabled at both ends. The ++ * ASM_DIR bits are irrelevant per the spec. 
++ * ++ * For Symmetric Flow Control: ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | DC | 1 | DC | E1000_fc_full ++ * ++ */ ++ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { ++ /* ++ * Now we need to check if the user selected Rx ONLY ++ * of pause frames. In this case, we had to advertise ++ * FULL flow control because we could not advertise Rx ++ * ONLY. Hence, we must now check to see if we need to ++ * turn OFF the TRANSMISSION of PAUSE frames. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_full) { ++ hw->fc.current_mode = e1000_fc_full; ++ e_dbg("Flow Control = FULL.\r\n"); ++ } else { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ e_dbg("Flow Control = " ++ "Rx PAUSE frames only.\r\n"); ++ } ++ } ++ /* ++ * For receiving PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ */ ++ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_tx_pause; ++ e_dbg("Flow Control = Tx PAUSE frames only.\r\n"); ++ } ++ /* ++ * For transmitting PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ */ ++ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ e_dbg("Flow Control = Rx PAUSE frames only.\r\n"); ++ } else { ++ /* ++ * Per the IEEE spec, at this point flow control ++ * should be disabled. ++ */ ++ hw->fc.current_mode = e1000_fc_none; ++ e_dbg("Flow Control = NONE.\r\n"); ++ } ++ ++ /* ++ * Now we need to do one last check... If we auto- ++ * negotiated to HALF DUPLEX, flow control should not be ++ * enabled per IEEE 802.3 spec. ++ */ ++ ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex); ++ if (ret_val) { ++ e_dbg("Error getting link speed and duplex\n"); ++ return ret_val; ++ } ++ ++ if (duplex == HALF_DUPLEX) ++ hw->fc.current_mode = e1000_fc_none; ++ ++ /* ++ * Now we call a subroutine to actually force the MAC ++ * controller to use the correct flow control settings. ++ */ ++ ret_val = e1000e_force_mac_fc(hw); ++ if (ret_val) { ++ e_dbg("Error forcing flow control settings\n"); ++ return ret_val; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_get_speed_and_duplex_copper - Retrieve current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Read the status register for the current speed/duplex and store the current ++ * speed and duplex for copper connections. 
++ **/ ++s32 e1000e_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed, u16 *duplex) ++{ ++ u32 status; ++ ++ status = er32(STATUS); ++ if (status & E1000_STATUS_SPEED_1000) ++ *speed = SPEED_1000; ++ else if (status & E1000_STATUS_SPEED_100) ++ *speed = SPEED_100; ++ else ++ *speed = SPEED_10; ++ ++ if (status & E1000_STATUS_FD) ++ *duplex = FULL_DUPLEX; ++ else ++ *duplex = HALF_DUPLEX; ++ ++ e_dbg("%u Mbps, %s Duplex\n", ++ *speed == SPEED_1000 ? 1000 : *speed == SPEED_100 ? 100 : 10, ++ *duplex == FULL_DUPLEX ? "Full" : "Half"); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_get_speed_and_duplex_fiber_serdes - Retrieve current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Sets the speed and duplex to gigabit full duplex (the only possible option) ++ * for fiber/serdes links. ++ **/ ++s32 e1000e_get_speed_and_duplex_fiber_serdes(struct e1000_hw *hw, u16 *speed, u16 *duplex) ++{ ++ *speed = SPEED_1000; ++ *duplex = FULL_DUPLEX; ++ ++ return 0; ++} ++ ++/** ++ * e1000e_get_hw_semaphore - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ **/ ++s32 e1000e_get_hw_semaphore(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ /* Get the SW semaphore */ ++ while (i < timeout) { ++ swsm = er32(SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ udelay(50); ++ i++; ++ } ++ ++ if (i == timeout) { ++ e_dbg("Driver can't access device - SMBI bit is set.\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ /* Get the FW semaphore. */ ++ for (i = 0; i < timeout; i++) { ++ swsm = er32(SWSM); ++ ew32(SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (er32(SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ udelay(50); ++ } ++ ++ if (i == timeout) { ++ /* Release semaphores */ ++ e1000e_put_hw_semaphore(hw); ++ e_dbg("Driver can't access the NVM\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_put_hw_semaphore - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ **/ ++void e1000e_put_hw_semaphore(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ ++ swsm = er32(SWSM); ++ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); ++ ew32(SWSM, swsm); ++} ++ ++/** ++ * e1000e_get_auto_rd_done - Check for auto read completion ++ * @hw: pointer to the HW structure ++ * ++ * Check EEPROM for Auto Read done bit. ++ **/ ++s32 e1000e_get_auto_rd_done(struct e1000_hw *hw) ++{ ++ s32 i = 0; ++ ++ while (i < AUTO_READ_DONE_TIMEOUT) { ++ if (er32(EECD) & E1000_EECD_AUTO_RD) ++ break; ++ usleep_range(1000, 2000); ++ i++; ++ } ++ ++ if (i == AUTO_READ_DONE_TIMEOUT) { ++ e_dbg("Auto read by HW from NVM has not completed.\n"); ++ return -E1000_ERR_RESET; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_valid_led_default - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. 
++ **/ ++s32 e1000e_valid_led_default(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ return ret_val; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT; ++ ++ return 0; ++} ++ ++/** ++ * e1000e_id_led_init - ++ * @hw: pointer to the HW structure ++ * ++ **/ ++s32 e1000e_id_led_init(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ const u32 ledctl_mask = 0x000000FF; ++ const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; ++ const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; ++ u16 data, i, temp; ++ const u16 led_mask = 0x0F; ++ ++ ret_val = hw->nvm.ops.valid_led_default(hw, &data); ++ if (ret_val) ++ return ret_val; ++ ++ mac->ledctl_default = er32(LEDCTL); ++ mac->ledctl_mode1 = mac->ledctl_default; ++ mac->ledctl_mode2 = mac->ledctl_default; ++ ++ for (i = 0; i < 4; i++) { ++ temp = (data >> (i << 2)) & led_mask; ++ switch (temp) { ++ case ID_LED_ON1_DEF2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_ON1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_OFF1_DEF2: ++ case ID_LED_OFF1_ON2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ switch (temp) { ++ case ID_LED_DEF1_ON2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_OFF1_ON2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_DEF1_OFF2: ++ case ID_LED_ON1_OFF2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_setup_led_generic - Configures SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This prepares the SW controllable LED for use and saves the current state ++ * of the LED so it can be later restored. ++ **/ ++s32 e1000e_setup_led_generic(struct e1000_hw *hw) ++{ ++ u32 ledctl; ++ ++ if (hw->mac.ops.setup_led != e1000e_setup_led_generic) ++ return -E1000_ERR_CONFIG; ++ ++ if (hw->phy.media_type == e1000_media_type_fiber) { ++ ledctl = er32(LEDCTL); ++ hw->mac.ledctl_default = ledctl; ++ /* Turn off LED0 */ ++ ledctl &= ~(E1000_LEDCTL_LED0_IVRT | ++ E1000_LEDCTL_LED0_BLINK | ++ E1000_LEDCTL_LED0_MODE_MASK); ++ ledctl |= (E1000_LEDCTL_MODE_LED_OFF << ++ E1000_LEDCTL_LED0_MODE_SHIFT); ++ ew32(LEDCTL, ledctl); ++ } else if (hw->phy.media_type == e1000_media_type_copper) { ++ ew32(LEDCTL, hw->mac.ledctl_mode1); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_cleanup_led_generic - Set LED config to default operation ++ * @hw: pointer to the HW structure ++ * ++ * Remove the current LED configuration and set the LED configuration ++ * to the default value, saved from the EEPROM. ++ **/ ++s32 e1000e_cleanup_led_generic(struct e1000_hw *hw) ++{ ++ ew32(LEDCTL, hw->mac.ledctl_default); ++ return 0; ++} ++ ++/** ++ * e1000e_blink_led_generic - Blink LED ++ * @hw: pointer to the HW structure ++ * ++ * Blink the LEDs which are set to be on. 
++ **/ ++s32 e1000e_blink_led_generic(struct e1000_hw *hw) ++{ ++ u32 ledctl_blink = 0; ++ u32 i; ++ ++ if (hw->phy.media_type == e1000_media_type_fiber) { ++ /* always blink LED0 for PCI-E fiber */ ++ ledctl_blink = E1000_LEDCTL_LED0_BLINK | ++ (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); ++ } else { ++ /* ++ * set the blink bit for each LED that's "on" (0x0E) ++ * in ledctl_mode2 ++ */ ++ ledctl_blink = hw->mac.ledctl_mode2; ++ for (i = 0; i < 4; i++) ++ if (((hw->mac.ledctl_mode2 >> (i * 8)) & 0xFF) == ++ E1000_LEDCTL_MODE_LED_ON) ++ ledctl_blink |= (E1000_LEDCTL_LED0_BLINK << ++ (i * 8)); ++ } ++ ++ ew32(LEDCTL, ledctl_blink); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_led_on_generic - Turn LED on ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED on. ++ **/ ++s32 e1000e_led_on_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_fiber: ++ ctrl = er32(CTRL); ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ ew32(CTRL, ctrl); ++ break; ++ case e1000_media_type_copper: ++ ew32(LEDCTL, hw->mac.ledctl_mode2); ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_led_off_generic - Turn LED off ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED off. ++ **/ ++s32 e1000e_led_off_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_fiber: ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ ew32(CTRL, ctrl); ++ break; ++ case e1000_media_type_copper: ++ ew32(LEDCTL, hw->mac.ledctl_mode1); ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_set_pcie_no_snoop - Set PCI-express capabilities ++ * @hw: pointer to the HW structure ++ * @no_snoop: bitmap of snoop events ++ * ++ * Set the PCI-express register to snoop for events enabled in 'no_snoop'. ++ **/ ++void e1000e_set_pcie_no_snoop(struct e1000_hw *hw, u32 no_snoop) ++{ ++ u32 gcr; ++ ++ if (no_snoop) { ++ gcr = er32(GCR); ++ gcr &= ~(PCIE_NO_SNOOP_ALL); ++ gcr |= no_snoop; ++ ew32(GCR, gcr); ++ } ++} ++ ++/** ++ * e1000e_disable_pcie_master - Disables PCI-express master access ++ * @hw: pointer to the HW structure ++ * ++ * Returns 0 if successful, else returns -10 ++ * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused ++ * the master requests to be disabled. ++ * ++ * Disables PCI-Express master access and verifies there are no pending ++ * requests. ++ **/ ++s32 e1000e_disable_pcie_master(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 timeout = MASTER_DISABLE_TIMEOUT; ++ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; ++ ew32(CTRL, ctrl); ++ ++ while (timeout) { ++ if (!(er32(STATUS) & ++ E1000_STATUS_GIO_MASTER_ENABLE)) ++ break; ++ udelay(100); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ e_dbg("Master requests are pending.\n"); ++ return -E1000_ERR_MASTER_REQUESTS_PENDING; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_reset_adaptive - Reset Adaptive Interframe Spacing ++ * @hw: pointer to the HW structure ++ * ++ * Reset the Adaptive Interframe Spacing throttle to default values. 
++ **/ ++void e1000e_reset_adaptive(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ if (!mac->adaptive_ifs) { ++ e_dbg("Not in Adaptive IFS mode!\n"); ++ goto out; ++ } ++ ++ mac->current_ifs_val = 0; ++ mac->ifs_min_val = IFS_MIN; ++ mac->ifs_max_val = IFS_MAX; ++ mac->ifs_step_size = IFS_STEP; ++ mac->ifs_ratio = IFS_RATIO; ++ ++ mac->in_ifs_mode = false; ++ ew32(AIT, 0); ++out: ++ return; ++} ++ ++/** ++ * e1000e_update_adaptive - Update Adaptive Interframe Spacing ++ * @hw: pointer to the HW structure ++ * ++ * Update the Adaptive Interframe Spacing Throttle value based on the ++ * time between transmitted packets and time between collisions. ++ **/ ++void e1000e_update_adaptive(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ if (!mac->adaptive_ifs) { ++ e_dbg("Not in Adaptive IFS mode!\n"); ++ goto out; ++ } ++ ++ if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) { ++ if (mac->tx_packet_delta > MIN_NUM_XMITS) { ++ mac->in_ifs_mode = true; ++ if (mac->current_ifs_val < mac->ifs_max_val) { ++ if (!mac->current_ifs_val) ++ mac->current_ifs_val = mac->ifs_min_val; ++ else ++ mac->current_ifs_val += ++ mac->ifs_step_size; ++ ew32(AIT, mac->current_ifs_val); ++ } ++ } ++ } else { ++ if (mac->in_ifs_mode && ++ (mac->tx_packet_delta <= MIN_NUM_XMITS)) { ++ mac->current_ifs_val = 0; ++ mac->in_ifs_mode = false; ++ ew32(AIT, 0); ++ } ++ } ++out: ++ return; ++} ++ ++/** ++ * e1000_raise_eec_clk - Raise EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Enable/Raise the EEPROM clock bit. ++ **/ ++static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd | E1000_EECD_SK; ++ ew32(EECD, *eecd); ++ e1e_flush(); ++ udelay(hw->nvm.delay_usec); ++} ++ ++/** ++ * e1000_lower_eec_clk - Lower EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Clear/Lower the EEPROM clock bit. ++ **/ ++static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd & ~E1000_EECD_SK; ++ ew32(EECD, *eecd); ++ e1e_flush(); ++ udelay(hw->nvm.delay_usec); ++} ++ ++/** ++ * e1000_shift_out_eec_bits - Shift data bits our to the EEPROM ++ * @hw: pointer to the HW structure ++ * @data: data to send to the EEPROM ++ * @count: number of bits to shift out ++ * ++ * We need to shift 'count' bits out to the EEPROM. So, the value in the ++ * "data" parameter will be shifted out to the EEPROM one bit at a time. ++ * In order to do this, "data" must be broken down into bits. ++ **/ ++static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = er32(EECD); ++ u32 mask; ++ ++ mask = 0x01 << (count - 1); ++ if (nvm->type == e1000_nvm_eeprom_spi) ++ eecd |= E1000_EECD_DO; ++ ++ do { ++ eecd &= ~E1000_EECD_DI; ++ ++ if (data & mask) ++ eecd |= E1000_EECD_DI; ++ ++ ew32(EECD, eecd); ++ e1e_flush(); ++ ++ udelay(nvm->delay_usec); ++ ++ e1000_raise_eec_clk(hw, &eecd); ++ e1000_lower_eec_clk(hw, &eecd); ++ ++ mask >>= 1; ++ } while (mask); ++ ++ eecd &= ~E1000_EECD_DI; ++ ew32(EECD, eecd); ++} ++ ++/** ++ * e1000_shift_in_eec_bits - Shift data bits in from the EEPROM ++ * @hw: pointer to the HW structure ++ * @count: number of bits to shift in ++ * ++ * In order to read a register from the EEPROM, we need to shift 'count' bits ++ * in from the EEPROM. 
Bits are "shifted in" by raising the clock input to ++ * the EEPROM (setting the SK bit), and then reading the value of the data out ++ * "DO" bit. During this "shifting in" process the data in "DI" bit should ++ * always be clear. ++ **/ ++static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count) ++{ ++ u32 eecd; ++ u32 i; ++ u16 data; ++ ++ eecd = er32(EECD); ++ ++ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); ++ data = 0; ++ ++ for (i = 0; i < count; i++) { ++ data <<= 1; ++ e1000_raise_eec_clk(hw, &eecd); ++ ++ eecd = er32(EECD); ++ ++ eecd &= ~E1000_EECD_DI; ++ if (eecd & E1000_EECD_DO) ++ data |= 1; ++ ++ e1000_lower_eec_clk(hw, &eecd); ++ } ++ ++ return data; ++} ++ ++/** ++ * e1000e_poll_eerd_eewr_done - Poll for EEPROM read/write completion ++ * @hw: pointer to the HW structure ++ * @ee_reg: EEPROM flag for polling ++ * ++ * Polls the EEPROM status bit for either read or write completion based ++ * upon the value of 'ee_reg'. ++ **/ ++s32 e1000e_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg) ++{ ++ u32 attempts = 100000; ++ u32 i, reg = 0; ++ ++ for (i = 0; i < attempts; i++) { ++ if (ee_reg == E1000_NVM_POLL_READ) ++ reg = er32(EERD); ++ else ++ reg = er32(EEWR); ++ ++ if (reg & E1000_NVM_RW_REG_DONE) ++ return 0; ++ ++ udelay(5); ++ } ++ ++ return -E1000_ERR_NVM; ++} ++ ++/** ++ * e1000e_acquire_nvm - Generic request for access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Set the EEPROM access request bit and wait for EEPROM access grant bit. ++ * Return successful if access grant bit set, else clear the request for ++ * EEPROM access and return -E1000_ERR_NVM (-1). ++ **/ ++s32 e1000e_acquire_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd = er32(EECD); ++ s32 timeout = E1000_NVM_GRANT_ATTEMPTS; ++ ++ ew32(EECD, eecd | E1000_EECD_REQ); ++ eecd = er32(EECD); ++ ++ while (timeout) { ++ if (eecd & E1000_EECD_GNT) ++ break; ++ udelay(5); ++ eecd = er32(EECD); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ eecd &= ~E1000_EECD_REQ; ++ ew32(EECD, eecd); ++ e_dbg("Could not acquire NVM grant\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_standby_nvm - Return EEPROM to standby state ++ * @hw: pointer to the HW structure ++ * ++ * Return the EEPROM to a standby state. ++ **/ ++static void e1000_standby_nvm(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = er32(EECD); ++ ++ if (nvm->type == e1000_nvm_eeprom_spi) { ++ /* Toggle CS to flush commands */ ++ eecd |= E1000_EECD_CS; ++ ew32(EECD, eecd); ++ e1e_flush(); ++ udelay(nvm->delay_usec); ++ eecd &= ~E1000_EECD_CS; ++ ew32(EECD, eecd); ++ e1e_flush(); ++ udelay(nvm->delay_usec); ++ } ++} ++ ++/** ++ * e1000_stop_nvm - Terminate EEPROM command ++ * @hw: pointer to the HW structure ++ * ++ * Terminates the current command by inverting the EEPROM's chip select pin. ++ **/ ++static void e1000_stop_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ eecd = er32(EECD); ++ if (hw->nvm.type == e1000_nvm_eeprom_spi) { ++ /* Pull CS high */ ++ eecd |= E1000_EECD_CS; ++ e1000_lower_eec_clk(hw, &eecd); ++ } ++} ++ ++/** ++ * e1000e_release_nvm - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit. 
++ **/ ++void e1000e_release_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ e1000_stop_nvm(hw); ++ ++ eecd = er32(EECD); ++ eecd &= ~E1000_EECD_REQ; ++ ew32(EECD, eecd); ++} ++ ++/** ++ * e1000_ready_nvm_eeprom - Prepares EEPROM for read/write ++ * @hw: pointer to the HW structure ++ * ++ * Setups the EEPROM for reading and writing. ++ **/ ++static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = er32(EECD); ++ u8 spi_stat_reg; ++ ++ if (nvm->type == e1000_nvm_eeprom_spi) { ++ u16 timeout = NVM_MAX_RETRY_SPI; ++ ++ /* Clear SK and CS */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ ew32(EECD, eecd); ++ e1e_flush(); ++ udelay(1); ++ ++ /* ++ * Read "Status Register" repeatedly until the LSB is cleared. ++ * The EEPROM will signal that the command has been completed ++ * by clearing bit 0 of the internal status register. If it's ++ * not cleared within 'timeout', then error out. ++ */ ++ while (timeout) { ++ e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI, ++ hw->nvm.opcode_bits); ++ spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8); ++ if (!(spi_stat_reg & NVM_STATUS_RDY_SPI)) ++ break; ++ ++ udelay(5); ++ e1000_standby_nvm(hw); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ e_dbg("SPI NVM Status error\n"); ++ return -E1000_ERR_NVM; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_read_nvm_eerd - Reads EEPROM using EERD register ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM using the EERD register. ++ **/ ++s32 e1000e_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, eerd = 0; ++ s32 ret_val = 0; ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * too many words for the offset, and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ e_dbg("nvm parameter(s) out of bounds\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) + ++ E1000_NVM_RW_REG_START; ++ ++ ew32(EERD, eerd); ++ ret_val = e1000e_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ); ++ if (ret_val) ++ break; ++ ++ data[i] = (er32(EERD) >> E1000_NVM_RW_REG_DATA); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_write_nvm_spi - Write to EEPROM using SPI ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * Writes data to EEPROM at offset using SPI interface. ++ * ++ * If e1000e_update_nvm_checksum is not called after this function , the ++ * EEPROM will most likely contain an invalid checksum. ++ **/ ++s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val; ++ u16 widx = 0; ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. 
++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ e_dbg("nvm parameter(s) out of bounds\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ ret_val = nvm->ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ while (widx < words) { ++ u8 write_opcode = NVM_WRITE_OPCODE_SPI; ++ ++ ret_val = e1000_ready_nvm_eeprom(hw); ++ if (ret_val) { ++ nvm->ops.release(hw); ++ return ret_val; ++ } ++ ++ e1000_standby_nvm(hw); ++ ++ /* Send the WRITE ENABLE command (8 bit opcode) */ ++ e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI, ++ nvm->opcode_bits); ++ ++ e1000_standby_nvm(hw); ++ ++ /* ++ * Some SPI eeproms use the 8th address bit embedded in the ++ * opcode ++ */ ++ if ((nvm->address_bits == 8) && (offset >= 128)) ++ write_opcode |= NVM_A8_OPCODE_SPI; ++ ++ /* Send the Write command (8-bit opcode + addr) */ ++ e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits); ++ e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2), ++ nvm->address_bits); ++ ++ /* Loop to allow for up to whole page write of eeprom */ ++ while (widx < words) { ++ u16 word_out = data[widx]; ++ word_out = (word_out >> 8) | (word_out << 8); ++ e1000_shift_out_eec_bits(hw, word_out, 16); ++ widx++; ++ ++ if ((((offset + widx) * 2) % nvm->page_size) == 0) { ++ e1000_standby_nvm(hw); ++ break; ++ } ++ } ++ } ++ ++ usleep_range(10000, 20000); ++ nvm->ops.release(hw); ++ return 0; ++} ++ ++/** ++ * e1000_read_pba_string_generic - Read device part number ++ * @hw: pointer to the HW structure ++ * @pba_num: pointer to device part number ++ * @pba_num_size: size of part number buffer ++ * ++ * Reads the product board assembly (PBA) number from the EEPROM and stores ++ * the value in pba_num. ++ **/ ++s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, ++ u32 pba_num_size) ++{ ++ s32 ret_val; ++ u16 nvm_data; ++ u16 pba_ptr; ++ u16 offset; ++ u16 length; ++ ++ if (pba_num == NULL) { ++ e_dbg("PBA string buffer was null\n"); ++ ret_val = E1000_ERR_INVALID_ARGUMENT; ++ goto out; ++ } ++ ++ ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ /* ++ * if nvm_data is not ptr guard the PBA must be in legacy format which ++ * means pba_ptr is actually our second data word for the PBA number ++ * and we can decode it into an ascii string ++ */ ++ if (nvm_data != NVM_PBA_PTR_GUARD) { ++ e_dbg("NVM PBA number is not stored as string\n"); ++ ++ /* we will need 11 characters to store the PBA */ ++ if (pba_num_size < 11) { ++ e_dbg("PBA string buffer too small\n"); ++ return E1000_ERR_NO_SPACE; ++ } ++ ++ /* extract hex string from data and pba_ptr */ ++ pba_num[0] = (nvm_data >> 12) & 0xF; ++ pba_num[1] = (nvm_data >> 8) & 0xF; ++ pba_num[2] = (nvm_data >> 4) & 0xF; ++ pba_num[3] = nvm_data & 0xF; ++ pba_num[4] = (pba_ptr >> 12) & 0xF; ++ pba_num[5] = (pba_ptr >> 8) & 0xF; ++ pba_num[6] = '-'; ++ pba_num[7] = 0; ++ pba_num[8] = (pba_ptr >> 4) & 0xF; ++ pba_num[9] = pba_ptr & 0xF; ++ ++ /* put a null character on the end of our string */ ++ pba_num[10] = '\0'; ++ ++ /* switch all the data but the '-' to hex char */ ++ for (offset = 0; offset < 10; offset++) { ++ if (pba_num[offset] < 0xA) ++ pba_num[offset] += '0'; ++ else if (pba_num[offset] < 0x10) ++ pba_num[offset] += 'A' - 0xA; ++ } ++ ++ goto out; ++ } ++ ++ ret_val = e1000_read_nvm(hw, pba_ptr, 1, &length); ++ 
if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (length == 0xFFFF || length == 0) { ++ e_dbg("NVM PBA number section invalid length\n"); ++ ret_val = E1000_ERR_NVM_PBA_SECTION; ++ goto out; ++ } ++ /* check if pba_num buffer is big enough */ ++ if (pba_num_size < (((u32)length * 2) - 1)) { ++ e_dbg("PBA string buffer too small\n"); ++ ret_val = E1000_ERR_NO_SPACE; ++ goto out; ++ } ++ ++ /* trim pba length from start of string */ ++ pba_ptr++; ++ length--; ++ ++ for (offset = 0; offset < length; offset++) { ++ ret_val = e1000_read_nvm(hw, pba_ptr + offset, 1, &nvm_data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ pba_num[offset * 2] = (u8)(nvm_data >> 8); ++ pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF); ++ } ++ pba_num[offset * 2] = '\0'; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_mac_addr_generic - Read device MAC address ++ * @hw: pointer to the HW structure ++ * ++ * Reads the device MAC address from the EEPROM and stores the value. ++ * Since devices with two ports use the same EEPROM, we increment the ++ * last bit in the MAC address for the second port. ++ **/ ++s32 e1000_read_mac_addr_generic(struct e1000_hw *hw) ++{ ++ u32 rar_high; ++ u32 rar_low; ++ u16 i; ++ ++ rar_high = er32(RAH(0)); ++ rar_low = er32(RAL(0)); ++ ++ for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++) ++ hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8)); ++ ++ for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++) ++ hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8)); ++ ++ for (i = 0; i < ETH_ALEN; i++) ++ hw->mac.addr[i] = hw->mac.perm_addr[i]; ++ ++ return 0; ++} ++ ++/** ++ * e1000e_validate_nvm_checksum_generic - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. ++ **/ ++s32 e1000e_validate_nvm_checksum_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { ++ ret_val = e1000_read_nvm(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ return ret_val; ++ } ++ checksum += nvm_data; ++ } ++ ++ if (checksum != (u16) NVM_SUM) { ++ e_dbg("NVM Checksum Invalid\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_update_nvm_checksum_generic - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++s32 e1000e_update_nvm_checksum_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = 0; i < NVM_CHECKSUM_REG; i++) { ++ ret_val = e1000_read_nvm(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ e_dbg("NVM Read Error while updating checksum.\n"); ++ return ret_val; ++ } ++ checksum += nvm_data; ++ } ++ checksum = (u16) NVM_SUM - checksum; ++ ret_val = e1000_write_nvm(hw, NVM_CHECKSUM_REG, 1, &checksum); ++ if (ret_val) ++ e_dbg("NVM Write Error while updating checksum.\n"); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_reload_nvm - Reloads EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the ++ * extended control register. 
++ **/ ++void e1000e_reload_nvm(struct e1000_hw *hw) ++{ ++ u32 ctrl_ext; ++ ++ udelay(10); ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ ew32(CTRL_EXT, ctrl_ext); ++ e1e_flush(); ++} ++ ++/** ++ * e1000_calculate_checksum - Calculate checksum for buffer ++ * @buffer: pointer to EEPROM ++ * @length: size of EEPROM to calculate a checksum for ++ * ++ * Calculates the checksum for some buffer on a specified length. The ++ * checksum calculated is returned. ++ **/ ++static u8 e1000_calculate_checksum(u8 *buffer, u32 length) ++{ ++ u32 i; ++ u8 sum = 0; ++ ++ if (!buffer) ++ return 0; ++ ++ for (i = 0; i < length; i++) ++ sum += buffer[i]; ++ ++ return (u8) (0 - sum); ++} ++ ++/** ++ * e1000_mng_enable_host_if - Checks host interface is enabled ++ * @hw: pointer to the HW structure ++ * ++ * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND ++ * ++ * This function checks whether the HOST IF is enabled for command operation ++ * and also checks whether the previous command is completed. It busy waits ++ * in case of previous command is not completed. ++ **/ ++static s32 e1000_mng_enable_host_if(struct e1000_hw *hw) ++{ ++ u32 hicr; ++ u8 i; ++ ++ if (!(hw->mac.arc_subsystem_valid)) { ++ e_dbg("ARC subsystem not valid.\n"); ++ return -E1000_ERR_HOST_INTERFACE_COMMAND; ++ } ++ ++ /* Check that the host interface is enabled. */ ++ hicr = er32(HICR); ++ if ((hicr & E1000_HICR_EN) == 0) { ++ e_dbg("E1000_HOST_EN bit disabled.\n"); ++ return -E1000_ERR_HOST_INTERFACE_COMMAND; ++ } ++ /* check the previous command is completed */ ++ for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) { ++ hicr = er32(HICR); ++ if (!(hicr & E1000_HICR_C)) ++ break; ++ mdelay(1); ++ } ++ ++ if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) { ++ e_dbg("Previous command timeout failed .\n"); ++ return -E1000_ERR_HOST_INTERFACE_COMMAND; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_check_mng_mode_generic - check management mode ++ * @hw: pointer to the HW structure ++ * ++ * Reads the firmware semaphore register and returns true (>0) if ++ * manageability is enabled, else false (0). ++ **/ ++bool e1000e_check_mng_mode_generic(struct e1000_hw *hw) ++{ ++ u32 fwsm = er32(FWSM); ++ ++ return (fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT); ++} ++ ++/** ++ * e1000e_enable_tx_pkt_filtering - Enable packet filtering on Tx ++ * @hw: pointer to the HW structure ++ * ++ * Enables packet filtering on transmit packets if manageability is enabled ++ * and host interface is enabled. ++ **/ ++bool e1000e_enable_tx_pkt_filtering(struct e1000_hw *hw) ++{ ++ struct e1000_host_mng_dhcp_cookie *hdr = &hw->mng_cookie; ++ u32 *buffer = (u32 *)&hw->mng_cookie; ++ u32 offset; ++ s32 ret_val, hdr_csum, csum; ++ u8 i, len; ++ ++ hw->mac.tx_pkt_filtering = true; ++ ++ /* No manageability, no filtering */ ++ if (!e1000e_check_mng_mode(hw)) { ++ hw->mac.tx_pkt_filtering = false; ++ goto out; ++ } ++ ++ /* ++ * If we can't read from the host interface for whatever ++ * reason, disable filtering. ++ */ ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val) { ++ hw->mac.tx_pkt_filtering = false; ++ goto out; ++ } ++ ++ /* Read in the header. Length and offset are in dwords. 
*/ ++ len = E1000_MNG_DHCP_COOKIE_LENGTH >> 2; ++ offset = E1000_MNG_DHCP_COOKIE_OFFSET >> 2; ++ for (i = 0; i < len; i++) ++ *(buffer + i) = E1000_READ_REG_ARRAY(hw, E1000_HOST_IF, offset + i); ++ hdr_csum = hdr->checksum; ++ hdr->checksum = 0; ++ csum = e1000_calculate_checksum((u8 *)hdr, ++ E1000_MNG_DHCP_COOKIE_LENGTH); ++ /* ++ * If either the checksums or signature don't match, then ++ * the cookie area isn't considered valid, in which case we ++ * take the safe route of assuming Tx filtering is enabled. ++ */ ++ if ((hdr_csum != csum) || (hdr->signature != E1000_IAMT_SIGNATURE)) { ++ hw->mac.tx_pkt_filtering = true; ++ goto out; ++ } ++ ++ /* Cookie area is valid, make the final check for filtering. */ ++ if (!(hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING)) { ++ hw->mac.tx_pkt_filtering = false; ++ goto out; ++ } ++ ++out: ++ return hw->mac.tx_pkt_filtering; ++} ++ ++/** ++ * e1000_mng_write_cmd_header - Writes manageability command header ++ * @hw: pointer to the HW structure ++ * @hdr: pointer to the host interface command header ++ * ++ * Writes the command header after does the checksum calculation. ++ **/ ++static s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, ++ struct e1000_host_mng_command_header *hdr) ++{ ++ u16 i, length = sizeof(struct e1000_host_mng_command_header); ++ ++ /* Write the whole command header structure with new checksum. */ ++ ++ hdr->checksum = e1000_calculate_checksum((u8 *)hdr, length); ++ ++ length >>= 2; ++ /* Write the relevant command block into the ram area. */ ++ for (i = 0; i < length; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, i, ++ *((u32 *) hdr + i)); ++ e1e_flush(); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_mng_host_if_write - Write to the manageability host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface buffer ++ * @length: size of the buffer ++ * @offset: location in the buffer to write to ++ * @sum: sum of the data (not checksum) ++ * ++ * This function writes the buffer content at the offset given on the host if. ++ * It also does alignment considerations to do the writes in most efficient ++ * way. Also fills up the sum of the buffer in *buffer parameter. ++ **/ ++static s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, ++ u16 length, u16 offset, u8 *sum) ++{ ++ u8 *tmp; ++ u8 *bufptr = buffer; ++ u32 data = 0; ++ u16 remaining, i, j, prev_bytes; ++ ++ /* sum = only sum of the data and it is not checksum */ ++ ++ if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH) ++ return -E1000_ERR_PARAM; ++ ++ tmp = (u8 *)&data; ++ prev_bytes = offset & 0x3; ++ offset >>= 2; ++ ++ if (prev_bytes) { ++ data = E1000_READ_REG_ARRAY(hw, E1000_HOST_IF, offset); ++ for (j = prev_bytes; j < sizeof(u32); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, offset, data); ++ length -= j - prev_bytes; ++ offset++; ++ } ++ ++ remaining = length & 0x3; ++ length -= remaining; ++ ++ /* Calculate length in DWORDs */ ++ length >>= 2; ++ ++ /* ++ * The device driver writes the relevant command block into the ++ * ram area. 
++ */ ++ for (i = 0; i < length; i++) { ++ for (j = 0; j < sizeof(u32); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ ++ E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, offset + i, data); ++ } ++ if (remaining) { ++ for (j = 0; j < sizeof(u32); j++) { ++ if (j < remaining) ++ *(tmp + j) = *bufptr++; ++ else ++ *(tmp + j) = 0; ++ ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, offset + i, data); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_mng_write_dhcp_info - Writes DHCP info to host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface ++ * @length: size of the buffer ++ * ++ * Writes the DHCP information to the host interface. ++ **/ ++s32 e1000e_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length) ++{ ++ struct e1000_host_mng_command_header hdr; ++ s32 ret_val; ++ u32 hicr; ++ ++ hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD; ++ hdr.command_length = length; ++ hdr.reserved1 = 0; ++ hdr.reserved2 = 0; ++ hdr.checksum = 0; ++ ++ /* Enable the host interface */ ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Populate the host interface with the contents of "buffer". */ ++ ret_val = e1000_mng_host_if_write(hw, buffer, length, ++ sizeof(hdr), &(hdr.checksum)); ++ if (ret_val) ++ return ret_val; ++ ++ /* Write the manageability command header */ ++ ret_val = e1000_mng_write_cmd_header(hw, &hdr); ++ if (ret_val) ++ return ret_val; ++ ++ /* Tell the ARC a new command is pending. */ ++ hicr = er32(HICR); ++ ew32(HICR, hicr | E1000_HICR_C); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_enable_mng_pass_thru - Check if management passthrough is needed ++ * @hw: pointer to the HW structure ++ * ++ * Verifies the hardware needs to leave interface enabled so that frames can ++ * be directed to and from the management interface. 
++ **/ ++bool e1000e_enable_mng_pass_thru(struct e1000_hw *hw) ++{ ++ u32 manc; ++ u32 fwsm, factps; ++ bool ret_val = false; ++ ++ manc = er32(MANC); ++ ++ if (!(manc & E1000_MANC_RCV_TCO_EN)) ++ goto out; ++ ++ if (hw->mac.has_fwsm) { ++ fwsm = er32(FWSM); ++ factps = er32(FACTPS); ++ ++ if (!(factps & E1000_FACTPS_MNGCG) && ++ ((fwsm & E1000_FWSM_MODE_MASK) == ++ (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT))) { ++ ret_val = true; ++ goto out; ++ } ++ } else if ((hw->mac.type == e1000_82574) || ++ (hw->mac.type == e1000_82583)) { ++ u16 data; ++ ++ factps = er32(FACTPS); ++ e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &data); ++ ++ if (!(factps & E1000_FACTPS_MNGCG) && ++ ((data & E1000_NVM_INIT_CTRL2_MNGM) == ++ (e1000_mng_mode_pt << 13))) { ++ ret_val = true; ++ goto out; ++ } ++ } else if ((manc & E1000_MANC_SMBUS_EN) && ++ !(manc & E1000_MANC_ASF_EN)) { ++ ret_val = true; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} +--- linux/drivers/xenomai/net/drivers/e1000e/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/Makefile 2021-04-07 16:01:27.209634192 +0800 +@@ -0,0 +1,12 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000E) += rt_e1000e.o ++ ++rt_e1000e-y := \ ++ 82571.o \ ++ 80003es2lan.o \ ++ ich8lan.o \ ++ lib.o \ ++ netdev.o \ ++ param.o \ ++ phy.o +--- linux/drivers/xenomai/net/drivers/e1000e/param.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/param.c 2021-04-07 16:01:27.204634199 +0800 +@@ -0,0 +1,484 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include ++#include ++ ++#include "e1000.h" ++ ++/* ++ * This is the only thing that needs to be changed to adjust the ++ * maximum number of ports that the driver can manage. ++ */ ++ ++#define E1000_MAX_NIC 32 ++ ++#define OPTION_UNSET -1 ++#define OPTION_DISABLED 0 ++#define OPTION_ENABLED 1 ++ ++#define COPYBREAK_DEFAULT 256 ++unsigned int copybreak = COPYBREAK_DEFAULT; ++module_param(copybreak, uint, 0644); ++MODULE_PARM_DESC(copybreak, ++ "Maximum size of packet that is copied to a new buffer on receive"); ++ ++/* ++ * All parameters are treated the same, as an integer array of values. ++ * This macro just reduces the need to repeat the same declaration code ++ * over and over (plus this helps to avoid typo bugs). ++ */ ++ ++#define E1000_PARAM_INIT { [0 ... 
E1000_MAX_NIC] = OPTION_UNSET } ++#define E1000_PARAM(X, desc) \ ++ static int X[E1000_MAX_NIC+1] \ ++ = E1000_PARAM_INIT; \ ++ static unsigned int num_##X; \ ++ module_param_array_named(X, X, int, &num_##X, 0); \ ++ MODULE_PARM_DESC(X, desc); ++ ++/* ++ * Transmit Interrupt Delay in units of 1.024 microseconds ++ * Tx interrupt delay needs to typically be set to something non-zero ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 for rtnet ++ */ ++E1000_PARAM(TxIntDelay, "Transmit Interrupt Delay"); ++#define DEFAULT_TIDV 0 ++#define MAX_TXDELAY 0xFFFF ++#define MIN_TXDELAY 0 ++ ++/* ++ * Transmit Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 for rtnet ++ */ ++E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay"); ++#define DEFAULT_TADV 0 ++#define MAX_TXABSDELAY 0xFFFF ++#define MIN_TXABSDELAY 0 ++ ++/* ++ * Receive Interrupt Delay in units of 1.024 microseconds ++ * hardware will likely hang if you set this to anything but zero. ++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(RxIntDelay, "Receive Interrupt Delay"); ++#define MAX_RXDELAY 0xFFFF ++#define MIN_RXDELAY 0 ++ ++/* ++ * Receive Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay"); ++#define MAX_RXABSDELAY 0xFFFF ++#define MIN_RXABSDELAY 0 ++ ++/* ++ * Interrupt Throttle Rate (interrupts/sec) ++ * ++ * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative) ++ * ++ * Default Value: 0 for rtnet ++ */ ++E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate"); ++#define DEFAULT_ITR 0 ++#define MAX_ITR 100000 ++#define MIN_ITR 100 ++ ++/* IntMode (Interrupt Mode) ++ * ++ * Valid Range: 0 - 2 ++ * ++ * Default Value: 2 (MSI-X) ++ */ ++E1000_PARAM(IntMode, "Interrupt Mode"); ++#define MAX_INTMODE 2 ++#define MIN_INTMODE 0 ++ ++/* ++ * Enable Smart Power Down of the PHY ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 0 (disabled) ++ */ ++E1000_PARAM(SmartPowerDownEnable, "Enable PHY smart power down"); ++ ++/* ++ * Enable Kumeran Lock Loss workaround ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 1 (enabled) ++ */ ++E1000_PARAM(KumeranLockLoss, "Enable Kumeran lock loss workaround"); ++ ++/* ++ * Write Protect NVM ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 1 (enabled) ++ */ ++E1000_PARAM(WriteProtectNVM, "Write-protect NVM [WARNING: disabling this can lead to corrupted NVM]"); ++ ++/* ++ * Enable CRC Stripping ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 1 (enabled) ++ */ ++E1000_PARAM(CrcStripping, "Enable CRC Stripping, disable if your BMC needs " \ ++ "the CRC"); ++ ++struct e1000_option { ++ enum { enable_option, range_option, list_option } type; ++ const char *name; ++ const char *err; ++ int def; ++ union { ++ struct { /* range_option info */ ++ int min; ++ int max; ++ } r; ++ struct { /* list_option info */ ++ int nr; ++ struct e1000_opt_list { int i; char *str; } *p; ++ } l; ++ } arg; ++}; ++ ++static int e1000_validate_option(unsigned int *value, ++ const struct e1000_option *opt, ++ struct e1000_adapter *adapter) ++{ ++ if (*value == OPTION_UNSET) { ++ *value = opt->def; ++ return 0; ++ } ++ ++ switch (opt->type) { ++ case enable_option: ++ switch (*value) { ++ case OPTION_ENABLED: ++ e_info("%s Enabled\n", opt->name); ++ return 0; ++ case OPTION_DISABLED: ++ e_info("%s Disabled\n", opt->name); ++ return 0; ++ } ++ break; ++ case range_option: ++ if (*value >= opt->arg.r.min && *value <= 
opt->arg.r.max) { ++ e_info("%s set to %i\n", opt->name, *value); ++ return 0; ++ } ++ break; ++ case list_option: { ++ int i; ++ struct e1000_opt_list *ent; ++ ++ for (i = 0; i < opt->arg.l.nr; i++) { ++ ent = &opt->arg.l.p[i]; ++ if (*value == ent->i) { ++ if (ent->str[0] != '\0') ++ e_info("%s\n", ent->str); ++ return 0; ++ } ++ } ++ } ++ break; ++ default: ++ BUG(); ++ } ++ ++ e_info("Invalid %s value specified (%i) %s\n", opt->name, *value, ++ opt->err); ++ *value = opt->def; ++ return -1; ++} ++ ++/** ++ * e1000e_check_options - Range Checking for Command Line Parameters ++ * @adapter: board private structure ++ * ++ * This routine checks all command line parameters for valid user ++ * input. If an invalid value is given, or if no user specified ++ * value exists, a default value is used. The final value is stored ++ * in a variable in the adapter structure. ++ **/ ++void e1000e_check_options(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ int bd = adapter->bd_number; ++ ++ if (bd >= E1000_MAX_NIC) { ++ e_notice("Warning: no configuration for board #%i\n", bd); ++ e_notice("Using defaults for all values\n"); ++ } ++ ++ { /* Transmit Interrupt Delay */ ++ static const struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Interrupt Delay", ++ .err = "using default of " ++ __MODULE_STRING(DEFAULT_TIDV), ++ .def = DEFAULT_TIDV, ++ .arg = { .r = { .min = MIN_TXDELAY, ++ .max = MAX_TXDELAY } } ++ }; ++ ++ if (num_TxIntDelay > bd) { ++ adapter->tx_int_delay = TxIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_int_delay, &opt, ++ adapter); ++ } else { ++ adapter->tx_int_delay = opt.def; ++ } ++ } ++ { /* Transmit Absolute Interrupt Delay */ ++ static const struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Absolute Interrupt Delay", ++ .err = "using default of " ++ __MODULE_STRING(DEFAULT_TADV), ++ .def = DEFAULT_TADV, ++ .arg = { .r = { .min = MIN_TXABSDELAY, ++ .max = MAX_TXABSDELAY } } ++ }; ++ ++ if (num_TxAbsIntDelay > bd) { ++ adapter->tx_abs_int_delay = TxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_abs_int_delay, &opt, ++ adapter); ++ } else { ++ adapter->tx_abs_int_delay = opt.def; ++ } ++ } ++ { /* Receive Interrupt Delay */ ++ static struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Interrupt Delay", ++ .err = "using default of " ++ __MODULE_STRING(DEFAULT_RDTR), ++ .def = DEFAULT_RDTR, ++ .arg = { .r = { .min = MIN_RXDELAY, ++ .max = MAX_RXDELAY } } ++ }; ++ ++ if (num_RxIntDelay > bd) { ++ adapter->rx_int_delay = RxIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_int_delay, &opt, ++ adapter); ++ } else { ++ adapter->rx_int_delay = opt.def; ++ } ++ } ++ { /* Receive Absolute Interrupt Delay */ ++ static const struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Absolute Interrupt Delay", ++ .err = "using default of " ++ __MODULE_STRING(DEFAULT_RADV), ++ .def = DEFAULT_RADV, ++ .arg = { .r = { .min = MIN_RXABSDELAY, ++ .max = MAX_RXABSDELAY } } ++ }; ++ ++ if (num_RxAbsIntDelay > bd) { ++ adapter->rx_abs_int_delay = RxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_abs_int_delay, &opt, ++ adapter); ++ } else { ++ adapter->rx_abs_int_delay = opt.def; ++ } ++ } ++ { /* Interrupt Throttling Rate */ ++ static const struct e1000_option opt = { ++ .type = range_option, ++ .name = "Interrupt Throttling Rate (ints/sec)", ++ .err = "using default of " ++ __MODULE_STRING(DEFAULT_ITR), ++ .def = DEFAULT_ITR, ++ .arg = { .r = { .min = MIN_ITR, ++ .max = 
MAX_ITR } } ++ }; ++ ++ if (num_InterruptThrottleRate > bd) { ++ adapter->itr = InterruptThrottleRate[bd]; ++ switch (adapter->itr) { ++ case 0: ++ e_info("%s turned off\n", opt.name); ++ break; ++ case 1: ++ e_info("%s set to dynamic mode\n", opt.name); ++ adapter->itr_setting = adapter->itr; ++ adapter->itr = 20000; ++ break; ++ case 3: ++ e_info("%s set to dynamic conservative mode\n", ++ opt.name); ++ adapter->itr_setting = adapter->itr; ++ adapter->itr = 20000; ++ break; ++ case 4: ++ e_info("%s set to simplified (2000-8000 ints) " ++ "mode\n", opt.name); ++ adapter->itr_setting = 4; ++ break; ++ default: ++ /* ++ * Save the setting, because the dynamic bits ++ * change itr. ++ */ ++ if (e1000_validate_option(&adapter->itr, &opt, ++ adapter) && ++ (adapter->itr == 3)) { ++ /* ++ * In case of invalid user value, ++ * default to conservative mode. ++ */ ++ adapter->itr_setting = adapter->itr; ++ adapter->itr = 20000; ++ } else { ++ /* ++ * Clear the lower two bits because ++ * they are used as control. ++ */ ++ adapter->itr_setting = ++ adapter->itr & ~3; ++ } ++ break; ++ } ++ } else { ++ adapter->itr_setting = opt.def; ++ adapter->itr = 0; ++ } ++ } ++ { /* Interrupt Mode */ ++ static struct e1000_option opt = { ++ .type = range_option, ++ .name = "Interrupt Mode", ++ .err = "defaulting to 2 (MSI-X)", ++ .def = E1000E_INT_MODE_MSIX, ++ .arg = { .r = { .min = MIN_INTMODE, ++ .max = MAX_INTMODE } } ++ }; ++ ++ if (num_IntMode > bd) { ++ unsigned int int_mode = IntMode[bd]; ++ e1000_validate_option(&int_mode, &opt, adapter); ++ adapter->int_mode = int_mode; ++ } else { ++ adapter->int_mode = opt.def; ++ } ++ } ++ { /* Smart Power Down */ ++ static const struct e1000_option opt = { ++ .type = enable_option, ++ .name = "PHY Smart Power Down", ++ .err = "defaulting to Disabled", ++ .def = OPTION_DISABLED ++ }; ++ ++ if (num_SmartPowerDownEnable > bd) { ++ unsigned int spd = SmartPowerDownEnable[bd]; ++ e1000_validate_option(&spd, &opt, adapter); ++ if ((adapter->flags & FLAG_HAS_SMART_POWER_DOWN) ++ && spd) ++ adapter->flags |= FLAG_SMART_POWER_DOWN; ++ } ++ } ++ { /* CRC Stripping */ ++ static const struct e1000_option opt = { ++ .type = enable_option, ++ .name = "CRC Stripping", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++ if (num_CrcStripping > bd) { ++ unsigned int crc_stripping = CrcStripping[bd]; ++ e1000_validate_option(&crc_stripping, &opt, adapter); ++ if (crc_stripping == OPTION_ENABLED) ++ adapter->flags2 |= FLAG2_CRC_STRIPPING; ++ } else { ++ adapter->flags2 |= FLAG2_CRC_STRIPPING; ++ } ++ } ++ { /* Kumeran Lock Loss Workaround */ ++ static const struct e1000_option opt = { ++ .type = enable_option, ++ .name = "Kumeran Lock Loss Workaround", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++ if (num_KumeranLockLoss > bd) { ++ unsigned int kmrn_lock_loss = KumeranLockLoss[bd]; ++ e1000_validate_option(&kmrn_lock_loss, &opt, adapter); ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, ++ kmrn_lock_loss); ++ } else { ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, ++ opt.def); ++ } ++ } ++ { /* Write-protect NVM */ ++ static const struct e1000_option opt = { ++ .type = enable_option, ++ .name = "Write-protect NVM", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++ if (adapter->flags & FLAG_IS_ICH) { ++ if (num_WriteProtectNVM > bd) { ++ unsigned int write_protect_nvm = WriteProtectNVM[bd]; ++ e1000_validate_option(&write_protect_nvm, 
&opt, ++ adapter); ++ if (write_protect_nvm) ++ adapter->flags |= FLAG_READ_ONLY_NVM; ++ } else { ++ if (opt.def) ++ adapter->flags |= FLAG_READ_ONLY_NVM; ++ } ++ } ++ } ++} +--- linux/drivers/xenomai/net/drivers/e1000e/ich8lan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/ich8lan.c 2021-04-07 16:01:27.199634206 +0800 +@@ -0,0 +1,4446 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* ++ * 82562G 10/100 Network Connection ++ * 82562G-2 10/100 Network Connection ++ * 82562GT 10/100 Network Connection ++ * 82562GT-2 10/100 Network Connection ++ * 82562V 10/100 Network Connection ++ * 82562V-2 10/100 Network Connection ++ * 82566DC-2 Gigabit Network Connection ++ * 82566DC Gigabit Network Connection ++ * 82566DM-2 Gigabit Network Connection ++ * 82566DM Gigabit Network Connection ++ * 82566MC Gigabit Network Connection ++ * 82566MM Gigabit Network Connection ++ * 82567LM Gigabit Network Connection ++ * 82567LF Gigabit Network Connection ++ * 82567V Gigabit Network Connection ++ * 82567LM-2 Gigabit Network Connection ++ * 82567LF-2 Gigabit Network Connection ++ * 82567V-2 Gigabit Network Connection ++ * 82567LF-3 Gigabit Network Connection ++ * 82567LM-3 Gigabit Network Connection ++ * 82567LM-4 Gigabit Network Connection ++ * 82577LM Gigabit Network Connection ++ * 82577LC Gigabit Network Connection ++ * 82578DM Gigabit Network Connection ++ * 82578DC Gigabit Network Connection ++ * 82579LM Gigabit Network Connection ++ * 82579V Gigabit Network Connection ++ */ ++ ++#include "e1000.h" ++ ++#define ICH_FLASH_GFPREG 0x0000 ++#define ICH_FLASH_HSFSTS 0x0004 ++#define ICH_FLASH_HSFCTL 0x0006 ++#define ICH_FLASH_FADDR 0x0008 ++#define ICH_FLASH_FDATA0 0x0010 ++#define ICH_FLASH_PR0 0x0074 ++ ++#define ICH_FLASH_READ_COMMAND_TIMEOUT 500 ++#define ICH_FLASH_WRITE_COMMAND_TIMEOUT 500 ++#define ICH_FLASH_ERASE_COMMAND_TIMEOUT 3000000 ++#define ICH_FLASH_LINEAR_ADDR_MASK 0x00FFFFFF ++#define ICH_FLASH_CYCLE_REPEAT_COUNT 10 ++ ++#define ICH_CYCLE_READ 0 ++#define ICH_CYCLE_WRITE 2 ++#define ICH_CYCLE_ERASE 3 ++ ++#define FLASH_GFPREG_BASE_MASK 0x1FFF ++#define FLASH_SECTOR_ADDR_SHIFT 12 ++ ++#define ICH_FLASH_SEG_SIZE_256 256 ++#define ICH_FLASH_SEG_SIZE_4K 4096 ++#define ICH_FLASH_SEG_SIZE_8K 8192 ++#define ICH_FLASH_SEG_SIZE_64K 65536 ++ ++ ++#define E1000_ICH_FWSM_RSPCIPHY 0x00000040 /* Reset PHY on PCI Reset */ ++/* FW established a valid mode */ 
++#define E1000_ICH_FWSM_FW_VALID 0x00008000 ++ ++#define E1000_ICH_MNG_IAMT_MODE 0x2 ++ ++#define ID_LED_DEFAULT_ICH8LAN ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_DEF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_ON2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++ ++#define E1000_ICH_NVM_SIG_WORD 0x13 ++#define E1000_ICH_NVM_SIG_MASK 0xC000 ++#define E1000_ICH_NVM_VALID_SIG_MASK 0xC0 ++#define E1000_ICH_NVM_SIG_VALUE 0x80 ++ ++#define E1000_ICH8_LAN_INIT_TIMEOUT 1500 ++ ++#define E1000_FEXTNVM_SW_CONFIG 1 ++#define E1000_FEXTNVM_SW_CONFIG_ICH8M (1 << 27) /* Bit redefined for ICH8M :/ */ ++ ++#define E1000_FEXTNVM3_PHY_CFG_COUNTER_MASK 0x0C000000 ++#define E1000_FEXTNVM3_PHY_CFG_COUNTER_50MSEC 0x08000000 ++ ++#define E1000_FEXTNVM4_BEACON_DURATION_MASK 0x7 ++#define E1000_FEXTNVM4_BEACON_DURATION_8USEC 0x7 ++#define E1000_FEXTNVM4_BEACON_DURATION_16USEC 0x3 ++ ++#define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL ++ ++#define E1000_ICH_RAR_ENTRIES 7 ++#define E1000_PCH2_RAR_ENTRIES 5 /* RAR[0], SHRA[0-3] */ ++#define E1000_PCH_LPT_RAR_ENTRIES 12 /* RAR[0], SHRA[0-10] */ ++ ++#define PHY_PAGE_SHIFT 5 ++#define PHY_REG(page, reg) (((page) << PHY_PAGE_SHIFT) | \ ++ ((reg) & MAX_PHY_REG_ADDRESS)) ++#define IGP3_KMRN_DIAG PHY_REG(770, 19) /* KMRN Diagnostic */ ++#define IGP3_VR_CTRL PHY_REG(776, 18) /* Voltage Regulator Control */ ++ ++#define IGP3_KMRN_DIAG_PCS_LOCK_LOSS 0x0002 ++#define IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK 0x0300 ++#define IGP3_VR_CTRL_MODE_SHUTDOWN 0x0200 ++ ++#define HV_LED_CONFIG PHY_REG(768, 30) /* LED Configuration */ ++ ++#define SW_FLAG_TIMEOUT 1000 /* SW Semaphore flag timeout in milliseconds */ ++ ++/* SMBus Control Phy Register */ ++#define CV_SMB_CTRL PHY_REG(769, 23) ++#define CV_SMB_CTRL_FORCE_SMBUS 0x0001 ++ ++/* SMBus Address Phy Register */ ++#define HV_SMB_ADDR PHY_REG(768, 26) ++#define HV_SMB_ADDR_MASK 0x007F ++#define HV_SMB_ADDR_PEC_EN 0x0200 ++#define HV_SMB_ADDR_VALID 0x0080 ++#define HV_SMB_ADDR_FREQ_MASK 0x1100 ++#define HV_SMB_ADDR_FREQ_LOW_SHIFT 8 ++#define HV_SMB_ADDR_FREQ_HIGH_SHIFT 12 ++ ++/* PHY Power Management Control */ ++#define HV_PM_CTRL PHY_REG(770, 17) ++#define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA 0x100 ++ ++/* PHY Low Power Idle Control */ ++#define I82579_LPI_CTRL PHY_REG(772, 20) ++#define I82579_LPI_CTRL_ENABLE_MASK 0x6000 ++#define I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT 0x80 ++ ++/* EMI Registers */ ++#define I82579_EMI_ADDR 0x10 ++#define I82579_EMI_DATA 0x11 ++#define I82579_LPI_UPDATE_TIMER 0x4805 /* in 40ns units + 40 ns base value */ ++ ++#define I217_EEE_ADVERTISEMENT 0x8001 /* IEEE MMD Register 7.60 */ ++#define I217_EEE_LP_ABILITY 0x8002 /* IEEE MMD Register 7.61 */ ++#define I217_EEE_100_SUPPORTED (1 << 1) /* 100BaseTx EEE supported */ ++ ++/* Intel Rapid Start Technology Support */ ++#define I217_PROXY_CTRL PHY_REG(BM_WUC_PAGE, 70) ++#define I217_PROXY_CTRL_AUTO_DISABLE 0x0080 ++#define I217_SxCTRL PHY_REG(BM_PORT_CTRL_PAGE, 28) ++#define I217_SxCTRL_MASK 0x1000 ++#define I217_CGFREG PHY_REG(772, 29) ++#define I217_CGFREG_MASK 0x0002 ++#define I217_MEMPWR PHY_REG(772, 26) ++#define I217_MEMPWR_MASK 0x0010 ++ ++/* Strapping Option Register - RO */ ++#define E1000_STRAP 0x0000C ++#define E1000_STRAP_SMBUS_ADDRESS_MASK 0x00FE0000 ++#define E1000_STRAP_SMBUS_ADDRESS_SHIFT 17 ++#define E1000_STRAP_SMT_FREQ_MASK 0x00003000 ++#define E1000_STRAP_SMT_FREQ_SHIFT 12 ++ ++/* OEM Bits Phy Register */ ++#define HV_OEM_BITS PHY_REG(768, 25) ++#define HV_OEM_BITS_LPLU 0x0004 /* Low Power Link Up */ ++#define HV_OEM_BITS_GBE_DIS 0x0040 /* Gigabit Disable */ ++#define 
HV_OEM_BITS_RESTART_AN 0x0400 /* Restart Auto-negotiation */ ++ ++#define E1000_NVM_K1_CONFIG 0x1B /* NVM K1 Config Word */ ++#define E1000_NVM_K1_ENABLE 0x1 /* NVM Enable K1 bit */ ++ ++/* KMRN Mode Control */ ++#define HV_KMRN_MODE_CTRL PHY_REG(769, 16) ++#define HV_KMRN_MDIO_SLOW 0x0400 ++ ++/* KMRN FIFO Control and Status */ ++#define HV_KMRN_FIFO_CTRLSTA PHY_REG(770, 16) ++#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK 0x7000 ++#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT 12 ++ ++/* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */ ++/* Offset 04h HSFSTS */ ++union ich8_hws_flash_status { ++ struct ich8_hsfsts { ++ u16 flcdone :1; /* bit 0 Flash Cycle Done */ ++ u16 flcerr :1; /* bit 1 Flash Cycle Error */ ++ u16 dael :1; /* bit 2 Direct Access error Log */ ++ u16 berasesz :2; /* bit 4:3 Sector Erase Size */ ++ u16 flcinprog :1; /* bit 5 flash cycle in Progress */ ++ u16 reserved1 :2; /* bit 13:6 Reserved */ ++ u16 reserved2 :6; /* bit 13:6 Reserved */ ++ u16 fldesvalid :1; /* bit 14 Flash Descriptor Valid */ ++ u16 flockdn :1; /* bit 15 Flash Config Lock-Down */ ++ } hsf_status; ++ u16 regval; ++}; ++ ++/* ICH GbE Flash Hardware Sequencing Flash control Register bit breakdown */ ++/* Offset 06h FLCTL */ ++union ich8_hws_flash_ctrl { ++ struct ich8_hsflctl { ++ u16 flcgo :1; /* 0 Flash Cycle Go */ ++ u16 flcycle :2; /* 2:1 Flash Cycle */ ++ u16 reserved :5; /* 7:3 Reserved */ ++ u16 fldbcount :2; /* 9:8 Flash Data Byte Count */ ++ u16 flockdn :6; /* 15:10 Reserved */ ++ } hsf_ctrl; ++ u16 regval; ++}; ++ ++/* ICH Flash Region Access Permissions */ ++union ich8_hws_flash_regacc { ++ struct ich8_flracc { ++ u32 grra :8; /* 0:7 GbE region Read Access */ ++ u32 grwa :8; /* 8:15 GbE region Write Access */ ++ u32 gmrag :8; /* 23:16 GbE Master Read Access Grant */ ++ u32 gmwag :8; /* 31:24 GbE Master Write Access Grant */ ++ } hsf_flregacc; ++ u16 regval; ++}; ++ ++/* ICH Flash Protected Region */ ++union ich8_flash_protected_range { ++ struct ich8_pr { ++ u32 base:13; /* 0:12 Protected Range Base */ ++ u32 reserved1:2; /* 13:14 Reserved */ ++ u32 rpe:1; /* 15 Read Protection Enable */ ++ u32 limit:13; /* 16:28 Protected Range Limit */ ++ u32 reserved2:2; /* 29:30 Reserved */ ++ u32 wpe:1; /* 31 Write Protection Enable */ ++ } range; ++ u32 regval; ++}; ++ ++static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw); ++static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw); ++static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank); ++static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, ++ u32 offset, u8 byte); ++static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 *data); ++static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16 *data); ++static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw); ++static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw); ++static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw); ++static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw); ++static s32 e1000_led_on_ich8lan(struct e1000_hw *hw); ++static s32 e1000_led_off_ich8lan(struct e1000_hw *hw); ++static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw); ++static s32 e1000_setup_led_pchlan(struct e1000_hw *hw); ++static s32 e1000_cleanup_led_pchlan(struct e1000_hw *hw); ++static s32 
e1000_led_on_pchlan(struct e1000_hw *hw); ++static s32 e1000_led_off_pchlan(struct e1000_hw *hw); ++static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active); ++static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw); ++static void e1000_lan_init_done_ich8lan(struct e1000_hw *hw); ++static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link); ++static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw); ++static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw); ++static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw); ++static void e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index); ++static s32 e1000_k1_workaround_lv(struct e1000_hw *hw); ++static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate); ++ ++static inline u16 __er16flash(struct e1000_hw *hw, unsigned long reg) ++{ ++ return readw(hw->flash_address + reg); ++} ++ ++static inline u32 __er32flash(struct e1000_hw *hw, unsigned long reg) ++{ ++ return readl(hw->flash_address + reg); ++} ++ ++static inline void __ew16flash(struct e1000_hw *hw, unsigned long reg, u16 val) ++{ ++ writew(val, hw->flash_address + reg); ++} ++ ++static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val) ++{ ++ writel(val, hw->flash_address + reg); ++} ++ ++#define er16flash(reg) __er16flash(hw, (reg)) ++#define er32flash(reg) __er32flash(hw, (reg)) ++#define ew16flash(reg,val) __ew16flash(hw, (reg), (val)) ++#define ew32flash(reg,val) __ew32flash(hw, (reg), (val)) ++ ++static void e1000_toggle_lanphypc_value_ich8lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_LANPHYPC_OVERRIDE; ++ ctrl &= ~E1000_CTRL_LANPHYPC_VALUE; ++ ew32(CTRL, ctrl); ++ e1e_flush(); ++ udelay(10); ++ ctrl &= ~E1000_CTRL_LANPHYPC_OVERRIDE; ++ ew32(CTRL, ctrl); ++} ++ ++/** ++ * e1000_init_phy_params_pchlan - Initialize PHY function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific PHY parameters and function pointers. ++ **/ ++static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 fwsm; ++ s32 ret_val = 0; ++ ++ phy->addr = 1; ++ phy->reset_delay_us = 100; ++ ++ phy->ops.set_page = e1000_set_page_igp; ++ phy->ops.read_reg = e1000_read_phy_reg_hv; ++ phy->ops.read_reg_locked = e1000_read_phy_reg_hv_locked; ++ phy->ops.read_reg_page = e1000_read_phy_reg_page_hv; ++ phy->ops.set_d0_lplu_state = e1000_set_lplu_state_pchlan; ++ phy->ops.set_d3_lplu_state = e1000_set_lplu_state_pchlan; ++ phy->ops.write_reg = e1000_write_phy_reg_hv; ++ phy->ops.write_reg_locked = e1000_write_phy_reg_hv_locked; ++ phy->ops.write_reg_page = e1000_write_phy_reg_page_hv; ++ phy->ops.power_up = e1000_power_up_phy_copper; ++ phy->ops.power_down = e1000_power_down_phy_copper_ich8lan; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ ++ /* ++ * The MAC-PHY interconnect may still be in SMBus mode ++ * after Sx->S0. If the manageability engine (ME) is ++ * disabled, then toggle the LANPHYPC Value bit to force ++ * the interconnect to PCIe mode. ++ */ ++ fwsm = er32(FWSM); ++ if (!(fwsm & E1000_ICH_FWSM_FW_VALID) && !e1000_check_reset_block(hw)) { ++ e1000_toggle_lanphypc_value_ich8lan(hw); ++ msleep(50); ++ ++ /* ++ * Gate automatic PHY configuration by hardware on ++ * non-managed 82579 ++ */ ++ if (hw->mac.type == e1000_pch2lan) ++ e1000_gate_hw_phy_config_ich8lan(hw, true); ++ } ++ ++ /* ++ * Reset the PHY before any access to it. 
Doing so, ensures that ++ * the PHY is in a known good state before we read/write PHY registers. ++ * The generic reset is sufficient here, because we haven't determined ++ * the PHY type yet. ++ */ ++ ret_val = e1000e_phy_hw_reset_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Ungate automatic PHY configuration on non-managed 82579 */ ++ if ((hw->mac.type == e1000_pch2lan) && ++ !(fwsm & E1000_ICH_FWSM_FW_VALID)) { ++ usleep_range(10000, 20000); ++ e1000_gate_hw_phy_config_ich8lan(hw, false); ++ } ++ ++ phy->id = e1000_phy_unknown; ++ switch (hw->mac.type) { ++ default: ++ ret_val = e1000e_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ if ((phy->id != 0) && (phy->id != PHY_REVISION_MASK)) ++ break; ++ /* fall-through */ ++ case e1000_pch2lan: ++ case e1000_pch_lpt: ++ /* ++ * In case the PHY needs to be in mdio slow mode, ++ * set slow mode and try to get the PHY id again. ++ */ ++ ret_val = e1000_set_mdio_slow_mode_hv(hw); ++ if (ret_val) ++ goto out; ++ ret_val = e1000e_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ break; ++ } ++ phy->type = e1000e_get_phy_type_from_id(phy->id); ++ ++ switch (phy->type) { ++ case e1000_phy_82577: ++ case e1000_phy_82579: ++ case e1000_phy_i217: ++ phy->ops.check_polarity = e1000_check_polarity_82577; ++ phy->ops.force_speed_duplex = ++ e1000_phy_force_speed_duplex_82577; ++ phy->ops.get_cable_length = e1000_get_cable_length_82577; ++ phy->ops.get_info = e1000_get_phy_info_82577; ++ phy->ops.commit = e1000e_phy_sw_reset; ++ break; ++ case e1000_phy_82578: ++ phy->ops.check_polarity = e1000_check_polarity_m88; ++ phy->ops.force_speed_duplex = e1000e_phy_force_speed_duplex_m88; ++ phy->ops.get_cable_length = e1000e_get_cable_length_m88; ++ phy->ops.get_info = e1000e_get_phy_info_m88; ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ break; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_phy_params_ich8lan - Initialize PHY function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific PHY parameters and function pointers. ++ **/ ++static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 i = 0; ++ ++ phy->addr = 1; ++ phy->reset_delay_us = 100; ++ ++ phy->ops.power_up = e1000_power_up_phy_copper; ++ phy->ops.power_down = e1000_power_down_phy_copper_ich8lan; ++ ++ /* ++ * We may need to do this twice - once for IGP and if that fails, ++ * we'll set BM func pointers and try again ++ */ ++ ret_val = e1000e_determine_phy_address(hw); ++ if (ret_val) { ++ phy->ops.write_reg = e1000e_write_phy_reg_bm; ++ phy->ops.read_reg = e1000e_read_phy_reg_bm; ++ ret_val = e1000e_determine_phy_address(hw); ++ if (ret_val) { ++ e_dbg("Cannot determine PHY addr. 
Erroring out\n"); ++ return ret_val; ++ } ++ } ++ ++ phy->id = 0; ++ while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) && ++ (i++ < 100)) { ++ usleep_range(1000, 2000); ++ ret_val = e1000e_get_phy_id(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Verify phy id */ ++ switch (phy->id) { ++ case IGP03E1000_E_PHY_ID: ++ phy->type = e1000_phy_igp_3; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->ops.read_reg_locked = e1000e_read_phy_reg_igp_locked; ++ phy->ops.write_reg_locked = e1000e_write_phy_reg_igp_locked; ++ phy->ops.get_info = e1000e_get_phy_info_igp; ++ phy->ops.check_polarity = e1000_check_polarity_igp; ++ phy->ops.force_speed_duplex = e1000e_phy_force_speed_duplex_igp; ++ break; ++ case IFE_E_PHY_ID: ++ case IFE_PLUS_E_PHY_ID: ++ case IFE_C_E_PHY_ID: ++ phy->type = e1000_phy_ife; ++ phy->autoneg_mask = E1000_ALL_NOT_GIG; ++ phy->ops.get_info = e1000_get_phy_info_ife; ++ phy->ops.check_polarity = e1000_check_polarity_ife; ++ phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_ife; ++ break; ++ case BME1000_E_PHY_ID: ++ phy->type = e1000_phy_bm; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->ops.read_reg = e1000e_read_phy_reg_bm; ++ phy->ops.write_reg = e1000e_write_phy_reg_bm; ++ phy->ops.commit = e1000e_phy_sw_reset; ++ phy->ops.get_info = e1000e_get_phy_info_m88; ++ phy->ops.check_polarity = e1000_check_polarity_m88; ++ phy->ops.force_speed_duplex = e1000e_phy_force_speed_duplex_m88; ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_init_nvm_params_ich8lan - Initialize NVM function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific NVM parameters and function ++ * pointers. ++ **/ ++static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u32 gfpreg, sector_base_addr, sector_end_addr; ++ u16 i; ++ ++ /* Can't read flash registers if the register set isn't mapped. */ ++ if (!hw->flash_address) { ++ e_dbg("ERROR: Flash registers not mapped\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ nvm->type = e1000_nvm_flash_sw; ++ ++ gfpreg = er32flash(ICH_FLASH_GFPREG); ++ ++ /* ++ * sector_X_addr is a "sector"-aligned address (4096 bytes) ++ * Add 1 to sector_end_addr since this sector is included in ++ * the overall size. ++ */ ++ sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK; ++ sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1; ++ ++ /* flash_base_addr is byte-aligned */ ++ nvm->flash_base_addr = sector_base_addr << FLASH_SECTOR_ADDR_SHIFT; ++ ++ /* ++ * find total size of the NVM, then cut in half since the total ++ * size represents two separate NVM banks. ++ */ ++ nvm->flash_bank_size = (sector_end_addr - sector_base_addr) ++ << FLASH_SECTOR_ADDR_SHIFT; ++ nvm->flash_bank_size /= 2; ++ /* Adjust to word count */ ++ nvm->flash_bank_size /= sizeof(u16); ++ ++ nvm->word_size = E1000_ICH8_SHADOW_RAM_WORDS; ++ ++ /* Clear shadow ram */ ++ for (i = 0; i < nvm->word_size; i++) { ++ dev_spec->shadow_ram[i].modified = false; ++ dev_spec->shadow_ram[i].value = 0xFFFF; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_init_mac_params_ich8lan - Initialize MAC function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific MAC parameters and function ++ * pointers. 
++ **/ ++static s32 e1000_init_mac_params_ich8lan(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ /* Set media type function pointer */ ++ hw->phy.media_type = e1000_media_type_copper; ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 32; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_ICH_RAR_ENTRIES; ++ if (mac->type == e1000_ich8lan) ++ mac->rar_entry_count--; ++ /* FWSM register */ ++ mac->has_fwsm = true; ++ /* ARC subsystem not supported */ ++ mac->arc_subsystem_valid = false; ++ /* Adaptive IFS supported */ ++ mac->adaptive_ifs = true; ++ ++ /* LED operations */ ++ switch (mac->type) { ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ case e1000_ich10lan: ++ /* check management mode */ ++ mac->ops.check_mng_mode = e1000_check_mng_mode_ich8lan; ++ /* ID LED init */ ++ mac->ops.id_led_init = e1000e_id_led_init; ++ /* blink LED */ ++ mac->ops.blink_led = e1000e_blink_led_generic; ++ /* setup LED */ ++ mac->ops.setup_led = e1000e_setup_led_generic; ++ /* cleanup LED */ ++ mac->ops.cleanup_led = e1000_cleanup_led_ich8lan; ++ /* turn on/off LED */ ++ mac->ops.led_on = e1000_led_on_ich8lan; ++ mac->ops.led_off = e1000_led_off_ich8lan; ++ break; ++ case e1000_pch_lpt: ++ case e1000_pchlan: ++ case e1000_pch2lan: ++ /* check management mode */ ++ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; ++ /* ID LED init */ ++ mac->ops.id_led_init = e1000_id_led_init_pchlan; ++ /* setup LED */ ++ mac->ops.setup_led = e1000_setup_led_pchlan; ++ /* cleanup LED */ ++ mac->ops.cleanup_led = e1000_cleanup_led_pchlan; ++ /* turn on/off LED */ ++ mac->ops.led_on = e1000_led_on_pchlan; ++ mac->ops.led_off = e1000_led_off_pchlan; ++ break; ++ default: ++ break; ++ } ++ ++ if (mac->type == e1000_pch_lpt) { ++ mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES; ++ mac->ops.rar_set = e1000_rar_set_pch_lpt; ++ } ++ ++ /* Enable PCS Lock-loss workaround for ICH8 */ ++ if (mac->type == e1000_ich8lan) ++ e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, true); ++ ++ /* Gate automatic PHY configuration by hardware on managed ++ * 82579 and i217 ++ */ ++ if ((mac->type == e1000_pch2lan || mac->type == e1000_pch_lpt) && ++ (er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) ++ e1000_gate_hw_phy_config_ich8lan(hw, true); ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_eee_pchlan - Enable/disable EEE support ++ * @hw: pointer to the HW structure ++ * ++ * Enable/disable EEE based on setting in dev_spec structure. The bits in ++ * the LPI Control register will remain set only if/when link is up. 
++ **/ ++static s32 e1000_set_eee_pchlan(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ s32 ret_val = 0; ++ u16 phy_reg; ++ ++ if ((hw->phy.type != e1000_phy_82579) && ++ (hw->phy.type != e1000_phy_i217)) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, I82579_LPI_CTRL, &phy_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (dev_spec->eee_disable) ++ phy_reg &= ~I82579_LPI_CTRL_ENABLE_MASK; ++ else ++ phy_reg |= I82579_LPI_CTRL_ENABLE_MASK; ++ ++ ret_val = e1e_wphy(hw, I82579_LPI_CTRL, phy_reg); ++ ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->phy.type == e1000_phy_i217) && !dev_spec->eee_disable) { ++ /* Save off link partner's EEE ability */ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1e_wphy_locked(hw, I82579_EMI_ADDR, ++ I217_EEE_LP_ABILITY); ++ if (ret_val) ++ goto release; ++ e1e_rphy_locked(hw, I82579_EMI_DATA, &dev_spec->eee_lp_ability); ++ ++ /* EEE is not supported in 100Half, so ignore partner's EEE ++ * in 100 ability if full-duplex is not advertised. ++ */ ++ e1e_rphy_locked(hw, PHY_LP_ABILITY, &phy_reg); ++ if (!(phy_reg & NWAY_LPAR_100TX_FD_CAPS)) ++ dev_spec->eee_lp_ability &= ~I217_EEE_100_SUPPORTED; ++release: ++ hw->phy.ops.release(hw); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_check_for_copper_link_ich8lan - Check for link (Copper) ++ * @hw: pointer to the HW structure ++ * ++ * Checks to see of the link status of the hardware has changed. If a ++ * change in link status has been detected, then we read the PHY registers ++ * to get the current speed/duplex if link exists. ++ **/ ++static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ bool link; ++ u16 phy_reg; ++ ++ /* ++ * We only want to go out to the PHY registers to see if Auto-Neg ++ * has completed and/or if our link status has changed. The ++ * get_link_status flag is set upon receiving a Link Status ++ * Change or Rx Sequence Error interrupt. ++ */ ++ if (!mac->get_link_status) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ /* ++ * First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. ++ */ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (hw->mac.type == e1000_pchlan) { ++ ret_val = e1000_k1_gig_workaround_hv(hw, link); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Clear link partner's EEE ability */ ++ hw->dev_spec.ich8lan.eee_lp_ability = 0; ++ ++ if (!link) ++ goto out; /* No link detected */ ++ ++ mac->get_link_status = false; ++ ++ switch (hw->mac.type) { ++ case e1000_pch2lan: ++ ret_val = e1000_k1_workaround_lv(hw); ++ if (ret_val) ++ goto out; ++ /* fall-thru */ ++ case e1000_pchlan: ++ if (hw->phy.type == e1000_phy_82578) { ++ ret_val = e1000_link_stall_workaround_hv(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* ++ * Workaround for PCHx parts in half-duplex: ++ * Set the number of preambles removed from the packet ++ * when it is passed from the PHY to the MAC to prevent ++ * the MAC from misinterpreting the packet type. 
++ */ ++ e1e_rphy(hw, HV_KMRN_FIFO_CTRLSTA, &phy_reg); ++ phy_reg &= ~HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK; ++ ++ if ((er32(STATUS) & E1000_STATUS_FD) != E1000_STATUS_FD) ++ phy_reg |= (1 << HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT); ++ ++ e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, phy_reg); ++ break; ++ default: ++ break; ++ } ++ ++ /* ++ * Check if there was DownShift, must be checked ++ * immediately after link-up ++ */ ++ e1000e_check_downshift(hw); ++ ++ /* Enable/Disable EEE after link up */ ++ ret_val = e1000_set_eee_pchlan(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* ++ * Auto-Neg is enabled. Auto Speed Detection takes care ++ * of MAC speed/duplex configuration. So we only need to ++ * configure Collision Distance in the MAC. ++ */ ++ e1000e_config_collision_dist(hw); ++ ++ /* ++ * Configure Flow Control now that Auto-Neg has completed. ++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ if (ret_val) ++ e_dbg("Error configuring flow control\n"); ++ ++out: ++ return ret_val; ++} ++ ++static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ s32 rc; ++ ++ rc = e1000_init_mac_params_ich8lan(adapter); ++ if (rc) ++ return rc; ++ ++ rc = e1000_init_nvm_params_ich8lan(hw); ++ if (rc) ++ return rc; ++ ++ switch (hw->mac.type) { ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ case e1000_ich10lan: ++ rc = e1000_init_phy_params_ich8lan(hw); ++ break; ++ case e1000_pchlan: ++ case e1000_pch2lan: ++ case e1000_pch_lpt: ++ rc = e1000_init_phy_params_pchlan(hw); ++ break; ++ default: ++ break; ++ } ++ if (rc) ++ return rc; ++ ++ /* ++ * Disable Jumbo Frame support on parts with Intel 10/100 PHY or ++ * on parts with MACsec enabled in NVM (reflected in CTRL_EXT). ++ */ ++ if ((adapter->hw.phy.type == e1000_phy_ife) || ++ ((adapter->hw.mac.type >= e1000_pch2lan) && ++ (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LSECCK)))) { ++ adapter->flags &= ~FLAG_HAS_JUMBO_FRAMES; ++ adapter->max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN; ++ ++ hw->mac.ops.blink_led = NULL; ++ } ++ ++ if ((adapter->hw.mac.type == e1000_ich8lan) && ++ (adapter->hw.phy.type != e1000_phy_ife)) ++ adapter->flags |= FLAG_LSC_GIG_SPEED_DROP; ++ ++ /* Enable workaround for 82579 w/ ME enabled */ ++ if ((adapter->hw.mac.type == e1000_pch2lan) && ++ (er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) ++ adapter->flags2 |= FLAG2_PCIM2PCI_ARBITER_WA; ++ ++ /* Disable EEE by default until IEEE802.3az spec is finalized */ ++ if (adapter->flags2 & FLAG2_HAS_EEE) ++ adapter->hw.dev_spec.ich8lan.eee_disable = true; ++ ++ return 0; ++} ++ ++static DEFINE_MUTEX(nvm_mutex); ++ ++/** ++ * e1000_acquire_nvm_ich8lan - Acquire NVM mutex ++ * @hw: pointer to the HW structure ++ * ++ * Acquires the mutex for performing NVM operations. ++ **/ ++static s32 e1000_acquire_nvm_ich8lan(struct e1000_hw *hw) ++{ ++ mutex_lock(&nvm_mutex); ++ ++ return 0; ++} ++ ++/** ++ * e1000_release_nvm_ich8lan - Release NVM mutex ++ * @hw: pointer to the HW structure ++ * ++ * Releases the mutex used while performing NVM operations. 
++ **/ ++static void e1000_release_nvm_ich8lan(struct e1000_hw *hw) ++{ ++ mutex_unlock(&nvm_mutex); ++} ++ ++/** ++ * e1000_acquire_swflag_ich8lan - Acquire software control flag ++ * @hw: pointer to the HW structure ++ * ++ * Acquires the software control flag for performing PHY and select ++ * MAC CSR accesses. ++ **/ ++static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl, timeout = PHY_CFG_TIMEOUT; ++ s32 ret_val = 0; ++ ++ if (test_and_set_bit(__E1000_ACCESS_SHARED_RESOURCE, ++ &hw->adapter->state)) { ++ WARN(1, "e1000e: %s: contention for Phy access\n", ++ hw->adapter->netdev->name); ++ return -E1000_ERR_PHY; ++ } ++ ++ while (timeout) { ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ if (!(extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG)) ++ break; ++ ++ mdelay(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ e_dbg("SW has already locked the resource.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ timeout = SW_FLAG_TIMEOUT; ++ ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG; ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++ ++ while (timeout) { ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) ++ break; ++ ++ mdelay(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ e_dbg("Failed to acquire the semaphore, FW or HW has it: " ++ "FWSM=0x%8.8x EXTCNF_CTRL=0x%8.8x)\n", ++ er32(FWSM), extcnf_ctrl); ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++out: ++ if (ret_val) ++ clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_release_swflag_ich8lan - Release software control flag ++ * @hw: pointer to the HW structure ++ * ++ * Releases the software control flag for performing PHY and select ++ * MAC CSR accesses. ++ **/ ++static void e1000_release_swflag_ich8lan(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl; ++ ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) { ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++ } else { ++ e_dbg("Semaphore unexpectedly released by sw/fw/hw\n"); ++ } ++ ++ clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state); ++} ++ ++/** ++ * e1000_check_mng_mode_ich8lan - Checks management mode ++ * @hw: pointer to the HW structure ++ * ++ * This checks if the adapter has any manageability enabled. ++ * This is a function pointer entry point only called by read/write ++ * routines for the PHY and NVM parts. ++ **/ ++static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ fwsm = er32(FWSM); ++ return (fwsm & E1000_ICH_FWSM_FW_VALID) && ++ ((fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); ++} ++ ++/** ++ * e1000_check_mng_mode_pchlan - Checks management mode ++ * @hw: pointer to the HW structure ++ * ++ * This checks if the adapter has iAMT enabled. ++ * This is a function pointer entry point only called by read/write ++ * routines for the PHY and NVM parts. 
++ **/ ++static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ fwsm = er32(FWSM); ++ return (fwsm & E1000_ICH_FWSM_FW_VALID) && ++ (fwsm & (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); ++} ++ ++/** ++ * e1000_rar_set_pch_lpt - Set receive address registers ++ * @hw: pointer to the HW structure ++ * @addr: pointer to the receive address ++ * @index: receive address array register ++ * ++ * Sets the receive address register array at index to the address passed ++ * in by addr. For LPT, RAR[0] is the base address register that is to ++ * contain the MAC address. SHRA[0-10] are the shared receive address ++ * registers that are shared between the Host and manageability engine (ME). ++ **/ ++static void e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index) ++{ ++ u32 rar_low, rar_high; ++ u32 wlock_mac; ++ ++ /* HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((u32)addr[0] | ((u32)addr[1] << 8) | ++ ((u32)addr[2] << 16) | ((u32)addr[3] << 24)); ++ ++ rar_high = ((u32)addr[4] | ((u32)addr[5] << 8)); ++ ++ /* If MAC address zero, no need to set the AV bit */ ++ if (rar_low || rar_high) ++ rar_high |= E1000_RAH_AV; ++ ++ if (index == 0) { ++ ew32(RAL(index), rar_low); ++ e1e_flush(); ++ ew32(RAH(index), rar_high); ++ e1e_flush(); ++ return; ++ } ++ ++ /* The manageability engine (ME) can lock certain SHRAR registers that ++ * it is using - those registers are unavailable for use. ++ */ ++ if (index < hw->mac.rar_entry_count) { ++ wlock_mac = er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK; ++ wlock_mac >>= E1000_FWSM_WLOCK_MAC_SHIFT; ++ ++ /* Check if all SHRAR registers are locked */ ++ if (wlock_mac == 1) ++ goto out; ++ ++ if ((wlock_mac == 0) || (index <= wlock_mac)) { ++ s32 ret_val; ++ ++ ret_val = e1000_acquire_swflag_ich8lan(hw); ++ ++ if (ret_val) ++ goto out; ++ ++ ew32(SHRAL_PCH_LPT(index - 1), rar_low); ++ e1e_flush(); ++ ew32(SHRAH_PCH_LPT(index - 1), rar_high); ++ e1e_flush(); ++ ++ e1000_release_swflag_ich8lan(hw); ++ ++ /* verify the register updates */ ++ if ((er32(SHRAL_PCH_LPT(index - 1)) == rar_low) && ++ (er32(SHRAH_PCH_LPT(index - 1)) == rar_high)) ++ return; ++ } ++ } ++ ++out: ++ e_dbg("Failed to write receive address at index %d\n", index); ++} ++ ++/** ++ * e1000_check_reset_block_ich8lan - Check if PHY reset is blocked ++ * @hw: pointer to the HW structure ++ * ++ * Checks if firmware is blocking the reset of the PHY. ++ * This is a function pointer entry point only called by ++ * reset routines. ++ **/ ++static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ fwsm = er32(FWSM); ++ ++ return (fwsm & E1000_ICH_FWSM_RSPCIPHY) ? 0 : E1000_BLK_PHY_RESET; ++} ++ ++/** ++ * e1000_write_smbus_addr - Write SMBus address to PHY needed during Sx states ++ * @hw: pointer to the HW structure ++ * ++ * Assumes semaphore already acquired. 
++ * ++ **/ ++static s32 e1000_write_smbus_addr(struct e1000_hw *hw) ++{ ++ u16 phy_data; ++ u32 strap = er32(STRAP); ++ u32 freq = (strap & E1000_STRAP_SMT_FREQ_MASK) >> ++ E1000_STRAP_SMT_FREQ_SHIFT; ++ s32 ret_val = 0; ++ ++ strap &= E1000_STRAP_SMBUS_ADDRESS_MASK; ++ ++ ret_val = e1000_read_phy_reg_hv_locked(hw, HV_SMB_ADDR, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~HV_SMB_ADDR_MASK; ++ phy_data |= (strap >> E1000_STRAP_SMBUS_ADDRESS_SHIFT); ++ phy_data |= HV_SMB_ADDR_PEC_EN | HV_SMB_ADDR_VALID; ++ ++ if (hw->phy.type == e1000_phy_i217) { ++ /* Restore SMBus frequency */ ++ if (freq--) { ++ phy_data &= ~HV_SMB_ADDR_FREQ_MASK; ++ phy_data |= (freq & (1 << 0)) << ++ HV_SMB_ADDR_FREQ_LOW_SHIFT; ++ phy_data |= (freq & (1 << 1)) << ++ (HV_SMB_ADDR_FREQ_HIGH_SHIFT - 1); ++ } else { ++ e_dbg("Unsupported SMB frequency in PHY\n"); ++ } ++ } ++ ++ ret_val = e1000_write_phy_reg_hv_locked(hw, HV_SMB_ADDR, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_sw_lcd_config_ich8lan - SW-based LCD Configuration ++ * @hw: pointer to the HW structure ++ * ++ * SW should configure the LCD from the NVM extended configuration region ++ * as a workaround for certain parts. ++ **/ ++static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, data, cnf_size, cnf_base_addr, sw_cfg_mask; ++ s32 ret_val = 0; ++ u16 word_addr, reg_data, reg_addr, phy_page = 0; ++ ++ /* ++ * Initialize the PHY from the NVM on ICH platforms. This ++ * is needed due to an issue where the NVM configuration is ++ * not properly autoloaded after power transitions. ++ * Therefore, after each PHY reset, we will load the ++ * configuration data out of the NVM manually. ++ */ ++ switch (hw->mac.type) { ++ case e1000_ich8lan: ++ if (phy->type != e1000_phy_igp_3) ++ return ret_val; ++ ++ if ((hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_AMT) || ++ (hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_C)) { ++ sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG; ++ break; ++ } ++ /* Fall-thru */ ++ case e1000_pchlan: ++ case e1000_pch2lan: ++ case e1000_pch_lpt: ++ sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; ++ break; ++ default: ++ return ret_val; ++ } ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ data = er32(FEXTNVM); ++ if (!(data & sw_cfg_mask)) ++ goto out; ++ ++ /* ++ * Make sure HW does not configure LCD from PHY ++ * extended configuration before SW configuration ++ */ ++ data = er32(EXTCNF_CTRL); ++ if ((hw->mac.type < e1000_pch2lan) && ++ (data & E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE)) ++ goto out; ++ ++ cnf_size = er32(EXTCNF_SIZE); ++ cnf_size &= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK; ++ cnf_size >>= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT; ++ if (!cnf_size) ++ goto out; ++ ++ cnf_base_addr = data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK; ++ cnf_base_addr >>= E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT; ++ ++ if (((hw->mac.type == e1000_pchlan) && ++ !(data & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE)) || ++ (hw->mac.type > e1000_pchlan)) { ++ /* ++ * HW configures the SMBus address and LEDs when the ++ * OEM and LCD Write Enable bits are set in the NVM. ++ * When both NVM bits are cleared, SW will configure ++ * them instead. ++ */ ++ ret_val = e1000_write_smbus_addr(hw); ++ if (ret_val) ++ goto out; ++ ++ data = er32(LEDCTL); ++ ret_val = e1000_write_phy_reg_hv_locked(hw, HV_LED_CONFIG, ++ (u16)data); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Configure LCD from extended configuration region. 
*/ ++ ++ /* cnf_base_addr is in DWORD */ ++ word_addr = (u16)(cnf_base_addr << 1); ++ ++ for (i = 0; i < cnf_size; i++) { ++ ret_val = e1000_read_nvm(hw, (word_addr + i * 2), 1, ++ ®_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_nvm(hw, (word_addr + i * 2 + 1), ++ 1, ®_addr); ++ if (ret_val) ++ goto out; ++ ++ /* Save off the PHY page for future writes. */ ++ if (reg_addr == IGP01E1000_PHY_PAGE_SELECT) { ++ phy_page = reg_data; ++ continue; ++ } ++ ++ reg_addr &= PHY_REG_MASK; ++ reg_addr |= phy_page; ++ ++ ret_val = phy->ops.write_reg_locked(hw, (u32)reg_addr, ++ reg_data); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * e1000_k1_gig_workaround_hv - K1 Si workaround ++ * @hw: pointer to the HW structure ++ * @link: link up bool flag ++ * ++ * If K1 is enabled for 1Gbps, the MAC might stall when transitioning ++ * from a lower speed. This workaround disables K1 whenever link is at 1Gig ++ * If link is down, the function will restore the default K1 setting located ++ * in the NVM. ++ **/ ++static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link) ++{ ++ s32 ret_val = 0; ++ u16 status_reg = 0; ++ bool k1_enable = hw->dev_spec.ich8lan.nvm_k1_enabled; ++ ++ if (hw->mac.type != e1000_pchlan) ++ goto out; ++ ++ /* Wrap the whole flow with the sw flag */ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Disable K1 when link is 1Gbps, otherwise use the NVM setting */ ++ if (link) { ++ if (hw->phy.type == e1000_phy_82578) { ++ ret_val = hw->phy.ops.read_reg_locked(hw, BM_CS_STATUS, ++ &status_reg); ++ if (ret_val) ++ goto release; ++ ++ status_reg &= BM_CS_STATUS_LINK_UP | ++ BM_CS_STATUS_RESOLVED | ++ BM_CS_STATUS_SPEED_MASK; ++ ++ if (status_reg == (BM_CS_STATUS_LINK_UP | ++ BM_CS_STATUS_RESOLVED | ++ BM_CS_STATUS_SPEED_1000)) ++ k1_enable = false; ++ } ++ ++ if (hw->phy.type == e1000_phy_82577) { ++ ret_val = hw->phy.ops.read_reg_locked(hw, HV_M_STATUS, ++ &status_reg); ++ if (ret_val) ++ goto release; ++ ++ status_reg &= HV_M_STATUS_LINK_UP | ++ HV_M_STATUS_AUTONEG_COMPLETE | ++ HV_M_STATUS_SPEED_MASK; ++ ++ if (status_reg == (HV_M_STATUS_LINK_UP | ++ HV_M_STATUS_AUTONEG_COMPLETE | ++ HV_M_STATUS_SPEED_1000)) ++ k1_enable = false; ++ } ++ ++ /* Link stall fix for link up */ ++ ret_val = hw->phy.ops.write_reg_locked(hw, PHY_REG(770, 19), ++ 0x0100); ++ if (ret_val) ++ goto release; ++ ++ } else { ++ /* Link stall fix for link down */ ++ ret_val = hw->phy.ops.write_reg_locked(hw, PHY_REG(770, 19), ++ 0x4100); ++ if (ret_val) ++ goto release; ++ } ++ ++ ret_val = e1000_configure_k1_ich8lan(hw, k1_enable); ++ ++release: ++ hw->phy.ops.release(hw); ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_configure_k1_ich8lan - Configure K1 power state ++ * @hw: pointer to the HW structure ++ * @enable: K1 state to configure ++ * ++ * Configure the K1 power state based on the provided parameter. ++ * Assumes semaphore already acquired. 
++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ **/ ++s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable) ++{ ++ s32 ret_val = 0; ++ u32 ctrl_reg = 0; ++ u32 ctrl_ext = 0; ++ u32 reg = 0; ++ u16 kmrn_reg = 0; ++ ++ ret_val = e1000e_read_kmrn_reg_locked(hw, ++ E1000_KMRNCTRLSTA_K1_CONFIG, ++ &kmrn_reg); ++ if (ret_val) ++ goto out; ++ ++ if (k1_enable) ++ kmrn_reg |= E1000_KMRNCTRLSTA_K1_ENABLE; ++ else ++ kmrn_reg &= ~E1000_KMRNCTRLSTA_K1_ENABLE; ++ ++ ret_val = e1000e_write_kmrn_reg_locked(hw, ++ E1000_KMRNCTRLSTA_K1_CONFIG, ++ kmrn_reg); ++ if (ret_val) ++ goto out; ++ ++ udelay(20); ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_reg = er32(CTRL); ++ ++ reg = ctrl_reg & ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ reg |= E1000_CTRL_FRCSPD; ++ ew32(CTRL, reg); ++ ++ ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_SPD_BYPS); ++ e1e_flush(); ++ udelay(20); ++ ew32(CTRL, ctrl_reg); ++ ew32(CTRL_EXT, ctrl_ext); ++ e1e_flush(); ++ udelay(20); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_oem_bits_config_ich8lan - SW-based LCD Configuration ++ * @hw: pointer to the HW structure ++ * @d0_state: boolean if entering d0 or d3 device state ++ * ++ * SW will configure Gbe Disable and LPLU based on the NVM. The four bits are ++ * collectively called OEM bits. The OEM Write Enable bit and SW Config bit ++ * in NVM determines whether HW should configure LPLU and Gbe Disable. ++ **/ ++static s32 e1000_oem_bits_config_ich8lan(struct e1000_hw *hw, bool d0_state) ++{ ++ s32 ret_val = 0; ++ u32 mac_reg; ++ u16 oem_reg; ++ ++ if (hw->mac.type < e1000_pchlan) ++ return ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ if (hw->mac.type == e1000_pchlan) { ++ mac_reg = er32(EXTCNF_CTRL); ++ if (mac_reg & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE) ++ goto out; ++ } ++ ++ mac_reg = er32(FEXTNVM); ++ if (!(mac_reg & E1000_FEXTNVM_SW_CONFIG_ICH8M)) ++ goto out; ++ ++ mac_reg = er32(PHY_CTRL); ++ ++ ret_val = hw->phy.ops.read_reg_locked(hw, HV_OEM_BITS, &oem_reg); ++ if (ret_val) ++ goto out; ++ ++ oem_reg &= ~(HV_OEM_BITS_GBE_DIS | HV_OEM_BITS_LPLU); ++ ++ if (d0_state) { ++ if (mac_reg & E1000_PHY_CTRL_GBE_DISABLE) ++ oem_reg |= HV_OEM_BITS_GBE_DIS; ++ ++ if (mac_reg & E1000_PHY_CTRL_D0A_LPLU) ++ oem_reg |= HV_OEM_BITS_LPLU; ++ ++ /* Set Restart auto-neg to activate the bits */ ++ if (!e1000_check_reset_block(hw)) ++ oem_reg |= HV_OEM_BITS_RESTART_AN; ++ } else { ++ if (mac_reg & (E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE)) ++ oem_reg |= HV_OEM_BITS_GBE_DIS; ++ ++ if (mac_reg & (E1000_PHY_CTRL_D0A_LPLU | ++ E1000_PHY_CTRL_NOND0A_LPLU)) ++ oem_reg |= HV_OEM_BITS_LPLU; ++ } ++ ++ ret_val = hw->phy.ops.write_reg_locked(hw, HV_OEM_BITS, oem_reg); ++ ++out: ++ hw->phy.ops.release(hw); ++ ++ return ret_val; ++} ++ ++ ++/** ++ * e1000_set_mdio_slow_mode_hv - Set slow MDIO access mode ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, HV_KMRN_MODE_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= HV_KMRN_MDIO_SLOW; ++ ++ ret_val = e1e_wphy(hw, HV_KMRN_MODE_CTRL, data); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_hv_phy_workarounds_ich8lan - A series of Phy workarounds to be ++ * done after every PHY reset. 
++ **/ ++static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 phy_data; ++ ++ if (hw->mac.type != e1000_pchlan) ++ return ret_val; ++ ++ /* Set MDIO slow mode before any other MDIO access */ ++ if (hw->phy.type == e1000_phy_82577) { ++ ret_val = e1000_set_mdio_slow_mode_hv(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (((hw->phy.type == e1000_phy_82577) && ++ ((hw->phy.revision == 1) || (hw->phy.revision == 2))) || ++ ((hw->phy.type == e1000_phy_82578) && (hw->phy.revision == 1))) { ++ /* Disable generation of early preamble */ ++ ret_val = e1e_wphy(hw, PHY_REG(769, 25), 0x4431); ++ if (ret_val) ++ return ret_val; ++ ++ /* Preamble tuning for SSC */ ++ ret_val = e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, 0xA204); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if (hw->phy.type == e1000_phy_82578) { ++ /* ++ * Return registers to default by doing a soft reset then ++ * writing 0x3140 to the control register. ++ */ ++ if (hw->phy.revision < 2) { ++ e1000e_phy_sw_reset(hw); ++ ret_val = e1e_wphy(hw, PHY_CONTROL, 0x3140); ++ } ++ } ++ ++ /* Select page 0 */ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ hw->phy.addr = 1; ++ ret_val = e1000e_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, 0); ++ hw->phy.ops.release(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Configure the K1 Si workaround during phy reset assuming there is ++ * link so that it disables K1 if link is in 1Gbps. ++ */ ++ ret_val = e1000_k1_gig_workaround_hv(hw, true); ++ if (ret_val) ++ goto out; ++ ++ /* Workaround for link disconnects on a busy hub in half duplex */ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ret_val = hw->phy.ops.read_reg_locked(hw, BM_PORT_GEN_CFG, &phy_data); ++ if (ret_val) ++ goto release; ++ ret_val = hw->phy.ops.write_reg_locked(hw, BM_PORT_GEN_CFG, ++ phy_data & 0x00FF); ++release: ++ hw->phy.ops.release(hw); ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_copy_rx_addrs_to_phy_ich8lan - Copy Rx addresses from MAC to PHY ++ * @hw: pointer to the HW structure ++ **/ ++void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) ++{ ++ u32 mac_reg; ++ u16 i, phy_reg = 0; ++ s32 ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return; ++ ret_val = e1000_enable_phy_wakeup_reg_access_bm(hw, &phy_reg); ++ if (ret_val) ++ goto release; ++ ++ /* Copy both RAL/H (rar_entry_count) and SHRAL/H (+4) to PHY */ ++ for (i = 0; i < (hw->mac.rar_entry_count + 4); i++) { ++ mac_reg = er32(RAL(i)); ++ hw->phy.ops.write_reg_page(hw, BM_RAR_L(i), ++ (u16)(mac_reg & 0xFFFF)); ++ hw->phy.ops.write_reg_page(hw, BM_RAR_M(i), ++ (u16)((mac_reg >> 16) & 0xFFFF)); ++ ++ mac_reg = er32(RAH(i)); ++ hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), ++ (u16)(mac_reg & 0xFFFF)); ++ hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), ++ (u16)((mac_reg & E1000_RAH_AV) ++ >> 16)); ++ } ++ ++ e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); ++ ++release: ++ hw->phy.ops.release(hw); ++} ++ ++/** ++ * e1000_lv_jumbo_workaround_ich8lan - required for jumbo frame operation ++ * with 82579 PHY ++ * @hw: pointer to the HW structure ++ * @enable: flag to enable/disable workaround when enabling/disabling jumbos ++ **/ ++s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) ++{ ++ s32 ret_val = 0; ++ u16 phy_reg, data; ++ u32 mac_reg; ++ u16 i; ++ ++ if (hw->mac.type < e1000_pch2lan) ++ goto out; ++ ++ /* disable Rx path while enabling/disabling workaround */ ++ e1e_rphy(hw, PHY_REG(769, 20), &phy_reg); ++ ret_val = 
e1e_wphy(hw, PHY_REG(769, 20), phy_reg | (1 << 14)); ++ if (ret_val) ++ goto out; ++ ++ if (enable) { ++ /* ++ * Write Rx addresses (rar_entry_count for RAL/H, +4 for ++ * SHRAL/H) and initial CRC values to the MAC ++ */ ++ for (i = 0; i < (hw->mac.rar_entry_count + 4); i++) { ++ u8 mac_addr[ETH_ALEN] = {0}; ++ u32 addr_high, addr_low; ++ ++ addr_high = er32(RAH(i)); ++ if (!(addr_high & E1000_RAH_AV)) ++ continue; ++ addr_low = er32(RAL(i)); ++ mac_addr[0] = (addr_low & 0xFF); ++ mac_addr[1] = ((addr_low >> 8) & 0xFF); ++ mac_addr[2] = ((addr_low >> 16) & 0xFF); ++ mac_addr[3] = ((addr_low >> 24) & 0xFF); ++ mac_addr[4] = (addr_high & 0xFF); ++ mac_addr[5] = ((addr_high >> 8) & 0xFF); ++ ++ ew32(PCH_RAICC(i), ~ether_crc_le(ETH_ALEN, mac_addr)); ++ } ++ ++ /* Write Rx addresses to the PHY */ ++ e1000_copy_rx_addrs_to_phy_ich8lan(hw); ++ ++ /* Enable jumbo frame workaround in the MAC */ ++ mac_reg = er32(FFLT_DBG); ++ mac_reg &= ~(1 << 14); ++ mac_reg |= (7 << 15); ++ ew32(FFLT_DBG, mac_reg); ++ ++ mac_reg = er32(RCTL); ++ mac_reg |= E1000_RCTL_SECRC; ++ ew32(RCTL, mac_reg); ++ ++ ret_val = e1000e_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_CTRL_OFFSET, ++ &data); ++ if (ret_val) ++ goto out; ++ ret_val = e1000e_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_CTRL_OFFSET, ++ data | (1 << 0)); ++ if (ret_val) ++ goto out; ++ ret_val = e1000e_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_HD_CTRL, ++ &data); ++ if (ret_val) ++ goto out; ++ data &= ~(0xF << 8); ++ data |= (0xB << 8); ++ ret_val = e1000e_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_HD_CTRL, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* Enable jumbo frame workaround in the PHY */ ++ e1e_rphy(hw, PHY_REG(769, 23), &data); ++ data &= ~(0x7F << 5); ++ data |= (0x37 << 5); ++ ret_val = e1e_wphy(hw, PHY_REG(769, 23), data); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, PHY_REG(769, 16), &data); ++ data &= ~(1 << 13); ++ ret_val = e1e_wphy(hw, PHY_REG(769, 16), data); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, PHY_REG(776, 20), &data); ++ data &= ~(0x3FF << 2); ++ data |= (0x1A << 2); ++ ret_val = e1e_wphy(hw, PHY_REG(776, 20), data); ++ if (ret_val) ++ goto out; ++ ret_val = e1e_wphy(hw, PHY_REG(776, 23), 0xF100); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, HV_PM_CTRL, &data); ++ ret_val = e1e_wphy(hw, HV_PM_CTRL, data | (1 << 10)); ++ if (ret_val) ++ goto out; ++ } else { ++ /* Write MAC register values back to h/w defaults */ ++ mac_reg = er32(FFLT_DBG); ++ mac_reg &= ~(0xF << 14); ++ ew32(FFLT_DBG, mac_reg); ++ ++ mac_reg = er32(RCTL); ++ mac_reg &= ~E1000_RCTL_SECRC; ++ ew32(RCTL, mac_reg); ++ ++ ret_val = e1000e_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_CTRL_OFFSET, ++ &data); ++ if (ret_val) ++ goto out; ++ ret_val = e1000e_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_CTRL_OFFSET, ++ data & ~(1 << 0)); ++ if (ret_val) ++ goto out; ++ ret_val = e1000e_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_HD_CTRL, ++ &data); ++ if (ret_val) ++ goto out; ++ data &= ~(0xF << 8); ++ data |= (0xB << 8); ++ ret_val = e1000e_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_HD_CTRL, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* Write PHY register values back to h/w defaults */ ++ e1e_rphy(hw, PHY_REG(769, 23), &data); ++ data &= ~(0x7F << 5); ++ ret_val = e1e_wphy(hw, PHY_REG(769, 23), data); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, PHY_REG(769, 16), &data); ++ data |= (1 << 13); ++ ret_val = e1e_wphy(hw, PHY_REG(769, 16), data); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, PHY_REG(776, 20), &data); ++ data &= ~(0x3FF << 2); ++ data |= (0x8 << 2); ++ ret_val = e1e_wphy(hw, 
PHY_REG(776, 20), data); ++ if (ret_val) ++ goto out; ++ ret_val = e1e_wphy(hw, PHY_REG(776, 23), 0x7E00); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, HV_PM_CTRL, &data); ++ ret_val = e1e_wphy(hw, HV_PM_CTRL, data & ~(1 << 10)); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* re-enable Rx path after enabling/disabling workaround */ ++ ret_val = e1e_wphy(hw, PHY_REG(769, 20), phy_reg & ~(1 << 14)); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_lv_phy_workarounds_ich8lan - A series of Phy workarounds to be ++ * done after every PHY reset. ++ **/ ++static s32 e1000_lv_phy_workarounds_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ if (hw->mac.type < e1000_pch2lan) ++ goto out; ++ ++ /* Set MDIO slow mode before any other MDIO access */ ++ ret_val = e1000_set_mdio_slow_mode_hv(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_k1_gig_workaround_lv - K1 Si workaround ++ * @hw: pointer to the HW structure ++ * ++ * Workaround to set the K1 beacon duration for 82579 parts ++ **/ ++static s32 e1000_k1_workaround_lv(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 status_reg = 0; ++ u32 mac_reg; ++ u16 phy_reg; ++ ++ if (hw->mac.type != e1000_pch2lan) ++ goto out; ++ ++ /* Set K1 beacon duration based on 1Gbps speed or otherwise */ ++ ret_val = e1e_rphy(hw, HV_M_STATUS, &status_reg); ++ if (ret_val) ++ goto out; ++ ++ if ((status_reg & (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) ++ == (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) { ++ mac_reg = er32(FEXTNVM4); ++ mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK; ++ ++ ret_val = e1e_rphy(hw, I82579_LPI_CTRL, &phy_reg); ++ if (ret_val) ++ goto out; ++ ++ if (status_reg & HV_M_STATUS_SPEED_1000) { ++ mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC; ++ phy_reg &= ~I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT; ++ } else { ++ mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC; ++ phy_reg |= I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT; ++ } ++ ew32(FEXTNVM4, mac_reg); ++ ret_val = e1e_wphy(hw, I82579_LPI_CTRL, phy_reg); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_gate_hw_phy_config_ich8lan - disable PHY config via hardware ++ * @hw: pointer to the HW structure ++ * @gate: boolean set to true to gate, false to ungate ++ * ++ * Gate/ungate the automatic PHY configuration via hardware; perform ++ * the configuration via software instead. ++ **/ ++static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate) ++{ ++ u32 extcnf_ctrl; ++ ++ if (hw->mac.type != e1000_pch2lan) ++ return; ++ ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ ++ if (gate) ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; ++ else ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_GATE_PHY_CFG; ++ ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++ return; ++} ++ ++/** ++ * e1000_lan_init_done_ich8lan - Check for PHY config completion ++ * @hw: pointer to the HW structure ++ * ++ * Check the appropriate indication the MAC has finished configuring the ++ * PHY after a software reset. ++ **/ ++static void e1000_lan_init_done_ich8lan(struct e1000_hw *hw) ++{ ++ u32 data, loop = E1000_ICH8_LAN_INIT_TIMEOUT; ++ ++ /* Wait for basic configuration completes before proceeding */ ++ do { ++ data = er32(STATUS); ++ data &= E1000_STATUS_LAN_INIT_DONE; ++ udelay(100); ++ } while ((!data) && --loop); ++ ++ /* ++ * If basic configuration is incomplete before the above loop ++ * count reaches 0, loading the configuration from NVM will ++ * leave the PHY in a bad state possibly resulting in no link. 
++ */ ++ if (loop == 0) ++ e_dbg("LAN_INIT_DONE not set, increase timeout\n"); ++ ++ /* Clear the Init Done bit for the next init event */ ++ data = er32(STATUS); ++ data &= ~E1000_STATUS_LAN_INIT_DONE; ++ ew32(STATUS, data); ++} ++ ++/** ++ * e1000_post_phy_reset_ich8lan - Perform steps required after a PHY reset ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 reg; ++ ++ if (e1000_check_reset_block(hw)) ++ goto out; ++ ++ /* Allow time for h/w to get to quiescent state after reset */ ++ usleep_range(10000, 20000); ++ ++ /* Perform any necessary post-reset workarounds */ ++ switch (hw->mac.type) { ++ case e1000_pchlan: ++ ret_val = e1000_hv_phy_workarounds_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ break; ++ case e1000_pch2lan: ++ ret_val = e1000_lv_phy_workarounds_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ break; ++ default: ++ break; ++ } ++ ++ /* Clear the host wakeup bit after lcd reset */ ++ if (hw->mac.type >= e1000_pchlan) { ++ e1e_rphy(hw, BM_PORT_GEN_CFG, ®); ++ reg &= ~BM_WUC_HOST_WU_BIT; ++ e1e_wphy(hw, BM_PORT_GEN_CFG, reg); ++ } ++ ++ /* Configure the LCD with the extended configuration region in NVM */ ++ ret_val = e1000_sw_lcd_config_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Configure the LCD with the OEM bits in NVM */ ++ ret_val = e1000_oem_bits_config_ich8lan(hw, true); ++ ++ if (hw->mac.type == e1000_pch2lan) { ++ /* Ungate automatic PHY configuration on non-managed 82579 */ ++ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { ++ usleep_range(10000, 20000); ++ e1000_gate_hw_phy_config_ich8lan(hw, false); ++ } ++ ++ /* Set EEE LPI Update Timer to 200usec */ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ret_val = hw->phy.ops.write_reg_locked(hw, I82579_EMI_ADDR, ++ I82579_LPI_UPDATE_TIMER); ++ if (ret_val) ++ goto release; ++ ret_val = hw->phy.ops.write_reg_locked(hw, I82579_EMI_DATA, ++ 0x1387); ++release: ++ hw->phy.ops.release(hw); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_hw_reset_ich8lan - Performs a PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Resets the PHY ++ * This is a function pointer entry point called by drivers ++ * or other shared routines. ++ **/ ++static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ /* Gate automatic PHY configuration by hardware on non-managed 82579 */ ++ if ((hw->mac.type == e1000_pch2lan) && ++ !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) ++ e1000_gate_hw_phy_config_ich8lan(hw, true); ++ ++ ret_val = e1000e_phy_hw_reset_generic(hw); ++ if (ret_val) ++ return ret_val; ++ ++ return e1000_post_phy_reset_ich8lan(hw); ++} ++ ++/** ++ * e1000_set_lplu_state_pchlan - Set Low Power Link Up state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU state according to the active flag. For PCH, if OEM write ++ * bit are disabled in the NVM, writing the LPLU bits in the MAC will not set ++ * the phy speed. This function will manually set the LPLU bit and restart ++ * auto-neg as hw would do. D3 and D0 LPLU will call the same function ++ * since it configures the same bit. 
++ **/ ++static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active) ++{ ++ s32 ret_val = 0; ++ u16 oem_reg; ++ ++ ret_val = e1e_rphy(hw, HV_OEM_BITS, &oem_reg); ++ if (ret_val) ++ goto out; ++ ++ if (active) ++ oem_reg |= HV_OEM_BITS_LPLU; ++ else ++ oem_reg &= ~HV_OEM_BITS_LPLU; ++ ++ oem_reg |= HV_OEM_BITS_RESTART_AN; ++ ret_val = e1e_wphy(hw, HV_OEM_BITS, oem_reg); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_d0_lplu_state_ich8lan - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. ++ **/ ++static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 phy_ctrl; ++ s32 ret_val = 0; ++ u16 data; ++ ++ if (phy->type == e1000_phy_ife) ++ return ret_val; ++ ++ phy_ctrl = er32(PHY_CTRL); ++ ++ if (active) { ++ phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU; ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ if (phy->type != e1000_phy_igp_3) ++ return 0; ++ ++ /* ++ * Call gig speed drop workaround on LPLU before accessing ++ * any PHY registers ++ */ ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_gig_downshift_workaround_ich8lan(hw); ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data); ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ phy_ctrl &= ~E1000_PHY_CTRL_D0A_LPLU; ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ if (phy->type != e1000_phy_igp_3) ++ return 0; ++ ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_d3_lplu_state_ich8lan - Set Low Power Linkup D3 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D3 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. 
++ **/ ++static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 phy_ctrl; ++ s32 ret_val; ++ u16 data; ++ ++ phy_ctrl = er32(PHY_CTRL); ++ ++ if (!active) { ++ phy_ctrl &= ~E1000_PHY_CTRL_NOND0A_LPLU; ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ if (phy->type != e1000_phy_igp_3) ++ return 0; ++ ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ phy_ctrl |= E1000_PHY_CTRL_NOND0A_LPLU; ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ if (phy->type != e1000_phy_igp_3) ++ return 0; ++ ++ /* ++ * Call gig speed drop workaround on LPLU before accessing ++ * any PHY registers ++ */ ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_gig_downshift_workaround_ich8lan(hw); ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_valid_nvm_bank_detect_ich8lan - finds out the valid bank 0 or 1 ++ * @hw: pointer to the HW structure ++ * @bank: pointer to the variable that returns the active bank ++ * ++ * Reads signature byte from the NVM using the flash access registers. ++ * Word 0x13 bits 15:14 = 10b indicate a valid signature for that bank. 
++ **/ ++static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) ++{ ++ u32 eecd; ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 bank1_offset = nvm->flash_bank_size * sizeof(u16); ++ u32 act_offset = E1000_ICH_NVM_SIG_WORD * 2 + 1; ++ u8 sig_byte = 0; ++ s32 ret_val = 0; ++ ++ switch (hw->mac.type) { ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ eecd = er32(EECD); ++ if ((eecd & E1000_EECD_SEC1VAL_VALID_MASK) == ++ E1000_EECD_SEC1VAL_VALID_MASK) { ++ if (eecd & E1000_EECD_SEC1VAL) ++ *bank = 1; ++ else ++ *bank = 0; ++ ++ return 0; ++ } ++ e_dbg("Unable to determine valid NVM bank via EEC - " ++ "reading flash signature\n"); ++ /* fall-thru */ ++ default: ++ /* set bank to 0 in case flash read fails */ ++ *bank = 0; ++ ++ /* Check bank 0 */ ++ ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset, ++ &sig_byte); ++ if (ret_val) ++ return ret_val; ++ if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == ++ E1000_ICH_NVM_SIG_VALUE) { ++ *bank = 0; ++ return 0; ++ } ++ ++ /* Check bank 1 */ ++ ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset + ++ bank1_offset, ++ &sig_byte); ++ if (ret_val) ++ return ret_val; ++ if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == ++ E1000_ICH_NVM_SIG_VALUE) { ++ *bank = 1; ++ return 0; ++ } ++ ++ e_dbg("ERROR: No valid NVM bank present\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_read_nvm_ich8lan - Read word(s) from the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the word(s) to read. ++ * @words: Size of data to read in words ++ * @data: Pointer to the word(s) to read at offset. ++ * ++ * Reads a word(s) from the NVM using the flash access registers. ++ **/ ++static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u32 act_offset; ++ s32 ret_val = 0; ++ u32 bank = 0; ++ u16 i, word; ++ ++ if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || ++ (words == 0)) { ++ e_dbg("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ nvm->ops.acquire(hw); ++ ++ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); ++ if (ret_val) { ++ e_dbg("Could not detect valid bank, assuming bank 0\n"); ++ bank = 0; ++ } ++ ++ act_offset = (bank) ? nvm->flash_bank_size : 0; ++ act_offset += offset; ++ ++ ret_val = 0; ++ for (i = 0; i < words; i++) { ++ if (dev_spec->shadow_ram[offset+i].modified) { ++ data[i] = dev_spec->shadow_ram[offset+i].value; ++ } else { ++ ret_val = e1000_read_flash_word_ich8lan(hw, ++ act_offset + i, ++ &word); ++ if (ret_val) ++ break; ++ data[i] = word; ++ } ++ } ++ ++ nvm->ops.release(hw); ++ ++out: ++ if (ret_val) ++ e_dbg("NVM read error: %d\n", ret_val); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_flash_cycle_init_ich8lan - Initialize flash ++ * @hw: pointer to the HW structure ++ * ++ * This function does initial flash setup so that a new read/write/erase cycle ++ * can be started. ++ **/ ++static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) ++{ ++ union ich8_hws_flash_status hsfsts; ++ s32 ret_val = -E1000_ERR_NVM; ++ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ ++ /* Check if the flash descriptor is valid */ ++ if (hsfsts.hsf_status.fldesvalid == 0) { ++ e_dbg("Flash descriptor invalid. 
" ++ "SW Sequencing must be used.\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ /* Clear FCERR and DAEL in hw status by writing 1 */ ++ hsfsts.hsf_status.flcerr = 1; ++ hsfsts.hsf_status.dael = 1; ++ ++ ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval); ++ ++ /* ++ * Either we should have a hardware SPI cycle in progress ++ * bit to check against, in order to start a new cycle or ++ * FDONE bit should be changed in the hardware so that it ++ * is 1 after hardware reset, which can then be used as an ++ * indication whether a cycle is in progress or has been ++ * completed. ++ */ ++ ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ /* ++ * There is no cycle running at present, ++ * so we can start a cycle. ++ * Begin by setting Flash Cycle Done. ++ */ ++ hsfsts.hsf_status.flcdone = 1; ++ ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval); ++ ret_val = 0; ++ } else { ++ s32 i = 0; ++ ++ /* ++ * Otherwise poll for sometime so the current ++ * cycle has a chance to end before giving up. ++ */ ++ for (i = 0; i < ICH_FLASH_READ_COMMAND_TIMEOUT; i++) { ++ hsfsts.regval = __er16flash(hw, ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ ret_val = 0; ++ break; ++ } ++ udelay(1); ++ } ++ if (ret_val == 0) { ++ /* ++ * Successful in waiting for previous cycle to timeout, ++ * now set the Flash Cycle Done. ++ */ ++ hsfsts.hsf_status.flcdone = 1; ++ ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval); ++ } else { ++ e_dbg("Flash controller busy, cannot get access\n"); ++ } ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_flash_cycle_ich8lan - Starts flash cycle (read/write/erase) ++ * @hw: pointer to the HW structure ++ * @timeout: maximum time to wait for completion ++ * ++ * This function starts a flash cycle and waits for its completion. ++ **/ ++static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) ++{ ++ union ich8_hws_flash_ctrl hsflctl; ++ union ich8_hws_flash_status hsfsts; ++ s32 ret_val = -E1000_ERR_NVM; ++ u32 i = 0; ++ ++ /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ ++ hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcgo = 1; ++ ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* wait till FDONE bit is set to 1 */ ++ do { ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcdone == 1) ++ break; ++ udelay(1); ++ } while (i++ < timeout); ++ ++ if (hsfsts.hsf_status.flcdone == 1 && hsfsts.hsf_status.flcerr == 0) ++ return 0; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_flash_word_ich8lan - Read word from flash ++ * @hw: pointer to the HW structure ++ * @offset: offset to data location ++ * @data: pointer to the location for storing the data ++ * ++ * Reads the flash word at offset into data. Offset is converted ++ * to bytes before read. ++ **/ ++static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, ++ u16 *data) ++{ ++ /* Must convert offset into bytes. */ ++ offset <<= 1; ++ ++ return e1000_read_flash_data_ich8lan(hw, offset, 2, data); ++} ++ ++/** ++ * e1000_read_flash_byte_ich8lan - Read byte from flash ++ * @hw: pointer to the HW structure ++ * @offset: The offset of the byte to read. ++ * @data: Pointer to a byte to store the value read. ++ * ++ * Reads a single byte from the NVM using the flash access registers. 
++ **/ ++static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 *data) ++{ ++ s32 ret_val; ++ u16 word = 0; ++ ++ ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word); ++ if (ret_val) ++ return ret_val; ++ ++ *data = (u8)word; ++ ++ return 0; ++} ++ ++/** ++ * e1000_read_flash_data_ich8lan - Read byte or word from NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the byte or word to read. ++ * @size: Size of data to read, 1=byte 2=word ++ * @data: Pointer to the word to store the value read. ++ * ++ * Reads a byte or word from the NVM using the flash access registers. ++ **/ ++static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16 *data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ u32 flash_data = 0; ++ s32 ret_val = -E1000_ERR_NVM; ++ u8 count = 0; ++ ++ if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) ++ return -E1000_ERR_NVM; ++ ++ flash_linear_addr = (ICH_FLASH_LINEAR_ADDR_MASK & offset) + ++ hw->nvm.flash_base_addr; ++ ++ do { ++ udelay(1); ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val != 0) ++ break; ++ ++ hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ ++ hsflctl.hsf_ctrl.fldbcount = size - 1; ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ; ++ ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ ew32flash(ICH_FLASH_FADDR, flash_linear_addr); ++ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_READ_COMMAND_TIMEOUT); ++ ++ /* ++ * Check if FCERR is set to 1, if set to 1, clear it ++ * and try the whole sequence a few more times, else ++ * read in (shift in) the Flash Data0, the order is ++ * least significant byte first msb to lsb ++ */ ++ if (ret_val == 0) { ++ flash_data = er32flash(ICH_FLASH_FDATA0); ++ if (size == 1) ++ *data = (u8)(flash_data & 0x000000FF); ++ else if (size == 2) ++ *data = (u16)(flash_data & 0x0000FFFF); ++ break; ++ } else { ++ /* ++ * If we've gotten here, then things are probably ++ * completely hosed, but if the error condition is ++ * detected, it won't hurt to give it another try... ++ * ICH_FLASH_CYCLE_REPEAT_COUNT times. ++ */ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* Repeat for some time before giving up. */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ e_dbg("Timeout error - flash cycle " ++ "did not complete.\n"); ++ break; ++ } ++ } ++ } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_ich8lan - Write word(s) to the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the word(s) to write. ++ * @words: Size of data to write in words ++ * @data: Pointer to the word(s) to write at offset. ++ * ++ * Writes a byte or word to the NVM using the flash access registers. 
++ **/ ++static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u16 i; ++ ++ if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || ++ (words == 0)) { ++ e_dbg("nvm parameter(s) out of bounds\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ nvm->ops.acquire(hw); ++ ++ for (i = 0; i < words; i++) { ++ dev_spec->shadow_ram[offset+i].modified = true; ++ dev_spec->shadow_ram[offset+i].value = data[i]; ++ } ++ ++ nvm->ops.release(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_update_nvm_checksum_ich8lan - Update the checksum for NVM ++ * @hw: pointer to the HW structure ++ * ++ * The NVM checksum is updated by calling the generic update_nvm_checksum, ++ * which writes the checksum to the shadow ram. The changes in the shadow ++ * ram are then committed to the EEPROM by processing each bank at a time ++ * checking for the modified bit and writing only the pending changes. ++ * After a successful commit, the shadow ram is cleared and is ready for ++ * future writes. ++ **/ ++static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u32 i, act_offset, new_bank_offset, old_bank_offset, bank; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1000e_update_nvm_checksum_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ if (nvm->type != e1000_nvm_flash_sw) ++ goto out; ++ ++ nvm->ops.acquire(hw); ++ ++ /* ++ * We're writing to the opposite bank so if we're on bank 1, ++ * write to bank 0 etc. We also need to erase the segment that ++ * is going to be written ++ */ ++ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); ++ if (ret_val) { ++ e_dbg("Could not detect valid bank, assuming bank 0\n"); ++ bank = 0; ++ } ++ ++ if (bank == 0) { ++ new_bank_offset = nvm->flash_bank_size; ++ old_bank_offset = 0; ++ ret_val = e1000_erase_flash_bank_ich8lan(hw, 1); ++ if (ret_val) ++ goto release; ++ } else { ++ old_bank_offset = nvm->flash_bank_size; ++ new_bank_offset = 0; ++ ret_val = e1000_erase_flash_bank_ich8lan(hw, 0); ++ if (ret_val) ++ goto release; ++ } ++ ++ for (i = 0; i < E1000_ICH8_SHADOW_RAM_WORDS; i++) { ++ /* ++ * Determine whether to write the value stored ++ * in the other NVM bank or a modified value stored ++ * in the shadow RAM ++ */ ++ if (dev_spec->shadow_ram[i].modified) { ++ data = dev_spec->shadow_ram[i].value; ++ } else { ++ ret_val = e1000_read_flash_word_ich8lan(hw, i + ++ old_bank_offset, ++ &data); ++ if (ret_val) ++ break; ++ } ++ ++ /* ++ * If the word is 0x13, then make sure the signature bits ++ * (15:14) are 11b until the commit has completed. ++ * This will allow us to write 10b which indicates the ++ * signature is valid. We want to do this after the write ++ * has completed so that we don't mark the segment valid ++ * while the write is still in progress ++ */ ++ if (i == E1000_ICH_NVM_SIG_WORD) ++ data |= E1000_ICH_NVM_SIG_MASK; ++ ++ /* Convert offset to bytes. */ ++ act_offset = (i + new_bank_offset) << 1; ++ ++ udelay(100); ++ /* Write the bytes to the new bank. 
*/ ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset, ++ (u8)data); ++ if (ret_val) ++ break; ++ ++ udelay(100); ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset + 1, ++ (u8)(data >> 8)); ++ if (ret_val) ++ break; ++ } ++ ++ /* ++ * Don't bother writing the segment valid bits if sector ++ * programming failed. ++ */ ++ if (ret_val) { ++ /* Possibly read-only, see e1000e_write_protect_nvm_ich8lan() */ ++ e_dbg("Flash commit failed.\n"); ++ goto release; ++ } ++ ++ /* ++ * Finally validate the new segment by setting bit 15:14 ++ * to 10b in word 0x13 , this can be done without an ++ * erase as well since these bits are 11 to start with ++ * and we need to change bit 14 to 0b ++ */ ++ act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD; ++ ret_val = e1000_read_flash_word_ich8lan(hw, act_offset, &data); ++ if (ret_val) ++ goto release; ++ ++ data &= 0xBFFF; ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset * 2 + 1, ++ (u8)(data >> 8)); ++ if (ret_val) ++ goto release; ++ ++ /* ++ * And invalidate the previously valid segment by setting ++ * its signature word (0x13) high_byte to 0b. This can be ++ * done without an erase because flash erase sets all bits ++ * to 1's. We can write 1's to 0's without an erase ++ */ ++ act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1; ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset, 0); ++ if (ret_val) ++ goto release; ++ ++ /* Great! Everything worked, we can now clear the cached entries. */ ++ for (i = 0; i < E1000_ICH8_SHADOW_RAM_WORDS; i++) { ++ dev_spec->shadow_ram[i].modified = false; ++ dev_spec->shadow_ram[i].value = 0xFFFF; ++ } ++ ++release: ++ nvm->ops.release(hw); ++ ++ /* ++ * Reload the EEPROM, or else modifications will not appear ++ * until after the next adapter reset. ++ */ ++ if (!ret_val) { ++ e1000e_reload_nvm(hw); ++ usleep_range(10000, 20000); ++ } ++ ++out: ++ if (ret_val) ++ e_dbg("NVM update error: %d\n", ret_val); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_validate_nvm_checksum_ich8lan - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Check to see if checksum needs to be fixed by reading bit 6 in word 0x19. ++ * If the bit is 0, that the EEPROM had been modified, but the checksum was not ++ * calculated, in which case we need to calculate the checksum and set bit 6. ++ **/ ++static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 data; ++ ++ /* ++ * Read 0x19 and check bit 6. If this bit is 0, the checksum ++ * needs to be fixed. This bit is an indication that the NVM ++ * was prepared by OEM software and did not calculate the ++ * checksum...a likely scenario. ++ */ ++ ret_val = e1000_read_nvm(hw, 0x19, 1, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((data & 0x40) == 0) { ++ data |= 0x40; ++ ret_val = e1000_write_nvm(hw, 0x19, 1, &data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000e_update_nvm_checksum(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return e1000e_validate_nvm_checksum_generic(hw); ++} ++ ++/** ++ * e1000e_write_protect_nvm_ich8lan - Make the NVM read-only ++ * @hw: pointer to the HW structure ++ * ++ * To prevent malicious write/erase of the NVM, set it to be read-only ++ * so that the hardware ignores all write/erase cycles of the NVM via ++ * the flash control registers. The shadow-ram copy of the NVM will ++ * still be updated, however any updates to this copy will not stick ++ * across driver reloads. 
++ **/ ++void e1000e_write_protect_nvm_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ union ich8_flash_protected_range pr0; ++ union ich8_hws_flash_status hsfsts; ++ u32 gfpreg; ++ ++ nvm->ops.acquire(hw); ++ ++ gfpreg = er32flash(ICH_FLASH_GFPREG); ++ ++ /* Write-protect GbE Sector of NVM */ ++ pr0.regval = er32flash(ICH_FLASH_PR0); ++ pr0.range.base = gfpreg & FLASH_GFPREG_BASE_MASK; ++ pr0.range.limit = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK); ++ pr0.range.wpe = true; ++ ew32flash(ICH_FLASH_PR0, pr0.regval); ++ ++ /* ++ * Lock down a subset of GbE Flash Control Registers, e.g. ++ * PR0 to prevent the write-protection from being lifted. ++ * Once FLOCKDN is set, the registers protected by it cannot ++ * be written until FLOCKDN is cleared by a hardware reset. ++ */ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ hsfsts.hsf_status.flockdn = true; ++ ew32flash(ICH_FLASH_HSFSTS, hsfsts.regval); ++ ++ nvm->ops.release(hw); ++} ++ ++/** ++ * e1000_write_flash_data_ich8lan - Writes bytes to the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the byte/word to read. ++ * @size: Size of data to read, 1=byte 2=word ++ * @data: The byte(s) to write to the NVM. ++ * ++ * Writes one/two bytes to the NVM using the flash access registers. ++ **/ ++static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16 data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ u32 flash_data = 0; ++ s32 ret_val; ++ u8 count = 0; ++ ++ if (size < 1 || size > 2 || data > size * 0xff || ++ offset > ICH_FLASH_LINEAR_ADDR_MASK) ++ return -E1000_ERR_NVM; ++ ++ flash_linear_addr = (ICH_FLASH_LINEAR_ADDR_MASK & offset) + ++ hw->nvm.flash_base_addr; ++ ++ do { ++ udelay(1); ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val) ++ break; ++ ++ hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ ++ hsflctl.hsf_ctrl.fldbcount = size -1; ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; ++ ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ ew32flash(ICH_FLASH_FADDR, flash_linear_addr); ++ ++ if (size == 1) ++ flash_data = (u32)data & 0x00FF; ++ else ++ flash_data = (u32)data; ++ ++ ew32flash(ICH_FLASH_FDATA0, flash_data); ++ ++ /* ++ * check if FCERR is set to 1 , if set to 1, clear it ++ * and try the whole sequence a few more times else done ++ */ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_WRITE_COMMAND_TIMEOUT); ++ if (!ret_val) ++ break; ++ ++ /* ++ * If we're here, then things are most likely ++ * completely hosed, but if the error condition ++ * is detected, it won't hurt to give it another ++ * try...ICH_FLASH_CYCLE_REPEAT_COUNT times. ++ */ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) ++ /* Repeat for some time before giving up. */ ++ continue; ++ if (hsfsts.hsf_status.flcdone == 0) { ++ e_dbg("Timeout error - flash cycle " ++ "did not complete."); ++ break; ++ } ++ } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_flash_byte_ich8lan - Write a single byte to NVM ++ * @hw: pointer to the HW structure ++ * @offset: The index of the byte to read. ++ * @data: The byte to write to the NVM. ++ * ++ * Writes a single byte to the NVM using the flash access registers. 
++ **/ ++static s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 data) ++{ ++ u16 word = (u16)data; ++ ++ return e1000_write_flash_data_ich8lan(hw, offset, 1, word); ++} ++ ++/** ++ * e1000_retry_write_flash_byte_ich8lan - Writes a single byte to NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset of the byte to write. ++ * @byte: The byte to write to the NVM. ++ * ++ * Writes a single byte to the NVM using the flash access registers. ++ * Goes through a retry algorithm before giving up. ++ **/ ++static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, ++ u32 offset, u8 byte) ++{ ++ s32 ret_val; ++ u16 program_retries; ++ ++ ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); ++ if (!ret_val) ++ return ret_val; ++ ++ for (program_retries = 0; program_retries < 100; program_retries++) { ++ e_dbg("Retrying Byte %2.2X at offset %u\n", byte, offset); ++ udelay(100); ++ ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); ++ if (!ret_val) ++ break; ++ } ++ if (program_retries == 100) ++ return -E1000_ERR_NVM; ++ ++ return 0; ++} ++ ++/** ++ * e1000_erase_flash_bank_ich8lan - Erase a bank (4k) from NVM ++ * @hw: pointer to the HW structure ++ * @bank: 0 for first bank, 1 for second bank, etc. ++ * ++ * Erases the bank specified. Each bank is a 4k block. Banks are 0 based. ++ * bank N is 4096 * N + flash_reg_addr. ++ **/ ++static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ /* bank size is in 16bit words - adjust to bytes */ ++ u32 flash_bank_size = nvm->flash_bank_size * 2; ++ s32 ret_val; ++ s32 count = 0; ++ s32 j, iteration, sector_size; ++ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ ++ /* ++ * Determine HW Sector size: Read BERASE bits of hw flash status ++ * register ++ * 00: The Hw sector is 256 bytes, hence we need to erase 16 ++ * consecutive sectors. The start index for the nth Hw sector ++ * can be calculated as = bank * 4096 + n * 256 ++ * 01: The Hw sector is 4K bytes, hence we need to erase 1 sector. ++ * The start index for the nth Hw sector can be calculated ++ * as = bank * 4096 ++ * 10: The Hw sector is 8K bytes, nth sector = bank * 8192 ++ * (ich9 only, otherwise error condition) ++ * 11: The Hw sector is 64K bytes, nth sector = bank * 65536 ++ */ ++ switch (hsfsts.hsf_status.berasesz) { ++ case 0: ++ /* Hw sector size 256 */ ++ sector_size = ICH_FLASH_SEG_SIZE_256; ++ iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_256; ++ break; ++ case 1: ++ sector_size = ICH_FLASH_SEG_SIZE_4K; ++ iteration = 1; ++ break; ++ case 2: ++ sector_size = ICH_FLASH_SEG_SIZE_8K; ++ iteration = 1; ++ break; ++ case 3: ++ sector_size = ICH_FLASH_SEG_SIZE_64K; ++ iteration = 1; ++ break; ++ default: ++ return -E1000_ERR_NVM; ++ } ++ ++ /* Start with the base address, then add the sector offset. */ ++ flash_linear_addr = hw->nvm.flash_base_addr; ++ flash_linear_addr += (bank) ? 
flash_bank_size : 0; ++ ++ for (j = 0; j < iteration ; j++) { ++ do { ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Write a value 11 (block Erase) in Flash ++ * Cycle field in hw flash control ++ */ ++ hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE; ++ ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* ++ * Write the last 24 bits of an index within the ++ * block into Flash Linear address field in Flash ++ * Address. ++ */ ++ flash_linear_addr += (j * sector_size); ++ ew32flash(ICH_FLASH_FADDR, flash_linear_addr); ++ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_ERASE_COMMAND_TIMEOUT); ++ if (ret_val == 0) ++ break; ++ ++ /* ++ * Check if FCERR is set to 1. If 1, ++ * clear it and try the whole sequence ++ * a few more times else Done ++ */ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) ++ /* repeat for some time before giving up */ ++ continue; ++ else if (hsfsts.hsf_status.flcdone == 0) ++ return ret_val; ++ } while (++count < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_valid_led_default_ich8lan - Set the default LED settings ++ * @hw: pointer to the HW structure ++ * @data: Pointer to the LED settings ++ * ++ * Reads the LED default settings from the NVM to data. If the NVM LED ++ * settings is all 0's or F's, set the LED default to a valid LED default ++ * setting. ++ **/ ++static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ return ret_val; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || ++ *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT_ICH8LAN; ++ ++ return 0; ++} ++ ++/** ++ * e1000_id_led_init_pchlan - store LED configurations ++ * @hw: pointer to the HW structure ++ * ++ * PCH does not control LEDs via the LEDCTL register, rather it uses ++ * the PHY LED configuration register. ++ * ++ * PCH also does not have an "always on" or "always off" mode which ++ * complicates the ID feature. Instead of using the "on" mode to indicate ++ * in ledctl_mode2 the LEDs to use for ID (see e1000e_id_led_init()), ++ * use "link_up" mode. The LEDs will still ID on request if there is no ++ * link based on logic in e1000_led_[on|off]_pchlan(). 
++ **/ ++static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ const u32 ledctl_on = E1000_LEDCTL_MODE_LINK_UP; ++ const u32 ledctl_off = E1000_LEDCTL_MODE_LINK_UP | E1000_PHY_LED0_IVRT; ++ u16 data, i, temp, shift; ++ ++ /* Get default ID LED modes */ ++ ret_val = hw->nvm.ops.valid_led_default(hw, &data); ++ if (ret_val) ++ goto out; ++ ++ mac->ledctl_default = er32(LEDCTL); ++ mac->ledctl_mode1 = mac->ledctl_default; ++ mac->ledctl_mode2 = mac->ledctl_default; ++ ++ for (i = 0; i < 4; i++) { ++ temp = (data >> (i << 2)) & E1000_LEDCTL_LED0_MODE_MASK; ++ shift = (i * 5); ++ switch (temp) { ++ case ID_LED_ON1_DEF2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_ON1_OFF2: ++ mac->ledctl_mode1 &= ~(E1000_PHY_LED0_MASK << shift); ++ mac->ledctl_mode1 |= (ledctl_on << shift); ++ break; ++ case ID_LED_OFF1_DEF2: ++ case ID_LED_OFF1_ON2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode1 &= ~(E1000_PHY_LED0_MASK << shift); ++ mac->ledctl_mode1 |= (ledctl_off << shift); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ switch (temp) { ++ case ID_LED_DEF1_ON2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_OFF1_ON2: ++ mac->ledctl_mode2 &= ~(E1000_PHY_LED0_MASK << shift); ++ mac->ledctl_mode2 |= (ledctl_on << shift); ++ break; ++ case ID_LED_DEF1_OFF2: ++ case ID_LED_ON1_OFF2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode2 &= ~(E1000_PHY_LED0_MASK << shift); ++ mac->ledctl_mode2 |= (ledctl_off << shift); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_bus_info_ich8lan - Get/Set the bus type and width ++ * @hw: pointer to the HW structure ++ * ++ * ICH8 use the PCI Express bus, but does not contain a PCI Express Capability ++ * register, so the the bus width is hard coded. ++ **/ ++static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ s32 ret_val; ++ ++ ret_val = e1000e_get_bus_info_pcie(hw); ++ ++ /* ++ * ICH devices are "PCI Express"-ish. They have ++ * a configuration space, but do not contain ++ * PCI Express Capability registers, so bus width ++ * must be hardcoded. ++ */ ++ if (bus->width == e1000_bus_width_unknown) ++ bus->width = e1000_bus_width_pcie_x1; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_ich8lan - Reset the hardware ++ * @hw: pointer to the HW structure ++ * ++ * Does a full reset of the hardware which includes a reset of the PHY and ++ * MAC. ++ **/ ++static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u16 reg; ++ u32 ctrl, kab; ++ s32 ret_val; ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000e_disable_pcie_master(hw); ++ if (ret_val) ++ e_dbg("PCI-E Master disable polling has failed.\n"); ++ ++ e_dbg("Masking off all interrupts\n"); ++ ew32(IMC, 0xffffffff); ++ ++ /* ++ * Disable the Transmit and Receive units. Then delay to allow ++ * any pending transactions to complete before we hit the MAC ++ * with the global reset. ++ */ ++ ew32(RCTL, 0); ++ ew32(TCTL, E1000_TCTL_PSP); ++ e1e_flush(); ++ ++ usleep_range(10000, 20000); ++ ++ /* Workaround for ICH8 bit corruption issue in FIFO memory */ ++ if (hw->mac.type == e1000_ich8lan) { ++ /* Set Tx and Rx buffer allocation to 8k apiece. */ ++ ew32(PBA, E1000_PBA_8K); ++ /* Set Packet Buffer Size to 16k. 
*/ ++ ew32(PBS, E1000_PBS_16K); ++ } ++ ++ if (hw->mac.type == e1000_pchlan) { ++ /* Save the NVM K1 bit setting*/ ++ ret_val = e1000_read_nvm(hw, E1000_NVM_K1_CONFIG, 1, ®); ++ if (ret_val) ++ return ret_val; ++ ++ if (reg & E1000_NVM_K1_ENABLE) ++ dev_spec->nvm_k1_enabled = true; ++ else ++ dev_spec->nvm_k1_enabled = false; ++ } ++ ++ ctrl = er32(CTRL); ++ ++ if (!e1000_check_reset_block(hw)) { ++ /* ++ * Full-chip reset requires MAC and PHY reset at the same ++ * time to make sure the interface between MAC and the ++ * external PHY is reset. ++ */ ++ ctrl |= E1000_CTRL_PHY_RST; ++ ++ /* ++ * Gate automatic PHY configuration by hardware on ++ * non-managed 82579 ++ */ ++ if ((hw->mac.type == e1000_pch2lan) && ++ !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) ++ e1000_gate_hw_phy_config_ich8lan(hw, true); ++ } ++ ret_val = e1000_acquire_swflag_ich8lan(hw); ++ e_dbg("Issuing a global reset to ich8lan\n"); ++ ew32(CTRL, (ctrl | E1000_CTRL_RST)); ++ /* cannot issue a flush here because it hangs the hardware */ ++ msleep(20); ++ ++ if (!ret_val) ++ clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state); ++ ++ if (ctrl & E1000_CTRL_PHY_RST) { ++ ret_val = hw->phy.ops.get_cfg_done(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_post_phy_reset_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* ++ * For PCH, this write will make sure that any noise ++ * will be detected as a CRC error and be dropped rather than show up ++ * as a bad packet to the DMA engine. ++ */ ++ if (hw->mac.type == e1000_pchlan) ++ ew32(CRC_OFFSET, 0x65656565); ++ ++ ew32(IMC, 0xffffffff); ++ er32(ICR); ++ ++ kab = er32(KABGTXD); ++ kab |= E1000_KABGTXD_BGSQLBIAS; ++ ew32(KABGTXD, kab); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_ich8lan - Initialize the hardware ++ * @hw: pointer to the HW structure ++ * ++ * Prepares the hardware for transmit and receive by doing the following: ++ * - initialize hardware bits ++ * - initialize LED identification ++ * - setup receive address registers ++ * - setup flow control ++ * - setup transmit descriptors ++ * - clear statistics ++ **/ ++static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 ctrl_ext, txdctl, snoop; ++ s32 ret_val; ++ u16 i; ++ ++ e1000_initialize_hw_bits_ich8lan(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = mac->ops.id_led_init(hw); ++ if (ret_val) ++ e_dbg("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ ++ /* Setup the receive address. */ ++ e1000e_init_rx_addrs(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ e_dbg("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* ++ * The 82578 Rx buffer will stall if wakeup is enabled in host and ++ * the ME. Disable wakeup by clearing the host wakeup bit. ++ * Reset the phy after disabling host wakeup to reset the Rx buffer. 
++ */ ++ if (hw->phy.type == e1000_phy_82578) { ++ e1e_rphy(hw, BM_PORT_GEN_CFG, &i); ++ i &= ~BM_WUC_HOST_WU_BIT; ++ e1e_wphy(hw, BM_PORT_GEN_CFG, i); ++ ret_val = e1000_phy_hw_reset_ich8lan(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link_ich8lan(hw); ++ ++ /* Set the transmit descriptor write-back policy for both queues */ ++ txdctl = er32(TXDCTL(0)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ txdctl = (txdctl & ~E1000_TXDCTL_PTHRESH) | ++ E1000_TXDCTL_MAX_TX_DESC_PREFETCH; ++ ew32(TXDCTL(0), txdctl); ++ txdctl = er32(TXDCTL(1)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ txdctl = (txdctl & ~E1000_TXDCTL_PTHRESH) | ++ E1000_TXDCTL_MAX_TX_DESC_PREFETCH; ++ ew32(TXDCTL(1), txdctl); ++ ++ /* ++ * ICH8 has opposite polarity of no_snoop bits. ++ * By default, we should use snoop behavior. ++ */ ++ if (mac->type == e1000_ich8lan) ++ snoop = PCIE_ICH8_SNOOP_ALL; ++ else ++ snoop = (u32) ~(PCIE_NO_SNOOP_ALL); ++ e1000e_set_pcie_no_snoop(hw, snoop); ++ ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ++ ew32(CTRL_EXT, ctrl_ext); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_ich8lan(hw); ++ ++ return 0; ++} ++/** ++ * e1000_initialize_hw_bits_ich8lan - Initialize required hardware bits ++ * @hw: pointer to the HW structure ++ * ++ * Sets/Clears required hardware bits necessary for correctly setting up the ++ * hardware for transmit and receive. ++ **/ ++static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ /* Extended Device Control */ ++ reg = er32(CTRL_EXT); ++ reg |= (1 << 22); ++ /* Enable PHY low-power state when MAC is at D3 w/o WoL */ ++ if (hw->mac.type >= e1000_pchlan) ++ reg |= E1000_CTRL_EXT_PHYPDEN; ++ ew32(CTRL_EXT, reg); ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = er32(TXDCTL(0)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = er32(TXDCTL(1)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = er32(TARC(0)); ++ if (hw->mac.type == e1000_ich8lan) ++ reg |= (1 << 28) | (1 << 29); ++ reg |= (1 << 23) | (1 << 24) | (1 << 26) | (1 << 27); ++ ew32(TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = er32(TARC(1)); ++ if (er32(TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ reg |= (1 << 24) | (1 << 26) | (1 << 30); ++ ew32(TARC(1), reg); ++ ++ /* Device Status */ ++ if (hw->mac.type == e1000_ich8lan) { ++ reg = er32(STATUS); ++ reg &= ~(1 << 31); ++ ew32(STATUS, reg); ++ } ++ ++ /* ++ * work-around descriptor data corruption issue during nfs v2 udp ++ * traffic, just disable the nfs filtering capability ++ */ ++ reg = er32(RFCTL); ++ reg |= (E1000_RFCTL_NFSW_DIS | E1000_RFCTL_NFSR_DIS); ++ ew32(RFCTL, reg); ++} ++ ++/** ++ * e1000_setup_link_ich8lan - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. 
Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ if (e1000_check_reset_block(hw)) ++ return 0; ++ ++ /* ++ * ICH parts do not have a word in the NVM to determine ++ * the default flow control setting, so we explicitly ++ * set it to full. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_default) { ++ /* Workaround h/w hang when Tx flow control enabled */ ++ if (hw->mac.type == e1000_pchlan) ++ hw->fc.requested_mode = e1000_fc_rx_pause; ++ else ++ hw->fc.requested_mode = e1000_fc_full; ++ } ++ ++ /* ++ * Save off the requested flow control mode for use later. Depending ++ * on the link partner's capabilities, we may or may not use this mode. ++ */ ++ hw->fc.current_mode = hw->fc.requested_mode; ++ ++ e_dbg("After fix-ups FlowControl is now = %x\n", ++ hw->fc.current_mode); ++ ++ /* Continue to configure the copper link. */ ++ ret_val = e1000_setup_copper_link_ich8lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ew32(FCTTV, hw->fc.pause_time); ++ if ((hw->phy.type == e1000_phy_82578) || ++ (hw->phy.type == e1000_phy_82579) || ++ (hw->phy.type == e1000_phy_i217) || ++ (hw->phy.type == e1000_phy_82577)) { ++ ew32(FCRTV_PCH, hw->fc.refresh_time); ++ ++ ret_val = e1e_wphy(hw, PHY_REG(BM_PORT_CTRL_PAGE, 27), ++ hw->fc.pause_time); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return e1000e_set_fc_watermarks(hw); ++} ++ ++/** ++ * e1000_setup_copper_link_ich8lan - Configure MAC/PHY interface ++ * @hw: pointer to the HW structure ++ * ++ * Configures the kumeran interface to the PHY to wait the appropriate time ++ * when polling the PHY, then call the generic setup_copper_link to finish ++ * configuring the copper link. ++ **/ ++static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u16 reg_data; ++ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ew32(CTRL, ctrl); ++ ++ /* ++ * Set the mac to wait the maximum time between each iteration ++ * and increase the max iterations when polling the phy; ++ * this fixes erroneous timeouts at 10Mbps. 
++ */ ++ ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_TIMEOUTS, 0xFFFF); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000e_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, ++ ®_data); ++ if (ret_val) ++ return ret_val; ++ reg_data |= 0x3F; ++ ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ switch (hw->phy.type) { ++ case e1000_phy_igp_3: ++ ret_val = e1000e_copper_link_setup_igp(hw); ++ if (ret_val) ++ return ret_val; ++ break; ++ case e1000_phy_bm: ++ case e1000_phy_82578: ++ ret_val = e1000e_copper_link_setup_m88(hw); ++ if (ret_val) ++ return ret_val; ++ break; ++ case e1000_phy_82577: ++ case e1000_phy_82579: ++ case e1000_phy_i217: ++ ret_val = e1000_copper_link_setup_82577(hw); ++ if (ret_val) ++ return ret_val; ++ break; ++ case e1000_phy_ife: ++ ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, ®_data); ++ if (ret_val) ++ return ret_val; ++ ++ reg_data &= ~IFE_PMC_AUTO_MDIX; ++ ++ switch (hw->phy.mdix) { ++ case 1: ++ reg_data &= ~IFE_PMC_FORCE_MDIX; ++ break; ++ case 2: ++ reg_data |= IFE_PMC_FORCE_MDIX; ++ break; ++ case 0: ++ default: ++ reg_data |= IFE_PMC_AUTO_MDIX; ++ break; ++ } ++ ret_val = e1e_wphy(hw, IFE_PHY_MDIX_CONTROL, reg_data); ++ if (ret_val) ++ return ret_val; ++ break; ++ default: ++ break; ++ } ++ return e1000e_setup_copper_link(hw); ++} ++ ++/** ++ * e1000_get_link_up_info_ich8lan - Get current link speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to store current link speed ++ * @duplex: pointer to store the current link duplex ++ * ++ * Calls the generic get_speed_and_duplex to retrieve the current link ++ * information and then calls the Kumeran lock loss workaround for links at ++ * gigabit speeds. ++ **/ ++static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000e_get_speed_and_duplex_copper(hw, speed, duplex); ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3) && ++ (*speed == SPEED_1000)) { ++ ret_val = e1000_kmrn_lock_loss_workaround_ich8lan(hw); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_kmrn_lock_loss_workaround_ich8lan - Kumeran workaround ++ * @hw: pointer to the HW structure ++ * ++ * Work-around for 82566 Kumeran PCS lock loss: ++ * On link status change (i.e. PCI reset, speed change) and link is up and ++ * speed is gigabit- ++ * 0) if workaround is optionally disabled do nothing ++ * 1) wait 1ms for Kumeran link to come up ++ * 2) check Kumeran Diagnostic register PCS lock loss bit ++ * 3) if not set the link is locked (all is good), otherwise... ++ * 4) reset the PHY ++ * 5) repeat up to 10 times ++ * Note: this is only called for IGP3 copper when speed is 1gb. ++ **/ ++static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u32 phy_ctrl; ++ s32 ret_val; ++ u16 i, data; ++ bool link; ++ ++ if (!dev_spec->kmrn_lock_loss_workaround_enabled) ++ return 0; ++ ++ /* ++ * Make sure link is up before proceeding. If not just return. 
++ * Attempting this while link is negotiating fouled up link ++ * stability ++ */ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (!link) ++ return 0; ++ ++ for (i = 0; i < 10; i++) { ++ /* read once to clear */ ++ ret_val = e1e_rphy(hw, IGP3_KMRN_DIAG, &data); ++ if (ret_val) ++ return ret_val; ++ /* and again to get new status */ ++ ret_val = e1e_rphy(hw, IGP3_KMRN_DIAG, &data); ++ if (ret_val) ++ return ret_val; ++ ++ /* check for PCS lock */ ++ if (!(data & IGP3_KMRN_DIAG_PCS_LOCK_LOSS)) ++ return 0; ++ ++ /* Issue PHY reset */ ++ e1000_phy_hw_reset(hw); ++ mdelay(5); ++ } ++ /* Disable GigE link negotiation */ ++ phy_ctrl = er32(PHY_CTRL); ++ phy_ctrl |= (E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ /* ++ * Call gig speed drop workaround on Gig disable before accessing ++ * any PHY registers ++ */ ++ e1000e_gig_downshift_workaround_ich8lan(hw); ++ ++ /* unable to acquire PCS lock */ ++ return -E1000_ERR_PHY; ++} ++ ++/** ++ * e1000_set_kmrn_lock_loss_workaround_ich8lan - Set Kumeran workaround state ++ * @hw: pointer to the HW structure ++ * @state: boolean value used to set the current Kumeran workaround state ++ * ++ * If ICH8, set the current Kumeran workaround state (enabled - true ++ * /disabled - false). ++ **/ ++void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, ++ bool state) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ ++ if (hw->mac.type != e1000_ich8lan) { ++ e_dbg("Workaround applies to ICH8 only.\n"); ++ return; ++ } ++ ++ dev_spec->kmrn_lock_loss_workaround_enabled = state; ++} ++ ++/** ++ * e1000_ipg3_phy_powerdown_workaround_ich8lan - Power down workaround on D3 ++ * @hw: pointer to the HW structure ++ * ++ * Workaround for 82566 power-down on D3 entry: ++ * 1) disable gigabit link ++ * 2) write VR power-down enable ++ * 3) read it back ++ * Continue if successful, else issue LCD reset and repeat ++ **/ ++void e1000e_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ u16 data; ++ u8 retry = 0; ++ ++ if (hw->phy.type != e1000_phy_igp_3) ++ return; ++ ++ /* Try the workaround twice (if needed) */ ++ do { ++ /* Disable link */ ++ reg = er32(PHY_CTRL); ++ reg |= (E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ ew32(PHY_CTRL, reg); ++ ++ /* ++ * Call gig speed drop workaround on Gig disable before ++ * accessing any PHY registers ++ */ ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_gig_downshift_workaround_ich8lan(hw); ++ ++ /* Write VR power-down enable */ ++ e1e_rphy(hw, IGP3_VR_CTRL, &data); ++ data &= ~IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; ++ e1e_wphy(hw, IGP3_VR_CTRL, data | IGP3_VR_CTRL_MODE_SHUTDOWN); ++ ++ /* Read it back and test */ ++ e1e_rphy(hw, IGP3_VR_CTRL, &data); ++ data &= IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; ++ if ((data == IGP3_VR_CTRL_MODE_SHUTDOWN) || retry) ++ break; ++ ++ /* Issue PHY reset and repeat at most one more time */ ++ reg = er32(CTRL); ++ ew32(CTRL, reg | E1000_CTRL_PHY_RST); ++ retry++; ++ } while (retry); ++} ++ ++/** ++ * e1000e_gig_downshift_workaround_ich8lan - WoL from S5 stops working ++ * @hw: pointer to the HW structure ++ * ++ * Steps to take when dropping from 1Gb/s (eg. link cable removal (LSC), ++ * LPLU, Gig disable, MDIC PHY reset): ++ * 1) Set Kumeran Near-end loopback ++ * 2) Clear Kumeran Near-end loopback ++ * Should only be called for ICH8[m] devices with any 1G Phy. 
++ **/ ++void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 reg_data; ++ ++ if ((hw->mac.type != e1000_ich8lan) || (hw->phy.type == e1000_phy_ife)) ++ return; ++ ++ ret_val = e1000e_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ ®_data); ++ if (ret_val) ++ return; ++ reg_data |= E1000_KMRNCTRLSTA_DIAG_NELPBK; ++ ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ reg_data); ++ if (ret_val) ++ return; ++ reg_data &= ~E1000_KMRNCTRLSTA_DIAG_NELPBK; ++ ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ reg_data); ++} ++ ++/** ++ * e1000_suspend_workarounds_ich8lan - workarounds needed during S0->Sx ++ * @hw: pointer to the HW structure ++ * ++ * During S0 to Sx transition, it is possible the link remains at gig ++ * instead of negotiating to a lower speed. Before going to Sx, set ++ * 'LPLU Enabled' and 'Gig Disable' to force link speed negotiation ++ * to a lower speed. For PCH and newer parts, the OEM bits PHY register ++ * (LED, GbE disable and LPLU configurations) also needs to be written. ++ * Parts that support (and are linked to a partner which support) EEE in ++ * 100Mbps should disable LPLU since 100Mbps w/ EEE requires less power ++ * than 10Mbps w/o EEE. ++ **/ ++void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u32 phy_ctrl; ++ s32 ret_val; ++ ++ phy_ctrl = er32(PHY_CTRL); ++ phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU | E1000_PHY_CTRL_GBE_DISABLE; ++ ++ if (hw->phy.type == e1000_phy_i217) { ++ u16 phy_reg; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ if (!dev_spec->eee_disable) { ++ u16 eee_advert; ++ ++ ret_val = e1e_wphy_locked(hw, I82579_EMI_ADDR, ++ I217_EEE_ADVERTISEMENT); ++ if (ret_val) ++ goto release; ++ e1e_rphy_locked(hw, I82579_EMI_DATA, &eee_advert); ++ ++ /* Disable LPLU if both link partners support 100BaseT ++ * EEE and 100Full is advertised on both ends of the ++ * link. ++ */ ++ if ((eee_advert & I217_EEE_100_SUPPORTED) && ++ (dev_spec->eee_lp_ability & ++ I217_EEE_100_SUPPORTED) && ++ (hw->phy.autoneg_advertised & ADVERTISE_100_FULL)) ++ phy_ctrl &= ~(E1000_PHY_CTRL_D0A_LPLU | ++ E1000_PHY_CTRL_NOND0A_LPLU); ++ } ++ ++ /* For i217 Intel Rapid Start Technology support, ++ * when the system is going into Sx and no manageability engine ++ * is present, the driver must configure proxy to reset only on ++ * power good. LPI (Low Power Idle) state must also reset only ++ * on power good, as well as the MTA (Multicast table array). ++ * The SMBus release must also be disabled on LCD reset. ++ */ ++ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { ++ ++ /* Enable proxy to reset only on power good. */ ++ e1e_rphy_locked(hw, I217_PROXY_CTRL, &phy_reg); ++ phy_reg |= I217_PROXY_CTRL_AUTO_DISABLE; ++ e1e_wphy_locked(hw, I217_PROXY_CTRL, phy_reg); ++ ++ /* Set bit enable LPI (EEE) to reset only on ++ * power good. ++ */ ++ e1e_rphy_locked(hw, I217_SxCTRL, &phy_reg); ++ phy_reg |= I217_SxCTRL_MASK; ++ e1e_wphy_locked(hw, I217_SxCTRL, phy_reg); ++ ++ /* Disable the SMB release on LCD reset. 
*/ ++ e1e_rphy_locked(hw, I217_MEMPWR, &phy_reg); ++ phy_reg &= ~I217_MEMPWR; ++ e1e_wphy_locked(hw, I217_MEMPWR, phy_reg); ++ } ++ ++ /* Enable MTA to reset for Intel Rapid Start Technology ++ * Support ++ */ ++ e1e_rphy_locked(hw, I217_CGFREG, &phy_reg); ++ phy_reg |= I217_CGFREG_MASK; ++ e1e_wphy_locked(hw, I217_CGFREG, phy_reg); ++ ++release: ++ hw->phy.ops.release(hw); ++ } ++out: ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_gig_downshift_workaround_ich8lan(hw); ++ ++ if (hw->mac.type >= e1000_pchlan) { ++ e1000_oem_bits_config_ich8lan(hw, false); ++ e1000_phy_hw_reset_ich8lan(hw); ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return; ++ e1000_write_smbus_addr(hw); ++ hw->phy.ops.release(hw); ++ } ++} ++ ++/** ++ * e1000_resume_workarounds_pchlan - workarounds needed during Sx->S0 ++ * @hw: pointer to the HW structure ++ * ++ * During Sx to S0 transitions on non-managed devices or managed devices ++ * on which PHY resets are not blocked, if the PHY registers cannot be ++ * accessed properly by the s/w toggle the LANPHYPC value to power cycle ++ * the PHY. ++ * On i217, setup Intel Rapid Start Technology. ++ **/ ++void e1000_resume_workarounds_pchlan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ if (hw->mac.type != e1000_pch2lan) ++ return; ++ ++ fwsm = er32(FWSM); ++ if (!(fwsm & E1000_ICH_FWSM_FW_VALID) || !e1000_check_reset_block(hw)) { ++ u16 phy_id1, phy_id2; ++ s32 ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) { ++ e_dbg("Failed to acquire PHY semaphore in resume\n"); ++ return; ++ } ++ ++ /* For i217 Intel Rapid Start Technology support when the system ++ * is transitioning from Sx and no manageability engine is present ++ * configure SMBus to restore on reset, disable proxy, and enable ++ * the reset on MTA (Multicast table array). ++ */ ++ if (hw->phy.type == e1000_phy_i217) { ++ u16 phy_reg; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) { ++ e_dbg("Failed to setup iRST\n"); ++ return; ++ } ++ ++ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { ++ /* Restore clear on SMB if no manageability engine ++ * is present ++ */ ++ ret_val = e1e_rphy_locked(hw, I217_MEMPWR, &phy_reg); ++ if (ret_val) ++ goto _release; ++ phy_reg |= I217_MEMPWR_MASK; ++ e1e_wphy_locked(hw, I217_MEMPWR, phy_reg); ++ ++ /* Disable Proxy */ ++ e1e_wphy_locked(hw, I217_PROXY_CTRL, 0); ++ } ++ /* Enable reset on MTA */ ++ ret_val = e1e_rphy_locked(hw, I217_CGFREG, &phy_reg); ++ if (ret_val) ++ goto _release; ++ phy_reg &= ~I217_CGFREG_MASK; ++ e1e_wphy_locked(hw, I217_CGFREG, phy_reg); ++ _release: ++ if (ret_val) ++ e_dbg("Error %d in resume workarounds\n", ret_val); ++ hw->phy.ops.release(hw); ++ } ++ ++ /* Test access to the PHY registers by reading the ID regs */ ++ ret_val = hw->phy.ops.read_reg_locked(hw, PHY_ID1, &phy_id1); ++ if (ret_val) ++ goto release; ++ ret_val = hw->phy.ops.read_reg_locked(hw, PHY_ID2, &phy_id2); ++ if (ret_val) ++ goto release; ++ ++ if (hw->phy.id == ((u32)(phy_id1 << 16) | ++ (u32)(phy_id2 & PHY_REVISION_MASK))) ++ goto release; ++ ++ e1000_toggle_lanphypc_value_ich8lan(hw); ++ ++ hw->phy.ops.release(hw); ++ msleep(50); ++ e1000_phy_hw_reset(hw); ++ msleep(50); ++ return; ++ } ++ ++release: ++ hw->phy.ops.release(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_cleanup_led_ich8lan - Restore the default LED operation ++ * @hw: pointer to the HW structure ++ * ++ * Return the LED back to the default configuration. 
++ **/ ++static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw) ++{ ++ if (hw->phy.type == e1000_phy_ife) ++ return e1e_wphy(hw, IFE_PHY_SPECIAL_CONTROL_LED, 0); ++ ++ ew32(LEDCTL, hw->mac.ledctl_default); ++ return 0; ++} ++ ++/** ++ * e1000_led_on_ich8lan - Turn LEDs on ++ * @hw: pointer to the HW structure ++ * ++ * Turn on the LEDs. ++ **/ ++static s32 e1000_led_on_ich8lan(struct e1000_hw *hw) ++{ ++ if (hw->phy.type == e1000_phy_ife) ++ return e1e_wphy(hw, IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_ON)); ++ ++ ew32(LEDCTL, hw->mac.ledctl_mode2); ++ return 0; ++} ++ ++/** ++ * e1000_led_off_ich8lan - Turn LEDs off ++ * @hw: pointer to the HW structure ++ * ++ * Turn off the LEDs. ++ **/ ++static s32 e1000_led_off_ich8lan(struct e1000_hw *hw) ++{ ++ if (hw->phy.type == e1000_phy_ife) ++ return e1e_wphy(hw, IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | ++ IFE_PSCL_PROBE_LEDS_OFF)); ++ ++ ew32(LEDCTL, hw->mac.ledctl_mode1); ++ return 0; ++} ++ ++/** ++ * e1000_setup_led_pchlan - Configures SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This prepares the SW controllable LED for use. ++ **/ ++static s32 e1000_setup_led_pchlan(struct e1000_hw *hw) ++{ ++ return e1e_wphy(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_mode1); ++} ++ ++/** ++ * e1000_cleanup_led_pchlan - Restore the default LED operation ++ * @hw: pointer to the HW structure ++ * ++ * Return the LED back to the default configuration. ++ **/ ++static s32 e1000_cleanup_led_pchlan(struct e1000_hw *hw) ++{ ++ return e1e_wphy(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_default); ++} ++ ++/** ++ * e1000_led_on_pchlan - Turn LEDs on ++ * @hw: pointer to the HW structure ++ * ++ * Turn on the LEDs. ++ **/ ++static s32 e1000_led_on_pchlan(struct e1000_hw *hw) ++{ ++ u16 data = (u16)hw->mac.ledctl_mode2; ++ u32 i, led; ++ ++ /* ++ * If no link, then turn LED on by setting the invert bit ++ * for each LED that's mode is "link_up" in ledctl_mode2. ++ */ ++ if (!(er32(STATUS) & E1000_STATUS_LU)) { ++ for (i = 0; i < 3; i++) { ++ led = (data >> (i * 5)) & E1000_PHY_LED0_MASK; ++ if ((led & E1000_PHY_LED0_MODE_MASK) != ++ E1000_LEDCTL_MODE_LINK_UP) ++ continue; ++ if (led & E1000_PHY_LED0_IVRT) ++ data &= ~(E1000_PHY_LED0_IVRT << (i * 5)); ++ else ++ data |= (E1000_PHY_LED0_IVRT << (i * 5)); ++ } ++ } ++ ++ return e1e_wphy(hw, HV_LED_CONFIG, data); ++} ++ ++/** ++ * e1000_led_off_pchlan - Turn LEDs off ++ * @hw: pointer to the HW structure ++ * ++ * Turn off the LEDs. ++ **/ ++static s32 e1000_led_off_pchlan(struct e1000_hw *hw) ++{ ++ u16 data = (u16)hw->mac.ledctl_mode1; ++ u32 i, led; ++ ++ /* ++ * If no link, then turn LED off by clearing the invert bit ++ * for each LED that's mode is "link_up" in ledctl_mode1. ++ */ ++ if (!(er32(STATUS) & E1000_STATUS_LU)) { ++ for (i = 0; i < 3; i++) { ++ led = (data >> (i * 5)) & E1000_PHY_LED0_MASK; ++ if ((led & E1000_PHY_LED0_MODE_MASK) != ++ E1000_LEDCTL_MODE_LINK_UP) ++ continue; ++ if (led & E1000_PHY_LED0_IVRT) ++ data &= ~(E1000_PHY_LED0_IVRT << (i * 5)); ++ else ++ data |= (E1000_PHY_LED0_IVRT << (i * 5)); ++ } ++ } ++ ++ return e1e_wphy(hw, HV_LED_CONFIG, data); ++} ++ ++/** ++ * e1000_get_cfg_done_ich8lan - Read config done bit after Full or PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Read appropriate register for the config done bit for completion status ++ * and configure the PHY through s/w for EEPROM-less parts. 
++ * ++ * NOTE: some silicon which is EEPROM-less will fail trying to read the ++ * config done bit, so only an error is logged and continues. If we were ++ * to return with error, EEPROM-less silicon would not be able to be reset ++ * or change link. ++ **/ ++static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u32 bank = 0; ++ u32 status; ++ ++ e1000e_get_cfg_done(hw); ++ ++ /* Wait for indication from h/w that it has completed basic config */ ++ if (hw->mac.type >= e1000_ich10lan) { ++ e1000_lan_init_done_ich8lan(hw); ++ } else { ++ ret_val = e1000e_get_auto_rd_done(hw); ++ if (ret_val) { ++ /* ++ * When auto config read does not complete, do not ++ * return with an error. This can happen in situations ++ * where there is no eeprom and prevents getting link. ++ */ ++ e_dbg("Auto Read Done did not complete\n"); ++ ret_val = 0; ++ } ++ } ++ ++ /* Clear PHY Reset Asserted bit */ ++ status = er32(STATUS); ++ if (status & E1000_STATUS_PHYRA) ++ ew32(STATUS, status & ~E1000_STATUS_PHYRA); ++ else ++ e_dbg("PHY Reset Asserted not set - needs delay\n"); ++ ++ /* If EEPROM is not marked present, init the IGP 3 PHY manually */ ++ if (hw->mac.type <= e1000_ich9lan) { ++ if (((er32(EECD) & E1000_EECD_PRES) == 0) && ++ (hw->phy.type == e1000_phy_igp_3)) { ++ e1000e_phy_init_script_igp3(hw); ++ } ++ } else { ++ if (e1000_valid_nvm_bank_detect_ich8lan(hw, &bank)) { ++ /* Maybe we should do a basic PHY config */ ++ e_dbg("EEPROM not present\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ } ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_ich8lan - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(hw->mac.ops.check_mng_mode(hw) || ++ hw->phy.ops.check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++} ++ ++/** ++ * e1000_clear_hw_cntrs_ich8lan - Clear statistical counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears hardware counters specific to the silicon family and calls ++ * clear_hw_cntrs_generic to clear all general purpose counters. 
++ **/ ++static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw) ++{ ++ u16 phy_data; ++ s32 ret_val; ++ ++ e1000e_clear_hw_cntrs_base(hw); ++ ++ er32(ALGNERRC); ++ er32(RXERRC); ++ er32(TNCRS); ++ er32(CEXTERR); ++ er32(TSCTC); ++ er32(TSCTFC); ++ ++ er32(MGTPRC); ++ er32(MGTPDC); ++ er32(MGTPTC); ++ ++ er32(IAC); ++ er32(ICRXOC); ++ ++ /* Clear PHY statistics registers */ ++ if ((hw->phy.type == e1000_phy_82578) || ++ (hw->phy.type == e1000_phy_82579) || ++ (hw->phy.type == e1000_phy_i217) || ++ (hw->phy.type == e1000_phy_82577)) { ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return; ++ ret_val = hw->phy.ops.set_page(hw, ++ HV_STATS_PAGE << IGP_PAGE_SHIFT); ++ if (ret_val) ++ goto release; ++ hw->phy.ops.read_reg_page(hw, HV_SCC_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_SCC_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_ECOL_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_ECOL_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_MCC_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_MCC_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_LATECOL_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_LATECOL_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_COLC_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_COLC_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_DC_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_DC_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_TNCRS_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_TNCRS_LOWER, &phy_data); ++release: ++ hw->phy.ops.release(hw); ++ } ++} ++ ++static const struct e1000_mac_operations ich8_mac_ops = { ++ .id_led_init = e1000e_id_led_init, ++ /* check_mng_mode dependent on mac type */ ++ .check_for_link = e1000_check_for_copper_link_ich8lan, ++ /* cleanup_led dependent on mac type */ ++ .clear_hw_cntrs = e1000_clear_hw_cntrs_ich8lan, ++ .get_bus_info = e1000_get_bus_info_ich8lan, ++ .set_lan_id = e1000_set_lan_id_single_port, ++ .get_link_up_info = e1000_get_link_up_info_ich8lan, ++ /* led_on dependent on mac type */ ++ /* led_off dependent on mac type */ ++ .update_mc_addr_list = e1000e_update_mc_addr_list_generic, ++ .reset_hw = e1000_reset_hw_ich8lan, ++ .init_hw = e1000_init_hw_ich8lan, ++ .setup_link = e1000_setup_link_ich8lan, ++ .setup_physical_interface= e1000_setup_copper_link_ich8lan, ++ /* id_led_init dependent on mac type */ ++}; ++ ++static const struct e1000_phy_operations ich8_phy_ops = { ++ .acquire = e1000_acquire_swflag_ich8lan, ++ .check_reset_block = e1000_check_reset_block_ich8lan, ++ .commit = NULL, ++ .get_cfg_done = e1000_get_cfg_done_ich8lan, ++ .get_cable_length = e1000e_get_cable_length_igp_2, ++ .read_reg = e1000e_read_phy_reg_igp, ++ .release = e1000_release_swflag_ich8lan, ++ .reset = e1000_phy_hw_reset_ich8lan, ++ .set_d0_lplu_state = e1000_set_d0_lplu_state_ich8lan, ++ .set_d3_lplu_state = e1000_set_d3_lplu_state_ich8lan, ++ .write_reg = e1000e_write_phy_reg_igp, ++}; ++ ++static const struct e1000_nvm_operations ich8_nvm_ops = { ++ .acquire = e1000_acquire_nvm_ich8lan, ++ .read = e1000_read_nvm_ich8lan, ++ .release = e1000_release_nvm_ich8lan, ++ .update = e1000_update_nvm_checksum_ich8lan, ++ .valid_led_default = e1000_valid_led_default_ich8lan, ++ .validate = e1000_validate_nvm_checksum_ich8lan, ++ .write = e1000_write_nvm_ich8lan, ++}; ++ ++const struct e1000_info e1000_ich8_info = { ++ .mac = e1000_ich8lan, ++ .flags = FLAG_HAS_WOL ++ | FLAG_IS_ICH ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | 
FLAG_HAS_FLASH ++ | FLAG_APME_IN_WUC, ++ .pba = 8, ++ .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; ++ ++const struct e1000_info e1000_ich9_info = { ++ .mac = e1000_ich9lan, ++ .flags = FLAG_HAS_JUMBO_FRAMES ++ | FLAG_IS_ICH ++ | FLAG_HAS_WOL ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | FLAG_HAS_ERT ++ | FLAG_HAS_FLASH ++ | FLAG_APME_IN_WUC, ++ .pba = 10, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; ++ ++const struct e1000_info e1000_ich10_info = { ++ .mac = e1000_ich10lan, ++ .flags = FLAG_HAS_JUMBO_FRAMES ++ | FLAG_IS_ICH ++ | FLAG_HAS_WOL ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | FLAG_HAS_ERT ++ | FLAG_HAS_FLASH ++ | FLAG_APME_IN_WUC, ++ .pba = 10, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; ++ ++const struct e1000_info e1000_pch_info = { ++ .mac = e1000_pchlan, ++ .flags = FLAG_IS_ICH ++ | FLAG_HAS_WOL ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | FLAG_HAS_FLASH ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_DISABLE_FC_PAUSE_TIME /* errata */ ++ | FLAG_APME_IN_WUC, ++ .flags2 = FLAG2_HAS_PHY_STATS, ++ .pba = 26, ++ .max_hw_frame_size = 4096, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; ++ ++const struct e1000_info e1000_pch2_info = { ++ .mac = e1000_pch2lan, ++ .flags = FLAG_IS_ICH ++ | FLAG_HAS_WOL ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | FLAG_HAS_FLASH ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_APME_IN_WUC, ++ .flags2 = FLAG2_HAS_PHY_STATS ++ | FLAG2_HAS_EEE, ++ .pba = 26, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; ++ ++const struct e1000_info e1000_pch_lpt_info = { ++ .mac = e1000_pch_lpt, ++ .flags = FLAG_IS_ICH ++ | FLAG_HAS_WOL ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | FLAG_HAS_FLASH ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_APME_IN_WUC, ++ .flags2 = FLAG2_HAS_PHY_STATS ++ | FLAG2_HAS_EEE, ++ .pba = 26, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; +--- linux/drivers/xenomai/net/drivers/e1000e/80003es2lan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/80003es2lan.c 2021-04-07 16:01:27.194634213 +0800 +@@ -0,0 +1,1515 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* ++ * 80003ES2LAN Gigabit Ethernet Controller (Copper) ++ * 80003ES2LAN Gigabit Ethernet Controller (Serdes) ++ */ ++ ++#include "e1000.h" ++ ++#define E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL 0x00 ++#define E1000_KMRNCTRLSTA_OFFSET_INB_CTRL 0x02 ++#define E1000_KMRNCTRLSTA_OFFSET_HD_CTRL 0x10 ++#define E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE 0x1F ++ ++#define E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS 0x0008 ++#define E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS 0x0800 ++#define E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING 0x0010 ++ ++#define E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT 0x0004 ++#define E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT 0x0000 ++#define E1000_KMRNCTRLSTA_OPMODE_E_IDLE 0x2000 ++ ++#define E1000_KMRNCTRLSTA_OPMODE_MASK 0x000C ++#define E1000_KMRNCTRLSTA_OPMODE_INBAND_MDIO 0x0004 ++ ++#define E1000_TCTL_EXT_GCEX_MASK 0x000FFC00 /* Gigabit Carry Extend Padding */ ++#define DEFAULT_TCTL_EXT_GCEX_80003ES2LAN 0x00010000 ++ ++#define DEFAULT_TIPG_IPGT_1000_80003ES2LAN 0x8 ++#define DEFAULT_TIPG_IPGT_10_100_80003ES2LAN 0x9 ++ ++/* GG82563 PHY Specific Status Register (Page 0, Register 16 */ ++#define GG82563_PSCR_POLARITY_REVERSAL_DISABLE 0x0002 /* 1=Reversal Disab. */ ++#define GG82563_PSCR_CROSSOVER_MODE_MASK 0x0060 ++#define GG82563_PSCR_CROSSOVER_MODE_MDI 0x0000 /* 00=Manual MDI */ ++#define GG82563_PSCR_CROSSOVER_MODE_MDIX 0x0020 /* 01=Manual MDIX */ ++#define GG82563_PSCR_CROSSOVER_MODE_AUTO 0x0060 /* 11=Auto crossover */ ++ ++/* PHY Specific Control Register 2 (Page 0, Register 26) */ ++#define GG82563_PSCR2_REVERSE_AUTO_NEG 0x2000 ++ /* 1=Reverse Auto-Negotiation */ ++ ++/* MAC Specific Control Register (Page 2, Register 21) */ ++/* Tx clock speed for Link Down and 1000BASE-T for the following speeds */ ++#define GG82563_MSCR_TX_CLK_MASK 0x0007 ++#define GG82563_MSCR_TX_CLK_10MBPS_2_5 0x0004 ++#define GG82563_MSCR_TX_CLK_100MBPS_25 0x0005 ++#define GG82563_MSCR_TX_CLK_1000MBPS_25 0x0007 ++ ++#define GG82563_MSCR_ASSERT_CRS_ON_TX 0x0010 /* 1=Assert */ ++ ++/* DSP Distance Register (Page 5, Register 26) */ ++#define GG82563_DSPD_CABLE_LENGTH 0x0007 /* 0 = <50M ++ 1 = 50-80M ++ 2 = 80-110M ++ 3 = 110-140M ++ 4 = >140M */ ++ ++/* Kumeran Mode Control Register (Page 193, Register 16) */ ++#define GG82563_KMCR_PASS_FALSE_CARRIER 0x0800 ++ ++/* Max number of times Kumeran read/write should be validated */ ++#define GG82563_MAX_KMRN_RETRY 0x5 ++ ++/* Power Management Control Register (Page 193, Register 20) */ ++#define GG82563_PMCR_ENABLE_ELECTRICAL_IDLE 0x0001 ++ /* 1=Enable SERDES Electrical Idle */ ++ ++/* In-Band Control Register (Page 194, Register 18) */ ++#define GG82563_ICR_DIS_PADDING 0x0010 /* Disable Padding */ ++ ++/* ++ * A table for the GG82563 cable length where the range is defined ++ * with a lower bound at "index" and the upper bound at ++ * "index + 5". 
++ */ ++static const u16 e1000_gg82563_cable_length_table[] = { ++ 0, 60, 115, 150, 150, 60, 115, 150, 180, 180, 0xFF }; ++#define GG82563_CABLE_LENGTH_TABLE_SIZE \ ++ ARRAY_SIZE(e1000_gg82563_cable_length_table) ++ ++static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); ++static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); ++static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex); ++static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, ++ u16 data); ++static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw); ++ ++/** ++ * e1000_init_phy_params_80003es2lan - Init ESB2 PHY func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_phy_params_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ return 0; ++ } else { ++ phy->ops.power_up = e1000_power_up_phy_copper; ++ phy->ops.power_down = e1000_power_down_phy_copper_80003es2lan; ++ } ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 100; ++ phy->type = e1000_phy_gg82563; ++ ++ /* This can only be done after all function pointers are setup. */ ++ ret_val = e1000e_get_phy_id(hw); ++ ++ /* Verify phy id */ ++ if (phy->id != GG82563_E_PHY_ID) ++ return -E1000_ERR_PHY; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_80003es2lan - Init ESB2 NVM func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = er32(EECD); ++ u16 size; ++ ++ nvm->opcode_bits = 8; ++ nvm->delay_usec = 1; ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->page_size = 32; ++ nvm->address_bits = 16; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->page_size = 8; ++ nvm->address_bits = 8; ++ break; ++ default: ++ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; ++ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8; ++ break; ++ } ++ ++ nvm->type = e1000_nvm_eeprom_spi; ++ ++ size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ ++ /* ++ * Added to a constant, "size" becomes the left-shift value ++ * for setting word_size. ++ */ ++ size += NVM_WORD_SIZE_BASE_SHIFT; ++ ++ /* EEPROM access above 16k is unsupported */ ++ if (size > 14) ++ size = 14; ++ nvm->word_size = 1 << size; ++ ++ return 0; ++} ++ ++/** ++ * e1000_init_mac_params_80003es2lan - Init ESB2 MAC func ptrs. 
++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_mac_params_80003es2lan(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_mac_operations *func = &mac->ops; ++ ++ /* Set media type */ ++ switch (adapter->pdev->device) { ++ case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ /* FWSM register */ ++ mac->has_fwsm = true; ++ /* ARC supported; valid only if manageability features are enabled. */ ++ mac->arc_subsystem_valid = ++ (er32(FWSM) & E1000_FWSM_MODE_MASK) ++ ? true : false; ++ /* Adaptive IFS not supported */ ++ mac->adaptive_ifs = false; ++ ++ /* check for link */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ func->setup_physical_interface = e1000_setup_copper_link_80003es2lan; ++ func->check_for_link = e1000e_check_for_copper_link; ++ break; ++ case e1000_media_type_fiber: ++ func->setup_physical_interface = e1000e_setup_fiber_serdes_link; ++ func->check_for_link = e1000e_check_for_fiber_link; ++ break; ++ case e1000_media_type_internal_serdes: ++ func->setup_physical_interface = e1000e_setup_fiber_serdes_link; ++ func->check_for_link = e1000e_check_for_serdes_link; ++ break; ++ default: ++ return -E1000_ERR_CONFIG; ++ break; ++ } ++ ++ /* set lan id for port to determine which phy lock to use */ ++ hw->mac.ops.set_lan_id(hw); ++ ++ return 0; ++} ++ ++static s32 e1000_get_variants_80003es2lan(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ s32 rc; ++ ++ rc = e1000_init_mac_params_80003es2lan(adapter); ++ if (rc) ++ return rc; ++ ++ rc = e1000_init_nvm_params_80003es2lan(hw); ++ if (rc) ++ return rc; ++ ++ rc = e1000_init_phy_params_80003es2lan(hw); ++ if (rc) ++ return rc; ++ ++ return 0; ++} ++ ++/** ++ * e1000_acquire_phy_80003es2lan - Acquire rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * A wrapper to acquire access rights to the correct PHY. ++ **/ ++static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; ++ return e1000_acquire_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_release_phy_80003es2lan - Release rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * A wrapper to release access rights to the correct PHY. ++ **/ ++static void e1000_release_phy_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; ++ e1000_release_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_acquire_mac_csr_80003es2lan - Acquire rights to access Kumeran register ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the semaphore to access the Kumeran interface. 
++ * ++ **/ ++static s32 e1000_acquire_mac_csr_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ mask = E1000_SWFW_CSR_SM; ++ ++ return e1000_acquire_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_release_mac_csr_80003es2lan - Release rights to access Kumeran Register ++ * @hw: pointer to the HW structure ++ * ++ * Release the semaphore used to access the Kumeran interface ++ **/ ++static void e1000_release_mac_csr_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ mask = E1000_SWFW_CSR_SM; ++ ++ e1000_release_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_acquire_nvm_80003es2lan - Acquire rights to access NVM ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the semaphore to access the EEPROM. ++ **/ ++static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000_acquire_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000e_acquire_nvm(hw); ++ ++ if (ret_val) ++ e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_release_nvm_80003es2lan - Relinquish rights to access NVM ++ * @hw: pointer to the HW structure ++ * ++ * Release the semaphore used to access the EEPROM. ++ **/ ++static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw) ++{ ++ e1000e_release_nvm(hw); ++ e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++} ++ ++/** ++ * e1000_acquire_swfw_sync_80003es2lan - Acquire SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask ++ * will also specify which port we're acquiring the lock for. ++ **/ ++static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ u32 swmask = mask; ++ u32 fwmask = mask << 16; ++ s32 i = 0; ++ s32 timeout = 50; ++ ++ while (i < timeout) { ++ if (e1000e_get_hw_semaphore(hw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ swfw_sync = er32(SW_FW_SYNC); ++ if (!(swfw_sync & (fwmask | swmask))) ++ break; ++ ++ /* ++ * Firmware currently using resource (fwmask) ++ * or other software thread using resource (swmask) ++ */ ++ e1000e_put_hw_semaphore(hw); ++ mdelay(5); ++ i++; ++ } ++ ++ if (i == timeout) { ++ e_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n"); ++ return -E1000_ERR_SWFW_SYNC; ++ } ++ ++ swfw_sync |= swmask; ++ ew32(SW_FW_SYNC, swfw_sync); ++ ++ e1000e_put_hw_semaphore(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_release_swfw_sync_80003es2lan - Release SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Release the SW/FW semaphore used to access the PHY or NVM. The mask ++ * will also specify which port we're releasing the lock for. ++ **/ ++static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ ++ while (e1000e_get_hw_semaphore(hw) != 0) ++ ; /* Empty */ ++ ++ swfw_sync = er32(SW_FW_SYNC); ++ swfw_sync &= ~mask; ++ ew32(SW_FW_SYNC, swfw_sync); ++ ++ e1000e_put_hw_semaphore(hw); ++} ++ ++/** ++ * e1000_read_phy_reg_gg82563_80003es2lan - Read GG82563 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @data: pointer to the data returned from the operation ++ * ++ * Read the GG82563 PHY register. 
++ **/ ++static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ u32 page_select; ++ u16 temp; ++ ++ ret_val = e1000_acquire_phy_80003es2lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Select Configuration Page */ ++ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ page_select = GG82563_PHY_PAGE_SELECT; ++ } else { ++ /* ++ * Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ page_select = GG82563_PHY_PAGE_SELECT_ALT; ++ } ++ ++ temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ++ ret_val = e1000e_write_phy_reg_mdic(hw, page_select, temp); ++ if (ret_val) { ++ e1000_release_phy_80003es2lan(hw); ++ return ret_val; ++ } ++ ++ if (hw->dev_spec.e80003es2lan.mdic_wa_enable == true) { ++ /* ++ * The "ready" bit in the MDIC register may be incorrectly set ++ * before the device has completed the "Page Select" MDI ++ * transaction. So we wait 200us after each MDI command... ++ */ ++ udelay(200); ++ ++ /* ...and verify the command was successful. */ ++ ret_val = e1000e_read_phy_reg_mdic(hw, page_select, &temp); ++ ++ if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { ++ ret_val = -E1000_ERR_PHY; ++ e1000_release_phy_80003es2lan(hw); ++ return ret_val; ++ } ++ ++ udelay(200); ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ udelay(200); ++ } else { ++ ret_val = e1000e_read_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ } ++ ++ e1000_release_phy_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_gg82563_80003es2lan - Write GG82563 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @data: value to write to the register ++ * ++ * Write to the GG82563 PHY register. ++ **/ ++static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, u16 data) ++{ ++ s32 ret_val; ++ u32 page_select; ++ u16 temp; ++ ++ ret_val = e1000_acquire_phy_80003es2lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Select Configuration Page */ ++ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ page_select = GG82563_PHY_PAGE_SELECT; ++ } else { ++ /* ++ * Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ page_select = GG82563_PHY_PAGE_SELECT_ALT; ++ } ++ ++ temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ++ ret_val = e1000e_write_phy_reg_mdic(hw, page_select, temp); ++ if (ret_val) { ++ e1000_release_phy_80003es2lan(hw); ++ return ret_val; ++ } ++ ++ if (hw->dev_spec.e80003es2lan.mdic_wa_enable == true) { ++ /* ++ * The "ready" bit in the MDIC register may be incorrectly set ++ * before the device has completed the "Page Select" MDI ++ * transaction. So we wait 200us after each MDI command... ++ */ ++ udelay(200); ++ ++ /* ...and verify the command was successful. 
*/ ++ ret_val = e1000e_read_phy_reg_mdic(hw, page_select, &temp); ++ ++ if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { ++ e1000_release_phy_80003es2lan(hw); ++ return -E1000_ERR_PHY; ++ } ++ ++ udelay(200); ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ udelay(200); ++ } else { ++ ret_val = e1000e_write_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ } ++ ++ e1000_release_phy_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_80003es2lan - Write to ESB2 NVM ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @words: number of words to write ++ * @data: buffer of data to write to the NVM ++ * ++ * Write "words" of data to the ESB2 NVM. ++ **/ ++static s32 e1000_write_nvm_80003es2lan(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data) ++{ ++ return e1000e_write_nvm_spi(hw, offset, words, data); ++} ++ ++/** ++ * e1000_get_cfg_done_80003es2lan - Wait for configuration to complete ++ * @hw: pointer to the HW structure ++ * ++ * Wait a specific amount of time for manageability processes to complete. ++ * This is a function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_get_cfg_done_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 timeout = PHY_CFG_TIMEOUT; ++ u32 mask = E1000_NVM_CFG_DONE_PORT_0; ++ ++ if (hw->bus.func == 1) ++ mask = E1000_NVM_CFG_DONE_PORT_1; ++ ++ while (timeout) { ++ if (er32(EEMNGCTL) & mask) ++ break; ++ usleep_range(1000, 2000); ++ timeout--; ++ } ++ if (!timeout) { ++ e_dbg("MNG configuration cycle has not completed.\n"); ++ return -E1000_ERR_RESET; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_80003es2lan - Force PHY speed and duplex ++ * @hw: pointer to the HW structure ++ * ++ * Force the speed and duplex settings onto the PHY. This is a ++ * function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~GG82563_PSCR_CROSSOVER_MODE_AUTO; ++ ret_val = e1e_wphy(hw, GG82563_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e_dbg("GG82563 PSCR: %X\n", phy_data); ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e1000e_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ /* Reset the phy to commit changes. */ ++ phy_data |= MII_CR_RESET; ++ ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ udelay(1); ++ ++ if (hw->phy.autoneg_wait_to_complete) { ++ e_dbg("Waiting for forced speed/duplex link " ++ "on GG82563 phy.\n"); ++ ++ ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) { ++ /* ++ * We didn't get link. ++ * Reset the DSP and cross our fingers. 
++ */ ++ ret_val = e1000e_phy_reset_dsp(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Try once more */ ++ ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_MAC_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Resetting the phy means we need to verify the TX_CLK corresponds ++ * to the link speed. 10Mbps -> 2.5MHz, else 25MHz. ++ */ ++ phy_data &= ~GG82563_MSCR_TX_CLK_MASK; ++ if (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED) ++ phy_data |= GG82563_MSCR_TX_CLK_10MBPS_2_5; ++ else ++ phy_data |= GG82563_MSCR_TX_CLK_100MBPS_25; ++ ++ /* ++ * In addition, we must re-enable CRS on Tx for both half and full ++ * duplex. ++ */ ++ phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ ret_val = e1e_wphy(hw, GG82563_PHY_MAC_SPEC_CTRL, phy_data); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_80003es2lan - Set approximate cable length ++ * @hw: pointer to the HW structure ++ * ++ * Find the approximate cable length as measured by the GG82563 PHY. ++ * This is a function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_data, index; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_DSP_DISTANCE, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = phy_data & GG82563_DSPD_CABLE_LENGTH; ++ ++ if (index >= GG82563_CABLE_LENGTH_TABLE_SIZE - 5) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ phy->min_cable_length = e1000_gg82563_cable_length_table[index]; ++ phy->max_cable_length = e1000_gg82563_cable_length_table[index + 5]; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_link_up_info_80003es2lan - Report speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to speed buffer ++ * @duplex: pointer to duplex buffer ++ * ++ * Retrieve the current speed and duplex configuration. ++ **/ ++static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ s32 ret_val; ++ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ ret_val = e1000e_get_speed_and_duplex_copper(hw, ++ speed, ++ duplex); ++ hw->phy.ops.cfg_on_link_up(hw); ++ } else { ++ ret_val = e1000e_get_speed_and_duplex_fiber_serdes(hw, ++ speed, ++ duplex); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_80003es2lan - Reset the ESB2 controller ++ * @hw: pointer to the HW structure ++ * ++ * Perform a global reset to the ESB2 controller. ++ **/ ++static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000e_disable_pcie_master(hw); ++ if (ret_val) ++ e_dbg("PCI-E Master disable polling has failed.\n"); ++ ++ e_dbg("Masking off all interrupts\n"); ++ ew32(IMC, 0xffffffff); ++ ++ ew32(RCTL, 0); ++ ew32(TCTL, E1000_TCTL_PSP); ++ e1e_flush(); ++ ++ usleep_range(10000, 20000); ++ ++ ctrl = er32(CTRL); ++ ++ ret_val = e1000_acquire_phy_80003es2lan(hw); ++ e_dbg("Issuing a global reset to MAC\n"); ++ ew32(CTRL, ctrl | E1000_CTRL_RST); ++ e1000_release_phy_80003es2lan(hw); ++ ++ ret_val = e1000e_get_auto_rd_done(hw); ++ if (ret_val) ++ /* We don't want to continue accessing MAC registers. 
*/ ++ return ret_val; ++ ++ /* Clear any pending interrupt events. */ ++ ew32(IMC, 0xffffffff); ++ er32(ICR); ++ ++ ret_val = e1000_check_alt_mac_addr_generic(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_80003es2lan - Initialize the ESB2 controller ++ * @hw: pointer to the HW structure ++ * ++ * Initialize the hw bits, LED, VFTA, MTA, link and hw counters. ++ **/ ++static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 reg_data; ++ s32 ret_val; ++ u16 kum_reg_data; ++ u16 i; ++ ++ e1000_initialize_hw_bits_80003es2lan(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000e_id_led_init(hw); ++ if (ret_val) ++ e_dbg("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ ++ /* Disabling VLAN filtering */ ++ e_dbg("Initializing the IEEE VLAN\n"); ++ mac->ops.clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ e1000e_init_rx_addrs(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ e_dbg("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000e_setup_link(hw); ++ ++ /* Disable IBIST slave mode (far-end loopback) */ ++ e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, ++ &kum_reg_data); ++ kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; ++ e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, ++ kum_reg_data); ++ ++ /* Set the transmit descriptor write-back policy */ ++ reg_data = er32(TXDCTL(0)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC; ++ ew32(TXDCTL(0), reg_data); ++ ++ /* ...for both queues. */ ++ reg_data = er32(TXDCTL(1)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC; ++ ew32(TXDCTL(1), reg_data); ++ ++ /* Enable retransmit on late collisions */ ++ reg_data = er32(TCTL); ++ reg_data |= E1000_TCTL_RTLC; ++ ew32(TCTL, reg_data); ++ ++ /* Configure Gigabit Carry Extend Padding */ ++ reg_data = er32(TCTL_EXT); ++ reg_data &= ~E1000_TCTL_EXT_GCEX_MASK; ++ reg_data |= DEFAULT_TCTL_EXT_GCEX_80003ES2LAN; ++ ew32(TCTL_EXT, reg_data); ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ reg_data = er32(TIPG); ++ reg_data &= ~E1000_TIPG_IPGT_MASK; ++ reg_data |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; ++ ew32(TIPG, reg_data); ++ ++ reg_data = E1000_READ_REG_ARRAY(hw, E1000_FFLT, 0x0001); ++ reg_data &= ~0x00100000; ++ E1000_WRITE_REG_ARRAY(hw, E1000_FFLT, 0x0001, reg_data); ++ ++ /* default to true to enable the MDIC W/A */ ++ hw->dev_spec.e80003es2lan.mdic_wa_enable = true; ++ ++ ret_val = e1000_read_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET >> ++ E1000_KMRNCTRLSTA_OFFSET_SHIFT, ++ &i); ++ if (!ret_val) { ++ if ((i & E1000_KMRNCTRLSTA_OPMODE_MASK) == ++ E1000_KMRNCTRLSTA_OPMODE_INBAND_MDIO) ++ hw->dev_spec.e80003es2lan.mdic_wa_enable = false; ++ } ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_initialize_hw_bits_80003es2lan - Init hw bits of ESB2 ++ * @hw: pointer to the HW structure ++ * ++ * Initializes required hardware-dependent bits needed for normal operation. 
++ **/ ++static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = er32(TXDCTL(0)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = er32(TXDCTL(1)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = er32(TARC(0)); ++ reg &= ~(0xF << 27); /* 30:27 */ ++ if (hw->phy.media_type != e1000_media_type_copper) ++ reg &= ~(1 << 20); ++ ew32(TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = er32(TARC(1)); ++ if (er32(TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ ew32(TARC(1), reg); ++} ++ ++/** ++ * e1000_copper_link_setup_gg82563_80003es2lan - Configure GG82563 Link ++ * @hw: pointer to the HW structure ++ * ++ * Setup some GG82563 PHY registers for obtaining link ++ **/ ++static s32 e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u32 ctrl_ext; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_MAC_SPEC_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ /* Use 25MHz for both link down and 1000Base-T for Tx clock. */ ++ data |= GG82563_MSCR_TX_CLK_1000MBPS_25; ++ ++ ret_val = e1e_wphy(hw, GG82563_PHY_MAC_SPEC_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ ret_val = e1e_rphy(hw, GG82563_PHY_SPEC_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~GG82563_PSCR_CROSSOVER_MODE_MASK; ++ ++ switch (phy->mdix) { ++ case 1: ++ data |= GG82563_PSCR_CROSSOVER_MODE_MDI; ++ break; ++ case 2: ++ data |= GG82563_PSCR_CROSSOVER_MODE_MDIX; ++ break; ++ case 0: ++ default: ++ data |= GG82563_PSCR_CROSSOVER_MODE_AUTO; ++ break; ++ } ++ ++ /* ++ * Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ data &= ~GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ if (phy->disable_polarity_correction) ++ data |= GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ ++ ret_val = e1e_wphy(hw, GG82563_PHY_SPEC_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ ++ /* SW Reset the PHY so all changes take effect */ ++ ret_val = e1000e_commit_phy(hw); ++ if (ret_val) { ++ e_dbg("Error Resetting the PHY\n"); ++ return ret_val; ++ } ++ ++ /* Bypass Rx and Tx FIFO's */ ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL, ++ E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS | ++ E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE, ++ &data); ++ if (ret_val) ++ return ret_val; ++ data |= E1000_KMRNCTRLSTA_OPMODE_E_IDLE; ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE, ++ data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_SPEC_CTRL_2, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~GG82563_PSCR2_REVERSE_AUTO_NEG; ++ ret_val = e1e_wphy(hw, GG82563_PHY_SPEC_CTRL_2, data); ++ if (ret_val) ++ return ret_val; ++ ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext &= ~(E1000_CTRL_EXT_LINK_MODE_MASK); ++ ew32(CTRL_EXT, ctrl_ext); ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_PWR_MGMT_CTRL, &data); ++ 
if (ret_val) ++ return ret_val; ++ ++ /* ++ * Do not init these registers when the HW is in IAMT mode, since the ++ * firmware will have already initialized them. We only initialize ++ * them if the HW is not in IAMT mode. ++ */ ++ if (!e1000e_check_mng_mode(hw)) { ++ /* Enable Electrical Idle on the PHY */ ++ data |= GG82563_PMCR_ENABLE_ELECTRICAL_IDLE; ++ ret_val = e1e_wphy(hw, GG82563_PHY_PWR_MGMT_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1e_wphy(hw, GG82563_PHY_KMRN_MODE_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* ++ * Workaround: Disable padding in Kumeran interface in the MAC ++ * and in the PHY to avoid CRC errors. ++ */ ++ ret_val = e1e_rphy(hw, GG82563_PHY_INBAND_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= GG82563_ICR_DIS_PADDING; ++ ret_val = e1e_wphy(hw, GG82563_PHY_INBAND_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ ++ return 0; ++} ++ ++/** ++ * e1000_setup_copper_link_80003es2lan - Setup Copper Link for ESB2 ++ * @hw: pointer to the HW structure ++ * ++ * Essentially a wrapper for setting up all things "copper" related. ++ * This is a function pointer entry point called by the mac module. ++ **/ ++static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u16 reg_data; ++ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ew32(CTRL, ctrl); ++ ++ /* ++ * Set the mac to wait the maximum time between each ++ * iteration and increase the max iterations when ++ * polling the phy; this fixes erroneous timeouts at 10Mbps. ++ */ ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 4), ++ 0xFFFF); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), ++ ®_data); ++ if (ret_val) ++ return ret_val; ++ reg_data |= 0x3F; ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, ++ ®_data); ++ if (ret_val) ++ return ret_val; ++ reg_data |= E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING; ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_copper_link_setup_gg82563_80003es2lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000e_setup_copper_link(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_cfg_on_link_up_80003es2lan - es2 link configuration after link-up ++ * @hw: pointer to the HW structure ++ * @duplex: current duplex setting ++ * ++ * Configure the KMRN interface by applying last minute quirks for ++ * 10/100 operation. 
++ **/ ++static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 speed; ++ u16 duplex; ++ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ ret_val = e1000e_get_speed_and_duplex_copper(hw, &speed, ++ &duplex); ++ if (ret_val) ++ return ret_val; ++ ++ if (speed == SPEED_1000) ++ ret_val = e1000_cfg_kmrn_1000_80003es2lan(hw); ++ else ++ ret_val = e1000_cfg_kmrn_10_100_80003es2lan(hw, duplex); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_cfg_kmrn_10_100_80003es2lan - Apply "quirks" for 10/100 operation ++ * @hw: pointer to the HW structure ++ * @duplex: current duplex setting ++ * ++ * Configure the KMRN interface by applying last minute quirks for ++ * 10/100 operation. ++ **/ ++static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex) ++{ ++ s32 ret_val; ++ u32 tipg; ++ u32 i = 0; ++ u16 reg_data, reg_data2; ++ ++ reg_data = E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT; ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = er32(TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_TIPG_IPGT_10_100_80003ES2LAN; ++ ew32(TIPG, tipg); ++ ++ do { ++ ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data2); ++ if (ret_val) ++ return ret_val; ++ i++; ++ } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); ++ ++ if (duplex == HALF_DUPLEX) ++ reg_data |= GG82563_KMCR_PASS_FALSE_CARRIER; ++ else ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ++ ret_val = e1e_wphy(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++ return 0; ++} ++ ++/** ++ * e1000_cfg_kmrn_1000_80003es2lan - Apply "quirks" for gigabit operation ++ * @hw: pointer to the HW structure ++ * ++ * Configure the KMRN interface by applying last minute quirks for ++ * gigabit operation. ++ **/ ++static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 reg_data, reg_data2; ++ u32 tipg; ++ u32 i = 0; ++ ++ reg_data = E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT; ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = er32(TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; ++ ew32(TIPG, tipg); ++ ++ do { ++ ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data2); ++ if (ret_val) ++ return ret_val; ++ i++; ++ } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); ++ ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1e_wphy(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_kmrn_reg_80003es2lan - Read kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquire semaphore, then read the PHY register at offset ++ * using the kumeran interface. The information retrieved is stored in data. ++ * Release the semaphore before exiting. 
++ **/ ++static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, ++ u16 *data) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val = 0; ++ ++ ret_val = e1000_acquire_mac_csr_80003es2lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; ++ ew32(KMRNCTRLSTA, kmrnctrlsta); ++ e1e_flush(); ++ ++ udelay(2); ++ ++ kmrnctrlsta = er32(KMRNCTRLSTA); ++ *data = (u16)kmrnctrlsta; ++ ++ e1000_release_mac_csr_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_kmrn_reg_80003es2lan - Write kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquire semaphore, then write the data to PHY register ++ * at the offset using the kumeran interface. Release semaphore ++ * before exiting. ++ **/ ++static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, ++ u16 data) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val = 0; ++ ++ ret_val = e1000_acquire_mac_csr_80003es2lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | data; ++ ew32(KMRNCTRLSTA, kmrnctrlsta); ++ e1e_flush(); ++ ++ udelay(2); ++ ++ e1000_release_mac_csr_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_mac_addr_80003es2lan - Read device MAC address ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ /* ++ * If there's an alternate MAC address place it in RAR0 ++ * so that it will override the Si installed default perm ++ * address. ++ */ ++ ret_val = e1000_check_alt_mac_addr_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_mac_addr_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_80003es2lan - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(hw->mac.ops.check_mng_mode(hw) || ++ hw->phy.ops.check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++} ++ ++/** ++ * e1000_clear_hw_cntrs_80003es2lan - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. 
++ **/ ++static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw) ++{ ++ e1000e_clear_hw_cntrs_base(hw); ++ ++ er32(PRC64); ++ er32(PRC127); ++ er32(PRC255); ++ er32(PRC511); ++ er32(PRC1023); ++ er32(PRC1522); ++ er32(PTC64); ++ er32(PTC127); ++ er32(PTC255); ++ er32(PTC511); ++ er32(PTC1023); ++ er32(PTC1522); ++ ++ er32(ALGNERRC); ++ er32(RXERRC); ++ er32(TNCRS); ++ er32(CEXTERR); ++ er32(TSCTC); ++ er32(TSCTFC); ++ ++ er32(MGTPRC); ++ er32(MGTPDC); ++ er32(MGTPTC); ++ ++ er32(IAC); ++ er32(ICRXOC); ++ ++ er32(ICRXPTC); ++ er32(ICRXATC); ++ er32(ICTXPTC); ++ er32(ICTXATC); ++ er32(ICTXQEC); ++ er32(ICTXQMTC); ++ er32(ICRXDMTC); ++} ++ ++static const struct e1000_mac_operations es2_mac_ops = { ++ .read_mac_addr = e1000_read_mac_addr_80003es2lan, ++ .id_led_init = e1000e_id_led_init, ++ .blink_led = e1000e_blink_led_generic, ++ .check_mng_mode = e1000e_check_mng_mode_generic, ++ /* check_for_link dependent on media type */ ++ .cleanup_led = e1000e_cleanup_led_generic, ++ .clear_hw_cntrs = e1000_clear_hw_cntrs_80003es2lan, ++ .get_bus_info = e1000e_get_bus_info_pcie, ++ .set_lan_id = e1000_set_lan_id_multi_port_pcie, ++ .get_link_up_info = e1000_get_link_up_info_80003es2lan, ++ .led_on = e1000e_led_on_generic, ++ .led_off = e1000e_led_off_generic, ++ .update_mc_addr_list = e1000e_update_mc_addr_list_generic, ++ .write_vfta = e1000_write_vfta_generic, ++ .clear_vfta = e1000_clear_vfta_generic, ++ .reset_hw = e1000_reset_hw_80003es2lan, ++ .init_hw = e1000_init_hw_80003es2lan, ++ .setup_link = e1000e_setup_link, ++ /* setup_physical_interface dependent on media type */ ++ .setup_led = e1000e_setup_led_generic, ++}; ++ ++static const struct e1000_phy_operations es2_phy_ops = { ++ .acquire = e1000_acquire_phy_80003es2lan, ++ .check_polarity = e1000_check_polarity_m88, ++ .check_reset_block = e1000e_check_reset_block_generic, ++ .commit = e1000e_phy_sw_reset, ++ .force_speed_duplex = e1000_phy_force_speed_duplex_80003es2lan, ++ .get_cfg_done = e1000_get_cfg_done_80003es2lan, ++ .get_cable_length = e1000_get_cable_length_80003es2lan, ++ .get_info = e1000e_get_phy_info_m88, ++ .read_reg = e1000_read_phy_reg_gg82563_80003es2lan, ++ .release = e1000_release_phy_80003es2lan, ++ .reset = e1000e_phy_hw_reset_generic, ++ .set_d0_lplu_state = NULL, ++ .set_d3_lplu_state = e1000e_set_d3_lplu_state, ++ .write_reg = e1000_write_phy_reg_gg82563_80003es2lan, ++ .cfg_on_link_up = e1000_cfg_on_link_up_80003es2lan, ++}; ++ ++static const struct e1000_nvm_operations es2_nvm_ops = { ++ .acquire = e1000_acquire_nvm_80003es2lan, ++ .read = e1000e_read_nvm_eerd, ++ .release = e1000_release_nvm_80003es2lan, ++ .update = e1000e_update_nvm_checksum_generic, ++ .valid_led_default = e1000e_valid_led_default, ++ .validate = e1000e_validate_nvm_checksum_generic, ++ .write = e1000_write_nvm_80003es2lan, ++}; ++ ++const struct e1000_info e1000_es2_info = { ++ .mac = e1000_80003es2lan, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_RX_NEEDS_RESTART /* errata */ ++ | FLAG_TARC_SET_BIT_ZERO /* errata */ ++ | FLAG_APME_CHECK_PORT_B ++ | FLAG_DISABLE_FC_PAUSE_TIME /* errata */ ++ | FLAG_TIPG_MEDIUM_FOR_80003ESLAN, ++ .flags2 = FLAG2_DMA_BURST, ++ .pba = 38, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_80003es2lan, ++ .mac_ops = &es2_mac_ops, ++ .phy_ops = &es2_phy_ops, ++ .nvm_ops = &es2_nvm_ops, ++}; ++ +--- linux/drivers/xenomai/net/drivers/e1000e/defines.h 1970-01-01 08:00:00.000000000 
+0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/defines.h 2021-04-07 16:01:27.180634233 +0800 +@@ -0,0 +1,852 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_DEFINES_H_ ++#define _E1000_DEFINES_H_ ++ ++#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ ++#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ ++#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ ++#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ ++#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ ++#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ ++#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ ++#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ ++#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ ++#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ ++#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ ++#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ ++#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ ++#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ ++#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ ++#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ ++ ++/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ ++#define REQ_TX_DESCRIPTOR_MULTIPLE 8 ++#define REQ_RX_DESCRIPTOR_MULTIPLE 8 ++ ++/* Definitions for power management and wakeup registers */ ++/* Wake Up Control */ ++#define E1000_WUC_APME 0x00000001 /* APM Enable */ ++#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ ++#define E1000_WUC_PHY_WAKE 0x00000100 /* if PHY supports wakeup */ ++ ++/* Wake Up Filter Control */ ++#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ ++#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ ++#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ ++#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ ++#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ ++#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ ++ ++/* Wake Up Status */ ++#define E1000_WUS_LNKC E1000_WUFC_LNKC ++#define E1000_WUS_MAG E1000_WUFC_MAG ++#define E1000_WUS_EX E1000_WUFC_EX ++#define E1000_WUS_MC E1000_WUFC_MC ++#define 
E1000_WUS_BC E1000_WUFC_BC ++ ++/* Extended Device Control */ ++#define E1000_CTRL_EXT_LPCD 0x00000004 /* LCD Power Cycle Done */ ++#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Definable Pin 3 */ ++#define E1000_CTRL_EXT_FORCE_SMBUS 0x00000004 /* Force SMBus mode*/ ++#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ ++#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ ++#define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ ++#define E1000_CTRL_EXT_DMA_DYN_CLK_EN 0x00080000 /* DMA Dynamic Clock Gating */ ++#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 ++#define E1000_CTRL_EXT_EIAME 0x01000000 ++#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ ++#define E1000_CTRL_EXT_IAME 0x08000000 /* Interrupt acknowledge Auto-mask */ ++#define E1000_CTRL_EXT_INT_TIMER_CLR 0x20000000 /* Clear Interrupt timers after IMS clear */ ++#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ ++#define E1000_CTRL_EXT_LSECCK 0x00001000 ++#define E1000_CTRL_EXT_PHYPDEN 0x00100000 ++ ++/* Receive Descriptor bit definitions */ ++#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ ++#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ ++#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ ++#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ ++#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ ++#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ ++#define E1000_RXD_ERR_CE 0x01 /* CRC Error */ ++#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ ++#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ ++#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ ++#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ ++#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ ++#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ ++ ++#define E1000_RXDEXT_STATERR_CE 0x01000000 ++#define E1000_RXDEXT_STATERR_SE 0x02000000 ++#define E1000_RXDEXT_STATERR_SEQ 0x04000000 ++#define E1000_RXDEXT_STATERR_CXE 0x10000000 ++#define E1000_RXDEXT_STATERR_RXE 0x80000000 ++ ++/* mask to determine if packets should be dropped due to frame errors */ ++#define E1000_RXD_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXD_ERR_CE | \ ++ E1000_RXD_ERR_SE | \ ++ E1000_RXD_ERR_SEQ | \ ++ E1000_RXD_ERR_CXE | \ ++ E1000_RXD_ERR_RXE) ++ ++/* Same mask, but for extended and packet split descriptors */ ++#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXDEXT_STATERR_CE | \ ++ E1000_RXDEXT_STATERR_SE | \ ++ E1000_RXDEXT_STATERR_SEQ | \ ++ E1000_RXDEXT_STATERR_CXE | \ ++ E1000_RXDEXT_STATERR_RXE) ++ ++#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 ++ ++/* Management Control */ ++#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ ++#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ ++#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ ++#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ ++#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ ++/* Enable MAC address filtering */ ++#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 ++/* Enable MNG packets to host memory */ ++#define E1000_MANC_EN_MNG2HOST 0x00200000 ++ ++#define E1000_MANC2H_PORT_623 0x00000020 /* Port 0x26f */ ++#define E1000_MANC2H_PORT_664 0x00000040 /* Port 0x298 */ ++#define E1000_MDEF_PORT_623 0x00000800 /* Port 0x26f */ ++#define E1000_MDEF_PORT_664 0x00000400 /* Port 0x298 */ ++ ++/* Receive Control */ ++#define 
E1000_RCTL_EN 0x00000002 /* enable */ ++#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ ++#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ ++#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ ++#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ ++#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ ++#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ ++#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ ++#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ ++#define E1000_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min threshold size */ ++#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ ++#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ ++#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ ++#define E1000_RCTL_SZ_2048 0x00000000 /* Rx buffer size 2048 */ ++#define E1000_RCTL_SZ_1024 0x00010000 /* Rx buffer size 1024 */ ++#define E1000_RCTL_SZ_512 0x00020000 /* Rx buffer size 512 */ ++#define E1000_RCTL_SZ_256 0x00030000 /* Rx buffer size 256 */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ ++#define E1000_RCTL_SZ_16384 0x00010000 /* Rx buffer size 16384 */ ++#define E1000_RCTL_SZ_8192 0x00020000 /* Rx buffer size 8192 */ ++#define E1000_RCTL_SZ_4096 0x00030000 /* Rx buffer size 4096 */ ++#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ ++#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ ++#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ ++#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ ++#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ ++#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ ++ ++/* ++ * Use byte values for the following shift parameters ++ * Usage: ++ * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & ++ * E1000_PSRCTL_BSIZE0_MASK) | ++ * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & ++ * E1000_PSRCTL_BSIZE1_MASK) | ++ * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & ++ * E1000_PSRCTL_BSIZE2_MASK) | ++ * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; ++ * E1000_PSRCTL_BSIZE3_MASK)) ++ * where value0 = [128..16256], default=256 ++ * value1 = [1024..64512], default=4096 ++ * value2 = [0..64512], default=4096 ++ * value3 = [0..64512], default=0 ++ */ ++ ++#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F ++#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 ++#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 ++#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 ++ ++#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ ++#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ ++#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ ++#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ ++ ++/* SWFW_SYNC Definitions */ ++#define E1000_SWFW_EEP_SM 0x1 ++#define E1000_SWFW_PHY0_SM 0x2 ++#define E1000_SWFW_PHY1_SM 0x4 ++#define E1000_SWFW_CSR_SM 0x8 ++ ++/* Device Control */ ++#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ ++#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master requests */ ++#define E1000_CTRL_LRST 0x00000008 /* Link reset. 
0=normal,1=reset */ ++#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ ++#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ ++#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ ++#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ ++#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ ++#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ ++#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ ++#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ ++#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ ++#define E1000_CTRL_LANPHYPC_OVERRIDE 0x00010000 /* SW control of LANPHYPC */ ++#define E1000_CTRL_LANPHYPC_VALUE 0x00020000 /* SW value of LANPHYPC */ ++#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ ++#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ ++#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ ++#define E1000_CTRL_RST 0x04000000 /* Global reset */ ++#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ ++#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ ++#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ ++#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ ++ ++/* ++ * Bit definitions for the Management Data IO (MDIO) and Management Data ++ * Clock (MDC) pins in the Device Control Register. ++ */ ++ ++/* Device Status */ ++#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ ++#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ ++#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ ++#define E1000_STATUS_FUNC_SHIFT 2 ++#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ ++#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ ++#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ ++#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ ++#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ ++#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion by NVM */ ++#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ ++#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. */ ++ ++/* Constants used to interpret the masked PCI-X bus speed. */ ++ ++#define HALF_DUPLEX 1 ++#define FULL_DUPLEX 2 ++ ++ ++#define ADVERTISE_10_HALF 0x0001 ++#define ADVERTISE_10_FULL 0x0002 ++#define ADVERTISE_100_HALF 0x0004 ++#define ADVERTISE_100_FULL 0x0008 ++#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ ++#define ADVERTISE_1000_FULL 0x0020 ++ ++/* 1000/H is not supported, nor spec-compliant. 
*/ ++#define E1000_ALL_SPEED_DUPLEX ( ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ++ ADVERTISE_1000_FULL) ++#define E1000_ALL_NOT_GIG ( ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL) ++#define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF) ++ ++#define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX ++ ++/* LED Control */ ++#define E1000_PHY_LED0_MODE_MASK 0x00000007 ++#define E1000_PHY_LED0_IVRT 0x00000008 ++#define E1000_PHY_LED0_MASK 0x0000001F ++ ++#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F ++#define E1000_LEDCTL_LED0_MODE_SHIFT 0 ++#define E1000_LEDCTL_LED0_IVRT 0x00000040 ++#define E1000_LEDCTL_LED0_BLINK 0x00000080 ++ ++#define E1000_LEDCTL_MODE_LINK_UP 0x2 ++#define E1000_LEDCTL_MODE_LED_ON 0xE ++#define E1000_LEDCTL_MODE_LED_OFF 0xF ++ ++/* Transmit Descriptor bit definitions */ ++#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ ++#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ ++#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ ++#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ ++#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ ++#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ ++#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ ++#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ ++#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ ++#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ ++#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ ++#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ ++#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ ++#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ ++#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ ++#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ ++#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ ++ ++/* Transmit Control */ ++#define E1000_TCTL_EN 0x00000002 /* enable Tx */ ++#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ ++#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ ++#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ ++#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ ++#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ ++ ++/* Transmit Arbitration Count */ ++ ++/* SerDes Control */ ++#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 ++ ++/* Receive Checksum Control */ ++#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ ++#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ ++ ++/* Header split receive */ ++#define E1000_RFCTL_NFSW_DIS 0x00000040 ++#define E1000_RFCTL_NFSR_DIS 0x00000080 ++#define E1000_RFCTL_ACK_DIS 0x00001000 ++#define E1000_RFCTL_EXTEN 0x00008000 ++#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 ++#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 ++ ++/* Collision related configuration parameters */ ++#define E1000_COLLISION_THRESHOLD 15 ++#define E1000_CT_SHIFT 4 ++#define E1000_COLLISION_DISTANCE 63 ++#define E1000_COLD_SHIFT 12 ++ ++/* Default values for the transmit IPG register */ ++#define DEFAULT_82543_TIPG_IPGT_COPPER 8 ++ ++#define E1000_TIPG_IPGT_MASK 0x000003FF 
++ ++#define DEFAULT_82543_TIPG_IPGR1 8 ++#define E1000_TIPG_IPGR1_SHIFT 10 ++ ++#define DEFAULT_82543_TIPG_IPGR2 6 ++#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7 ++#define E1000_TIPG_IPGR2_SHIFT 20 ++ ++#define MAX_JUMBO_FRAME_SIZE 0x3F00 ++ ++/* Extended Configuration Control and Size */ ++#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020 ++#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001 ++#define E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE 0x00000008 ++#define E1000_EXTCNF_CTRL_SWFLAG 0x00000020 ++#define E1000_EXTCNF_CTRL_GATE_PHY_CFG 0x00000080 ++#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK 0x00FF0000 ++#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT 16 ++#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK 0x0FFF0000 ++#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT 16 ++ ++#define E1000_PHY_CTRL_D0A_LPLU 0x00000002 ++#define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004 ++#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008 ++#define E1000_PHY_CTRL_GBE_DISABLE 0x00000040 ++ ++#define E1000_KABGTXD_BGSQLBIAS 0x00050000 ++ ++/* PBA constants */ ++#define E1000_PBA_8K 0x0008 /* 8KB */ ++#define E1000_PBA_16K 0x0010 /* 16KB */ ++ ++#define E1000_PBS_16K E1000_PBA_16K ++ ++#define IFS_MAX 80 ++#define IFS_MIN 40 ++#define IFS_RATIO 4 ++#define IFS_STEP 10 ++#define MIN_NUM_XMITS 1000 ++ ++/* SW Semaphore Register */ ++#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ ++#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ ++#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */ ++ ++#define E1000_SWSM2_LOCK 0x00000002 /* Secondary driver semaphore bit */ ++ ++/* Interrupt Cause Read */ ++#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ ++#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ ++#define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */ ++#define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ ++#define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ ++#define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit asserted, the driver should claim the interrupt */ ++#define E1000_ICR_RXQ0 0x00100000 /* Rx Queue 0 Interrupt */ ++#define E1000_ICR_RXQ1 0x00200000 /* Rx Queue 1 Interrupt */ ++#define E1000_ICR_TXQ0 0x00400000 /* Tx Queue 0 Interrupt */ ++#define E1000_ICR_TXQ1 0x00800000 /* Tx Queue 1 Interrupt */ ++#define E1000_ICR_OTHER 0x01000000 /* Other Interrupts */ ++ ++/* PBA ECC Register */ ++#define E1000_PBA_ECC_COUNTER_MASK 0xFFF00000 /* ECC counter mask */ ++#define E1000_PBA_ECC_COUNTER_SHIFT 20 /* ECC counter shift value */ ++#define E1000_PBA_ECC_CORR_EN 0x00000001 /* ECC correction enable */ ++#define E1000_PBA_ECC_STAT_CLR 0x00000002 /* Clear ECC error counter */ ++#define E1000_PBA_ECC_INT_EN 0x00000004 /* Enable ICR bit 5 for ECC */ ++ ++/* ++ * This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. 
Each bit is documented below: ++ * o RXT0 = Receiver Timer Interrupt (ring 0) ++ * o TXDW = Transmit Descriptor Written Back ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ * o LSC = Link Status Change ++ */ ++#define IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXT0 | \ ++ E1000_IMS_TXDW | \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ | \ ++ E1000_IMS_LSC) ++ ++/* Interrupt Mask Set */ ++#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ ++#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ ++#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */ ++#define E1000_IMS_RXQ0 E1000_ICR_RXQ0 /* Rx Queue 0 Interrupt */ ++#define E1000_IMS_RXQ1 E1000_ICR_RXQ1 /* Rx Queue 1 Interrupt */ ++#define E1000_IMS_TXQ0 E1000_ICR_TXQ0 /* Tx Queue 0 Interrupt */ ++#define E1000_IMS_TXQ1 E1000_ICR_TXQ1 /* Tx Queue 1 Interrupt */ ++#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupts */ ++ ++/* Interrupt Cause Set */ ++#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ ++#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ ++ ++/* Transmit Descriptor Control */ ++#define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ ++#define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */ ++#define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */ ++#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */ ++#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ ++#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */ ++/* Enable the counting of desc. still to be processed. */ ++#define E1000_TXDCTL_COUNT_DESC 0x00400000 ++ ++/* Flow Control Constants */ ++#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 ++#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 ++#define FLOW_CONTROL_TYPE 0x8808 ++ ++/* 802.1q VLAN Packet Size */ ++#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ ++ ++/* Receive Address */ ++/* ++ * Number of high/low register pairs in the RAR. The RAR (Receive Address ++ * Registers) holds the directed and multicast addresses that we monitor. ++ * Technically, we have 16 spots. However, we reserve one of these spots ++ * (RAR[15]) for our directed address used by controllers with ++ * manageability enabled, allowing us room for 15 multicast addresses. 
++ */ ++#define E1000_RAR_ENTRIES 15 ++#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ ++#define E1000_RAL_MAC_ADDR_LEN 4 ++#define E1000_RAH_MAC_ADDR_LEN 2 ++ ++/* Error Codes */ ++#define E1000_ERR_NVM 1 ++#define E1000_ERR_PHY 2 ++#define E1000_ERR_CONFIG 3 ++#define E1000_ERR_PARAM 4 ++#define E1000_ERR_MAC_INIT 5 ++#define E1000_ERR_PHY_TYPE 6 ++#define E1000_ERR_RESET 9 ++#define E1000_ERR_MASTER_REQUESTS_PENDING 10 ++#define E1000_ERR_HOST_INTERFACE_COMMAND 11 ++#define E1000_BLK_PHY_RESET 12 ++#define E1000_ERR_SWFW_SYNC 13 ++#define E1000_NOT_IMPLEMENTED 14 ++#define E1000_ERR_INVALID_ARGUMENT 16 ++#define E1000_ERR_NO_SPACE 17 ++#define E1000_ERR_NVM_PBA_SECTION 18 ++ ++/* Loop limit on how long we wait for auto-negotiation to complete */ ++#define FIBER_LINK_UP_LIMIT 50 ++#define COPPER_LINK_UP_LIMIT 10 ++#define PHY_AUTO_NEG_LIMIT 45 ++#define PHY_FORCE_LIMIT 20 ++/* Number of 100 microseconds we wait for PCI Express master disable */ ++#define MASTER_DISABLE_TIMEOUT 800 ++/* Number of milliseconds we wait for PHY configuration done after MAC reset */ ++#define PHY_CFG_TIMEOUT 100 ++/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */ ++#define MDIO_OWNERSHIP_TIMEOUT 10 ++/* Number of milliseconds for NVM auto read done after MAC reset. */ ++#define AUTO_READ_DONE_TIMEOUT 10 ++ ++/* Flow Control */ ++#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */ ++#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */ ++#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ ++ ++/* Transmit Configuration Word */ ++#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */ ++#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ ++#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ ++#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */ ++#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */ ++ ++/* Receive Configuration Word */ ++#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */ ++#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */ ++#define E1000_RXCW_C 0x20000000 /* Receive config */ ++#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ ++ ++/* PCI Express Control */ ++#define E1000_GCR_RXD_NO_SNOOP 0x00000001 ++#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 ++#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 ++#define E1000_GCR_TXD_NO_SNOOP 0x00000008 ++#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 ++#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 ++ ++#define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ ++ E1000_GCR_RXDSCW_NO_SNOOP | \ ++ E1000_GCR_RXDSCR_NO_SNOOP | \ ++ E1000_GCR_TXD_NO_SNOOP | \ ++ E1000_GCR_TXDSCW_NO_SNOOP | \ ++ E1000_GCR_TXDSCR_NO_SNOOP) ++ ++/* PHY Control Register */ ++#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ ++#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ ++#define MII_CR_POWER_DOWN 0x0800 /* Power down */ ++#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ ++#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ ++#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ ++#define MII_CR_SPEED_1000 0x0040 ++#define MII_CR_SPEED_100 0x2000 ++#define MII_CR_SPEED_10 0x0000 ++ ++/* PHY Status Register */ ++#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ ++#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ ++ ++/* Autoneg Advertisement Register */ ++#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ ++#define NWAY_AR_10T_FD_CAPS 
0x0040 /* 10T Full Duplex Capable */ ++#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ ++#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ ++#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ ++#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ ++ ++/* Link Partner Ability Register (Base Page) */ ++#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP 100TX Full Dplx Capable */ ++#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ ++#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ ++ ++/* Autoneg Expansion Register */ ++#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */ ++ ++/* 1000BASE-T Control Register */ ++#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ ++#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ ++ /* 0=DTE device */ ++#define CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */ ++ /* 0=Configure PHY as Slave */ ++#define CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */ ++ /* 0=Automatic Master/Slave config */ ++ ++/* 1000BASE-T Status Register */ ++#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ ++#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ ++ ++ ++/* PHY 1000 MII Register/Bit Definitions */ ++/* PHY Registers defined by IEEE */ ++#define PHY_CONTROL 0x00 /* Control Register */ ++#define PHY_STATUS 0x01 /* Status Register */ ++#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ ++#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ ++#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ ++#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ ++#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ ++#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ ++#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ ++#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ ++ ++#define PHY_CONTROL_LB 0x4000 /* PHY Loopback bit */ ++ ++/* NVM Control */ ++#define E1000_EECD_SK 0x00000001 /* NVM Clock */ ++#define E1000_EECD_CS 0x00000002 /* NVM Chip Select */ ++#define E1000_EECD_DI 0x00000004 /* NVM Data In */ ++#define E1000_EECD_DO 0x00000008 /* NVM Data Out */ ++#define E1000_EECD_REQ 0x00000040 /* NVM Access Request */ ++#define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */ ++#define E1000_EECD_PRES 0x00000100 /* NVM Present */ ++#define E1000_EECD_SIZE 0x00000200 /* NVM Size (0=64 word 1=256 word) */ ++/* NVM Addressing bits based on type (0-small, 1-large) */ ++#define E1000_EECD_ADDR_BITS 0x00000400 ++#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ ++#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ ++#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ ++#define E1000_EECD_SIZE_EX_SHIFT 11 ++#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ ++#define E1000_EECD_AUPDEN 0x00100000 /* Enable Autonomous FLASH update */ ++#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ ++#define E1000_EECD_SEC1VAL_VALID_MASK (E1000_EECD_AUTO_RD | E1000_EECD_PRES) ++ ++#define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM read/write registers */ ++#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ ++#define E1000_NVM_RW_REG_START 1 /* Start operation */ ++#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ ++#define E1000_NVM_POLL_WRITE 1 /* Flag for polling for write complete */ ++#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */ ++#define E1000_FLASH_UPDATES 2000 
++ ++/* NVM Word Offsets */ ++#define NVM_COMPAT 0x0003 ++#define NVM_ID_LED_SETTINGS 0x0004 ++#define NVM_INIT_CONTROL2_REG 0x000F ++#define NVM_INIT_CONTROL3_PORT_B 0x0014 ++#define NVM_INIT_3GIO_3 0x001A ++#define NVM_INIT_CONTROL3_PORT_A 0x0024 ++#define NVM_CFG 0x0012 ++#define NVM_ALT_MAC_ADDR_PTR 0x0037 ++#define NVM_CHECKSUM_REG 0x003F ++ ++#define E1000_NVM_INIT_CTRL2_MNGM 0x6000 /* Manageability Operation Mode mask */ ++ ++#define E1000_NVM_CFG_DONE_PORT_0 0x40000 /* MNG config cycle done */ ++#define E1000_NVM_CFG_DONE_PORT_1 0x80000 /* ...for second port */ ++ ++/* Mask bits for fields in Word 0x0f of the NVM */ ++#define NVM_WORD0F_PAUSE_MASK 0x3000 ++#define NVM_WORD0F_PAUSE 0x1000 ++#define NVM_WORD0F_ASM_DIR 0x2000 ++ ++/* Mask bits for fields in Word 0x1a of the NVM */ ++#define NVM_WORD1A_ASPM_MASK 0x000C ++ ++/* Mask bits for fields in Word 0x03 of the EEPROM */ ++#define NVM_COMPAT_LOM 0x0800 ++ ++/* length of string needed to store PBA number */ ++#define E1000_PBANUM_LENGTH 11 ++ ++/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */ ++#define NVM_SUM 0xBABA ++ ++/* PBA (printed board assembly) number words */ ++#define NVM_PBA_OFFSET_0 8 ++#define NVM_PBA_OFFSET_1 9 ++#define NVM_PBA_PTR_GUARD 0xFAFA ++#define NVM_WORD_SIZE_BASE_SHIFT 6 ++ ++/* NVM Commands - SPI */ ++#define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ ++#define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */ ++#define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */ ++#define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ ++#define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */ ++#define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */ ++ ++/* SPI NVM Status Register */ ++#define NVM_STATUS_RDY_SPI 0x01 ++ ++/* Word definitions for ID LED Settings */ ++#define ID_LED_RESERVED_0000 0x0000 ++#define ID_LED_RESERVED_FFFF 0xFFFF ++#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ ++ (ID_LED_OFF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++#define ID_LED_DEF1_DEF2 0x1 ++#define ID_LED_DEF1_ON2 0x2 ++#define ID_LED_DEF1_OFF2 0x3 ++#define ID_LED_ON1_DEF2 0x4 ++#define ID_LED_ON1_ON2 0x5 ++#define ID_LED_ON1_OFF2 0x6 ++#define ID_LED_OFF1_DEF2 0x7 ++#define ID_LED_OFF1_ON2 0x8 ++#define ID_LED_OFF1_OFF2 0x9 ++ ++#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF ++#define IGP_ACTIVITY_LED_ENABLE 0x0300 ++#define IGP_LED3_MODE 0x07000000 ++ ++/* PCI/PCI-X/PCI-EX Config space */ ++#define PCI_HEADER_TYPE_REGISTER 0x0E ++#define PCIE_LINK_STATUS 0x12 ++ ++#define PCI_HEADER_TYPE_MULTIFUNC 0x80 ++#define PCIE_LINK_WIDTH_MASK 0x3F0 ++#define PCIE_LINK_WIDTH_SHIFT 4 ++ ++#define PHY_REVISION_MASK 0xFFFFFFF0 ++#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ ++#define MAX_PHY_MULTI_PAGE_REG 0xF ++ ++/* Bit definitions for valid PHY IDs. 
*/ ++/* ++ * I = Integrated ++ * E = External ++ */ ++#define M88E1000_E_PHY_ID 0x01410C50 ++#define M88E1000_I_PHY_ID 0x01410C30 ++#define M88E1011_I_PHY_ID 0x01410C20 ++#define IGP01E1000_I_PHY_ID 0x02A80380 ++#define M88E1111_I_PHY_ID 0x01410CC0 ++#define GG82563_E_PHY_ID 0x01410CA0 ++#define IGP03E1000_E_PHY_ID 0x02A80390 ++#define IFE_E_PHY_ID 0x02A80330 ++#define IFE_PLUS_E_PHY_ID 0x02A80320 ++#define IFE_C_E_PHY_ID 0x02A80310 ++#define BME1000_E_PHY_ID 0x01410CB0 ++#define BME1000_E_PHY_ID_R2 0x01410CB1 ++#define I82577_E_PHY_ID 0x01540050 ++#define I82578_E_PHY_ID 0x004DD040 ++#define I82579_E_PHY_ID 0x01540090 ++#define I217_E_PHY_ID 0x015400A0 ++ ++/* M88E1000 Specific Registers */ ++#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ ++#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */ ++#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */ ++ ++#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for page number setting */ ++#define M88E1000_PHY_GEN_CONTROL 0x1E /* Its meaning depends on reg 29 */ ++ ++/* M88E1000 PHY Specific Control Register */ ++#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */ ++#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 /* MDI Crossover Mode bits 6:5 */ ++ /* Manual MDI configuration */ ++#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ ++/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */ ++#define M88E1000_PSCR_AUTO_X_1000T 0x0040 ++/* Auto crossover enabled all speeds */ ++#define M88E1000_PSCR_AUTO_X_MODE 0x0060 ++/* ++ * 1=Enable Extended 10BASE-T distance (Lower 10BASE-T Rx Threshold) ++ * 0=Normal 10BASE-T Rx Threshold ++ */ ++#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */ ++ ++/* M88E1000 PHY Specific Status Register */ ++#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ ++#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ ++#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ ++/* 0=<50M; 1=50-80M; 2=80-110M; 3=110-140M; 4=>140M */ ++#define M88E1000_PSSR_CABLE_LENGTH 0x0380 ++#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ ++#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ ++ ++#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 ++ ++/* ++ * Number of times we will attempt to autonegotiate before downshifting if we ++ * are the master ++ */ ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 ++/* ++ * Number of times we will attempt to autonegotiate before downshifting if we ++ * are the slave ++ */ ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 ++#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ ++ ++/* M88EC018 Rev 2 specific DownShift settings */ ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 ++ ++#define I82578_EPSCR_DOWNSHIFT_ENABLE 0x0020 ++#define I82578_EPSCR_DOWNSHIFT_COUNTER_MASK 0x001C ++ ++/* BME1000 PHY Specific Control Register */ ++#define BME1000_PSCR_ENABLE_DOWNSHIFT 0x0800 /* 1 = enable downshift */ ++ ++ ++#define PHY_PAGE_SHIFT 5 ++#define PHY_REG(page, reg) (((page) << PHY_PAGE_SHIFT) | \ ++ ((reg) & MAX_PHY_REG_ADDRESS)) ++ ++/* ++ * Bits... 
++ * 15-5: page ++ * 4-0: register offset ++ */ ++#define GG82563_PAGE_SHIFT 5 ++#define GG82563_REG(page, reg) \ ++ (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) ++#define GG82563_MIN_ALT_REG 30 ++ ++/* GG82563 Specific Registers */ ++#define GG82563_PHY_SPEC_CTRL \ ++ GG82563_REG(0, 16) /* PHY Specific Control */ ++#define GG82563_PHY_PAGE_SELECT \ ++ GG82563_REG(0, 22) /* Page Select */ ++#define GG82563_PHY_SPEC_CTRL_2 \ ++ GG82563_REG(0, 26) /* PHY Specific Control 2 */ ++#define GG82563_PHY_PAGE_SELECT_ALT \ ++ GG82563_REG(0, 29) /* Alternate Page Select */ ++ ++#define GG82563_PHY_MAC_SPEC_CTRL \ ++ GG82563_REG(2, 21) /* MAC Specific Control Register */ ++ ++#define GG82563_PHY_DSP_DISTANCE \ ++ GG82563_REG(5, 26) /* DSP Distance */ ++ ++/* Page 193 - Port Control Registers */ ++#define GG82563_PHY_KMRN_MODE_CTRL \ ++ GG82563_REG(193, 16) /* Kumeran Mode Control */ ++#define GG82563_PHY_PWR_MGMT_CTRL \ ++ GG82563_REG(193, 20) /* Power Management Control */ ++ ++/* Page 194 - KMRN Registers */ ++#define GG82563_PHY_INBAND_CTRL \ ++ GG82563_REG(194, 18) /* Inband Control */ ++ ++/* MDI Control */ ++#define E1000_MDIC_REG_SHIFT 16 ++#define E1000_MDIC_PHY_SHIFT 21 ++#define E1000_MDIC_OP_WRITE 0x04000000 ++#define E1000_MDIC_OP_READ 0x08000000 ++#define E1000_MDIC_READY 0x10000000 ++#define E1000_MDIC_ERROR 0x40000000 ++ ++/* SerDes Control */ ++#define E1000_GEN_POLL_TIMEOUT 640 ++ ++/* FW Semaphore */ ++#define E1000_FWSM_WLOCK_MAC_MASK 0x0380 ++#define E1000_FWSM_WLOCK_MAC_SHIFT 7 ++ ++#endif /* _E1000_DEFINES_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000e/netdev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/netdev.c 2021-04-07 16:01:27.175634241 +0800 +@@ -0,0 +1,4419 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "e1000.h" ++ ++#define RT_E1000E_NUM_RXD 64 ++ ++#define DRV_EXTRAVERSION "-k-rt" ++ ++#define DRV_VERSION "1.5.1" DRV_EXTRAVERSION ++char e1000e_driver_name[] = "rt_e1000e"; ++const char e1000e_driver_version[] = DRV_VERSION; ++ ++static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state); ++ ++static const struct e1000_info *e1000_info_tbl[] = { ++ [board_82571] = &e1000_82571_info, ++ [board_82572] = &e1000_82572_info, ++ [board_82573] = &e1000_82573_info, ++ [board_82574] = &e1000_82574_info, ++ [board_82583] = &e1000_82583_info, ++ [board_80003es2lan] = &e1000_es2_info, ++ [board_ich8lan] = &e1000_ich8_info, ++ [board_ich9lan] = &e1000_ich9_info, ++ [board_ich10lan] = &e1000_ich10_info, ++ [board_pchlan] = &e1000_pch_info, ++ [board_pch2lan] = &e1000_pch2_info, ++ [board_pch_lpt] = &e1000_pch_lpt_info, ++}; ++ ++struct e1000_reg_info { ++ u32 ofs; ++ char *name; ++}; ++ ++#define E1000_RDFH 0x02410 /* Rx Data FIFO Head - RW */ ++#define E1000_RDFT 0x02418 /* Rx Data FIFO Tail - RW */ ++#define E1000_RDFHS 0x02420 /* Rx Data FIFO Head Saved - RW */ ++#define E1000_RDFTS 0x02428 /* Rx Data FIFO Tail Saved - RW */ ++#define E1000_RDFPC 0x02430 /* Rx Data FIFO Packet Count - RW */ ++ ++#define E1000_TDFH 0x03410 /* Tx Data FIFO Head - RW */ ++#define E1000_TDFT 0x03418 /* Tx Data FIFO Tail - RW */ ++#define E1000_TDFHS 0x03420 /* Tx Data FIFO Head Saved - RW */ ++#define E1000_TDFTS 0x03428 /* Tx Data FIFO Tail Saved - RW */ ++#define E1000_TDFPC 0x03430 /* Tx Data FIFO Packet Count - RW */ ++ ++static const struct e1000_reg_info e1000_reg_info_tbl[] = { ++ ++ /* General Registers */ ++ {E1000_CTRL, "CTRL"}, ++ {E1000_STATUS, "STATUS"}, ++ {E1000_CTRL_EXT, "CTRL_EXT"}, ++ ++ /* Interrupt Registers */ ++ {E1000_ICR, "ICR"}, ++ ++ /* Rx Registers */ ++ {E1000_RCTL, "RCTL"}, ++ {E1000_RDLEN, "RDLEN"}, ++ {E1000_RDH, "RDH"}, ++ {E1000_RDT, "RDT"}, ++ {E1000_RDTR, "RDTR"}, ++ {E1000_RXDCTL(0), "RXDCTL"}, ++ {E1000_ERT, "ERT"}, ++ {E1000_RDBAL, "RDBAL"}, ++ {E1000_RDBAH, "RDBAH"}, ++ {E1000_RDFH, "RDFH"}, ++ {E1000_RDFT, "RDFT"}, ++ {E1000_RDFHS, "RDFHS"}, ++ {E1000_RDFTS, "RDFTS"}, ++ {E1000_RDFPC, "RDFPC"}, ++ ++ /* Tx Registers */ ++ {E1000_TCTL, "TCTL"}, ++ {E1000_TDBAL, "TDBAL"}, ++ {E1000_TDBAH, "TDBAH"}, ++ {E1000_TDLEN, "TDLEN"}, ++ {E1000_TDH, "TDH"}, ++ {E1000_TDT, "TDT"}, ++ {E1000_TIDV, "TIDV"}, ++ {E1000_TXDCTL(0), "TXDCTL"}, ++ {E1000_TADV, "TADV"}, ++ {E1000_TARC(0), "TARC"}, ++ {E1000_TDFH, "TDFH"}, ++ {E1000_TDFT, "TDFT"}, ++ {E1000_TDFHS, "TDFHS"}, ++ {E1000_TDFTS, "TDFTS"}, ++ {E1000_TDFPC, "TDFPC"}, ++ ++ /* List Terminator */ ++ {} ++}; ++ ++/* ++ * e1000_regdump - register printout routine ++ */ ++static void e1000_regdump(struct e1000_hw *hw, struct e1000_reg_info *reginfo) ++{ ++ int n = 0; ++ char rname[16]; ++ u32 regs[8]; ++ ++ switch (reginfo->ofs) { ++ case E1000_RXDCTL(0): ++ for (n = 0; n < 2; n++) ++ regs[n] = __er32(hw, E1000_RXDCTL(n)); ++ break; ++ case E1000_TXDCTL(0): ++ for (n = 0; n < 2; n++) ++ regs[n] = __er32(hw, E1000_TXDCTL(n)); ++ break; ++ case E1000_TARC(0): ++ for (n = 0; n < 2; n++) ++ 
regs[n] = __er32(hw, E1000_TARC(n)); ++ break; ++ default: ++ printk(KERN_INFO "%-15s %08x\n", ++ reginfo->name, __er32(hw, reginfo->ofs)); ++ return; ++ } ++ ++ snprintf(rname, 16, "%s%s", reginfo->name, "[0-1]"); ++ printk(KERN_INFO "%-15s ", rname); ++ for (n = 0; n < 2; n++) ++ printk(KERN_CONT "%08x ", regs[n]); ++ printk(KERN_CONT "\n"); ++} ++ ++/* ++ * e1000e_dump - Print registers, Tx-ring and Rx-ring ++ */ ++static void e1000e_dump(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_reg_info *reginfo; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_tx_desc *tx_desc; ++ struct my_u0 { ++ u64 a; ++ u64 b; ++ } *u0; ++ struct e1000_buffer *buffer_info; ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ union e1000_rx_desc_packet_split *rx_desc_ps; ++ union e1000_rx_desc_extended *rx_desc; ++ struct my_u1 { ++ u64 a; ++ u64 b; ++ u64 c; ++ u64 d; ++ } *u1; ++ u32 staterr; ++ int i = 0; ++ ++ if (!netif_msg_hw(adapter)) ++ return; ++ ++ /* Print netdevice Info */ ++ if (netdev) { ++ dev_info(&adapter->pdev->dev, "Net device Info\n"); ++ printk(KERN_INFO "Device Name state " ++ "trans_start last_rx\n"); ++ printk(KERN_INFO "%-15s\n", netdev->name); ++ } ++ ++ /* Print Registers */ ++ dev_info(&adapter->pdev->dev, "Register Dump\n"); ++ printk(KERN_INFO " Register Name Value\n"); ++ for (reginfo = (struct e1000_reg_info *)e1000_reg_info_tbl; ++ reginfo->name; reginfo++) { ++ e1000_regdump(hw, reginfo); ++ } ++ ++ /* Print Tx Ring Summary */ ++ if (!netdev || !rtnetif_running(netdev)) ++ goto exit; ++ ++ dev_info(&adapter->pdev->dev, "Tx Ring Summary\n"); ++ printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]" ++ " leng ntw timestamp\n"); ++ buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean]; ++ printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n", ++ 0, tx_ring->next_to_use, tx_ring->next_to_clean, ++ (unsigned long long)buffer_info->dma, ++ buffer_info->length, ++ buffer_info->next_to_watch, ++ (unsigned long long)buffer_info->time_stamp); ++ ++ /* Print Tx Ring */ ++ if (!netif_msg_tx_done(adapter)) ++ goto rx_ring_summary; ++ ++ dev_info(&adapter->pdev->dev, "Tx Ring Dump\n"); ++ ++ /* Transmit Descriptor Formats - DEXT[29] is 0 (Legacy) or 1 (Extended) ++ * ++ * Legacy Transmit Descriptor ++ * +--------------------------------------------------------------+ ++ * 0 | Buffer Address [63:0] (Reserved on Write Back) | ++ * +--------------------------------------------------------------+ ++ * 8 | Special | CSS | Status | CMD | CSO | Length | ++ * +--------------------------------------------------------------+ ++ * 63 48 47 36 35 32 31 24 23 16 15 0 ++ * ++ * Extended Context Descriptor (DTYP=0x0) for TSO or checksum offload ++ * 63 48 47 40 39 32 31 16 15 8 7 0 ++ * +----------------------------------------------------------------+ ++ * 0 | TUCSE | TUCS0 | TUCSS | IPCSE | IPCS0 | IPCSS | ++ * +----------------------------------------------------------------+ ++ * 8 | MSS | HDRLEN | RSV | STA | TUCMD | DTYP | PAYLEN | ++ * +----------------------------------------------------------------+ ++ * 63 48 47 40 39 36 35 32 31 24 23 20 19 0 ++ * ++ * Extended Data Descriptor (DTYP=0x1) ++ * +----------------------------------------------------------------+ ++ * 0 | Buffer Address [63:0] | ++ * +----------------------------------------------------------------+ ++ * 8 | VLAN tag | POPTS | Rsvd | Status | Command | DTYP | DTALEN | ++ * 
+----------------------------------------------------------------+ ++ * 63 48 47 40 39 36 35 32 31 24 23 20 19 0 ++ */ ++ printk(KERN_INFO "Tl[desc] [address 63:0 ] [SpeCssSCmCsLen]" ++ " [bi->dma ] leng ntw timestamp bi->skb " ++ "<-- Legacy format\n"); ++ printk(KERN_INFO "Tc[desc] [Ce CoCsIpceCoS] [MssHlRSCm0Plen]" ++ " [bi->dma ] leng ntw timestamp bi->skb " ++ "<-- Ext Context format\n"); ++ printk(KERN_INFO "Td[desc] [address 63:0 ] [VlaPoRSCm1Dlen]" ++ " [bi->dma ] leng ntw timestamp bi->skb " ++ "<-- Ext Data format\n"); ++ for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ buffer_info = &tx_ring->buffer_info[i]; ++ u0 = (struct my_u0 *)tx_desc; ++ printk(KERN_INFO "T%c[0x%03X] %016llX %016llX %016llX " ++ "%04X %3X %016llX %p", ++ (!(le64_to_cpu(u0->b) & (1 << 29)) ? 'l' : ++ ((le64_to_cpu(u0->b) & (1 << 20)) ? 'd' : 'c')), i, ++ (unsigned long long)le64_to_cpu(u0->a), ++ (unsigned long long)le64_to_cpu(u0->b), ++ (unsigned long long)buffer_info->dma, ++ buffer_info->length, buffer_info->next_to_watch, ++ (unsigned long long)buffer_info->time_stamp, ++ buffer_info->skb); ++ if (i == tx_ring->next_to_use && i == tx_ring->next_to_clean) ++ printk(KERN_CONT " NTC/U\n"); ++ else if (i == tx_ring->next_to_use) ++ printk(KERN_CONT " NTU\n"); ++ else if (i == tx_ring->next_to_clean) ++ printk(KERN_CONT " NTC\n"); ++ else ++ printk(KERN_CONT "\n"); ++ ++ if (netif_msg_pktdata(adapter) && buffer_info->dma != 0) ++ print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, ++ 16, 1, phys_to_virt(buffer_info->dma), ++ buffer_info->length, true); ++ } ++ ++ /* Print Rx Ring Summary */ ++rx_ring_summary: ++ dev_info(&adapter->pdev->dev, "Rx Ring Summary\n"); ++ printk(KERN_INFO "Queue [NTU] [NTC]\n"); ++ printk(KERN_INFO " %5d %5X %5X\n", 0, ++ rx_ring->next_to_use, rx_ring->next_to_clean); ++ ++ /* Print Rx Ring */ ++ if (!netif_msg_rx_status(adapter)) ++ goto exit; ++ ++ dev_info(&adapter->pdev->dev, "Rx Ring Dump\n"); ++ switch (adapter->rx_ps_pages) { ++ case 1: ++ case 2: ++ case 3: ++ /* [Extended] Packet Split Receive Descriptor Format ++ * ++ * +-----------------------------------------------------+ ++ * 0 | Buffer Address 0 [63:0] | ++ * +-----------------------------------------------------+ ++ * 8 | Buffer Address 1 [63:0] | ++ * +-----------------------------------------------------+ ++ * 16 | Buffer Address 2 [63:0] | ++ * +-----------------------------------------------------+ ++ * 24 | Buffer Address 3 [63:0] | ++ * +-----------------------------------------------------+ ++ */ ++ printk(KERN_INFO "R [desc] [buffer 0 63:0 ] " ++ "[buffer 1 63:0 ] " ++ "[buffer 2 63:0 ] [buffer 3 63:0 ] [bi->dma ] " ++ "[bi->skb] <-- Ext Pkt Split format\n"); ++ /* [Extended] Receive Descriptor (Write-Back) Format ++ * ++ * 63 48 47 32 31 13 12 8 7 4 3 0 ++ * +------------------------------------------------------+ ++ * 0 | Packet | IP | Rsvd | MRQ | Rsvd | MRQ RSS | ++ * | Checksum | Ident | | Queue | | Type | ++ * +------------------------------------------------------+ ++ * 8 | VLAN Tag | Length | Extended Error | Extended Status | ++ * +------------------------------------------------------+ ++ * 63 48 47 32 31 20 19 0 ++ */ ++ printk(KERN_INFO "RWB[desc] [ck ipid mrqhsh] " ++ "[vl l0 ee es] " ++ "[ l3 l2 l1 hs] [reserved ] ---------------- " ++ "[bi->skb] <-- Ext Rx Write-Back format\n"); ++ for (i = 0; i < rx_ring->count; i++) { ++ buffer_info = &rx_ring->buffer_info[i]; ++ rx_desc_ps = E1000_RX_DESC_PS(*rx_ring, i); ++ u1 = (struct my_u1 
*)rx_desc_ps; ++ staterr = ++ le32_to_cpu(rx_desc_ps->wb.middle.status_error); ++ if (staterr & E1000_RXD_STAT_DD) { ++ /* Descriptor Done */ ++ printk(KERN_INFO "RWB[0x%03X] %016llX " ++ "%016llX %016llX %016llX " ++ "---------------- %p", i, ++ (unsigned long long)le64_to_cpu(u1->a), ++ (unsigned long long)le64_to_cpu(u1->b), ++ (unsigned long long)le64_to_cpu(u1->c), ++ (unsigned long long)le64_to_cpu(u1->d), ++ buffer_info->skb); ++ } else { ++ printk(KERN_INFO "R [0x%03X] %016llX " ++ "%016llX %016llX %016llX %016llX %p", i, ++ (unsigned long long)le64_to_cpu(u1->a), ++ (unsigned long long)le64_to_cpu(u1->b), ++ (unsigned long long)le64_to_cpu(u1->c), ++ (unsigned long long)le64_to_cpu(u1->d), ++ (unsigned long long)buffer_info->dma, ++ buffer_info->skb); ++ ++ if (netif_msg_pktdata(adapter)) ++ print_hex_dump(KERN_INFO, "", ++ DUMP_PREFIX_ADDRESS, 16, 1, ++ phys_to_virt(buffer_info->dma), ++ adapter->rx_ps_bsize0, true); ++ } ++ ++ if (i == rx_ring->next_to_use) ++ printk(KERN_CONT " NTU\n"); ++ else if (i == rx_ring->next_to_clean) ++ printk(KERN_CONT " NTC\n"); ++ else ++ printk(KERN_CONT "\n"); ++ } ++ break; ++ default: ++ case 0: ++ /* Extended Receive Descriptor (Read) Format ++ * ++ * +-----------------------------------------------------+ ++ * 0 | Buffer Address [63:0] | ++ * +-----------------------------------------------------+ ++ * 8 | Reserved | ++ * +-----------------------------------------------------+ ++ */ ++ printk(KERN_INFO "R [desc] [buf addr 63:0 ] " ++ "[reserved 63:0 ] [bi->dma ] " ++ "[bi->skb] <-- Ext (Read) format\n"); ++ /* Extended Receive Descriptor (Write-Back) Format ++ * ++ * 63 48 47 32 31 24 23 4 3 0 ++ * +------------------------------------------------------+ ++ * | RSS Hash | | | | ++ * 0 +-------------------+ Rsvd | Reserved | MRQ RSS | ++ * | Packet | IP | | | Type | ++ * | Checksum | Ident | | | | ++ * +------------------------------------------------------+ ++ * 8 | VLAN Tag | Length | Extended Error | Extended Status | ++ * +------------------------------------------------------+ ++ * 63 48 47 32 31 20 19 0 ++ */ ++ printk(KERN_INFO "RWB[desc] [cs ipid mrq] " ++ "[vt ln xe xs] " ++ "[bi->skb] <-- Ext (Write-Back) format\n"); ++ ++ for (i = 0; i < rx_ring->count; i++) { ++ buffer_info = &rx_ring->buffer_info[i]; ++ rx_desc = E1000_RX_DESC_EXT(*rx_ring, i); ++ u1 = (struct my_u1 *)rx_desc; ++ staterr = le32_to_cpu(rx_desc->wb.upper.status_error); ++ if (staterr & E1000_RXD_STAT_DD) { ++ /* Descriptor Done */ ++ printk(KERN_INFO "RWB[0x%03X] %016llX " ++ "%016llX ---------------- %p", i, ++ (unsigned long long)le64_to_cpu(u1->a), ++ (unsigned long long)le64_to_cpu(u1->b), ++ buffer_info->skb); ++ } else { ++ printk(KERN_INFO "R [0x%03X] %016llX " ++ "%016llX %016llX %p", i, ++ (unsigned long long)le64_to_cpu(u1->a), ++ (unsigned long long)le64_to_cpu(u1->b), ++ (unsigned long long)buffer_info->dma, ++ buffer_info->skb); ++ ++ if (netif_msg_pktdata(adapter)) ++ print_hex_dump(KERN_INFO, "", ++ DUMP_PREFIX_ADDRESS, 16, ++ 1, ++ phys_to_virt ++ (buffer_info->dma), ++ adapter->rx_buffer_len, ++ true); ++ } ++ ++ if (i == rx_ring->next_to_use) ++ printk(KERN_CONT " NTU\n"); ++ else if (i == rx_ring->next_to_clean) ++ printk(KERN_CONT " NTC\n"); ++ else ++ printk(KERN_CONT "\n"); ++ } ++ } ++ ++exit: ++ return; ++} ++ ++void e1000e_mod_watchdog_timer(rtdm_nrtsig_t *nrt_sig, void *data) ++{ ++ struct timer_list *timer = data; ++ ++ mod_timer(timer, jiffies + 1); ++} ++ ++void e1000e_trigger_downshift(rtdm_nrtsig_t *nrt_sig, void *data) ++{ ++ struct 
work_struct *downshift_task = data; ++ ++ schedule_work(downshift_task); ++} ++ ++/** ++ * e1000_desc_unused - calculate if we have unused descriptors ++ **/ ++static int e1000_desc_unused(struct e1000_ring *ring) ++{ ++ if (ring->next_to_clean > ring->next_to_use) ++ return ring->next_to_clean - ring->next_to_use - 1; ++ ++ return ring->count + ring->next_to_clean - ring->next_to_use - 1; ++} ++ ++/** ++ * e1000_rx_checksum - Receive Checksum Offload ++ * @adapter: board private structure ++ * @status_err: receive descriptor status and error fields ++ * @csum: receive descriptor csum field ++ * @sk_buff: socket buffer with received data ++ **/ ++static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, ++ u32 csum, struct rtskb *skb) ++{ ++ u16 status = (u16)status_err; ++ u8 errors = (u8)(status_err >> 24); ++ ++ /* Ignore Checksum bit is set */ ++ if (status & E1000_RXD_STAT_IXSM) ++ return; ++ /* TCP/UDP checksum error bit is set */ ++ if (errors & E1000_RXD_ERR_TCPE) { ++ /* let the stack verify checksum errors */ ++ adapter->hw_csum_err++; ++ return; ++ } ++ ++ /* TCP/UDP Checksum has not been calculated */ ++ if (!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) ++ return; ++ ++ /* It must be a TCP or UDP packet with a valid checksum */ ++ if (status & E1000_RXD_STAT_TCPCS) { ++ /* TCP checksum is good */ ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ } else { ++ /* ++ * IP fragment with UDP payload ++ * Hardware complements the payload checksum, so we undo it ++ * and then put the value in host order for further stack use. ++ */ ++ __sum16 sum = (__force __sum16)htons(csum); ++ skb->csum = csum_unfold(~sum); ++ skb->ip_summed = CHECKSUM_COMPLETE; ++ } ++ adapter->hw_csum_good++; ++} ++ ++/** ++ * e1000e_update_tail_wa - helper function for e1000e_update_[rt]dt_wa() ++ * @hw: pointer to the HW structure ++ * @tail: address of tail descriptor register ++ * @i: value to write to tail descriptor register ++ * ++ * When updating the tail register, the ME could be accessing Host CSR ++ * registers at the same time. Normally, this is handled in h/w by an ++ * arbiter but on some parts there is a bug that acknowledges Host accesses ++ * later than it should which could result in the descriptor register to ++ * have an incorrect value. Workaround this by checking the FWSM register ++ * which has bit 24 set while ME is accessing Host CSR registers, wait ++ * if it is set and try again a number of times. 
++ **/ ++static inline s32 e1000e_update_tail_wa(struct e1000_hw *hw, u8 __iomem * tail, ++ unsigned int i) ++{ ++ unsigned int j = 0; ++ ++ while ((j++ < E1000_ICH_FWSM_PCIM2PCI_COUNT) && ++ (er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI)) ++ udelay(50); ++ ++ writel(i, tail); ++ ++ if ((j == E1000_ICH_FWSM_PCIM2PCI_COUNT) && (i != readl(tail))) ++ return E1000_ERR_SWFW_SYNC; ++ ++ return 0; ++} ++ ++static void e1000e_update_rdt_wa(struct e1000_adapter *adapter, unsigned int i) ++{ ++ u8 __iomem *tail = (adapter->hw.hw_addr + adapter->rx_ring->tail); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (e1000e_update_tail_wa(hw, tail, i)) { ++ u32 rctl = er32(RCTL); ++ ew32(RCTL, rctl & ~E1000_RCTL_EN); ++ e_err("ME firmware caused invalid RDT - resetting\n"); ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ } ++} ++ ++static void e1000e_update_tdt_wa(struct e1000_adapter *adapter, unsigned int i) ++{ ++ u8 __iomem *tail = (adapter->hw.hw_addr + adapter->tx_ring->tail); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (e1000e_update_tail_wa(hw, tail, i)) { ++ u32 tctl = er32(TCTL); ++ ew32(TCTL, tctl & ~E1000_TCTL_EN); ++ e_err("ME firmware caused invalid TDT - resetting\n"); ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ } ++} ++ ++/** ++ * e1000_alloc_rx_buffers - Replace used receive buffers ++ * @adapter: address of board private structure ++ **/ ++static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, ++ int cleaned_count, gfp_t gfp) ++{ ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ union e1000_rx_desc_extended *rx_desc; ++ struct e1000_buffer *buffer_info; ++ struct rtskb *skb; ++ unsigned int i; ++ unsigned int bufsz = adapter->rx_buffer_len; ++ ++ i = rx_ring->next_to_use; ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (cleaned_count--) { ++ skb = buffer_info->skb; ++ if (skb) { ++ rtskb_trim(skb, 0); ++ goto map_skb; ++ } ++ ++ skb = rtnetdev_alloc_rtskb(adapter->netdev, bufsz); ++ if (!skb) { ++ /* Better luck next round */ ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ rtskb_reserve(skb, NET_IP_ALIGN); ++ ++ buffer_info->skb = skb; ++map_skb: ++ buffer_info->dma = rtskb_data_dma_addr(skb, 0); ++ ++ rx_desc = E1000_RX_DESC_EXT(*rx_ring, i); ++ rx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma); ++ ++ if (unlikely(!(i & (E1000_RX_BUFFER_WRITE - 1)))) { ++ /* ++ * Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). 
++ */ ++ wmb(); ++ if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) ++ e1000e_update_rdt_wa(adapter, i); ++ else ++ writel(i, adapter->hw.hw_addr + rx_ring->tail); ++ } ++ i++; ++ if (i == rx_ring->count) ++ i = 0; ++ buffer_info = &rx_ring->buffer_info[i]; ++ } ++ ++ rx_ring->next_to_use = i; ++} ++ ++/** ++ * e1000_clean_rx_irq - Send received data up the network stack; legacy ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ nanosecs_abs_t *time_stamp) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ union e1000_rx_desc_extended *rx_desc, *next_rxd; ++ struct e1000_buffer *buffer_info, *next_buffer; ++ u32 length, staterr; ++ unsigned int i; ++ int cleaned_count = 0; ++ bool data_received = false; ++ unsigned int total_rx_bytes = 0, total_rx_packets = 0; ++ ++ i = rx_ring->next_to_clean; ++ rx_desc = E1000_RX_DESC_EXT(*rx_ring, i); ++ staterr = le32_to_cpu(rx_desc->wb.upper.status_error); ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (staterr & E1000_RXD_STAT_DD) { ++ struct rtskb *skb; ++ ++ rmb(); /* read descriptor and rx_buffer_info after status DD */ ++ ++ skb = buffer_info->skb; ++ buffer_info->skb = NULL; ++ ++ prefetch(skb->data - NET_IP_ALIGN); ++ ++ i++; ++ if (i == rx_ring->count) ++ i = 0; ++ next_rxd = E1000_RX_DESC_EXT(*rx_ring, i); ++ prefetch(next_rxd); ++ ++ next_buffer = &rx_ring->buffer_info[i]; ++ ++ cleaned_count++; ++ buffer_info->dma = 0; ++ ++ length = le16_to_cpu(rx_desc->wb.upper.length); ++ ++ /* ++ * !EOP means multiple descriptors were used to store a single ++ * packet, if that's the case we need to toss it. In fact, we ++ * need to toss every packet with the EOP bit clear and the ++ * next frame that _does_ have the EOP bit set, as it is by ++ * definition only a frame fragment ++ */ ++ if (unlikely(!(staterr & E1000_RXD_STAT_EOP))) ++ adapter->flags2 |= FLAG2_IS_DISCARDING; ++ ++ if (adapter->flags2 & FLAG2_IS_DISCARDING) { ++ /* All receives must fit into a single buffer */ ++ e_dbg("Receive packet consumed multiple buffers\n"); ++ /* recycle */ ++ buffer_info->skb = skb; ++ if (staterr & E1000_RXD_STAT_EOP) ++ adapter->flags2 &= ~FLAG2_IS_DISCARDING; ++ goto next_desc; ++ } ++ ++ if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { ++ /* recycle */ ++ buffer_info->skb = skb; ++ goto next_desc; ++ } ++ ++ /* adjust length to remove Ethernet CRC */ ++ if (!(adapter->flags2 & FLAG2_CRC_STRIPPING)) ++ length -= 4; ++ ++ total_rx_bytes += length; ++ total_rx_packets++; ++ ++ rtskb_put(skb, length); ++ ++ /* Receive Checksum Offload */ ++ e1000_rx_checksum(adapter, staterr, ++ le16_to_cpu(rx_desc->wb.lower.hi_dword. 
++ csum_ip.csum), skb); ++ ++ skb->protocol = rt_eth_type_trans(skb, netdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ data_received = true; ++ ++next_desc: ++ rx_desc->wb.upper.status_error &= cpu_to_le32(~0xFF); ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (cleaned_count >= E1000_RX_BUFFER_WRITE) { ++ adapter->alloc_rx_buf(adapter, cleaned_count, ++ GFP_ATOMIC); ++ cleaned_count = 0; ++ } ++ ++ /* use prefetched values */ ++ rx_desc = next_rxd; ++ buffer_info = next_buffer; ++ ++ staterr = le32_to_cpu(rx_desc->wb.upper.status_error); ++ } ++ rx_ring->next_to_clean = i; ++ ++ cleaned_count = e1000_desc_unused(rx_ring); ++ if (cleaned_count) ++ adapter->alloc_rx_buf(adapter, cleaned_count, GFP_ATOMIC); ++ ++ adapter->total_rx_bytes += total_rx_bytes; ++ adapter->total_rx_packets += total_rx_packets; ++ return data_received; ++} ++ ++static void e1000_put_txbuf(struct e1000_adapter *adapter, ++ struct e1000_buffer *buffer_info) ++{ ++ buffer_info->dma = 0; ++ if (buffer_info->skb) { ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ buffer_info->time_stamp = 0; ++} ++ ++/** ++ * e1000_clean_tx_irq - Reclaim resources after transmit completes ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++static bool e1000_clean_tx_irq(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_tx_desc *tx_desc, *eop_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i, eop; ++ unsigned int count = 0; ++ unsigned int total_tx_bytes = 0, total_tx_packets = 0; ++ ++ i = tx_ring->next_to_clean; ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++ ++ while ((eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) && ++ (count < tx_ring->count)) { ++ bool cleaned = false; ++ rmb(); /* read buffer_info after eop_desc */ ++ for (; !cleaned; count++) { ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ buffer_info = &tx_ring->buffer_info[i]; ++ cleaned = (i == eop); ++ ++ if (cleaned) { ++ total_tx_packets += buffer_info->segs; ++ total_tx_bytes += buffer_info->bytecount; ++ } ++ ++ e1000_put_txbuf(adapter, buffer_info); ++ tx_desc->upper.data = 0; ++ ++ i++; ++ if (i == tx_ring->count) ++ i = 0; ++ } ++ ++ if (i == tx_ring->next_to_use) ++ break; ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++ } ++ ++ tx_ring->next_to_clean = i; ++ ++#define TX_WAKE_THRESHOLD 32 ++ if (count && rtnetif_carrier_ok(netdev) && ++ e1000_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD) { ++ /* Make sure that anybody stopping the queue after this ++ * sees the new next_to_clean. 
++ */ ++ smp_mb(); ++ ++ if (rtnetif_queue_stopped(netdev) && ++ !(test_bit(__E1000_DOWN, &adapter->state))) { ++ rtnetif_wake_queue(netdev); ++ ++adapter->restart_queue; ++ } ++ } ++ ++ if (adapter->detect_tx_hung) { ++ /* ++ * Detect a transmit hang in hardware, this serializes the ++ * check with the clearing of time_stamp and movement of i ++ */ ++ adapter->detect_tx_hung = 0; ++ if (tx_ring->buffer_info[i].time_stamp && ++ time_after(jiffies, tx_ring->buffer_info[i].time_stamp ++ + (adapter->tx_timeout_factor * HZ)) && ++ !(er32(STATUS) & E1000_STATUS_TXOFF)) { ++ rtnetif_stop_queue(netdev); ++ } ++ } ++ adapter->total_tx_bytes += total_tx_bytes; ++ adapter->total_tx_packets += total_tx_packets; ++ return count < tx_ring->count; ++} ++ ++/** ++ * e1000_clean_rx_ring - Free Rx Buffers per Queue ++ * @adapter: board private structure ++ **/ ++static void e1000_clean_rx_ring(struct e1000_adapter *adapter) ++{ ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ struct e1000_buffer *buffer_info; ++ unsigned int i; ++ ++ /* Free all the Rx ring sk_buffs */ ++ for (i = 0; i < rx_ring->count; i++) { ++ buffer_info = &rx_ring->buffer_info[i]; ++ buffer_info->dma = 0; ++ ++ if (buffer_info->skb) { ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ } ++ ++ /* there also may be some cached data from a chained receive */ ++ if (rx_ring->rx_skb_top) { ++ kfree_rtskb(rx_ring->rx_skb_top); ++ rx_ring->rx_skb_top = NULL; ++ } ++ ++ /* Zero out the descriptor ring */ ++ memset(rx_ring->desc, 0, rx_ring->size); ++ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ adapter->flags2 &= ~FLAG2_IS_DISCARDING; ++ ++ writel(0, adapter->hw.hw_addr + rx_ring->head); ++ writel(0, adapter->hw.hw_addr + rx_ring->tail); ++} ++ ++static void e1000e_downshift_workaround(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, downshift_task); ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ return; ++ ++ e1000e_gig_downshift_workaround_ich8lan(&adapter->hw); ++} ++ ++/** ++ * e1000_intr_msi - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int e1000_intr_msi(rtdm_irq_t *irq_handle) ++{ ++ struct e1000_adapter *adapter = ++ rtdm_irq_get_arg(irq_handle, struct e1000_adapter); ++ struct e1000_hw *hw = &adapter->hw; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ u32 icr = er32(ICR); ++ ++ /* ++ * read ICR disables interrupts using IAM ++ */ ++ ++ if (icr & E1000_ICR_LSC) { ++ hw->mac.get_link_status = 1; ++ /* ++ * ICH8 workaround-- Call gig speed drop workaround on cable ++ * disconnect (LSC) before accessing any PHY registers ++ */ ++ if ((adapter->flags & FLAG_LSC_GIG_SPEED_DROP) && ++ (!(er32(STATUS) & E1000_STATUS_LU))) ++ rtdm_schedule_nrt_work(&adapter->downshift_task); ++ ++ /* ++ * 80003ES2LAN workaround-- For packet buffer work-around on ++ * link down event; disable receives here in the ISR and reset ++ * adapter in watchdog ++ */ ++ if (rtnetif_carrier_ok(adapter->netdev) && ++ adapter->flags & FLAG_RX_NEEDS_RESTART) { ++ /* disable receives */ ++ u32 rctl = er32(RCTL); ++ ew32(RCTL, rctl & ~E1000_RCTL_EN); ++ adapter->flags |= FLAG_RX_RESTART_NOW; ++ } ++ /* guard against interrupt when we're going down */ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ rtdm_nrtsig_pend(&adapter->mod_timer_sig); ++ } ++ ++ if (!e1000_clean_tx_irq(adapter)) ++ /* Ring was not completely cleaned, so fire another interrupt */ ++ ew32(ICS, 
adapter->tx_ring->ims_val); ++ ++ if (e1000_clean_rx_irq(adapter, &time_stamp)) ++ rt_mark_stack_mgr(adapter->netdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++/** ++ * e1000_intr - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int e1000_intr(rtdm_irq_t *irq_handle) ++{ ++ struct e1000_adapter *adapter = ++ rtdm_irq_get_arg(irq_handle, struct e1000_adapter); ++ struct e1000_hw *hw = &adapter->hw; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ u32 rctl, icr = er32(ICR); ++ ++ if (!icr || test_bit(__E1000_DOWN, &adapter->state)) ++ return RTDM_IRQ_NONE; /* Not our interrupt */ ++ ++ /* ++ * IMS will not auto-mask if INT_ASSERTED is not set, and if it is ++ * not set, then the adapter didn't send an interrupt ++ */ ++ if (!(icr & E1000_ICR_INT_ASSERTED)) ++ return RTDM_IRQ_NONE; ++ ++ /* ++ * Interrupt Auto-Mask...upon reading ICR, ++ * interrupts are masked. No need for the ++ * IMC write ++ */ ++ ++ if (icr & E1000_ICR_LSC) { ++ hw->mac.get_link_status = 1; ++ /* ++ * ICH8 workaround-- Call gig speed drop workaround on cable ++ * disconnect (LSC) before accessing any PHY registers ++ */ ++ if ((adapter->flags & FLAG_LSC_GIG_SPEED_DROP) && ++ (!(er32(STATUS) & E1000_STATUS_LU))) ++ rtdm_nrtsig_pend(&adapter->downshift_sig); ++ ++ /* ++ * 80003ES2LAN workaround-- ++ * For packet buffer work-around on link down event; ++ * disable receives here in the ISR and ++ * reset adapter in watchdog ++ */ ++ if (rtnetif_carrier_ok(adapter->netdev) && ++ (adapter->flags & FLAG_RX_NEEDS_RESTART)) { ++ /* disable receives */ ++ rctl = er32(RCTL); ++ ew32(RCTL, rctl & ~E1000_RCTL_EN); ++ adapter->flags |= FLAG_RX_RESTART_NOW; ++ } ++ /* guard against interrupt when we're going down */ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ rtdm_nrtsig_pend(&adapter->mod_timer_sig); ++ } ++ ++ if (!e1000_clean_tx_irq(adapter)) ++ /* Ring was not completely cleaned, so fire another interrupt */ ++ ew32(ICS, adapter->tx_ring->ims_val); ++ ++ if (e1000_clean_rx_irq(adapter, &time_stamp)) ++ rt_mark_stack_mgr(adapter->netdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static irqreturn_t e1000_msix_other(int irq, void *data) ++{ ++ struct rtnet_device *netdev = data; ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 icr = er32(ICR); ++ ++ if (!(icr & E1000_ICR_INT_ASSERTED)) { ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ ew32(IMS, E1000_IMS_OTHER); ++ return IRQ_NONE; ++ } ++ ++ if (icr & adapter->eiac_mask) ++ ew32(ICS, (icr & adapter->eiac_mask)); ++ ++ if (icr & E1000_ICR_OTHER) { ++ if (!(icr & E1000_ICR_LSC)) ++ goto no_link_interrupt; ++ hw->mac.get_link_status = 1; ++ /* guard against interrupt when we're going down */ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ mod_timer(&adapter->watchdog_timer, jiffies + 1); ++ } ++ ++no_link_interrupt: ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER); ++ ++ return IRQ_HANDLED; ++} ++ ++ ++static int e1000_intr_msix_tx(rtdm_irq_t *irq_handle) ++{ ++ struct e1000_adapter *adapter = ++ rtdm_irq_get_arg(irq_handle, struct e1000_adapter); ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ ++ ++ adapter->total_tx_bytes = 0; ++ adapter->total_tx_packets = 0; ++ ++ if (!e1000_clean_tx_irq(adapter)) ++ /* Ring was not completely cleaned, so fire another interrupt */ ++ ew32(ICS, tx_ring->ims_val); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int 
e1000_intr_msix_rx(rtdm_irq_t *irq_handle) ++{ ++ struct e1000_adapter *adapter = ++ rtdm_irq_get_arg(irq_handle, struct e1000_adapter); ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ /* Write the ITR value calculated at the end of the ++ * previous interrupt. ++ */ ++ if (adapter->rx_ring->set_itr) { ++ writel(1000000000 / (adapter->rx_ring->itr_val * 256), ++ adapter->hw.hw_addr + adapter->rx_ring->itr_register); ++ adapter->rx_ring->set_itr = 0; ++ } ++ ++ if (e1000_clean_rx_irq(adapter, &time_stamp)) ++ rt_mark_stack_mgr(adapter->netdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++/** ++ * e1000_configure_msix - Configure MSI-X hardware ++ * ++ * e1000_configure_msix sets up the hardware to properly ++ * generate MSI-X interrupts. ++ **/ ++static void e1000_configure_msix(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ int vector = 0; ++ u32 ctrl_ext, ivar = 0; ++ ++ adapter->eiac_mask = 0; ++ ++ /* Workaround issue with spurious interrupts on 82574 in MSI-X mode */ ++ if (hw->mac.type == e1000_82574) { ++ u32 rfctl = er32(RFCTL); ++ rfctl |= E1000_RFCTL_ACK_DIS; ++ ew32(RFCTL, rfctl); ++ } ++ ++#define E1000_IVAR_INT_ALLOC_VALID 0x8 ++ /* Configure Rx vector */ ++ rx_ring->ims_val = E1000_IMS_RXQ0; ++ adapter->eiac_mask |= rx_ring->ims_val; ++ if (rx_ring->itr_val) ++ writel(1000000000 / (rx_ring->itr_val * 256), ++ hw->hw_addr + rx_ring->itr_register); ++ else ++ writel(1, hw->hw_addr + rx_ring->itr_register); ++ ivar = E1000_IVAR_INT_ALLOC_VALID | vector; ++ ++ /* Configure Tx vector */ ++ tx_ring->ims_val = E1000_IMS_TXQ0; ++ vector++; ++ if (tx_ring->itr_val) ++ writel(1000000000 / (tx_ring->itr_val * 256), ++ hw->hw_addr + tx_ring->itr_register); ++ else ++ writel(1, hw->hw_addr + tx_ring->itr_register); ++ adapter->eiac_mask |= tx_ring->ims_val; ++ ivar |= ((E1000_IVAR_INT_ALLOC_VALID | vector) << 8); ++ ++ /* set vector for Other Causes, e.g. link changes */ ++ vector++; ++ ivar |= ((E1000_IVAR_INT_ALLOC_VALID | vector) << 16); ++ if (rx_ring->itr_val) ++ writel(1000000000 / (rx_ring->itr_val * 256), ++ hw->hw_addr + E1000_EITR_82574(vector)); ++ else ++ writel(1, hw->hw_addr + E1000_EITR_82574(vector)); ++ ++ /* Cause Tx interrupts on every write back */ ++ ivar |= (1 << 31); ++ ++ ew32(IVAR, ivar); ++ ++ /* enable MSI-X PBA support */ ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_PBA_CLR; ++ ++ /* Auto-Mask Other interrupts upon ICR read */ ++#define E1000_EIAC_MASK_82574 0x01F00000 ++ ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER); ++ ctrl_ext |= E1000_CTRL_EXT_EIAME; ++ ew32(CTRL_EXT, ctrl_ext); ++ e1e_flush(); ++} ++ ++void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter) ++{ ++ if (adapter->msix_entries) { ++ pci_disable_msix(adapter->pdev); ++ kfree(adapter->msix_entries); ++ adapter->msix_entries = NULL; ++ } else if (adapter->flags & FLAG_MSI_ENABLED) { ++ pci_disable_msi(adapter->pdev); ++ adapter->flags &= ~FLAG_MSI_ENABLED; ++ } ++} ++ ++/** ++ * e1000e_set_interrupt_capability - set MSI or MSI-X if supported ++ * ++ * Attempt to configure interrupts using the best available ++ * capabilities of the hardware and kernel. 
++ **/ ++void e1000e_set_interrupt_capability(struct e1000_adapter *adapter) ++{ ++ int err; ++ int i; ++ ++ switch (adapter->int_mode) { ++ case E1000E_INT_MODE_MSIX: ++ if (adapter->flags & FLAG_HAS_MSIX) { ++ adapter->num_vectors = 3; /* RxQ0, TxQ0 and other */ ++ adapter->msix_entries = kcalloc(adapter->num_vectors, ++ sizeof(struct msix_entry), ++ GFP_KERNEL); ++ if (adapter->msix_entries) { ++ for (i = 0; i < adapter->num_vectors; i++) ++ adapter->msix_entries[i].entry = i; ++ ++ err = pci_enable_msix_range(adapter->pdev, ++ adapter->msix_entries, ++ adapter->num_vectors, ++ adapter->num_vectors); ++ if (err == 0) ++ return; ++ } ++ /* MSI-X failed, so fall through and try MSI */ ++ e_err("Failed to initialize MSI-X interrupts. " ++ "Falling back to MSI interrupts.\n"); ++ e1000e_reset_interrupt_capability(adapter); ++ } ++ adapter->int_mode = E1000E_INT_MODE_MSI; ++ /* Fall through */ ++ case E1000E_INT_MODE_MSI: ++ if (!pci_enable_msi(adapter->pdev)) { ++ adapter->flags |= FLAG_MSI_ENABLED; ++ } else { ++ adapter->int_mode = E1000E_INT_MODE_LEGACY; ++ e_err("Failed to initialize MSI interrupts. Falling " ++ "back to legacy interrupts.\n"); ++ } ++ /* Fall through */ ++ case E1000E_INT_MODE_LEGACY: ++ /* Don't do anything; this is the system default */ ++ break; ++ } ++ ++ /* store the number of vectors being used */ ++ adapter->num_vectors = 1; ++} ++ ++/** ++ * e1000_request_msix - Initialize MSI-X interrupts ++ * ++ * e1000_request_msix allocates MSI-X vectors and requests interrupts from the ++ * kernel. ++ **/ ++static int e1000_request_msix(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ int err = 0, vector = 0; ++ ++ if (strlen(netdev->name) < (IFNAMSIZ - 5)) ++ snprintf(adapter->rx_ring->name, ++ sizeof(adapter->rx_ring->name) - 1, ++ "%s-rx-0", netdev->name); ++ else ++ memcpy(adapter->rx_ring->name, netdev->name, IFNAMSIZ); ++ err = rtdm_irq_request(&adapter->rx_irq_handle, ++ adapter->msix_entries[vector].vector, ++ e1000_intr_msix_rx, 0, adapter->rx_ring->name, ++ adapter); ++ if (err) ++ goto out; ++ adapter->rx_ring->itr_register = E1000_EITR_82574(vector); ++ adapter->rx_ring->itr_val = adapter->itr; ++ vector++; ++ ++ if (strlen(netdev->name) < (IFNAMSIZ - 5)) ++ snprintf(adapter->tx_ring->name, ++ sizeof(adapter->tx_ring->name) - 1, ++ "%s-tx-0", netdev->name); ++ else ++ memcpy(adapter->tx_ring->name, netdev->name, IFNAMSIZ); ++ err = rtdm_irq_request(&adapter->tx_irq_handle, ++ adapter->msix_entries[vector].vector, ++ e1000_intr_msix_tx, 0, adapter->tx_ring->name, ++ adapter); ++ if (err) ++ goto out; ++ adapter->tx_ring->itr_register = E1000_EITR_82574(vector); ++ adapter->tx_ring->itr_val = adapter->itr; ++ vector++; ++ ++ err = request_irq(adapter->msix_entries[vector].vector, ++ e1000_msix_other, 0, netdev->name, netdev); ++ if (err) ++ goto out; ++ ++ e1000_configure_msix(adapter); ++ return 0; ++out: ++ return err; ++} ++ ++/** ++ * e1000_request_irq - initialize interrupts ++ * ++ * Attempts to configure interrupts using the best available ++ * capabilities of the hardware and kernel. 
++ **/ ++static int e1000_request_irq(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ int err; ++ ++ if (adapter->msix_entries) { ++ err = e1000_request_msix(adapter); ++ if (!err) ++ return err; ++ /* fall back to MSI */ ++ e1000e_reset_interrupt_capability(adapter); ++ adapter->int_mode = E1000E_INT_MODE_MSI; ++ e1000e_set_interrupt_capability(adapter); ++ } ++ if (adapter->flags & FLAG_MSI_ENABLED) { ++ err = rtdm_irq_request(&adapter->irq_handle, ++ adapter->pdev->irq, e1000_intr_msi, ++ 0, netdev->name, adapter); ++ if (!err) ++ return err; ++ ++ /* fall back to legacy interrupt */ ++ e1000e_reset_interrupt_capability(adapter); ++ adapter->int_mode = E1000E_INT_MODE_LEGACY; ++ } ++ ++ err = rtdm_irq_request(&adapter->irq_handle, adapter->pdev->irq, ++ e1000_intr, 0, netdev->name, adapter); ++ if (err) ++ e_err("Unable to allocate interrupt, Error: %d\n", err); ++ ++ return err; ++} ++ ++static void e1000_free_irq(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ ++ if (adapter->msix_entries) { ++ int vector = 0; ++ ++ rtdm_irq_disable(&adapter->rx_irq_handle); ++ rtdm_irq_free(&adapter->rx_irq_handle); ++ vector++; ++ ++ rtdm_irq_disable(&adapter->tx_irq_handle); ++ rtdm_irq_free(&adapter->tx_irq_handle); ++ vector++; ++ ++ /* Other Causes interrupt vector */ ++ free_irq(adapter->msix_entries[vector].vector, netdev); ++ return; ++ } ++ ++ if (adapter->flags & FLAG_MSI_ENABLED) ++ rtdm_irq_disable(&adapter->irq_handle); ++ rtdm_irq_free(&adapter->irq_handle); ++} ++ ++/** ++ * e1000_irq_disable - Mask off interrupt generation on the NIC ++ **/ ++static void e1000_irq_disable(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ ew32(IMC, ~0); ++ if (adapter->msix_entries) ++ ew32(EIAC_82574, 0); ++ e1e_flush(); ++ ++ if (adapter->msix_entries) { ++ int i; ++ for (i = 0; i < adapter->num_vectors; i++) ++ synchronize_irq(adapter->msix_entries[i].vector); ++ } else { ++ synchronize_irq(adapter->pdev->irq); ++ } ++} ++ ++/** ++ * e1000_irq_enable - Enable default interrupt generation settings ++ **/ ++static void e1000_irq_enable(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (adapter->msix_entries) { ++ ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); ++ ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC); ++ } else { ++ ew32(IMS, IMS_ENABLE_MASK); ++ } ++ e1e_flush(); ++} ++ ++/** ++ * e1000e_get_hw_control - get control of the h/w from f/w ++ * @adapter: address of board private structure ++ * ++ * e1000e_get_hw_control sets {CTRL_EXT|SWSM}:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that ++ * the driver is loaded. For AMT version (only with 82573) ++ * of the f/w this means that the network i/f is open. ++ **/ ++void e1000e_get_hw_control(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl_ext; ++ u32 swsm; ++ ++ /* Let firmware know the driver has taken over */ ++ if (adapter->flags & FLAG_HAS_SWSM_ON_LOAD) { ++ swsm = er32(SWSM); ++ ew32(SWSM, swsm | E1000_SWSM_DRV_LOAD); ++ } else if (adapter->flags & FLAG_HAS_CTRLEXT_ON_LOAD) { ++ ctrl_ext = er32(CTRL_EXT); ++ ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); ++ } ++} ++ ++/** ++ * e1000e_release_hw_control - release control of the h/w to f/w ++ * @adapter: address of board private structure ++ * ++ * e1000e_release_hw_control resets {CTRL_EXT|SWSM}:DRV_LOAD bit. 
++ * For ASF and Pass Through versions of f/w this means that the ++ * driver is no longer loaded. For AMT version (only with 82573) i ++ * of the f/w this means that the network i/f is closed. ++ * ++ **/ ++void e1000e_release_hw_control(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl_ext; ++ u32 swsm; ++ ++ /* Let firmware taken over control of h/w */ ++ if (adapter->flags & FLAG_HAS_SWSM_ON_LOAD) { ++ swsm = er32(SWSM); ++ ew32(SWSM, swsm & ~E1000_SWSM_DRV_LOAD); ++ } else if (adapter->flags & FLAG_HAS_CTRLEXT_ON_LOAD) { ++ ctrl_ext = er32(CTRL_EXT); ++ ew32(CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); ++ } ++} ++ ++/** ++ * @e1000_alloc_ring - allocate memory for a ring structure ++ **/ ++static int e1000_alloc_ring_dma(struct e1000_adapter *adapter, ++ struct e1000_ring *ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ ++ ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma, ++ GFP_KERNEL); ++ if (!ring->desc) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/** ++ * e1000e_setup_tx_resources - allocate Tx resources (Descriptors) ++ * @adapter: board private structure ++ * ++ * Return 0 on success, negative on failure ++ **/ ++int e1000e_setup_tx_resources(struct e1000_adapter *adapter) ++{ ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ int err = -ENOMEM, size; ++ ++ size = sizeof(struct e1000_buffer) * tx_ring->count; ++ tx_ring->buffer_info = vzalloc(size); ++ if (!tx_ring->buffer_info) ++ goto err; ++ ++ /* round up to nearest 4K */ ++ tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc); ++ tx_ring->size = ALIGN(tx_ring->size, 4096); ++ ++ err = e1000_alloc_ring_dma(adapter, tx_ring); ++ if (err) ++ goto err; ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ ++ return 0; ++err: ++ vfree(tx_ring->buffer_info); ++ e_err("Unable to allocate memory for the transmit descriptor ring\n"); ++ return err; ++} ++ ++/** ++ * e1000e_setup_rx_resources - allocate Rx resources (Descriptors) ++ * @adapter: board private structure ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++int e1000e_setup_rx_resources(struct e1000_adapter *adapter) ++{ ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ int size, desc_len, err = -ENOMEM; ++ ++ size = sizeof(struct e1000_buffer) * rx_ring->count; ++ rx_ring->buffer_info = vzalloc(size); ++ if (!rx_ring->buffer_info) ++ goto err; ++ ++ desc_len = sizeof(union e1000_rx_desc_packet_split); ++ ++ /* Round up to nearest 4K */ ++ rx_ring->size = rx_ring->count * desc_len; ++ rx_ring->size = ALIGN(rx_ring->size, 4096); ++ ++ err = e1000_alloc_ring_dma(adapter, rx_ring); ++ if (err) ++ goto err; ++ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ rx_ring->rx_skb_top = NULL; ++ ++ return 0; ++ ++err: ++ vfree(rx_ring->buffer_info); ++ e_err("Unable to allocate memory for the receive descriptor ring\n"); ++ return err; ++} ++ ++/** ++ * e1000_clean_tx_ring - Free Tx Buffers ++ * @adapter: board private structure ++ **/ ++static void e1000_clean_tx_ring(struct e1000_adapter *adapter) ++{ ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_buffer *buffer_info; ++ unsigned long size; ++ unsigned int i; ++ ++ for (i = 0; i < tx_ring->count; i++) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ e1000_put_txbuf(adapter, buffer_info); ++ } ++ ++ size = sizeof(struct e1000_buffer) * tx_ring->count; ++ memset(tx_ring->buffer_info, 0, size); ++ ++ memset(tx_ring->desc, 0, tx_ring->size); ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ 
++ writel(0, adapter->hw.hw_addr + tx_ring->head); ++ writel(0, adapter->hw.hw_addr + tx_ring->tail); ++} ++ ++/** ++ * e1000e_free_tx_resources - Free Tx Resources per Queue ++ * @adapter: board private structure ++ * ++ * Free all transmit software resources ++ **/ ++void e1000e_free_tx_resources(struct e1000_adapter *adapter) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ ++ e1000_clean_tx_ring(adapter); ++ ++ vfree(tx_ring->buffer_info); ++ tx_ring->buffer_info = NULL; ++ ++ dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc, ++ tx_ring->dma); ++ tx_ring->desc = NULL; ++} ++ ++/** ++ * e1000e_free_rx_resources - Free Rx Resources ++ * @adapter: board private structure ++ * ++ * Free all receive software resources ++ **/ ++ ++void e1000e_free_rx_resources(struct e1000_adapter *adapter) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ int i; ++ ++ e1000_clean_rx_ring(adapter); ++ ++ for (i = 0; i < rx_ring->count; i++) ++ kfree(rx_ring->buffer_info[i].ps_pages); ++ ++ vfree(rx_ring->buffer_info); ++ rx_ring->buffer_info = NULL; ++ ++ dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc, ++ rx_ring->dma); ++ rx_ring->desc = NULL; ++} ++ ++/** ++ * e1000_alloc_queues - Allocate memory for all rings ++ * @adapter: board private structure to initialize ++ **/ ++static int e1000_alloc_queues(struct e1000_adapter *adapter) ++{ ++ adapter->tx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL); ++ if (!adapter->tx_ring) ++ goto err; ++ ++ rtdm_lock_init(&adapter->tx_ring->lock); ++ ++ adapter->rx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL); ++ if (!adapter->rx_ring) ++ goto err; ++ ++ return 0; ++err: ++ e_err("Unable to allocate memory for queues\n"); ++ kfree(adapter->rx_ring); ++ kfree(adapter->tx_ring); ++ return -ENOMEM; ++} ++ ++static void e1000_vlan_rx_add_vid(struct rtnet_device *netdev, u16 vid) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 vfta, index; ++ ++ /* don't update vlan cookie if already programmed */ ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && ++ (vid == adapter->mng_vlan_id)) ++ return; ++ ++ /* add VID to filter table */ ++ if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) { ++ index = (vid >> 5) & 0x7F; ++ vfta = E1000_READ_REG_ARRAY(hw, E1000_VFTA, index); ++ vfta |= (1 << (vid & 0x1F)); ++ hw->mac.ops.write_vfta(hw, index, vfta); ++ } ++ ++ set_bit(vid, adapter->active_vlans); ++} ++ ++static void e1000_vlan_rx_kill_vid(struct rtnet_device *netdev, u16 vid) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 vfta, index; ++ ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && ++ (vid == adapter->mng_vlan_id)) { ++ /* release control to f/w */ ++ e1000e_release_hw_control(adapter); ++ return; ++ } ++ ++ /* remove VID from filter table */ ++ if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) { ++ index = (vid >> 5) & 0x7F; ++ vfta = E1000_READ_REG_ARRAY(hw, E1000_VFTA, index); ++ vfta &= ~(1 << (vid & 0x1F)); ++ hw->mac.ops.write_vfta(hw, index, vfta); ++ } ++ ++ clear_bit(vid, adapter->active_vlans); ++} ++ ++/** ++ * e1000e_vlan_filter_disable - helper to disable hw VLAN filtering ++ * @adapter: board private structure to initialize ++ **/ ++static void e1000e_vlan_filter_disable(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = 
&adapter->hw; ++ u32 rctl; ++ ++ if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) { ++ /* disable VLAN receive filtering */ ++ rctl = er32(RCTL); ++ rctl &= ~(E1000_RCTL_VFE | E1000_RCTL_CFIEN); ++ ew32(RCTL, rctl); ++ ++ if (adapter->mng_vlan_id != (u16)E1000_MNG_VLAN_NONE) { ++ e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); ++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; ++ } ++ } ++} ++ ++/** ++ * e1000e_vlan_filter_enable - helper to enable HW VLAN filtering ++ * @adapter: board private structure to initialize ++ **/ ++static void e1000e_vlan_filter_enable(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ ++ if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) { ++ /* enable VLAN receive filtering */ ++ rctl = er32(RCTL); ++ rctl |= E1000_RCTL_VFE; ++ rctl &= ~E1000_RCTL_CFIEN; ++ ew32(RCTL, rctl); ++ } ++} ++ ++/** ++ * e1000e_vlan_strip_enable - helper to disable HW VLAN stripping ++ * @adapter: board private structure to initialize ++ **/ ++static void e1000e_vlan_strip_disable(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl; ++ ++ /* disable VLAN tag insert/strip */ ++ ctrl = er32(CTRL); ++ ctrl &= ~E1000_CTRL_VME; ++ ew32(CTRL, ctrl); ++} ++ ++/** ++ * e1000e_vlan_strip_enable - helper to enable HW VLAN stripping ++ * @adapter: board private structure to initialize ++ **/ ++static void e1000e_vlan_strip_enable(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl; ++ ++ /* enable VLAN tag insert/strip */ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_VME; ++ ew32(CTRL, ctrl); ++} ++ ++static void e1000_update_mng_vlan(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ u16 vid = adapter->hw.mng_cookie.vlan_id; ++ u16 old_vid = adapter->mng_vlan_id; ++ ++ if (adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { ++ e1000_vlan_rx_add_vid(netdev, vid); ++ adapter->mng_vlan_id = vid; ++ } ++ ++ if ((old_vid != (u16)E1000_MNG_VLAN_NONE) && (vid != old_vid)) ++ e1000_vlan_rx_kill_vid(netdev, old_vid); ++} ++ ++static void e1000_restore_vlan(struct e1000_adapter *adapter) ++{ ++ u16 vid; ++ ++ e1000_vlan_rx_add_vid(adapter->netdev, 0); ++ ++ for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) ++ e1000_vlan_rx_add_vid(adapter->netdev, vid); ++} ++ ++static void e1000_init_manageability_pt(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 manc, manc2h, mdef, i, j; ++ ++ if (!(adapter->flags & FLAG_MNG_PT_ENABLED)) ++ return; ++ ++ manc = er32(MANC); ++ ++ /* ++ * enable receiving management packets to the host. this will probably ++ * generate destination unreachable messages from the host OS, but ++ * the packets will be handled on SMBUS ++ */ ++ manc |= E1000_MANC_EN_MNG2HOST; ++ manc2h = er32(MANC2H); ++ ++ switch (hw->mac.type) { ++ default: ++ manc2h |= (E1000_MANC2H_PORT_623 | E1000_MANC2H_PORT_664); ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ /* ++ * Check if IPMI pass-through decision filter already exists; ++ * if so, enable it. 
++ */ ++ for (i = 0, j = 0; i < 8; i++) { ++ mdef = er32(MDEF(i)); ++ ++ /* Ignore filters with anything other than IPMI ports */ ++ if (mdef & ~(E1000_MDEF_PORT_623 | E1000_MDEF_PORT_664)) ++ continue; ++ ++ /* Enable this decision filter in MANC2H */ ++ if (mdef) ++ manc2h |= (1 << i); ++ ++ j |= mdef; ++ } ++ ++ if (j == (E1000_MDEF_PORT_623 | E1000_MDEF_PORT_664)) ++ break; ++ ++ /* Create new decision filter in an empty filter */ ++ for (i = 0, j = 0; i < 8; i++) ++ if (er32(MDEF(i)) == 0) { ++ ew32(MDEF(i), (E1000_MDEF_PORT_623 | ++ E1000_MDEF_PORT_664)); ++ manc2h |= (1 << 1); ++ j++; ++ break; ++ } ++ ++ if (!j) ++ e_warn("Unable to create IPMI pass-through filter\n"); ++ break; ++ } ++ ++ ew32(MANC2H, manc2h); ++ ew32(MANC, manc); ++} ++ ++/** ++ * e1000_configure_tx - Configure Transmit Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Tx unit of the MAC after a reset. ++ **/ ++static void e1000_configure_tx(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ u64 tdba; ++ u32 tdlen, tctl, tipg, tarc; ++ u32 ipgr1, ipgr2; ++ ++ /* Setup the HW Tx Head and Tail descriptor pointers */ ++ tdba = tx_ring->dma; ++ tdlen = tx_ring->count * sizeof(struct e1000_tx_desc); ++ ew32(TDBAL, (tdba & DMA_BIT_MASK(32))); ++ ew32(TDBAH, (tdba >> 32)); ++ ew32(TDLEN, tdlen); ++ ew32(TDH, 0); ++ ew32(TDT, 0); ++ tx_ring->head = E1000_TDH; ++ tx_ring->tail = E1000_TDT; ++ ++ /* Set the default values for the Tx Inter Packet Gap timer */ ++ tipg = DEFAULT_82543_TIPG_IPGT_COPPER; /* 8 */ ++ ipgr1 = DEFAULT_82543_TIPG_IPGR1; /* 8 */ ++ ipgr2 = DEFAULT_82543_TIPG_IPGR2; /* 6 */ ++ ++ if (adapter->flags & FLAG_TIPG_MEDIUM_FOR_80003ESLAN) ++ ipgr2 = DEFAULT_80003ES2LAN_TIPG_IPGR2; /* 7 */ ++ ++ tipg |= ipgr1 << E1000_TIPG_IPGR1_SHIFT; ++ tipg |= ipgr2 << E1000_TIPG_IPGR2_SHIFT; ++ ew32(TIPG, tipg); ++ ++ /* Set the Tx Interrupt Delay register */ ++ ew32(TIDV, adapter->tx_int_delay); ++ /* Tx irq moderation */ ++ ew32(TADV, adapter->tx_abs_int_delay); ++ ++ if (adapter->flags2 & FLAG2_DMA_BURST) { ++ u32 txdctl = er32(TXDCTL(0)); ++ txdctl &= ~(E1000_TXDCTL_PTHRESH | E1000_TXDCTL_HTHRESH | ++ E1000_TXDCTL_WTHRESH); ++ /* ++ * set up some performance related parameters to encourage the ++ * hardware to use the bus more efficiently in bursts, depends ++ * on the tx_int_delay to be enabled, ++ * wthresh = 5 ==> burst write a cacheline (64 bytes) at a time ++ * hthresh = 1 ==> prefetch when one or more available ++ * pthresh = 0x1f ==> prefetch if internal cache 31 or less ++ * BEWARE: this seems to work but should be considered first if ++ * there are Tx hangs or other Tx related bugs ++ */ ++ txdctl |= E1000_TXDCTL_DMA_BURST_ENABLE; ++ ew32(TXDCTL(0), txdctl); ++ /* erratum work around: set txdctl the same for both queues */ ++ ew32(TXDCTL(1), txdctl); ++ } ++ ++ /* Program the Transmit Control Register */ ++ tctl = er32(TCTL); ++ tctl &= ~E1000_TCTL_CT; ++ tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | ++ (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); ++ ++ if (adapter->flags & FLAG_TARC_SPEED_MODE_BIT) { ++ tarc = er32(TARC(0)); ++ /* ++ * set the speed mode bit, we'll clear it if we're not at ++ * gigabit link later ++ */ ++#define SPEED_MODE_BIT (1 << 21) ++ tarc |= SPEED_MODE_BIT; ++ ew32(TARC(0), tarc); ++ } ++ ++ /* errata: program both queues to unweighted RR */ ++ if (adapter->flags & FLAG_TARC_SET_BIT_ZERO) { ++ tarc = er32(TARC(0)); ++ tarc |= 1; ++ ew32(TARC(0), tarc); ++ tarc = er32(TARC(1)); ++ 
tarc |= 1; ++ ew32(TARC(1), tarc); ++ } ++ ++ /* Setup Transmit Descriptor Settings for eop descriptor */ ++ adapter->txd_cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS; ++ ++ /* only set IDE if we are delaying interrupts using the timers */ ++ if (adapter->tx_int_delay) ++ adapter->txd_cmd |= E1000_TXD_CMD_IDE; ++ ++ /* enable Report Status bit */ ++ adapter->txd_cmd |= E1000_TXD_CMD_RS; ++ ++ ew32(TCTL, tctl); ++ ++ e1000e_config_collision_dist(hw); ++} ++ ++/** ++ * e1000_setup_rctl - configure the receive control registers ++ * @adapter: Board private structure ++ **/ ++#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \ ++ (((S) & (PAGE_SIZE - 1)) ? 1 : 0)) ++static void e1000_setup_rctl(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl, rfctl; ++ ++ /* Workaround Si errata on PCHx - configure jumbo frame flow */ ++ if (hw->mac.type >= e1000_pch2lan) { ++ s32 ret_val; ++ ++ if (adapter->netdev->mtu > ETH_DATA_LEN) ++ ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, true); ++ else ++ ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, false); ++ ++ if (ret_val) ++ e_dbg("failed to enable jumbo frame workaround mode\n"); ++ } ++ ++ /* Program MC offset vector base */ ++ rctl = er32(RCTL); ++ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); ++ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | ++ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | ++ (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); ++ ++ /* Do not Store bad packets */ ++ rctl &= ~E1000_RCTL_SBP; ++ ++ /* Enable Long Packet receive */ ++ if (adapter->netdev->mtu <= ETH_DATA_LEN) ++ rctl &= ~E1000_RCTL_LPE; ++ else ++ rctl |= E1000_RCTL_LPE; ++ ++ /* Some systems expect that the CRC is included in SMBUS traffic. The ++ * hardware strips the CRC before sending to both SMBUS (BMC) and to ++ * host memory when this is enabled ++ */ ++ if (adapter->flags2 & FLAG2_CRC_STRIPPING) ++ rctl |= E1000_RCTL_SECRC; ++ ++ /* Workaround Si errata on 82577 PHY - configure IPG for jumbos */ ++ if ((hw->phy.type == e1000_phy_82577) && (rctl & E1000_RCTL_LPE)) { ++ u16 phy_data; ++ ++ e1e_rphy(hw, PHY_REG(770, 26), &phy_data); ++ phy_data &= 0xfff8; ++ phy_data |= (1 << 2); ++ e1e_wphy(hw, PHY_REG(770, 26), phy_data); ++ ++ e1e_rphy(hw, 22, &phy_data); ++ phy_data &= 0x0fff; ++ phy_data |= (1 << 14); ++ e1e_wphy(hw, 0x10, 0x2823); ++ e1e_wphy(hw, 0x11, 0x0003); ++ e1e_wphy(hw, 22, phy_data); ++ } ++ ++ /* Setup buffer sizes */ ++ rctl &= ~E1000_RCTL_SZ_4096; ++ rctl |= E1000_RCTL_BSEX; ++ switch (adapter->rx_buffer_len) { ++ case 2048: ++ default: ++ rctl |= E1000_RCTL_SZ_2048; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case 4096: ++ rctl |= E1000_RCTL_SZ_4096; ++ break; ++ case 8192: ++ rctl |= E1000_RCTL_SZ_8192; ++ break; ++ case 16384: ++ rctl |= E1000_RCTL_SZ_16384; ++ break; ++ } ++ ++ /* Enable Extended Status in all Receive Descriptors */ ++ rfctl = er32(RFCTL); ++ rfctl |= E1000_RFCTL_EXTEN; ++ ++ adapter->rx_ps_pages = 0; ++ ++ ew32(RFCTL, rfctl); ++ ew32(RCTL, rctl); ++ /* just started the receive unit, no need to restart */ ++ adapter->flags &= ~FLAG_RX_RESTART_NOW; ++} ++ ++/** ++ * e1000_configure_rx - Configure Receive Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Rx unit of the MAC after a reset. 
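++ * In this RTnet port e1000_clean_rx_irq and e1000_alloc_rx_buffers are
++ * installed as the Rx handlers and the ring length is sized for extended
++ * Rx descriptors; packet-split pages are not used (rx_ps_pages is 0).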
++ **/ ++static void e1000_configure_rx(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ u64 rdba; ++ u32 rdlen, rctl, rxcsum, ctrl_ext; ++ ++ rdlen = rx_ring->count * sizeof(union e1000_rx_desc_extended); ++ adapter->clean_rx = e1000_clean_rx_irq; ++ adapter->alloc_rx_buf = e1000_alloc_rx_buffers; ++ ++ /* disable receives while setting up the descriptors */ ++ rctl = er32(RCTL); ++ if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX)) ++ ew32(RCTL, rctl & ~E1000_RCTL_EN); ++ e1e_flush(); ++ usleep_range(10000, 20000); ++ ++ if (adapter->flags2 & FLAG2_DMA_BURST) { ++ /* ++ * set the writeback threshold (only takes effect if the RDTR ++ * is set). set GRAN=1 and write back up to 0x4 worth, and ++ * enable prefetching of 0x20 Rx descriptors ++ * granularity = 01 ++ * wthresh = 04, ++ * hthresh = 04, ++ * pthresh = 0x20 ++ */ ++ ew32(RXDCTL(0), E1000_RXDCTL_DMA_BURST_ENABLE); ++ ew32(RXDCTL(1), E1000_RXDCTL_DMA_BURST_ENABLE); ++ ++ /* ++ * override the delay timers for enabling bursting, only if ++ * the value was not set by the user via module options ++ */ ++ if (adapter->rx_int_delay == DEFAULT_RDTR) ++ adapter->rx_int_delay = BURST_RDTR; ++ if (adapter->rx_abs_int_delay == DEFAULT_RADV) ++ adapter->rx_abs_int_delay = BURST_RADV; ++ } ++ ++ /* set the Receive Delay Timer Register */ ++ ew32(RDTR, adapter->rx_int_delay); ++ ++ /* irq moderation */ ++ ew32(RADV, adapter->rx_abs_int_delay); ++ if ((adapter->itr_setting != 0) && (adapter->itr != 0)) ++ ew32(ITR, 1000000000 / (adapter->itr * 256)); ++ ++ ctrl_ext = er32(CTRL_EXT); ++ ew32(CTRL_EXT, ctrl_ext); ++ e1e_flush(); ++ ++ /* ++ * Setup the HW Rx Head and Tail Descriptor Pointers and ++ * the Base and Length of the Rx Descriptor Ring ++ */ ++ rdba = rx_ring->dma; ++ ew32(RDBAL, (rdba & DMA_BIT_MASK(32))); ++ ew32(RDBAH, (rdba >> 32)); ++ ew32(RDLEN, rdlen); ++ ew32(RDH, 0); ++ ew32(RDT, 0); ++ rx_ring->head = E1000_RDH; ++ rx_ring->tail = E1000_RDT; ++ ++ /* Enable Receive Checksum Offload for TCP and UDP */ ++ rxcsum = er32(RXCSUM); ++ if (adapter->netdev->features & NETIF_F_RXCSUM) { ++ rxcsum |= E1000_RXCSUM_TUOFL; ++ } else { ++ rxcsum &= ~E1000_RXCSUM_TUOFL; ++ /* no need to clear IPPCSE as it defaults to 0 */ ++ } ++ ew32(RXCSUM, rxcsum); ++ ++ /* Enable Receives */ ++ ew32(RCTL, rctl); ++} ++ ++/** ++ * e1000_update_mc_addr_list - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * ++ * Updates the Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. ++ **/ ++static void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, ++ u32 mc_addr_count) ++{ ++ hw->mac.ops.update_mc_addr_list(hw, mc_addr_list, mc_addr_count); ++} ++ ++/** ++ * e1000_set_multi - Multicast and Promiscuous mode set ++ * @netdev: network interface device structure ++ * ++ * The set_multi entry point is called whenever the multicast address ++ * list or the network interface flags are updated. This routine is ++ * responsible for configuring the hardware for proper multicast, ++ * promiscuous mode, and all-multi behavior. 
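++ * Note: no per-address multicast list is maintained here; the multicast
++ * table is programmed with an empty list via
++ * e1000_update_mc_addr_list(hw, NULL, 0), so multicast reception relies
++ * on the all-multi or promiscuous settings above.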
++ **/ ++static void e1000_set_multi(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ ++ /* Check for Promiscuous and All Multicast modes */ ++ ++ rctl = er32(RCTL); ++ ++ if (netdev->flags & IFF_PROMISC) { ++ rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); ++ rctl &= ~E1000_RCTL_VFE; ++ /* Do not hardware filter VLANs in promisc mode */ ++ e1000e_vlan_filter_disable(adapter); ++ } else { ++ if (netdev->flags & IFF_ALLMULTI) { ++ rctl |= E1000_RCTL_MPE; ++ rctl &= ~E1000_RCTL_UPE; ++ } else { ++ rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE); ++ } ++ e1000e_vlan_filter_enable(adapter); ++ } ++ ++ ew32(RCTL, rctl); ++ ++ e1000_update_mc_addr_list(hw, NULL, 0); ++ ++ if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) ++ e1000e_vlan_strip_enable(adapter); ++ else ++ e1000e_vlan_strip_disable(adapter); ++} ++ ++/** ++ * e1000_configure - configure the hardware for Rx and Tx ++ * @adapter: private board structure ++ **/ ++static void e1000_configure(struct e1000_adapter *adapter) ++{ ++ e1000_set_multi(adapter->netdev); ++ ++ e1000_restore_vlan(adapter); ++ e1000_init_manageability_pt(adapter); ++ ++ e1000_configure_tx(adapter); ++ e1000_setup_rctl(adapter); ++ e1000_configure_rx(adapter); ++ adapter->alloc_rx_buf(adapter, e1000_desc_unused(adapter->rx_ring), ++ GFP_KERNEL); ++} ++ ++/** ++ * e1000e_power_up_phy - restore link in case the phy was powered down ++ * @adapter: address of board private structure ++ * ++ * The phy may be powered down to save power and turn off link when the ++ * driver is unloaded and wake on lan is not enabled (among others) ++ * *** this routine MUST be followed by a call to e1000e_reset *** ++ **/ ++void e1000e_power_up_phy(struct e1000_adapter *adapter) ++{ ++ if (adapter->hw.phy.ops.power_up) ++ adapter->hw.phy.ops.power_up(&adapter->hw); ++ ++ adapter->hw.mac.ops.setup_link(&adapter->hw); ++} ++ ++/** ++ * e1000_power_down_phy - Power down the PHY ++ * ++ * Power down the PHY so no link is implied when interface is down. ++ * The PHY cannot be powered down if management or WoL is active. ++ */ ++static void e1000_power_down_phy(struct e1000_adapter *adapter) ++{ ++ /* WoL is enabled */ ++ if (adapter->wol) ++ return; ++ ++ if (adapter->hw.phy.ops.power_down) ++ adapter->hw.phy.ops.power_down(&adapter->hw); ++} ++ ++/** ++ * e1000e_reset - bring the hardware into a known good state ++ * ++ * This function boots the hardware and enables some settings that ++ * require a configuration cycle of the hardware - those cannot be ++ * set/changed during runtime. After reset the device needs to be ++ * properly configured for Rx, Tx etc. ++ */ ++void e1000e_reset(struct e1000_adapter *adapter) ++{ ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ struct e1000_fc_info *fc = &adapter->hw.fc; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 tx_space, min_tx_space, min_rx_space; ++ u32 pba = adapter->pba; ++ u16 hwm; ++ ++ /* reset Packet Buffer Allocation to default */ ++ ew32(PBA, pba); ++ ++ if (adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) { ++ /* ++ * To maintain wire speed transmits, the Tx FIFO should be ++ * large enough to accommodate two full transmit packets, ++ * rounded up to the next 1KB and expressed in KB. Likewise, ++ * the Rx FIFO should be large enough to accommodate at least ++ * one full receive packet and is similarly rounded up and ++ * expressed in KB. 
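++ * (Below, min_tx_space is 2 * (max_frame_size + Tx descriptor size -
++ * FCS) and min_rx_space is max_frame_size, each aligned to 1024 bytes
++ * and shifted right by 10 to convert to KB before comparing with the
++ * Tx/Rx split read back from the PBA register.)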
++ */ ++ pba = er32(PBA); ++ /* upper 16 bits has Tx packet buffer allocation size in KB */ ++ tx_space = pba >> 16; ++ /* lower 16 bits has Rx packet buffer allocation size in KB */ ++ pba &= 0xffff; ++ /* ++ * the Tx fifo also stores 16 bytes of information about the Tx ++ * but don't include ethernet FCS because hardware appends it ++ */ ++ min_tx_space = (adapter->max_frame_size + ++ sizeof(struct e1000_tx_desc) - ++ ETH_FCS_LEN) * 2; ++ min_tx_space = ALIGN(min_tx_space, 1024); ++ min_tx_space >>= 10; ++ /* software strips receive CRC, so leave room for it */ ++ min_rx_space = adapter->max_frame_size; ++ min_rx_space = ALIGN(min_rx_space, 1024); ++ min_rx_space >>= 10; ++ ++ /* ++ * If current Tx allocation is less than the min Tx FIFO size, ++ * and the min Tx FIFO size is less than the current Rx FIFO ++ * allocation, take space away from current Rx allocation ++ */ ++ if ((tx_space < min_tx_space) && ++ ((min_tx_space - tx_space) < pba)) { ++ pba -= min_tx_space - tx_space; ++ ++ /* ++ * if short on Rx space, Rx wins and must trump Tx ++ * adjustment or use Early Receive if available ++ */ ++ if ((pba < min_rx_space) && ++ (!(adapter->flags & FLAG_HAS_ERT))) ++ /* ERT enabled in e1000_configure_rx */ ++ pba = min_rx_space; ++ } ++ ++ ew32(PBA, pba); ++ } ++ ++ /* ++ * flow control settings ++ * ++ * The high water mark must be low enough to fit one full frame ++ * (or the size used for early receive) above it in the Rx FIFO. ++ * Set it to the lower of: ++ * - 90% of the Rx FIFO size, and ++ * - the full Rx FIFO size minus the early receive size (for parts ++ * with ERT support assuming ERT set to E1000_ERT_2048), or ++ * - the full Rx FIFO size minus one full frame ++ */ ++ if (adapter->flags & FLAG_DISABLE_FC_PAUSE_TIME) ++ fc->pause_time = 0xFFFF; ++ else ++ fc->pause_time = E1000_FC_PAUSE_TIME; ++ fc->send_xon = 1; ++ fc->current_mode = fc->requested_mode; ++ ++ switch (hw->mac.type) { ++ default: ++ if ((adapter->flags & FLAG_HAS_ERT) && ++ (adapter->netdev->mtu > ETH_DATA_LEN)) ++ hwm = min(((pba << 10) * 9 / 10), ++ ((pba << 10) - (E1000_ERT_2048 << 3))); ++ else ++ hwm = min(((pba << 10) * 9 / 10), ++ ((pba << 10) - adapter->max_frame_size)); ++ ++ fc->high_water = hwm & E1000_FCRTH_RTH; /* 8-byte granularity */ ++ fc->low_water = fc->high_water - 8; ++ break; ++ case e1000_pchlan: ++ /* ++ * Workaround PCH LOM adapter hangs with certain network ++ * loads. If hangs persist, try disabling Tx flow control. ++ */ ++ if (adapter->netdev->mtu > ETH_DATA_LEN) { ++ fc->high_water = 0x3500; ++ fc->low_water = 0x1500; ++ } else { ++ fc->high_water = 0x5000; ++ fc->low_water = 0x3000; ++ } ++ fc->refresh_time = 0x1000; ++ break; ++ case e1000_pch2lan: ++ case e1000_pch_lpt: ++ fc->high_water = 0x05C20; ++ fc->low_water = 0x05048; ++ fc->pause_time = 0x0650; ++ fc->refresh_time = 0x0400; ++ if (adapter->netdev->mtu > ETH_DATA_LEN) { ++ pba = 14; ++ ew32(PBA, pba); ++ } ++ break; ++ } ++ ++ /* ++ * Disable Adaptive Interrupt Moderation if 2 full packets cannot ++ * fit in receive buffer and early-receive not supported. 
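++ * (That is, when 2 * max_frame_size exceeds the packet buffer size of
++ * pba << 10 bytes and FLAG_HAS_ERT is not set, FLAG2_DISABLE_AIM is set
++ * and ITR is written with 0; the moderation setting is restored once
++ * the condition clears.)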
++ */ ++ if (adapter->itr_setting & 0x3) { ++ if (((adapter->max_frame_size * 2) > (pba << 10)) && ++ !(adapter->flags & FLAG_HAS_ERT)) { ++ if (!(adapter->flags2 & FLAG2_DISABLE_AIM)) { ++ dev_info(&adapter->pdev->dev, ++ "Interrupt Throttle Rate turned off\n"); ++ adapter->flags2 |= FLAG2_DISABLE_AIM; ++ ew32(ITR, 0); ++ } ++ } else if (adapter->flags2 & FLAG2_DISABLE_AIM) { ++ dev_info(&adapter->pdev->dev, ++ "Interrupt Throttle Rate turned on\n"); ++ adapter->flags2 &= ~FLAG2_DISABLE_AIM; ++ adapter->itr = 20000; ++ ew32(ITR, 1000000000 / (adapter->itr * 256)); ++ } ++ } ++ ++ /* Allow time for pending master requests to run */ ++ mac->ops.reset_hw(hw); ++ ++ /* ++ * For parts with AMT enabled, let the firmware know ++ * that the network interface is in control ++ */ ++ if (adapter->flags & FLAG_HAS_AMT) ++ e1000e_get_hw_control(adapter); ++ ++ ew32(WUC, 0); ++ ++ if (mac->ops.init_hw(hw)) ++ e_err("Hardware Error\n"); ++ ++ e1000_update_mng_vlan(adapter); ++ ++ /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ ++ ew32(VET, ETH_P_8021Q); ++ ++ e1000e_reset_adaptive(hw); ++ ++ if (!rtnetif_running(adapter->netdev) && ++ !test_bit(__E1000_TESTING, &adapter->state)) { ++ e1000_power_down_phy(adapter); ++ return; ++ } ++ ++ e1000_get_phy_info(hw); ++ ++ if ((adapter->flags & FLAG_HAS_SMART_POWER_DOWN) && ++ !(adapter->flags & FLAG_SMART_POWER_DOWN)) { ++ u16 phy_data = 0; ++ /* ++ * speed up time to link by disabling smart power down, ignore ++ * the return value of this function because there is nothing ++ * different we would do if it failed ++ */ ++ e1e_rphy(hw, IGP02E1000_PHY_POWER_MGMT, &phy_data); ++ phy_data &= ~IGP02E1000_PM_SPD; ++ e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, phy_data); ++ } ++} ++ ++int e1000e_up(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* hardware has been reset, we need to reload some things */ ++ e1000_configure(adapter); ++ ++ clear_bit(__E1000_DOWN, &adapter->state); ++ ++ if (adapter->msix_entries) ++ e1000_configure_msix(adapter); ++ e1000_irq_enable(adapter); ++ ++ rtnetif_start_queue(adapter->netdev); ++ ++ /* fire a link change interrupt to start the watchdog */ ++ if (adapter->msix_entries) ++ ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER); ++ else ++ ew32(ICS, E1000_ICS_LSC); ++ ++ return 0; ++} ++ ++static void e1000e_flush_descriptors(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (!(adapter->flags2 & FLAG2_DMA_BURST)) ++ return; ++ ++ /* flush pending descriptor writebacks to memory */ ++ ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD); ++ ew32(RDTR, adapter->rx_int_delay | E1000_RDTR_FPD); ++ ++ /* execute the writes immediately */ ++ e1e_flush(); ++} ++ ++void e1000e_down(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 tctl, rctl; ++ ++ /* ++ * signal that we're down so the interrupt handler does not ++ * reschedule our watchdog timer ++ */ ++ set_bit(__E1000_DOWN, &adapter->state); ++ ++ /* disable receives in the hardware */ ++ rctl = er32(RCTL); ++ if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX)) ++ ew32(RCTL, rctl & ~E1000_RCTL_EN); ++ /* flush and sleep below */ ++ ++ rtnetif_stop_queue(netdev); ++ ++ /* disable transmits in the hardware */ ++ tctl = er32(TCTL); ++ tctl &= ~E1000_TCTL_EN; ++ ew32(TCTL, tctl); ++ ++ /* flush both disables and wait for them to finish */ ++ e1e_flush(); ++ usleep_range(10000, 20000); ++ ++ e1000_irq_disable(adapter); ++ ++ 
del_timer_sync(&adapter->watchdog_timer); ++ del_timer_sync(&adapter->phy_info_timer); ++ ++ rtnetif_carrier_off(netdev); ++ ++ e1000e_flush_descriptors(adapter); ++ e1000_clean_tx_ring(adapter); ++ e1000_clean_rx_ring(adapter); ++ ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ ++ if (!pci_channel_offline(adapter->pdev)) ++ e1000e_reset(adapter); ++ ++ /* ++ * TODO: for power management, we could drop the link and ++ * pci_disable_device here. ++ */ ++} ++ ++void e1000e_reinit_locked(struct e1000_adapter *adapter) ++{ ++ might_sleep(); ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ usleep_range(1000, 2000); ++ e1000e_down(adapter); ++ e1000e_up(adapter); ++ clear_bit(__E1000_RESETTING, &adapter->state); ++} ++ ++/** ++ * e1000_sw_init - Initialize general software structures (struct e1000_adapter) ++ * @adapter: board private structure to initialize ++ * ++ * e1000_sw_init initializes the Adapter private data structure. ++ * Fields are initialized based on PCI device information and ++ * OS network device settings (MTU size). ++ **/ ++static int e1000_sw_init(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ ++ adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN; ++ adapter->rx_ps_bsize0 = 128; ++ adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; ++ adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; ++ ++ spin_lock_init(&adapter->stats64_lock); ++ ++ e1000e_set_interrupt_capability(adapter); ++ ++ if (e1000_alloc_queues(adapter)) ++ return -ENOMEM; ++ ++ /* Explicitly disable IRQ since the NIC can be in any state. */ ++ e1000_irq_disable(adapter); ++ ++ set_bit(__E1000_DOWN, &adapter->state); ++ return 0; ++} ++ ++/** ++ * e1000_intr_msi_test - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static irqreturn_t e1000_intr_msi_test(int irq, void *data) ++{ ++ struct rtnet_device *netdev = data; ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 icr = er32(ICR); ++ ++ e_dbg("icr is %08X\n", icr); ++ if (icr & E1000_ICR_RXSEQ) { ++ adapter->flags &= ~FLAG_MSI_TEST_FAILED; ++ wmb(); ++ } ++ ++ return IRQ_HANDLED; ++} ++ ++/** ++ * e1000_test_msi_interrupt - Returns 0 for successful test ++ * @adapter: board private struct ++ * ++ * code flow taken from tg3.c ++ **/ ++static int e1000_test_msi_interrupt(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ int err; ++ ++ /* poll_enable hasn't been called yet, so don't need disable */ ++ /* clear any pending events */ ++ er32(ICR); ++ ++ /* free the real vector and request a test handler */ ++ e1000_free_irq(adapter); ++ e1000e_reset_interrupt_capability(adapter); ++ ++ /* Assume that the test fails, if it succeeds then the test ++ * MSI irq handler will unset this flag */ ++ adapter->flags |= FLAG_MSI_TEST_FAILED; ++ ++ err = pci_enable_msi(adapter->pdev); ++ if (err) ++ goto msi_test_failed; ++ ++ err = request_irq(adapter->pdev->irq, e1000_intr_msi_test, 0, ++ netdev->name, netdev); ++ if (err) { ++ pci_disable_msi(adapter->pdev); ++ goto msi_test_failed; ++ } ++ ++ wmb(); ++ ++ e1000_irq_enable(adapter); ++ ++ /* fire an unusual interrupt on the test handler */ ++ ew32(ICS, E1000_ICS_RXSEQ); ++ e1e_flush(); ++ msleep(50); ++ ++ e1000_irq_disable(adapter); ++ ++ rmb(); ++ ++ if (adapter->flags & FLAG_MSI_TEST_FAILED) { ++ adapter->int_mode = E1000E_INT_MODE_LEGACY; ++ 
e_info("MSI interrupt test failed, using legacy interrupt.\n"); ++ } else ++ e_dbg("MSI interrupt test succeeded!\n"); ++ ++ free_irq(adapter->pdev->irq, netdev); ++ pci_disable_msi(adapter->pdev); ++ ++msi_test_failed: ++ e1000e_set_interrupt_capability(adapter); ++ return e1000_request_irq(adapter); ++} ++ ++/** ++ * e1000_test_msi - Returns 0 if MSI test succeeds or INTx mode is restored ++ * @adapter: board private struct ++ * ++ * code flow taken from tg3.c, called with e1000 interrupts disabled. ++ **/ ++static int e1000_test_msi(struct e1000_adapter *adapter) ++{ ++ int err; ++ u16 pci_cmd; ++ ++ if (!(adapter->flags & FLAG_MSI_ENABLED)) ++ return 0; ++ ++ /* disable SERR in case the MSI write causes a master abort */ ++ pci_read_config_word(adapter->pdev, PCI_COMMAND, &pci_cmd); ++ if (pci_cmd & PCI_COMMAND_SERR) ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, ++ pci_cmd & ~PCI_COMMAND_SERR); ++ ++ err = e1000_test_msi_interrupt(adapter); ++ ++ /* re-enable SERR */ ++ if (pci_cmd & PCI_COMMAND_SERR) { ++ pci_read_config_word(adapter->pdev, PCI_COMMAND, &pci_cmd); ++ pci_cmd |= PCI_COMMAND_SERR; ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, pci_cmd); ++ } ++ ++ return err; ++} ++ ++/** ++ * e1000_open - Called when a network interface is made active ++ * @netdev: network interface device structure ++ * ++ * Returns 0 on success, negative value on failure ++ * ++ * The open entry point is called when a network interface is made ++ * active by the system (IFF_UP). At this point all resources needed ++ * for transmit and receive operations are allocated, the interrupt ++ * handler is registered with the OS, the watchdog timer is started, ++ * and the stack is notified that the interface is ready. ++ **/ ++static int e1000_open(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ struct pci_dev *pdev = adapter->pdev; ++ int err; ++ ++ /* disallow open during test */ ++ if (test_bit(__E1000_TESTING, &adapter->state)) ++ return -EBUSY; ++ ++ pm_runtime_get_sync(&pdev->dev); ++ ++ rtnetif_carrier_off(netdev); ++ ++ /* allocate transmit descriptors */ ++ err = e1000e_setup_tx_resources(adapter); ++ if (err) ++ goto err_setup_tx; ++ ++ /* allocate receive descriptors */ ++ err = e1000e_setup_rx_resources(adapter); ++ if (err) ++ goto err_setup_rx; ++ ++ /* ++ * If AMT is enabled, let the firmware know that the network ++ * interface is now open and reset the part to a known state. ++ */ ++ if (adapter->flags & FLAG_HAS_AMT) { ++ e1000e_get_hw_control(adapter); ++ e1000e_reset(adapter); ++ } ++ ++ e1000e_power_up_phy(adapter); ++ ++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN)) ++ e1000_update_mng_vlan(adapter); ++ ++ /* ++ * before we allocate an interrupt, we must be ready to handle it. ++ * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt ++ * as soon as we call pci_request_irq, so we have to setup our ++ * clean_rx handler before we do so. 
++ */ ++ e1000_configure(adapter); ++ ++ rt_stack_connect(netdev, &STACK_manager); ++ ++ err = e1000_request_irq(adapter); ++ if (err) ++ goto err_req_irq; ++ ++ /* ++ * Work around PCIe errata with MSI interrupts causing some chipsets to ++ * ignore e1000e MSI messages, which means we need to test our MSI ++ * interrupt now ++ */ ++ if (adapter->int_mode != E1000E_INT_MODE_LEGACY) { ++ err = e1000_test_msi(adapter); ++ if (err) { ++ e_err("Interrupt allocation failed\n"); ++ goto err_req_irq; ++ } ++ } ++ ++ /* From here on the code is the same as e1000e_up() */ ++ clear_bit(__E1000_DOWN, &adapter->state); ++ ++ e1000_irq_enable(adapter); ++ ++ rtnetif_start_queue(netdev); ++ ++ adapter->idle_check = true; ++ pm_runtime_put(&pdev->dev); ++ ++ /* fire a link status change interrupt to start the watchdog */ ++ if (adapter->msix_entries) ++ ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER); ++ else ++ ew32(ICS, E1000_ICS_LSC); ++ ++ return 0; ++ ++err_req_irq: ++ e1000e_release_hw_control(adapter); ++ e1000_power_down_phy(adapter); ++ e1000e_free_rx_resources(adapter); ++err_setup_rx: ++ e1000e_free_tx_resources(adapter); ++err_setup_tx: ++ e1000e_reset(adapter); ++ pm_runtime_put_sync(&pdev->dev); ++ ++ return err; ++} ++ ++/** ++ * e1000_close - Disables a network interface ++ * @netdev: network interface device structure ++ * ++ * Returns 0, this is not allowed to fail ++ * ++ * The close entry point is called when an interface is de-activated ++ * by the OS. The hardware is still under the drivers control, but ++ * needs to be disabled. A global MAC reset is issued to stop the ++ * hardware, and all transmit and receive resources are freed. ++ **/ ++static int e1000_close(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct pci_dev *pdev = adapter->pdev; ++ ++ WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); ++ ++ pm_runtime_get_sync(&pdev->dev); ++ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) { ++ e1000e_down(adapter); ++ e1000_free_irq(adapter); ++ } ++ e1000_power_down_phy(adapter); ++ ++ rt_stack_disconnect(netdev); ++ ++ e1000e_free_tx_resources(adapter); ++ e1000e_free_rx_resources(adapter); ++ ++ /* ++ * kill manageability vlan ID if supported, but not if a vlan with ++ * the same ID is registered on the host OS (let 8021q kill it) ++ */ ++ if (adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) ++ e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); ++ ++ /* ++ * If AMT is enabled, let the firmware know that the network ++ * interface is now closed ++ */ ++ if ((adapter->flags & FLAG_HAS_AMT) && ++ !test_bit(__E1000_TESTING, &adapter->state)) ++ e1000e_release_hw_control(adapter); ++ ++ pm_runtime_put_sync(&pdev->dev); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_update_phy_task - work thread to update phy ++ * @work: pointer to our work struct ++ * ++ * this worker thread exists because we must acquire a ++ * semaphore to read the phy, which we could msleep while ++ * waiting for it, and we can't msleep in a timer. 
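++ * The timer callback (e1000_update_phy_info) therefore defers to this
++ * worker through rtdm_schedule_nrt_work(), so the PHY access runs in
++ * regular Linux context instead of timer context.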
++ **/ ++static void e1000e_update_phy_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, update_phy_task); ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ return; ++ ++ e1000_get_phy_info(&adapter->hw); ++} ++ ++/* ++ * Need to wait a few seconds after link up to get diagnostic information from ++ * the phy ++ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++static void e1000_update_phy_info(struct timer_list *t) ++{ ++ struct e1000_adapter *adapter = from_timer(adapter, t, phy_info_timer); ++#else /* < 4.14 */ ++static void e1000_update_phy_info(unsigned long data) ++{ ++ struct e1000_adapter *adapter = (struct e1000_adapter *) data; ++#endif /* < 4.14 */ ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ return; ++ ++ rtdm_schedule_nrt_work(&adapter->update_phy_task); ++} ++ ++/** ++ * e1000_phy_read_status - Update the PHY register status snapshot ++ * @adapter: board private structure ++ **/ ++static void e1000_phy_read_status(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_phy_regs *phy = &adapter->phy_regs; ++ ++ if ((er32(STATUS) & E1000_STATUS_LU) && ++ (adapter->hw.phy.media_type == e1000_media_type_copper)) { ++ int ret_val; ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy->bmcr); ++ ret_val |= e1e_rphy(hw, PHY_STATUS, &phy->bmsr); ++ ret_val |= e1e_rphy(hw, PHY_AUTONEG_ADV, &phy->advertise); ++ ret_val |= e1e_rphy(hw, PHY_LP_ABILITY, &phy->lpa); ++ ret_val |= e1e_rphy(hw, PHY_AUTONEG_EXP, &phy->expansion); ++ ret_val |= e1e_rphy(hw, PHY_1000T_CTRL, &phy->ctrl1000); ++ ret_val |= e1e_rphy(hw, PHY_1000T_STATUS, &phy->stat1000); ++ ret_val |= e1e_rphy(hw, PHY_EXT_STATUS, &phy->estatus); ++ if (ret_val) ++ e_warn("Error reading PHY register\n"); ++ } else { ++ /* ++ * Do not read PHY registers if link is not up ++ * Set values to typical power-on defaults ++ */ ++ phy->bmcr = (BMCR_SPEED1000 | BMCR_ANENABLE | BMCR_FULLDPLX); ++ phy->bmsr = (BMSR_100FULL | BMSR_100HALF | BMSR_10FULL | ++ BMSR_10HALF | BMSR_ESTATEN | BMSR_ANEGCAPABLE | ++ BMSR_ERCAP); ++ phy->advertise = (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP | ++ ADVERTISE_ALL | ADVERTISE_CSMA); ++ phy->lpa = 0; ++ phy->expansion = EXPANSION_ENABLENPAGE; ++ phy->ctrl1000 = ADVERTISE_1000FULL; ++ phy->stat1000 = 0; ++ phy->estatus = (ESTATUS_1000_TFULL | ESTATUS_1000_THALF); ++ } ++} ++ ++static void e1000_print_link_info(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl = er32(CTRL); ++ ++ /* Link status message must follow this format for user tools */ ++ printk(KERN_INFO "e1000e: %s NIC Link is Up %d Mbps %s, " ++ "Flow Control: %s\n", ++ adapter->netdev->name, ++ adapter->link_speed, ++ (adapter->link_duplex == FULL_DUPLEX) ? ++ "Full Duplex" : "Half Duplex", ++ ((ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE)) ? ++ "Rx/Tx" : ++ ((ctrl & E1000_CTRL_RFCE) ? "Rx" : ++ ((ctrl & E1000_CTRL_TFCE) ? "Tx" : "None"))); ++} ++ ++static bool e1000e_has_link(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ bool link_active = 0; ++ s32 ret_val = 0; ++ ++ /* ++ * get_link_status is set on LSC (link status) interrupt or ++ * Rx sequence error interrupt. 
get_link_status will stay ++ * false until the check_for_link establishes link ++ * for copper adapters ONLY ++ */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ if (hw->mac.get_link_status) { ++ ret_val = hw->mac.ops.check_for_link(hw); ++ link_active = !hw->mac.get_link_status; ++ } else { ++ link_active = 1; ++ } ++ break; ++ case e1000_media_type_fiber: ++ ret_val = hw->mac.ops.check_for_link(hw); ++ link_active = !!(er32(STATUS) & E1000_STATUS_LU); ++ break; ++ case e1000_media_type_internal_serdes: ++ ret_val = hw->mac.ops.check_for_link(hw); ++ link_active = adapter->hw.mac.serdes_has_link; ++ break; ++ default: ++ case e1000_media_type_unknown: ++ break; ++ } ++ ++ if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && ++ (er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { ++ /* See e1000_kmrn_lock_loss_workaround_ich8lan() */ ++ e_info("Gigabit has been disabled, downgrading speed\n"); ++ } ++ ++ return link_active; ++} ++ ++static void e1000e_enable_receives(struct e1000_adapter *adapter) ++{ ++ /* make sure the receive unit is started */ ++ if ((adapter->flags & FLAG_RX_NEEDS_RESTART) && ++ (adapter->flags & FLAG_RX_RESTART_NOW)) { ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl = er32(RCTL); ++ ew32(RCTL, rctl | E1000_RCTL_EN); ++ adapter->flags &= ~FLAG_RX_RESTART_NOW; ++ } ++} ++ ++static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* ++ * With 82574 controllers, PHY needs to be checked periodically ++ * for hung state and reset, if two calls return true ++ */ ++ if (e1000_check_phy_82574(hw)) ++ adapter->phy_hang_count++; ++ else ++ adapter->phy_hang_count = 0; ++ ++ if (adapter->phy_hang_count > 1) { ++ adapter->phy_hang_count = 0; ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ } ++} ++ ++/** ++ * e1000_watchdog - Timer Call-back ++ * @data: pointer to adapter cast into an unsigned long ++ **/ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++static void e1000_watchdog(struct timer_list *t) ++{ ++ struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer); ++#else /* < 4.14 */ ++static void e1000_watchdog(unsigned long data) ++{ ++ struct e1000_adapter *adapter = (struct e1000_adapter *) data; ++#endif /* < 4.14 */ ++ ++ /* Do the rest outside of interrupt context */ ++ rtdm_schedule_nrt_work(&adapter->watchdog_task); ++ ++ /* TODO: make this use queue_delayed_work() */ ++} ++ ++static void e1000_watchdog_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, watchdog_task); ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ struct e1000_phy_info *phy = &adapter->hw.phy; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 link, tctl; ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ return; ++ ++ link = e1000e_has_link(adapter); ++ if ((rtnetif_carrier_ok(netdev)) && link) { ++ e1000e_enable_receives(adapter); ++ goto link_up; ++ } ++ ++ if ((e1000e_enable_tx_pkt_filtering(hw)) && ++ (adapter->mng_vlan_id != adapter->hw.mng_cookie.vlan_id)) ++ e1000_update_mng_vlan(adapter); ++ ++ if (link) { ++ if (!rtnetif_carrier_ok(netdev)) { ++ bool txb2b = 1; ++ ++ /* update snapshot of PHY registers on LSC */ ++ e1000_phy_read_status(adapter); ++ mac->ops.get_link_up_info(&adapter->hw, ++ &adapter->link_speed, ++ &adapter->link_duplex); ++ e1000_print_link_info(adapter); ++ /* ++ * On supported PHYs, 
check for duplex mismatch only ++ * if link has autonegotiated at 10/100 half ++ */ ++ if ((hw->phy.type == e1000_phy_igp_3 || ++ hw->phy.type == e1000_phy_bm) && ++ (hw->mac.autoneg == true) && ++ (adapter->link_speed == SPEED_10 || ++ adapter->link_speed == SPEED_100) && ++ (adapter->link_duplex == HALF_DUPLEX)) { ++ u16 autoneg_exp; ++ ++ e1e_rphy(hw, PHY_AUTONEG_EXP, &autoneg_exp); ++ ++ if (!(autoneg_exp & NWAY_ER_LP_NWAY_CAPS)) ++ e_info("Autonegotiated half duplex but" ++ " link partner cannot autoneg. " ++ " Try forcing full duplex if " ++ "link gets many collisions.\n"); ++ } ++ ++ /* adjust timeout factor according to speed/duplex */ ++ adapter->tx_timeout_factor = 1; ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ txb2b = 0; ++ adapter->tx_timeout_factor = 16; ++ break; ++ case SPEED_100: ++ txb2b = 0; ++ adapter->tx_timeout_factor = 10; ++ break; ++ } ++ ++ /* ++ * workaround: re-program speed mode bit after ++ * link-up event ++ */ ++ if ((adapter->flags & FLAG_TARC_SPEED_MODE_BIT) && ++ !txb2b) { ++ u32 tarc0; ++ tarc0 = er32(TARC(0)); ++ tarc0 &= ~SPEED_MODE_BIT; ++ ew32(TARC(0), tarc0); ++ } ++ ++ /* ++ * disable TSO for pcie and 10/100 speeds, to avoid ++ * some hardware issues ++ */ ++ if (!(adapter->flags & FLAG_TSO_FORCE)) { ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ case SPEED_100: ++ e_info("10/100 speed: disabling TSO\n"); ++ netdev->features &= ~NETIF_F_TSO; ++ netdev->features &= ~NETIF_F_TSO6; ++ break; ++ case SPEED_1000: ++ netdev->features |= NETIF_F_TSO; ++ netdev->features |= NETIF_F_TSO6; ++ break; ++ default: ++ /* oops */ ++ break; ++ } ++ } ++ ++ /* ++ * enable transmits in the hardware, need to do this ++ * after setting TARC(0) ++ */ ++ tctl = er32(TCTL); ++ tctl |= E1000_TCTL_EN; ++ ew32(TCTL, tctl); ++ ++ /* ++ * Perform any post-link-up configuration before ++ * reporting link up. ++ */ ++ if (phy->ops.cfg_on_link_up) ++ phy->ops.cfg_on_link_up(hw); ++ ++ rtnetif_carrier_on(netdev); ++ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ mod_timer(&adapter->phy_info_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++ } ++ } else { ++ if (rtnetif_carrier_ok(netdev)) { ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ /* Link status message must follow this format */ ++ printk(KERN_INFO "e1000e: %s NIC Link is Down\n", ++ adapter->netdev->name); ++ rtnetif_carrier_off(netdev); ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ mod_timer(&adapter->phy_info_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++ ++ if (adapter->flags & FLAG_RX_NEEDS_RESTART) ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ } ++ } ++ ++link_up: ++ spin_lock(&adapter->stats64_lock); ++ ++ mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; ++ adapter->tpt_old = adapter->stats.tpt; ++ mac->collision_delta = adapter->stats.colc - adapter->colc_old; ++ adapter->colc_old = adapter->stats.colc; ++ ++ adapter->gorc = adapter->stats.gorc - adapter->gorc_old; ++ adapter->gorc_old = adapter->stats.gorc; ++ adapter->gotc = adapter->stats.gotc - adapter->gotc_old; ++ adapter->gotc_old = adapter->stats.gotc; ++ spin_unlock(&adapter->stats64_lock); ++ ++ e1000e_update_adaptive(&adapter->hw); ++ ++ if (!rtnetif_carrier_ok(netdev) && ++ (e1000_desc_unused(tx_ring) + 1 < tx_ring->count)) { ++ /* ++ * We've lost link, so the controller stops DMA, ++ * but we've got queued Tx work that's never going ++ * to get done, so reset controller to flush Tx. ++ * (Do the reset outside of interrupt context). 
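++ * Here that means queueing reset_task via rtdm_schedule_nrt_work() and
++ * returning from the watchdog immediately.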
++ */ ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ /* return immediately since reset is imminent */ ++ return; ++ } ++ ++ /* Simple mode for Interrupt Throttle Rate (ITR) */ ++ if (adapter->itr_setting == 4) { ++ /* ++ * Symmetric Tx/Rx gets a reduced ITR=2000; ++ * Total asymmetrical Tx or Rx gets ITR=8000; ++ * everyone else is between 2000-8000. ++ */ ++ u32 goc = (adapter->gotc + adapter->gorc) / 10000; ++ u32 dif = (adapter->gotc > adapter->gorc ? ++ adapter->gotc - adapter->gorc : ++ adapter->gorc - adapter->gotc) / 10000; ++ u32 itr = goc > 0 ? (dif * 6000 / goc + 2000) : 8000; ++ ++ ew32(ITR, 1000000000 / (itr * 256)); ++ } ++ ++ /* Cause software interrupt to ensure Rx ring is cleaned */ ++ if (adapter->msix_entries) ++ ew32(ICS, adapter->rx_ring->ims_val); ++ else ++ ew32(ICS, E1000_ICS_RXDMT0); ++ ++ /* flush pending descriptors to memory before detecting Tx hang */ ++ e1000e_flush_descriptors(adapter); ++ ++ /* Force detection of hung controller every watchdog period */ ++ adapter->detect_tx_hung = 1; ++ ++ /* ++ * With 82571 controllers, LAA may be overwritten due to controller ++ * reset from the other port. Set the appropriate LAA in RAR[0] ++ */ ++ if (e1000e_get_laa_state_82571(hw)) ++ e1000e_rar_set(hw, adapter->hw.mac.addr, 0); ++ ++ if (adapter->flags2 & FLAG2_CHECK_PHY_HANG) ++ e1000e_check_82574_phy_workaround(adapter); ++ ++ /* Reset the timer */ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ mod_timer(&adapter->watchdog_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++} ++ ++#define E1000_TX_FLAGS_CSUM 0x00000001 ++#define E1000_TX_FLAGS_VLAN 0x00000002 ++#define E1000_TX_FLAGS_TSO 0x00000004 ++#define E1000_TX_FLAGS_IPV4 0x00000008 ++#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 ++#define E1000_TX_FLAGS_VLAN_SHIFT 16 ++ ++#define E1000_MAX_PER_TXD 8192 ++#define E1000_MAX_TXD_PWR 12 ++ ++static int e1000_tx_map(struct e1000_adapter *adapter, ++ struct rtskb *skb, unsigned int first) ++{ ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_buffer *buffer_info; ++ unsigned int offset = 0, size, i; ++ ++ i = tx_ring->next_to_use; ++ ++ buffer_info = &tx_ring->buffer_info[i]; ++ size = skb->len; ++ ++ buffer_info->length = size; ++ buffer_info->time_stamp = jiffies; ++ buffer_info->next_to_watch = i; ++ buffer_info->dma = rtskb_data_dma_addr(skb, offset); ++ buffer_info->mapped_as_page = false; ++ ++ tx_ring->buffer_info[i].skb = skb; ++ tx_ring->buffer_info[i].segs = 1; ++ tx_ring->buffer_info[i].bytecount = size; ++ tx_ring->buffer_info[first].next_to_watch = i; ++ ++ return 1; ++} ++ ++static void e1000_tx_queue(struct e1000_adapter *adapter, ++ int tx_flags, int count) ++{ ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_tx_desc *tx_desc = NULL; ++ struct e1000_buffer *buffer_info; ++ u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS; ++ unsigned int i; ++ ++ if (tx_flags & E1000_TX_FLAGS_CSUM) { ++ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; ++ txd_upper |= E1000_TXD_POPTS_TXSM << 8; ++ } ++ ++ if (tx_flags & E1000_TX_FLAGS_VLAN) { ++ txd_lower |= E1000_TXD_CMD_VLE; ++ txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); ++ } ++ ++ i = tx_ring->next_to_use; ++ ++ do { ++ buffer_info = &tx_ring->buffer_info[i]; ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ tx_desc->lower.data = ++ cpu_to_le32(txd_lower | buffer_info->length); ++ tx_desc->upper.data = cpu_to_le32(txd_upper); ++ ++ i++; ++ if (i == tx_ring->count) ++ i = 0; ++ } while (--count > 0); ++ ++ 
tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); ++ ++ /* ++ * Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). ++ */ ++ wmb(); ++ ++ tx_ring->next_to_use = i; ++ ++ if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) ++ e1000e_update_tdt_wa(adapter, i); ++ else ++ writel(i, adapter->hw.hw_addr + tx_ring->tail); ++ ++ /* ++ * we need this if more than one processor can write to our tail ++ * at a time, it synchronizes IO on IA64/Altix systems ++ */ ++ mmiowb(); ++} ++ ++#define MINIMUM_DHCP_PACKET_SIZE 282 ++static int e1000_transfer_dhcp_info(struct e1000_adapter *adapter, ++ struct rtskb *skb) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u16 length, offset; ++ ++ if (skb->len <= MINIMUM_DHCP_PACKET_SIZE) ++ return 0; ++ ++ if (((struct ethhdr *) skb->data)->h_proto != htons(ETH_P_IP)) ++ return 0; ++ ++ { ++ const struct iphdr *ip = (struct iphdr *)((u8 *)skb->data+14); ++ struct udphdr *udp; ++ ++ if (ip->protocol != IPPROTO_UDP) ++ return 0; ++ ++ udp = (struct udphdr *)((u8 *)ip + (ip->ihl << 2)); ++ if (ntohs(udp->dest) != 67) ++ return 0; ++ ++ offset = (u8 *)udp + 8 - skb->data; ++ length = skb->len - offset; ++ return e1000e_mng_write_dhcp_info(hw, (u8 *)udp + 8, length); ++ } ++ ++ return 0; ++} ++ ++#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 ) ++static int e1000_xmit_frame(struct rtskb *skb, struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ rtdm_lockctx_t context; ++ unsigned int first; ++ unsigned int tx_flags = 0; ++ int count = 0; ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ if (skb->len <= 0) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ count++; ++ ++ count += skb->len; ++ ++ if (adapter->hw.mac.tx_pkt_filtering) ++ e1000_transfer_dhcp_info(adapter, skb); ++ ++ rtdm_lock_get_irqsave(&tx_ring->lock, context); ++ ++ first = tx_ring->next_to_use; ++ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ /* if count is 0 then mapping error has occurred */ ++ count = e1000_tx_map(adapter, skb, first); ++ if (count) { ++ e1000_tx_queue(adapter, tx_flags, count); ++ rtdm_lock_put_irqrestore(&tx_ring->lock, context); ++ } else { ++ tx_ring->buffer_info[first].time_stamp = 0; ++ tx_ring->next_to_use = first; ++ rtdm_lock_put_irqrestore(&tx_ring->lock, context); ++ kfree_rtskb(skb); ++ } ++ ++ return NETDEV_TX_OK; ++} ++ ++static void e1000_reset_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter; ++ adapter = container_of(work, struct e1000_adapter, reset_task); ++ ++ /* don't run the task if already down */ ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ return; ++ ++ if (!((adapter->flags & FLAG_RX_NEEDS_RESTART) && ++ (adapter->flags & FLAG_RX_RESTART_NOW))) { ++ e1000e_dump(adapter); ++ e_err("Reset adapter\n"); ++ } ++ e1000e_reinit_locked(adapter); ++} ++ ++static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 i, mac_reg; ++ u16 phy_reg, wuc_enable; ++ int retval = 0; ++ ++ /* copy MAC RARs to PHY RARs */ ++ e1000_copy_rx_addrs_to_phy_ich8lan(hw); ++ ++ retval = hw->phy.ops.acquire(hw); ++ if (retval) { ++ e_err("Could not acquire PHY\n"); ++ return retval; ++ } ++ ++ /* Enable access to wakeup registers on and set page to BM_WUC_PAGE */ ++ retval = 
e1000_enable_phy_wakeup_reg_access_bm(hw, &wuc_enable); ++ if (retval) ++ goto out; ++ ++ /* copy MAC MTA to PHY MTA - only needed for pchlan */ ++ for (i = 0; i < adapter->hw.mac.mta_reg_count; i++) { ++ mac_reg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); ++ hw->phy.ops.write_reg_page(hw, BM_MTA(i), ++ (u16)(mac_reg & 0xFFFF)); ++ hw->phy.ops.write_reg_page(hw, BM_MTA(i) + 1, ++ (u16)((mac_reg >> 16) & 0xFFFF)); ++ } ++ ++ /* configure PHY Rx Control register */ ++ hw->phy.ops.read_reg_page(&adapter->hw, BM_RCTL, &phy_reg); ++ mac_reg = er32(RCTL); ++ if (mac_reg & E1000_RCTL_UPE) ++ phy_reg |= BM_RCTL_UPE; ++ if (mac_reg & E1000_RCTL_MPE) ++ phy_reg |= BM_RCTL_MPE; ++ phy_reg &= ~(BM_RCTL_MO_MASK); ++ if (mac_reg & E1000_RCTL_MO_3) ++ phy_reg |= (((mac_reg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) ++ << BM_RCTL_MO_SHIFT); ++ if (mac_reg & E1000_RCTL_BAM) ++ phy_reg |= BM_RCTL_BAM; ++ if (mac_reg & E1000_RCTL_PMCF) ++ phy_reg |= BM_RCTL_PMCF; ++ mac_reg = er32(CTRL); ++ if (mac_reg & E1000_CTRL_RFCE) ++ phy_reg |= BM_RCTL_RFCE; ++ hw->phy.ops.write_reg_page(&adapter->hw, BM_RCTL, phy_reg); ++ ++ /* enable PHY wakeup in MAC register */ ++ ew32(WUFC, wufc); ++ ew32(WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); ++ ++ /* configure and enable PHY wakeup in PHY registers */ ++ hw->phy.ops.write_reg_page(&adapter->hw, BM_WUFC, wufc); ++ hw->phy.ops.write_reg_page(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); ++ ++ /* activate PHY wakeup */ ++ wuc_enable |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ++ retval = e1000_disable_phy_wakeup_reg_access_bm(hw, &wuc_enable); ++ if (retval) ++ e_err("Could not set PHY Host Wakeup bit\n"); ++out: ++ hw->phy.ops.release(hw); ++ ++ return retval; ++} ++ ++static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake, ++ bool runtime) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl, ctrl_ext, rctl, status; ++ /* Runtime suspend should only enable wakeup for link changes */ ++ u32 wufc = runtime ? 
E1000_WUFC_LNKC : adapter->wol; ++ int retval = 0; ++ ++ rtnetif_device_detach(netdev); ++ ++ if (rtnetif_running(netdev)) { ++ WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); ++ e1000e_down(adapter); ++ e1000_free_irq(adapter); ++ } ++ e1000e_reset_interrupt_capability(adapter); ++ ++ retval = pci_save_state(pdev); ++ if (retval) ++ return retval; ++ ++ status = er32(STATUS); ++ if (status & E1000_STATUS_LU) ++ wufc &= ~E1000_WUFC_LNKC; ++ ++ if (wufc) { ++ e1000_setup_rctl(adapter); ++ e1000_set_multi(netdev); ++ ++ /* turn on all-multi mode if wake on multicast is enabled */ ++ if (wufc & E1000_WUFC_MC) { ++ rctl = er32(RCTL); ++ rctl |= E1000_RCTL_MPE; ++ ew32(RCTL, rctl); ++ } ++ ++ ctrl = er32(CTRL); ++ /* advertise wake from D3Cold */ ++ #define E1000_CTRL_ADVD3WUC 0x00100000 ++ /* phy power management enable */ ++ #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 ++ ctrl |= E1000_CTRL_ADVD3WUC; ++ if (!(adapter->flags2 & FLAG2_HAS_PHY_WAKEUP)) ++ ctrl |= E1000_CTRL_EN_PHY_PWR_MGMT; ++ ew32(CTRL, ctrl); ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_fiber || ++ adapter->hw.phy.media_type == ++ e1000_media_type_internal_serdes) { ++ /* keep the laser running in D3 */ ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; ++ ew32(CTRL_EXT, ctrl_ext); ++ } ++ ++ if (adapter->flags & FLAG_IS_ICH) ++ e1000_suspend_workarounds_ich8lan(&adapter->hw); ++ ++ /* Allow time for pending master requests to run */ ++ e1000e_disable_pcie_master(&adapter->hw); ++ ++ if (adapter->flags2 & FLAG2_HAS_PHY_WAKEUP) { ++ /* enable wakeup by the PHY */ ++ retval = e1000_init_phy_wakeup(adapter, wufc); ++ if (retval) ++ return retval; ++ } else { ++ /* enable wakeup by the MAC */ ++ ew32(WUFC, wufc); ++ ew32(WUC, E1000_WUC_PME_EN); ++ } ++ } else { ++ ew32(WUC, 0); ++ ew32(WUFC, 0); ++ } ++ ++ *enable_wake = !!wufc; ++ ++ /* make sure adapter isn't asleep if manageability is enabled */ ++ if ((adapter->flags & FLAG_MNG_PT_ENABLED) || ++ (hw->mac.ops.check_mng_mode(hw))) ++ *enable_wake = true; ++ ++ if (adapter->hw.phy.type == e1000_phy_igp_3) ++ e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); ++ ++ /* ++ * Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. ++ */ ++ e1000e_release_hw_control(adapter); ++ ++ pci_disable_device(pdev); ++ ++ return 0; ++} ++ ++static void e1000_power_off(struct pci_dev *pdev, bool sleep, bool wake) ++{ ++ if (sleep && wake) { ++ pci_prepare_to_sleep(pdev); ++ return; ++ } ++ ++ pci_wake_from_d3(pdev, wake); ++ pci_set_power_state(pdev, PCI_D3hot); ++} ++ ++static void e1000_complete_shutdown(struct pci_dev *pdev, bool sleep, ++ bool wake) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ /* ++ * The pci-e switch on some quad port adapters will report a ++ * correctable error when the MAC transitions from D0 to D3. To ++ * prevent this we need to mask off the correctable errors on the ++ * downstream port of the pci-e switch. 
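++ * (The code below clears PCI_EXP_DEVCTL_CERE in the parent bridge's
++ * Device Control register around the power-off and restores the saved
++ * value afterwards.)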
++ */ ++ if (adapter->flags & FLAG_IS_QUAD_PORT) { ++ struct pci_dev *us_dev = pdev->bus->self; ++ int pos = pci_pcie_cap(us_dev); ++ u16 devctl; ++ ++ pci_read_config_word(us_dev, pos + PCI_EXP_DEVCTL, &devctl); ++ pci_write_config_word(us_dev, pos + PCI_EXP_DEVCTL, ++ (devctl & ~PCI_EXP_DEVCTL_CERE)); ++ ++ e1000_power_off(pdev, sleep, wake); ++ ++ pci_write_config_word(us_dev, pos + PCI_EXP_DEVCTL, devctl); ++ } else { ++ e1000_power_off(pdev, sleep, wake); ++ } ++} ++ ++static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) ++{ ++ int pos; ++ u16 reg16; ++ ++ /* ++ * Both device and parent should have the same ASPM setting. ++ * Disable ASPM in downstream component first and then upstream. ++ */ ++ pos = pci_pcie_cap(pdev); ++ pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, ®16); ++ reg16 &= ~state; ++ pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16); ++ ++ if (!pdev->bus->self) ++ return; ++ ++ pos = pci_pcie_cap(pdev->bus->self); ++ pci_read_config_word(pdev->bus->self, pos + PCI_EXP_LNKCTL, ®16); ++ reg16 &= ~state; ++ pci_write_config_word(pdev->bus->self, pos + PCI_EXP_LNKCTL, reg16); ++} ++ ++static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state) ++{ ++ dev_info(&pdev->dev, "Disabling ASPM %s %s\n", ++ (state & PCIE_LINK_STATE_L0S) ? "L0s" : "", ++ (state & PCIE_LINK_STATE_L1) ? "L1" : ""); ++ ++ __e1000e_disable_aspm(pdev, state); ++} ++ ++static void e1000_shutdown(struct pci_dev *pdev) ++{ ++ bool wake = false; ++ ++ __e1000_shutdown(pdev, &wake, false); ++ ++ if (system_state == SYSTEM_POWER_OFF) ++ e1000_complete_shutdown(pdev, false, wake); ++} ++ ++/** ++ * e1000_io_error_detected - called when PCI error is detected ++ * @pdev: Pointer to PCI device ++ * @state: The current pci connection state ++ * ++ * This function is called after a PCI bus error affecting ++ * this device has been detected. ++ */ ++static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, ++ pci_channel_state_t state) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ rtnetif_device_detach(netdev); ++ ++ if (state == pci_channel_io_perm_failure) ++ return PCI_ERS_RESULT_DISCONNECT; ++ ++ if (rtnetif_running(netdev)) ++ e1000e_down(adapter); ++ pci_disable_device(pdev); ++ ++ /* Request a slot slot reset. */ ++ return PCI_ERS_RESULT_NEED_RESET; ++} ++ ++/** ++ * e1000_io_slot_reset - called after the pci bus has been reset. ++ * @pdev: Pointer to PCI device ++ * ++ * Restart the card from scratch, as if from a cold-boot. Implementation ++ * resembles the first-half of the e1000_resume routine. 
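++ * ASPM L0s/L1 is disabled again when the adapter flags require it, the
++ * PCI device is re-enabled and its saved state restored, wake-up from D3
++ * is disarmed, the hardware is reset and the Wake Up Status register is
++ * cleared.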
++ */ ++static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u16 aspm_disable_flag = 0; ++ int err; ++ pci_ers_result_t result; ++ ++ if (adapter->flags2 & FLAG2_DISABLE_ASPM_L0S) ++ aspm_disable_flag = PCIE_LINK_STATE_L0S; ++ if (adapter->flags2 & FLAG2_DISABLE_ASPM_L1) ++ aspm_disable_flag |= PCIE_LINK_STATE_L1; ++ if (aspm_disable_flag) ++ e1000e_disable_aspm(pdev, aspm_disable_flag); ++ ++ err = pci_enable_device_mem(pdev); ++ if (err) { ++ dev_err(&pdev->dev, ++ "Cannot re-enable PCI device after reset.\n"); ++ result = PCI_ERS_RESULT_DISCONNECT; ++ } else { ++ pci_set_master(pdev); ++ pdev->state_saved = true; ++ pci_restore_state(pdev); ++ ++ pci_enable_wake(pdev, PCI_D3hot, 0); ++ pci_enable_wake(pdev, PCI_D3cold, 0); ++ ++ e1000e_reset(adapter); ++ ew32(WUS, ~0); ++ result = PCI_ERS_RESULT_RECOVERED; ++ } ++ ++ pci_cleanup_aer_uncorrect_error_status(pdev); ++ ++ return result; ++} ++ ++/** ++ * e1000_io_resume - called when traffic can start flowing again. ++ * @pdev: Pointer to PCI device ++ * ++ * This callback is called when the error recovery driver tells us that ++ * its OK to resume normal operation. Implementation resembles the ++ * second-half of the e1000_resume routine. ++ */ ++static void e1000_io_resume(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ e1000_init_manageability_pt(adapter); ++ ++ if (rtnetif_running(netdev)) { ++ if (e1000e_up(adapter)) { ++ dev_err(&pdev->dev, ++ "can't bring device back up after reset\n"); ++ return; ++ } ++ } ++ ++ rtnetif_device_attach(netdev); ++ ++ /* ++ * If the controller has AMT, do not set DRV_LOAD until the interface ++ * is up. For all other cases, let the f/w know that the h/w is now ++ * under the control of the driver. ++ */ ++ if (!(adapter->flags & FLAG_HAS_AMT)) ++ e1000e_get_hw_control(adapter); ++ ++} ++ ++static void e1000_print_device_info(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct rtnet_device *netdev = adapter->netdev; ++ u32 ret_val; ++ u8 pba_str[E1000_PBANUM_LENGTH]; ++ ++ /* print bus type/speed/width info */ ++ e_info("(PCI Express:2.5GT/s:%s) %pM\n", ++ /* bus width */ ++ ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : ++ "Width x1"), ++ /* MAC address */ ++ netdev->dev_addr); ++ e_info("Intel(R) PRO/%s Network Connection\n", ++ (hw->phy.type == e1000_phy_ife) ? 
"10/100" : "1000"); ++ ret_val = e1000_read_pba_string_generic(hw, pba_str, ++ E1000_PBANUM_LENGTH); ++ if (ret_val) ++ strncpy((char *)pba_str, "Unknown", sizeof(pba_str) - 1); ++ e_info("MAC: %d, PHY: %d, PBA No: %s\n", ++ hw->mac.type, hw->phy.type, pba_str); ++} ++ ++static void e1000_eeprom_checks(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ int ret_val; ++ u16 buf = 0; ++ ++ if (hw->mac.type != e1000_82573) ++ return; ++ ++ ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &buf); ++ if (!ret_val && (!(le16_to_cpu(buf) & (1 << 0)))) { ++ /* Deep Smart Power Down (DSPD) */ ++ dev_warn(&adapter->pdev->dev, ++ "Warning: detected DSPD enabled in EEPROM\n"); ++ } ++} ++ ++static dma_addr_t e1000_map_rtskb(struct rtnet_device *netdev, ++ struct rtskb *skb) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct device *dev = &adapter->pdev->dev; ++ dma_addr_t addr; ++ ++ addr = dma_map_single(dev, skb->buf_start, RTSKB_SIZE, ++ DMA_BIDIRECTIONAL); ++ if (dma_mapping_error(dev, addr)) { ++ dev_err(dev, "DMA map failed\n"); ++ return RTSKB_UNMAPPED; ++ } ++ return addr; ++} ++ ++static void e1000_unmap_rtskb(struct rtnet_device *netdev, ++ struct rtskb *skb) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct device *dev = &adapter->pdev->dev; ++ ++ dma_unmap_single(dev, skb->buf_dma_addr, RTSKB_SIZE, ++ DMA_BIDIRECTIONAL); ++} ++ ++/** ++ * e1000_probe - Device Initialization Routine ++ * @pdev: PCI device information struct ++ * @ent: entry in e1000_pci_tbl ++ * ++ * Returns 0 on success, negative on failure ++ * ++ * e1000_probe initializes an adapter identified by a pci_dev structure. ++ * The OS initialization, configuring of the adapter private structure, ++ * and a hardware reset occur. ++ **/ ++static int e1000_probe(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtnet_device *netdev; ++ struct e1000_adapter *adapter; ++ struct e1000_hw *hw; ++ const struct e1000_info *ei = e1000_info_tbl[ent->driver_data]; ++ resource_size_t mmio_start, mmio_len; ++ resource_size_t flash_start, flash_len; ++ ++ static int cards_found; ++ u16 aspm_disable_flag = 0; ++ int i, err, pci_using_dac; ++ u16 eeprom_data = 0; ++ u16 eeprom_apme_mask = E1000_EEPROM_APME; ++ ++ if (ei->flags2 & FLAG2_DISABLE_ASPM_L0S) ++ aspm_disable_flag = PCIE_LINK_STATE_L0S; ++ if (ei->flags2 & FLAG2_DISABLE_ASPM_L1) ++ aspm_disable_flag |= PCIE_LINK_STATE_L1; ++ if (aspm_disable_flag) ++ e1000e_disable_aspm(pdev, aspm_disable_flag); ++ ++ err = pci_enable_device_mem(pdev); ++ if (err) ++ return err; ++ ++ pci_using_dac = 0; ++ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); ++ if (!err) { ++ err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); ++ if (!err) ++ pci_using_dac = 1; ++ } else { ++ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); ++ if (err) { ++ err = dma_set_coherent_mask(&pdev->dev, ++ DMA_BIT_MASK(32)); ++ if (err) { ++ dev_err(&pdev->dev, "No usable DMA " ++ "configuration, aborting\n"); ++ goto err_dma; ++ } ++ } ++ } ++ ++ err = pci_request_selected_regions_exclusive(pdev, ++ pci_select_bars(pdev, IORESOURCE_MEM), ++ e1000e_driver_name); ++ if (err) ++ goto err_pci_reg; ++ ++ /* AER (Advanced Error Reporting) hooks */ ++ pci_enable_pcie_error_reporting(pdev); ++ ++ pci_set_master(pdev); ++ /* PCI config space info */ ++ err = pci_save_state(pdev); ++ if (err) ++ goto err_alloc_etherdev; ++ ++ err = -ENOMEM; ++ netdev = rt_alloc_etherdev(sizeof(*adapter), ++ 2 * RT_E1000E_NUM_RXD + 256); ++ if (!netdev) ++ goto 
err_alloc_etherdev; ++ ++ rtdev_alloc_name(netdev, "rteth%d"); ++ rt_rtdev_connect(netdev, &RTDEV_manager); ++ netdev->vers = RTDEV_VERS_2_0; ++ netdev->sysbind = &pdev->dev; ++ ++ netdev->irq = pdev->irq; ++ ++ pci_set_drvdata(pdev, netdev); ++ adapter = netdev->priv; ++ hw = &adapter->hw; ++ adapter->netdev = netdev; ++ adapter->pdev = pdev; ++ adapter->ei = ei; ++ adapter->pba = ei->pba; ++ adapter->flags = ei->flags; ++ adapter->flags2 = ei->flags2; ++ adapter->hw.adapter = adapter; ++ adapter->hw.mac.type = ei->mac; ++ adapter->max_hw_frame_size = ei->max_hw_frame_size; ++ adapter->msg_enable = (1 << NETIF_MSG_DRV | NETIF_MSG_PROBE) - 1; ++ ++ mmio_start = pci_resource_start(pdev, 0); ++ mmio_len = pci_resource_len(pdev, 0); ++ ++ err = -EIO; ++ adapter->hw.hw_addr = ioremap(mmio_start, mmio_len); ++ if (!adapter->hw.hw_addr) ++ goto err_ioremap; ++ ++ if ((adapter->flags & FLAG_HAS_FLASH) && ++ (pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { ++ flash_start = pci_resource_start(pdev, 1); ++ flash_len = pci_resource_len(pdev, 1); ++ adapter->hw.flash_address = ioremap(flash_start, flash_len); ++ if (!adapter->hw.flash_address) ++ goto err_flashmap; ++ } ++ ++ /* construct the net_device struct */ ++ netdev->open = e1000_open; ++ netdev->stop = e1000_close; ++ netdev->hard_start_xmit = e1000_xmit_frame; ++ //netdev->get_stats = e1000_get_stats; ++ netdev->map_rtskb = e1000_map_rtskb; ++ netdev->unmap_rtskb = e1000_unmap_rtskb; ++ strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); ++ ++ netdev->mem_start = mmio_start; ++ netdev->mem_end = mmio_start + mmio_len; ++ ++ adapter->bd_number = cards_found++; ++ ++ e1000e_check_options(adapter); ++ ++ /* setup adapter struct */ ++ err = e1000_sw_init(adapter); ++ if (err) ++ goto err_sw_init; ++ ++ memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); ++ memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops)); ++ memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); ++ ++ err = ei->get_variants(adapter); ++ if (err) ++ goto err_hw_init; ++ ++ if ((adapter->flags & FLAG_IS_ICH) && ++ (adapter->flags & FLAG_READ_ONLY_NVM)) ++ e1000e_write_protect_nvm_ich8lan(&adapter->hw); ++ ++ hw->mac.ops.get_bus_info(&adapter->hw); ++ ++ adapter->hw.phy.autoneg_wait_to_complete = 0; ++ ++ /* Copper options */ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ adapter->hw.phy.mdix = AUTO_ALL_MODES; ++ adapter->hw.phy.disable_polarity_correction = 0; ++ adapter->hw.phy.ms_type = e1000_ms_hw_default; ++ } ++ ++ if (e1000_check_reset_block(&adapter->hw)) ++ e_info("PHY reset is blocked due to SOL/IDER session.\n"); ++ ++ /* Set initial default active device features */ ++ netdev->features = (NETIF_F_SG | ++ NETIF_F_HW_VLAN_CTAG_RX | ++ NETIF_F_HW_VLAN_CTAG_TX | ++ NETIF_F_TSO | ++ NETIF_F_TSO6 | ++ NETIF_F_RXCSUM | ++ NETIF_F_HW_CSUM); ++ ++ if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) ++ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ++ ++ if (pci_using_dac) { ++ netdev->features |= NETIF_F_HIGHDMA; ++ } ++ ++ if (e1000e_enable_mng_pass_thru(&adapter->hw)) ++ adapter->flags |= FLAG_MNG_PT_ENABLED; ++ ++ /* ++ * before reading the NVM, reset the controller to ++ * put the device in a known good starting state ++ */ ++ adapter->hw.mac.ops.reset_hw(&adapter->hw); ++ ++ /* ++ * systems with ASPM and others may see the checksum fail on the first ++ * attempt. 
Let's give it a few tries ++ */ ++ for (i = 0;; i++) { ++ if (e1000_validate_nvm_checksum(&adapter->hw) >= 0) ++ break; ++ if (i == 2) { ++ e_err("The NVM Checksum Is Not Valid\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ } ++ ++ e1000_eeprom_checks(adapter); ++ ++ /* copy the MAC address */ ++ if (e1000e_read_mac_addr(&adapter->hw)) ++ e_err("NVM Read Error while reading MAC address\n"); ++ ++ memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++ timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0); ++ timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0); ++#else /* < 4.14 */ ++ init_timer(&adapter->watchdog_timer); ++ adapter->watchdog_timer.function = e1000_watchdog; ++ adapter->watchdog_timer.data = (unsigned long) adapter; ++ ++ init_timer(&adapter->phy_info_timer); ++ adapter->phy_info_timer.function = e1000_update_phy_info; ++ adapter->phy_info_timer.data = (unsigned long) adapter; ++#endif /* < 4.14 */ ++ ++ INIT_WORK(&adapter->reset_task, e1000_reset_task); ++ INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task); ++ INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround); ++ INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task); ++ ++ rtdm_nrtsig_init(&adapter->mod_timer_sig, e1000e_mod_watchdog_timer, ++ (void*)&adapter->watchdog_timer); ++ rtdm_nrtsig_init(&adapter->downshift_sig, e1000e_trigger_downshift, ++ &adapter->downshift_task); ++ ++ /* Initialize link parameters. User can change them with ethtool */ ++ adapter->hw.mac.autoneg = 1; ++ adapter->fc_autoneg = 1; ++ adapter->hw.fc.requested_mode = e1000_fc_default; ++ adapter->hw.fc.current_mode = e1000_fc_default; ++ adapter->hw.phy.autoneg_advertised = 0x2f; ++ ++ /* ring size defaults */ ++ adapter->rx_ring->count = RT_E1000E_NUM_RXD; ++ adapter->tx_ring->count = 256; ++ ++ /* ++ * Initial Wake on LAN setting - If APM wake is enabled in ++ * the EEPROM, enable the ACPI Magic Packet filter ++ */ ++ if (adapter->flags & FLAG_APME_IN_WUC) { ++ /* APME bit in EEPROM is mapped to WUC.APME */ ++ eeprom_data = er32(WUC); ++ eeprom_apme_mask = E1000_WUC_APME; ++ if ((hw->mac.type > e1000_ich10lan) && ++ (eeprom_data & E1000_WUC_PHY_WAKE)) ++ adapter->flags2 |= FLAG2_HAS_PHY_WAKEUP; ++ } else if (adapter->flags & FLAG_APME_IN_CTRL3) { ++ if (adapter->flags & FLAG_APME_CHECK_PORT_B && ++ (adapter->hw.bus.func == 1)) ++ e1000_read_nvm(&adapter->hw, ++ NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); ++ else ++ e1000_read_nvm(&adapter->hw, ++ NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); ++ } ++ ++ /* fetch WoL from EEPROM */ ++ if (eeprom_data & eeprom_apme_mask) ++ adapter->eeprom_wol |= E1000_WUFC_MAG; ++ ++ /* ++ * now that we have the eeprom settings, apply the special cases ++ * where the eeprom may be wrong or the board simply won't support ++ * wake on lan on a particular port ++ */ ++ if (!(adapter->flags & FLAG_HAS_WOL)) ++ adapter->eeprom_wol = 0; ++ ++ /* initialize the wol settings based on the eeprom settings */ ++ adapter->wol = adapter->eeprom_wol; ++ device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); ++ ++ /* save off EEPROM version number */ ++ e1000_read_nvm(&adapter->hw, 5, 1, &adapter->eeprom_vers); ++ ++ /* reset the hardware with the new settings */ ++ e1000e_reset(adapter); ++ ++ /* ++ * If the controller has AMT, do not set DRV_LOAD until the interface ++ * is up. For all other cases, let the f/w know that the h/w is now ++ * under the control of the driver. 
++ */ ++ if (!(adapter->flags & FLAG_HAS_AMT)) ++ e1000e_get_hw_control(adapter); ++ ++ strncpy(netdev->name, "rteth%d", sizeof(netdev->name) - 1); ++ err = rt_register_rtnetdev(netdev); ++ if (err) ++ goto err_register; ++ ++ /* carrier off reporting is important to ethtool even BEFORE open */ ++ rtnetif_carrier_off(netdev); ++ ++ e1000_print_device_info(adapter); ++ ++ if (pci_dev_run_wake(pdev)) ++ pm_runtime_put_noidle(&pdev->dev); ++ ++ return 0; ++ ++err_register: ++ rtdm_nrtsig_destroy(&adapter->downshift_sig); ++ rtdm_nrtsig_destroy(&adapter->mod_timer_sig); ++ if (!(adapter->flags & FLAG_HAS_AMT)) ++ e1000e_release_hw_control(adapter); ++err_eeprom: ++ if (!e1000_check_reset_block(&adapter->hw)) ++ e1000_phy_hw_reset(&adapter->hw); ++err_hw_init: ++ kfree(adapter->tx_ring); ++ kfree(adapter->rx_ring); ++err_sw_init: ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++ e1000e_reset_interrupt_capability(adapter); ++err_flashmap: ++ iounmap(adapter->hw.hw_addr); ++err_ioremap: ++ rtdev_free(netdev); ++err_alloc_etherdev: ++ pci_release_selected_regions(pdev, ++ pci_select_bars(pdev, IORESOURCE_MEM)); ++err_pci_reg: ++err_dma: ++ pci_disable_device(pdev); ++ return err; ++} ++ ++/** ++ * e1000_remove - Device Removal Routine ++ * @pdev: PCI device information struct ++ * ++ * e1000_remove is called by the PCI subsystem to alert the driver ++ * that it should release a PCI device. The could be caused by a ++ * Hot-Plug event, or because the driver is going to be removed from ++ * memory. ++ **/ ++static void e1000_remove(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ bool down = test_bit(__E1000_DOWN, &adapter->state); ++ ++ /* ++ * The timers may be rescheduled, so explicitly disable them ++ * from being rescheduled. ++ */ ++ if (!down) ++ set_bit(__E1000_DOWN, &adapter->state); ++ del_timer_sync(&adapter->watchdog_timer); ++ del_timer_sync(&adapter->phy_info_timer); ++ ++ rtdm_nrtsig_destroy(&adapter->downshift_sig); ++ rtdm_nrtsig_destroy(&adapter->mod_timer_sig); ++ ++ cancel_work_sync(&adapter->reset_task); ++ cancel_work_sync(&adapter->watchdog_task); ++ cancel_work_sync(&adapter->downshift_task); ++ cancel_work_sync(&adapter->update_phy_task); ++ ++ if (!(netdev->flags & IFF_UP)) ++ e1000_power_down_phy(adapter); ++ ++ /* Don't lie to e1000_close() down the road. */ ++ if (!down) ++ clear_bit(__E1000_DOWN, &adapter->state); ++ rt_unregister_rtnetdev(netdev); ++ ++ if (pci_dev_run_wake(pdev)) ++ pm_runtime_get_noresume(&pdev->dev); ++ ++ /* ++ * Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. 
++ */ ++ e1000e_release_hw_control(adapter); ++ ++ e1000e_reset_interrupt_capability(adapter); ++ kfree(adapter->tx_ring); ++ kfree(adapter->rx_ring); ++ ++ iounmap(adapter->hw.hw_addr); ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++ pci_release_selected_regions(pdev, ++ pci_select_bars(pdev, IORESOURCE_MEM)); ++ ++ rtdev_free(netdev); ++ ++ /* AER disable */ ++ pci_disable_pcie_error_reporting(pdev); ++ ++ pci_disable_device(pdev); ++} ++ ++/* PCI Error Recovery (ERS) */ ++static struct pci_error_handlers e1000_err_handler = { ++ .error_detected = e1000_io_error_detected, ++ .slot_reset = e1000_io_slot_reset, ++ .resume = e1000_io_resume, ++}; ++ ++static const struct pci_device_id e1000_pci_tbl[] = { ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_COPPER), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_FIBER), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_FIBER), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES_DUAL), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES_QUAD), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571PT_QUAD_COPPER), board_82571 }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI), board_82572 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_COPPER), board_82572 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_FIBER), board_82572 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_SERDES), board_82572 }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82573E), board_82573 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82573E_IAMT), board_82573 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82573L), board_82573 }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82574L), board_82574 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82574LA), board_82574 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82583V), board_82583 }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_COPPER_DPT), ++ board_80003es2lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_COPPER_SPT), ++ board_80003es2lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_SERDES_DPT), ++ board_80003es2lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_SERDES_SPT), ++ board_80003es2lan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE_G), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE_GT), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_AMT), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_C), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_M), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_M_AMT), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_82567V_3), board_ich8lan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE_G), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE_GT), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_AMT), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_C), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_BM), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_AMT), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_V), board_ich9lan }, ++ ++ { 
PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LM), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LF), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_V), board_ich9lan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_D_BM_LM), board_ich10lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_D_BM_LF), board_ich10lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_D_BM_V), board_ich10lan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_M_HV_LM), board_pchlan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_M_HV_LC), board_pchlan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_D_HV_DM), board_pchlan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_D_HV_DC), board_pchlan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH2_LV_LM), board_pch2lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH2_LV_V), board_pch2lan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPT_I217_LM), board_pch_lpt }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPT_I217_V), board_pch_lpt }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPTLP_I218_LM), board_pch_lpt }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPTLP_I218_V), board_pch_lpt }, ++ ++ { } /* terminate list */ ++}; ++MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); ++ ++/* PCI Device API Driver */ ++static struct pci_driver e1000_driver = { ++ .name = e1000e_driver_name, ++ .id_table = e1000_pci_tbl, ++ .probe = e1000_probe, ++ .remove = e1000_remove, ++ .shutdown = e1000_shutdown, ++ .err_handler = &e1000_err_handler ++}; ++ ++/** ++ * e1000_init_module - Driver Registration Routine ++ * ++ * e1000_init_module is the first routine called when the driver is ++ * loaded. All it does is register with the PCI subsystem. ++ **/ ++static int __init e1000_init_module(void) ++{ ++ int ret; ++ pr_info("Intel(R) PRO/1000 Network Driver - %s\n", ++ e1000e_driver_version); ++ pr_info("Copyright(c) 1999 - 2011 Intel Corporation.\n"); ++ ret = pci_register_driver(&e1000_driver); ++ ++ return ret; ++} ++module_init(e1000_init_module); ++ ++/** ++ * e1000_exit_module - Driver Exit Cleanup Routine ++ * ++ * e1000_exit_module is called just before the driver is removed ++ * from memory. ++ **/ ++static void __exit e1000_exit_module(void) ++{ ++ pci_unregister_driver(&e1000_driver); ++} ++module_exit(e1000_exit_module); ++ ++ ++MODULE_AUTHOR("Intel Corporation, "); ++MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver"); ++MODULE_LICENSE("GPL"); ++MODULE_VERSION(DRV_VERSION); ++ ++/* e1000_main.c */ +--- linux/drivers/xenomai/net/drivers/e1000e/hw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/hw.h 2021-04-07 16:01:27.170634248 +0800 +@@ -0,0 +1,997 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_HW_H_ ++#define _E1000_HW_H_ ++ ++#include ++ ++struct e1000_hw; ++struct e1000_adapter; ++ ++#include "defines.h" ++ ++#define er32(reg) __er32(hw, E1000_##reg) ++#define ew32(reg,val) __ew32(hw, E1000_##reg, (val)) ++#define e1e_flush() er32(STATUS) ++ ++#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) \ ++ (writel((value), ((a)->hw_addr + reg + ((offset) << 2)))) ++ ++#define E1000_READ_REG_ARRAY(a, reg, offset) \ ++ (readl((a)->hw_addr + reg + ((offset) << 2))) ++ ++enum e1e_registers { ++ E1000_CTRL = 0x00000, /* Device Control - RW */ ++ E1000_STATUS = 0x00008, /* Device Status - RO */ ++ E1000_EECD = 0x00010, /* EEPROM/Flash Control - RW */ ++ E1000_EERD = 0x00014, /* EEPROM Read - RW */ ++ E1000_CTRL_EXT = 0x00018, /* Extended Device Control - RW */ ++ E1000_FLA = 0x0001C, /* Flash Access - RW */ ++ E1000_MDIC = 0x00020, /* MDI Control - RW */ ++ E1000_SCTL = 0x00024, /* SerDes Control - RW */ ++ E1000_FCAL = 0x00028, /* Flow Control Address Low - RW */ ++ E1000_FCAH = 0x0002C, /* Flow Control Address High -RW */ ++ E1000_FEXTNVM4 = 0x00024, /* Future Extended NVM 4 - RW */ ++ E1000_FEXTNVM = 0x00028, /* Future Extended NVM - RW */ ++ E1000_FCT = 0x00030, /* Flow Control Type - RW */ ++ E1000_VET = 0x00038, /* VLAN Ether Type - RW */ ++ E1000_ICR = 0x000C0, /* Interrupt Cause Read - R/clr */ ++ E1000_ITR = 0x000C4, /* Interrupt Throttling Rate - RW */ ++ E1000_ICS = 0x000C8, /* Interrupt Cause Set - WO */ ++ E1000_IMS = 0x000D0, /* Interrupt Mask Set - RW */ ++ E1000_IMC = 0x000D8, /* Interrupt Mask Clear - WO */ ++ E1000_EIAC_82574 = 0x000DC, /* Ext. 
Interrupt Auto Clear - RW */ ++ E1000_IAM = 0x000E0, /* Interrupt Acknowledge Auto Mask */ ++ E1000_IVAR = 0x000E4, /* Interrupt Vector Allocation - RW */ ++ E1000_EITR_82574_BASE = 0x000E8, /* Interrupt Throttling - RW */ ++#define E1000_EITR_82574(_n) (E1000_EITR_82574_BASE + (_n << 2)) ++ E1000_RCTL = 0x00100, /* Rx Control - RW */ ++ E1000_FCTTV = 0x00170, /* Flow Control Transmit Timer Value - RW */ ++ E1000_TXCW = 0x00178, /* Tx Configuration Word - RW */ ++ E1000_RXCW = 0x00180, /* Rx Configuration Word - RO */ ++ E1000_TCTL = 0x00400, /* Tx Control - RW */ ++ E1000_TCTL_EXT = 0x00404, /* Extended Tx Control - RW */ ++ E1000_TIPG = 0x00410, /* Tx Inter-packet gap -RW */ ++ E1000_AIT = 0x00458, /* Adaptive Interframe Spacing Throttle -RW */ ++ E1000_LEDCTL = 0x00E00, /* LED Control - RW */ ++ E1000_EXTCNF_CTRL = 0x00F00, /* Extended Configuration Control */ ++ E1000_EXTCNF_SIZE = 0x00F08, /* Extended Configuration Size */ ++ E1000_PHY_CTRL = 0x00F10, /* PHY Control Register in CSR */ ++#define E1000_POEMB E1000_PHY_CTRL /* PHY OEM Bits */ ++ E1000_PBA = 0x01000, /* Packet Buffer Allocation - RW */ ++ E1000_PBS = 0x01008, /* Packet Buffer Size */ ++ E1000_EEMNGCTL = 0x01010, /* MNG EEprom Control */ ++ E1000_EEWR = 0x0102C, /* EEPROM Write Register - RW */ ++ E1000_FLOP = 0x0103C, /* FLASH Opcode Register */ ++ E1000_PBA_ECC = 0x01100, /* PBA ECC Register */ ++ E1000_ERT = 0x02008, /* Early Rx Threshold - RW */ ++ E1000_FCRTL = 0x02160, /* Flow Control Receive Threshold Low - RW */ ++ E1000_FCRTH = 0x02168, /* Flow Control Receive Threshold High - RW */ ++ E1000_PSRCTL = 0x02170, /* Packet Split Receive Control - RW */ ++ E1000_RDBAL = 0x02800, /* Rx Descriptor Base Address Low - RW */ ++ E1000_RDBAH = 0x02804, /* Rx Descriptor Base Address High - RW */ ++ E1000_RDLEN = 0x02808, /* Rx Descriptor Length - RW */ ++ E1000_RDH = 0x02810, /* Rx Descriptor Head - RW */ ++ E1000_RDT = 0x02818, /* Rx Descriptor Tail - RW */ ++ E1000_RDTR = 0x02820, /* Rx Delay Timer - RW */ ++ E1000_RXDCTL_BASE = 0x02828, /* Rx Descriptor Control - RW */ ++#define E1000_RXDCTL(_n) (E1000_RXDCTL_BASE + (_n << 8)) ++ E1000_RADV = 0x0282C, /* Rx Interrupt Absolute Delay Timer - RW */ ++ ++/* Convenience macros ++ * ++ * Note: "_n" is the queue number of the register to be written to. 
++ * ++ * Example usage: ++ * E1000_RDBAL_REG(current_rx_queue) ++ * ++ */ ++#define E1000_RDBAL_REG(_n) (E1000_RDBAL + (_n << 8)) ++ E1000_KABGTXD = 0x03004, /* AFE Band Gap Transmit Ref Data */ ++ E1000_TDBAL = 0x03800, /* Tx Descriptor Base Address Low - RW */ ++ E1000_TDBAH = 0x03804, /* Tx Descriptor Base Address High - RW */ ++ E1000_TDLEN = 0x03808, /* Tx Descriptor Length - RW */ ++ E1000_TDH = 0x03810, /* Tx Descriptor Head - RW */ ++ E1000_TDT = 0x03818, /* Tx Descriptor Tail - RW */ ++ E1000_TIDV = 0x03820, /* Tx Interrupt Delay Value - RW */ ++ E1000_TXDCTL_BASE = 0x03828, /* Tx Descriptor Control - RW */ ++#define E1000_TXDCTL(_n) (E1000_TXDCTL_BASE + (_n << 8)) ++ E1000_TADV = 0x0382C, /* Tx Interrupt Absolute Delay Val - RW */ ++ E1000_TARC_BASE = 0x03840, /* Tx Arbitration Count (0) */ ++#define E1000_TARC(_n) (E1000_TARC_BASE + (_n << 8)) ++ E1000_CRCERRS = 0x04000, /* CRC Error Count - R/clr */ ++ E1000_ALGNERRC = 0x04004, /* Alignment Error Count - R/clr */ ++ E1000_SYMERRS = 0x04008, /* Symbol Error Count - R/clr */ ++ E1000_RXERRC = 0x0400C, /* Receive Error Count - R/clr */ ++ E1000_MPC = 0x04010, /* Missed Packet Count - R/clr */ ++ E1000_SCC = 0x04014, /* Single Collision Count - R/clr */ ++ E1000_ECOL = 0x04018, /* Excessive Collision Count - R/clr */ ++ E1000_MCC = 0x0401C, /* Multiple Collision Count - R/clr */ ++ E1000_LATECOL = 0x04020, /* Late Collision Count - R/clr */ ++ E1000_COLC = 0x04028, /* Collision Count - R/clr */ ++ E1000_DC = 0x04030, /* Defer Count - R/clr */ ++ E1000_TNCRS = 0x04034, /* Tx-No CRS - R/clr */ ++ E1000_SEC = 0x04038, /* Sequence Error Count - R/clr */ ++ E1000_CEXTERR = 0x0403C, /* Carrier Extension Error Count - R/clr */ ++ E1000_RLEC = 0x04040, /* Receive Length Error Count - R/clr */ ++ E1000_XONRXC = 0x04048, /* XON Rx Count - R/clr */ ++ E1000_XONTXC = 0x0404C, /* XON Tx Count - R/clr */ ++ E1000_XOFFRXC = 0x04050, /* XOFF Rx Count - R/clr */ ++ E1000_XOFFTXC = 0x04054, /* XOFF Tx Count - R/clr */ ++ E1000_FCRUC = 0x04058, /* Flow Control Rx Unsupported Count- R/clr */ ++ E1000_PRC64 = 0x0405C, /* Packets Rx (64 bytes) - R/clr */ ++ E1000_PRC127 = 0x04060, /* Packets Rx (65-127 bytes) - R/clr */ ++ E1000_PRC255 = 0x04064, /* Packets Rx (128-255 bytes) - R/clr */ ++ E1000_PRC511 = 0x04068, /* Packets Rx (255-511 bytes) - R/clr */ ++ E1000_PRC1023 = 0x0406C, /* Packets Rx (512-1023 bytes) - R/clr */ ++ E1000_PRC1522 = 0x04070, /* Packets Rx (1024-1522 bytes) - R/clr */ ++ E1000_GPRC = 0x04074, /* Good Packets Rx Count - R/clr */ ++ E1000_BPRC = 0x04078, /* Broadcast Packets Rx Count - R/clr */ ++ E1000_MPRC = 0x0407C, /* Multicast Packets Rx Count - R/clr */ ++ E1000_GPTC = 0x04080, /* Good Packets Tx Count - R/clr */ ++ E1000_GORCL = 0x04088, /* Good Octets Rx Count Low - R/clr */ ++ E1000_GORCH = 0x0408C, /* Good Octets Rx Count High - R/clr */ ++ E1000_GOTCL = 0x04090, /* Good Octets Tx Count Low - R/clr */ ++ E1000_GOTCH = 0x04094, /* Good Octets Tx Count High - R/clr */ ++ E1000_RNBC = 0x040A0, /* Rx No Buffers Count - R/clr */ ++ E1000_RUC = 0x040A4, /* Rx Undersize Count - R/clr */ ++ E1000_RFC = 0x040A8, /* Rx Fragment Count - R/clr */ ++ E1000_ROC = 0x040AC, /* Rx Oversize Count - R/clr */ ++ E1000_RJC = 0x040B0, /* Rx Jabber Count - R/clr */ ++ E1000_MGTPRC = 0x040B4, /* Management Packets Rx Count - R/clr */ ++ E1000_MGTPDC = 0x040B8, /* Management Packets Dropped Count - R/clr */ ++ E1000_MGTPTC = 0x040BC, /* Management Packets Tx Count - R/clr */ ++ E1000_TORL = 0x040C0, /* Total Octets Rx Low - R/clr */ ++ 
E1000_TORH = 0x040C4, /* Total Octets Rx High - R/clr */ ++ E1000_TOTL = 0x040C8, /* Total Octets Tx Low - R/clr */ ++ E1000_TOTH = 0x040CC, /* Total Octets Tx High - R/clr */ ++ E1000_TPR = 0x040D0, /* Total Packets Rx - R/clr */ ++ E1000_TPT = 0x040D4, /* Total Packets Tx - R/clr */ ++ E1000_PTC64 = 0x040D8, /* Packets Tx (64 bytes) - R/clr */ ++ E1000_PTC127 = 0x040DC, /* Packets Tx (65-127 bytes) - R/clr */ ++ E1000_PTC255 = 0x040E0, /* Packets Tx (128-255 bytes) - R/clr */ ++ E1000_PTC511 = 0x040E4, /* Packets Tx (256-511 bytes) - R/clr */ ++ E1000_PTC1023 = 0x040E8, /* Packets Tx (512-1023 bytes) - R/clr */ ++ E1000_PTC1522 = 0x040EC, /* Packets Tx (1024-1522 Bytes) - R/clr */ ++ E1000_MPTC = 0x040F0, /* Multicast Packets Tx Count - R/clr */ ++ E1000_BPTC = 0x040F4, /* Broadcast Packets Tx Count - R/clr */ ++ E1000_TSCTC = 0x040F8, /* TCP Segmentation Context Tx - R/clr */ ++ E1000_TSCTFC = 0x040FC, /* TCP Segmentation Context Tx Fail - R/clr */ ++ E1000_IAC = 0x04100, /* Interrupt Assertion Count */ ++ E1000_ICRXPTC = 0x04104, /* Irq Cause Rx Packet Timer Expire Count */ ++ E1000_ICRXATC = 0x04108, /* Irq Cause Rx Abs Timer Expire Count */ ++ E1000_ICTXPTC = 0x0410C, /* Irq Cause Tx Packet Timer Expire Count */ ++ E1000_ICTXATC = 0x04110, /* Irq Cause Tx Abs Timer Expire Count */ ++ E1000_ICTXQEC = 0x04118, /* Irq Cause Tx Queue Empty Count */ ++ E1000_ICTXQMTC = 0x0411C, /* Irq Cause Tx Queue MinThreshold Count */ ++ E1000_ICRXDMTC = 0x04120, /* Irq Cause Rx Desc MinThreshold Count */ ++ E1000_ICRXOC = 0x04124, /* Irq Cause Receiver Overrun Count */ ++ E1000_RXCSUM = 0x05000, /* Rx Checksum Control - RW */ ++ E1000_RFCTL = 0x05008, /* Receive Filter Control */ ++ E1000_MTA = 0x05200, /* Multicast Table Array - RW Array */ ++ E1000_RAL_BASE = 0x05400, /* Receive Address Low - RW */ ++#define E1000_RAL(_n) (E1000_RAL_BASE + ((_n) * 8)) ++#define E1000_RA (E1000_RAL(0)) ++ E1000_RAH_BASE = 0x05404, /* Receive Address High - RW */ ++#define E1000_RAH(_n) (E1000_RAH_BASE + ((_n) * 8)) ++ E1000_SHRAL_PCH_LPT_BASE = 0x05408, ++#define E1000_SHRAL_PCH_LPT(_n) (E1000_SHRAL_PCH_LPT_BASE + ((_n) * 8)) ++ E1000_SHRAH_PCH_LTP_BASE = 0x0540C, ++#define E1000_SHRAH_PCH_LPT(_n) (E1000_SHRAH_PCH_LTP_BASE + ((_n) * 8)) ++ E1000_VFTA = 0x05600, /* VLAN Filter Table Array - RW Array */ ++ E1000_WUC = 0x05800, /* Wakeup Control - RW */ ++ E1000_WUFC = 0x05808, /* Wakeup Filter Control - RW */ ++ E1000_WUS = 0x05810, /* Wakeup Status - RO */ ++ E1000_MANC = 0x05820, /* Management Control - RW */ ++ E1000_FFLT = 0x05F00, /* Flexible Filter Length Table - RW Array */ ++ E1000_HOST_IF = 0x08800, /* Host Interface */ ++ ++ E1000_KMRNCTRLSTA = 0x00034, /* MAC-PHY interface - RW */ ++ E1000_MANC2H = 0x05860, /* Management Control To Host - RW */ ++ E1000_MDEF_BASE = 0x05890, /* Management Decision Filters */ ++#define E1000_MDEF(_n) (E1000_MDEF_BASE + ((_n) * 4)) ++ E1000_SW_FW_SYNC = 0x05B5C, /* Software-Firmware Synchronization - RW */ ++ E1000_GCR = 0x05B00, /* PCI-Ex Control */ ++ E1000_GCR2 = 0x05B64, /* PCI-Ex Control #2 */ ++ E1000_FACTPS = 0x05B30, /* Function Active and Power State to MNG */ ++ E1000_SWSM = 0x05B50, /* SW Semaphore */ ++ E1000_FWSM = 0x05B54, /* FW Semaphore */ ++ E1000_SWSM2 = 0x05B58, /* Driver-only SW semaphore */ ++ E1000_FFLT_DBG = 0x05F04, /* Debug Register */ ++ E1000_PCH_RAICC_BASE = 0x05F50, /* Receive Address Initial CRC */ ++#define E1000_PCH_RAICC(_n) (E1000_PCH_RAICC_BASE + ((_n) * 4)) ++#define E1000_CRC_OFFSET E1000_PCH_RAICC_BASE ++ E1000_HICR = 0x08F00, /* Host 
Interface Control */ ++}; ++ ++#define E1000_MAX_PHY_ADDR 4 ++ ++/* IGP01E1000 Specific Registers */ ++#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* Port Config */ ++#define IGP01E1000_PHY_PORT_STATUS 0x11 /* Status */ ++#define IGP01E1000_PHY_PORT_CTRL 0x12 /* Control */ ++#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health */ ++#define IGP02E1000_PHY_POWER_MGMT 0x19 /* Power Management */ ++#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* Page Select */ ++#define BM_PHY_PAGE_SELECT 22 /* Page Select for BM */ ++#define IGP_PAGE_SHIFT 5 ++#define PHY_REG_MASK 0x1F ++ ++#define BM_WUC_PAGE 800 ++#define BM_WUC_ADDRESS_OPCODE 0x11 ++#define BM_WUC_DATA_OPCODE 0x12 ++#define BM_WUC_ENABLE_PAGE 769 ++#define BM_WUC_ENABLE_REG 17 ++#define BM_WUC_ENABLE_BIT (1 << 2) ++#define BM_WUC_HOST_WU_BIT (1 << 4) ++#define BM_WUC_ME_WU_BIT (1 << 5) ++ ++#define BM_WUC PHY_REG(BM_WUC_PAGE, 1) ++#define BM_WUFC PHY_REG(BM_WUC_PAGE, 2) ++#define BM_WUS PHY_REG(BM_WUC_PAGE, 3) ++ ++#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4 ++#define IGP01E1000_PHY_POLARITY_MASK 0x0078 ++ ++#define IGP01E1000_PSCR_AUTO_MDIX 0x1000 ++#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0=MDI, 1=MDIX */ ++ ++#define IGP01E1000_PSCFR_SMART_SPEED 0x0080 ++ ++#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */ ++#define IGP02E1000_PM_D0_LPLU 0x0002 /* For D0a states */ ++#define IGP02E1000_PM_D3_LPLU 0x0004 /* For all other states */ ++ ++#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 ++ ++#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 ++#define IGP01E1000_PSSR_MDIX 0x0800 ++#define IGP01E1000_PSSR_SPEED_MASK 0xC000 ++#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 ++ ++#define IGP02E1000_PHY_CHANNEL_NUM 4 ++#define IGP02E1000_PHY_AGC_A 0x11B1 ++#define IGP02E1000_PHY_AGC_B 0x12B1 ++#define IGP02E1000_PHY_AGC_C 0x14B1 ++#define IGP02E1000_PHY_AGC_D 0x18B1 ++ ++#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Course - 15:13, Fine - 12:9 */ ++#define IGP02E1000_AGC_LENGTH_MASK 0x7F ++#define IGP02E1000_AGC_RANGE 15 ++ ++/* manage.c */ ++#define E1000_VFTA_ENTRY_SHIFT 5 ++#define E1000_VFTA_ENTRY_MASK 0x7F ++#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F ++ ++#define E1000_HICR_EN 0x01 /* Enable bit - RO */ ++/* Driver sets this bit when done to put command in RAM */ ++#define E1000_HICR_C 0x02 ++#define E1000_HICR_FW_RESET_ENABLE 0x40 ++#define E1000_HICR_FW_RESET 0x80 ++ ++#define E1000_FWSM_MODE_MASK 0xE ++#define E1000_FWSM_MODE_SHIFT 1 ++ ++#define E1000_MNG_IAMT_MODE 0x3 ++#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10 ++#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0 ++#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10 ++#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64 ++#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING 0x1 ++#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 ++ ++/* nvm.c */ ++#define E1000_STM_OPCODE 0xDB00 ++ ++#define E1000_KMRNCTRLSTA_OFFSET 0x001F0000 ++#define E1000_KMRNCTRLSTA_OFFSET_SHIFT 16 ++#define E1000_KMRNCTRLSTA_REN 0x00200000 ++#define E1000_KMRNCTRLSTA_CTRL_OFFSET 0x1 /* Kumeran Control */ ++#define E1000_KMRNCTRLSTA_DIAG_OFFSET 0x3 /* Kumeran Diagnostic */ ++#define E1000_KMRNCTRLSTA_TIMEOUTS 0x4 /* Kumeran Timeouts */ ++#define E1000_KMRNCTRLSTA_INBAND_PARAM 0x9 /* Kumeran InBand Parameters */ ++#define E1000_KMRNCTRLSTA_IBIST_DISABLE 0x0200 /* Kumeran IBIST Disable */ ++#define E1000_KMRNCTRLSTA_DIAG_NELPBK 0x1000 /* Nearend Loopback mode */ ++#define E1000_KMRNCTRLSTA_K1_CONFIG 0x7 ++#define E1000_KMRNCTRLSTA_K1_ENABLE 0x0002 ++#define E1000_KMRNCTRLSTA_HD_CTRL 0x10 /* Kumeran HD Control */ ++ ++#define IFE_PHY_EXTENDED_STATUS_CONTROL 
0x10 ++#define IFE_PHY_SPECIAL_CONTROL 0x11 /* 100BaseTx PHY Special Control */ ++#define IFE_PHY_SPECIAL_CONTROL_LED 0x1B /* PHY Special and LED Control */ ++#define IFE_PHY_MDIX_CONTROL 0x1C /* MDI/MDI-X Control */ ++ ++/* IFE PHY Extended Status Control */ ++#define IFE_PESC_POLARITY_REVERSED 0x0100 ++ ++/* IFE PHY Special Control */ ++#define IFE_PSC_AUTO_POLARITY_DISABLE 0x0010 ++#define IFE_PSC_FORCE_POLARITY 0x0020 ++ ++/* IFE PHY Special Control and LED Control */ ++#define IFE_PSCL_PROBE_MODE 0x0020 ++#define IFE_PSCL_PROBE_LEDS_OFF 0x0006 /* Force LEDs 0 and 2 off */ ++#define IFE_PSCL_PROBE_LEDS_ON 0x0007 /* Force LEDs 0 and 2 on */ ++ ++/* IFE PHY MDIX Control */ ++#define IFE_PMC_MDIX_STATUS 0x0020 /* 1=MDI-X, 0=MDI */ ++#define IFE_PMC_FORCE_MDIX 0x0040 /* 1=force MDI-X, 0=force MDI */ ++#define IFE_PMC_AUTO_MDIX 0x0080 /* 1=enable auto MDI/MDI-X, 0=disable */ ++ ++#define E1000_CABLE_LENGTH_UNDEFINED 0xFF ++ ++#define E1000_DEV_ID_82571EB_COPPER 0x105E ++#define E1000_DEV_ID_82571EB_FIBER 0x105F ++#define E1000_DEV_ID_82571EB_SERDES 0x1060 ++#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 ++#define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 ++#define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 ++#define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC ++#define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 ++#define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA ++#define E1000_DEV_ID_82572EI_COPPER 0x107D ++#define E1000_DEV_ID_82572EI_FIBER 0x107E ++#define E1000_DEV_ID_82572EI_SERDES 0x107F ++#define E1000_DEV_ID_82572EI 0x10B9 ++#define E1000_DEV_ID_82573E 0x108B ++#define E1000_DEV_ID_82573E_IAMT 0x108C ++#define E1000_DEV_ID_82573L 0x109A ++#define E1000_DEV_ID_82574L 0x10D3 ++#define E1000_DEV_ID_82574LA 0x10F6 ++#define E1000_DEV_ID_82583V 0x150C ++ ++#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 ++#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 ++#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA ++#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB ++ ++#define E1000_DEV_ID_ICH8_82567V_3 0x1501 ++#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 ++#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A ++#define E1000_DEV_ID_ICH8_IGP_C 0x104B ++#define E1000_DEV_ID_ICH8_IFE 0x104C ++#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 ++#define E1000_DEV_ID_ICH8_IFE_G 0x10C5 ++#define E1000_DEV_ID_ICH8_IGP_M 0x104D ++#define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD ++#define E1000_DEV_ID_ICH9_BM 0x10E5 ++#define E1000_DEV_ID_ICH9_IGP_M_AMT 0x10F5 ++#define E1000_DEV_ID_ICH9_IGP_M 0x10BF ++#define E1000_DEV_ID_ICH9_IGP_M_V 0x10CB ++#define E1000_DEV_ID_ICH9_IGP_C 0x294C ++#define E1000_DEV_ID_ICH9_IFE 0x10C0 ++#define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 ++#define E1000_DEV_ID_ICH9_IFE_G 0x10C2 ++#define E1000_DEV_ID_ICH10_R_BM_LM 0x10CC ++#define E1000_DEV_ID_ICH10_R_BM_LF 0x10CD ++#define E1000_DEV_ID_ICH10_R_BM_V 0x10CE ++#define E1000_DEV_ID_ICH10_D_BM_LM 0x10DE ++#define E1000_DEV_ID_ICH10_D_BM_LF 0x10DF ++#define E1000_DEV_ID_ICH10_D_BM_V 0x1525 ++#define E1000_DEV_ID_PCH_M_HV_LM 0x10EA ++#define E1000_DEV_ID_PCH_M_HV_LC 0x10EB ++#define E1000_DEV_ID_PCH_D_HV_DM 0x10EF ++#define E1000_DEV_ID_PCH_D_HV_DC 0x10F0 ++#define E1000_DEV_ID_PCH2_LV_LM 0x1502 ++#define E1000_DEV_ID_PCH2_LV_V 0x1503 ++#define E1000_DEV_ID_PCH_LPT_I217_LM 0x153A ++#define E1000_DEV_ID_PCH_LPT_I217_V 0x153B ++#define E1000_DEV_ID_PCH_LPTLP_I218_LM 0x155A ++#define E1000_DEV_ID_PCH_LPTLP_I218_V 0x1559 ++ ++#define E1000_REVISION_4 4 ++ ++#define E1000_FUNC_1 1 ++ ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 ++ 
++enum e1000_mac_type { ++ e1000_82571, ++ e1000_82572, ++ e1000_82573, ++ e1000_82574, ++ e1000_82583, ++ e1000_80003es2lan, ++ e1000_ich8lan, ++ e1000_ich9lan, ++ e1000_ich10lan, ++ e1000_pchlan, ++ e1000_pch2lan, ++ e1000_pch_lpt, ++}; ++ ++enum e1000_media_type { ++ e1000_media_type_unknown = 0, ++ e1000_media_type_copper = 1, ++ e1000_media_type_fiber = 2, ++ e1000_media_type_internal_serdes = 3, ++ e1000_num_media_types ++}; ++ ++enum e1000_nvm_type { ++ e1000_nvm_unknown = 0, ++ e1000_nvm_none, ++ e1000_nvm_eeprom_spi, ++ e1000_nvm_flash_hw, ++ e1000_nvm_flash_sw ++}; ++ ++enum e1000_nvm_override { ++ e1000_nvm_override_none = 0, ++ e1000_nvm_override_spi_small, ++ e1000_nvm_override_spi_large ++}; ++ ++enum e1000_phy_type { ++ e1000_phy_unknown = 0, ++ e1000_phy_none, ++ e1000_phy_m88, ++ e1000_phy_igp, ++ e1000_phy_igp_2, ++ e1000_phy_gg82563, ++ e1000_phy_igp_3, ++ e1000_phy_ife, ++ e1000_phy_bm, ++ e1000_phy_82578, ++ e1000_phy_82577, ++ e1000_phy_82579, ++ e1000_phy_i217, ++}; ++ ++enum e1000_bus_width { ++ e1000_bus_width_unknown = 0, ++ e1000_bus_width_pcie_x1, ++ e1000_bus_width_pcie_x2, ++ e1000_bus_width_pcie_x4 = 4, ++ e1000_bus_width_32, ++ e1000_bus_width_64, ++ e1000_bus_width_reserved ++}; ++ ++enum e1000_1000t_rx_status { ++ e1000_1000t_rx_status_not_ok = 0, ++ e1000_1000t_rx_status_ok, ++ e1000_1000t_rx_status_undefined = 0xFF ++}; ++ ++enum e1000_rev_polarity{ ++ e1000_rev_polarity_normal = 0, ++ e1000_rev_polarity_reversed, ++ e1000_rev_polarity_undefined = 0xFF ++}; ++ ++enum e1000_fc_mode { ++ e1000_fc_none = 0, ++ e1000_fc_rx_pause, ++ e1000_fc_tx_pause, ++ e1000_fc_full, ++ e1000_fc_default = 0xFF ++}; ++ ++enum e1000_ms_type { ++ e1000_ms_hw_default = 0, ++ e1000_ms_force_master, ++ e1000_ms_force_slave, ++ e1000_ms_auto ++}; ++ ++enum e1000_smart_speed { ++ e1000_smart_speed_default = 0, ++ e1000_smart_speed_on, ++ e1000_smart_speed_off ++}; ++ ++enum e1000_serdes_link_state { ++ e1000_serdes_link_down = 0, ++ e1000_serdes_link_autoneg_progress, ++ e1000_serdes_link_autoneg_complete, ++ e1000_serdes_link_forced_up ++}; ++ ++/* Receive Descriptor */ ++struct e1000_rx_desc { ++ __le64 buffer_addr; /* Address of the descriptor's data buffer */ ++ __le16 length; /* Length of data DMAed into data buffer */ ++ __le16 csum; /* Packet checksum */ ++ u8 status; /* Descriptor status */ ++ u8 errors; /* Descriptor Errors */ ++ __le16 special; ++}; ++ ++/* Receive Descriptor - Extended */ ++union e1000_rx_desc_extended { ++ struct { ++ __le64 buffer_addr; ++ __le64 reserved; ++ } read; ++ struct { ++ struct { ++ __le32 mrq; /* Multiple Rx Queues */ ++ union { ++ __le32 rss; /* RSS Hash */ ++ struct { ++ __le16 ip_id; /* IP id */ ++ __le16 csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ __le32 status_error; /* ext status/error */ ++ __le16 length; ++ __le16 vlan; /* VLAN tag */ ++ } upper; ++ } wb; /* writeback */ ++}; ++ ++#define MAX_PS_BUFFERS 4 ++/* Receive Descriptor - Packet Split */ ++union e1000_rx_desc_packet_split { ++ struct { ++ /* one buffer for protocol header(s), three data buffers */ ++ __le64 buffer_addr[MAX_PS_BUFFERS]; ++ } read; ++ struct { ++ struct { ++ __le32 mrq; /* Multiple Rx Queues */ ++ union { ++ __le32 rss; /* RSS Hash */ ++ struct { ++ __le16 ip_id; /* IP id */ ++ __le16 csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ __le32 status_error; /* ext status/error */ ++ __le16 length0; /* length of buffer 0 */ ++ __le16 vlan; /* VLAN tag */ ++ } middle; ++ struct { ++ __le16 
header_status; ++ __le16 length[3]; /* length of buffers 1-3 */ ++ } upper; ++ __le64 reserved; ++ } wb; /* writeback */ ++}; ++ ++/* Transmit Descriptor */ ++struct e1000_tx_desc { ++ __le64 buffer_addr; /* Address of the descriptor's data buffer */ ++ union { ++ __le32 data; ++ struct { ++ __le16 length; /* Data buffer length */ ++ u8 cso; /* Checksum offset */ ++ u8 cmd; /* Descriptor control */ ++ } flags; ++ } lower; ++ union { ++ __le32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 css; /* Checksum start */ ++ __le16 special; ++ } fields; ++ } upper; ++}; ++ ++/* Offload Context Descriptor */ ++struct e1000_context_desc { ++ union { ++ __le32 ip_config; ++ struct { ++ u8 ipcss; /* IP checksum start */ ++ u8 ipcso; /* IP checksum offset */ ++ __le16 ipcse; /* IP checksum end */ ++ } ip_fields; ++ } lower_setup; ++ union { ++ __le32 tcp_config; ++ struct { ++ u8 tucss; /* TCP checksum start */ ++ u8 tucso; /* TCP checksum offset */ ++ __le16 tucse; /* TCP checksum end */ ++ } tcp_fields; ++ } upper_setup; ++ __le32 cmd_and_length; ++ union { ++ __le32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 hdr_len; /* Header length */ ++ __le16 mss; /* Maximum segment size */ ++ } fields; ++ } tcp_seg_setup; ++}; ++ ++/* Offload data descriptor */ ++struct e1000_data_desc { ++ __le64 buffer_addr; /* Address of the descriptor's buffer address */ ++ union { ++ __le32 data; ++ struct { ++ __le16 length; /* Data buffer length */ ++ u8 typ_len_ext; ++ u8 cmd; ++ } flags; ++ } lower; ++ union { ++ __le32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 popts; /* Packet Options */ ++ __le16 special; /* */ ++ } fields; ++ } upper; ++}; ++ ++/* Statistics counters collected by the MAC */ ++struct e1000_hw_stats { ++ u64 crcerrs; ++ u64 algnerrc; ++ u64 symerrs; ++ u64 rxerrc; ++ u64 mpc; ++ u64 scc; ++ u64 ecol; ++ u64 mcc; ++ u64 latecol; ++ u64 colc; ++ u64 dc; ++ u64 tncrs; ++ u64 sec; ++ u64 cexterr; ++ u64 rlec; ++ u64 xonrxc; ++ u64 xontxc; ++ u64 xoffrxc; ++ u64 xofftxc; ++ u64 fcruc; ++ u64 prc64; ++ u64 prc127; ++ u64 prc255; ++ u64 prc511; ++ u64 prc1023; ++ u64 prc1522; ++ u64 gprc; ++ u64 bprc; ++ u64 mprc; ++ u64 gptc; ++ u64 gorc; ++ u64 gotc; ++ u64 rnbc; ++ u64 ruc; ++ u64 rfc; ++ u64 roc; ++ u64 rjc; ++ u64 mgprc; ++ u64 mgpdc; ++ u64 mgptc; ++ u64 tor; ++ u64 tot; ++ u64 tpr; ++ u64 tpt; ++ u64 ptc64; ++ u64 ptc127; ++ u64 ptc255; ++ u64 ptc511; ++ u64 ptc1023; ++ u64 ptc1522; ++ u64 mptc; ++ u64 bptc; ++ u64 tsctc; ++ u64 tsctfc; ++ u64 iac; ++ u64 icrxptc; ++ u64 icrxatc; ++ u64 ictxptc; ++ u64 ictxatc; ++ u64 ictxqec; ++ u64 ictxqmtc; ++ u64 icrxdmtc; ++ u64 icrxoc; ++}; ++ ++struct e1000_phy_stats { ++ u32 idle_errors; ++ u32 receive_errors; ++}; ++ ++struct e1000_host_mng_dhcp_cookie { ++ u32 signature; ++ u8 status; ++ u8 reserved0; ++ u16 vlan_id; ++ u32 reserved1; ++ u16 reserved2; ++ u8 reserved3; ++ u8 checksum; ++}; ++ ++/* Host Interface "Rev 1" */ ++struct e1000_host_command_header { ++ u8 command_id; ++ u8 command_length; ++ u8 command_options; ++ u8 checksum; ++}; ++ ++#define E1000_HI_MAX_DATA_LENGTH 252 ++struct e1000_host_command_info { ++ struct e1000_host_command_header command_header; ++ u8 command_data[E1000_HI_MAX_DATA_LENGTH]; ++}; ++ ++/* Host Interface "Rev 2" */ ++struct e1000_host_mng_command_header { ++ u8 command_id; ++ u8 checksum; ++ u16 reserved1; ++ u16 reserved2; ++ u16 command_length; ++}; ++ ++#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 ++struct e1000_host_mng_command_info { ++ struct 
e1000_host_mng_command_header command_header; ++ u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; ++}; ++ ++/* Function pointers and static data for the MAC. */ ++struct e1000_mac_operations { ++ s32 (*id_led_init)(struct e1000_hw *); ++ s32 (*blink_led)(struct e1000_hw *); ++ bool (*check_mng_mode)(struct e1000_hw *); ++ s32 (*check_for_link)(struct e1000_hw *); ++ s32 (*cleanup_led)(struct e1000_hw *); ++ void (*clear_hw_cntrs)(struct e1000_hw *); ++ void (*clear_vfta)(struct e1000_hw *); ++ s32 (*get_bus_info)(struct e1000_hw *); ++ void (*set_lan_id)(struct e1000_hw *); ++ s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); ++ s32 (*led_on)(struct e1000_hw *); ++ s32 (*led_off)(struct e1000_hw *); ++ void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32); ++ s32 (*reset_hw)(struct e1000_hw *); ++ s32 (*init_hw)(struct e1000_hw *); ++ s32 (*setup_link)(struct e1000_hw *); ++ s32 (*setup_physical_interface)(struct e1000_hw *); ++ s32 (*setup_led)(struct e1000_hw *); ++ void (*write_vfta)(struct e1000_hw *, u32, u32); ++ void (*config_collision_dist)(struct e1000_hw *); ++ void (*rar_set)(struct e1000_hw *, u8 *, u32); ++ s32 (*read_mac_addr)(struct e1000_hw *); ++}; ++ ++/* ++ * When to use various PHY register access functions: ++ * ++ * Func Caller ++ * Function Does Does When to use ++ * ~~~~~~~~~~~~ ~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * X_reg L,P,A n/a for simple PHY reg accesses ++ * X_reg_locked P,A L for multiple accesses of different regs ++ * on different pages ++ * X_reg_page A L,P for multiple accesses of different regs ++ * on the same page ++ * ++ * Where X=[read|write], L=locking, P=sets page, A=register access ++ * ++ */ ++struct e1000_phy_operations { ++ s32 (*acquire)(struct e1000_hw *); ++ s32 (*cfg_on_link_up)(struct e1000_hw *); ++ s32 (*check_polarity)(struct e1000_hw *); ++ s32 (*check_reset_block)(struct e1000_hw *); ++ s32 (*commit)(struct e1000_hw *); ++ s32 (*force_speed_duplex)(struct e1000_hw *); ++ s32 (*get_cfg_done)(struct e1000_hw *hw); ++ s32 (*get_cable_length)(struct e1000_hw *); ++ s32 (*get_info)(struct e1000_hw *); ++ s32 (*set_page)(struct e1000_hw *, u16); ++ s32 (*read_reg)(struct e1000_hw *, u32, u16 *); ++ s32 (*read_reg_locked)(struct e1000_hw *, u32, u16 *); ++ s32 (*read_reg_page)(struct e1000_hw *, u32, u16 *); ++ void (*release)(struct e1000_hw *); ++ s32 (*reset)(struct e1000_hw *); ++ s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); ++ s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); ++ s32 (*write_reg)(struct e1000_hw *, u32, u16); ++ s32 (*write_reg_locked)(struct e1000_hw *, u32, u16); ++ s32 (*write_reg_page)(struct e1000_hw *, u32, u16); ++ void (*power_up)(struct e1000_hw *); ++ void (*power_down)(struct e1000_hw *); ++}; ++ ++/* Function pointers for the NVM. 
*/ ++struct e1000_nvm_operations { ++ s32 (*acquire)(struct e1000_hw *); ++ s32 (*read)(struct e1000_hw *, u16, u16, u16 *); ++ void (*release)(struct e1000_hw *); ++ s32 (*update)(struct e1000_hw *); ++ s32 (*valid_led_default)(struct e1000_hw *, u16 *); ++ s32 (*validate)(struct e1000_hw *); ++ s32 (*write)(struct e1000_hw *, u16, u16, u16 *); ++}; ++ ++struct e1000_mac_info { ++ struct e1000_mac_operations ops; ++ u8 addr[ETH_ALEN]; ++ u8 perm_addr[ETH_ALEN]; ++ ++ enum e1000_mac_type type; ++ ++ u32 collision_delta; ++ u32 ledctl_default; ++ u32 ledctl_mode1; ++ u32 ledctl_mode2; ++ u32 mc_filter_type; ++ u32 tx_packet_delta; ++ u32 txcw; ++ ++ u16 current_ifs_val; ++ u16 ifs_max_val; ++ u16 ifs_min_val; ++ u16 ifs_ratio; ++ u16 ifs_step_size; ++ u16 mta_reg_count; ++ ++ /* Maximum size of the MTA register table in all supported adapters */ ++ #define MAX_MTA_REG 128 ++ u32 mta_shadow[MAX_MTA_REG]; ++ u16 rar_entry_count; ++ ++ u8 forced_speed_duplex; ++ ++ bool adaptive_ifs; ++ bool has_fwsm; ++ bool arc_subsystem_valid; ++ bool autoneg; ++ bool autoneg_failed; ++ bool get_link_status; ++ bool in_ifs_mode; ++ bool serdes_has_link; ++ bool tx_pkt_filtering; ++ enum e1000_serdes_link_state serdes_link_state; ++}; ++ ++struct e1000_phy_info { ++ struct e1000_phy_operations ops; ++ ++ enum e1000_phy_type type; ++ ++ enum e1000_1000t_rx_status local_rx; ++ enum e1000_1000t_rx_status remote_rx; ++ enum e1000_ms_type ms_type; ++ enum e1000_ms_type original_ms_type; ++ enum e1000_rev_polarity cable_polarity; ++ enum e1000_smart_speed smart_speed; ++ ++ u32 addr; ++ u32 id; ++ u32 reset_delay_us; /* in usec */ ++ u32 revision; ++ ++ enum e1000_media_type media_type; ++ ++ u16 autoneg_advertised; ++ u16 autoneg_mask; ++ u16 cable_length; ++ u16 max_cable_length; ++ u16 min_cable_length; ++ ++ u8 mdix; ++ ++ bool disable_polarity_correction; ++ bool is_mdix; ++ bool polarity_correction; ++ bool speed_downgraded; ++ bool autoneg_wait_to_complete; ++}; ++ ++struct e1000_nvm_info { ++ struct e1000_nvm_operations ops; ++ ++ enum e1000_nvm_type type; ++ enum e1000_nvm_override override; ++ ++ u32 flash_bank_size; ++ u32 flash_base_addr; ++ ++ u16 word_size; ++ u16 delay_usec; ++ u16 address_bits; ++ u16 opcode_bits; ++ u16 page_size; ++}; ++ ++struct e1000_bus_info { ++ enum e1000_bus_width width; ++ ++ u16 func; ++}; ++ ++struct e1000_fc_info { ++ u32 high_water; /* Flow control high-water mark */ ++ u32 low_water; /* Flow control low-water mark */ ++ u16 pause_time; /* Flow control pause timer */ ++ u16 refresh_time; /* Flow control refresh timer */ ++ bool send_xon; /* Flow control send XON */ ++ bool strict_ieee; /* Strict IEEE mode */ ++ enum e1000_fc_mode current_mode; /* FC mode in effect */ ++ enum e1000_fc_mode requested_mode; /* FC mode requested by caller */ ++}; ++ ++struct e1000_dev_spec_82571 { ++ bool laa_is_present; ++ u32 smb_counter; ++}; ++ ++struct e1000_dev_spec_80003es2lan { ++ bool mdic_wa_enable; ++}; ++ ++struct e1000_shadow_ram { ++ u16 value; ++ bool modified; ++}; ++ ++#define E1000_ICH8_SHADOW_RAM_WORDS 2048 ++ ++struct e1000_dev_spec_ich8lan { ++ bool kmrn_lock_loss_workaround_enabled; ++ struct e1000_shadow_ram shadow_ram[E1000_ICH8_SHADOW_RAM_WORDS]; ++ bool nvm_k1_enabled; ++ bool eee_disable; ++ u16 eee_lp_ability; ++}; ++ ++struct e1000_hw { ++ struct e1000_adapter *adapter; ++ ++ u8 __iomem *hw_addr; ++ u8 __iomem *flash_address; ++ ++ struct e1000_mac_info mac; ++ struct e1000_fc_info fc; ++ struct e1000_phy_info phy; ++ struct e1000_nvm_info nvm; ++ struct 
e1000_bus_info bus; ++ struct e1000_host_mng_dhcp_cookie mng_cookie; ++ ++ union { ++ struct e1000_dev_spec_82571 e82571; ++ struct e1000_dev_spec_80003es2lan e80003es2lan; ++ struct e1000_dev_spec_ich8lan ich8lan; ++ } dev_spec; ++}; ++ ++#endif +--- linux/drivers/xenomai/net/drivers/eth1394.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/eth1394.c 2021-04-07 16:01:27.165634255 +0800 +@@ -0,0 +1,1536 @@ ++/* ++ * eth1394.h -- RTnet Driver for Ethernet emulation over FireWire ++ * (adapted from Linux1394) ++ * ++ * Copyright (C) 2005 Zhang Yuchen ++ * ++ * Mainly based on work by Emanuel Pirker and Andreas E. Bombe ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define rtos_spinlock_t rtdm_lock_t ++#define nanosecs_abs_t nanosecs_t ++ ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define driver_name "RT-ETH1394" ++ ++ ++#define ETH1394_PRINT_G(level, fmt, args...) \ ++ rtdm_printk(level "%s: " fmt, driver_name, ## args) ++ ++#define ETH1394_PRINT(level, dev_name, fmt, args...) \ ++ rtdm_printk(level "%s: %s: " fmt, driver_name, dev_name, ## args) ++ ++//#define ETH1394_DEBUG 1 ++ ++#ifdef ETH1394_DEBUG ++#define DEBUGP(fmt, args...) \ ++ rtdm_printk(KERN_ERR "%s:%s[%d]: " fmt "\n", driver_name, __FUNCTION__, __LINE__, ## args) ++#else ++#define DEBUGP(fmt, args...) ++#endif ++ ++#define TRACE() rtdm_printk(KERN_ERR "%s:%s[%d] ---- TRACE\n", driver_name, __FUNCTION__, __LINE__) ++ ++/* Change this to IEEE1394_SPEED_S100 to make testing easier */ ++#define ETH1394_SPEED_DEF 0x03 /*IEEE1394_SPEED_MAX*/ ++ ++/* For now, this needs to be 1500, so that XP works with us */ ++#define ETH1394_DATA_LEN 1500/*ETH_DATA_LEN*/ ++ ++struct fragment_info { ++ struct list_head list; ++ int offset; ++ int len; ++}; ++ ++struct partial_datagram { ++ struct list_head list; ++ u16 dgl; ++ u16 dg_size; ++ u16 ether_type; ++ struct rtskb *skb; ++ char *pbuf; ++ struct list_head frag_info; ++}; ++ ++ static const u16 eth1394_speedto_maxpayload[] = { ++/* S100, S200, S400, S800, S1600, S3200 */ ++ 512, 1024, 2048, 4096, 4096, 4096 ++}; ++ ++static struct hpsb_highlevel eth1394_highlevel; ++ ++/* Use common.lf to determine header len */ ++static const int hdr_type_len[] = { ++ sizeof (struct eth1394_uf_hdr), ++ sizeof (struct eth1394_ff_hdr), ++ sizeof (struct eth1394_sf_hdr), ++ sizeof (struct eth1394_sf_hdr) ++}; ++ ++/* The max_partial_datagrams parameter is the maximum number of fragmented ++ * datagrams per node that eth1394 will keep in memory. Providing an upper ++ * bound allows us to limit the amount of memory that partial datagrams ++ * consume in the event that some partial datagrams are never completed. 
This ++ * should probably change to a sysctl item or the like if possible. ++ */ ++static int max_partial_datagrams = 25; ++module_param(max_partial_datagrams, int, 0444); ++MODULE_PARM_DESC(max_partial_datagrams, ++ "Maximum number of partially received fragmented datagrams " ++ "(default = 25)."); ++ ++ ++static int eth1394_header(struct rtskb *skb, struct rtnet_device *dev, ++ unsigned short type, void *daddr, void *saddr, ++ unsigned len); ++ ++static int eth1394_write(struct hpsb_host *host,struct hpsb_packet *packet, unsigned int length); ++ ++static inline void purge_partial_datagram(struct list_head *old); ++static int eth1394_tx(struct rtskb *skb, struct rtnet_device *dev); ++static void eth1394_iso(struct hpsb_iso *iso, void *arg); ++ ++/* Function for incoming 1394 packets */ ++static struct hpsb_address_ops eth1394_ops = { ++ .write = eth1394_write, ++}; ++ ++static void eth1394_add_host (struct hpsb_host *host); ++static void eth1394_remove_host (struct hpsb_host *host); ++static void eth1394_host_reset (struct hpsb_host *host); ++ ++/* Ieee1394 highlevel driver functions */ ++static struct hpsb_highlevel eth1394_highlevel = { ++ .name = driver_name, ++ .add_host = eth1394_add_host, ++ .remove_host = eth1394_remove_host, ++ .host_reset = eth1394_host_reset, ++}; ++ ++static void eth1394_iso_shutdown(struct eth1394_priv *priv) ++{ ++ priv->bc_state = ETHER1394_BC_CLOSED; ++ ++ if (priv->iso != NULL) { ++ //~ if (!in_interrupt()) ++ hpsb_iso_shutdown(priv->iso); ++ priv->iso = NULL; ++ } ++} ++ ++static int eth1394_init_bc(struct rtnet_device *dev) ++{ ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ ++ /* First time sending? Need a broadcast channel for ARP and for ++ * listening on */ ++ if (priv->bc_state == ETHER1394_BC_CHECK) { ++ quadlet_t bc; ++ ++ /* Get the local copy of the broadcast channel and check its ++ * validity (the IRM should validate it for us) */ ++ ++ bc = priv->host->csr.broadcast_channel; ++ ++ if ((bc & 0x80000000) != 0x80000000) { //used to be 0xc0000000 ++ /* broadcast channel not validated yet */ ++ ETH1394_PRINT(KERN_WARNING, dev->name, ++ "Error BROADCAST_CHANNEL register valid " ++ "bit not set, can't send IP traffic\n"); ++ ++ eth1394_iso_shutdown(priv); ++ ++ return -EAGAIN; ++ } ++ if (priv->broadcast_channel != (bc & 0x3f)) { ++ /* This really shouldn't be possible, but just in case ++ * the IEEE 1394 spec changes regarding broadcast ++ * channels in the future. 
*/ ++ ++ eth1394_iso_shutdown(priv); ++ ++ //~ if (in_interrupt()) ++ //~ return -EAGAIN; ++ ++ priv->broadcast_channel = bc & 0x3f; ++ ETH1394_PRINT(KERN_INFO, dev->name, ++ "Changing to broadcast channel %d...\n", ++ priv->broadcast_channel); ++ ++ priv->iso = hpsb_iso_recv_init(priv->host, 16 * 4096, ++ 16, priv->broadcast_channel, HPSB_ISO_DMA_PACKET_PER_BUFFER, ++ 1, eth1394_iso, 0, "eth1394_iso", IEEE1394_PRIORITY_HIGHEST); ++ ++ if (priv->iso == NULL) { ++ ETH1394_PRINT(KERN_ERR, dev->name, ++ "failed to change broadcast " ++ "channel\n"); ++ return -EAGAIN; ++ } ++ } ++ if (hpsb_iso_recv_start(priv->iso, -1, (1 << 3), -1) < 0) { ++ ETH1394_PRINT(KERN_ERR, dev->name, ++ "Could not start data stream reception\n"); ++ ++ eth1394_iso_shutdown(priv); ++ ++ return -EAGAIN; ++ } ++ priv->bc_state = ETHER1394_BC_OPENED; ++ } ++ ++ return 0; ++} ++ ++static int eth1394_open (struct rtnet_device *dev) ++{ ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ rtdm_lockctx_t context; ++ int ret; ++ ++ /* Something bad happened, don't even try */ ++ if (priv->bc_state == ETHER1394_BC_CLOSED) ++ { ++ return -EAGAIN; ++ } ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ ret = eth1394_init_bc(dev); ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ if (ret) ++ return ret; ++ rt_stack_connect(dev,&STACK_manager); ++ rtnetif_start_queue (dev); ++ return 0; ++} ++ ++static int eth1394_stop (struct rtnet_device *dev) ++{ ++ rtnetif_stop_queue (dev); ++ rt_stack_disconnect(dev); ++ return 0; ++} ++ ++/* Return statistics to the caller */ ++static struct net_device_stats *eth1394_stats (struct rtnet_device *dev) ++{ ++ return &(((struct eth1394_priv *)dev->priv)->stats); ++} ++ ++static inline void eth1394_register_limits(int nodeid, u16 maxpayload, ++ unsigned char sspd, ++ struct eth1394_priv *priv) ++{ ++ ++ if (nodeid < 0 || nodeid >= ALL_NODES) { ++ ETH1394_PRINT_G (KERN_ERR, "Cannot register invalid nodeid %d\n", nodeid); ++ return; ++ } ++ ++ priv->maxpayload[nodeid] = maxpayload; ++ priv->sspd[nodeid] = sspd; ++ priv->maxpayload[ALL_NODES] = min(priv->maxpayload[ALL_NODES], maxpayload); ++ priv->sspd[ALL_NODES] = min(priv->sspd[ALL_NODES], sspd); ++ ++ return; ++} ++ ++ ++static void eth1394_reset_priv (struct rtnet_device *dev, int set_mtu) ++{ ++ rtdm_lockctx_t context; ++ int i; ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ struct hpsb_host *host = priv->host; ++ int phy_id = NODEID_TO_NODE(host->node_id); ++ u16 maxpayload = 1 << (host->csr.max_rec + 1); ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ /* Clear the speed/payload/offset tables */ ++ memset (priv->maxpayload, 0, sizeof (priv->maxpayload)); ++ memset (priv->sspd, 0, sizeof (priv->sspd)); ++ ++ priv->sspd[ALL_NODES] = ETH1394_SPEED_DEF; ++ priv->maxpayload[ALL_NODES] = eth1394_speedto_maxpayload[priv->sspd[ALL_NODES]]; ++ ++ priv->bc_state = ETHER1394_BC_CHECK; ++ ++ /* Register our limits now */ ++ eth1394_register_limits(phy_id, maxpayload, ++ host->speed_map[(phy_id << 6) + phy_id], priv); ++ ++ /* We'll use our maxpayload as the default mtu */ ++ if (set_mtu) { ++ dev->mtu = min(ETH1394_DATA_LEN, (int)(priv->maxpayload[phy_id] - ++ (sizeof(union eth1394_hdr) + ETHER1394_GASP_OVERHEAD))); ++ ++ //~ /* Set our hardware address while we're at it */ ++ //~ *(u64*)dev->dev_addr = guid; ++ //~ *(u64*)dev->broadcast = ~0x0ULL; ++ *(u16*)dev->dev_addr = LOCAL_BUS | phy_id; //we directly use FireWire address for our MAC address ++ *(u16*)dev->broadcast = LOCAL_BUS | ALL_NODES; ++ } ++ 
++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ for (i = 0; i < ALL_NODES; i++) { ++ struct list_head *lh, *n; ++ ++ rtdm_lock_get_irqsave(&priv->pdg[i].lock, context); ++ if (!set_mtu) { ++ list_for_each_safe(lh, n, &priv->pdg[i].list) { ++ //~ purge_partial_datagram(lh); ++ } ++ } ++ INIT_LIST_HEAD(&(priv->pdg[i].list)); ++ priv->pdg[i].sz = 0; ++ rtdm_lock_put_irqrestore(&priv->pdg[i].lock, context); ++ } ++ ++} ++ ++static void eth1394_add_host (struct hpsb_host *host) ++{ ++ int i; ++ struct host_info *hi = NULL; ++ ++ //*******RTnet******** ++ struct rtnet_device *dev = NULL; ++ // ++ struct eth1394_priv *priv; ++ ++ /* We should really have our own alloc_hpsbdev() function in ++ * net_init.c instead of calling the one for ethernet then hijacking ++ * it for ourselves. That way we'd be a real networking device. */ ++ ++ //******RTnet****** ++ ++ dev = rt_alloc_etherdev(sizeof (struct eth1394_priv), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (dev == NULL) { ++ ETH1394_PRINT_G (KERN_ERR, "Out of memory trying to allocate " ++ "etherdevice for IEEE 1394 device\n"); ++ goto free_dev; ++ } ++ rtdev_alloc_name(dev, "rteth%d"); ++ memset(dev->priv, 0, sizeof(struct eth1394_priv)); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ ++ //dev->init = eth1394_init_dev; ++ ++ dev->vers = RTDEV_VERS_2_0; ++ dev->open = eth1394_open; ++ dev->hard_start_xmit = eth1394_tx; ++ dev->stop = eth1394_stop; ++ dev->hard_header = eth1394_header; ++ dev->get_stats = eth1394_stats; ++ dev->flags = IFF_BROADCAST | IFF_MULTICAST; ++ dev->addr_len = ETH_ALEN; ++ dev->hard_header_len = ETH_HLEN; ++ dev->type = ARPHRD_IEEE1394; ++ ++ //rtdev->do_ioctl = NULL; ++ priv = (struct eth1394_priv *)dev->priv; ++ ++ rtdm_lock_init(&priv->lock); ++ priv->host = host; ++ ++ for (i = 0; i < ALL_NODES; i++) { ++ rtdm_lock_init(&priv->pdg[i].lock); ++ INIT_LIST_HEAD(&priv->pdg[i].list); ++ priv->pdg[i].sz = 0; ++ } ++ ++ hi = hpsb_create_hostinfo(ð1394_highlevel, host, sizeof(*hi)); ++ if (hi == NULL) { ++ ETH1394_PRINT_G (KERN_ERR, "Out of memory trying to create " ++ "hostinfo for IEEE 1394 device\n"); ++ goto free_hi; ++ } ++ ++ if(rt_register_rtnetdev(dev)) ++ { ++ ETH1394_PRINT (KERN_ERR, dev->name, "Error registering network driver\n"); ++ goto free_hi; ++ } ++ ++ ETH1394_PRINT (KERN_ERR, dev->name, "IEEE-1394 IPv4 over 1394 Ethernet\n"); ++ ++ hi->host = host; ++ hi->dev = dev; ++ ++ eth1394_reset_priv (dev, 1); ++ ++ /* Ignore validity in hopes that it will be set in the future. It'll ++ * be checked when the eth device is opened. 
*/ ++ priv->broadcast_channel = host->csr.broadcast_channel & 0x3f; ++ ++ priv->iso = hpsb_iso_recv_init(host, (ETHER1394_GASP_BUFFERS * 2 * ++ 2048), // XXX workaround for limitation in rawiso ++ //(1 << (host->csr.max_rec + 1))), ++ ETHER1394_GASP_BUFFERS, ++ priv->broadcast_channel, ++ HPSB_ISO_DMA_PACKET_PER_BUFFER, ++ 1, eth1394_iso, 0, "eth1394_iso", IEEE1394_PRIORITY_HIGHEST); ++ ++ ++ ++ if (priv->iso == NULL) { ++ ETH1394_PRINT(KERN_ERR, dev->name, ++ "Could not allocate isochronous receive context " ++ "for the broadcast channel\n"); ++ priv->bc_state = ETHER1394_BC_ERROR; ++ goto unregister_dev; ++ } else { ++ if (hpsb_iso_recv_start(priv->iso, -1, (1 << 3), -1) < 0){ ++ priv->bc_state = ETHER1394_BC_STOPPED; ++ goto unregister_dev; ++ } ++ else ++ priv->bc_state = ETHER1394_BC_RUNNING; ++ } ++ ++ hpsb_register_addrspace(ð1394_highlevel, host, ð1394_ops, ETHER1394_REGION_ADDR, ++ ETHER1394_REGION_ADDR_END); ++ ++ return; ++ ++unregister_dev: ++ rt_unregister_rtnetdev(dev); ++free_hi: ++ hpsb_destroy_hostinfo(ð1394_highlevel, host); ++free_dev: ++ rtdev_free(dev); ++ ++ return; ++} ++ ++static void eth1394_remove_host (struct hpsb_host *host) ++{ ++ struct host_info *hi = hpsb_get_hostinfo(ð1394_highlevel, host); ++ ++ if (hi != NULL) { ++ struct eth1394_priv *priv = (struct eth1394_priv *)hi->dev->priv; ++ ++ eth1394_iso_shutdown(priv); ++ ++ if (hi->dev) { ++ rt_stack_disconnect(hi->dev); ++ rt_unregister_rtnetdev (hi->dev); ++ rtdev_free(hi->dev); ++ } ++ } ++ return; ++} ++ ++static void eth1394_host_reset (struct hpsb_host *host) ++{ ++ struct host_info *hi = hpsb_get_hostinfo(ð1394_highlevel, host); ++ struct rtnet_device *dev; ++ ++ /* This can happen for hosts that we don't use */ ++ if (hi == NULL) ++ return; ++ ++ dev = hi->dev; ++ ++ /* Reset our private host data, but not our mtu */ ++ rtnetif_stop_queue (dev); ++ eth1394_reset_priv (dev, 1); ++ rtnetif_wake_queue (dev); ++} ++ ++ ++/****************************************** ++ * HW Header net device functions ++ ******************************************/ ++/* These functions have been adapted from net/ethernet/eth.c */ ++ ++ ++/* Create a fake MAC header for an arbitrary protocol layer. ++ * saddr=NULL means use device source address ++ * daddr=NULL means leave destination address (eg unresolved arp). 
*/ ++static int eth1394_header(struct rtskb *skb, struct rtnet_device *dev, ++ unsigned short type, void *daddr, void *saddr, ++ unsigned len) ++{ ++ struct ethhdr *eth = (struct ethhdr *)rtskb_push(skb,ETH_HLEN); ++ memset(eth, 0, sizeof(*eth)); ++ ++ eth->h_proto = htons(type); ++ ++ if (saddr) ++ memcpy(eth->h_source, saddr, sizeof(nodeid_t)); ++ else ++ memcpy(eth->h_source, dev->dev_addr, sizeof(nodeid_t)); ++ ++ if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) ++ { ++ memset(eth->h_dest, 0, dev->addr_len); ++ return(dev->hard_header_len); ++ } ++ ++ if (daddr) ++ { ++ memcpy(eth->h_dest,daddr, sizeof(nodeid_t)); ++ return dev->hard_header_len; ++ } ++ ++ return -dev->hard_header_len; ++ ++} ++ ++ ++/****************************************** ++ * Datagram reception code ++ ******************************************/ ++ ++/* Copied from net/ethernet/eth.c */ ++static inline u16 eth1394_type_trans(struct rtskb *skb, ++ struct rtnet_device *dev) ++{ ++ struct ethhdr *eth; ++ unsigned char *rawp; ++ ++ skb->mac.raw = skb->data; ++ rtskb_pull (skb, ETH_HLEN); ++ eth = (struct ethhdr*)skb->mac.raw; ++ ++ if (*eth->h_dest & 1) { ++ if (memcmp(eth->h_dest, dev->broadcast, dev->addr_len)==0) ++ skb->pkt_type = PACKET_BROADCAST; ++ } else { ++ if (memcmp(eth->h_dest, dev->dev_addr, dev->addr_len)) ++ skb->pkt_type = PACKET_OTHERHOST; ++ } ++ ++ if (ntohs (eth->h_proto) >= 1536) ++ return eth->h_proto; ++ ++ rawp = skb->data; ++ ++ if (*(unsigned short *)rawp == 0xFFFF) ++ return htons (ETH_P_802_3); ++ ++ return htons (ETH_P_802_2); ++} ++ ++/* Parse an encapsulated IP1394 header into an ethernet frame packet. ++ * We also perform ARP translation here, if need be. */ ++static inline u16 eth1394_parse_encap(struct rtskb *skb, ++ struct rtnet_device *dev, ++ nodeid_t srcid, nodeid_t destid, ++ u16 ether_type) ++{ ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ unsigned short ret = 0; ++ ++ /* If this is an ARP packet, convert it. First, we want to make ++ * use of some of the fields, since they tell us a little bit ++ * about the sending machine. */ ++ if (ether_type == __constant_htons (ETH_P_ARP)) { ++ rtdm_lockctx_t context; ++ struct eth1394_arp *arp1394 = ++ (struct eth1394_arp*)((u8 *)skb->data); ++ struct arphdr *arp = ++ (struct arphdr *)((u8 *)skb->data); ++ unsigned char *arp_ptr = (unsigned char *)(arp + 1); ++ u8 max_rec = min(priv->host->csr.max_rec, ++ (u8)(arp1394->max_rec)); ++ int sspd = arp1394->sspd; ++ u16 maxpayload; ++ /* Sanity check. MacOSX seems to be sending us 131 in this ++ * field (atleast on my Panther G5). Not sure why. */ ++ if (sspd > 5 || sspd < 0) ++ sspd = 0; ++ ++ maxpayload = min(eth1394_speedto_maxpayload[sspd], (u16)(1 << (max_rec + 1))); ++ ++ ++ ++ /* Update our speed/payload/fifo_offset table */ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ eth1394_register_limits(NODEID_TO_NODE(srcid), maxpayload, ++ arp1394->sspd, ++ priv); ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ /* Now that we're done with the 1394 specific stuff, we'll ++ * need to alter some of the data. Believe it or not, all ++ * that needs to be done is sender_IP_address needs to be ++ * moved, the destination hardware address get stuffed ++ * in and the hardware address length set to 8. ++ * ++ * IMPORTANT: The code below overwrites 1394 specific data ++ * needed above data so keep the call to ++ * eth1394_register_limits() before munging the data for the ++ * higher level IP stack. 
*/ ++ ++ arp->ar_hln = ETH_ALEN; ++ arp_ptr += arp->ar_hln; /* skip over sender unique id */ ++ *(u32*)arp_ptr = arp1394->sip; /* move sender IP addr */ ++ arp_ptr += arp->ar_pln; /* skip over sender IP addr */ ++ ++ if (arp->ar_op == 1) ++ /* just set ARP req target unique ID to 0 */ ++ memset(arp_ptr, 0, ETH_ALEN); ++ else ++ memcpy(arp_ptr, dev->dev_addr, ETH_ALEN); ++ } ++ ++ /* Now add the ethernet header. */ ++ //no need to add ethernet header now, since we did not get rid of it on the sending side ++ if (dev->hard_header (skb, dev, __constant_ntohs (ether_type), ++ &destid, &srcid, skb->len) >= 0) ++ ret = eth1394_type_trans(skb, dev); ++ ++ return ret; ++} ++ ++static inline int fragment_overlap(struct list_head *frag_list, int offset, int len) ++{ ++ struct list_head *lh; ++ struct fragment_info *fi; ++ ++ list_for_each(lh, frag_list) { ++ fi = list_entry(lh, struct fragment_info, list); ++ ++ if ( ! ((offset > (fi->offset + fi->len - 1)) || ++ ((offset + len - 1) < fi->offset))) ++ return 1; ++ } ++ return 0; ++} ++ ++static inline struct list_head *find_partial_datagram(struct list_head *pdgl, int dgl) ++{ ++ struct list_head *lh; ++ struct partial_datagram *pd; ++ ++ list_for_each(lh, pdgl) { ++ pd = list_entry(lh, struct partial_datagram, list); ++ if (pd->dgl == dgl) ++ return lh; ++ } ++ return NULL; ++} ++ ++/* Assumes that new fragment does not overlap any existing fragments */ ++static inline int new_fragment(struct list_head *frag_info, int offset, int len) ++{ ++ struct list_head *lh; ++ struct fragment_info *fi, *fi2, *new; ++ ++ list_for_each(lh, frag_info) { ++ fi = list_entry(lh, struct fragment_info, list); ++ if ((fi->offset + fi->len) == offset) { ++ /* The new fragment can be tacked on to the end */ ++ fi->len += len; ++ /* Did the new fragment plug a hole? */ ++ fi2 = list_entry(lh->next, struct fragment_info, list); ++ if ((fi->offset + fi->len) == fi2->offset) { ++ /* glue fragments together */ ++ fi->len += fi2->len; ++ list_del(lh->next); ++ kfree(fi2); ++ } ++ return 0; ++ } else if ((offset + len) == fi->offset) { ++ /* The new fragment can be tacked on to the beginning */ ++ fi->offset = offset; ++ fi->len += len; ++ /* Did the new fragment plug a hole? 
*/ ++ fi2 = list_entry(lh->prev, struct fragment_info, list); ++ if ((fi2->offset + fi2->len) == fi->offset) { ++ /* glue fragments together */ ++ fi2->len += fi->len; ++ list_del(lh); ++ kfree(fi); ++ } ++ return 0; ++ } else if (offset > (fi->offset + fi->len)) { ++ break; ++ } else if ((offset + len) < fi->offset) { ++ lh = lh->prev; ++ break; ++ } ++ } ++ ++ new = kmalloc(sizeof(struct fragment_info), GFP_ATOMIC); ++ if (!new) ++ return -ENOMEM; ++ ++ new->offset = offset; ++ new->len = len; ++ ++ list_add(&new->list, lh); ++ ++ return 0; ++} ++ ++static inline int new_partial_datagram(struct rtnet_device *dev, ++ struct list_head *pdgl, int dgl, ++ int dg_size, char *frag_buf, ++ int frag_off, int frag_len) ++{ ++ struct partial_datagram *new; ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ ++ new = kmalloc(sizeof(struct partial_datagram), GFP_ATOMIC); ++ if (!new) ++ return -ENOMEM; ++ ++ INIT_LIST_HEAD(&new->frag_info); ++ ++ if (new_fragment(&new->frag_info, frag_off, frag_len) < 0) { ++ kfree(new); ++ return -ENOMEM; ++ } ++ ++ new->dgl = dgl; ++ new->dg_size = dg_size; ++ ++ new->skb = rtnetdev_alloc_rtskb(dev, dg_size + dev->hard_header_len + 15); ++ if (!new->skb) { ++ struct fragment_info *fi = list_entry(new->frag_info.next, ++ struct fragment_info, ++ list); ++ kfree(fi); ++ kfree(new); ++ return -ENOMEM; ++ } ++ ++ rtskb_reserve(new->skb, (dev->hard_header_len + 15) & ~15); ++ new->pbuf = rtskb_put(new->skb, dg_size); ++ memcpy(new->pbuf + frag_off, frag_buf, frag_len); ++ ++ list_add(&new->list, pdgl); ++ ++ return 0; ++} ++ ++static inline int update_partial_datagram(struct list_head *pdgl, struct list_head *lh, ++ char *frag_buf, int frag_off, int frag_len) ++{ ++ struct partial_datagram *pd = list_entry(lh, struct partial_datagram, list); ++ ++ if (new_fragment(&pd->frag_info, frag_off, frag_len) < 0) { ++ return -ENOMEM; ++ } ++ ++ memcpy(pd->pbuf + frag_off, frag_buf, frag_len); ++ ++ /* Move list entry to beginnig of list so that oldest partial ++ * datagrams percolate to the end of the list */ ++ list_del(lh); ++ list_add(lh, pdgl); ++ ++ return 0; ++} ++ ++static inline void purge_partial_datagram(struct list_head *old) ++{ ++ struct partial_datagram *pd = list_entry(old, struct partial_datagram, list); ++ struct list_head *lh, *n; ++ ++ list_for_each_safe(lh, n, &pd->frag_info) { ++ struct fragment_info *fi = list_entry(lh, struct fragment_info, list); ++ list_del(lh); ++ kfree(fi); ++ } ++ list_del(old); ++ kfree_rtskb(pd->skb); ++ kfree(pd); ++} ++ ++static inline int is_datagram_complete(struct list_head *lh, int dg_size) ++{ ++ struct partial_datagram *pd = list_entry(lh, struct partial_datagram, list); ++ struct fragment_info *fi = list_entry(pd->frag_info.next, ++ struct fragment_info, list); ++ ++ return (fi->len == dg_size); ++} ++ ++ ++ ++ ++/* Packet reception. We convert the IP1394 encapsulation header to an ++ * ethernet header, and fill it with some of our other fields. This is ++ * an incoming packet from the 1394 bus. */ ++static int eth1394_data_handler(struct rtnet_device *dev, int srcid, int destid, ++ char *buf, int len, nanosecs_abs_t time_stamp) ++{ ++ struct rtskb *skb; ++ rtdm_lockctx_t context; ++ struct eth1394_priv *priv; ++ union eth1394_hdr *hdr = (union eth1394_hdr *)buf; ++ u16 ether_type = 0; /* initialized to clear warning */ ++ int hdr_len; ++ ++ //~ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ priv = (struct eth1394_priv *)dev->priv; ++ ++ /* First, did we receive a fragmented or unfragmented datagram? 
*/ ++ hdr->words.word1 = ntohs(hdr->words.word1); ++ ++ hdr_len = hdr_type_len[hdr->common.lf]; ++ ++ if (hdr->common.lf == ETH1394_HDR_LF_UF) { ++ DEBUGP("a single datagram has been received\n"); ++ /* An unfragmented datagram has been received by the ieee1394 ++ * bus. Build an skbuff around it so we can pass it to the ++ * high level network layer. */ ++ ++ //~ if(rtpkb_acquire((struct rtpkb*)packet, &priv->skb_pool)){ ++ //~ HPSB_PRINT (KERN_ERR, "eth1394 rx: low on mem\n"); ++ //~ priv->stats.rx_dropped++; ++ //~ return -1; ++ //~ } ++ ++ skb = rtnetdev_alloc_rtskb(dev, len + dev->hard_header_len + 15); ++ if (!skb) { ++ ETH1394_PRINT_G(KERN_ERR, "eth1394 rx: low on mem\n"); ++ priv->stats.rx_dropped++; ++ return -1; ++ } ++ //~ skb = (struct rtskb *)packet;//we can do this, because these two belong to the same common object, rtpkb. ++ //~ rtpkb_put(skb, len-hdr_len); ++ //~ skb->data = (u8 *)packet->data + hdr_len; //we jump over the 1394-specific fragment overhead ++ //~ rtskb_put(skb, ); ++ rtskb_reserve(skb, (dev->hard_header_len + 15) & ~15);//we reserve the space to put in fake MAC address ++ memcpy(rtskb_put(skb, len - hdr_len), buf + hdr_len, len - hdr_len); ++ ether_type = hdr->uf.ether_type; ++ } else { ++ /* A datagram fragment has been received, now the fun begins. */ ++ struct list_head *pdgl, *lh; ++ struct partial_datagram *pd; ++ int fg_off; ++ int fg_len = len - hdr_len; ++ int dg_size; ++ int dgl; ++ int retval; ++ int sid = NODEID_TO_NODE(srcid); ++ struct pdg_list *pdg = &(priv->pdg[sid]); ++ ++ DEBUGP("a datagram fragment has been received\n"); ++ hdr->words.word3 = ntohs(hdr->words.word3); ++ /* The 4th header word is reserved so no need to do ntohs() */ ++ ++ if (hdr->common.lf == ETH1394_HDR_LF_FF) { ++ //first fragment ++ ether_type = hdr->ff.ether_type; ++ dgl = hdr->ff.dgl; ++ dg_size = hdr->ff.dg_size + 1; ++ fg_off = 0; ++ } else { ++ hdr->words.word2 = ntohs(hdr->words.word2); ++ dgl = hdr->sf.dgl; ++ dg_size = hdr->sf.dg_size + 1; ++ fg_off = hdr->sf.fg_off; ++ } ++ rtdm_lock_get_irqsave(&pdg->lock, context); ++ ++ pdgl = &(pdg->list); ++ lh = find_partial_datagram(pdgl, dgl); ++ ++ if (lh == NULL) { ++ if (pdg->sz == max_partial_datagrams) { ++ /* remove the oldest */ ++ purge_partial_datagram(pdgl->prev); ++ pdg->sz--; ++ } ++ ++ retval = new_partial_datagram(dev, pdgl, dgl, dg_size, ++ buf + hdr_len, fg_off, ++ fg_len); ++ if (retval < 0) { ++ rtdm_lock_put_irqrestore(&pdg->lock, context); ++ goto bad_proto; ++ } ++ pdg->sz++; ++ lh = find_partial_datagram(pdgl, dgl); ++ } else { ++ struct partial_datagram *pd; ++ ++ pd = list_entry(lh, struct partial_datagram, list); ++ ++ if (fragment_overlap(&pd->frag_info, fg_off, fg_len)) { ++ /* Overlapping fragments, obliterate old ++ * datagram and start new one. */ ++ purge_partial_datagram(lh); ++ retval = new_partial_datagram(dev, pdgl, dgl, ++ dg_size, ++ buf + hdr_len, ++ fg_off, fg_len); ++ if (retval < 0) { ++ pdg->sz--; ++ rtdm_lock_put_irqrestore(&pdg->lock, context); ++ goto bad_proto; ++ } ++ } else { ++ retval = update_partial_datagram(pdgl, lh, ++ buf + hdr_len, ++ fg_off, fg_len); ++ if (retval < 0) { ++ /* Couldn't save off fragment anyway ++ * so might as well obliterate the ++ * datagram now. 
*/ ++ purge_partial_datagram(lh); ++ pdg->sz--; ++ rtdm_lock_put_irqrestore(&pdg->lock, context); ++ goto bad_proto; ++ } ++ } /* fragment overlap */ ++ } /* new datagram or add to existing one */ ++ ++ pd = list_entry(lh, struct partial_datagram, list); ++ ++ if (hdr->common.lf == ETH1394_HDR_LF_FF) { ++ pd->ether_type = ether_type; ++ } ++ ++ if (is_datagram_complete(lh, dg_size)) { ++ ether_type = pd->ether_type; ++ pdg->sz--; ++ //skb = skb_get(pd->skb); ++ skb = pd->skb; ++ purge_partial_datagram(lh); ++ rtdm_lock_put_irqrestore(&pdg->lock, context); ++ } else { ++ /* Datagram is not complete, we're done for the ++ * moment. */ ++ rtdm_lock_put_irqrestore(&pdg->lock, context); ++ return 0; ++ } ++ } /* unframgented datagram or fragmented one */ ++ ++ /* Write metadata, and then pass to the receive level */ ++ skb->ip_summed = CHECKSUM_UNNECESSARY; /* don't check it */ ++ ++ /* Parse the encapsulation header. This actually does the job of ++ * converting to an ethernet frame header, aswell as arp ++ * conversion if needed. ARP conversion is easier in this ++ * direction, since we are using ethernet as our backend. */ ++ skb->protocol = eth1394_parse_encap(skb, dev, srcid, destid, ++ ether_type); ++ ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ if (!skb->protocol) { ++ DEBUG_PRINT("pointer to %s(%s)%d\n",__FILE__,__FUNCTION__,__LINE__); ++ priv->stats.rx_errors++; ++ priv->stats.rx_dropped++; ++ //dev_kfree_skb_any(skb); ++ kfree_rtskb(skb); ++ goto bad_proto; ++ } ++ ++ skb->time_stamp = time_stamp; ++ /*if (netif_rx(skb) == NET_RX_DROP) { ++ priv->stats.rx_errors++; ++ priv->stats.rx_dropped++; ++ goto bad_proto; ++ }*/ ++ rtnetif_rx(skb);//finally, we deliver the packet ++ ++ /* Statistics */ ++ priv->stats.rx_packets++; ++ priv->stats.rx_bytes += skb->len; ++ rt_mark_stack_mgr(dev); ++ ++bad_proto: ++ if (rtnetif_queue_stopped(dev)) ++ rtnetif_wake_queue(dev); ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ //dev->last_rx = jiffies; ++ ++ return 0; ++} ++ ++ ++static int eth1394_write(struct hpsb_host *host, struct hpsb_packet *packet, unsigned int length) ++{ ++ struct host_info *hi = hpsb_get_hostinfo(ð1394_highlevel, host); ++ int ret; ++ ++ if (hi == NULL) { ++ ETH1394_PRINT_G(KERN_ERR, "Could not find net device for host %s\n", ++ host->driver->name); ++ return RCODE_ADDRESS_ERROR; ++ } ++ ++ //we need to parse the packet now ++ ret = eth1394_data_handler(hi->dev, packet->header[1]>>16, //source id ++ packet->header[0]>>16, //dest id ++ (char *)packet->data, //data ++ packet->data_size, packet->time_stamp); ++ //we only get the request packet, serve it, but dont free it, since it does not belong to us!!!! 
++ ++ if(ret) ++ return RCODE_ADDRESS_ERROR; ++ else ++ return RCODE_COMPLETE; ++} ++ ++ ++/** ++ * callback function for broadcast channel ++ * called from hpsb_iso_wake( ) ++ */ ++static void eth1394_iso(struct hpsb_iso *iso, void *arg) ++{ ++ quadlet_t *data; ++ char *buf; ++ struct rtnet_device *dev; ++ unsigned int len; ++ u32 specifier_id; ++ u16 source_id; ++ int i; ++ int nready; ++ ++ struct host_info *hi = hpsb_get_hostinfo(ð1394_highlevel, iso->host); ++ if (hi == NULL) { ++ ETH1394_PRINT_G(KERN_ERR, "Could not find net device for host %s\n", ++ iso->host->driver->name); ++ return; ++ } ++ ++ dev = hi->dev; ++ ++ nready = hpsb_iso_n_ready(iso); ++ for (i = 0; i < nready; i++) { ++ struct hpsb_iso_packet_info *info = ++ &iso->infos[(iso->first_packet + i) % iso->buf_packets]; ++ data = (quadlet_t*) (iso->data_buf.kvirt + info->offset); ++ ++ /* skip over GASP header */ ++ buf = (char *)data + 8; ++ len = info->len - 8; ++ ++ specifier_id = (((be32_to_cpu(data[0]) & 0xffff) << 8) | ++ ((be32_to_cpu(data[1]) & 0xff000000) >> 24)); ++ source_id = be32_to_cpu(data[0]) >> 16; ++ ++ if (info->channel != (iso->host->csr.broadcast_channel & 0x3f) || ++ specifier_id != ETHER1394_GASP_SPECIFIER_ID) { ++ /* This packet is not for us */ ++ continue; ++ } ++ eth1394_data_handler(dev, source_id, LOCAL_BUS | ALL_NODES, ++ buf, len, rtdm_clock_read()); ++ } ++ ++ hpsb_iso_recv_release_packets(iso, i); ++ ++ //dev->last_rx = jiffies; ++} ++ ++/****************************************** ++ * Datagram transmission code ++ ******************************************/ ++ ++/* Convert a standard ARP packet to 1394 ARP. The first 8 bytes (the entire ++ * arphdr) is the same format as the ip1394 header, so they overlap. The rest ++ * needs to be munged a bit. The remainder of the arphdr is formatted based ++ * on hwaddr len and ipaddr len. We know what they'll be, so it's easy to ++ * judge. ++ * ++ * Now that the EUI is used for the hardware address all we need to do to make ++ * this work for 1394 is to insert 2 quadlets that contain max_rec size, ++ * speed, and unicast FIFO address information between the sender_unique_id ++ * and the IP addresses. ++ */ ++ ++//we dont need the EUI id now. fifo_hi should contain the bus id and node id. ++//fifo_lo should contain the highest 32 bits of in-node address. ++static inline void eth1394_arp_to_1394arp(struct rtskb *skb, ++ struct rtnet_device *dev) ++{ ++ struct eth1394_priv *priv = (struct eth1394_priv *)(dev->priv); ++ u16 phy_id = NODEID_TO_NODE(priv->host->node_id); ++ ++ struct arphdr *arp = (struct arphdr *)skb->data; ++ unsigned char *arp_ptr = (unsigned char *)(arp + 1); ++ struct eth1394_arp *arp1394 = (struct eth1394_arp *)skb->data; ++ ++ arp1394->hw_addr_len = 6; ++ arp1394->sip = *(u32*)(arp_ptr + ETH_ALEN); ++ arp1394->max_rec = priv->host->csr.max_rec; ++ arp1394->sspd = priv->sspd[phy_id]; ++ ++ return; ++} ++ ++/* We need to encapsulate the standard header with our own. We use the ++ * ethernet header's proto for our own. */ ++static inline unsigned int eth1394_encapsulate_prep(unsigned int max_payload, ++ int proto, ++ union eth1394_hdr *hdr, ++ u16 dg_size, u16 dgl) ++{ ++ unsigned int adj_max_payload = max_payload - hdr_type_len[ETH1394_HDR_LF_UF]; ++ ++ /* Does it all fit in one packet? 
*/ ++ if (dg_size <= adj_max_payload) { ++ hdr->uf.lf = ETH1394_HDR_LF_UF; ++ hdr->uf.ether_type = proto; ++ } else { ++ hdr->ff.lf = ETH1394_HDR_LF_FF; ++ hdr->ff.ether_type = proto; ++ hdr->ff.dg_size = dg_size - 1; ++ hdr->ff.dgl = dgl; ++ adj_max_payload = max_payload - hdr_type_len[ETH1394_HDR_LF_FF]; ++ } ++ return((dg_size + (adj_max_payload - 1)) / adj_max_payload); ++} ++ ++static inline unsigned int eth1394_encapsulate(struct rtskb *skb, ++ unsigned int max_payload, ++ union eth1394_hdr *hdr) ++{ ++ union eth1394_hdr *bufhdr; ++ int ftype = hdr->common.lf; ++ int hdrsz = hdr_type_len[ftype]; ++ unsigned int adj_max_payload = max_payload - hdrsz; ++ ++ switch(ftype) { ++ case ETH1394_HDR_LF_UF: ++ bufhdr = (union eth1394_hdr *)rtskb_push(skb, hdrsz); ++ bufhdr->words.word1 = htons(hdr->words.word1); ++ bufhdr->words.word2 = hdr->words.word2; ++ break; ++ ++ case ETH1394_HDR_LF_FF: ++ bufhdr = (union eth1394_hdr *)rtskb_push(skb, hdrsz); ++ bufhdr->words.word1 = htons(hdr->words.word1); ++ bufhdr->words.word2 = hdr->words.word2; ++ bufhdr->words.word3 = htons(hdr->words.word3); ++ bufhdr->words.word4 = 0; ++ ++ /* Set frag type here for future interior fragments */ ++ hdr->common.lf = ETH1394_HDR_LF_IF; ++ hdr->sf.fg_off = 0; ++ break; ++ ++ default: ++ hdr->sf.fg_off += adj_max_payload; ++ bufhdr = (union eth1394_hdr *)rtskb_pull(skb, adj_max_payload); ++ if (max_payload >= skb->len) ++ hdr->common.lf = ETH1394_HDR_LF_LF; ++ bufhdr->words.word1 = htons(hdr->words.word1); ++ bufhdr->words.word2 = htons(hdr->words.word2); ++ bufhdr->words.word3 = htons(hdr->words.word3); ++ bufhdr->words.word4 = 0; ++ } ++ ++ return min(max_payload, skb->len); ++} ++ ++//just allocate a hpsb_packet header, without payload. ++static inline struct hpsb_packet *eth1394_alloc_common_packet(struct hpsb_host *host, unsigned int priority) ++{ ++ struct hpsb_packet *p; ++ ++ p = hpsb_alloc_packet(0,&host->pool, priority); ++ if (p) { ++ p->host = host; ++ p->data = NULL; ++ p->generation = get_hpsb_generation(host); ++ p->type = hpsb_async; ++ } ++ return p; ++} ++ ++//prepare an asynchronous write packet ++static inline int eth1394_prep_write_packet(struct hpsb_packet *p, ++ struct hpsb_host *host, ++ nodeid_t node, u64 addr, ++ void * data, int tx_len) ++{ ++ p->node_id = node; ++ ++ p->tcode = TCODE_WRITEB; ++ ++ p->header[1] = (host->node_id << 16) | (addr >> 32); ++ p->header[2] = addr & 0xffffffff; ++ ++ p->header_size = 16; ++ p->expect_response = 1; ++ ++ if (hpsb_get_tlabel(p)) { ++ ETH1394_PRINT_G(KERN_ERR, "No more tlabels left while sending " ++ "to node " NODE_BUS_FMT "\n", NODE_BUS_ARGS(host, node)); ++ return -1; ++ } ++ p->header[0] = (p->node_id << 16) | (p->tlabel << 10) ++ | (1 << 8) | (TCODE_WRITEB << 4); ++ ++ p->header[3] = tx_len << 16; ++ p->data_size = tx_len + (tx_len % 4 ? 4 - (tx_len % 4) : 0); ++ p->data = (quadlet_t*)data; ++ ++ return 0; ++} ++ ++//prepare gasp packet from skb. ++static inline void eth1394_prep_gasp_packet(struct hpsb_packet *p, ++ struct eth1394_priv *priv, ++ struct rtskb *skb, int length) ++{ ++ p->header_size = 4; ++ p->tcode = TCODE_STREAM_DATA; ++ ++ p->header[0] = (length << 16) | (3 << 14) ++ | ((priv->broadcast_channel) << 8) ++ | (TCODE_STREAM_DATA << 4); ++ p->data_size = length; ++ p->data = ((quadlet_t*)skb->data) - 2; //we need 64bits for extra spec_id and gasp version. 
++ p->data[0] = cpu_to_be32((priv->host->node_id << 16) | ++ ETHER1394_GASP_SPECIFIER_ID_HI); ++ p->data[1] = cpu_to_be32((ETHER1394_GASP_SPECIFIER_ID_LO << 24) | ++ ETHER1394_GASP_VERSION); ++ ++ /* Setting the node id to ALL_NODES (not LOCAL_BUS | ALL_NODES) ++ * prevents hpsb_send_packet() from setting the speed to an arbitrary ++ * value based on packet->node_id if packet->node_id is not set. */ ++ p->node_id = ALL_NODES; ++ p->speed_code = priv->sspd[ALL_NODES]; ++} ++ ++ ++static inline void eth1394_free_packet(struct hpsb_packet *packet) ++{ ++ if (packet->tcode != TCODE_STREAM_DATA) ++ hpsb_free_tlabel(packet); ++ hpsb_free_packet(packet); ++} ++ ++static void eth1394_complete_cb(struct hpsb_packet *packet, void *__ptask); ++ ++ ++/** ++ * this function does the real calling of hpsb_send_packet ++ *But before that, it also constructs the FireWire packet according to ++ * ptask ++ */ ++static int eth1394_send_packet(struct packet_task *ptask, unsigned int tx_len, nanosecs_abs_t *xmit_stamp) ++{ ++ struct eth1394_priv *priv = ptask->priv; ++ struct hpsb_packet *packet = NULL; ++ int ret; ++ ++ packet = eth1394_alloc_common_packet(priv->host, ptask->priority); ++ if (!packet) { ++ ret = -ENOMEM; ++ return ret; ++ } ++ if(xmit_stamp) ++ packet->xmit_stamp = xmit_stamp; ++ ++ if (ptask->tx_type == ETH1394_GASP) { ++ int length = tx_len + (2 * sizeof(quadlet_t)); //for the extra gasp overhead ++ ++ eth1394_prep_gasp_packet(packet, priv, ptask->skb, length); ++ } else if (eth1394_prep_write_packet(packet, priv->host, ++ ptask->dest_node, ++ ptask->addr, ptask->skb->data, ++ tx_len)) { ++ hpsb_free_packet(packet); ++ return -1; ++ } ++ ++ ptask->packet = packet; ++ hpsb_set_packet_complete_task(ptask->packet, eth1394_complete_cb, ++ ptask); ++ ++ ret = hpsb_send_packet(packet); ++ if (ret != 0) { ++ eth1394_free_packet(packet); ++ } ++ ++ return ret; ++} ++ ++ ++/* Task function to be run when a datagram transmission is completed */ ++static inline void eth1394_dg_complete(struct packet_task *ptask, int fail) ++{ ++ struct rtskb *skb = ptask->skb; ++ struct rtnet_device *dev = skb->rtdev; ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ rtdm_lockctx_t context; ++ ++ /* Statistics */ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ if (fail) { ++ priv->stats.tx_dropped++; ++ priv->stats.tx_errors++; ++ } else { ++ priv->stats.tx_bytes += skb->len; ++ priv->stats.tx_packets++; ++ } ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ //dev_kfree_skb_any(skb); ++ kfree_rtskb(skb); ++ //~ kmem_cache_free(packet_task_cache, ptask); ++ //this means this ptask structure has been freed ++ ptask->packet=NULL; ++} ++ ++ ++/* Callback for when a packet has been sent and the status of that packet is ++ * known */ ++static void eth1394_complete_cb(struct hpsb_packet *packet, void *__ptask) ++{ ++ struct packet_task *ptask = (struct packet_task *)__ptask; ++ int fail = 0; ++ ++ if (packet->tcode != TCODE_STREAM_DATA) ++ fail = hpsb_packet_success(packet); ++ ++ //we have no rights to free packet, since it belongs to RT-FireWire kernel. 
++ //~ eth1394_free_packet(packet); ++ ++ ptask->outstanding_pkts--; ++ if (ptask->outstanding_pkts > 0 && !fail) ++ { ++ int tx_len; ++ ++ /* Add the encapsulation header to the fragment */ ++ tx_len = eth1394_encapsulate(ptask->skb, ptask->max_payload, ++ &ptask->hdr); ++ if (eth1394_send_packet(ptask, tx_len, NULL)) ++ eth1394_dg_complete(ptask, 1); ++ } else { ++ eth1394_dg_complete(ptask, fail); ++ } ++} ++ ++ ++ ++/** ++ *Transmit a packet (called by kernel) ++ * this is the dev->hard_start_transmit ++ */ ++static int eth1394_tx (struct rtskb *skb, struct rtnet_device *dev) ++{ ++ ++ struct ethhdr *eth; ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ int proto; ++ rtdm_lockctx_t context; ++ nodeid_t dest_node; ++ eth1394_tx_type tx_type; ++ int ret = 0; ++ unsigned int tx_len; ++ unsigned int max_payload; ++ u16 dg_size; ++ u16 dgl; ++ ++ //we try to find the available ptask struct, if failed, we can not send packet ++ struct packet_task *ptask = NULL; ++ int i; ++ for(i=0;i<20;i++){ ++ if(priv->ptask_list[i].packet == NULL){ ++ ptask = &priv->ptask_list[i]; ++ break; ++ } ++ } ++ if(ptask == NULL) ++ return -EBUSY; ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ if (priv->bc_state == ETHER1394_BC_CLOSED) { ++ ETH1394_PRINT(KERN_ERR, dev->name, ++ "Cannot send packet, no broadcast channel available.\n"); ++ ret = -EAGAIN; ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ goto fail; ++ } ++ if ((ret = eth1394_init_bc(dev))) { ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ goto fail; ++ } ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ //if ((skb = skb_share_check (skb, kmflags)) == NULL) { ++ // ret = -ENOMEM; ++ // goto fail; ++ //} ++ ++ /* Get rid of the fake eth1394 header, but save a pointer */ ++ eth = (struct ethhdr*)skb->data; ++ rtskb_pull(skb, ETH_HLEN); ++ //dont get rid of the fake eth1394 header, since we need it on the receiving side ++ //eth = (struct ethhdr*)skb->data; ++ ++ //~ //find the node id via our fake MAC address ++ //~ ne = hpsb_guid_get_entry(be64_to_cpu(*(u64*)eth->h_dest)); ++ //~ if (!ne) ++ //~ dest_node = LOCAL_BUS | ALL_NODES; ++ //~ else ++ //~ dest_node = ne->nodeid; ++ //now it is much easier ++ dest_node = *(u16*)eth->h_dest; ++ if(dest_node != 0xffff) ++ DEBUGP("%s: dest_node is %x\n", __FUNCTION__, dest_node); ++ ++ proto = eth->h_proto; ++ ++ /* If this is an ARP packet, convert it */ ++ if (proto == __constant_htons (ETH_P_ARP)) ++ eth1394_arp_to_1394arp (skb, dev); ++ ++ max_payload = priv->maxpayload[NODEID_TO_NODE(dest_node)]; ++ DEBUGP("%s: max_payload is %d\n", __FUNCTION__, max_payload); ++ ++ /* This check should be unnecessary, but we'll keep it for safety for ++ * a while longer. */ ++ if (max_payload < 512) { ++ DEBUGP("max_payload too small: %d (setting to 512)\n", ++ max_payload); ++ max_payload = 512; ++ } ++ ++ /* Set the transmission type for the packet. ARP packets and IP ++ * broadcast packets are sent via GASP. 
*/ ++ if (memcmp(eth->h_dest, dev->broadcast, sizeof(nodeid_t)) == 0 || ++ proto == __constant_htons(ETH_P_ARP) || ++ (proto == __constant_htons(ETH_P_IP) && ++ IN_MULTICAST(__constant_ntohl(skb->nh.iph->daddr)))) { ++ tx_type = ETH1394_GASP; ++ max_payload -= ETHER1394_GASP_OVERHEAD; //we have extra overhead for gasp packet ++ } else { ++ tx_type = ETH1394_WRREQ; ++ } ++ ++ dg_size = skb->len; ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ dgl = priv->dgl[NODEID_TO_NODE(dest_node)]; ++ if (max_payload < dg_size + hdr_type_len[ETH1394_HDR_LF_UF]) ++ priv->dgl[NODEID_TO_NODE(dest_node)]++; ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ ptask->hdr.words.word1 = 0; ++ ptask->hdr.words.word2 = 0; ++ ptask->hdr.words.word3 = 0; ++ ptask->hdr.words.word4 = 0; ++ ptask->skb = skb; ++ ptask->priv = priv; ++ ptask->tx_type = tx_type; ++ ++ if (tx_type != ETH1394_GASP) { ++ u64 addr; ++ ++ /* This test is just temporary until ConfigROM support has ++ * been added to eth1394. Until then, we need an ARP packet ++ * after a bus reset from the current destination node so that ++ * we can get FIFO information. */ ++ //~ if (priv->fifo[NODEID_TO_NODE(dest_node)] == 0ULL) { ++ //~ ret = -EAGAIN; ++ //~ goto fail; ++ //~ } ++ ++ //~ rtos_spin_lock_irqsave(&priv->lock, flags); ++ //~ addr = priv->fifo[NODEID_TO_NODE(dest_node)]; ++ addr = ETHER1394_REGION_ADDR; ++ //~ rtos_spin_unlock_irqrestore(&priv->lock, flags); ++ ++ ptask->addr = addr; ++ ptask->dest_node = dest_node; ++ } ++ ++ ptask->tx_type = tx_type; ++ ptask->max_payload = max_payload; ++ ptask->outstanding_pkts = eth1394_encapsulate_prep(max_payload, proto, ++ &ptask->hdr, dg_size, ++ dgl); ++ ++ /* Add the encapsulation header to the fragment */ ++ tx_len = eth1394_encapsulate(skb, max_payload, &ptask->hdr); ++ //dev->trans_start = jiffies; ++ //~ if(skb->xmit_stamp) ++ //~ *skb->xmit_stamp = cpu_to_be64(rtos_get_time() + *skb->xmit_stamp); ++ ++ ++ if (eth1394_send_packet(ptask, tx_len, skb->xmit_stamp)) ++ goto fail; ++ ++ rtnetif_wake_queue(dev); ++ return 0; ++fail: ++ if (ptask!=NULL){ ++ //~ kmem_cache_free(packet_task_cache, ptask); ++ ptask->packet=NULL; ++ ptask=NULL; ++ } ++ ++ if (skb != NULL) ++ dev_kfree_rtskb(skb); ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ priv->stats.tx_dropped++; ++ priv->stats.tx_errors++; ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ if (rtnetif_queue_stopped(dev)) ++ rtnetif_wake_queue(dev); ++ ++ return 0; /* returning non-zero causes serious problems */ ++} ++ ++static int eth1394_init(void) ++{ ++ hpsb_register_highlevel(ð1394_highlevel); ++ ++ return 0; ++} ++ ++static void eth1394_exit(void) ++{ ++ hpsb_unregister_highlevel(ð1394_highlevel); ++} ++ ++module_init(eth1394_init); ++module_exit(eth1394_exit); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/addons/proxy.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/addons/proxy.c 2021-04-07 16:01:27.149634278 +0800 +@@ -0,0 +1,442 @@ ++/* rtnetproxy.c: a Linux network driver that uses the RTnet driver to ++ * transport IP data from/to Linux kernel mode. ++ * This allows the usage of TCP/IP from linux space using via the RTNET ++ * network adapter. ++ * ++ * ++ * Usage: ++ * ++ * insmod rtnetproxy.o (only after having rtnet up and running) ++ * ++ * ifconfig rtproxy up IP_ADDRESS netmask NETMASK ++ * ++ * Use it like any other network device from linux. 
++ * ++ * Restrictions: ++ * Only IPV4 based protocols are supported, UDP and ICMP can be send out ++ * but not received - as these are handled directly by rtnet! ++ * ++ * ++ * ++ * Based on the linux net driver dummy.c by Nick Holloway ++ * ++ * ++ * Changelog: ++ * ++ * 08-Nov-2002 Mathias Koehrer - Clear separation between rtai context and ++ * standard linux driver context. ++ * Data exchange via ringbuffers. ++ * A RTAI thread is used for rtnet transmission. ++ * ++ * 05-Nov-2002 Mathias Koehrer - Initial version! ++ * Development based on rtnet 0.2.6, ++ * rtai-24.1.10, kernel 2.4.19 ++ * ++ * ++ * Mathias Koehrer - mathias_koehrer@yahoo.de ++*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include /* For the statistics structure. */ ++#include /* For ARPHRD_ETHER */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static struct net_device *dev_rtnetproxy; ++ ++/* ************************************************************************** ++ * SKB pool management (JK): ++ * ************************************************************************ */ ++#define DEFAULT_PROXY_RTSKBS 32 ++ ++static unsigned int proxy_rtskbs = DEFAULT_PROXY_RTSKBS; ++module_param(proxy_rtskbs, uint, 0444); ++MODULE_PARM_DESC(proxy_rtskbs, ++ "Number of realtime socket buffers in proxy pool"); ++ ++static struct rtskb_pool rtskb_pool; ++ ++static struct rtskb_queue tx_queue; ++static struct rtskb_queue rx_queue; ++ ++/* handle for non-real-time signal */ ++static rtdm_nrtsig_t rtnetproxy_rx_signal; ++ ++/* Thread for transmission */ ++static rtdm_task_t rtnetproxy_tx_task; ++ ++static rtdm_event_t rtnetproxy_tx_event; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++static char *rtdev_attach = "rteth0"; ++module_param(rtdev_attach, charp, 0444); ++MODULE_PARM_DESC(rtdev_attach, "Attach to the specified RTnet device"); ++ ++struct rtnet_device *rtnetproxy_rtdev; ++#endif ++ ++/* ************************************************************************ ++ * ************************************************************************ ++ * T R A N S M I T ++ * ************************************************************************ ++ * ************************************************************************ */ ++ ++static void rtnetproxy_tx_loop(void *arg) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ ++ while (!rtdm_task_should_stop()) { ++ if (rtdm_event_wait(&rtnetproxy_tx_event) < 0) ++ break; ++ ++ while ((rtskb = rtskb_dequeue(&tx_queue)) != NULL) { ++ rtdev = rtskb->rtdev; ++ rtdev_xmit_proxy(rtskb); ++ rtdev_dereference(rtdev); ++ } ++ } ++} ++ ++/* ************************************************************************ ++ * hard_xmit ++ * ++ * This function runs in linux kernel context and is executed whenever ++ * there is a frame to be sent out. 
++ * ************************************************************************ */ ++static int rtnetproxy_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct ethhdr *eth = (struct ethhdr *)skb->data; ++ struct rtskb *rtskb; ++ int len = skb->len; ++#ifndef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ struct dest_route rt; ++ struct iphdr *iph; ++ u32 saddr, daddr; ++#endif ++ ++ switch (ntohs(eth->h_proto)) { ++ case ETH_P_IP: ++ if (len < sizeof(struct ethhdr) + sizeof(struct iphdr)) ++ goto drop1; ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ case ETH_P_ARP: ++#endif ++ break; ++ default: ++ drop1: ++ dev->stats.tx_dropped++; ++ dev_kfree_skb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ rtskb = alloc_rtskb(len, &rtskb_pool); ++ if (!rtskb) ++ return NETDEV_TX_BUSY; ++ ++ memcpy(rtskb_put(rtskb, len), skb->data, len); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ dev_kfree_skb(skb); ++ ++ rtskb->rtdev = rtnetproxy_rtdev; ++ if (rtdev_reference(rtnetproxy_rtdev) == 0) { ++ dev->stats.tx_dropped++; ++ kfree_rtskb(rtskb); ++ return NETDEV_TX_BUSY; ++ } ++ ++#else /* !CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP */ ++ iph = (struct iphdr *)(skb->data + sizeof(struct ethhdr)); ++ saddr = iph->saddr; ++ daddr = iph->daddr; ++ ++ dev_kfree_skb(skb); ++ ++ if (rt_ip_route_output(&rt, daddr, INADDR_ANY) < 0) { ++ drop2: ++ dev->stats.tx_dropped++; ++ kfree_rtskb(rtskb); ++ return NETDEV_TX_OK; ++ } ++ if (rt.rtdev->local_ip != saddr) { ++ rtdev_dereference(rt.rtdev); ++ goto drop2; ++ } ++ ++ eth = (struct ethhdr *)rtskb->data; ++ memcpy(eth->h_source, rt.rtdev->dev_addr, rt.rtdev->addr_len); ++ memcpy(eth->h_dest, rt.dev_addr, rt.rtdev->addr_len); ++ ++ rtskb->rtdev = rt.rtdev; ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP */ ++ ++ dev->stats.tx_packets++; ++ dev->stats.tx_bytes += len; ++ ++ rtskb_queue_tail(&tx_queue, rtskb); ++ rtdm_event_signal(&rtnetproxy_tx_event); ++ ++ return NETDEV_TX_OK; ++} ++ ++/* ************************************************************************ ++ * ************************************************************************ ++ * R E C E I V E ++ * ************************************************************************ ++ * ************************************************************************ */ ++ ++/* ************************************************************************ ++ * This function runs in real-time context. ++ * ++ * It is called from inside rtnet whenever a packet has been received that ++ * has to be processed by rtnetproxy. ++ * ************************************************************************ */ ++static void rtnetproxy_recv(struct rtskb *rtskb) ++{ ++ /* Acquire rtskb (JK) */ ++ if (rtskb_acquire(rtskb, &rtskb_pool) != 0) { ++ dev_rtnetproxy->stats.rx_dropped++; ++ rtdm_printk("rtnetproxy_recv: No free rtskb in pool\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ if (rtskb_queue_tail_check(&rx_queue, rtskb)) ++ rtdm_nrtsig_pend(&rtnetproxy_rx_signal); ++} ++ ++/* ************************************************************************ ++ * This function runs in kernel mode. ++ * It is activated from rtnetproxy_signal_handler whenever rtnet received a ++ * frame to be processed by rtnetproxy. 
++ * ************************************************************************ */ ++static inline void rtnetproxy_kernel_recv(struct rtskb *rtskb) ++{ ++ struct sk_buff *skb; ++ struct net_device *dev = dev_rtnetproxy; ++ ++ int header_len = rtskb->rtdev->hard_header_len; ++ int len = rtskb->len + header_len; ++ ++ /* Copy the realtime skb (rtskb) to the standard skb: */ ++ skb = dev_alloc_skb(len + 2); ++ skb_reserve(skb, 2); ++ ++ memcpy(skb_put(skb, len), rtskb->data - header_len, len); ++ ++ /* Set some relevant entries in the skb: */ ++ skb->protocol = eth_type_trans(skb, dev); ++ skb->dev = dev; ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ skb->pkt_type = PACKET_HOST; /* Extremely important! Why?!? */ ++ ++ /* the rtskb stamp is useless (different clock), get new one */ ++ __net_timestamp(skb); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) ++ dev->last_rx = jiffies; ++#endif ++ dev->stats.rx_bytes += skb->len; ++ dev->stats.rx_packets++; ++ ++ netif_rx(skb); /* pass it to the received stuff */ ++} ++ ++/* ************************************************************************ ++ * This function runs in kernel mode. ++ * It is activated from rtnetproxy_recv whenever rtnet received a frame to ++ * be processed by rtnetproxy. ++ * ************************************************************************ */ ++static void rtnetproxy_signal_handler(rtdm_nrtsig_t *nrtsig, void *arg) ++{ ++ struct rtskb *rtskb; ++ ++ while ((rtskb = rtskb_dequeue(&rx_queue)) != NULL) { ++ rtnetproxy_kernel_recv(rtskb); ++ kfree_rtskb(rtskb); ++ } ++} ++ ++/* ************************************************************************ ++ * ************************************************************************ ++ * G E N E R A L ++ * ************************************************************************ ++ * ************************************************************************ */ ++ ++static void fake_multicast_support(struct net_device *dev) ++{ ++} ++ ++#ifdef CONFIG_NET_FASTROUTE ++static int rtnetproxy_accept_fastpath(struct net_device *dev, ++ struct dst_entry *dst) ++{ ++ return -1; ++} ++#endif ++ ++static int rtnetproxy_open(struct net_device *dev) ++{ ++ int err = try_module_get(THIS_MODULE); ++ if (err == 0) ++ return -EIDRM; ++ ++ return 0; ++} ++ ++static int rtnetproxy_stop(struct net_device *dev) ++{ ++ module_put(THIS_MODULE); ++ return 0; ++} ++ ++static const struct net_device_ops rtnetproxy_netdev_ops = { ++ .ndo_open = rtnetproxy_open, ++ .ndo_stop = rtnetproxy_stop, ++ .ndo_start_xmit = rtnetproxy_xmit, ++ .ndo_set_rx_mode = fake_multicast_support, ++}; ++ ++/* ************************************************************************ ++ * device init ++ * ************************************************************************ */ ++static void __init rtnetproxy_init(struct net_device *dev) ++{ ++ /* Fill in device structure with ethernet-generic values. 
*/ ++ ether_setup(dev); ++ ++ dev->tx_queue_len = 0; ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ memcpy(dev->dev_addr, rtnetproxy_rtdev->dev_addr, MAX_ADDR_LEN); ++#else ++ dev->flags |= IFF_NOARP; ++#endif ++ dev->flags &= ~IFF_MULTICAST; ++ ++ dev->netdev_ops = &rtnetproxy_netdev_ops; ++} ++ ++/* ************************************************************************ ++ * ************************************************************************ ++ * I N I T ++ * ************************************************************************ ++ * ************************************************************************ */ ++static int __init rtnetproxy_init_module(void) ++{ ++ int err; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ if ((rtnetproxy_rtdev = rtdev_get_by_name(rtdev_attach)) == NULL) { ++ printk("Couldn't attach to %s\n", rtdev_attach); ++ return -EINVAL; ++ } ++ printk("RTproxy attached to %s\n", rtdev_attach); ++#endif ++ ++ /* Initialize the proxy's rtskb pool (JK) */ ++ if (rtskb_module_pool_init(&rtskb_pool, proxy_rtskbs) < proxy_rtskbs) { ++ err = -ENOMEM; ++ goto err1; ++ } ++ ++ dev_rtnetproxy = ++ alloc_netdev(0, "rtproxy", NET_NAME_UNKNOWN, rtnetproxy_init); ++ if (!dev_rtnetproxy) { ++ err = -ENOMEM; ++ goto err1; ++ } ++ ++ rtdm_nrtsig_init(&rtnetproxy_rx_signal, rtnetproxy_signal_handler, ++ NULL); ++ ++ rtskb_queue_init(&tx_queue); ++ rtskb_queue_init(&rx_queue); ++ ++ err = register_netdev(dev_rtnetproxy); ++ if (err < 0) ++ goto err3; ++ ++ /* Init the task for transmission */ ++ rtdm_event_init(&rtnetproxy_tx_event, 0); ++ err = rtdm_task_init(&rtnetproxy_tx_task, "rtnetproxy", ++ rtnetproxy_tx_loop, 0, RTDM_TASK_LOWEST_PRIORITY, ++ 0); ++ if (err) ++ goto err4; ++ ++ /* Register with RTnet */ ++ rt_ip_fallback_handler = rtnetproxy_recv; ++ ++ printk("rtnetproxy installed as \"%s\"\n", dev_rtnetproxy->name); ++ ++ return 0; ++ ++err4: ++ unregister_netdev(dev_rtnetproxy); ++ ++err3: ++ rtdm_nrtsig_destroy(&rtnetproxy_rx_signal); ++ ++ free_netdev(dev_rtnetproxy); ++ ++err1: ++ rtskb_pool_release(&rtskb_pool); ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ rtdev_dereference(rtnetproxy_rtdev); ++#endif ++ return err; ++} ++ ++static void __exit rtnetproxy_cleanup_module(void) ++{ ++ struct rtskb *rtskb; ++ ++ /* Unregister the fallback at rtnet */ ++ rt_ip_fallback_handler = NULL; ++ ++ /* Unregister the net device: */ ++ unregister_netdev(dev_rtnetproxy); ++ free_netdev(dev_rtnetproxy); ++ ++ rtdm_event_destroy(&rtnetproxy_tx_event); ++ rtdm_task_destroy(&rtnetproxy_tx_task); ++ ++ /* free the non-real-time signal */ ++ rtdm_nrtsig_destroy(&rtnetproxy_rx_signal); ++ ++ while ((rtskb = rtskb_dequeue(&tx_queue)) != NULL) { ++ rtdev_dereference(rtskb->rtdev); ++ kfree_rtskb(rtskb); ++ } ++ ++ while ((rtskb = rtskb_dequeue(&rx_queue)) != NULL) { ++ kfree_rtskb(rtskb); ++ } ++ ++ rtskb_pool_release(&rtskb_pool); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ rtdev_dereference(rtnetproxy_rtdev); ++#endif ++} ++ ++module_init(rtnetproxy_init_module); ++module_exit(rtnetproxy_cleanup_module); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/addons/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/addons/Makefile 2021-04-07 16:01:27.144634285 +0800 +@@ -0,0 +1,9 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) += rtcap.o ++ ++rtcap-y := cap.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) += rtnetproxy.o ++ ++rtnetproxy-y := proxy.o +--- 
linux/drivers/xenomai/net/addons/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/addons/Kconfig 2021-04-07 16:01:27.139634292 +0800 +@@ -0,0 +1,44 @@ ++menu "Add-Ons" ++ depends on XENO_DRIVERS_NET ++ ++config XENO_DRIVERS_NET_ADDON_RTCAP ++ depends on XENO_DRIVERS_NET && m ++ select ETHERNET ++ tristate "Real-Time Capturing Support" ++ default n ++ ---help--- ++ This feature allows to capture real-time packets traversing the RTnet ++ stack. It can both be used to sniff passively on a network (in this ++ case you may want to enable the promisc mode of your real-time NIC via ++ rtifconfig) and to log the traffic the node receives and transmits ++ during normal operation. RTcap consists of additional hooks in the ++ RTnet stack and a separate module as interface to standard network ++ analysis tools like Ethereal. ++ ++ For further information see Documentation/README.rtcap. ++ ++config XENO_DRIVERS_NET_ADDON_PROXY ++ depends on XENO_DRIVERS_NET_RTIPV4 && m ++ select ETHERNET ++ tristate "IP protocol proxy for Linux" ++ default n ++ ---help--- ++ Enables a forward-to-Linux module for all IP protocols that are not ++ handled by the IPv4 implemenation of RTnet (TCP, UDP, etc.). Only use ++ when you know what you are doing - it can easily break your real-time ++ requirements! ++ ++ See Documentation/README.rtnetproxy for further information. ++ ++config XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ depends on XENO_DRIVERS_NET_ADDON_PROXY ++ bool "Enable ARP handling via protocol proxy" ++ default n ++ ---help--- ++ Enables ARP support for the IP protocol proxy. Incoming ARP replies ++ are then delivered to both, the RTnet and the Linux network stack, ++ but only answered by Linux. The IP protocol proxy gets attached to ++ the RTnet device specified by the module parameter "rtdev_attach", ++ rteth0 by default. ++ ++endmenu +--- linux/drivers/xenomai/net/addons/cap.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/addons/cap.c 2021-04-07 16:01:27.134634299 +0800 +@@ -0,0 +1,503 @@ ++/*** ++ * ++ * rtcap/rtcap.c ++ * ++ * Real-Time Capturing Interface ++ * ++ * Copyright (C) 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include /* for netdev_priv() */ ++ ++MODULE_LICENSE("GPL"); ++ ++static unsigned int rtcap_rtskbs = 128; ++module_param(rtcap_rtskbs, uint, 0444); ++MODULE_PARM_DESC(rtcap_rtskbs, "Number of real-time socket buffers per " ++ "real-time device"); ++ ++#define TAP_DEV 1 ++#define RTMAC_TAP_DEV 2 ++#define XMIT_HOOK 4 ++ ++static rtdm_nrtsig_t cap_signal; ++static struct rtskb_queue cap_queue; ++static struct rtskb_pool cap_pool; ++ ++static struct tap_device_t { ++ struct net_device *tap_dev; ++ struct net_device *rtmac_tap_dev; ++ struct net_device_stats tap_dev_stats; ++ int present; ++ int (*orig_xmit)(struct rtskb *skb, struct rtnet_device *dev); ++} tap_device[MAX_RT_DEVICES]; ++ ++void rtcap_rx_hook(struct rtskb *rtskb) ++{ ++ bool trigger = false; ++ ++ if ((rtskb->cap_comp_skb = rtskb_pool_dequeue(&cap_pool)) == 0) { ++ tap_device[rtskb->rtdev->ifindex].tap_dev_stats.rx_dropped++; ++ return; ++ } ++ ++ if (cap_queue.first == NULL) { ++ cap_queue.first = rtskb; ++ trigger = true; ++ } else ++ cap_queue.last->cap_next = rtskb; ++ cap_queue.last = rtskb; ++ rtskb->cap_next = NULL; ++ ++ rtskb->cap_flags |= RTSKB_CAP_SHARED; ++ ++ if (trigger) ++ rtdm_nrtsig_pend(&cap_signal); ++} ++ ++int rtcap_xmit_hook(struct rtskb *rtskb, struct rtnet_device *rtdev) ++{ ++ struct tap_device_t *tap_dev = &tap_device[rtskb->rtdev->ifindex]; ++ rtdm_lockctx_t context; ++ bool trigger = false; ++ ++ if ((rtskb->cap_comp_skb = rtskb_pool_dequeue(&cap_pool)) == 0) { ++ tap_dev->tap_dev_stats.rx_dropped++; ++ return tap_dev->orig_xmit(rtskb, rtdev); ++ } ++ ++ rtskb->cap_next = NULL; ++ rtskb->cap_start = rtskb->data; ++ rtskb->cap_len = rtskb->len; ++ rtskb->cap_flags |= RTSKB_CAP_SHARED; ++ ++ rtskb->time_stamp = rtdm_clock_read(); ++ ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ ++ if (cap_queue.first == NULL) { ++ cap_queue.first = rtskb; ++ trigger = true; ++ } else ++ cap_queue.last->cap_next = rtskb; ++ cap_queue.last = rtskb; ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ if (trigger) ++ rtdm_nrtsig_pend(&cap_signal); ++ ++ return tap_dev->orig_xmit(rtskb, rtdev); ++} ++ ++int rtcap_loopback_xmit_hook(struct rtskb *rtskb, struct rtnet_device *rtdev) ++{ ++ struct tap_device_t *tap_dev = &tap_device[rtskb->rtdev->ifindex]; ++ ++ rtskb->time_stamp = rtdm_clock_read(); ++ ++ return tap_dev->orig_xmit(rtskb, rtdev); ++} ++ ++void rtcap_kfree_rtskb(struct rtskb *rtskb) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *comp_skb; ++ ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ ++ if (rtskb->cap_flags & RTSKB_CAP_SHARED) { ++ rtskb->cap_flags &= ~RTSKB_CAP_SHARED; ++ ++ comp_skb = rtskb->cap_comp_skb; ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ rtskb_pool_queue_tail(comp_skb->pool, comp_skb); ++ ++ return; ++ } ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ rtskb->chain_end = rtskb; ++ rtskb_pool_queue_tail(rtskb->pool, rtskb); ++} ++ ++static void convert_timestamp(nanosecs_abs_t timestamp, struct sk_buff *skb) ++{ ++#ifdef CONFIG_KTIME_SCALAR ++ skb->tstamp.tv64 = timestamp; ++#else /* !CONFIG_KTIME_SCALAR */ ++ unsigned long rem; ++ ++ rem = do_div(timestamp, NSEC_PER_SEC); ++ skb->tstamp = ktime_set((long)timestamp, rem); ++#endif /* !CONFIG_KTIME_SCALAR */ ++} ++ ++static void rtcap_signal_handler(rtdm_nrtsig_t *nrtsig, void *arg) ++{ ++ struct rtskb *rtskb; ++ struct sk_buff *skb; ++ struct sk_buff *rtmac_skb; ++ struct net_device_stats *stats; ++ int 
ifindex; ++ int active; ++ rtdm_lockctx_t context; ++ ++ while (1) { ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ ++ if ((rtskb = cap_queue.first) == NULL) { ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ break; ++ } ++ ++ cap_queue.first = rtskb->cap_next; ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ ifindex = rtskb->rtdev->ifindex; ++ active = tap_device[ifindex].present; ++ ++ if (active) { ++ if ((tap_device[ifindex].tap_dev->flags & IFF_UP) == 0) ++ active &= ~TAP_DEV; ++ if (active & RTMAC_TAP_DEV && ++ !(tap_device[ifindex].rtmac_tap_dev->flags & ++ IFF_UP)) ++ active &= ~RTMAC_TAP_DEV; ++ } ++ ++ if (active == 0) { ++ tap_device[ifindex].tap_dev_stats.rx_dropped++; ++ rtcap_kfree_rtskb(rtskb); ++ continue; ++ } ++ ++ skb = dev_alloc_skb(rtskb->cap_len); ++ if (skb) { ++ memcpy(skb_put(skb, rtskb->cap_len), rtskb->cap_start, ++ rtskb->cap_len); ++ ++ if (active & TAP_DEV) { ++ skb->dev = tap_device[ifindex].tap_dev; ++ skb->protocol = eth_type_trans(skb, skb->dev); ++ convert_timestamp(rtskb->time_stamp, skb); ++ ++ rtmac_skb = NULL; ++ if ((rtskb->cap_flags & ++ RTSKB_CAP_RTMAC_STAMP) && ++ (active & RTMAC_TAP_DEV)) { ++ rtmac_skb = skb_clone(skb, GFP_ATOMIC); ++ if (rtmac_skb != NULL) ++ convert_timestamp( ++ rtskb->cap_rtmac_stamp, ++ rtmac_skb); ++ } ++ ++ rtcap_kfree_rtskb(rtskb); ++ ++ stats = &tap_device[ifindex].tap_dev_stats; ++ stats->rx_packets++; ++ stats->rx_bytes += skb->len; ++ ++ if (rtmac_skb != NULL) { ++ rtmac_skb->dev = tap_device[ifindex] ++ .rtmac_tap_dev; ++ netif_rx(rtmac_skb); ++ } ++ netif_rx(skb); ++ } else if (rtskb->cap_flags & RTSKB_CAP_RTMAC_STAMP) { ++ skb->dev = tap_device[ifindex].rtmac_tap_dev; ++ skb->protocol = eth_type_trans(skb, skb->dev); ++ convert_timestamp(rtskb->cap_rtmac_stamp, skb); ++ ++ rtcap_kfree_rtskb(rtskb); ++ ++ stats = &tap_device[ifindex].tap_dev_stats; ++ stats->rx_packets++; ++ stats->rx_bytes += skb->len; ++ ++ netif_rx(skb); ++ } else { ++ dev_kfree_skb(skb); ++ rtcap_kfree_rtskb(rtskb); ++ } ++ } else { ++ printk("RTcap: unable to allocate linux skb\n"); ++ rtcap_kfree_rtskb(rtskb); ++ } ++ } ++} ++ ++static int tap_dev_open(struct net_device *dev) ++{ ++ int err; ++ ++ err = try_module_get(THIS_MODULE); ++ if (err == 0) ++ return -EIDRM; ++ ++ memcpy(dev->dev_addr, ++ (*(struct rtnet_device **)netdev_priv(dev))->dev_addr, ++ MAX_ADDR_LEN); ++ ++ return 0; ++} ++ ++static int tap_dev_stop(struct net_device *dev) ++{ ++ module_put(THIS_MODULE); ++ return 0; ++} ++ ++static int tap_dev_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ netif_stop_queue(dev); ++ return 1; ++} ++ ++static struct net_device_stats *tap_dev_get_stats(struct net_device *dev) ++{ ++ struct rtnet_device *rtdev = *(struct rtnet_device **)netdev_priv(dev); ++ ++ return &tap_device[rtdev->ifindex].tap_dev_stats; ++} ++ ++static int tap_dev_change_mtu(struct net_device *dev, int new_mtu) ++{ ++ return -EINVAL; ++} ++ ++static const struct net_device_ops tap_netdev_ops = { ++ .ndo_open = tap_dev_open, ++ .ndo_stop = tap_dev_stop, ++ .ndo_start_xmit = tap_dev_xmit, ++ .ndo_get_stats = tap_dev_get_stats, ++ .ndo_change_mtu = tap_dev_change_mtu, ++}; ++ ++static void tap_dev_setup(struct net_device *dev) ++{ ++ ether_setup(dev); ++ ++ dev->netdev_ops = &tap_netdev_ops; ++ dev->mtu = 1500; ++ dev->flags &= ~IFF_MULTICAST; ++} ++ ++void cleanup_tap_devices(void) ++{ ++ int i; ++ struct rtnet_device *rtdev; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) ++ if ((tap_device[i].present & TAP_DEV) != 0) { ++ if ((tap_device[i].present 
& XMIT_HOOK) != 0) { ++ rtdev = *(struct rtnet_device **)netdev_priv( ++ tap_device[i].tap_dev); ++ ++ mutex_lock(&rtdev->nrt_lock); ++ rtdev->hard_start_xmit = ++ tap_device[i].orig_xmit; ++ if (rtdev->features & NETIF_F_LLTX) ++ rtdev->start_xmit = ++ tap_device[i].orig_xmit; ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ rtdev_dereference(rtdev); ++ } ++ ++ if ((tap_device[i].present & RTMAC_TAP_DEV) != 0) { ++ unregister_netdev(tap_device[i].rtmac_tap_dev); ++ free_netdev(tap_device[i].rtmac_tap_dev); ++ } ++ ++ unregister_netdev(tap_device[i].tap_dev); ++ free_netdev(tap_device[i].tap_dev); ++ } ++} ++ ++int __init rtcap_init(void) ++{ ++ struct rtnet_device *rtdev; ++ struct net_device *dev; ++ int ret; ++ int devices = 0; ++ int i; ++ ++ printk("RTcap: real-time capturing interface\n"); ++ ++ rtskb_queue_init(&cap_queue); ++ ++ rtdm_nrtsig_init(&cap_signal, rtcap_signal_handler, NULL); ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ tap_device[i].present = 0; ++ ++ rtdev = rtdev_get_by_index(i); ++ if (rtdev != NULL) { ++ mutex_lock(&rtdev->nrt_lock); ++ ++ if (test_bit(PRIV_FLAG_UP, &rtdev->priv_flags)) { ++ mutex_unlock(&rtdev->nrt_lock); ++ printk("RTcap: %s busy, skipping device!\n", ++ rtdev->name); ++ rtdev_dereference(rtdev); ++ continue; ++ } ++ ++ if (rtdev->mac_priv != NULL) { ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ printk("RTcap: RTmac discipline already active on device %s. " ++ "Load RTcap before RTmac!\n", ++ rtdev->name); ++ ++ rtdev_dereference(rtdev); ++ continue; ++ } ++ ++ memset(&tap_device[i].tap_dev_stats, 0, ++ sizeof(struct net_device_stats)); ++ ++ dev = alloc_netdev(sizeof(struct rtnet_device *), ++ rtdev->name, NET_NAME_UNKNOWN, ++ tap_dev_setup); ++ if (!dev) { ++ ret = -ENOMEM; ++ goto error3; ++ } ++ ++ tap_device[i].tap_dev = dev; ++ *(struct rtnet_device **)netdev_priv(dev) = rtdev; ++ ++ ret = register_netdev(dev); ++ if (ret < 0) ++ goto error3; ++ ++ tap_device[i].present = TAP_DEV; ++ ++ tap_device[i].orig_xmit = rtdev->hard_start_xmit; ++ ++ if ((rtdev->flags & IFF_LOOPBACK) == 0) { ++ dev = alloc_netdev( ++ sizeof(struct rtnet_device *), ++ rtdev->name, NET_NAME_UNKNOWN, ++ tap_dev_setup); ++ if (!dev) { ++ ret = -ENOMEM; ++ goto error3; ++ } ++ ++ tap_device[i].rtmac_tap_dev = dev; ++ *(struct rtnet_device **)netdev_priv(dev) = ++ rtdev; ++ strncat(dev->name, "-mac", ++ IFNAMSIZ - strlen(dev->name)); ++ ++ ret = register_netdev(dev); ++ if (ret < 0) ++ goto error3; ++ ++ tap_device[i].present |= RTMAC_TAP_DEV; ++ ++ rtdev->hard_start_xmit = rtcap_xmit_hook; ++ } else ++ rtdev->hard_start_xmit = ++ rtcap_loopback_xmit_hook; ++ ++ /* If the device requires no xmit_lock, start_xmit points equals ++ * hard_start_xmit => we have to update this as well ++ */ ++ if (rtdev->features & NETIF_F_LLTX) ++ rtdev->start_xmit = rtdev->hard_start_xmit; ++ ++ tap_device[i].present |= XMIT_HOOK; ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ devices++; ++ } ++ } ++ ++ if (devices == 0) { ++ printk("RTcap: no real-time devices found!\n"); ++ ret = -ENODEV; ++ goto error2; ++ } ++ ++ if (rtskb_module_pool_init(&cap_pool, rtcap_rtskbs * devices) < ++ rtcap_rtskbs * devices) { ++ rtskb_pool_release(&cap_pool); ++ ret = -ENOMEM; ++ goto error2; ++ } ++ ++ /* register capturing handlers with RTnet core ++ * (adding the handler need no locking) */ ++ rtcap_handler = rtcap_rx_hook; ++ ++ return 0; ++ ++error3: ++ mutex_unlock(&rtdev->nrt_lock); ++ rtdev_dereference(rtdev); ++ printk("RTcap: unable to register %s!\n", dev->name); ++ ++error2: ++ cleanup_tap_devices(); ++ 
rtdm_nrtsig_destroy(&cap_signal); ++ ++ return ret; ++} ++ ++void rtcap_cleanup(void) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_nrtsig_destroy(&cap_signal); ++ ++ /* unregister capturing handlers ++ * (take lock to avoid any unloading code before handler was left) */ ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ rtcap_handler = NULL; ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ /* empty queue (should be already empty) */ ++ rtcap_signal_handler(0, NULL /* we ignore them anyway */); ++ ++ cleanup_tap_devices(); ++ ++ rtskb_pool_release(&cap_pool); ++ ++ printk("RTcap: unloaded\n"); ++} ++ ++module_init(rtcap_init); ++module_exit(rtcap_cleanup); +--- linux/drivers/xenomai/net/stack/rtskb.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtskb.c 2021-04-07 16:01:27.129634306 +0800 +@@ -0,0 +1,535 @@ ++/*** ++ * ++ * stack/rtskb.c - rtskb implementation for rtnet ++ * ++ * Copyright (C) 2002 Ulrich Marx , ++ * Copyright (C) 2003-2006 Jan Kiszka ++ * Copyright (C) 2006 Jorge Almeida ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of version 2 of the GNU General Public License as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++static unsigned int global_rtskbs = DEFAULT_GLOBAL_RTSKBS; ++module_param(global_rtskbs, uint, 0444); ++MODULE_PARM_DESC(global_rtskbs, ++ "Number of realtime socket buffers in global pool"); ++ ++/* Linux slab pool for rtskbs */ ++static struct kmem_cache *rtskb_slab_pool; ++ ++/* pool of rtskbs for global use */ ++struct rtskb_pool global_pool; ++EXPORT_SYMBOL_GPL(global_pool); ++ ++/* pool statistics */ ++unsigned int rtskb_pools = 0; ++unsigned int rtskb_pools_max = 0; ++unsigned int rtskb_amount = 0; ++unsigned int rtskb_amount_max = 0; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++/* RTcap interface */ ++rtdm_lock_t rtcap_lock; ++EXPORT_SYMBOL_GPL(rtcap_lock); ++ ++void (*rtcap_handler)(struct rtskb *skb) = NULL; ++EXPORT_SYMBOL_GPL(rtcap_handler); ++#endif ++ ++/*** ++ * rtskb_copy_and_csum_bits ++ */ ++unsigned int rtskb_copy_and_csum_bits(const struct rtskb *skb, int offset, ++ u8 *to, int len, unsigned int csum) ++{ ++ int copy; ++ ++ /* Copy header. 
*/ ++ if ((copy = skb->len - offset) > 0) { ++ if (copy > len) ++ copy = len; ++ csum = csum_partial_copy_nocheck(skb->data + offset, to, copy, ++ csum); ++ if ((len -= copy) == 0) ++ return csum; ++ offset += copy; ++ to += copy; ++ } ++ ++ RTNET_ASSERT(len == 0, ); ++ return csum; ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_copy_and_csum_bits); ++ ++/*** ++ * rtskb_copy_and_csum_dev ++ */ ++void rtskb_copy_and_csum_dev(const struct rtskb *skb, u8 *to) ++{ ++ unsigned int csum; ++ unsigned int csstart; ++ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ csstart = skb->h.raw - skb->data; ++ ++ if (csstart > skb->len) ++ BUG(); ++ } else ++ csstart = skb->len; ++ ++ memcpy(to, skb->data, csstart); ++ ++ csum = 0; ++ if (csstart != skb->len) ++ csum = rtskb_copy_and_csum_bits(skb, csstart, to + csstart, ++ skb->len - csstart, 0); ++ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ unsigned int csstuff = csstart + skb->csum; ++ ++ *((unsigned short *)(to + csstuff)) = csum_fold(csum); ++ } ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_copy_and_csum_dev); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++/** ++ * skb_over_panic - private function ++ * @skb: buffer ++ * @sz: size ++ * @here: address ++ * ++ * Out of line support code for rtskb_put(). Not user callable. ++ */ ++void rtskb_over_panic(struct rtskb *skb, int sz, void *here) ++{ ++ rtdm_printk("RTnet: rtskb_put :over: %p:%d put:%d dev:%s\n", here, ++ skb->len, sz, (skb->rtdev) ? skb->rtdev->name : ""); ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_over_panic); ++ ++/** ++ * skb_under_panic - private function ++ * @skb: buffer ++ * @sz: size ++ * @here: address ++ * ++ * Out of line support code for rtskb_push(). Not user callable. ++ */ ++void rtskb_under_panic(struct rtskb *skb, int sz, void *here) ++{ ++ rtdm_printk("RTnet: rtskb_push :under: %p:%d put:%d dev:%s\n", here, ++ skb->len, sz, (skb->rtdev) ? 
skb->rtdev->name : ""); ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_under_panic); ++#endif /* CONFIG_XENO_DRIVERS_NET_CHECKED */ ++ ++static struct rtskb *__rtskb_pool_dequeue(struct rtskb_pool *pool) ++{ ++ struct rtskb_queue *queue = &pool->queue; ++ struct rtskb *skb; ++ ++ if (pool->lock_ops && !pool->lock_ops->trylock(pool->lock_cookie)) ++ return NULL; ++ skb = __rtskb_dequeue(queue); ++ if (skb == NULL && pool->lock_ops) ++ pool->lock_ops->unlock(pool->lock_cookie); ++ ++ return skb; ++} ++ ++struct rtskb *rtskb_pool_dequeue(struct rtskb_pool *pool) ++{ ++ struct rtskb_queue *queue = &pool->queue; ++ rtdm_lockctx_t context; ++ struct rtskb *skb; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ skb = __rtskb_pool_dequeue(pool); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++ ++ return skb; ++} ++EXPORT_SYMBOL_GPL(rtskb_pool_dequeue); ++ ++static void __rtskb_pool_queue_tail(struct rtskb_pool *pool, struct rtskb *skb) ++{ ++ struct rtskb_queue *queue = &pool->queue; ++ ++ __rtskb_queue_tail(queue, skb); ++ if (pool->lock_ops) ++ pool->lock_ops->unlock(pool->lock_cookie); ++} ++ ++void rtskb_pool_queue_tail(struct rtskb_pool *pool, struct rtskb *skb) ++{ ++ struct rtskb_queue *queue = &pool->queue; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ __rtskb_pool_queue_tail(pool, skb); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++} ++EXPORT_SYMBOL_GPL(rtskb_pool_queue_tail); ++ ++/*** ++ * alloc_rtskb - allocate an rtskb from a pool ++ * @size: required buffer size (to check against maximum boundary) ++ * @pool: pool to take the rtskb from ++ */ ++struct rtskb *alloc_rtskb(unsigned int size, struct rtskb_pool *pool) ++{ ++ struct rtskb *skb; ++ ++ RTNET_ASSERT(size <= SKB_DATA_ALIGN(RTSKB_SIZE), return NULL;); ++ ++ skb = rtskb_pool_dequeue(pool); ++ if (!skb) ++ return NULL; ++ ++ /* Load the data pointers. 
*/ ++ skb->data = skb->buf_start; ++ skb->tail = skb->buf_start; ++ skb->end = skb->buf_start + size; ++ ++ /* Set up other states */ ++ skb->chain_end = skb; ++ skb->len = 0; ++ skb->pkt_type = PACKET_HOST; ++ skb->xmit_stamp = NULL; ++ skb->ip_summed = CHECKSUM_NONE; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ skb->cap_flags = 0; ++#endif ++ ++ return skb; ++} ++ ++EXPORT_SYMBOL_GPL(alloc_rtskb); ++ ++/*** ++ * kfree_rtskb ++ * @skb rtskb ++ */ ++void kfree_rtskb(struct rtskb *skb) ++{ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ rtdm_lockctx_t context; ++ struct rtskb *comp_skb; ++ struct rtskb *next_skb; ++ struct rtskb *chain_end; ++#endif ++ ++ RTNET_ASSERT(skb != NULL, return;); ++ RTNET_ASSERT(skb->pool != NULL, return;); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ next_skb = skb; ++ chain_end = skb->chain_end; ++ ++ do { ++ skb = next_skb; ++ next_skb = skb->next; ++ ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ ++ if (skb->cap_flags & RTSKB_CAP_SHARED) { ++ skb->cap_flags &= ~RTSKB_CAP_SHARED; ++ ++ comp_skb = skb->cap_comp_skb; ++ skb->pool = xchg(&comp_skb->pool, skb->pool); ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ rtskb_pool_queue_tail(comp_skb->pool, comp_skb); ++ } else { ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ skb->chain_end = skb; ++ rtskb_pool_queue_tail(skb->pool, skb); ++ } ++ ++ } while (chain_end != skb); ++ ++#else /* CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP */ ++ ++ rtskb_pool_queue_tail(skb->pool, skb); ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP */ ++} ++ ++EXPORT_SYMBOL_GPL(kfree_rtskb); ++ ++/*** ++ * rtskb_pool_init ++ * @pool: pool to be initialized ++ * @initial_size: number of rtskbs to allocate ++ * return: number of actually allocated rtskbs ++ */ ++unsigned int rtskb_pool_init(struct rtskb_pool *pool, unsigned int initial_size, ++ const struct rtskb_pool_lock_ops *lock_ops, ++ void *lock_cookie) ++{ ++ unsigned int i; ++ ++ rtskb_queue_init(&pool->queue); ++ ++ i = rtskb_pool_extend(pool, initial_size); ++ ++ rtskb_pools++; ++ if (rtskb_pools > rtskb_pools_max) ++ rtskb_pools_max = rtskb_pools; ++ ++ pool->lock_ops = lock_ops; ++ pool->lock_cookie = lock_cookie; ++ ++ return i; ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_pool_init); ++ ++static int rtskb_module_pool_trylock(void *cookie) ++{ ++ int err = 1; ++ if (cookie) ++ err = try_module_get(cookie); ++ return err; ++} ++ ++static void rtskb_module_pool_unlock(void *cookie) ++{ ++ if (cookie) ++ module_put(cookie); ++} ++ ++static const struct rtskb_pool_lock_ops rtskb_module_lock_ops = { ++ .trylock = rtskb_module_pool_trylock, ++ .unlock = rtskb_module_pool_unlock, ++}; ++ ++unsigned int __rtskb_module_pool_init(struct rtskb_pool *pool, ++ unsigned int initial_size, ++ struct module *module) ++{ ++ return rtskb_pool_init(pool, initial_size, &rtskb_module_lock_ops, ++ module); ++} ++EXPORT_SYMBOL_GPL(__rtskb_module_pool_init); ++ ++/*** ++ * __rtskb_pool_release ++ * @pool: pool to release ++ */ ++void rtskb_pool_release(struct rtskb_pool *pool) ++{ ++ struct rtskb *skb; ++ ++ while ((skb = rtskb_dequeue(&pool->queue)) != NULL) { ++ rtdev_unmap_rtskb(skb); ++ kmem_cache_free(rtskb_slab_pool, skb); ++ rtskb_amount--; ++ } ++ ++ rtskb_pools--; ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_pool_release); ++ ++unsigned int rtskb_pool_extend(struct rtskb_pool *pool, unsigned int add_rtskbs) ++{ ++ unsigned int i; ++ struct rtskb *skb; ++ ++ RTNET_ASSERT(pool != NULL, return -EINVAL;); ++ ++ for (i = 0; i < add_rtskbs; i++) { ++ /* get rtskb 
from slab pool */ ++ if (!(skb = kmem_cache_alloc(rtskb_slab_pool, GFP_KERNEL))) { ++ printk(KERN_ERR ++ "RTnet: rtskb allocation from slab pool failed\n"); ++ break; ++ } ++ ++ /* fill the header with zero */ ++ memset(skb, 0, sizeof(struct rtskb)); ++ ++ skb->chain_end = skb; ++ skb->pool = pool; ++ skb->buf_start = ++ ((unsigned char *)skb) + ALIGN_RTSKB_STRUCT_LEN; ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++ skb->buf_end = skb->buf_start + SKB_DATA_ALIGN(RTSKB_SIZE) - 1; ++#endif ++ ++ if (rtdev_map_rtskb(skb) < 0) { ++ kmem_cache_free(rtskb_slab_pool, skb); ++ break; ++ } ++ ++ rtskb_queue_tail(&pool->queue, skb); ++ ++ rtskb_amount++; ++ if (rtskb_amount > rtskb_amount_max) ++ rtskb_amount_max = rtskb_amount; ++ } ++ ++ return i; ++} ++ ++unsigned int rtskb_pool_shrink(struct rtskb_pool *pool, unsigned int rem_rtskbs) ++{ ++ unsigned int i; ++ struct rtskb *skb; ++ ++ for (i = 0; i < rem_rtskbs; i++) { ++ if ((skb = rtskb_dequeue(&pool->queue)) == NULL) ++ break; ++ ++ rtdev_unmap_rtskb(skb); ++ kmem_cache_free(rtskb_slab_pool, skb); ++ rtskb_amount--; ++ } ++ ++ return i; ++} ++ ++/* Note: acquires only the first skb of a chain! */ ++int rtskb_acquire(struct rtskb *rtskb, struct rtskb_pool *comp_pool) ++{ ++ struct rtskb *comp_rtskb; ++ struct rtskb_pool *release_pool; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&comp_pool->queue.lock, context); ++ ++ comp_rtskb = __rtskb_pool_dequeue(comp_pool); ++ if (!comp_rtskb) { ++ rtdm_lock_put_irqrestore(&comp_pool->queue.lock, context); ++ return -ENOMEM; ++ } ++ ++ rtdm_lock_put(&comp_pool->queue.lock); ++ ++ comp_rtskb->chain_end = comp_rtskb; ++ comp_rtskb->pool = release_pool = rtskb->pool; ++ ++ rtdm_lock_get(&release_pool->queue.lock); ++ ++ __rtskb_pool_queue_tail(release_pool, comp_rtskb); ++ ++ rtdm_lock_put_irqrestore(&release_pool->queue.lock, context); ++ ++ rtskb->pool = comp_pool; ++ ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_acquire); ++ ++/* clone rtskb to another, allocating the new rtskb from pool */ ++struct rtskb *rtskb_clone(struct rtskb *rtskb, struct rtskb_pool *pool) ++{ ++ struct rtskb *clone_rtskb; ++ unsigned int total_len; ++ ++ clone_rtskb = alloc_rtskb(rtskb->end - rtskb->buf_start, pool); ++ if (clone_rtskb == NULL) ++ return NULL; ++ ++ /* Note: We don't clone ++ - rtskb.sk ++ - rtskb.xmit_stamp ++ until real use cases show up. 
*/ ++ ++ clone_rtskb->priority = rtskb->priority; ++ clone_rtskb->rtdev = rtskb->rtdev; ++ clone_rtskb->time_stamp = rtskb->time_stamp; ++ ++ clone_rtskb->mac.raw = clone_rtskb->buf_start; ++ clone_rtskb->nh.raw = clone_rtskb->buf_start; ++ clone_rtskb->h.raw = clone_rtskb->buf_start; ++ ++ clone_rtskb->data += rtskb->data - rtskb->buf_start; ++ clone_rtskb->tail += rtskb->tail - rtskb->buf_start; ++ clone_rtskb->mac.raw += rtskb->mac.raw - rtskb->buf_start; ++ clone_rtskb->nh.raw += rtskb->nh.raw - rtskb->buf_start; ++ clone_rtskb->h.raw += rtskb->h.raw - rtskb->buf_start; ++ ++ clone_rtskb->protocol = rtskb->protocol; ++ clone_rtskb->pkt_type = rtskb->pkt_type; ++ ++ clone_rtskb->ip_summed = rtskb->ip_summed; ++ clone_rtskb->csum = rtskb->csum; ++ ++ total_len = rtskb->len + rtskb->data - rtskb->mac.raw; ++ memcpy(clone_rtskb->mac.raw, rtskb->mac.raw, total_len); ++ clone_rtskb->len = rtskb->len; ++ ++ return clone_rtskb; ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_clone); ++ ++int rtskb_pools_init(void) ++{ ++ rtskb_slab_pool = kmem_cache_create("rtskb_slab_pool", ++ ALIGN_RTSKB_STRUCT_LEN + ++ SKB_DATA_ALIGN(RTSKB_SIZE), ++ 0, SLAB_HWCACHE_ALIGN, NULL); ++ if (rtskb_slab_pool == NULL) ++ return -ENOMEM; ++ ++ /* reset the statistics (cache is accounted separately) */ ++ rtskb_pools = 0; ++ rtskb_pools_max = 0; ++ rtskb_amount = 0; ++ rtskb_amount_max = 0; ++ ++ /* create the global rtskb pool */ ++ if (rtskb_module_pool_init(&global_pool, global_rtskbs) < global_rtskbs) ++ goto err_out; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ rtdm_lock_init(&rtcap_lock); ++#endif ++ ++ return 0; ++ ++err_out: ++ rtskb_pool_release(&global_pool); ++ kmem_cache_destroy(rtskb_slab_pool); ++ ++ return -ENOMEM; ++} ++ ++void rtskb_pools_release(void) ++{ ++ rtskb_pool_release(&global_pool); ++ kmem_cache_destroy(rtskb_slab_pool); ++} +--- linux/drivers/xenomai/net/stack/rtnet_rtpc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtnet_rtpc.c 2021-04-07 16:01:27.124634313 +0800 +@@ -0,0 +1,258 @@ ++/*** ++ * ++ * stack/rtnet_rtpc.c ++ * ++ * RTnet - real-time networking subsystem ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++static DEFINE_RTDM_LOCK(pending_calls_lock); ++static DEFINE_RTDM_LOCK(processed_calls_lock); ++static rtdm_event_t dispatch_event; ++static rtdm_task_t dispatch_task; ++static rtdm_nrtsig_t rtpc_nrt_signal; ++ ++LIST_HEAD(pending_calls); ++LIST_HEAD(processed_calls); ++ ++#ifndef __wait_event_interruptible_timeout ++#define __wait_event_interruptible_timeout(wq, condition, ret) \ ++ do { \ ++ wait_queue_t __wait; \ ++ init_waitqueue_entry(&__wait, current); \ ++ \ ++ add_wait_queue(&wq, &__wait); \ ++ for (;;) { \ ++ set_current_state(TASK_INTERRUPTIBLE); \ ++ if (condition) \ ++ break; \ ++ if (!signal_pending(current)) { \ ++ ret = schedule_timeout(ret); \ ++ if (!ret) \ ++ break; \ ++ continue; \ ++ } \ ++ ret = -ERESTARTSYS; \ ++ break; \ ++ } \ ++ current->state = TASK_RUNNING; \ ++ remove_wait_queue(&wq, &__wait); \ ++ } while (0) ++#endif ++ ++#ifndef wait_event_interruptible_timeout ++#define wait_event_interruptible_timeout(wq, condition, timeout) \ ++ ({ \ ++ long __ret = timeout; \ ++ if (!(condition)) \ ++ __wait_event_interruptible_timeout(wq, condition, \ ++ __ret); \ ++ __ret; \ ++ }) ++#endif ++ ++int rtnet_rtpc_dispatch_call(rtpc_proc proc, unsigned int timeout, ++ void *priv_data, size_t priv_data_size, ++ rtpc_copy_back_proc copy_back_handler, ++ rtpc_cleanup_proc cleanup_handler) ++{ ++ struct rt_proc_call *call; ++ rtdm_lockctx_t context; ++ int ret; ++ ++ call = kmalloc(sizeof(struct rt_proc_call) + priv_data_size, ++ GFP_KERNEL); ++ if (call == NULL) ++ return -ENOMEM; ++ ++ memcpy(call->priv_data, priv_data, priv_data_size); ++ ++ call->processed = 0; ++ call->proc = proc; ++ call->result = 0; ++ call->cleanup_handler = cleanup_handler; ++ atomic_set(&call->ref_count, 2); /* dispatcher + rt-procedure */ ++ init_waitqueue_head(&call->call_wq); ++ ++ rtdm_lock_get_irqsave(&pending_calls_lock, context); ++ list_add_tail(&call->list_entry, &pending_calls); ++ rtdm_lock_put_irqrestore(&pending_calls_lock, context); ++ ++ rtdm_event_signal(&dispatch_event); ++ ++ if (timeout > 0) { ++ ret = wait_event_interruptible_timeout( ++ call->call_wq, call->processed, (timeout * HZ) / 1000); ++ if (ret == 0) ++ ret = -ETIME; ++ } else ++ ret = wait_event_interruptible(call->call_wq, call->processed); ++ ++ if (ret >= 0) { ++ if (copy_back_handler != NULL) ++ copy_back_handler(call, priv_data); ++ ret = call->result; ++ } ++ ++ if (atomic_dec_and_test(&call->ref_count)) { ++ if (call->cleanup_handler != NULL) ++ call->cleanup_handler(&call->priv_data); ++ kfree(call); ++ } ++ ++ return ret; ++} ++ ++static inline struct rt_proc_call *rtpc_dequeue_pending_call(void) ++{ ++ rtdm_lockctx_t context; ++ struct rt_proc_call *call = NULL; ++ ++ rtdm_lock_get_irqsave(&pending_calls_lock, context); ++ if (!list_empty(&pending_calls)) { ++ call = (struct rt_proc_call *)pending_calls.next; ++ list_del(&call->list_entry); ++ } ++ rtdm_lock_put_irqrestore(&pending_calls_lock, context); ++ ++ return call; ++} ++ ++static inline void rtpc_queue_processed_call(struct rt_proc_call *call) ++{ ++ rtdm_lockctx_t context; ++ bool trigger; ++ ++ rtdm_lock_get_irqsave(&processed_calls_lock, context); ++ trigger = list_empty(&processed_calls); ++ list_add_tail(&call->list_entry, &processed_calls); ++ rtdm_lock_put_irqrestore(&processed_calls_lock, context); ++ ++ if (trigger) ++ rtdm_nrtsig_pend(&rtpc_nrt_signal); ++} ++ ++static inline struct rt_proc_call *rtpc_dequeue_processed_call(void) ++{ ++ 
rtdm_lockctx_t context; ++ struct rt_proc_call *call = NULL; ++ ++ rtdm_lock_get_irqsave(&processed_calls_lock, context); ++ if (!list_empty(&processed_calls)) { ++ call = (struct rt_proc_call *)processed_calls.next; ++ list_del(&call->list_entry); ++ } ++ rtdm_lock_put_irqrestore(&processed_calls_lock, context); ++ ++ return call; ++} ++ ++static void rtpc_dispatch_handler(void *arg) ++{ ++ struct rt_proc_call *call; ++ int ret; ++ ++ while (!rtdm_task_should_stop()) { ++ if (rtdm_event_wait(&dispatch_event) < 0) ++ break; ++ ++ while ((call = rtpc_dequeue_pending_call())) { ++ ret = call->proc(call); ++ if (ret != -CALL_PENDING) ++ rtpc_complete_call(call, ret); ++ } ++ } ++} ++ ++static void rtpc_signal_handler(rtdm_nrtsig_t *nrt_sig, void *arg) ++{ ++ struct rt_proc_call *call; ++ ++ while ((call = rtpc_dequeue_processed_call()) != NULL) { ++ call->processed = 1; ++ wake_up(&call->call_wq); ++ ++ if (atomic_dec_and_test(&call->ref_count)) { ++ if (call->cleanup_handler != NULL) ++ call->cleanup_handler(&call->priv_data); ++ kfree(call); ++ } ++ } ++} ++ ++void rtnet_rtpc_complete_call(struct rt_proc_call *call, int result) ++{ ++ call->result = result; ++ rtpc_queue_processed_call(call); ++} ++ ++void rtnet_rtpc_complete_call_nrt(struct rt_proc_call *call, int result) ++{ ++ RTNET_ASSERT(!rtdm_in_rt_context(), ++ rtnet_rtpc_complete_call(call, result); ++ return;); ++ ++ call->processed = 1; ++ wake_up(&call->call_wq); ++ ++ if (atomic_dec_and_test(&call->ref_count)) { ++ if (call->cleanup_handler != NULL) ++ call->cleanup_handler(&call->priv_data); ++ kfree(call); ++ } ++} ++ ++int __init rtpc_init(void) ++{ ++ int ret; ++ ++ rtdm_nrtsig_init(&rtpc_nrt_signal, rtpc_signal_handler, NULL); ++ ++ rtdm_event_init(&dispatch_event, 0); ++ ++ ret = rtdm_task_init(&dispatch_task, "rtnet-rtpc", ++ rtpc_dispatch_handler, 0, ++ RTDM_TASK_LOWEST_PRIORITY, 0); ++ if (ret < 0) { ++ rtdm_event_destroy(&dispatch_event); ++ rtdm_nrtsig_destroy(&rtpc_nrt_signal); ++ } ++ ++ return ret; ++} ++ ++void rtpc_cleanup(void) ++{ ++ rtdm_event_destroy(&dispatch_event); ++ rtdm_task_destroy(&dispatch_task); ++ rtdm_nrtsig_destroy(&rtpc_nrt_signal); ++} ++ ++EXPORT_SYMBOL_GPL(rtnet_rtpc_dispatch_call); ++EXPORT_SYMBOL_GPL(rtnet_rtpc_complete_call); ++EXPORT_SYMBOL_GPL(rtnet_rtpc_complete_call_nrt); +--- linux/drivers/xenomai/net/stack/rtnet_module.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtnet_module.c 2021-04-07 16:01:27.120634319 +0800 +@@ -0,0 +1,411 @@ ++/*** ++ * ++ * stack/rtnet_module.c - module framework, proc file system ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003-2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("RTnet stack core"); ++ ++struct class *rtnet_class; ++ ++struct rtnet_mgr STACK_manager; ++struct rtnet_mgr RTDEV_manager; ++ ++EXPORT_SYMBOL_GPL(STACK_manager); ++EXPORT_SYMBOL_GPL(RTDEV_manager); ++ ++const char rtnet_rtdm_provider_name[] = ++ "(C) 1999-2008 RTnet Development Team, http://www.rtnet.org"; ++ ++EXPORT_SYMBOL_GPL(rtnet_rtdm_provider_name); ++ ++void rtnet_corectl_register(void); ++void rtnet_corectl_unregister(void); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++/*** ++ * proc filesystem section ++ */ ++struct xnvfile_directory rtnet_proc_root; ++EXPORT_SYMBOL_GPL(rtnet_proc_root); ++ ++static int rtnet_devices_nrt_lock_get(struct xnvfile *vfile) ++{ ++ return mutex_lock_interruptible(&rtnet_devices_nrt_lock); ++} ++ ++static void rtnet_devices_nrt_lock_put(struct xnvfile *vfile) ++{ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++} ++ ++static struct xnvfile_lock_ops rtnet_devices_nrt_lock_ops = { ++ .get = rtnet_devices_nrt_lock_get, ++ .put = rtnet_devices_nrt_lock_put, ++}; ++ ++static void *rtnet_devices_begin(struct xnvfile_regular_iterator *it) ++{ ++ if (it->pos == 0) ++ return VFILE_SEQ_START; ++ ++ return (void *)2UL; ++} ++ ++static void *rtnet_devices_next(struct xnvfile_regular_iterator *it) ++{ ++ if (it->pos >= MAX_RT_DEVICES) ++ return NULL; ++ ++ return (void *)2UL; ++} ++ ++static int rtnet_devices_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct rtnet_device *rtdev; ++ ++ if (data == NULL) { ++ xnvfile_printf(it, "Index\tName\t\tFlags\n"); ++ return 0; ++ } ++ ++ rtdev = __rtdev_get_by_index(it->pos); ++ if (rtdev == NULL) ++ return VFILE_SEQ_SKIP; ++ ++ xnvfile_printf(it, "%d\t%-15s %s%s%s%s\n", rtdev->ifindex, rtdev->name, ++ (rtdev->flags & IFF_UP) ? "UP" : "DOWN", ++ (rtdev->flags & IFF_BROADCAST) ? " BROADCAST" : "", ++ (rtdev->flags & IFF_LOOPBACK) ? " LOOPBACK" : "", ++ (rtdev->flags & IFF_PROMISC) ? 
" PROMISC" : ""); ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_devices_vfile_ops = { ++ .begin = rtnet_devices_begin, ++ .next = rtnet_devices_next, ++ .show = rtnet_devices_show, ++}; ++ ++static struct xnvfile_regular rtnet_devices_vfile = { ++ .entry = { .lockops = &rtnet_devices_nrt_lock_ops, }, ++ .ops = &rtnet_devices_vfile_ops, ++}; ++ ++static int rtnet_rtskb_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ unsigned int rtskb_len; ++ ++ rtskb_len = ALIGN_RTSKB_STRUCT_LEN + SKB_DATA_ALIGN(RTSKB_SIZE); ++ ++ xnvfile_printf(it, ++ "Statistics\t\tCurrent\tMaximum\n" ++ "rtskb pools\t\t%d\t%d\n" ++ "rtskbs\t\t\t%d\t%d\n" ++ "rtskb memory need\t%d\t%d\n", ++ rtskb_pools, rtskb_pools_max, rtskb_amount, ++ rtskb_amount_max, rtskb_amount * rtskb_len, ++ rtskb_amount_max * rtskb_len); ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_rtskb_vfile_ops = { ++ .show = rtnet_rtskb_show, ++}; ++ ++static struct xnvfile_regular rtnet_rtskb_vfile = { ++ .ops = &rtnet_rtskb_vfile_ops, ++}; ++ ++static int rtnet_version_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ const char verstr[] = "RTnet for Xenomai v" XENO_VERSION_STRING "\n" ++ "RTcap: " ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ "yes\n" ++#else ++ "no\n" ++#endif ++ "rtnetproxy: " ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++ "yes\n" ++#else ++ "no\n" ++#endif ++ "bug checks: " ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++ "yes\n" ++#else ++ "no\n" ++#endif ++ ; ++ ++ xnvfile_printf(it, "%s", verstr); ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_version_vfile_ops = { ++ .show = rtnet_version_show, ++}; ++ ++static struct xnvfile_regular rtnet_version_vfile = { ++ .ops = &rtnet_version_vfile_ops, ++}; ++ ++static void *rtnet_stats_begin(struct xnvfile_regular_iterator *it) ++{ ++ return (void *)1UL; ++} ++ ++static void *rtnet_stats_next(struct xnvfile_regular_iterator *it) ++{ ++ if (it->pos >= MAX_RT_DEVICES) ++ return NULL; ++ ++ return (void *)1UL; ++} ++ ++static int rtnet_stats_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct net_device_stats *stats; ++ struct rtnet_device *rtdev; ++ ++ if (it->pos == 0) { ++ xnvfile_printf(it, ++ "Inter-| Receive " ++ " | Transmit\n"); ++ xnvfile_printf(it, ++ " face |bytes packets errs drop fifo frame " ++ "compressed multicast|bytes packets errs " ++ "drop fifo colls carrier compressed\n"); ++ return 0; ++ } ++ ++ rtdev = __rtdev_get_by_index(it->pos); ++ if (rtdev == NULL) ++ return VFILE_SEQ_SKIP; ++ ++ if (rtdev->get_stats == NULL) { ++ xnvfile_printf(it, "%6s: No statistics available.\n", ++ rtdev->name); ++ return 0; ++ } ++ ++ stats = rtdev->get_stats(rtdev); ++ xnvfile_printf( ++ it, ++ "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " ++ "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", ++ rtdev->name, stats->rx_bytes, stats->rx_packets, ++ stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, ++ stats->rx_fifo_errors, ++ stats->rx_length_errors + stats->rx_over_errors + ++ stats->rx_crc_errors + stats->rx_frame_errors, ++ stats->rx_compressed, stats->multicast, stats->tx_bytes, ++ stats->tx_packets, stats->tx_errors, stats->tx_dropped, ++ stats->tx_fifo_errors, stats->collisions, ++ stats->tx_carrier_errors + stats->tx_aborted_errors + ++ stats->tx_window_errors + stats->tx_heartbeat_errors, ++ stats->tx_compressed); ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_stats_vfile_ops = { ++ .begin = rtnet_stats_begin, ++ .next = rtnet_stats_next, ++ .show = 
rtnet_stats_show, ++}; ++ ++static struct xnvfile_regular rtnet_stats_vfile = { ++ .entry = { .lockops = &rtnet_devices_nrt_lock_ops, }, ++ .ops = &rtnet_stats_vfile_ops, ++}; ++ ++static int rtnet_proc_register(void) ++{ ++ int err; ++ ++ err = xnvfile_init_dir("rtnet", &rtnet_proc_root, NULL); ++ if (err < 0) ++ goto error1; ++ ++ err = xnvfile_init_regular("devices", &rtnet_devices_vfile, ++ &rtnet_proc_root); ++ if (err < 0) ++ goto error2; ++ ++ err = xnvfile_init_regular("rtskb", &rtnet_rtskb_vfile, ++ &rtnet_proc_root); ++ if (err < 0) ++ goto error3; ++ ++ err = xnvfile_init_regular("version", &rtnet_version_vfile, ++ &rtnet_proc_root); ++ if (err < 0) ++ goto error4; ++ ++ err = xnvfile_init_regular("stats", &rtnet_stats_vfile, ++ &rtnet_proc_root); ++ if (err < 0) ++ goto error5; ++ ++ return 0; ++ ++error5: ++ xnvfile_destroy_regular(&rtnet_version_vfile); ++ ++error4: ++ xnvfile_destroy_regular(&rtnet_rtskb_vfile); ++ ++error3: ++ xnvfile_destroy_regular(&rtnet_devices_vfile); ++ ++error2: ++ xnvfile_destroy_dir(&rtnet_proc_root); ++ ++error1: ++ printk("RTnet: unable to initialize /proc entries\n"); ++ return err; ++} ++ ++static void rtnet_proc_unregister(void) ++{ ++ xnvfile_destroy_regular(&rtnet_stats_vfile); ++ xnvfile_destroy_regular(&rtnet_version_vfile); ++ xnvfile_destroy_regular(&rtnet_rtskb_vfile); ++ xnvfile_destroy_regular(&rtnet_devices_vfile); ++ xnvfile_destroy_dir(&rtnet_proc_root); ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++/** ++ * rtnet_init() ++ */ ++int __init rtnet_init(void) ++{ ++ int err = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ printk("\n*** RTnet for Xenomai v" XENO_VERSION_STRING " ***\n\n"); ++ printk("RTnet: initialising real-time networking\n"); ++ ++ rtnet_class = class_create(THIS_MODULE, "rtnet"); ++ if (IS_ERR(rtnet_class)) ++ return PTR_ERR(rtnet_class); ++ ++ if ((err = rtskb_pools_init()) != 0) ++ goto err_out1; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ if ((err = rtnet_proc_register()) != 0) ++ goto err_out2; ++#endif ++ ++ /* initialize the Stack-Manager */ ++ if ((err = rt_stack_mgr_init(&STACK_manager)) != 0) ++ goto err_out3; ++ ++ /* initialize the RTDEV-Manager */ ++ if ((err = rt_rtdev_mgr_init(&RTDEV_manager)) != 0) ++ goto err_out4; ++ ++ rtnet_chrdev_init(); ++ ++ if ((err = rtwlan_init()) != 0) ++ goto err_out5; ++ ++ if ((err = rtpc_init()) != 0) ++ goto err_out6; ++ ++ rtnet_corectl_register(); ++ ++ return 0; ++ ++err_out6: ++ rtwlan_exit(); ++ ++err_out5: ++ rtnet_chrdev_release(); ++ rt_rtdev_mgr_delete(&RTDEV_manager); ++ ++err_out4: ++ rt_stack_mgr_delete(&STACK_manager); ++ ++err_out3: ++#ifdef CONFIG_XENO_OPT_VFILE ++ rtnet_proc_unregister(); ++ ++err_out2: ++#endif ++ rtskb_pools_release(); ++ ++err_out1: ++ class_destroy(rtnet_class); ++ ++ return err; ++} ++ ++/** ++ * rtnet_release() ++ */ ++void __exit rtnet_release(void) ++{ ++ rtnet_corectl_unregister(); ++ ++ rtpc_cleanup(); ++ ++ rtwlan_exit(); ++ ++ rtnet_chrdev_release(); ++ ++ rt_stack_mgr_delete(&STACK_manager); ++ rt_rtdev_mgr_delete(&RTDEV_manager); ++ ++ rtskb_pools_release(); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ rtnet_proc_unregister(); ++#endif ++ ++ class_destroy(rtnet_class); ++ ++ printk("RTnet: unloaded\n"); ++} ++ ++module_init(rtnet_init); ++module_exit(rtnet_release); +--- linux/drivers/xenomai/net/stack/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/Makefile 2021-04-07 16:01:27.115634326 +0800 +@@ -0,0 +1,26 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include -Ikernel/ ++ 
++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4) += ipv4/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTPACKET) += packet/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTMAC) += rtmac/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTCFG) += rtcfg/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET) += rtnet.o ++ ++rtnet-y := \ ++ corectl.o \ ++ iovec.o \ ++ rtdev.o \ ++ rtdev_mgr.o \ ++ rtnet_chrdev.o \ ++ rtnet_module.o \ ++ rtnet_rtpc.o \ ++ rtskb.o \ ++ socket.o \ ++ stack_mgr.o \ ++ eth.o ++ ++rtnet-$(CONFIG_XENO_DRIVERS_NET_RTWLAN) += rtwlan.o +--- linux/drivers/xenomai/net/stack/packet/af_packet.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/packet/af_packet.c 2021-04-07 16:01:27.110634333 +0800 +@@ -0,0 +1,670 @@ ++/*** ++ * ++ * packet/af_packet.c ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2003-2006 Jan Kiszka ++ * Copyright (C) 2006 Jorge Almeida ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++ ++/*** ++ * rt_packet_rcv ++ */ ++static int rt_packet_rcv(struct rtskb *skb, struct rtpacket_type *pt) ++{ ++ struct rtsocket *sock = ++ container_of(pt, struct rtsocket, prot.packet.packet_type); ++ int ifindex = sock->prot.packet.ifindex; ++ void (*callback_func)(struct rtdm_fd *, void *); ++ void *callback_arg; ++ rtdm_lockctx_t context; ++ ++ if (unlikely((ifindex != 0) && (ifindex != skb->rtdev->ifindex))) ++ return -EUNATCH; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ETH_P_ALL ++ if (pt->type == htons(ETH_P_ALL)) { ++ struct rtskb *clone_skb = rtskb_clone(skb, &sock->skb_pool); ++ if (clone_skb == NULL) ++ goto out; ++ skb = clone_skb; ++ } else ++#endif /* CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++ if (unlikely(rtskb_acquire(skb, &sock->skb_pool) < 0)) { ++ kfree_rtskb(skb); ++ goto out; ++ } ++ ++ rtskb_queue_tail(&sock->incoming, skb); ++ rtdm_sem_up(&sock->pending_sem); ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ callback_func = sock->callback_func; ++ callback_arg = sock->callback_arg; ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ ++ if (callback_func) ++ callback_func(rt_socket_fd(sock), callback_arg); ++ ++out: ++ return 0; ++} ++ ++static bool rt_packet_trylock(struct rtpacket_type *pt) ++{ ++ struct rtsocket *sock = ++ container_of(pt, struct rtsocket, prot.packet.packet_type); ++ struct rtdm_fd *fd = rtdm_private_to_fd(sock); ++ ++ if (rtdm_fd_lock(fd) < 0) ++ return false; ++ ++ return true; ++} ++ ++static void rt_packet_unlock(struct rtpacket_type *pt) ++{ ++ struct rtsocket *sock = ++ container_of(pt, struct rtsocket, prot.packet.packet_type); ++ struct rtdm_fd *fd = rtdm_private_to_fd(sock); ++ ++ rtdm_fd_unlock(fd); ++} ++ ++/*** ++ * rt_packet_bind ++ */ ++static int rt_packet_bind(struct rtdm_fd *fd, struct rtsocket *sock, ++ const struct sockaddr *addr, socklen_t 
addrlen) ++{ ++ struct sockaddr_ll _sll, *sll; ++ struct rtpacket_type *pt = &sock->prot.packet.packet_type; ++ int new_type; ++ int ret; ++ rtdm_lockctx_t context; ++ ++ if (addrlen < sizeof(struct sockaddr_ll)) ++ return -EINVAL; ++ ++ sll = rtnet_get_arg(fd, &_sll, addr, sizeof(_sll)); ++ if (IS_ERR(sll)) ++ return PTR_ERR(sll); ++ ++ if (sll->sll_family != AF_PACKET) ++ return -EINVAL; ++ ++ new_type = ++ (sll->sll_protocol != 0) ? sll->sll_protocol : sock->protocol; ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ ++ /* release existing binding */ ++ if (pt->type != 0) ++ rtdev_remove_pack(pt); ++ ++ pt->type = new_type; ++ sock->prot.packet.ifindex = sll->sll_ifindex; ++ ++ /* if protocol is non-zero, register the packet type */ ++ if (new_type != 0) { ++ pt->handler = rt_packet_rcv; ++ pt->err_handler = NULL; ++ pt->trylock = rt_packet_trylock; ++ pt->unlock = rt_packet_unlock; ++ ++ ret = rtdev_add_pack(pt); ++ } else ++ ret = 0; ++ ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ ++ return ret; ++} ++ ++/*** ++ * rt_packet_getsockname ++ */ ++static int rt_packet_getsockname(struct rtdm_fd *fd, struct rtsocket *sock, ++ struct sockaddr *addr, socklen_t *addrlen) ++{ ++ struct sockaddr_ll _sll, *sll; ++ struct rtnet_device *rtdev; ++ rtdm_lockctx_t context; ++ socklen_t _namelen, *namelen; ++ int ret; ++ ++ namelen = rtnet_get_arg(fd, &_namelen, addrlen, sizeof(_namelen)); ++ if (IS_ERR(namelen)) ++ return PTR_ERR(namelen); ++ ++ if (*namelen < sizeof(struct sockaddr_ll)) ++ return -EINVAL; ++ ++ sll = rtnet_get_arg(fd, &_sll, addr, sizeof(_sll)); ++ if (IS_ERR(sll)) ++ return PTR_ERR(sll); ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ ++ sll->sll_family = AF_PACKET; ++ sll->sll_ifindex = sock->prot.packet.ifindex; ++ sll->sll_protocol = sock->protocol; ++ ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ ++ rtdev = rtdev_get_by_index(sll->sll_ifindex); ++ if (rtdev != NULL) { ++ sll->sll_hatype = rtdev->type; ++ sll->sll_halen = rtdev->addr_len; ++ memcpy(sll->sll_addr, rtdev->dev_addr, rtdev->addr_len); ++ rtdev_dereference(rtdev); ++ } else { ++ sll->sll_hatype = 0; ++ sll->sll_halen = 0; ++ } ++ ++ *namelen = sizeof(struct sockaddr_ll); ++ ++ ret = rtnet_put_arg(fd, addr, sll, sizeof(*sll)); ++ if (ret) ++ return ret; ++ ++ return rtnet_put_arg(fd, addrlen, namelen, sizeof(*namelen)); ++} ++ ++/*** ++ * rt_packet_socket - initialize a packet socket ++ */ ++static int rt_packet_socket(struct rtdm_fd *fd, int protocol) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ int ret; ++ ++ if ((ret = rt_socket_init(fd, protocol)) != 0) ++ return ret; ++ ++ sock->prot.packet.packet_type.type = protocol; ++ sock->prot.packet.ifindex = 0; ++ sock->prot.packet.packet_type.trylock = rt_packet_trylock; ++ sock->prot.packet.packet_type.unlock = rt_packet_unlock; ++ ++ /* if protocol is non-zero, register the packet type */ ++ if (protocol != 0) { ++ sock->prot.packet.packet_type.handler = rt_packet_rcv; ++ sock->prot.packet.packet_type.err_handler = NULL; ++ ++ if ((ret = rtdev_add_pack(&sock->prot.packet.packet_type)) < ++ 0) { ++ rt_socket_cleanup(fd); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++/*** ++ * rt_packet_close ++ */ ++static void rt_packet_close(struct rtdm_fd *fd) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ struct rtpacket_type *pt = &sock->prot.packet.packet_type; ++ struct rtskb *del; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ ++ if (pt->type != 0) { ++ 
rtdev_remove_pack(pt); ++ pt->type = 0; ++ } ++ ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ ++ /* free packets in incoming queue */ ++ while ((del = rtskb_dequeue(&sock->incoming)) != NULL) { ++ kfree_rtskb(del); ++ } ++ ++ rt_socket_cleanup(fd); ++} ++ ++/*** ++ * rt_packet_ioctl ++ */ ++static int rt_packet_ioctl(struct rtdm_fd *fd, unsigned int request, ++ void __user *arg) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ const struct _rtdm_setsockaddr_args *setaddr; ++ struct _rtdm_setsockaddr_args _setaddr; ++ const struct _rtdm_getsockaddr_args *getaddr; ++ struct _rtdm_getsockaddr_args _getaddr; ++ ++ /* fast path for common socket IOCTLs */ ++ if (_IOC_TYPE(request) == RTIOC_TYPE_NETWORK) ++ return rt_socket_common_ioctl(fd, request, arg); ++ ++ switch (request) { ++ case _RTIOC_BIND: ++ setaddr = rtnet_get_arg(fd, &_setaddr, arg, sizeof(_setaddr)); ++ if (IS_ERR(setaddr)) ++ return PTR_ERR(setaddr); ++ return rt_packet_bind(fd, sock, setaddr->addr, ++ setaddr->addrlen); ++ ++ case _RTIOC_GETSOCKNAME: ++ getaddr = rtnet_get_arg(fd, &_getaddr, arg, sizeof(_getaddr)); ++ if (IS_ERR(getaddr)) ++ return PTR_ERR(getaddr); ++ return rt_packet_getsockname(fd, sock, getaddr->addr, ++ getaddr->addrlen); ++ ++ default: ++ return rt_socket_if_ioctl(fd, request, arg); ++ } ++} ++ ++/*** ++ * rt_packet_recvmsg ++ */ ++static ssize_t rt_packet_recvmsg(struct rtdm_fd *fd, struct user_msghdr *u_msg, ++ int msg_flags) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ ssize_t len; ++ size_t copy_len; ++ struct rtskb *rtskb; ++ struct sockaddr_ll sll; ++ int ret, flags; ++ nanosecs_rel_t timeout = sock->timeout; ++ struct user_msghdr _msg, *msg; ++ socklen_t namelen; ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ ++ msg = rtnet_get_arg(fd, &_msg, u_msg, sizeof(_msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ if (msg->msg_iovlen < 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen == 0) ++ return 0; ++ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ /* non-blocking receive? */ ++ if (msg_flags & MSG_DONTWAIT) ++ timeout = -1; ++ ++ ret = rtdm_sem_timeddown(&sock->pending_sem, timeout, NULL); ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ default: ++ ret = -EBADF; /* socket has been closed */ ++ case -EWOULDBLOCK: ++ case -ETIMEDOUT: ++ case -EINTR: ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ rtskb = rtskb_dequeue_chain(&sock->incoming); ++ RTNET_ASSERT(rtskb != NULL, return -EFAULT;); ++ ++ /* copy the address if required. 
*/ ++ if (msg->msg_name) { ++ struct rtnet_device *rtdev = rtskb->rtdev; ++ memset(&sll, 0, sizeof(sll)); ++ sll.sll_family = AF_PACKET; ++ sll.sll_hatype = rtdev->type; ++ sll.sll_protocol = rtskb->protocol; ++ sll.sll_pkttype = rtskb->pkt_type; ++ sll.sll_ifindex = rtdev->ifindex; ++ ++ /* Ethernet specific - we rather need some parse handler here */ ++ memcpy(sll.sll_addr, rtskb->mac.ethernet->h_source, ETH_ALEN); ++ sll.sll_halen = ETH_ALEN; ++ ret = rtnet_put_arg(fd, msg->msg_name, &sll, sizeof(sll)); ++ if (ret) ++ goto fail; ++ ++ namelen = sizeof(sll); ++ ret = rtnet_put_arg(fd, &u_msg->msg_namelen, &namelen, ++ sizeof(namelen)); ++ if (ret) ++ goto fail; ++ } ++ ++ /* Include the header in raw delivery */ ++ if (rtdm_fd_to_context(fd)->device->driver->socket_type != SOCK_DGRAM) ++ rtskb_push(rtskb, rtskb->data - rtskb->mac.raw); ++ ++ /* The data must not be longer than the available buffer size */ ++ copy_len = rtskb->len; ++ len = rtdm_get_iov_flatlen(iov, msg->msg_iovlen); ++ if (len < 0) { ++ copy_len = len; ++ goto out; ++ } ++ ++ if (copy_len > len) { ++ copy_len = len; ++ flags = msg->msg_flags | MSG_TRUNC; ++ ret = rtnet_put_arg(fd, &u_msg->msg_flags, &flags, ++ sizeof(flags)); ++ if (ret) ++ goto fail; ++ } ++ ++ copy_len = rtnet_write_to_iov(fd, iov, msg->msg_iovlen, rtskb->data, ++ copy_len); ++out: ++ if ((msg_flags & MSG_PEEK) == 0) { ++ kfree_rtskb(rtskb); ++ } else { ++ rtskb_queue_head(&sock->incoming, rtskb); ++ rtdm_sem_up(&sock->pending_sem); ++ } ++ ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return copy_len; ++fail: ++ copy_len = ret; ++ goto out; ++} ++ ++/*** ++ * rt_packet_sendmsg ++ */ ++static ssize_t rt_packet_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int msg_flags) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ size_t len; ++ struct sockaddr_ll _sll, *sll; ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned short proto; ++ unsigned char *addr; ++ int ifindex; ++ ssize_t ret; ++ struct user_msghdr _msg; ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ ++ if (msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ ++ return -EOPNOTSUPP; ++ if (msg_flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ msg = rtnet_get_arg(fd, &_msg, msg, sizeof(*msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ if (msg->msg_iovlen < 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen == 0) ++ return 0; ++ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ if (msg->msg_name == NULL) { ++ /* Note: We do not care about races with rt_packet_bind here - ++ the user has to do so. 
*/ ++ ifindex = sock->prot.packet.ifindex; ++ proto = sock->prot.packet.packet_type.type; ++ addr = NULL; ++ sll = NULL; ++ } else { ++ sll = rtnet_get_arg(fd, &_sll, msg->msg_name, sizeof(_sll)); ++ if (IS_ERR(sll)) { ++ ret = PTR_ERR(sll); ++ goto abort; ++ } ++ ++ if ((msg->msg_namelen < sizeof(struct sockaddr_ll)) || ++ (msg->msg_namelen < ++ (sll->sll_halen + ++ offsetof(struct sockaddr_ll, sll_addr))) || ++ ((sll->sll_family != AF_PACKET) && ++ (sll->sll_family != AF_UNSPEC))) { ++ ret = -EINVAL; ++ goto abort; ++ } ++ ++ ifindex = sll->sll_ifindex; ++ proto = sll->sll_protocol; ++ addr = sll->sll_addr; ++ } ++ ++ if ((rtdev = rtdev_get_by_index(ifindex)) == NULL) { ++ ret = -ENODEV; ++ goto abort; ++ } ++ ++ len = rtdm_get_iov_flatlen(iov, msg->msg_iovlen); ++ rtskb = alloc_rtskb(rtdev->hard_header_len + len, &sock->skb_pool); ++ if (rtskb == NULL) { ++ ret = -ENOBUFS; ++ goto out; ++ } ++ ++ /* If an RTmac discipline is active, this becomes a pure sanity check to ++ avoid writing beyond rtskb boundaries. The hard check is then performed ++ upon rtdev_xmit() by the discipline's xmit handler. */ ++ if (len > ++ rtdev->mtu + ++ ((rtdm_fd_to_context(fd)->device->driver->socket_type == ++ SOCK_RAW) ? ++ rtdev->hard_header_len : ++ 0)) { ++ ret = -EMSGSIZE; ++ goto err; ++ } ++ ++ if ((sll != NULL) && (sll->sll_halen != rtdev->addr_len)) { ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ rtskb->rtdev = rtdev; ++ rtskb->priority = sock->priority; ++ ++ if (rtdev->hard_header) { ++ int hdr_len; ++ ++ ret = -EINVAL; ++ hdr_len = rtdev->hard_header(rtskb, rtdev, ntohs(proto), addr, ++ NULL, len); ++ if (rtdm_fd_to_context(fd)->device->driver->socket_type != ++ SOCK_DGRAM) { ++ rtskb->tail = rtskb->data; ++ rtskb->len = 0; ++ } else if (hdr_len < 0) ++ goto err; ++ } ++ ++ ret = rtnet_read_from_iov(fd, iov, msg->msg_iovlen, ++ rtskb_put(rtskb, len), len); ++ ++ if ((rtdev->flags & IFF_UP) != 0) { ++ if ((ret = rtdev_xmit(rtskb)) == 0) ++ ret = len; ++ } else { ++ ret = -ENETDOWN; ++ goto err; ++ } ++ ++out: ++ rtdev_dereference(rtdev); ++abort: ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return ret; ++err: ++ kfree_rtskb(rtskb); ++ goto out; ++} ++ ++static struct rtdm_driver packet_proto_drv = { ++ .profile_info = RTDM_PROFILE_INFO(packet, ++ RTDM_CLASS_NETWORK, ++ RTDM_SUBCLASS_RTNET, ++ RTNET_RTDM_VER), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtsocket), ++ ++ .protocol_family = PF_PACKET, ++ .socket_type = SOCK_DGRAM, ++ ++ ++ .ops = { ++ .socket = rt_packet_socket, ++ .close = rt_packet_close, ++ .ioctl_rt = rt_packet_ioctl, ++ .ioctl_nrt = rt_packet_ioctl, ++ .recvmsg_rt = rt_packet_recvmsg, ++ .sendmsg_rt = rt_packet_sendmsg, ++ .select = rt_socket_select_bind, ++ }, ++}; ++ ++static struct rtdm_device packet_proto_dev = { ++ .driver = &packet_proto_drv, ++ .label = "packet", ++}; ++ ++static struct rtdm_driver raw_packet_proto_drv = { ++ .profile_info = RTDM_PROFILE_INFO(raw_packet, ++ RTDM_CLASS_NETWORK, ++ RTDM_SUBCLASS_RTNET, ++ RTNET_RTDM_VER), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtsocket), ++ ++ .protocol_family = PF_PACKET, ++ .socket_type = SOCK_RAW, ++ ++ .ops = { ++ .socket = rt_packet_socket, ++ .close = rt_packet_close, ++ .ioctl_rt = rt_packet_ioctl, ++ .ioctl_nrt = rt_packet_ioctl, ++ .recvmsg_rt = rt_packet_recvmsg, ++ .sendmsg_rt = rt_packet_sendmsg, ++ .select = rt_socket_select_bind, ++ }, ++}; ++ ++static struct 
rtdm_device raw_packet_proto_dev = { ++ .driver = &raw_packet_proto_drv, ++ .label = "raw_packet", ++}; ++ ++static int __init rt_packet_proto_init(void) ++{ ++ int err; ++ ++ err = rtdm_dev_register(&packet_proto_dev); ++ if (err) ++ return err; ++ ++ err = rtdm_dev_register(&raw_packet_proto_dev); ++ if (err) ++ rtdm_dev_unregister(&packet_proto_dev); ++ ++ return err; ++} ++ ++static void rt_packet_proto_release(void) ++{ ++ rtdm_dev_unregister(&packet_proto_dev); ++ rtdm_dev_unregister(&raw_packet_proto_dev); ++} ++ ++module_init(rt_packet_proto_init); ++module_exit(rt_packet_proto_release); ++ ++/********************************************************** ++ * Utilities * ++ **********************************************************/ ++ ++static int hex2int(unsigned char hex_char) ++{ ++ if ((hex_char >= '0') && (hex_char <= '9')) ++ return hex_char - '0'; ++ else if ((hex_char >= 'a') && (hex_char <= 'f')) ++ return hex_char - 'a' + 10; ++ else if ((hex_char >= 'A') && (hex_char <= 'F')) ++ return hex_char - 'A' + 10; ++ else ++ return -EINVAL; ++} ++ ++int rt_eth_aton(unsigned char *addr_buf, const char *mac) ++{ ++ int i = 0; ++ int nibble; ++ ++ while (1) { ++ if (*mac == 0) ++ return -EINVAL; ++ ++ if ((nibble = hex2int(*mac++)) < 0) ++ return nibble; ++ *addr_buf = nibble << 4; ++ ++ if (*mac == 0) ++ return -EINVAL; ++ ++ if ((nibble = hex2int(*mac++)) < 0) ++ return nibble; ++ *addr_buf++ |= nibble; ++ ++ if (++i == 6) ++ break; ++ ++ if ((*mac == 0) || (*mac++ != ':')) ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(rt_eth_aton); +--- linux/drivers/xenomai/net/stack/packet/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/packet/Makefile 2021-04-07 16:01:27.106634339 +0800 +@@ -0,0 +1,5 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTPACKET) += rtpacket.o ++ ++rtpacket-y := af_packet.o +--- linux/drivers/xenomai/net/stack/packet/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/packet/Kconfig 2021-04-07 16:01:27.101634346 +0800 +@@ -0,0 +1,14 @@ ++config XENO_DRIVERS_NET_RTPACKET ++ depends on XENO_DRIVERS_NET ++ tristate "Real-Time Packet Socket Support" ++ default y ++ ---help--- ++ Enables real-time packet sockets for RTnet. This support is ++ implemented in a separate module. When loaded, application programs ++ can send and received so-called "cooked" packets directly at OSI layer ++ 2 (device layer). This means that RTnet will still maintain the ++ device-dependent packet header but leave the full data segment to the ++ user. ++ ++ Examples like raw-ethernet or netshm make use of this support. See ++ also Linux man page packet(7). +--- linux/drivers/xenomai/net/stack/eth.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/eth.c 2021-04-07 16:01:27.096634353 +0800 +@@ -0,0 +1,131 @@ ++/*** ++ * ++ * stack/eth.c - Ethernet-specific functions ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++ ++/* ++ * Create the Ethernet MAC header for an arbitrary protocol layer ++ * ++ * saddr=NULL means use device source address ++ * daddr=NULL means leave destination address (eg unresolved arp) ++ */ ++int rt_eth_header(struct rtskb *skb, struct rtnet_device *rtdev, ++ unsigned short type, void *daddr, void *saddr, unsigned len) ++{ ++ struct ethhdr *eth = (struct ethhdr *)rtskb_push(skb, ETH_HLEN); ++ ++ /* ++ * Set rtskb mac field ++ */ ++ ++ skb->mac.ethernet = eth; ++ ++ /* ++ * Set the protocol type. For a packet of type ETH_P_802_3 we put the length ++ * in here instead. It is up to the 802.2 layer to carry protocol information. ++ */ ++ ++ if (type != ETH_P_802_3) ++ eth->h_proto = htons(type); ++ else ++ eth->h_proto = htons(len); ++ ++ /* ++ * Set the source hardware address. ++ */ ++ ++ if (saddr) ++ memcpy(eth->h_source, saddr, rtdev->addr_len); ++ else ++ memcpy(eth->h_source, rtdev->dev_addr, rtdev->addr_len); ++ ++ if (rtdev->flags & (IFF_LOOPBACK | IFF_NOARP)) { ++ memset(eth->h_dest, 0, rtdev->addr_len); ++ return rtdev->hard_header_len; ++ } ++ ++ if (daddr) { ++ memcpy(eth->h_dest, daddr, rtdev->addr_len); ++ return rtdev->hard_header_len; ++ } ++ ++ return -rtdev->hard_header_len; ++} ++ ++unsigned short rt_eth_type_trans(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct ethhdr *eth; ++ unsigned char *rawp; ++ ++ rtcap_mark_incoming(skb); ++ ++ skb->mac.raw = skb->data; ++ rtskb_pull(skb, rtdev->hard_header_len); ++ eth = skb->mac.ethernet; ++ ++ if (*eth->h_dest & 1) { ++ if (memcmp(eth->h_dest, rtdev->broadcast, ETH_ALEN) == 0) ++ skb->pkt_type = PACKET_BROADCAST; ++ else ++ skb->pkt_type = PACKET_MULTICAST; ++ } ++ ++ /* ++ * This ALLMULTI check should be redundant by 1.4 ++ * so don't forget to remove it. ++ * ++ * Seems, you forgot to remove it. All silly devices ++ * seems to set IFF_PROMISC. ++ */ ++ ++ else if (1 /*rtdev->flags&IFF_PROMISC*/) { ++ if (memcmp(eth->h_dest, rtdev->dev_addr, ETH_ALEN)) ++ skb->pkt_type = PACKET_OTHERHOST; ++ } ++ ++ if (ntohs(eth->h_proto) >= 1536) ++ return eth->h_proto; ++ ++ rawp = skb->data; ++ ++ /* ++ * This is a magic hack to spot IPX packets. Older Novell breaks ++ * the protocol design and runs IPX over 802.3 without an 802.2 LLC ++ * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This ++ * won't work for fault tolerant netware but does for the rest. ++ */ ++ if (*(unsigned short *)rawp == 0xFFFF) ++ return htons(ETH_P_802_3); ++ ++ /* ++ * Real 802.2 LLC ++ */ ++ return htons(ETH_P_802_2); ++} ++ ++EXPORT_SYMBOL_GPL(rt_eth_header); ++EXPORT_SYMBOL_GPL(rt_eth_type_trans); +--- linux/drivers/xenomai/net/stack/ipv4/protocol.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/protocol.c 2021-04-07 16:01:27.092634359 +0800 +@@ -0,0 +1,88 @@ ++/*** ++ * ++ * ipv4/protocol.c ++ * ++ * rtnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++ ++struct rtinet_protocol *rt_inet_protocols[MAX_RT_INET_PROTOCOLS]; ++ ++/*** ++ * rt_inet_add_protocol ++ */ ++void rt_inet_add_protocol(struct rtinet_protocol *prot) ++{ ++ unsigned char hash = rt_inet_hashkey(prot->protocol); ++ ++ if (rt_inet_protocols[hash] == NULL) ++ rt_inet_protocols[hash] = prot; ++} ++EXPORT_SYMBOL_GPL(rt_inet_add_protocol); ++ ++/*** ++ * rt_inet_del_protocol ++ */ ++void rt_inet_del_protocol(struct rtinet_protocol *prot) ++{ ++ unsigned char hash = rt_inet_hashkey(prot->protocol); ++ ++ if (prot == rt_inet_protocols[hash]) ++ rt_inet_protocols[hash] = NULL; ++} ++EXPORT_SYMBOL_GPL(rt_inet_del_protocol); ++ ++/*** ++ * rt_inet_socket - initialize an Internet socket ++ * @sock: socket structure ++ * @protocol: protocol id ++ */ ++int rt_inet_socket(struct rtdm_fd *fd, int protocol) ++{ ++ struct rtinet_protocol *prot; ++ ++ if (protocol == 0) ++ switch (rtdm_fd_to_context(fd)->device->driver->socket_type) { ++ case SOCK_DGRAM: ++ protocol = IPPROTO_UDP; ++ break; ++ case SOCK_STREAM: ++ protocol = IPPROTO_TCP; ++ break; ++ } ++ ++ prot = rt_inet_protocols[rt_inet_hashkey(protocol)]; ++ ++ /* create the socket (call the socket creator) */ ++ if ((prot != NULL) && (prot->protocol == protocol)) ++ return prot->init_socket(fd); ++ else { ++ rtdm_printk("RTnet: protocol with id %d not found\n", protocol); ++ ++ return -ENOPROTOOPT; ++ } ++} ++EXPORT_SYMBOL_GPL(rt_inet_socket); +--- linux/drivers/xenomai/net/stack/ipv4/route.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/route.c 2021-04-07 16:01:27.087634366 +0800 +@@ -0,0 +1,1057 @@ ++/*** ++ * ++ * ipv4/route.c - real-time routing ++ * ++ * Copyright (C) 2004, 2005 Jan Kiszka ++ * ++ * Rewritten version of the original route by David Schleef and Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* FIXME: should also become some tunable parameter */ ++#define ROUTER_FORWARD_PRIO \ ++ RTSKB_PRIO_VALUE(QUEUE_MAX_PRIO + \ ++ (QUEUE_MIN_PRIO - QUEUE_MAX_PRIO + 1) / 2, \ ++ RTSKB_DEF_RT_CHANNEL) ++ ++/* First-level routing: explicite host routes */ ++struct host_route { ++ struct host_route *next; ++ struct dest_route dest_host; ++}; ++ ++/* Second-level routing: routes to other networks */ ++struct net_route { ++ struct net_route *next; ++ u32 dest_net_ip; ++ u32 dest_net_mask; ++ u32 gw_ip; ++}; ++ ++#if (CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES & \ ++ (CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES - 1)) ++#error CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES must be power of 2 ++#endif ++#if CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES < 256 ++#define HOST_HASH_TBL_SIZE 64 ++#else ++#define HOST_HASH_TBL_SIZE \ ++ ((CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES / 256) * 64) ++#endif ++#define HOST_HASH_KEY_MASK (HOST_HASH_TBL_SIZE - 1) ++ ++static struct host_route host_routes[CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES]; ++static struct host_route *free_host_route; ++static int allocated_host_routes; ++static struct host_route *host_hash_tbl[HOST_HASH_TBL_SIZE]; ++static DEFINE_RTDM_LOCK(host_table_lock); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++#if (CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES & \ ++ (CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES - 1)) ++#error CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES must be power of 2 ++#endif ++#if CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES < 256 ++#define NET_HASH_TBL_SIZE 64 ++#else ++#define NET_HASH_TBL_SIZE \ ++ ((CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES / 256) * 64) ++#endif ++#define NET_HASH_KEY_MASK (NET_HASH_TBL_SIZE - 1) ++#define NET_HASH_KEY_SHIFT 8 ++ ++static struct net_route net_routes[CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES]; ++static struct net_route *free_net_route; ++static int allocated_net_routes; ++static struct net_route *net_hash_tbl[NET_HASH_TBL_SIZE + 1]; ++static unsigned int net_hash_key_shift = NET_HASH_KEY_SHIFT; ++static DEFINE_RTDM_LOCK(net_table_lock); ++ ++module_param(net_hash_key_shift, uint, 0444); ++MODULE_PARM_DESC(net_hash_key_shift, "destination right shift for " ++ "network hash key (default: 8)"); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++/*** ++ * proc filesystem section ++ */ ++#ifdef CONFIG_XENO_OPT_VFILE ++static int rtnet_ipv4_route_show(struct xnvfile_regular_iterator *it, void *d) ++{ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ u32 mask; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++ xnvfile_printf(it, ++ "Host routes allocated/total:\t%d/%d\n" ++ "Host hash table size:\t\t%d\n", ++ allocated_host_routes, ++ CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES, ++ HOST_HASH_TBL_SIZE); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ mask = NET_HASH_KEY_MASK << net_hash_key_shift; ++ xnvfile_printf(it, ++ "Network routes allocated/total:\t%d/%d\n" ++ "Network hash table size:\t%d\n" ++ "Network hash key shift/mask:\t%d/%08X\n", ++ allocated_net_routes, ++ CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES, ++ NET_HASH_TBL_SIZE, net_hash_key_shift, mask); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER ++ xnvfile_printf(it, "IP Router:\t\t\tyes\n"); ++#else ++ xnvfile_printf(it, "IP Router:\t\t\tno\n"); ++#endif ++ ++ return 0; ++} ++ ++static int rtnet_ipv4_module_lock(struct xnvfile *vfile) ++{ ++ bool 
res = try_module_get(THIS_MODULE); ++ if (!res) ++ return -EIDRM; ++ ++ return 0; ++} ++ ++static void rtnet_ipv4_module_unlock(struct xnvfile *vfile) ++{ ++ module_put(THIS_MODULE); ++} ++ ++static struct xnvfile_lock_ops rtnet_ipv4_module_lock_ops = { ++ .get = rtnet_ipv4_module_lock, ++ .put = rtnet_ipv4_module_unlock, ++}; ++ ++static struct xnvfile_regular_ops rtnet_ipv4_route_vfile_ops = { ++ .show = rtnet_ipv4_route_show, ++}; ++ ++static struct xnvfile_regular rtnet_ipv4_route_vfile = { ++ .entry = { ++ .lockops = &rtnet_ipv4_module_lock_ops, ++ }, ++ .ops = &rtnet_ipv4_route_vfile_ops, ++}; ++ ++static rtdm_lockctx_t rtnet_ipv4_host_route_lock_ctx; ++ ++static int rtnet_ipv4_host_route_lock(struct xnvfile *vfile) ++{ ++ rtdm_lock_get_irqsave(&host_table_lock, rtnet_ipv4_host_route_lock_ctx); ++ return 0; ++} ++ ++static void rtnet_ipv4_host_route_unlock(struct xnvfile *vfile) ++{ ++ rtdm_lock_put_irqrestore(&host_table_lock, ++ rtnet_ipv4_host_route_lock_ctx); ++} ++ ++static struct xnvfile_lock_ops rtnet_ipv4_host_route_lock_ops = { ++ .get = rtnet_ipv4_host_route_lock, ++ .put = rtnet_ipv4_host_route_unlock, ++}; ++ ++struct rtnet_ipv4_host_route_priv { ++ unsigned key; ++ struct host_route *entry_ptr; ++}; ++ ++struct rtnet_ipv4_host_route_data { ++ int key; ++ char name[IFNAMSIZ]; ++ struct dest_route dest_host; ++}; ++ ++struct xnvfile_rev_tag host_route_tag; ++ ++static void *rtnet_ipv4_host_route_begin(struct xnvfile_snapshot_iterator *it) ++{ ++ struct rtnet_ipv4_host_route_priv *priv = xnvfile_iterator_priv(it); ++ struct rtnet_ipv4_host_route_data *data; ++ unsigned routes; ++ int err; ++ ++ routes = allocated_host_routes; ++ if (!routes) ++ return VFILE_SEQ_EMPTY; ++ ++ data = kmalloc(sizeof(*data) * routes, GFP_KERNEL); ++ if (data == NULL) ++ return NULL; ++ ++ err = rtnet_ipv4_module_lock(NULL); ++ if (err < 0) { ++ kfree(data); ++ return VFILE_SEQ_EMPTY; ++ } ++ ++ priv->key = -1; ++ priv->entry_ptr = NULL; ++ return data; ++} ++ ++static void rtnet_ipv4_host_route_end(struct xnvfile_snapshot_iterator *it, ++ void *buf) ++{ ++ rtnet_ipv4_module_unlock(NULL); ++ kfree(buf); ++} ++ ++static int rtnet_ipv4_host_route_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct rtnet_ipv4_host_route_priv *priv = xnvfile_iterator_priv(it); ++ struct rtnet_ipv4_host_route_data *p = data; ++ struct rtnet_device *rtdev; ++ ++ if (priv->entry_ptr == NULL) { ++ if (++priv->key >= HOST_HASH_TBL_SIZE) ++ return 0; ++ ++ priv->entry_ptr = host_hash_tbl[priv->key]; ++ if (priv->entry_ptr == NULL) ++ return VFILE_SEQ_SKIP; ++ } ++ ++ rtdev = priv->entry_ptr->dest_host.rtdev; ++ ++ if (!rtdev_reference(rtdev)) ++ return -EIDRM; ++ ++ memcpy(&p->name, rtdev->name, sizeof(p->name)); ++ ++ rtdev_dereference(rtdev); ++ ++ p->key = priv->key; ++ ++ memcpy(&p->dest_host, &priv->entry_ptr->dest_host, ++ sizeof(p->dest_host)); ++ ++ priv->entry_ptr = priv->entry_ptr->next; ++ ++ return 1; ++} ++ ++static int rtnet_ipv4_host_route_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct rtnet_ipv4_host_route_data *p = data; ++ ++ if (p == NULL) { ++ xnvfile_printf(it, "Hash\tDestination\tHW Address\t\tDevice\n"); ++ return 0; ++ } ++ ++ xnvfile_printf(it, ++ "%02X\t%u.%u.%u.%-3u\t" ++ "%02X:%02X:%02X:%02X:%02X:%02X\t%s\n", ++ p->key, NIPQUAD(p->dest_host.ip), ++ p->dest_host.dev_addr[0], p->dest_host.dev_addr[1], ++ p->dest_host.dev_addr[2], p->dest_host.dev_addr[3], ++ p->dest_host.dev_addr[4], p->dest_host.dev_addr[5], ++ p->name); ++ return 0; ++} ++ ++static 
struct xnvfile_snapshot_ops rtnet_ipv4_host_route_vfile_ops = { ++ .begin = rtnet_ipv4_host_route_begin, ++ .end = rtnet_ipv4_host_route_end, ++ .next = rtnet_ipv4_host_route_next, ++ .show = rtnet_ipv4_host_route_show, ++}; ++ ++static struct xnvfile_snapshot rtnet_ipv4_host_route_vfile = { ++ .entry = { ++ .lockops = &rtnet_ipv4_host_route_lock_ops, ++ }, ++ .privsz = sizeof(struct rtnet_ipv4_host_route_priv), ++ .datasz = sizeof(struct rtnet_ipv4_host_route_data), ++ .tag = &host_route_tag, ++ .ops = &rtnet_ipv4_host_route_vfile_ops, ++}; ++ ++static struct xnvfile_link rtnet_ipv4_arp_vfile; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++static rtdm_lockctx_t rtnet_ipv4_net_route_lock_ctx; ++ ++static int rtnet_ipv4_net_route_lock(struct xnvfile *vfile) ++{ ++ rtdm_lock_get_irqsave(&net_table_lock, rtnet_ipv4_net_route_lock_ctx); ++ return 0; ++} ++ ++static void rtnet_ipv4_net_route_unlock(struct xnvfile *vfile) ++{ ++ rtdm_lock_put_irqrestore(&net_table_lock, ++ rtnet_ipv4_net_route_lock_ctx); ++} ++ ++static struct xnvfile_lock_ops rtnet_ipv4_net_route_lock_ops = { ++ .get = rtnet_ipv4_net_route_lock, ++ .put = rtnet_ipv4_net_route_unlock, ++}; ++ ++struct rtnet_ipv4_net_route_priv { ++ unsigned key; ++ struct net_route *entry_ptr; ++}; ++ ++struct rtnet_ipv4_net_route_data { ++ int key; ++ u32 dest_net_ip; ++ u32 dest_net_mask; ++ u32 gw_ip; ++}; ++ ++struct xnvfile_rev_tag net_route_tag; ++ ++static void *rtnet_ipv4_net_route_begin(struct xnvfile_snapshot_iterator *it) ++{ ++ struct rtnet_ipv4_net_route_priv *priv = xnvfile_iterator_priv(it); ++ struct rtnet_ipv4_net_route_data *data; ++ unsigned routes; ++ int err; ++ ++ routes = allocated_net_routes; ++ if (!routes) ++ return VFILE_SEQ_EMPTY; ++ ++ data = kmalloc(sizeof(*data) * routes, GFP_KERNEL); ++ if (data == NULL) ++ return NULL; ++ ++ err = rtnet_ipv4_module_lock(NULL); ++ if (err < 0) { ++ kfree(data); ++ return VFILE_SEQ_EMPTY; ++ } ++ ++ priv->key = -1; ++ priv->entry_ptr = NULL; ++ return data; ++} ++ ++static void rtnet_ipv4_net_route_end(struct xnvfile_snapshot_iterator *it, ++ void *buf) ++{ ++ rtnet_ipv4_module_unlock(NULL); ++ kfree(buf); ++} ++ ++static int rtnet_ipv4_net_route_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct rtnet_ipv4_net_route_priv *priv = xnvfile_iterator_priv(it); ++ struct rtnet_ipv4_net_route_data *p = data; ++ ++ if (priv->entry_ptr == NULL) { ++ if (++priv->key >= NET_HASH_TBL_SIZE + 1) ++ return 0; ++ ++ priv->entry_ptr = net_hash_tbl[priv->key]; ++ if (priv->entry_ptr == NULL) ++ return VFILE_SEQ_SKIP; ++ } ++ ++ p->key = priv->key; ++ p->dest_net_ip = priv->entry_ptr->dest_net_ip; ++ p->dest_net_mask = priv->entry_ptr->dest_net_mask; ++ p->gw_ip = priv->entry_ptr->gw_ip; ++ ++ priv->entry_ptr = priv->entry_ptr->next; ++ ++ return 1; ++} ++ ++static int rtnet_ipv4_net_route_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct rtnet_ipv4_net_route_data *p = data; ++ ++ if (p == NULL) { ++ xnvfile_printf(it, "Hash\tDestination\tMask\t\t\tGateway\n"); ++ return 0; ++ } ++ ++ if (p->key < NET_HASH_TBL_SIZE) ++ xnvfile_printf(it, ++ "%02X\t%u.%u.%u.%-3u\t%u.%u.%u.%-3u" ++ "\t\t%u.%u.%u.%-3u\n", ++ p->key, NIPQUAD(p->dest_net_ip), ++ NIPQUAD(p->dest_net_mask), NIPQUAD(p->gw_ip)); ++ else ++ xnvfile_printf(it, ++ "*\t%u.%u.%u.%-3u\t%u.%u.%u.%-3u\t\t" ++ "%u.%u.%u.%-3u\n", ++ NIPQUAD(p->dest_net_ip), ++ NIPQUAD(p->dest_net_mask), NIPQUAD(p->gw_ip)); ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops rtnet_ipv4_net_route_vfile_ops = { 
++ .begin = rtnet_ipv4_net_route_begin, ++ .end = rtnet_ipv4_net_route_end, ++ .next = rtnet_ipv4_net_route_next, ++ .show = rtnet_ipv4_net_route_show, ++}; ++ ++static struct xnvfile_snapshot rtnet_ipv4_net_route_vfile = { ++ .entry = { ++ .lockops = &rtnet_ipv4_net_route_lock_ops, ++ }, ++ .privsz = sizeof(struct rtnet_ipv4_net_route_priv), ++ .datasz = sizeof(struct rtnet_ipv4_net_route_data), ++ .tag = &net_route_tag, ++ .ops = &rtnet_ipv4_net_route_vfile_ops, ++}; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++static int __init rt_route_proc_register(void) ++{ ++ int err; ++ ++ err = xnvfile_init_regular("route", &rtnet_ipv4_route_vfile, ++ &ipv4_proc_root); ++ if (err < 0) ++ goto err1; ++ ++ err = xnvfile_init_snapshot("host_route", &rtnet_ipv4_host_route_vfile, ++ &ipv4_proc_root); ++ if (err < 0) ++ goto err2; ++ ++ /* create "arp" as an alias for "host_route" */ ++ err = xnvfile_init_link("arp", "host_route", &rtnet_ipv4_arp_vfile, ++ &ipv4_proc_root); ++ if (err < 0) ++ goto err3; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ err = xnvfile_init_snapshot("net_route", &rtnet_ipv4_net_route_vfile, ++ &ipv4_proc_root); ++ if (err < 0) ++ goto err4; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++ return 0; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++err4: ++ xnvfile_destroy_link(&rtnet_ipv4_arp_vfile); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++err3: ++ xnvfile_destroy_snapshot(&rtnet_ipv4_host_route_vfile); ++ ++err2: ++ xnvfile_destroy_regular(&rtnet_ipv4_route_vfile); ++ ++err1: ++ printk("RTnet: unable to initialize /proc entries (route)\n"); ++ return err; ++} ++ ++static void rt_route_proc_unregister(void) ++{ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ xnvfile_destroy_snapshot(&rtnet_ipv4_net_route_vfile); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ xnvfile_destroy_link(&rtnet_ipv4_arp_vfile); ++ xnvfile_destroy_snapshot(&rtnet_ipv4_host_route_vfile); ++ xnvfile_destroy_regular(&rtnet_ipv4_route_vfile); ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++/*** ++ * rt_alloc_host_route - allocates new host route ++ */ ++static inline struct host_route *rt_alloc_host_route(void) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *rt; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ if ((rt = free_host_route) != NULL) { ++ free_host_route = rt->next; ++ allocated_host_routes++; ++ } ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ return rt; ++} ++ ++/*** ++ * rt_free_host_route - releases host route ++ * ++ * Note: must be called with host_table_lock held ++ */ ++static inline void rt_free_host_route(struct host_route *rt) ++{ ++ rt->next = free_host_route; ++ free_host_route = rt; ++ allocated_host_routes--; ++} ++ ++/*** ++ * rt_ip_route_add_host: add or update host route ++ */ ++int rt_ip_route_add_host(u32 addr, unsigned char *dev_addr, ++ struct rtnet_device *rtdev) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *new_route; ++ struct host_route *rt; ++ unsigned int key; ++ int ret = 0; ++ ++ rtdm_lock_get_irqsave(&rtdev->rtdev_lock, context); ++ ++ if ((!test_bit(PRIV_FLAG_UP, &rtdev->priv_flags) || ++ test_and_set_bit(PRIV_FLAG_ADDING_ROUTE, &rtdev->priv_flags))) { ++ rtdm_lock_put_irqrestore(&rtdev->rtdev_lock, context); ++ return -EBUSY; ++ } ++ ++ rtdm_lock_put_irqrestore(&rtdev->rtdev_lock, context); ++ ++ if ((new_route = rt_alloc_host_route()) != NULL) { ++ new_route->dest_host.ip = addr; ++ new_route->dest_host.rtdev = rtdev; ++ 
memcpy(new_route->dest_host.dev_addr, dev_addr, ++ rtdev->addr_len); ++ } ++ ++ key = ntohl(addr) & HOST_HASH_KEY_MASK; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ xnvfile_touch_tag(&host_route_tag); ++ ++ rt = host_hash_tbl[key]; ++ while (rt != NULL) { ++ if ((rt->dest_host.ip == addr) && ++ (rt->dest_host.rtdev->local_ip == rtdev->local_ip)) { ++ rt->dest_host.rtdev = rtdev; ++ memcpy(rt->dest_host.dev_addr, dev_addr, ++ rtdev->addr_len); ++ ++ if (new_route) ++ rt_free_host_route(new_route); ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ goto out; ++ } ++ ++ rt = rt->next; ++ } ++ ++ if (new_route) { ++ new_route->next = host_hash_tbl[key]; ++ host_hash_tbl[key] = new_route; ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ } else { ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ /*ERRMSG*/ rtdm_printk( ++ "RTnet: no more host routes available\n"); ++ ret = -ENOBUFS; ++ } ++ ++out: ++ clear_bit(PRIV_FLAG_ADDING_ROUTE, &rtdev->priv_flags); ++ ++ return ret; ++} ++ ++/*** ++ * rt_ip_route_del_host - deletes specified host route ++ */ ++int rt_ip_route_del_host(u32 addr, struct rtnet_device *rtdev) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *rt; ++ struct host_route **last_ptr; ++ unsigned int key; ++ ++ key = ntohl(addr) & HOST_HASH_KEY_MASK; ++ last_ptr = &host_hash_tbl[key]; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ rt = host_hash_tbl[key]; ++ while (rt != NULL) { ++ if ((rt->dest_host.ip == addr) && ++ (!rtdev || ++ (rt->dest_host.rtdev->local_ip == rtdev->local_ip))) { ++ *last_ptr = rt->next; ++ ++ rt_free_host_route(rt); ++ ++ xnvfile_touch_tag(&host_route_tag); ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ return 0; ++ } ++ ++ last_ptr = &rt->next; ++ rt = rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ return -ENOENT; ++} ++ ++/*** ++ * rt_ip_route_del_all - deletes all routes associated with a specified device ++ */ ++void rt_ip_route_del_all(struct rtnet_device *rtdev) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *host_rt; ++ struct host_route **last_host_ptr; ++ unsigned int key; ++ u32 ip; ++ ++ for (key = 0; key < HOST_HASH_TBL_SIZE; key++) { ++ host_start_over: ++ last_host_ptr = &host_hash_tbl[key]; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ host_rt = host_hash_tbl[key]; ++ while (host_rt != NULL) { ++ if (host_rt->dest_host.rtdev == rtdev) { ++ *last_host_ptr = host_rt->next; ++ ++ rt_free_host_route(host_rt); ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, ++ context); ++ ++ goto host_start_over; ++ } ++ ++ last_host_ptr = &host_rt->next; ++ host_rt = host_rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ } ++ ++ if ((ip = rtdev->local_ip) != 0) ++ rt_ip_route_del_host(ip, rtdev); ++} ++ ++/*** ++ * rt_ip_route_get_host - check if specified host route is resolved ++ */ ++int rt_ip_route_get_host(u32 addr, char *if_name, unsigned char *dev_addr, ++ struct rtnet_device *rtdev) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *rt; ++ unsigned int key; ++ ++ key = ntohl(addr) & HOST_HASH_KEY_MASK; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ rt = host_hash_tbl[key]; ++ while (rt != NULL) { ++ if ((rt->dest_host.ip == addr) && ++ (!rtdev || ++ rt->dest_host.rtdev->local_ip == rtdev->local_ip)) { ++ memcpy(dev_addr, rt->dest_host.dev_addr, ++ rt->dest_host.rtdev->addr_len); ++ strncpy(if_name, rt->dest_host.rtdev->name, IFNAMSIZ); ++ ++ 
rtdm_lock_put_irqrestore(&host_table_lock, context); ++ return 0; ++ } ++ ++ rt = rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ return -ENOENT; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++/*** ++ * rt_alloc_net_route - allocates new network route ++ */ ++static inline struct net_route *rt_alloc_net_route(void) ++{ ++ rtdm_lockctx_t context; ++ struct net_route *rt; ++ ++ rtdm_lock_get_irqsave(&net_table_lock, context); ++ ++ if ((rt = free_net_route) != NULL) { ++ free_net_route = rt->next; ++ allocated_net_routes++; ++ } ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ return rt; ++} ++ ++/*** ++ * rt_free_net_route - releases network route ++ * ++ * Note: must be called with net_table_lock held ++ */ ++static inline void rt_free_net_route(struct net_route *rt) ++{ ++ rt->next = free_net_route; ++ free_net_route = rt; ++ allocated_host_routes--; ++} ++ ++/*** ++ * rt_ip_route_add_net: add or update network route ++ */ ++int rt_ip_route_add_net(u32 addr, u32 mask, u32 gw_addr) ++{ ++ rtdm_lockctx_t context; ++ struct net_route *new_route; ++ struct net_route *rt; ++ struct net_route **last_ptr; ++ unsigned int key; ++ u32 shifted_mask; ++ ++ addr &= mask; ++ ++ if ((new_route = rt_alloc_net_route()) != NULL) { ++ new_route->dest_net_ip = addr; ++ new_route->dest_net_mask = mask; ++ new_route->gw_ip = gw_addr; ++ } ++ ++ shifted_mask = NET_HASH_KEY_MASK << net_hash_key_shift; ++ if ((mask & shifted_mask) == shifted_mask) ++ key = (ntohl(addr) >> net_hash_key_shift) & NET_HASH_KEY_MASK; ++ else ++ key = NET_HASH_TBL_SIZE; ++ last_ptr = &net_hash_tbl[key]; ++ ++ rtdm_lock_get_irqsave(&net_table_lock, context); ++ ++ xnvfile_touch_tag(&net_route_tag); ++ ++ rt = net_hash_tbl[key]; ++ while (rt != NULL) { ++ if ((rt->dest_net_ip == addr) && (rt->dest_net_mask == mask)) { ++ rt->gw_ip = gw_addr; ++ ++ if (new_route) ++ rt_free_net_route(new_route); ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ return 0; ++ } ++ ++ last_ptr = &rt->next; ++ rt = rt->next; ++ } ++ ++ if (new_route) { ++ new_route->next = *last_ptr; ++ *last_ptr = new_route; ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ return 0; ++ } else { ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ /*ERRMSG*/ rtdm_printk( ++ "RTnet: no more network routes available\n"); ++ return -ENOBUFS; ++ } ++} ++ ++/*** ++ * rt_ip_route_del_net - deletes specified network route ++ */ ++int rt_ip_route_del_net(u32 addr, u32 mask) ++{ ++ rtdm_lockctx_t context; ++ struct net_route *rt; ++ struct net_route **last_ptr; ++ unsigned int key; ++ u32 shifted_mask; ++ ++ addr &= mask; ++ ++ shifted_mask = NET_HASH_KEY_MASK << net_hash_key_shift; ++ if ((mask & shifted_mask) == shifted_mask) ++ key = (ntohl(addr) >> net_hash_key_shift) & NET_HASH_KEY_MASK; ++ else ++ key = NET_HASH_TBL_SIZE; ++ last_ptr = &net_hash_tbl[key]; ++ ++ rtdm_lock_get_irqsave(&net_table_lock, context); ++ ++ rt = net_hash_tbl[key]; ++ while (rt != NULL) { ++ if ((rt->dest_net_ip == addr) && (rt->dest_net_mask == mask)) { ++ *last_ptr = rt->next; ++ ++ rt_free_net_route(rt); ++ ++ xnvfile_touch_tag(&net_route_tag); ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ return 0; ++ } ++ ++ last_ptr = &rt->next; ++ rt = rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ return -ENOENT; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++/*** ++ * rt_ip_route_output - looks up output route ++ * ++ * Note: increments refcount on 
returned rtdev in rt_buf ++ */ ++int rt_ip_route_output(struct dest_route *rt_buf, u32 daddr, u32 saddr) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *host_rt; ++ unsigned int key; ++ ++#ifndef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++#define DADDR daddr ++#else ++#define DADDR real_daddr ++ ++ struct net_route *net_rt; ++ int lookup_gw = 1; ++ u32 real_daddr = daddr; ++ ++restart: ++#endif /* !CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++ key = ntohl(daddr) & HOST_HASH_KEY_MASK; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ host_rt = host_hash_tbl[key]; ++ if (likely(saddr == INADDR_ANY)) ++ while (host_rt != NULL) { ++ if (host_rt->dest_host.ip == daddr) { ++ host_route_found: ++ if (!rtdev_reference( ++ host_rt->dest_host.rtdev)) { ++ rtdm_lock_put_irqrestore( ++ &host_table_lock, context); ++ goto next; ++ } ++ ++ memcpy(rt_buf->dev_addr, ++ &host_rt->dest_host.dev_addr, ++ sizeof(rt_buf->dev_addr)); ++ rt_buf->rtdev = host_rt->dest_host.rtdev; ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, ++ context); ++ ++ rt_buf->ip = DADDR; ++ ++ return 0; ++ } ++ next: ++ host_rt = host_rt->next; ++ } ++ else ++ while (host_rt != NULL) { ++ if ((host_rt->dest_host.ip == daddr) && ++ (host_rt->dest_host.rtdev->local_ip == saddr)) ++ goto host_route_found; ++ host_rt = host_rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ if (lookup_gw) { ++ lookup_gw = 0; ++ key = (ntohl(daddr) >> net_hash_key_shift) & NET_HASH_KEY_MASK; ++ ++ rtdm_lock_get_irqsave(&net_table_lock, context); ++ ++ net_rt = net_hash_tbl[key]; ++ while (net_rt != NULL) { ++ if (net_rt->dest_net_ip == ++ (daddr & net_rt->dest_net_mask)) { ++ daddr = net_rt->gw_ip; ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, ++ context); ++ ++ /* start over, now using the gateway ip as destination */ ++ goto restart; ++ } ++ ++ net_rt = net_rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ /* last try: no hash key */ ++ rtdm_lock_get_irqsave(&net_table_lock, context); ++ ++ net_rt = net_hash_tbl[NET_HASH_TBL_SIZE]; ++ while (net_rt != NULL) { ++ if (net_rt->dest_net_ip == ++ (daddr & net_rt->dest_net_mask)) { ++ daddr = net_rt->gw_ip; ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, ++ context); ++ ++ /* start over, now using the gateway ip as destination */ ++ goto restart; ++ } ++ ++ net_rt = net_rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++ /*ERRMSG*/ rtdm_printk("RTnet: host %u.%u.%u.%u unreachable\n", ++ NIPQUAD(daddr)); ++ return -EHOSTUNREACH; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER ++int rt_ip_route_forward(struct rtskb *rtskb, u32 daddr) ++{ ++ struct rtnet_device *rtdev = rtskb->rtdev; ++ struct dest_route dest; ++ ++ if (likely((daddr == rtdev->local_ip) || ++ (daddr == rtdev->broadcast_ip) || ++ (rtdev->flags & IFF_LOOPBACK))) ++ return 0; ++ ++ if (rtskb_acquire(rtskb, &global_pool) != 0) { ++ /*ERRMSG*/ rtdm_printk( ++ "RTnet: router overloaded, dropping packet\n"); ++ goto error; ++ } ++ ++ if (rt_ip_route_output(&dest, daddr, INADDR_ANY) < 0) { ++ /*ERRMSG*/ rtdm_printk( ++ "RTnet: unable to forward packet from %u.%u.%u.%u\n", ++ NIPQUAD(rtskb->nh.iph->saddr)); ++ goto error; ++ } ++ ++ rtskb->rtdev = dest.rtdev; ++ rtskb->priority = ROUTER_FORWARD_PRIO; ++ ++ if ((dest.rtdev->hard_header) && ++ (dest.rtdev->hard_header(rtskb, dest.rtdev, ETH_P_IP, dest.dev_addr, ++ 
dest.rtdev->dev_addr, rtskb->len) < 0)) ++ goto error; ++ ++ rtdev_xmit(rtskb); ++ ++ return 1; ++ ++error: ++ kfree_rtskb(rtskb); ++ return 1; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER */ ++ ++/*** ++ * rt_ip_routing_init: initialize ++ */ ++int __init rt_ip_routing_init(void) ++{ ++ int i; ++ ++ for (i = 0; i < CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES - 2; i++) ++ host_routes[i].next = &host_routes[i + 1]; ++ free_host_route = &host_routes[0]; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ for (i = 0; i < CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES - 2; i++) ++ net_routes[i].next = &net_routes[i + 1]; ++ free_net_route = &net_routes[0]; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ return rt_route_proc_register(); ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ return 0; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++} ++ ++/*** ++ * rt_ip_routing_realease ++ */ ++void rt_ip_routing_release(void) ++{ ++#ifdef CONFIG_XENO_OPT_VFILE ++ rt_route_proc_unregister(); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++} ++ ++EXPORT_SYMBOL_GPL(rt_ip_route_add_host); ++EXPORT_SYMBOL_GPL(rt_ip_route_del_host); ++EXPORT_SYMBOL_GPL(rt_ip_route_del_all); ++EXPORT_SYMBOL_GPL(rt_ip_route_output); +--- linux/drivers/xenomai/net/stack/ipv4/icmp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/icmp.c 2021-04-07 16:01:27.082634374 +0800 +@@ -0,0 +1,497 @@ ++/*** ++ * ++ * ipv4/icmp.c ++ * ++ * rtnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2002 Vinay Sridhara ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/*** ++ * Structure for sending the icmp packets ++ */ ++struct icmp_bxm { ++ unsigned int csum; ++ size_t head_len; ++ size_t data_len; ++ off_t offset; ++ struct { ++ struct icmphdr icmph; ++ nanosecs_abs_t timestamp; ++ } head; ++ union { ++ struct rtskb *skb; ++ void *buf; ++ } data; ++}; ++ ++struct rt_icmp_control { ++ void (*handler)(struct rtskb *skb); ++ short error; /* This ICMP is classed as an error message */ ++}; ++ ++static DEFINE_RTDM_LOCK(echo_calls_lock); ++LIST_HEAD(echo_calls); ++ ++static struct { ++ /* ++ * Scratch pad, provided so that rt_socket_dereference(&icmp_socket); ++ * remains legal. ++ */ ++ struct rtdm_dev_context dummy; ++ ++ /* ++ * Socket for icmp replies ++ * It is not part of the socket pool. It may furthermore be used ++ * concurrently by multiple tasks because all fields are static excect ++ * skb_pool, but that one is spinlock protected. 
++ */ ++ struct rtsocket socket; ++} icmp_socket_container; ++ ++#define icmp_fd (&icmp_socket_container.dummy.fd) ++#define icmp_socket ((struct rtsocket *)rtdm_fd_to_private(icmp_fd)) ++ ++void rt_icmp_queue_echo_request(struct rt_proc_call *call) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&echo_calls_lock, context); ++ list_add_tail(&call->list_entry, &echo_calls); ++ rtdm_lock_put_irqrestore(&echo_calls_lock, context); ++} ++ ++void rt_icmp_dequeue_echo_request(struct rt_proc_call *call) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&echo_calls_lock, context); ++ list_del(&call->list_entry); ++ rtdm_lock_put_irqrestore(&echo_calls_lock, context); ++} ++ ++void rt_icmp_cleanup_echo_requests(void) ++{ ++ rtdm_lockctx_t context; ++ struct list_head *entry; ++ struct list_head *next; ++ ++ rtdm_lock_get_irqsave(&echo_calls_lock, context); ++ entry = echo_calls.next; ++ INIT_LIST_HEAD(&echo_calls); ++ rtdm_lock_put_irqrestore(&echo_calls_lock, context); ++ ++ while (entry != &echo_calls) { ++ next = entry->next; ++ rtpc_complete_call_nrt((struct rt_proc_call *)entry, -EINTR); ++ entry = next; ++ } ++ ++ /* purge any pending ICMP fragments */ ++ rt_ip_frag_invalidate_socket(icmp_socket); ++} ++ ++/*** ++ * rt_icmp_discard - dummy function ++ */ ++static void rt_icmp_discard(struct rtskb *skb) ++{ ++} ++ ++static int rt_icmp_glue_reply_bits(const void *p, unsigned char *to, ++ unsigned int offset, unsigned int fraglen) ++{ ++ struct icmp_bxm *icmp_param = (struct icmp_bxm *)p; ++ struct icmphdr *icmph; ++ unsigned long csum; ++ ++ /* TODO: add support for fragmented ICMP packets */ ++ if (offset != 0) ++ return -EMSGSIZE; ++ ++ csum = csum_partial_copy_nocheck((void *)&icmp_param->head, to, ++ icmp_param->head_len, ++ icmp_param->csum); ++ ++ csum = rtskb_copy_and_csum_bits(icmp_param->data.skb, ++ icmp_param->offset, ++ to + icmp_param->head_len, ++ fraglen - icmp_param->head_len, csum); ++ ++ icmph = (struct icmphdr *)to; ++ ++ icmph->checksum = csum_fold(csum); ++ ++ return 0; ++} ++ ++/*** ++ * common reply function ++ */ ++static void rt_icmp_send_reply(struct icmp_bxm *icmp_param, struct rtskb *skb) ++{ ++ struct dest_route rt; ++ int err; ++ ++ icmp_param->head.icmph.checksum = 0; ++ icmp_param->csum = 0; ++ ++ /* route back to the source address via the incoming device */ ++ if (rt_ip_route_output(&rt, skb->nh.iph->saddr, skb->rtdev->local_ip) != ++ 0) ++ return; ++ ++ rt_socket_reference(icmp_socket); ++ err = rt_ip_build_xmit(icmp_socket, rt_icmp_glue_reply_bits, icmp_param, ++ sizeof(struct icmphdr) + icmp_param->data_len, ++ &rt, MSG_DONTWAIT); ++ if (err) ++ rt_socket_dereference(icmp_socket); ++ ++ rtdev_dereference(rt.rtdev); ++ ++ RTNET_ASSERT(err == 0, ++ rtdm_printk("RTnet: %s() error in xmit\n", __FUNCTION__);); ++ (void)err; ++} ++ ++/*** ++ * rt_icmp_echo - handles echo replies on our previously sent requests ++ */ ++static void rt_icmp_echo_reply(struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ struct rt_proc_call *call; ++ struct ipv4_cmd *cmd; ++ ++ rtdm_lock_get_irqsave(&echo_calls_lock, context); ++ ++ if (!list_empty(&echo_calls)) { ++ call = (struct rt_proc_call *)echo_calls.next; ++ list_del(&call->list_entry); ++ ++ rtdm_lock_put_irqrestore(&echo_calls_lock, context); ++ } else { ++ rtdm_lock_put_irqrestore(&echo_calls_lock, context); ++ return; ++ } ++ ++ cmd = rtpc_get_priv(call, struct ipv4_cmd); ++ ++ cmd->args.ping.ip_addr = skb->nh.iph->saddr; ++ cmd->args.ping.rtt = 0; ++ ++ if ((skb->h.icmph->un.echo.id == 
cmd->args.ping.id) && ++ (ntohs(skb->h.icmph->un.echo.sequence) == ++ cmd->args.ping.sequence) && ++ skb->len == cmd->args.ping.msg_size) { ++ if (skb->len >= sizeof(nanosecs_abs_t)) ++ cmd->args.ping.rtt = rtdm_clock_read() - ++ *((nanosecs_abs_t *)skb->data); ++ rtpc_complete_call(call, sizeof(struct icmphdr) + skb->len); ++ } else ++ rtpc_complete_call(call, 0); ++} ++ ++/*** ++ * rt_icmp_echo_request - handles echo requests sent by other stations ++ */ ++static void rt_icmp_echo_request(struct rtskb *skb) ++{ ++ struct icmp_bxm icmp_param; ++ ++ icmp_param.head.icmph = *skb->h.icmph; ++ icmp_param.head.icmph.type = ICMP_ECHOREPLY; ++ icmp_param.data.skb = skb; ++ icmp_param.offset = 0; ++ icmp_param.data_len = skb->len; ++ icmp_param.head_len = sizeof(struct icmphdr); ++ ++ rt_icmp_send_reply(&icmp_param, skb); ++ ++ return; ++} ++ ++static int rt_icmp_glue_request_bits(const void *p, unsigned char *to, ++ unsigned int offset, unsigned int fraglen) ++{ ++ struct icmp_bxm *icmp_param = (struct icmp_bxm *)p; ++ struct icmphdr *icmph; ++ unsigned long csum; ++ ++ /* TODO: add support for fragmented ICMP packets */ ++ RTNET_ASSERT( ++ offset == 0, ++ rtdm_printk("RTnet: %s() does not support fragmentation.\n", ++ __FUNCTION__); ++ return -1;); ++ ++ csum = csum_partial_copy_nocheck((void *)&icmp_param->head, to, ++ icmp_param->head_len, ++ icmp_param->csum); ++ ++ csum = csum_partial_copy_nocheck(icmp_param->data.buf, ++ to + icmp_param->head_len, ++ fraglen - icmp_param->head_len, csum); ++ ++ icmph = (struct icmphdr *)to; ++ ++ icmph->checksum = csum_fold(csum); ++ ++ return 0; ++} ++ ++/*** ++ * common request function ++ */ ++static int rt_icmp_send_request(u32 daddr, struct icmp_bxm *icmp_param) ++{ ++ struct dest_route rt; ++ unsigned int size; ++ int err; ++ ++ icmp_param->head.icmph.checksum = 0; ++ icmp_param->csum = 0; ++ ++ if ((err = rt_ip_route_output(&rt, daddr, INADDR_ANY)) < 0) ++ return err; ++ ++ /* TODO: add support for fragmented ICMP packets */ ++ size = icmp_param->head_len + icmp_param->data_len; ++ if (size + 20 /* ip header */ > ++ rt.rtdev->get_mtu(rt.rtdev, RT_ICMP_PRIO)) ++ err = -EMSGSIZE; ++ else { ++ rt_socket_reference(icmp_socket); ++ err = rt_ip_build_xmit(icmp_socket, rt_icmp_glue_request_bits, ++ icmp_param, size, &rt, MSG_DONTWAIT); ++ if (err) ++ rt_socket_dereference(icmp_socket); ++ } ++ ++ rtdev_dereference(rt.rtdev); ++ ++ return err; ++} ++ ++/*** ++ * rt_icmp_echo_request - sends an echo request to the specified address ++ */ ++int rt_icmp_send_echo(u32 daddr, u16 id, u16 sequence, size_t msg_size) ++{ ++ struct icmp_bxm icmp_param; ++ unsigned char pattern_buf[msg_size]; ++ off_t pos; ++ ++ /* first purge any potentially pending ICMP fragments */ ++ rt_ip_frag_invalidate_socket(icmp_socket); ++ ++ icmp_param.head.icmph.type = ICMP_ECHO; ++ icmp_param.head.icmph.code = 0; ++ icmp_param.head.icmph.un.echo.id = id; ++ icmp_param.head.icmph.un.echo.sequence = htons(sequence); ++ icmp_param.offset = 0; ++ ++ if (msg_size >= sizeof(nanosecs_abs_t)) { ++ icmp_param.head_len = ++ sizeof(struct icmphdr) + sizeof(nanosecs_abs_t); ++ icmp_param.data_len = msg_size - sizeof(nanosecs_abs_t); ++ ++ for (pos = 0; pos < icmp_param.data_len; pos++) ++ pattern_buf[pos] = pos & 0xFF; ++ ++ icmp_param.head.timestamp = rtdm_clock_read(); ++ } else { ++ icmp_param.head_len = sizeof(struct icmphdr) + msg_size; ++ icmp_param.data_len = 0; ++ ++ for (pos = 0; pos < msg_size; pos++) ++ pattern_buf[pos] = pos & 0xFF; ++ } ++ icmp_param.data.buf = pattern_buf; ++ ++ return 
rt_icmp_send_request(daddr, &icmp_param); ++} ++ ++/*** ++ * rt_icmp_socket ++ */ ++int rt_icmp_socket(struct rtdm_fd *fd) ++{ ++ /* we don't support user-created ICMP sockets */ ++ return -ENOPROTOOPT; ++} ++ ++static struct rt_icmp_control rt_icmp_pointers[NR_ICMP_TYPES + 1] = { ++ /* ECHO REPLY (0) */ ++ { rt_icmp_echo_reply, 0 }, ++ { rt_icmp_discard, 1 }, ++ { rt_icmp_discard, 1 }, ++ ++ /* DEST UNREACH (3) */ ++ { rt_icmp_discard, 1 }, ++ ++ /* SOURCE QUENCH (4) */ ++ { rt_icmp_discard, 1 }, ++ ++ /* REDIRECT (5) */ ++ { rt_icmp_discard, 1 }, ++ { rt_icmp_discard, 1 }, ++ { rt_icmp_discard, 1 }, ++ ++ /* ECHO (8) */ ++ { rt_icmp_echo_request, 0 }, ++ { rt_icmp_discard, 1 }, ++ { rt_icmp_discard, 1 }, ++ ++ /* TIME EXCEEDED (11) */ ++ { rt_icmp_discard, 1 }, ++ ++ /* PARAMETER PROBLEM (12) */ ++ { rt_icmp_discard, 1 }, ++ ++ /* TIMESTAMP (13) */ ++ { rt_icmp_discard, 0 }, ++ ++ /* TIMESTAMP REPLY (14) */ ++ { rt_icmp_discard, 0 }, ++ ++ /* INFO (15) */ ++ { rt_icmp_discard, 0 }, ++ ++ /* INFO REPLY (16) */ ++ { rt_icmp_discard, 0 }, ++ ++ /* ADDR MASK (17) */ ++ { rt_icmp_discard, 0 }, ++ ++ /* ADDR MASK REPLY (18) */ ++ { rt_icmp_discard, 0 } ++}; ++ ++/*** ++ * rt_icmp_dest_pool ++ */ ++struct rtsocket *rt_icmp_dest_socket(struct rtskb *skb) ++{ ++ rt_socket_reference(icmp_socket); ++ return icmp_socket; ++} ++ ++/*** ++ * rt_icmp_rcv ++ */ ++void rt_icmp_rcv(struct rtskb *skb) ++{ ++ struct icmphdr *icmpHdr = skb->h.icmph; ++ unsigned int length = skb->len; ++ ++ /* check header sanity and don't accept fragmented packets */ ++ if ((length < sizeof(struct icmphdr)) || (skb->next != NULL)) { ++ rtdm_printk("RTnet: improper length in icmp packet\n"); ++ goto cleanup; ++ } ++ ++ if (ip_compute_csum((unsigned char *)icmpHdr, length)) { ++ rtdm_printk("RTnet: invalid checksum in icmp packet %d\n", ++ length); ++ goto cleanup; ++ } ++ ++ if (!rtskb_pull(skb, sizeof(struct icmphdr))) { ++ rtdm_printk("RTnet: pull failed %p\n", (skb->sk)); ++ goto cleanup; ++ } ++ ++ if (icmpHdr->type > NR_ICMP_TYPES) { ++ rtdm_printk("RTnet: invalid icmp type\n"); ++ goto cleanup; ++ } ++ ++ /* sane packet, process it */ ++ rt_icmp_pointers[icmpHdr->type].handler(skb); ++ ++cleanup: ++ kfree_rtskb(skb); ++} ++ ++/*** ++ * rt_icmp_rcv_err ++ */ ++void rt_icmp_rcv_err(struct rtskb *skb) ++{ ++ rtdm_printk("RTnet: rt_icmp_rcv err\n"); ++} ++ ++/*** ++ * ICMP-Initialisation ++ */ ++static struct rtinet_protocol icmp_protocol = { .protocol = IPPROTO_ICMP, ++ .dest_socket = ++ &rt_icmp_dest_socket, ++ .rcv_handler = &rt_icmp_rcv, ++ .err_handler = &rt_icmp_rcv_err, ++ .init_socket = ++ &rt_icmp_socket }; ++ ++/*** ++ * rt_icmp_init ++ */ ++void __init rt_icmp_init(void) ++{ ++ int skbs; ++ ++ skbs = rt_bare_socket_init(icmp_fd, IPPROTO_ICMP, RT_ICMP_PRIO, ++ ICMP_REPLY_POOL_SIZE); ++ BUG_ON(skbs < 0); ++ if (skbs < ICMP_REPLY_POOL_SIZE) ++ printk("RTnet: allocated only %d icmp rtskbs\n", skbs); ++ ++ icmp_socket->prot.inet.tos = 0; ++ icmp_fd->refs = 1; ++ ++ rt_inet_add_protocol(&icmp_protocol); ++} ++ ++/*** ++ * rt_icmp_release ++ */ ++void rt_icmp_release(void) ++{ ++ rt_icmp_cleanup_echo_requests(); ++ rt_inet_del_protocol(&icmp_protocol); ++ rt_bare_socket_cleanup(icmp_socket); ++} +--- linux/drivers/xenomai/net/stack/ipv4/af_inet.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/af_inet.c 2021-04-07 16:01:27.077634381 +0800 +@@ -0,0 +1,340 @@ ++/*** ++ * ++ * ipv4/af_inet.c ++ * ++ * rtnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic 
Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++ ++struct route_solicit_params { ++ struct rtnet_device *rtdev; ++ __u32 ip_addr; ++}; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++struct xnvfile_directory ipv4_proc_root; ++EXPORT_SYMBOL_GPL(ipv4_proc_root); ++#endif ++ ++static int route_solicit_handler(struct rt_proc_call *call) ++{ ++ struct route_solicit_params *param; ++ struct rtnet_device *rtdev; ++ ++ param = rtpc_get_priv(call, struct route_solicit_params); ++ rtdev = param->rtdev; ++ ++ if ((rtdev->flags & IFF_UP) == 0) ++ return -ENODEV; ++ ++ rt_arp_solicit(rtdev, param->ip_addr); ++ ++ return 0; ++} ++ ++static void cleanup_route_solicit(void *priv_data) ++{ ++ struct route_solicit_params *param; ++ ++ param = (struct route_solicit_params *)priv_data; ++ rtdev_dereference(param->rtdev); ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP ++static int ping_handler(struct rt_proc_call *call) ++{ ++ struct ipv4_cmd *cmd; ++ int err; ++ ++ cmd = rtpc_get_priv(call, struct ipv4_cmd); ++ ++ rt_icmp_queue_echo_request(call); ++ ++ err = rt_icmp_send_echo(cmd->args.ping.ip_addr, cmd->args.ping.id, ++ cmd->args.ping.sequence, ++ cmd->args.ping.msg_size); ++ if (err < 0) { ++ rt_icmp_dequeue_echo_request(call); ++ return err; ++ } ++ ++ return -CALL_PENDING; ++} ++ ++static void ping_complete_handler(struct rt_proc_call *call, void *priv_data) ++{ ++ struct ipv4_cmd *cmd; ++ struct ipv4_cmd *usr_cmd = (struct ipv4_cmd *)priv_data; ++ ++ if (rtpc_get_result(call) < 0) ++ return; ++ ++ cmd = rtpc_get_priv(call, struct ipv4_cmd); ++ usr_cmd->args.ping.ip_addr = cmd->args.ping.ip_addr; ++ usr_cmd->args.ping.rtt = cmd->args.ping.rtt; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP */ ++ ++static int ipv4_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct ipv4_cmd cmd; ++ struct route_solicit_params params; ++ int ret; ++ ++ ret = copy_from_user(&cmd, (void *)arg, sizeof(cmd)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ switch (request) { ++ case IOC_RT_HOST_ROUTE_ADD: ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ ret = rt_ip_route_add_host(cmd.args.addhost.ip_addr, ++ cmd.args.addhost.dev_addr, rtdev); ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ break; ++ ++ case IOC_RT_HOST_ROUTE_SOLICIT: ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ if (!rtdev_reference(rtdev)) { ++ mutex_unlock(&rtdev->nrt_lock); ++ return -EIDRM; ++ } ++ ++ params.rtdev = rtdev; ++ params.ip_addr = cmd.args.solicit.ip_addr; ++ ++ /* We need the rtpc wrapping because rt_arp_solicit can block on a ++ * real-time lock in the NIC's xmit 
routine. */ ++ ret = rtpc_dispatch_call(route_solicit_handler, 0, ¶ms, ++ sizeof(params), NULL, ++ cleanup_route_solicit); ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ break; ++ ++ case IOC_RT_HOST_ROUTE_DELETE: ++ case IOC_RT_HOST_ROUTE_DELETE_DEV: ++ ret = rt_ip_route_del_host(cmd.args.delhost.ip_addr, rtdev); ++ break; ++ ++ case IOC_RT_HOST_ROUTE_GET: ++ case IOC_RT_HOST_ROUTE_GET_DEV: ++ ret = rt_ip_route_get_host(cmd.args.gethost.ip_addr, ++ cmd.head.if_name, ++ cmd.args.gethost.dev_addr, rtdev); ++ if (ret >= 0) { ++ if (copy_to_user((void *)arg, &cmd, sizeof(cmd)) != 0) ++ ret = -EFAULT; ++ } ++ break; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ case IOC_RT_NET_ROUTE_ADD: ++ ret = rt_ip_route_add_net(cmd.args.addnet.net_addr, ++ cmd.args.addnet.net_mask, ++ cmd.args.addnet.gw_addr); ++ break; ++ ++ case IOC_RT_NET_ROUTE_DELETE: ++ ret = rt_ip_route_del_net(cmd.args.delnet.net_addr, ++ cmd.args.delnet.net_mask); ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP ++ case IOC_RT_PING: ++ ret = rtpc_dispatch_call(ping_handler, cmd.args.ping.timeout, ++ &cmd, sizeof(cmd), ++ ping_complete_handler, NULL); ++ if (ret >= 0) { ++ if (copy_to_user((void *)arg, &cmd, sizeof(cmd)) != 0) ++ ret = -EFAULT; ++ } ++ if (ret < 0) ++ rt_icmp_cleanup_echo_requests(); ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP */ ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ return ret; ++} ++ ++unsigned long rt_inet_aton(const char *ip) ++{ ++ int p, n, c; ++ union { ++ unsigned long l; ++ char c[4]; ++ } u; ++ p = n = 0; ++ while ((c = *ip++)) { ++ if (c != '.') { ++ n = n * 10 + c - '0'; ++ } else { ++ if (n > 0xFF) { ++ return 0; ++ } ++ u.c[p++] = n; ++ n = 0; ++ } ++ } ++ u.c[3] = n; ++ return u.l; ++} ++ ++static void rt_ip_ifup(struct rtnet_device *rtdev, ++ struct rtnet_core_cmd *up_cmd) ++{ ++ struct rtnet_device *tmp; ++ int i; ++ ++ rt_ip_route_del_all(rtdev); /* cleanup routing table */ ++ ++ if (up_cmd->args.up.ip_addr != 0xFFFFFFFF) { ++ rtdev->local_ip = up_cmd->args.up.ip_addr; ++ rtdev->broadcast_ip = up_cmd->args.up.broadcast_ip; ++ } ++ ++ if (rtdev->local_ip != 0) { ++ if (rtdev->flags & IFF_LOOPBACK) { ++ for (i = 0; i < MAX_RT_DEVICES; i++) ++ if ((tmp = rtdev_get_by_index(i)) != NULL) { ++ rt_ip_route_add_host(tmp->local_ip, ++ rtdev->dev_addr, ++ rtdev); ++ rtdev_dereference(tmp); ++ } ++ } else if ((tmp = rtdev_get_loopback()) != NULL) { ++ rt_ip_route_add_host(rtdev->local_ip, tmp->dev_addr, ++ tmp); ++ rtdev_dereference(tmp); ++ } ++ ++ if (rtdev->flags & IFF_BROADCAST) ++ rt_ip_route_add_host(up_cmd->args.up.broadcast_ip, ++ rtdev->broadcast, rtdev); ++ } ++} ++ ++static void rt_ip_ifdown(struct rtnet_device *rtdev) ++{ ++ rt_ip_route_del_all(rtdev); ++} ++ ++static struct rtdev_event_hook rtdev_hook = { .unregister_device = rt_ip_ifdown, ++ .ifup = rt_ip_ifup, ++ .ifdown = rt_ip_ifdown }; ++ ++static struct rtnet_ioctls ipv4_ioctls = { .service_name = "IPv4", ++ .ioctl_type = RTNET_IOC_TYPE_IPV4, ++ .handler = ipv4_ioctl }; ++ ++static int __init rt_ipv4_proto_init(void) ++{ ++ int i; ++ int result; ++ ++ /* Network-Layer */ ++ rt_ip_init(); ++ rt_arp_init(); ++ ++ /* Transport-Layer */ ++ for (i = 0; i < MAX_RT_INET_PROTOCOLS; i++) ++ rt_inet_protocols[i] = NULL; ++ ++ rt_icmp_init(); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ result = xnvfile_init_dir("ipv4", &ipv4_proc_root, &rtnet_proc_root); ++ if (result < 0) ++ goto err1; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ if ((result = rt_ip_routing_init()) < 
0) ++ goto err2; ++ if ((result = rtnet_register_ioctls(&ipv4_ioctls)) < 0) ++ goto err3; ++ ++ rtdev_add_event_hook(&rtdev_hook); ++ ++ return 0; ++ ++err3: ++ rt_ip_routing_release(); ++ ++err2: ++#ifdef CONFIG_XENO_OPT_VFILE ++ xnvfile_destroy_dir(&ipv4_proc_root); ++err1: ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ rt_icmp_release(); ++ rt_arp_release(); ++ rt_ip_release(); ++ ++ return result; ++} ++ ++static void __exit rt_ipv4_proto_release(void) ++{ ++ rt_ip_release(); ++ ++ rtdev_del_event_hook(&rtdev_hook); ++ rtnet_unregister_ioctls(&ipv4_ioctls); ++ rt_ip_routing_release(); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ xnvfile_destroy_dir(&ipv4_proc_root); ++#endif ++ ++ /* Transport-Layer */ ++ rt_icmp_release(); ++ ++ /* Network-Layer */ ++ rt_arp_release(); ++} ++ ++module_init(rt_ipv4_proto_init); ++module_exit(rt_ipv4_proto_release); ++ ++EXPORT_SYMBOL_GPL(rt_inet_aton); +--- linux/drivers/xenomai/net/stack/ipv4/tcp/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/tcp/Makefile 2021-04-07 16:01:27.073634386 +0800 +@@ -0,0 +1,7 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP) += rttcp.o ++ ++rttcp-y := \ ++ tcp.o \ ++ timerwheel.o +--- linux/drivers/xenomai/net/stack/ipv4/tcp/timerwheel.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/tcp/timerwheel.h 2021-04-07 16:01:27.068634393 +0800 +@@ -0,0 +1,62 @@ ++/*** ++ * ++ * ipv4/tcp/timerwheel.h - timerwheel interface for RTnet ++ * ++ * Copyright (C) 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License, version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __TIMERWHEEL_H_ ++#define __TIMERWHEEL_H_ ++ ++#include ++#include ++ ++#define TIMERWHEEL_TIMER_UNUSED -1 ++ ++typedef void (*timerwheel_timer_handler)(void *); ++ ++struct timerwheel_timer { ++ struct list_head link; ++ timerwheel_timer_handler handler; ++ void *data; ++ int slot; ++ volatile int refcount; /* only written by wheel task */ ++}; ++ ++static inline void timerwheel_init_timer(struct timerwheel_timer *timer, ++ timerwheel_timer_handler handler, ++ void *data) ++{ ++ timer->slot = TIMERWHEEL_TIMER_UNUSED; ++ timer->handler = handler; ++ timer->data = data; ++ timer->refcount = 0; ++} ++ ++/* passed data must remain valid till a timer fireup */ ++int timerwheel_add_timer(struct timerwheel_timer *timer, ++ nanosecs_rel_t expires); ++ ++int timerwheel_remove_timer(struct timerwheel_timer *timer); ++ ++void timerwheel_remove_timer_sync(struct timerwheel_timer *timer); ++ ++int timerwheel_init(nanosecs_rel_t timeout, unsigned int granularity); ++ ++void timerwheel_cleanup(void); ++ ++#endif +--- linux/drivers/xenomai/net/stack/ipv4/tcp/timerwheel.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/tcp/timerwheel.c 2021-04-07 16:01:27.063634401 +0800 +@@ -0,0 +1,220 @@ ++/*** ++ * ++ * ipv4/tcp/timerwheel.c - timerwheel implementation for RTnet ++ * ++ * Copyright (C) 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License, version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include "timerwheel.h" ++ ++static struct { ++ /* timer pivot task */ ++ rtdm_task_t pivot_task; ++ ++ /* time length for one period of rotation of timerwheel */ ++ nanosecs_rel_t timeout; ++ ++ /* timer wheel slots for storing timers up to timerwheel_timeout */ ++ unsigned int slots; ++ ++ /* timer wheel interval timeout */ ++ nanosecs_rel_t interval; ++ ++ /* timer wheel interval timeout */ ++ unsigned int interval_base; ++ ++ /* timerwheel array */ ++ struct list_head *ring; ++ ++ /* timerwheel slot counter */ ++ unsigned int current_slot; ++ ++ /* timerwheel current slot lock */ ++ rtdm_lock_t slot_lock; ++} wheel; ++ ++static struct timerwheel_timer *timerwheel_get_from_current_slot(void) ++{ ++ struct timerwheel_timer *timer = NULL; ++ struct list_head *slot_list; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&wheel.slot_lock, context); ++ ++ slot_list = &wheel.ring[wheel.current_slot]; ++ ++ if (!list_empty(slot_list)) { ++ timer = list_first_entry(slot_list, struct timerwheel_timer, ++ link); ++ list_del(&timer->link); ++ timer->slot = TIMERWHEEL_TIMER_UNUSED; ++ timer->refcount++; ++ } ++ ++ rtdm_lock_put_irqrestore(&wheel.slot_lock, context); ++ ++ return timer; ++} ++ ++int timerwheel_add_timer(struct timerwheel_timer *timer, nanosecs_rel_t expires) ++{ ++ rtdm_lockctx_t context; ++ int slot; ++ ++ slot = expires >> wheel.interval_base; ++ ++ if (slot >= wheel.slots) ++ return -EINVAL; ++ ++ rtdm_lock_get_irqsave(&wheel.slot_lock, context); ++ ++ /* cancel timer if it's still running */ ++ if (timer->slot >= 0) ++ list_del(&timer->link); ++ ++ slot = slot + wheel.current_slot; ++ if (slot >= wheel.slots) ++ slot = slot - wheel.slots; ++ ++ list_add_tail(&timer->link, &wheel.ring[slot]); ++ timer->slot = slot; ++ ++ rtdm_lock_put_irqrestore(&wheel.slot_lock, context); ++ ++ return 0; ++} ++ ++static int timerwheel_sleep(void) ++{ ++ int ret; ++ ++ ret = rtdm_task_sleep(wheel.interval); ++ if (ret < 0) ++ return ret; ++ ++ wheel.current_slot++; ++ if (wheel.current_slot == wheel.slots) ++ wheel.current_slot = 0; ++ ++ return 0; ++} ++ ++static void timerwheel_pivot(void *arg) ++{ ++ struct timerwheel_timer *timer; ++ int ret; ++ ++ while (1) { ++ ret = timerwheel_sleep(); ++ if (ret < 0) { ++ rtdm_printk( ++ "timerwheel: timerwheel_pivot interrupted %d\n", ++ -ret); ++ break; ++ } ++ ++ while ((timer = timerwheel_get_from_current_slot())) { ++ timer->handler(timer->data); ++ ++ smp_mb(); ++ timer->refcount--; ++ } ++ } ++} ++ ++int timerwheel_remove_timer(struct timerwheel_timer *timer) ++{ ++ rtdm_lockctx_t context; ++ int ret; ++ ++ rtdm_lock_get_irqsave(&wheel.slot_lock, context); ++ ++ if (timer->slot >= 0) { ++ list_del(&timer->link); ++ timer->slot = TIMERWHEEL_TIMER_UNUSED; ++ ret = 0; ++ } else ++ ret = -ENOENT; ++ ++ rtdm_lock_put_irqrestore(&wheel.slot_lock, context); ++ ++ return ret; ++} ++ ++void timerwheel_remove_timer_sync(struct timerwheel_timer *timer) ++{ ++ u64 interval_ms = wheel.interval; ++ ++ do_div(interval_ms, 1000000); ++ ++ timerwheel_remove_timer(timer); ++ ++ while (timer->refcount > 0) ++ msleep(interval_ms); ++} ++ ++/* ++ timeout - maximum expiration timeout for timers ++ granularity - is an exponent of 2 representing nanoseconds for ++ one wheel tick ++ heapsize - is a number of timers to allocate ++*/ ++int __init timerwheel_init(nanosecs_rel_t timeout, unsigned int granularity) ++{ ++ int i; ++ int err; ++ ++ /* the least possible slot timeout is set for 1ms */ ++ if (granularity < 10) 
++ return -EINVAL; ++ ++ wheel.timeout = timeout; ++ wheel.interval_base = granularity; ++ wheel.slots = (timeout >> granularity) + 1; ++ wheel.interval = (1 << granularity); ++ wheel.current_slot = 0; ++ ++ wheel.ring = ++ kmalloc(sizeof(struct list_head) * wheel.slots, GFP_KERNEL); ++ if (!wheel.ring) ++ return -ENOMEM; ++ ++ for (i = 0; i < wheel.slots; i++) ++ INIT_LIST_HEAD(&wheel.ring[i]); ++ ++ rtdm_lock_init(&wheel.slot_lock); ++ ++ err = rtdm_task_init(&wheel.pivot_task, "rttcp timerwheel", ++ timerwheel_pivot, NULL, 1, 0); ++ if (err) { ++ printk("timerwheel: error on pivot task initialization: %d\n", ++ err); ++ kfree(wheel.ring); ++ } ++ ++ return err; ++} ++ ++void timerwheel_cleanup(void) ++{ ++ rtdm_task_destroy(&wheel.pivot_task); ++ kfree(wheel.ring); ++} +--- linux/drivers/xenomai/net/stack/ipv4/tcp/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/tcp/Kconfig 2021-04-07 16:01:27.059634406 +0800 +@@ -0,0 +1,18 @@ ++config XENO_DRIVERS_NET_RTIPV4_TCP ++ tristate "TCP support" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ ---help--- ++ Enables TCP support of the RTnet Real-Time IPv4 protocol. ++ ++ When the RTnet IPv4 is enabled while this feature is disabled, TCP ++ will be forwarded to the Linux network stack. ++ ++config XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION ++ bool "TCP error injection" ++ depends on XENO_DRIVERS_NET_RTIPV4_TCP ++ ---help--- ++ Enables error injection for incoming TCP packets. This can be used ++ to test both protocol as well as application behavior under error ++ conditions. The per-socket error rate is 0 by default and can be ++ tuned during runtime via the error_rate and multi_error module ++ parameters. +--- linux/drivers/xenomai/net/stack/ipv4/tcp/tcp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/tcp/tcp.c 2021-04-07 16:01:27.054634413 +0800 +@@ -0,0 +1,2462 @@ ++/*** ++ * ++ * ipv4/tcp/tcp.c - TCP implementation for RTnet ++ * ++ * Copyright (C) 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License, version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "timerwheel.h" ++ ++static unsigned int close_timeout = 1000; ++module_param(close_timeout, uint, 0664); ++MODULE_PARM_DESC(close_timeout, ++ "max time (ms) to wait during close for FIN-ACK handshake to complete, default 1000"); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION ++ ++static unsigned int error_rate; ++module_param(error_rate, uint, 0664); ++MODULE_PARM_DESC(error_rate, "simulate packet loss after every n packets"); ++ ++static unsigned int multi_error = 1; ++module_param(multi_error, uint, 0664); ++MODULE_PARM_DESC(multi_error, "on simulated error, drop n packets in a row"); ++ ++static unsigned int counter_start = 1234; ++module_param(counter_start, uint, 0664); ++MODULE_PARM_DESC(counter_start, "start value of per-socket packet counter " ++ "(used for error injection)"); ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */ ++ ++struct tcp_sync { ++ u32 seq; ++ u32 ack_seq; ++ ++ /* Local window size sent to peer */ ++ u16 window; ++ /* Last received destination peer window size */ ++ u16 dst_window; ++}; ++ ++/* ++ connection timeout ++*/ ++/* 5 second */ ++static const nanosecs_rel_t rt_tcp_connection_timeout = 1000000000ull; ++ ++/* retransmission timerwheel timeout */ ++static const u64 rt_tcp_retransmit_timeout = 100000000ull; ++ ++/* ++ keepalive constants ++*/ ++/* 75 second */ ++static const u64 rt_tcp_keepalive_intvl = 75000000000ull; ++/* 9 probes to send */ ++static const u8 rt_tcp_keepalive_probes = 9; ++/* 2 hour */ ++static const u64 rt_tcp_keepalive_timeout = 7200000000000ull; ++ ++/* ++ retransmission timeout ++*/ ++/* 50 millisecond */ ++static const nanosecs_rel_t rt_tcp_retransmission_timeout = 50000000ull; ++/* ++ maximum allowed number of retransmissions ++*/ ++static const unsigned int max_retransmits = 3; ++ ++struct tcp_keepalive { ++ u8 enabled; ++ u32 probes; ++ rtdm_timer_t timer; ++}; ++ ++/*** ++ * This structure is used to register a TCP socket for reception. All ++ * structures are kept in the port_registry array to increase the cache ++ * locality during the critical port lookup in rt_tcp_v4_lookup(). 
++ */ ++ ++/* if dport & daddr are zeroes, it means a listening socket */ ++/* otherwise this is a data structure, which describes a connection */ ++ ++/* NB: sock->prot.inet.saddr & sock->prot.inet.sport values are not used */ ++struct tcp_socket { ++ struct rtsocket sock; /* set up by rt_socket_init() implicitly */ ++ u16 sport; /* local port */ ++ u32 saddr; /* local ip-addr */ ++ u16 dport; /* destination port */ ++ u32 daddr; /* destination ip-addr */ ++ ++ u8 tcp_state; /* tcp connection state */ ++ ++ u8 is_binding; /* if set, tcp socket is in port binding progress */ ++ u8 is_bound; /* if set, tcp socket is already port bound */ ++ u8 is_valid; /* if set, read() and write() can process */ ++ u8 is_accepting; /* if set, accept() is in progress */ ++ u8 is_accepted; /* if set, accept() is already called */ ++ u8 is_closed; /* close() call for resource deallocation follows */ ++ ++ rtdm_event_t send_evt; /* write request is permissible */ ++ rtdm_event_t conn_evt; /* connection event */ ++ ++ struct dest_route rt; ++ struct tcp_sync sync; ++ struct tcp_keepalive keepalive; ++ rtdm_lock_t socket_lock; ++ ++ struct hlist_node link; ++ ++ nanosecs_rel_t sk_sndtimeo; ++ ++ /* retransmission routine data */ ++ u32 nacked_first; ++ unsigned int timer_state; ++ struct rtskb_queue retransmit_queue; ++ struct timerwheel_timer timer; ++ ++ struct completion fin_handshake; ++ rtdm_nrtsig_t close_sig; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION ++ unsigned int packet_counter; ++ unsigned int error_rate; ++ unsigned int multi_error; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */ ++}; ++ ++struct rt_tcp_dispatched_packet_send_cmd { ++ __be32 flags; /* packet flags value */ ++ struct tcp_socket *ts; ++}; ++ ++/*** ++ * Automatic port number assignment ++ ++ * The automatic assignment of port numbers to unbound sockets is realised as ++ * a simple addition of two values: ++ * - the socket ID (lower 8 bits of file descriptor) which is set during ++ * initialisation and left unchanged afterwards ++ * - the start value tcp_auto_port_start which is a module parameter ++ ++ * tcp_auto_port_mask, also a module parameter, is used to define the range of ++ * port numbers which are used for automatic assignment. Any number within ++ * this range will be rejected when passed to bind_rt(). 
++ ++ */ ++ ++MODULE_LICENSE("GPL"); ++ ++static struct { ++ struct rtdm_dev_context dummy; ++ struct tcp_socket rst_socket; ++} rst_socket_container; ++ ++#define rst_fd (&rst_socket_container.dummy.fd) ++#define rst_socket (*(struct tcp_socket *)rtdm_fd_to_private(rst_fd)) ++ ++static u32 tcp_auto_port_start = 1024; ++static u32 tcp_auto_port_mask = ~(RT_TCP_SOCKETS - 1); ++static u32 free_ports = RT_TCP_SOCKETS; ++#define RT_PORT_BITMAP_WORDS \ ++ ((RT_TCP_SOCKETS + BITS_PER_LONG - 1) / BITS_PER_LONG) ++static unsigned long port_bitmap[RT_PORT_BITMAP_WORDS]; ++ ++static struct tcp_socket *port_registry[RT_TCP_SOCKETS]; ++static DEFINE_RTDM_LOCK(tcp_socket_base_lock); ++ ++static struct hlist_head port_hash[RT_TCP_SOCKETS * 2]; ++#define port_hash_mask (RT_TCP_SOCKETS * 2 - 1) ++ ++module_param(tcp_auto_port_start, uint, 0444); ++module_param(tcp_auto_port_mask, uint, 0444); ++MODULE_PARM_DESC(tcp_auto_port_start, "Start of automatically assigned " ++ "port range for TCP"); ++MODULE_PARM_DESC(tcp_auto_port_mask, "Mask that defines port range for TCP " ++ "for automatic assignment"); ++ ++static inline struct tcp_socket *port_hash_search(u32 saddr, u16 sport) ++{ ++ u32 bucket = sport & port_hash_mask; ++ struct tcp_socket *ts; ++ ++ hlist_for_each_entry (ts, &port_hash[bucket], link) ++ if (ts->sport == sport && ++ (saddr == INADDR_ANY || ts->saddr == saddr || ++ ts->saddr == INADDR_ANY)) ++ return ts; ++ ++ return NULL; ++} ++ ++static int port_hash_insert(struct tcp_socket *ts, u32 saddr, u16 sport) ++{ ++ u32 bucket; ++ ++ if (port_hash_search(saddr, sport)) ++ return -EADDRINUSE; ++ ++ bucket = sport & port_hash_mask; ++ ts->saddr = saddr; ++ ts->sport = sport; ++ ts->daddr = 0; ++ ts->dport = 0; ++ ++ hlist_add_head(&ts->link, &port_hash[bucket]); ++ ++ return 0; ++} ++ ++static inline void port_hash_del(struct tcp_socket *ts) ++{ ++ hlist_del(&ts->link); ++} ++ ++/*** ++ * rt_tcp_v4_lookup ++ */ ++static struct rtsocket *rt_tcp_v4_lookup(u32 daddr, u16 dport) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_socket *ts; ++ int ret; ++ ++ rtdm_lock_get_irqsave(&tcp_socket_base_lock, context); ++ ts = port_hash_search(daddr, dport); ++ ++ if (ts != NULL) { ++ ret = rt_socket_reference(&ts->sock); ++ if (ret == 0 || (ret == -EIDRM && ts->is_closed)) { ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, ++ context); ++ ++ return &ts->sock; ++ } ++ } ++ ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ ++ return NULL; ++} ++ ++/* test seq1 <= seq2 */ ++static inline int rt_tcp_before(__u32 seq1, __u32 seq2) ++{ ++ return (__s32)(seq1 - seq2) <= 0; ++} ++ ++/* test seq1 => seq2 */ ++static inline int rt_tcp_after(__u32 seq1, __u32 seq2) ++{ ++ return (__s32)(seq2 - seq1) <= 0; ++} ++ ++static inline u32 rt_tcp_compute_ack_seq(struct tcphdr *th, u32 len) ++{ ++ u32 ack_seq = ntohl(th->seq) + len; ++ ++ if (unlikely(th->syn || th->fin)) ++ ack_seq++; ++ ++ return ack_seq; ++} ++ ++static void rt_tcp_keepalive_start(struct tcp_socket *ts) ++{ ++ if (ts->tcp_state == TCP_ESTABLISHED) { ++ rtdm_timer_start(&ts->keepalive.timer, rt_tcp_keepalive_timeout, ++ 0, RTDM_TIMERMODE_RELATIVE); ++ } ++} ++ ++static void rt_tcp_keepalive_stop(struct tcp_socket *ts) ++{ ++ if (ts->tcp_state == TCP_ESTABLISHED) { ++ rtdm_timer_stop(&ts->keepalive.timer); ++ } ++} ++ ++#ifdef YET_UNUSED ++static void rt_tcp_keepalive_timer(rtdm_timer_t *timer); ++ ++static void rt_tcp_keepalive_enable(struct tcp_socket *ts) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_keepalive *keepalive; ++ ++ 
rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ keepalive = &ts->keepalive; ++ ++ if (keepalive->enabled) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ keepalive->probes = rt_tcp_keepalive_probes; ++ ++ rtdm_timer_init(&keepalive->timer, rt_tcp_keepalive_timer, ++ "RT TCP keepalive timer"); ++ ++ rt_tcp_keepalive_start(ts); ++ ++ keepalive->enabled = 1; ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++} ++#endif ++ ++static void rt_tcp_keepalive_disable(struct tcp_socket *ts) ++{ ++ struct tcp_keepalive *keepalive; ++ ++ keepalive = &ts->keepalive; ++ ++ if (!keepalive->enabled) { ++ return; ++ } ++ ++ rt_tcp_keepalive_stop(ts); ++ rtdm_timer_destroy(&keepalive->timer); ++ ++ keepalive->enabled = 0; ++} ++ ++static void rt_tcp_keepalive_feed(struct tcp_socket *ts) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_keepalive *keepalive; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ keepalive = &ts->keepalive; ++ ++ if (ts->tcp_state == TCP_ESTABLISHED && ts->keepalive.enabled) { ++ keepalive->probes = rt_tcp_keepalive_probes; ++ ++ /* Restart keepalive timer */ ++ rtdm_timer_stop(&keepalive->timer); ++ rtdm_timer_start(&keepalive->timer, rt_tcp_keepalive_timeout, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } else { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } ++} ++ ++static int rt_tcp_socket_invalidate(struct tcp_socket *ts, u8 to_state) ++{ ++ int signal = ts->is_valid; ++ ++ ts->tcp_state = to_state; ++ ++ /* ++ multiple invalidation could happen without fuss, ++ see rt_tcp_close(), rt_tcp_rcv(), timeout expiration etc. ++ */ ++ if (ts->is_valid) { ++ ts->is_valid = 0; ++ ++ if (ts->keepalive.enabled) { ++ rt_tcp_keepalive_stop(ts); ++ } ++ } ++ ++ return signal; ++} ++ ++static void rt_tcp_socket_invalidate_signal(struct tcp_socket *ts) ++{ ++ /* awake all readers and writers destroying events */ ++ rtdm_sem_destroy(&ts->sock.pending_sem); ++ rtdm_event_destroy(&ts->send_evt); ++} ++ ++static void rt_tcp_socket_validate(struct tcp_socket *ts) ++{ ++ ts->tcp_state = TCP_ESTABLISHED; ++ ++ ts->is_valid = 1; ++ ++ if (ts->keepalive.enabled) { ++ rt_tcp_keepalive_start(ts); ++ } ++ ++ rtdm_event_init(&ts->send_evt, 0); ++} ++ ++/*** ++ * rt_tcp_retransmit_handler - timerwheel handler to process a retransmission ++ * @data: pointer to a rttcp socket structure ++ */ ++static void rt_tcp_retransmit_handler(void *data) ++{ ++ struct tcp_socket *ts = (struct tcp_socket *)data; ++ struct rtskb *skb; ++ rtdm_lockctx_t context; ++ int signal; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (unlikely(rtskb_queue_empty(&ts->retransmit_queue))) { ++ /* handled, but retransmission queue is empty */ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ rtdm_printk("rttcp: bug in RT TCP retransmission routine\n"); ++ return; ++ } ++ ++ if (ts->tcp_state == TCP_CLOSE) { ++ /* socket is already closed */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ if (ts->timer_state) { ++ /* more tries */ ++ ts->timer_state--; ++ timerwheel_add_timer(&ts->timer, rt_tcp_retransmission_timeout); ++ ++ /* warning, rtskb_clone is under lock */ ++ skb = rtskb_clone(ts->retransmit_queue.first, ++ &ts->sock.skb_pool); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ /* BUG, window changes are not respected */ ++ if (unlikely(rtdev_xmit(skb)) != 0) { ++ kfree_rtskb(skb); ++ rtdm_printk( ++ "rttcp: packet retransmission from timer 
failed\n"); ++ } ++ } else { ++ ts->timer_state = max_retransmits; ++ ++ /* report about connection lost */ ++ signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++ ++ /* retransmission queue will be cleaned up in rt_tcp_socket_destruct */ ++ rtdm_printk("rttcp: connection is lost by NACK timeout\n"); ++ } ++} ++ ++/*** ++ * rt_tcp_retransmit_ack - remove skbs from retransmission queue on ACK ++ * @ts: rttcp socket ++ * @ack_seq: received ACK sequence value ++ */ ++static void rt_tcp_retransmit_ack(struct tcp_socket *ts, u32 ack_seq) ++{ ++ struct rtskb *skb; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ /* ++ ACK, but retransmission queue is empty ++ This could happen on repeated ACKs ++ */ ++ if (rtskb_queue_empty(&ts->retransmit_queue)) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ /* ++ Check ts->nacked_first value firstly to ensure that ++ skb for retransmission is present in the queue, otherwise ++ retransmission queue will be drained completely ++ */ ++ if (!rt_tcp_before(ts->nacked_first, ack_seq)) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ if (timerwheel_remove_timer(&ts->timer) != 0) { ++ /* already timed out */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++dequeue_loop: ++ if (ts->tcp_state == TCP_CLOSE) { ++ /* warn about queue safety in race with anyone, ++ who closes the socket */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ if ((skb = __rtskb_dequeue(&ts->retransmit_queue)) == NULL) { ++ ts->timer_state = max_retransmits; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ if (rt_tcp_before(ts->nacked_first, ack_seq)) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ kfree_rtskb(skb); ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ goto dequeue_loop; ++ } ++ ++ /* Put NACKed skb back to queue */ ++ /* BUG, need to respect half-acknowledged packets */ ++ ts->nacked_first = ntohl(skb->h.th->seq) + 1; ++ ++ __rtskb_queue_head(&ts->retransmit_queue, skb); ++ ++ /* Have more packages in retransmission queue, restart the timer */ ++ timerwheel_add_timer(&ts->timer, rt_tcp_retransmission_timeout); ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++} ++ ++/*** ++ * rt_tcp_retransmit_send - enqueue a skb to retransmission queue (not locked) ++ * @ts: rttcp socket ++ * @skb: a copied skb for enqueueing ++ */ ++static void rt_tcp_retransmit_send(struct tcp_socket *ts, struct rtskb *skb) ++{ ++ if (rtskb_queue_empty(&ts->retransmit_queue)) { ++ /* retransmission queue is empty */ ++ ts->nacked_first = ntohl(skb->h.th->seq) + 1; ++ ++ __rtskb_queue_tail(&ts->retransmit_queue, skb); ++ ++ timerwheel_add_timer(&ts->timer, rt_tcp_retransmission_timeout); ++ } else { ++ /* retransmission queue is not empty */ ++ __rtskb_queue_tail(&ts->retransmit_queue, skb); ++ } ++} ++ ++static int rt_ip_build_frame(struct rtskb *skb, struct rtsocket *sk, ++ struct dest_route *rt, struct iphdr *iph) ++{ ++ int ret; ++ struct rtnet_device *rtdev = rt->rtdev; ++ ++ RTNET_ASSERT(rtdev->hard_header, return -EBADF;); ++ ++ if (!rtdev_reference(rt->rtdev)) ++ return -EIDRM; ++ ++ iph->ihl = 5; /* 20 byte header only - no TCP options */ ++ ++ skb->nh.iph = iph; ++ ++ iph->version = 4; ++ iph->tos = sk->prot.inet.tos; ++ iph->tot_len = htons(skb->len); /* length of IP header 
and IP payload */ ++ iph->id = htons(0x00); /* zero IP frame id */ ++ iph->frag_off = htons(IP_DF); /* and no more frames */ ++ iph->ttl = 255; ++ iph->protocol = sk->protocol; ++ iph->saddr = rtdev->local_ip; ++ iph->daddr = rt->ip; ++ iph->check = 0; /* required to compute correct checksum */ ++ iph->check = ip_fast_csum((u8 *)iph, 5 /*iph->ihl*/); ++ ++ ret = rtdev->hard_header(skb, rtdev, ETH_P_IP, rt->dev_addr, ++ rtdev->dev_addr, skb->len); ++ rtdev_dereference(rt->rtdev); ++ ++ if (ret != rtdev->hard_header_len) { ++ rtdm_printk("rttcp: rt_ip_build_frame: error on lower level\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static void rt_tcp_build_header(struct tcp_socket *ts, struct rtskb *skb, ++ __be32 flags, u8 is_keepalive) ++{ ++ u32 wcheck; ++ u8 tcphdrlen = 20; ++ u8 iphdrlen = 20; ++ struct tcphdr *th; ++ ++ th = skb->h.th; ++ th->source = ts->sport; ++ th->dest = ts->dport; ++ ++ th->seq = htonl(ts->sync.seq); ++ ++ if (unlikely(is_keepalive)) ++ th->seq--; ++ ++ tcp_flag_word(th) = flags; ++ th->ack_seq = htonl(ts->sync.ack_seq); ++ th->window = htons(ts->sync.window); ++ ++ th->doff = tcphdrlen >> 2; /* No options for now */ ++ th->res1 = 0; ++ th->check = 0; ++ th->urg_ptr = 0; ++ ++ /* compute checksum */ ++ wcheck = csum_partial(th, tcphdrlen, 0); ++ ++ if (skb->len - tcphdrlen - iphdrlen) { ++ wcheck = csum_partial(skb->data + tcphdrlen + iphdrlen, ++ skb->len - tcphdrlen - iphdrlen, wcheck); ++ } ++ ++ th->check = ++ tcp_v4_check(skb->len - iphdrlen, ts->saddr, ts->daddr, wcheck); ++} ++ ++static int rt_tcp_segment(struct dest_route *rt, struct tcp_socket *ts, ++ __be32 flags, u32 data_len, u8 *data_ptr, ++ u8 is_keepalive) ++{ ++ struct tcphdr *th; ++ struct rtsocket *sk = &ts->sock; ++ struct rtnet_device *rtdev = rt->rtdev; ++ struct rtskb *skb; ++ struct iphdr *iph; ++ struct rtskb *cloned_skb; ++ rtdm_lockctx_t context; ++ ++ int ret; ++ ++ u32 hh_len = (rtdev->hard_header_len + 15) & ~15; ++ u32 prio = (volatile unsigned int)sk->priority; ++ u32 mtu = rtdev->get_mtu(rtdev, prio); ++ ++ u8 *data = NULL; ++ ++ if ((skb = alloc_rtskb(mtu + hh_len + 15, &sk->skb_pool)) == NULL) { ++ rtdm_printk( ++ "rttcp: no more elements in skb_pool for allocation\n"); ++ return -ENOBUFS; ++ } ++ ++ /* rtskb_reserve(skb, hh_len + 20); */ ++ rtskb_reserve(skb, hh_len); ++ ++ iph = (struct iphdr *)rtskb_put(skb, 20); /* length of IP header */ ++ skb->nh.iph = iph; ++ ++ th = (struct tcphdr *)rtskb_put(skb, 20); /* length of TCP header */ ++ skb->h.th = th; ++ ++ if (data_len) { /* check for available place */ ++ data = (u8 *)rtskb_put(skb, ++ data_len); /* length of TCP payload */ ++ if (!memcpy(data, (void *)data_ptr, data_len)) { ++ ret = -EFAULT; ++ goto error; ++ } ++ } ++ ++ /* used local phy MTU value */ ++ if (data_len > mtu) ++ data_len = mtu; ++ ++ skb->rtdev = rtdev; ++ skb->priority = prio; ++ ++ /* do not validate socket connection on xmit ++ this should be done at upper level */ ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ rt_tcp_build_header(ts, skb, flags, is_keepalive); ++ ++ if ((ret = rt_ip_build_frame(skb, sk, rt, iph)) != 0) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto error; ++ } ++ ++ /* add rtskb entry to the socket retransmission queue */ ++ if (ts->tcp_state != TCP_CLOSE && ++ ((flags & (TCP_FLAG_SYN | TCP_FLAG_FIN)) || data_len)) { ++ /* rtskb_clone below is called under lock, this is an admission, ++ because for now there is no rtskb copy by reference */ ++ cloned_skb = rtskb_clone(skb, &ts->sock.skb_pool); ++ if 
(!cloned_skb) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdm_printk("rttcp: cann't clone skb\n"); ++ ret = -ENOMEM; ++ goto error; ++ } ++ ++ rt_tcp_retransmit_send(ts, cloned_skb); ++ } ++ ++ /* need to update sync here, because it is safe way in ++ comparison with races on fast ACK response */ ++ if (flags & (TCP_FLAG_FIN | TCP_FLAG_SYN)) ++ ts->sync.seq++; ++ ++ ts->sync.seq += data_len; ++ ts->sync.dst_window -= data_len; ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ /* ignore return value from rtdev_xmit */ ++ /* the packet was enqueued and on error will be retransmitted later */ ++ /* on critical error after retransmission timeout the connection will ++ be closed by connection lost */ ++ rtdev_xmit(skb); ++ ++ return data_len; ++ ++error: ++ kfree_rtskb(skb); ++ return ret; ++} ++ ++static int rt_tcp_send(struct tcp_socket *ts, __be32 flags) ++{ ++ struct dest_route rt; ++ int ret; ++ ++ /* ++ * We may not have a route yet during setup. But once it is set, it stays ++ * until the socket died. ++ */ ++ if (likely(ts->rt.rtdev)) { ++ ret = rt_tcp_segment(&ts->rt, ts, flags, 0, NULL, 0); ++ } else { ++ ret = rt_ip_route_output(&rt, ts->daddr, ts->saddr); ++ if (ret == 0) { ++ ret = rt_tcp_segment(&rt, ts, flags, 0, NULL, 0); ++ rtdev_dereference(rt.rtdev); ++ } ++ } ++ if (ret < 0) ++ rtdm_printk("rttcp: can't send a packet: err %d\n", -ret); ++ return ret; ++} ++ ++#ifdef YET_UNUSED ++static void rt_tcp_keepalive_timer(rtdm_timer_t *timer) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_keepalive *keepalive = ++ container_of(timer, struct tcp_keepalive, timer); ++ ++ struct tcp_socket *ts = ++ container_of(keepalive, struct tcp_socket, keepalive); ++ int signal = 0; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (keepalive->probes) { ++ /* Send a probe */ ++ if (rt_tcp_segment(&ts->rt, ts, 0, 0, NULL, 1) < 0) { ++ /* data receiving and sending is not possible anymore */ ++ signal = rt_tcp_socket_invalidate(ts, TCP_TIME_WAIT); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } ++ ++ keepalive->probes--; ++ rtdm_timer_start_in_handler(&keepalive->timer, ++ rt_tcp_keepalive_intvl, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } else { ++ /* data receiving and sending is not possible anymore */ ++ ++ signal = rt_tcp_socket_invalidate(ts, TCP_TIME_WAIT); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++} ++#endif ++ ++static inline u32 rt_tcp_initial_seq(void) ++{ ++ uint64_t clock_val = rtdm_clock_read_monotonic(); ++ return (u32)(clock_val ^ (clock_val >> 32)); ++} ++ ++/*** ++ * rt_tcp_dest_socket ++ */ ++static struct rtsocket *rt_tcp_dest_socket(struct rtskb *skb) ++{ ++ struct tcphdr *th = skb->h.th; ++ ++ u32 saddr = skb->nh.iph->saddr; ++ u32 daddr = skb->nh.iph->daddr; ++ u32 sport = th->source; ++ u32 dport = th->dest; ++ ++ u32 data_len; ++ ++ if (tcp_v4_check(skb->len, saddr, daddr, ++ csum_partial(skb->data, skb->len, 0))) { ++ rtdm_printk("rttcp: invalid TCP packet checksum, dropped\n"); ++ return NULL; /* Invalid checksum, drop the packet */ ++ } ++ ++ /* find the destination socket */ ++ if ((skb->sk = rt_tcp_v4_lookup(daddr, dport)) == NULL) { ++ /* ++ rtdm_printk("Not found addr:0x%08x, port: 0x%04x\n", daddr, dport); ++ */ ++ if (!th->rst) { ++ /* No listening socket found, send RST|ACK */ ++ rst_socket.saddr = daddr; ++ rst_socket.daddr = saddr; ++ rst_socket.sport = dport; ++ rst_socket.dport 
= sport; ++ ++ data_len = skb->len - (th->doff << 2); ++ ++ rst_socket.sync.seq = 0; ++ rst_socket.sync.ack_seq = ++ rt_tcp_compute_ack_seq(th, data_len); ++ ++ if (rt_ip_route_output(&rst_socket.rt, daddr, saddr) == ++ 0) { ++ rt_socket_reference(&rst_socket.sock); ++ rt_tcp_send(&rst_socket, ++ TCP_FLAG_ACK | TCP_FLAG_RST); ++ rtdev_dereference(rst_socket.rt.rtdev); ++ } ++ } ++ } ++ ++ return skb->sk; ++} ++ ++static void rt_tcp_window_update(struct tcp_socket *ts, u16 window) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (ts->sync.dst_window) { ++ ts->sync.dst_window = window; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ if (!window) { ++ /* clear send event status */ ++ rtdm_event_clear(&ts->send_evt); ++ } ++ } else { ++ if (window) { ++ ts->sync.dst_window = window; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ /* set send event status */ ++ rtdm_event_signal(&ts->send_evt); ++ } else { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } ++ } ++} ++ ++/*** ++ * rt_tcp_rcv ++ */ ++static void rt_tcp_rcv(struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_socket *ts; ++ struct tcphdr *th = skb->h.th; ++ unsigned int data_len = skb->len - (th->doff << 2); ++ u32 seq = ntohl(th->seq); ++ int signal; ++ ++ ts = container_of(skb->sk, struct tcp_socket, sock); ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION ++ if (ts->error_rate > 0) { ++ if ((ts->packet_counter++ % error_rate) < ts->multi_error) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */ ++ ++ /* Check for daddr/dport correspondence to values stored in ++ selected socket from hash */ ++ if (ts->tcp_state != TCP_LISTEN && (ts->daddr != skb->nh.iph->saddr || ++ ts->dport != skb->h.th->source)) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } ++ ++ /* Check if it is a keepalive probe */ ++ if (ts->sync.ack_seq == (seq + 1) && ts->tcp_state == TCP_ESTABLISHED) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ goto feed; ++ } ++ ++ if (ts->tcp_state == TCP_SYN_SENT) { ++ ts->sync.ack_seq = rt_tcp_compute_ack_seq(th, data_len); ++ ++ if (th->syn && th->ack) { ++ rt_tcp_socket_validate(ts); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdm_event_signal(&ts->conn_evt); ++ /* Send ACK */ ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ goto feed; ++ } ++ ++ ts->tcp_state = TCP_CLOSE; ++ ts->sync.seq = ntohl(th->ack_seq); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ /* Send RST|ACK */ ++ rtdm_event_signal(&ts->conn_evt); ++ rt_tcp_send(ts, TCP_FLAG_RST | TCP_FLAG_ACK); ++ goto drop; ++ } ++ ++ /* Check for SEQ correspondence to determine the connection relevance */ ++ ++ /* OR-list of conditions to be satisfied: ++ * ++ * th->ack && rt_tcp_after(ts->nacked_first, ntohl(th->ack_seq)) ++ * th->ack && th->rst && ... 
++ * th->syn && (ts->tcp_state == TCP_LISTEN || ++ ts->tcp_state == TCP_SYN_SENT) ++ * rt_tcp_after(seq, ts->sync.ack_seq) && ++ rt_tcp_before(seq, ts->sync.ack_seq + ts->sync.window) ++ */ ++ ++ if ((rt_tcp_after(seq, ts->sync.ack_seq) && ++ rt_tcp_before(seq, ts->sync.ack_seq + ts->sync.window)) || ++ th->rst || ++ (th->syn && ++ (ts->tcp_state == TCP_LISTEN || ts->tcp_state == TCP_SYN_SENT))) { ++ /* everything is ok */ ++ } else if (rt_tcp_after(seq, ts->sync.ack_seq - data_len)) { ++ /* retransmission of data we already acked */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ goto drop; ++ } else { ++ /* drop forward ack */ ++ if (th->ack && ++ /* but reset ack from old connection */ ++ ts->tcp_state == TCP_ESTABLISHED) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdm_printk( ++ "rttcp: dropped unappropriate ACK packet %u\n", ++ ts->sync.ack_seq); ++ goto drop; ++ } ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdm_printk("rttcp: sequence number is not in window, " ++ "dropped (failed: %u <= %u <= %u)\n", ++ ts->sync.ack_seq, seq, ++ ts->sync.ack_seq + ts->sync.window); ++ ++ /* That's a forced RST for a lost connection */ ++ rst_socket.saddr = skb->nh.iph->daddr; ++ rst_socket.daddr = skb->nh.iph->saddr; ++ rst_socket.sport = th->dest; ++ rst_socket.dport = th->source; ++ ++ rst_socket.sync.seq = ntohl(th->ack_seq); ++ rst_socket.sync.ack_seq = rt_tcp_compute_ack_seq(th, data_len); ++ ++ if (rt_ip_route_output(&rst_socket.rt, rst_socket.daddr, ++ rst_socket.saddr) == 0) { ++ rt_socket_reference(&rst_socket.sock); ++ rt_tcp_send(&rst_socket, TCP_FLAG_RST | TCP_FLAG_ACK); ++ rtdev_dereference(rst_socket.rt.rtdev); ++ } ++ goto drop; ++ } ++ ++ if (th->rst) { ++ if (ts->tcp_state == TCP_SYN_RECV) { ++ ts->tcp_state = TCP_LISTEN; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } else { ++ /* Drop our half-open connection, peer obviously went away. 
*/ ++ signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++ ++ goto drop; ++ } ++ } ++ ++ ts->sync.ack_seq = rt_tcp_compute_ack_seq(th, data_len); ++ ++ if (th->fin) { ++ if (ts->tcp_state == TCP_ESTABLISHED) { ++ /* Send ACK */ ++ signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE_WAIT); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++ ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ goto feed; ++ } else if ((ts->tcp_state == TCP_FIN_WAIT1 && th->ack) || ++ ts->tcp_state == TCP_FIN_WAIT2) { ++ /* Send ACK */ ++ ts->tcp_state = TCP_TIME_WAIT; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ /* data receiving is not possible anymore */ ++ rtdm_sem_destroy(&ts->sock.pending_sem); ++ rtdm_nrtsig_pend(&ts->close_sig); ++ goto feed; ++ } else if (ts->tcp_state == TCP_FIN_WAIT1) { ++ /* Send ACK */ ++ ts->tcp_state = TCP_CLOSING; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ /* data receiving is not possible anymore */ ++ rtdm_sem_destroy(&ts->sock.pending_sem); ++ goto feed; ++ } else { ++ /* just drop it */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } ++ } ++ ++ if (th->syn) { ++ /* Need to differentiate LISTEN socket from ESTABLISHED one */ ++ /* Both of them have the same sport/saddr, but different dport/daddr */ ++ /* dport is unknown if it is the first connection of n */ ++ ++ if (ts->tcp_state == TCP_LISTEN) { ++ /* Need to store ts->seq while sending SYN earlier */ ++ /* The socket shall be in TCP_LISTEN state */ ++ ++ /* safe to update ts->saddr here due to a single task for ++ rt_tcp_rcv() and rt_tcp_dest_socket() callers */ ++ ts->saddr = skb->nh.iph->daddr; ++ ++ ts->daddr = skb->nh.iph->saddr; ++ ts->dport = th->source; ++ ts->sync.seq = rt_tcp_initial_seq(); ++ ts->sync.window = 4096; ++ ts->tcp_state = TCP_SYN_RECV; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ /* Send SYN|ACK */ ++ rt_tcp_send(ts, TCP_FLAG_SYN | TCP_FLAG_ACK); ++ goto drop; ++ } ++ ++ /* Send RST|ACK */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_RST | TCP_FLAG_ACK); ++ goto drop; ++ } ++ ++ /* ACK received without SYN, FIN or RST flags */ ++ if (th->ack) { ++ /* Check ack sequence */ ++ if (rt_tcp_before(ts->sync.seq + 1, ntohl(th->ack_seq))) { ++ rtdm_printk("rttcp: unexpected ACK %u %u %u\n", ++ ts->sync.seq, ts->nacked_first, ++ ntohl(th->ack_seq)); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } ++ ++ if (ts->tcp_state == TCP_LAST_ACK) { ++ /* close connection and free socket data */ ++ ts->tcp_state = TCP_CLOSE; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ /* socket destruction will be done on close() */ ++ rtdm_nrtsig_pend(&ts->close_sig); ++ goto drop; ++ } else if (ts->tcp_state == TCP_FIN_WAIT1) { ++ ts->tcp_state = TCP_FIN_WAIT2; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto feed; ++ } else if (ts->tcp_state == TCP_SYN_RECV) { ++ rt_tcp_socket_validate(ts); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdm_event_signal(&ts->conn_evt); ++ goto feed; ++ } else if (ts->tcp_state == TCP_CLOSING) { ++ ts->tcp_state = TCP_TIME_WAIT; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ /* socket destruction will be done on close() */ ++ rtdm_nrtsig_pend(&ts->close_sig); ++ goto feed; ++ } ++ } 
++ ++ if (ts->tcp_state != TCP_ESTABLISHED) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } ++ ++ if (data_len == 0) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto feed; ++ } ++ ++ /* Send ACK */ ++ ts->sync.window -= data_len; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ ++ rtskb_queue_tail(&skb->sk->incoming, skb); ++ rtdm_sem_up(&ts->sock.pending_sem); ++ ++ /* inform retransmission subsystem about arrived ack */ ++ if (th->ack) { ++ rt_tcp_retransmit_ack(ts, ntohl(th->ack_seq)); ++ } ++ ++ rt_tcp_keepalive_feed(ts); ++ rt_tcp_window_update(ts, ntohs(th->window)); ++ ++ return; ++ ++feed: ++ /* inform retransmission subsystem about arrived ack */ ++ if (th->ack) { ++ rt_tcp_retransmit_ack(ts, ntohl(th->ack_seq)); ++ } ++ ++ rt_tcp_keepalive_feed(ts); ++ rt_tcp_window_update(ts, ntohs(th->window)); ++ ++drop: ++ kfree_rtskb(skb); ++ return; ++} ++ ++/*** ++ * rt_tcp_rcv_err ++ */ ++static void rt_tcp_rcv_err(struct rtskb *skb) ++{ ++ rtdm_printk("rttcp: rt_tcp_rcv err\n"); ++} ++ ++static int rt_tcp_window_send(struct tcp_socket *ts, u32 data_len, u8 *data_ptr) ++{ ++ u32 dst_window = ts->sync.dst_window; ++ int ret; ++ ++ if (data_len > dst_window) ++ data_len = dst_window; ++ ++ if ((ret = rt_tcp_segment(&ts->rt, ts, TCP_FLAG_ACK, data_len, data_ptr, ++ 0)) < 0) { ++ rtdm_printk("rttcp: cann't send a packet: err %d\n", -ret); ++ return ret; ++ } ++ ++ return ret; ++} ++ ++static void rt_tcp_close_signal_handler(rtdm_nrtsig_t *nrtsig, void *arg) ++{ ++ complete_all((struct completion *)arg); ++} ++ ++static int rt_tcp_socket_create(struct tcp_socket *ts) ++{ ++ rtdm_lockctx_t context; ++ int i; ++ int index; ++ struct rtsocket *sock = &ts->sock; ++ ++ sock->prot.inet.saddr = INADDR_ANY; ++ sock->prot.inet.state = TCP_CLOSE; ++ sock->prot.inet.tos = 0; ++ /* ++ rtdm_printk("rttcp: rt_tcp_socket_create 0x%p\n", ts); ++ */ ++ rtdm_lock_init(&ts->socket_lock); ++ ++ ts->rt.rtdev = NULL; ++ ++ ts->tcp_state = TCP_CLOSE; ++ ++ ts->is_accepting = 0; ++ ts->is_accepted = 0; ++ ts->is_binding = 0; ++ ts->is_bound = 0; ++ ts->is_valid = 0; ++ ts->is_closed = 0; ++ ++ ts->sk_sndtimeo = RTDM_TIMEOUT_INFINITE; ++ ++ rtdm_event_init(&ts->conn_evt, 0); ++ ++ ts->keepalive.enabled = 0; ++ ++ ts->timer_state = max_retransmits; ++ timerwheel_init_timer(&ts->timer, rt_tcp_retransmit_handler, ts); ++ rtskb_queue_init(&ts->retransmit_queue); ++ ++ init_completion(&ts->fin_handshake); ++ rtdm_nrtsig_init(&ts->close_sig, rt_tcp_close_signal_handler, ++ &ts->fin_handshake); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION ++ ts->packet_counter = counter_start; ++ ts->error_rate = error_rate; ++ ts->multi_error = multi_error; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */ ++ ++ rtdm_lock_get_irqsave(&tcp_socket_base_lock, context); ++ ++ /* enforce maximum number of TCP sockets */ ++ if (free_ports == 0) { ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ rtdm_nrtsig_destroy(&ts->close_sig); ++ return -EAGAIN; ++ } ++ free_ports--; ++ ++ /* find free auto-port in bitmap */ ++ for (i = 0; i < RT_PORT_BITMAP_WORDS; i++) ++ if (port_bitmap[i] != (unsigned long)-1) ++ break; ++ index = ffz(port_bitmap[i]); ++ set_bit(index, &port_bitmap[i]); ++ index += i * 32; ++ sock->prot.inet.reg_index = index; ++ sock->prot.inet.sport = index + tcp_auto_port_start; ++ ++ /* register TCP socket */ ++ port_registry[index] = ts; ++ port_hash_insert(ts, INADDR_ANY, sock->prot.inet.sport); 
++ ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ ++ return 0; ++} ++ ++/*** ++ * rt_tcp_socket - create a new TCP-Socket ++ * @s: socket ++ */ ++static int rt_tcp_socket(struct rtdm_fd *fd) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ int ret; ++ ++ if ((ret = rt_socket_init(fd, IPPROTO_TCP)) != 0) ++ return ret; ++ ++ if ((ret = rt_tcp_socket_create(ts)) != 0) ++ rt_socket_cleanup(fd); ++ ++ return ret; ++} ++ ++static int rt_tcp_dispatched_packet_send(struct rt_proc_call *call) ++{ ++ int ret; ++ struct rt_tcp_dispatched_packet_send_cmd *cmd; ++ ++ cmd = rtpc_get_priv(call, struct rt_tcp_dispatched_packet_send_cmd); ++ ret = rt_tcp_send(cmd->ts, cmd->flags); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_socket_destruct ++ * this function requires non realtime context ++ */ ++static void rt_tcp_socket_destruct(struct tcp_socket *ts) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *skb; ++ int index; ++ int signal; ++ struct rtsocket *sock = &ts->sock; ++ ++ /* ++ rtdm_printk("rttcp: rt_tcp_socket_destruct 0x%p\n", ts); ++ */ ++ ++ rtdm_lock_get_irqsave(&tcp_socket_base_lock, context); ++ if (sock->prot.inet.reg_index >= 0) { ++ index = sock->prot.inet.reg_index; ++ ++ clear_bit(index % BITS_PER_LONG, ++ &port_bitmap[index / BITS_PER_LONG]); ++ port_hash_del(port_registry[index]); ++ free_ports++; ++ sock->prot.inet.reg_index = -1; ++ } ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE); ++ ++ rt_tcp_keepalive_disable(ts); ++ ++ sock->prot.inet.state = TCP_CLOSE; ++ ++ /* dereference rtdev */ ++ if (ts->rt.rtdev != NULL) { ++ rtdev_dereference(ts->rt.rtdev); ++ ts->rt.rtdev = NULL; ++ } ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++ ++ rtdm_event_destroy(&ts->conn_evt); ++ ++ rtdm_nrtsig_destroy(&ts->close_sig); ++ ++ /* cleanup already collected fragments */ ++ rt_ip_frag_invalidate_socket(sock); ++ ++ /* free packets in incoming queue */ ++ while ((skb = rtskb_dequeue(&sock->incoming)) != NULL) ++ kfree_rtskb(skb); ++ ++ /* ensure that the timer is no longer running */ ++ timerwheel_remove_timer_sync(&ts->timer); ++ ++ /* free packets in retransmission queue */ ++ while ((skb = __rtskb_dequeue(&ts->retransmit_queue)) != NULL) ++ kfree_rtskb(skb); ++} ++ ++/*** ++ * rt_tcp_close ++ */ ++static void rt_tcp_close(struct rtdm_fd *fd) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ struct rt_tcp_dispatched_packet_send_cmd send_cmd; ++ rtdm_lockctx_t context; ++ int signal = 0; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ ts->is_closed = 1; ++ ++ if (ts->tcp_state == TCP_ESTABLISHED || ts->tcp_state == TCP_SYN_RECV) { ++ /* close() from ESTABLISHED */ ++ send_cmd.ts = ts; ++ send_cmd.flags = TCP_FLAG_FIN | TCP_FLAG_ACK; ++ signal = rt_tcp_socket_invalidate(ts, TCP_FIN_WAIT1); ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ rtpc_dispatch_call(rt_tcp_dispatched_packet_send, 0, &send_cmd, ++ sizeof(send_cmd), NULL, NULL); ++ /* result is ignored */ ++ ++ /* Give the peer some time to reply to our FIN. ++ Since it is not relevant what exactly causes the wait ++ function to return its result is ignored. 
*/ ++ wait_for_completion_interruptible_timeout(&ts->fin_handshake, ++ msecs_to_jiffies(close_timeout)); ++ } else if (ts->tcp_state == TCP_CLOSE_WAIT) { ++ /* Send FIN in CLOSE_WAIT */ ++ send_cmd.ts = ts; ++ send_cmd.flags = TCP_FLAG_FIN | TCP_FLAG_ACK; ++ signal = rt_tcp_socket_invalidate(ts, TCP_LAST_ACK); ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ rtpc_dispatch_call(rt_tcp_dispatched_packet_send, 0, &send_cmd, ++ sizeof(send_cmd), NULL, NULL); ++ /* result is ignored */ ++ ++ /* Give the peer some time to reply to our FIN. ++ Since it is not relevant what exactly causes the wait ++ function to return its result is ignored. */ ++ wait_for_completion_interruptible_timeout(&ts->fin_handshake, ++ msecs_to_jiffies(close_timeout)); ++ } else { ++ /* ++ rt_tcp_socket_validate() has not been called at all, ++ hence socket state is TCP_SYN_SENT or TCP_LISTEN, ++ or socket is in one of close states, ++ hence rt_tcp_socket_invalidate() was called, ++ but close() is called at first time ++ */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++ ++ rt_tcp_socket_destruct(ts); ++ ++ rt_socket_cleanup(fd); ++} ++ ++/*** ++ * rt_tcp_bind - bind socket to local address ++ * @s: socket ++ * @addr: local address ++ */ ++static int rt_tcp_bind(struct rtdm_fd *fd, struct tcp_socket *ts, ++ const struct sockaddr __user *addr, socklen_t addrlen) ++{ ++ struct sockaddr_in *usin, _usin; ++ rtdm_lockctx_t context; ++ int index; ++ int bound = 0; ++ int ret = 0; ++ ++ usin = rtnet_get_arg(fd, &_usin, addr, sizeof(_usin)); ++ if (IS_ERR(usin)) ++ return PTR_ERR(usin); ++ ++ if ((addrlen < (int)sizeof(struct sockaddr_in)) || ++ ((usin->sin_port & tcp_auto_port_mask) == tcp_auto_port_start)) ++ return -EINVAL; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ if (ts->tcp_state != TCP_CLOSE || ts->is_bound || ts->is_binding) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EINVAL; ++ } ++ ++ ts->is_binding = 1; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ rtdm_lock_get_irqsave(&tcp_socket_base_lock, context); ++ ++ if ((index = ts->sock.prot.inet.reg_index) < 0) { ++ /* socket is destroyed */ ++ ret = -EBADF; ++ goto unlock_out; ++ } ++ ++ port_hash_del(ts); ++ if (port_hash_insert(ts, usin->sin_addr.s_addr, ++ usin->sin_port ?: index + tcp_auto_port_start)) { ++ port_hash_insert(ts, ts->saddr, ts->sport); ++ ++ ret = -EADDRINUSE; ++ goto unlock_out; ++ } ++ ++ bound = 1; ++ ++unlock_out: ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ts->is_bound = bound; ++ ts->is_binding = 0; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_connect ++ */ ++static int rt_tcp_connect(struct rtdm_fd *fd, struct tcp_socket *ts, ++ const struct sockaddr __user *serv_addr, ++ socklen_t addrlen) ++{ ++ struct sockaddr_in *usin, _usin; ++ struct dest_route rt; ++ rtdm_lockctx_t context; ++ int ret; ++ ++ if (addrlen < (int)sizeof(struct sockaddr_in)) ++ return -EINVAL; ++ ++ usin = rtnet_get_arg(fd, &_usin, serv_addr, sizeof(_usin)); ++ if (IS_ERR(usin)) ++ return PTR_ERR(usin); ++ ++ if (usin->sin_family != AF_INET) ++ return -EAFNOSUPPORT; ++ ++ ret = rt_ip_route_output(&rt, usin->sin_addr.s_addr, ts->saddr); ++ if (ret < 0) { ++ /* no route to host */ ++ return -ENETUNREACH; ++ } ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (ts->is_closed) { ++ 
rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ret = -EBADF; ++ goto err_deref; ++ } ++ ++ if (ts->tcp_state != TCP_CLOSE || ts->is_binding) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ret = -EINVAL; ++ goto err_deref; ++ } ++ ++ if (ts->rt.rtdev == NULL) ++ memcpy(&ts->rt, &rt, sizeof(rt)); ++ else ++ rtdev_dereference(rt.rtdev); ++ ++ ts->saddr = rt.rtdev->local_ip; ++ ++ ts->daddr = usin->sin_addr.s_addr; ++ ts->dport = usin->sin_port; ++ ++ ts->sync.seq = rt_tcp_initial_seq(); ++ ts->sync.ack_seq = 0; ++ ts->sync.window = 4096; ++ ts->sync.dst_window = 0; ++ ++ ts->tcp_state = TCP_SYN_SENT; ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ /* Complete three-way handshake */ ++ ret = rt_tcp_send(ts, TCP_FLAG_SYN); ++ if (ret < 0) { ++ rtdm_printk("rttcp: cann't send SYN\n"); ++ return ret; ++ } ++ ++ ret = rtdm_event_timedwait(&ts->conn_evt, rt_tcp_connection_timeout, ++ NULL); ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ case -EWOULDBLOCK: ++ case -ETIMEDOUT: ++ case -EINTR: ++ return ret; ++ ++ default: ++ return -EBADF; ++ } ++ ++ if (ts->tcp_state == TCP_SYN_SENT) { ++ /* received conn_evt, but connection is not established */ ++ return -ECONNREFUSED; ++ } ++ ++ return ret; ++ ++err_deref: ++ rtdev_dereference(rt.rtdev); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_listen ++ */ ++static int rt_tcp_listen(struct tcp_socket *ts, unsigned long backlog) ++{ ++ int ret; ++ rtdm_lockctx_t context; ++ ++ /* Ignore backlog value, maximum number of queued connections is 1 */ ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ if (ts->is_closed) { ++ ret = -EBADF; ++ goto unlock_out; ++ } ++ ++ if (ts->tcp_state != TCP_CLOSE || ts->is_binding) { ++ ret = -EINVAL; ++ goto unlock_out; ++ } ++ ++ ts->tcp_state = TCP_LISTEN; ++ ret = 0; ++ ++unlock_out: ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_accept ++ */ ++static int rt_tcp_accept(struct rtdm_fd *fd, struct tcp_socket *ts, ++ struct sockaddr *addr, socklen_t __user *addrlen) ++{ ++ /* Return sockaddr, but bind it with rt_socket_init, so it would be ++ possible to read/write from it in future, return valid file descriptor */ ++ ++ int ret; ++ socklen_t *uaddrlen, _uaddrlen; ++ struct sockaddr_in sin; ++ nanosecs_rel_t timeout = ts->sock.timeout; ++ rtdm_lockctx_t context; ++ struct dest_route rt; ++ ++ uaddrlen = rtnet_get_arg(fd, &_uaddrlen, addrlen, sizeof(_uaddrlen)); ++ if (IS_ERR(uaddrlen)) ++ return PTR_ERR(uaddrlen); ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ if (ts->is_accepting || ts->is_accepted) { ++ /* socket is already accepted or is accepting a connection right now */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EALREADY; ++ } ++ ++ if (ts->tcp_state != TCP_LISTEN || ++ *uaddrlen < sizeof(struct sockaddr_in)) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EINVAL; ++ } ++ ++ ts->is_accepting = 1; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ ret = rtdm_event_timedwait(&ts->conn_evt, timeout, NULL); ++ ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ case -ETIMEDOUT: ++ case -EINTR: ++ goto err; ++ ++ default: ++ ret = -EBADF; ++ goto err; ++ } ++ ++ /* accept() reported about connection establishment */ ++ ret = rt_ip_route_output(&rt, ts->daddr, ts->saddr); ++ if (ret < 0) { ++ /* strange, no route to host, keep status quo */ ++ ret = -EPROTO; ++ goto err; ++ } ++ ++ if (addr) { ++ sin.sin_family = AF_INET; ++ sin.sin_port = ts->dport; ++ 
sin.sin_addr.s_addr = ts->daddr; ++ ret = rtnet_put_arg(fd, addr, &sin, sizeof(sin)); ++ if (ret) { ++ rtdev_dereference(rt.rtdev); ++ ret = -EFAULT; ++ goto err; ++ } ++ } ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (ts->tcp_state != TCP_ESTABLISHED) { ++ /* protocol error */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdev_dereference(rt.rtdev); ++ ret = -EPROTO; ++ goto err; ++ } ++ ++ if (ts->rt.rtdev == NULL) ++ memcpy(&ts->rt, &rt, sizeof(rt)); ++ else ++ rtdev_dereference(rt.rtdev); ++ ++ ts->is_accepted = 1; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ ret = rtdm_fd_ufd(rt_socket_fd(&ts->sock)); ++ ++err: ++ /* it is not critical to leave this unlocked ++ due to single entry nature of accept() */ ++ ts->is_accepting = 0; ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_shutdown ++ */ ++static int rt_tcp_shutdown(struct tcp_socket *ts, unsigned long how) ++{ ++ return -EOPNOTSUPP; ++} ++ ++/*** ++ * rt_tcp_setsockopt ++ */ ++static int rt_tcp_setsockopt(struct rtdm_fd *fd, struct tcp_socket *ts, ++ int level, int optname, const void *optval, ++ socklen_t optlen) ++{ ++ /* uint64_t val; */ ++ struct timeval tv; ++ rtdm_lockctx_t context; ++ ++ switch (optname) { ++ case SO_KEEPALIVE: ++ if (optlen < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ /* commented out, because current implementation transmits ++ keepalive probes from interrupt context */ ++ /* ++ val = *(unsigned long*)optval; ++ ++ if (val) ++ rt_tcp_keepalive_enable(ts); ++ else ++ rt_tcp_keepalive_disable(ts); ++ */ ++ return 0; ++ ++ case SO_SNDTIMEO: ++ if (optlen < sizeof(tv)) ++ return -EINVAL; ++ if (rtdm_copy_from_user(fd, &tv, optval, sizeof(tv))) ++ return -EFAULT; ++ if (tv.tv_usec < 0 || tv.tv_usec >= 1000000) ++ return -EDOM; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (tv.tv_sec < 0) { ++ ts->sk_sndtimeo = RTDM_TIMEOUT_NONE; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return 0; ++ } ++ ++ ts->sk_sndtimeo = RTDM_TIMEOUT_INFINITE; ++ if (tv.tv_sec == 0 && tv.tv_usec == 0) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return 0; ++ } ++ ++ if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / 1000000000ull - 1)) ++ ts->sk_sndtimeo = ++ (tv.tv_sec * 1000000 + tv.tv_usec) * 1000; ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ return 0; ++ ++ case SO_REUSEADDR: ++ /* to implement */ ++ return -EOPNOTSUPP; ++ } ++ ++ return -ENOPROTOOPT; ++} ++ ++/*** ++ * rt_tcp_getsockopt ++ */ ++static int rt_tcp_getsockopt(struct rtdm_fd *fd, struct tcp_socket *ts, ++ int level, int optname, void *optval, ++ socklen_t *optlen) ++{ ++ int ret = 0; ++ ++ if (*optlen < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ switch (optname) { ++ case SO_ERROR: ++ ret = 0; /* used in nonblocking connect(), extend later */ ++ break; ++ ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_ioctl ++ */ ++static int rt_tcp_ioctl(struct rtdm_fd *fd, unsigned int request, ++ void __user *arg) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ const struct _rtdm_setsockaddr_args *setaddr; ++ struct _rtdm_setsockaddr_args _setaddr; ++ const struct _rtdm_getsockaddr_args *getaddr; ++ struct _rtdm_getsockaddr_args _getaddr; ++ const struct _rtdm_getsockopt_args *getopt; ++ struct _rtdm_getsockopt_args _getopt; ++ const struct _rtdm_setsockopt_args *setopt; ++ struct _rtdm_setsockopt_args _setopt; ++ int in_rt; ++ ++ /* fast path for common socket IOCTLs */ ++ if (_IOC_TYPE(request) == 
RTIOC_TYPE_NETWORK) ++ return rt_socket_common_ioctl(fd, request, arg); ++ ++ in_rt = rtdm_in_rt_context(); ++ ++ switch (request) { ++ case _RTIOC_BIND: ++ setaddr = rtnet_get_arg(fd, &_setaddr, arg, sizeof(_setaddr)); ++ if (IS_ERR(setaddr)) ++ return PTR_ERR(setaddr); ++ return rt_tcp_bind(fd, ts, setaddr->addr, setaddr->addrlen); ++ case _RTIOC_CONNECT: ++ if (!in_rt) ++ return -ENOSYS; ++ setaddr = rtnet_get_arg(fd, &_setaddr, arg, sizeof(_setaddr)); ++ if (IS_ERR(setaddr)) ++ return PTR_ERR(setaddr); ++ return rt_tcp_connect(fd, ts, setaddr->addr, setaddr->addrlen); ++ ++ case _RTIOC_LISTEN: ++ return rt_tcp_listen(ts, (unsigned long)arg); ++ ++ case _RTIOC_ACCEPT: ++ if (!in_rt) ++ return -ENOSYS; ++ getaddr = rtnet_get_arg(fd, &_getaddr, arg, sizeof(_getaddr)); ++ if (IS_ERR(getaddr)) ++ return PTR_ERR(getaddr); ++ return rt_tcp_accept(fd, ts, getaddr->addr, getaddr->addrlen); ++ ++ case _RTIOC_SHUTDOWN: ++ return rt_tcp_shutdown(ts, (unsigned long)arg); ++ ++ case _RTIOC_SETSOCKOPT: ++ setopt = rtnet_get_arg(fd, &_setopt, arg, sizeof(_setopt)); ++ if (IS_ERR(setopt)) ++ return PTR_ERR(setopt); ++ ++ if (setopt->level != SOL_SOCKET) ++ break; ++ ++ return rt_tcp_setsockopt(fd, ts, setopt->level, setopt->optname, ++ setopt->optval, setopt->optlen); ++ ++ case _RTIOC_GETSOCKOPT: ++ getopt = rtnet_get_arg(fd, &_getopt, arg, sizeof(_getopt)); ++ if (IS_ERR(getopt)) ++ return PTR_ERR(getopt); ++ ++ if (getopt->level != SOL_SOCKET) ++ break; ++ ++ return rt_tcp_getsockopt(fd, ts, getopt->level, getopt->optname, ++ getopt->optval, getopt->optlen); ++ default: ++ break; ++ } ++ ++ return rt_ip_ioctl(fd, request, arg); ++} ++ ++/*** ++ * rt_tcp_read ++ */ ++static ssize_t rt_tcp_read(struct rtdm_fd *fd, void *buf, size_t nbyte) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ struct rtsocket *sock = &ts->sock; ++ ++ struct rtskb *skb; ++ struct rtskb *first_skb; ++ nanosecs_rel_t timeout = sock->timeout; ++ size_t data_len; ++ size_t th_len; ++ size_t copied = 0; ++ size_t block_size; ++ u8 *user_buf = buf; ++ int ret; ++ rtdm_lockctx_t context; ++ ++ rtdm_toseq_t timeout_seq; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ return -EFAULT; ++ } ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (ts->is_closed) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EBADF; ++ } ++ ++ if (!ts->is_valid) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return 0; ++ } ++ ++ if (ts->tcp_state != TCP_ESTABLISHED && ++ ts->tcp_state != TCP_FIN_WAIT2) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EINVAL; ++ } ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ rtdm_toseq_init(&timeout_seq, timeout); ++ ++ while (copied < nbyte) { ++ ret = rtdm_sem_timeddown(&ts->sock.pending_sem, timeout, ++ &timeout_seq); ++ ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ case -EWOULDBLOCK: ++ case -ETIMEDOUT: ++ case -EINTR: ++ return (copied ? 
copied : ret); ++ ++ case -EIDRM: /* event is destroyed */ ++ if (ts->is_closed) ++ return -EBADF; ++ ++ return copied; ++ ++ default: ++ if (ts->is_closed) { ++ return -EBADF; ++ } ++ ++ return 0; ++ } ++ ++ skb = rtskb_dequeue_chain(&sock->incoming); ++ RTNET_ASSERT(skb != NULL, return -EFAULT;); ++ ++ th_len = (skb->h.th->doff) << 2; ++ ++ data_len = skb->len - th_len; ++ ++ __rtskb_pull(skb, th_len); ++ ++ first_skb = skb; ++ ++ /* iterate over all IP fragments */ ++ iterate_fragments: ++ block_size = skb->len; ++ copied += block_size; ++ data_len -= block_size; ++ ++ if (copied > nbyte) { ++ block_size -= copied - nbyte; ++ copied = nbyte; ++ ++ if (rtdm_copy_to_user(fd, user_buf, skb->data, ++ block_size)) { ++ kfree_rtskb(first_skb); /* or store the data? */ ++ return -EFAULT; ++ } ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ if (ts->sync.window) { ++ ts->sync.window += block_size; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, ++ context); ++ } else { ++ ts->sync.window = block_size; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, ++ context); ++ rt_tcp_send(ts, ++ TCP_FLAG_ACK); /* window update */ ++ } ++ ++ __rtskb_pull(skb, block_size); ++ __rtskb_push(first_skb, sizeof(struct tcphdr)); ++ first_skb->h.th->doff = 5; ++ rtskb_queue_head(&sock->incoming, first_skb); ++ rtdm_sem_up(&ts->sock.pending_sem); ++ ++ return copied; ++ } ++ ++ if (rtdm_copy_to_user(fd, user_buf, skb->data, block_size)) { ++ kfree_rtskb(first_skb); /* or store the data? */ ++ return -EFAULT; ++ } ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ if (ts->sync.window) { ++ ts->sync.window += block_size; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } else { ++ ts->sync.window = block_size; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); /* window update */ ++ } ++ ++ if ((skb = skb->next) != NULL) { ++ user_buf += data_len; ++ goto iterate_fragments; ++ } ++ ++ kfree_rtskb(first_skb); ++ } ++ ++ return copied; ++} ++ ++/*** ++ * rt_tcp_write ++ */ ++static ssize_t rt_tcp_write(struct rtdm_fd *fd, const void __user *user_buf, ++ size_t nbyte) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ uint32_t sent_len = 0; ++ rtdm_lockctx_t context; ++ int ret = 0; ++ nanosecs_rel_t sk_sndtimeo; ++ void *buf; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ return -EFAULT; ++ } ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ sk_sndtimeo = ts->sk_sndtimeo; ++ ++ if (!ts->is_valid) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EPIPE; ++ } ++ ++ if ((ts->daddr | ts->dport) == 0 || ts->tcp_state != TCP_ESTABLISHED) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EINVAL; ++ } ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ buf = xnmalloc(nbyte); ++ if (buf == NULL) ++ return -ENOMEM; ++ ++ ret = rtdm_copy_from_user(fd, buf, user_buf, nbyte); ++ if (ret) { ++ xnfree(buf); ++ return ret; ++ } ++ ++ while (sent_len < nbyte) { ++ ret = rtdm_event_timedwait(&ts->send_evt, sk_sndtimeo, NULL); ++ ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ case -EWOULDBLOCK: ++ case -ETIMEDOUT: ++ case -EINTR: ++ xnfree(buf); ++ return sent_len ?: ret; ++ ++ case -EIDRM: /* event is destroyed */ ++ default: ++ if (ts->is_closed) { ++ xnfree(buf); ++ return -EBADF; ++ } ++ ++ xnfree(buf); ++ return sent_len ?: ret; ++ } ++ ++ ret = rt_tcp_window_send(ts, nbyte - sent_len, ++ ((u8 *)buf) + sent_len); ++ ++ if (ret < 0) { /* check this branch correctness */ ++ rtdm_event_signal(&ts->send_evt); ++ break; ++ } 
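++		/* account for the bytes handed off in this pass; while the peer
++		   still advertises window space, re-arm send_evt so the next
++		   pass of the loop does not block */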
++ ++ sent_len += ret; ++ if (ts->sync.dst_window) ++ rtdm_event_signal(&ts->send_evt); ++ } ++ ++ xnfree(buf); ++ return (ret < 0 ? ret : sent_len); ++} ++ ++/*** ++ * rt_tcp_recvmsg ++ */ ++static ssize_t rt_tcp_recvmsg(struct rtdm_fd *fd, struct user_msghdr *msg, ++ int msg_flags) ++{ ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct user_msghdr _msg; ++ ssize_t ret; ++ size_t len; ++ void *buf; ++ ++ if (msg_flags) ++ return -EOPNOTSUPP; ++ ++ msg = rtnet_get_arg(fd, &_msg, msg, sizeof(*msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ /* loop over all vectors to be implemented */ ++ if (msg->msg_iovlen != 1) ++ return -EOPNOTSUPP; ++ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ len = iov[0].iov_len; ++ if (len > 0) { ++ buf = iov[0].iov_base; ++ ret = rt_tcp_read(fd, buf, len); ++ } ++ ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_sendmsg ++ */ ++static ssize_t rt_tcp_sendmsg(struct rtdm_fd *fd, const struct user_msghdr *msg, ++ int msg_flags) ++{ ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct user_msghdr _msg; ++ ssize_t ret; ++ size_t len; ++ ++ if (msg_flags) ++ return -EOPNOTSUPP; ++ ++ msg = rtnet_get_arg(fd, &_msg, msg, sizeof(*msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ /* loop over all vectors to be implemented */ ++ if (msg->msg_iovlen != 1) ++ return -EOPNOTSUPP; ++ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ len = iov[0].iov_len; ++ if (len > 0) ++ ret = rt_tcp_write(fd, iov[0].iov_base, len); ++ ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_select ++ */ ++static int rt_tcp_select(struct rtdm_fd *fd, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ ++ switch (type) { ++ case XNSELECT_READ: ++ return rtdm_sem_select(&ts->sock.pending_sem, selector, ++ XNSELECT_READ, fd_index); ++ case XNSELECT_WRITE: ++ return rtdm_event_select(&ts->send_evt, selector, ++ XNSELECT_WRITE, fd_index); ++ default: ++ return -EBADF; ++ } ++ ++ return -EINVAL; ++} ++ ++/*** ++ * TCP-Initialisation ++ */ ++static struct rtinet_protocol tcp_protocol = { .protocol = IPPROTO_TCP, ++ .dest_socket = ++ &rt_tcp_dest_socket, ++ .rcv_handler = &rt_tcp_rcv, ++ .err_handler = &rt_tcp_rcv_err, ++ .init_socket = &rt_tcp_socket }; ++ ++static struct rtdm_driver tcp_driver = { ++ .profile_info = RTDM_PROFILE_INFO(tcp, ++ RTDM_CLASS_NETWORK, ++ RTDM_SUBCLASS_RTNET, ++ RTNET_RTDM_VER), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct tcp_socket), ++ ++ .protocol_family = PF_INET, ++ .socket_type = SOCK_STREAM, ++ ++ .ops = { ++ .socket = rt_inet_socket, ++ .close = rt_tcp_close, ++ .ioctl_rt = rt_tcp_ioctl, ++ .ioctl_nrt = rt_tcp_ioctl, ++ .read_rt = rt_tcp_read, ++ .write_rt = rt_tcp_write, ++ .recvmsg_rt = rt_tcp_recvmsg, ++ .sendmsg_rt = rt_tcp_sendmsg, ++ .select = rt_tcp_select, ++ }, ++}; ++ ++static struct rtdm_device tcp_device = { ++ .driver = &tcp_driver, ++ .label = "tcp", ++}; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++/*** ++ * rt_tcp_proc_read ++ */ ++static inline char *rt_tcp_string_of_state(u8 state) ++{ ++ switch (state) { ++ case TCP_ESTABLISHED: ++ return "ESTABLISHED"; ++ case TCP_SYN_SENT: ++ return "SYN_SENT"; ++ case TCP_SYN_RECV: ++ return "SYN_RECV"; ++ case TCP_FIN_WAIT1: ++ return "FIN_WAIT1"; ++ case TCP_FIN_WAIT2: ++ return "FIN_WAIT2"; ++ case TCP_TIME_WAIT: ++ return 
"TIME_WAIT"; ++ case TCP_CLOSE: ++ return "CLOSE"; ++ case TCP_CLOSE_WAIT: ++ return "CLOSE_WAIT"; ++ case TCP_LAST_ACK: ++ return "LASK_ACK"; ++ case TCP_LISTEN: ++ return "LISTEN"; ++ case TCP_CLOSING: ++ return "CLOSING"; ++ default: ++ return "UNKNOWN"; ++ } ++} ++ ++static int rtnet_ipv4_tcp_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_socket *ts; ++ u32 saddr, daddr; ++ u16 sport = 0, dport = 0; /* set to 0 to silence compiler */ ++ char sbuffer[24]; ++ char dbuffer[24]; ++ int state; ++ int index; ++ ++ xnvfile_printf(it, "Hash Local Address " ++ "Foreign Address State\n"); ++ ++ for (index = 0; index < RT_TCP_SOCKETS; index++) { ++ rtdm_lock_get_irqsave(&tcp_socket_base_lock, context); ++ ++ ts = port_registry[index]; ++ state = ts ? ts->tcp_state : TCP_CLOSE; ++ ++ if (ts && ts->tcp_state != TCP_CLOSE) { ++ saddr = ts->saddr; ++ sport = ts->sport; ++ daddr = ts->daddr; ++ dport = ts->dport; ++ } ++ ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ ++ if (state != TCP_CLOSE) { ++ snprintf(sbuffer, sizeof(sbuffer), "%u.%u.%u.%u:%u", ++ NIPQUAD(saddr), ntohs(sport)); ++ snprintf(dbuffer, sizeof(dbuffer), "%u.%u.%u.%u:%u", ++ NIPQUAD(daddr), ntohs(dport)); ++ ++ xnvfile_printf(it, "%04X %-23s %-23s %s\n", ++ sport & port_hash_mask, sbuffer, dbuffer, ++ rt_tcp_string_of_state(state)); ++ } ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_ipv4_tcp_vfile_ops = { ++ .show = rtnet_ipv4_tcp_show, ++}; ++ ++static struct xnvfile_regular rtnet_ipv4_tcp_vfile = { ++ .ops = &rtnet_ipv4_tcp_vfile_ops, ++}; ++ ++/*** ++ * rt_tcp_proc_register ++ */ ++static int __init rt_tcp_proc_register(void) ++{ ++ return xnvfile_init_regular("tcp", &rtnet_ipv4_tcp_vfile, ++ &ipv4_proc_root); ++} ++ ++/*** ++ * rt_tcp_proc_unregister ++ */ ++ ++static void rt_tcp_proc_unregister(void) ++{ ++ xnvfile_destroy_regular(&rtnet_ipv4_tcp_vfile); ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++/*** ++ * rt_tcp_init ++ */ ++int __init rt_tcp_init(void) ++{ ++ unsigned int skbs; ++ int i; ++ int ret; ++ ++ if ((tcp_auto_port_start < 0) || ++ (tcp_auto_port_start >= 0x10000 - RT_TCP_SOCKETS)) ++ tcp_auto_port_start = 1024; ++ tcp_auto_port_start = ++ htons(tcp_auto_port_start & (tcp_auto_port_mask & 0xFFFF)); ++ tcp_auto_port_mask = htons(tcp_auto_port_mask | 0xFFFF0000); ++ ++ for (i = 0; i < ARRAY_SIZE(port_hash); i++) ++ INIT_HLIST_HEAD(&port_hash[i]); ++ ++ /* Perform essential initialization of the RST|ACK socket */ ++ skbs = rt_bare_socket_init(rst_fd, IPPROTO_TCP, RT_TCP_RST_PRIO, ++ RT_TCP_RST_POOL_SIZE); ++ if (skbs < RT_TCP_RST_POOL_SIZE) ++ printk("rttcp: allocated only %d RST|ACK rtskbs\n", skbs); ++ rst_socket.sock.prot.inet.tos = 0; ++ rst_fd->refs = 1; ++ rtdm_lock_init(&rst_socket.socket_lock); ++ ++ /* ++ * 100 ms forwarding timer with 8.38 ms slots ++ */ ++ ret = timerwheel_init(100000000ull, 23); ++ if (ret < 0) { ++ rtdm_printk("rttcp: cann't initialize timerwheel task: %d\n", ++ -ret); ++ goto out_1; ++ } ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ if ((ret = rt_tcp_proc_register()) < 0) { ++ rtdm_printk("rttcp: cann't initialize proc entry: %d\n", -ret); ++ goto out_2; ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ rt_inet_add_protocol(&tcp_protocol); ++ ++ ret = rtdm_dev_register(&tcp_device); ++ if (ret < 0) { ++ rtdm_printk("rttcp: cann't register RT TCP: %d\n", -ret); ++ goto out_3; ++ } ++ ++ return ret; ++ ++out_3: ++ rt_inet_del_protocol(&tcp_protocol); ++#ifdef CONFIG_XENO_OPT_VFILE ++ rt_tcp_proc_unregister(); 
++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++out_2: ++ timerwheel_cleanup(); ++ ++out_1: ++ rt_bare_socket_cleanup(&rst_socket.sock); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_release ++ */ ++void __exit rt_tcp_release(void) ++{ ++ rt_inet_del_protocol(&tcp_protocol); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ rt_tcp_proc_unregister(); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ timerwheel_cleanup(); ++ ++ rt_bare_socket_cleanup(&rst_socket.sock); ++ ++ rtdm_dev_unregister(&tcp_device); ++} ++ ++module_init(rt_tcp_init); ++module_exit(rt_tcp_release); +--- linux/drivers/xenomai/net/stack/ipv4/ip_sock.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/ip_sock.c 2021-04-07 16:01:27.049634420 +0800 +@@ -0,0 +1,194 @@ ++/*** ++ * ++ * ipv4/ip_sock.c ++ * ++ * Copyright (C) 2003 Hans-Peter Bock ++ * 2004, 2005 Jan Kiszka ++ * 2019 Sebastian Smolorz ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++int rt_ip_setsockopt(struct rtdm_fd *fd, struct rtsocket *s, int level, ++ int optname, const void __user *optval, socklen_t optlen) ++{ ++ int err = 0; ++ unsigned int _tos, *tos; ++ ++ if (level != SOL_IP) ++ return -ENOPROTOOPT; ++ ++ if (optlen < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ switch (optname) { ++ case IP_TOS: ++ tos = rtnet_get_arg(fd, &_tos, optval, sizeof(_tos)); ++ if (IS_ERR(tos)) ++ return PTR_ERR(tos); ++ else ++ s->prot.inet.tos = *tos; ++ break; ++ ++ default: ++ err = -ENOPROTOOPT; ++ break; ++ } ++ ++ return err; ++} ++ ++int rt_ip_getsockopt(struct rtdm_fd *fd, struct rtsocket *s, int level, ++ int optname, void __user *optval, socklen_t __user *optlen) ++{ ++ int err = 0; ++ unsigned int tos; ++ socklen_t _len, *len; ++ ++ len = rtnet_get_arg(fd, &_len, optlen, sizeof(_len)); ++ if (IS_ERR(len)) ++ return PTR_ERR(len); ++ ++ if (*len < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ switch (optname) { ++ case IP_TOS: ++ tos = s->prot.inet.tos; ++ err = rtnet_put_arg(fd, optval, &tos, sizeof(tos)); ++ if (!err) { ++ *len = sizeof(unsigned int); ++ err = rtnet_put_arg(fd, optlen, len, sizeof(socklen_t)); ++ } ++ break; ++ ++ default: ++ err = -ENOPROTOOPT; ++ break; ++ } ++ ++ return err; ++} ++ ++int rt_ip_getsockname(struct rtdm_fd *fd, struct rtsocket *s, ++ struct sockaddr __user *addr, socklen_t __user *addrlen) ++{ ++ struct sockaddr_in _sin; ++ socklen_t *len, _len; ++ int ret; ++ ++ len = rtnet_get_arg(fd, &_len, addrlen, sizeof(_len)); ++ if (IS_ERR(len)) ++ return PTR_ERR(len); ++ ++ if (*len < sizeof(struct sockaddr_in)) ++ return -EINVAL; ++ ++ _sin.sin_family = AF_INET; ++ _sin.sin_addr.s_addr = s->prot.inet.saddr; ++ _sin.sin_port = s->prot.inet.sport; ++ memset(&_sin.sin_zero, 0, sizeof(_sin.sin_zero)); ++ ret = rtnet_put_arg(fd, addr, &_sin, sizeof(_sin)); ++ if (ret) ++ return ret; ++ ++ *len = 
sizeof(struct sockaddr_in); ++ ret = rtnet_put_arg(fd, addrlen, len, sizeof(socklen_t)); ++ ++ return ret; ++} ++ ++int rt_ip_getpeername(struct rtdm_fd *fd, struct rtsocket *s, ++ struct sockaddr __user *addr, socklen_t __user *addrlen) ++{ ++ struct sockaddr_in _sin; ++ socklen_t *len, _len; ++ int ret; ++ ++ len = rtnet_get_arg(fd, &_len, addrlen, sizeof(_len)); ++ if (IS_ERR(len)) ++ return PTR_ERR(len); ++ ++ if (*len < sizeof(struct sockaddr_in)) ++ return -EINVAL; ++ ++ _sin.sin_family = AF_INET; ++ _sin.sin_addr.s_addr = s->prot.inet.daddr; ++ _sin.sin_port = s->prot.inet.dport; ++ memset(&_sin.sin_zero, 0, sizeof(_sin.sin_zero)); ++ ret = rtnet_put_arg(fd, addr, &_sin, sizeof(_sin)); ++ if (ret) ++ return ret; ++ ++ *len = sizeof(struct sockaddr_in); ++ ret = rtnet_put_arg(fd, addrlen, len, sizeof(socklen_t)); ++ ++ return ret; ++} ++ ++int rt_ip_ioctl(struct rtdm_fd *fd, int request, void __user *arg) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ struct _rtdm_getsockaddr_args _getaddr, *getaddr; ++ struct _rtdm_getsockopt_args _getopt, *getopt; ++ struct _rtdm_setsockopt_args _setopt, *setopt; ++ ++ switch (request) { ++ case _RTIOC_SETSOCKOPT: ++ setopt = rtnet_get_arg(fd, &_setopt, arg, sizeof(_setopt)); ++ if (IS_ERR(setopt)) ++ return PTR_ERR(setopt); ++ ++ return rt_ip_setsockopt(fd, sock, setopt->level, ++ setopt->optname, setopt->optval, ++ setopt->optlen); ++ ++ case _RTIOC_GETSOCKOPT: ++ getopt = rtnet_get_arg(fd, &_getopt, arg, sizeof(_getopt)); ++ if (IS_ERR(getopt)) ++ return PTR_ERR(getopt); ++ ++ return rt_ip_getsockopt(fd, sock, getopt->level, ++ getopt->optname, getopt->optval, ++ getopt->optlen); ++ ++ case _RTIOC_GETSOCKNAME: ++ getaddr = rtnet_get_arg(fd, &_getaddr, arg, sizeof(_getaddr)); ++ if (IS_ERR(getaddr)) ++ return PTR_ERR(getaddr); ++ ++ return rt_ip_getsockname(fd, sock, getaddr->addr, ++ getaddr->addrlen); ++ ++ case _RTIOC_GETPEERNAME: ++ getaddr = rtnet_get_arg(fd, &_getaddr, arg, sizeof(_getaddr)); ++ if (IS_ERR(getaddr)) ++ return PTR_ERR(getaddr); ++ ++ return rt_ip_getpeername(fd, sock, getaddr->addr, ++ getaddr->addrlen); ++ ++ default: ++ return rt_socket_if_ioctl(fd, request, arg); ++ } ++} ++EXPORT_SYMBOL_GPL(rt_ip_ioctl); +--- linux/drivers/xenomai/net/stack/ipv4/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/Makefile 2021-04-07 16:01:27.045634426 +0800 +@@ -0,0 +1,19 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4_UDP) += udp/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP) += tcp/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4) += rtipv4.o ++ ++rtipv4-y := \ ++ route.o \ ++ protocol.o \ ++ arp.o \ ++ af_inet.o \ ++ ip_input.o \ ++ ip_sock.o \ ++ ip_output.o \ ++ ip_fragment.o ++ ++rtipv4-$(CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP) += icmp.o +--- linux/drivers/xenomai/net/stack/ipv4/arp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/arp.c 2021-04-07 16:01:27.040634433 +0800 +@@ -0,0 +1,212 @@ ++/*** ++ * ++ * ipv4/arp.h - Adress Resolution Protocol for RTnet ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++#include ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP */ ++ ++/*** ++ * arp_send: Create and send an arp packet. If (dest_hw == NULL), ++ * we create a broadcast message. ++ */ ++void rt_arp_send(int type, int ptype, u32 dest_ip, struct rtnet_device *rtdev, ++ u32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, ++ unsigned char *target_hw) ++{ ++ struct rtskb *skb; ++ struct arphdr *arp; ++ unsigned char *arp_ptr; ++ ++ if (rtdev->flags & IFF_NOARP) ++ return; ++ ++ if (!(skb = alloc_rtskb(sizeof(struct arphdr) + ++ 2 * (rtdev->addr_len + 4) + ++ rtdev->hard_header_len + 15, ++ &global_pool))) ++ return; ++ ++ rtskb_reserve(skb, (rtdev->hard_header_len + 15) & ~15); ++ ++ skb->nh.raw = skb->data; ++ arp = (struct arphdr *)rtskb_put( ++ skb, sizeof(struct arphdr) + 2 * (rtdev->addr_len + 4)); ++ ++ skb->rtdev = rtdev; ++ skb->protocol = __constant_htons(ETH_P_ARP); ++ skb->priority = RT_ARP_SKB_PRIO; ++ if (src_hw == NULL) ++ src_hw = rtdev->dev_addr; ++ if (dest_hw == NULL) ++ dest_hw = rtdev->broadcast; ++ ++ /* ++ * Fill the device header for the ARP frame ++ */ ++ if (rtdev->hard_header && ++ (rtdev->hard_header(skb, rtdev, ptype, dest_hw, src_hw, skb->len) < ++ 0)) ++ goto out; ++ ++ arp->ar_hrd = htons(rtdev->type); ++ arp->ar_pro = __constant_htons(ETH_P_IP); ++ arp->ar_hln = rtdev->addr_len; ++ arp->ar_pln = 4; ++ arp->ar_op = htons(type); ++ ++ arp_ptr = (unsigned char *)(arp + 1); ++ ++ memcpy(arp_ptr, src_hw, rtdev->addr_len); ++ arp_ptr += rtdev->addr_len; ++ ++ memcpy(arp_ptr, &src_ip, 4); ++ arp_ptr += 4; ++ ++ if (target_hw != NULL) ++ memcpy(arp_ptr, target_hw, rtdev->addr_len); ++ else ++ memset(arp_ptr, 0, rtdev->addr_len); ++ arp_ptr += rtdev->addr_len; ++ ++ memcpy(arp_ptr, &dest_ip, 4); ++ ++ /* send the frame */ ++ rtdev_xmit(skb); ++ ++ return; ++ ++out: ++ kfree_rtskb(skb); ++} ++ ++/*** ++ * arp_rcv: Receive an arp request by the device layer. ++ */ ++int rt_arp_rcv(struct rtskb *skb, struct rtpacket_type *pt) ++{ ++ struct rtnet_device *rtdev = skb->rtdev; ++ struct arphdr *arp = skb->nh.arph; ++ unsigned char *arp_ptr = (unsigned char *)(arp + 1); ++ unsigned char *sha; ++ u32 sip, tip; ++ u16 dev_type = rtdev->type; ++ ++ /* ++ * The hardware length of the packet should match the hardware length ++ * of the device. Similarly, the hardware types should match. The ++ * device should be ARP-able. Also, if pln is not 4, then the lookup ++ * is not from an IP number. We can't currently handle this, so toss ++ * it. ++ */ ++ if ((arp->ar_hln != rtdev->addr_len) || (rtdev->flags & IFF_NOARP) || ++ (skb->pkt_type == PACKET_OTHERHOST) || ++ (skb->pkt_type == PACKET_LOOPBACK) || (arp->ar_pln != 4)) ++ goto out; ++ ++ switch (dev_type) { ++ default: ++ if ((arp->ar_pro != __constant_htons(ETH_P_IP)) && ++ (htons(dev_type) != arp->ar_hrd)) ++ goto out; ++ break; ++ case ARPHRD_ETHER: ++ /* ++ * ETHERNET devices will accept ARP hardware types of either ++ * 1 (Ethernet) or 6 (IEEE 802.2). 
++ */ ++ if ((arp->ar_hrd != __constant_htons(ARPHRD_ETHER)) && ++ (arp->ar_hrd != __constant_htons(ARPHRD_IEEE802))) { ++ goto out; ++ } ++ if (arp->ar_pro != __constant_htons(ETH_P_IP)) { ++ goto out; ++ } ++ break; ++ } ++ ++ /* Understand only these message types */ ++ if ((arp->ar_op != __constant_htons(ARPOP_REPLY)) && ++ (arp->ar_op != __constant_htons(ARPOP_REQUEST))) ++ goto out; ++ ++ /* ++ * Extract fields ++ */ ++ sha = arp_ptr; ++ arp_ptr += rtdev->addr_len; ++ memcpy(&sip, arp_ptr, 4); ++ ++ arp_ptr += 4; ++ arp_ptr += rtdev->addr_len; ++ memcpy(&tip, arp_ptr, 4); ++ ++ /* process only requests/replies directed to us */ ++ if (tip == rtdev->local_ip) { ++ rt_ip_route_add_host(sip, sha, rtdev); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ if (!rt_ip_fallback_handler) ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP */ ++ if (arp->ar_op == __constant_htons(ARPOP_REQUEST)) { ++ rt_arp_send(ARPOP_REPLY, ETH_P_ARP, sip, rtdev, ++ tip, sha, rtdev->dev_addr, sha); ++ goto out1; ++ } ++ } ++ ++out: ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ if (rt_ip_fallback_handler) { ++ rt_ip_fallback_handler(skb); ++ return 0; ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP */ ++out1: ++ kfree_rtskb(skb); ++ return 0; ++} ++ ++static struct rtpacket_type arp_packet_type = { ++ type: __constant_htons(ETH_P_ARP), ++ handler: &rt_arp_rcv ++}; ++ ++/*** ++ * rt_arp_init ++ */ ++void __init rt_arp_init(void) ++{ ++ rtdev_add_pack(&arp_packet_type); ++} ++ ++/*** ++ * rt_arp_release ++ */ ++void rt_arp_release(void) ++{ ++ rtdev_remove_pack(&arp_packet_type); ++} +--- linux/drivers/xenomai/net/stack/ipv4/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/Kconfig 2021-04-07 16:01:27.035634441 +0800 +@@ -0,0 +1,75 @@ ++config XENO_DRIVERS_NET_RTIPV4 ++ depends on XENO_DRIVERS_NET ++ tristate "Real-Time IPv4" ++ default y ++ ---help--- ++ Enables the real-time capable IPv4 support of RTnet. The protocol is ++ implemented as a separate module. Supplementing tools (rtroute, ++ rtping) and examples are provided as well. Moreover, RTcfg will ++ include IPv4 support when this option is switched on. ++ ++ For further information see also Documentation/README.routing and ++ Documentation/README.ipfragmentation. ++ ++config XENO_DRIVERS_NET_RTIPV4_ICMP ++ bool "ICMP support" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ default y ++ ---help--- ++ Enables ICMP support of the RTnet Real-Time IPv4 protocol. ++ ++ When the RTnet-Proxy is enabled while this feature is disabled, ICMP ++ will be forwarded to the Linux network stack. ++ ++config XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES ++ int "Maximum host routing table entries" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ default 32 ++ ---help--- ++ Each IPv4 supporting interface and each remote host that is directly ++ reachable via via some output interface requires a host routing table ++ entry. If you run larger networks with may hosts per subnet, you may ++ have to increase this limit. Must be power of 2! ++ ++config XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ bool "IP Network Routing" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ ---help--- ++ Enables routing across IPv4 real-time networks. You will only require ++ this feature in complex networks, while switching it off for flat, ++ single-segment networks improves code size and the worst-case routing ++ decision delay. ++ ++ See Documentation/README.routing for further information. 
++ ++config XENO_DRIVERS_NET_RTIPV4_NET_ROUTES ++ int "Maximum network routing table entries" ++ depends on XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ default 16 ++ ---help--- ++ Each route describing a target network reachable via a router ++ requires an entry in the network routing table. If you run very ++ complex realtime networks, you may have to increase this limit. Must ++ be power of 2! ++ ++config XENO_DRIVERS_NET_RTIPV4_ROUTER ++ bool "IP Router" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ ---help--- ++ When switched on, the RTnet station will be able to forward IPv4 ++ packets that are not directed to the station itself. Typically used in ++ combination with CONFIG_RTNET_RTIPV4_NETROUTING. ++ ++ See Documentation/README.routing for further information. ++ ++config XENO_DRIVERS_NET_RTIPV4_DEBUG ++ bool "RTipv4 Debugging" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ default n ++ ++ ---help--- ++ Enables debug message output of the RTipv4 layer. Typically, you ++ may want to turn this on for tracing issues in packet delivery. ++ ++source "drivers/xenomai/net/stack/ipv4/udp/Kconfig" ++source "drivers/xenomai/net/stack/ipv4/tcp/Kconfig" +--- linux/drivers/xenomai/net/stack/ipv4/ip_output.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/ip_output.c 2021-04-07 16:01:27.031634446 +0800 +@@ -0,0 +1,267 @@ ++/*** ++ * ++ * ipv4/ip_output.c - prepare outgoing IP packets ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++static DEFINE_RTDM_LOCK(rt_ip_id_lock); ++static u16 rt_ip_id_count = 0; ++ ++/*** ++ * Slow path for fragmented packets ++ */ ++int rt_ip_build_xmit_slow(struct rtsocket *sk, ++ int getfrag(const void *, char *, unsigned int, ++ unsigned int), ++ const void *frag, unsigned length, ++ struct dest_route *rt, int msg_flags, ++ unsigned int mtu, unsigned int prio) ++{ ++ int err, next_err; ++ struct rtskb *skb; ++ struct rtskb *next_skb; ++ struct iphdr *iph; ++ struct rtnet_device *rtdev = rt->rtdev; ++ unsigned int fragdatalen; ++ unsigned int offset = 0; ++ u16 msg_rt_ip_id; ++ rtdm_lockctx_t context; ++ unsigned int rtskb_size; ++ int hh_len = (rtdev->hard_header_len + 15) & ~15; ++ ++#define FRAGHEADERLEN sizeof(struct iphdr) ++ ++ fragdatalen = ((mtu - FRAGHEADERLEN) & ~7); ++ ++ /* Store id in local variable */ ++ rtdm_lock_get_irqsave(&rt_ip_id_lock, context); ++ msg_rt_ip_id = rt_ip_id_count++; ++ rtdm_lock_put_irqrestore(&rt_ip_id_lock, context); ++ ++ rtskb_size = mtu + hh_len + 15; ++ ++ /* TODO: delay previous skb until ALL errors are catched which may occure ++ during next skb setup */ ++ ++ /* Preallocate first rtskb */ ++ skb = alloc_rtskb(rtskb_size, &sk->skb_pool); ++ if (skb == NULL) ++ return -ENOBUFS; ++ ++ for (offset = 0; offset < length; offset += fragdatalen) { ++ int fraglen; /* The length (IP, including ip-header) of this ++ very fragment */ ++ __u16 frag_off = offset >> 3; ++ ++ next_err = 0; ++ if (offset >= length - fragdatalen) { ++ /* last fragment */ ++ fraglen = FRAGHEADERLEN + length - offset; ++ next_skb = NULL; ++ } else { ++ fraglen = FRAGHEADERLEN + fragdatalen; ++ frag_off |= IP_MF; ++ ++ next_skb = alloc_rtskb(rtskb_size, &sk->skb_pool); ++ if (next_skb == NULL) { ++ frag_off &= ~IP_MF; /* cut the chain */ ++ next_err = -ENOBUFS; ++ } ++ } ++ ++ rtskb_reserve(skb, hh_len); ++ ++ skb->rtdev = rtdev; ++ skb->nh.iph = iph = (struct iphdr *)rtskb_put(skb, fraglen); ++ skb->priority = prio; ++ ++ iph->version = 4; ++ iph->ihl = 5; /* 20 byte header - no options */ ++ iph->tos = sk->prot.inet.tos; ++ iph->tot_len = htons(fraglen); ++ iph->id = htons(msg_rt_ip_id); ++ iph->frag_off = htons(frag_off); ++ iph->ttl = 255; ++ iph->protocol = sk->protocol; ++ iph->saddr = rtdev->local_ip; ++ iph->daddr = rt->ip; ++ iph->check = 0; /* required! */ ++ iph->check = ip_fast_csum((unsigned char *)iph, 5 /*iph->ihl*/); ++ ++ if ((err = getfrag(frag, ((char *)iph) + 5 /*iph->ihl*/ * 4, ++ offset, fraglen - FRAGHEADERLEN))) ++ goto error; ++ ++ if (rtdev->hard_header) { ++ err = rtdev->hard_header(skb, rtdev, ETH_P_IP, ++ rt->dev_addr, rtdev->dev_addr, ++ skb->len); ++ if (err < 0) ++ goto error; ++ } ++ ++ err = rtdev_xmit(skb); ++ ++ skb = next_skb; ++ ++ if (err != 0) { ++ err = -EAGAIN; ++ goto error; ++ } ++ ++ if (next_err != 0) ++ return next_err; ++ } ++ return 0; ++ ++error: ++ if (skb != NULL) { ++ kfree_rtskb(skb); ++ ++ if (next_skb != NULL) ++ kfree_rtskb(next_skb); ++ } ++ return err; ++} ++ ++/*** ++ * Fast path for unfragmented packets. 
++ */ ++int rt_ip_build_xmit(struct rtsocket *sk, ++ int getfrag(const void *, char *, unsigned int, ++ unsigned int), ++ const void *frag, unsigned length, struct dest_route *rt, ++ int msg_flags) ++{ ++ int err = 0; ++ struct rtskb *skb; ++ struct iphdr *iph; ++ int hh_len; ++ u16 msg_rt_ip_id; ++ rtdm_lockctx_t context; ++ struct rtnet_device *rtdev = rt->rtdev; ++ unsigned int prio; ++ unsigned int mtu; ++ ++ /* sk->priority may encode both priority and output channel. Make sure ++ we use a consitent value, also for the MTU which is derived from the ++ channel. */ ++ prio = (volatile unsigned int)sk->priority; ++ mtu = rtdev->get_mtu(rtdev, prio); ++ ++ /* ++ * Try the simple case first. This leaves fragmented frames, and by choice ++ * RAW frames within 20 bytes of maximum size(rare) to the long path ++ */ ++ length += sizeof(struct iphdr); ++ ++ if (length > mtu) ++ return rt_ip_build_xmit_slow(sk, getfrag, frag, ++ length - sizeof(struct iphdr), rt, ++ msg_flags, mtu, prio); ++ ++ /* Store id in local variable */ ++ rtdm_lock_get_irqsave(&rt_ip_id_lock, context); ++ msg_rt_ip_id = rt_ip_id_count++; ++ rtdm_lock_put_irqrestore(&rt_ip_id_lock, context); ++ ++ hh_len = (rtdev->hard_header_len + 15) & ~15; ++ ++ skb = alloc_rtskb(length + hh_len + 15, &sk->skb_pool); ++ if (skb == NULL) ++ return -ENOBUFS; ++ ++ rtskb_reserve(skb, hh_len); ++ ++ skb->rtdev = rtdev; ++ skb->nh.iph = iph = (struct iphdr *)rtskb_put(skb, length); ++ skb->priority = prio; ++ ++ iph->version = 4; ++ iph->ihl = 5; ++ iph->tos = sk->prot.inet.tos; ++ iph->tot_len = htons(length); ++ iph->id = htons(msg_rt_ip_id); ++ iph->frag_off = htons(IP_DF); ++ iph->ttl = 255; ++ iph->protocol = sk->protocol; ++ iph->saddr = rtdev->local_ip; ++ iph->daddr = rt->ip; ++ iph->check = 0; /* required! */ ++ iph->check = ip_fast_csum((unsigned char *)iph, 5 /*iph->ihl*/); ++ ++ if ((err = getfrag(frag, ((char *)iph) + 5 /*iph->ihl*/ * 4, 0, ++ length - 5 /*iph->ihl*/ * 4))) ++ goto error; ++ ++ if (rtdev->hard_header) { ++ err = rtdev->hard_header(skb, rtdev, ETH_P_IP, rt->dev_addr, ++ rtdev->dev_addr, skb->len); ++ if (err < 0) ++ goto error; ++ } ++ ++ err = rtdev_xmit(skb); ++ ++ if (err) ++ return -EAGAIN; ++ else ++ return 0; ++ ++error: ++ kfree_rtskb(skb); ++ return err; ++} ++EXPORT_SYMBOL_GPL(rt_ip_build_xmit); ++ ++/*** ++ * IP protocol layer initialiser ++ */ ++static struct rtpacket_type ip_packet_type = { .type = __constant_htons( ++ ETH_P_IP), ++ .handler = &rt_ip_rcv }; ++ ++/*** ++ * ip_init ++ */ ++void __init rt_ip_init(void) ++{ ++ rtdev_add_pack(&ip_packet_type); ++ rt_ip_fragment_init(); ++} ++ ++/*** ++ * ip_release ++ */ ++void rt_ip_release(void) ++{ ++ rtdev_remove_pack(&ip_packet_type); ++ rt_ip_fragment_cleanup(); ++} +--- linux/drivers/xenomai/net/stack/ipv4/ip_fragment.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/ip_fragment.c 2021-04-07 16:01:27.026634453 +0800 +@@ -0,0 +1,327 @@ ++/* ip_fragment.c ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003 Mathias Koehrer ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++#include ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY */ ++ ++/* ++ * This defined sets the number of incoming fragmented IP messages that ++ * can be handled in parallel. ++ */ ++#define COLLECTOR_COUNT 10 ++ ++struct ip_collector { ++ int in_use; ++ __u32 saddr; ++ __u32 daddr; ++ __u16 id; ++ __u8 protocol; ++ ++ struct rtskb_queue frags; ++ struct rtsocket *sock; ++ unsigned int buf_size; ++}; ++ ++static struct ip_collector collector[COLLECTOR_COUNT]; ++ ++static void alloc_collector(struct rtskb *skb, struct rtsocket *sock) ++{ ++ int i; ++ rtdm_lockctx_t context; ++ struct ip_collector *p_coll; ++ struct iphdr *iph = skb->nh.iph; ++ ++ /* ++ * Find a free collector ++ * ++ * Note: We once used to clean up probably outdated chains, but the ++ * algorithm was not stable enough and could cause incorrect drops even ++ * under medium load. If we run in overload, we will loose data anyhow. ++ * What we should do in the future is to account collectors per socket or ++ * socket owner and set quotations. ++ * Garbage collection is now performed only on socket close. ++ */ ++ for (i = 0; i < COLLECTOR_COUNT; i++) { ++ p_coll = &collector[i]; ++ rtdm_lock_get_irqsave(&p_coll->frags.lock, context); ++ ++ if (!p_coll->in_use) { ++ p_coll->in_use = 1; ++ p_coll->buf_size = skb->len; ++ p_coll->frags.first = skb; ++ p_coll->frags.last = skb; ++ p_coll->saddr = iph->saddr; ++ p_coll->daddr = iph->daddr; ++ p_coll->id = iph->id; ++ p_coll->protocol = iph->protocol; ++ p_coll->sock = sock; ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, context); ++ ++ return; ++ } ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, context); ++ } ++ ++ rtdm_printk("RTnet: IP fragmentation - no collector available\n"); ++ kfree_rtskb(skb); ++} ++ ++/* ++ * Return a pointer to the collector that holds the message which ++ * fits to the iphdr of the passed rtskb. 
++ * */ ++static struct rtskb *add_to_collector(struct rtskb *skb, unsigned int offset, ++ int more_frags) ++{ ++ int i, err; ++ rtdm_lockctx_t context; ++ struct ip_collector *p_coll; ++ struct iphdr *iph = skb->nh.iph; ++ struct rtskb *first_skb; ++ ++ /* Search in existing collectors */ ++ for (i = 0; i < COLLECTOR_COUNT; i++) { ++ p_coll = &collector[i]; ++ rtdm_lock_get_irqsave(&p_coll->frags.lock, context); ++ ++ if (p_coll->in_use && (iph->saddr == p_coll->saddr) && ++ (iph->daddr == p_coll->daddr) && (iph->id == p_coll->id) && ++ (iph->protocol == p_coll->protocol)) { ++ first_skb = p_coll->frags.first; ++ ++ /* Acquire the rtskb at the expense of the protocol pool */ ++ if (rtskb_acquire(skb, &p_coll->sock->skb_pool) != 0) { ++ /* We have to drop this fragment => clean up the whole chain */ ++ p_coll->in_use = 0; ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, ++ context); ++ ++#ifdef FRAG_DBG ++ rtdm_printk( ++ "RTnet: Compensation pool empty - IP fragments " ++ "dropped (saddr:%x, daddr:%x)\n", ++ iph->saddr, iph->daddr); ++#endif ++ ++ kfree_rtskb(first_skb); ++ kfree_rtskb(skb); ++ return NULL; ++ } ++ ++ /* Optimized version of __rtskb_queue_tail */ ++ skb->next = NULL; ++ p_coll->frags.last->next = skb; ++ p_coll->frags.last = skb; ++ ++ /* Extend the chain */ ++ first_skb->chain_end = skb; ++ ++ /* Sanity check: unordered fragments are not allowed! */ ++ if (offset != p_coll->buf_size) { ++ /* We have to drop this fragment => clean up the whole chain */ ++ p_coll->in_use = 0; ++ skb = first_skb; ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, ++ context); ++ break; /* leave the for loop */ ++ } ++ ++ p_coll->buf_size += skb->len; ++ ++ if (!more_frags) { ++ p_coll->in_use = 0; ++ ++ err = rt_socket_reference(p_coll->sock); ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, ++ context); ++ ++ if (err < 0) { ++ kfree_rtskb(first_skb); ++ return NULL; ++ } ++ ++ return first_skb; ++ } else { ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, ++ context); ++ return NULL; ++ } ++ } ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, context); ++ } ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++ if (rt_ip_fallback_handler) { ++ __rtskb_push(skb, iph->ihl * 4); ++ rt_ip_fallback_handler(skb); ++ return NULL; ++ } ++#endif ++ ++#ifdef FRAG_DBG ++ rtdm_printk("RTnet: Unordered IP fragment (saddr:%x, daddr:%x)" ++ " - dropped\n", ++ iph->saddr, iph->daddr); ++#endif ++ ++ kfree_rtskb(skb); ++ return NULL; ++} ++ ++/* ++ * Cleans up all collectors referring to the specified socket. ++ * This is now the only kind of garbage collection we do. 
++ */ ++void rt_ip_frag_invalidate_socket(struct rtsocket *sock) ++{ ++ int i; ++ rtdm_lockctx_t context; ++ struct ip_collector *p_coll; ++ ++ for (i = 0; i < COLLECTOR_COUNT; i++) { ++ p_coll = &collector[i]; ++ rtdm_lock_get_irqsave(&p_coll->frags.lock, context); ++ ++ if ((p_coll->in_use) && (p_coll->sock == sock)) { ++ p_coll->in_use = 0; ++ kfree_rtskb(p_coll->frags.first); ++ } ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, context); ++ } ++} ++EXPORT_SYMBOL_GPL(rt_ip_frag_invalidate_socket); ++ ++/* ++ * Cleans up all existing collectors ++ */ ++static void cleanup_all_collectors(void) ++{ ++ int i; ++ rtdm_lockctx_t context; ++ struct ip_collector *p_coll; ++ ++ for (i = 0; i < COLLECTOR_COUNT; i++) { ++ p_coll = &collector[i]; ++ rtdm_lock_get_irqsave(&p_coll->frags.lock, context); ++ ++ if (p_coll->in_use) { ++ p_coll->in_use = 0; ++ kfree_rtskb(p_coll->frags.first); ++ } ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, context); ++ } ++} ++ ++/* ++ * This function returns an rtskb that contains the complete, accumulated IP message. ++ * If not all fragments of the IP message have been received yet, it returns NULL ++ * Note: the IP header must have already been pulled from the rtskb! ++ * */ ++struct rtskb *rt_ip_defrag(struct rtskb *skb, struct rtinet_protocol *ipprot) ++{ ++ unsigned int more_frags; ++ unsigned int offset; ++ struct rtsocket *sock; ++ struct iphdr *iph = skb->nh.iph; ++ int ret; ++ ++ /* Parse the IP header */ ++ offset = ntohs(iph->frag_off); ++ more_frags = offset & IP_MF; ++ offset &= IP_OFFSET; ++ offset <<= 3; /* offset is in 8-byte chunks */ ++ ++ /* First fragment? */ ++ if (offset == 0) { ++ /* Get the destination socket */ ++ if ((sock = ipprot->dest_socket(skb)) == NULL) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++ if (rt_ip_fallback_handler) { ++ __rtskb_push(skb, iph->ihl * 4); ++ rt_ip_fallback_handler(skb); ++ return NULL; ++ } ++#endif ++ /* Drop the rtskb */ ++ kfree_rtskb(skb); ++ return NULL; ++ } ++ ++ /* Acquire the rtskb, to unlock the device skb pool */ ++ ret = rtskb_acquire(skb, &sock->skb_pool); ++ ++ if (ret != 0) { ++ /* Drop the rtskb */ ++ kfree_rtskb(skb); ++ } else { ++ /* Allocates a new collector */ ++ alloc_collector(skb, sock); ++ } ++ ++ /* Packet is queued or freed, socket can be released */ ++ rt_socket_dereference(sock); ++ ++ return NULL; ++ } else { ++ /* Add to an existing collector */ ++ return add_to_collector(skb, offset, more_frags); ++ } ++} ++ ++int __init rt_ip_fragment_init(void) ++{ ++ int i; ++ ++ /* Probably not needed (static variable...) */ ++ memset(collector, 0, sizeof(collector)); ++ ++ for (i = 0; i < COLLECTOR_COUNT; i++) ++ rtdm_lock_init(&collector[i].frags.lock); ++ ++ return 0; ++} ++ ++void rt_ip_fragment_cleanup(void) ++{ ++ cleanup_all_collectors(); ++} +--- linux/drivers/xenomai/net/stack/ipv4/ip_input.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/ip_input.c 2021-04-07 16:01:27.021634460 +0800 +@@ -0,0 +1,159 @@ ++/*** ++ * ++ * ipv4/ip_input.c - process incoming IP packets ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++#include ++ ++rt_ip_fallback_handler_t rt_ip_fallback_handler = NULL; ++EXPORT_SYMBOL_GPL(rt_ip_fallback_handler); ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY */ ++ ++/*** ++ * rt_ip_local_deliver ++ */ ++static inline void rt_ip_local_deliver(struct rtskb *skb) ++{ ++ struct iphdr *iph = skb->nh.iph; ++ unsigned short protocol = iph->protocol; ++ struct rtinet_protocol *ipprot; ++ struct rtsocket *sock; ++ int err; ++ ++ ipprot = rt_inet_protocols[rt_inet_hashkey(protocol)]; ++ ++ /* Check if we are supporting the protocol */ ++ if ((ipprot != NULL) && (ipprot->protocol == protocol)) { ++ __rtskb_pull(skb, iph->ihl * 4); ++ ++ /* Point into the IP datagram, just past the header. */ ++ skb->h.raw = skb->data; ++ ++ /* Reassemble IP fragments */ ++ if (iph->frag_off & htons(IP_MF | IP_OFFSET)) { ++ skb = rt_ip_defrag(skb, ipprot); ++ if (!skb) ++ return; ++ ++ sock = skb->sk; ++ } else { ++ /* Get the destination socket */ ++ if ((sock = ipprot->dest_socket(skb)) == NULL) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++ if (rt_ip_fallback_handler) { ++ __rtskb_push(skb, iph->ihl * 4); ++ rt_ip_fallback_handler(skb); ++ return; ++ } ++#endif ++ kfree_rtskb(skb); ++ return; ++ } ++ ++ /* Acquire the rtskb, to unlock the device skb pool */ ++ err = rtskb_acquire(skb, &sock->skb_pool); ++ ++ if (err) { ++ kfree_rtskb(skb); ++ rt_socket_dereference(sock); ++ return; ++ } ++ } ++ ++ /* Deliver the packet to the next layer */ ++ ipprot->rcv_handler(skb); ++ ++ /* Packet is queued, socket can be released */ ++ rt_socket_dereference(sock); ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++ } else if (rt_ip_fallback_handler) { ++ /* If a fallback handler for IP protocol has been installed, ++ * call it. */ ++ rt_ip_fallback_handler(skb); ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY */ ++ } else { ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4_DEBUG)) ++ rtdm_printk("RTnet: no protocol found\n"); ++ kfree_rtskb(skb); ++ } ++} ++ ++/*** ++ * rt_ip_rcv ++ */ ++int rt_ip_rcv(struct rtskb *skb, struct rtpacket_type *pt) ++{ ++ struct iphdr *iph; ++ __u32 len; ++ ++ /* When the interface is in promisc. mode, drop all the crap ++ * that it receives, do not try to analyse it. ++ */ ++ if (skb->pkt_type == PACKET_OTHERHOST) ++ goto drop; ++ ++ iph = skb->nh.iph; ++ ++ /* ++ * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. ++ * ++ * Is the datagram acceptable? ++ * ++ * 1. Length at least the size of an ip header ++ * 2. Version of 4 ++ * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] ++ * 4. 
Doesn't have a bogus length ++ */ ++ if (iph->ihl < 5 || iph->version != 4) ++ goto drop; ++ ++ if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) ++ goto drop; ++ ++ len = ntohs(iph->tot_len); ++ if ((skb->len < len) || (len < ((__u32)iph->ihl << 2))) ++ goto drop; ++ ++ rtskb_trim(skb, len); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER ++ if (rt_ip_route_forward(skb, iph->daddr)) ++ return 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER */ ++ ++ rt_ip_local_deliver(skb); ++ return 0; ++ ++drop: ++ kfree_rtskb(skb); ++ return 0; ++} +--- linux/drivers/xenomai/net/stack/ipv4/udp/udp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/udp/udp.c 2021-04-07 16:01:27.016634468 +0800 +@@ -0,0 +1,839 @@ ++/*** ++ * ++ * ipv4/udp.c - UDP implementation for RTnet ++ * ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/*** ++ * This structure is used to register a UDP socket for reception. All ++ + structures are kept in the port_registry array to increase the cache ++ * locality during the critical port lookup in rt_udp_v4_lookup(). ++ */ ++struct udp_socket { ++ u16 sport; /* local port */ ++ u32 saddr; /* local ip-addr */ ++ struct rtsocket *sock; ++ struct hlist_node link; ++}; ++ ++/*** ++ * Automatic port number assignment ++ ++ * The automatic assignment of port numbers to unbound sockets is realised as ++ * a simple addition of two values: ++ * - the socket ID (lower 8 bits of file descriptor) which is set during ++ * initialisation and left unchanged afterwards ++ * - the start value auto_port_start which is a module parameter ++ ++ * auto_port_mask, also a module parameter, is used to define the range of ++ * port numbers which are used for automatic assignment. Any number within ++ * this range will be rejected when passed to bind_rt(). 
++ ++ */ ++static unsigned int auto_port_start = 1024; ++static unsigned int auto_port_mask = ~(RT_UDP_SOCKETS - 1); ++static int free_ports = RT_UDP_SOCKETS; ++#define RT_PORT_BITMAP_WORDS \ ++ ((RT_UDP_SOCKETS + BITS_PER_LONG - 1) / BITS_PER_LONG) ++static unsigned long port_bitmap[RT_PORT_BITMAP_WORDS]; ++static struct udp_socket port_registry[RT_UDP_SOCKETS]; ++static DEFINE_RTDM_LOCK(udp_socket_base_lock); ++ ++static struct hlist_head port_hash[RT_UDP_SOCKETS * 2]; ++#define port_hash_mask (RT_UDP_SOCKETS * 2 - 1) ++ ++MODULE_LICENSE("GPL"); ++ ++module_param(auto_port_start, uint, 0444); ++module_param(auto_port_mask, uint, 0444); ++MODULE_PARM_DESC(auto_port_start, "Start of automatically assigned port range"); ++MODULE_PARM_DESC(auto_port_mask, ++ "Mask that defines port range for automatic assignment"); ++ ++static inline struct udp_socket *port_hash_search(u32 saddr, u16 sport) ++{ ++ unsigned bucket = sport & port_hash_mask; ++ struct udp_socket *sock; ++ ++ hlist_for_each_entry (sock, &port_hash[bucket], link) ++ if (sock->sport == sport && ++ (saddr == INADDR_ANY || sock->saddr == saddr || ++ sock->saddr == INADDR_ANY)) ++ return sock; ++ ++ return NULL; ++} ++ ++static inline int port_hash_insert(struct udp_socket *sock, u32 saddr, ++ u16 sport) ++{ ++ unsigned bucket; ++ ++ if (port_hash_search(saddr, sport)) ++ return -EADDRINUSE; ++ ++ bucket = sport & port_hash_mask; ++ sock->saddr = saddr; ++ sock->sport = sport; ++ hlist_add_head(&sock->link, &port_hash[bucket]); ++ return 0; ++} ++ ++static inline void port_hash_del(struct udp_socket *sock) ++{ ++ hlist_del(&sock->link); ++} ++ ++/*** ++ * rt_udp_v4_lookup ++ */ ++static inline struct rtsocket *rt_udp_v4_lookup(u32 daddr, u16 dport) ++{ ++ rtdm_lockctx_t context; ++ struct udp_socket *sock; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ sock = port_hash_search(daddr, dport); ++ if (sock && rt_socket_reference(sock->sock) == 0) { ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ return sock->sock; ++ } ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ return NULL; ++} ++ ++/*** ++ * rt_udp_bind - bind socket to local address ++ * @s: socket ++ * @addr: local address ++ */ ++int rt_udp_bind(struct rtdm_fd *fd, struct rtsocket *sock, ++ const struct sockaddr __user *addr, socklen_t addrlen) ++{ ++ struct sockaddr_in _sin, *sin; ++ rtdm_lockctx_t context; ++ int index; ++ int err = 0; ++ ++ if (addrlen < sizeof(struct sockaddr_in)) ++ return -EINVAL; ++ ++ sin = rtnet_get_arg(fd, &_sin, addr, sizeof(_sin)); ++ if (IS_ERR(sin)) ++ return PTR_ERR(sin); ++ ++ if ((sin->sin_port & auto_port_mask) == auto_port_start) ++ return -EINVAL; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ if ((index = sock->prot.inet.reg_index) < 0) { ++ /* socket is being closed */ ++ err = -EBADF; ++ goto unlock_out; ++ } ++ if (sock->prot.inet.state != TCP_CLOSE) { ++ err = -EINVAL; ++ goto unlock_out; ++ } ++ ++ port_hash_del(&port_registry[index]); ++ if (port_hash_insert(&port_registry[index], sin->sin_addr.s_addr, ++ sin->sin_port ?: index + auto_port_start)) { ++ port_hash_insert(&port_registry[index], ++ port_registry[index].saddr, ++ port_registry[index].sport); ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ return -EADDRINUSE; ++ } ++ ++ /* set the source-addr */ ++ sock->prot.inet.saddr = port_registry[index].saddr; ++ ++ /* set source port, if not set by user */ ++ sock->prot.inet.sport = port_registry[index].sport; ++ ++unlock_out: ++ 
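++	/* common exit path: release the socket table lock and return the bind status */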
rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ return err; ++} ++ ++/*** ++ * rt_udp_connect ++ */ ++int rt_udp_connect(struct rtdm_fd *fd, struct rtsocket *sock, ++ const struct sockaddr __user *serv_addr, socklen_t addrlen) ++{ ++ struct sockaddr _sa, *sa; ++ struct sockaddr_in _sin, *sin; ++ rtdm_lockctx_t context; ++ int index; ++ ++ if (addrlen < sizeof(struct sockaddr)) ++ return -EINVAL; ++ ++ sa = rtnet_get_arg(fd, &_sa, serv_addr, sizeof(_sa)); ++ if (IS_ERR(sa)) ++ return PTR_ERR(sa); ++ ++ if (sa->sa_family == AF_UNSPEC) { ++ if ((index = sock->prot.inet.reg_index) < 0) ++ /* socket is being closed */ ++ return -EBADF; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ sock->prot.inet.saddr = INADDR_ANY; ++ /* Note: The following line differs from standard ++ stacks, and we also don't remove the socket from ++ the port list. Might get fixed in the future... */ ++ sock->prot.inet.sport = index + auto_port_start; ++ sock->prot.inet.daddr = INADDR_ANY; ++ sock->prot.inet.dport = 0; ++ sock->prot.inet.state = TCP_CLOSE; ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ } else { ++ if (addrlen < sizeof(struct sockaddr_in)) ++ return -EINVAL; ++ ++ sin = rtnet_get_arg(fd, &_sin, serv_addr, sizeof(_sin)); ++ if (IS_ERR(sin)) ++ return PTR_ERR(sin); ++ ++ if (sin->sin_family != AF_INET) ++ return -EINVAL; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ if (sock->prot.inet.state != TCP_CLOSE) { ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, ++ context); ++ return -EINVAL; ++ } ++ ++ sock->prot.inet.state = TCP_ESTABLISHED; ++ sock->prot.inet.daddr = sin->sin_addr.s_addr; ++ sock->prot.inet.dport = sin->sin_port; ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ } ++ ++ return 0; ++} ++ ++/*** ++ * rt_udp_socket - create a new UDP-Socket ++ * @s: socket ++ */ ++int rt_udp_socket(struct rtdm_fd *fd) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ int ret; ++ int i; ++ int index; ++ rtdm_lockctx_t context; ++ ++ if ((ret = rt_socket_init(fd, IPPROTO_UDP)) != 0) ++ return ret; ++ ++ sock->prot.inet.saddr = INADDR_ANY; ++ sock->prot.inet.state = TCP_CLOSE; ++ sock->prot.inet.tos = 0; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ /* enforce maximum number of UDP sockets */ ++ if (free_ports == 0) { ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ rt_socket_cleanup(fd); ++ return -EAGAIN; ++ } ++ free_ports--; ++ ++ /* find free auto-port in bitmap */ ++ for (i = 0; i < RT_PORT_BITMAP_WORDS; i++) ++ if (port_bitmap[i] != (unsigned long)-1) ++ break; ++ index = ffz(port_bitmap[i]); ++ set_bit(index, &port_bitmap[i]); ++ index += i * 32; ++ sock->prot.inet.reg_index = index; ++ sock->prot.inet.sport = index + auto_port_start; ++ ++ /* register UDP socket */ ++ port_hash_insert(&port_registry[index], INADDR_ANY, ++ sock->prot.inet.sport); ++ port_registry[index].sock = sock; ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ return 0; ++} ++ ++/*** ++ * rt_udp_close ++ */ ++void rt_udp_close(struct rtdm_fd *fd) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ struct rtskb *del; ++ int port; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ sock->prot.inet.state = TCP_CLOSE; ++ ++ if (sock->prot.inet.reg_index >= 0) { ++ port = sock->prot.inet.reg_index; ++ clear_bit(port % BITS_PER_LONG, ++ &port_bitmap[port / BITS_PER_LONG]); ++ port_hash_del(&port_registry[port]); ++ ++ free_ports++; ++ ++ 
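++		/* mark the socket as unregistered; other paths treat a negative
++		   reg_index as "socket is being closed" */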
sock->prot.inet.reg_index = -1; ++ } ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ /* cleanup already collected fragments */ ++ rt_ip_frag_invalidate_socket(sock); ++ ++ /* free packets in incoming queue */ ++ while ((del = rtskb_dequeue(&sock->incoming)) != NULL) ++ kfree_rtskb(del); ++ ++ rt_socket_cleanup(fd); ++} ++ ++int rt_udp_ioctl(struct rtdm_fd *fd, unsigned int request, void __user *arg) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ const struct _rtdm_setsockaddr_args *setaddr; ++ struct _rtdm_setsockaddr_args _setaddr; ++ ++ /* fast path for common socket IOCTLs */ ++ if (_IOC_TYPE(request) == RTIOC_TYPE_NETWORK) ++ return rt_socket_common_ioctl(fd, request, arg); ++ ++ switch (request) { ++ case _RTIOC_BIND: ++ case _RTIOC_CONNECT: ++ setaddr = rtnet_get_arg(fd, &_setaddr, arg, sizeof(_setaddr)); ++ if (IS_ERR(setaddr)) ++ return PTR_ERR(setaddr); ++ if (request == _RTIOC_BIND) ++ return rt_udp_bind(fd, sock, setaddr->addr, ++ setaddr->addrlen); ++ ++ return rt_udp_connect(fd, sock, setaddr->addr, ++ setaddr->addrlen); ++ ++ default: ++ return rt_ip_ioctl(fd, request, arg); ++ } ++} ++ ++/*** ++ * rt_udp_recvmsg ++ */ ++/*** ++ * rt_udp_recvmsg ++ */ ++ssize_t rt_udp_recvmsg(struct rtdm_fd *fd, struct user_msghdr *u_msg, ++ int msg_flags) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ size_t len; ++ struct rtskb *skb; ++ struct rtskb *first_skb; ++ size_t copied = 0; ++ size_t block_size; ++ size_t data_len; ++ struct udphdr *uh; ++ struct sockaddr_in sin; ++ nanosecs_rel_t timeout = sock->timeout; ++ int ret, flags; ++ struct user_msghdr _msg, *msg; ++ socklen_t namelen; ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ ++ msg = rtnet_get_arg(fd, &_msg, u_msg, sizeof(_msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ if (msg->msg_iovlen < 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen == 0) ++ return 0; ++ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ /* non-blocking receive? */ ++ if (msg_flags & MSG_DONTWAIT) ++ timeout = -1; ++ ++ ret = rtdm_sem_timeddown(&sock->pending_sem, timeout, NULL); ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ default: ++ ret = -EBADF; /* socket has been closed */ ++ case -EWOULDBLOCK: ++ case -ETIMEDOUT: ++ case -EINTR: ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ skb = rtskb_dequeue_chain(&sock->incoming); ++ RTNET_ASSERT(skb != NULL, return -EFAULT;); ++ uh = skb->h.uh; ++ first_skb = skb; ++ ++ /* copy the address if required. 
*/ ++ if (msg->msg_name) { ++ memset(&sin, 0, sizeof(sin)); ++ sin.sin_family = AF_INET; ++ sin.sin_port = uh->source; ++ sin.sin_addr.s_addr = skb->nh.iph->saddr; ++ ret = rtnet_put_arg(fd, msg->msg_name, &sin, sizeof(sin)); ++ if (ret) ++ goto fail; ++ ++ namelen = sizeof(sin); ++ ret = rtnet_put_arg(fd, &u_msg->msg_namelen, &namelen, ++ sizeof(namelen)); ++ if (ret) ++ goto fail; ++ } ++ ++ data_len = ntohs(uh->len) - sizeof(struct udphdr); ++ ++ /* remove the UDP header */ ++ __rtskb_pull(skb, sizeof(struct udphdr)); ++ ++ flags = msg->msg_flags & ~MSG_TRUNC; ++ len = rtdm_get_iov_flatlen(iov, msg->msg_iovlen); ++ ++ /* iterate over all IP fragments */ ++ do { ++ rtskb_trim(skb, data_len); ++ ++ block_size = skb->len; ++ copied += block_size; ++ data_len -= block_size; ++ ++ /* The data must not be longer than the available buffer size */ ++ if (copied > len) { ++ block_size -= copied - len; ++ copied = len; ++ flags |= MSG_TRUNC; ++ } ++ ++ /* copy the data */ ++ ret = rtnet_write_to_iov(fd, iov, msg->msg_iovlen, skb->data, ++ block_size); ++ if (ret) ++ goto fail; ++ ++ /* next fragment */ ++ skb = skb->next; ++ } while (skb && !(flags & MSG_TRUNC)); ++ ++ /* did we copied all bytes? */ ++ if (data_len > 0) ++ flags |= MSG_TRUNC; ++ ++ if (flags != msg->msg_flags) { ++ ret = rtnet_put_arg(fd, &u_msg->msg_flags, &flags, ++ sizeof(flags)); ++ if (ret) ++ goto fail; ++ } ++out: ++ if ((msg_flags & MSG_PEEK) == 0) ++ kfree_rtskb(first_skb); ++ else { ++ __rtskb_push(first_skb, sizeof(struct udphdr)); ++ rtskb_queue_head(&sock->incoming, first_skb); ++ rtdm_sem_up(&sock->pending_sem); ++ } ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return copied; ++fail: ++ copied = ret; ++ goto out; ++} ++ ++/*** ++ * struct udpfakehdr ++ */ ++struct udpfakehdr { ++ struct udphdr uh; ++ u32 daddr; ++ u32 saddr; ++ struct rtdm_fd *fd; ++ struct iovec *iov; ++ int iovlen; ++ u32 wcheck; ++}; ++ ++/*** ++ * ++ */ ++static int rt_udp_getfrag(const void *p, unsigned char *to, unsigned int offset, ++ unsigned int fraglen) ++{ ++ struct udpfakehdr *ufh = (struct udpfakehdr *)p; ++ int ret; ++ ++ // We should optimize this function a bit (copy+csum...)! ++ if (offset) { ++ ret = rtnet_read_from_iov(ufh->fd, ufh->iov, ufh->iovlen, to, ++ fraglen); ++ return ret < 0 ? 
ret : 0; ++ } ++ ++ ret = rtnet_read_from_iov(ufh->fd, ufh->iov, ufh->iovlen, ++ to + sizeof(struct udphdr), ++ fraglen - sizeof(struct udphdr)); ++ if (ret < 0) ++ return ret; ++ ++ /* Checksum of the complete data part of the UDP message: */ ++ ufh->wcheck = ++ csum_partial(to + sizeof(struct udphdr), ++ fraglen - sizeof(struct udphdr), ufh->wcheck); ++ ++ /* Checksum of the udp header: */ ++ ufh->wcheck = csum_partial((unsigned char *)ufh, sizeof(struct udphdr), ++ ufh->wcheck); ++ ++ ufh->uh.check = ++ csum_tcpudp_magic(ufh->saddr, ufh->daddr, ntohs(ufh->uh.len), ++ IPPROTO_UDP, ufh->wcheck); ++ ++ if (ufh->uh.check == 0) ++ ufh->uh.check = -1; ++ ++ memcpy(to, ufh, sizeof(struct udphdr)); ++ ++ return 0; ++} ++ ++/*** ++ * rt_udp_sendmsg ++ */ ++ssize_t rt_udp_sendmsg(struct rtdm_fd *fd, const struct user_msghdr *msg, ++ int msg_flags) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ size_t len; ++ int ulen; ++ struct sockaddr_in _sin, *sin; ++ struct udpfakehdr ufh; ++ struct dest_route rt; ++ u32 saddr; ++ u32 daddr; ++ u16 dport; ++ int err; ++ rtdm_lockctx_t context; ++ struct user_msghdr _msg; ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ ++ if (msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ ++ return -EOPNOTSUPP; ++ ++ if (msg_flags & ~(MSG_DONTROUTE | MSG_DONTWAIT)) ++ return -EINVAL; ++ ++ msg = rtnet_get_arg(fd, &_msg, msg, sizeof(*msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ if (msg->msg_iovlen < 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen == 0) ++ return 0; ++ ++ err = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (err) ++ return err; ++ ++ len = rtdm_get_iov_flatlen(iov, msg->msg_iovlen); ++ if ((len < 0) || ++ (len > 0xFFFF - sizeof(struct iphdr) - sizeof(struct udphdr))) { ++ err = -EMSGSIZE; ++ goto out; ++ } ++ ++ ulen = len + sizeof(struct udphdr); ++ ++ if (msg->msg_name && msg->msg_namelen == sizeof(*sin)) { ++ sin = rtnet_get_arg(fd, &_sin, msg->msg_name, sizeof(_sin)); ++ if (IS_ERR(sin)) { ++ err = PTR_ERR(sin); ++ goto out; ++ } ++ ++ if (sin->sin_family != AF_INET && ++ sin->sin_family != AF_UNSPEC) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ daddr = sin->sin_addr.s_addr; ++ dport = sin->sin_port; ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ } else { ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ if (sock->prot.inet.state != TCP_ESTABLISHED) { ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, ++ context); ++ err = -ENOTCONN; ++ goto out; ++ } ++ ++ daddr = sock->prot.inet.daddr; ++ dport = sock->prot.inet.dport; ++ } ++ ++ saddr = sock->prot.inet.saddr; ++ ufh.uh.source = sock->prot.inet.sport; ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ if ((daddr | dport) == 0) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ /* get output route */ ++ err = rt_ip_route_output(&rt, daddr, saddr); ++ if (err) ++ goto out; ++ ++ /* we found a route, remember the routing dest-addr could be the netmask */ ++ ufh.saddr = saddr != INADDR_ANY ? 
saddr : rt.rtdev->local_ip; ++ ufh.daddr = daddr; ++ ufh.uh.dest = dport; ++ ufh.uh.len = htons(ulen); ++ ufh.uh.check = 0; ++ ufh.fd = fd; ++ ufh.iov = iov; ++ ufh.iovlen = msg->msg_iovlen; ++ ufh.wcheck = 0; ++ ++ err = rt_ip_build_xmit(sock, rt_udp_getfrag, &ufh, ulen, &rt, ++ msg_flags); ++ ++ /* Drop the reference obtained in rt_ip_route_output() */ ++ rtdev_dereference(rt.rtdev); ++out: ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return err ?: len; ++} ++ ++/*** ++ * rt_udp_check ++ */ ++static inline unsigned short rt_udp_check(struct udphdr *uh, int len, ++ unsigned long saddr, ++ unsigned long daddr, ++ unsigned long base) ++{ ++ return (csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base)); ++} ++ ++struct rtsocket *rt_udp_dest_socket(struct rtskb *skb) ++{ ++ struct udphdr *uh = skb->h.uh; ++ unsigned short ulen = ntohs(uh->len); ++ u32 saddr = skb->nh.iph->saddr; ++ u32 daddr = skb->nh.iph->daddr; ++ struct rtnet_device *rtdev = skb->rtdev; ++ ++ if (uh->check == 0) ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ /* ip_summed (yet) never equals CHECKSUM_PARTIAL ++ else ++ if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ ++ if ( !rt_udp_check(uh, ulen, saddr, daddr, skb->csum) ) ++ return NULL; ++ ++ skb->ip_summed = CHECKSUM_NONE; ++ }*/ ++ ++ if (skb->ip_summed != CHECKSUM_UNNECESSARY) ++ skb->csum = ++ csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); ++ ++ /* patch broadcast daddr */ ++ if (daddr == rtdev->broadcast_ip) ++ daddr = rtdev->local_ip; ++ ++ /* find the destination socket */ ++ skb->sk = rt_udp_v4_lookup(daddr, uh->dest); ++ ++ return skb->sk; ++} ++ ++/*** ++ * rt_udp_rcv ++ */ ++void rt_udp_rcv(struct rtskb *skb) ++{ ++ struct rtsocket *sock = skb->sk; ++ void (*callback_func)(struct rtdm_fd *, void *); ++ void *callback_arg; ++ rtdm_lockctx_t context; ++ ++ rtskb_queue_tail(&sock->incoming, skb); ++ rtdm_sem_up(&sock->pending_sem); ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ callback_func = sock->callback_func; ++ callback_arg = sock->callback_arg; ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ ++ if (callback_func) ++ callback_func(rt_socket_fd(sock), callback_arg); ++} ++ ++/*** ++ * rt_udp_rcv_err ++ */ ++void rt_udp_rcv_err(struct rtskb *skb) ++{ ++ rtdm_printk("RTnet: rt_udp_rcv err\n"); ++} ++ ++/*** ++ * UDP-Initialisation ++ */ ++static struct rtinet_protocol udp_protocol = { .protocol = IPPROTO_UDP, ++ .dest_socket = ++ &rt_udp_dest_socket, ++ .rcv_handler = &rt_udp_rcv, ++ .err_handler = &rt_udp_rcv_err, ++ .init_socket = &rt_udp_socket }; ++ ++static struct rtdm_driver udp_driver = { ++ .profile_info = RTDM_PROFILE_INFO(udp, ++ RTDM_CLASS_NETWORK, ++ RTDM_SUBCLASS_RTNET, ++ RTNET_RTDM_VER), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtsocket), ++ ++ .protocol_family = PF_INET, ++ .socket_type = SOCK_DGRAM, ++ ++ /* default is UDP */ ++ .ops = { ++ .socket = rt_inet_socket, ++ .close = rt_udp_close, ++ .ioctl_rt = rt_udp_ioctl, ++ .ioctl_nrt = rt_udp_ioctl, ++ .recvmsg_rt = rt_udp_recvmsg, ++ .sendmsg_rt = rt_udp_sendmsg, ++ .select = rt_socket_select_bind, ++ }, ++}; ++ ++static struct rtdm_device udp_device = { ++ .driver = &udp_driver, ++ .label = "udp", ++}; ++ ++/*** ++ * rt_udp_init ++ */ ++static int __init rt_udp_init(void) ++{ ++ int i, err; ++ ++ if ((auto_port_start < 0) || ++ (auto_port_start >= 0x10000 - RT_UDP_SOCKETS)) ++ auto_port_start = 1024; ++ auto_port_start = htons(auto_port_start & (auto_port_mask & 
0xFFFF)); ++ auto_port_mask = htons(auto_port_mask | 0xFFFF0000); ++ ++ rt_inet_add_protocol(&udp_protocol); ++ ++ for (i = 0; i < ARRAY_SIZE(port_hash); i++) ++ INIT_HLIST_HEAD(&port_hash[i]); ++ ++ err = rtdm_dev_register(&udp_device); ++ if (err) ++ rt_inet_del_protocol(&udp_protocol); ++ return err; ++} ++ ++/*** ++ * rt_udp_release ++ */ ++static void __exit rt_udp_release(void) ++{ ++ rtdm_dev_unregister(&udp_device); ++ rt_inet_del_protocol(&udp_protocol); ++} ++ ++module_init(rt_udp_init); ++module_exit(rt_udp_release); +--- linux/drivers/xenomai/net/stack/ipv4/udp/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/udp/Makefile 2021-04-07 16:01:27.012634474 +0800 +@@ -0,0 +1,5 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4_UDP) += rtudp.o ++ ++rtudp-y := udp.o +--- linux/drivers/xenomai/net/stack/ipv4/udp/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/udp/Kconfig 2021-04-07 16:01:27.007634481 +0800 +@@ -0,0 +1,6 @@ ++config XENO_DRIVERS_NET_RTIPV4_UDP ++ tristate "UDP support" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ default y ++ ---help--- ++ Enables UDP support of the RTnet Real-Time IPv4 protocol. +--- linux/drivers/xenomai/net/stack/rtmac/nomac/nomac_proto.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/nomac_proto.c 2021-04-07 16:01:27.002634488 +0800 +@@ -0,0 +1,127 @@ ++/*** ++ * ++ * rtmac/nomac/nomac_proto.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++static struct rtskb_queue nrt_rtskb_queue; ++static rtdm_task_t wrapper_task; ++static rtdm_event_t wakeup_sem; ++ ++int nomac_rt_packet_tx(struct rtskb *rtskb, struct rtnet_device *rtdev) ++{ ++ /* unused here, just to demonstrate access to the discipline state ++ struct nomac_priv *nomac = ++ (struct nomac_priv *)rtdev->mac_priv->disc_priv; */ ++ int ret; ++ ++ rtcap_mark_rtmac_enqueue(rtskb); ++ ++ /* no MAC: we simply transmit the packet under xmit_lock */ ++ rtdm_mutex_lock(&rtdev->xmit_mutex); ++ ret = rtmac_xmit(rtskb); ++ rtdm_mutex_unlock(&rtdev->xmit_mutex); ++ ++ return ret; ++} ++ ++int nomac_nrt_packet_tx(struct rtskb *rtskb) ++{ ++ struct rtnet_device *rtdev = rtskb->rtdev; ++ /* unused here, just to demonstrate access to the discipline state ++ struct nomac_priv *nomac = ++ (struct nomac_priv *)rtdev->mac_priv->disc_priv; */ ++ int ret; ++ ++ rtcap_mark_rtmac_enqueue(rtskb); ++ ++ /* note: this routine may be called both in rt and non-rt context ++ * => detect and wrap the context if necessary */ ++ if (!rtdm_in_rt_context()) { ++ rtskb_queue_tail(&nrt_rtskb_queue, rtskb); ++ rtdm_event_signal(&wakeup_sem); ++ return 0; ++ } else { ++ /* no MAC: we simply transmit the packet under xmit_lock */ ++ rtdm_mutex_lock(&rtdev->xmit_mutex); ++ ret = rtmac_xmit(rtskb); ++ rtdm_mutex_unlock(&rtdev->xmit_mutex); ++ ++ return ret; ++ } ++} ++ ++void nrt_xmit_task(void *arg) ++{ ++ struct rtskb *rtskb; ++ struct rtnet_device *rtdev; ++ ++ while (!rtdm_task_should_stop()) { ++ if (rtdm_event_wait(&wakeup_sem) < 0) ++ break; ++ ++ while ((rtskb = rtskb_dequeue(&nrt_rtskb_queue))) { ++ rtdev = rtskb->rtdev; ++ ++ /* no MAC: we simply transmit the packet under xmit_lock */ ++ rtdm_mutex_lock(&rtdev->xmit_mutex); ++ rtmac_xmit(rtskb); ++ rtdm_mutex_unlock(&rtdev->xmit_mutex); ++ } ++ } ++} ++ ++int nomac_packet_rx(struct rtskb *rtskb) ++{ ++ /* actually, NoMAC doesn't expect any control packet */ ++ kfree_rtskb(rtskb); ++ ++ return 0; ++} ++ ++int __init nomac_proto_init(void) ++{ ++ int ret; ++ ++ rtskb_queue_init(&nrt_rtskb_queue); ++ rtdm_event_init(&wakeup_sem, 0); ++ ++ ret = rtdm_task_init(&wrapper_task, "rtnet-nomac", nrt_xmit_task, 0, ++ RTDM_TASK_LOWEST_PRIORITY, 0); ++ if (ret < 0) { ++ rtdm_event_destroy(&wakeup_sem); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++void nomac_proto_cleanup(void) ++{ ++ rtdm_event_destroy(&wakeup_sem); ++ rtdm_task_destroy(&wrapper_task); ++} +--- linux/drivers/xenomai/net/stack/rtmac/nomac/nomac_ioctl.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/nomac_ioctl.c 2021-04-07 16:01:26.998634493 +0800 +@@ -0,0 +1,99 @@ ++/*** ++ * ++ * rtmac/nomac/nomac_ioctl.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++ ++static int nomac_ioctl_attach(struct rtnet_device *rtdev) ++{ ++ struct nomac_priv *nomac; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) { ++ ret = rtmac_disc_attach(rtdev, &nomac_disc); ++ if (ret < 0) ++ return ret; ++ } ++ ++ nomac = (struct nomac_priv *)rtdev->mac_priv->disc_priv; ++ if (nomac->magic != NOMAC_MAGIC) ++ return -ENOTTY; ++ ++ /* ... */ ++ ++ return 0; ++} ++ ++static int nomac_ioctl_detach(struct rtnet_device *rtdev) ++{ ++ struct nomac_priv *nomac; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) ++ return -ENOTTY; ++ ++ nomac = (struct nomac_priv *)rtdev->mac_priv->disc_priv; ++ if (nomac->magic != NOMAC_MAGIC) ++ return -ENOTTY; ++ ++ ret = rtmac_disc_detach(rtdev); ++ ++ /* ... */ ++ ++ return ret; ++} ++ ++int nomac_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct nomac_config cfg; ++ int ret; ++ ++ ret = copy_from_user(&cfg, (void *)arg, sizeof(cfg)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ switch (request) { ++ case NOMAC_IOC_ATTACH: ++ ret = nomac_ioctl_attach(rtdev); ++ break; ++ ++ case NOMAC_IOC_DETACH: ++ ret = nomac_ioctl_detach(rtdev); ++ break; ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ return ret; ++} +--- linux/drivers/xenomai/net/stack/rtmac/nomac/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/Makefile 2021-04-07 16:01:26.993634501 +0800 +@@ -0,0 +1,9 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_NOMAC) += nomac.o ++ ++nomac-y := \ ++ nomac_dev.o \ ++ nomac_ioctl.o \ ++ nomac_module.o \ ++ nomac_proto.o +--- linux/drivers/xenomai/net/stack/rtmac/nomac/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/Kconfig 2021-04-07 16:01:26.988634508 +0800 +@@ -0,0 +1,9 @@ ++config XENO_DRIVERS_NET_NOMAC ++ tristate "NoMAC discipline for RTmac" ++ depends on XENO_DRIVERS_NET_RTMAC ++ default n ++ ---help--- ++ This no-operation RTmac discipline is intended to act as a template ++ for new implementations. However, it can be compiled and used (see ++ nomaccfg management tool), but don't expect any improved determinism ++ of your network. ;) +--- linux/drivers/xenomai/net/stack/rtmac/nomac/nomac_dev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/nomac_dev.c 2021-04-07 16:01:26.984634513 +0800 +@@ -0,0 +1,84 @@ ++/*** ++ * ++ * rtmac/nomac/nomac_dev.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++static int nomac_dev_openclose(void) ++{ ++ return 0; ++} ++ ++static int nomac_dev_ioctl(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ struct nomac_priv *nomac; ++ ++ nomac = container_of(rtdm_fd_to_context(fd)->device, struct nomac_priv, ++ api_device); ++ ++ switch (request) { ++ case RTMAC_RTIOC_TIMEOFFSET: ++ ++ case RTMAC_RTIOC_WAITONCYCLE: ++ ++ default: ++ return -ENOTTY; ++ } ++} ++ ++static struct rtdm_driver ++ nomac_driver = { .profile_info = RTDM_PROFILE_INFO( ++ nomac, RTDM_CLASS_RTMAC, ++ RTDM_SUBCLASS_UNMANAGED, RTNET_RTDM_VER), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = 1, ++ .context_size = 0, ++ .ops = { ++ .open = (typeof(nomac_driver.ops.open)) ++ nomac_dev_openclose, ++ .ioctl_rt = nomac_dev_ioctl, ++ .ioctl_nrt = nomac_dev_ioctl, ++ .close = (typeof(nomac_driver.ops.close)) ++ nomac_dev_openclose, ++ } }; ++ ++int nomac_dev_init(struct rtnet_device *rtdev, struct nomac_priv *nomac) ++{ ++ char *pos; ++ ++ strcpy(nomac->device_name, "NOMAC"); ++ for (pos = rtdev->name + strlen(rtdev->name) - 1; ++ (pos >= rtdev->name) && ((*pos) >= '0') && (*pos <= '9'); pos--) ++ ; ++ strncat(nomac->device_name + 5, pos + 1, IFNAMSIZ - 5); ++ ++ nomac->api_driver = nomac_driver; ++ nomac->api_device.driver = &nomac->api_driver; ++ nomac->api_device.label = nomac->device_name; ++ ++ return rtdm_dev_register(&nomac->api_device); ++} +--- linux/drivers/xenomai/net/stack/rtmac/nomac/nomac_module.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/nomac_module.c 2021-04-07 16:01:26.979634520 +0800 +@@ -0,0 +1,161 @@ ++/*** ++ * ++ * rtmac/nomac/nomac_module.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++LIST_HEAD(nomac_devices); ++DEFINE_MUTEX(nomac_nrt_lock); ++ ++int nomac_proc_read(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct nomac_priv *entry; ++ ++ mutex_lock(&nomac_nrt_lock); ++ ++ xnvfile_printf(it, "Interface API Device State\n"); ++ ++ list_for_each_entry (entry, &nomac_devices, list_entry) ++ xnvfile_printf(it, "%-15s %-15s Attached\n", entry->rtdev->name, ++ entry->api_device.name); ++ ++ mutex_unlock(&nomac_nrt_lock); ++ ++ return 0; ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++int nomac_attach(struct rtnet_device *rtdev, void *priv) ++{ ++ struct nomac_priv *nomac = (struct nomac_priv *)priv; ++ int ret; ++ ++ nomac->magic = NOMAC_MAGIC; ++ nomac->rtdev = rtdev; ++ ++ /* ... */ ++ ++ ret = nomac_dev_init(rtdev, nomac); ++ if (ret < 0) ++ return ret; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ mutex_lock(&nomac_nrt_lock); ++ list_add(&nomac->list_entry, &nomac_devices); ++ mutex_unlock(&nomac_nrt_lock); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ return 0; ++} ++ ++int nomac_detach(struct rtnet_device *rtdev, void *priv) ++{ ++ struct nomac_priv *nomac = (struct nomac_priv *)priv; ++ ++ nomac_dev_release(nomac); ++ ++ /* ... */ ++#ifdef CONFIG_XENO_OPT_VFILE ++ mutex_lock(&nomac_nrt_lock); ++ list_del(&nomac->list_entry); ++ mutex_unlock(&nomac_nrt_lock); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++struct rtmac_proc_entry nomac_proc_entries[] = { ++ { name: "nomac", handler: nomac_proc_read }, ++}; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct rtmac_disc nomac_disc = { ++ name: "NoMAC", ++ priv_size: sizeof(struct nomac_priv), ++ disc_type: __constant_htons(RTMAC_TYPE_NOMAC), ++ ++ packet_rx: nomac_packet_rx, ++ rt_packet_tx: nomac_rt_packet_tx, ++ nrt_packet_tx: nomac_nrt_packet_tx, ++ ++ get_mtu: NULL, ++ ++ vnic_xmit: RTMAC_DEFAULT_VNIC, ++ ++ attach: nomac_attach, ++ detach: nomac_detach, ++ ++ ioctls: { ++ service_name: "RTmac/NoMAC", ++ ioctl_type: RTNET_IOC_TYPE_RTMAC_NOMAC, ++ handler: nomac_ioctl ++ }, ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ proc_entries: nomac_proc_entries, ++ nr_proc_entries: ARRAY_SIZE(nomac_proc_entries), ++#endif /* CONFIG_XENO_OPT_VFILE */ ++}; ++ ++int __init nomac_init(void) ++{ ++ int ret; ++ ++ printk("RTmac/NoMAC: init void media access control mechanism\n"); ++ ++ ret = nomac_proto_init(); ++ if (ret < 0) ++ return ret; ++ ++ ret = rtmac_disc_register(&nomac_disc); ++ if (ret < 0) { ++ nomac_proto_cleanup(); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++void nomac_release(void) ++{ ++ rtmac_disc_deregister(&nomac_disc); ++ nomac_proto_cleanup(); ++ ++ printk("RTmac/NoMAC: unloaded\n"); ++} ++ ++module_init(nomac_init); ++module_exit(nomac_release); ++ ++MODULE_AUTHOR("Jan Kiszka"); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/stack/rtmac/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/Makefile 2021-04-07 16:01:26.974634528 +0800 +@@ -0,0 +1,15 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_NOMAC) += nomac/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_TDMA) += tdma/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTMAC) += rtmac.o ++ ++rtmac-y := \ ++ rtmac_disc.o \ ++ rtmac_module.o \ ++ rtmac_proc.o \ ++ rtmac_proto.o \ ++ rtmac_syms.o \ ++ rtmac_vnic.o +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_vnic.c 1970-01-01 08:00:00.000000000 +0800 ++++ 
linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_vnic.c 2021-04-07 16:01:26.970634533 +0800 +@@ -0,0 +1,334 @@ ++/* rtmac_vnic.c ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include /* for netdev_priv() */ ++#include ++#include ++#include ++ ++static unsigned int vnic_rtskbs = DEFAULT_VNIC_RTSKBS; ++module_param(vnic_rtskbs, uint, 0444); ++MODULE_PARM_DESC(vnic_rtskbs, ++ "Number of realtime socket buffers per virtual NIC"); ++ ++static rtdm_nrtsig_t vnic_signal; ++static struct rtskb_queue rx_queue; ++ ++int rtmac_vnic_rx(struct rtskb *rtskb, u16 type) ++{ ++ struct rtmac_priv *mac_priv = rtskb->rtdev->mac_priv; ++ struct rtskb_pool *pool = &mac_priv->vnic_skb_pool; ++ ++ if (rtskb_acquire(rtskb, pool) != 0) { ++ mac_priv->vnic_stats.rx_dropped++; ++ kfree_rtskb(rtskb); ++ return -1; ++ } ++ ++ rtskb->protocol = type; ++ ++ if (rtskb_queue_tail_check(&rx_queue, rtskb)) ++ rtdm_nrtsig_pend(&vnic_signal); ++ ++ return 0; ++} ++ ++static void rtmac_vnic_signal_handler(rtdm_nrtsig_t *nrtsig, void *arg) ++{ ++ struct rtskb *rtskb; ++ struct sk_buff *skb; ++ unsigned hdrlen; ++ struct net_device_stats *stats; ++ struct rtnet_device *rtdev; ++ ++ while (1) { ++ rtskb = rtskb_dequeue(&rx_queue); ++ if (!rtskb) ++ break; ++ ++ rtdev = rtskb->rtdev; ++ hdrlen = rtdev->hard_header_len; ++ ++ skb = dev_alloc_skb(hdrlen + rtskb->len + 2); ++ if (skb) { ++ /* the rtskb stamp is useless (different clock), get new one */ ++ __net_timestamp(skb); ++ ++ skb_reserve(skb, ++ 2); /* Align IP on 16 byte boundaries */ ++ ++ /* copy Ethernet header */ ++ memcpy(skb_put(skb, hdrlen), ++ rtskb->data - hdrlen - sizeof(struct rtmac_hdr), ++ hdrlen); ++ ++ /* patch the protocol field in the original Ethernet header */ ++ ((struct ethhdr *)skb->data)->h_proto = rtskb->protocol; ++ ++ /* copy data */ ++ memcpy(skb_put(skb, rtskb->len), rtskb->data, ++ rtskb->len); ++ ++ skb->dev = rtskb->rtdev->mac_priv->vnic; ++ skb->protocol = eth_type_trans(skb, skb->dev); ++ ++ stats = &rtskb->rtdev->mac_priv->vnic_stats; ++ ++ kfree_rtskb(rtskb); ++ ++ stats->rx_packets++; ++ stats->rx_bytes += skb->len; ++ ++ netif_rx(skb); ++ } else { ++ printk("RTmac: VNIC fails to allocate linux skb\n"); ++ kfree_rtskb(rtskb); ++ } ++ } ++} ++ ++static int rtmac_vnic_copy_mac(struct net_device *dev) ++{ ++ memcpy(dev->dev_addr, ++ (*(struct rtnet_device **)netdev_priv(dev))->dev_addr, ++ MAX_ADDR_LEN); ++ ++ return 0; ++} ++ ++int rtmac_vnic_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct rtnet_device *rtdev = *(struct rtnet_device **)netdev_priv(dev); ++ struct net_device_stats *stats = &rtdev->mac_priv->vnic_stats; ++ struct rtskb_pool 
*pool = &rtdev->mac_priv->vnic_skb_pool; ++ struct ethhdr *ethernet = (struct ethhdr *)skb->data; ++ struct rtskb *rtskb; ++ int res; ++ int data_len; ++ ++ rtskb = alloc_rtskb((skb->len + sizeof(struct rtmac_hdr) + 15) & ~15, ++ pool); ++ if (!rtskb) ++ return NETDEV_TX_BUSY; ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len + sizeof(struct rtmac_hdr)); ++ ++ data_len = skb->len - dev->hard_header_len; ++ memcpy(rtskb_put(rtskb, data_len), skb->data + dev->hard_header_len, ++ data_len); ++ ++ res = rtmac_add_header(rtdev, ethernet->h_dest, rtskb, ++ ntohs(ethernet->h_proto), RTMAC_FLAG_TUNNEL); ++ if (res < 0) { ++ stats->tx_dropped++; ++ kfree_rtskb(rtskb); ++ goto done; ++ } ++ ++ RTNET_ASSERT(rtdev->mac_disc->nrt_packet_tx != NULL, kfree_rtskb(rtskb); ++ goto done;); ++ ++ res = rtdev->mac_disc->nrt_packet_tx(rtskb); ++ if (res < 0) { ++ stats->tx_dropped++; ++ kfree_rtskb(rtskb); ++ } else { ++ stats->tx_packets++; ++ stats->tx_bytes += skb->len; ++ } ++ ++done: ++ dev_kfree_skb(skb); ++ return NETDEV_TX_OK; ++} ++ ++static struct net_device_stats *rtmac_vnic_get_stats(struct net_device *dev) ++{ ++ return &(*(struct rtnet_device **)netdev_priv(dev)) ++ ->mac_priv->vnic_stats; ++} ++ ++static int rtmac_vnic_change_mtu(struct net_device *dev, int new_mtu) ++{ ++ if ((new_mtu < 68) || ++ ((unsigned)new_mtu > 1500 - sizeof(struct rtmac_hdr))) ++ return -EINVAL; ++ dev->mtu = new_mtu; ++ return 0; ++} ++ ++void rtmac_vnic_set_max_mtu(struct rtnet_device *rtdev, unsigned int max_mtu) ++{ ++ struct rtmac_priv *mac_priv = rtdev->mac_priv; ++ struct net_device *vnic = mac_priv->vnic; ++ unsigned int prev_mtu = mac_priv->vnic_max_mtu; ++ ++ mac_priv->vnic_max_mtu = max_mtu - sizeof(struct rtmac_hdr); ++ ++ /* set vnic mtu in case max_mtu is smaller than the current mtu or ++ the current mtu was set to previous max_mtu */ ++ rtnl_lock(); ++ if ((vnic->mtu > mac_priv->vnic_max_mtu) || ++ (prev_mtu == mac_priv->vnic_max_mtu)) { ++ dev_set_mtu(vnic, mac_priv->vnic_max_mtu); ++ } ++ rtnl_unlock(); ++} ++ ++static struct net_device_ops vnic_netdev_ops = { ++ .ndo_open = rtmac_vnic_copy_mac, ++ .ndo_get_stats = rtmac_vnic_get_stats, ++ .ndo_change_mtu = rtmac_vnic_change_mtu, ++}; ++ ++static void rtmac_vnic_setup(struct net_device *dev) ++{ ++ ether_setup(dev); ++ ++ dev->netdev_ops = &vnic_netdev_ops; ++ dev->flags &= ~IFF_MULTICAST; ++} ++ ++int rtmac_vnic_add(struct rtnet_device *rtdev, vnic_xmit_handler vnic_xmit) ++{ ++ int res; ++ struct rtmac_priv *mac_priv = rtdev->mac_priv; ++ struct net_device *vnic; ++ char buf[IFNAMSIZ]; ++ ++ /* does the discipline request vnic support? 
*/ ++ if (!vnic_xmit) ++ return 0; ++ ++ mac_priv->vnic = NULL; ++ mac_priv->vnic_max_mtu = rtdev->mtu - sizeof(struct rtmac_hdr); ++ memset(&mac_priv->vnic_stats, 0, sizeof(mac_priv->vnic_stats)); ++ ++ /* create the rtskb pool */ ++ if (rtskb_pool_init(&mac_priv->vnic_skb_pool, vnic_rtskbs, NULL, NULL) < ++ vnic_rtskbs) { ++ res = -ENOMEM; ++ goto error; ++ } ++ ++ snprintf(buf, sizeof(buf), "vnic%d", rtdev->ifindex - 1); ++ ++ vnic = alloc_netdev(sizeof(struct rtnet_device *), buf, ++ NET_NAME_UNKNOWN, rtmac_vnic_setup); ++ if (!vnic) { ++ res = -ENOMEM; ++ goto error; ++ } ++ ++ vnic_netdev_ops.ndo_start_xmit = vnic_xmit; ++ vnic->mtu = mac_priv->vnic_max_mtu; ++ *(struct rtnet_device **)netdev_priv(vnic) = rtdev; ++ rtmac_vnic_copy_mac(vnic); ++ ++ res = register_netdev(vnic); ++ if (res < 0) ++ goto error; ++ ++ mac_priv->vnic = vnic; ++ ++ return 0; ++ ++error: ++ rtskb_pool_release(&mac_priv->vnic_skb_pool); ++ return res; ++} ++ ++int rtmac_vnic_unregister(struct rtnet_device *rtdev) ++{ ++ struct rtmac_priv *mac_priv = rtdev->mac_priv; ++ ++ if (mac_priv->vnic) { ++ rtskb_pool_release(&mac_priv->vnic_skb_pool); ++ unregister_netdev(mac_priv->vnic); ++ free_netdev(mac_priv->vnic); ++ mac_priv->vnic = NULL; ++ } ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int rtnet_rtmac_vnics_show(struct xnvfile_regular_iterator *it, void *d) ++{ ++ struct rtnet_device *rtdev; ++ int i; ++ int err; ++ ++ xnvfile_printf(it, "RT-NIC name\tVNIC name\n"); ++ ++ for (i = 1; i <= MAX_RT_DEVICES; i++) { ++ rtdev = rtdev_get_by_index(i); ++ if (rtdev == NULL) ++ continue; ++ ++ err = mutex_lock_interruptible(&rtdev->nrt_lock); ++ if (err < 0) { ++ rtdev_dereference(rtdev); ++ return err; ++ } ++ ++ if (rtdev->mac_priv != NULL) { ++ struct rtmac_priv *rtmac; ++ ++ rtmac = (struct rtmac_priv *)rtdev->mac_priv; ++ xnvfile_printf(it, "%-15s %s\n", rtdev->name, ++ rtmac->vnic->name); ++ } ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ rtdev_dereference(rtdev); ++ } ++ ++ return 0; ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++int __init rtmac_vnic_module_init(void) ++{ ++ rtskb_queue_init(&rx_queue); ++ ++ rtdm_nrtsig_init(&vnic_signal, rtmac_vnic_signal_handler, NULL); ++ ++ return 0; ++} ++ ++void rtmac_vnic_module_cleanup(void) ++{ ++ struct rtskb *rtskb; ++ ++ rtdm_nrtsig_destroy(&vnic_signal); ++ ++ while ((rtskb = rtskb_dequeue(&rx_queue)) != NULL) { ++ kfree_rtskb(rtskb); ++ } ++} +--- linux/drivers/xenomai/net/stack/rtmac/tdma/tdma_ioctl.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/tdma_ioctl.c 2021-04-07 16:01:26.965634541 +0800 +@@ -0,0 +1,663 @@ ++/*** ++ * ++ * rtmac/tdma/tdma_ioctl.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++static int tdma_ioctl_master(struct rtnet_device *rtdev, ++ struct tdma_config *cfg) ++{ ++ struct tdma_priv *tdma; ++ u64 cycle_ms; ++ unsigned int table_size; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) { ++ ret = rtmac_disc_attach(rtdev, &tdma_disc); ++ if (ret < 0) ++ return ret; ++ } ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) { ++ /* note: we don't clean up an unknown discipline */ ++ return -ENOTTY; ++ } ++ ++ if (test_bit(TDMA_FLAG_ATTACHED, &tdma->flags)) { ++ /* already attached */ ++ return -EBUSY; ++ } ++ ++ set_bit(TDMA_FLAG_MASTER, &tdma->flags); ++ ++ tdma->cal_rounds = cfg->args.master.cal_rounds; ++ ++ /* search at least 3 cycle periods for other masters */ ++ cycle_ms = cfg->args.master.cycle_period; ++ do_div(cycle_ms, 1000000); ++ if (cycle_ms == 0) ++ cycle_ms = 1; ++ msleep(3 * cycle_ms); ++ ++ if (rtskb_module_pool_init(&tdma->cal_rtskb_pool, ++ cfg->args.master.max_cal_requests) != ++ cfg->args.master.max_cal_requests) { ++ ret = -ENOMEM; ++ goto err_out; ++ } ++ ++ table_size = sizeof(struct tdma_slot *) * ++ ((cfg->args.master.max_slot_id >= 1) ? ++ cfg->args.master.max_slot_id + 1 : ++ 2); ++ ++ tdma->slot_table = (struct tdma_slot **)kmalloc(table_size, GFP_KERNEL); ++ if (!tdma->slot_table) { ++ ret = -ENOMEM; ++ goto err_out; ++ } ++ tdma->max_slot_id = cfg->args.master.max_slot_id; ++ memset(tdma->slot_table, 0, table_size); ++ ++ tdma->cycle_period = cfg->args.master.cycle_period; ++ tdma->sync_job.ref_count = 0; ++ INIT_LIST_HEAD(&tdma->sync_job.entry); ++ ++ if (cfg->args.master.backup_sync_offset == 0) ++ tdma->sync_job.id = XMIT_SYNC; ++ else { ++ set_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags); ++ tdma->sync_job.id = BACKUP_SYNC; ++ tdma->backup_sync_inc = cfg->args.master.backup_sync_offset + ++ tdma->cycle_period; ++ } ++ ++ /* did we detect another active master? */ ++ if (test_bit(TDMA_FLAG_RECEIVED_SYNC, &tdma->flags)) { ++ /* become a slave, we need to calibrate first */ ++ tdma->sync_job.id = WAIT_ON_SYNC; ++ } else { ++ if (test_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags)) ++ printk("TDMA: warning, no primary master detected!\n"); ++ set_bit(TDMA_FLAG_CALIBRATED, &tdma->flags); ++ tdma->current_cycle_start = rtdm_clock_read(); ++ } ++ ++ tdma->first_job = tdma->current_job = &tdma->sync_job; ++ ++ rtdm_event_signal(&tdma->worker_wakeup); ++ ++ set_bit(TDMA_FLAG_ATTACHED, &tdma->flags); ++ ++ return 0; ++ ++err_out: ++ rtmac_disc_detach(rtdev); ++ return ret; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ ++static int tdma_ioctl_slave(struct rtnet_device *rtdev, struct tdma_config *cfg) ++{ ++ struct tdma_priv *tdma; ++ unsigned int table_size; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) { ++ ret = rtmac_disc_attach(rtdev, &tdma_disc); ++ if (ret < 0) ++ return ret; ++ } ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) { ++ /* note: we don't clean up an unknown discipline */ ++ return -ENOTTY; ++ } ++ ++ if (test_bit(TDMA_FLAG_ATTACHED, &tdma->flags)) { ++ /* already attached */ ++ return -EBUSY; ++ } ++ ++ tdma->cal_rounds = cfg->args.slave.cal_rounds; ++ if (tdma->cal_rounds == 0) ++ set_bit(TDMA_FLAG_CALIBRATED, &tdma->flags); ++ ++ table_size = sizeof(struct tdma_slot *) * ++ ((cfg->args.slave.max_slot_id >= 1) ? 
++ cfg->args.slave.max_slot_id + 1 : ++ 2); ++ ++ tdma->slot_table = (struct tdma_slot **)kmalloc(table_size, GFP_KERNEL); ++ if (!tdma->slot_table) { ++ ret = -ENOMEM; ++ goto err_out; ++ } ++ tdma->max_slot_id = cfg->args.slave.max_slot_id; ++ memset(tdma->slot_table, 0, table_size); ++ ++ tdma->sync_job.id = WAIT_ON_SYNC; ++ tdma->sync_job.ref_count = 0; ++ INIT_LIST_HEAD(&tdma->sync_job.entry); ++ ++ tdma->first_job = tdma->current_job = &tdma->sync_job; ++ ++ rtdm_event_signal(&tdma->worker_wakeup); ++ ++ set_bit(TDMA_FLAG_ATTACHED, &tdma->flags); ++ ++ return 0; ++ ++err_out: ++ rtmac_disc_detach(rtdev); ++ return ret; ++} ++ ++static int tdma_ioctl_cal_result_size(struct rtnet_device *rtdev, ++ struct tdma_config *cfg) ++{ ++ struct tdma_priv *tdma; ++ ++ if (rtdev->mac_priv == NULL) ++ return -ENOTTY; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) ++ return -ENOTTY; ++ ++ if (!test_bit(TDMA_FLAG_CALIBRATED, &tdma->flags)) ++ return tdma->cal_rounds; ++ else ++ return 0; ++} ++ ++int start_calibration(struct rt_proc_call *call) ++{ ++ struct tdma_request_cal *req_cal; ++ struct tdma_priv *tdma; ++ rtdm_lockctx_t context; ++ ++ req_cal = rtpc_get_priv(call, struct tdma_request_cal); ++ tdma = req_cal->tdma; ++ ++ /* there are no slots yet, simply add this job after first_job */ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ tdma->calibration_call = call; ++ tdma->job_list_revision++; ++ list_add(&req_cal->head.entry, &tdma->first_job->entry); ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ return -CALL_PENDING; ++} ++ ++void copyback_calibration(struct rt_proc_call *call, void *priv_data) ++{ ++ struct tdma_request_cal *req_cal; ++ struct tdma_priv *tdma; ++ int i; ++ u64 value; ++ u64 average = 0; ++ u64 min = 0x7FFFFFFFFFFFFFFFLL; ++ u64 max = 0; ++ ++ req_cal = rtpc_get_priv(call, struct tdma_request_cal); ++ tdma = req_cal->tdma; ++ ++ for (i = 0; i < tdma->cal_rounds; i++) { ++ value = req_cal->result_buffer[i]; ++ average += value; ++ if (value < min) ++ min = value; ++ if (value > max) ++ max = value; ++ if ((req_cal->cal_results) && ++ (copy_to_user(&req_cal->cal_results[i], &value, ++ sizeof(value)) != 0)) ++ rtpc_set_result(call, -EFAULT); ++ } ++ do_div(average, tdma->cal_rounds); ++ tdma->master_packet_delay_ns = average; ++ ++ average += 500; ++ do_div(average, 1000); ++ min += 500; ++ do_div(min, 1000); ++ max += 500; ++ do_div(max, 1000); ++ printk("TDMA: calibrated master-to-slave packet delay: " ++ "%ld us (min/max: %ld/%ld us)\n", ++ (unsigned long)average, (unsigned long)min, (unsigned long)max); ++} ++ ++void cleanup_calibration(void *priv_data) ++{ ++ struct tdma_request_cal *req_cal; ++ ++ req_cal = (struct tdma_request_cal *)priv_data; ++ kfree(req_cal->result_buffer); ++} ++ ++static int tdma_ioctl_set_slot(struct rtnet_device *rtdev, ++ struct tdma_config *cfg) ++{ ++ struct tdma_priv *tdma; ++ int id; ++ int jnt_id; ++ struct tdma_slot *slot, *old_slot; ++ struct tdma_job *job, *prev_job; ++ struct tdma_request_cal req_cal; ++ struct rtskb *rtskb; ++ unsigned int job_list_revision; ++ rtdm_lockctx_t context; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) ++ return -ENOTTY; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) ++ return -ENOTTY; ++ ++ id = cfg->args.set_slot.id; ++ if (id > tdma->max_slot_id) ++ return -EINVAL; ++ ++ if (cfg->args.set_slot.size == 0) ++ cfg->args.set_slot.size = rtdev->mtu; ++ else if (cfg->args.set_slot.size > rtdev->mtu) ++ 
return -EINVAL; ++ ++ jnt_id = cfg->args.set_slot.joint_slot; ++ if ((jnt_id >= 0) && ++ ((jnt_id >= tdma->max_slot_id) || (tdma->slot_table[jnt_id] == 0) || ++ (tdma->slot_table[jnt_id]->mtu != cfg->args.set_slot.size))) ++ return -EINVAL; ++ ++ slot = (struct tdma_slot *)kmalloc(sizeof(struct tdma_slot), ++ GFP_KERNEL); ++ if (!slot) ++ return -ENOMEM; ++ ++ if (!test_bit(TDMA_FLAG_CALIBRATED, &tdma->flags)) { ++ req_cal.head.id = XMIT_REQ_CAL; ++ req_cal.head.ref_count = 0; ++ req_cal.tdma = tdma; ++ req_cal.offset = cfg->args.set_slot.offset; ++ req_cal.period = cfg->args.set_slot.period; ++ req_cal.phasing = cfg->args.set_slot.phasing; ++ req_cal.cal_rounds = tdma->cal_rounds; ++ req_cal.cal_results = cfg->args.set_slot.cal_results; ++ ++ req_cal.result_buffer = ++ kmalloc(req_cal.cal_rounds * sizeof(u64), GFP_KERNEL); ++ if (!req_cal.result_buffer) { ++ kfree(slot); ++ return -ENOMEM; ++ } ++ ++ ret = rtpc_dispatch_call(start_calibration, 0, &req_cal, ++ sizeof(req_cal), copyback_calibration, ++ cleanup_calibration); ++ if (ret < 0) { ++ /* kick out any pending calibration job before returning */ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ job = list_entry(tdma->first_job->entry.next, ++ struct tdma_job, entry); ++ if (job != tdma->first_job) { ++ __list_del(job->entry.prev, job->entry.next); ++ ++ while (job->ref_count > 0) { ++ rtdm_lock_put_irqrestore(&tdma->lock, ++ context); ++ msleep(100); ++ rtdm_lock_get_irqsave(&tdma->lock, ++ context); ++ } ++ } ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ kfree(slot); ++ return ret; ++ } ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ if (test_bit(TDMA_FLAG_MASTER, &tdma->flags)) { ++ u32 cycle_no = (volatile u32)tdma->current_cycle; ++ u64 cycle_ms; ++ ++ /* switch back to [backup] master mode */ ++ if (test_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags)) ++ tdma->sync_job.id = BACKUP_SYNC; ++ else ++ tdma->sync_job.id = XMIT_SYNC; ++ ++ /* wait 2 cycle periods for the mode switch */ ++ cycle_ms = tdma->cycle_period; ++ do_div(cycle_ms, 1000000); ++ if (cycle_ms == 0) ++ cycle_ms = 1; ++ msleep(2 * cycle_ms); ++ ++ /* catch the very unlikely case that the current master died ++ while we just switched the mode */ ++ if (cycle_no == (volatile u32)tdma->current_cycle) { ++ kfree(slot); ++ return -ETIME; ++ } ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ ++ set_bit(TDMA_FLAG_CALIBRATED, &tdma->flags); ++ } ++ ++ slot->head.id = id; ++ slot->head.ref_count = 0; ++ slot->period = cfg->args.set_slot.period; ++ slot->phasing = cfg->args.set_slot.phasing; ++ slot->mtu = cfg->args.set_slot.size; ++ slot->size = cfg->args.set_slot.size + rtdev->hard_header_len; ++ slot->offset = cfg->args.set_slot.offset; ++ slot->queue = &slot->local_queue; ++ rtskb_prio_queue_init(&slot->local_queue); ++ ++ if (jnt_id >= 0) /* all other validation tests performed above */ ++ slot->queue = tdma->slot_table[jnt_id]->queue; ++ ++ old_slot = tdma->slot_table[id]; ++ if ((id == DEFAULT_NRT_SLOT) && ++ (old_slot == tdma->slot_table[DEFAULT_SLOT])) ++ old_slot = NULL; ++ ++restart: ++ job_list_revision = tdma->job_list_revision; ++ ++ if (!old_slot) { ++ job = tdma->first_job; ++ while (1) { ++ prev_job = job; ++ job = list_entry(job->entry.next, struct tdma_job, ++ entry); ++ if (((job->id >= 0) && ++ ((slot->offset < SLOT_JOB(job)->offset) || ++ ((slot->offset == SLOT_JOB(job)->offset) && ++ (slot->head.id <= SLOT_JOB(job)->head.id)))) || ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ ((job->id == XMIT_RPL_CAL) && ++ (slot->offset 
< ++ REPLY_CAL_JOB(job)->reply_offset)) || ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ (job == tdma->first_job)) ++ break; ++ } ++ ++ } else ++ prev_job = list_entry(old_slot->head.entry.prev, ++ struct tdma_job, entry); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ if (job_list_revision != tdma->job_list_revision) { ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ msleep(100); ++ goto restart; ++ } ++ ++ if (old_slot) ++ __list_del(old_slot->head.entry.prev, ++ old_slot->head.entry.next); ++ ++ list_add(&slot->head.entry, &prev_job->entry); ++ tdma->slot_table[id] = slot; ++ if ((id == DEFAULT_SLOT) && ++ (tdma->slot_table[DEFAULT_NRT_SLOT] == old_slot)) ++ tdma->slot_table[DEFAULT_NRT_SLOT] = slot; ++ ++ if (old_slot) { ++ while (old_slot->head.ref_count > 0) { ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ msleep(100); ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ } ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ /* search for other slots linked to the old one */ ++ for (jnt_id = 0; jnt_id < tdma->max_slot_id; jnt_id++) ++ if ((tdma->slot_table[jnt_id] != 0) && ++ (tdma->slot_table[jnt_id]->queue == ++ &old_slot->local_queue)) { ++ /* found a joint slot, move or detach it now */ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ while (tdma->slot_table[jnt_id]->head.ref_count > ++ 0) { ++ rtdm_lock_put_irqrestore(&tdma->lock, ++ context); ++ msleep(100); ++ rtdm_lock_get_irqsave(&tdma->lock, ++ context); ++ } ++ ++ /* If the new slot size is larger, detach the other slot, ++ * update it otherwise. */ ++ if (slot->mtu > tdma->slot_table[jnt_id]->mtu) ++ tdma->slot_table[jnt_id]->queue = ++ &tdma->slot_table[jnt_id] ++ ->local_queue; ++ else { ++ tdma->slot_table[jnt_id]->mtu = ++ slot->mtu; ++ tdma->slot_table[jnt_id]->queue = ++ slot->queue; ++ } ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ } ++ } else ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ rtmac_vnic_set_max_mtu(rtdev, cfg->args.set_slot.size); ++ ++ if (old_slot) { ++ /* avoid that the formerly joint queue gets purged */ ++ old_slot->queue = &old_slot->local_queue; ++ ++ /* Without any reference to the old job and no joint slots we can ++ * safely purge its queue without lock protection. ++ * NOTE: Reconfiguring a slot during runtime may lead to packet ++ * drops! 
*/ ++ while ((rtskb = __rtskb_prio_dequeue(old_slot->queue))) ++ kfree_rtskb(rtskb); ++ ++ kfree(old_slot); ++ } ++ ++ return 0; ++} ++ ++int tdma_cleanup_slot(struct tdma_priv *tdma, struct tdma_slot *slot) ++{ ++ struct rtskb *rtskb; ++ unsigned int id, jnt_id; ++ rtdm_lockctx_t context; ++ ++ if (!slot) ++ return -EINVAL; ++ ++ id = slot->head.id; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ __list_del(slot->head.entry.prev, slot->head.entry.next); ++ ++ if (id == DEFAULT_NRT_SLOT) ++ tdma->slot_table[DEFAULT_NRT_SLOT] = ++ tdma->slot_table[DEFAULT_SLOT]; ++ else { ++ if ((id == DEFAULT_SLOT) && ++ (tdma->slot_table[DEFAULT_NRT_SLOT] == slot)) ++ tdma->slot_table[DEFAULT_NRT_SLOT] = NULL; ++ tdma->slot_table[id] = NULL; ++ } ++ ++ while (slot->head.ref_count > 0) { ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ msleep(100); ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ } ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ /* search for other slots linked to this one */ ++ for (jnt_id = 0; jnt_id < tdma->max_slot_id; jnt_id++) ++ if ((tdma->slot_table[jnt_id] != 0) && ++ (tdma->slot_table[jnt_id]->queue == &slot->local_queue)) { ++ /* found a joint slot, detach it now under lock protection */ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ while (tdma->slot_table[jnt_id]->head.ref_count > 0) { ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ msleep(100); ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ } ++ tdma->slot_table[jnt_id]->queue = ++ &tdma->slot_table[jnt_id]->local_queue; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ } ++ ++ /* avoid that the formerly joint queue gets purged */ ++ slot->queue = &slot->local_queue; ++ ++ /* No need to protect the queue access here - ++ * no one is referring to this job anymore ++ * (ref_count == 0, all joint slots detached). 
*/ ++ while ((rtskb = __rtskb_prio_dequeue(slot->queue))) ++ kfree_rtskb(rtskb); ++ ++ kfree(slot); ++ ++ return 0; ++} ++ ++static int tdma_ioctl_remove_slot(struct rtnet_device *rtdev, ++ struct tdma_config *cfg) ++{ ++ struct tdma_priv *tdma; ++ int id; ++ ++ if (rtdev->mac_priv == NULL) ++ return -ENOTTY; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) ++ return -ENOTTY; ++ ++ id = cfg->args.remove_slot.id; ++ if (id > tdma->max_slot_id) ++ return -EINVAL; ++ ++ if ((id == DEFAULT_NRT_SLOT) && (tdma->slot_table[DEFAULT_NRT_SLOT] == ++ tdma->slot_table[DEFAULT_SLOT])) ++ return -EINVAL; ++ ++ return tdma_cleanup_slot(tdma, tdma->slot_table[id]); ++} ++ ++static int tdma_ioctl_detach(struct rtnet_device *rtdev) ++{ ++ struct tdma_priv *tdma; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) ++ return -ENOTTY; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) ++ return -ENOTTY; ++ ++ ret = rtmac_disc_detach(rtdev); ++ ++ return ret; ++} ++ ++int tdma_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct tdma_config cfg; ++ int ret; ++ ++ ret = copy_from_user(&cfg, (void *)arg, sizeof(cfg)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ switch (request) { ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ case TDMA_IOC_MASTER: ++ ret = tdma_ioctl_master(rtdev, &cfg); ++ break; ++#endif ++ case TDMA_IOC_SLAVE: ++ ret = tdma_ioctl_slave(rtdev, &cfg); ++ break; ++ ++ case TDMA_IOC_CAL_RESULT_SIZE: ++ ret = tdma_ioctl_cal_result_size(rtdev, &cfg); ++ break; ++ ++ case TDMA_IOC_SET_SLOT: ++ ret = tdma_ioctl_set_slot(rtdev, &cfg); ++ break; ++ ++ case TDMA_IOC_REMOVE_SLOT: ++ ret = tdma_ioctl_remove_slot(rtdev, &cfg); ++ break; ++ ++ case TDMA_IOC_DETACH: ++ ret = tdma_ioctl_detach(rtdev); ++ break; ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ return ret; ++} +--- linux/drivers/xenomai/net/stack/rtmac/tdma/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/Makefile 2021-04-07 16:01:26.960634548 +0800 +@@ -0,0 +1,10 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_TDMA) += tdma.o ++ ++tdma-y := \ ++ tdma_dev.o \ ++ tdma_ioctl.o \ ++ tdma_module.o \ ++ tdma_proto.o \ ++ tdma_worker.o +--- linux/drivers/xenomai/net/stack/rtmac/tdma/tdma_worker.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/tdma_worker.c 2021-04-07 16:01:26.956634553 +0800 +@@ -0,0 +1,231 @@ ++/*** ++ * ++ * rtmac/tdma/tdma_worker.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++ ++static void do_slot_job(struct tdma_priv *tdma, struct tdma_slot *job, ++ rtdm_lockctx_t lockctx) ++{ ++ struct rtskb *rtskb; ++ ++ if ((job->period != 1) && ++ (tdma->current_cycle % job->period != job->phasing)) ++ return; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ /* wait for slot begin, then send one pending packet */ ++ rtdm_task_sleep_abs(tdma->current_cycle_start + SLOT_JOB(job)->offset, ++ RTDM_TIMERMODE_REALTIME); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ rtskb = __rtskb_prio_dequeue(SLOT_JOB(job)->queue); ++ if (!rtskb) ++ return; ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ rtmac_xmit(rtskb); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++} ++ ++static void do_xmit_sync_job(struct tdma_priv *tdma, rtdm_lockctx_t lockctx) ++{ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ /* wait for beginning of next cycle, then send sync */ ++ rtdm_task_sleep_abs(tdma->current_cycle_start + tdma->cycle_period, ++ RTDM_TIMERMODE_REALTIME); ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ tdma->current_cycle++; ++ tdma->current_cycle_start += tdma->cycle_period; ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ tdma_xmit_sync_frame(tdma); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++} ++ ++static void do_backup_sync_job(struct tdma_priv *tdma, rtdm_lockctx_t lockctx) ++{ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ /* wait for backup slot */ ++ rtdm_task_sleep_abs(tdma->current_cycle_start + tdma->backup_sync_inc, ++ RTDM_TIMERMODE_REALTIME); ++ ++ /* take over sync transmission if all earlier masters failed */ ++ if (!test_and_clear_bit(TDMA_FLAG_RECEIVED_SYNC, &tdma->flags)) { ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ tdma->current_cycle++; ++ tdma->current_cycle_start += tdma->cycle_period; ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ tdma_xmit_sync_frame(tdma); ++ ++ set_bit(TDMA_FLAG_BACKUP_ACTIVE, &tdma->flags); ++ } else ++ clear_bit(TDMA_FLAG_BACKUP_ACTIVE, &tdma->flags); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++} ++ ++static struct tdma_job *do_request_cal_job(struct tdma_priv *tdma, ++ struct tdma_request_cal *job, ++ rtdm_lockctx_t lockctx) ++{ ++ struct rt_proc_call *call; ++ struct tdma_job *prev_job; ++ int err; ++ ++ if ((job->period != 1) && ++ (tdma->current_cycle % job->period != job->phasing)) ++ return &job->head; ++ ++ /* remove job until we get a reply */ ++ __list_del(job->head.entry.prev, job->head.entry.next); ++ job->head.ref_count--; ++ prev_job = tdma->current_job = ++ list_entry(job->head.entry.prev, struct tdma_job, entry); ++ prev_job->ref_count++; ++ tdma->job_list_revision++; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ rtdm_task_sleep_abs(tdma->current_cycle_start + job->offset, ++ RTDM_TIMERMODE_REALTIME); ++ err = tdma_xmit_request_cal_frame( ++ tdma, tdma->current_cycle + job->period, job->offset); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ ++ /* terminate call on error */ ++ if (err < 0) { ++ call = tdma->calibration_call; ++ tdma->calibration_call = NULL; ++ ++ if (call) { ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ rtpc_complete_call(call, err); ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ } ++ } ++ ++ return prev_job; ++} ++ ++static struct tdma_job 
*do_reply_cal_job(struct tdma_priv *tdma, ++ struct tdma_reply_cal *job, ++ rtdm_lockctx_t lockctx) ++{ ++ struct tdma_job *prev_job; ++ ++ if (job->reply_cycle > tdma->current_cycle) ++ return &job->head; ++ ++ /* remove the job */ ++ __list_del(job->head.entry.prev, job->head.entry.next); ++ job->head.ref_count--; ++ prev_job = tdma->current_job = ++ list_entry(job->head.entry.prev, struct tdma_job, entry); ++ prev_job->ref_count++; ++ tdma->job_list_revision++; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ if (job->reply_cycle == tdma->current_cycle) { ++ /* send reply in the assigned slot */ ++ rtdm_task_sleep_abs(tdma->current_cycle_start + ++ job->reply_offset, ++ RTDM_TIMERMODE_REALTIME); ++ rtmac_xmit(job->reply_rtskb); ++ } else { ++ /* cleanup if cycle already passed */ ++ kfree_rtskb(job->reply_rtskb); ++ } ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ ++ return prev_job; ++} ++ ++void tdma_worker(void *arg) ++{ ++ struct tdma_priv *tdma = arg; ++ struct tdma_job *job; ++ rtdm_lockctx_t lockctx; ++ int ret; ++ ++ ret = rtdm_event_wait(&tdma->worker_wakeup); ++ if (ret) ++ return; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ ++ job = tdma->first_job; ++ ++ while (!rtdm_task_should_stop()) { ++ job->ref_count++; ++ switch (job->id) { ++ case WAIT_ON_SYNC: ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ret = rtdm_event_wait(&tdma->sync_event); ++ if (ret) ++ return; ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ break; ++ ++ case XMIT_REQ_CAL: ++ job = do_request_cal_job(tdma, REQUEST_CAL_JOB(job), ++ lockctx); ++ break; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ case XMIT_SYNC: ++ do_xmit_sync_job(tdma, lockctx); ++ break; ++ ++ case BACKUP_SYNC: ++ do_backup_sync_job(tdma, lockctx); ++ break; ++ ++ case XMIT_RPL_CAL: ++ job = do_reply_cal_job(tdma, REPLY_CAL_JOB(job), ++ lockctx); ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ ++ default: ++ do_slot_job(tdma, SLOT_JOB(job), lockctx); ++ break; ++ } ++ job->ref_count--; ++ ++ job = tdma->current_job = ++ list_entry(job->entry.next, struct tdma_job, entry); ++ } ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++} +--- linux/drivers/xenomai/net/stack/rtmac/tdma/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/Kconfig 2021-04-07 16:01:26.951634561 +0800 +@@ -0,0 +1,21 @@ ++config XENO_DRIVERS_NET_TDMA ++ tristate "TDMA discipline for RTmac" ++ depends on XENO_DRIVERS_NET_RTMAC ++ default y ++ ---help--- ++ The Time Division Multiple Access discipline is the default RTmac ++ protocol for Ethernet networks. It consists of a master synchronising ++ the access of the slaves to the media by periodically issuing frames. ++ Backup masters can be set up to take over if the primary master fails. ++ TDMA also provides a global clock across all participants. The tdmacfg ++ tool can be used to configure a real-time NIC to use TDMA. ++ ++ See Documenatation/README.rtmac for further details. ++ ++config XENO_DRIVERS_NET_TDMA_MASTER ++ bool "TDMA master support" ++ depends on XENO_DRIVERS_NET_TDMA ++ default y ++ ---help--- ++ Enables TDMA master and backup master support for the node. This can ++ be switched of to reduce the memory footprint of pure slave nodes. 
+--- linux/drivers/xenomai/net/stack/rtmac/tdma/tdma_module.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/tdma_module.c 2021-04-07 16:01:26.946634568 +0800 +@@ -0,0 +1,317 @@ ++/*** ++ * ++ * rtmac/tdma/tdma_module.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int tdma_proc_read(struct xnvfile_regular_iterator *it, void *data) ++{ ++ int d, err = 0; ++ struct rtnet_device *rtdev; ++ struct tdma_priv *tdma; ++ const char *state; ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ u64 cycle; ++#endif ++ ++ xnvfile_printf(it, "Interface API Device Operation Mode " ++ "Cycle State\n"); ++ ++ for (d = 1; d <= MAX_RT_DEVICES; d++) { ++ rtdev = rtdev_get_by_index(d); ++ if (!rtdev) ++ continue; ++ ++ err = mutex_lock_interruptible(&rtdev->nrt_lock); ++ if (err < 0) { ++ rtdev_dereference(rtdev); ++ break; ++ } ++ ++ if (!rtdev->mac_priv) ++ goto unlock_dev; ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ ++ xnvfile_printf(it, "%-15s %-15s ", rtdev->name, ++ tdma->api_device.name); ++ ++ if (test_bit(TDMA_FLAG_CALIBRATED, &tdma->flags)) { ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ if (test_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags) && ++ !test_bit(TDMA_FLAG_BACKUP_ACTIVE, &tdma->flags)) ++ state = "stand-by"; ++ else ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ state = "active"; ++ } else ++ state = "init"; ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ if (test_bit(TDMA_FLAG_MASTER, &tdma->flags)) { ++ cycle = tdma->cycle_period + 500; ++ do_div(cycle, 1000); ++ if (test_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags)) ++ xnvfile_printf(it, "Backup Master %-7ld %s\n", ++ (unsigned long)cycle, state); ++ else ++ xnvfile_printf(it, "Master %-7ld %s\n", ++ (unsigned long)cycle, state); ++ } else ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ xnvfile_printf(it, "Slave - %s\n", ++ state); ++ ++ unlock_dev: ++ mutex_unlock(&rtdev->nrt_lock); ++ rtdev_dereference(rtdev); ++ } ++ ++ return err; ++} ++ ++int tdma_slots_proc_read(struct xnvfile_regular_iterator *it, void *data) ++{ ++ int d, i, err = 0; ++ struct rtnet_device *rtdev; ++ struct tdma_priv *tdma; ++ struct tdma_slot *slot; ++ int jnt_id; ++ u64 slot_offset; ++ ++ xnvfile_printf(it, "Interface " ++ "Slots (id[->joint]:offset:phasing/period:size)\n"); ++ ++ for (d = 1; d <= MAX_RT_DEVICES; d++) { ++ rtdev = rtdev_get_by_index(d); ++ if (!rtdev) ++ continue; ++ ++ err = mutex_lock_interruptible(&rtdev->nrt_lock); ++ if (err < 0) { ++ rtdev_dereference(rtdev); ++ break; ++ } ++ ++ if 
(!rtdev->mac_priv) ++ goto unlock_dev; ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ ++ xnvfile_printf(it, "%-15s ", rtdev->name); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ if (test_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags)) { ++ slot_offset = tdma->backup_sync_inc - ++ tdma->cycle_period + 500; ++ do_div(slot_offset, 1000); ++ xnvfile_printf(it, "bak:%ld ", ++ (unsigned long)slot_offset); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ ++ if (tdma->slot_table) ++ for (i = 0; i <= tdma->max_slot_id; i++) { ++ slot = tdma->slot_table[i]; ++ if (!slot || ++ ((i == DEFAULT_NRT_SLOT) && ++ (tdma->slot_table[DEFAULT_SLOT] == slot))) ++ continue; ++ ++ if (slot->queue == &slot->local_queue) { ++ xnvfile_printf(it, "%d", i); ++ } else ++ for (jnt_id = 0; ++ jnt_id <= tdma->max_slot_id; ++ jnt_id++) ++ if (&tdma->slot_table[jnt_id] ++ ->local_queue == ++ slot->queue) { ++ xnvfile_printf(it, ++ "%d->%d", ++ i, ++ jnt_id); ++ break; ++ } ++ ++ slot_offset = slot->offset + 500; ++ do_div(slot_offset, 1000); ++ xnvfile_printf(it, ":%ld:%d/%d:%d ", ++ (unsigned long)slot_offset, ++ slot->phasing + 1, slot->period, ++ slot->mtu); ++ } ++ ++ xnvfile_printf(it, "\n"); ++ ++ unlock_dev: ++ mutex_unlock(&rtdev->nrt_lock); ++ rtdev_dereference(rtdev); ++ } ++ ++ return err; ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++int tdma_attach(struct rtnet_device *rtdev, void *priv) ++{ ++ struct tdma_priv *tdma = (struct tdma_priv *)priv; ++ int ret; ++ ++ memset(tdma, 0, sizeof(struct tdma_priv)); ++ ++ tdma->magic = TDMA_MAGIC; ++ tdma->rtdev = rtdev; ++ ++ rtdm_lock_init(&tdma->lock); ++ ++ rtdm_event_init(&tdma->worker_wakeup, 0); ++ rtdm_event_init(&tdma->xmit_event, 0); ++ rtdm_event_init(&tdma->sync_event, 0); ++ ++ ret = tdma_dev_init(rtdev, tdma); ++ if (ret < 0) ++ goto err_out1; ++ ++ ret = rtdm_task_init(&tdma->worker_task, "rtnet-tdma", tdma_worker, ++ tdma, DEF_WORKER_PRIO, 0); ++ if (ret != 0) ++ goto err_out2; ++ ++ return 0; ++ ++err_out2: ++ tdma_dev_release(tdma); ++ ++err_out1: ++ rtdm_event_destroy(&tdma->sync_event); ++ rtdm_event_destroy(&tdma->xmit_event); ++ rtdm_event_destroy(&tdma->worker_wakeup); ++ ++ return ret; ++} ++ ++int tdma_detach(struct rtnet_device *rtdev, void *priv) ++{ ++ struct tdma_priv *tdma = (struct tdma_priv *)priv; ++ struct tdma_job *job, *tmp; ++ ++ rtdm_event_destroy(&tdma->sync_event); ++ rtdm_event_destroy(&tdma->xmit_event); ++ rtdm_event_destroy(&tdma->worker_wakeup); ++ ++ tdma_dev_release(tdma); ++ ++ rtdm_task_destroy(&tdma->worker_task); ++ ++ list_for_each_entry_safe (job, tmp, &tdma->first_job->entry, entry) { ++ if (job->id >= 0) ++ tdma_cleanup_slot(tdma, SLOT_JOB(job)); ++ else if (job->id == XMIT_RPL_CAL) { ++ __list_del(job->entry.prev, job->entry.next); ++ kfree_rtskb(REPLY_CAL_JOB(job)->reply_rtskb); ++ } ++ } ++ ++ if (tdma->slot_table) ++ kfree(tdma->slot_table); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ if (test_bit(TDMA_FLAG_MASTER, &tdma->flags)) ++ rtskb_pool_release(&tdma->cal_rtskb_pool); ++#endif ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++struct rtmac_proc_entry tdma_proc_entries[] = { ++ { name: "tdma", handler: tdma_proc_read }, ++ { name: "tdma_slots", handler: tdma_slots_proc_read }, ++}; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct rtmac_disc tdma_disc = { ++ name: "TDMA", ++ priv_size: sizeof(struct tdma_priv), ++ disc_type: __constant_htons(RTMAC_TYPE_TDMA), ++ ++ packet_rx: tdma_packet_rx, ++ rt_packet_tx: tdma_rt_packet_tx, ++ nrt_packet_tx: tdma_nrt_packet_tx, ++ ++ 
get_mtu: tdma_get_mtu, ++ ++ vnic_xmit: RTMAC_DEFAULT_VNIC, ++ ++ attach: tdma_attach, ++ detach: tdma_detach, ++ ++ ioctls: { ++ service_name: "RTmac/TDMA", ++ ioctl_type: RTNET_IOC_TYPE_RTMAC_TDMA, ++ handler: tdma_ioctl ++ }, ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ proc_entries: tdma_proc_entries, ++ nr_proc_entries: ARRAY_SIZE(tdma_proc_entries), ++#endif /* CONFIG_XENO_OPT_VFILE */ ++}; ++ ++int __init tdma_init(void) ++{ ++ int ret; ++ ++ printk("RTmac/TDMA: init time division multiple access control " ++ "mechanism\n"); ++ ++ ret = rtmac_disc_register(&tdma_disc); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++void tdma_release(void) ++{ ++ rtmac_disc_deregister(&tdma_disc); ++ ++ printk("RTmac/TDMA: unloaded\n"); ++} ++ ++module_init(tdma_init); ++module_exit(tdma_release); ++ ++MODULE_AUTHOR("Jan Kiszka"); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/stack/rtmac/tdma/tdma_proto.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/tdma_proto.c 2021-04-07 16:01:26.942634574 +0800 +@@ -0,0 +1,407 @@ ++/*** ++ * ++ * rtmac/tdma/tdma_proto.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include "asm/div64.h" ++ ++#include ++#include ++#include ++ ++void tdma_xmit_sync_frame(struct tdma_priv *tdma) ++{ ++ struct rtnet_device *rtdev = tdma->rtdev; ++ struct rtskb *rtskb; ++ struct tdma_frm_sync *sync; ++ ++ rtskb = alloc_rtskb(rtdev->hard_header_len + sizeof(struct rtmac_hdr) + ++ sizeof(struct tdma_frm_sync) + 15, ++ &global_pool); ++ if (!rtskb) ++ goto err_out; ++ ++ rtskb_reserve(rtskb, ++ (rtdev->hard_header_len + sizeof(struct rtmac_hdr) + 15) & ++ ~15); ++ ++ sync = (struct tdma_frm_sync *)rtskb_put(rtskb, ++ sizeof(struct tdma_frm_sync)); ++ ++ if (rtmac_add_header(rtdev, rtdev->broadcast, rtskb, RTMAC_TYPE_TDMA, ++ 0) < 0) { ++ kfree_rtskb(rtskb); ++ goto err_out; ++ } ++ ++ sync->head.version = __constant_htons(TDMA_FRM_VERSION); ++ sync->head.id = __constant_htons(TDMA_FRM_SYNC); ++ ++ sync->cycle_no = htonl(tdma->current_cycle); ++ sync->xmit_stamp = tdma->clock_offset; ++ sync->sched_xmit_stamp = ++ cpu_to_be64(tdma->clock_offset + tdma->current_cycle_start); ++ ++ rtskb->xmit_stamp = &sync->xmit_stamp; ++ ++ rtmac_xmit(rtskb); ++ ++ /* signal local waiters */ ++ rtdm_event_pulse(&tdma->sync_event); ++ ++ return; ++ ++err_out: ++ /*ERROR*/ rtdm_printk("TDMA: Failed to transmit sync frame!\n"); ++ return; ++} ++ ++int tdma_xmit_request_cal_frame(struct tdma_priv *tdma, u32 reply_cycle, ++ u64 reply_slot_offset) ++{ ++ struct rtnet_device *rtdev = tdma->rtdev; ++ struct rtskb *rtskb; ++ struct tdma_frm_req_cal *req_cal; ++ int ret; ++ ++ rtskb = alloc_rtskb(rtdev->hard_header_len + sizeof(struct rtmac_hdr) + ++ sizeof(struct tdma_frm_req_cal) + 15, ++ &global_pool); ++ ret = -ENOMEM; ++ if (!rtskb) ++ goto err_out; ++ ++ rtskb_reserve(rtskb, ++ (rtdev->hard_header_len + sizeof(struct rtmac_hdr) + 15) & ++ ~15); ++ ++ req_cal = (struct tdma_frm_req_cal *)rtskb_put( ++ rtskb, sizeof(struct tdma_frm_req_cal)); ++ ++ if ((ret = rtmac_add_header(rtdev, tdma->master_hw_addr, rtskb, ++ RTMAC_TYPE_TDMA, 0)) < 0) { ++ kfree_rtskb(rtskb); ++ goto err_out; ++ } ++ ++ req_cal->head.version = __constant_htons(TDMA_FRM_VERSION); ++ req_cal->head.id = __constant_htons(TDMA_FRM_REQ_CAL); ++ ++ req_cal->xmit_stamp = 0; ++ req_cal->reply_cycle = htonl(reply_cycle); ++ req_cal->reply_slot_offset = cpu_to_be64(reply_slot_offset); ++ ++ rtskb->xmit_stamp = &req_cal->xmit_stamp; ++ ++ ret = rtmac_xmit(rtskb); ++ if (ret < 0) ++ goto err_out; ++ ++ return 0; ++ ++err_out: ++ /*ERROR*/ rtdm_printk("TDMA: Failed to transmit request calibration " ++ "frame!\n"); ++ return ret; ++} ++ ++int tdma_rt_packet_tx(struct rtskb *rtskb, struct rtnet_device *rtdev) ++{ ++ struct tdma_priv *tdma; ++ rtdm_lockctx_t context; ++ struct tdma_slot *slot; ++ int ret = 0; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ ++ rtcap_mark_rtmac_enqueue(rtskb); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ slot = tdma->slot_table[(rtskb->priority & RTSKB_CHANNEL_MASK) >> ++ RTSKB_CHANNEL_SHIFT]; ++ ++ if (unlikely(!slot)) { ++ ret = -EAGAIN; ++ goto err_out; ++ } ++ ++ if (unlikely(rtskb->len > slot->size)) { ++ ret = -EMSGSIZE; ++ goto err_out; ++ } ++ ++ __rtskb_prio_queue_tail(slot->queue, rtskb); ++ ++err_out: ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ return ret; ++} ++ ++int tdma_nrt_packet_tx(struct rtskb *rtskb) ++{ ++ struct tdma_priv *tdma; ++ rtdm_lockctx_t context; ++ struct tdma_slot *slot; ++ int ret = 0; ++ ++ tdma = (struct tdma_priv *)rtskb->rtdev->mac_priv->disc_priv; ++ ++ rtcap_mark_rtmac_enqueue(rtskb); ++ ++ rtskb->priority 
= RTSKB_PRIO_VALUE(QUEUE_MIN_PRIO, DEFAULT_NRT_SLOT); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ slot = tdma->slot_table[DEFAULT_NRT_SLOT]; ++ ++ if (unlikely(!slot)) { ++ ret = -EAGAIN; ++ goto err_out; ++ } ++ ++ if (unlikely(rtskb->len > slot->size)) { ++ ret = -EMSGSIZE; ++ goto err_out; ++ } ++ ++ __rtskb_prio_queue_tail(slot->queue, rtskb); ++ ++err_out: ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ return ret; ++} ++ ++int tdma_packet_rx(struct rtskb *rtskb) ++{ ++ struct tdma_priv *tdma; ++ struct tdma_frm_head *head; ++ u64 delay; ++ u64 cycle_start; ++ nanosecs_rel_t clock_offset; ++ struct rt_proc_call *call; ++ struct tdma_request_cal *req_cal_job; ++ rtdm_lockctx_t context; ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ struct rtskb *reply_rtskb; ++ struct rtnet_device *rtdev; ++ struct tdma_frm_rpl_cal *rpl_cal_frm; ++ struct tdma_reply_cal *rpl_cal_job; ++ struct tdma_job *job; ++#endif ++ ++ tdma = (struct tdma_priv *)rtskb->rtdev->mac_priv->disc_priv; ++ ++ head = (struct tdma_frm_head *)rtskb->data; ++ ++ if (head->version != __constant_htons(TDMA_FRM_VERSION)) ++ goto kfree_out; ++ ++ switch (head->id) { ++ case __constant_htons(TDMA_FRM_SYNC): ++ rtskb_pull(rtskb, sizeof(struct tdma_frm_sync)); ++ ++ /* see "Time Arithmetics" in the TDMA specification */ ++ clock_offset = be64_to_cpu(SYNC_FRM(head)->xmit_stamp) + ++ tdma->master_packet_delay_ns; ++ clock_offset -= rtskb->time_stamp; ++ ++ cycle_start = be64_to_cpu(SYNC_FRM(head)->sched_xmit_stamp) - ++ clock_offset; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ tdma->current_cycle = ntohl(SYNC_FRM(head)->cycle_no); ++ tdma->current_cycle_start = cycle_start; ++ tdma->clock_offset = clock_offset; ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ /* note: Ethernet-specific! */ ++ memcpy(tdma->master_hw_addr, rtskb->mac.ethernet->h_source, ++ ETH_ALEN); ++ ++ set_bit(TDMA_FLAG_RECEIVED_SYNC, &tdma->flags); ++ ++ rtdm_event_pulse(&tdma->sync_event); ++ break; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ case __constant_htons(TDMA_FRM_REQ_CAL): ++ RTNET_ASSERT(test_bit(TDMA_FLAG_MASTER, &tdma->flags) && ++ test_bit(TDMA_FLAG_CALIBRATED, ++ &tdma->flags), ++ break;); ++ ++ rtskb_pull(rtskb, sizeof(struct tdma_frm_req_cal)); ++ ++ rtdev = rtskb->rtdev; ++ ++ reply_rtskb = alloc_rtskb( ++ rtdev->hard_header_len + sizeof(struct rtmac_hdr) + ++ sizeof(struct tdma_frm_rpl_cal) + 15, ++ &tdma->cal_rtskb_pool); ++ if (unlikely(!reply_rtskb)) { ++ /*ERROR*/ rtdm_printk( ++ "TDMA: Too many calibration requests " ++ "pending!\n"); ++ break; ++ } ++ ++ rtskb_reserve(reply_rtskb, (rtdev->hard_header_len + ++ sizeof(struct rtmac_hdr) + 15) & ++ ~15); ++ ++ rpl_cal_frm = (struct tdma_frm_rpl_cal *)rtskb_put( ++ reply_rtskb, sizeof(struct tdma_frm_rpl_cal)); ++ ++ /* note: Ethernet-specific! 
*/ ++ if (unlikely(rtmac_add_header( ++ rtdev, rtskb->mac.ethernet->h_source, ++ reply_rtskb, RTMAC_TYPE_TDMA, 0) < 0)) { ++ kfree_rtskb(reply_rtskb); ++ break; ++ } ++ ++ rpl_cal_frm->head.version = __constant_htons(TDMA_FRM_VERSION); ++ rpl_cal_frm->head.id = __constant_htons(TDMA_FRM_RPL_CAL); ++ ++ rpl_cal_frm->request_xmit_stamp = REQ_CAL_FRM(head)->xmit_stamp; ++ rpl_cal_frm->reception_stamp = cpu_to_be64(rtskb->time_stamp); ++ rpl_cal_frm->xmit_stamp = 0; ++ ++ reply_rtskb->xmit_stamp = &rpl_cal_frm->xmit_stamp; ++ ++ /* use reply_rtskb memory behind the frame as job buffer */ ++ rpl_cal_job = (struct tdma_reply_cal *)reply_rtskb->tail; ++ RTNET_ASSERT(reply_rtskb->tail + ++ sizeof(struct tdma_reply_cal) <= ++ reply_rtskb->buf_end, ++ rtskb_over_panic(reply_rtskb, ++ sizeof(struct tdma_reply_cal), ++ current_text_addr());); ++ ++ rpl_cal_job->head.id = XMIT_RPL_CAL; ++ rpl_cal_job->head.ref_count = 0; ++ rpl_cal_job->reply_cycle = ++ ntohl(REQ_CAL_FRM(head)->reply_cycle); ++ rpl_cal_job->reply_rtskb = reply_rtskb; ++ rpl_cal_job->reply_offset = ++ be64_to_cpu(REQ_CAL_FRM(head)->reply_slot_offset); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ job = tdma->current_job; ++ while (1) { ++ job = list_entry(job->entry.prev, struct tdma_job, ++ entry); ++ if ((job == tdma->first_job) || ++ ((job->id >= 0) && (SLOT_JOB(job)->offset < ++ rpl_cal_job->reply_offset)) || ++ ((job->id == XMIT_RPL_CAL) && ++ (REPLY_CAL_JOB(job)->reply_offset < ++ rpl_cal_job->reply_offset))) ++ break; ++ } ++ list_add(&rpl_cal_job->head.entry, &job->entry); ++ tdma->job_list_revision++; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ break; ++#endif ++ ++ case __constant_htons(TDMA_FRM_RPL_CAL): ++ rtskb_pull(rtskb, sizeof(struct tdma_frm_rpl_cal)); ++ ++ /* see "Time Arithmetics" in the TDMA specification */ ++ delay = (rtskb->time_stamp - ++ be64_to_cpu(RPL_CAL_FRM(head)->request_xmit_stamp)) - ++ (be64_to_cpu(RPL_CAL_FRM(head)->xmit_stamp) - ++ be64_to_cpu(RPL_CAL_FRM(head)->reception_stamp)); ++ delay = (delay + 1) >> 1; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ call = tdma->calibration_call; ++ if (call == NULL) { ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ break; ++ } ++ req_cal_job = rtpc_get_priv(call, struct tdma_request_cal); ++ ++ req_cal_job->result_buffer[--req_cal_job->cal_rounds] = delay; ++ ++ if (req_cal_job->cal_rounds > 0) { ++ tdma->job_list_revision++; ++ list_add(&req_cal_job->head.entry, ++ &tdma->first_job->entry); ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ } else { ++ tdma->calibration_call = NULL; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ rtpc_complete_call(call, 0); ++ } ++ ++ break; ++ ++ default: ++ /*ERROR*/ rtdm_printk("TDMA: Unknown frame %d!\n", ++ ntohs(head->id)); ++ } ++ ++kfree_out: ++ kfree_rtskb(rtskb); ++ return 0; ++} ++ ++unsigned int tdma_get_mtu(struct rtnet_device *rtdev, unsigned int priority) ++{ ++ struct tdma_priv *tdma; ++ rtdm_lockctx_t context; ++ struct tdma_slot *slot; ++ unsigned int mtu; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ slot = tdma->slot_table[(priority & RTSKB_CHANNEL_MASK) >> ++ RTSKB_CHANNEL_SHIFT]; ++ ++ if (unlikely(!slot)) { ++ mtu = rtdev->mtu; ++ goto out; ++ } ++ ++ mtu = slot->mtu; ++ ++out: ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ return mtu; ++} +--- linux/drivers/xenomai/net/stack/rtmac/tdma/tdma_dev.c 1970-01-01 08:00:00.000000000 +0800 ++++ 
linux-patched/drivers/xenomai/net/stack/rtmac/tdma/tdma_dev.c 2021-04-07 16:01:26.937634581 +0800 +@@ -0,0 +1,186 @@ ++/*** ++ * ++ * rtmac/tdma/tdma_dev.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde ++ * 2003-2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++struct tdma_dev_ctx { ++ rtdm_task_t *cycle_waiter; ++}; ++ ++static int tdma_dev_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct tdma_dev_ctx *ctx = rtdm_fd_to_private(fd); ++ ++ ctx->cycle_waiter = NULL; ++ ++ return 0; ++} ++ ++static void tdma_dev_close(struct rtdm_fd *fd) ++{ ++ struct tdma_dev_ctx *ctx = rtdm_fd_to_private(fd); ++ rtdm_lockctx_t lock_ctx; ++ ++ cobalt_atomic_enter(lock_ctx); ++ if (ctx->cycle_waiter) ++ rtdm_task_unblock(ctx->cycle_waiter); ++ cobalt_atomic_leave(lock_ctx); ++} ++ ++static int wait_on_sync(struct tdma_dev_ctx *tdma_ctx, rtdm_event_t *sync_event) ++{ ++ rtdm_lockctx_t lock_ctx; ++ int ret; ++ ++ cobalt_atomic_enter(lock_ctx); ++ /* keep it simple: only one waiter per device instance allowed */ ++ if (!tdma_ctx->cycle_waiter) { ++ tdma_ctx->cycle_waiter = rtdm_task_current(); ++ ret = rtdm_event_wait(sync_event); ++ tdma_ctx->cycle_waiter = NULL; ++ } else ++ ret = -EBUSY; ++ cobalt_atomic_leave(lock_ctx); ++ ++ return ret; ++} ++ ++static int tdma_dev_ioctl(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ struct tdma_dev_ctx *ctx = rtdm_fd_to_private(fd); ++ struct tdma_priv *tdma; ++ rtdm_lockctx_t lock_ctx; ++ int ret; ++ ++ tdma = container_of(rtdm_fd_to_context(fd)->device, struct tdma_priv, ++ api_device); ++ ++ switch (request) { ++ case RTMAC_RTIOC_TIMEOFFSET: { ++ nanosecs_rel_t offset; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lock_ctx); ++ offset = tdma->clock_offset; ++ rtdm_lock_put_irqrestore(&tdma->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(__s64)) || ++ rtdm_copy_to_user(fd, arg, &offset, sizeof(__s64))) ++ return -EFAULT; ++ } else ++ *(__s64 *)arg = offset; ++ ++ return 0; ++ } ++ case RTMAC_RTIOC_WAITONCYCLE: ++ if (!rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ if ((long)arg != TDMA_WAIT_ON_SYNC) ++ return -EINVAL; ++ ++ return wait_on_sync(ctx, &tdma->sync_event); ++ ++ case RTMAC_RTIOC_WAITONCYCLE_EX: { ++ struct rtmac_waitinfo *waitinfo = (struct rtmac_waitinfo *)arg; ++ struct rtmac_waitinfo waitinfo_buf; ++ ++#define WAITINFO_HEAD_SIZE \ ++ ((char *)&waitinfo_buf.cycle_no - (char *)&waitinfo_buf) ++ ++ if (!rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_rw_user_ok(fd, waitinfo, ++ sizeof(struct rtmac_waitinfo)) || ++ rtdm_copy_from_user(fd, &waitinfo_buf, arg, ++ WAITINFO_HEAD_SIZE)) ++ return -EFAULT; ++ ++ waitinfo = &waitinfo_buf; ++ } ++ ++ if 
((waitinfo->type != TDMA_WAIT_ON_SYNC) || ++ (waitinfo->size < sizeof(struct rtmac_waitinfo))) ++ return -EINVAL; ++ ++ ret = wait_on_sync(ctx, &tdma->sync_event); ++ if (ret) ++ return ret; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lock_ctx); ++ waitinfo->cycle_no = tdma->current_cycle; ++ waitinfo->cycle_start = tdma->current_cycle_start; ++ waitinfo->clock_offset = tdma->clock_offset; ++ rtdm_lock_put_irqrestore(&tdma->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user(fd, arg, &waitinfo_buf, ++ sizeof(struct rtmac_waitinfo))) ++ return -EFAULT; ++ } ++ ++ return 0; ++ } ++ default: ++ return -ENOTTY; ++ } ++} ++ ++static struct rtdm_driver tdma_driver = { .profile_info = RTDM_PROFILE_INFO( ++ tdma, RTDM_CLASS_RTMAC, ++ RTDM_SUBCLASS_TDMA, ++ RTNET_RTDM_VER), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = 1, ++ .context_size = ++ sizeof(struct tdma_dev_ctx), ++ .ops = { ++ .open = tdma_dev_open, ++ .ioctl_rt = tdma_dev_ioctl, ++ .ioctl_nrt = tdma_dev_ioctl, ++ .close = tdma_dev_close, ++ } }; ++ ++int tdma_dev_init(struct rtnet_device *rtdev, struct tdma_priv *tdma) ++{ ++ char *pos; ++ ++ strcpy(tdma->device_name, "TDMA"); ++ for (pos = rtdev->name + strlen(rtdev->name) - 1; ++ (pos >= rtdev->name) && ((*pos) >= '0') && (*pos <= '9'); pos--) ++ ; ++ strncat(tdma->device_name + 4, pos + 1, IFNAMSIZ - 4); ++ ++ tdma->api_driver = tdma_driver; ++ tdma->api_device.driver = &tdma->api_driver; ++ tdma->api_device.label = tdma->device_name; ++ ++ return rtdm_dev_register(&tdma->api_device); ++} +--- linux/drivers/xenomai/net/stack/rtmac/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/Kconfig 2021-04-07 16:01:26.932634588 +0800 +@@ -0,0 +1,16 @@ ++menuconfig XENO_DRIVERS_NET_RTMAC ++ depends on XENO_DRIVERS_NET ++ tristate "RTmac Layer" ++ default y ++ ---help--- ++ The Real-Time Media Access Control layer allows to extend the RTnet ++ stack with software-based access control mechanisms (also called ++ disciplines) for nondeterministic transport media. Disciplines can be ++ attached and detached per real-time device. RTmac also provides a ++ framework for tunnelling non-time-critical packets through real-time ++ networks by installing virtual NICs (VNIC) in the Linux domain. ++ ++ See Documentation/README.rtmac for further information. ++ ++source "drivers/xenomai/net/stack/rtmac/tdma/Kconfig" ++source "drivers/xenomai/net/stack/rtmac/nomac/Kconfig" +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_proc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_proc.c 2021-04-07 16:01:26.928634593 +0800 +@@ -0,0 +1,132 @@ ++/*** ++ * ++ * rtmac_proc.c ++ * ++ * rtmac - real-time networking medium access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde ++ * 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++struct xnvfile_directory rtmac_proc_root; ++ ++static struct xnvfile_regular_ops rtnet_rtmac_disciplines_vfile_ops = { ++ .show = rtnet_rtmac_disciplines_show, ++}; ++ ++static struct xnvfile_regular rtnet_rtmac_disciplines_vfile = { ++ .ops = &rtnet_rtmac_disciplines_vfile_ops, ++}; ++ ++static struct xnvfile_regular_ops rtnet_rtmac_vnics_vfile_ops = { ++ .show = rtnet_rtmac_vnics_show, ++}; ++ ++static struct xnvfile_regular rtnet_rtmac_vnics_vfile = { ++ .ops = &rtnet_rtmac_vnics_vfile_ops, ++}; ++ ++static int rtnet_rtmac_disc_show(struct xnvfile_regular_iterator *it, ++ void *data) ++{ ++ struct rtmac_proc_entry *entry; ++ entry = container_of(it->vfile, struct rtmac_proc_entry, vfile); ++ return entry->handler(it, data); ++} ++ ++static struct xnvfile_regular_ops rtnet_rtmac_disc_vfile_ops = { ++ .show = rtnet_rtmac_disc_show, ++}; ++ ++int rtmac_disc_proc_register(struct rtmac_disc *disc) ++{ ++ int i, err; ++ struct rtmac_proc_entry *entry; ++ ++ for (i = 0; i < disc->nr_proc_entries; i++) { ++ entry = &disc->proc_entries[i]; ++ ++ entry->vfile.ops = &rtnet_rtmac_disc_vfile_ops; ++ err = xnvfile_init_regular(entry->name, &entry->vfile, ++ &rtmac_proc_root); ++ if (err < 0) { ++ while (--i >= 0) ++ xnvfile_destroy_regular( ++ &disc->proc_entries[i].vfile); ++ return err; ++ } ++ } ++ ++ return 0; ++} ++ ++void rtmac_disc_proc_unregister(struct rtmac_disc *disc) ++{ ++ int i; ++ ++ for (i = 0; i < disc->nr_proc_entries; i++) ++ xnvfile_destroy_regular(&disc->proc_entries[i].vfile); ++} ++ ++int rtmac_proc_register(void) ++{ ++ int err; ++ ++ err = xnvfile_init_dir("rtmac", &rtmac_proc_root, &rtnet_proc_root); ++ if (err < 0) ++ goto err1; ++ ++ err = xnvfile_init_regular("disciplines", ++ &rtnet_rtmac_disciplines_vfile, ++ &rtmac_proc_root); ++ if (err < 0) ++ goto err2; ++ ++ err = xnvfile_init_regular("vnics", &rtnet_rtmac_vnics_vfile, ++ &rtmac_proc_root); ++ if (err < 0) ++ goto err3; ++ ++ return 0; ++ ++err3: ++ xnvfile_destroy_regular(&rtnet_rtmac_disciplines_vfile); ++ ++err2: ++ xnvfile_destroy_dir(&rtmac_proc_root); ++ ++err1: ++ /*ERRMSG*/ printk("RTmac: unable to initialize /proc entries\n"); ++ return err; ++} ++ ++void rtmac_proc_release(void) ++{ ++ xnvfile_destroy_regular(&rtnet_rtmac_vnics_vfile); ++ xnvfile_destroy_regular(&rtnet_rtmac_disciplines_vfile); ++ xnvfile_destroy_dir(&rtmac_proc_root); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_disc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_disc.c 2021-04-07 16:01:26.923634601 +0800 +@@ -0,0 +1,271 @@ ++/*** ++ * ++ * rtmac_disc.c ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++static DEFINE_MUTEX(disc_list_lock); ++static LIST_HEAD(disc_list); ++ ++/*** ++ * rtmac_disc_attach ++ * ++ * @rtdev attaches a discipline to a device ++ * @disc discipline to attach ++ * ++ * 0 success ++ * -EBUSY other discipline active ++ * -ENOMEM could not allocate memory ++ * ++ * Note: must be called with rtdev->nrt_lock acquired ++ */ ++int rtmac_disc_attach(struct rtnet_device *rtdev, struct rtmac_disc *disc) ++{ ++ int ret; ++ struct rtmac_priv *priv; ++ ++ RTNET_ASSERT(rtdev != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->attach != NULL, return -EINVAL;); ++ ++ if (rtdev->mac_disc) { ++ printk("RTmac: another discipline for rtdev '%s' active.\n", ++ rtdev->name); ++ return -EBUSY; ++ } ++ ++ if (rtdev->flags & IFF_LOOPBACK) ++ return -EINVAL; ++ ++ if (!try_module_get(disc->owner)) ++ return -EIDRM; ++ ++ if (!rtdev_reference(rtdev)) { ++ ret = -EIDRM; ++ goto err_module_put; ++ } ++ ++ /* alloc memory */ ++ priv = kmalloc(sizeof(struct rtmac_priv) + disc->priv_size, GFP_KERNEL); ++ if (!priv) { ++ printk("RTmac: kmalloc returned NULL for rtmac!\n"); ++ return -ENOMEM; ++ } ++ priv->orig_start_xmit = rtdev->start_xmit; ++ ++ /* call attach function of discipline */ ++ ret = disc->attach(rtdev, priv->disc_priv); ++ if (ret < 0) ++ goto err_kfree_priv; ++ ++ /* now attach RTmac to device */ ++ rtdev->mac_disc = disc; ++ rtdev->mac_priv = priv; ++ rtdev->start_xmit = disc->rt_packet_tx; ++ if (disc->get_mtu) ++ rtdev->get_mtu = disc->get_mtu; ++ rtdev->mac_detach = rtmac_disc_detach; ++ ++ /* create the VNIC */ ++ ret = rtmac_vnic_add(rtdev, disc->vnic_xmit); ++ if (ret < 0) { ++ printk("RTmac: Warning, VNIC creation failed for rtdev %s.\n", ++ rtdev->name); ++ goto err_disc_detach; ++ } ++ ++ return 0; ++ ++err_disc_detach: ++ disc->detach(rtdev, priv->disc_priv); ++err_kfree_priv: ++ kfree(priv); ++ rtdev_dereference(rtdev); ++err_module_put: ++ module_put(disc->owner); ++ return ret; ++} ++ ++/*** ++ * rtmac_disc_detach ++ * ++ * @rtdev detaches a discipline from a device ++ * ++ * 0 success ++ * -1 discipline has no detach function ++ * -EINVAL called with rtdev=NULL ++ * -ENODEV no discipline active on dev ++ * ++ * Note: must be called with rtdev->nrt_lock acquired ++ */ ++int rtmac_disc_detach(struct rtnet_device *rtdev) ++{ ++ int ret; ++ struct rtmac_disc *disc; ++ struct rtmac_priv *priv; ++ ++ RTNET_ASSERT(rtdev != NULL, return -EINVAL;); ++ ++ disc = rtdev->mac_disc; ++ if (!disc) ++ return -ENODEV; ++ ++ RTNET_ASSERT(disc->detach != NULL, return -EINVAL;); ++ ++ priv = rtdev->mac_priv; ++ RTNET_ASSERT(priv != NULL, return -EINVAL;); ++ ++ ret = rtmac_vnic_unregister(rtdev); ++ if (ret < 0) ++ return ret; ++ ++ /* call release function of discipline */ ++ ret = disc->detach(rtdev, priv->disc_priv); ++ if (ret < 0) ++ return ret; ++ ++ rtmac_vnic_cleanup(rtdev); ++ ++ /* restore start_xmit and get_mtu */ ++ rtdev->start_xmit = priv->orig_start_xmit; 
++ rtdev->get_mtu = rt_hard_mtu; ++ ++ /* remove pointers from rtdev */ ++ rtdev->mac_disc = NULL; ++ rtdev->mac_priv = NULL; ++ rtdev->mac_detach = NULL; ++ ++ rtdev_dereference(rtdev); ++ ++ kfree(priv); ++ ++ module_put(disc->owner); ++ ++ return 0; ++} ++ ++static struct rtmac_disc *rtmac_get_disc_by_name(const char *name) ++{ ++ struct list_head *disc; ++ ++ mutex_lock(&disc_list_lock); ++ ++ list_for_each (disc, &disc_list) { ++ if (strcmp(((struct rtmac_disc *)disc)->name, name) == 0) { ++ mutex_unlock(&disc_list_lock); ++ return (struct rtmac_disc *)disc; ++ } ++ } ++ ++ mutex_unlock(&disc_list_lock); ++ ++ return NULL; ++} ++ ++int __rtmac_disc_register(struct rtmac_disc *disc, struct module *module) ++{ ++ int ret; ++ ++ RTNET_ASSERT(disc != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->name != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->rt_packet_tx != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->nrt_packet_tx != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->attach != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->detach != NULL, return -EINVAL;); ++ ++ disc->owner = module; ++ ++ if (rtmac_get_disc_by_name(disc->name) != NULL) { ++ printk("RTmac: discipline '%s' already registered!\n", ++ disc->name); ++ return -EBUSY; ++ } ++ ++ ret = rtnet_register_ioctls(&disc->ioctls); ++ if (ret < 0) ++ return ret; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ret = rtmac_disc_proc_register(disc); ++ if (ret < 0) { ++ rtnet_unregister_ioctls(&disc->ioctls); ++ return ret; ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ mutex_lock(&disc_list_lock); ++ ++ list_add(&disc->list, &disc_list); ++ ++ mutex_unlock(&disc_list_lock); ++ ++ return 0; ++} ++ ++void rtmac_disc_deregister(struct rtmac_disc *disc) ++{ ++ RTNET_ASSERT(disc != NULL, return;); ++ ++ mutex_lock(&disc_list_lock); ++ ++ list_del(&disc->list); ++ ++ mutex_unlock(&disc_list_lock); ++ ++ rtnet_unregister_ioctls(&disc->ioctls); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ rtmac_disc_proc_unregister(disc); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int rtnet_rtmac_disciplines_show(struct xnvfile_regular_iterator *it, void *d) ++{ ++ struct rtmac_disc *disc; ++ int err; ++ ++ err = mutex_lock_interruptible(&disc_list_lock); ++ if (err < 0) ++ return err; ++ ++ xnvfile_printf(it, "Name\t\tID\n"); ++ ++ list_for_each_entry (disc, &disc_list, list) ++ xnvfile_printf(it, "%-15s %04X\n", disc->name, ++ ntohs(disc->disc_type)); ++ ++ mutex_unlock(&disc_list_lock); ++ ++ return 0; ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_proto.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_proto.c 2021-04-07 16:01:26.918634608 +0800 +@@ -0,0 +1,68 @@ ++/*** ++ * ++ * rtmac/rtmac_proto.c ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++int rtmac_proto_rx(struct rtskb *skb, struct rtpacket_type *pt) ++{ ++ struct rtmac_disc *disc = skb->rtdev->mac_disc; ++ struct rtmac_hdr *hdr; ++ ++ if (disc == NULL) { ++ goto error; ++ } ++ ++ hdr = (struct rtmac_hdr *)skb->data; ++ rtskb_pull(skb, sizeof(struct rtmac_hdr)); ++ ++ if (hdr->ver != RTMAC_VERSION) { ++ rtdm_printk( ++ "RTmac: received unsupported RTmac protocol version on " ++ "device %s. Got 0x%x but expected 0x%x\n", ++ skb->rtdev->name, hdr->ver, RTMAC_VERSION); ++ goto error; ++ } ++ ++ if (hdr->flags & RTMAC_FLAG_TUNNEL) ++ rtmac_vnic_rx(skb, hdr->type); ++ else if (disc->disc_type == hdr->type) ++ disc->packet_rx(skb); ++ return 0; ++ ++error: ++ kfree_rtskb(skb); ++ return 0; ++} ++ ++struct rtpacket_type rtmac_packet_type = { .type = __constant_htons(ETH_RTMAC), ++ .handler = rtmac_proto_rx }; ++ ++void rtmac_proto_release(void) ++{ ++ rtdev_remove_pack(&rtmac_packet_type); ++} +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_module.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_module.c 2021-04-07 16:01:26.914634613 +0800 +@@ -0,0 +1,80 @@ ++/* rtmac_module.c ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++int __init rtmac_init(void) ++{ ++ int ret = 0; ++ ++ printk("RTmac: init realtime media access control\n"); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ret = rtmac_proc_register(); ++ if (ret < 0) ++ return ret; ++#endif ++ ++ ret = rtmac_vnic_module_init(); ++ if (ret < 0) ++ goto error1; ++ ++ ret = rtmac_proto_init(); ++ if (ret < 0) ++ goto error2; ++ ++ return 0; ++ ++error2: ++ rtmac_vnic_module_cleanup(); ++ ++error1: ++#ifdef CONFIG_XENO_OPT_VFILE ++ rtmac_proc_release(); ++#endif ++ return ret; ++} ++ ++void rtmac_release(void) ++{ ++ rtmac_proto_release(); ++ rtmac_vnic_module_cleanup(); ++#ifdef CONFIG_XENO_OPT_VFILE ++ rtmac_proc_release(); ++#endif ++ ++ printk("RTmac: unloaded\n"); ++} ++ ++module_init(rtmac_init); ++module_exit(rtmac_release); ++ ++MODULE_AUTHOR("Marc Kleine-Budde, Jan Kiszka"); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_syms.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_syms.c 2021-04-07 16:01:26.909634620 +0800 +@@ -0,0 +1,36 @@ ++/* rtmac_syms.c ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde ++ * 2003 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#include ++#include ++ ++#include ++#include ++ ++EXPORT_SYMBOL_GPL(__rtmac_disc_register); ++EXPORT_SYMBOL_GPL(rtmac_disc_deregister); ++ ++EXPORT_SYMBOL_GPL(rtmac_disc_attach); ++EXPORT_SYMBOL_GPL(rtmac_disc_detach); ++ ++EXPORT_SYMBOL_GPL(rtmac_vnic_set_max_mtu); ++ ++EXPORT_SYMBOL_GPL(rtmac_vnic_xmit); +--- linux/drivers/xenomai/net/stack/rtdev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtdev.c 2021-04-07 16:01:26.905634626 +0800 +@@ -0,0 +1,940 @@ ++/*** ++ * ++ * stack/rtdev.c - NIC device driver layer ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include /* ARPHRD_ETHER */ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++static unsigned int device_rtskbs = DEFAULT_DEVICE_RTSKBS; ++module_param(device_rtskbs, uint, 0444); ++MODULE_PARM_DESC(device_rtskbs, "Number of additional global realtime socket " ++ "buffers per network adapter"); ++ ++struct rtnet_device *rtnet_devices[MAX_RT_DEVICES]; ++static struct rtnet_device *loopback_device; ++static DEFINE_RTDM_LOCK(rtnet_devices_rt_lock); ++static LIST_HEAD(rtskb_mapped_list); ++static LIST_HEAD(rtskb_mapwait_list); ++ ++LIST_HEAD(event_hook_list); ++DEFINE_MUTEX(rtnet_devices_nrt_lock); ++ ++static int rtdev_locked_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) ++#define atomic_fetch_add_unless __atomic_add_unless ++#endif ++ ++int rtdev_reference(struct rtnet_device *rtdev) ++{ ++ smp_mb__before_atomic(); ++ if (rtdev->rt_owner && ++ atomic_fetch_add_unless(&rtdev->refcount, 1, 0) == 0) { ++ if (!try_module_get(rtdev->rt_owner)) ++ return 0; ++ if (atomic_inc_return(&rtdev->refcount) != 1) ++ module_put(rtdev->rt_owner); ++ } ++ return 1; ++} ++EXPORT_SYMBOL_GPL(rtdev_reference); ++ ++struct rtskb *rtnetdev_alloc_rtskb(struct rtnet_device *rtdev, ++ unsigned int size) ++{ ++ struct rtskb *rtskb = alloc_rtskb(size, &rtdev->dev_pool); ++ if (rtskb) ++ rtskb->rtdev = rtdev; ++ return rtskb; ++} ++EXPORT_SYMBOL_GPL(rtnetdev_alloc_rtskb); ++ ++/*** ++ * __rtdev_get_by_name - find a rtnet_device by its name ++ * @name: name to find ++ * @note: caller must hold rtnet_devices_nrt_lock ++ */ ++static struct rtnet_device *__rtdev_get_by_name(const char *name) ++{ ++ int i; ++ struct rtnet_device *rtdev; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtnet_devices[i]; ++ if ((rtdev != NULL) && ++ (strncmp(rtdev->name, name, IFNAMSIZ) == 0)) ++ return rtdev; ++ } ++ return NULL; ++} ++ ++/*** ++ * rtdev_get_by_name - find and lock a rtnet_device by its name ++ * @name: name to find ++ */ ++struct rtnet_device *rtdev_get_by_name(const char *name) ++{ ++ struct rtnet_device *rtdev; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ rtdev = __rtdev_get_by_name(name); ++ if (rtdev != NULL && !rtdev_reference(rtdev)) ++ rtdev = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ return rtdev; ++} ++ ++/*** ++ * rtdev_get_by_index - find and lock a rtnet_device by its ifindex ++ * @ifindex: index of device ++ */ ++struct rtnet_device *rtdev_get_by_index(int ifindex) ++{ ++ struct rtnet_device *rtdev; ++ rtdm_lockctx_t context; ++ ++ if ((ifindex <= 0) || (ifindex > MAX_RT_DEVICES)) ++ return NULL; ++ ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ rtdev = __rtdev_get_by_index(ifindex); ++ if (rtdev != NULL && !rtdev_reference(rtdev)) ++ rtdev = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ return rtdev; ++} ++ ++/*** ++ * __rtdev_get_by_hwaddr - find a rtnetdevice by its mac-address ++ * @type: Type of the net_device (may be ARPHRD_ETHER) ++ * @hw_addr: MAC-Address ++ */ ++static inline struct rtnet_device *__rtdev_get_by_hwaddr(unsigned short type, ++ char *hw_addr) ++{ ++ int i; ++ struct rtnet_device *rtdev; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtnet_devices[i]; ++ if ((rtdev != NULL) && (rtdev->type == type) && ++ (!memcmp(rtdev->dev_addr, hw_addr, rtdev->addr_len))) { ++ return rtdev; ++ } ++ } ++ return NULL; ++} ++ 
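A usage note on the lookup helpers above: every successful rtdev_get_by_*() call returns the device with a reference taken through rtdev_reference(), so the caller has to drop it again with rtdev_dereference() once the device is no longer needed. A minimal sketch, illustrative only and written against the helpers defined in this file (it is not code from the patch):

static void example_show_mtu(void)
{
	struct rtnet_device *rtdev = rtdev_get_by_name("rteth0");

	if (!rtdev)
		return;	/* not registered, or currently going away */

	/* the held reference keeps the device and its rt_owner module alive */
	printk("RTnet: %s has an MTU of %u\n", rtdev->name, rtdev->mtu);

	rtdev_dereference(rtdev);
}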
++/*** ++ * rtdev_get_by_hwaddr - find and lock a rtnetdevice by its mac-address ++ * @type: Type of the net_device (may be ARPHRD_ETHER) ++ * @hw_addr: MAC-Address ++ */ ++struct rtnet_device *rtdev_get_by_hwaddr(unsigned short type, char *hw_addr) ++{ ++ struct rtnet_device *rtdev; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ rtdev = __rtdev_get_by_hwaddr(type, hw_addr); ++ if (rtdev != NULL && !rtdev_reference(rtdev)) ++ rtdev = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ return rtdev; ++} ++ ++/*** ++ * rtdev_get_by_hwaddr - find and lock the loopback device if available ++ */ ++struct rtnet_device *rtdev_get_loopback(void) ++{ ++ struct rtnet_device *rtdev; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ rtdev = loopback_device; ++ if (rtdev != NULL && !rtdev_reference(rtdev)) ++ rtdev = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ return rtdev; ++} ++ ++/*** ++ * rtdev_alloc_name - allocate a name for the rtnet_device ++ * @rtdev: the rtnet_device ++ * @name_mask: a name mask (e.g. "rteth%d" for ethernet) ++ * ++ * This function have to be called from the driver probe function. ++ */ ++void rtdev_alloc_name(struct rtnet_device *rtdev, const char *mask) ++{ ++ char buf[IFNAMSIZ]; ++ int i; ++ struct rtnet_device *tmp; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ snprintf(buf, IFNAMSIZ, mask, i); ++ if ((tmp = rtdev_get_by_name(buf)) == NULL) { ++ strncpy(rtdev->name, buf, IFNAMSIZ); ++ break; ++ } else ++ rtdev_dereference(tmp); ++ } ++} ++ ++static int rtdev_pool_trylock(void *cookie) ++{ ++ return rtdev_reference(cookie); ++} ++ ++static void rtdev_pool_unlock(void *cookie) ++{ ++ rtdev_dereference(cookie); ++} ++ ++static const struct rtskb_pool_lock_ops rtdev_ops = { ++ .trylock = rtdev_pool_trylock, ++ .unlock = rtdev_pool_unlock, ++}; ++ ++int rtdev_init(struct rtnet_device *rtdev, unsigned dev_pool_size) ++{ ++ int ret; ++ ++ ret = rtskb_pool_init(&rtdev->dev_pool, dev_pool_size, &rtdev_ops, ++ rtdev); ++ if (ret < dev_pool_size) { ++ printk(KERN_ERR "RTnet: cannot allocate rtnet device pool\n"); ++ rtskb_pool_release(&rtdev->dev_pool); ++ return -ENOMEM; ++ } ++ ++ rtdm_mutex_init(&rtdev->xmit_mutex); ++ rtdm_lock_init(&rtdev->rtdev_lock); ++ mutex_init(&rtdev->nrt_lock); ++ ++ atomic_set(&rtdev->refcount, 0); ++ ++ /* scale global rtskb pool */ ++ rtdev->add_rtskbs = rtskb_pool_extend(&global_pool, device_rtskbs); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rtdev_init); ++ ++void rtdev_destroy(struct rtnet_device *rtdev) ++{ ++ rtskb_pool_release(&rtdev->dev_pool); ++ rtskb_pool_shrink(&global_pool, rtdev->add_rtskbs); ++ rtdev->stack_event = NULL; ++ rtdm_mutex_destroy(&rtdev->xmit_mutex); ++} ++EXPORT_SYMBOL_GPL(rtdev_destroy); ++ ++/*** ++ * rtdev_alloc ++ * @int sizeof_priv: ++ * ++ * allocate memory for a new rt-network-adapter ++ */ ++struct rtnet_device *rtdev_alloc(unsigned sizeof_priv, unsigned dev_pool_size) ++{ ++ struct rtnet_device *rtdev; ++ unsigned alloc_size; ++ int ret; ++ ++ /* ensure 32-byte alignment of the private area */ ++ alloc_size = sizeof(*rtdev) + sizeof_priv + 31; ++ ++ rtdev = kzalloc(alloc_size, GFP_KERNEL); ++ if (rtdev == NULL) { ++ printk(KERN_ERR "RTnet: cannot allocate rtnet device\n"); ++ return NULL; ++ } ++ ++ ret = rtdev_init(rtdev, dev_pool_size); ++ if (ret) { ++ kfree(rtdev); ++ return NULL; ++ } ++ ++ if (sizeof_priv) ++ rtdev->priv = (void *)(((long)(rtdev + 1) + 31) & 
~31); ++ ++ return rtdev; ++} ++ ++/*** ++ * rtdev_free ++ */ ++void rtdev_free(struct rtnet_device *rtdev) ++{ ++ if (rtdev != NULL) { ++ rtdev_destroy(rtdev); ++ kfree(rtdev); ++ } ++} ++EXPORT_SYMBOL_GPL(rtdev_free); ++ ++static void init_etherdev(struct rtnet_device *rtdev, struct module *module) ++{ ++ rtdev->hard_header = rt_eth_header; ++ rtdev->type = ARPHRD_ETHER; ++ rtdev->hard_header_len = ETH_HLEN; ++ rtdev->mtu = 1500; /* eth_mtu */ ++ rtdev->addr_len = ETH_ALEN; ++ rtdev->flags = IFF_BROADCAST; /* TODO: IFF_MULTICAST; */ ++ rtdev->get_mtu = rt_hard_mtu; ++ rtdev->rt_owner = module; ++ ++ memset(rtdev->broadcast, 0xFF, ETH_ALEN); ++ strcpy(rtdev->name, "rteth%d"); ++} ++ ++/** ++ * rt_init_etherdev - sets up an ethernet device ++ * @module: module initializing the device ++ * ++ * Fill in the fields of the device structure with ethernet-generic ++ * values. This routine can be used to set up a pre-allocated device ++ * structure. The device still needs to be registered afterwards. ++ */ ++int __rt_init_etherdev(struct rtnet_device *rtdev, unsigned dev_pool_size, ++ struct module *module) ++{ ++ int ret; ++ ++ ret = rtdev_init(rtdev, dev_pool_size); ++ if (ret) ++ return ret; ++ ++ init_etherdev(rtdev, module); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(__rt_init_etherdev); ++ ++/** ++ * rt_alloc_etherdev - Allocates and sets up an ethernet device ++ * @sizeof_priv: size of additional driver-private structure to ++ * be allocated for this ethernet device ++ * @dev_pool_size: size of the rx pool ++ * @module: module creating the device ++ * ++ * Allocates then fills in the fields of a new device structure with ++ * ethernet-generic values. Basically does everything except ++ * registering the device. ++ * ++ * A 32-byte alignment is enforced for the private data area. ++ */ ++struct rtnet_device *__rt_alloc_etherdev(unsigned sizeof_priv, ++ unsigned dev_pool_size, ++ struct module *module) ++{ ++ struct rtnet_device *rtdev; ++ ++ rtdev = rtdev_alloc(sizeof_priv, dev_pool_size); ++ if (!rtdev) ++ return NULL; ++ ++ init_etherdev(rtdev, module); ++ ++ return rtdev; ++} ++EXPORT_SYMBOL_GPL(__rt_alloc_etherdev); ++ ++static inline int __rtdev_new_index(void) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) ++ if (rtnet_devices[i] == NULL) ++ return i + 1; ++ ++ return -ENOMEM; ++} ++ ++static int rtskb_map(struct rtnet_device *rtdev, struct rtskb *skb) ++{ ++ dma_addr_t addr; ++ ++ addr = rtdev->map_rtskb(rtdev, skb); ++ ++ if (WARN_ON(addr == RTSKB_UNMAPPED)) ++ return -ENOMEM; ++ ++ if (skb->buf_dma_addr != RTSKB_UNMAPPED && addr != skb->buf_dma_addr) { ++ printk("RTnet: device %s maps skb differently than others. 
" ++ "Different IOMMU domain?\nThis is not supported.\n", ++ rtdev->name); ++ return -EACCES; ++ } ++ ++ skb->buf_dma_addr = addr; ++ ++ return 0; ++} ++ ++int rtdev_map_rtskb(struct rtskb *skb) ++{ ++ struct rtnet_device *rtdev; ++ int err = 0; ++ int i; ++ ++ skb->buf_dma_addr = RTSKB_UNMAPPED; ++ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtnet_devices[i]; ++ if (rtdev && rtdev->map_rtskb) { ++ err = rtskb_map(rtdev, skb); ++ if (err) ++ break; ++ } ++ } ++ ++ if (!err) { ++ if (skb->buf_dma_addr != RTSKB_UNMAPPED) ++ list_add(&skb->entry, &rtskb_mapped_list); ++ else ++ list_add(&skb->entry, &rtskb_mapwait_list); ++ } ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ ++ return err; ++} ++ ++static int rtdev_map_all_rtskbs(struct rtnet_device *rtdev) ++{ ++ struct rtskb *skb, *n; ++ int err = 0; ++ ++ if (!rtdev->map_rtskb) ++ return 0; ++ ++ list_for_each_entry (skb, &rtskb_mapped_list, entry) { ++ err = rtskb_map(rtdev, skb); ++ if (err) ++ break; ++ } ++ ++ list_for_each_entry_safe (skb, n, &rtskb_mapwait_list, entry) { ++ err = rtskb_map(rtdev, skb); ++ if (err) ++ break; ++ list_del(&skb->entry); ++ list_add(&skb->entry, &rtskb_mapped_list); ++ } ++ ++ return err; ++} ++ ++void rtdev_unmap_rtskb(struct rtskb *skb) ++{ ++ struct rtnet_device *rtdev; ++ int i; ++ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ ++ list_del(&skb->entry); ++ ++ if (skb->buf_dma_addr != RTSKB_UNMAPPED) { ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtnet_devices[i]; ++ if (rtdev && rtdev->unmap_rtskb) { ++ rtdev->unmap_rtskb(rtdev, skb); ++ } ++ } ++ } ++ ++ skb->buf_dma_addr = RTSKB_UNMAPPED; ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++} ++ ++static void rtdev_unmap_all_rtskbs(struct rtnet_device *rtdev) ++{ ++ struct rtskb *skb; ++ ++ if (!rtdev->unmap_rtskb) ++ return; ++ ++ list_for_each_entry (skb, &rtskb_mapped_list, entry) { ++ rtdev->unmap_rtskb(rtdev, skb); ++ } ++} ++ ++/*** ++ * rt_register_rtnetdev: register a new rtnet_device (linux-like) ++ * @rtdev: the device ++ */ ++int rt_register_rtnetdev(struct rtnet_device *rtdev) ++{ ++ struct list_head *entry; ++ struct rtdev_event_hook *hook; ++ rtdm_lockctx_t context; ++ int ifindex; ++ int err; ++ ++ /* requires at least driver layer version 2.0 */ ++ if (rtdev->vers < RTDEV_VERS_2_0) ++ return -EINVAL; ++ ++ if (rtdev->features & NETIF_F_LLTX) ++ rtdev->start_xmit = rtdev->hard_start_xmit; ++ else ++ rtdev->start_xmit = rtdev_locked_xmit; ++ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ ++ ifindex = __rtdev_new_index(); ++ if (ifindex < 0) { ++ err = ifindex; ++ goto fail; ++ } ++ rtdev->ifindex = ifindex; ++ ++ if (strchr(rtdev->name, '%') != NULL) ++ rtdev_alloc_name(rtdev, rtdev->name); ++ ++ if (__rtdev_get_by_name(rtdev->name) != NULL) { ++ err = -EEXIST; ++ goto fail; ++ } ++ ++ rtdev->sysdev = ++ device_create(rtnet_class, NULL, MKDEV(0, rtdev->ifindex), ++ rtdev, rtdev->name); ++ if (IS_ERR(rtdev->sysdev)) { ++ err = PTR_ERR(rtdev->sysdev); ++ goto fail; ++ } ++ ++ if (rtdev->sysbind) { ++ err = sysfs_create_link(&rtdev->sysdev->kobj, ++ &rtdev->sysbind->kobj, "adapter"); ++ if (err) ++ goto fail_link; ++ } ++ ++ err = rtdev_map_all_rtskbs(rtdev); ++ if (err) ++ goto fail_map; ++ ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ if (rtdev->flags & IFF_LOOPBACK) { ++ /* allow only one loopback device */ ++ if (loopback_device) { ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, ++ context); ++ err = -EEXIST; ++ goto fail_loopback; ++ } ++ loopback_device = rtdev; ++ } 
++ rtnet_devices[rtdev->ifindex - 1] = rtdev; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ list_for_each (entry, &event_hook_list) { ++ hook = list_entry(entry, struct rtdev_event_hook, entry); ++ if (hook->register_device) ++ hook->register_device(rtdev); ++ } ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ ++ /* Default state at registration is that the device is present. */ ++ set_bit(__RTNET_LINK_STATE_PRESENT, &rtdev->link_state); ++ ++ printk("RTnet: registered %s\n", rtdev->name); ++ ++ return 0; ++ ++fail_loopback: ++ rtdev_unmap_all_rtskbs(rtdev); ++fail_map: ++ if (rtdev->sysbind) ++ sysfs_remove_link(&rtdev->sysdev->kobj, "adapter"); ++fail_link: ++ device_destroy(rtnet_class, MKDEV(0, rtdev->ifindex)); ++fail: ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ ++ return err; ++} ++ ++/*** ++ * rt_unregister_rtnetdev: unregister a rtnet_device ++ * @rtdev: the device ++ */ ++int rt_unregister_rtnetdev(struct rtnet_device *rtdev) ++{ ++ struct list_head *entry; ++ struct rtdev_event_hook *hook; ++ rtdm_lockctx_t context; ++ ++ RTNET_ASSERT(rtdev->ifindex != 0, ++ printk("RTnet: device %s/%p was not registered\n", ++ rtdev->name, rtdev); ++ return -ENODEV;); ++ ++ if (rtdev->sysbind) ++ sysfs_remove_link(&rtdev->sysdev->kobj, "adapter"); ++ ++ device_destroy(rtnet_class, MKDEV(0, rtdev->ifindex)); ++ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ RTNET_ASSERT(atomic_read(&rtdev->refcount == 0), BUG()); ++ rtnet_devices[rtdev->ifindex - 1] = NULL; ++ if (rtdev->flags & IFF_LOOPBACK) ++ loopback_device = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ list_for_each (entry, &event_hook_list) { ++ hook = list_entry(entry, struct rtdev_event_hook, entry); ++ if (hook->unregister_device) ++ hook->unregister_device(rtdev); ++ } ++ ++ rtdev_unmap_all_rtskbs(rtdev); ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ ++ clear_bit(__RTNET_LINK_STATE_PRESENT, &rtdev->link_state); ++ ++ RTNET_ASSERT(atomic_read(&rtdev->refcount) == 0, ++ printk("RTnet: rtdev reference counter < 0!\n");); ++ ++ printk("RTnet: unregistered %s\n", rtdev->name); ++ ++ return 0; ++} ++ ++void rtdev_add_event_hook(struct rtdev_event_hook *hook) ++{ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ list_add(&hook->entry, &event_hook_list); ++ mutex_unlock(&rtnet_devices_nrt_lock); ++} ++ ++void rtdev_del_event_hook(struct rtdev_event_hook *hook) ++{ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ list_del(&hook->entry); ++ mutex_unlock(&rtnet_devices_nrt_lock); ++} ++ ++int rtdev_up(struct rtnet_device *rtdev, struct rtnet_core_cmd *cmd) ++{ ++ struct list_head *entry; ++ struct rtdev_event_hook *hook; ++ int ret = 0; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ /* We cannot change the promisc flag or the hardware address if ++ the device is already up. */ ++ if ((rtdev->flags & IFF_UP) && ++ (((cmd->args.up.set_dev_flags | cmd->args.up.clear_dev_flags) & ++ IFF_PROMISC) || ++ (cmd->args.up.dev_addr_type != ARPHRD_VOID))) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ if (cmd->args.up.dev_addr_type != ARPHRD_VOID && ++ cmd->args.up.dev_addr_type != rtdev->type) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* Skip upon extraneous call only after args have been checked. 
*/ ++ if (test_and_set_bit(PRIV_FLAG_UP, &rtdev->priv_flags)) ++ goto out; ++ ++ rtdev->flags |= cmd->args.up.set_dev_flags; ++ rtdev->flags &= ~cmd->args.up.clear_dev_flags; ++ ++ if (cmd->args.up.dev_addr_type != ARPHRD_VOID) ++ memcpy(rtdev->dev_addr, cmd->args.up.dev_addr, MAX_ADDR_LEN); ++ ++ ret = rtdev_open(rtdev); /* also == 0 if rtdev is already up */ ++ ++ if (ret == 0) { ++ mutex_lock(&rtnet_devices_nrt_lock); ++ ++ list_for_each (entry, &event_hook_list) { ++ hook = list_entry(entry, struct rtdev_event_hook, ++ entry); ++ if (hook->ifup) ++ hook->ifup(rtdev, cmd); ++ } ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ } else ++ clear_bit(PRIV_FLAG_UP, &rtdev->priv_flags); ++out: ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdev_up); ++ ++int rtdev_down(struct rtnet_device *rtdev) ++{ ++ struct list_head *entry; ++ struct rtdev_event_hook *hook; ++ rtdm_lockctx_t context; ++ int ret = 0; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ /* spin lock required for sync with routing code */ ++ rtdm_lock_get_irqsave(&rtdev->rtdev_lock, context); ++ ++ if (test_bit(PRIV_FLAG_ADDING_ROUTE, &rtdev->priv_flags)) { ++ ret = -EBUSY; ++ goto fail; ++ } ++ ++ if (!test_and_clear_bit(PRIV_FLAG_UP, &rtdev->priv_flags)) ++ goto fail; ++ ++ rtdm_lock_put_irqrestore(&rtdev->rtdev_lock, context); ++ ++ if (rtdev->mac_detach != NULL) ++ ret = rtdev->mac_detach(rtdev); ++ ++ if (ret == 0) { ++ mutex_lock(&rtnet_devices_nrt_lock); ++ ++ list_for_each (entry, &event_hook_list) { ++ hook = list_entry(entry, struct rtdev_event_hook, ++ entry); ++ if (hook->ifdown) ++ hook->ifdown(rtdev); ++ } ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ ++ ret = rtdev_close(rtdev); ++ } ++out: ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ return ret; ++fail: ++ rtdm_lock_put_irqrestore(&rtdev->rtdev_lock, context); ++ goto out; ++} ++EXPORT_SYMBOL_GPL(rtdev_down); ++ ++/*** ++ * rtdev_open ++ * ++ * Prepare an interface for use. ++ */ ++int rtdev_open(struct rtnet_device *rtdev) ++{ ++ int ret = 0; ++ ++ if (rtdev->flags & IFF_UP) /* Is it already up? 
*/ ++ return 0; ++ ++ if (!rtdev_reference(rtdev)) ++ return -EIDRM; ++ ++ if (rtdev->open) /* Call device private open method */ ++ ret = rtdev->open(rtdev); ++ ++ if (!ret) { ++ rtdev->flags |= IFF_UP; ++ set_bit(__RTNET_LINK_STATE_START, &rtdev->link_state); ++ } else ++ rtdev_dereference(rtdev); ++ ++ return ret; ++} ++ ++/*** ++ * rtdev_close ++ */ ++int rtdev_close(struct rtnet_device *rtdev) ++{ ++ int ret = 0; ++ ++ if (!(rtdev->flags & IFF_UP)) ++ return 0; ++ ++ if (rtdev->stop) ++ ret = rtdev->stop(rtdev); ++ ++ rtdev->flags &= ~(IFF_UP | IFF_RUNNING); ++ clear_bit(__RTNET_LINK_STATE_START, &rtdev->link_state); ++ ++ if (ret == 0) ++ rtdev_dereference(rtdev); ++ ++ return ret; ++} ++ ++static int rtdev_locked_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ int ret; ++ ++ rtdm_mutex_lock(&rtdev->xmit_mutex); ++ ret = rtdev->hard_start_xmit(skb, rtdev); ++ rtdm_mutex_unlock(&rtdev->xmit_mutex); ++ ++ return ret; ++} ++ ++/*** ++ * rtdev_xmit - send real-time packet ++ */ ++int rtdev_xmit(struct rtskb *rtskb) ++{ ++ struct rtnet_device *rtdev; ++ int err; ++ ++ RTNET_ASSERT(rtskb != NULL, return -EINVAL;); ++ ++ rtdev = rtskb->rtdev; ++ ++ if (!rtnetif_carrier_ok(rtdev)) { ++ err = -EAGAIN; ++ kfree_rtskb(rtskb); ++ return err; ++ } ++ ++ if (rtskb_acquire(rtskb, &rtdev->dev_pool) != 0) { ++ err = -ENOBUFS; ++ kfree_rtskb(rtskb); ++ return err; ++ } ++ ++ RTNET_ASSERT(rtdev != NULL, return -EINVAL;); ++ ++ err = rtdev->start_xmit(rtskb, rtdev); ++ if (err) { ++ /* on error we must free the rtskb here */ ++ kfree_rtskb(rtskb); ++ ++ rtdm_printk("hard_start_xmit returned %d\n", err); ++ } ++ ++ return err; ++} ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++/*** ++ * rtdev_xmit_proxy - send rtproxy packet ++ */ ++int rtdev_xmit_proxy(struct rtskb *rtskb) ++{ ++ struct rtnet_device *rtdev; ++ int err; ++ ++ RTNET_ASSERT(rtskb != NULL, return -EINVAL;); ++ ++ rtdev = rtskb->rtdev; ++ ++ RTNET_ASSERT(rtdev != NULL, return -EINVAL;); ++ ++ /* TODO: make these lines race-condition-safe */ ++ if (rtdev->mac_disc) { ++ RTNET_ASSERT(rtdev->mac_disc->nrt_packet_tx != NULL, ++ return -EINVAL;); ++ ++ err = rtdev->mac_disc->nrt_packet_tx(rtskb); ++ } else { ++ err = rtdev->start_xmit(rtskb, rtdev); ++ if (err) { ++ /* on error we must free the rtskb here */ ++ kfree_rtskb(rtskb); ++ ++ rtdm_printk("hard_start_xmit returned %d\n", err); ++ } ++ } ++ ++ return err; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY */ ++ ++unsigned int rt_hard_mtu(struct rtnet_device *rtdev, unsigned int priority) ++{ ++ return rtdev->mtu; ++} ++ ++EXPORT_SYMBOL_GPL(rtdev_alloc_name); ++ ++EXPORT_SYMBOL_GPL(rt_register_rtnetdev); ++EXPORT_SYMBOL_GPL(rt_unregister_rtnetdev); ++ ++EXPORT_SYMBOL_GPL(rtdev_add_event_hook); ++EXPORT_SYMBOL_GPL(rtdev_del_event_hook); ++ ++EXPORT_SYMBOL_GPL(rtdev_get_by_name); ++EXPORT_SYMBOL_GPL(rtdev_get_by_index); ++EXPORT_SYMBOL_GPL(rtdev_get_by_hwaddr); ++EXPORT_SYMBOL_GPL(rtdev_get_loopback); ++ ++EXPORT_SYMBOL_GPL(rtdev_xmit); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++EXPORT_SYMBOL_GPL(rtdev_xmit_proxy); ++#endif ++ ++EXPORT_SYMBOL_GPL(rt_hard_mtu); +--- linux/drivers/xenomai/net/stack/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/Kconfig 2021-04-07 16:01:26.900634633 +0800 +@@ -0,0 +1,41 @@ ++menu "Protocol Stack" ++ depends on XENO_DRIVERS_NET ++ ++comment "Stack parameters" ++ ++config XENO_DRIVERS_NET_RX_FIFO_SIZE ++ int "Size of central RX-FIFO" ++ depends on XENO_DRIVERS_NET ++ default 32 ++ 
---help--- ++ Size of FIFO between NICs and stack manager task. Must be power ++ of two! Effectively, only CONFIG_RTNET_RX_FIFO_SIZE-1 slots will ++ be usable. ++ ++config XENO_DRIVERS_NET_ETH_P_ALL ++ depends on XENO_DRIVERS_NET ++ bool "Support for ETH_P_ALL" ++ ---help--- ++ Enables core support for registering listeners on all layer 3 ++ protocols (ETH_P_ALL). Internally this is currently realised by ++ clone-copying incoming frames for those listners, future versions ++ will implement buffer sharing for efficiency reasons. Use with ++ care, every ETH_P_ALL-listener adds noticable overhead to the ++ reception path. ++ ++config XENO_DRIVERS_NET_RTWLAN ++ depends on XENO_DRIVERS_NET ++ bool "Real-Time WLAN" ++ ---help--- ++ Enables core support for real-time wireless LAN. RT-WLAN is based ++ on low-level access to 802.11-compliant adapters and is currently ++ in an experimental stage. ++ ++comment "Protocols" ++ ++source "drivers/xenomai/net/stack/ipv4/Kconfig" ++source "drivers/xenomai/net/stack/packet/Kconfig" ++source "drivers/xenomai/net/stack/rtmac/Kconfig" ++source "drivers/xenomai/net/stack/rtcfg/Kconfig" ++ ++endmenu +--- linux/drivers/xenomai/net/stack/include/rtnet_internal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_internal.h 2021-04-07 16:01:26.895634641 +0800 +@@ -0,0 +1,75 @@ ++/*** ++ * ++ * rtnet_internal.h - internal declarations ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_INTERNAL_H_ ++#define __RTNET_INTERNAL_H_ ++ ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++#define RTNET_ASSERT(expr, func) \ ++ if (!(expr)) { \ ++ rtdm_printk("Assertion failed! 
%s:%s:%d %s\n", __FILE__, \ ++ __FUNCTION__, __LINE__, (#expr)); \ ++ func \ ++ } ++#else ++#define RTNET_ASSERT(expr, func) ++#endif /* CONFIG_XENO_DRIVERS_NET_CHECKED */ ++ ++/* some configurables */ ++ ++#define RTNET_DEF_STACK_PRIORITY \ ++ RTDM_TASK_HIGHEST_PRIORITY + RTDM_TASK_LOWER_PRIORITY ++/*#define RTNET_RTDEV_PRIORITY 5*/ ++ ++struct rtnet_device; ++ ++/*struct rtnet_msg { ++ int msg_type; ++ struct rtnet_device *rtdev; ++};*/ ++ ++struct rtnet_mgr { ++ rtdm_task_t task; ++ /* MBX mbx;*/ ++ rtdm_event_t event; ++}; ++ ++extern struct rtnet_mgr STACK_manager; ++extern struct rtnet_mgr RTDEV_manager; ++ ++extern const char rtnet_rtdm_provider_name[]; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++extern struct xnvfile_directory rtnet_proc_root; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++extern struct class *rtnet_class; ++ ++#endif /* __RTNET_INTERNAL_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtnet_socket.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_socket.h 2021-04-07 16:01:26.890634648 +0800 +@@ -0,0 +1,108 @@ ++/*** ++ * ++ * include/rtnet_socket.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTNET_SOCKET_H_ ++#define __RTNET_SOCKET_H_ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++struct rtsocket { ++ unsigned short protocol; ++ ++ struct rtskb_pool skb_pool; ++ unsigned int pool_size; ++ struct mutex pool_nrt_lock; ++ ++ struct rtskb_queue incoming; ++ ++ rtdm_lock_t param_lock; ++ ++ unsigned int priority; ++ nanosecs_rel_t timeout; /* receive timeout, 0 for infinite */ ++ ++ rtdm_sem_t pending_sem; ++ ++ void (*callback_func)(struct rtdm_fd *, void *arg); ++ void *callback_arg; ++ ++ unsigned long flags; ++ ++ union { ++ /* IP specific */ ++ struct { ++ u32 saddr; /* source ip-addr (bind) */ ++ u32 daddr; /* destination ip-addr */ ++ u16 sport; /* source port */ ++ u16 dport; /* destination port */ ++ ++ int reg_index; /* index in port registry */ ++ u8 tos; ++ u8 state; ++ } inet; ++ ++ /* packet socket specific */ ++ struct { ++ struct rtpacket_type packet_type; ++ int ifindex; ++ } packet; ++ } prot; ++}; ++ ++static inline struct rtdm_fd *rt_socket_fd(struct rtsocket *sock) ++{ ++ return rtdm_private_to_fd(sock); ++} ++ ++void *rtnet_get_arg(struct rtdm_fd *fd, void *tmp, const void *src, size_t len); ++ ++int rtnet_put_arg(struct rtdm_fd *fd, void *dst, const void *src, size_t len); ++ ++#define rt_socket_reference(sock) rtdm_fd_lock(rt_socket_fd(sock)) ++#define rt_socket_dereference(sock) rtdm_fd_unlock(rt_socket_fd(sock)) ++ ++int rt_socket_init(struct rtdm_fd *fd, unsigned short protocol); ++ ++void rt_socket_cleanup(struct rtdm_fd *fd); ++int rt_socket_common_ioctl(struct rtdm_fd *fd, int request, void __user *arg); ++int rt_socket_if_ioctl(struct rtdm_fd *fd, int request, void __user *arg); ++int rt_socket_select_bind(struct rtdm_fd *fd, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index); ++ ++int rt_bare_socket_init(struct rtdm_fd *fd, unsigned short protocol, ++ unsigned int priority, unsigned int pool_size); ++ ++static inline void rt_bare_socket_cleanup(struct rtsocket *sock) ++{ ++ rtskb_pool_release(&sock->skb_pool); ++} ++ ++#endif /* __RTNET_SOCKET_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac.h 2021-04-07 16:01:26.886634653 +0800 +@@ -0,0 +1,92 @@ ++/*** ++ * ++ * include/rtmac.h ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2004-2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ * As a special exception to the GNU General Public license, the RTnet ++ * project allows you to use this header file in unmodified form to produce ++ * application programs executing in user-space which use RTnet services by ++ * normal system calls. 
The resulting executable will not be covered by the ++ * GNU General Public License merely as a result of this header file use. ++ * Instead, this header file use will be considered normal use of RTnet and ++ * not a "derived work" in the sense of the GNU General Public License. ++ * ++ * This exception does not apply when the application code is built as a ++ * static or dynamically loadable portion of the Linux kernel nor does the ++ * exception override other reasons justifying application of the GNU General ++ * Public License. ++ * ++ * This exception applies only to the code released by the RTnet project ++ * under the name RTnet and bearing this exception notice. If you copy code ++ * from other sources into a copy of RTnet, the exception does not apply to ++ * the code that you add in this way. ++ * ++ */ ++ ++#ifndef __RTMAC_H_ ++#define __RTMAC_H_ ++ ++#include ++ ++/* sub-classes: RTDM_CLASS_RTMAC */ ++#define RTDM_SUBCLASS_TDMA 0 ++#define RTDM_SUBCLASS_UNMANAGED 1 ++ ++#define RTIOC_TYPE_RTMAC RTDM_CLASS_RTMAC ++ ++/* ** Common Cycle Event Types ** */ ++/* standard event, wake up once per cycle */ ++#define RTMAC_WAIT_ON_DEFAULT 0x00 ++/* wake up on media access of the station, may trigger multiple times per ++ cycle */ ++#define RTMAC_WAIT_ON_XMIT 0x01 ++ ++/* ** TDMA-specific Cycle Event Types ** */ ++/* tigger on on SYNC frame reception/transmission */ ++#define TDMA_WAIT_ON_SYNC RTMAC_WAIT_ON_DEFAULT ++#define TDMA_WAIT_ON_SOF TDMA_WAIT_ON_SYNC /* legacy support */ ++ ++/* RTMAC_RTIOC_WAITONCYCLE_EX control and status data */ ++struct rtmac_waitinfo { ++ /** Set to wait type before invoking the service */ ++ unsigned int type; ++ ++ /** Set to sizeof(struct rtmac_waitinfo) before invoking the service */ ++ size_t size; ++ ++ /** Counter of elementary cycles of the underlying RTmac discipline ++ (if applicable) */ ++ unsigned long cycle_no; ++ ++ /** Date (in local time) of the last elementary cycle start of the RTmac ++ discipline (if applicable) */ ++ nanosecs_abs_t cycle_start; ++ ++ /** Offset of the local clock to the global clock provided by the RTmac ++ discipline (if applicable): t_global = t_local + clock_offset */ ++ nanosecs_rel_t clock_offset; ++}; ++ ++/* RTmac Discipline IOCTLs */ ++#define RTMAC_RTIOC_TIMEOFFSET _IOR(RTIOC_TYPE_RTMAC, 0x00, int64_t) ++#define RTMAC_RTIOC_WAITONCYCLE _IOW(RTIOC_TYPE_RTMAC, 0x01, unsigned int) ++#define RTMAC_RTIOC_WAITONCYCLE_EX \ ++ _IOWR(RTIOC_TYPE_RTMAC, 0x02, struct rtmac_waitinfo) ++ ++#endif /* __RTMAC_H_ */ +--- linux/drivers/xenomai/net/stack/include/ethernet/eth.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ethernet/eth.h 2021-04-07 16:01:26.881634661 +0800 +@@ -0,0 +1,32 @@ ++/* ethernet/eth.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++#ifndef __RTNET_ETH_H_ ++#define __RTNET_ETH_H_ ++ ++#include ++#include ++ ++extern int rt_eth_header(struct rtskb *skb, struct rtnet_device *rtdev, ++ unsigned short type, void *daddr, void *saddr, ++ unsigned int len); ++extern unsigned short rt_eth_type_trans(struct rtskb *skb, ++ struct rtnet_device *dev); ++ ++#endif /* __RTNET_ETH_H_ */ +--- linux/drivers/xenomai/net/stack/include/stack_mgr.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/stack_mgr.h 2021-04-07 16:01:26.877634666 +0800 +@@ -0,0 +1,95 @@ ++/*** ++ * ++ * stack_mgr.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003-2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __STACK_MGR_H_ ++#define __STACK_MGR_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++#include ++#include ++ ++/*** ++ * network layer protocol (layer 3) ++ */ ++ ++#define RTPACKET_HASH_TBL_SIZE 64 ++#define RTPACKET_HASH_KEY_MASK (RTPACKET_HASH_TBL_SIZE - 1) ++ ++struct rtpacket_type { ++ struct list_head list_entry; ++ ++ unsigned short type; ++ short refcount; ++ ++ int (*handler)(struct rtskb *, struct rtpacket_type *); ++ int (*err_handler)(struct rtskb *, struct rtnet_device *, ++ struct rtpacket_type *); ++ bool (*trylock)(struct rtpacket_type *); ++ void (*unlock)(struct rtpacket_type *); ++ ++ struct module *owner; ++}; ++ ++int __rtdev_add_pack(struct rtpacket_type *pt, struct module *module); ++#define rtdev_add_pack(pt) __rtdev_add_pack(pt, THIS_MODULE) ++ ++void rtdev_remove_pack(struct rtpacket_type *pt); ++ ++static inline bool rtdev_lock_pack(struct rtpacket_type *pt) ++{ ++ return try_module_get(pt->owner); ++} ++ ++static inline void rtdev_unlock_pack(struct rtpacket_type *pt) ++{ ++ module_put(pt->owner); ++} ++ ++void rt_stack_connect(struct rtnet_device *rtdev, struct rtnet_mgr *mgr); ++void rt_stack_disconnect(struct rtnet_device *rtdev); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK) ++void rt_stack_deliver(struct rtskb *rtskb); ++#endif /* CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK */ ++ ++int rt_stack_mgr_init(struct rtnet_mgr *mgr); ++void rt_stack_mgr_delete(struct rtnet_mgr *mgr); ++ ++void rtnetif_rx(struct rtskb *skb); ++ ++static inline void rtnetif_tx(struct rtnet_device *rtdev) ++{ ++} ++ ++static inline void rt_mark_stack_mgr(struct rtnet_device *rtdev) ++{ ++ rtdm_event_signal(rtdev->stack_event); ++} ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __STACK_MGR_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtskb.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtskb.h 2021-04-07 
16:01:26.872634674 +0800 +@@ -0,0 +1,809 @@ ++/*** ++ * ++ * include/rtskb.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2002 Ulrich Marx , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTSKB_H_ ++#define __RTSKB_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++#include ++#include ++ ++/*** ++ ++rtskb Management - A Short Introduction ++--------------------------------------- ++ ++1. rtskbs (Real-Time Socket Buffers) ++ ++A rtskb consists of a management structure (struct rtskb) and a fixed-sized ++(RTSKB_SIZE) data buffer. It is used to store network packets on their way from ++the API routines through the stack to the NICs or vice versa. rtskbs are ++allocated as one chunk of memory which contains both the managment structure ++and the buffer memory itself. ++ ++ ++2. rtskb Queues ++ ++A rtskb queue is described by struct rtskb_queue. A queue can contain an ++unlimited number of rtskbs in an ordered way. A rtskb can either be added to ++the head (rtskb_queue_head()) or the tail of a queue (rtskb_queue_tail()). When ++a rtskb is removed from a queue (rtskb_dequeue()), it is always taken from the ++head. Queues are normally spin lock protected unless the __variants of the ++queuing functions are used. ++ ++ ++3. Prioritized rtskb Queues ++ ++A prioritized queue contains a number of normal rtskb queues within an array. ++The array index of a sub-queue correspond to the priority of the rtskbs within ++this queue. For enqueuing a rtskb (rtskb_prio_queue_head()), its priority field ++is evaluated and the rtskb is then placed into the appropriate sub-queue. When ++dequeuing a rtskb, the first rtskb of the first non-empty sub-queue with the ++highest priority is returned. The current implementation supports 32 different ++priority levels, the lowest if defined by QUEUE_MIN_PRIO, the highest by ++QUEUE_MAX_PRIO. ++ ++ ++4. rtskb Pools ++ ++As rtskbs must not be allocated by a normal memory manager during runtime, ++preallocated rtskbs are kept ready in several pools. Most packet producers ++(NICs, sockets, etc.) have their own pools in order to be independent of the ++load situation of other parts of the stack. ++ ++When a pool is created (rtskb_pool_init()), the required rtskbs are allocated ++from a Linux slab cache. Pools can be extended (rtskb_pool_extend()) or ++shrinked (rtskb_pool_shrink()) during runtime. When shutting down the ++program/module, every pool has to be released (rtskb_pool_release()). All these ++commands demand to be executed within a non real-time context. ++ ++Pools are organized as normal rtskb queues (struct rtskb_queue). When a rtskb ++is allocated (alloc_rtskb()), it is actually dequeued from the pool's queue. ++When freeing a rtskb (kfree_rtskb()), the rtskb is enqueued to its owning pool. 
++rtskbs can be exchanged between pools (rtskb_acquire()). In this case, the ++passed rtskb switches over to from its owning pool to a given pool, but only if ++this pool can pass an empty rtskb from its own queue back. ++ ++ ++5. rtskb Chains ++ ++To ease the defragmentation of larger IP packets, several rtskbs can form a ++chain. For these purposes, the first rtskb (and only the first!) provides a ++pointer to the last rtskb in the chain. When enqueuing the first rtskb of a ++chain, the whole chain is automatically placed into the destined queue. But, ++to dequeue a complete chain specialized calls are required (postfix: _chain). ++While chains also get freed en bloc (kfree_rtskb()) when passing the first ++rtskbs, it is not possible to allocate a chain from a pool (alloc_rtskb()); a ++newly allocated rtskb is always reset to a "single rtskb chain". Furthermore, ++the acquisition of complete chains is NOT supported (rtskb_acquire()). ++ ++ ++6. Capturing Support (Optional) ++ ++When incoming or outgoing packets are captured, the assigned rtskb needs to be ++shared between the stack, the driver, and the capturing service. In contrast to ++many other network stacks, RTnet does not create a new rtskb head and ++re-references the payload. Instead, additional fields at the end of the rtskb ++structure are use for sharing a rtskb with a capturing service. If the sharing ++bit (RTSKB_CAP_SHARED) in cap_flags is set, the rtskb will not be returned to ++the owning pool upon the call of kfree_rtskb. Instead this bit will be reset, ++and a compensation rtskb stored in cap_comp_skb will be returned to the owning ++pool. cap_start and cap_len can be used to mirror the dimension of the full ++packet. This is required because the data and len fields will be modified while ++walking through the stack. cap_next allows to add a rtskb to a separate queue ++which is independent of any queue described in 2. ++ ++Certain setup tasks for capturing packets can not become part of a capturing ++module, they have to be embedded into the stack. For this purpose, several ++inline functions are provided. rtcap_mark_incoming() is used to save the packet ++dimension right before it is modifed by the stack. rtcap_report_incoming() ++calls the capturing handler, if present, in order to let it process the ++received rtskb (e.g. allocate compensation rtskb, mark original rtskb as ++shared, and enqueue it). ++ ++Outgoing rtskb have to be captured by adding a hook function to the chain of ++hard_start_xmit functions of a device. To measure the delay caused by RTmac ++between the request and the actual transmission, a time stamp can be taken using ++rtcap_mark_rtmac_enqueue(). This function is typically called by RTmac ++disciplines when they add a rtskb to their internal transmission queue. In such ++a case, the RTSKB_CAP_RTMAC_STAMP bit is set in cap_flags to indicate that the ++cap_rtmac_stamp field now contains valid data. 
++ ++ ***/ ++ ++#ifndef CHECKSUM_PARTIAL ++#define CHECKSUM_PARTIAL CHECKSUM_HW ++#endif ++ ++#define RTSKB_CAP_SHARED 1 /* rtskb shared between stack and RTcap */ ++#define RTSKB_CAP_RTMAC_STAMP 2 /* cap_rtmac_stamp is valid */ ++ ++#define RTSKB_UNMAPPED 0 ++ ++struct rtskb_queue; ++struct rtsocket; ++struct rtnet_device; ++ ++/*** ++ * rtskb - realtime socket buffer ++ */ ++struct rtskb { ++ struct rtskb *next; /* used for queuing rtskbs */ ++ struct rtskb *chain_end; /* marks the end of a rtskb chain starting ++ with this very rtskb */ ++ ++ struct rtskb_pool *pool; /* owning pool */ ++ ++ unsigned int priority; /* bit 0..15: prio, 16..31: user-defined */ ++ ++ struct rtsocket *sk; /* assigned socket */ ++ struct rtnet_device *rtdev; /* source or destination device */ ++ ++ nanosecs_abs_t time_stamp; /* arrival or transmission (RTcap) time */ ++ ++ /* patch address of the transmission time stamp, can be NULL ++ * calculation: *xmit_stamp = cpu_to_be64(time_in_ns + *xmit_stamp) ++ */ ++ nanosecs_abs_t *xmit_stamp; ++ ++ /* transport layer */ ++ union { ++ struct tcphdr *th; ++ struct udphdr *uh; ++ struct icmphdr *icmph; ++ struct iphdr *ipihdr; ++ unsigned char *raw; ++ } h; ++ ++ /* network layer */ ++ union { ++ struct iphdr *iph; ++ struct arphdr *arph; ++ unsigned char *raw; ++ } nh; ++ ++ /* link layer */ ++ union { ++ struct ethhdr *ethernet; ++ unsigned char *raw; ++ } mac; ++ ++ unsigned short protocol; ++ unsigned char pkt_type; ++ ++ unsigned char ip_summed; ++ unsigned int csum; ++ ++ unsigned char *data; ++ unsigned char *tail; ++ unsigned char *end; ++ unsigned int len; ++ ++ dma_addr_t buf_dma_addr; ++ ++ unsigned char *buf_start; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++ unsigned char *buf_end; ++#endif ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ int cap_flags; /* see RTSKB_CAP_xxx */ ++ struct rtskb *cap_comp_skb; /* compensation rtskb */ ++ struct rtskb *cap_next; /* used for capture queue */ ++ unsigned char *cap_start; /* start offset for capturing */ ++ unsigned int cap_len; /* capture length of this rtskb */ ++ nanosecs_abs_t cap_rtmac_stamp; /* RTmac enqueuing time */ ++#endif ++ ++ struct list_head entry; /* for global rtskb list */ ++}; ++ ++struct rtskb_queue { ++ struct rtskb *first; ++ struct rtskb *last; ++ rtdm_lock_t lock; ++}; ++ ++struct rtskb_pool_lock_ops { ++ int (*trylock)(void *cookie); ++ void (*unlock)(void *cookie); ++}; ++ ++struct rtskb_pool { ++ struct rtskb_queue queue; ++ const struct rtskb_pool_lock_ops *lock_ops; ++ void *lock_cookie; ++}; ++ ++#define QUEUE_MAX_PRIO 0 ++#define QUEUE_MIN_PRIO 31 ++ ++struct rtskb_prio_queue { ++ rtdm_lock_t lock; ++ unsigned long usage; /* bit array encoding non-empty sub-queues */ ++ struct rtskb_queue queue[QUEUE_MIN_PRIO + 1]; ++}; ++ ++#define RTSKB_PRIO_MASK 0x0000FFFF /* bits 0..15: xmit prio */ ++#define RTSKB_CHANNEL_MASK 0xFFFF0000 /* bits 16..31: xmit channel */ ++#define RTSKB_CHANNEL_SHIFT 16 ++ ++#define RTSKB_DEF_RT_CHANNEL SOCK_DEF_RT_CHANNEL ++#define RTSKB_DEF_NRT_CHANNEL SOCK_DEF_NRT_CHANNEL ++#define RTSKB_USER_CHANNEL SOCK_USER_CHANNEL ++ ++/* Note: always keep SOCK_XMIT_PARAMS consistent with definitions above! 
*/ ++#define RTSKB_PRIO_VALUE SOCK_XMIT_PARAMS ++ ++/* default values for the module parameter */ ++#define DEFAULT_GLOBAL_RTSKBS 0 /* default number of rtskb's in global pool */ ++#define DEFAULT_DEVICE_RTSKBS \ ++ 16 /* default additional rtskbs per network adapter */ ++#define DEFAULT_SOCKET_RTSKBS 16 /* default number of rtskb's in socket pools */ ++ ++#define ALIGN_RTSKB_STRUCT_LEN SKB_DATA_ALIGN(sizeof(struct rtskb)) ++#define RTSKB_SIZE 1544 /* maximum needed by pcnet32-rt */ ++ ++extern unsigned int rtskb_pools; /* current number of rtskb pools */ ++extern unsigned int rtskb_pools_max; /* maximum number of rtskb pools */ ++extern unsigned int rtskb_amount; /* current number of allocated rtskbs */ ++extern unsigned int rtskb_amount_max; /* maximum number of allocated rtskbs */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++extern void rtskb_over_panic(struct rtskb *skb, int len, void *here); ++extern void rtskb_under_panic(struct rtskb *skb, int len, void *here); ++#endif ++ ++extern struct rtskb *rtskb_pool_dequeue(struct rtskb_pool *pool); ++ ++extern void rtskb_pool_queue_tail(struct rtskb_pool *pool, struct rtskb *skb); ++ ++extern struct rtskb *alloc_rtskb(unsigned int size, struct rtskb_pool *pool); ++ ++extern void kfree_rtskb(struct rtskb *skb); ++#define dev_kfree_rtskb(a) kfree_rtskb(a) ++ ++static inline void rtskb_tx_timestamp(struct rtskb *skb) ++{ ++ nanosecs_abs_t *ts = skb->xmit_stamp; ++ ++ if (!ts) ++ return; ++ ++ *ts = cpu_to_be64(rtdm_clock_read() + *ts); ++} ++ ++/*** ++ * rtskb_queue_init - initialize the queue ++ * @queue ++ */ ++static inline void rtskb_queue_init(struct rtskb_queue *queue) ++{ ++ rtdm_lock_init(&queue->lock); ++ queue->first = NULL; ++ queue->last = NULL; ++} ++ ++/*** ++ * rtskb_prio_queue_init - initialize the prioritized queue ++ * @prioqueue ++ */ ++static inline void rtskb_prio_queue_init(struct rtskb_prio_queue *prioqueue) ++{ ++ memset(prioqueue, 0, sizeof(struct rtskb_prio_queue)); ++ rtdm_lock_init(&prioqueue->lock); ++} ++ ++/*** ++ * rtskb_queue_empty ++ * @queue ++ */ ++static inline int rtskb_queue_empty(struct rtskb_queue *queue) ++{ ++ return (queue->first == NULL); ++} ++ ++/*** ++ * rtskb__prio_queue_empty ++ * @queue ++ */ ++static inline int rtskb_prio_queue_empty(struct rtskb_prio_queue *prioqueue) ++{ ++ return (prioqueue->usage == 0); ++} ++ ++/*** ++ * __rtskb_queue_head - insert a buffer at the queue head (w/o locks) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void __rtskb_queue_head(struct rtskb_queue *queue, ++ struct rtskb *skb) ++{ ++ struct rtskb *chain_end = skb->chain_end; ++ ++ chain_end->next = queue->first; ++ ++ if (queue->first == NULL) ++ queue->last = chain_end; ++ queue->first = skb; ++} ++ ++/*** ++ * rtskb_queue_head - insert a buffer at the queue head (lock protected) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void rtskb_queue_head(struct rtskb_queue *queue, ++ struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ __rtskb_queue_head(queue, skb); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++} ++ ++/*** ++ * __rtskb_prio_queue_head - insert a buffer at the prioritized queue head ++ * (w/o locks) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void __rtskb_prio_queue_head(struct rtskb_prio_queue *prioqueue, ++ struct rtskb *skb) ++{ ++ unsigned int prio = skb->priority & RTSKB_PRIO_MASK; ++ ++ RTNET_ASSERT(prio <= 31, prio = 31;); ++ ++ 
__rtskb_queue_head(&prioqueue->queue[prio], skb); ++ __set_bit(prio, &prioqueue->usage); ++} ++ ++/*** ++ * rtskb_prio_queue_head - insert a buffer at the prioritized queue head ++ * (lock protected) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void rtskb_prio_queue_head(struct rtskb_prio_queue *prioqueue, ++ struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&prioqueue->lock, context); ++ __rtskb_prio_queue_head(prioqueue, skb); ++ rtdm_lock_put_irqrestore(&prioqueue->lock, context); ++} ++ ++/*** ++ * __rtskb_queue_tail - insert a buffer at the queue tail (w/o locks) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void __rtskb_queue_tail(struct rtskb_queue *queue, ++ struct rtskb *skb) ++{ ++ struct rtskb *chain_end = skb->chain_end; ++ ++ chain_end->next = NULL; ++ ++ if (queue->first == NULL) ++ queue->first = skb; ++ else ++ queue->last->next = skb; ++ queue->last = chain_end; ++} ++ ++/*** ++ * rtskb_queue_tail - insert a buffer at the queue tail (lock protected) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void rtskb_queue_tail(struct rtskb_queue *queue, ++ struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ __rtskb_queue_tail(queue, skb); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++} ++ ++/*** ++ * rtskb_queue_tail_check - variant of rtskb_queue_tail ++ * returning true on empty->non empty transition. ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline bool rtskb_queue_tail_check(struct rtskb_queue *queue, ++ struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ bool ret; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ ret = queue->first == NULL; ++ __rtskb_queue_tail(queue, skb); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++ ++ return ret; ++} ++ ++/*** ++ * __rtskb_prio_queue_tail - insert a buffer at the prioritized queue tail ++ * (w/o locks) ++ * @prioqueue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void __rtskb_prio_queue_tail(struct rtskb_prio_queue *prioqueue, ++ struct rtskb *skb) ++{ ++ unsigned int prio = skb->priority & RTSKB_PRIO_MASK; ++ ++ RTNET_ASSERT(prio <= 31, prio = 31;); ++ ++ __rtskb_queue_tail(&prioqueue->queue[prio], skb); ++ __set_bit(prio, &prioqueue->usage); ++} ++ ++/*** ++ * rtskb_prio_queue_tail - insert a buffer at the prioritized queue tail ++ * (lock protected) ++ * @prioqueue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void rtskb_prio_queue_tail(struct rtskb_prio_queue *prioqueue, ++ struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&prioqueue->lock, context); ++ __rtskb_prio_queue_tail(prioqueue, skb); ++ rtdm_lock_put_irqrestore(&prioqueue->lock, context); ++} ++ ++/*** ++ * __rtskb_dequeue - remove from the head of the queue (w/o locks) ++ * @queue: queue to remove from ++ */ ++static inline struct rtskb *__rtskb_dequeue(struct rtskb_queue *queue) ++{ ++ struct rtskb *result; ++ ++ if ((result = queue->first) != NULL) { ++ queue->first = result->next; ++ result->next = NULL; ++ } ++ ++ return result; ++} ++ ++/*** ++ * rtskb_dequeue - remove from the head of the queue (lock protected) ++ * @queue: queue to remove from ++ */ ++static inline struct rtskb *rtskb_dequeue(struct rtskb_queue *queue) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *result; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ result = __rtskb_dequeue(queue); ++ 
rtdm_lock_put_irqrestore(&queue->lock, context); ++ ++ return result; ++} ++ ++/*** ++ * __rtskb_prio_dequeue - remove from the head of the prioritized queue ++ * (w/o locks) ++ * @prioqueue: queue to remove from ++ */ ++static inline struct rtskb * ++__rtskb_prio_dequeue(struct rtskb_prio_queue *prioqueue) ++{ ++ int prio; ++ struct rtskb *result = NULL; ++ struct rtskb_queue *sub_queue; ++ ++ if (prioqueue->usage) { ++ prio = ffz(~prioqueue->usage); ++ sub_queue = &prioqueue->queue[prio]; ++ result = __rtskb_dequeue(sub_queue); ++ if (rtskb_queue_empty(sub_queue)) ++ __change_bit(prio, &prioqueue->usage); ++ } ++ ++ return result; ++} ++ ++/*** ++ * rtskb_prio_dequeue - remove from the head of the prioritized queue ++ * (lock protected) ++ * @prioqueue: queue to remove from ++ */ ++static inline struct rtskb * ++rtskb_prio_dequeue(struct rtskb_prio_queue *prioqueue) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *result; ++ ++ rtdm_lock_get_irqsave(&prioqueue->lock, context); ++ result = __rtskb_prio_dequeue(prioqueue); ++ rtdm_lock_put_irqrestore(&prioqueue->lock, context); ++ ++ return result; ++} ++ ++/*** ++ * __rtskb_dequeue_chain - remove a chain from the head of the queue ++ * (w/o locks) ++ * @queue: queue to remove from ++ */ ++static inline struct rtskb *__rtskb_dequeue_chain(struct rtskb_queue *queue) ++{ ++ struct rtskb *result; ++ struct rtskb *chain_end; ++ ++ if ((result = queue->first) != NULL) { ++ chain_end = result->chain_end; ++ queue->first = chain_end->next; ++ chain_end->next = NULL; ++ } ++ ++ return result; ++} ++ ++/*** ++ * rtskb_dequeue_chain - remove a chain from the head of the queue ++ * (lock protected) ++ * @queue: queue to remove from ++ */ ++static inline struct rtskb *rtskb_dequeue_chain(struct rtskb_queue *queue) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *result; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ result = __rtskb_dequeue_chain(queue); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++ ++ return result; ++} ++ ++/*** ++ * rtskb_prio_dequeue_chain - remove a chain from the head of the ++ * prioritized queue ++ * @prioqueue: queue to remove from ++ */ ++static inline struct rtskb * ++rtskb_prio_dequeue_chain(struct rtskb_prio_queue *prioqueue) ++{ ++ rtdm_lockctx_t context; ++ int prio; ++ struct rtskb *result = NULL; ++ struct rtskb_queue *sub_queue; ++ ++ rtdm_lock_get_irqsave(&prioqueue->lock, context); ++ if (prioqueue->usage) { ++ prio = ffz(~prioqueue->usage); ++ sub_queue = &prioqueue->queue[prio]; ++ result = __rtskb_dequeue_chain(sub_queue); ++ if (rtskb_queue_empty(sub_queue)) ++ __change_bit(prio, &prioqueue->usage); ++ } ++ rtdm_lock_put_irqrestore(&prioqueue->lock, context); ++ ++ return result; ++} ++ ++/*** ++ * rtskb_queue_purge - clean the queue ++ * @queue ++ */ ++static inline void rtskb_queue_purge(struct rtskb_queue *queue) ++{ ++ struct rtskb *skb; ++ while ((skb = rtskb_dequeue(queue)) != NULL) ++ kfree_rtskb(skb); ++} ++ ++static inline int rtskb_headlen(const struct rtskb *skb) ++{ ++ return skb->len; ++} ++ ++static inline void rtskb_reserve(struct rtskb *skb, unsigned int len) ++{ ++ skb->data += len; ++ skb->tail += len; ++} ++ ++static inline unsigned char *__rtskb_put(struct rtskb *skb, unsigned int len) ++{ ++ unsigned char *tmp = skb->tail; ++ ++ skb->tail += len; ++ skb->len += len; ++ return tmp; ++} ++ ++#define rtskb_put(skb, length) \ ++ ({ \ ++ struct rtskb *__rtskb = (skb); \ ++ unsigned int __len = (length); \ ++ unsigned char *tmp = __rtskb->tail; \ ++ \ ++ __rtskb->tail += __len; \ 
++ __rtskb->len += __len; \ ++ \ ++ RTNET_ASSERT(__rtskb->tail <= __rtskb->buf_end, \ ++ rtskb_over_panic(__rtskb, __len, \ ++ current_text_addr());); \ ++ \ ++ tmp; \ ++ }) ++ ++static inline unsigned char *__rtskb_push(struct rtskb *skb, unsigned int len) ++{ ++ skb->data -= len; ++ skb->len += len; ++ return skb->data; ++} ++ ++#define rtskb_push(skb, length) \ ++ ({ \ ++ struct rtskb *__rtskb = (skb); \ ++ unsigned int __len = (length); \ ++ \ ++ __rtskb->data -= __len; \ ++ __rtskb->len += __len; \ ++ \ ++ RTNET_ASSERT(__rtskb->data >= __rtskb->buf_start, \ ++ rtskb_under_panic(__rtskb, __len, \ ++ current_text_addr());); \ ++ \ ++ __rtskb->data; \ ++ }) ++ ++static inline unsigned char *__rtskb_pull(struct rtskb *skb, unsigned int len) ++{ ++ RTNET_ASSERT(len <= skb->len, return NULL;); ++ ++ skb->len -= len; ++ ++ return skb->data += len; ++} ++ ++static inline unsigned char *rtskb_pull(struct rtskb *skb, unsigned int len) ++{ ++ if (len > skb->len) ++ return NULL; ++ ++ skb->len -= len; ++ ++ return skb->data += len; ++} ++ ++static inline void rtskb_trim(struct rtskb *skb, unsigned int len) ++{ ++ if (skb->len > len) { ++ skb->len = len; ++ skb->tail = skb->data + len; ++ } ++} ++ ++static inline struct rtskb *rtskb_padto(struct rtskb *rtskb, unsigned int len) ++{ ++ RTNET_ASSERT(len <= (unsigned int)(rtskb->buf_end + 1 - rtskb->data), ++ return NULL;); ++ ++ memset(rtskb->data + rtskb->len, 0, len - rtskb->len); ++ ++ return rtskb; ++} ++ ++static inline dma_addr_t rtskb_data_dma_addr(struct rtskb *rtskb, ++ unsigned int offset) ++{ ++ return rtskb->buf_dma_addr + rtskb->data - rtskb->buf_start + offset; ++} ++ ++extern struct rtskb_pool global_pool; ++ ++extern unsigned int rtskb_pool_init(struct rtskb_pool *pool, ++ unsigned int initial_size, ++ const struct rtskb_pool_lock_ops *lock_ops, ++ void *lock_cookie); ++ ++extern unsigned int __rtskb_module_pool_init(struct rtskb_pool *pool, ++ unsigned int initial_size, ++ struct module *module); ++ ++#define rtskb_module_pool_init(pool, size) \ ++ __rtskb_module_pool_init(pool, size, THIS_MODULE) ++ ++extern void rtskb_pool_release(struct rtskb_pool *pool); ++ ++extern unsigned int rtskb_pool_extend(struct rtskb_pool *pool, ++ unsigned int add_rtskbs); ++extern unsigned int rtskb_pool_shrink(struct rtskb_pool *pool, ++ unsigned int rem_rtskbs); ++extern int rtskb_acquire(struct rtskb *rtskb, struct rtskb_pool *comp_pool); ++extern struct rtskb *rtskb_clone(struct rtskb *rtskb, struct rtskb_pool *pool); ++ ++extern int rtskb_pools_init(void); ++extern void rtskb_pools_release(void); ++ ++extern unsigned int rtskb_copy_and_csum_bits(const struct rtskb *skb, ++ int offset, u8 *to, int len, ++ unsigned int csum); ++extern void rtskb_copy_and_csum_dev(const struct rtskb *skb, u8 *to); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ ++extern rtdm_lock_t rtcap_lock; ++extern void (*rtcap_handler)(struct rtskb *skb); ++ ++static inline void rtcap_mark_incoming(struct rtskb *skb) ++{ ++ skb->cap_start = skb->data; ++ skb->cap_len = skb->len; ++} ++ ++static inline void rtcap_report_incoming(struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ if (rtcap_handler != NULL) ++ rtcap_handler(skb); ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++} ++ ++static inline void rtcap_mark_rtmac_enqueue(struct rtskb *skb) ++{ ++ /* rtskb start and length are probably not valid yet */ ++ skb->cap_flags |= RTSKB_CAP_RTMAC_STAMP; ++ skb->cap_rtmac_stamp = rtdm_clock_read(); ++} ++ ++#else 
/* ifndef CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP */ ++ ++#define rtcap_mark_incoming(skb) ++#define rtcap_report_incoming(skb) ++#define rtcap_mark_rtmac_enqueue(skb) ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP */ ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTSKB_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtnet_iovec.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_iovec.h 2021-04-07 16:01:26.867634681 +0800 +@@ -0,0 +1,38 @@ ++/* rtnet_iovec.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++#ifndef __RTNET_IOVEC_H_ ++#define __RTNET_IOVEC_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++struct user_msghdr; ++struct rtdm_fd; ++ ++ssize_t rtnet_write_to_iov(struct rtdm_fd *fd, struct iovec *iov, int iovlen, ++ const void *data, size_t len); ++ ++ssize_t rtnet_read_from_iov(struct rtdm_fd *fd, struct iovec *iov, int iovlen, ++ void *data, size_t len); ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTNET_IOVEC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtdev.h 2021-04-07 16:01:26.863634686 +0800 +@@ -0,0 +1,275 @@ ++/*** ++ * ++ * rtdev.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTDEV_H_ ++#define __RTDEV_H_ ++ ++#define MAX_RT_DEVICES 8 ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++ ++#include ++#include ++ ++#define RTDEV_VERS_2_0 0x0200 ++ ++#define PRIV_FLAG_UP 0 ++#define PRIV_FLAG_ADDING_ROUTE 1 ++ ++#ifndef NETIF_F_LLTX ++#define NETIF_F_LLTX 4096 ++#endif ++ ++#define RTDEV_TX_OK 0 ++#define RTDEV_TX_BUSY 1 ++ ++enum rtnet_link_state { ++ __RTNET_LINK_STATE_XOFF = 0, ++ __RTNET_LINK_STATE_START, ++ __RTNET_LINK_STATE_PRESENT, ++ __RTNET_LINK_STATE_NOCARRIER, ++}; ++#define RTNET_LINK_STATE_XOFF (1 << __RTNET_LINK_STATE_XOFF) ++#define RTNET_LINK_STATE_START (1 << __RTNET_LINK_STATE_START) ++#define RTNET_LINK_STATE_PRESENT (1 << __RTNET_LINK_STATE_PRESENT) ++#define RTNET_LINK_STATE_NOCARRIER (1 << __RTNET_LINK_STATE_NOCARRIER) ++ ++/*** ++ * rtnet_device ++ */ ++struct rtnet_device { ++ /* Many field are borrowed from struct net_device in ++ * - WY ++ */ ++ unsigned int vers; ++ ++ char name[IFNAMSIZ]; ++ struct device *sysbind; /* device bound in sysfs (optional) */ ++ ++ unsigned long rmem_end; /* shmem "recv" end */ ++ unsigned long rmem_start; /* shmem "recv" start */ ++ unsigned long mem_end; /* shared mem end */ ++ unsigned long mem_start; /* shared mem start */ ++ unsigned long base_addr; /* device I/O address */ ++ unsigned int irq; /* device IRQ number */ ++ ++ /* ++ * Some hardware also needs these fields, but they are not ++ * part of the usual set specified in Space.c. ++ */ ++ unsigned char if_port; /* Selectable AUI, TP,..*/ ++ unsigned char dma; /* DMA channel */ ++ __u16 __padding; ++ ++ unsigned long link_state; ++ int ifindex; ++ atomic_t refcount; ++ ++ struct device *sysdev; /* node in driver model for sysfs */ ++ struct module *rt_owner; /* like classic owner, but * ++ * forces correct macro usage */ ++ ++ unsigned int flags; /* interface flags (a la BSD) */ ++ unsigned long priv_flags; /* internal flags */ ++ unsigned short type; /* interface hardware type */ ++ unsigned short hard_header_len; /* hardware hdr length */ ++ unsigned int mtu; /* eth = 1536, tr = 4... */ ++ void *priv; /* pointer to private data */ ++ netdev_features_t features; /* [RT]NETIF_F_* */ ++ ++ /* Interface address info. 
*/ ++ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ ++ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ ++ unsigned char addr_len; /* hardware address length */ ++ ++ int promiscuity; ++ int allmulti; ++ ++ __u32 local_ip; /* IP address in network order */ ++ __u32 broadcast_ip; /* broadcast IP in network order */ ++ ++ rtdm_event_t *stack_event; ++ ++ rtdm_mutex_t xmit_mutex; /* protects xmit routine */ ++ rtdm_lock_t rtdev_lock; /* management lock */ ++ struct mutex nrt_lock; /* non-real-time locking */ ++ ++ unsigned int add_rtskbs; /* additionally allocated global rtskbs */ ++ ++ struct rtskb_pool dev_pool; ++ ++ /* RTmac related fields */ ++ struct rtmac_disc *mac_disc; ++ struct rtmac_priv *mac_priv; ++ int (*mac_detach)(struct rtnet_device *rtdev); ++ ++ /* Device operations */ ++ int (*open)(struct rtnet_device *rtdev); ++ int (*stop)(struct rtnet_device *rtdev); ++ int (*hard_header)(struct rtskb *, struct rtnet_device *, ++ unsigned short type, void *daddr, void *saddr, ++ unsigned int len); ++ int (*rebuild_header)(struct rtskb *); ++ int (*hard_start_xmit)(struct rtskb *skb, struct rtnet_device *dev); ++ int (*hw_reset)(struct rtnet_device *rtdev); ++ ++ /* Transmission hook, managed by the stack core, RTcap, and RTmac ++ * ++ * If xmit_lock is used, start_xmit points either to rtdev_locked_xmit or ++ * the RTmac discipline handler. If xmit_lock is not required, start_xmit ++ * points to hard_start_xmit or the discipline handler. ++ */ ++ int (*start_xmit)(struct rtskb *skb, struct rtnet_device *dev); ++ ++ /* MTU hook, managed by the stack core and RTmac */ ++ unsigned int (*get_mtu)(struct rtnet_device *rtdev, ++ unsigned int priority); ++ ++ int (*do_ioctl)(struct rtnet_device *rtdev, struct ifreq *ifr, int cmd); ++ struct net_device_stats *(*get_stats)(struct rtnet_device *rtdev); ++ ++ /* DMA pre-mapping hooks */ ++ dma_addr_t (*map_rtskb)(struct rtnet_device *rtdev, struct rtskb *skb); ++ void (*unmap_rtskb)(struct rtnet_device *rtdev, struct rtskb *skb); ++}; ++ ++struct rtnet_core_cmd; ++ ++struct rtdev_event_hook { ++ struct list_head entry; ++ void (*register_device)(struct rtnet_device *rtdev); ++ void (*unregister_device)(struct rtnet_device *rtdev); ++ void (*ifup)(struct rtnet_device *rtdev, struct rtnet_core_cmd *up_cmd); ++ void (*ifdown)(struct rtnet_device *rtdev); ++}; ++ ++extern struct list_head event_hook_list; ++extern struct mutex rtnet_devices_nrt_lock; ++extern struct rtnet_device *rtnet_devices[]; ++ ++int __rt_init_etherdev(struct rtnet_device *rtdev, unsigned int dev_pool_size, ++ struct module *module); ++ ++#define rt_init_etherdev(__rtdev, __dev_pool_size) \ ++ __rt_init_etherdev(__rtdev, __dev_pool_size, THIS_MODULE) ++ ++struct rtnet_device *__rt_alloc_etherdev(unsigned sizeof_priv, ++ unsigned dev_pool_size, ++ struct module *module); ++#define rt_alloc_etherdev(priv_size, rx_size) \ ++ __rt_alloc_etherdev(priv_size, rx_size, THIS_MODULE) ++ ++void rtdev_destroy(struct rtnet_device *rtdev); ++ ++void rtdev_free(struct rtnet_device *rtdev); ++ ++int rt_register_rtnetdev(struct rtnet_device *rtdev); ++int rt_unregister_rtnetdev(struct rtnet_device *rtdev); ++ ++void rtdev_add_event_hook(struct rtdev_event_hook *hook); ++void rtdev_del_event_hook(struct rtdev_event_hook *hook); ++ ++void rtdev_alloc_name(struct rtnet_device *rtdev, const char *name_mask); ++ ++/** ++ * __rtdev_get_by_index - find a rtnet_device by its ifindex ++ * @ifindex: index of device ++ * @note: caller must hold rtnet_devices_nrt_lock ++ */ ++static inline 
struct rtnet_device *__rtdev_get_by_index(int ifindex) ++{ ++ return rtnet_devices[ifindex - 1]; ++} ++ ++struct rtnet_device *rtdev_get_by_name(const char *if_name); ++struct rtnet_device *rtdev_get_by_index(int ifindex); ++struct rtnet_device *rtdev_get_by_hwaddr(unsigned short type, char *ha); ++struct rtnet_device *rtdev_get_loopback(void); ++ ++int rtdev_reference(struct rtnet_device *rtdev); ++ ++static inline void rtdev_dereference(struct rtnet_device *rtdev) ++{ ++ smp_mb__before_atomic(); ++ if (rtdev->rt_owner && atomic_dec_and_test(&rtdev->refcount)) ++ module_put(rtdev->rt_owner); ++} ++ ++int rtdev_xmit(struct rtskb *skb); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++int rtdev_xmit_proxy(struct rtskb *skb); ++#endif ++ ++unsigned int rt_hard_mtu(struct rtnet_device *rtdev, unsigned int priority); ++ ++int rtdev_open(struct rtnet_device *rtdev); ++int rtdev_close(struct rtnet_device *rtdev); ++ ++int rtdev_up(struct rtnet_device *rtdev, struct rtnet_core_cmd *cmd); ++int rtdev_down(struct rtnet_device *rtdev); ++ ++int rtdev_map_rtskb(struct rtskb *skb); ++void rtdev_unmap_rtskb(struct rtskb *skb); ++ ++struct rtskb *rtnetdev_alloc_rtskb(struct rtnet_device *dev, unsigned int size); ++ ++#define rtnetdev_priv(dev) ((dev)->priv) ++ ++#define rtdev_emerg(__dev, format, args...) \ ++ pr_emerg("%s: " format, (__dev)->name, ##args) ++#define rtdev_alert(__dev, format, args...) \ ++ pr_alert("%s: " format, (__dev)->name, ##args) ++#define rtdev_crit(__dev, format, args...) \ ++ pr_crit("%s: " format, (__dev)->name, ##args) ++#define rtdev_err(__dev, format, args...) \ ++ pr_err("%s: " format, (__dev)->name, ##args) ++#define rtdev_warn(__dev, format, args...) \ ++ pr_warn("%s: " format, (__dev)->name, ##args) ++#define rtdev_notice(__dev, format, args...) \ ++ pr_notice("%s: " format, (__dev)->name, ##args) ++#define rtdev_info(__dev, format, args...) \ ++ pr_info("%s: " format, (__dev)->name, ##args) ++#define rtdev_dbg(__dev, format, args...) \ ++ pr_debug("%s: " format, (__dev)->name, ##args) ++ ++#ifdef VERBOSE_DEBUG ++#define rtdev_vdbg rtdev_dbg ++#else ++#define rtdev_vdbg(__dev, format, args...) \ ++ ({ \ ++ if (0) \ ++ pr_debug("%s: " format, (__dev)->name, ##args); \ ++ \ ++ 0; \ ++ }) ++#endif ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTDEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/tdma_chrdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/tdma_chrdev.h 2021-04-07 16:01:26.858634693 +0800 +@@ -0,0 +1,81 @@ ++/*** ++ * ++ * include/tdma_chrdev.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __TDMA_CHRDEV_H_ ++#define __TDMA_CHRDEV_H_ ++ ++#ifndef __KERNEL__ ++#include ++#endif ++ ++#include ++ ++#define MIN_SLOT_SIZE 60 ++ ++struct tdma_config { ++ struct rtnet_ioctl_head head; ++ ++ union { ++ struct { ++ __u64 cycle_period; ++ __u64 backup_sync_offset; ++ __u32 cal_rounds; ++ __u32 max_cal_requests; ++ __u32 max_slot_id; ++ } master; ++ ++ struct { ++ __u32 cal_rounds; ++ __u32 max_slot_id; ++ } slave; ++ ++ struct { ++ __s32 id; ++ __u32 period; ++ __u64 offset; ++ __u32 phasing; ++ __u32 size; ++ __s32 joint_slot; ++ __u32 cal_timeout; ++ __u64 *cal_results; ++ } set_slot; ++ ++ struct { ++ __s32 id; ++ } remove_slot; ++ ++ __u64 __padding[8]; ++ } args; ++}; ++ ++#define TDMA_IOC_MASTER _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 0, struct tdma_config) ++#define TDMA_IOC_SLAVE _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 1, struct tdma_config) ++#define TDMA_IOC_CAL_RESULT_SIZE \ ++ _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 2, struct tdma_config) ++#define TDMA_IOC_SET_SLOT _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 3, struct tdma_config) ++#define TDMA_IOC_REMOVE_SLOT \ ++ _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 4, struct tdma_config) ++#define TDMA_IOC_DETACH _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 5, struct tdma_config) ++ ++#endif /* __TDMA_CHRDEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/ip_output.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/ip_output.h 2021-04-07 16:01:26.853634701 +0800 +@@ -0,0 +1,42 @@ ++/*** ++ * ++ * include/ipv4/ip_output.h - prepare outgoing IP packets ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_IP_OUTPUT_H_ ++#define __RTNET_IP_OUTPUT_H_ ++ ++#include ++ ++#include ++#include ++ ++extern int rt_ip_build_xmit(struct rtsocket *sk, ++ int getfrag(const void *, unsigned char *, ++ unsigned int, unsigned int), ++ const void *frag, unsigned length, ++ struct dest_route *rt, int flags); ++ ++extern void __init rt_ip_init(void); ++extern void rt_ip_release(void); ++ ++#endif /* __RTNET_IP_OUTPUT_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/route.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/route.h 2021-04-07 16:01:26.849634706 +0800 +@@ -0,0 +1,60 @@ ++/*** ++ * ++ * include/ipv4/route.h - real-time routing ++ * ++ * Copyright (C) 2004, 2005 Jan Kiszka ++ * ++ * Rewritten version of the original route by David Schleef and Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_ROUTE_H_ ++#define __RTNET_ROUTE_H_ ++ ++#include ++#include ++ ++#include ++ ++struct dest_route { ++ u32 ip; ++ unsigned char dev_addr[MAX_ADDR_LEN]; ++ struct rtnet_device *rtdev; ++}; ++ ++int rt_ip_route_add_host(u32 addr, unsigned char *dev_addr, ++ struct rtnet_device *rtdev); ++void rt_ip_route_del_all(struct rtnet_device *rtdev); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++int rt_ip_route_add_net(u32 addr, u32 mask, u32 gw_addr); ++int rt_ip_route_del_net(u32 addr, u32 mask); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER ++int rt_ip_route_forward(struct rtskb *rtskb, u32 daddr); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER */ ++ ++int rt_ip_route_del_host(u32 addr, struct rtnet_device *rtdev); ++int rt_ip_route_get_host(u32 addr, char *if_name, unsigned char *dev_addr, ++ struct rtnet_device *rtdev); ++int rt_ip_route_output(struct dest_route *rt_buf, u32 daddr, u32 saddr); ++ ++int __init rt_ip_routing_init(void); ++void rt_ip_routing_release(void); ++ ++#endif /* __RTNET_ROUTE_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/ip_sock.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/ip_sock.h 2021-04-07 16:01:26.844634713 +0800 +@@ -0,0 +1,31 @@ ++/*** ++ * ++ * include/ipv4/ip_sock.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_IP_SOCK_H_ ++#define __RTNET_IP_SOCK_H_ ++ ++#include ++ ++extern int rt_ip_ioctl(struct rtdm_fd *fd, int request, void *arg); ++ ++#endif /* __RTNET_IP_SOCK_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/af_inet.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/af_inet.h 2021-04-07 16:01:26.839634720 +0800 +@@ -0,0 +1,35 @@ ++/*** ++ * ++ * include/ipv4/af_inet.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_AF_INET_H_ ++#define __RTNET_AF_INET_H_ ++ ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++extern struct xnvfile_directory ipv4_proc_root; ++#endif ++ ++#endif /* __RTNET_AF_INET_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/protocol.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/protocol.h 2021-04-07 16:01:26.835634726 +0800 +@@ -0,0 +1,54 @@ ++/*** ++ * ++ * include/ipv4/protocol.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_PROTOCOL_H_ ++#define __RTNET_PROTOCOL_H_ ++ ++#include ++#include ++ ++#define MAX_RT_INET_PROTOCOLS 32 ++ ++/*** ++ * transport layer protocol ++ */ ++struct rtinet_protocol { ++ char *name; ++ unsigned short protocol; ++ ++ struct rtsocket *(*dest_socket)(struct rtskb *); ++ void (*rcv_handler)(struct rtskb *); ++ void (*err_handler)(struct rtskb *); ++ int (*init_socket)(struct rtdm_fd *); ++}; ++ ++extern struct rtinet_protocol *rt_inet_protocols[]; ++ ++#define rt_inet_hashkey(id) (id & (MAX_RT_INET_PROTOCOLS - 1)) ++extern void rt_inet_add_protocol(struct rtinet_protocol *prot); ++extern void rt_inet_del_protocol(struct rtinet_protocol *prot); ++extern int rt_inet_socket(struct rtdm_fd *fd, int protocol); ++ ++#endif /* __RTNET_PROTOCOL_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/udp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/udp.h 2021-04-07 16:01:26.830634733 +0800 +@@ -0,0 +1,33 @@ ++/*** ++ * ++ * include/ipv4/udp.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_UDP_H_ ++#define __RTNET_UDP_H_ ++ ++/* Maximum number of active udp sockets ++ Only increase with care (look-up delays!), must be power of 2 */ ++#define RT_UDP_SOCKETS 64 ++ ++#endif /* __RTNET_UDP_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/arp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/arp.h 2021-04-07 16:01:26.825634741 +0800 +@@ -0,0 +1,51 @@ ++/*** ++ * ++ * include/ipv4/arp.h - Adress Resolution Protocol for RTnet ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_ARP_H_ ++#define __RTNET_ARP_H_ ++ ++#include ++#include ++#include ++ ++#include ++ ++#define RT_ARP_SKB_PRIO \ ++ RTSKB_PRIO_VALUE(QUEUE_MIN_PRIO - 1, RTSKB_DEF_NRT_CHANNEL) ++ ++void rt_arp_send(int type, int ptype, u32 dest_ip, struct rtnet_device *rtdev, ++ u32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, ++ unsigned char *target_hw); ++ ++static inline void rt_arp_solicit(struct rtnet_device *rtdev, u32 target) ++{ ++ rt_arp_send(ARPOP_REQUEST, ETH_P_ARP, target, rtdev, rtdev->local_ip, ++ NULL, NULL, NULL); ++} ++ ++void __init rt_arp_init(void); ++void rt_arp_release(void); ++ ++#endif /* __RTNET_ARP_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/icmp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/icmp.h 2021-04-07 16:01:26.820634748 +0800 +@@ -0,0 +1,56 @@ ++/*** ++ * ++ * ipv4/icmp.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTNET_ICMP_H_ ++#define __RTNET_ICMP_H_ ++ ++#include ++ ++#include ++#include ++#include ++ ++#define RT_ICMP_PRIO RTSKB_PRIO_VALUE(QUEUE_MIN_PRIO - 1, RTSKB_DEF_NRT_CHANNEL) ++ ++#define ICMP_REPLY_POOL_SIZE 8 ++ ++void rt_icmp_queue_echo_request(struct rt_proc_call *call); ++void rt_icmp_dequeue_echo_request(struct rt_proc_call *call); ++void rt_icmp_cleanup_echo_requests(void); ++int rt_icmp_send_echo(u32 daddr, u16 id, u16 sequence, size_t msg_size); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP ++void __init rt_icmp_init(void); ++void rt_icmp_release(void); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP */ ++#define rt_icmp_init() \ ++ do { \ ++ } while (0) ++#define rt_icmp_release() \ ++ do { \ ++ } while (0) ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP */ ++ ++#endif /* __RTNET_ICMP_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/ip_fragment.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/ip_fragment.h 2021-04-07 16:01:26.816634753 +0800 +@@ -0,0 +1,37 @@ ++/* ipv4/ip_fragment.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++#ifndef __RTNET_IP_FRAGMENT_H_ ++#define __RTNET_IP_FRAGMENT_H_ ++ ++#include ++ ++#include ++#include ++ ++extern struct rtskb *rt_ip_defrag(struct rtskb *skb, ++ struct rtinet_protocol *ipprot); ++ ++extern void rt_ip_frag_invalidate_socket(struct rtsocket *sock); ++ ++extern int __init rt_ip_fragment_init(void); ++extern void rt_ip_fragment_cleanup(void); ++ ++#endif /* __RTNET_IP_FRAGMENT_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/ip_input.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/ip_input.h 2021-04-07 16:01:26.811634761 +0800 +@@ -0,0 +1,45 @@ ++/* ipv4/ip_input.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ */ ++#ifndef __RTNET_IP_INPUT_H_ ++#define __RTNET_IP_INPUT_H_ ++ ++#include ++#include ++ ++extern int rt_ip_rcv(struct rtskb *skb, struct rtpacket_type *pt); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++typedef void (*rt_ip_fallback_handler_t)(struct rtskb *skb); ++ ++/* ++ * This hook can be used to register a fallback handler for incoming ++ * IP packets. Typically this is done to move over to the standard Linux ++ * IP protocol (e.g. for handling TCP). ++ * Manipulating the fallback handler is expected to happen only when the ++ * RTnetinterfaces are shut down (avoiding race conditions). ++ * ++ * Note that merging RT and non-RT traffic this way most likely breaks hard ++ * real-time constraints! ++ */ ++extern rt_ip_fallback_handler_t rt_ip_fallback_handler; ++#endif ++ ++#endif /* __RTNET_IP_INPUT_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/tcp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/tcp.h 2021-04-07 16:01:26.806634768 +0800 +@@ -0,0 +1,50 @@ ++/*** ++ * ++ * include/ipv4/tcp.h ++ * ++ * Copyright (C) 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License, version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_TCP_H_ ++#define __RTNET_TCP_H_ ++ ++#include ++#include ++ ++/* Maximum number of active tcp sockets, must be power of 2 */ ++#define RT_TCP_SOCKETS 32 ++ ++/*Maximum number of active tcp connections, must be power of 2 */ ++#define RT_TCP_CONNECTIONS 64 ++ ++/* Maximum size of TCP input window */ ++#define RT_TCP_WINDOW 4096 ++ ++/* Maximum number of retransmissions of invalid segments */ ++#define RT_TCP_RETRANSMIT 3 ++ ++/* Number of milliseconds to wait for ACK */ ++#define RT_TCP_WAIT_TIME 10 ++ ++/* Priority of RST|ACK replies (error condition => non-RT prio) */ ++#define RT_TCP_RST_PRIO \ ++ RTSKB_PRIO_VALUE(QUEUE_MIN_PRIO - 1, RTSKB_DEF_NRT_CHANNEL) ++ ++/* rtskb pool for sending socket-less RST|ACK */ ++#define RT_TCP_RST_POOL_SIZE 8 ++ ++#endif /* __RTNET_TCP_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtdev_mgr.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtdev_mgr.h 2021-04-07 16:01:26.802634774 +0800 +@@ -0,0 +1,39 @@ ++/* rtdev_mgr.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++#ifndef __RTDEV_MGR_H_ ++#define __RTDEV_MGR_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++extern void rtnetif_err_rx(struct rtnet_device *rtdev); ++extern void rtnetif_err_tx(struct rtnet_device *rtdev); ++ ++extern void rt_rtdev_connect(struct rtnet_device *rtdev, struct rtnet_mgr *mgr); ++extern void rt_rtdev_disconnect(struct rtnet_device *rtdev); ++extern int rt_rtdev_mgr_init(struct rtnet_mgr *mgr); ++extern void rt_rtdev_mgr_delete(struct rtnet_mgr *mgr); ++extern int rt_rtdev_mgr_start(struct rtnet_mgr *mgr); ++extern int rt_rtdev_mgr_stop(struct rtnet_mgr *mgr); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTDEV_MGR_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtnet_chrdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_chrdev.h 2021-04-07 16:01:26.797634781 +0800 +@@ -0,0 +1,116 @@ ++/*** ++ * ++ * include/rtnet_chrdev.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999,2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003,2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTNET_CHRDEV_H_ ++#define __RTNET_CHRDEV_H_ ++ ++#include ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* new extensible interface */ ++struct rtnet_ioctls { ++ /* internal usage only */ ++ struct list_head entry; ++ atomic_t ref_count; ++ ++ /* provider specification */ ++ const char *service_name; ++ unsigned int ioctl_type; ++ int (*handler)(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg); ++}; ++ ++extern int rtnet_register_ioctls(struct rtnet_ioctls *ioctls); ++extern void rtnet_unregister_ioctls(struct rtnet_ioctls *ioctls); ++ ++extern int __init rtnet_chrdev_init(void); ++extern void rtnet_chrdev_release(void); ++ ++#else /* ifndef __KERNEL__ */ ++ ++#include /* IFNAMSIZ */ ++#include ++ ++#endif /* __KERNEL__ */ ++ ++#define RTNET_MINOR 240 /* user interface for /dev/rtnet */ ++#define DEV_ADDR_LEN 32 /* avoids inconsistent MAX_ADDR_LEN */ ++ ++struct rtnet_ioctl_head { ++ char if_name[IFNAMSIZ]; ++}; ++ ++struct rtnet_core_cmd { ++ struct rtnet_ioctl_head head; ++ ++ union { ++ /*** rtifconfig **/ ++ struct { ++ __u32 ip_addr; ++ __u32 broadcast_ip; ++ __u32 set_dev_flags; ++ __u32 clear_dev_flags; ++ __u32 dev_addr_type; ++ __u32 __padding; ++ __u8 dev_addr[DEV_ADDR_LEN]; ++ } up; ++ ++ struct { ++ __u32 ifindex; ++ __u32 type; ++ __u32 ip_addr; ++ __u32 broadcast_ip; ++ __u32 mtu; ++ __u32 flags; ++ __u8 dev_addr[DEV_ADDR_LEN]; ++ } info; ++ ++ __u64 __padding[8]; ++ } args; ++}; ++ ++#define RTNET_IOC_NODEV_PARAM 0x80 ++ ++#define RTNET_IOC_TYPE_CORE 0 ++#define RTNET_IOC_TYPE_RTCFG 1 ++#define RTNET_IOC_TYPE_IPV4 2 ++#define RTNET_IOC_TYPE_RTMAC_NOMAC 100 ++#define RTNET_IOC_TYPE_RTMAC_TDMA 110 ++ ++#define IOC_RT_IFUP _IOW(RTNET_IOC_TYPE_CORE, 0, struct rtnet_core_cmd) ++#define IOC_RT_IFDOWN _IOW(RTNET_IOC_TYPE_CORE, 1, struct rtnet_core_cmd) ++#define IOC_RT_IFINFO \ ++ _IOWR(RTNET_IOC_TYPE_CORE, 2 | RTNET_IOC_NODEV_PARAM, \ ++ struct rtnet_core_cmd) ++ ++#endif /* __RTNET_CHRDEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/rtmac_vnic.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/rtmac_vnic.h 2021-04-07 16:01:26.793634786 +0800 +@@ -0,0 +1,59 @@ ++/* include/rtmac/rtmac_vnic.h ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ */ ++ ++#ifndef __RTMAC_VNIC_H_ ++#define __RTMAC_VNIC_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++ ++#include ++ ++#define DEFAULT_VNIC_RTSKBS 32 ++ ++int rtmac_vnic_rx(struct rtskb *skb, u16 type); ++ ++int rtmac_vnic_xmit(struct sk_buff *skb, struct net_device *dev); ++ ++void rtmac_vnic_set_max_mtu(struct rtnet_device *rtdev, unsigned int max_mtu); ++ ++int rtmac_vnic_add(struct rtnet_device *rtdev, vnic_xmit_handler vnic_xmit); ++int rtmac_vnic_unregister(struct rtnet_device *rtdev); ++ ++static inline void rtmac_vnic_cleanup(struct rtnet_device *rtdev) ++{ ++ struct rtmac_priv *mac_priv = rtdev->mac_priv; ++ ++ rtskb_pool_release(&mac_priv->vnic_skb_pool); ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int rtnet_rtmac_vnics_show(struct xnvfile_regular_iterator *it, void *data); ++#endif ++ ++int __init rtmac_vnic_module_init(void); ++void rtmac_vnic_module_cleanup(void); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTMAC_VNIC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/nomac/nomac.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/nomac/nomac.h 2021-04-07 16:01:26.788634793 +0800 +@@ -0,0 +1,51 @@ ++/*** ++ * ++ * include/rtmac/nomac/nomac.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __NOMAC_H_ ++#define __NOMAC_H_ ++ ++#include ++ ++#include ++ ++#define RTMAC_TYPE_NOMAC 0 ++ ++#define NOMAC_MAGIC 0x004D0A0C ++ ++struct nomac_priv { ++ unsigned int magic; ++ struct rtnet_device *rtdev; ++ char device_name[32]; ++ struct rtdm_driver api_driver; ++ struct rtdm_device api_device; ++ /* ... */ ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct list_head list_entry; ++#endif ++}; ++ ++extern struct rtmac_disc nomac_disc; ++ ++#endif /* __NOMAC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_dev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_dev.h 2021-04-07 16:01:26.783634801 +0800 +@@ -0,0 +1,37 @@ ++/*** ++ * ++ * include/rtmac/nomac/nomac_dev.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __NOMAC_DEV_H_ ++#define __NOMAC_DEV_H_ ++ ++#include ++ ++int nomac_dev_init(struct rtnet_device *rtdev, struct nomac_priv *nomac); ++ ++static inline void nomac_dev_release(struct nomac_priv *nomac) ++{ ++ rtdm_dev_unregister(&nomac->api_device); ++} ++ ++#endif /* __NOMAC_DEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_proto.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_proto.h 2021-04-07 16:01:26.778634808 +0800 +@@ -0,0 +1,38 @@ ++/*** ++ * ++ * include/rtmac/nomac/nomac_proto.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __NOMAC_PROTO_H_ ++#define __NOMAC_PROTO_H_ ++ ++#include ++ ++int nomac_rt_packet_tx(struct rtskb *rtskb, struct rtnet_device *rtdev); ++int nomac_nrt_packet_tx(struct rtskb *rtskb); ++ ++int nomac_packet_rx(struct rtskb *rtskb); ++ ++int nomac_proto_init(void); ++void nomac_proto_cleanup(void); ++ ++#endif /* __NOMAC_PROTO_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_ioctl.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_ioctl.h 2021-04-07 16:01:26.774634813 +0800 +@@ -0,0 +1,31 @@ ++/*** ++ * ++ * include/rtmac/nomac/nomac_ioctl.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __NOMAC_IOCTL_H_ ++#define __NOMAC_IOCTL_H_ ++ ++int nomac_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg); ++ ++#endif /* __NOMAC_IOCTL_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/rtmac_proto.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/rtmac_proto.h 2021-04-07 16:01:26.769634821 +0800 +@@ -0,0 +1,78 @@ ++/*** ++ * ++ * include/rtmac/rtmac_proto.h ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTMAC_PROTO_H_ ++#define __RTMAC_PROTO_H_ ++ ++#include ++ ++#define RTMAC_VERSION 0x02 ++#define ETH_RTMAC 0x9021 ++ ++#define RTMAC_FLAG_TUNNEL 0x01 ++ ++struct rtmac_hdr { ++ u16 type; ++ u8 ver; ++ u8 flags; ++} __attribute__((packed)); ++ ++static inline int rtmac_add_header(struct rtnet_device *rtdev, void *daddr, ++ struct rtskb *skb, u16 type, u8 flags) ++{ ++ struct rtmac_hdr *hdr = ++ (struct rtmac_hdr *)rtskb_push(skb, sizeof(struct rtmac_hdr)); ++ ++ hdr->type = htons(type); ++ hdr->ver = RTMAC_VERSION; ++ hdr->flags = flags; ++ ++ skb->rtdev = rtdev; ++ ++ if (rtdev->hard_header && ++ (rtdev->hard_header(skb, rtdev, ETH_RTMAC, daddr, rtdev->dev_addr, ++ skb->len) < 0)) ++ return -1; ++ ++ return 0; ++} ++ ++static inline int rtmac_xmit(struct rtskb *skb) ++{ ++ struct rtnet_device *rtdev = skb->rtdev; ++ int ret; ++ ++ ret = rtdev->hard_start_xmit(skb, rtdev); ++ if (ret != 0) ++ kfree_rtskb(skb); ++ ++ return ret; ++} ++ ++extern struct rtpacket_type rtmac_packet_type; ++ ++#define rtmac_proto_init() rtdev_add_pack(&rtmac_packet_type) ++void rtmac_proto_release(void); ++ ++#endif /* __RTMAC_PROTO_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_ioctl.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_ioctl.h 2021-04-07 16:01:26.765634826 +0800 +@@ -0,0 +1,35 @@ ++/*** ++ * ++ * include/rtmac/tdma/tdma_ioctl.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __TDMA_IOCTL_H_ ++#define __TDMA_IOCTL_H_ ++ ++#include ++ ++int tdma_cleanup_slot(struct tdma_priv *tdma, struct tdma_slot *slot); ++ ++int tdma_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg); ++ ++#endif /* __TDMA_IOCTL_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_worker.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_worker.h 2021-04-07 16:01:26.760634834 +0800 +@@ -0,0 +1,34 @@ ++/*** ++ * ++ * include/rtmac/tdma/tdma_worker.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __TDMA_WORKER_H_ ++#define __TDMA_WORKER_H_ ++ ++#include ++ ++#define DEF_WORKER_PRIO RTDM_TASK_HIGHEST_PRIORITY ++ ++void tdma_worker(void *arg); ++ ++#endif /* __TDMA_WORKER_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/tdma/tdma.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/tdma/tdma.h 2021-04-07 16:01:26.755634841 +0800 +@@ -0,0 +1,161 @@ ++/*** ++ * ++ * include/rtmac/tdma/tdma.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __TDMA_H_ ++#define __TDMA_H_ ++ ++#include ++ ++#include ++#include ++ ++#define RTMAC_TYPE_TDMA 0x0001 ++ ++#define TDMA_MAGIC 0x3A0D4D0A ++ ++#define TDMA_FLAG_CALIBRATED 1 ++#define TDMA_FLAG_RECEIVED_SYNC 2 ++#define TDMA_FLAG_MASTER 3 /* also set for backup masters */ ++#define TDMA_FLAG_BACKUP_MASTER 4 ++#define TDMA_FLAG_ATTACHED 5 ++#define TDMA_FLAG_BACKUP_ACTIVE 6 ++ ++#define DEFAULT_SLOT 0 ++#define DEFAULT_NRT_SLOT 1 ++ ++/* job IDs */ ++#define WAIT_ON_SYNC -1 ++#define XMIT_SYNC -2 ++#define BACKUP_SYNC -3 ++#define XMIT_REQ_CAL -4 ++#define XMIT_RPL_CAL -5 ++ ++struct tdma_priv; ++ ++struct tdma_job { ++ struct list_head entry; ++ int id; ++ unsigned int ref_count; ++}; ++ ++#define SLOT_JOB(job) ((struct tdma_slot *)(job)) ++ ++struct tdma_slot { ++ struct tdma_job head; ++ ++ u64 offset; ++ unsigned int period; ++ unsigned int phasing; ++ unsigned int mtu; ++ unsigned int size; ++ struct rtskb_prio_queue *queue; ++ struct rtskb_prio_queue local_queue; ++}; ++ ++#define REQUEST_CAL_JOB(job) ((struct tdma_request_cal *)(job)) ++ ++struct tdma_request_cal { ++ struct tdma_job head; ++ ++ struct tdma_priv *tdma; ++ u64 offset; ++ unsigned int period; ++ unsigned int phasing; ++ unsigned int cal_rounds; ++ u64 *cal_results; ++ u64 *result_buffer; ++}; ++ ++#define REPLY_CAL_JOB(job) ((struct tdma_reply_cal *)(job)) ++ ++struct tdma_reply_cal { ++ struct tdma_job head; ++ ++ u32 reply_cycle; ++ u64 reply_offset; ++ struct rtskb *reply_rtskb; ++}; ++ ++struct tdma_priv { ++ unsigned int magic; ++ struct rtnet_device *rtdev; ++ char device_name[32]; ++ struct rtdm_driver api_driver; ++ struct rtdm_device api_device; ++ ++#ifdef ALIGN_RTOS_TASK ++ __u8 __align[(ALIGN_RTOS_TASK - ++ ((sizeof(unsigned int) + sizeof(struct rtnet_device *) + ++ sizeof(struct rtdm_device)) & ++ (ALIGN_RTOS_TASK - 1))) & ++ (ALIGN_RTOS_TASK - 1)]; ++#endif ++ rtdm_task_t worker_task; ++ rtdm_event_t worker_wakeup; ++ rtdm_event_t xmit_event; ++ rtdm_event_t sync_event; ++ ++ unsigned long flags; ++ unsigned int cal_rounds; ++ u32 current_cycle; ++ u64 current_cycle_start; ++ u64 master_packet_delay_ns; ++ nanosecs_rel_t clock_offset; ++ ++ struct tdma_job sync_job; ++ struct tdma_job *first_job; ++ struct tdma_job *current_job; ++ volatile unsigned int job_list_revision; ++ ++ unsigned int max_slot_id; ++ struct tdma_slot **slot_table; ++ ++ struct rt_proc_call *calibration_call; ++ unsigned char master_hw_addr[MAX_ADDR_LEN]; ++ ++ rtdm_lock_t lock; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ struct rtskb_pool cal_rtskb_pool; ++ u64 cycle_period; ++ u64 backup_sync_inc; ++#endif ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct list_head list_entry; ++#endif ++}; ++ ++extern struct rtmac_disc tdma_disc; ++ ++#define print_jobs() \ ++ do { \ ++ struct tdma_job *entry; \ ++ rtdm_printk("%s:%d - ", __FUNCTION__, __LINE__); \ ++ list_for_each_entry (entry, &tdma->first_job->entry, entry) \ ++ rtdm_printk("%d ", entry->id); \ ++ rtdm_printk("\n"); \ ++ } while (0) ++ ++#endif /* __TDMA_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_dev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_dev.h 2021-04-07 16:01:26.751634846 +0800 +@@ -0,0 +1,37 @@ ++/*** ++ * ++ * include/rtmac/tdma/tdma_dev.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify 
++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __TDMA_DEV_H_ ++#define __TDMA_DEV_H_ ++ ++#include ++ ++int tdma_dev_init(struct rtnet_device *rtdev, struct tdma_priv *tdma); ++ ++static inline void tdma_dev_release(struct tdma_priv *tdma) ++{ ++ rtdm_dev_unregister(&tdma->api_device); ++} ++ ++#endif /* __TDMA_DEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_proto.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_proto.h 2021-04-07 16:01:26.746634853 +0800 +@@ -0,0 +1,81 @@ ++/*** ++ * ++ * include/rtmac/tdma/tdma_proto.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __TDMA_PROTO_H_ ++#define __TDMA_PROTO_H_ ++ ++#include ++ ++#include ++ ++#define TDMA_FRM_VERSION 0x0201 ++ ++#define TDMA_FRM_SYNC 0x0000 ++#define TDMA_FRM_REQ_CAL 0x0010 ++#define TDMA_FRM_RPL_CAL 0x0011 ++ ++struct tdma_frm_head { ++ u16 version; ++ u16 id; ++} __attribute__((packed)); ++ ++#define SYNC_FRM(head) ((struct tdma_frm_sync *)(head)) ++ ++struct tdma_frm_sync { ++ struct tdma_frm_head head; ++ u32 cycle_no; ++ u64 xmit_stamp; ++ u64 sched_xmit_stamp; ++} __attribute__((packed)); ++ ++#define REQ_CAL_FRM(head) ((struct tdma_frm_req_cal *)(head)) ++ ++struct tdma_frm_req_cal { ++ struct tdma_frm_head head; ++ u64 xmit_stamp; ++ u32 reply_cycle; ++ u64 reply_slot_offset; ++} __attribute__((packed)); ++ ++#define RPL_CAL_FRM(head) ((struct tdma_frm_rpl_cal *)(head)) ++ ++struct tdma_frm_rpl_cal { ++ struct tdma_frm_head head; ++ u64 request_xmit_stamp; ++ u64 reception_stamp; ++ u64 xmit_stamp; ++} __attribute__((packed)); ++ ++void tdma_xmit_sync_frame(struct tdma_priv *tdma); ++int tdma_xmit_request_cal_frame(struct tdma_priv *tdma, u32 reply_cycle, ++ u64 reply_slot_offset); ++ ++int tdma_rt_packet_tx(struct rtskb *rtskb, struct rtnet_device *rtdev); ++int tdma_nrt_packet_tx(struct rtskb *rtskb); ++ ++int tdma_packet_rx(struct rtskb *rtskb); ++ ++unsigned int tdma_get_mtu(struct rtnet_device *rtdev, unsigned int priority); ++ ++#endif /* __TDMA_PROTO_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/rtmac_disc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/rtmac_disc.h 2021-04-07 16:01:26.741634861 +0800 +@@ -0,0 +1,95 @@ ++/*** ++ * ++ * include/rtmac/rtmac_disc.h ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTMAC_DISC_H_ ++#define __RTMAC_DISC_H_ ++ ++#include ++#include ++ ++#include ++#include ++ ++#define RTMAC_NO_VNIC NULL ++#define RTMAC_DEFAULT_VNIC rtmac_vnic_xmit ++ ++typedef int (*vnic_xmit_handler)(struct sk_buff *skb, struct net_device *dev); ++ ++struct rtmac_priv { ++ int (*orig_start_xmit)(struct rtskb *skb, struct rtnet_device *dev); ++ struct net_device *vnic; ++ struct net_device_stats vnic_stats; ++ struct rtskb_pool vnic_skb_pool; ++ unsigned int vnic_max_mtu; ++ ++ u8 disc_priv[0] __attribute__((aligned(16))); ++}; ++ ++struct rtmac_proc_entry { ++ const char *name; ++ int (*handler)(struct xnvfile_regular_iterator *it, void *data); ++ struct xnvfile_regular vfile; ++}; ++ ++struct rtmac_disc { ++ struct list_head list; ++ ++ const char *name; ++ unsigned int priv_size; /* size of rtmac_priv.disc_priv */ ++ u16 disc_type; ++ ++ int (*packet_rx)(struct rtskb *skb); ++ /* rt_packet_tx prototype must be compatible with hard_start_xmit */ ++ int (*rt_packet_tx)(struct rtskb *skb, struct rtnet_device *dev); ++ int (*nrt_packet_tx)(struct rtskb *skb); ++ ++ unsigned int (*get_mtu)(struct rtnet_device *rtdev, ++ unsigned int priority); ++ ++ vnic_xmit_handler vnic_xmit; ++ ++ int (*attach)(struct rtnet_device *rtdev, void *disc_priv); ++ int (*detach)(struct rtnet_device *rtdev, void *disc_priv); ++ ++ struct rtnet_ioctls ioctls; ++ ++ struct rtmac_proc_entry *proc_entries; ++ unsigned nr_proc_entries; ++ ++ struct module *owner; ++}; ++ ++int rtmac_disc_attach(struct rtnet_device *rtdev, struct rtmac_disc *disc); ++int rtmac_disc_detach(struct rtnet_device *rtdev); ++ ++int __rtmac_disc_register(struct rtmac_disc *disc, struct module *module); ++#define rtmac_disc_register(disc) __rtmac_disc_register(disc, THIS_MODULE) ++ ++void rtmac_disc_deregister(struct rtmac_disc *disc); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int rtnet_rtmac_disciplines_show(struct xnvfile_regular_iterator *it, void *d); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++#endif /* __RTMAC_DISC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/rtmac_proc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/rtmac_proc.h 2021-04-07 16:01:26.737634866 +0800 +@@ -0,0 +1,34 @@ ++/*** ++ * ++ * include/rtmac/rtmac_proc.h ++ * ++ * rtmac - real-time networking medium access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde ++ * 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTMAC_PROC_H_ ++#define __RTMAC_PROC_H_ ++ ++int rtmac_disc_proc_register(struct rtmac_disc *disc); ++void rtmac_disc_proc_unregister(struct rtmac_disc *disc); ++ ++int rtmac_proc_register(void); ++void rtmac_proc_release(void); ++ ++#endif /* __RTMAC_PROC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtwlan.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtwlan.h 2021-04-07 16:01:26.732634874 +0800 +@@ -0,0 +1,263 @@ ++/* rtwlan.h ++ * ++ * This file is a rtnet adaption from ieee80211/ieee80211.h used by the ++ * rt2x00-2.0.0-b3 sourceforge project ++ * ++ * Merged with mainline ieee80211.h in Aug 2004. Original ieee802_11 ++ * remains copyright by the original authors ++ * ++ * Portions of the merged code are based on Host AP (software wireless ++ * LAN access point) driver for Intersil Prism2/2.5/3. ++ * ++ * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen ++ * ++ * Copyright (c) 2002-2003, Jouni Malinen ++ * ++ * Adaption to a generic IEEE 802.11 stack by James Ketrenos ++ * ++ * Copyright (c) 2004-2005, Intel Corporation ++ * ++ * Adaption to rtnet ++ * Copyright (c) 2006, Daniel Gregorek ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef RTWLAN_H ++#define RTWLAN_H ++ ++#include /* ETH_ALEN */ ++#include /* ARRAY_SIZE */ ++ ++#include ++#include ++ ++#define IEEE80211_1ADDR_LEN 10 ++#define IEEE80211_2ADDR_LEN 16 ++#define IEEE80211_3ADDR_LEN 24 ++#define IEEE80211_4ADDR_LEN 30 ++#define IEEE80211_FCS_LEN 4 ++#define IEEE80211_HLEN (IEEE80211_4ADDR_LEN) ++#define IEEE80211_FRAME_LEN (IEEE80211_DATA_LEN + IEEE80211_HLEN) ++ ++#define MIN_FRAG_THRESHOLD 256U ++#define MAX_FRAG_THRESHOLD 2346U ++ ++/* Frame control field constants */ ++#define IEEE80211_FCTL_VERS 0x0003 ++#define IEEE80211_FCTL_FTYPE 0x000c ++#define IEEE80211_FCTL_STYPE 0x00f0 ++#define IEEE80211_FCTL_TODS 0x0100 ++#define IEEE80211_FCTL_FROMDS 0x0200 ++#define IEEE80211_FCTL_MOREFRAGS 0x0400 ++#define IEEE80211_FCTL_RETRY 0x0800 ++#define IEEE80211_FCTL_PM 0x1000 ++#define IEEE80211_FCTL_MOREDATA 0x2000 ++#define IEEE80211_FCTL_PROTECTED 0x4000 ++#define IEEE80211_FCTL_ORDER 0x8000 ++ ++#define IEEE80211_FTYPE_MGMT 0x0000 ++#define IEEE80211_FTYPE_CTL 0x0004 ++#define IEEE80211_FTYPE_DATA 0x0008 ++ ++/* management */ ++#define IEEE80211_STYPE_ASSOC_REQ 0x0000 ++#define IEEE80211_STYPE_ASSOC_RESP 0x0010 ++#define IEEE80211_STYPE_REASSOC_REQ 0x0020 ++#define IEEE80211_STYPE_REASSOC_RESP 0x0030 ++#define IEEE80211_STYPE_PROBE_REQ 0x0040 ++#define IEEE80211_STYPE_PROBE_RESP 0x0050 ++#define IEEE80211_STYPE_BEACON 0x0080 ++#define IEEE80211_STYPE_ATIM 0x0090 ++#define IEEE80211_STYPE_DISASSOC 0x00A0 ++#define IEEE80211_STYPE_AUTH 0x00B0 ++#define IEEE80211_STYPE_DEAUTH 0x00C0 ++#define IEEE80211_STYPE_ACTION 0x00D0 ++ ++/* control */ ++#define IEEE80211_STYPE_PSPOLL 0x00A0 ++#define IEEE80211_STYPE_RTS 0x00B0 ++#define IEEE80211_STYPE_CTS 0x00C0 ++#define IEEE80211_STYPE_ACK 0x00D0 ++#define IEEE80211_STYPE_CFEND 0x00E0 ++#define IEEE80211_STYPE_CFENDACK 0x00F0 ++ ++/* data */ ++#define IEEE80211_STYPE_DATA 0x0000 ++#define IEEE80211_STYPE_DATA_CFACK 0x0010 ++#define IEEE80211_STYPE_DATA_CFPOLL 0x0020 ++#define IEEE80211_STYPE_DATA_CFACKPOLL 0x0030 ++#define IEEE80211_STYPE_NULLFUNC 0x0040 ++#define IEEE80211_STYPE_CFACK 0x0050 ++#define IEEE80211_STYPE_CFPOLL 0x0060 ++#define IEEE80211_STYPE_CFACKPOLL 0x0070 ++#define IEEE80211_STYPE_QOS_DATA 0x0080 ++ ++#define RTWLAN_SCTL_SEQ 0xFFF0 ++ ++#define WLAN_FC_GET_VERS(fc) ((fc)&IEEE80211_FCTL_VERS) ++#define WLAN_FC_GET_TYPE(fc) ((fc)&IEEE80211_FCTL_FTYPE) ++#define WLAN_FC_GET_STYPE(fc) ((fc)&IEEE80211_FCTL_STYPE) ++ ++#define IEEE80211_DSSS_RATE_1MB 0x02 ++#define IEEE80211_DSSS_RATE_2MB 0x04 ++#define IEEE80211_DSSS_RATE_5MB 0x0B ++#define IEEE80211_DSSS_RATE_11MB 0x16 ++#define IEEE80211_OFDM_RATE_6MB 0x0C ++#define IEEE80211_OFDM_RATE_9MB 0x12 ++#define IEEE80211_OFDM_RATE_12MB 0x18 ++#define IEEE80211_OFDM_RATE_18MB 0x24 ++#define IEEE80211_OFDM_RATE_24MB 0x30 ++#define IEEE80211_OFDM_RATE_36MB 0x48 ++#define IEEE80211_OFDM_RATE_48MB 0x60 ++#define IEEE80211_OFDM_RATE_54MB 0x6C ++#define IEEE80211_BASIC_RATE_MASK 0x80 ++ ++#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" ++#define MAC_ARG(x) \ ++ ((u8 *)(x))[0], ((u8 *)(x))[1], ((u8 *)(x))[2], ((u8 *)(x))[3], \ ++ ((u8 *)(x))[4], ((u8 *)(x))[5] ++ ++#ifdef CONFIG_RTWLAN_DEBUG ++#define RTWLAN_DEBUG_PRINTK(__message...) \ ++ do { \ ++ rtdm_printk(__message); \ ++ } while (0) ++#define RTWLAN_DEBUG(__message, __args...) \ ++ RTWLAN_DEBUG_PRINTK(KERN_DEBUG "rtwlan->%s: Debug - " __message, \ ++ __FUNCTION__, ##__args); ++#else ++#define RTWLAN_DEBUG(__message...) 
\ ++ do { \ ++ } while (0) ++#endif ++ ++struct rtwlan_stats { ++ unsigned long rx_packets; /* total packets received */ ++ unsigned long tx_packets; /* total packets transmitted */ ++ unsigned long tx_retry; /* total packets transmitted with retry */ ++}; ++ ++struct rtwlan_device { ++ struct rtwlan_stats stats; ++ ++ struct rtskb_pool skb_pool; ++ ++ int mode; ++ ++ int (*hard_start_xmit)(struct rtskb *rtskb, ++ struct rtnet_device *rtnet_dev); ++ ++ /* This must be the last item */ ++ u8 priv[0]; ++}; ++ ++/* Minimal header; can be used for passing 802.11 frames with sufficient ++ * information to determine what type of underlying data type is actually ++ * stored in the data. */ ++struct ieee80211_hdr { ++ u16 frame_ctl; ++ u16 duration_id; ++ u8 payload[0]; ++} __attribute__((packed)); ++ ++struct ieee80211_hdr_3addr { ++ u16 frame_ctl; ++ u16 duration_id; ++ u8 addr1[ETH_ALEN]; ++ u8 addr2[ETH_ALEN]; ++ u8 addr3[ETH_ALEN]; ++ u16 seq_ctl; ++ u8 payload[0]; ++} __attribute__((packed)); ++ ++static inline int ieee80211_get_hdrlen(u16 fc) ++{ ++ int hdrlen = IEEE80211_3ADDR_LEN; ++ u16 stype = WLAN_FC_GET_STYPE(fc); ++ ++ switch (WLAN_FC_GET_TYPE(fc)) { ++ case IEEE80211_FTYPE_DATA: ++ if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) ++ hdrlen = IEEE80211_4ADDR_LEN; ++ if (stype & IEEE80211_STYPE_QOS_DATA) ++ hdrlen += 2; ++ break; ++ ++ case IEEE80211_FTYPE_CTL: ++ switch (WLAN_FC_GET_STYPE(fc)) { ++ case IEEE80211_STYPE_CTS: ++ case IEEE80211_STYPE_ACK: ++ hdrlen = IEEE80211_1ADDR_LEN; ++ break; ++ ++ default: ++ hdrlen = IEEE80211_2ADDR_LEN; ++ break; ++ } ++ break; ++ } ++ ++ return hdrlen; ++} ++ ++static inline int ieee80211_is_ofdm_rate(u8 rate) ++{ ++ switch (rate & ~IEEE80211_BASIC_RATE_MASK) { ++ case IEEE80211_OFDM_RATE_6MB: ++ case IEEE80211_OFDM_RATE_9MB: ++ case IEEE80211_OFDM_RATE_12MB: ++ case IEEE80211_OFDM_RATE_18MB: ++ case IEEE80211_OFDM_RATE_24MB: ++ case IEEE80211_OFDM_RATE_36MB: ++ case IEEE80211_OFDM_RATE_48MB: ++ case IEEE80211_OFDM_RATE_54MB: ++ return 1; ++ } ++ return 0; ++} ++ ++static inline int ieee80211_is_dsss_rate(u8 rate) ++{ ++ switch (rate & ~IEEE80211_BASIC_RATE_MASK) { ++ case IEEE80211_DSSS_RATE_1MB: ++ case IEEE80211_DSSS_RATE_2MB: ++ case IEEE80211_DSSS_RATE_5MB: ++ case IEEE80211_DSSS_RATE_11MB: ++ return 1; ++ } ++ return 0; ++} ++ ++static inline void *rtwlan_priv(struct rtwlan_device *rtwlan_dev) ++{ ++ return (void *)rtwlan_dev + sizeof(struct rtwlan_device); ++} ++ ++struct rtnet_device *rtwlan_alloc_dev(unsigned sizeof_priv, ++ unsigned dev_pool_size); ++int rtwlan_rx(struct rtskb *rtskb, struct rtnet_device *rtnet_dev); ++int rtwlan_tx(struct rtskb *rtskb, struct rtnet_device *rtnet_dev); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTWLAN ++int __init rtwlan_init(void); ++void rtwlan_exit(void); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTWLAN */ ++#define rtwlan_init() 0 ++#define rtwlan_exit() ++#endif /* CONFIG_XENO_DRIVERS_NET_RTWLAN */ ++ ++#endif +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_proc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_proc.h 2021-04-07 16:01:26.727634881 +0800 +@@ -0,0 +1,63 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_proc.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * 
(at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_PROC_H_ ++#define __RTCFG_PROC_H_ ++ ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++extern struct mutex nrt_proc_lock; ++ ++void rtcfg_update_conn_proc_entries(int ifindex); ++void rtcfg_remove_conn_proc_entries(int ifindex); ++ ++int rtcfg_init_proc(void); ++void rtcfg_cleanup_proc(void); ++ ++static inline void rtcfg_lockwr_proc(int ifindex) ++{ ++ mutex_lock(&nrt_proc_lock); ++ rtcfg_remove_conn_proc_entries(ifindex); ++} ++ ++static inline void rtcfg_unlockwr_proc(int ifindex) ++{ ++ rtcfg_update_conn_proc_entries(ifindex); ++ mutex_unlock(&nrt_proc_lock); ++} ++ ++#else ++ ++#define rtcfg_lockwr_proc(x) \ ++ do { \ ++ } while (0) ++#define rtcfg_unlockwr_proc(x) \ ++ do { \ ++ } while (0) ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++#endif /* __RTCFG_PROC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_timer.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_timer.h 2021-04-07 16:01:26.723634886 +0800 +@@ -0,0 +1,34 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_timer.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_TIMER_H_ ++#define __RTCFG_TIMER_H_ ++ ++void rtcfg_timer(rtdm_timer_t *t); ++ ++void rtcfg_timer_run(void); ++ ++void rtcfg_thread_signal(void); ++ ++#endif /* __RTCFG_TIMER_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_file.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_file.h 2021-04-07 16:01:26.718634893 +0800 +@@ -0,0 +1,43 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_file.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_FILE_H_ ++#define __RTCFG_FILE_H_ ++ ++#include ++#include ++ ++struct rtcfg_file { ++ struct list_head entry; ++ int ref_count; ++ const char *name; ++ size_t size; ++ void *buffer; ++}; ++ ++struct rtcfg_file *rtcfg_get_file(const char *filename); ++void rtcfg_add_file(struct rtcfg_file *file); ++int rtcfg_release_file(struct rtcfg_file *file); ++ ++#endif /* __RTCFG_FILE_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_conn_event.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_conn_event.h 2021-04-07 16:01:26.713634901 +0800 +@@ -0,0 +1,69 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_conn_event.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_CONN_EVENT_H_ ++#define __RTCFG_CONN_EVENT_H_ ++ ++#include ++ ++#include ++#include ++#include ++ ++typedef enum { ++ RTCFG_CONN_SEARCHING, ++ RTCFG_CONN_STAGE_1, ++ RTCFG_CONN_STAGE_2, ++ RTCFG_CONN_READY, ++ RTCFG_CONN_DEAD ++} RTCFG_CONN_STATE; ++ ++struct rtcfg_connection { ++ struct list_head entry; ++ int ifindex; ++ RTCFG_CONN_STATE state; ++ u8 mac_addr[MAX_ADDR_LEN]; ++ unsigned int addr_type; ++ union { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ u32 ip_addr; ++#endif ++ } addr; ++ void *stage1_data; ++ size_t stage1_size; ++ struct rtcfg_file *stage2_file; ++ u32 cfg_offs; ++ unsigned int flags; ++ unsigned int burstrate; ++ nanosecs_abs_t last_frame; ++ u64 cfg_timeout; ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct xnvfile_regular proc_entry; ++#endif ++}; ++ ++int rtcfg_do_conn_event(struct rtcfg_connection *conn, RTCFG_EVENT event_id, ++ void *event_data); ++ ++#endif /* __RTCFG_CONN_EVENT_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_client_event.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_client_event.h 2021-04-07 16:01:26.708634908 +0800 +@@ -0,0 +1,45 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_client_event.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_CLIENT_EVENT_H_ ++#define __RTCFG_CLIENT_EVENT_H_ ++ ++#include ++ ++int rtcfg_main_state_client_0(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_1(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_announced(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_all_known(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_all_frames(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_2(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_ready(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++ ++#endif /* __RTCFG_CLIENT_EVENT_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_event.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_event.h 2021-04-07 16:01:26.704634913 +0800 +@@ -0,0 +1,121 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_event.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_EVENT_H_ ++#define __RTCFG_EVENT_H_ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#define FLAG_TIMER_STARTED 16 ++#define FLAG_TIMER_SHUTDOWN 17 ++#define FLAG_TIMER_PENDING 18 ++ ++#define _FLAG_TIMER_STARTED (1 << FLAG_TIMER_STARTED) ++#define _FLAG_TIMER_SHUTDOWN (1 << FLAG_TIMER_SHUTDOWN) ++#define _FLAG_TIMER_PENDING (1 << FLAG_TIMER_PENDING) ++ ++typedef enum { ++ RTCFG_MAIN_OFF, ++ RTCFG_MAIN_SERVER_RUNNING, ++ RTCFG_MAIN_CLIENT_0, ++ RTCFG_MAIN_CLIENT_1, ++ RTCFG_MAIN_CLIENT_ANNOUNCED, ++ RTCFG_MAIN_CLIENT_ALL_KNOWN, ++ RTCFG_MAIN_CLIENT_ALL_FRAMES, ++ RTCFG_MAIN_CLIENT_2, ++ RTCFG_MAIN_CLIENT_READY ++} RTCFG_MAIN_STATE; ++ ++struct rtcfg_station { ++ u8 mac_addr[ETH_ALEN]; /* Ethernet-specific! 
*/ ++ u8 flags; ++}; ++ ++struct rtcfg_device { ++ RTCFG_MAIN_STATE state; ++ u32 other_stations; ++ u32 stations_found; ++ u32 stations_ready; ++ rtdm_mutex_t dev_mutex; ++ struct list_head event_calls; ++ rtdm_lock_t event_calls_lock; ++ rtdm_timer_t timer; ++ unsigned long flags; ++ unsigned int burstrate; ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct xnvfile_directory proc_entry; ++ struct xnvfile_regular proc_state_vfile; ++ struct xnvfile_regular proc_stations_vfile; ++#endif ++ ++ union { ++ struct { ++ unsigned int addr_type; ++ union { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ u32 ip_addr; ++#endif ++ } srv_addr; ++ u8 srv_mac_addr[MAX_ADDR_LEN]; ++ u8 *stage2_buffer; ++ u32 cfg_len; ++ u32 cfg_offs; ++ unsigned int packet_counter; ++ u32 chain_len; ++ struct rtskb *stage2_chain; ++ u32 max_stations; ++ struct rtcfg_station *station_addr_list; ++ } clt; ++ ++ struct { ++ u32 clients_configured; ++ struct list_head conn_list; ++ u16 heartbeat; ++ u64 heartbeat_timeout; ++ } srv; ++ } spec; ++}; ++ ++extern struct rtcfg_device device[MAX_RT_DEVICES]; ++extern const char *rtcfg_event[]; ++extern const char *rtcfg_main_state[]; ++ ++int rtcfg_do_main_event(int ifindex, RTCFG_EVENT event_id, void *event_data); ++void rtcfg_next_main_state(int ifindex, RTCFG_MAIN_STATE state); ++ ++void rtcfg_queue_blocking_call(int ifindex, struct rt_proc_call *call); ++struct rt_proc_call *rtcfg_dequeue_blocking_call(int ifindex); ++void rtcfg_complete_cmd(int ifindex, RTCFG_EVENT event_id, int result); ++void rtcfg_reset_device(int ifindex); ++ ++void rtcfg_init_state_machines(void); ++void rtcfg_cleanup_state_machines(void); ++ ++#endif /* __RTCFG_EVENT_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_ioctl.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_ioctl.h 2021-04-07 16:01:26.699634921 +0800 +@@ -0,0 +1,33 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_ioctl.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTCFG_IOCTL_H_ ++#define __RTCFG_IOCTL_H_ ++ ++extern struct rtnet_ioctls rtcfg_ioctls; ++ ++#define rtcfg_init_ioctls() rtnet_register_ioctls(&rtcfg_ioctls) ++#define rtcfg_cleanup_ioctls() rtnet_unregister_ioctls(&rtcfg_ioctls) ++ ++#endif /* __RTCFG_IOCTL_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg.h 2021-04-07 16:01:26.694634928 +0800 +@@ -0,0 +1,47 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_H_INTERNAL_ ++#define __RTCFG_H_INTERNAL_ ++ ++#include ++ ++#define MIN(a, b) ((a) < (b) ? (a) : (b)) ++ ++/*** ++ * RTcfg debugging ++ */ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTCFG_DEBUG ++ ++extern int rtcfg_debug; ++ ++/* use 0 for production, 1 for verification, >2 for debug */ ++#define RTCFG_DEFAULT_DEBUG_LEVEL 10 ++ ++#define RTCFG_DEBUG(n, args...) (rtcfg_debug >= (n)) ? (rtdm_printk(args)) : 0 ++#else ++#define RTCFG_DEBUG(n, args...) ++#endif /* CONFIG_RTCFG_DEBUG */ ++ ++#endif /* __RTCFG_H_INTERNAL_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_frame.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_frame.h 2021-04-07 16:01:26.690634934 +0800 +@@ -0,0 +1,139 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_frame.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTCFG_FRAME_H_ ++#define __RTCFG_FRAME_H_ ++ ++#include ++#include ++#include ++ ++#include ++ ++#define ETH_RTCFG 0x9022 ++ ++#define RTCFG_SKB_PRIO \ ++ RTSKB_PRIO_VALUE(QUEUE_MIN_PRIO - 1, RTSKB_DEF_NRT_CHANNEL) ++ ++#define RTCFG_ID_STAGE_1_CFG 0 ++#define RTCFG_ID_ANNOUNCE_NEW 1 ++#define RTCFG_ID_ANNOUNCE_REPLY 2 ++#define RTCFG_ID_STAGE_2_CFG 3 ++#define RTCFG_ID_STAGE_2_CFG_FRAG 4 ++#define RTCFG_ID_ACK_CFG 5 ++#define RTCFG_ID_READY 6 ++#define RTCFG_ID_HEARTBEAT 7 ++#define RTCFG_ID_DEAD_STATION 8 ++ ++#define RTCFG_ADDRSIZE_MAC 0 ++#define RTCFG_ADDRSIZE_IP 4 ++#define RTCFG_MAX_ADDRSIZE RTCFG_ADDRSIZE_IP ++ ++#define RTCFG_FLAG_STAGE_2_DATA 0 ++#define RTCFG_FLAG_READY 1 ++ ++#define _RTCFG_FLAG_STAGE_2_DATA (1 << RTCFG_FLAG_STAGE_2_DATA) ++#define _RTCFG_FLAG_READY (1 << RTCFG_FLAG_READY) ++ ++struct rtcfg_frm_head { ++#if defined(__LITTLE_ENDIAN_BITFIELD) ++ u8 id : 5; ++ u8 version : 3; ++#elif defined(__BIG_ENDIAN_BITFIELD) ++ u8 version : 3; ++ u8 id : 5; ++#else ++#error unsupported byte order ++#endif ++} __attribute__((packed)); ++ ++struct rtcfg_frm_stage_1_cfg { ++ struct rtcfg_frm_head head; ++ u8 addr_type; ++ u8 client_addr[0]; ++ u8 server_addr[0]; ++ u8 burstrate; ++ u16 cfg_len; ++ u8 cfg_data[0]; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_announce { ++ struct rtcfg_frm_head head; ++ u8 addr_type; ++ u8 addr[0]; ++ u8 flags; ++ u8 burstrate; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_stage_2_cfg { ++ struct rtcfg_frm_head head; ++ u8 flags; ++ u32 stations; ++ u16 heartbeat_period; ++ u32 cfg_len; ++ u8 cfg_data[0]; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_stage_2_cfg_frag { ++ struct rtcfg_frm_head head; ++ u32 frag_offs; ++ u8 cfg_data[0]; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_ack_cfg { ++ struct rtcfg_frm_head head; ++ u32 ack_len; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_simple { ++ struct rtcfg_frm_head head; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_dead_station { ++ struct rtcfg_frm_head head; ++ u8 addr_type; ++ u8 logical_addr[0]; ++ u8 physical_addr[32]; ++} __attribute__((packed)); ++ ++int rtcfg_send_stage_1(struct rtcfg_connection *conn); ++int rtcfg_send_stage_2(struct rtcfg_connection *conn, int send_data); ++int rtcfg_send_stage_2_frag(struct rtcfg_connection *conn); ++int rtcfg_send_announce_new(int ifindex); ++int rtcfg_send_announce_reply(int ifindex, u8 *dest_mac_addr); ++int rtcfg_send_ack(int ifindex); ++int rtcfg_send_dead_station(struct rtcfg_connection *conn); ++ ++int rtcfg_send_simple_frame(int ifindex, int frame_id, u8 *dest_addr); ++ ++#define rtcfg_send_ready(ifindex) \ ++ rtcfg_send_simple_frame(ifindex, RTCFG_ID_READY, NULL) ++#define rtcfg_send_heartbeat(ifindex) \ ++ rtcfg_send_simple_frame(ifindex, RTCFG_ID_HEARTBEAT, \ ++ device[ifindex].spec.clt.srv_mac_addr) ++ ++int __init rtcfg_init_frames(void); ++void rtcfg_cleanup_frames(void); ++ ++#endif /* __RTCFG_FRAME_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtnet_rtpc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_rtpc.h 2021-04-07 16:01:26.685634941 +0800 +@@ -0,0 +1,71 @@ ++/*** ++ * ++ * include/rtnet_rtpc.h ++ * ++ * RTnet - real-time networking subsystem ++ * ++ * Copyright (C) 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your 
option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_RTPC_H_ ++#define __RTNET_RTPC_H_ ++ ++#include ++ ++#include ++ ++struct rt_proc_call; ++ ++typedef int (*rtpc_proc)(struct rt_proc_call *call); ++typedef void (*rtpc_copy_back_proc)(struct rt_proc_call *call, void *priv_data); ++typedef void (*rtpc_cleanup_proc)(void *priv_data); ++ ++struct rt_proc_call { ++ struct list_head list_entry; ++ int processed; ++ rtpc_proc proc; ++ int result; ++ atomic_t ref_count; ++ wait_queue_head_t call_wq; ++ rtpc_cleanup_proc cleanup_handler; ++ char priv_data[0] __attribute__((aligned(8))); ++}; ++ ++#define CALL_PENDING 1000 /* result value for blocked calls */ ++ ++int rtnet_rtpc_dispatch_call(rtpc_proc rt_proc, unsigned int timeout, ++ void *priv_data, size_t priv_data_size, ++ rtpc_copy_back_proc copy_back_handler, ++ rtpc_cleanup_proc cleanup_handler); ++ ++void rtnet_rtpc_complete_call(struct rt_proc_call *call, int result); ++void rtnet_rtpc_complete_call_nrt(struct rt_proc_call *call, int result); ++ ++#define rtpc_dispatch_call rtnet_rtpc_dispatch_call ++#define rtpc_complete_call rtnet_rtpc_complete_call ++#define rtpc_complete_call_nrt rtnet_rtpc_complete_call_nrt ++ ++#define rtpc_get_priv(call, type) (type *)(call->priv_data) ++#define rtpc_get_result(call) call->result ++#define rtpc_set_result(call, new_result) call->result = new_result ++#define rtpc_set_cleanup_handler(call, handler) call->cleanup_handler = handler; ++ ++int __init rtpc_init(void); ++void rtpc_cleanup(void); ++ ++#endif /* __RTNET_RTPC_H_ */ +--- linux/drivers/xenomai/net/stack/include/nomac_chrdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/nomac_chrdev.h 2021-04-07 16:01:26.680634948 +0800 +@@ -0,0 +1,39 @@ ++/*** ++ * ++ * include/nomac_chrdev.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __NOMAC_CHRDEV_H_ ++#define __NOMAC_CHRDEV_H_ ++ ++#include ++ ++struct nomac_config { ++ struct rtnet_ioctl_head head; ++}; ++ ++#define NOMAC_IOC_ATTACH \ ++ _IOW(RTNET_IOC_TYPE_RTMAC_NOMAC, 0, struct nomac_config) ++#define NOMAC_IOC_DETACH \ ++ _IOW(RTNET_IOC_TYPE_RTMAC_NOMAC, 1, struct nomac_config) ++ ++#endif /* __NOMAC_CHRDEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4_chrdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4_chrdev.h 2021-04-07 16:01:26.676634953 +0800 +@@ -0,0 +1,94 @@ ++/*** ++ * ++ * include/ipv4.h ++ * ++ * Real-Time IP/UDP/ICMP stack ++ * ++ * Copyright (C) 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __IPV4_H_ ++#define __RTCFG_H_ ++ ++#include ++ ++struct ipv4_cmd { ++ struct rtnet_ioctl_head head; ++ ++ union { ++ /*** rtroute ***/ ++ struct { ++ __u32 ip_addr; ++ } solicit; ++ ++ struct { ++ __u8 dev_addr[DEV_ADDR_LEN]; ++ __u32 ip_addr; ++ } gethost; ++ ++ struct { ++ __u8 dev_addr[DEV_ADDR_LEN]; ++ __u32 ip_addr; ++ } addhost; ++ ++ struct { ++ __u32 ip_addr; ++ } delhost; ++ ++ struct { ++ __u32 net_addr; ++ __u32 net_mask; ++ __u32 gw_addr; ++ } addnet; ++ ++ struct { ++ __u32 net_addr; ++ __u32 net_mask; ++ } delnet; ++ ++ /*** rtping ***/ ++ struct { ++ __u32 ip_addr; ++ __u16 id; ++ __u16 sequence; ++ __u32 msg_size; ++ __u32 timeout; ++ __s64 rtt; ++ } ping; ++ ++ __u64 __padding[8]; ++ } args; ++}; ++ ++#define IOC_RT_HOST_ROUTE_ADD _IOW(RTNET_IOC_TYPE_IPV4, 0, struct ipv4_cmd) ++#define IOC_RT_HOST_ROUTE_SOLICIT _IOW(RTNET_IOC_TYPE_IPV4, 1, struct ipv4_cmd) ++#define IOC_RT_HOST_ROUTE_DELETE \ ++ _IOW(RTNET_IOC_TYPE_IPV4, 2 | RTNET_IOC_NODEV_PARAM, struct ipv4_cmd) ++#define IOC_RT_NET_ROUTE_ADD \ ++ _IOW(RTNET_IOC_TYPE_IPV4, 3 | RTNET_IOC_NODEV_PARAM, struct ipv4_cmd) ++#define IOC_RT_NET_ROUTE_DELETE \ ++ _IOW(RTNET_IOC_TYPE_IPV4, 4 | RTNET_IOC_NODEV_PARAM, struct ipv4_cmd) ++#define IOC_RT_PING \ ++ _IOWR(RTNET_IOC_TYPE_IPV4, 5 | RTNET_IOC_NODEV_PARAM, struct ipv4_cmd) ++#define IOC_RT_HOST_ROUTE_DELETE_DEV \ ++ _IOW(RTNET_IOC_TYPE_IPV4, 6, struct ipv4_cmd) ++#define IOC_RT_HOST_ROUTE_GET \ ++ _IOWR(RTNET_IOC_TYPE_IPV4, 7 | RTNET_IOC_NODEV_PARAM, struct ipv4_cmd) ++#define IOC_RT_HOST_ROUTE_GET_DEV _IOWR(RTNET_IOC_TYPE_IPV4, 8, struct ipv4_cmd) ++ ++#endif /* __IPV4_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtnet_port.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_port.h 2021-04-07 16:01:26.671634961 +0800 +@@ -0,0 +1,113 @@ ++/* include/rtnet_port.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2003 Wittawat Yamwong ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the 
Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++#ifndef __RTNET_PORT_H_ ++#define __RTNET_PORT_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++static inline void rtnetif_start_queue(struct rtnet_device *rtdev) ++{ ++ clear_bit(__RTNET_LINK_STATE_XOFF, &rtdev->link_state); ++} ++ ++static inline void rtnetif_wake_queue(struct rtnet_device *rtdev) ++{ ++ if (test_and_clear_bit(__RTNET_LINK_STATE_XOFF, &rtdev->link_state)) ++ /*TODO __netif_schedule(dev); */; ++} ++ ++static inline void rtnetif_stop_queue(struct rtnet_device *rtdev) ++{ ++ set_bit(__RTNET_LINK_STATE_XOFF, &rtdev->link_state); ++} ++ ++static inline int rtnetif_queue_stopped(struct rtnet_device *rtdev) ++{ ++ return test_bit(__RTNET_LINK_STATE_XOFF, &rtdev->link_state); ++} ++ ++static inline int rtnetif_running(struct rtnet_device *rtdev) ++{ ++ return test_bit(__RTNET_LINK_STATE_START, &rtdev->link_state); ++} ++ ++static inline int rtnetif_device_present(struct rtnet_device *rtdev) ++{ ++ return test_bit(__RTNET_LINK_STATE_PRESENT, &rtdev->link_state); ++} ++ ++static inline void rtnetif_device_detach(struct rtnet_device *rtdev) ++{ ++ if (test_and_clear_bit(__RTNET_LINK_STATE_PRESENT, ++ &rtdev->link_state) && ++ rtnetif_running(rtdev)) { ++ rtnetif_stop_queue(rtdev); ++ } ++} ++ ++static inline void rtnetif_device_attach(struct rtnet_device *rtdev) ++{ ++ if (!test_and_set_bit(__RTNET_LINK_STATE_PRESENT, &rtdev->link_state) && ++ rtnetif_running(rtdev)) { ++ rtnetif_wake_queue(rtdev); ++ /* __netdev_watchdog_up(rtdev); */ ++ } ++} ++ ++static inline void rtnetif_carrier_on(struct rtnet_device *rtdev) ++{ ++ clear_bit(__RTNET_LINK_STATE_NOCARRIER, &rtdev->link_state); ++ /* ++ if (netif_running(dev)) ++ __netdev_watchdog_up(dev); ++ */ ++} ++ ++static inline void rtnetif_carrier_off(struct rtnet_device *rtdev) ++{ ++ set_bit(__RTNET_LINK_STATE_NOCARRIER, &rtdev->link_state); ++} ++ ++static inline int rtnetif_carrier_ok(struct rtnet_device *rtdev) ++{ ++ return !test_bit(__RTNET_LINK_STATE_NOCARRIER, &rtdev->link_state); ++} ++ ++#define NIPQUAD(addr) \ ++ ((unsigned char *)&addr)[0], ((unsigned char *)&addr)[1], \ ++ ((unsigned char *)&addr)[2], ((unsigned char *)&addr)[3] ++#define NIPQUAD_FMT "%u.%u.%u.%u" ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTNET_PORT_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtskb_fifo.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtskb_fifo.h 2021-04-07 16:01:26.666634968 +0800 +@@ -0,0 +1,144 @@ ++/*** ++ * ++ * include/rtskb_fifo.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTSKB_FIFO_H_ ++#define __RTSKB_FIFO_H_ ++ ++#include ++ ++struct rtskb_fifo { ++ unsigned long read_pos ____cacheline_aligned_in_smp; ++ rtdm_lock_t read_lock; ++ unsigned long size_mask; ++ unsigned long write_pos ____cacheline_aligned_in_smp; ++ rtdm_lock_t write_lock; ++ struct rtskb *buffer[0]; ++}; ++ ++#define DECLARE_RTSKB_FIFO(name_prefix, size) \ ++ struct { \ ++ struct rtskb_fifo fifo; \ ++ struct rtskb *__buffer[(size)]; \ ++ } name_prefix ++ ++static inline int __rtskb_fifo_insert(struct rtskb_fifo *fifo, ++ struct rtskb *rtskb) ++{ ++ unsigned long pos = fifo->write_pos; ++ unsigned long new_pos = (pos + 1) & fifo->size_mask; ++ ++ if (unlikely(new_pos == fifo->read_pos)) ++ return -EAGAIN; ++ ++ fifo->buffer[pos] = rtskb; ++ ++ /* rtskb must have been written before write_pos update */ ++ smp_wmb(); ++ ++ fifo->write_pos = new_pos; ++ ++ return 0; ++} ++ ++static inline int rtskb_fifo_insert(struct rtskb_fifo *fifo, ++ struct rtskb *rtskb) ++{ ++ rtdm_lockctx_t context; ++ int result; ++ ++ rtdm_lock_get_irqsave(&fifo->write_lock, context); ++ result = __rtskb_fifo_insert(fifo, rtskb); ++ rtdm_lock_put_irqrestore(&fifo->write_lock, context); ++ ++ return result; ++} ++ ++static inline int rtskb_fifo_insert_inirq(struct rtskb_fifo *fifo, ++ struct rtskb *rtskb) ++{ ++ int result; ++ ++ rtdm_lock_get(&fifo->write_lock); ++ result = __rtskb_fifo_insert(fifo, rtskb); ++ rtdm_lock_put(&fifo->write_lock); ++ ++ return result; ++} ++ ++static inline struct rtskb *__rtskb_fifo_remove(struct rtskb_fifo *fifo) ++{ ++ unsigned long pos = fifo->read_pos; ++ struct rtskb *result; ++ ++ /* check FIFO status first */ ++ if (unlikely(pos == fifo->write_pos)) ++ return NULL; ++ ++ /* at least one rtskb is enqueued, so get the next one */ ++ result = fifo->buffer[pos]; ++ ++ /* result must have been read before read_pos update */ ++ smp_rmb(); ++ ++ fifo->read_pos = (pos + 1) & fifo->size_mask; ++ ++ /* read_pos must have been written for a consitent fifo state on exit */ ++ smp_wmb(); ++ ++ return result; ++} ++ ++static inline struct rtskb *rtskb_fifo_remove(struct rtskb_fifo *fifo) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *result; ++ ++ rtdm_lock_get_irqsave(&fifo->read_lock, context); ++ result = __rtskb_fifo_remove(fifo); ++ rtdm_lock_put_irqrestore(&fifo->read_lock, context); ++ ++ return result; ++} ++ ++static inline struct rtskb *rtskb_fifo_remove_inirq(struct rtskb_fifo *fifo) ++{ ++ struct rtskb *result; ++ ++ rtdm_lock_get(&fifo->read_lock); ++ result = __rtskb_fifo_remove(fifo); ++ rtdm_lock_put(&fifo->read_lock); ++ ++ return result; ++} ++ ++/* for now inlined... 
*/ ++static inline void rtskb_fifo_init(struct rtskb_fifo *fifo, unsigned long size) ++{ ++ fifo->read_pos = 0; ++ fifo->write_pos = 0; ++ fifo->size_mask = size - 1; ++ rtdm_lock_init(&fifo->read_lock); ++ rtdm_lock_init(&fifo->write_lock); ++} ++ ++#endif /* __RTSKB_FIFO_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtwlan_io.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtwlan_io.h 2021-04-07 16:01:26.662634974 +0800 +@@ -0,0 +1,104 @@ ++/* rtwlan_io.h ++ * ++ * Copyright (C) 2006 Daniel Gregorek ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef RTWLAN_IO ++#define RTWLAN_IO ++ ++#include ++ ++#define RTWLAN_TXMODE_RAW 0 ++#define RTWLAN_TXMODE_ACK 1 ++#define RTWLAN_TXMODE_MCAST 2 ++ ++#define ENORTWLANDEV 0xff08 ++ ++struct rtwlan_cmd { ++ struct rtnet_ioctl_head head; ++ ++ union { ++ struct { ++ unsigned int bitrate; ++ unsigned int channel; ++ unsigned int retry; ++ unsigned int txpower; ++ unsigned int mode; ++ unsigned int autoresponder; ++ unsigned int dropbcast; ++ unsigned int dropmcast; ++ unsigned int bbpsens; ++ } set; ++ ++ struct { ++ unsigned int address; ++ unsigned int value; ++ } reg; ++ ++ struct { ++ int ifindex; ++ unsigned int flags; ++ unsigned int bitrate; ++ unsigned int channel; ++ unsigned int retry; ++ unsigned int txpower; ++ unsigned int bbpsens; ++ unsigned int mode; ++ unsigned int autoresponder; ++ unsigned int dropbcast; ++ unsigned int dropmcast; ++ unsigned int rx_packets; ++ unsigned int tx_packets; ++ unsigned int tx_retry; ++ } info; ++ } args; ++}; ++ ++#define RTNET_IOC_TYPE_RTWLAN 8 ++ ++#define IOC_RTWLAN_IFINFO \ ++ _IOWR(RTNET_IOC_TYPE_RTWLAN, 0 | RTNET_IOC_NODEV_PARAM, \ ++ struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_BITRATE _IOWR(RTNET_IOC_TYPE_RTWLAN, 1, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_CHANNEL _IOWR(RTNET_IOC_TYPE_RTWLAN, 2, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_TXPOWER _IOWR(RTNET_IOC_TYPE_RTWLAN, 3, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_RETRY _IOWR(RTNET_IOC_TYPE_RTWLAN, 4, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_TXMODE _IOWR(RTNET_IOC_TYPE_RTWLAN, 5, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_DROPBCAST _IOWR(RTNET_IOC_TYPE_RTWLAN, 6, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_DROPMCAST _IOWR(RTNET_IOC_TYPE_RTWLAN, 7, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_REGREAD _IOWR(RTNET_IOC_TYPE_RTWLAN, 8, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_REGWRITE _IOWR(RTNET_IOC_TYPE_RTWLAN, 9, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_BBPWRITE _IOWR(RTNET_IOC_TYPE_RTWLAN, 10, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_BBPREAD _IOWR(RTNET_IOC_TYPE_RTWLAN, 11, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_BBPSENS _IOWR(RTNET_IOC_TYPE_RTWLAN, 12, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_AUTORESP _IOWR(RTNET_IOC_TYPE_RTWLAN, 13, struct rtwlan_cmd) ++ ++#endif +--- 
linux/drivers/xenomai/net/stack/include/rtcfg_chrdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg_chrdev.h 2021-04-07 16:01:26.657634981 +0800 +@@ -0,0 +1,176 @@ ++/*** ++ * ++ * include/rtcfg.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_H_ ++#define __RTCFG_H_ ++ ++#include ++ ++#define ERTCFG_START 0x0F00 ++#define ESTAGE1SIZE ERTCFG_START ++ ++#define FLAG_STAGE_2_DATA 0x0001 ++#define FLAG_READY 0x0002 ++#define FLAG_ASSIGN_ADDR_BY_MAC 0x0100 ++ ++#define RTCFG_ADDR_MAC 0x00 ++#define RTCFG_ADDR_IP 0x01 ++#define RTCFG_ADDR_MASK 0xFF ++ ++typedef enum { ++ RTCFG_CMD_SERVER, ++ RTCFG_CMD_ADD, ++ RTCFG_CMD_DEL, ++ RTCFG_CMD_WAIT, ++ RTCFG_CMD_CLIENT, ++ RTCFG_CMD_ANNOUNCE, ++ RTCFG_CMD_READY, ++ RTCFG_CMD_DETACH, ++ ++ /* internal usage only */ ++ RTCFG_TIMER, ++ RTCFG_FRM_STAGE_1_CFG, ++ RTCFG_FRM_ANNOUNCE_NEW, ++ RTCFG_FRM_ANNOUNCE_REPLY, ++ RTCFG_FRM_STAGE_2_CFG, ++ RTCFG_FRM_STAGE_2_CFG_FRAG, ++ RTCFG_FRM_ACK_CFG, ++ RTCFG_FRM_READY, ++ RTCFG_FRM_HEARTBEAT, ++ RTCFG_FRM_DEAD_STATION ++} RTCFG_EVENT; ++ ++struct rtskb; ++struct rtcfg_station; ++struct rtcfg_connection; ++struct rtcfg_file; ++ ++struct rtcfg_cmd { ++ struct rtnet_ioctl_head head; ++ ++ union { ++ struct { ++ __u32 period; ++ __u32 burstrate; ++ __u32 heartbeat; ++ __u32 threshold; ++ __u32 flags; ++ } server; ++ ++ struct { ++ __u32 addr_type; ++ __u32 ip_addr; ++ __u8 mac_addr[DEV_ADDR_LEN]; ++ __u32 timeout; ++ __u16 stage1_size; ++ __u16 __padding; ++ void *stage1_data; ++ const char *stage2_filename; ++ ++ /* internal usage only */ ++ struct rtcfg_connection *conn_buf; ++ struct rtcfg_file *stage2_file; ++ } add; ++ ++ struct { ++ __u32 addr_type; ++ __u32 ip_addr; ++ __u8 mac_addr[DEV_ADDR_LEN]; ++ ++ /* internal usage only */ ++ struct rtcfg_connection *conn_buf; ++ struct rtcfg_file *stage2_file; ++ } del; ++ ++ struct { ++ __u32 timeout; ++ } wait; ++ ++ struct { ++ __u32 timeout; ++ __u32 max_stations; ++ __u64 buffer_size; ++ void *buffer; ++ ++ /* internal usage only */ ++ struct rtcfg_station *station_buf; ++ struct rtskb *rtskb; ++ } client; ++ ++ struct { ++ __u32 timeout; ++ __u32 flags; ++ __u32 burstrate; ++ __u32 __padding; ++ __u64 buffer_size; ++ void *buffer; ++ ++ /* internal usage only */ ++ struct rtskb *rtskb; ++ } announce; ++ ++ struct { ++ __u32 timeout; ++ } ready; ++ ++ struct { ++ /* internal usage only */ ++ struct rtcfg_connection *conn_buf; ++ struct rtcfg_file *stage2_file; ++ struct rtcfg_station *station_addr_list; ++ struct rtskb *stage2_chain; ++ } detach; ++ ++ __u64 __padding[16]; ++ } args; ++ ++ /* internal usage only */ ++ union { ++ struct { ++ int ifindex; ++ RTCFG_EVENT event_id; ++ } data; ++ ++ __u64 __padding[2]; ++ 
} internal; ++}; ++ ++#define RTCFG_IOC_SERVER \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_SERVER, struct rtcfg_cmd) ++#define RTCFG_IOC_ADD \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_ADD, struct rtcfg_cmd) ++#define RTCFG_IOC_DEL \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_DEL, struct rtcfg_cmd) ++#define RTCFG_IOC_WAIT \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_WAIT, struct rtcfg_cmd) ++#define RTCFG_IOC_CLIENT \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_CLIENT, struct rtcfg_cmd) ++#define RTCFG_IOC_ANNOUNCE \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_ANNOUNCE, struct rtcfg_cmd) ++#define RTCFG_IOC_READY \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_READY, struct rtcfg_cmd) ++#define RTCFG_IOC_DETACH \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_DETACH, struct rtcfg_cmd) ++ ++#endif /* __RTCFG_H_ */ +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_proc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_proc.c 2021-04-07 16:01:26.652634988 +0800 +@@ -0,0 +1,347 @@ ++/*** ++ * ++ * rtcfg/rtcfg_proc.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++DEFINE_MUTEX(nrt_proc_lock); ++static struct xnvfile_directory rtcfg_proc_root; ++ ++static int rtnet_rtcfg_proc_lock_get(struct xnvfile *vfile) ++{ ++ return mutex_lock_interruptible(&nrt_proc_lock); ++} ++ ++static void rtnet_rtcfg_proc_lock_put(struct xnvfile *vfile) ++{ ++ return mutex_unlock(&nrt_proc_lock); ++} ++ ++static struct xnvfile_lock_ops rtnet_rtcfg_proc_lock_ops = { ++ .get = rtnet_rtcfg_proc_lock_get, ++ .put = rtnet_rtcfg_proc_lock_put, ++}; ++ ++int rtnet_rtcfg_dev_state_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct rtcfg_device *rtcfg_dev = xnvfile_priv(it->vfile); ++ const char *state_name[] = { "OFF", ++ "SERVER_RUNNING", ++ "CLIENT_0", ++ "CLIENT_1", ++ "CLIENT_ANNOUNCED", ++ "CLIENT_ALL_KNOWN", ++ "CLIENT_ALL_FRAMES", ++ "CLIENT_2", ++ "CLIENT_READY" }; ++ ++ xnvfile_printf(it, ++ "state:\t\t\t%d (%s)\n" ++ "flags:\t\t\t%08lX\n" ++ "other stations:\t\t%d\n" ++ "stations found:\t\t%d\n" ++ "stations ready:\t\t%d\n", ++ rtcfg_dev->state, state_name[rtcfg_dev->state], ++ rtcfg_dev->flags, rtcfg_dev->other_stations, ++ rtcfg_dev->stations_found, rtcfg_dev->stations_ready); ++ ++ if (rtcfg_dev->state == RTCFG_MAIN_SERVER_RUNNING) { ++ xnvfile_printf(it, ++ "configured clients:\t%d\n" ++ "burstrate:\t\t%d\n" ++ "heartbeat period:\t%d ms\n", ++ rtcfg_dev->spec.srv.clients_configured, ++ rtcfg_dev->burstrate, ++ rtcfg_dev->spec.srv.heartbeat); ++ } else if (rtcfg_dev->state != RTCFG_MAIN_OFF) { ++ xnvfile_printf( ++ it, ++ "address type:\t\t%d\n" ++ "server address:\t\t%02X:%02X:%02X:%02X:%02X:%02X\n" ++ "stage 2 config:\t\t%d/%d\n", ++ rtcfg_dev->spec.clt.addr_type, ++ rtcfg_dev->spec.clt.srv_mac_addr[0], ++ rtcfg_dev->spec.clt.srv_mac_addr[1], ++ rtcfg_dev->spec.clt.srv_mac_addr[2], ++ rtcfg_dev->spec.clt.srv_mac_addr[3], ++ rtcfg_dev->spec.clt.srv_mac_addr[4], ++ rtcfg_dev->spec.clt.srv_mac_addr[5], ++ rtcfg_dev->spec.clt.cfg_offs, ++ rtcfg_dev->spec.clt.cfg_len); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_rtcfg_dev_state_vfile_ops = { ++ .show = rtnet_rtcfg_dev_state_show, ++}; ++ ++int rtnet_rtcfg_dev_stations_show(struct xnvfile_regular_iterator *it, void *d) ++{ ++ struct rtcfg_device *rtcfg_dev = xnvfile_priv(it->vfile); ++ struct rtcfg_connection *conn; ++ struct rtcfg_station *station; ++ int i; ++ ++ if (rtcfg_dev->state == RTCFG_MAIN_SERVER_RUNNING) { ++ list_for_each_entry (conn, &rtcfg_dev->spec.srv.conn_list, ++ entry) { ++ if ((conn->state != RTCFG_CONN_SEARCHING) && ++ (conn->state != RTCFG_CONN_DEAD)) ++ xnvfile_printf( ++ it, ++ "%02X:%02X:%02X:%02X:%02X:%02X\t%02X\n", ++ conn->mac_addr[0], conn->mac_addr[1], ++ conn->mac_addr[2], conn->mac_addr[3], ++ conn->mac_addr[4], conn->mac_addr[5], ++ conn->flags); ++ } ++ } else if (rtcfg_dev->spec.clt.station_addr_list) { ++ for (i = 0; i < rtcfg_dev->stations_found; i++) { ++ station = &rtcfg_dev->spec.clt.station_addr_list[i]; ++ ++ xnvfile_printf( ++ it, "%02X:%02X:%02X:%02X:%02X:%02X\t%02X\n", ++ station->mac_addr[0], station->mac_addr[1], ++ station->mac_addr[2], station->mac_addr[3], ++ station->mac_addr[4], station->mac_addr[5], ++ station->flags); ++ } ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_rtcfg_dev_stations_vfile_ops = { ++ .show = rtnet_rtcfg_dev_stations_show, ++}; ++ ++int rtnet_rtcfg_dev_conn_state_show(struct xnvfile_regular_iterator *it, ++ void *d) ++{ ++ struct rtcfg_connection *conn = 
xnvfile_priv(it->vfile); ++ char *state_name[] = { "SEARCHING", "STAGE_1", "STAGE_2", "READY", ++ "DEAD" }; ++ ++ xnvfile_printf(it, ++ "state:\t\t\t%d (%s)\n" ++ "flags:\t\t\t%02X\n" ++ "stage 1 size:\t\t%zd\n" ++ "stage 2 filename:\t%s\n" ++ "stage 2 size:\t\t%zd\n" ++ "stage 2 offset:\t\t%d\n" ++ "burstrate:\t\t%d\n" ++ "mac address:\t\t%02X:%02X:%02X:%02X:%02X:%02X\n", ++ conn->state, state_name[conn->state], conn->flags, ++ conn->stage1_size, ++ (conn->stage2_file) ? conn->stage2_file->name : "-", ++ (conn->stage2_file) ? conn->stage2_file->size : 0, ++ conn->cfg_offs, conn->burstrate, conn->mac_addr[0], ++ conn->mac_addr[1], conn->mac_addr[2], conn->mac_addr[3], ++ conn->mac_addr[4], conn->mac_addr[5]); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if ((conn->addr_type & RTCFG_ADDR_MASK) == RTCFG_ADDR_IP) ++ xnvfile_printf(it, "ip:\t\t\t%u.%u.%u.%u\n", ++ NIPQUAD(conn->addr.ip_addr)); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_rtcfg_dev_conn_state_vfile_ops = { ++ .show = rtnet_rtcfg_dev_conn_state_show, ++}; ++ ++void rtcfg_update_conn_proc_entries(int ifindex) ++{ ++ struct rtcfg_device *dev = &device[ifindex]; ++ struct rtcfg_connection *conn; ++ char name_buf[64]; ++ ++ if (dev->state != RTCFG_MAIN_SERVER_RUNNING) ++ return; ++ ++ list_for_each_entry (conn, &dev->spec.srv.conn_list, entry) { ++ switch (conn->addr_type & RTCFG_ADDR_MASK) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ case RTCFG_ADDR_IP: ++ snprintf(name_buf, 64, "CLIENT_%u.%u.%u.%u", ++ NIPQUAD(conn->addr.ip_addr)); ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ default: /* RTCFG_ADDR_MAC */ ++ snprintf(name_buf, 64, ++ "CLIENT_%02X%02X%02X%02X%02X%02X", ++ conn->mac_addr[0], conn->mac_addr[1], ++ conn->mac_addr[2], conn->mac_addr[3], ++ conn->mac_addr[4], conn->mac_addr[5]); ++ break; ++ } ++ memset(&conn->proc_entry, '\0', sizeof(conn->proc_entry)); ++ conn->proc_entry.entry.lockops = &rtnet_rtcfg_proc_lock_ops; ++ conn->proc_entry.ops = &rtnet_rtcfg_dev_conn_state_vfile_ops; ++ xnvfile_priv(&conn->proc_entry) = conn; ++ ++ xnvfile_init_regular(name_buf, &conn->proc_entry, ++ &dev->proc_entry); ++ } ++} ++ ++void rtcfg_remove_conn_proc_entries(int ifindex) ++{ ++ struct rtcfg_device *dev = &device[ifindex]; ++ struct rtcfg_connection *conn; ++ ++ if (dev->state != RTCFG_MAIN_SERVER_RUNNING) ++ return; ++ ++ list_for_each_entry (conn, &dev->spec.srv.conn_list, entry) ++ xnvfile_destroy_regular(&conn->proc_entry); ++} ++ ++void rtcfg_new_rtdev(struct rtnet_device *rtdev) ++{ ++ struct rtcfg_device *dev = &device[rtdev->ifindex]; ++ int err; ++ ++ mutex_lock(&nrt_proc_lock); ++ ++ memset(&dev->proc_entry, '\0', sizeof(dev->proc_entry)); ++ err = xnvfile_init_dir(rtdev->name, &dev->proc_entry, &rtcfg_proc_root); ++ if (err < 0) ++ goto error1; ++ ++ memset(&dev->proc_state_vfile, '\0', sizeof(dev->proc_state_vfile)); ++ dev->proc_state_vfile.entry.lockops = &rtnet_rtcfg_proc_lock_ops; ++ dev->proc_state_vfile.ops = &rtnet_rtcfg_dev_state_vfile_ops; ++ xnvfile_priv(&dev->proc_state_vfile) = dev; ++ ++ err = xnvfile_init_regular("state", &dev->proc_state_vfile, ++ &dev->proc_entry); ++ if (err < 0) ++ goto error2; ++ ++ memset(&dev->proc_stations_vfile, '\0', ++ sizeof(dev->proc_stations_vfile)); ++ dev->proc_stations_vfile.entry.lockops = &rtnet_rtcfg_proc_lock_ops; ++ dev->proc_stations_vfile.ops = &rtnet_rtcfg_dev_stations_vfile_ops; ++ xnvfile_priv(&dev->proc_stations_vfile) = dev; ++ ++ err = 
xnvfile_init_regular("stations_list", &dev->proc_stations_vfile, ++ &dev->proc_entry); ++ if (err < 0) ++ goto error3; ++ ++ mutex_unlock(&nrt_proc_lock); ++ ++ return; ++ ++error3: ++ xnvfile_destroy_regular(&dev->proc_state_vfile); ++error2: ++ xnvfile_destroy_dir(&dev->proc_entry); ++error1: ++ dev->proc_entry.entry.pde = NULL; ++ mutex_unlock(&nrt_proc_lock); ++} ++ ++void rtcfg_remove_rtdev(struct rtnet_device *rtdev) ++{ ++ struct rtcfg_device *dev = &device[rtdev->ifindex]; ++ ++ // To-Do: issue down command ++ ++ mutex_lock(&nrt_proc_lock); ++ ++ if (dev->proc_entry.entry.pde) { ++ rtcfg_remove_conn_proc_entries(rtdev->ifindex); ++ ++ xnvfile_destroy_regular(&dev->proc_stations_vfile); ++ xnvfile_destroy_regular(&dev->proc_state_vfile); ++ xnvfile_destroy_dir(&dev->proc_entry); ++ dev->proc_entry.entry.pde = NULL; ++ } ++ ++ mutex_unlock(&nrt_proc_lock); ++} ++ ++static struct rtdev_event_hook rtdev_hook = { .register_device = ++ rtcfg_new_rtdev, ++ .unregister_device = ++ rtcfg_remove_rtdev, ++ .ifup = NULL, ++ .ifdown = NULL }; ++ ++int rtcfg_init_proc(void) ++{ ++ struct rtnet_device *rtdev; ++ int i, err; ++ ++ err = xnvfile_init_dir("rtcfg", &rtcfg_proc_root, &rtnet_proc_root); ++ if (err < 0) ++ goto err1; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtdev_get_by_index(i); ++ if (rtdev) { ++ rtcfg_new_rtdev(rtdev); ++ rtdev_dereference(rtdev); ++ } ++ } ++ ++ rtdev_add_event_hook(&rtdev_hook); ++ return 0; ++ ++err1: ++ printk("RTcfg: unable to initialise /proc entries\n"); ++ return err; ++} ++ ++void rtcfg_cleanup_proc(void) ++{ ++ struct rtnet_device *rtdev; ++ int i; ++ ++ rtdev_del_event_hook(&rtdev_hook); ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtdev_get_by_index(i); ++ if (rtdev) { ++ rtcfg_remove_rtdev(rtdev); ++ rtdev_dereference(rtdev); ++ } ++ } ++ ++ xnvfile_destroy_dir(&rtcfg_proc_root); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_conn_event.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_conn_event.c 2021-04-07 16:01:26.647634995 +0800 +@@ -0,0 +1,364 @@ ++/*** ++ * ++ * rtcfg/rtcfg_conn_event.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++/****************************** states ***************************************/ ++static int rtcfg_conn_state_searching(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data); ++static int rtcfg_conn_state_stage_1(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data); ++static int rtcfg_conn_state_stage_2(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data); ++static int rtcfg_conn_state_ready(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data); ++static int rtcfg_conn_state_dead(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTCFG_DEBUG ++const char *rtcfg_conn_state[] = { "RTCFG_CONN_SEARCHING", "RTCFG_CONN_STAGE_1", ++ "RTCFG_CONN_STAGE_2", "RTCFG_CONN_READY", ++ "RTCFG_CONN_DEAD" }; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTCFG_DEBUG */ ++ ++static void rtcfg_conn_recv_announce_new(struct rtcfg_connection *conn, ++ struct rtskb *rtskb); ++static void rtcfg_conn_check_cfg_timeout(struct rtcfg_connection *conn); ++static void rtcfg_conn_check_heartbeat(struct rtcfg_connection *conn); ++ ++static int (*state[])(struct rtcfg_connection *conn, RTCFG_EVENT event_id, ++ void *event_data) = { ++ rtcfg_conn_state_searching, rtcfg_conn_state_stage_1, ++ rtcfg_conn_state_stage_2, rtcfg_conn_state_ready, rtcfg_conn_state_dead ++}; ++ ++int rtcfg_do_conn_event(struct rtcfg_connection *conn, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ int conn_state = conn->state; ++ ++ RTCFG_DEBUG(3, "RTcfg: %s() conn=%p, event=%s, state=%s\n", ++ __FUNCTION__, conn, rtcfg_event[event_id], ++ rtcfg_conn_state[conn_state]); ++ ++ return (*state[conn_state])(conn, event_id, event_data); ++} ++ ++static void rtcfg_next_conn_state(struct rtcfg_connection *conn, ++ RTCFG_CONN_STATE state) ++{ ++ RTCFG_DEBUG(4, "RTcfg: next connection state=%s \n", ++ rtcfg_conn_state[state]); ++ ++ conn->state = state; ++} ++ ++static int rtcfg_conn_state_searching(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[conn->ifindex]; ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ ++ switch (event_id) { ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ rtcfg_conn_recv_announce_new(conn, rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ conn->last_frame = rtskb->time_stamp; ++ ++ rtcfg_next_conn_state(conn, RTCFG_CONN_READY); ++ ++ rtcfg_dev->stations_found++; ++ rtcfg_dev->stations_ready++; ++ rtcfg_dev->spec.srv.clients_configured++; ++ if (rtcfg_dev->spec.srv.clients_configured == ++ rtcfg_dev->other_stations) ++ rtcfg_complete_cmd(conn->ifindex, RTCFG_CMD_WAIT, 0); ++ ++ break; ++ ++ default: ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for conn %p in %s()\n", ++ rtcfg_event[event_id], conn, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int rtcfg_conn_state_stage_1(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rtcfg_device *rtcfg_dev = &device[conn->ifindex]; ++ struct rtcfg_frm_ack_cfg *ack_cfg; ++ int packets; ++ ++ switch (event_id) { ++ case RTCFG_FRM_ACK_CFG: ++ conn->last_frame = rtskb->time_stamp; ++ ++ ack_cfg = (struct rtcfg_frm_ack_cfg *)rtskb->data; ++ conn->cfg_offs = ntohl(ack_cfg->ack_len); ++ ++ if ((conn->flags & _RTCFG_FLAG_STAGE_2_DATA) != 0) { ++ if (conn->cfg_offs >= conn->stage2_file->size) { ++ 
rtcfg_dev->spec.srv.clients_configured++; ++ if (rtcfg_dev->spec.srv.clients_configured == ++ rtcfg_dev->other_stations) ++ rtcfg_complete_cmd(conn->ifindex, ++ RTCFG_CMD_WAIT, 0); ++ rtcfg_next_conn_state( ++ conn, ((conn->flags & ++ _RTCFG_FLAG_READY) != 0) ? ++ RTCFG_CONN_READY : ++ RTCFG_CONN_STAGE_2); ++ } else { ++ packets = conn->burstrate; ++ while ((conn->cfg_offs < ++ conn->stage2_file->size) && ++ (packets > 0)) { ++ rtcfg_send_stage_2_frag(conn); ++ packets--; ++ } ++ } ++ } else { ++ rtcfg_dev->spec.srv.clients_configured++; ++ if (rtcfg_dev->spec.srv.clients_configured == ++ rtcfg_dev->other_stations) ++ rtcfg_complete_cmd(conn->ifindex, ++ RTCFG_CMD_WAIT, 0); ++ rtcfg_next_conn_state( ++ conn, ((conn->flags & _RTCFG_FLAG_READY) != 0) ? ++ RTCFG_CONN_READY : ++ RTCFG_CONN_STAGE_2); ++ } ++ ++ break; ++ ++ case RTCFG_TIMER: ++ rtcfg_conn_check_cfg_timeout(conn); ++ break; ++ ++ default: ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for conn %p in %s()\n", ++ rtcfg_event[event_id], conn, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int rtcfg_conn_state_stage_2(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rtcfg_device *rtcfg_dev = &device[conn->ifindex]; ++ ++ switch (event_id) { ++ case RTCFG_FRM_READY: ++ conn->last_frame = rtskb->time_stamp; ++ ++ rtcfg_next_conn_state(conn, RTCFG_CONN_READY); ++ ++ conn->flags |= _RTCFG_FLAG_READY; ++ rtcfg_dev->stations_ready++; ++ ++ if (rtcfg_dev->stations_ready == rtcfg_dev->other_stations) ++ rtcfg_complete_cmd(conn->ifindex, RTCFG_CMD_READY, 0); ++ ++ break; ++ ++ case RTCFG_TIMER: ++ rtcfg_conn_check_cfg_timeout(conn); ++ break; ++ ++ default: ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for conn %p in %s()\n", ++ rtcfg_event[event_id], conn, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int rtcfg_conn_state_ready(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ ++ switch (event_id) { ++ case RTCFG_TIMER: ++ rtcfg_conn_check_heartbeat(conn); ++ break; ++ ++ case RTCFG_FRM_HEARTBEAT: ++ conn->last_frame = rtskb->time_stamp; ++ break; ++ ++ default: ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for conn %p in %s()\n", ++ rtcfg_event[event_id], conn, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int rtcfg_conn_state_dead(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data) ++{ ++ switch (event_id) { ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ rtcfg_conn_recv_announce_new(conn, (struct rtskb *)event_data); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ /* Spec to-do: signal station that it is assumed to be dead ++ (=> reboot command?) 
*/ ++ ++ default: ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for conn %p in %s()\n", ++ rtcfg_event[event_id], conn, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static void rtcfg_conn_recv_announce_new(struct rtcfg_connection *conn, ++ struct rtskb *rtskb) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[conn->ifindex]; ++ struct rtcfg_frm_announce *announce_new; ++ int packets; ++ ++ conn->last_frame = rtskb->time_stamp; ++ ++ announce_new = (struct rtcfg_frm_announce *)rtskb->data; ++ ++ conn->flags = announce_new->flags; ++ if (announce_new->burstrate < conn->burstrate) ++ conn->burstrate = announce_new->burstrate; ++ ++ rtcfg_next_conn_state(conn, RTCFG_CONN_STAGE_1); ++ ++ rtcfg_dev->stations_found++; ++ if ((conn->flags & _RTCFG_FLAG_READY) != 0) ++ rtcfg_dev->stations_ready++; ++ ++ if (((conn->flags & _RTCFG_FLAG_STAGE_2_DATA) != 0) && ++ (conn->stage2_file != NULL)) { ++ packets = conn->burstrate - 1; ++ ++ rtcfg_send_stage_2(conn, 1); ++ ++ while ((conn->cfg_offs < conn->stage2_file->size) && ++ (packets > 0)) { ++ rtcfg_send_stage_2_frag(conn); ++ packets--; ++ } ++ } else { ++ rtcfg_send_stage_2(conn, 0); ++ conn->flags &= ~_RTCFG_FLAG_STAGE_2_DATA; ++ } ++} ++ ++static void rtcfg_conn_check_cfg_timeout(struct rtcfg_connection *conn) ++{ ++ struct rtcfg_device *rtcfg_dev; ++ ++ if (!conn->cfg_timeout) ++ return; ++ ++ if (rtdm_clock_read() >= conn->last_frame + conn->cfg_timeout) { ++ rtcfg_dev = &device[conn->ifindex]; ++ ++ rtcfg_dev->stations_found--; ++ if (conn->state == RTCFG_CONN_STAGE_2) ++ rtcfg_dev->spec.srv.clients_configured--; ++ ++ rtcfg_next_conn_state(conn, RTCFG_CONN_SEARCHING); ++ conn->cfg_offs = 0; ++ conn->flags = 0; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if (conn->addr_type == RTCFG_ADDR_IP) { ++ struct rtnet_device *rtdev; ++ ++ /* MAC address yet unknown -> use broadcast address */ ++ rtdev = rtdev_get_by_index(conn->ifindex); ++ if (rtdev == NULL) ++ return; ++ memcpy(conn->mac_addr, rtdev->broadcast, MAX_ADDR_LEN); ++ rtdev_dereference(rtdev); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ } ++} ++ ++static void rtcfg_conn_check_heartbeat(struct rtcfg_connection *conn) ++{ ++ u64 timeout; ++ struct rtcfg_device *rtcfg_dev; ++ ++ timeout = device[conn->ifindex].spec.srv.heartbeat_timeout; ++ if (!timeout) ++ return; ++ ++ if (rtdm_clock_read() >= conn->last_frame + timeout) { ++ rtcfg_dev = &device[conn->ifindex]; ++ ++ rtcfg_dev->stations_found--; ++ rtcfg_dev->stations_ready--; ++ rtcfg_dev->spec.srv.clients_configured--; ++ ++ rtcfg_send_dead_station(conn); ++ ++ rtcfg_next_conn_state(conn, RTCFG_CONN_DEAD); ++ conn->cfg_offs = 0; ++ conn->flags = 0; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if ((conn->addr_type & RTCFG_ADDR_MASK) == RTCFG_ADDR_IP) { ++ struct rtnet_device *rtdev = ++ rtdev_get_by_index(conn->ifindex); ++ ++ rt_ip_route_del_host(conn->addr.ip_addr, rtdev); ++ ++ if (rtdev == NULL) ++ return; ++ ++ if (!(conn->addr_type & FLAG_ASSIGN_ADDR_BY_MAC)) ++ /* MAC address yet unknown -> use broadcast address */ ++ memcpy(conn->mac_addr, rtdev->broadcast, ++ MAX_ADDR_LEN); ++ ++ rtdev_dereference(rtdev); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ } ++} +--- linux/drivers/xenomai/net/stack/rtcfg/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/Makefile 2021-04-07 16:01:26.643635001 +0800 +@@ -0,0 +1,14 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTCFG) += rtcfg.o ++ ++rtcfg-y := \ ++ 
rtcfg_module.o \
++	rtcfg_event.o \
++	rtcfg_client_event.o \
++	rtcfg_conn_event.o \
++	rtcfg_ioctl.o \
++	rtcfg_frame.o \
++	rtcfg_timer.o \
++	rtcfg_file.o \
++	rtcfg_proc.o
+--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_module.c 1970-01-01 08:00:00.000000000 +0800
++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_module.c 2021-04-07 16:01:26.638635008 +0800
+@@ -0,0 +1,83 @@
++/***
++ *
++ * rtcfg/rtcfg_module.c
++ *
++ * Real-Time Configuration Distribution Protocol
++ *
++ * Copyright (C) 2003, 2004 Jan Kiszka
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ */
++
++#include
++#include
++#include
++
++#include
++#include
++#include
++#include
++
++MODULE_LICENSE("GPL");
++
++int __init rtcfg_init(void)
++{
++	int ret;
++
++	printk("RTcfg: init real-time configuration distribution protocol\n");
++
++	ret = rtcfg_init_ioctls();
++	if (ret != 0)
++		goto error1;
++
++	rtcfg_init_state_machines();
++
++	ret = rtcfg_init_frames();
++	if (ret != 0)
++		goto error2;
++
++#ifdef CONFIG_XENO_OPT_VFILE
++	ret = rtcfg_init_proc();
++	if (ret != 0) {
++		rtcfg_cleanup_frames();
++		goto error2;
++	}
++#endif
++
++	return 0;
++
++error2:
++	rtcfg_cleanup_state_machines();
++	rtcfg_cleanup_ioctls();
++
++error1:
++	return ret;
++}
++
++void rtcfg_cleanup(void)
++{
++#ifdef CONFIG_XENO_OPT_VFILE
++	rtcfg_cleanup_proc();
++#endif
++	rtcfg_cleanup_frames();
++	rtcfg_cleanup_state_machines();
++	rtcfg_cleanup_ioctls();
++
++	printk("RTcfg: unloaded\n");
++}
++
++module_init(rtcfg_init);
++module_exit(rtcfg_cleanup);
+--- linux/drivers/xenomai/net/stack/rtcfg/Kconfig 1970-01-01 08:00:00.000000000 +0800
++++ linux-patched/drivers/xenomai/net/stack/rtcfg/Kconfig 2021-04-07 16:01:26.633635015 +0800
+@@ -0,0 +1,23 @@
++config XENO_DRIVERS_NET_RTCFG
++	depends on XENO_DRIVERS_NET
++	tristate "RTcfg Service"
++	default y
++	---help---
++	The Real-Time Configuration service configures and monitors nodes in
++	a RTnet network. It works both with plain MAC as well as with IPv4
++	addresses (in case CONFIG_RTNET_RTIPV4 has been switched on). RTcfg
++	consists of a configuration server, which can run on the same station
++	as the TDMA master e.g., and one or more clients. Clients can join and
++	leave the network during runtime without interfering with other
++	stations. Besides network configuration, the RTcfg server can also
++	distribute custom data.
++
++	See Documentation/README.rtcfg for further information.
++
++config XENO_DRIVERS_NET_RTCFG_DEBUG
++	bool "RTcfg Debugging"
++	depends on XENO_DRIVERS_NET_RTCFG
++	default n
++	---help---
++	Enables debug message output of the RTcfg state machines. Switch on if
++	you have to trace some problem related to RTcfg.
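
The Kconfig help above describes the split into one configuration server and several clients, with optional distribution of custom data (the stage 2 file). As a simplified illustration of the burst pacing that rtcfg_send_stage_2() and rtcfg_send_stage_2_frag() implement further down in this patch, the following stand-alone sketch (not part of the patch) models a server that sends at most burstrate fragments per round, each capped by the MTU minus the fragment header, and then waits for the client's acknowledged offset before continuing. The file size, payload size, and burst rate are invented example values, not taken from the patch.

```c
/*
 * Hypothetical, stand-alone model of the stage 2 pacing used by the RTcfg
 * server: push at most `burstrate` fragments, then wait for the client's
 * RTCFG_FRM_ACK_CFG (which carries the received offset) before the next
 * burst. All numeric values are made-up examples.
 */
#include <stdio.h>
#include <stddef.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	size_t file_size = 10000;        /* size of the stage 2 config file */
	size_t frag_payload = 1500 - 18; /* assumed MTU minus frame header */
	unsigned int burstrate = 4;      /* negotiated burst rate */
	size_t cfg_offs = 0;             /* mirrors conn->cfg_offs on the server */
	unsigned int burst = 0;

	while (cfg_offs < file_size) {
		unsigned int packets = burstrate;

		/* one burst: corresponds to the fragment loops in the patch */
		while (cfg_offs < file_size && packets > 0) {
			size_t frag = MIN(frag_payload, file_size - cfg_offs);
			cfg_offs += frag;
			packets--;
		}
		printf("burst %u: client would ack offset %zu of %zu\n",
		       ++burst, cfg_offs, file_size);
		/* the real server resumes only after the ACK frame arrives */
	}
	return 0;
}
```

Capping each round at the negotiated burst rate keeps the configuration traffic from monopolizing the real-time network while a client is being brought up.
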
+--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_file.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_file.c 2021-04-07 16:01:26.629635021 +0800 +@@ -0,0 +1,81 @@ ++/*** ++ * ++ * rtcfg/rtcfg_file.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++/* Note: ++ * We don't need any special lock protection while manipulating the ++ * rtcfg_files list. The list is only accessed through valid connections, and ++ * connections are already lock-protected. ++ */ ++LIST_HEAD(rtcfg_files); ++ ++struct rtcfg_file *rtcfg_get_file(const char *filename) ++{ ++ struct list_head *entry; ++ struct rtcfg_file *file; ++ ++ RTCFG_DEBUG(4, "RTcfg: looking for file %s\n", filename); ++ ++ list_for_each (entry, &rtcfg_files) { ++ file = list_entry(entry, struct rtcfg_file, entry); ++ ++ if (strcmp(file->name, filename) == 0) { ++ file->ref_count++; ++ ++ RTCFG_DEBUG(4, ++ "RTcfg: reusing file entry, now %d users\n", ++ file->ref_count); ++ ++ return file; ++ } ++ } ++ ++ return NULL; ++} ++ ++void rtcfg_add_file(struct rtcfg_file *file) ++{ ++ RTCFG_DEBUG(4, "RTcfg: adding file %s to list\n", file->name); ++ ++ file->ref_count = 1; ++ list_add_tail(&file->entry, &rtcfg_files); ++} ++ ++int rtcfg_release_file(struct rtcfg_file *file) ++{ ++ if (--file->ref_count == 0) { ++ RTCFG_DEBUG(4, "RTcfg: removing file %s from list\n", ++ file->name); ++ ++ list_del(&file->entry); ++ } ++ ++ return file->ref_count; ++} +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_frame.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_frame.c 2021-04-07 16:01:26.624635028 +0800 +@@ -0,0 +1,571 @@ ++/*** ++ * ++ * rtcfg/rtcfg_frame.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++static unsigned int num_rtskbs = 32; ++module_param(num_rtskbs, uint, 0444); ++MODULE_PARM_DESC(num_rtskbs, "Number of realtime socket buffers used by RTcfg"); ++ ++static struct rtskb_pool rtcfg_pool; ++static rtdm_task_t rx_task; ++static rtdm_event_t rx_event; ++static struct rtskb_queue rx_queue; ++ ++void rtcfg_thread_signal(void) ++{ ++ rtdm_event_signal(&rx_event); ++} ++ ++static int rtcfg_rx_handler(struct rtskb *rtskb, struct rtpacket_type *pt) ++{ ++ if (rtskb_acquire(rtskb, &rtcfg_pool) == 0) { ++ rtskb_queue_tail(&rx_queue, rtskb); ++ rtcfg_thread_signal(); ++ } else ++ kfree_rtskb(rtskb); ++ ++ return 0; ++} ++ ++static void rtcfg_rx_task(void *arg) ++{ ++ struct rtskb *rtskb; ++ struct rtcfg_frm_head *frm_head; ++ struct rtnet_device *rtdev; ++ ++ while (!rtdm_task_should_stop()) { ++ if (rtdm_event_wait(&rx_event) < 0) ++ break; ++ ++ while ((rtskb = rtskb_dequeue(&rx_queue))) { ++ rtdev = rtskb->rtdev; ++ ++ if (rtskb->pkt_type == PACKET_OTHERHOST) { ++ kfree_rtskb(rtskb); ++ continue; ++ } ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_head)) { ++ RTCFG_DEBUG( ++ 1, ++ "RTcfg: %s() received an invalid frame\n", ++ __FUNCTION__); ++ kfree_rtskb(rtskb); ++ continue; ++ } ++ ++ frm_head = (struct rtcfg_frm_head *)rtskb->data; ++ ++ if (rtcfg_do_main_event(rtskb->rtdev->ifindex, ++ frm_head->id + ++ RTCFG_FRM_STAGE_1_CFG, ++ rtskb) < 0) ++ kfree_rtskb(rtskb); ++ } ++ ++ rtcfg_timer_run(); ++ } ++} ++ ++int rtcfg_send_frame(struct rtskb *rtskb, struct rtnet_device *rtdev, ++ u8 *dest_addr) ++{ ++ int ret; ++ ++ rtskb->rtdev = rtdev; ++ rtskb->priority = RTCFG_SKB_PRIO; ++ ++ if (rtdev->hard_header) { ++ ret = rtdev->hard_header(rtskb, rtdev, ETH_RTCFG, dest_addr, ++ rtdev->dev_addr, rtskb->len); ++ if (ret < 0) ++ goto err; ++ } ++ ++ if ((rtdev->flags & IFF_UP) != 0) { ++ ret = 0; ++ if (rtdev_xmit(rtskb) != 0) ++ ret = -EAGAIN; ++ } else { ++ ret = -ENETDOWN; ++ goto err; ++ } ++ ++ rtdev_dereference(rtdev); ++ return ret; ++ ++err: ++ kfree_rtskb(rtskb); ++ rtdev_dereference(rtdev); ++ return ret; ++} ++ ++int rtcfg_send_stage_1(struct rtcfg_connection *conn) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_stage_1_cfg *stage_1_frm; ++ ++ rtdev = rtdev_get_by_index(conn->ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_stage_1_cfg) + conn->stage1_size + ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ (((conn->addr_type & RTCFG_ADDR_MASK) == RTCFG_ADDR_IP) ? 
++ 2 * RTCFG_ADDRSIZE_IP : ++ 0); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ stage_1_frm = (struct rtcfg_frm_stage_1_cfg *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_stage_1_cfg)); ++ ++ stage_1_frm->head.id = RTCFG_ID_STAGE_1_CFG; ++ stage_1_frm->head.version = 0; ++ stage_1_frm->addr_type = conn->addr_type & RTCFG_ADDR_MASK; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if (stage_1_frm->addr_type == RTCFG_ADDR_IP) { ++ rtskb_put(rtskb, 2 * RTCFG_ADDRSIZE_IP); ++ ++ memcpy(stage_1_frm->client_addr, &(conn->addr.ip_addr), 4); ++ ++ stage_1_frm = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_frm) + ++ RTCFG_ADDRSIZE_IP); ++ ++ memcpy(stage_1_frm->server_addr, &(rtdev->local_ip), 4); ++ ++ stage_1_frm = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_frm) + ++ RTCFG_ADDRSIZE_IP); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ stage_1_frm->burstrate = device[conn->ifindex].burstrate; ++ stage_1_frm->cfg_len = htons(conn->stage1_size); ++ ++ memcpy(rtskb_put(rtskb, conn->stage1_size), conn->stage1_data, ++ conn->stage1_size); ++ ++ return rtcfg_send_frame(rtskb, rtdev, conn->mac_addr); ++} ++ ++int rtcfg_send_stage_2(struct rtcfg_connection *conn, int send_data) ++{ ++ struct rtnet_device *rtdev; ++ struct rtcfg_device *rtcfg_dev = &device[conn->ifindex]; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_stage_2_cfg *stage_2_frm; ++ size_t total_size; ++ size_t frag_size; ++ ++ rtdev = rtdev_get_by_index(conn->ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ if (send_data) { ++ total_size = conn->stage2_file->size; ++ frag_size = MIN(rtdev->get_mtu(rtdev, RTCFG_SKB_PRIO) - ++ sizeof(struct rtcfg_frm_stage_2_cfg), ++ total_size); ++ } else { ++ total_size = 0; ++ frag_size = 0; ++ } ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_stage_2_cfg) + frag_size; ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ stage_2_frm = (struct rtcfg_frm_stage_2_cfg *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_stage_2_cfg)); ++ ++ stage_2_frm->head.id = RTCFG_ID_STAGE_2_CFG; ++ stage_2_frm->head.version = 0; ++ stage_2_frm->flags = rtcfg_dev->flags; ++ stage_2_frm->stations = htonl(rtcfg_dev->other_stations); ++ stage_2_frm->heartbeat_period = htons(rtcfg_dev->spec.srv.heartbeat); ++ stage_2_frm->cfg_len = htonl(total_size); ++ ++ if (send_data) ++ memcpy(rtskb_put(rtskb, frag_size), conn->stage2_file->buffer, ++ frag_size); ++ conn->cfg_offs = frag_size; ++ ++ return rtcfg_send_frame(rtskb, rtdev, conn->mac_addr); ++} ++ ++int rtcfg_send_stage_2_frag(struct rtcfg_connection *conn) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_stage_2_cfg_frag *stage_2_frm; ++ size_t frag_size; ++ ++ rtdev = rtdev_get_by_index(conn->ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ frag_size = MIN(rtdev->get_mtu(rtdev, RTCFG_SKB_PRIO) - ++ sizeof(struct rtcfg_frm_stage_2_cfg_frag), ++ conn->stage2_file->size - conn->cfg_offs); ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_stage_2_cfg_frag) + frag_size; ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ 
rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ stage_2_frm = (struct rtcfg_frm_stage_2_cfg_frag *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_stage_2_cfg_frag)); ++ ++ stage_2_frm->head.id = RTCFG_ID_STAGE_2_CFG_FRAG; ++ stage_2_frm->head.version = 0; ++ stage_2_frm->frag_offs = htonl(conn->cfg_offs); ++ ++ memcpy(rtskb_put(rtskb, frag_size), ++ conn->stage2_file->buffer + conn->cfg_offs, frag_size); ++ conn->cfg_offs += frag_size; ++ ++ return rtcfg_send_frame(rtskb, rtdev, conn->mac_addr); ++} ++ ++int rtcfg_send_announce_new(int ifindex) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_announce *announce_new; ++ ++ rtdev = rtdev_get_by_index(ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_announce) + ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ (((rtcfg_dev->spec.clt.addr_type & RTCFG_ADDR_MASK) == ++ RTCFG_ADDR_IP) ? ++ RTCFG_ADDRSIZE_IP : ++ 0); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ announce_new = (struct rtcfg_frm_announce *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_announce)); ++ ++ announce_new->head.id = RTCFG_ID_ANNOUNCE_NEW; ++ announce_new->head.version = 0; ++ announce_new->addr_type = rtcfg_dev->spec.clt.addr_type; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if (announce_new->addr_type == RTCFG_ADDR_IP) { ++ rtskb_put(rtskb, RTCFG_ADDRSIZE_IP); ++ ++ memcpy(announce_new->addr, &(rtdev->local_ip), 4); ++ ++ announce_new = ++ (struct rtcfg_frm_announce *)(((u8 *)announce_new) + ++ RTCFG_ADDRSIZE_IP); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ announce_new->flags = rtcfg_dev->flags; ++ announce_new->burstrate = rtcfg_dev->burstrate; ++ ++ return rtcfg_send_frame(rtskb, rtdev, rtdev->broadcast); ++} ++ ++int rtcfg_send_announce_reply(int ifindex, u8 *dest_mac_addr) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_announce *announce_rpl; ++ ++ rtdev = rtdev_get_by_index(ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_announce) + ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ ((rtcfg_dev->spec.clt.addr_type == RTCFG_ADDR_IP) ? 
++ RTCFG_ADDRSIZE_IP : ++ 0); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ announce_rpl = (struct rtcfg_frm_announce *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_announce)); ++ ++ announce_rpl->head.id = RTCFG_ID_ANNOUNCE_REPLY; ++ announce_rpl->head.version = 0; ++ announce_rpl->addr_type = rtcfg_dev->spec.clt.addr_type; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if (announce_rpl->addr_type == RTCFG_ADDR_IP) { ++ rtskb_put(rtskb, RTCFG_ADDRSIZE_IP); ++ ++ memcpy(announce_rpl->addr, &(rtdev->local_ip), 4); ++ ++ announce_rpl = ++ (struct rtcfg_frm_announce *)(((u8 *)announce_rpl) + ++ RTCFG_ADDRSIZE_IP); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ announce_rpl->flags = rtcfg_dev->flags & _RTCFG_FLAG_READY; ++ announce_rpl->burstrate = 0; /* padding field */ ++ ++ return rtcfg_send_frame(rtskb, rtdev, dest_mac_addr); ++} ++ ++int rtcfg_send_ack(int ifindex) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_ack_cfg *ack_frm; ++ ++ rtdev = rtdev_get_by_index(ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + sizeof(struct rtcfg_frm_ack_cfg); ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ ack_frm = (struct rtcfg_frm_ack_cfg *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_ack_cfg)); ++ ++ ack_frm->head.id = RTCFG_ID_ACK_CFG; ++ ack_frm->head.version = 0; ++ ack_frm->ack_len = htonl(device[ifindex].spec.clt.cfg_offs); ++ ++ return rtcfg_send_frame(rtskb, rtdev, ++ device[ifindex].spec.clt.srv_mac_addr); ++} ++ ++int rtcfg_send_simple_frame(int ifindex, int frame_id, u8 *dest_addr) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_simple *simple_frm; ++ ++ rtdev = rtdev_get_by_index(ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + sizeof(struct rtcfg_frm_simple); ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ simple_frm = (struct rtcfg_frm_simple *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_simple)); ++ ++ simple_frm->head.id = frame_id; ++ simple_frm->head.version = 0; ++ ++ return rtcfg_send_frame(rtskb, rtdev, ++ (dest_addr) ? dest_addr : rtdev->broadcast); ++} ++ ++int rtcfg_send_dead_station(struct rtcfg_connection *conn) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_dead_station *dead_station_frm; ++ ++ rtdev = rtdev_get_by_index(conn->ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_dead_station) + ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ (((conn->addr_type & RTCFG_ADDR_MASK) == RTCFG_ADDR_IP) ? 
++ RTCFG_ADDRSIZE_IP : ++ 0); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ dead_station_frm = (struct rtcfg_frm_dead_station *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_dead_station)); ++ ++ dead_station_frm->head.id = RTCFG_ID_DEAD_STATION; ++ dead_station_frm->head.version = 0; ++ dead_station_frm->addr_type = conn->addr_type & RTCFG_ADDR_MASK; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if (dead_station_frm->addr_type == RTCFG_ADDR_IP) { ++ rtskb_put(rtskb, RTCFG_ADDRSIZE_IP); ++ ++ memcpy(dead_station_frm->logical_addr, &(conn->addr.ip_addr), ++ 4); ++ ++ dead_station_frm = (struct rtcfg_frm_dead_station ++ *)(((u8 *)dead_station_frm) + ++ RTCFG_ADDRSIZE_IP); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ /* Ethernet-specific! */ ++ memcpy(dead_station_frm->physical_addr, conn->mac_addr, ETH_ALEN); ++ memset(&dead_station_frm->physical_addr[ETH_ALEN], 0, ++ sizeof(dead_station_frm->physical_addr) - ETH_ALEN); ++ ++ return rtcfg_send_frame(rtskb, rtdev, rtdev->broadcast); ++} ++ ++static struct rtpacket_type rtcfg_packet_type = { .type = __constant_htons( ++ ETH_RTCFG), ++ .handler = rtcfg_rx_handler }; ++ ++int __init rtcfg_init_frames(void) ++{ ++ int ret; ++ ++ if (rtskb_module_pool_init(&rtcfg_pool, num_rtskbs) < num_rtskbs) ++ return -ENOMEM; ++ ++ rtskb_queue_init(&rx_queue); ++ rtdm_event_init(&rx_event, 0); ++ ++ ret = rtdm_task_init(&rx_task, "rtcfg-rx", rtcfg_rx_task, 0, ++ RTDM_TASK_LOWEST_PRIORITY, 0); ++ if (ret < 0) { ++ rtdm_event_destroy(&rx_event); ++ goto error1; ++ } ++ ++ ret = rtdev_add_pack(&rtcfg_packet_type); ++ if (ret < 0) ++ goto error2; ++ ++ return 0; ++ ++error2: ++ rtdm_event_destroy(&rx_event); ++ rtdm_task_destroy(&rx_task); ++ ++error1: ++ rtskb_pool_release(&rtcfg_pool); ++ ++ return ret; ++} ++ ++void rtcfg_cleanup_frames(void) ++{ ++ struct rtskb *rtskb; ++ ++ rtdev_remove_pack(&rtcfg_packet_type); ++ ++ rtdm_event_destroy(&rx_event); ++ rtdm_task_destroy(&rx_task); ++ ++ while ((rtskb = rtskb_dequeue(&rx_queue)) != NULL) { ++ kfree_rtskb(rtskb); ++ } ++ ++ rtskb_pool_release(&rtcfg_pool); ++} +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_ioctl.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_ioctl.c 2021-04-07 16:01:26.619635035 +0800 +@@ -0,0 +1,421 @@ ++/*** ++ * ++ * rtcfg/rtcfg_ioctl.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int rtcfg_event_handler(struct rt_proc_call *call) ++{ ++ struct rtcfg_cmd *cmd_event; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ return rtcfg_do_main_event(cmd_event->internal.data.ifindex, ++ cmd_event->internal.data.event_id, call); ++} ++ ++void keep_cmd_add(struct rt_proc_call *call, void *priv_data) ++{ ++ /* do nothing on error (<0), or if file already present (=0) */ ++ if (rtpc_get_result(call) <= 0) ++ return; ++ ++ /* Don't cleanup any buffers, we are going to recycle them! */ ++ rtpc_set_cleanup_handler(call, NULL); ++} ++ ++void cleanup_cmd_add(void *priv_data) ++{ ++ struct rtcfg_cmd *cmd = (struct rtcfg_cmd *)priv_data; ++ void *buf; ++ ++ /* unlock proc and update directory structure */ ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ ++ buf = cmd->args.add.conn_buf; ++ if (buf != NULL) ++ kfree(buf); ++ ++ buf = cmd->args.add.stage1_data; ++ if (buf != NULL) ++ kfree(buf); ++ ++ if (cmd->args.add.stage2_file != NULL) { ++ buf = cmd->args.add.stage2_file->buffer; ++ if (buf != NULL) ++ vfree(buf); ++ kfree(cmd->args.add.stage2_file); ++ } ++} ++ ++void cleanup_cmd_del(void *priv_data) ++{ ++ struct rtcfg_cmd *cmd = (struct rtcfg_cmd *)priv_data; ++ void *buf; ++ ++ /* unlock proc and update directory structure */ ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ ++ if (cmd->args.del.conn_buf != NULL) { ++ buf = cmd->args.del.conn_buf->stage1_data; ++ if (buf != NULL) ++ kfree(buf); ++ kfree(cmd->args.del.conn_buf); ++ } ++ ++ if (cmd->args.del.stage2_file != NULL) { ++ buf = cmd->args.del.stage2_file->buffer; ++ if (buf != NULL) ++ vfree(buf); ++ kfree(cmd->args.del.stage2_file); ++ } ++} ++ ++void copy_stage_1_data(struct rt_proc_call *call, void *priv_data) ++{ ++ struct rtcfg_cmd *cmd; ++ int result = rtpc_get_result(call); ++ ++ if (result <= 0) ++ return; ++ ++ cmd = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ if (cmd->args.client.buffer_size < (size_t)result) ++ rtpc_set_result(call, -ENOSPC); ++ else if (copy_to_user(cmd->args.client.buffer, ++ cmd->args.client.rtskb->data, result) != 0) ++ rtpc_set_result(call, -EFAULT); ++} ++ ++void cleanup_cmd_client(void *priv_data) ++{ ++ struct rtcfg_cmd *cmd = (struct rtcfg_cmd *)priv_data; ++ void *station_buf; ++ struct rtskb *rtskb; ++ ++ station_buf = cmd->args.client.station_buf; ++ if (station_buf != NULL) ++ kfree(station_buf); ++ ++ rtskb = cmd->args.client.rtskb; ++ if (rtskb != NULL) ++ kfree_rtskb(rtskb); ++} ++ ++void copy_stage_2_data(struct rt_proc_call *call, void *priv_data) ++{ ++ struct rtcfg_cmd *cmd; ++ int result = rtpc_get_result(call); ++ struct rtskb *rtskb; ++ ++ if (result <= 0) ++ return; ++ ++ cmd = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ if (cmd->args.announce.buffer_size < (size_t)result) ++ rtpc_set_result(call, -ENOSPC); ++ else { ++ rtskb = cmd->args.announce.rtskb; ++ do { ++ if (copy_to_user(cmd->args.announce.buffer, rtskb->data, ++ rtskb->len) != 0) { ++ rtpc_set_result(call, -EFAULT); ++ break; ++ } ++ cmd->args.announce.buffer += rtskb->len; ++ rtskb = rtskb->next; ++ } while (rtskb != NULL); ++ } ++} ++ ++void cleanup_cmd_announce(void *priv_data) ++{ ++ struct rtcfg_cmd *cmd = (struct rtcfg_cmd *)priv_data; ++ struct rtskb *rtskb; ++ ++ rtskb = cmd->args.announce.rtskb; ++ if (rtskb != NULL) ++ kfree_rtskb(rtskb); ++} ++ ++void cleanup_cmd_detach(void *priv_data) ++{ ++ struct rtcfg_cmd *cmd = (struct rtcfg_cmd *)priv_data; ++ void *buf; ++ ++ 
/* unlock proc and update directory structure */ ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ ++ if (cmd->args.detach.conn_buf) { ++ buf = cmd->args.detach.conn_buf->stage1_data; ++ if (buf != NULL) ++ kfree(buf); ++ kfree(cmd->args.detach.conn_buf); ++ } ++ ++ if (cmd->args.detach.stage2_file != NULL) { ++ buf = cmd->args.detach.stage2_file->buffer; ++ if (buf) ++ vfree(buf); ++ kfree(cmd->args.detach.stage2_file); ++ } ++ ++ if (cmd->args.detach.station_addr_list) ++ kfree(cmd->args.detach.station_addr_list); ++ ++ if (cmd->args.detach.stage2_chain) ++ kfree_rtskb(cmd->args.detach.stage2_chain); ++} ++ ++int rtcfg_ioctl_add(struct rtnet_device *rtdev, struct rtcfg_cmd *cmd) ++{ ++ struct rtcfg_connection *conn_buf; ++ struct rtcfg_file *file = NULL; ++ void *data_buf; ++ size_t size; ++ int ret; ++ ++ conn_buf = kmalloc(sizeof(struct rtcfg_connection), GFP_KERNEL); ++ if (conn_buf == NULL) ++ return -ENOMEM; ++ cmd->args.add.conn_buf = conn_buf; ++ ++ data_buf = NULL; ++ size = cmd->args.add.stage1_size; ++ if (size > 0) { ++ /* check stage 1 data size */ ++ if (sizeof(struct rtcfg_frm_stage_1_cfg) + ++ 2 * RTCFG_ADDRSIZE_IP + size > ++ rtdev->get_mtu(rtdev, RTCFG_SKB_PRIO)) { ++ ret = -ESTAGE1SIZE; ++ goto err; ++ } ++ ++ data_buf = kmalloc(size, GFP_KERNEL); ++ if (data_buf == NULL) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = copy_from_user(data_buf, cmd->args.add.stage1_data, size); ++ if (ret != 0) { ++ ret = -EFAULT; ++ goto err; ++ } ++ } ++ cmd->args.add.stage1_data = data_buf; ++ ++ if (cmd->args.add.stage2_filename != NULL) { ++ size = strnlen_user(cmd->args.add.stage2_filename, PATH_MAX); ++ ++ file = kmalloc(sizeof(struct rtcfg_file) + size, GFP_KERNEL); ++ if (file == NULL) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ file->name = (char *)file + sizeof(struct rtcfg_file); ++ file->buffer = NULL; ++ ++ ret = copy_from_user( ++ (void *)file + sizeof(struct rtcfg_file), ++ (const void *)cmd->args.add.stage2_filename, size); ++ if (ret != 0) { ++ ret = -EFAULT; ++ goto err; ++ } ++ } ++ cmd->args.add.stage2_file = file; ++ ++ /* lock proc structure for modification */ ++ rtcfg_lockwr_proc(cmd->internal.data.ifindex); ++ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, 0, cmd, sizeof(*cmd), ++ keep_cmd_add, cleanup_cmd_add); ++ ++ /* load file if missing */ ++ if (ret > 0) { ++ struct file *filp; ++ mm_segment_t oldfs; ++ ++ filp = filp_open(file->name, O_RDONLY, 0); ++ if (IS_ERR(filp)) { ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ ret = PTR_ERR(filp); ++ goto err; ++ } ++ ++ file->size = filp->f_path.dentry->d_inode->i_size; ++ ++ /* allocate buffer even for empty files */ ++ file->buffer = vmalloc((file->size) ? 
file->size : 1); ++ if (file->buffer == NULL) { ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ fput(filp); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ filp->f_pos = 0; ++ ++ ret = filp->f_op->read(filp, file->buffer, file->size, ++ &filp->f_pos); ++ ++ set_fs(oldfs); ++ fput(filp); ++ ++ if (ret != (int)file->size) { ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ ret = -EIO; ++ goto err; ++ } ++ ++ /* dispatch again, this time with new file attached */ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, 0, cmd, ++ sizeof(*cmd), NULL, cleanup_cmd_add); ++ } ++ ++ return ret; ++ ++err: ++ kfree(conn_buf); ++ if (data_buf != NULL) ++ kfree(data_buf); ++ if (file != NULL) { ++ if (file->buffer != NULL) ++ vfree(file->buffer); ++ kfree(file); ++ } ++ return ret; ++} ++ ++int rtcfg_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct rtcfg_cmd cmd; ++ struct rtcfg_station *station_buf; ++ int ret; ++ ++ ret = copy_from_user(&cmd, (void *)arg, sizeof(cmd)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ cmd.internal.data.ifindex = rtdev->ifindex; ++ cmd.internal.data.event_id = _IOC_NR(request); ++ ++ switch (request) { ++ case RTCFG_IOC_SERVER: ++ ret = rtpc_dispatch_call(rtcfg_event_handler, 0, &cmd, ++ sizeof(cmd), NULL, NULL); ++ break; ++ ++ case RTCFG_IOC_ADD: ++ ret = rtcfg_ioctl_add(rtdev, &cmd); ++ break; ++ ++ case RTCFG_IOC_DEL: ++ cmd.args.del.conn_buf = NULL; ++ cmd.args.del.stage2_file = NULL; ++ ++ /* lock proc structure for modification ++ (unlock in cleanup_cmd_del) */ ++ rtcfg_lockwr_proc(cmd.internal.data.ifindex); ++ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, 0, &cmd, ++ sizeof(cmd), NULL, cleanup_cmd_del); ++ break; ++ ++ case RTCFG_IOC_WAIT: ++ ret = rtpc_dispatch_call(rtcfg_event_handler, ++ cmd.args.wait.timeout, &cmd, ++ sizeof(cmd), NULL, NULL); ++ break; ++ ++ case RTCFG_IOC_CLIENT: ++ station_buf = kmalloc(sizeof(struct rtcfg_station) * ++ cmd.args.client.max_stations, ++ GFP_KERNEL); ++ if (station_buf == NULL) ++ return -ENOMEM; ++ cmd.args.client.station_buf = station_buf; ++ cmd.args.client.rtskb = NULL; ++ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, ++ cmd.args.client.timeout, &cmd, ++ sizeof(cmd), copy_stage_1_data, ++ cleanup_cmd_client); ++ break; ++ ++ case RTCFG_IOC_ANNOUNCE: ++ cmd.args.announce.rtskb = NULL; ++ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, ++ cmd.args.announce.timeout, &cmd, ++ sizeof(cmd), copy_stage_2_data, ++ cleanup_cmd_announce); ++ break; ++ ++ case RTCFG_IOC_READY: ++ ret = rtpc_dispatch_call(rtcfg_event_handler, ++ cmd.args.ready.timeout, &cmd, ++ sizeof(cmd), NULL, NULL); ++ break; ++ ++ case RTCFG_IOC_DETACH: ++ do { ++ cmd.args.detach.conn_buf = NULL; ++ cmd.args.detach.stage2_file = NULL; ++ cmd.args.detach.station_addr_list = NULL; ++ cmd.args.detach.stage2_chain = NULL; ++ ++ /* lock proc structure for modification ++ (unlock in cleanup_cmd_detach) */ ++ rtcfg_lockwr_proc(cmd.internal.data.ifindex); ++ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, 0, &cmd, ++ sizeof(cmd), NULL, ++ cleanup_cmd_detach); ++ } while (ret == -EAGAIN); ++ break; ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ return ret; ++} ++ ++struct rtnet_ioctls rtcfg_ioctls = { .service_name = "RTcfg", ++ .ioctl_type = RTNET_IOC_TYPE_RTCFG, ++ .handler = rtcfg_ioctl }; +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_event.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_event.c 2021-04-07 16:01:26.614635042 +0800 
+@@ -0,0 +1,745 @@ ++/*** ++ * ++ * rtcfg/rtcfg_event.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/*** Common and Server States ***/ ++static int rtcfg_main_state_off(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++static int rtcfg_main_state_server_running(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTCFG_DEBUG ++const char *rtcfg_event[] = { "RTCFG_CMD_SERVER", ++ "RTCFG_CMD_ADD", ++ "RTCFG_CMD_DEL", ++ "RTCFG_CMD_WAIT", ++ "RTCFG_CMD_CLIENT", ++ "RTCFG_CMD_ANNOUNCE", ++ "RTCFG_CMD_READY", ++ "RTCFG_CMD_DETACH", ++ "RTCFG_TIMER", ++ "RTCFG_FRM_STAGE_1_CFG", ++ "RTCFG_FRM_ANNOUNCE_NEW", ++ "RTCFG_FRM_ANNOUNCE_REPLY", ++ "RTCFG_FRM_STAGE_2_CFG", ++ "RTCFG_FRM_STAGE_2_CFG_FRAG", ++ "RTCFG_FRM_ACK_CFG", ++ "RTCFG_FRM_READY", ++ "RTCFG_FRM_HEARTBEAT", ++ "RTCFG_FRM_DEAD_STATION" }; ++ ++const char *rtcfg_main_state[] = { "RTCFG_MAIN_OFF", ++ "RTCFG_MAIN_SERVER_RUNNING", ++ "RTCFG_MAIN_CLIENT_0", ++ "RTCFG_MAIN_CLIENT_1", ++ "RTCFG_MAIN_CLIENT_ANNOUNCED", ++ "RTCFG_MAIN_CLIENT_ALL_KNOWN", ++ "RTCFG_MAIN_CLIENT_ALL_FRAMES", ++ "RTCFG_MAIN_CLIENT_2", ++ "RTCFG_MAIN_CLIENT_READY" }; ++ ++int rtcfg_debug = RTCFG_DEFAULT_DEBUG_LEVEL; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTCFG_DEBUG */ ++ ++struct rtcfg_device device[MAX_RT_DEVICES]; ++ ++static int (*state[])(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) = { rtcfg_main_state_off, ++ rtcfg_main_state_server_running, ++ rtcfg_main_state_client_0, ++ rtcfg_main_state_client_1, ++ rtcfg_main_state_client_announced, ++ rtcfg_main_state_client_all_known, ++ rtcfg_main_state_client_all_frames, ++ rtcfg_main_state_client_2, ++ rtcfg_main_state_client_ready }; ++ ++static int rtcfg_server_add(struct rtcfg_cmd *cmd_event); ++static int rtcfg_server_del(struct rtcfg_cmd *cmd_event); ++static int rtcfg_server_detach(int ifindex, struct rtcfg_cmd *cmd_event); ++static int rtcfg_server_recv_announce(int ifindex, RTCFG_EVENT event_id, ++ struct rtskb *rtskb); ++static int rtcfg_server_recv_ack(int ifindex, struct rtskb *rtskb); ++static int rtcfg_server_recv_simple_frame(int ifindex, RTCFG_EVENT event_id, ++ struct rtskb *rtskb); ++ ++int rtcfg_do_main_event(int ifindex, RTCFG_EVENT event_id, void *event_data) ++{ ++ int main_state; ++ ++ rtdm_mutex_lock(&device[ifindex].dev_mutex); ++ ++ main_state = device[ifindex].state; ++ ++ RTCFG_DEBUG(3, "RTcfg: %s() rtdev=%d, event=%s, state=%s\n", ++ __FUNCTION__, ifindex, rtcfg_event[event_id], ++ rtcfg_main_state[main_state]); ++ ++ return (*state[main_state])(ifindex, event_id, event_data); ++} ++ ++void rtcfg_next_main_state(int ifindex, 
RTCFG_MAIN_STATE state) ++{ ++ RTCFG_DEBUG(4, "RTcfg: next main state=%s \n", rtcfg_main_state[state]); ++ ++ device[ifindex].state = state; ++} ++ ++static int rtcfg_main_state_off(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_cmd *cmd_event; ++ int ret; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ switch (event_id) { ++ case RTCFG_CMD_SERVER: ++ INIT_LIST_HEAD(&rtcfg_dev->spec.srv.conn_list); ++ ++ ret = rtdm_timer_init(&rtcfg_dev->timer, rtcfg_timer, ++ "rtcfg-timer"); ++ if (ret == 0) { ++ ret = rtdm_timer_start( ++ &rtcfg_dev->timer, XN_INFINITE, ++ (nanosecs_rel_t)cmd_event->args.server.period * ++ 1000000, ++ RTDM_TIMERMODE_RELATIVE); ++ if (ret < 0) ++ rtdm_timer_destroy(&rtcfg_dev->timer); ++ } ++ if (ret < 0) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return ret; ++ } ++ ++ if (cmd_event->args.server.flags & _RTCFG_FLAG_READY) ++ set_bit(RTCFG_FLAG_READY, &rtcfg_dev->flags); ++ set_bit(FLAG_TIMER_STARTED, &rtcfg_dev->flags); ++ ++ rtcfg_dev->burstrate = cmd_event->args.server.burstrate; ++ ++ rtcfg_dev->spec.srv.heartbeat = ++ cmd_event->args.server.heartbeat; ++ ++ rtcfg_dev->spec.srv.heartbeat_timeout = ++ ((u64)cmd_event->args.server.heartbeat) * 1000000 * ++ cmd_event->args.server.threshold; ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_SERVER_RUNNING); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ break; ++ ++ case RTCFG_CMD_CLIENT: ++ rtcfg_dev->spec.clt.station_addr_list = ++ cmd_event->args.client.station_buf; ++ cmd_event->args.client.station_buf = NULL; ++ ++ rtcfg_dev->spec.clt.max_stations = ++ cmd_event->args.client.max_stations; ++ rtcfg_dev->other_stations = -1; ++ ++ rtcfg_queue_blocking_call(ifindex, call); ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_CLIENT_0); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/*** Server States ***/ ++ ++static int rtcfg_main_state_server_running(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rt_proc_call *call; ++ struct rtcfg_cmd *cmd_event; ++ struct rtcfg_device *rtcfg_dev; ++ struct rtskb *rtskb; ++ ++ switch (event_id) { ++ case RTCFG_CMD_ADD: ++ call = (struct rt_proc_call *)event_data; ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ return rtcfg_server_add(cmd_event); ++ ++ case RTCFG_CMD_DEL: ++ call = (struct rt_proc_call *)event_data; ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ return rtcfg_server_del(cmd_event); ++ ++ case RTCFG_CMD_WAIT: ++ call = (struct rt_proc_call *)event_data; ++ ++ rtcfg_dev = &device[ifindex]; ++ ++ if (rtcfg_dev->spec.srv.clients_configured == ++ rtcfg_dev->other_stations) ++ rtpc_complete_call(call, 0); ++ else ++ rtcfg_queue_blocking_call(ifindex, call); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ case RTCFG_CMD_READY: ++ call = (struct rt_proc_call *)event_data; ++ ++ rtcfg_dev = &device[ifindex]; ++ ++ if (rtcfg_dev->stations_ready == rtcfg_dev->other_stations) ++ rtpc_complete_call(call, 0); ++ else ++ rtcfg_queue_blocking_call(ifindex, call); ++ ++ if (!test_and_set_bit(RTCFG_FLAG_READY, &rtcfg_dev->flags)) ++ rtcfg_send_ready(ifindex); ++ ++ 
rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ case RTCFG_CMD_DETACH: ++ call = (struct rt_proc_call *)event_data; ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ return rtcfg_server_detach(ifindex, cmd_event); ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ rtskb = (struct rtskb *)event_data; ++ return rtcfg_server_recv_announce(ifindex, event_id, rtskb); ++ ++ case RTCFG_FRM_ACK_CFG: ++ rtskb = (struct rtskb *)event_data; ++ return rtcfg_server_recv_ack(ifindex, rtskb); ++ ++ case RTCFG_FRM_READY: ++ case RTCFG_FRM_HEARTBEAT: ++ rtskb = (struct rtskb *)event_data; ++ return rtcfg_server_recv_simple_frame(ifindex, event_id, rtskb); ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/*** Server Command Event Handlers ***/ ++ ++static int rtcfg_server_add(struct rtcfg_cmd *cmd_event) ++{ ++ struct rtcfg_device *rtcfg_dev; ++ struct rtcfg_connection *conn; ++ struct rtcfg_connection *new_conn; ++ struct list_head *entry; ++ unsigned int addr_type; ++ ++ rtcfg_dev = &device[cmd_event->internal.data.ifindex]; ++ addr_type = cmd_event->args.add.addr_type & RTCFG_ADDR_MASK; ++ ++ new_conn = cmd_event->args.add.conn_buf; ++ memset(new_conn, 0, sizeof(struct rtcfg_connection)); ++ ++ new_conn->ifindex = cmd_event->internal.data.ifindex; ++ new_conn->state = RTCFG_CONN_SEARCHING; ++ new_conn->addr_type = cmd_event->args.add.addr_type; ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ new_conn->addr.ip_addr = cmd_event->args.add.ip_addr; ++#endif ++ new_conn->stage1_data = cmd_event->args.add.stage1_data; ++ new_conn->stage1_size = cmd_event->args.add.stage1_size; ++ new_conn->burstrate = rtcfg_dev->burstrate; ++ new_conn->cfg_timeout = ((u64)cmd_event->args.add.timeout) * 1000000; ++ ++ if (cmd_event->args.add.addr_type == RTCFG_ADDR_IP) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ struct rtnet_device *rtdev; ++ ++ /* MAC address yet unknown -> use broadcast address */ ++ rtdev = rtdev_get_by_index(cmd_event->internal.data.ifindex); ++ if (rtdev == NULL) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return -ENODEV; ++ } ++ memcpy(new_conn->mac_addr, rtdev->broadcast, MAX_ADDR_LEN); ++ rtdev_dereference(rtdev); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ return -EPROTONOSUPPORT; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ } else ++ memcpy(new_conn->mac_addr, cmd_event->args.add.mac_addr, ++ MAX_ADDR_LEN); ++ ++ /* get stage 2 file */ ++ if (cmd_event->args.add.stage2_file != NULL) { ++ if (cmd_event->args.add.stage2_file->buffer != NULL) { ++ new_conn->stage2_file = cmd_event->args.add.stage2_file; ++ rtcfg_add_file(new_conn->stage2_file); ++ ++ cmd_event->args.add.stage2_file = NULL; ++ } else { ++ new_conn->stage2_file = rtcfg_get_file( ++ cmd_event->args.add.stage2_file->name); ++ if (new_conn->stage2_file == NULL) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return 1; ++ } ++ } ++ } ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, entry); ++ ++ if ( ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ ((addr_type == RTCFG_ADDR_IP) && ++ (conn->addr.ip_addr == cmd_event->args.add.ip_addr)) || ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ((addr_type == RTCFG_ADDR_MAC) && ++ (memcmp(conn->mac_addr, new_conn->mac_addr, ++ MAX_ADDR_LEN) == 0))) { ++ 
rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ if ((new_conn->stage2_file) && ++ (rtcfg_release_file(new_conn->stage2_file) == 0)) { ++ /* Note: This assignment cannot overwrite a valid file pointer. ++ * Effectively, it will only be executed when ++ * new_conn->stage2_file is the pointer originally passed by ++ * rtcfg_ioctl. But checking this assumptions does not cause ++ * any harm :o) ++ */ ++ RTNET_ASSERT(cmd_event->args.add.stage2_file == ++ NULL, ++ ;); ++ ++ cmd_event->args.add.stage2_file = ++ new_conn->stage2_file; ++ } ++ ++ return -EEXIST; ++ } ++ } ++ ++ list_add_tail(&new_conn->entry, &rtcfg_dev->spec.srv.conn_list); ++ rtcfg_dev->other_stations++; ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ cmd_event->args.add.conn_buf = NULL; ++ cmd_event->args.add.stage1_data = NULL; ++ ++ return 0; ++} ++ ++static int rtcfg_server_del(struct rtcfg_cmd *cmd_event) ++{ ++ struct rtcfg_connection *conn; ++ struct list_head *entry; ++ unsigned int addr_type; ++ struct rtcfg_device *rtcfg_dev; ++ ++ rtcfg_dev = &device[cmd_event->internal.data.ifindex]; ++ addr_type = cmd_event->args.add.addr_type & RTCFG_ADDR_MASK; ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, entry); ++ ++ if ((addr_type == conn->addr_type) && ++ ( ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ ((addr_type == RTCFG_ADDR_IP) && ++ (conn->addr.ip_addr == ++ cmd_event->args.add.ip_addr)) || ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ((addr_type == RTCFG_ADDR_MAC) && ++ (memcmp(conn->mac_addr, ++ cmd_event->args.add.mac_addr, ++ MAX_ADDR_LEN) == 0)))) { ++ list_del(&conn->entry); ++ rtcfg_dev->other_stations--; ++ ++ if (conn->state > RTCFG_CONN_SEARCHING) { ++ rtcfg_dev->stations_found--; ++ if (conn->state >= RTCFG_CONN_STAGE_2) ++ rtcfg_dev->spec.srv.clients_configured--; ++ if (conn->flags & _RTCFG_FLAG_READY) ++ rtcfg_dev->stations_ready--; ++ } ++ ++ if ((conn->stage2_file) && ++ (rtcfg_release_file(conn->stage2_file) == 0)) ++ cmd_event->args.del.stage2_file = ++ conn->stage2_file; ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ cmd_event->args.del.conn_buf = conn; ++ ++ return 0; ++ } ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -ENOENT; ++} ++ ++static int rtcfg_server_detach(int ifindex, struct rtcfg_cmd *cmd_event) ++{ ++ struct rtcfg_connection *conn; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ if (!list_empty(&rtcfg_dev->spec.srv.conn_list)) { ++ conn = list_entry(rtcfg_dev->spec.srv.conn_list.next, ++ struct rtcfg_connection, entry); ++ ++ list_del(&conn->entry); ++ rtcfg_dev->other_stations--; ++ ++ if (conn->state > RTCFG_CONN_SEARCHING) { ++ rtcfg_dev->stations_found--; ++ if (conn->state >= RTCFG_CONN_STAGE_2) ++ rtcfg_dev->spec.srv.clients_configured--; ++ if (conn->flags & _RTCFG_FLAG_READY) ++ rtcfg_dev->stations_ready--; ++ } ++ ++ if ((conn->stage2_file) && ++ (rtcfg_release_file(conn->stage2_file) == 0)) ++ cmd_event->args.detach.stage2_file = conn->stage2_file; ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ cmd_event->args.detach.conn_buf = conn; ++ ++ return -EAGAIN; ++ } ++ ++ if (test_and_clear_bit(FLAG_TIMER_STARTED, &rtcfg_dev->flags)) ++ rtdm_timer_destroy(&rtcfg_dev->timer); ++ rtcfg_reset_device(ifindex); ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_OFF); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return 0; ++} ++ ++/*** Server Frame Event Handlers ***/ ++ ++static int rtcfg_server_recv_announce(int ifindex, RTCFG_EVENT event_id, ++ struct 
rtskb *rtskb) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct list_head *entry; ++ struct rtcfg_frm_announce *announce; ++ struct rtcfg_connection *conn; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_announce)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid announce frame\n"); ++ return -EINVAL; ++ } ++ ++ announce = (struct rtcfg_frm_announce *)rtskb->data; ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, entry); ++ ++ switch (announce->addr_type) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ u32 announce_addr; ++ case RTCFG_ADDR_IP: ++ memcpy(&announce_addr, announce->addr, 4); ++ ++ if (((conn->addr_type & RTCFG_ADDR_MASK) == ++ RTCFG_ADDR_IP) && ++ (announce_addr == conn->addr.ip_addr)) { ++ /* save MAC address - Ethernet-specific! */ ++ memcpy(conn->mac_addr, ++ rtskb->mac.ethernet->h_source, ETH_ALEN); ++ ++ /* update routing table */ ++ rt_ip_route_add_host(conn->addr.ip_addr, ++ conn->mac_addr, ++ rtskb->rtdev); ++ ++ /* remove IP address */ ++ __rtskb_pull(rtskb, RTCFG_ADDRSIZE_IP); ++ ++ rtcfg_do_conn_event(conn, event_id, rtskb); ++ ++ goto out; ++ } ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ case RTCFG_ADDR_MAC: ++ /* Ethernet-specific! */ ++ if (memcmp(conn->mac_addr, ++ rtskb->mac.ethernet->h_source, ++ ETH_ALEN) == 0) { ++ rtcfg_do_conn_event(conn, event_id, rtskb); ++ ++ goto out; ++ } ++ break; ++ } ++ } ++ ++out: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++ return 0; ++} ++ ++static int rtcfg_server_recv_ack(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct list_head *entry; ++ struct rtcfg_connection *conn; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_ack_cfg)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid ack_cfg frame\n"); ++ return -EINVAL; ++ } ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, entry); ++ ++ /* find the corresponding connection - Ethernet-specific! */ ++ if (memcmp(conn->mac_addr, rtskb->mac.ethernet->h_source, ++ ETH_ALEN) != 0) ++ continue; ++ ++ rtcfg_do_conn_event(conn, RTCFG_FRM_ACK_CFG, rtskb); ++ ++ break; ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++ return 0; ++} ++ ++static int rtcfg_server_recv_simple_frame(int ifindex, RTCFG_EVENT event_id, ++ struct rtskb *rtskb) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct list_head *entry; ++ struct rtcfg_connection *conn; ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, entry); ++ ++ /* find the corresponding connection - Ethernet-specific! 
*/ ++ if (memcmp(conn->mac_addr, rtskb->mac.ethernet->h_source, ++ ETH_ALEN) != 0) ++ continue; ++ ++ rtcfg_do_conn_event(conn, event_id, rtskb); ++ ++ break; ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++ return 0; ++} ++ ++/*** Utility Functions ***/ ++ ++void rtcfg_queue_blocking_call(int ifindex, struct rt_proc_call *call) ++{ ++ rtdm_lockctx_t context; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ rtdm_lock_get_irqsave(&rtcfg_dev->event_calls_lock, context); ++ list_add_tail(&call->list_entry, &rtcfg_dev->event_calls); ++ rtdm_lock_put_irqrestore(&rtcfg_dev->event_calls_lock, context); ++} ++ ++struct rt_proc_call *rtcfg_dequeue_blocking_call(int ifindex) ++{ ++ rtdm_lockctx_t context; ++ struct rt_proc_call *call; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ rtdm_lock_get_irqsave(&rtcfg_dev->event_calls_lock, context); ++ if (!list_empty(&rtcfg_dev->event_calls)) { ++ call = (struct rt_proc_call *)rtcfg_dev->event_calls.next; ++ list_del(&call->list_entry); ++ } else ++ call = NULL; ++ rtdm_lock_put_irqrestore(&rtcfg_dev->event_calls_lock, context); ++ ++ return call; ++} ++ ++void rtcfg_complete_cmd(int ifindex, RTCFG_EVENT event_id, int result) ++{ ++ struct rt_proc_call *call; ++ struct rtcfg_cmd *cmd_event; ++ ++ while (1) { ++ call = rtcfg_dequeue_blocking_call(ifindex); ++ if (call == NULL) ++ break; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ rtpc_complete_call(call, (cmd_event->internal.data.event_id == ++ event_id) ? ++ result : ++ -EINVAL); ++ } ++} ++ ++void rtcfg_reset_device(int ifindex) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ rtcfg_dev->other_stations = 0; ++ rtcfg_dev->stations_found = 0; ++ rtcfg_dev->stations_ready = 0; ++ rtcfg_dev->flags = 0; ++ rtcfg_dev->burstrate = 0; ++ ++ memset(&rtcfg_dev->spec, 0, sizeof(rtcfg_dev->spec)); ++ INIT_LIST_HEAD(&rtcfg_dev->spec.srv.conn_list); ++} ++ ++void rtcfg_init_state_machines(void) ++{ ++ int i; ++ struct rtcfg_device *rtcfg_dev; ++ ++ memset(device, 0, sizeof(device)); ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtcfg_dev = &device[i]; ++ rtcfg_dev->state = RTCFG_MAIN_OFF; ++ ++ rtdm_mutex_init(&rtcfg_dev->dev_mutex); ++ ++ INIT_LIST_HEAD(&rtcfg_dev->event_calls); ++ rtdm_lock_init(&rtcfg_dev->event_calls_lock); ++ } ++} ++ ++void rtcfg_cleanup_state_machines(void) ++{ ++ int i; ++ struct rtcfg_device *rtcfg_dev; ++ struct rtcfg_connection *conn; ++ struct list_head *entry; ++ struct list_head *tmp; ++ struct rt_proc_call *call; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtcfg_dev = &device[i]; ++ ++ if (test_and_clear_bit(FLAG_TIMER_STARTED, &rtcfg_dev->flags)) ++ rtdm_timer_destroy(&rtcfg_dev->timer); ++ ++ /* ++ * No need to synchronize with rtcfg_timer here: the task running ++ * rtcfg_timer is already dead. 
++ */ ++ ++ rtdm_mutex_destroy(&rtcfg_dev->dev_mutex); ++ ++ if (rtcfg_dev->state == RTCFG_MAIN_SERVER_RUNNING) { ++ list_for_each_safe (entry, tmp, ++ &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry( ++ entry, struct rtcfg_connection, entry); ++ ++ if (conn->stage1_data != NULL) ++ kfree(conn->stage1_data); ++ ++ if ((conn->stage2_file != NULL) && ++ (rtcfg_release_file(conn->stage2_file) == ++ 0)) { ++ vfree(conn->stage2_file->buffer); ++ kfree(conn->stage2_file); ++ } ++ ++ kfree(entry); ++ } ++ } else if (rtcfg_dev->state != RTCFG_MAIN_OFF) { ++ if (rtcfg_dev->spec.clt.station_addr_list != NULL) ++ kfree(rtcfg_dev->spec.clt.station_addr_list); ++ ++ if (rtcfg_dev->spec.clt.stage2_chain != NULL) ++ kfree_rtskb(rtcfg_dev->spec.clt.stage2_chain); ++ } ++ ++ while (1) { ++ call = rtcfg_dequeue_blocking_call(i); ++ if (call == NULL) ++ break; ++ ++ rtpc_complete_call_nrt(call, -ENODEV); ++ } ++ } ++} +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_timer.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_timer.c 2021-04-07 16:01:26.610635048 +0800 +@@ -0,0 +1,110 @@ ++/*** ++ * ++ * rtcfg/rtcfg_timer.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++void rtcfg_timer(rtdm_timer_t *t) ++{ ++ struct rtcfg_device *rtcfg_dev = ++ container_of(t, struct rtcfg_device, timer); ++ ++ set_bit(FLAG_TIMER_PENDING, &rtcfg_dev->flags); ++ rtcfg_thread_signal(); ++} ++ ++void rtcfg_timer_run_one(int ifindex) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct list_head *entry; ++ struct rtcfg_connection *conn; ++ int last_stage_1 = -1; ++ int burst_credit; ++ int index; ++ int ret, shutdown; ++ ++ shutdown = test_and_clear_bit(FLAG_TIMER_SHUTDOWN, &rtcfg_dev->flags); ++ ++ if (!test_and_clear_bit(FLAG_TIMER_PENDING, &rtcfg_dev->flags) || ++ shutdown) ++ return; ++ ++ rtdm_mutex_lock(&rtcfg_dev->dev_mutex); ++ ++ if (rtcfg_dev->state == RTCFG_MAIN_SERVER_RUNNING) { ++ index = 0; ++ burst_credit = rtcfg_dev->burstrate; ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, ++ entry); ++ ++ if ((conn->state == RTCFG_CONN_SEARCHING) || ++ (conn->state == RTCFG_CONN_DEAD)) { ++ if ((burst_credit > 0) && ++ (index > last_stage_1)) { ++ if ((ret = rtcfg_send_stage_1(conn)) < ++ 0) { ++ RTCFG_DEBUG( ++ 2, ++ "RTcfg: error %d while sending " ++ "stage 1 frame\n", ++ ret); ++ } ++ burst_credit--; ++ last_stage_1 = index; ++ } ++ } else { ++ /* skip connection in history */ ++ if (last_stage_1 == (index - 1)) ++ last_stage_1 = index; ++ ++ rtcfg_do_conn_event(conn, RTCFG_TIMER, NULL); ++ } ++ index++; ++ } ++ ++ /* handle pointer overrun of the last stage 1 transmission */ ++ if (last_stage_1 == (index - 1)) ++ last_stage_1 = -1; ++ } else if (rtcfg_dev->state == RTCFG_MAIN_CLIENT_READY) ++ rtcfg_send_heartbeat(ifindex); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++} ++ ++void rtcfg_timer_run(void) ++{ ++ int ifindex; ++ ++ for (ifindex = 0; ifindex < MAX_RT_DEVICES; ifindex++) ++ rtcfg_timer_run_one(ifindex); ++} +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_client_event.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_client_event.c 2021-04-07 16:01:26.605635055 +0800 +@@ -0,0 +1,1175 @@ ++/*** ++ * ++ * rtcfg/rtcfg_client_event.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++static int rtcfg_client_get_frag(int ifindex, struct rt_proc_call *call); ++static void rtcfg_client_detach(int ifindex, struct rt_proc_call *call); ++static void rtcfg_client_recv_stage_1(int ifindex, struct rtskb *rtskb); ++static int rtcfg_client_recv_announce(int ifindex, struct rtskb *rtskb); ++static void rtcfg_client_recv_stage_2_cfg(int ifindex, struct rtskb *rtskb); ++static void rtcfg_client_recv_stage_2_frag(int ifindex, struct rtskb *rtskb); ++static int rtcfg_client_recv_ready(int ifindex, struct rtskb *rtskb); ++static void rtcfg_client_recv_dead_station(int ifindex, struct rtskb *rtskb); ++static void rtcfg_client_update_server(int ifindex, struct rtskb *rtskb); ++ ++/*** Client States ***/ ++ ++int rtcfg_main_state_client_0(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ ++ switch (event_id) { ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ rtcfg_client_recv_stage_1(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int rtcfg_main_state_client_1(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_cmd *cmd_event; ++ int ret; ++ ++ switch (event_id) { ++ case RTCFG_CMD_CLIENT: ++ /* second trial (buffer was probably too small) */ ++ rtcfg_queue_blocking_call(ifindex, ++ (struct rt_proc_call *)event_data); ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_CLIENT_0); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ case RTCFG_CMD_ANNOUNCE: ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ if (cmd_event->args.announce.burstrate == 0) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return -EINVAL; ++ } ++ ++ rtcfg_queue_blocking_call(ifindex, ++ (struct rt_proc_call *)event_data); ++ ++ if (cmd_event->args.announce.flags & _RTCFG_FLAG_STAGE_2_DATA) ++ set_bit(RTCFG_FLAG_STAGE_2_DATA, &rtcfg_dev->flags); ++ if (cmd_event->args.announce.flags & _RTCFG_FLAG_READY) ++ set_bit(RTCFG_FLAG_READY, &rtcfg_dev->flags); ++ if (cmd_event->args.announce.burstrate < rtcfg_dev->burstrate) ++ rtcfg_dev->burstrate = ++ cmd_event->args.announce.burstrate; ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_CLIENT_ANNOUNCED); ++ ++ ret = rtcfg_send_announce_new(ifindex); ++ if (ret < 0) { ++ rtcfg_dequeue_blocking_call(ifindex); ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return ret; ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_send_announce_reply( ++ ifindex, 
rtskb->mac.ethernet->h_source); ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ /* ignore */ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int rtcfg_main_state_client_announced(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_device *rtcfg_dev; ++ ++ switch (event_id) { ++ case RTCFG_CMD_ANNOUNCE: ++ return rtcfg_client_get_frag(ifindex, call); ++ ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_STAGE_2_CFG: ++ rtcfg_client_recv_stage_2_cfg(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_STAGE_2_CFG_FRAG: ++ rtcfg_client_recv_stage_2_frag(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_send_announce_reply( ++ ifindex, rtskb->mac.ethernet->h_source); ++ ++ rtcfg_dev = &device[ifindex]; ++ if (rtcfg_dev->stations_found == ++ rtcfg_dev->other_stations) ++ rtcfg_next_main_state( ++ ifindex, RTCFG_MAIN_CLIENT_ALL_KNOWN); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_dev = &device[ifindex]; ++ if (rtcfg_dev->stations_found == ++ rtcfg_dev->other_stations) ++ rtcfg_next_main_state( ++ ifindex, RTCFG_MAIN_CLIENT_ALL_KNOWN); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ /* ignore */ ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++int rtcfg_main_state_client_all_known(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ ++ switch (event_id) { ++ case RTCFG_CMD_ANNOUNCE: ++ return rtcfg_client_get_frag(ifindex, call); ++ ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_STAGE_2_CFG_FRAG: ++ rtcfg_client_recv_stage_2_frag(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_send_announce_reply( ++ ifindex, rtskb->mac.ethernet->h_source); ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ } ++ 
kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_DEAD_STATION: ++ rtcfg_client_recv_dead_station(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ /* ignore */ ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int rtcfg_main_state_client_all_frames(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_device *rtcfg_dev; ++ ++ switch (event_id) { ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_send_announce_reply( ++ ifindex, rtskb->mac.ethernet->h_source); ++ ++ rtcfg_dev = &device[ifindex]; ++ if (rtcfg_dev->stations_found == ++ rtcfg_dev->other_stations) { ++ rtcfg_complete_cmd(ifindex, RTCFG_CMD_ANNOUNCE, ++ 0); ++ ++ rtcfg_next_main_state( ++ ifindex, ++ test_bit(RTCFG_FLAG_READY, ++ &rtcfg_dev->flags) ? ++ RTCFG_MAIN_CLIENT_READY : ++ RTCFG_MAIN_CLIENT_2); ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_dev = &device[ifindex]; ++ if (rtcfg_dev->stations_found == ++ rtcfg_dev->other_stations) { ++ rtcfg_complete_cmd(ifindex, RTCFG_CMD_ANNOUNCE, ++ 0); ++ ++ rtcfg_next_main_state( ++ ifindex, ++ test_bit(RTCFG_FLAG_READY, ++ &rtcfg_dev->flags) ? ++ RTCFG_MAIN_CLIENT_READY : ++ RTCFG_MAIN_CLIENT_2); ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ break; ++ ++ case RTCFG_FRM_DEAD_STATION: ++ rtcfg_client_recv_dead_station(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ /* ignore */ ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int rtcfg_main_state_client_2(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_device *rtcfg_dev; ++ ++ switch (event_id) { ++ case RTCFG_CMD_READY: ++ rtcfg_dev = &device[ifindex]; ++ ++ if (rtcfg_dev->stations_ready == rtcfg_dev->other_stations) ++ rtpc_complete_call(call, 0); ++ else ++ rtcfg_queue_blocking_call(ifindex, call); ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_CLIENT_READY); ++ ++ if (!test_and_set_bit(RTCFG_FLAG_READY, &rtcfg_dev->flags)) ++ rtcfg_send_ready(ifindex); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ 
rtcfg_send_announce_reply( ++ ifindex, rtskb->mac.ethernet->h_source); ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_DEAD_STATION: ++ rtcfg_client_recv_dead_station(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ /* ignore */ ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int rtcfg_main_state_client_ready(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_device *rtcfg_dev; ++ ++ switch (event_id) { ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) { ++ rtcfg_dev = &device[ifindex]; ++ if (rtcfg_dev->stations_ready == ++ rtcfg_dev->other_stations) ++ rtcfg_complete_cmd(ifindex, RTCFG_CMD_READY, 0); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_send_announce_reply( ++ ifindex, rtskb->mac.ethernet->h_source); ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_DEAD_STATION: ++ rtcfg_client_recv_dead_station(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ rtcfg_client_update_server(ifindex, rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/*** Client Command Event Handlers ***/ ++ ++static int rtcfg_client_get_frag(int ifindex, struct rt_proc_call *call) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ if (test_bit(RTCFG_FLAG_STAGE_2_DATA, &rtcfg_dev->flags) == 0) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return -EINVAL; ++ } ++ ++ rtcfg_send_ack(ifindex); ++ ++ if (rtcfg_dev->spec.clt.cfg_offs >= rtcfg_dev->spec.clt.cfg_len) { ++ if (rtcfg_dev->stations_found == rtcfg_dev->other_stations) { ++ rtpc_complete_call(call, 0); ++ ++ rtcfg_next_main_state(ifindex, ++ test_bit(RTCFG_FLAG_READY, ++ &rtcfg_dev->flags) ? 
++ RTCFG_MAIN_CLIENT_READY : ++ RTCFG_MAIN_CLIENT_2); ++ } else { ++ rtcfg_next_main_state(ifindex, ++ RTCFG_MAIN_CLIENT_ALL_FRAMES); ++ rtcfg_queue_blocking_call(ifindex, call); ++ } ++ } else ++ rtcfg_queue_blocking_call(ifindex, call); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++} ++ ++/* releases rtcfg_dev->dev_mutex on return */ ++static void rtcfg_client_detach(int ifindex, struct rt_proc_call *call) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rtcfg_cmd *cmd_event; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ cmd_event->args.detach.station_addr_list = ++ rtcfg_dev->spec.clt.station_addr_list; ++ cmd_event->args.detach.stage2_chain = rtcfg_dev->spec.clt.stage2_chain; ++ ++ while (1) { ++ call = rtcfg_dequeue_blocking_call(ifindex); ++ if (call == NULL) ++ break; ++ ++ rtpc_complete_call(call, -ENODEV); ++ } ++ ++ if (test_and_clear_bit(FLAG_TIMER_STARTED, &rtcfg_dev->flags)) ++ rtdm_timer_destroy(&rtcfg_dev->timer); ++ rtcfg_reset_device(ifindex); ++ ++ rtcfg_next_main_state(cmd_event->internal.data.ifindex, RTCFG_MAIN_OFF); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++} ++ ++/*** Client Frame Event Handlers ***/ ++ ++static void rtcfg_client_recv_stage_1(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_stage_1_cfg *stage_1_cfg; ++ struct rt_proc_call *call; ++ struct rtcfg_cmd *cmd_event; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ u8 addr_type; ++ int ret; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_1_cfg)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid stage_1_cfg frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ stage_1_cfg = (struct rtcfg_frm_stage_1_cfg *)rtskb->data; ++ __rtskb_pull(rtskb, sizeof(struct rtcfg_frm_stage_1_cfg)); ++ ++ addr_type = stage_1_cfg->addr_type; ++ ++ switch (stage_1_cfg->addr_type) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ case RTCFG_ADDR_IP: { ++ struct rtnet_device *rtdev, *tmp; ++ u32 daddr, saddr, mask, bcast; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_1_cfg) + ++ 2 * RTCFG_ADDRSIZE_IP) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid stage_1_cfg " ++ "frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ rtdev = rtskb->rtdev; ++ ++ memcpy(&daddr, stage_1_cfg->client_addr, 4); ++ stage_1_cfg = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_cfg) + ++ RTCFG_ADDRSIZE_IP); ++ ++ memcpy(&saddr, stage_1_cfg->server_addr, 4); ++ stage_1_cfg = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_cfg) + ++ RTCFG_ADDRSIZE_IP); ++ ++ __rtskb_pull(rtskb, 2 * RTCFG_ADDRSIZE_IP); ++ ++ /* Broadcast: IP is used to address client */ ++ if (rtskb->pkt_type == PACKET_BROADCAST) { ++ /* directed to us? 
*/ ++ if (daddr != rtdev->local_ip) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ /* Unicast: IP address is assigned by the server */ ++ } else { ++ /* default netmask */ ++ if (ntohl(daddr) <= 0x7FFFFFFF) /* 127.255.255.255 */ ++ mask = 0x000000FF; /* 255.0.0.0 */ ++ else if (ntohl(daddr) <= ++ 0xBFFFFFFF) /* 191.255.255.255 */ ++ mask = 0x0000FFFF; /* 255.255.0.0 */ ++ else ++ mask = 0x00FFFFFF; /* 255.255.255.0 */ ++ bcast = daddr | (~mask); ++ ++ rt_ip_route_del_all(rtdev); /* cleanup routing table */ ++ ++ rtdev->local_ip = daddr; ++ rtdev->broadcast_ip = bcast; ++ ++ if ((tmp = rtdev_get_loopback()) != NULL) { ++ rt_ip_route_add_host(daddr, tmp->dev_addr, tmp); ++ rtdev_dereference(tmp); ++ } ++ ++ if (rtdev->flags & IFF_BROADCAST) ++ rt_ip_route_add_host(bcast, rtdev->broadcast, ++ rtdev); ++ } ++ ++ /* update routing table */ ++ rt_ip_route_add_host(saddr, rtskb->mac.ethernet->h_source, ++ rtdev); ++ ++ rtcfg_dev->spec.clt.srv_addr.ip_addr = saddr; ++ break; ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ case RTCFG_ADDR_MAC: ++ /* nothing to do */ ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown addr_type %d in %s()\n", ++ stage_1_cfg->addr_type, __FUNCTION__); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ rtcfg_dev->spec.clt.addr_type = addr_type; ++ ++ /* Ethernet-specific */ ++ memcpy(rtcfg_dev->spec.clt.srv_mac_addr, rtskb->mac.ethernet->h_source, ++ ETH_ALEN); ++ ++ rtcfg_dev->burstrate = stage_1_cfg->burstrate; ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_CLIENT_1); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ while (1) { ++ call = rtcfg_dequeue_blocking_call(ifindex); ++ if (call == NULL) ++ break; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ if (cmd_event->internal.data.event_id == RTCFG_CMD_CLIENT) { ++ ret = 0; ++ ++ /* note: only the first pending call gets data */ ++ if ((rtskb != NULL) && ++ (cmd_event->args.client.buffer_size > 0)) { ++ ret = ntohs(stage_1_cfg->cfg_len); ++ ++ cmd_event->args.client.rtskb = rtskb; ++ rtskb = NULL; ++ } ++ } else ++ ret = -EINVAL; ++ ++ rtpc_complete_call(call, ret); ++ } ++ ++ if (rtskb) ++ kfree_rtskb(rtskb); ++} ++ ++static int rtcfg_add_to_station_list(struct rtcfg_device *rtcfg_dev, ++ u8 *mac_addr, u8 flags) ++{ ++ if (rtcfg_dev->stations_found == rtcfg_dev->spec.clt.max_stations) { ++ RTCFG_DEBUG( ++ 1, "RTcfg: insufficient memory for storing new station " ++ "address\n"); ++ return -ENOMEM; ++ } ++ ++ /* Ethernet-specific! */ ++ memcpy(&rtcfg_dev->spec.clt.station_addr_list[rtcfg_dev->stations_found] ++ .mac_addr, ++ mac_addr, ETH_ALEN); ++ ++ rtcfg_dev->spec.clt.station_addr_list[rtcfg_dev->stations_found].flags = ++ flags; ++ ++ rtcfg_dev->stations_found++; ++ if ((flags & _RTCFG_FLAG_READY) != 0) ++ rtcfg_dev->stations_ready++; ++ ++ return 0; ++} ++ ++/* Notes: ++ * o rtcfg_client_recv_announce does not release the passed rtskb. ++ * o On success, rtcfg_client_recv_announce returns without releasing the ++ * device lock. 
++ */ ++static int rtcfg_client_recv_announce(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_announce *announce_frm; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ u32 i; ++ u32 announce_frm_addr; ++ int result; ++ ++ announce_frm = (struct rtcfg_frm_announce *)rtskb->data; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_announce)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, ++ "RTcfg: received invalid announce frame (id: %d)\n", ++ announce_frm->head.id); ++ return -EINVAL; ++ } ++ ++ switch (announce_frm->addr_type) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ case RTCFG_ADDR_IP: ++ if (rtskb->len < ++ sizeof(struct rtcfg_frm_announce) + RTCFG_ADDRSIZE_IP) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, ++ "RTcfg: received invalid announce frame " ++ "(id: %d)\n", ++ announce_frm->head.id); ++ return -EINVAL; ++ } ++ ++ memcpy(&announce_frm_addr, announce_frm->addr, 4); ++ ++ /* update routing table */ ++ rt_ip_route_add_host(announce_frm_addr, ++ rtskb->mac.ethernet->h_source, ++ rtskb->rtdev); ++ ++ announce_frm = ++ (struct rtcfg_frm_announce *)(((u8 *)announce_frm) + ++ RTCFG_ADDRSIZE_IP); ++ ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ case RTCFG_ADDR_MAC: ++ /* nothing to do */ ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown addr_type %d in %s()\n", ++ announce_frm->addr_type, __FUNCTION__); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < rtcfg_dev->stations_found; i++) ++ /* Ethernet-specific! */ ++ if (memcmp(rtcfg_dev->spec.clt.station_addr_list[i].mac_addr, ++ rtskb->mac.ethernet->h_source, ETH_ALEN) == 0) ++ return 0; ++ ++ result = rtcfg_add_to_station_list( ++ rtcfg_dev, rtskb->mac.ethernet->h_source, announce_frm->flags); ++ if (result < 0) ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return result; ++} ++ ++static void rtcfg_client_queue_frag(int ifindex, struct rtskb *rtskb, ++ size_t data_len) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rt_proc_call *call; ++ struct rtcfg_cmd *cmd_event; ++ int result; ++ ++ rtskb_trim(rtskb, data_len); ++ ++ if (rtcfg_dev->spec.clt.stage2_chain == NULL) ++ rtcfg_dev->spec.clt.stage2_chain = rtskb; ++ else { ++ rtcfg_dev->spec.clt.stage2_chain->chain_end->next = rtskb; ++ rtcfg_dev->spec.clt.stage2_chain->chain_end = rtskb; ++ } ++ ++ rtcfg_dev->spec.clt.cfg_offs += data_len; ++ rtcfg_dev->spec.clt.chain_len += data_len; ++ ++ if ((rtcfg_dev->spec.clt.cfg_offs >= rtcfg_dev->spec.clt.cfg_len) || ++ (++rtcfg_dev->spec.clt.packet_counter == rtcfg_dev->burstrate)) { ++ while (1) { ++ call = rtcfg_dequeue_blocking_call(ifindex); ++ if (call == NULL) ++ break; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ result = 0; ++ ++ /* note: only the first pending call gets data */ ++ if (rtcfg_dev->spec.clt.stage2_chain != NULL) { ++ result = rtcfg_dev->spec.clt.chain_len; ++ cmd_event->args.announce.rtskb = ++ rtcfg_dev->spec.clt.stage2_chain; ++ rtcfg_dev->spec.clt.stage2_chain = NULL; ++ } ++ ++ rtpc_complete_call(call, ++ (cmd_event->internal.data.event_id == ++ RTCFG_CMD_ANNOUNCE) ? 
++ result : ++ -EINVAL); ++ } ++ ++ rtcfg_dev->spec.clt.packet_counter = 0; ++ rtcfg_dev->spec.clt.chain_len = 0; ++ } ++} ++ ++static void rtcfg_client_recv_stage_2_cfg(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_stage_2_cfg *stage_2_cfg; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ size_t data_len; ++ int ret; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_2_cfg)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid stage_2_cfg frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ stage_2_cfg = (struct rtcfg_frm_stage_2_cfg *)rtskb->data; ++ __rtskb_pull(rtskb, sizeof(struct rtcfg_frm_stage_2_cfg)); ++ ++ if (stage_2_cfg->heartbeat_period) { ++ ret = rtdm_timer_init(&rtcfg_dev->timer, rtcfg_timer, ++ "rtcfg-timer"); ++ if (ret == 0) { ++ ret = rtdm_timer_start( ++ &rtcfg_dev->timer, XN_INFINITE, ++ (nanosecs_rel_t)ntohs( ++ stage_2_cfg->heartbeat_period) * ++ 1000000, ++ RTDM_TIMERMODE_RELATIVE); ++ if (ret < 0) ++ rtdm_timer_destroy(&rtcfg_dev->timer); ++ } ++ ++ if (ret < 0) ++ /*ERRMSG*/ rtdm_printk( ++ "RTcfg: unable to create timer task\n"); ++ else ++ set_bit(FLAG_TIMER_STARTED, &rtcfg_dev->flags); ++ } ++ ++ /* add server to station list */ ++ if (rtcfg_add_to_station_list(rtcfg_dev, rtskb->mac.ethernet->h_source, ++ stage_2_cfg->flags) < 0) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unable to process stage_2_cfg frage\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ rtcfg_dev->other_stations = ntohl(stage_2_cfg->stations); ++ rtcfg_dev->spec.clt.cfg_len = ntohl(stage_2_cfg->cfg_len); ++ data_len = MIN(rtcfg_dev->spec.clt.cfg_len, rtskb->len); ++ ++ if (test_bit(RTCFG_FLAG_STAGE_2_DATA, &rtcfg_dev->flags) && ++ (data_len > 0)) { ++ rtcfg_client_queue_frag(ifindex, rtskb, data_len); ++ rtskb = NULL; ++ ++ if (rtcfg_dev->stations_found == rtcfg_dev->other_stations) ++ rtcfg_next_main_state(ifindex, ++ RTCFG_MAIN_CLIENT_ALL_KNOWN); ++ } else { ++ if (rtcfg_dev->stations_found == rtcfg_dev->other_stations) { ++ rtcfg_complete_cmd(ifindex, RTCFG_CMD_ANNOUNCE, 0); ++ ++ rtcfg_next_main_state(ifindex, ++ test_bit(RTCFG_FLAG_READY, ++ &rtcfg_dev->flags) ? 
++ RTCFG_MAIN_CLIENT_READY : ++ RTCFG_MAIN_CLIENT_2); ++ } else ++ rtcfg_next_main_state(ifindex, ++ RTCFG_MAIN_CLIENT_ALL_FRAMES); ++ ++ rtcfg_send_ack(ifindex); ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ if (rtskb != NULL) ++ kfree_rtskb(rtskb); ++} ++ ++static void rtcfg_client_recv_stage_2_frag(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_stage_2_cfg_frag *stage_2_frag; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ size_t data_len; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_2_cfg_frag)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, ++ "RTcfg: received invalid stage_2_cfg_frag frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ stage_2_frag = (struct rtcfg_frm_stage_2_cfg_frag *)rtskb->data; ++ __rtskb_pull(rtskb, sizeof(struct rtcfg_frm_stage_2_cfg_frag)); ++ ++ data_len = ++ MIN(rtcfg_dev->spec.clt.cfg_len - rtcfg_dev->spec.clt.cfg_offs, ++ rtskb->len); ++ ++ if (test_bit(RTCFG_FLAG_STAGE_2_DATA, &rtcfg_dev->flags) == 0) { ++ RTCFG_DEBUG(1, "RTcfg: unexpected stage 2 fragment, we did not " ++ "request any data!\n"); ++ ++ } else if (rtcfg_dev->spec.clt.cfg_offs != ++ ntohl(stage_2_frag->frag_offs)) { ++ RTCFG_DEBUG(1, ++ "RTcfg: unexpected stage 2 fragment (expected: %d, " ++ "received: %d)\n", ++ rtcfg_dev->spec.clt.cfg_offs, ++ ntohl(stage_2_frag->frag_offs)); ++ ++ rtcfg_send_ack(ifindex); ++ rtcfg_dev->spec.clt.packet_counter = 0; ++ } else { ++ rtcfg_client_queue_frag(ifindex, rtskb, data_len); ++ rtskb = NULL; ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ if (rtskb != NULL) ++ kfree_rtskb(rtskb); ++} ++ ++/* Notes: ++ * o On success, rtcfg_client_recv_ready returns without releasing the ++ * device lock. ++ */ ++static int rtcfg_client_recv_ready(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ u32 i; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_simple)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid ready frame\n"); ++ kfree_rtskb(rtskb); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < rtcfg_dev->stations_found; i++) ++ /* Ethernet-specific! 
*/ ++ if (memcmp(rtcfg_dev->spec.clt.station_addr_list[i].mac_addr, ++ rtskb->mac.ethernet->h_source, ETH_ALEN) == 0) { ++ if ((rtcfg_dev->spec.clt.station_addr_list[i].flags & ++ _RTCFG_FLAG_READY) == 0) { ++ rtcfg_dev->spec.clt.station_addr_list[i].flags |= ++ _RTCFG_FLAG_READY; ++ rtcfg_dev->stations_ready++; ++ } ++ break; ++ } ++ ++ kfree_rtskb(rtskb); ++ return 0; ++} ++ ++static void rtcfg_client_recv_dead_station(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_dead_station *dead_station_frm; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ u32 i; ++ ++ dead_station_frm = (struct rtcfg_frm_dead_station *)rtskb->data; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_dead_station)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid dead station frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ switch (dead_station_frm->addr_type) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ case RTCFG_ADDR_IP: { ++ u32 ip; ++ ++ if (rtskb->len < ++ sizeof(struct rtcfg_frm_dead_station) + RTCFG_ADDRSIZE_IP) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG( ++ 1, ++ "RTcfg: received invalid dead station frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ memcpy(&ip, dead_station_frm->logical_addr, 4); ++ ++ /* only delete remote IPs from routing table */ ++ if (rtskb->rtdev->local_ip != ip) ++ rt_ip_route_del_host(ip, rtskb->rtdev); ++ ++ dead_station_frm = (struct rtcfg_frm_dead_station ++ *)(((u8 *)dead_station_frm) + ++ RTCFG_ADDRSIZE_IP); ++ ++ break; ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ case RTCFG_ADDR_MAC: ++ /* nothing to do */ ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown addr_type %d in %s()\n", ++ dead_station_frm->addr_type, __FUNCTION__); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ for (i = 0; i < rtcfg_dev->stations_found; i++) ++ /* Ethernet-specific! 
*/ ++ if (memcmp(rtcfg_dev->spec.clt.station_addr_list[i].mac_addr, ++ dead_station_frm->physical_addr, ETH_ALEN) == 0) { ++ if ((rtcfg_dev->spec.clt.station_addr_list[i].flags & ++ _RTCFG_FLAG_READY) != 0) ++ rtcfg_dev->stations_ready--; ++ ++ rtcfg_dev->stations_found--; ++ memmove(&rtcfg_dev->spec.clt.station_addr_list[i], ++ &rtcfg_dev->spec.clt.station_addr_list[i + 1], ++ sizeof(struct rtcfg_station) * ++ (rtcfg_dev->stations_found - i)); ++ ++ if (rtcfg_dev->state == RTCFG_MAIN_CLIENT_ALL_KNOWN) ++ rtcfg_next_main_state( ++ ifindex, RTCFG_MAIN_CLIENT_ANNOUNCED); ++ break; ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++} ++ ++static void rtcfg_client_update_server(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_stage_1_cfg *stage_1_cfg; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_1_cfg)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid stage_1_cfg frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ stage_1_cfg = (struct rtcfg_frm_stage_1_cfg *)rtskb->data; ++ __rtskb_pull(rtskb, sizeof(struct rtcfg_frm_stage_1_cfg)); ++ ++ switch (stage_1_cfg->addr_type) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ case RTCFG_ADDR_IP: { ++ struct rtnet_device *rtdev; ++ u32 daddr, saddr; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_1_cfg) + ++ 2 * RTCFG_ADDRSIZE_IP) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid stage_1_cfg " ++ "frame\n"); ++ kfree_rtskb(rtskb); ++ break; ++ } ++ ++ rtdev = rtskb->rtdev; ++ ++ memcpy(&daddr, stage_1_cfg->client_addr, 4); ++ stage_1_cfg = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_cfg) + ++ RTCFG_ADDRSIZE_IP); ++ ++ memcpy(&saddr, stage_1_cfg->server_addr, 4); ++ stage_1_cfg = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_cfg) + ++ RTCFG_ADDRSIZE_IP); ++ ++ __rtskb_pull(rtskb, 2 * RTCFG_ADDRSIZE_IP); ++ ++ /* directed to us? */ ++ if ((rtskb->pkt_type == PACKET_BROADCAST) && ++ (daddr != rtdev->local_ip)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ /* update routing table */ ++ rt_ip_route_add_host(saddr, rtskb->mac.ethernet->h_source, ++ rtdev); ++ ++ rtcfg_dev->spec.clt.srv_addr.ip_addr = saddr; ++ break; ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ case RTCFG_ADDR_MAC: ++ /* nothing to do */ ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown addr_type %d in %s()\n", ++ stage_1_cfg->addr_type, __FUNCTION__); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ /* Ethernet-specific */ ++ memcpy(rtcfg_dev->spec.clt.srv_mac_addr, rtskb->mac.ethernet->h_source, ++ ETH_ALEN); ++ ++ rtcfg_send_announce_reply(ifindex, rtskb->mac.ethernet->h_source); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++} +--- linux/drivers/xenomai/net/stack/rtdev_mgr.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtdev_mgr.c 2021-04-07 16:01:26.600635062 +0800 +@@ -0,0 +1,127 @@ ++/*** ++ * ++ * stack/rtdev_mgr.c - device error manager ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++/*** ++ * rtnetif_err_rx: will be called from the driver ++ * ++ * ++ * @rtdev - the network-device ++ */ ++void rtnetif_err_rx(struct rtnet_device *rtdev) ++{ ++} ++ ++/*** ++ * rtnetif_err_tx: will be called from the driver ++ * ++ * ++ * @rtdev - the network-device ++ */ ++void rtnetif_err_tx(struct rtnet_device *rtdev) ++{ ++} ++ ++/*** ++ * do_rtdev_task ++ */ ++/*static void do_rtdev_task(int mgr_id) ++{ ++ struct rtnet_msg msg; ++ struct rtnet_mgr *mgr = (struct rtnet_mgr *)mgr_id; ++ ++ while (1) { ++ rt_mbx_receive(&(mgr->mbx), &msg, sizeof(struct rtnet_msg)); ++ if (msg.rtdev) { ++ rt_printk("RTnet: error on rtdev %s\n", msg.rtdev->name); ++ } ++ } ++}*/ ++ ++/*** ++ * rt_rtdev_connect ++ */ ++void rt_rtdev_connect(struct rtnet_device *rtdev, struct rtnet_mgr *mgr) ++{ ++ /* rtdev->rtdev_mbx=&(mgr->mbx);*/ ++} ++ ++/*** ++ * rt_rtdev_disconnect ++ */ ++void rt_rtdev_disconnect(struct rtnet_device *rtdev) ++{ ++ /* rtdev->rtdev_mbx=NULL;*/ ++} ++ ++/*** ++ * rt_rtdev_mgr_start ++ */ ++int rt_rtdev_mgr_start(struct rtnet_mgr *mgr) ++{ ++ return /*(rt_task_resume(&(mgr->task)))*/ 0; ++} ++ ++/*** ++ * rt_rtdev_mgr_stop ++ */ ++int rt_rtdev_mgr_stop(struct rtnet_mgr *mgr) ++{ ++ return /*(rt_task_suspend(&(mgr->task)))*/ 0; ++} ++ ++/*** ++ * rt_rtdev_mgr_init ++ */ ++int rt_rtdev_mgr_init(struct rtnet_mgr *mgr) ++{ ++ int ret = 0; ++ ++ /* if ( (ret=rt_mbx_init (&(mgr->mbx), sizeof(struct rtnet_msg))) ) ++ return ret; ++ if ( (ret=rt_task_init(&(mgr->task), &do_rtdev_task, (int)mgr, 4096, RTNET_RTDEV_PRIORITY, 0, 0)) ) ++ return ret; ++ if ( (ret=rt_task_resume(&(mgr->task))) ) ++ return ret;*/ ++ ++ return (ret); ++} ++ ++/*** ++ * rt_rtdev_mgr_delete ++ */ ++void rt_rtdev_mgr_delete(struct rtnet_mgr *mgr) ++{ ++ /* rt_task_delete(&(mgr->task)); ++ rt_mbx_delete(&(mgr->mbx));*/ ++} ++ ++EXPORT_SYMBOL_GPL(rtnetif_err_rx); ++EXPORT_SYMBOL_GPL(rtnetif_err_tx); ++ ++EXPORT_SYMBOL_GPL(rt_rtdev_connect); ++EXPORT_SYMBOL_GPL(rt_rtdev_disconnect); +--- linux/drivers/xenomai/net/stack/stack_mgr.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/stack_mgr.c 2021-04-07 16:01:26.595635069 +0800 +@@ -0,0 +1,256 @@ ++/*** ++ * ++ * stack/stack_mgr.c - Stack-Manager ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * Copyright (C) 2003-2006 Jan Kiszka ++ * Copyright (C) 2006 Jorge Almeida ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++static unsigned int stack_mgr_prio = RTNET_DEF_STACK_PRIORITY; ++module_param(stack_mgr_prio, uint, 0444); ++MODULE_PARM_DESC(stack_mgr_prio, "Priority of the stack manager task"); ++ ++#if (CONFIG_XENO_DRIVERS_NET_RX_FIFO_SIZE & \ ++ (CONFIG_XENO_DRIVERS_NET_RX_FIFO_SIZE - 1)) != 0 ++#error CONFIG_XENO_DRIVERS_NET_RX_FIFO_SIZE must be power of 2! ++#endif ++static DECLARE_RTSKB_FIFO(rx, CONFIG_XENO_DRIVERS_NET_RX_FIFO_SIZE); ++ ++struct list_head rt_packets[RTPACKET_HASH_TBL_SIZE]; ++#ifdef CONFIG_XENO_DRIVERS_NET_ETH_P_ALL ++struct list_head rt_packets_all; ++#endif /* CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++DEFINE_RTDM_LOCK(rt_packets_lock); ++ ++/*** ++ * rtdev_add_pack: add protocol (Layer 3) ++ * @pt: the new protocol ++ */ ++int __rtdev_add_pack(struct rtpacket_type *pt, struct module *module) ++{ ++ int ret = 0; ++ rtdm_lockctx_t context; ++ ++ INIT_LIST_HEAD(&pt->list_entry); ++ pt->refcount = 0; ++ if (pt->trylock == NULL) ++ pt->trylock = rtdev_lock_pack; ++ if (pt->unlock == NULL) ++ pt->unlock = rtdev_unlock_pack; ++ pt->owner = module; ++ ++ rtdm_lock_get_irqsave(&rt_packets_lock, context); ++ ++ if (pt->type == htons(ETH_P_ALL)) ++#ifdef CONFIG_XENO_DRIVERS_NET_ETH_P_ALL ++ list_add_tail(&pt->list_entry, &rt_packets_all); ++#else /* !CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++ ret = -EINVAL; ++#endif /* CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++ else ++ list_add_tail( ++ &pt->list_entry, ++ &rt_packets[ntohs(pt->type) & RTPACKET_HASH_KEY_MASK]); ++ ++ rtdm_lock_put_irqrestore(&rt_packets_lock, context); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL_GPL(__rtdev_add_pack); ++ ++/*** ++ * rtdev_remove_pack: remove protocol (Layer 3) ++ * @pt: protocol ++ */ ++void rtdev_remove_pack(struct rtpacket_type *pt) ++{ ++ rtdm_lockctx_t context; ++ ++ RTNET_ASSERT(pt != NULL, return;); ++ ++ rtdm_lock_get_irqsave(&rt_packets_lock, context); ++ list_del(&pt->list_entry); ++ rtdm_lock_put_irqrestore(&rt_packets_lock, context); ++} ++ ++EXPORT_SYMBOL_GPL(rtdev_remove_pack); ++ ++/*** ++ * rtnetif_rx: will be called from the driver interrupt handler ++ * (IRQs disabled!) 
and send a message to rtdev-owned stack-manager ++ * ++ * @skb - the packet ++ */ ++void rtnetif_rx(struct rtskb *skb) ++{ ++ RTNET_ASSERT(skb != NULL, return;); ++ RTNET_ASSERT(skb->rtdev != NULL, return;); ++ ++ if (unlikely(rtskb_fifo_insert_inirq(&rx.fifo, skb) < 0)) { ++ rtdm_printk("RTnet: dropping packet in %s()\n", __FUNCTION__); ++ kfree_rtskb(skb); ++ } ++} ++ ++EXPORT_SYMBOL_GPL(rtnetif_rx); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK) ++#define __DELIVER_PREFIX ++#else /* !CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK */ ++#define __DELIVER_PREFIX static inline ++#endif /* CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK */ ++ ++__DELIVER_PREFIX void rt_stack_deliver(struct rtskb *rtskb) ++{ ++ unsigned short hash; ++ struct rtpacket_type *pt_entry; ++ rtdm_lockctx_t context; ++ struct rtnet_device *rtdev = rtskb->rtdev; ++ int err; ++ int eth_p_all_hit = 0; ++ ++ rtcap_report_incoming(rtskb); ++ ++ rtskb->nh.raw = rtskb->data; ++ ++ rtdm_lock_get_irqsave(&rt_packets_lock, context); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ETH_P_ALL ++ eth_p_all_hit = 0; ++ list_for_each_entry (pt_entry, &rt_packets_all, list_entry) { ++ if (!pt_entry->trylock(pt_entry)) ++ continue; ++ rtdm_lock_put_irqrestore(&rt_packets_lock, context); ++ ++ pt_entry->handler(rtskb, pt_entry); ++ ++ rtdm_lock_get_irqsave(&rt_packets_lock, context); ++ pt_entry->unlock(pt_entry); ++ eth_p_all_hit = 1; ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++ ++ hash = ntohs(rtskb->protocol) & RTPACKET_HASH_KEY_MASK; ++ ++ list_for_each_entry (pt_entry, &rt_packets[hash], list_entry) ++ if (pt_entry->type == rtskb->protocol) { ++ if (!pt_entry->trylock(pt_entry)) ++ continue; ++ rtdm_lock_put_irqrestore(&rt_packets_lock, context); ++ ++ err = pt_entry->handler(rtskb, pt_entry); ++ ++ rtdm_lock_get_irqsave(&rt_packets_lock, context); ++ pt_entry->unlock(pt_entry); ++ ++ if (likely(!err)) { ++ rtdm_lock_put_irqrestore(&rt_packets_lock, ++ context); ++ return; ++ } ++ } ++ ++ rtdm_lock_put_irqrestore(&rt_packets_lock, context); ++ ++ /* Don't warn if ETH_P_ALL listener were present or when running in ++ promiscuous mode (RTcap). 
*/ ++ if (unlikely(!eth_p_all_hit && !(rtdev->flags & IFF_PROMISC))) ++ rtdm_printk("RTnet: no one cared for packet with layer 3 " ++ "protocol type 0x%04x\n", ++ ntohs(rtskb->protocol)); ++ ++ kfree_rtskb(rtskb); ++} ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK) ++EXPORT_SYMBOL_GPL(rt_stack_deliver); ++#endif /* CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK */ ++ ++static void rt_stack_mgr_task(void *arg) ++{ ++ rtdm_event_t *mgr_event = &((struct rtnet_mgr *)arg)->event; ++ struct rtskb *rtskb; ++ ++ while (!rtdm_task_should_stop()) { ++ if (rtdm_event_wait(mgr_event) < 0) ++ break; ++ ++ /* we are the only reader => no locking required */ ++ while ((rtskb = __rtskb_fifo_remove(&rx.fifo))) ++ rt_stack_deliver(rtskb); ++ } ++} ++ ++/*** ++ * rt_stack_connect ++ */ ++void rt_stack_connect(struct rtnet_device *rtdev, struct rtnet_mgr *mgr) ++{ ++ rtdev->stack_event = &mgr->event; ++} ++ ++EXPORT_SYMBOL_GPL(rt_stack_connect); ++ ++/*** ++ * rt_stack_disconnect ++ */ ++void rt_stack_disconnect(struct rtnet_device *rtdev) ++{ ++ rtdev->stack_event = NULL; ++} ++ ++EXPORT_SYMBOL_GPL(rt_stack_disconnect); ++ ++/*** ++ * rt_stack_mgr_init ++ */ ++int rt_stack_mgr_init(struct rtnet_mgr *mgr) ++{ ++ int i; ++ ++ rtskb_fifo_init(&rx.fifo, CONFIG_XENO_DRIVERS_NET_RX_FIFO_SIZE); ++ ++ for (i = 0; i < RTPACKET_HASH_TBL_SIZE; i++) ++ INIT_LIST_HEAD(&rt_packets[i]); ++#ifdef CONFIG_XENO_DRIVERS_NET_ETH_P_ALL ++ INIT_LIST_HEAD(&rt_packets_all); ++#endif /* CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++ ++ rtdm_event_init(&mgr->event, 0); ++ ++ return rtdm_task_init(&mgr->task, "rtnet-stack", rt_stack_mgr_task, mgr, ++ stack_mgr_prio, 0); ++} ++ ++/*** ++ * rt_stack_mgr_delete ++ */ ++void rt_stack_mgr_delete(struct rtnet_mgr *mgr) ++{ ++ rtdm_event_destroy(&mgr->event); ++ rtdm_task_destroy(&mgr->task); ++} +--- linux/drivers/xenomai/net/stack/socket.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/socket.c 2021-04-07 16:01:26.591635075 +0800 +@@ -0,0 +1,395 @@ ++/*** ++ * ++ * stack/socket.c - sockets implementation for rtnet ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define SKB_POOL_CLOSED 0 ++ ++static unsigned int socket_rtskbs = DEFAULT_SOCKET_RTSKBS; ++module_param(socket_rtskbs, uint, 0444); ++MODULE_PARM_DESC(socket_rtskbs, ++ "Default number of realtime socket buffers in socket pools"); ++ ++/************************************************************************ ++ * internal socket functions * ++ ************************************************************************/ ++ ++int rt_bare_socket_init(struct rtdm_fd *fd, unsigned short protocol, ++ unsigned int priority, unsigned int pool_size) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ int err; ++ ++ err = rtskb_pool_init(&sock->skb_pool, pool_size, NULL, fd); ++ if (err < 0) ++ return err; ++ ++ sock->protocol = protocol; ++ sock->priority = priority; ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(rt_bare_socket_init); ++ ++/*** ++ * rt_socket_init - initialises a new socket structure ++ */ ++int rt_socket_init(struct rtdm_fd *fd, unsigned short protocol) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ unsigned int pool_size; ++ ++ sock->flags = 0; ++ sock->callback_func = NULL; ++ ++ rtskb_queue_init(&sock->incoming); ++ ++ sock->timeout = 0; ++ ++ rtdm_lock_init(&sock->param_lock); ++ rtdm_sem_init(&sock->pending_sem, 0); ++ ++ pool_size = rt_bare_socket_init(fd, protocol, ++ RTSKB_PRIO_VALUE(SOCK_DEF_PRIO, ++ RTSKB_DEF_RT_CHANNEL), ++ socket_rtskbs); ++ sock->pool_size = pool_size; ++ mutex_init(&sock->pool_nrt_lock); ++ ++ if (pool_size < socket_rtskbs) { ++ /* fix statistics */ ++ if (pool_size == 0) ++ rtskb_pools--; ++ ++ rt_socket_cleanup(fd); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rt_socket_init); ++ ++/*** ++ * rt_socket_cleanup - releases resources allocated for the socket ++ */ ++void rt_socket_cleanup(struct rtdm_fd *fd) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ ++ rtdm_sem_destroy(&sock->pending_sem); ++ ++ mutex_lock(&sock->pool_nrt_lock); ++ ++ set_bit(SKB_POOL_CLOSED, &sock->flags); ++ ++ if (sock->pool_size > 0) ++ rtskb_pool_release(&sock->skb_pool); ++ ++ mutex_unlock(&sock->pool_nrt_lock); ++} ++EXPORT_SYMBOL_GPL(rt_socket_cleanup); ++ ++/*** ++ * rt_socket_common_ioctl ++ */ ++int rt_socket_common_ioctl(struct rtdm_fd *fd, int request, void __user *arg) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ int ret = 0; ++ struct rtnet_callback *callback; ++ const unsigned int *val; ++ unsigned int _val; ++ const nanosecs_rel_t *timeout; ++ nanosecs_rel_t _timeout; ++ rtdm_lockctx_t context; ++ ++ switch (request) { ++ case RTNET_RTIOC_XMITPARAMS: ++ val = rtnet_get_arg(fd, &_val, arg, sizeof(_val)); ++ if (IS_ERR(val)) ++ return PTR_ERR(val); ++ sock->priority = *val; ++ break; ++ ++ case RTNET_RTIOC_TIMEOUT: ++ timeout = rtnet_get_arg(fd, &_timeout, arg, sizeof(_timeout)); ++ if (IS_ERR(timeout)) ++ return PTR_ERR(timeout); ++ sock->timeout = *timeout; ++ break; ++ ++ case RTNET_RTIOC_CALLBACK: ++ if (rtdm_fd_is_user(fd)) ++ return -EACCES; ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ ++ callback = arg; ++ sock->callback_func = callback->func; ++ sock->callback_arg = callback->arg; ++ ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ break; ++ ++ case RTNET_RTIOC_EXTPOOL: ++ val = rtnet_get_arg(fd, &_val, arg, sizeof(_val)); ++ if (IS_ERR(val)) ++ return PTR_ERR(val); ++ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ 
++ mutex_lock(&sock->pool_nrt_lock); ++ ++ if (test_bit(SKB_POOL_CLOSED, &sock->flags)) { ++ mutex_unlock(&sock->pool_nrt_lock); ++ return -EBADF; ++ } ++ ret = rtskb_pool_extend(&sock->skb_pool, *val); ++ sock->pool_size += ret; ++ ++ mutex_unlock(&sock->pool_nrt_lock); ++ ++ if (ret == 0 && *val > 0) ++ ret = -ENOMEM; ++ ++ break; ++ ++ case RTNET_RTIOC_SHRPOOL: ++ val = rtnet_get_arg(fd, &_val, arg, sizeof(_val)); ++ if (IS_ERR(val)) ++ return PTR_ERR(val); ++ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ mutex_lock(&sock->pool_nrt_lock); ++ ++ ret = rtskb_pool_shrink(&sock->skb_pool, *val); ++ sock->pool_size -= ret; ++ ++ mutex_unlock(&sock->pool_nrt_lock); ++ ++ if (ret == 0 && *val > 0) ++ ret = -EBUSY; ++ ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ break; ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rt_socket_common_ioctl); ++ ++/*** ++ * rt_socket_if_ioctl ++ */ ++int rt_socket_if_ioctl(struct rtdm_fd *fd, int request, void __user *arg) ++{ ++ struct rtnet_device *rtdev; ++ struct ifreq _ifr, *ifr, *u_ifr; ++ struct sockaddr_in _sin; ++ struct ifconf _ifc, *ifc, *u_ifc; ++ int ret = 0, size = 0, i; ++ short flags; ++ ++ if (request == SIOCGIFCONF) { ++ u_ifc = arg; ++ ifc = rtnet_get_arg(fd, &_ifc, u_ifc, sizeof(_ifc)); ++ if (IS_ERR(ifc)) ++ return PTR_ERR(ifc); ++ ++ for (u_ifr = ifc->ifc_req, i = 1; i <= MAX_RT_DEVICES; ++ i++, u_ifr++) { ++ rtdev = rtdev_get_by_index(i); ++ if (rtdev == NULL) ++ continue; ++ ++ if ((rtdev->flags & IFF_UP) == 0) { ++ rtdev_dereference(rtdev); ++ continue; ++ } ++ ++ size += sizeof(struct ifreq); ++ if (size > ifc->ifc_len) { ++ rtdev_dereference(rtdev); ++ size = ifc->ifc_len; ++ break; ++ } ++ ++ ret = rtnet_put_arg(fd, u_ifr->ifr_name, rtdev->name, ++ IFNAMSIZ); ++ if (ret == 0) { ++ memset(&_sin, 0, sizeof(_sin)); ++ _sin.sin_family = AF_INET; ++ _sin.sin_addr.s_addr = rtdev->local_ip; ++ ret = rtnet_put_arg(fd, &u_ifr->ifr_addr, &_sin, ++ sizeof(_sin)); ++ } ++ ++ rtdev_dereference(rtdev); ++ if (ret) ++ return ret; ++ } ++ ++ return rtnet_put_arg(fd, &u_ifc->ifc_len, &size, sizeof(size)); ++ } ++ ++ u_ifr = arg; ++ ifr = rtnet_get_arg(fd, &_ifr, u_ifr, sizeof(_ifr)); ++ if (IS_ERR(ifr)) ++ return PTR_ERR(ifr); ++ ++ if (request == SIOCGIFNAME) { ++ rtdev = rtdev_get_by_index(ifr->ifr_ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ret = rtnet_put_arg(fd, u_ifr->ifr_name, rtdev->name, IFNAMSIZ); ++ goto out; ++ } ++ ++ rtdev = rtdev_get_by_name(ifr->ifr_name); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ switch (request) { ++ case SIOCGIFINDEX: ++ ret = rtnet_put_arg(fd, &u_ifr->ifr_ifindex, &rtdev->ifindex, ++ sizeof(u_ifr->ifr_ifindex)); ++ break; ++ ++ case SIOCGIFFLAGS: ++ flags = rtdev->flags; ++ if ((ifr->ifr_flags & IFF_UP) && ++ (rtdev->link_state & ++ (RTNET_LINK_STATE_PRESENT | RTNET_LINK_STATE_NOCARRIER)) == ++ RTNET_LINK_STATE_PRESENT) ++ flags |= IFF_RUNNING; ++ ret = rtnet_put_arg(fd, &u_ifr->ifr_flags, &flags, ++ sizeof(u_ifr->ifr_flags)); ++ break; ++ ++ case SIOCGIFHWADDR: ++ ret = rtnet_put_arg(fd, &u_ifr->ifr_hwaddr.sa_data, ++ rtdev->dev_addr, rtdev->addr_len); ++ if (!ret) ++ ret = rtnet_put_arg( ++ fd, &u_ifr->ifr_hwaddr.sa_family, &rtdev->type, ++ sizeof(u_ifr->ifr_hwaddr.sa_family)); ++ break; ++ ++ case SIOCGIFADDR: ++ memset(&_sin, 0, sizeof(_sin)); ++ _sin.sin_family = AF_INET; ++ _sin.sin_addr.s_addr = rtdev->local_ip; ++ ret = rtnet_put_arg(fd, &u_ifr->ifr_addr, &_sin, sizeof(_sin)); ++ break; ++ ++ case SIOCETHTOOL: ++ if (rtdev->do_ioctl != NULL) { ++ if (rtdm_in_rt_context()) ++ return 
-ENOSYS; ++ ret = rtdev->do_ioctl(rtdev, ifr, request); ++ } else ++ ret = -EOPNOTSUPP; ++ break; ++ ++ case SIOCDEVPRIVATE ... SIOCDEVPRIVATE + 15: ++ if (rtdev->do_ioctl != NULL) ++ ret = rtdev->do_ioctl(rtdev, ifr, request); ++ else ++ ret = -EOPNOTSUPP; ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ break; ++ } ++ ++out: ++ rtdev_dereference(rtdev); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rt_socket_if_ioctl); ++ ++int rt_socket_select_bind(struct rtdm_fd *fd, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ ++ switch (type) { ++ case XNSELECT_READ: ++ return rtdm_sem_select(&sock->pending_sem, selector, ++ XNSELECT_READ, fd_index); ++ default: ++ return -EBADF; ++ } ++ ++ return -EINVAL; ++} ++EXPORT_SYMBOL_GPL(rt_socket_select_bind); ++ ++void *rtnet_get_arg(struct rtdm_fd *fd, void *tmp, const void *src, size_t len) ++{ ++ int ret; ++ ++ if (!rtdm_fd_is_user(fd)) ++ return (void *)src; ++ ++ ret = rtdm_copy_from_user(fd, tmp, src, len); ++ if (ret) ++ return ERR_PTR(ret); ++ ++ return tmp; ++} ++EXPORT_SYMBOL_GPL(rtnet_get_arg); ++ ++int rtnet_put_arg(struct rtdm_fd *fd, void *dst, const void *src, size_t len) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ if (dst != src) ++ memcpy(dst, src, len); ++ return 0; ++ } ++ ++ return rtdm_copy_to_user(fd, dst, src, len); ++} ++EXPORT_SYMBOL_GPL(rtnet_put_arg); +--- linux/drivers/xenomai/net/stack/rtnet_chrdev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtnet_chrdev.c 2021-04-07 16:01:26.586635082 +0800 +@@ -0,0 +1,240 @@ ++/*** ++ * ++ * stack/rtnet_chrdev.c - implements char device for management interface ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of version 2 of the GNU General Public License as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++static DEFINE_SPINLOCK(ioctl_handler_lock); ++static LIST_HEAD(ioctl_handlers); ++ ++static long rtnet_ioctl(struct file *file, unsigned int request, ++ unsigned long arg) ++{ ++ struct rtnet_ioctl_head head; ++ struct rtnet_device *rtdev = NULL; ++ struct rtnet_ioctls *ioctls; ++ struct list_head *entry; ++ int ret; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ ret = copy_from_user(&head, (void *)arg, sizeof(head)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ spin_lock(&ioctl_handler_lock); ++ ++ list_for_each (entry, &ioctl_handlers) { ++ ioctls = list_entry(entry, struct rtnet_ioctls, entry); ++ ++ if (ioctls->ioctl_type == _IOC_TYPE(request)) { ++ atomic_inc(&ioctls->ref_count); ++ ++ spin_unlock(&ioctl_handler_lock); ++ ++ if ((_IOC_NR(request) & RTNET_IOC_NODEV_PARAM) == 0) { ++ rtdev = rtdev_get_by_name(head.if_name); ++ if (!rtdev) { ++ atomic_dec(&ioctls->ref_count); ++ return -ENODEV; ++ } ++ } ++ ++ ret = ioctls->handler(rtdev, request, arg); ++ ++ if (rtdev) ++ rtdev_dereference(rtdev); ++ atomic_dec(&ioctls->ref_count); ++ ++ return ret; ++ } ++ } ++ ++ spin_unlock(&ioctl_handler_lock); ++ ++ return -ENOTTY; ++} ++ ++static int rtnet_core_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct rtnet_core_cmd cmd; ++ int ret; ++ ++ ret = copy_from_user(&cmd, (void *)arg, sizeof(cmd)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ switch (request) { ++ case IOC_RT_IFUP: ++ ret = rtdev_up(rtdev, &cmd); ++ break; ++ ++ case IOC_RT_IFDOWN: ++ ret = rtdev_down(rtdev); ++ break; ++ ++ case IOC_RT_IFINFO: ++ if (cmd.args.info.ifindex > 0) ++ rtdev = rtdev_get_by_index(cmd.args.info.ifindex); ++ else ++ rtdev = rtdev_get_by_name(cmd.head.if_name); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) { ++ rtdev_dereference(rtdev); ++ return -ERESTARTSYS; ++ } ++ ++ memcpy(cmd.head.if_name, rtdev->name, IFNAMSIZ); ++ cmd.args.info.ifindex = rtdev->ifindex; ++ cmd.args.info.type = rtdev->type; ++ cmd.args.info.ip_addr = rtdev->local_ip; ++ cmd.args.info.broadcast_ip = rtdev->broadcast_ip; ++ cmd.args.info.mtu = rtdev->mtu; ++ cmd.args.info.flags = rtdev->flags; ++ if ((cmd.args.info.flags & IFF_UP) && ++ (rtdev->link_state & ++ (RTNET_LINK_STATE_PRESENT | RTNET_LINK_STATE_NOCARRIER)) == ++ RTNET_LINK_STATE_PRESENT) ++ cmd.args.info.flags |= IFF_RUNNING; ++ ++ memcpy(cmd.args.info.dev_addr, rtdev->dev_addr, MAX_ADDR_LEN); ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ rtdev_dereference(rtdev); ++ ++ if (copy_to_user((void *)arg, &cmd, sizeof(cmd)) != 0) ++ return -EFAULT; ++ break; ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ return ret; ++} ++ ++int rtnet_register_ioctls(struct rtnet_ioctls *ioctls) ++{ ++ struct list_head *entry; ++ struct rtnet_ioctls *registered_ioctls; ++ ++ RTNET_ASSERT(ioctls->handler != NULL, return -EINVAL;); ++ ++ spin_lock(&ioctl_handler_lock); ++ ++ list_for_each (entry, &ioctl_handlers) { ++ registered_ioctls = ++ list_entry(entry, struct rtnet_ioctls, entry); ++ if (registered_ioctls->ioctl_type == ioctls->ioctl_type) { ++ spin_unlock(&ioctl_handler_lock); ++ return -EEXIST; ++ } ++ } ++ ++ list_add_tail(&ioctls->entry, 
&ioctl_handlers); ++ atomic_set(&ioctls->ref_count, 0); ++ ++ spin_unlock(&ioctl_handler_lock); ++ ++ return 0; ++} ++ ++void rtnet_unregister_ioctls(struct rtnet_ioctls *ioctls) ++{ ++ spin_lock(&ioctl_handler_lock); ++ ++ while (atomic_read(&ioctls->ref_count) != 0) { ++ spin_unlock(&ioctl_handler_lock); ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(1 * HZ); /* wait a second */ ++ ++ spin_lock(&ioctl_handler_lock); ++ } ++ ++ list_del(&ioctls->entry); ++ ++ spin_unlock(&ioctl_handler_lock); ++} ++ ++static struct file_operations rtnet_fops = { ++ .owner = THIS_MODULE, ++ .unlocked_ioctl = rtnet_ioctl, ++}; ++ ++static struct miscdevice rtnet_chr_misc_dev = { ++ .minor = RTNET_MINOR, ++ .name = "rtnet", ++ .fops = &rtnet_fops, ++}; ++ ++static struct rtnet_ioctls core_ioctls = { .service_name = "RTnet Core", ++ .ioctl_type = RTNET_IOC_TYPE_CORE, ++ .handler = rtnet_core_ioctl }; ++ ++/** ++ * rtnet_chrdev_init - ++ * ++ */ ++int __init rtnet_chrdev_init(void) ++{ ++ int err; ++ ++ err = misc_register(&rtnet_chr_misc_dev); ++ if (err) { ++ printk("RTnet: unable to register rtnet management device/class " ++ "(error %d)\n", ++ err); ++ return err; ++ } ++ ++ rtnet_register_ioctls(&core_ioctls); ++ return 0; ++} ++ ++/** ++ * rtnet_chrdev_release - ++ * ++ */ ++void rtnet_chrdev_release(void) ++{ ++ misc_deregister(&rtnet_chr_misc_dev); ++} ++ ++EXPORT_SYMBOL_GPL(rtnet_register_ioctls); ++EXPORT_SYMBOL_GPL(rtnet_unregister_ioctls); +--- linux/drivers/xenomai/net/stack/rtwlan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtwlan.c 2021-04-07 16:01:26.581635089 +0800 +@@ -0,0 +1,219 @@ ++/* rtwlan.c ++ * ++ * rtwlan protocol stack ++ * Copyright (c) 2006, Daniel Gregorek ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++ ++#include ++ ++#include ++ ++int rtwlan_rx(struct rtskb *rtskb, struct rtnet_device *rtnet_dev) ++{ ++ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rtskb->data; ++ u16 fc = le16_to_cpu(hdr->frame_ctl); ++ ++ /* strip rtwlan header */ ++ rtskb_pull(rtskb, ieee80211_get_hdrlen(fc)); ++ rtskb->protocol = rt_eth_type_trans(rtskb, rtnet_dev); ++ ++ /* forward rtskb to rtnet */ ++ rtnetif_rx(rtskb); ++ ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(rtwlan_rx); ++ ++int rtwlan_tx(struct rtskb *rtskb, struct rtnet_device *rtnet_dev) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct ieee80211_hdr_3addr header = { /* Ensure zero initialized */ ++ .duration_id = 0, ++ .seq_ctl = 0 ++ }; ++ int ret; ++ u8 dest[ETH_ALEN], src[ETH_ALEN]; ++ ++ /* Get source and destination addresses */ ++ ++ memcpy(src, rtskb->data + ETH_ALEN, ETH_ALEN); ++ ++ if (rtwlan_dev->mode == RTWLAN_TXMODE_MCAST) { ++ memcpy(dest, rtnet_dev->dev_addr, ETH_ALEN); ++ dest[0] |= 0x01; ++ } else { ++ memcpy(dest, rtskb->data, ETH_ALEN); ++ } ++ ++ /* ++ * Generate ieee80211 compatible header ++ */ ++ memcpy(header.addr3, src, ETH_ALEN); /* BSSID */ ++ memcpy(header.addr2, src, ETH_ALEN); /* SA */ ++ memcpy(header.addr1, dest, ETH_ALEN); /* DA */ ++ ++ /* Write frame control field */ ++ header.frame_ctl = ++ cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); ++ ++ memcpy(rtskb_push(rtskb, IEEE80211_3ADDR_LEN), &header, ++ IEEE80211_3ADDR_LEN); ++ ++ ret = (*rtwlan_dev->hard_start_xmit)(rtskb, rtnet_dev); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL_GPL(rtwlan_tx); ++ ++/** ++ * rtalloc_wlandev - Allocates and sets up a wlan device ++ * @sizeof_priv: size of additional driver-private structure to ++ * be allocated for this wlan device ++ * ++ * Fill in the fields of the device structure with wlan-generic ++ * values. Basically does everything except registering the device. ++ * ++ * A 32-byte alignment is enforced for the private data area. ++ */ ++ ++struct rtnet_device *rtwlan_alloc_dev(unsigned sizeof_priv, ++ unsigned dev_pool_size) ++{ ++ struct rtnet_device *rtnet_dev; ++ ++ RTWLAN_DEBUG("Start.\n"); ++ ++ rtnet_dev = rt_alloc_etherdev( ++ sizeof(struct rtwlan_device) + sizeof_priv, dev_pool_size); ++ if (!rtnet_dev) ++ return NULL; ++ ++ rtnet_dev->hard_start_xmit = rtwlan_tx; ++ ++ rtdev_alloc_name(rtnet_dev, "rtwlan%d"); ++ ++ return rtnet_dev; ++} ++ ++EXPORT_SYMBOL_GPL(rtwlan_alloc_dev); ++ ++int rtwlan_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct rtwlan_cmd cmd; ++ struct ifreq ifr; ++ int ret = 0; ++ ++ if (copy_from_user(&cmd, (void *)arg, sizeof(cmd)) != 0) ++ return -EFAULT; ++ ++ /* ++ * FIXME: proper do_ioctl() should expect a __user pointer ++ * arg. This only works with the existing WLAN support because the ++ * only driver currently providing this feature is broken, not ++ * doing the copy_to/from_user dance. 
++ */ ++ memset(&ifr, 0, sizeof(ifr)); ++ ifr.ifr_data = &cmd; ++ ++ switch (request) { ++ case IOC_RTWLAN_IFINFO: ++ if (cmd.args.info.ifindex > 0) ++ rtdev = rtdev_get_by_index(cmd.args.info.ifindex); ++ else ++ rtdev = rtdev_get_by_name(cmd.head.if_name); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) { ++ rtdev_dereference(rtdev); ++ return -ERESTARTSYS; ++ } ++ ++ if (rtdev->do_ioctl) ++ ret = rtdev->do_ioctl(rtdev, &ifr, request); ++ else ++ ret = -ENORTWLANDEV; ++ ++ memcpy(cmd.head.if_name, rtdev->name, IFNAMSIZ); ++ cmd.args.info.ifindex = rtdev->ifindex; ++ cmd.args.info.flags = rtdev->flags; ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ rtdev_dereference(rtdev); ++ ++ break; ++ ++ case IOC_RTWLAN_TXMODE: ++ case IOC_RTWLAN_BITRATE: ++ case IOC_RTWLAN_CHANNEL: ++ case IOC_RTWLAN_RETRY: ++ case IOC_RTWLAN_TXPOWER: ++ case IOC_RTWLAN_AUTORESP: ++ case IOC_RTWLAN_DROPBCAST: ++ case IOC_RTWLAN_DROPMCAST: ++ case IOC_RTWLAN_REGREAD: ++ case IOC_RTWLAN_REGWRITE: ++ case IOC_RTWLAN_BBPWRITE: ++ case IOC_RTWLAN_BBPREAD: ++ case IOC_RTWLAN_BBPSENS: ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ if (rtdev->do_ioctl) ++ ret = rtdev->do_ioctl(rtdev, &ifr, request); ++ else ++ ret = -ENORTWLANDEV; ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ break; ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ if (copy_to_user((void *)arg, &cmd, sizeof(cmd)) != 0) ++ return -EFAULT; ++ ++ return ret; ++} ++ ++struct rtnet_ioctls rtnet_wlan_ioctls = { ++ service_name: "rtwlan ioctl", ++ ioctl_type: RTNET_IOC_TYPE_RTWLAN, ++ handler: rtwlan_ioctl ++}; ++ ++int __init rtwlan_init(void) ++{ ++ if (rtnet_register_ioctls(&rtnet_wlan_ioctls)) ++ rtdm_printk(KERN_ERR "Failed to register rtnet_wlan_ioctl!\n"); ++ ++ return 0; ++} ++ ++void rtwlan_exit(void) ++{ ++ rtnet_unregister_ioctls(&rtnet_wlan_ioctls); ++} +--- linux/drivers/xenomai/net/stack/iovec.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/iovec.c 2021-04-07 16:01:26.576635096 +0800 +@@ -0,0 +1,103 @@ ++/*** ++ * ++ * stack/iovec.c ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ssize_t rtnet_write_to_iov(struct rtdm_fd *fd, struct iovec *iov, int iovlen, ++ const void *data, size_t len) ++{ ++ ssize_t ret = 0; ++ size_t nbytes; ++ int n; ++ ++ for (n = 0; len > 0 && n < iovlen; n++, iov++) { ++ if (iov->iov_len == 0) ++ continue; ++ ++ nbytes = iov->iov_len; ++ if (nbytes > len) ++ nbytes = len; ++ ++ ret = rtnet_put_arg(fd, iov->iov_base, data, nbytes); ++ if (ret) ++ break; ++ ++ len -= nbytes; ++ data += nbytes; ++ iov->iov_len -= nbytes; ++ iov->iov_base += nbytes; ++ ret += nbytes; ++ if (ret < 0) { ++ ret = -EINVAL; ++ break; ++ } ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtnet_write_to_iov); ++ ++ssize_t rtnet_read_from_iov(struct rtdm_fd *fd, struct iovec *iov, int iovlen, ++ void *data, size_t len) ++{ ++ ssize_t ret = 0; ++ size_t nbytes; ++ int n; ++ ++ for (n = 0; len > 0 && n < iovlen; n++, iov++) { ++ if (iov->iov_len == 0) ++ continue; ++ ++ nbytes = iov->iov_len; ++ if (nbytes > len) ++ nbytes = len; ++ ++ if (!rtdm_fd_is_user(fd)) ++ memcpy(data, iov->iov_base, nbytes); ++ else { ++ ret = rtdm_copy_from_user(fd, data, iov->iov_base, ++ nbytes); ++ if (ret) ++ break; ++ } ++ ++ len -= nbytes; ++ data += nbytes; ++ iov->iov_len -= nbytes; ++ iov->iov_base += nbytes; ++ ret += nbytes; ++ if (ret < 0) { ++ ret = -EINVAL; ++ break; ++ } ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtnet_read_from_iov); +--- linux/drivers/xenomai/net/stack/corectl.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/corectl.c 2021-04-07 16:01:26.572635102 +0800 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (C) 2016 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++ ++static int rtnet_corectl_call(struct notifier_block *self, unsigned long arg, ++ void *cookie) ++{ ++ struct cobalt_config_vector *vec = cookie; ++ int ret = 0; ++ ++ if (arg != _CC_COBALT_GET_NET_CONFIG) ++ return NOTIFY_DONE; ++ ++ if (vec->u_bufsz < sizeof(ret)) ++ return notifier_from_errno(-EINVAL); ++ ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET)) ++ ret |= _CC_COBALT_NET; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ETH_P_ALL)) ++ ret |= _CC_COBALT_NET_ETH_P_ALL; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4)) ++ ret |= _CC_COBALT_NET_IPV4; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP)) ++ ret |= _CC_COBALT_NET_ICMP; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING)) ++ ret |= _CC_COBALT_NET_NETROUTING; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTE)) ++ ret |= _CC_COBALT_NET_ROUTER; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4_UDP)) ++ ret |= _CC_COBALT_NET_UDP; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTPACKET)) ++ ret |= _CC_COBALT_NET_AF_PACKET; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_TDMA)) ++ ret |= _CC_COBALT_NET_TDMA; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_NOMAC)) ++ ret |= _CC_COBALT_NET_NOMAC; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTCFG)) ++ ret |= _CC_COBALT_NET_CFG; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP)) ++ ret |= _CC_COBALT_NET_CAP; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY)) ++ ret |= _CC_COBALT_NET_PROXY; ++ ++ ret = cobalt_copy_to_user(vec->u_buf, &ret, sizeof(ret)); ++ ++ return ret ? notifier_from_errno(-EFAULT) : NOTIFY_STOP; ++} ++ ++static struct notifier_block rtnet_corectl_notifier = { ++ .notifier_call = rtnet_corectl_call, ++}; ++ ++void rtnet_corectl_register(void) ++{ ++ cobalt_add_config_chain(&rtnet_corectl_notifier); ++} ++ ++void rtnet_corectl_unregister(void) ++{ ++ cobalt_remove_config_chain(&rtnet_corectl_notifier); ++} +--- linux/drivers/xenomai/net/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/Makefile 2021-04-07 16:01:26.567635109 +0800 +@@ -0,0 +1 @@ ++obj-$(CONFIG_XENO_DRIVERS_NET) += stack/ drivers/ addons/ +--- linux/drivers/xenomai/net/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/Kconfig 2021-04-07 16:01:26.562635116 +0800 +@@ -0,0 +1,25 @@ ++menu "RTnet" ++ ++config XENO_DRIVERS_NET ++ depends on m ++ select NET ++ tristate "RTnet, TCP/IP socket interface" ++ ++if XENO_DRIVERS_NET ++ ++config XENO_DRIVERS_RTNET_CHECKED ++ bool "Internal Bug Checks" ++ default n ++ ---help--- ++ Switch on if you face crashes when RTnet is running or if you suspect ++ any other RTnet-related issues. This feature will add a few sanity ++ checks at critical points that will produce warnings on the kernel ++ console in case certain internal bugs are detected. ++ ++source "drivers/xenomai/net/stack/Kconfig" ++source "drivers/xenomai/net/drivers/Kconfig" ++source "drivers/xenomai/net/addons/Kconfig" ++ ++endif ++ ++endmenu +--- linux/drivers/xenomai/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/Kconfig 2021-04-07 16:01:26.552635131 +0800 +@@ -0,0 +1,35 @@ ++menu "Drivers" ++ ++config XENO_OPT_RTDM_COMPAT_DEVNODE ++ bool "Enable legacy pathnames for named RTDM devices" ++ default y ++ help ++ This compatibility option allows applications to open named ++ RTDM devices using the legacy naming scheme, i.e. 
++ ++ fd = open("devname", ...); ++ or ++ fd = open("/dev/devname", ...); ++ ++ When such a request is received by RTDM, a warning message is ++ issued to the kernel log whenever XENO_OPT_DEBUG_LEGACY is ++ also enabled in the kernel configuration. ++ ++ Applications should open named devices via their actual device ++ nodes instead, i.e. ++ ++ fd = open("/dev/rtdm/devname", ...); ++ ++source "drivers/xenomai/autotune/Kconfig" ++source "drivers/xenomai/serial/Kconfig" ++source "drivers/xenomai/testing/Kconfig" ++source "drivers/xenomai/can/Kconfig" ++source "drivers/xenomai/net/Kconfig" ++source "drivers/xenomai/analogy/Kconfig" ++source "drivers/xenomai/ipc/Kconfig" ++source "drivers/xenomai/udd/Kconfig" ++source "drivers/xenomai/gpio/Kconfig" ++source "drivers/xenomai/gpiopwm/Kconfig" ++source "drivers/xenomai/spi/Kconfig" ++ ++endmenu +--- linux/drivers/xenomai/udd/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/udd/Makefile 2021-04-07 16:01:26.547635138 +0800 +@@ -0,0 +1,5 @@ ++ccflags-y += -Ikernel ++ ++obj-$(CONFIG_XENO_DRIVERS_UDD) += xeno_udd.o ++ ++xeno_udd-y := udd.o +--- linux/drivers/xenomai/udd/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/udd/Kconfig 2021-04-07 16:01:26.543635144 +0800 +@@ -0,0 +1,10 @@ ++menu "UDD support" ++ ++config XENO_DRIVERS_UDD ++ tristate "User-space device driver framework" ++ help ++ ++ A RTDM-based driver for enabling interrupt control and I/O ++ memory access interfaces to user-space device drivers. ++ ++endmenu +--- linux/drivers/xenomai/udd/udd.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/udd/udd.c 2021-04-07 16:01:26.538635151 +0800 +@@ -0,0 +1,658 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct udd_context { ++ u32 event_count; ++}; ++ ++static int udd_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct udd_context *context; ++ struct udd_device *udd; ++ int ret; ++ ++ udd = container_of(rtdm_fd_device(fd), struct udd_device, __reserved.device); ++ if (udd->ops.open) { ++ ret = udd->ops.open(fd, oflags); ++ if (ret) ++ return ret; ++ } ++ ++ context = rtdm_fd_to_private(fd); ++ context->event_count = 0; ++ ++ return 0; ++} ++ ++static void udd_close(struct rtdm_fd *fd) ++{ ++ struct udd_device *udd; ++ ++ udd = container_of(rtdm_fd_device(fd), struct udd_device, __reserved.device); ++ if (udd->ops.close) ++ udd->ops.close(fd); ++} ++ ++static int udd_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct udd_signotify signfy; ++ struct udd_reserved *ur; ++ struct udd_device *udd; ++ rtdm_event_t done; ++ int ret; ++ ++ udd = container_of(rtdm_fd_device(fd), struct udd_device, __reserved.device); ++ if (udd->ops.ioctl) { ++ ret = udd->ops.ioctl(fd, request, arg); ++ if (ret != -ENOSYS) ++ return ret; ++ } ++ ++ ur = &udd->__reserved; ++ ++ switch (request) { ++ case UDD_RTIOC_IRQSIG: ++ ret = rtdm_safe_copy_from_user(fd, &signfy, arg, sizeof(signfy)); ++ if (ret) ++ return ret; ++ /* Early check, we'll redo at each signal issue. */ ++ if (signfy.pid <= 0) ++ ur->signfy.pid = -1; ++ else { ++ if (signfy.sig < SIGRTMIN || signfy.sig > SIGRTMAX) ++ return -EINVAL; ++ if (cobalt_thread_find_local(signfy.pid) == NULL) ++ return -EINVAL; ++ ur->signfy = signfy; ++ } ++ break; ++ case UDD_RTIOC_IRQEN: ++ case UDD_RTIOC_IRQDIS: ++ if (udd->irq == UDD_IRQ_NONE || udd->irq == UDD_IRQ_CUSTOM) ++ return -EIO; ++ rtdm_event_init(&done, 0); ++ if (request == UDD_RTIOC_IRQEN) ++ udd_enable_irq(udd, &done); ++ else ++ udd_disable_irq(udd, &done); ++ ret = rtdm_event_wait(&done); ++ if (ret != -EIDRM) ++ rtdm_event_destroy(&done); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static ssize_t udd_read_rt(struct rtdm_fd *fd, ++ void __user *buf, size_t len) ++{ ++ struct udd_context *context; ++ struct udd_reserved *ur; ++ struct udd_device *udd; ++ rtdm_lockctx_t ctx; ++ ssize_t ret = 0; ++ u32 count; ++ ++ if (len != sizeof(count)) ++ return -EINVAL; ++ ++ udd = container_of(rtdm_fd_device(fd), struct udd_device, __reserved.device); ++ if (udd->irq == UDD_IRQ_NONE) ++ return -EIO; ++ ++ ur = &udd->__reserved; ++ context = rtdm_fd_to_private(fd); ++ ++ cobalt_atomic_enter(ctx); ++ ++ if (ur->event_count != context->event_count) ++ rtdm_event_clear(&ur->pulse); ++ else ++ ret = rtdm_event_wait(&ur->pulse); ++ ++ count = ur->event_count; ++ ++ cobalt_atomic_leave(ctx); ++ ++ if (ret) ++ return ret; ++ ++ context->event_count = count; ++ ret = rtdm_copy_to_user(fd, buf, &count, sizeof(count)); ++ ++ return ret ?: sizeof(count); ++} ++ ++static ssize_t udd_write_rt(struct rtdm_fd *fd, ++ const void __user *buf, size_t len) ++{ ++ int ret; ++ u32 val; ++ ++ if (len != sizeof(val)) ++ return -EINVAL; ++ ++ ret = rtdm_safe_copy_from_user(fd, &val, buf, sizeof(val)); ++ if (ret) ++ return ret; ++ ++ ret = udd_ioctl_rt(fd, val ? 
UDD_RTIOC_IRQEN : UDD_RTIOC_IRQDIS, NULL); ++ ++ return ret ?: len; ++} ++ ++static int udd_select(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned int type, unsigned int index) ++{ ++ struct udd_device *udd; ++ ++ udd = container_of(rtdm_fd_device(fd), struct udd_device, __reserved.device); ++ if (udd->irq == UDD_IRQ_NONE) ++ return -EIO; ++ ++ return rtdm_event_select(&udd->__reserved.pulse, ++ selector, type, index); ++} ++ ++static int udd_irq_handler(rtdm_irq_t *irqh) ++{ ++ struct udd_device *udd; ++ int ret; ++ ++ udd = rtdm_irq_get_arg(irqh, struct udd_device); ++ ret = udd->ops.interrupt(udd); ++ if (ret == RTDM_IRQ_HANDLED) ++ udd_notify_event(udd); ++ ++ return ret; ++} ++ ++static int mapper_open(struct rtdm_fd *fd, int oflags) ++{ ++ int minor = rtdm_fd_minor(fd); ++ struct udd_device *udd; ++ ++ /* ++ * Check that we are opening a mapper instance pointing at a ++ * valid memory region. e.g. UDD creates the companion device ++ * "foo,mapper" on the fly when registering the main device ++ * "foo". Userland may then open("/dev/foo,mapper0", ...) ++ * followed by a call to mmap() for mapping the memory region ++ * #0 as declared in the mem_regions[] array of the main ++ * device. ++ * ++ * We support sparse region arrays, so the device minor shall ++ * match the mem_regions[] index exactly. ++ */ ++ if (minor < 0 || minor >= UDD_NR_MAPS) ++ return -EIO; ++ ++ udd = udd_get_device(fd); ++ if (udd->mem_regions[minor].type == UDD_MEM_NONE) ++ return -EIO; ++ ++ return 0; ++} ++ ++static void mapper_close(struct rtdm_fd *fd) ++{ ++ /* nop */ ++} ++ ++static int mapper_mmap(struct rtdm_fd *fd, struct vm_area_struct *vma) ++{ ++ struct udd_memregion *rn; ++ struct udd_device *udd; ++ size_t len; ++ int ret; ++ ++ udd = udd_get_device(fd); ++ if (udd->ops.mmap) ++ /* Offload to client driver if handler is present. */ ++ return udd->ops.mmap(fd, vma); ++ ++ /* Otherwise DIY using the RTDM helpers. */ ++ ++ len = vma->vm_end - vma->vm_start; ++ rn = udd->mem_regions + rtdm_fd_minor(fd); ++ if (rn->len < len) ++ /* Can't map that much, bail out. */ ++ return -EINVAL; ++ ++ switch (rn->type) { ++ case UDD_MEM_PHYS: ++ ret = rtdm_mmap_iomem(vma, rn->addr); ++ break; ++ case UDD_MEM_LOGICAL: ++ ret = rtdm_mmap_kmem(vma, (void *)rn->addr); ++ break; ++ case UDD_MEM_VIRTUAL: ++ ret = rtdm_mmap_vmem(vma, (void *)rn->addr); ++ break; ++ default: ++ ret = -EINVAL; /* Paranoid, can't happen. 
*/ ++ } ++ ++ return ret; ++} ++ ++static inline int check_memregion(struct udd_device *udd, ++ struct udd_memregion *rn) ++{ ++ if (rn->name == NULL) ++ return -EINVAL; ++ ++ if (rn->addr == 0) ++ return -EINVAL; ++ ++ if (rn->len == 0) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static inline int register_mapper(struct udd_device *udd) ++{ ++ struct udd_reserved *ur = &udd->__reserved; ++ struct rtdm_driver *drv = &ur->mapper_driver; ++ struct udd_mapper *mapper; ++ struct udd_memregion *rn; ++ int n, ret; ++ ++ ur->mapper_name = kasformat("%s,mapper%%d", udd->device_name); ++ if (ur->mapper_name == NULL) ++ return -ENOMEM; ++ ++ drv->profile_info = (struct rtdm_profile_info) ++ RTDM_PROFILE_INFO(mapper, RTDM_CLASS_MEMORY, ++ RTDM_SUBCLASS_GENERIC, 0); ++ drv->device_flags = RTDM_NAMED_DEVICE|RTDM_FIXED_MINOR; ++ drv->device_count = UDD_NR_MAPS; ++ drv->base_minor = 0; ++ drv->ops = (struct rtdm_fd_ops){ ++ .open = mapper_open, ++ .close = mapper_close, ++ .mmap = mapper_mmap, ++ }; ++ ++ for (n = 0, mapper = ur->mapdev; n < UDD_NR_MAPS; n++, mapper++) { ++ rn = udd->mem_regions + n; ++ if (rn->type == UDD_MEM_NONE) ++ continue; ++ mapper->dev.driver = drv; ++ mapper->dev.label = ur->mapper_name; ++ mapper->dev.minor = n; ++ mapper->udd = udd; ++ ret = rtdm_dev_register(&mapper->dev); ++ if (ret) ++ goto undo; ++ } ++ ++ return 0; ++undo: ++ while (--n >= 0) ++ rtdm_dev_unregister(&ur->mapdev[n].dev); ++ ++ return ret; ++} ++ ++/** ++ * @brief Register a UDD device ++ * ++ * This routine registers a mini-driver at the UDD core. ++ * ++ * @param udd @ref udd_device "UDD device descriptor" which should ++ * describe the new device properties. ++ * ++ * @return Zero is returned upon success, otherwise a negative error ++ * code is received, from the set of error codes defined by ++ * rtdm_dev_register(). In addition, the following error codes can be ++ * returned: ++ * ++ * - -EINVAL, some of the memory regions declared in the ++ * udd_device.mem_regions[] array have invalid properties, i.e. bad ++ * type, NULL name, zero length or address. Any undeclared region ++ * entry from the array must bear the UDD_MEM_NONE type. ++ * ++ * - -EINVAL, if udd_device.irq is different from UDD_IRQ_CUSTOM and ++ * UDD_IRQ_NONE but invalid, causing rtdm_irq_request() to fail. ++ * ++ * - -EINVAL, if udd_device.device_flags contains invalid flags. ++ * ++ * - -ENOSYS, if this service is called while the real-time core is disabled. ++ * ++ * @coretags{secondary-only} ++ */ ++int udd_register_device(struct udd_device *udd) ++{ ++ struct rtdm_device *dev = &udd->__reserved.device; ++ struct udd_reserved *ur = &udd->__reserved; ++ struct rtdm_driver *drv = &ur->driver; ++ struct udd_memregion *rn; ++ int ret, n; ++ ++ if (udd->device_flags & RTDM_PROTOCOL_DEVICE) ++ return -EINVAL; ++ ++ if (udd->irq != UDD_IRQ_NONE && udd->irq != UDD_IRQ_CUSTOM && ++ udd->ops.interrupt == NULL) ++ return -EINVAL; ++ ++ for (n = 0, ur->nr_maps = 0; n < UDD_NR_MAPS; n++) { ++ /* We allow sparse region arrays. 
*/ ++ rn = udd->mem_regions + n; ++ if (rn->type == UDD_MEM_NONE) ++ continue; ++ ret = check_memregion(udd, rn); ++ if (ret) ++ return ret; ++ udd->__reserved.nr_maps++; ++ } ++ ++ drv->profile_info = (struct rtdm_profile_info) ++ RTDM_PROFILE_INFO(udd->device_name, RTDM_CLASS_UDD, ++ udd->device_subclass, 0); ++ drv->device_flags = RTDM_NAMED_DEVICE|udd->device_flags; ++ drv->device_count = 1; ++ drv->context_size = sizeof(struct udd_context); ++ drv->ops = (struct rtdm_fd_ops){ ++ .open = udd_open, ++ .ioctl_rt = udd_ioctl_rt, ++ .read_rt = udd_read_rt, ++ .write_rt = udd_write_rt, ++ .close = udd_close, ++ .select = udd_select, ++ }; ++ ++ dev->driver = drv; ++ dev->label = udd->device_name; ++ ++ ret = rtdm_dev_register(dev); ++ if (ret) ++ return ret; ++ ++ if (ur->nr_maps > 0) { ++ ret = register_mapper(udd); ++ if (ret) ++ goto fail_mapper; ++ } else ++ ur->mapper_name = NULL; ++ ++ ur->event_count = 0; ++ rtdm_event_init(&ur->pulse, 0); ++ ur->signfy.pid = -1; ++ ++ if (udd->irq != UDD_IRQ_NONE && udd->irq != UDD_IRQ_CUSTOM) { ++ ret = rtdm_irq_request(&ur->irqh, udd->irq, ++ udd_irq_handler, 0, ++ dev->name, udd); ++ if (ret) ++ goto fail_irq_request; ++ } ++ ++ return 0; ++ ++fail_irq_request: ++ for (n = 0; n < UDD_NR_MAPS; n++) { ++ rn = udd->mem_regions + n; ++ if (rn->type != UDD_MEM_NONE) ++ rtdm_dev_unregister(&ur->mapdev[n].dev); ++ } ++fail_mapper: ++ rtdm_dev_unregister(dev); ++ if (ur->mapper_name) ++ kfree(ur->mapper_name); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(udd_register_device); ++ ++/** ++ * @brief Unregister a UDD device ++ * ++ * This routine unregisters a mini-driver from the UDD core. This ++ * routine waits until all connections to @a udd have been closed ++ * prior to unregistering. ++ * ++ * @param udd UDD device descriptor ++ * ++ * @return Zero is returned upon success, otherwise -ENXIO is received ++ * if this service is called while the Cobalt kernel is disabled. ++ * ++ * @coretags{secondary-only} ++ */ ++int udd_unregister_device(struct udd_device *udd) ++{ ++ struct udd_reserved *ur = &udd->__reserved; ++ struct udd_memregion *rn; ++ int n; ++ ++ rtdm_event_destroy(&ur->pulse); ++ ++ if (udd->irq != UDD_IRQ_NONE && udd->irq != UDD_IRQ_CUSTOM) ++ rtdm_irq_free(&ur->irqh); ++ ++ for (n = 0; n < UDD_NR_MAPS; n++) { ++ rn = udd->mem_regions + n; ++ if (rn->type != UDD_MEM_NONE) ++ rtdm_dev_unregister(&ur->mapdev[n].dev); ++ } ++ ++ if (ur->mapper_name) ++ kfree(ur->mapper_name); ++ ++ rtdm_dev_unregister(&ur->device); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(udd_unregister_device); ++ ++/** ++ * @brief Notify an IRQ event for an unmanaged interrupt ++ * ++ * When the UDD core shall hand over the interrupt management for a ++ * device to the mini-driver (see UDD_IRQ_CUSTOM), the latter should ++ * notify the UDD core when IRQ events are received by calling this ++ * service. ++ * ++ * As a result, the UDD core wakes up any Cobalt thread waiting for ++ * interrupts on the device via a read(2) or select(2) call. ++ * ++ * @param udd UDD device descriptor receiving the IRQ. ++ * ++ * @coretags{coreirq-only} ++ * ++ * @note In case the @ref udd_irq_handler "IRQ handler" from the ++ * mini-driver requested the UDD core not to re-enable the interrupt ++ * line, the application may later request the unmasking by issuing ++ * the UDD_RTIOC_IRQEN ioctl(2) command. Writing a non-zero integer to ++ * the device via the write(2) system call has the same effect. 
++ */ ++void udd_notify_event(struct udd_device *udd) ++{ ++ struct udd_reserved *ur = &udd->__reserved; ++ union sigval sival; ++ rtdm_lockctx_t ctx; ++ ++ cobalt_atomic_enter(ctx); ++ ur->event_count++; ++ rtdm_event_signal(&ur->pulse); ++ cobalt_atomic_leave(ctx); ++ ++ if (ur->signfy.pid > 0) { ++ sival.sival_int = (int)ur->event_count; ++ __cobalt_sigqueue(ur->signfy.pid, ur->signfy.sig, &sival); ++ } ++} ++EXPORT_SYMBOL_GPL(udd_notify_event); ++ ++struct irqswitch_work { ++ struct ipipe_work_header work; /* Must be first. */ ++ rtdm_irq_t *irqh; ++ int enabled; ++ rtdm_event_t *done; ++}; ++ ++static void lostage_irqswitch_line(struct ipipe_work_header *work) ++{ ++ struct irqswitch_work *rq; ++ ++ /* ++ * This runs from secondary mode, we may flip the IRQ state ++ * now. ++ */ ++ rq = container_of(work, struct irqswitch_work, work); ++ if (rq->enabled) ++ rtdm_irq_enable(rq->irqh); ++ else ++ rtdm_irq_disable(rq->irqh); ++ ++ if (rq->done) ++ rtdm_event_signal(rq->done); ++} ++ ++static void switch_irq_line(rtdm_irq_t *irqh, int enable, rtdm_event_t *done) ++{ ++ struct irqswitch_work switchwork = { ++ .work = { ++ .size = sizeof(switchwork), ++ .handler = lostage_irqswitch_line, ++ }, ++ .irqh = irqh, ++ .enabled = enable, ++ .done = done, ++ }; ++ ++ /* ++ * Not pretty, but we may not traverse the kernel code for ++ * enabling/disabling IRQ lines from primary mode. So we have ++ * to send a deferrable root request (i.e. low-level APC) to ++ * be callable from real-time context. ++ */ ++ ipipe_post_work_root(&switchwork, work); ++} ++ ++/** ++ * @brief Enable the device IRQ line ++ * ++ * This service issues a request to the regular kernel for enabling ++ * the IRQ line registered by the driver. If the caller runs in ++ * primary mode, the request is scheduled but deferred until the ++ * current CPU leaves the real-time domain (see note). Otherwise, the ++ * request is immediately handled. ++ * ++ * @param udd The UDD driver handling the IRQ to disable. If no IRQ ++ * was registered by the driver at the UDD core, this routine has no ++ * effect. ++ * ++ * @param done Optional event to signal upon completion. If non-NULL, ++ * @a done will be posted by a call to rtdm_event_signal() after the ++ * interrupt line is enabled. ++ * ++ * @coretags{unrestricted} ++ * ++ * @note The deferral is required as some interrupt management code ++ * involved in enabling interrupt lines may not be safely executed ++ * from primary mode. By passing a valid @a done object address, the ++ * caller can wait for the request to complete, by sleeping on ++ * rtdm_event_wait(). ++ */ ++void udd_enable_irq(struct udd_device *udd, rtdm_event_t *done) ++{ ++ struct udd_reserved *ur = &udd->__reserved; ++ ++ if (udd->irq != UDD_IRQ_NONE && udd->irq != UDD_IRQ_CUSTOM) ++ switch_irq_line(&ur->irqh, 1, done); ++} ++EXPORT_SYMBOL_GPL(udd_enable_irq); ++ ++/** ++ * @brief Disable the device IRQ line ++ * ++ * This service issues a request to the regular kernel for disabling ++ * the IRQ line registered by the driver. If the caller runs in ++ * primary mode, the request is scheduled but deferred until the ++ * current CPU leaves the real-time domain (see note). Otherwise, the ++ * request is immediately handled. ++ * ++ * @param udd The UDD driver handling the IRQ to disable. If no IRQ ++ * was registered by the driver at the UDD core, this routine has no ++ * effect. ++ * ++ * @param done Optional event to signal upon completion. 
If non-NULL, ++ * @a done will be posted by a call to rtdm_event_signal() after the ++ * interrupt line is disabled. ++ * ++ * @coretags{unrestricted} ++ * ++ * @note The deferral is required as some interrupt management code ++ * involved in disabling interrupt lines may not be safely executed ++ * from primary mode. By passing a valid @a done object address, the ++ * caller can wait for the request to complete, by sleeping on ++ * rtdm_event_wait(). ++ */ ++void udd_disable_irq(struct udd_device *udd, rtdm_event_t *done) ++{ ++ struct udd_reserved *ur = &udd->__reserved; ++ ++ if (udd->irq != UDD_IRQ_NONE && udd->irq != UDD_IRQ_CUSTOM) ++ switch_irq_line(&ur->irqh, 0, done); ++} ++EXPORT_SYMBOL_GPL(udd_disable_irq); ++ ++/** ++ * @brief RTDM file descriptor to target UDD device ++ * ++ * Retrieves the UDD device from a RTDM file descriptor. ++ * ++ * @param fd File descriptor received by an ancillary I/O handler ++ * from a mini-driver based on the UDD core. ++ * ++ * @return A pointer to the UDD device to which @a fd refers to. ++ * ++ * @note This service is intended for use by mini-drivers based on the ++ * UDD core exclusively. Passing file descriptors referring to other ++ * RTDM devices will certainly lead to invalid results. ++ * ++ * @coretags{mode-unrestricted} ++ */ ++struct udd_device *udd_get_device(struct rtdm_fd *fd) ++{ ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ ++ if (dev->driver->profile_info.class_id == RTDM_CLASS_MEMORY) ++ return container_of(dev, struct udd_mapper, dev)->udd; ++ ++ return container_of(dev, struct udd_device, __reserved.device); ++} ++EXPORT_SYMBOL_GPL(udd_get_device); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/serial/16550A_pnp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/16550A_pnp.h 2021-04-07 16:01:26.533635158 +0800 +@@ -0,0 +1,387 @@ ++/* ++ * Copyright (C) 2006-2007 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#if defined(CONFIG_PNP) && \ ++ (defined(CONFIG_XENO_DRIVERS_16550A_PIO) || \ ++ defined(CONFIG_XENO_DRIVERS_16550A_ANY)) ++ ++#include ++ ++#define UNKNOWN_DEV 0x3000 ++ ++/* Bluntly cloned from drivers/serial/8250_pnp.c */ ++static const struct pnp_device_id rt_16550_pnp_tbl[] = { ++ /* Archtek America Corp. */ ++ /* Archtek SmartLink Modem 3334BT Plug & Play */ ++ { "AAC000F", 0 }, ++ /* Anchor Datacomm BV */ ++ /* SXPro 144 External Data Fax Modem Plug & Play */ ++ { "ADC0001", 0 }, ++ /* SXPro 288 External Data Fax Modem Plug & Play */ ++ { "ADC0002", 0 }, ++ /* PROLiNK 1456VH ISA PnP K56flex Fax Modem */ ++ { "AEI0250", 0 }, ++ /* Actiontec ISA PNP 56K X2 Fax Modem */ ++ { "AEI1240", 0 }, ++ /* Rockwell 56K ACF II Fax+Data+Voice Modem */ ++ { "AKY1021", 0 /*SPCI_FL_NO_SHIRQ*/ }, ++ /* AZT3005 PnP SOUND DEVICE */ ++ { "AZT4001", 0 }, ++ /* Best Data Products Inc. 
Smart One 336F PnP Modem */ ++ { "BDP3336", 0 }, ++ /* Boca Research */ ++ /* Boca Complete Ofc Communicator 14.4 Data-FAX */ ++ { "BRI0A49", 0 }, ++ /* Boca Research 33,600 ACF Modem */ ++ { "BRI1400", 0 }, ++ /* Boca 33.6 Kbps Internal FD34FSVD */ ++ { "BRI3400", 0 }, ++ /* Boca 33.6 Kbps Internal FD34FSVD */ ++ { "BRI0A49", 0 }, ++ /* Best Data Products Inc. Smart One 336F PnP Modem */ ++ { "BDP3336", 0 }, ++ /* Computer Peripherals Inc */ ++ /* EuroViVa CommCenter-33.6 SP PnP */ ++ { "CPI4050", 0 }, ++ /* Creative Labs */ ++ /* Creative Labs Phone Blaster 28.8 DSVD PnP Voice */ ++ { "CTL3001", 0 }, ++ /* Creative Labs Modem Blaster 28.8 DSVD PnP Voice */ ++ { "CTL3011", 0 }, ++ /* Creative */ ++ /* Creative Modem Blaster Flash56 DI5601-1 */ ++ { "DMB1032", 0 }, ++ /* Creative Modem Blaster V.90 DI5660 */ ++ { "DMB2001", 0 }, ++ /* E-Tech */ ++ /* E-Tech CyberBULLET PC56RVP */ ++ { "ETT0002", 0 }, ++ /* FUJITSU */ ++ /* Fujitsu 33600 PnP-I2 R Plug & Play */ ++ { "FUJ0202", 0 }, ++ /* Fujitsu FMV-FX431 Plug & Play */ ++ { "FUJ0205", 0 }, ++ /* Fujitsu 33600 PnP-I4 R Plug & Play */ ++ { "FUJ0206", 0 }, ++ /* Fujitsu Fax Voice 33600 PNP-I5 R Plug & Play */ ++ { "FUJ0209", 0 }, ++ /* Archtek America Corp. */ ++ /* Archtek SmartLink Modem 3334BT Plug & Play */ ++ { "GVC000F", 0 }, ++ /* Hayes */ ++ /* Hayes Optima 288 V.34-V.FC + FAX + Voice Plug & Play */ ++ { "HAY0001", 0 }, ++ /* Hayes Optima 336 V.34 + FAX + Voice PnP */ ++ { "HAY000C", 0 }, ++ /* Hayes Optima 336B V.34 + FAX + Voice PnP */ ++ { "HAY000D", 0 }, ++ /* Hayes Accura 56K Ext Fax Modem PnP */ ++ { "HAY5670", 0 }, ++ /* Hayes Accura 56K Ext Fax Modem PnP */ ++ { "HAY5674", 0 }, ++ /* Hayes Accura 56K Fax Modem PnP */ ++ { "HAY5675", 0 }, ++ /* Hayes 288, V.34 + FAX */ ++ { "HAYF000", 0 }, ++ /* Hayes Optima 288 V.34 + FAX + Voice, Plug & Play */ ++ { "HAYF001", 0 }, ++ /* IBM */ ++ /* IBM Thinkpad 701 Internal Modem Voice */ ++ { "IBM0033", 0 }, ++ /* Intertex */ ++ /* Intertex 28k8 33k6 Voice EXT PnP */ ++ { "IXDC801", 0 }, ++ /* Intertex 33k6 56k Voice EXT PnP */ ++ { "IXDC901", 0 }, ++ /* Intertex 28k8 33k6 Voice SP EXT PnP */ ++ { "IXDD801", 0 }, ++ /* Intertex 33k6 56k Voice SP EXT PnP */ ++ { "IXDD901", 0 }, ++ /* Intertex 28k8 33k6 Voice SP INT PnP */ ++ { "IXDF401", 0 }, ++ /* Intertex 28k8 33k6 Voice SP EXT PnP */ ++ { "IXDF801", 0 }, ++ /* Intertex 33k6 56k Voice SP EXT PnP */ ++ { "IXDF901", 0 }, ++ /* Kortex International */ ++ /* KORTEX 28800 Externe PnP */ ++ { "KOR4522", 0 }, ++ /* KXPro 33.6 Vocal ASVD PnP */ ++ { "KORF661", 0 }, ++ /* Lasat */ ++ /* LASAT Internet 33600 PnP */ ++ { "LAS4040", 0 }, ++ /* Lasat Safire 560 PnP */ ++ { "LAS4540", 0 }, ++ /* Lasat Safire 336 PnP */ ++ { "LAS5440", 0 }, ++ /* Microcom, Inc. 
*/ ++ /* Microcom TravelPorte FAST V.34 Plug & Play */ ++ { "MNP0281", 0 }, ++ /* Microcom DeskPorte V.34 FAST or FAST+ Plug & Play */ ++ { "MNP0336", 0 }, ++ /* Microcom DeskPorte FAST EP 28.8 Plug & Play */ ++ { "MNP0339", 0 }, ++ /* Microcom DeskPorte 28.8P Plug & Play */ ++ { "MNP0342", 0 }, ++ /* Microcom DeskPorte FAST ES 28.8 Plug & Play */ ++ { "MNP0500", 0 }, ++ /* Microcom DeskPorte FAST ES 28.8 Plug & Play */ ++ { "MNP0501", 0 }, ++ /* Microcom DeskPorte 28.8S Internal Plug & Play */ ++ { "MNP0502", 0 }, ++ /* Motorola */ ++ /* Motorola BitSURFR Plug & Play */ ++ { "MOT1105", 0 }, ++ /* Motorola TA210 Plug & Play */ ++ { "MOT1111", 0 }, ++ /* Motorola HMTA 200 (ISDN) Plug & Play */ ++ { "MOT1114", 0 }, ++ /* Motorola BitSURFR Plug & Play */ ++ { "MOT1115", 0 }, ++ /* Motorola Lifestyle 28.8 Internal */ ++ { "MOT1190", 0 }, ++ /* Motorola V.3400 Plug & Play */ ++ { "MOT1501", 0 }, ++ /* Motorola Lifestyle 28.8 V.34 Plug & Play */ ++ { "MOT1502", 0 }, ++ /* Motorola Power 28.8 V.34 Plug & Play */ ++ { "MOT1505", 0 }, ++ /* Motorola ModemSURFR External 28.8 Plug & Play */ ++ { "MOT1509", 0 }, ++ /* Motorola Premier 33.6 Desktop Plug & Play */ ++ { "MOT150A", 0 }, ++ /* Motorola VoiceSURFR 56K External PnP */ ++ { "MOT150F", 0 }, ++ /* Motorola ModemSURFR 56K External PnP */ ++ { "MOT1510", 0 }, ++ /* Motorola ModemSURFR 56K Internal PnP */ ++ { "MOT1550", 0 }, ++ /* Motorola ModemSURFR Internal 28.8 Plug & Play */ ++ { "MOT1560", 0 }, ++ /* Motorola Premier 33.6 Internal Plug & Play */ ++ { "MOT1580", 0 }, ++ /* Motorola OnlineSURFR 28.8 Internal Plug & Play */ ++ { "MOT15B0", 0 }, ++ /* Motorola VoiceSURFR 56K Internal PnP */ ++ { "MOT15F0", 0 }, ++ /* Com 1 */ ++ /* Deskline K56 Phone System PnP */ ++ { "MVX00A1", 0 }, ++ /* PC Rider K56 Phone System PnP */ ++ { "MVX00F2", 0 }, ++ /* NEC 98NOTE SPEAKER PHONE FAX MODEM(33600bps) */ ++ { "nEC8241", 0 }, ++ /* Pace 56 Voice Internal Plug & Play Modem */ ++ { "PMC2430", 0 }, ++ /* Generic */ ++ /* Generic standard PC COM port */ ++ { "PNP0500", 0 }, ++ /* Generic 16550A-compatible COM port */ ++ { "PNP0501", 0 }, ++ /* Compaq 14400 Modem */ ++ { "PNPC000", 0 }, ++ /* Compaq 2400/9600 Modem */ ++ { "PNPC001", 0 }, ++ /* Dial-Up Networking Serial Cable between 2 PCs */ ++ { "PNPC031", 0 }, ++ /* Dial-Up Networking Parallel Cable between 2 PCs */ ++ { "PNPC032", 0 }, ++ /* Standard 9600 bps Modem */ ++ { "PNPC100", 0 }, ++ /* Standard 14400 bps Modem */ ++ { "PNPC101", 0 }, ++ /* Standard 28800 bps Modem*/ ++ { "PNPC102", 0 }, ++ /* Standard Modem*/ ++ { "PNPC103", 0 }, ++ /* Standard 9600 bps Modem*/ ++ { "PNPC104", 0 }, ++ /* Standard 14400 bps Modem*/ ++ { "PNPC105", 0 }, ++ /* Standard 28800 bps Modem*/ ++ { "PNPC106", 0 }, ++ /* Standard Modem */ ++ { "PNPC107", 0 }, ++ /* Standard 9600 bps Modem */ ++ { "PNPC108", 0 }, ++ /* Standard 14400 bps Modem */ ++ { "PNPC109", 0 }, ++ /* Standard 28800 bps Modem */ ++ { "PNPC10A", 0 }, ++ /* Standard Modem */ ++ { "PNPC10B", 0 }, ++ /* Standard 9600 bps Modem */ ++ { "PNPC10C", 0 }, ++ /* Standard 14400 bps Modem */ ++ { "PNPC10D", 0 }, ++ /* Standard 28800 bps Modem */ ++ { "PNPC10E", 0 }, ++ /* Standard Modem */ ++ { "PNPC10F", 0 }, ++ /* Standard PCMCIA Card Modem */ ++ { "PNP2000", 0 }, ++ /* Rockwell */ ++ /* Modular Technology */ ++ /* Rockwell 33.6 DPF Internal PnP */ ++ /* Modular Technology 33.6 Internal PnP */ ++ { "ROK0030", 0 }, ++ /* Kortex International */ ++ /* KORTEX 14400 Externe PnP */ ++ { "ROK0100", 0 }, ++ /* Rockwell 28.8 */ ++ { "ROK4120", 0 }, ++ /* Viking 
Components, Inc */ ++ /* Viking 28.8 INTERNAL Fax+Data+Voice PnP */ ++ { "ROK4920", 0 }, ++ /* Rockwell */ ++ /* British Telecom */ ++ /* Modular Technology */ ++ /* Rockwell 33.6 DPF External PnP */ ++ /* BT Prologue 33.6 External PnP */ ++ /* Modular Technology 33.6 External PnP */ ++ { "RSS00A0", 0 }, ++ /* Viking 56K FAX INT */ ++ { "RSS0262", 0 }, ++ /* K56 par,VV,Voice,Speakphone,AudioSpan,PnP */ ++ { "RSS0250", 0 }, ++ /* SupraExpress 28.8 Data/Fax PnP modem */ ++ { "SUP1310", 0 }, ++ /* SupraExpress 33.6 Data/Fax PnP modem */ ++ { "SUP1421", 0 }, ++ /* SupraExpress 33.6 Data/Fax PnP modem */ ++ { "SUP1590", 0 }, ++ /* SupraExpress 336i Sp ASVD */ ++ { "SUP1620", 0 }, ++ /* SupraExpress 33.6 Data/Fax PnP modem */ ++ { "SUP1760", 0 }, ++ /* SupraExpress 56i Sp Intl */ ++ { "SUP2171", 0 }, ++ /* Phoebe Micro */ ++ /* Phoebe Micro 33.6 Data Fax 1433VQH Plug & Play */ ++ { "TEX0011", 0 }, ++ /* Archtek America Corp. */ ++ /* Archtek SmartLink Modem 3334BT Plug & Play */ ++ { "UAC000F", 0 }, ++ /* 3Com Corp. */ ++ /* Gateway Telepath IIvi 33.6 */ ++ { "USR0000", 0 }, ++ /* U.S. Robotics Sporster 33.6K Fax INT PnP */ ++ { "USR0002", 0 }, ++ /* Sportster Vi 14.4 PnP FAX Voicemail */ ++ { "USR0004", 0 }, ++ /* U.S. Robotics 33.6K Voice INT PnP */ ++ { "USR0006", 0 }, ++ /* U.S. Robotics 33.6K Voice EXT PnP */ ++ { "USR0007", 0 }, ++ /* U.S. Robotics Courier V.Everything INT PnP */ ++ { "USR0009", 0 }, ++ /* U.S. Robotics 33.6K Voice INT PnP */ ++ { "USR2002", 0 }, ++ /* U.S. Robotics 56K Voice INT PnP */ ++ { "USR2070", 0 }, ++ /* U.S. Robotics 56K Voice EXT PnP */ ++ { "USR2080", 0 }, ++ /* U.S. Robotics 56K FAX INT */ ++ { "USR3031", 0 }, ++ /* U.S. Robotics 56K FAX INT */ ++ { "USR3050", 0 }, ++ /* U.S. Robotics 56K Voice INT PnP */ ++ { "USR3070", 0 }, ++ /* U.S. Robotics 56K Voice EXT PnP */ ++ { "USR3080", 0 }, ++ /* U.S. Robotics 56K Voice INT PnP */ ++ { "USR3090", 0 }, ++ /* U.S. Robotics 56K Message */ ++ { "USR9100", 0 }, ++ /* U.S. Robotics 56K FAX EXT PnP*/ ++ { "USR9160", 0 }, ++ /* U.S. Robotics 56K FAX INT PnP*/ ++ { "USR9170", 0 }, ++ /* U.S. Robotics 56K Voice EXT PnP*/ ++ { "USR9180", 0 }, ++ /* U.S. 
Robotics 56K Voice INT PnP*/ ++ { "USR9190", 0 }, ++ /* Wacom tablets */ ++ { "WACF004", 0 }, ++ { "WACF005", 0 }, ++ { "WACF006", 0 }, ++ /* Compaq touchscreen */ ++ { "FPI2002", 0 }, ++ /* Fujitsu Stylistic touchscreens */ ++ { "FUJ02B2", 0 }, ++ { "FUJ02B3", 0 }, ++ /* Fujitsu Stylistic LT touchscreens */ ++ { "FUJ02B4", 0 }, ++ /* Passive Fujitsu Stylistic touchscreens */ ++ { "FUJ02B6", 0 }, ++ { "FUJ02B7", 0 }, ++ { "FUJ02B8", 0 }, ++ { "FUJ02B9", 0 }, ++ { "FUJ02BC", 0 }, ++ /* Rockwell's (PORALiNK) 33600 INT PNP */ ++ { "WCI0003", 0 }, ++ /* Unkown PnP modems */ ++ { "PNPCXXX", UNKNOWN_DEV }, ++ /* More unkown PnP modems */ ++ { "PNPDXXX", UNKNOWN_DEV }, ++ { "", 0 } ++}; ++ ++static int rt_16550_pnp_probe(struct pnp_dev *dev, ++ const struct pnp_device_id *dev_id) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_DEVICES; i++) ++ if (pnp_port_valid(dev, 0) && ++ pnp_port_start(dev, 0) == io[i]) { ++ if (!irq[i]) ++ irq[i] = pnp_irq(dev, 0); ++ return 0; ++ } ++ ++ return -ENODEV; ++} ++ ++static struct pnp_driver rt_16550_pnp_driver = { ++ .name = RT_16550_DRIVER_NAME, ++ .id_table = rt_16550_pnp_tbl, ++ .probe = rt_16550_pnp_probe, ++}; ++ ++static int pnp_registered; ++ ++static inline void rt_16550_pnp_init(void) ++{ ++ if (pnp_register_driver(&rt_16550_pnp_driver) == 0) ++ pnp_registered = 1; ++} ++ ++static inline void rt_16550_pnp_cleanup(void) ++{ ++ if (pnp_registered) ++ pnp_unregister_driver(&rt_16550_pnp_driver); ++} ++ ++#else /* !CONFIG_PNP || !(..._16550A_IO || ..._16550A_ANY) */ ++ ++#define rt_16550_pnp_init() do { } while (0) ++#define rt_16550_pnp_cleanup() do { } while (0) ++ ++#endif /* !CONFIG_PNP || !(..._16550A_IO || ..._16550A_ANY) */ +--- linux/drivers/xenomai/serial/16550A_io.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/16550A_io.h 2021-04-07 16:01:26.528635165 +0800 +@@ -0,0 +1,210 @@ ++/* ++ * Copyright (C) 2007 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* Manages the I/O access method of the driver. 
*/ ++ ++typedef enum { MODE_PIO, MODE_MMIO } io_mode_t; ++ ++#if defined(CONFIG_XENO_DRIVERS_16550A_PIO) || \ ++ defined(CONFIG_XENO_DRIVERS_16550A_ANY) ++static unsigned long io[MAX_DEVICES]; ++module_param_array(io, ulong, NULL, 0400); ++MODULE_PARM_DESC(io, "I/O port addresses of the serial devices"); ++#endif /* CONFIG_XENO_DRIVERS_16550A_PIO || CONFIG_XENO_DRIVERS_16550A_ANY */ ++ ++#if defined(CONFIG_XENO_DRIVERS_16550A_MMIO) || \ ++ defined(CONFIG_XENO_DRIVERS_16550A_ANY) ++static unsigned long mem[MAX_DEVICES]; ++static void *mapped_io[MAX_DEVICES]; ++module_param_array(mem, ulong, NULL, 0400); ++MODULE_PARM_DESC(mem, "I/O memory addresses of the serial devices"); ++#endif /* CONFIG_XENO_DRIVERS_16550A_MMIO || CONFIG_XENO_DRIVERS_16550A_ANY */ ++ ++#ifdef CONFIG_XENO_DRIVERS_16550A_PIO ++ ++#define RT_16550_IO_INLINE inline ++ ++extern void *mapped_io[]; /* dummy */ ++ ++static inline unsigned long rt_16550_addr_param(int dev_id) ++{ ++ return io[dev_id]; ++} ++ ++static inline int rt_16550_addr_param_valid(int dev_id) ++{ ++ return 1; ++} ++ ++static inline unsigned long rt_16550_base_addr(int dev_id) ++{ ++ return io[dev_id]; ++} ++ ++static inline io_mode_t rt_16550_io_mode(int dev_id) ++{ ++ return MODE_PIO; ++} ++ ++static inline io_mode_t ++rt_16550_io_mode_from_ctx(struct rt_16550_context *ctx) ++{ ++ return MODE_PIO; ++} ++ ++static inline void ++rt_16550_init_io_ctx(int dev_id, struct rt_16550_context *ctx) ++{ ++ ctx->base_addr = io[dev_id]; ++} ++ ++#elif defined(CONFIG_XENO_DRIVERS_16550A_MMIO) ++ ++#define RT_16550_IO_INLINE inline ++ ++extern unsigned long io[]; /* dummy */ ++ ++static inline unsigned long rt_16550_addr_param(int dev_id) ++{ ++ return mem[dev_id]; ++} ++ ++static inline int rt_16550_addr_param_valid(int dev_id) ++{ ++ return 1; ++} ++ ++static inline unsigned long rt_16550_base_addr(int dev_id) ++{ ++ return (unsigned long)mapped_io[dev_id]; ++} ++ ++static inline io_mode_t rt_16550_io_mode(int dev_id) ++{ ++ return MODE_MMIO; ++} ++ ++static inline io_mode_t ++rt_16550_io_mode_from_ctx(struct rt_16550_context *ctx) ++{ ++ return MODE_MMIO; ++} ++ ++static inline void ++rt_16550_init_io_ctx(int dev_id, struct rt_16550_context *ctx) ++{ ++ ctx->base_addr = (unsigned long)mapped_io[dev_id]; ++} ++ ++#elif defined(CONFIG_XENO_DRIVERS_16550A_ANY) ++ ++#define RT_16550_IO_INLINE /* uninline */ ++ ++static inline unsigned long rt_16550_addr_param(int dev_id) ++{ ++ return (io[dev_id]) ? io[dev_id] : mem[dev_id]; ++} ++ ++static inline int rt_16550_addr_param_valid(int dev_id) ++{ ++ return !(io[dev_id] && mem[dev_id]); ++} ++ ++static inline unsigned long rt_16550_base_addr(int dev_id) ++{ ++ return (io[dev_id]) ? io[dev_id] : (unsigned long)mapped_io[dev_id]; ++} ++ ++static inline io_mode_t rt_16550_io_mode(int dev_id) ++{ ++ return (io[dev_id]) ? 
MODE_PIO : MODE_MMIO; ++} ++ ++static inline io_mode_t ++rt_16550_io_mode_from_ctx(struct rt_16550_context *ctx) ++{ ++ return ctx->io_mode; ++} ++ ++static inline void ++rt_16550_init_io_ctx(int dev_id, struct rt_16550_context *ctx) ++{ ++ if (io[dev_id]) { ++ ctx->base_addr = io[dev_id]; ++ ctx->io_mode = MODE_PIO; ++ } else { ++ ctx->base_addr = (unsigned long)mapped_io[dev_id]; ++ ctx->io_mode = MODE_MMIO; ++ } ++} ++ ++#else ++# error Unsupported I/O access method ++#endif ++ ++static RT_16550_IO_INLINE u8 ++rt_16550_reg_in(io_mode_t io_mode, unsigned long base, int off) ++{ ++ switch (io_mode) { ++ case MODE_PIO: ++ return inb(base + off); ++ default: /* MODE_MMIO */ ++ return readb((void *)base + off); ++ } ++} ++ ++static RT_16550_IO_INLINE void ++rt_16550_reg_out(io_mode_t io_mode, unsigned long base, int off, u8 val) ++{ ++ switch (io_mode) { ++ case MODE_PIO: ++ outb(val, base + off); ++ break; ++ case MODE_MMIO: ++ writeb(val, (void *)base + off); ++ break; ++ } ++} ++ ++static int rt_16550_init_io(int dev_id, char* name) ++{ ++ switch (rt_16550_io_mode(dev_id)) { ++ case MODE_PIO: ++ if (!request_region(rt_16550_addr_param(dev_id), 8, name)) ++ return -EBUSY; ++ break; ++ case MODE_MMIO: ++ mapped_io[dev_id] = ioremap(rt_16550_addr_param(dev_id), 8); ++ if (!mapped_io[dev_id]) ++ return -EBUSY; ++ break; ++ } ++ return 0; ++} ++ ++static void rt_16550_release_io(int dev_id) ++{ ++ switch (rt_16550_io_mode(dev_id)) { ++ case MODE_PIO: ++ release_region(io[dev_id], 8); ++ break; ++ case MODE_MMIO: ++ iounmap(mapped_io[dev_id]); ++ break; ++ } ++} +--- linux/drivers/xenomai/serial/16550A.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/16550A.c 2021-04-07 16:01:26.524635171 +0800 +@@ -0,0 +1,1188 @@ ++/* ++ * Copyright (C) 2005-2007 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++MODULE_DESCRIPTION("RTDM-based driver for 16550A UARTs"); ++MODULE_AUTHOR("Jan Kiszka "); ++MODULE_VERSION("1.5.2"); ++MODULE_LICENSE("GPL"); ++ ++#define RT_16550_DRIVER_NAME "xeno_16550A" ++ ++#define MAX_DEVICES 8 ++ ++#define IN_BUFFER_SIZE 4096 ++#define OUT_BUFFER_SIZE 4096 ++ ++#define DEFAULT_BAUD_BASE 115200 ++#define DEFAULT_TX_FIFO 16 ++ ++#define PARITY_MASK 0x03 ++#define DATA_BITS_MASK 0x03 ++#define STOP_BITS_MASK 0x01 ++#define FIFO_MASK 0xC0 ++#define EVENT_MASK 0x0F ++ ++#define LCR_DLAB 0x80 ++ ++#define FCR_FIFO 0x01 ++#define FCR_RESET_RX 0x02 ++#define FCR_RESET_TX 0x04 ++ ++#define IER_RX 0x01 ++#define IER_TX 0x02 ++#define IER_STAT 0x04 ++#define IER_MODEM 0x08 ++ ++#define IIR_MODEM 0x00 ++#define IIR_PIRQ 0x01 ++#define IIR_TX 0x02 ++#define IIR_RX 0x04 ++#define IIR_STAT 0x06 ++#define IIR_MASK 0x07 ++ ++#define RHR 0 /* Receive Holding Buffer */ ++#define THR 0 /* Transmit Holding Buffer */ ++#define DLL 0 /* Divisor Latch LSB */ ++#define IER 1 /* Interrupt Enable Register */ ++#define DLM 1 /* Divisor Latch MSB */ ++#define IIR 2 /* Interrupt Id Register */ ++#define FCR 2 /* Fifo Control Register */ ++#define LCR 3 /* Line Control Register */ ++#define MCR 4 /* Modem Control Register */ ++#define LSR 5 /* Line Status Register */ ++#define MSR 6 /* Modem Status Register */ ++ ++struct rt_16550_context { ++ struct rtser_config config; /* current device configuration */ ++ ++ rtdm_irq_t irq_handle; /* device IRQ handle */ ++ rtdm_lock_t lock; /* lock to protect context struct */ ++ ++ unsigned long base_addr; /* hardware IO base address */ ++#ifdef CONFIG_XENO_DRIVERS_16550A_ANY ++ int io_mode; /* hardware IO-access mode */ ++#endif ++ int tx_fifo; /* cached global tx_fifo[] */ ++ ++ int in_head; /* RX ring buffer, head pointer */ ++ int in_tail; /* RX ring buffer, tail pointer */ ++ size_t in_npend; /* pending bytes in RX ring */ ++ int in_nwait; /* bytes the user waits for */ ++ rtdm_event_t in_event; /* raised to unblock reader */ ++ char in_buf[IN_BUFFER_SIZE]; /* RX ring buffer */ ++ volatile unsigned long in_lock; /* single-reader lock */ ++ uint64_t *in_history; /* RX timestamp buffer */ ++ ++ int out_head; /* TX ring buffer, head pointer */ ++ int out_tail; /* TX ring buffer, tail pointer */ ++ size_t out_npend; /* pending bytes in TX ring */ ++ rtdm_event_t out_event; /* raised to unblock writer */ ++ char out_buf[OUT_BUFFER_SIZE]; /* TX ring buffer */ ++ rtdm_mutex_t out_lock; /* single-writer mutex */ ++ ++ uint64_t last_timestamp; /* timestamp of last event */ ++ int ioc_events; /* recorded events */ ++ rtdm_event_t ioc_event; /* raised to unblock event waiter */ ++ volatile unsigned long ioc_event_lock; /* single-waiter lock */ ++ ++ int ier_status; /* IER cache */ ++ int mcr_status; /* MCR cache */ ++ int status; /* cache for LSR + soft-states */ ++ int saved_errors; /* error cache for RTIOC_GET_STATUS */ ++}; ++ ++static const struct rtser_config default_config = { ++ 0xFFFF, RTSER_DEF_BAUD, RTSER_DEF_PARITY, RTSER_DEF_BITS, ++ RTSER_DEF_STOPB, RTSER_DEF_HAND, RTSER_DEF_FIFO_DEPTH, 0, ++ RTSER_DEF_TIMEOUT, RTSER_DEF_TIMEOUT, RTSER_DEF_TIMEOUT, ++ RTSER_DEF_TIMESTAMP_HISTORY, RTSER_DEF_EVENT_MASK, RTSER_DEF_RS485 ++}; ++ ++static struct rtdm_device *device[MAX_DEVICES]; ++ ++static unsigned int irq[MAX_DEVICES]; ++static unsigned long irqtype[MAX_DEVICES] = { ++ [0 ... 
MAX_DEVICES-1] = RTDM_IRQTYPE_SHARED | RTDM_IRQTYPE_EDGE ++}; ++static unsigned int baud_base[MAX_DEVICES]; ++static int tx_fifo[MAX_DEVICES]; ++ ++module_param_array(irq, uint, NULL, 0400); ++module_param_array(baud_base, uint, NULL, 0400); ++module_param_array(tx_fifo, int, NULL, 0400); ++ ++MODULE_PARM_DESC(irq, "IRQ numbers of the serial devices"); ++MODULE_PARM_DESC(baud_base, "Maximum baud rate of the serial device " ++ "(internal clock rate / 16)"); ++MODULE_PARM_DESC(tx_fifo, "Transmitter FIFO size"); ++ ++#include "16550A_io.h" ++#include "16550A_pnp.h" ++#include "16550A_pci.h" ++ ++static inline int rt_16550_rx_interrupt(struct rt_16550_context *ctx, ++ uint64_t * timestamp) ++{ ++ unsigned long base = ctx->base_addr; ++ int mode = rt_16550_io_mode_from_ctx(ctx); ++ int rbytes = 0; ++ int lsr = 0; ++ int c; ++ ++ do { ++ c = rt_16550_reg_in(mode, base, RHR); /* read input char */ ++ ++ ctx->in_buf[ctx->in_tail] = c; ++ if (ctx->in_history) ++ ctx->in_history[ctx->in_tail] = *timestamp; ++ ctx->in_tail = (ctx->in_tail + 1) & (IN_BUFFER_SIZE - 1); ++ ++ if (++ctx->in_npend > IN_BUFFER_SIZE) { ++ lsr |= RTSER_SOFT_OVERRUN_ERR; ++ ctx->in_npend--; ++ } ++ ++ rbytes++; ++ lsr &= ~RTSER_LSR_DATA; ++ lsr |= (rt_16550_reg_in(mode, base, LSR) & ++ (RTSER_LSR_DATA | RTSER_LSR_OVERRUN_ERR | ++ RTSER_LSR_PARITY_ERR | RTSER_LSR_FRAMING_ERR | ++ RTSER_LSR_BREAK_IND)); ++ } while (lsr & RTSER_LSR_DATA); ++ ++ /* save new errors */ ++ ctx->status |= lsr; ++ ++ /* If we are enforcing the RTSCTS control flow and the input ++ buffer is busy above the specified high watermark, clear ++ RTS. */ ++/* if (uart->i_count >= uart->config.rts_hiwm && ++ (uart->config.handshake & RT_UART_RTSCTS) != 0 && ++ (uart->modem & MCR_RTS) != 0) { ++ uart->modem &= ~MCR_RTS; ++ rt_16550_reg_out(mode, base, MCR, uart->modem); ++ }*/ ++ ++ return rbytes; ++} ++ ++static void rt_16550_tx_fill(struct rt_16550_context *ctx) ++{ ++ int c; ++ int count; ++ unsigned long base = ctx->base_addr; ++ int mode = rt_16550_io_mode_from_ctx(ctx); ++ ++/* if (uart->modem & MSR_CTS)*/ ++ { ++ for (count = ctx->tx_fifo; ++ (count > 0) && (ctx->out_npend > 0); ++ count--, ctx->out_npend--) { ++ c = ctx->out_buf[ctx->out_head++]; ++ rt_16550_reg_out(mode, base, THR, c); ++ ctx->out_head &= (OUT_BUFFER_SIZE - 1); ++ } ++ } ++} ++ ++static inline void rt_16550_stat_interrupt(struct rt_16550_context *ctx) ++{ ++ unsigned long base = ctx->base_addr; ++ int mode = rt_16550_io_mode_from_ctx(ctx); ++ ++ ctx->status |= (rt_16550_reg_in(mode, base, LSR) & ++ (RTSER_LSR_OVERRUN_ERR | RTSER_LSR_PARITY_ERR | ++ RTSER_LSR_FRAMING_ERR | RTSER_LSR_BREAK_IND)); ++} ++ ++static int rt_16550_interrupt(rtdm_irq_t * irq_context) ++{ ++ struct rt_16550_context *ctx; ++ unsigned long base; ++ int mode; ++ int iir; ++ uint64_t timestamp = rtdm_clock_read(); ++ int rbytes = 0; ++ int events = 0; ++ int modem; ++ int ret = RTDM_IRQ_NONE; ++ ++ ctx = rtdm_irq_get_arg(irq_context, struct rt_16550_context); ++ base = ctx->base_addr; ++ mode = rt_16550_io_mode_from_ctx(ctx); ++ ++ rtdm_lock_get(&ctx->lock); ++ ++ while (1) { ++ iir = rt_16550_reg_in(mode, base, IIR) & IIR_MASK; ++ if (iir & IIR_PIRQ) ++ break; ++ ++ if (iir == IIR_RX) { ++ rbytes += rt_16550_rx_interrupt(ctx, ×tamp); ++ events |= RTSER_EVENT_RXPEND; ++ } else if (iir == IIR_STAT) ++ rt_16550_stat_interrupt(ctx); ++ else if (iir == IIR_TX) ++ rt_16550_tx_fill(ctx); ++ else if (iir == IIR_MODEM) { ++ modem = rt_16550_reg_in(mode, base, MSR); ++ if (modem & (modem << 4)) ++ events |= 
RTSER_EVENT_MODEMHI; ++ if ((modem ^ 0xF0) & (modem << 4)) ++ events |= RTSER_EVENT_MODEMLO; ++ } ++ ++ ret = RTDM_IRQ_HANDLED; ++ } ++ ++ if (ctx->in_nwait > 0) { ++ if ((ctx->in_nwait <= rbytes) || ctx->status) { ++ ctx->in_nwait = 0; ++ rtdm_event_signal(&ctx->in_event); ++ } else ++ ctx->in_nwait -= rbytes; ++ } ++ ++ if (ctx->status) { ++ events |= RTSER_EVENT_ERRPEND; ++ ctx->ier_status &= ~IER_STAT; ++ } ++ ++ if (events & ctx->config.event_mask) { ++ int old_events = ctx->ioc_events; ++ ++ ctx->last_timestamp = timestamp; ++ ctx->ioc_events = events; ++ ++ if (!old_events) ++ rtdm_event_signal(&ctx->ioc_event); ++ } ++ ++ if ((ctx->ier_status & IER_TX) && (ctx->out_npend == 0)) { ++ /* mask transmitter empty interrupt */ ++ ctx->ier_status &= ~IER_TX; ++ ++ rtdm_event_signal(&ctx->out_event); ++ } ++ ++ /* update interrupt mask */ ++ rt_16550_reg_out(mode, base, IER, ctx->ier_status); ++ ++ rtdm_lock_put(&ctx->lock); ++ ++ return ret; ++} ++ ++static int rt_16550_set_config(struct rt_16550_context *ctx, ++ const struct rtser_config *config, ++ uint64_t **in_history_ptr) ++{ ++ rtdm_lockctx_t lock_ctx; ++ unsigned long base = ctx->base_addr; ++ int mode = rt_16550_io_mode_from_ctx(ctx); ++ int err = 0; ++ ++ /* make line configuration atomic and IRQ-safe */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ if (config->config_mask & RTSER_SET_BAUD) { ++ int dev_id = rtdm_fd_minor(rtdm_private_to_fd(ctx)); ++ int baud_div; ++ ++ ctx->config.baud_rate = config->baud_rate; ++ baud_div = (baud_base[dev_id] + (ctx->config.baud_rate>>1)) / ++ ctx->config.baud_rate; ++ rt_16550_reg_out(mode, base, LCR, LCR_DLAB); ++ rt_16550_reg_out(mode, base, DLL, baud_div & 0xff); ++ rt_16550_reg_out(mode, base, DLM, baud_div >> 8); ++ } ++ ++ if (config->config_mask & RTSER_SET_PARITY) ++ ctx->config.parity = config->parity & PARITY_MASK; ++ if (config->config_mask & RTSER_SET_DATA_BITS) ++ ctx->config.data_bits = config->data_bits & DATA_BITS_MASK; ++ if (config->config_mask & RTSER_SET_STOP_BITS) ++ ctx->config.stop_bits = config->stop_bits & STOP_BITS_MASK; ++ ++ if (config->config_mask & (RTSER_SET_PARITY | ++ RTSER_SET_DATA_BITS | ++ RTSER_SET_STOP_BITS | ++ RTSER_SET_BAUD)) { ++ rt_16550_reg_out(mode, base, LCR, ++ (ctx->config.parity << 3) | ++ (ctx->config.stop_bits << 2) | ++ ctx->config.data_bits); ++ ctx->status = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ } ++ ++ if (config->config_mask & RTSER_SET_FIFO_DEPTH) { ++ ctx->config.fifo_depth = config->fifo_depth & FIFO_MASK; ++ rt_16550_reg_out(mode, base, FCR, ++ FCR_FIFO | FCR_RESET_RX | FCR_RESET_TX); ++ rt_16550_reg_out(mode, base, FCR, ++ FCR_FIFO | ctx->config.fifo_depth); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Timeout manipulation is not atomic. The user is supposed to take ++ care not to use and change timeouts at the same time. 
*/ ++ if (config->config_mask & RTSER_SET_TIMEOUT_RX) ++ ctx->config.rx_timeout = config->rx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_TX) ++ ctx->config.tx_timeout = config->tx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_EVENT) ++ ctx->config.event_timeout = config->event_timeout; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ /* change timestamp history atomically */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ if (config->timestamp_history & RTSER_RX_TIMESTAMP_HISTORY) { ++ if (!ctx->in_history) { ++ ctx->in_history = *in_history_ptr; ++ *in_history_ptr = NULL; ++ if (!ctx->in_history) ++ err = -ENOMEM; ++ } ++ } else { ++ *in_history_ptr = ctx->in_history; ++ ctx->in_history = NULL; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ } ++ ++ if (config->config_mask & RTSER_SET_EVENT_MASK) { ++ /* change event mask atomically */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->config.event_mask = config->event_mask & EVENT_MASK; ++ ctx->ioc_events = 0; ++ ++ if ((config->event_mask & RTSER_EVENT_RXPEND) && ++ (ctx->in_npend > 0)) ++ ctx->ioc_events |= RTSER_EVENT_RXPEND; ++ ++ if ((config->event_mask & RTSER_EVENT_ERRPEND) ++ && ctx->status) ++ ctx->ioc_events |= RTSER_EVENT_ERRPEND; ++ ++ if (config->event_mask & (RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO)) ++ /* enable modem status interrupt */ ++ ctx->ier_status |= IER_MODEM; ++ else ++ /* disable modem status interrupt */ ++ ctx->ier_status &= ~IER_MODEM; ++ rt_16550_reg_out(mode, base, IER, ctx->ier_status); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ } ++ ++ if (config->config_mask & RTSER_SET_HANDSHAKE) { ++ /* change handshake atomically */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->config.handshake = config->handshake; ++ ++ switch (ctx->config.handshake) { ++ case RTSER_RTSCTS_HAND: ++ // ...? 
++ ++ default: /* RTSER_NO_HAND */ ++ ctx->mcr_status = ++ RTSER_MCR_DTR | RTSER_MCR_RTS | RTSER_MCR_OUT2; ++ break; ++ } ++ rt_16550_reg_out(mode, base, MCR, ctx->mcr_status); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ } ++ ++ return err; ++} ++ ++void rt_16550_cleanup_ctx(struct rt_16550_context *ctx) ++{ ++ rtdm_event_destroy(&ctx->in_event); ++ rtdm_event_destroy(&ctx->out_event); ++ rtdm_event_destroy(&ctx->ioc_event); ++ rtdm_mutex_destroy(&ctx->out_lock); ++} ++ ++int rt_16550_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rt_16550_context *ctx; ++ int dev_id = rtdm_fd_minor(fd); ++ int err; ++ uint64_t *dummy; ++ rtdm_lockctx_t lock_ctx; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ /* IPC initialisation - cannot fail with used parameters */ ++ rtdm_lock_init(&ctx->lock); ++ rtdm_event_init(&ctx->in_event, 0); ++ rtdm_event_init(&ctx->out_event, 0); ++ rtdm_event_init(&ctx->ioc_event, 0); ++ rtdm_mutex_init(&ctx->out_lock); ++ ++ rt_16550_init_io_ctx(dev_id, ctx); ++ ++ ctx->tx_fifo = tx_fifo[dev_id]; ++ ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->in_nwait = 0; ++ ctx->in_lock = 0; ++ ctx->in_history = NULL; ++ ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ ++ ctx->ioc_events = 0; ++ ctx->ioc_event_lock = 0; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ++ rt_16550_set_config(ctx, &default_config, &dummy); ++ ++ err = rtdm_irq_request(&ctx->irq_handle, irq[dev_id], ++ rt_16550_interrupt, irqtype[dev_id], ++ rtdm_fd_device(fd)->name, ctx); ++ if (err) { ++ /* reset DTR and RTS */ ++ rt_16550_reg_out(rt_16550_io_mode_from_ctx(ctx), ctx->base_addr, ++ MCR, 0); ++ ++ rt_16550_cleanup_ctx(ctx); ++ ++ return err; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ /* enable interrupts */ ++ ctx->ier_status = IER_RX; ++ rt_16550_reg_out(rt_16550_io_mode_from_ctx(ctx), ctx->base_addr, IER, ++ IER_RX); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ return 0; ++} ++ ++void rt_16550_close(struct rtdm_fd *fd) ++{ ++ struct rt_16550_context *ctx; ++ unsigned long base; ++ int mode; ++ uint64_t *in_history; ++ rtdm_lockctx_t lock_ctx; ++ ++ ctx = rtdm_fd_to_private(fd); ++ base = ctx->base_addr; ++ mode = rt_16550_io_mode_from_ctx(ctx); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ /* reset DTR and RTS */ ++ rt_16550_reg_out(mode, base, MCR, 0); ++ ++ /* mask all UART interrupts and clear pending ones. 
*/ ++ rt_16550_reg_out(mode, base, IER, 0); ++ rt_16550_reg_in(mode, base, IIR); ++ rt_16550_reg_in(mode, base, LSR); ++ rt_16550_reg_in(mode, base, RHR); ++ rt_16550_reg_in(mode, base, MSR); ++ ++ in_history = ctx->in_history; ++ ctx->in_history = NULL; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ rtdm_irq_free(&ctx->irq_handle); ++ ++ rt_16550_cleanup_ctx(ctx); ++ ++ kfree(in_history); ++} ++ ++int rt_16550_ioctl(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ rtdm_lockctx_t lock_ctx; ++ struct rt_16550_context *ctx; ++ int err = 0; ++ unsigned long base; ++ int mode; ++ ++ ctx = rtdm_fd_to_private(fd); ++ base = ctx->base_addr; ++ mode = rt_16550_io_mode_from_ctx(ctx); ++ ++ switch (request) { ++ case RTSER_RTIOC_GET_CONFIG: ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &ctx->config, ++ sizeof(struct ++ rtser_config)); ++ else ++ memcpy(arg, &ctx->config, ++ sizeof(struct rtser_config)); ++ break; ++ ++ case RTSER_RTIOC_SET_CONFIG: { ++ struct rtser_config *config; ++ struct rtser_config config_buf; ++ uint64_t *hist_buf = NULL; ++ ++ config = (struct rtser_config *)arg; ++ ++ if (rtdm_fd_is_user(fd)) { ++ err = ++ rtdm_safe_copy_from_user(fd, &config_buf, ++ arg, ++ sizeof(struct ++ rtser_config)); ++ if (err) ++ return err; ++ ++ config = &config_buf; ++ } ++ ++ if ((config->config_mask & RTSER_SET_BAUD) && ++ (config->baud_rate > ++ baud_base[rtdm_fd_minor(fd)] || ++ config->baud_rate <= 0)) ++ /* invalid baudrate for this port */ ++ return -EINVAL; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ /* ++ * Reflect the call to non-RT as we will likely ++ * allocate or free the buffer. ++ */ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ if (config->timestamp_history & ++ RTSER_RX_TIMESTAMP_HISTORY) ++ hist_buf = kmalloc(IN_BUFFER_SIZE * ++ sizeof(nanosecs_abs_t), ++ GFP_KERNEL); ++ } ++ ++ rt_16550_set_config(ctx, config, &hist_buf); ++ ++ if (hist_buf) ++ kfree(hist_buf); ++ ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_STATUS: { ++ int status; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ status = ctx->saved_errors | ctx->status; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) { ++ struct rtser_status status_buf; ++ ++ status_buf.line_status = ++ rt_16550_reg_in(mode, base, LSR) | status; ++ status_buf.modem_status = ++ rt_16550_reg_in(mode, base, MSR); ++ ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &status_buf, ++ sizeof(struct ++ rtser_status)); ++ } else { ++ ((struct rtser_status *)arg)->line_status = ++ rt_16550_reg_in(mode, base, LSR) | status; ++ ((struct rtser_status *)arg)->modem_status = ++ rt_16550_reg_in(mode, base, MSR); ++ } ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_CONTROL: ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &ctx->mcr_status, ++ sizeof(int)); ++ else ++ *(int *)arg = ctx->mcr_status; ++ ++ break; ++ ++ case RTSER_RTIOC_SET_CONTROL: { ++ int new_mcr = (long)arg; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ctx->mcr_status = new_mcr; ++ rt_16550_reg_out(mode, base, MCR, new_mcr); ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++ case RTSER_RTIOC_WAIT_EVENT: { ++ struct rtser_event ev = { .rxpend_timestamp = 0 }; ++ rtdm_toseq_t timeout_seq; ++ ++ if (!rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ /* Only one waiter allowed, stop any further attempts here. 
*/ ++ if (test_and_set_bit(0, &ctx->ioc_event_lock)) ++ return -EBUSY; ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.event_timeout); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (!ctx->ioc_events) { ++ /* Only enable error interrupt ++ when the user waits for it. */ ++ if (ctx->config.event_mask & RTSER_EVENT_ERRPEND) { ++ ctx->ier_status |= IER_STAT; ++ rt_16550_reg_out(mode, base, IER, ++ ctx->ier_status); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ err = rtdm_event_timedwait(&ctx->ioc_event, ++ ctx->config.event_timeout, ++ &timeout_seq); ++ if (err) { ++ /* Device has been closed? */ ++ if (err == -EIDRM) ++ err = -EBADF; ++ goto wait_unlock_out; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ ev.events = ctx->ioc_events; ++ ctx->ioc_events &= ++ ~(RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO); ++ ++ ev.last_timestamp = ctx->last_timestamp; ++ ev.rx_pending = ctx->in_npend; ++ ++ if (ctx->in_history) ++ ev.rxpend_timestamp = ctx->in_history[ctx->in_head]; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, &ev, ++ sizeof(struct ++ rtser_event)); ++ else ++ memcpy(arg, &ev, sizeof(struct rtser_event)); ++ ++ wait_unlock_out: ++ /* release the simple event waiter lock */ ++ clear_bit(0, &ctx->ioc_event_lock); ++ break; ++ } ++ ++ case RTSER_RTIOC_BREAK_CTL: { ++ int lcr = ((long)arg & RTSER_BREAK_SET) << 6; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ lcr |= ++ (ctx->config.parity << 3) | (ctx->config.stop_bits << 2) | ++ ctx->config.data_bits; ++ ++ rt_16550_reg_out(mode, base, LCR, lcr); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++ case RTIOC_PURGE: { ++ int fcr = 0; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ if ((long)arg & RTDM_PURGE_RX_BUFFER) { ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->status = 0; ++ fcr |= FCR_FIFO | FCR_RESET_RX; ++ rt_16550_reg_in(mode, base, RHR); ++ } ++ if ((long)arg & RTDM_PURGE_TX_BUFFER) { ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ fcr |= FCR_FIFO | FCR_RESET_TX; ++ } ++ if (fcr) { ++ rt_16550_reg_out(mode, base, FCR, fcr); ++ rt_16550_reg_out(mode, base, FCR, ++ FCR_FIFO | ctx->config.fifo_depth); ++ } ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++ default: ++ err = -ENOTTY; ++ } ++ ++ return err; ++} ++ ++ssize_t rt_16550_read(struct rtdm_fd *fd, void *buf, size_t nbyte) ++{ ++ struct rt_16550_context *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t read = 0; ++ int pending; ++ int block; ++ int subblock; ++ int in_pos; ++ char *out_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret = -EAGAIN; /* for non-blocking read */ ++ int nonblocking; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_rw_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.rx_timeout); ++ ++ /* non-blocking is handled separately here */ ++ nonblocking = (ctx->config.rx_timeout < 0); ++ ++ /* only one reader allowed, stop any further attempts here */ ++ if (test_and_set_bit(0, &ctx->in_lock)) ++ return -EBUSY; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (1) { ++ /* switch on error interrupt - the user is ready to listen */ ++ if ((ctx->ier_status & IER_STAT) == 0) { ++ ctx->ier_status |= IER_STAT; ++ rt_16550_reg_out(rt_16550_io_mode_from_ctx(ctx), ++ ctx->base_addr, IER, ++ ctx->ier_status); ++ } 
++ ++ if (ctx->status) { ++ if (ctx->status & RTSER_LSR_BREAK_IND) ++ ret = -EPIPE; ++ else ++ ret = -EIO; ++ ctx->saved_errors = ctx->status & ++ (RTSER_LSR_OVERRUN_ERR | RTSER_LSR_PARITY_ERR | ++ RTSER_LSR_FRAMING_ERR | RTSER_SOFT_OVERRUN_ERR); ++ ctx->status = 0; ++ break; ++ } ++ ++ pending = ctx->in_npend; ++ ++ if (pending > 0) { ++ block = subblock = (pending <= nbyte) ? pending : nbyte; ++ in_pos = ctx->in_head; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (in_pos + subblock > IN_BUFFER_SIZE) { ++ /* Treat the block between head and buffer end ++ separately. */ ++ subblock = IN_BUFFER_SIZE - in_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user ++ (fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], ++ subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ ++ subblock = block - subblock; ++ in_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user(fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->in_head = ++ (ctx->in_head + block) & (IN_BUFFER_SIZE - 1); ++ if ((ctx->in_npend -= block) == 0) ++ ctx->ioc_events &= ~RTSER_EVENT_RXPEND; ++ ++ if (nbyte == 0) ++ break; /* All requested bytes read. */ ++ ++ continue; ++ } ++ ++ if (nonblocking) ++ /* ret was set to EAGAIN in case of a real ++ non-blocking call or contains the error ++ returned by rtdm_event_wait[_until] */ ++ break; ++ ++ ctx->in_nwait = nbyte; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = rtdm_event_timedwait(&ctx->in_event, ++ ctx->config.rx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ return immediately. */ ++ return -EBADF; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ nonblocking = 1; ++ if (ctx->in_npend > 0) { ++ /* Final turn: collect pending bytes ++ before exit. */ ++ continue; ++ } ++ ++ ctx->in_nwait = 0; ++ break; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++break_unlocked: ++ /* Release the simple reader lock, */ ++ clear_bit(0, &ctx->in_lock); ++ ++ if ((read > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT) || (ret == -EINTR))) ++ ret = read; ++ ++ return ret; ++} ++ ++ssize_t rt_16550_write(struct rtdm_fd *fd, const void *buf, size_t nbyte) ++{ ++ struct rt_16550_context *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t written = 0; ++ int free; ++ int block; ++ int subblock; ++ int out_pos; ++ int lsr; ++ char *in_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_read_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.tx_timeout); ++ ++ /* Make write operation atomic. */ ++ ret = rtdm_mutex_timedlock(&ctx->out_lock, ctx->config.tx_timeout, ++ &timeout_seq); ++ if (ret) ++ return ret; ++ ++ while (nbyte > 0) { ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ free = OUT_BUFFER_SIZE - ctx->out_npend; ++ ++ if (free > 0) { ++ block = subblock = (nbyte <= free) ? 
nbyte : free; ++ out_pos = ctx->out_tail; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (out_pos + subblock > OUT_BUFFER_SIZE) { ++ /* Treat the block between head and buffer ++ end separately. */ ++ subblock = OUT_BUFFER_SIZE - out_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, ++ &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, ++ subblock); ++ ++ written += subblock; ++ in_pos += subblock; ++ ++ subblock = block - subblock; ++ out_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, block); ++ ++ written += subblock; ++ in_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->out_tail = ++ (ctx->out_tail + block) & (OUT_BUFFER_SIZE - 1); ++ ctx->out_npend += block; ++ ++ lsr = rt_16550_reg_in(rt_16550_io_mode_from_ctx(ctx), ++ ctx->base_addr, LSR); ++ if (lsr & RTSER_LSR_THR_EMTPY) ++ rt_16550_tx_fill(ctx); ++ ++ if (ctx->out_npend > 0 && !(ctx->ier_status & IER_TX)) { ++ /* unmask tx interrupt */ ++ ctx->ier_status |= IER_TX; ++ rt_16550_reg_out(rt_16550_io_mode_from_ctx(ctx), ++ ctx->base_addr, IER, ++ ctx->ier_status); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ continue; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = ++ rtdm_event_timedwait(&ctx->out_event, ++ ctx->config.tx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ return immediately. */ ++ return -EBADF; ++ } ++ if (ret == -EWOULDBLOCK) { ++ /* Fix error code for non-blocking mode. 
*/ ++ ret = -EAGAIN; ++ } ++ break; ++ } ++ } ++ ++ rtdm_mutex_unlock(&ctx->out_lock); ++ ++ if ((written > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT) || (ret == -EINTR))) ++ ret = written; ++ ++ return ret; ++} ++ ++static struct rtdm_driver uart16550A_driver = { ++ .profile_info = RTDM_PROFILE_INFO(uart16550A, ++ RTDM_CLASS_SERIAL, ++ RTDM_SUBCLASS_16550A, ++ RTSER_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .device_count = MAX_DEVICES, ++ .context_size = sizeof(struct rt_16550_context), ++ .ops = { ++ .open = rt_16550_open, ++ .close = rt_16550_close, ++ .ioctl_rt = rt_16550_ioctl, ++ .ioctl_nrt = rt_16550_ioctl, ++ .read_rt = rt_16550_read, ++ .write_rt = rt_16550_write, ++ }, ++}; ++ ++void rt_16550_exit(void); ++ ++int __init rt_16550_init(void) ++{ ++ struct rtdm_device *dev; ++ unsigned long base; ++ char *name; ++ int mode; ++ int err; ++ int i; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ rt_16550_pnp_init(); ++ rt_16550_pci_init(); ++ ++ for (i = 0; i < MAX_DEVICES; i++) { ++ if (!rt_16550_addr_param(i)) ++ continue; ++ ++ err = -EINVAL; ++ if (!irq[i] || !rt_16550_addr_param_valid(i)) ++ goto cleanup_out; ++ ++ dev = kmalloc(sizeof(struct rtdm_device) + ++ RTDM_MAX_DEVNAME_LEN, GFP_KERNEL); ++ err = -ENOMEM; ++ if (!dev) ++ goto cleanup_out; ++ ++ dev->driver = &uart16550A_driver; ++ dev->label = "rtser%d"; ++ name = (char *)(dev + 1); ++ ksformat(name, RTDM_MAX_DEVNAME_LEN, dev->label, i); ++ ++ err = rt_16550_init_io(i, name); ++ if (err) ++ goto kfree_out; ++ ++ if (baud_base[i] == 0) ++ baud_base[i] = DEFAULT_BAUD_BASE; ++ ++ if (tx_fifo[i] == 0) ++ tx_fifo[i] = DEFAULT_TX_FIFO; ++ ++ /* Mask all UART interrupts and clear pending ones. */ ++ base = rt_16550_base_addr(i); ++ mode = rt_16550_io_mode(i); ++ rt_16550_reg_out(mode, base, IER, 0); ++ rt_16550_reg_in(mode, base, IIR); ++ rt_16550_reg_in(mode, base, LSR); ++ rt_16550_reg_in(mode, base, RHR); ++ rt_16550_reg_in(mode, base, MSR); ++ ++ err = rtdm_dev_register(dev); ++ ++ if (err) ++ goto release_io_out; ++ ++ device[i] = dev; ++ } ++ ++ return 0; ++ ++ release_io_out: ++ rt_16550_release_io(i); ++ ++ kfree_out: ++ kfree(dev); ++ ++ cleanup_out: ++ rt_16550_exit(); ++ ++ return err; ++} ++ ++void rt_16550_exit(void) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_DEVICES; i++) ++ if (device[i]) { ++ rtdm_dev_unregister(device[i]); ++ rt_16550_release_io(i); ++ kfree(device[i]); ++ } ++ ++ rt_16550_pci_cleanup(); ++ rt_16550_pnp_cleanup(); ++} ++ ++module_init(rt_16550_init); ++module_exit(rt_16550_exit); +--- linux/drivers/xenomai/serial/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/Makefile 2021-04-07 16:01:26.519635178 +0800 +@@ -0,0 +1,8 @@ ++ ++obj-$(CONFIG_XENO_DRIVERS_16550A) += xeno_16550A.o ++obj-$(CONFIG_XENO_DRIVERS_MPC52XX_UART) += xeno_mpc52xx_uart.o ++obj-$(CONFIG_XENO_DRIVERS_IMX_UART) += xeno_imx_uart.o ++ ++xeno_16550A-y := 16550A.o ++xeno_mpc52xx_uart-y := mpc52xx_uart.o ++xeno_imx_uart-y := rt_imx_uart.o +--- linux/drivers/xenomai/serial/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/Kconfig 2021-04-07 16:01:26.514635185 +0800 +@@ -0,0 +1,79 @@ ++menu "Serial drivers" ++ ++config XENO_DRIVERS_16550A ++ tristate "16550A UART driver" ++ help ++ Real-time UART driver for 16550A compatible controllers. See ++ doc/txt/16550A-driver.txt for more details. 
++ ++choice ++ prompt "Hardware access mode" ++ depends on XENO_DRIVERS_16550A ++ default XENO_DRIVERS_16550A_PIO ++ ++config XENO_DRIVERS_16550A_PIO ++ bool "Port-based I/O" ++ help ++ Hardware access only via I/O ports. Use module parameter ++ "io=[,[,...]]" to specify the base port of a device. ++ ++config XENO_DRIVERS_16550A_MMIO ++ bool "Memory-mapped I/O" ++ help ++ Hardware access only via memory mapping. Use module paramter ++ "mem=[,[,...]]" to specify the physical base address of ++ a device. ++ ++config XENO_DRIVERS_16550A_ANY ++ bool "Any access mode" ++ help ++ Decide at module load-time (or via kernel parameter) which access ++ mode to use for which device. This mode is useful when devices of ++ both types can be present in a system, also at the same time. ++ ++ Both "io" and "mem" module parameters are available, but always only ++ one of them can be applied on a particular device. Use, e.g., ++ "io=0x3f8,0 mem=0,0xe0000000" to address device 1 via IO base port ++ 0x3f8 and device 2 via physical base address 0xe0000000. ++ ++endchoice ++ ++config XENO_DRIVERS_16550A_PCI ++ depends on PCI && (XENO_DRIVERS_16550A_PIO || XENO_DRIVERS_16550A_ANY) ++ bool "PCI board support" ++ default n ++ help ++ ++ This option activates support for PCI serial boards. ++ ++config XENO_DRIVERS_16550A_PCI_MOXA ++ depends on XENO_DRIVERS_16550A_PCI ++ bool "Moxa PCI boards" ++ default n ++ help ++ ++ This option activates support for the following Moxa boards: ++ PCI Serial Boards: ++ C104H/PCI, C168H/PCI ++ CP-114, CP-132 ++ Universal PCI Serial Boards: ++ CP-102U, CP-102UL, CP-104U ++ CP-112UL, CP-114UL, CP-118U ++ CP-132U, CP-134U, CP-138U ++ CP-168U ++ ++config XENO_DRIVERS_MPC52XX_UART ++ depends on PPC_MPC52xx ++ tristate "MPC52xx PSC UART driver" ++ help ++ Real-time UART driver for the PSC on the MPC5200 processor. ++ ++config XENO_DRIVERS_IMX_UART ++ depends on ARCH_IMX || ARCH_MXC ++ tristate "RT IMX UART driver" ++ select RATIONAL ++ help ++ Real-time UART driver for the Freescale Semiconductor MXC Internal ++ UART compatible controllers. ++ ++endmenu +--- linux/drivers/xenomai/serial/16550A_pci.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/16550A_pci.h 2021-04-07 16:01:26.510635191 +0800 +@@ -0,0 +1,286 @@ ++/* ++ * Copyright (C) 2006-2007 Jan Kiszka . ++ * Copyright (C) 2011 Stefan Kisdaroczi . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#if defined(CONFIG_XENO_DRIVERS_16550A_PCI) ++ ++#include ++ ++struct rt_16550_pci_board { ++ char *name; ++ resource_size_t resource_base_addr; ++ unsigned int nports; ++ unsigned int port_ofs; ++ unsigned long irqtype; ++ unsigned int baud_base; ++ int tx_fifo; ++}; ++ ++#if defined(CONFIG_XENO_DRIVERS_16550A_PCI_MOXA) ++ ++#define PCI_DEVICE_ID_CP112UL 0x1120 ++#define PCI_DEVICE_ID_CP114UL 0x1143 ++#define PCI_DEVICE_ID_CP138U 0x1380 ++ ++static const struct rt_16550_pci_board rt_16550_moxa_c104 = { ++ .name = "Moxa C104H/PCI", ++ .resource_base_addr = 2, ++ .nports = 4, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_c168 = { ++ .name = "Moxa C168H/PCI", ++ .resource_base_addr = 2, ++ .nports = 8, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp114 = { ++ .name = "Moxa CP-114", ++ .resource_base_addr = 2, ++ .nports = 4, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp132 = { ++ .name = "Moxa CP-132", ++ .resource_base_addr = 2, ++ .nports = 2, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp102u = { ++ .name = "Moxa CP-102U", ++ .resource_base_addr = 2, ++ .nports = 2, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp102ul = { ++ .name = "Moxa CP-102UL", ++ .resource_base_addr = 2, ++ .nports = 2, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp104u = { ++ .name = "Moxa CP-104U", ++ .resource_base_addr = 2, ++ .nports = 4, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp112ul = { ++ .name = "Moxa CP-112UL", ++ .resource_base_addr = 2, ++ .nports = 2, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp114ul = { ++ .name = "Moxa CP-114UL", ++ .resource_base_addr = 2, ++ .nports = 4, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp118u = { ++ .name = "Moxa CP-118U", ++ .resource_base_addr = 2, ++ .nports = 8, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp132u = { ++ .name = "Moxa CP-132U", ++ .resource_base_addr = 2, ++ .nports = 2, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp134u = { ++ .name = "Moxa CP-134U", ++ .resource_base_addr = 2, ++ .nports = 4, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp138u = { ++ .name = "Moxa CP-138U", ++ .resource_base_addr = 2, ++ .nports = 8, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = 
RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp168u = { ++ .name = "Moxa CP-168U", ++ .resource_base_addr = 2, ++ .nports = 8, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++#endif ++ ++const struct pci_device_id rt_16550_pci_table[] = { ++#if defined(CONFIG_XENO_DRIVERS_16550A_PCI_MOXA) ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_C104), ++ .driver_data = (unsigned long)&rt_16550_moxa_c104}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_C168), ++ .driver_data = (unsigned long)&rt_16550_moxa_c168}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP114), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp114}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP132), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp132}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP102U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp102u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP102UL), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp102ul}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP104U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp104u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_CP112UL), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp112ul}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_CP114UL), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp114ul}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP118U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp118u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP132U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp132u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP134U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp134u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_CP138U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp138u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP168U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp168u}, ++#endif ++ { } ++}; ++ ++static int rt_16550_pci_probe(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rt_16550_pci_board *board; ++ int err; ++ int i; ++ int port = 0; ++ int base_addr; ++ int max_devices = 0; ++ ++ if (!ent->driver_data) ++ return -ENODEV; ++ ++ board = (struct rt_16550_pci_board *)ent->driver_data; ++ ++ for (i = 0; i < MAX_DEVICES; i++) ++ if (!rt_16550_addr_param(i)) ++ max_devices++; ++ ++ if (board->nports > max_devices) ++ return -ENODEV; ++ ++ if ((err = pci_enable_device(pdev))) ++ return err; ++ ++ base_addr = pci_resource_start(pdev, board->resource_base_addr); ++ ++ for (i = 0; i < MAX_DEVICES; i++) { ++ if ((port < board->nports) && (!rt_16550_addr_param(i))) { ++ io[i] = base_addr + port * board->port_ofs; ++ irq[i] = pdev->irq; ++ irqtype[i] = board->irqtype; ++ baud_base[i] = board->baud_base; ++ tx_fifo[i] = board->tx_fifo; ++ port++; ++ } ++ } ++ ++ return 0; ++} ++ ++static void rt_16550_pci_remove(struct pci_dev *pdev) { ++ pci_disable_device( pdev ); ++}; ++ ++static struct pci_driver rt_16550_pci_driver = { ++ .name = RT_16550_DRIVER_NAME, ++ .id_table = rt_16550_pci_table, ++ .probe = rt_16550_pci_probe, ++ .remove = rt_16550_pci_remove ++}; ++ ++static int pci_registered; ++ ++static inline void rt_16550_pci_init(void) ++{ ++ if (pci_register_driver(&rt_16550_pci_driver) == 0) ++ pci_registered = 1; ++} ++ ++static inline void rt_16550_pci_cleanup(void) ++{ ++ if (pci_registered) ++ pci_unregister_driver(&rt_16550_pci_driver); ++} ++ ++#else /* Linux < 2.6.0 || !CONFIG_PCI || !(..._16550A_PCI */ ++ ++#define rt_16550_pci_init() do { } while (0) ++#define rt_16550_pci_cleanup() do { } while (0) ++ 
++#endif /* Linux < 2.6.0 || !CONFIG_PCI || !(..._16550A_PCI */ +--- linux/drivers/xenomai/serial/mpc52xx_uart.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/mpc52xx_uart.c 2021-04-07 16:01:26.505635198 +0800 +@@ -0,0 +1,1438 @@ ++/* ++ * Copyright (C) 2011 Wolfgang Grandegger . ++ * Copyright (C) 2005-2007 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++#include ++ ++MODULE_DESCRIPTION("RTDM-based driver for MPC52xx UARTs"); ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_VERSION("1.0.0"); ++MODULE_LICENSE("GPL"); ++ ++#define RT_MPC52XX_UART_DRVNAM "xeno_mpc52xx_uart" ++ ++#define IN_BUFFER_SIZE 512 ++#define OUT_BUFFER_SIZE 512 ++ ++#define PARITY_MASK 0x03 ++#define DATA_BITS_MASK 0x03 ++#define STOP_BITS_MASK 0x01 ++#define FIFO_MASK 0xC0 ++#define EVENT_MASK 0x0F ++ ++ ++struct rt_mpc52xx_uart_port { ++ const struct device *dev; ++ struct mpc52xx_psc __iomem *psc; ++ struct mpc52xx_psc_fifo __iomem *fifo; ++ unsigned int uartclk; ++ int irq; ++ int num; ++}; ++ ++struct rt_mpc52xx_uart_ctx { ++ struct rtser_config config; /* current device configuration */ ++ ++ rtdm_irq_t irq_handle; /* device IRQ handle */ ++ rtdm_lock_t lock; /* lock to protect context struct */ ++ ++ int in_head; /* RX ring buffer, head pointer */ ++ int in_tail; /* RX ring buffer, tail pointer */ ++ size_t in_npend; /* pending bytes in RX ring */ ++ int in_nwait; /* bytes the user waits for */ ++ rtdm_event_t in_event; /* raised to unblock reader */ ++ char in_buf[IN_BUFFER_SIZE]; /* RX ring buffer */ ++ volatile unsigned long in_lock; /* single-reader lock */ ++ uint64_t *in_history; /* RX timestamp buffer */ ++ ++ int out_head; /* TX ring buffer, head pointer */ ++ int out_tail; /* TX ring buffer, tail pointer */ ++ size_t out_npend; /* pending bytes in TX ring */ ++ rtdm_event_t out_event; /* raised to unblock writer */ ++ char out_buf[OUT_BUFFER_SIZE]; /* TX ring buffer */ ++ rtdm_mutex_t out_lock; /* single-writer mutex */ ++ ++ uint64_t last_timestamp; /* timestamp of last event */ ++ int ioc_events; /* recorded events */ ++ rtdm_event_t ioc_event; /* raised to unblock event waiter */ ++ volatile unsigned long ioc_event_lock; /* single-waiter lock */ ++ ++ ++ int mcr_status; /* emulated MCR cache */ ++ int status; /* cache for LSR + soft-states */ ++ int saved_errors; /* error cache for RTIOC_GET_STATUS */ ++ ++ unsigned int imr_status; /* interrupt mask register cache */ ++ int tx_empty; /* shift register empty flag */ ++ ++ struct rt_mpc52xx_uart_port *port; /* Port related data */ ++}; ++ ++static const struct rtser_config default_config = { ++ .config_mask = 0xFFFF, ++ .baud_rate = RTSER_DEF_BAUD, ++ .parity = RTSER_DEF_PARITY, ++ .data_bits = RTSER_DEF_BITS, ++ .stop_bits 
= RTSER_DEF_STOPB, ++ .handshake = RTSER_DEF_HAND, ++ .fifo_depth = RTSER_DEF_FIFO_DEPTH, ++ .rx_timeout = RTSER_DEF_TIMEOUT, ++ .tx_timeout = RTSER_DEF_TIMEOUT, ++ .event_timeout = RTSER_DEF_TIMEOUT, ++ .timestamp_history = RTSER_DEF_TIMESTAMP_HISTORY, ++ .event_mask = RTSER_DEF_EVENT_MASK, ++ .rs485 = RTSER_DEF_RS485, ++}; ++ ++/* lookup table for matching device nodes to index numbers */ ++static struct device_node *rt_mpc52xx_uart_nodes[MPC52xx_PSC_MAXNUM]; ++ ++static inline void psc_fifo_init(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ out_8(&ctx->port->fifo->rfcntl, 0x00); ++ out_be16(&ctx->port->fifo->rfalarm, 0x1ff); ++ out_8(&ctx->port->fifo->tfcntl, 0x07); ++ out_be16(&ctx->port->fifo->tfalarm, 0x80); ++} ++ ++static inline int psc_raw_rx_rdy(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_be16(&ctx->port->psc->mpc52xx_psc_status) & ++ MPC52xx_PSC_SR_RXRDY; ++} ++ ++static inline int psc_raw_tx_rdy(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_be16(&ctx->port->psc->mpc52xx_psc_status) & ++ MPC52xx_PSC_SR_TXRDY; ++} ++ ++static inline int psc_rx_rdy(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_be16(&ctx->port->psc->mpc52xx_psc_isr) & ++ ctx->imr_status & MPC52xx_PSC_IMR_RXRDY; ++} ++ ++static int psc_tx_rdy(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_be16(&ctx->port->psc->mpc52xx_psc_isr) & ++ ctx->imr_status & MPC52xx_PSC_IMR_TXRDY; ++} ++ ++static inline int psc_tx_empty(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_be16(&ctx->port->psc->mpc52xx_psc_status) & ++ MPC52xx_PSC_SR_TXEMP; ++} ++ ++static inline void psc_start_tx(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ ctx->imr_status |= MPC52xx_PSC_IMR_TXRDY; ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static inline void psc_stop_tx(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ ctx->imr_status &= ~MPC52xx_PSC_IMR_TXRDY; ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static inline void psc_stop_rx(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ ctx->imr_status &= ~MPC52xx_PSC_IMR_RXRDY; ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static inline void psc_write_char(struct rt_mpc52xx_uart_ctx *ctx, ++ unsigned char c) ++{ ++ out_8(&ctx->port->psc->mpc52xx_psc_buffer_8, c); ++} ++ ++static inline unsigned char psc_read_char(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_8(&ctx->port->psc->mpc52xx_psc_buffer_8); ++} ++ ++static inline void psc_disable_ints(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ ctx->imr_status = 0; ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static void psc_set_mcr(struct rt_mpc52xx_uart_ctx *ctx, ++ unsigned int mcr) ++{ ++ if (mcr & RTSER_MCR_RTS) ++ out_8(&ctx->port->psc->op1, MPC52xx_PSC_OP_RTS); ++ else ++ out_8(&ctx->port->psc->op0, MPC52xx_PSC_OP_RTS); ++} ++ ++/* FIXME: status interrupts not yet handled properly */ ++static unsigned int psc_get_msr(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ unsigned int msr = RTSER_MSR_DSR; ++ u8 status = in_8(&ctx->port->psc->mpc52xx_psc_ipcr); ++ ++ if (!(status & MPC52xx_PSC_CTS)) ++ msr |= RTSER_MSR_CTS; ++ if (!(status & MPC52xx_PSC_DCD)) ++ msr |= RTSER_MSR_DCD; ++ ++ return msr; ++} ++ ++static void psc_enable_ms(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ struct mpc52xx_psc *psc = ctx->port->psc; ++ ++ /* clear D_*-bits by reading them */ ++ in_8(&psc->mpc52xx_psc_ipcr); ++ /* enable CTS and DCD as IPC interrupts */ ++ out_8(&psc->mpc52xx_psc_acr, MPC52xx_PSC_IEC_CTS | MPC52xx_PSC_IEC_DCD); ++ ++ ctx->imr_status |= MPC52xx_PSC_IMR_IPC; ++ 
out_be16(&psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static void psc_disable_ms(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ struct mpc52xx_psc *psc = ctx->port->psc; ++ ++ /* disable CTS and DCD as IPC interrupts */ ++ out_8(&psc->mpc52xx_psc_acr, 0); ++ ++ ctx->imr_status &= ~MPC52xx_PSC_IMR_IPC; ++ out_be16(&psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static struct of_device_id mpc5200_gpio_ids[] = { ++ { .compatible = "fsl,mpc5200-gpio", }, ++ { .compatible = "mpc5200-gpio", }, ++ {} ++}; ++ ++static void rt_mpc52xx_uart_init_hw(struct rt_mpc52xx_uart_port *port) ++{ ++ struct mpc52xx_gpio __iomem *gpio; ++ struct device_node *gpio_np; ++ u32 port_config; ++ ++ if (port->num == 6) { ++ gpio_np = of_find_matching_node(NULL, mpc5200_gpio_ids); ++ gpio = of_iomap(gpio_np, 0); ++ of_node_put(gpio_np); ++ if (!gpio) { ++ dev_err(port->dev, "PSC%d port_config: " ++ "couldn't map gpio ids\n", port->num); ++ return; ++ } ++ port_config = in_be32(&gpio->port_config); ++ port_config &= 0xFF0FFFFF; /* port config for PSC6 */ ++ port_config |= 0x00500000; ++ dev_dbg(port->dev, "PSC%d port_config: old:%x new:%x\n", ++ port->num, in_be32(&gpio->port_config), port_config); ++ out_be32(&gpio->port_config, port_config); ++ iounmap(gpio); ++ } ++} ++ ++static inline void rt_mpc52xx_uart_put_char(struct rt_mpc52xx_uart_ctx *ctx, ++ uint64_t *timestamp, ++ unsigned char ch) ++{ ++ ctx->in_buf[ctx->in_tail] = ch; ++ if (ctx->in_history) ++ ctx->in_history[ctx->in_tail] = *timestamp; ++ ctx->in_tail = (ctx->in_tail + 1) & (IN_BUFFER_SIZE - 1); ++ ++ if (++ctx->in_npend > IN_BUFFER_SIZE) { ++ ctx->status |= RTSER_SOFT_OVERRUN_ERR; ++ ctx->in_npend--; ++ } ++} ++ ++static inline int rt_mpc52xx_uart_rx_interrupt(struct rt_mpc52xx_uart_ctx *ctx, ++ uint64_t *timestamp) ++{ ++ int rbytes = 0; ++ int psc_status; ++ ++ psc_status = in_be16(&ctx->port->psc->mpc52xx_psc_status); ++ while (psc_status & MPC52xx_PSC_SR_RXRDY) { ++ /* read input character */ ++ rt_mpc52xx_uart_put_char(ctx, timestamp, psc_read_char(ctx)); ++ rbytes++; ++ ++ /* save new errors */ ++ if (psc_status & (MPC52xx_PSC_SR_OE | MPC52xx_PSC_SR_PE | ++ MPC52xx_PSC_SR_FE | MPC52xx_PSC_SR_RB)) { ++ if (psc_status & MPC52xx_PSC_SR_PE) ++ ctx->status |= RTSER_LSR_PARITY_ERR; ++ if (psc_status & MPC52xx_PSC_SR_FE) ++ ctx->status |= RTSER_LSR_FRAMING_ERR; ++ if (psc_status & MPC52xx_PSC_SR_RB) ++ ctx->status |= RTSER_LSR_BREAK_IND; ++ ++ /* ++ * Overrun is special, since it's reported ++ * immediately, and doesn't affect the current ++ * character. 
++ */ ++ if (psc_status & MPC52xx_PSC_SR_OE) { ++ ctx->status |= RTSER_LSR_OVERRUN_ERR; ++ rt_mpc52xx_uart_put_char(ctx, timestamp, 0); ++ rbytes++; ++ } ++ ++ /* Clear error condition */ ++ out_8(&ctx->port->psc->command, ++ MPC52xx_PSC_RST_ERR_STAT); ++ } ++ ++ psc_status = in_be16(&ctx->port->psc->mpc52xx_psc_status); ++ }; ++ ++ return rbytes; ++} ++ ++static inline int rt_mpc52xx_uart_tx_interrupt(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ while (psc_raw_tx_rdy(ctx) && (ctx->out_npend > 0)) { ++ if (ctx->config.rs485 && ++ (ctx->mcr_status & RTSER_MCR_RTS) == 0) { ++ /* switch RTS */ ++ ctx->mcr_status |= RTSER_MCR_RTS; ++ dev_dbg(ctx->port->dev, "Set RTS, mcr_status=%#x\n", ++ ctx->mcr_status); ++ psc_set_mcr(ctx, ctx->mcr_status); ++ } ++ if (ctx->config.rs485 || ++ ((ctx->config.event_mask & RTSER_EVENT_TXEMPTY) && ++ (ctx->imr_status & MPC52xx_PSC_IMR_TXEMP) == 0)) { ++ /* enable tx-empty interrupt */ ++ ctx->imr_status |= MPC52xx_PSC_IMR_TXEMP; ++ dev_dbg(ctx->port->dev, "Enable TXEMP interrupt, " ++ "imr_status=%#x\n", ctx->imr_status); ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ++ ctx->imr_status); ++ } ++ ++ psc_write_char(ctx, ctx->out_buf[ctx->out_head++]); ++ ctx->out_head &= OUT_BUFFER_SIZE - 1; ++ ctx->out_npend--; ++ } ++ ++ return ctx->out_npend; ++} ++ ++static int rt_mpc52xx_uart_interrupt(rtdm_irq_t *irq_context) ++{ ++ struct rt_mpc52xx_uart_ctx *ctx; ++ uint64_t timestamp = rtdm_clock_read(); ++ int rbytes = 0; ++ int events = 0; ++ int ret = RTDM_IRQ_NONE; ++ int goon = 1; ++ int n; ++ ++ ctx = rtdm_irq_get_arg(irq_context, struct rt_mpc52xx_uart_ctx); ++ ++ rtdm_lock_get(&ctx->lock); ++ ++ while (goon) { ++ goon = 0; ++ if (psc_rx_rdy(ctx)) { ++ dev_dbg(ctx->port->dev, "RX interrupt\n"); ++ n = rt_mpc52xx_uart_rx_interrupt(ctx, ×tamp); ++ if (n) { ++ rbytes += n; ++ events |= RTSER_EVENT_RXPEND; ++ } ++ } ++ if (psc_tx_rdy(ctx)) ++ goon |= rt_mpc52xx_uart_tx_interrupt(ctx); ++ ++ if (psc_tx_empty(ctx)) { ++ if (ctx->config.rs485 && ++ (ctx->mcr_status & RTSER_MCR_RTS)) { ++ /* reset RTS */ ++ ctx->mcr_status &= ~RTSER_MCR_RTS; ++ dev_dbg(ctx->port->dev, "Reset RTS, " ++ "mcr_status=%#x\n", ctx->mcr_status); ++ psc_set_mcr(ctx, ctx->mcr_status); ++ } ++ /* disable tx-empty interrupt */ ++ ctx->imr_status &= ~MPC52xx_PSC_IMR_TXEMP; ++ dev_dbg(ctx->port->dev, "Disable TXEMP interrupt, " ++ "imr_status=%#x\n", ctx->imr_status); ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ++ ctx->imr_status); ++ ++ events |= RTSER_EVENT_TXEMPTY; ++ ctx->tx_empty = 1; ++ } ++ ++ if (ctx->config.event_mask & ++ (RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO)) { ++ u8 status = in_8(&ctx->port->psc->mpc52xx_psc_ipcr); ++ ++ if (status & MPC52xx_PSC_D_DCD) ++ events |= (status & MPC52xx_PSC_DCD) ? ++ RTSER_EVENT_MODEMLO : ++ RTSER_EVENT_MODEMHI; ++ if (status & MPC52xx_PSC_D_CTS) ++ events |= (status & MPC52xx_PSC_CTS) ? 
++ RTSER_EVENT_MODEMLO : ++ RTSER_EVENT_MODEMHI; ++ dev_dbg(ctx->port->dev, "Modem line changed, " ++ "events=%#x\n", events); ++ } ++ ++ ret = RTDM_IRQ_HANDLED; ++ } ++ ++ if (ctx->in_nwait > 0) { ++ if ((ctx->in_nwait <= rbytes) || ctx->status) { ++ ctx->in_nwait = 0; ++ rtdm_event_signal(&ctx->in_event); ++ } else ++ ctx->in_nwait -= rbytes; ++ } ++ ++ if (ctx->status) ++ events |= RTSER_EVENT_ERRPEND; ++ ++ if (events & ctx->config.event_mask) { ++ int old_events = ctx->ioc_events; ++ ++ ctx->last_timestamp = timestamp; ++ ctx->ioc_events = events; ++ ++ if (!old_events) ++ rtdm_event_signal(&ctx->ioc_event); ++ } ++ ++ if ((ctx->imr_status & MPC52xx_PSC_IMR_TXRDY) && ++ (ctx->out_npend == 0)) { ++ psc_stop_tx(ctx); ++ rtdm_event_signal(&ctx->out_event); ++ } ++ ++ rtdm_lock_put(&ctx->lock); ++ ++ return ret; ++} ++ ++ ++static int rt_mpc52xx_uart_set_config(struct rt_mpc52xx_uart_ctx *ctx, ++ const struct rtser_config *config, ++ uint64_t **in_history_ptr) ++{ ++ rtdm_lockctx_t lock_ctx; ++ int err = 0; ++ ++ /* make line configuration atomic and IRQ-safe */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ if (config->config_mask & RTSER_SET_BAUD) ++ ctx->config.baud_rate = config->baud_rate; ++ if (config->config_mask & RTSER_SET_PARITY) ++ ctx->config.parity = config->parity & PARITY_MASK; ++ if (config->config_mask & RTSER_SET_DATA_BITS) ++ ctx->config.data_bits = config->data_bits & DATA_BITS_MASK; ++ if (config->config_mask & RTSER_SET_STOP_BITS) ++ ctx->config.stop_bits = config->stop_bits & STOP_BITS_MASK; ++ if (config->config_mask & RTSER_SET_HANDSHAKE) ++ ctx->config.handshake = config->handshake; ++ ++ if (config->config_mask & (RTSER_SET_PARITY | ++ RTSER_SET_DATA_BITS | RTSER_SET_STOP_BITS | ++ RTSER_SET_BAUD | RTSER_SET_HANDSHAKE)) { ++ struct mpc52xx_psc *psc = ctx->port->psc; ++ unsigned char mr1 = 0, mr2 = 0; ++ unsigned int divisor; ++ u16 prescaler; ++ ++ switch (ctx->config.data_bits) { ++ case RTSER_5_BITS: ++ mr1 |= MPC52xx_PSC_MODE_5_BITS; ++ break; ++ case RTSER_6_BITS: ++ mr1 |= MPC52xx_PSC_MODE_6_BITS; ++ break; ++ case RTSER_7_BITS: ++ mr1 |= MPC52xx_PSC_MODE_7_BITS; ++ break; ++ case RTSER_8_BITS: ++ default: ++ mr1 |= MPC52xx_PSC_MODE_8_BITS; ++ break; ++ } ++ ++ switch (ctx->config.parity) { ++ case RTSER_ODD_PARITY: ++ mr1 |= MPC52xx_PSC_MODE_PARODD; ++ break; ++ case RTSER_EVEN_PARITY: ++ mr1 |= MPC52xx_PSC_MODE_PAREVEN; ++ break; ++ case RTSER_NO_PARITY: ++ default: ++ mr1 |= MPC52xx_PSC_MODE_PARNONE; ++ break; ++ } ++ ++ if (ctx->config.stop_bits == RTSER_2_STOPB) ++ mr2 |= (ctx->config.data_bits == RTSER_5_BITS) ? 
++ MPC52xx_PSC_MODE_ONE_STOP_5_BITS : ++ MPC52xx_PSC_MODE_TWO_STOP; ++ else ++ mr2 |= MPC52xx_PSC_MODE_ONE_STOP; ++ ++ if (ctx->config.handshake == RTSER_RTSCTS_HAND) { ++ mr1 |= MPC52xx_PSC_MODE_RXRTS; ++ mr2 |= MPC52xx_PSC_MODE_TXCTS; ++ } else if (config->config_mask & RTSER_SET_HANDSHAKE) { ++ ctx->mcr_status = ++ RTSER_MCR_DTR | RTSER_MCR_RTS | RTSER_MCR_OUT2; ++ psc_set_mcr(ctx, ctx->mcr_status); ++ } ++ ++ /* Reset the TX & RX */ ++ out_8(&psc->command, MPC52xx_PSC_RST_RX); ++ out_8(&psc->command, MPC52xx_PSC_RST_TX); ++ ++ /* Send new mode settings */ ++ out_8(&psc->command, MPC52xx_PSC_SEL_MODE_REG_1); ++ out_8(&psc->mode, mr1); ++ out_8(&psc->mode, mr2); ++ ++ /* Set baudrate */ ++ divisor = (ctx->port->uartclk + 16 * ctx->config.baud_rate) / ++ (32 * ctx->config.baud_rate); ++ prescaler = 0xdd00; ++ out_be16(&psc->mpc52xx_psc_clock_select, prescaler); ++ out_8(&psc->ctur, divisor >> 8); ++ out_8(&psc->ctlr, divisor & 0xff); ++ ++ dev_info(ctx->port->dev, ++ "mr1=%#x mr2=%#x baud=%d divisor=%d prescaler=%x\n", ++ mr1, mr2, ctx->config.baud_rate, divisor, prescaler); ++ ++ /* Reenable TX & RX */ ++ out_8(&psc->command, MPC52xx_PSC_TX_ENABLE); ++ out_8(&psc->command, MPC52xx_PSC_RX_ENABLE); ++ ++ /* Enable RX */ ++ ctx->imr_status |= MPC52xx_PSC_IMR_RXRDY; ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ctx->imr_status); ++ ++ ctx->status = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ ++ } ++ ++ if (config->config_mask & RTSER_SET_RS485) { ++ ctx->config.rs485 = config->rs485; ++ if (config->rs485) { ++ /* reset RTS */ ++ ctx->mcr_status &= ~RTSER_MCR_RTS; ++ dev_dbg(ctx->port->dev, "Reset RTS, mcr_status=%#x\n", ++ ctx->mcr_status); ++ psc_set_mcr(ctx, ctx->mcr_status); ++ } ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Timeout manipulation is not atomic. The user is supposed to take ++ care not to use and change timeouts at the same time. 
*/ ++ if (config->config_mask & RTSER_SET_TIMEOUT_RX) ++ ctx->config.rx_timeout = config->rx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_TX) ++ ctx->config.tx_timeout = config->tx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_EVENT) ++ ctx->config.event_timeout = config->event_timeout; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ /* change timestamp history atomically */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ if (config->timestamp_history & RTSER_RX_TIMESTAMP_HISTORY) { ++ if (!ctx->in_history) { ++ ctx->in_history = *in_history_ptr; ++ *in_history_ptr = NULL; ++ if (!ctx->in_history) ++ err = -ENOMEM; ++ } ++ } else { ++ *in_history_ptr = ctx->in_history; ++ ctx->in_history = NULL; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ } ++ ++ if (config->config_mask & RTSER_SET_EVENT_MASK) { ++ /* change event mask atomically */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->config.event_mask = config->event_mask & EVENT_MASK; ++ ctx->ioc_events = 0; ++ ++ if ((config->event_mask & RTSER_EVENT_RXPEND) && ++ (ctx->in_npend > 0)) ++ ctx->ioc_events |= RTSER_EVENT_RXPEND; ++ ++ if ((config->event_mask & RTSER_EVENT_ERRPEND) && ++ ctx->status) ++ ctx->ioc_events |= RTSER_EVENT_ERRPEND; ++ ++ if ((config->event_mask & RTSER_EVENT_TXEMPTY) && ++ !ctx->out_npend && ctx->tx_empty) ++ ctx->ioc_events |= RTSER_EVENT_TXEMPTY; ++ ++ if (config->event_mask & ++ (RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO)) ++ psc_enable_ms(ctx); ++ else ++ psc_disable_ms(ctx); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ } ++ ++ return err; ++} ++ ++void rt_mpc52xx_uart_cleanup_ctx(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ rtdm_event_destroy(&ctx->in_event); ++ rtdm_event_destroy(&ctx->out_event); ++ rtdm_event_destroy(&ctx->ioc_event); ++ rtdm_mutex_destroy(&ctx->out_lock); ++} ++ ++static int rt_mpc52xx_uart_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rt_mpc52xx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ uint64_t *dummy; ++ int err; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ctx->port = (struct rt_mpc52xx_uart_port *)rtdm_fd_device(fd)->device_data; ++ ++ /* IPC initialisation - cannot fail with used parameters */ ++ rtdm_lock_init(&ctx->lock); ++ rtdm_event_init(&ctx->in_event, 0); ++ rtdm_event_init(&ctx->out_event, 0); ++ rtdm_event_init(&ctx->ioc_event, 0); ++ rtdm_mutex_init(&ctx->out_lock); ++ ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->in_nwait = 0; ++ ctx->in_lock = 0; ++ ctx->in_history = NULL; ++ ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ ++ ctx->ioc_events = 0; ++ ctx->ioc_event_lock = 0; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ psc_disable_ints(ctx); ++ ++ /* Reset/activate the port, clear and enable interrupts */ ++ out_8(&ctx->port->psc->command, MPC52xx_PSC_RST_RX); ++ out_8(&ctx->port->psc->command, MPC52xx_PSC_RST_TX); ++ ++ out_be32(&ctx->port->psc->sicr, 0); /* UART mode DCD ignored */ ++ ++ psc_fifo_init(ctx); ++ ++ out_8(&ctx->port->psc->command, MPC52xx_PSC_TX_ENABLE); ++ out_8(&ctx->port->psc->command, MPC52xx_PSC_RX_ENABLE); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ rt_mpc52xx_uart_set_config(ctx, &default_config, &dummy); ++ ++ err = rtdm_irq_request(&ctx->irq_handle, ctx->port->irq, ++ rt_mpc52xx_uart_interrupt, 0, ++ rtdm_fd_device(fd)->name, ctx); ++ if (err) { ++ psc_set_mcr(ctx, 0); ++ rt_mpc52xx_uart_cleanup_ctx(ctx); ++ ++ return err; ++ } ++ ++ return 0; ++} 
++ ++static void rt_mpc52xx_uart_close(struct rtdm_fd *fd) ++{ ++ struct rt_mpc52xx_uart_ctx *ctx; ++ uint64_t *in_history; ++ rtdm_lockctx_t lock_ctx; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ /* reset DTR and RTS */ ++ psc_set_mcr(ctx, 0); ++ ++ psc_disable_ints(ctx); ++ ++ in_history = ctx->in_history; ++ ctx->in_history = NULL; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ rtdm_irq_free(&ctx->irq_handle); ++ ++ rt_mpc52xx_uart_cleanup_ctx(ctx); ++ ++ kfree(in_history); ++} ++ ++static int rt_mpc52xx_uart_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ rtdm_lockctx_t lock_ctx; ++ struct rt_mpc52xx_uart_ctx *ctx; ++ int err = 0; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ switch (request) { ++ case RTSER_RTIOC_GET_CONFIG: ++ if (rtdm_fd_is_user(fd)) ++ err = rtdm_safe_copy_to_user(fd, arg, ++ &ctx->config, ++ sizeof(struct ++ rtser_config)); ++ else ++ memcpy(arg, &ctx->config, sizeof(struct rtser_config)); ++ break; ++ ++ case RTSER_RTIOC_SET_CONFIG: { ++ struct rtser_config *config; ++ struct rtser_config config_buf; ++ uint64_t *hist_buf = NULL; ++ ++ config = (struct rtser_config *)arg; ++ ++ if (rtdm_fd_is_user(fd)) { ++ err = rtdm_safe_copy_from_user(fd, &config_buf, ++ arg, ++ sizeof(struct ++ rtser_config)); ++ if (err) ++ return err; ++ ++ config = &config_buf; ++ } ++ ++ if ((config->config_mask & RTSER_SET_BAUD) && ++ (config->baud_rate <= 0)) ++ /* invalid baudrate for this port */ ++ return -EINVAL; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ /* ++ * Reflect the call to non-RT as we will likely ++ * allocate or free the buffer. ++ */ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ if (config->timestamp_history & RTSER_RX_TIMESTAMP_HISTORY) ++ hist_buf = kmalloc(IN_BUFFER_SIZE * ++ sizeof(nanosecs_abs_t), ++ GFP_KERNEL); ++ } ++ ++ rt_mpc52xx_uart_set_config(ctx, config, &hist_buf); ++ ++ if (hist_buf) ++ kfree(hist_buf); ++ ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_STATUS: { ++ int status; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ status = ctx->saved_errors | ctx->status; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) { ++ struct rtser_status status_buf; ++ ++ status_buf.line_status = status; ++ status_buf.modem_status = psc_get_msr(ctx); ++ ++ err = rtdm_safe_copy_to_user(fd, arg, ++ &status_buf, ++ sizeof(struct ++ rtser_status)); ++ } else { ++ ((struct rtser_status *)arg)->line_status = status; ++ ((struct rtser_status *)arg)->modem_status = ++ psc_get_msr(ctx); ++ } ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_CONTROL: ++ if (rtdm_fd_is_user(fd)) ++ err = rtdm_safe_copy_to_user(fd, arg, ++ &ctx->mcr_status, ++ sizeof(int)); ++ else ++ *(int *)arg = ctx->mcr_status; ++ ++ break; ++ ++ case RTSER_RTIOC_SET_CONTROL: { ++ int new_mcr = (long)arg; ++ ++ if ((new_mcr & RTSER_MCR_RTS) != RTSER_MCR_RTS) ++ dev_warn(ctx->port->dev, ++ "MCR: Only RTS is supported\n"); ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ctx->mcr_status = new_mcr & RTSER_MCR_RTS; ++ psc_set_mcr(ctx, ctx->mcr_status); ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++ case RTSER_RTIOC_WAIT_EVENT: { ++ struct rtser_event ev = { .rxpend_timestamp = 0 }; ++ rtdm_toseq_t timeout_seq; ++ ++ if (!rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ /* Only one waiter allowed, stop any further attempts here. 
*/ ++ if (test_and_set_bit(0, &ctx->ioc_event_lock)) ++ return -EBUSY; ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.event_timeout); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (!ctx->ioc_events) { ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ err = rtdm_event_timedwait(&ctx->ioc_event, ++ ctx->config.event_timeout, ++ &timeout_seq); ++ if (err) { ++ /* Device has been closed? */ ++ if (err == -EIDRM) ++ err = -EBADF; ++ goto wait_unlock_out; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ ev.events = ctx->ioc_events; ++ ctx->ioc_events &= ~(RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO); ++ ++ ev.last_timestamp = ctx->last_timestamp; ++ ev.rx_pending = ctx->in_npend; ++ ++ if (ctx->in_history) ++ ev.rxpend_timestamp = ctx->in_history[ctx->in_head]; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, &ev, ++ sizeof(struct ++ rtser_event)); ++ else ++ memcpy(arg, &ev, sizeof(struct rtser_event)); ++ ++ wait_unlock_out: ++ /* release the simple event waiter lock */ ++ clear_bit(0, &ctx->ioc_event_lock); ++ break; ++ } ++ ++ case RTSER_RTIOC_BREAK_CTL: { ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ if ((long)arg & RTSER_BREAK_SET) ++ out_8(&ctx->port->psc->command, ++ MPC52xx_PSC_START_BRK); ++ else ++ out_8(&ctx->port->psc->command, ++ MPC52xx_PSC_STOP_BRK); ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++#ifdef ISREADY ++ case RTIOC_PURGE: { ++ int fcr = 0; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ if ((long)arg & RTDM_PURGE_RX_BUFFER) { ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->status = 0; ++ fcr |= FCR_FIFO | FCR_RESET_RX; ++ rt_mpc52xx_uart_reg_in(mode, base, RHR); ++ } ++ if ((long)arg & RTDM_PURGE_TX_BUFFER) { ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ fcr |= FCR_FIFO | FCR_RESET_TX; ++ } ++ if (fcr) { ++ rt_mpc52xx_uart_reg_out(mode, base, FCR, fcr); ++ rt_mpc52xx_uart_reg_out(mode, base, FCR, ++ FCR_FIFO | ctx->config.fifo_depth); ++ } ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++#endif ++ ++ default: ++ err = -ENOTTY; ++ } ++ ++ return err; ++} ++ ++static ssize_t rt_mpc52xx_uart_read(struct rtdm_fd *fd, void *buf, ++ size_t nbyte) ++{ ++ struct rt_mpc52xx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t read = 0; ++ int pending; ++ int block; ++ int subblock; ++ int in_pos; ++ char *out_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret = -EAGAIN; /* for non-blocking read */ ++ int nonblocking; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_rw_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.rx_timeout); ++ ++ /* non-blocking is handled separately here */ ++ nonblocking = (ctx->config.rx_timeout < 0); ++ ++ /* only one reader allowed, stop any further attempts here */ ++ if (test_and_set_bit(0, &ctx->in_lock)) ++ return -EBUSY; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (1) { ++ if (ctx->status) { ++ if (ctx->status & RTSER_LSR_BREAK_IND) ++ ret = -EPIPE; ++ else ++ ret = -EIO; ++ ctx->saved_errors = ctx->status & ++ (RTSER_LSR_OVERRUN_ERR | RTSER_LSR_PARITY_ERR | ++ RTSER_LSR_FRAMING_ERR | RTSER_SOFT_OVERRUN_ERR); ++ ctx->status = 0; ++ break; ++ } ++ ++ pending = ctx->in_npend; ++ ++ if (pending > 0) { ++ block = subblock = (pending <= nbyte) ? 
pending : nbyte; ++ in_pos = ctx->in_head; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (in_pos + subblock > IN_BUFFER_SIZE) { ++ /* Treat the block between head and buffer end ++ separately. */ ++ subblock = IN_BUFFER_SIZE - in_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user ++ (fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], ++ subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ ++ subblock = block - subblock; ++ in_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user(fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->in_head = ++ (ctx->in_head + block) & (IN_BUFFER_SIZE - 1); ++ if ((ctx->in_npend -= block) == 0) ++ ctx->ioc_events &= ~RTSER_EVENT_RXPEND; ++ ++ if (nbyte == 0) ++ break; /* All requested bytes read. */ ++ ++ continue; ++ } ++ ++ if (nonblocking) ++ /* ret was set to EAGAIN in case of a real ++ non-blocking call or contains the error ++ returned by rtdm_event_wait[_until] */ ++ break; ++ ++ ctx->in_nwait = nbyte; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = rtdm_event_timedwait(&ctx->in_event, ++ ctx->config.rx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ return immediately. */ ++ return -EBADF; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ nonblocking = 1; ++ if (ctx->in_npend > 0) { ++ /* Final turn: collect pending bytes ++ before exit. */ ++ continue; ++ } ++ ++ ctx->in_nwait = 0; ++ break; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++break_unlocked: ++ /* Release the simple reader lock, */ ++ clear_bit(0, &ctx->in_lock); ++ ++ if ((read > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT) || (ret == -EINTR))) ++ ret = read; ++ ++ return ret; ++} ++ ++static ssize_t rt_mpc52xx_uart_write(struct rtdm_fd *fd, ++ const void *buf, ++ size_t nbyte) ++{ ++ struct rt_mpc52xx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t written = 0; ++ int free; ++ int block; ++ int subblock; ++ int out_pos; ++ char *in_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_read_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.rx_timeout); ++ ++ /* Make write operation atomic. */ ++ ret = rtdm_mutex_timedlock(&ctx->out_lock, ctx->config.rx_timeout, ++ &timeout_seq); ++ if (ret) ++ return ret; ++ ++ while (nbyte > 0) { ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ free = OUT_BUFFER_SIZE - ctx->out_npend; ++ ++ if (free > 0) { ++ block = subblock = (nbyte <= free) ? nbyte : free; ++ out_pos = ctx->out_tail; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (out_pos + subblock > OUT_BUFFER_SIZE) { ++ /* Treat the block between head and buffer ++ end separately. 
*/ ++ subblock = OUT_BUFFER_SIZE - out_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, ++ &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, ++ subblock); ++ ++ written += subblock; ++ in_pos += subblock; ++ ++ subblock = block - subblock; ++ out_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, block); ++ ++ written += subblock; ++ in_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->out_tail = ++ (ctx->out_tail + block) & (OUT_BUFFER_SIZE - 1); ++ ctx->out_npend += block; ++ ++ /* Mark shift register not empty */ ++ ctx->ioc_events &= ~RTSER_EVENT_TXEMPTY; ++ ctx->tx_empty = 0; ++ ++ psc_start_tx(ctx); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ continue; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = rtdm_event_timedwait(&ctx->out_event, ++ ctx->config.tx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ return immediately. */ ++ return -EBADF; ++ } ++ if (ret == -EWOULDBLOCK) { ++ /* Fix error code for non-blocking mode. */ ++ ret = -EAGAIN; ++ } ++ break; ++ } ++ } ++ ++ rtdm_mutex_unlock(&ctx->out_lock); ++ ++ if ((written > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT) || (ret == -EINTR))) ++ ret = written; ++ ++ return ret; ++} ++ ++static struct rtdm_driver mpc52xx_uart_driver = { ++ .profile_info = RTDM_PROFILE_INFO(imx_uart, ++ RTDM_CLASS_SERIAL, ++ RTDM_SUBCLASS_16550A, ++ RTSER_PROFILE_VER), ++ .device_count = MPC52xx_PSC_MAXNUM, ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .context_size = sizeof(struct rt_mpc52xx_uart_ctx), ++ .ops = { ++ .open = rt_mpc52xx_uart_open, ++ .close = rt_mpc52xx_uart_close, ++ .ioctl_rt = rt_mpc52xx_uart_ioctl, ++ .ioctl_nrt = rt_mpc52xx_uart_ioctl, ++ .read_rt = rt_mpc52xx_uart_read, ++ .write_rt = rt_mpc52xx_uart_write, ++ }, ++}; ++ ++static int rt_mpc52xx_uart_of_probe(struct platform_device *op) ++{ ++ struct rt_mpc52xx_uart_port *port; ++ struct rtdm_device *dev; ++ struct resource res; ++ int ret, idx; ++ ++ dev_dbg(&op->dev, "mpc52xx_uart_probe(op=%p)\n", op); ++ ++ /* Check validity & presence */ ++ for (idx = 0; idx < MPC52xx_PSC_MAXNUM; idx++) ++ if (rt_mpc52xx_uart_nodes[idx] == op->dev.of_node) ++ break; ++ if (idx >= MPC52xx_PSC_MAXNUM) ++ return -EINVAL; ++ ++ port = kmalloc(sizeof(*port), GFP_KERNEL); ++ if (!port) { ++ dev_err(&op->dev, "Could allocate port space\n"); ++ return -ENOMEM; ++ } ++ port->dev = &op->dev; ++ ++ /* ++ * Set the uart clock to the input clock of the psc, the different ++ * prescalers are taken into account in the set_baudrate() methods ++ * of the respective chip ++ */ ++ port->uartclk = mpc5xxx_get_bus_frequency(op->dev.of_node); ++ if (port->uartclk == 0) { ++ dev_err(&op->dev, "Could not find uart clock frequency\n"); ++ ret = -EINVAL; ++ goto out_kfree_port; ++ } ++ ++ /* Fetch register locations */ ++ ret = of_address_to_resource(op->dev.of_node, 0, &res); ++ if (ret) { ++ dev_err(&op->dev, "Could not get resources\n"); ++ goto out_kfree_port; ++ } ++ port->num = ((res.start >> 8) & 0xf) / 2; ++ if (port->num < 6) ++ port->num++; ++ ++ if (!request_mem_region(res.start, resource_size(&res), ++ RT_MPC52XX_UART_DRVNAM)) { ++ ret = -EBUSY; ++ goto 
out_kfree_port; ++ } ++ ++ port->psc = ioremap(res.start, resource_size(&res)); ++ if (!port->psc) { ++ dev_err(&op->dev, "Could not map PSC registers\n"); ++ ret = -ENOMEM; ++ goto out_release_mem_region; ++ } ++ port->fifo = (struct mpc52xx_psc_fifo __iomem *)(port->psc + 1); ++ ++ port->irq = irq_of_parse_and_map(op->dev.of_node, 0); ++ if (port->irq <= 0) { ++ dev_err(&op->dev, "Could not get irq\n"); ++ ret = -ENODEV; ++ goto out_iounmap; ++ } ++ ++ dev = kmalloc(sizeof(struct rtdm_device), GFP_KERNEL); ++ if (!dev) { ++ dev_err(&op->dev, "Could allocate device context\n"); ++ ret = -ENOMEM; ++ goto out_dispose_irq_mapping; ++ } ++ ++ dev->driver = &mpc52xx_uart_driver; ++ dev->label = "rtserPSC%d"; ++ dev->device_data = port; ++ ++ rt_mpc52xx_uart_init_hw(port); ++ ++ ret = rtdm_dev_register(dev); ++ if (ret) ++ goto out_kfree_dev; ++ ++ dev_set_drvdata(&op->dev, dev); ++ ++ dev_info(&op->dev, "%s on PSC%d at 0x%p, irq=%d, clk=%i\n", ++ dev->name, port->num, port->psc, port->irq, ++ port->uartclk); ++ ++ return 0; ++ ++out_kfree_dev: ++ kfree(dev); ++out_dispose_irq_mapping: ++ irq_dispose_mapping(port->irq); ++out_iounmap: ++ iounmap(port->psc); ++out_release_mem_region: ++ release_mem_region(res.start, resource_size(&res)); ++out_kfree_port: ++ kfree(port); ++ ++ return ret; ++} ++ ++static int rt_mpc52xx_uart_of_remove(struct platform_device *op) ++{ ++ struct rtdm_device *dev = dev_get_drvdata(&op->dev); ++ struct rt_mpc52xx_uart_port *port = dev->device_data; ++ struct resource res; ++ ++ dev_set_drvdata(&op->dev, NULL); ++ ++ rtdm_dev_unregister(dev); ++ irq_dispose_mapping(port->irq); ++ iounmap(port->psc); ++ if (!of_address_to_resource(op->dev.of_node, 0, &res)) ++ release_mem_region(res.start, resource_size(&res)); ++ kfree(port); ++ kfree(dev); ++ ++ return 0; ++} ++ ++static struct of_device_id rt_mpc52xx_uart_of_match[] = { ++ { .compatible = "fsl,mpc5200b-psc-uart", }, ++ { .compatible = "fsl,mpc5200-psc-uart", }, ++ {}, ++}; ++MODULE_DEVICE_TABLE(of, rt_mpc52xx_uart_of_match); ++ ++static struct platform_driver rt_mpc52xx_uart_of_driver = { ++ .probe = rt_mpc52xx_uart_of_probe, ++ .remove = rt_mpc52xx_uart_of_remove, ++ .driver = { ++ .name = "rt-mpc52xx-psc-uart", ++ .owner = THIS_MODULE, ++ .of_match_table = rt_mpc52xx_uart_of_match, ++ }, ++}; ++ ++static void rt_mpc52xx_uart_of_enumerate(void) ++{ ++ struct device_node *np; ++ int idx = 0; ++ ++ /* Assign index to each PSC in device tree line the linux driver does */ ++ for_each_matching_node(np, rt_mpc52xx_uart_of_match) { ++ of_node_get(np); ++ rt_mpc52xx_uart_nodes[idx] = np; ++ idx++; ++ } ++} ++ ++static int __init rt_mpc52xx_uart_init(void) ++{ ++ int ret; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ printk(KERN_INFO "RTserial: MPC52xx PSC UART driver\n"); ++ ++ rt_mpc52xx_uart_of_enumerate(); ++ ++ ret = platform_driver_register(&rt_mpc52xx_uart_of_driver); ++ if (ret) { ++ printk(KERN_ERR ++ "%s; Could not register driver (err=%d)\n", ++ __func__, ret); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void __exit rt_mpc52xx_uart_exit(void) ++{ ++ platform_driver_unregister(&rt_mpc52xx_uart_of_driver); ++} ++ ++module_init(rt_mpc52xx_uart_init); ++module_exit(rt_mpc52xx_uart_exit); +--- linux/drivers/xenomai/serial/rt_imx_uart.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/rt_imx_uart.c 2021-04-07 16:01:26.490635219 +0800 +@@ -0,0 +1,1677 @@ ++/* ++ * Copyright 2012 Wolfgang Grandegger ++ * ++ * Derived from the Linux IMX UART driver (drivers/tty/serial/imx.c) ++ 
* and 16650A RTserial driver. ++ * ++ * Copyright (C) 2005-2007 Jan Kiszka . ++ * Copyright (C) 2004 Pengutronix ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTDM-based driver for IMX UARTs"); ++MODULE_VERSION("1.0.0"); ++MODULE_LICENSE("GPL"); ++ ++#define DRIVER_NAME "xeno_imx_uart" ++ ++/* Register definitions */ ++#define URXD0 0x0 /* Receiver Register */ ++#define URTX0 0x40 /* Transmitter Register */ ++#define UCR1 0x80 /* Control Register 1 */ ++#define UCR2 0x84 /* Control Register 2 */ ++#define UCR3 0x88 /* Control Register 3 */ ++#define UCR4 0x8c /* Control Register 4 */ ++#define UFCR 0x90 /* FIFO Control Register */ ++#define USR1 0x94 /* Status Register 1 */ ++#define USR2 0x98 /* Status Register 2 */ ++#define UESC 0x9c /* Escape Character Register */ ++#define UTIM 0xa0 /* Escape Timer Register */ ++#define UBIR 0xa4 /* BRM Incremental Register */ ++#define UBMR 0xa8 /* BRM Modulator Register */ ++#define UBRC 0xac /* Baud Rate Count Register */ ++#define MX2_ONEMS 0xb0 /* One Millisecond register */ ++#define IMX1_UTS 0xd0 /* UART Test Register on i.mx1 */ ++#define IMX21_UTS 0xb4 /* UART Test Register on all other i.mx*/ ++ ++ ++ ++/* UART Control Register Bit Fields.*/ ++#define URXD_CHARRDY (1<<15) ++#define URXD_ERR (1<<14) ++#define URXD_OVRRUN (1<<13) ++#define URXD_FRMERR (1<<12) ++#define URXD_BRK (1<<11) ++#define URXD_PRERR (1<<10) ++#define UCR1_ADEN (1<<15) /* Auto dectect interrupt */ ++#define UCR1_ADBR (1<<14) /* Auto detect baud rate */ ++#define UCR1_TRDYEN (1<<13) /* Transmitter ready interrupt enable */ ++#define UCR1_IDEN (1<<12) /* Idle condition interrupt */ ++#define UCR1_RRDYEN (1<<9) /* Recv ready interrupt enable */ ++#define UCR1_RDMAEN (1<<8) /* Recv ready DMA enable */ ++#define UCR1_IREN (1<<7) /* Infrared interface enable */ ++#define UCR1_TXMPTYEN (1<<6) /* Transimitter empty interrupt enable */ ++#define UCR1_RTSDEN (1<<5) /* RTS delta interrupt enable */ ++#define UCR1_SNDBRK (1<<4) /* Send break */ ++#define UCR1_TDMAEN (1<<3) /* Transmitter ready DMA enable */ ++#define MX1_UCR1_UARTCLKEN (1<<2) /* UART clock enabled, mx1 only */ ++#define UCR1_DOZE (1<<1) /* Doze */ ++#define UCR1_UARTEN (1<<0) /* UART enabled */ ++#define UCR2_ESCI (1<<15) /* Escape seq interrupt enable */ ++#define UCR2_IRTS (1<<14) /* Ignore RTS pin */ ++#define UCR2_CTSC (1<<13) /* CTS pin control */ ++#define UCR2_CTS (1<<12) /* Clear to send */ ++#define UCR2_ESCEN (1<<11) /* Escape enable */ ++#define UCR2_PREN (1<<8) /* Parity enable */ ++#define UCR2_PROE (1<<7) /* Parity odd/even */ ++#define UCR2_STPB (1<<6) /* Stop */ ++#define UCR2_WS (1<<5) /* Word size */ ++#define UCR2_RTSEN (1<<4) /* Request to send interrupt enable */ ++#define UCR2_ATEN (1<<3) /* 
Aging Timer Enable */ ++#define UCR2_TXEN (1<<2) /* Transmitter enabled */ ++#define UCR2_RXEN (1<<1) /* Receiver enabled */ ++#define UCR2_SRST (1<<0) /* SW reset */ ++#define UCR3_DTREN (1<<13) /* DTR interrupt enable */ ++#define UCR3_PARERREN (1<<12) /* Parity enable */ ++#define UCR3_FRAERREN (1<<11) /* Frame error interrupt enable */ ++#define UCR3_DSR (1<<10) /* Data set ready */ ++#define UCR3_DCD (1<<9) /* Data carrier detect */ ++#define UCR3_RI (1<<8) /* Ring indicator */ ++#define UCR3_ADNIMP (1<<7) /* Autobaud Detection Not Improved */ ++#define UCR3_RXDSEN (1<<6) /* Receive status interrupt enable */ ++#define UCR3_AIRINTEN (1<<5) /* Async IR wake interrupt enable */ ++#define UCR3_AWAKEN (1<<4) /* Async wake interrupt enable */ ++#define UCR3_DTRDEN (1<<3) /* Data Terminal Ready Delta Enable. */ ++#define MX1_UCR3_REF25 (1<<3) /* Ref freq 25 MHz, only on mx1 */ ++#define MX1_UCR3_REF30 (1<<2) /* Ref Freq 30 MHz, only on mx1 */ ++#define MX2_UCR3_RXDMUXSEL (1<<2) /* RXD Muxed Input Select, on mx2/mx3 */ ++#define UCR3_INVT (1<<1) /* Inverted Infrared transmission */ ++#define UCR3_BPEN (1<<0) /* Preset registers enable */ ++#define UCR4_CTSTL_SHF 10 /* CTS trigger level shift */ ++#define UCR4_CTSTL_MASK 0x3F /* CTS trigger is 6 bits wide */ ++#define UCR4_INVR (1<<9) /* Inverted infrared reception */ ++#define UCR4_ENIRI (1<<8) /* Serial infrared interrupt enable */ ++#define UCR4_WKEN (1<<7) /* Wake interrupt enable */ ++#define UCR4_REF16 (1<<6) /* Ref freq 16 MHz */ ++#define UCR4_IRSC (1<<5) /* IR special case */ ++#define UCR4_TCEN (1<<3) /* Transmit complete interrupt enable */ ++#define UCR4_BKEN (1<<2) /* Break condition interrupt enable */ ++#define UCR4_OREN (1<<1) /* Receiver overrun interrupt enable */ ++#define UCR4_DREN (1<<0) /* Recv data ready interrupt enable */ ++#define UFCR_RXTL_SHF 0 /* Receiver trigger level shift */ ++#define UFCR_RFDIV (7<<7) /* Reference freq divider mask */ ++#define UFCR_RFDIV_REG(x) (((x) < 7 ? 
6 - (x) : 6) << 7) ++#define UFCR_TXTL_SHF 10 /* Transmitter trigger level shift */ ++#define UFCR_DCEDTE (1<<6) ++#define USR1_PARITYERR (1<<15) /* Parity error interrupt flag */ ++#define USR1_RTSS (1<<14) /* RTS pin status */ ++#define USR1_TRDY (1<<13) /* Transmitter ready interrupt/dma flag */ ++#define USR1_RTSD (1<<12) /* RTS delta */ ++#define USR1_ESCF (1<<11) /* Escape seq interrupt flag */ ++#define USR1_FRAMERR (1<<10) /* Frame error interrupt flag */ ++#define USR1_RRDY (1<<9) /* Receiver ready interrupt/dma flag */ ++#define USR1_AGTIM (1<<8) /* Ageing Timer Interrupt Flag */ ++#define USR1_DTRD (1<<7) /* DTR Delta */ ++#define USR1_RXDS (1<<6) /* Receiver idle interrupt flag */ ++#define USR1_AIRINT (1<<5) /* Async IR wake interrupt flag */ ++#define USR1_AWAKE (1<<4) /* Async wake interrupt flag */ ++#define USR2_ADET (1<<15) /* Auto baud rate detect complete */ ++#define USR2_TXFE (1<<14) /* Transmit buffer FIFO empty */ ++#define USR2_DTRF (1<<13) /* DTR edge interrupt flag */ ++#define USR2_IDLE (1<<12) /* Idle condition */ ++#define USR2_RIDELT (1<<10) /* Ring Indicator Delta */ ++#define USR2_RIIN (1<<9) /* Ring Indicator Input */ ++#define USR2_IRINT (1<<8) /* Serial infrared interrupt flag */ ++#define USR2_WAKE (1<<7) /* Wake */ ++#define USR2_DCDDELT (1<<6) /* Data Carrier Detect Delta */ ++#define USR2_DCDIN (1<<5) /* Data Carrier Detect Input */ ++#define USR2_RTSF (1<<4) /* RTS edge interrupt flag */ ++#define USR2_TXDC (1<<3) /* Transmitter complete */ ++#define USR2_BRCD (1<<2) /* Break condition */ ++#define USR2_ORE (1<<1) /* Overrun error */ ++#define USR2_RDR (1<<0) /* Recv data ready */ ++#define UTS_FRCPERR (1<<13) /* Force parity error */ ++#define UTS_LOOP (1<<12) /* Loop tx and rx */ ++#define UTS_TXEMPTY (1<<6) /* TxFIFO empty */ ++#define UTS_RXEMPTY (1<<5) /* RxFIFO empty */ ++#define UTS_TXFULL (1<<4) /* TxFIFO full */ ++#define UTS_RXFULL (1<<3) /* RxFIFO full */ ++#define UTS_SOFTRST (1<<0) /* Software reset */ ++ ++#define IN_BUFFER_SIZE 4096 ++#define OUT_BUFFER_SIZE 4096 ++ ++#define TX_FIFO_SIZE 32 ++ ++#define PARITY_MASK 0x03 ++#define DATA_BITS_MASK 0x03 ++#define STOP_BITS_MASK 0x01 ++#define FIFO_MASK 0xC0 ++#define EVENT_MASK 0x0F ++ ++#define IER_RX 0x01 ++#define IER_TX 0x02 ++#define IER_STAT 0x04 ++#define IER_MODEM 0x08 ++ ++#define IMX_ISR_PASS_LIMIT 256 ++#define UART_CREAD_BIT 256 ++ ++#define RT_IMX_UART_MAX 5 ++ ++static int tx_fifo[RT_IMX_UART_MAX]; ++module_param_array(tx_fifo, int, NULL, 0400); ++MODULE_PARM_DESC(tx_fifo, "Transmitter FIFO size"); ++ ++/* i.MX21 type uart runs on all i.mx except i.MX1 and i.MX6q */ ++enum imx_uart_type { ++ IMX1_UART, ++ IMX21_UART, ++ IMX53_UART, ++ IMX6Q_UART, ++}; ++ ++/* device type dependent stuff */ ++struct imx_uart_data { ++ unsigned int uts_reg; ++ enum imx_uart_type devtype; ++}; ++ ++ ++struct rt_imx_uart_port { ++ unsigned char __iomem *membase; /* read/write[bwl] */ ++ resource_size_t mapbase; /* for ioremap */ ++ unsigned int irq; /* irq number */ ++ int tx_fifo; /* TX fifo size*/ ++ unsigned int have_rtscts; ++ unsigned int use_dcedte; ++ unsigned int use_hwflow; ++ struct clk *clk_ipg; /* clock id for UART clock */ ++ struct clk *clk_per; /* clock id for UART clock */ ++ const struct imx_uart_data *devdata; ++ unsigned int uartclk; /* base uart clock */ ++ struct rtdm_device rtdm_dev; /* RTDM device structure */ ++}; ++ ++ ++static struct imx_uart_data imx_uart_devdata[] = { ++ [IMX1_UART] = { ++ .uts_reg = IMX1_UTS, ++ .devtype = IMX1_UART, ++ }, ++ [IMX21_UART] = { ++ 
.uts_reg = IMX21_UTS, ++ .devtype = IMX21_UART, ++ }, ++ [IMX53_UART] = { ++ .uts_reg = IMX21_UTS, ++ .devtype = IMX53_UART, ++ }, ++ [IMX6Q_UART] = { ++ .uts_reg = IMX21_UTS, ++ .devtype = IMX6Q_UART, ++ }, ++}; ++ ++static const struct platform_device_id rt_imx_uart_id_table[] = { ++ { ++ .name = "imx1-uart", ++ .driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX1_UART], ++ }, { ++ .name = "imx21-uart", ++ .driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX21_UART], ++ }, { ++ .name = "imx53-uart", ++ .driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX53_UART], ++ }, { ++ .name = "imx6q-uart", ++ .driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX6Q_UART], ++ }, { ++ /* sentinel */ ++ } ++}; ++MODULE_DEVICE_TABLE(platform, rt_imx_uart_id_table); ++ ++static const struct of_device_id rt_imx_uart_dt_ids[] = { ++ { ++ .compatible = "fsl,imx6q-uart", ++ .data = &imx_uart_devdata[IMX6Q_UART], }, ++ { ++ .compatible = "fsl,imx53-uart", ++ .data = &imx_uart_devdata[IMX53_UART], }, ++ { ++ .compatible = "fsl,imx1-uart", ++ .data = &imx_uart_devdata[IMX1_UART], }, ++ { ++ .compatible = "fsl,imx21-uart", ++ .data = &imx_uart_devdata[IMX21_UART], }, ++ { /* sentinel */ } ++}; ++MODULE_DEVICE_TABLE(of, rt_imx_uart_dt_ids); ++ ++struct rt_imx_uart_ctx { ++ struct rtser_config config; /* current device configuration */ ++ ++ rtdm_irq_t irq_handle; /* device IRQ handle */ ++ rtdm_lock_t lock; /* lock to protect context struct */ ++ ++ int in_head; /* RX ring buffer, head pointer */ ++ int in_tail; /* RX ring buffer, tail pointer */ ++ size_t in_npend; /* pending bytes in RX ring */ ++ int in_nwait; /* bytes the user waits for */ ++ rtdm_event_t in_event; /* raised to unblock reader */ ++ char in_buf[IN_BUFFER_SIZE]; /* RX ring buffer */ ++ ++ volatile unsigned long in_lock; /* single-reader lock */ ++ uint64_t *in_history; /* RX timestamp buffer */ ++ ++ int out_head; /* TX ring buffer, head pointer */ ++ int out_tail; /* TX ring buffer, tail pointer */ ++ size_t out_npend; /* pending bytes in TX ring */ ++ rtdm_event_t out_event; /* raised to unblock writer */ ++ char out_buf[OUT_BUFFER_SIZE]; /* TX ring buffer */ ++ rtdm_mutex_t out_lock; /* single-writer mutex */ ++ ++ uint64_t last_timestamp; /* timestamp of last event */ ++ int ioc_events; /* recorded events */ ++ rtdm_event_t ioc_event; /* raised to unblock event waiter */ ++ volatile unsigned long ioc_event_lock; /* single-waiter lock */ ++ ++ int ier_status; /* IER cache */ ++ int mcr_status; /* MCR cache */ ++ int status; /* cache for LSR + soft-states */ ++ int saved_errors; /* error cache for RTIOC_GET_STATUS */ ++ ++ /* ++ * The port structure holds all the information about the UART ++ * port like base address, and so on. 
++ */ ++ struct rt_imx_uart_port *port; ++}; ++ ++static const struct rtser_config default_config = { ++ .config_mask = 0xFFFF, ++ .baud_rate = RTSER_DEF_BAUD, ++ .parity = RTSER_DEF_PARITY, ++ .data_bits = RTSER_DEF_BITS, ++ .stop_bits = RTSER_DEF_STOPB, ++ .handshake = RTSER_DEF_HAND, ++ .fifo_depth = RTSER_DEF_FIFO_DEPTH, ++ .rx_timeout = RTSER_DEF_TIMEOUT, ++ .tx_timeout = RTSER_DEF_TIMEOUT, ++ .event_timeout = RTSER_DEF_TIMEOUT, ++ .timestamp_history = RTSER_DEF_TIMESTAMP_HISTORY, ++ .event_mask = RTSER_DEF_EVENT_MASK, ++}; ++ ++static void rt_imx_uart_stop_tx(struct rt_imx_uart_ctx *ctx) ++{ ++ unsigned long temp; ++ ++ temp = readl(ctx->port->membase + UCR1); ++ writel(temp & ~UCR1_TXMPTYEN, ctx->port->membase + UCR1); ++} ++ ++static void rt_imx_uart_start_tx(struct rt_imx_uart_ctx *ctx) ++{ ++ unsigned long temp; ++ ++ temp = readl(ctx->port->membase + UCR1); ++ writel(temp | UCR1_TXMPTYEN, ctx->port->membase + UCR1); ++} ++ ++static void rt_imx_uart_enable_ms(struct rt_imx_uart_ctx *ctx) ++{ ++ unsigned long ucr3; ++ ++ /* ++ * RTS interrupt is enabled only if we are using interrupt-driven ++ * software controlled hardware flow control ++ */ ++ if (!ctx->port->use_hwflow) { ++ unsigned long ucr1 = readl(ctx->port->membase + UCR1); ++ ++ ucr1 |= UCR1_RTSDEN; ++ writel(ucr1, ctx->port->membase + UCR1); ++ } ++ ucr3 = readl(ctx->port->membase + UCR3); ++ ucr3 |= UCR3_DTREN; ++ if (ctx->port->use_dcedte) /* DTE mode */ ++ ucr3 |= UCR3_DCD | UCR3_RI; ++ writel(ucr3, ctx->port->membase + UCR3); ++} ++ ++static int rt_imx_uart_rx_chars(struct rt_imx_uart_ctx *ctx, ++ uint64_t *timestamp) ++{ ++ unsigned int rx, temp; ++ int rbytes = 0; ++ int lsr = 0; ++ ++ while (readl(ctx->port->membase + USR2) & USR2_RDR) { ++ rx = readl(ctx->port->membase + URXD0); ++ temp = readl(ctx->port->membase + USR2); ++ if (temp & USR2_BRCD) { ++ writel(USR2_BRCD, ctx->port->membase + USR2); ++ lsr |= RTSER_LSR_BREAK_IND; ++ } ++ ++ if (rx & (URXD_PRERR | URXD_OVRRUN | URXD_FRMERR)) { ++ if (rx & URXD_PRERR) ++ lsr |= RTSER_LSR_PARITY_ERR; ++ else if (rx & URXD_FRMERR) ++ lsr |= RTSER_LSR_FRAMING_ERR; ++ if (rx & URXD_OVRRUN) ++ lsr |= RTSER_LSR_OVERRUN_ERR; ++ } ++ ++ /* save received character */ ++ ctx->in_buf[ctx->in_tail] = rx & 0xff; ++ if (ctx->in_history) ++ ctx->in_history[ctx->in_tail] = *timestamp; ++ ctx->in_tail = (ctx->in_tail + 1) & (IN_BUFFER_SIZE - 1); ++ ++ if (unlikely(ctx->in_npend >= IN_BUFFER_SIZE)) ++ lsr |= RTSER_SOFT_OVERRUN_ERR; ++ else ++ ctx->in_npend++; ++ ++ rbytes++; ++ } ++ ++ /* save new errors */ ++ ctx->status |= lsr; ++ ++ return rbytes; ++} ++ ++static void rt_imx_uart_tx_chars(struct rt_imx_uart_ctx *ctx) ++{ ++ int ch; ++ unsigned int uts_reg = ctx->port->devdata->uts_reg; ++ ++ while (ctx->out_npend > 0 && ++ !(readl(ctx->port->membase + uts_reg) & UTS_TXFULL)) { ++ ch = ctx->out_buf[ctx->out_head++]; ++ writel(ch, ctx->port->membase + URTX0); ++ ctx->out_head &= (OUT_BUFFER_SIZE - 1); ++ ctx->out_npend--; ++ } ++} ++ ++static int rt_imx_uart_modem_status(struct rt_imx_uart_ctx *ctx, ++ unsigned int usr1, ++ unsigned int usr2) ++{ ++ int events = 0; ++ ++ /* Clear the status bits that triggered the interrupt */ ++ writel(usr1, ctx->port->membase + USR1); ++ writel(usr2, ctx->port->membase + USR2); ++ ++ if (ctx->port->use_dcedte) { /* DTE mode */ ++ if (usr2 & USR2_DCDDELT) ++ events |= !(usr2 & USR2_DCDIN) ? ++ RTSER_EVENT_MODEMHI : RTSER_EVENT_MODEMLO; ++ } ++ if (!ctx->port->use_hwflow && (usr1 & USR1_RTSD)) { ++ events |= (usr1 & USR1_RTSS) ? 
++ RTSER_EVENT_MODEMHI : RTSER_EVENT_MODEMLO; ++ } ++ ++ return events; ++} ++ ++static int rt_imx_uart_int(rtdm_irq_t *irq_context) ++{ ++ uint64_t timestamp = rtdm_clock_read(); ++ struct rt_imx_uart_ctx *ctx; ++ unsigned int usr1, usr2, ucr1; ++ int rbytes = 0, events = 0; ++ int ret = RTDM_IRQ_NONE; ++ ++ ctx = rtdm_irq_get_arg(irq_context, struct rt_imx_uart_ctx); ++ ++ rtdm_lock_get(&ctx->lock); ++ ++ usr1 = readl(ctx->port->membase + USR1); ++ usr2 = readl(ctx->port->membase + USR2); ++ ucr1 = readl(ctx->port->membase + UCR1); ++ ++ /* ++ * Read if there is data available ++ */ ++ if (usr1 & USR1_RRDY) { ++ if (likely(ucr1 & UCR1_RRDYEN)) { ++ rbytes = rt_imx_uart_rx_chars(ctx, ×tamp); ++ events |= RTSER_EVENT_RXPEND; ++ } ++ ret = RTDM_IRQ_HANDLED; ++ } ++ ++ /* ++ * Send data if there is data to be sent ++ */ ++ if (usr1 & USR1_TRDY) { ++ if (likely(ucr1 & UCR1_TXMPTYEN)) ++ rt_imx_uart_tx_chars(ctx); ++ ret = RTDM_IRQ_HANDLED; ++ } ++ ++ /* ++ * Handle modem status events ++ */ ++ if ((usr1 & (USR1_RTSD | USR1_DTRD)) || ++ (usr2 & (USR2_DCDDELT | USR2_RIDELT))) { ++ events |= rt_imx_uart_modem_status(ctx, usr1, usr2); ++ ret = RTDM_IRQ_HANDLED; ++ } ++ ++ if (ctx->in_nwait > 0) { ++ if ((ctx->in_nwait <= rbytes) || ctx->status) { ++ ctx->in_nwait = 0; ++ rtdm_event_signal(&ctx->in_event); ++ } else { ++ ctx->in_nwait -= rbytes; ++ } ++ } ++ ++ if (ctx->status) { ++ events |= RTSER_EVENT_ERRPEND; ++#ifdef FIXME ++ ctx->ier_status &= ~IER_STAT; ++#endif ++ } ++ ++ if (events & ctx->config.event_mask) { ++ int old_events = ctx->ioc_events; ++ ++ ctx->last_timestamp = timestamp; ++ ctx->ioc_events = events; ++ ++ if (!old_events) ++ rtdm_event_signal(&ctx->ioc_event); ++ } ++ ++ if ((ctx->ier_status & IER_TX) && (ctx->out_npend == 0)) { ++ rt_imx_uart_stop_tx(ctx); ++ ctx->ier_status &= ~IER_TX; ++ rtdm_event_signal(&ctx->out_event); ++ } ++ ++ rtdm_lock_put(&ctx->lock); ++ ++ if (ret != RTDM_IRQ_HANDLED) ++ pr_warn("%s: unhandled interrupt\n", __func__); ++ return ret; ++} ++ ++static unsigned int rt_imx_uart_get_msr(struct rt_imx_uart_ctx *ctx) ++{ ++ unsigned long usr1 = readl(ctx->port->membase + USR1); ++ unsigned long usr2 = readl(ctx->port->membase + USR2); ++ unsigned int msr = 0; ++ ++ if (usr1 & USR1_RTSD) ++ msr |= RTSER_MSR_DCTS; ++ if (usr1 & USR1_DTRD) ++ msr |= RTSER_MSR_DDSR; ++ if (usr2 & USR2_RIDELT) ++ msr |= RTSER_MSR_TERI; ++ if (usr2 & USR2_DCDDELT) ++ msr |= RTSER_MSR_DDCD; ++ ++ if (usr1 & USR1_RTSS) ++ msr |= RTSER_MSR_CTS; ++ ++ if (ctx->port->use_dcedte) { /* DTE mode */ ++ if (!(usr2 & USR2_DCDIN)) ++ msr |= RTSER_MSR_DCD; ++ if (!(usr2 & USR2_RIIN)) ++ msr |= RTSER_MSR_RI; ++ } ++ ++ return msr; ++} ++ ++static void rt_imx_uart_set_mcr(struct rt_imx_uart_ctx *ctx, ++ unsigned int mcr) ++{ ++ unsigned int uts_reg = ctx->port->devdata->uts_reg; ++ unsigned long ucr2 = readl(ctx->port->membase + UCR2); ++ unsigned long ucr3 = readl(ctx->port->membase + UCR3); ++ unsigned long uts = readl(ctx->port->membase + uts_reg); ++ ++ if (mcr & RTSER_MCR_RTS) { ++ /* ++ * Return to hardware-driven hardware flow control if the ++ * option is enabled ++ */ ++ if (ctx->port->use_hwflow) { ++ ucr2 |= UCR2_CTSC; ++ } else { ++ ucr2 |= UCR2_CTS; ++ ucr2 &= ~UCR2_CTSC; ++ } ++ } else { ++ ucr2 &= ~(UCR2_CTS | UCR2_CTSC); ++ } ++ writel(ucr2, ctx->port->membase + UCR2); ++ ++ if (mcr & RTSER_MCR_DTR) ++ ucr3 |= UCR3_DSR; ++ else ++ ucr3 &= ~UCR3_DSR; ++ writel(ucr3, ctx->port->membase + UCR3); ++ ++ if (mcr & RTSER_MCR_LOOP) ++ uts |= UTS_LOOP; ++ else ++ uts &= ~UTS_LOOP; ++ 
writel(uts, ctx->port->membase + uts_reg); ++} ++ ++static void rt_imx_uart_break_ctl(struct rt_imx_uart_ctx *ctx, ++ int break_state) ++{ ++ unsigned long ucr1 = readl(ctx->port->membase + UCR1); ++ ++ if (break_state == RTSER_BREAK_SET) ++ ucr1 |= UCR1_SNDBRK; ++ else ++ ucr1 &= ~UCR1_SNDBRK; ++ writel(ucr1, ctx->port->membase + UCR1); ++} ++ ++static int rt_imx_uart_set_config(struct rt_imx_uart_ctx *ctx, ++ const struct rtser_config *config, ++ uint64_t **in_history_ptr) ++{ ++ rtdm_lockctx_t lock_ctx; ++ int err = 0; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ if (config->config_mask & RTSER_SET_BAUD) ++ ctx->config.baud_rate = config->baud_rate; ++ if (config->config_mask & RTSER_SET_DATA_BITS) ++ ctx->config.data_bits = config->data_bits & DATA_BITS_MASK; ++ if (config->config_mask & RTSER_SET_PARITY) ++ ctx->config.parity = config->parity & PARITY_MASK; ++ if (config->config_mask & RTSER_SET_STOP_BITS) ++ ctx->config.stop_bits = config->stop_bits & STOP_BITS_MASK; ++ ++ /* Timeout manipulation is not atomic. The user is supposed to take ++ * care not to use and change timeouts at the same time. ++ */ ++ if (config->config_mask & RTSER_SET_TIMEOUT_RX) ++ ctx->config.rx_timeout = config->rx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_TX) ++ ctx->config.tx_timeout = config->tx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_EVENT) ++ ctx->config.event_timeout = config->event_timeout; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ if (config->timestamp_history & RTSER_RX_TIMESTAMP_HISTORY) { ++ if (!ctx->in_history) { ++ ctx->in_history = *in_history_ptr; ++ *in_history_ptr = NULL; ++ if (!ctx->in_history) ++ err = -ENOMEM; ++ } ++ } else { ++ *in_history_ptr = ctx->in_history; ++ ctx->in_history = NULL; ++ } ++ } ++ ++ if (config->config_mask & RTSER_SET_EVENT_MASK) { ++ ctx->config.event_mask = config->event_mask & EVENT_MASK; ++ ctx->ioc_events = 0; ++ ++ if ((config->event_mask & RTSER_EVENT_RXPEND) && ++ (ctx->in_npend > 0)) ++ ctx->ioc_events |= RTSER_EVENT_RXPEND; ++ ++ if ((config->event_mask & RTSER_EVENT_ERRPEND) ++ && ctx->status) ++ ctx->ioc_events |= RTSER_EVENT_ERRPEND; ++ } ++ ++ if (config->config_mask & RTSER_SET_HANDSHAKE) { ++ ctx->config.handshake = config->handshake; ++ ++ switch (ctx->config.handshake) { ++ case RTSER_RTSCTS_HAND: ++ /* ...? 
*/ ++ ++ default: /* RTSER_NO_HAND */ ++ ctx->mcr_status = RTSER_MCR_RTS | RTSER_MCR_OUT1; ++ break; ++ } ++ rt_imx_uart_set_mcr(ctx, ctx->mcr_status); ++ } ++ ++ /* configure hardware with new parameters */ ++ if (config->config_mask & (RTSER_SET_BAUD | ++ RTSER_SET_PARITY | ++ RTSER_SET_DATA_BITS | ++ RTSER_SET_STOP_BITS | ++ RTSER_SET_EVENT_MASK | ++ RTSER_SET_HANDSHAKE)) { ++ struct rt_imx_uart_port *port = ctx->port; ++ unsigned int ucr2, old_ucr1, old_txrxen, old_ucr2; ++ unsigned int baud = ctx->config.baud_rate; ++ unsigned int div, ufcr; ++ unsigned long num, denom; ++ uint64_t tdiv64; ++ ++ if (ctx->config.data_bits == RTSER_8_BITS) ++ ucr2 = UCR2_WS | UCR2_IRTS; ++ else ++ ucr2 = UCR2_IRTS; ++ ++ if (ctx->config.handshake == RTSER_RTSCTS_HAND) { ++ if (port->have_rtscts) { ++ ucr2 &= ~UCR2_IRTS; ++ ucr2 |= UCR2_CTSC; ++ } ++ } ++ ++ if (ctx->config.stop_bits == RTSER_2_STOPB) ++ ucr2 |= UCR2_STPB; ++ if (ctx->config.parity == RTSER_ODD_PARITY || ++ ctx->config.parity == RTSER_EVEN_PARITY) { ++ ucr2 |= UCR2_PREN; ++ if (ctx->config.parity == RTSER_ODD_PARITY) ++ ucr2 |= UCR2_PROE; ++ } ++ ++ /* ++ * disable interrupts and drain transmitter ++ */ ++ old_ucr1 = readl(port->membase + UCR1); ++ old_ucr1 &= ~UCR1_RTSDEN; /* reset in rt_imx_uart_enable_ms()*/ ++ writel(old_ucr1 & ~(UCR1_TXMPTYEN | UCR1_RRDYEN), ++ port->membase + UCR1); ++ old_ucr2 = readl(port->membase + USR2); ++ writel(old_ucr2 & ~UCR2_ATEN, port->membase + USR2); ++ while (!(readl(port->membase + USR2) & USR2_TXDC)) ++ barrier(); ++ ++ /* then, disable everything */ ++ old_txrxen = readl(port->membase + UCR2); ++ writel(old_txrxen & ~(UCR2_TXEN | UCR2_RXEN), ++ port->membase + UCR2); ++ old_txrxen &= (UCR2_TXEN | UCR2_RXEN); ++ div = port->uartclk / (baud * 16); ++ if (div > 7) ++ div = 7; ++ if (!div) ++ div = 1; ++ ++ rational_best_approximation(16 * div * baud, port->uartclk, ++ 1 << 16, 1 << 16, &num, &denom); ++ ++ tdiv64 = port->uartclk; ++ tdiv64 *= num; ++ do_div(tdiv64, denom * 16 * div); ++ ++ num -= 1; ++ denom -= 1; ++ ++ ufcr = readl(port->membase + UFCR); ++ ufcr = (ufcr & (~UFCR_RFDIV)) | UFCR_RFDIV_REG(div); ++ ++ if (port->use_dcedte) ++ ufcr |= UFCR_DCEDTE; ++ ++ writel(ufcr, port->membase + UFCR); ++ ++ writel(num, port->membase + UBIR); ++ writel(denom, port->membase + UBMR); ++ ++ writel(port->uartclk / div / 1000, port->membase + MX2_ONEMS); ++ ++ writel(old_ucr1, port->membase + UCR1); ++ ++ /* set the parity, stop bits and data size */ ++ writel(ucr2 | old_txrxen, port->membase + UCR2); ++ ++ if (config->event_mask & ++ (RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO)) ++ rt_imx_uart_enable_ms(ctx); ++ ++ ctx->status = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ return err; ++} ++ ++void rt_imx_uart_cleanup_ctx(struct rt_imx_uart_ctx *ctx) ++{ ++ rtdm_event_destroy(&ctx->in_event); ++ rtdm_event_destroy(&ctx->out_event); ++ rtdm_event_destroy(&ctx->ioc_event); ++ rtdm_mutex_destroy(&ctx->out_lock); ++} ++ ++#define TXTL 2 /* reset default */ ++#define RXTL 1 /* reset default */ ++ ++static int rt_imx_uart_setup_ufcr(struct rt_imx_uart_port *port) ++{ ++ unsigned int val; ++ unsigned int ufcr_rfdiv; ++ ++ /* set receiver / transmitter trigger level. 
++ * RFDIV is set such way to satisfy requested uartclk value ++ */ ++ val = TXTL << 10 | RXTL; ++ ufcr_rfdiv = (clk_get_rate(port->clk_per) + port->uartclk / 2) / ++ port->uartclk; ++ ++ if (!ufcr_rfdiv) ++ ufcr_rfdiv = 1; ++ ++ val |= UFCR_RFDIV_REG(ufcr_rfdiv); ++ ++ writel(val, port->membase + UFCR); ++ ++ return 0; ++} ++ ++/* half the RX buffer size */ ++#define CTSTL 16 ++ ++static void uart_reset(struct rt_imx_uart_port *port) ++{ ++ unsigned int uts_reg = port->devdata->uts_reg; ++ int n = 100; ++ u32 temp; ++ ++ /* Reset fifo's and state machines */ ++ temp = readl(port->membase + UCR2); ++ temp &= ~UCR2_SRST; ++ writel(temp, port->membase + UCR2); ++ n = 100; ++ while (!(readl(port->membase + uts_reg) & UTS_SOFTRST) && --n > 0) ++ udelay(1); ++} ++ ++static int rt_imx_uart_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rt_imx_uart_ctx *ctx; ++ struct rt_imx_uart_port *port; ++ rtdm_lockctx_t lock_ctx; ++ unsigned long temp; ++ uint64_t *dummy; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ctx->port = (struct rt_imx_uart_port *)rtdm_fd_device(fd)->device_data; ++ ++ port = ctx->port; ++ ++ /* IPC initialisation - cannot fail with used parameters */ ++ rtdm_lock_init(&ctx->lock); ++ rtdm_event_init(&ctx->in_event, 0); ++ rtdm_event_init(&ctx->out_event, 0); ++ rtdm_event_init(&ctx->ioc_event, 0); ++ rtdm_mutex_init(&ctx->out_lock); ++ ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->in_nwait = 0; ++ ctx->in_lock = 0; ++ ctx->in_history = NULL; ++ ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ ++ ctx->ioc_events = 0; ++ ctx->ioc_event_lock = 0; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ++ /* ++ * disable the DREN bit (Data Ready interrupt enable) before ++ * requesting IRQs ++ */ ++ temp = readl(port->membase + UCR4); ++ ++ /* set the trigger level for CTS */ ++ temp &= ~(UCR4_CTSTL_MASK << UCR4_CTSTL_SHF); ++ temp |= CTSTL << UCR4_CTSTL_SHF; ++ writel(temp & ~UCR4_DREN, port->membase + UCR4); ++ ++ uart_reset(port); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ /* ++ * Finally, clear status and enable interrupts ++ */ ++ writel(USR1_RTSD | USR1_DTRD, port->membase + USR1); ++ writel(USR2_ORE, port->membase + USR2); ++ ++ temp = readl(port->membase + UCR1) & ~UCR1_RRDYEN; ++ temp |= UCR1_UARTEN; ++ if (port->have_rtscts) ++ temp |= UCR1_RTSDEN; ++ writel(temp, port->membase + UCR1); ++ ++ temp = readl(port->membase + UCR4); ++ temp |= UCR4_OREN; ++ writel(temp, port->membase + UCR4); ++ ++ temp = readl(port->membase + UCR2) & ~(UCR2_ATEN|UCR2_RTSEN); ++ temp |= (UCR2_RXEN | UCR2_TXEN); ++ if (!port->have_rtscts) ++ temp |= UCR2_IRTS; ++ writel(temp, port->membase + UCR2); ++ ++ temp = readl(port->membase + UCR3); ++ temp |= MX2_UCR3_RXDMUXSEL; ++ writel(temp, port->membase + UCR3); ++ ++ temp = readl(port->membase + UCR1); ++ temp |= UCR1_RRDYEN; ++ writel(temp, port->membase + UCR1); ++ ++ temp = readl(port->membase + UCR2); ++ temp |= UCR2_ATEN; ++ writel(temp, port->membase + UCR2); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ rt_imx_uart_set_config(ctx, &default_config, &dummy); ++ ++ rt_imx_uart_setup_ufcr(port); ++ ++ return rtdm_irq_request(&ctx->irq_handle, ++ port->irq, rt_imx_uart_int, 0, ++ rtdm_fd_device(fd)->name, ctx); ++} ++ ++void rt_imx_uart_close(struct rtdm_fd *fd) ++{ ++ struct rt_imx_uart_port *port; ++ struct rt_imx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ unsigned long temp; ++ ++ ctx = rtdm_fd_to_private(fd); ++ port = ctx->port; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, 
lock_ctx); ++ ++ temp = readl(port->membase + UCR2); ++ temp &= ~(UCR2_ATEN|UCR2_RTSEN|UCR2_RXEN|UCR2_TXEN|UCR2_IRTS); ++ writel(temp, port->membase + UCR2); ++ /* ++ * Disable all interrupts, port and break condition, then ++ * reset. ++ */ ++ temp = readl(port->membase + UCR1); ++ temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); ++ writel(temp, port->membase + UCR1); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ rtdm_irq_free(&ctx->irq_handle); ++ ++ uart_reset(port); ++ ++ rt_imx_uart_cleanup_ctx(ctx); ++ kfree(ctx->in_history); ++} ++ ++static int rt_imx_uart_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ rtdm_lockctx_t lock_ctx; ++ struct rt_imx_uart_ctx *ctx; ++ int err = 0; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ switch (request) { ++ case RTSER_RTIOC_GET_CONFIG: ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &ctx->config, ++ sizeof(struct rtser_config)); ++ else ++ memcpy(arg, &ctx->config, ++ sizeof(struct rtser_config)); ++ break; ++ ++ case RTSER_RTIOC_SET_CONFIG: { ++ struct rtser_config *config; ++ struct rtser_config config_buf; ++ uint64_t *hist_buf = NULL; ++ ++ /* ++ * We may call regular kernel services ahead, ask for ++ * re-entering secondary mode if need be. ++ */ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ config = (struct rtser_config *)arg; ++ ++ if (rtdm_fd_is_user(fd)) { ++ err = ++ rtdm_safe_copy_from_user(fd, &config_buf, ++ arg, ++ sizeof(struct ++ rtser_config)); ++ if (err) ++ return err; ++ ++ config = &config_buf; ++ } ++ ++ if ((config->config_mask & RTSER_SET_BAUD) && ++ (config->baud_rate > clk_get_rate(ctx->port->clk_per) / 16 || ++ config->baud_rate <= 0)) ++ /* invalid baudrate for this port */ ++ return -EINVAL; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ if (config->timestamp_history & ++ RTSER_RX_TIMESTAMP_HISTORY) ++ hist_buf = kmalloc(IN_BUFFER_SIZE * ++ sizeof(nanosecs_abs_t), ++ GFP_KERNEL); ++ } ++ ++ rt_imx_uart_set_config(ctx, config, &hist_buf); ++ ++ if (hist_buf) ++ kfree(hist_buf); ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_STATUS: { ++ int status, msr; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ status = ctx->saved_errors | ctx->status; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ ++ msr = rt_imx_uart_get_msr(ctx); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) { ++ struct rtser_status status_buf; ++ ++ ++ status_buf.line_status = status; ++ status_buf.modem_status = msr; ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &status_buf, ++ sizeof(struct ++ rtser_status)); ++ } else { ++ ((struct rtser_status *)arg)->line_status = 0; ++ ((struct rtser_status *)arg)->modem_status = msr; ++ } ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_CONTROL: ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &ctx->mcr_status, ++ sizeof(int)); ++ else ++ *(int *)arg = ctx->mcr_status; ++ ++ break; ++ ++ case RTSER_RTIOC_SET_CONTROL: { ++ int new_mcr = (long)arg; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ctx->mcr_status = new_mcr; ++ rt_imx_uart_set_mcr(ctx, new_mcr); ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++ case RTSER_RTIOC_WAIT_EVENT: { ++ struct rtser_event ev = { .rxpend_timestamp = 0 }; ++ rtdm_toseq_t timeout_seq; ++ ++ if (!rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ /* Only one waiter allowed, stop any further attempts here. 
*/ ++ if (test_and_set_bit(0, &ctx->ioc_event_lock)) ++ return -EBUSY; ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.event_timeout); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (!ctx->ioc_events) { ++ /* Only enable error interrupt ++ * when the user waits for it. ++ */ ++ if (ctx->config.event_mask & RTSER_EVENT_ERRPEND) { ++ ctx->ier_status |= IER_STAT; ++#ifdef FIXME ++ rt_imx_uart_reg_out(mode, base, IER, ++ ctx->ier_status); ++#endif ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ err = rtdm_event_timedwait(&ctx->ioc_event, ++ ctx->config.event_timeout, ++ &timeout_seq); ++ if (err) { ++ /* Device has been closed? */ ++ if (err == -EIDRM) ++ err = -EBADF; ++ goto wait_unlock_out; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ ev.events = ctx->ioc_events; ++ ctx->ioc_events &= ++ ~(RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO); ++ ++ ev.last_timestamp = ctx->last_timestamp; ++ ev.rx_pending = ctx->in_npend; ++ ++ if (ctx->in_history) ++ ev.rxpend_timestamp = ctx->in_history[ctx->in_head]; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, &ev, ++ sizeof(struct ++ rtser_event)); ++ else ++ memcpy(arg, &ev, sizeof(struct rtser_event)); ++ ++wait_unlock_out: ++ /* release the simple event waiter lock */ ++ clear_bit(0, &ctx->ioc_event_lock); ++ break; ++ } ++ ++ case RTSER_RTIOC_BREAK_CTL: { ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ rt_imx_uart_break_ctl(ctx, (int)arg); ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++#ifdef FIXME ++ case RTIOC_PURGE: { ++ int fcr = 0; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ if ((long)arg & RTDM_PURGE_RX_BUFFER) { ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->status = 0; ++ fcr |= FCR_FIFO | FCR_RESET_RX; ++ rt_imx_uart_reg_in(mode, base, RHR); ++ } ++ if ((long)arg & RTDM_PURGE_TX_BUFFER) { ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ fcr |= FCR_FIFO | FCR_RESET_TX; ++ } ++ if (fcr) { ++ rt_imx_uart_reg_out(mode, base, FCR, fcr); ++ rt_imx_uart_reg_out(mode, base, FCR, ++ FCR_FIFO | ctx->config.fifo_depth); ++ } ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++#endif ++ ++ default: ++ err = -ENOTTY; ++ } ++ ++ return err; ++} ++ ++ssize_t rt_imx_uart_read(struct rtdm_fd *fd, void *buf, size_t nbyte) ++{ ++ struct rt_imx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t read = 0; ++ int pending; ++ int block; ++ int subblock; ++ int in_pos; ++ char *out_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret = -EAGAIN; /* for non-blocking read */ ++ int nonblocking; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_rw_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.rx_timeout); ++ ++ /* non-blocking is handled separately here */ ++ nonblocking = (ctx->config.rx_timeout < 0); ++ ++ /* only one reader allowed, stop any further attempts here */ ++ if (test_and_set_bit(0, &ctx->in_lock)) ++ return -EBUSY; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (1) { ++ if (ctx->status) { ++ if (ctx->status & RTSER_LSR_BREAK_IND) ++ ret = -EPIPE; ++ else ++ ret = -EIO; ++ ctx->saved_errors = ctx->status & ++ (RTSER_LSR_OVERRUN_ERR | RTSER_LSR_PARITY_ERR | ++ RTSER_LSR_FRAMING_ERR | RTSER_SOFT_OVERRUN_ERR); ++ ctx->status = 0; ++ break; ++ } ++ ++ pending = ctx->in_npend; ++ ++ if (pending > 0) { ++ 
block = subblock = (pending <= nbyte) ? pending : nbyte; ++ in_pos = ctx->in_head; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (in_pos + subblock > IN_BUFFER_SIZE) { ++ /* Treat the block between head and buffer end ++ * separately. ++ */ ++ subblock = IN_BUFFER_SIZE - in_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user ++ (fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], ++ subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ ++ subblock = block - subblock; ++ in_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user(fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->in_head = ++ (ctx->in_head + block) & (IN_BUFFER_SIZE - 1); ++ ctx->in_npend -= block; ++ if (ctx->in_npend == 0) ++ ctx->ioc_events &= ~RTSER_EVENT_RXPEND; ++ ++ if (nbyte == 0) ++ break; /* All requested bytes read. */ ++ ++ continue; ++ } ++ ++ if (nonblocking) ++ /* ret was set to EAGAIN in case of a real ++ * non-blocking call or contains the error ++ * returned by rtdm_event_wait[_until] ++ */ ++ break; ++ ++ ctx->in_nwait = nbyte; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = rtdm_event_timedwait(&ctx->in_event, ++ ctx->config.rx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ * return immediately. ++ */ ++ return -EBADF; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ nonblocking = 1; ++ if (ctx->in_npend > 0) { ++ /* Final turn: collect pending bytes ++ * before exit. ++ */ ++ continue; ++ } ++ ++ ctx->in_nwait = 0; ++ break; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++break_unlocked: ++ /* Release the simple reader lock, */ ++ clear_bit(0, &ctx->in_lock); ++ ++ if ((read > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT))) ++ ret = read; ++ ++ return ret; ++} ++ ++static ssize_t rt_imx_uart_write(struct rtdm_fd *fd, const void *buf, ++ size_t nbyte) ++{ ++ struct rt_imx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t written = 0; ++ int free; ++ int block; ++ int subblock; ++ int out_pos; ++ char *in_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_read_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.rx_timeout); ++ ++ /* Make write operation atomic. */ ++ ret = rtdm_mutex_timedlock(&ctx->out_lock, ctx->config.rx_timeout, ++ &timeout_seq); ++ if (ret) ++ return ret; ++ ++ while (nbyte > 0) { ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ free = OUT_BUFFER_SIZE - ctx->out_npend; ++ ++ if (free > 0) { ++ block = subblock = (nbyte <= free) ? nbyte : free; ++ out_pos = ctx->out_tail; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (out_pos + subblock > OUT_BUFFER_SIZE) { ++ /* Treat the block between head and buffer ++ * end separately. 
++ */ ++ subblock = OUT_BUFFER_SIZE - out_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, ++ &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, ++ subblock); ++ ++ written += subblock; ++ in_pos += subblock; ++ ++ subblock = block - subblock; ++ out_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, block); ++ ++ written += subblock; ++ in_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->out_tail = ++ (ctx->out_tail + block) & (OUT_BUFFER_SIZE - 1); ++ ctx->out_npend += block; ++ ++ ctx->ier_status |= IER_TX; ++ rt_imx_uart_start_tx(ctx); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ continue; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = rtdm_event_timedwait(&ctx->out_event, ++ ctx->config.tx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ * return immediately. ++ */ ++ ret = -EBADF; ++ } ++ break; ++ } ++ } ++ ++ rtdm_mutex_unlock(&ctx->out_lock); ++ ++ if ((written > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT))) ++ ret = written; ++ ++ return ret; ++} ++ ++static struct rtdm_driver imx_uart_driver = { ++ .profile_info = RTDM_PROFILE_INFO(imx_uart, ++ RTDM_CLASS_SERIAL, ++ RTDM_SUBCLASS_16550A, ++ RTSER_PROFILE_VER), ++ .device_count = RT_IMX_UART_MAX, ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .context_size = sizeof(struct rt_imx_uart_ctx), ++ .ops = { ++ .open = rt_imx_uart_open, ++ .close = rt_imx_uart_close, ++ .ioctl_rt = rt_imx_uart_ioctl, ++ .ioctl_nrt = rt_imx_uart_ioctl, ++ .read_rt = rt_imx_uart_read, ++ .write_rt = rt_imx_uart_write, ++ }, ++}; ++ ++ ++#ifdef CONFIG_OF ++ ++/* ++ * This function returns 1 iff pdev isn't a device instatiated by dt, 0 iff it ++ * could successfully get all information from dt or a negative errno. 
++ */ ++static int rt_imx_uart_probe_dt(struct rt_imx_uart_port *port, ++ struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *of_id = ++ of_match_device(rt_imx_uart_dt_ids, &pdev->dev); ++ int ret; ++ ++ if (!np) ++ /* no device tree device */ ++ return 1; ++ ++ ret = of_alias_get_id(np, "serial"); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "failed to get alias id, errno %d\n", ret); ++ return ret; ++ } ++ ++ pdev->id = ret; ++ ++ if (of_get_property(np, "uart-has-rtscts", NULL) || ++ of_get_property(np, "fsl,uart-has-rtscts", NULL) /* deprecated */) ++ port->have_rtscts = 1; ++ if (of_get_property(np, "fsl,irda-mode", NULL)) ++ dev_warn(&pdev->dev, "IRDA not yet supported\n"); ++ ++ if (of_get_property(np, "fsl,dte-mode", NULL)) ++ port->use_dcedte = 1; ++ ++ port->devdata = of_id->data; ++ ++ return 0; ++} ++#else ++static inline int rt_imx_uart_probe_dt(struct rt_imx_uart_port *port, ++ struct platform_device *pdev) ++{ ++ return 1; ++} ++#endif ++ ++static void rt_imx_uart_probe_pdata(struct rt_imx_uart_port *port, ++ struct platform_device *pdev) ++{ ++ struct imxuart_platform_data *pdata = dev_get_platdata(&pdev->dev); ++ ++ port->devdata = (struct imx_uart_data *) pdev->id_entry->driver_data; ++ ++ if (!pdata) ++ return; ++ ++ if (pdata->flags & IMXUART_HAVE_RTSCTS) ++ port->have_rtscts = 1; ++} ++ ++static int rt_imx_uart_probe(struct platform_device *pdev) ++{ ++ struct rtdm_device *dev; ++ struct rt_imx_uart_port *port; ++ struct resource *res; ++ int ret; ++ ++ port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL); ++ if (!port) ++ return -ENOMEM; ++ ++ ret = rt_imx_uart_probe_dt(port, pdev); ++ if (ret > 0) ++ rt_imx_uart_probe_pdata(port, pdev); ++ else if (ret < 0) ++ return ret; ++ ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!res) ++ return -ENODEV; ++ ++ port->irq = platform_get_irq(pdev, 0); ++ ++ if (port->irq <= 0) ++ return -ENODEV; ++ ++ port->membase = devm_ioremap_resource(&pdev->dev, res); ++ if (IS_ERR(port->membase)) ++ return PTR_ERR(port->membase); ++ ++ dev = &port->rtdm_dev; ++ dev->driver = &imx_uart_driver; ++ dev->label = "rtser%d"; ++ dev->device_data = port; ++ ++ if (!tx_fifo[pdev->id] || tx_fifo[pdev->id] > TX_FIFO_SIZE) ++ port->tx_fifo = TX_FIFO_SIZE; ++ else ++ port->tx_fifo = tx_fifo[pdev->id]; ++ ++ port->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); ++ if (IS_ERR(port->clk_ipg)) ++ return PTR_ERR(port->clk_ipg); ++ ++ port->clk_per = devm_clk_get(&pdev->dev, "per"); ++ if (IS_ERR(port->clk_per)) ++ return PTR_ERR(port->clk_per); ++ ++ clk_prepare_enable(port->clk_ipg); ++ clk_prepare_enable(port->clk_per); ++ port->uartclk = clk_get_rate(port->clk_per); ++ ++ port->use_hwflow = 1; ++ ++ ret = rtdm_dev_register(dev); ++ if (ret) ++ return ret; ++ ++ platform_set_drvdata(pdev, port); ++ ++ pr_info("%s on IMX UART%d: membase=0x%p irq=%d uartclk=%d\n", ++ dev->name, pdev->id, port->membase, port->irq, port->uartclk); ++ return 0; ++} ++ ++static int rt_imx_uart_remove(struct platform_device *pdev) ++{ ++ struct imxuart_platform_data *pdata; ++ struct rt_imx_uart_port *port = platform_get_drvdata(pdev); ++ struct rtdm_device *dev = &port->rtdm_dev; ++ ++ pdata = pdev->dev.platform_data; ++ platform_set_drvdata(pdev, NULL); ++ ++ clk_disable_unprepare(port->clk_ipg); ++ clk_disable_unprepare(port->clk_per); ++ rtdm_dev_unregister(dev); ++ ++ return 0; ++} ++ ++static struct platform_driver rt_imx_uart_driver = { ++ .probe = rt_imx_uart_probe, ++ .remove = rt_imx_uart_remove, 
++ .id_table = rt_imx_uart_id_table, ++ .driver = { ++ .name = DRIVER_NAME, ++ .owner = THIS_MODULE, ++ .of_match_table = rt_imx_uart_dt_ids, ++ }, ++ .prevent_deferred_probe = true, ++}; ++ ++ ++static int __init rt_imx_uart_init(void) ++{ ++ int ret; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ ret = platform_driver_register(&rt_imx_uart_driver); ++ if (ret) { ++ pr_err("%s; Could not register driver (err=%d)\n", ++ __func__, ret); ++ } ++ ++ return ret; ++} ++ ++static void __exit rt_imx_uart_exit(void) ++{ ++ platform_driver_unregister(&rt_imx_uart_driver); ++} ++ ++module_init(rt_imx_uart_init); ++module_exit(rt_imx_uart_exit); +--- linux/drivers/xenomai/can/rtcan_raw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_raw.h 2021-04-07 16:01:26.481635232 +0800 +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __RTCAN_RAW_H_ ++#define __RTCAN_RAW_H_ ++ ++#ifdef __KERNEL__ ++ ++int rtcan_raw_ioctl_dev(struct rtdm_fd *fd, int request, void *arg); ++ ++int rtcan_raw_check_filter(struct rtcan_socket *sock, ++ int ifindex, struct rtcan_filter_list *flist); ++int rtcan_raw_add_filter(struct rtcan_socket *sock, int ifindex); ++void rtcan_raw_remove_filter(struct rtcan_socket *sock); ++ ++void rtcan_rcv(struct rtcan_device *rtcandev, struct rtcan_skb *skb); ++ ++void rtcan_loopback(struct rtcan_device *rtcandev); ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++#define rtcan_loopback_enabled(sock) (sock->loopback) ++#define rtcan_loopback_pending(dev) (dev->tx_socket) ++#else /* !CONFIG_XENO_DRIVERS_CAN_LOOPBACK */ ++#define rtcan_loopback_enabled(sock) (0) ++#define rtcan_loopback_pending(dev) (0) ++#endif /* CONFIG_XENO_DRIVERS_CAN_LOOPBACK */ ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_BUS_ERR ++void __rtcan_raw_enable_bus_err(struct rtcan_socket *sock); ++static inline void rtcan_raw_enable_bus_err(struct rtcan_socket *sock) ++{ ++ if ((sock->err_mask & CAN_ERR_BUSERROR)) ++ __rtcan_raw_enable_bus_err(sock); ++} ++#else ++#define rtcan_raw_enable_bus_err(sock) ++#endif ++ ++int __init rtcan_raw_proto_register(void); ++void __exit rtcan_raw_proto_unregister(void); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTCAN_RAW_H_ */ +--- linux/drivers/xenomai/can/rtcan_socket.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_socket.h 2021-04-07 16:01:26.475635241 +0800 +@@ -0,0 +1,207 @@ ++/* ++ * Copyright (C) 2005,2006 Sebastian Smolorz ++ * ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * Derived from RTnet project file include/stack/socket.h: ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. 
Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __RTCAN_SOCKET_H_ ++#define __RTCAN_SOCKET_H_ ++ ++#include ++ ++#include ++ ++ ++ ++/* This MUST BE 2^N */ ++#define RTCAN_RXBUF_SIZE CONFIG_XENO_DRIVERS_CAN_RXBUF_SIZE ++ ++/* Size of timestamp */ ++#define RTCAN_TIMESTAMP_SIZE sizeof(nanosecs_abs_t) ++ ++/* Bit in the can_dlc member of struct ring_buffer_frame used to indicate ++ * whether a frame has got a timestamp or not */ ++#define RTCAN_HAS_TIMESTAMP 0x80 ++ ++/* Mask for clearing bit RTCAN_HAS_TIMESTAMP */ ++#define RTCAN_HAS_NO_TIMESTAMP 0x7F ++ ++#define RTCAN_SOCK_UNBOUND -1 ++#define RTCAN_FLIST_NO_FILTER (struct rtcan_filter_list *)-1 ++#define rtcan_flist_no_filter(f) ((f) == RTCAN_FLIST_NO_FILTER) ++#define rtcan_sock_has_filter(s) ((s)->flistlen > 0) ++#define rtcan_sock_is_bound(s) ((s)->flistlen >= 0) ++ ++/* ++ * Internal frame representation within the ring buffer of a ++ * struct rtcan_socket. ++ * ++ * The data array is of arbitrary size when the frame is actually ++ * stored in a socket's ring buffer. The timestamp member exists if the ++ * socket was set to take timestamps (then it follows direcly after the ++ * arbitrary-sized data array), otherwise it does not exist. ++ */ ++struct rtcan_rb_frame { ++ ++ /* CAN ID representation equal to struct can_frame */ ++ uint32_t can_id; ++ ++ /* Interface index from which the frame originates */ ++ unsigned char can_ifindex; ++ ++ /* DLC (between 0 and 15) and mark if frame has got a timestamp. The ++ * existence of a timestamp is indicated by the RTCAN_HAS_TIMESTAMP ++ * bit. */ ++ unsigned char can_dlc; ++ ++ /* Data bytes */ ++ uint8_t data[8]; ++ ++ /* High precision timestamp indicating when the frame was received. ++ * Exists when RTCAN_HAS_TIMESTAMP bit in can_dlc is set. */ ++ nanosecs_abs_t timestamp; ++ ++} __attribute__ ((packed)); ++ ++ ++/* Size of struct rtcan_rb_frame without any data bytes and timestamp */ ++#define EMPTY_RB_FRAME_SIZE \ ++ sizeof(struct rtcan_rb_frame) - 8 - RTCAN_TIMESTAMP_SIZE ++ ++ ++/* ++ * Wrapper structure around a struct rtcan_rb_frame with actual size ++ * of the frame. ++ * ++ * This isn't really a socket buffer but only a sort of. It is constructed ++ * within the interrupt routine when a CAN frame is read from ++ * the controller. Then it's passed to the reception handler where only ++ * rb_frame finds its way to the sockets' ring buffers. ++ */ ++struct rtcan_skb { ++ /* Actual size of following rb_frame (without timestamp) */ ++ size_t rb_frame_size; ++ /* Frame to be stored in the sockets' ring buffers (as is) */ ++ struct rtcan_rb_frame rb_frame; ++}; ++ ++struct rtcan_filter_list { ++ int flistlen; ++ struct can_filter flist[1]; ++}; ++ ++/* ++ * Internal CAN socket structure. 
++ * ++ * Every socket has an internal ring buffer for incoming messages. A message ++ * is not stored as a struct can_frame (in order to save buffer space) ++ * but as struct rtcan_rb_frame of arbitrary length depending on the ++ * actual payload. ++ */ ++struct rtcan_socket { ++ ++ struct list_head socket_list; ++ ++ unsigned long flags; ++ ++ /* Transmission timeout in ns. Protected by rtcan_socket_lock ++ * in all socket structures. */ ++ nanosecs_rel_t tx_timeout; ++ ++ /* Reception timeout in ns. Protected by rtcan_socket_lock ++ * in all socket structures. */ ++ nanosecs_rel_t rx_timeout; ++ ++ ++ /* Begin of first frame data in the ring buffer. Protected by ++ * rtcan_socket_lock in all socket structures. */ ++ int recv_head; ++ ++ /* End of last frame data in the ring buffer. I.e. position of first ++ * free byte in the ring buffer. Protected by ++ * rtcan_socket_lock in all socket structures. */ ++ int recv_tail; ++ ++ /* Ring buffer for incoming CAN frames. Protected by ++ * rtcan_socket_lock in all socket structures. */ ++ unsigned char recv_buf[RTCAN_RXBUF_SIZE]; ++ ++ /* Semaphore for receivers and incoming messages */ ++ rtdm_sem_t recv_sem; ++ ++ ++ /* All senders waiting to be able to send ++ * via this socket are queued here */ ++ struct list_head tx_wait_head; ++ ++ ++ /* Interface index the socket is bound to. Protected by ++ * rtcan_recv_list_lock in all socket structures. */ ++ atomic_t ifindex; ++ ++ /* Length of filter list. I.e. how many entries does this socket occupy in ++ * the reception list. 0 if unbound. Protected by ++ * rtcan_recv_list_lock in all socket structures. */ ++ int flistlen; ++ ++ uint32_t err_mask; ++ ++ uint32_t rx_buf_full; ++ ++ struct rtcan_filter_list *flist; ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++ int loopback; ++#endif ++}; ++ ++ ++ ++/* ++ * Get the RTDM context from a struct rtcan_socket ++ * ++ * @param[in] sock Pointer to socket structure ++ * ++ * @return Pointer to a file descriptor of type struct rtdm_fd this socket ++ * belongs to ++ */ ++/* FIXME: to be replaced with container_of */ ++static inline struct rtdm_fd *rtcan_socket_to_fd(struct rtcan_socket *sock) ++{ ++ return rtdm_private_to_fd(sock); ++} ++ ++/* Spinlock protecting the ring buffers and the timeouts of all ++ * rtcan_sockets */ ++extern rtdm_lock_t rtcan_socket_lock; ++extern struct list_head rtcan_socket_list; ++ ++extern void rtcan_socket_init(struct rtdm_fd *fd); ++extern void rtcan_socket_cleanup(struct rtdm_fd *fd); ++ ++ ++#endif /* __RTCAN_SOCKET_H_ */ +--- linux/drivers/xenomai/can/rtcan_dev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_dev.h 2021-04-07 16:01:26.470635248 +0800 +@@ -0,0 +1,205 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from RTnet project file stack/include/rtdev.h: ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2003-2005 Jan Kiszka ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#ifndef __RTCAN_DEV_H_ ++#define __RTCAN_DEV_H_ ++ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++ ++#include "rtcan_list.h" ++ ++ ++/* Number of MSCAN devices the driver can handle */ ++#define RTCAN_MAX_DEVICES CONFIG_XENO_DRIVERS_CAN_MAX_DEVICES ++ ++/* Maximum number of single filters per controller which can be registered ++ * for reception at the same time using Bind */ ++#define RTCAN_MAX_RECEIVERS CONFIG_XENO_DRIVERS_CAN_MAX_RECEIVERS ++ ++/* Suppress handling of refcount if module support is not enabled ++ * or modules cannot be unloaded */ ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_MODULE_UNLOAD) ++#define RTCAN_USE_REFCOUNT ++#endif ++ ++/* ++ * CAN harware-dependent bit-timing constant ++ * ++ * Used for calculating and checking bit-timing parameters ++ */ ++struct can_bittiming_const { ++ char name[16]; /* Name of the CAN controller hardware */ ++ __u32 tseg1_min; /* Time segement 1 = prop_seg + phase_seg1 */ ++ __u32 tseg1_max; ++ __u32 tseg2_min; /* Time segement 2 = phase_seg2 */ ++ __u32 tseg2_max; ++ __u32 sjw_max; /* Synchronisation jump width */ ++ __u32 brp_min; /* Bit-rate prescaler */ ++ __u32 brp_max; ++ __u32 brp_inc; ++}; ++ ++struct rtcan_device { ++ unsigned int version; ++ ++ char name[IFNAMSIZ]; ++ ++ char *ctrl_name; /* Name of CAN controller */ ++ char *board_name;/* Name of CAN board */ ++ ++ unsigned long base_addr; /* device I/O address */ ++ rtdm_irq_t irq_handle; /* RTDM IRQ handle */ ++ ++ int ifindex; ++#ifdef RTCAN_USE_REFCOUNT ++ atomic_t refcount; ++#endif ++ ++ void *priv; /* pointer to chip private data */ ++ ++ void *board_priv;/* pointer to board private data*/ ++ ++ struct semaphore nrt_lock; /* non-real-time locking */ ++ ++ /* Spinlock for all devices (but not for all attributes) and also for HW ++ * access to all CAN controllers ++ */ ++ rtdm_lock_t device_lock; ++ ++ /* Acts as a mutex allowing only one sender to write to the MSCAN ++ * simultaneously. Created when the controller goes into operating mode, ++ * destroyed if it goes into reset mode. */ ++ rtdm_sem_t tx_sem; ++ ++ /* Baudrate of this device. Protected by device_lock in all device ++ * structures. */ ++ unsigned int can_sys_clock; ++ ++ ++ /* Baudrate of this device. Protected by device_lock in all device ++ * structures. */ ++ can_baudrate_t baudrate; ++ ++ struct can_bittime bit_time; ++ const struct can_bittiming_const *bittiming_const; ++ ++ /* State which the controller is in. Protected by device_lock in all ++ * device structures. */ ++ can_state_t state; ++ ++ /* State which the controller was before sleeping. Protected by ++ * device_lock in all device structures. */ ++ can_state_t state_before_sleep; ++ ++ /* Controller specific settings. Protected by device_lock in all ++ * device structures. */ ++ can_ctrlmode_t ctrl_mode; ++ ++ /* Device operations */ ++ int (*hard_start_xmit)(struct rtcan_device *dev, ++ struct can_frame *frame); ++ int (*do_set_mode)(struct rtcan_device *dev, ++ can_mode_t mode, ++ rtdm_lockctx_t *lock_ctx); ++ can_state_t (*do_get_state)(struct rtcan_device *dev); ++ int (*do_set_bit_time)(struct rtcan_device *dev, ++ struct can_bittime *bit_time, ++ rtdm_lockctx_t *lock_ctx); ++#ifdef CONFIG_XENO_DRIVERS_CAN_BUS_ERR ++ void (*do_enable_bus_err)(struct rtcan_device *dev); ++#endif ++ ++ /* Reception list head. 
This list contains all filters which have been ++ * registered via a bind call. */ ++ struct rtcan_recv *recv_list; ++ ++ /* Empty list head. This list contains all empty entries not needed ++ * by the reception list and therefore is disjunctive with it. */ ++ struct rtcan_recv *empty_list; ++ ++ /* Preallocated array for the list entries. To increase cache ++ * locality all list elements are kept in this array. */ ++ struct rtcan_recv receivers[RTCAN_MAX_RECEIVERS]; ++ ++ /* Indicates the length of the empty list */ ++ int free_entries; ++ ++ /* A few statistics counters */ ++ unsigned int tx_count; ++ unsigned int rx_count; ++ unsigned int err_count; ++ ++#ifdef CONFIG_PROC_FS ++ struct proc_dir_entry *proc_root; ++#endif ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++ struct rtcan_skb tx_skb; ++ struct rtcan_socket *tx_socket; ++#endif /* CONFIG_XENO_DRIVERS_CAN_LOOPBACK */ ++}; ++ ++ ++extern struct semaphore rtcan_devices_nrt_lock; ++ ++ ++void rtcan_dev_free(struct rtcan_device *dev); ++ ++int rtcan_dev_register(struct rtcan_device *dev); ++int rtcan_dev_unregister(struct rtcan_device *dev); ++ ++struct rtcan_device *rtcan_dev_alloc(int sizeof_priv, int sizeof_board_priv); ++void rtcan_dev_alloc_name (struct rtcan_device *dev, const char *name_mask); ++ ++struct rtcan_device *rtcan_dev_get_by_name(const char *if_name); ++struct rtcan_device *rtcan_dev_get_by_index(int ifindex); ++ ++#ifdef RTCAN_USE_REFCOUNT ++#define rtcan_dev_reference(dev) atomic_inc(&(dev)->refcount) ++#define rtcan_dev_dereference(dev) atomic_dec(&(dev)->refcount) ++#else ++#define rtcan_dev_reference(dev) do {} while(0) ++#define rtcan_dev_dereference(dev) do {} while(0) ++#endif ++ ++#ifdef CONFIG_PROC_FS ++int rtcan_dev_create_proc(struct rtcan_device* dev); ++void rtcan_dev_remove_proc(struct rtcan_device* dev); ++#else /* !CONFIG_PROC_FS */ ++static inline int rtcan_dev_create_proc(struct rtcan_device* dev) ++{ ++ return 0; ++} ++static inline void rtcan_dev_remove_proc(struct rtcan_device* dev) { } ++#endif /* !CONFIG_PROC_FS */ ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTCAN_DEV_H_ */ +--- linux/drivers/xenomai/can/rtcan_raw.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_raw.c 2021-04-07 16:01:26.465635255 +0800 +@@ -0,0 +1,1003 @@ ++/* ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * Parts of this software are based on the following: ++ * ++ * - RTAI CAN device driver for SJA1000 controllers by Jan Kiszka ++ * ++ * - linux-can.patch, a CAN socket framework for Linux, ++ * Copyright (C) 2004, 2005, Robert Schwebel, Benedikt Spranger, ++ * Marc Kleine-Budde, Sascha Hauer, Pengutronix ++ * ++ * - RTnet (www.rtnet.org) ++ * ++ * - serial device driver and profile included in Xenomai (RTDM), ++ * Copyright (C) 2005 Jan Kiszka . ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include "rtcan_version.h" ++#include "rtcan_socket.h" ++#include "rtcan_list.h" ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++#include "rtcan_internal.h" ++ ++ ++/* ++ * Set if socket wants to receive a high precision timestamp together with ++ * CAN frames ++ */ ++#define RTCAN_GET_TIMESTAMP 0 ++ ++ ++MODULE_AUTHOR("RT-Socket-CAN Development Team"); ++MODULE_DESCRIPTION("RTDM CAN raw socket device driver"); ++MODULE_VERSION(__stringify(RTCAN_MAJOR_VER) ++ __stringify(RTCAN_MINOR_VER) ++ __stringify(RTCAN_BUGFIX_VER)); ++MODULE_LICENSE("GPL"); ++ ++void rtcan_tx_push(struct rtcan_device *dev, struct rtcan_socket *sock, ++ can_frame_t *frame); ++ ++static inline int rtcan_accept_msg(uint32_t can_id, can_filter_t *filter) ++{ ++ if ((filter->can_mask & CAN_INV_FILTER)) ++ return ((can_id & filter->can_mask) != filter->can_id); ++ else ++ return ((can_id & filter->can_mask) == filter->can_id); ++} ++ ++ ++static void rtcan_rcv_deliver(struct rtcan_recv *recv_listener, ++ struct rtcan_skb *skb) ++{ ++ int size_free; ++ size_t cpy_size, first_part_size; ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ struct rtdm_fd *fd = rtdm_private_to_fd(recv_listener->sock); ++ struct rtcan_socket *sock; ++ ++ if (rtdm_fd_lock(fd) < 0) ++ return; ++ ++ sock = recv_listener->sock; ++ ++ cpy_size = skb->rb_frame_size; ++ /* Check if socket wants to receive a timestamp */ ++ if (test_bit(RTCAN_GET_TIMESTAMP, &sock->flags)) { ++ cpy_size += RTCAN_TIMESTAMP_SIZE; ++ frame->can_dlc |= RTCAN_HAS_TIMESTAMP; ++ } else ++ frame->can_dlc &= RTCAN_HAS_NO_TIMESTAMP; ++ ++ /* Calculate free size in the ring buffer */ ++ size_free = sock->recv_head - sock->recv_tail; ++ if (size_free <= 0) ++ size_free += RTCAN_RXBUF_SIZE; ++ ++ /* Test if ring buffer has enough space. */ ++ if (size_free > cpy_size) { ++ /* Check if we must wrap around the end of buffer */ ++ if ((sock->recv_tail + cpy_size) > RTCAN_RXBUF_SIZE) { ++ /* Wrap around: Two memcpy operations */ ++ ++ first_part_size = RTCAN_RXBUF_SIZE - sock->recv_tail; ++ ++ memcpy(&sock->recv_buf[sock->recv_tail], (void *)frame, ++ first_part_size); ++ memcpy(&sock->recv_buf[0], (void *)frame + ++ first_part_size, cpy_size - first_part_size); ++ } else ++ memcpy(&sock->recv_buf[sock->recv_tail], (void *)frame, ++ cpy_size); ++ ++ /* Adjust tail */ ++ sock->recv_tail = (sock->recv_tail + cpy_size) & ++ (RTCAN_RXBUF_SIZE - 1); ++ ++ /*Notify the delivery of the message */ ++ rtdm_sem_up(&sock->recv_sem); ++ ++ } else { ++ /* Overflow of socket's ring buffer! 
*/ ++ sock->rx_buf_full++; ++ RTCAN_RTDM_DBG("rtcan: socket buffer overflow, message discarded\n"); ++ } ++ ++ rtdm_fd_unlock(fd); ++} ++ ++ ++void rtcan_rcv(struct rtcan_device *dev, struct rtcan_skb *skb) ++{ ++ nanosecs_abs_t timestamp = rtdm_clock_read(); ++ /* Entry in reception list, begin with head */ ++ struct rtcan_recv *recv_listener = dev->recv_list; ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ ++ /* Copy timestamp to skb */ ++ memcpy((void *)&skb->rb_frame + skb->rb_frame_size, ++ ×tamp, RTCAN_TIMESTAMP_SIZE); ++ ++ if ((frame->can_id & CAN_ERR_FLAG)) { ++ dev->err_count++; ++ while (recv_listener != NULL) { ++ if ((frame->can_id & recv_listener->sock->err_mask)) { ++ recv_listener->match_count++; ++ rtcan_rcv_deliver(recv_listener, skb); ++ } ++ recv_listener = recv_listener->next; ++ } ++ } else { ++ dev->rx_count++; ++ while (recv_listener != NULL) { ++ if (rtcan_accept_msg(frame->can_id, &recv_listener->can_filter)) { ++ recv_listener->match_count++; ++ rtcan_rcv_deliver(recv_listener, skb); ++ } ++ recv_listener = recv_listener->next; ++ } ++ } ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++ ++void rtcan_tx_push(struct rtcan_device *dev, struct rtcan_socket *sock, ++ can_frame_t *frame) ++{ ++ struct rtcan_rb_frame *rb_frame = &dev->tx_skb.rb_frame; ++ ++ RTCAN_ASSERT(dev->tx_socket == 0, ++ rtdm_printk("(%d) TX skb still in use", dev->ifindex);); ++ ++ rb_frame->can_id = frame->can_id; ++ rb_frame->can_dlc = frame->can_dlc; ++ dev->tx_skb.rb_frame_size = EMPTY_RB_FRAME_SIZE; ++ if (frame->can_dlc && !(frame->can_id & CAN_RTR_FLAG)) { ++ memcpy(rb_frame->data, frame->data, frame->can_dlc); ++ dev->tx_skb.rb_frame_size += frame->can_dlc; ++ } ++ rb_frame->can_ifindex = dev->ifindex; ++ dev->tx_socket = sock; ++} ++ ++void rtcan_loopback(struct rtcan_device *dev) ++{ ++ nanosecs_abs_t timestamp = rtdm_clock_read(); ++ /* Entry in reception list, begin with head */ ++ struct rtcan_recv *recv_listener = dev->recv_list; ++ struct rtcan_rb_frame *frame = &dev->tx_skb.rb_frame; ++ ++ memcpy((void *)&dev->tx_skb.rb_frame + dev->tx_skb.rb_frame_size, ++ ×tamp, RTCAN_TIMESTAMP_SIZE); ++ ++ while (recv_listener != NULL) { ++ dev->rx_count++; ++ if ((dev->tx_socket != recv_listener->sock) && ++ rtcan_accept_msg(frame->can_id, &recv_listener->can_filter)) { ++ recv_listener->match_count++; ++ rtcan_rcv_deliver(recv_listener, &dev->tx_skb); ++ } ++ recv_listener = recv_listener->next; ++ } ++ dev->tx_socket = NULL; ++} ++ ++EXPORT_SYMBOL_GPL(rtcan_loopback); ++ ++#endif /* CONFIG_XENO_DRIVERS_CAN_LOOPBACK */ ++ ++ ++int rtcan_raw_socket(struct rtdm_fd *fd, int protocol) ++{ ++ /* Only protocol CAN_RAW is supported */ ++ if (protocol != CAN_RAW && protocol != 0) ++ return -EPROTONOSUPPORT; ++ ++ rtcan_socket_init(fd); ++ ++ return 0; ++} ++ ++ ++static inline void rtcan_raw_unbind(struct rtcan_socket *sock) ++{ ++ rtcan_raw_remove_filter(sock); ++ if (!rtcan_flist_no_filter(sock->flist) && sock->flist) ++ rtdm_free(sock->flist); ++ sock->flist = NULL; ++ sock->flistlen = RTCAN_SOCK_UNBOUND; ++ atomic_set(&sock->ifindex, 0); ++} ++ ++ ++static void rtcan_raw_close(struct rtdm_fd *fd) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ rtdm_lockctx_t lock_ctx; ++ ++ /* Get lock for reception lists */ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ ++ /* Check if socket is bound */ ++ if (rtcan_sock_is_bound(sock)) ++ rtcan_raw_unbind(sock); ++ ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ ++ rtcan_socket_cleanup(fd); ++} ++ ++ ++int 
rtcan_raw_bind(struct rtdm_fd *fd, ++ struct sockaddr_can *scan) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ rtdm_lockctx_t lock_ctx; ++ int ret = 0; ++ ++ /* Check address family and ++ check if given length of filter list is plausible */ ++ if (scan->can_family != AF_CAN) ++ return -EINVAL; ++ /* Check range of ifindex, must be between 0 and RTCAN_MAX_DEVICES */ ++ if (scan->can_ifindex < 0 || scan->can_ifindex > RTCAN_MAX_DEVICES) ++ return -ENODEV; ++ ++ /* Get lock for reception lists */ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ ++ if ((ret = rtcan_raw_check_filter(sock, scan->can_ifindex, ++ sock->flist))) ++ goto out; ++ rtcan_raw_remove_filter(sock); ++ /* Add filter and mark socket as bound */ ++ sock->flistlen = rtcan_raw_add_filter(sock, scan->can_ifindex); ++ ++ /* Set new interface index the socket is now bound to */ ++ atomic_set(&sock->ifindex, scan->can_ifindex); ++ ++ out: ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ return ret; ++} ++ ++ ++static int rtcan_raw_setsockopt(struct rtdm_fd *fd, ++ struct _rtdm_setsockopt_args *so) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ struct rtcan_filter_list *flist; ++ int ifindex = atomic_read(&sock->ifindex); ++ rtdm_lockctx_t lock_ctx; ++ can_err_mask_t err_mask; ++ int val, ret = 0; ++ ++ if (so->level != SOL_CAN_RAW) ++ return -ENOPROTOOPT; ++ ++ switch (so->optname) { ++ ++ case CAN_RAW_FILTER: ++ if (so->optlen == 0) { ++ flist = RTCAN_FLIST_NO_FILTER; ++ } else { ++ int flistlen; ++ flistlen = so->optlen / sizeof(struct can_filter); ++ if (flistlen < 1 || flistlen > RTCAN_MAX_RECEIVERS || ++ so->optlen % sizeof(struct can_filter) != 0) ++ return -EINVAL; ++ ++ flist = (struct rtcan_filter_list *)rtdm_malloc(so->optlen + sizeof(int)); ++ if (flist == NULL) ++ return -ENOMEM; ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_read_user_ok(fd, so->optval, so->optlen) || ++ rtdm_copy_from_user(fd, flist->flist, ++ so->optval, so->optlen)) { ++ rtdm_free(flist); ++ return -EFAULT; ++ } ++ } else ++ memcpy(flist->flist, so->optval, so->optlen); ++ flist->flistlen = flistlen; ++ } ++ ++ /* Get lock for reception lists */ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ ++ /* Check if there is space for the filter list if already bound */ ++ if (rtcan_sock_is_bound(sock)) { ++ if (!rtcan_flist_no_filter(flist) && ++ (ret = rtcan_raw_check_filter(sock, ifindex, flist))) { ++ rtdm_free(flist); ++ goto out_filter; ++ } ++ rtcan_raw_remove_filter(sock); ++ } ++ ++ /* Remove previous list and attach the new one */ ++ if (!rtcan_flist_no_filter(flist) && sock->flist) ++ rtdm_free(sock->flist); ++ sock->flist = flist; ++ ++ if (rtcan_sock_is_bound(sock)) ++ sock->flistlen = rtcan_raw_add_filter(sock, ifindex); ++ ++ out_filter: ++ /* Release lock for reception lists */ ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ break; ++ ++ case CAN_RAW_ERR_FILTER: ++ ++ if (so->optlen != sizeof(can_err_mask_t)) ++ return -EINVAL; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_read_user_ok(fd, so->optval, so->optlen) || ++ rtdm_copy_from_user(fd, &err_mask, so->optval, so->optlen)) ++ return -EFAULT; ++ } else ++ memcpy(&err_mask, so->optval, so->optlen); ++ ++ /* Get lock for reception lists */ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ sock->err_mask = err_mask; ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ break; ++ ++ case CAN_RAW_LOOPBACK: ++ ++ if (so->optlen != sizeof(int)) ++ return -EINVAL; ++ ++ if 
(rtdm_fd_is_user(fd)) { ++ if (!rtdm_read_user_ok(fd, so->optval, so->optlen) || ++ rtdm_copy_from_user(fd, &val, so->optval, so->optlen)) ++ return -EFAULT; ++ } else ++ memcpy(&val, so->optval, so->optlen); ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++ sock->loopback = val; ++#else ++ if (val) ++ return -EOPNOTSUPP; ++#endif ++ break; ++ ++ default: ++ ret = -ENOPROTOOPT; ++ } ++ ++ return ret; ++} ++ ++ ++int rtcan_raw_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ int ret = 0; ++ ++ switch (request) { ++ case _RTIOC_BIND: { ++ struct _rtdm_setsockaddr_args *setaddr, setaddr_buf; ++ struct sockaddr_can *sockaddr, sockaddr_buf; ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy argument structure from userspace */ ++ if (!rtdm_read_user_ok(fd, arg, ++ sizeof(struct _rtdm_setsockaddr_args)) || ++ rtdm_copy_from_user(fd, &setaddr_buf, arg, ++ sizeof(struct _rtdm_setsockaddr_args))) ++ return -EFAULT; ++ ++ setaddr = &setaddr_buf; ++ ++ /* Check size */ ++ if (setaddr->addrlen != sizeof(struct sockaddr_can)) ++ return -EINVAL; ++ ++ /* Copy argument structure from userspace */ ++ if (!rtdm_read_user_ok(fd, arg, ++ sizeof(struct sockaddr_can)) || ++ rtdm_copy_from_user(fd, &sockaddr_buf, setaddr->addr, ++ sizeof(struct sockaddr_can))) ++ return -EFAULT; ++ sockaddr = &sockaddr_buf; ++ } else { ++ setaddr = (struct _rtdm_setsockaddr_args *)arg; ++ sockaddr = (struct sockaddr_can *)setaddr->addr; ++ } ++ ++ /* Now, all required data are in kernel space */ ++ ret = rtcan_raw_bind(fd, sockaddr); ++ ++ break; ++ } ++ ++ case _RTIOC_SETSOCKOPT: { ++ struct _rtdm_setsockopt_args *setopt; ++ struct _rtdm_setsockopt_args setopt_buf; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_read_user_ok(fd, arg, ++ sizeof(struct _rtdm_setsockopt_args)) || ++ rtdm_copy_from_user(fd, &setopt_buf, arg, ++ sizeof(struct _rtdm_setsockopt_args))) ++ return -EFAULT; ++ ++ setopt = &setopt_buf; ++ } else ++ setopt = (struct _rtdm_setsockopt_args *)arg; ++ ++ return rtcan_raw_setsockopt(fd, setopt); ++ } ++ ++ case RTCAN_RTIOC_TAKE_TIMESTAMP: { ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ long timestamp_switch = (long)arg; ++ ++ if (timestamp_switch == RTCAN_TAKE_TIMESTAMPS) ++ set_bit(RTCAN_GET_TIMESTAMP, &sock->flags); ++ else ++ clear_bit(RTCAN_GET_TIMESTAMP, &sock->flags); ++ break; ++ } ++ ++ case RTCAN_RTIOC_RCV_TIMEOUT: ++ case RTCAN_RTIOC_SND_TIMEOUT: { ++ /* Do some work these requests have in common. */ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ ++ nanosecs_rel_t *timeout = (nanosecs_rel_t *)arg; ++ nanosecs_rel_t timeo_buf; ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy 64 bit timeout value from userspace */ ++ if (!rtdm_read_user_ok(fd, arg, ++ sizeof(nanosecs_rel_t)) || ++ rtdm_copy_from_user(fd, &timeo_buf, ++ arg, sizeof(nanosecs_rel_t))) ++ return -EFAULT; ++ ++ timeout = &timeo_buf; ++ } ++ ++ /* Now the differences begin between the requests. 
*/ ++ if (request == RTCAN_RTIOC_RCV_TIMEOUT) ++ sock->rx_timeout = *timeout; ++ else ++ sock->tx_timeout = *timeout; ++ ++ break; ++ } ++ ++ default: ++ ret = rtcan_raw_ioctl_dev(fd, request, arg); ++ break; ++ } ++ ++ return ret; ++} ++ ++ ++#define MEMCPY_FROM_RING_BUF(to, len) \ ++do { \ ++ if (unlikely((recv_buf_index + len) > RTCAN_RXBUF_SIZE)) { \ ++ /* Wrap around end of buffer */ \ ++ first_part_size = RTCAN_RXBUF_SIZE - recv_buf_index; \ ++ memcpy(to, &recv_buf[recv_buf_index], first_part_size); \ ++ memcpy((void *)to + first_part_size, recv_buf, \ ++ len - first_part_size); \ ++ } else \ ++ memcpy(to, &recv_buf[recv_buf_index], len); \ ++ recv_buf_index = (recv_buf_index + len) & (RTCAN_RXBUF_SIZE - 1); \ ++} while (0) ++ ++ssize_t rtcan_raw_recvmsg(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ struct sockaddr_can scan; ++ nanosecs_rel_t timeout; ++ struct iovec *iov = (struct iovec *)msg->msg_iov; ++ struct iovec iov_buf; ++ can_frame_t frame; ++ nanosecs_abs_t timestamp = 0; ++ unsigned char ifindex; ++ unsigned char can_dlc; ++ unsigned char *recv_buf; ++ int recv_buf_index; ++ size_t first_part_size; ++ size_t payload_size; ++ rtdm_lockctx_t lock_ctx; ++ int ret; ++ ++ /* Clear frame memory location */ ++ memset(&frame, 0, sizeof(can_frame_t)); ++ ++ /* Check flags */ ++ if (flags & ~(MSG_DONTWAIT | MSG_PEEK)) ++ return -EINVAL; ++ ++ ++ /* Check if msghdr entries are sane */ ++ ++ if (msg->msg_name != NULL) { ++ if (msg->msg_namelen < sizeof(struct sockaddr_can)) ++ return -EINVAL; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_rw_user_ok(fd, msg->msg_name, msg->msg_namelen)) ++ return -EFAULT; ++ } ++ ++ } else { ++ if (msg->msg_namelen != 0) ++ return -EINVAL; ++ } ++ ++ /* Check msg_iovlen, only one buffer allowed */ ++ if (msg->msg_iovlen != 1) ++ return -EMSGSIZE; ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy IO vector from userspace */ ++ if (!rtdm_rw_user_ok(fd, msg->msg_iov, ++ sizeof(struct iovec)) || ++ rtdm_copy_from_user(fd, &iov_buf, msg->msg_iov, ++ sizeof(struct iovec))) ++ return -EFAULT; ++ ++ iov = &iov_buf; ++ } ++ ++ /* Check size of buffer */ ++ if (iov->iov_len < sizeof(can_frame_t)) ++ return -EMSGSIZE; ++ ++ /* Check buffer if in user space */ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_rw_user_ok(fd, iov->iov_base, iov->iov_len)) ++ return -EFAULT; ++ } ++ ++ if (msg->msg_control != NULL) { ++ if (msg->msg_controllen < sizeof(nanosecs_abs_t)) ++ return -EINVAL; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_rw_user_ok(fd, msg->msg_control, ++ msg->msg_controllen)) ++ return -EFAULT; ++ } ++ ++ } else { ++ if (msg->msg_controllen != 0) ++ return -EINVAL; ++ } ++ ++ rtcan_raw_enable_bus_err(sock); ++ ++ /* Set RX timeout */ ++ timeout = (flags & MSG_DONTWAIT) ? RTDM_TIMEOUT_NONE : sock->rx_timeout; ++ ++ /* Fetch message (ok, try it ...) */ ++ ret = rtdm_sem_timeddown(&sock->recv_sem, timeout, NULL); ++ ++ /* Error code returned? */ ++ if (unlikely(ret)) { ++ /* Which error code? */ ++ ++ if (ret == -EIDRM) ++ /* Socket was closed */ ++ return -EBADF; ++ ++ else if (ret == -EWOULDBLOCK) ++ /* We would block but don't want to */ ++ return -EAGAIN; ++ ++ else ++ /* Return all other error codes unmodified. */ ++ return ret; ++ } ++ ++ ++ /* OK, we've got mail. 
*/ ++ ++ rtdm_lock_get_irqsave(&rtcan_socket_lock, lock_ctx); ++ ++ ++ /* Construct a struct can_frame with data from socket's ring buffer */ ++ recv_buf_index = sock->recv_head; ++ recv_buf = sock->recv_buf; ++ ++ ++ /* Begin with CAN ID */ ++ MEMCPY_FROM_RING_BUF(&frame.can_id, sizeof(uint32_t)); ++ ++ ++ /* Fetch interface index */ ++ ifindex = recv_buf[recv_buf_index]; ++ recv_buf_index = (recv_buf_index + 1) & (RTCAN_RXBUF_SIZE - 1); ++ ++ ++ /* Fetch DLC (with indicator if a timestamp exists) */ ++ can_dlc = recv_buf[recv_buf_index]; ++ recv_buf_index = (recv_buf_index + 1) & (RTCAN_RXBUF_SIZE - 1); ++ ++ frame.can_dlc = can_dlc & RTCAN_HAS_NO_TIMESTAMP; ++ payload_size = (frame.can_dlc > 8) ? 8 : frame.can_dlc; ++ ++ ++ /* If frame is an RTR or one with no payload it's not necessary ++ * to copy the data bytes. */ ++ if (!(frame.can_id & CAN_RTR_FLAG) && payload_size) ++ /* Copy data bytes */ ++ MEMCPY_FROM_RING_BUF(frame.data, payload_size); ++ ++ /* Is a timestamp available and is the caller actually interested? */ ++ if (msg->msg_controllen && (can_dlc & RTCAN_HAS_TIMESTAMP)) ++ /* Copy timestamp */ ++ MEMCPY_FROM_RING_BUF(×tamp, RTCAN_TIMESTAMP_SIZE); ++ ++ /* Message completely read from the socket's ring buffer. Now check if ++ * caller is just peeking. */ ++ if (flags & MSG_PEEK) ++ /* Next one, please! */ ++ rtdm_sem_up(&sock->recv_sem); ++ else ++ /* Adjust begin of first message in the ring buffer. */ ++ sock->recv_head = recv_buf_index; ++ ++ ++ /* Release lock */ ++ rtdm_lock_put_irqrestore(&rtcan_socket_lock, lock_ctx); ++ ++ ++ /* Create CAN socket address to give back */ ++ if (msg->msg_namelen) { ++ scan.can_family = AF_CAN; ++ scan.can_ifindex = ifindex; ++ } ++ ++ ++ /* Last duty: Copy all back to the caller's buffers. */ ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy to user space */ ++ ++ /* Copy socket address */ ++ if (msg->msg_namelen) { ++ if (rtdm_copy_to_user(fd, msg->msg_name, &scan, ++ sizeof(struct sockaddr_can))) ++ return -EFAULT; ++ ++ msg->msg_namelen = sizeof(struct sockaddr_can); ++ } ++ ++ /* Copy CAN frame */ ++ if (rtdm_copy_to_user(fd, iov->iov_base, &frame, ++ sizeof(can_frame_t))) ++ return -EFAULT; ++ /* Adjust iovec in the common way */ ++ iov->iov_base += sizeof(can_frame_t); ++ iov->iov_len -= sizeof(can_frame_t); ++ /* ... and copy it, too. 
*/ ++ if (rtdm_copy_to_user(fd, msg->msg_iov, iov, ++ sizeof(struct iovec))) ++ return -EFAULT; ++ ++ /* Copy timestamp if existent and wanted */ ++ if (msg->msg_controllen) { ++ if (can_dlc & RTCAN_HAS_TIMESTAMP) { ++ if (rtdm_copy_to_user(fd, msg->msg_control, ++ ×tamp, RTCAN_TIMESTAMP_SIZE)) ++ return -EFAULT; ++ ++ msg->msg_controllen = RTCAN_TIMESTAMP_SIZE; ++ } else ++ msg->msg_controllen = 0; ++ } ++ ++ } else { ++ /* Kernel space */ ++ ++ /* Copy socket address */ ++ if (msg->msg_namelen) { ++ memcpy(msg->msg_name, &scan, sizeof(struct sockaddr_can)); ++ msg->msg_namelen = sizeof(struct sockaddr_can); ++ } ++ ++ /* Copy CAN frame */ ++ memcpy(iov->iov_base, &frame, sizeof(can_frame_t)); ++ /* Adjust iovec in the common way */ ++ iov->iov_base += sizeof(can_frame_t); ++ iov->iov_len -= sizeof(can_frame_t); ++ ++ /* Copy timestamp if existent and wanted */ ++ if (msg->msg_controllen) { ++ if (can_dlc & RTCAN_HAS_TIMESTAMP) { ++ memcpy(msg->msg_control, ×tamp, RTCAN_TIMESTAMP_SIZE); ++ msg->msg_controllen = RTCAN_TIMESTAMP_SIZE; ++ } else ++ msg->msg_controllen = 0; ++ } ++ } ++ ++ ++ return sizeof(can_frame_t); ++} ++ ++ ++ssize_t rtcan_raw_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ struct sockaddr_can *scan = (struct sockaddr_can *)msg->msg_name; ++ struct sockaddr_can scan_buf; ++ struct iovec *iov = (struct iovec *)msg->msg_iov; ++ struct iovec iov_buf; ++ can_frame_t *frame; ++ can_frame_t frame_buf; ++ rtdm_lockctx_t lock_ctx; ++ nanosecs_rel_t timeout = 0; ++ struct tx_wait_queue tx_wait; ++ struct rtcan_device *dev; ++ int ifindex = 0; ++ int ret = 0; ++ spl_t s; ++ ++ ++ if (flags & MSG_OOB) /* Mirror BSD error message compatibility */ ++ return -EOPNOTSUPP; ++ ++ /* Only MSG_DONTWAIT is a valid flag. */ ++ if (flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ /* Check msg_iovlen, only one buffer allowed */ ++ if (msg->msg_iovlen != 1) ++ return -EMSGSIZE; ++ ++ if (scan == NULL) { ++ /* No socket address. Will use bound interface for sending */ ++ ++ if (msg->msg_namelen != 0) ++ return -EINVAL; ++ ++ ++ /* We only want a consistent value here, a spin lock would be ++ * overkill. Nevertheless, the binding could change till we have ++ * the chance to send. Blame the user, though. */ ++ ifindex = atomic_read(&sock->ifindex); ++ ++ if (!ifindex) ++ /* Socket isn't bound or bound to all interfaces. Go out. 
*/ ++ return -ENXIO; ++ } else { ++ /* Socket address given */ ++ if (msg->msg_namelen < sizeof(struct sockaddr_can)) ++ return -EINVAL; ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy socket address from userspace */ ++ if (!rtdm_read_user_ok(fd, msg->msg_name, ++ sizeof(struct sockaddr_can)) || ++ rtdm_copy_from_user(fd, &scan_buf, msg->msg_name, ++ sizeof(struct sockaddr_can))) ++ return -EFAULT; ++ ++ scan = &scan_buf; ++ } ++ ++ /* Check address family */ ++ if (scan->can_family != AF_CAN) ++ return -EINVAL; ++ ++ ifindex = scan->can_ifindex; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy IO vector from userspace */ ++ if (!rtdm_rw_user_ok(fd, msg->msg_iov, ++ sizeof(struct iovec)) || ++ rtdm_copy_from_user(fd, &iov_buf, msg->msg_iov, ++ sizeof(struct iovec))) ++ return -EFAULT; ++ ++ iov = &iov_buf; ++ } ++ ++ /* Check size of buffer */ ++ if (iov->iov_len != sizeof(can_frame_t)) ++ return -EMSGSIZE; ++ ++ frame = (can_frame_t *)iov->iov_base; ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy CAN frame from userspace */ ++ if (!rtdm_read_user_ok(fd, iov->iov_base, ++ sizeof(can_frame_t)) || ++ rtdm_copy_from_user(fd, &frame_buf, iov->iov_base, ++ sizeof(can_frame_t))) ++ return -EFAULT; ++ ++ frame = &frame_buf; ++ } ++ ++ /* Adjust iovec in the common way */ ++ iov->iov_base += sizeof(can_frame_t); ++ iov->iov_len -= sizeof(can_frame_t); ++ /* ... and copy it back to userspace if necessary */ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user(fd, msg->msg_iov, iov, ++ sizeof(struct iovec))) ++ return -EFAULT; ++ } ++ ++ /* At last, we've got the frame ... */ ++ ++ /* Check if DLC between 0 and 15 */ ++ if (frame->can_dlc > 15) ++ return -EINVAL; ++ ++ /* Check if it is a standard frame and the ID between 0 and 2031 */ ++ if (!(frame->can_id & CAN_EFF_FLAG)) { ++ u32 id = frame->can_id & CAN_EFF_MASK; ++ if (id > (CAN_SFF_MASK - 16)) ++ return -EINVAL; ++ } ++ ++ if ((dev = rtcan_dev_get_by_index(ifindex)) == NULL) ++ return -ENXIO; ++ ++ timeout = (flags & MSG_DONTWAIT) ? RTDM_TIMEOUT_NONE : sock->tx_timeout; ++ ++ tx_wait.rt_task = rtdm_task_current(); ++ ++ /* Register the task at the socket's TX wait queue and decrement ++ * the TX semaphore. This must be atomic. Finally, the task must ++ * be deregistered again (also atomic). */ ++ cobalt_atomic_enter(s); ++ ++ list_add(&tx_wait.tx_wait_list, &sock->tx_wait_head); ++ ++ /* Try to pass the guard in order to access the controller */ ++ ret = rtdm_sem_timeddown(&dev->tx_sem, timeout, NULL); ++ ++ /* Only dequeue task again if socket isn't being closed i.e. if ++ * this task was not unblocked within the close() function. */ ++ if (likely(!list_empty(&tx_wait.tx_wait_list))) ++ /* Dequeue this task from the TX wait queue */ ++ list_del_init(&tx_wait.tx_wait_list); ++ else ++ /* The socket was closed. */ ++ ret = -EBADF; ++ ++ cobalt_atomic_leave(s); ++ ++ /* Error code returned? */ ++ if (ret != 0) { ++ /* Which error code? */ ++ switch (ret) { ++ case -EIDRM: ++ /* Controller is stopped or bus-off */ ++ ret = -ENETDOWN; ++ goto send_out1; ++ ++ case -EWOULDBLOCK: ++ /* We would block but don't want to */ ++ ret = -EAGAIN; ++ goto send_out1; ++ ++ default: ++ /* Return all other error codes unmodified. 
*/ ++ goto send_out1; ++ } ++ } ++ ++ /* We got access */ ++ ++ ++ /* Push message onto stack for loopback when TX done */ ++ if (rtcan_loopback_enabled(sock)) ++ rtcan_tx_push(dev, sock, frame); ++ ++ rtdm_lock_get_irqsave(&dev->device_lock, lock_ctx); ++ ++ /* Controller should be operating */ ++ if (!CAN_STATE_OPERATING(dev->state)) { ++ if (dev->state == CAN_STATE_SLEEPING) { ++ ret = -ECOMM; ++ rtdm_lock_put_irqrestore(&dev->device_lock, lock_ctx); ++ rtdm_sem_up(&dev->tx_sem); ++ goto send_out1; ++ } ++ ret = -ENETDOWN; ++ goto send_out2; ++ } ++ ++ dev->tx_count++; ++ ret = dev->hard_start_xmit(dev, frame); ++ ++ /* Return number of bytes sent upon successful completion */ ++ if (ret == 0) ++ ret = sizeof(can_frame_t); ++ ++ send_out2: ++ rtdm_lock_put_irqrestore(&dev->device_lock, lock_ctx); ++ send_out1: ++ rtcan_dev_dereference(dev); ++ return ret; ++} ++ ++ ++static struct rtdm_driver rtcan_driver = { ++ .profile_info = RTDM_PROFILE_INFO(rtcan, ++ RTDM_CLASS_CAN, ++ RTDM_SUBCLASS_GENERIC, ++ RTCAN_PROFILE_VER), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtcan_socket), ++ .protocol_family = PF_CAN, ++ .socket_type = SOCK_RAW, ++ .ops = { ++ .socket = rtcan_raw_socket, ++ .close = rtcan_raw_close, ++ .ioctl_nrt = rtcan_raw_ioctl, ++ .recvmsg_rt = rtcan_raw_recvmsg, ++ .sendmsg_rt = rtcan_raw_sendmsg, ++ }, ++}; ++ ++static struct rtdm_device rtcan_device = { ++ .driver = &rtcan_driver, ++ .label = "rtcan", ++}; ++ ++int __init rtcan_raw_proto_register(void) ++{ ++ return rtdm_dev_register(&rtcan_device); ++} ++ ++void __exit rtcan_raw_proto_unregister(void) ++{ ++ rtdm_dev_unregister(&rtcan_device); ++} ++ ++ ++EXPORT_SYMBOL_GPL(rtcan_rcv); +--- linux/drivers/xenomai/can/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/Makefile 2021-04-07 16:01:26.460635262 +0800 +@@ -0,0 +1,10 @@ ++ ++ccflags-y += -Idrivers/xenomai/can ++ ++obj-$(CONFIG_XENO_DRIVERS_CAN) += xeno_can.o mscan/ sja1000/ ++obj-$(CONFIG_XENO_DRIVERS_CAN_FLEXCAN) += xeno_can_flexcan.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_VIRT) += xeno_can_virt.o ++ ++xeno_can-y := rtcan_dev.o rtcan_socket.o rtcan_module.o rtcan_raw.o rtcan_raw_dev.o rtcan_raw_filter.o ++xeno_can_virt-y := rtcan_virt.o ++xeno_can_flexcan-y := rtcan_flexcan.o +--- linux/drivers/xenomai/can/rtcan_internal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_internal.h 2021-04-07 16:01:26.455635269 +0800 +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from RTnet project file stack/include/rtnet_internal.h: ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ */ ++ ++#ifndef __RTCAN_INTERNAL_H_ ++#define __RTCAN_INTERNAL_H_ ++ ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_DEBUG ++#define RTCAN_ASSERT(expr, func) \ ++ if (!(expr)) { \ ++ rtdm_printk("Assertion failed! %s:%s:%d %s\n", \ ++ __FILE__, __FUNCTION__, __LINE__, (#expr)); \ ++ func \ ++ } ++#else ++#define RTCAN_ASSERT(expr, func) ++#endif /* CONFIG_RTCAN_CHECKED */ ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_DEBUG ++# define RTCAN_DBG(fmt,args...) do { printk(fmt ,##args); } while (0) ++# define RTCAN_RTDM_DBG(fmt,args...) do { rtdm_printk(fmt ,##args); } while (0) ++#else ++# define RTCAN_DBG(fmt,args...) do {} while (0) ++# define RTCAN_RTDM_DBG(fmt,args...) do {} while (0) ++#endif ++ ++#define rtcan_priv(dev) (dev)->priv ++#define rtcandev_dbg(dev, fmt, args...) \ ++ printk(KERN_DEBUG "%s: " fmt, (dev)->name, ##args) ++#define rtcandev_info(dev, fmt, args...) \ ++ printk(KERN_INFO "%s: " fmt, (dev)->name, ##args) ++#define rtcandev_warn(dev, fmt, args...) \ ++ printk(KERN_WARNING "%s: " fmt, (dev)->name, ##args) ++#define rtcandev_err(dev, fmt, args...) \ ++ printk(KERN_ERR "%s: " fmt, (dev)->name, ##args) ++ ++#endif /* __RTCAN_INTERNAL_H_ */ +--- linux/drivers/xenomai/can/rtcan_list.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_list.h 2021-04-07 16:01:26.449635278 +0800 +@@ -0,0 +1,68 @@ ++/* ++ * List management for the RTDM RTCAN device driver ++ * ++ * Copyright (C) 2005,2006 Sebastian Smolorz ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __RTCAN_LIST_H_ ++#define __RTCAN_LIST_H_ ++ ++#include "rtcan_socket.h" ++ ++ ++/* ++ * List element in a single linked list used for registering reception sockets. ++ * Every single struct can_filter which was bound to a socket gets such a ++ * list entry. There is no member for the CAN interface because there is one ++ * reception list for every CAN controller. This is because when a CAN message ++ * is received it is clear from which interface and therefore minimizes ++ * searching time. ++ */ ++struct rtcan_recv { ++ can_filter_t can_filter; /* filter used for deciding if ++ * a socket wants to get a CAN ++ * message */ ++ unsigned int match_count; /* count accepted messages */ ++ struct rtcan_socket *sock; /* pointer to registered socket ++ */ ++ struct rtcan_recv *next; /* pointer to next list element ++ */ ++}; ++ ++ ++/* ++ * Element in a TX wait queue. ++ * ++ * Every socket holds a TX wait queue where all RT tasks are queued when they ++ * are blocked while waiting to be able to transmit a message via this socket. ++ * ++ * Every sender holds its own element. 
++ */ ++struct tx_wait_queue { ++ struct list_head tx_wait_list; /* List pointers */ ++ rtdm_task_t *rt_task; /* Pointer to task handle */ ++}; ++ ++ ++/* Spinlock for all reception lists and also for some members in ++ * struct rtcan_socket */ ++extern rtdm_lock_t rtcan_recv_list_lock; ++ ++ ++#endif /* __RTCAN_LIST_H_ */ +--- linux/drivers/xenomai/can/rtcan_module.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_module.c 2021-04-07 16:01:26.445635284 +0800 +@@ -0,0 +1,450 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from RTnet project file stack/rtcan_module.c: ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003-2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++ ++ ++const char rtcan_rtdm_provider_name[] = ++ "(C) 2006 RT-Socket-CAN Development Team"; ++ ++ ++#ifdef CONFIG_PROC_FS ++ ++struct proc_dir_entry *rtcan_proc_root; ++ ++static void rtcan_dev_get_ctrlmode_name(can_ctrlmode_t ctrlmode, ++ char* name, int max_len) ++{ ++ snprintf(name, max_len, "%s%s", ++ ctrlmode & CAN_CTRLMODE_LISTENONLY ? "listen-only " : "", ++ ctrlmode & CAN_CTRLMODE_LOOPBACK ? 
"loopback " : ""); ++} ++ ++static char *rtcan_state_names[] = { ++ "active", "warning", "passive" , "bus-off", ++ "scanning", "stopped", "sleeping" ++}; ++ ++static void rtcan_dev_get_state_name(can_state_t state, ++ char* name, int max_len) ++{ ++ if (state >= CAN_STATE_ACTIVE && ++ state <= CAN_STATE_SLEEPING) ++ strncpy(name, rtcan_state_names[state], max_len); ++ else ++ strncpy(name, "unknown", max_len); ++} ++ ++static void rtcan_dev_get_baudrate_name(can_baudrate_t baudrate, ++ char* name, int max_len) ++{ ++ switch (baudrate) { ++ case CAN_BAUDRATE_UNCONFIGURED: ++ strncpy(name, "undefined", max_len); ++ break; ++ case CAN_BAUDRATE_UNKNOWN: ++ strncpy(name, "unknown", max_len); ++ break; ++ default: ++ ksformat(name, max_len, "%d", baudrate); ++ break; ++ } ++} ++ ++static void rtcan_dev_get_bittime_name(struct can_bittime *bit_time, ++ char* name, int max_len) ++{ ++ switch (bit_time->type) { ++ case CAN_BITTIME_STD: ++ ksformat(name, max_len, ++ "brp=%d prop_seg=%d phase_seg1=%d " ++ "phase_seg2=%d sjw=%d sam=%d", ++ bit_time->std.brp, ++ bit_time->std.prop_seg, ++ bit_time->std.phase_seg1, ++ bit_time->std.phase_seg2, ++ bit_time->std.sjw, ++ bit_time->std.sam); ++ break; ++ case CAN_BITTIME_BTR: ++ ksformat(name, max_len, "btr0=0x%02x btr1=0x%02x", ++ bit_time->btr.btr0, bit_time->btr.btr1); ++ break; ++ default: ++ strncpy(name, "unknown", max_len); ++ break; ++ } ++} ++ ++static void rtcan_get_timeout_name(nanosecs_rel_t timeout, ++ char* name, int max_len) ++{ ++ if (timeout == RTDM_TIMEOUT_INFINITE) ++ strncpy(name, "infinite", max_len); ++ else ++ ksformat(name, max_len, "%lld", (long long)timeout); ++} ++ ++static int rtcan_read_proc_devices(struct seq_file *p, void *data) ++{ ++ int i; ++ struct rtcan_device *dev; ++ char state_name[20], baudrate_name[20]; ++ ++ if (down_interruptible(&rtcan_devices_nrt_lock)) ++ return -ERESTARTSYS; ++ ++ /* Name___________ _Baudrate State___ _TX_Counts _TX_Counts ____Errors ++ * rtcan0 125000 stopped 1234567890 1234567890 1234567890 ++ * rtcan1 undefined warning 1234567890 1234567890 1234567890 ++ * rtcan2 undefined scanning 1234567890 1234567890 1234567890 ++ */ ++ seq_printf(p, "Name___________ _Baudrate State___ TX_Counter RX_Counter " ++ "____Errors\n"); ++ ++ for (i = 1; i <= RTCAN_MAX_DEVICES; i++) { ++ if ((dev = rtcan_dev_get_by_index(i)) != NULL) { ++ rtcan_dev_get_state_name(dev->state, ++ state_name, sizeof(state_name)); ++ rtcan_dev_get_baudrate_name(dev->baudrate, ++ baudrate_name, sizeof(baudrate_name)); ++ seq_printf(p, "%-15s %9s %-8s %10d %10d %10d\n", ++ dev->name, baudrate_name, state_name, dev->tx_count, ++ dev->rx_count, dev->err_count); ++ rtcan_dev_dereference(dev); ++ } ++ } ++ ++ up(&rtcan_devices_nrt_lock); ++ ++ return 0; ++} ++ ++static int rtcan_proc_devices_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_read_proc_devices, NULL); ++} ++ ++static const struct file_operations rtcan_proc_devices_ops = { ++ .open = rtcan_proc_devices_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++static int rtcan_read_proc_sockets(struct seq_file *p, void *data) ++{ ++ struct rtcan_socket *sock; ++ struct rtdm_fd *fd; ++ struct rtcan_device *dev; ++ char name[IFNAMSIZ] = "not-bound"; ++ char rx_timeout[20], tx_timeout[20]; ++ rtdm_lockctx_t lock_ctx; ++ int ifindex; ++ ++ if (down_interruptible(&rtcan_devices_nrt_lock)) ++ return -ERESTARTSYS; ++ ++ /* Name___________ Filter ErrMask RX_Timeout TX_Timeout RX_BufFull TX_Lo ++ * rtcan0 1 0x00010 
1234567890 1234567890 1234567890 12345 ++ */ ++ seq_printf(p, "Name___________ Filter ErrMask RX_Timeout_ns " ++ "TX_Timeout_ns RX_BufFull TX_Lo\n"); ++ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ ++ list_for_each_entry(sock, &rtcan_socket_list, socket_list) { ++ fd = rtcan_socket_to_fd(sock); ++ if (rtcan_sock_is_bound(sock)) { ++ ifindex = atomic_read(&sock->ifindex); ++ if (ifindex) { ++ dev = rtcan_dev_get_by_index(ifindex); ++ if (dev) { ++ strncpy(name, dev->name, IFNAMSIZ); ++ rtcan_dev_dereference(dev); ++ } ++ } else ++ ksformat(name, sizeof(name), "%d", ifindex); ++ } ++ rtcan_get_timeout_name(sock->tx_timeout, ++ tx_timeout, sizeof(tx_timeout)); ++ rtcan_get_timeout_name(sock->rx_timeout, ++ rx_timeout, sizeof(rx_timeout)); ++ seq_printf(p, "%-15s %6d 0x%05x %13s %13s %10d %5d\n", ++ name, sock->flistlen, sock->err_mask, ++ rx_timeout, tx_timeout, sock->rx_buf_full, ++ rtcan_loopback_enabled(sock)); ++ } ++ ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ up(&rtcan_devices_nrt_lock); ++ ++ return 0; ++} ++ ++static int rtcan_proc_sockets_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_read_proc_sockets, NULL); ++} ++ ++static const struct file_operations rtcan_proc_sockets_ops = { ++ .open = rtcan_proc_sockets_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++ ++static int rtcan_read_proc_info(struct seq_file *p, void *data) ++{ ++ struct rtcan_device *dev = p->private; ++ char state_name[20], baudrate_name[20]; ++ char ctrlmode_name[80], bittime_name[80]; ++ ++ if (down_interruptible(&rtcan_devices_nrt_lock)) ++ return -ERESTARTSYS; ++ ++ rtcan_dev_get_state_name(dev->state, ++ state_name, sizeof(state_name)); ++ rtcan_dev_get_ctrlmode_name(dev->ctrl_mode, ++ ctrlmode_name, sizeof(ctrlmode_name)); ++ rtcan_dev_get_baudrate_name(dev->baudrate, ++ baudrate_name, sizeof(baudrate_name)); ++ rtcan_dev_get_bittime_name(&dev->bit_time, ++ bittime_name, sizeof(bittime_name)); ++ ++ seq_printf(p, "Device %s\n", dev->name); ++ seq_printf(p, "Controller %s\n", dev->ctrl_name); ++ seq_printf(p, "Board %s\n", dev->board_name); ++ seq_printf(p, "Clock-Hz %d\n", dev->can_sys_clock); ++ seq_printf(p, "Baudrate %s\n", baudrate_name); ++ seq_printf(p, "Bit-time %s\n", bittime_name); ++ seq_printf(p, "Ctrl-Mode %s\n", ctrlmode_name); ++ seq_printf(p, "State %s\n", state_name); ++ seq_printf(p, "TX-Counter %d\n", dev->tx_count); ++ seq_printf(p, "RX-Counter %d\n", dev->rx_count); ++ seq_printf(p, "Errors %d\n", dev->err_count); ++#ifdef RTCAN_USE_REFCOUNT ++ seq_printf(p, "Refcount %d\n", atomic_read(&dev->refcount)); ++#endif ++ ++ up(&rtcan_devices_nrt_lock); ++ ++ return 0; ++} ++ ++static int rtcan_proc_info_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_read_proc_info, PDE_DATA(inode)); ++} ++ ++static const struct file_operations rtcan_proc_info_ops = { ++ .open = rtcan_proc_info_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++ ++ ++static int rtcan_read_proc_filter(struct seq_file *p, void *data) ++{ ++ struct rtcan_device *dev = p->private; ++ struct rtcan_recv *recv_listener = dev->recv_list; ++ struct rtdm_fd *fd; ++ rtdm_lockctx_t lock_ctx; ++ ++ /* __CAN_ID__ _CAN_Mask_ Inv MatchCount ++ * 0x12345678 0x12345678 no 1234567890 ++ */ ++ ++ seq_printf(p, "__CAN_ID__ _CAN_Mask_ Inv MatchCount\n"); ++ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ ++ /* Loop over the reception list of the 
device */ ++ while (recv_listener != NULL) { ++ fd = rtcan_socket_to_fd(recv_listener->sock); ++ ++ seq_printf(p, "0x%08x 0x%08x %s %10d\n", ++ recv_listener->can_filter.can_id, ++ recv_listener->can_filter.can_mask & ~CAN_INV_FILTER, ++ (recv_listener->can_filter.can_mask & CAN_INV_FILTER) ? ++ "yes" : " no", ++ recv_listener->match_count); ++ ++ recv_listener = recv_listener->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ return 0; ++} ++ ++static int rtcan_proc_filter_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_read_proc_filter, PDE_DATA(inode)); ++} ++ ++static const struct file_operations rtcan_proc_filter_ops = { ++ .open = rtcan_proc_filter_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++ ++ ++static int rtcan_read_proc_version(struct seq_file *p, void *data) ++{ ++ seq_printf(p, "RT-Socket-CAN %d.%d.%d\n", ++ RTCAN_MAJOR_VER, RTCAN_MINOR_VER, RTCAN_BUGFIX_VER); ++ ++ return 0; ++} ++ ++static int rtcan_proc_version_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_read_proc_version, NULL); ++} ++ ++static const struct file_operations rtcan_proc_version_ops = { ++ .open = rtcan_proc_version_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++ ++void rtcan_dev_remove_proc(struct rtcan_device* dev) ++{ ++ if (!dev->proc_root) ++ return; ++ ++ remove_proc_entry("info", dev->proc_root); ++ remove_proc_entry("filters", dev->proc_root); ++ remove_proc_entry(dev->name, rtcan_proc_root); ++ ++ dev->proc_root = NULL; ++} ++ ++int rtcan_dev_create_proc(struct rtcan_device* dev) ++{ ++ if (!rtcan_proc_root) ++ return -EINVAL; ++ ++ dev->proc_root = proc_mkdir(dev->name, rtcan_proc_root); ++ if (!dev->proc_root) { ++ printk("%s: unable to create /proc device entries\n", dev->name); ++ return -1; ++ } ++ ++ proc_create_data("info", S_IFREG | S_IRUGO | S_IWUSR, dev->proc_root, ++ &rtcan_proc_info_ops, dev); ++ proc_create_data("filters", S_IFREG | S_IRUGO | S_IWUSR, dev->proc_root, ++ &rtcan_proc_filter_ops, dev); ++ return 0; ++ ++} ++ ++ ++static int rtcan_proc_register(void) ++{ ++ rtcan_proc_root = proc_mkdir("rtcan", NULL); ++ if (!rtcan_proc_root) { ++ printk("rtcan: unable to initialize /proc entries\n"); ++ return -1; ++ } ++ ++ proc_create("devices", S_IFREG | S_IRUGO | S_IWUSR, rtcan_proc_root, ++ &rtcan_proc_devices_ops); ++ proc_create("version", S_IFREG | S_IRUGO | S_IWUSR, rtcan_proc_root, ++ &rtcan_proc_version_ops); ++ proc_create("sockets", S_IFREG | S_IRUGO | S_IWUSR, rtcan_proc_root, ++ &rtcan_proc_sockets_ops); ++ return 0; ++} ++ ++ ++ ++static void rtcan_proc_unregister(void) ++{ ++ remove_proc_entry("devices", rtcan_proc_root); ++ remove_proc_entry("version", rtcan_proc_root); ++ remove_proc_entry("sockets", rtcan_proc_root); ++ remove_proc_entry("rtcan", 0); ++} ++#endif /* CONFIG_PROC_FS */ ++ ++ ++ ++int __init rtcan_init(void) ++{ ++ int err = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ printk("RT-Socket-CAN %d.%d.%d - %s\n", ++ RTCAN_MAJOR_VER, RTCAN_MINOR_VER, RTCAN_BUGFIX_VER, ++ rtcan_rtdm_provider_name); ++ ++ if ((err = rtcan_raw_proto_register()) != 0) ++ goto out; ++ ++#ifdef CONFIG_PROC_FS ++ if ((err = rtcan_proc_register()) != 0) ++ goto out; ++#endif ++ ++ out: ++ return err; ++} ++ ++ ++void __exit rtcan_exit(void) ++{ ++ rtcan_raw_proto_unregister(); ++#ifdef CONFIG_PROC_FS ++ rtcan_proc_unregister(); ++#endif ++ ++ printk("rtcan: unloaded\n"); ++} ++ ++ 
++module_init(rtcan_init); ++module_exit(rtcan_exit); +--- linux/drivers/xenomai/can/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/Kconfig 2021-04-07 16:01:26.440635291 +0800 +@@ -0,0 +1,91 @@ ++menu "CAN drivers" ++ ++config XENO_DRIVERS_CAN ++ tristate "RT-Socket-CAN, CAN raw socket interface" ++ help ++ RT-Socket-CAN is a real-time socket interface for CAN controllers. ++ ++config XENO_DRIVERS_CAN_DEBUG ++ depends on XENO_DRIVERS_CAN && PROC_FS ++ bool "Enable debug output" ++ default y ++ help ++ ++ This option activates debugging checks and enhanced output for the ++ RT-Socket-CAN driver. It also allows to list the hardware registers ++ of the registered CAN controllers. It is a recommended option for ++ getting started and analysing potential problems. For production ++ purposes, it should be switched off (for the sake of latency). ++ ++config XENO_DRIVERS_CAN_LOOPBACK ++ depends on XENO_DRIVERS_CAN ++ bool "Enable TX loopback to local sockets" ++ default n ++ help ++ ++ This options adds support for TX loopback to local sockets. Normally, ++ messages sent to the CAN bus are not visible to sockets listening to ++ the same local device. When this option is enabled, TX messages are ++ looped back locally when the transmit has been done by default. This ++ behaviour can be deactivated or reactivated with "setsockopt". Enable ++ this option, if you want to have a "net-alike" behaviour. ++ ++config XENO_DRIVERS_CAN_RXBUF_SIZE ++ depends on XENO_DRIVERS_CAN ++ int "Size of receive ring buffers (must be 2^N)" ++ default 1024 ++ ++config XENO_DRIVERS_CAN_MAX_DEVICES ++ depends on XENO_DRIVERS_CAN ++ int "Maximum number of devices" ++ default 4 ++ ++config XENO_DRIVERS_CAN_MAX_RECEIVERS ++ depends on XENO_DRIVERS_CAN ++ int "Maximum number of receive filters per device" ++ default 16 ++ help ++ ++ The driver maintains a receive filter list per device for fast access. ++ ++config XENO_DRIVERS_CAN_BUS_ERR ++ depends on XENO_DRIVERS_CAN ++ bool ++ default n ++ help ++ ++ To avoid unnecessary bus error interrupt flooding, this option enables ++ bus error interrupts when an application is calling a receive function ++ on a socket listening on bus errors. After one bus error has occured, ++ the interrupt will be disabled to allow the application time for error ++ processing. This option is automatically selected for CAN controllers ++ supporting bus error interrupts like the SJA1000. ++ ++config XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++ depends on XENO_DRIVERS_CAN ++ bool "Old bit-time calculation algorithm (deprecated)" ++ default n ++ help ++ ++ This option allows to enable the old algorithm to calculate the ++ CAN bit-timing parameters for backward compatibility. ++ ++config XENO_DRIVERS_CAN_VIRT ++ depends on XENO_DRIVERS_CAN ++ tristate "Virtual CAN bus driver" ++ help ++ ++ This driver provides two CAN ports that are virtually interconnected. ++ More ports can be enabled with the module parameter "devices". ++ ++config XENO_DRIVERS_CAN_FLEXCAN ++ depends on XENO_DRIVERS_CAN && OF && !XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++ tristate "Freescale FLEXCAN based chips" ++ help ++ ++ Say Y here if you want to support for Freescale FlexCAN. 
++ ++source "drivers/xenomai/can/mscan/Kconfig" ++source "drivers/xenomai/can/sja1000/Kconfig" ++ ++endmenu +--- linux/drivers/xenomai/can/rtcan_raw_filter.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_raw_filter.c 2021-04-07 16:01:26.435635298 +0800 +@@ -0,0 +1,256 @@ ++/* ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; eitherer version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++ ++#include ++ ++#include ++#include "rtcan_internal.h" ++#include "rtcan_socket.h" ++#include "rtcan_list.h" ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++ ++ ++#if 0 ++void rtcan_raw_print_filter(struct rtcan_device *dev) ++{ ++ int i; ++ struct rtcan_recv *r = dev->receivers; ++ ++ rtdm_printk("%s: recv_list=%p empty_list=%p free_entries=%d\n", ++ dev->name, dev->recv_list, dev->empty_list, dev->free_entries); ++ for (i = 0; i < RTCAN_MAX_RECEIVERS; i++, r++) { ++ rtdm_printk("%2d %p sock=%p next=%p id=%x mask=%x\n", ++ i, r, r->sock, r->next, ++ r->can_filter.can_id, r->can_filter.can_mask); ++ } ++} ++#else ++#define rtcan_raw_print_filter(dev) ++#endif ++ ++ ++static inline void rtcan_raw_mount_filter(can_filter_t *recv_filter, ++ can_filter_t *filter) ++{ ++ if (filter->can_id & CAN_INV_FILTER) { ++ recv_filter->can_id = filter->can_id & ~CAN_INV_FILTER; ++ recv_filter->can_mask = filter->can_mask | CAN_INV_FILTER; ++ } else { ++ recv_filter->can_id = filter->can_id; ++ recv_filter->can_mask = filter->can_mask & ~CAN_INV_FILTER; ++ } ++ ++ /* Apply mask for fast filter check */ ++ recv_filter->can_id &= recv_filter->can_mask; ++} ++ ++ ++int rtcan_raw_check_filter(struct rtcan_socket *sock, int ifindex, ++ struct rtcan_filter_list *flist) ++{ ++ int old_ifindex = 0, old_flistlen_all = 0; ++ int free_entries, i, begin, end; ++ struct rtcan_device *dev; ++ int flistlen; ++ ++ if (rtcan_flist_no_filter(flist)) ++ return 0; ++ ++ /* Check if filter list has been defined by user */ ++ flistlen = (flist) ? flist->flistlen : 1; ++ ++ /* Now we check if a reception list would overflow. This takes some ++ * preparation, so let's go ... */ ++ ++ /* Check current bind status */ ++ if (rtcan_sock_has_filter(sock)) { ++ /* Socket is bound */ ++ i = atomic_read(&sock->ifindex); ++ ++ if (i == 0) ++ /* Socket was bound to ALL interfaces */ ++ old_flistlen_all = sock->flistlen; ++ else /* Socket was bound to only one interface */ ++ old_ifindex = i; ++ } ++ ++ if (ifindex) { ++ /* We bind the socket to only one interface. */ ++ begin = ifindex; ++ end = ifindex; ++ } else { ++ /* Socket must be bound to all interfaces. 
*/ ++ begin = 1; ++ end = RTCAN_MAX_DEVICES; ++ } ++ ++ /* Check if there is space for the new binding */ ++ for (i = begin; i <= end; i++) { ++ if ((dev = rtcan_dev_get_by_index(i)) == NULL) ++ continue; ++ free_entries = dev->free_entries + old_flistlen_all; ++ rtcan_dev_dereference(dev); ++ if (i == old_ifindex) ++ free_entries += sock->flistlen; ++ /* Compare free list space to new filter list length */ ++ if (free_entries < flistlen) ++ return -ENOSPC; ++ } ++ ++ return 0; ++} ++ ++ ++int rtcan_raw_add_filter(struct rtcan_socket *sock, int ifindex) ++{ ++ int i, j, begin, end; ++ struct rtcan_recv *first, *last; ++ struct rtcan_device *dev; ++ /* Check if filter list has been defined by user */ ++ int flistlen; ++ ++ if (rtcan_flist_no_filter(sock->flist)) { ++ return 0; ++ } ++ ++ flistlen = (sock->flist) ? sock->flist->flistlen : 0; ++ ++ if (ifindex) { ++ /* We bind the socket to only one interface. */ ++ begin = ifindex; ++ end = ifindex; ++ } else { ++ /* Socket must be bound to all interfaces. */ ++ begin = 1; ++ end = RTCAN_MAX_DEVICES; ++ } ++ ++ for (i = begin; i <= end; i++) { ++ if ((dev = rtcan_dev_get_by_index(i)) == NULL) ++ continue; ++ ++ /* Take first entry of empty list */ ++ first = last = dev->empty_list; ++ /* Check if filter list is empty */ ++ if (flistlen) { ++ /* Filter list is not empty */ ++ /* Register first filter */ ++ rtcan_raw_mount_filter(&last->can_filter, ++ &sock->flist->flist[0]); ++ last->match_count = 0; ++ last->sock = sock; ++ for (j = 1; j < flistlen; j++) { ++ /* Register remaining filters */ ++ last = last->next; ++ rtcan_raw_mount_filter(&last->can_filter, ++ &sock->flist->flist[j]); ++ last->sock = sock; ++ last->match_count = 0; ++ } ++ /* Decrease free entries counter by length of filter list */ ++ dev->free_entries -= flistlen; ++ ++ } else { ++ /* Filter list is empty. Socket must be bound to all CAN IDs. */ ++ /* Fill list entry members */ ++ last->can_filter.can_id = last->can_filter.can_mask = 0; ++ last->sock = sock; ++ last->match_count = 0; ++ /* Decrease free entries counter by 1 ++ * (one filter for all CAN frames) */ ++ dev->free_entries--; ++ } ++ ++ /* Set new empty list header */ ++ dev->empty_list = last->next; ++ /* Add new partial recv list to the head of reception list */ ++ last->next = dev->recv_list; ++ /* Adjust rececption list pointer */ ++ dev->recv_list = first; ++ ++ rtcan_raw_print_filter(dev); ++ rtcan_dev_dereference(dev); ++ } ++ ++ return (flistlen) ? flistlen : 1; ++} ++ ++ ++void rtcan_raw_remove_filter(struct rtcan_socket *sock) ++{ ++ int i, j, begin, end; ++ struct rtcan_recv *first, *next, *last; ++ int ifindex = atomic_read(&sock->ifindex); ++ struct rtcan_device *dev; ++ ++ if (!rtcan_sock_has_filter(sock)) /* nothing to do */ ++ return; ++ ++ if (ifindex) { ++ /* Socket was bound to one interface only. 
*/ ++ begin = ifindex; ++ end = ifindex; ++ } else { ++ /* Socket was bound to all interfaces */ ++ begin = 1; ++ end = RTCAN_MAX_DEVICES; ++ } ++ ++ for (i = begin; i <= end; i++) { ++ ++ if ((dev = rtcan_dev_get_by_index(i)) == NULL) ++ continue; ++ ++ /* Search for first list entry pointing to this socket */ ++ first = NULL; ++ next = dev->recv_list; ++ while (next->sock != sock) { ++ first = next; ++ next = first->next; ++ } ++ ++ /* Now go to the end of the old filter list */ ++ last = next; ++ for (j = 1; j < sock->flistlen; j++) ++ last = last->next; ++ ++ /* Detach found first list entry from reception list */ ++ if (first) ++ first->next = last->next; ++ else ++ dev->recv_list = last->next; ++ /* Add partial list to the head of empty list */ ++ last->next = dev->empty_list; ++ /* Adjust empty list pointer */ ++ dev->empty_list = next; ++ ++ /* Increase free entries counter by length of old filter list */ ++ dev->free_entries += sock->flistlen; ++ ++ rtcan_raw_print_filter(dev); ++ rtcan_dev_dereference(dev); ++ } ++} +--- linux/drivers/xenomai/can/rtcan_raw_dev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_raw_dev.c 2021-04-07 16:01:26.430635305 +0800 +@@ -0,0 +1,455 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger, ++ * Copyright (C) 2005 Marc Kleine-Budde, Pengutronix ++ * Copyright (C) 2006 Andrey Volkov, Varma Electronics ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the version 2 of the GNU General Public License ++ * as published by the Free Software Foundation ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++#include "rtcan_internal.h" ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++ ++#define RTCAN_MAX_TSEG1 15 ++#define RTCAN_MAX_TSEG2 7 ++ ++/* ++ * Calculate standard bit-time values for odd bitrates. 
++ * Most parts of this code is from Arnaud Westenberg ++ */ ++static int rtcan_calc_bit_time(struct rtcan_device *dev, ++ can_baudrate_t rate, ++ struct can_bittime_std *bit_time) ++{ ++ int best_error = 1000000000; ++ int error; ++ int best_tseg=0, best_brp=0, best_rate=0, brp=0; ++ int tseg=0, tseg1=0, tseg2=0; ++ int clock = dev->can_sys_clock; ++ int sjw = 0; ++ int sampl_pt = 90; ++ ++ /* some heuristic specials */ ++ if (rate > ((1000000 + 500000) / 2)) ++ sampl_pt = 75; ++ ++ if (rate < ((12500 + 10000) / 2)) ++ sampl_pt = 75; ++ ++ if (rate < ((100000 + 125000) / 2)) ++ sjw = 1; ++ ++ /* tseg even = round down, odd = round up */ ++ for (tseg = (0 + 0 + 2) * 2; ++ tseg <= (RTCAN_MAX_TSEG2 + RTCAN_MAX_TSEG1 + 2) * 2 + 1; ++ tseg++) { ++ brp = clock / ((1 + tseg / 2) * rate) + tseg % 2; ++ if ((brp == 0) || (brp > 64)) ++ continue; ++ ++ error = rate - clock / (brp * (1 + tseg / 2)); ++ if (error < 0) ++ error = -error; ++ ++ if (error <= best_error) { ++ best_error = error; ++ best_tseg = tseg/2; ++ best_brp = brp - 1; ++ best_rate = clock / (brp * (1 + tseg / 2)); ++ } ++ } ++ ++ if (best_error && (rate / best_error < 10)) { ++ RTCAN_RTDM_DBG("%s: bitrate %d is not possible with %d Hz clock\n", ++ dev->name, rate, clock); ++ return -EDOM; ++ } ++ ++ tseg2 = best_tseg - (sampl_pt * (best_tseg + 1)) / 100; ++ ++ if (tseg2 < 0) ++ tseg2 = 0; ++ ++ if (tseg2 > RTCAN_MAX_TSEG2) ++ tseg2 = RTCAN_MAX_TSEG2; ++ ++ tseg1 = best_tseg - tseg2 - 2; ++ ++ if (tseg1 > RTCAN_MAX_TSEG1) { ++ tseg1 = RTCAN_MAX_TSEG1; ++ tseg2 = best_tseg-tseg1-2; ++ } ++ ++ bit_time->brp = best_brp + 1; ++ bit_time->prop_seg = 0; ++ bit_time->phase_seg1 = tseg1 + 1; ++ bit_time->phase_seg2 = tseg2 + 1; ++ bit_time->sjw = sjw + 1; ++ bit_time->sam = 0; ++ ++ return 0; ++} ++ ++#else /* !CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD */ ++ ++/* This is the bit-time calculation method from the Linux kernel */ ++ ++#define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */ ++ ++static int can_update_spt(const struct can_bittiming_const *btc, ++ unsigned int sampl_pt, unsigned int tseg, ++ unsigned int *tseg1, unsigned int *tseg2) ++{ ++ *tseg2 = tseg + 1 - (sampl_pt * (tseg + 1)) / 1000; ++ *tseg2 = clamp(*tseg2, btc->tseg2_min, btc->tseg2_max); ++ *tseg1 = tseg - *tseg2; ++ if (*tseg1 > btc->tseg1_max) { ++ *tseg1 = btc->tseg1_max; ++ *tseg2 = tseg - *tseg1; ++ } ++ ++ return 1000 * (tseg + 1 - *tseg2) / (tseg + 1); ++} ++ ++static int rtcan_calc_bit_time(struct rtcan_device *dev, ++ can_baudrate_t bitrate, ++ struct can_bittime_std *bt) ++{ ++ const struct can_bittiming_const *btc = dev->bittiming_const; ++ long rate; /* current bitrate */ ++ long rate_error;/* difference between current and target value */ ++ long best_rate_error = 1000000000; ++ int spt; /* current sample point in thousandth */ ++ int spt_error; /* difference between current and target value */ ++ int best_spt_error = 1000; ++ int sampl_pt; /* target sample point */ ++ int best_tseg = 0, best_brp = 0; /* current best values for tseg and brp */ ++ unsigned int brp, tsegall, tseg, tseg1, tseg2; ++ u64 v64; ++ ++ if (!dev->bittiming_const) ++ return -ENOTSUPP; ++ ++ /* Use CIA recommended sample points */ ++ if (bitrate > 800000) ++ sampl_pt = 750; ++ else if (bitrate > 500000) ++ sampl_pt = 800; ++ else ++ sampl_pt = 875; ++ ++ /* tseg even = round down, odd = round up */ ++ for (tseg = (btc->tseg1_max + btc->tseg2_max) * 2 + 1; ++ tseg >= (btc->tseg1_min + btc->tseg2_min) * 2; tseg--) { ++ tsegall = 1 + tseg / 2; ++ ++ /* Compute all possible tseg 
choices (tseg=tseg1+tseg2) */ ++ brp = dev->can_sys_clock / (tsegall * bitrate) + tseg % 2; ++ ++ /* chose brp step which is possible in system */ ++ brp = (brp / btc->brp_inc) * btc->brp_inc; ++ if ((brp < btc->brp_min) || (brp > btc->brp_max)) ++ continue; ++ ++ rate = dev->can_sys_clock / (brp * tsegall); ++ rate_error = abs((long)(bitrate - rate)); ++ ++ /* tseg brp biterror */ ++ if (rate_error > best_rate_error) ++ continue; ++ ++ /* reset sample point error if we have a better bitrate */ ++ if (rate_error < best_rate_error) ++ best_spt_error = 1000; ++ ++ spt = can_update_spt(btc, sampl_pt, tseg / 2, &tseg1, &tseg2); ++ spt_error = abs((long)(sampl_pt - spt)); ++ if (spt_error > best_spt_error) ++ continue; ++ ++ best_spt_error = spt_error; ++ best_rate_error = rate_error; ++ best_tseg = tseg / 2; ++ best_brp = brp; ++ ++ if (rate_error == 0 && spt_error == 0) ++ break; ++ } ++ ++ if (best_rate_error) { ++ /* Error in one-tenth of a percent */ ++ rate_error = (best_rate_error * 1000) / bitrate; ++ if (rate_error > CAN_CALC_MAX_ERROR) { ++ rtcandev_err(dev, ++ "bitrate error %ld.%ld%% too high\n", ++ rate_error / 10, rate_error % 10); ++ return -EDOM; ++ } else { ++ rtcandev_warn(dev, "bitrate error %ld.%ld%%\n", ++ rate_error / 10, rate_error % 10); ++ } ++ } ++ ++ /* real sample point */ ++ sampl_pt = can_update_spt(btc, sampl_pt, best_tseg, &tseg1, &tseg2); ++ ++ v64 = (u64)best_brp * 1000000000UL; ++ do_div(v64, dev->can_sys_clock); ++ bt->prop_seg = tseg1 / 2; ++ bt->phase_seg1 = tseg1 - bt->prop_seg; ++ bt->phase_seg2 = tseg2; ++ bt->sjw = 1; ++ bt->sam = 0; ++ bt->brp = best_brp; ++ ++ /* real bit-rate */ ++ rate = dev->can_sys_clock / (bt->brp * (tseg1 + tseg2 + 1)); ++ ++ rtcandev_dbg(dev, "real bitrate %ld, sampling point %d.%d%%\n", ++ rate, sampl_pt/10, sampl_pt%10); ++ ++ return 0; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD */ ++ ++static inline int rtcan_raw_ioctl_dev_get(struct rtcan_device *dev, ++ int request, struct can_ifreq *ifr) ++{ ++ rtdm_lockctx_t lock_ctx; ++ ++ switch (request) { ++ ++ case SIOCGIFINDEX: ++ ifr->ifr_ifindex = dev->ifindex; ++ break; ++ ++ case SIOCGCANSTATE: ++ rtdm_lock_get_irqsave(&dev->device_lock, lock_ctx); ++ if (dev->do_get_state) ++ dev->state = dev->do_get_state(dev); ++ ifr->ifr_ifru.state = dev->state; ++ rtdm_lock_put_irqrestore(&dev->device_lock, lock_ctx); ++ break; ++ ++ case SIOCGCANCTRLMODE: ++ ifr->ifr_ifru.ctrlmode = dev->ctrl_mode; ++ break; ++ ++ case SIOCGCANBAUDRATE: ++ ifr->ifr_ifru.baudrate = dev->baudrate; ++ break; ++ ++ case SIOCGCANCUSTOMBITTIME: ++ ifr->ifr_ifru.bittime = dev->bit_time; ++ break; ++ } ++ ++ return 0; ++} ++ ++static inline int rtcan_raw_ioctl_dev_set(struct rtcan_device *dev, ++ int request, struct can_ifreq *ifr) ++{ ++ rtdm_lockctx_t lock_ctx; ++ int ret = 0, started = 0; ++ struct can_bittime bit_time, *bt; ++ ++ switch (request) { ++ case SIOCSCANBAUDRATE: ++ if (!dev->do_set_bit_time) ++ return 0; ++ ret = rtcan_calc_bit_time(dev, ifr->ifr_ifru.baudrate, &bit_time.std); ++ if (ret) ++ break; ++ bit_time.type = CAN_BITTIME_STD; ++ break; ++ } ++ ++ rtdm_lock_get_irqsave(&dev->device_lock, lock_ctx); ++ ++ if (dev->do_get_state) ++ dev->state = dev->do_get_state(dev); ++ ++ switch (request) { ++ case SIOCSCANCTRLMODE: ++ case SIOCSCANBAUDRATE: ++ case SIOCSCANCUSTOMBITTIME: ++ if ((started = CAN_STATE_OPERATING(dev->state))) { ++ if ((ret = dev->do_set_mode(dev, CAN_MODE_STOP, &lock_ctx))) ++ goto out; ++ } ++ break; ++ } ++ ++ switch (request) { ++ case SIOCSCANMODE: ++ if 
(dev->do_set_mode && ++ !(ifr->ifr_ifru.mode == CAN_MODE_START && ++ CAN_STATE_OPERATING(dev->state))) ++ ret = dev->do_set_mode(dev, ifr->ifr_ifru.mode, &lock_ctx); ++ break; ++ ++ case SIOCSCANCTRLMODE: ++ dev->ctrl_mode = ifr->ifr_ifru.ctrlmode; ++ break; ++ ++ case SIOCSCANBAUDRATE: ++ ret = dev->do_set_bit_time(dev, &bit_time, &lock_ctx); ++ if (!ret) { ++ dev->baudrate = ifr->ifr_ifru.baudrate; ++ dev->bit_time = bit_time; ++ } ++ break; ++ ++ case SIOCSCANCUSTOMBITTIME: ++ bt = &ifr->ifr_ifru.bittime; ++ ret = dev->do_set_bit_time(dev, bt, &lock_ctx); ++ if (!ret) { ++ dev->bit_time = *bt; ++ if (bt->type == CAN_BITTIME_STD && bt->std.brp) ++ dev->baudrate = (dev->can_sys_clock / ++ (bt->std.brp * (1 + bt->std.prop_seg + ++ bt->std.phase_seg1 + ++ bt->std.phase_seg2))); ++ else ++ dev->baudrate = CAN_BAUDRATE_UNKNOWN; ++ } ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ break; ++ } ++ ++ out: ++ if (started) ++ dev->do_set_mode(dev, CAN_MODE_START, &lock_ctx); ++ ++ rtdm_lock_put_irqrestore(&dev->device_lock, lock_ctx); ++ ++ return ret; ++} ++ ++int rtcan_raw_ioctl_dev(struct rtdm_fd *fd, int request, void *arg) ++{ ++ struct can_ifreq *ifr; ++ int ret = 0, get = 0; ++ union { ++ /* ++ * We need to deal with callers still passing struct ifreq ++ * instead of can_ifreq, which might have a larger memory ++ * footprint (but can't be smaller though). Field offsets ++ * will be the same regardless. ++ */ ++ struct ifreq ifr_legacy; ++ struct can_ifreq ifr_can; ++ } ifr_buf; ++ struct rtcan_device *dev; ++ ++ switch (request) { ++ ++ case SIOCGIFINDEX: ++ case SIOCGCANSTATE: ++ case SIOCGCANBAUDRATE: ++ case SIOCGCANCUSTOMBITTIME: ++ get = 1; ++ /* Falldown wanted. */ ++ case SIOCSCANMODE: ++ case SIOCSCANCTRLMODE: ++ case SIOCSCANBAUDRATE: ++ case SIOCSCANCUSTOMBITTIME: ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy struct can_ifreq from userspace */ ++ if (!rtdm_read_user_ok(fd, arg, ++ sizeof(struct can_ifreq)) || ++ rtdm_copy_from_user(fd, &ifr_buf, arg, ++ sizeof(struct can_ifreq))) ++ return -EFAULT; ++ ++ ifr = &ifr_buf.ifr_can; ++ } else ++ ifr = (struct can_ifreq *)arg; ++ ++ /* Get interface index and data */ ++ dev = rtcan_dev_get_by_name(ifr->ifr_name); ++ if (dev == NULL) ++ return -ENODEV; ++ ++ if (get) { ++ ret = rtcan_raw_ioctl_dev_get(dev, request, ifr); ++ rtcan_dev_dereference(dev); ++ if (ret == 0 && rtdm_fd_is_user(fd)) { ++ /* ++ * Since we yet tested if user memory is rw safe, ++ * we can copy to user space directly. 
++ */ ++ if (rtdm_copy_to_user(fd, arg, ifr, ++ sizeof(struct can_ifreq))) ++ return -EFAULT; ++ } ++ } else { ++ ret = rtcan_raw_ioctl_dev_set(dev, request, ifr); ++ rtcan_dev_dereference(dev); ++ } ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ break; ++ ++ } ++ ++ return ret; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_BUS_ERR ++void __rtcan_raw_enable_bus_err(struct rtcan_socket *sock) ++{ ++ int i, begin, end; ++ struct rtcan_device *dev; ++ rtdm_lockctx_t lock_ctx; ++ int ifindex = atomic_read(&sock->ifindex); ++ ++ if (ifindex) { ++ begin = ifindex; ++ end = ifindex; ++ } else { ++ begin = 1; ++ end = RTCAN_MAX_DEVICES; ++ } ++ ++ for (i = begin; i <= end; i++) { ++ if ((dev = rtcan_dev_get_by_index(i)) == NULL) ++ continue; ++ ++ if (dev->do_enable_bus_err) { ++ rtdm_lock_get_irqsave(&dev->device_lock, lock_ctx); ++ dev->do_enable_bus_err(dev); ++ rtdm_lock_put_irqrestore(&dev->device_lock, lock_ctx); ++ } ++ rtcan_dev_dereference(dev); ++ } ++} ++#endif /* CONFIG_XENO_DRIVERS_CAN_BUS_ERR*/ +--- linux/drivers/xenomai/can/rtcan_flexcan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_flexcan.c 2021-04-07 16:01:26.425635312 +0800 +@@ -0,0 +1,1536 @@ ++/* ++ * RTDM-based FLEXCAN CAN controller driver ++ * ++ * Rebased on linux 4.14.58 flexcan driver: ++ * Copyright (c) 2018 Philippe Gerum ++ * ++ * Original port to RTDM: ++ * Copyright (c) 2012 Wolfgang Grandegger ++ * ++ * Copyright (c) 2005-2006 Varma Electronics Oy ++ * Copyright (c) 2009 Sascha Hauer, Pengutronix ++ * Copyright (c) 2010-2017 Pengutronix, Marc Kleine-Budde ++ * Copyright (c) 2014 David Jander, Protonic Holland ++ * ++ * Based on code originally by Andrey Volkov ++ * ++ * LICENCE: ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation version 2. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++#include "rtcan_internal.h" ++#include ++ ++#define DRV_NAME "flexcan" ++#define DEV_NAME "rtcan%d" ++ ++#define CAN_MAX_DLC 8 ++#define get_can_dlc(i) (min_t(__u8, (i), CAN_MAX_DLC)) ++ ++/* 8 for RX fifo and 2 error handling */ ++#define FLEXCAN_NAPI_WEIGHT (8 + 2) ++ ++/* FLEXCAN module configuration register (CANMCR) bits */ ++#define FLEXCAN_MCR_MDIS BIT(31) ++#define FLEXCAN_MCR_FRZ BIT(30) ++#define FLEXCAN_MCR_FEN BIT(29) ++#define FLEXCAN_MCR_HALT BIT(28) ++#define FLEXCAN_MCR_NOT_RDY BIT(27) ++#define FLEXCAN_MCR_WAK_MSK BIT(26) ++#define FLEXCAN_MCR_SOFTRST BIT(25) ++#define FLEXCAN_MCR_FRZ_ACK BIT(24) ++#define FLEXCAN_MCR_SUPV BIT(23) ++#define FLEXCAN_MCR_SLF_WAK BIT(22) ++#define FLEXCAN_MCR_WRN_EN BIT(21) ++#define FLEXCAN_MCR_LPM_ACK BIT(20) ++#define FLEXCAN_MCR_WAK_SRC BIT(19) ++#define FLEXCAN_MCR_DOZE BIT(18) ++#define FLEXCAN_MCR_SRX_DIS BIT(17) ++#define FLEXCAN_MCR_IRMQ BIT(16) ++#define FLEXCAN_MCR_LPRIO_EN BIT(13) ++#define FLEXCAN_MCR_AEN BIT(12) ++/* MCR_MAXMB: maximum used MBs is MAXMB + 1 */ ++#define FLEXCAN_MCR_MAXMB(x) ((x) & 0x7f) ++#define FLEXCAN_MCR_IDAM_A (0x0 << 8) ++#define FLEXCAN_MCR_IDAM_B (0x1 << 8) ++#define FLEXCAN_MCR_IDAM_C (0x2 << 8) ++#define FLEXCAN_MCR_IDAM_D (0x3 << 8) ++ ++/* FLEXCAN control register (CANCTRL) bits */ ++#define FLEXCAN_CTRL_PRESDIV(x) (((x) & 0xff) << 24) ++#define FLEXCAN_CTRL_RJW(x) (((x) & 0x03) << 22) ++#define FLEXCAN_CTRL_PSEG1(x) (((x) & 0x07) << 19) ++#define FLEXCAN_CTRL_PSEG2(x) (((x) & 0x07) << 16) ++#define FLEXCAN_CTRL_BOFF_MSK BIT(15) ++#define FLEXCAN_CTRL_ERR_MSK BIT(14) ++#define FLEXCAN_CTRL_CLK_SRC BIT(13) ++#define FLEXCAN_CTRL_LPB BIT(12) ++#define FLEXCAN_CTRL_TWRN_MSK BIT(11) ++#define FLEXCAN_CTRL_RWRN_MSK BIT(10) ++#define FLEXCAN_CTRL_SMP BIT(7) ++#define FLEXCAN_CTRL_BOFF_REC BIT(6) ++#define FLEXCAN_CTRL_TSYN BIT(5) ++#define FLEXCAN_CTRL_LBUF BIT(4) ++#define FLEXCAN_CTRL_LOM BIT(3) ++#define FLEXCAN_CTRL_PROPSEG(x) ((x) & 0x07) ++#define FLEXCAN_CTRL_ERR_BUS (FLEXCAN_CTRL_ERR_MSK) ++#define FLEXCAN_CTRL_ERR_STATE \ ++ (FLEXCAN_CTRL_TWRN_MSK | FLEXCAN_CTRL_RWRN_MSK | \ ++ FLEXCAN_CTRL_BOFF_MSK) ++#define FLEXCAN_CTRL_ERR_ALL \ ++ (FLEXCAN_CTRL_ERR_BUS | FLEXCAN_CTRL_ERR_STATE) ++ ++/* FLEXCAN control register 2 (CTRL2) bits */ ++#define FLEXCAN_CTRL2_ECRWRE BIT(29) ++#define FLEXCAN_CTRL2_WRMFRZ BIT(28) ++#define FLEXCAN_CTRL2_RFFN(x) (((x) & 0x0f) << 24) ++#define FLEXCAN_CTRL2_TASD(x) (((x) & 0x1f) << 19) ++#define FLEXCAN_CTRL2_MRP BIT(18) ++#define FLEXCAN_CTRL2_RRS BIT(17) ++#define FLEXCAN_CTRL2_EACEN BIT(16) ++ ++/* FLEXCAN memory error control register (MECR) bits */ ++#define FLEXCAN_MECR_ECRWRDIS BIT(31) ++#define FLEXCAN_MECR_HANCEI_MSK BIT(19) ++#define FLEXCAN_MECR_FANCEI_MSK BIT(18) ++#define FLEXCAN_MECR_CEI_MSK BIT(16) ++#define FLEXCAN_MECR_HAERRIE BIT(15) ++#define FLEXCAN_MECR_FAERRIE BIT(14) ++#define FLEXCAN_MECR_EXTERRIE BIT(13) ++#define FLEXCAN_MECR_RERRDIS BIT(9) ++#define FLEXCAN_MECR_ECCDIS BIT(8) ++#define FLEXCAN_MECR_NCEFAFRZ BIT(7) ++ ++/* FLEXCAN error and status register (ESR) bits */ ++#define FLEXCAN_ESR_TWRN_INT BIT(17) ++#define FLEXCAN_ESR_RWRN_INT BIT(16) ++#define FLEXCAN_ESR_BIT1_ERR BIT(15) ++#define FLEXCAN_ESR_BIT0_ERR BIT(14) ++#define FLEXCAN_ESR_ACK_ERR BIT(13) ++#define FLEXCAN_ESR_CRC_ERR BIT(12) ++#define FLEXCAN_ESR_FRM_ERR BIT(11) ++#define FLEXCAN_ESR_STF_ERR 
BIT(10) ++#define FLEXCAN_ESR_TX_WRN BIT(9) ++#define FLEXCAN_ESR_RX_WRN BIT(8) ++#define FLEXCAN_ESR_IDLE BIT(7) ++#define FLEXCAN_ESR_TXRX BIT(6) ++#define FLEXCAN_EST_FLT_CONF_SHIFT (4) ++#define FLEXCAN_ESR_FLT_CONF_MASK (0x3 << FLEXCAN_EST_FLT_CONF_SHIFT) ++#define FLEXCAN_ESR_FLT_CONF_ACTIVE (0x0 << FLEXCAN_EST_FLT_CONF_SHIFT) ++#define FLEXCAN_ESR_FLT_CONF_PASSIVE (0x1 << FLEXCAN_EST_FLT_CONF_SHIFT) ++#define FLEXCAN_ESR_BOFF_INT BIT(2) ++#define FLEXCAN_ESR_ERR_INT BIT(1) ++#define FLEXCAN_ESR_WAK_INT BIT(0) ++#define FLEXCAN_ESR_ERR_BUS \ ++ (FLEXCAN_ESR_BIT1_ERR | FLEXCAN_ESR_BIT0_ERR | \ ++ FLEXCAN_ESR_ACK_ERR | FLEXCAN_ESR_CRC_ERR | \ ++ FLEXCAN_ESR_FRM_ERR | FLEXCAN_ESR_STF_ERR) ++#define FLEXCAN_ESR_ERR_STATE \ ++ (FLEXCAN_ESR_TWRN_INT | FLEXCAN_ESR_RWRN_INT | FLEXCAN_ESR_BOFF_INT) ++#define FLEXCAN_ESR_ERR_ALL \ ++ (FLEXCAN_ESR_ERR_BUS | FLEXCAN_ESR_ERR_STATE) ++#define FLEXCAN_ESR_ALL_INT \ ++ (FLEXCAN_ESR_TWRN_INT | FLEXCAN_ESR_RWRN_INT | \ ++ FLEXCAN_ESR_BOFF_INT | FLEXCAN_ESR_ERR_INT) ++ ++/* FLEXCAN interrupt flag register (IFLAG) bits */ ++/* Errata ERR005829 step7: Reserve first valid MB */ ++#define FLEXCAN_TX_MB_RESERVED_OFF_FIFO 8 ++#define FLEXCAN_TX_MB_OFF_FIFO 9 ++#define FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP 0 ++#define FLEXCAN_TX_MB_OFF_TIMESTAMP 1 ++#define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST (FLEXCAN_TX_MB_OFF_TIMESTAMP + 1) ++#define FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST 63 ++#define FLEXCAN_RX_MB_TIMESTAMP_COUNT (FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST - \ ++ FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST + 1) ++#define FLEXCAN_IFLAG_MB(x) BIT(x) ++#define FLEXCAN_IFLAG_RX_FIFO_OVERFLOW BIT(7) ++#define FLEXCAN_IFLAG_RX_FIFO_WARN BIT(6) ++#define FLEXCAN_IFLAG_RX_FIFO_AVAILABLE BIT(5) ++ ++/* FLEXCAN message buffers */ ++#define FLEXCAN_MB_CODE_MASK (0xf << 24) ++#define FLEXCAN_MB_CODE_RX_BUSY_BIT (0x1 << 24) ++#define FLEXCAN_MB_CODE_RX_INACTIVE (0x0 << 24) ++#define FLEXCAN_MB_CODE_RX_EMPTY (0x4 << 24) ++#define FLEXCAN_MB_CODE_RX_FULL (0x2 << 24) ++#define FLEXCAN_MB_CODE_RX_OVERRUN (0x6 << 24) ++#define FLEXCAN_MB_CODE_RX_RANSWER (0xa << 24) ++ ++#define FLEXCAN_MB_CODE_TX_INACTIVE (0x8 << 24) ++#define FLEXCAN_MB_CODE_TX_ABORT (0x9 << 24) ++#define FLEXCAN_MB_CODE_TX_DATA (0xc << 24) ++#define FLEXCAN_MB_CODE_TX_TANSWER (0xe << 24) ++ ++#define FLEXCAN_MB_CNT_SRR BIT(22) ++#define FLEXCAN_MB_CNT_IDE BIT(21) ++#define FLEXCAN_MB_CNT_RTR BIT(20) ++#define FLEXCAN_MB_CNT_LENGTH(x) (((x) & 0xf) << 16) ++#define FLEXCAN_MB_CNT_TIMESTAMP(x) ((x) & 0xffff) ++ ++#define FLEXCAN_TIMEOUT_US (50) ++ ++/* FLEXCAN hardware feature flags ++ * ++ * Below is some version info we got: ++ * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR re- ++ * Filter? connected? Passive detection ception in MB ++ * MX25 FlexCAN2 03.00.00.00 no no ? no no ++ * MX28 FlexCAN2 03.00.04.00 yes yes no no no ++ * MX35 FlexCAN2 03.00.00.00 no no ? no no ++ * MX53 FlexCAN2 03.00.00.00 yes no no no no ++ * MX6s FlexCAN3 10.00.12.00 yes yes no no yes ++ * VF610 FlexCAN3 ? no yes no yes yes? ++ * ++ * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected. 
++ */ ++#define FLEXCAN_QUIRK_BROKEN_WERR_STATE BIT(1) /* [TR]WRN_INT not connected */ ++#define FLEXCAN_QUIRK_DISABLE_RXFG BIT(2) /* Disable RX FIFO Global mask */ ++#define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3) /* Enable EACEN and RRS bit in ctrl2 */ ++#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */ ++#define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5) /* Use timestamp based offloading */ ++#define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6) /* No interrupt for error passive */ ++ ++/* Structure of the message buffer */ ++struct flexcan_mb { ++ u32 can_ctrl; ++ u32 can_id; ++ u32 data[2]; ++}; ++ ++/* Structure of the hardware registers */ ++struct flexcan_regs { ++ u32 mcr; /* 0x00 */ ++ u32 ctrl; /* 0x04 */ ++ u32 timer; /* 0x08 */ ++ u32 _reserved1; /* 0x0c */ ++ u32 rxgmask; /* 0x10 */ ++ u32 rx14mask; /* 0x14 */ ++ u32 rx15mask; /* 0x18 */ ++ u32 ecr; /* 0x1c */ ++ u32 esr; /* 0x20 */ ++ u32 imask2; /* 0x24 */ ++ u32 imask1; /* 0x28 */ ++ u32 iflag2; /* 0x2c */ ++ u32 iflag1; /* 0x30 */ ++ union { /* 0x34 */ ++ u32 gfwr_mx28; /* MX28, MX53 */ ++ u32 ctrl2; /* MX6, VF610 */ ++ }; ++ u32 esr2; /* 0x38 */ ++ u32 imeur; /* 0x3c */ ++ u32 lrfr; /* 0x40 */ ++ u32 crcr; /* 0x44 */ ++ u32 rxfgmask; /* 0x48 */ ++ u32 rxfir; /* 0x4c */ ++ u32 _reserved3[12]; /* 0x50 */ ++ struct flexcan_mb mb[64]; /* 0x80 */ ++ /* FIFO-mode: ++ * MB ++ * 0x080...0x08f 0 RX message buffer ++ * 0x090...0x0df 1-5 reserverd ++ * 0x0e0...0x0ff 6-7 8 entry ID table ++ * (mx25, mx28, mx35, mx53) ++ * 0x0e0...0x2df 6-7..37 8..128 entry ID table ++ * size conf'ed via ctrl2::RFFN ++ * (mx6, vf610) ++ */ ++ u32 _reserved4[256]; /* 0x480 */ ++ u32 rximr[64]; /* 0x880 */ ++ u32 _reserved5[24]; /* 0x980 */ ++ u32 gfwr_mx6; /* 0x9e0 - MX6 */ ++ u32 _reserved6[63]; /* 0x9e4 */ ++ u32 mecr; /* 0xae0 */ ++ u32 erriar; /* 0xae4 */ ++ u32 erridpr; /* 0xae8 */ ++ u32 errippr; /* 0xaec */ ++ u32 rerrar; /* 0xaf0 */ ++ u32 rerrdr; /* 0xaf4 */ ++ u32 rerrsynr; /* 0xaf8 */ ++ u32 errsr; /* 0xafc */ ++}; ++ ++struct flexcan_devtype_data { ++ u32 quirks; /* quirks needed for different IP cores */ ++}; ++ ++struct flexcan_timestamped_frame { ++ struct rtcan_skb skb; ++ u32 timestamp; ++ struct list_head next; ++}; ++ ++struct flexcan_priv { ++ unsigned int irq; ++ unsigned int mb_first; ++ unsigned int mb_last; ++ struct can_bittime bittiming; ++ struct flexcan_timestamped_frame *ts_frames; ++ ++ struct flexcan_regs __iomem *regs; ++ struct flexcan_mb __iomem *tx_mb; ++ struct flexcan_mb __iomem *tx_mb_reserved; ++ u8 tx_mb_idx; ++ u32 reg_ctrl_default; ++ u32 reg_imask1_default; ++ u32 reg_imask2_default; ++ ++ struct clk *clk_ipg; ++ struct clk *clk_per; ++ const struct flexcan_devtype_data *devtype_data; ++ struct regulator *reg_xceiver; ++ ++ unsigned long bus_errors; ++}; ++ ++static const struct flexcan_devtype_data fsl_p1010_devtype_data = { ++ .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE | ++ FLEXCAN_QUIRK_BROKEN_PERR_STATE, ++}; ++ ++static const struct flexcan_devtype_data fsl_imx28_devtype_data = { ++ .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE, ++}; ++ ++static const struct flexcan_devtype_data fsl_imx6q_devtype_data = { ++ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | ++ FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_PERR_STATE, ++}; ++ ++static const struct flexcan_devtype_data fsl_vf610_devtype_data = { ++ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | ++ FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | ++ 
FLEXCAN_QUIRK_BROKEN_PERR_STATE, ++}; ++ ++static const struct can_bittiming_const flexcan_bittiming_const = { ++ .name = DRV_NAME, ++ .tseg1_min = 4, ++ .tseg1_max = 16, ++ .tseg2_min = 2, ++ .tseg2_max = 8, ++ .sjw_max = 4, ++ .brp_min = 1, ++ .brp_max = 256, ++ .brp_inc = 1, ++}; ++ ++/* Abstract off the read/write for arm versus ppc. This ++ * assumes that PPC uses big-endian registers and everything ++ * else uses little-endian registers, independent of CPU ++ * endianness. ++ */ ++#if defined(CONFIG_PPC) ++static inline u32 flexcan_read(void __iomem *addr) ++{ ++ return in_be32(addr); ++} ++ ++static inline void flexcan_write(u32 val, void __iomem *addr) ++{ ++ out_be32(addr, val); ++} ++#else ++static inline u32 flexcan_read(void __iomem *addr) ++{ ++ return readl(addr); ++} ++ ++static inline void flexcan_write(u32 val, void __iomem *addr) ++{ ++ writel(val, addr); ++} ++#endif ++ ++static inline void flexcan_error_irq_enable(const struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg_ctrl = (priv->reg_ctrl_default | FLEXCAN_CTRL_ERR_MSK); ++ ++ flexcan_write(reg_ctrl, ®s->ctrl); ++} ++ ++static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg_ctrl = (priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_MSK); ++ ++ flexcan_write(reg_ctrl, ®s->ctrl); ++} ++ ++static inline int flexcan_transceiver_enable(const struct flexcan_priv *priv) ++{ ++ if (!priv->reg_xceiver) ++ return 0; ++ ++ return regulator_enable(priv->reg_xceiver); ++} ++ ++static inline int flexcan_transceiver_disable(const struct flexcan_priv *priv) ++{ ++ if (!priv->reg_xceiver) ++ return 0; ++ ++ return regulator_disable(priv->reg_xceiver); ++} ++ ++static int flexcan_chip_enable(struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; ++ u32 reg; ++ ++ reg = flexcan_read(®s->mcr); ++ reg &= ~FLEXCAN_MCR_MDIS; ++ flexcan_write(reg, ®s->mcr); ++ ++ while (timeout-- && (flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK)) ++ udelay(10); ++ ++ if (flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++static int flexcan_chip_disable(struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; ++ u32 reg; ++ ++ reg = flexcan_read(®s->mcr); ++ reg |= FLEXCAN_MCR_MDIS; ++ flexcan_write(reg, ®s->mcr); ++ ++ while (timeout-- && !(flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK)) ++ udelay(10); ++ ++ if (!(flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK)) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++static int flexcan_chip_freeze(struct rtcan_device *dev) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ unsigned int timeout = 1000 * 1000 * 10 / dev->baudrate; ++ u32 reg; ++ ++ reg = flexcan_read(®s->mcr); ++ reg |= FLEXCAN_MCR_HALT; ++ flexcan_write(reg, ®s->mcr); ++ ++ while (timeout-- && !(flexcan_read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK)) ++ udelay(100); ++ ++ if (!(flexcan_read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK)) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++static int flexcan_chip_unfreeze(struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; ++ u32 reg; ++ ++ reg = flexcan_read(®s->mcr); ++ reg &= ~FLEXCAN_MCR_HALT; ++ flexcan_write(reg, ®s->mcr); ++ ++ while (timeout-- && (flexcan_read(®s->mcr) & 
FLEXCAN_MCR_FRZ_ACK)) ++ udelay(10); ++ ++ if (flexcan_read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++static int flexcan_chip_softreset(struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; ++ ++ flexcan_write(FLEXCAN_MCR_SOFTRST, ®s->mcr); ++ while (timeout-- && (flexcan_read(®s->mcr) & FLEXCAN_MCR_SOFTRST)) ++ udelay(10); ++ ++ if (flexcan_read(®s->mcr) & FLEXCAN_MCR_SOFTRST) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++static int flexcan_start_xmit(struct rtcan_device *dev, struct can_frame *cf) ++{ ++ const struct flexcan_priv *priv = rtcan_priv(dev); ++ u32 can_id, data, ctrl; ++ ++ ctrl = FLEXCAN_MB_CODE_TX_DATA | (cf->can_dlc << 16); ++ if (cf->can_id & CAN_EFF_FLAG) { ++ can_id = cf->can_id & CAN_EFF_MASK; ++ ctrl |= FLEXCAN_MB_CNT_IDE | FLEXCAN_MB_CNT_SRR; ++ } else { ++ can_id = (cf->can_id & CAN_SFF_MASK) << 18; ++ } ++ ++ if (cf->can_id & CAN_RTR_FLAG) ++ ctrl |= FLEXCAN_MB_CNT_RTR; ++ ++ if (cf->can_dlc > CAN_MAX_DLC) ++ cf->can_dlc = CAN_MAX_DLC; ++ ++ if (cf->can_dlc > 0) { ++ data = be32_to_cpup((__be32 *)&cf->data[0]); ++ flexcan_write(data, &priv->tx_mb->data[0]); ++ } ++ if (cf->can_dlc > 4) { ++ data = be32_to_cpup((__be32 *)&cf->data[4]); ++ flexcan_write(data, &priv->tx_mb->data[1]); ++ } ++ ++ flexcan_write(can_id, &priv->tx_mb->can_id); ++ flexcan_write(ctrl, &priv->tx_mb->can_ctrl); ++ ++ /* Errata ERR005829 step8: ++ * Write twice INACTIVE(0x8) code to first MB. ++ */ ++ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, ++ &priv->tx_mb_reserved->can_ctrl); ++ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, ++ &priv->tx_mb_reserved->can_ctrl); ++ ++ return 0; ++} ++ ++static void init_err_skb(struct rtcan_skb *skb) ++{ ++ struct rtcan_rb_frame *cf = &skb->rb_frame; ++ ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE + CAN_ERR_DLC; ++ cf->can_id = CAN_ERR_FLAG; ++ cf->can_dlc = CAN_ERR_DLC; ++ memset(&cf->data[0], 0, cf->can_dlc); ++} ++ ++static void flexcan_irq_bus_err(struct rtcan_device *dev, ++ u32 reg_esr, struct rtcan_skb *skb) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct rtcan_rb_frame *cf = &skb->rb_frame; ++ ++ init_err_skb(skb); ++ ++ cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; ++ ++ if (reg_esr & FLEXCAN_ESR_BIT1_ERR) { ++ rtcandev_dbg(dev, "BIT1_ERR irq\n"); ++ cf->data[2] |= CAN_ERR_PROT_BIT1; ++ } ++ if (reg_esr & FLEXCAN_ESR_BIT0_ERR) { ++ rtcandev_dbg(dev, "BIT0_ERR irq\n"); ++ cf->data[2] |= CAN_ERR_PROT_BIT0; ++ } ++ if (reg_esr & FLEXCAN_ESR_ACK_ERR) { ++ rtcandev_dbg(dev, "ACK_ERR irq\n"); ++ cf->can_id |= CAN_ERR_ACK; ++ cf->data[3] = CAN_ERR_PROT_LOC_ACK; ++ } ++ if (reg_esr & FLEXCAN_ESR_CRC_ERR) { ++ rtcandev_dbg(dev, "CRC_ERR irq\n"); ++ cf->data[2] |= CAN_ERR_PROT_BIT; ++ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; ++ } ++ if (reg_esr & FLEXCAN_ESR_FRM_ERR) { ++ rtcandev_dbg(dev, "FRM_ERR irq\n"); ++ cf->data[2] |= CAN_ERR_PROT_FORM; ++ } ++ if (reg_esr & FLEXCAN_ESR_STF_ERR) { ++ rtcandev_dbg(dev, "STF_ERR irq\n"); ++ cf->data[2] |= CAN_ERR_PROT_STUFF; ++ } ++ ++ priv->bus_errors++; ++} ++ ++struct berr_counter { ++ u16 txerr; ++ u16 rxerr; ++}; ++ ++static void flexcan_change_state(struct rtcan_device *dev, ++ struct rtcan_rb_frame *cf, ++ struct berr_counter *bec, ++ can_state_t new_state) ++{ ++ switch (dev->state) { ++ case CAN_STATE_ERROR_ACTIVE: ++ /* ++ * from: ERROR_ACTIVE ++ * to : ERROR_WARNING, ERROR_PASSIVE, BUS_OFF ++ * => : there was a warning int ++ */ ++ if (new_state >= CAN_STATE_ERROR_WARNING && ++ 
new_state <= CAN_STATE_BUS_OFF) { ++ rtcandev_dbg(dev, "Error Warning IRQ\n"); ++ ++ cf->can_id |= CAN_ERR_CRTL; ++ cf->data[1] = (bec->txerr > bec->rxerr) ? ++ CAN_ERR_CRTL_TX_WARNING : ++ CAN_ERR_CRTL_RX_WARNING; ++ } ++ case CAN_STATE_ERROR_WARNING: /* fallthrough */ ++ /* ++ * from: ERROR_ACTIVE, ERROR_WARNING ++ * to : ERROR_PASSIVE, BUS_OFF ++ * => : error passive int ++ */ ++ if (new_state >= CAN_STATE_ERROR_PASSIVE && ++ new_state <= CAN_STATE_BUS_OFF) { ++ rtcandev_dbg(dev, "Error Passive IRQ\n"); ++ ++ cf->can_id |= CAN_ERR_CRTL; ++ cf->data[1] = (bec->txerr > bec->rxerr) ? ++ CAN_ERR_CRTL_TX_PASSIVE : ++ CAN_ERR_CRTL_RX_PASSIVE; ++ } ++ break; ++ case CAN_STATE_BUS_OFF: ++ rtcandev_err(dev, "BUG! " ++ "hardware recovered automatically from BUS_OFF\n"); ++ break; ++ default: ++ break; ++ } ++ ++ /* process state changes depending on the new state */ ++ switch (new_state) { ++ case CAN_STATE_ERROR_ACTIVE: ++ rtcandev_dbg(dev, "Error Active\n"); ++ cf->can_id |= CAN_ERR_PROT; ++ cf->data[2] = CAN_ERR_PROT_ACTIVE; ++ break; ++ case CAN_STATE_BUS_OFF: ++ cf->can_id |= CAN_ERR_BUSOFF; ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ break; ++ default: ++ break; ++ } ++ ++ dev->state = new_state; ++} ++ ++static bool flexcan_irq_state(struct rtcan_device *dev, u32 reg_esr, ++ struct rtcan_skb *skb) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ enum CAN_STATE new_state, rx_state, tx_state; ++ struct rtcan_rb_frame *cf = &skb->rb_frame; ++ struct berr_counter bec; ++ u32 reg; ++ int flt; ++ ++ reg = flexcan_read(®s->ecr); ++ bec.txerr = (reg >> 0) & 0xff; ++ bec.rxerr = (reg >> 8) & 0xff; ++ ++ flt = reg_esr & FLEXCAN_ESR_FLT_CONF_MASK; ++ if (likely(flt == FLEXCAN_ESR_FLT_CONF_ACTIVE)) { ++ tx_state = unlikely(reg_esr & FLEXCAN_ESR_TX_WRN) ? ++ CAN_STATE_ERROR_WARNING : CAN_STATE_ERROR_ACTIVE; ++ rx_state = unlikely(reg_esr & FLEXCAN_ESR_RX_WRN) ? ++ CAN_STATE_ERROR_WARNING : CAN_STATE_ERROR_ACTIVE; ++ new_state = max(tx_state, rx_state); ++ } else ++ new_state = flt == FLEXCAN_ESR_FLT_CONF_PASSIVE ? ++ CAN_STATE_ERROR_PASSIVE : CAN_STATE_BUS_OFF; ++ ++ /* state hasn't changed */ ++ if (likely(new_state == dev->state)) ++ return false; ++ ++ init_err_skb(skb); ++ ++ flexcan_change_state(dev, cf, &bec, new_state); ++ ++ return true; ++} ++ ++static unsigned int flexcan_mailbox_read(struct rtcan_device *dev, ++ struct rtcan_skb *skb, ++ u32 *timestamp, unsigned int n) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ struct flexcan_mb __iomem *mb = ®s->mb[n]; ++ u32 reg_ctrl, reg_id, reg_iflag1, code; ++ struct rtcan_rb_frame *cf = &skb->rb_frame; ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ do { ++ reg_ctrl = flexcan_read(&mb->can_ctrl); ++ } while (reg_ctrl & FLEXCAN_MB_CODE_RX_BUSY_BIT); ++ ++ /* is this MB empty? 
*/ ++ code = reg_ctrl & FLEXCAN_MB_CODE_MASK; ++ if ((code != FLEXCAN_MB_CODE_RX_FULL) && ++ (code != FLEXCAN_MB_CODE_RX_OVERRUN)) ++ return 0; ++ } else { ++ reg_iflag1 = flexcan_read(®s->iflag1); ++ if (!(reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE)) ++ return 0; ++ ++ reg_ctrl = flexcan_read(&mb->can_ctrl); ++ } ++ ++ /* increase timstamp to full 32 bit */ ++ *timestamp = reg_ctrl << 16; ++ ++ cf->can_dlc = get_can_dlc((reg_ctrl >> 16) & 0xf); ++ reg_id = flexcan_read(&mb->can_id); ++ if (reg_ctrl & FLEXCAN_MB_CNT_IDE) ++ cf->can_id = ((reg_id >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG; ++ else ++ cf->can_id = (reg_id >> 18) & CAN_SFF_MASK; ++ ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE; ++ ++ if (reg_ctrl & FLEXCAN_MB_CNT_RTR) ++ cf->can_id |= CAN_RTR_FLAG; ++ else ++ skb->rb_frame_size += cf->can_dlc; ++ ++ put_unaligned_be32(flexcan_read(&mb->data[0]), cf->data + 0); ++ put_unaligned_be32(flexcan_read(&mb->data[1]), cf->data + 4); ++ ++ cf->can_ifindex = dev->ifindex; ++ ++ /* mark as read */ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ /* Clear IRQ */ ++ if (n < 32) ++ flexcan_write(BIT(n), ®s->iflag1); ++ else ++ flexcan_write(BIT(n - 32), ®s->iflag2); ++ } else { ++ flexcan_write(FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, ®s->iflag1); ++ flexcan_read(®s->timer); ++ } ++ ++ return 1; ++} ++ ++static inline bool flexcan_rx_le(struct flexcan_priv *priv, unsigned int a, unsigned int b) ++{ ++ if (priv->mb_first < priv->mb_last) ++ return a <= b; ++ ++ return a >= b; ++} ++ ++static inline unsigned int flexcan_rx_inc(struct flexcan_priv *priv, unsigned int *val) ++{ ++ if (priv->mb_first < priv->mb_last) ++ return (*val)++; ++ ++ return (*val)--; ++} ++ ++static int flexcan_mailbox_read_timestamp(struct rtcan_device *dev, u64 pending) ++{ ++ struct flexcan_timestamped_frame *new, *pos, *tmp; ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct list_head q, *head; ++ int i, count = 0; ++ ++ INIT_LIST_HEAD(&q); ++ ++ for (i = priv->mb_first; ++ flexcan_rx_le(priv, i, priv->mb_last); ++ flexcan_rx_inc(priv, &i)) { ++ if (!(pending & BIT_ULL(i))) ++ continue; ++ ++ new = priv->ts_frames + (i - priv->mb_first); ++ if (!flexcan_mailbox_read(dev, &new->skb, &new->timestamp, i)) ++ break; ++ ++ head = &q; ++ if (list_empty(&q)) ++ goto add; ++ ++ list_for_each_entry_reverse(pos, &q, next) { ++ /* ++ * Substract two u32 and return result as int, ++ * to keep difference steady around the u32 ++ * overflow. 
++ */ ++ if (((int)(new->timestamp - pos->timestamp)) >= 0) { ++ head = &pos->next; ++ break; ++ } ++ } ++ add: ++ list_add(&new->next, head); ++ count++; ++ } ++ ++ if (list_empty(&q)) ++ return 0; ++ ++ list_for_each_entry_safe(pos, tmp, &q, next) ++ rtcan_rcv(dev, &pos->skb); ++ ++ return count; ++} ++ ++static void flexcan_mailbox_read_fifo(struct rtcan_device *dev) ++{ ++ struct rtcan_skb skb; ++ u32 timestamp; ++ ++ for (;;) { ++ if (!flexcan_mailbox_read(dev, &skb, ×tamp, 0)) ++ break; ++ rtcan_rcv(dev, &skb); ++ } ++} ++ ++static inline u64 flexcan_read_reg_iflag_rx(struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 iflag1, iflag2; ++ ++ iflag2 = flexcan_read(®s->iflag2) & priv->reg_imask2_default; ++ iflag1 = flexcan_read(®s->iflag1) & priv->reg_imask1_default & ++ ~FLEXCAN_IFLAG_MB(priv->tx_mb_idx); ++ ++ return (u64)iflag2 << 32 | iflag1; ++} ++ ++static int flexcan_do_rx(struct rtcan_device *dev, u32 reg_iflag1) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ struct rtcan_skb skb; ++ struct rtcan_rb_frame *cf = &skb.rb_frame; ++ bool input = false; ++ u64 reg; ++ int ret; ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ while ((reg = flexcan_read_reg_iflag_rx(priv))) { ++ input = true; ++ ret = flexcan_mailbox_read_timestamp(dev, reg); ++ if (!ret) ++ break; ++ } ++ } else { ++ if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_OVERFLOW) { ++ flexcan_write(FLEXCAN_IFLAG_RX_FIFO_OVERFLOW, ®s->iflag1); ++ init_err_skb(&skb); ++ cf->can_id |= CAN_ERR_CRTL; ++ cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; ++ input = true; ++ } else if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE) { ++ flexcan_mailbox_read_fifo(dev); ++ input = true; ++ } ++ } ++ ++ return input; ++} ++ ++static int flexcan_irq(rtdm_irq_t *irq_handle) ++{ ++ struct rtcan_device *dev = rtdm_irq_get_arg(irq_handle, void); ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg_iflag1, reg_esr; ++ struct rtcan_skb skb; ++ int handled; ++ ++ rtdm_lock_get(&dev->device_lock); ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ ++ reg_iflag1 = flexcan_read(®s->iflag1); ++ ++ /* reception interrupt */ ++ if (flexcan_do_rx(dev, reg_iflag1)) ++ handled = RTDM_IRQ_HANDLED; ++ ++ /* transmission complete interrupt */ ++ if (reg_iflag1 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) { ++ /* after sending a RTR frame MB is in RX mode */ ++ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, ++ &priv->tx_mb->can_ctrl); ++ flexcan_write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), ®s->iflag1); ++ rtdm_sem_up(&dev->tx_sem); ++ if (rtcan_loopback_pending(dev)) ++ rtcan_loopback(dev); ++ handled = RTDM_IRQ_HANDLED; ++ } ++ ++ reg_esr = flexcan_read(®s->esr); ++ ++ /* ACK all bus error and state change IRQ sources */ ++ if (reg_esr & FLEXCAN_ESR_ALL_INT) { ++ flexcan_write(reg_esr & FLEXCAN_ESR_ALL_INT, ®s->esr); ++ handled = RTDM_IRQ_HANDLED; ++ } ++ ++ /* state change interrupt or broken error state quirk fix is enabled */ ++ if (reg_esr & FLEXCAN_ESR_ERR_STATE) ++ handled = RTDM_IRQ_HANDLED; ++ else if (priv->devtype_data->quirks & (FLEXCAN_QUIRK_BROKEN_WERR_STATE | ++ FLEXCAN_QUIRK_BROKEN_PERR_STATE)) ++ goto esr_err; ++ ++ if (reg_esr & FLEXCAN_ESR_ERR_STATE) { ++ esr_err: ++ if (flexcan_irq_state(dev, reg_esr, &skb)) { ++ rtcan_rcv(dev, &skb); ++ } ++ } ++ ++ /* bus error IRQ - report unconditionally */ ++ if (reg_esr & FLEXCAN_ESR_ERR_BUS) { ++ flexcan_irq_bus_err(dev, 
reg_esr, &skb); ++ rtcan_rcv(dev, &skb); ++ handled = RTDM_IRQ_HANDLED; ++ } ++ ++ rtdm_lock_put(&rtcan_socket_lock); ++ rtdm_lock_put(&rtcan_recv_list_lock); ++ rtdm_lock_put(&dev->device_lock); ++ ++ return handled; ++} ++ ++static void flexcan_set_bittiming(struct rtcan_device *dev) ++{ ++ const struct flexcan_priv *priv = rtcan_priv(dev); ++ const struct can_bittime *bt = &priv->bittiming; ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg; ++ ++ reg = flexcan_read(®s->ctrl); ++ reg &= ~(FLEXCAN_CTRL_PRESDIV(0xff) | ++ FLEXCAN_CTRL_RJW(0x3) | ++ FLEXCAN_CTRL_PSEG1(0x7) | ++ FLEXCAN_CTRL_PSEG2(0x7) | ++ FLEXCAN_CTRL_PROPSEG(0x7) | ++ FLEXCAN_CTRL_LPB | ++ FLEXCAN_CTRL_SMP | ++ FLEXCAN_CTRL_LOM); ++ ++ reg |= FLEXCAN_CTRL_PRESDIV(bt->std.brp - 1) | ++ FLEXCAN_CTRL_PSEG1(bt->std.phase_seg1 - 1) | ++ FLEXCAN_CTRL_PSEG2(bt->std.phase_seg2 - 1) | ++ FLEXCAN_CTRL_RJW(bt->std.sjw - 1) | ++ FLEXCAN_CTRL_PROPSEG(bt->std.prop_seg - 1); ++ ++ if (dev->ctrl_mode & CAN_CTRLMODE_LOOPBACK) ++ reg |= FLEXCAN_CTRL_LPB; ++ if (dev->ctrl_mode & CAN_CTRLMODE_LISTENONLY) ++ reg |= FLEXCAN_CTRL_LOM; ++ if (dev->ctrl_mode & CAN_CTRLMODE_3_SAMPLES) ++ reg |= FLEXCAN_CTRL_SMP; ++ ++ rtcandev_dbg(dev, "writing ctrl=0x%08x\n", reg); ++ flexcan_write(reg, ®s->ctrl); ++ ++ /* print chip status */ ++ rtcandev_dbg(dev, "%s: mcr=0x%08x ctrl=0x%08x\n", __func__, ++ flexcan_read(®s->mcr), flexcan_read(®s->ctrl)); ++} ++ ++/* flexcan_chip_start ++ * ++ * this functions is entered with clocks enabled ++ * ++ */ ++static int flexcan_chip_start(struct rtcan_device *dev) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg_mcr, reg_ctrl, reg_ctrl2, reg_mecr; ++ int err, i; ++ ++ err = clk_prepare_enable(priv->clk_ipg); ++ if (err) ++ return err; ++ ++ err = clk_prepare_enable(priv->clk_per); ++ if (err) ++ goto out_disable_ipg; ++ ++ /* enable module */ ++ err = flexcan_chip_enable(priv); ++ if (err) ++ goto out_disable_per; ++ ++ /* soft reset */ ++ err = flexcan_chip_softreset(priv); ++ if (err) ++ goto out_chip_disable; ++ ++ flexcan_set_bittiming(dev); ++ ++ /* MCR ++ * ++ * enable freeze ++ * enable fifo ++ * halt now ++ * only supervisor access ++ * enable warning int ++ * disable local echo ++ * enable individual RX masking ++ * choose format C ++ * set max mailbox number ++ */ ++ reg_mcr = flexcan_read(®s->mcr); ++ reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff); ++ reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV | ++ FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_SRX_DIS | FLEXCAN_MCR_IRMQ | ++ FLEXCAN_MCR_IDAM_C; ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ reg_mcr &= ~FLEXCAN_MCR_FEN; ++ reg_mcr |= FLEXCAN_MCR_MAXMB(priv->mb_last); ++ } else { ++ reg_mcr |= FLEXCAN_MCR_FEN | ++ FLEXCAN_MCR_MAXMB(priv->tx_mb_idx); ++ } ++ rtcandev_dbg(dev, "%s: writing mcr=0x%08x", __func__, reg_mcr); ++ flexcan_write(reg_mcr, ®s->mcr); ++ ++ /* CTRL ++ * ++ * disable timer sync feature ++ * ++ * disable auto busoff recovery ++ * transmit lowest buffer first ++ * ++ * enable tx and rx warning interrupt ++ * enable bus off interrupt ++ * (== FLEXCAN_CTRL_ERR_STATE) ++ */ ++ reg_ctrl = flexcan_read(®s->ctrl); ++ reg_ctrl &= ~FLEXCAN_CTRL_TSYN; ++ reg_ctrl |= FLEXCAN_CTRL_BOFF_REC | FLEXCAN_CTRL_LBUF | ++ FLEXCAN_CTRL_ERR_STATE; ++ ++ /* enable the "error interrupt" (FLEXCAN_CTRL_ERR_MSK), ++ * on most Flexcan cores, too. Otherwise we don't get ++ * any error warning or passive interrupts. 
++ */ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE) ++ reg_ctrl |= FLEXCAN_CTRL_ERR_MSK; ++ else ++ reg_ctrl &= ~FLEXCAN_CTRL_ERR_MSK; ++ ++ /* save for later use */ ++ priv->reg_ctrl_default = reg_ctrl; ++ /* leave interrupts disabled for now */ ++ reg_ctrl &= ~FLEXCAN_CTRL_ERR_ALL; ++ rtcandev_dbg(dev, "%s: writing ctrl=0x%08x", __func__, reg_ctrl); ++ flexcan_write(reg_ctrl, ®s->ctrl); ++ ++ if ((priv->devtype_data->quirks & FLEXCAN_QUIRK_ENABLE_EACEN_RRS)) { ++ reg_ctrl2 = flexcan_read(®s->ctrl2); ++ reg_ctrl2 |= FLEXCAN_CTRL2_EACEN | FLEXCAN_CTRL2_RRS; ++ flexcan_write(reg_ctrl2, ®s->ctrl2); ++ } ++ ++ /* clear and invalidate all mailboxes first */ ++ for (i = priv->tx_mb_idx; i < ARRAY_SIZE(regs->mb); i++) { ++ flexcan_write(FLEXCAN_MB_CODE_RX_INACTIVE, ++ ®s->mb[i].can_ctrl); ++ } ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ for (i = priv->mb_first; i <= priv->mb_last; i++) ++ flexcan_write(FLEXCAN_MB_CODE_RX_EMPTY, ++ ®s->mb[i].can_ctrl); ++ } ++ ++ /* Errata ERR005829: mark first TX mailbox as INACTIVE */ ++ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, ++ &priv->tx_mb_reserved->can_ctrl); ++ ++ /* mark TX mailbox as INACTIVE */ ++ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, ++ &priv->tx_mb->can_ctrl); ++ ++ /* acceptance mask/acceptance code (accept everything) */ ++ flexcan_write(0x0, ®s->rxgmask); ++ flexcan_write(0x0, ®s->rx14mask); ++ flexcan_write(0x0, ®s->rx15mask); ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_DISABLE_RXFG) ++ flexcan_write(0x0, ®s->rxfgmask); ++ ++ /* clear acceptance filters */ ++ for (i = 0; i < ARRAY_SIZE(regs->mb); i++) ++ flexcan_write(0, ®s->rximr[i]); ++ ++ /* On Vybrid, disable memory error detection interrupts ++ * and freeze mode. ++ * This also works around errata e5295 which generates ++ * false positive memory errors and put the device in ++ * freeze mode. 
++ */ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_DISABLE_MECR) { ++ /* Follow the protocol as described in "Detection ++ * and Correction of Memory Errors" to write to ++ * MECR register ++ */ ++ reg_ctrl2 = flexcan_read(®s->ctrl2); ++ reg_ctrl2 |= FLEXCAN_CTRL2_ECRWRE; ++ flexcan_write(reg_ctrl2, ®s->ctrl2); ++ ++ reg_mecr = flexcan_read(®s->mecr); ++ reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS; ++ flexcan_write(reg_mecr, ®s->mecr); ++ reg_mecr &= ~(FLEXCAN_MECR_NCEFAFRZ | FLEXCAN_MECR_HANCEI_MSK | ++ FLEXCAN_MECR_FANCEI_MSK); ++ flexcan_write(reg_mecr, ®s->mecr); ++ } ++ ++ err = flexcan_transceiver_enable(priv); ++ if (err) ++ goto out_chip_disable; ++ ++ /* synchronize with the can bus */ ++ err = flexcan_chip_unfreeze(priv); ++ if (err) ++ goto out_transceiver_disable; ++ ++ dev->state = CAN_STATE_ERROR_ACTIVE; ++ ++ /* enable interrupts atomically */ ++ rtdm_irq_disable(&dev->irq_handle); ++ flexcan_write(priv->reg_ctrl_default, ®s->ctrl); ++ flexcan_write(priv->reg_imask1_default, ®s->imask1); ++ flexcan_write(priv->reg_imask2_default, ®s->imask2); ++ rtdm_irq_enable(&dev->irq_handle); ++ ++ /* print chip status */ ++ rtcandev_dbg(dev, "%s: reading mcr=0x%08x ctrl=0x%08x\n", __func__, ++ flexcan_read(®s->mcr), flexcan_read(®s->ctrl)); ++ ++ return 0; ++ ++ out_transceiver_disable: ++ flexcan_transceiver_disable(priv); ++ out_chip_disable: ++ flexcan_chip_disable(priv); ++ out_disable_per: ++ clk_disable_unprepare(priv->clk_per); ++ out_disable_ipg: ++ clk_disable_unprepare(priv->clk_ipg); ++ ++ return err; ++} ++ ++/* flexcan_chip_stop ++ * ++ * this functions is entered with clocks enabled ++ */ ++static void flexcan_chip_stop(struct rtcan_device *dev) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ ++ /* freeze + disable module */ ++ flexcan_chip_freeze(dev); ++ flexcan_chip_disable(priv); ++ ++ /* Disable all interrupts */ ++ flexcan_write(0, ®s->imask2); ++ flexcan_write(0, ®s->imask1); ++ flexcan_write(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL, ++ ®s->ctrl); ++ ++ flexcan_transceiver_disable(priv); ++ ++ clk_disable_unprepare(priv->clk_per); ++ clk_disable_unprepare(priv->clk_ipg); ++} ++ ++static int flexcan_mode_start(struct rtcan_device *dev, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ int err = 0; ++ ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ ++ switch (dev->state) { ++ ++ case CAN_STATE_ACTIVE: ++ case CAN_STATE_BUS_WARNING: ++ case CAN_STATE_BUS_PASSIVE: ++ break; ++ ++ case CAN_STATE_STOPPED: ++ /* Register IRQ handler and pass device structure as arg */ ++ err = rtdm_irq_request(&dev->irq_handle, priv->irq, ++ flexcan_irq, 0, DRV_NAME, ++ dev); ++ if (err) { ++ rtcandev_err(dev, "couldn't request irq %d\n", ++ priv->irq); ++ goto out; ++ } ++ ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ ++ /* start chip and queuing */ ++ err = flexcan_chip_start(dev); ++ if (err) { ++ rtdm_irq_free(&dev->irq_handle); ++ rtdm_sem_destroy(&dev->tx_sem); ++ goto out; ++ } ++ break; ++ ++ case CAN_STATE_BUS_OFF: ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ /* start chip and queuing */ ++ err = flexcan_chip_start(dev); ++ if (err) { ++ rtdm_sem_destroy(&dev->tx_sem); ++ goto out; ++ } ++ break; ++ ++ case CAN_STATE_SLEEPING: ++ default: ++ err = 0; ++ break; ++ } ++ ++out: ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ ++ return err; ++} ++ ++static int flexcan_mode_stop(struct rtcan_device *dev, ++ rtdm_lockctx_t 
*lock_ctx) ++{ ++ if (!CAN_STATE_OPERATING(dev->state)) ++ return 0; ++ ++ dev->state = CAN_STATE_STOPPED; ++ ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ ++ flexcan_chip_stop(dev); ++ rtdm_irq_free(&dev->irq_handle); ++ rtdm_sem_destroy(&dev->tx_sem); ++ ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ ++ return 0; ++} ++ ++static int flexcan_set_mode(struct rtcan_device *dev, can_mode_t mode, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ if (mode == CAN_MODE_START) ++ return flexcan_mode_start(dev, lock_ctx); ++ ++ if (mode == CAN_MODE_STOP) ++ return flexcan_mode_stop(dev, lock_ctx); ++ ++ return -EOPNOTSUPP; ++} ++ ++static int flexcan_copy_bittiming(struct rtcan_device *dev, ++ struct can_bittime *bt, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ ++ memcpy(&priv->bittiming, bt, sizeof(*bt)); ++ ++ return 0; ++} ++ ++static int register_flexcandev(struct rtcan_device *dev) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg, err; ++ ++ err = clk_prepare_enable(priv->clk_ipg); ++ if (err) ++ return err; ++ ++ err = clk_prepare_enable(priv->clk_per); ++ if (err) ++ goto out_disable_ipg; ++ ++ /* select "bus clock", chip must be disabled */ ++ err = flexcan_chip_disable(priv); ++ if (err) ++ goto out_disable_per; ++ reg = flexcan_read(®s->ctrl); ++ reg |= FLEXCAN_CTRL_CLK_SRC; ++ flexcan_write(reg, ®s->ctrl); ++ ++ err = flexcan_chip_enable(priv); ++ if (err) ++ goto out_chip_disable; ++ ++ /* set freeze, halt and activate FIFO, restrict register access */ ++ reg = flexcan_read(®s->mcr); ++ reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | ++ FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV; ++ flexcan_write(reg, ®s->mcr); ++ ++ /* Currently we only support newer versions of this core ++ * featuring a RX hardware FIFO (although this driver doesn't ++ * make use of it on some cores). Older cores, found on some ++ * Coldfire derivates are not tested. 
++ */ ++ reg = flexcan_read(®s->mcr); ++ if (!(reg & FLEXCAN_MCR_FEN)) { ++ rtcandev_err(dev, "Could not enable RX FIFO, unsupported core\n"); ++ err = -ENODEV; ++ goto out_chip_disable; ++ } ++ ++ err = rtcan_dev_register(dev); ++ ++ /* disable core and turn off clocks */ ++ out_chip_disable: ++ flexcan_chip_disable(priv); ++ out_disable_per: ++ clk_disable_unprepare(priv->clk_per); ++ out_disable_ipg: ++ clk_disable_unprepare(priv->clk_ipg); ++ ++ return err; ++} ++ ++static void unregister_flexcandev(struct rtcan_device *dev) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ ++ rtcan_dev_unregister(dev); ++ if (priv->ts_frames) ++ kfree(priv->ts_frames); ++} ++ ++static const struct of_device_id flexcan_of_match[] = { ++ { .compatible = "fsl,imx6q-flexcan", .data = &fsl_imx6q_devtype_data, }, ++ { .compatible = "fsl,imx28-flexcan", .data = &fsl_imx28_devtype_data, }, ++ { .compatible = "fsl,p1010-flexcan", .data = &fsl_p1010_devtype_data, }, ++ { .compatible = "fsl,vf610-flexcan", .data = &fsl_vf610_devtype_data, }, ++ { /* sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(of, flexcan_of_match); ++ ++static const struct platform_device_id flexcan_id_table[] = { ++ { .name = "flexcan", .driver_data = (kernel_ulong_t)&fsl_p1010_devtype_data, }, ++ { /* sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(platform, flexcan_id_table); ++ ++static int flexcan_probe(struct platform_device *pdev) ++{ ++ const struct of_device_id *of_id; ++ const struct flexcan_devtype_data *devtype_data; ++ struct rtcan_device *dev; ++ struct flexcan_priv *priv; ++ struct regulator *reg_xceiver; ++ struct resource *mem; ++ struct clk *clk_ipg = NULL, *clk_per = NULL; ++ struct flexcan_regs __iomem *regs; ++ int err, irq; ++ u32 clock_freq = 0; ++ ++ reg_xceiver = devm_regulator_get(&pdev->dev, "xceiver"); ++ if (PTR_ERR(reg_xceiver) == -EPROBE_DEFER) ++ return -EPROBE_DEFER; ++ else if (IS_ERR(reg_xceiver)) ++ reg_xceiver = NULL; ++ ++ if (pdev->dev.of_node) ++ of_property_read_u32(pdev->dev.of_node, ++ "clock-frequency", &clock_freq); ++ ++ if (!clock_freq) { ++ clk_ipg = devm_clk_get(&pdev->dev, "ipg"); ++ if (IS_ERR(clk_ipg)) { ++ dev_err(&pdev->dev, "no ipg clock defined\n"); ++ return PTR_ERR(clk_ipg); ++ } ++ ++ clk_per = devm_clk_get(&pdev->dev, "per"); ++ if (IS_ERR(clk_per)) { ++ dev_err(&pdev->dev, "no per clock defined\n"); ++ return PTR_ERR(clk_per); ++ } ++ clock_freq = clk_get_rate(clk_per); ++ } ++ ++ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ irq = platform_get_irq(pdev, 0); ++ if (irq <= 0) ++ return -ENODEV; ++ ++ regs = devm_ioremap_resource(&pdev->dev, mem); ++ if (IS_ERR(regs)) ++ return PTR_ERR(regs); ++ ++ of_id = of_match_device(flexcan_of_match, &pdev->dev); ++ if (of_id) { ++ devtype_data = of_id->data; ++ } else if (platform_get_device_id(pdev)->driver_data) { ++ devtype_data = (struct flexcan_devtype_data *) ++ platform_get_device_id(pdev)->driver_data; ++ } else { ++ return -ENODEV; ++ } ++ ++ dev = rtcan_dev_alloc(sizeof(struct flexcan_priv), 0); ++ if (!dev) ++ return -ENOMEM; ++ ++ platform_set_drvdata(pdev, dev); ++ ++ priv = rtcan_priv(dev); ++ priv->regs = regs; ++ priv->irq = irq; ++ priv->clk_ipg = clk_ipg; ++ priv->clk_per = clk_per; ++ priv->devtype_data = devtype_data; ++ priv->reg_xceiver = reg_xceiver; ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_TIMESTAMP; ++ priv->tx_mb_reserved = ®s->mb[FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP]; ++ } else { ++ priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_FIFO; ++ 
priv->tx_mb_reserved = ®s->mb[FLEXCAN_TX_MB_RESERVED_OFF_FIFO]; ++ } ++ priv->tx_mb = ®s->mb[priv->tx_mb_idx]; ++ ++ priv->reg_imask1_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx); ++ priv->reg_imask2_default = 0; ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ u64 imask; ++ ++ priv->mb_first = FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST; ++ priv->mb_last = FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST; ++ priv->ts_frames = kzalloc(sizeof(*priv->ts_frames) * ++ FLEXCAN_RX_MB_TIMESTAMP_COUNT, GFP_KERNEL); ++ if (priv->ts_frames == NULL) { ++ err = -ENOMEM; ++ goto failed_fralloc; ++ } ++ ++ imask = GENMASK_ULL(priv->mb_last, priv->mb_first); ++ priv->reg_imask1_default |= imask; ++ priv->reg_imask2_default |= imask >> 32; ++ } else { ++ priv->reg_imask1_default |= FLEXCAN_IFLAG_RX_FIFO_OVERFLOW | ++ FLEXCAN_IFLAG_RX_FIFO_AVAILABLE; ++ priv->ts_frames = NULL; ++ } ++ ++ dev->ctrl_name = "FLEXCAN"; ++ dev->board_name = "FLEXCAN"; ++ dev->base_addr = (unsigned long)regs; ++ dev->can_sys_clock = clock_freq; ++ dev->hard_start_xmit = flexcan_start_xmit; ++ dev->do_set_mode = flexcan_set_mode; ++ dev->do_set_bit_time = flexcan_copy_bittiming; ++ dev->bittiming_const = &flexcan_bittiming_const; ++ dev->state = CAN_STATE_STOPPED; ++ strncpy(dev->name, DEV_NAME, IFNAMSIZ); ++ ++ err = register_flexcandev(dev); ++ if (err) { ++ dev_err(&pdev->dev, "registering netdev failed\n"); ++ goto failed_register; ++ } ++ ++ dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%d)\n", ++ priv->regs, priv->irq); ++ ++ return 0; ++ ++ failed_register: ++ if (priv->ts_frames) ++ kfree(priv->ts_frames); ++ failed_fralloc: ++ rtcan_dev_free(dev); ++ return err; ++} ++ ++static int flexcan_remove(struct platform_device *pdev) ++{ ++ struct rtcan_device *dev = platform_get_drvdata(pdev); ++ ++ unregister_flexcandev(dev); ++ rtcan_dev_free(dev); ++ ++ return 0; ++} ++ ++static struct platform_driver flexcan_driver = { ++ .driver = { ++ .name = DRV_NAME, ++ .of_match_table = flexcan_of_match, ++ }, ++ .probe = flexcan_probe, ++ .remove = flexcan_remove, ++ .id_table = flexcan_id_table, ++}; ++ ++module_platform_driver(flexcan_driver); ++ ++MODULE_AUTHOR("Wolfgang Grandegger , " ++ "Sascha Hauer , " ++ "Marc Kleine-Budde "); ++MODULE_LICENSE("GPL v2"); ++MODULE_DESCRIPTION("RT-CAN port driver for flexcan based chip"); +--- linux/drivers/xenomai/can/mscan/rtcan_mscan_regs.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/rtcan_mscan_regs.h 2021-04-07 16:01:26.420635319 +0800 +@@ -0,0 +1,226 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Based on linux-2.4.25/include/asm-ppc/mpc5xxx.h ++ * Prototypes, etc. for the Motorola MPC5xxx embedded cpu chips ++ * ++ * Author: Dale Farnsworth ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __RTCAN_MSCAN_REGS_H_ ++#define __RTCAN_MSCAN_REGS_H_ ++ ++#include ++#include ++#include ++ ++static inline void __iomem *mpc5xxx_gpio_find_and_map(void) ++{ ++ struct device_node *ofn; ++ ofn = of_find_compatible_node(NULL, NULL, "mpc5200-gpio"); ++ if (!ofn) ++ ofn = of_find_compatible_node(NULL, NULL, "fsl,mpc5200-gpio"); ++ return ofn ? of_iomap(ofn, 0) : NULL; ++} ++ ++#define MPC5xxx_GPIO mpc5xxx_gpio_find_and_map() ++#define mpc5xxx_gpio mpc52xx_gpio ++ ++#define mpc5xxx_get_of_node(ofdev) (ofdev)->dev.of_node ++ ++#define MSCAN_CAN1_ADDR (MSCAN_MBAR + 0x0900) /* MSCAN Module 1 */ ++#define MSCAN_CAN2_ADDR (MSCAN_MBAR + 0x0980) /* MSCAN Module 2 */ ++#define MSCAN_SIZE 0x80 ++ ++/* MSCAN control register 0 (CANCTL0) bits */ ++#define MSCAN_RXFRM 0x80 ++#define MSCAN_RXACT 0x40 ++#define MSCAN_CSWAI 0x20 ++#define MSCAN_SYNCH 0x10 ++#define MSCAN_TIME 0x08 ++#define MSCAN_WUPE 0x04 ++#define MSCAN_SLPRQ 0x02 ++#define MSCAN_INITRQ 0x01 ++ ++/* MSCAN control register 1 (CANCTL1) bits */ ++#define MSCAN_CANE 0x80 ++#define MSCAN_CLKSRC 0x40 ++#define MSCAN_LOOPB 0x20 ++#define MSCAN_LISTEN 0x10 ++#define MSCAN_WUPM 0x04 ++#define MSCAN_SLPAK 0x02 ++#define MSCAN_INITAK 0x01 ++ ++/* MSCAN receiver flag register (CANRFLG) bits */ ++#define MSCAN_WUPIF 0x80 ++#define MSCAN_CSCIF 0x40 ++#define MSCAN_RSTAT1 0x20 ++#define MSCAN_RSTAT0 0x10 ++#define MSCAN_TSTAT1 0x08 ++#define MSCAN_TSTAT0 0x04 ++#define MSCAN_OVRIF 0x02 ++#define MSCAN_RXF 0x01 ++ ++/* MSCAN receiver interrupt enable register (CANRIER) bits */ ++#define MSCAN_WUPIE 0x80 ++#define MSCAN_CSCIE 0x40 ++#define MSCAN_RSTATE1 0x20 ++#define MSCAN_RSTATE0 0x10 ++#define MSCAN_TSTATE1 0x08 ++#define MSCAN_TSTATE0 0x04 ++#define MSCAN_OVRIE 0x02 ++#define MSCAN_RXFIE 0x01 ++ ++/* MSCAN transmitter flag register (CANTFLG) bits */ ++#define MSCAN_TXE2 0x04 ++#define MSCAN_TXE1 0x02 ++#define MSCAN_TXE0 0x01 ++#define MSCAN_TXE (MSCAN_TXE2 | MSCAN_TXE1 | MSCAN_TXE0) ++ ++/* MSCAN transmitter interrupt enable register (CANTIER) bits */ ++#define MSCAN_TXIE2 0x04 ++#define MSCAN_TXIE1 0x02 ++#define MSCAN_TXIE0 0x01 ++#define MSCAN_TXIE (MSCAN_TXIE2 | MSCAN_TXIE1 | MSCAN_TXIE0) ++ ++/* MSCAN transmitter message abort request (CANTARQ) bits */ ++#define MSCAN_ABTRQ2 0x04 ++#define MSCAN_ABTRQ1 0x02 ++#define MSCAN_ABTRQ0 0x01 ++ ++/* MSCAN transmitter message abort ack (CANTAAK) bits */ ++#define MSCAN_ABTAK2 0x04 ++#define MSCAN_ABTAK1 0x02 ++#define MSCAN_ABTAK0 0x01 ++ ++/* MSCAN transmit buffer selection (CANTBSEL) bits */ ++#define MSCAN_TX2 0x04 ++#define MSCAN_TX1 0x02 ++#define MSCAN_TX0 0x01 ++ ++/* MSCAN ID acceptance control register (CANIDAC) bits */ ++#define MSCAN_IDAM1 0x20 ++#define MSCAN_IDAM0 0x10 ++#define MSCAN_IDHIT2 0x04 ++#define MSCAN_IDHIT1 0x02 ++#define MSCAN_IDHIT0 0x01 ++ ++struct mscan_msgbuf { ++ volatile u8 idr[0x8]; /* 0x00 */ ++ volatile u8 dsr[0x10]; /* 0x08 */ ++ volatile u8 dlr; /* 0x18 */ ++ volatile u8 tbpr; /* 0x19 */ /* This register is not applicable for receive buffers */ ++ volatile u16 rsrv1; /* 0x1A */ ++ volatile u8 tsrh; /* 0x1C */ ++ volatile u8 tsrl; /* 0x1D */ ++ volatile u16 rsrv2; /* 0x1E */ ++}; ++ ++struct mscan_regs { ++ volatile u8 canctl0; /* MSCAN + 0x00 */ ++ volatile u8 canctl1; /* MSCAN + 0x01 */ ++ volatile u16 rsrv1; /* MSCAN + 0x02 */ ++ volatile u8 canbtr0; /* MSCAN + 0x04 */ ++ volatile u8 canbtr1; /* MSCAN + 0x05 */ ++ volatile u16 rsrv2; /* MSCAN + 0x06 */ ++ volatile u8 canrflg; /* MSCAN + 0x08 */ ++ volatile u8 canrier; /* MSCAN + 0x09 */ ++ 
volatile u16 rsrv3; /* MSCAN + 0x0A */ ++ volatile u8 cantflg; /* MSCAN + 0x0C */ ++ volatile u8 cantier; /* MSCAN + 0x0D */ ++ volatile u16 rsrv4; /* MSCAN + 0x0E */ ++ volatile u8 cantarq; /* MSCAN + 0x10 */ ++ volatile u8 cantaak; /* MSCAN + 0x11 */ ++ volatile u16 rsrv5; /* MSCAN + 0x12 */ ++ volatile u8 cantbsel; /* MSCAN + 0x14 */ ++ volatile u8 canidac; /* MSCAN + 0x15 */ ++ volatile u16 rsrv6[3]; /* MSCAN + 0x16 */ ++ volatile u8 canrxerr; /* MSCAN + 0x1C */ ++ volatile u8 cantxerr; /* MSCAN + 0x1D */ ++ volatile u16 rsrv7; /* MSCAN + 0x1E */ ++ volatile u8 canidar0; /* MSCAN + 0x20 */ ++ volatile u8 canidar1; /* MSCAN + 0x21 */ ++ volatile u16 rsrv8; /* MSCAN + 0x22 */ ++ volatile u8 canidar2; /* MSCAN + 0x24 */ ++ volatile u8 canidar3; /* MSCAN + 0x25 */ ++ volatile u16 rsrv9; /* MSCAN + 0x26 */ ++ volatile u8 canidmr0; /* MSCAN + 0x28 */ ++ volatile u8 canidmr1; /* MSCAN + 0x29 */ ++ volatile u16 rsrv10; /* MSCAN + 0x2A */ ++ volatile u8 canidmr2; /* MSCAN + 0x2C */ ++ volatile u8 canidmr3; /* MSCAN + 0x2D */ ++ volatile u16 rsrv11; /* MSCAN + 0x2E */ ++ volatile u8 canidar4; /* MSCAN + 0x30 */ ++ volatile u8 canidar5; /* MSCAN + 0x31 */ ++ volatile u16 rsrv12; /* MSCAN + 0x32 */ ++ volatile u8 canidar6; /* MSCAN + 0x34 */ ++ volatile u8 canidar7; /* MSCAN + 0x35 */ ++ volatile u16 rsrv13; /* MSCAN + 0x36 */ ++ volatile u8 canidmr4; /* MSCAN + 0x38 */ ++ volatile u8 canidmr5; /* MSCAN + 0x39 */ ++ volatile u16 rsrv14; /* MSCAN + 0x3A */ ++ volatile u8 canidmr6; /* MSCAN + 0x3C */ ++ volatile u8 canidmr7; /* MSCAN + 0x3D */ ++ volatile u16 rsrv15; /* MSCAN + 0x3E */ ++ ++ struct mscan_msgbuf canrxfg; /* MSCAN + 0x40 */ /* Foreground receive buffer */ ++ struct mscan_msgbuf cantxfg; /* MSCAN + 0x60 */ /* Foreground transmit buffer */ ++}; ++ ++/* Clock source selection ++ */ ++#define MSCAN_CLKSRC_BUS 0 ++#define MSCAN_CLKSRC_XTAL MSCAN_CLKSRC ++#define MSCAN_CLKSRC_IPS MSCAN_CLKSRC ++ ++/* Message type access macros. 
++ */ ++#define MSCAN_BUF_STD_RTR 0x10 ++#define MSCAN_BUF_EXT_RTR 0x01 ++#define MSCAN_BUF_EXTENDED 0x08 ++ ++#define MSCAN_IDAM1 0x20 ++/* Value for the interrupt enable register */ ++#define MSCAN_RIER (MSCAN_OVRIE | \ ++ MSCAN_RXFIE | \ ++ MSCAN_WUPIF | \ ++ MSCAN_CSCIE | \ ++ MSCAN_RSTATE0 | \ ++ MSCAN_RSTATE1 | \ ++ MSCAN_TSTATE0 | \ ++ MSCAN_TSTATE1) ++ ++#define BTR0_BRP_MASK 0x3f ++#define BTR0_SJW_SHIFT 6 ++#define BTR0_SJW_MASK (0x3 << BTR0_SJW_SHIFT) ++ ++#define BTR1_TSEG1_MASK 0xf ++#define BTR1_TSEG2_SHIFT 4 ++#define BTR1_TSEG2_MASK (0x7 << BTR1_TSEG2_SHIFT) ++#define BTR1_SAM_SHIFT 7 ++ ++#define BTR0_SET_BRP(brp) (((brp) - 1) & BTR0_BRP_MASK) ++#define BTR0_SET_SJW(sjw) ((((sjw) - 1) << BTR0_SJW_SHIFT) & \ ++ BTR0_SJW_MASK) ++ ++#define BTR1_SET_TSEG1(tseg1) (((tseg1) - 1) & BTR1_TSEG1_MASK) ++#define BTR1_SET_TSEG2(tseg2) ((((tseg2) - 1) << BTR1_TSEG2_SHIFT) & \ ++ BTR1_TSEG2_MASK) ++#define BTR1_SET_SAM(sam) (((sam) & 1) << BTR1_SAM_SHIFT) ++ ++#endif /* __RTCAN_MSCAN_REGS_H_ */ +--- linux/drivers/xenomai/can/mscan/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/Makefile 2021-04-07 16:01:26.415635326 +0800 +@@ -0,0 +1,6 @@ ++ ++ccflags-y += -Idrivers/xenomai/can -Idrivers/xenomai/can/mscan ++ ++obj-$(CONFIG_XENO_DRIVERS_CAN_MSCAN) += xeno_can_mscan.o ++ ++xeno_can_mscan-y := rtcan_mscan.o rtcan_mscan_proc.o rtcan_mscan_mpc5xxx.o +--- linux/drivers/xenomai/can/mscan/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/Kconfig 2021-04-07 16:01:26.410635334 +0800 +@@ -0,0 +1,8 @@ ++config XENO_DRIVERS_CAN_MSCAN ++ depends on XENO_DRIVERS_CAN && (PPC_MPC52xx || PPC_MPC512x) ++ tristate "MSCAN driver for MPC52xx and MPC512x" ++ default n ++ help ++ ++ This driver is for the MSCAN on the MPC5200 and MPC512x processor ++ from Freescale. +--- linux/drivers/xenomai/can/mscan/rtcan_mscan_mpc5xxx.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/rtcan_mscan_mpc5xxx.c 2021-04-07 16:01:26.405635341 +0800 +@@ -0,0 +1,392 @@ ++/* ++ * CAN bus driver for the Freescale MPC5xxx embedded CPU. ++ * ++ * Copyright (C) 2004-2005 Andrey Volkov , ++ * Varma Electronics Oy ++ * Copyright (C) 2008-2010 Wolfgang Grandegger ++ * Copyright (C) 2009 Wolfram Sang, Pengutronix ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the version 2 of the GNU General Public License ++ * as published by the Free Software Foundation ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "rtcan_dev.h" ++#include "rtcan_mscan_regs.h" ++#include "rtcan_mscan.h" ++ ++#define of_device platform_device ++#define of_platform_driver platform_driver ++#define of_register_platform_driver platform_driver_register ++#define of_unregister_platform_driver platform_driver_unregister ++ ++static char mscan_ctrl_name_mpc5200[] = "MSCAN-MPC5200"; ++static char mscan_ctrl_name_mpc512x[] = "MSCAN-MPC512x"; ++static char mscan_board_name[] = "unkown"; ++ ++struct mpc5xxx_can_data { ++ unsigned int type; ++ u32 (*get_clock)(struct of_device *ofdev, const char *clock_name, ++ int *mscan_clksrc); ++}; ++ ++#ifdef CONFIG_PPC_MPC52xx ++static struct of_device_id mpc52xx_cdm_ids[] = { ++ { .compatible = "fsl,mpc5200-cdm", }, ++ {} ++}; ++ ++static u32 mpc52xx_can_get_clock(struct of_device *ofdev, ++ const char *clock_name, ++ int *mscan_clksrc) ++{ ++ unsigned int pvr; ++ struct mpc52xx_cdm __iomem *cdm; ++ struct device_node *np_cdm; ++ unsigned int freq; ++ u32 val; ++ ++ pvr = mfspr(SPRN_PVR); ++ ++ /* ++ * Either the oscillator clock (SYS_XTAL_IN) or the IP bus clock ++ * (IP_CLK) can be selected as MSCAN clock source. According to ++ * the MPC5200 user's manual, the oscillator clock is the better ++ * choice as it has less jitter. For this reason, it is selected ++ * by default. Unfortunately, it can not be selected for the old ++ * MPC5200 Rev. A chips due to a hardware bug (check errata). ++ */ ++ if (clock_name && strcmp(clock_name, "ip") == 0) ++ *mscan_clksrc = MSCAN_CLKSRC_BUS; ++ else ++ *mscan_clksrc = MSCAN_CLKSRC_XTAL; ++ ++ freq = mpc5xxx_get_bus_frequency(mpc5xxx_get_of_node(ofdev)); ++ if (!freq) ++ return 0; ++ ++ if (*mscan_clksrc == MSCAN_CLKSRC_BUS || pvr == 0x80822011) ++ return freq; ++ ++ /* Determine SYS_XTAL_IN frequency from the clock domain settings */ ++ np_cdm = of_find_matching_node(NULL, mpc52xx_cdm_ids); ++ if (!np_cdm) { ++ dev_err(&ofdev->dev, "can't get clock node!\n"); ++ return 0; ++ } ++ cdm = of_iomap(np_cdm, 0); ++ ++ if (in_8(&cdm->ipb_clk_sel) & 0x1) ++ freq *= 2; ++ val = in_be32(&cdm->rstcfg); ++ ++ freq *= (val & (1 << 5)) ? 8 : 4; ++ freq /= (val & (1 << 6)) ? 
12 : 16; ++ ++ of_node_put(np_cdm); ++ iounmap(cdm); ++ ++ return freq; ++} ++#else /* !CONFIG_PPC_MPC5200 */ ++static u32 mpc52xx_can_get_clock(struct of_device *ofdev, ++ const char *clock_name, ++ int *mscan_clksrc) ++{ ++ return 0; ++} ++#endif /* CONFIG_PPC_MPC52xx */ ++ ++#ifdef CONFIG_PPC_MPC512x ++struct mpc512x_clockctl { ++ u32 spmr; /* System PLL Mode Reg */ ++ u32 sccr[2]; /* System Clk Ctrl Reg 1 & 2 */ ++ u32 scfr1; /* System Clk Freq Reg 1 */ ++ u32 scfr2; /* System Clk Freq Reg 2 */ ++ u32 reserved; ++ u32 bcr; /* Bread Crumb Reg */ ++ u32 pccr[12]; /* PSC Clk Ctrl Reg 0-11 */ ++ u32 spccr; /* SPDIF Clk Ctrl Reg */ ++ u32 cccr; /* CFM Clk Ctrl Reg */ ++ u32 dccr; /* DIU Clk Cnfg Reg */ ++ u32 mccr[4]; /* MSCAN Clk Ctrl Reg 1-3 */ ++}; ++ ++static struct of_device_id mpc512x_clock_ids[] = { ++ { .compatible = "fsl,mpc5121-clock", }, ++ {} ++}; ++ ++static u32 mpc512x_can_get_clock(struct of_device *ofdev, ++ const char *clock_name, ++ int *mscan_clksrc) ++{ ++ struct mpc512x_clockctl __iomem *clockctl; ++ struct device_node *np_clock; ++ struct clk *sys_clk, *ref_clk; ++ int plen, clockidx, clocksrc = -1; ++ u32 sys_freq, val, clockdiv = 1, freq = 0; ++ const u32 *pval; ++ ++ np_clock = of_find_matching_node(NULL, mpc512x_clock_ids); ++ if (!np_clock) { ++ dev_err(&ofdev->dev, "couldn't find clock node\n"); ++ return -ENODEV; ++ } ++ clockctl = of_iomap(np_clock, 0); ++ if (!clockctl) { ++ dev_err(&ofdev->dev, "couldn't map clock registers\n"); ++ return 0; ++ } ++ ++ /* Determine the MSCAN device index from the physical address */ ++ pval = of_get_property(mpc5xxx_get_of_node(ofdev), "reg", &plen); ++ BUG_ON(!pval || plen < sizeof(*pval)); ++ clockidx = (*pval & 0x80) ? 1 : 0; ++ if (*pval & 0x2000) ++ clockidx += 2; ++ ++ /* ++ * Clock source and divider selection: 3 different clock sources ++ * can be selected: "ip", "ref" or "sys". For the latter two, a ++ * clock divider can be defined as well. If the clock source is ++ * not specified by the device tree, we first try to find an ++ * optimal CAN source clock based on the system clock. If that ++ * is not posslible, the reference clock will be used. 
++ */ ++ if (clock_name && !strcmp(clock_name, "ip")) { ++ *mscan_clksrc = MSCAN_CLKSRC_IPS; ++ freq = mpc5xxx_get_bus_frequency(mpc5xxx_get_of_node(ofdev)); ++ } else { ++ *mscan_clksrc = MSCAN_CLKSRC_BUS; ++ ++ pval = of_get_property(mpc5xxx_get_of_node(ofdev), ++ "fsl,mscan-clock-divider", &plen); ++ if (pval && plen == sizeof(*pval)) ++ clockdiv = *pval; ++ if (!clockdiv) ++ clockdiv = 1; ++ ++ if (!clock_name || !strcmp(clock_name, "sys")) { ++ sys_clk = clk_get(&ofdev->dev, "sys_clk"); ++ if (!sys_clk) { ++ dev_err(&ofdev->dev, "couldn't get sys_clk\n"); ++ goto exit_unmap; ++ } ++ /* Get and round up/down sys clock rate */ ++ sys_freq = 1000000 * ++ ((clk_get_rate(sys_clk) + 499999) / 1000000); ++ ++ if (!clock_name) { ++ /* A multiple of 16 MHz would be optimal */ ++ if ((sys_freq % 16000000) == 0) { ++ clocksrc = 0; ++ clockdiv = sys_freq / 16000000; ++ freq = sys_freq / clockdiv; ++ } ++ } else { ++ clocksrc = 0; ++ freq = sys_freq / clockdiv; ++ } ++ } ++ ++ if (clocksrc < 0) { ++ ref_clk = clk_get(&ofdev->dev, "ref_clk"); ++ if (!ref_clk) { ++ dev_err(&ofdev->dev, "couldn't get ref_clk\n"); ++ goto exit_unmap; ++ } ++ clocksrc = 1; ++ freq = clk_get_rate(ref_clk) / clockdiv; ++ } ++ } ++ ++ /* Disable clock */ ++ out_be32(&clockctl->mccr[clockidx], 0x0); ++ if (clocksrc >= 0) { ++ /* Set source and divider */ ++ val = (clocksrc << 14) | ((clockdiv - 1) << 17); ++ out_be32(&clockctl->mccr[clockidx], val); ++ /* Enable clock */ ++ out_be32(&clockctl->mccr[clockidx], val | 0x10000); ++ } ++ ++ /* Enable MSCAN clock domain */ ++ val = in_be32(&clockctl->sccr[1]); ++ if (!(val & (1 << 25))) ++ out_be32(&clockctl->sccr[1], val | (1 << 25)); ++ ++ dev_dbg(&ofdev->dev, "using '%s' with frequency divider %d\n", ++ *mscan_clksrc == MSCAN_CLKSRC_IPS ? "ips_clk" : ++ clocksrc == 1 ? 
"ref_clk" : "sys_clk", clockdiv); ++ ++exit_unmap: ++ of_node_put(np_clock); ++ iounmap(clockctl); ++ ++ return freq; ++} ++#else /* !CONFIG_PPC_MPC512x */ ++static u32 mpc512x_can_get_clock(struct of_device *ofdev, ++ const char *clock_name, ++ int *mscan_clksrc) ++{ ++ return 0; ++} ++#endif /* CONFIG_PPC_MPC512x */ ++ ++static struct of_device_id mpc5xxx_can_table[]; ++static int mpc5xxx_can_probe(struct of_device *ofdev) ++{ ++ struct device_node *np = mpc5xxx_get_of_node(ofdev); ++ struct mpc5xxx_can_data *data; ++ struct rtcan_device *dev; ++ void __iomem *base; ++ const char *clock_name = NULL; ++ int irq, mscan_clksrc = 0; ++ int err = -ENOMEM; ++ ++ const struct of_device_id *id; ++ ++ id = of_match_device(mpc5xxx_can_table, &ofdev->dev); ++ if (!id) ++ return -EINVAL; ++ ++ data = (struct mpc5xxx_can_data *)id->data; ++ ++ base = of_iomap(np, 0); ++ if (!base) { ++ dev_err(&ofdev->dev, "couldn't ioremap\n"); ++ return err; ++ } ++ ++ irq = irq_of_parse_and_map(np, 0); ++ if (!irq) { ++ dev_err(&ofdev->dev, "no irq found\n"); ++ err = -ENODEV; ++ goto exit_unmap_mem; ++ } ++ ++ dev = rtcan_dev_alloc(0, 0); ++ if (!dev) ++ goto exit_dispose_irq; ++ ++ clock_name = of_get_property(np, "fsl,mscan-clock-source", NULL); ++ ++ BUG_ON(!data); ++ dev->can_sys_clock = data->get_clock(ofdev, clock_name, ++ &mscan_clksrc); ++ if (!dev->can_sys_clock) { ++ dev_err(&ofdev->dev, "couldn't get MSCAN clock properties\n"); ++ goto exit_free_mscan; ++ } ++ ++ if (data->type == MSCAN_TYPE_MPC5121) ++ dev->ctrl_name = mscan_ctrl_name_mpc512x; ++ else ++ dev->ctrl_name = mscan_ctrl_name_mpc5200; ++ dev->board_name = mscan_board_name; ++ dev->base_addr = (unsigned long)base; ++ ++ err = rtcan_mscan_register(dev, irq, mscan_clksrc); ++ if (err) { ++ dev_err(&ofdev->dev, "registering %s failed (err=%d)\n", ++ RTCAN_DRV_NAME, err); ++ goto exit_free_mscan; ++ } ++ ++ dev_set_drvdata(&ofdev->dev, dev); ++ ++ dev_info(&ofdev->dev, "MSCAN at 0x%p, irq %d, clock %d Hz\n", ++ base, irq, dev->can_sys_clock); ++ ++ return 0; ++ ++exit_free_mscan: ++ rtcan_dev_free(dev); ++exit_dispose_irq: ++ irq_dispose_mapping(irq); ++exit_unmap_mem: ++ iounmap(base); ++ ++ return err; ++} ++ ++static int mpc5xxx_can_remove(struct of_device *ofdev) ++{ ++ struct rtcan_device *dev = dev_get_drvdata(&ofdev->dev); ++ ++ dev_set_drvdata(&ofdev->dev, NULL); ++ ++ rtcan_mscan_unregister(dev); ++ iounmap((void *)dev->base_addr); ++ rtcan_dev_free(dev); ++ ++ return 0; ++} ++ ++static struct mpc5xxx_can_data mpc5200_can_data = { ++ .type = MSCAN_TYPE_MPC5200, ++ .get_clock = mpc52xx_can_get_clock, ++}; ++ ++static struct mpc5xxx_can_data mpc5121_can_data = { ++ .type = MSCAN_TYPE_MPC5121, ++ .get_clock = mpc512x_can_get_clock, ++}; ++ ++static struct of_device_id mpc5xxx_can_table[] = { ++ { .compatible = "fsl,mpc5200-mscan", .data = &mpc5200_can_data, }, ++ /* Note that only MPC5121 Rev. 
2 (and later) is supported */ ++ { .compatible = "fsl,mpc5121-mscan", .data = &mpc5121_can_data, }, ++ {}, ++}; ++ ++static struct of_platform_driver mpc5xxx_can_driver = { ++ .driver = { ++ .owner = THIS_MODULE, ++ .name = RTCAN_DRV_NAME, ++ .of_match_table = mpc5xxx_can_table, ++ }, ++ .probe = mpc5xxx_can_probe, ++ .remove = mpc5xxx_can_remove, ++}; ++ ++static int __init mpc5xxx_can_init(void) ++{ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ return of_register_platform_driver(&mpc5xxx_can_driver); ++} ++module_init(mpc5xxx_can_init); ++ ++static void __exit mpc5xxx_can_exit(void) ++{ ++ return of_unregister_platform_driver(&mpc5xxx_can_driver); ++}; ++module_exit(mpc5xxx_can_exit); ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RT-Socket-CAN driver for MPC5200 and MPC521x"); ++MODULE_LICENSE("GPL v2"); +--- linux/drivers/xenomai/can/mscan/rtcan_mscan.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/rtcan_mscan.h 2021-04-07 16:01:26.400635348 +0800 +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (C) 2009 Wolfgang Grandegger ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __RTCAN_MSCAN_H_ ++#define __RTCAN_MSCAN_H_ ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "rtcan_mscan" ++ ++/* MSCAN type variants */ ++enum { ++ MSCAN_TYPE_MPC5200, ++ MSCAN_TYPE_MPC5121 ++}; ++ ++extern int rtcan_mscan_register(struct rtcan_device *dev, int irq, ++ int mscan_clksrc); ++extern int rtcan_mscan_unregister(struct rtcan_device *dev); ++ ++extern int rtcan_mscan_create_proc(struct rtcan_device* dev); ++extern void rtcan_mscan_remove_proc(struct rtcan_device* dev); ++ ++#endif /* __RTCAN_MSCAN_H_ */ +--- linux/drivers/xenomai/can/mscan/rtcan_mscan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/rtcan_mscan.c 2021-04-07 16:01:26.396635354 +0800 +@@ -0,0 +1,797 @@ ++/* ++ * Copyright (C) 2006-2010 Wolfgang Grandegger ++ * ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * Derived from the PCAN project file driver/src/pcan_mpc5200.c: ++ * ++ * Copyright (c) 2003 Wolfgang Denk, DENX Software Engineering, wd@denx.de. ++ * ++ * Copyright (c) 2005 Felix Daners, Plugit AG, felix.daners@plugit.ch ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++#include "rtcan_internal.h" ++#include "rtcan_mscan_regs.h" ++#include "rtcan_mscan.h" ++ ++#define MSCAN_SET_MODE_RETRIES 255 ++ ++#ifndef CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++static struct can_bittiming_const mscan_bittiming_const = { ++ .name = "mscan", ++ .tseg1_min = 4, ++ .tseg1_max = 16, ++ .tseg2_min = 2, ++ .tseg2_max = 8, ++ .sjw_max = 4, ++ .brp_min = 1, ++ .brp_max = 64, ++ .brp_inc = 1, ++}; ++#endif ++ ++/** ++ * Reception Interrupt handler ++ * ++ * Inline function first called within @ref rtcan_mscan_interrupt when an RX ++ * interrupt was detected. Here the HW registers are read out and composed ++ * to a struct rtcan_skb. ++ * ++ * @param[out] skb Pointer to an instance of struct rtcan_skb which will be ++ * filled with received CAN message ++ * @param[in] dev Device ID ++ */ ++static inline void rtcan_mscan_rx_interrupt(struct rtcan_device *dev, ++ struct rtcan_skb *skb) ++{ ++ int i; ++ unsigned char size; ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE; ++ ++ frame->can_dlc = in_8(®s->canrxfg.dlr) & 0x0F; ++ ++ /* If DLC exceeds 8 bytes adjust it to 8 (for the payload size) */ ++ size = (frame->can_dlc > 8) ? 8 : frame->can_dlc; ++ ++ if (in_8(®s->canrxfg.idr[1]) & MSCAN_BUF_EXTENDED) { ++ frame->can_id = ((in_8(®s->canrxfg.idr[0]) << 21) | ++ ((in_8(®s->canrxfg.idr[1]) & 0xE0) << 13) | ++ ((in_8(®s->canrxfg.idr[1]) & 0x07) << 15) | ++ (in_8(®s->canrxfg.idr[4]) << 7) | ++ (in_8(®s->canrxfg.idr[5]) >> 1)); ++ ++ frame->can_id |= CAN_EFF_FLAG; ++ ++ if ((in_8(®s->canrxfg.idr[5]) & MSCAN_BUF_EXT_RTR)) { ++ frame->can_id |= CAN_RTR_FLAG; ++ } else { ++ for (i = 0; i < size; i++) ++ frame->data[i] = ++ in_8(®s->canrxfg.dsr[i + ++ (i / 2) * 2]); ++ skb->rb_frame_size += size; ++ } ++ ++ } else { ++ frame->can_id = ((in_8(®s->canrxfg.idr[0]) << 3) | ++ (in_8(®s->canrxfg.idr[1]) >> 5)); ++ ++ if ((in_8(®s->canrxfg.idr[1]) & MSCAN_BUF_STD_RTR)) { ++ frame->can_id |= CAN_RTR_FLAG; ++ } else { ++ for (i = 0; i < size; i++) ++ frame->data[i] = ++ in_8(®s->canrxfg.dsr[i + ++ (i / 2) * 2]); ++ skb->rb_frame_size += size; ++ } ++ } ++ ++ ++ /* Store the interface index */ ++ frame->can_ifindex = dev->ifindex; ++} ++ ++static can_state_t mscan_stat_map[4] = { ++ CAN_STATE_ACTIVE, ++ CAN_STATE_BUS_WARNING, ++ CAN_STATE_BUS_PASSIVE, ++ CAN_STATE_BUS_OFF ++}; ++ ++static inline void rtcan_mscan_err_interrupt(struct rtcan_device *dev, ++ struct rtcan_skb *skb, ++ int r_status) ++{ ++ u8 rstat, tstat; ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE + CAN_ERR_DLC; ++ ++ frame->can_id = CAN_ERR_FLAG; ++ frame->can_dlc = CAN_ERR_DLC; ++ ++ memset(&frame->data[0], 0, frame->can_dlc); ++ ++ if ((r_status & MSCAN_OVRIF)) { ++ frame->can_id |= CAN_ERR_CRTL; ++ frame->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; ++ ++ } else if ((r_status & (MSCAN_CSCIF))) { ++ ++ rstat = (r_status & (MSCAN_TSTAT0 | ++ MSCAN_TSTAT1)) >> 2 & 0x3; ++ tstat = (r_status & (MSCAN_RSTAT0 | ++ MSCAN_RSTAT1)) >> 4 & 0x3; ++ dev->state = 
mscan_stat_map[max(rstat, tstat)]; ++ ++ switch (dev->state) { ++ case CAN_STATE_BUS_OFF: ++ /* Bus-off condition */ ++ frame->can_id |= CAN_ERR_BUSOFF; ++ dev->state = CAN_STATE_BUS_OFF; ++ /* Disable receiver interrupts */ ++ out_8(®s->canrier, 0); ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ break; ++ ++ case CAN_STATE_BUS_PASSIVE: ++ frame->can_id |= CAN_ERR_CRTL; ++ if (tstat > rstat) ++ frame->data[1] = CAN_ERR_CRTL_TX_PASSIVE; ++ else ++ frame->data[1] = CAN_ERR_CRTL_RX_PASSIVE; ++ break; ++ ++ case CAN_STATE_BUS_WARNING: ++ frame->can_id |= CAN_ERR_CRTL; ++ if (tstat > rstat) ++ frame->data[1] = CAN_ERR_CRTL_TX_WARNING; ++ else ++ frame->data[1] = CAN_ERR_CRTL_RX_WARNING; ++ break; ++ ++ default: ++ break; ++ ++ } ++ } ++ /* Store the interface index */ ++ frame->can_ifindex = dev->ifindex; ++} ++ ++/** Interrupt handler */ ++static int rtcan_mscan_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtcan_skb skb; ++ struct rtcan_device *dev; ++ struct mscan_regs *regs; ++ u8 canrflg; ++ int recv_lock_free = 1; ++ int ret = RTDM_IRQ_NONE; ++ ++ ++ dev = (struct rtcan_device *)rtdm_irq_get_arg(irq_handle, void); ++ regs = (struct mscan_regs *)dev->base_addr; ++ ++ rtdm_lock_get(&dev->device_lock); ++ ++ canrflg = in_8(®s->canrflg); ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ /* Transmit Interrupt? */ ++ if ((in_8(®s->cantier) & MSCAN_TXIE0) && ++ (in_8(®s->cantflg) & MSCAN_TXE0)) { ++ out_8(®s->cantier, 0); ++ /* Wake up a sender */ ++ rtdm_sem_up(&dev->tx_sem); ++ ++ if (rtcan_loopback_pending(dev)) { ++ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ ++ rtcan_loopback(dev); ++ } ++ } ++ ++ /* Wakeup interrupt? */ ++ if ((canrflg & MSCAN_WUPIF)) { ++ rtdm_printk("WUPIF interrupt\n"); ++ } ++ ++ /* Receive Interrupt? */ ++ if ((canrflg & MSCAN_RXF)) { ++ ++ /* Read out HW registers */ ++ rtcan_mscan_rx_interrupt(dev, &skb); ++ ++ /* Take more locks. Ensure that they are taken and ++ * released only once in the IRQ handler. */ ++ /* WARNING: Nested locks are dangerous! But they are ++ * nested only in this routine so a deadlock should ++ * not be possible. */ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ ++ /* Pass received frame out to the sockets */ ++ rtcan_rcv(dev, &skb); ++ } ++ ++ /* Error Interrupt? */ ++ if ((canrflg & (MSCAN_CSCIF | MSCAN_OVRIF))) { ++ /* Check error condition and fill error frame */ ++ rtcan_mscan_err_interrupt(dev, &skb, canrflg); ++ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ ++ /* Pass error frame out to the sockets */ ++ rtcan_rcv(dev, &skb); ++ } ++ ++ /* Acknowledge the handled interrupt within the controller. ++ * Only do so for the receiver interrupts. ++ */ ++ if (canrflg) ++ out_8(®s->canrflg, canrflg); ++ ++ if (!recv_lock_free) { ++ rtdm_lock_put(&rtcan_socket_lock); ++ rtdm_lock_put(&rtcan_recv_list_lock); ++ } ++ rtdm_lock_put(&dev->device_lock); ++ ++ return ret; ++} ++ ++/** ++ * Set controller into reset mode. Called from @ref rtcan_mscan_ioctl ++ * (main usage), init_module and cleanup_module. ++ * ++ * @param dev_id Device ID ++ * @param lock_ctx Pointer to saved IRQ context (if stored before calling ++ * this function). Only evaluated if @c locked is true. 
++ * @param locked Boolean value indicating if function was called in an ++ * spin locked and IRQ disabled context ++ * ++ * @return 0 on success, otherwise: ++ * - -EAGAIN: Reset mode bit could not be verified after setting it. ++ * See also note. ++ * ++ * @note According to the MSCAN specification, it is necessary to check ++ * the reset mode bit in PeliCAN mode after having set it. So we do. But if ++ * using a ISA card like the PHYTEC eNET card this should not be necessary ++ * because the CAN controller clock of this card (16 MHz) is twice as high ++ * as the ISA bus clock. ++ */ ++static int rtcan_mscan_mode_stop(struct rtcan_device *dev, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0; ++ int rinit = 0; ++ can_state_t state; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ u8 reg; ++ ++ state = dev->state; ++ /* If controller is not operating anyway, go out */ ++ if (!CAN_STATE_OPERATING(state)) ++ goto out; ++ ++ /* Switch to sleep mode */ ++ setbits8(®s->canctl0, MSCAN_SLPRQ); ++ reg = in_8(®s->canctl1); ++ while (!(reg & MSCAN_SLPAK) && ++ (rinit < MSCAN_SET_MODE_RETRIES)) { ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ rinit++; ++ reg = in_8(®s->canctl1); ++ } ++ /* ++ * The mscan controller will fail to enter sleep mode, ++ * while there are irregular activities on bus, like ++ * somebody keeps retransmitting. This behavior is ++ * undocumented and seems to differ between mscan built ++ * in mpc5200b and mpc5200. We proceed in that case, ++ * since otherwise the slprq will be kept set and the ++ * controller will get stuck. NOTE: INITRQ or CSWAI ++ * will abort all active transmit actions, if still ++ * any, at once. ++ */ ++ if (rinit >= MSCAN_SET_MODE_RETRIES) ++ rtdm_printk("rtcan_mscan: device failed to enter sleep mode. " ++ "We proceed anyhow.\n"); ++ else ++ dev->state = CAN_STATE_SLEEPING; ++ ++ rinit = 0; ++ setbits8(®s->canctl0, MSCAN_INITRQ); ++ ++ reg = in_8(®s->canctl1); ++ while (!(reg & MSCAN_INITAK) && ++ (rinit < MSCAN_SET_MODE_RETRIES)) { ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ rinit++; ++ reg = in_8(®s->canctl1); ++ } ++ if (rinit >= MSCAN_SET_MODE_RETRIES) ++ ret = -ENODEV; ++ ++ /* Volatile state could have changed while we slept busy. */ ++ dev->state = CAN_STATE_STOPPED; ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ ++out: ++ return ret; ++} ++ ++/** ++ * Set controller into operating mode. ++ * ++ * Called from @ref rtcan_mscan_ioctl in spin locked and IRQ disabled ++ * context. ++ * ++ * @param dev_id Device ID ++ * @param lock_ctx Pointer to saved IRQ context (only used when coming ++ * from @ref CAN_STATE_SLEEPING, see also note) ++ * ++ * @return 0 on success, otherwise: ++ * - -EINVAL: No Baud rate set before request to set start mode ++ * ++ * @note If coming from @c CAN_STATE_SLEEPING, the controller must wait ++ * some time to avoid bus errors. Measured on an PHYTEC eNET card, ++ * this time was 110 microseconds. 
++ */ ++static int rtcan_mscan_mode_start(struct rtcan_device *dev, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0, retries = 0; ++ can_state_t state; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ /* We won't forget that state in the device structure is volatile and ++ * access to it will not be optimized by the compiler. So ... */ ++ state = dev->state; ++ ++ switch (state) { ++ case CAN_STATE_ACTIVE: ++ case CAN_STATE_BUS_WARNING: ++ case CAN_STATE_BUS_PASSIVE: ++ break; ++ ++ case CAN_STATE_SLEEPING: ++ case CAN_STATE_STOPPED: ++ /* Set error active state */ ++ state = CAN_STATE_ACTIVE; ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ ++ if ((dev->ctrl_mode & CAN_CTRLMODE_LISTENONLY)) { ++ setbits8(®s->canctl1, MSCAN_LISTEN); ++ } else { ++ clrbits8(®s->canctl1, MSCAN_LISTEN); ++ } ++ if ((dev->ctrl_mode & CAN_CTRLMODE_LOOPBACK)) { ++ setbits8(®s->canctl1, MSCAN_LOOPB); ++ } else { ++ clrbits8(®s->canctl1, MSCAN_LOOPB); ++ } ++ ++ /* Switch to normal mode */ ++ clrbits8(®s->canctl0, MSCAN_INITRQ); ++ clrbits8(®s->canctl0, MSCAN_SLPRQ); ++ while ((in_8(®s->canctl1) & MSCAN_INITAK) || ++ (in_8(®s->canctl1) & MSCAN_SLPAK)) { ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_put_irqrestore(&dev->device_lock, ++ *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_get_irqsave(&dev->device_lock, ++ *lock_ctx); ++ retries++; ++ } ++ /* Enable interrupts */ ++ setbits8(®s->canrier, MSCAN_RIER); ++ ++ break; ++ ++ case CAN_STATE_BUS_OFF: ++ /* Trigger bus-off recovery */ ++ out_8(®s->canrier, MSCAN_RIER); ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ /* Set error active state */ ++ state = CAN_STATE_ACTIVE; ++ ++ break; ++ ++ default: ++ /* Never reached, but we don't want nasty compiler warnings */ ++ break; ++ } ++ /* Store new state in device structure (or old state) */ ++ dev->state = state; ++ ++ return ret; ++} ++ ++static int rtcan_mscan_set_bit_time(struct rtcan_device *dev, ++ struct can_bittime *bit_time, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ u8 btr0, btr1; ++ ++ switch (bit_time->type) { ++ case CAN_BITTIME_BTR: ++ btr0 = bit_time->btr.btr0; ++ btr1 = bit_time->btr.btr1; ++ break; ++ ++ case CAN_BITTIME_STD: ++ btr0 = (BTR0_SET_BRP(bit_time->std.brp) | ++ BTR0_SET_SJW(bit_time->std.sjw)); ++ btr1 = (BTR1_SET_TSEG1(bit_time->std.prop_seg + ++ bit_time->std.phase_seg1) | ++ BTR1_SET_TSEG2(bit_time->std.phase_seg2) | ++ BTR1_SET_SAM(bit_time->std.sam)); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ out_8(®s->canbtr0, btr0); ++ out_8(®s->canbtr1, btr1); ++ ++ rtdm_printk("%s: btr0=0x%02x btr1=0x%02x\n", dev->name, btr0, btr1); ++ ++ return 0; ++} ++ ++static int rtcan_mscan_set_mode(struct rtcan_device *dev, ++ can_mode_t mode, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0, retries = 0; ++ can_state_t state; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ switch (mode) { ++ ++ case CAN_MODE_STOP: ++ ret = rtcan_mscan_mode_stop(dev, lock_ctx); ++ break; ++ ++ case CAN_MODE_START: ++ ret = rtcan_mscan_mode_start(dev, lock_ctx); ++ break; ++ ++ case CAN_MODE_SLEEP: ++ ++ state = dev->state; ++ ++ /* Controller must operate, otherwise go out */ ++ if (!CAN_STATE_OPERATING(state)) { ++ ret = -ENETDOWN; ++ goto mode_sleep_out; ++ } ++ ++ /* Is controller sleeping yet? 
If yes, go out */ ++ if (state == CAN_STATE_SLEEPING) ++ goto mode_sleep_out; ++ ++ /* Remember into which state to return when we ++ * wake up */ ++ dev->state_before_sleep = state; ++ state = CAN_STATE_SLEEPING; ++ ++ /* Let's take a nap. (Now I REALLY understand ++ * the meaning of interrupts ...) */ ++ out_8(®s->canrier, 0); ++ out_8(®s->cantier, 0); ++ setbits8(®s->canctl0, ++ MSCAN_SLPRQ /*| MSCAN_INITRQ*/ | MSCAN_WUPE); ++ while (!(in_8(®s->canctl1) & MSCAN_SLPAK)) { ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ if (retries++ >= 1000) ++ break; ++ } ++ rtdm_printk("Fallen asleep after %d tries.\n", retries); ++ clrbits8(®s->canctl0, MSCAN_INITRQ); ++ while ((in_8(®s->canctl1) & MSCAN_INITAK)) { ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ if (retries++ >= 1000) ++ break; ++ } ++ rtdm_printk("Back to normal after %d tries.\n", retries); ++ out_8(®s->canrier, MSCAN_WUPIE); ++ ++ mode_sleep_out: ++ dev->state = state; ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ } ++ ++ return ret; ++} ++ ++/** ++ * Start a transmission to a MSCAN ++ * ++ * Inline function called within @ref rtcan_mscan_sendmsg. ++ * This is the completion of a send call when hardware access is granted. ++ * Spinlock is taken before calling this function. ++ * ++ * @param[in] frame Pointer to CAN frame which is about to be sent ++ * @param[in] dev Device ID ++ */ ++static int rtcan_mscan_start_xmit(struct rtcan_device *dev, can_frame_t *frame) ++{ ++ int i, id; ++ /* "Real" size of the payload */ ++ unsigned char size; ++ /* Content of frame information register */ ++ unsigned char dlc; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ /* Is TX buffer empty? */ ++ if (!(in_8(®s->cantflg) & MSCAN_TXE0)) { ++ rtdm_printk("rtcan_mscan_start_xmit: TX buffer not empty"); ++ return -EIO; ++ } ++ /* Select the buffer we've found. */ ++ out_8(®s->cantbsel, MSCAN_TXE0); ++ ++ /* Get DLC and ID */ ++ dlc = frame->can_dlc; ++ ++ /* If DLC exceeds 8 bytes adjust it to 8 (for the payload) */ ++ size = (dlc > 8) ? 8 : dlc; ++ ++ id = frame->can_id; ++ if (frame->can_id & CAN_EFF_FLAG) { ++ out_8(®s->cantxfg.idr[0], (id & 0x1fe00000) >> 21); ++ out_8(®s->cantxfg.idr[1], ((id & 0x001c0000) >> 13) | ++ ((id & 0x00038000) >> 15) | ++ 0x18); /* set SRR and IDE bits */ ++ ++ out_8(®s->cantxfg.idr[4], (id & 0x00007f80) >> 7); ++ out_8(®s->cantxfg.idr[5], (id & 0x0000007f) << 1); ++ ++ /* RTR? */ ++ if (frame->can_id & CAN_RTR_FLAG) ++ setbits8(®s->cantxfg.idr[5], 0x1); ++ else { ++ clrbits8(®s->cantxfg.idr[5], 0x1); ++ /* No RTR, write data bytes */ ++ for (i = 0; i < size; i++) ++ out_8(®s->cantxfg.dsr[i + (i / 2) * 2], ++ frame->data[i]); ++ } ++ ++ } else { ++ /* Send standard frame */ ++ ++ out_8(®s->cantxfg.idr[0], (id & 0x000007f8) >> 3); ++ out_8(®s->cantxfg.idr[1], (id & 0x00000007) << 5); ++ ++ /* RTR? */ ++ if (frame->can_id & CAN_RTR_FLAG) ++ setbits8(®s->cantxfg.idr[1], 0x10); ++ else { ++ clrbits8(®s->cantxfg.idr[1], 0x10); ++ /* No RTR, write data bytes */ ++ for (i = 0; i < size; i++) ++ out_8(®s->cantxfg.dsr[i + (i / 2) * 2], ++ frame->data[i]); ++ } ++ } ++ ++ out_8(®s->cantxfg.dlr, frame->can_dlc); ++ out_8(®s->cantxfg.tbpr, 0); /* all messages have the same prio */ ++ ++ /* Trigger transmission. 
*/ ++ out_8(®s->cantflg, MSCAN_TXE0); ++ ++ /* Enable interrupt. */ ++ setbits8(®s->cantier, MSCAN_TXIE0); ++ ++ return 0; ++} ++ ++/** ++ * MSCAN Chip configuration ++ * ++ * Called during @ref init_module. Here, the configuration registers which ++ * must be set only once are written with the right values. The controller ++ * is left in reset mode and goes into operating mode not until the IOCTL ++ * for starting it is triggered. ++ * ++ * @param[in] dev Device ID of the controller to be configured ++ */ ++static inline void __init mscan_chip_config(struct mscan_regs *regs, ++ int mscan_clksrc) ++{ ++ /* Choose IP bus as clock source. ++ */ ++ if (mscan_clksrc) ++ setbits8(®s->canctl1, MSCAN_CLKSRC); ++ clrbits8(®s->canctl1, MSCAN_LISTEN); ++ ++ /* Configure MSCAN to accept all incoming messages. ++ */ ++ out_8(®s->canidar0, 0x00); ++ out_8(®s->canidar1, 0x00); ++ out_8(®s->canidar2, 0x00); ++ out_8(®s->canidar3, 0x00); ++ out_8(®s->canidmr0, 0xFF); ++ out_8(®s->canidmr1, 0xFF); ++ out_8(®s->canidmr2, 0xFF); ++ out_8(®s->canidmr3, 0xFF); ++ out_8(®s->canidar4, 0x00); ++ out_8(®s->canidar5, 0x00); ++ out_8(®s->canidar6, 0x00); ++ out_8(®s->canidar7, 0x00); ++ out_8(®s->canidmr4, 0xFF); ++ out_8(®s->canidmr5, 0xFF); ++ out_8(®s->canidmr6, 0xFF); ++ out_8(®s->canidmr7, 0xFF); ++ clrbits8(®s->canidac, MSCAN_IDAM0 | MSCAN_IDAM1); ++} ++ ++/** ++ * MSCAN Chip registration ++ * ++ * Called during @ref init_module. ++ * ++ * @param[in] dev Device ID of the controller to be registered ++ * @param[in] mscan_clksrc clock source to be used ++ */ ++int rtcan_mscan_register(struct rtcan_device *dev, int irq, int mscan_clksrc) ++{ ++ int ret; ++ struct mscan_regs *regs; ++ ++ regs = (struct mscan_regs *)dev->base_addr; ++ ++ /* Enable MSCAN module. */ ++ setbits8(®s->canctl1, MSCAN_CANE); ++ udelay(100); ++ ++ /* Set dummy state for following call */ ++ dev->state = CAN_STATE_ACTIVE; ++ ++ /* Enter reset mode */ ++ rtcan_mscan_mode_stop(dev, NULL); ++ ++ /* Give device an interface name (so that programs using this driver ++ don't need to know the device ID) */ ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ dev->hard_start_xmit = rtcan_mscan_start_xmit; ++ dev->do_set_mode = rtcan_mscan_set_mode; ++ dev->do_set_bit_time = rtcan_mscan_set_bit_time; ++#ifndef CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++ dev->bittiming_const = &mscan_bittiming_const; ++#endif ++ ++ /* Register IRQ handler and pass device structure as arg */ ++ ret = rtdm_irq_request(&dev->irq_handle, irq, rtcan_mscan_interrupt, ++ 0, RTCAN_DRV_NAME, (void *)dev); ++ if (ret) { ++ printk("ERROR! rtdm_irq_request for IRQ %d failed\n", irq); ++ goto out_can_disable; ++ } ++ ++ mscan_chip_config(regs, mscan_clksrc); ++ ++ /* Register RTDM device */ ++ ret = rtcan_dev_register(dev); ++ if (ret) { ++ printk(KERN_ERR ++ "ERROR while trying to register RTCAN device!\n"); ++ goto out_irq_free; ++ } ++ ++ rtcan_mscan_create_proc(dev); ++ ++ return 0; ++ ++out_irq_free: ++ rtdm_irq_free(&dev->irq_handle); ++ ++out_can_disable: ++ /* Disable MSCAN module. 
*/ ++ clrbits8(®s->canctl1, MSCAN_CANE); ++ ++ return ret; ++} ++ ++/** ++ * MSCAN Chip deregistration ++ * ++ * Called during @ref cleanup_module ++ * ++ * @param[in] dev Device ID of the controller to be registered ++ */ ++int rtcan_mscan_unregister(struct rtcan_device *dev) ++{ ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ printk("Unregistering %s device %s\n", RTCAN_DRV_NAME, dev->name); ++ ++ rtcan_mscan_mode_stop(dev, NULL); ++ rtdm_irq_free(&dev->irq_handle); ++ rtcan_mscan_remove_proc(dev); ++ rtcan_dev_unregister(dev); ++ ++ /* Disable MSCAN module. */ ++ clrbits8(®s->canctl1, MSCAN_CANE); ++ ++ return 0; ++} +--- linux/drivers/xenomai/can/mscan/rtcan_mscan_proc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/rtcan_mscan_proc.c 2021-04-07 16:01:26.391635361 +0800 +@@ -0,0 +1,152 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include "rtcan_dev.h" ++#include "rtcan_internal.h" ++#include "rtcan_mscan_regs.h" ++ ++#define MSCAN_REG_ARGS(reg) \ ++ "%-8s 0x%02x\n", #reg, (int)(in_8(®s->reg)) & 0xff ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_DEBUG ++ ++static int rtcan_mscan_proc_regs(struct seq_file *p, void *data) ++{ ++ struct rtcan_device *dev = (struct rtcan_device *)data; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++#ifdef MPC5xxx_GPIO ++ struct mpc5xxx_gpio *gpio = (struct mpc5xxx_gpio *)MPC5xxx_GPIO; ++ u32 port_config; ++#endif ++ u8 canctl0, canctl1; ++ ++ seq_printf(p, "MSCAN registers at %p\n", regs); ++ ++ canctl0 = in_8(®s->canctl0); ++ seq_printf(p, "canctl0 0x%02x%s%s%s%s%s%s%s%s\n", ++ canctl0, ++ (canctl0 & MSCAN_RXFRM) ? " rxfrm" :"", ++ (canctl0 & MSCAN_RXACT) ? " rxact" :"", ++ (canctl0 & MSCAN_CSWAI) ? " cswai" :"", ++ (canctl0 & MSCAN_SYNCH) ? " synch" :"", ++ (canctl0 & MSCAN_TIME) ? " time" :"", ++ (canctl0 & MSCAN_WUPE) ? " wupe" :"", ++ (canctl0 & MSCAN_SLPRQ) ? " slprq" :"", ++ (canctl0 & MSCAN_INITRQ)? " initrq":"" ); ++ canctl1 = in_8(®s->canctl1); ++ seq_printf(p, "canctl1 0x%02x%s%s%s%s%s%s%s\n", ++ canctl1, ++ (canctl1 & MSCAN_CANE) ? " cane" :"", ++ (canctl1 & MSCAN_CLKSRC)? " clksrc":"", ++ (canctl1 & MSCAN_LOOPB) ? " loopb" :"", ++ (canctl1 & MSCAN_LISTEN)? " listen":"", ++ (canctl1 & MSCAN_WUPM) ? " wump" :"", ++ (canctl1 & MSCAN_SLPAK) ? " slpak" :"", ++ (canctl1 & MSCAN_INITAK)? 
" initak":""); ++ seq_printf(p, MSCAN_REG_ARGS(canbtr0 )); ++ seq_printf(p, MSCAN_REG_ARGS(canbtr1 )); ++ seq_printf(p, MSCAN_REG_ARGS(canrflg )); ++ seq_printf(p, MSCAN_REG_ARGS(canrier )); ++ seq_printf(p, MSCAN_REG_ARGS(cantflg )); ++ seq_printf(p, MSCAN_REG_ARGS(cantier )); ++ seq_printf(p, MSCAN_REG_ARGS(cantarq )); ++ seq_printf(p, MSCAN_REG_ARGS(cantaak )); ++ seq_printf(p, MSCAN_REG_ARGS(cantbsel)); ++ seq_printf(p, MSCAN_REG_ARGS(canidac )); ++ seq_printf(p, MSCAN_REG_ARGS(canrxerr)); ++ seq_printf(p, MSCAN_REG_ARGS(cantxerr)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar0)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar1)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar2)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar3)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr0)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr1)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr2)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr3)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar4)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar5)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar6)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar7)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr4)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr5)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr6)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr7)); ++ ++#ifdef MPC5xxx_GPIO ++ seq_printf(p, "GPIO registers\n"); ++ port_config = in_be32(&gpio->port_config); ++ seq_printf(p, "port_config 0x%08x %s\n", port_config, ++ (port_config & 0x10000000 ? ++ "CAN1 on I2C1, CAN2 on TMR0/1 pins" : ++ (port_config & 0x70) == 0x10 ? ++ "CAN1/2 on PSC2 pins" : ++ "MSCAN1/2 not routed")); ++#endif ++ ++ return 0; ++} ++ ++static int rtcan_mscan_proc_regs_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_mscan_proc_regs, PDE_DATA(inode)); ++} ++ ++static const struct file_operations rtcan_mscan_proc_regs_ops = { ++ .open = rtcan_mscan_proc_regs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++int rtcan_mscan_create_proc(struct rtcan_device* dev) ++{ ++ if (!dev->proc_root) ++ return -EINVAL; ++ ++ proc_create_data("registers", S_IFREG | S_IRUGO | S_IWUSR, ++ dev->proc_root, &rtcan_mscan_proc_regs_ops, dev); ++ return 0; ++} ++ ++void rtcan_mscan_remove_proc(struct rtcan_device* dev) ++{ ++ if (!dev->proc_root) ++ return; ++ ++ remove_proc_entry("registers", dev->proc_root); ++} ++ ++#else /* !CONFIG_XENO_DRIVERS_CAN_DEBUG */ ++ ++void rtcan_mscan_remove_proc(struct rtcan_device* dev) ++{ ++} ++ ++int rtcan_mscan_create_proc(struct rtcan_device* dev) ++{ ++ return 0; ++} ++#endif /* CONFIG_XENO_DRIVERS_CAN_DEBUG */ +--- linux/drivers/xenomai/can/rtcan_dev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_dev.c 2021-04-07 16:01:26.386635368 +0800 +@@ -0,0 +1,321 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from RTnet project file stack/rtdev.c: ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "rtcan_internal.h" ++#include "rtcan_dev.h" ++ ++ ++static struct rtcan_device *rtcan_devices[RTCAN_MAX_DEVICES]; ++static DEFINE_RTDM_LOCK(rtcan_devices_rt_lock); ++ ++static int rtcan_global_init_done; ++ ++DEFINE_SEMAPHORE(rtcan_devices_nrt_lock); ++ ++/* Spinlock for all reception lists and also for some members in ++ * struct rtcan_socket */ ++rtdm_lock_t rtcan_socket_lock; ++ ++/* Spinlock for all reception lists and also for some members in ++ * struct rtcan_socket */ ++rtdm_lock_t rtcan_recv_list_lock; ++ ++ ++ ++static inline void rtcan_global_init(void) ++{ ++ if (!rtcan_global_init_done) { ++ rtdm_lock_init(&rtcan_socket_lock); ++ rtdm_lock_init(&rtcan_recv_list_lock); ++ rtcan_global_init_done = 1; ++ } ++} ++ ++ ++static inline struct rtcan_device *__rtcan_dev_get_by_name(const char *name) ++{ ++ int i; ++ struct rtcan_device *dev; ++ ++ ++ for (i = 0; i < RTCAN_MAX_DEVICES; i++) { ++ dev = rtcan_devices[i]; ++ if ((dev != NULL) && (strncmp(dev->name, name, IFNAMSIZ) == 0)) ++ return dev; ++ } ++ return NULL; ++} ++ ++ ++struct rtcan_device *rtcan_dev_get_by_name(const char *name) ++{ ++ struct rtcan_device *dev; ++#ifdef RTCAN_USE_REFCOUNT ++ rtdm_lockctx_t context; ++#endif ++ ++ ++#ifdef RTCAN_USE_REFCOUNT ++ rtdm_lock_get_irqsave(&rtcan_devices_rt_lock, context); ++#endif ++ ++ dev = __rtcan_dev_get_by_name(name); ++ ++#ifdef RTCAN_USE_REFCOUNT ++ if (dev != NULL) ++ atomic_inc(&dev->refcount); ++ rtdm_lock_put_irqrestore(&rtcan_devices_rt_lock, context); ++#endif ++ ++ return dev; ++} ++ ++ ++static inline struct rtcan_device *__rtcan_dev_get_by_index(int ifindex) ++{ ++ return rtcan_devices[ifindex - 1]; ++} ++ ++ ++struct rtcan_device *rtcan_dev_get_by_index(int ifindex) ++{ ++ struct rtcan_device *dev; ++#ifdef RTCAN_USE_REFCOUNT ++ rtdm_lockctx_t context; ++#endif ++ ++ ++ if ((ifindex <= 0) || (ifindex > RTCAN_MAX_DEVICES)) ++ return NULL; ++ ++#ifdef RTCAN_USE_REFCOUNT ++ rtdm_lock_get_irqsave(&rtcan_devices_rt_lock, context); ++#endif ++ ++ dev = __rtcan_dev_get_by_index(ifindex); ++ ++#ifdef RTCAN_USE_REFCOUNT ++ if (dev != NULL) ++ atomic_inc(&dev->refcount); ++ rtdm_lock_put_irqrestore(&rtcan_devices_rt_lock, context); ++#endif ++ ++ return dev; ++} ++ ++ ++void rtcan_dev_alloc_name(struct rtcan_device *dev, const char *mask) ++{ ++ char buf[IFNAMSIZ]; ++ struct rtcan_device *tmp; ++ int i; ++ ++ ++ for (i = 0; i < RTCAN_MAX_DEVICES; i++) { ++ ksformat(buf, IFNAMSIZ, mask, i); ++ if ((tmp = rtcan_dev_get_by_name(buf)) == NULL) { ++ strncpy(dev->name, buf, IFNAMSIZ); ++ break; ++ } ++#ifdef RTCAN_USE_REFCOUNT ++ else ++ rtcan_dev_dereference(tmp); ++#endif ++ } ++} ++ ++ ++struct rtcan_device *rtcan_dev_alloc(int sizeof_priv, int sizeof_board_priv) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_recv *recv_list_elem; ++ int alloc_size; ++ int j; ++ ++ ++ alloc_size = sizeof(*dev) + sizeof_priv + sizeof_board_priv; ++ ++ dev = (struct rtcan_device *)kmalloc(alloc_size, GFP_KERNEL); ++ if (dev == NULL) { ++ printk(KERN_ERR "rtcan: cannot allocate rtcan device\n"); ++ return NULL; ++ } ++ ++ memset(dev, 0, alloc_size); ++ ++ sema_init(&dev->nrt_lock, 1); ++ ++ rtdm_lock_init(&dev->device_lock); ++ ++ /* Init TX Semaphore, will be destroyed forthwith ++ * when setting stop mode */ ++ 
rtdm_sem_init(&dev->tx_sem, 0); ++#ifdef RTCAN_USE_REFCOUNT ++ atomic_set(&dev->refcount, 0); ++#endif ++ ++ /* Initialize receive list */ ++ dev->empty_list = recv_list_elem = dev->receivers; ++ for (j = 0; j < RTCAN_MAX_RECEIVERS - 1; j++, recv_list_elem++) ++ recv_list_elem->next = recv_list_elem + 1; ++ recv_list_elem->next = NULL; ++ dev->free_entries = RTCAN_MAX_RECEIVERS; ++ ++ if (sizeof_priv) ++ dev->priv = (void *)((unsigned long)dev + sizeof(*dev)); ++ if (sizeof_board_priv) ++ dev->board_priv = (void *)((unsigned long)dev + sizeof(*dev) + sizeof_priv); ++ ++ return dev; ++} ++ ++void rtcan_dev_free (struct rtcan_device *dev) ++{ ++ if (dev != NULL) { ++ rtdm_sem_destroy(&dev->tx_sem); ++ kfree(dev); ++ } ++} ++ ++ ++static inline int __rtcan_dev_new_index(void) ++{ ++ int i; ++ ++ ++ for (i = 0; i < RTCAN_MAX_DEVICES; i++) ++ if (rtcan_devices[i] == NULL) ++ return i+1; ++ ++ return -ENOMEM; ++} ++ ++ ++int rtcan_dev_register(struct rtcan_device *dev) ++{ ++ rtdm_lockctx_t context; ++ int ret; ++ ++ down(&rtcan_devices_nrt_lock); ++ ++ rtcan_global_init(); ++ ++ if ((ret = __rtcan_dev_new_index()) < 0) { ++ up(&rtcan_devices_nrt_lock); ++ return ret; ++ } ++ dev->ifindex = ret; ++ ++ if (strchr(dev->name,'%') != NULL) ++ rtcan_dev_alloc_name(dev, dev->name); ++ ++ if (__rtcan_dev_get_by_name(dev->name) != NULL) { ++ up(&rtcan_devices_nrt_lock); ++ return -EEXIST; ++ } ++ ++ rtdm_lock_get_irqsave(&rtcan_devices_rt_lock, context); ++ ++ rtcan_devices[dev->ifindex - 1] = dev; ++ ++ rtdm_lock_put_irqrestore(&rtcan_devices_rt_lock, context); ++ rtcan_dev_create_proc(dev); ++ ++ up(&rtcan_devices_nrt_lock); ++ ++ printk("rtcan: registered %s\n", dev->name); ++ ++ return 0; ++} ++ ++ ++int rtcan_dev_unregister(struct rtcan_device *dev) ++{ ++ rtdm_lockctx_t context; ++ ++ ++ RTCAN_ASSERT(dev->ifindex != 0, ++ printk("RTCAN: device %s/%p was not registered\n", ++ dev->name, dev); return -ENODEV;); ++ ++ /* If device is running, close it first. 
*/ ++ if (CAN_STATE_OPERATING(dev->state)) ++ return -EBUSY; ++ ++ down(&rtcan_devices_nrt_lock); ++ ++ rtcan_dev_remove_proc(dev); ++ ++ rtdm_lock_get_irqsave(&rtcan_devices_rt_lock, context); ++ ++#ifdef RTCAN_USE_REFCOUNT ++ while (atomic_read(&dev->refcount) > 0) { ++ rtdm_lock_put_irqrestore(&rtcan_devices_rt_lock, context); ++ up(&rtcan_devices_nrt_lock); ++ ++ RTCAN_DBG("RTCAN: unregistering %s deferred (refcount = %d)\n", ++ dev->name, atomic_read(&dev->refcount)); ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(1*HZ); /* wait a second */ ++ ++ down(&rtcan_devices_nrt_lock); ++ rtdm_lock_get_irqsave(&rtcan_devices_rt_lock, context); ++ } ++#endif ++ rtcan_devices[dev->ifindex - 1] = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtcan_devices_rt_lock, context); ++ up(&rtcan_devices_nrt_lock); ++ ++#ifdef RTCAN_USE_REFCOUNT ++ RTCAN_ASSERT(atomic_read(&dev->refcount) == 0, ++ printk("RTCAN: dev reference counter < 0!\n");); ++#endif ++ ++ printk("RTCAN: unregistered %s\n", dev->name); ++ ++ return 0; ++} ++ ++ ++EXPORT_SYMBOL_GPL(rtcan_socket_lock); ++EXPORT_SYMBOL_GPL(rtcan_recv_list_lock); ++ ++EXPORT_SYMBOL_GPL(rtcan_dev_free); ++ ++EXPORT_SYMBOL_GPL(rtcan_dev_alloc); ++EXPORT_SYMBOL_GPL(rtcan_dev_alloc_name); ++ ++EXPORT_SYMBOL_GPL(rtcan_dev_register); ++EXPORT_SYMBOL_GPL(rtcan_dev_unregister); ++ ++EXPORT_SYMBOL_GPL(rtcan_dev_get_by_name); ++EXPORT_SYMBOL_GPL(rtcan_dev_get_by_index); +--- linux/drivers/xenomai/can/rtcan_socket.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_socket.c 2021-04-07 16:01:26.381635375 +0800 +@@ -0,0 +1,105 @@ ++/* ++ * Copyright (C) 2005,2006 Sebastian Smolorz ++ * ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * Based on stack/socket.c - sockets implementation for RTnet ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include "rtcan_socket.h" ++#include "rtcan_list.h" ++ ++ ++LIST_HEAD(rtcan_socket_list); ++ ++void rtcan_socket_init(struct rtdm_fd *fd) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ rtdm_lockctx_t lock_ctx; ++ ++ ++ rtdm_sem_init(&sock->recv_sem, 0); ++ ++ sock->recv_head = 0; ++ sock->recv_tail = 0; ++ atomic_set(&sock->ifindex, 0); ++ sock->flistlen = RTCAN_SOCK_UNBOUND; ++ sock->flist = NULL; ++ sock->err_mask = 0; ++ sock->rx_buf_full = 0; ++ sock->flags = 0; ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++ sock->loopback = 1; ++#endif ++ ++ sock->tx_timeout = RTDM_TIMEOUT_INFINITE; ++ sock->rx_timeout = RTDM_TIMEOUT_INFINITE; ++ ++ INIT_LIST_HEAD(&sock->tx_wait_head); ++ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ list_add(&sock->socket_list, &rtcan_socket_list); ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++} ++ ++ ++void rtcan_socket_cleanup(struct rtdm_fd *fd) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ struct tx_wait_queue *tx_waiting; ++ rtdm_lockctx_t lock_ctx; ++ int tx_list_empty; ++ ++ /* Wake up sleeping senders. This is re-entrant-safe. */ ++ do { ++ cobalt_atomic_enter(lock_ctx); ++ /* Is someone there? */ ++ if (list_empty(&sock->tx_wait_head)) ++ tx_list_empty = 1; ++ else { ++ tx_list_empty = 0; ++ ++ /* Get next entry pointing to a waiting task */ ++ tx_waiting = list_entry(sock->tx_wait_head.next, ++ struct tx_wait_queue, tx_wait_list); ++ ++ /* Remove it from list */ ++ list_del_init(&tx_waiting->tx_wait_list); ++ ++ /* Wake task up (atomic section is left implicitly) */ ++ rtdm_task_unblock(tx_waiting->rt_task); ++ } ++ cobalt_atomic_leave(lock_ctx); ++ } while (!tx_list_empty); ++ ++ rtdm_sem_destroy(&sock->recv_sem); ++ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ if (sock->socket_list.next) { ++ list_del(&sock->socket_list); ++ sock->socket_list.next = NULL; ++ } ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++} +--- linux/drivers/xenomai/can/rtcan_virt.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_virt.c 2021-04-07 16:01:26.376635382 +0800 +@@ -0,0 +1,198 @@ ++/* ++ * Copyright (C) 2006 Jan Kiszka ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++ ++#include ++#include ++#include ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "VIRT" ++#define RTCAN_MAX_VIRT_DEVS 8 ++ ++#define VIRT_TX_BUFS 1 ++ ++static char *virt_ctlr_name = ""; ++static char *virt_board_name = ""; ++ ++MODULE_AUTHOR("Jan Kiszka "); ++MODULE_DESCRIPTION("Virtual RT-Socket-CAN driver"); ++MODULE_LICENSE("GPL"); ++ ++static unsigned int devices = 2; ++ ++module_param(devices, uint, 0400); ++MODULE_PARM_DESC(devices, "Number of devices on the virtual bus"); ++ ++static struct rtcan_device *rtcan_virt_devs[RTCAN_MAX_VIRT_DEVS]; ++ ++ ++static int rtcan_virt_start_xmit(struct rtcan_device *tx_dev, ++ can_frame_t *tx_frame) ++{ ++ int i; ++ struct rtcan_device *rx_dev; ++ struct rtcan_skb skb; ++ struct rtcan_rb_frame *rx_frame = &skb.rb_frame; ++ rtdm_lockctx_t lock_ctx; ++ ++ /* we can transmit immediately again */ ++ rtdm_sem_up(&tx_dev->tx_sem); ++ ++ skb.rb_frame_size = EMPTY_RB_FRAME_SIZE; ++ ++ rx_frame->can_dlc = tx_frame->can_dlc; ++ rx_frame->can_id = tx_frame->can_id; ++ ++ if (!(tx_frame->can_id & CAN_RTR_FLAG)) { ++ memcpy(rx_frame->data, tx_frame->data, tx_frame->can_dlc); ++ skb.rb_frame_size += tx_frame->can_dlc; ++ } ++ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ rtdm_lock_get(&rtcan_socket_lock); ++ ++ ++ /* Deliver to all other devices on the virtual bus */ ++ for (i = 0; i < devices; i++) { ++ rx_dev = rtcan_virt_devs[i]; ++ if (rx_dev->state == CAN_STATE_ACTIVE) { ++ if (tx_dev != rx_dev) { ++ rx_frame->can_ifindex = rx_dev->ifindex; ++ rtcan_rcv(rx_dev, &skb); ++ } else if (rtcan_loopback_pending(tx_dev)) ++ rtcan_loopback(tx_dev); ++ } ++ } ++ rtdm_lock_put(&rtcan_socket_lock); ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ return 0; ++} ++ ++ ++static int rtcan_virt_set_mode(struct rtcan_device *dev, can_mode_t mode, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int err = 0; ++ ++ switch (mode) { ++ case CAN_MODE_STOP: ++ dev->state = CAN_STATE_STOPPED; ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ break; ++ ++ case CAN_MODE_START: ++ rtdm_sem_init(&dev->tx_sem, VIRT_TX_BUFS); ++ dev->state = CAN_STATE_ACTIVE; ++ break; ++ ++ default: ++ err = -EOPNOTSUPP; ++ } ++ ++ return err; ++} ++ ++ ++static int __init rtcan_virt_init_one(int idx) ++{ ++ struct rtcan_device *dev; ++ int err; ++ ++ if ((dev = rtcan_dev_alloc(0, 0)) == NULL) ++ return -ENOMEM; ++ ++ dev->ctrl_name = virt_ctlr_name; ++ dev->board_name = virt_board_name; ++ ++ rtcan_virt_set_mode(dev, CAN_MODE_STOP, NULL); ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ dev->hard_start_xmit = rtcan_virt_start_xmit; ++ dev->do_set_mode = rtcan_virt_set_mode; ++ ++ /* Register RTDM device */ ++ err = rtcan_dev_register(dev); ++ if (err) { ++ printk(KERN_ERR "ERROR %d while trying to register RTCAN device!\n", err); ++ goto error_out; ++ } ++ ++ /* Remember initialized devices */ ++ rtcan_virt_devs[idx] = dev; ++ ++ printk("%s: %s driver loaded\n", dev->name, RTCAN_DRV_NAME); ++ ++ return 0; ++ ++ error_out: ++ rtcan_dev_free(dev); ++ return err; ++} ++ ++ ++/** Init module */ ++static int __init rtcan_virt_init(void) ++{ ++ int i, err = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ for (i = 0; i < devices; i++) { ++ err = rtcan_virt_init_one(i); ++ if (err) { ++ while (--i >= 0) { ++ struct rtcan_device *dev = rtcan_virt_devs[i]; ++ ++ rtcan_dev_unregister(dev); ++ rtcan_dev_free(dev); ++ } ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++ ++/** 
Cleanup module */ ++static void __exit rtcan_virt_exit(void) ++{ ++ int i; ++ struct rtcan_device *dev; ++ ++ for (i = 0; i < devices; i++) { ++ dev = rtcan_virt_devs[i]; ++ ++ printk("Unloading %s device %s\n", RTCAN_DRV_NAME, dev->name); ++ ++ rtcan_virt_set_mode(dev, CAN_MODE_STOP, NULL); ++ rtcan_dev_unregister(dev); ++ rtcan_dev_free(dev); ++ } ++} ++ ++module_init(rtcan_virt_init); ++module_exit(rtcan_virt_exit); +--- linux/drivers/xenomai/can/rtcan_version.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_version.h 2021-04-07 16:01:26.371635389 +0800 +@@ -0,0 +1,27 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __RTCAN_VERSION_H_ ++#define __RTCAN_VERSION_H_ ++ ++#define RTCAN_MAJOR_VER 0 ++#define RTCAN_MINOR_VER 90 ++#define RTCAN_BUGFIX_VER 2 ++ ++#endif /* __RTCAN_VERSION_H_ */ +--- linux/drivers/xenomai/can/sja1000/rtcan_sja1000.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_sja1000.h 2021-04-07 16:01:26.366635396 +0800 +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (C) 2006, Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __SJA1000_H_ ++#define __SJA1000_H_ ++ ++#include ++ ++struct rtcan_sja1000 { ++ unsigned char (*read_reg)(struct rtcan_device *dev, int off); ++ void (*write_reg)(struct rtcan_device *dev, int off, unsigned char val); ++ void (*irq_ack)(struct rtcan_device *dev); ++ unsigned short irq_num; ++ unsigned short irq_flags; ++ unsigned char ocr; ++ unsigned char cdr; ++ char bus_err_on; ++}; ++ ++#ifdef CONFIG_FS_PROCFS ++int rtcan_sja_create_proc(struct rtcan_device* dev); ++void rtcan_sja_remove_proc(struct rtcan_device* dev); ++#else ++static inline int rtcan_sja_create_proc(struct rtcan_device* dev) ++{ return 0; } ++static inline void rtcan_sja_remove_proc(struct rtcan_device* dev) { } ++#endif ++int rtcan_sja1000_register(struct rtcan_device *dev); ++void rtcan_sja1000_unregister(struct rtcan_device *dev); ++ ++ ++#endif /* __SJA1000_H_ */ +--- linux/drivers/xenomai/can/sja1000/rtcan_plx_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_plx_pci.c 2021-04-07 16:01:26.361635404 +0800 +@@ -0,0 +1,600 @@ ++/* ++ * Copyright (C) 2008-2010 Pavel Cheblakov ++ * ++ * Derived from the ems_pci.c driver: ++ * Copyright (C) 2007 Wolfgang Grandegger ++ * Copyright (C) 2008 Markus Plessing ++ * Copyright (C) 2008 Sebastian Haas ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the version 2 of the GNU General Public License ++ * as published by the Free Software Foundation ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DRV_NAME "rt_sja1000_plx_pci" ++#define RTCAN_DEV_NAME "rtcan%d" ++ ++MODULE_AUTHOR("Pavel Cheblakov "); ++MODULE_DESCRIPTION("RTCAN driver for PLX90xx PCI-bridge cards with " ++ "the SJA1000 chips"); ++MODULE_SUPPORTED_DEVICE("Adlink PCI-7841/cPCI-7841, " ++ "Adlink PCI-7841/cPCI-7841 SE, " ++ "Marathon CAN-bus-PCI, " ++ "TEWS TECHNOLOGIES TPMC810, " ++ "esd CAN-PCI/CPCI/PCI104/200, " ++ "esd CAN-PCI/PMC/266, " ++ "esd CAN-PCIe/2000") ++MODULE_LICENSE("GPL v2"); ++ ++#define PLX_PCI_MAX_CHAN 2 ++ ++struct plx_pci_card { ++ int channels; /* detected channels count */ ++ struct rtcan_device *rtcan_dev[PLX_PCI_MAX_CHAN]; ++ void __iomem *conf_addr; ++ ++ /* Pointer to device-dependent reset function */ ++ void (*reset_func)(struct pci_dev *pdev); ++}; ++ ++#define PLX_PCI_CAN_CLOCK (16000000 / 2) ++ ++/* PLX9030/9050/9052 registers */ ++#define PLX_INTCSR 0x4c /* Interrupt Control/Status */ ++#define PLX_CNTRL 0x50 /* User I/O, Direct Slave Response, ++ * Serial EEPROM, and Initialization ++ * Control register ++ */ ++ ++#define PLX_LINT1_EN 0x1 /* Local interrupt 1 enable */ ++#define PLX_LINT2_EN (1 << 3) /* Local interrupt 2 enable */ ++#define PLX_PCI_INT_EN (1 << 6) /* PCI Interrupt Enable */ ++#define PLX_PCI_RESET (1 << 30) /* PCI Adapter Software Reset */ ++ ++/* PLX9056 registers */ ++#define PLX9056_INTCSR 0x68 /* Interrupt Control/Status */ ++#define PLX9056_CNTRL 0x6c /* Control / Software Reset */ ++ ++#define PLX9056_LINTI (1 << 11) ++#define PLX9056_PCI_INT_EN (1 << 8) ++#define PLX9056_PCI_RCR (1 << 29) /* Read Configuration Registers */ ++ ++/* ++ * The board configuration is probably following: ++ * RX1 is connected to ground. ++ * TX1 is not connected. ++ * CLKO is not connected. ++ * Setting the OCR register to 0xDA is a good idea. ++ * This means normal output mode, push-pull and the correct polarity. ++ */ ++#define PLX_PCI_OCR (SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_PUSHPULL | SJA_OCR_TX1_PUSHPULL) ++ ++/* ++ * In the CDR register, you should set CBP to 1. ++ * You will probably also want to set the clock divider value to 7 ++ * (meaning direct oscillator output) because the second SJA1000 chip ++ * is driven by the first one CLKOUT output. 
++ */ ++#define PLX_PCI_CDR (SJA_CDR_CBP | SJA_CDR_CAN_MODE) ++ ++/* SJA1000 Control Register in the BasicCAN Mode */ ++#define SJA_CR 0x00 ++ ++/* States of some SJA1000 registers after hardware reset in the BasicCAN mode*/ ++#define REG_CR_BASICCAN_INITIAL 0x21 ++#define REG_CR_BASICCAN_INITIAL_MASK 0xa1 ++#define REG_SR_BASICCAN_INITIAL 0x0c ++#define REG_IR_BASICCAN_INITIAL 0xe0 ++ ++/* States of some SJA1000 registers after hardware reset in the PeliCAN mode*/ ++#define REG_MOD_PELICAN_INITIAL 0x01 ++#define REG_SR_PELICAN_INITIAL 0x3c ++#define REG_IR_PELICAN_INITIAL 0x00 ++ ++#define ADLINK_PCI_VENDOR_ID 0x144A ++#define ADLINK_PCI_DEVICE_ID 0x7841 ++ ++#define ESD_PCI_SUB_SYS_ID_PCI200 0x0004 ++#define ESD_PCI_SUB_SYS_ID_PCI266 0x0009 ++#define ESD_PCI_SUB_SYS_ID_PMC266 0x000e ++#define ESD_PCI_SUB_SYS_ID_CPCI200 0x010b ++#define ESD_PCI_SUB_SYS_ID_PCIE2000 0x0200 ++#define ESD_PCI_SUB_SYS_ID_PCI104200 0x0501 ++ ++#define MARATHON_PCI_DEVICE_ID 0x2715 ++ ++#define TEWS_PCI_VENDOR_ID 0x1498 ++#define TEWS_PCI_DEVICE_ID_TMPC810 0x032A ++ ++static void plx_pci_reset_common(struct pci_dev *pdev); ++static void plx_pci_reset_marathon(struct pci_dev *pdev); ++static void plx9056_pci_reset_common(struct pci_dev *pdev); ++ ++struct plx_pci_channel_map { ++ u32 bar; ++ u32 offset; ++ u32 size; /* 0x00 - auto, e.g. length of entire bar */ ++}; ++ ++struct plx_pci_card_info { ++ const char *name; ++ int channel_count; ++ u32 can_clock; ++ u8 ocr; /* output control register */ ++ u8 cdr; /* clock divider register */ ++ ++ /* Parameters for mapping local configuration space */ ++ struct plx_pci_channel_map conf_map; ++ ++ /* Parameters for mapping the SJA1000 chips */ ++ struct plx_pci_channel_map chan_map_tbl[PLX_PCI_MAX_CHAN]; ++ ++ /* Pointer to device-dependent reset function */ ++ void (*reset_func)(struct pci_dev *pdev); ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_adlink = { ++ "Adlink PCI-7841/cPCI-7841", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {1, 0x00, 0x00}, { {2, 0x00, 0x80}, {2, 0x80, 0x80} }, ++ &plx_pci_reset_common ++ /* based on PLX9052 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_adlink_se = { ++ "Adlink PCI-7841/cPCI-7841 SE", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x00, 0x80}, {2, 0x80, 0x80} }, ++ &plx_pci_reset_common ++ /* based on PLX9052 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_esd200 = { ++ "esd CAN-PCI/CPCI/PCI104/200", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x00, 0x80}, {2, 0x100, 0x80} }, ++ &plx_pci_reset_common ++ /* based on PLX9030/9050 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_esd266 = { ++ "esd CAN-PCI/PMC/266", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x00, 0x80}, {2, 0x100, 0x80} }, ++ &plx9056_pci_reset_common ++ /* based on PLX9056 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_esd2000 = { ++ "esd CAN-PCIe/2000", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x00, 0x80}, {2, 0x100, 0x80} }, ++ &plx9056_pci_reset_common ++ /* based on PEX8311 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_marathon = { ++ "Marathon CAN-bus-PCI", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x00, 0x00}, {4, 0x00, 0x00} }, ++ &plx_pci_reset_marathon ++ /* based on PLX9052 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_tews = { ++ "TEWS TECHNOLOGIES TPMC810", 2, ++ 
PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x000, 0x80}, {2, 0x100, 0x80} }, ++ &plx_pci_reset_common ++ /* based on PLX9030 */ ++}; ++ ++static const struct pci_device_id plx_pci_tbl[] = { ++ { ++ /* Adlink PCI-7841/cPCI-7841 */ ++ ADLINK_PCI_VENDOR_ID, ADLINK_PCI_DEVICE_ID, ++ PCI_ANY_ID, PCI_ANY_ID, ++ PCI_CLASS_NETWORK_OTHER << 8, ~0, ++ (kernel_ulong_t)&plx_pci_card_info_adlink ++ }, ++ { ++ /* Adlink PCI-7841/cPCI-7841 SE */ ++ ADLINK_PCI_VENDOR_ID, ADLINK_PCI_DEVICE_ID, ++ PCI_ANY_ID, PCI_ANY_ID, ++ PCI_CLASS_COMMUNICATION_OTHER << 8, ~0, ++ (kernel_ulong_t)&plx_pci_card_info_adlink_se ++ }, ++ { ++ /* esd CAN-PCI/200 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_PCI200, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd200 ++ }, ++ { ++ /* esd CAN-CPCI/200 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_CPCI200, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd200 ++ }, ++ { ++ /* esd CAN-PCI104/200 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_PCI104200, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd200 ++ }, ++ { ++ /* esd CAN-PCI/266 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9056, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_PCI266, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd266 ++ }, ++ { ++ /* esd CAN-PMC/266 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9056, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_PMC266, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd266 ++ }, ++ { ++ /* esd CAN-PCIE/2000 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9056, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_PCIE2000, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd2000 ++ }, ++ { ++ /* Marathon CAN-bus-PCI card */ ++ PCI_VENDOR_ID_PLX, MARATHON_PCI_DEVICE_ID, ++ PCI_ANY_ID, PCI_ANY_ID, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_marathon ++ }, ++ { ++ /* TEWS TECHNOLOGIES TPMC810 card */ ++ TEWS_PCI_VENDOR_ID, TEWS_PCI_DEVICE_ID_TMPC810, ++ PCI_ANY_ID, PCI_ANY_ID, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_tews ++ }, ++ { 0,} ++}; ++MODULE_DEVICE_TABLE(pci, plx_pci_tbl); ++ ++static u8 plx_pci_read_reg(struct rtcan_device *dev, int port) ++{ ++ return ioread8((void* __iomem)dev->base_addr + port); ++} ++ ++static void plx_pci_write_reg(struct rtcan_device *dev, int port, u8 val) ++{ ++ iowrite8(val, (void* __iomem)dev->base_addr + port); ++} ++ ++/* ++ * Check if a CAN controller is present at the specified location ++ * by trying to switch 'em from the Basic mode into the PeliCAN mode. ++ * Also check states of some registers in reset mode. ++ */ ++static inline int plx_pci_check_sja1000(struct rtcan_device *dev) ++{ ++ int flag = 0; ++ ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ /* ++ * Check registers after hardware reset (the Basic mode) ++ * See states on p. 10 of the Datasheet. ++ */ ++ if ((chip->read_reg(dev, SJA_CR) & REG_CR_BASICCAN_INITIAL_MASK) == ++ REG_CR_BASICCAN_INITIAL && ++ (chip->read_reg(dev, SJA_SR) == REG_SR_BASICCAN_INITIAL) && ++ (chip->read_reg(dev, SJA_IR) == REG_IR_BASICCAN_INITIAL)) ++ flag = 1; ++ ++ /* Bring the SJA1000 into the PeliCAN mode*/ ++ chip->write_reg(dev, SJA_CDR, SJA_CDR_CAN_MODE); ++ ++ /* ++ * Check registers after reset in the PeliCAN mode. ++ * See states on p. 23 of the Datasheet. 
++ */ ++ if (chip->read_reg(dev, SJA_MOD) == REG_MOD_PELICAN_INITIAL && ++ chip->read_reg(dev, SJA_SR) == REG_SR_PELICAN_INITIAL && ++ chip->read_reg(dev, SJA_IR) == REG_IR_PELICAN_INITIAL) ++ return flag; ++ ++ return 0; ++} ++ ++/* ++ * PLX9030/50/52 software reset ++ * Also LRESET# asserts and brings to reset device on the Local Bus (if wired). ++ * For most cards it's enough for reset the SJA1000 chips. ++ */ ++static void plx_pci_reset_common(struct pci_dev *pdev) ++{ ++ struct plx_pci_card *card = pci_get_drvdata(pdev); ++ u32 cntrl; ++ ++ cntrl = ioread32(card->conf_addr + PLX_CNTRL); ++ cntrl |= PLX_PCI_RESET; ++ iowrite32(cntrl, card->conf_addr + PLX_CNTRL); ++ udelay(100); ++ cntrl ^= PLX_PCI_RESET; ++ iowrite32(cntrl, card->conf_addr + PLX_CNTRL); ++}; ++ ++/* ++ * PLX9056 software reset ++ * Assert LRESET# and reset device(s) on the Local Bus (if wired). ++ */ ++static void plx9056_pci_reset_common(struct pci_dev *pdev) ++{ ++ struct plx_pci_card *card = pci_get_drvdata(pdev); ++ u32 cntrl; ++ ++ /* issue a local bus reset */ ++ cntrl = ioread32(card->conf_addr + PLX9056_CNTRL); ++ cntrl |= PLX_PCI_RESET; ++ iowrite32(cntrl, card->conf_addr + PLX9056_CNTRL); ++ udelay(100); ++ cntrl ^= PLX_PCI_RESET; ++ iowrite32(cntrl, card->conf_addr + PLX9056_CNTRL); ++ ++ /* reload local configuration from EEPROM */ ++ cntrl |= PLX9056_PCI_RCR; ++ iowrite32(cntrl, card->conf_addr + PLX9056_CNTRL); ++ ++ /* ++ * There is no safe way to poll for the end ++ * of reconfiguration process. Waiting for 10ms ++ * is safe. ++ */ ++ mdelay(10); ++ ++ cntrl ^= PLX9056_PCI_RCR; ++ iowrite32(cntrl, card->conf_addr + PLX9056_CNTRL); ++}; ++ ++/* Special reset function for Marathon card */ ++static void plx_pci_reset_marathon(struct pci_dev *pdev) ++{ ++ void __iomem *reset_addr; ++ int i; ++ int reset_bar[2] = {3, 5}; ++ ++ plx_pci_reset_common(pdev); ++ ++ for (i = 0; i < 2; i++) { ++ reset_addr = pci_iomap(pdev, reset_bar[i], 0); ++ if (!reset_addr) { ++ dev_err(&pdev->dev, "Failed to remap reset " ++ "space %d (BAR%d)\n", i, reset_bar[i]); ++ } else { ++ /* reset the SJA1000 chip */ ++ iowrite8(0x1, reset_addr); ++ udelay(100); ++ pci_iounmap(pdev, reset_addr); ++ } ++ } ++} ++ ++static void plx_pci_del_card(struct pci_dev *pdev) ++{ ++ struct plx_pci_card *card = pci_get_drvdata(pdev); ++ struct rtcan_device *dev; ++ int i = 0; ++ ++ for (i = 0; i < card->channels; i++) { ++ dev = card->rtcan_dev[i]; ++ if (!dev) ++ continue; ++ ++ dev_info(&pdev->dev, "Removing %s\n", dev->name); ++ rtcan_sja1000_unregister(dev); ++ if (dev->base_addr) ++ pci_iounmap(pdev, (void* __iomem)dev->base_addr); ++ rtcan_dev_free(dev); ++ } ++ ++ card->reset_func(pdev); ++ ++ /* ++ * Disable interrupts from PCI-card and disable local ++ * interrupts ++ */ ++ if (pdev->device != PCI_DEVICE_ID_PLX_9056) ++ iowrite32(0x0, card->conf_addr + PLX_INTCSR); ++ else ++ iowrite32(0x0, card->conf_addr + PLX9056_INTCSR); ++ ++ if (card->conf_addr) ++ pci_iounmap(pdev, card->conf_addr); ++ ++ kfree(card); ++ ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++/* ++ * Probe PLX90xx based device for the SJA1000 chips and register each ++ * available CAN channel to SJA1000 Socket-CAN subsystem. 
++ */ ++static int plx_pci_add_card(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtcan_sja1000 *chip; ++ struct rtcan_device *dev; ++ struct plx_pci_card *card; ++ struct plx_pci_card_info *ci; ++ int err, i; ++ u32 val; ++ void __iomem *addr; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ ci = (struct plx_pci_card_info *)ent->driver_data; ++ ++ if (pci_enable_device(pdev) < 0) { ++ dev_err(&pdev->dev, "Failed to enable PCI device\n"); ++ return -ENODEV; ++ } ++ ++ dev_info(&pdev->dev, "Detected \"%s\" card at slot #%i\n", ++ ci->name, PCI_SLOT(pdev->devfn)); ++ ++ /* Allocate card structures to hold addresses, ... */ ++ card = kzalloc(sizeof(*card), GFP_KERNEL); ++ if (!card) { ++ dev_err(&pdev->dev, "Unable to allocate memory\n"); ++ pci_disable_device(pdev); ++ return -ENOMEM; ++ } ++ ++ pci_set_drvdata(pdev, card); ++ ++ card->channels = 0; ++ ++ /* Remap PLX90xx configuration space */ ++ addr = pci_iomap(pdev, ci->conf_map.bar, ci->conf_map.size); ++ if (!addr) { ++ err = -ENOMEM; ++ dev_err(&pdev->dev, "Failed to remap configuration space " ++ "(BAR%d)\n", ci->conf_map.bar); ++ goto failure_cleanup; ++ } ++ card->conf_addr = addr + ci->conf_map.offset; ++ ++ ci->reset_func(pdev); ++ card->reset_func = ci->reset_func; ++ ++ /* Detect available channels */ ++ for (i = 0; i < ci->channel_count; i++) { ++ struct plx_pci_channel_map *cm = &ci->chan_map_tbl[i]; ++ ++ dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct plx_pci_card)); ++ if (!dev) { ++ err = -ENOMEM; ++ goto failure_cleanup; ++ } ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ dev->board_name = (char *)ci->name; ++ ++ card->rtcan_dev[i] = dev; ++ chip = card->rtcan_dev[i]->priv; ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ ++ /* ++ * Remap IO space of the SJA1000 chips ++ * This is device-dependent mapping ++ */ ++ addr = pci_iomap(pdev, cm->bar, cm->size); ++ if (!addr) { ++ err = -ENOMEM; ++ dev_err(&pdev->dev, "Failed to remap BAR%d\n", cm->bar); ++ goto failure_cleanup; ++ } ++ ++ dev->base_addr = (unsigned long)(addr + cm->offset); ++ chip->read_reg = plx_pci_read_reg; ++ chip->write_reg = plx_pci_write_reg; ++ ++ /* Check if channel is present */ ++ if (plx_pci_check_sja1000(dev)) { ++ dev->can_sys_clock = ci->can_clock; ++ chip->ocr = ci->ocr; ++ chip->cdr = ci->cdr; ++ ++ /* Register SJA1000 device */ ++ err = rtcan_sja1000_register(dev); ++ if (err) { ++ dev_err(&pdev->dev, "Registering device failed " ++ "(err=%d)\n", err); ++ rtcan_dev_free(dev); ++ goto failure_cleanup; ++ } ++ ++ card->channels++; ++ ++ dev_info(&pdev->dev, "Channel #%d at 0x%p, irq %d " ++ "registered as %s\n", i + 1, ++ (void* __iomem)dev->base_addr, chip->irq_num, ++ dev->name); ++ } else { ++ dev_err(&pdev->dev, "Channel #%d not detected\n", ++ i + 1); ++ rtcan_dev_free(dev); ++ } ++ } ++ ++ if (!card->channels) { ++ err = -ENODEV; ++ goto failure_cleanup; ++ } ++ ++ /* ++ * Enable interrupts from PCI-card (PLX90xx) and enable Local_1, ++ * Local_2 interrupts from the SJA1000 chips ++ */ ++ if (pdev->device != PCI_DEVICE_ID_PLX_9056) { ++ val = ioread32(card->conf_addr + PLX_INTCSR); ++ if (pdev->subsystem_vendor == PCI_VENDOR_ID_ESDGMBH) ++ val |= PLX_LINT1_EN | PLX_PCI_INT_EN; ++ else ++ val |= PLX_LINT1_EN | PLX_LINT2_EN | PLX_PCI_INT_EN; ++ iowrite32(val, card->conf_addr + PLX_INTCSR); ++ } else { ++ iowrite32(PLX9056_LINTI | PLX9056_PCI_INT_EN, ++ card->conf_addr + PLX9056_INTCSR); ++ } ++ return 0; ++ ++failure_cleanup: ++ dev_err(&pdev->dev, 
"Error: %d. Cleaning Up.\n", err); ++ ++ plx_pci_del_card(pdev); ++ ++ return err; ++} ++ ++static struct pci_driver plx_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = plx_pci_tbl, ++ .probe = plx_pci_add_card, ++ .remove = plx_pci_del_card, ++}; ++ ++module_pci_driver(plx_pci_driver); +--- linux/drivers/xenomai/can/sja1000/rtcan_peak_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_peak_pci.c 2021-04-07 16:01:26.356635411 +0800 +@@ -0,0 +1,357 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from the PCAN project file driver/src/pcan_pci.c: ++ * ++ * Copyright (C) 2001-2006 PEAK System-Technik GmbH ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "PEAK-PCI-CAN" ++ ++static char *peak_pci_board_name = "PEAK-PCI"; ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTCAN board driver for PEAK-PCI cards"); ++MODULE_SUPPORTED_DEVICE("PEAK-PCI card CAN controller"); ++MODULE_LICENSE("GPL"); ++ ++struct rtcan_peak_pci ++{ ++ struct pci_dev *pci_dev; ++ struct rtcan_device *slave_dev; ++ int channel; ++ volatile void __iomem *base_addr; ++ volatile void __iomem *conf_addr; ++}; ++ ++#define PEAK_PCI_CAN_SYS_CLOCK (16000000 / 2) ++ ++#define PELICAN_SINGLE (SJA_CDR_CAN_MODE | SJA_CDR_CBP | 0x07 | SJA_CDR_CLK_OFF) ++#define PELICAN_MASTER (SJA_CDR_CAN_MODE | SJA_CDR_CBP | 0x07 ) ++#define PELICAN_DEFAULT (SJA_CDR_CAN_MODE ) ++ ++#define CHANNEL_SINGLE 0 /* this is a single channel device */ ++#define CHANNEL_MASTER 1 /* multi channel device, this device is master */ ++#define CHANNEL_SLAVE 2 /* multi channel device, this is slave */ ++ ++// important PITA registers ++#define PITA_ICR 0x00 // interrupt control register ++#define PITA_GPIOICR 0x18 // general purpose IO interface control register ++#define PITA_MISC 0x1C // miscellanoes register ++ ++#define PEAK_PCI_VENDOR_ID 0x001C // the PCI device and vendor IDs ++#define PEAK_PCI_DEVICE_ID 0x0001 // Device ID for PCI and older PCIe cards ++#define PEAK_PCIE_DEVICE_ID 0x0003 // Device ID for newer PCIe cards (IPEH-003027) ++#define PEAK_CPCI_DEVICE_ID 0x0004 // for nextgen cPCI slot cards ++#define PEAK_MPCI_DEVICE_ID 0x0005 // for nextgen miniPCI slot cards ++#define PEAK_PC_104P_DEVICE_ID 0x0006 // PCAN-PC/104+ cards ++#define PEAK_PCI_104E_DEVICE_ID 0x0007 // PCAN-PCI/104 Express cards ++#define PEAK_MPCIE_DEVICE_ID 0x0008 // The miniPCIe slot cards ++#define PEAK_PCIE_OEM_ID 0x0009 // PCAN-PCI Express OEM ++ ++#define PCI_CONFIG_PORT_SIZE 0x1000 // size of the config io-memory ++#define PCI_PORT_SIZE 0x0400 // size of a channel io-memory ++ 
++static struct pci_device_id peak_pci_tbl[] = { ++ {PEAK_PCI_VENDOR_ID, PEAK_PCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_PCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_MPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_MPCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_PC_104P_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_PCI_104E_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_CPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_PCIE_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ { } ++}; ++MODULE_DEVICE_TABLE (pci, peak_pci_tbl); ++ ++ ++static u8 rtcan_peak_pci_read_reg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_peak_pci *board = (struct rtcan_peak_pci *)dev->board_priv; ++ return readb(board->base_addr + ((unsigned long)port << 2)); ++} ++ ++static void rtcan_peak_pci_write_reg(struct rtcan_device *dev, int port, u8 data) ++{ ++ struct rtcan_peak_pci *board = (struct rtcan_peak_pci *)dev->board_priv; ++ writeb(data, board->base_addr + ((unsigned long)port << 2)); ++} ++ ++static void rtcan_peak_pci_irq_ack(struct rtcan_device *dev) ++{ ++ struct rtcan_peak_pci *board = (struct rtcan_peak_pci *)dev->board_priv; ++ u16 pita_icr_low; ++ ++ /* Select and clear in Pita stored interrupt */ ++ pita_icr_low = readw(board->conf_addr + PITA_ICR); ++ if (board->channel == CHANNEL_SLAVE) { ++ if (pita_icr_low & 0x0001) ++ writew(0x0001, board->conf_addr + PITA_ICR); ++ } ++ else { ++ if (pita_icr_low & 0x0002) ++ writew(0x0002, board->conf_addr + PITA_ICR); ++ } ++} ++ ++static void rtcan_peak_pci_del_chan(struct rtcan_device *dev, ++ int init_step) ++{ ++ struct rtcan_peak_pci *board; ++ u16 pita_icr_high; ++ ++ if (!dev) ++ return; ++ ++ board = (struct rtcan_peak_pci *)dev->board_priv; ++ ++ switch (init_step) { ++ case 0: /* Full cleanup */ ++ printk("Removing %s %s device %s\n", ++ peak_pci_board_name, dev->ctrl_name, dev->name); ++ rtcan_sja1000_unregister(dev); ++ case 5: ++ pita_icr_high = readw(board->conf_addr + PITA_ICR + 2); ++ if (board->channel == CHANNEL_SLAVE) { ++ pita_icr_high &= ~0x0001; ++ } else { ++ pita_icr_high &= ~0x0002; ++ } ++ writew(pita_icr_high, board->conf_addr + PITA_ICR + 2); ++ case 4: ++ iounmap((void *)board->base_addr); ++ case 3: ++ if (board->channel != CHANNEL_SLAVE) ++ iounmap((void *)board->conf_addr); ++ case 2: ++ rtcan_dev_free(dev); ++ case 1: ++ break; ++ } ++ ++} ++ ++static int rtcan_peak_pci_add_chan(struct pci_dev *pdev, int channel, ++ struct rtcan_device **master_dev) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_peak_pci *board; ++ u16 pita_icr_high; ++ unsigned long addr; ++ int ret, init_step = 1; ++ ++ dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_peak_pci)); ++ if (dev == NULL) ++ return -ENOMEM; ++ init_step = 2; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_peak_pci *)dev->board_priv; ++ ++ board->pci_dev = pdev; ++ board->channel = channel; ++ ++ if (channel != CHANNEL_SLAVE) { ++ ++ addr = pci_resource_start(pdev, 0); ++ board->conf_addr = ioremap(addr, PCI_CONFIG_PORT_SIZE); ++ if (board->conf_addr == 0) { ++ ret = -ENODEV; ++ goto failure; ++ } ++ init_step = 3; ++ ++ /* Set GPIO control register */ ++ writew(0x0005, board->conf_addr + PITA_GPIOICR + 2); ++ ++ if (channel == CHANNEL_MASTER) ++ writeb(0x00, board->conf_addr + PITA_GPIOICR); /* enable both */ ++ else ++ writeb(0x04, board->conf_addr 
+ PITA_GPIOICR); /* enable single */ ++ ++ writeb(0x05, board->conf_addr + PITA_MISC + 3); /* toggle reset */ ++ mdelay(5); ++ writeb(0x04, board->conf_addr + PITA_MISC + 3); /* leave parport mux mode */ ++ } else { ++ struct rtcan_peak_pci *master_board = ++ (struct rtcan_peak_pci *)(*master_dev)->board_priv; ++ master_board->slave_dev = dev; ++ board->conf_addr = master_board->conf_addr; ++ } ++ ++ addr = pci_resource_start(pdev, 1); ++ if (channel == CHANNEL_SLAVE) ++ addr += 0x400; ++ ++ board->base_addr = ioremap(addr, PCI_PORT_SIZE); ++ if (board->base_addr == 0) { ++ ret = -ENODEV; ++ goto failure; ++ } ++ init_step = 4; ++ ++ dev->board_name = peak_pci_board_name; ++ ++ chip->read_reg = rtcan_peak_pci_read_reg; ++ chip->write_reg = rtcan_peak_pci_write_reg; ++ chip->irq_ack = rtcan_peak_pci_irq_ack; ++ ++ /* Clock frequency in Hz */ ++ dev->can_sys_clock = PEAK_PCI_CAN_SYS_CLOCK; ++ ++ /* Output control register */ ++ chip->ocr = SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_PUSHPULL; ++ ++ /* Clock divider register */ ++ if (channel == CHANNEL_MASTER) ++ chip->cdr = PELICAN_MASTER; ++ else ++ chip->cdr = PELICAN_SINGLE; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ /* Register and setup interrupt handling */ ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ pita_icr_high = readw(board->conf_addr + PITA_ICR + 2); ++ if (channel == CHANNEL_SLAVE) { ++ pita_icr_high |= 0x0001; ++ } else { ++ pita_icr_high |= 0x0002; ++ } ++ writew(pita_icr_high, board->conf_addr + PITA_ICR + 2); ++ init_step = 5; ++ ++ printk("%s: base_addr=%p conf_addr=%p irq=%d\n", RTCAN_DRV_NAME, ++ board->base_addr, board->conf_addr, chip->irq_num); ++ ++ /* Register SJA1000 device */ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR ++ "ERROR %d while trying to register SJA1000 device!\n", ret); ++ goto failure; ++ } ++ ++ if (channel != CHANNEL_SLAVE) ++ *master_dev = dev; ++ ++ return 0; ++ ++ failure: ++ rtcan_peak_pci_del_chan(dev, init_step); ++ return ret; ++} ++ ++static int peak_pci_init_one(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ int ret; ++ u16 sub_sys_id; ++ struct rtcan_device *master_dev = NULL; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ printk("%s: initializing device %04x:%04x\n", ++ RTCAN_DRV_NAME, pdev->vendor, pdev->device); ++ ++ if ((ret = pci_enable_device (pdev))) ++ goto failure; ++ ++ if ((ret = pci_request_regions(pdev, RTCAN_DRV_NAME))) ++ goto failure; ++ ++ if ((ret = pci_read_config_word(pdev, 0x2e, &sub_sys_id))) ++ goto failure_cleanup; ++ ++ /* Enable memory space */ ++ if ((ret = pci_write_config_word(pdev, 0x04, 2))) ++ goto failure_cleanup; ++ ++ if ((ret = pci_write_config_word(pdev, 0x44, 0))) ++ goto failure_cleanup; ++ ++ if (sub_sys_id > 3) { ++ if ((ret = rtcan_peak_pci_add_chan(pdev, CHANNEL_MASTER, ++ &master_dev))) ++ goto failure_cleanup; ++ if ((ret = rtcan_peak_pci_add_chan(pdev, CHANNEL_SLAVE, ++ &master_dev))) ++ goto failure_cleanup; ++ } else { ++ if ((ret = rtcan_peak_pci_add_chan(pdev, CHANNEL_SINGLE, ++ &master_dev))) ++ goto failure_cleanup; ++ } ++ ++ pci_set_drvdata(pdev, master_dev); ++ return 0; ++ ++ failure_cleanup: ++ if (master_dev) ++ rtcan_peak_pci_del_chan(master_dev, 0); ++ ++ pci_release_regions(pdev); ++ ++ failure: ++ return ret; ++ ++} ++ ++static void peak_pci_remove_one(struct pci_dev *pdev) ++{ ++ struct rtcan_device *dev = pci_get_drvdata(pdev); ++ struct rtcan_peak_pci *board = (struct rtcan_peak_pci *)dev->board_priv; ++ ++ if (board->slave_dev) ++ 
rtcan_peak_pci_del_chan(board->slave_dev, 0); ++ rtcan_peak_pci_del_chan(dev, 0); ++ ++ pci_release_regions(pdev); ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++static struct pci_driver rtcan_peak_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = peak_pci_tbl, ++ .probe = peak_pci_init_one, ++ .remove = peak_pci_remove_one, ++}; ++ ++module_pci_driver(rtcan_peak_pci_driver); +--- linux/drivers/xenomai/can/sja1000/rtcan_sja1000_regs.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_sja1000_regs.h 2021-04-07 16:01:26.351635418 +0800 +@@ -0,0 +1,206 @@ ++/* ++ * Copyright (C) 2005,2006 Sebastian Smolorz ++ * ++ * ++ * Based on drivers/can/sja1000.h in linux-can.patch, a CAN socket ++ * framework for Linux: ++ * ++ * Copyright (C) 2005, Sascha Hauer, Pengutronix ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __SJA1000_REGS_H_ ++#define __SJA1000_REGS_H_ ++ ++ ++/* PeliCAN mode address map */ ++ ++/* reset and operating mode */ ++#define SJA_MOD 0 /* Mode register */ ++#define SJA_CMR 1 /* Command register */ ++#define SJA_SR 2 /* Status register */ ++#define SJA_IR 3 /* Interrupt register */ ++#define SJA_IER 4 /* Interrupt enable register */ ++#define SJA_BTR0 6 /* Bus timing register 0 */ ++#define SJA_BTR1 7 /* Bus timing register 1 */ ++#define SJA_OCR 8 /* Output control register */ ++#define SJA_ALC 11 /* Arbitration lost capture */ ++#define SJA_ECC 12 /* Error code capture register */ ++#define SJA_RXERR 14 /* Receive error counter */ ++#define SJA_TXERR 15 /* Transmit error counter */ ++#define SJA_CDR 31 /* Clock divider register */ ++ ++/* reset mode */ ++#define SJA_ACR0 16 /* Acceptance code register 0 */ ++#define SJA_ACR1 17 /* Acceptance code register 1 */ ++#define SJA_ACR2 18 /* Acceptance code register 2 */ ++#define SJA_ACR3 19 /* Acceptance code register 3 */ ++#define SJA_AMR0 20 /* Acceptance mask register 0 */ ++#define SJA_AMR1 21 /* Acceptance mask register 1 */ ++#define SJA_AMR2 22 /* Acceptance mask register 2 */ ++#define SJA_AMR3 23 /* Acceptance mask register 3 */ ++ ++/* operating mode */ ++#define SJA_FIR 16 /* Frame information register */ ++#define SJA_ID1 17 /* Identifier 1 */ ++#define SJA_ID2 18 /* Identifier 2 */ ++#define SJA_ID3 19 /* Identifier 3 (EFF only) */ ++#define SJA_ID4 20 /* Identifier 4 (EFF only) */ ++ ++#define SJA_DATA_SFF(x) (19 + (x)) /* Data registers in case of standard ++ * frame format; 0 <= x <= 7 */ ++#define SJA_DATA_EFF(x) (21 + (x)) /* Data registers in case of extended ++ * frame format; 0 <= x <= 7 */ ++ ++/* Mode register */ ++enum SJA1000_PELI_MOD { ++ SJA_MOD_RM = 1, /* Reset Mode */ ++ SJA_MOD_LOM = 1<<1, /* Listen Only Mode */ ++ SJA_MOD_STM = 1<<2, /* Self Test Mode */ ++ SJA_MOD_AFM = 1<<3, /* Acceptance Filter Mode */ ++ SJA_MOD_SM = 1<<4 
/* Sleep Mode */ ++}; ++ ++/* Command register */ ++enum SJA1000_PELI_CMR { ++ SJA_CMR_TR = 1, /* Transmission request */ ++ SJA_CMR_AT = 1<<1, /* Abort Transmission */ ++ SJA_CMR_RRB = 1<<2, /* Release Receive Buffer */ ++ SJA_CMR_CDO = 1<<3, /* Clear Data Overrun */ ++ SJA_CMR_SRR = 1<<4 /* Self reception request */ ++}; ++ ++/* Status register */ ++enum SJA1000_PELI_SR { ++ SJA_SR_RBS = 1, /* Receive Buffer Status */ ++ SJA_SR_DOS = 1<<1, /* Data Overrun Status */ ++ SJA_SR_TBS = 1<<2, /* Transmit Buffer Status */ ++ SJA_SR_ES = 1<<6, /* Error Status */ ++ SJA_SR_BS = 1<<7 /* Bus Status */ ++}; ++ ++/* Interrupt register */ ++enum SJA1000_PELI_IR { ++ SJA_IR_RI = 1, /* Receive Interrupt */ ++ SJA_IR_TI = 1<<1, /* Transmit Interrupt */ ++ SJA_IR_EI = 1<<2, /* Error Warning Interrupt */ ++ SJA_IR_DOI = 1<<3, /* Data Overrun Interrupt */ ++ SJA_IR_WUI = 1<<4, /* Wake-Up Interrupt */ ++ SJA_IR_EPI = 1<<5, /* Error Passive Interrupt */ ++ SJA_IR_ALI = 1<<6, /* Arbitration Lost Interrupt */ ++ SJA_IR_BEI = 1<<7, /* Bus Error Interrupt */ ++}; ++ ++/* Interrupt enable register */ ++enum SJA1000_PELI_IER { ++ SJA_IER_RIE = 1, /* Receive Interrupt Enable */ ++ SJA_IER_TIE = 1<<1, /* Transmit Interrupt Enable */ ++ SJA_IER_EIE = 1<<2, /* Error Warning Interrupt Enable */ ++ SJA_IER_DOIE = 1<<3, /* Data Overrun Interrupt Enable */ ++ SJA_IER_WUIE = 1<<4, /* Wake-Up Interrupt Enable */ ++ SJA_IER_EPIE = 1<<5, /* Error Passive Interrupt Enable */ ++ SJA_IER_ALIE = 1<<6, /* Arbitration Lost Interrupt Enable */ ++ SJA_IER_BEIE = 1<<7, /* Bus Error Interrupt Enable */ ++}; ++ ++/* Bus timing register 0 */ ++enum SJA1000_PELI_BTR0 { ++ /* Period of the CAN system clock t_SCl ++ * (t_CLK = time period of XTAL frequency) */ ++ SJA_BTR0_T_SCL_2_T_CLK = 0, /* t_SCl = 2 x t_CLK */ ++ SJA_BTR0_T_SCL_4_T_CLK = 1, /* t_SCl = 4 x t_CLK */ ++ SJA_BTR0_T_SCL_6_T_CLK = 2, /* t_SCl = 6 x t_CLK */ ++ SJA_BTR0_T_SCL_8_T_CLK = 3, /* t_SCl = 8 x t_CLK */ ++ SJA_BTR0_T_SCL_10_T_CLK = 4, /* t_SCl = 10 x t_CLK */ ++ SJA_BTR0_T_SCL_12_T_CLK = 5, /* t_SCl = 12 x t_CLK */ ++ SJA_BTR0_T_SCL_14_T_CLK = 6, /* t_SCl = 14 x t_CLK */ ++ SJA_BTR0_T_SCL_16_T_CLK = 7, /* t_SCl = 16 x t_CLK */ ++ SJA_BTR0_T_SCL_20_T_CLK = 9, /* t_SCl = 20 x t_CLK */ ++ SJA_BTR0_T_SCL_40_T_CLK = 19, /* t_SCl = 40 x t_CLK */ ++ SJA_BTR0_T_SCL_100_T_CLK = 49, /* t_SCl = 100 x t_CLK */ ++ ++}; ++ ++/* Bus timing register 1 */ ++enum SJA1000_PELI_BTR1 { ++ /* Time segment 1 */ ++ SJA_BTR1_T_SEG1_1_T_SCL = 0, /* t_SEG1 = 1 x t_SCl */ ++ SJA_BTR1_T_SEG1_2_T_SCL = 1, /* t_SEG1 = 2 x t_SCl */ ++ SJA_BTR1_T_SEG1_3_T_SCL = 2, /* t_SEG1 = 3 x t_SCl */ ++ SJA_BTR1_T_SEG1_4_T_SCL = 3, /* t_SEG1 = 4 x t_SCl */ ++ SJA_BTR1_T_SEG1_5_T_SCL = 4, /* t_SEG1 = 5 x t_SCl */ ++ SJA_BTR1_T_SEG1_6_T_SCL = 5, /* t_SEG1 = 6 x t_SCl */ ++ SJA_BTR1_T_SEG1_7_T_SCL = 6, /* t_SEG1 = 7 x t_SCl */ ++ SJA_BTR1_T_SEG1_8_T_SCL = 7, /* t_SEG1 = 8 x t_SCl */ ++ /* Time segment 2 */ ++ SJA_BTR1_T_SEG2_1_T_SCL = 0<<4, /* t_SEG2 = 1 x t_SCl */ ++ SJA_BTR1_T_SEG2_2_T_SCL = 1<<4, /* t_SEG2 = 2 x t_SCl */ ++ SJA_BTR1_T_SEG2_3_T_SCL = 2<<4, /* t_SEG2 = 3 x t_SCl */ ++ SJA_BTR1_T_SEG2_4_T_SCL = 3<<4, /* t_SEG2 = 4 x t_SCl */ ++ SJA_BTR1_T_SEG2_5_T_SCL = 4<<4, /* t_SEG2 = 5 x t_SCl */ ++ SJA_BTR1_T_SEG2_6_T_SCL = 5<<4, /* t_SEG2 = 6 x t_SCl */ ++ SJA_BTR1_T_SEG2_7_T_SCL = 6<<4, /* t_SEG2 = 7 x t_SCl */ ++ SJA_BTR1_T_SEG2_8_T_SCL = 7<<4, /* t_SEG2 = 8 x t_SCl */ ++}; ++ ++/* One bit time = t_SCl + t_SEG1 + t_SEG2 */ ++ ++ ++/* Output control register */ ++enum SJA1000_PELI_OCR { ++ 
SJA_OCR_MODE_BIPHASE = 0, ++ SJA_OCR_MODE_TEST = 1, ++ SJA_OCR_MODE_NORMAL = 2, ++ SJA_OCR_MODE_CLOCK = 3, ++ SJA_OCR_TX0_INVERT = 1<<2, ++ SJA_OCR_TX0_PULLDOWN = 1<<3, ++ SJA_OCR_TX0_PULLUP = 2<<3, ++ SJA_OCR_TX0_PUSHPULL = 3<<3, ++ SJA_OCR_TX1_INVERT = 1<<5, ++ SJA_OCR_TX1_PULLDOWN = 1<<6, ++ SJA_OCR_TX1_PULLUP = 2<<6, ++ SJA_OCR_TX1_PUSHPULL = 3<<6 ++}; ++ ++/* Error code capture register */ ++enum SJA1000_PELI_ECC { ++ /* The segmentation field gives information about the location of ++ * errors on the bus */ ++ SJA_ECC_SEG_MASK = 31, /* Segmentation field mask */ ++ SJA_ECC_DIR = 1<<5, /* Transfer direction */ ++ SJA_ECC_ERR_BIT = 0<<6, ++ SJA_ECC_ERR_FORM = 1<<6, ++ SJA_ECC_ERR_STUFF = 2<<6, ++ SJA_ECC_ERR_MASK = 3<<6 /* Error code mask */ ++}; ++ ++/* Frame information register */ ++enum SJA1000_PELI_FIR { ++ SJA_FIR_DLC_MASK = 15, /* Data length code mask */ ++ SJA_FIR_RTR = 1<<6, /* Remote transmission request */ ++ SJA_FIR_EFF = 1<<7 /* Extended frame format */ ++}; ++ ++/* Clock divider register */ ++enum SJA1000_PELI_CDR { ++ SJA_CDR_CLKOUT_MASK = 0x07, ++ SJA_CDR_CLK_OFF = 1<<3, /* Clock off (CLKOUT pin) */ ++ SJA_CDR_CBP = 1<<6, /* CAN input comparator bypass */ ++ SJA_CDR_CAN_MODE = 1<<7 /* CAN mode: 1 = PeliCAN */ ++}; ++ ++#endif /* __SJA1000_REGS_H_ */ +--- linux/drivers/xenomai/can/sja1000/rtcan_adv_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_adv_pci.c 2021-04-07 16:01:26.346635425 +0800 +@@ -0,0 +1,361 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Copyright (C) 2012 Thierry Bultel ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define ADV_PCI_BASE_SIZE 0x80 ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "ADV-PCI-CAN" ++ ++static char *adv_pci_board_name = "ADV-PCI"; ++ ++MODULE_AUTHOR("Thierry Bultel "); ++MODULE_DESCRIPTION("RTCAN board driver for Advantech PCI cards"); ++MODULE_SUPPORTED_DEVICE("ADV-PCI card CAN controller"); ++MODULE_LICENSE("GPL"); ++ ++struct rtcan_adv_pci { ++ struct pci_dev *pci_dev; ++ struct rtcan_device *slave_dev; ++ void __iomem *conf_addr; ++ void __iomem *base_addr; ++}; ++ ++/* ++ * According to the datasheet, ++ * internal clock is 1/2 of the external oscillator frequency ++ * which is 16 MHz ++ */ ++#define ADV_PCI_CAN_CLOCK (16000000 / 2) ++ ++/* ++ * Output control register ++ Depends on the board configuration ++ */ ++ ++#define ADV_PCI_OCR (SJA_OCR_MODE_NORMAL |\ ++ SJA_OCR_TX0_PUSHPULL |\ ++ SJA_OCR_TX1_PUSHPULL |\ ++ SJA_OCR_TX1_INVERT) ++ ++/* ++ * In the CDR register, you should set CBP to 1. 
++ */ ++#define ADV_PCI_CDR (SJA_CDR_CBP | SJA_CDR_CAN_MODE) ++ ++#define ADV_PCI_VENDOR_ID 0x13fe ++ ++#define CHANNEL_SINGLE 0 /* this is a single channel device */ ++#define CHANNEL_MASTER 1 /* multi channel device, this device is master */ ++#define CHANNEL_SLAVE 2 /* multi channel device, this is slave */ ++ ++#define ADV_PCI_DEVICE(device_id)\ ++ { ADV_PCI_VENDOR_ID, device_id, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 } ++ ++static const struct pci_device_id adv_pci_tbl[] = { ++ ADV_PCI_DEVICE(0x1680), ++ ADV_PCI_DEVICE(0x3680), ++ ADV_PCI_DEVICE(0x2052), ++ ADV_PCI_DEVICE(0x1681), ++ ADV_PCI_DEVICE(0xc001), ++ ADV_PCI_DEVICE(0xc002), ++ ADV_PCI_DEVICE(0xc004), ++ ADV_PCI_DEVICE(0xc101), ++ ADV_PCI_DEVICE(0xc102), ++ ADV_PCI_DEVICE(0xc104), ++ /* required last entry */ ++ { } ++}; ++ ++MODULE_DEVICE_TABLE(pci, adv_pci_tbl); ++ ++static u8 rtcan_adv_pci_read_reg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_adv_pci *board = (struct rtcan_adv_pci *)dev->board_priv; ++ ++ return ioread8(board->base_addr + port); ++} ++ ++static void rtcan_adv_pci_write_reg(struct rtcan_device *dev, int port, u8 data) ++{ ++ struct rtcan_adv_pci *board = (struct rtcan_adv_pci *)dev->board_priv; ++ ++ iowrite8(data, board->base_addr + port); ++} ++ ++static void rtcan_adv_pci_del_chan(struct pci_dev *pdev, ++ struct rtcan_device *dev) ++{ ++ struct rtcan_adv_pci *board; ++ ++ if (!dev) ++ return; ++ ++ board = (struct rtcan_adv_pci *)dev->board_priv; ++ ++ rtcan_sja1000_unregister(dev); ++ ++ pci_iounmap(pdev, board->base_addr); ++ ++ rtcan_dev_free(dev); ++} ++ ++ ++static int rtcan_adv_pci_add_chan(struct pci_dev *pdev, ++ int channel, ++ unsigned int bar, ++ unsigned int offset, ++ struct rtcan_device **master_dev) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_adv_pci *board; ++ void __iomem *base_addr; ++ int ret; ++ ++ dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_adv_pci)); ++ if (dev == NULL) ++ return -ENOMEM; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_adv_pci *)dev->board_priv; ++ ++ if (channel == CHANNEL_SLAVE) { ++ struct rtcan_adv_pci *master_board = ++ (struct rtcan_adv_pci *)(*master_dev)->board_priv; ++ master_board->slave_dev = dev; ++ ++ if (offset) { ++ base_addr = master_board->base_addr+offset; ++ } else { ++ base_addr = pci_iomap(pdev, bar, ADV_PCI_BASE_SIZE); ++ if (!base_addr) { ++ ret = -EIO; ++ goto failure; ++ } ++ } ++ } else { ++ base_addr = pci_iomap(pdev, bar, ADV_PCI_BASE_SIZE) + offset; ++ if (!base_addr) { ++ ret = -EIO; ++ goto failure; ++ } ++ } ++ ++ board->pci_dev = pdev; ++ board->conf_addr = NULL; ++ board->base_addr = base_addr; ++ ++ dev->board_name = adv_pci_board_name; ++ ++ chip->read_reg = rtcan_adv_pci_read_reg; ++ chip->write_reg = rtcan_adv_pci_write_reg; ++ ++ /* Clock frequency in Hz */ ++ dev->can_sys_clock = ADV_PCI_CAN_CLOCK; ++ ++ /* Output control register */ ++ chip->ocr = ADV_PCI_OCR; ++ ++ /* Clock divider register */ ++ chip->cdr = ADV_PCI_CDR; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ /* Make sure SJA1000 is in reset mode */ ++ chip->write_reg(dev, SJA_MOD, SJA_MOD_RM); ++ /* Set PeliCAN mode */ ++ chip->write_reg(dev, SJA_CDR, SJA_CDR_CAN_MODE); ++ ++ /* check if mode is set */ ++ ret = chip->read_reg(dev, SJA_CDR); ++ if (ret != SJA_CDR_CAN_MODE) { ++ ret = -EIO; ++ goto failure_iounmap; ++ } ++ ++ /* Register and setup interrupt handling */ ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ ++ RTCAN_DBG("%s: 
base_addr=%p conf_addr=%p irq=%d ocr=%#x cdr=%#x\n", ++ RTCAN_DRV_NAME, board->base_addr, board->conf_addr, ++ chip->irq_num, chip->ocr, chip->cdr); ++ ++ /* Register SJA1000 device */ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d while trying to register SJA1000 device!\n", ++ ret); ++ goto failure_iounmap; ++ } ++ ++ if (channel != CHANNEL_SLAVE) ++ *master_dev = dev; ++ ++ return 0; ++ ++failure_iounmap: ++ if (channel != CHANNEL_SLAVE || !offset) ++ pci_iounmap(pdev, base_addr); ++failure: ++ rtcan_dev_free(dev); ++ ++ return ret; ++} ++ ++static int adv_pci_init_one(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ int ret, channel; ++ unsigned int nb_ports = 0; ++ unsigned int bar = 0; ++ unsigned int bar_flag = 0; ++ unsigned int offset = 0; ++ unsigned int ix; ++ ++ struct rtcan_device *master_dev = NULL; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ dev_info(&pdev->dev, "RTCAN Registering card"); ++ ++ ret = pci_enable_device(pdev); ++ if (ret) ++ goto failure; ++ ++ dev_info(&pdev->dev, "RTCAN detected Advantech PCI card at slot #%i\n", ++ PCI_SLOT(pdev->devfn)); ++ ++ ret = pci_request_regions(pdev, RTCAN_DRV_NAME); ++ if (ret) ++ goto failure_device; ++ ++ switch (pdev->device) { ++ case 0xc001: ++ case 0xc002: ++ case 0xc004: ++ case 0xc101: ++ case 0xc102: ++ case 0xc104: ++ nb_ports = pdev->device & 0x7; ++ offset = 0x100; ++ bar = 0; ++ break; ++ case 0x1680: ++ case 0x2052: ++ nb_ports = 2; ++ bar = 2; ++ bar_flag = 1; ++ break; ++ case 0x1681: ++ nb_ports = 1; ++ bar = 2; ++ bar_flag = 1; ++ break; ++ default: ++ goto failure_regions; ++ } ++ ++ if (nb_ports > 1) ++ channel = CHANNEL_MASTER; ++ else ++ channel = CHANNEL_SINGLE; ++ ++ RTCAN_DBG("%s: Initializing device %04x:%04x:%04x\n", ++ RTCAN_DRV_NAME, ++ pdev->vendor, ++ pdev->device, ++ pdev->subsystem_device); ++ ++ ret = rtcan_adv_pci_add_chan(pdev, channel, bar, offset, &master_dev); ++ if (ret) ++ goto failure_iounmap; ++ ++ /* register slave channel, if any */ ++ ++ for (ix = 1; ix < nb_ports; ix++) { ++ ret = rtcan_adv_pci_add_chan(pdev, ++ CHANNEL_SLAVE, ++ bar + (bar_flag ? 
ix : 0), ++ offset * ix, ++ &master_dev); ++ if (ret) ++ goto failure_iounmap; ++ } ++ ++ pci_set_drvdata(pdev, master_dev); ++ ++ return 0; ++ ++failure_iounmap: ++ if (master_dev) ++ rtcan_adv_pci_del_chan(pdev, master_dev); ++ ++failure_regions: ++ pci_release_regions(pdev); ++ ++failure_device: ++ pci_disable_device(pdev); ++ ++failure: ++ return ret; ++} ++ ++static void adv_pci_remove_one(struct pci_dev *pdev) ++{ ++ struct rtcan_device *dev = pci_get_drvdata(pdev); ++ struct rtcan_adv_pci *board = (struct rtcan_adv_pci *)dev->board_priv; ++ ++ if (board->slave_dev) ++ rtcan_adv_pci_del_chan(pdev, board->slave_dev); ++ ++ rtcan_adv_pci_del_chan(pdev, dev); ++ ++ pci_release_regions(pdev); ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++static struct pci_driver rtcan_adv_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = adv_pci_tbl, ++ .probe = adv_pci_init_one, ++ .remove = adv_pci_remove_one, ++}; ++ ++module_pci_driver(rtcan_adv_pci_driver); +--- linux/drivers/xenomai/can/sja1000/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/Makefile 2021-04-07 16:01:26.341635432 +0800 +@@ -0,0 +1,24 @@ ++ccflags-y += -Idrivers/xenomai/can -Idrivers/xenomai/can/sja1000 ++ ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000) += xeno_can_sja1000.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_PEAK_PCI) += xeno_can_peak_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_PEAK_DNG) += xeno_can_peak_dng.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_PLX_PCI) += xeno_can_plx_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_IXXAT_PCI) += xeno_can_ixxat_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_ADV_PCI) += xeno_can_adv_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_EMS_PCI) += xeno_can_ems_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_ESD_PCI) += xeno_can_esd_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_ISA) += xeno_can_isa.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_MEM) += xeno_can_mem.o ++ ++xeno_can_sja1000-y := rtcan_sja1000.o ++xeno_can_sja1000-$(CONFIG_FS_PROCFS) += rtcan_sja1000_proc.o ++xeno_can_peak_pci-y := rtcan_peak_pci.o ++xeno_can_peak_dng-y := rtcan_peak_dng.o ++xeno_can_plx_pci-y := rtcan_plx_pci.o ++xeno_can_ixxat_pci-y := rtcan_ixxat_pci.o ++xeno_can_adv_pci-y := rtcan_adv_pci.o ++xeno_can_ems_pci-y := rtcan_ems_pci.o ++xeno_can_esd_pci-y := rtcan_esd_pci.o ++xeno_can_isa-y := rtcan_isa.o ++xeno_can_mem-y := rtcan_mem.o +--- linux/drivers/xenomai/can/sja1000/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/Kconfig 2021-04-07 16:01:26.337635438 +0800 +@@ -0,0 +1,100 @@ ++config XENO_DRIVERS_CAN_SJA1000 ++ depends on XENO_DRIVERS_CAN ++ tristate "Philips SJA1000 CAN controller" ++ select XENO_DRIVERS_CAN_BUS_ERR ++ ++config XENO_DRIVERS_CAN_SJA1000_ISA ++ depends on XENO_DRIVERS_CAN_SJA1000 ++ tristate "Standard ISA controllers" ++ help ++ ++ This driver is for CAN devices connected to the ISA bus of a PC ++ or a PC/104 system. The I/O port, interrupt number and a few other ++ hardware specific parameters can be defined via module parameters. ++ ++config XENO_DRIVERS_CAN_SJA1000_MEM ++ depends on XENO_DRIVERS_CAN_SJA1000 ++ tristate "Memory mapped controllers" ++ help ++ ++ This driver is for memory mapped CAN devices. The memory address, ++ interrupt number and a few other hardware specific parameters can ++ be defined via module parameters. 
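++# The ISA and memory-mapped back-ends above take their hardware description
++# as module parameters at load time, for example (I/O port and IRQ values
++# are placeholders for your board): modprobe xeno_can_isa io=0x2c0 irq=10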
++ ++config XENO_DRIVERS_CAN_SJA1000_PEAK_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "PEAK PCI Card" ++ help ++ ++ This driver is for the PCAN PCI, the PC-PCI CAN plug-in card (1 or ++ 2 channel) from PEAK Systems (http://www.peak-system.com). To get ++ the second channel working, Xenomai's shared interrupt support ++ must be enabled. ++ ++config XENO_DRIVERS_CAN_SJA1000_IXXAT_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "IXXAT PCI Card" ++ help ++ ++ This driver is for the IXXAT PC-I 04/PCI card (1 or 2 channel) ++ from the IXXAT Automation GmbH (http://www.ixxat.de). To get ++ the second channel working, Xenomai's shared interrupt support ++ must be enabled. ++ ++config XENO_DRIVERS_CAN_SJA1000_ADV_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "ADVANTECH PCI Cards" ++ help ++ ++ This driver is for the ADVANTECH PCI cards (1 or more channels) ++ It supports the 1680U and some other ones. ++ ++ ++config XENO_DRIVERS_CAN_SJA1000_PLX_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "PLX90xx PCI-bridge based Cards" ++ help ++ ++ This driver is for CAN interface cards based on ++ the PLX90xx PCI bridge. ++ Driver supports now: ++ - Adlink PCI-7841/cPCI-7841 card (http://www.adlinktech.com/) ++ - Adlink PCI-7841/cPCI-7841 SE card ++ - esd CAN-PCI/CPCI/PCI104/200 (http://www.esd.eu/) ++ - esd CAN-PCI/PMC/266 ++ - esd CAN-PCIe/2000 ++ - Marathon CAN-bus-PCI card (http://www.marathon.ru/) ++ - TEWS TECHNOLOGIES TPMC810 card (http://www.tews.com/) ++ ++config XENO_DRIVERS_CAN_SJA1000_EMS_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "EMS CPC PCI Card" ++ help ++ ++ This driver is for the 2 channel CPC PCI card from EMS Dr. Thomas ++ Wünsche (http://www.ems-wuensche.de). To get the second channel ++ working, Xenomai's shared interrupt support must be enabled. ++ ++config XENO_DRIVERS_CAN_SJA1000_ESD_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "ESD PCI Cards (DEPRECATED)" ++ help ++ ++ This driver supports the esd PCI CAN cards CAN-PCI/200, ++ CAN-PCI/266, CAN-PMC/266 (PMC), CAN-CPCI/200 (CompactPCI), ++ CAN-PCIe2000 (PCI Express) and CAN-PCI104/200 (PCI104) ++ from the esd electronic system design gmbh (http://www.esd.eu). ++ ++ This driver is deprecated. It's functionality is now provided by ++ "PLX90xx PCI-bridge based Cards" driver. ++ ++config XENO_DRIVERS_CAN_SJA1000_PEAK_DNG ++ depends on XENO_DRIVERS_CAN_SJA1000 && !PARPORT ++ tristate "PEAK Parallel Port Dongle" ++ help ++ ++ This driver is for the PCAN Dongle, the PC parallel port to CAN ++ converter from PEAK Systems (http://www.peak-system.com). You need ++ to disable parallel port support in the kernel (CONFIG_PARPORT) for ++ proper operation. The interface type (sp or epp), I/O port and ++ interrupt number should be defined via module parameters. 
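Editorial note, not part of the patch itself: the rtcan_sja1000.c file that follows defines the BTR0/BTR1 packing macros used by rtcan_sja_set_bit_time(). As a worked example, with the 8 MHz CAN system clock used by the boards above (16 MHz oscillator divided by two), 500 kbit/s can be reached with 16 time quanta per bit, which those macros turn into the classic 0x00/0x1C register pair. A minimal sketch, assuming the BTR0_SET_*/BTR1_SET_* macros and the u8 type from the sources this patch adds (the helper name is illustrative only):

    static inline void example_500k_bittime(u8 *btr0, u8 *btr1)
    {
    	/* 16 tq per bit: sync(1) + tseg1(13) + tseg2(2), brp = 1, sjw = 1 */
    	*btr0 = BTR0_SET_BRP(1) | BTR0_SET_SJW(1);	/* = 0x00 */
    	*btr1 = BTR1_SET_TSEG1(13) | BTR1_SET_TSEG2(2) |
    		BTR1_SET_SAM(0);			/* = 0x1C */
    	/* one bit = 16 x 125 ns = 2 us, i.e. 500 kbit/s */
    }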
+--- linux/drivers/xenomai/can/sja1000/rtcan_sja1000.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_sja1000.c 2021-04-07 16:01:26.332635445 +0800 +@@ -0,0 +1,842 @@ ++/* ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * Parts of this software are based on the following: ++ * ++ * - RTAI CAN device driver for SJA1000 controllers by Jan Kiszka ++ * ++ * - linux-can.patch, a CAN socket framework for Linux, ++ * Copyright (C) 2004, 2005, Robert Schwebel, Benedikt Spranger, ++ * Marc Kleine-Budde, Sascha Hauer, Pengutronix ++ * ++ * - RTnet (www.rtnet.org) ++ * ++ * - serial device driver and profile included in Xenomai (RTDM), ++ * Copyright (C) 2005 Jan Kiszka . ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++#define BTR0_BRP_MASK 0x3f ++#define BTR0_SJW_SHIFT 6 ++#define BTR0_SJW_MASK (0x3 << BTR0_SJW_SHIFT) ++ ++#define BTR1_TSEG1_MASK 0xf ++#define BTR1_TSEG2_SHIFT 4 ++#define BTR1_TSEG2_MASK (0x7 << BTR1_TSEG2_SHIFT) ++#define BTR1_SAM_SHIFT 7 ++ ++#define BTR0_SET_BRP(brp) (((brp) - 1) & BTR0_BRP_MASK) ++#define BTR0_SET_SJW(sjw) ((((sjw) - 1) << BTR0_SJW_SHIFT) & BTR0_SJW_MASK) ++ ++#define BTR1_SET_TSEG1(tseg1) (((tseg1) - 1) & BTR1_TSEG1_MASK) ++#define BTR1_SET_TSEG2(tseg2) ((((tseg2) - 1) << BTR1_TSEG2_SHIFT) & BTR1_TSEG2_MASK) ++#define BTR1_SET_SAM(sam) (((sam) & 1) << BTR1_SAM_SHIFT) ++ ++/* Value for the interrupt enable register */ ++#define SJA1000_IER SJA_IER_RIE | SJA_IER_TIE | \ ++ SJA_IER_EIE | SJA_IER_WUIE | \ ++ SJA_IER_EPIE | SJA_IER_BEIE | \ ++ SJA_IER_ALIE | SJA_IER_DOIE ++ ++static char *sja_ctrl_name = "SJA1000"; ++ ++#define STATE_OPERATING(state) \ ++ ((state) != CAN_STATE_STOPPED && (state) != CAN_STATE_BUS_OFF) ++ ++#define STATE_RESET(state) \ ++ ((state) == CAN_STATE_STOPPED || (state) == CAN_STATE_BUS_OFF) ++ ++ ++MODULE_AUTHOR("Sebastian.Smolorz@stud.uni-hannover.de"); ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("RT-Socket-CAN driver for SJA1000"); ++MODULE_SUPPORTED_DEVICE("SJA1000 CAN controller"); ++ ++#ifndef CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++static struct can_bittiming_const sja1000_bittiming_const = { ++ .name = "sja1000", ++ .tseg1_min = 1, ++ .tseg1_max = 16, ++ .tseg2_min = 1, ++ .tseg2_max = 8, ++ .sjw_max = 4, ++ .brp_min = 1, ++ .brp_max = 64, ++ .brp_inc = 1, ++}; ++#endif ++ ++static inline void rtcan_sja_rx_interrupt(struct rtcan_device *dev, ++ struct rtcan_skb *skb) ++{ ++ int i; ++ /* "Real" size of the payload */ ++ u8 size; ++ /* Content of frame information register */ ++ u8 fir; ++ /* Ring buffer frame within skb */ ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ struct rtcan_sja1000 *chip = dev->priv; ++ 
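++	/*
++	 * Register layout used below: a 29-bit extended ID is spread over
++	 * ID1..ID4 (ID4 carries the lowest 5 ID bits in its upper half),
++	 * while an 11-bit standard ID uses ID1 plus the top 3 bits of ID2.
++	 */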
++ /* Read out frame information register */ ++ fir = chip->read_reg(dev, SJA_FIR); ++ ++ /* Extract data length code */ ++ frame->can_dlc = fir & SJA_FIR_DLC_MASK; ++ ++ /* If DLC exceeds 8 bytes adjust it to 8 (for the payload size) */ ++ size = (frame->can_dlc > 8) ? 8 : frame->can_dlc; ++ ++ ++ if (fir & SJA_FIR_EFF) { ++ /* Extended frame */ ++ frame->can_id = CAN_EFF_FLAG; ++ ++ /* Read ID */ ++ frame->can_id |= chip->read_reg(dev, SJA_ID1) << 21; ++ frame->can_id |= chip->read_reg(dev, SJA_ID2) << 13; ++ frame->can_id |= chip->read_reg(dev, SJA_ID3) << 5; ++ frame->can_id |= chip->read_reg(dev, SJA_ID4) >> 3; ++ ++ if (!(fir & SJA_FIR_RTR)) { ++ /* No RTR, read data bytes */ ++ for (i = 0; i < size; i++) ++ frame->data[i] = chip->read_reg(dev, ++ SJA_DATA_EFF(i)); ++ } ++ ++ } else { ++ /* Standard frame */ ++ ++ /* Read ID */ ++ frame->can_id = chip->read_reg(dev, SJA_ID1) << 3; ++ frame->can_id |= chip->read_reg(dev, SJA_ID2) >> 5; ++ ++ if (!(fir & SJA_FIR_RTR)) { ++ /* No RTR, read data bytes */ ++ for (i = 0; i < size; i++) ++ frame->data[i] = chip->read_reg(dev, SJA_DATA_SFF(i)); ++ } ++ } ++ ++ /* Release Receive Buffer */ ++ chip->write_reg(dev, SJA_CMR, SJA_CMR_RRB); ++ ++ ++ /* RTR? */ ++ if (fir & SJA_FIR_RTR) { ++ frame->can_id |= CAN_RTR_FLAG; ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE; ++ } else ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE + size; ++ ++ /* Store the interface index */ ++ frame->can_ifindex = dev->ifindex; ++} ++ ++ ++static inline void rtcan_sja_err_interrupt(struct rtcan_device *dev, ++ struct rtcan_sja1000 *chip, ++ struct rtcan_skb *skb, ++ u8 irq_source) ++{ ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ can_state_t state = dev->state; ++ u8 status, txerr, rxerr; ++ ++ status = chip->read_reg(dev, SJA_SR); ++ txerr = chip->read_reg(dev, SJA_TXERR); ++ rxerr = chip->read_reg(dev, SJA_RXERR); ++ ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE + CAN_ERR_DLC; ++ ++ frame->can_id = CAN_ERR_FLAG; ++ frame->can_dlc = CAN_ERR_DLC; ++ ++ memset(&frame->data[0], 0, frame->can_dlc); ++ ++ /* Data overrun interrupt? */ ++ if (irq_source & SJA_IR_DOI) { ++ frame->can_id |= CAN_ERR_CRTL; ++ frame->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; ++ } ++ ++ /* Arbitratio lost interrupt? */ ++ if (irq_source & SJA_IR_ALI) { ++ frame->can_id |= CAN_ERR_LOSTARB; ++ frame->data[0] = chip->read_reg(dev, SJA_ALC) & 0x1f; ++ } ++ ++ /* Bus error interrupt? */ ++ if (irq_source & SJA_IR_BEI) { ++ u8 ecc = chip->read_reg(dev, SJA_ECC); ++ ++ frame->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; ++ ++ switch (ecc & SJA_ECC_ERR_MASK) { ++ case SJA_ECC_ERR_BIT: ++ frame->data[2] |= CAN_ERR_PROT_BIT; ++ break; ++ case SJA_ECC_ERR_FORM: ++ frame->data[2] |= CAN_ERR_PROT_FORM; ++ break; ++ case SJA_ECC_ERR_STUFF: ++ frame->data[2] |= CAN_ERR_PROT_STUFF; ++ break; ++ default: ++ frame->data[2] |= CAN_ERR_PROT_UNSPEC; ++ frame->data[3] = ecc & SJA_ECC_SEG_MASK; ++ break; ++ } ++ /* Error occured during transmission? */ ++ if ((ecc & SJA_ECC_DIR) == 0) ++ frame->data[2] |= CAN_ERR_PROT_TX; ++ } ++ ++ /* Error passive interrupt? */ ++ if (unlikely(irq_source & SJA_IR_EPI)) { ++ if (state == CAN_STATE_BUS_WARNING) { ++ state = CAN_STATE_BUS_PASSIVE; ++ } else { ++ state = CAN_STATE_BUS_WARNING; ++ } ++ } ++ ++ /* Error warning interrupt? 
*/ ++ if (irq_source & SJA_IR_EI) { ++ ++ /* Test bus status (bus-off condition) */ ++ if (status & SJA_SR_BS) { ++ /* Bus-off */ ++ state = CAN_STATE_BUS_OFF; ++ frame->can_id |= CAN_ERR_BUSOFF; ++ /* Only allow error warning interrupts ++ (otherwise an EPI would arise during bus-off ++ recovery) */ ++ chip->write_reg(dev, SJA_IER, SJA_IER_EIE); ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ } ++ ++ /* Test error status (error warning limit) */ ++ else if (status & SJA_SR_ES) ++ /* error warning limit reached */ ++ state = CAN_STATE_BUS_WARNING; ++ ++ /* Re-entrance into error active state from bus-warn? */ ++ else if (state == CAN_STATE_BUS_WARNING) ++ state = CAN_STATE_ACTIVE; ++ ++ else ++ /* Bus-off recovery complete, enable all interrupts again */ ++ chip->write_reg(dev, SJA_IER, SJA1000_IER); ++ } ++ ++ if (state != dev->state && ++ (state == CAN_STATE_BUS_WARNING || state == CAN_STATE_BUS_PASSIVE)) { ++ frame->can_id |= CAN_ERR_PROT; ++ if (txerr > rxerr) ++ frame->data[1] = CAN_ERR_CRTL_TX_WARNING; ++ else ++ frame->data[1] = CAN_ERR_CRTL_RX_WARNING; ++ } ++ ++ dev->state = state; ++ frame->can_ifindex = dev->ifindex; ++} ++ ++static int rtcan_sja_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_skb skb; ++ int recv_lock_free = 1; ++ int irq_count = 0; ++ int ret = RTDM_IRQ_NONE; ++ u8 irq_source; ++ ++ ++ /* Get the ID of the device which registered this IRQ. */ ++ dev = (struct rtcan_device *)rtdm_irq_get_arg(irq_handle, void); ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ /* Take spinlock protecting HW register access and device structures. */ ++ rtdm_lock_get(&dev->device_lock); ++ ++ /* Loop as long as the device reports an event */ ++ while ((irq_source = chip->read_reg(dev, SJA_IR))) { ++ ret = RTDM_IRQ_HANDLED; ++ irq_count++; ++ ++ /* Now look up which interrupts appeared */ ++ ++ /* Wake-up interrupt? */ ++ if (irq_source & SJA_IR_WUI) ++ dev->state = dev->state_before_sleep; ++ ++ /* Error Interrupt? */ ++ if (irq_source & (SJA_IR_EI | SJA_IR_DOI | SJA_IR_EPI | ++ SJA_IR_ALI | SJA_IR_BEI)) { ++ ++ /* Check error condition and fill error frame */ ++ if (!((irq_source & SJA_IR_BEI) && (chip->bus_err_on-- < 2))) { ++ rtcan_sja_err_interrupt(dev, chip, &skb, irq_source); ++ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ /* Pass error frame out to the sockets */ ++ rtcan_rcv(dev, &skb); ++ } ++ } ++ ++ /* Transmit Interrupt? */ ++ if (irq_source & SJA_IR_TI) { ++ /* Wake up a sender */ ++ rtdm_sem_up(&dev->tx_sem); ++ ++ if (rtcan_loopback_pending(dev)) { ++ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ ++ rtcan_loopback(dev); ++ } ++ } ++ ++ /* Receive Interrupt? */ ++ if (irq_source & SJA_IR_RI) { ++ ++ /* Read out HW registers */ ++ rtcan_sja_rx_interrupt(dev, &skb); ++ ++ /* Take more locks. Ensure that they are taken and ++ * released only once in the IRQ handler. */ ++ /* WARNING: Nested locks are dangerous! But they are ++ * nested only in this routine so a deadlock should ++ * not be possible. 
*/ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ ++ /* Pass received frame out to the sockets */ ++ rtcan_rcv(dev, &skb); ++ } ++ } ++ ++ if (chip->irq_ack) ++ chip->irq_ack(dev); ++ ++ /* Release spinlocks */ ++ if (!recv_lock_free) { ++ rtdm_lock_put(&rtcan_socket_lock); ++ rtdm_lock_put(&rtcan_recv_list_lock); ++ } ++ rtdm_lock_put(&dev->device_lock); ++ ++ return ret; ++} ++ ++ ++ ++/* ++ * Inline function to decide if controller is operating ++ * ++ * Catch the very unlikely case that setting stop mode ++ * returned without success before this call but in the ++ * meantime the controller went into reset mode. ++ */ ++static inline int rtcan_sja_is_operating(struct rtcan_device *dev, ++ can_state_t *state) ++{ ++ int is_operating = STATE_OPERATING(*state); ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ if (unlikely(is_operating && chip->read_reg(dev, SJA_MOD) & SJA_MOD_RM)) { ++ *state = CAN_STATE_STOPPED; ++ is_operating = 0; ++ /* Disable the controller's interrupts */ ++ chip->write_reg(dev, SJA_IER, 0x00); ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ } ++ ++ return is_operating; ++} ++ ++ ++/* ++ * Set controller into reset mode. ++ * ++ * According to the SJA1000 specification, it is necessary to check the ++ * reset mode bit in PeliCAN mode after having set it. So we do. But if ++ * using a ISA card like the PHYTEC eNET card this should not be necessary ++ * because the CAN controller clock of this card (16 MHz) is twice as high ++ * as the ISA bus clock. ++ */ ++static int rtcan_sja_mode_stop(struct rtcan_device *dev, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0; ++ /* Max. 50 loops busy sleep. If the controller is stopped while in ++ * sleep mode 20-40 loops are needed (tested on PHYTEC eNET). */ ++ int wait_loop = 50; ++ can_state_t state; ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ state = dev->state; ++ /* If controller is not operating anyway, go out */ ++ if (STATE_RESET(state)) ++ goto out; ++ ++ /* Disable the controller's interrupts */ ++ chip->write_reg(dev, SJA_IER, 0x00); ++ ++ /* Set reset mode bit */ ++ chip->write_reg(dev, SJA_MOD, SJA_MOD_RM); ++ ++ /* Read reset mode bit, multiple tests */ ++ do { ++ if (chip->read_reg(dev, SJA_MOD) & SJA_MOD_RM) ++ break; ++ ++ if (lock_ctx) ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ if (lock_ctx) ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ } while(--wait_loop); ++ ++ ++ if (wait_loop) { ++ /* Volatile state could have changed while we slept busy. */ ++ dev->state = CAN_STATE_STOPPED; ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ } else { ++ ret = -EAGAIN; ++ /* Enable interrupts again as we did not succeed */ ++ chip->write_reg(dev, SJA_IER, SJA1000_IER); ++ } ++ ++ out: ++ return ret; ++} ++ ++ ++ ++/* ++ * Set controller into operating mode. ++ * ++ * If coming from CAN_STATE_SLEEPING, the controller must wait ++ * some time to avoid bus errors. Measured on an PHYTEC eNET card, ++ * this time was 110 microseconds. 
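++ * The wait is done with the device lock temporarily released, so other
++ * contexts are not blocked for the whole wake-up delay.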
++ */ ++static int rtcan_sja_mode_start(struct rtcan_device *dev, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0; ++ u8 mod_reg; ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ /* We won't forget that state in the device structure is volatile and ++ * access to it will not be optimized by the compiler. So ... */ ++ ++ mod_reg = 0; ++ if (dev->ctrl_mode & CAN_CTRLMODE_LISTENONLY) ++ mod_reg |= SJA_MOD_LOM; ++ if (dev->ctrl_mode & CAN_CTRLMODE_LOOPBACK) ++ mod_reg |= SJA_MOD_STM; ++ ++ switch (dev->state) { ++ ++ case CAN_STATE_ACTIVE: ++ case CAN_STATE_BUS_WARNING: ++ case CAN_STATE_BUS_PASSIVE: ++ break; ++ ++ case CAN_STATE_STOPPED: ++ /* Clear error counters */ ++ chip->write_reg(dev, SJA_RXERR , 0); ++ chip->write_reg(dev, SJA_TXERR , 0); ++ /* Clear error code capture (i.e. read it) */ ++ chip->read_reg(dev, SJA_ECC); ++ /* Set error active state */ ++ dev->state = CAN_STATE_ACTIVE; ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ /* Enable interrupts */ ++ chip->write_reg(dev, SJA_IER, SJA1000_IER); ++ ++ /* Clear reset mode bit in SJA1000 */ ++ chip->write_reg(dev, SJA_MOD, mod_reg); ++ ++ break; ++ ++ case CAN_STATE_SLEEPING: ++ /* Trigger Wake-up interrupt */ ++ chip->write_reg(dev, SJA_MOD, mod_reg); ++ ++ /* Ok, coming from sleep mode is problematic. We have to wait ++ * for the SJA1000 to get on both feet again. */ ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ rtdm_task_busy_sleep(110000); ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ ++ /* Meanwhile, the Wake-up interrupt was serviced and has set the ++ * right state. As we don't want to set it back jump out. */ ++ goto out; ++ ++ break; ++ ++ case CAN_STATE_BUS_OFF: ++ /* Trigger bus-off recovery */ ++ chip->write_reg(dev, SJA_MOD, mod_reg); ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ /* Set error active state */ ++ dev->state = CAN_STATE_ACTIVE; ++ ++ break; ++ ++ default: ++ /* Never reached, but we don't want nasty compiler warnings ... */ ++ break; ++ } ++ ++ out: ++ return ret; ++} ++ ++can_state_t rtcan_sja_get_state(struct rtcan_device *dev) ++{ ++ can_state_t state = dev->state; ++ rtcan_sja_is_operating(dev, &state); ++ return state; ++} ++ ++int rtcan_sja_set_mode(struct rtcan_device *dev, ++ can_mode_t mode, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0; ++ can_state_t state; ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000*)dev->priv; ++ ++ switch (mode) { ++ ++ case CAN_MODE_STOP: ++ ret = rtcan_sja_mode_stop(dev, lock_ctx); ++ break; ++ ++ case CAN_MODE_START: ++ ret = rtcan_sja_mode_start(dev, lock_ctx); ++ break; ++ ++ case CAN_MODE_SLEEP: ++ ++ state = dev->state; ++ ++ /* Controller must operate, otherwise go out */ ++ if (!rtcan_sja_is_operating(dev, &state)) { ++ ret = -ENETDOWN; ++ goto mode_sleep_out; ++ } ++ ++ /* Is controller sleeping yet? If yes, go out */ ++ if (state == CAN_STATE_SLEEPING) ++ goto mode_sleep_out; ++ ++ /* Remember into which state to return when we ++ * wake up */ ++ dev->state_before_sleep = state; ++ ++ /* Let's take a nap. (Now I REALLY understand ++ * the meaning of interrupts ...) 
*/ ++ state = CAN_STATE_SLEEPING; ++ chip->write_reg(dev, SJA_MOD, ++ chip->read_reg(dev, SJA_MOD) | SJA_MOD_SM); ++ ++ mode_sleep_out: ++ dev->state = state; ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ break; ++ } ++ ++ return ret; ++} ++ ++int rtcan_sja_set_bit_time(struct rtcan_device *dev, ++ struct can_bittime *bit_time, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ u8 btr0, btr1; ++ ++ switch (bit_time->type) { ++ case CAN_BITTIME_BTR: ++ btr0 = bit_time->btr.btr0; ++ btr1 = bit_time->btr.btr1; ++ break; ++ ++ case CAN_BITTIME_STD: ++ btr0 = (BTR0_SET_BRP(bit_time->std.brp) | ++ BTR0_SET_SJW(bit_time->std.sjw)); ++ btr1 = (BTR1_SET_TSEG1(bit_time->std.prop_seg + ++ bit_time->std.phase_seg1) | ++ BTR1_SET_TSEG2(bit_time->std.phase_seg2) | ++ BTR1_SET_SAM(bit_time->std.sam)); ++ ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ printk("%s: btr0=%#x btr1=%#x\n", __func__, btr0, btr1); ++ chip->write_reg(dev, SJA_BTR0, btr0); ++ chip->write_reg(dev, SJA_BTR1, btr1); ++ ++ return 0; ++} ++ ++void rtcan_sja_enable_bus_err(struct rtcan_device *dev) ++{ ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ if (chip->bus_err_on < 2) { ++ if (chip->bus_err_on < 1) ++ chip->read_reg(dev, SJA_ECC); ++ chip->bus_err_on = 2; ++ } ++} ++ ++/* ++ * Start a transmission to a SJA1000 device ++ */ ++static int rtcan_sja_start_xmit(struct rtcan_device *dev, ++ can_frame_t *frame) ++{ ++ int i; ++ /* "Real" size of the payload */ ++ u8 size; ++ /* Content of frame information register */ ++ u8 fir; ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ /* Get DLC */ ++ fir = frame->can_dlc; ++ ++ /* If DLC exceeds 8 bytes adjust it to 8 (for the payload) */ ++ size = (fir > 8) ? 8 : fir; ++ ++ ++ if (frame->can_id & CAN_EFF_FLAG) { ++ /* Send extended frame */ ++ fir |= SJA_FIR_EFF; ++ ++ /* Write ID */ ++ chip->write_reg(dev, SJA_ID1, frame->can_id >> 21); ++ chip->write_reg(dev, SJA_ID2, frame->can_id >> 13); ++ chip->write_reg(dev, SJA_ID3, frame->can_id >> 5); ++ chip->write_reg(dev, SJA_ID4, frame->can_id << 3); ++ ++ /* RTR? */ ++ if (frame->can_id & CAN_RTR_FLAG) ++ fir |= SJA_FIR_RTR; ++ ++ else { ++ /* No RTR, write data bytes */ ++ for (i = 0; i < size; i++) ++ chip->write_reg(dev, SJA_DATA_EFF(i), ++ frame->data[i]); ++ } ++ ++ } else { ++ /* Send standard frame */ ++ ++ /* Write ID */ ++ chip->write_reg(dev, SJA_ID1, frame->can_id >> 3); ++ chip->write_reg(dev, SJA_ID2, frame->can_id << 5); ++ ++ /* RTR? 
*/ ++ if (frame->can_id & CAN_RTR_FLAG) ++ fir |= SJA_FIR_RTR; ++ ++ else { ++ /* No RTR, write data bytes */ ++ for (i = 0; i < size; i++) ++ chip->write_reg(dev, SJA_DATA_SFF(i), ++ frame->data[i]); ++ } ++ } ++ ++ ++ /* Write frame information register */ ++ chip->write_reg(dev, SJA_FIR, fir); ++ ++ /* Push the 'send' button */ ++ if (dev->ctrl_mode & CAN_CTRLMODE_LOOPBACK) ++ chip->write_reg(dev, SJA_CMR, SJA_CMR_SRR); ++ else ++ chip->write_reg(dev, SJA_CMR, SJA_CMR_TR); ++ ++ return 0; ++} ++ ++ ++ ++/* ++ * SJA1000 chip configuration ++ */ ++static void sja1000_chip_config(struct rtcan_device *dev) ++{ ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000* )dev->priv; ++ ++ chip->write_reg(dev, SJA_CDR, chip->cdr); ++ chip->write_reg(dev, SJA_OCR, chip->ocr); ++ ++ chip->write_reg(dev, SJA_AMR0, 0xFF); ++ chip->write_reg(dev, SJA_AMR1, 0xFF); ++ chip->write_reg(dev, SJA_AMR2, 0xFF); ++ chip->write_reg(dev, SJA_AMR3, 0xFF); ++} ++ ++ ++int rtcan_sja1000_register(struct rtcan_device *dev) ++{ ++ int ret; ++ struct rtcan_sja1000 *chip = dev->priv; ++ ++ if (chip == NULL) ++ return -EINVAL; ++ ++ /* Set dummy state for following call */ ++ dev->state = CAN_STATE_ACTIVE; ++ /* Enter reset mode */ ++ rtcan_sja_mode_stop(dev, NULL); ++ ++ if ((chip->read_reg(dev, SJA_SR) & ++ (SJA_SR_RBS | SJA_SR_DOS | SJA_SR_TBS)) != SJA_SR_TBS) { ++ printk("ERROR! No SJA1000 device found!\n"); ++ return -ENODEV; ++ } ++ ++ dev->ctrl_name = sja_ctrl_name; ++ ++ dev->hard_start_xmit = rtcan_sja_start_xmit; ++ dev->do_set_mode = rtcan_sja_set_mode; ++ dev->do_get_state = rtcan_sja_get_state; ++ dev->do_set_bit_time = rtcan_sja_set_bit_time; ++ dev->do_enable_bus_err = rtcan_sja_enable_bus_err; ++#ifndef CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++ dev->bittiming_const = &sja1000_bittiming_const; ++#endif ++ ++ chip->bus_err_on = 1; ++ ++ ret = rtdm_irq_request(&dev->irq_handle, ++ chip->irq_num, rtcan_sja_interrupt, ++ chip->irq_flags, sja_ctrl_name, dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d: IRQ %d is %s!\n", ++ ret, chip->irq_num, ret == -EBUSY ? 
++ "busy, check shared interrupt support" : "invalid"); ++ return ret; ++ } ++ ++ sja1000_chip_config(dev); ++ ++ /* Register RTDM device */ ++ ret = rtcan_dev_register(dev); ++ if (ret) { ++ printk(KERN_ERR ++ "ERROR %d while trying to register RTCAN device!\n", ret); ++ goto out_irq_free; ++ } ++ ++ rtcan_sja_create_proc(dev); ++ ++ return 0; ++ ++ out_irq_free: ++ rtdm_irq_free(&dev->irq_handle); ++ ++ return ret; ++} ++ ++ ++/* Cleanup module */ ++void rtcan_sja1000_unregister(struct rtcan_device *dev) ++{ ++ printk("Unregistering SJA1000 device %s\n", dev->name); ++ ++ rtdm_irq_disable(&dev->irq_handle); ++ rtcan_sja_mode_stop(dev, NULL); ++ rtdm_irq_free(&dev->irq_handle); ++ rtcan_sja_remove_proc(dev); ++ rtcan_dev_unregister(dev); ++} ++ ++int __init rtcan_sja_init(void) ++{ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ printk("RTCAN SJA1000 driver initialized\n"); ++ return 0; ++} ++ ++ ++void __exit rtcan_sja_exit(void) ++{ ++ printk("%s removed\n", sja_ctrl_name); ++} ++ ++module_init(rtcan_sja_init); ++module_exit(rtcan_sja_exit); ++ ++EXPORT_SYMBOL_GPL(rtcan_sja1000_register); ++EXPORT_SYMBOL_GPL(rtcan_sja1000_unregister); +--- linux/drivers/xenomai/can/sja1000/rtcan_isa.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_isa.c 2021-04-07 16:01:26.326635454 +0800 +@@ -0,0 +1,201 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Copyright (C) 2005, 2006, 2009 Sebastian Smolorz ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; eitherer version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "sja1000-isa" ++ ++#define RTCAN_ISA_MAX_DEV 4 ++ ++static char *isa_board_name = "ISA-Board"; ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTCAN board driver for standard ISA boards"); ++MODULE_SUPPORTED_DEVICE("ISA board"); ++MODULE_LICENSE("GPL"); ++ ++static u16 io[RTCAN_ISA_MAX_DEV]; ++static int irq[RTCAN_ISA_MAX_DEV]; ++static u32 can_clock[RTCAN_ISA_MAX_DEV]; ++static u8 ocr[RTCAN_ISA_MAX_DEV]; ++static u8 cdr[RTCAN_ISA_MAX_DEV]; ++ ++module_param_array(io, ushort, NULL, 0444); ++module_param_array(irq, int, NULL, 0444); ++module_param_array(can_clock, uint, NULL, 0444); ++module_param_array(ocr, byte, NULL, 0444); ++module_param_array(cdr, byte, NULL, 0444); ++ ++MODULE_PARM_DESC(io, "The io-port address"); ++MODULE_PARM_DESC(irq, "The interrupt number"); ++MODULE_PARM_DESC(can_clock, "External clock frequency (default 16 MHz)"); ++MODULE_PARM_DESC(ocr, "Value of output control register (default 0x1a)"); ++MODULE_PARM_DESC(cdr, "Value of clock divider register (default 0xc8"); ++ ++#define RTCAN_ISA_PORT_SIZE 32 ++ ++struct rtcan_isa ++{ ++ u16 io; ++}; ++ ++static struct rtcan_device *rtcan_isa_devs[RTCAN_ISA_MAX_DEV]; ++ ++static u8 rtcan_isa_readreg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_isa *board = (struct rtcan_isa *)dev->board_priv; ++ return inb(board->io + port); ++} ++ ++static void rtcan_isa_writereg(struct rtcan_device *dev, int port, u8 val) ++{ ++ struct rtcan_isa *board = (struct rtcan_isa *)dev->board_priv; ++ outb(val, board->io + port); ++} ++ ++ ++int __init rtcan_isa_init_one(int idx) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_isa *board; ++ int ret; ++ ++ if ((dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_isa))) == NULL) ++ return -ENOMEM; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_isa *)dev->board_priv; ++ ++ dev->board_name = isa_board_name; ++ ++ board->io = io[idx]; ++ ++ chip->irq_num = irq[idx]; ++ chip->irq_flags = RTDM_IRQTYPE_SHARED | RTDM_IRQTYPE_EDGE; ++ ++ chip->read_reg = rtcan_isa_readreg; ++ chip->write_reg = rtcan_isa_writereg; ++ ++ /* Check and request I/O ports */ ++ if (!request_region(board->io, RTCAN_ISA_PORT_SIZE, RTCAN_DRV_NAME)) { ++ ret = -EBUSY; ++ goto out_dev_free; ++ } ++ ++ /* Clock frequency in Hz */ ++ if (can_clock[idx]) ++ dev->can_sys_clock = can_clock[idx] / 2; ++ else ++ dev->can_sys_clock = 8000000; /* 16/2 MHz */ ++ ++ /* Output control register */ ++ if (ocr[idx]) ++ chip->ocr = ocr[idx]; ++ else ++ chip->ocr = SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_PUSHPULL; ++ ++ if (cdr[idx]) ++ chip->cdr = cdr[idx]; ++ else ++ chip->cdr = SJA_CDR_CAN_MODE | SJA_CDR_CLK_OFF | SJA_CDR_CBP; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d while trying to register SJA1000 " ++ "device!\n", ret); ++ goto out_free_region; ++ } ++ ++ rtcan_isa_devs[idx] = dev; ++ return 0; ++ ++ out_free_region: ++ release_region(board->io, RTCAN_ISA_PORT_SIZE); ++ ++ out_dev_free: ++ rtcan_dev_free(dev); ++ ++ return ret; ++} ++ ++static void rtcan_isa_exit(void); ++ ++/** Init module */ ++static int __init rtcan_isa_init(void) ++{ ++ int i, err; ++ int devices = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ for (i = 0; i < RTCAN_ISA_MAX_DEV && 
io[i] != 0; i++) { ++ err = rtcan_isa_init_one(i); ++ if (err) { ++ rtcan_isa_exit(); ++ return err; ++ } ++ devices++; ++ } ++ if (devices) ++ return 0; ++ ++ printk(KERN_ERR "ERROR! No devices specified! " ++ "Use io=[,...] irq=[,...]\n"); ++ return -EINVAL; ++} ++ ++ ++/** Cleanup module */ ++static void rtcan_isa_exit(void) ++{ ++ int i; ++ struct rtcan_device *dev; ++ ++ for (i = 0; i < RTCAN_ISA_MAX_DEV; i++) { ++ dev = rtcan_isa_devs[i]; ++ if (!dev) ++ continue; ++ rtcan_sja1000_unregister(dev); ++ release_region(io[i], RTCAN_ISA_PORT_SIZE); ++ rtcan_dev_free(dev); ++ } ++} ++ ++module_init(rtcan_isa_init); ++module_exit(rtcan_isa_exit); +--- linux/drivers/xenomai/can/sja1000/rtcan_peak_dng.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_peak_dng.c 2021-04-07 16:01:26.321635461 +0800 +@@ -0,0 +1,390 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from the PCAN project file driver/src/pcan_dongle.c: ++ * ++ * Copyright (C) 2001-2006 PEAK System-Technik GmbH ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "PEAK-Dongle" ++ ++#define RTCAN_PEAK_DNG_MAX_DEV 1 ++ ++static char *dongle_board_name = "PEAK-Dongle"; ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTCAN board driver for PEAK-Dongle"); ++MODULE_SUPPORTED_DEVICE("PEAK-Dongle CAN controller"); ++MODULE_LICENSE("GPL"); ++ ++static char *type[RTCAN_PEAK_DNG_MAX_DEV]; ++static ushort io[RTCAN_PEAK_DNG_MAX_DEV]; ++static char irq[RTCAN_PEAK_DNG_MAX_DEV]; ++ ++module_param_array(type, charp, NULL, 0444); ++module_param_array(io, ushort, NULL, 0444); ++module_param_array(irq, byte, NULL, 0444); ++ ++MODULE_PARM_DESC(type, "The type of interface (sp, epp)"); ++MODULE_PARM_DESC(io, "The io-port address"); ++MODULE_PARM_DESC(irq, "The interrupt number"); ++ ++#define DONGLE_TYPE_SP 0 ++#define DONGLE_TYPE_EPP 1 ++ ++#define DNG_PORT_SIZE 4 /* the address range of the dongle-port */ ++#define ECR_PORT_SIZE 1 /* size of the associated ECR register */ ++ ++struct rtcan_peak_dng ++{ ++ u16 ioport; ++ u16 ecr; /* ECR register in case of EPP */ ++ u8 old_data; /* the overwritten contents of the port registers */ ++ u8 old_ctrl; ++ u8 old_ecr; ++ u8 type; ++}; ++ ++static struct rtcan_device *rtcan_peak_dng_devs[RTCAN_PEAK_DNG_MAX_DEV]; ++ ++static u16 dng_ports[] = {0x378, 0x278, 0x3bc, 0x2bc}; ++static u8 dng_irqs[] = {7, 5, 7, 5}; ++ ++static unsigned char nibble_decode[32] = ++{ ++ 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, ++ 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, ++ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, ++ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 ++}; ++ ++/* Enable and disable irqs */ ++static inline void rtcan_parport_disable_irq(u32 port) ++{ ++ u32 pc = port + 2; ++ outb(inb(pc) & ~0x10, pc); ++} ++ ++static inline void rtcan_parport_enable_irq(u32 port) ++{ ++ u32 pc = port + 2; ++ outb(inb(pc) | 0x10, pc); ++} ++ ++/* Functions for SP port */ ++static u8 rtcan_peak_dng_sp_readreg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ u32 pa = dng->ioport; ++ u32 pb = pa + 1; ++ u32 pc = pb + 1; ++ u8 b0, b1 ; ++ u8 irq_enable = inb(pc) & 0x10; /* don't influence irq_enable */ ++ ++ outb((0x0B ^ 0x0D) | irq_enable, pc); ++ outb((port & 0x1F) | 0x80, pa); ++ outb((0x0B ^ 0x0C) | irq_enable, pc); ++ b1=nibble_decode[inb(pb)>>3]; ++ outb(0x40, pa); ++ b0=nibble_decode[inb(pb)>>3]; ++ outb((0x0B ^ 0x0D) | irq_enable, pc); ++ ++ return (b1 << 4) | b0 ; ++} ++ ++static void rtcan_peak_dng_writereg(struct rtcan_device *dev, int port, u8 data) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ u32 pa = dng->ioport; ++ u32 pc = pa + 2; ++ u8 irq_enable = inb(pc) & 0x10; /* don't influence irq_enable */ ++ ++ outb((0x0B ^ 0x0D) | irq_enable, pc); ++ outb(port & 0x1F, pa); ++ outb((0x0B ^ 0x0C) | irq_enable, pc); ++ outb(data, pa); ++ outb((0x0B ^ 0x0D) | irq_enable, pc); ++} ++ ++/* Functions for EPP port */ ++static u8 rtcan_peak_dng_epp_readreg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ u32 pa = dng->ioport; ++ u32 pc = pa + 2; ++ u8 val; ++ u8 irq_enable = inb(pc) & 0x10; /* don't influence irq_enable */ ++ ++ outb((0x0B ^ 0x0F) | irq_enable, pc); ++ outb((port & 0x1F) | 0x80, pa); ++ outb((0x0B ^ 0x2E) | irq_enable, pc); ++ val = inb(pa); ++ 
outb((0x0B ^ 0x0F) | irq_enable, pc); ++ ++ return val; ++} ++ ++ ++/* to switch epp on or restore register */ ++static void dongle_set_ecr(u16 port, struct rtcan_peak_dng *dng) ++{ ++ u32 ecr = dng->ecr; ++ ++ dng->old_ecr = inb(ecr); ++ outb((dng->old_ecr & 0x1F) | 0x20, ecr); ++ ++ if (dng->old_ecr == 0xff) ++ printk(KERN_DEBUG "%s: realy ECP mode configured?\n", RTCAN_DRV_NAME); ++} ++ ++static void dongle_restore_ecr(u16 port, struct rtcan_peak_dng *dng) ++{ ++ u32 ecr = dng->ecr; ++ ++ outb(dng->old_ecr, ecr); ++ ++ printk(KERN_DEBUG "%s: restore ECR\n", RTCAN_DRV_NAME); ++} ++ ++static inline void rtcan_peak_dng_enable(struct rtcan_device *dev) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ u32 port = dng->ioport; ++ ++ /* save old port contents */ ++ dng->old_data = inb(port); ++ dng->old_ctrl = inb(port + 2); ++ ++ /* switch to epp mode if possible */ ++ if (dng->type == DONGLE_TYPE_EPP) ++ dongle_set_ecr(port, dng); ++ ++ rtcan_parport_enable_irq(port); ++} ++ ++static inline void rtcan_peak_dng_disable(struct rtcan_device *dev) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ u32 port = dng->ioport; ++ ++ rtcan_parport_disable_irq(port); ++ ++ if (dng->type == DONGLE_TYPE_EPP) ++ dongle_restore_ecr(port, dng); ++ ++ /* restore port state */ ++ outb(dng->old_data, port); ++ outb(dng->old_ctrl, port + 2); ++} ++ ++/** Init module */ ++int __init rtcan_peak_dng_init_one(int idx) ++{ ++ int ret, dtype; ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *sja; ++ struct rtcan_peak_dng *dng; ++ ++ if (strncmp(type[idx], "sp", 2) == 0) ++ dtype = DONGLE_TYPE_SP; ++ else if (strncmp(type[idx], "epp", 3) == 0) ++ dtype = DONGLE_TYPE_EPP; ++ else { ++ printk("%s: type %s is invalid, use \"sp\" or \"epp\".", ++ RTCAN_DRV_NAME, type[idx]); ++ return -EINVAL; ++ } ++ ++ if ((dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_peak_dng))) == NULL) ++ return -ENOMEM; ++ ++ sja = (struct rtcan_sja1000 *)dev->priv; ++ dng = (struct rtcan_peak_dng *)dev->board_priv; ++ ++ dev->board_name = dongle_board_name; ++ ++ if (io[idx]) ++ dng->ioport = io[idx]; ++ else ++ dng->ioport = dng_ports[idx]; ++ ++ if (irq[idx]) ++ sja->irq_num = irq[idx]; ++ else ++ sja->irq_num = dng_irqs[idx]; ++ sja->irq_flags = 0; ++ ++ if (dtype == DONGLE_TYPE_SP) { ++ sja->read_reg = rtcan_peak_dng_sp_readreg; ++ sja->write_reg = rtcan_peak_dng_writereg; ++ dng->ecr = 0; /* set to anything */ ++ } else { ++ sja->read_reg = rtcan_peak_dng_epp_readreg; ++ sja->write_reg = rtcan_peak_dng_writereg; ++ dng->ecr = dng->ioport + 0x402; ++ } ++ ++ /* Check and request I/O ports */ ++ if (!request_region(dng->ioport, DNG_PORT_SIZE, RTCAN_DRV_NAME)) { ++ ret = -EBUSY; ++ goto out_dev_free; ++ } ++ ++ if (dng->type == DONGLE_TYPE_EPP) { ++ if (!request_region(dng->ecr, ECR_PORT_SIZE, RTCAN_DRV_NAME)) { ++ ret = -EBUSY; ++ goto out_free_region; ++ } ++ } ++ ++ /* Clock frequency in Hz */ ++ dev->can_sys_clock = 8000000; /* 16/2 MHz */ ++ ++ /* Output control register */ ++ sja->ocr = SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_PUSHPULL; ++ ++ sja->cdr = SJA_CDR_CAN_MODE; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ rtcan_peak_dng_enable(dev); ++ ++ /* Register RTDM device */ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR while trying to register SJA1000 device %d!\n", ++ ret); ++ goto out_free_region2; ++ } ++ ++ rtcan_peak_dng_devs[idx] = dev; ++ return 0; ++ ++ out_free_region2: ++ if (dng->type == 
DONGLE_TYPE_EPP) ++ release_region(dng->ecr, ECR_PORT_SIZE); ++ ++ out_free_region: ++ release_region(dng->ioport, DNG_PORT_SIZE); ++ ++ out_dev_free: ++ rtcan_dev_free(dev); ++ ++ return ret; ++} ++ ++void rtcan_peak_dng_exit_one(struct rtcan_device *dev) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ ++ rtcan_sja1000_unregister(dev); ++ rtcan_peak_dng_disable(dev); ++ if (dng->type == DONGLE_TYPE_EPP) ++ release_region(dng->ecr, ECR_PORT_SIZE); ++ release_region(dng->ioport, DNG_PORT_SIZE); ++ rtcan_dev_free(dev); ++} ++ ++static const struct pnp_device_id rtcan_peak_dng_pnp_tbl[] = { ++ /* Standard LPT Printer Port */ ++ {.id = "PNP0400", .driver_data = 0}, ++ /* ECP Printer Port */ ++ {.id = "PNP0401", .driver_data = 0}, ++ { } ++}; ++ ++static int rtcan_peak_dng_pnp_probe(struct pnp_dev *dev, ++ const struct pnp_device_id *id) ++{ ++ return 0; ++} ++ ++static struct pnp_driver rtcan_peak_dng_pnp_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = rtcan_peak_dng_pnp_tbl, ++ .probe = rtcan_peak_dng_pnp_probe, ++}; ++ ++static int pnp_registered; ++ ++/** Cleanup module */ ++static void rtcan_peak_dng_exit(void) ++{ ++ int i; ++ struct rtcan_device *dev; ++ ++ for (i = 0, dev = rtcan_peak_dng_devs[i]; ++ i < RTCAN_PEAK_DNG_MAX_DEV && dev != NULL; ++ i++) ++ rtcan_peak_dng_exit_one(dev); ++ ++ if (pnp_registered) ++ pnp_unregister_driver(&rtcan_peak_dng_pnp_driver); ++} ++ ++/** Init module */ ++static int __init rtcan_peak_dng_init(void) ++{ ++ int i, ret = -EINVAL, done = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ if (pnp_register_driver(&rtcan_peak_dng_pnp_driver) == 0) ++ pnp_registered = 1; ++ ++ for (i = 0; ++ i < RTCAN_PEAK_DNG_MAX_DEV && type[i] != 0; ++ i++) { ++ ++ if ((ret = rtcan_peak_dng_init_one(i)) != 0) { ++ printk(KERN_ERR "%s: Init failed with %d\n", RTCAN_DRV_NAME, ret); ++ goto cleanup; ++ } ++ done++; ++ } ++ if (done) ++ return 0; ++ ++ printk(KERN_ERR "%s: Please specify type=epp or type=sp\n", ++ RTCAN_DRV_NAME); ++ ++cleanup: ++ rtcan_peak_dng_exit(); ++ return ret; ++} ++ ++module_init(rtcan_peak_dng_init); ++module_exit(rtcan_peak_dng_exit); +--- linux/drivers/xenomai/can/sja1000/rtcan_sja1000_proc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_sja1000_proc.c 2021-04-07 16:01:26.316635468 +0800 +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_DEBUG ++ ++static int rtcan_sja_proc_regs(struct seq_file *p, void *data) ++{ ++ struct rtcan_device *dev = (struct rtcan_device *)data; ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ int i; ++ ++ seq_printf(p, "SJA1000 registers"); ++ for (i = 0; i < 0x20; i++) { ++ if ((i % 0x10) == 0) ++ seq_printf(p, "\n%02x:", i); ++ seq_printf(p, " %02x", chip->read_reg(dev, i)); ++ } ++ seq_printf(p, "\n"); ++ return 0; ++} ++ ++static int rtcan_sja_proc_regs_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_sja_proc_regs, PDE_DATA(inode)); ++} ++ ++static const struct file_operations rtcan_sja_proc_regs_ops = { ++ .open = rtcan_sja_proc_regs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++int rtcan_sja_create_proc(struct rtcan_device* dev) ++{ ++ if (!dev->proc_root) ++ return -EINVAL; ++ ++ proc_create_data("registers", S_IFREG | S_IRUGO | S_IWUSR, dev->proc_root, ++ &rtcan_sja_proc_regs_ops, dev); ++ return 0; ++} ++ ++void rtcan_sja_remove_proc(struct rtcan_device* dev) ++{ ++ if (!dev->proc_root) ++ return; ++ ++ remove_proc_entry("registers", dev->proc_root); ++} ++ ++#else /* !CONFIG_XENO_DRIVERS_CAN_DEBUG */ ++ ++void rtcan_sja_remove_proc(struct rtcan_device* dev) ++{ ++} ++ ++int rtcan_sja_create_proc(struct rtcan_device* dev) ++{ ++ return 0; ++} ++#endif /* CONFIG_XENO_DRIVERS_CAN_DEBUG */ +--- linux/drivers/xenomai/can/sja1000/rtcan_ems_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_ems_pci.c 2021-04-07 16:01:26.307635481 +0800 +@@ -0,0 +1,394 @@ ++/* ++ * Copyright (C) 2007, 2016 Wolfgang Grandegger ++ * Copyright (C) 2008 Markus Plessing ++ * Copyright (C) 2008 Sebastian Haas ++ * ++ * Derived from Linux CAN SJA1000 PCI driver "ems_pci". ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the version 2 of the GNU General Public License ++ * as published by the Free Software Foundation ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see . ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "EMS-CPC-PCI-CAN" ++ ++static char *ems_pci_board_name = "EMS-CPC-PCI"; ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTCAN board driver for EMS CPC-PCI/PCIe/104P CAN cards"); ++MODULE_SUPPORTED_DEVICE("EMS CPC-PCI/PCIe/104P CAN card"); ++MODULE_LICENSE("GPL v2"); ++ ++#define EMS_PCI_V1_MAX_CHAN 2 ++#define EMS_PCI_V2_MAX_CHAN 4 ++#define EMS_PCI_MAX_CHAN EMS_PCI_V2_MAX_CHAN ++ ++struct ems_pci_card { ++ int version; ++ int channels; ++ ++ struct pci_dev *pci_dev; ++ struct rtcan_device *rtcan_dev[EMS_PCI_MAX_CHAN]; ++ ++ void __iomem *conf_addr; ++ void __iomem *base_addr; ++}; ++ ++#define EMS_PCI_CAN_CLOCK (16000000 / 2) ++ ++/* ++ * Register definitions and descriptions are from LinCAN 0.3.3. 
++ * ++ * PSB4610 PITA-2 bridge control registers ++ */ ++#define PITA2_ICR 0x00 /* Interrupt Control Register */ ++#define PITA2_ICR_INT0 0x00000002 /* [RC] INT0 Active/Clear */ ++#define PITA2_ICR_INT0_EN 0x00020000 /* [RW] Enable INT0 */ ++ ++#define PITA2_MISC 0x1c /* Miscellaneous Register */ ++#define PITA2_MISC_CONFIG 0x04000000 /* Multiplexed parallel interface */ ++ ++/* ++ * Register definitions for the PLX 9030 ++ */ ++#define PLX_ICSR 0x4c /* Interrupt Control/Status register */ ++#define PLX_ICSR_LINTI1_ENA 0x0001 /* LINTi1 Enable */ ++#define PLX_ICSR_PCIINT_ENA 0x0040 /* PCI Interrupt Enable */ ++#define PLX_ICSR_LINTI1_CLR 0x0400 /* Local Edge Triggerable Interrupt Clear */ ++#define PLX_ICSR_ENA_CLR (PLX_ICSR_LINTI1_ENA | PLX_ICSR_PCIINT_ENA | \ ++ PLX_ICSR_LINTI1_CLR) ++ ++/* ++ * The board configuration is probably following: ++ * RX1 is connected to ground. ++ * TX1 is not connected. ++ * CLKO is not connected. ++ * Setting the OCR register to 0xDA is a good idea. ++ * This means normal output mode, push-pull and the correct polarity. ++ */ ++#define EMS_PCI_OCR (SJA_OCR_TX0_PUSHPULL | SJA_OCR_TX1_PUSHPULL) ++ ++/* ++ * In the CDR register, you should set CBP to 1. ++ * You will probably also want to set the clock divider value to 7 ++ * (meaning direct oscillator output) because the second SJA1000 chip ++ * is driven by the first one CLKOUT output. ++ */ ++#define EMS_PCI_CDR (SJA_CDR_CBP | SJA_CDR_CLKOUT_MASK) ++ ++#define EMS_PCI_V1_BASE_BAR 1 ++#define EMS_PCI_V1_CONF_SIZE 4096 /* size of PITA control area */ ++#define EMS_PCI_V2_BASE_BAR 2 ++#define EMS_PCI_V2_CONF_SIZE 128 /* size of PLX control area */ ++#define EMS_PCI_CAN_BASE_OFFSET 0x400 /* offset where the controllers starts */ ++#define EMS_PCI_CAN_CTRL_SIZE 0x200 /* memory size for each controller */ ++ ++#define EMS_PCI_BASE_SIZE 4096 /* size of controller area */ ++ ++static const struct pci_device_id ems_pci_tbl[] = { ++ /* CPC-PCI v1 */ ++ {PCI_VENDOR_ID_SIEMENS, 0x2104, PCI_ANY_ID, PCI_ANY_ID,}, ++ /* CPC-PCI v2 */ ++ {PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_PLX, 0x4000}, ++ /* CPC-104P v2 */ ++ {PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_PLX, 0x4002}, ++ {0,} ++}; ++MODULE_DEVICE_TABLE(pci, ems_pci_tbl); ++ ++/* ++ * Helper to read internal registers from card logic (not CAN) ++ */ ++static u8 ems_pci_v1_readb(struct ems_pci_card *card, unsigned int port) ++{ ++ return readb((void __iomem *)card->base_addr + (port * 4)); ++} ++ ++static u8 ems_pci_v1_read_reg(struct rtcan_device *dev, int port) ++{ ++ return readb((void __iomem *)dev->base_addr + (port * 4)); ++} ++ ++static void ems_pci_v1_write_reg(struct rtcan_device *dev, ++ int port, u8 val) ++{ ++ writeb(val, (void __iomem *)dev->base_addr + (port * 4)); ++} ++ ++static void ems_pci_v1_post_irq(struct rtcan_device *dev) ++{ ++ struct ems_pci_card *card = (struct ems_pci_card *)dev->board_priv; ++ ++ /* reset int flag of pita */ ++ writel(PITA2_ICR_INT0_EN | PITA2_ICR_INT0, ++ card->conf_addr + PITA2_ICR); ++} ++ ++static u8 ems_pci_v2_read_reg(struct rtcan_device *dev, int port) ++{ ++ return readb((void __iomem *)dev->base_addr + port); ++} ++ ++static void ems_pci_v2_write_reg(struct rtcan_device *dev, ++ int port, u8 val) ++{ ++ writeb(val, (void __iomem *)dev->base_addr + port); ++} ++ ++static void ems_pci_v2_post_irq(struct rtcan_device *dev) ++{ ++ struct ems_pci_card *card = (struct ems_pci_card *)dev->board_priv; ++ ++ writel(PLX_ICSR_ENA_CLR, card->conf_addr + PLX_ICSR); ++} ++ ++/* ++ * Check if a CAN 
controller is present at the specified location ++ * by trying to set 'em into the PeliCAN mode ++ */ ++static inline int ems_pci_check_chan(struct rtcan_device *dev) ++{ ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ unsigned char res; ++ ++ /* Make sure SJA1000 is in reset mode */ ++ chip->write_reg(dev, SJA_MOD, 1); ++ ++ chip->write_reg(dev, SJA_CDR, SJA_CDR_CAN_MODE); ++ ++ /* read reset-values */ ++ res = chip->read_reg(dev, SJA_CDR); ++ ++ if (res == SJA_CDR_CAN_MODE) ++ return 1; ++ ++ return 0; ++} ++ ++static void ems_pci_del_card(struct pci_dev *pdev) ++{ ++ struct ems_pci_card *card = pci_get_drvdata(pdev); ++ struct rtcan_device *dev; ++ int i = 0; ++ ++ for (i = 0; i < card->channels; i++) { ++ dev = card->rtcan_dev[i]; ++ ++ if (!dev) ++ continue; ++ ++ dev_info(&pdev->dev, "Removing %s.\n", dev->name); ++ rtcan_sja1000_unregister(dev); ++ rtcan_dev_free(dev); ++ } ++ ++ if (card->base_addr != NULL) ++ pci_iounmap(card->pci_dev, card->base_addr); ++ ++ if (card->conf_addr != NULL) ++ pci_iounmap(card->pci_dev, card->conf_addr); ++ ++ kfree(card); ++ ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++static void ems_pci_card_reset(struct ems_pci_card *card) ++{ ++ /* Request board reset */ ++ writeb(0, card->base_addr); ++} ++ ++/* ++ * Probe PCI device for EMS CAN signature and register each available ++ * CAN channel to RTCAN subsystem. ++ */ ++static int ems_pci_add_card(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtcan_sja1000 *chip; ++ struct rtcan_device *dev; ++ struct ems_pci_card *card; ++ int max_chan, conf_size, base_bar; ++ int err, i; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ /* Enabling PCI device */ ++ if (pci_enable_device(pdev) < 0) { ++ dev_err(&pdev->dev, "Enabling PCI device failed\n"); ++ return -ENODEV; ++ } ++ ++ /* Allocating card structures to hold addresses, ... */ ++ card = kzalloc(sizeof(*card), GFP_KERNEL); ++ if (card == NULL) { ++ pci_disable_device(pdev); ++ return -ENOMEM; ++ } ++ ++ pci_set_drvdata(pdev, card); ++ ++ card->pci_dev = pdev; ++ ++ card->channels = 0; ++ ++ if (pdev->vendor == PCI_VENDOR_ID_PLX) { ++ card->version = 2; /* CPC-PCI v2 */ ++ max_chan = EMS_PCI_V2_MAX_CHAN; ++ base_bar = EMS_PCI_V2_BASE_BAR; ++ conf_size = EMS_PCI_V2_CONF_SIZE; ++ } else { ++ card->version = 1; /* CPC-PCI v1 */ ++ max_chan = EMS_PCI_V1_MAX_CHAN; ++ base_bar = EMS_PCI_V1_BASE_BAR; ++ conf_size = EMS_PCI_V1_CONF_SIZE; ++ } ++ ++ /* Remap configuration space and controller memory area */ ++ card->conf_addr = pci_iomap(pdev, 0, conf_size); ++ if (card->conf_addr == NULL) { ++ err = -ENOMEM; ++ goto failure_cleanup; ++ } ++ ++ card->base_addr = pci_iomap(pdev, base_bar, EMS_PCI_BASE_SIZE); ++ if (card->base_addr == NULL) { ++ err = -ENOMEM; ++ goto failure_cleanup; ++ } ++ ++ if (card->version == 1) { ++ /* Configure PITA-2 parallel interface (enable MUX) */ ++ writel(PITA2_MISC_CONFIG, card->conf_addr + PITA2_MISC); ++ ++ /* Check for unique EMS CAN signature */ ++ if (ems_pci_v1_readb(card, 0) != 0x55 || ++ ems_pci_v1_readb(card, 1) != 0xAA || ++ ems_pci_v1_readb(card, 2) != 0x01 || ++ ems_pci_v1_readb(card, 3) != 0xCB || ++ ems_pci_v1_readb(card, 4) != 0x11) { ++ dev_err(&pdev->dev, ++ "Not EMS Dr. 
Thomas Wuensche interface\n"); ++ err = -ENODEV; ++ goto failure_cleanup; ++ } ++ } ++ ++ ems_pci_card_reset(card); ++ ++ for (i = 0; i < max_chan; i++) { ++ dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), 0); ++ if (!dev) { ++ err = -ENOMEM; ++ goto failure_cleanup; ++ } ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ dev->board_name = ems_pci_board_name; ++ dev->board_priv = card; ++ ++ card->rtcan_dev[i] = dev; ++ chip = card->rtcan_dev[i]->priv; ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ ++ dev->base_addr = (unsigned long)card->base_addr + ++ EMS_PCI_CAN_BASE_OFFSET + (i * EMS_PCI_CAN_CTRL_SIZE); ++ if (card->version == 1) { ++ chip->read_reg = ems_pci_v1_read_reg; ++ chip->write_reg = ems_pci_v1_write_reg; ++ chip->irq_ack = ems_pci_v1_post_irq; ++ } else { ++ chip->read_reg = ems_pci_v2_read_reg; ++ chip->write_reg = ems_pci_v2_write_reg; ++ chip->irq_ack = ems_pci_v2_post_irq; ++ } ++ ++ /* Check if channel is present */ ++ if (ems_pci_check_chan(dev)) { ++ dev->can_sys_clock = EMS_PCI_CAN_CLOCK; ++ chip->ocr = EMS_PCI_OCR | SJA_OCR_MODE_NORMAL; ++ chip->cdr = EMS_PCI_CDR | SJA_CDR_CAN_MODE; ++ ++ if (card->version == 1) ++ /* reset int flag of pita */ ++ writel(PITA2_ICR_INT0_EN | PITA2_ICR_INT0, ++ card->conf_addr + PITA2_ICR); ++ else ++ /* enable IRQ in PLX 9030 */ ++ writel(PLX_ICSR_ENA_CLR, ++ card->conf_addr + PLX_ICSR); ++ ++ /* Register SJA1000 device */ ++ err = rtcan_sja1000_register(dev); ++ if (err) { ++ dev_err(&pdev->dev, "Registering device failed " ++ "(err=%d)\n", err); ++ rtcan_dev_free(dev); ++ goto failure_cleanup; ++ } ++ ++ card->channels++; ++ ++ dev_info(&pdev->dev, "Channel #%d at 0x%p, irq %d " ++ "registered as %s\n", i + 1, ++ (void* __iomem)dev->base_addr, chip->irq_num, ++ dev->name); ++ } else { ++ dev_err(&pdev->dev, "Channel #%d not detected\n", ++ i + 1); ++ rtcan_dev_free(dev); ++ } ++ } ++ ++ if (!card->channels) { ++ err = -ENODEV; ++ goto failure_cleanup; ++ } ++ ++ return 0; ++ ++failure_cleanup: ++ dev_err(&pdev->dev, "Error: %d. Cleaning Up.\n", err); ++ ++ ems_pci_del_card(pdev); ++ ++ return err; ++} ++ ++static struct pci_driver ems_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = ems_pci_tbl, ++ .probe = ems_pci_add_card, ++ .remove = ems_pci_del_card, ++}; ++ ++module_pci_driver(ems_pci_driver); +--- linux/drivers/xenomai/can/sja1000/rtcan_mem.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_mem.c 2021-04-07 16:01:26.302635488 +0800 +@@ -0,0 +1,216 @@ ++/* ++ * Copyright (C) 2006 Matthias Fuchs , ++ * Jan Kiszka ++ * ++ * RTCAN driver for memory mapped SJA1000 CAN controller ++ * This code has been tested on esd's CPCI405/EPPC405 PPC405 systems. ++ * ++ * This driver is derived from the rtcan-isa driver by ++ * Wolfgang Grandegger and Sebastian Smolorz. ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; eitherer version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "sja1000-mem" ++ ++#define RTCAN_MEM_MAX_DEV 4 ++ ++static char *mem_board_name = "mem mapped"; ++ ++MODULE_AUTHOR("Matthias Fuchs "); ++MODULE_DESCRIPTION("RTCAN driver for memory mapped SJA1000 controller"); ++MODULE_SUPPORTED_DEVICE("mem mapped"); ++MODULE_LICENSE("GPL"); ++ ++static u32 mem[RTCAN_MEM_MAX_DEV]; ++static int irq[RTCAN_MEM_MAX_DEV]; ++static u32 can_clock[RTCAN_MEM_MAX_DEV]; ++static u8 ocr[RTCAN_MEM_MAX_DEV]; ++static u8 cdr[RTCAN_MEM_MAX_DEV]; ++ ++module_param_array(mem, uint, NULL, 0444); ++module_param_array(irq, int, NULL, 0444); ++module_param_array(can_clock, uint, NULL, 0444); ++module_param_array(ocr, byte, NULL, 0444); ++module_param_array(cdr, byte, NULL, 0444); ++ ++MODULE_PARM_DESC(mem, "The io-memory address"); ++MODULE_PARM_DESC(irq, "The interrupt number"); ++MODULE_PARM_DESC(can_clock, "External clock frequency (default 16 MHz)"); ++MODULE_PARM_DESC(ocr, "Value of output control register (default 0x1a)"); ++MODULE_PARM_DESC(cdr, "Value of clock divider register (default 0xc8"); ++ ++#define RTCAN_MEM_RANGE 0x80 ++ ++struct rtcan_mem ++{ ++ volatile void __iomem *vmem; ++}; ++ ++static struct rtcan_device *rtcan_mem_devs[RTCAN_MEM_MAX_DEV]; ++ ++static u8 rtcan_mem_readreg(struct rtcan_device *dev, int reg) ++{ ++ struct rtcan_mem *board = (struct rtcan_mem *)dev->board_priv; ++ return readb(board->vmem + reg); ++} ++ ++static void rtcan_mem_writereg(struct rtcan_device *dev, int reg, u8 val) ++{ ++ struct rtcan_mem *board = (struct rtcan_mem *)dev->board_priv; ++ writeb(val, board->vmem + reg); ++} ++ ++int __init rtcan_mem_init_one(int idx) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_mem *board; ++ int ret; ++ ++ if ((dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_mem))) == NULL) ++ return -ENOMEM; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_mem *)dev->board_priv; ++ ++ dev->board_name = mem_board_name; ++ ++ chip->irq_num = irq[idx]; ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->read_reg = rtcan_mem_readreg; ++ chip->write_reg = rtcan_mem_writereg; ++ ++ if (!request_mem_region(mem[idx], RTCAN_MEM_RANGE, RTCAN_DRV_NAME)) { ++ ret = -EBUSY; ++ goto out_dev_free; ++ } ++ ++ /* ioremap io memory */ ++ if (!(board->vmem = ioremap(mem[idx], RTCAN_MEM_RANGE))) { ++ ret = -EBUSY; ++ goto out_release_mem; ++ } ++ ++ /* Clock frequency in Hz */ ++ if (can_clock[idx]) ++ dev->can_sys_clock = can_clock[idx] / 2; ++ else ++ dev->can_sys_clock = 8000000; /* 16/2 MHz */ ++ ++ /* Output control register */ ++ if (ocr[idx]) ++ chip->ocr = ocr[idx]; ++ else ++ chip->ocr = SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_PUSHPULL; ++ ++ if (cdr[idx]) ++ chip->cdr = cdr[idx]; ++ else ++ chip->cdr = SJA_CDR_CAN_MODE | SJA_CDR_CLK_OFF | SJA_CDR_CBP; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d while trying to register SJA1000 " ++ "device!\n", ret); ++ goto out_iounmap; ++ } ++ ++ rtcan_mem_devs[idx] = dev; ++ return 0; ++ ++ out_iounmap: ++ iounmap((void *)board->vmem); ++ ++ out_release_mem: ++ 
release_mem_region(mem[idx], RTCAN_MEM_RANGE); ++ ++ out_dev_free: ++ rtcan_dev_free(dev); ++ ++ return ret; ++} ++ ++static void rtcan_mem_exit(void); ++ ++/** Init module */ ++static int __init rtcan_mem_init(void) ++{ ++ int i, err; ++ int devices = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ for (i = 0; i < RTCAN_MEM_MAX_DEV && mem[i] != 0; i++) { ++ err = rtcan_mem_init_one(i); ++ if (err) { ++ rtcan_mem_exit(); ++ return err; ++ } ++ devices++; ++ } ++ if (devices) ++ return 0; ++ ++ printk(KERN_ERR "ERROR! No devices specified! " ++ "Use mem=[,...] irq=[,...]\n"); ++ return -EINVAL; ++} ++ ++ ++/** Cleanup module */ ++static void rtcan_mem_exit(void) ++{ ++ int i; ++ struct rtcan_device *dev; ++ volatile void __iomem *vmem; ++ ++ for (i = 0; i < RTCAN_MEM_MAX_DEV; i++) { ++ dev = rtcan_mem_devs[i]; ++ if (!dev) ++ continue; ++ vmem = ((struct rtcan_mem *)dev->board_priv)->vmem; ++ rtcan_sja1000_unregister(dev); ++ iounmap((void *)vmem); ++ release_mem_region(mem[i], RTCAN_MEM_RANGE); ++ rtcan_dev_free(dev); ++ } ++} ++ ++module_init(rtcan_mem_init); ++module_exit(rtcan_mem_exit); +--- linux/drivers/xenomai/can/sja1000/rtcan_esd_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_esd_pci.c 2021-04-07 16:01:26.297635495 +0800 +@@ -0,0 +1,346 @@ ++/* ++ * Copyright (C) 2009 Sebastian Smolorz ++ * ++ * This driver is based on the Socket-CAN driver esd_pci.c, ++ * Copyright (C) 2007 Wolfgang Grandegger ++ * Copyright (C) 2008 Sascha Hauer , Pengutronix ++ * Copyright (C) 2009 Matthias Fuchs , esd gmbh ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the version 2 of the GNU General Public License ++ * as published by the Free Software Foundation ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "ESD-PCI-CAN" ++ ++static char *esd_pci_board_name = "ESD-PCI"; ++ ++MODULE_AUTHOR("Sebastian Smolorz board_priv; ++ return readb(board->base_addr + port); ++} ++ ++static void rtcan_esd_pci_write_reg(struct rtcan_device *dev, int port, u8 val) ++{ ++ struct rtcan_esd_pci *board = (struct rtcan_esd_pci *)dev->board_priv; ++ writeb(val, board->base_addr + port); ++} ++ ++static void rtcan_esd_pci_del_chan(struct rtcan_device *dev) ++{ ++ struct rtcan_esd_pci *board; ++ ++ if (!dev) ++ return; ++ ++ board = (struct rtcan_esd_pci *)dev->board_priv; ++ ++ printk("Removing %s %s device %s\n", ++ esd_pci_board_name, dev->ctrl_name, dev->name); ++ ++ rtcan_sja1000_unregister(dev); ++ ++ rtcan_dev_free(dev); ++} ++ ++static int rtcan_esd_pci_add_chan(struct pci_dev *pdev, int channel, ++ struct rtcan_device **master_dev, ++ void __iomem *conf_addr, ++ void __iomem *base_addr) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_esd_pci *board; ++ int ret; ++ ++ dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_esd_pci)); ++ if (dev == NULL) ++ return -ENOMEM; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_esd_pci *)dev->board_priv; ++ ++ board->pci_dev = pdev; ++ board->conf_addr = conf_addr; ++ board->base_addr = base_addr; ++ ++ if (channel == CHANNEL_SLAVE) { ++ struct rtcan_esd_pci *master_board = ++ (struct rtcan_esd_pci *)(*master_dev)->board_priv; ++ master_board->slave_dev = dev; ++ } ++ ++ dev->board_name = esd_pci_board_name; ++ ++ chip->read_reg = rtcan_esd_pci_read_reg; ++ chip->write_reg = rtcan_esd_pci_write_reg; ++ ++ dev->can_sys_clock = ESD_PCI_CAN_CLOCK; ++ ++ chip->ocr = ESD_PCI_OCR; ++ chip->cdr = ESD_PCI_CDR; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ ++ RTCAN_DBG("%s: base_addr=0x%p conf_addr=0x%p irq=%d ocr=%#x cdr=%#x\n", ++ RTCAN_DRV_NAME, board->base_addr, board->conf_addr, ++ chip->irq_num, chip->ocr, chip->cdr); ++ ++ /* Register SJA1000 device */ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d while trying to register SJA1000 " ++ "device!\n", ret); ++ goto failure; ++ } ++ ++ if (channel != CHANNEL_SLAVE) ++ *master_dev = dev; ++ ++ return 0; ++ ++ ++failure: ++ rtcan_dev_free(dev); ++ return ret; ++} ++ ++static int esd_pci_init_one(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ int ret, channel; ++ void __iomem *base_addr; ++ void __iomem *conf_addr; ++ struct rtcan_device *master_dev = NULL; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ if ((ret = pci_enable_device (pdev))) ++ goto failure; ++ ++ if ((ret = pci_request_regions(pdev, RTCAN_DRV_NAME))) ++ goto failure; ++ ++ RTCAN_DBG("%s: Initializing device %04x:%04x %04x:%04x\n", ++ RTCAN_DRV_NAME, pdev->vendor, pdev->device, ++ pdev->subsystem_vendor, pdev->subsystem_device); ++ ++ conf_addr = pci_iomap(pdev, 0, ESD_PCI_BASE_SIZE); ++ if (conf_addr == NULL) { ++ ret = -ENODEV; ++ goto failure_release_pci; ++ } ++ ++ base_addr = pci_iomap(pdev, 2, ESD_PCI_BASE_SIZE); ++ if (base_addr == NULL) { ++ ret = -ENODEV; ++ goto failure_iounmap_conf; ++ } ++ ++ /* Check if second channel is available */ ++ writeb(SJA_MOD_RM, base_addr + CHANNEL_OFFSET + SJA_MOD); ++ 
writeb(SJA_CDR_CBP, base_addr + CHANNEL_OFFSET + SJA_CDR); ++ writeb(SJA_MOD_RM, base_addr + CHANNEL_OFFSET + SJA_MOD); ++ if (readb(base_addr + CHANNEL_OFFSET + SJA_MOD) == 0x21) { ++ writeb(SJA_MOD_SM | SJA_MOD_AFM | SJA_MOD_STM | SJA_MOD_LOM | ++ SJA_MOD_RM, base_addr + CHANNEL_OFFSET + SJA_MOD); ++ if (readb(base_addr + CHANNEL_OFFSET + SJA_MOD) == 0x3f) ++ channel = CHANNEL_MASTER; ++ else { ++ writeb(SJA_MOD_RM, ++ base_addr + CHANNEL_OFFSET + SJA_MOD); ++ channel = CHANNEL_SINGLE; ++ } ++ } else { ++ writeb(SJA_MOD_RM, base_addr + CHANNEL_OFFSET + SJA_MOD); ++ channel = CHANNEL_SINGLE; ++ } ++ ++ if ((ret = rtcan_esd_pci_add_chan(pdev, channel, &master_dev, ++ conf_addr, base_addr))) ++ goto failure_iounmap_base; ++ ++ if (channel != CHANNEL_SINGLE) { ++ channel = CHANNEL_SLAVE; ++ if ((ret = rtcan_esd_pci_add_chan(pdev, channel, &master_dev, ++ conf_addr, base_addr + CHANNEL_OFFSET))) ++ goto failure_iounmap_base; ++ } ++ ++ if ((pdev->device == PCI_DEVICE_ID_PLX_9050) || ++ (pdev->device == PCI_DEVICE_ID_PLX_9030)) { ++ /* Enable interrupts in PLX9050 */ ++ writel(INTCSR_LINTI1 | INTCSR_PCI, conf_addr + INTCSR_OFFSET); ++ } else { ++ /* Enable interrupts in PLX9056*/ ++ writel(INTCSR9056_LINTI | INTCSR9056_PCI, ++ conf_addr + INTCSR9056_OFFSET); ++ } ++ ++ pci_set_drvdata(pdev, master_dev); ++ ++ return 0; ++ ++ ++failure_iounmap_base: ++ if (master_dev) ++ rtcan_esd_pci_del_chan(master_dev); ++ pci_iounmap(pdev, base_addr); ++ ++failure_iounmap_conf: ++ pci_iounmap(pdev, conf_addr); ++ ++failure_release_pci: ++ pci_release_regions(pdev); ++ ++failure: ++ return ret; ++} ++ ++static void esd_pci_remove_one(struct pci_dev *pdev) ++{ ++ struct rtcan_device *dev = pci_get_drvdata(pdev); ++ struct rtcan_esd_pci *board = (struct rtcan_esd_pci *)dev->board_priv; ++ ++ if ((pdev->device == PCI_DEVICE_ID_PLX_9050) || ++ (pdev->device == PCI_DEVICE_ID_PLX_9030)) { ++ /* Disable interrupts in PLX9050*/ ++ writel(0, board->conf_addr + INTCSR_OFFSET); ++ } else { ++ /* Disable interrupts in PLX9056*/ ++ writel(0, board->conf_addr + INTCSR9056_OFFSET); ++ } ++ ++ if (board->slave_dev) ++ rtcan_esd_pci_del_chan(board->slave_dev); ++ rtcan_esd_pci_del_chan(dev); ++ ++ ++ pci_iounmap(pdev, board->base_addr); ++ pci_iounmap(pdev, board->conf_addr); ++ ++ pci_release_regions(pdev); ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++static struct pci_driver rtcan_esd_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = esd_pci_tbl, ++ .probe = esd_pci_init_one, ++ .remove = esd_pci_remove_one, ++}; ++ ++module_pci_driver(rtcan_esd_pci_driver); +--- linux/drivers/xenomai/can/sja1000/rtcan_ixxat_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_ixxat_pci.c 2021-04-07 16:01:26.292635502 +0800 +@@ -0,0 +1,300 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "IXXAT-PCI-CAN" ++ ++static char *ixxat_pci_board_name = "IXXAT-PCI"; ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTCAN board driver for IXXAT-PCI cards"); ++MODULE_SUPPORTED_DEVICE("IXXAT-PCI card CAN controller"); ++MODULE_LICENSE("GPL"); ++ ++struct rtcan_ixxat_pci ++{ ++ struct pci_dev *pci_dev; ++ struct rtcan_device *slave_dev; ++ int conf_addr; ++ void __iomem *base_addr; ++}; ++ ++#define IXXAT_PCI_CAN_SYS_CLOCK (16000000 / 2) ++ ++#define CHANNEL_SINGLE 0 /* this is a single channel device */ ++#define CHANNEL_MASTER 1 /* multi channel device, this device is master */ ++#define CHANNEL_SLAVE 2 /* multi channel device, this is slave */ ++ ++#define CHANNEL_OFFSET 0x200 ++#define CHANNEL_MASTER_RESET 0x110 ++#define CHANNEL_SLAVE_RESET (CHANNEL_MASTER_RESET + CHANNEL_OFFSET) ++ ++#define IXXAT_INTCSR_OFFSET 0x4c /* Offset in PLX9050 conf registers */ ++#define IXXAT_INTCSR_SLAVE 0x41 /* LINT1 and PCI interrupt enabled */ ++#define IXXAT_INTCSR_MASTER 0x08 /* LINT2 enabled */ ++#define IXXAT_SJA_MOD_MASK 0xa1 /* Mask for reading dual/single channel */ ++ ++/* PCI vender, device and sub-device ID */ ++#define IXXAT_PCI_VENDOR_ID 0x10b5 ++#define IXXAT_PCI_DEVICE_ID 0x9050 ++#define IXXAT_PCI_SUB_SYS_ID 0x2540 ++ ++#define IXXAT_CONF_PORT_SIZE 0x0080 ++#define IXXAT_BASE_PORT_SIZE 0x0400 ++ ++static struct pci_device_id ixxat_pci_tbl[] = { ++ {IXXAT_PCI_VENDOR_ID, IXXAT_PCI_DEVICE_ID, ++ IXXAT_PCI_VENDOR_ID, IXXAT_PCI_SUB_SYS_ID, 0, 0, 0}, ++ { } ++}; ++MODULE_DEVICE_TABLE (pci, ixxat_pci_tbl); ++ ++ ++static u8 rtcan_ixxat_pci_read_reg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_ixxat_pci *board = (struct rtcan_ixxat_pci *)dev->board_priv; ++ return readb(board->base_addr + port); ++} ++ ++static void rtcan_ixxat_pci_write_reg(struct rtcan_device *dev, int port, u8 data) ++{ ++ struct rtcan_ixxat_pci *board = (struct rtcan_ixxat_pci *)dev->board_priv; ++ writeb(data, board->base_addr + port); ++} ++ ++static void rtcan_ixxat_pci_del_chan(struct rtcan_device *dev) ++{ ++ struct rtcan_ixxat_pci *board; ++ u8 intcsr; ++ ++ if (!dev) ++ return; ++ ++ board = (struct rtcan_ixxat_pci *)dev->board_priv; ++ ++ printk("Removing %s %s device %s\n", ++ ixxat_pci_board_name, dev->ctrl_name, dev->name); ++ ++ rtcan_sja1000_unregister(dev); ++ ++ /* Disable PCI interrupts */ ++ intcsr = inb(board->conf_addr + IXXAT_INTCSR_OFFSET); ++ if (board->slave_dev) { ++ intcsr &= ~IXXAT_INTCSR_MASTER; ++ outb(intcsr, board->conf_addr + IXXAT_INTCSR_OFFSET); ++ writeb(0x1, board->base_addr + CHANNEL_MASTER_RESET); ++ iounmap(board->base_addr); ++ } else { ++ intcsr &= ~IXXAT_INTCSR_SLAVE; ++ outb(intcsr, board->conf_addr + IXXAT_INTCSR_OFFSET); ++ writeb(0x1, board->base_addr + CHANNEL_SLAVE_RESET ); ++ } ++ rtcan_dev_free(dev); ++} ++ ++static int rtcan_ixxat_pci_add_chan(struct pci_dev *pdev, ++ int channel, ++ struct rtcan_device **master_dev, ++ int conf_addr, ++ void __iomem *base_addr) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_ixxat_pci *board; ++ u8 intcsr; ++ int ret; ++ ++ dev = 
rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_ixxat_pci)); ++ if (dev == NULL) ++ return -ENOMEM; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_ixxat_pci *)dev->board_priv; ++ ++ board->pci_dev = pdev; ++ board->conf_addr = conf_addr; ++ board->base_addr = base_addr; ++ ++ if (channel == CHANNEL_SLAVE) { ++ struct rtcan_ixxat_pci *master_board = ++ (struct rtcan_ixxat_pci *)(*master_dev)->board_priv; ++ master_board->slave_dev = dev; ++ } ++ ++ dev->board_name = ixxat_pci_board_name; ++ ++ chip->read_reg = rtcan_ixxat_pci_read_reg; ++ chip->write_reg = rtcan_ixxat_pci_write_reg; ++ ++ /* Clock frequency in Hz */ ++ dev->can_sys_clock = IXXAT_PCI_CAN_SYS_CLOCK; ++ ++ /* Output control register */ ++ chip->ocr = (SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_INVERT | ++ SJA_OCR_TX0_PUSHPULL | SJA_OCR_TX1_PUSHPULL); ++ ++ /* Clock divider register */ ++ chip->cdr = SJA_CDR_CAN_MODE; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ /* Enable PCI interrupts */ ++ intcsr = inb(board->conf_addr + IXXAT_INTCSR_OFFSET); ++ if (channel == CHANNEL_SLAVE) ++ intcsr |= IXXAT_INTCSR_SLAVE; ++ else ++ intcsr |= IXXAT_INTCSR_MASTER; ++ outb(intcsr, board->conf_addr + IXXAT_INTCSR_OFFSET); ++ ++ /* Register and setup interrupt handling */ ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ ++ RTCAN_DBG("%s: base_addr=0x%p conf_addr=%#x irq=%d ocr=%#x cdr=%#x\n", ++ RTCAN_DRV_NAME, board->base_addr, board->conf_addr, ++ chip->irq_num, chip->ocr, chip->cdr); ++ ++ /* Register SJA1000 device */ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d while trying to register SJA1000 device!\n", ++ ret); ++ goto failure; ++ } ++ ++ if (channel != CHANNEL_SLAVE) ++ *master_dev = dev; ++ ++ return 0; ++ ++ failure: ++ rtcan_dev_free(dev); ++ return ret; ++} ++ ++static int ixxat_pci_init_one(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ int ret, channel, conf_addr; ++ unsigned long addr; ++ void __iomem *base_addr; ++ struct rtcan_device *master_dev = NULL; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ if ((ret = pci_enable_device (pdev))) ++ goto failure; ++ ++ if ((ret = pci_request_regions(pdev, RTCAN_DRV_NAME))) ++ goto failure; ++ ++ RTCAN_DBG("%s: Initializing device %04x:%04x:%04x\n", ++ RTCAN_DRV_NAME, pdev->vendor, pdev->device, ++ pdev->subsystem_device); ++ ++ /* Enable memory and I/O space */ ++ if ((ret = pci_write_config_word(pdev, 0x04, 0x3))) ++ goto failure_release_pci; ++ ++ conf_addr = pci_resource_start(pdev, 1); ++ ++ addr = pci_resource_start(pdev, 2); ++ base_addr = ioremap(addr, IXXAT_BASE_PORT_SIZE); ++ if (base_addr == 0) { ++ ret = -ENODEV; ++ goto failure_release_pci; ++ } ++ ++ /* Check if second channel is available after reset */ ++ writeb(0x1, base_addr + CHANNEL_MASTER_RESET); ++ writeb(0x1, base_addr + CHANNEL_SLAVE_RESET); ++ udelay(100); ++ if ( (readb(base_addr + CHANNEL_OFFSET + SJA_MOD) & IXXAT_SJA_MOD_MASK ) != 0x21 || ++ readb(base_addr + CHANNEL_OFFSET + SJA_SR ) != 0x0c || ++ readb(base_addr + CHANNEL_OFFSET + SJA_IR ) != 0xe0) ++ channel = CHANNEL_SINGLE; ++ else ++ channel = CHANNEL_MASTER; ++ ++ if ((ret = rtcan_ixxat_pci_add_chan(pdev, channel, &master_dev, ++ conf_addr, base_addr))) ++ goto failure_iounmap; ++ ++ if (channel != CHANNEL_SINGLE) { ++ channel = CHANNEL_SLAVE; ++ if ((ret = rtcan_ixxat_pci_add_chan(pdev, channel, ++ &master_dev, conf_addr, ++ base_addr + CHANNEL_OFFSET))) ++ goto failure_iounmap; ++ } ++ ++ pci_set_drvdata(pdev, 
master_dev); ++ return 0; ++ ++failure_iounmap: ++ if (master_dev) ++ rtcan_ixxat_pci_del_chan(master_dev); ++ iounmap(base_addr); ++ ++failure_release_pci: ++ pci_release_regions(pdev); ++ ++failure: ++ return ret; ++} ++ ++static void ixxat_pci_remove_one(struct pci_dev *pdev) ++{ ++ struct rtcan_device *dev = pci_get_drvdata(pdev); ++ struct rtcan_ixxat_pci *board = (struct rtcan_ixxat_pci *)dev->board_priv; ++ ++ if (board->slave_dev) ++ rtcan_ixxat_pci_del_chan(board->slave_dev); ++ rtcan_ixxat_pci_del_chan(dev); ++ ++ pci_release_regions(pdev); ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++static struct pci_driver rtcan_ixxat_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = ixxat_pci_tbl, ++ .probe = ixxat_pci_init_one, ++ .remove = ixxat_pci_remove_one, ++}; ++ ++module_pci_driver(rtcan_ixxat_pci_driver); +--- linux/drivers/xenomai/ipc/iddp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/iddp.c 2021-04-07 16:01:26.283635515 +0800 +@@ -0,0 +1,988 @@ ++/** ++ * This file is part of the Xenomai project. ++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++ ++#define IDDP_SOCKET_MAGIC 0xa37a37a8 ++ ++struct iddp_message { ++ struct list_head next; ++ int from; ++ size_t rdoff; ++ size_t len; ++ char data[]; ++}; ++ ++struct iddp_socket { ++ int magic; ++ struct sockaddr_ipc name; ++ struct sockaddr_ipc peer; ++ struct xnheap *bufpool; ++ struct xnheap privpool; ++ rtdm_waitqueue_t *poolwaitq; ++ rtdm_waitqueue_t privwaitq; ++ size_t poolsz; ++ rtdm_sem_t insem; ++ struct list_head inq; ++ u_long status; ++ xnhandle_t handle; ++ char label[XNOBJECT_NAME_LEN]; ++ nanosecs_rel_t rx_timeout; ++ nanosecs_rel_t tx_timeout; ++ unsigned long stalls; /* Buffer stall counter. 
*/ ++ struct rtipc_private *priv; ++}; ++ ++static struct sockaddr_ipc nullsa = { ++ .sipc_family = AF_RTIPC, ++ .sipc_port = -1 ++}; ++ ++static struct xnmap *portmap; ++ ++static rtdm_waitqueue_t poolwaitq; ++ ++#define _IDDP_BINDING 0 ++#define _IDDP_BOUND 1 ++#define _IDDP_CONNECTED 2 ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static char *__iddp_link_target(void *obj) ++{ ++ struct iddp_socket *sk = obj; ++ ++ return kasformat("%d", sk->name.sipc_port); ++} ++ ++extern struct xnptree rtipc_ptree; ++ ++static struct xnpnode_link __iddp_pnode = { ++ .node = { ++ .dirname = "iddp", ++ .root = &rtipc_ptree, ++ .ops = &xnregistry_vlink_ops, ++ }, ++ .target = __iddp_link_target, ++}; ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++static struct xnpnode_link __iddp_pnode = { ++ .node = { ++ .dirname = "iddp", ++ }, ++}; ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++static inline void __iddp_init_mbuf(struct iddp_message *mbuf, size_t len) ++{ ++ mbuf->rdoff = 0; ++ mbuf->len = len; ++ INIT_LIST_HEAD(&mbuf->next); ++} ++ ++static struct iddp_message * ++__iddp_alloc_mbuf(struct iddp_socket *sk, size_t len, ++ nanosecs_rel_t timeout, int flags, int *pret) ++{ ++ struct iddp_message *mbuf = NULL; ++ rtdm_toseq_t timeout_seq; ++ rtdm_lockctx_t s; ++ int ret = 0; ++ ++ rtdm_toseq_init(&timeout_seq, timeout); ++ ++ for (;;) { ++ mbuf = xnheap_alloc(sk->bufpool, len + sizeof(*mbuf)); ++ if (mbuf) { ++ __iddp_init_mbuf(mbuf, len); ++ break; ++ } ++ if (flags & MSG_DONTWAIT) { ++ ret = -EAGAIN; ++ break; ++ } ++ /* ++ * No luck, no buffer free. Wait for a buffer to be ++ * released and retry. Admittedly, we might create a ++ * thundering herd effect if many waiters put a lot of ++ * memory pressure on the pool, but in this case, the ++ * pool size should be adjusted. 
++ */ ++ rtdm_waitqueue_lock(sk->poolwaitq, s); ++ ++sk->stalls; ++ ret = rtdm_timedwait_locked(sk->poolwaitq, timeout, &timeout_seq); ++ rtdm_waitqueue_unlock(sk->poolwaitq, s); ++ if (unlikely(ret == -EIDRM)) ++ ret = -ECONNRESET; ++ if (ret) ++ break; ++ } ++ ++ *pret = ret; ++ ++ return mbuf; ++} ++ ++static void __iddp_free_mbuf(struct iddp_socket *sk, ++ struct iddp_message *mbuf) ++{ ++ xnheap_free(sk->bufpool, mbuf); ++ rtdm_waitqueue_broadcast(sk->poolwaitq); ++} ++ ++static int iddp_socket(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state; ++ ++ sk->magic = IDDP_SOCKET_MAGIC; ++ sk->name = nullsa; /* Unbound */ ++ sk->peer = nullsa; ++ sk->bufpool = &cobalt_heap; ++ sk->poolwaitq = &poolwaitq; ++ sk->poolsz = 0; ++ sk->status = 0; ++ sk->handle = 0; ++ sk->rx_timeout = RTDM_TIMEOUT_INFINITE; ++ sk->tx_timeout = RTDM_TIMEOUT_INFINITE; ++ sk->stalls = 0; ++ *sk->label = 0; ++ INIT_LIST_HEAD(&sk->inq); ++ rtdm_sem_init(&sk->insem, 0); ++ rtdm_waitqueue_init(&sk->privwaitq); ++ sk->priv = priv; ++ ++ return 0; ++} ++ ++static void iddp_close(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state; ++ struct iddp_message *mbuf; ++ rtdm_lockctx_t s; ++ void *poolmem; ++ u32 poolsz; ++ ++ rtdm_sem_destroy(&sk->insem); ++ rtdm_waitqueue_destroy(&sk->privwaitq); ++ ++ if (test_bit(_IDDP_BOUND, &sk->status)) { ++ if (sk->handle) ++ xnregistry_remove(sk->handle); ++ if (sk->name.sipc_port > -1) { ++ cobalt_atomic_enter(s); ++ xnmap_remove(portmap, sk->name.sipc_port); ++ cobalt_atomic_leave(s); ++ } ++ if (sk->bufpool != &cobalt_heap) { ++ poolmem = xnheap_get_membase(&sk->privpool); ++ poolsz = xnheap_get_size(&sk->privpool); ++ xnheap_destroy(&sk->privpool); ++ xnheap_vfree(poolmem); ++ return; ++ } ++ } ++ ++ /* Send unread datagrams back to the system heap. */ ++ while (!list_empty(&sk->inq)) { ++ mbuf = list_entry(sk->inq.next, struct iddp_message, next); ++ list_del(&mbuf->next); ++ xnheap_free(&cobalt_heap, mbuf); ++ } ++ ++ kfree(sk); ++ ++ return; ++} ++ ++static ssize_t __iddp_recvmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ struct sockaddr_ipc *saddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state; ++ ssize_t maxlen, len, wrlen, vlen; ++ rtdm_toseq_t timeout_seq, *toseq; ++ int nvec, rdoff, ret, dofree; ++ struct iddp_message *mbuf; ++ nanosecs_rel_t timeout; ++ struct xnbufd bufd; ++ rtdm_lockctx_t s; ++ ++ if (!test_bit(_IDDP_BOUND, &sk->status)) ++ return -EAGAIN; ++ ++ maxlen = rtdm_get_iov_flatlen(iov, iovlen); ++ if (maxlen == 0) ++ return 0; ++ ++ if (flags & MSG_DONTWAIT) { ++ timeout = RTDM_TIMEOUT_NONE; ++ toseq = NULL; ++ } else { ++ timeout = sk->rx_timeout; ++ toseq = &timeout_seq; ++ } ++ ++ /* We want to pick one buffer from the queue. */ ++ ++ for (;;) { ++ ret = rtdm_sem_timeddown(&sk->insem, timeout, toseq); ++ if (unlikely(ret)) { ++ if (ret == -EIDRM) ++ return -ECONNRESET; ++ return ret; ++ } ++ /* We may have spurious wakeups. */ ++ cobalt_atomic_enter(s); ++ if (!list_empty(&sk->inq)) ++ break; ++ cobalt_atomic_leave(s); ++ } ++ ++ /* Pull heading message from input queue. 
*/ ++ mbuf = list_entry(sk->inq.next, struct iddp_message, next); ++ rdoff = mbuf->rdoff; ++ len = mbuf->len - rdoff; ++ if (saddr) { ++ saddr->sipc_family = AF_RTIPC; ++ saddr->sipc_port = mbuf->from; ++ } ++ if (maxlen >= len) { ++ list_del(&mbuf->next); ++ dofree = 1; ++ if (list_empty(&sk->inq)) /* -> non-readable */ ++ xnselect_signal(&priv->recv_block, 0); ++ ++ } else { ++ /* Buffer is only partially read: repost. */ ++ mbuf->rdoff += maxlen; ++ len = maxlen; ++ dofree = 0; ++ } ++ ++ if (!dofree) ++ rtdm_sem_up(&sk->insem); ++ ++ cobalt_atomic_leave(s); ++ ++ /* Now, write "len" bytes from mbuf->data to the vector cells */ ++ for (nvec = 0, wrlen = len; nvec < iovlen && wrlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = wrlen >= iov[nvec].iov_len ? iov[nvec].iov_len : wrlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_from_kmem(&bufd, mbuf->data + rdoff, vlen); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_from_kmem(&bufd, mbuf->data + rdoff, vlen); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ break; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ wrlen -= vlen; ++ rdoff += vlen; ++ } ++ ++ if (dofree) ++ __iddp_free_mbuf(sk, mbuf); ++ ++ return ret ?: len; ++} ++ ++static ssize_t iddp_recvmsg(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags) ++{ ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct sockaddr_ipc saddr; ++ ssize_t ret; ++ ++ if (flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen < sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ } else if (msg->msg_namelen != 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __iddp_recvmsg(fd, iov, msg->msg_iovlen, flags, &saddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy the updated I/O vector back */ ++ if (rtdm_put_iovec(fd, iov, msg, iov_fast)) ++ return -EFAULT; ++ ++ /* Copy the source address if required. 
*/ ++ if (msg->msg_name) { ++ if (rtipc_put_arg(fd, msg->msg_name, &saddr, sizeof(saddr))) ++ return -EFAULT; ++ msg->msg_namelen = sizeof(struct sockaddr_ipc); ++ } ++ ++ return ret; ++} ++ ++static ssize_t iddp_read(struct rtdm_fd *fd, void *buf, size_t len) ++{ ++ struct iovec iov = { .iov_base = buf, .iov_len = len }; ++ ++ return __iddp_recvmsg(fd, &iov, 1, 0, NULL); ++} ++ ++static ssize_t __iddp_sendmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ const struct sockaddr_ipc *daddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state, *rsk; ++ struct iddp_message *mbuf; ++ ssize_t len, rdlen, vlen; ++ int nvec, wroff, ret; ++ struct rtdm_fd *rfd; ++ struct xnbufd bufd; ++ rtdm_lockctx_t s; ++ ++ len = rtdm_get_iov_flatlen(iov, iovlen); ++ if (len == 0) ++ return 0; ++ ++ cobalt_atomic_enter(s); ++ rfd = xnmap_fetch_nocheck(portmap, daddr->sipc_port); ++ if (rfd && rtdm_fd_lock(rfd) < 0) ++ rfd = NULL; ++ cobalt_atomic_leave(s); ++ if (rfd == NULL) ++ return -ECONNRESET; ++ ++ rsk = rtipc_fd_to_state(rfd); ++ if (!test_bit(_IDDP_BOUND, &rsk->status)) { ++ rtdm_fd_unlock(rfd); ++ return -ECONNREFUSED; ++ } ++ ++ mbuf = __iddp_alloc_mbuf(rsk, len, sk->tx_timeout, flags, &ret); ++ if (unlikely(ret)) { ++ rtdm_fd_unlock(rfd); ++ return ret; ++ } ++ ++ /* Now, move "len" bytes to mbuf->data from the vector cells */ ++ for (nvec = 0, rdlen = len, wroff = 0; ++ nvec < iovlen && rdlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = rdlen >= iov[nvec].iov_len ? iov[nvec].iov_len : rdlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_to_kmem(mbuf->data + wroff, &bufd, vlen); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_to_kmem(mbuf->data + wroff, &bufd, vlen); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ goto fail; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ rdlen -= vlen; ++ wroff += vlen; ++ } ++ ++ cobalt_atomic_enter(s); ++ ++ /* ++ * CAUTION: we must remain atomic from the moment we signal ++ * POLLIN, until sem_up has happened. ++ */ ++ if (list_empty(&rsk->inq)) /* -> readable */ ++ xnselect_signal(&rsk->priv->recv_block, POLLIN); ++ ++ mbuf->from = sk->name.sipc_port; ++ ++ if (flags & MSG_OOB) ++ list_add(&mbuf->next, &rsk->inq); ++ else ++ list_add_tail(&mbuf->next, &rsk->inq); ++ ++ rtdm_sem_up(&rsk->insem); /* Will resched. */ ++ ++ cobalt_atomic_leave(s); ++ ++ rtdm_fd_unlock(rfd); ++ ++ return len; ++ ++fail: ++ __iddp_free_mbuf(rsk, mbuf); ++ ++ rtdm_fd_unlock(rfd); ++ ++ return ret; ++} ++ ++static ssize_t iddp_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct iddp_socket *sk = priv->state; ++ struct sockaddr_ipc daddr; ++ ssize_t ret; ++ ++ if (flags & ~(MSG_OOB | MSG_DONTWAIT)) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen != sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ ++ /* Fetch the destination address to send to. 
*/ ++ if (rtipc_get_arg(fd, &daddr, msg->msg_name, sizeof(daddr))) ++ return -EFAULT; ++ ++ if (daddr.sipc_port < 0 || ++ daddr.sipc_port >= CONFIG_XENO_OPT_IDDP_NRPORT) ++ return -EINVAL; ++ } else { ++ if (msg->msg_namelen != 0) ++ return -EINVAL; ++ daddr = sk->peer; ++ if (daddr.sipc_port < 0) ++ return -EDESTADDRREQ; ++ } ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __iddp_sendmsg(fd, iov, msg->msg_iovlen, flags, &daddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy updated I/O vector back */ ++ return rtdm_put_iovec(fd, iov, msg, iov_fast) ?: ret; ++} ++ ++static ssize_t iddp_write(struct rtdm_fd *fd, ++ const void *buf, size_t len) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov = { .iov_base = (void *)buf, .iov_len = len }; ++ struct iddp_socket *sk = priv->state; ++ ++ if (sk->peer.sipc_port < 0) ++ return -EDESTADDRREQ; ++ ++ return __iddp_sendmsg(fd, &iov, 1, 0, &sk->peer); ++} ++ ++static int __iddp_bind_socket(struct rtdm_fd *fd, ++ struct sockaddr_ipc *sa) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state; ++ int ret = 0, port; ++ rtdm_lockctx_t s; ++ void *poolmem; ++ size_t poolsz; ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_IDDP_NRPORT) ++ return -EINVAL; ++ ++ cobalt_atomic_enter(s); ++ if (test_bit(_IDDP_BOUND, &sk->status) || ++ __test_and_set_bit(_IDDP_BINDING, &sk->status)) ++ ret = -EADDRINUSE; ++ cobalt_atomic_leave(s); ++ if (ret) ++ return ret; ++ ++ /* Will auto-select a free port number if unspec (-1). */ ++ port = sa->sipc_port; ++ cobalt_atomic_enter(s); ++ port = xnmap_enter(portmap, port, fd); ++ cobalt_atomic_leave(s); ++ if (port < 0) ++ return port == -EEXIST ? -EADDRINUSE : -ENOMEM; ++ ++ sa->sipc_port = port; ++ ++ /* ++ * Allocate a local buffer pool if we were told to do so via ++ * setsockopt() before we got there. ++ */ ++ poolsz = sk->poolsz; ++ if (poolsz > 0) { ++ poolsz = PAGE_ALIGN(poolsz); ++ poolmem = xnheap_vmalloc(poolsz); ++ if (poolmem == NULL) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ ret = xnheap_init(&sk->privpool, poolmem, poolsz); ++ if (ret) { ++ xnheap_vfree(poolmem); ++ goto fail; ++ } ++ xnheap_set_name(&sk->privpool, "iddp-pool@%d", port); ++ sk->poolwaitq = &sk->privwaitq; ++ sk->bufpool = &sk->privpool; ++ } ++ ++ sk->name = *sa; ++ /* Set default destination if unset at binding time. 
*/ ++ if (sk->peer.sipc_port < 0) ++ sk->peer = *sa; ++ ++ if (*sk->label) { ++ ret = xnregistry_enter(sk->label, sk, ++ &sk->handle, &__iddp_pnode.node); ++ if (ret) { ++ if (poolsz > 0) { ++ xnheap_destroy(&sk->privpool); ++ xnheap_vfree(poolmem); ++ } ++ goto fail; ++ } ++ } ++ ++ cobalt_atomic_enter(s); ++ __clear_bit(_IDDP_BINDING, &sk->status); ++ __set_bit(_IDDP_BOUND, &sk->status); ++ if (xnselect_signal(&priv->send_block, POLLOUT)) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++fail: ++ xnmap_remove(portmap, port); ++ clear_bit(_IDDP_BINDING, &sk->status); ++ ++ return ret; ++} ++ ++static int __iddp_connect_socket(struct iddp_socket *sk, ++ struct sockaddr_ipc *sa) ++{ ++ struct sockaddr_ipc _sa; ++ struct iddp_socket *rsk; ++ int ret, resched = 0; ++ rtdm_lockctx_t s; ++ xnhandle_t h; ++ ++ if (sa == NULL) { ++ _sa = nullsa; ++ sa = &_sa; ++ goto set_assoc; ++ } ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_IDDP_NRPORT) ++ return -EINVAL; ++ /* ++ * - If a valid sipc_port is passed in the [0..NRPORT-1] range, ++ * it is used verbatim and the connection succeeds ++ * immediately, regardless of whether the destination is ++ * bound at the time of the call. ++ * ++ * - If sipc_port is -1 and a label was set via IDDP_LABEL, ++ * connect() blocks for the requested amount of time (see ++ * SO_RCVTIMEO) until a socket is bound to the same label. ++ * ++ * - If sipc_port is -1 and no label is given, the default ++ * destination address is cleared, meaning that any subsequent ++ * write() to the socket will return -EDESTADDRREQ, until a ++ * valid destination address is set via connect() or bind(). ++ * ++ * - In all other cases, -EINVAL is returned. ++ */ ++ if (sa->sipc_port < 0 && *sk->label) { ++ ret = xnregistry_bind(sk->label, ++ sk->rx_timeout, XN_RELATIVE, &h); ++ if (ret) ++ return ret; ++ ++ cobalt_atomic_enter(s); ++ rsk = xnregistry_lookup(h, NULL); ++ if (rsk == NULL || rsk->magic != IDDP_SOCKET_MAGIC) ++ ret = -EINVAL; ++ else { ++ /* Fetch labeled port number. */ ++ sa->sipc_port = rsk->name.sipc_port; ++ resched = xnselect_signal(&sk->priv->send_block, POLLOUT); ++ } ++ cobalt_atomic_leave(s); ++ if (ret) ++ return ret; ++ } else if (sa->sipc_port < 0) ++ sa = &nullsa; ++set_assoc: ++ cobalt_atomic_enter(s); ++ if (!test_bit(_IDDP_BOUND, &sk->status)) ++ /* Set default name. */ ++ sk->name = *sa; ++ /* Set default destination. 
*/ ++ sk->peer = *sa; ++ if (sa->sipc_port < 0) ++ __clear_bit(_IDDP_CONNECTED, &sk->status); ++ else ++ __set_bit(_IDDP_CONNECTED, &sk->status); ++ if (resched) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++} ++ ++static int __iddp_setsockopt(struct iddp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ struct _rtdm_setsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ size_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptin(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ ret = rtipc_get_timeval(fd, &tv, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ sk->rx_timeout = rtipc_timeval_to_ns(&tv); ++ break; ++ ++ case SO_SNDTIMEO: ++ ret = rtipc_get_timeval(fd, &tv, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ sk->tx_timeout = rtipc_timeval_to_ns(&tv); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_IDDP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case IDDP_POOLSZ: ++ ret = rtipc_get_length(fd, &len, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ if (len == 0) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ /* ++ * We may not do this more than once, and we have to ++ * do this before the first binding. ++ */ ++ if (test_bit(_IDDP_BOUND, &sk->status) || ++ test_bit(_IDDP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else ++ sk->poolsz = len; ++ cobalt_atomic_leave(s); ++ break; ++ ++ case IDDP_LABEL: ++ if (sopt.optlen < sizeof(plabel)) ++ return -EINVAL; ++ if (rtipc_get_arg(fd, &plabel, sopt.optval, sizeof(plabel))) ++ return -EFAULT; ++ cobalt_atomic_enter(s); ++ /* ++ * We may attach a label to a client socket which was ++ * previously bound in IDDP. 
++ */ ++ if (test_bit(_IDDP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else { ++ strcpy(sk->label, plabel.label); ++ sk->label[XNOBJECT_NAME_LEN-1] = 0; ++ } ++ cobalt_atomic_leave(s); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __iddp_getsockopt(struct iddp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ struct _rtdm_getsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ socklen_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptout(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ ret = rtipc_get_arg(fd, &len, sopt.optlen, sizeof(len)); ++ if (ret) ++ return ret; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ rtipc_ns_to_timeval(&tv, sk->rx_timeout); ++ ret = rtipc_put_timeval(fd, sopt.optval, &tv, len); ++ if (ret) ++ return ret; ++ break; ++ ++ case SO_SNDTIMEO: ++ rtipc_ns_to_timeval(&tv, sk->tx_timeout); ++ ret = rtipc_put_timeval(fd, sopt.optval, &tv, len); ++ if (ret) ++ return ret; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_IDDP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case IDDP_LABEL: ++ if (len < sizeof(plabel)) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ strcpy(plabel.label, sk->label); ++ cobalt_atomic_leave(s); ++ if (rtipc_put_arg(fd, sopt.optval, &plabel, sizeof(plabel))) ++ return -EFAULT; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __iddp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct sockaddr_ipc saddr, *saddrp = &saddr; ++ struct iddp_socket *sk = priv->state; ++ int ret = 0; ++ ++ switch (request) { ++ ++ COMPAT_CASE(_RTIOC_CONNECT): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret) ++ return ret; ++ ret = __iddp_connect_socket(sk, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_BIND): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret) ++ return ret; ++ if (saddrp == NULL) ++ return -EFAULT; ++ ret = __iddp_bind_socket(fd, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->name); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETPEERNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->peer); ++ break; ++ ++ COMPAT_CASE(_RTIOC_SETSOCKOPT): ++ ret = __iddp_setsockopt(sk, fd, arg); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKOPT): ++ ret = __iddp_getsockopt(sk, fd, arg); ++ break; ++ ++ case _RTIOC_LISTEN: ++ COMPAT_CASE(_RTIOC_ACCEPT): ++ ret = -EOPNOTSUPP; ++ break; ++ ++ case _RTIOC_SHUTDOWN: ++ ret = -ENOTCONN; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int iddp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ int ret; ++ ++ switch (request) { ++ COMPAT_CASE(_RTIOC_BIND): ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; /* Try downgrading to NRT */ ++ default: ++ ret = __iddp_ioctl(fd, request, arg); ++ } ++ ++ return ret; ++} ++ ++static int iddp_init(void) ++{ ++ portmap = xnmap_create(CONFIG_XENO_OPT_IDDP_NRPORT, 0, 0); ++ if (portmap == NULL) ++ return -ENOMEM; ++ ++ rtdm_waitqueue_init(&poolwaitq); ++ ++ return 0; ++} ++ ++static void iddp_exit(void) ++{ ++ rtdm_waitqueue_destroy(&poolwaitq); ++ xnmap_delete(portmap); ++} ++ ++static unsigned int iddp_pollstate(struct rtdm_fd *fd) /* atomic */ ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state; ++ unsigned int mask = 0; 
++ struct rtdm_fd *rfd; ++ ++ if (test_bit(_IDDP_BOUND, &sk->status) && !list_empty(&sk->inq)) ++ mask |= POLLIN; ++ ++ /* ++ * If the socket is connected, POLLOUT means that the peer ++ * exists. Otherwise POLLOUT is always set, assuming the ++ * client is likely to use explicit addressing in send ++ * operations. ++ * ++ * If the peer exists, we still can't really know whether ++ * writing to the socket would block as it depends on the ++ * message size and other highly dynamic factors, so pretend ++ * it would not. ++ */ ++ if (test_bit(_IDDP_CONNECTED, &sk->status)) { ++ rfd = xnmap_fetch_nocheck(portmap, sk->peer.sipc_port); ++ if (rfd) ++ mask |= POLLOUT; ++ } else ++ mask |= POLLOUT; ++ ++ return mask; ++} ++ ++struct rtipc_protocol iddp_proto_driver = { ++ .proto_name = "iddp", ++ .proto_statesz = sizeof(struct iddp_socket), ++ .proto_init = iddp_init, ++ .proto_exit = iddp_exit, ++ .proto_ops = { ++ .socket = iddp_socket, ++ .close = iddp_close, ++ .recvmsg = iddp_recvmsg, ++ .sendmsg = iddp_sendmsg, ++ .read = iddp_read, ++ .write = iddp_write, ++ .ioctl = iddp_ioctl, ++ .pollstate = iddp_pollstate, ++ } ++}; +--- linux/drivers/xenomai/ipc/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/Makefile 2021-04-07 16:01:26.278635522 +0800 +@@ -0,0 +1,8 @@ ++ ++obj-$(CONFIG_XENO_DRIVERS_RTIPC) += xeno_rtipc.o ++ ++xeno_rtipc-y := rtipc.o ++ ++xeno_rtipc-$(CONFIG_XENO_DRIVERS_RTIPC_XDDP) += xddp.o ++xeno_rtipc-$(CONFIG_XENO_DRIVERS_RTIPC_IDDP) += iddp.o ++xeno_rtipc-$(CONFIG_XENO_DRIVERS_RTIPC_BUFP) += bufp.o +--- linux/drivers/xenomai/ipc/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/Kconfig 2021-04-07 16:01:26.274635528 +0800 +@@ -0,0 +1,81 @@ ++menu "Real-time IPC drivers" ++ ++config XENO_DRIVERS_RTIPC ++ tristate "RTIPC protocol family" ++ help ++ ++ This driver provides the real-time IPC protocol family ++ (PF_RTIPC) over RTDM. ++ ++config XENO_DRIVERS_RTIPC_XDDP ++ depends on XENO_DRIVERS_RTIPC ++ select XENO_OPT_PIPE ++ default y ++ bool "XDDP cross-domain datagram protocol" ++ help ++ ++ Xenomai's XDDP protocol enables threads to exchange datagrams ++ across the Xenomai/Linux domain boundary, using "message ++ pipes". ++ ++ Message pipes are bi-directional FIFO communication channels ++ allowing data exchange between real-time Xenomai threads and ++ regular (i.e. non real-time) user-space processes. Message ++ pipes are datagram-based and thus natively preserve message ++ boundaries, but they can also be used in byte stream mode when ++ sending from the real-time to the non real-time domain. ++ ++ The maximum number of communication ports available in the ++ system can be configured using the XENO_OPT_PIPE_NRDEV option ++ from the Nucleus menu. ++ ++config XENO_DRIVERS_RTIPC_IDDP ++ depends on XENO_DRIVERS_RTIPC ++ select XENO_OPT_MAP ++ default y ++ bool "IDDP intra-domain datagram protocol" ++ help ++ ++ Xenomai's IDDP protocol enables real-time threads to exchange ++ datagrams within the Xenomai domain. ++ ++config XENO_OPT_IDDP_NRPORT ++ depends on XENO_DRIVERS_RTIPC_IDDP ++ int "Number of IDDP communication ports" ++ default 32 ++ help ++ ++ This parameter defines the number of IDDP ports available in ++ the system for creating receiver endpoints. Port numbers range ++ from 0 to CONFIG_XENO_OPT_IDDP_NRPORT - 1. 
++ ++config XENO_DRIVERS_RTIPC_BUFP ++ depends on XENO_DRIVERS_RTIPC ++ select XENO_OPT_MAP ++ default y ++ bool "Buffer protocol" ++ help ++ ++ The buffer protocol implements a byte-oriented, one-way ++ Producer-Consumer data path, which makes it a bit faster than ++ datagram-oriented protocols. All messages written are buffered ++ into a single memory area in strict FIFO order, until read by ++ the consumer. ++ ++ This protocol prevents short writes, and only allows short ++ reads when a potential deadlock situation arises (i.e. readers ++ and writers waiting for each other indefinitely), which ++ usually means that the buffer size does not fit the use peer ++ threads are making from the protocol. ++ ++config XENO_OPT_BUFP_NRPORT ++ depends on XENO_DRIVERS_RTIPC_BUFP ++ int "Number of BUFP communication ports" ++ default 32 ++ help ++ ++ This parameter defines the number of BUFP ports available in ++ the system for creating receiver endpoints. Port numbers range ++ from 0 to CONFIG_XENO_OPT_BUFP_NRPORT - 1. ++ ++endmenu +--- linux/drivers/xenomai/ipc/xddp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/xddp.c 2021-04-07 16:01:26.269635535 +0800 +@@ -0,0 +1,1130 @@ ++/** ++ * This file is part of the Xenomai project. ++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++ ++#define XDDP_SOCKET_MAGIC 0xa21a21a2 ++ ++struct xddp_message { ++ struct xnpipe_mh mh; ++ char data[]; ++}; ++ ++struct xddp_socket { ++ int magic; ++ struct sockaddr_ipc name; ++ struct sockaddr_ipc peer; ++ ++ int minor; ++ size_t poolsz; ++ xnhandle_t handle; ++ char label[XNOBJECT_NAME_LEN]; ++ struct rtdm_fd *fd; /* i.e. 
RTDM socket fd */ ++ ++ struct xddp_message *buffer; ++ int buffer_port; ++ struct xnheap *bufpool; ++ struct xnheap privpool; ++ size_t fillsz; ++ size_t curbufsz; /* Current streaming buffer size */ ++ u_long status; ++ rtdm_lock_t lock; ++ ++ nanosecs_rel_t timeout; /* connect()/recvmsg() timeout */ ++ size_t reqbufsz; /* Requested streaming buffer size */ ++ ++ int (*monitor)(struct rtdm_fd *fd, int event, long arg); ++ struct rtipc_private *priv; ++}; ++ ++static struct sockaddr_ipc nullsa = { ++ .sipc_family = AF_RTIPC, ++ .sipc_port = -1 ++}; ++ ++static struct rtdm_fd *portmap[CONFIG_XENO_OPT_PIPE_NRDEV]; /* indexes RTDM fildes */ ++ ++#define _XDDP_SYNCWAIT 0 ++#define _XDDP_ATOMIC 1 ++#define _XDDP_BINDING 2 ++#define _XDDP_BOUND 3 ++#define _XDDP_CONNECTED 4 ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static char *__xddp_link_target(void *obj) ++{ ++ struct xddp_socket *sk = obj; ++ ++ return kasformat("/dev/rtp%d", sk->minor); ++} ++ ++extern struct xnptree rtipc_ptree; ++ ++static struct xnpnode_link __xddp_pnode = { ++ .node = { ++ .dirname = "xddp", ++ .root = &rtipc_ptree, ++ .ops = &xnregistry_vlink_ops, ++ }, ++ .target = __xddp_link_target, ++}; ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++static struct xnpnode_link __xddp_pnode = { ++ .node = { ++ .dirname = "xddp", ++ }, ++}; ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++static void *__xddp_alloc_handler(size_t size, void *skarg) /* nklock free */ ++{ ++ struct xddp_socket *sk = skarg; ++ void *buf; ++ ++ /* Try to allocate memory for the incoming message. */ ++ buf = xnheap_alloc(sk->bufpool, size); ++ if (unlikely(buf == NULL)) { ++ if (sk->monitor) ++ sk->monitor(sk->fd, XDDP_EVTNOBUF, size); ++ if (size > xnheap_get_size(sk->bufpool)) ++ buf = (void *)-1; /* Will never succeed. */ ++ } ++ ++ return buf; ++} ++ ++static int __xddp_resize_streambuf(struct xddp_socket *sk) /* sk->lock held */ ++{ ++ if (sk->buffer) ++ xnheap_free(sk->bufpool, sk->buffer); ++ ++ if (sk->reqbufsz == 0) { ++ sk->buffer = NULL; ++ sk->curbufsz = 0; ++ return 0; ++ } ++ ++ sk->buffer = xnheap_alloc(sk->bufpool, sk->reqbufsz); ++ if (sk->buffer == NULL) { ++ sk->curbufsz = 0; ++ return -ENOMEM; ++ } ++ ++ sk->curbufsz = sk->reqbufsz; ++ ++ return 0; ++} ++ ++static void __xddp_free_handler(void *buf, void *skarg) /* nklock free */ ++{ ++ struct xddp_socket *sk = skarg; ++ rtdm_lockctx_t s; ++ ++ if (buf != sk->buffer) { ++ xnheap_free(sk->bufpool, buf); ++ return; ++ } ++ ++ /* Reset the streaming buffer. */ ++ ++ rtdm_lock_get_irqsave(&sk->lock, s); ++ ++ sk->fillsz = 0; ++ sk->buffer_port = -1; ++ __clear_bit(_XDDP_SYNCWAIT, &sk->status); ++ __clear_bit(_XDDP_ATOMIC, &sk->status); ++ ++ /* ++ * If a XDDP_BUFSZ request is pending, resize the streaming ++ * buffer on-the-fly. ++ */ ++ if (unlikely(sk->curbufsz != sk->reqbufsz)) ++ __xddp_resize_streambuf(sk); ++ ++ rtdm_lock_put_irqrestore(&sk->lock, s); ++} ++ ++static void __xddp_output_handler(struct xnpipe_mh *mh, void *skarg) /* nklock held */ ++{ ++ struct xddp_socket *sk = skarg; ++ ++ if (sk->monitor) ++ sk->monitor(sk->fd, XDDP_EVTOUT, xnpipe_m_size(mh)); ++} ++ ++static int __xddp_input_handler(struct xnpipe_mh *mh, int retval, void *skarg) /* nklock held */ ++{ ++ struct xddp_socket *sk = skarg; ++ ++ if (sk->monitor) { ++ if (retval == 0) ++ /* Callee may alter the return value passed to userland. 
*/ ++ retval = sk->monitor(sk->fd, XDDP_EVTIN, xnpipe_m_size(mh)); ++ else if (retval == -EPIPE && mh == NULL) ++ sk->monitor(sk->fd, XDDP_EVTDOWN, 0); ++ } ++ ++ if (retval == 0 && ++ (__xnpipe_pollstate(sk->minor) & POLLIN) != 0 && ++ xnselect_signal(&sk->priv->recv_block, POLLIN)) ++ xnsched_run(); ++ ++ return retval; ++} ++ ++static void __xddp_release_handler(void *skarg) /* nklock free */ ++{ ++ struct xddp_socket *sk = skarg; ++ void *poolmem; ++ u32 poolsz; ++ ++ if (sk->bufpool == &sk->privpool) { ++ poolmem = xnheap_get_membase(&sk->privpool); ++ poolsz = xnheap_get_size(&sk->privpool); ++ xnheap_destroy(&sk->privpool); ++ xnheap_vfree(poolmem); ++ } else if (sk->buffer) ++ xnfree(sk->buffer); ++ ++ kfree(sk); ++} ++ ++static int xddp_socket(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct xddp_socket *sk = priv->state; ++ ++ sk->magic = XDDP_SOCKET_MAGIC; ++ sk->name = nullsa; /* Unbound */ ++ sk->peer = nullsa; ++ sk->minor = -1; ++ sk->handle = 0; ++ *sk->label = 0; ++ sk->poolsz = 0; ++ sk->buffer = NULL; ++ sk->buffer_port = -1; ++ sk->bufpool = NULL; ++ sk->fillsz = 0; ++ sk->status = 0; ++ sk->timeout = RTDM_TIMEOUT_INFINITE; ++ sk->curbufsz = 0; ++ sk->reqbufsz = 0; ++ sk->monitor = NULL; ++ rtdm_lock_init(&sk->lock); ++ sk->priv = priv; ++ ++ return 0; ++} ++ ++static void xddp_close(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct xddp_socket *sk = priv->state; ++ rtdm_lockctx_t s; ++ ++ sk->monitor = NULL; ++ ++ if (!test_bit(_XDDP_BOUND, &sk->status)) ++ return; ++ ++ cobalt_atomic_enter(s); ++ portmap[sk->name.sipc_port] = NULL; ++ cobalt_atomic_leave(s); ++ ++ if (sk->handle) ++ xnregistry_remove(sk->handle); ++ ++ xnpipe_disconnect(sk->minor); ++} ++ ++static ssize_t __xddp_recvmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ struct sockaddr_ipc *saddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct xddp_message *mbuf = NULL; /* Fake GCC */ ++ struct xddp_socket *sk = priv->state; ++ ssize_t maxlen, len, wrlen, vlen; ++ nanosecs_rel_t timeout; ++ struct xnpipe_mh *mh; ++ int nvec, rdoff, ret; ++ struct xnbufd bufd; ++ spl_t s; ++ ++ if (!test_bit(_XDDP_BOUND, &sk->status)) ++ return -EAGAIN; ++ ++ maxlen = rtdm_get_iov_flatlen(iov, iovlen); ++ if (maxlen == 0) ++ return 0; ++ ++ timeout = (flags & MSG_DONTWAIT) ? RTDM_TIMEOUT_NONE : sk->timeout; ++ /* Pull heading message from the input queue. */ ++ len = xnpipe_recv(sk->minor, &mh, timeout); ++ if (len < 0) ++ return len == -EIDRM ? 0 : len; ++ if (len > maxlen) { ++ ret = -ENOBUFS; ++ goto out; ++ } ++ ++ mbuf = container_of(mh, struct xddp_message, mh); ++ ++ if (saddr) ++ *saddr = sk->name; ++ ++ /* Write "len" bytes from mbuf->data to the vector cells */ ++ for (ret = 0, nvec = 0, rdoff = 0, wrlen = len; ++ nvec < iovlen && wrlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = wrlen >= iov[nvec].iov_len ? 
iov[nvec].iov_len : wrlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_from_kmem(&bufd, mbuf->data + rdoff, vlen); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_from_kmem(&bufd, mbuf->data + rdoff, vlen); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ goto out; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ wrlen -= vlen; ++ rdoff += vlen; ++ } ++out: ++ xnheap_free(sk->bufpool, mbuf); ++ cobalt_atomic_enter(s); ++ if ((__xnpipe_pollstate(sk->minor) & POLLIN) == 0 && ++ xnselect_signal(&priv->recv_block, 0)) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return ret ?: len; ++} ++ ++static ssize_t xddp_recvmsg(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags) ++{ ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct sockaddr_ipc saddr; ++ ssize_t ret; ++ ++ if (flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen < sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ } else if (msg->msg_namelen != 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __xddp_recvmsg(fd, iov, msg->msg_iovlen, flags, &saddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy the updated I/O vector back */ ++ if (rtdm_put_iovec(fd, iov, msg, iov_fast)) ++ return -EFAULT; ++ ++ /* Copy the source address if required. */ ++ if (msg->msg_name) { ++ if (rtipc_put_arg(fd, msg->msg_name, &saddr, sizeof(saddr))) ++ return -EFAULT; ++ msg->msg_namelen = sizeof(struct sockaddr_ipc); ++ } ++ ++ return ret; ++} ++ ++static ssize_t xddp_read(struct rtdm_fd *fd, void *buf, size_t len) ++{ ++ struct iovec iov = { .iov_base = buf, .iov_len = len }; ++ ++ return __xddp_recvmsg(fd, &iov, 1, 0, NULL); ++} ++ ++static ssize_t __xddp_stream(struct xddp_socket *sk, ++ int from, struct xnbufd *bufd) ++{ ++ struct xddp_message *mbuf; ++ size_t fillptr, rembytes; ++ rtdm_lockctx_t s; ++ ssize_t outbytes; ++ int ret; ++ ++ /* ++ * xnpipe_msend() and xnpipe_mfixup() routines will only grab ++ * the nklock directly or indirectly, so holding our socket ++ * lock across those calls is fine. ++ */ ++ rtdm_lock_get_irqsave(&sk->lock, s); ++ ++ /* ++ * There are two cases in which we must remove the cork ++ * unconditionally and send the incoming data as a standalone ++ * datagram: the destination port does not support streaming, ++ * or its streaming buffer is already filled with data issued ++ * from another port. ++ */ ++ if (sk->curbufsz == 0 || ++ (sk->buffer_port >= 0 && sk->buffer_port != from)) { ++ /* This will end up into a standalone datagram. */ ++ outbytes = 0; ++ goto out; ++ } ++ ++ mbuf = sk->buffer; ++ rembytes = sk->curbufsz - sizeof(*mbuf) - sk->fillsz; ++ outbytes = bufd->b_len > rembytes ? rembytes : bufd->b_len; ++ if (likely(outbytes > 0)) { ++ repeat: ++ /* Mark the beginning of a should-be-atomic section. */ ++ __set_bit(_XDDP_ATOMIC, &sk->status); ++ fillptr = sk->fillsz; ++ sk->fillsz += outbytes; ++ ++ rtdm_lock_put_irqrestore(&sk->lock, s); ++ ret = xnbufd_copy_to_kmem(mbuf->data + fillptr, ++ bufd, outbytes); ++ rtdm_lock_get_irqsave(&sk->lock, s); ++ ++ if (ret < 0) { ++ outbytes = ret; ++ __clear_bit(_XDDP_ATOMIC, &sk->status); ++ goto out; ++ } ++ ++ /* We haven't been atomic, let's try again. 
*/ ++ if (!__test_and_clear_bit(_XDDP_ATOMIC, &sk->status)) ++ goto repeat; ++ ++ if (__test_and_set_bit(_XDDP_SYNCWAIT, &sk->status)) ++ outbytes = xnpipe_mfixup(sk->minor, ++ &mbuf->mh, outbytes); ++ else { ++ sk->buffer_port = from; ++ outbytes = xnpipe_send(sk->minor, &mbuf->mh, ++ outbytes + sizeof(*mbuf), ++ XNPIPE_NORMAL); ++ if (outbytes > 0) ++ outbytes -= sizeof(*mbuf); ++ } ++ } ++ ++out: ++ rtdm_lock_put_irqrestore(&sk->lock, s); ++ ++ return outbytes; ++} ++ ++static ssize_t __xddp_sendmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ const struct sockaddr_ipc *daddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ ssize_t len, rdlen, wrlen, vlen, ret, sublen; ++ struct xddp_socket *sk = priv->state; ++ struct xddp_message *mbuf; ++ struct xddp_socket *rsk; ++ struct rtdm_fd *rfd; ++ int nvec, to, from; ++ struct xnbufd bufd; ++ rtdm_lockctx_t s; ++ ++ len = rtdm_get_iov_flatlen(iov, iovlen); ++ if (len == 0) ++ return 0; ++ ++ from = sk->name.sipc_port; ++ to = daddr->sipc_port; ++ ++ cobalt_atomic_enter(s); ++ rfd = portmap[to]; ++ if (rfd && rtdm_fd_lock(rfd) < 0) ++ rfd = NULL; ++ cobalt_atomic_leave(s); ++ ++ if (rfd == NULL) ++ return -ECONNRESET; ++ ++ rsk = rtipc_fd_to_state(rfd); ++ if (!test_bit(_XDDP_BOUND, &rsk->status)) { ++ rtdm_fd_unlock(rfd); ++ return -ECONNREFUSED; ++ } ++ ++ sublen = len; ++ nvec = 0; ++ ++ /* ++ * If active, the streaming buffer is already pending on the ++ * output queue, so we basically have nothing to do during a ++ * MSG_MORE -> MSG_NONE transition. Therefore, we only have to ++ * take care of filling that buffer when MSG_MORE is ++ * given. Yummie. ++ */ ++ if (flags & MSG_MORE) { ++ for (rdlen = sublen, wrlen = 0; ++ nvec < iovlen && rdlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = rdlen >= iov[nvec].iov_len ? iov[nvec].iov_len : rdlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __xddp_stream(rsk, from, &bufd); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __xddp_stream(rsk, from, &bufd); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ goto fail_unlock; ++ wrlen += ret; ++ rdlen -= ret; ++ iov[nvec].iov_base += ret; ++ iov[nvec].iov_len -= ret; ++ /* ++ * In case of a short write to the streaming ++ * buffer, send the unsent part as a ++ * standalone datagram. ++ */ ++ if (ret < vlen) { ++ sublen = rdlen; ++ goto nostream; ++ } ++ } ++ len = wrlen; ++ goto done; ++ } ++ ++nostream: ++ mbuf = xnheap_alloc(rsk->bufpool, sublen + sizeof(*mbuf)); ++ if (unlikely(mbuf == NULL)) { ++ ret = -ENOMEM; ++ goto fail_unlock; ++ } ++ ++ /* ++ * Move "sublen" bytes to mbuf->data from the vector cells ++ */ ++ for (rdlen = sublen, wrlen = 0; nvec < iovlen && rdlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = rdlen >= iov[nvec].iov_len ? iov[nvec].iov_len : rdlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_to_kmem(mbuf->data + wrlen, &bufd, vlen); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_to_kmem(mbuf->data + wrlen, &bufd, vlen); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ goto fail_freebuf; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ rdlen -= vlen; ++ wrlen += vlen; ++ } ++ ++ ret = xnpipe_send(rsk->minor, &mbuf->mh, ++ sublen + sizeof(*mbuf), ++ (flags & MSG_OOB) ? 
++ XNPIPE_URGENT : XNPIPE_NORMAL); ++ ++ if (unlikely(ret < 0)) { ++ fail_freebuf: ++ xnheap_free(rsk->bufpool, mbuf); ++ fail_unlock: ++ rtdm_fd_unlock(rfd); ++ return ret; ++ } ++done: ++ rtdm_fd_unlock(rfd); ++ ++ return len; ++} ++ ++static ssize_t xddp_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct xddp_socket *sk = priv->state; ++ struct sockaddr_ipc daddr; ++ ssize_t ret; ++ ++ /* ++ * We accept MSG_DONTWAIT, but do not care about it, since ++ * writing to the real-time endpoint of a message pipe must be ++ * a non-blocking operation. ++ */ ++ if (flags & ~(MSG_MORE | MSG_OOB | MSG_DONTWAIT)) ++ return -EINVAL; ++ ++ /* ++ * MSG_MORE and MSG_OOB are mutually exclusive in our ++ * implementation. ++ */ ++ if ((flags & (MSG_MORE | MSG_OOB)) == (MSG_MORE | MSG_OOB)) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen != sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ ++ /* Fetch the destination address to send to. */ ++ if (rtipc_get_arg(fd, &daddr, msg->msg_name, sizeof(daddr))) ++ return -EFAULT; ++ ++ if (daddr.sipc_port < 0 || ++ daddr.sipc_port >= CONFIG_XENO_OPT_PIPE_NRDEV) ++ return -EINVAL; ++ } else { ++ if (msg->msg_namelen != 0) ++ return -EINVAL; ++ daddr = sk->peer; ++ if (daddr.sipc_port < 0) ++ return -EDESTADDRREQ; ++ } ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __xddp_sendmsg(fd, iov, msg->msg_iovlen, flags, &daddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy updated I/O vector back */ ++ return rtdm_put_iovec(fd, iov, msg, iov_fast) ?: ret; ++} ++ ++static ssize_t xddp_write(struct rtdm_fd *fd, ++ const void *buf, size_t len) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov = { .iov_base = (void *)buf, .iov_len = len }; ++ struct xddp_socket *sk = priv->state; ++ ++ if (sk->peer.sipc_port < 0) ++ return -EDESTADDRREQ; ++ ++ return __xddp_sendmsg(fd, &iov, 1, 0, &sk->peer); ++} ++ ++static int __xddp_bind_socket(struct rtipc_private *priv, ++ struct sockaddr_ipc *sa) ++{ ++ struct xddp_socket *sk = priv->state; ++ struct xnpipe_operations ops; ++ rtdm_lockctx_t s; ++ size_t poolsz; ++ void *poolmem; ++ int ret = 0; ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ /* Allow special port -1 for auto-selection. 
*/ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_PIPE_NRDEV) ++ return -EINVAL; ++ ++ cobalt_atomic_enter(s); ++ if (test_bit(_XDDP_BOUND, &sk->status) || ++ __test_and_set_bit(_XDDP_BINDING, &sk->status)) ++ ret = -EADDRINUSE; ++ cobalt_atomic_leave(s); ++ if (ret) ++ return ret; ++ ++ poolsz = sk->poolsz; ++ if (poolsz > 0) { ++ poolsz = PAGE_ALIGN(poolsz); ++ poolsz += PAGE_ALIGN(sk->reqbufsz); ++ poolmem = xnheap_vmalloc(poolsz); ++ if (poolmem == NULL) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ ret = xnheap_init(&sk->privpool, poolmem, poolsz); ++ if (ret) { ++ xnheap_vfree(poolmem); ++ goto fail; ++ } ++ ++ sk->bufpool = &sk->privpool; ++ } else ++ sk->bufpool = &cobalt_heap; ++ ++ if (sk->reqbufsz > 0) { ++ sk->buffer = xnheap_alloc(sk->bufpool, sk->reqbufsz); ++ if (sk->buffer == NULL) { ++ ret = -ENOMEM; ++ goto fail_freeheap; ++ } ++ sk->curbufsz = sk->reqbufsz; ++ } ++ ++ sk->fd = rtdm_private_to_fd(priv); ++ ++ ops.output = &__xddp_output_handler; ++ ops.input = &__xddp_input_handler; ++ ops.alloc_ibuf = &__xddp_alloc_handler; ++ ops.free_ibuf = &__xddp_free_handler; ++ ops.free_obuf = &__xddp_free_handler; ++ ops.release = &__xddp_release_handler; ++ ++ ret = xnpipe_connect(sa->sipc_port, &ops, sk); ++ if (ret < 0) { ++ if (ret == -EBUSY) ++ ret = -EADDRINUSE; ++ fail_freeheap: ++ if (poolsz > 0) { ++ xnheap_destroy(&sk->privpool); ++ xnheap_vfree(poolmem); ++ } ++ fail: ++ clear_bit(_XDDP_BINDING, &sk->status); ++ return ret; ++ } ++ ++ sk->minor = ret; ++ sa->sipc_port = ret; ++ sk->name = *sa; ++ /* Set default destination if unset at binding time. */ ++ if (sk->peer.sipc_port < 0) ++ sk->peer = *sa; ++ ++ if (poolsz > 0) ++ xnheap_set_name(sk->bufpool, "xddp-pool@%d", sa->sipc_port); ++ ++ if (*sk->label) { ++ ret = xnregistry_enter(sk->label, sk, &sk->handle, ++ &__xddp_pnode.node); ++ if (ret) { ++ /* The release handler will cleanup the pool for us. */ ++ xnpipe_disconnect(sk->minor); ++ return ret; ++ } ++ } ++ ++ cobalt_atomic_enter(s); ++ portmap[sk->minor] = rtdm_private_to_fd(priv); ++ __clear_bit(_XDDP_BINDING, &sk->status); ++ __set_bit(_XDDP_BOUND, &sk->status); ++ if (xnselect_signal(&priv->send_block, POLLOUT)) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++} ++ ++static int __xddp_connect_socket(struct xddp_socket *sk, ++ struct sockaddr_ipc *sa) ++{ ++ struct sockaddr_ipc _sa; ++ struct xddp_socket *rsk; ++ int ret, resched = 0; ++ rtdm_lockctx_t s; ++ xnhandle_t h; ++ ++ if (sa == NULL) { ++ _sa = nullsa; ++ sa = &_sa; ++ goto set_assoc; ++ } ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_PIPE_NRDEV) ++ return -EINVAL; ++ /* ++ * - If a valid sipc_port is passed in the [0..NRDEV-1] range, ++ * it is used verbatim and the connection succeeds ++ * immediately, regardless of whether the destination is ++ * bound at the time of the call. ++ * ++ * - If sipc_port is -1 and a label was set via XDDP_LABEL, ++ * connect() blocks for the requested amount of time (see ++ * SO_RCVTIMEO) until a socket is bound to the same label. ++ * ++ * - If sipc_port is -1 and no label is given, the default ++ * destination address is cleared, meaning that any subsequent ++ * write() to the socket will return -EDESTADDRREQ, until a ++ * valid destination address is set via connect() or bind(). ++ * ++ * - In all other cases, -EINVAL is returned. 
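For reference, a minimal user-space XDDP endpoint following the addressing rules enumerated above could be set up as in the sketch below. The port number and pool size are arbitrary placeholders, and the program is assumed to be built against libcobalt so that socket(), setsockopt() and bind() resolve to the Cobalt wrappers.

#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <rtdm/ipc.h>

/* Sketch only: port 7 and the 16 KiB pool size are placeholders. */
static int open_xddp_endpoint(void)
{
	struct sockaddr_ipc saddr;
	size_t poolsz = 16384;	/* dedicated local pool, in bytes */
	int s, ret;

	s = socket(AF_RTIPC, SOCK_DGRAM, IPCPROTO_XDDP);
	if (s < 0)
		return -errno;

	/* Optional: give this endpoint its own buffer pool (XDDP_POOLSZ)
	 * instead of drawing from the Cobalt system heap. */
	ret = setsockopt(s, SOL_XDDP, XDDP_POOLSZ, &poolsz, sizeof(poolsz));
	if (ret)
		return -errno;

	/* Bind to a fixed port; passing -1 would auto-select a free one. */
	memset(&saddr, 0, sizeof(saddr));
	saddr.sipc_family = AF_RTIPC;
	saddr.sipc_port = 7;
	ret = bind(s, (struct sockaddr *)&saddr, sizeof(saddr));
	if (ret)
		return -errno;

	/* The non-real-time side reaches this endpoint through /dev/rtp7. */
	return s;
}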
++ */ ++ if (sa->sipc_port < 0 && *sk->label) { ++ ret = xnregistry_bind(sk->label, ++ sk->timeout, XN_RELATIVE, &h); ++ if (ret) ++ return ret; ++ ++ cobalt_atomic_enter(s); ++ rsk = xnregistry_lookup(h, NULL); ++ if (rsk == NULL || rsk->magic != XDDP_SOCKET_MAGIC) ++ ret = -EINVAL; ++ else { ++ /* Fetch labeled port number. */ ++ sa->sipc_port = rsk->minor; ++ resched = xnselect_signal(&sk->priv->send_block, POLLOUT); ++ } ++ cobalt_atomic_leave(s); ++ if (ret) ++ return ret; ++ } else if (sa->sipc_port < 0) ++ sa = &nullsa; ++set_assoc: ++ cobalt_atomic_enter(s); ++ if (!test_bit(_XDDP_BOUND, &sk->status)) ++ /* Set default name. */ ++ sk->name = *sa; ++ /* Set default destination. */ ++ sk->peer = *sa; ++ if (sa->sipc_port < 0) ++ __clear_bit(_XDDP_CONNECTED, &sk->status); ++ else ++ __set_bit(_XDDP_CONNECTED, &sk->status); ++ if (resched) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++} ++ ++static int __xddp_setsockopt(struct xddp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ int (*monitor)(struct rtdm_fd *fd, int event, long arg); ++ struct _rtdm_setsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ size_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptin(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ ret = rtipc_get_timeval(fd, &tv, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ sk->timeout = rtipc_timeval_to_ns(&tv); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_XDDP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case XDDP_BUFSZ: ++ ret = rtipc_get_length(fd, &len, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ if (len > 0) { ++ len += sizeof(struct xddp_message); ++ if (sk->bufpool && ++ len > xnheap_get_size(sk->bufpool)) { ++ return -EINVAL; ++ } ++ } ++ rtdm_lock_get_irqsave(&sk->lock, s); ++ sk->reqbufsz = len; ++ if (len != sk->curbufsz && ++ !test_bit(_XDDP_SYNCWAIT, &sk->status) && ++ test_bit(_XDDP_BOUND, &sk->status)) ++ ret = __xddp_resize_streambuf(sk); ++ rtdm_lock_put_irqrestore(&sk->lock, s); ++ break; ++ ++ case XDDP_POOLSZ: ++ ret = rtipc_get_length(fd, &len, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ if (len == 0) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ if (test_bit(_XDDP_BOUND, &sk->status) || ++ test_bit(_XDDP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else ++ sk->poolsz = len; ++ cobalt_atomic_leave(s); ++ break; ++ ++ case XDDP_MONITOR: ++ /* Monitoring is available from kernel-space only. 
*/ ++ if (rtdm_fd_is_user(fd)) ++ return -EPERM; ++ if (sopt.optlen != sizeof(monitor)) ++ return -EINVAL; ++ if (rtipc_get_arg(NULL, &monitor, sopt.optval, sizeof(monitor))) ++ return -EFAULT; ++ sk->monitor = monitor; ++ break; ++ ++ case XDDP_LABEL: ++ if (sopt.optlen < sizeof(plabel)) ++ return -EINVAL; ++ if (rtipc_get_arg(fd, &plabel, sopt.optval, sizeof(plabel))) ++ return -EFAULT; ++ cobalt_atomic_enter(s); ++ if (test_bit(_XDDP_BOUND, &sk->status) || ++ test_bit(_XDDP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else { ++ strcpy(sk->label, plabel.label); ++ sk->label[XNOBJECT_NAME_LEN-1] = 0; ++ } ++ cobalt_atomic_leave(s); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __xddp_getsockopt(struct xddp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ struct _rtdm_getsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ socklen_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptout(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ if (rtipc_get_arg(fd, &len, sopt.optlen, sizeof(len))) ++ return -EFAULT; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ rtipc_ns_to_timeval(&tv, sk->timeout); ++ ret = rtipc_put_timeval(fd, sopt.optval, &tv, len); ++ if (ret) ++ return ret; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_XDDP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case XDDP_LABEL: ++ if (len < sizeof(plabel)) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ strcpy(plabel.label, sk->label); ++ cobalt_atomic_leave(s); ++ if (rtipc_put_arg(fd, sopt.optval, &plabel, sizeof(plabel))) ++ return -EFAULT; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __xddp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct sockaddr_ipc saddr, *saddrp = &saddr; ++ struct xddp_socket *sk = priv->state; ++ int ret = 0; ++ ++ switch (request) { ++ ++ COMPAT_CASE(_RTIOC_CONNECT): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret == 0) ++ ret = __xddp_connect_socket(sk, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_BIND): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret) ++ return ret; ++ if (saddrp == NULL) ++ return -EFAULT; ++ ret = __xddp_bind_socket(priv, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->name); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETPEERNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->peer); ++ break; ++ ++ COMPAT_CASE(_RTIOC_SETSOCKOPT): ++ ret = __xddp_setsockopt(sk, fd, arg); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKOPT): ++ ret = __xddp_getsockopt(sk, fd, arg); ++ break; ++ ++ case _RTIOC_LISTEN: ++ COMPAT_CASE(_RTIOC_ACCEPT): ++ ret = -EOPNOTSUPP; ++ break; ++ ++ case _RTIOC_SHUTDOWN: ++ ret = -ENOTCONN; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int xddp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ int ret; ++ ++ switch (request) { ++ COMPAT_CASE(_RTIOC_BIND): ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; /* Try downgrading to NRT */ ++ default: ++ ret = __xddp_ioctl(fd, request, arg); ++ } ++ ++ return ret; ++} ++ ++static unsigned int xddp_pollstate(struct rtdm_fd *fd) /* atomic */ ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct xddp_socket *sk = priv->state, *rsk; ++ unsigned int mask = 0, pollstate; ++ struct rtdm_fd 
*rfd; ++ ++ pollstate = __xnpipe_pollstate(sk->minor); ++ if (test_bit(_XDDP_BOUND, &sk->status)) ++ mask |= (pollstate & POLLIN); ++ ++ /* ++ * If the socket is connected, POLLOUT means that the peer ++ * exists, is bound and can receive data. Otherwise POLLOUT is ++ * always set, assuming the client is likely to use explicit ++ * addressing in send operations. ++ */ ++ if (test_bit(_XDDP_CONNECTED, &sk->status)) { ++ rfd = portmap[sk->peer.sipc_port]; ++ if (rfd) { ++ rsk = rtipc_fd_to_state(rfd); ++ mask |= (pollstate & POLLOUT); ++ } ++ } else ++ mask |= POLLOUT; ++ ++ return mask; ++} ++ ++struct rtipc_protocol xddp_proto_driver = { ++ .proto_name = "xddp", ++ .proto_statesz = sizeof(struct xddp_socket), ++ .proto_ops = { ++ .socket = xddp_socket, ++ .close = xddp_close, ++ .recvmsg = xddp_recvmsg, ++ .sendmsg = xddp_sendmsg, ++ .read = xddp_read, ++ .write = xddp_write, ++ .ioctl = xddp_ioctl, ++ .pollstate = xddp_pollstate, ++ } ++}; +--- linux/drivers/xenomai/ipc/bufp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/bufp.c 2021-04-07 16:01:26.264635542 +0800 +@@ -0,0 +1,1100 @@ ++/** ++ * This file is part of the Xenomai project. ++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++ ++#define BUFP_SOCKET_MAGIC 0xa61a61a6 ++ ++struct bufp_socket { ++ int magic; ++ struct sockaddr_ipc name; ++ struct sockaddr_ipc peer; ++ ++ void *bufmem; ++ size_t bufsz; ++ u_long status; ++ xnhandle_t handle; ++ char label[XNOBJECT_NAME_LEN]; ++ ++ off_t rdoff; ++ off_t rdrsvd; ++ int rdsem; ++ off_t wroff; ++ off_t wrrsvd; ++ int wrsem; ++ size_t fillsz; ++ rtdm_event_t i_event; ++ rtdm_event_t o_event; ++ ++ nanosecs_rel_t rx_timeout; ++ nanosecs_rel_t tx_timeout; ++ ++ struct rtipc_private *priv; ++}; ++ ++struct bufp_wait_context { ++ struct rtipc_wait_context wc; ++ size_t len; ++ struct bufp_socket *sk; ++}; ++ ++static struct sockaddr_ipc nullsa = { ++ .sipc_family = AF_RTIPC, ++ .sipc_port = -1 ++}; ++ ++static struct xnmap *portmap; ++ ++#define _BUFP_BINDING 0 ++#define _BUFP_BOUND 1 ++#define _BUFP_CONNECTED 2 ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static char *__bufp_link_target(void *obj) ++{ ++ struct bufp_socket *sk = obj; ++ ++ return kasformat("%d", sk->name.sipc_port); ++} ++ ++extern struct xnptree rtipc_ptree; ++ ++static struct xnpnode_link __bufp_pnode = { ++ .node = { ++ .dirname = "bufp", ++ .root = &rtipc_ptree, ++ .ops = &xnregistry_vlink_ops, ++ }, ++ .target = __bufp_link_target, ++}; ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++static struct xnpnode_link __bufp_pnode = { ++ .node = { ++ .dirname = "bufp", ++ }, ++}; ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++static int bufp_socket(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct bufp_socket *sk = priv->state; ++ ++ sk->magic = BUFP_SOCKET_MAGIC; ++ sk->name = nullsa; /* Unbound */ ++ sk->peer = nullsa; ++ sk->bufmem = NULL; ++ sk->bufsz = 0; ++ sk->rdoff = 0; ++ sk->wroff = 0; ++ sk->fillsz = 0; ++ sk->rdrsvd = 0; ++ sk->wrrsvd = 0; ++ sk->status = 0; ++ sk->handle = 0; ++ sk->rx_timeout = RTDM_TIMEOUT_INFINITE; ++ sk->tx_timeout = RTDM_TIMEOUT_INFINITE; ++ *sk->label = 0; ++ rtdm_event_init(&sk->i_event, 0); ++ rtdm_event_init(&sk->o_event, 0); ++ sk->priv = priv; ++ ++ return 0; ++} ++ ++static void bufp_close(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct bufp_socket *sk = priv->state; ++ rtdm_lockctx_t s; ++ ++ rtdm_event_destroy(&sk->i_event); ++ rtdm_event_destroy(&sk->o_event); ++ ++ if (test_bit(_BUFP_BOUND, &sk->status)) { ++ if (sk->name.sipc_port > -1) { ++ cobalt_atomic_enter(s); ++ xnmap_remove(portmap, sk->name.sipc_port); ++ cobalt_atomic_leave(s); ++ } ++ ++ if (sk->handle) ++ xnregistry_remove(sk->handle); ++ ++ if (sk->bufmem) ++ xnheap_vfree(sk->bufmem); ++ } ++ ++ kfree(sk); ++} ++ ++static ssize_t __bufp_readbuf(struct bufp_socket *sk, ++ struct xnbufd *bufd, ++ int flags) ++{ ++ struct bufp_wait_context wait, *bufwc; ++ struct rtipc_wait_context *wc; ++ struct xnthread *waiter; ++ size_t rbytes, n, avail; ++ ssize_t len, ret, xret; ++ rtdm_toseq_t toseq; ++ rtdm_lockctx_t s; ++ off_t rdoff; ++ int resched; ++ ++ len = bufd->b_len; ++ ++ rtdm_toseq_init(&toseq, sk->rx_timeout); ++ ++ cobalt_atomic_enter(s); ++redo: ++ for (;;) { ++ /* ++ * We should be able to read a complete message of the ++ * requested length, or block. ++ */ ++ avail = sk->fillsz - sk->rdrsvd; ++ if (avail < len) ++ goto wait; ++ ++ /* Reserve a read slot into the circular buffer. 
*/ ++ rdoff = sk->rdoff; ++ sk->rdoff = (rdoff + len) % sk->bufsz; ++ sk->rdrsvd += len; ++ sk->rdsem++; ++ rbytes = ret = len; ++ ++ do { ++ if (rdoff + rbytes > sk->bufsz) ++ n = sk->bufsz - rdoff; ++ else ++ n = rbytes; ++ /* ++ * Drop the lock before copying data to ++ * user. The read slot is consumed in any ++ * case: the non-copied portion of the message ++ * is lost on bad write. ++ */ ++ cobalt_atomic_leave(s); ++ xret = xnbufd_copy_from_kmem(bufd, sk->bufmem + rdoff, n); ++ cobalt_atomic_enter(s); ++ if (xret < 0) { ++ ret = -EFAULT; ++ break; ++ } ++ ++ rbytes -= n; ++ rdoff = (rdoff + n) % sk->bufsz; ++ } while (rbytes > 0); ++ ++ if (--sk->rdsem > 0) ++ goto out; ++ ++ resched = 0; ++ if (sk->fillsz == sk->bufsz) /* -> becomes writable */ ++ resched |= xnselect_signal(&sk->priv->send_block, POLLOUT); ++ ++ sk->fillsz -= sk->rdrsvd; ++ sk->rdrsvd = 0; ++ ++ if (sk->fillsz == 0) /* -> becomes non-readable */ ++ resched |= xnselect_signal(&sk->priv->recv_block, 0); ++ ++ /* ++ * Wake up all threads pending on the output wait ++ * queue, if we freed enough room for the leading one ++ * to post its message. ++ */ ++ waiter = rtipc_peek_wait_head(&sk->o_event); ++ if (waiter == NULL) ++ goto out; ++ ++ wc = rtipc_get_wait_context(waiter); ++ XENO_BUG_ON(COBALT, wc == NULL); ++ bufwc = container_of(wc, struct bufp_wait_context, wc); ++ if (bufwc->len + sk->fillsz <= sk->bufsz) ++ /* This call rescheds internally. */ ++ rtdm_event_pulse(&sk->o_event); ++ else if (resched) ++ xnsched_run(); ++ /* ++ * We cannot fail anymore once some data has been ++ * copied via the buffer descriptor, so no need to ++ * check for any reason to invalidate the latter. ++ */ ++ goto out; ++ ++ wait: ++ if (flags & MSG_DONTWAIT) { ++ ret = -EWOULDBLOCK; ++ break; ++ } ++ ++ /* ++ * Check whether writers are already waiting for ++ * sending data, while we are about to wait for ++ * receiving some. In such a case, we have a ++ * pathological use of the buffer. We must allow for a ++ * short read to prevent a deadlock. ++ */ ++ if (sk->fillsz > 0 && rtipc_peek_wait_head(&sk->o_event)) { ++ len = sk->fillsz; ++ goto redo; ++ } ++ ++ wait.len = len; ++ wait.sk = sk; ++ rtipc_prepare_wait(&wait.wc); ++ /* ++ * Keep the nucleus lock across the wait call, so that ++ * we don't miss a pulse. ++ */ ++ ret = rtdm_event_timedwait(&sk->i_event, ++ sk->rx_timeout, &toseq); ++ if (unlikely(ret)) ++ break; ++ } ++out: ++ cobalt_atomic_leave(s); ++ ++ return ret; ++} ++ ++static ssize_t __bufp_recvmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ struct sockaddr_ipc *saddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct bufp_socket *sk = priv->state; ++ ssize_t len, wrlen, vlen, ret; ++ struct xnbufd bufd; ++ int nvec; ++ ++ if (!test_bit(_BUFP_BOUND, &sk->status)) ++ return -EAGAIN; ++ ++ len = rtdm_get_iov_flatlen(iov, iovlen); ++ if (len == 0) ++ return 0; ++ /* ++ * We may only return complete messages to readers, so there ++ * is no point in waiting for messages which are larger than ++ * what the buffer can hold. ++ */ ++ if (len > sk->bufsz) ++ return -EINVAL; ++ ++ /* ++ * Write "len" bytes from the buffer to the vector cells. Each ++ * cell is handled as a separate message. ++ */ ++ for (nvec = 0, wrlen = len; nvec < iovlen && wrlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = wrlen >= iov[nvec].iov_len ? 
iov[nvec].iov_len : wrlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __bufp_readbuf(sk, &bufd, flags); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __bufp_readbuf(sk, &bufd, flags); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ return ret; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ wrlen -= vlen; ++ if (ret < vlen) ++ /* Short reads may happen in rare cases. */ ++ break; ++ } ++ ++ /* ++ * There is no way to determine who the sender was since we ++ * process data in byte-oriented mode, so we just copy our own ++ * sockaddr to send back a valid address. ++ */ ++ if (saddr) ++ *saddr = sk->name; ++ ++ return len - wrlen; ++} ++ ++static ssize_t bufp_recvmsg(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags) ++{ ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct sockaddr_ipc saddr; ++ ssize_t ret; ++ ++ if (flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen < sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ } else if (msg->msg_namelen != 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __bufp_recvmsg(fd, iov, msg->msg_iovlen, flags, &saddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy the updated I/O vector back */ ++ if (rtdm_put_iovec(fd, iov, msg, iov_fast)) ++ return -EFAULT; ++ ++ /* Copy the source address if required. */ ++ if (msg->msg_name) { ++ if (rtipc_put_arg(fd, msg->msg_name, ++ &saddr, sizeof(saddr))) ++ return -EFAULT; ++ msg->msg_namelen = sizeof(struct sockaddr_ipc); ++ } ++ ++ return ret; ++} ++ ++static ssize_t bufp_read(struct rtdm_fd *fd, void *buf, size_t len) ++{ ++ struct iovec iov = { .iov_base = buf, .iov_len = len }; ++ ++ return __bufp_recvmsg(fd, &iov, 1, 0, NULL); ++} ++ ++static ssize_t __bufp_writebuf(struct bufp_socket *rsk, ++ struct bufp_socket *sk, ++ struct xnbufd *bufd, ++ int flags) ++{ ++ struct bufp_wait_context wait, *bufwc; ++ struct rtipc_wait_context *wc; ++ struct xnthread *waiter; ++ size_t wbytes, n, avail; ++ ssize_t len, ret, xret; ++ rtdm_toseq_t toseq; ++ rtdm_lockctx_t s; ++ off_t wroff; ++ int resched; ++ ++ len = bufd->b_len; ++ ++ rtdm_toseq_init(&toseq, sk->tx_timeout); ++ ++ cobalt_atomic_enter(s); ++ ++ for (;;) { ++ /* ++ * No short or scattered writes: we should write the ++ * entire message atomically or block. ++ */ ++ avail = rsk->fillsz + rsk->wrrsvd; ++ if (avail + len > rsk->bufsz) ++ goto wait; ++ ++ /* Reserve a write slot into the circular buffer. */ ++ wroff = rsk->wroff; ++ rsk->wroff = (wroff + len) % rsk->bufsz; ++ rsk->wrrsvd += len; ++ rsk->wrsem++; ++ wbytes = ret = len; ++ ++ do { ++ if (wroff + wbytes > rsk->bufsz) ++ n = rsk->bufsz - wroff; ++ else ++ n = wbytes; ++ /* ++ * We have to drop the lock while reading in ++ * data, but we can't rollback on bad read ++ * from user because some other thread might ++ * have populated the memory ahead of our ++ * write slot already: bluntly clear the ++ * unavailable bytes on copy error. 
++ */ ++ cobalt_atomic_leave(s); ++ xret = xnbufd_copy_to_kmem(rsk->bufmem + wroff, bufd, n); ++ cobalt_atomic_enter(s); ++ if (xret < 0) { ++ memset(rsk->bufmem + wroff, 0, n); ++ ret = -EFAULT; ++ break; ++ } ++ ++ wbytes -= n; ++ wroff = (wroff + n) % rsk->bufsz; ++ } while (wbytes > 0); ++ ++ if (--rsk->wrsem > 0) ++ goto out; ++ ++ resched = 0; ++ if (rsk->fillsz == 0) /* -> becomes readable */ ++ resched |= xnselect_signal(&rsk->priv->recv_block, POLLIN); ++ ++ rsk->fillsz += rsk->wrrsvd; ++ rsk->wrrsvd = 0; ++ ++ if (rsk->fillsz == rsk->bufsz) /* becomes non-writable */ ++ resched |= xnselect_signal(&rsk->priv->send_block, 0); ++ /* ++ * Wake up all threads pending on the input wait ++ * queue, if we accumulated enough data to feed the ++ * leading one. ++ */ ++ waiter = rtipc_peek_wait_head(&rsk->i_event); ++ if (waiter == NULL) ++ goto out; ++ ++ wc = rtipc_get_wait_context(waiter); ++ XENO_BUG_ON(COBALT, wc == NULL); ++ bufwc = container_of(wc, struct bufp_wait_context, wc); ++ if (bufwc->len <= rsk->fillsz) ++ rtdm_event_pulse(&rsk->i_event); ++ else if (resched) ++ xnsched_run(); ++ /* ++ * We cannot fail anymore once some data has been ++ * copied via the buffer descriptor, so no need to ++ * check for any reason to invalidate the latter. ++ */ ++ goto out; ++ wait: ++ if (flags & MSG_DONTWAIT) { ++ ret = -EWOULDBLOCK; ++ break; ++ } ++ ++ wait.len = len; ++ wait.sk = rsk; ++ rtipc_prepare_wait(&wait.wc); ++ /* ++ * Keep the nucleus lock across the wait call, so that ++ * we don't miss a pulse. ++ */ ++ ret = rtdm_event_timedwait(&rsk->o_event, ++ sk->tx_timeout, &toseq); ++ if (unlikely(ret)) ++ break; ++ } ++out: ++ cobalt_atomic_leave(s); ++ ++ return ret; ++} ++ ++static ssize_t __bufp_sendmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ const struct sockaddr_ipc *daddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct bufp_socket *sk = priv->state, *rsk; ++ ssize_t len, rdlen, vlen, ret = 0; ++ struct rtdm_fd *rfd; ++ struct xnbufd bufd; ++ rtdm_lockctx_t s; ++ int nvec; ++ ++ len = rtdm_get_iov_flatlen(iov, iovlen); ++ if (len == 0) ++ return 0; ++ ++ cobalt_atomic_enter(s); ++ rfd = xnmap_fetch_nocheck(portmap, daddr->sipc_port); ++ if (rfd && rtdm_fd_lock(rfd) < 0) ++ rfd = NULL; ++ cobalt_atomic_leave(s); ++ if (rfd == NULL) ++ return -ECONNRESET; ++ ++ rsk = rtipc_fd_to_state(rfd); ++ if (!test_bit(_BUFP_BOUND, &rsk->status)) { ++ rtdm_fd_unlock(rfd); ++ return -ECONNREFUSED; ++ } ++ ++ /* ++ * We may only send complete messages, so there is no point in ++ * accepting messages which are larger than what the buffer ++ * can hold. ++ */ ++ if (len > rsk->bufsz) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ /* ++ * Read "len" bytes to the buffer from the vector cells. Each ++ * cell is handled as a separate message. ++ */ ++ for (nvec = 0, rdlen = len; nvec < iovlen && rdlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = rdlen >= iov[nvec].iov_len ? 
iov[nvec].iov_len : rdlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __bufp_writebuf(rsk, sk, &bufd, flags); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __bufp_writebuf(rsk, sk, &bufd, flags); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ goto fail; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ rdlen -= vlen; ++ } ++ ++ rtdm_fd_unlock(rfd); ++ ++ return len - rdlen; ++fail: ++ rtdm_fd_unlock(rfd); ++ ++ return ret; ++} ++ ++static ssize_t bufp_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct bufp_socket *sk = priv->state; ++ struct sockaddr_ipc daddr; ++ ssize_t ret; ++ ++ if (flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen != sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ ++ /* Fetch the destination address to send to. */ ++ if (rtipc_get_arg(fd, &daddr, msg->msg_name, sizeof(daddr))) ++ return -EFAULT; ++ ++ if (daddr.sipc_port < 0 || ++ daddr.sipc_port >= CONFIG_XENO_OPT_BUFP_NRPORT) ++ return -EINVAL; ++ } else { ++ if (msg->msg_namelen != 0) ++ return -EINVAL; ++ daddr = sk->peer; ++ if (daddr.sipc_port < 0) ++ return -EDESTADDRREQ; ++ } ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __bufp_sendmsg(fd, iov, msg->msg_iovlen, flags, &daddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy updated I/O vector back */ ++ return rtdm_put_iovec(fd, iov, msg, iov_fast) ?: ret; ++} ++ ++static ssize_t bufp_write(struct rtdm_fd *fd, ++ const void *buf, size_t len) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov = { .iov_base = (void *)buf, .iov_len = len }; ++ struct bufp_socket *sk = priv->state; ++ ++ if (sk->peer.sipc_port < 0) ++ return -EDESTADDRREQ; ++ ++ return __bufp_sendmsg(fd, &iov, 1, 0, &sk->peer); ++} ++ ++static int __bufp_bind_socket(struct rtipc_private *priv, ++ struct sockaddr_ipc *sa) ++{ ++ struct bufp_socket *sk = priv->state; ++ int ret = 0, port; ++ struct rtdm_fd *fd; ++ rtdm_lockctx_t s; ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_BUFP_NRPORT) ++ return -EINVAL; ++ ++ cobalt_atomic_enter(s); ++ if (test_bit(_BUFP_BOUND, &sk->status) || ++ __test_and_set_bit(_BUFP_BINDING, &sk->status)) ++ ret = -EADDRINUSE; ++ cobalt_atomic_leave(s); ++ ++ if (ret) ++ return ret; ++ ++ /* Will auto-select a free port number if unspec (-1). */ ++ port = sa->sipc_port; ++ fd = rtdm_private_to_fd(priv); ++ cobalt_atomic_enter(s); ++ port = xnmap_enter(portmap, port, fd); ++ cobalt_atomic_leave(s); ++ if (port < 0) ++ return port == -EEXIST ? -EADDRINUSE : -ENOMEM; ++ ++ sa->sipc_port = port; ++ ++ /* ++ * The caller must have told us how much memory is needed for ++ * buffer space via setsockopt(), before we got there. ++ */ ++ if (sk->bufsz == 0) ++ return -ENOBUFS; ++ ++ sk->bufmem = xnheap_vmalloc(sk->bufsz); ++ if (sk->bufmem == NULL) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ sk->name = *sa; ++ /* Set default destination if unset at binding time. 
*/ ++ if (sk->peer.sipc_port < 0) ++ sk->peer = *sa; ++ ++ if (*sk->label) { ++ ret = xnregistry_enter(sk->label, sk, ++ &sk->handle, &__bufp_pnode.node); ++ if (ret) { ++ xnheap_vfree(sk->bufmem); ++ goto fail; ++ } ++ } ++ ++ cobalt_atomic_enter(s); ++ __clear_bit(_BUFP_BINDING, &sk->status); ++ __set_bit(_BUFP_BOUND, &sk->status); ++ if (xnselect_signal(&priv->send_block, POLLOUT)) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++fail: ++ xnmap_remove(portmap, port); ++ clear_bit(_BUFP_BINDING, &sk->status); ++ ++ return ret; ++} ++ ++static int __bufp_connect_socket(struct bufp_socket *sk, ++ struct sockaddr_ipc *sa) ++{ ++ struct sockaddr_ipc _sa; ++ struct bufp_socket *rsk; ++ int ret, resched = 0; ++ rtdm_lockctx_t s; ++ xnhandle_t h; ++ ++ if (sa == NULL) { ++ _sa = nullsa; ++ sa = &_sa; ++ goto set_assoc; ++ } ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_BUFP_NRPORT) ++ return -EINVAL; ++ /* ++ * - If a valid sipc_port is passed in the [0..NRPORT-1] range, ++ * it is used verbatim and the connection succeeds ++ * immediately, regardless of whether the destination is ++ * bound at the time of the call. ++ * ++ * - If sipc_port is -1 and a label was set via BUFP_LABEL, ++ * connect() blocks for the requested amount of time (see ++ * SO_RCVTIMEO) until a socket is bound to the same label. ++ * ++ * - If sipc_port is -1 and no label is given, the default ++ * destination address is cleared, meaning that any subsequent ++ * write() to the socket will return -EDESTADDRREQ, until a ++ * valid destination address is set via connect() or bind(). ++ * ++ * - In all other cases, -EINVAL is returned. ++ */ ++ if (sa->sipc_port < 0 && *sk->label) { ++ ret = xnregistry_bind(sk->label, ++ sk->rx_timeout, XN_RELATIVE, &h); ++ if (ret) ++ return ret; ++ ++ cobalt_atomic_enter(s); ++ rsk = xnregistry_lookup(h, NULL); ++ if (rsk == NULL || rsk->magic != BUFP_SOCKET_MAGIC) ++ ret = -EINVAL; ++ else { ++ /* Fetch labeled port number. */ ++ sa->sipc_port = rsk->name.sipc_port; ++ resched = xnselect_signal(&sk->priv->send_block, POLLOUT); ++ } ++ cobalt_atomic_leave(s); ++ if (ret) ++ return ret; ++ } else if (sa->sipc_port < 0) ++ sa = &nullsa; ++set_assoc: ++ cobalt_atomic_enter(s); ++ if (!test_bit(_BUFP_BOUND, &sk->status)) ++ /* Set default name. */ ++ sk->name = *sa; ++ /* Set default destination. 
*/ ++ sk->peer = *sa; ++ if (sa->sipc_port < 0) ++ __clear_bit(_BUFP_CONNECTED, &sk->status); ++ else ++ __set_bit(_BUFP_CONNECTED, &sk->status); ++ if (resched) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++} ++ ++static int __bufp_setsockopt(struct bufp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ struct _rtdm_setsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ size_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptin(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ ret = rtipc_get_timeval(fd, &tv, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ sk->rx_timeout = rtipc_timeval_to_ns(&tv); ++ break; ++ ++ case SO_SNDTIMEO: ++ ret = rtipc_get_timeval(fd, &tv, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ sk->tx_timeout = rtipc_timeval_to_ns(&tv); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_BUFP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case BUFP_BUFSZ: ++ ret = rtipc_get_length(fd, &len, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ if (len == 0) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ /* ++ * We may not do this more than once, and we have to ++ * do this before the first binding. ++ */ ++ if (test_bit(_BUFP_BOUND, &sk->status) || ++ test_bit(_BUFP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else ++ sk->bufsz = len; ++ cobalt_atomic_leave(s); ++ break; ++ ++ case BUFP_LABEL: ++ if (sopt.optlen < sizeof(plabel)) ++ return -EINVAL; ++ if (rtipc_get_arg(fd, &plabel, sopt.optval, sizeof(plabel))) ++ return -EFAULT; ++ cobalt_atomic_enter(s); ++ /* ++ * We may attach a label to a client socket which was ++ * previously bound in BUFP. 
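For reference, a BUFP endpoint is typically configured in the same spirit: the ring size must be set through BUFP_BUFSZ before the first bind(), and a label may be attached so peers can connect() by name rather than by port number. The sketch below uses placeholder values and assumes a libcobalt-linked program.

#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <rtdm/ipc.h>

/* Sketch only: buffer size and label are placeholders. */
static int open_bufp_endpoint(void)
{
	struct rtipc_port_label plabel;
	struct sockaddr_ipc saddr;
	size_t bufsz = 32768;	/* ring buffer size, required before bind() */
	int s, ret;

	s = socket(AF_RTIPC, SOCK_DGRAM, IPCPROTO_BUFP);
	if (s < 0)
		return -errno;

	ret = setsockopt(s, SOL_BUFP, BUFP_BUFSZ, &bufsz, sizeof(bufsz));
	if (ret)
		return -errno;

	/* Optional: register a label, so a peer may connect() with
	 * sipc_port == -1 and the same label to resolve the port. */
	memset(&plabel, 0, sizeof(plabel));
	strcpy(plabel.label, "sensor-feed");
	ret = setsockopt(s, SOL_BUFP, BUFP_LABEL, &plabel, sizeof(plabel));
	if (ret)
		return -errno;

	/* Let the driver auto-select a free port. */
	memset(&saddr, 0, sizeof(saddr));
	saddr.sipc_family = AF_RTIPC;
	saddr.sipc_port = -1;
	ret = bind(s, (struct sockaddr *)&saddr, sizeof(saddr));
	if (ret)
		return -errno;

	return s;
}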
++ */ ++ if (test_bit(_BUFP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else { ++ strcpy(sk->label, plabel.label); ++ sk->label[XNOBJECT_NAME_LEN-1] = 0; ++ } ++ cobalt_atomic_leave(s); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __bufp_getsockopt(struct bufp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ struct _rtdm_getsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ socklen_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptout(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ if (rtipc_get_arg(fd, &len, sopt.optlen, sizeof(len))) ++ return -EFAULT; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ rtipc_ns_to_timeval(&tv, sk->rx_timeout); ++ ret = rtipc_put_timeval(fd, sopt.optval, &tv, len); ++ if (ret) ++ return ret; ++ break; ++ ++ case SO_SNDTIMEO: ++ rtipc_ns_to_timeval(&tv, sk->tx_timeout); ++ ret = rtipc_put_timeval(fd, sopt.optval, &tv, len); ++ if (ret) ++ return ret; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_BUFP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case BUFP_LABEL: ++ if (len < sizeof(plabel)) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ strcpy(plabel.label, sk->label); ++ cobalt_atomic_leave(s); ++ if (rtipc_put_arg(fd, sopt.optval, &plabel, sizeof(plabel))) ++ return -EFAULT; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __bufp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct sockaddr_ipc saddr, *saddrp = &saddr; ++ struct bufp_socket *sk = priv->state; ++ int ret = 0; ++ ++ switch (request) { ++ ++ COMPAT_CASE(_RTIOC_CONNECT): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret) ++ return ret; ++ ret = __bufp_connect_socket(sk, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_BIND): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret) ++ return ret; ++ if (saddrp == NULL) ++ return -EFAULT; ++ ret = __bufp_bind_socket(priv, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->name); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETPEERNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->peer); ++ break; ++ ++ COMPAT_CASE(_RTIOC_SETSOCKOPT): ++ ret = __bufp_setsockopt(sk, fd, arg); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKOPT): ++ ret = __bufp_getsockopt(sk, fd, arg); ++ break; ++ ++ case _RTIOC_LISTEN: ++ COMPAT_CASE(_RTIOC_ACCEPT): ++ ret = -EOPNOTSUPP; ++ break; ++ ++ case _RTIOC_SHUTDOWN: ++ ret = -ENOTCONN; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int bufp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ int ret; ++ ++ switch (request) { ++ COMPAT_CASE(_RTIOC_BIND): ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; /* Try downgrading to NRT */ ++ default: ++ ret = __bufp_ioctl(fd, request, arg); ++ } ++ ++ return ret; ++} ++ ++static unsigned int bufp_pollstate(struct rtdm_fd *fd) /* atomic */ ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct bufp_socket *sk = priv->state, *rsk; ++ unsigned int mask = 0; ++ struct rtdm_fd *rfd; ++ ++ if (test_bit(_BUFP_BOUND, &sk->status) && sk->fillsz > 0) ++ mask |= POLLIN; ++ ++ /* ++ * If the socket is connected, POLLOUT means that the peer ++ * exists, is bound and can receive data. 
Otherwise POLLOUT is ++ * always set, assuming the client is likely to use explicit ++ * addressing in send operations. ++ */ ++ if (test_bit(_BUFP_CONNECTED, &sk->status)) { ++ rfd = xnmap_fetch_nocheck(portmap, sk->peer.sipc_port); ++ if (rfd) { ++ rsk = rtipc_fd_to_state(rfd); ++ if (rsk->fillsz < rsk->bufsz) ++ mask |= POLLOUT; ++ } ++ } else ++ mask |= POLLOUT; ++ ++ return mask; ++} ++ ++static int bufp_init(void) ++{ ++ portmap = xnmap_create(CONFIG_XENO_OPT_BUFP_NRPORT, 0, 0); ++ if (portmap == NULL) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void bufp_exit(void) ++{ ++ xnmap_delete(portmap); ++} ++ ++struct rtipc_protocol bufp_proto_driver = { ++ .proto_name = "bufp", ++ .proto_statesz = sizeof(struct bufp_socket), ++ .proto_init = bufp_init, ++ .proto_exit = bufp_exit, ++ .proto_ops = { ++ .socket = bufp_socket, ++ .close = bufp_close, ++ .recvmsg = bufp_recvmsg, ++ .sendmsg = bufp_sendmsg, ++ .read = bufp_read, ++ .write = bufp_write, ++ .ioctl = bufp_ioctl, ++ .pollstate = bufp_pollstate, ++ } ++}; +--- linux/drivers/xenomai/ipc/internal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/internal.h 2021-04-07 16:01:26.260635548 +0800 +@@ -0,0 +1,134 @@ ++/** ++ * This file is part of the Xenomai project. ++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _RTIPC_INTERNAL_H ++#define _RTIPC_INTERNAL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct rtipc_protocol; ++ ++struct rtipc_private { ++ struct rtipc_protocol *proto; ++ DECLARE_XNSELECT(send_block); ++ DECLARE_XNSELECT(recv_block); ++ void *state; ++}; ++ ++struct rtipc_protocol { ++ const char *proto_name; ++ int proto_statesz; ++ int (*proto_init)(void); ++ void (*proto_exit)(void); ++ struct { ++ int (*socket)(struct rtdm_fd *fd); ++ void (*close)(struct rtdm_fd *fd); ++ ssize_t (*recvmsg)(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags); ++ ssize_t (*sendmsg)(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags); ++ ssize_t (*read)(struct rtdm_fd *fd, ++ void *buf, size_t len); ++ ssize_t (*write)(struct rtdm_fd *fd, ++ const void *buf, size_t len); ++ int (*ioctl)(struct rtdm_fd *fd, ++ unsigned int request, void *arg); ++ unsigned int (*pollstate)(struct rtdm_fd *fd); ++ } proto_ops; ++}; ++ ++static inline void *rtipc_fd_to_state(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *p = rtdm_fd_to_private(fd); ++ return p->state; ++} ++ ++static inline nanosecs_rel_t rtipc_timeval_to_ns(const struct timeval *tv) ++{ ++ nanosecs_rel_t ns = tv->tv_usec * 1000; ++ ++ if (tv->tv_sec) ++ ns += (nanosecs_rel_t)tv->tv_sec * 1000000000UL; ++ ++ return ns; ++} ++ ++static inline void rtipc_ns_to_timeval(struct timeval *tv, nanosecs_rel_t ns) ++{ ++ unsigned long nsecs; ++ ++ tv->tv_sec = xnclock_divrem_billion(ns, &nsecs); ++ tv->tv_usec = nsecs / 1000; ++} ++ ++int rtipc_get_sockaddr(struct rtdm_fd *fd, ++ struct sockaddr_ipc **saddrp, ++ const void *arg); ++ ++int rtipc_put_sockaddr(struct rtdm_fd *fd, void *arg, ++ const struct sockaddr_ipc *saddr); ++ ++int rtipc_get_sockoptout(struct rtdm_fd *fd, ++ struct _rtdm_getsockopt_args *sopt, ++ const void *arg); ++ ++int rtipc_put_sockoptout(struct rtdm_fd *fd, void *arg, ++ const struct _rtdm_getsockopt_args *sopt); ++ ++int rtipc_get_sockoptin(struct rtdm_fd *fd, ++ struct _rtdm_setsockopt_args *sopt, ++ const void *arg); ++ ++int rtipc_get_timeval(struct rtdm_fd *fd, struct timeval *tv, ++ const void *arg, size_t arglen); ++ ++int rtipc_put_timeval(struct rtdm_fd *fd, void *arg, ++ const struct timeval *tv, size_t arglen); ++ ++int rtipc_get_length(struct rtdm_fd *fd, size_t *lenp, ++ const void *arg, size_t arglen); ++ ++int rtipc_get_arg(struct rtdm_fd *fd, void *dst, const void *src, ++ size_t len); ++ ++int rtipc_put_arg(struct rtdm_fd *fd, void *dst, const void *src, ++ size_t len); ++ ++extern struct rtipc_protocol xddp_proto_driver; ++ ++extern struct rtipc_protocol iddp_proto_driver; ++ ++extern struct rtipc_protocol bufp_proto_driver; ++ ++extern struct xnptree rtipc_ptree; ++ ++#define rtipc_wait_context xnthread_wait_context ++#define rtipc_prepare_wait xnthread_prepare_wait ++#define rtipc_get_wait_context xnthread_get_wait_context ++#define rtipc_peek_wait_head(obj) xnsynch_peek_pendq(&(obj)->synch_base) ++ ++#define COMPAT_CASE(__op) case __op __COMPAT_CASE(__op ## _COMPAT) ++ ++#endif /* !_RTIPC_INTERNAL_H */ +--- linux/drivers/xenomai/ipc/rtipc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/rtipc.c 2021-04-07 16:01:26.255635555 +0800 +@@ -0,0 +1,523 @@ ++/** ++ * This file is part of the Xenomai project. 
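The timeval helpers shown in internal.h above (rtipc_timeval_to_ns() / rtipc_ns_to_timeval()) back the standard SO_RCVTIMEO option (and, for BUFP, SO_SNDTIMEO). From user space a receive timeout is therefore set the usual way, as in this sketch with an arbitrary 500 ms bound:

#include <sys/socket.h>
#include <sys/time.h>

/* Sketch only: bound blocking receives on an AF_RTIPC socket to 500 ms;
 * the driver converts the timeval to nanoseconds internally. */
static int set_rx_timeout(int s)
{
	struct timeval tv = { .tv_sec = 0, .tv_usec = 500000 };

	return setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
}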
++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++ ++MODULE_DESCRIPTION("Real-time IPC interface"); ++MODULE_AUTHOR("Philippe Gerum "); ++MODULE_LICENSE("GPL"); ++ ++static struct rtipc_protocol *protocols[IPCPROTO_MAX] = { ++#ifdef CONFIG_XENO_DRIVERS_RTIPC_XDDP ++ [IPCPROTO_XDDP - 1] = &xddp_proto_driver, ++#endif ++#ifdef CONFIG_XENO_DRIVERS_RTIPC_IDDP ++ [IPCPROTO_IDDP - 1] = &iddp_proto_driver, ++#endif ++#ifdef CONFIG_XENO_DRIVERS_RTIPC_BUFP ++ [IPCPROTO_BUFP - 1] = &bufp_proto_driver, ++#endif ++}; ++ ++DEFINE_XNPTREE(rtipc_ptree, "rtipc"); ++ ++int rtipc_get_arg(struct rtdm_fd *fd, void *dst, const void *src, size_t len) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ memcpy(dst, src, len); ++ return 0; ++ } ++ ++ return rtdm_copy_from_user(fd, dst, src, len); ++} ++ ++int rtipc_put_arg(struct rtdm_fd *fd, void *dst, const void *src, size_t len) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ memcpy(dst, src, len); ++ return 0; ++ } ++ ++ return rtdm_copy_to_user(fd, dst, src, len); ++} ++ ++int rtipc_get_sockaddr(struct rtdm_fd *fd, struct sockaddr_ipc **saddrp, ++ const void *arg) ++{ ++ const struct _rtdm_setsockaddr_args *p; ++ struct _rtdm_setsockaddr_args sreq; ++ int ret; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ p = arg; ++ if (p->addrlen > 0) { ++ if (p->addrlen != sizeof(**saddrp)) ++ return -EINVAL; ++ memcpy(*saddrp, p->addr, sizeof(**saddrp)); ++ } else { ++ if (p->addr) ++ return -EINVAL; ++ *saddrp = NULL; ++ } ++ return 0; ++ } ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ struct compat_rtdm_setsockaddr_args csreq; ++ ret = rtdm_safe_copy_from_user(fd, &csreq, arg, sizeof(csreq)); ++ if (ret) ++ return ret; ++ if (csreq.addrlen > 0) { ++ if (csreq.addrlen != sizeof(**saddrp)) ++ return -EINVAL; ++ return rtdm_safe_copy_from_user(fd, *saddrp, ++ compat_ptr(csreq.addr), ++ sizeof(**saddrp)); ++ } ++ if (csreq.addr) ++ return -EINVAL; ++ ++ *saddrp = NULL; ++ ++ return 0; ++ } ++#endif ++ ++ ret = rtdm_safe_copy_from_user(fd, &sreq, arg, sizeof(sreq)); ++ if (ret) ++ return ret; ++ if (sreq.addrlen > 0) { ++ if (sreq.addrlen != sizeof(**saddrp)) ++ return -EINVAL; ++ return rtdm_safe_copy_from_user(fd, *saddrp, ++ sreq.addr, sizeof(**saddrp)); ++ } ++ if (sreq.addr) ++ return -EINVAL; ++ ++ *saddrp = NULL; ++ ++ return 0; ++} ++ ++int rtipc_put_sockaddr(struct rtdm_fd *fd, void *arg, ++ const struct sockaddr_ipc *saddr) ++{ ++ const struct _rtdm_getsockaddr_args *p; ++ struct _rtdm_getsockaddr_args sreq; ++ socklen_t len; ++ int ret; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ p = arg; ++ if (*p->addrlen < sizeof(*saddr)) ++ return -EINVAL; ++ memcpy(p->addr, saddr, sizeof(*saddr)); ++ *p->addrlen = sizeof(*saddr); ++ return 0; ++ } ++ ++#ifdef 
CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ struct compat_rtdm_getsockaddr_args csreq; ++ ret = rtdm_safe_copy_from_user(fd, &csreq, arg, sizeof(csreq)); ++ if (ret) ++ return ret; ++ ++ ret = rtdm_safe_copy_from_user(fd, &len, ++ compat_ptr(csreq.addrlen), ++ sizeof(len)); ++ if (ret) ++ return ret; ++ ++ if (len < sizeof(*saddr)) ++ return -EINVAL; ++ ++ ret = rtdm_safe_copy_to_user(fd, compat_ptr(csreq.addr), ++ saddr, sizeof(*saddr)); ++ if (ret) ++ return ret; ++ ++ len = sizeof(*saddr); ++ return rtdm_safe_copy_to_user(fd, compat_ptr(csreq.addrlen), ++ &len, sizeof(len)); ++ } ++#endif ++ ++ sreq.addr = NULL; ++ sreq.addrlen = NULL; ++ ret = rtdm_safe_copy_from_user(fd, &sreq, arg, sizeof(sreq)); ++ if (ret) ++ return ret; ++ ++ ret = rtdm_safe_copy_from_user(fd, &len, sreq.addrlen, sizeof(len)); ++ if (ret) ++ return ret; ++ ++ if (len < sizeof(*saddr)) ++ return -EINVAL; ++ ++ ret = rtdm_safe_copy_to_user(fd, sreq.addr, saddr, sizeof(*saddr)); ++ if (ret) ++ return ret; ++ ++ len = sizeof(*saddr); ++ ++ return rtdm_safe_copy_to_user(fd, sreq.addrlen, &len, sizeof(len)); ++} ++ ++int rtipc_get_sockoptout(struct rtdm_fd *fd, struct _rtdm_getsockopt_args *sopt, ++ const void *arg) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ *sopt = *(struct _rtdm_getsockopt_args *)arg; ++ return 0; ++ } ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ struct compat_rtdm_getsockopt_args csopt; ++ int ret; ++ ret = rtdm_safe_copy_from_user(fd, &csopt, arg, sizeof(csopt)); ++ if (ret) ++ return ret; ++ sopt->level = csopt.level; ++ sopt->optname = csopt.optname; ++ sopt->optval = compat_ptr(csopt.optval); ++ sopt->optlen = compat_ptr(csopt.optlen); ++ return 0; ++ } ++#endif ++ ++ return rtdm_safe_copy_from_user(fd, sopt, arg, sizeof(*sopt)); ++} ++ ++int rtipc_put_sockoptout(struct rtdm_fd *fd, void *arg, ++ const struct _rtdm_getsockopt_args *sopt) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ *(struct _rtdm_getsockopt_args *)arg = *sopt; ++ return 0; ++ } ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ struct compat_rtdm_getsockopt_args csopt; ++ int ret; ++ csopt.level = sopt->level; ++ csopt.optname = sopt->optname; ++ csopt.optval = ptr_to_compat(sopt->optval); ++ csopt.optlen = ptr_to_compat(sopt->optlen); ++ ret = rtdm_safe_copy_to_user(fd, arg, &csopt, sizeof(csopt)); ++ if (ret) ++ return ret; ++ return 0; ++ } ++#endif ++ ++ return rtdm_safe_copy_to_user(fd, arg, sopt, sizeof(*sopt)); ++} ++ ++int rtipc_get_sockoptin(struct rtdm_fd *fd, struct _rtdm_setsockopt_args *sopt, ++ const void *arg) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ *sopt = *(struct _rtdm_setsockopt_args *)arg; ++ return 0; ++ } ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ struct compat_rtdm_setsockopt_args csopt; ++ int ret; ++ ret = rtdm_safe_copy_from_user(fd, &csopt, arg, sizeof(csopt)); ++ if (ret) ++ return ret; ++ sopt->level = csopt.level; ++ sopt->optname = csopt.optname; ++ sopt->optval = compat_ptr(csopt.optval); ++ sopt->optlen = csopt.optlen; ++ return 0; ++ } ++#endif ++ ++ return rtdm_safe_copy_from_user(fd, sopt, arg, sizeof(*sopt)); ++} ++ ++int rtipc_get_timeval(struct rtdm_fd *fd, struct timeval *tv, ++ const void *arg, size_t arglen) ++{ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ if (arglen != sizeof(struct compat_timeval)) ++ return -EINVAL; ++ return sys32_get_timeval(tv, arg); ++ } ++#endif ++ ++ if (arglen != sizeof(*tv)) ++ return -EINVAL; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ *tv = *(struct timeval *)arg; 
++ return 0; ++ } ++ ++ return rtdm_safe_copy_from_user(fd, tv, arg, sizeof(*tv)); ++} ++ ++int rtipc_put_timeval(struct rtdm_fd *fd, void *arg, ++ const struct timeval *tv, size_t arglen) ++{ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ if (arglen != sizeof(struct compat_timeval)) ++ return -EINVAL; ++ return sys32_put_timeval(arg, tv); ++ } ++#endif ++ ++ if (arglen != sizeof(*tv)) ++ return -EINVAL; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ *(struct timeval *)arg = *tv; ++ return 0; ++ } ++ ++ return rtdm_safe_copy_to_user(fd, arg, tv, sizeof(*tv)); ++} ++ ++int rtipc_get_length(struct rtdm_fd *fd, size_t *lenp, ++ const void *arg, size_t arglen) ++{ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ const compat_size_t *csz; ++ if (arglen != sizeof(*csz)) ++ return -EINVAL; ++ csz = arg; ++ return csz == NULL || ++ !access_rok(csz, sizeof(*csz)) || ++ __xn_get_user(*lenp, csz) ? -EFAULT : 0; ++ } ++#endif ++ ++ if (arglen != sizeof(size_t)) ++ return -EINVAL; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ *lenp = *(size_t *)arg; ++ return 0; ++ } ++ ++ return rtdm_safe_copy_from_user(fd, lenp, arg, sizeof(*lenp)); ++} ++ ++static int rtipc_socket(struct rtdm_fd *fd, int protocol) ++{ ++ struct rtipc_protocol *proto; ++ struct rtipc_private *priv; ++ int ret; ++ ++ if (protocol < 0 || protocol >= IPCPROTO_MAX) ++ return -EPROTONOSUPPORT; ++ ++ if (protocol == IPCPROTO_IPC) ++ /* Default protocol is IDDP */ ++ protocol = IPCPROTO_IDDP; ++ ++ proto = protocols[protocol - 1]; ++ if (proto == NULL) /* Not compiled in? */ ++ return -ENOPROTOOPT; ++ ++ priv = rtdm_fd_to_private(fd); ++ priv->proto = proto; ++ priv->state = kmalloc(proto->proto_statesz, GFP_KERNEL); ++ if (priv->state == NULL) ++ return -ENOMEM; ++ ++ xnselect_init(&priv->send_block); ++ xnselect_init(&priv->recv_block); ++ ++ ret = proto->proto_ops.socket(fd); ++ if (ret) ++ kfree(priv->state); ++ ++ return ret; ++} ++ ++static void rtipc_close(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ /* ++ * CAUTION: priv->state shall be released by the ++ * proto_ops.close() handler when appropriate (which may be ++ * done asynchronously later, see XDDP). 
++ */ ++ priv->proto->proto_ops.close(fd); ++ xnselect_destroy(&priv->recv_block); ++ xnselect_destroy(&priv->send_block); ++} ++ ++static ssize_t rtipc_recvmsg(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ return priv->proto->proto_ops.recvmsg(fd, msg, flags); ++} ++ ++static ssize_t rtipc_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ return priv->proto->proto_ops.sendmsg(fd, msg, flags); ++} ++ ++static ssize_t rtipc_read(struct rtdm_fd *fd, ++ void *buf, size_t len) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ return priv->proto->proto_ops.read(fd, buf, len); ++} ++ ++static ssize_t rtipc_write(struct rtdm_fd *fd, ++ const void *buf, size_t len) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ return priv->proto->proto_ops.write(fd, buf, len); ++} ++ ++static int rtipc_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ return priv->proto->proto_ops.ioctl(fd, request, arg); ++} ++ ++static int rtipc_select(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned int type, unsigned int index) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct xnselect_binding *binding; ++ unsigned int pollstate, mask; ++ struct xnselect *block; ++ spl_t s; ++ int ret; ++ ++ if (type != XNSELECT_READ && type != XNSELECT_WRITE) ++ return -EINVAL; ++ ++ binding = xnmalloc(sizeof(*binding)); ++ if (binding == NULL) ++ return -ENOMEM; ++ ++ cobalt_atomic_enter(s); ++ ++ pollstate = priv->proto->proto_ops.pollstate(fd); ++ ++ if (type == XNSELECT_READ) { ++ mask = pollstate & POLLIN; ++ block = &priv->recv_block; ++ } else { ++ mask = pollstate & POLLOUT; ++ block = &priv->send_block; ++ } ++ ++ ret = xnselect_bind(block, binding, selector, type, index, mask); ++ ++ cobalt_atomic_leave(s); ++ ++ if (ret) ++ xnfree(binding); ++ ++ return ret; ++} ++ ++static struct rtdm_driver rtipc_driver = { ++ .profile_info = RTDM_PROFILE_INFO(rtipc, ++ RTDM_CLASS_RTIPC, ++ RTDM_SUBCLASS_GENERIC, ++ 1), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtipc_private), ++ .protocol_family = PF_RTIPC, ++ .socket_type = SOCK_DGRAM, ++ .ops = { ++ .socket = rtipc_socket, ++ .close = rtipc_close, ++ .recvmsg_rt = rtipc_recvmsg, ++ .recvmsg_nrt = NULL, ++ .sendmsg_rt = rtipc_sendmsg, ++ .sendmsg_nrt = NULL, ++ .ioctl_rt = rtipc_ioctl, ++ .ioctl_nrt = rtipc_ioctl, ++ .read_rt = rtipc_read, ++ .read_nrt = NULL, ++ .write_rt = rtipc_write, ++ .write_nrt = NULL, ++ .select = rtipc_select, ++ }, ++}; ++ ++static struct rtdm_device device = { ++ .driver = &rtipc_driver, ++ .label = "rtipc", ++}; ++ ++int __init __rtipc_init(void) ++{ ++ int ret, n; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ for (n = 0; n < IPCPROTO_MAX; n++) { ++ if (protocols[n] && protocols[n]->proto_init) { ++ ret = protocols[n]->proto_init(); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ return rtdm_dev_register(&device); ++} ++ ++void __exit __rtipc_exit(void) ++{ ++ int n; ++ ++ rtdm_dev_unregister(&device); ++ ++ for (n = 0; n < IPCPROTO_MAX; n++) { ++ if (protocols[n] && protocols[n]->proto_exit) ++ protocols[n]->proto_exit(); ++ } ++} ++ ++module_init(__rtipc_init); ++module_exit(__rtipc_exit); +--- linux/drivers/xenomai/autotune/autotune.c 1970-01-01 08:00:00.000000000 +0800 ++++ 
linux-patched/drivers/xenomai/autotune/autotune.c 2021-04-07 16:01:26.250635562 +0800 +@@ -0,0 +1,820 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("Xenomai/cobalt core clock autotuner"); ++MODULE_AUTHOR("Philippe Gerum "); ++MODULE_LICENSE("GPL"); ++ ++/* Auto-tuning services for the Cobalt core clock. */ ++ ++#define SAMPLING_TIME 500000000UL ++#define ADJUSTMENT_STEP 500 ++#define WARMUP_STEPS 10 ++#define AUTOTUNE_STEPS 40 ++ ++#define progress(__tuner, __fmt, __args...) \ ++ do { \ ++ if (!(__tuner)->quiet) \ ++ printk(XENO_INFO "autotune(%s) " __fmt "\n", \ ++ (__tuner)->name, ##__args); \ ++ } while (0) ++ ++struct tuning_score { ++ int pmean; ++ int stddev; ++ int minlat; ++ unsigned int step; ++ unsigned int gravity; ++}; ++ ++struct tuner_state { ++ xnticks_t ideal; ++ xnticks_t step; ++ int min_lat; ++ int max_lat; ++ int prev_mean; ++ long long prev_sqs; ++ long long cur_sqs; ++ unsigned int sum; ++ unsigned int cur_samples; ++ unsigned int max_samples; ++}; ++ ++struct gravity_tuner { ++ const char *name; ++ unsigned int (*get_gravity)(struct gravity_tuner *tuner); ++ void (*set_gravity)(struct gravity_tuner *tuner, unsigned int gravity); ++ unsigned int (*adjust_gravity)(struct gravity_tuner *tuner, int adjust); ++ int (*init_tuner)(struct gravity_tuner *tuner); ++ int (*start_tuner)(struct gravity_tuner *tuner, xnticks_t start_time, ++ xnticks_t interval); ++ void (*destroy_tuner)(struct gravity_tuner *tuner); ++ struct tuner_state state; ++ rtdm_event_t done; ++ int status; ++ int quiet; ++ struct tuning_score scores[AUTOTUNE_STEPS]; ++ int nscores; ++ atomic_t refcount; ++}; ++ ++struct irq_gravity_tuner { ++ rtdm_timer_t timer; ++ struct gravity_tuner tuner; ++}; ++ ++struct kthread_gravity_tuner { ++ rtdm_task_t task; ++ rtdm_event_t barrier; ++ xnticks_t start_time; ++ xnticks_t interval; ++ struct gravity_tuner tuner; ++}; ++ ++struct uthread_gravity_tuner { ++ rtdm_timer_t timer; ++ rtdm_event_t pulse; ++ struct gravity_tuner tuner; ++}; ++ ++struct autotune_context { ++ struct gravity_tuner *tuner; ++ struct autotune_setup setup; ++ rtdm_lock_t tuner_lock; ++}; ++ ++static inline void init_tuner(struct gravity_tuner *tuner) ++{ ++ rtdm_event_init(&tuner->done, 0); ++ tuner->status = 0; ++ atomic_set(&tuner->refcount, 0); ++} ++ ++static inline void destroy_tuner(struct gravity_tuner *tuner) ++{ ++ rtdm_event_destroy(&tuner->done); ++} ++ ++static inline void done_sampling(struct gravity_tuner *tuner, ++ int status) ++{ ++ tuner->status = status; ++ rtdm_event_signal(&tuner->done); ++} ++ ++static int add_sample(struct gravity_tuner *tuner, xnticks_t timestamp) ++{ ++ 
struct tuner_state *state; ++ int n, delta, cur_mean; ++ ++ state = &tuner->state; ++ ++ delta = (int)(timestamp - state->ideal); ++ if (delta < state->min_lat) ++ state->min_lat = delta; ++ if (delta > state->max_lat) ++ state->max_lat = delta; ++ if (delta < 0) ++ delta = 0; ++ ++ state->sum += delta; ++ state->ideal += state->step; ++ n = ++state->cur_samples; ++ ++ /* ++ * Knuth citing Welford in TAOCP (Vol 2), single-pass ++ * computation of variance using a recurrence relation. ++ */ ++ if (n == 1) ++ state->prev_mean = delta; ++ else { ++ cur_mean = state->prev_mean + (delta - state->prev_mean) / n; ++ state->cur_sqs = state->prev_sqs + (delta - state->prev_mean) ++ * (delta - cur_mean); ++ state->prev_mean = cur_mean; ++ state->prev_sqs = state->cur_sqs; ++ } ++ ++ if (n >= state->max_samples) { ++ done_sampling(tuner, 0); ++ return 1; /* Finished. */ ++ } ++ ++ return 0; /* Keep going. */ ++} ++ ++static void timer_handler(rtdm_timer_t *timer) ++{ ++ struct irq_gravity_tuner *irq_tuner; ++ xnticks_t now; ++ ++ irq_tuner = container_of(timer, struct irq_gravity_tuner, timer); ++ now = xnclock_read_raw(&nkclock); ++ ++ if (add_sample(&irq_tuner->tuner, now)) ++ rtdm_timer_stop_in_handler(timer); ++} ++ ++static int init_irq_tuner(struct gravity_tuner *tuner) ++{ ++ struct irq_gravity_tuner *irq_tuner; ++ int ret; ++ ++ irq_tuner = container_of(tuner, struct irq_gravity_tuner, tuner); ++ ret = rtdm_timer_init(&irq_tuner->timer, timer_handler, "autotune"); ++ if (ret) ++ return ret; ++ ++ init_tuner(tuner); ++ ++ return 0; ++} ++ ++static void destroy_irq_tuner(struct gravity_tuner *tuner) ++{ ++ struct irq_gravity_tuner *irq_tuner; ++ ++ irq_tuner = container_of(tuner, struct irq_gravity_tuner, tuner); ++ rtdm_timer_destroy(&irq_tuner->timer); ++ destroy_tuner(tuner); ++} ++ ++static unsigned int get_irq_gravity(struct gravity_tuner *tuner) ++{ ++ return nkclock.gravity.irq; ++} ++ ++static void set_irq_gravity(struct gravity_tuner *tuner, unsigned int gravity) ++{ ++ nkclock.gravity.irq = gravity; ++} ++ ++static unsigned int adjust_irq_gravity(struct gravity_tuner *tuner, int adjust) ++{ ++ return nkclock.gravity.irq += adjust; ++} ++ ++static int start_irq_tuner(struct gravity_tuner *tuner, ++ xnticks_t start_time, xnticks_t interval) ++{ ++ struct irq_gravity_tuner *irq_tuner; ++ ++ irq_tuner = container_of(tuner, struct irq_gravity_tuner, tuner); ++ ++ return rtdm_timer_start(&irq_tuner->timer, start_time, ++ interval, RTDM_TIMERMODE_ABSOLUTE); ++} ++ ++struct irq_gravity_tuner irq_tuner = { ++ .tuner = { ++ .name = "irqhand", ++ .init_tuner = init_irq_tuner, ++ .destroy_tuner = destroy_irq_tuner, ++ .get_gravity = get_irq_gravity, ++ .set_gravity = set_irq_gravity, ++ .adjust_gravity = adjust_irq_gravity, ++ .start_tuner = start_irq_tuner, ++ }, ++}; ++ ++void task_handler(void *arg) ++{ ++ struct kthread_gravity_tuner *k_tuner = arg; ++ xnticks_t now; ++ int ret = 0; ++ ++ for (;;) { ++ if (rtdm_task_should_stop()) ++ break; ++ ++ ret = rtdm_event_wait(&k_tuner->barrier); ++ if (ret) ++ break; ++ ++ ret = rtdm_task_set_period(&k_tuner->task, k_tuner->start_time, ++ k_tuner->interval); ++ if (ret) ++ break; ++ ++ for (;;) { ++ ret = rtdm_task_wait_period(NULL); ++ if (ret && ret != -ETIMEDOUT) ++ goto out; ++ ++ now = xnclock_read_raw(&nkclock); ++ if (add_sample(&k_tuner->tuner, now)) { ++ rtdm_task_set_period(&k_tuner->task, 0, 0); ++ break; ++ } ++ } ++ } ++out: ++ done_sampling(&k_tuner->tuner, ret); ++ rtdm_task_destroy(&k_tuner->task); ++} ++ ++static int 
init_kthread_tuner(struct gravity_tuner *tuner) ++{ ++ struct kthread_gravity_tuner *k_tuner; ++ ++ init_tuner(tuner); ++ k_tuner = container_of(tuner, struct kthread_gravity_tuner, tuner); ++ rtdm_event_init(&k_tuner->barrier, 0); ++ ++ return rtdm_task_init(&k_tuner->task, "autotune", ++ task_handler, k_tuner, ++ RTDM_TASK_HIGHEST_PRIORITY, 0); ++} ++ ++static void destroy_kthread_tuner(struct gravity_tuner *tuner) ++{ ++ struct kthread_gravity_tuner *k_tuner; ++ ++ k_tuner = container_of(tuner, struct kthread_gravity_tuner, tuner); ++ rtdm_task_destroy(&k_tuner->task); ++ rtdm_event_destroy(&k_tuner->barrier); ++} ++ ++static unsigned int get_kthread_gravity(struct gravity_tuner *tuner) ++{ ++ return nkclock.gravity.kernel; ++} ++ ++static void set_kthread_gravity(struct gravity_tuner *tuner, unsigned int gravity) ++{ ++ nkclock.gravity.kernel = gravity; ++} ++ ++static unsigned int adjust_kthread_gravity(struct gravity_tuner *tuner, int adjust) ++{ ++ return nkclock.gravity.kernel += adjust; ++} ++ ++static int start_kthread_tuner(struct gravity_tuner *tuner, ++ xnticks_t start_time, xnticks_t interval) ++{ ++ struct kthread_gravity_tuner *k_tuner; ++ ++ k_tuner = container_of(tuner, struct kthread_gravity_tuner, tuner); ++ ++ k_tuner->start_time = start_time; ++ k_tuner->interval = interval; ++ rtdm_event_signal(&k_tuner->barrier); ++ ++ return 0; ++} ++ ++struct kthread_gravity_tuner kthread_tuner = { ++ .tuner = { ++ .name = "kthread", ++ .init_tuner = init_kthread_tuner, ++ .destroy_tuner = destroy_kthread_tuner, ++ .get_gravity = get_kthread_gravity, ++ .set_gravity = set_kthread_gravity, ++ .adjust_gravity = adjust_kthread_gravity, ++ .start_tuner = start_kthread_tuner, ++ }, ++}; ++ ++static void pulse_handler(rtdm_timer_t *timer) ++{ ++ struct uthread_gravity_tuner *u_tuner; ++ ++ u_tuner = container_of(timer, struct uthread_gravity_tuner, timer); ++ rtdm_event_signal(&u_tuner->pulse); ++} ++ ++static int init_uthread_tuner(struct gravity_tuner *tuner) ++{ ++ struct uthread_gravity_tuner *u_tuner; ++ int ret; ++ ++ u_tuner = container_of(tuner, struct uthread_gravity_tuner, tuner); ++ ret = rtdm_timer_init(&u_tuner->timer, pulse_handler, "autotune"); ++ if (ret) ++ return ret; ++ ++ xntimer_set_gravity(&u_tuner->timer, XNTIMER_UGRAVITY); /* gasp... 
*/ ++ rtdm_event_init(&u_tuner->pulse, 0); ++ init_tuner(tuner); ++ ++ return 0; ++} ++ ++static void destroy_uthread_tuner(struct gravity_tuner *tuner) ++{ ++ struct uthread_gravity_tuner *u_tuner; ++ ++ u_tuner = container_of(tuner, struct uthread_gravity_tuner, tuner); ++ rtdm_timer_destroy(&u_tuner->timer); ++ rtdm_event_destroy(&u_tuner->pulse); ++} ++ ++static unsigned int get_uthread_gravity(struct gravity_tuner *tuner) ++{ ++ return nkclock.gravity.user; ++} ++ ++static void set_uthread_gravity(struct gravity_tuner *tuner, unsigned int gravity) ++{ ++ nkclock.gravity.user = gravity; ++} ++ ++static unsigned int adjust_uthread_gravity(struct gravity_tuner *tuner, int adjust) ++{ ++ return nkclock.gravity.user += adjust; ++} ++ ++static int start_uthread_tuner(struct gravity_tuner *tuner, ++ xnticks_t start_time, xnticks_t interval) ++{ ++ struct uthread_gravity_tuner *u_tuner; ++ ++ u_tuner = container_of(tuner, struct uthread_gravity_tuner, tuner); ++ ++ return rtdm_timer_start(&u_tuner->timer, start_time, ++ interval, RTDM_TIMERMODE_ABSOLUTE); ++} ++ ++static int add_uthread_sample(struct gravity_tuner *tuner, ++ nanosecs_abs_t user_timestamp) ++{ ++ struct uthread_gravity_tuner *u_tuner; ++ int ret; ++ ++ u_tuner = container_of(tuner, struct uthread_gravity_tuner, tuner); ++ ++ if (user_timestamp && ++ add_sample(tuner, xnclock_ns_to_ticks(&nkclock, user_timestamp))) { ++ rtdm_timer_stop(&u_tuner->timer); ++ /* Tell the caller to park until next round. */ ++ ret = -EPIPE; ++ } else ++ ret = rtdm_event_wait(&u_tuner->pulse); ++ ++ return ret; ++} ++ ++struct uthread_gravity_tuner uthread_tuner = { ++ .tuner = { ++ .name = "uthread", ++ .init_tuner = init_uthread_tuner, ++ .destroy_tuner = destroy_uthread_tuner, ++ .get_gravity = get_uthread_gravity, ++ .set_gravity = set_uthread_gravity, ++ .adjust_gravity = adjust_uthread_gravity, ++ .start_tuner = start_uthread_tuner, ++ }, ++}; ++ ++static inline void build_score(struct gravity_tuner *tuner, int step) ++{ ++ struct tuner_state *state = &tuner->state; ++ unsigned int variance, n; ++ ++ n = state->cur_samples; ++ tuner->scores[step].pmean = state->sum / n; ++ variance = n > 1 ? xnarch_llimd(state->cur_sqs, 1, n - 1) : 0; ++ tuner->scores[step].stddev = int_sqrt(variance); ++ tuner->scores[step].minlat = state->min_lat; ++ tuner->scores[step].gravity = tuner->get_gravity(tuner); ++ tuner->scores[step].step = step; ++ tuner->nscores++; ++} ++ ++static int cmp_score_mean(const void *c, const void *r) ++{ ++ const struct tuning_score *sc = c, *sr = r; ++ return sc->pmean - sr->pmean; ++} ++ ++static int cmp_score_stddev(const void *c, const void *r) ++{ ++ const struct tuning_score *sc = c, *sr = r; ++ return sc->stddev - sr->stddev; ++} ++ ++static int cmp_score_minlat(const void *c, const void *r) ++{ ++ const struct tuning_score *sc = c, *sr = r; ++ return sc->minlat - sr->minlat; ++} ++ ++static int cmp_score_gravity(const void *c, const void *r) ++{ ++ const struct tuning_score *sc = c, *sr = r; ++ return sc->gravity - sr->gravity; ++} ++ ++static int filter_mean(struct gravity_tuner *tuner) ++{ ++ sort(tuner->scores, tuner->nscores, sizeof(struct tuning_score), ++ cmp_score_mean, NULL); ++ ++ /* Top half of the best pondered means. */ ++ ++ return (tuner->nscores + 1) / 2; ++} ++ ++static int filter_stddev(struct gravity_tuner *tuner) ++{ ++ sort(tuner->scores, tuner->nscores, sizeof(struct tuning_score), ++ cmp_score_stddev, NULL); ++ ++ /* Top half of the best standard deviations. 
*/ ++ ++ return (tuner->nscores + 1) / 2; ++} ++ ++static int filter_minlat(struct gravity_tuner *tuner) ++{ ++ sort(tuner->scores, tuner->nscores, sizeof(struct tuning_score), ++ cmp_score_minlat, NULL); ++ ++ /* Top half of the minimum latencies. */ ++ ++ return (tuner->nscores + 1) / 2; ++} ++ ++static int filter_gravity(struct gravity_tuner *tuner) ++{ ++ sort(tuner->scores, tuner->nscores, sizeof(struct tuning_score), ++ cmp_score_gravity, NULL); ++ ++ /* Smallest gravity required among the shortest latencies. */ ++ ++ return tuner->nscores; ++} ++ ++static void dump_scores(struct gravity_tuner *tuner) ++{ ++ int n; ++ ++ if (tuner->quiet) ++ return; ++ ++ for (n = 0; n < tuner->nscores; n++) ++ printk(KERN_INFO ++ ".. S%.2d pmean=%Ld stddev=%Lu minlat=%Lu gravity=%Lu\n", ++ tuner->scores[n].step, ++ xnclock_ticks_to_ns(&nkclock, tuner->scores[n].pmean), ++ xnclock_ticks_to_ns(&nkclock, tuner->scores[n].stddev), ++ xnclock_ticks_to_ns(&nkclock, tuner->scores[n].minlat), ++ xnclock_ticks_to_ns(&nkclock, tuner->scores[n].gravity)); ++} ++ ++static inline void filter_score(struct gravity_tuner *tuner, ++ int (*filter)(struct gravity_tuner *tuner)) ++{ ++ tuner->nscores = filter(tuner); ++ dump_scores(tuner); ++} ++ ++static int tune_gravity(struct gravity_tuner *tuner, int period) ++{ ++ struct tuner_state *state = &tuner->state; ++ int ret, step, gravity_limit, adjust; ++ unsigned int orig_gravity; ++ ++ state->step = xnclock_ns_to_ticks(&nkclock, period); ++ state->max_samples = SAMPLING_TIME / (period ?: 1); ++ orig_gravity = tuner->get_gravity(tuner); ++ tuner->set_gravity(tuner, 0); ++ tuner->nscores = 0; ++ /* Gravity adjustment step */ ++ adjust = xnclock_ns_to_ticks(&nkclock, ADJUSTMENT_STEP) ?: 1; ++ gravity_limit = 0; ++ progress(tuner, "warming up..."); ++ ++ for (step = 0; step < WARMUP_STEPS + AUTOTUNE_STEPS; step++) { ++ state->ideal = xnclock_read_raw(&nkclock) + state->step * WARMUP_STEPS; ++ state->min_lat = xnclock_ns_to_ticks(&nkclock, SAMPLING_TIME); ++ state->max_lat = 0; ++ state->prev_mean = 0; ++ state->prev_sqs = 0; ++ state->cur_sqs = 0; ++ state->sum = 0; ++ state->cur_samples = 0; ++ ++ ret = tuner->start_tuner(tuner, ++ xnclock_ticks_to_ns(&nkclock, state->ideal), ++ period); ++ if (ret) ++ goto fail; ++ ++ /* Tuner stops when posting. */ ++ ret = rtdm_event_wait(&tuner->done); ++ if (ret) ++ goto fail; ++ ++ ret = tuner->status; ++ if (ret) ++ goto fail; ++ ++ if (step < WARMUP_STEPS) { ++ if (state->min_lat > gravity_limit) { ++ gravity_limit = state->min_lat; ++ progress(tuner, "gravity limit set to %Lu ns (%d)", ++ xnclock_ticks_to_ns(&nkclock, gravity_limit), state->min_lat); ++ } ++ continue; ++ } ++ ++ /* ++ * We should not be early by more than the gravity ++ * value minus one tick, to account for the rounding ++ * error involved when the timer frequency is lower ++ * than 1e9 / ADJUSTMENT_STEP. ++ */ ++ if (state->min_lat < 0) { ++ if (tuner->get_gravity(tuner) < -state->min_lat - 1) { ++ printk(XENO_WARNING ++ "autotune(%s) failed with early shot (%Ld ns)\n", ++ tuner->name, ++ xnclock_ticks_to_ns(&nkclock, ++ -(tuner->get_gravity(tuner) + ++ state->min_lat))); ++ ret = -EAGAIN; ++ goto fail; ++ } ++ break; ++ } ++ ++ if (((step - WARMUP_STEPS) % 5) == 0) ++ progress(tuner, "calibrating... (slice %d)", ++ (step - WARMUP_STEPS) / 5 + 1); ++ ++ build_score(tuner, step - WARMUP_STEPS); ++ ++ /* ++ * Anticipating by more than the minimum latency ++ * detected at warmup would make no sense: cap the ++ * gravity we may try. 
++ */ ++ if (tuner->adjust_gravity(tuner, adjust) > gravity_limit) { ++ progress(tuner, "beyond gravity limit at %Lu ns", ++ xnclock_ticks_to_ns(&nkclock, ++ tuner->get_gravity(tuner))); ++ break; ++ } ++ } ++ ++ progress(tuner, "calibration scores"); ++ dump_scores(tuner); ++ progress(tuner, "pondered mean filter"); ++ filter_score(tuner, filter_mean); ++ progress(tuner, "standard deviation filter"); ++ filter_score(tuner, filter_stddev); ++ progress(tuner, "minimum latency filter"); ++ filter_score(tuner, filter_minlat); ++ progress(tuner, "gravity filter"); ++ filter_score(tuner, filter_gravity); ++ tuner->set_gravity(tuner, tuner->scores[0].gravity); ++ ++ return 0; ++fail: ++ tuner->set_gravity(tuner, orig_gravity); ++ ++ return ret; ++} ++ ++static int autotune_ioctl_nrt(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ struct autotune_context *context; ++ struct autotune_setup setup; ++ struct gravity_tuner *tuner, *old_tuner; ++ rtdm_lockctx_t lock_ctx; ++ int ret; ++ ++ switch (request) { ++ case AUTOTUNE_RTIOC_RESET: ++ xnclock_reset_gravity(&nkclock); ++ return 0; ++ case AUTOTUNE_RTIOC_IRQ: ++ tuner = &irq_tuner.tuner; ++ break; ++ case AUTOTUNE_RTIOC_KERN: ++ tuner = &kthread_tuner.tuner; ++ break; ++ case AUTOTUNE_RTIOC_USER: ++ tuner = &uthread_tuner.tuner; ++ break; ++ default: ++ return -ENOSYS; ++ } ++ ++ ret = rtdm_copy_from_user(fd, &setup, arg, sizeof(setup)); ++ if (ret) ++ return ret; ++ ++ ret = tuner->init_tuner(tuner); ++ if (ret) ++ return ret; ++ ++ context = rtdm_fd_to_private(fd); ++ ++ rtdm_lock_get_irqsave(&context->tuner_lock, lock_ctx); ++ ++ old_tuner = context->tuner; ++ if (old_tuner && atomic_read(&old_tuner->refcount) > 0) { ++ rtdm_lock_put_irqrestore(&context->tuner_lock, lock_ctx); ++ tuner->destroy_tuner(tuner); ++ return -EBUSY; ++ } ++ ++ context->tuner = tuner; ++ context->setup = setup; ++ ++ rtdm_lock_put_irqrestore(&context->tuner_lock, lock_ctx); ++ ++ if (old_tuner) ++ old_tuner->destroy_tuner(old_tuner); ++ ++ if (setup.quiet <= 1) ++ printk(XENO_INFO "autotune(%s) started\n", tuner->name); ++ ++ return ret; ++} ++ ++static int autotune_ioctl_rt(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ struct autotune_context *context; ++ struct gravity_tuner *tuner; ++ rtdm_lockctx_t lock_ctx; ++ __u64 timestamp; ++ __u32 gravity; ++ int ret; ++ ++ context = rtdm_fd_to_private(fd); ++ ++ rtdm_lock_get_irqsave(&context->tuner_lock, lock_ctx); ++ ++ tuner = context->tuner; ++ if (tuner) ++ atomic_inc(&tuner->refcount); ++ ++ rtdm_lock_put_irqrestore(&context->tuner_lock, lock_ctx); ++ ++ if (tuner == NULL) ++ return -ENOSYS; ++ ++ switch (request) { ++ case AUTOTUNE_RTIOC_RUN: ++ tuner->quiet = context->setup.quiet; ++ ret = tune_gravity(tuner, context->setup.period); ++ if (ret) ++ break; ++ gravity = xnclock_ticks_to_ns(&nkclock, ++ tuner->get_gravity(tuner)); ++ ret = rtdm_safe_copy_to_user(fd, arg, &gravity, ++ sizeof(gravity)); ++ break; ++ case AUTOTUNE_RTIOC_PULSE: ++ if (tuner != &uthread_tuner.tuner) { ++ ret = -EINVAL; ++ break; ++ } ++ ret = rtdm_safe_copy_from_user(fd, ×tamp, arg, ++ sizeof(timestamp)); ++ if (ret) ++ break; ++ ret = add_uthread_sample(tuner, timestamp); ++ break; ++ default: ++ ret = -ENOSYS; ++ } ++ ++ atomic_dec(&tuner->refcount); ++ ++ return ret; ++} ++ ++static int autotune_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct autotune_context *context; ++ ++ context = rtdm_fd_to_private(fd); ++ context->tuner = NULL; ++ rtdm_lock_init(&context->tuner_lock); ++ ++ return 0; ++} ++ ++static void 
autotune_close(struct rtdm_fd *fd) ++{ ++ struct autotune_context *context; ++ struct gravity_tuner *tuner; ++ ++ context = rtdm_fd_to_private(fd); ++ tuner = context->tuner; ++ if (tuner) { ++ if (context->setup.quiet <= 1) ++ printk(XENO_INFO "autotune finished [%Lui/%Luk/%Luu]\n", ++ xnclock_ticks_to_ns(&nkclock, ++ xnclock_get_gravity(&nkclock, irq)), ++ xnclock_ticks_to_ns(&nkclock, ++ xnclock_get_gravity(&nkclock, kernel)), ++ xnclock_ticks_to_ns(&nkclock, ++ xnclock_get_gravity(&nkclock, user))); ++ tuner->destroy_tuner(tuner); ++ } ++} ++ ++static struct rtdm_driver autotune_driver = { ++ .profile_info = RTDM_PROFILE_INFO(autotune, ++ RTDM_CLASS_AUTOTUNE, ++ RTDM_SUBCLASS_AUTOTUNE, ++ 0), ++ .device_flags = RTDM_NAMED_DEVICE|RTDM_EXCLUSIVE, ++ .device_count = 1, ++ .context_size = sizeof(struct autotune_context), ++ .ops = { ++ .open = autotune_open, ++ .ioctl_rt = autotune_ioctl_rt, ++ .ioctl_nrt = autotune_ioctl_nrt, ++ .close = autotune_close, ++ }, ++}; ++ ++static struct rtdm_device device = { ++ .driver = &autotune_driver, ++ .label = "autotune", ++}; ++ ++static int __init autotune_init(void) ++{ ++ return rtdm_dev_register(&device); ++} ++ ++static void __exit autotune_exit(void) ++{ ++ rtdm_dev_unregister(&device); ++} ++ ++module_init(autotune_init); ++module_exit(autotune_exit); +--- linux/drivers/xenomai/autotune/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/autotune/Makefile 2021-04-07 16:01:26.245635569 +0800 +@@ -0,0 +1,4 @@ ++ ++obj-$(CONFIG_XENO_DRIVERS_AUTOTUNE) += xeno_autotune.o ++ ++xeno_autotune-y := autotune.o +--- linux/drivers/xenomai/autotune/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/autotune/Kconfig 2021-04-07 16:01:26.240635577 +0800 +@@ -0,0 +1,3 @@ ++ ++config XENO_DRIVERS_AUTOTUNE ++ tristate +--- linux/drivers/xenomai/gpio/gpio-sun8i-h3.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-sun8i-h3.c 2021-04-07 16:01:26.235635584 +0800 +@@ -0,0 +1,43 @@ ++/** ++ * Copyright (C) 2017 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++ ++#define RTDM_SUBCLASS_H3 3 ++ ++static int __init h3_gpio_init(void) ++{ ++ int ret; ++ ++ ret = rtdm_gpiochip_scan_of(NULL, "allwinner,sun8i-h3-pinctrl", ++ RTDM_SUBCLASS_H3); ++ if (ret) ++ return ret; ++ ++ return rtdm_gpiochip_scan_of(NULL, "allwinner,sun8i-h3-r-pinctrl", ++ RTDM_SUBCLASS_H3); ++} ++module_init(h3_gpio_init); ++ ++static void __exit h3_gpio_exit(void) ++{ ++ rtdm_gpiochip_remove_of(RTDM_SUBCLASS_H3); ++} ++module_exit(h3_gpio_exit); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/gpio/gpio-core.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-core.c 2021-04-07 16:01:26.230635591 +0800 +@@ -0,0 +1,640 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct rtdm_gpio_chan { ++ int requested : 1, ++ has_direction : 1, ++ is_output : 1, ++ is_interrupt : 1, ++ want_timestamp : 1; ++}; ++ ++static LIST_HEAD(rtdm_gpio_chips); ++ ++static DEFINE_MUTEX(chip_lock); ++ ++static int gpio_pin_interrupt(rtdm_irq_t *irqh) ++{ ++ struct rtdm_gpio_pin *pin; ++ ++ pin = rtdm_irq_get_arg(irqh, struct rtdm_gpio_pin); ++ ++ pin->timestamp = rtdm_clock_read_monotonic(); ++ rtdm_event_signal(&pin->event); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int request_gpio_irq(unsigned int gpio, struct rtdm_gpio_pin *pin, ++ struct rtdm_gpio_chan *chan, ++ int trigger) ++{ ++ int ret, irq_trigger, irq; ++ ++ if (trigger & ~GPIO_TRIGGER_MASK) ++ return -EINVAL; ++ ++ if (!chan->requested) { ++ ret = gpio_request(gpio, pin->name); ++ if (ret) { ++ if (ret != -EPROBE_DEFER) ++ printk(XENO_ERR ++ "can not request GPIO%d\n", gpio); ++ return ret; ++ } ++ chan->requested = true; ++ } ++ ++ ret = gpio_direction_input(gpio); ++ if (ret) { ++ printk(XENO_ERR "cannot set GPIO%d as input\n", gpio); ++ goto fail; ++ } ++ ++ chan->has_direction = true; ++ gpio_export(gpio, true); ++ ++ rtdm_event_clear(&pin->event); ++ ++ /* ++ * Attempt to hook the interrupt associated to that pin. We ++ * might fail getting a valid IRQ number, in case the GPIO ++ * chip did not define any mapping handler (->to_irq). If so, ++ * just assume that either we have no IRQ indeed, or interrupt ++ * handling may be open coded elsewhere. 
++ */ ++ irq = gpio_to_irq(gpio); ++ if (irq < 0) ++ goto done; ++ ++ irq_trigger = 0; ++ if (trigger & GPIO_TRIGGER_EDGE_RISING) ++ irq_trigger |= IRQ_TYPE_EDGE_RISING; ++ if (trigger & GPIO_TRIGGER_EDGE_FALLING) ++ irq_trigger |= IRQ_TYPE_EDGE_FALLING; ++ if (trigger & GPIO_TRIGGER_LEVEL_HIGH) ++ irq_trigger |= IRQ_TYPE_LEVEL_HIGH; ++ if (trigger & GPIO_TRIGGER_LEVEL_LOW) ++ irq_trigger |= IRQ_TYPE_LEVEL_LOW; ++ ++ if (irq_trigger) ++ irq_set_irq_type(irq, irq_trigger); ++ ++ ret = rtdm_irq_request(&pin->irqh, irq, gpio_pin_interrupt, ++ 0, pin->name, pin); ++ if (ret) { ++ printk(XENO_ERR "cannot request GPIO%d interrupt\n", gpio); ++ goto fail; ++ } ++ ++ ++ rtdm_irq_enable(&pin->irqh); ++done: ++ chan->is_interrupt = true; ++ ++ return 0; ++fail: ++ gpio_free(gpio); ++ chan->requested = false; ++ ++ return ret; ++} ++ ++static void release_gpio_irq(unsigned int gpio, struct rtdm_gpio_pin *pin, ++ struct rtdm_gpio_chan *chan) ++{ ++ if (chan->is_interrupt) { ++ rtdm_irq_free(&pin->irqh); ++ chan->is_interrupt = false; ++ } ++ gpio_free(gpio); ++ chan->requested = false; ++} ++ ++static int gpio_pin_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ unsigned int gpio = rtdm_fd_minor(fd); ++ int ret = 0, val, trigger; ++ struct rtdm_gpio_pin *pin; ++ ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ ++ switch (request) { ++ case GPIO_RTIOC_DIR_OUT: ++ ret = rtdm_safe_copy_from_user(fd, &val, arg, sizeof(val)); ++ if (ret) ++ return ret; ++ ret = gpio_direction_output(gpio, val); ++ if (ret == 0) { ++ chan->has_direction = true; ++ chan->is_output = true; ++ } ++ break; ++ case GPIO_RTIOC_DIR_IN: ++ ret = gpio_direction_input(gpio); ++ if (ret == 0) ++ chan->has_direction = true; ++ break; ++ case GPIO_RTIOC_IRQEN: ++ if (chan->is_interrupt) { ++ return -EBUSY; ++ } ++ ret = rtdm_safe_copy_from_user(fd, &trigger, ++ arg, sizeof(trigger)); ++ if (ret) ++ return ret; ++ ret = request_gpio_irq(gpio, pin, chan, trigger); ++ break; ++ case GPIO_RTIOC_IRQDIS: ++ if (chan->is_interrupt) { ++ release_gpio_irq(gpio, pin, chan); ++ chan->requested = false; ++ chan->is_interrupt = false; ++ } ++ break; ++ case GPIO_RTIOC_REQS: ++ ret = gpio_request(gpio, pin->name); ++ if (ret) ++ return ret; ++ else ++ chan->requested = true; ++ break; ++ case GPIO_RTIOC_RELS: ++ gpio_free(gpio); ++ chan->requested = false; ++ break; ++ case GPIO_RTIOC_TS: ++ ret = rtdm_safe_copy_from_user(fd, &val, arg, sizeof(val)); ++ if (ret) ++ return ret; ++ chan->want_timestamp = !!val; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static ssize_t gpio_pin_read_rt(struct rtdm_fd *fd, ++ void __user *buf, size_t len) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ struct rtdm_gpio_readout rdo; ++ struct rtdm_gpio_pin *pin; ++ int ret; ++ ++ if (!chan->has_direction) ++ return -EAGAIN; ++ ++ if (chan->is_output) ++ return -EINVAL; ++ ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ ++ if (chan->want_timestamp) { ++ if (len < sizeof(rdo)) ++ return -EINVAL; ++ ++ if (!(fd->oflags & O_NONBLOCK)) { ++ ret = rtdm_event_wait(&pin->event); ++ if (ret) ++ return ret; ++ rdo.timestamp = pin->timestamp; ++ } else ++ rdo.timestamp = rtdm_clock_read_monotonic(); ++ ++ len = sizeof(rdo); ++ rdo.value = gpiod_get_raw_value(pin->desc); ++ ret = rtdm_safe_copy_to_user(fd, buf, &rdo, len); ++ } else { ++ if 
(len < sizeof(rdo.value)) ++ return -EINVAL; ++ ++ if (!(fd->oflags & O_NONBLOCK)) { ++ ret = rtdm_event_wait(&pin->event); ++ if (ret) ++ return ret; ++ } ++ ++ len = sizeof(rdo.value); ++ rdo.value = gpiod_get_raw_value(pin->desc); ++ ret = rtdm_safe_copy_to_user(fd, buf, &rdo.value, len); ++ } ++ ++ return ret ?: len; ++} ++ ++static ssize_t gpio_pin_write_rt(struct rtdm_fd *fd, ++ const void __user *buf, size_t len) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ struct rtdm_gpio_pin *pin; ++ int value, ret; ++ ++ if (len < sizeof(value)) ++ return -EINVAL; ++ ++ if (!chan->has_direction) ++ return -EAGAIN; ++ ++ if (!chan->is_output) ++ return -EINVAL; ++ ++ ret = rtdm_safe_copy_from_user(fd, &value, buf, sizeof(value)); ++ if (ret) ++ return ret; ++ ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ gpiod_set_raw_value(pin->desc, value); ++ ++ return sizeof(value); ++} ++ ++static int gpio_pin_select(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned int type, unsigned int index) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ struct rtdm_gpio_pin *pin; ++ ++ if (!chan->has_direction) ++ return -EAGAIN; ++ ++ if (chan->is_output) ++ return -EINVAL; ++ ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ ++ return rtdm_event_select(&pin->event, selector, type, index); ++} ++ ++int gpio_pin_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ unsigned int gpio = rtdm_fd_minor(fd); ++ int ret = 0; ++ struct rtdm_gpio_pin *pin; ++ ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ ret = gpio_request(gpio, pin->name); ++ if (ret) { ++ printk(XENO_ERR "failed to request pin %d : %d\n", gpio, ret); ++ return ret; ++ } else { ++ chan->requested = true; ++ } ++ ++ return 0; ++} ++ ++static void gpio_pin_close(struct rtdm_fd *fd) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ unsigned int gpio = rtdm_fd_minor(fd); ++ struct rtdm_gpio_pin *pin; ++ ++ if (chan->requested) { ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ release_gpio_irq(gpio, pin, chan); ++ } ++} ++ ++static void delete_pin_devices(struct rtdm_gpio_chip *rgc) ++{ ++ struct rtdm_gpio_pin *pin; ++ struct rtdm_device *dev; ++ int offset; ++ ++ for (offset = 0; offset < rgc->gc->ngpio; offset++) { ++ pin = rgc->pins + offset; ++ dev = &pin->dev; ++ rtdm_dev_unregister(dev); ++ rtdm_event_destroy(&pin->event); ++ kfree(dev->label); ++ kfree(pin->name); ++ } ++} ++ ++static int create_pin_devices(struct rtdm_gpio_chip *rgc) ++{ ++ struct gpio_chip *gc = rgc->gc; ++ struct rtdm_gpio_pin *pin; ++ struct rtdm_device *dev; ++ int offset, ret, gpio; ++ ++ for (offset = 0; offset < gc->ngpio; offset++) { ++ ret = -ENOMEM; ++ gpio = gc->base + offset; ++ pin = rgc->pins + offset; ++ pin->name = kasprintf(GFP_KERNEL, "gpio%d", gpio); ++ if (pin->name == NULL) ++ goto fail_name; ++ pin->desc = gpio_to_desc(gpio); ++ if (pin->desc == NULL) { ++ ret = -ENODEV; ++ goto fail_desc; ++ } ++ dev = &pin->dev; ++ dev->driver = &rgc->driver; ++ dev->label = kasprintf(GFP_KERNEL, "%s/gpio%%d", gc->label); ++ if (dev->label == NULL) ++ goto fail_label; ++ dev->minor = gpio; ++ dev->device_data = rgc; ++ ret = rtdm_dev_register(dev); ++ if (ret) ++ goto fail_register; ++ rtdm_event_init(&pin->event, 0); ++ } ++ ++ return 0; ++ 
++fail_register: ++ kfree(dev->label); ++fail_desc: ++fail_label: ++ kfree(pin->name); ++fail_name: ++ delete_pin_devices(rgc); ++ ++ return ret; ++} ++ ++static char *gpio_pin_devnode(struct device *dev, umode_t *mode) ++{ ++ return kasprintf(GFP_KERNEL, "rtdm/%s/%s", ++ dev->class->name, ++ dev_name(dev)); ++} ++ ++int rtdm_gpiochip_add(struct rtdm_gpio_chip *rgc, ++ struct gpio_chip *gc, int gpio_subclass) ++{ ++ int ret; ++ ++ rgc->devclass = class_create(gc->owner, gc->label); ++ if (IS_ERR(rgc->devclass)) { ++ printk(XENO_ERR "cannot create sysfs class\n"); ++ return PTR_ERR(rgc->devclass); ++ } ++ rgc->devclass->devnode = gpio_pin_devnode; ++ ++ rgc->driver.profile_info = (struct rtdm_profile_info) ++ RTDM_PROFILE_INFO(rtdm_gpio_chip, ++ RTDM_CLASS_GPIO, ++ gpio_subclass, ++ 0); ++ rgc->driver.device_flags = RTDM_NAMED_DEVICE|RTDM_FIXED_MINOR; ++ rgc->driver.base_minor = gc->base; ++ rgc->driver.device_count = gc->ngpio; ++ rgc->driver.context_size = sizeof(struct rtdm_gpio_chan); ++ rgc->driver.ops = (struct rtdm_fd_ops){ ++ .open = gpio_pin_open, ++ .close = gpio_pin_close, ++ .ioctl_nrt = gpio_pin_ioctl_nrt, ++ .read_rt = gpio_pin_read_rt, ++ .write_rt = gpio_pin_write_rt, ++ .select = gpio_pin_select, ++ }; ++ ++ rtdm_drv_set_sysclass(&rgc->driver, rgc->devclass); ++ ++ rgc->gc = gc; ++ rtdm_lock_init(&rgc->lock); ++ ++ ret = create_pin_devices(rgc); ++ if (ret) ++ class_destroy(rgc->devclass); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_add); ++ ++struct rtdm_gpio_chip * ++rtdm_gpiochip_alloc(struct gpio_chip *gc, int gpio_subclass) ++{ ++ struct rtdm_gpio_chip *rgc; ++ size_t asize; ++ int ret; ++ ++ if (gc->ngpio == 0) ++ return ERR_PTR(-EINVAL); ++ ++ asize = sizeof(*rgc) + gc->ngpio * sizeof(struct rtdm_gpio_pin); ++ rgc = kzalloc(asize, GFP_KERNEL); ++ if (rgc == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = rtdm_gpiochip_add(rgc, gc, gpio_subclass); ++ if (ret) { ++ kfree(rgc); ++ return ERR_PTR(ret); ++ } ++ ++ mutex_lock(&chip_lock); ++ list_add(&rgc->next, &rtdm_gpio_chips); ++ mutex_unlock(&chip_lock); ++ ++ return rgc; ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_alloc); ++ ++void rtdm_gpiochip_remove(struct rtdm_gpio_chip *rgc) ++{ ++ mutex_lock(&chip_lock); ++ list_del(&rgc->next); ++ mutex_unlock(&chip_lock); ++ delete_pin_devices(rgc); ++ class_destroy(rgc->devclass); ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_remove); ++ ++int rtdm_gpiochip_post_event(struct rtdm_gpio_chip *rgc, ++ unsigned int offset) ++{ ++ struct rtdm_gpio_pin *pin; ++ ++ if (offset >= rgc->gc->ngpio) ++ return -EINVAL; ++ ++ pin = rgc->pins + offset; ++ pin->timestamp = rtdm_clock_read_monotonic(); ++ rtdm_event_signal(&pin->event); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_post_event); ++ ++static int gpiochip_match_name(struct gpio_chip *chip, void *data) ++{ ++ const char *name = data; ++ ++ return !strcmp(chip->label, name); ++} ++ ++static struct gpio_chip *find_chip_by_name(const char *name) ++{ ++ return gpiochip_find((void *)name, gpiochip_match_name); ++} ++ ++int rtdm_gpiochip_add_by_name(struct rtdm_gpio_chip *rgc, ++ const char *label, int gpio_subclass) ++{ ++ struct gpio_chip *gc = find_chip_by_name(label); ++ ++ if (gc == NULL) ++ return -EPROBE_DEFER; ++ ++ return rtdm_gpiochip_add(rgc, gc, gpio_subclass); ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_add_by_name); ++ ++#ifdef CONFIG_OF ++ ++#include ++ ++struct gpiochip_holder { ++ struct gpio_chip *chip; ++ struct list_head next; ++}; ++ ++struct gpiochip_match_data { ++ struct device *parent; ++ struct list_head list; 
++}; ++ ++static int match_gpio_chip(struct gpio_chip *gc, void *data) ++{ ++ struct gpiochip_match_data *d = data; ++ struct gpiochip_holder *h; ++ ++ if (cobalt_gpiochip_dev(gc) == d->parent) { ++ h = kmalloc(sizeof(*h), GFP_KERNEL); ++ if (h) { ++ h->chip = gc; ++ list_add(&h->next, &d->list); ++ } ++ } ++ ++ /* ++ * Iterate over all existing GPIO chips, we may have several ++ * hosted by the same pin controller mapping different ranges. ++ */ ++ return 0; ++} ++ ++int rtdm_gpiochip_scan_of(struct device_node *from, const char *compat, ++ int type) ++{ ++ struct gpiochip_match_data match; ++ struct gpiochip_holder *h, *n; ++ struct device_node *np = from; ++ struct platform_device *pdev; ++ struct rtdm_gpio_chip *rgc; ++ int ret = -ENODEV, _ret; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ for (;;) { ++ np = of_find_compatible_node(np, NULL, compat); ++ if (np == NULL) ++ break; ++ pdev = of_find_device_by_node(np); ++ of_node_put(np); ++ if (pdev == NULL) ++ break; ++ match.parent = &pdev->dev; ++ INIT_LIST_HEAD(&match.list); ++ gpiochip_find(&match, match_gpio_chip); ++ if (!list_empty(&match.list)) { ++ ret = 0; ++ list_for_each_entry_safe(h, n, &match.list, next) { ++ list_del(&h->next); ++ _ret = 0; ++ rgc = rtdm_gpiochip_alloc(h->chip, type); ++ if (IS_ERR(rgc)) ++ _ret = PTR_ERR(rgc); ++ kfree(h); ++ if (_ret && !ret) ++ ret = _ret; ++ } ++ if (ret) ++ break; ++ } ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_scan_of); ++ ++int rtdm_gpiochip_scan_array_of(struct device_node *from, ++ const char *compat[], ++ int nentries, int type) ++{ ++ int ret = -ENODEV, _ret, n; ++ ++ for (n = 0; n < nentries; n++) { ++ _ret = rtdm_gpiochip_scan_of(from, compat[n], type); ++ if (_ret) { ++ if (_ret != -ENODEV) ++ return _ret; ++ } else ++ ret = 0; ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_scan_array_of); ++ ++void rtdm_gpiochip_remove_of(int type) ++{ ++ struct rtdm_gpio_chip *rgc, *n; ++ ++ mutex_lock(&chip_lock); ++ ++ list_for_each_entry_safe(rgc, n, &rtdm_gpio_chips, next) { ++ if (rgc->driver.profile_info.subclass_id == type) { ++ mutex_unlock(&chip_lock); ++ rtdm_gpiochip_remove(rgc); ++ kfree(rgc); ++ mutex_lock(&chip_lock); ++ } ++ } ++ ++ mutex_unlock(&chip_lock); ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_remove_of); ++ ++#endif /* CONFIG_OF */ +--- linux/drivers/xenomai/gpio/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/Makefile 2021-04-07 16:01:26.225635598 +0800 +@@ -0,0 +1,14 @@ ++ccflags-$(CONFIG_XENO_DRIVERS_GPIO_DEBUG) := -DDEBUG ++ ++obj-$(CONFIG_XENO_DRIVERS_GPIO_BCM2835) += xeno-gpio-bcm2835.o ++obj-$(CONFIG_XENO_DRIVERS_GPIO_MXC) += xeno-gpio-mxc.o ++obj-$(CONFIG_XENO_DRIVERS_GPIO_SUN8I_H3) += xeno-gpio-sun8i-h3.o ++obj-$(CONFIG_XENO_DRIVERS_GPIO_ZYNQ7000) += xeno-gpio-zynq7000.o ++obj-$(CONFIG_XENO_DRIVERS_GPIO_XILINX) += xeno-gpio-xilinx.o ++obj-$(CONFIG_XENO_DRIVERS_GPIO) += gpio-core.o ++ ++xeno-gpio-bcm2835-y := gpio-bcm2835.o ++xeno-gpio-mxc-y := gpio-mxc.o ++xeno-gpio-sun8i-h3-y := gpio-sun8i-h3.o ++xeno-gpio-zynq7000-y := gpio-zynq7000.o ++xeno-gpio-xilinx-y := gpio-xilinx.o +--- linux/drivers/xenomai/gpio/gpio-xilinx.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-xilinx.c 2021-04-07 16:01:26.220635605 +0800 +@@ -0,0 +1,40 @@ ++/** ++ * @note Copyright (C) 2017 Greg Gallagher ++ * ++ * This driver controls the gpio that can be located on the PL ++ * of the Zynq SOC ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it 
under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++ ++#define RTDM_SUBCLASS_XILINX 5 ++ ++static int __init xilinx_gpio_init(void) ++{ ++ return rtdm_gpiochip_scan_of(NULL, "xlnx,xps-gpio-1.00.a", ++ RTDM_SUBCLASS_XILINX); ++} ++module_init(xilinx_gpio_init); ++ ++static void __exit xilinx_gpio_exit(void) ++{ ++ rtdm_gpiochip_remove_of(RTDM_SUBCLASS_XILINX); ++} ++module_exit(xilinx_gpio_exit); ++ ++MODULE_LICENSE("GPL"); ++ +--- linux/drivers/xenomai/gpio/gpio-zynq7000.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-zynq7000.c 2021-04-07 16:01:26.215635612 +0800 +@@ -0,0 +1,40 @@ ++/** ++ * @note Copyright (C) 2017 Greg Gallagher ++ * ++ * This driver is inspired by: ++ * gpio-bcm2835.c, please see original file for copyright information ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++ ++#define RTDM_SUBCLASS_ZYNQ7000 4 ++ ++static int __init zynq7000_gpio_init(void) ++{ ++ return rtdm_gpiochip_scan_of(NULL, "xlnx,zynq-gpio-1.0", ++ RTDM_SUBCLASS_ZYNQ7000); ++} ++module_init(zynq7000_gpio_init); ++ ++static void __exit zynq7000_gpio_exit(void) ++{ ++ rtdm_gpiochip_remove_of(RTDM_SUBCLASS_ZYNQ7000); ++} ++module_exit(zynq7000_gpio_exit); ++ ++MODULE_LICENSE("GPL"); ++ +--- linux/drivers/xenomai/gpio/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/Kconfig 2021-04-07 16:01:26.210635619 +0800 +@@ -0,0 +1,57 @@ ++menu "Real-time GPIO drivers" ++ ++config XENO_DRIVERS_GPIO ++ bool "GPIO controller" ++ depends on GPIOLIB ++ help ++ ++ Real-time capable GPIO module. ++ ++if XENO_DRIVERS_GPIO ++ ++config XENO_DRIVERS_GPIO_BCM2835 ++ depends on MACH_BCM2708 || ARCH_BCM2835 ++ tristate "Support for BCM2835 GPIOs" ++ help ++ ++ Enables support for the GPIO controller available from ++ Broadcom's BCM2835 SoC. ++ ++config XENO_DRIVERS_GPIO_MXC ++ depends on GPIO_MXC ++ tristate "Support for MXC GPIOs" ++ help ++ ++ Suitable for the GPIO controller available from ++ Freescale/NXP's MXC architecture. 
++ ++config XENO_DRIVERS_GPIO_SUN8I_H3 ++ depends on MACH_SUN8I && PINCTRL_SUN8I_H3 ++ tristate "Support for SUN8I H3 GPIOs" ++ help ++ ++ Suitable for the GPIO controller available from Allwinner's H3 ++ SoC, as found on the NanoPI boards. ++ ++config XENO_DRIVERS_GPIO_ZYNQ7000 ++ depends on ARCH_ZYNQ ++ tristate "Support for Zynq7000 GPIOs" ++ help ++ ++ Enables support for the GPIO controller available from ++ Xilinx's Zynq7000 SoC. ++ ++config XENO_DRIVERS_GPIO_XILINX ++ depends on ARCH_ZYNQ ++ tristate "Support for Xilinx GPIOs" ++ help ++ ++ Enables support for the GPIO controller available from ++ Xilinx's softcore IP. ++ ++config XENO_DRIVERS_GPIO_DEBUG ++ bool "Enable GPIO core debugging features" ++ ++endif ++ ++endmenu +--- linux/drivers/xenomai/gpio/gpio-bcm2835.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-bcm2835.c 2021-04-07 16:01:26.205635627 +0800 +@@ -0,0 +1,37 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++ ++#define RTDM_SUBCLASS_BCM2835 1 ++ ++static int __init bcm2835_gpio_init(void) ++{ ++ return rtdm_gpiochip_scan_of(NULL, "brcm,bcm2835-gpio", ++ RTDM_SUBCLASS_BCM2835); ++} ++module_init(bcm2835_gpio_init); ++ ++static void __exit bcm2835_gpio_exit(void) ++{ ++ rtdm_gpiochip_remove_of(RTDM_SUBCLASS_BCM2835); ++} ++module_exit(bcm2835_gpio_exit); ++ ++MODULE_LICENSE("GPL"); ++ +--- linux/drivers/xenomai/gpio/gpio-mxc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-mxc.c 2021-04-07 16:01:26.199635635 +0800 +@@ -0,0 +1,42 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++ ++#define RTDM_SUBCLASS_MXC 2 ++ ++static const char *compat_array[] = { ++ "fsl,imx6q-gpio", ++ "fsl,imx7d-gpio", ++}; ++ ++static int __init mxc_gpio_init(void) ++{ ++ return rtdm_gpiochip_scan_array_of(NULL, compat_array, ++ ARRAY_SIZE(compat_array), ++ RTDM_SUBCLASS_MXC); ++} ++module_init(mxc_gpio_init); ++ ++static void __exit mxc_gpio_exit(void) ++{ ++ rtdm_gpiochip_remove_of(RTDM_SUBCLASS_MXC); ++} ++module_exit(mxc_gpio_exit); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/Makefile 2020-12-21 21:59:17.000000000 +0800 ++++ linux-patched/drivers/Makefile 2021-04-07 16:01:25.590636505 +0800 +@@ -187,3 +187,5 @@ + obj-$(CONFIG_SIOX) += siox/ + obj-$(CONFIG_GNSS) += gnss/ + obj-y += uacce/ ++ ++obj-$(CONFIG_XENOMAI) += xenomai/ +--- linux/arch/x86/include/ipipe/thread_info.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/include/ipipe/thread_info.h 2021-04-07 16:01:25.732636302 +0800 +@@ -0,0 +1,38 @@ ++/** ++ * Copyright (C) 2012 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_IPIPE_THREAD_INFO_H ++#define _COBALT_IPIPE_THREAD_INFO_H ++ ++struct xnthread; ++struct cobalt_process; ++ ++struct ipipe_threadinfo { ++ /* Core thread backlink. */ ++ struct xnthread *thread; ++ /* User process backlink. NULL for core threads. */ ++ struct cobalt_process *process; ++}; ++ ++static inline void __ipipe_init_threadinfo(struct ipipe_threadinfo *p) ++{ ++ p->thread = NULL; ++ p->process = NULL; ++} ++ ++#endif /* !_COBALT_IPIPE_THREAD_INFO_H */ +--- linux/arch/x86/xenomai/machine.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/machine.c 2021-04-07 16:01:25.721636318 +0800 +@@ -0,0 +1,134 @@ ++/** ++ * Copyright (C) 2007-2012 Philippe Gerum. ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, Inc., 675 Mass Ave, ++ * Cambridge MA 02139, USA; either version 2 of the License, or (at ++ * your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++long strncpy_from_user_nocheck(char *dst, const char __user *src, long count) ++{ ++ int ret; ++ char c; ++ long n; ++ ++ for (n = 0; n < count; n++, src++, dst++) { ++ ret = __xn_get_user(c, src); ++ if (ret) ++ return -EFAULT; ++ *dst = c; ++ if (c == 0) ++ break; ++ } ++ ++ return n; ++} ++EXPORT_SYMBOL_GPL(strncpy_from_user_nocheck); ++ ++static unsigned long mach_x86_calibrate(void) ++{ ++ unsigned long delay = (cobalt_pipeline.timer_freq + HZ / 2) / HZ; ++ unsigned long long t0, t1, dt; ++ unsigned long flags; ++ int i; ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ ipipe_timer_set(delay); ++ ++ ipipe_read_tsc(t0); ++ ++ for (i = 0; i < 100; i++) ++ ipipe_timer_set(delay); ++ ++ ipipe_read_tsc(t1); ++ dt = t1 - t0; ++ ++ ipipe_critical_exit(flags); ++ ++ /* ++ * Reset the max trace, since it contains the calibration time ++ * now. ++ */ ++ ipipe_trace_max_reset(); ++ ++ /* ++ * Compute average with a 5% margin to avoid negative ++ * latencies with PIT. ++ */ ++ return xnarch_ulldiv(dt, i + 5, NULL); ++} ++ ++static int mach_x86_init(void) ++{ ++ int ret; ++ ++ ret = mach_x86_thread_init(); ++ if (ret) ++ return ret; ++ ++ mach_x86_c1e_disable(); ++ mach_x86_smi_init(); ++ mach_x86_smi_disable(); ++ ++ return 0; ++} ++ ++static void mach_x86_cleanup(void) ++{ ++ mach_x86_smi_restore(); ++ mach_x86_thread_cleanup(); ++} ++ ++static const char *const fault_labels[] = { ++ [0] = "Divide error", ++ [1] = "Debug", ++ [2] = "", /* NMI is not pipelined. */ ++ [3] = "Int3", ++ [4] = "Overflow", ++ [5] = "Bounds", ++ [6] = "Invalid opcode", ++ [7] = "FPU not available", ++ [8] = "Double fault", ++ [9] = "FPU segment overrun", ++ [10] = "Invalid TSS", ++ [11] = "Segment not present", ++ [12] = "Stack segment", ++ [13] = "General protection", ++ [14] = "Page fault", ++ [15] = "Spurious interrupt", ++ [16] = "FPU error", ++ [17] = "Alignment check", ++ [18] = "Machine check", ++ [19] = "SIMD error", ++ [20] = NULL, ++}; ++ ++struct cobalt_machine cobalt_machine = { ++ .name = "x86", ++ .init = mach_x86_init, ++ .late_init = NULL, ++ .cleanup = mach_x86_cleanup, ++ .calibrate = mach_x86_calibrate, ++ .prefault = NULL, ++ .fault_labels = fault_labels, ++}; +--- linux/arch/x86/xenomai/smi.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/smi.c 2021-04-07 16:01:25.716636325 +0800 +@@ -0,0 +1,168 @@ ++/** ++ * SMI workaround for x86. ++ * ++ * Cut/Pasted from Vitor Angelo "smi" module. ++ * Adapted by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DEVFN 0xf8 /* device 31, function 0 */ ++ ++#define PMBASE_B0 0x40 ++#define PMBASE_B1 0x41 ++ ++#define SMI_CTRL_ADDR 0x30 ++ ++static int smi_state; ++static char smi_state_arg[16] = "detect"; ++module_param_string(smi, smi_state_arg, sizeof(smi_state_arg), 0444); ++ ++static unsigned int smi_masked_bits = 1; /* Global disable bit */ ++module_param_named(smi_mask, smi_masked_bits, int, 0400); ++ ++static unsigned int smi_saved_bits; ++static unsigned short smi_en_addr; ++ ++#define mask_bits(v, p) outl(inl(p)&~(v),(p)) ++#define set_bits(v, p) outl(inl(p)|(v), (p)) ++ ++static int smi_reboot(struct notifier_block *nb, ulong event, void *buf); ++ ++static struct notifier_block smi_notifier = { ++ .notifier_call = smi_reboot ++}; ++ ++static int smi_reboot(struct notifier_block *nb, ulong event, void *buf) ++{ ++ if (((event == SYS_RESTART) || (event == SYS_HALT) || ++ (event == SYS_POWER_OFF)) && smi_en_addr) ++ set_bits(smi_saved_bits, smi_en_addr); ++ ++ return NOTIFY_DONE; ++} ++ ++void mach_x86_smi_disable(void) ++{ ++ if (smi_en_addr == 0) ++ return; ++ ++ smi_saved_bits = inl(smi_en_addr) & smi_masked_bits; ++ mask_bits(smi_masked_bits, smi_en_addr); ++ ++ if (inl(smi_en_addr) & smi_masked_bits) ++ printk(XENO_WARNING "SMI workaround failed!\n"); ++ else ++ printk(XENO_INFO "SMI workaround enabled\n"); ++ ++ register_reboot_notifier(&smi_notifier); ++} ++ ++void mach_x86_smi_restore(void) ++{ ++ if (smi_en_addr == 0) ++ return; ++ ++ printk(XENO_INFO "SMI configuration restored\n"); ++ ++ set_bits(smi_saved_bits, smi_en_addr); ++ ++ unregister_reboot_notifier(&smi_notifier); ++} ++ ++static unsigned short get_smi_en_addr(struct pci_dev *dev) ++{ ++ u_int8_t byte0, byte1; ++ ++ pci_read_config_byte(dev, PMBASE_B0, &byte0); ++ pci_read_config_byte(dev, PMBASE_B1, &byte1); ++ return SMI_CTRL_ADDR + (((byte1 << 1) | (byte0 >> 7)) << 7); // bits 7-15 ++} ++ ++ ++static const char *smi_state_labels[] = { ++ "disabled", ++ "detect", ++ "enabled", ++}; ++ ++static void setup_smi_state(void) ++{ ++ static char warn_bad_state[] = ++ XENO_WARNING "invalid SMI state '%s'\n"; ++ char *p; ++ int n; ++ ++ /* Backward compat with legacy state specifiers. */ ++ n = simple_strtol(smi_state_arg, &p, 10); ++ if (*p == '\0') { ++ smi_state = n; ++ return; ++ } ++ ++ for (n = 0; n < ARRAY_SIZE(smi_state_labels); n++) ++ if (strcmp(smi_state_labels[n], smi_state_arg) == 0) { ++ smi_state = n - 1; ++ return; ++ } ++ ++ printk(warn_bad_state, smi_state_arg); ++} ++ ++void mach_x86_smi_init(void) ++{ ++ struct pci_dev *dev = NULL; ++ ++ setup_smi_state(); ++ ++ if (smi_state < 0) ++ return; ++ ++ /* ++ * Do not use pci_register_driver, pci_enable_device, ... ++ * Just register the used ports. ++ */ ++ dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); ++ if (dev == NULL || dev->bus->number || ++ dev->devfn != DEVFN || dev->vendor != PCI_VENDOR_ID_INTEL) { ++ pci_dev_put(dev); ++ return; ++ } ++ ++ if (smi_state == 0) { ++ printk(XENO_WARNING "SMI-enabled chipset found, but SMI workaround disabled\n" ++ " (see xenomai.smi parameter). 
You might encounter\n" ++ " high latencies!\n"); ++ pci_dev_put(dev); ++ return; ++ } ++ ++ printk(XENO_INFO "SMI-enabled chipset found\n"); ++ smi_en_addr = get_smi_en_addr(dev); ++ ++ pci_dev_put(dev); ++} +--- linux/arch/x86/xenomai/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/Makefile 2021-04-07 16:01:25.711636332 +0800 +@@ -0,0 +1,5 @@ ++ ++obj-$(CONFIG_XENOMAI) += xenomai.o ++xenomai-y := machine.o thread.o smi.o c1e.o ++ ++ccflags-y := -Iarch/x86/xenomai/include -Iinclude/xenomai +--- linux/arch/x86/xenomai/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/Kconfig 2021-04-07 16:01:25.707636338 +0800 +@@ -0,0 +1,8 @@ ++config XENO_ARCH_FPU ++ def_bool y ++ ++config XENO_ARCH_SYS3264 ++ def_bool IA32_EMULATION ++ ++source "kernel/xenomai/Kconfig" ++source "drivers/xenomai/Kconfig" +--- linux/arch/x86/xenomai/include/asm/xenomai/machine.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/machine.h 2021-04-07 16:01:25.702636345 +0800 +@@ -0,0 +1,43 @@ ++/** ++ * Copyright (C) 2007-2012 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_MACHINE_H ++#define _COBALT_X86_ASM_MACHINE_H ++ ++#include ++ ++static inline __attribute_const__ unsigned long ffnz(unsigned long ul) ++{ ++#ifdef __i386__ ++ __asm__("bsfl %1, %0":"=r,r" (ul) : "r,?m" (ul)); ++#else ++ __asm__("bsfq %1, %0":"=r" (ul) : "rm" (ul)); ++#endif ++ return ul; ++} ++ ++#define XNARCH_HOST_TICK_IRQ __ipipe_hrtimer_irq ++ ++long strncpy_from_user_nocheck(char *dst, ++ const char __user *src, ++ long count); ++ ++/* Read this last to enable default settings. */ ++#include ++ ++#endif /* !_COBALT_X86_ASM_MACHINE_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/smi.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/smi.h 2021-04-07 16:01:25.697636352 +0800 +@@ -0,0 +1,32 @@ ++/** ++ * Copyright © 2005 Gilles Chanteperdrix. ++ * ++ * SMI workaround for x86. ++ * ++ * Xenomai free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_X86_ASM_SMI_H ++#define _COBALT_X86_ASM_SMI_H ++ ++#ifndef _COBALT_X86_ASM_MACHINE_H ++#error "please don't include asm/smi.h directly" ++#endif ++ ++void mach_x86_smi_disable(void); ++void mach_x86_smi_restore(void); ++void mach_x86_smi_init(void); ++ ++#endif /* !_COBALT_X86_ASM_SMI_64_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/syscall32.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/syscall32.h 2021-04-07 16:01:25.693636358 +0800 +@@ -0,0 +1,187 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_SYSCALL32_H ++#define _COBALT_X86_ASM_SYSCALL32_H ++ ++#include ++ ++#ifdef CONFIG_X86_X32 ++ ++#define __COBALT_X32_BASE 128 ++ ++#define __COBALT_SYSNR32x(__reg) \ ++ ({ \ ++ long __nr = __reg; \ ++ if (__nr & __X32_SYSCALL_BIT) { \ ++ __nr &= ~__X32_SYSCALL_BIT; \ ++ __nr += __COBALT_X32_BASE; \ ++ } \ ++ __nr; \ ++ }) ++ ++#define __COBALT_COMPAT32x(__reg) \ ++ (((__reg) & __X32_SYSCALL_BIT) ? __COBALT_COMPATX_BIT : 0) ++ ++#if __NR_COBALT_SYSCALLS > __COBALT_X32_BASE ++#error "__NR_COBALT_SYSCALLS > __COBALT_X32_BASE" ++#endif ++ ++#define __syshand32x__(__name) ((cobalt_syshand)(CoBaLt32x_ ## __name)) ++ ++#define __COBALT_CALL32x_INITHAND(__handler) \ ++ [__COBALT_X32_BASE ... __COBALT_X32_BASE + __NR_COBALT_SYSCALLS-1] = __handler, ++ ++#define __COBALT_CALL32x_INITMODE(__mode) \ ++ [__COBALT_X32_BASE ... __COBALT_X32_BASE + __NR_COBALT_SYSCALLS-1] = __mode, ++ ++/* x32 default entry (no thunk) */ ++#define __COBALT_CALL32x_ENTRY(__name, __handler) \ ++ [sc_cobalt_ ## __name + __COBALT_X32_BASE] = __handler, ++ ++/* x32 thunk installation */ ++#define __COBALT_CALL32x_pure_THUNK(__name) \ ++ __COBALT_CALL32x_ENTRY(__name, __syshand32x__(__name)) ++ ++#define __COBALT_CALL32x_THUNK(__name) \ ++ __COBALT_CALL32x_ENTRY(__name, __syshand32emu__(__name)) ++ ++/* x32 thunk implementation. */ ++#define COBALT_SYSCALL32x(__name, __mode, __args) \ ++ long CoBaLt32x_ ## __name __args ++ ++/* x32 thunk declaration. */ ++#define COBALT_SYSCALL32x_DECL(__name, __args) \ ++ long CoBaLt32x_ ## __name __args ++ ++#else /* !CONFIG_X86_X32 */ ++ ++/* x32 support disabled. */ ++ ++#define __COBALT_SYSNR32x(__reg) (__reg) ++ ++#define __COBALT_COMPAT32x(__reg) 0 ++ ++#define __COBALT_CALL32x_INITHAND(__handler) ++ ++#define __COBALT_CALL32x_INITMODE(__mode) ++ ++#define __COBALT_CALL32x_ENTRY(__name, __handler) ++ ++#define __COBALT_CALL32x_pure_THUNK(__name) ++ ++#define __COBALT_CALL32x_THUNK(__name) ++ ++#define COBALT_SYSCALL32x_DECL(__name, __args) ++ ++#endif /* !CONFIG_X86_X32 */ ++ ++#ifdef CONFIG_IA32_EMULATION ++ ++#define __COBALT_IA32_BASE 256 /* Power of two. 
*/ ++ ++#define __COBALT_SYSNR32emu(__reg) \ ++ ({ \ ++ long __nr = __reg; \ ++ if (in_ia32_syscall()) \ ++ __nr += __COBALT_IA32_BASE; \ ++ __nr; \ ++ }) ++ ++#define __COBALT_COMPAT32emu(__reg) \ ++ (in_ia32_syscall() ? __COBALT_COMPAT_BIT : 0) ++ ++#if __NR_COBALT_SYSCALLS > __COBALT_IA32_BASE ++#error "__NR_COBALT_SYSCALLS > __COBALT_IA32_BASE" ++#endif ++ ++#define __syshand32emu__(__name) ((cobalt_syshand)(CoBaLt32emu_ ## __name)) ++ ++#define __COBALT_CALL32emu_INITHAND(__handler) \ ++ [__COBALT_IA32_BASE ... __COBALT_IA32_BASE + __NR_COBALT_SYSCALLS-1] = __handler, ++ ++#define __COBALT_CALL32emu_INITMODE(__mode) \ ++ [__COBALT_IA32_BASE ... __COBALT_IA32_BASE + __NR_COBALT_SYSCALLS-1] = __mode, ++ ++/* ia32 default entry (no thunk) */ ++#define __COBALT_CALL32emu_ENTRY(__name, __handler) \ ++ [sc_cobalt_ ## __name + __COBALT_IA32_BASE] = __handler, ++ ++/* ia32 thunk installation */ ++#define __COBALT_CALL32emu_THUNK(__name) \ ++ __COBALT_CALL32emu_ENTRY(__name, __syshand32emu__(__name)) ++ ++/* ia32 thunk implementation. */ ++#define COBALT_SYSCALL32emu(__name, __mode, __args) \ ++ long CoBaLt32emu_ ## __name __args ++ ++/* ia32 thunk declaration. */ ++#define COBALT_SYSCALL32emu_DECL(__name, __args) \ ++ long CoBaLt32emu_ ## __name __args ++ ++#else /* !CONFIG_IA32_EMULATION */ ++ ++/* ia32 emulation support disabled. */ ++ ++#define __COBALT_SYSNR32emu(__reg) (__reg) ++ ++#define __COBALT_COMPAT32emu(__reg) 0 ++ ++#define __COBALT_CALL32emu_INITHAND(__handler) ++ ++#define __COBALT_CALL32emu_INITMODE(__mode) ++ ++#define __COBALT_CALL32emu_ENTRY(__name, __handler) ++ ++#define __COBALT_CALL32emu_THUNK(__name) ++ ++#define COBALT_SYSCALL32emu_DECL(__name, __args) ++ ++#endif /* !CONFIG_IA32_EMULATION */ ++ ++#define __COBALT_CALL32_ENTRY(__name, __handler) \ ++ __COBALT_CALL32x_ENTRY(__name, __handler) \ ++ __COBALT_CALL32emu_ENTRY(__name, __handler) ++ ++#define __COBALT_CALL32_INITHAND(__handler) \ ++ __COBALT_CALL32x_INITHAND(__handler) \ ++ __COBALT_CALL32emu_INITHAND(__handler) ++ ++#define __COBALT_CALL32_INITMODE(__mode) \ ++ __COBALT_CALL32x_INITMODE(__mode) \ ++ __COBALT_CALL32emu_INITMODE(__mode) ++ ++/* Already checked for __COBALT_SYSCALL_BIT */ ++#define __COBALT_CALL32_SYSNR(__reg) \ ++ ({ \ ++ long __nr; \ ++ __nr = __COBALT_SYSNR32x(__reg); \ ++ if (__nr == (__reg)) \ ++ __nr = __COBALT_SYSNR32emu(__reg); \ ++ __nr; \ ++ }) ++ ++#define __COBALT_CALL_COMPAT(__reg) \ ++ ({ \ ++ int __ret = __COBALT_COMPAT32x(__reg); \ ++ if (__ret == 0) \ ++ __ret = __COBALT_COMPAT32emu(__reg); \ ++ __ret; \ ++ }) ++ ++#endif /* !_COBALT_X86_ASM_SYSCALL32_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/calibration.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/calibration.h 2021-04-07 16:01:25.688636365 +0800 +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (C) 2001,2002,2003,2004,2005 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_CALIBRATION_H ++#define _COBALT_X86_ASM_CALIBRATION_H ++ ++#include ++ ++static inline unsigned long __get_bogomips(void) ++{ ++ return this_cpu_read(cpu_info.loops_per_jiffy)/(500000/HZ); ++} ++ ++static inline void xnarch_get_latencies(struct xnclock_gravity *p) ++{ ++ unsigned long sched_latency; ++ ++#if CONFIG_XENO_OPT_TIMING_SCHEDLAT != 0 ++ sched_latency = CONFIG_XENO_OPT_TIMING_SCHEDLAT; ++#else /* !CONFIG_XENO_OPT_TIMING_SCHEDLAT */ ++ ++ if (strcmp(ipipe_timer_name(), "lapic") == 0) { ++#ifdef CONFIG_SMP ++ if (num_online_cpus() > 1) ++ sched_latency = 3350; ++ else ++ sched_latency = 2000; ++#else /* !SMP */ ++ sched_latency = 1000; ++#endif /* !SMP */ ++ } else if (strcmp(ipipe_timer_name(), "pit")) { /* HPET */ ++#ifdef CONFIG_SMP ++ if (num_online_cpus() > 1) ++ sched_latency = 3350; ++ else ++ sched_latency = 1500; ++#else /* !SMP */ ++ sched_latency = 1000; ++#endif /* !SMP */ ++ } else { ++ sched_latency = (__get_bogomips() < 250 ? 17000 : ++ __get_bogomips() < 2500 ? 4200 : ++ 3500); ++#ifdef CONFIG_SMP ++ sched_latency += 1000; ++#endif /* CONFIG_SMP */ ++ } ++#endif /* !CONFIG_XENO_OPT_TIMING_SCHEDLAT */ ++ ++ p->user = sched_latency; ++ p->kernel = CONFIG_XENO_OPT_TIMING_KSCHEDLAT; ++ p->irq = CONFIG_XENO_OPT_TIMING_IRQLAT; ++} ++ ++#endif /* !_COBALT_X86_ASM_CALIBRATION_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/c1e.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/c1e.h 2021-04-07 16:01:25.683636372 +0800 +@@ -0,0 +1,23 @@ ++/* ++ * Copyright (C) 2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef C1E_H ++#define C1E_H ++ ++void mach_x86_c1e_disable(void); ++ ++#endif /* C1E_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/syscall.h 2021-04-07 16:01:25.679636378 +0800 +@@ -0,0 +1,91 @@ ++/* ++ * Copyright (C) 2001-2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_SYSCALL_H ++#define _COBALT_X86_ASM_SYSCALL_H ++ ++#include ++#include ++#include ++ ++/* ++ * Cobalt and Linux syscall numbers can be fetched from ORIG_AX, ++ * masking out the __COBALT_SYSCALL_BIT marker. Make sure to offset ++ * the number by __COBALT_X32_BASE for Cobalt 32-bit compat syscalls ++ * only. ++ */ ++#define __xn_reg_sys(regs) ((regs)->orig_ax) ++#define __xn_reg_rval(regs) ((regs)->ax) ++#ifdef __i386__ ++#define __xn_reg_arg1(regs) ((regs)->bx) ++#define __xn_reg_arg2(regs) ((regs)->cx) ++#define __xn_reg_arg3(regs) ((regs)->dx) ++#define __xn_reg_arg4(regs) ((regs)->si) ++#define __xn_reg_arg5(regs) ((regs)->di) ++#else /* x86_64 */ ++#define __xn_reg_arg1(regs) ((regs)->di) ++#define __xn_reg_arg2(regs) ((regs)->si) ++#define __xn_reg_arg3(regs) ((regs)->dx) ++#define __xn_reg_arg4(regs) ((regs)->r10) ++#define __xn_reg_arg5(regs) ((regs)->r8) ++#endif /* x86_64 */ ++#define __xn_reg_pc(regs) ((regs)->ip) ++#define __xn_reg_sp(regs) ((regs)->sp) ++ ++#define __xn_syscall_p(regs) (__xn_reg_sys(regs) & __COBALT_SYSCALL_BIT) ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++#define __xn_syscall(regs) __COBALT_CALL32_SYSNR(__xn_reg_sys(regs) \ ++ & ~__COBALT_SYSCALL_BIT) ++#else ++#define __xn_syscall(regs) (__xn_reg_sys(regs) & ~__COBALT_SYSCALL_BIT) ++#endif ++ ++/* ++ * Root syscall number with predicate (valid only if ++ * !__xn_syscall_p(__regs)). ++ */ ++#define __xn_rootcall_p(__regs, __code) \ ++ ({ \ ++ *(__code) = __xn_reg_sys(__regs); \ ++ *(__code) < ipipe_root_nr_syscalls(current_thread_info()); \ ++ }) ++ ++static inline void __xn_error_return(struct pt_regs *regs, int v) ++{ ++ __xn_reg_rval(regs) = v; ++} ++ ++static inline void __xn_status_return(struct pt_regs *regs, long v) ++{ ++ __xn_reg_rval(regs) = v; ++} ++ ++static inline int __xn_interrupted_p(struct pt_regs *regs) ++{ ++ return __xn_reg_rval(regs) == -EINTR; ++} ++ ++static inline ++int xnarch_local_syscall(unsigned long a1, unsigned long a2, ++ unsigned long a3, unsigned long a4, ++ unsigned long a5) ++{ ++ return -ENOSYS; ++} ++ ++#endif /* !_COBALT_X86_ASM_SYSCALL_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/uapi/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/uapi/syscall.h 2021-04-07 16:01:25.674636385 +0800 +@@ -0,0 +1,29 @@ ++/* ++ * Copyright (C) 2001-2014 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_X86_ASM_UAPI_SYSCALL_H ++#define _COBALT_X86_ASM_UAPI_SYSCALL_H ++ ++#ifdef __ILP32__ ++#define __xn_syscall_base __COBALT_X32_BASE ++#else ++#define __xn_syscall_base 0 ++#endif ++ ++#define __xn_syscode(__nr) (__COBALT_SYSCALL_BIT | (__nr + __xn_syscall_base)) ++ ++#endif /* !_COBALT_X86_ASM_UAPI_SYSCALL_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/uapi/fptest.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/uapi/fptest.h 2021-04-07 16:01:25.669636392 +0800 +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (C) 2006 Gilles Chanteperdrix . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_X86_ASM_UAPI_FPTEST_H ++#define _COBALT_X86_ASM_UAPI_FPTEST_H ++ ++#define __COBALT_HAVE_SSE2 0x1 ++#define __COBALT_HAVE_AVX 0x2 ++ ++static inline void fp_regs_set(int features, unsigned int val) ++{ ++ unsigned long long vec[4] = { val, 0, val, 0 }; ++ unsigned i; ++ ++ for (i = 0; i < 8; i++) ++ __asm__ __volatile__("fildl %0": /* no output */ :"m"(val)); ++ ++ if (features & __COBALT_HAVE_AVX) { ++ __asm__ __volatile__( ++ "vmovupd %0,%%ymm0;" ++ "vmovupd %0,%%ymm1;" ++ "vmovupd %0,%%ymm2;" ++ "vmovupd %0,%%ymm3;" ++ "vmovupd %0,%%ymm4;" ++ "vmovupd %0,%%ymm5;" ++ "vmovupd %0,%%ymm6;" ++ "vmovupd %0,%%ymm7;" ++ : : "m"(vec[0]), "m"(vec[1]), "m"(vec[2]), "m"(vec[3])); ++ } else if (features & __COBALT_HAVE_SSE2) { ++ __asm__ __volatile__( ++ "movupd %0,%%xmm0;" ++ "movupd %0,%%xmm1;" ++ "movupd %0,%%xmm2;" ++ "movupd %0,%%xmm3;" ++ "movupd %0,%%xmm4;" ++ "movupd %0,%%xmm5;" ++ "movupd %0,%%xmm6;" ++ "movupd %0,%%xmm7;" ++ : : "m"(vec[0]), "m"(vec[1]), "m"(vec[2]), "m"(vec[3])); ++ } ++} ++ ++static inline unsigned int fp_regs_check(int features, unsigned int val, ++ int (*report)(const char *fmt, ...)) ++{ ++ unsigned long long vec[8][4]; ++ unsigned int i, result = val; ++ unsigned e[8]; ++ ++ for (i = 0; i < 8; i++) ++ __asm__ __volatile__("fistpl %0":"=m"(e[7 - i])); ++ ++ if (features & __COBALT_HAVE_AVX) { ++ __asm__ __volatile__( ++ "vmovupd %%ymm0,%0;" ++ "vmovupd %%ymm1,%1;" ++ "vmovupd %%ymm2,%2;" ++ "vmovupd %%ymm3,%3;" ++ "vmovupd %%ymm4,%4;" ++ "vmovupd %%ymm5,%5;" ++ "vmovupd %%ymm6,%6;" ++ "vmovupd %%ymm7,%7;" ++ : "=m" (vec[0][0]), "=m" (vec[1][0]), ++ "=m" (vec[2][0]), "=m" (vec[3][0]), ++ "=m" (vec[4][0]), "=m" (vec[5][0]), ++ "=m" (vec[6][0]), "=m" (vec[7][0])); ++ } else if (features & __COBALT_HAVE_SSE2) { ++ __asm__ __volatile__( ++ "movupd %%xmm0,%0;" ++ "movupd %%xmm1,%1;" ++ "movupd %%xmm2,%2;" ++ "movupd %%xmm3,%3;" ++ "movupd %%xmm4,%4;" ++ "movupd %%xmm5,%5;" ++ "movupd %%xmm6,%6;" ++ "movupd %%xmm7,%7;" ++ : "=m" (vec[0][0]), "=m" (vec[1][0]), ++ "=m" (vec[2][0]), "=m" (vec[3][0]), ++ "=m" (vec[4][0]), "=m" (vec[5][0]), ++ "=m" (vec[6][0]), "=m" (vec[7][0])); ++ } ++ ++ for (i = 0; i < 8; 
i++) ++ if (e[i] != val) { ++ report("r%d: %u != %u\n", i, e[i], val); ++ result = e[i]; ++ } ++ ++ if (features & __COBALT_HAVE_AVX) { ++ for (i = 0; i < 8; i++) { ++ int error = 0; ++ if (vec[i][0] != val) { ++ result = vec[i][0]; ++ error = 1; ++ } ++ if (vec[i][2] != val) { ++ result = vec[i][2]; ++ error = 1; ++ } ++ if (error) ++ report("ymm%d: %llu/%llu != %u/%u\n", ++ i, (unsigned long long)vec[i][0], ++ (unsigned long long)vec[i][2], ++ val, val); ++ } ++ } else if (features & __COBALT_HAVE_SSE2) { ++ for (i = 0; i < 8; i++) ++ if (vec[i][0] != val) { ++ report("xmm%d: %llu != %u\n", ++ i, (unsigned long long)vec[i][0], val); ++ result = vec[i][0]; ++ } ++ } ++ ++ return result; ++} ++ ++#endif /* _COBALT_X86_ASM_UAPI_FPTEST_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/uapi/features.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/uapi/features.h 2021-04-07 16:01:25.665636398 +0800 +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (C) 2005-2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_X86_ASM_UAPI_FEATURES_H ++#define _COBALT_X86_ASM_UAPI_FEATURES_H ++ ++/* The ABI revision level we use on this arch. */ ++#define XENOMAI_ABI_REV 17UL ++ ++#define XENOMAI_FEAT_DEP __xn_feat_generic_mask ++ ++#define XENOMAI_FEAT_MAN __xn_feat_generic_man_mask ++ ++#define XNARCH_HAVE_LLMULSHFT 1 ++#define XNARCH_HAVE_NODIV_LLIMD 1 ++ ++struct cobalt_featinfo_archdep { /* no arch-specific feature */ }; ++ ++#include ++ ++static inline const char *get_feature_label(unsigned int feature) ++{ ++ return get_generic_feature_label(feature); ++} ++ ++#endif /* !_COBALT_X86_ASM_UAPI_FEATURES_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/uapi/arith.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/uapi/arith.h 2021-04-07 16:01:25.660636405 +0800 +@@ -0,0 +1,243 @@ ++/** ++ * Arithmetic/conversion routines for x86. ++ * ++ * Copyright © 2005 Gilles Chanteperdrix, 32bit version. ++ * Copyright © 2007 Jan Kiszka, 64bit version. ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_X86_ASM_UAPI_ARITH_H ++#define _COBALT_X86_ASM_UAPI_ARITH_H ++#define _COBALT_X86_ASM_UAPI_ARITH_H ++ ++#include ++ ++#ifdef __i386__ ++ ++#define xnarch_u64tou32(ull, h, l) ({ \ ++ unsigned long long _ull = (ull); \ ++ (l) = _ull & 0xffffffff; \ ++ (h) = _ull >> 32; \ ++}) ++ ++#define xnarch_u64fromu32(h, l) ({ \ ++ unsigned long long _ull; \ ++ asm ( "": "=A"(_ull) : "d"(h), "a"(l)); \ ++ _ull; \ ++}) ++ ++/* const helper for xnarch_uldivrem, so that the compiler will eliminate ++ multiple calls with same arguments, at no additionnal cost. */ ++static inline __attribute__((__const__)) unsigned long long ++__mach_x86_32_uldivrem(const unsigned long long ull, const unsigned long d) ++{ ++ unsigned long long ret; ++ __asm__ ("divl %1" : "=A,A"(ret) : "r,?m"(d), "A,A"(ull)); ++ /* Exception if quotient does not fit on unsigned long. */ ++ return ret; ++} ++ ++/* Fast long long division: when the quotient and remainder fit on 32 bits. */ ++static inline unsigned long mach_x86_32_uldivrem(unsigned long long ull, ++ const unsigned d, ++ unsigned long *const rp) ++{ ++ unsigned long q, r; ++ ull = __mach_x86_32_uldivrem(ull, d); ++ __asm__ ( "": "=d"(r), "=a"(q) : "A"(ull)); ++ if(rp) ++ *rp = r; ++ return q; ++} ++#define xnarch_uldivrem(ull, d, rp) mach_x86_32_uldivrem((ull),(d),(rp)) ++ ++/* Division of an unsigned 96 bits ((h << 32) + l) by an unsigned 32 bits. ++ Building block for ulldiv. */ ++static inline unsigned long long mach_x86_32_div96by32(const unsigned long long h, ++ const unsigned long l, ++ const unsigned long d, ++ unsigned long *const rp) ++{ ++ unsigned long rh; ++ const unsigned long qh = xnarch_uldivrem(h, d, &rh); ++ const unsigned long long t = xnarch_u64fromu32(rh, l); ++ const unsigned long ql = xnarch_uldivrem(t, d, rp); ++ ++ return xnarch_u64fromu32(qh, ql); ++} ++ ++/* Slow long long division. Uses xnarch_uldivrem, hence has the same property: ++ the compiler removes redundant calls. 
*/ ++static inline unsigned long long ++mach_x86_32_ulldiv(const unsigned long long ull, ++ const unsigned d, ++ unsigned long *const rp) ++{ ++ unsigned long h, l; ++ xnarch_u64tou32(ull, h, l); ++ return mach_x86_32_div96by32(h, l, d, rp); ++} ++#define xnarch_ulldiv(ull,d,rp) mach_x86_32_ulldiv((ull),(d),(rp)) ++ ++/* Fast scaled-math-based replacement for long long multiply-divide */ ++#define xnarch_llmulshft(ll, m, s) \ ++({ \ ++ long long __ret; \ ++ unsigned __lo, __hi; \ ++ \ ++ __asm__ ( \ ++ /* HI = HIWORD(ll) * m */ \ ++ "mov %%eax,%%ecx\n\t" \ ++ "mov %%edx,%%eax\n\t" \ ++ "imull %[__m]\n\t" \ ++ "mov %%eax,%[__lo]\n\t" \ ++ "mov %%edx,%[__hi]\n\t" \ ++ \ ++ /* LO = LOWORD(ll) * m */ \ ++ "mov %%ecx,%%eax\n\t" \ ++ "mull %[__m]\n\t" \ ++ \ ++ /* ret = (HI << 32) + LO */ \ ++ "add %[__lo],%%edx\n\t" \ ++ "adc $0,%[__hi]\n\t" \ ++ \ ++ /* ret = ret >> s */ \ ++ "mov %[__s],%%ecx\n\t" \ ++ "shrd %%cl,%%edx,%%eax\n\t" \ ++ "shrd %%cl,%[__hi],%%edx\n\t" \ ++ : "=A" (__ret), [__lo] "=&r" (__lo), [__hi] "=&r" (__hi) \ ++ : "A" (ll), [__m] "m" (m), [__s] "m" (s) \ ++ : "ecx"); \ ++ __ret; \ ++}) ++ ++static inline __attribute__((const)) unsigned long long ++mach_x86_32_nodiv_ullimd(const unsigned long long op, ++ const unsigned long long frac, ++ unsigned rhs_integ) ++{ ++ register unsigned rl __asm__("ecx"); ++ register unsigned rm __asm__("esi"); ++ register unsigned rh __asm__("edi"); ++ unsigned fracl, frach, opl, oph; ++ volatile unsigned integ = rhs_integ; ++ register unsigned long long t; ++ ++ xnarch_u64tou32(op, oph, opl); ++ xnarch_u64tou32(frac, frach, fracl); ++ ++ __asm__ ("mov %[oph], %%eax\n\t" ++ "mull %[frach]\n\t" ++ "mov %%eax, %[rm]\n\t" ++ "mov %%edx, %[rh]\n\t" ++ "mov %[opl], %%eax\n\t" ++ "mull %[fracl]\n\t" ++ "mov %%edx, %[rl]\n\t" ++ "shl $1, %%eax\n\t" ++ "adc $0, %[rl]\n\t" ++ "adc $0, %[rm]\n\t" ++ "adc $0, %[rh]\n\t" ++ "mov %[oph], %%eax\n\t" ++ "mull %[fracl]\n\t" ++ "add %%eax, %[rl]\n\t" ++ "adc %%edx, %[rm]\n\t" ++ "adc $0, %[rh]\n\t" ++ "mov %[opl], %%eax\n\t" ++ "mull %[frach]\n\t" ++ "add %%eax, %[rl]\n\t" ++ "adc %%edx, %[rm]\n\t" ++ "adc $0, %[rh]\n\t" ++ "mov %[opl], %%eax\n\t" ++ "mull %[integ]\n\t" ++ "add %[rm], %%eax\n\t" ++ "adc %%edx, %[rh]\n\t" ++ "mov %[oph], %%edx\n\t" ++ "imul %[integ], %%edx\n\t" ++ "add %[rh], %%edx\n\t" ++ : [rl]"=&c"(rl), [rm]"=&S"(rm), [rh]"=&D"(rh), "=&A"(t) ++ : [opl]"m"(opl), [oph]"m"(oph), ++ [fracl]"m"(fracl), [frach]"m"(frach), [integ]"m"(integ) ++ : "cc"); ++ ++ return t; ++} ++ ++#define xnarch_nodiv_ullimd(op, frac, integ) \ ++ mach_x86_32_nodiv_ullimd((op), (frac), (integ)) ++ ++#else /* x86_64 */ ++ ++static inline __attribute__((__const__)) long long ++mach_x86_64_llimd (long long op, unsigned m, unsigned d) ++{ ++ long long result; ++ ++ __asm__ ( ++ "imul %[m]\n\t" ++ "idiv %[d]\n\t" ++ : "=a" (result) ++ : "a" (op), [m] "r" ((unsigned long long)m), ++ [d] "r" ((unsigned long long)d) ++ : "rdx"); ++ ++ return result; ++} ++#define xnarch_llimd(ll,m,d) mach_x86_64_llimd((ll),(m),(d)) ++ ++static inline __attribute__((__const__)) long long ++mach_x86_64_llmulshft(long long op, unsigned m, unsigned s) ++{ ++ long long result; ++ ++ __asm__ ( ++ "imulq %[m]\n\t" ++ "shrd %%cl,%%rdx,%%rax\n\t" ++ : "=a,a" (result) ++ : "a,a" (op), [m] "m,r" ((unsigned long long)m), ++ "c,c" (s) ++ : "rdx"); ++ ++ return result; ++} ++#define xnarch_llmulshft(op, m, s) mach_x86_64_llmulshft((op), (m), (s)) ++ ++static inline __attribute__((__const__)) unsigned long long ++mach_x86_64_nodiv_ullimd(unsigned long long op, 
++ unsigned long long frac, unsigned rhs_integ) ++{ ++ register unsigned long long rl __asm__("rax") = frac; ++ register unsigned long long rh __asm__("rdx"); ++ register unsigned long long integ __asm__("rsi") = rhs_integ; ++ register unsigned long long t __asm__("r8") = 0x80000000ULL; ++ ++ __asm__ ("mulq %[op]\n\t" ++ "addq %[t], %[rl]\n\t" ++ "adcq $0, %[rh]\n\t" ++ "imulq %[op], %[integ]\n\t" ++ "leaq (%[integ], %[rh], 1),%[rl]": ++ [rh]"=&d"(rh), [rl]"+&a"(rl), [integ]"+S"(integ): ++ [op]"D"(op), [t]"r"(t): "cc"); ++ ++ return rl; ++} ++ ++#define xnarch_nodiv_ullimd(op, frac, integ) \ ++ mach_x86_64_nodiv_ullimd((op), (frac), (integ)) ++ ++#endif /* x86_64 */ ++ ++#include ++ ++#endif /* _COBALT_X86_ASM_UAPI_ARITH_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/syscall32-table.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/syscall32-table.h 2021-04-07 16:01:25.655636412 +0800 +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_SYSCALL32_TABLE_H ++#define _COBALT_X86_ASM_SYSCALL32_TABLE_H ++ ++/* ++ * CAUTION: This file is read verbatim into the main syscall ++ * table. Only preprocessor stuff and syscall entries here. 
++ */ ++ ++__COBALT_CALL32emu_THUNK(thread_create) ++__COBALT_CALL32emu_THUNK(thread_setschedparam_ex) ++__COBALT_CALL32emu_THUNK(thread_getschedparam_ex) ++__COBALT_CALL32emu_THUNK(thread_setschedprio) ++__COBALT_CALL32emu_THUNK(sem_open) ++__COBALT_CALL32x_THUNK(sem_open) ++__COBALT_CALL32emu_THUNK(sem_timedwait) ++__COBALT_CALL32emu_THUNK(clock_getres) ++__COBALT_CALL32emu_THUNK(clock_gettime) ++__COBALT_CALL32emu_THUNK(clock_settime) ++__COBALT_CALL32emu_THUNK(clock_nanosleep) ++__COBALT_CALL32emu_THUNK(mutex_timedlock) ++__COBALT_CALL32emu_THUNK(cond_wait_prologue) ++__COBALT_CALL32emu_THUNK(mq_open) ++__COBALT_CALL32x_THUNK(mq_open) ++__COBALT_CALL32emu_THUNK(mq_getattr) ++__COBALT_CALL32x_THUNK(mq_getattr) ++__COBALT_CALL32emu_THUNK(mq_timedsend) ++__COBALT_CALL32emu_THUNK(mq_timedreceive) ++__COBALT_CALL32x_pure_THUNK(mq_timedreceive) ++__COBALT_CALL32emu_THUNK(mq_notify) ++__COBALT_CALL32x_THUNK(mq_notify) ++__COBALT_CALL32emu_THUNK(sched_weightprio) ++__COBALT_CALL32emu_THUNK(sched_setconfig_np) ++__COBALT_CALL32emu_THUNK(sched_getconfig_np) ++__COBALT_CALL32emu_THUNK(sched_setscheduler_ex) ++__COBALT_CALL32emu_THUNK(sched_getscheduler_ex) ++__COBALT_CALL32emu_THUNK(timer_create) ++__COBALT_CALL32x_THUNK(timer_create) ++__COBALT_CALL32emu_THUNK(timer_settime) ++__COBALT_CALL32emu_THUNK(timer_gettime) ++__COBALT_CALL32emu_THUNK(timerfd_settime) ++__COBALT_CALL32emu_THUNK(timerfd_gettime) ++__COBALT_CALL32emu_THUNK(sigwait) ++__COBALT_CALL32x_THUNK(sigwait) ++__COBALT_CALL32emu_THUNK(sigtimedwait) ++__COBALT_CALL32x_THUNK(sigtimedwait) ++__COBALT_CALL32emu_THUNK(sigwaitinfo) ++__COBALT_CALL32x_THUNK(sigwaitinfo) ++__COBALT_CALL32emu_THUNK(sigpending) ++__COBALT_CALL32x_THUNK(sigpending) ++__COBALT_CALL32emu_THUNK(sigqueue) ++__COBALT_CALL32x_THUNK(sigqueue) ++__COBALT_CALL32emu_THUNK(monitor_wait) ++__COBALT_CALL32emu_THUNK(event_wait) ++__COBALT_CALL32emu_THUNK(select) ++__COBALT_CALL32x_THUNK(select) ++__COBALT_CALL32emu_THUNK(recvmsg) ++__COBALT_CALL32x_THUNK(recvmsg) ++__COBALT_CALL32emu_THUNK(sendmsg) ++__COBALT_CALL32x_THUNK(sendmsg) ++__COBALT_CALL32emu_THUNK(mmap) ++__COBALT_CALL32x_THUNK(mmap) ++__COBALT_CALL32emu_THUNK(backtrace) ++__COBALT_CALL32x_THUNK(backtrace) ++ ++#endif /* !_COBALT_X86_ASM_SYSCALL32_TABLE_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/wrappers.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/wrappers.h 2021-04-07 16:01:25.651636418 +0800 +@@ -0,0 +1,64 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_WRAPPERS_H ++#define _COBALT_X86_ASM_WRAPPERS_H ++ ++#include /* Read the generic portion. 
*/ ++ ++#define __get_user_inatomic __get_user ++#define __put_user_inatomic __put_user ++ ++#if LINUX_VERSION_CODE > KERNEL_VERSION(4,9,108) && \ ++ LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) ++#define IPIPE_X86_FPU_EAGER ++#endif ++#if LINUX_VERSION_CODE > KERNEL_VERSION(4,4,137) && \ ++ LINUX_VERSION_CODE < KERNEL_VERSION(4,5,0) ++#define IPIPE_X86_FPU_EAGER ++#endif ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++#define IPIPE_X86_FPU_EAGER ++#endif ++ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,2,0) ++#include ++#include ++#else ++#include ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,0,0) ++ ++static inline void kernel_fpu_disable(void) ++{ ++ __thread_clear_has_fpu(current); ++} ++ ++static inline void kernel_fpu_enable(void) ++{ ++} ++ ++static inline bool kernel_fpu_disabled(void) ++{ ++ return __thread_has_fpu(current) == 0 && (read_cr0() & X86_CR0_TS) == 0; ++} ++#endif /* linux < 4.1.0 */ ++ ++#endif /* _COBALT_X86_ASM_WRAPPERS_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/fptest.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/fptest.h 2021-04-07 16:01:25.646636425 +0800 +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (C) 2006 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_FPTEST_H ++#define _COBALT_X86_ASM_FPTEST_H ++ ++#include ++#include ++#include ++#include ++ ++static inline int fp_kernel_supported(void) ++{ ++ return 1; ++} ++ ++static inline int fp_linux_begin(void) ++{ ++#if defined(CONFIG_X86_USE_3DNOW) \ ++ || defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE) ++ /* Ther kernel uses x86 FPU, we can not also use it in our tests. */ ++ static int once = 0; ++ if (!once) { ++ once = 1; ++ printk("%s:%d: Warning: Linux is compiled to use FPU in " ++ "kernel-space.\nFor this reason, switchtest can not " ++ "test using FPU in Linux kernel-space.\n", ++ __FILE__, __LINE__); ++ } ++ return -EBUSY; ++#endif /* 3DNow or RAID 456 */ ++ kernel_fpu_begin(); ++ /* kernel_fpu_begin() does no re-initialize the fpu context, but ++ fp_regs_set() implicitely expects an initialized fpu context, so ++ initialize it here. 
*/ ++ __asm__ __volatile__("fninit"); ++ return 0; ++} ++ ++static inline void fp_linux_end(void) ++{ ++ kernel_fpu_end(); ++} ++ ++static inline int fp_detect(void) ++{ ++ int features = 0; ++ ++#ifndef cpu_has_xmm2 ++#ifdef cpu_has_sse2 ++#define cpu_has_xmm2 cpu_has_sse2 ++#else ++#define cpu_has_xmm2 0 ++#endif ++#endif ++ if (cpu_has_xmm2) ++ features |= __COBALT_HAVE_SSE2; ++ ++#ifndef cpu_has_avx ++#define cpu_has_avx 0 ++#endif ++ if (cpu_has_avx) ++ features |= __COBALT_HAVE_AVX; ++ ++ return features; ++} ++ ++#endif /* _COBALT_X86_ASM_FPTEST_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/features.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/features.h 2021-04-07 16:01:25.641636432 +0800 +@@ -0,0 +1,27 @@ ++/* ++ * Copyright (C) 2005-2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_FEATURES_H ++#define _COBALT_X86_ASM_FEATURES_H ++ ++struct cobalt_featinfo; ++static inline void collect_arch_features(struct cobalt_featinfo *p) { } ++ ++#include ++ ++#endif /* !_COBALT_X86_ASM_FEATURES_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/thread.h 2021-04-07 16:01:25.637636438 +0800 +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * Copyright (C) 2004-2006 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_X86_ASM_THREAD_H ++#define _COBALT_X86_ASM_THREAD_H ++ ++#include ++#include ++#include ++ ++#ifndef IPIPE_X86_FPU_EAGER ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0) ++typedef union thread_xstate x86_fpustate; ++#define x86_fpustate_ptr(t) ((t)->fpu.state) ++#else ++typedef union fpregs_state x86_fpustate; ++#define x86_fpustate_ptr(t) ((t)->fpu.active_state) ++#endif ++#endif ++ ++struct xnarchtcb { ++ struct xntcb core; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) ++ unsigned long sp; ++ unsigned long *spp; ++ unsigned long ip; ++ unsigned long *ipp; ++#endif ++#ifdef IPIPE_X86_FPU_EAGER ++ struct fpu *kfpu; ++#else ++ x86_fpustate *fpup; ++ unsigned int root_used_math: 1; ++ x86_fpustate *kfpu_state; ++#endif ++ unsigned int root_kfpu: 1; ++}; ++ ++#define xnarch_fpu_ptr(tcb) ((tcb)->fpup) ++ ++#define xnarch_fault_regs(d) ((d)->regs) ++#define xnarch_fault_trap(d) ((d)->exception) ++#define xnarch_fault_code(d) ((d)->regs->orig_ax) ++#define xnarch_fault_pc(d) ((d)->regs->ip) ++#define xnarch_fault_fpu_p(d) ((d)->exception == X86_TRAP_NM) ++#define xnarch_fault_pf_p(d) ((d)->exception == X86_TRAP_PF) ++#define xnarch_fault_bp_p(d) ((current->ptrace & PT_PTRACED) && \ ++ ((d)->exception == X86_TRAP_DB || (d)->exception == X86_TRAP_BP)) ++#define xnarch_fault_notify(d) (!xnarch_fault_bp_p(d)) ++ ++void xnarch_switch_fpu(struct xnthread *from, struct xnthread *to); ++ ++int xnarch_handle_fpu_fault(struct xnthread *from, ++ struct xnthread *to, struct ipipe_trap_data *d); ++ ++void xnarch_leave_root(struct xnthread *root); ++ ++void xnarch_init_root_tcb(struct xnthread *thread); ++ ++void xnarch_init_shadow_tcb(struct xnthread *thread); ++ ++void xnarch_switch_to(struct xnthread *out, struct xnthread *in); ++ ++static inline void xnarch_enter_root(struct xnthread *root) { } ++ ++static inline int xnarch_escalate(void) ++{ ++ if (ipipe_root_p) { ++ ipipe_raise_irq(cobalt_pipeline.escalate_virq); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++int mach_x86_thread_init(void); ++void mach_x86_thread_cleanup(void); ++ ++#endif /* !_COBALT_X86_ASM_THREAD_H */ +--- linux/arch/x86/xenomai/thread.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/thread.c 2021-04-07 16:01:25.632636445 +0800 +@@ -0,0 +1,569 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * Copyright (C) 2004-2006 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static struct kmem_cache *xstate_cache; ++ ++#ifdef IPIPE_X86_FPU_EAGER ++#define fpu_kernel_xstate_size sizeof(struct fpu) ++#else ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0) ++#define fpu_kernel_xstate_size xstate_size ++#endif ++#endif /* IPIPE_X86_FPU_EAGER */ ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0) ++#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) ++#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) ++#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) ++#endif ++ ++#ifndef IPIPE_X86_FPU_EAGER ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,2,0) ++#include ++#include ++#define x86_fpregs_active(t) __thread_has_fpu(t) ++#define x86_fpregs_deactivate(t) __thread_clear_has_fpu(t) ++#define x86_fpregs_activate(t) __thread_set_has_fpu(t) ++#define x86_xstate_alignment __alignof__(union thread_xstate) ++#else ++#include ++ ++static inline int x86_fpregs_active(struct task_struct *t) ++{ ++ return t->thread.fpu.fpregs_active; ++} ++ ++static inline void x86_fpregs_deactivate(struct task_struct *t) ++{ ++ if (x86_fpregs_active(t)) ++ __fpregs_deactivate(&t->thread.fpu); ++} ++ ++static inline void x86_fpregs_activate(struct task_struct *t) ++{ ++ if (!x86_fpregs_active(t)) ++ __fpregs_activate(&t->thread.fpu); ++} ++ ++#define x86_xstate_alignment __alignof__(union fpregs_state) ++ ++#endif ++#else /* IPIPE_X86_FPU_EAGER */ ++#define x86_xstate_alignment __alignof__(union fpregs_state) ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) ++/* ++ * This is obsolete context switch code uselessly duplicating ++ * mainline's. ++ */ ++#ifdef CONFIG_X86_32 ++ ++#ifdef CONFIG_CC_STACKPROTECTOR ++ ++#define __CANARY_OUTPUT \ ++ , [stack_canary] "=m" (stack_canary.canary) ++ ++#define __CANARY_INPUT \ ++ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) ++ ++#define __CANARY_SWITCH \ ++ "movl %P[task_canary](%%edx), %%ebx\n\t" \ ++ "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" ++ ++#else /* !CONFIG_CC_STACKPROTECTOR */ ++ ++#define __CANARY_OUTPUT ++#define __CANARY_INPUT ++#define __CANARY_SWITCH ++ ++#endif /* !CONFIG_CC_STACKPROTECTOR */ ++ ++static inline void do_switch_threads(struct xnarchtcb *out_tcb, ++ struct xnarchtcb *in_tcb, ++ struct task_struct *outproc, ++ struct task_struct *inproc) ++{ ++ long ebx_out, ecx_out, edi_out, esi_out; ++ ++ __asm__ __volatile__("pushfl\n\t" ++ "pushl %%ebp\n\t" ++ "movl %[spp_out_ptr],%%ecx\n\t" ++ "movl %%esp,(%%ecx)\n\t" ++ "movl %[ipp_out_ptr],%%ecx\n\t" ++ "movl $1f,(%%ecx)\n\t" ++ "movl %[spp_in_ptr],%%ecx\n\t" ++ "movl %[ipp_in_ptr],%%edi\n\t" ++ "movl (%%ecx),%%esp\n\t" ++ "pushl (%%edi)\n\t" ++ __CANARY_SWITCH ++ "jmp __switch_to\n\t" ++ "1: popl %%ebp\n\t" ++ "popfl\n\t" ++ : "=b"(ebx_out), ++ "=&c"(ecx_out), ++ "=S"(esi_out), ++ "=D"(edi_out), ++ "+a"(outproc), ++ "+d"(inproc) ++ __CANARY_OUTPUT ++ : [spp_out_ptr] "m"(out_tcb->spp), ++ [ipp_out_ptr] "m"(out_tcb->ipp), ++ [spp_in_ptr] "m"(in_tcb->spp), ++ [ipp_in_ptr] "m"(in_tcb->ipp) ++ __CANARY_INPUT ++ : "memory"); ++} ++ ++#else /* CONFIG_X86_64 */ ++ ++#define __SWITCH_CLOBBER_LIST , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" ++ ++#ifdef CONFIG_CC_STACKPROTECTOR ++ ++#define __CANARY_OUTPUT \ ++ , [gs_canary] "=m" (irq_stack_union.stack_canary) ++ ++#define __CANARY_INPUT \ ++ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \ ++ , [current_task] "m" (current_task) ++ ++#define __CANARY_SWITCH \ ++ "movq 
"__percpu_arg([current_task])",%%rsi\n\t" \ ++ "movq %P[task_canary](%%rsi),%%r8\n\t" \ ++ "movq %%r8,"__percpu_arg([gs_canary])"\n\t" ++ ++#else /* !CONFIG_CC_STACKPROTECTOR */ ++ ++#define __CANARY_OUTPUT ++#define __CANARY_INPUT ++#define __CANARY_SWITCH ++ ++#endif /* !CONFIG_CC_STACKPROTECTOR */ ++ ++#define do_switch_threads(prev, next, p_rsp, n_rsp, p_rip, n_rip) \ ++ ({ \ ++ long __rdi, __rsi, __rax, __rbx, __rcx, __rdx; \ ++ \ ++ __asm__ __volatile__("pushfq\n\t" \ ++ "pushq %%rbp\n\t" \ ++ "movq %%rsi, %%rbp\n\t" \ ++ "movq %%rsp, (%%rdx)\n\t" \ ++ "movq $1f, (%%rax)\n\t" \ ++ "movq (%%rcx), %%rsp\n\t" \ ++ "pushq (%%rbx)\n\t" \ ++ "jmp __switch_to\n\t" \ ++ "1:\n\t" \ ++ __CANARY_SWITCH \ ++ "movq %%rbp, %%rsi\n\t" \ ++ "popq %%rbp\n\t" \ ++ "popfq\n\t" \ ++ : "=S" (__rsi), "=D" (__rdi), "=a" (__rax), \ ++ "=b" (__rbx), "=c" (__rcx), "=d" (__rdx) \ ++ __CANARY_OUTPUT \ ++ : "0" (next), "1" (prev), "5" (p_rsp), "4" (n_rsp), \ ++ "2" (p_rip), "3" (n_rip) \ ++ __CANARY_INPUT \ ++ : "memory", "cc" __SWITCH_CLOBBER_LIST); \ ++ }) ++ ++#endif /* CONFIG_X86_64 */ ++ ++#else /* LINUX_VERSION_CODE >= 4.8 */ ++ ++#include ++ ++#endif /* LINUX_VERSION_CODE >= 4.8 */ ++ ++void xnarch_switch_to(struct xnthread *out, struct xnthread *in) ++{ ++ struct xnarchtcb *out_tcb = &out->tcb, *in_tcb = &in->tcb; ++ struct task_struct *prev, *next, *last; ++ struct mm_struct *prev_mm, *next_mm; ++ ++ prev = out_tcb->core.host_task; ++#ifndef IPIPE_X86_FPU_EAGER ++ if (x86_fpregs_active(prev)) ++ /* ++ * __switch_to will try and use __unlazy_fpu, so we ++ * need to clear the ts bit. ++ */ ++ clts(); ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++ ++ next = in_tcb->core.host_task; ++#ifndef IPIPE_X86_FPU_EAGER ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) ++ next->thread.fpu.counter = 0; ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0) ++ next->thread.fpu_counter = 0; ++#else ++ next->fpu_counter = 0; ++#endif ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++ prev_mm = out_tcb->core.active_mm; ++ next_mm = in_tcb->core.mm; ++ if (next_mm == NULL) { ++ in_tcb->core.active_mm = prev_mm; ++ enter_lazy_tlb(prev_mm, next); ++ } else { ++ ipipe_switch_mm_head(prev_mm, next_mm, next); ++ /* ++ * We might be switching back to the root thread, ++ * which we preempted earlier, shortly after "current" ++ * dropped its mm context in the do_exit() path ++ * (next->mm == NULL). In that particular case, the ++ * kernel expects a lazy TLB state for leaving the mm. ++ */ ++ if (next->mm == NULL) ++ enter_lazy_tlb(prev_mm, next); ++ } ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) ++#ifdef CONFIG_X86_32 ++ do_switch_threads(out_tcb, in_tcb, prev, next); ++#else /* CONFIG_X86_64 */ ++ do_switch_threads(prev, next, ++ out_tcb->spp, in_tcb->spp, ++ out_tcb->ipp, in_tcb->ipp); ++#endif /* CONFIG_X86_64 */ ++ (void)last; ++#else /* LINUX_VERSION_CODE >= 4.8 */ ++ switch_to(prev, next, last); ++#endif /* LINUX_VERSION_CODE >= 4.8 */ ++ ++#ifndef IPIPE_X86_FPU_EAGER ++ stts(); ++#endif /* ! 
IPIPE_X86_FPU_EAGER */ ++} ++ ++#ifndef IPIPE_X86_FPU_EAGER ++ ++#ifdef CONFIG_X86_64 ++#define XSAVE_PREFIX "0x48," ++#define XSAVE_SUFFIX "q" ++#else ++#define XSAVE_PREFIX ++#define XSAVE_SUFFIX ++#endif ++ ++static inline void __do_save_fpu_state(x86_fpustate *fpup) ++{ ++#ifdef cpu_has_xsave ++ if (cpu_has_xsave) { ++#ifdef CONFIG_AS_AVX ++ __asm__ __volatile__("xsave" XSAVE_SUFFIX " %0" ++ : "=m" (fpup->xsave) : "a" (-1), "d" (-1) ++ : "memory"); ++#else /* !CONFIG_AS_AVX */ ++ __asm __volatile__(".byte " XSAVE_PREFIX "0x0f,0xae,0x27" ++ : : "D" (&fpup->xsave), "m" (fpup->xsave), ++ "a" (-1), "d" (-1) ++ : "memory"); ++#endif /* !CONFIG_AS_AVX */ ++ return; ++ } ++#endif /* cpu_has_xsave */ ++#ifdef CONFIG_X86_32 ++ if (cpu_has_fxsr) ++ __asm__ __volatile__("fxsave %0; fnclex":"=m"(*fpup)); ++ else ++ __asm__ __volatile__("fnsave %0; fwait":"=m"(*fpup)); ++#else /* CONFIG_X86_64 */ ++#ifdef CONFIG_AS_FXSAVEQ ++ __asm __volatile__("fxsaveq %0" : "=m" (fpup->fxsave)); ++#else /* !CONFIG_AS_FXSAVEQ */ ++ __asm__ __volatile__("rex64/fxsave (%[fx])" ++ : "=m" (fpup->fxsave) ++ : [fx] "R" (&fpup->fxsave)); ++#endif /* !CONFIG_AS_FXSAVEQ */ ++#endif /* CONFIG_X86_64 */ ++} ++ ++static inline void __do_restore_fpu_state(x86_fpustate *fpup) ++{ ++#ifdef cpu_has_xsave ++ if (cpu_has_xsave) { ++#ifdef CONFIG_AS_AVX ++ __asm__ __volatile__("xrstor" XSAVE_SUFFIX " %0" ++ : : "m" (fpup->xsave), "a" (-1), "d" (-1) ++ : "memory"); ++#else /* !CONFIG_AS_AVX */ ++ __asm__ __volatile__(".byte " XSAVE_PREFIX "0x0f,0xae,0x2f" ++ : : "D" (&fpup->xsave), "m" (fpup->xsave), ++ "a" (-1), "d" (-1) ++ : "memory"); ++#endif /* !CONFIG_AS_AVX */ ++ return; ++ } ++#endif /* cpu_has_xsave */ ++#ifdef CONFIG_X86_32 ++ if (cpu_has_fxsr) ++ __asm__ __volatile__("fxrstor %0": /* no output */ :"m"(*fpup)); ++ else ++ __asm__ __volatile__("frstor %0": /* no output */ :"m"(*fpup)); ++#else /* CONFIG_X86_64 */ ++#ifdef CONFIG_AS_FXSAVEQ ++ __asm__ __volatile__("fxrstorq %0" : : "m" (fpup->fxsave)); ++#else /* !CONFIG_AS_FXSAVEQ */ ++ __asm__ __volatile__("rex64/fxrstor (%0)" ++ : : "R" (&fpup->fxsave), "m" (fpup->fxsave)); ++#endif /* !CONFIG_AS_FXSAVEQ */ ++#endif /* CONFIG_X86_64 */ ++} ++ ++int xnarch_handle_fpu_fault(struct xnthread *from, ++ struct xnthread *to, struct ipipe_trap_data *d) ++{ ++ struct xnarchtcb *tcb = xnthread_archtcb(to); ++ struct task_struct *p = tcb->core.host_task; ++ ++ if (x86_fpregs_active(p)) ++ return 0; ++ ++ if (!(p->flags & PF_USED_MATH)) { ++ /* ++ * The faulting task is a shadow using the FPU for the first ++ * time, initialize the FPU context and tell linux about it. ++ */ ++ __asm__ __volatile__("clts; fninit"); ++ ++ if (cpu_has_xmm) { ++ unsigned long __mxcsr = 0x1f80UL & 0xffbfUL; ++ __asm__ __volatile__("ldmxcsr %0"::"m"(__mxcsr)); ++ } ++ p->flags |= PF_USED_MATH; ++ } else { ++ /* ++ * The faulting task already used FPU in secondary ++ * mode. ++ */ ++ clts(); ++ __do_restore_fpu_state(tcb->fpup); ++ } ++ ++ x86_fpregs_activate(p); ++ ++ xnlock_get(&nklock); ++ xnthread_set_state(to, XNFPU); ++ xnlock_put(&nklock); ++ ++ return 1; ++} ++#else /* IPIPE_X86_FPU_EAGER */ ++ ++int xnarch_handle_fpu_fault(struct xnthread *from, ++ struct xnthread *to, struct ipipe_trap_data *d) ++{ ++ /* in eager mode there are no such faults */ ++ BUG_ON(1); ++} ++#endif /* ! 
IPIPE_X86_FPU_EAGER */ ++ ++#define current_task_used_kfpu() kernel_fpu_disabled() ++ ++#define tcb_used_kfpu(t) ((t)->root_kfpu) ++ ++#ifndef IPIPE_X86_FPU_EAGER ++void xnarch_leave_root(struct xnthread *root) ++{ ++ struct xnarchtcb *const rootcb = xnthread_archtcb(root); ++ struct task_struct *const p = current; ++ x86_fpustate *const current_task_fpup = x86_fpustate_ptr(&p->thread); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) && defined(CONFIG_X86_64) ++ rootcb->spp = &p->thread.sp; ++ rootcb->ipp = &p->thread.rip; ++#endif ++ if (!current_task_used_kfpu()) { ++ rootcb->root_kfpu = 0; ++ rootcb->fpup = x86_fpregs_active(p) ? current_task_fpup : NULL; ++ return; ++ } ++ ++ /* ++ * We need to save the kernel FPU context before preempting, ++ * store it in our root control block. ++ */ ++ rootcb->root_kfpu = 1; ++ rootcb->fpup = current_task_fpup; ++ rootcb->root_used_math = !!(p->flags & PF_USED_MATH); ++ x86_fpustate_ptr(&p->thread) = rootcb->kfpu_state; ++ x86_fpregs_activate(p); ++ p->flags |= PF_USED_MATH; ++ kernel_fpu_enable(); ++} ++ ++void xnarch_switch_fpu(struct xnthread *from, struct xnthread *to) ++{ ++ x86_fpustate *const prev_fpup = from ? from->tcb.fpup : NULL; ++ struct xnarchtcb *const tcb = xnthread_archtcb(to); ++ struct task_struct *const p = tcb->core.host_task; ++ x86_fpustate *const next_task_fpup = x86_fpustate_ptr(&p->thread); ++ ++ /* Restore lazy mode only if root fpu owner is not current. */ ++ if (xnthread_test_state(to, XNROOT) && ++ prev_fpup != next_task_fpup && ++ !tcb_used_kfpu(tcb)) ++ return; ++ ++ clts(); ++ /* ++ * The only case where we can skip restoring the FPU is: ++ * - the fpu context of the next task is the current fpu ++ * context; ++ * - root thread has not used fpu in kernel-space; ++ * - cpu has fxsr (because if it does not, last context switch ++ * reinitialized fpu) ++ */ ++ if (prev_fpup != next_task_fpup || !cpu_has_fxsr) ++ __do_restore_fpu_state(next_task_fpup); ++ ++ if (!tcb_used_kfpu(tcb)) { ++ x86_fpregs_activate(p); ++ return; ++ } ++ kernel_fpu_disable(); ++ ++ x86_fpustate_ptr(&p->thread) = to->tcb.fpup; ++ if (!tcb->root_used_math) { ++ x86_fpregs_deactivate(p); ++ p->flags &= ~PF_USED_MATH; ++ } ++} ++#else /* IPIPE_X86_FPU_EAGER */ ++void xnarch_leave_root(struct xnthread *root) ++{ ++ struct xnarchtcb *const rootcb = xnthread_archtcb(root); ++ ++ rootcb->root_kfpu = current_task_used_kfpu(); ++ ++ if (!tcb_used_kfpu(rootcb)) ++ return; ++ ++ /* save fpregs from in-kernel use */ ++ copy_fpregs_to_fpstate(rootcb->kfpu); ++ kernel_fpu_enable(); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++ /* restore current's fpregs */ ++ __cpu_invalidate_fpregs_state(); ++ switch_fpu_finish(¤t->thread.fpu, smp_processor_id()); ++#else ++ /* mark current thread as not owning the FPU anymore */ ++ if (fpregs_active()) ++ fpregs_deactivate(¤t->thread.fpu); ++#endif ++} ++ ++void xnarch_switch_fpu(struct xnthread *from, struct xnthread *to) ++{ ++ struct xnarchtcb *const to_tcb = xnthread_archtcb(to); ++ ++ if (!tcb_used_kfpu(to_tcb)) ++ return; ++ ++ copy_kernel_to_fpregs(&to_tcb->kfpu->state); ++ kernel_fpu_disable(); ++} ++#endif /* ! 
IPIPE_X86_FPU_EAGER */ ++ ++void xnarch_init_root_tcb(struct xnthread *thread) ++{ ++ struct xnarchtcb *tcb = xnthread_archtcb(thread); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) ++ tcb->sp = 0; ++ tcb->spp = &tcb->sp; ++ tcb->ipp = &tcb->ip; ++#endif ++#ifndef IPIPE_X86_FPU_EAGER ++ tcb->fpup = NULL; ++ tcb->kfpu_state = kmem_cache_zalloc(xstate_cache, GFP_KERNEL); ++#else /* IPIPE_X86_FPU_EAGER */ ++ tcb->kfpu = kmem_cache_zalloc(xstate_cache, GFP_KERNEL); ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++ tcb->root_kfpu = 0; ++} ++ ++void xnarch_init_shadow_tcb(struct xnthread *thread) ++{ ++ struct xnarchtcb *tcb = xnthread_archtcb(thread); ++ struct task_struct *p = tcb->core.host_task; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) ++ tcb->sp = 0; ++ tcb->spp = &p->thread.sp; ++#ifdef CONFIG_X86_32 ++ tcb->ipp = &p->thread.ip; ++#else ++ tcb->ipp = &p->thread.rip; /* raw naming intended. */ ++#endif ++#endif ++#ifndef IPIPE_X86_FPU_EAGER ++ tcb->fpup = x86_fpustate_ptr(&p->thread); ++ tcb->kfpu_state = NULL; ++#else /* IPIPE_X86_FPU_EAGER */ ++ tcb->kfpu = NULL; ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++ tcb->root_kfpu = 0; ++ ++#ifndef IPIPE_X86_FPU_EAGER ++ /* XNFPU is set upon first FPU fault */ ++ xnthread_clear_state(thread, XNFPU); ++#else /* IPIPE_X86_FPU_EAGER */ ++ /* XNFPU is always set */ ++ xnthread_set_state(thread, XNFPU); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0) ++ fpu__activate_fpstate_read(&p->thread.fpu); ++#else ++ fpu__initialize(&p->thread.fpu); ++#endif ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++} ++ ++int mach_x86_thread_init(void) ++{ ++ xstate_cache = kmem_cache_create("cobalt_x86_xstate", ++ fpu_kernel_xstate_size, ++ x86_xstate_alignment, ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0) ++ SLAB_NOTRACK, ++#else ++ 0, ++#endif ++ NULL); ++ if (xstate_cache == NULL) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++void mach_x86_thread_cleanup(void) ++{ ++ kmem_cache_destroy(xstate_cache); ++} +--- linux/arch/x86/xenomai/c1e.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/c1e.c 2021-04-07 16:01:25.627636452 +0800 +@@ -0,0 +1,72 @@ ++/* ++ * Disable Intel automatic promotion to C1E mode. ++ * Lifted from drivers/idle/intel_idle.c ++ * Copyright (c) 2013, Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++ ++#define ICPU(model) \ ++ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, 1UL } ++ ++static const struct x86_cpu_id c1e_ids[] = { ++ ICPU(0x1a), ++ ICPU(0x1e), ++ ICPU(0x1f), ++ ICPU(0x25), ++ ICPU(0x2c), ++ ICPU(0x2e), ++ ICPU(0x2f), ++ ICPU(0x2a), ++ ICPU(0x2d), ++ ICPU(0x3a), ++ ICPU(0x3e), ++ ICPU(0x3c), ++ ICPU(0x3f), ++ ICPU(0x45), ++ ICPU(0x46), ++ ICPU(0x4D), ++ {} ++}; ++ ++#undef ICPU ++ ++static void c1e_promotion_disable(void *dummy) ++{ ++ unsigned long long msr_bits; ++ ++ rdmsrl(MSR_IA32_POWER_CTL, msr_bits); ++ msr_bits &= ~0x2; ++ wrmsrl(MSR_IA32_POWER_CTL, msr_bits); ++} ++ ++void mach_x86_c1e_disable(void) ++{ ++ const struct x86_cpu_id *id; ++ ++ id = x86_match_cpu(c1e_ids); ++ if (id) { ++ printk("[Xenomai] disabling automatic C1E state promotion on Intel processor\n"); ++ /* ++ * cpu uses C1E, disable this feature (copied from ++ * intel_idle driver) ++ */ ++ on_each_cpu(c1e_promotion_disable, NULL, 1); ++ } ++} +--- linux/arch/x86/Makefile 2020-12-21 21:59:17.000000000 +0800 ++++ linux-patched/arch/x86/Makefile 2021-04-07 16:01:25.584636514 +0800 +@@ -338,3 +338,6 @@ + echo ' FDARGS="..." arguments for the booted kernel' + echo ' FDINITRD=file initrd for the booted kernel' + endef ++ ++KBUILD_CFLAGS += -Iarch/$(SRCARCH)/xenomai/include -Iinclude/xenomai ++core-$(CONFIG_XENOMAI) += arch/x86/xenomai/ +--- linux/init/Kconfig 2021-04-07 16:00:26.626720756 +0800 ++++ linux-patched/init/Kconfig 2021-04-07 16:01:25.577636524 +0800 +@@ -2056,3 +2056,54 @@ + # . + config ARCH_HAS_SYSCALL_WRAPPER + def_bool n ++menuconfig XENOMAI ++ depends on X86_TSC || !X86 ++ bool "Xenomai/cobalt" ++ select IPIPE ++ select IPIPE_WANT_APIREV_2 ++ default y ++ help ++ Xenomai's Cobalt core is a real-time extension to the Linux ++ kernel, which exhibits very short interrupt and scheduling ++ latency, without affecting the regular kernel services. ++ ++ This option enables the set of extended kernel services ++ required to run the real-time applications in user-space, ++ over the Xenomai libraries. ++ ++ Please visit http://xenomai.org for more information. ++ ++if XENOMAI ++source "arch/x86/xenomai/Kconfig" ++endif ++ ++if MIGRATION ++comment "WARNING! Page migration (CONFIG_MIGRATION) may increase" ++comment "latency." ++endif ++ ++if APM || CPU_FREQ || ACPI_PROCESSOR || INTEL_IDLE ++comment "WARNING! At least one of APM, CPU frequency scaling, ACPI 'processor'" ++comment "or CPU idle features is enabled. Any of these options may" ++comment "cause troubles with Xenomai. You should disable them." 
++endif ++ ++if !GENERIC_CLOCKEVENTS ++comment "NOTE: Xenomai 3.x requires CONFIG_GENERIC_CLOCKEVENTS" ++endif ++ ++config XENO_VERSION_MAJOR ++ int ++ default 3 ++ ++config XENO_VERSION_MINOR ++ int ++ default 1 ++ ++config XENO_REVISION_LEVEL ++ int ++ default 0 ++ ++config XENO_VERSION_STRING ++ string ++ default "3.1" diff --git a/enable_irq.patch b/enable_irq_arm64.patch similarity index 100% rename from enable_irq.patch rename to enable_irq_arm64.patch diff --git a/ipipe-core-4.19.55-oe1.patch b/ipipe-core-4.19.55-oe1_arm64.patch similarity index 100% rename from ipipe-core-4.19.55-oe1.patch rename to ipipe-core-4.19.55-oe1_arm64.patch diff --git a/ipipe-core-4.19.90-oe1_x86.patch b/ipipe-core-4.19.90-oe1_x86.patch new file mode 100755 index 0000000000000000000000000000000000000000..f9319362f92cf4d96fe4f175a9a82ac7dfb1d5ee --- /dev/null +++ b/ipipe-core-4.19.90-oe1_x86.patch @@ -0,0 +1,142264 @@ +diff -uprN kernel/arch/x86/entry/common.c kernel_new/arch/x86/entry/common.c +--- kernel/arch/x86/entry/common.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/entry/common.c 2021-04-01 18:28:07.548863405 +0800 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -47,6 +48,22 @@ __visible inline void enter_from_user_mo + static inline void enter_from_user_mode(void) {} + #endif + ++#ifdef CONFIG_IPIPE ++#define disable_local_irqs() do { \ ++ hard_local_irq_disable(); \ ++ trace_hardirqs_off(); \ ++} while (0) ++#define enable_local_irqs() do { \ ++ trace_hardirqs_on(); \ ++ hard_local_irq_enable(); \ ++} while (0) ++#define check_irqs_disabled() hard_irqs_disabled() ++#else ++#define disable_local_irqs() local_irq_disable() ++#define enable_local_irqs() local_irq_enable() ++#define check_irqs_disabled() irqs_disabled() ++#endif ++ + static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) + { + #ifdef CONFIG_X86_64 +@@ -147,7 +164,7 @@ static void exit_to_usermode_loop(struct + */ + while (true) { + /* We have work to do. */ +- local_irq_enable(); ++ enable_local_irqs(); + + if (cached_flags & _TIF_NEED_RESCHED) + schedule(); +@@ -172,7 +189,7 @@ static void exit_to_usermode_loop(struct + fire_user_return_notifiers(); + + /* Disable IRQs and retry */ +- local_irq_disable(); ++ disable_local_irqs(); + + cached_flags = READ_ONCE(current_thread_info()->flags); + +@@ -192,11 +209,23 @@ __visible inline void prepare_exit_to_us + lockdep_assert_irqs_disabled(); + lockdep_sys_exit(); + ++again: + cached_flags = READ_ONCE(ti->flags); + + if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) + exit_to_usermode_loop(regs, cached_flags); + ++ if (ipipe_user_intret_notifier_enabled(ti)) { ++ int ret; ++ ++ enable_local_irqs(); ++ ret = __ipipe_notify_user_intreturn(); ++ disable_local_irqs(); ++ ++ if (ret == 0) ++ goto again; ++ } ++ + #ifdef CONFIG_COMPAT + /* + * Compat syscalls set TS_COMPAT. Make sure we clear it before +@@ -255,8 +284,8 @@ __visible inline void syscall_return_slo + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING) && +- WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax)) +- local_irq_enable(); ++ WARN(check_irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax)) ++ enable_local_irqs(); + + rseq_syscall(regs); + +@@ -264,10 +293,13 @@ __visible inline void syscall_return_slo + * First do one-time work. If these work items are enabled, we + * want to run them exactly once per syscall exit with IRQs on. 
+ */ +- if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS)) ++ if (unlikely((!IS_ENABLED(CONFIG_IPIPE) || ++ syscall_get_nr(current, regs) < ++ ipipe_root_nr_syscalls(ti)) && ++ (cached_flags & SYSCALL_EXIT_WORK_FLAGS))) + syscall_slow_exit_work(regs, cached_flags); + +- local_irq_disable(); ++ disable_local_irqs(); + prepare_exit_to_usermode(regs); + } + +@@ -275,10 +307,20 @@ __visible inline void syscall_return_slo + __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) + { + struct thread_info *ti; ++ int ret; + + enter_from_user_mode(); +- local_irq_enable(); ++ enable_local_irqs(); + ti = current_thread_info(); ++ ++ ret = ipipe_handle_syscall(ti, nr & __SYSCALL_MASK, regs); ++ if (ret > 0) { ++ disable_local_irqs(); ++ return; ++ } ++ if (ret < 0) ++ goto done; ++ + if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) + nr = syscall_trace_enter(regs); + +@@ -292,12 +334,45 @@ __visible void do_syscall_64(unsigned lo + nr = array_index_nospec(nr, NR_syscalls); + regs->ax = sys_call_table[nr](regs); + } +- ++done: + syscall_return_slowpath(regs); + } + #endif + + #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) ++ ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_X86_32 ++static inline int pipeline_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs) ++{ ++ return ipipe_handle_syscall(ti, nr, regs); ++} ++#else ++static inline int pipeline_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs) ++{ ++ struct pt_regs regs64 = *regs; ++ int ret; ++ ++ regs64.di = (unsigned int)regs->bx; ++ regs64.si = (unsigned int)regs->cx; ++ regs64.r10 = (unsigned int)regs->si; ++ regs64.r8 = (unsigned int)regs->di; ++ regs64.r9 = (unsigned int)regs->bp; ++ ret = ipipe_handle_syscall(ti, nr, ®s64); ++ regs->ax = (unsigned int)regs64.ax; ++ ++ return ret; ++} ++#endif /* CONFIG_X86_32 */ ++#else /* CONFIG_IPIPE */ ++static inline int pipeline_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs) ++{ ++ return 0; ++} ++#endif /* CONFIG_IPIPE */ + /* + * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does + * all entry and exit work and returns with IRQs off. This function is +@@ -308,11 +383,20 @@ static __always_inline void do_syscall_3 + { + struct thread_info *ti = current_thread_info(); + unsigned int nr = (unsigned int)regs->orig_ax; ++ int ret; + + #ifdef CONFIG_IA32_EMULATION + ti->status |= TS_COMPAT; + #endif + ++ ret = pipeline_syscall(ti, nr, regs); ++ if (ret > 0) { ++ disable_local_irqs(); ++ return; ++ } ++ if (ret < 0) ++ goto done; ++ + if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { + /* + * Subtlety here: if ptrace pokes something larger than +@@ -340,7 +424,7 @@ static __always_inline void do_syscall_3 + (unsigned int)regs->di, (unsigned int)regs->bp); + #endif /* CONFIG_IA32_EMULATION */ + } +- ++done: + syscall_return_slowpath(regs); + } + +@@ -348,7 +432,7 @@ static __always_inline void do_syscall_3 + __visible void do_int80_syscall_32(struct pt_regs *regs) + { + enter_from_user_mode(); +- local_irq_enable(); ++ enable_local_irqs(); + do_syscall_32_irqs_on(regs); + } + +@@ -372,7 +456,7 @@ __visible long do_fast_syscall_32(struct + + enter_from_user_mode(); + +- local_irq_enable(); ++ enable_local_irqs(); + + /* Fetch EBP from where the vDSO stashed it. */ + if ( +@@ -390,7 +474,7 @@ __visible long do_fast_syscall_32(struct + ) { + + /* User code screwed up. 
*/ +- local_irq_disable(); ++ disable_local_irqs(); + regs->ax = -EFAULT; + prepare_exit_to_usermode(regs); + return 0; /* Keep it simple: use IRET. */ +diff -uprN kernel/arch/x86/entry/entry_64.S kernel_new/arch/x86/entry/entry_64.S +--- kernel/arch/x86/entry/entry_64.S 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/entry/entry_64.S 2021-04-01 18:28:07.548863405 +0800 +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -63,7 +64,12 @@ END(native_usergs_sysret64) + .endm + + .macro TRACE_IRQS_IRETQ +- TRACE_IRQS_FLAGS EFLAGS(%rsp) ++#ifdef CONFIG_TRACE_IRQFLAGS ++ btl $9, EFLAGS(%rsp) /* interrupts off? */ ++ jnc 1f ++ TRACE_IRQS_ON_VIRT ++1: ++#endif + .endm + + /* +@@ -77,7 +83,8 @@ END(native_usergs_sysret64) + * make sure the stack pointer does not get reset back to the top + * of the debug stack, and instead just reuses the current stack. + */ +-#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) ++#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) \ ++ && !defined(CONFIG_IPIPE) + + .macro TRACE_IRQS_OFF_DEBUG + call debug_stack_set_zero +@@ -395,6 +402,7 @@ END(__switch_to_asm) + */ + ENTRY(ret_from_fork) + UNWIND_HINT_EMPTY ++ HARD_COND_ENABLE_INTERRUPTS + movq %rax, %rdi + call schedule_tail /* rdi: 'prev' task parameter */ + +@@ -638,8 +646,13 @@ ENTRY(interrupt_entry) + + 1: + ENTER_IRQ_STACK old_rsp=%rdi save_ret=1 +- /* We entered an interrupt context - irqs are off: */ ++#ifndef CONFIG_IPIPE ++ /* We entered an interrupt context - irqs are off unless ++ pipelining is enabled, in which case we defer tracing until ++ __ipipe_do_sync_stage() where the virtual IRQ state is ++ updated for the root stage. */ + TRACE_IRQS_OFF ++#endif + + ret + END(interrupt_entry) +@@ -667,7 +680,17 @@ common_interrupt: + addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ + call interrupt_entry + UNWIND_HINT_REGS indirect=1 ++#ifdef CONFIG_IPIPE ++ call __ipipe_handle_irq ++ testl %eax, %eax ++ jnz ret_from_intr ++ LEAVE_IRQ_STACK ++ testb $3, CS(%rsp) ++ jz retint_kernel_early ++ jmp retint_user_early ++#else + call do_IRQ /* rdi points to pt_regs */ ++#endif + /* 0(%rsp): old RSP */ + ret_from_intr: + DISABLE_INTERRUPTS(CLBR_ANY) +@@ -682,6 +705,7 @@ ret_from_intr: + GLOBAL(retint_user) + mov %rsp,%rdi + call prepare_exit_to_usermode ++retint_user_early: + TRACE_IRQS_IRETQ + + GLOBAL(swapgs_restore_regs_and_return_to_usermode) +@@ -733,13 +757,18 @@ retint_kernel: + jnc 1f + 0: cmpl $0, PER_CPU_VAR(__preempt_count) + jnz 1f ++#ifdef CONFIG_IPIPE ++ call __ipipe_preempt_schedule_irq ++#else + call preempt_schedule_irq ++#endif + jmp 0b + 1: + #endif + /* + * The iretq could re-enable interrupts: +- */ ++ */ ++retint_kernel_early: + TRACE_IRQS_IRETQ + + GLOBAL(restore_regs_and_return_to_kernel) +@@ -858,6 +887,28 @@ _ASM_NOKPROBE(common_interrupt) + /* + * APIC interrupts. 
+ */ ++#ifdef CONFIG_IPIPE ++.macro apicinterrupt2 num sym ++ENTRY(\sym) ++ UNWIND_HINT_IRET_REGS ++ ASM_CLAC ++ pushq $~(\num) ++.Lcommon_\sym: ++ call interrupt_entry ++ UNWIND_HINT_REGS indirect=1 ++ call __ipipe_handle_irq ++ testl %eax, %eax ++ jnz ret_from_intr ++ LEAVE_IRQ_STACK ++ testb $3, CS(%rsp) ++ jz retint_kernel_early ++ jmp retint_user_early ++END(\sym) ++.endm ++.macro apicinterrupt3 num sym do_sym ++apicinterrupt2 \num \sym ++.endm ++#else /* !CONFIG_IPIPE */ + .macro apicinterrupt3 num sym do_sym + ENTRY(\sym) + UNWIND_HINT_IRET_REGS +@@ -870,6 +921,7 @@ ENTRY(\sym) + END(\sym) + _ASM_NOKPROBE(\sym) + .endm ++#endif /* !CONFIG_IPIPE */ + + /* Make sure APIC interrupt handlers end up in the irqentry section: */ + #define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" +@@ -915,6 +967,14 @@ apicinterrupt THERMAL_APIC_VECTOR therm + apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt + apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt + apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt ++#ifdef CONFIG_IPIPE ++apicinterrupt2 IPIPE_RESCHEDULE_VECTOR ipipe_reschedule_interrupt ++apicinterrupt2 IPIPE_CRITICAL_VECTOR ipipe_critical_interrupt ++#endif ++#endif ++ ++#ifdef CONFIG_IPIPE ++apicinterrupt2 IPIPE_HRTIMER_VECTOR ipipe_hrtimer_interrupt + #endif + + apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt +@@ -929,7 +989,47 @@ apicinterrupt IRQ_WORK_VECTOR irq_work + */ + #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) + +-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0 ++.macro ipipe_idtentry_prologue paranoid=0 trapnr=-1 skip_label=-invalid- ++#ifdef CONFIG_IPIPE ++ movq EFLAGS(%rsp), %r14 /* regs->flags */ ++ movq %rsp, %rdi /* pt_regs pointer */ ++ movl $\trapnr, %esi /* trap number */ ++ subq $8, %rsp ++ movq %rsp, %rdx /* &flags */ ++ call __ipipe_trap_prologue ++ popq %r13 ++ mov %rax, %r12 /* save propagation status */ ++ .if \paranoid == 0 /* paranoid may not skip handler */ ++ testl %eax, %eax ++ jg \skip_label /* skip regular handler if > 0 */ ++ .endif ++#endif ++.endm ++ ++.macro ipipe_idtentry_epilogue paranoid=0 skip_label=-invalid- ++#ifdef CONFIG_IPIPE ++ testl %r12d, %r12d ++ jnz 1000f ++ movq %rsp, %rdi /* pt_regs pointer */ ++ movq %r13, %rsi /* &flags from prologue */ ++ movq %r14, %rdx /* original regs->flags before fixup */ ++ call __ipipe_trap_epilogue ++1000: ++ .if \paranoid == 0 /* paranoid implies normal epilogue */ ++ testl %r12d, %r12d ++ jz 1001f ++\skip_label: ++ UNWIND_HINT_REGS ++ DISABLE_INTERRUPTS(CLBR_ANY) ++ testb $3, CS(%rsp) ++ jz retint_kernel_early ++ jmp retint_user_early ++ .endif ++1001: ++#endif ++.endm ++ ++.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0 trapnr=-1 + ENTRY(\sym) + UNWIND_HINT_IRET_REGS offset=\has_error_code*8 + +@@ -979,6 +1079,8 @@ ENTRY(\sym) + .endif + .endif + ++ ipipe_idtentry_prologue paranoid=\paranoid trapnr=\trapnr skip_label=kernel_skip_\@ ++ + movq %rsp, %rdi /* pt_regs pointer */ + + .if \has_error_code +@@ -994,6 +1096,8 @@ ENTRY(\sym) + + call \do_sym + ++ ipipe_idtentry_epilogue paranoid=\paranoid skip_label=kernel_skip_\@ ++ + .if \shift_ist != -1 + addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) + .endif +@@ -1014,6 +1118,8 @@ ENTRY(\sym) + .Lfrom_usermode_switch_stack_\@: + call error_entry + ++ ipipe_idtentry_prologue paranoid=\paranoid trapnr=\trapnr 
skip_label=user_skip_\@ ++ + movq %rsp, %rdi /* pt_regs pointer */ + + .if \has_error_code +@@ -1025,25 +1131,27 @@ ENTRY(\sym) + + call \do_sym + ++ ipipe_idtentry_epilogue paranoid=\paranoid skip_label=user_skip_\@ ++ + jmp error_exit + .endif + _ASM_NOKPROBE(\sym) + END(\sym) + .endm + +-idtentry divide_error do_divide_error has_error_code=0 +-idtentry overflow do_overflow has_error_code=0 +-idtentry bounds do_bounds has_error_code=0 +-idtentry invalid_op do_invalid_op has_error_code=0 +-idtentry device_not_available do_device_not_available has_error_code=0 +-idtentry double_fault do_double_fault has_error_code=1 paranoid=2 +-idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 +-idtentry invalid_TSS do_invalid_TSS has_error_code=1 +-idtentry segment_not_present do_segment_not_present has_error_code=1 +-idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 +-idtentry coprocessor_error do_coprocessor_error has_error_code=0 +-idtentry alignment_check do_alignment_check has_error_code=1 +-idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 ++idtentry divide_error do_divide_error has_error_code=0 trapnr=0 ++idtentry overflow do_overflow has_error_code=0 trapnr=4 ++idtentry bounds do_bounds has_error_code=0 trapnr=5 ++idtentry invalid_op do_invalid_op has_error_code=0 trapnr=6 ++idtentry device_not_available do_device_not_available has_error_code=0 trapnr=7 ++idtentry double_fault do_double_fault has_error_code=1 paranoid=2 trapnr=8 ++idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 trapnr=9 ++idtentry invalid_TSS do_invalid_TSS has_error_code=1 trapnr=10 ++idtentry segment_not_present do_segment_not_present has_error_code=1 trapnr=11 ++idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 trapnr=15 ++idtentry coprocessor_error do_coprocessor_error has_error_code=0 trapnr=16 ++idtentry alignment_check do_alignment_check has_error_code=1 trapnr=17 ++idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 trapnr=19 + + + /* +@@ -1087,10 +1195,14 @@ bad_gs: + ENTRY(do_softirq_own_stack) + pushq %rbp + mov %rsp, %rbp ++ HARD_COND_DISABLE_INTERRUPTS + ENTER_IRQ_STACK regs=0 old_rsp=%r11 ++ HARD_COND_ENABLE_INTERRUPTS + call __do_softirq ++ HARD_COND_DISABLE_INTERRUPTS + LEAVE_IRQ_STACK regs=0 + leaveq ++ HARD_COND_ENABLE_INTERRUPTS + ret + ENDPROC(do_softirq_own_stack) + +@@ -1191,24 +1303,28 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ + hv_stimer0_callback_vector hv_stimer0_vector_handler + #endif /* CONFIG_HYPERV */ + ++#ifdef CONFIG_IPIPE ++idtentry debug do_debug has_error_code=0 paranoid=1 trapnr=1 ++#else + idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK +-idtentry int3 do_int3 has_error_code=0 create_gap=1 +-idtentry stack_segment do_stack_segment has_error_code=1 ++#endif ++idtentry int3 do_int3 has_error_code=0 create_gap=1 trapnr=3 ++idtentry stack_segment do_stack_segment has_error_code=1 trapnr=12 + + #ifdef CONFIG_XEN + idtentry xennmi do_nmi has_error_code=0 + idtentry xendebug do_debug has_error_code=0 + #endif + +-idtentry general_protection do_general_protection has_error_code=1 +-idtentry page_fault do_page_fault has_error_code=1 ++idtentry general_protection do_general_protection has_error_code=1 trapnr=13 ++idtentry page_fault do_page_fault has_error_code=1 trapnr=14 + + #ifdef CONFIG_KVM_GUEST +-idtentry async_page_fault do_async_page_fault has_error_code=1 ++idtentry async_page_fault 
do_async_page_fault has_error_code=1 trapnr=14 + #endif + + #ifdef CONFIG_X86_MCE +-idtentry machine_check do_mce has_error_code=0 paranoid=1 ++idtentry machine_check do_mce has_error_code=0 paranoid=1 trapnr=18 + #endif + + /* +diff -uprN kernel/arch/x86/entry/thunk_64.S kernel_new/arch/x86/entry/thunk_64.S +--- kernel/arch/x86/entry/thunk_64.S 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/entry/thunk_64.S 2021-04-01 18:28:07.651863292 +0800 +@@ -40,6 +40,7 @@ + + #ifdef CONFIG_TRACE_IRQFLAGS + THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1 ++ THUNK trace_hardirqs_on_virt_thunk,trace_hardirqs_on_virt_caller,1 + THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 + #endif + +diff -uprN kernel/arch/x86/entry/vsyscall/vsyscall_gtod.c kernel_new/arch/x86/entry/vsyscall/vsyscall_gtod.c +--- kernel/arch/x86/entry/vsyscall/vsyscall_gtod.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/entry/vsyscall/vsyscall_gtod.c 2021-04-01 18:28:07.651863292 +0800 +@@ -14,6 +14,7 @@ + */ + + #include ++#include + #include + #include + +@@ -75,4 +76,7 @@ void update_vsyscall(struct timekeeper * + } + + gtod_write_end(vdata); ++ ++ if (tk->tkr_mono.clock == &clocksource_tsc) ++ ipipe_update_hostrt(tk); + } +diff -uprN kernel/arch/x86/events/core.c kernel_new/arch/x86/events/core.c +--- kernel/arch/x86/events/core.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/events/core.c 2021-04-01 18:28:07.651863292 +0800 +@@ -2111,7 +2111,7 @@ static int x86_pmu_event_init(struct per + + static void refresh_pce(void *ignored) + { +- load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm)); ++ load_mm_cr4_irqsoff(this_cpu_read(cpu_tlbstate.loaded_mm)); + } + + static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) +diff -uprN kernel/arch/x86/events/core.c.orig kernel_new/arch/x86/events/core.c.orig +--- kernel/arch/x86/events/core.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/events/core.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,2568 @@ ++/* ++ * Performance events x86 architecture code ++ * ++ * Copyright (C) 2008 Thomas Gleixner ++ * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar ++ * Copyright (C) 2009 Jaswinder Singh Rajput ++ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter ++ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra ++ * Copyright (C) 2009 Intel Corporation, ++ * Copyright (C) 2009 Google, Inc., Stephane Eranian ++ * ++ * For licencing details see kernel-base/COPYING ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "perf_event.h" ++ ++struct x86_pmu x86_pmu __read_mostly; ++ ++DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { ++ .enabled = 1, ++}; ++ ++DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key); ++ ++u64 __read_mostly hw_cache_event_ids ++ [PERF_COUNT_HW_CACHE_MAX] ++ [PERF_COUNT_HW_CACHE_OP_MAX] ++ [PERF_COUNT_HW_CACHE_RESULT_MAX]; ++u64 __read_mostly hw_cache_extra_regs ++ [PERF_COUNT_HW_CACHE_MAX] ++ [PERF_COUNT_HW_CACHE_OP_MAX] ++ [PERF_COUNT_HW_CACHE_RESULT_MAX]; ++ ++/* ++ * Propagate event elapsed time into the generic event. ++ * Can only be executed on the CPU where the event is active. ++ * Returns the delta events processed. 
++ */ ++u64 x86_perf_event_update(struct perf_event *event) ++{ ++ struct hw_perf_event *hwc = &event->hw; ++ int shift = 64 - x86_pmu.cntval_bits; ++ u64 prev_raw_count, new_raw_count; ++ int idx = hwc->idx; ++ u64 delta; ++ ++ if (idx == INTEL_PMC_IDX_FIXED_BTS) ++ return 0; ++ ++ /* ++ * Careful: an NMI might modify the previous event value. ++ * ++ * Our tactic to handle this is to first atomically read and ++ * exchange a new raw count - then add that new-prev delta ++ * count to the generic event atomically: ++ */ ++again: ++ prev_raw_count = local64_read(&hwc->prev_count); ++ rdpmcl(hwc->event_base_rdpmc, new_raw_count); ++ ++ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, ++ new_raw_count) != prev_raw_count) ++ goto again; ++ ++ /* ++ * Now we have the new raw value and have updated the prev ++ * timestamp already. We can now calculate the elapsed delta ++ * (event-)time and add that to the generic event. ++ * ++ * Careful, not all hw sign-extends above the physical width ++ * of the count. ++ */ ++ delta = (new_raw_count << shift) - (prev_raw_count << shift); ++ delta >>= shift; ++ ++ local64_add(delta, &event->count); ++ local64_sub(delta, &hwc->period_left); ++ ++ return new_raw_count; ++} ++ ++/* ++ * Find and validate any extra registers to set up. ++ */ ++static int x86_pmu_extra_regs(u64 config, struct perf_event *event) ++{ ++ struct hw_perf_event_extra *reg; ++ struct extra_reg *er; ++ ++ reg = &event->hw.extra_reg; ++ ++ if (!x86_pmu.extra_regs) ++ return 0; ++ ++ for (er = x86_pmu.extra_regs; er->msr; er++) { ++ if (er->event != (config & er->config_mask)) ++ continue; ++ if (event->attr.config1 & ~er->valid_mask) ++ return -EINVAL; ++ /* Check if the extra msrs can be safely accessed*/ ++ if (!er->extra_msr_access) ++ return -ENXIO; ++ ++ reg->idx = er->idx; ++ reg->config = event->attr.config1; ++ reg->reg = er->msr; ++ break; ++ } ++ return 0; ++} ++ ++static atomic_t active_events; ++static atomic_t pmc_refcount; ++static DEFINE_MUTEX(pmc_reserve_mutex); ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ ++static bool reserve_pmc_hardware(void) ++{ ++ int i; ++ ++ for (i = 0; i < x86_pmu.num_counters; i++) { ++ if (!reserve_perfctr_nmi(x86_pmu_event_addr(i))) ++ goto perfctr_fail; ++ } ++ ++ for (i = 0; i < x86_pmu.num_counters; i++) { ++ if (!reserve_evntsel_nmi(x86_pmu_config_addr(i))) ++ goto eventsel_fail; ++ } ++ ++ return true; ++ ++eventsel_fail: ++ for (i--; i >= 0; i--) ++ release_evntsel_nmi(x86_pmu_config_addr(i)); ++ ++ i = x86_pmu.num_counters; ++ ++perfctr_fail: ++ for (i--; i >= 0; i--) ++ release_perfctr_nmi(x86_pmu_event_addr(i)); ++ ++ return false; ++} ++ ++static void release_pmc_hardware(void) ++{ ++ int i; ++ ++ for (i = 0; i < x86_pmu.num_counters; i++) { ++ release_perfctr_nmi(x86_pmu_event_addr(i)); ++ release_evntsel_nmi(x86_pmu_config_addr(i)); ++ } ++} ++ ++#else ++ ++static bool reserve_pmc_hardware(void) { return true; } ++static void release_pmc_hardware(void) {} ++ ++#endif ++ ++static bool check_hw_exists(void) ++{ ++ u64 val, val_fail = -1, val_new= ~0; ++ int i, reg, reg_fail = -1, ret = 0; ++ int bios_fail = 0; ++ int reg_safe = -1; ++ ++ /* ++ * Check to see if the BIOS enabled any of the counters, if so ++ * complain and bail. 
++ */ ++ for (i = 0; i < x86_pmu.num_counters; i++) { ++ reg = x86_pmu_config_addr(i); ++ ret = rdmsrl_safe(reg, &val); ++ if (ret) ++ goto msr_fail; ++ if (val & ARCH_PERFMON_EVENTSEL_ENABLE) { ++ bios_fail = 1; ++ val_fail = val; ++ reg_fail = reg; ++ } else { ++ reg_safe = i; ++ } ++ } ++ ++ if (x86_pmu.num_counters_fixed) { ++ reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; ++ ret = rdmsrl_safe(reg, &val); ++ if (ret) ++ goto msr_fail; ++ for (i = 0; i < x86_pmu.num_counters_fixed; i++) { ++ if (val & (0x03 << i*4)) { ++ bios_fail = 1; ++ val_fail = val; ++ reg_fail = reg; ++ } ++ } ++ } ++ ++ /* ++ * If all the counters are enabled, the below test will always ++ * fail. The tools will also become useless in this scenario. ++ * Just fail and disable the hardware counters. ++ */ ++ ++ if (reg_safe == -1) { ++ reg = reg_safe; ++ goto msr_fail; ++ } ++ ++ /* ++ * Read the current value, change it and read it back to see if it ++ * matches, this is needed to detect certain hardware emulators ++ * (qemu/kvm) that don't trap on the MSR access and always return 0s. ++ */ ++ reg = x86_pmu_event_addr(reg_safe); ++ if (rdmsrl_safe(reg, &val)) ++ goto msr_fail; ++ val ^= 0xffffUL; ++ ret = wrmsrl_safe(reg, val); ++ ret |= rdmsrl_safe(reg, &val_new); ++ if (ret || val != val_new) ++ goto msr_fail; ++ ++ /* ++ * We still allow the PMU driver to operate: ++ */ ++ if (bios_fail) { ++ pr_cont("Broken BIOS detected, complain to your hardware vendor.\n"); ++ pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", ++ reg_fail, val_fail); ++ } ++ ++ return true; ++ ++msr_fail: ++ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { ++ pr_cont("PMU not available due to virtualization, using software events only.\n"); ++ } else { ++ pr_cont("Broken PMU hardware detected, using software events only.\n"); ++ pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n", ++ reg, val_new); ++ } ++ ++ return false; ++} ++ ++static void hw_perf_event_destroy(struct perf_event *event) ++{ ++ x86_release_hardware(); ++ atomic_dec(&active_events); ++} ++ ++void hw_perf_lbr_event_destroy(struct perf_event *event) ++{ ++ hw_perf_event_destroy(event); ++ ++ /* undo the lbr/bts event accounting */ ++ x86_del_exclusive(x86_lbr_exclusive_lbr); ++} ++ ++static inline int x86_pmu_initialized(void) ++{ ++ return x86_pmu.handle_irq != NULL; ++} ++ ++static inline int ++set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) ++{ ++ struct perf_event_attr *attr = &event->attr; ++ unsigned int cache_type, cache_op, cache_result; ++ u64 config, val; ++ ++ config = attr->config; ++ ++ cache_type = (config >> 0) & 0xff; ++ if (cache_type >= PERF_COUNT_HW_CACHE_MAX) ++ return -EINVAL; ++ cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX); ++ ++ cache_op = (config >> 8) & 0xff; ++ if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) ++ return -EINVAL; ++ cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX); ++ ++ cache_result = (config >> 16) & 0xff; ++ if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) ++ return -EINVAL; ++ cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); ++ ++ val = hw_cache_event_ids[cache_type][cache_op][cache_result]; ++ ++ if (val == 0) ++ return -ENOENT; ++ ++ if (val == -1) ++ return -EINVAL; ++ ++ hwc->config |= val; ++ attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result]; ++ return x86_pmu_extra_regs(val, event); ++} ++ ++int x86_reserve_hardware(void) ++{ ++ int err = 0; ++ ++ if (!atomic_inc_not_zero(&pmc_refcount)) { ++ 
mutex_lock(&pmc_reserve_mutex); ++ if (atomic_read(&pmc_refcount) == 0) { ++ if (!reserve_pmc_hardware()) ++ err = -EBUSY; ++ else ++ reserve_ds_buffers(); ++ } ++ if (!err) ++ atomic_inc(&pmc_refcount); ++ mutex_unlock(&pmc_reserve_mutex); ++ } ++ ++ return err; ++} ++ ++void x86_release_hardware(void) ++{ ++ if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) { ++ release_pmc_hardware(); ++ release_ds_buffers(); ++ mutex_unlock(&pmc_reserve_mutex); ++ } ++} ++ ++/* ++ * Check if we can create event of a certain type (that no conflicting events ++ * are present). ++ */ ++int x86_add_exclusive(unsigned int what) ++{ ++ int i; ++ ++ /* ++ * When lbr_pt_coexist we allow PT to coexist with either LBR or BTS. ++ * LBR and BTS are still mutually exclusive. ++ */ ++ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) ++ goto out; ++ ++ if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { ++ mutex_lock(&pmc_reserve_mutex); ++ for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { ++ if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) ++ goto fail_unlock; ++ } ++ atomic_inc(&x86_pmu.lbr_exclusive[what]); ++ mutex_unlock(&pmc_reserve_mutex); ++ } ++ ++out: ++ atomic_inc(&active_events); ++ return 0; ++ ++fail_unlock: ++ mutex_unlock(&pmc_reserve_mutex); ++ return -EBUSY; ++} ++ ++void x86_del_exclusive(unsigned int what) ++{ ++ atomic_dec(&active_events); ++ ++ /* ++ * See the comment in x86_add_exclusive(). ++ */ ++ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) ++ return; ++ ++ atomic_dec(&x86_pmu.lbr_exclusive[what]); ++} ++ ++int x86_setup_perfctr(struct perf_event *event) ++{ ++ struct perf_event_attr *attr = &event->attr; ++ struct hw_perf_event *hwc = &event->hw; ++ u64 config; ++ ++ if (!is_sampling_event(event)) { ++ hwc->sample_period = x86_pmu.max_period; ++ hwc->last_period = hwc->sample_period; ++ local64_set(&hwc->period_left, hwc->sample_period); ++ } ++ ++ if (attr->type == PERF_TYPE_RAW) ++ return x86_pmu_extra_regs(event->attr.config, event); ++ ++ if (attr->type == PERF_TYPE_HW_CACHE) ++ return set_ext_hw_attr(hwc, event); ++ ++ if (attr->config >= x86_pmu.max_events) ++ return -EINVAL; ++ ++ attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events); ++ ++ /* ++ * The generic map: ++ */ ++ config = x86_pmu.event_map(attr->config); ++ ++ if (config == 0) ++ return -ENOENT; ++ ++ if (config == -1LL) ++ return -EINVAL; ++ ++ hwc->config |= config; ++ ++ return 0; ++} ++ ++/* ++ * check that branch_sample_type is compatible with ++ * settings needed for precise_ip > 1 which implies ++ * using the LBR to capture ALL taken branches at the ++ * priv levels of the measurement ++ */ ++static inline int precise_br_compat(struct perf_event *event) ++{ ++ u64 m = event->attr.branch_sample_type; ++ u64 b = 0; ++ ++ /* must capture all branches */ ++ if (!(m & PERF_SAMPLE_BRANCH_ANY)) ++ return 0; ++ ++ m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER; ++ ++ if (!event->attr.exclude_user) ++ b |= PERF_SAMPLE_BRANCH_USER; ++ ++ if (!event->attr.exclude_kernel) ++ b |= PERF_SAMPLE_BRANCH_KERNEL; ++ ++ /* ++ * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86 ++ */ ++ ++ return m == b; ++} ++ ++int x86_pmu_max_precise(void) ++{ ++ int precise = 0; ++ ++ /* Support for constant skid */ ++ if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { ++ precise++; ++ ++ /* Support for IP fixup */ ++ if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2) ++ precise++; ++ ++ if (x86_pmu.pebs_prec_dist) ++ precise++; ++ } ++ 
return precise; ++} ++ ++int x86_pmu_hw_config(struct perf_event *event) ++{ ++ if (event->attr.precise_ip) { ++ int precise = x86_pmu_max_precise(); ++ ++ if (event->attr.precise_ip > precise) ++ return -EOPNOTSUPP; ++ ++ /* There's no sense in having PEBS for non sampling events: */ ++ if (!is_sampling_event(event)) ++ return -EINVAL; ++ } ++ /* ++ * check that PEBS LBR correction does not conflict with ++ * whatever the user is asking with attr->branch_sample_type ++ */ ++ if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) { ++ u64 *br_type = &event->attr.branch_sample_type; ++ ++ if (has_branch_stack(event)) { ++ if (!precise_br_compat(event)) ++ return -EOPNOTSUPP; ++ ++ /* branch_sample_type is compatible */ ++ ++ } else { ++ /* ++ * user did not specify branch_sample_type ++ * ++ * For PEBS fixups, we capture all ++ * the branches at the priv level of the ++ * event. ++ */ ++ *br_type = PERF_SAMPLE_BRANCH_ANY; ++ ++ if (!event->attr.exclude_user) ++ *br_type |= PERF_SAMPLE_BRANCH_USER; ++ ++ if (!event->attr.exclude_kernel) ++ *br_type |= PERF_SAMPLE_BRANCH_KERNEL; ++ } ++ } ++ ++ if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK) ++ event->attach_state |= PERF_ATTACH_TASK_DATA; ++ ++ /* ++ * Generate PMC IRQs: ++ * (keep 'enabled' bit clear for now) ++ */ ++ event->hw.config = ARCH_PERFMON_EVENTSEL_INT; ++ ++ /* ++ * Count user and OS events unless requested not to ++ */ ++ if (!event->attr.exclude_user) ++ event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; ++ if (!event->attr.exclude_kernel) ++ event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; ++ ++ if (event->attr.type == PERF_TYPE_RAW) ++ event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; ++ ++ if (event->attr.sample_period && x86_pmu.limit_period) { ++ if (x86_pmu.limit_period(event, event->attr.sample_period) > ++ event->attr.sample_period) ++ return -EINVAL; ++ } ++ ++ return x86_setup_perfctr(event); ++} ++ ++/* ++ * Setup the hardware configuration for a given attr_type ++ */ ++static int __x86_pmu_event_init(struct perf_event *event) ++{ ++ int err; ++ ++ if (!x86_pmu_initialized()) ++ return -ENODEV; ++ ++ err = x86_reserve_hardware(); ++ if (err) ++ return err; ++ ++ atomic_inc(&active_events); ++ event->destroy = hw_perf_event_destroy; ++ ++ event->hw.idx = -1; ++ event->hw.last_cpu = -1; ++ event->hw.last_tag = ~0ULL; ++ ++ /* mark unused */ ++ event->hw.extra_reg.idx = EXTRA_REG_NONE; ++ event->hw.branch_reg.idx = EXTRA_REG_NONE; ++ ++ return x86_pmu.hw_config(event); ++} ++ ++void x86_pmu_disable_all(void) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int idx; ++ ++ for (idx = 0; idx < x86_pmu.num_counters; idx++) { ++ u64 val; ++ ++ if (!test_bit(idx, cpuc->active_mask)) ++ continue; ++ rdmsrl(x86_pmu_config_addr(idx), val); ++ if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) ++ continue; ++ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; ++ wrmsrl(x86_pmu_config_addr(idx), val); ++ } ++} ++ ++/* ++ * There may be PMI landing after enabled=0. The PMI hitting could be before or ++ * after disable_all. ++ * ++ * If PMI hits before disable_all, the PMU will be disabled in the NMI handler. ++ * It will not be re-enabled in the NMI handler again, because enabled=0. After ++ * handling the NMI, disable_all will be called, which will not change the ++ * state either. If PMI hits after disable_all, the PMU is already disabled ++ * before entering NMI handler. The NMI handler will not change the state ++ * either. ++ * ++ * So either situation is harmless. 
++ */ ++static void x86_pmu_disable(struct pmu *pmu) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ ++ if (!x86_pmu_initialized()) ++ return; ++ ++ if (!cpuc->enabled) ++ return; ++ ++ cpuc->n_added = 0; ++ cpuc->enabled = 0; ++ barrier(); ++ ++ x86_pmu.disable_all(); ++} ++ ++void x86_pmu_enable_all(int added) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int idx; ++ ++ for (idx = 0; idx < x86_pmu.num_counters; idx++) { ++ struct hw_perf_event *hwc = &cpuc->events[idx]->hw; ++ ++ if (!test_bit(idx, cpuc->active_mask)) ++ continue; ++ ++ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); ++ } ++} ++ ++static struct pmu pmu; ++ ++static inline int is_x86_event(struct perf_event *event) ++{ ++ return event->pmu == &pmu; ++} ++ ++/* ++ * Event scheduler state: ++ * ++ * Assign events iterating over all events and counters, beginning ++ * with events with least weights first. Keep the current iterator ++ * state in struct sched_state. ++ */ ++struct sched_state { ++ int weight; ++ int event; /* event index */ ++ int counter; /* counter index */ ++ int unassigned; /* number of events to be assigned left */ ++ int nr_gp; /* number of GP counters used */ ++ unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; ++}; ++ ++/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ ++#define SCHED_STATES_MAX 2 ++ ++struct perf_sched { ++ int max_weight; ++ int max_events; ++ int max_gp; ++ int saved_states; ++ struct event_constraint **constraints; ++ struct sched_state state; ++ struct sched_state saved[SCHED_STATES_MAX]; ++}; ++ ++/* ++ * Initialize interator that runs through all events and counters. ++ */ ++static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints, ++ int num, int wmin, int wmax, int gpmax) ++{ ++ int idx; ++ ++ memset(sched, 0, sizeof(*sched)); ++ sched->max_events = num; ++ sched->max_weight = wmax; ++ sched->max_gp = gpmax; ++ sched->constraints = constraints; ++ ++ for (idx = 0; idx < num; idx++) { ++ if (constraints[idx]->weight == wmin) ++ break; ++ } ++ ++ sched->state.event = idx; /* start with min weight */ ++ sched->state.weight = wmin; ++ sched->state.unassigned = num; ++} ++ ++static void perf_sched_save_state(struct perf_sched *sched) ++{ ++ if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX)) ++ return; ++ ++ sched->saved[sched->saved_states] = sched->state; ++ sched->saved_states++; ++} ++ ++static bool perf_sched_restore_state(struct perf_sched *sched) ++{ ++ if (!sched->saved_states) ++ return false; ++ ++ sched->saved_states--; ++ sched->state = sched->saved[sched->saved_states]; ++ ++ /* continue with next counter: */ ++ clear_bit(sched->state.counter++, sched->state.used); ++ ++ return true; ++} ++ ++/* ++ * Select a counter for the current event to schedule. Return true on ++ * success. 
++ */ ++static bool __perf_sched_find_counter(struct perf_sched *sched) ++{ ++ struct event_constraint *c; ++ int idx; ++ ++ if (!sched->state.unassigned) ++ return false; ++ ++ if (sched->state.event >= sched->max_events) ++ return false; ++ ++ c = sched->constraints[sched->state.event]; ++ /* Prefer fixed purpose counters */ ++ if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { ++ idx = INTEL_PMC_IDX_FIXED; ++ for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { ++ if (!__test_and_set_bit(idx, sched->state.used)) ++ goto done; ++ } ++ } ++ ++ /* Grab the first unused counter starting with idx */ ++ idx = sched->state.counter; ++ for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { ++ if (!__test_and_set_bit(idx, sched->state.used)) { ++ if (sched->state.nr_gp++ >= sched->max_gp) ++ return false; ++ ++ goto done; ++ } ++ } ++ ++ return false; ++ ++done: ++ sched->state.counter = idx; ++ ++ if (c->overlap) ++ perf_sched_save_state(sched); ++ ++ return true; ++} ++ ++static bool perf_sched_find_counter(struct perf_sched *sched) ++{ ++ while (!__perf_sched_find_counter(sched)) { ++ if (!perf_sched_restore_state(sched)) ++ return false; ++ } ++ ++ return true; ++} ++ ++/* ++ * Go through all unassigned events and find the next one to schedule. ++ * Take events with the least weight first. Return true on success. ++ */ ++static bool perf_sched_next_event(struct perf_sched *sched) ++{ ++ struct event_constraint *c; ++ ++ if (!sched->state.unassigned || !--sched->state.unassigned) ++ return false; ++ ++ do { ++ /* next event */ ++ sched->state.event++; ++ if (sched->state.event >= sched->max_events) { ++ /* next weight */ ++ sched->state.event = 0; ++ sched->state.weight++; ++ if (sched->state.weight > sched->max_weight) ++ return false; ++ } ++ c = sched->constraints[sched->state.event]; ++ } while (c->weight != sched->state.weight); ++ ++ sched->state.counter = 0; /* start with first counter */ ++ ++ return true; ++} ++ ++/* ++ * Assign a counter for each event. 
++ */ ++int perf_assign_events(struct event_constraint **constraints, int n, ++ int wmin, int wmax, int gpmax, int *assign) ++{ ++ struct perf_sched sched; ++ ++ perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax); ++ ++ do { ++ if (!perf_sched_find_counter(&sched)) ++ break; /* failed */ ++ if (assign) ++ assign[sched.state.event] = sched.state.counter; ++ } while (perf_sched_next_event(&sched)); ++ ++ return sched.state.unassigned; ++} ++EXPORT_SYMBOL_GPL(perf_assign_events); ++ ++int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) ++{ ++ struct event_constraint *c; ++ unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; ++ struct perf_event *e; ++ int i, wmin, wmax, unsched = 0; ++ struct hw_perf_event *hwc; ++ ++ bitmap_zero(used_mask, X86_PMC_IDX_MAX); ++ ++ if (x86_pmu.start_scheduling) ++ x86_pmu.start_scheduling(cpuc); ++ ++ for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { ++ cpuc->event_constraint[i] = NULL; ++ c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); ++ cpuc->event_constraint[i] = c; ++ ++ wmin = min(wmin, c->weight); ++ wmax = max(wmax, c->weight); ++ } ++ ++ /* ++ * fastpath, try to reuse previous register ++ */ ++ for (i = 0; i < n; i++) { ++ hwc = &cpuc->event_list[i]->hw; ++ c = cpuc->event_constraint[i]; ++ ++ /* never assigned */ ++ if (hwc->idx == -1) ++ break; ++ ++ /* constraint still honored */ ++ if (!test_bit(hwc->idx, c->idxmsk)) ++ break; ++ ++ /* not already used */ ++ if (test_bit(hwc->idx, used_mask)) ++ break; ++ ++ __set_bit(hwc->idx, used_mask); ++ if (assign) ++ assign[i] = hwc->idx; ++ } ++ ++ /* slow path */ ++ if (i != n) { ++ int gpmax = x86_pmu.num_counters; ++ ++ /* ++ * Do not allow scheduling of more than half the available ++ * generic counters. ++ * ++ * This helps avoid counter starvation of sibling thread by ++ * ensuring at most half the counters cannot be in exclusive ++ * mode. There is no designated counters for the limits. Any ++ * N/2 counters can be used. This helps with events with ++ * specific counter constraints. ++ */ ++ if (is_ht_workaround_enabled() && !cpuc->is_fake && ++ READ_ONCE(cpuc->excl_cntrs->exclusive_present)) ++ gpmax /= 2; ++ ++ unsched = perf_assign_events(cpuc->event_constraint, n, wmin, ++ wmax, gpmax, assign); ++ } ++ ++ /* ++ * In case of success (unsched = 0), mark events as committed, ++ * so we do not put_constraint() in case new events are added ++ * and fail to be scheduled ++ * ++ * We invoke the lower level commit callback to lock the resource ++ * ++ * We do not need to do all of this in case we are called to ++ * validate an event group (assign == NULL) ++ */ ++ if (!unsched && assign) { ++ for (i = 0; i < n; i++) { ++ e = cpuc->event_list[i]; ++ e->hw.flags |= PERF_X86_EVENT_COMMITTED; ++ if (x86_pmu.commit_scheduling) ++ x86_pmu.commit_scheduling(cpuc, i, assign[i]); ++ } ++ } else { ++ for (i = 0; i < n; i++) { ++ e = cpuc->event_list[i]; ++ /* ++ * do not put_constraint() on comitted events, ++ * because they are good to go ++ */ ++ if ((e->hw.flags & PERF_X86_EVENT_COMMITTED)) ++ continue; ++ ++ /* ++ * release events that failed scheduling ++ */ ++ if (x86_pmu.put_event_constraints) ++ x86_pmu.put_event_constraints(cpuc, e); ++ } ++ } ++ ++ if (x86_pmu.stop_scheduling) ++ x86_pmu.stop_scheduling(cpuc); ++ ++ return unsched ? 
-EINVAL : 0; ++} ++ ++/* ++ * dogrp: true if must collect siblings events (group) ++ * returns total number of events and error code ++ */ ++static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) ++{ ++ struct perf_event *event; ++ int n, max_count; ++ ++ max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; ++ ++ /* current number of events already accepted */ ++ n = cpuc->n_events; ++ ++ if (is_x86_event(leader)) { ++ if (n >= max_count) ++ return -EINVAL; ++ cpuc->event_list[n] = leader; ++ n++; ++ } ++ if (!dogrp) ++ return n; ++ ++ for_each_sibling_event(event, leader) { ++ if (!is_x86_event(event) || ++ event->state <= PERF_EVENT_STATE_OFF) ++ continue; ++ ++ if (n >= max_count) ++ return -EINVAL; ++ ++ cpuc->event_list[n] = event; ++ n++; ++ } ++ return n; ++} ++ ++static inline void x86_assign_hw_event(struct perf_event *event, ++ struct cpu_hw_events *cpuc, int i) ++{ ++ struct hw_perf_event *hwc = &event->hw; ++ ++ hwc->idx = cpuc->assign[i]; ++ hwc->last_cpu = smp_processor_id(); ++ hwc->last_tag = ++cpuc->tags[i]; ++ ++ if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) { ++ hwc->config_base = 0; ++ hwc->event_base = 0; ++ } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) { ++ hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; ++ hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED); ++ hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30; ++ } else { ++ hwc->config_base = x86_pmu_config_addr(hwc->idx); ++ hwc->event_base = x86_pmu_event_addr(hwc->idx); ++ hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx); ++ } ++} ++ ++static inline int match_prev_assignment(struct hw_perf_event *hwc, ++ struct cpu_hw_events *cpuc, ++ int i) ++{ ++ return hwc->idx == cpuc->assign[i] && ++ hwc->last_cpu == smp_processor_id() && ++ hwc->last_tag == cpuc->tags[i]; ++} ++ ++static void x86_pmu_start(struct perf_event *event, int flags); ++ ++static void x86_pmu_enable(struct pmu *pmu) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ struct perf_event *event; ++ struct hw_perf_event *hwc; ++ int i, added = cpuc->n_added; ++ ++ if (!x86_pmu_initialized()) ++ return; ++ ++ if (cpuc->enabled) ++ return; ++ ++ if (cpuc->n_added) { ++ int n_running = cpuc->n_events - cpuc->n_added; ++ /* ++ * apply assignment obtained either from ++ * hw_perf_group_sched_in() or x86_pmu_enable() ++ * ++ * step1: save events moving to new counters ++ */ ++ for (i = 0; i < n_running; i++) { ++ event = cpuc->event_list[i]; ++ hwc = &event->hw; ++ ++ /* ++ * we can avoid reprogramming counter if: ++ * - assigned same counter as last time ++ * - running on same CPU as last time ++ * - no other event has used the counter since ++ */ ++ if (hwc->idx == -1 || ++ match_prev_assignment(hwc, cpuc, i)) ++ continue; ++ ++ /* ++ * Ensure we don't accidentally enable a stopped ++ * counter simply because we rescheduled. 
++ */ ++ if (hwc->state & PERF_HES_STOPPED) ++ hwc->state |= PERF_HES_ARCH; ++ ++ x86_pmu_stop(event, PERF_EF_UPDATE); ++ } ++ ++ /* ++ * step2: reprogram moved events into new counters ++ */ ++ for (i = 0; i < cpuc->n_events; i++) { ++ event = cpuc->event_list[i]; ++ hwc = &event->hw; ++ ++ if (!match_prev_assignment(hwc, cpuc, i)) ++ x86_assign_hw_event(event, cpuc, i); ++ else if (i < n_running) ++ continue; ++ ++ if (hwc->state & PERF_HES_ARCH) ++ continue; ++ ++ x86_pmu_start(event, PERF_EF_RELOAD); ++ } ++ cpuc->n_added = 0; ++ perf_events_lapic_init(); ++ } ++ ++ cpuc->enabled = 1; ++ barrier(); ++ ++ x86_pmu.enable_all(added); ++} ++ ++static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); ++ ++/* ++ * Set the next IRQ period, based on the hwc->period_left value. ++ * To be called with the event disabled in hw: ++ */ ++int x86_perf_event_set_period(struct perf_event *event) ++{ ++ struct hw_perf_event *hwc = &event->hw; ++ s64 left = local64_read(&hwc->period_left); ++ s64 period = hwc->sample_period; ++ int ret = 0, idx = hwc->idx; ++ ++ if (idx == INTEL_PMC_IDX_FIXED_BTS) ++ return 0; ++ ++ /* ++ * If we are way outside a reasonable range then just skip forward: ++ */ ++ if (unlikely(left <= -period)) { ++ left = period; ++ local64_set(&hwc->period_left, left); ++ hwc->last_period = period; ++ ret = 1; ++ } ++ ++ if (unlikely(left <= 0)) { ++ left += period; ++ local64_set(&hwc->period_left, left); ++ hwc->last_period = period; ++ ret = 1; ++ } ++ /* ++ * Quirk: certain CPUs dont like it if just 1 hw_event is left: ++ */ ++ if (unlikely(left < 2)) ++ left = 2; ++ ++ if (left > x86_pmu.max_period) ++ left = x86_pmu.max_period; ++ ++ if (x86_pmu.limit_period) ++ left = x86_pmu.limit_period(event, left); ++ ++ per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; ++ ++ /* ++ * The hw event starts counting from this event offset, ++ * mark it to be able to extra future deltas: ++ */ ++ local64_set(&hwc->prev_count, (u64)-left); ++ ++ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); ++ ++ /* ++ * Due to erratum on certan cpu we need ++ * a second write to be sure the register ++ * is updated properly ++ */ ++ if (x86_pmu.perfctr_second_write) { ++ wrmsrl(hwc->event_base, ++ (u64)(-left) & x86_pmu.cntval_mask); ++ } ++ ++ perf_event_update_userpage(event); ++ ++ return ret; ++} ++ ++void x86_pmu_enable_event(struct perf_event *event) ++{ ++ if (__this_cpu_read(cpu_hw_events.enabled)) ++ __x86_pmu_enable_event(&event->hw, ++ ARCH_PERFMON_EVENTSEL_ENABLE); ++} ++ ++/* ++ * Add a single event to the PMU. ++ * ++ * The event is added to the group of enabled events ++ * but only if it can be scehduled with existing events. ++ */ ++static int x86_pmu_add(struct perf_event *event, int flags) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ struct hw_perf_event *hwc; ++ int assign[X86_PMC_IDX_MAX]; ++ int n, n0, ret; ++ ++ hwc = &event->hw; ++ ++ n0 = cpuc->n_events; ++ ret = n = collect_events(cpuc, event, false); ++ if (ret < 0) ++ goto out; ++ ++ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; ++ if (!(flags & PERF_EF_START)) ++ hwc->state |= PERF_HES_ARCH; ++ ++ /* ++ * If group events scheduling transaction was started, ++ * skip the schedulability test here, it will be performed ++ * at commit time (->commit_txn) as a whole. ++ * ++ * If commit fails, we'll call ->del() on all events ++ * for which ->add() was called. 
++ */ ++ if (cpuc->txn_flags & PERF_PMU_TXN_ADD) ++ goto done_collect; ++ ++ ret = x86_pmu.schedule_events(cpuc, n, assign); ++ if (ret) ++ goto out; ++ /* ++ * copy new assignment, now we know it is possible ++ * will be used by hw_perf_enable() ++ */ ++ memcpy(cpuc->assign, assign, n*sizeof(int)); ++ ++done_collect: ++ /* ++ * Commit the collect_events() state. See x86_pmu_del() and ++ * x86_pmu_*_txn(). ++ */ ++ cpuc->n_events = n; ++ cpuc->n_added += n - n0; ++ cpuc->n_txn += n - n0; ++ ++ if (x86_pmu.add) { ++ /* ++ * This is before x86_pmu_enable() will call x86_pmu_start(), ++ * so we enable LBRs before an event needs them etc.. ++ */ ++ x86_pmu.add(event); ++ } ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++static void x86_pmu_start(struct perf_event *event, int flags) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int idx = event->hw.idx; ++ ++ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) ++ return; ++ ++ if (WARN_ON_ONCE(idx == -1)) ++ return; ++ ++ if (flags & PERF_EF_RELOAD) { ++ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); ++ x86_perf_event_set_period(event); ++ } ++ ++ event->hw.state = 0; ++ ++ cpuc->events[idx] = event; ++ __set_bit(idx, cpuc->active_mask); ++ __set_bit(idx, cpuc->running); ++ x86_pmu.enable(event); ++ perf_event_update_userpage(event); ++} ++ ++void perf_event_print_debug(void) ++{ ++ u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; ++ u64 pebs, debugctl; ++ struct cpu_hw_events *cpuc; ++ unsigned long flags; ++ int cpu, idx; ++ ++ if (!x86_pmu.num_counters) ++ return; ++ ++ local_irq_save(flags); ++ ++ cpu = smp_processor_id(); ++ cpuc = &per_cpu(cpu_hw_events, cpu); ++ ++ if (x86_pmu.version >= 2) { ++ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); ++ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); ++ rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); ++ rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); ++ ++ pr_info("\n"); ++ pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); ++ pr_info("CPU#%d: status: %016llx\n", cpu, status); ++ pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); ++ pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); ++ if (x86_pmu.pebs_constraints) { ++ rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); ++ pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); ++ } ++ if (x86_pmu.lbr_nr) { ++ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); ++ pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl); ++ } ++ } ++ pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); ++ ++ for (idx = 0; idx < x86_pmu.num_counters; idx++) { ++ rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); ++ rdmsrl(x86_pmu_event_addr(idx), pmc_count); ++ ++ prev_left = per_cpu(pmc_prev_left[idx], cpu); ++ ++ pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", ++ cpu, idx, pmc_ctrl); ++ pr_info("CPU#%d: gen-PMC%d count: %016llx\n", ++ cpu, idx, pmc_count); ++ pr_info("CPU#%d: gen-PMC%d left: %016llx\n", ++ cpu, idx, prev_left); ++ } ++ for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { ++ rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); ++ ++ pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", ++ cpu, idx, pmc_count); ++ } ++ local_irq_restore(flags); ++} ++ ++void x86_pmu_stop(struct perf_event *event, int flags) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ struct hw_perf_event *hwc = &event->hw; ++ ++ if (test_bit(hwc->idx, cpuc->active_mask)) { ++ x86_pmu.disable(event); ++ __clear_bit(hwc->idx, cpuc->active_mask); ++ cpuc->events[hwc->idx] = NULL; ++ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); ++ hwc->state |= 
PERF_HES_STOPPED; ++ } ++ ++ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { ++ /* ++ * Drain the remaining delta count out of a event ++ * that we are disabling: ++ */ ++ x86_perf_event_update(event); ++ hwc->state |= PERF_HES_UPTODATE; ++ } ++} ++ ++static void x86_pmu_del(struct perf_event *event, int flags) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int i; ++ ++ /* ++ * event is descheduled ++ */ ++ event->hw.flags &= ~PERF_X86_EVENT_COMMITTED; ++ ++ /* ++ * If we're called during a txn, we only need to undo x86_pmu.add. ++ * The events never got scheduled and ->cancel_txn will truncate ++ * the event_list. ++ * ++ * XXX assumes any ->del() called during a TXN will only be on ++ * an event added during that same TXN. ++ */ ++ if (cpuc->txn_flags & PERF_PMU_TXN_ADD) ++ goto do_del; ++ ++ /* ++ * Not a TXN, therefore cleanup properly. ++ */ ++ x86_pmu_stop(event, PERF_EF_UPDATE); ++ ++ for (i = 0; i < cpuc->n_events; i++) { ++ if (event == cpuc->event_list[i]) ++ break; ++ } ++ ++ if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */ ++ return; ++ ++ /* If we have a newly added event; make sure to decrease n_added. */ ++ if (i >= cpuc->n_events - cpuc->n_added) ++ --cpuc->n_added; ++ ++ if (x86_pmu.put_event_constraints) ++ x86_pmu.put_event_constraints(cpuc, event); ++ ++ /* Delete the array entry. */ ++ while (++i < cpuc->n_events) { ++ cpuc->event_list[i-1] = cpuc->event_list[i]; ++ cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; ++ } ++ --cpuc->n_events; ++ ++ perf_event_update_userpage(event); ++ ++do_del: ++ if (x86_pmu.del) { ++ /* ++ * This is after x86_pmu_stop(); so we disable LBRs after any ++ * event can need them etc.. ++ */ ++ x86_pmu.del(event); ++ } ++} ++ ++int x86_pmu_handle_irq(struct pt_regs *regs) ++{ ++ struct perf_sample_data data; ++ struct cpu_hw_events *cpuc; ++ struct perf_event *event; ++ int idx, handled = 0; ++ u64 val; ++ ++ cpuc = this_cpu_ptr(&cpu_hw_events); ++ ++ /* ++ * Some chipsets need to unmask the LVTPC in a particular spot ++ * inside the nmi handler. As a result, the unmasking was pushed ++ * into all the nmi handlers. ++ * ++ * This generic handler doesn't seem to have any issues where the ++ * unmasking occurs so it was left at the top. ++ */ ++ apic_write(APIC_LVTPC, APIC_DM_NMI); ++ ++ for (idx = 0; idx < x86_pmu.num_counters; idx++) { ++ if (!test_bit(idx, cpuc->active_mask)) ++ continue; ++ ++ event = cpuc->events[idx]; ++ ++ val = x86_perf_event_update(event); ++ if (val & (1ULL << (x86_pmu.cntval_bits - 1))) ++ continue; ++ ++ /* ++ * event overflow ++ */ ++ handled++; ++ perf_sample_data_init(&data, 0, event->hw.last_period); ++ ++ if (!x86_perf_event_set_period(event)) ++ continue; ++ ++ if (perf_event_overflow(event, &data, regs)) ++ x86_pmu_stop(event, 0); ++ } ++ ++ if (handled) ++ inc_irq_stat(apic_perf_irqs); ++ ++ return handled; ++} ++ ++void perf_events_lapic_init(void) ++{ ++ if (!x86_pmu.apic || !x86_pmu_initialized()) ++ return; ++ ++ /* ++ * Always use NMI for PMU ++ */ ++ apic_write(APIC_LVTPC, APIC_DM_NMI); ++} ++ ++static int ++perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) ++{ ++ u64 start_clock; ++ u64 finish_clock; ++ int ret; ++ ++ /* ++ * All PMUs/events that share this PMI handler should make sure to ++ * increment active_events for their events. 
++ */ ++ if (!atomic_read(&active_events)) ++ return NMI_DONE; ++ ++ start_clock = sched_clock(); ++ ret = x86_pmu.handle_irq(regs); ++ finish_clock = sched_clock(); ++ ++ perf_sample_event_took(finish_clock - start_clock); ++ ++ return ret; ++} ++NOKPROBE_SYMBOL(perf_event_nmi_handler); ++ ++struct event_constraint emptyconstraint; ++struct event_constraint unconstrained; ++ ++static int x86_pmu_prepare_cpu(unsigned int cpu) ++{ ++ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); ++ int i; ++ ++ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) ++ cpuc->kfree_on_online[i] = NULL; ++ if (x86_pmu.cpu_prepare) ++ return x86_pmu.cpu_prepare(cpu); ++ return 0; ++} ++ ++static int x86_pmu_dead_cpu(unsigned int cpu) ++{ ++ if (x86_pmu.cpu_dead) ++ x86_pmu.cpu_dead(cpu); ++ return 0; ++} ++ ++static int x86_pmu_online_cpu(unsigned int cpu) ++{ ++ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); ++ int i; ++ ++ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) { ++ kfree(cpuc->kfree_on_online[i]); ++ cpuc->kfree_on_online[i] = NULL; ++ } ++ return 0; ++} ++ ++static int x86_pmu_starting_cpu(unsigned int cpu) ++{ ++ if (x86_pmu.cpu_starting) ++ x86_pmu.cpu_starting(cpu); ++ return 0; ++} ++ ++static int x86_pmu_dying_cpu(unsigned int cpu) ++{ ++ if (x86_pmu.cpu_dying) ++ x86_pmu.cpu_dying(cpu); ++ return 0; ++} ++ ++static void __init pmu_check_apic(void) ++{ ++ if (boot_cpu_has(X86_FEATURE_APIC)) ++ return; ++ ++ x86_pmu.apic = 0; ++ pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); ++ pr_info("no hardware sampling interrupt available.\n"); ++ ++ /* ++ * If we have a PMU initialized but no APIC ++ * interrupts, we cannot sample hardware ++ * events (user-space has to fall back and ++ * sample via a hrtimer based software event): ++ */ ++ pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; ++ ++} ++ ++static struct attribute_group x86_pmu_format_group = { ++ .name = "format", ++ .attrs = NULL, ++}; ++ ++/* ++ * Remove all undefined events (x86_pmu.event_map(id) == 0) ++ * out of events_attr attributes. ++ */ ++static void __init filter_events(struct attribute **attrs) ++{ ++ struct device_attribute *d; ++ struct perf_pmu_events_attr *pmu_attr; ++ int offset = 0; ++ int i, j; ++ ++ for (i = 0; attrs[i]; i++) { ++ d = (struct device_attribute *)attrs[i]; ++ pmu_attr = container_of(d, struct perf_pmu_events_attr, attr); ++ /* str trumps id */ ++ if (pmu_attr->event_str) ++ continue; ++ if (x86_pmu.event_map(i + offset)) ++ continue; ++ ++ for (j = i; attrs[j]; j++) ++ attrs[j] = attrs[j + 1]; ++ ++ /* Check the shifted attr. */ ++ i--; ++ ++ /* ++ * event_map() is index based, the attrs array is organized ++ * by increasing event index. 
If we shift the events, then ++ * we need to compensate for the event_map(), otherwise ++ * we are looking up the wrong event in the map ++ */ ++ offset++; ++ } ++} ++ ++/* Merge two pointer arrays */ ++__init struct attribute **merge_attr(struct attribute **a, struct attribute **b) ++{ ++ struct attribute **new; ++ int j, i; ++ ++ for (j = 0; a[j]; j++) ++ ; ++ for (i = 0; b[i]; i++) ++ j++; ++ j++; ++ ++ new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL); ++ if (!new) ++ return NULL; ++ ++ j = 0; ++ for (i = 0; a[i]; i++) ++ new[j++] = a[i]; ++ for (i = 0; b[i]; i++) ++ new[j++] = b[i]; ++ new[j] = NULL; ++ ++ return new; ++} ++ ++ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) ++{ ++ struct perf_pmu_events_attr *pmu_attr = \ ++ container_of(attr, struct perf_pmu_events_attr, attr); ++ u64 config = x86_pmu.event_map(pmu_attr->id); ++ ++ /* string trumps id */ ++ if (pmu_attr->event_str) ++ return sprintf(page, "%s", pmu_attr->event_str); ++ ++ return x86_pmu.events_sysfs_show(page, config); ++} ++EXPORT_SYMBOL_GPL(events_sysfs_show); ++ ++ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, ++ char *page) ++{ ++ struct perf_pmu_events_ht_attr *pmu_attr = ++ container_of(attr, struct perf_pmu_events_ht_attr, attr); ++ ++ /* ++ * Report conditional events depending on Hyper-Threading. ++ * ++ * This is overly conservative as usually the HT special ++ * handling is not needed if the other CPU thread is idle. ++ * ++ * Note this does not (and cannot) handle the case when thread ++ * siblings are invisible, for example with virtualization ++ * if they are owned by some other guest. The user tool ++ * has to re-read when a thread sibling gets onlined later. ++ */ ++ return sprintf(page, "%s", ++ topology_max_smt_threads() > 1 ? ++ pmu_attr->event_str_ht : ++ pmu_attr->event_str_noht); ++} ++ ++EVENT_ATTR(cpu-cycles, CPU_CYCLES ); ++EVENT_ATTR(instructions, INSTRUCTIONS ); ++EVENT_ATTR(cache-references, CACHE_REFERENCES ); ++EVENT_ATTR(cache-misses, CACHE_MISSES ); ++EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS ); ++EVENT_ATTR(branch-misses, BRANCH_MISSES ); ++EVENT_ATTR(bus-cycles, BUS_CYCLES ); ++EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND ); ++EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND ); ++EVENT_ATTR(ref-cycles, REF_CPU_CYCLES ); ++ ++static struct attribute *empty_attrs; ++ ++static struct attribute *events_attr[] = { ++ EVENT_PTR(CPU_CYCLES), ++ EVENT_PTR(INSTRUCTIONS), ++ EVENT_PTR(CACHE_REFERENCES), ++ EVENT_PTR(CACHE_MISSES), ++ EVENT_PTR(BRANCH_INSTRUCTIONS), ++ EVENT_PTR(BRANCH_MISSES), ++ EVENT_PTR(BUS_CYCLES), ++ EVENT_PTR(STALLED_CYCLES_FRONTEND), ++ EVENT_PTR(STALLED_CYCLES_BACKEND), ++ EVENT_PTR(REF_CPU_CYCLES), ++ NULL, ++}; ++ ++static struct attribute_group x86_pmu_events_group = { ++ .name = "events", ++ .attrs = events_attr, ++}; ++ ++ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) ++{ ++ u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; ++ u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24; ++ bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE); ++ bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL); ++ bool any = (config & ARCH_PERFMON_EVENTSEL_ANY); ++ bool inv = (config & ARCH_PERFMON_EVENTSEL_INV); ++ ssize_t ret; ++ ++ /* ++ * We have whole page size to spend and just little data ++ * to write, so we can safely use sprintf. 
++ */ ++ ret = sprintf(page, "event=0x%02llx", event); ++ ++ if (umask) ++ ret += sprintf(page + ret, ",umask=0x%02llx", umask); ++ ++ if (edge) ++ ret += sprintf(page + ret, ",edge"); ++ ++ if (pc) ++ ret += sprintf(page + ret, ",pc"); ++ ++ if (any) ++ ret += sprintf(page + ret, ",any"); ++ ++ if (inv) ++ ret += sprintf(page + ret, ",inv"); ++ ++ if (cmask) ++ ret += sprintf(page + ret, ",cmask=0x%02llx", cmask); ++ ++ ret += sprintf(page + ret, "\n"); ++ ++ return ret; ++} ++ ++static struct attribute_group x86_pmu_attr_group; ++static struct attribute_group x86_pmu_caps_group; ++ ++static int __init init_hw_perf_events(void) ++{ ++ struct x86_pmu_quirk *quirk; ++ int err; ++ ++ pr_info("Performance Events: "); ++ ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_INTEL: ++ err = intel_pmu_init(); ++ break; ++ case X86_VENDOR_AMD: ++ err = amd_pmu_init(); ++ break; ++ case X86_VENDOR_HYGON: ++ err = amd_pmu_init(); ++ x86_pmu.name = "HYGON"; ++ break; ++ default: ++ err = -ENOTSUPP; ++ } ++ if (err != 0) { ++ pr_cont("no PMU driver, software events only.\n"); ++ return 0; ++ } ++ ++ pmu_check_apic(); ++ ++ /* sanity check that the hardware exists or is emulated */ ++ if (!check_hw_exists()) ++ return 0; ++ ++ pr_cont("%s PMU driver.\n", x86_pmu.name); ++ ++ x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ ++ ++ for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) ++ quirk->func(); ++ ++ if (!x86_pmu.intel_ctrl) ++ x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; ++ ++ perf_events_lapic_init(); ++ register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); ++ ++ unconstrained = (struct event_constraint) ++ __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, ++ 0, x86_pmu.num_counters, 0, 0); ++ ++ x86_pmu_format_group.attrs = x86_pmu.format_attrs; ++ ++ if (x86_pmu.caps_attrs) { ++ struct attribute **tmp; ++ ++ tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs); ++ if (!WARN_ON(!tmp)) ++ x86_pmu_caps_group.attrs = tmp; ++ } ++ ++ if (x86_pmu.event_attrs) ++ x86_pmu_events_group.attrs = x86_pmu.event_attrs; ++ ++ if (!x86_pmu.events_sysfs_show) ++ x86_pmu_events_group.attrs = &empty_attrs; ++ else ++ filter_events(x86_pmu_events_group.attrs); ++ ++ if (x86_pmu.cpu_events) { ++ struct attribute **tmp; ++ ++ tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events); ++ if (!WARN_ON(!tmp)) ++ x86_pmu_events_group.attrs = tmp; ++ } ++ ++ if (x86_pmu.attrs) { ++ struct attribute **tmp; ++ ++ tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs); ++ if (!WARN_ON(!tmp)) ++ x86_pmu_attr_group.attrs = tmp; ++ } ++ ++ pr_info("... version: %d\n", x86_pmu.version); ++ pr_info("... bit width: %d\n", x86_pmu.cntval_bits); ++ pr_info("... generic registers: %d\n", x86_pmu.num_counters); ++ pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); ++ pr_info("... max period: %016Lx\n", x86_pmu.max_period); ++ pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); ++ pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); ++ ++ /* ++ * Install callbacks. Core will call them for each online ++ * cpu. 
++ */ ++ err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "perf/x86:prepare", ++ x86_pmu_prepare_cpu, x86_pmu_dead_cpu); ++ if (err) ++ return err; ++ ++ err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING, ++ "perf/x86:starting", x86_pmu_starting_cpu, ++ x86_pmu_dying_cpu); ++ if (err) ++ goto out; ++ ++ err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "perf/x86:online", ++ x86_pmu_online_cpu, NULL); ++ if (err) ++ goto out1; ++ ++ err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); ++ if (err) ++ goto out2; ++ ++ return 0; ++ ++out2: ++ cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE); ++out1: ++ cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING); ++out: ++ cpuhp_remove_state(CPUHP_PERF_X86_PREPARE); ++ return err; ++} ++early_initcall(init_hw_perf_events); ++ ++static inline void x86_pmu_read(struct perf_event *event) ++{ ++ if (x86_pmu.read) ++ return x86_pmu.read(event); ++ x86_perf_event_update(event); ++} ++ ++/* ++ * Start group events scheduling transaction ++ * Set the flag to make pmu::enable() not perform the ++ * schedulability test, it will be performed at commit time ++ * ++ * We only support PERF_PMU_TXN_ADD transactions. Save the ++ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD ++ * transactions. ++ */ ++static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ ++ WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */ ++ ++ cpuc->txn_flags = txn_flags; ++ if (txn_flags & ~PERF_PMU_TXN_ADD) ++ return; ++ ++ perf_pmu_disable(pmu); ++ __this_cpu_write(cpu_hw_events.n_txn, 0); ++} ++ ++/* ++ * Stop group events scheduling transaction ++ * Clear the flag and pmu::enable() will perform the ++ * schedulability test. ++ */ ++static void x86_pmu_cancel_txn(struct pmu *pmu) ++{ ++ unsigned int txn_flags; ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ ++ WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ ++ ++ txn_flags = cpuc->txn_flags; ++ cpuc->txn_flags = 0; ++ if (txn_flags & ~PERF_PMU_TXN_ADD) ++ return; ++ ++ /* ++ * Truncate collected array by the number of events added in this ++ * transaction. See x86_pmu_add() and x86_pmu_*_txn(). ++ */ ++ __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); ++ __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); ++ perf_pmu_enable(pmu); ++} ++ ++/* ++ * Commit group events scheduling transaction ++ * Perform the group schedulability test as a whole ++ * Return 0 if success ++ * ++ * Does not cancel the transaction on failure; expects the caller to do this. ++ */ ++static int x86_pmu_commit_txn(struct pmu *pmu) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int assign[X86_PMC_IDX_MAX]; ++ int n, ret; ++ ++ WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ ++ ++ if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { ++ cpuc->txn_flags = 0; ++ return 0; ++ } ++ ++ n = cpuc->n_events; ++ ++ if (!x86_pmu_initialized()) ++ return -EAGAIN; ++ ++ ret = x86_pmu.schedule_events(cpuc, n, assign); ++ if (ret) ++ return ret; ++ ++ /* ++ * copy new assignment, now we know it is possible ++ * will be used by hw_perf_enable() ++ */ ++ memcpy(cpuc->assign, assign, n*sizeof(int)); ++ ++ cpuc->txn_flags = 0; ++ perf_pmu_enable(pmu); ++ return 0; ++} ++/* ++ * a fake_cpuc is used to validate event groups. Due to ++ * the extra reg logic, we need to also allocate a fake ++ * per_core and per_cpu structure. 
Otherwise, group events ++ * using extra reg may conflict without the kernel being ++ * able to catch this when the last event gets added to ++ * the group. ++ */ ++static void free_fake_cpuc(struct cpu_hw_events *cpuc) ++{ ++ intel_cpuc_finish(cpuc); ++ kfree(cpuc); ++} ++ ++static struct cpu_hw_events *allocate_fake_cpuc(void) ++{ ++ struct cpu_hw_events *cpuc; ++ int cpu = raw_smp_processor_id(); ++ ++ cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); ++ if (!cpuc) ++ return ERR_PTR(-ENOMEM); ++ cpuc->is_fake = 1; ++ ++ if (intel_cpuc_prepare(cpuc, cpu)) ++ goto error; ++ ++ return cpuc; ++error: ++ free_fake_cpuc(cpuc); ++ return ERR_PTR(-ENOMEM); ++} ++ ++/* ++ * validate that we can schedule this event ++ */ ++static int validate_event(struct perf_event *event) ++{ ++ struct cpu_hw_events *fake_cpuc; ++ struct event_constraint *c; ++ int ret = 0; ++ ++ fake_cpuc = allocate_fake_cpuc(); ++ if (IS_ERR(fake_cpuc)) ++ return PTR_ERR(fake_cpuc); ++ ++ c = x86_pmu.get_event_constraints(fake_cpuc, -1, event); ++ ++ if (!c || !c->weight) ++ ret = -EINVAL; ++ ++ if (x86_pmu.put_event_constraints) ++ x86_pmu.put_event_constraints(fake_cpuc, event); ++ ++ free_fake_cpuc(fake_cpuc); ++ ++ return ret; ++} ++ ++/* ++ * validate a single event group ++ * ++ * validation include: ++ * - check events are compatible which each other ++ * - events do not compete for the same counter ++ * - number of events <= number of counters ++ * ++ * validation ensures the group can be loaded onto the ++ * PMU if it was the only group available. ++ */ ++static int validate_group(struct perf_event *event) ++{ ++ struct perf_event *leader = event->group_leader; ++ struct cpu_hw_events *fake_cpuc; ++ int ret = -EINVAL, n; ++ ++ fake_cpuc = allocate_fake_cpuc(); ++ if (IS_ERR(fake_cpuc)) ++ return PTR_ERR(fake_cpuc); ++ /* ++ * the event is not yet connected with its ++ * siblings therefore we must first collect ++ * existing siblings, then add the new event ++ * before we can simulate the scheduling ++ */ ++ n = collect_events(fake_cpuc, leader, true); ++ if (n < 0) ++ goto out; ++ ++ fake_cpuc->n_events = n; ++ n = collect_events(fake_cpuc, event, false); ++ if (n < 0) ++ goto out; ++ ++ fake_cpuc->n_events = n; ++ ++ ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); ++ ++out: ++ free_fake_cpuc(fake_cpuc); ++ return ret; ++} ++ ++static int x86_pmu_event_init(struct perf_event *event) ++{ ++ struct pmu *tmp; ++ int err; ++ ++ switch (event->attr.type) { ++ case PERF_TYPE_RAW: ++ case PERF_TYPE_HARDWARE: ++ case PERF_TYPE_HW_CACHE: ++ break; ++ ++ default: ++ return -ENOENT; ++ } ++ ++ err = __x86_pmu_event_init(event); ++ if (!err) { ++ /* ++ * we temporarily connect event to its pmu ++ * such that validate_group() can classify ++ * it as an x86 event using is_x86_event() ++ */ ++ tmp = event->pmu; ++ event->pmu = &pmu; ++ ++ if (event->group_leader != event) ++ err = validate_group(event); ++ else ++ err = validate_event(event); ++ ++ event->pmu = tmp; ++ } ++ if (err) { ++ if (event->destroy) ++ event->destroy(event); ++ } ++ ++ if (READ_ONCE(x86_pmu.attr_rdpmc) && ++ !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) ++ event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; ++ ++ return err; ++} ++ ++static void refresh_pce(void *ignored) ++{ ++ load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm)); ++} ++ ++static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) ++{ ++ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) ++ return; ++ ++ /* ++ * This function relies on not being called concurrently in 
two ++ * tasks in the same mm. Otherwise one task could observe ++ * perf_rdpmc_allowed > 1 and return all the way back to ++ * userspace with CR4.PCE clear while another task is still ++ * doing on_each_cpu_mask() to propagate CR4.PCE. ++ * ++ * For now, this can't happen because all callers hold mmap_sem ++ * for write. If this changes, we'll need a different solution. ++ */ ++ lockdep_assert_held_exclusive(&mm->mmap_sem); ++ ++ if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) ++ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); ++} ++ ++static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) ++{ ++ ++ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) ++ return; ++ ++ if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed)) ++ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); ++} ++ ++static int x86_pmu_event_idx(struct perf_event *event) ++{ ++ int idx = event->hw.idx; ++ ++ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) ++ return 0; ++ ++ if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { ++ idx -= INTEL_PMC_IDX_FIXED; ++ idx |= 1 << 30; ++ } ++ ++ return idx + 1; ++} ++ ++static ssize_t get_attr_rdpmc(struct device *cdev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); ++} ++ ++static ssize_t set_attr_rdpmc(struct device *cdev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ unsigned long val; ++ ssize_t ret; ++ ++ ret = kstrtoul(buf, 0, &val); ++ if (ret) ++ return ret; ++ ++ if (val > 2) ++ return -EINVAL; ++ ++ if (x86_pmu.attr_rdpmc_broken) ++ return -ENOTSUPP; ++ ++ if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) { ++ /* ++ * Changing into or out of always available, aka ++ * perf-event-bypassing mode. This path is extremely slow, ++ * but only root can trigger it, so it's okay. 
++ */ ++ if (val == 2) ++ static_branch_inc(&rdpmc_always_available_key); ++ else ++ static_branch_dec(&rdpmc_always_available_key); ++ on_each_cpu(refresh_pce, NULL, 1); ++ } ++ ++ x86_pmu.attr_rdpmc = val; ++ ++ return count; ++} ++ ++static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); ++ ++static struct attribute *x86_pmu_attrs[] = { ++ &dev_attr_rdpmc.attr, ++ NULL, ++}; ++ ++static struct attribute_group x86_pmu_attr_group = { ++ .attrs = x86_pmu_attrs, ++}; ++ ++static ssize_t max_precise_show(struct device *cdev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise()); ++} ++ ++static DEVICE_ATTR_RO(max_precise); ++ ++static struct attribute *x86_pmu_caps_attrs[] = { ++ &dev_attr_max_precise.attr, ++ NULL ++}; ++ ++static struct attribute_group x86_pmu_caps_group = { ++ .name = "caps", ++ .attrs = x86_pmu_caps_attrs, ++}; ++ ++static const struct attribute_group *x86_pmu_attr_groups[] = { ++ &x86_pmu_attr_group, ++ &x86_pmu_format_group, ++ &x86_pmu_events_group, ++ &x86_pmu_caps_group, ++ NULL, ++}; ++ ++static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) ++{ ++ if (x86_pmu.sched_task) ++ x86_pmu.sched_task(ctx, sched_in); ++} ++ ++void perf_check_microcode(void) ++{ ++ if (x86_pmu.check_microcode) ++ x86_pmu.check_microcode(); ++} ++ ++static int x86_pmu_check_period(struct perf_event *event, u64 value) ++{ ++ if (x86_pmu.check_period && x86_pmu.check_period(event, value)) ++ return -EINVAL; ++ ++ if (value && x86_pmu.limit_period) { ++ if (x86_pmu.limit_period(event, value) > value) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static struct pmu pmu = { ++ .pmu_enable = x86_pmu_enable, ++ .pmu_disable = x86_pmu_disable, ++ ++ .attr_groups = x86_pmu_attr_groups, ++ ++ .event_init = x86_pmu_event_init, ++ ++ .event_mapped = x86_pmu_event_mapped, ++ .event_unmapped = x86_pmu_event_unmapped, ++ ++ .add = x86_pmu_add, ++ .del = x86_pmu_del, ++ .start = x86_pmu_start, ++ .stop = x86_pmu_stop, ++ .read = x86_pmu_read, ++ ++ .start_txn = x86_pmu_start_txn, ++ .cancel_txn = x86_pmu_cancel_txn, ++ .commit_txn = x86_pmu_commit_txn, ++ ++ .event_idx = x86_pmu_event_idx, ++ .sched_task = x86_pmu_sched_task, ++ .task_ctx_size = sizeof(struct x86_perf_task_context), ++ .check_period = x86_pmu_check_period, ++}; ++ ++void arch_perf_update_userpage(struct perf_event *event, ++ struct perf_event_mmap_page *userpg, u64 now) ++{ ++ struct cyc2ns_data data; ++ u64 offset; ++ ++ userpg->cap_user_time = 0; ++ userpg->cap_user_time_zero = 0; ++ userpg->cap_user_rdpmc = ++ !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED); ++ userpg->pmc_width = x86_pmu.cntval_bits; ++ ++ if (!using_native_sched_clock() || !sched_clock_stable()) ++ return; ++ ++ cyc2ns_read_begin(&data); ++ ++ offset = data.cyc2ns_offset + __sched_clock_offset; ++ ++ /* ++ * Internal timekeeping for enabled/running/stopped times ++ * is always in the local_clock domain. ++ */ ++ userpg->cap_user_time = 1; ++ userpg->time_mult = data.cyc2ns_mul; ++ userpg->time_shift = data.cyc2ns_shift; ++ userpg->time_offset = offset - now; ++ ++ /* ++ * cap_user_time_zero doesn't make sense when we're using a different ++ * time base for the records. 
++ */ ++ if (!event->attr.use_clockid) { ++ userpg->cap_user_time_zero = 1; ++ userpg->time_zero = offset; ++ } ++ ++ cyc2ns_read_end(); ++} ++ ++void ++perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) ++{ ++ struct unwind_state state; ++ unsigned long addr; ++ ++ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { ++ /* TODO: We don't support guest os callchain now */ ++ return; ++ } ++ ++ if (perf_callchain_store(entry, regs->ip)) ++ return; ++ ++ for (unwind_start(&state, current, regs, NULL); !unwind_done(&state); ++ unwind_next_frame(&state)) { ++ addr = unwind_get_return_address(&state); ++ if (!addr || perf_callchain_store(entry, addr)) ++ return; ++ } ++} ++ ++static inline int ++valid_user_frame(const void __user *fp, unsigned long size) ++{ ++ return (__range_not_ok(fp, size, TASK_SIZE) == 0); ++} ++ ++static unsigned long get_segment_base(unsigned int segment) ++{ ++ struct desc_struct *desc; ++ unsigned int idx = segment >> 3; ++ ++ if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { ++#ifdef CONFIG_MODIFY_LDT_SYSCALL ++ struct ldt_struct *ldt; ++ ++ /* IRQs are off, so this synchronizes with smp_store_release */ ++ ldt = READ_ONCE(current->active_mm->context.ldt); ++ if (!ldt || idx >= ldt->nr_entries) ++ return 0; ++ ++ desc = &ldt->entries[idx]; ++#else ++ return 0; ++#endif ++ } else { ++ if (idx >= GDT_ENTRIES) ++ return 0; ++ ++ desc = raw_cpu_ptr(gdt_page.gdt) + idx; ++ } ++ ++ return get_desc_base(desc); ++} ++ ++#ifdef CONFIG_IA32_EMULATION ++ ++#include ++ ++static inline int ++perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry) ++{ ++ /* 32-bit process in 64-bit kernel. */ ++ unsigned long ss_base, cs_base; ++ struct stack_frame_ia32 frame; ++ const void __user *fp; ++ ++ if (!test_thread_flag(TIF_IA32)) ++ return 0; ++ ++ cs_base = get_segment_base(regs->cs); ++ ss_base = get_segment_base(regs->ss); ++ ++ fp = compat_ptr(ss_base + regs->bp); ++ pagefault_disable(); ++ while (entry->nr < entry->max_stack) { ++ unsigned long bytes; ++ frame.next_frame = 0; ++ frame.return_address = 0; ++ ++ if (!valid_user_frame(fp, sizeof(frame))) ++ break; ++ ++ bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4); ++ if (bytes != 0) ++ break; ++ bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4); ++ if (bytes != 0) ++ break; ++ ++ perf_callchain_store(entry, cs_base + frame.return_address); ++ fp = compat_ptr(ss_base + frame.next_frame); ++ } ++ pagefault_enable(); ++ return 1; ++} ++#else ++static inline int ++perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry) ++{ ++ return 0; ++} ++#endif ++ ++void ++perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) ++{ ++ struct stack_frame frame; ++ const unsigned long __user *fp; ++ ++ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { ++ /* TODO: We don't support guest os callchain now */ ++ return; ++ } ++ ++ /* ++ * We don't know what to do with VM86 stacks.. ignore them for now. 
++ */ ++ if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) ++ return; ++ ++ fp = (unsigned long __user *)regs->bp; ++ ++ perf_callchain_store(entry, regs->ip); ++ ++ if (!nmi_uaccess_okay()) ++ return; ++ ++ if (perf_callchain_user32(regs, entry)) ++ return; ++ ++ pagefault_disable(); ++ while (entry->nr < entry->max_stack) { ++ unsigned long bytes; ++ ++ frame.next_frame = NULL; ++ frame.return_address = 0; ++ ++ if (!valid_user_frame(fp, sizeof(frame))) ++ break; ++ ++ bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); ++ if (bytes != 0) ++ break; ++ bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp)); ++ if (bytes != 0) ++ break; ++ ++ perf_callchain_store(entry, frame.return_address); ++ fp = (void __user *)frame.next_frame; ++ } ++ pagefault_enable(); ++} ++ ++/* ++ * Deal with code segment offsets for the various execution modes: ++ * ++ * VM86 - the good olde 16 bit days, where the linear address is ++ * 20 bits and we use regs->ip + 0x10 * regs->cs. ++ * ++ * IA32 - Where we need to look at GDT/LDT segment descriptor tables ++ * to figure out what the 32bit base address is. ++ * ++ * X32 - has TIF_X32 set, but is running in x86_64 ++ * ++ * X86_64 - CS,DS,SS,ES are all zero based. ++ */ ++static unsigned long code_segment_base(struct pt_regs *regs) ++{ ++ /* ++ * For IA32 we look at the GDT/LDT segment base to convert the ++ * effective IP to a linear address. ++ */ ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * If we are in VM86 mode, add the segment offset to convert to a ++ * linear address. ++ */ ++ if (regs->flags & X86_VM_MASK) ++ return 0x10 * regs->cs; ++ ++ if (user_mode(regs) && regs->cs != __USER_CS) ++ return get_segment_base(regs->cs); ++#else ++ if (user_mode(regs) && !user_64bit_mode(regs) && ++ regs->cs != __USER32_CS) ++ return get_segment_base(regs->cs); ++#endif ++ return 0; ++} ++ ++unsigned long perf_instruction_pointer(struct pt_regs *regs) ++{ ++ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) ++ return perf_guest_cbs->get_guest_ip(); ++ ++ return regs->ip + code_segment_base(regs); ++} ++ ++unsigned long perf_misc_flags(struct pt_regs *regs) ++{ ++ int misc = 0; ++ ++ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { ++ if (perf_guest_cbs->is_user_mode()) ++ misc |= PERF_RECORD_MISC_GUEST_USER; ++ else ++ misc |= PERF_RECORD_MISC_GUEST_KERNEL; ++ } else { ++ if (user_mode(regs)) ++ misc |= PERF_RECORD_MISC_USER; ++ else ++ misc |= PERF_RECORD_MISC_KERNEL; ++ } ++ ++ if (regs->flags & PERF_EFLAGS_EXACT) ++ misc |= PERF_RECORD_MISC_EXACT_IP; ++ ++ return misc; ++} ++ ++void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) ++{ ++ cap->version = x86_pmu.version; ++ cap->num_counters_gp = x86_pmu.num_counters; ++ cap->num_counters_fixed = x86_pmu.num_counters_fixed; ++ cap->bit_width_gp = x86_pmu.cntval_bits; ++ cap->bit_width_fixed = x86_pmu.cntval_bits; ++ cap->events_mask = (unsigned int)x86_pmu.events_maskl; ++ cap->events_mask_len = x86_pmu.events_mask_len; ++} ++EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); +diff -uprN kernel/arch/x86/include/asm/apic.h kernel_new/arch/x86/include/asm/apic.h +--- kernel/arch/x86/include/asm/apic.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/apic.h 2021-04-01 18:28:07.651863292 +0800 +@@ -439,7 +439,17 @@ static inline void apic_set_eoi_write(vo + + extern void apic_ack_irq(struct irq_data *data); + ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_SMP ++struct irq_data; ++void move_xxapic_irq(struct irq_data *data); ++#endif ++#define ack_APIC_irq() do { } 
while(0) ++static inline void __ack_APIC_irq(void) ++#else /* !CONFIG_IPIPE */ ++#define __ack_APIC_irq() ack_APIC_irq() + static inline void ack_APIC_irq(void) ++#endif /* CONFIG_IPIPE */ + { + /* + * ack_APIC_irq() actually gets compiled as a single instruction +diff -uprN kernel/arch/x86/include/asm/debugreg.h kernel_new/arch/x86/include/asm/debugreg.h +--- kernel/arch/x86/include/asm/debugreg.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/debugreg.h 2021-04-01 18:28:07.651863292 +0800 +@@ -94,7 +94,7 @@ extern void aout_dump_debugregs(struct u + + extern void hw_breakpoint_restore(void); + +-#ifdef CONFIG_X86_64 ++#if defined(CONFIG_X86_64) && !defined(CONFIG_IPIPE) + DECLARE_PER_CPU(int, debug_stack_usage); + static inline void debug_stack_usage_inc(void) + { +diff -uprN kernel/arch/x86/include/asm/desc.h kernel_new/arch/x86/include/asm/desc.h +--- kernel/arch/x86/include/asm/desc.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/desc.h 2021-04-01 18:28:07.651863292 +0800 +@@ -309,7 +309,7 @@ static inline void force_reload_TR(void) + */ + static inline void refresh_tss_limit(void) + { +- DEBUG_LOCKS_WARN_ON(preemptible()); ++ DEBUG_LOCKS_WARN_ON(!hard_irqs_disabled() && preemptible()); + + if (unlikely(this_cpu_read(__tss_limit_invalid))) + force_reload_TR(); +@@ -326,7 +326,7 @@ static inline void refresh_tss_limit(voi + */ + static inline void invalidate_tss_limit(void) + { +- DEBUG_LOCKS_WARN_ON(preemptible()); ++ DEBUG_LOCKS_WARN_ON(!hard_irqs_disabled() && preemptible()); + + if (unlikely(test_thread_flag(TIF_IO_BITMAP))) + force_reload_TR(); +@@ -391,7 +391,7 @@ void alloc_intr_gate(unsigned int n, con + + extern unsigned long system_vectors[]; + +-#ifdef CONFIG_X86_64 ++#if defined(CONFIG_X86_64) && !defined(CONFIG_IPIPE) + DECLARE_PER_CPU(u32, debug_idt_ctr); + static inline bool is_debug_idt_enabled(void) + { +diff -uprN kernel/arch/x86/include/asm/fpu/internal.h kernel_new/arch/x86/include/asm/fpu/internal.h +--- kernel/arch/x86/include/asm/fpu/internal.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/fpu/internal.h 2021-04-01 18:28:07.652863290 +0800 +@@ -607,4 +607,24 @@ static inline void xsetbv(u32 index, u64 + : : "a" (eax), "d" (edx), "c" (index)); + } + ++DECLARE_PER_CPU(bool, in_kernel_fpu); ++ ++static inline void kernel_fpu_disable(void) ++{ ++ WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); ++ this_cpu_write(in_kernel_fpu, true); ++} ++ ++static inline void kernel_fpu_enable(void) ++{ ++ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); ++ this_cpu_write(in_kernel_fpu, false); ++} ++ ++static inline bool kernel_fpu_disabled(void) ++{ ++ return this_cpu_read(in_kernel_fpu); ++} ++ ++ + #endif /* _ASM_X86_FPU_INTERNAL_H */ +diff -uprN kernel/arch/x86/include/asm/i8259.h kernel_new/arch/x86/include/asm/i8259.h +--- kernel/arch/x86/include/asm/i8259.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/i8259.h 2021-04-01 18:28:07.652863290 +0800 +@@ -26,7 +26,7 @@ extern unsigned int cached_irq_mask; + #define SLAVE_ICW4_DEFAULT 0x01 + #define PIC_ICW4_AEOI 2 + +-extern raw_spinlock_t i8259A_lock; ++IPIPE_DECLARE_RAW_SPINLOCK(i8259A_lock); + + /* the PIC may need a careful delay on some platforms, hence specific calls */ + static inline unsigned char inb_pic(unsigned int port) +diff -uprN kernel/arch/x86/include/asm/ipipe_base.h kernel_new/arch/x86/include/asm/ipipe_base.h +--- kernel/arch/x86/include/asm/ipipe_base.h 1970-01-01 08:00:00.000000000 +0800 ++++ 
kernel_new/arch/x86/include/asm/ipipe_base.h 2021-04-01 18:28:07.652863290 +0800 +@@ -0,0 +1,156 @@ ++/* -*- linux-c -*- ++ * arch/x86/include/asm/ipipe_base.h ++ * ++ * Copyright (C) 2007-2012 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __X86_IPIPE_BASE_H ++#define __X86_IPIPE_BASE_H ++ ++#include ++#include ++ ++#ifdef CONFIG_X86_32 ++/* 32 from IDT + iret_error + mayday trap */ ++#define IPIPE_TRAP_MAYDAY 33 /* Internal recovery trap */ ++#define IPIPE_NR_FAULTS 34 ++#else ++/* 32 from IDT + mayday trap */ ++#define IPIPE_TRAP_MAYDAY 32 /* Internal recovery trap */ ++#define IPIPE_NR_FAULTS 33 ++#endif ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++/* ++ * Special APIC interrupts are mapped above the last defined external ++ * IRQ number. ++ */ ++#define nr_apic_vectors (NR_VECTORS - FIRST_SYSTEM_VECTOR) ++#define IPIPE_FIRST_APIC_IRQ NR_IRQS ++#define IPIPE_HRTIMER_IPI ipipe_apic_vector_irq(IPIPE_HRTIMER_VECTOR) ++#ifdef CONFIG_SMP ++#define IPIPE_RESCHEDULE_IPI ipipe_apic_vector_irq(IPIPE_RESCHEDULE_VECTOR) ++#define IPIPE_CRITICAL_IPI ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR) ++#endif /* CONFIG_SMP */ ++#define IPIPE_NR_XIRQS (NR_IRQS + nr_apic_vectors) ++#define ipipe_apic_irq_vector(irq) ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR) ++#define ipipe_apic_vector_irq(vec) ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ) ++#else ++#define IPIPE_NR_XIRQS NR_IRQS ++#endif /* !CONFIG_X86_LOCAL_APIC */ ++ ++#ifndef __ASSEMBLY__ ++ ++#include ++ ++extern unsigned int cpu_khz; ++ ++static inline const char *ipipe_clock_name(void) ++{ ++ return "tsc"; ++} ++ ++#define __ipipe_cpu_freq ({ u64 __freq = 1000ULL * cpu_khz; __freq; }) ++#define __ipipe_hrclock_freq __ipipe_cpu_freq ++ ++#ifdef CONFIG_X86_32 ++ ++#define ipipe_read_tsc(t) \ ++ __asm__ __volatile__("rdtsc" : "=A"(t)) ++ ++#define ipipe_tsc2ns(t) \ ++({ \ ++ unsigned long long delta = (t) * 1000000ULL; \ ++ unsigned long long freq = __ipipe_hrclock_freq; \ ++ do_div(freq, 1000); \ ++ do_div(delta, (unsigned)freq + 1); \ ++ (unsigned long)delta; \ ++}) ++ ++#define ipipe_tsc2us(t) \ ++({ \ ++ unsigned long long delta = (t) * 1000ULL; \ ++ unsigned long long freq = __ipipe_hrclock_freq; \ ++ do_div(freq, 1000); \ ++ do_div(delta, (unsigned)freq + 1); \ ++ (unsigned long)delta; \ ++}) ++ ++static inline unsigned long __ipipe_ffnz(unsigned long ul) ++{ ++ __asm__("bsrl %1, %0":"=r"(ul) : "r"(ul)); ++ return ul; ++} ++ ++#else /* X86_64 */ ++ ++#define ipipe_read_tsc(t) do { \ ++ unsigned int __a,__d; \ ++ asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ ++ (t) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ ++} while(0) ++ ++#define ipipe_tsc2ns(t) (((t) * 1000UL) / (__ipipe_hrclock_freq / 1000000UL)) ++#define ipipe_tsc2us(t) ((t) / (__ipipe_hrclock_freq / 
1000000UL)) ++ ++static inline unsigned long __ipipe_ffnz(unsigned long ul) ++{ ++ __asm__("bsrq %1, %0":"=r"(ul) ++ : "rm"(ul)); ++ return ul; ++} ++ ++#ifdef CONFIG_IA32_EMULATION ++#define ipipe_root_nr_syscalls(ti) \ ++ ((ti->status & TS_COMPAT) ? IA32_NR_syscalls : NR_syscalls) ++#endif /* CONFIG_IA32_EMULATION */ ++ ++#endif /* X86_64 */ ++ ++struct pt_regs; ++struct irq_desc; ++struct ipipe_vm_notifier; ++ ++static inline unsigned __ipipe_get_irq_vector(int irq) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ unsigned int __ipipe_get_ioapic_irq_vector(int irq); ++ return __ipipe_get_ioapic_irq_vector(irq); ++#elif defined(CONFIG_X86_LOCAL_APIC) ++ return irq >= IPIPE_FIRST_APIC_IRQ ? ++ ipipe_apic_irq_vector(irq) : ISA_IRQ_VECTOR(irq); ++#else ++ return ISA_IRQ_VECTOR(irq); ++#endif ++} ++ ++void ipipe_hrtimer_interrupt(void); ++ ++void ipipe_reschedule_interrupt(void); ++ ++void ipipe_critical_interrupt(void); ++ ++int __ipipe_handle_irq(struct pt_regs *regs); ++ ++void __ipipe_handle_vm_preemption(struct ipipe_vm_notifier *nfy); ++ ++extern int __ipipe_hrtimer_irq; ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#endif /* !__X86_IPIPE_BASE_H */ +diff -uprN kernel/arch/x86/include/asm/ipipe.h kernel_new/arch/x86/include/asm/ipipe.h +--- kernel/arch/x86/include/asm/ipipe.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/ipipe.h 2021-04-01 18:28:07.652863290 +0800 +@@ -0,0 +1,70 @@ ++/* -*- linux-c -*- ++ * arch/x86/include/asm/ipipe.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __X86_IPIPE_H ++#define __X86_IPIPE_H ++ ++#ifdef CONFIG_IPIPE ++ ++#define IPIPE_CORE_RELEASE 9 ++ ++struct ipipe_domain; ++ ++struct ipipe_arch_sysinfo { ++}; ++ ++#define ipipe_processor_id() raw_smp_processor_id() ++ ++/* Private interface -- Internal use only */ ++ ++#define __ipipe_early_core_setup() do { } while(0) ++ ++#define __ipipe_enable_irq(irq) irq_to_desc(irq)->chip->enable(irq) ++#define __ipipe_disable_irq(irq) irq_to_desc(irq)->chip->disable(irq) ++ ++#ifdef CONFIG_SMP ++void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd); ++#else ++#define __ipipe_hook_critical_ipi(ipd) do { } while(0) ++#endif ++ ++void __ipipe_enable_pipeline(void); ++ ++#define __ipipe_root_tick_p(regs) ((regs)->flags & X86_EFLAGS_IF) ++ ++#define ipipe_notify_root_preemption() __ipipe_notify_vm_preemption() ++ ++#endif /* CONFIG_IPIPE */ ++ ++#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) ++#define __ipipe_move_root_irq(__desc) \ ++ do { \ ++ if (!IS_ERR_OR_NULL(__desc)) { \ ++ struct irq_chip *__chip = irq_desc_get_chip(__desc); \ ++ if (__chip->irq_move) \ ++ __chip->irq_move(irq_desc_get_irq_data(__desc)); \ ++ } \ ++ } while (0) ++#else /* !(CONFIG_SMP && CONFIG_IPIPE) */ ++#define __ipipe_move_root_irq(irq) do { } while (0) ++#endif /* !(CONFIG_SMP && CONFIG_IPIPE) */ ++ ++#endif /* !__X86_IPIPE_H */ +diff -uprN kernel/arch/x86/include/asm/irqflags.h kernel_new/arch/x86/include/asm/irqflags.h +--- kernel/arch/x86/include/asm/irqflags.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/irqflags.h 2021-04-01 18:28:07.652863290 +0800 +@@ -8,6 +8,10 @@ + + #include + ++#include ++#include ++#include ++ + /* Provide __cpuidle; we can't safely include */ + #define __cpuidle __attribute__((__section__(".cpuidle.text"))) + +@@ -66,14 +70,76 @@ static inline __cpuidle void native_halt + asm volatile("hlt": : :"memory"); + } + ++static inline int native_irqs_disabled(void) ++{ ++ unsigned long flags = native_save_fl(); ++ ++ return !(flags & X86_EFLAGS_IF); ++} ++ + #endif + + #ifdef CONFIG_PARAVIRT + #include ++#define HARD_COND_ENABLE_INTERRUPTS ++#define HARD_COND_DISABLE_INTERRUPTS + #else + #ifndef __ASSEMBLY__ + #include + ++#ifdef CONFIG_IPIPE ++ ++void __ipipe_halt_root(int use_mwait); ++ ++static inline notrace unsigned long arch_local_save_flags(void) ++{ ++ unsigned long flags; ++ ++ flags = (!ipipe_test_root()) << 9; ++ barrier(); ++ return flags; ++} ++ ++static inline notrace void arch_local_irq_restore(unsigned long flags) ++{ ++ barrier(); ++ ipipe_restore_root(!(flags & X86_EFLAGS_IF)); ++} ++ ++static inline notrace void arch_local_irq_disable(void) ++{ ++ ipipe_stall_root(); ++ barrier(); ++} ++ ++static inline notrace void arch_local_irq_enable(void) ++{ ++ barrier(); ++ ipipe_unstall_root(); ++} ++ ++static inline __cpuidle void arch_safe_halt(void) ++{ ++ barrier(); ++ __ipipe_halt_root(0); ++} ++ ++/* Merge virtual+real interrupt mask bits into a single word. 
*/ ++static inline unsigned long arch_mangle_irq_bits(int virt, unsigned long real) ++{ ++ return (real & ~(1L << 31)) | ((unsigned long)(virt != 0) << 31); ++} ++ ++/* Converse operation of arch_mangle_irq_bits() */ ++static inline int arch_demangle_irq_bits(unsigned long *x) ++{ ++ int virt = (*x & (1L << 31)) != 0; ++ *x &= ~(1L << 31); ++ return virt; ++} ++ ++#else /* !CONFIG_IPIPE */ ++ + static inline notrace unsigned long arch_local_save_flags(void) + { + return native_save_fl(); +@@ -103,6 +169,8 @@ static inline __cpuidle void arch_safe_h + native_safe_halt(); + } + ++#endif /* !CONFIG_IPIPE */ ++ + /* + * Used when interrupts are already enabled or to + * shutdown the processor: +@@ -126,6 +194,14 @@ static inline notrace unsigned long arch + #define ENABLE_INTERRUPTS(x) sti + #define DISABLE_INTERRUPTS(x) cli + ++#ifdef CONFIG_IPIPE ++#define HARD_COND_ENABLE_INTERRUPTS sti ++#define HARD_COND_DISABLE_INTERRUPTS cli ++#else /* !CONFIG_IPIPE */ ++#define HARD_COND_ENABLE_INTERRUPTS ++#define HARD_COND_DISABLE_INTERRUPTS ++#endif /* !CONFIG_IPIPE */ ++ + #ifdef CONFIG_X86_64 + #define SWAPGS swapgs + /* +@@ -174,40 +250,156 @@ static inline int arch_irqs_disabled(voi + + return arch_irqs_disabled_flags(flags); + } ++ ++#ifdef CONFIG_IPIPE ++ ++static inline unsigned long hard_local_irq_save_notrace(void) ++{ ++ unsigned long flags; ++ ++ flags = native_save_fl(); ++ native_irq_disable(); ++ ++ return flags; ++} ++ ++static inline void hard_local_irq_restore_notrace(unsigned long flags) ++{ ++ native_restore_fl(flags); ++} ++ ++static inline void hard_local_irq_disable_notrace(void) ++{ ++ native_irq_disable(); ++} ++ ++static inline void hard_local_irq_enable_notrace(void) ++{ ++ native_irq_enable(); ++} ++ ++static inline int hard_irqs_disabled(void) ++{ ++ return native_irqs_disabled(); ++} ++ ++#define hard_irqs_disabled_flags(flags) arch_irqs_disabled_flags(flags) ++ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ ++static inline void hard_local_irq_disable(void) ++{ ++ if (!native_irqs_disabled()) { ++ native_irq_disable(); ++ ipipe_trace_begin(0x80000000); ++ } ++} ++ ++static inline void hard_local_irq_enable(void) ++{ ++ if (native_irqs_disabled()) { ++ ipipe_trace_end(0x80000000); ++ native_irq_enable(); ++ } ++} ++ ++static inline unsigned long hard_local_irq_save(void) ++{ ++ unsigned long flags; ++ ++ flags = native_save_fl(); ++ if (flags & X86_EFLAGS_IF) { ++ native_irq_disable(); ++ ipipe_trace_begin(0x80000001); ++ } ++ ++ return flags; ++} ++ ++static inline void hard_local_irq_restore(unsigned long flags) ++{ ++ if (flags & X86_EFLAGS_IF) ++ ipipe_trace_end(0x80000001); ++ ++ native_restore_fl(flags); ++} ++ ++#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ ++ ++static inline unsigned long hard_local_irq_save(void) ++{ ++ return hard_local_irq_save_notrace(); ++} ++ ++static inline void hard_local_irq_restore(unsigned long flags) ++{ ++ hard_local_irq_restore_notrace(flags); ++} ++ ++static inline void hard_local_irq_enable(void) ++{ ++ hard_local_irq_enable_notrace(); ++} ++ ++static inline void hard_local_irq_disable(void) ++{ ++ hard_local_irq_disable_notrace(); ++} ++ ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++ ++static inline unsigned long hard_local_save_flags(void) ++{ ++ return native_save_fl(); ++} ++ ++#endif /* CONFIG_IPIPE */ ++ + #endif /* !__ASSEMBLY__ */ + + #ifdef __ASSEMBLY__ + #ifdef CONFIG_TRACE_IRQFLAGS + # define TRACE_IRQS_ON call trace_hardirqs_on_thunk; ++#ifdef CONFIG_IPIPE ++# define TRACE_IRQS_ON_VIRT call trace_hardirqs_on_virt_thunk; ++#else ++# 
define TRACE_IRQS_ON_VIRT TRACE_IRQS_ON ++#endif + # define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; + #else + # define TRACE_IRQS_ON ++# define TRACE_IRQS_ON_VIRT + # define TRACE_IRQS_OFF + #endif + #ifdef CONFIG_DEBUG_LOCK_ALLOC + # ifdef CONFIG_X86_64 +-# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk ++# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk + # define LOCKDEP_SYS_EXIT_IRQ \ + TRACE_IRQS_ON; \ + sti; \ + call lockdep_sys_exit_thunk; \ + cli; \ + TRACE_IRQS_OFF; ++ + # else +-# define LOCKDEP_SYS_EXIT \ ++# define LOCKDEP_SYS_EXIT \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ ++ pushfl; \ ++ sti; \ + call lockdep_sys_exit; \ ++ popfl; \ + popl %edx; \ + popl %ecx; \ + popl %eax; ++ + # define LOCKDEP_SYS_EXIT_IRQ + # endif + #else + # define LOCKDEP_SYS_EXIT + # define LOCKDEP_SYS_EXIT_IRQ + #endif +-#endif /* __ASSEMBLY__ */ + ++#endif /* __ASSEMBLY__ */ + #endif +diff -uprN kernel/arch/x86/include/asm/irq_vectors.h kernel_new/arch/x86/include/asm/irq_vectors.h +--- kernel/arch/x86/include/asm/irq_vectors.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/irq_vectors.h 2021-04-01 18:28:07.652863290 +0800 +@@ -106,13 +106,18 @@ + + #define LOCAL_TIMER_VECTOR 0xec + +-#define NR_VECTORS 256 ++/* Interrupt pipeline IPIs */ ++#define IPIPE_HRTIMER_VECTOR 0xeb ++#define IPIPE_RESCHEDULE_VECTOR 0xea ++#define IPIPE_CRITICAL_VECTOR 0xe9 ++ ++/* ++ * I-pipe: Lowest vector number which may be assigned to a special ++ * APIC IRQ. We must know this at build time. ++ */ ++#define FIRST_SYSTEM_VECTOR IPIPE_CRITICAL_VECTOR + +-#ifdef CONFIG_X86_LOCAL_APIC +-#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +-#else +-#define FIRST_SYSTEM_VECTOR NR_VECTORS +-#endif ++#define NR_VECTORS 256 + + /* + * Size the maximum number of interrupts. 
+diff -uprN kernel/arch/x86/include/asm/mmu_context.h kernel_new/arch/x86/include/asm/mmu_context.h +--- kernel/arch/x86/include/asm/mmu_context.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/mmu_context.h 2021-04-01 18:28:07.652863290 +0800 +@@ -27,16 +27,16 @@ static inline void paravirt_activate_mm( + + DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); + +-static inline void load_mm_cr4(struct mm_struct *mm) ++static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) + { + if (static_branch_unlikely(&rdpmc_always_available_key) || + atomic_read(&mm->context.perf_rdpmc_allowed)) +- cr4_set_bits(X86_CR4_PCE); ++ cr4_set_bits_irqsoff(X86_CR4_PCE); + else +- cr4_clear_bits(X86_CR4_PCE); ++ cr4_clear_bits_irqsoff(X86_CR4_PCE); + } + #else +-static inline void load_mm_cr4(struct mm_struct *mm) {} ++static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {} + #endif + + #ifdef CONFIG_MODIFY_LDT_SYSCALL +@@ -173,7 +173,8 @@ static inline void switch_ldt(struct mm_ + load_mm_ldt(next); + #endif + +- DEBUG_LOCKS_WARN_ON(preemptible()); ++ DEBUG_LOCKS_WARN_ON(preemptible() && ++ (!IS_ENABLED(CONFIG_IPIPE) || !hard_irqs_disabled())); + } + + void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +@@ -213,6 +214,9 @@ extern void switch_mm_irqs_off(struct mm + struct task_struct *tsk); + #define switch_mm_irqs_off switch_mm_irqs_off + ++#define ipipe_switch_mm_head(prev, next, tsk) \ ++ switch_mm_irqs_off(prev, next, tsk) ++ + #define activate_mm(prev, next) \ + do { \ + paravirt_activate_mm((prev), (next)); \ +diff -uprN kernel/arch/x86/include/asm/thread_info.h kernel_new/arch/x86/include/asm/thread_info.h +--- kernel/arch/x86/include/asm/thread_info.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/thread_info.h 2021-04-01 18:28:07.652863290 +0800 +@@ -52,10 +52,15 @@ + struct task_struct; + #include + #include ++#include + + struct thread_info { + unsigned long flags; /* low level flags */ + u32 status; /* thread synchronous flags */ ++#ifdef CONFIG_IPIPE ++ unsigned long ipipe_flags; ++ struct ipipe_threadinfo ipipe_data; ++#endif + }; + + #define INIT_THREAD_INFO(tsk) \ +@@ -165,6 +170,17 @@ struct thread_info { + #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) + #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) + ++/* ti->ipipe_flags */ ++#define TIP_HEAD 0 /* Runs in head domain */ ++#define TIP_NOTIFY 1 /* Notify head domain about kernel events */ ++#define TIP_MAYDAY 2 /* MAYDAY call is pending */ ++#define TIP_USERINTRET 3 /* Notify on IRQ/trap return to root userspace */ ++ ++#define _TIP_HEAD (1 << TIP_HEAD) ++#define _TIP_NOTIFY (1 << TIP_NOTIFY) ++#define _TIP_MAYDAY (1 << TIP_MAYDAY) ++#define _TIP_USERINTRET (1 << TIP_USERINTRET) ++ + #define STACK_WARN (THREAD_SIZE/8) + + /* +diff -uprN kernel/arch/x86/include/asm/tlbflush.h kernel_new/arch/x86/include/asm/tlbflush.h +--- kernel/arch/x86/include/asm/tlbflush.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/tlbflush.h 2021-04-01 18:28:07.652863290 +0800 +@@ -304,26 +304,42 @@ static inline void __cr4_set(unsigned lo + } + + /* Set in this cpu's CR4. */ +-static inline void cr4_set_bits(unsigned long mask) ++static inline void cr4_set_bits_irqsoff(unsigned long mask) + { +- unsigned long cr4, flags; ++ unsigned long cr4; + +- local_irq_save(flags); + cr4 = this_cpu_read(cpu_tlbstate.cr4); + if ((cr4 | mask) != cr4) + __cr4_set(cr4 | mask); +- local_irq_restore(flags); + } + + /* Clear in this cpu's CR4. 
*/ +-static inline void cr4_clear_bits(unsigned long mask) ++static inline void cr4_clear_bits_irqsoff(unsigned long mask) + { +- unsigned long cr4, flags; ++ unsigned long cr4; + +- local_irq_save(flags); + cr4 = this_cpu_read(cpu_tlbstate.cr4); + if ((cr4 & ~mask) != cr4) + __cr4_set(cr4 & ~mask); ++} ++ ++/* Set in this cpu's CR4. */ ++static inline void cr4_set_bits(unsigned long mask) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ cr4_set_bits_irqsoff(mask); ++ local_irq_restore(flags); ++} ++ ++/* Clear in this cpu's CR4. */ ++static inline void cr4_clear_bits(unsigned long mask) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ cr4_clear_bits_irqsoff(mask); + local_irq_restore(flags); + } + +diff -uprN kernel/arch/x86/include/asm/tsc.h kernel_new/arch/x86/include/asm/tsc.h +--- kernel/arch/x86/include/asm/tsc.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/tsc.h 2021-04-01 18:28:07.652863290 +0800 +@@ -15,6 +15,8 @@ + */ + typedef unsigned long long cycles_t; + ++extern struct clocksource clocksource_tsc; ++ + extern unsigned int cpu_khz; + extern unsigned int tsc_khz; + +diff -uprN kernel/arch/x86/include/asm/uaccess.h kernel_new/arch/x86/include/asm/uaccess.h +--- kernel/arch/x86/include/asm/uaccess.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/uaccess.h 2021-04-01 18:28:07.653863289 +0800 +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -70,7 +71,7 @@ static inline bool __chk_range_not_ok(un + }) + + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP +-# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task()) ++# define WARN_ON_IN_IRQ() WARN_ON_ONCE(ipipe_root_p && !in_task()) + #else + # define WARN_ON_IN_IRQ() + #endif +diff -uprN kernel/arch/x86/Kconfig kernel_new/arch/x86/Kconfig +--- kernel/arch/x86/Kconfig 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/Kconfig 2021-04-01 18:28:07.653863289 +0800 +@@ -118,7 +118,7 @@ config X86 + select HAVE_ALIGNED_STRUCT_PAGE if SLUB + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE +- select HAVE_ARCH_JUMP_LABEL ++ select HAVE_ARCH_JUMP_LABEL if !IPIPE + select HAVE_ARCH_KASAN if X86_64 + select HAVE_ARCH_KGDB + select HAVE_ARCH_MMAP_RND_BITS if MMU +@@ -134,7 +134,7 @@ config X86 + select HAVE_ARCH_WITHIN_STACK_FRAMES + select HAVE_CMPXCHG_DOUBLE + select HAVE_CMPXCHG_LOCAL +- select HAVE_CONTEXT_TRACKING if X86_64 ++ select HAVE_CONTEXT_TRACKING if X86_64 && !IPIPE + select HAVE_COPY_THREAD_TLS + select HAVE_C_RECORDMCOUNT + select HAVE_DEBUG_KMEMLEAK +@@ -155,6 +155,12 @@ config X86 + select HAVE_IOREMAP_PROT + select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 + select HAVE_IRQ_TIME_ACCOUNTING ++ select HAVE_IPIPE_SUPPORT if X86_64 ++ select HAVE_IPIPE_TRACER_SUPPORT ++ select IPIPE_HAVE_HOSTRT if IPIPE ++ select IPIPE_HAVE_SAFE_THREAD_INFO if IPIPE ++ select IPIPE_WANT_PTE_PINNING if IPIPE ++ select IPIPE_HAVE_VM_NOTIFIER if IPIPE + select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 +@@ -745,6 +751,7 @@ if HYPERVISOR_GUEST + + config PARAVIRT + bool "Enable paravirtualization code" ++ depends on !IPIPE + ---help--- + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly +@@ -934,7 +941,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT + + config MAXSMP + bool "Enable Maximum number of SMP Processors and NUMA Nodes" +- depends on X86_64 && SMP && DEBUG_KERNEL ++ depends on X86_64 && SMP && 
DEBUG_KERNEL && !IPIPE + select CPUMASK_OFFSTACK + ---help--- + Enable maximum number of CPUS and NUMA Nodes for this architecture. +@@ -1034,6 +1041,8 @@ config SCHED_MC_PRIO + + If unsure say Y here. + ++source "kernel/ipipe/Kconfig" ++ + config UP_LATE_INIT + def_bool y + depends on !SMP && X86_LOCAL_APIC +diff -uprN kernel/arch/x86/Kconfig.orig kernel_new/arch/x86/Kconfig.orig +--- kernel/arch/x86/Kconfig.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/Kconfig.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,2982 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# Select 32 or 64 bit ++config 64BIT ++ bool "64-bit kernel" if "$(ARCH)" = "x86" ++ default "$(ARCH)" != "i386" ++ ---help--- ++ Say yes to build a 64-bit kernel - formerly known as x86_64 ++ Say no to build a 32-bit kernel - formerly known as i386 ++ ++config X86_32 ++ def_bool y ++ depends on !64BIT ++ # Options that are inherently 32-bit kernel only: ++ select ARCH_WANT_IPC_PARSE_VERSION ++ select CLKSRC_I8253 ++ select CLONE_BACKWARDS ++ select HAVE_GENERIC_DMA_COHERENT ++ select MODULES_USE_ELF_REL ++ select OLD_SIGACTION ++ ++config X86_64 ++ def_bool y ++ depends on 64BIT ++ # Options that are inherently 64-bit kernel only: ++ select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA ++ select ARCH_SUPPORTS_INT128 ++ select ARCH_USE_CMPXCHG_LOCKREF ++ select HAVE_ARCH_SOFT_DIRTY ++ select MODULES_USE_ELF_RELA ++ select NEED_DMA_MAP_STATE ++ select SWIOTLB ++ select X86_DEV_DMA_OPS ++ select ARCH_HAS_SYSCALL_WRAPPER ++ ++# ++# Arch settings ++# ++# ( Note that options that are marked 'if X86_64' could in principle be ++# ported to 32-bit as well. ) ++# ++config X86 ++ def_bool y ++ # ++ # Note: keep this list sorted alphabetically ++ # ++ select ACPI_LEGACY_TABLES_LOOKUP if ACPI ++ select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI ++ select ANON_INODES ++ select ARCH_32BIT_OFF_T if X86_32 ++ select ARCH_CLOCKSOURCE_DATA ++ select ARCH_DISCARD_MEMBLOCK ++ select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI ++ select ARCH_HAS_DEBUG_VIRTUAL ++ select ARCH_HAS_DEVMEM_IS_ALLOWED ++ select ARCH_HAS_ELF_RANDOMIZE ++ select ARCH_HAS_FAST_MULTIPLIER ++ select ARCH_HAS_FILTER_PGPROT ++ select ARCH_HAS_FORTIFY_SOURCE ++ select ARCH_HAS_GCOV_PROFILE_ALL ++ select ARCH_HAS_KCOV if X86_64 ++ select ARCH_HAS_MEMBARRIER_SYNC_CORE ++ select ARCH_HAS_PMEM_API if X86_64 ++ select ARCH_HAS_PTE_SPECIAL ++ select ARCH_HAS_REFCOUNT ++ select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 ++ select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE ++ select ARCH_HAS_SET_MEMORY ++ select ARCH_HAS_SG_CHAIN ++ select ARCH_HAS_STRICT_KERNEL_RWX ++ select ARCH_HAS_STRICT_MODULE_RWX ++ select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE ++ select ARCH_HAS_UBSAN_SANITIZE_ALL ++ select ARCH_HAS_ZONE_DEVICE if X86_64 ++ select ARCH_HAVE_NMI_SAFE_CMPXCHG ++ select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI ++ select ARCH_MIGHT_HAVE_PC_PARPORT ++ select ARCH_MIGHT_HAVE_PC_SERIO ++ select ARCH_SUPPORTS_ACPI ++ select ARCH_SUPPORTS_ATOMIC_RMW ++ select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 ++ select ARCH_USE_BUILTIN_BSWAP ++ select ARCH_USE_QUEUED_RWLOCKS ++ select ARCH_USE_QUEUED_SPINLOCKS ++ select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH ++ select ARCH_WANTS_DYNAMIC_TASK_STRUCT ++ select ARCH_WANTS_THP_SWAP if X86_64 ++ select BUILDTIME_EXTABLE_SORT ++ select CLKEVT_I8253 ++ select CLOCKSOURCE_VALIDATE_LAST_CYCLE ++ select CLOCKSOURCE_WATCHDOG ++ select DCACHE_WORD_ACCESS ++ select DMA_DIRECT_OPS ++ select EDAC_ATOMIC_SCRUB ++ select EDAC_SUPPORT ++ select GENERIC_CLOCKEVENTS 
++ select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) ++ select GENERIC_CLOCKEVENTS_MIN_ADJUST ++ select GENERIC_CMOS_UPDATE ++ select GENERIC_CPU_AUTOPROBE ++ select GENERIC_CPU_VULNERABILITIES ++ select GENERIC_EARLY_IOREMAP ++ select GENERIC_FIND_FIRST_BIT ++ select GENERIC_IOMAP ++ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP ++ select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC ++ select GENERIC_IRQ_MIGRATION if SMP ++ select GENERIC_IRQ_PROBE ++ select GENERIC_IRQ_RESERVATION_MODE ++ select GENERIC_IRQ_SHOW ++ select GENERIC_PENDING_IRQ if SMP ++ select GENERIC_SMP_IDLE_THREAD ++ select GENERIC_STRNCPY_FROM_USER ++ select GENERIC_STRNLEN_USER ++ select GENERIC_TIME_VSYSCALL ++ select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 ++ select HAVE_ACPI_APEI if ACPI ++ select HAVE_ACPI_APEI_NMI if ACPI ++ select HAVE_ALIGNED_STRUCT_PAGE if SLUB ++ select HAVE_ARCH_AUDITSYSCALL ++ select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE ++ select HAVE_ARCH_JUMP_LABEL ++ select HAVE_ARCH_KASAN if X86_64 ++ select HAVE_ARCH_KGDB ++ select HAVE_ARCH_MMAP_RND_BITS if MMU ++ select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT ++ select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT ++ select HAVE_ARCH_PREL32_RELOCATIONS ++ select HAVE_ARCH_SECCOMP_FILTER ++ select HAVE_ARCH_THREAD_STRUCT_WHITELIST ++ select HAVE_ARCH_TRACEHOOK ++ select HAVE_ARCH_TRANSPARENT_HUGEPAGE ++ select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 ++ select HAVE_ARCH_VMAP_STACK if X86_64 ++ select HAVE_ARCH_WITHIN_STACK_FRAMES ++ select HAVE_CMPXCHG_DOUBLE ++ select HAVE_CMPXCHG_LOCAL ++ select HAVE_CONTEXT_TRACKING if X86_64 ++ select HAVE_COPY_THREAD_TLS ++ select HAVE_C_RECORDMCOUNT ++ select HAVE_DEBUG_KMEMLEAK ++ select HAVE_DEBUG_STACKOVERFLOW ++ select HAVE_DMA_CONTIGUOUS ++ select HAVE_DYNAMIC_FTRACE ++ select HAVE_DYNAMIC_FTRACE_WITH_REGS ++ select HAVE_EBPF_JIT ++ select HAVE_EFFICIENT_UNALIGNED_ACCESS ++ select HAVE_EXIT_THREAD ++ select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE ++ select HAVE_FTRACE_MCOUNT_RECORD ++ select HAVE_FUNCTION_GRAPH_TRACER ++ select HAVE_FUNCTION_TRACER ++ select HAVE_GCC_PLUGINS ++ select HAVE_HW_BREAKPOINT ++ select HAVE_IDE ++ select HAVE_IOREMAP_PROT ++ select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 ++ select HAVE_IRQ_TIME_ACCOUNTING ++ select HAVE_KERNEL_BZIP2 ++ select HAVE_KERNEL_GZIP ++ select HAVE_KERNEL_LZ4 ++ select HAVE_KERNEL_LZMA ++ select HAVE_KERNEL_LZO ++ select HAVE_KERNEL_XZ ++ select HAVE_KPROBES ++ select HAVE_KPROBES_ON_FTRACE ++ select HAVE_FUNCTION_ERROR_INJECTION ++ select HAVE_KRETPROBES ++ select HAVE_KVM ++ select HAVE_LIVEPATCH_FTRACE if X86_64 ++ select HAVE_LIVEPATCH_WO_FTRACE if X86_64 ++ select HAVE_MEMBLOCK ++ select HAVE_MEMBLOCK_NODE_MAP ++ select HAVE_MIXED_BREAKPOINTS_REGS ++ select HAVE_MOD_ARCH_SPECIFIC ++ select HAVE_NMI ++ select HAVE_OPROFILE ++ select HAVE_OPTPROBES ++ select HAVE_PCSPKR_PLATFORM ++ select HAVE_PERF_EVENTS ++ select HAVE_PERF_EVENTS_NMI ++ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI ++ select HAVE_PERF_REGS ++ select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_RCU_TABLE_FREE if PARAVIRT ++ select HAVE_RCU_TABLE_INVALIDATE if HAVE_RCU_TABLE_FREE ++ select HAVE_REGS_AND_STACK_ACCESS_API ++ select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION ++ select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR ++ select HAVE_STACK_VALIDATION if X86_64 ++ select HAVE_RSEQ ++ select HAVE_SYSCALL_TRACEPOINTS ++ select HAVE_UNSTABLE_SCHED_CLOCK ++ 
select HAVE_USER_RETURN_NOTIFIER ++ select HOTPLUG_SMT if SMP ++ select IRQ_FORCED_THREADING ++ select NEED_SG_DMA_LENGTH ++ select PCI_LOCKLESS_CONFIG ++ select PERF_EVENTS ++ select RTC_LIB ++ select RTC_MC146818_LIB ++ select SPARSE_IRQ ++ select SRCU ++ select SYSCTL_EXCEPTION_TRACE ++ select THREAD_INFO_IN_TASK ++ select USER_STACKTRACE_SUPPORT ++ select VIRT_TO_BUS ++ select X86_FEATURE_NAMES if PROC_FS ++ ++config INSTRUCTION_DECODER ++ def_bool y ++ depends on KPROBES || PERF_EVENTS || UPROBES ++ ++config OUTPUT_FORMAT ++ string ++ default "elf32-i386" if X86_32 ++ default "elf64-x86-64" if X86_64 ++ ++config ARCH_DEFCONFIG ++ string ++ default "arch/x86/configs/i386_defconfig" if X86_32 ++ default "arch/x86/configs/x86_64_defconfig" if X86_64 ++ ++config LOCKDEP_SUPPORT ++ def_bool y ++ ++config STACKTRACE_SUPPORT ++ def_bool y ++ ++config MMU ++ def_bool y ++ ++config ARCH_MMAP_RND_BITS_MIN ++ default 28 if 64BIT ++ default 8 ++ ++config ARCH_MMAP_RND_BITS_MAX ++ default 32 if 64BIT ++ default 16 ++ ++config ARCH_MMAP_RND_COMPAT_BITS_MIN ++ default 8 ++ ++config ARCH_MMAP_RND_COMPAT_BITS_MAX ++ default 16 ++ ++config SBUS ++ bool ++ ++config GENERIC_ISA_DMA ++ def_bool y ++ depends on ISA_DMA_API ++ ++config GENERIC_BUG ++ def_bool y ++ depends on BUG ++ select GENERIC_BUG_RELATIVE_POINTERS if X86_64 ++ ++config GENERIC_BUG_RELATIVE_POINTERS ++ bool ++ ++config GENERIC_HWEIGHT ++ def_bool y ++ ++config ARCH_MAY_HAVE_PC_FDC ++ def_bool y ++ depends on ISA_DMA_API ++ ++config RWSEM_XCHGADD_ALGORITHM ++ def_bool y ++ ++config GENERIC_CALIBRATE_DELAY ++ def_bool y ++ ++config ARCH_HAS_CPU_RELAX ++ def_bool y ++ ++config ARCH_HAS_CACHE_LINE_SIZE ++ def_bool y ++ ++config ARCH_HAS_FILTER_PGPROT ++ def_bool y ++ ++config HAVE_SETUP_PER_CPU_AREA ++ def_bool y ++ ++config NEED_PER_CPU_EMBED_FIRST_CHUNK ++ def_bool y ++ ++config NEED_PER_CPU_PAGE_FIRST_CHUNK ++ def_bool y ++ ++config ARCH_HIBERNATION_POSSIBLE ++ def_bool y ++ ++config ARCH_SUSPEND_POSSIBLE ++ def_bool y ++ ++config ARCH_WANT_HUGE_PMD_SHARE ++ def_bool y ++ ++config ARCH_WANT_GENERAL_HUGETLB ++ def_bool y ++ ++config ZONE_DMA32 ++ def_bool y if X86_64 ++ ++config AUDIT_ARCH ++ def_bool y if X86_64 ++ ++config ARCH_SUPPORTS_OPTIMIZED_INLINING ++ def_bool y ++ ++config ARCH_SUPPORTS_DEBUG_PAGEALLOC ++ def_bool y ++ ++config KASAN_SHADOW_OFFSET ++ hex ++ depends on KASAN ++ default 0xdffffc0000000000 ++ ++config HAVE_INTEL_TXT ++ def_bool y ++ depends on INTEL_IOMMU && ACPI ++ ++config X86_32_SMP ++ def_bool y ++ depends on X86_32 && SMP ++ ++config X86_64_SMP ++ def_bool y ++ depends on X86_64 && SMP ++ ++config X86_32_LAZY_GS ++ def_bool y ++ depends on X86_32 && !STACKPROTECTOR ++ ++config ARCH_SUPPORTS_UPROBES ++ def_bool y ++ ++config FIX_EARLYCON_MEM ++ def_bool y ++ ++config DYNAMIC_PHYSICAL_MASK ++ bool ++ ++config PGTABLE_LEVELS ++ int ++ default 5 if X86_5LEVEL ++ default 4 if X86_64 ++ default 3 if X86_PAE ++ default 2 ++ ++config CC_HAS_SANE_STACKPROTECTOR ++ bool ++ default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC)) if 64BIT ++ default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC)) ++ help ++ We have to make sure stack protector is unconditionally disabled if ++ the compiler produces broken code. ++ ++menu "Processor type and features" ++ ++config ZONE_DMA ++ bool "DMA memory allocation support" if EXPERT ++ default y ++ help ++ DMA memory allocation support allows devices with less than 32-bit ++ addressing to allocate within the first 16MB of address space. 
++ Disable if no such devices will be used. ++ ++ If unsure, say Y. ++ ++config SMP ++ bool "Symmetric multi-processing support" ++ ---help--- ++ This enables support for systems with more than one CPU. If you have ++ a system with only one CPU, say N. If you have a system with more ++ than one CPU, say Y. ++ ++ If you say N here, the kernel will run on uni- and multiprocessor ++ machines, but will use only one CPU of a multiprocessor machine. If ++ you say Y here, the kernel will run on many, but not all, ++ uniprocessor machines. On a uniprocessor machine, the kernel ++ will run faster if you say N here. ++ ++ Note that if you say Y here and choose architecture "586" or ++ "Pentium" under "Processor family", the kernel will not work on 486 ++ architectures. Similarly, multiprocessor kernels for the "PPro" ++ architecture may not work on all Pentium based boards. ++ ++ People using multiprocessor machines who say Y here should also say ++ Y to "Enhanced Real Time Clock Support", below. The "Advanced Power ++ Management" code will be disabled if you say Y here. ++ ++ See also , ++ and the SMP-HOWTO available at ++ . ++ ++ If you don't know what to do here, say N. ++ ++config X86_FEATURE_NAMES ++ bool "Processor feature human-readable names" if EMBEDDED ++ default y ++ ---help--- ++ This option compiles in a table of x86 feature bits and corresponding ++ names. This is required to support /proc/cpuinfo and a few kernel ++ messages. You can disable this to save space, at the expense of ++ making those few kernel messages show numeric feature bits instead. ++ ++ If in doubt, say Y. ++ ++config X86_X2APIC ++ bool "Support x2apic" ++ depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST) ++ ---help--- ++ This enables x2apic support on CPUs that have this feature. ++ ++ This allows 32-bit apic IDs (so it can support very large systems), ++ and accesses the local apic via MSRs not via mmio. ++ ++ If you don't know what to do here, say N. ++ ++config X86_MPPARSE ++ bool "Enable MPS table" if ACPI || SFI ++ default y ++ depends on X86_LOCAL_APIC ++ ---help--- ++ For old smp systems that do not have proper acpi support. Newer systems ++ (esp with 64bit cpus) with acpi support, MADT and DSDT will override it ++ ++config GOLDFISH ++ def_bool y ++ depends on X86_GOLDFISH ++ ++config RETPOLINE ++ bool "Avoid speculative indirect branches in kernel" ++ default y ++ select STACK_VALIDATION if HAVE_STACK_VALIDATION ++ help ++ Compile kernel with the retpoline compiler options to guard against ++ kernel-to-user data leaks by avoiding speculative indirect ++ branches. Requires a compiler with -mindirect-branch=thunk-extern ++ support for full protection. The kernel may run slower. ++ ++config INTEL_RDT ++ bool "Intel Resource Director Technology support" ++ default n ++ depends on X86 && CPU_SUP_INTEL ++ select KERNFS ++ help ++ Select to enable resource allocation and monitoring which are ++ sub-features of Intel Resource Director Technology(RDT). More ++ information about RDT can be found in the Intel x86 ++ Architecture Software Developer Manual. ++ ++ Say N if unsure. ++ ++if X86_32 ++config X86_BIGSMP ++ bool "Support for big SMP systems with more than 8 CPUs" ++ depends on SMP ++ ---help--- ++ This option is needed for the systems that have more than 8 CPUs ++ ++config X86_EXTENDED_PLATFORM ++ bool "Support for extended (non-PC) x86 platforms" ++ default y ++ ---help--- ++ If you disable this option then the kernel will only support ++ standard PC platforms. 
(which covers the vast majority of ++ systems out there.) ++ ++ If you enable this option then you'll be able to select support ++ for the following (non-PC) 32 bit x86 platforms: ++ Goldfish (Android emulator) ++ AMD Elan ++ RDC R-321x SoC ++ SGI 320/540 (Visual Workstation) ++ STA2X11-based (e.g. Northville) ++ Moorestown MID devices ++ ++ If you have one of these systems, or if you want to build a ++ generic distribution kernel, say Y here - otherwise say N. ++endif ++ ++if X86_64 ++config X86_EXTENDED_PLATFORM ++ bool "Support for extended (non-PC) x86 platforms" ++ default y ++ ---help--- ++ If you disable this option then the kernel will only support ++ standard PC platforms. (which covers the vast majority of ++ systems out there.) ++ ++ If you enable this option then you'll be able to select support ++ for the following (non-PC) 64 bit x86 platforms: ++ Numascale NumaChip ++ ScaleMP vSMP ++ SGI Ultraviolet ++ ++ If you have one of these systems, or if you want to build a ++ generic distribution kernel, say Y here - otherwise say N. ++endif ++# This is an alphabetically sorted list of 64 bit extended platforms ++# Please maintain the alphabetic order if and when there are additions ++config X86_NUMACHIP ++ bool "Numascale NumaChip" ++ depends on X86_64 ++ depends on X86_EXTENDED_PLATFORM ++ depends on NUMA ++ depends on SMP ++ depends on X86_X2APIC ++ depends on PCI_MMCONFIG ++ ---help--- ++ Adds support for Numascale NumaChip large-SMP systems. Needed to ++ enable more than ~168 cores. ++ If you don't have one of these, you should say N here. ++ ++config X86_VSMP ++ bool "ScaleMP vSMP" ++ select HYPERVISOR_GUEST ++ select PARAVIRT ++ depends on X86_64 && PCI ++ depends on X86_EXTENDED_PLATFORM ++ depends on SMP ++ ---help--- ++ Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is ++ supposed to run on these EM64T-based machines. Only choose this option ++ if you have one of these machines. ++ ++config X86_UV ++ bool "SGI Ultraviolet" ++ depends on X86_64 ++ depends on X86_EXTENDED_PLATFORM ++ depends on NUMA ++ depends on EFI ++ depends on X86_X2APIC ++ depends on PCI ++ ---help--- ++ This option is needed in order to support SGI Ultraviolet systems. ++ If you don't have one of these, you should say N here. ++ ++# Following is an alphabetically sorted list of 32 bit extended platforms ++# Please maintain the alphabetic order if and when there are additions ++ ++config X86_GOLDFISH ++ bool "Goldfish (Virtual Platform)" ++ depends on X86_EXTENDED_PLATFORM ++ ---help--- ++ Enable support for the Goldfish virtual platform used primarily ++ for Android development. Unless you are building for the Android ++ Goldfish emulator say N here. ++ ++config X86_INTEL_CE ++ bool "CE4100 TV platform" ++ depends on PCI ++ depends on PCI_GODIRECT ++ depends on X86_IO_APIC ++ depends on X86_32 ++ depends on X86_EXTENDED_PLATFORM ++ select X86_REBOOTFIXUPS ++ select OF ++ select OF_EARLY_FLATTREE ++ ---help--- ++ Select for the Intel CE media processor (CE4100) SOC. ++ This option compiles in support for the CE4100 SOC for settop ++ boxes and media devices. 
++ ++config X86_INTEL_MID ++ bool "Intel MID platform support" ++ depends on X86_EXTENDED_PLATFORM ++ depends on X86_PLATFORM_DEVICES ++ depends on PCI ++ depends on X86_64 || (PCI_GOANY && X86_32) ++ depends on X86_IO_APIC ++ select SFI ++ select I2C ++ select DW_APB_TIMER ++ select APB_TIMER ++ select INTEL_SCU_IPC ++ select MFD_INTEL_MSIC ++ ---help--- ++ Select to build a kernel capable of supporting Intel MID (Mobile ++ Internet Device) platform systems which do not have the PCI legacy ++ interfaces. If you are building for a PC class system say N here. ++ ++ Intel MID platforms are based on an Intel processor and chipset which ++ consume less power than most of the x86 derivatives. ++ ++config X86_INTEL_QUARK ++ bool "Intel Quark platform support" ++ depends on X86_32 ++ depends on X86_EXTENDED_PLATFORM ++ depends on X86_PLATFORM_DEVICES ++ depends on X86_TSC ++ depends on PCI ++ depends on PCI_GOANY ++ depends on X86_IO_APIC ++ select IOSF_MBI ++ select INTEL_IMR ++ select COMMON_CLK ++ ---help--- ++ Select to include support for Quark X1000 SoC. ++ Say Y here if you have a Quark based system such as the Arduino ++ compatible Intel Galileo. ++ ++config X86_INTEL_LPSS ++ bool "Intel Low Power Subsystem Support" ++ depends on X86 && ACPI ++ select COMMON_CLK ++ select PINCTRL ++ select IOSF_MBI ++ ---help--- ++ Select to build support for Intel Low Power Subsystem such as ++ found on Intel Lynxpoint PCH. Selecting this option enables ++ things like clock tree (common clock framework) and pincontrol ++ which are needed by the LPSS peripheral drivers. ++ ++config X86_AMD_PLATFORM_DEVICE ++ bool "AMD ACPI2Platform devices support" ++ depends on ACPI ++ select COMMON_CLK ++ select PINCTRL ++ ---help--- ++ Select to interpret AMD specific ACPI device to platform device ++ such as I2C, UART, GPIO found on AMD Carrizo and later chipsets. ++ I2C and UART depend on COMMON_CLK to set clock. GPIO driver is ++ implemented under PINCTRL subsystem. ++ ++config IOSF_MBI ++ tristate "Intel SoC IOSF Sideband support for SoC platforms" ++ depends on PCI ++ ---help--- ++ This option enables sideband register access support for Intel SoC ++ platforms. On these platforms the IOSF sideband is used in lieu of ++ MSR's for some register accesses, mostly but not limited to thermal ++ and power. Drivers may query the availability of this device to ++ determine if they need the sideband in order to work on these ++ platforms. The sideband is available on the following SoC products. ++ This list is not meant to be exclusive. ++ - BayTrail ++ - Braswell ++ - Quark ++ ++ You should say Y if you are running a kernel on one of these SoC's. ++ ++config IOSF_MBI_DEBUG ++ bool "Enable IOSF sideband access through debugfs" ++ depends on IOSF_MBI && DEBUG_FS ++ ---help--- ++ Select this option to expose the IOSF sideband access registers (MCR, ++ MDR, MCRX) through debugfs to write and read register information from ++ different units on the SoC. This is most useful for obtaining device ++ state information for debug and analysis. As this is a general access ++ mechanism, users of this option would have specific knowledge of the ++ device they want to access. ++ ++ If you don't require the option or are in doubt, say N. ++ ++config X86_RDC321X ++ bool "RDC R-321x SoC" ++ depends on X86_32 ++ depends on X86_EXTENDED_PLATFORM ++ select M486 ++ select X86_REBOOTFIXUPS ++ ---help--- ++ This option is needed for RDC R-321x system-on-chip, also known ++ as R-8610-(G). 
++ If you don't have one of these chips, you should say N here. ++ ++config X86_32_NON_STANDARD ++ bool "Support non-standard 32-bit SMP architectures" ++ depends on X86_32 && SMP ++ depends on X86_EXTENDED_PLATFORM ++ ---help--- ++ This option compiles in the bigsmp and STA2X11 default ++ subarchitectures. It is intended for a generic binary ++ kernel. If you select them all, kernel will probe it one by ++ one and will fallback to default. ++ ++# Alphabetically sorted list of Non standard 32 bit platforms ++ ++config X86_SUPPORTS_MEMORY_FAILURE ++ def_bool y ++ # MCE code calls memory_failure(): ++ depends on X86_MCE ++ # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: ++ # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: ++ depends on X86_64 || !SPARSEMEM ++ select ARCH_SUPPORTS_MEMORY_FAILURE ++ ++config STA2X11 ++ bool "STA2X11 Companion Chip Support" ++ depends on X86_32_NON_STANDARD && PCI ++ select ARCH_HAS_PHYS_TO_DMA ++ select X86_DEV_DMA_OPS ++ select X86_DMA_REMAP ++ select SWIOTLB ++ select MFD_STA2X11 ++ select GPIOLIB ++ default n ++ ---help--- ++ This adds support for boards based on the STA2X11 IO-Hub, ++ a.k.a. "ConneXt". The chip is used in place of the standard ++ PC chipset, so all "standard" peripherals are missing. If this ++ option is selected the kernel will still be able to boot on ++ standard PC machines. ++ ++config X86_32_IRIS ++ tristate "Eurobraille/Iris poweroff module" ++ depends on X86_32 ++ ---help--- ++ The Iris machines from EuroBraille do not have APM or ACPI support ++ to shut themselves down properly. A special I/O sequence is ++ needed to do so, which is what this module does at ++ kernel shutdown. ++ ++ This is only for Iris machines from EuroBraille. ++ ++ If unused, say N. ++ ++config SCHED_OMIT_FRAME_POINTER ++ def_bool y ++ prompt "Single-depth WCHAN output" ++ depends on X86 ++ ---help--- ++ Calculate simpler /proc//wchan values. If this option ++ is disabled then wchan values will recurse back to the ++ caller function. This provides more accurate wchan values, ++ at the expense of slightly more scheduling overhead. ++ ++ If in doubt, say "Y". ++ ++menuconfig HYPERVISOR_GUEST ++ bool "Linux guest support" ++ ---help--- ++ Say Y here to enable options for running Linux under various hyper- ++ visors. This option enables basic hypervisor detection and platform ++ setup. ++ ++ If you say N, all options in this submenu will be skipped and ++ disabled, and Linux guest support won't be built in. ++ ++if HYPERVISOR_GUEST ++ ++config PARAVIRT ++ bool "Enable paravirtualization code" ++ ---help--- ++ This changes the kernel so it can modify itself when it is run ++ under a hypervisor, potentially improving performance significantly ++ over full virtualization. However, when run without a hypervisor ++ the kernel is theoretically slower and slightly larger. ++ ++config PARAVIRT_DEBUG ++ bool "paravirt-ops debugging" ++ depends on PARAVIRT && DEBUG_KERNEL ++ ---help--- ++ Enable to debug paravirt_ops internals. Specifically, BUG if ++ a paravirt_op is missing when it is called. ++ ++config PARAVIRT_SPINLOCKS ++ bool "Paravirtualization layer for spinlocks" ++ depends on PARAVIRT && SMP ++ ---help--- ++ Paravirtualized spinlocks allow a pvops backend to replace the ++ spinlock implementation with something virtualization-friendly ++ (for example, block the virtual CPU rather than spinning). ++ ++ It has a minimal impact on native kernels and gives a nice performance ++ benefit on paravirtualized KVM / Xen kernels. 
++ ++ If you are unsure how to answer this question, answer Y. ++ ++config QUEUED_LOCK_STAT ++ bool "Paravirt queued spinlock statistics" ++ depends on PARAVIRT_SPINLOCKS && DEBUG_FS ++ ---help--- ++ Enable the collection of statistical data on the slowpath ++ behavior of paravirtualized queued spinlocks and report ++ them on debugfs. ++ ++source "arch/x86/xen/Kconfig" ++ ++config KVM_GUEST ++ bool "KVM Guest support (including kvmclock)" ++ depends on PARAVIRT ++ select PARAVIRT_CLOCK ++ default y ++ ---help--- ++ This option enables various optimizations for running under the KVM ++ hypervisor. It includes a paravirtualized clock, so that instead ++ of relying on a PIT (or probably other) emulation by the ++ underlying device model, the host provides the guest with ++ timing infrastructure such as time of day, and system time ++ ++config KVM_DEBUG_FS ++ bool "Enable debug information for KVM Guests in debugfs" ++ depends on KVM_GUEST && DEBUG_FS ++ default n ++ ---help--- ++ This option enables collection of various statistics for KVM guest. ++ Statistics are displayed in debugfs filesystem. Enabling this option ++ may incur significant overhead. ++ ++config PARAVIRT_TIME_ACCOUNTING ++ bool "Paravirtual steal time accounting" ++ depends on PARAVIRT ++ default n ++ ---help--- ++ Select this option to enable fine granularity task steal time ++ accounting. Time spent executing other tasks in parallel with ++ the current vCPU is discounted from the vCPU power. To account for ++ that, there can be a small performance impact. ++ ++ If in doubt, say N here. ++ ++config PARAVIRT_CLOCK ++ bool ++ ++config JAILHOUSE_GUEST ++ bool "Jailhouse non-root cell support" ++ depends on X86_64 && PCI ++ select X86_PM_TIMER ++ ---help--- ++ This option allows to run Linux as guest in a Jailhouse non-root ++ cell. You can leave this option disabled if you only want to start ++ Jailhouse and run Linux afterwards in the root cell. ++ ++endif #HYPERVISOR_GUEST ++ ++config NO_BOOTMEM ++ def_bool y ++ ++source "arch/x86/Kconfig.cpu" ++ ++config HPET_TIMER ++ def_bool X86_64 ++ prompt "HPET Timer Support" if X86_32 ++ ---help--- ++ Use the IA-PC HPET (High Precision Event Timer) to manage ++ time in preference to the PIT and RTC, if a HPET is ++ present. ++ HPET is the next generation timer replacing legacy 8254s. ++ The HPET provides a stable time base on SMP ++ systems, unlike the TSC, but it is more expensive to access, ++ as it is off-chip. The interface used is documented ++ in the HPET spec, revision 1. ++ ++ You can safely choose Y here. However, HPET will only be ++ activated if the platform and the BIOS support this feature. ++ Otherwise the 8254 will be used for timing services. ++ ++ Choose N to continue using the legacy 8254 timer. ++ ++config HPET_EMULATE_RTC ++ def_bool y ++ depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y) ++ ++config APB_TIMER ++ def_bool y if X86_INTEL_MID ++ prompt "Intel MID APB Timer Support" if X86_INTEL_MID ++ select DW_APB_TIMER ++ depends on X86_INTEL_MID && SFI ++ help ++ APB timer is the replacement for 8254, HPET on X86 MID platforms. ++ The APBT provides a stable time base on SMP ++ systems, unlike the TSC, but it is more expensive to access, ++ as it is off-chip. APB timers are always running regardless of CPU ++ C states, they are used as per CPU clockevent device when possible. ++ ++# Mark as expert because too many people got it wrong. ++# The code disables itself when not needed. 
++config DMI ++ default y ++ select DMI_SCAN_MACHINE_NON_EFI_FALLBACK ++ bool "Enable DMI scanning" if EXPERT ++ ---help--- ++ Enabled scanning of DMI to identify machine quirks. Say Y ++ here unless you have verified that your setup is not ++ affected by entries in the DMI blacklist. Required by PNP ++ BIOS code. ++ ++config GART_IOMMU ++ bool "Old AMD GART IOMMU support" ++ select IOMMU_HELPER ++ select SWIOTLB ++ depends on X86_64 && PCI && AMD_NB ++ ---help--- ++ Provides a driver for older AMD Athlon64/Opteron/Turion/Sempron ++ GART based hardware IOMMUs. ++ ++ The GART supports full DMA access for devices with 32-bit access ++ limitations, on systems with more than 3 GB. This is usually needed ++ for USB, sound, many IDE/SATA chipsets and some other devices. ++ ++ Newer systems typically have a modern AMD IOMMU, supported via ++ the CONFIG_AMD_IOMMU=y config option. ++ ++ In normal configurations this driver is only active when needed: ++ there's more than 3 GB of memory and the system contains a ++ 32-bit limited device. ++ ++ If unsure, say Y. ++ ++config CALGARY_IOMMU ++ bool "IBM Calgary IOMMU support" ++ select IOMMU_HELPER ++ select SWIOTLB ++ depends on X86_64 && PCI ++ ---help--- ++ Support for hardware IOMMUs in IBM's xSeries x366 and x460 ++ systems. Needed to run systems with more than 3GB of memory ++ properly with 32-bit PCI devices that do not support DAC ++ (Double Address Cycle). Calgary also supports bus level ++ isolation, where all DMAs pass through the IOMMU. This ++ prevents them from going anywhere except their intended ++ destination. This catches hard-to-find kernel bugs and ++ mis-behaving drivers and devices that do not use the DMA-API ++ properly to set up their DMA buffers. The IOMMU can be ++ turned off at boot time with the iommu=off parameter. ++ Normally the kernel will make the right choice by itself. ++ If unsure, say Y. ++ ++config CALGARY_IOMMU_ENABLED_BY_DEFAULT ++ def_bool y ++ prompt "Should Calgary be enabled by default?" ++ depends on CALGARY_IOMMU ++ ---help--- ++ Should Calgary be enabled by default? if you choose 'y', Calgary ++ will be used (if it exists). If you choose 'n', Calgary will not be ++ used even if it exists. If you choose 'n' and would like to use ++ Calgary anyway, pass 'iommu=calgary' on the kernel command line. ++ If unsure, say Y. ++ ++config MAXSMP ++ bool "Enable Maximum number of SMP Processors and NUMA Nodes" ++ depends on X86_64 && SMP && DEBUG_KERNEL ++ select CPUMASK_OFFSTACK ++ ---help--- ++ Enable maximum number of CPUS and NUMA Nodes for this architecture. ++ If unsure, say N. ++ ++# ++# The maximum number of CPUs supported: ++# ++# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT, ++# and which can be configured interactively in the ++# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range. ++# ++# The ranges are different on 32-bit and 64-bit kernels, depending on ++# hardware capabilities and scalability features of the kernel. ++# ++# ( If MAXSMP is enabled we just use the highest possible value and disable ++# interactive configuration. 
) ++# ++ ++config NR_CPUS_RANGE_BEGIN ++ int ++ default NR_CPUS_RANGE_END if MAXSMP ++ default 1 if !SMP ++ default 2 ++ ++config NR_CPUS_RANGE_END ++ int ++ depends on X86_32 ++ default 64 if SMP && X86_BIGSMP ++ default 8 if SMP && !X86_BIGSMP ++ default 1 if !SMP ++ ++config NR_CPUS_RANGE_END ++ int ++ depends on X86_64 ++ default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK) ++ default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK) ++ default 1 if !SMP ++ ++config NR_CPUS_DEFAULT ++ int ++ depends on X86_32 ++ default 32 if X86_BIGSMP ++ default 8 if SMP ++ default 1 if !SMP ++ ++config NR_CPUS_DEFAULT ++ int ++ depends on X86_64 ++ default 8192 if MAXSMP ++ default 64 if SMP ++ default 1 if !SMP ++ ++config NR_CPUS ++ int "Maximum number of CPUs" if SMP && !MAXSMP ++ range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END ++ default NR_CPUS_DEFAULT ++ ---help--- ++ This allows you to specify the maximum number of CPUs which this ++ kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum ++ supported value is 8192, otherwise the maximum value is 512. The ++ minimum value which makes sense is 2. ++ ++ This is purely to save memory: each supported CPU adds about 8KB ++ to the kernel image. ++ ++config SCHED_SMT ++ def_bool y if SMP ++ ++config SCHED_MC ++ def_bool y ++ prompt "Multi-core scheduler support" ++ depends on SMP ++ ---help--- ++ Multi-core scheduler support improves the CPU scheduler's decision ++ making when dealing with multi-core CPU chips at a cost of slightly ++ increased overhead in some places. If unsure say N here. ++ ++config SCHED_MC_PRIO ++ bool "CPU core priorities scheduler support" ++ depends on SCHED_MC && CPU_SUP_INTEL ++ select X86_INTEL_PSTATE ++ select CPU_FREQ ++ default y ++ ---help--- ++ Intel Turbo Boost Max Technology 3.0 enabled CPUs have a ++ core ordering determined at manufacturing time, which allows ++ certain cores to reach higher turbo frequencies (when running ++ single threaded workloads) than others. ++ ++ Enabling this kernel feature teaches the scheduler about ++ the TBM3 (aka ITMT) priority order of the CPU cores and adjusts the ++ scheduler's CPU selection logic accordingly, so that higher ++ overall system performance can be achieved. ++ ++ This feature will have no effect on CPUs without this feature. ++ ++ If unsure say Y here. ++ ++config UP_LATE_INIT ++ def_bool y ++ depends on !SMP && X86_LOCAL_APIC ++ ++config X86_UP_APIC ++ bool "Local APIC support on uniprocessors" if !PCI_MSI ++ default PCI_MSI ++ depends on X86_32 && !SMP && !X86_32_NON_STANDARD ++ ---help--- ++ A local APIC (Advanced Programmable Interrupt Controller) is an ++ integrated interrupt controller in the CPU. If you have a single-CPU ++ system which has a processor with a local APIC, you can say Y here to ++ enable and use it. If you say Y here even though your machine doesn't ++ have a local APIC, then the kernel will still run with no slowdown at ++ all. The local APIC supports CPU-generated self-interrupts (timer, ++ performance counters), and the NMI watchdog which detects hard ++ lockups. ++ ++config X86_UP_IOAPIC ++ bool "IO-APIC support on uniprocessors" ++ depends on X86_UP_APIC ++ ---help--- ++ An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an ++ SMP-capable replacement for PC-style interrupt controllers. Most ++ SMP systems and many recent uniprocessor systems have one. ++ ++ If you have a single-CPU system with an IO-APIC, you can say Y here ++ to use it. 
If you say Y here even though your machine doesn't have ++ an IO-APIC, then the kernel will still run with no slowdown at all. ++ ++config X86_LOCAL_APIC ++ def_bool y ++ depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI ++ select IRQ_DOMAIN_HIERARCHY ++ select PCI_MSI_IRQ_DOMAIN if PCI_MSI ++ ++config X86_IO_APIC ++ def_bool y ++ depends on X86_LOCAL_APIC || X86_UP_IOAPIC ++ ++config X86_REROUTE_FOR_BROKEN_BOOT_IRQS ++ bool "Reroute for broken boot IRQs" ++ depends on X86_IO_APIC ++ ---help--- ++ This option enables a workaround that fixes a source of ++ spurious interrupts. This is recommended when threaded ++ interrupt handling is used on systems where the generation of ++ superfluous "boot interrupts" cannot be disabled. ++ ++ Some chipsets generate a legacy INTx "boot IRQ" when the IRQ ++ entry in the chipset's IO-APIC is masked (as, e.g. the RT ++ kernel does during interrupt handling). On chipsets where this ++ boot IRQ generation cannot be disabled, this workaround keeps ++ the original IRQ line masked so that only the equivalent "boot ++ IRQ" is delivered to the CPUs. The workaround also tells the ++ kernel to set up the IRQ handler on the boot IRQ line. In this ++ way only one interrupt is delivered to the kernel. Otherwise ++ the spurious second interrupt may cause the kernel to bring ++ down (vital) interrupt lines. ++ ++ Only affects "broken" chipsets. Interrupt sharing may be ++ increased on these systems. ++ ++config X86_MCE ++ bool "Machine Check / overheating reporting" ++ select GENERIC_ALLOCATOR ++ default y ++ ---help--- ++ Machine Check support allows the processor to notify the ++ kernel if it detects a problem (e.g. overheating, data corruption). ++ The action the kernel takes depends on the severity of the problem, ++ ranging from warning messages to halting the machine. ++ ++config X86_MCELOG_LEGACY ++ bool "Support for deprecated /dev/mcelog character device" ++ depends on X86_MCE ++ ---help--- ++ Enable support for /dev/mcelog which is needed by the old mcelog ++ userspace logging daemon. Consider switching to the new generation ++ rasdaemon solution. ++ ++config X86_MCE_INTEL ++ def_bool y ++ prompt "Intel MCE features" ++ depends on X86_MCE && X86_LOCAL_APIC ++ ---help--- ++ Additional support for intel specific MCE features such as ++ the thermal monitor. ++ ++config X86_MCE_AMD ++ def_bool y ++ prompt "AMD MCE features" ++ depends on X86_MCE && X86_LOCAL_APIC && AMD_NB ++ ---help--- ++ Additional support for AMD specific MCE features such as ++ the DRAM Error Threshold. ++ ++config X86_ANCIENT_MCE ++ bool "Support for old Pentium 5 / WinChip machine checks" ++ depends on X86_32 && X86_MCE ++ ---help--- ++ Include support for machine check handling on old Pentium 5 or WinChip ++ systems. These typically need to be enabled explicitly on the command ++ line. ++ ++config X86_MCE_THRESHOLD ++ depends on X86_MCE_AMD || X86_MCE_INTEL ++ def_bool y ++ ++config X86_MCE_INJECT ++ depends on X86_MCE && X86_LOCAL_APIC && DEBUG_FS ++ tristate "Machine check injector support" ++ ---help--- ++ Provide support for injecting machine checks for testing purposes. ++ If you don't know what a machine check is and you don't do kernel ++ QA it is safe to say n. 
++ ++config X86_THERMAL_VECTOR ++ def_bool y ++ depends on X86_MCE_INTEL ++ ++source "arch/x86/events/Kconfig" ++ ++config X86_LEGACY_VM86 ++ bool "Legacy VM86 support" ++ default n ++ depends on X86_32 ++ ---help--- ++ This option allows user programs to put the CPU into V8086 ++ mode, which is an 80286-era approximation of 16-bit real mode. ++ ++ Some very old versions of X and/or vbetool require this option ++ for user mode setting. Similarly, DOSEMU will use it if ++ available to accelerate real mode DOS programs. However, any ++ recent version of DOSEMU, X, or vbetool should be fully ++ functional even without kernel VM86 support, as they will all ++ fall back to software emulation. Nevertheless, if you are using ++ a 16-bit DOS program where 16-bit performance matters, vm86 ++ mode might be faster than emulation and you might want to ++ enable this option. ++ ++ Note that any app that works on a 64-bit kernel is unlikely to ++ need this option, as 64-bit kernels don't, and can't, support ++ V8086 mode. This option is also unrelated to 16-bit protected ++ mode and is not needed to run most 16-bit programs under Wine. ++ ++ Enabling this option increases the complexity of the kernel ++ and slows down exception handling a tiny bit. ++ ++ If unsure, say N here. ++ ++config VM86 ++ bool ++ default X86_LEGACY_VM86 ++ ++config X86_16BIT ++ bool "Enable support for 16-bit segments" if EXPERT ++ default y ++ depends on MODIFY_LDT_SYSCALL ++ ---help--- ++ This option is required by programs like Wine to run 16-bit ++ protected mode legacy code on x86 processors. Disabling ++ this option saves about 300 bytes on i386, or around 6K text ++ plus 16K runtime memory on x86-64, ++ ++config X86_ESPFIX32 ++ def_bool y ++ depends on X86_16BIT && X86_32 ++ ++config X86_ESPFIX64 ++ def_bool y ++ depends on X86_16BIT && X86_64 ++ ++config X86_VSYSCALL_EMULATION ++ bool "Enable vsyscall emulation" if EXPERT ++ default y ++ depends on X86_64 ++ ---help--- ++ This enables emulation of the legacy vsyscall page. Disabling ++ it is roughly equivalent to booting with vsyscall=none, except ++ that it will also disable the helpful warning if a program ++ tries to use a vsyscall. With this option set to N, offending ++ programs will just segfault, citing addresses of the form ++ 0xffffffffff600?00. ++ ++ This option is required by many programs built before 2013, and ++ care should be used even with newer programs if set to N. ++ ++ Disabling this option saves about 7K of kernel size and ++ possibly 4K of additional runtime pagetable memory. ++ ++config TOSHIBA ++ tristate "Toshiba Laptop support" ++ depends on X86_32 ++ ---help--- ++ This adds a driver to safely access the System Management Mode of ++ the CPU on Toshiba portables with a genuine Toshiba BIOS. It does ++ not work on models with a Phoenix BIOS. The System Management Mode ++ is used to set the BIOS and power saving options on Toshiba portables. ++ ++ For information on utilities to make use of this driver see the ++ Toshiba Linux utilities web site at: ++ . ++ ++ Say Y if you intend to run this kernel on a Toshiba portable. ++ Say N otherwise. ++ ++config I8K ++ tristate "Dell i8k legacy laptop support" ++ select HWMON ++ select SENSORS_DELL_SMM ++ ---help--- ++ This option enables legacy /proc/i8k userspace interface in hwmon ++ dell-smm-hwmon driver. Character file /proc/i8k reports bios version, ++ temperature and allows controlling fan speeds of Dell laptops via ++ System Management Mode. 
For old Dell laptops (like Dell Inspiron 8000) ++ it reports also power and hotkey status. For fan speed control is ++ needed userspace package i8kutils. ++ ++ Say Y if you intend to run this kernel on old Dell laptops or want to ++ use userspace package i8kutils. ++ Say N otherwise. ++ ++config X86_REBOOTFIXUPS ++ bool "Enable X86 board specific fixups for reboot" ++ depends on X86_32 ++ ---help--- ++ This enables chipset and/or board specific fixups to be done ++ in order to get reboot to work correctly. This is only needed on ++ some combinations of hardware and BIOS. The symptom, for which ++ this config is intended, is when reboot ends with a stalled/hung ++ system. ++ ++ Currently, the only fixup is for the Geode machines using ++ CS5530A and CS5536 chipsets and the RDC R-321x SoC. ++ ++ Say Y if you want to enable the fixup. Currently, it's safe to ++ enable this option even if you don't need it. ++ Say N otherwise. ++ ++config MICROCODE ++ bool "CPU microcode loading support" ++ default y ++ depends on CPU_SUP_AMD || CPU_SUP_INTEL ++ select FW_LOADER ++ ---help--- ++ If you say Y here, you will be able to update the microcode on ++ Intel and AMD processors. The Intel support is for the IA32 family, ++ e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The ++ AMD support is for families 0x10 and later. You will obviously need ++ the actual microcode binary data itself which is not shipped with ++ the Linux kernel. ++ ++ The preferred method to load microcode from a detached initrd is described ++ in Documentation/x86/microcode.txt. For that you need to enable ++ CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the ++ initrd for microcode blobs. ++ ++ In addition, you can build the microcode into the kernel. For that you ++ need to add the vendor-supplied microcode to the CONFIG_EXTRA_FIRMWARE ++ config option. ++ ++config MICROCODE_INTEL ++ bool "Intel microcode loading support" ++ depends on MICROCODE ++ default MICROCODE ++ select FW_LOADER ++ ---help--- ++ This options enables microcode patch loading support for Intel ++ processors. ++ ++ For the current Intel microcode data package go to ++ and search for ++ 'Linux Processor Microcode Data File'. ++ ++config MICROCODE_AMD ++ bool "AMD microcode loading support" ++ depends on MICROCODE ++ select FW_LOADER ++ ---help--- ++ If you select this option, microcode patch loading support for AMD ++ processors will be enabled. ++ ++config MICROCODE_OLD_INTERFACE ++ def_bool y ++ depends on MICROCODE ++ ++config X86_MSR ++ tristate "/dev/cpu/*/msr - Model-specific register support" ++ ---help--- ++ This device gives privileged processes access to the x86 ++ Model-Specific Registers (MSRs). It is a character device with ++ major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr. ++ MSR accesses are directed to a specific CPU on multi-processor ++ systems. ++ ++config X86_CPUID ++ tristate "/dev/cpu/*/cpuid - CPU information support" ++ ---help--- ++ This device gives processes access to the x86 CPUID instruction to ++ be executed on a specific processor. It is a character device ++ with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to ++ /dev/cpu/31/cpuid. ++ ++choice ++ prompt "High Memory Support" ++ default HIGHMEM4G ++ depends on X86_32 ++ ++config NOHIGHMEM ++ bool "off" ++ ---help--- ++ Linux can use up to 64 Gigabytes of physical memory on x86 systems. ++ However, the address space of 32-bit x86 processors is only 4 ++ Gigabytes large. 
That means that, if you have a large amount of ++ physical memory, not all of it can be "permanently mapped" by the ++ kernel. The physical memory that's not permanently mapped is called ++ "high memory". ++ ++ If you are compiling a kernel which will never run on a machine with ++ more than 1 Gigabyte total physical RAM, answer "off" here (default ++ choice and suitable for most users). This will result in a "3GB/1GB" ++ split: 3GB are mapped so that each process sees a 3GB virtual memory ++ space and the remaining part of the 4GB virtual memory space is used ++ by the kernel to permanently map as much physical memory as ++ possible. ++ ++ If the machine has between 1 and 4 Gigabytes physical RAM, then ++ answer "4GB" here. ++ ++ If more than 4 Gigabytes is used then answer "64GB" here. This ++ selection turns Intel PAE (Physical Address Extension) mode on. ++ PAE implements 3-level paging on IA32 processors. PAE is fully ++ supported by Linux, PAE mode is implemented on all recent Intel ++ processors (Pentium Pro and better). NOTE: If you say "64GB" here, ++ then the kernel will not boot on CPUs that don't support PAE! ++ ++ The actual amount of total physical memory will either be ++ auto detected or can be forced by using a kernel command line option ++ such as "mem=256M". (Try "man bootparam" or see the documentation of ++ your boot loader (lilo or loadlin) about how to pass options to the ++ kernel at boot time.) ++ ++ If unsure, say "off". ++ ++config HIGHMEM4G ++ bool "4GB" ++ ---help--- ++ Select this if you have a 32-bit processor and between 1 and 4 ++ gigabytes of physical RAM. ++ ++config HIGHMEM64G ++ bool "64GB" ++ depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6 ++ select X86_PAE ++ ---help--- ++ Select this if you have a 32-bit processor and more than 4 ++ gigabytes of physical RAM. ++ ++endchoice ++ ++choice ++ prompt "Memory split" if EXPERT ++ default VMSPLIT_3G ++ depends on X86_32 ++ ---help--- ++ Select the desired split between kernel and user memory. ++ ++ If the address range available to the kernel is less than the ++ physical memory installed, the remaining memory will be available ++ as "high memory". Accessing high memory is a little more costly ++ than low memory, as it needs to be mapped into the kernel first. ++ Note that increasing the kernel address space limits the range ++ available to user programs, making the address space there ++ tighter. Selecting anything other than the default 3G/1G split ++ will also likely make your kernel incompatible with binary-only ++ kernel modules. ++ ++ If you are not absolutely sure what you are doing, leave this ++ option alone! 
++ ++ config VMSPLIT_3G ++ bool "3G/1G user/kernel split" ++ config VMSPLIT_3G_OPT ++ depends on !X86_PAE ++ bool "3G/1G user/kernel split (for full 1G low memory)" ++ config VMSPLIT_2G ++ bool "2G/2G user/kernel split" ++ config VMSPLIT_2G_OPT ++ depends on !X86_PAE ++ bool "2G/2G user/kernel split (for full 2G low memory)" ++ config VMSPLIT_1G ++ bool "1G/3G user/kernel split" ++endchoice ++ ++config PAGE_OFFSET ++ hex ++ default 0xB0000000 if VMSPLIT_3G_OPT ++ default 0x80000000 if VMSPLIT_2G ++ default 0x78000000 if VMSPLIT_2G_OPT ++ default 0x40000000 if VMSPLIT_1G ++ default 0xC0000000 ++ depends on X86_32 ++ ++config HIGHMEM ++ def_bool y ++ depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) ++ ++config X86_PAE ++ bool "PAE (Physical Address Extension) Support" ++ depends on X86_32 && !HIGHMEM4G ++ select PHYS_ADDR_T_64BIT ++ select SWIOTLB ++ ---help--- ++ PAE is required for NX support, and furthermore enables ++ larger swapspace support for non-overcommit purposes. It ++ has the cost of more pagetable lookup overhead, and also ++ consumes more pagetable space per process. ++ ++config X86_5LEVEL ++ bool "Enable 5-level page tables support" ++ select DYNAMIC_MEMORY_LAYOUT ++ select SPARSEMEM_VMEMMAP ++ depends on X86_64 ++ ---help--- ++ 5-level paging enables access to larger address space: ++ upto 128 PiB of virtual address space and 4 PiB of ++ physical address space. ++ ++ It will be supported by future Intel CPUs. ++ ++ A kernel with the option enabled can be booted on machines that ++ support 4- or 5-level paging. ++ ++ See Documentation/x86/x86_64/5level-paging.txt for more ++ information. ++ ++ Say N if unsure. ++ ++config X86_DIRECT_GBPAGES ++ def_bool y ++ depends on X86_64 && !DEBUG_PAGEALLOC ++ ---help--- ++ Certain kernel features effectively disable kernel ++ linear 1 GB mappings (even if the CPU otherwise ++ supports them), so don't confuse the user by printing ++ that we have them enabled. ++ ++config ARCH_HAS_MEM_ENCRYPT ++ def_bool y ++ ++config AMD_MEM_ENCRYPT ++ bool "AMD Secure Memory Encryption (SME) support" ++ depends on X86_64 && CPU_SUP_AMD ++ select DYNAMIC_PHYSICAL_MASK ++ select ARCH_USE_MEMREMAP_PROT ++ ---help--- ++ Say yes to enable support for the encryption of system memory. ++ This requires an AMD processor that supports Secure Memory ++ Encryption (SME). ++ ++config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT ++ bool "Activate AMD Secure Memory Encryption (SME) by default" ++ default y ++ depends on AMD_MEM_ENCRYPT ++ ---help--- ++ Say yes to have system memory encrypted by default if running on ++ an AMD processor that supports Secure Memory Encryption (SME). ++ ++ If set to Y, then the encryption of system memory can be ++ deactivated with the mem_encrypt=off command line option. ++ ++ If set to N, then the encryption of system memory can be ++ activated with the mem_encrypt=on command line option. ++ ++# Common NUMA Features ++config NUMA ++ bool "Numa Memory Allocation and Scheduler Support" ++ depends on SMP ++ depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP) ++ default y if X86_BIGSMP ++ ---help--- ++ Enable NUMA (Non Uniform Memory Access) support. ++ ++ The kernel will try to allocate memory used by a CPU on the ++ local memory controller of the CPU and add some more ++ NUMA awareness to the kernel. ++ ++ For 64-bit this is recommended if the system is Intel Core i7 ++ (or later), AMD Opteron, or EM64T NUMA. ++ ++ For 32-bit this is only needed if you boot a 32-bit ++ kernel on a 64-bit NUMA platform. ++ ++ Otherwise, you should say N. 
++ ++config NUMA_AWARE_SPINLOCKS ++ bool "Numa-aware spinlocks" ++ depends on NUMA ++ depends on QUEUED_SPINLOCKS ++ # For now, we depend on PARAVIRT_SPINLOCKS to make the patching work. ++ # This is awkward, but hopefully would be resolved once static_call() ++ # is available. ++ depends on PARAVIRT_SPINLOCKS ++ default y ++ help ++ Introduce NUMA (Non Uniform Memory Access) awareness into ++ the slow path of spinlocks. ++ ++ In this variant of qspinlock, the kernel will try to keep the lock ++ on the same node, thus reducing the number of remote cache misses, ++ while trading some of the short term fairness for better performance. ++ ++ Say N if you want absolute first come first serve fairness. ++ ++config AMD_NUMA ++ def_bool y ++ prompt "Old style AMD Opteron NUMA detection" ++ depends on X86_64 && NUMA && PCI ++ ---help--- ++ Enable AMD NUMA node topology detection. You should say Y here if ++ you have a multi processor AMD system. This uses an old method to ++ read the NUMA configuration directly from the builtin Northbridge ++ of Opteron. It is recommended to use X86_64_ACPI_NUMA instead, ++ which also takes priority if both are compiled in. ++ ++config X86_64_ACPI_NUMA ++ def_bool y ++ prompt "ACPI NUMA detection" ++ depends on X86_64 && NUMA && ACPI && PCI ++ select ACPI_NUMA ++ ---help--- ++ Enable ACPI SRAT based node topology detection. ++ ++# Some NUMA nodes have memory ranges that span ++# other nodes. Even though a pfn is valid and ++# between a node's start and end pfns, it may not ++# reside on that node. See memmap_init_zone() ++# for details. ++config NODES_SPAN_OTHER_NODES ++ def_bool y ++ depends on X86_64_ACPI_NUMA ++ ++config NUMA_EMU ++ bool "NUMA emulation" ++ depends on NUMA ++ ---help--- ++ Enable NUMA emulation. A flat machine will be split ++ into virtual nodes when booted with "numa=fake=N", where N is the ++ number of nodes. This is only useful for debugging. ++ ++config NODES_SHIFT ++ int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP ++ range 1 10 ++ default "10" if MAXSMP ++ default "6" if X86_64 ++ default "3" ++ depends on NEED_MULTIPLE_NODES ++ ---help--- ++ Specify the maximum number of NUMA Nodes available on the target ++ system. Increases memory reserved to accommodate various tables. ++ ++config ARCH_HAVE_MEMORY_PRESENT ++ def_bool y ++ depends on X86_32 && DISCONTIGMEM ++ ++config ARCH_FLATMEM_ENABLE ++ def_bool y ++ depends on X86_32 && !NUMA ++ ++config ARCH_DISCONTIGMEM_ENABLE ++ def_bool y ++ depends on NUMA && X86_32 ++ ++config ARCH_DISCONTIGMEM_DEFAULT ++ def_bool y ++ depends on NUMA && X86_32 ++ ++config ARCH_SPARSEMEM_ENABLE ++ def_bool y ++ depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD ++ select SPARSEMEM_STATIC if X86_32 ++ select SPARSEMEM_VMEMMAP_ENABLE if X86_64 ++ ++config ARCH_SPARSEMEM_DEFAULT ++ def_bool y ++ depends on X86_64 ++ ++config ARCH_SELECT_MEMORY_MODEL ++ def_bool y ++ depends on ARCH_SPARSEMEM_ENABLE ++ ++config ARCH_MEMORY_PROBE ++ bool "Enable sysfs memory/probe interface" ++ depends on X86_64 && MEMORY_HOTPLUG ++ help ++ This option enables a sysfs memory/probe interface for testing. ++ See Documentation/memory-hotplug.txt for more information. ++ If you are unsure how to answer this question, answer N. 
++ ++config ARCH_PROC_KCORE_TEXT ++ def_bool y ++ depends on X86_64 && PROC_KCORE ++ ++config ILLEGAL_POINTER_VALUE ++ hex ++ default 0 if X86_32 ++ default 0xdead000000000000 if X86_64 ++ ++config X86_PMEM_LEGACY_DEVICE ++ bool ++ ++config X86_PMEM_LEGACY ++ tristate "Support non-standard NVDIMMs and ADR protected memory" ++ depends on PHYS_ADDR_T_64BIT ++ depends on BLK_DEV ++ select X86_PMEM_LEGACY_DEVICE ++ select LIBNVDIMM ++ help ++ Treat memory marked using the non-standard e820 type of 12 as used ++ by the Intel Sandy Bridge-EP reference BIOS as protected memory. ++ The kernel will offer these regions to the 'pmem' driver so ++ they can be used for persistent storage. ++ ++ Say Y if unsure. ++ ++config HIGHPTE ++ bool "Allocate 3rd-level pagetables from highmem" ++ depends on HIGHMEM ++ ---help--- ++ The VM uses one page table entry for each page of physical memory. ++ For systems with a lot of RAM, this can be wasteful of precious ++ low memory. Setting this option will put user-space page table ++ entries in high memory. ++ ++config X86_CHECK_BIOS_CORRUPTION ++ bool "Check for low memory corruption" ++ ---help--- ++ Periodically check for memory corruption in low memory, which ++ is suspected to be caused by BIOS. Even when enabled in the ++ configuration, it is disabled at runtime. Enable it by ++ setting "memory_corruption_check=1" on the kernel command ++ line. By default it scans the low 64k of memory every 60 ++ seconds; see the memory_corruption_check_size and ++ memory_corruption_check_period parameters in ++ Documentation/admin-guide/kernel-parameters.rst to adjust this. ++ ++ When enabled with the default parameters, this option has ++ almost no overhead, as it reserves a relatively small amount ++ of memory and scans it infrequently. It both detects corruption ++ and prevents it from affecting the running system. ++ ++ It is, however, intended as a diagnostic tool; if repeatable ++ BIOS-originated corruption always affects the same memory, ++ you can use memmap= to prevent the kernel from using that ++ memory. ++ ++config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK ++ bool "Set the default setting of memory_corruption_check" ++ depends on X86_CHECK_BIOS_CORRUPTION ++ default y ++ ---help--- ++ Set whether the default state of memory_corruption_check is ++ on or off. ++ ++config X86_RESERVE_LOW ++ int "Amount of low memory, in kilobytes, to reserve for the BIOS" ++ default 64 ++ range 4 640 ++ ---help--- ++ Specify the amount of low memory to reserve for the BIOS. ++ ++ The first page contains BIOS data structures that the kernel ++ must not use, so that page must always be reserved. ++ ++ By default we reserve the first 64K of physical RAM, as a ++ number of BIOSes are known to corrupt that memory range ++ during events such as suspend/resume or monitor cable ++ insertion, so it must not be used by the kernel. ++ ++ You can set this to 4 if you are absolutely sure that you ++ trust the BIOS to get all its memory reservations and usages ++ right. If you know your BIOS have problems beyond the ++ default 64K area, you can set this to 640 to avoid using the ++ entire low memory range. ++ ++ If you have doubts about the BIOS (e.g. suspend/resume does ++ not work or there's kernel crashes after certain hardware ++ hotplug events) then you might want to enable ++ X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check ++ typical corruption patterns. ++ ++ Leave this to the default value of 64 if you are unsure. 
++ ++config MATH_EMULATION ++ bool ++ depends on MODIFY_LDT_SYSCALL ++ prompt "Math emulation" if X86_32 ++ ---help--- ++ Linux can emulate a math coprocessor (used for floating point ++ operations) if you don't have one. 486DX and Pentium processors have ++ a math coprocessor built in, 486SX and 386 do not, unless you added ++ a 487DX or 387, respectively. (The messages during boot time can ++ give you some hints here ["man dmesg"].) Everyone needs either a ++ coprocessor or this emulation. ++ ++ If you don't have a math coprocessor, you need to say Y here; if you ++ say Y here even though you have a coprocessor, the coprocessor will ++ be used nevertheless. (This behavior can be changed with the kernel ++ command line option "no387", which comes handy if your coprocessor ++ is broken. Try "man bootparam" or see the documentation of your boot ++ loader (lilo or loadlin) about how to pass options to the kernel at ++ boot time.) This means that it is a good idea to say Y here if you ++ intend to use this kernel on different machines. ++ ++ More information about the internals of the Linux math coprocessor ++ emulation can be found in . ++ ++ If you are not sure, say Y; apart from resulting in a 66 KB bigger ++ kernel, it won't hurt. ++ ++config MTRR ++ def_bool y ++ prompt "MTRR (Memory Type Range Register) support" if EXPERT ++ ---help--- ++ On Intel P6 family processors (Pentium Pro, Pentium II and later) ++ the Memory Type Range Registers (MTRRs) may be used to control ++ processor access to memory ranges. This is most useful if you have ++ a video (VGA) card on a PCI or AGP bus. Enabling write-combining ++ allows bus write transfers to be combined into a larger transfer ++ before bursting over the PCI/AGP bus. This can increase performance ++ of image write operations 2.5 times or more. Saying Y here creates a ++ /proc/mtrr file which may be used to manipulate your processor's ++ MTRRs. Typically the X server should use this. ++ ++ This code has a reasonably generic interface so that similar ++ control registers on other processors can be easily supported ++ as well: ++ ++ The Cyrix 6x86, 6x86MX and M II processors have Address Range ++ Registers (ARRs) which provide a similar functionality to MTRRs. For ++ these, the ARRs are used to emulate the MTRRs. ++ The AMD K6-2 (stepping 8 and above) and K6-3 processors have two ++ MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing ++ write-combining. All of these processors are supported by this code ++ and it makes sense to say Y here if you have one of them. ++ ++ Saying Y here also fixes a problem with buggy SMP BIOSes which only ++ set the MTRRs for the boot CPU and not for the secondary CPUs. This ++ can lead to all sorts of problems, so it's good to say Y here. ++ ++ You can safely say Y even if your machine doesn't have MTRRs, you'll ++ just add about 9 KB to your kernel. ++ ++ See for more information. ++ ++config MTRR_SANITIZER ++ def_bool y ++ prompt "MTRR cleanup support" ++ depends on MTRR ++ ---help--- ++ Convert MTRR layout from continuous to discrete, so X drivers can ++ add writeback entries. ++ ++ Can be disabled with disable_mtrr_cleanup on the kernel command line. ++ The largest mtrr entry size for a continuous block can be set with ++ mtrr_chunk_size. ++ ++ If unsure, say Y. 
++ ++config MTRR_SANITIZER_ENABLE_DEFAULT ++ int "MTRR cleanup enable value (0-1)" ++ range 0 1 ++ default "0" ++ depends on MTRR_SANITIZER ++ ---help--- ++ Enable mtrr cleanup default value ++ ++config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT ++ int "MTRR cleanup spare reg num (0-7)" ++ range 0 7 ++ default "1" ++ depends on MTRR_SANITIZER ++ ---help--- ++ mtrr cleanup spare entries default, it can be changed via ++ mtrr_spare_reg_nr=N on the kernel command line. ++ ++config X86_PAT ++ def_bool y ++ prompt "x86 PAT support" if EXPERT ++ depends on MTRR ++ ---help--- ++ Use PAT attributes to setup page level cache control. ++ ++ PATs are the modern equivalents of MTRRs and are much more ++ flexible than MTRRs. ++ ++ Say N here if you see bootup problems (boot crash, boot hang, ++ spontaneous reboots) or a non-working video driver. ++ ++ If unsure, say Y. ++ ++config ARCH_USES_PG_UNCACHED ++ def_bool y ++ depends on X86_PAT ++ ++config ARCH_RANDOM ++ def_bool y ++ prompt "x86 architectural random number generator" if EXPERT ++ ---help--- ++ Enable the x86 architectural RDRAND instruction ++ (Intel Bull Mountain technology) to generate random numbers. ++ If supported, this is a high bandwidth, cryptographically ++ secure hardware random number generator. ++ ++config X86_SMAP ++ def_bool y ++ prompt "Supervisor Mode Access Prevention" if EXPERT ++ ---help--- ++ Supervisor Mode Access Prevention (SMAP) is a security ++ feature in newer Intel processors. There is a small ++ performance cost if this enabled and turned on; there is ++ also a small increase in the kernel size if this is enabled. ++ ++ If unsure, say Y. ++ ++config X86_INTEL_UMIP ++ def_bool y ++ depends on CPU_SUP_INTEL ++ prompt "Intel User Mode Instruction Prevention" if EXPERT ++ ---help--- ++ The User Mode Instruction Prevention (UMIP) is a security ++ feature in newer Intel processors. If enabled, a general ++ protection fault is issued if the SGDT, SLDT, SIDT, SMSW ++ or STR instructions are executed in user mode. These instructions ++ unnecessarily expose information about the hardware state. ++ ++ The vast majority of applications do not use these instructions. ++ For the very few that do, software emulation is provided in ++ specific cases in protected and virtual-8086 modes. Emulated ++ results are dummy. ++ ++config X86_INTEL_MPX ++ prompt "Intel MPX (Memory Protection Extensions)" ++ def_bool n ++ # Note: only available in 64-bit mode due to VMA flags shortage ++ depends on CPU_SUP_INTEL && X86_64 ++ select ARCH_USES_HIGH_VMA_FLAGS ++ ---help--- ++ MPX provides hardware features that can be used in ++ conjunction with compiler-instrumented code to check ++ memory references. It is designed to detect buffer ++ overflow or underflow bugs. ++ ++ This option enables running applications which are ++ instrumented or otherwise use MPX. It does not use MPX ++ itself inside the kernel or to protect the kernel ++ against bad memory references. ++ ++ Enabling this option will make the kernel larger: ++ ~8k of kernel text and 36 bytes of data on a 64-bit ++ defconfig. It adds a long to the 'mm_struct' which ++ will increase the kernel memory overhead of each ++ process and adds some branches to paths used during ++ exec() and munmap(). ++ ++ For details, see Documentation/x86/intel_mpx.txt ++ ++ If unsure, say N. 
++ ++config X86_INTEL_MEMORY_PROTECTION_KEYS ++ prompt "Intel Memory Protection Keys" ++ def_bool y ++ # Note: only available in 64-bit mode ++ depends on CPU_SUP_INTEL && X86_64 ++ select ARCH_USES_HIGH_VMA_FLAGS ++ select ARCH_HAS_PKEYS ++ ---help--- ++ Memory Protection Keys provides a mechanism for enforcing ++ page-based protections, but without requiring modification of the ++ page tables when an application changes protection domains. ++ ++ For details, see Documentation/x86/protection-keys.txt ++ ++ If unsure, say y. ++ ++choice ++ prompt "TSX enable mode" ++ depends on CPU_SUP_INTEL ++ default X86_INTEL_TSX_MODE_OFF ++ help ++ Intel's TSX (Transactional Synchronization Extensions) feature ++ allows to optimize locking protocols through lock elision which ++ can lead to a noticeable performance boost. ++ ++ On the other hand it has been shown that TSX can be exploited ++ to form side channel attacks (e.g. TAA) and chances are there ++ will be more of those attacks discovered in the future. ++ ++ Therefore TSX is not enabled by default (aka tsx=off). An admin ++ might override this decision by tsx=on the command line parameter. ++ Even with TSX enabled, the kernel will attempt to enable the best ++ possible TAA mitigation setting depending on the microcode available ++ for the particular machine. ++ ++ This option allows to set the default tsx mode between tsx=on, =off ++ and =auto. See Documentation/admin-guide/kernel-parameters.txt for more ++ details. ++ ++ Say off if not sure, auto if TSX is in use but it should be used on safe ++ platforms or on if TSX is in use and the security aspect of tsx is not ++ relevant. ++ ++config X86_INTEL_TSX_MODE_OFF ++ bool "off" ++ help ++ TSX is disabled if possible - equals to tsx=off command line parameter. ++ ++config X86_INTEL_TSX_MODE_ON ++ bool "on" ++ help ++ TSX is always enabled on TSX capable HW - equals the tsx=on command ++ line parameter. ++ ++config X86_INTEL_TSX_MODE_AUTO ++ bool "auto" ++ help ++ TSX is enabled on TSX capable HW that is believed to be safe against ++ side channel attacks- equals the tsx=auto command line parameter. ++endchoice ++ ++config EFI ++ bool "EFI runtime service support" ++ depends on ACPI ++ select UCS2_STRING ++ select EFI_RUNTIME_WRAPPERS ++ ---help--- ++ This enables the kernel to use EFI runtime services that are ++ available (such as the EFI variable services). ++ ++ This option is only useful on systems that have EFI firmware. ++ In addition, you should use the latest ELILO loader available ++ at in order to take advantage ++ of EFI runtime services. However, even with this option, the ++ resultant kernel should continue to boot on existing non-EFI ++ platforms. ++ ++config EFI_STUB ++ bool "EFI stub support" ++ depends on EFI && !X86_USE_3DNOW ++ select RELOCATABLE ++ ---help--- ++ This kernel feature allows a bzImage to be loaded directly ++ by EFI firmware without the use of a bootloader. ++ ++ See Documentation/efi-stub.txt for more information. ++ ++config EFI_MIXED ++ bool "EFI mixed-mode support" ++ depends on EFI_STUB && X86_64 ++ ---help--- ++ Enabling this feature allows a 64-bit kernel to be booted ++ on a 32-bit firmware, provided that your CPU supports 64-bit ++ mode. ++ ++ Note that it is not possible to boot a mixed-mode enabled ++ kernel via the EFI boot stub - a bootloader that supports ++ the EFI handover protocol must be used. ++ ++ If unsure, say N. 
++ ++config SECCOMP ++ def_bool y ++ prompt "Enable seccomp to safely compute untrusted bytecode" ++ ---help--- ++ This kernel feature is useful for number crunching applications ++ that may need to compute untrusted bytecode during their ++ execution. By using pipes or other transports made available to ++ the process as file descriptors supporting the read/write ++ syscalls, it's possible to isolate those applications in ++ their own address space using seccomp. Once seccomp is ++ enabled via prctl(PR_SET_SECCOMP), it cannot be disabled ++ and the task is only allowed to execute a few safe syscalls ++ defined by each seccomp mode. ++ ++ If unsure, say Y. Only embedded should say N here. ++ ++source kernel/Kconfig.hz ++ ++config KEXEC ++ bool "kexec system call" ++ select KEXEC_CORE ++ ---help--- ++ kexec is a system call that implements the ability to shutdown your ++ current kernel, and to start another kernel. It is like a reboot ++ but it is independent of the system firmware. And like a reboot ++ you can start any kernel with it, not just Linux. ++ ++ The name comes from the similarity to the exec system call. ++ ++ It is an ongoing process to be certain the hardware in a machine ++ is properly shutdown, so do not be surprised if this code does not ++ initially work for you. As of this writing the exact hardware ++ interface is strongly in flux, so no good recommendation can be ++ made. ++ ++config KEXEC_FILE ++ bool "kexec file based system call" ++ select KEXEC_CORE ++ select BUILD_BIN2C ++ depends on X86_64 ++ depends on CRYPTO=y ++ depends on CRYPTO_SHA256=y ++ ---help--- ++ This is new version of kexec system call. This system call is ++ file based and takes file descriptors as system call argument ++ for kernel and initramfs as opposed to list of segments as ++ accepted by previous system call. ++ ++config ARCH_HAS_KEXEC_PURGATORY ++ def_bool KEXEC_FILE ++ ++config KEXEC_VERIFY_SIG ++ bool "Verify kernel signature during kexec_file_load() syscall" ++ depends on KEXEC_FILE ++ ---help--- ++ This option makes kernel signature verification mandatory for ++ the kexec_file_load() syscall. ++ ++ In addition to that option, you need to enable signature ++ verification for the corresponding kernel image type being ++ loaded in order for this to work. ++ ++config KEXEC_BZIMAGE_VERIFY_SIG ++ bool "Enable bzImage signature verification support" ++ depends on KEXEC_VERIFY_SIG ++ depends on SIGNED_PE_FILE_VERIFICATION ++ select SYSTEM_TRUSTED_KEYRING ++ ---help--- ++ Enable bzImage signature verification support. ++ ++config CRASH_DUMP ++ bool "kernel crash dumps" ++ depends on X86_64 || (X86_32 && HIGHMEM) ++ ---help--- ++ Generate crash dump after being started by kexec. ++ This should be normally only set in special crash dump kernels ++ which are loaded in the main kernel with kexec-tools into ++ a specially reserved region and then later executed after ++ a crash by kdump/kexec. The crash dump kernel must be compiled ++ to a memory address not used by the main kernel or BIOS using ++ PHYSICAL_START, or it must be built as a relocatable image ++ (CONFIG_RELOCATABLE=y). 
++ For more details see Documentation/kdump/kdump.txt ++ ++config KEXEC_JUMP ++ bool "kexec jump" ++ depends on KEXEC && HIBERNATION ++ ---help--- ++ Jump between original kernel and kexeced kernel and invoke ++ code in physical address mode via KEXEC ++ ++config PHYSICAL_START ++ hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP) ++ default "0x1000000" ++ ---help--- ++ This gives the physical address where the kernel is loaded. ++ ++ If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then ++ bzImage will decompress itself to above physical address and ++ run from there. Otherwise, bzImage will run from the address where ++ it has been loaded by the boot loader and will ignore above physical ++ address. ++ ++ In normal kdump cases one does not have to set/change this option ++ as now bzImage can be compiled as a completely relocatable image ++ (CONFIG_RELOCATABLE=y) and be used to load and run from a different ++ address. This option is mainly useful for the folks who don't want ++ to use a bzImage for capturing the crash dump and want to use a ++ vmlinux instead. vmlinux is not relocatable hence a kernel needs ++ to be specifically compiled to run from a specific memory area ++ (normally a reserved region) and this option comes handy. ++ ++ So if you are using bzImage for capturing the crash dump, ++ leave the value here unchanged to 0x1000000 and set ++ CONFIG_RELOCATABLE=y. Otherwise if you plan to use vmlinux ++ for capturing the crash dump change this value to start of ++ the reserved region. In other words, it can be set based on ++ the "X" value as specified in the "crashkernel=YM@XM" ++ command line boot parameter passed to the panic-ed ++ kernel. Please take a look at Documentation/kdump/kdump.txt ++ for more details about crash dumps. ++ ++ Usage of bzImage for capturing the crash dump is recommended as ++ one does not have to build two kernels. Same kernel can be used ++ as production kernel and capture kernel. Above option should have ++ gone away after relocatable bzImage support is introduced. But it ++ is present because there are users out there who continue to use ++ vmlinux for dump capture. This option should go away down the ++ line. ++ ++ Don't change this unless you know what you are doing. ++ ++config RELOCATABLE ++ bool "Build a relocatable kernel" ++ default y ++ ---help--- ++ This builds a kernel image that retains relocation information ++ so it can be loaded someplace besides the default 1MB. ++ The relocations tend to make the kernel binary about 10% larger, ++ but are discarded at runtime. ++ ++ One use is for the kexec on panic case where the recovery kernel ++ must live at a different physical address than the primary ++ kernel. ++ ++ Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address ++ it has been loaded at and the compile time physical address ++ (CONFIG_PHYSICAL_START) is used as the minimum location. ++ ++config RANDOMIZE_BASE ++ bool "Randomize the address of the kernel image (KASLR)" ++ depends on RELOCATABLE ++ default y ++ ---help--- ++ In support of Kernel Address Space Layout Randomization (KASLR), ++ this randomizes the physical address at which the kernel image ++ is decompressed and the virtual address where the kernel ++ image is mapped, as a security feature that deters exploit ++ attempts relying on knowledge of the location of kernel ++ code internals. ++ ++ On 64-bit, the kernel physical and virtual addresses are ++ randomized separately. 
The physical address will be anywhere ++ between 16MB and the top of physical memory (up to 64TB). The ++ virtual address will be randomized from 16MB up to 1GB (9 bits ++ of entropy). Note that this also reduces the memory space ++ available to kernel modules from 1.5GB to 1GB. ++ ++ On 32-bit, the kernel physical and virtual addresses are ++ randomized together. They will be randomized from 16MB up to ++ 512MB (8 bits of entropy). ++ ++ Entropy is generated using the RDRAND instruction if it is ++ supported. If RDTSC is supported, its value is mixed into ++ the entropy pool as well. If neither RDRAND nor RDTSC are ++ supported, then entropy is read from the i8254 timer. The ++ usable entropy is limited by the kernel being built using ++ 2GB addressing, and that PHYSICAL_ALIGN must be at a ++ minimum of 2MB. As a result, only 10 bits of entropy are ++ theoretically possible, but the implementations are further ++ limited due to memory layouts. ++ ++ If unsure, say Y. ++ ++# Relocation on x86 needs some additional build support ++config X86_NEED_RELOCS ++ def_bool y ++ depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE) ++ ++config PHYSICAL_ALIGN ++ hex "Alignment value to which kernel should be aligned" ++ default "0x200000" ++ range 0x2000 0x1000000 if X86_32 ++ range 0x200000 0x1000000 if X86_64 ++ ---help--- ++ This value puts the alignment restrictions on physical address ++ where kernel is loaded and run from. Kernel is compiled for an ++ address which meets above alignment restriction. ++ ++ If bootloader loads the kernel at a non-aligned address and ++ CONFIG_RELOCATABLE is set, kernel will move itself to nearest ++ address aligned to above value and run from there. ++ ++ If bootloader loads the kernel at a non-aligned address and ++ CONFIG_RELOCATABLE is not set, kernel will ignore the run time ++ load address and decompress itself to the address it has been ++ compiled for and run from there. The address for which kernel is ++ compiled already meets above alignment restrictions. Hence the ++ end result is that kernel runs from a physical address meeting ++ above alignment restrictions. ++ ++ On 32-bit this value must be a multiple of 0x2000. On 64-bit ++ this value must be a multiple of 0x200000. ++ ++ Don't change this unless you know what you are doing. ++ ++config DYNAMIC_MEMORY_LAYOUT ++ bool ++ ---help--- ++ This option makes base addresses of vmalloc and vmemmap as well as ++ __PAGE_OFFSET movable during boot. ++ ++config RANDOMIZE_MEMORY ++ bool "Randomize the kernel memory sections" ++ depends on X86_64 ++ depends on RANDOMIZE_BASE ++ select DYNAMIC_MEMORY_LAYOUT ++ default RANDOMIZE_BASE ++ ---help--- ++ Randomizes the base virtual address of kernel memory sections ++ (physical memory mapping, vmalloc & vmemmap). This security feature ++ makes exploits relying on predictable memory locations less reliable. ++ ++ The order of allocations remains unchanged. Entropy is generated in ++ the same way as RANDOMIZE_BASE. Current implementation in the optimal ++ configuration have in average 30,000 different possible virtual ++ addresses for each memory section. ++ ++ If unsure, say Y. ++ ++config RANDOMIZE_MEMORY_PHYSICAL_PADDING ++ hex "Physical memory mapping padding" if EXPERT ++ depends on RANDOMIZE_MEMORY ++ default "0xa" if MEMORY_HOTPLUG ++ default "0x0" ++ range 0x1 0x40 if MEMORY_HOTPLUG ++ range 0x0 0x40 ++ ---help--- ++ Define the padding in terabytes added to the existing physical ++ memory size during kernel memory randomization. 
It is useful ++ for memory hotplug support but reduces the entropy available for ++ address randomization. ++ ++ If unsure, leave at the default value. ++ ++config HOTPLUG_CPU ++ def_bool y ++ depends on SMP ++ ++config BOOTPARAM_HOTPLUG_CPU0 ++ bool "Set default setting of cpu0_hotpluggable" ++ default n ++ depends on HOTPLUG_CPU ++ ---help--- ++ Set whether default state of cpu0_hotpluggable is on or off. ++ ++ Say Y here to enable CPU0 hotplug by default. If this switch ++ is turned on, there is no need to give cpu0_hotplug kernel ++ parameter and the CPU0 hotplug feature is enabled by default. ++ ++ Please note: there are two known CPU0 dependencies if you want ++ to enable the CPU0 hotplug feature either by this switch or by ++ cpu0_hotplug kernel parameter. ++ ++ First, resume from hibernate or suspend always starts from CPU0. ++ So hibernate and suspend are prevented if CPU0 is offline. ++ ++ Second dependency is PIC interrupts always go to CPU0. CPU0 can not ++ offline if any interrupt can not migrate out of CPU0. There may ++ be other CPU0 dependencies. ++ ++ Please make sure the dependencies are under your control before ++ you enable this feature. ++ ++ Say N if you don't want to enable CPU0 hotplug feature by default. ++ You still can enable the CPU0 hotplug feature at boot by kernel ++ parameter cpu0_hotplug. ++ ++config DEBUG_HOTPLUG_CPU0 ++ def_bool n ++ prompt "Debug CPU0 hotplug" ++ depends on HOTPLUG_CPU ++ ---help--- ++ Enabling this option offlines CPU0 (if CPU0 can be offlined) as ++ soon as possible and boots up userspace with CPU0 offlined. User ++ can online CPU0 back after boot time. ++ ++ To debug CPU0 hotplug, you need to enable CPU0 offline/online ++ feature by either turning on CONFIG_BOOTPARAM_HOTPLUG_CPU0 during ++ compilation or giving cpu0_hotplug kernel parameter at boot. ++ ++ If unsure, say N. ++ ++config COMPAT_VDSO ++ def_bool n ++ prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)" ++ depends on COMPAT_32 ++ ---help--- ++ Certain buggy versions of glibc will crash if they are ++ presented with a 32-bit vDSO that is not mapped at the address ++ indicated in its segment table. ++ ++ The bug was introduced by f866314b89d56845f55e6f365e18b31ec978ec3a ++ and fixed by 3b3ddb4f7db98ec9e912ccdf54d35df4aa30e04a and ++ 49ad572a70b8aeb91e57483a11dd1b77e31c4468. Glibc 2.3.3 is ++ the only released version with the bug, but OpenSUSE 9 ++ contains a buggy "glibc 2.3.2". ++ ++ The symptom of the bug is that everything crashes on startup, saying: ++ dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed! ++ ++ Saying Y here changes the default value of the vdso32 boot ++ option from 1 to 0, which turns off the 32-bit vDSO entirely. ++ This works around the glibc bug but hurts performance. ++ ++ If unsure, say N: if you are compiling your own kernel, you ++ are unlikely to be using a buggy version of glibc. ++ ++choice ++ prompt "vsyscall table for legacy applications" ++ depends on X86_64 ++ default LEGACY_VSYSCALL_EMULATE ++ help ++ Legacy user code that does not know how to find the vDSO expects ++ to be able to issue three syscalls by calling fixed addresses in ++ kernel space. Since this location is not randomized with ASLR, ++ it can be used to assist security vulnerability exploitation. ++ ++ This setting can be changed at boot time via the kernel command ++ line parameter vsyscall=[emulate|none]. 
++ ++ On a system with recent enough glibc (2.14 or newer) and no ++ static binaries, you can say None without a performance penalty ++ to improve security. ++ ++ If unsure, select "Emulate". ++ ++ config LEGACY_VSYSCALL_EMULATE ++ bool "Emulate" ++ help ++ The kernel traps and emulates calls into the fixed ++ vsyscall address mapping. This makes the mapping ++ non-executable, but it still contains known contents, ++ which could be used in certain rare security vulnerability ++ exploits. This configuration is recommended when userspace ++ still uses the vsyscall area. ++ ++ config LEGACY_VSYSCALL_NONE ++ bool "None" ++ help ++ There will be no vsyscall mapping at all. This will ++ eliminate any risk of ASLR bypass due to the vsyscall ++ fixed address mapping. Attempts to use the vsyscalls ++ will be reported to dmesg, so that either old or ++ malicious userspace programs can be identified. ++ ++endchoice ++ ++config CMDLINE_BOOL ++ bool "Built-in kernel command line" ++ ---help--- ++ Allow for specifying boot arguments to the kernel at ++ build time. On some systems (e.g. embedded ones), it is ++ necessary or convenient to provide some or all of the ++ kernel boot arguments with the kernel itself (that is, ++ to not rely on the boot loader to provide them.) ++ ++ To compile command line arguments into the kernel, ++ set this option to 'Y', then fill in the ++ boot arguments in CONFIG_CMDLINE. ++ ++ Systems with fully functional boot loaders (i.e. non-embedded) ++ should leave this option set to 'N'. ++ ++config CMDLINE ++ string "Built-in kernel command string" ++ depends on CMDLINE_BOOL ++ default "" ++ ---help--- ++ Enter arguments here that should be compiled into the kernel ++ image and used at boot time. If the boot loader provides a ++ command line at boot time, it is appended to this string to ++ form the full kernel command line, when the system boots. ++ ++ However, you can use the CONFIG_CMDLINE_OVERRIDE option to ++ change this behavior. ++ ++ In most cases, the command line (whether built-in or provided ++ by the boot loader) should specify the device for the root ++ file system. ++ ++config CMDLINE_OVERRIDE ++ bool "Built-in command line overrides boot loader arguments" ++ depends on CMDLINE_BOOL ++ ---help--- ++ Set this option to 'Y' to have the kernel ignore the boot loader ++ command line, and use ONLY the built-in command line. ++ ++ This is used to work around broken boot loaders. This should ++ be set to 'N' under normal conditions. ++ ++config MODIFY_LDT_SYSCALL ++ bool "Enable the LDT (local descriptor table)" if EXPERT ++ default y ++ ---help--- ++ Linux can allow user programs to install a per-process x86 ++ Local Descriptor Table (LDT) using the modify_ldt(2) system ++ call. This is required to run 16-bit or segmented code such as ++ DOSEMU or some Wine programs. It is also used by some very old ++ threading libraries. ++ ++ Enabling this feature adds a small amount of overhead to ++ context switches and increases the low-level kernel attack ++ surface. Disabling it removes the modify_ldt(2) system call. ++ ++ Saying 'N' here may make sense for embedded or server kernels. 
++ ++source "kernel/livepatch/Kconfig" ++ ++endmenu ++ ++config ARCH_HAS_ADD_PAGES ++ def_bool y ++ depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG ++ ++config ARCH_ENABLE_MEMORY_HOTPLUG ++ def_bool y ++ depends on X86_64 || (X86_32 && HIGHMEM) ++ ++config ARCH_ENABLE_MEMORY_HOTREMOVE ++ def_bool y ++ depends on MEMORY_HOTPLUG ++ ++config USE_PERCPU_NUMA_NODE_ID ++ def_bool y ++ depends on NUMA ++ ++config ARCH_ENABLE_SPLIT_PMD_PTLOCK ++ def_bool y ++ depends on X86_64 || X86_PAE ++ ++config ARCH_ENABLE_HUGEPAGE_MIGRATION ++ def_bool y ++ depends on X86_64 && HUGETLB_PAGE && MIGRATION ++ ++config ARCH_ENABLE_THP_MIGRATION ++ def_bool y ++ depends on X86_64 && TRANSPARENT_HUGEPAGE ++ ++menu "Power management and ACPI options" ++ ++config ARCH_HIBERNATION_HEADER ++ def_bool y ++ depends on X86_64 && HIBERNATION ++ ++source "kernel/power/Kconfig" ++ ++source "drivers/acpi/Kconfig" ++ ++source "drivers/sfi/Kconfig" ++ ++config X86_APM_BOOT ++ def_bool y ++ depends on APM ++ ++menuconfig APM ++ tristate "APM (Advanced Power Management) BIOS support" ++ depends on X86_32 && PM_SLEEP ++ ---help--- ++ APM is a BIOS specification for saving power using several different ++ techniques. This is mostly useful for battery powered laptops with ++ APM compliant BIOSes. If you say Y here, the system time will be ++ reset after a RESUME operation, the /proc/apm device will provide ++ battery status information, and user-space programs will receive ++ notification of APM "events" (e.g. battery status change). ++ ++ If you select "Y" here, you can disable actual use of the APM ++ BIOS by passing the "apm=off" option to the kernel at boot time. ++ ++ Note that the APM support is almost completely disabled for ++ machines with more than one CPU. ++ ++ In order to use APM, you will need supporting software. For location ++ and more information, read ++ and the Battery Powered Linux mini-HOWTO, available from ++ . ++ ++ This driver does not spin down disk drives (see the hdparm(8) ++ manpage ("man 8 hdparm") for that), and it doesn't turn off ++ VESA-compliant "green" monitors. ++ ++ This driver does not support the TI 4000M TravelMate and the ACER ++ 486/DX4/75 because they don't have compliant BIOSes. Many "green" ++ desktop machines also don't have compliant BIOSes, and this driver ++ may cause those machines to panic during the boot phase. ++ ++ Generally, if you don't have a battery in your machine, there isn't ++ much point in using this driver and you should say N. If you get ++ random kernel OOPSes or reboots that don't seem to be related to ++ anything, try disabling/enabling this option (or disabling/enabling ++ APM in your BIOS). ++ ++ Some other things you should try when experiencing seemingly random, ++ "weird" problems: ++ ++ 1) make sure that you have enough swap space and that it is ++ enabled. ++ 2) pass the "no-hlt" option to the kernel ++ 3) switch on floating point emulation in the kernel and pass ++ the "no387" option to the kernel ++ 4) pass the "floppy=nodma" option to the kernel ++ 5) pass the "mem=4M" option to the kernel (thereby disabling ++ all but the first 4 MB of RAM) ++ 6) make sure that the CPU is not over clocked. ++ 7) read the sig11 FAQ at ++ 8) disable the cache from your BIOS settings ++ 9) install a fan for the video card or exchange video RAM ++ 10) install a better fan for the CPU ++ 11) exchange RAM chips ++ 12) exchange the motherboard. ++ ++ To compile this driver as a module, choose M here: the ++ module will be called apm. 
++ ++if APM ++ ++config APM_IGNORE_USER_SUSPEND ++ bool "Ignore USER SUSPEND" ++ ---help--- ++ This option will ignore USER SUSPEND requests. On machines with a ++ compliant APM BIOS, you want to say N. However, on the NEC Versa M ++ series notebooks, it is necessary to say Y because of a BIOS bug. ++ ++config APM_DO_ENABLE ++ bool "Enable PM at boot time" ++ ---help--- ++ Enable APM features at boot time. From page 36 of the APM BIOS ++ specification: "When disabled, the APM BIOS does not automatically ++ power manage devices, enter the Standby State, enter the Suspend ++ State, or take power saving steps in response to CPU Idle calls." ++ This driver will make CPU Idle calls when Linux is idle (unless this ++ feature is turned off -- see "Do CPU IDLE calls", below). This ++ should always save battery power, but more complicated APM features ++ will be dependent on your BIOS implementation. You may need to turn ++ this option off if your computer hangs at boot time when using APM ++ support, or if it beeps continuously instead of suspending. Turn ++ this off if you have a NEC UltraLite Versa 33/C or a Toshiba ++ T400CDT. This is off by default since most machines do fine without ++ this feature. ++ ++config APM_CPU_IDLE ++ depends on CPU_IDLE ++ bool "Make CPU Idle calls when idle" ++ ---help--- ++ Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. ++ On some machines, this can activate improved power savings, such as ++ a slowed CPU clock rate, when the machine is idle. These idle calls ++ are made after the idle loop has run for some length of time (e.g., ++ 333 mS). On some machines, this will cause a hang at boot time or ++ whenever the CPU becomes idle. (On machines with more than one CPU, ++ this option does nothing.) ++ ++config APM_DISPLAY_BLANK ++ bool "Enable console blanking using APM" ++ ---help--- ++ Enable console blanking using the APM. Some laptops can use this to ++ turn off the LCD backlight when the screen blanker of the Linux ++ virtual console blanks the screen. Note that this is only used by ++ the virtual console screen blanker, and won't turn off the backlight ++ when using the X Window system. This also doesn't have anything to ++ do with your VESA-compliant power-saving monitor. Further, this ++ option doesn't work for all laptops -- it might not turn off your ++ backlight at all, or it might print a lot of errors to the console, ++ especially if you are using gpm. ++ ++config APM_ALLOW_INTS ++ bool "Allow interrupts during APM BIOS calls" ++ ---help--- ++ Normally we disable external interrupts while we are making calls to ++ the APM BIOS as a measure to lessen the effects of a badly behaving ++ BIOS implementation. The BIOS should reenable interrupts if it ++ needs to. Unfortunately, some BIOSes do not -- especially those in ++ many of the newer IBM Thinkpads. If you experience hangs when you ++ suspend, try setting this to Y. Otherwise, say N. ++ ++endif # APM ++ ++source "drivers/cpufreq/Kconfig" ++ ++source "drivers/cpuidle/Kconfig" ++ ++source "drivers/idle/Kconfig" ++ ++endmenu ++ ++ ++menu "Bus options (PCI etc.)" ++ ++config PCI ++ bool "PCI support" ++ default y ++ ---help--- ++ Find out whether you have a PCI motherboard. PCI is the name of a ++ bus system, i.e. the way the CPU talks to the other stuff inside ++ your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or ++ VESA. If you have PCI, say Y, otherwise N. 
++ ++choice ++ prompt "PCI access mode" ++ depends on X86_32 && PCI ++ default PCI_GOANY ++ ---help--- ++ On PCI systems, the BIOS can be used to detect the PCI devices and ++ determine their configuration. However, some old PCI motherboards ++ have BIOS bugs and may crash if this is done. Also, some embedded ++ PCI-based systems don't have any BIOS at all. Linux can also try to ++ detect the PCI hardware directly without using the BIOS. ++ ++ With this option, you can specify how Linux should detect the ++ PCI devices. If you choose "BIOS", the BIOS will be used, ++ if you choose "Direct", the BIOS won't be used, and if you ++ choose "MMConfig", then PCI Express MMCONFIG will be used. ++ If you choose "Any", the kernel will try MMCONFIG, then the ++ direct access method and falls back to the BIOS if that doesn't ++ work. If unsure, go with the default, which is "Any". ++ ++config PCI_GOBIOS ++ bool "BIOS" ++ ++config PCI_GOMMCONFIG ++ bool "MMConfig" ++ ++config PCI_GODIRECT ++ bool "Direct" ++ ++config PCI_GOOLPC ++ bool "OLPC XO-1" ++ depends on OLPC ++ ++config PCI_GOANY ++ bool "Any" ++ ++endchoice ++ ++config PCI_BIOS ++ def_bool y ++ depends on X86_32 && PCI && (PCI_GOBIOS || PCI_GOANY) ++ ++# x86-64 doesn't support PCI BIOS access from long mode so always go direct. ++config PCI_DIRECT ++ def_bool y ++ depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC || PCI_GOMMCONFIG)) ++ ++config PCI_MMCONFIG ++ bool "Support mmconfig PCI config space access" if X86_64 ++ default y ++ depends on PCI && (ACPI || SFI || JAILHOUSE_GUEST) ++ depends on X86_64 || (PCI_GOANY || PCI_GOMMCONFIG) ++ ++config PCI_OLPC ++ def_bool y ++ depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) ++ ++config PCI_XEN ++ def_bool y ++ depends on PCI && XEN ++ select SWIOTLB_XEN ++ ++config PCI_DOMAINS ++ def_bool y ++ depends on PCI ++ ++config MMCONF_FAM10H ++ def_bool y ++ depends on X86_64 && PCI_MMCONFIG && ACPI ++ ++config PCI_CNB20LE_QUIRK ++ bool "Read CNB20LE Host Bridge Windows" if EXPERT ++ depends on PCI ++ help ++ Read the PCI windows out of the CNB20LE host bridge. This allows ++ PCI hotplug to work on systems with the CNB20LE chipset which do ++ not have ACPI. ++ ++ There's no public spec for this chipset, and this functionality ++ is known to be incomplete. ++ ++ You should say N unless you know you need this. ++ ++source "drivers/pci/Kconfig" ++ ++config ISA_BUS ++ bool "ISA bus support on modern systems" if EXPERT ++ help ++ Expose ISA bus device drivers and options available for selection and ++ configuration. Enable this option if your target machine has an ISA ++ bus. ISA is an older system, displaced by PCI and newer bus ++ architectures -- if your target machine is modern, it probably does ++ not have an ISA bus. ++ ++ If unsure, say N. ++ ++# x86_64 have no ISA slots, but can have ISA-style DMA. ++config ISA_DMA_API ++ bool "ISA-style DMA support" if (X86_64 && EXPERT) ++ default y ++ help ++ Enables ISA-style DMA support for devices requiring such controllers. ++ If unsure, say Y. ++ ++if X86_32 ++ ++config ISA ++ bool "ISA support" ++ ---help--- ++ Find out whether you have ISA slots on your motherboard. ISA is the ++ name of a bus system, i.e. the way the CPU talks to the other stuff ++ inside your box. Other bus systems are PCI, EISA, MicroChannel ++ (MCA) or VESA. ISA is an older system, now being displaced by PCI; ++ newer boards don't support it. If you have ISA, say Y, otherwise N. 
++ ++config EISA ++ bool "EISA support" ++ depends on ISA ++ ---help--- ++ The Extended Industry Standard Architecture (EISA) bus was ++ developed as an open alternative to the IBM MicroChannel bus. ++ ++ The EISA bus provided some of the features of the IBM MicroChannel ++ bus while maintaining backward compatibility with cards made for ++ the older ISA bus. The EISA bus saw limited use between 1988 and ++ 1995 when it was made obsolete by the PCI bus. ++ ++ Say Y here if you are building a kernel for an EISA-based machine. ++ ++ Otherwise, say N. ++ ++source "drivers/eisa/Kconfig" ++ ++config SCx200 ++ tristate "NatSemi SCx200 support" ++ ---help--- ++ This provides basic support for National Semiconductor's ++ (now AMD's) Geode processors. The driver probes for the ++ PCI-IDs of several on-chip devices, so its a good dependency ++ for other scx200_* drivers. ++ ++ If compiled as a module, the driver is named scx200. ++ ++config SCx200HR_TIMER ++ tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" ++ depends on SCx200 ++ default y ++ ---help--- ++ This driver provides a clocksource built upon the on-chip ++ 27MHz high-resolution timer. Its also a workaround for ++ NSC Geode SC-1100's buggy TSC, which loses time when the ++ processor goes idle (as is done by the scheduler). The ++ other workaround is idle=poll boot option. ++ ++config OLPC ++ bool "One Laptop Per Child support" ++ depends on !X86_PAE ++ select GPIOLIB ++ select OF ++ select OF_PROMTREE ++ select IRQ_DOMAIN ++ ---help--- ++ Add support for detecting the unique features of the OLPC ++ XO hardware. ++ ++config OLPC_XO1_PM ++ bool "OLPC XO-1 Power Management" ++ depends on OLPC && MFD_CS5535=y && PM_SLEEP ++ ---help--- ++ Add support for poweroff and suspend of the OLPC XO-1 laptop. ++ ++config OLPC_XO1_RTC ++ bool "OLPC XO-1 Real Time Clock" ++ depends on OLPC_XO1_PM && RTC_DRV_CMOS ++ ---help--- ++ Add support for the XO-1 real time clock, which can be used as a ++ programmable wakeup source. ++ ++config OLPC_XO1_SCI ++ bool "OLPC XO-1 SCI extras" ++ depends on OLPC && OLPC_XO1_PM && GPIO_CS5535=y ++ depends on INPUT=y ++ select POWER_SUPPLY ++ ---help--- ++ Add support for SCI-based features of the OLPC XO-1 laptop: ++ - EC-driven system wakeups ++ - Power button ++ - Ebook switch ++ - Lid switch ++ - AC adapter status updates ++ - Battery status updates ++ ++config OLPC_XO15_SCI ++ bool "OLPC XO-1.5 SCI extras" ++ depends on OLPC && ACPI ++ select POWER_SUPPLY ++ ---help--- ++ Add support for SCI-based features of the OLPC XO-1.5 laptop: ++ - EC-driven system wakeups ++ - AC adapter status updates ++ - Battery status updates ++ ++config ALIX ++ bool "PCEngines ALIX System Support (LED setup)" ++ select GPIOLIB ++ ---help--- ++ This option enables system support for the PCEngines ALIX. ++ At present this just sets up LEDs for GPIO control on ++ ALIX2/3/6 boards. However, other system specific setup should ++ get added here. ++ ++ Note: You must still enable the drivers for GPIO and LED support ++ (GPIO_CS5535 & LEDS_GPIO) to actually use the LEDs ++ ++ Note: You have to set alix.force=1 for boards with Award BIOS. ++ ++config NET5501 ++ bool "Soekris Engineering net5501 System Support (LEDS, GPIO, etc)" ++ select GPIOLIB ++ ---help--- ++ This option enables system support for the Soekris Engineering net5501. 
++ ++config GEOS ++ bool "Traverse Technologies GEOS System Support (LEDS, GPIO, etc)" ++ select GPIOLIB ++ depends on DMI ++ ---help--- ++ This option enables system support for the Traverse Technologies GEOS. ++ ++config TS5500 ++ bool "Technologic Systems TS-5500 platform support" ++ depends on MELAN ++ select CHECK_SIGNATURE ++ select NEW_LEDS ++ select LEDS_CLASS ++ ---help--- ++ This option enables system support for the Technologic Systems TS-5500. ++ ++endif # X86_32 ++ ++config AMD_NB ++ def_bool y ++ depends on CPU_SUP_AMD && PCI ++ ++source "drivers/pcmcia/Kconfig" ++ ++config RAPIDIO ++ tristate "RapidIO support" ++ depends on PCI ++ default n ++ help ++ If enabled this option will include drivers and the core ++ infrastructure code to support RapidIO interconnect devices. ++ ++source "drivers/rapidio/Kconfig" ++ ++config X86_SYSFB ++ bool "Mark VGA/VBE/EFI FB as generic system framebuffer" ++ help ++ Firmwares often provide initial graphics framebuffers so the BIOS, ++ bootloader or kernel can show basic video-output during boot for ++ user-guidance and debugging. Historically, x86 used the VESA BIOS ++ Extensions and EFI-framebuffers for this, which are mostly limited ++ to x86. ++ This option, if enabled, marks VGA/VBE/EFI framebuffers as generic ++ framebuffers so the new generic system-framebuffer drivers can be ++ used on x86. If the framebuffer is not compatible with the generic ++ modes, it is advertised as fallback platform framebuffer so legacy ++ drivers like efifb, vesafb and uvesafb can pick it up. ++ If this option is not selected, all system framebuffers are always ++ marked as fallback platform framebuffers as usual. ++ ++ Note: Legacy fbdev drivers, including vesafb, efifb, uvesafb, will ++ not be able to pick up generic system framebuffers if this option ++ is selected. You are highly encouraged to enable simplefb as ++ replacement if you select this option. simplefb can correctly deal ++ with generic system framebuffers. But you should still keep vesafb ++ and others enabled as fallback if a system framebuffer is ++ incompatible with simplefb. ++ ++ If unsure, say Y. ++ ++endmenu ++ ++ ++menu "Binary Emulations" ++ ++config IA32_EMULATION ++ bool "IA32 Emulation" ++ depends on X86_64 ++ select ARCH_WANT_OLD_COMPAT_IPC ++ select BINFMT_ELF ++ select COMPAT_BINFMT_ELF ++ select COMPAT_OLD_SIGACTION ++ ---help--- ++ Include code to run legacy 32-bit programs under a ++ 64-bit kernel. You should likely turn this on, unless you're ++ 100% sure that you don't have any 32-bit programs left. ++ ++config IA32_AOUT ++ tristate "IA32 a.out support" ++ depends on IA32_EMULATION ++ depends on BROKEN ++ ---help--- ++ Support old a.out binaries in the 32bit emulation. ++ ++config X86_X32 ++ bool "x32 ABI for 64-bit mode" ++ depends on X86_64 ++ ---help--- ++ Include code to run binaries for the x32 native 32-bit ABI ++ for 64-bit processors. An x32 process gets access to the ++ full 64-bit register file and wide data path while leaving ++ pointers at 32 bits for smaller memory footprint. ++ ++ You will need a recent binutils (2.22 or later) with ++ elf32_x86_64 support enabled to compile a kernel with this ++ option set. 
++ ++config COMPAT_32 ++ def_bool y ++ depends on IA32_EMULATION || X86_32 ++ select HAVE_UID16 ++ select OLD_SIGSUSPEND3 ++ ++config COMPAT ++ def_bool y ++ depends on IA32_EMULATION || X86_X32 ++ ++if COMPAT ++config COMPAT_FOR_U64_ALIGNMENT ++ def_bool y ++ ++config SYSVIPC_COMPAT ++ def_bool y ++ depends on SYSVIPC ++endif ++ ++endmenu ++ ++ ++config HAVE_ATOMIC_IOMAP ++ def_bool y ++ depends on X86_32 ++ ++config X86_DEV_DMA_OPS ++ bool ++ depends on X86_64 || STA2X11 ++ ++config X86_DMA_REMAP ++ bool ++ depends on STA2X11 ++ ++config HAVE_GENERIC_GUP ++ def_bool y ++ ++source "drivers/firmware/Kconfig" ++ ++source "arch/x86/kvm/Kconfig" +diff -uprN kernel/arch/x86/kernel/apic/apic.c kernel_new/arch/x86/kernel/apic/apic.c +--- kernel/arch/x86/kernel/apic/apic.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/apic.c 2021-04-01 18:28:07.653863289 +0800 +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -269,10 +270,10 @@ void native_apic_icr_write(u32 low, u32 + { + unsigned long flags; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write(APIC_ICR, low); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + u64 native_apic_icr_read(void) +@@ -479,16 +480,20 @@ static int lapic_next_deadline(unsigned + + static int lapic_timer_shutdown(struct clock_event_device *evt) + { ++ unsigned long flags; + unsigned int v; + + /* Lapic used as dummy for broadcast ? */ + if (evt->features & CLOCK_EVT_FEAT_DUMMY) + return 0; + ++ flags = hard_local_irq_save(); + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); + apic_write(APIC_TMICT, 0); ++ hard_local_irq_restore(flags); ++ + return 0; + } + +@@ -523,6 +528,17 @@ static void lapic_timer_broadcast(const + #endif + } + ++#ifdef CONFIG_IPIPE ++static void lapic_itimer_ack(void) ++{ ++ __ack_APIC_irq(); ++} ++ ++static DEFINE_PER_CPU(struct ipipe_timer, lapic_itimer) = { ++ .irq = ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR), ++ .ack = lapic_itimer_ack, ++}; ++#endif /* CONFIG_IPIPE */ + + /* + * The local apic timer can be used for any function which is CPU local. +@@ -653,6 +669,16 @@ static void setup_APIC_timer(void) + + memcpy(levt, &lapic_clockevent, sizeof(*levt)); + levt->cpumask = cpumask_of(smp_processor_id()); ++#ifdef CONFIG_IPIPE ++ if (!(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) ++ levt->ipipe_timer = this_cpu_ptr(&lapic_itimer); ++ else { ++ static atomic_t once = ATOMIC_INIT(-1); ++ if (atomic_inc_and_test(&once)) ++ printk(KERN_INFO ++ "I-pipe: cannot use LAPIC as a tick device\n"); ++ } ++#endif /* CONFIG_IPIPE */ + + if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) { + levt->name = "lapic-deadline"; +@@ -1239,7 +1265,7 @@ void lapic_shutdown(void) + if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config()) + return; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + #ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) +@@ -1249,7 +1275,7 @@ void lapic_shutdown(void) + disable_local_APIC(); + + +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /** +@@ -1493,7 +1519,7 @@ static bool apic_check_and_ack(union api + * per set bit. 
+ */ + for_each_set_bit(bit, isr->map, APIC_IR_BITS) +- ack_APIC_irq(); ++ __ack_APIC_irq(); + return true; + } + +@@ -2126,7 +2152,7 @@ __visible void __irq_entry smp_spurious_ + if (v & (1 << (vector & 0x1f))) { + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", + vector, smp_processor_id()); +- ack_APIC_irq(); ++ __ack_APIC_irq(); + } else { + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", + vector, smp_processor_id()); +@@ -2581,12 +2607,12 @@ static int lapic_suspend(void) + apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI); + #endif + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + disable_local_APIC(); + + irq_remapping_disable(); + +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + return 0; + } + +@@ -2599,7 +2625,7 @@ static void lapic_resume(void) + if (!apic_pm_state.active) + return; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + /* + * IO-APIC and PIC have their own resume routines. +@@ -2657,7 +2683,7 @@ static void lapic_resume(void) + + irq_remapping_reenable(x2apic_mode); + +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /* +diff -uprN kernel/arch/x86/kernel/apic/apic.c.orig kernel_new/arch/x86/kernel/apic/apic.c.orig +--- kernel/arch/x86/kernel/apic/apic.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/apic.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,2857 @@ ++/* ++ * Local APIC handling, local APIC timers ++ * ++ * (c) 1999, 2000, 2009 Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively. ++ * Maciej W. Rozycki : Various updates and fixes. ++ * Mikael Pettersson : Power Management for UP-APIC. ++ * Pavel Machek and ++ * Mikael Pettersson : PM converted to driver model. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned int num_processors; ++ ++unsigned disabled_cpus; ++ ++/* Processor that is doing the boot up */ ++unsigned int boot_cpu_physical_apicid = -1U; ++EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); ++ ++u8 boot_cpu_apic_version; ++ ++/* ++ * The highest APIC ID seen during enumeration. ++ */ ++static unsigned int max_physical_apicid; ++ ++/* ++ * Bitmask of physically existing CPUs: ++ */ ++physid_mask_t phys_cpu_present_map; ++ ++/* ++ * Processor to be disabled specified by kernel parameter ++ * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to ++ * avoid undefined behaviour caused by sending INIT from AP to BSP. ++ */ ++static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID; ++ ++/* ++ * This variable controls which CPUs receive external NMIs. By default, ++ * external NMIs are delivered only to the BSP. 
++ */ ++static int apic_extnmi = APIC_EXTNMI_BSP; ++ ++/* ++ * Map cpu index to physical APIC ID ++ */ ++DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); ++DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); ++DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); ++EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); ++EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); ++EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); ++ ++#ifdef CONFIG_X86_32 ++ ++/* ++ * On x86_32, the mapping between cpu and logical apicid may vary ++ * depending on apic in use. The following early percpu variable is ++ * used for the mapping. This is where the behaviors of x86_64 and 32 ++ * actually diverge. Let's keep it ugly for now. ++ */ ++DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); ++ ++/* Local APIC was disabled by the BIOS and enabled by the kernel */ ++static int enabled_via_apicbase; ++ ++/* ++ * Handle interrupt mode configuration register (IMCR). ++ * This register controls whether the interrupt signals ++ * that reach the BSP come from the master PIC or from the ++ * local APIC. Before entering Symmetric I/O Mode, either ++ * the BIOS or the operating system must switch out of ++ * PIC Mode by changing the IMCR. ++ */ ++static inline void imcr_pic_to_apic(void) ++{ ++ /* select IMCR register */ ++ outb(0x70, 0x22); ++ /* NMI and 8259 INTR go through APIC */ ++ outb(0x01, 0x23); ++} ++ ++static inline void imcr_apic_to_pic(void) ++{ ++ /* select IMCR register */ ++ outb(0x70, 0x22); ++ /* NMI and 8259 INTR go directly to BSP */ ++ outb(0x00, 0x23); ++} ++#endif ++ ++/* ++ * Knob to control our willingness to enable the local APIC. ++ * ++ * +1=force-enable ++ */ ++static int force_enable_local_apic __initdata; ++ ++/* ++ * APIC command line parameters ++ */ ++static int __init parse_lapic(char *arg) ++{ ++ if (IS_ENABLED(CONFIG_X86_32) && !arg) ++ force_enable_local_apic = 1; ++ else if (arg && !strncmp(arg, "notscdeadline", 13)) ++ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); ++ return 0; ++} ++early_param("lapic", parse_lapic); ++ ++#ifdef CONFIG_X86_64 ++static int apic_calibrate_pmtmr __initdata; ++static __init int setup_apicpmtimer(char *s) ++{ ++ apic_calibrate_pmtmr = 1; ++ notsc_setup(NULL); ++ return 0; ++} ++__setup("apicpmtimer", setup_apicpmtimer); ++#endif ++ ++unsigned long mp_lapic_addr; ++int disable_apic; ++/* Disable local APIC timer from the kernel commandline or via dmi quirk */ ++static int disable_apic_timer __initdata; ++/* Local APIC timer works in C2 */ ++int local_apic_timer_c2_ok; ++EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); ++ ++/* ++ * Debug level, exported for io_apic.c ++ */ ++int apic_verbosity; ++ ++int pic_mode; ++ ++/* Have we found an MP table */ ++int smp_found_config; ++ ++static struct resource lapic_resource = { ++ .name = "Local APIC", ++ .flags = IORESOURCE_MEM | IORESOURCE_BUSY, ++}; ++ ++unsigned int lapic_timer_frequency = 0; ++ ++static void apic_pm_activate(void); ++ ++static unsigned long apic_phys; ++ ++/* ++ * Get the LAPIC version ++ */ ++static inline int lapic_get_version(void) ++{ ++ return GET_APIC_VERSION(apic_read(APIC_LVR)); ++} ++ ++/* ++ * Check, if the APIC is integrated or a separate chip ++ */ ++static inline int lapic_is_integrated(void) ++{ ++ return APIC_INTEGRATED(lapic_get_version()); ++} ++ ++/* ++ * Check, whether this is a modern or a first generation APIC ++ */ ++static int modern_apic(void) ++{ ++ /* AMD systems use old APIC versions, so check 
the CPU */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && ++ boot_cpu_data.x86 >= 0xf) ++ return 1; ++ ++ /* Hygon systems use modern APIC */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) ++ return 1; ++ ++ return lapic_get_version() >= 0x14; ++} ++ ++/* ++ * right after this call apic become NOOP driven ++ * so apic->write/read doesn't do anything ++ */ ++static void __init apic_disable(void) ++{ ++ pr_info("APIC: switched to apic NOOP\n"); ++ apic = &apic_noop; ++} ++ ++void native_apic_wait_icr_idle(void) ++{ ++ while (apic_read(APIC_ICR) & APIC_ICR_BUSY) ++ cpu_relax(); ++} ++ ++u32 native_safe_apic_wait_icr_idle(void) ++{ ++ u32 send_status; ++ int timeout; ++ ++ timeout = 0; ++ do { ++ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; ++ if (!send_status) ++ break; ++ inc_irq_stat(icr_read_retry_count); ++ udelay(100); ++ } while (timeout++ < 1000); ++ ++ return send_status; ++} ++ ++void native_apic_icr_write(u32 low, u32 id) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); ++ apic_write(APIC_ICR, low); ++ local_irq_restore(flags); ++} ++ ++u64 native_apic_icr_read(void) ++{ ++ u32 icr1, icr2; ++ ++ icr2 = apic_read(APIC_ICR2); ++ icr1 = apic_read(APIC_ICR); ++ ++ return icr1 | ((u64)icr2 << 32); ++} ++ ++#ifdef CONFIG_X86_32 ++/** ++ * get_physical_broadcast - Get number of physical broadcast IDs ++ */ ++int get_physical_broadcast(void) ++{ ++ return modern_apic() ? 0xff : 0xf; ++} ++#endif ++ ++/** ++ * lapic_get_maxlvt - get the maximum number of local vector table entries ++ */ ++int lapic_get_maxlvt(void) ++{ ++ /* ++ * - we always have APIC integrated on 64bit mode ++ * - 82489DXs do not report # of LVT entries ++ */ ++ return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2; ++} ++ ++/* ++ * Local APIC timer ++ */ ++ ++/* Clock divisor */ ++#define APIC_DIVISOR 16 ++#define TSC_DIVISOR 8 ++ ++/* ++ * This function sets up the local APIC timer, with a timeout of ++ * 'clocks' APIC bus clock. During calibration we actually call ++ * this function twice on the boot CPU, once with a bogus timeout ++ * value, second time for real. The other (noncalibrating) CPUs ++ * call this function only once, with the real, calibrated value. ++ * ++ * We do reads before writes even if unnecessary, to get around the ++ * P5 APIC double write bug. ++ */ ++static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) ++{ ++ unsigned int lvtt_value, tmp_value; ++ ++ lvtt_value = LOCAL_TIMER_VECTOR; ++ if (!oneshot) ++ lvtt_value |= APIC_LVT_TIMER_PERIODIC; ++ else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) ++ lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE; ++ ++ if (!lapic_is_integrated()) ++ lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); ++ ++ if (!irqen) ++ lvtt_value |= APIC_LVT_MASKED; ++ ++ apic_write(APIC_LVTT, lvtt_value); ++ ++ if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { ++ /* ++ * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, ++ * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. ++ * According to Intel, MFENCE can do the serialization here. 
++ */ ++ asm volatile("mfence" : : : "memory"); ++ ++ printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); ++ return; ++ } ++ ++ /* ++ * Divide PICLK by 16 ++ */ ++ tmp_value = apic_read(APIC_TDCR); ++ apic_write(APIC_TDCR, ++ (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | ++ APIC_TDR_DIV_16); ++ ++ if (!oneshot) ++ apic_write(APIC_TMICT, clocks / APIC_DIVISOR); ++} ++ ++/* ++ * Setup extended LVT, AMD specific ++ * ++ * Software should use the LVT offsets the BIOS provides. The offsets ++ * are determined by the subsystems using it like those for MCE ++ * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts ++ * are supported. Beginning with family 10h at least 4 offsets are ++ * available. ++ * ++ * Since the offsets must be consistent for all cores, we keep track ++ * of the LVT offsets in software and reserve the offset for the same ++ * vector also to be used on other cores. An offset is freed by ++ * setting the entry to APIC_EILVT_MASKED. ++ * ++ * If the BIOS is right, there should be no conflicts. Otherwise a ++ * "[Firmware Bug]: ..." error message is generated. However, if ++ * software does not properly determines the offsets, it is not ++ * necessarily a BIOS bug. ++ */ ++ ++static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX]; ++ ++static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new) ++{ ++ return (old & APIC_EILVT_MASKED) ++ || (new == APIC_EILVT_MASKED) ++ || ((new & ~APIC_EILVT_MASKED) == old); ++} ++ ++static unsigned int reserve_eilvt_offset(int offset, unsigned int new) ++{ ++ unsigned int rsvd, vector; ++ ++ if (offset >= APIC_EILVT_NR_MAX) ++ return ~0; ++ ++ rsvd = atomic_read(&eilvt_offsets[offset]); ++ do { ++ vector = rsvd & ~APIC_EILVT_MASKED; /* 0: unassigned */ ++ if (vector && !eilvt_entry_is_changeable(vector, new)) ++ /* may not change if vectors are different */ ++ return rsvd; ++ rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); ++ } while (rsvd != new); ++ ++ rsvd &= ~APIC_EILVT_MASKED; ++ if (rsvd && rsvd != vector) ++ pr_info("LVT offset %d assigned for vector 0x%02x\n", ++ offset, rsvd); ++ ++ return new; ++} ++ ++/* ++ * If mask=1, the LVT entry does not generate interrupts while mask=0 ++ * enables the vector. See also the BKDGs. Must be called with ++ * preemption disabled. 
++ */ ++ ++int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) ++{ ++ unsigned long reg = APIC_EILVTn(offset); ++ unsigned int new, old, reserved; ++ ++ new = (mask << 16) | (msg_type << 8) | vector; ++ old = apic_read(reg); ++ reserved = reserve_eilvt_offset(offset, new); ++ ++ if (reserved != new) { ++ pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " ++ "vector 0x%x, but the register is already in use for " ++ "vector 0x%x on another cpu\n", ++ smp_processor_id(), reg, offset, new, reserved); ++ return -EINVAL; ++ } ++ ++ if (!eilvt_entry_is_changeable(old, new)) { ++ pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " ++ "vector 0x%x, but the register is already in use for " ++ "vector 0x%x on this cpu\n", ++ smp_processor_id(), reg, offset, new, old); ++ return -EBUSY; ++ } ++ ++ apic_write(reg, new); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(setup_APIC_eilvt); ++ ++/* ++ * Program the next event, relative to now ++ */ ++static int lapic_next_event(unsigned long delta, ++ struct clock_event_device *evt) ++{ ++ apic_write(APIC_TMICT, delta); ++ return 0; ++} ++ ++static int lapic_next_deadline(unsigned long delta, ++ struct clock_event_device *evt) ++{ ++ u64 tsc; ++ ++ tsc = rdtsc(); ++ wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); ++ return 0; ++} ++ ++static int lapic_timer_shutdown(struct clock_event_device *evt) ++{ ++ unsigned int v; ++ ++ /* Lapic used as dummy for broadcast ? */ ++ if (evt->features & CLOCK_EVT_FEAT_DUMMY) ++ return 0; ++ ++ v = apic_read(APIC_LVTT); ++ v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); ++ apic_write(APIC_LVTT, v); ++ apic_write(APIC_TMICT, 0); ++ return 0; ++} ++ ++static inline int ++lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot) ++{ ++ /* Lapic used as dummy for broadcast ? */ ++ if (evt->features & CLOCK_EVT_FEAT_DUMMY) ++ return 0; ++ ++ __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1); ++ return 0; ++} ++ ++static int lapic_timer_set_periodic(struct clock_event_device *evt) ++{ ++ return lapic_timer_set_periodic_oneshot(evt, false); ++} ++ ++static int lapic_timer_set_oneshot(struct clock_event_device *evt) ++{ ++ return lapic_timer_set_periodic_oneshot(evt, true); ++} ++ ++/* ++ * Local APIC timer broadcast function ++ */ ++static void lapic_timer_broadcast(const struct cpumask *mask) ++{ ++#ifdef CONFIG_SMP ++ apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR); ++#endif ++} ++ ++ ++/* ++ * The local apic timer can be used for any function which is CPU local. 
++ */ ++static struct clock_event_device lapic_clockevent = { ++ .name = "lapic", ++ .features = CLOCK_EVT_FEAT_PERIODIC | ++ CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP ++ | CLOCK_EVT_FEAT_DUMMY, ++ .shift = 32, ++ .set_state_shutdown = lapic_timer_shutdown, ++ .set_state_periodic = lapic_timer_set_periodic, ++ .set_state_oneshot = lapic_timer_set_oneshot, ++ .set_state_oneshot_stopped = lapic_timer_shutdown, ++ .set_next_event = lapic_next_event, ++ .broadcast = lapic_timer_broadcast, ++ .rating = 100, ++ .irq = -1, ++}; ++static DEFINE_PER_CPU(struct clock_event_device, lapic_events); ++ ++#define DEADLINE_MODEL_MATCH_FUNC(model, func) \ ++ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&func } ++ ++#define DEADLINE_MODEL_MATCH_REV(model, rev) \ ++ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev } ++ ++static u32 hsx_deadline_rev(void) ++{ ++ switch (boot_cpu_data.x86_stepping) { ++ case 0x02: return 0x3a; /* EP */ ++ case 0x04: return 0x0f; /* EX */ ++ } ++ ++ return ~0U; ++} ++ ++static u32 bdx_deadline_rev(void) ++{ ++ switch (boot_cpu_data.x86_stepping) { ++ case 0x02: return 0x00000011; ++ case 0x03: return 0x0700000e; ++ case 0x04: return 0x0f00000c; ++ case 0x05: return 0x0e000003; ++ } ++ ++ return ~0U; ++} ++ ++static u32 skx_deadline_rev(void) ++{ ++ switch (boot_cpu_data.x86_stepping) { ++ case 0x03: return 0x01000136; ++ case 0x04: return 0x02000014; ++ } ++ ++ if (boot_cpu_data.x86_stepping > 4) ++ return 0; ++ ++ return ~0U; ++} ++ ++static const struct x86_cpu_id deadline_match[] = { ++ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), ++ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev), ++ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev), ++ ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_GT3E, 0x17), ++ ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_CORE, 0x25), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_GT3E, 0x17), ++ ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_MOBILE, 0xb2), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_DESKTOP, 0xb2), ++ ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_MOBILE, 0x52), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_DESKTOP, 0x52), ++ ++ {}, ++}; ++ ++static void apic_check_deadline_errata(void) ++{ ++ const struct x86_cpu_id *m; ++ u32 rev; ++ ++ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) || ++ boot_cpu_has(X86_FEATURE_HYPERVISOR)) ++ return; ++ ++ m = x86_match_cpu(deadline_match); ++ if (!m) ++ return; ++ ++ /* ++ * Function pointers will have the MSB set due to address layout, ++ * immediate revisions will not. ++ */ ++ if ((long)m->driver_data < 0) ++ rev = ((u32 (*)(void))(m->driver_data))(); ++ else ++ rev = (u32)m->driver_data; ++ ++ if (boot_cpu_data.microcode >= rev) ++ return; ++ ++ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); ++ pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; " ++ "please update microcode to version: 0x%x (or later)\n", rev); ++} ++ ++/* ++ * Setup the local APIC timer for this CPU. Copy the initialized values ++ * of the boot CPU and register the clock event in the framework. 
++ */ ++static void setup_APIC_timer(void) ++{ ++ struct clock_event_device *levt = this_cpu_ptr(&lapic_events); ++ ++ if (this_cpu_has(X86_FEATURE_ARAT)) { ++ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; ++ /* Make LAPIC timer preferrable over percpu HPET */ ++ lapic_clockevent.rating = 150; ++ } ++ ++ memcpy(levt, &lapic_clockevent, sizeof(*levt)); ++ levt->cpumask = cpumask_of(smp_processor_id()); ++ ++ if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) { ++ levt->name = "lapic-deadline"; ++ levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC | ++ CLOCK_EVT_FEAT_DUMMY); ++ levt->set_next_event = lapic_next_deadline; ++ clockevents_config_and_register(levt, ++ tsc_khz * (1000 / TSC_DIVISOR), ++ 0xF, ~0UL); ++ } else ++ clockevents_register_device(levt); ++} ++ ++/* ++ * Install the updated TSC frequency from recalibration at the TSC ++ * deadline clockevent devices. ++ */ ++static void __lapic_update_tsc_freq(void *info) ++{ ++ struct clock_event_device *levt = this_cpu_ptr(&lapic_events); ++ ++ if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) ++ return; ++ ++ clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR)); ++} ++ ++void lapic_update_tsc_freq(void) ++{ ++ /* ++ * The clockevent device's ->mult and ->shift can both be ++ * changed. In order to avoid races, schedule the frequency ++ * update code on each CPU. ++ */ ++ on_each_cpu(__lapic_update_tsc_freq, NULL, 0); ++} ++ ++/* ++ * In this functions we calibrate APIC bus clocks to the external timer. ++ * ++ * We want to do the calibration only once since we want to have local timer ++ * irqs syncron. CPUs connected by the same APIC bus have the very same bus ++ * frequency. ++ * ++ * This was previously done by reading the PIT/HPET and waiting for a wrap ++ * around to find out, that a tick has elapsed. I have a box, where the PIT ++ * readout is broken, so it never gets out of the wait loop again. This was ++ * also reported by others. ++ * ++ * Monitoring the jiffies value is inaccurate and the clockevents ++ * infrastructure allows us to do a simple substitution of the interrupt ++ * handler. ++ * ++ * The calibration routine also uses the pm_timer when possible, as the PIT ++ * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes ++ * back to normal later in the boot process). ++ */ ++ ++#define LAPIC_CAL_LOOPS (HZ/10) ++ ++static __initdata int lapic_cal_loops = -1; ++static __initdata long lapic_cal_t1, lapic_cal_t2; ++static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; ++static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; ++static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; ++ ++/* ++ * Temporary interrupt handler and polled calibration function. 
++ */ ++static void __init lapic_cal_handler(struct clock_event_device *dev) ++{ ++ unsigned long long tsc = 0; ++ long tapic = apic_read(APIC_TMCCT); ++ unsigned long pm = acpi_pm_read_early(); ++ ++ if (boot_cpu_has(X86_FEATURE_TSC)) ++ tsc = rdtsc(); ++ ++ switch (lapic_cal_loops++) { ++ case 0: ++ lapic_cal_t1 = tapic; ++ lapic_cal_tsc1 = tsc; ++ lapic_cal_pm1 = pm; ++ lapic_cal_j1 = jiffies; ++ break; ++ ++ case LAPIC_CAL_LOOPS: ++ lapic_cal_t2 = tapic; ++ lapic_cal_tsc2 = tsc; ++ if (pm < lapic_cal_pm1) ++ pm += ACPI_PM_OVRRUN; ++ lapic_cal_pm2 = pm; ++ lapic_cal_j2 = jiffies; ++ break; ++ } ++} ++ ++static int __init ++calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) ++{ ++ const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; ++ const long pm_thresh = pm_100ms / 100; ++ unsigned long mult; ++ u64 res; ++ ++#ifndef CONFIG_X86_PM_TIMER ++ return -1; ++#endif ++ ++ apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); ++ ++ /* Check, if the PM timer is available */ ++ if (!deltapm) ++ return -1; ++ ++ mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); ++ ++ if (deltapm > (pm_100ms - pm_thresh) && ++ deltapm < (pm_100ms + pm_thresh)) { ++ apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n"); ++ return 0; ++ } ++ ++ res = (((u64)deltapm) * mult) >> 22; ++ do_div(res, 1000000); ++ pr_warning("APIC calibration not consistent " ++ "with PM-Timer: %ldms instead of 100ms\n",(long)res); ++ ++ /* Correct the lapic counter value */ ++ res = (((u64)(*delta)) * pm_100ms); ++ do_div(res, deltapm); ++ pr_info("APIC delta adjusted to PM-Timer: " ++ "%lu (%ld)\n", (unsigned long)res, *delta); ++ *delta = (long)res; ++ ++ /* Correct the tsc counter value */ ++ if (boot_cpu_has(X86_FEATURE_TSC)) { ++ res = (((u64)(*deltatsc)) * pm_100ms); ++ do_div(res, deltapm); ++ apic_printk(APIC_VERBOSE, "TSC delta adjusted to " ++ "PM-Timer: %lu (%ld)\n", ++ (unsigned long)res, *deltatsc); ++ *deltatsc = (long)res; ++ } ++ ++ return 0; ++} ++ ++static int __init lapic_init_clockevent(void) ++{ ++ if (!lapic_timer_frequency) ++ return -1; ++ ++ /* Calculate the scaled math multiplication factor */ ++ lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, ++ TICK_NSEC, lapic_clockevent.shift); ++ lapic_clockevent.max_delta_ns = ++ clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); ++ lapic_clockevent.max_delta_ticks = 0x7FFFFFFF; ++ lapic_clockevent.min_delta_ns = ++ clockevent_delta2ns(0xF, &lapic_clockevent); ++ lapic_clockevent.min_delta_ticks = 0xF; ++ ++ return 0; ++} ++ ++static int __init calibrate_APIC_clock(void) ++{ ++ struct clock_event_device *levt = this_cpu_ptr(&lapic_events); ++ u64 tsc_perj = 0, tsc_start = 0; ++ unsigned long jif_start; ++ unsigned long deltaj; ++ long delta, deltatsc; ++ int pm_referenced = 0; ++ ++ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) ++ return 0; ++ ++ /* ++ * Check if lapic timer has already been calibrated by platform ++ * specific routine, such as tsc calibration code. If so just fill ++ * in the clockevent structure and return. ++ */ ++ if (!lapic_init_clockevent()) { ++ apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", ++ lapic_timer_frequency); ++ /* ++ * Direct calibration methods must have an always running ++ * local APIC timer, no need for broadcast timer. ++ */ ++ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; ++ return 0; ++ } ++ ++ apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" ++ "calibrating APIC timer ...\n"); ++ ++ /* ++ * There are platforms w/o global clockevent devices. 
Instead of ++ * making the calibration conditional on that, use a polling based ++ * approach everywhere. ++ */ ++ local_irq_disable(); ++ ++ /* ++ * Setup the APIC counter to maximum. There is no way the lapic ++ * can underflow in the 100ms detection time frame ++ */ ++ __setup_APIC_LVTT(0xffffffff, 0, 0); ++ ++ /* ++ * Methods to terminate the calibration loop: ++ * 1) Global clockevent if available (jiffies) ++ * 2) TSC if available and frequency is known ++ */ ++ jif_start = READ_ONCE(jiffies); ++ ++ if (tsc_khz) { ++ tsc_start = rdtsc(); ++ tsc_perj = div_u64((u64)tsc_khz * 1000, HZ); ++ } ++ ++ /* ++ * Enable interrupts so the tick can fire, if a global ++ * clockevent device is available ++ */ ++ local_irq_enable(); ++ ++ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) { ++ /* Wait for a tick to elapse */ ++ while (1) { ++ if (tsc_khz) { ++ u64 tsc_now = rdtsc(); ++ if ((tsc_now - tsc_start) >= tsc_perj) { ++ tsc_start += tsc_perj; ++ break; ++ } ++ } else { ++ unsigned long jif_now = READ_ONCE(jiffies); ++ ++ if (time_after(jif_now, jif_start)) { ++ jif_start = jif_now; ++ break; ++ } ++ } ++ cpu_relax(); ++ } ++ ++ /* Invoke the calibration routine */ ++ local_irq_disable(); ++ lapic_cal_handler(NULL); ++ local_irq_enable(); ++ } ++ ++ local_irq_disable(); ++ ++ /* Build delta t1-t2 as apic timer counts down */ ++ delta = lapic_cal_t1 - lapic_cal_t2; ++ apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); ++ ++ deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); ++ ++ /* we trust the PM based calibration if possible */ ++ pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, ++ &delta, &deltatsc); ++ ++ lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; ++ lapic_init_clockevent(); ++ ++ apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); ++ apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); ++ apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", ++ lapic_timer_frequency); ++ ++ if (boot_cpu_has(X86_FEATURE_TSC)) { ++ apic_printk(APIC_VERBOSE, "..... CPU clock speed is " ++ "%ld.%04ld MHz.\n", ++ (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), ++ (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); ++ } ++ ++ apic_printk(APIC_VERBOSE, "..... host bus clock speed is " ++ "%u.%04u MHz.\n", ++ lapic_timer_frequency / (1000000 / HZ), ++ lapic_timer_frequency % (1000000 / HZ)); ++ ++ /* ++ * Do a sanity check on the APIC calibration result ++ */ ++ if (lapic_timer_frequency < (1000000 / HZ)) { ++ local_irq_enable(); ++ pr_warning("APIC frequency too slow, disabling apic timer\n"); ++ return -1; ++ } ++ ++ levt->features &= ~CLOCK_EVT_FEAT_DUMMY; ++ ++ /* ++ * PM timer calibration failed or not turned on so lets try APIC ++ * timer based calibration, if a global clockevent device is ++ * available. ++ */ ++ if (!pm_referenced && global_clock_event) { ++ apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); ++ ++ /* ++ * Setup the apic timer manually ++ */ ++ levt->event_handler = lapic_cal_handler; ++ lapic_timer_set_periodic(levt); ++ lapic_cal_loops = -1; ++ ++ /* Let the interrupts run */ ++ local_irq_enable(); ++ ++ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) ++ cpu_relax(); ++ ++ /* Stop the lapic timer */ ++ local_irq_disable(); ++ lapic_timer_shutdown(levt); ++ ++ /* Jiffies delta */ ++ deltaj = lapic_cal_j2 - lapic_cal_j1; ++ apic_printk(APIC_VERBOSE, "... 
jiffies delta = %lu\n", deltaj); ++ ++ /* Check, if the jiffies result is consistent */ ++ if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) ++ apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); ++ else ++ levt->features |= CLOCK_EVT_FEAT_DUMMY; ++ } ++ local_irq_enable(); ++ ++ if (levt->features & CLOCK_EVT_FEAT_DUMMY) { ++ pr_warning("APIC timer disabled due to verification failure\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Setup the boot APIC ++ * ++ * Calibrate and verify the result. ++ */ ++void __init setup_boot_APIC_clock(void) ++{ ++ /* ++ * The local apic timer can be disabled via the kernel ++ * commandline or from the CPU detection code. Register the lapic ++ * timer as a dummy clock event source on SMP systems, so the ++ * broadcast mechanism is used. On UP systems simply ignore it. ++ */ ++ if (disable_apic_timer) { ++ pr_info("Disabling APIC timer\n"); ++ /* No broadcast on UP ! */ ++ if (num_possible_cpus() > 1) { ++ lapic_clockevent.mult = 1; ++ setup_APIC_timer(); ++ } ++ return; ++ } ++ ++ if (calibrate_APIC_clock()) { ++ /* No broadcast on UP ! */ ++ if (num_possible_cpus() > 1) ++ setup_APIC_timer(); ++ return; ++ } ++ ++ /* ++ * If nmi_watchdog is set to IO_APIC, we need the ++ * PIT/HPET going. Otherwise register lapic as a dummy ++ * device. ++ */ ++ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; ++ ++ /* Setup the lapic or request the broadcast */ ++ setup_APIC_timer(); ++ amd_e400_c1e_apic_setup(); ++} ++ ++void setup_secondary_APIC_clock(void) ++{ ++ setup_APIC_timer(); ++ amd_e400_c1e_apic_setup(); ++} ++ ++/* ++ * The guts of the apic timer interrupt ++ */ ++static void local_apic_timer_interrupt(void) ++{ ++ struct clock_event_device *evt = this_cpu_ptr(&lapic_events); ++ ++ /* ++ * Normally we should not be here till LAPIC has been initialized but ++ * in some cases like kdump, its possible that there is a pending LAPIC ++ * timer interrupt from previous kernel's context and is delivered in ++ * new kernel the moment interrupts are enabled. ++ * ++ * Interrupts are enabled early and LAPIC is setup much later, hence ++ * its possible that when we get here evt->event_handler is NULL. ++ * Check for event_handler being NULL and discard the interrupt as ++ * spurious. ++ */ ++ if (!evt->event_handler) { ++ pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", ++ smp_processor_id()); ++ /* Switch it off */ ++ lapic_timer_shutdown(evt); ++ return; ++ } ++ ++ /* ++ * the NMI deadlock-detector uses this. ++ */ ++ inc_irq_stat(apic_timer_irqs); ++ ++ evt->event_handler(evt); ++} ++ ++/* ++ * Local APIC timer interrupt. This is the most natural way for doing ++ * local interrupts, but local timer interrupts can be emulated by ++ * broadcast interrupts too. [in case the hw doesn't support APIC timers] ++ * ++ * [ if a single-CPU system runs an SMP kernel then we call the local ++ * interrupt as well. Thus we cannot inline the local irq ... ] ++ */ ++__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) ++{ ++ struct pt_regs *old_regs = set_irq_regs(regs); ++ ++ /* ++ * NOTE! We'd better ACK the irq immediately, ++ * because timer handling can be slow. ++ * ++ * update_process_times() expects us to have done irq_enter(). ++ * Besides, if we don't timer interrupts ignore the global ++ * interrupt lock, which is the WrongThing (tm) to do. 
++ */ ++ entering_ack_irq(); ++ trace_local_timer_entry(LOCAL_TIMER_VECTOR); ++ local_apic_timer_interrupt(); ++ trace_local_timer_exit(LOCAL_TIMER_VECTOR); ++ exiting_irq(); ++ ++ set_irq_regs(old_regs); ++} ++ ++int setup_profiling_timer(unsigned int multiplier) ++{ ++ return -EINVAL; ++} ++ ++/* ++ * Local APIC start and shutdown ++ */ ++ ++/** ++ * clear_local_APIC - shutdown the local APIC ++ * ++ * This is called, when a CPU is disabled and before rebooting, so the state of ++ * the local APIC has no dangling leftovers. Also used to cleanout any BIOS ++ * leftovers during boot. ++ */ ++void clear_local_APIC(void) ++{ ++ int maxlvt; ++ u32 v; ++ ++ /* APIC hasn't been mapped yet */ ++ if (!x2apic_mode && !apic_phys) ++ return; ++ ++ maxlvt = lapic_get_maxlvt(); ++ /* ++ * Masking an LVT entry can trigger a local APIC error ++ * if the vector is zero. Mask LVTERR first to prevent this. ++ */ ++ if (maxlvt >= 3) { ++ v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ ++ apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); ++ } ++ /* ++ * Careful: we have to set masks only first to deassert ++ * any level-triggered sources. ++ */ ++ v = apic_read(APIC_LVTT); ++ apic_write(APIC_LVTT, v | APIC_LVT_MASKED); ++ v = apic_read(APIC_LVT0); ++ apic_write(APIC_LVT0, v | APIC_LVT_MASKED); ++ v = apic_read(APIC_LVT1); ++ apic_write(APIC_LVT1, v | APIC_LVT_MASKED); ++ if (maxlvt >= 4) { ++ v = apic_read(APIC_LVTPC); ++ apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); ++ } ++ ++ /* lets not touch this if we didn't frob it */ ++#ifdef CONFIG_X86_THERMAL_VECTOR ++ if (maxlvt >= 5) { ++ v = apic_read(APIC_LVTTHMR); ++ apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); ++ } ++#endif ++#ifdef CONFIG_X86_MCE_INTEL ++ if (maxlvt >= 6) { ++ v = apic_read(APIC_LVTCMCI); ++ if (!(v & APIC_LVT_MASKED)) ++ apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED); ++ } ++#endif ++ ++ /* ++ * Clean APIC state for other OSs: ++ */ ++ apic_write(APIC_LVTT, APIC_LVT_MASKED); ++ apic_write(APIC_LVT0, APIC_LVT_MASKED); ++ apic_write(APIC_LVT1, APIC_LVT_MASKED); ++ if (maxlvt >= 3) ++ apic_write(APIC_LVTERR, APIC_LVT_MASKED); ++ if (maxlvt >= 4) ++ apic_write(APIC_LVTPC, APIC_LVT_MASKED); ++ ++ /* Integrated APIC (!82489DX) ? */ ++ if (lapic_is_integrated()) { ++ if (maxlvt > 3) ++ /* Clear ESR due to Pentium errata 3AP and 11AP */ ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ } ++} ++ ++/** ++ * disable_local_APIC - clear and disable the local APIC ++ */ ++void disable_local_APIC(void) ++{ ++ unsigned int value; ++ ++ /* APIC hasn't been mapped yet */ ++ if (!x2apic_mode && !apic_phys) ++ return; ++ ++ clear_local_APIC(); ++ ++ /* ++ * Disable APIC (implies clearing of registers ++ * for 82489DX!). ++ */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_SPIV_APIC_ENABLED; ++ apic_write(APIC_SPIV, value); ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * When LAPIC was disabled by the BIOS and enabled by the kernel, ++ * restore the disabled state. ++ */ ++ if (enabled_via_apicbase) { ++ unsigned int l, h; ++ ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ l &= ~MSR_IA32_APICBASE_ENABLE; ++ wrmsr(MSR_IA32_APICBASE, l, h); ++ } ++#endif ++} ++ ++/* ++ * If Linux enabled the LAPIC against the BIOS default disable it down before ++ * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and ++ * not power-off. Additionally clear all LVT entries before disable_local_APIC ++ * for the case where Linux didn't enable the LAPIC. 
++ */ ++void lapic_shutdown(void) ++{ ++ unsigned long flags; ++ ++ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config()) ++ return; ++ ++ local_irq_save(flags); ++ ++#ifdef CONFIG_X86_32 ++ if (!enabled_via_apicbase) ++ clear_local_APIC(); ++ else ++#endif ++ disable_local_APIC(); ++ ++ ++ local_irq_restore(flags); ++} ++ ++/** ++ * sync_Arb_IDs - synchronize APIC bus arbitration IDs ++ */ ++void __init sync_Arb_IDs(void) ++{ ++ /* ++ * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not ++ * needed on AMD. ++ */ ++ if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ++ return; ++ ++ /* ++ * Wait for idle. ++ */ ++ apic_wait_icr_idle(); ++ ++ apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); ++ apic_write(APIC_ICR, APIC_DEST_ALLINC | ++ APIC_INT_LEVELTRIG | APIC_DM_INIT); ++} ++ ++enum apic_intr_mode_id apic_intr_mode; ++ ++static int __init apic_intr_mode_select(void) ++{ ++ /* Check kernel option */ ++ if (disable_apic) { ++ pr_info("APIC disabled via kernel command line\n"); ++ return APIC_PIC; ++ } ++ ++ /* Check BIOS */ ++#ifdef CONFIG_X86_64 ++ /* On 64-bit, the APIC must be integrated, Check local APIC only */ ++ if (!boot_cpu_has(X86_FEATURE_APIC)) { ++ disable_apic = 1; ++ pr_info("APIC disabled by BIOS\n"); ++ return APIC_PIC; ++ } ++#else ++ /* On 32-bit, the APIC may be integrated APIC or 82489DX */ ++ ++ /* Neither 82489DX nor integrated APIC ? */ ++ if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) { ++ disable_apic = 1; ++ return APIC_PIC; ++ } ++ ++ /* If the BIOS pretends there is an integrated APIC ? */ ++ if (!boot_cpu_has(X86_FEATURE_APIC) && ++ APIC_INTEGRATED(boot_cpu_apic_version)) { ++ disable_apic = 1; ++ pr_err(FW_BUG "Local APIC %d not detected, force emulation\n", ++ boot_cpu_physical_apicid); ++ return APIC_PIC; ++ } ++#endif ++ ++ /* Check MP table or ACPI MADT configuration */ ++ if (!smp_found_config) { ++ disable_ioapic_support(); ++ if (!acpi_lapic) { ++ pr_info("APIC: ACPI MADT or MP tables are not detected\n"); ++ return APIC_VIRTUAL_WIRE_NO_CONFIG; ++ } ++ return APIC_VIRTUAL_WIRE; ++ } ++ ++#ifdef CONFIG_SMP ++ /* If SMP should be disabled, then really disable it! */ ++ if (!setup_max_cpus) { ++ pr_info("APIC: SMP mode deactivated\n"); ++ return APIC_SYMMETRIC_IO_NO_ROUTING; ++ } ++ ++ if (read_apic_id() != boot_cpu_physical_apicid) { ++ panic("Boot APIC ID in local APIC unexpected (%d vs %d)", ++ read_apic_id(), boot_cpu_physical_apicid); ++ /* Or can we switch back to PIC here? */ ++ } ++#endif ++ ++ return APIC_SYMMETRIC_IO; ++} ++ ++/* ++ * An initial setup of the virtual wire mode. ++ */ ++void __init init_bsp_APIC(void) ++{ ++ unsigned int value; ++ ++ /* ++ * Don't do the setup now if we have a SMP BIOS as the ++ * through-I/O-APIC virtual wire mode might be active. ++ */ ++ if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) ++ return; ++ ++ /* ++ * Do not trust the local APIC being empty at bootup. ++ */ ++ clear_local_APIC(); ++ ++ /* ++ * Enable APIC. ++ */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_VECTOR_MASK; ++ value |= APIC_SPIV_APIC_ENABLED; ++ ++#ifdef CONFIG_X86_32 ++ /* This bit is reserved on P4/Xeon and should be cleared */ ++ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && ++ (boot_cpu_data.x86 == 15)) ++ value &= ~APIC_SPIV_FOCUS_DISABLED; ++ else ++#endif ++ value |= APIC_SPIV_FOCUS_DISABLED; ++ value |= SPURIOUS_APIC_VECTOR; ++ apic_write(APIC_SPIV, value); ++ ++ /* ++ * Set up the virtual wire mode. 
++ */ ++ apic_write(APIC_LVT0, APIC_DM_EXTINT); ++ value = APIC_DM_NMI; ++ if (!lapic_is_integrated()) /* 82489DX */ ++ value |= APIC_LVT_LEVEL_TRIGGER; ++ if (apic_extnmi == APIC_EXTNMI_NONE) ++ value |= APIC_LVT_MASKED; ++ apic_write(APIC_LVT1, value); ++} ++ ++/* Init the interrupt delivery mode for the BSP */ ++void __init apic_intr_mode_init(void) ++{ ++ bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT); ++ ++ apic_intr_mode = apic_intr_mode_select(); ++ ++ switch (apic_intr_mode) { ++ case APIC_PIC: ++ pr_info("APIC: Keep in PIC mode(8259)\n"); ++ return; ++ case APIC_VIRTUAL_WIRE: ++ pr_info("APIC: Switch to virtual wire mode setup\n"); ++ default_setup_apic_routing(); ++ break; ++ case APIC_VIRTUAL_WIRE_NO_CONFIG: ++ pr_info("APIC: Switch to virtual wire mode setup with no configuration\n"); ++ upmode = true; ++ default_setup_apic_routing(); ++ break; ++ case APIC_SYMMETRIC_IO: ++ pr_info("APIC: Switch to symmetric I/O mode setup\n"); ++ default_setup_apic_routing(); ++ break; ++ case APIC_SYMMETRIC_IO_NO_ROUTING: ++ pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n"); ++ break; ++ } ++ ++ apic_bsp_setup(upmode); ++} ++ ++static void lapic_setup_esr(void) ++{ ++ unsigned int oldvalue, value, maxlvt; ++ ++ if (!lapic_is_integrated()) { ++ pr_info("No ESR for 82489DX.\n"); ++ return; ++ } ++ ++ if (apic->disable_esr) { ++ /* ++ * Something untraceable is creating bad interrupts on ++ * secondary quads ... for the moment, just leave the ++ * ESR disabled - we can't do anything useful with the ++ * errors anyway - mbligh ++ */ ++ pr_info("Leaving ESR disabled.\n"); ++ return; ++ } ++ ++ maxlvt = lapic_get_maxlvt(); ++ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ oldvalue = apic_read(APIC_ESR); ++ ++ /* enables sending errors */ ++ value = ERROR_APIC_VECTOR; ++ apic_write(APIC_LVTERR, value); ++ ++ /* ++ * spec says clear errors after enabling vector. ++ */ ++ if (maxlvt > 3) ++ apic_write(APIC_ESR, 0); ++ value = apic_read(APIC_ESR); ++ if (value != oldvalue) ++ apic_printk(APIC_VERBOSE, "ESR value before enabling " ++ "vector: 0x%08x after: 0x%08x\n", ++ oldvalue, value); ++} ++ ++#define APIC_IR_REGS APIC_ISR_NR ++#define APIC_IR_BITS (APIC_IR_REGS * 32) ++#define APIC_IR_MAPSIZE (APIC_IR_BITS / BITS_PER_LONG) ++ ++union apic_ir { ++ unsigned long map[APIC_IR_MAPSIZE]; ++ u32 regs[APIC_IR_REGS]; ++}; ++ ++static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr) ++{ ++ int i, bit; ++ ++ /* Read the IRRs */ ++ for (i = 0; i < APIC_IR_REGS; i++) ++ irr->regs[i] = apic_read(APIC_IRR + i * 0x10); ++ ++ /* Read the ISRs */ ++ for (i = 0; i < APIC_IR_REGS; i++) ++ isr->regs[i] = apic_read(APIC_ISR + i * 0x10); ++ ++ /* ++ * If the ISR map is not empty. ACK the APIC and run another round ++ * to verify whether a pending IRR has been unblocked and turned ++ * into a ISR. ++ */ ++ if (!bitmap_empty(isr->map, APIC_IR_BITS)) { ++ /* ++ * There can be multiple ISR bits set when a high priority ++ * interrupt preempted a lower priority one. Issue an ACK ++ * per set bit. ++ */ ++ for_each_set_bit(bit, isr->map, APIC_IR_BITS) ++ ack_APIC_irq(); ++ return true; ++ } ++ ++ return !bitmap_empty(irr->map, APIC_IR_BITS); ++} ++ ++/* ++ * After a crash, we no longer service the interrupts and a pending ++ * interrupt from previous kernel might still have ISR bit set. 
++ * ++ * Most probably by now the CPU has serviced that pending interrupt and it ++ * might not have done the ack_APIC_irq() because it thought, interrupt ++ * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear ++ * the ISR bit and cpu thinks it has already serivced the interrupt. Hence ++ * a vector might get locked. It was noticed for timer irq (vector ++ * 0x31). Issue an extra EOI to clear ISR. ++ * ++ * If there are pending IRR bits they turn into ISR bits after a higher ++ * priority ISR bit has been acked. ++ */ ++static void apic_pending_intr_clear(void) ++{ ++ union apic_ir irr, isr; ++ unsigned int i; ++ ++ /* 512 loops are way oversized and give the APIC a chance to obey. */ ++ for (i = 0; i < 512; i++) { ++ if (!apic_check_and_ack(&irr, &isr)) ++ return; ++ } ++ /* Dump the IRR/ISR content if that failed */ ++ pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map); ++} ++ ++/** ++ * setup_local_APIC - setup the local APIC ++ * ++ * Used to setup local APIC while initializing BSP or bringing up APs. ++ * Always called with preemption disabled. ++ */ ++static void setup_local_APIC(void) ++{ ++ int cpu = smp_processor_id(); ++ unsigned int value; ++ ++ ++ if (disable_apic) { ++ disable_ioapic_support(); ++ return; ++ } ++ ++ /* ++ * If this comes from kexec/kcrash the APIC might be enabled in ++ * SPIV. Soft disable it before doing further initialization. ++ */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_SPIV_APIC_ENABLED; ++ apic_write(APIC_SPIV, value); ++ ++#ifdef CONFIG_X86_32 ++ /* Pound the ESR really hard over the head with a big hammer - mbligh */ ++ if (lapic_is_integrated() && apic->disable_esr) { ++ apic_write(APIC_ESR, 0); ++ apic_write(APIC_ESR, 0); ++ apic_write(APIC_ESR, 0); ++ apic_write(APIC_ESR, 0); ++ } ++#endif ++ perf_events_lapic_init(); ++ ++ /* ++ * Double-check whether this APIC is really registered. ++ * This is meaningless in clustered apic mode, so we skip it. ++ */ ++ BUG_ON(!apic->apic_id_registered()); ++ ++ /* ++ * Intel recommends to set DFR, LDR and TPR before enabling ++ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel ++ * document number 292116). So here it goes... ++ */ ++ apic->init_apic_ldr(); ++ ++#ifdef CONFIG_X86_32 ++ if (apic->dest_logical) { ++ int logical_apicid, ldr_apicid; ++ ++ /* ++ * APIC LDR is initialized. If logical_apicid mapping was ++ * initialized during get_smp_config(), make sure it matches ++ * the actual value. ++ */ ++ logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); ++ ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); ++ if (logical_apicid != BAD_APICID) ++ WARN_ON(logical_apicid != ldr_apicid); ++ /* Always use the value from LDR. */ ++ early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; ++ } ++#endif ++ ++ /* ++ * Set Task Priority to 'accept all'. We never change this ++ * later on. ++ */ ++ value = apic_read(APIC_TASKPRI); ++ value &= ~APIC_TPRI_MASK; ++ apic_write(APIC_TASKPRI, value); ++ ++ /* Clear eventually stale ISR/IRR bits */ ++ apic_pending_intr_clear(); ++ ++ /* ++ * Now that we are all set up, enable the APIC ++ */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_VECTOR_MASK; ++ /* ++ * Enable APIC ++ */ ++ value |= APIC_SPIV_APIC_ENABLED; ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * Some unknown Intel IO/APIC (or APIC) errata is biting us with ++ * certain networking cards. 
If high frequency interrupts are ++ * happening on a particular IOAPIC pin, plus the IOAPIC routing ++ * entry is masked/unmasked at a high rate as well then sooner or ++ * later IOAPIC line gets 'stuck', no more interrupts are received ++ * from the device. If focus CPU is disabled then the hang goes ++ * away, oh well :-( ++ * ++ * [ This bug can be reproduced easily with a level-triggered ++ * PCI Ne2000 networking cards and PII/PIII processors, dual ++ * BX chipset. ] ++ */ ++ /* ++ * Actually disabling the focus CPU check just makes the hang less ++ * frequent as it makes the interrupt distributon model be more ++ * like LRU than MRU (the short-term load is more even across CPUs). ++ */ ++ ++ /* ++ * - enable focus processor (bit==0) ++ * - 64bit mode always use processor focus ++ * so no need to set it ++ */ ++ value &= ~APIC_SPIV_FOCUS_DISABLED; ++#endif ++ ++ /* ++ * Set spurious IRQ vector ++ */ ++ value |= SPURIOUS_APIC_VECTOR; ++ apic_write(APIC_SPIV, value); ++ ++ /* ++ * Set up LVT0, LVT1: ++ * ++ * set up through-local-APIC on the boot CPU's LINT0. This is not ++ * strictly necessary in pure symmetric-IO mode, but sometimes ++ * we delegate interrupts to the 8259A. ++ */ ++ /* ++ * TODO: set up through-local-APIC from through-I/O-APIC? --macro ++ */ ++ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; ++ if (!cpu && (pic_mode || !value || skip_ioapic_setup)) { ++ value = APIC_DM_EXTINT; ++ apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu); ++ } else { ++ value = APIC_DM_EXTINT | APIC_LVT_MASKED; ++ apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu); ++ } ++ apic_write(APIC_LVT0, value); ++ ++ /* ++ * Only the BSP sees the LINT1 NMI signal by default. This can be ++ * modified by apic_extnmi= boot option. ++ */ ++ if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) || ++ apic_extnmi == APIC_EXTNMI_ALL) ++ value = APIC_DM_NMI; ++ else ++ value = APIC_DM_NMI | APIC_LVT_MASKED; ++ ++ /* Is 82489DX ? */ ++ if (!lapic_is_integrated()) ++ value |= APIC_LVT_LEVEL_TRIGGER; ++ apic_write(APIC_LVT1, value); ++ ++#ifdef CONFIG_X86_MCE_INTEL ++ /* Recheck CMCI information after local APIC is up on CPU #0 */ ++ if (!cpu) ++ cmci_recheck(); ++#endif ++} ++ ++static void end_local_APIC_setup(void) ++{ ++ lapic_setup_esr(); ++ ++#ifdef CONFIG_X86_32 ++ { ++ unsigned int value; ++ /* Disable the local apic timer */ ++ value = apic_read(APIC_LVTT); ++ value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); ++ apic_write(APIC_LVTT, value); ++ } ++#endif ++ ++ apic_pm_activate(); ++} ++ ++/* ++ * APIC setup function for application processors. 
Called from smpboot.c ++ */ ++void apic_ap_setup(void) ++{ ++ setup_local_APIC(); ++ end_local_APIC_setup(); ++} ++ ++#ifdef CONFIG_X86_X2APIC ++int x2apic_mode; ++ ++enum { ++ X2APIC_OFF, ++ X2APIC_ON, ++ X2APIC_DISABLED, ++}; ++static int x2apic_state; ++ ++static void __x2apic_disable(void) ++{ ++ u64 msr; ++ ++ if (!boot_cpu_has(X86_FEATURE_APIC)) ++ return; ++ ++ rdmsrl(MSR_IA32_APICBASE, msr); ++ if (!(msr & X2APIC_ENABLE)) ++ return; ++ /* Disable xapic and x2apic first and then reenable xapic mode */ ++ wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); ++ wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); ++ printk_once(KERN_INFO "x2apic disabled\n"); ++} ++ ++static void __x2apic_enable(void) ++{ ++ u64 msr; ++ ++ rdmsrl(MSR_IA32_APICBASE, msr); ++ if (msr & X2APIC_ENABLE) ++ return; ++ wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); ++ printk_once(KERN_INFO "x2apic enabled\n"); ++} ++ ++static int __init setup_nox2apic(char *str) ++{ ++ if (x2apic_enabled()) { ++ int apicid = native_apic_msr_read(APIC_ID); ++ ++ if (apicid >= 255) { ++ pr_warning("Apicid: %08x, cannot enforce nox2apic\n", ++ apicid); ++ return 0; ++ } ++ pr_warning("x2apic already enabled.\n"); ++ __x2apic_disable(); ++ } ++ setup_clear_cpu_cap(X86_FEATURE_X2APIC); ++ x2apic_state = X2APIC_DISABLED; ++ x2apic_mode = 0; ++ return 0; ++} ++early_param("nox2apic", setup_nox2apic); ++ ++/* Called from cpu_init() to enable x2apic on (secondary) cpus */ ++void x2apic_setup(void) ++{ ++ /* ++ * If x2apic is not in ON state, disable it if already enabled ++ * from BIOS. ++ */ ++ if (x2apic_state != X2APIC_ON) { ++ __x2apic_disable(); ++ return; ++ } ++ __x2apic_enable(); ++} ++ ++static __init void x2apic_disable(void) ++{ ++ u32 x2apic_id, state = x2apic_state; ++ ++ x2apic_mode = 0; ++ x2apic_state = X2APIC_DISABLED; ++ ++ if (state != X2APIC_ON) ++ return; ++ ++ x2apic_id = read_apic_id(); ++ if (x2apic_id >= 255) ++ panic("Cannot disable x2apic, id: %08x\n", x2apic_id); ++ ++ __x2apic_disable(); ++ register_lapic_address(mp_lapic_addr); ++} ++ ++static __init void x2apic_enable(void) ++{ ++ if (x2apic_state != X2APIC_OFF) ++ return; ++ ++ x2apic_mode = 1; ++ x2apic_state = X2APIC_ON; ++ __x2apic_enable(); ++} ++ ++static __init void try_to_enable_x2apic(int remap_mode) ++{ ++ if (x2apic_state == X2APIC_DISABLED) ++ return; ++ ++ if (remap_mode != IRQ_REMAP_X2APIC_MODE) { ++ /* IR is required if there is APIC ID > 255 even when running ++ * under KVM ++ */ ++ if (max_physical_apicid > 255 || ++ !x86_init.hyper.x2apic_available()) { ++ pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); ++ x2apic_disable(); ++ return; ++ } ++ ++ /* ++ * without IR all CPUs can be addressed by IOAPIC/MSI ++ * only in physical mode ++ */ ++ x2apic_phys = 1; ++ } ++ x2apic_enable(); ++} ++ ++void __init check_x2apic(void) ++{ ++ if (x2apic_enabled()) { ++ pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n"); ++ x2apic_mode = 1; ++ x2apic_state = X2APIC_ON; ++ } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) { ++ x2apic_state = X2APIC_DISABLED; ++ } ++} ++#else /* CONFIG_X86_X2APIC */ ++static int __init validate_x2apic(void) ++{ ++ if (!apic_is_x2apic_enabled()) ++ return 0; ++ /* ++ * Checkme: Can we simply turn off x2apic here instead of panic? 
++ */ ++ panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n"); ++} ++early_initcall(validate_x2apic); ++ ++static inline void try_to_enable_x2apic(int remap_mode) { } ++static inline void __x2apic_enable(void) { } ++#endif /* !CONFIG_X86_X2APIC */ ++ ++void __init enable_IR_x2apic(void) ++{ ++ unsigned long flags; ++ int ret, ir_stat; ++ ++ if (skip_ioapic_setup) { ++ pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); ++ return; ++ } ++ ++ ir_stat = irq_remapping_prepare(); ++ if (ir_stat < 0 && !x2apic_supported()) ++ return; ++ ++ ret = save_ioapic_entries(); ++ if (ret) { ++ pr_info("Saving IO-APIC state failed: %d\n", ret); ++ return; ++ } ++ ++ local_irq_save(flags); ++ legacy_pic->mask_all(); ++ mask_ioapic_entries(); ++ ++ /* If irq_remapping_prepare() succeeded, try to enable it */ ++ if (ir_stat >= 0) ++ ir_stat = irq_remapping_enable(); ++ /* ir_stat contains the remap mode or an error code */ ++ try_to_enable_x2apic(ir_stat); ++ ++ if (ir_stat < 0) ++ restore_ioapic_entries(); ++ legacy_pic->restore_mask(); ++ local_irq_restore(flags); ++} ++ ++#ifdef CONFIG_X86_64 ++/* ++ * Detect and enable local APICs on non-SMP boards. ++ * Original code written by Keir Fraser. ++ * On AMD64 we trust the BIOS - if it says no APIC it is likely ++ * not correctly set up (usually the APIC timer won't work etc.) ++ */ ++static int __init detect_init_APIC(void) ++{ ++ if (!boot_cpu_has(X86_FEATURE_APIC)) { ++ pr_info("No local APIC present\n"); ++ return -1; ++ } ++ ++ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; ++ return 0; ++} ++#else ++ ++static int __init apic_verify(void) ++{ ++ u32 features, h, l; ++ ++ /* ++ * The APIC feature bit should now be enabled ++ * in `cpuid' ++ */ ++ features = cpuid_edx(1); ++ if (!(features & (1 << X86_FEATURE_APIC))) { ++ pr_warning("Could not enable APIC!\n"); ++ return -1; ++ } ++ set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); ++ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; ++ ++ /* The BIOS may have set up the APIC at some other address */ ++ if (boot_cpu_data.x86 >= 6) { ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ if (l & MSR_IA32_APICBASE_ENABLE) ++ mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; ++ } ++ ++ pr_info("Found and enabled local APIC!\n"); ++ return 0; ++} ++ ++int __init apic_force_enable(unsigned long addr) ++{ ++ u32 h, l; ++ ++ if (disable_apic) ++ return -1; ++ ++ /* ++ * Some BIOSes disable the local APIC in the APIC_BASE ++ * MSR. This can only be done in software for Intel P6 or later ++ * and AMD K7 (Model > 1) or later. ++ */ ++ if (boot_cpu_data.x86 >= 6) { ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ if (!(l & MSR_IA32_APICBASE_ENABLE)) { ++ pr_info("Local APIC disabled by BIOS -- reenabling.\n"); ++ l &= ~MSR_IA32_APICBASE_BASE; ++ l |= MSR_IA32_APICBASE_ENABLE | addr; ++ wrmsr(MSR_IA32_APICBASE, l, h); ++ enabled_via_apicbase = 1; ++ } ++ } ++ return apic_verify(); ++} ++ ++/* ++ * Detect and initialize APIC ++ */ ++static int __init detect_init_APIC(void) ++{ ++ /* Disabled by kernel option? 
*/ ++ if (disable_apic) ++ return -1; ++ ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_AMD: ++ if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || ++ (boot_cpu_data.x86 >= 15)) ++ break; ++ goto no_apic; ++ case X86_VENDOR_HYGON: ++ break; ++ case X86_VENDOR_INTEL: ++ if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || ++ (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC))) ++ break; ++ goto no_apic; ++ default: ++ goto no_apic; ++ } ++ ++ if (!boot_cpu_has(X86_FEATURE_APIC)) { ++ /* ++ * Over-ride BIOS and try to enable the local APIC only if ++ * "lapic" specified. ++ */ ++ if (!force_enable_local_apic) { ++ pr_info("Local APIC disabled by BIOS -- " ++ "you can enable it with \"lapic\"\n"); ++ return -1; ++ } ++ if (apic_force_enable(APIC_DEFAULT_PHYS_BASE)) ++ return -1; ++ } else { ++ if (apic_verify()) ++ return -1; ++ } ++ ++ apic_pm_activate(); ++ ++ return 0; ++ ++no_apic: ++ pr_info("No local APIC present or hardware disabled\n"); ++ return -1; ++} ++#endif ++ ++/** ++ * init_apic_mappings - initialize APIC mappings ++ */ ++void __init init_apic_mappings(void) ++{ ++ unsigned int new_apicid; ++ ++ apic_check_deadline_errata(); ++ ++ if (x2apic_mode) { ++ boot_cpu_physical_apicid = read_apic_id(); ++ return; ++ } ++ ++ /* If no local APIC can be found return early */ ++ if (!smp_found_config && detect_init_APIC()) { ++ /* lets NOP'ify apic operations */ ++ pr_info("APIC: disable apic facility\n"); ++ apic_disable(); ++ } else { ++ apic_phys = mp_lapic_addr; ++ ++ /* ++ * If the system has ACPI MADT tables or MP info, the LAPIC ++ * address is already registered. ++ */ ++ if (!acpi_lapic && !smp_found_config) ++ register_lapic_address(apic_phys); ++ } ++ ++ /* ++ * Fetch the APIC ID of the BSP in case we have a ++ * default configuration (or the MP table is broken). ++ */ ++ new_apicid = read_apic_id(); ++ if (boot_cpu_physical_apicid != new_apicid) { ++ boot_cpu_physical_apicid = new_apicid; ++ /* ++ * yeah -- we lie about apic_version ++ * in case if apic was disabled via boot option ++ * but it's not a problem for SMP compiled kernel ++ * since apic_intr_mode_select is prepared for such ++ * a case and disable smp mode ++ */ ++ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); ++ } ++} ++ ++void __init register_lapic_address(unsigned long address) ++{ ++ mp_lapic_addr = address; ++ ++ if (!x2apic_mode) { ++ set_fixmap_nocache(FIX_APIC_BASE, address); ++ apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", ++ APIC_BASE, address); ++ } ++ if (boot_cpu_physical_apicid == -1U) { ++ boot_cpu_physical_apicid = read_apic_id(); ++ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); ++ } ++} ++ ++/* ++ * Local APIC interrupts ++ */ ++ ++/* ++ * This interrupt should _never_ happen with our APIC/SMP architecture ++ */ ++__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) ++{ ++ u8 vector = ~regs->orig_ax; ++ u32 v; ++ ++ entering_irq(); ++ trace_spurious_apic_entry(vector); ++ ++ inc_irq_stat(irq_spurious_count); ++ ++ /* ++ * If this is a spurious interrupt then do not acknowledge ++ */ ++ if (vector == SPURIOUS_APIC_VECTOR) { ++ /* See SDM vol 3 */ ++ pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n", ++ smp_processor_id()); ++ goto out; ++ } ++ ++ /* ++ * If it is a vectored one, verify it's set in the ISR. If set, ++ * acknowledge it. 
++ */ ++ v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); ++ if (v & (1 << (vector & 0x1f))) { ++ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", ++ vector, smp_processor_id()); ++ ack_APIC_irq(); ++ } else { ++ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", ++ vector, smp_processor_id()); ++ } ++out: ++ trace_spurious_apic_exit(vector); ++ exiting_irq(); ++} ++ ++/* ++ * This interrupt should never happen with our APIC/SMP architecture ++ */ ++__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) ++{ ++ static const char * const error_interrupt_reason[] = { ++ "Send CS error", /* APIC Error Bit 0 */ ++ "Receive CS error", /* APIC Error Bit 1 */ ++ "Send accept error", /* APIC Error Bit 2 */ ++ "Receive accept error", /* APIC Error Bit 3 */ ++ "Redirectable IPI", /* APIC Error Bit 4 */ ++ "Send illegal vector", /* APIC Error Bit 5 */ ++ "Received illegal vector", /* APIC Error Bit 6 */ ++ "Illegal register address", /* APIC Error Bit 7 */ ++ }; ++ u32 v, i = 0; ++ ++ entering_irq(); ++ trace_error_apic_entry(ERROR_APIC_VECTOR); ++ ++ /* First tickle the hardware, only then report what went on. -- REW */ ++ if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ v = apic_read(APIC_ESR); ++ ack_APIC_irq(); ++ atomic_inc(&irq_err_count); ++ ++ apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x", ++ smp_processor_id(), v); ++ ++ v &= 0xff; ++ while (v) { ++ if (v & 0x1) ++ apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); ++ i++; ++ v >>= 1; ++ } ++ ++ apic_printk(APIC_DEBUG, KERN_CONT "\n"); ++ ++ trace_error_apic_exit(ERROR_APIC_VECTOR); ++ exiting_irq(); ++} ++ ++/** ++ * connect_bsp_APIC - attach the APIC to the interrupt system ++ */ ++static void __init connect_bsp_APIC(void) ++{ ++#ifdef CONFIG_X86_32 ++ if (pic_mode) { ++ /* ++ * Do not trust the local APIC being empty at bootup. ++ */ ++ clear_local_APIC(); ++ /* ++ * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's ++ * local APIC to INT and NMI lines. ++ */ ++ apic_printk(APIC_VERBOSE, "leaving PIC mode, " ++ "enabling APIC mode.\n"); ++ imcr_pic_to_apic(); ++ } ++#endif ++} ++ ++/** ++ * disconnect_bsp_APIC - detach the APIC from the interrupt system ++ * @virt_wire_setup: indicates, whether virtual wire mode is selected ++ * ++ * Virtual wire mode is necessary to deliver legacy interrupts even when the ++ * APIC is disabled. ++ */ ++void disconnect_bsp_APIC(int virt_wire_setup) ++{ ++ unsigned int value; ++ ++#ifdef CONFIG_X86_32 ++ if (pic_mode) { ++ /* ++ * Put the board back into PIC mode (has an effect only on ++ * certain older boards). Note that APIC interrupts, including ++ * IPIs, won't work beyond this point! The only exception are ++ * INIT IPIs. 
++ */ ++ apic_printk(APIC_VERBOSE, "disabling APIC mode, " ++ "entering PIC mode.\n"); ++ imcr_apic_to_pic(); ++ return; ++ } ++#endif ++ ++ /* Go back to Virtual Wire compatibility mode */ ++ ++ /* For the spurious interrupt use vector F, and enable it */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_VECTOR_MASK; ++ value |= APIC_SPIV_APIC_ENABLED; ++ value |= 0xf; ++ apic_write(APIC_SPIV, value); ++ ++ if (!virt_wire_setup) { ++ /* ++ * For LVT0 make it edge triggered, active high, ++ * external and enabled ++ */ ++ value = apic_read(APIC_LVT0); ++ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | ++ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | ++ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); ++ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; ++ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); ++ apic_write(APIC_LVT0, value); ++ } else { ++ /* Disable LVT0 */ ++ apic_write(APIC_LVT0, APIC_LVT_MASKED); ++ } ++ ++ /* ++ * For LVT1 make it edge triggered, active high, ++ * nmi and enabled ++ */ ++ value = apic_read(APIC_LVT1); ++ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | ++ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | ++ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); ++ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; ++ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); ++ apic_write(APIC_LVT1, value); ++} ++ ++/* ++ * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated ++ * contiguously, it equals to current allocated max logical CPU ID plus 1. ++ * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, ++ * so the maximum of nr_logical_cpuids is nr_cpu_ids. ++ * ++ * NOTE: Reserve 0 for BSP. ++ */ ++static int nr_logical_cpuids = 1; ++ ++/* ++ * Used to store mapping between logical CPU IDs and APIC IDs. ++ */ ++static int cpuid_to_apicid[] = { ++ [0 ... NR_CPUS - 1] = -1, ++}; ++ ++#ifdef CONFIG_SMP ++/** ++ * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread ++ * @id: APIC ID to check ++ */ ++bool apic_id_is_primary_thread(unsigned int apicid) ++{ ++ u32 mask; ++ ++ if (smp_num_siblings == 1) ++ return true; ++ /* Isolate the SMT bit(s) in the APICID and check for 0 */ ++ mask = (1U << (fls(smp_num_siblings) - 1)) - 1; ++ return !(apicid & mask); ++} ++#endif ++ ++/* ++ * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids ++ * and cpuid_to_apicid[] synchronized. ++ */ ++static int allocate_logical_cpuid(int apicid) ++{ ++ int i; ++ ++ /* ++ * cpuid <-> apicid mapping is persistent, so when a cpu is up, ++ * check if the kernel has allocated a cpuid for it. ++ */ ++ for (i = 0; i < nr_logical_cpuids; i++) { ++ if (cpuid_to_apicid[i] == apicid) ++ return i; ++ } ++ ++ /* Allocate a new cpuid. */ ++ if (nr_logical_cpuids >= nr_cpu_ids) { ++ WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. " ++ "Processor %d/0x%x and the rest are ignored.\n", ++ nr_cpu_ids, nr_logical_cpuids, apicid); ++ return -EINVAL; ++ } ++ ++ cpuid_to_apicid[nr_logical_cpuids] = apicid; ++ return nr_logical_cpuids++; ++} ++ ++int generic_processor_info(int apicid, int version) ++{ ++ int cpu, max = nr_cpu_ids; ++ bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, ++ phys_cpu_present_map); ++ ++ /* ++ * boot_cpu_physical_apicid is designed to have the apicid ++ * returned by read_apic_id(), i.e, the apicid of the ++ * currently booting-up processor. However, on some platforms, ++ * it is temporarily modified by the apicid reported as BSP ++ * through MP table. 
Concretely: ++ * ++ * - arch/x86/kernel/mpparse.c: MP_processor_info() ++ * - arch/x86/mm/amdtopology.c: amd_numa_init() ++ * ++ * This function is executed with the modified ++ * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel ++ * parameter doesn't work to disable APs on kdump 2nd kernel. ++ * ++ * Since fixing handling of boot_cpu_physical_apicid requires ++ * another discussion and tests on each platform, we leave it ++ * for now and here we use read_apic_id() directly in this ++ * function, generic_processor_info(). ++ */ ++ if (disabled_cpu_apicid != BAD_APICID && ++ disabled_cpu_apicid != read_apic_id() && ++ disabled_cpu_apicid == apicid) { ++ int thiscpu = num_processors + disabled_cpus; ++ ++ pr_warning("APIC: Disabling requested cpu." ++ " Processor %d/0x%x ignored.\n", ++ thiscpu, apicid); ++ ++ disabled_cpus++; ++ return -ENODEV; ++ } ++ ++ /* ++ * If boot cpu has not been detected yet, then only allow upto ++ * nr_cpu_ids - 1 processors and keep one slot free for boot cpu ++ */ ++ if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 && ++ apicid != boot_cpu_physical_apicid) { ++ int thiscpu = max + disabled_cpus - 1; ++ ++ pr_warning( ++ "APIC: NR_CPUS/possible_cpus limit of %i almost" ++ " reached. Keeping one slot for boot cpu." ++ " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); ++ ++ disabled_cpus++; ++ return -ENODEV; ++ } ++ ++ if (num_processors >= nr_cpu_ids) { ++ int thiscpu = max + disabled_cpus; ++ ++ pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " ++ "reached. Processor %d/0x%x ignored.\n", ++ max, thiscpu, apicid); ++ ++ disabled_cpus++; ++ return -EINVAL; ++ } ++ ++ if (apicid == boot_cpu_physical_apicid) { ++ /* ++ * x86_bios_cpu_apicid is required to have processors listed ++ * in same order as logical cpu numbers. Hence the first ++ * entry is BSP, and so on. ++ * boot_cpu_init() already hold bit 0 in cpu_present_mask ++ * for BSP. ++ */ ++ cpu = 0; ++ ++ /* Logical cpuid 0 is reserved for BSP. */ ++ cpuid_to_apicid[0] = apicid; ++ } else { ++ cpu = allocate_logical_cpuid(apicid); ++ if (cpu < 0) { ++ disabled_cpus++; ++ return -EINVAL; ++ } ++ } ++ ++ /* ++ * Validate version ++ */ ++ if (version == 0x0) { ++ pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", ++ cpu, apicid); ++ version = 0x10; ++ } ++ ++ if (version != boot_cpu_apic_version) { ++ pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", ++ boot_cpu_apic_version, cpu, version); ++ } ++ ++ if (apicid > max_physical_apicid) ++ max_physical_apicid = apicid; ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) ++ early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; ++ early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; ++#endif ++#ifdef CONFIG_X86_32 ++ early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ++ apic->x86_32_early_logical_apicid(cpu); ++#endif ++ set_cpu_possible(cpu, true); ++ physid_set(apicid, phys_cpu_present_map); ++ set_cpu_present(cpu, true); ++ num_processors++; ++ ++ return cpu; ++} ++ ++int hard_smp_processor_id(void) ++{ ++ return read_apic_id(); ++} ++ ++/* ++ * Override the generic EOI implementation with an optimized version. ++ * Only called during early boot when only one CPU is active and with ++ * interrupts disabled, so we know this does not race with actual APIC driver ++ * use. 
++ */ ++void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) ++{ ++ struct apic **drv; ++ ++ for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { ++ /* Should happen once for each apic */ ++ WARN_ON((*drv)->eoi_write == eoi_write); ++ (*drv)->native_eoi_write = (*drv)->eoi_write; ++ (*drv)->eoi_write = eoi_write; ++ } ++} ++ ++static void __init apic_bsp_up_setup(void) ++{ ++#ifdef CONFIG_X86_64 ++ apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid)); ++#else ++ /* ++ * Hack: In case of kdump, after a crash, kernel might be booting ++ * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid ++ * might be zero if read from MP tables. Get it from LAPIC. ++ */ ++# ifdef CONFIG_CRASH_DUMP ++ boot_cpu_physical_apicid = read_apic_id(); ++# endif ++#endif ++ physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); ++} ++ ++/** ++ * apic_bsp_setup - Setup function for local apic and io-apic ++ * @upmode: Force UP mode (for APIC_init_uniprocessor) ++ * ++ * Returns: ++ * apic_id of BSP APIC ++ */ ++void __init apic_bsp_setup(bool upmode) ++{ ++ connect_bsp_APIC(); ++ if (upmode) ++ apic_bsp_up_setup(); ++ setup_local_APIC(); ++ ++ enable_IO_APIC(); ++ end_local_APIC_setup(); ++ irq_remap_enable_fault_handling(); ++ setup_IO_APIC(); ++} ++ ++#ifdef CONFIG_UP_LATE_INIT ++void __init up_late_init(void) ++{ ++ if (apic_intr_mode == APIC_PIC) ++ return; ++ ++ /* Setup local timer */ ++ x86_init.timers.setup_percpu_clockev(); ++} ++#endif ++ ++/* ++ * Power management ++ */ ++#ifdef CONFIG_PM ++ ++static struct { ++ /* ++ * 'active' is true if the local APIC was enabled by us and ++ * not the BIOS; this signifies that we are also responsible ++ * for disabling it before entering apm/acpi suspend ++ */ ++ int active; ++ /* r/w apic fields */ ++ unsigned int apic_id; ++ unsigned int apic_taskpri; ++ unsigned int apic_ldr; ++ unsigned int apic_dfr; ++ unsigned int apic_spiv; ++ unsigned int apic_lvtt; ++ unsigned int apic_lvtpc; ++ unsigned int apic_lvt0; ++ unsigned int apic_lvt1; ++ unsigned int apic_lvterr; ++ unsigned int apic_tmict; ++ unsigned int apic_tdcr; ++ unsigned int apic_thmr; ++ unsigned int apic_cmci; ++} apic_pm_state; ++ ++static int lapic_suspend(void) ++{ ++ unsigned long flags; ++ int maxlvt; ++ ++ if (!apic_pm_state.active) ++ return 0; ++ ++ maxlvt = lapic_get_maxlvt(); ++ ++ apic_pm_state.apic_id = apic_read(APIC_ID); ++ apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); ++ apic_pm_state.apic_ldr = apic_read(APIC_LDR); ++ apic_pm_state.apic_dfr = apic_read(APIC_DFR); ++ apic_pm_state.apic_spiv = apic_read(APIC_SPIV); ++ apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); ++ if (maxlvt >= 4) ++ apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); ++ apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); ++ apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); ++ apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); ++ apic_pm_state.apic_tmict = apic_read(APIC_TMICT); ++ apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); ++#ifdef CONFIG_X86_THERMAL_VECTOR ++ if (maxlvt >= 5) ++ apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); ++#endif ++#ifdef CONFIG_X86_MCE_INTEL ++ if (maxlvt >= 6) ++ apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI); ++#endif ++ ++ local_irq_save(flags); ++ disable_local_APIC(); ++ ++ irq_remapping_disable(); ++ ++ local_irq_restore(flags); ++ return 0; ++} ++ ++static void lapic_resume(void) ++{ ++ unsigned int l, h; ++ unsigned long flags; ++ int maxlvt; ++ ++ if (!apic_pm_state.active) ++ return; ++ ++ local_irq_save(flags); 
++ ++ /* ++ * IO-APIC and PIC have their own resume routines. ++ * We just mask them here to make sure the interrupt ++ * subsystem is completely quiet while we enable x2apic ++ * and interrupt-remapping. ++ */ ++ mask_ioapic_entries(); ++ legacy_pic->mask_all(); ++ ++ if (x2apic_mode) { ++ __x2apic_enable(); ++ } else { ++ /* ++ * Make sure the APICBASE points to the right address ++ * ++ * FIXME! This will be wrong if we ever support suspend on ++ * SMP! We'll need to do this as part of the CPU restore! ++ */ ++ if (boot_cpu_data.x86 >= 6) { ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ l &= ~MSR_IA32_APICBASE_BASE; ++ l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; ++ wrmsr(MSR_IA32_APICBASE, l, h); ++ } ++ } ++ ++ maxlvt = lapic_get_maxlvt(); ++ apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); ++ apic_write(APIC_ID, apic_pm_state.apic_id); ++ apic_write(APIC_DFR, apic_pm_state.apic_dfr); ++ apic_write(APIC_LDR, apic_pm_state.apic_ldr); ++ apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); ++ apic_write(APIC_SPIV, apic_pm_state.apic_spiv); ++ apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); ++ apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); ++#ifdef CONFIG_X86_THERMAL_VECTOR ++ if (maxlvt >= 5) ++ apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); ++#endif ++#ifdef CONFIG_X86_MCE_INTEL ++ if (maxlvt >= 6) ++ apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci); ++#endif ++ if (maxlvt >= 4) ++ apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); ++ apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); ++ apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); ++ apic_write(APIC_TMICT, apic_pm_state.apic_tmict); ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ ++ irq_remapping_reenable(x2apic_mode); ++ ++ local_irq_restore(flags); ++} ++ ++/* ++ * This device has no shutdown method - fully functioning local APICs ++ * are needed on every CPU up until machine_halt/restart/poweroff. ++ */ ++ ++static struct syscore_ops lapic_syscore_ops = { ++ .resume = lapic_resume, ++ .suspend = lapic_suspend, ++}; ++ ++static void apic_pm_activate(void) ++{ ++ apic_pm_state.active = 1; ++} ++ ++static int __init init_lapic_sysfs(void) ++{ ++ /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ ++ if (boot_cpu_has(X86_FEATURE_APIC)) ++ register_syscore_ops(&lapic_syscore_ops); ++ ++ return 0; ++} ++ ++/* local apic needs to resume before other devices access its registers. */ ++core_initcall(init_lapic_sysfs); ++ ++#else /* CONFIG_PM */ ++ ++static void apic_pm_activate(void) { } ++ ++#endif /* CONFIG_PM */ ++ ++#ifdef CONFIG_X86_64 ++ ++static int multi_checked; ++static int multi; ++ ++static int set_multi(const struct dmi_system_id *d) ++{ ++ if (multi) ++ return 0; ++ pr_info("APIC: %s detected, Multi Chassis\n", d->ident); ++ multi = 1; ++ return 0; ++} ++ ++static const struct dmi_system_id multi_dmi_table[] = { ++ { ++ .callback = set_multi, ++ .ident = "IBM System Summit2", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "IBM"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"), ++ }, ++ }, ++ {} ++}; ++ ++static void dmi_check_multi(void) ++{ ++ if (multi_checked) ++ return; ++ ++ dmi_check_system(multi_dmi_table); ++ multi_checked = 1; ++} ++ ++/* ++ * apic_is_clustered_box() -- Check if we can expect good TSC ++ * ++ * Thus far, the major user of this is IBM's Summit2 series: ++ * Clustered boxes may have unsynced TSC problems if they are ++ * multi-chassis. 
++ * Use DMI to check them ++ */ ++int apic_is_clustered_box(void) ++{ ++ dmi_check_multi(); ++ return multi; ++} ++#endif ++ ++/* ++ * APIC command line parameters ++ */ ++static int __init setup_disableapic(char *arg) ++{ ++ disable_apic = 1; ++ setup_clear_cpu_cap(X86_FEATURE_APIC); ++ return 0; ++} ++early_param("disableapic", setup_disableapic); ++ ++/* same as disableapic, for compatibility */ ++static int __init setup_nolapic(char *arg) ++{ ++ return setup_disableapic(arg); ++} ++early_param("nolapic", setup_nolapic); ++ ++static int __init parse_lapic_timer_c2_ok(char *arg) ++{ ++ local_apic_timer_c2_ok = 1; ++ return 0; ++} ++early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); ++ ++static int __init parse_disable_apic_timer(char *arg) ++{ ++ disable_apic_timer = 1; ++ return 0; ++} ++early_param("noapictimer", parse_disable_apic_timer); ++ ++static int __init parse_nolapic_timer(char *arg) ++{ ++ disable_apic_timer = 1; ++ return 0; ++} ++early_param("nolapic_timer", parse_nolapic_timer); ++ ++static int __init apic_set_verbosity(char *arg) ++{ ++ if (!arg) { ++#ifdef CONFIG_X86_64 ++ skip_ioapic_setup = 0; ++ return 0; ++#endif ++ return -EINVAL; ++ } ++ ++ if (strcmp("debug", arg) == 0) ++ apic_verbosity = APIC_DEBUG; ++ else if (strcmp("verbose", arg) == 0) ++ apic_verbosity = APIC_VERBOSE; ++#ifdef CONFIG_X86_64 ++ else { ++ pr_warning("APIC Verbosity level %s not recognised" ++ " use apic=verbose or apic=debug\n", arg); ++ return -EINVAL; ++ } ++#endif ++ ++ return 0; ++} ++early_param("apic", apic_set_verbosity); ++ ++static int __init lapic_insert_resource(void) ++{ ++ if (!apic_phys) ++ return -1; ++ ++ /* Put local APIC into the resource map. */ ++ lapic_resource.start = apic_phys; ++ lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; ++ insert_resource(&iomem_resource, &lapic_resource); ++ ++ return 0; ++} ++ ++/* ++ * need call insert after e820__reserve_resources() ++ * that is using request_resource ++ */ ++late_initcall(lapic_insert_resource); ++ ++static int __init apic_set_disabled_cpu_apicid(char *arg) ++{ ++ if (!arg || !get_option(&arg, &disabled_cpu_apicid)) ++ return -EINVAL; ++ ++ return 0; ++} ++early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); ++ ++static int __init apic_set_extnmi(char *arg) ++{ ++ if (!arg) ++ return -EINVAL; ++ ++ if (!strncmp("all", arg, 3)) ++ apic_extnmi = APIC_EXTNMI_ALL; ++ else if (!strncmp("none", arg, 4)) ++ apic_extnmi = APIC_EXTNMI_NONE; ++ else if (!strncmp("bsp", arg, 3)) ++ apic_extnmi = APIC_EXTNMI_BSP; ++ else { ++ pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++early_param("apic_extnmi", apic_set_extnmi); +diff -uprN kernel/arch/x86/kernel/apic/apic_flat_64.c kernel_new/arch/x86/kernel/apic/apic_flat_64.c +--- kernel/arch/x86/kernel/apic/apic_flat_64.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/apic_flat_64.c 2021-04-01 18:28:07.653863289 +0800 +@@ -58,9 +58,9 @@ static void _flat_send_IPI_mask(unsigned + { + unsigned long flags; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) +diff -uprN kernel/arch/x86/kernel/apic/io_apic.c kernel_new/arch/x86/kernel/apic/io_apic.c +--- kernel/arch/x86/kernel/apic/io_apic.c 2020-12-21 21:59:17.000000000 +0800 ++++ 
kernel_new/arch/x86/kernel/apic/io_apic.c 2021-04-01 18:28:07.654863288 +0800 +@@ -77,7 +77,7 @@ + #define for_each_irq_pin(entry, head) \ + list_for_each_entry(entry, &head, list) + +-static DEFINE_RAW_SPINLOCK(ioapic_lock); ++static IPIPE_DEFINE_RAW_SPINLOCK(ioapic_lock); + static DEFINE_MUTEX(ioapic_mutex); + static unsigned int ioapic_dynirq_base; + static int ioapic_initialized; +@@ -465,13 +465,19 @@ static void io_apic_sync(struct irq_pin_ + readl(&io_apic->data); + } + ++static inline void __mask_ioapic(struct mp_chip_data *data) ++{ ++ io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); ++} ++ + static void mask_ioapic_irq(struct irq_data *irq_data) + { + struct mp_chip_data *data = irq_data->chip_data; + unsigned long flags; + + raw_spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); ++ ipipe_lock_irq(irq_data->irq); ++ __mask_ioapic(data); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -487,6 +493,7 @@ static void unmask_ioapic_irq(struct irq + + raw_spin_lock_irqsave(&ioapic_lock, flags); + __unmask_ioapic(data); ++ ipipe_unlock_irq(irq_data->irq); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -530,14 +537,20 @@ static void __eoi_ioapic_pin(int apic, i + } + } + +-static void eoi_ioapic_pin(int vector, struct mp_chip_data *data) ++static void _eoi_ioapic_pin(int vector, struct mp_chip_data *data) + { +- unsigned long flags; + struct irq_pin_list *entry; + +- raw_spin_lock_irqsave(&ioapic_lock, flags); + for_each_irq_pin(entry, data->irq_2_pin) + __eoi_ioapic_pin(entry->apic, entry->pin, vector); ++} ++ ++void eoi_ioapic_pin(int vector, struct mp_chip_data *data) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ _eoi_ioapic_pin(vector, data); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -1203,6 +1216,19 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector + + static struct irq_chip ioapic_chip, ioapic_ir_chip; + ++#ifdef CONFIG_IPIPE ++static void startup_legacy_irq(unsigned irq) ++{ ++ unsigned long flags; ++ legacy_pic->mask(irq); ++ flags = hard_local_irq_save(); ++ __ipipe_unlock_irq(irq); ++ hard_local_irq_restore(flags); ++} ++#else /* !CONFIG_IPIPE */ ++#define startup_legacy_irq(irq) legacy_pic->mask(irq) ++#endif /* !CONFIG_IPIPE */ ++ + static void __init setup_IO_APIC_irqs(void) + { + unsigned int ioapic, pin; +@@ -1686,11 +1712,12 @@ static unsigned int startup_ioapic_irq(s + + raw_spin_lock_irqsave(&ioapic_lock, flags); + if (irq < nr_legacy_irqs()) { +- legacy_pic->mask(irq); ++ startup_legacy_irq(irq); + if (legacy_pic->irq_pending(irq)) + was_pending = 1; + } + __unmask_ioapic(data->chip_data); ++ ipipe_unlock_irq(irq); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + + return was_pending; +@@ -1698,7 +1725,7 @@ static unsigned int startup_ioapic_irq(s + + atomic_t irq_mis_count; + +-#ifdef CONFIG_GENERIC_PENDING_IRQ ++#if defined(CONFIG_GENERIC_PENDING_IRQ) || (defined(CONFIG_IPIPE) && defined(CONFIG_SMP)) + static bool io_apic_level_ack_pending(struct mp_chip_data *data) + { + struct irq_pin_list *entry; +@@ -1783,9 +1810,9 @@ static void ioapic_ack_level(struct irq_ + { + struct irq_cfg *cfg = irqd_cfg(irq_data); + unsigned long v; +- bool masked; + int i; +- ++#ifndef CONFIG_IPIPE ++ bool masked; + irq_complete_move(cfg); + masked = ioapic_irqd_mask(irq_data); + +@@ -1843,6 +1870,24 @@ static void ioapic_ack_level(struct irq_ + } + + ioapic_irqd_unmask(irq_data, masked); ++#else /* CONFIG_IPIPE */ ++ /* ++ * Prevent 
low priority IRQs grabbed by high priority domains ++ * from being delayed, waiting for a high priority interrupt ++ * handler running in a low priority domain to complete. ++ * This code assumes hw interrupts off. ++ */ ++ i = cfg->vector; ++ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); ++ if (unlikely(!(v & (1 << (i & 0x1f))))) { ++ /* IO-APIC erratum: see comment above. */ ++ atomic_inc(&irq_mis_count); ++ raw_spin_lock(&ioapic_lock); ++ _eoi_ioapic_pin(cfg->vector, irq_data->chip_data); ++ raw_spin_unlock(&ioapic_lock); ++ } ++ __ack_APIC_irq(); ++#endif /* CONFIG_IPIPE */ + } + + static void ioapic_ir_ack_level(struct irq_data *irq_data) +@@ -1938,6 +1983,69 @@ static int ioapic_irq_get_chip_state(str + return 0; + } + ++#ifdef CONFIG_IPIPE ++ ++#ifdef CONFIG_SMP ++ ++void move_xxapic_irq(struct irq_data *irq_data) ++{ ++ unsigned int irq = irq_data->irq; ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct mp_chip_data *data = irq_data->chip_data; ++ struct irq_cfg *cfg = irqd_cfg(irq_data); ++ ++ if (desc->handle_irq == &handle_edge_irq) { ++ raw_spin_lock(&desc->lock); ++ irq_complete_move(cfg); ++ irq_move_irq(irq_data); ++ raw_spin_unlock(&desc->lock); ++ } else if (desc->handle_irq == &handle_fasteoi_irq) { ++ raw_spin_lock(&desc->lock); ++ irq_complete_move(cfg); ++ if (unlikely(irqd_is_setaffinity_pending(irq_data))) { ++ if (!io_apic_level_ack_pending(data)) ++ irq_move_masked_irq(irq_data); ++ unmask_ioapic_irq(irq_data); ++ } ++ raw_spin_unlock(&desc->lock); ++ } else ++ WARN_ON_ONCE(1); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++static void hold_ioapic_irq(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ ++ raw_spin_lock(&ioapic_lock); ++ __mask_ioapic(data); ++ raw_spin_unlock(&ioapic_lock); ++ ioapic_ack_level(irq_data); ++} ++ ++static void hold_ioapic_ir_irq(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ ++ raw_spin_lock(&ioapic_lock); ++ __mask_ioapic(data); ++ raw_spin_unlock(&ioapic_lock); ++ ioapic_ir_ack_level(irq_data); ++} ++ ++static void release_ioapic_irq(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ __unmask_ioapic(data); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++#endif /* CONFIG_IPIPE */ ++ + static struct irq_chip ioapic_chip __read_mostly = { + .name = "IO-APIC", + .irq_startup = startup_ioapic_irq, +@@ -1948,6 +2056,13 @@ static struct irq_chip ioapic_chip __rea + .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_get_irqchip_state = ioapic_irq_get_chip_state, ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_SMP ++ .irq_move = move_xxapic_irq, ++#endif ++ .irq_hold = hold_ioapic_irq, ++ .irq_release = release_ioapic_irq, ++#endif + .flags = IRQCHIP_SKIP_SET_WAKE, + }; + +@@ -1961,6 +2076,13 @@ static struct irq_chip ioapic_ir_chip __ + .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_get_irqchip_state = ioapic_irq_get_chip_state, ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_SMP ++ .irq_move = move_xxapic_irq, ++#endif ++ .irq_hold = hold_ioapic_ir_irq, ++ .irq_release = release_ioapic_irq, ++#endif + .flags = IRQCHIP_SKIP_SET_WAKE, + }; + +@@ -1992,23 +2114,29 @@ static inline void init_IO_APIC_traps(vo + + static void mask_lapic_irq(struct irq_data *data) + { +- unsigned long v; ++ unsigned long v, flags; + ++ flags = hard_cond_local_irq_save(); ++ ipipe_lock_irq(data->irq); 
+ v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); ++ hard_cond_local_irq_restore(flags); + } + + static void unmask_lapic_irq(struct irq_data *data) + { +- unsigned long v; ++ unsigned long v, flags; + ++ flags = hard_cond_local_irq_save(); + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); ++ ipipe_unlock_irq(data->irq); ++ hard_cond_local_irq_restore(flags); + } + + static void ack_lapic_irq(struct irq_data *data) + { +- ack_APIC_irq(); ++ __ack_APIC_irq(); + } + + static struct irq_chip lapic_chip __read_mostly = { +@@ -2016,6 +2144,9 @@ static struct irq_chip lapic_chip __read + .irq_mask = mask_lapic_irq, + .irq_unmask = unmask_lapic_irq, + .irq_ack = ack_lapic_irq, ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ .irq_move = move_xxapic_irq, ++#endif + }; + + static void lapic_register_intr(int irq) +@@ -2135,7 +2266,7 @@ static inline void __init check_timer(vo + /* + * get/set the timer IRQ vector: + */ +- legacy_pic->mask(0); ++ startup_legacy_irq(0); + + /* + * As IRQ0 is to be enabled in the 8259A, the virtual +@@ -2232,6 +2363,10 @@ static inline void __init check_timer(vo + "...trying to set up timer as Virtual Wire IRQ...\n"); + + lapic_register_intr(0); ++#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_64) ++ irq_to_desc(0)->ipipe_ack = __ipipe_ack_edge_irq; ++ irq_to_desc(0)->ipipe_end = __ipipe_nop_irq; ++#endif + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ + legacy_pic->unmask(0); + +@@ -2240,7 +2375,7 @@ static inline void __init check_timer(vo + goto out; + } + local_irq_disable(); +- legacy_pic->mask(0); ++ startup_legacy_irq(0); + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); + apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); + +@@ -2612,6 +2747,21 @@ int acpi_get_override_irq(u32 gsi, int * + return 0; + } + ++#ifdef CONFIG_IPIPE ++unsigned int __ipipe_get_ioapic_irq_vector(int irq) ++{ ++ if (irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS) ++ return ipipe_apic_irq_vector(irq); ++ else if (irq == IRQ_MOVE_CLEANUP_VECTOR) ++ return irq; ++ else { ++ if (irq_cfg(irq) == NULL) ++ return ISA_IRQ_VECTOR(irq); /* Assume ISA. */ ++ return irq_cfg(irq)->vector; ++ } ++} ++#endif /* CONFIG_IPIPE */ ++ + /* + * This function updates target affinity of IOAPIC interrupts to include + * the CPUs which came online during SMP bringup. +@@ -3006,7 +3156,7 @@ int mp_irqdomain_alloc(struct irq_domain + mp_setup_entry(cfg, data, info->ioapic_entry); + mp_register_handler(virq, data->trigger); + if (virq < nr_legacy_irqs()) +- legacy_pic->mask(virq); ++ startup_legacy_irq(virq); + local_irq_restore(flags); + + apic_printk(APIC_VERBOSE, KERN_DEBUG +diff -uprN kernel/arch/x86/kernel/apic/io_apic.c.orig kernel_new/arch/x86/kernel/apic/io_apic.c.orig +--- kernel/arch/x86/kernel/apic/io_apic.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/io_apic.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,3067 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Intel IO-APIC support for multi-Pentium hosts. ++ * ++ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo ++ * ++ * Many thanks to Stig Venaas for trying out countless experimental ++ * patches and reporting/debugging problems patiently! ++ * ++ * (c) 1999, Multiple IO-APIC support, developed by ++ * Ken-ichi Yaku and ++ * Hidemi Kishimoto , ++ * further tested and cleaned up by Zach Brown ++ * and Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. 
Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively ++ * Paul Diefenbaugh : Added full ACPI support ++ * ++ * Historical information which is worth to be preserved: ++ * ++ * - SiS APIC rmw bug: ++ * ++ * We used to have a workaround for a bug in SiS chips which ++ * required to rewrite the index register for a read-modify-write ++ * operation as the chip lost the index information which was ++ * setup for the read already. We cache the data now, so that ++ * workaround has been removed. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* time_after() */ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define for_each_ioapic(idx) \ ++ for ((idx) = 0; (idx) < nr_ioapics; (idx)++) ++#define for_each_ioapic_reverse(idx) \ ++ for ((idx) = nr_ioapics - 1; (idx) >= 0; (idx)--) ++#define for_each_pin(idx, pin) \ ++ for ((pin) = 0; (pin) < ioapics[(idx)].nr_registers; (pin)++) ++#define for_each_ioapic_pin(idx, pin) \ ++ for_each_ioapic((idx)) \ ++ for_each_pin((idx), (pin)) ++#define for_each_irq_pin(entry, head) \ ++ list_for_each_entry(entry, &head, list) ++ ++static DEFINE_RAW_SPINLOCK(ioapic_lock); ++static DEFINE_MUTEX(ioapic_mutex); ++static unsigned int ioapic_dynirq_base; ++static int ioapic_initialized; ++ ++struct irq_pin_list { ++ struct list_head list; ++ int apic, pin; ++}; ++ ++struct mp_chip_data { ++ struct list_head irq_2_pin; ++ struct IO_APIC_route_entry entry; ++ int trigger; ++ int polarity; ++ u32 count; ++ bool isa_irq; ++}; ++ ++struct mp_ioapic_gsi { ++ u32 gsi_base; ++ u32 gsi_end; ++}; ++ ++static struct ioapic { ++ /* ++ * # of IRQ routing registers ++ */ ++ int nr_registers; ++ /* ++ * Saved state during suspend/resume, or while enabling intr-remap. ++ */ ++ struct IO_APIC_route_entry *saved_registers; ++ /* I/O APIC config */ ++ struct mpc_ioapic mp_config; ++ /* IO APIC gsi routing info */ ++ struct mp_ioapic_gsi gsi_config; ++ struct ioapic_domain_cfg irqdomain_cfg; ++ struct irq_domain *irqdomain; ++ struct resource *iomem_res; ++} ioapics[MAX_IO_APICS]; ++ ++#define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver ++ ++int mpc_ioapic_id(int ioapic_idx) ++{ ++ return ioapics[ioapic_idx].mp_config.apicid; ++} ++ ++unsigned int mpc_ioapic_addr(int ioapic_idx) ++{ ++ return ioapics[ioapic_idx].mp_config.apicaddr; ++} ++ ++static inline struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) ++{ ++ return &ioapics[ioapic_idx].gsi_config; ++} ++ ++static inline int mp_ioapic_pin_count(int ioapic) ++{ ++ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); ++ ++ return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; ++} ++ ++static inline u32 mp_pin_to_gsi(int ioapic, int pin) ++{ ++ return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin; ++} ++ ++static inline bool mp_is_legacy_irq(int irq) ++{ ++ return irq >= 0 && irq < nr_legacy_irqs(); ++} ++ ++/* ++ * Initialize all legacy IRQs and all pins on the first IOAPIC ++ * if we have legacy interrupt controller. Kernel boot option "pirq=" ++ * may rely on non-legacy pins on the first IOAPIC. 
++ */ ++static inline int mp_init_irq_at_boot(int ioapic, int irq) ++{ ++ if (!nr_legacy_irqs()) ++ return 0; ++ ++ return ioapic == 0 || mp_is_legacy_irq(irq); ++} ++ ++static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic) ++{ ++ return ioapics[ioapic].irqdomain; ++} ++ ++int nr_ioapics; ++ ++/* The one past the highest gsi number used */ ++u32 gsi_top; ++ ++/* MP IRQ source entries */ ++struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; ++ ++/* # of MP IRQ source entries */ ++int mp_irq_entries; ++ ++#ifdef CONFIG_EISA ++int mp_bus_id_to_type[MAX_MP_BUSSES]; ++#endif ++ ++DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); ++ ++int skip_ioapic_setup; ++ ++/** ++ * disable_ioapic_support() - disables ioapic support at runtime ++ */ ++void disable_ioapic_support(void) ++{ ++#ifdef CONFIG_PCI ++ noioapicquirk = 1; ++ noioapicreroute = -1; ++#endif ++ skip_ioapic_setup = 1; ++} ++ ++static int __init parse_noapic(char *str) ++{ ++ /* disable IO-APIC */ ++ disable_ioapic_support(); ++ return 0; ++} ++early_param("noapic", parse_noapic); ++ ++/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ ++void mp_save_irq(struct mpc_intsrc *m) ++{ ++ int i; ++ ++ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," ++ " IRQ %02x, APIC ID %x, APIC INT %02x\n", ++ m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, ++ m->srcbusirq, m->dstapic, m->dstirq); ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ if (!memcmp(&mp_irqs[i], m, sizeof(*m))) ++ return; ++ } ++ ++ memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m)); ++ if (++mp_irq_entries == MAX_IRQ_SOURCES) ++ panic("Max # of irq sources exceeded!!\n"); ++} ++ ++static void alloc_ioapic_saved_registers(int idx) ++{ ++ size_t size; ++ ++ if (ioapics[idx].saved_registers) ++ return; ++ ++ size = sizeof(struct IO_APIC_route_entry) * ioapics[idx].nr_registers; ++ ioapics[idx].saved_registers = kzalloc(size, GFP_KERNEL); ++ if (!ioapics[idx].saved_registers) ++ pr_err("IOAPIC %d: suspend/resume impossible!\n", idx); ++} ++ ++static void free_ioapic_saved_registers(int idx) ++{ ++ kfree(ioapics[idx].saved_registers); ++ ioapics[idx].saved_registers = NULL; ++} ++ ++int __init arch_early_ioapic_init(void) ++{ ++ int i; ++ ++ if (!nr_legacy_irqs()) ++ io_apic_irqs = ~0UL; ++ ++ for_each_ioapic(i) ++ alloc_ioapic_saved_registers(i); ++ ++ return 0; ++} ++ ++struct io_apic { ++ unsigned int index; ++ unsigned int unused[3]; ++ unsigned int data; ++ unsigned int unused2[11]; ++ unsigned int eoi; ++}; ++ ++static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) ++{ ++ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) ++ + (mpc_ioapic_addr(idx) & ~PAGE_MASK); ++} ++ ++static inline void io_apic_eoi(unsigned int apic, unsigned int vector) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(vector, &io_apic->eoi); ++} ++ ++unsigned int native_io_apic_read(unsigned int apic, unsigned int reg) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(reg, &io_apic->index); ++ return readl(&io_apic->data); ++} ++ ++static void io_apic_write(unsigned int apic, unsigned int reg, ++ unsigned int value) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ ++ writel(reg, &io_apic->index); ++ writel(value, &io_apic->data); ++} ++ ++union entry_union { ++ struct { u32 w1, w2; }; ++ struct IO_APIC_route_entry entry; ++}; ++ ++static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin) ++{ ++ union entry_union eu; ++ ++ eu.w1 = io_apic_read(apic, 0x10 + 2 * 
pin); ++ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); ++ ++ return eu.entry; ++} ++ ++static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) ++{ ++ union entry_union eu; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ eu.entry = __ioapic_read_entry(apic, pin); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return eu.entry; ++} ++ ++/* ++ * When we write a new IO APIC routing entry, we need to write the high ++ * word first! If the mask bit in the low word is clear, we will enable ++ * the interrupt, and we need to make sure the entry is fully populated ++ * before that happens. ++ */ ++static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ union entry_union eu = {{0, 0}}; ++ ++ eu.entry = e; ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++} ++ ++static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ __ioapic_write_entry(apic, pin, e); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++/* ++ * When we mask an IO APIC routing entry, we need to write the low ++ * word first, in order to set the mask bit before we change the ++ * high bits! ++ */ ++static void ioapic_mask_entry(int apic, int pin) ++{ ++ unsigned long flags; ++ union entry_union eu = { .entry.mask = IOAPIC_MASKED }; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++/* ++ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are ++ * shared ISA-space IRQs, so we have to support them. We are super ++ * fast in the common case, and fast for shared ISA-space IRQs. ++ */ ++static int __add_pin_to_irq_node(struct mp_chip_data *data, ++ int node, int apic, int pin) ++{ ++ struct irq_pin_list *entry; ++ ++ /* don't allow duplicates */ ++ for_each_irq_pin(entry, data->irq_2_pin) ++ if (entry->apic == apic && entry->pin == pin) ++ return 0; ++ ++ entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node); ++ if (!entry) { ++ pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", ++ node, apic, pin); ++ return -ENOMEM; ++ } ++ entry->apic = apic; ++ entry->pin = pin; ++ list_add_tail(&entry->list, &data->irq_2_pin); ++ ++ return 0; ++} ++ ++static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin) ++{ ++ struct irq_pin_list *tmp, *entry; ++ ++ list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list) ++ if (entry->apic == apic && entry->pin == pin) { ++ list_del(&entry->list); ++ kfree(entry); ++ return; ++ } ++} ++ ++static void add_pin_to_irq_node(struct mp_chip_data *data, ++ int node, int apic, int pin) ++{ ++ if (__add_pin_to_irq_node(data, node, apic, pin)) ++ panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); ++} ++ ++/* ++ * Reroute an IRQ to a different pin. ++ */ ++static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node, ++ int oldapic, int oldpin, ++ int newapic, int newpin) ++{ ++ struct irq_pin_list *entry; ++ ++ for_each_irq_pin(entry, data->irq_2_pin) { ++ if (entry->apic == oldapic && entry->pin == oldpin) { ++ entry->apic = newapic; ++ entry->pin = newpin; ++ /* every one is different, right? 
*/ ++ return; ++ } ++ } ++ ++ /* old apic/pin didn't exist, so just add new ones */ ++ add_pin_to_irq_node(data, node, newapic, newpin); ++} ++ ++static void io_apic_modify_irq(struct mp_chip_data *data, ++ int mask_and, int mask_or, ++ void (*final)(struct irq_pin_list *entry)) ++{ ++ union entry_union eu; ++ struct irq_pin_list *entry; ++ ++ eu.entry = data->entry; ++ eu.w1 &= mask_and; ++ eu.w1 |= mask_or; ++ data->entry = eu.entry; ++ ++ for_each_irq_pin(entry, data->irq_2_pin) { ++ io_apic_write(entry->apic, 0x10 + 2 * entry->pin, eu.w1); ++ if (final) ++ final(entry); ++ } ++} ++ ++static void io_apic_sync(struct irq_pin_list *entry) ++{ ++ /* ++ * Synchronize the IO-APIC and the CPU by doing ++ * a dummy read from the IO-APIC ++ */ ++ struct io_apic __iomem *io_apic; ++ ++ io_apic = io_apic_base(entry->apic); ++ readl(&io_apic->data); ++} ++ ++static void mask_ioapic_irq(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void __unmask_ioapic(struct mp_chip_data *data) ++{ ++ io_apic_modify_irq(data, ~IO_APIC_REDIR_MASKED, 0, NULL); ++} ++ ++static void unmask_ioapic_irq(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ __unmask_ioapic(data); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++/* ++ * IO-APIC versions below 0x20 don't support EOI register. ++ * For the record, here is the information about various versions: ++ * 0Xh 82489DX ++ * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant ++ * 2Xh I/O(x)APIC which is PCI 2.2 Compliant ++ * 30h-FFh Reserved ++ * ++ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic ++ * version as 0x2. This is an error with documentation and these ICH chips ++ * use io-apic's of version 0x20. ++ * ++ * For IO-APIC's with EOI register, we use that to do an explicit EOI. ++ * Otherwise, we simulate the EOI message manually by changing the trigger ++ * mode to edge and then back to level, with RTE being masked during this. ++ */ ++static void __eoi_ioapic_pin(int apic, int pin, int vector) ++{ ++ if (mpc_ioapic_ver(apic) >= 0x20) { ++ io_apic_eoi(apic, vector); ++ } else { ++ struct IO_APIC_route_entry entry, entry1; ++ ++ entry = entry1 = __ioapic_read_entry(apic, pin); ++ ++ /* ++ * Mask the entry and change the trigger mode to edge. ++ */ ++ entry1.mask = IOAPIC_MASKED; ++ entry1.trigger = IOAPIC_EDGE; ++ ++ __ioapic_write_entry(apic, pin, entry1); ++ ++ /* ++ * Restore the previous level triggered entry. 
++ */ ++ __ioapic_write_entry(apic, pin, entry); ++ } ++} ++ ++static void eoi_ioapic_pin(int vector, struct mp_chip_data *data) ++{ ++ unsigned long flags; ++ struct irq_pin_list *entry; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ for_each_irq_pin(entry, data->irq_2_pin) ++ __eoi_ioapic_pin(entry->apic, entry->pin, vector); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) ++{ ++ struct IO_APIC_route_entry entry; ++ ++ /* Check delivery_mode to be sure we're not clearing an SMI pin */ ++ entry = ioapic_read_entry(apic, pin); ++ if (entry.delivery_mode == dest_SMI) ++ return; ++ ++ /* ++ * Make sure the entry is masked and re-read the contents to check ++ * if it is a level triggered pin and if the remote-IRR is set. ++ */ ++ if (entry.mask == IOAPIC_UNMASKED) { ++ entry.mask = IOAPIC_MASKED; ++ ioapic_write_entry(apic, pin, entry); ++ entry = ioapic_read_entry(apic, pin); ++ } ++ ++ if (entry.irr) { ++ unsigned long flags; ++ ++ /* ++ * Make sure the trigger mode is set to level. Explicit EOI ++ * doesn't clear the remote-IRR if the trigger mode is not ++ * set to level. ++ */ ++ if (entry.trigger == IOAPIC_EDGE) { ++ entry.trigger = IOAPIC_LEVEL; ++ ioapic_write_entry(apic, pin, entry); ++ } ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ __eoi_ioapic_pin(apic, pin, entry.vector); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ } ++ ++ /* ++ * Clear the rest of the bits in the IO-APIC RTE except for the mask ++ * bit. ++ */ ++ ioapic_mask_entry(apic, pin); ++ entry = ioapic_read_entry(apic, pin); ++ if (entry.irr) ++ pr_err("Unable to reset IRR for apic: %d, pin :%d\n", ++ mpc_ioapic_id(apic), pin); ++} ++ ++void clear_IO_APIC (void) ++{ ++ int apic, pin; ++ ++ for_each_ioapic_pin(apic, pin) ++ clear_IO_APIC_pin(apic, pin); ++} ++ ++#ifdef CONFIG_X86_32 ++/* ++ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to ++ * specific CPU-side IRQs. ++ */ ++ ++#define MAX_PIRQS 8 ++static int pirq_entries[MAX_PIRQS] = { ++ [0 ... MAX_PIRQS - 1] = -1 ++}; ++ ++static int __init ioapic_pirq_setup(char *str) ++{ ++ int i, max; ++ int ints[MAX_PIRQS+1]; ++ ++ get_options(str, ARRAY_SIZE(ints), ints); ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "PIRQ redirection, working around broken MP-BIOS.\n"); ++ max = MAX_PIRQS; ++ if (ints[0] < MAX_PIRQS) ++ max = ints[0]; ++ ++ for (i = 0; i < max; i++) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); ++ /* ++ * PIRQs are mapped upside down, usually. ++ */ ++ pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; ++ } ++ return 1; ++} ++ ++__setup("pirq=", ioapic_pirq_setup); ++#endif /* CONFIG_X86_32 */ ++ ++/* ++ * Saves all the IO-APIC RTE's ++ */ ++int save_ioapic_entries(void) ++{ ++ int apic, pin; ++ int err = 0; ++ ++ for_each_ioapic(apic) { ++ if (!ioapics[apic].saved_registers) { ++ err = -ENOMEM; ++ continue; ++ } ++ ++ for_each_pin(apic, pin) ++ ioapics[apic].saved_registers[pin] = ++ ioapic_read_entry(apic, pin); ++ } ++ ++ return err; ++} ++ ++/* ++ * Mask all IO APIC entries. 
++ */ ++void mask_ioapic_entries(void) ++{ ++ int apic, pin; ++ ++ for_each_ioapic(apic) { ++ if (!ioapics[apic].saved_registers) ++ continue; ++ ++ for_each_pin(apic, pin) { ++ struct IO_APIC_route_entry entry; ++ ++ entry = ioapics[apic].saved_registers[pin]; ++ if (entry.mask == IOAPIC_UNMASKED) { ++ entry.mask = IOAPIC_MASKED; ++ ioapic_write_entry(apic, pin, entry); ++ } ++ } ++ } ++} ++ ++/* ++ * Restore IO APIC entries which was saved in the ioapic structure. ++ */ ++int restore_ioapic_entries(void) ++{ ++ int apic, pin; ++ ++ for_each_ioapic(apic) { ++ if (!ioapics[apic].saved_registers) ++ continue; ++ ++ for_each_pin(apic, pin) ++ ioapic_write_entry(apic, pin, ++ ioapics[apic].saved_registers[pin]); ++ } ++ return 0; ++} ++ ++/* ++ * Find the IRQ entry number of a certain pin. ++ */ ++static int find_irq_entry(int ioapic_idx, int pin, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].irqtype == type && ++ (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) || ++ mp_irqs[i].dstapic == MP_APIC_ALL) && ++ mp_irqs[i].dstirq == pin) ++ return i; ++ ++ return -1; ++} ++ ++/* ++ * Find the pin to which IRQ[irq] (ISA) is connected ++ */ ++static int __init find_isa_irq_pin(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].srcbus; ++ ++ if (test_bit(lbus, mp_bus_not_pci) && ++ (mp_irqs[i].irqtype == type) && ++ (mp_irqs[i].srcbusirq == irq)) ++ ++ return mp_irqs[i].dstirq; ++ } ++ return -1; ++} ++ ++static int __init find_isa_irq_apic(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].srcbus; ++ ++ if (test_bit(lbus, mp_bus_not_pci) && ++ (mp_irqs[i].irqtype == type) && ++ (mp_irqs[i].srcbusirq == irq)) ++ break; ++ } ++ ++ if (i < mp_irq_entries) { ++ int ioapic_idx; ++ ++ for_each_ioapic(ioapic_idx) ++ if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic) ++ return ioapic_idx; ++ } ++ ++ return -1; ++} ++ ++#ifdef CONFIG_EISA ++/* ++ * EISA Edge/Level control register, ELCR ++ */ ++static int EISA_ELCR(unsigned int irq) ++{ ++ if (irq < nr_legacy_irqs()) { ++ unsigned int port = 0x4d0 + (irq >> 3); ++ return (inb(port) >> (irq & 7)) & 1; ++ } ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "Broken MPtable reports ISA irq %d\n", irq); ++ return 0; ++} ++ ++#endif ++ ++/* ISA interrupts are always active high edge triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_ISA_trigger(idx) (IOAPIC_EDGE) ++#define default_ISA_polarity(idx) (IOAPIC_POL_HIGH) ++ ++/* EISA interrupts are always polarity zero and can be edge or level ++ * trigger depending on the ELCR value. If an interrupt is listed as ++ * EISA conforming in the MP table, that means its trigger type must ++ * be read in from the ELCR */ ++ ++#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) ++#define default_EISA_polarity(idx) default_ISA_polarity(idx) ++ ++/* PCI interrupts are always active low level triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_PCI_trigger(idx) (IOAPIC_LEVEL) ++#define default_PCI_polarity(idx) (IOAPIC_POL_LOW) ++ ++static int irq_polarity(int idx) ++{ ++ int bus = mp_irqs[idx].srcbus; ++ ++ /* ++ * Determine IRQ line polarity (high active or low active): ++ */ ++ switch (mp_irqs[idx].irqflag & MP_IRQPOL_MASK) { ++ case MP_IRQPOL_DEFAULT: ++ /* conforms to spec, ie. 
bus-type dependent polarity */ ++ if (test_bit(bus, mp_bus_not_pci)) ++ return default_ISA_polarity(idx); ++ else ++ return default_PCI_polarity(idx); ++ case MP_IRQPOL_ACTIVE_HIGH: ++ return IOAPIC_POL_HIGH; ++ case MP_IRQPOL_RESERVED: ++ pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n"); ++ case MP_IRQPOL_ACTIVE_LOW: ++ default: /* Pointless default required due to do gcc stupidity */ ++ return IOAPIC_POL_LOW; ++ } ++} ++ ++#ifdef CONFIG_EISA ++static int eisa_irq_trigger(int idx, int bus, int trigger) ++{ ++ switch (mp_bus_id_to_type[bus]) { ++ case MP_BUS_PCI: ++ case MP_BUS_ISA: ++ return trigger; ++ case MP_BUS_EISA: ++ return default_EISA_trigger(idx); ++ } ++ pr_warn("IOAPIC: Invalid srcbus: %d defaulting to level\n", bus); ++ return IOAPIC_LEVEL; ++} ++#else ++static inline int eisa_irq_trigger(int idx, int bus, int trigger) ++{ ++ return trigger; ++} ++#endif ++ ++static int irq_trigger(int idx) ++{ ++ int bus = mp_irqs[idx].srcbus; ++ int trigger; ++ ++ /* ++ * Determine IRQ trigger mode (edge or level sensitive): ++ */ ++ switch (mp_irqs[idx].irqflag & MP_IRQTRIG_MASK) { ++ case MP_IRQTRIG_DEFAULT: ++ /* conforms to spec, ie. bus-type dependent trigger mode */ ++ if (test_bit(bus, mp_bus_not_pci)) ++ trigger = default_ISA_trigger(idx); ++ else ++ trigger = default_PCI_trigger(idx); ++ /* Take EISA into account */ ++ return eisa_irq_trigger(idx, bus, trigger); ++ case MP_IRQTRIG_EDGE: ++ return IOAPIC_EDGE; ++ case MP_IRQTRIG_RESERVED: ++ pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n"); ++ case MP_IRQTRIG_LEVEL: ++ default: /* Pointless default required due to do gcc stupidity */ ++ return IOAPIC_LEVEL; ++ } ++} ++ ++void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node, ++ int trigger, int polarity) ++{ ++ init_irq_alloc_info(info, NULL); ++ info->type = X86_IRQ_ALLOC_TYPE_IOAPIC; ++ info->ioapic_node = node; ++ info->ioapic_trigger = trigger; ++ info->ioapic_polarity = polarity; ++ info->ioapic_valid = 1; ++} ++ ++#ifndef CONFIG_ACPI ++int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); ++#endif ++ ++static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst, ++ struct irq_alloc_info *src, ++ u32 gsi, int ioapic_idx, int pin) ++{ ++ int trigger, polarity; ++ ++ copy_irq_alloc_info(dst, src); ++ dst->type = X86_IRQ_ALLOC_TYPE_IOAPIC; ++ dst->ioapic_id = mpc_ioapic_id(ioapic_idx); ++ dst->ioapic_pin = pin; ++ dst->ioapic_valid = 1; ++ if (src && src->ioapic_valid) { ++ dst->ioapic_node = src->ioapic_node; ++ dst->ioapic_trigger = src->ioapic_trigger; ++ dst->ioapic_polarity = src->ioapic_polarity; ++ } else { ++ dst->ioapic_node = NUMA_NO_NODE; ++ if (acpi_get_override_irq(gsi, &trigger, &polarity) >= 0) { ++ dst->ioapic_trigger = trigger; ++ dst->ioapic_polarity = polarity; ++ } else { ++ /* ++ * PCI interrupts are always active low level ++ * triggered. ++ */ ++ dst->ioapic_trigger = IOAPIC_LEVEL; ++ dst->ioapic_polarity = IOAPIC_POL_LOW; ++ } ++ } ++} ++ ++static int ioapic_alloc_attr_node(struct irq_alloc_info *info) ++{ ++ return (info && info->ioapic_valid) ? info->ioapic_node : NUMA_NO_NODE; ++} ++ ++static void mp_register_handler(unsigned int irq, unsigned long trigger) ++{ ++ irq_flow_handler_t hdl; ++ bool fasteoi; ++ ++ if (trigger) { ++ irq_set_status_flags(irq, IRQ_LEVEL); ++ fasteoi = true; ++ } else { ++ irq_clear_status_flags(irq, IRQ_LEVEL); ++ fasteoi = false; ++ } ++ ++ hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; ++ __irq_set_handler(irq, hdl, 0, fasteoi ? 
"fasteoi" : "edge"); ++} ++ ++static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info) ++{ ++ struct mp_chip_data *data = irq_get_chip_data(irq); ++ ++ /* ++ * setup_IO_APIC_irqs() programs all legacy IRQs with default trigger ++ * and polarity attirbutes. So allow the first user to reprogram the ++ * pin with real trigger and polarity attributes. ++ */ ++ if (irq < nr_legacy_irqs() && data->count == 1) { ++ if (info->ioapic_trigger != data->trigger) ++ mp_register_handler(irq, info->ioapic_trigger); ++ data->entry.trigger = data->trigger = info->ioapic_trigger; ++ data->entry.polarity = data->polarity = info->ioapic_polarity; ++ } ++ ++ return data->trigger == info->ioapic_trigger && ++ data->polarity == info->ioapic_polarity; ++} ++ ++static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, ++ struct irq_alloc_info *info) ++{ ++ bool legacy = false; ++ int irq = -1; ++ int type = ioapics[ioapic].irqdomain_cfg.type; ++ ++ switch (type) { ++ case IOAPIC_DOMAIN_LEGACY: ++ /* ++ * Dynamically allocate IRQ number for non-ISA IRQs in the first ++ * 16 GSIs on some weird platforms. ++ */ ++ if (!ioapic_initialized || gsi >= nr_legacy_irqs()) ++ irq = gsi; ++ legacy = mp_is_legacy_irq(irq); ++ break; ++ case IOAPIC_DOMAIN_STRICT: ++ irq = gsi; ++ break; ++ case IOAPIC_DOMAIN_DYNAMIC: ++ break; ++ default: ++ WARN(1, "ioapic: unknown irqdomain type %d\n", type); ++ return -1; ++ } ++ ++ return __irq_domain_alloc_irqs(domain, irq, 1, ++ ioapic_alloc_attr_node(info), ++ info, legacy, NULL); ++} ++ ++/* ++ * Need special handling for ISA IRQs because there may be multiple IOAPIC pins ++ * sharing the same ISA IRQ number and irqdomain only supports 1:1 mapping ++ * between IOAPIC pin and IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are ++ * used for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H). ++ * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are available, and ++ * some BIOSes may use MP Interrupt Source records to override IRQ numbers for ++ * PIRQs instead of reprogramming the interrupt routing logic. Thus there may be ++ * multiple pins sharing the same legacy IRQ number when ACPI is disabled. ++ */ ++static int alloc_isa_irq_from_domain(struct irq_domain *domain, ++ int irq, int ioapic, int pin, ++ struct irq_alloc_info *info) ++{ ++ struct mp_chip_data *data; ++ struct irq_data *irq_data = irq_get_irq_data(irq); ++ int node = ioapic_alloc_attr_node(info); ++ ++ /* ++ * Legacy ISA IRQ has already been allocated, just add pin to ++ * the pin list assoicated with this IRQ and program the IOAPIC ++ * entry. 
The IOAPIC entry ++ */ ++ if (irq_data && irq_data->parent_data) { ++ if (!mp_check_pin_attr(irq, info)) ++ return -EBUSY; ++ if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic, ++ info->ioapic_pin)) ++ return -ENOMEM; ++ } else { ++ info->flags |= X86_IRQ_ALLOC_LEGACY; ++ irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, ++ NULL); ++ if (irq >= 0) { ++ irq_data = irq_domain_get_irq_data(domain, irq); ++ data = irq_data->chip_data; ++ data->isa_irq = true; ++ } ++ } ++ ++ return irq; ++} ++ ++static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, ++ unsigned int flags, struct irq_alloc_info *info) ++{ ++ int irq; ++ bool legacy = false; ++ struct irq_alloc_info tmp; ++ struct mp_chip_data *data; ++ struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); ++ ++ if (!domain) ++ return -ENOSYS; ++ ++ if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { ++ irq = mp_irqs[idx].srcbusirq; ++ legacy = mp_is_legacy_irq(irq); ++ } ++ ++ mutex_lock(&ioapic_mutex); ++ if (!(flags & IOAPIC_MAP_ALLOC)) { ++ if (!legacy) { ++ irq = irq_find_mapping(domain, pin); ++ if (irq == 0) ++ irq = -ENOENT; ++ } ++ } else { ++ ioapic_copy_alloc_attr(&tmp, info, gsi, ioapic, pin); ++ if (legacy) ++ irq = alloc_isa_irq_from_domain(domain, irq, ++ ioapic, pin, &tmp); ++ else if ((irq = irq_find_mapping(domain, pin)) == 0) ++ irq = alloc_irq_from_domain(domain, ioapic, gsi, &tmp); ++ else if (!mp_check_pin_attr(irq, &tmp)) ++ irq = -EBUSY; ++ if (irq >= 0) { ++ data = irq_get_chip_data(irq); ++ data->count++; ++ } ++ } ++ mutex_unlock(&ioapic_mutex); ++ ++ return irq; ++} ++ ++static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags) ++{ ++ u32 gsi = mp_pin_to_gsi(ioapic, pin); ++ ++ /* ++ * Debugging check, we are in big trouble if this message pops up! ++ */ ++ if (mp_irqs[idx].dstirq != pin) ++ pr_err("broken BIOS or MPTABLE parser, ayiee!!\n"); ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * PCI IRQ command line redirection. Yes, limits are hardcoded. ++ */ ++ if ((pin >= 16) && (pin <= 23)) { ++ if (pirq_entries[pin-16] != -1) { ++ if (!pirq_entries[pin-16]) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "disabling PIRQ%d\n", pin-16); ++ } else { ++ int irq = pirq_entries[pin-16]; ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "using PIRQ%d -> IRQ %d\n", ++ pin-16, irq); ++ return irq; ++ } ++ } ++ } ++#endif ++ ++ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, NULL); ++} ++ ++int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info) ++{ ++ int ioapic, pin, idx; ++ ++ ioapic = mp_find_ioapic(gsi); ++ if (ioapic < 0) ++ return -ENODEV; ++ ++ pin = mp_find_ioapic_pin(ioapic, gsi); ++ idx = find_irq_entry(ioapic, pin, mp_INT); ++ if ((flags & IOAPIC_MAP_CHECK) && idx < 0) ++ return -ENODEV; ++ ++ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, info); ++} ++ ++void mp_unmap_irq(int irq) ++{ ++ struct irq_data *irq_data = irq_get_irq_data(irq); ++ struct mp_chip_data *data; ++ ++ if (!irq_data || !irq_data->domain) ++ return; ++ ++ data = irq_data->chip_data; ++ if (!data || data->isa_irq) ++ return; ++ ++ mutex_lock(&ioapic_mutex); ++ if (--data->count == 0) ++ irq_domain_free_irqs(irq, 1); ++ mutex_unlock(&ioapic_mutex); ++} ++ ++/* ++ * Find a specific PCI IRQ entry. 
++ * Not an __init, possibly needed by modules ++ */ ++int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) ++{ ++ int irq, i, best_ioapic = -1, best_idx = -1; ++ ++ apic_printk(APIC_DEBUG, ++ "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", ++ bus, slot, pin); ++ if (test_bit(bus, mp_bus_not_pci)) { ++ apic_printk(APIC_VERBOSE, ++ "PCI BIOS passed nonexistent PCI bus %d!\n", bus); ++ return -1; ++ } ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].srcbus; ++ int ioapic_idx, found = 0; ++ ++ if (bus != lbus || mp_irqs[i].irqtype != mp_INT || ++ slot != ((mp_irqs[i].srcbusirq >> 2) & 0x1f)) ++ continue; ++ ++ for_each_ioapic(ioapic_idx) ++ if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic || ++ mp_irqs[i].dstapic == MP_APIC_ALL) { ++ found = 1; ++ break; ++ } ++ if (!found) ++ continue; ++ ++ /* Skip ISA IRQs */ ++ irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq, 0); ++ if (irq > 0 && !IO_APIC_IRQ(irq)) ++ continue; ++ ++ if (pin == (mp_irqs[i].srcbusirq & 3)) { ++ best_idx = i; ++ best_ioapic = ioapic_idx; ++ goto out; ++ } ++ ++ /* ++ * Use the first all-but-pin matching entry as a ++ * best-guess fuzzy result for broken mptables. ++ */ ++ if (best_idx < 0) { ++ best_idx = i; ++ best_ioapic = ioapic_idx; ++ } ++ } ++ if (best_idx < 0) ++ return -1; ++ ++out: ++ return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, ++ IOAPIC_MAP_ALLOC); ++} ++EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); ++ ++static struct irq_chip ioapic_chip, ioapic_ir_chip; ++ ++static void __init setup_IO_APIC_irqs(void) ++{ ++ unsigned int ioapic, pin; ++ int idx; ++ ++ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); ++ ++ for_each_ioapic_pin(ioapic, pin) { ++ idx = find_irq_entry(ioapic, pin, mp_INT); ++ if (idx < 0) ++ apic_printk(APIC_VERBOSE, ++ KERN_DEBUG " apic %d pin %d not connected\n", ++ mpc_ioapic_id(ioapic), pin); ++ else ++ pin_2_irq(idx, ioapic, pin, ++ ioapic ? 0 : IOAPIC_MAP_ALLOC); ++ } ++} ++ ++void ioapic_zap_locks(void) ++{ ++ raw_spin_lock_init(&ioapic_lock); ++} ++ ++static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries) ++{ ++ int i; ++ char buf[256]; ++ struct IO_APIC_route_entry entry; ++ struct IR_IO_APIC_route_entry *ir_entry = (void *)&entry; ++ ++ printk(KERN_DEBUG "IOAPIC %d:\n", apic); ++ for (i = 0; i <= nr_entries; i++) { ++ entry = ioapic_read_entry(apic, i); ++ snprintf(buf, sizeof(buf), ++ " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)", ++ i, ++ entry.mask == IOAPIC_MASKED ? "disabled" : "enabled ", ++ entry.trigger == IOAPIC_LEVEL ? "level" : "edge ", ++ entry.polarity == IOAPIC_POL_LOW ? "low " : "high", ++ entry.vector, entry.irr, entry.delivery_status); ++ if (ir_entry->format) ++ printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n", ++ buf, (ir_entry->index2 << 15) | ir_entry->index, ++ ir_entry->zero); ++ else ++ printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n", ++ buf, ++ entry.dest_mode == IOAPIC_DEST_MODE_LOGICAL ? 
++ "logical " : "physical", ++ entry.dest, entry.delivery_mode); ++ } ++} ++ ++static void __init print_IO_APIC(int ioapic_idx) ++{ ++ union IO_APIC_reg_00 reg_00; ++ union IO_APIC_reg_01 reg_01; ++ union IO_APIC_reg_02 reg_02; ++ union IO_APIC_reg_03 reg_03; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic_idx, 0); ++ reg_01.raw = io_apic_read(ioapic_idx, 1); ++ if (reg_01.bits.version >= 0x10) ++ reg_02.raw = io_apic_read(ioapic_idx, 2); ++ if (reg_01.bits.version >= 0x20) ++ reg_03.raw = io_apic_read(ioapic_idx, 3); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); ++ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); ++ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); ++ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); ++ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); ++ ++ printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); ++ printk(KERN_DEBUG "....... : max redirection entries: %02X\n", ++ reg_01.bits.entries); ++ ++ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); ++ printk(KERN_DEBUG "....... : IO APIC version: %02X\n", ++ reg_01.bits.version); ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, ++ * but the value of reg_02 is read as the previous read register ++ * value, so ignore it if reg_02 == reg_01. ++ */ ++ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); ++ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); ++ } ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 ++ * or reg_03, but the value of reg_0[23] is read as the previous read ++ * register value, so ignore it if reg_03 == reg_0[12]. ++ */ ++ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && ++ reg_03.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); ++ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); ++ } ++ ++ printk(KERN_DEBUG ".... IRQ redirection table:\n"); ++ io_apic_print_entries(ioapic_idx, reg_01.bits.entries); ++} ++ ++void __init print_IO_APICs(void) ++{ ++ int ioapic_idx; ++ unsigned int irq; ++ ++ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); ++ for_each_ioapic(ioapic_idx) ++ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", ++ mpc_ioapic_id(ioapic_idx), ++ ioapics[ioapic_idx].nr_registers); ++ ++ /* ++ * We are a bit conservative about what we expect. We have to ++ * know about every hardware change ASAP. ++ */ ++ printk(KERN_INFO "testing the IO APIC.......................\n"); ++ ++ for_each_ioapic(ioapic_idx) ++ print_IO_APIC(ioapic_idx); ++ ++ printk(KERN_DEBUG "IRQ to pin mappings:\n"); ++ for_each_active_irq(irq) { ++ struct irq_pin_list *entry; ++ struct irq_chip *chip; ++ struct mp_chip_data *data; ++ ++ chip = irq_get_chip(irq); ++ if (chip != &ioapic_chip && chip != &ioapic_ir_chip) ++ continue; ++ data = irq_get_chip_data(irq); ++ if (!data) ++ continue; ++ if (list_empty(&data->irq_2_pin)) ++ continue; ++ ++ printk(KERN_DEBUG "IRQ%d ", irq); ++ for_each_irq_pin(entry, data->irq_2_pin) ++ pr_cont("-> %d:%d", entry->apic, entry->pin); ++ pr_cont("\n"); ++ } ++ ++ printk(KERN_INFO ".................................... 
done.\n"); ++} ++ ++/* Where if anywhere is the i8259 connect in external int mode */ ++static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; ++ ++void __init enable_IO_APIC(void) ++{ ++ int i8259_apic, i8259_pin; ++ int apic, pin; ++ ++ if (skip_ioapic_setup) ++ nr_ioapics = 0; ++ ++ if (!nr_legacy_irqs() || !nr_ioapics) ++ return; ++ ++ for_each_ioapic_pin(apic, pin) { ++ /* See if any of the pins is in ExtINT mode */ ++ struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin); ++ ++ /* If the interrupt line is enabled and in ExtInt mode ++ * I have found the pin where the i8259 is connected. ++ */ ++ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { ++ ioapic_i8259.apic = apic; ++ ioapic_i8259.pin = pin; ++ goto found_i8259; ++ } ++ } ++ found_i8259: ++ /* Look to see what if the MP table has reported the ExtINT */ ++ /* If we could not find the appropriate pin by looking at the ioapic ++ * the i8259 probably is not connected the ioapic but give the ++ * mptable a chance anyway. ++ */ ++ i8259_pin = find_isa_irq_pin(0, mp_ExtINT); ++ i8259_apic = find_isa_irq_apic(0, mp_ExtINT); ++ /* Trust the MP table if nothing is setup in the hardware */ ++ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { ++ printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); ++ ioapic_i8259.pin = i8259_pin; ++ ioapic_i8259.apic = i8259_apic; ++ } ++ /* Complain if the MP table and the hardware disagree */ ++ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && ++ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) ++ { ++ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); ++ } ++ ++ /* ++ * Do not trust the IO-APIC being empty at bootup ++ */ ++ clear_IO_APIC(); ++} ++ ++void native_restore_boot_irq_mode(void) ++{ ++ /* ++ * If the i8259 is routed through an IOAPIC ++ * Put that IOAPIC in virtual wire mode ++ * so legacy interrupts can be delivered. ++ */ ++ if (ioapic_i8259.pin != -1) { ++ struct IO_APIC_route_entry entry; ++ ++ memset(&entry, 0, sizeof(entry)); ++ entry.mask = IOAPIC_UNMASKED; ++ entry.trigger = IOAPIC_EDGE; ++ entry.polarity = IOAPIC_POL_HIGH; ++ entry.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; ++ entry.delivery_mode = dest_ExtINT; ++ entry.dest = read_apic_id(); ++ ++ /* ++ * Add it to the IO-APIC irq-routing table: ++ */ ++ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); ++ } ++ ++ if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config()) ++ disconnect_bsp_APIC(ioapic_i8259.pin != -1); ++} ++ ++void restore_boot_irq_mode(void) ++{ ++ if (!nr_legacy_irqs()) ++ return; ++ ++ x86_apic_ops.restore(); ++} ++ ++#ifdef CONFIG_X86_32 ++/* ++ * function to set the IO-APIC physical IDs based on the ++ * values stored in the MPC table. ++ * ++ * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 ++ */ ++void __init setup_ioapic_ids_from_mpc_nocheck(void) ++{ ++ union IO_APIC_reg_00 reg_00; ++ physid_mask_t phys_id_present_map; ++ int ioapic_idx; ++ int i; ++ unsigned char old_id; ++ unsigned long flags; ++ ++ /* ++ * This is broken; anything with a real cpu count has to ++ * circumvent this idiocy regardless. ++ */ ++ apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); ++ ++ /* ++ * Set the IOAPIC ID to the value stored in the MPC table. 
++ */ ++ for_each_ioapic(ioapic_idx) { ++ /* Read the register 0 value */ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic_idx, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ old_id = mpc_ioapic_id(ioapic_idx); ++ ++ if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", ++ ioapic_idx, mpc_ioapic_id(ioapic_idx)); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ reg_00.bits.ID); ++ ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID; ++ } ++ ++ /* ++ * Sanity check, is the ID really free? Every APIC in a ++ * system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (apic->check_apicid_used(&phys_id_present_map, ++ mpc_ioapic_id(ioapic_idx))) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", ++ ioapic_idx, mpc_ioapic_id(ioapic_idx)); ++ for (i = 0; i < get_physical_broadcast(); i++) ++ if (!physid_isset(i, phys_id_present_map)) ++ break; ++ if (i >= get_physical_broadcast()) ++ panic("Max APIC ID exceeded!\n"); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ i); ++ physid_set(i, phys_id_present_map); ++ ioapics[ioapic_idx].mp_config.apicid = i; ++ } else { ++ physid_mask_t tmp; ++ apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx), ++ &tmp); ++ apic_printk(APIC_VERBOSE, "Setting %d in the " ++ "phys_id_present_map\n", ++ mpc_ioapic_id(ioapic_idx)); ++ physids_or(phys_id_present_map, phys_id_present_map, tmp); ++ } ++ ++ /* ++ * We need to adjust the IRQ routing table ++ * if the ID changed. ++ */ ++ if (old_id != mpc_ioapic_id(ioapic_idx)) ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].dstapic == old_id) ++ mp_irqs[i].dstapic ++ = mpc_ioapic_id(ioapic_idx); ++ ++ /* ++ * Update the ID register according to the right value ++ * from the MPC table if they are different. ++ */ ++ if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID) ++ continue; ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "...changing IO-APIC physical APIC ID to %d ...", ++ mpc_ioapic_id(ioapic_idx)); ++ ++ reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic_idx, 0, reg_00.raw); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* ++ * Sanity check ++ */ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic_idx, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) ++ pr_cont("could not set ID!\n"); ++ else ++ apic_printk(APIC_VERBOSE, " ok.\n"); ++ } ++} ++ ++void __init setup_ioapic_ids_from_mpc(void) ++{ ++ ++ if (acpi_ioapic) ++ return; ++ /* ++ * Don't check I/O APIC IDs for xAPIC systems. They have ++ * no meaning without the serial APIC bus. 
++ */ ++ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ || APIC_XAPIC(boot_cpu_apic_version)) ++ return; ++ setup_ioapic_ids_from_mpc_nocheck(); ++} ++#endif ++ ++int no_timer_check __initdata; ++ ++static int __init notimercheck(char *s) ++{ ++ no_timer_check = 1; ++ return 1; ++} ++__setup("no_timer_check", notimercheck); ++ ++static void __init delay_with_tsc(void) ++{ ++ unsigned long long start, now; ++ unsigned long end = jiffies + 4; ++ ++ start = rdtsc(); ++ ++ /* ++ * We don't know the TSC frequency yet, but waiting for ++ * 40000000000/HZ TSC cycles is safe: ++ * 4 GHz == 10 jiffies ++ * 1 GHz == 40 jiffies ++ */ ++ do { ++ rep_nop(); ++ now = rdtsc(); ++ } while ((now - start) < 40000000000ULL / HZ && ++ time_before_eq(jiffies, end)); ++} ++ ++static void __init delay_without_tsc(void) ++{ ++ unsigned long end = jiffies + 4; ++ int band = 1; ++ ++ /* ++ * We don't know any frequency yet, but waiting for ++ * 40940000000/HZ cycles is safe: ++ * 4 GHz == 10 jiffies ++ * 1 GHz == 40 jiffies ++ * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094 ++ */ ++ do { ++ __delay(((1U << band++) * 10000000UL) / HZ); ++ } while (band < 12 && time_before_eq(jiffies, end)); ++} ++ ++/* ++ * There is a nasty bug in some older SMP boards, their mptable lies ++ * about the timer IRQ. We do the following to work around the situation: ++ * ++ * - timer IRQ defaults to IO-APIC IRQ ++ * - if this function detects that timer IRQs are defunct, then we fall ++ * back to ISA timer IRQs ++ */ ++static int __init timer_irq_works(void) ++{ ++ unsigned long t1 = jiffies; ++ unsigned long flags; ++ ++ if (no_timer_check) ++ return 1; ++ ++ local_save_flags(flags); ++ local_irq_enable(); ++ ++ if (boot_cpu_has(X86_FEATURE_TSC)) ++ delay_with_tsc(); ++ else ++ delay_without_tsc(); ++ ++ local_irq_restore(flags); ++ ++ /* ++ * Expect a few ticks at least, to be sure some possible ++ * glue logic does not lock up after one or two first ++ * ticks in a non-ExtINT mode. Also the local APIC ++ * might have cached one ExtINT interrupt. Finally, at ++ * least one tick may be lost due to delays. ++ */ ++ ++ /* jiffies wrap? */ ++ if (time_after(jiffies, t1 + 4)) ++ return 1; ++ return 0; ++} ++ ++/* ++ * In the SMP+IOAPIC case it might happen that there are an unspecified ++ * number of pending IRQ events unhandled. These cases are very rare, ++ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much ++ * better to do it this way as thus we do not have to be aware of ++ * 'pending' interrupts in the IRQ path, except at this point. ++ */ ++/* ++ * Edge triggered needs to resend any interrupt ++ * that was delayed but this is now handled in the device ++ * independent code. ++ */ ++ ++/* ++ * Starting up a edge-triggered IO-APIC interrupt is ++ * nasty - we need to make sure that we get the edge. ++ * If it is already asserted for some reason, we need ++ * return 1 to indicate that is was pending. ++ * ++ * This is not complete - we should be able to fake ++ * an edge even if it isn't on the 8259A... 
++ */ ++static unsigned int startup_ioapic_irq(struct irq_data *data) ++{ ++ int was_pending = 0, irq = data->irq; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ if (irq < nr_legacy_irqs()) { ++ legacy_pic->mask(irq); ++ if (legacy_pic->irq_pending(irq)) ++ was_pending = 1; ++ } ++ __unmask_ioapic(data->chip_data); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return was_pending; ++} ++ ++atomic_t irq_mis_count; ++ ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++static bool io_apic_level_ack_pending(struct mp_chip_data *data) ++{ ++ struct irq_pin_list *entry; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ for_each_irq_pin(entry, data->irq_2_pin) { ++ unsigned int reg; ++ int pin; ++ ++ pin = entry->pin; ++ reg = io_apic_read(entry->apic, 0x10 + pin*2); ++ /* Is the remote IRR bit set? */ ++ if (reg & IO_APIC_REDIR_REMOTE_IRR) { ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ return true; ++ } ++ } ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return false; ++} ++ ++static inline bool ioapic_irqd_mask(struct irq_data *data) ++{ ++ /* If we are moving the IRQ we need to mask it */ ++ if (unlikely(irqd_is_setaffinity_pending(data))) { ++ if (!irqd_irq_masked(data)) ++ mask_ioapic_irq(data); ++ return true; ++ } ++ return false; ++} ++ ++static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) ++{ ++ if (unlikely(masked)) { ++ /* Only migrate the irq if the ack has been received. ++ * ++ * On rare occasions the broadcast level triggered ack gets ++ * delayed going to ioapics, and if we reprogram the ++ * vector while Remote IRR is still set the irq will never ++ * fire again. ++ * ++ * To prevent this scenario we read the Remote IRR bit ++ * of the ioapic. This has two effects. ++ * - On any sane system the read of the ioapic will ++ * flush writes (and acks) going to the ioapic from ++ * this cpu. ++ * - We get to see if the ACK has actually been delivered. ++ * ++ * Based on failed experiments of reprogramming the ++ * ioapic entry from outside of irq context starting ++ * with masking the ioapic entry and then polling until ++ * Remote IRR was clear before reprogramming the ++ * ioapic I don't trust the Remote IRR bit to be ++ * completey accurate. ++ * ++ * However there appears to be no other way to plug ++ * this race, so if the Remote IRR bit is not ++ * accurate and is causing problems then it is a hardware bug ++ * and you can go talk to the chipset vendor about it. ++ */ ++ if (!io_apic_level_ack_pending(data->chip_data)) ++ irq_move_masked_irq(data); ++ /* If the IRQ is masked in the core, leave it: */ ++ if (!irqd_irq_masked(data)) ++ unmask_ioapic_irq(data); ++ } ++} ++#else ++static inline bool ioapic_irqd_mask(struct irq_data *data) ++{ ++ return false; ++} ++static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) ++{ ++} ++#endif ++ ++static void ioapic_ack_level(struct irq_data *irq_data) ++{ ++ struct irq_cfg *cfg = irqd_cfg(irq_data); ++ unsigned long v; ++ bool masked; ++ int i; ++ ++ irq_complete_move(cfg); ++ masked = ioapic_irqd_mask(irq_data); ++ ++ /* ++ * It appears there is an erratum which affects at least version 0x11 ++ * of I/O APIC (that's the 82093AA and cores integrated into various ++ * chipsets). Under certain conditions a level-triggered interrupt is ++ * erroneously delivered as edge-triggered one but the respective IRR ++ * bit gets set nevertheless. 
As a result the I/O unit expects an EOI ++ * message but it will never arrive and further interrupts are blocked ++ * from the source. The exact reason is so far unknown, but the ++ * phenomenon was observed when two consecutive interrupt requests ++ * from a given source get delivered to the same CPU and the source is ++ * temporarily disabled in between. ++ * ++ * A workaround is to simulate an EOI message manually. We achieve it ++ * by setting the trigger mode to edge and then to level when the edge ++ * trigger mode gets detected in the TMR of a local APIC for a ++ * level-triggered interrupt. We mask the source for the time of the ++ * operation to prevent an edge-triggered interrupt escaping meanwhile. ++ * The idea is from Manfred Spraul. --macro ++ * ++ * Also in the case when cpu goes offline, fixup_irqs() will forward ++ * any unhandled interrupt on the offlined cpu to the new cpu ++ * destination that is handling the corresponding interrupt. This ++ * interrupt forwarding is done via IPI's. Hence, in this case also ++ * level-triggered io-apic interrupt will be seen as an edge ++ * interrupt in the IRR. And we can't rely on the cpu's EOI ++ * to be broadcasted to the IO-APIC's which will clear the remoteIRR ++ * corresponding to the level-triggered interrupt. Hence on IO-APIC's ++ * supporting EOI register, we do an explicit EOI to clear the ++ * remote IRR and on IO-APIC's which don't have an EOI register, ++ * we use the above logic (mask+edge followed by unmask+level) from ++ * Manfred Spraul to clear the remote IRR. ++ */ ++ i = cfg->vector; ++ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); ++ ++ /* ++ * We must acknowledge the irq before we move it or the acknowledge will ++ * not propagate properly. ++ */ ++ ack_APIC_irq(); ++ ++ /* ++ * Tail end of clearing remote IRR bit (either by delivering the EOI ++ * message via io-apic EOI register write or simulating it using ++ * mask+edge followed by unnask+level logic) manually when the ++ * level triggered interrupt is seen as the edge triggered interrupt ++ * at the cpu. ++ */ ++ if (!(v & (1 << (i & 0x1f)))) { ++ atomic_inc(&irq_mis_count); ++ eoi_ioapic_pin(cfg->vector, irq_data->chip_data); ++ } ++ ++ ioapic_irqd_unmask(irq_data, masked); ++} ++ ++static void ioapic_ir_ack_level(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ ++ /* ++ * Intr-remapping uses pin number as the virtual vector ++ * in the RTE. Actual vector is programmed in ++ * intr-remapping table entry. Hence for the io-apic ++ * EOI we use the pin number. ++ */ ++ apic_ack_irq(irq_data); ++ eoi_ioapic_pin(data->entry.vector, data); ++} ++ ++static void ioapic_configure_entry(struct irq_data *irqd) ++{ ++ struct mp_chip_data *mpd = irqd->chip_data; ++ struct irq_cfg *cfg = irqd_cfg(irqd); ++ struct irq_pin_list *entry; ++ ++ /* ++ * Only update when the parent is the vector domain, don't touch it ++ * if the parent is the remapping domain. Check the installed ++ * ioapic chip to verify that. 
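/*
 * A minimal standalone sketch (not part of the patch itself) of the TMR
 * lookup that ioapic_ack_level() performs above: the local APIC exposes
 * the Trigger Mode Register as eight 32-bit words spaced 0x10 bytes
 * apart, so vector v lives in word v / 32 (byte offset (v & ~0x1f) >> 1)
 * at bit position v % 32. read_tmr_word() is a placeholder for the MMIO
 * read, not a real kernel API.
 */
static inline int vector_is_level_in_tmr(unsigned int v,
					 unsigned int (*read_tmr_word)(unsigned int byte_off))
{
	unsigned int byte_off = (v & ~0x1f) >> 1;	/* (v / 32) * 0x10 */
	unsigned int bit = v & 0x1f;			/* v % 32 */

	return (read_tmr_word(byte_off) >> bit) & 1;
}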
++ */ ++ if (irqd->chip == &ioapic_chip) { ++ mpd->entry.dest = cfg->dest_apicid; ++ mpd->entry.vector = cfg->vector; ++ } ++ for_each_irq_pin(entry, mpd->irq_2_pin) ++ __ioapic_write_entry(entry->apic, entry->pin, mpd->entry); ++} ++ ++static int ioapic_set_affinity(struct irq_data *irq_data, ++ const struct cpumask *mask, bool force) ++{ ++ struct irq_data *parent = irq_data->parent_data; ++ unsigned long flags; ++ int ret; ++ ++ ret = parent->chip->irq_set_affinity(parent, mask, force); ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) ++ ioapic_configure_entry(irq_data); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return ret; ++} ++ ++/* ++ * Interrupt shutdown masks the ioapic pin, but the interrupt might already ++ * be in flight, but not yet serviced by the target CPU. That means ++ * __synchronize_hardirq() would return and claim that everything is calmed ++ * down. So free_irq() would proceed and deactivate the interrupt and free ++ * resources. ++ * ++ * Once the target CPU comes around to service it it will find a cleared ++ * vector and complain. While the spurious interrupt is harmless, the full ++ * release of resources might prevent the interrupt from being acknowledged ++ * which keeps the hardware in a weird state. ++ * ++ * Verify that the corresponding Remote-IRR bits are clear. ++ */ ++static int ioapic_irq_get_chip_state(struct irq_data *irqd, ++ enum irqchip_irq_state which, ++ bool *state) ++{ ++ struct mp_chip_data *mcd = irqd->chip_data; ++ struct IO_APIC_route_entry rentry; ++ struct irq_pin_list *p; ++ ++ if (which != IRQCHIP_STATE_ACTIVE) ++ return -EINVAL; ++ ++ *state = false; ++ raw_spin_lock(&ioapic_lock); ++ for_each_irq_pin(p, mcd->irq_2_pin) { ++ rentry = __ioapic_read_entry(p->apic, p->pin); ++ /* ++ * The remote IRR is only valid in level trigger mode. It's ++ * meaning is undefined for edge triggered interrupts and ++ * irrelevant because the IO-APIC treats them as fire and ++ * forget. ++ */ ++ if (rentry.irr && rentry.trigger) { ++ *state = true; ++ break; ++ } ++ } ++ raw_spin_unlock(&ioapic_lock); ++ return 0; ++} ++ ++static struct irq_chip ioapic_chip __read_mostly = { ++ .name = "IO-APIC", ++ .irq_startup = startup_ioapic_irq, ++ .irq_mask = mask_ioapic_irq, ++ .irq_unmask = unmask_ioapic_irq, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_eoi = ioapic_ack_level, ++ .irq_set_affinity = ioapic_set_affinity, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_get_irqchip_state = ioapic_irq_get_chip_state, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++static struct irq_chip ioapic_ir_chip __read_mostly = { ++ .name = "IR-IO-APIC", ++ .irq_startup = startup_ioapic_irq, ++ .irq_mask = mask_ioapic_irq, ++ .irq_unmask = unmask_ioapic_irq, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_eoi = ioapic_ir_ack_level, ++ .irq_set_affinity = ioapic_set_affinity, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_get_irqchip_state = ioapic_irq_get_chip_state, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++static inline void init_IO_APIC_traps(void) ++{ ++ struct irq_cfg *cfg; ++ unsigned int irq; ++ ++ for_each_active_irq(irq) { ++ cfg = irq_cfg(irq); ++ if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { ++ /* ++ * Hmm.. We don't have an entry for this, ++ * so default to an old-fashioned 8259 ++ * interrupt if we can.. ++ */ ++ if (irq < nr_legacy_irqs()) ++ legacy_pic->make_irq(irq); ++ else ++ /* Strange. Oh, well.. 
*/ ++ irq_set_chip(irq, &no_irq_chip); ++ } ++ } ++} ++ ++/* ++ * The local APIC irq-chip implementation: ++ */ ++ ++static void mask_lapic_irq(struct irq_data *data) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write(APIC_LVT0, v | APIC_LVT_MASKED); ++} ++ ++static void unmask_lapic_irq(struct irq_data *data) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); ++} ++ ++static void ack_lapic_irq(struct irq_data *data) ++{ ++ ack_APIC_irq(); ++} ++ ++static struct irq_chip lapic_chip __read_mostly = { ++ .name = "local-APIC", ++ .irq_mask = mask_lapic_irq, ++ .irq_unmask = unmask_lapic_irq, ++ .irq_ack = ack_lapic_irq, ++}; ++ ++static void lapic_register_intr(int irq) ++{ ++ irq_clear_status_flags(irq, IRQ_LEVEL); ++ irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, ++ "edge"); ++} ++ ++/* ++ * This looks a bit hackish but it's about the only one way of sending ++ * a few INTA cycles to 8259As and any associated glue logic. ICR does ++ * not support the ExtINT mode, unfortunately. We need to send these ++ * cycles as some i82489DX-based boards have glue logic that keeps the ++ * 8259A interrupt line asserted until INTA. --macro ++ */ ++static inline void __init unlock_ExtINT_logic(void) ++{ ++ int apic, pin, i; ++ struct IO_APIC_route_entry entry0, entry1; ++ unsigned char save_control, save_freq_select; ++ ++ pin = find_isa_irq_pin(8, mp_INT); ++ if (pin == -1) { ++ WARN_ON_ONCE(1); ++ return; ++ } ++ apic = find_isa_irq_apic(8, mp_INT); ++ if (apic == -1) { ++ WARN_ON_ONCE(1); ++ return; ++ } ++ ++ entry0 = ioapic_read_entry(apic, pin); ++ clear_IO_APIC_pin(apic, pin); ++ ++ memset(&entry1, 0, sizeof(entry1)); ++ ++ entry1.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; ++ entry1.mask = IOAPIC_UNMASKED; ++ entry1.dest = hard_smp_processor_id(); ++ entry1.delivery_mode = dest_ExtINT; ++ entry1.polarity = entry0.polarity; ++ entry1.trigger = IOAPIC_EDGE; ++ entry1.vector = 0; ++ ++ ioapic_write_entry(apic, pin, entry1); ++ ++ save_control = CMOS_READ(RTC_CONTROL); ++ save_freq_select = CMOS_READ(RTC_FREQ_SELECT); ++ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, ++ RTC_FREQ_SELECT); ++ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); ++ ++ i = 100; ++ while (i-- > 0) { ++ mdelay(10); ++ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) ++ i -= 10; ++ } ++ ++ CMOS_WRITE(save_control, RTC_CONTROL); ++ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); ++ clear_IO_APIC_pin(apic, pin); ++ ++ ioapic_write_entry(apic, pin, entry0); ++} ++ ++static int disable_timer_pin_1 __initdata; ++/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ ++static int __init disable_timer_pin_setup(char *arg) ++{ ++ disable_timer_pin_1 = 1; ++ return 0; ++} ++early_param("disable_timer_pin_1", disable_timer_pin_setup); ++ ++static int mp_alloc_timer_irq(int ioapic, int pin) ++{ ++ int irq = -1; ++ struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); ++ ++ if (domain) { ++ struct irq_alloc_info info; ++ ++ ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0); ++ info.ioapic_id = mpc_ioapic_id(ioapic); ++ info.ioapic_pin = pin; ++ mutex_lock(&ioapic_mutex); ++ irq = alloc_isa_irq_from_domain(domain, 0, ioapic, pin, &info); ++ mutex_unlock(&ioapic_mutex); ++ } ++ ++ return irq; ++} ++ ++/* ++ * This code may look a bit paranoid, but it's supposed to cooperate with ++ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ ++ * is so screwy. 
Thanks to Brian Perkins for testing/hacking this beast ++ * fanatically on his truly buggy board. ++ * ++ * FIXME: really need to revamp this for all platforms. ++ */ ++static inline void __init check_timer(void) ++{ ++ struct irq_data *irq_data = irq_get_irq_data(0); ++ struct mp_chip_data *data = irq_data->chip_data; ++ struct irq_cfg *cfg = irqd_cfg(irq_data); ++ int node = cpu_to_node(0); ++ int apic1, pin1, apic2, pin2; ++ unsigned long flags; ++ int no_pin1 = 0; ++ ++ local_irq_save(flags); ++ ++ /* ++ * get/set the timer IRQ vector: ++ */ ++ legacy_pic->mask(0); ++ ++ /* ++ * As IRQ0 is to be enabled in the 8259A, the virtual ++ * wire has to be disabled in the local APIC. Also ++ * timer interrupts need to be acknowledged manually in ++ * the 8259A for the i82489DX when using the NMI ++ * watchdog as that APIC treats NMIs as level-triggered. ++ * The AEOI mode will finish them in the 8259A ++ * automatically. ++ */ ++ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); ++ legacy_pic->init(1); ++ ++ pin1 = find_isa_irq_pin(0, mp_INT); ++ apic1 = find_isa_irq_apic(0, mp_INT); ++ pin2 = ioapic_i8259.pin; ++ apic2 = ioapic_i8259.apic; ++ ++ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " ++ "apic1=%d pin1=%d apic2=%d pin2=%d\n", ++ cfg->vector, apic1, pin1, apic2, pin2); ++ ++ /* ++ * Some BIOS writers are clueless and report the ExtINTA ++ * I/O APIC input from the cascaded 8259A as the timer ++ * interrupt input. So just in case, if only one pin ++ * was found above, try it both directly and through the ++ * 8259A. ++ */ ++ if (pin1 == -1) { ++ panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC"); ++ pin1 = pin2; ++ apic1 = apic2; ++ no_pin1 = 1; ++ } else if (pin2 == -1) { ++ pin2 = pin1; ++ apic2 = apic1; ++ } ++ ++ if (pin1 != -1) { ++ /* Ok, does IRQ0 through the IOAPIC work? */ ++ if (no_pin1) { ++ mp_alloc_timer_irq(apic1, pin1); ++ } else { ++ /* ++ * for edge trigger, it's already unmasked, ++ * so only need to unmask if it is level-trigger ++ * do we really have level trigger timer? ++ */ ++ int idx; ++ idx = find_irq_entry(apic1, pin1, mp_INT); ++ if (idx != -1 && irq_trigger(idx)) ++ unmask_ioapic_irq(irq_get_irq_data(0)); ++ } ++ irq_domain_deactivate_irq(irq_data); ++ irq_domain_activate_irq(irq_data, false); ++ if (timer_irq_works()) { ++ if (disable_timer_pin_1 > 0) ++ clear_IO_APIC_pin(0, pin1); ++ goto out; ++ } ++ panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC"); ++ local_irq_disable(); ++ clear_IO_APIC_pin(apic1, pin1); ++ if (!no_pin1) ++ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " ++ "8254 timer not connected to IO-APIC\n"); ++ ++ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " ++ "(IRQ0) through the 8259A ...\n"); ++ apic_printk(APIC_QUIET, KERN_INFO ++ "..... (found apic %d pin %d) ...\n", apic2, pin2); ++ /* ++ * legacy devices should be connected to IO APIC #0 ++ */ ++ replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); ++ irq_domain_deactivate_irq(irq_data); ++ irq_domain_activate_irq(irq_data, false); ++ legacy_pic->unmask(0); ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); ++ goto out; ++ } ++ /* ++ * Cleanup, just in case ... ++ */ ++ local_irq_disable(); ++ legacy_pic->mask(0); ++ clear_IO_APIC_pin(apic2, pin2); ++ apic_printk(APIC_QUIET, KERN_INFO "....... 
failed.\n"); ++ } ++ ++ apic_printk(APIC_QUIET, KERN_INFO ++ "...trying to set up timer as Virtual Wire IRQ...\n"); ++ ++ lapic_register_intr(0); ++ apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ ++ legacy_pic->unmask(0); ++ ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); ++ goto out; ++ } ++ local_irq_disable(); ++ legacy_pic->mask(0); ++ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); ++ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); ++ ++ apic_printk(APIC_QUIET, KERN_INFO ++ "...trying to set up timer as ExtINT IRQ...\n"); ++ ++ legacy_pic->init(0); ++ legacy_pic->make_irq(0); ++ apic_write(APIC_LVT0, APIC_DM_EXTINT); ++ ++ unlock_ExtINT_logic(); ++ ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); ++ goto out; ++ } ++ local_irq_disable(); ++ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); ++ if (apic_is_x2apic_enabled()) ++ apic_printk(APIC_QUIET, KERN_INFO ++ "Perhaps problem with the pre-enabled x2apic mode\n" ++ "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); ++ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " ++ "report. Then try booting with the 'noapic' option.\n"); ++out: ++ local_irq_restore(flags); ++} ++ ++/* ++ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available ++ * to devices. However there may be an I/O APIC pin available for ++ * this interrupt regardless. The pin may be left unconnected, but ++ * typically it will be reused as an ExtINT cascade interrupt for ++ * the master 8259A. In the MPS case such a pin will normally be ++ * reported as an ExtINT interrupt in the MP table. With ACPI ++ * there is no provision for ExtINT interrupts, and in the absence ++ * of an override it would be treated as an ordinary ISA I/O APIC ++ * interrupt, that is edge-triggered and unmasked by default. We ++ * used to do this, but it caused problems on some systems because ++ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using ++ * the same ExtINT cascade interrupt to drive the local APIC of the ++ * bootstrap processor. Therefore we refrain from routing IRQ2 to ++ * the I/O APIC in all cases now. No actual device should request ++ * it anyway. 
--macro ++ */ ++#define PIC_IRQS (1UL << PIC_CASCADE_IR) ++ ++static int mp_irqdomain_create(int ioapic) ++{ ++ struct irq_alloc_info info; ++ struct irq_domain *parent; ++ int hwirqs = mp_ioapic_pin_count(ioapic); ++ struct ioapic *ip = &ioapics[ioapic]; ++ struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; ++ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); ++ struct fwnode_handle *fn; ++ char *name = "IO-APIC"; ++ ++ if (cfg->type == IOAPIC_DOMAIN_INVALID) ++ return 0; ++ ++ init_irq_alloc_info(&info, NULL); ++ info.type = X86_IRQ_ALLOC_TYPE_IOAPIC; ++ info.ioapic_id = mpc_ioapic_id(ioapic); ++ parent = irq_remapping_get_ir_irq_domain(&info); ++ if (!parent) ++ parent = x86_vector_domain; ++ else ++ name = "IO-APIC-IR"; ++ ++ /* Handle device tree enumerated APICs proper */ ++ if (cfg->dev) { ++ fn = of_node_to_fwnode(cfg->dev); ++ } else { ++ fn = irq_domain_alloc_named_id_fwnode(name, ioapic); ++ if (!fn) ++ return -ENOMEM; ++ } ++ ++ ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops, ++ (void *)(long)ioapic); ++ ++ /* Release fw handle if it was allocated above */ ++ if (!cfg->dev) ++ irq_domain_free_fwnode(fn); ++ ++ if (!ip->irqdomain) ++ return -ENOMEM; ++ ++ ip->irqdomain->parent = parent; ++ ++ if (cfg->type == IOAPIC_DOMAIN_LEGACY || ++ cfg->type == IOAPIC_DOMAIN_STRICT) ++ ioapic_dynirq_base = max(ioapic_dynirq_base, ++ gsi_cfg->gsi_end + 1); ++ ++ return 0; ++} ++ ++static void ioapic_destroy_irqdomain(int idx) ++{ ++ if (ioapics[idx].irqdomain) { ++ irq_domain_remove(ioapics[idx].irqdomain); ++ ioapics[idx].irqdomain = NULL; ++ } ++} ++ ++void __init setup_IO_APIC(void) ++{ ++ int ioapic; ++ ++ if (skip_ioapic_setup || !nr_ioapics) ++ return; ++ ++ io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; ++ ++ apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); ++ for_each_ioapic(ioapic) ++ BUG_ON(mp_irqdomain_create(ioapic)); ++ ++ /* ++ * Set up IO-APIC IRQ routing. ++ */ ++ x86_init.mpparse.setup_ioapic_ids(); ++ ++ sync_Arb_IDs(); ++ setup_IO_APIC_irqs(); ++ init_IO_APIC_traps(); ++ if (nr_legacy_irqs()) ++ check_timer(); ++ ++ ioapic_initialized = 1; ++} ++ ++static void resume_ioapic_id(int ioapic_idx) ++{ ++ unsigned long flags; ++ union IO_APIC_reg_00 reg_00; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic_idx, 0); ++ if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) { ++ reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); ++ io_apic_write(ioapic_idx, 0, reg_00.raw); ++ } ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void ioapic_resume(void) ++{ ++ int ioapic_idx; ++ ++ for_each_ioapic_reverse(ioapic_idx) ++ resume_ioapic_id(ioapic_idx); ++ ++ restore_ioapic_entries(); ++} ++ ++static struct syscore_ops ioapic_syscore_ops = { ++ .suspend = save_ioapic_entries, ++ .resume = ioapic_resume, ++}; ++ ++static int __init ioapic_init_ops(void) ++{ ++ register_syscore_ops(&ioapic_syscore_ops); ++ ++ return 0; ++} ++ ++device_initcall(ioapic_init_ops); ++ ++static int io_apic_get_redir_entries(int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* The register returns the maximum index redir index ++ * supported, which is one less than the total number of redir ++ * entries. 
++ */ ++ return reg_01.bits.entries + 1; ++} ++ ++unsigned int arch_dynirq_lower_bound(unsigned int from) ++{ ++ /* ++ * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use ++ * gsi_top if ioapic_dynirq_base hasn't been initialized yet. ++ */ ++ if (!ioapic_initialized) ++ return gsi_top; ++ /* ++ * For DT enabled machines ioapic_dynirq_base is irrelevant and not ++ * updated. So simply return @from if ioapic_dynirq_base == 0. ++ */ ++ return ioapic_dynirq_base ? : from; ++} ++ ++#ifdef CONFIG_X86_32 ++static int io_apic_get_unique_id(int ioapic, int apic_id) ++{ ++ union IO_APIC_reg_00 reg_00; ++ static physid_mask_t apic_id_map = PHYSID_MASK_NONE; ++ physid_mask_t tmp; ++ unsigned long flags; ++ int i = 0; ++ ++ /* ++ * The P4 platform supports up to 256 APIC IDs on two separate APIC ++ * buses (one for LAPICs, one for IOAPICs), where predecessors only ++ * supports up to 16 on one shared APIC bus. ++ * ++ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full ++ * advantage of new APIC bus architecture. ++ */ ++ ++ if (physids_empty(apic_id_map)) ++ apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ if (apic_id >= get_physical_broadcast()) { ++ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " ++ "%d\n", ioapic, apic_id, reg_00.bits.ID); ++ apic_id = reg_00.bits.ID; ++ } ++ ++ /* ++ * Every APIC in a system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (apic->check_apicid_used(&apic_id_map, apic_id)) { ++ ++ for (i = 0; i < get_physical_broadcast(); i++) { ++ if (!apic->check_apicid_used(&apic_id_map, i)) ++ break; ++ } ++ ++ if (i == get_physical_broadcast()) ++ panic("Max apic_id exceeded!\n"); ++ ++ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " ++ "trying %d\n", ioapic, apic_id, i); ++ ++ apic_id = i; ++ } ++ ++ apic->apicid_to_cpu_present(apic_id, &tmp); ++ physids_or(apic_id_map, apic_id_map, tmp); ++ ++ if (reg_00.bits.ID != apic_id) { ++ reg_00.bits.ID = apic_id; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic, 0, reg_00.raw); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* Sanity check */ ++ if (reg_00.bits.ID != apic_id) { ++ pr_err("IOAPIC[%d]: Unable to change apic_id!\n", ++ ioapic); ++ return -1; ++ } ++ } ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); ++ ++ return apic_id; ++} ++ ++static u8 io_apic_unique_id(int idx, u8 id) ++{ ++ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && ++ !APIC_XAPIC(boot_cpu_apic_version)) ++ return io_apic_get_unique_id(idx, id); ++ else ++ return id; ++} ++#else ++static u8 io_apic_unique_id(int idx, u8 id) ++{ ++ union IO_APIC_reg_00 reg_00; ++ DECLARE_BITMAP(used, 256); ++ unsigned long flags; ++ u8 new_id; ++ int i; ++ ++ bitmap_zero(used, 256); ++ for_each_ioapic(i) ++ __set_bit(mpc_ioapic_id(i), used); ++ ++ /* Hand out the requested id if available */ ++ if (!test_bit(id, used)) ++ return id; ++ ++ /* ++ * Read the current id from the ioapic and keep it if ++ * available. 
++ */ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(idx, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ new_id = reg_00.bits.ID; ++ if (!test_bit(new_id, used)) { ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "IOAPIC[%d]: Using reg apic_id %d instead of %d\n", ++ idx, new_id, id); ++ return new_id; ++ } ++ ++ /* ++ * Get the next free id and write it to the ioapic. ++ */ ++ new_id = find_first_zero_bit(used, 256); ++ reg_00.bits.ID = new_id; ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(idx, 0, reg_00.raw); ++ reg_00.raw = io_apic_read(idx, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ /* Sanity check */ ++ BUG_ON(reg_00.bits.ID != new_id); ++ ++ return new_id; ++} ++#endif ++ ++static int io_apic_get_version(int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.version; ++} ++ ++int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) ++{ ++ int ioapic, pin, idx; ++ ++ if (skip_ioapic_setup) ++ return -1; ++ ++ ioapic = mp_find_ioapic(gsi); ++ if (ioapic < 0) ++ return -1; ++ ++ pin = mp_find_ioapic_pin(ioapic, gsi); ++ if (pin < 0) ++ return -1; ++ ++ idx = find_irq_entry(ioapic, pin, mp_INT); ++ if (idx < 0) ++ return -1; ++ ++ *trigger = irq_trigger(idx); ++ *polarity = irq_polarity(idx); ++ return 0; ++} ++ ++/* ++ * This function updates target affinity of IOAPIC interrupts to include ++ * the CPUs which came online during SMP bringup. ++ */ ++#define IOAPIC_RESOURCE_NAME_SIZE 11 ++ ++static struct resource *ioapic_resources; ++ ++static struct resource * __init ioapic_setup_resources(void) ++{ ++ unsigned long n; ++ struct resource *res; ++ char *mem; ++ int i; ++ ++ if (nr_ioapics == 0) ++ return NULL; ++ ++ n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); ++ n *= nr_ioapics; ++ ++ mem = alloc_bootmem(n); ++ res = (void *)mem; ++ ++ mem += sizeof(struct resource) * nr_ioapics; ++ ++ for_each_ioapic(i) { ++ res[i].name = mem; ++ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; ++ snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); ++ mem += IOAPIC_RESOURCE_NAME_SIZE; ++ ioapics[i].iomem_res = &res[i]; ++ } ++ ++ ioapic_resources = res; ++ ++ return res; ++} ++ ++void __init io_apic_init_mappings(void) ++{ ++ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; ++ struct resource *ioapic_res; ++ int i; ++ ++ ioapic_res = ioapic_setup_resources(); ++ for_each_ioapic(i) { ++ if (smp_found_config) { ++ ioapic_phys = mpc_ioapic_addr(i); ++#ifdef CONFIG_X86_32 ++ if (!ioapic_phys) { ++ printk(KERN_ERR ++ "WARNING: bogus zero IO-APIC " ++ "address found in MPTABLE, " ++ "disabling IO/APIC support!\n"); ++ smp_found_config = 0; ++ skip_ioapic_setup = 1; ++ goto fake_ioapic_page; ++ } ++#endif ++ } else { ++#ifdef CONFIG_X86_32 ++fake_ioapic_page: ++#endif ++ ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); ++ ioapic_phys = __pa(ioapic_phys); ++ } ++ set_fixmap_nocache(idx, ioapic_phys); ++ apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", ++ __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), ++ ioapic_phys); ++ idx++; ++ ++ ioapic_res->start = ioapic_phys; ++ ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ++ ioapic_res++; ++ } ++} ++ ++void __init ioapic_insert_resources(void) ++{ ++ int i; ++ struct resource *r = ioapic_resources; ++ ++ if (!r) { ++ if (nr_ioapics > 0) ++ printk(KERN_ERR 
++ "IO APIC resources couldn't be allocated.\n"); ++ return; ++ } ++ ++ for_each_ioapic(i) { ++ insert_resource(&iomem_resource, r); ++ r++; ++ } ++} ++ ++int mp_find_ioapic(u32 gsi) ++{ ++ int i; ++ ++ if (nr_ioapics == 0) ++ return -1; ++ ++ /* Find the IOAPIC that manages this GSI. */ ++ for_each_ioapic(i) { ++ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i); ++ if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end) ++ return i; ++ } ++ ++ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); ++ return -1; ++} ++ ++int mp_find_ioapic_pin(int ioapic, u32 gsi) ++{ ++ struct mp_ioapic_gsi *gsi_cfg; ++ ++ if (WARN_ON(ioapic < 0)) ++ return -1; ++ ++ gsi_cfg = mp_ioapic_gsi_routing(ioapic); ++ if (WARN_ON(gsi > gsi_cfg->gsi_end)) ++ return -1; ++ ++ return gsi - gsi_cfg->gsi_base; ++} ++ ++static int bad_ioapic_register(int idx) ++{ ++ union IO_APIC_reg_00 reg_00; ++ union IO_APIC_reg_01 reg_01; ++ union IO_APIC_reg_02 reg_02; ++ ++ reg_00.raw = io_apic_read(idx, 0); ++ reg_01.raw = io_apic_read(idx, 1); ++ reg_02.raw = io_apic_read(idx, 2); ++ ++ if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) { ++ pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n", ++ mpc_ioapic_addr(idx)); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int find_free_ioapic_entry(void) ++{ ++ int idx; ++ ++ for (idx = 0; idx < MAX_IO_APICS; idx++) ++ if (ioapics[idx].nr_registers == 0) ++ return idx; ++ ++ return MAX_IO_APICS; ++} ++ ++/** ++ * mp_register_ioapic - Register an IOAPIC device ++ * @id: hardware IOAPIC ID ++ * @address: physical address of IOAPIC register area ++ * @gsi_base: base of GSI associated with the IOAPIC ++ * @cfg: configuration information for the IOAPIC ++ */ ++int mp_register_ioapic(int id, u32 address, u32 gsi_base, ++ struct ioapic_domain_cfg *cfg) ++{ ++ bool hotplug = !!ioapic_initialized; ++ struct mp_ioapic_gsi *gsi_cfg; ++ int idx, ioapic, entries; ++ u32 gsi_end; ++ ++ if (!address) { ++ pr_warn("Bogus (zero) I/O APIC address found, skipping!\n"); ++ return -EINVAL; ++ } ++ for_each_ioapic(ioapic) ++ if (ioapics[ioapic].mp_config.apicaddr == address) { ++ pr_warn("address 0x%x conflicts with IOAPIC%d\n", ++ address, ioapic); ++ return -EEXIST; ++ } ++ ++ idx = find_free_ioapic_entry(); ++ if (idx >= MAX_IO_APICS) { ++ pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n", ++ MAX_IO_APICS, idx); ++ return -ENOSPC; ++ } ++ ++ ioapics[idx].mp_config.type = MP_IOAPIC; ++ ioapics[idx].mp_config.flags = MPC_APIC_USABLE; ++ ioapics[idx].mp_config.apicaddr = address; ++ ++ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); ++ if (bad_ioapic_register(idx)) { ++ clear_fixmap(FIX_IO_APIC_BASE_0 + idx); ++ return -ENODEV; ++ } ++ ++ ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id); ++ ioapics[idx].mp_config.apicver = io_apic_get_version(idx); ++ ++ /* ++ * Build basic GSI lookup table to facilitate gsi->io_apic lookups ++ * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 
++ */ ++ entries = io_apic_get_redir_entries(idx); ++ gsi_end = gsi_base + entries - 1; ++ for_each_ioapic(ioapic) { ++ gsi_cfg = mp_ioapic_gsi_routing(ioapic); ++ if ((gsi_base >= gsi_cfg->gsi_base && ++ gsi_base <= gsi_cfg->gsi_end) || ++ (gsi_end >= gsi_cfg->gsi_base && ++ gsi_end <= gsi_cfg->gsi_end)) { ++ pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n", ++ gsi_base, gsi_end, ++ gsi_cfg->gsi_base, gsi_cfg->gsi_end); ++ clear_fixmap(FIX_IO_APIC_BASE_0 + idx); ++ return -ENOSPC; ++ } ++ } ++ gsi_cfg = mp_ioapic_gsi_routing(idx); ++ gsi_cfg->gsi_base = gsi_base; ++ gsi_cfg->gsi_end = gsi_end; ++ ++ ioapics[idx].irqdomain = NULL; ++ ioapics[idx].irqdomain_cfg = *cfg; ++ ++ /* ++ * If mp_register_ioapic() is called during early boot stage when ++ * walking ACPI/SFI/DT tables, it's too early to create irqdomain, ++ * we are still using bootmem allocator. So delay it to setup_IO_APIC(). ++ */ ++ if (hotplug) { ++ if (mp_irqdomain_create(idx)) { ++ clear_fixmap(FIX_IO_APIC_BASE_0 + idx); ++ return -ENOMEM; ++ } ++ alloc_ioapic_saved_registers(idx); ++ } ++ ++ if (gsi_cfg->gsi_end >= gsi_top) ++ gsi_top = gsi_cfg->gsi_end + 1; ++ if (nr_ioapics <= idx) ++ nr_ioapics = idx + 1; ++ ++ /* Set nr_registers to mark entry present */ ++ ioapics[idx].nr_registers = entries; ++ ++ pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n", ++ idx, mpc_ioapic_id(idx), ++ mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), ++ gsi_cfg->gsi_base, gsi_cfg->gsi_end); ++ ++ return 0; ++} ++ ++int mp_unregister_ioapic(u32 gsi_base) ++{ ++ int ioapic, pin; ++ int found = 0; ++ ++ for_each_ioapic(ioapic) ++ if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) { ++ found = 1; ++ break; ++ } ++ if (!found) { ++ pr_warn("can't find IOAPIC for GSI %d\n", gsi_base); ++ return -ENODEV; ++ } ++ ++ for_each_pin(ioapic, pin) { ++ u32 gsi = mp_pin_to_gsi(ioapic, pin); ++ int irq = mp_map_gsi_to_irq(gsi, 0, NULL); ++ struct mp_chip_data *data; ++ ++ if (irq >= 0) { ++ data = irq_get_chip_data(irq); ++ if (data && data->count) { ++ pr_warn("pin%d on IOAPIC%d is still in use.\n", ++ pin, ioapic); ++ return -EBUSY; ++ } ++ } ++ } ++ ++ /* Mark entry not present */ ++ ioapics[ioapic].nr_registers = 0; ++ ioapic_destroy_irqdomain(ioapic); ++ free_ioapic_saved_registers(ioapic); ++ if (ioapics[ioapic].iomem_res) ++ release_resource(ioapics[ioapic].iomem_res); ++ clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic); ++ memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic])); ++ ++ return 0; ++} ++ ++int mp_ioapic_registered(u32 gsi_base) ++{ ++ int ioapic; ++ ++ for_each_ioapic(ioapic) ++ if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) ++ return 1; ++ ++ return 0; ++} ++ ++static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data, ++ struct irq_alloc_info *info) ++{ ++ if (info && info->ioapic_valid) { ++ data->trigger = info->ioapic_trigger; ++ data->polarity = info->ioapic_polarity; ++ } else if (acpi_get_override_irq(gsi, &data->trigger, ++ &data->polarity) < 0) { ++ /* PCI interrupts are always active low level triggered. 
*/ ++ data->trigger = IOAPIC_LEVEL; ++ data->polarity = IOAPIC_POL_LOW; ++ } ++} ++ ++static void mp_setup_entry(struct irq_cfg *cfg, struct mp_chip_data *data, ++ struct IO_APIC_route_entry *entry) ++{ ++ memset(entry, 0, sizeof(*entry)); ++ entry->delivery_mode = apic->irq_delivery_mode; ++ entry->dest_mode = apic->irq_dest_mode; ++ entry->dest = cfg->dest_apicid; ++ entry->vector = cfg->vector; ++ entry->trigger = data->trigger; ++ entry->polarity = data->polarity; ++ /* ++ * Mask level triggered irqs. Edge triggered irqs are masked ++ * by the irq core code in case they fire. ++ */ ++ if (data->trigger == IOAPIC_LEVEL) ++ entry->mask = IOAPIC_MASKED; ++ else ++ entry->mask = IOAPIC_UNMASKED; ++} ++ ++int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, ++ unsigned int nr_irqs, void *arg) ++{ ++ int ret, ioapic, pin; ++ struct irq_cfg *cfg; ++ struct irq_data *irq_data; ++ struct mp_chip_data *data; ++ struct irq_alloc_info *info = arg; ++ unsigned long flags; ++ ++ if (!info || nr_irqs > 1) ++ return -EINVAL; ++ irq_data = irq_domain_get_irq_data(domain, virq); ++ if (!irq_data) ++ return -EINVAL; ++ ++ ioapic = mp_irqdomain_ioapic_idx(domain); ++ pin = info->ioapic_pin; ++ if (irq_find_mapping(domain, (irq_hw_number_t)pin) > 0) ++ return -EEXIST; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ info->ioapic_entry = &data->entry; ++ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); ++ if (ret < 0) { ++ kfree(data); ++ return ret; ++ } ++ ++ INIT_LIST_HEAD(&data->irq_2_pin); ++ irq_data->hwirq = info->ioapic_pin; ++ irq_data->chip = (domain->parent == x86_vector_domain) ? ++ &ioapic_chip : &ioapic_ir_chip; ++ irq_data->chip_data = data; ++ mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info); ++ ++ cfg = irqd_cfg(irq_data); ++ add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin); ++ ++ local_irq_save(flags); ++ if (info->ioapic_entry) ++ mp_setup_entry(cfg, data, info->ioapic_entry); ++ mp_register_handler(virq, data->trigger); ++ if (virq < nr_legacy_irqs()) ++ legacy_pic->mask(virq); ++ local_irq_restore(flags); ++ ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n", ++ ioapic, mpc_ioapic_id(ioapic), pin, cfg->vector, ++ virq, data->trigger, data->polarity, cfg->dest_apicid); ++ ++ return 0; ++} ++ ++void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, ++ unsigned int nr_irqs) ++{ ++ struct irq_data *irq_data; ++ struct mp_chip_data *data; ++ ++ BUG_ON(nr_irqs != 1); ++ irq_data = irq_domain_get_irq_data(domain, virq); ++ if (irq_data && irq_data->chip_data) { ++ data = irq_data->chip_data; ++ __remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain), ++ (int)irq_data->hwirq); ++ WARN_ON(!list_empty(&data->irq_2_pin)); ++ kfree(irq_data->chip_data); ++ } ++ irq_domain_free_irqs_top(domain, virq, nr_irqs); ++} ++ ++int mp_irqdomain_activate(struct irq_domain *domain, ++ struct irq_data *irq_data, bool reserve) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ ioapic_configure_entry(irq_data); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ return 0; ++} ++ ++void mp_irqdomain_deactivate(struct irq_domain *domain, ++ struct irq_data *irq_data) ++{ ++ /* It won't be called for IRQ with multiple IOAPIC pins associated */ ++ ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain), ++ (int)irq_data->hwirq); ++} ++ ++int mp_irqdomain_ioapic_idx(struct irq_domain *domain) 
++{ ++ return (int)(long)domain->host_data; ++} ++ ++const struct irq_domain_ops mp_ioapic_irqdomain_ops = { ++ .alloc = mp_irqdomain_alloc, ++ .free = mp_irqdomain_free, ++ .activate = mp_irqdomain_activate, ++ .deactivate = mp_irqdomain_deactivate, ++}; +diff -uprN kernel/arch/x86/kernel/apic/ipi.c kernel_new/arch/x86/kernel/apic/ipi.c +--- kernel/arch/x86/kernel/apic/ipi.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/ipi.c 2021-04-01 18:28:07.654863288 +0800 +@@ -28,7 +28,9 @@ void __default_send_IPI_shortcut(unsigne + * to the APIC. + */ + unsigned int cfg; ++ unsigned long flags; + ++ flags = hard_cond_local_irq_save(); + /* + * Wait for idle. + */ +@@ -43,6 +45,8 @@ void __default_send_IPI_shortcut(unsigne + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); ++ ++ hard_cond_local_irq_restore(flags); + } + + /* +@@ -51,8 +55,9 @@ void __default_send_IPI_shortcut(unsigne + */ + void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) + { +- unsigned long cfg; ++ unsigned long cfg, flags; + ++ flags = hard_cond_local_irq_save(); + /* + * Wait for idle. + */ +@@ -76,6 +81,8 @@ void __default_send_IPI_dest_field(unsig + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); ++ ++ hard_cond_local_irq_restore(flags); + } + + void default_send_IPI_single_phys(int cpu, int vector) +@@ -98,12 +105,12 @@ void default_send_IPI_mask_sequence_phys + * to an arbitrary mask, so I do a unicast to each CPU instead. + * - mbligh + */ +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + for_each_cpu(query_cpu, mask) { + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, +@@ -115,14 +122,14 @@ void default_send_IPI_mask_allbutself_ph + + /* See Hack comment above */ + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /* +@@ -147,12 +154,12 @@ void default_send_IPI_mask_sequence_logi + * should be modified to do 1 message per cluster ID - mbligh + */ + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + for_each_cpu(query_cpu, mask) + __default_send_IPI_dest_field( + early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, apic->dest_logical); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, +@@ -164,7 +171,7 @@ void default_send_IPI_mask_allbutself_lo + + /* See Hack comment above */ + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; +@@ -172,7 +179,7 @@ void default_send_IPI_mask_allbutself_lo + early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, apic->dest_logical); + } +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /* +@@ -186,10 +193,10 @@ void default_send_IPI_mask_logical(const + if (!mask) + return; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + __default_send_IPI_dest_field(mask, vector, 
apic->dest_logical); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + void default_send_IPI_allbutself(int vector) +diff -uprN kernel/arch/x86/kernel/apic/msi.c kernel_new/arch/x86/kernel/apic/msi.c +--- kernel/arch/x86/kernel/apic/msi.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/msi.c 2021-04-02 09:05:45.996880795 +0800 +@@ -181,7 +181,10 @@ static struct irq_chip pci_msi_controlle + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_set_affinity = msi_set_affinity, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ .irq_move = move_xxapic_irq, ++#endif ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + + int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +@@ -281,7 +284,10 @@ static struct irq_chip pci_msi_ir_contro + .irq_ack = irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_vcpu_affinity = irq_chip_set_vcpu_affinity_parent, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ .irq_move = move_xxapic_irq, ++#endif ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + + static struct msi_domain_info pci_msi_ir_domain_info = { +@@ -323,7 +329,10 @@ static struct irq_chip dmar_msi_controll + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_write_msi_msg = dmar_msi_write_msg, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ .irq_move = move_xxapic_irq, ++#endif ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + + static irq_hw_number_t dmar_msi_get_hwirq(struct msi_domain_info *info, +@@ -420,7 +429,10 @@ static struct irq_chip hpet_msi_controll + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_write_msi_msg = hpet_msi_write_msg, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ .irq_move = move_xxapic_irq, ++#endif ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + + static irq_hw_number_t hpet_msi_get_hwirq(struct msi_domain_info *info, +diff -uprN kernel/arch/x86/kernel/apic/msi.c.orig kernel_new/arch/x86/kernel/apic/msi.c.orig +--- kernel/arch/x86/kernel/apic/msi.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/msi.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,511 @@ ++/* ++ * Support of MSI, HPET and DMAR interrupts. ++ * ++ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo ++ * Moved from arch/x86/kernel/apic/io_apic.c. ++ * Jiang Liu ++ * Convert to hierarchical irqdomain ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static struct irq_domain *msi_default_domain; ++ ++static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg) ++{ ++ msg->address_hi = MSI_ADDR_BASE_HI; ++ ++ if (x2apic_enabled()) ++ msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid); ++ ++ msg->address_lo = ++ MSI_ADDR_BASE_LO | ++ ((apic->irq_dest_mode == 0) ? 
++ MSI_ADDR_DEST_MODE_PHYSICAL : ++ MSI_ADDR_DEST_MODE_LOGICAL) | ++ MSI_ADDR_REDIRECTION_CPU | ++ MSI_ADDR_DEST_ID(cfg->dest_apicid); ++ ++ msg->data = ++ MSI_DATA_TRIGGER_EDGE | ++ MSI_DATA_LEVEL_ASSERT | ++ MSI_DATA_DELIVERY_FIXED | ++ MSI_DATA_VECTOR(cfg->vector); ++} ++ ++static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) ++{ ++ __irq_msi_compose_msg(irqd_cfg(data), msg); ++} ++ ++static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg) ++{ ++ struct msi_msg msg[2] = { [1] = { }, }; ++ ++ __irq_msi_compose_msg(cfg, msg); ++ irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg); ++} ++ ++static int ++msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) ++{ ++ struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd); ++ struct irq_data *parent = irqd->parent_data; ++ unsigned int cpu; ++ int ret; ++ ++ /* Save the current configuration */ ++ cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd)); ++ old_cfg = *cfg; ++ ++ /* Allocate a new target vector */ ++ ret = parent->chip->irq_set_affinity(parent, mask, force); ++ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) ++ return ret; ++ ++ /* ++ * For non-maskable and non-remapped MSI interrupts the migration ++ * to a different destination CPU and a different vector has to be ++ * done careful to handle the possible stray interrupt which can be ++ * caused by the non-atomic update of the address/data pair. ++ * ++ * Direct update is possible when: ++ * - The MSI is maskable (remapped MSI does not use this code path)). ++ * The quirk bit is not set in this case. ++ * - The new vector is the same as the old vector ++ * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) ++ * - The new destination CPU is the same as the old destination CPU ++ */ ++ if (!irqd_msi_nomask_quirk(irqd) || ++ cfg->vector == old_cfg.vector || ++ old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || ++ cfg->dest_apicid == old_cfg.dest_apicid) { ++ irq_msi_update_msg(irqd, cfg); ++ return ret; ++ } ++ ++ /* ++ * Paranoia: Validate that the interrupt target is the local ++ * CPU. ++ */ ++ if (WARN_ON_ONCE(cpu != smp_processor_id())) { ++ irq_msi_update_msg(irqd, cfg); ++ return ret; ++ } ++ ++ /* ++ * Redirect the interrupt to the new vector on the current CPU ++ * first. This might cause a spurious interrupt on this vector if ++ * the device raises an interrupt right between this update and the ++ * update to the final destination CPU. ++ * ++ * If the vector is in use then the installed device handler will ++ * denote it as spurious which is no harm as this is a rare event ++ * and interrupt handlers have to cope with spurious interrupts ++ * anyway. If the vector is unused, then it is marked so it won't ++ * trigger the 'No irq handler for vector' warning in do_IRQ(). ++ * ++ * This requires to hold vector lock to prevent concurrent updates to ++ * the affected vector. ++ */ ++ lock_vector_lock(); ++ ++ /* ++ * Mark the new target vector on the local CPU if it is currently ++ * unused. Reuse the VECTOR_RETRIGGERED state which is also used in ++ * the CPU hotplug path for a similar purpose. This cannot be ++ * undone here as the current CPU has interrupts disabled and ++ * cannot handle the interrupt before the whole set_affinity() ++ * section is done. In the CPU unplug case, the current CPU is ++ * about to vanish and will not handle any interrupts anymore. The ++ * vector is cleaned up when the CPU comes online again. 
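/*
 * Condensed restatement of the rules spelled out in the comments above
 * (field and macro names follow the surrounding code; the helper itself
 * is only a sketch, not something the file defines): the MSI address/data
 * pair may be rewritten directly, without the temporary redirect through
 * the local CPU, when any of the following holds.
 */
static bool msi_can_update_directly(const struct irq_cfg *new_cfg,
				    const struct irq_cfg *old_cfg,
				    bool nomask_quirk)
{
	return !nomask_quirk ||					/* MSI is maskable */
	       new_cfg->vector == old_cfg->vector ||		/* same vector */
	       old_cfg->vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||/* interrupt starts up */
	       new_cfg->dest_apicid == old_cfg->dest_apicid;	/* same destination CPU */
}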
++ */ ++ if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector]))) ++ this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED); ++ ++ /* Redirect it to the new vector on the local CPU temporarily */ ++ old_cfg.vector = cfg->vector; ++ irq_msi_update_msg(irqd, &old_cfg); ++ ++ /* Now transition it to the target CPU */ ++ irq_msi_update_msg(irqd, cfg); ++ ++ /* ++ * All interrupts after this point are now targeted at the new ++ * vector/CPU. ++ * ++ * Drop vector lock before testing whether the temporary assignment ++ * to the local CPU was hit by an interrupt raised in the device, ++ * because the retrigger function acquires vector lock again. ++ */ ++ unlock_vector_lock(); ++ ++ /* ++ * Check whether the transition raced with a device interrupt and ++ * is pending in the local APICs IRR. It is safe to do this outside ++ * of vector lock as the irq_desc::lock of this interrupt is still ++ * held and interrupts are disabled: The check is not accessing the ++ * underlying vector store. It's just checking the local APIC's ++ * IRR. ++ */ ++ if (lapic_vector_set_in_irr(cfg->vector)) ++ irq_data_get_irq_chip(irqd)->irq_retrigger(irqd); ++ ++ return ret; ++} ++ ++/* ++ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, ++ * which implement the MSI or MSI-X Capability Structure. ++ */ ++static struct irq_chip pci_msi_controller = { ++ .name = "PCI-MSI", ++ .irq_unmask = pci_msi_unmask_irq, ++ .irq_mask = pci_msi_mask_irq, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_compose_msi_msg = irq_msi_compose_msg, ++ .irq_set_affinity = msi_set_affinity, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) ++{ ++ struct irq_domain *domain; ++ struct irq_alloc_info info; ++ ++ init_irq_alloc_info(&info, NULL); ++ info.type = X86_IRQ_ALLOC_TYPE_MSI; ++ info.msi_dev = dev; ++ ++ domain = irq_remapping_get_irq_domain(&info); ++ if (domain == NULL) ++ domain = msi_default_domain; ++ if (domain == NULL) ++ return -ENOSYS; ++ ++ return msi_domain_alloc_irqs(domain, &dev->dev, nvec); ++} ++ ++void native_teardown_msi_irq(unsigned int irq) ++{ ++ irq_domain_free_irqs(irq, 1); ++} ++ ++static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info, ++ msi_alloc_info_t *arg) ++{ ++ return arg->msi_hwirq; ++} ++ ++int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, ++ msi_alloc_info_t *arg) ++{ ++ struct pci_dev *pdev = to_pci_dev(dev); ++ struct msi_desc *desc = first_pci_msi_entry(pdev); ++ ++ init_irq_alloc_info(arg, NULL); ++ arg->msi_dev = pdev; ++ if (desc->msi_attrib.is_msix) { ++ arg->type = X86_IRQ_ALLOC_TYPE_MSIX; ++ } else { ++ arg->type = X86_IRQ_ALLOC_TYPE_MSI; ++ arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(pci_msi_prepare); ++ ++void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) ++{ ++ arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc); ++} ++EXPORT_SYMBOL_GPL(pci_msi_set_desc); ++ ++static struct msi_domain_ops pci_msi_domain_ops = { ++ .get_hwirq = pci_msi_get_hwirq, ++ .msi_prepare = pci_msi_prepare, ++ .set_desc = pci_msi_set_desc, ++}; ++ ++static struct msi_domain_info pci_msi_domain_info = { ++ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | ++ MSI_FLAG_PCI_MSIX, ++ .ops = &pci_msi_domain_ops, ++ .chip = &pci_msi_controller, ++ .handler = handle_edge_irq, ++ .handler_name = "edge", ++}; ++ ++void __init arch_init_msi_domain(struct irq_domain *parent) ++{ ++ struct 
fwnode_handle *fn; ++ ++ if (disable_apic) ++ return; ++ ++ fn = irq_domain_alloc_named_fwnode("PCI-MSI"); ++ if (fn) { ++ msi_default_domain = ++ pci_msi_create_irq_domain(fn, &pci_msi_domain_info, ++ parent); ++ irq_domain_free_fwnode(fn); ++ } ++ if (!msi_default_domain) ++ pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); ++ else ++ msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK; ++} ++ ++#ifdef CONFIG_IRQ_REMAP ++static struct irq_chip pci_msi_ir_controller = { ++ .name = "IR-PCI-MSI", ++ .irq_unmask = pci_msi_unmask_irq, ++ .irq_mask = pci_msi_mask_irq, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_set_vcpu_affinity = irq_chip_set_vcpu_affinity_parent, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++static struct msi_domain_info pci_msi_ir_domain_info = { ++ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | ++ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, ++ .ops = &pci_msi_domain_ops, ++ .chip = &pci_msi_ir_controller, ++ .handler = handle_edge_irq, ++ .handler_name = "edge", ++}; ++ ++struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent, ++ const char *name, int id) ++{ ++ struct fwnode_handle *fn; ++ struct irq_domain *d; ++ ++ fn = irq_domain_alloc_named_id_fwnode(name, id); ++ if (!fn) ++ return NULL; ++ d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent); ++ irq_domain_free_fwnode(fn); ++ return d; ++} ++#endif ++ ++#ifdef CONFIG_DMAR_TABLE ++static void dmar_msi_write_msg(struct irq_data *data, struct msi_msg *msg) ++{ ++ dmar_msi_write(data->irq, msg); ++} ++ ++static struct irq_chip dmar_msi_controller = { ++ .name = "DMAR-MSI", ++ .irq_unmask = dmar_msi_unmask, ++ .irq_mask = dmar_msi_mask, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_set_affinity = msi_domain_set_affinity, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_compose_msi_msg = irq_msi_compose_msg, ++ .irq_write_msi_msg = dmar_msi_write_msg, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++static irq_hw_number_t dmar_msi_get_hwirq(struct msi_domain_info *info, ++ msi_alloc_info_t *arg) ++{ ++ return arg->dmar_id; ++} ++ ++static int dmar_msi_init(struct irq_domain *domain, ++ struct msi_domain_info *info, unsigned int virq, ++ irq_hw_number_t hwirq, msi_alloc_info_t *arg) ++{ ++ irq_domain_set_info(domain, virq, arg->dmar_id, info->chip, NULL, ++ handle_edge_irq, arg->dmar_data, "edge"); ++ ++ return 0; ++} ++ ++static struct msi_domain_ops dmar_msi_domain_ops = { ++ .get_hwirq = dmar_msi_get_hwirq, ++ .msi_init = dmar_msi_init, ++}; ++ ++static struct msi_domain_info dmar_msi_domain_info = { ++ .ops = &dmar_msi_domain_ops, ++ .chip = &dmar_msi_controller, ++}; ++ ++static struct irq_domain *dmar_get_irq_domain(void) ++{ ++ static struct irq_domain *dmar_domain; ++ static DEFINE_MUTEX(dmar_lock); ++ struct fwnode_handle *fn; ++ ++ mutex_lock(&dmar_lock); ++ if (dmar_domain) ++ goto out; ++ ++ fn = irq_domain_alloc_named_fwnode("DMAR-MSI"); ++ if (fn) { ++ dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info, ++ x86_vector_domain); ++ irq_domain_free_fwnode(fn); ++ } ++out: ++ mutex_unlock(&dmar_lock); ++ return dmar_domain; ++} ++ ++int dmar_alloc_hwirq(int id, int node, void *arg) ++{ ++ struct irq_domain *domain = dmar_get_irq_domain(); ++ struct irq_alloc_info info; ++ ++ if (!domain) ++ return -1; ++ ++ init_irq_alloc_info(&info, NULL); ++ info.type = X86_IRQ_ALLOC_TYPE_DMAR; ++ info.dmar_id = id; ++ info.dmar_data = arg; ++ ++ return irq_domain_alloc_irqs(domain, 1, node, &info); 
++} ++ ++void dmar_free_hwirq(int irq) ++{ ++ irq_domain_free_irqs(irq, 1); ++} ++#endif ++ ++/* ++ * MSI message composition ++ */ ++#ifdef CONFIG_HPET_TIMER ++static inline int hpet_dev_id(struct irq_domain *domain) ++{ ++ struct msi_domain_info *info = msi_get_domain_info(domain); ++ ++ return (int)(long)info->data; ++} ++ ++static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg) ++{ ++ hpet_msi_write(irq_data_get_irq_handler_data(data), msg); ++} ++ ++static struct irq_chip hpet_msi_controller __ro_after_init = { ++ .name = "HPET-MSI", ++ .irq_unmask = hpet_msi_unmask, ++ .irq_mask = hpet_msi_mask, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_set_affinity = msi_domain_set_affinity, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_compose_msi_msg = irq_msi_compose_msg, ++ .irq_write_msi_msg = hpet_msi_write_msg, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++static irq_hw_number_t hpet_msi_get_hwirq(struct msi_domain_info *info, ++ msi_alloc_info_t *arg) ++{ ++ return arg->hpet_index; ++} ++ ++static int hpet_msi_init(struct irq_domain *domain, ++ struct msi_domain_info *info, unsigned int virq, ++ irq_hw_number_t hwirq, msi_alloc_info_t *arg) ++{ ++ irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); ++ irq_domain_set_info(domain, virq, arg->hpet_index, info->chip, NULL, ++ handle_edge_irq, arg->hpet_data, "edge"); ++ ++ return 0; ++} ++ ++static void hpet_msi_free(struct irq_domain *domain, ++ struct msi_domain_info *info, unsigned int virq) ++{ ++ irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); ++} ++ ++static struct msi_domain_ops hpet_msi_domain_ops = { ++ .get_hwirq = hpet_msi_get_hwirq, ++ .msi_init = hpet_msi_init, ++ .msi_free = hpet_msi_free, ++}; ++ ++static struct msi_domain_info hpet_msi_domain_info = { ++ .ops = &hpet_msi_domain_ops, ++ .chip = &hpet_msi_controller, ++}; ++ ++struct irq_domain *hpet_create_irq_domain(int hpet_id) ++{ ++ struct msi_domain_info *domain_info; ++ struct irq_domain *parent, *d; ++ struct irq_alloc_info info; ++ struct fwnode_handle *fn; ++ ++ if (x86_vector_domain == NULL) ++ return NULL; ++ ++ domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL); ++ if (!domain_info) ++ return NULL; ++ ++ *domain_info = hpet_msi_domain_info; ++ domain_info->data = (void *)(long)hpet_id; ++ ++ init_irq_alloc_info(&info, NULL); ++ info.type = X86_IRQ_ALLOC_TYPE_HPET; ++ info.hpet_id = hpet_id; ++ parent = irq_remapping_get_ir_irq_domain(&info); ++ if (parent == NULL) ++ parent = x86_vector_domain; ++ else ++ hpet_msi_controller.name = "IR-HPET-MSI"; ++ ++ fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name, ++ hpet_id); ++ if (!fn) { ++ kfree(domain_info); ++ return NULL; ++ } ++ ++ d = msi_create_irq_domain(fn, domain_info, parent); ++ irq_domain_free_fwnode(fn); ++ return d; ++} ++ ++int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev, ++ int dev_num) ++{ ++ struct irq_alloc_info info; ++ ++ init_irq_alloc_info(&info, NULL); ++ info.type = X86_IRQ_ALLOC_TYPE_HPET; ++ info.hpet_data = dev; ++ info.hpet_id = hpet_dev_id(domain); ++ info.hpet_index = dev_num; ++ ++ return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); ++} ++#endif +diff -uprN kernel/arch/x86/kernel/apic/msi.c.rej kernel_new/arch/x86/kernel/apic/msi.c.rej +--- kernel/arch/x86/kernel/apic/msi.c.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/msi.c.rej 2021-04-01 18:28:07.654863288 +0800 +@@ -0,0 +1,14 @@ ++--- arch/x86/kernel/apic/msi.c 2019-12-18 03:36:04.000000000 +0800 +++++ arch/x86/kernel/apic/msi.c 2021-03-22 
09:21:43.194415288 +0800 ++@@ -61,7 +61,10 @@ static struct irq_chip pci_msi_controlle ++ .irq_ack = irq_chip_ack_parent, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_compose_msi_msg = irq_msi_compose_msg, ++- .flags = IRQCHIP_SKIP_SET_WAKE, +++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) +++ .irq_move = move_xxapic_irq, +++#endif +++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, ++ }; ++ ++ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +diff -uprN kernel/arch/x86/kernel/apic/vector.c kernel_new/arch/x86/kernel/apic/vector.c +--- kernel/arch/x86/kernel/apic/vector.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/vector.c 2021-04-01 18:28:07.654863288 +0800 +@@ -41,7 +41,7 @@ struct apic_chip_data { + + struct irq_domain *x86_vector_domain; + EXPORT_SYMBOL_GPL(x86_vector_domain); +-static DEFINE_RAW_SPINLOCK(vector_lock); ++static IPIPE_DEFINE_RAW_SPINLOCK(vector_lock); + static cpumask_var_t vector_searchmask; + static struct irq_chip lapic_controller; + static struct irq_matrix *vector_matrix; +@@ -121,7 +121,9 @@ static void apic_update_irq_cfg(struct i + { + struct apic_chip_data *apicd = apic_chip_data(irqd); + ++#ifndef CONFIG_IPIPE + lockdep_assert_held(&vector_lock); ++#endif + + apicd->hw_irq_cfg.vector = vector; + apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu); +@@ -137,7 +139,9 @@ static void apic_update_vector(struct ir + struct irq_desc *desc = irq_data_to_desc(irqd); + bool managed = irqd_affinity_is_managed(irqd); + ++#ifndef CONFIG_IPIPE + lockdep_assert_held(&vector_lock); ++#endif + + trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, + apicd->cpu); +@@ -227,7 +231,9 @@ assign_vector_locked(struct irq_data *ir + unsigned int cpu = apicd->cpu; + int vector = apicd->vector; + ++#ifndef CONFIG_IPIPE + lockdep_assert_held(&vector_lock); ++#endif + + /* + * If the current target CPU is online and in the new requested +@@ -334,7 +340,9 @@ static void clear_irq_vector(struct irq_ + bool managed = irqd_affinity_is_managed(irqd); + unsigned int vector = apicd->vector; + ++#ifndef CONFIG_IPIPE + lockdep_assert_held(&vector_lock); ++#endif + + if (!vector) + return; +@@ -745,7 +753,9 @@ void lapic_online(void) + { + unsigned int vector; + ++#ifndef CONFIG_IPIPE + lockdep_assert_held(&vector_lock); ++#endif + + /* Online the vector matrix array for this CPU */ + irq_matrix_online(vector_matrix); +@@ -806,13 +816,17 @@ static int apic_retrigger_irq(struct irq + + void apic_ack_irq(struct irq_data *irqd) + { ++#ifndef CONFIG_IPIPE + irq_move_irq(irqd); +- ack_APIC_irq(); ++#endif /* !CONFIG_IPIPE */ ++ __ack_APIC_irq(); + } + + void apic_ack_edge(struct irq_data *irqd) + { ++#ifndef CONFIG_IPIPE + irq_complete_move(irqd_cfg(irqd)); ++#endif /* !CONFIG_IPIPE */ + apic_ack_irq(irqd); + } + +diff -uprN kernel/arch/x86/kernel/apic/vector.c.orig kernel_new/arch/x86/kernel/apic/vector.c.orig +--- kernel/arch/x86/kernel/apic/vector.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/vector.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,1249 @@ ++/* ++ * Local APIC related interfaces to support IOAPIC, MSI, etc. ++ * ++ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo ++ * Moved from arch/x86/kernel/apic/io_apic.c. 
++ * Jiang Liu ++ * Enable support of hierarchical irqdomains ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++struct apic_chip_data { ++ struct irq_cfg hw_irq_cfg; ++ unsigned int vector; ++ unsigned int prev_vector; ++ unsigned int cpu; ++ unsigned int prev_cpu; ++ unsigned int irq; ++ struct hlist_node clist; ++ unsigned int move_in_progress : 1, ++ is_managed : 1, ++ can_reserve : 1, ++ has_reserved : 1; ++}; ++ ++struct irq_domain *x86_vector_domain; ++EXPORT_SYMBOL_GPL(x86_vector_domain); ++static DEFINE_RAW_SPINLOCK(vector_lock); ++static cpumask_var_t vector_searchmask; ++static struct irq_chip lapic_controller; ++static struct irq_matrix *vector_matrix; ++#ifdef CONFIG_SMP ++static DEFINE_PER_CPU(struct hlist_head, cleanup_list); ++#endif ++ ++void lock_vector_lock(void) ++{ ++ /* Used to the online set of cpus does not change ++ * during assign_irq_vector. ++ */ ++ raw_spin_lock(&vector_lock); ++} ++ ++void unlock_vector_lock(void) ++{ ++ raw_spin_unlock(&vector_lock); ++} ++ ++void init_irq_alloc_info(struct irq_alloc_info *info, ++ const struct cpumask *mask) ++{ ++ memset(info, 0, sizeof(*info)); ++ info->mask = mask; ++} ++ ++void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) ++{ ++ if (src) ++ *dst = *src; ++ else ++ memset(dst, 0, sizeof(*dst)); ++} ++ ++static struct apic_chip_data *apic_chip_data(struct irq_data *irqd) ++{ ++ if (!irqd) ++ return NULL; ++ ++ while (irqd->parent_data) ++ irqd = irqd->parent_data; ++ ++ return irqd->chip_data; ++} ++ ++struct irq_cfg *irqd_cfg(struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ ++ return apicd ? &apicd->hw_irq_cfg : NULL; ++} ++EXPORT_SYMBOL_GPL(irqd_cfg); ++ ++struct irq_cfg *irq_cfg(unsigned int irq) ++{ ++ return irqd_cfg(irq_get_irq_data(irq)); ++} ++ ++static struct apic_chip_data *alloc_apic_chip_data(int node) ++{ ++ struct apic_chip_data *apicd; ++ ++ apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node); ++ if (apicd) ++ INIT_HLIST_NODE(&apicd->clist); ++ return apicd; ++} ++ ++static void free_apic_chip_data(struct apic_chip_data *apicd) ++{ ++ kfree(apicd); ++} ++ ++static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector, ++ unsigned int cpu) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ ++ lockdep_assert_held(&vector_lock); ++ ++ apicd->hw_irq_cfg.vector = vector; ++ apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu); ++ irq_data_update_effective_affinity(irqd, cpumask_of(cpu)); ++ trace_vector_config(irqd->irq, vector, cpu, ++ apicd->hw_irq_cfg.dest_apicid); ++} ++ ++static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, ++ unsigned int newcpu) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ struct irq_desc *desc = irq_data_to_desc(irqd); ++ bool managed = irqd_affinity_is_managed(irqd); ++ ++ lockdep_assert_held(&vector_lock); ++ ++ trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, ++ apicd->cpu); ++ ++ /* ++ * If there is no vector associated or if the associated vector is ++ * the shutdown vector, which is associated to make PCI/MSI ++ * shutdown mode work, then there is nothing to release. 
Clear out ++ * prev_vector for this and the offlined target case. ++ */ ++ apicd->prev_vector = 0; ++ if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR) ++ goto setnew; ++ /* ++ * If the target CPU of the previous vector is online, then mark ++ * the vector as move in progress and store it for cleanup when the ++ * first interrupt on the new vector arrives. If the target CPU is ++ * offline then the regular release mechanism via the cleanup ++ * vector is not possible and the vector can be immediately freed ++ * in the underlying matrix allocator. ++ */ ++ if (cpu_online(apicd->cpu)) { ++ apicd->move_in_progress = true; ++ apicd->prev_vector = apicd->vector; ++ apicd->prev_cpu = apicd->cpu; ++ } else { ++ irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, ++ managed); ++ } ++ ++setnew: ++ apicd->vector = newvec; ++ apicd->cpu = newcpu; ++ BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); ++ per_cpu(vector_irq, newcpu)[newvec] = desc; ++} ++ ++static void vector_assign_managed_shutdown(struct irq_data *irqd) ++{ ++ unsigned int cpu = cpumask_first(cpu_online_mask); ++ ++ apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu); ++} ++ ++static int reserve_managed_vector(struct irq_data *irqd) ++{ ++ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ unsigned long flags; ++ int ret; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ apicd->is_managed = true; ++ ret = irq_matrix_reserve_managed(vector_matrix, affmsk); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ trace_vector_reserve_managed(irqd->irq, ret); ++ return ret; ++} ++ ++static void reserve_irq_vector_locked(struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ ++ irq_matrix_reserve(vector_matrix); ++ apicd->can_reserve = true; ++ apicd->has_reserved = true; ++ irqd_set_can_reserve(irqd); ++ trace_vector_reserve(irqd->irq, 0); ++ vector_assign_managed_shutdown(irqd); ++} ++ ++static int reserve_irq_vector(struct irq_data *irqd) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ reserve_irq_vector_locked(irqd); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ return 0; ++} ++ ++static int ++assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ bool resvd = apicd->has_reserved; ++ unsigned int cpu = apicd->cpu; ++ int vector = apicd->vector; ++ ++ lockdep_assert_held(&vector_lock); ++ ++ /* ++ * If the current target CPU is online and in the new requested ++ * affinity mask, there is no point in moving the interrupt from ++ * one CPU to another. ++ */ ++ if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) ++ return 0; ++ ++ /* ++ * Careful here. @apicd might either have move_in_progress set or ++ * be enqueued for cleanup. Assigning a new vector would either ++ * leave a stale vector on some CPU around or in case of a pending ++ * cleanup corrupt the hlist. 
++ */ ++ if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist)) ++ return -EBUSY; ++ ++ vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); ++ trace_vector_alloc(irqd->irq, vector, resvd, vector); ++ if (vector < 0) ++ return vector; ++ apic_update_vector(irqd, vector, cpu); ++ apic_update_irq_cfg(irqd, vector, cpu); ++ ++ return 0; ++} ++ ++static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest) ++{ ++ unsigned long flags; ++ int ret; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ cpumask_and(vector_searchmask, dest, cpu_online_mask); ++ ret = assign_vector_locked(irqd, vector_searchmask); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ return ret; ++} ++ ++static int assign_irq_vector_any_locked(struct irq_data *irqd) ++{ ++ /* Get the affinity mask - either irq_default_affinity or (user) set */ ++ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); ++ int node = irq_data_get_node(irqd); ++ ++ if (node == NUMA_NO_NODE) ++ goto all; ++ /* Try the intersection of @affmsk and node mask */ ++ cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); ++ if (!assign_vector_locked(irqd, vector_searchmask)) ++ return 0; ++ /* Try the node mask */ ++ if (!assign_vector_locked(irqd, cpumask_of_node(node))) ++ return 0; ++all: ++ /* Try the full affinity mask */ ++ cpumask_and(vector_searchmask, affmsk, cpu_online_mask); ++ if (!assign_vector_locked(irqd, vector_searchmask)) ++ return 0; ++ /* Try the full online mask */ ++ return assign_vector_locked(irqd, cpu_online_mask); ++} ++ ++static int ++assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info) ++{ ++ if (irqd_affinity_is_managed(irqd)) ++ return reserve_managed_vector(irqd); ++ if (info->mask) ++ return assign_irq_vector(irqd, info->mask); ++ /* ++ * Make only a global reservation with no guarantee. A real vector ++ * is associated at activation time. 
++ */ ++ return reserve_irq_vector(irqd); ++} ++ ++static int ++assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) ++{ ++ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ int vector, cpu; ++ ++ cpumask_and(vector_searchmask, dest, affmsk); ++ ++ /* set_affinity might call here for nothing */ ++ if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask)) ++ return 0; ++ vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask, ++ &cpu); ++ trace_vector_alloc_managed(irqd->irq, vector, vector); ++ if (vector < 0) ++ return vector; ++ apic_update_vector(irqd, vector, cpu); ++ apic_update_irq_cfg(irqd, vector, cpu); ++ return 0; ++} ++ ++static void clear_irq_vector(struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ bool managed = irqd_affinity_is_managed(irqd); ++ unsigned int vector = apicd->vector; ++ ++ lockdep_assert_held(&vector_lock); ++ ++ if (!vector) ++ return; ++ ++ trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, ++ apicd->prev_cpu); ++ ++ per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN; ++ irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); ++ apicd->vector = 0; ++ ++ /* Clean up move in progress */ ++ vector = apicd->prev_vector; ++ if (!vector) ++ return; ++ ++ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN; ++ irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); ++ apicd->prev_vector = 0; ++ apicd->move_in_progress = 0; ++ hlist_del_init(&apicd->clist); ++} ++ ++static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ unsigned long flags; ++ ++ trace_vector_deactivate(irqd->irq, apicd->is_managed, ++ apicd->can_reserve, false); ++ ++ /* Regular fixed assigned interrupt */ ++ if (!apicd->is_managed && !apicd->can_reserve) ++ return; ++ /* If the interrupt has a global reservation, nothing to do */ ++ if (apicd->has_reserved) ++ return; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ clear_irq_vector(irqd); ++ if (apicd->can_reserve) ++ reserve_irq_vector_locked(irqd); ++ else ++ vector_assign_managed_shutdown(irqd); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++} ++ ++static int activate_reserved(struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ int ret; ++ ++ ret = assign_irq_vector_any_locked(irqd); ++ if (!ret) { ++ apicd->has_reserved = false; ++ /* ++ * Core might have disabled reservation mode after ++ * allocating the irq descriptor. Ideally this should ++ * happen before allocation time, but that would require ++ * completely convoluted ways of transporting that ++ * information. ++ */ ++ if (!irqd_can_reserve(irqd)) ++ apicd->can_reserve = false; ++ } ++ ++ /* ++ * Check to ensure that the effective affinity mask is a subset ++ * the user supplied affinity mask, and warn the user if it is not ++ */ ++ if (!cpumask_subset(irq_data_get_effective_affinity_mask(irqd), ++ irq_data_get_affinity_mask(irqd))) { ++ pr_warn("irq %u: Affinity broken due to vector space exhaustion.\n", ++ irqd->irq); ++ } ++ ++ return ret; ++} ++ ++static int activate_managed(struct irq_data *irqd) ++{ ++ const struct cpumask *dest = irq_data_get_affinity_mask(irqd); ++ int ret; ++ ++ cpumask_and(vector_searchmask, dest, cpu_online_mask); ++ if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) { ++ /* Something in the core code broke! 
Survive gracefully */ ++ pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq); ++ return -EINVAL; ++ } ++ ++ ret = assign_managed_vector(irqd, vector_searchmask); ++ /* ++ * This should not happen. The vector reservation got buggered. Handle ++ * it gracefully. ++ */ ++ if (WARN_ON_ONCE(ret < 0)) { ++ pr_err("Managed startup irq %u, no vector available\n", ++ irqd->irq); ++ } ++ return ret; ++} ++ ++static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, ++ bool reserve) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ unsigned long flags; ++ int ret = 0; ++ ++ trace_vector_activate(irqd->irq, apicd->is_managed, ++ apicd->can_reserve, reserve); ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ if (!apicd->can_reserve && !apicd->is_managed) ++ assign_irq_vector_any_locked(irqd); ++ else if (reserve || irqd_is_managed_and_shutdown(irqd)) ++ vector_assign_managed_shutdown(irqd); ++ else if (apicd->is_managed) ++ ret = activate_managed(irqd); ++ else if (apicd->has_reserved) ++ ret = activate_reserved(irqd); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ return ret; ++} ++ ++static void vector_free_reserved_and_managed(struct irq_data *irqd) ++{ ++ const struct cpumask *dest = irq_data_get_affinity_mask(irqd); ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ ++ trace_vector_teardown(irqd->irq, apicd->is_managed, ++ apicd->has_reserved); ++ ++ if (apicd->has_reserved) ++ irq_matrix_remove_reserved(vector_matrix); ++ if (apicd->is_managed) ++ irq_matrix_remove_managed(vector_matrix, dest); ++} ++ ++static void x86_vector_free_irqs(struct irq_domain *domain, ++ unsigned int virq, unsigned int nr_irqs) ++{ ++ struct apic_chip_data *apicd; ++ struct irq_data *irqd; ++ unsigned long flags; ++ int i; ++ ++ for (i = 0; i < nr_irqs; i++) { ++ irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i); ++ if (irqd && irqd->chip_data) { ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ clear_irq_vector(irqd); ++ vector_free_reserved_and_managed(irqd); ++ apicd = irqd->chip_data; ++ irq_domain_reset_irq_data(irqd); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ free_apic_chip_data(apicd); ++ } ++ } ++} ++ ++static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, ++ struct apic_chip_data *apicd) ++{ ++ unsigned long flags; ++ bool realloc = false; ++ ++ apicd->vector = ISA_IRQ_VECTOR(virq); ++ apicd->cpu = 0; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ /* ++ * If the interrupt is activated, then it must stay at this vector ++ * position. That's usually the timer interrupt (0). 
++ */ ++ if (irqd_is_activated(irqd)) { ++ trace_vector_setup(virq, true, 0); ++ apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); ++ } else { ++ /* Release the vector */ ++ apicd->can_reserve = true; ++ irqd_set_can_reserve(irqd); ++ clear_irq_vector(irqd); ++ realloc = true; ++ } ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ return realloc; ++} ++ ++static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, ++ unsigned int nr_irqs, void *arg) ++{ ++ struct irq_alloc_info *info = arg; ++ struct apic_chip_data *apicd; ++ struct irq_data *irqd; ++ int i, err, node; ++ ++ if (disable_apic) ++ return -ENXIO; ++ ++ /* Currently vector allocator can't guarantee contiguous allocations */ ++ if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1) ++ return -ENOSYS; ++ ++ for (i = 0; i < nr_irqs; i++) { ++ irqd = irq_domain_get_irq_data(domain, virq + i); ++ BUG_ON(!irqd); ++ node = irq_data_get_node(irqd); ++ WARN_ON_ONCE(irqd->chip_data); ++ apicd = alloc_apic_chip_data(node); ++ if (!apicd) { ++ err = -ENOMEM; ++ goto error; ++ } ++ ++ apicd->irq = virq + i; ++ irqd->chip = &lapic_controller; ++ irqd->chip_data = apicd; ++ irqd->hwirq = virq + i; ++ irqd_set_single_target(irqd); ++ ++ /* Don't invoke affinity setter on deactivated interrupts */ ++ irqd_set_affinity_on_activate(irqd); ++ ++ /* ++ * Legacy vectors are already assigned when the IOAPIC ++ * takes them over. They stay on the same vector. This is ++ * required for check_timer() to work correctly as it might ++ * switch back to legacy mode. Only update the hardware ++ * config. ++ */ ++ if (info->flags & X86_IRQ_ALLOC_LEGACY) { ++ if (!vector_configure_legacy(virq + i, irqd, apicd)) ++ continue; ++ } ++ ++ err = assign_irq_vector_policy(irqd, info); ++ trace_vector_setup(virq + i, false, err); ++ if (err) { ++ irqd->chip_data = NULL; ++ free_apic_chip_data(apicd); ++ goto error; ++ } ++ } ++ ++ return 0; ++ ++error: ++ x86_vector_free_irqs(domain, virq, i); ++ return err; ++} ++ ++#ifdef CONFIG_GENERIC_IRQ_DEBUGFS ++static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, ++ struct irq_data *irqd, int ind) ++{ ++ struct apic_chip_data apicd; ++ unsigned long flags; ++ int irq; ++ ++ if (!irqd) { ++ irq_matrix_debug_show(m, vector_matrix, ind); ++ return; ++ } ++ ++ irq = irqd->irq; ++ if (irq < nr_legacy_irqs() && !test_bit(irq, &io_apic_irqs)) { ++ seq_printf(m, "%*sVector: %5d\n", ind, "", ISA_IRQ_VECTOR(irq)); ++ seq_printf(m, "%*sTarget: Legacy PIC all CPUs\n", ind, ""); ++ return; ++ } ++ ++ if (!irqd->chip_data) { ++ seq_printf(m, "%*sVector: Not assigned\n", ind, ""); ++ return; ++ } ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ memcpy(&apicd, irqd->chip_data, sizeof(apicd)); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ ++ seq_printf(m, "%*sVector: %5u\n", ind, "", apicd.vector); ++ seq_printf(m, "%*sTarget: %5u\n", ind, "", apicd.cpu); ++ if (apicd.prev_vector) { ++ seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", apicd.prev_vector); ++ seq_printf(m, "%*sPrevious target: %5u\n", ind, "", apicd.prev_cpu); ++ } ++ seq_printf(m, "%*smove_in_progress: %u\n", ind, "", apicd.move_in_progress ? 1 : 0); ++ seq_printf(m, "%*sis_managed: %u\n", ind, "", apicd.is_managed ? 1 : 0); ++ seq_printf(m, "%*scan_reserve: %u\n", ind, "", apicd.can_reserve ? 1 : 0); ++ seq_printf(m, "%*shas_reserved: %u\n", ind, "", apicd.has_reserved ? 
1 : 0); ++ seq_printf(m, "%*scleanup_pending: %u\n", ind, "", !hlist_unhashed(&apicd.clist)); ++} ++#endif ++ ++static const struct irq_domain_ops x86_vector_domain_ops = { ++ .alloc = x86_vector_alloc_irqs, ++ .free = x86_vector_free_irqs, ++ .activate = x86_vector_activate, ++ .deactivate = x86_vector_deactivate, ++#ifdef CONFIG_GENERIC_IRQ_DEBUGFS ++ .debug_show = x86_vector_debug_show, ++#endif ++}; ++ ++int __init arch_probe_nr_irqs(void) ++{ ++ int nr; ++ ++ if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) ++ nr_irqs = NR_VECTORS * nr_cpu_ids; ++ ++ nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; ++#if defined(CONFIG_PCI_MSI) ++ /* ++ * for MSI and HT dyn irq ++ */ ++ if (gsi_top <= NR_IRQS_LEGACY) ++ nr += 8 * nr_cpu_ids; ++ else ++ nr += gsi_top * 16; ++#endif ++ if (nr < nr_irqs) ++ nr_irqs = nr; ++ ++ /* ++ * We don't know if PIC is present at this point so we need to do ++ * probe() to get the right number of legacy IRQs. ++ */ ++ return legacy_pic->probe(); ++} ++ ++void lapic_assign_legacy_vector(unsigned int irq, bool replace) ++{ ++ /* ++ * Use assign system here so it wont get accounted as allocated ++ * and moveable in the cpu hotplug check and it prevents managed ++ * irq reservation from touching it. ++ */ ++ irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); ++} ++ ++void __init lapic_assign_system_vectors(void) ++{ ++ unsigned int i, vector = 0; ++ ++ for_each_set_bit_from(vector, system_vectors, NR_VECTORS) ++ irq_matrix_assign_system(vector_matrix, vector, false); ++ ++ if (nr_legacy_irqs() > 1) ++ lapic_assign_legacy_vector(PIC_CASCADE_IR, false); ++ ++ /* System vectors are reserved, online it */ ++ irq_matrix_online(vector_matrix); ++ ++ /* Mark the preallocated legacy interrupts */ ++ for (i = 0; i < nr_legacy_irqs(); i++) { ++ if (i != PIC_CASCADE_IR) ++ irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i)); ++ } ++} ++ ++int __init arch_early_irq_init(void) ++{ ++ struct fwnode_handle *fn; ++ ++ fn = irq_domain_alloc_named_fwnode("VECTOR"); ++ BUG_ON(!fn); ++ x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, ++ NULL); ++ BUG_ON(x86_vector_domain == NULL); ++ irq_domain_free_fwnode(fn); ++ irq_set_default_host(x86_vector_domain); ++ ++ arch_init_msi_domain(x86_vector_domain); ++ ++ BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); ++ ++ /* ++ * Allocate the vector matrix allocator data structure and limit the ++ * search area. ++ */ ++ vector_matrix = irq_alloc_matrix(NR_VECTORS, FIRST_EXTERNAL_VECTOR, ++ FIRST_SYSTEM_VECTOR); ++ BUG_ON(!vector_matrix); ++ ++ return arch_early_ioapic_init(); ++} ++ ++#ifdef CONFIG_SMP ++ ++static struct irq_desc *__setup_vector_irq(int vector) ++{ ++ int isairq = vector - ISA_IRQ_VECTOR(0); ++ ++ /* Check whether the irq is in the legacy space */ ++ if (isairq < 0 || isairq >= nr_legacy_irqs()) ++ return VECTOR_UNUSED; ++ /* Check whether the irq is handled by the IOAPIC */ ++ if (test_bit(isairq, &io_apic_irqs)) ++ return VECTOR_UNUSED; ++ return irq_to_desc(isairq); ++} ++ ++/* Online the local APIC infrastructure and initialize the vectors */ ++void lapic_online(void) ++{ ++ unsigned int vector; ++ ++ lockdep_assert_held(&vector_lock); ++ ++ /* Online the vector matrix array for this CPU */ ++ irq_matrix_online(vector_matrix); ++ ++ /* ++ * The interrupt affinity logic never targets interrupts to offline ++ * CPUs. The exception are the legacy PIC interrupts. 
In general ++ * they are only targeted to CPU0, but depending on the platform ++ * they can be distributed to any online CPU in hardware. The ++ * kernel has no influence on that. So all active legacy vectors ++ * must be installed on all CPUs. All non legacy interrupts can be ++ * cleared. ++ */ ++ for (vector = 0; vector < NR_VECTORS; vector++) ++ this_cpu_write(vector_irq[vector], __setup_vector_irq(vector)); ++} ++ ++void lapic_offline(void) ++{ ++ lock_vector_lock(); ++ irq_matrix_offline(vector_matrix); ++ unlock_vector_lock(); ++} ++ ++static int apic_set_affinity(struct irq_data *irqd, ++ const struct cpumask *dest, bool force) ++{ ++ int err; ++ ++ if (WARN_ON_ONCE(!irqd_is_activated(irqd))) ++ return -EIO; ++ ++ raw_spin_lock(&vector_lock); ++ cpumask_and(vector_searchmask, dest, cpu_online_mask); ++ if (irqd_affinity_is_managed(irqd)) ++ err = assign_managed_vector(irqd, vector_searchmask); ++ else ++ err = assign_vector_locked(irqd, vector_searchmask); ++ raw_spin_unlock(&vector_lock); ++ return err ? err : IRQ_SET_MASK_OK; ++} ++ ++#else ++# define apic_set_affinity NULL ++#endif ++ ++static int apic_retrigger_irq(struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ apic->send_IPI(apicd->cpu, apicd->vector); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ ++ return 1; ++} ++ ++void apic_ack_irq(struct irq_data *irqd) ++{ ++ irq_move_irq(irqd); ++ ack_APIC_irq(); ++} ++ ++void apic_ack_edge(struct irq_data *irqd) ++{ ++ irq_complete_move(irqd_cfg(irqd)); ++ apic_ack_irq(irqd); ++} ++ ++static struct irq_chip lapic_controller = { ++ .name = "APIC", ++ .irq_ack = apic_ack_edge, ++ .irq_set_affinity = apic_set_affinity, ++ .irq_retrigger = apic_retrigger_irq, ++}; ++ ++#ifdef CONFIG_SMP ++ ++static void free_moved_vector(struct apic_chip_data *apicd) ++{ ++ unsigned int vector = apicd->prev_vector; ++ unsigned int cpu = apicd->prev_cpu; ++ bool managed = apicd->is_managed; ++ ++ /* ++ * This should never happen. Managed interrupts are not ++ * migrated except on CPU down, which does not involve the ++ * cleanup vector. But try to keep the accounting correct ++ * nevertheless. ++ */ ++ WARN_ON_ONCE(managed); ++ ++ trace_vector_free_moved(apicd->irq, cpu, vector, managed); ++ irq_matrix_free(vector_matrix, cpu, vector, managed); ++ per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; ++ hlist_del_init(&apicd->clist); ++ apicd->prev_vector = 0; ++ apicd->move_in_progress = 0; ++} ++ ++asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) ++{ ++ struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); ++ struct apic_chip_data *apicd; ++ struct hlist_node *tmp; ++ ++ entering_ack_irq(); ++ /* Prevent vectors vanishing under us */ ++ raw_spin_lock(&vector_lock); ++ ++ hlist_for_each_entry_safe(apicd, tmp, clhead, clist) { ++ unsigned int irr, vector = apicd->prev_vector; ++ ++ /* ++ * Paranoia: Check if the vector that needs to be cleaned ++ * up is registered at the APICs IRR. If so, then this is ++ * not the best time to clean it up. Clean it up in the ++ * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR ++ * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest ++ * priority external vector, so on return from this ++ * interrupt the device interrupt will happen first. 
++ */ ++ irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); ++ if (irr & (1U << (vector % 32))) { ++ apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); ++ continue; ++ } ++ free_moved_vector(apicd); ++ } ++ ++ raw_spin_unlock(&vector_lock); ++ exiting_irq(); ++} ++ ++static void __send_cleanup_vector(struct apic_chip_data *apicd) ++{ ++ unsigned int cpu; ++ ++ raw_spin_lock(&vector_lock); ++ apicd->move_in_progress = 0; ++ cpu = apicd->prev_cpu; ++ if (cpu_online(cpu)) { ++ hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); ++ apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); ++ } else { ++ apicd->prev_vector = 0; ++ } ++ raw_spin_unlock(&vector_lock); ++} ++ ++void send_cleanup_vector(struct irq_cfg *cfg) ++{ ++ struct apic_chip_data *apicd; ++ ++ apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); ++ if (apicd->move_in_progress) ++ __send_cleanup_vector(apicd); ++} ++ ++static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) ++{ ++ struct apic_chip_data *apicd; ++ ++ apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); ++ if (likely(!apicd->move_in_progress)) ++ return; ++ ++ if (vector == apicd->vector && apicd->cpu == smp_processor_id()) ++ __send_cleanup_vector(apicd); ++} ++ ++void irq_complete_move(struct irq_cfg *cfg) ++{ ++ __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); ++} ++ ++/* ++ * Called from fixup_irqs() with @desc->lock held and interrupts disabled. ++ */ ++void irq_force_complete_move(struct irq_desc *desc) ++{ ++ struct apic_chip_data *apicd; ++ struct irq_data *irqd; ++ unsigned int vector; ++ ++ /* ++ * The function is called for all descriptors regardless of which ++ * irqdomain they belong to. For example if an IRQ is provided by ++ * an irq_chip as part of a GPIO driver, the chip data for that ++ * descriptor is specific to the irq_chip in question. ++ * ++ * Check first that the chip_data is what we expect ++ * (apic_chip_data) before touching it any further. ++ */ ++ irqd = irq_domain_get_irq_data(x86_vector_domain, ++ irq_desc_get_irq(desc)); ++ if (!irqd) ++ return; ++ ++ raw_spin_lock(&vector_lock); ++ apicd = apic_chip_data(irqd); ++ if (!apicd) ++ goto unlock; ++ ++ /* ++ * If prev_vector is empty, no action required. ++ */ ++ vector = apicd->prev_vector; ++ if (!vector) ++ goto unlock; ++ ++ /* ++ * This is tricky. If the cleanup of the old vector has not been ++ * done yet, then the following setaffinity call will fail with ++ * -EBUSY. This can leave the interrupt in a stale state. ++ * ++ * All CPUs are stuck in stop machine with interrupts disabled so ++ * calling __irq_complete_move() would be completely pointless. ++ * ++ * 1) The interrupt is in move_in_progress state. That means that we ++ * have not seen an interrupt since the io_apic was reprogrammed to ++ * the new vector. ++ * ++ * 2) The interrupt has fired on the new vector, but the cleanup IPIs ++ * have not been processed yet. ++ */ ++ if (apicd->move_in_progress) { ++ /* ++ * In theory there is a race: ++ * ++ * set_ioapic(new_vector) <-- Interrupt is raised before update ++ * is effective, i.e. it's raised on ++ * the old vector. ++ * ++ * So if the target cpu cannot handle that interrupt before ++ * the old vector is cleaned up, we get a spurious interrupt ++ * and in the worst case the ioapic irq line becomes stale. 
++ * ++ * But in case of cpu hotplug this should be a non issue ++ * because if the affinity update happens right before all ++ * cpus rendevouz in stop machine, there is no way that the ++ * interrupt can be blocked on the target cpu because all cpus ++ * loops first with interrupts enabled in stop machine, so the ++ * old vector is not yet cleaned up when the interrupt fires. ++ * ++ * So the only way to run into this issue is if the delivery ++ * of the interrupt on the apic/system bus would be delayed ++ * beyond the point where the target cpu disables interrupts ++ * in stop machine. I doubt that it can happen, but at least ++ * there is a theroretical chance. Virtualization might be ++ * able to expose this, but AFAICT the IOAPIC emulation is not ++ * as stupid as the real hardware. ++ * ++ * Anyway, there is nothing we can do about that at this point ++ * w/o refactoring the whole fixup_irq() business completely. ++ * We print at least the irq number and the old vector number, ++ * so we have the necessary information when a problem in that ++ * area arises. ++ */ ++ pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", ++ irqd->irq, vector); ++ } ++ free_moved_vector(apicd); ++unlock: ++ raw_spin_unlock(&vector_lock); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++/* ++ * Note, this is not accurate accounting, but at least good enough to ++ * prevent that the actual interrupt move will run out of vectors. ++ */ ++int lapic_can_unplug_cpu(void) ++{ ++ unsigned int rsvd, avl, tomove, cpu = smp_processor_id(); ++ int ret = 0; ++ ++ raw_spin_lock(&vector_lock); ++ tomove = irq_matrix_allocated(vector_matrix); ++ avl = irq_matrix_available(vector_matrix, true); ++ if (avl < tomove) { ++ pr_warn("CPU %u has %u vectors, %u available. Cannot disable CPU\n", ++ cpu, tomove, avl); ++ ret = -ENOSPC; ++ goto out; ++ } ++ rsvd = irq_matrix_reserved(vector_matrix); ++ if (avl < rsvd) { ++ pr_warn("Reserved vectors %u > available %u. IRQ request may fail\n", ++ rsvd, avl); ++ } ++out: ++ raw_spin_unlock(&vector_lock); ++ return ret; ++} ++#endif /* HOTPLUG_CPU */ ++#endif /* SMP */ ++ ++static void __init print_APIC_field(int base) ++{ ++ int i; ++ ++ printk(KERN_DEBUG); ++ ++ for (i = 0; i < 8; i++) ++ pr_cont("%08x", apic_read(base + i*0x10)); ++ ++ pr_cont("\n"); ++} ++ ++static void __init print_local_APIC(void *dummy) ++{ ++ unsigned int i, v, ver, maxlvt; ++ u64 icr; ++ ++ pr_debug("printing local APIC contents on CPU#%d/%d:\n", ++ smp_processor_id(), hard_smp_processor_id()); ++ v = apic_read(APIC_ID); ++ pr_info("... APIC ID: %08x (%01x)\n", v, read_apic_id()); ++ v = apic_read(APIC_LVR); ++ pr_info("... APIC VERSION: %08x\n", v); ++ ver = GET_APIC_VERSION(v); ++ maxlvt = lapic_get_maxlvt(); ++ ++ v = apic_read(APIC_TASKPRI); ++ pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); ++ ++ /* !82489DX */ ++ if (APIC_INTEGRATED(ver)) { ++ if (!APIC_XAPIC(ver)) { ++ v = apic_read(APIC_ARBPRI); ++ pr_debug("... APIC ARBPRI: %08x (%02x)\n", ++ v, v & APIC_ARBPRI_MASK); ++ } ++ v = apic_read(APIC_PROCPRI); ++ pr_debug("... APIC PROCPRI: %08x\n", v); ++ } ++ ++ /* ++ * Remote read supported only in the 82489DX and local APIC for ++ * Pentium processors. ++ */ ++ if (!APIC_INTEGRATED(ver) || maxlvt == 3) { ++ v = apic_read(APIC_RRR); ++ pr_debug("... APIC RRR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_LDR); ++ pr_debug("... APIC LDR: %08x\n", v); ++ if (!x2apic_enabled()) { ++ v = apic_read(APIC_DFR); ++ pr_debug("... 
APIC DFR: %08x\n", v); ++ } ++ v = apic_read(APIC_SPIV); ++ pr_debug("... APIC SPIV: %08x\n", v); ++ ++ pr_debug("... APIC ISR field:\n"); ++ print_APIC_field(APIC_ISR); ++ pr_debug("... APIC TMR field:\n"); ++ print_APIC_field(APIC_TMR); ++ pr_debug("... APIC IRR field:\n"); ++ print_APIC_field(APIC_IRR); ++ ++ /* !82489DX */ ++ if (APIC_INTEGRATED(ver)) { ++ /* Due to the Pentium erratum 3AP. */ ++ if (maxlvt > 3) ++ apic_write(APIC_ESR, 0); ++ ++ v = apic_read(APIC_ESR); ++ pr_debug("... APIC ESR: %08x\n", v); ++ } ++ ++ icr = apic_icr_read(); ++ pr_debug("... APIC ICR: %08x\n", (u32)icr); ++ pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32)); ++ ++ v = apic_read(APIC_LVTT); ++ pr_debug("... APIC LVTT: %08x\n", v); ++ ++ if (maxlvt > 3) { ++ /* PC is LVT#4. */ ++ v = apic_read(APIC_LVTPC); ++ pr_debug("... APIC LVTPC: %08x\n", v); ++ } ++ v = apic_read(APIC_LVT0); ++ pr_debug("... APIC LVT0: %08x\n", v); ++ v = apic_read(APIC_LVT1); ++ pr_debug("... APIC LVT1: %08x\n", v); ++ ++ if (maxlvt > 2) { ++ /* ERR is LVT#3. */ ++ v = apic_read(APIC_LVTERR); ++ pr_debug("... APIC LVTERR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_TMICT); ++ pr_debug("... APIC TMICT: %08x\n", v); ++ v = apic_read(APIC_TMCCT); ++ pr_debug("... APIC TMCCT: %08x\n", v); ++ v = apic_read(APIC_TDCR); ++ pr_debug("... APIC TDCR: %08x\n", v); ++ ++ if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { ++ v = apic_read(APIC_EFEAT); ++ maxlvt = (v >> 16) & 0xff; ++ pr_debug("... APIC EFEAT: %08x\n", v); ++ v = apic_read(APIC_ECTRL); ++ pr_debug("... APIC ECTRL: %08x\n", v); ++ for (i = 0; i < maxlvt; i++) { ++ v = apic_read(APIC_EILVTn(i)); ++ pr_debug("... APIC EILVT%d: %08x\n", i, v); ++ } ++ } ++ pr_cont("\n"); ++} ++ ++static void __init print_local_APICs(int maxcpu) ++{ ++ int cpu; ++ ++ if (!maxcpu) ++ return; ++ ++ preempt_disable(); ++ for_each_online_cpu(cpu) { ++ if (cpu >= maxcpu) ++ break; ++ smp_call_function_single(cpu, print_local_APIC, NULL, 1); ++ } ++ preempt_enable(); ++} ++ ++static void __init print_PIC(void) ++{ ++ unsigned int v; ++ unsigned long flags; ++ ++ if (!nr_legacy_irqs()) ++ return; ++ ++ pr_debug("\nprinting PIC contents\n"); ++ ++ raw_spin_lock_irqsave(&i8259A_lock, flags); ++ ++ v = inb(0xa1) << 8 | inb(0x21); ++ pr_debug("... PIC IMR: %04x\n", v); ++ ++ v = inb(0xa0) << 8 | inb(0x20); ++ pr_debug("... PIC IRR: %04x\n", v); ++ ++ outb(0x0b, 0xa0); ++ outb(0x0b, 0x20); ++ v = inb(0xa0) << 8 | inb(0x20); ++ outb(0x0a, 0xa0); ++ outb(0x0a, 0x20); ++ ++ raw_spin_unlock_irqrestore(&i8259A_lock, flags); ++ ++ pr_debug("... PIC ISR: %04x\n", v); ++ ++ v = inb(0x4d1) << 8 | inb(0x4d0); ++ pr_debug("... 
PIC ELCR: %04x\n", v); ++} ++ ++static int show_lapic __initdata = 1; ++static __init int setup_show_lapic(char *arg) ++{ ++ int num = -1; ++ ++ if (strcmp(arg, "all") == 0) { ++ show_lapic = CONFIG_NR_CPUS; ++ } else { ++ get_option(&arg, &num); ++ if (num >= 0) ++ show_lapic = num; ++ } ++ ++ return 1; ++} ++__setup("show_lapic=", setup_show_lapic); ++ ++static int __init print_ICs(void) ++{ ++ if (apic_verbosity == APIC_QUIET) ++ return 0; ++ ++ print_PIC(); ++ ++ /* don't print out if apic is not there */ ++ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config()) ++ return 0; ++ ++ print_local_APICs(show_lapic); ++ print_IO_APICs(); ++ ++ return 0; ++} ++ ++late_initcall(print_ICs); +diff -uprN kernel/arch/x86/kernel/apic/x2apic_cluster.c kernel_new/arch/x86/kernel/apic/x2apic_cluster.c +--- kernel/arch/x86/kernel/apic/x2apic_cluster.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/x2apic_cluster.c 2021-04-01 18:28:07.654863288 +0800 +@@ -44,7 +44,7 @@ __x2apic_send_IPI_mask(const struct cpum + u32 dest; + + x2apic_wrmsr_fence(); +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); + cpumask_copy(tmpmsk, mask); +@@ -68,7 +68,7 @@ __x2apic_send_IPI_mask(const struct cpum + cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); + } + +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +diff -uprN kernel/arch/x86/kernel/apic/x2apic_phys.c kernel_new/arch/x86/kernel/apic/x2apic_phys.c +--- kernel/arch/x86/kernel/apic/x2apic_phys.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/x2apic_phys.c 2021-04-01 18:28:07.654863288 +0800 +@@ -55,7 +55,7 @@ __x2apic_send_IPI_mask(const struct cpum + + x2apic_wrmsr_fence(); + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + this_cpu = smp_processor_id(); + for_each_cpu(query_cpu, mask) { +@@ -64,7 +64,7 @@ __x2apic_send_IPI_mask(const struct cpum + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +diff -uprN kernel/arch/x86/kernel/asm-offsets.c kernel_new/arch/x86/kernel/asm-offsets.c +--- kernel/arch/x86/kernel/asm-offsets.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/asm-offsets.c 2021-04-01 18:28:07.654863288 +0800 +@@ -38,6 +38,9 @@ void common(void) { + + BLANK(); + OFFSET(TASK_TI_flags, task_struct, thread_info.flags); ++#ifdef CONFIG_IPIPE ++ OFFSET(TASK_TI_ipipe, task_struct, thread_info.ipipe_flags); ++#endif + OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); + + BLANK(); +diff -uprN kernel/arch/x86/kernel/cpu/common.c kernel_new/arch/x86/kernel/cpu/common.c +--- kernel/arch/x86/kernel/cpu/common.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/cpu/common.c 2021-04-01 18:28:07.655863287 +0800 +@@ -1674,6 +1674,7 @@ void syscall_init(void) + DEFINE_PER_CPU(struct orig_ist, orig_ist); + + static DEFINE_PER_CPU(unsigned long, debug_stack_addr); ++#ifndef CONFIG_IPIPE + DEFINE_PER_CPU(int, debug_stack_usage); + + int is_debug_stack(unsigned long addr) +@@ -1701,6 +1702,7 @@ void debug_stack_reset(void) + load_current_idt(); + } + NOKPROBE_SYMBOL(debug_stack_reset); ++#endif /* !CONFIG_IPIPE */ + + #else /* CONFIG_X86_64 */ + +diff -uprN kernel/arch/x86/kernel/cpu/common.c.orig 
kernel_new/arch/x86/kernel/cpu/common.c.orig +--- kernel/arch/x86/kernel/cpu/common.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/cpu/common.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,1993 @@ ++/* cpu_feature_enabled() cannot be used this early */ ++#define USE_EARLY_PGTABLE_L5 ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++#include ++#endif ++ ++#include "cpu.h" ++ ++u32 elf_hwcap2 __read_mostly; ++ ++/* all of these masks are initialized in setup_cpu_local_masks() */ ++cpumask_var_t cpu_initialized_mask; ++cpumask_var_t cpu_callout_mask; ++cpumask_var_t cpu_callin_mask; ++ ++/* representing cpus for which sibling maps can be computed */ ++cpumask_var_t cpu_sibling_setup_mask; ++ ++/* Number of siblings per CPU package */ ++int smp_num_siblings = 1; ++EXPORT_SYMBOL(smp_num_siblings); ++ ++/* Last level cache ID of each logical CPU */ ++DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; ++ ++/* correctly size the local cpu masks */ ++void __init setup_cpu_local_masks(void) ++{ ++ alloc_bootmem_cpumask_var(&cpu_initialized_mask); ++ alloc_bootmem_cpumask_var(&cpu_callin_mask); ++ alloc_bootmem_cpumask_var(&cpu_callout_mask); ++ alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); ++} ++ ++static void default_init(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_X86_64 ++ cpu_detect_cache_sizes(c); ++#else ++ /* Not much we can do here... */ ++ /* Check if at least it has cpuid */ ++ if (c->cpuid_level == -1) { ++ /* No cpuid. It must be an ancient CPU */ ++ if (c->x86 == 4) ++ strcpy(c->x86_model_id, "486"); ++ else if (c->x86 == 3) ++ strcpy(c->x86_model_id, "386"); ++ } ++#endif ++} ++ ++static const struct cpu_dev default_cpu = { ++ .c_init = default_init, ++ .c_vendor = "Unknown", ++ .c_x86_vendor = X86_VENDOR_UNKNOWN, ++}; ++ ++static const struct cpu_dev *this_cpu = &default_cpu; ++ ++DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { ++#ifdef CONFIG_X86_64 ++ /* ++ * We need valid kernel segments for data and code in long mode too ++ * IRET will check the segment types kkeil 2000/10/28 ++ * Also sysret mandates a special GDT layout ++ * ++ * TLS descriptors are currently at a different place compared to i386. ++ * Hopefully nobody expects them at a fixed place (Wine?) 
++ */ ++ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), ++ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), ++ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), ++#else ++ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), ++ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), ++ /* ++ * Segments used for calling PnP BIOS have byte granularity. ++ * They code segments and data segments have fixed 64k limits, ++ * the transfer segment sizes are set at run time. ++ */ ++ /* 32-bit code */ ++ [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), ++ /* 16-bit code */ ++ [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), ++ /* 16-bit data */ ++ [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), ++ /* 16-bit data */ ++ [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), ++ /* 16-bit data */ ++ [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), ++ /* ++ * The APM segments have byte granularity and their bases ++ * are set at run time. All have 64k limits. ++ */ ++ /* 32-bit code */ ++ [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), ++ /* 16-bit code */ ++ [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), ++ /* data */ ++ [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), ++ ++ [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), ++ [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), ++ GDT_STACK_CANARY_INIT ++#endif ++} }; ++EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); ++ ++static int __init x86_mpx_setup(char *s) ++{ ++ /* require an exact match without trailing characters */ ++ if (strlen(s)) ++ return 0; ++ ++ /* do not emit a message if the feature is not present */ ++ if (!boot_cpu_has(X86_FEATURE_MPX)) ++ return 1; ++ ++ setup_clear_cpu_cap(X86_FEATURE_MPX); ++ pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n"); ++ return 1; ++} ++__setup("nompx", x86_mpx_setup); ++ ++#ifdef CONFIG_X86_64 ++static int __init x86_nopcid_setup(char *s) ++{ ++ /* nopcid doesn't accept parameters */ ++ if (s) ++ return -EINVAL; ++ ++ /* do not emit a message if the feature is not present */ ++ if (!boot_cpu_has(X86_FEATURE_PCID)) ++ return 0; ++ ++ setup_clear_cpu_cap(X86_FEATURE_PCID); ++ pr_info("nopcid: PCID feature disabled\n"); ++ return 0; ++} ++early_param("nopcid", x86_nopcid_setup); ++#endif ++ ++static int __init x86_noinvpcid_setup(char *s) ++{ ++ /* noinvpcid doesn't accept parameters */ ++ if (s) ++ return -EINVAL; ++ ++ /* do not emit a message if the feature is not present */ ++ if (!boot_cpu_has(X86_FEATURE_INVPCID)) ++ return 0; ++ ++ setup_clear_cpu_cap(X86_FEATURE_INVPCID); ++ pr_info("noinvpcid: INVPCID feature disabled\n"); ++ return 0; ++} ++early_param("noinvpcid", x86_noinvpcid_setup); ++ ++#ifdef CONFIG_X86_32 ++static int cachesize_override = -1; ++static int disable_x86_serial_nr = 1; ++ ++static int __init cachesize_setup(char *str) ++{ ++ get_option(&str, &cachesize_override); ++ return 1; ++} ++__setup("cachesize=", cachesize_setup); ++ ++static int __init x86_sep_setup(char *s) ++{ ++ setup_clear_cpu_cap(X86_FEATURE_SEP); ++ return 1; ++} ++__setup("nosep", x86_sep_setup); ++ ++/* Standard 
macro to see if a specific flag is changeable */ ++static inline int flag_is_changeable_p(u32 flag) ++{ ++ u32 f1, f2; ++ ++ /* ++ * Cyrix and IDT cpus allow disabling of CPUID ++ * so the code below may return different results ++ * when it is executed before and after enabling ++ * the CPUID. Add "volatile" to not allow gcc to ++ * optimize the subsequent calls to this function. ++ */ ++ asm volatile ("pushfl \n\t" ++ "pushfl \n\t" ++ "popl %0 \n\t" ++ "movl %0, %1 \n\t" ++ "xorl %2, %0 \n\t" ++ "pushl %0 \n\t" ++ "popfl \n\t" ++ "pushfl \n\t" ++ "popl %0 \n\t" ++ "popfl \n\t" ++ ++ : "=&r" (f1), "=&r" (f2) ++ : "ir" (flag)); ++ ++ return ((f1^f2) & flag) != 0; ++} ++ ++/* Probe for the CPUID instruction */ ++int have_cpuid_p(void) ++{ ++ return flag_is_changeable_p(X86_EFLAGS_ID); ++} ++ ++static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) ++{ ++ unsigned long lo, hi; ++ ++ if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) ++ return; ++ ++ /* Disable processor serial number: */ ++ ++ rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); ++ lo |= 0x200000; ++ wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); ++ ++ pr_notice("CPU serial number disabled.\n"); ++ clear_cpu_cap(c, X86_FEATURE_PN); ++ ++ /* Disabling the serial number may affect the cpuid level */ ++ c->cpuid_level = cpuid_eax(0); ++} ++ ++static int __init x86_serial_nr_setup(char *s) ++{ ++ disable_x86_serial_nr = 0; ++ return 1; ++} ++__setup("serialnumber", x86_serial_nr_setup); ++#else ++static inline int flag_is_changeable_p(u32 flag) ++{ ++ return 1; ++} ++static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) ++{ ++} ++#endif ++ ++static __init int setup_disable_smep(char *arg) ++{ ++ setup_clear_cpu_cap(X86_FEATURE_SMEP); ++ /* Check for things that depend on SMEP being enabled: */ ++ check_mpx_erratum(&boot_cpu_data); ++ return 1; ++} ++__setup("nosmep", setup_disable_smep); ++ ++static __always_inline void setup_smep(struct cpuinfo_x86 *c) ++{ ++ if (cpu_has(c, X86_FEATURE_SMEP)) ++ cr4_set_bits(X86_CR4_SMEP); ++} ++ ++static __init int setup_disable_smap(char *arg) ++{ ++ setup_clear_cpu_cap(X86_FEATURE_SMAP); ++ return 1; ++} ++__setup("nosmap", setup_disable_smap); ++ ++static __always_inline void setup_smap(struct cpuinfo_x86 *c) ++{ ++ unsigned long eflags = native_save_fl(); ++ ++ /* This should have been cleared long ago */ ++ BUG_ON(eflags & X86_EFLAGS_AC); ++ ++ if (cpu_has(c, X86_FEATURE_SMAP)) { ++#ifdef CONFIG_X86_SMAP ++ cr4_set_bits(X86_CR4_SMAP); ++#else ++ cr4_clear_bits(X86_CR4_SMAP); ++#endif ++ } ++} ++ ++static __always_inline void setup_umip(struct cpuinfo_x86 *c) ++{ ++ /* Check the boot processor, plus build option for UMIP. */ ++ if (!cpu_feature_enabled(X86_FEATURE_UMIP)) ++ goto out; ++ ++ /* Check the current processor's cpuid bits. */ ++ if (!cpu_has(c, X86_FEATURE_UMIP)) ++ goto out; ++ ++ cr4_set_bits(X86_CR4_UMIP); ++ ++ pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n"); ++ ++ return; ++ ++out: ++ /* ++ * Make sure UMIP is disabled in case it was enabled in a ++ * previous boot (e.g., via kexec). ++ */ ++ cr4_clear_bits(X86_CR4_UMIP); ++} ++ ++/* ++ * Protection Keys are not available in 32-bit mode. 
++ */ ++static bool pku_disabled; ++ ++static __always_inline void setup_pku(struct cpuinfo_x86 *c) ++{ ++ /* check the boot processor, plus compile options for PKU: */ ++ if (!cpu_feature_enabled(X86_FEATURE_PKU)) ++ return; ++ /* checks the actual processor's cpuid bits: */ ++ if (!cpu_has(c, X86_FEATURE_PKU)) ++ return; ++ if (pku_disabled) ++ return; ++ ++ cr4_set_bits(X86_CR4_PKE); ++ /* ++ * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE ++ * cpuid bit to be set. We need to ensure that we ++ * update that bit in this CPU's "cpu_info". ++ */ ++ get_cpu_cap(c); ++} ++ ++#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS ++static __init int setup_disable_pku(char *arg) ++{ ++ /* ++ * Do not clear the X86_FEATURE_PKU bit. All of the ++ * runtime checks are against OSPKE so clearing the ++ * bit does nothing. ++ * ++ * This way, we will see "pku" in cpuinfo, but not ++ * "ospke", which is exactly what we want. It shows ++ * that the CPU has PKU, but the OS has not enabled it. ++ * This happens to be exactly how a system would look ++ * if we disabled the config option. ++ */ ++ pr_info("x86: 'nopku' specified, disabling Memory Protection Keys\n"); ++ pku_disabled = true; ++ return 1; ++} ++__setup("nopku", setup_disable_pku); ++#endif /* CONFIG_X86_64 */ ++ ++/* ++ * Some CPU features depend on higher CPUID levels, which may not always ++ * be available due to CPUID level capping or broken virtualization ++ * software. Add those features to this table to auto-disable them. ++ */ ++struct cpuid_dependent_feature { ++ u32 feature; ++ u32 level; ++}; ++ ++static const struct cpuid_dependent_feature ++cpuid_dependent_features[] = { ++ { X86_FEATURE_MWAIT, 0x00000005 }, ++ { X86_FEATURE_DCA, 0x00000009 }, ++ { X86_FEATURE_XSAVE, 0x0000000d }, ++ { 0, 0 } ++}; ++ ++static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) ++{ ++ const struct cpuid_dependent_feature *df; ++ ++ for (df = cpuid_dependent_features; df->feature; df++) { ++ ++ if (!cpu_has(c, df->feature)) ++ continue; ++ /* ++ * Note: cpuid_level is set to -1 if unavailable, but ++ * extended_extended_level is set to 0 if unavailable ++ * and the legitimate extended levels are all negative ++ * when signed; hence the weird messing around with ++ * signs here... ++ */ ++ if (!((s32)df->level < 0 ? ++ (u32)df->level > (u32)c->extended_cpuid_level : ++ (s32)df->level > (s32)c->cpuid_level)) ++ continue; ++ ++ clear_cpu_cap(c, df->feature); ++ if (!warn) ++ continue; ++ ++ pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", ++ x86_cap_flag(df->feature), df->level); ++ } ++} ++ ++/* ++ * Naming convention should be: [()] ++ * This table only is used unless init_() below doesn't set it; ++ * in particular, if CPUID levels 0x80000002..4 are supported, this ++ * isn't used ++ */ ++ ++/* Look up CPU names by table lookup. 
*/ ++static const char *table_lookup_model(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_X86_32 ++ const struct legacy_cpu_model_info *info; ++ ++ if (c->x86_model >= 16) ++ return NULL; /* Range check */ ++ ++ if (!this_cpu) ++ return NULL; ++ ++ info = this_cpu->legacy_models; ++ ++ while (info->family) { ++ if (info->family == c->x86) ++ return info->model_names[c->x86_model]; ++ info++; ++ } ++#endif ++ return NULL; /* Not found */ ++} ++ ++__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; ++__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; ++ ++void load_percpu_segment(int cpu) ++{ ++#ifdef CONFIG_X86_32 ++ loadsegment(fs, __KERNEL_PERCPU); ++#else ++ __loadsegment_simple(gs, 0); ++ wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); ++#endif ++ load_stack_canary_segment(); ++} ++ ++#ifdef CONFIG_X86_32 ++/* The 32-bit entry code needs to find cpu_entry_area. */ ++DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); ++#endif ++ ++#ifdef CONFIG_X86_64 ++/* ++ * Special IST stacks which the CPU switches to when it calls ++ * an IST-marked descriptor entry. Up to 7 stacks (hardware ++ * limit), all of them are 4K, except the debug stack which ++ * is 8K. ++ */ ++static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { ++ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, ++ [DEBUG_STACK - 1] = DEBUG_STKSZ ++}; ++#endif ++ ++/* Load the original GDT from the per-cpu structure */ ++void load_direct_gdt(int cpu) ++{ ++ struct desc_ptr gdt_descr; ++ ++ gdt_descr.address = (long)get_cpu_gdt_rw(cpu); ++ gdt_descr.size = GDT_SIZE - 1; ++ load_gdt(&gdt_descr); ++} ++EXPORT_SYMBOL_GPL(load_direct_gdt); ++ ++/* Load a fixmap remapping of the per-cpu GDT */ ++void load_fixmap_gdt(int cpu) ++{ ++ struct desc_ptr gdt_descr; ++ ++ gdt_descr.address = (long)get_cpu_gdt_ro(cpu); ++ gdt_descr.size = GDT_SIZE - 1; ++ load_gdt(&gdt_descr); ++} ++EXPORT_SYMBOL_GPL(load_fixmap_gdt); ++ ++/* ++ * Current gdt points %fs at the "master" per-cpu area: after this, ++ * it's on the real one. 
++ */ ++void switch_to_new_gdt(int cpu) ++{ ++ /* Load the original GDT */ ++ load_direct_gdt(cpu); ++ /* Reload the per-cpu base */ ++ load_percpu_segment(cpu); ++} ++ ++static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; ++ ++static void get_model_name(struct cpuinfo_x86 *c) ++{ ++ unsigned int *v; ++ char *p, *q, *s; ++ ++ if (c->extended_cpuid_level < 0x80000004) ++ return; ++ ++ v = (unsigned int *)c->x86_model_id; ++ cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); ++ cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); ++ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); ++ c->x86_model_id[48] = 0; ++ ++ /* Trim whitespace */ ++ p = q = s = &c->x86_model_id[0]; ++ ++ while (*p == ' ') ++ p++; ++ ++ while (*p) { ++ /* Note the last non-whitespace index */ ++ if (!isspace(*p)) ++ s = q; ++ ++ *q++ = *p++; ++ } ++ ++ *(s + 1) = '\0'; ++} ++ ++void detect_num_cpu_cores(struct cpuinfo_x86 *c) ++{ ++ unsigned int eax, ebx, ecx, edx; ++ ++ c->x86_max_cores = 1; ++ if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) ++ return; ++ ++ cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); ++ if (eax & 0x1f) ++ c->x86_max_cores = (eax >> 26) + 1; ++} ++ ++void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) ++{ ++ unsigned int n, dummy, ebx, ecx, edx, l2size; ++ ++ n = c->extended_cpuid_level; ++ ++ if (n >= 0x80000005) { ++ cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); ++ c->x86_cache_size = (ecx>>24) + (edx>>24); ++#ifdef CONFIG_X86_64 ++ /* On K8 L1 TLB is inclusive, so don't count it */ ++ c->x86_tlbsize = 0; ++#endif ++ } ++ ++ if (n < 0x80000006) /* Some chips just has a large L1. */ ++ return; ++ ++ cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); ++ l2size = ecx >> 16; ++ ++#ifdef CONFIG_X86_64 ++ c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); ++#else ++ /* do processor-specific cache resizing */ ++ if (this_cpu->legacy_cache_size) ++ l2size = this_cpu->legacy_cache_size(c, l2size); ++ ++ /* Allow user to override all this if necessary. 
*/ ++ if (cachesize_override != -1) ++ l2size = cachesize_override; ++ ++ if (l2size == 0) ++ return; /* Again, no L2 cache is possible */ ++#endif ++ ++ c->x86_cache_size = l2size; ++} ++ ++u16 __read_mostly tlb_lli_4k[NR_INFO]; ++u16 __read_mostly tlb_lli_2m[NR_INFO]; ++u16 __read_mostly tlb_lli_4m[NR_INFO]; ++u16 __read_mostly tlb_lld_4k[NR_INFO]; ++u16 __read_mostly tlb_lld_2m[NR_INFO]; ++u16 __read_mostly tlb_lld_4m[NR_INFO]; ++u16 __read_mostly tlb_lld_1g[NR_INFO]; ++ ++static void cpu_detect_tlb(struct cpuinfo_x86 *c) ++{ ++ if (this_cpu->c_detect_tlb) ++ this_cpu->c_detect_tlb(c); ++ ++ pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n", ++ tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], ++ tlb_lli_4m[ENTRIES]); ++ ++ pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", ++ tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], ++ tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); ++} ++ ++int detect_ht_early(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_SMP ++ u32 eax, ebx, ecx, edx; ++ ++ if (!cpu_has(c, X86_FEATURE_HT)) ++ return -1; ++ ++ if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) ++ return -1; ++ ++ if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) ++ return -1; ++ ++ cpuid(1, &eax, &ebx, &ecx, &edx); ++ ++ smp_num_siblings = (ebx & 0xff0000) >> 16; ++ if (smp_num_siblings == 1) ++ pr_info_once("CPU0: Hyper-Threading is disabled\n"); ++#endif ++ return 0; ++} ++ ++void detect_ht(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_SMP ++ int index_msb, core_bits; ++ ++ if (detect_ht_early(c) < 0) ++ return; ++ ++ index_msb = get_count_order(smp_num_siblings); ++ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); ++ ++ smp_num_siblings = smp_num_siblings / c->x86_max_cores; ++ ++ index_msb = get_count_order(smp_num_siblings); ++ ++ core_bits = get_count_order(c->x86_max_cores); ++ ++ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & ++ ((1 << core_bits) - 1); ++#endif ++} ++ ++static void get_cpu_vendor(struct cpuinfo_x86 *c) ++{ ++ char *v = c->x86_vendor_id; ++ int i; ++ ++ for (i = 0; i < X86_VENDOR_NUM; i++) { ++ if (!cpu_devs[i]) ++ break; ++ ++ if (!strcmp(v, cpu_devs[i]->c_ident[0]) || ++ (cpu_devs[i]->c_ident[1] && ++ !strcmp(v, cpu_devs[i]->c_ident[1]))) { ++ ++ this_cpu = cpu_devs[i]; ++ c->x86_vendor = this_cpu->c_x86_vendor; ++ return; ++ } ++ } ++ ++ pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \ ++ "CPU: Your system may be unstable.\n", v); ++ ++ c->x86_vendor = X86_VENDOR_UNKNOWN; ++ this_cpu = &default_cpu; ++} ++ ++void cpu_detect(struct cpuinfo_x86 *c) ++{ ++ /* Get vendor name */ ++ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, ++ (unsigned int *)&c->x86_vendor_id[0], ++ (unsigned int *)&c->x86_vendor_id[8], ++ (unsigned int *)&c->x86_vendor_id[4]); ++ ++ c->x86 = 4; ++ /* Intel-defined flags: level 0x00000001 */ ++ if (c->cpuid_level >= 0x00000001) { ++ u32 junk, tfms, cap0, misc; ++ ++ cpuid(0x00000001, &tfms, &misc, &junk, &cap0); ++ c->x86 = x86_family(tfms); ++ c->x86_model = x86_model(tfms); ++ c->x86_stepping = x86_stepping(tfms); ++ ++ if (cap0 & (1<<19)) { ++ c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; ++ c->x86_cache_alignment = c->x86_clflush_size; ++ } ++ } ++} ++ ++static void apply_forced_caps(struct cpuinfo_x86 *c) ++{ ++ int i; ++ ++ for (i = 0; i < NCAPINTS + NBUGINTS; i++) { ++ c->x86_capability[i] &= ~cpu_caps_cleared[i]; ++ c->x86_capability[i] |= cpu_caps_set[i]; ++ } ++} ++ ++static void init_speculation_control(struct cpuinfo_x86 *c) ++{ ++ /* ++ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB 
support, ++ * and they also have a different bit for STIBP support. Also, ++ * a hypervisor might have set the individual AMD bits even on ++ * Intel CPUs, for finer-grained selection of what's available. ++ */ ++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { ++ set_cpu_cap(c, X86_FEATURE_IBRS); ++ set_cpu_cap(c, X86_FEATURE_IBPB); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); ++ } ++ ++ if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) ++ set_cpu_cap(c, X86_FEATURE_STIBP); ++ ++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || ++ cpu_has(c, X86_FEATURE_VIRT_SSBD)) ++ set_cpu_cap(c, X86_FEATURE_SSBD); ++ ++ if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { ++ set_cpu_cap(c, X86_FEATURE_IBRS); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); ++ } ++ ++ if (cpu_has(c, X86_FEATURE_AMD_IBPB)) ++ set_cpu_cap(c, X86_FEATURE_IBPB); ++ ++ if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { ++ set_cpu_cap(c, X86_FEATURE_STIBP); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); ++ } ++ ++ if (cpu_has(c, X86_FEATURE_AMD_SSBD)) { ++ set_cpu_cap(c, X86_FEATURE_SSBD); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); ++ clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD); ++ } ++} ++ ++static void init_cqm(struct cpuinfo_x86 *c) ++{ ++ if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { ++ c->x86_cache_max_rmid = -1; ++ c->x86_cache_occ_scale = -1; ++ return; ++ } ++ ++ /* will be overridden if occupancy monitoring exists */ ++ c->x86_cache_max_rmid = cpuid_ebx(0xf); ++ ++ if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || ++ cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || ++ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { ++ u32 eax, ebx, ecx, edx; ++ ++ /* QoS sub-leaf, EAX=0Fh, ECX=1 */ ++ cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_cache_max_rmid = ecx; ++ c->x86_cache_occ_scale = ebx; ++ } ++} ++ ++void get_cpu_cap(struct cpuinfo_x86 *c) ++{ ++ u32 eax, ebx, ecx, edx; ++ ++ /* Intel-defined flags: level 0x00000001 */ ++ if (c->cpuid_level >= 0x00000001) { ++ cpuid(0x00000001, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_capability[CPUID_1_ECX] = ecx; ++ c->x86_capability[CPUID_1_EDX] = edx; ++ } ++ ++ /* Thermal and Power Management Leaf: level 0x00000006 (eax) */ ++ if (c->cpuid_level >= 0x00000006) ++ c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006); ++ ++ /* Additional Intel-defined flags: level 0x00000007 */ ++ if (c->cpuid_level >= 0x00000007) { ++ cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); ++ c->x86_capability[CPUID_7_0_EBX] = ebx; ++ c->x86_capability[CPUID_7_ECX] = ecx; ++ c->x86_capability[CPUID_7_EDX] = edx; ++ } ++ ++ /* Extended state features: level 0x0000000d */ ++ if (c->cpuid_level >= 0x0000000d) { ++ cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_capability[CPUID_D_1_EAX] = eax; ++ } ++ ++ /* AMD-defined flags: level 0x80000001 */ ++ eax = cpuid_eax(0x80000000); ++ c->extended_cpuid_level = eax; ++ ++ if ((eax & 0xffff0000) == 0x80000000) { ++ if (eax >= 0x80000001) { ++ cpuid(0x80000001, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_capability[CPUID_8000_0001_ECX] = ecx; ++ c->x86_capability[CPUID_8000_0001_EDX] = edx; ++ } ++ } ++ ++ if (c->extended_cpuid_level >= 0x80000007) { ++ cpuid(0x80000007, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_capability[CPUID_8000_0007_EBX] = ebx; ++ c->x86_power = edx; ++ } ++ ++ if (c->extended_cpuid_level >= 0x80000008) { ++ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ c->x86_capability[CPUID_8000_0008_EBX] = ebx; ++ } ++ ++ if (c->extended_cpuid_level >= 0x8000000a) ++ c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); ++ ++ init_scattered_cpuid_features(c); ++ init_speculation_control(c); ++ 
init_cqm(c); ++ ++ /* ++ * Clear/Set all flags overridden by options, after probe. ++ * This needs to happen each time we re-probe, which may happen ++ * several times during CPU initialization. ++ */ ++ apply_forced_caps(c); ++} ++ ++void get_cpu_address_sizes(struct cpuinfo_x86 *c) ++{ ++ u32 eax, ebx, ecx, edx; ++ ++ if (c->extended_cpuid_level >= 0x80000008) { ++ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_virt_bits = (eax >> 8) & 0xff; ++ c->x86_phys_bits = eax & 0xff; ++ } ++#ifdef CONFIG_X86_32 ++ else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) ++ c->x86_phys_bits = 36; ++#endif ++ c->x86_cache_bits = c->x86_phys_bits; ++} ++ ++static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_X86_32 ++ int i; ++ ++ /* ++ * First of all, decide if this is a 486 or higher ++ * It's a 486 if we can modify the AC flag ++ */ ++ if (flag_is_changeable_p(X86_EFLAGS_AC)) ++ c->x86 = 4; ++ else ++ c->x86 = 3; ++ ++ for (i = 0; i < X86_VENDOR_NUM; i++) ++ if (cpu_devs[i] && cpu_devs[i]->c_identify) { ++ c->x86_vendor_id[0] = 0; ++ cpu_devs[i]->c_identify(c); ++ if (c->x86_vendor_id[0]) { ++ get_cpu_vendor(c); ++ break; ++ } ++ } ++#endif ++} ++ ++#define NO_SPECULATION BIT(0) ++#define NO_MELTDOWN BIT(1) ++#define NO_SSB BIT(2) ++#define NO_L1TF BIT(3) ++#define NO_MDS BIT(4) ++#define MSBDS_ONLY BIT(5) ++#define NO_SWAPGS BIT(6) ++#define NO_ITLB_MULTIHIT BIT(7) ++ ++#define VULNWL(_vendor, _family, _model, _whitelist) \ ++ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } ++ ++#define VULNWL_INTEL(model, whitelist) \ ++ VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) ++ ++#define VULNWL_AMD(family, whitelist) \ ++ VULNWL(AMD, family, X86_MODEL_ANY, whitelist) ++ ++#define VULNWL_HYGON(family, whitelist) \ ++ VULNWL(HYGON, family, X86_MODEL_ANY, whitelist) ++ ++static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { ++ VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), ++ VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION), ++ VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), ++ VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), ++ ++ /* Intel Family 6 */ ++ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ ++ VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ ++ VULNWL_INTEL(CORE_YONAH, NO_SSB), ++ ++ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ ++ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ ++ /* ++ * Technically, swapgs isn't serializing on AMD (despite it previously 
++ * being documented as such in the APM). But according to AMD, %gs is ++ * updated non-speculatively, and the issuing of %gs-relative memory ++ * operands will be blocked until the %gs update completes, which is ++ * good enough for our purposes. ++ */ ++ ++ VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT), ++ ++ /* AMD Family 0xf - 0x12 */ ++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ ++ /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ ++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ {} ++}; ++ ++#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ ++ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ ++ INTEL_FAM6_##model, steppings, \ ++ X86_FEATURE_ANY, issues) ++ ++#define SRBDS BIT(0) ++ ++static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { ++ VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(HASWELL_CORE, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(HASWELL_ULT, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(HASWELL_GT3E, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(BROADWELL_GT3E, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0xC), SRBDS), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x0, 0xD), SRBDS), ++ {} ++}; ++ ++static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which) ++{ ++ const struct x86_cpu_id *m = x86_match_cpu(table); ++ ++ return m && !!(m->driver_data & which); ++} ++ ++u64 x86_read_arch_cap_msr(void) ++{ ++ u64 ia32_cap = 0; ++ ++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); ++ ++ return ia32_cap; ++} ++ ++static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) ++{ ++ u64 ia32_cap = x86_read_arch_cap_msr(); ++ ++ /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ ++ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && ++ !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) ++ setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); ++ ++ if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) ++ return; ++ ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V1); ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V2); ++ ++ if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && ++ !(ia32_cap & ARCH_CAP_SSB_NO) && ++ !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) ++ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); ++ ++ if (ia32_cap & ARCH_CAP_IBRS_ALL) ++ setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); ++ ++ if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && ++ !(ia32_cap & ARCH_CAP_MDS_NO)) { ++ setup_force_cpu_bug(X86_BUG_MDS); ++ if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) ++ setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); ++ } ++ ++ if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS)) ++ setup_force_cpu_bug(X86_BUG_SWAPGS); ++ ++ /* ++ * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when: ++ * - 
TSX is supported or ++ * - TSX_CTRL is present ++ * ++ * TSX_CTRL check is needed for cases when TSX could be disabled before ++ * the kernel boot e.g. kexec. ++ * TSX_CTRL check alone is not sufficient for cases when the microcode ++ * update is not present or running as guest that don't get TSX_CTRL. ++ */ ++ if (!(ia32_cap & ARCH_CAP_TAA_NO) && ++ (cpu_has(c, X86_FEATURE_RTM) || ++ (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) ++ setup_force_cpu_bug(X86_BUG_TAA); ++ ++ /* ++ * SRBDS affects CPUs which support RDRAND or RDSEED and are listed ++ * in the vulnerability blacklist. ++ */ ++ if ((cpu_has(c, X86_FEATURE_RDRAND) || ++ cpu_has(c, X86_FEATURE_RDSEED)) && ++ cpu_matches(cpu_vuln_blacklist, SRBDS)) ++ setup_force_cpu_bug(X86_BUG_SRBDS); ++ ++ if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) ++ return; ++ ++ /* Rogue Data Cache Load? No! */ ++ if (ia32_cap & ARCH_CAP_RDCL_NO) ++ return; ++ ++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); ++ ++ if (cpu_matches(cpu_vuln_whitelist, NO_L1TF)) ++ return; ++ ++ setup_force_cpu_bug(X86_BUG_L1TF); ++} ++ ++/* ++ * The NOPL instruction is supposed to exist on all CPUs of family >= 6; ++ * unfortunately, that's not true in practice because of early VIA ++ * chips and (more importantly) broken virtualizers that are not easy ++ * to detect. In the latter case it doesn't even *fail* reliably, so ++ * probing for it doesn't even work. Disable it completely on 32-bit ++ * unless we can find a reliable way to detect all the broken cases. ++ * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). ++ */ ++static void detect_nopl(void) ++{ ++#ifdef CONFIG_X86_32 ++ setup_clear_cpu_cap(X86_FEATURE_NOPL); ++#else ++ setup_force_cpu_cap(X86_FEATURE_NOPL); ++#endif ++} ++ ++/* ++ * Do minimum CPU detection early. ++ * Fields really needed: vendor, cpuid_level, family, model, mask, ++ * cache alignment. ++ * The others are not touched to avoid unwanted side effects. ++ * ++ * WARNING: this function is only called on the boot CPU. Don't add code ++ * here that is supposed to run on all CPUs. ++ */ ++static void __init early_identify_cpu(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_X86_64 ++ c->x86_clflush_size = 64; ++ c->x86_phys_bits = 36; ++ c->x86_virt_bits = 48; ++#else ++ c->x86_clflush_size = 32; ++ c->x86_phys_bits = 32; ++ c->x86_virt_bits = 32; ++#endif ++ c->x86_cache_alignment = c->x86_clflush_size; ++ ++ memset(&c->x86_capability, 0, sizeof c->x86_capability); ++ c->extended_cpuid_level = 0; ++ ++ if (!have_cpuid_p()) ++ identify_cpu_without_cpuid(c); ++ ++ /* cyrix could have cpuid enabled via c_identify()*/ ++ if (have_cpuid_p()) { ++ cpu_detect(c); ++ get_cpu_vendor(c); ++ get_cpu_cap(c); ++ get_cpu_address_sizes(c); ++ setup_force_cpu_cap(X86_FEATURE_CPUID); ++ ++ if (this_cpu->c_early_init) ++ this_cpu->c_early_init(c); ++ ++ c->cpu_index = 0; ++ filter_cpuid_features(c, false); ++ ++ if (this_cpu->c_bsp_init) ++ this_cpu->c_bsp_init(c); ++ } else { ++ setup_clear_cpu_cap(X86_FEATURE_CPUID); ++ } ++ ++ setup_force_cpu_cap(X86_FEATURE_ALWAYS); ++ ++ cpu_set_bug_bits(c); ++ ++ fpu__init_system(c); ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * Regardless of whether PCID is enumerated, the SDM says ++ * that it can't be enabled in 32-bit mode. ++ */ ++ setup_clear_cpu_cap(X86_FEATURE_PCID); ++#endif ++ ++ /* ++ * Later in the boot process pgtable_l5_enabled() relies on ++ * cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not ++ * enabled by this point we need to clear the feature bit to avoid ++ * false-positives at the later stage. 
++ * ++ * pgtable_l5_enabled() can be false here for several reasons: ++ * - 5-level paging is disabled compile-time; ++ * - it's 32-bit kernel; ++ * - machine doesn't support 5-level paging; ++ * - user specified 'no5lvl' in kernel command line. ++ */ ++ if (!pgtable_l5_enabled()) ++ setup_clear_cpu_cap(X86_FEATURE_LA57); ++ ++ detect_nopl(); ++} ++ ++void __init early_cpu_init(void) ++{ ++ const struct cpu_dev *const *cdev; ++ int count = 0; ++ ++#ifdef CONFIG_PROCESSOR_SELECT ++ pr_info("KERNEL supported cpus:\n"); ++#endif ++ ++ for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { ++ const struct cpu_dev *cpudev = *cdev; ++ ++ if (count >= X86_VENDOR_NUM) ++ break; ++ cpu_devs[count] = cpudev; ++ count++; ++ ++#ifdef CONFIG_PROCESSOR_SELECT ++ { ++ unsigned int j; ++ ++ for (j = 0; j < 2; j++) { ++ if (!cpudev->c_ident[j]) ++ continue; ++ pr_info(" %s %s\n", cpudev->c_vendor, ++ cpudev->c_ident[j]); ++ } ++ } ++#endif ++ } ++ early_identify_cpu(&boot_cpu_data); ++} ++ ++static void detect_null_seg_behavior(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_X86_64 ++ /* ++ * Empirically, writing zero to a segment selector on AMD does ++ * not clear the base, whereas writing zero to a segment ++ * selector on Intel does clear the base. Intel's behavior ++ * allows slightly faster context switches in the common case ++ * where GS is unused by the prev and next threads. ++ * ++ * Since neither vendor documents this anywhere that I can see, ++ * detect it directly instead of hardcoding the choice by ++ * vendor. ++ * ++ * I've designated AMD's behavior as the "bug" because it's ++ * counterintuitive and less friendly. ++ */ ++ ++ unsigned long old_base, tmp; ++ rdmsrl(MSR_FS_BASE, old_base); ++ wrmsrl(MSR_FS_BASE, 1); ++ loadsegment(fs, 0); ++ rdmsrl(MSR_FS_BASE, tmp); ++ if (tmp != 0) ++ set_cpu_bug(c, X86_BUG_NULL_SEG); ++ wrmsrl(MSR_FS_BASE, old_base); ++#endif ++} ++ ++static void generic_identify(struct cpuinfo_x86 *c) ++{ ++ c->extended_cpuid_level = 0; ++ ++ if (!have_cpuid_p()) ++ identify_cpu_without_cpuid(c); ++ ++ /* cyrix could have cpuid enabled via c_identify()*/ ++ if (!have_cpuid_p()) ++ return; ++ ++ cpu_detect(c); ++ ++ get_cpu_vendor(c); ++ ++ get_cpu_cap(c); ++ ++ get_cpu_address_sizes(c); ++ ++ if (c->cpuid_level >= 0x00000001) { ++ c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; ++#ifdef CONFIG_X86_32 ++# ifdef CONFIG_SMP ++ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); ++# else ++ c->apicid = c->initial_apicid; ++# endif ++#endif ++ c->phys_proc_id = c->initial_apicid; ++ } ++ ++ get_model_name(c); /* Default name */ ++ ++ detect_null_seg_behavior(c); ++ ++ /* ++ * ESPFIX is a strange bug. All real CPUs have it. Paravirt ++ * systems that run Linux at CPL > 0 may or may not have the ++ * issue, but, even if they have the issue, there's absolutely ++ * nothing we can do about it because we can't use the real IRET ++ * instruction. ++ * ++ * NB: For the time being, only 32-bit kernels support ++ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose ++ * whether to apply espfix using paravirt hooks. If any ++ * non-paravirt system ever shows up that does *not* have the ++ * ESPFIX issue, we can change this. 
++ */ ++#ifdef CONFIG_X86_32 ++# ifdef CONFIG_PARAVIRT ++ do { ++ extern void native_iret(void); ++ if (pv_cpu_ops.iret == native_iret) ++ set_cpu_bug(c, X86_BUG_ESPFIX); ++ } while (0); ++# else ++ set_cpu_bug(c, X86_BUG_ESPFIX); ++# endif ++#endif ++} ++ ++static void x86_init_cache_qos(struct cpuinfo_x86 *c) ++{ ++ /* ++ * The heavy lifting of max_rmid and cache_occ_scale are handled ++ * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu ++ * in case CQM bits really aren't there in this CPU. ++ */ ++ if (c != &boot_cpu_data) { ++ boot_cpu_data.x86_cache_max_rmid = ++ min(boot_cpu_data.x86_cache_max_rmid, ++ c->x86_cache_max_rmid); ++ } ++} ++ ++/* ++ * Validate that ACPI/mptables have the same information about the ++ * effective APIC id and update the package map. ++ */ ++static void validate_apic_and_package_id(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_SMP ++ unsigned int apicid, cpu = smp_processor_id(); ++ ++ apicid = apic->cpu_present_to_apicid(cpu); ++ ++ if (apicid != c->apicid) { ++ pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n", ++ cpu, apicid, c->initial_apicid); ++ } ++ BUG_ON(topology_update_package_map(c->phys_proc_id, cpu)); ++#else ++ c->logical_proc_id = 0; ++#endif ++} ++ ++/* ++ * This does the hard work of actually picking apart the CPU stuff... ++ */ ++static void identify_cpu(struct cpuinfo_x86 *c) ++{ ++ int i; ++ ++ c->loops_per_jiffy = loops_per_jiffy; ++ c->x86_cache_size = 0; ++ c->x86_vendor = X86_VENDOR_UNKNOWN; ++ c->x86_model = c->x86_stepping = 0; /* So far unknown... */ ++ c->x86_vendor_id[0] = '\0'; /* Unset */ ++ c->x86_model_id[0] = '\0'; /* Unset */ ++ c->x86_max_cores = 1; ++ c->x86_coreid_bits = 0; ++ c->cu_id = 0xff; ++#ifdef CONFIG_X86_64 ++ c->x86_clflush_size = 64; ++ c->x86_phys_bits = 36; ++ c->x86_virt_bits = 48; ++#else ++ c->cpuid_level = -1; /* CPUID not detected */ ++ c->x86_clflush_size = 32; ++ c->x86_phys_bits = 32; ++ c->x86_virt_bits = 32; ++#endif ++ c->x86_cache_alignment = c->x86_clflush_size; ++ memset(&c->x86_capability, 0, sizeof c->x86_capability); ++ ++ generic_identify(c); ++ ++ if (this_cpu->c_identify) ++ this_cpu->c_identify(c); ++ ++ /* Clear/Set all flags overridden by options, after probe */ ++ apply_forced_caps(c); ++ ++#ifdef CONFIG_X86_64 ++ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); ++#endif ++ ++ /* ++ * Vendor-specific initialization. In this section we ++ * canonicalize the feature flags, meaning if there are ++ * features a certain CPU supports which CPUID doesn't ++ * tell us, CPUID claiming incorrect flags, or other bugs, ++ * we handle them here. ++ * ++ * At the end of this section, c->x86_capability better ++ * indicate the features this CPU genuinely supports! ++ */ ++ if (this_cpu->c_init) ++ this_cpu->c_init(c); ++ ++ /* Disable the PN if appropriate */ ++ squash_the_stupid_serial_number(c); ++ ++ /* Set up SMEP/SMAP/UMIP */ ++ setup_smep(c); ++ setup_smap(c); ++ setup_umip(c); ++ ++ /* ++ * The vendor-specific functions might have changed features. ++ * Now we do "generic changes." ++ */ ++ ++ /* Filter out anything that depends on CPUID levels we don't have */ ++ filter_cpuid_features(c, true); ++ ++ /* If the model name is still unset, do table lookup. */ ++ if (!c->x86_model_id[0]) { ++ const char *p; ++ p = table_lookup_model(c); ++ if (p) ++ strcpy(c->x86_model_id, p); ++ else ++ /* Last resort... 
*/ ++ sprintf(c->x86_model_id, "%02x/%02x", ++ c->x86, c->x86_model); ++ } ++ ++#ifdef CONFIG_X86_64 ++ detect_ht(c); ++#endif ++ ++ x86_init_rdrand(c); ++ x86_init_cache_qos(c); ++ setup_pku(c); ++ ++ /* ++ * Clear/Set all flags overridden by options, need do it ++ * before following smp all cpus cap AND. ++ */ ++ apply_forced_caps(c); ++ ++ /* ++ * On SMP, boot_cpu_data holds the common feature set between ++ * all CPUs; so make sure that we indicate which features are ++ * common between the CPUs. The first time this routine gets ++ * executed, c == &boot_cpu_data. ++ */ ++ if (c != &boot_cpu_data) { ++ /* AND the already accumulated flags with these */ ++ for (i = 0; i < NCAPINTS; i++) ++ boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; ++ ++ /* OR, i.e. replicate the bug flags */ ++ for (i = NCAPINTS; i < NCAPINTS + NBUGINTS; i++) ++ c->x86_capability[i] |= boot_cpu_data.x86_capability[i]; ++ } ++ ++ /* Init Machine Check Exception if available. */ ++ mcheck_cpu_init(c); ++ ++ select_idle_routine(c); ++ ++#ifdef CONFIG_NUMA ++ numa_add_cpu(smp_processor_id()); ++#endif ++} ++ ++/* ++ * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions ++ * on 32-bit kernels: ++ */ ++#ifdef CONFIG_X86_32 ++void enable_sep_cpu(void) ++{ ++ struct tss_struct *tss; ++ int cpu; ++ ++ if (!boot_cpu_has(X86_FEATURE_SEP)) ++ return; ++ ++ cpu = get_cpu(); ++ tss = &per_cpu(cpu_tss_rw, cpu); ++ ++ /* ++ * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- ++ * see the big comment in struct x86_hw_tss's definition. ++ */ ++ ++ tss->x86_tss.ss1 = __KERNEL_CS; ++ wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); ++ wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0); ++ wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); ++ ++ put_cpu(); ++} ++#endif ++ ++void __init identify_boot_cpu(void) ++{ ++ identify_cpu(&boot_cpu_data); ++#ifdef CONFIG_X86_32 ++ sysenter_setup(); ++ enable_sep_cpu(); ++#endif ++ cpu_detect_tlb(&boot_cpu_data); ++ tsx_init(); ++} ++ ++void identify_secondary_cpu(struct cpuinfo_x86 *c) ++{ ++ BUG_ON(c == &boot_cpu_data); ++ identify_cpu(c); ++#ifdef CONFIG_X86_32 ++ enable_sep_cpu(); ++#endif ++ mtrr_ap_init(); ++ validate_apic_and_package_id(c); ++ x86_spec_ctrl_setup_ap(); ++ update_srbds_msr(); ++} ++ ++static __init int setup_noclflush(char *arg) ++{ ++ setup_clear_cpu_cap(X86_FEATURE_CLFLUSH); ++ setup_clear_cpu_cap(X86_FEATURE_CLFLUSHOPT); ++ return 1; ++} ++__setup("noclflush", setup_noclflush); ++ ++void print_cpu_info(struct cpuinfo_x86 *c) ++{ ++ const char *vendor = NULL; ++ ++ if (c->x86_vendor < X86_VENDOR_NUM) { ++ vendor = this_cpu->c_vendor; ++ } else { ++ if (c->cpuid_level >= 0) ++ vendor = c->x86_vendor_id; ++ } ++ ++ if (vendor && !strstr(c->x86_model_id, vendor)) ++ pr_cont("%s ", vendor); ++ ++ if (c->x86_model_id[0]) ++ pr_cont("%s", c->x86_model_id); ++ else ++ pr_cont("%d86", c->x86); ++ ++ pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); ++ ++ if (c->x86_stepping || c->cpuid_level >= 0) ++ pr_cont(", stepping: 0x%x)\n", c->x86_stepping); ++ else ++ pr_cont(")\n"); ++} ++ ++/* ++ * clearcpuid= was already parsed in fpu__init_parse_early_param. ++ * But we need to keep a dummy __setup around otherwise it would ++ * show up as an environment variable for init. 
++ */ ++static __init int setup_clearcpuid(char *arg) ++{ ++ return 1; ++} ++__setup("clearcpuid=", setup_clearcpuid); ++ ++#ifdef CONFIG_X86_64 ++DEFINE_PER_CPU_FIRST(union irq_stack_union, ++ irq_stack_union) __aligned(PAGE_SIZE) __visible; ++EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); ++ ++/* ++ * The following percpu variables are hot. Align current_task to ++ * cacheline size such that they fall in the same cacheline. ++ */ ++DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = ++ &init_task; ++EXPORT_PER_CPU_SYMBOL(current_task); ++ ++DEFINE_PER_CPU(char *, irq_stack_ptr) = ++ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE; ++ ++DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; ++ ++DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; ++EXPORT_PER_CPU_SYMBOL(__preempt_count); ++ ++/* May not be marked __init: used by software suspend */ ++void syscall_init(void) ++{ ++ extern char _entry_trampoline[]; ++ extern char entry_SYSCALL_64_trampoline[]; ++ ++ int cpu = smp_processor_id(); ++ unsigned long SYSCALL64_entry_trampoline = ++ (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline + ++ (entry_SYSCALL_64_trampoline - _entry_trampoline); ++ ++ wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); ++ if (static_cpu_has(X86_FEATURE_PTI)) ++ wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); ++ else ++ wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); ++ ++#ifdef CONFIG_IA32_EMULATION ++ wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); ++ /* ++ * This only works on Intel CPUs. ++ * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. ++ * This does not cause SYSENTER to jump to the wrong location, because ++ * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). ++ */ ++ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); ++ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1)); ++ wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); ++#else ++ wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); ++ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); ++ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); ++ wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); ++#endif ++ ++ /* Flags to clear on syscall */ ++ wrmsrl(MSR_SYSCALL_MASK, ++ X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| ++ X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); ++} ++ ++/* ++ * Copies of the original ist values from the tss are only accessed during ++ * debugging, no special alignment required. 
++ */ ++DEFINE_PER_CPU(struct orig_ist, orig_ist); ++ ++static DEFINE_PER_CPU(unsigned long, debug_stack_addr); ++DEFINE_PER_CPU(int, debug_stack_usage); ++ ++int is_debug_stack(unsigned long addr) ++{ ++ return __this_cpu_read(debug_stack_usage) || ++ (addr <= __this_cpu_read(debug_stack_addr) && ++ addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ)); ++} ++NOKPROBE_SYMBOL(is_debug_stack); ++ ++DEFINE_PER_CPU(u32, debug_idt_ctr); ++ ++void debug_stack_set_zero(void) ++{ ++ this_cpu_inc(debug_idt_ctr); ++ load_current_idt(); ++} ++NOKPROBE_SYMBOL(debug_stack_set_zero); ++ ++void debug_stack_reset(void) ++{ ++ if (WARN_ON(!this_cpu_read(debug_idt_ctr))) ++ return; ++ if (this_cpu_dec_return(debug_idt_ctr) == 0) ++ load_current_idt(); ++} ++NOKPROBE_SYMBOL(debug_stack_reset); ++ ++#else /* CONFIG_X86_64 */ ++ ++DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; ++EXPORT_PER_CPU_SYMBOL(current_task); ++DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; ++EXPORT_PER_CPU_SYMBOL(__preempt_count); ++ ++/* ++ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find ++ * the top of the kernel stack. Use an extra percpu variable to track the ++ * top of the kernel stack directly. ++ */ ++DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = ++ (unsigned long)&init_thread_union + THREAD_SIZE; ++EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack); ++ ++#ifdef CONFIG_STACKPROTECTOR ++DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); ++#endif ++ ++#endif /* CONFIG_X86_64 */ ++ ++/* ++ * Clear all 6 debug registers: ++ */ ++static void clear_all_debug_regs(void) ++{ ++ int i; ++ ++ for (i = 0; i < 8; i++) { ++ /* Ignore db4, db5 */ ++ if ((i == 4) || (i == 5)) ++ continue; ++ ++ set_debugreg(0, i); ++ } ++} ++ ++#ifdef CONFIG_KGDB ++/* ++ * Restore debug regs if using kgdbwait and you have a kernel debugger ++ * connection established. ++ */ ++static void dbg_restore_debug_regs(void) ++{ ++ if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) ++ arch_kgdb_ops.correct_hw_break(); ++} ++#else /* ! CONFIG_KGDB */ ++#define dbg_restore_debug_regs() ++#endif /* ! CONFIG_KGDB */ ++ ++static void wait_for_master_cpu(int cpu) ++{ ++#ifdef CONFIG_SMP ++ /* ++ * wait for ACK from master CPU before continuing ++ * with AP initialization ++ */ ++ WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)); ++ while (!cpumask_test_cpu(cpu, cpu_callout_mask)) ++ cpu_relax(); ++#endif ++} ++ ++/* ++ * cpu_init() initializes state that is per-CPU. Some data is already ++ * initialized (naturally) in the bootstrap process, such as the GDT ++ * and IDT. We reload them nevertheless, this function acts as a ++ * 'CPU state barrier', nothing should get across. ++ * A lot of state is already set up in PDA init for 64 bit ++ */ ++#ifdef CONFIG_X86_64 ++ ++void cpu_init(void) ++{ ++ struct orig_ist *oist; ++ struct task_struct *me; ++ struct tss_struct *t; ++ unsigned long v; ++ int cpu = raw_smp_processor_id(); ++ int i; ++ ++ wait_for_master_cpu(cpu); ++ ++ /* ++ * Initialize the CR4 shadow before doing anything that could ++ * try to read it. 
++ */ ++ cr4_init_shadow(); ++ ++ if (cpu) ++ load_ucode_ap(); ++ ++ t = &per_cpu(cpu_tss_rw, cpu); ++ oist = &per_cpu(orig_ist, cpu); ++ ++#ifdef CONFIG_NUMA ++ if (this_cpu_read(numa_node) == 0 && ++ early_cpu_to_node(cpu) != NUMA_NO_NODE) ++ set_numa_node(early_cpu_to_node(cpu)); ++#endif ++ ++ me = current; ++ ++ pr_debug("Initializing CPU#%d\n", cpu); ++ ++ cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); ++ ++ /* ++ * Initialize the per-CPU GDT with the boot GDT, ++ * and set up the GDT descriptor: ++ */ ++ ++ switch_to_new_gdt(cpu); ++ loadsegment(fs, 0); ++ ++ load_current_idt(); ++ ++ memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); ++ syscall_init(); ++ ++ wrmsrl(MSR_FS_BASE, 0); ++ wrmsrl(MSR_KERNEL_GS_BASE, 0); ++ barrier(); ++ ++ x86_configure_nx(); ++ x2apic_setup(); ++ ++ /* ++ * set up and load the per-CPU TSS ++ */ ++ if (!oist->ist[0]) { ++ char *estacks = get_cpu_entry_area(cpu)->exception_stacks; ++ ++ for (v = 0; v < N_EXCEPTION_STACKS; v++) { ++ estacks += exception_stack_sizes[v]; ++ oist->ist[v] = t->x86_tss.ist[v] = ++ (unsigned long)estacks; ++ if (v == DEBUG_STACK-1) ++ per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; ++ } ++ } ++ ++ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; ++ ++ /* ++ * <= is required because the CPU will access up to ++ * 8 bits beyond the end of the IO permission bitmap. ++ */ ++ for (i = 0; i <= IO_BITMAP_LONGS; i++) ++ t->io_bitmap[i] = ~0UL; ++ ++ mmgrab(&init_mm); ++ me->active_mm = &init_mm; ++ BUG_ON(me->mm); ++ initialize_tlbstate_and_flush(); ++ enter_lazy_tlb(&init_mm, me); ++ ++ /* ++ * Initialize the TSS. sp0 points to the entry trampoline stack ++ * regardless of what task is running. ++ */ ++ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); ++ load_TR_desc(); ++ load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); ++ ++ load_mm_ldt(&init_mm); ++ ++ clear_all_debug_regs(); ++ dbg_restore_debug_regs(); ++ ++ fpu__init_cpu(); ++ ++ if (is_uv_system()) ++ uv_cpu_init(); ++ ++ load_fixmap_gdt(cpu); ++} ++ ++#else ++ ++void cpu_init(void) ++{ ++ int cpu = smp_processor_id(); ++ struct task_struct *curr = current; ++ struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu); ++ ++ wait_for_master_cpu(cpu); ++ ++ /* ++ * Initialize the CR4 shadow before doing anything that could ++ * try to read it. ++ */ ++ cr4_init_shadow(); ++ ++ show_ucode_info_early(); ++ ++ pr_info("Initializing CPU#%d\n", cpu); ++ ++ if (cpu_feature_enabled(X86_FEATURE_VME) || ++ boot_cpu_has(X86_FEATURE_TSC) || ++ boot_cpu_has(X86_FEATURE_DE)) ++ cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); ++ ++ load_current_idt(); ++ switch_to_new_gdt(cpu); ++ ++ /* ++ * Set up and load the per-CPU TSS and LDT ++ */ ++ mmgrab(&init_mm); ++ curr->active_mm = &init_mm; ++ BUG_ON(curr->mm); ++ initialize_tlbstate_and_flush(); ++ enter_lazy_tlb(&init_mm, curr); ++ ++ /* ++ * Initialize the TSS. sp0 points to the entry trampoline stack ++ * regardless of what task is running. 
++ */ ++ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); ++ load_TR_desc(); ++ load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); ++ ++ load_mm_ldt(&init_mm); ++ ++ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; ++ ++#ifdef CONFIG_DOUBLEFAULT ++ /* Set up doublefault TSS pointer in the GDT */ ++ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); ++#endif ++ ++ clear_all_debug_regs(); ++ dbg_restore_debug_regs(); ++ ++ fpu__init_cpu(); ++ ++ load_fixmap_gdt(cpu); ++} ++#endif ++ ++static void bsp_resume(void) ++{ ++ if (this_cpu->c_bsp_resume) ++ this_cpu->c_bsp_resume(&boot_cpu_data); ++} ++ ++static struct syscore_ops cpu_syscore_ops = { ++ .resume = bsp_resume, ++}; ++ ++static int __init init_cpu_syscore(void) ++{ ++ register_syscore_ops(&cpu_syscore_ops); ++ return 0; ++} ++core_initcall(init_cpu_syscore); ++ ++/* ++ * The microcode loader calls this upon late microcode load to recheck features, ++ * only when microcode has been updated. Caller holds microcode_mutex and CPU ++ * hotplug lock. ++ */ ++void microcode_check(void) ++{ ++ struct cpuinfo_x86 info; ++ ++ perf_check_microcode(); ++ ++ /* Reload CPUID max function as it might've changed. */ ++ info.cpuid_level = cpuid_eax(0); ++ ++ /* ++ * Copy all capability leafs to pick up the synthetic ones so that ++ * memcmp() below doesn't fail on that. The ones coming from CPUID will ++ * get overwritten in get_cpu_cap(). ++ */ ++ memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)); ++ ++ get_cpu_cap(&info); ++ ++ if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability))) ++ return; ++ ++ pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); ++ pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); ++} +diff -uprN kernel/arch/x86/kernel/cpu/mtrr/cyrix.c kernel_new/arch/x86/kernel/cpu/mtrr/cyrix.c +--- kernel/arch/x86/kernel/cpu/mtrr/cyrix.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/cpu/mtrr/cyrix.c 2021-04-01 18:28:07.655863287 +0800 +@@ -19,7 +19,7 @@ cyrix_get_arr(unsigned int reg, unsigned + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ +@@ -29,7 +29,7 @@ cyrix_get_arr(unsigned int reg, unsigned + rcr = getCx86(CX86_RCR_BASE + reg); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + shift = ((unsigned char *) base)[1] & 0x0f; + *base >>= PAGE_SHIFT; +@@ -179,6 +179,7 @@ static void cyrix_set_arr(unsigned int r + unsigned long size, mtrr_type type) + { + unsigned char arr, arr_type, arr_size; ++ unsigned long flags; + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + +@@ -222,6 +223,8 @@ static void cyrix_set_arr(unsigned int r + } + } + ++ flags = hard_local_irq_save(); ++ + prepare_set(); + + base <<= PAGE_SHIFT; +@@ -231,6 +234,8 @@ static void cyrix_set_arr(unsigned int r + setCx86(CX86_RCR_BASE + reg, arr_type); + + post_set(); ++ ++ hard_local_irq_restore(flags); + } + + typedef struct { +@@ -248,8 +253,10 @@ static unsigned char ccr_state[7] = { 0, + + static void cyrix_set_all(void) + { ++ unsigned long flags; + int i; + ++ flags = hard_local_irq_save(); + prepare_set(); + + /* the CCRs are not contiguous */ +@@ -264,6 +271,7 @@ 
static void cyrix_set_all(void) + } + + post_set(); ++ hard_local_irq_restore(flags); + } + + static const struct mtrr_ops cyrix_mtrr_ops = { +diff -uprN kernel/arch/x86/kernel/cpu/mtrr/generic.c kernel_new/arch/x86/kernel/cpu/mtrr/generic.c +--- kernel/arch/x86/kernel/cpu/mtrr/generic.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/cpu/mtrr/generic.c 2021-04-01 18:28:07.655863287 +0800 +@@ -785,7 +785,7 @@ static void generic_set_all(void) + unsigned long mask, count; + unsigned long flags; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + prepare_set(); + + /* Actually set the state */ +@@ -795,7 +795,7 @@ static void generic_set_all(void) + pat_init(); + + post_set(); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + /* Use the atomic bitops to update the global mask */ + for (count = 0; count < sizeof mask * 8; ++count) { +@@ -819,12 +819,13 @@ static void generic_set_all(void) + static void generic_set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) + { +- unsigned long flags; ++ unsigned long rflags, vflags; + struct mtrr_var_range *vr; + + vr = &mtrr_state.var_ranges[reg]; + +- local_irq_save(flags); ++ local_irq_save(vflags); ++ rflags = hard_local_irq_save(); + prepare_set(); + + if (size == 0) { +@@ -845,7 +846,8 @@ static void generic_set_mtrr(unsigned in + } + + post_set(); +- local_irq_restore(flags); ++ hard_local_irq_restore(rflags); ++ local_irq_restore(vflags); + } + + int generic_validate_add_page(unsigned long base, unsigned long size, +diff -uprN kernel/arch/x86/kernel/fpu/core.c kernel_new/arch/x86/kernel/fpu/core.c +--- kernel/arch/x86/kernel/fpu/core.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/fpu/core.c 2021-04-01 18:28:07.655863287 +0800 +@@ -35,30 +35,13 @@ union fpregs_state init_fpstate __read_m + * + * - to debug kernel_fpu_begin()/end() correctness + */ +-static DEFINE_PER_CPU(bool, in_kernel_fpu); ++DEFINE_PER_CPU(bool, in_kernel_fpu); + + /* + * Track which context is using the FPU on the CPU: + */ + DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +-static void kernel_fpu_disable(void) +-{ +- WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); +- this_cpu_write(in_kernel_fpu, true); +-} +- +-static void kernel_fpu_enable(void) +-{ +- WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); +- this_cpu_write(in_kernel_fpu, false); +-} +- +-static bool kernel_fpu_disabled(void) +-{ +- return this_cpu_read(in_kernel_fpu); +-} +- + static bool interrupted_kernel_fpu_idle(void) + { + return !kernel_fpu_disabled(); +@@ -96,9 +79,11 @@ EXPORT_SYMBOL(irq_fpu_usable); + static void __kernel_fpu_begin(void) + { + struct fpu *fpu = ¤t->thread.fpu; ++ unsigned long flags; + + WARN_ON_FPU(!irq_fpu_usable()); + ++ flags = hard_cond_local_irq_save(); + kernel_fpu_disable(); + + if (fpu->initialized) { +@@ -110,16 +95,20 @@ static void __kernel_fpu_begin(void) + } else { + __cpu_invalidate_fpregs_state(); + } ++ hard_cond_local_irq_restore(flags); + } + + static void __kernel_fpu_end(void) + { + struct fpu *fpu = ¤t->thread.fpu; ++ unsigned long flags; + ++ flags = hard_cond_local_irq_save(); + if (fpu->initialized) + copy_kernel_to_fpregs(&fpu->state); + + kernel_fpu_enable(); ++ hard_cond_local_irq_restore(flags); + } + + void kernel_fpu_begin(void) +@@ -143,9 +132,11 @@ EXPORT_SYMBOL_GPL(kernel_fpu_end); + */ + void fpu__save(struct fpu *fpu) + { ++ unsigned long flags; ++ + WARN_ON_FPU(fpu != ¤t->thread.fpu); + +- preempt_disable(); ++ flags = hard_preempt_disable(); + 
trace_x86_fpu_before_save(fpu); + if (fpu->initialized) { + if (!copy_fpregs_to_fpstate(fpu)) { +@@ -153,7 +144,7 @@ void fpu__save(struct fpu *fpu) + } + } + trace_x86_fpu_after_save(fpu); +- preempt_enable(); ++ hard_preempt_enable(flags); + } + EXPORT_SYMBOL_GPL(fpu__save); + +@@ -315,6 +306,9 @@ void fpu__prepare_write(struct fpu *fpu) + */ + void fpu__restore(struct fpu *fpu) + { ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); + fpu__initialize(fpu); + + /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ +@@ -324,9 +318,18 @@ void fpu__restore(struct fpu *fpu) + copy_kernel_to_fpregs(&fpu->state); + trace_x86_fpu_after_restore(fpu); + kernel_fpu_enable(); ++ hard_local_irq_restore(flags); + } + EXPORT_SYMBOL_GPL(fpu__restore); + ++#ifdef CONFIG_IPIPE ++#define FWAIT_PROLOGUE "sti\n" ++#define FWAIT_EPILOGUE "cli\n" ++#else ++#define FWAIT_PROLOGUE ++#define FWAIT_EPILOGUE ++#endif ++ + /* + * Drops current FPU state: deactivates the fpregs and + * the fpstate. NOTE: it still leaves previous contents +@@ -338,13 +341,16 @@ EXPORT_SYMBOL_GPL(fpu__restore); + */ + void fpu__drop(struct fpu *fpu) + { +- preempt_disable(); ++ unsigned long flags; + ++ flags = hard_preempt_disable(); + if (fpu == ¤t->thread.fpu) { + if (fpu->initialized) { + /* Ignore delayed exceptions from user space */ +- asm volatile("1: fwait\n" ++ asm volatile(FWAIT_PROLOGUE ++ "1: fwait\n" + "2:\n" ++ FWAIT_EPILOGUE + _ASM_EXTABLE(1b, 2b)); + fpregs_deactivate(fpu); + } +@@ -354,7 +360,7 @@ void fpu__drop(struct fpu *fpu) + + trace_x86_fpu_dropped(fpu); + +- preempt_enable(); ++ hard_preempt_enable(flags); + } + + /* +@@ -382,6 +388,8 @@ static inline void copy_init_fpstate_to_ + */ + void fpu__clear(struct fpu *fpu) + { ++ unsigned long flags; ++ + WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ + + fpu__drop(fpu); +@@ -390,11 +398,11 @@ void fpu__clear(struct fpu *fpu) + * Make sure fpstate is cleared and initialized. 
+ */ + if (static_cpu_has(X86_FEATURE_FPU)) { +- preempt_disable(); ++ flags = hard_local_irq_save(); + fpu__initialize(fpu); + user_fpu_begin(); + copy_init_fpstate_to_fpregs(); +- preempt_enable(); ++ hard_local_irq_restore(flags); + } + } + +diff -uprN kernel/arch/x86/kernel/i8259.c kernel_new/arch/x86/kernel/i8259.c +--- kernel/arch/x86/kernel/i8259.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/i8259.c 2021-04-01 18:28:07.655863287 +0800 +@@ -33,7 +33,7 @@ + static void init_8259A(int auto_eoi); + + static int i8259A_auto_eoi; +-DEFINE_RAW_SPINLOCK(i8259A_lock); ++IPIPE_DEFINE_RAW_SPINLOCK(i8259A_lock); + + /* + * 8259A PIC functions to handle ISA devices: +@@ -61,6 +61,7 @@ static void mask_8259A_irq(unsigned int + unsigned long flags; + + raw_spin_lock_irqsave(&i8259A_lock, flags); ++ ipipe_lock_irq(irq); + cached_irq_mask |= mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); +@@ -76,15 +77,18 @@ static void disable_8259A_irq(struct irq + + static void unmask_8259A_irq(unsigned int irq) + { +- unsigned int mask = ~(1 << irq); ++ unsigned int mask = (1 << irq); + unsigned long flags; + + raw_spin_lock_irqsave(&i8259A_lock, flags); +- cached_irq_mask &= mask; +- if (irq & 8) +- outb(cached_slave_mask, PIC_SLAVE_IMR); +- else +- outb(cached_master_mask, PIC_MASTER_IMR); ++ if (cached_irq_mask & mask) { ++ cached_irq_mask &= ~mask; ++ if (irq & 8) ++ outb(cached_slave_mask, PIC_SLAVE_IMR); ++ else ++ outb(cached_master_mask, PIC_MASTER_IMR); ++ ipipe_unlock_irq(irq); ++ } + raw_spin_unlock_irqrestore(&i8259A_lock, flags); + } + +@@ -171,6 +175,18 @@ static void mask_and_ack_8259A(struct ir + */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; ++#ifdef CONFIG_IPIPE ++ if (irq == 0) { ++ /* ++ * Fast timer ack -- don't mask (unless supposedly ++ * spurious). We trace outb's in order to detect ++ * broken hardware inducing large delays. ++ */ ++ outb(0x60, PIC_MASTER_CMD); /* Specific EOI to master. 
*/ ++ raw_spin_unlock_irqrestore(&i8259A_lock, flags); ++ return; ++ } ++#endif /* CONFIG_IPIPE */ + cached_irq_mask |= irqmask; + + handle_real_irq: +@@ -227,6 +243,7 @@ struct irq_chip i8259A_chip = { + .irq_disable = disable_8259A_irq, + .irq_unmask = enable_8259A_irq, + .irq_mask_ack = mask_and_ack_8259A, ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static char irq_trigger[2]; +diff -uprN kernel/arch/x86/kernel/idt.c kernel_new/arch/x86/kernel/idt.c +--- kernel/arch/x86/kernel/idt.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/idt.c 2021-04-01 18:28:07.655863287 +0800 +@@ -116,6 +116,10 @@ static const __initconst struct idt_data + INTG(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt), + INTG(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt), + INTG(REBOOT_VECTOR, reboot_interrupt), ++#ifdef CONFIG_IPIPE ++ INTG(IPIPE_RESCHEDULE_VECTOR, ipipe_reschedule_interrupt), ++ INTG(IPIPE_CRITICAL_VECTOR, ipipe_critical_interrupt), ++#endif + #endif + + #ifdef CONFIG_X86_THERMAL_VECTOR +@@ -146,6 +150,9 @@ static const __initconst struct idt_data + #endif + INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt), + INTG(ERROR_APIC_VECTOR, error_interrupt), ++#ifdef CONFIG_IPIPE ++ INTG(IPIPE_HRTIMER_VECTOR, ipipe_hrtimer_interrupt), ++#endif + #endif + }; + +@@ -310,9 +317,26 @@ void __init idt_setup_apic_and_irq_gates + { + int i = FIRST_EXTERNAL_VECTOR; + void *entry; ++ unsigned int __maybe_unused cpu, ret; + + idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); + ++#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) ++ /* ++ * The cleanup vector is not part of the system vector range ++ * but rather belongs to the external IRQ range, however we ++ * still need to map it early to a legit interrupt number for ++ * pipelining. Allocate a specific descriptor manually for it, ++ * using IRQ_MOVE_CLEANUP_VECTOR as both the vector number and ++ * interrupt number, so that we know the latter at build time. ++ */ ++ ret = irq_alloc_descs(IRQ_MOVE_CLEANUP_VECTOR, 0, 1, 0); ++ BUG_ON(IRQ_MOVE_CLEANUP_VECTOR != ret); ++ for_each_possible_cpu(cpu) ++ per_cpu(vector_irq, cpu)[IRQ_MOVE_CLEANUP_VECTOR] = ++ irq_to_desc(IRQ_MOVE_CLEANUP_VECTOR); ++#endif ++ + for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) { + entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); + set_intr_gate(i, entry); +diff -uprN kernel/arch/x86/kernel/ipipe.c kernel_new/arch/x86/kernel/ipipe.c +--- kernel/arch/x86/kernel/ipipe.c 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/ipipe.c 2021-04-01 18:28:07.655863287 +0800 +@@ -0,0 +1,564 @@ ++/* -*- linux-c -*- ++ * linux/arch/x86/kernel/ipipe.c ++ * ++ * Copyright (C) 2002-2012 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ * ++ * Architecture-dependent I-PIPE support for x86. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_X86_LOCAL_APIC ++#include ++#include ++#include ++#include ++#ifdef CONFIG_X86_IO_APIC ++#include ++#endif /* CONFIG_X86_IO_APIC */ ++#include ++#endif /* CONFIG_X86_LOCAL_APIC */ ++#include ++#include ++#include ++#include ++#include ++ ++void smp_apic_timer_interrupt(struct pt_regs *regs); ++void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs); ++void smp_kvm_posted_intr_ipi(struct pt_regs *regs); ++void smp_spurious_interrupt(struct pt_regs *regs); ++void smp_error_interrupt(struct pt_regs *regs); ++void smp_x86_platform_ipi(struct pt_regs *regs); ++void smp_irq_work_interrupt(struct pt_regs *regs); ++void smp_reschedule_interrupt(struct pt_regs *regs); ++void smp_call_function_interrupt(struct pt_regs *regs); ++void smp_call_function_single_interrupt(struct pt_regs *regs); ++void smp_irq_move_cleanup_interrupt(struct pt_regs *regs); ++void smp_reboot_interrupt(void); ++void smp_thermal_interrupt(struct pt_regs *regs); ++void smp_threshold_interrupt(struct pt_regs *regs); ++ ++DEFINE_PER_CPU(unsigned long, __ipipe_cr2); ++EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_cr2); ++ ++int ipipe_get_sysinfo(struct ipipe_sysinfo *info) ++{ ++ info->sys_nr_cpus = num_online_cpus(); ++ info->sys_cpu_freq = __ipipe_cpu_freq; ++ info->sys_hrtimer_irq = per_cpu(ipipe_percpu.hrtimer_irq, 0); ++ info->sys_hrtimer_freq = __ipipe_hrtimer_freq; ++ info->sys_hrclock_freq = __ipipe_hrclock_freq; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(ipipe_get_sysinfo); ++ ++static void __ipipe_do_IRQ(unsigned int irq, void *cookie) ++{ ++ void (*handler)(struct pt_regs *regs); ++ struct pt_regs *regs; ++ ++ regs = raw_cpu_ptr(&ipipe_percpu.tick_regs); ++ regs->orig_ax = ~__ipipe_get_irq_vector(irq); ++ handler = (typeof(handler))cookie; ++ handler(regs); ++} ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ ++static void __ipipe_noack_apic(struct irq_desc *desc) ++{ ++} ++ ++static void __ipipe_ack_apic(struct irq_desc *desc) ++{ ++ __ack_APIC_irq(); ++} ++ ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ ++/* ++ * __ipipe_enable_pipeline() -- We are running on the boot CPU, hw ++ * interrupts are off, and secondary CPUs are still lost in space. ++ */ ++void __init __ipipe_enable_pipeline(void) ++{ ++ unsigned int irq; ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ ++ /* Map the APIC system vectors. 
*/ ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR), ++ __ipipe_do_IRQ, smp_apic_timer_interrupt, ++ __ipipe_ack_apic); ++ ++#ifdef CONFIG_HAVE_KVM ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(POSTED_INTR_WAKEUP_VECTOR), ++ __ipipe_do_IRQ, smp_kvm_posted_intr_wakeup_ipi, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(POSTED_INTR_VECTOR), ++ __ipipe_do_IRQ, smp_kvm_posted_intr_ipi, ++ __ipipe_ack_apic); ++#endif ++ ++#if defined(CONFIG_X86_MCE_AMD) && defined(CONFIG_X86_64) ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(DEFERRED_ERROR_VECTOR), ++ __ipipe_do_IRQ, smp_deferred_error_interrupt, ++ __ipipe_ack_apic); ++#endif ++ ++#ifdef CONFIG_X86_UV ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(UV_BAU_MESSAGE), ++ __ipipe_do_IRQ, uv_bau_message_interrupt, ++ __ipipe_ack_apic); ++#endif /* CONFIG_X86_UV */ ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR), ++ __ipipe_do_IRQ, smp_spurious_interrupt, ++ __ipipe_noack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(ERROR_APIC_VECTOR), ++ __ipipe_do_IRQ, smp_error_interrupt, ++ __ipipe_ack_apic); ++ ++#ifdef CONFIG_X86_THERMAL_VECTOR ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(THERMAL_APIC_VECTOR), ++ __ipipe_do_IRQ, smp_thermal_interrupt, ++ __ipipe_ack_apic); ++#endif /* CONFIG_X86_THERMAL_VECTOR */ ++ ++#ifdef CONFIG_X86_MCE_THRESHOLD ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(THRESHOLD_APIC_VECTOR), ++ __ipipe_do_IRQ, smp_threshold_interrupt, ++ __ipipe_ack_apic); ++#endif /* CONFIG_X86_MCE_THRESHOLD */ ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(X86_PLATFORM_IPI_VECTOR), ++ __ipipe_do_IRQ, smp_x86_platform_ipi, ++ __ipipe_ack_apic); ++ ++ /* ++ * We expose two high priority APIC vectors the head domain ++ * may use respectively for hires timing and SMP rescheduling. ++ * We should never receive them in the root domain. ++ */ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(IPIPE_HRTIMER_VECTOR), ++ __ipipe_do_IRQ, smp_spurious_interrupt, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(IPIPE_RESCHEDULE_VECTOR), ++ __ipipe_do_IRQ, smp_spurious_interrupt, ++ __ipipe_ack_apic); ++ ++#ifdef CONFIG_IRQ_WORK ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(IRQ_WORK_VECTOR), ++ __ipipe_do_IRQ, smp_irq_work_interrupt, ++ __ipipe_ack_apic); ++#endif /* CONFIG_IRQ_WORK */ ++ ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ ++#ifdef CONFIG_SMP ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(RESCHEDULE_VECTOR), ++ __ipipe_do_IRQ, smp_reschedule_interrupt, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR), ++ __ipipe_do_IRQ, smp_call_function_interrupt, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(CALL_FUNCTION_SINGLE_VECTOR), ++ __ipipe_do_IRQ, smp_call_function_single_interrupt, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ IRQ_MOVE_CLEANUP_VECTOR, ++ __ipipe_do_IRQ, smp_irq_move_cleanup_interrupt, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(REBOOT_VECTOR), ++ __ipipe_do_IRQ, smp_reboot_interrupt, ++ __ipipe_ack_apic); ++#endif /* CONFIG_SMP */ ++ ++ /* ++ * Finally, request the remaining ISA and IO-APIC ++ * interrupts. 
Interrupts which have already been requested ++ * will just beget a silent -EBUSY error, that's ok. ++ */ ++ for (irq = 0; irq < IPIPE_NR_XIRQS; irq++) ++ ipipe_request_irq(ipipe_root_domain, irq, ++ __ipipe_do_IRQ, do_IRQ, ++ NULL); ++} ++ ++#ifdef CONFIG_SMP ++int irq_activate(struct irq_desc *desc); ++ ++int ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask) ++{ ++ struct irq_desc *desc; ++ struct irq_chip *chip; ++ int err; ++ ++ cpumask_and(&cpumask, &cpumask, cpu_online_mask); ++ if (cpumask_empty(&cpumask) || ipipe_virtual_irq_p(irq)) ++ return -EINVAL; ++ ++ desc = irq_to_desc(irq); ++ if (desc == NULL) ++ return -EINVAL; ++ ++ chip = irq_desc_get_chip(desc); ++ if (chip->irq_set_affinity == NULL) ++ return -ENOSYS; ++ ++ err = irq_activate(desc); ++ if (err) ++ return err; ++ ++ chip->irq_set_affinity(irq_get_irq_data(irq), &cpumask, true); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(ipipe_set_irq_affinity); ++ ++void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask) ++{ ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ ++ cpumask_clear_cpu(ipipe_processor_id(), &cpumask); ++ if (likely(!cpumask_empty(&cpumask))) ++ apic->send_IPI_mask(&cpumask, ipipe_apic_irq_vector(ipi)); ++ ++ hard_local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(ipipe_send_ipi); ++ ++void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd) ++{ ++ unsigned int ipi = IPIPE_CRITICAL_IPI; ++ ++ ipd->irqs[ipi].ackfn = __ipipe_ack_apic; ++ ipd->irqs[ipi].handler = __ipipe_do_critical_sync; ++ ipd->irqs[ipi].cookie = NULL; ++ ipd->irqs[ipi].control = IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK; ++} ++ ++#endif /* CONFIG_SMP */ ++ ++void __ipipe_halt_root(int use_mwait) ++{ ++ struct ipipe_percpu_domain_data *p; ++ ++ /* Emulate sti+hlt sequence over the root domain. */ ++ ++ hard_local_irq_disable(); ++ ++ p = ipipe_this_cpu_root_context(); ++ ++ trace_hardirqs_on(); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (unlikely(__ipipe_ipending_p(p))) { ++ __ipipe_sync_stage(); ++ hard_local_irq_enable(); ++ } else { ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ ipipe_trace_end(0x8000000E); ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++ if (use_mwait) ++ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" ++ :: "a" (0), "c" (0)); ++ else ++ asm volatile("sti; hlt": : :"memory"); ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_halt_root); ++ ++static inline void __ipipe_fixup_if(bool stalled, struct pt_regs *regs) ++{ ++ /* ++ * Have the saved hw state look like the domain stall bit, so ++ * that __ipipe_unstall_iret_root() restores the proper ++ * pipeline state for the root stage upon exit. ++ */ ++ if (stalled) ++ regs->flags &= ~X86_EFLAGS_IF; ++ else ++ regs->flags |= X86_EFLAGS_IF; ++} ++ ++dotraplinkage int __ipipe_trap_prologue(struct pt_regs *regs, int trapnr, unsigned long *flags) ++{ ++ bool entry_irqs_off = hard_irqs_disabled(); ++ struct ipipe_domain *ipd; ++ unsigned long cr2; ++ ++ if (trapnr == X86_TRAP_PF) ++ cr2 = native_read_cr2(); ++ ++ /* ++ * KGDB and ftrace may poke int3/debug ops into the kernel ++ * code. Trap those exceptions early, do conditional fixups to ++ * the interrupt state depending on the current domain, let ++ * the regular handler see them. ++ */ ++ if (unlikely(!user_mode(regs) && ++ (trapnr == X86_TRAP_DB || trapnr == X86_TRAP_BP))) { ++ ++ if (ipipe_root_p) ++ goto root_fixup; ++ ++ /* ++ * Skip interrupt state fixup from the head domain, ++ * but do call the regular handler which is assumed to ++ * run fine within such context. 
++ */ ++ return -1; ++ } ++ ++ /* ++ * Now that we have filtered out all debug traps which might ++ * happen anywhere in kernel code in theory, detect attempts ++ * to probe kernel memory (i.e. calls to probe_kernel_{read, ++ * write}()). If that happened over the head domain, do the ++ * fixup immediately then return right after upon success. If ++ * that fails, the kernel is likely to crash but let's follow ++ * the standard recovery procedure in that case anyway. ++ */ ++ if (unlikely(!ipipe_root_p && faulthandler_disabled())) { ++ if (fixup_exception(regs, trapnr)) ++ return 1; ++ } ++ ++ if (unlikely(__ipipe_notify_trap(trapnr, regs))) ++ return 1; ++ ++ if (likely(ipipe_root_p)) { ++ root_fixup: ++ /* ++ * If no head domain is installed, or in case we faulted in ++ * the iret path of x86-32, regs->flags does not match the root ++ * domain state. The fault handler may evaluate it. So fix this ++ * up with the current state. ++ */ ++ local_save_flags(*flags); ++ __ipipe_fixup_if(raw_irqs_disabled_flags(*flags), regs); ++ ++ /* ++ * Sync Linux interrupt state with hardware state on ++ * entry. ++ */ ++ if (entry_irqs_off) ++ local_irq_disable(); ++ } else { ++ /* Plan for restoring the original flags at fault. */ ++ *flags = regs->flags; ++ ++ /* ++ * Detect unhandled faults over the head domain, ++ * switching to root so that it can handle the fault ++ * cleanly. ++ */ ++ hard_local_irq_disable(); ++ ipd = __ipipe_current_domain; ++ __ipipe_set_current_domain(ipipe_root_domain); ++ ++ /* Sync Linux interrupt state with hardware state on entry. */ ++ if (entry_irqs_off) ++ local_irq_disable(); ++ ++ ipipe_trace_panic_freeze(); ++ ++ /* Always warn about user land and unfixable faults. */ ++ if (user_mode(regs) || ++ !search_exception_tables(instruction_pointer(regs))) { ++ printk(KERN_ERR "BUG: Unhandled exception over domain" ++ " %s at 0x%lx - switching to ROOT\n", ++ ipd->name, instruction_pointer(regs)); ++ dump_stack(); ++ ipipe_trace_panic_dump(); ++ } else if (IS_ENABLED(CONFIG_IPIPE_DEBUG)) { ++ /* Also report fixable ones when debugging is enabled. */ ++ printk(KERN_WARNING "WARNING: Fixable exception over " ++ "domain %s at 0x%lx - switching to ROOT\n", ++ ipd->name, instruction_pointer(regs)); ++ dump_stack(); ++ ipipe_trace_panic_dump(); ++ } ++ } ++ ++ if (trapnr == X86_TRAP_PF) ++ write_cr2(cr2); ++ ++ return 0; ++} ++ ++dotraplinkage ++void __ipipe_trap_epilogue(struct pt_regs *regs, ++ unsigned long flags, unsigned long regs_flags) ++{ ++ ipipe_restore_root(raw_irqs_disabled_flags(flags)); ++ __ipipe_fixup_if(raw_irqs_disabled_flags(regs_flags), regs); ++} ++ ++static inline int __ipipe_irq_from_vector(int vector, int *irq) ++{ ++ struct irq_desc *desc; ++ ++ if (vector >= FIRST_SYSTEM_VECTOR) { ++ *irq = ipipe_apic_vector_irq(vector); ++ return 0; ++ } ++ ++ desc = __this_cpu_read(vector_irq[vector]); ++ if (likely(!IS_ERR_OR_NULL(desc))) { ++ *irq = irq_desc_get_irq(desc); ++ return 0; ++ } ++ ++ if (vector == IRQ_MOVE_CLEANUP_VECTOR) { ++ *irq = vector; ++ return 0; ++ } ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ __ack_APIC_irq(); ++#endif ++ pr_err("unexpected IRQ trap at vector %#x\n", vector); ++ return -1; ++} ++ ++int __ipipe_handle_irq(struct pt_regs *regs) ++{ ++ struct ipipe_percpu_data *p = __ipipe_raw_cpu_ptr(&ipipe_percpu); ++ int irq, vector = regs->orig_ax, flags = 0; ++ struct pt_regs *tick_regs; ++ ++ if (likely(vector < 0)) { ++ if (__ipipe_irq_from_vector(~vector, &irq) < 0) ++ goto out; ++ } else { /* Software-generated. 
*/ ++ irq = vector; ++ flags = IPIPE_IRQF_NOACK; ++ } ++ ++ ipipe_trace_irqbegin(irq, regs); ++ ++ /* ++ * Given our deferred dispatching model for regular IRQs, we ++ * only record CPU regs for the last timer interrupt, so that ++ * the timer handler charges CPU times properly. It is assumed ++ * that no other interrupt handler cares for such information. ++ */ ++ if (irq == p->hrtimer_irq || p->hrtimer_irq == -1) { ++ tick_regs = &p->tick_regs; ++ tick_regs->flags = regs->flags; ++ tick_regs->cs = regs->cs; ++ tick_regs->ip = regs->ip; ++ tick_regs->bp = regs->bp; ++#ifdef CONFIG_X86_64 ++ tick_regs->ss = regs->ss; ++ tick_regs->sp = regs->sp; ++#endif ++ if (!__ipipe_root_p) ++ tick_regs->flags &= ~X86_EFLAGS_IF; ++ } ++ ++ __ipipe_dispatch_irq(irq, flags); ++ ++ if (user_mode(regs) && ipipe_test_thread_flag(TIP_MAYDAY)) ++ __ipipe_call_mayday(regs); ++ ++ ipipe_trace_irqend(irq, regs); ++ ++out: ++ if (!__ipipe_root_p || ++ test_bit(IPIPE_STALL_FLAG, &__ipipe_root_status)) ++ return 0; ++ ++ return 1; ++} ++ ++void __ipipe_arch_share_current(int flags) ++{ ++ struct task_struct *p = current; ++ ++ /* ++ * Setup a clean extended FPU state for kernel threads. ++ */ ++ if (p->mm == NULL) ++ memcpy(&p->thread.fpu.state, ++ &init_fpstate, fpu_kernel_xstate_size); ++} ++ ++struct task_struct *__switch_to(struct task_struct *prev_p, ++ struct task_struct *next_p); ++EXPORT_SYMBOL_GPL(do_munmap); ++EXPORT_SYMBOL_GPL(__switch_to); ++EXPORT_SYMBOL_GPL(show_stack); ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) ++EXPORT_SYMBOL(tasklist_lock); ++#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */ ++ ++#if defined(CONFIG_CC_STACKPROTECTOR) && defined(CONFIG_X86_64) ++EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); ++#endif +diff -uprN kernel/arch/x86/kernel/irq_64.c kernel_new/arch/x86/kernel/irq_64.c +--- kernel/arch/x86/kernel/irq_64.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/irq_64.c 2021-04-01 18:28:07.655863287 +0800 +@@ -47,28 +47,30 @@ static inline void stack_overflow_check( + u64 irq_stack_top, irq_stack_bottom; + u64 estack_top, estack_bottom; + u64 curbase = (u64)task_stack_page(current); ++ unsigned long sp; + + if (user_mode(regs)) + return; + +- if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && +- regs->sp <= curbase + THREAD_SIZE) ++ sp = IS_ENABLED(CONFIG_IPIPE) ? 
current_stack_pointer : regs->sp; ++ if (sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && ++ sp <= curbase + THREAD_SIZE) + return; + + irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) + + STACK_TOP_MARGIN; + irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr); +- if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) ++ if (sp >= irq_stack_top && sp <= irq_stack_bottom) + return; + + oist = this_cpu_ptr(&orig_ist); + estack_bottom = (u64)oist->ist[DEBUG_STACK]; + estack_top = estack_bottom - DEBUG_STKSZ + STACK_TOP_MARGIN; +- if (regs->sp >= estack_top && regs->sp <= estack_bottom) ++ if (sp >= estack_top && sp <= estack_bottom) + return; + + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", +- current->comm, curbase, regs->sp, ++ current->comm, curbase, sp, + irq_stack_top, irq_stack_bottom, + estack_top, estack_bottom, (void *)regs->ip); + +diff -uprN kernel/arch/x86/kernel/irq.c kernel_new/arch/x86/kernel/irq.c +--- kernel/arch/x86/kernel/irq.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/irq.c 2021-04-01 18:28:07.656863286 +0800 +@@ -48,7 +48,7 @@ void ack_bad_irq(unsigned int irq) + * completely. + * But only ack when the APIC is enabled -AK + */ +- ack_APIC_irq(); ++ __ack_APIC_irq(); + } + + #define irq_stats(x) (&per_cpu(irq_stat, x)) +@@ -236,12 +236,13 @@ __visible unsigned int __irq_entry do_IR + /* high bit used in ret_from_ code */ + unsigned vector = ~regs->orig_ax; + ++ desc = __this_cpu_read(vector_irq[vector]); ++ __ipipe_move_root_irq(desc); + entering_irq(); + + /* entering_irq() tells RCU that we're not quiescent. Check it. */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); + +- desc = __this_cpu_read(vector_irq[vector]); + + if (!handle_irq(desc, regs)) { + ack_APIC_irq(); +diff -uprN kernel/arch/x86/kernel/kgdb.c kernel_new/arch/x86/kernel/kgdb.c +--- kernel/arch/x86/kernel/kgdb.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/kgdb.c 2021-04-01 18:28:07.656863286 +0800 +@@ -598,9 +598,9 @@ kgdb_notify(struct notifier_block *self, + unsigned long flags; + int ret; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + ret = __kgdb_notify(ptr, cmd); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + return ret; + } +diff -uprN kernel/arch/x86/kernel/Makefile kernel_new/arch/x86/kernel/Makefile +--- kernel/arch/x86/kernel/Makefile 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/Makefile 2021-04-01 18:28:07.656863286 +0800 +@@ -79,6 +79,7 @@ obj-y += reboot.o + obj-$(CONFIG_X86_MSR) += msr.o + obj-$(CONFIG_X86_CPUID) += cpuid.o + obj-$(CONFIG_PCI) += early-quirks.o ++obj-$(CONFIG_IPIPE) += ipipe.o + apm-y := apm_32.o + obj-$(CONFIG_APM) += apm.o + obj-$(CONFIG_SMP) += smp.o +diff -uprN kernel/arch/x86/kernel/process_64.c kernel_new/arch/x86/kernel/process_64.c +--- kernel/arch/x86/kernel/process_64.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/process_64.c 2021-04-01 18:28:07.656863286 +0800 +@@ -431,7 +431,7 @@ __switch_to(struct task_struct *prev_p, + struct thread_struct *next = &next_p->thread; + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; +- int cpu = smp_processor_id(); ++ int cpu = raw_smp_processor_id(); + + WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && + this_cpu_read(irq_count) != -1); +diff -uprN kernel/arch/x86/kernel/process.c kernel_new/arch/x86/kernel/process.c +--- 
kernel/arch/x86/kernel/process.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/process.c 2021-04-01 18:28:07.656863286 +0800 +@@ -113,8 +113,16 @@ void exit_thread(struct task_struct *tsk + if (bp) { + struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); + +- t->io_bitmap_ptr = NULL; ++ /* ++ * The caller may be preempted via I-pipe: to make ++ * sure TIF_IO_BITMAP always denotes a valid I/O ++ * bitmap when set, we clear it _before_ the I/O ++ * bitmap pointer. No cache coherence issue ahead as ++ * migration is currently locked (the primary domain ++ * may never migrate either). ++ */ + clear_thread_flag(TIF_IO_BITMAP); ++ t->io_bitmap_ptr = NULL; + /* + * Careful, clear this in the TSS too: + */ +@@ -411,7 +419,9 @@ static __always_inline void __speculatio + u64 msr = x86_spec_ctrl_base; + bool updmsr = false; + ++#ifndef CONFIG_IPIPE + lockdep_assert_irqs_disabled(); ++#endif + + /* Handle change of TIF_SSBD depending on the mitigation method. */ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { +@@ -459,9 +469,9 @@ void speculation_ctrl_update(unsigned lo + unsigned long flags; + + /* Forced update. Make sure all relevant TIF flags are different */ +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + __speculation_ctrl_update(~tif, tif); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /* Called from seccomp/prctl update */ +@@ -574,7 +584,7 @@ bool xen_set_default_idle(void) + + void stop_this_cpu(void *dummy) + { +- local_irq_disable(); ++ hard_local_irq_disable(); + /* + * Remove this CPU: + */ +@@ -670,7 +680,11 @@ static __cpuidle void mwait_idle(void) + + __monitor((void *)¤t_thread_info()->flags, 0, 0); + if (!need_resched()) ++#ifdef CONFIG_IPIPE ++ __ipipe_halt_root(1); ++#else + __sti_mwait(0, 0); ++#endif + else + local_irq_enable(); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); +@@ -730,6 +744,10 @@ void __init arch_post_acpi_subsys_init(v + if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halt in AMD C1E"); + pr_info("System has AMD C1E enabled\n"); ++#ifdef CONFIG_IPIPE ++ pr_info("I-pipe: will not be able to use LAPIC as a tick device\n" ++ "I-pipe: disable C1E power state in your BIOS\n"); ++#endif + } + + static int __init idle_setup(char *str) +diff -uprN kernel/arch/x86/kernel/process.c.orig kernel_new/arch/x86/kernel/process.c.orig +--- kernel/arch/x86/kernel/process.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/process.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,854 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "process.h" ++ ++/* ++ * per-CPU TSS segments. Threads are completely 'soft' on Linux, ++ * no more per-task TSS's. The TSS size is kept cacheline-aligned ++ * so they are allowed to end up in the .data..cacheline_aligned ++ * section. Since TSS's are completely CPU-local, we want them ++ * on exact cacheline boundaries, to eliminate cacheline ping-pong. 
++ */ ++__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = { ++ .x86_tss = { ++ /* ++ * .sp0 is only used when entering ring 0 from a lower ++ * privilege level. Since the init task never runs anything ++ * but ring 0 code, there is no need for a valid value here. ++ * Poison it. ++ */ ++ .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, ++ ++ /* ++ * .sp1 is cpu_current_top_of_stack. The init task never ++ * runs user code, but cpu_current_top_of_stack should still ++ * be well defined before the first context switch. ++ */ ++ .sp1 = TOP_OF_INIT_STACK, ++ ++#ifdef CONFIG_X86_32 ++ .ss0 = __KERNEL_DS, ++ .ss1 = __KERNEL_CS, ++ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, ++#endif ++ }, ++#ifdef CONFIG_X86_32 ++ /* ++ * Note that the .io_bitmap member must be extra-big. This is because ++ * the CPU will access an additional byte beyond the end of the IO ++ * permission bitmap. The extra byte must be all 1 bits, and must ++ * be within the limit. ++ */ ++ .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, ++#endif ++}; ++EXPORT_PER_CPU_SYMBOL(cpu_tss_rw); ++ ++DEFINE_PER_CPU(bool, __tss_limit_invalid); ++EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); ++ ++/* ++ * this gets called so that we can store lazy state into memory and copy the ++ * current task into the new thread. ++ */ ++int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) ++{ ++ memcpy(dst, src, arch_task_struct_size); ++#ifdef CONFIG_VM86 ++ dst->thread.vm86 = NULL; ++#endif ++ ++ return fpu__copy(&dst->thread.fpu, &src->thread.fpu); ++} ++ ++/* ++ * Free current thread data structures etc.. ++ */ ++void exit_thread(struct task_struct *tsk) ++{ ++ struct thread_struct *t = &tsk->thread; ++ unsigned long *bp = t->io_bitmap_ptr; ++ struct fpu *fpu = &t->fpu; ++ ++ if (bp) { ++ struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); ++ ++ t->io_bitmap_ptr = NULL; ++ clear_thread_flag(TIF_IO_BITMAP); ++ /* ++ * Careful, clear this in the TSS too: ++ */ ++ memset(tss->io_bitmap, 0xff, t->io_bitmap_max); ++ t->io_bitmap_max = 0; ++ put_cpu(); ++ kfree(bp); ++ } ++ ++ free_vm86(t); ++ ++ fpu__drop(fpu); ++} ++ ++void flush_thread(void) ++{ ++ struct task_struct *tsk = current; ++ ++ flush_ptrace_hw_breakpoint(tsk); ++ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); ++ ++ fpu__clear(&tsk->thread.fpu); ++} ++ ++void disable_TSC(void) ++{ ++ preempt_disable(); ++ if (!test_and_set_thread_flag(TIF_NOTSC)) ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOTSC in the current running context. ++ */ ++ cr4_set_bits(X86_CR4_TSD); ++ preempt_enable(); ++} ++ ++static void enable_TSC(void) ++{ ++ preempt_disable(); ++ if (test_and_clear_thread_flag(TIF_NOTSC)) ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOTSC in the current running context. 
++ */ ++ cr4_clear_bits(X86_CR4_TSD); ++ preempt_enable(); ++} ++ ++int get_tsc_mode(unsigned long adr) ++{ ++ unsigned int val; ++ ++ if (test_thread_flag(TIF_NOTSC)) ++ val = PR_TSC_SIGSEGV; ++ else ++ val = PR_TSC_ENABLE; ++ ++ return put_user(val, (unsigned int __user *)adr); ++} ++ ++int set_tsc_mode(unsigned int val) ++{ ++ if (val == PR_TSC_SIGSEGV) ++ disable_TSC(); ++ else if (val == PR_TSC_ENABLE) ++ enable_TSC(); ++ else ++ return -EINVAL; ++ ++ return 0; ++} ++ ++DEFINE_PER_CPU(u64, msr_misc_features_shadow); ++ ++static void set_cpuid_faulting(bool on) ++{ ++ u64 msrval; ++ ++ msrval = this_cpu_read(msr_misc_features_shadow); ++ msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; ++ msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT); ++ this_cpu_write(msr_misc_features_shadow, msrval); ++ wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval); ++} ++ ++static void disable_cpuid(void) ++{ ++ preempt_disable(); ++ if (!test_and_set_thread_flag(TIF_NOCPUID)) { ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOCPUID in the current running context. ++ */ ++ set_cpuid_faulting(true); ++ } ++ preempt_enable(); ++} ++ ++static void enable_cpuid(void) ++{ ++ preempt_disable(); ++ if (test_and_clear_thread_flag(TIF_NOCPUID)) { ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOCPUID in the current running context. ++ */ ++ set_cpuid_faulting(false); ++ } ++ preempt_enable(); ++} ++ ++static int get_cpuid_mode(void) ++{ ++ return !test_thread_flag(TIF_NOCPUID); ++} ++ ++static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled) ++{ ++ if (!static_cpu_has(X86_FEATURE_CPUID_FAULT)) ++ return -ENODEV; ++ ++ if (cpuid_enabled) ++ enable_cpuid(); ++ else ++ disable_cpuid(); ++ ++ return 0; ++} ++ ++/* ++ * Called immediately after a successful exec. ++ */ ++void arch_setup_new_exec(void) ++{ ++ /* If cpuid was previously disabled for this task, re-enable it. */ ++ if (test_thread_flag(TIF_NOCPUID)) ++ enable_cpuid(); ++} ++ ++static inline void switch_to_bitmap(struct thread_struct *prev, ++ struct thread_struct *next, ++ unsigned long tifp, unsigned long tifn) ++{ ++ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); ++ ++ if (tifn & _TIF_IO_BITMAP) { ++ /* ++ * Copy the relevant range of the IO bitmap. ++ * Normally this is 128 bytes or less: ++ */ ++ memcpy(tss->io_bitmap, next->io_bitmap_ptr, ++ max(prev->io_bitmap_max, next->io_bitmap_max)); ++ /* ++ * Make sure that the TSS limit is correct for the CPU ++ * to notice the IO bitmap. ++ */ ++ refresh_tss_limit(); ++ } else if (tifp & _TIF_IO_BITMAP) { ++ /* ++ * Clear any possible leftover bits: ++ */ ++ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); ++ } ++} ++ ++#ifdef CONFIG_SMP ++ ++struct ssb_state { ++ struct ssb_state *shared_state; ++ raw_spinlock_t lock; ++ unsigned int disable_state; ++ unsigned long local_state; ++}; ++ ++#define LSTATE_SSB 0 ++ ++static DEFINE_PER_CPU(struct ssb_state, ssb_state); ++ ++void speculative_store_bypass_ht_init(void) ++{ ++ struct ssb_state *st = this_cpu_ptr(&ssb_state); ++ unsigned int this_cpu = smp_processor_id(); ++ unsigned int cpu; ++ ++ st->local_state = 0; ++ ++ /* ++ * Shared state setup happens once on the first bringup ++ * of the CPU. It's not destroyed on CPU hotunplug. ++ */ ++ if (st->shared_state) ++ return; ++ ++ raw_spin_lock_init(&st->lock); ++ ++ /* ++ * Go over HT siblings and check whether one of them has set up the ++ * shared state pointer already. 
++ */ ++ for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) { ++ if (cpu == this_cpu) ++ continue; ++ ++ if (!per_cpu(ssb_state, cpu).shared_state) ++ continue; ++ ++ /* Link it to the state of the sibling: */ ++ st->shared_state = per_cpu(ssb_state, cpu).shared_state; ++ return; ++ } ++ ++ /* ++ * First HT sibling to come up on the core. Link shared state of ++ * the first HT sibling to itself. The siblings on the same core ++ * which come up later will see the shared state pointer and link ++ * themself to the state of this CPU. ++ */ ++ st->shared_state = st; ++} ++ ++/* ++ * Logic is: First HT sibling enables SSBD for both siblings in the core ++ * and last sibling to disable it, disables it for the whole core. This how ++ * MSR_SPEC_CTRL works in "hardware": ++ * ++ * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL ++ */ ++static __always_inline void amd_set_core_ssb_state(unsigned long tifn) ++{ ++ struct ssb_state *st = this_cpu_ptr(&ssb_state); ++ u64 msr = x86_amd_ls_cfg_base; ++ ++ if (!static_cpu_has(X86_FEATURE_ZEN)) { ++ msr |= ssbd_tif_to_amd_ls_cfg(tifn); ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++ return; ++ } ++ ++ if (tifn & _TIF_SSBD) { ++ /* ++ * Since this can race with prctl(), block reentry on the ++ * same CPU. ++ */ ++ if (__test_and_set_bit(LSTATE_SSB, &st->local_state)) ++ return; ++ ++ msr |= x86_amd_ls_cfg_ssbd_mask; ++ ++ raw_spin_lock(&st->shared_state->lock); ++ /* First sibling enables SSBD: */ ++ if (!st->shared_state->disable_state) ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++ st->shared_state->disable_state++; ++ raw_spin_unlock(&st->shared_state->lock); ++ } else { ++ if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state)) ++ return; ++ ++ raw_spin_lock(&st->shared_state->lock); ++ st->shared_state->disable_state--; ++ if (!st->shared_state->disable_state) ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++ raw_spin_unlock(&st->shared_state->lock); ++ } ++} ++#else ++static __always_inline void amd_set_core_ssb_state(unsigned long tifn) ++{ ++ u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); ++ ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++} ++#endif ++ ++static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) ++{ ++ /* ++ * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL, ++ * so ssbd_tif_to_spec_ctrl() just works. ++ */ ++ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); ++} ++ ++/* ++ * Update the MSRs managing speculation control, during context switch. ++ * ++ * tifp: Previous task's thread flags ++ * tifn: Next task's thread flags ++ */ ++static __always_inline void __speculation_ctrl_update(unsigned long tifp, ++ unsigned long tifn) ++{ ++ unsigned long tif_diff = tifp ^ tifn; ++ u64 msr = x86_spec_ctrl_base; ++ bool updmsr = false; ++ ++ lockdep_assert_irqs_disabled(); ++ ++ /* Handle change of TIF_SSBD depending on the mitigation method. */ ++ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { ++ if (tif_diff & _TIF_SSBD) ++ amd_set_ssb_virt_state(tifn); ++ } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { ++ if (tif_diff & _TIF_SSBD) ++ amd_set_core_ssb_state(tifn); ++ } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || ++ static_cpu_has(X86_FEATURE_AMD_SSBD)) { ++ updmsr |= !!(tif_diff & _TIF_SSBD); ++ msr |= ssbd_tif_to_spec_ctrl(tifn); ++ } ++ ++ /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. 
*/ ++ if (IS_ENABLED(CONFIG_SMP) && ++ static_branch_unlikely(&switch_to_cond_stibp)) { ++ updmsr |= !!(tif_diff & _TIF_SPEC_IB); ++ msr |= stibp_tif_to_spec_ctrl(tifn); ++ } ++ ++ if (updmsr) ++ wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++} ++ ++static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) ++{ ++ if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) { ++ if (task_spec_ssb_disable(tsk)) ++ set_tsk_thread_flag(tsk, TIF_SSBD); ++ else ++ clear_tsk_thread_flag(tsk, TIF_SSBD); ++ ++ if (task_spec_ib_disable(tsk)) ++ set_tsk_thread_flag(tsk, TIF_SPEC_IB); ++ else ++ clear_tsk_thread_flag(tsk, TIF_SPEC_IB); ++ } ++ /* Return the updated threadinfo flags*/ ++ return task_thread_info(tsk)->flags; ++} ++ ++void speculation_ctrl_update(unsigned long tif) ++{ ++ unsigned long flags; ++ ++ /* Forced update. Make sure all relevant TIF flags are different */ ++ local_irq_save(flags); ++ __speculation_ctrl_update(~tif, tif); ++ local_irq_restore(flags); ++} ++ ++/* Called from seccomp/prctl update */ ++void speculation_ctrl_update_current(void) ++{ ++ preempt_disable(); ++ speculation_ctrl_update(speculation_ctrl_update_tif(current)); ++ preempt_enable(); ++} ++ ++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) ++{ ++ struct thread_struct *prev, *next; ++ unsigned long tifp, tifn; ++ ++ prev = &prev_p->thread; ++ next = &next_p->thread; ++ ++ tifn = READ_ONCE(task_thread_info(next_p)->flags); ++ tifp = READ_ONCE(task_thread_info(prev_p)->flags); ++ switch_to_bitmap(prev, next, tifp, tifn); ++ ++ propagate_user_return_notify(prev_p, next_p); ++ ++ if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) && ++ arch_has_block_step()) { ++ unsigned long debugctl, msk; ++ ++ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); ++ debugctl &= ~DEBUGCTLMSR_BTF; ++ msk = tifn & _TIF_BLOCKSTEP; ++ debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT; ++ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); ++ } ++ ++ if ((tifp ^ tifn) & _TIF_NOTSC) ++ cr4_toggle_bits_irqsoff(X86_CR4_TSD); ++ ++ if ((tifp ^ tifn) & _TIF_NOCPUID) ++ set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); ++ ++ if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) { ++ __speculation_ctrl_update(tifp, tifn); ++ } else { ++ speculation_ctrl_update_tif(prev_p); ++ tifn = speculation_ctrl_update_tif(next_p); ++ ++ /* Enforce MSR update to ensure consistent state */ ++ __speculation_ctrl_update(~tifn, tifn); ++ } ++} ++ ++/* ++ * Idle related variables and functions ++ */ ++unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; ++EXPORT_SYMBOL(boot_option_idle_override); ++ ++static void (*x86_idle)(void); ++ ++#ifndef CONFIG_SMP ++static inline void play_dead(void) ++{ ++ BUG(); ++} ++#endif ++ ++void arch_cpu_idle_enter(void) ++{ ++ tsc_verify_tsc_adjust(false); ++ local_touch_nmi(); ++} ++ ++void arch_cpu_idle_dead(void) ++{ ++ play_dead(); ++} ++ ++/* ++ * Called from the generic idle code. ++ */ ++void arch_cpu_idle(void) ++{ ++ x86_idle(); ++} ++ ++/* ++ * We use this if we don't have any better idle routine.. 
++ */ ++void __cpuidle default_idle(void) ++{ ++ trace_cpu_idle_rcuidle(1, smp_processor_id()); ++ safe_halt(); ++ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); ++} ++#ifdef CONFIG_APM_MODULE ++EXPORT_SYMBOL(default_idle); ++#endif ++ ++#ifdef CONFIG_XEN ++bool xen_set_default_idle(void) ++{ ++ bool ret = !!x86_idle; ++ ++ x86_idle = default_idle; ++ ++ return ret; ++} ++#endif ++ ++void stop_this_cpu(void *dummy) ++{ ++ local_irq_disable(); ++ /* ++ * Remove this CPU: ++ */ ++ set_cpu_online(smp_processor_id(), false); ++ disable_local_APIC(); ++ mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); ++ ++ /* ++ * Use wbinvd on processors that support SME. This provides support ++ * for performing a successful kexec when going from SME inactive ++ * to SME active (or vice-versa). The cache must be cleared so that ++ * if there are entries with the same physical address, both with and ++ * without the encryption bit, they don't race each other when flushed ++ * and potentially end up with the wrong entry being committed to ++ * memory. ++ */ ++ if (boot_cpu_has(X86_FEATURE_SME)) ++ native_wbinvd(); ++ for (;;) { ++ /* ++ * Use native_halt() so that memory contents don't change ++ * (stack usage and variables) after possibly issuing the ++ * native_wbinvd() above. ++ */ ++ native_halt(); ++ } ++} ++ ++/* ++ * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power ++ * states (local apic timer and TSC stop). ++ */ ++static void amd_e400_idle(void) ++{ ++ /* ++ * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E ++ * gets set after static_cpu_has() places have been converted via ++ * alternatives. ++ */ ++ if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) { ++ default_idle(); ++ return; ++ } ++ ++ tick_broadcast_enter(); ++ ++ default_idle(); ++ ++ /* ++ * The switch back from broadcast mode needs to be called with ++ * interrupts disabled. ++ */ ++ local_irq_disable(); ++ tick_broadcast_exit(); ++ local_irq_enable(); ++} ++ ++/* ++ * Intel Core2 and older machines prefer MWAIT over HALT for C1. ++ * We can't rely on cpuidle installing MWAIT, because it will not load ++ * on systems that support only C1 -- so the boot default must be MWAIT. ++ * ++ * Some AMD machines are the opposite, they depend on using HALT. ++ * ++ * So for default C1, which is used during boot until cpuidle loads, ++ * use MWAIT-C1 on Intel HW that has it, else use HALT. ++ */ ++static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) ++{ ++ if (c->x86_vendor != X86_VENDOR_INTEL) ++ return 0; ++ ++ if (!cpu_has(c, X86_FEATURE_MWAIT) || static_cpu_has_bug(X86_BUG_MONITOR)) ++ return 0; ++ ++ return 1; ++} ++ ++/* ++ * MONITOR/MWAIT with no hints, used for default C1 state. This invokes MWAIT ++ * with interrupts enabled and no flags, which is backwards compatible with the ++ * original MWAIT implementation. 
++ */ ++static __cpuidle void mwait_idle(void) ++{ ++ if (!current_set_polling_and_test()) { ++ trace_cpu_idle_rcuidle(1, smp_processor_id()); ++ if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { ++ mb(); /* quirk */ ++ clflush((void *)¤t_thread_info()->flags); ++ mb(); /* quirk */ ++ } ++ ++ __monitor((void *)¤t_thread_info()->flags, 0, 0); ++ if (!need_resched()) ++ __sti_mwait(0, 0); ++ else ++ local_irq_enable(); ++ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); ++ } else { ++ local_irq_enable(); ++ } ++ __current_clr_polling(); ++} ++ ++void select_idle_routine(const struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_SMP ++ if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) ++ pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); ++#endif ++ if (x86_idle || boot_option_idle_override == IDLE_POLL) ++ return; ++ ++ if (boot_cpu_has_bug(X86_BUG_AMD_E400)) { ++ pr_info("using AMD E400 aware idle routine\n"); ++ x86_idle = amd_e400_idle; ++ } else if (prefer_mwait_c1_over_halt(c)) { ++ pr_info("using mwait in idle threads\n"); ++ x86_idle = mwait_idle; ++ } else ++ x86_idle = default_idle; ++} ++ ++void amd_e400_c1e_apic_setup(void) ++{ ++ if (boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) { ++ pr_info("Switch to broadcast mode on CPU%d\n", smp_processor_id()); ++ local_irq_disable(); ++ tick_broadcast_force(); ++ local_irq_enable(); ++ } ++} ++ ++void __init arch_post_acpi_subsys_init(void) ++{ ++ u32 lo, hi; ++ ++ if (!boot_cpu_has_bug(X86_BUG_AMD_E400)) ++ return; ++ ++ /* ++ * AMD E400 detection needs to happen after ACPI has been enabled. If ++ * the machine is affected K8_INTP_C1E_ACTIVE_MASK bits are set in ++ * MSR_K8_INT_PENDING_MSG. ++ */ ++ rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); ++ if (!(lo & K8_INTP_C1E_ACTIVE_MASK)) ++ return; ++ ++ boot_cpu_set_bug(X86_BUG_AMD_APIC_C1E); ++ ++ if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) ++ mark_tsc_unstable("TSC halt in AMD C1E"); ++ pr_info("System has AMD C1E enabled\n"); ++} ++ ++static int __init idle_setup(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ if (!strcmp(str, "poll")) { ++ pr_info("using polling idle threads\n"); ++ boot_option_idle_override = IDLE_POLL; ++ cpu_idle_poll_ctrl(true); ++ } else if (!strcmp(str, "halt")) { ++ /* ++ * When the boot option of idle=halt is added, halt is ++ * forced to be used for CPU idle. In such case CPU C2/C3 ++ * won't be used again. ++ * To continue to load the CPU idle driver, don't touch ++ * the boot_option_idle_override. ++ */ ++ x86_idle = default_idle; ++ boot_option_idle_override = IDLE_HALT; ++ } else if (!strcmp(str, "nomwait")) { ++ /* ++ * If the boot option of "idle=nomwait" is added, ++ * it means that mwait will be disabled for CPU C2/C3 ++ * states. In such case it won't touch the variable ++ * of boot_option_idle_override. ++ */ ++ boot_option_idle_override = IDLE_NOMWAIT; ++ } else ++ return -1; ++ ++ return 0; ++} ++early_param("idle", idle_setup); ++ ++unsigned long arch_align_stack(unsigned long sp) ++{ ++ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) ++ sp -= get_random_int() % 8192; ++ return sp & ~0xf; ++} ++ ++unsigned long arch_randomize_brk(struct mm_struct *mm) ++{ ++ return randomize_page(mm->brk, 0x02000000); ++} ++ ++/* ++ * Called from fs/proc with a reference on @p to find the function ++ * which called into schedule(). This needs to be done carefully ++ * because the task might wake up and we might look at a stack ++ * changing under us. 
++ */ ++unsigned long get_wchan(struct task_struct *p) ++{ ++ unsigned long start, bottom, top, sp, fp, ip, ret = 0; ++ int count = 0; ++ ++ if (!p || p == current || p->state == TASK_RUNNING) ++ return 0; ++ ++ if (!try_get_task_stack(p)) ++ return 0; ++ ++ start = (unsigned long)task_stack_page(p); ++ if (!start) ++ goto out; ++ ++ /* ++ * Layout of the stack page: ++ * ++ * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) ++ * PADDING ++ * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING ++ * stack ++ * ----------- bottom = start ++ * ++ * The tasks stack pointer points at the location where the ++ * framepointer is stored. The data on the stack is: ++ * ... IP FP ... IP FP ++ * ++ * We need to read FP and IP, so we need to adjust the upper ++ * bound by another unsigned long. ++ */ ++ top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; ++ top -= 2 * sizeof(unsigned long); ++ bottom = start; ++ ++ sp = READ_ONCE(p->thread.sp); ++ if (sp < bottom || sp > top) ++ goto out; ++ ++ fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp); ++ do { ++ if (fp < bottom || fp > top) ++ goto out; ++ ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long))); ++ if (!in_sched_functions(ip)) { ++ ret = ip; ++ goto out; ++ } ++ fp = READ_ONCE_NOCHECK(*(unsigned long *)fp); ++ } while (count++ < 16 && p->state != TASK_RUNNING); ++ ++out: ++ put_task_stack(p); ++ return ret; ++} ++ ++long do_arch_prctl_common(struct task_struct *task, int option, ++ unsigned long cpuid_enabled) ++{ ++ switch (option) { ++ case ARCH_GET_CPUID: ++ return get_cpuid_mode(); ++ case ARCH_SET_CPUID: ++ return set_cpuid_mode(task, cpuid_enabled); ++ } ++ ++ return -EINVAL; ++} +diff -uprN kernel/arch/x86/kernel/smpboot.c kernel_new/arch/x86/kernel/smpboot.c +--- kernel/arch/x86/kernel/smpboot.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/smpboot.c 2021-04-01 18:28:07.656863286 +0800 +@@ -1074,7 +1074,7 @@ int native_cpu_up(unsigned int cpu, stru + { + int apicid = apic->cpu_present_to_apicid(cpu); + int cpu0_nmi_registered = 0; +- unsigned long flags; ++ unsigned long vflags, rflags; + int err, ret = 0; + + lockdep_assert_irqs_enabled(); +@@ -1123,9 +1123,11 @@ int native_cpu_up(unsigned int cpu, stru + * Check TSC synchronization with the AP (keep irqs disabled + * while doing so): + */ +- local_irq_save(flags); ++ local_irq_save(vflags); ++ rflags = hard_local_irq_save(); + check_tsc_sync_source(cpu); +- local_irq_restore(flags); ++ hard_local_irq_restore(rflags); ++ local_irq_restore(vflags); + + while (!cpu_online(cpu)) { + cpu_relax(); +diff -uprN kernel/arch/x86/kernel/smpboot.c.orig kernel_new/arch/x86/kernel/smpboot.c.orig +--- kernel/arch/x86/kernel/smpboot.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/smpboot.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,1701 @@ ++ /* ++ * x86 SMP booting functions ++ * ++ * (c) 1995 Alan Cox, Building #3 ++ * (c) 1998, 1999, 2000, 2009 Ingo Molnar ++ * Copyright 2001 Andi Kleen, SuSE Labs. ++ * ++ * Much of the core SMP work is based on previous work by Thomas Radke, to ++ * whom a great many thanks are extended. ++ * ++ * Thanks to Intel for making available several different Pentium, ++ * Pentium Pro and Pentium-II/Xeon MP machines. ++ * Original development of Linux SMP code supported by Caldera. ++ * ++ * This code is released under the GNU General Public License version 2 or ++ * later. 
++ * ++ * Fixes ++ * Felix Koop : NR_CPUS used properly ++ * Jose Renau : Handle single CPU case. ++ * Alan Cox : By repeated request 8) - Total BogoMIPS report. ++ * Greg Wright : Fix for kernel stacks panic. ++ * Erich Boleyn : MP v1.4 and additional changes. ++ * Matthias Sattler : Changes for 2.1 kernel map. ++ * Michel Lespinasse : Changes for 2.1 kernel map. ++ * Michael Chastain : Change trampoline.S to gnu as. ++ * Alan Cox : Dumb bug: 'B' step PPro's are fine ++ * Ingo Molnar : Added APIC timers, based on code ++ * from Jose Renau ++ * Ingo Molnar : various cleanups and rewrites ++ * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs ++ * Andi Kleen : Changed for SMP boot into long mode. ++ * Martin J. Bligh : Added support for multi-quad systems ++ * Dave Jones : Report invalid combinations of Athlon CPUs. ++ * Rusty Russell : Hacked into shape for new "hotplug" boot process. ++ * Andi Kleen : Converted to new state machine. ++ * Ashok Raj : CPU hotplug support ++ * Glauber Costa : i386 and x86_64 integration ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* representing HT siblings of each logical CPU */ ++DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); ++EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); ++ ++/* representing HT and core siblings of each logical CPU */ ++DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); ++EXPORT_PER_CPU_SYMBOL(cpu_core_map); ++ ++DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); ++ ++/* Per CPU bogomips and other parameters */ ++DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); ++EXPORT_PER_CPU_SYMBOL(cpu_info); ++ ++/* Logical package management. We might want to allocate that dynamically */ ++unsigned int __max_logical_packages __read_mostly; ++EXPORT_SYMBOL(__max_logical_packages); ++static unsigned int logical_packages __read_mostly; ++ ++/* Maximum number of SMT threads on any online core */ ++int __read_mostly __max_smt_threads = 1; ++ ++/* Flag to indicate if a complete sched domain rebuild is required */ ++bool x86_topology_update; ++ ++int arch_update_cpu_topology(void) ++{ ++ int retval = x86_topology_update; ++ ++ x86_topology_update = false; ++ return retval; ++} ++ ++static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&rtc_lock, flags); ++ CMOS_WRITE(0xa, 0xf); ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = ++ start_eip >> 4; ++ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = ++ start_eip & 0xf; ++} ++ ++static inline void smpboot_restore_warm_reset_vector(void) ++{ ++ unsigned long flags; ++ ++ /* ++ * Paranoid: Set warm reset code and vector here back ++ * to default values. 
++ */ ++ spin_lock_irqsave(&rtc_lock, flags); ++ CMOS_WRITE(0, 0xf); ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ ++ *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; ++} ++ ++/* ++ * Report back to the Boot Processor during boot time or to the caller processor ++ * during CPU online. ++ */ ++static void smp_callin(void) ++{ ++ int cpuid, phys_id; ++ ++ /* ++ * If waken up by an INIT in an 82489DX configuration ++ * cpu_callout_mask guarantees we don't get here before ++ * an INIT_deassert IPI reaches our local APIC, so it is ++ * now safe to touch our local APIC. ++ */ ++ cpuid = smp_processor_id(); ++ ++ /* ++ * (This works even if the APIC is not enabled.) ++ */ ++ phys_id = read_apic_id(); ++ ++ /* ++ * the boot CPU has finished the init stage and is spinning ++ * on callin_map until we finish. We are free to set up this ++ * CPU, first the APIC. (this is probably redundant on most ++ * boards) ++ */ ++ apic_ap_setup(); ++ ++ /* ++ * Save our processor parameters. Note: this information ++ * is needed for clock calibration. ++ */ ++ smp_store_cpu_info(cpuid); ++ ++ /* ++ * The topology information must be up to date before ++ * calibrate_delay() and notify_cpu_starting(). ++ */ ++ set_cpu_sibling_map(raw_smp_processor_id()); ++ ++ /* ++ * Get our bogomips. ++ * Update loops_per_jiffy in cpu_data. Previous call to ++ * smp_store_cpu_info() stored a value that is close but not as ++ * accurate as the value just calculated. ++ */ ++ calibrate_delay(); ++ cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; ++ pr_debug("Stack at about %p\n", &cpuid); ++ ++ wmb(); ++ ++ notify_cpu_starting(cpuid); ++ ++ /* ++ * Allow the master to continue. ++ */ ++ cpumask_set_cpu(cpuid, cpu_callin_mask); ++} ++ ++static int cpu0_logical_apicid; ++static int enable_start_cpu0; ++/* ++ * Activate a secondary processor. ++ */ ++static void notrace start_secondary(void *unused) ++{ ++ /* ++ * Don't put *anything* except direct CPU state initialization ++ * before cpu_init(), SMP booting is too fragile that we want to ++ * limit the things done here to the most necessary things. ++ */ ++ if (boot_cpu_has(X86_FEATURE_PCID)) ++ __write_cr4(__read_cr4() | X86_CR4_PCIDE); ++ ++#ifdef CONFIG_X86_32 ++ /* switch away from the initial page table */ ++ load_cr3(swapper_pg_dir); ++ /* ++ * Initialize the CR4 shadow before doing anything that could ++ * try to read it. ++ */ ++ cr4_init_shadow(); ++ __flush_tlb_all(); ++#endif ++ load_current_idt(); ++ cpu_init(); ++ x86_cpuinit.early_percpu_clock_init(); ++ preempt_disable(); ++ smp_callin(); ++ ++ enable_start_cpu0 = 0; ++ ++ /* otherwise gcc will move up smp_processor_id before the cpu_init */ ++ barrier(); ++ /* ++ * Check TSC synchronization with the boot CPU: ++ */ ++ check_tsc_sync_target(); ++ ++ speculative_store_bypass_ht_init(); ++ ++ /* ++ * Lock vector_lock, set CPU online and bring the vector ++ * allocator online. Online must be set with vector_lock held ++ * to prevent a concurrent irq setup/teardown from seeing a ++ * half valid vector space. 
++ */ ++ lock_vector_lock(); ++ set_cpu_online(smp_processor_id(), true); ++ lapic_online(); ++ unlock_vector_lock(); ++ cpu_set_state_online(smp_processor_id()); ++ x86_platform.nmi_init(); ++ ++ /* enable local interrupts */ ++ local_irq_enable(); ++ ++ /* to prevent fake stack check failure in clock setup */ ++ boot_init_stack_canary(); ++ ++ x86_cpuinit.setup_percpu_clockev(); ++ ++ wmb(); ++ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); ++} ++ ++/** ++ * topology_is_primary_thread - Check whether CPU is the primary SMT thread ++ * @cpu: CPU to check ++ */ ++bool topology_is_primary_thread(unsigned int cpu) ++{ ++ return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); ++} ++ ++/** ++ * topology_smt_supported - Check whether SMT is supported by the CPUs ++ */ ++bool topology_smt_supported(void) ++{ ++ return smp_num_siblings > 1; ++} ++ ++/** ++ * topology_phys_to_logical_pkg - Map a physical package id to a logical ++ * ++ * Returns logical package id or -1 if not found ++ */ ++int topology_phys_to_logical_pkg(unsigned int phys_pkg) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ struct cpuinfo_x86 *c = &cpu_data(cpu); ++ ++ if (c->initialized && c->phys_proc_id == phys_pkg) ++ return c->logical_proc_id; ++ } ++ return -1; ++} ++EXPORT_SYMBOL(topology_phys_to_logical_pkg); ++ ++/** ++ * topology_update_package_map - Update the physical to logical package map ++ * @pkg: The physical package id as retrieved via CPUID ++ * @cpu: The cpu for which this is updated ++ */ ++int topology_update_package_map(unsigned int pkg, unsigned int cpu) ++{ ++ int new; ++ ++ /* Already available somewhere? */ ++ new = topology_phys_to_logical_pkg(pkg); ++ if (new >= 0) ++ goto found; ++ ++ new = logical_packages++; ++ if (new != pkg) { ++ pr_info("CPU %u Converting physical %u to logical package %u\n", ++ cpu, pkg, new); ++ } ++found: ++ cpu_data(cpu).logical_proc_id = new; ++ return 0; ++} ++ ++void __init smp_store_boot_cpu_info(void) ++{ ++ int id = 0; /* CPU 0 */ ++ struct cpuinfo_x86 *c = &cpu_data(id); ++ ++ *c = boot_cpu_data; ++ c->cpu_index = id; ++ topology_update_package_map(c->phys_proc_id, id); ++ c->initialized = true; ++} ++ ++/* ++ * The bootstrap kernel entry code has set these up. Save them for ++ * a given CPU ++ */ ++void smp_store_cpu_info(int id) ++{ ++ struct cpuinfo_x86 *c = &cpu_data(id); ++ ++ /* Copy boot_cpu_data only on the first bringup */ ++ if (!c->initialized) ++ *c = boot_cpu_data; ++ c->cpu_index = id; ++ /* ++ * During boot time, CPU0 has this setup already. Save the info when ++ * bringing up AP or offlined CPU0. ++ */ ++ identify_secondary_cpu(c); ++ c->initialized = true; ++} ++ ++static bool ++topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) ++{ ++ int cpu1 = c->cpu_index, cpu2 = o->cpu_index; ++ ++ return (cpu_to_node(cpu1) == cpu_to_node(cpu2)); ++} ++ ++static bool ++topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name) ++{ ++ int cpu1 = c->cpu_index, cpu2 = o->cpu_index; ++ ++ return !WARN_ONCE(!topology_same_node(c, o), ++ "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! " ++ "[node: %d != %d]. 
Ignoring dependency.\n", ++ cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2)); ++} ++ ++#define link_mask(mfunc, c1, c2) \ ++do { \ ++ cpumask_set_cpu((c1), mfunc(c2)); \ ++ cpumask_set_cpu((c2), mfunc(c1)); \ ++} while (0) ++ ++static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) ++{ ++ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { ++ int cpu1 = c->cpu_index, cpu2 = o->cpu_index; ++ ++ if (c->phys_proc_id == o->phys_proc_id && ++ per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) { ++ if (c->cpu_core_id == o->cpu_core_id) ++ return topology_sane(c, o, "smt"); ++ ++ if ((c->cu_id != 0xff) && ++ (o->cu_id != 0xff) && ++ (c->cu_id == o->cu_id)) ++ return topology_sane(c, o, "smt"); ++ } ++ ++ } else if (c->phys_proc_id == o->phys_proc_id && ++ c->cpu_core_id == o->cpu_core_id) { ++ return topology_sane(c, o, "smt"); ++ } ++ ++ return false; ++} ++ ++/* ++ * Define snc_cpu[] for SNC (Sub-NUMA Cluster) CPUs. ++ * ++ * These are Intel CPUs that enumerate an LLC that is shared by ++ * multiple NUMA nodes. The LLC on these systems is shared for ++ * off-package data access but private to the NUMA node (half ++ * of the package) for on-package access. ++ * ++ * CPUID (the source of the information about the LLC) can only ++ * enumerate the cache as being shared *or* unshared, but not ++ * this particular configuration. The CPU in this case enumerates ++ * the cache to be shared across the entire package (spanning both ++ * NUMA nodes). ++ */ ++ ++static const struct x86_cpu_id snc_cpu[] = { ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X }, ++ {} ++}; ++ ++static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) ++{ ++ int cpu1 = c->cpu_index, cpu2 = o->cpu_index; ++ ++ /* Do not match if we do not have a valid APICID for cpu: */ ++ if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID) ++ return false; ++ ++ /* Do not match if LLC id does not match: */ ++ if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2)) ++ return false; ++ ++ /* ++ * Allow the SNC topology without warning. Return of false ++ * means 'c' does not share the LLC of 'o'. This will be ++ * reflected to userspace. ++ */ ++ if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu)) ++ return false; ++ ++ return topology_sane(c, o, "llc"); ++} ++ ++/* ++ * Unlike the other levels, we do not enforce keeping a ++ * multicore group inside a NUMA node. If this happens, we will ++ * discard the MC level of the topology later. ++ */ ++static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) ++{ ++ if (c->phys_proc_id == o->phys_proc_id) ++ return true; ++ return false; ++} ++ ++#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) ++static inline int x86_sched_itmt_flags(void) ++{ ++ return sysctl_sched_itmt_enabled ? 
SD_ASYM_PACKING : 0; ++} ++ ++#ifdef CONFIG_SCHED_MC ++static int x86_core_flags(void) ++{ ++ return cpu_core_flags() | x86_sched_itmt_flags(); ++} ++#endif ++#ifdef CONFIG_SCHED_SMT ++static int x86_smt_flags(void) ++{ ++ return cpu_smt_flags() | x86_sched_itmt_flags(); ++} ++#endif ++#endif ++ ++static struct sched_domain_topology_level x86_numa_in_package_topology[] = { ++#ifdef CONFIG_SCHED_SMT ++ { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, ++#endif ++#ifdef CONFIG_SCHED_MC ++ { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, ++#endif ++ { NULL, }, ++}; ++ ++static struct sched_domain_topology_level x86_topology[] = { ++#ifdef CONFIG_SCHED_SMT ++ { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, ++#endif ++#ifdef CONFIG_SCHED_MC ++ { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, ++#endif ++ { cpu_cpu_mask, SD_INIT_NAME(DIE) }, ++ { NULL, }, ++}; ++ ++/* ++ * Set if a package/die has multiple NUMA nodes inside. ++ * AMD Magny-Cours, Intel Cluster-on-Die, and Intel ++ * Sub-NUMA Clustering have this. ++ */ ++static bool x86_has_numa_in_package; ++ ++void set_cpu_sibling_map(int cpu) ++{ ++ bool has_smt = smp_num_siblings > 1; ++ bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; ++ struct cpuinfo_x86 *c = &cpu_data(cpu); ++ struct cpuinfo_x86 *o; ++ int i, threads; ++ ++ cpumask_set_cpu(cpu, cpu_sibling_setup_mask); ++ ++ if (!has_mp) { ++ cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu)); ++ cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); ++ cpumask_set_cpu(cpu, topology_core_cpumask(cpu)); ++ c->booted_cores = 1; ++ return; ++ } ++ ++ for_each_cpu(i, cpu_sibling_setup_mask) { ++ o = &cpu_data(i); ++ ++ if ((i == cpu) || (has_smt && match_smt(c, o))) ++ link_mask(topology_sibling_cpumask, cpu, i); ++ ++ if ((i == cpu) || (has_mp && match_llc(c, o))) ++ link_mask(cpu_llc_shared_mask, cpu, i); ++ ++ } ++ ++ /* ++ * This needs a separate iteration over the cpus because we rely on all ++ * topology_sibling_cpumask links to be set-up. ++ */ ++ for_each_cpu(i, cpu_sibling_setup_mask) { ++ o = &cpu_data(i); ++ ++ if ((i == cpu) || (has_mp && match_die(c, o))) { ++ link_mask(topology_core_cpumask, cpu, i); ++ ++ /* ++ * Does this new cpu bringup a new core? ++ */ ++ if (cpumask_weight( ++ topology_sibling_cpumask(cpu)) == 1) { ++ /* ++ * for each core in package, increment ++ * the booted_cores for this new cpu ++ */ ++ if (cpumask_first( ++ topology_sibling_cpumask(i)) == i) ++ c->booted_cores++; ++ /* ++ * increment the core count for all ++ * the other cpus in this package ++ */ ++ if (i != cpu) ++ cpu_data(i).booted_cores++; ++ } else if (i != cpu && !c->booted_cores) ++ c->booted_cores = cpu_data(i).booted_cores; ++ } ++ if (match_die(c, o) && !topology_same_node(c, o)) ++ x86_has_numa_in_package = true; ++ } ++ ++ threads = cpumask_weight(topology_sibling_cpumask(cpu)); ++ if (threads > __max_smt_threads) ++ __max_smt_threads = threads; ++} ++ ++/* maps the cpu to the sched domain representing multi-core */ ++const struct cpumask *cpu_coregroup_mask(int cpu) ++{ ++ return cpu_llc_shared_mask(cpu); ++} ++ ++static void impress_friends(void) ++{ ++ int cpu; ++ unsigned long bogosum = 0; ++ /* ++ * Allow the user to impress friends. 
++ */ ++ pr_debug("Before bogomips\n"); ++ for_each_possible_cpu(cpu) ++ if (cpumask_test_cpu(cpu, cpu_callout_mask)) ++ bogosum += cpu_data(cpu).loops_per_jiffy; ++ pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n", ++ num_online_cpus(), ++ bogosum/(500000/HZ), ++ (bogosum/(5000/HZ))%100); ++ ++ pr_debug("Before bogocount - setting activated=1\n"); ++} ++ ++void __inquire_remote_apic(int apicid) ++{ ++ unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; ++ const char * const names[] = { "ID", "VERSION", "SPIV" }; ++ int timeout; ++ u32 status; ++ ++ pr_info("Inquiring remote APIC 0x%x...\n", apicid); ++ ++ for (i = 0; i < ARRAY_SIZE(regs); i++) { ++ pr_info("... APIC 0x%x %s: ", apicid, names[i]); ++ ++ /* ++ * Wait for idle. ++ */ ++ status = safe_apic_wait_icr_idle(); ++ if (status) ++ pr_cont("a previous APIC delivery may have failed\n"); ++ ++ apic_icr_write(APIC_DM_REMRD | regs[i], apicid); ++ ++ timeout = 0; ++ do { ++ udelay(100); ++ status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; ++ } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); ++ ++ switch (status) { ++ case APIC_ICR_RR_VALID: ++ status = apic_read(APIC_RRR); ++ pr_cont("%08x\n", status); ++ break; ++ default: ++ pr_cont("failed\n"); ++ } ++ } ++} ++ ++/* ++ * The Multiprocessor Specification 1.4 (1997) example code suggests ++ * that there should be a 10ms delay between the BSP asserting INIT ++ * and de-asserting INIT, when starting a remote processor. ++ * But that slows boot and resume on modern processors, which include ++ * many cores and don't require that delay. ++ * ++ * Cmdline "init_cpu_udelay=" is available to over-ride this delay. ++ * Modern processor families are quirked to remove the delay entirely. ++ */ ++#define UDELAY_10MS_DEFAULT 10000 ++ ++static unsigned int init_udelay = UINT_MAX; ++ ++static int __init cpu_init_udelay(char *str) ++{ ++ get_option(&str, &init_udelay); ++ ++ return 0; ++} ++early_param("cpu_init_udelay", cpu_init_udelay); ++ ++static void __init smp_quirk_init_udelay(void) ++{ ++ /* if cmdline changed it from default, leave it alone */ ++ if (init_udelay != UINT_MAX) ++ return; ++ ++ /* if modern processor, use no delay */ ++ if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) || ++ ((boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) && (boot_cpu_data.x86 >= 0x18)) || ++ ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) { ++ init_udelay = 0; ++ return; ++ } ++ /* else, use legacy delay */ ++ init_udelay = UDELAY_10MS_DEFAULT; ++} ++ ++/* ++ * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal ++ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this ++ * won't ... remember to clear down the APIC, etc later. ++ */ ++int ++wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) ++{ ++ unsigned long send_status, accept_status = 0; ++ int maxlvt; ++ ++ /* Target chip */ ++ /* Boot on the stack */ ++ /* Kick the second */ ++ apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid); ++ ++ pr_debug("Waiting for send to finish...\n"); ++ send_status = safe_apic_wait_icr_idle(); ++ ++ /* ++ * Give the other CPU some time to accept the IPI. ++ */ ++ udelay(200); ++ if (APIC_INTEGRATED(boot_cpu_apic_version)) { ++ maxlvt = lapic_get_maxlvt(); ++ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. 
*/ ++ apic_write(APIC_ESR, 0); ++ accept_status = (apic_read(APIC_ESR) & 0xEF); ++ } ++ pr_debug("NMI sent\n"); ++ ++ if (send_status) ++ pr_err("APIC never delivered???\n"); ++ if (accept_status) ++ pr_err("APIC delivery error (%lx)\n", accept_status); ++ ++ return (send_status | accept_status); ++} ++ ++static int ++wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) ++{ ++ unsigned long send_status = 0, accept_status = 0; ++ int maxlvt, num_starts, j; ++ ++ maxlvt = lapic_get_maxlvt(); ++ ++ /* ++ * Be paranoid about clearing APIC errors. ++ */ ++ if (APIC_INTEGRATED(boot_cpu_apic_version)) { ++ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ } ++ ++ pr_debug("Asserting INIT\n"); ++ ++ /* ++ * Turn INIT on target chip ++ */ ++ /* ++ * Send IPI ++ */ ++ apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, ++ phys_apicid); ++ ++ pr_debug("Waiting for send to finish...\n"); ++ send_status = safe_apic_wait_icr_idle(); ++ ++ udelay(init_udelay); ++ ++ pr_debug("Deasserting INIT\n"); ++ ++ /* Target chip */ ++ /* Send IPI */ ++ apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); ++ ++ pr_debug("Waiting for send to finish...\n"); ++ send_status = safe_apic_wait_icr_idle(); ++ ++ mb(); ++ ++ /* ++ * Should we send STARTUP IPIs ? ++ * ++ * Determine this based on the APIC version. ++ * If we don't have an integrated APIC, don't send the STARTUP IPIs. ++ */ ++ if (APIC_INTEGRATED(boot_cpu_apic_version)) ++ num_starts = 2; ++ else ++ num_starts = 0; ++ ++ /* ++ * Run STARTUP IPI loop. ++ */ ++ pr_debug("#startup loops: %d\n", num_starts); ++ ++ for (j = 1; j <= num_starts; j++) { ++ pr_debug("Sending STARTUP #%d\n", j); ++ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ pr_debug("After apic_write\n"); ++ ++ /* ++ * STARTUP IPI ++ */ ++ ++ /* Target chip */ ++ /* Boot on the stack */ ++ /* Kick the second */ ++ apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), ++ phys_apicid); ++ ++ /* ++ * Give the other CPU some time to accept the IPI. ++ */ ++ if (init_udelay == 0) ++ udelay(10); ++ else ++ udelay(300); ++ ++ pr_debug("Startup point 1\n"); ++ ++ pr_debug("Waiting for send to finish...\n"); ++ send_status = safe_apic_wait_icr_idle(); ++ ++ /* ++ * Give the other CPU some time to accept the IPI. ++ */ ++ if (init_udelay == 0) ++ udelay(10); ++ else ++ udelay(200); ++ ++ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ accept_status = (apic_read(APIC_ESR) & 0xEF); ++ if (send_status || accept_status) ++ break; ++ } ++ pr_debug("After Startup\n"); ++ ++ if (send_status) ++ pr_err("APIC never delivered???\n"); ++ if (accept_status) ++ pr_err("APIC delivery error (%lx)\n", accept_status); ++ ++ return (send_status | accept_status); ++} ++ ++/* reduce the number of lines printed when booting a large cpu count system */ ++static void announce_cpu(int cpu, int apicid) ++{ ++ static int current_node = -1; ++ int node = early_cpu_to_node(cpu); ++ static int width, node_width; ++ ++ if (!width) ++ width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */ ++ ++ if (!node_width) ++ node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */ ++ ++ if (cpu == 1) ++ printk(KERN_INFO "x86: Booting SMP configuration:\n"); ++ ++ if (system_state < SYSTEM_RUNNING) { ++ if (node != current_node) { ++ if (current_node > (-1)) ++ pr_cont("\n"); ++ current_node = node; ++ ++ printk(KERN_INFO ".... 
node %*s#%d, CPUs: ", ++ node_width - num_digits(node), " ", node); ++ } ++ ++ /* Add padding for the BSP */ ++ if (cpu == 1) ++ pr_cont("%*s", width + 1, " "); ++ ++ pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu); ++ ++ } else ++ pr_info("Booting Node %d Processor %d APIC 0x%x\n", ++ node, cpu, apicid); ++} ++ ++static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs) ++{ ++ int cpu; ++ ++ cpu = smp_processor_id(); ++ if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0) ++ return NMI_HANDLED; ++ ++ return NMI_DONE; ++} ++ ++/* ++ * Wake up AP by INIT, INIT, STARTUP sequence. ++ * ++ * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS ++ * boot-strap code which is not a desired behavior for waking up BSP. To ++ * void the boot-strap code, wake up CPU0 by NMI instead. ++ * ++ * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined ++ * (i.e. physically hot removed and then hot added), NMI won't wake it up. ++ * We'll change this code in the future to wake up hard offlined CPU0 if ++ * real platform and request are available. ++ */ ++static int ++wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, ++ int *cpu0_nmi_registered) ++{ ++ int id; ++ int boot_error; ++ ++ preempt_disable(); ++ ++ /* ++ * Wake up AP by INIT, INIT, STARTUP sequence. ++ */ ++ if (cpu) { ++ boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); ++ goto out; ++ } ++ ++ /* ++ * Wake up BSP by nmi. ++ * ++ * Register a NMI handler to help wake up CPU0. ++ */ ++ boot_error = register_nmi_handler(NMI_LOCAL, ++ wakeup_cpu0_nmi, 0, "wake_cpu0"); ++ ++ if (!boot_error) { ++ enable_start_cpu0 = 1; ++ *cpu0_nmi_registered = 1; ++ if (apic->dest_logical == APIC_DEST_LOGICAL) ++ id = cpu0_logical_apicid; ++ else ++ id = apicid; ++ boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip); ++ } ++ ++out: ++ preempt_enable(); ++ ++ return boot_error; ++} ++ ++void common_cpu_up(unsigned int cpu, struct task_struct *idle) ++{ ++ /* Just in case we booted with a single CPU. */ ++ alternatives_enable_smp(); ++ ++ per_cpu(current_task, cpu) = idle; ++ ++#ifdef CONFIG_X86_32 ++ /* Stack for startup_32 can be just as for start_secondary onwards */ ++ irq_ctx_init(cpu); ++ per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); ++#else ++ initial_gs = per_cpu_offset(cpu); ++#endif ++} ++ ++/* ++ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad ++ * (ie clustered apic addressing mode), this is a LOGICAL apic ID. ++ * Returns zero if CPU booted OK, else error code from ++ * ->wakeup_secondary_cpu. ++ */ ++static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, ++ int *cpu0_nmi_registered) ++{ ++ volatile u32 *trampoline_status = ++ (volatile u32 *) __va(real_mode_header->trampoline_status); ++ /* start_ip had better be page-aligned! */ ++ unsigned long start_ip = real_mode_header->trampoline_start; ++ ++ unsigned long boot_error = 0; ++ unsigned long timeout; ++ ++ idle->thread.sp = (unsigned long)task_pt_regs(idle); ++ early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu); ++ initial_code = (unsigned long)start_secondary; ++ initial_stack = idle->thread.sp; ++ ++ /* Enable the espfix hack for this CPU */ ++ init_espfix_ap(cpu); ++ ++ /* So we see what's up */ ++ announce_cpu(cpu, apicid); ++ ++ /* ++ * This grunge runs the startup process for ++ * the targeted processor. 
++ */ ++ ++ if (x86_platform.legacy.warm_reset) { ++ ++ pr_debug("Setting warm reset code and vector.\n"); ++ ++ smpboot_setup_warm_reset_vector(start_ip); ++ /* ++ * Be paranoid about clearing APIC errors. ++ */ ++ if (APIC_INTEGRATED(boot_cpu_apic_version)) { ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ } ++ } ++ ++ /* ++ * AP might wait on cpu_callout_mask in cpu_init() with ++ * cpu_initialized_mask set if previous attempt to online ++ * it timed-out. Clear cpu_initialized_mask so that after ++ * INIT/SIPI it could start with a clean state. ++ */ ++ cpumask_clear_cpu(cpu, cpu_initialized_mask); ++ smp_mb(); ++ ++ /* ++ * Wake up a CPU in difference cases: ++ * - Use the method in the APIC driver if it's defined ++ * Otherwise, ++ * - Use an INIT boot APIC message for APs or NMI for BSP. ++ */ ++ if (apic->wakeup_secondary_cpu) ++ boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); ++ else ++ boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid, ++ cpu0_nmi_registered); ++ ++ if (!boot_error) { ++ /* ++ * Wait 10s total for first sign of life from AP ++ */ ++ boot_error = -1; ++ timeout = jiffies + 10*HZ; ++ while (time_before(jiffies, timeout)) { ++ if (cpumask_test_cpu(cpu, cpu_initialized_mask)) { ++ /* ++ * Tell AP to proceed with initialization ++ */ ++ cpumask_set_cpu(cpu, cpu_callout_mask); ++ boot_error = 0; ++ break; ++ } ++ schedule(); ++ } ++ } ++ ++ if (!boot_error) { ++ /* ++ * Wait till AP completes initial initialization ++ */ ++ while (!cpumask_test_cpu(cpu, cpu_callin_mask)) { ++ /* ++ * Allow other tasks to run while we wait for the ++ * AP to come online. This also gives a chance ++ * for the MTRR work(triggered by the AP coming online) ++ * to be completed in the stop machine context. ++ */ ++ schedule(); ++ } ++ } ++ ++ /* mark "stuck" area as not stuck */ ++ *trampoline_status = 0; ++ ++ if (x86_platform.legacy.warm_reset) { ++ /* ++ * Cleanup possible dangling ends... ++ */ ++ smpboot_restore_warm_reset_vector(); ++ } ++ ++ return boot_error; ++} ++ ++int native_cpu_up(unsigned int cpu, struct task_struct *tidle) ++{ ++ int apicid = apic->cpu_present_to_apicid(cpu); ++ int cpu0_nmi_registered = 0; ++ unsigned long flags; ++ int err, ret = 0; ++ ++ lockdep_assert_irqs_enabled(); ++ ++ pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); ++ ++ if (apicid == BAD_APICID || ++ !physid_isset(apicid, phys_cpu_present_map) || ++ !apic->apic_id_valid(apicid)) { ++ pr_err("%s: bad cpu %d\n", __func__, cpu); ++ return -EINVAL; ++ } ++ ++ /* ++ * Already booted CPU? ++ */ ++ if (cpumask_test_cpu(cpu, cpu_callin_mask)) { ++ pr_debug("do_boot_cpu %d Already started\n", cpu); ++ return -ENOSYS; ++ } ++ ++ /* ++ * Save current MTRR state in case it was changed since early boot ++ * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: ++ */ ++ mtrr_save_state(); ++ ++ /* x86 CPUs take themselves offline, so delayed offline is OK. 
*/ ++ err = cpu_check_up_prepare(cpu); ++ if (err && err != -EBUSY) ++ return err; ++ ++ /* the FPU context is blank, nobody can own it */ ++ per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; ++ ++ common_cpu_up(cpu, tidle); ++ ++ err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered); ++ if (err) { ++ pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); ++ ret = -EIO; ++ goto unreg_nmi; ++ } ++ ++ /* ++ * Check TSC synchronization with the AP (keep irqs disabled ++ * while doing so): ++ */ ++ local_irq_save(flags); ++ check_tsc_sync_source(cpu); ++ local_irq_restore(flags); ++ ++ while (!cpu_online(cpu)) { ++ cpu_relax(); ++ touch_nmi_watchdog(); ++ } ++ ++unreg_nmi: ++ /* ++ * Clean up the nmi handler. Do this after the callin and callout sync ++ * to avoid impact of possible long unregister time. ++ */ ++ if (cpu0_nmi_registered) ++ unregister_nmi_handler(NMI_LOCAL, "wake_cpu0"); ++ ++ return ret; ++} ++ ++/** ++ * arch_disable_smp_support() - disables SMP support for x86 at runtime ++ */ ++void arch_disable_smp_support(void) ++{ ++ disable_ioapic_support(); ++} ++ ++/* ++ * Fall back to non SMP mode after errors. ++ * ++ * RED-PEN audit/test this more. I bet there is more state messed up here. ++ */ ++static __init void disable_smp(void) ++{ ++ pr_info("SMP disabled\n"); ++ ++ disable_ioapic_support(); ++ ++ init_cpu_present(cpumask_of(0)); ++ init_cpu_possible(cpumask_of(0)); ++ ++ if (smp_found_config) ++ physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); ++ else ++ physid_set_mask_of_physid(0, &phys_cpu_present_map); ++ cpumask_set_cpu(0, topology_sibling_cpumask(0)); ++ cpumask_set_cpu(0, topology_core_cpumask(0)); ++} ++ ++/* ++ * Various sanity checks. ++ */ ++static void __init smp_sanity_check(void) ++{ ++ preempt_disable(); ++ ++#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32) ++ if (def_to_bigsmp && nr_cpu_ids > 8) { ++ unsigned int cpu; ++ unsigned nr; ++ ++ pr_warn("More than 8 CPUs detected - skipping them\n" ++ "Use CONFIG_X86_BIGSMP\n"); ++ ++ nr = 0; ++ for_each_present_cpu(cpu) { ++ if (nr >= 8) ++ set_cpu_present(cpu, false); ++ nr++; ++ } ++ ++ nr = 0; ++ for_each_possible_cpu(cpu) { ++ if (nr >= 8) ++ set_cpu_possible(cpu, false); ++ nr++; ++ } ++ ++ nr_cpu_ids = 8; ++ } ++#endif ++ ++ if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { ++ pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n", ++ hard_smp_processor_id()); ++ ++ physid_set(hard_smp_processor_id(), phys_cpu_present_map); ++ } ++ ++ /* ++ * Should not be necessary because the MP table should list the boot ++ * CPU too, but we do it for the sake of robustness anyway. ++ */ ++ if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { ++ pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n", ++ boot_cpu_physical_apicid); ++ physid_set(hard_smp_processor_id(), phys_cpu_present_map); ++ } ++ preempt_enable(); ++} ++ ++static void __init smp_cpu_index_default(void) ++{ ++ int i; ++ struct cpuinfo_x86 *c; ++ ++ for_each_possible_cpu(i) { ++ c = &cpu_data(i); ++ /* mark all to hotplug */ ++ c->cpu_index = nr_cpu_ids; ++ } ++} ++ ++static void __init smp_get_logical_apicid(void) ++{ ++ if (x2apic_mode) ++ cpu0_logical_apicid = apic_read(APIC_LDR); ++ else ++ cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); ++} ++ ++/* ++ * Prepare for SMP bootup. ++ * @max_cpus: configured maximum number of CPUs, It is a legacy parameter ++ * for common interface support. 
++ */ ++void __init native_smp_prepare_cpus(unsigned int max_cpus) ++{ ++ unsigned int i; ++ ++ smp_cpu_index_default(); ++ ++ /* ++ * Setup boot CPU information ++ */ ++ smp_store_boot_cpu_info(); /* Final full version of the data */ ++ cpumask_copy(cpu_callin_mask, cpumask_of(0)); ++ mb(); ++ ++ for_each_possible_cpu(i) { ++ zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); ++ zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); ++ zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); ++ } ++ ++ /* ++ * Set 'default' x86 topology, this matches default_topology() in that ++ * it has NUMA nodes as a topology level. See also ++ * native_smp_cpus_done(). ++ * ++ * Must be done before set_cpus_sibling_map() is ran. ++ */ ++ set_sched_topology(x86_topology); ++ ++ set_cpu_sibling_map(0); ++ ++ smp_sanity_check(); ++ ++ switch (apic_intr_mode) { ++ case APIC_PIC: ++ case APIC_VIRTUAL_WIRE_NO_CONFIG: ++ disable_smp(); ++ return; ++ case APIC_SYMMETRIC_IO_NO_ROUTING: ++ disable_smp(); ++ /* Setup local timer */ ++ x86_init.timers.setup_percpu_clockev(); ++ return; ++ case APIC_VIRTUAL_WIRE: ++ case APIC_SYMMETRIC_IO: ++ break; ++ } ++ ++ /* Setup local timer */ ++ x86_init.timers.setup_percpu_clockev(); ++ ++ smp_get_logical_apicid(); ++ ++ pr_info("CPU0: "); ++ print_cpu_info(&cpu_data(0)); ++ ++ native_pv_lock_init(); ++ ++ uv_system_init(); ++ ++ set_mtrr_aps_delayed_init(); ++ ++ smp_quirk_init_udelay(); ++ ++ speculative_store_bypass_ht_init(); ++} ++ ++void arch_enable_nonboot_cpus_begin(void) ++{ ++ set_mtrr_aps_delayed_init(); ++} ++ ++void arch_enable_nonboot_cpus_end(void) ++{ ++ mtrr_aps_init(); ++} ++ ++/* ++ * Early setup to make printk work. ++ */ ++void __init native_smp_prepare_boot_cpu(void) ++{ ++ int me = smp_processor_id(); ++ switch_to_new_gdt(me); ++ /* already set me in cpu_online_mask in boot_cpu_init() */ ++ cpumask_set_cpu(me, cpu_callout_mask); ++ cpu_set_state_online(me); ++} ++ ++void __init calculate_max_logical_packages(void) ++{ ++ int ncpus; ++ ++ /* ++ * Today neither Intel nor AMD support heterogenous systems so ++ * extrapolate the boot cpu's data to all packages. ++ */ ++ ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); ++ __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); ++ pr_info("Max logical packages: %u\n", __max_logical_packages); ++} ++ ++void __init native_smp_cpus_done(unsigned int max_cpus) ++{ ++ pr_debug("Boot done\n"); ++ ++ calculate_max_logical_packages(); ++ ++ if (x86_has_numa_in_package) ++ set_sched_topology(x86_numa_in_package_topology); ++ ++ nmi_selftest(); ++ impress_friends(); ++ mtrr_aps_init(); ++} ++ ++static int __initdata setup_possible_cpus = -1; ++static int __init _setup_possible_cpus(char *str) ++{ ++ get_option(&str, &setup_possible_cpus); ++ return 0; ++} ++early_param("possible_cpus", _setup_possible_cpus); ++ ++ ++/* ++ * cpu_possible_mask should be static, it cannot change as cpu's ++ * are onlined, or offlined. The reason is per-cpu data-structures ++ * are allocated by some modules at init time, and dont expect to ++ * do this dynamically on cpu arrival/departure. ++ * cpu_present_mask on the other hand can change dynamically. ++ * In case when cpu_hotplug is not compiled, then we resort to current ++ * behaviour, which is cpu_possible == cpu_present. ++ * - Ashok Raj ++ * ++ * Three ways to find out the number of additional hotplug CPUs: ++ * - If the BIOS specified disabled CPUs in ACPI/mptables use that. 
++ * - The user can overwrite it with possible_cpus=NUM ++ * - Otherwise don't reserve additional CPUs. ++ * We do this because additional CPUs waste a lot of memory. ++ * -AK ++ */ ++__init void prefill_possible_map(void) ++{ ++ int i, possible; ++ ++ /* No boot processor was found in mptable or ACPI MADT */ ++ if (!num_processors) { ++ if (boot_cpu_has(X86_FEATURE_APIC)) { ++ int apicid = boot_cpu_physical_apicid; ++ int cpu = hard_smp_processor_id(); ++ ++ pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu); ++ ++ /* Make sure boot cpu is enumerated */ ++ if (apic->cpu_present_to_apicid(0) == BAD_APICID && ++ apic->apic_id_valid(apicid)) ++ generic_processor_info(apicid, boot_cpu_apic_version); ++ } ++ ++ if (!num_processors) ++ num_processors = 1; ++ } ++ ++ i = setup_max_cpus ?: 1; ++ if (setup_possible_cpus == -1) { ++ possible = num_processors; ++#ifdef CONFIG_HOTPLUG_CPU ++ if (setup_max_cpus) ++ possible += disabled_cpus; ++#else ++ if (possible > i) ++ possible = i; ++#endif ++ } else ++ possible = setup_possible_cpus; ++ ++ total_cpus = max_t(int, possible, num_processors + disabled_cpus); ++ ++ /* nr_cpu_ids could be reduced via nr_cpus= */ ++ if (possible > nr_cpu_ids) { ++ pr_warn("%d Processors exceeds NR_CPUS limit of %u\n", ++ possible, nr_cpu_ids); ++ possible = nr_cpu_ids; ++ } ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ if (!setup_max_cpus) ++#endif ++ if (possible > i) { ++ pr_warn("%d Processors exceeds max_cpus limit of %u\n", ++ possible, setup_max_cpus); ++ possible = i; ++ } ++ ++ nr_cpu_ids = possible; ++ ++ pr_info("Allowing %d CPUs, %d hotplug CPUs\n", ++ possible, max_t(int, possible - num_processors, 0)); ++ ++ reset_cpu_possible_mask(); ++ ++ for (i = 0; i < possible; i++) ++ set_cpu_possible(i, true); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ ++/* Recompute SMT state for all CPUs on offline */ ++static void recompute_smt_state(void) ++{ ++ int max_threads, cpu; ++ ++ max_threads = 0; ++ for_each_online_cpu (cpu) { ++ int threads = cpumask_weight(topology_sibling_cpumask(cpu)); ++ ++ if (threads > max_threads) ++ max_threads = threads; ++ } ++ __max_smt_threads = max_threads; ++} ++ ++static void remove_siblinginfo(int cpu) ++{ ++ int sibling; ++ struct cpuinfo_x86 *c = &cpu_data(cpu); ++ ++ for_each_cpu(sibling, topology_core_cpumask(cpu)) { ++ cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); ++ /*/ ++ * last thread sibling in this cpu core going down ++ */ ++ if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1) ++ cpu_data(sibling).booted_cores--; ++ } ++ ++ for_each_cpu(sibling, topology_sibling_cpumask(cpu)) ++ cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); ++ for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) ++ cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); ++ cpumask_clear(cpu_llc_shared_mask(cpu)); ++ cpumask_clear(topology_sibling_cpumask(cpu)); ++ cpumask_clear(topology_core_cpumask(cpu)); ++ c->cpu_core_id = 0; ++ c->booted_cores = 0; ++ cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); ++ recompute_smt_state(); ++} ++ ++static void remove_cpu_from_maps(int cpu) ++{ ++ set_cpu_online(cpu, false); ++ cpumask_clear_cpu(cpu, cpu_callout_mask); ++ cpumask_clear_cpu(cpu, cpu_callin_mask); ++ /* was set by cpu_init() */ ++ cpumask_clear_cpu(cpu, cpu_initialized_mask); ++ numa_remove_cpu(cpu); ++} ++ ++void cpu_disable_common(void) ++{ ++ int cpu = smp_processor_id(); ++ ++ remove_siblinginfo(cpu); ++ ++ /* It's now safe to remove this processor from the online map */ ++ lock_vector_lock(); ++ remove_cpu_from_maps(cpu); ++ 
unlock_vector_lock(); ++ fixup_irqs(); ++ lapic_offline(); ++} ++ ++int native_cpu_disable(void) ++{ ++ int ret; ++ ++ ret = lapic_can_unplug_cpu(); ++ if (ret) ++ return ret; ++ ++ clear_local_APIC(); ++ cpu_disable_common(); ++ ++ return 0; ++} ++ ++int common_cpu_die(unsigned int cpu) ++{ ++ int ret = 0; ++ ++ /* We don't do anything here: idle task is faking death itself. */ ++ ++ /* They ack this in play_dead() by setting CPU_DEAD */ ++ if (cpu_wait_death(cpu, 5)) { ++ if (system_state == SYSTEM_RUNNING) ++ pr_info("CPU %u is now offline\n", cpu); ++ } else { ++ pr_err("CPU %u didn't die...\n", cpu); ++ ret = -1; ++ } ++ ++ return ret; ++} ++ ++void native_cpu_die(unsigned int cpu) ++{ ++ common_cpu_die(cpu); ++} ++ ++void play_dead_common(void) ++{ ++ idle_task_exit(); ++ ++ /* Ack it */ ++ (void)cpu_report_death(); ++ ++ /* ++ * With physical CPU hotplug, we should halt the cpu ++ */ ++ local_irq_disable(); ++} ++ ++static bool wakeup_cpu0(void) ++{ ++ if (smp_processor_id() == 0 && enable_start_cpu0) ++ return true; ++ ++ return false; ++} ++ ++/* ++ * We need to flush the caches before going to sleep, lest we have ++ * dirty data in our caches when we come back up. ++ */ ++static inline void mwait_play_dead(void) ++{ ++ unsigned int eax, ebx, ecx, edx; ++ unsigned int highest_cstate = 0; ++ unsigned int highest_subcstate = 0; ++ void *mwait_ptr; ++ int i; ++ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) ++ return; ++ if (!this_cpu_has(X86_FEATURE_MWAIT)) ++ return; ++ if (!this_cpu_has(X86_FEATURE_CLFLUSH)) ++ return; ++ if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) ++ return; ++ ++ eax = CPUID_MWAIT_LEAF; ++ ecx = 0; ++ native_cpuid(&eax, &ebx, &ecx, &edx); ++ ++ /* ++ * eax will be 0 if EDX enumeration is not valid. ++ * Initialized below to cstate, sub_cstate value when EDX is valid. ++ */ ++ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { ++ eax = 0; ++ } else { ++ edx >>= MWAIT_SUBSTATE_SIZE; ++ for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { ++ if (edx & MWAIT_SUBSTATE_MASK) { ++ highest_cstate = i; ++ highest_subcstate = edx & MWAIT_SUBSTATE_MASK; ++ } ++ } ++ eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | ++ (highest_subcstate - 1); ++ } ++ ++ /* ++ * This should be a memory location in a cache line which is ++ * unlikely to be touched by other processors. The actual ++ * content is immaterial as it is not actually modified in any way. ++ */ ++ mwait_ptr = ¤t_thread_info()->flags; ++ ++ wbinvd(); ++ ++ while (1) { ++ /* ++ * The CLFLUSH is a workaround for erratum AAI65 for ++ * the Xeon 7400 series. It's not clear it is actually ++ * needed, but it should be harmless in either case. ++ * The WBINVD is insufficient due to the spurious-wakeup ++ * case where we return around the loop. ++ */ ++ mb(); ++ clflush(mwait_ptr); ++ mb(); ++ __monitor(mwait_ptr, 0, 0); ++ mb(); ++ __mwait(eax, 0); ++ /* ++ * If NMI wants to wake up CPU0, start CPU0. ++ */ ++ if (wakeup_cpu0()) ++ start_cpu0(); ++ } ++} ++ ++void hlt_play_dead(void) ++{ ++ if (__this_cpu_read(cpu_info.x86) >= 4) ++ wbinvd(); ++ ++ while (1) { ++ native_halt(); ++ /* ++ * If NMI wants to wake up CPU0, start CPU0. ++ */ ++ if (wakeup_cpu0()) ++ start_cpu0(); ++ } ++} ++ ++void native_play_dead(void) ++{ ++ play_dead_common(); ++ tboot_shutdown(TB_SHUTDOWN_WFS); ++ ++ mwait_play_dead(); /* Only returns on failure */ ++ if (cpuidle_play_dead()) ++ hlt_play_dead(); ++} ++ ++#else /* ... 
!CONFIG_HOTPLUG_CPU */ ++int native_cpu_disable(void) ++{ ++ return -ENOSYS; ++} ++ ++void native_cpu_die(unsigned int cpu) ++{ ++ /* We said "no" in __cpu_disable */ ++ BUG(); ++} ++ ++void native_play_dead(void) ++{ ++ BUG(); ++} ++ ++#endif +diff -uprN kernel/arch/x86/kernel/smp.c kernel_new/arch/x86/kernel/smp.c +--- kernel/arch/x86/kernel/smp.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/smp.c 2021-04-01 18:28:07.657863285 +0800 +@@ -255,10 +255,10 @@ static void native_stop_other_cpus(int w + udelay(1); + } + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + disable_local_APIC(); + mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /* +diff -uprN kernel/arch/x86/kernel/traps.c kernel_new/arch/x86/kernel/traps.c +--- kernel/arch/x86/kernel/traps.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/traps.c 2021-04-01 18:28:07.657863285 +0800 +@@ -14,6 +14,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -77,13 +78,13 @@ DECLARE_BITMAP(system_vectors, NR_VECTOR + static inline void cond_local_irq_enable(struct pt_regs *regs) + { + if (regs->flags & X86_EFLAGS_IF) +- local_irq_enable(); ++ hard_local_irq_enable_notrace(); + } + + static inline void cond_local_irq_disable(struct pt_regs *regs) + { + if (regs->flags & X86_EFLAGS_IF) +- local_irq_disable(); ++ hard_local_irq_disable_notrace(); + } + + /* +@@ -544,7 +545,7 @@ do_general_protection(struct pt_regs *re + } + + if (v8086_mode(regs)) { +- local_irq_enable(); ++ hard_local_irq_enable(); + handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); + return; + } +@@ -931,7 +932,7 @@ dotraplinkage void do_iret_error(struct + siginfo_t info; + + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); +- local_irq_enable(); ++ hard_local_irq_enable(); + + clear_siginfo(&info); + info.si_signo = SIGILL; +diff -uprN kernel/arch/x86/kernel/tsc.c kernel_new/arch/x86/kernel/tsc.c +--- kernel/arch/x86/kernel/tsc.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/tsc.c 2021-04-01 18:28:07.657863285 +0800 +@@ -731,11 +731,11 @@ static unsigned long pit_hpet_ptimer_cal + * calibration, which will take at least 50ms, and + * read the end value. + */ +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + tsc1 = tsc_read_refs(&ref1, hpet); + tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); + tsc2 = tsc_read_refs(&ref2, hpet); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + /* Pick the lowest PIT TSC calibration so far */ + tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); +@@ -844,9 +844,9 @@ unsigned long native_calibrate_cpu_early + if (!fast_calibrate) + fast_calibrate = cpu_khz_from_msr(); + if (!fast_calibrate) { +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + fast_calibrate = quick_pit_calibrate(); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + return fast_calibrate; + } +@@ -1109,7 +1109,7 @@ static struct clocksource clocksource_ts + * this one will immediately take over. We will only register if TSC has + * been found good. 
+ */ +-static struct clocksource clocksource_tsc = { ++struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .read = read_tsc, +diff -uprN kernel/arch/x86/kernel/vm86_32.c kernel_new/arch/x86/kernel/vm86_32.c +--- kernel/arch/x86/kernel/vm86_32.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/vm86_32.c 2021-04-01 18:28:07.657863285 +0800 +@@ -147,12 +147,14 @@ void save_v86_state(struct kernel_vm86_r + } + + preempt_disable(); ++ hard_cond_local_irq_disable(); + tsk->thread.sp0 = vm86->saved_sp0; + tsk->thread.sysenter_cs = __KERNEL_CS; + update_task_stack(tsk); + refresh_sysenter_cs(&tsk->thread); + vm86->saved_sp0 = 0; + preempt_enable(); ++ hard_cond_local_irq_enable(); + + memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); + +@@ -365,6 +367,7 @@ static long do_sys_vm86(struct vm86plus_ + vm86->saved_sp0 = tsk->thread.sp0; + lazy_save_gs(vm86->regs32.gs); + ++ hard_cond_local_irq_disable(); + /* make room for real-mode segments */ + preempt_disable(); + tsk->thread.sp0 += 16; +@@ -376,6 +379,7 @@ static long do_sys_vm86(struct vm86plus_ + + update_task_stack(tsk); + preempt_enable(); ++ hard_cond_local_irq_enable(); + + if (vm86->flags & VM86_SCREEN_BITMAP) + mark_screen_rdonly(tsk->mm); +diff -uprN kernel/arch/x86/kvm/svm.c kernel_new/arch/x86/kvm/svm.c +--- kernel/arch/x86/kvm/svm.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kvm/svm.c 2021-04-01 18:28:07.657863285 +0800 +@@ -5637,7 +5637,7 @@ static void svm_vcpu_run(struct kvm_vcpu + */ + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + +- local_irq_enable(); ++ hard_local_irq_enable(); + + asm volatile ( + "push %%" _ASM_BP "; \n\t" +@@ -5763,7 +5763,7 @@ static void svm_vcpu_run(struct kvm_vcpu + + reload_tss(vcpu); + +- local_irq_disable(); ++ hard_local_irq_disable(); + + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + +@@ -6150,6 +6150,7 @@ out: + + static void svm_handle_external_intr(struct kvm_vcpu *vcpu) + { ++ hard_cond_local_irq_enable(); + local_irq_enable(); + /* + * We must have an instruction with interrupts enabled, so +diff -uprN kernel/arch/x86/kvm/svm.c.orig kernel_new/arch/x86/kvm/svm.c.orig +--- kernel/arch/x86/kvm/svm.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kvm/svm.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,7244 @@ ++/* ++ * Kernel-based Virtual Machine driver for Linux ++ * ++ * AMD SVM support ++ * ++ * Copyright (C) 2006 Qumranet, Inc. ++ * Copyright 2010 Red Hat, Inc. and/or its affiliates. ++ * ++ * Authors: ++ * Yaniv Kamay ++ * Avi Kivity ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. 
++ * ++ */ ++ ++#define pr_fmt(fmt) "SVM: " fmt ++ ++#include ++ ++#include "irq.h" ++#include "mmu.h" ++#include "kvm_cache_regs.h" ++#include "x86.h" ++#include "cpuid.h" ++#include "pmu.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include "trace.h" ++ ++#define __ex(x) __kvm_handle_fault_on_reboot(x) ++ ++MODULE_AUTHOR("Qumranet"); ++MODULE_LICENSE("GPL"); ++ ++static const struct x86_cpu_id svm_cpu_id[] = { ++ X86_FEATURE_MATCH(X86_FEATURE_SVM), ++ {} ++}; ++MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); ++ ++#define IOPM_ALLOC_ORDER 2 ++#define MSRPM_ALLOC_ORDER 1 ++ ++#define SEG_TYPE_LDT 2 ++#define SEG_TYPE_BUSY_TSS16 3 ++ ++#define SVM_FEATURE_NPT (1 << 0) ++#define SVM_FEATURE_LBRV (1 << 1) ++#define SVM_FEATURE_SVML (1 << 2) ++#define SVM_FEATURE_NRIP (1 << 3) ++#define SVM_FEATURE_TSC_RATE (1 << 4) ++#define SVM_FEATURE_VMCB_CLEAN (1 << 5) ++#define SVM_FEATURE_FLUSH_ASID (1 << 6) ++#define SVM_FEATURE_DECODE_ASSIST (1 << 7) ++#define SVM_FEATURE_PAUSE_FILTER (1 << 10) ++ ++#define SVM_AVIC_DOORBELL 0xc001011b ++ ++#define NESTED_EXIT_HOST 0 /* Exit handled on host level */ ++#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ ++#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */ ++ ++#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) ++ ++#define TSC_RATIO_RSVD 0xffffff0000000000ULL ++#define TSC_RATIO_MIN 0x0000000000000001ULL ++#define TSC_RATIO_MAX 0x000000ffffffffffULL ++ ++#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) ++ ++/* ++ * 0xff is broadcast, so the max index allowed for physical APIC ID ++ * table is 0xfe. APIC IDs above 0xff are reserved. 
++ */ ++#define AVIC_MAX_PHYSICAL_ID_COUNT 255 ++ ++#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 ++#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 ++#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF ++ ++/* AVIC GATAG is encoded using VM and VCPU IDs */ ++#define AVIC_VCPU_ID_BITS 8 ++#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1) ++ ++#define AVIC_VM_ID_BITS 24 ++#define AVIC_VM_ID_NR (1 << AVIC_VM_ID_BITS) ++#define AVIC_VM_ID_MASK ((1 << AVIC_VM_ID_BITS) - 1) ++ ++#define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \ ++ (y & AVIC_VCPU_ID_MASK)) ++#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK) ++#define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK) ++ ++static bool erratum_383_found __read_mostly; ++ ++static const u32 host_save_user_msrs[] = { ++#ifdef CONFIG_X86_64 ++ MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, ++ MSR_FS_BASE, ++#endif ++ MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, ++ MSR_TSC_AUX, ++}; ++ ++#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) ++ ++struct kvm_sev_info { ++ bool active; /* SEV enabled guest */ ++ unsigned int asid; /* ASID used for this guest */ ++ unsigned int handle; /* SEV firmware handle */ ++ int fd; /* SEV device fd */ ++ unsigned long pages_locked; /* Number of pages locked */ ++ struct list_head regions_list; /* List of registered regions */ ++}; ++ ++struct kvm_svm { ++ struct kvm kvm; ++ ++ /* Struct members for AVIC */ ++ u32 avic_vm_id; ++ u32 ldr_mode; ++ struct page *avic_logical_id_table_page; ++ struct page *avic_physical_id_table_page; ++ struct hlist_node hnode; ++ ++ struct kvm_sev_info sev_info; ++}; ++ ++struct kvm_vcpu; ++ ++struct nested_state { ++ struct vmcb *hsave; ++ u64 hsave_msr; ++ u64 vm_cr_msr; ++ u64 vmcb; ++ ++ /* These are the merged vectors */ ++ u32 *msrpm; ++ ++ /* gpa pointers to the real vectors */ ++ u64 vmcb_msrpm; ++ u64 vmcb_iopm; ++ ++ /* A VMEXIT is required but not yet emulated */ ++ bool exit_required; ++ ++ /* cache for intercepts of the guest */ ++ u32 intercept_cr; ++ u32 intercept_dr; ++ u32 intercept_exceptions; ++ u64 intercept; ++ ++ /* Nested Paging related state */ ++ u64 nested_cr3; ++}; ++ ++#define MSRPM_OFFSETS 16 ++static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; ++ ++/* ++ * Set osvw_len to higher value when updated Revision Guides ++ * are published and we know what the new status bits are ++ */ ++static uint64_t osvw_len = 4, osvw_status; ++ ++struct vcpu_svm { ++ struct kvm_vcpu vcpu; ++ struct vmcb *vmcb; ++ unsigned long vmcb_pa; ++ struct svm_cpu_data *svm_data; ++ uint64_t asid_generation; ++ uint64_t sysenter_esp; ++ uint64_t sysenter_eip; ++ uint64_t tsc_aux; ++ ++ u64 msr_decfg; ++ ++ u64 next_rip; ++ ++ u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; ++ struct { ++ u16 fs; ++ u16 gs; ++ u16 ldt; ++ u64 gs_base; ++ } host; ++ ++ u64 spec_ctrl; ++ /* ++ * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be ++ * translated into the appropriate L2_CFG bits on the host to ++ * perform speculative control. 
++ */ ++ u64 virt_spec_ctrl; ++ ++ u32 *msrpm; ++ ++ ulong nmi_iret_rip; ++ ++ struct nested_state nested; ++ ++ bool nmi_singlestep; ++ u64 nmi_singlestep_guest_rflags; ++ ++ unsigned int3_injected; ++ unsigned long int3_rip; ++ ++ /* cached guest cpuid flags for faster access */ ++ bool nrips_enabled : 1; ++ ++ u32 ldr_reg; ++ struct page *avic_backing_page; ++ u64 *avic_physical_id_cache; ++ bool avic_is_running; ++ ++ /* ++ * Per-vcpu list of struct amd_svm_iommu_ir: ++ * This is used mainly to store interrupt remapping information used ++ * when update the vcpu affinity. This avoids the need to scan for ++ * IRTE and try to match ga_tag in the IOMMU driver. ++ */ ++ struct list_head ir_list; ++ spinlock_t ir_list_lock; ++ ++ /* which host CPU was used for running this vcpu */ ++ unsigned int last_cpu; ++}; ++ ++/* ++ * This is a wrapper of struct amd_iommu_ir_data. ++ */ ++struct amd_svm_iommu_ir { ++ struct list_head node; /* Used by SVM for per-vcpu ir_list */ ++ void *data; /* Storing pointer to struct amd_ir_data */ ++}; ++ ++#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) ++#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) ++ ++#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) ++#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) ++#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) ++#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) ++ ++static DEFINE_PER_CPU(u64, current_tsc_ratio); ++#define TSC_RATIO_DEFAULT 0x0100000000ULL ++ ++#define MSR_INVALID 0xffffffffU ++ ++static const struct svm_direct_access_msrs { ++ u32 index; /* Index of the MSR */ ++ bool always; /* True if intercept is always on */ ++} direct_access_msrs[] = { ++ { .index = MSR_STAR, .always = true }, ++ { .index = MSR_IA32_SYSENTER_CS, .always = true }, ++#ifdef CONFIG_X86_64 ++ { .index = MSR_GS_BASE, .always = true }, ++ { .index = MSR_FS_BASE, .always = true }, ++ { .index = MSR_KERNEL_GS_BASE, .always = true }, ++ { .index = MSR_LSTAR, .always = true }, ++ { .index = MSR_CSTAR, .always = true }, ++ { .index = MSR_SYSCALL_MASK, .always = true }, ++#endif ++ { .index = MSR_IA32_SPEC_CTRL, .always = false }, ++ { .index = MSR_IA32_PRED_CMD, .always = false }, ++ { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, ++ { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, ++ { .index = MSR_IA32_LASTINTFROMIP, .always = false }, ++ { .index = MSR_IA32_LASTINTTOIP, .always = false }, ++ { .index = MSR_INVALID, .always = false }, ++}; ++ ++/* enable NPT for AMD64 and X86 with PAE */ ++#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) ++static bool npt_enabled = true; ++#else ++static bool npt_enabled; ++#endif ++ ++/* ++ * These 2 parameters are used to config the controls for Pause-Loop Exiting: ++ * pause_filter_count: On processors that support Pause filtering(indicated ++ * by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter ++ * count value. On VMRUN this value is loaded into an internal counter. ++ * Each time a pause instruction is executed, this counter is decremented ++ * until it reaches zero at which time a #VMEXIT is generated if pause ++ * intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause ++ * Intercept Filtering for more details. ++ * This also indicate if ple logic enabled. ++ * ++ * pause_filter_thresh: In addition, some processor families support advanced ++ * pause filtering (indicated by CPUID Fn8000_000A_EDX) upper bound on ++ * the amount of time a guest is allowed to execute in a pause loop. 
++ * In this mode, a 16-bit pause filter threshold field is added in the ++ * VMCB. The threshold value is a cycle count that is used to reset the ++ * pause counter. As with simple pause filtering, VMRUN loads the pause ++ * count value from VMCB into an internal counter. Then, on each pause ++ * instruction the hardware checks the elapsed number of cycles since ++ * the most recent pause instruction against the pause filter threshold. ++ * If the elapsed cycle count is greater than the pause filter threshold, ++ * then the internal pause count is reloaded from the VMCB and execution ++ * continues. If the elapsed cycle count is less than the pause filter ++ * threshold, then the internal pause count is decremented. If the count ++ * value is less than zero and PAUSE intercept is enabled, a #VMEXIT is ++ * triggered. If advanced pause filtering is supported and pause filter ++ * threshold field is set to zero, the filter will operate in the simpler, ++ * count only mode. ++ */ ++ ++static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP; ++module_param(pause_filter_thresh, ushort, 0444); ++ ++static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW; ++module_param(pause_filter_count, ushort, 0444); ++ ++/* Default doubles per-vcpu window every exit. */ ++static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW; ++module_param(pause_filter_count_grow, ushort, 0444); ++ ++/* Default resets per-vcpu window every exit to pause_filter_count. */ ++static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; ++module_param(pause_filter_count_shrink, ushort, 0444); ++ ++/* Default is to compute the maximum so we can never overflow. */ ++static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; ++module_param(pause_filter_count_max, ushort, 0444); ++ ++/* allow nested paging (virtualized MMU) for all guests */ ++static int npt = true; ++module_param(npt, int, S_IRUGO); ++ ++/* allow nested virtualization in KVM/SVM */ ++static int nested = true; ++module_param(nested, int, S_IRUGO); ++ ++/* enable / disable AVIC */ ++static int avic; ++#ifdef CONFIG_X86_LOCAL_APIC ++module_param(avic, int, S_IRUGO); ++#endif ++ ++/* enable/disable Virtual VMLOAD VMSAVE */ ++static int vls = true; ++module_param(vls, int, 0444); ++ ++/* enable/disable Virtual GIF */ ++static int vgif = true; ++module_param(vgif, int, 0444); ++ ++/* enable/disable SEV support */ ++static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); ++module_param(sev, int, 0444); ++ ++static u8 rsm_ins_bytes[] = "\x0f\xaa"; ++ ++static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); ++static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); ++static void svm_complete_interrupts(struct vcpu_svm *svm); ++ ++static int nested_svm_exit_handled(struct vcpu_svm *svm); ++static int nested_svm_intercept(struct vcpu_svm *svm); ++static int nested_svm_vmexit(struct vcpu_svm *svm); ++static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, ++ bool has_error_code, u32 error_code); ++ ++enum { ++ VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, ++ pause filter count */ ++ VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */ ++ VMCB_ASID, /* ASID */ ++ VMCB_INTR, /* int_ctl, int_vector */ ++ VMCB_NPT, /* npt_en, nCR3, gPAT */ ++ VMCB_CR, /* CR0, CR3, CR4, EFER */ ++ VMCB_DR, /* DR6, DR7 */ ++ VMCB_DT, /* GDT, IDT */ ++ VMCB_SEG, /* CS, DS, SS, ES, CPL */ ++ VMCB_CR2, /* CR2 only */ ++ VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, 
LAST_EX_FROM, LAST_EX_TO */ ++ VMCB_AVIC, /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE, ++ * AVIC PHYSICAL_TABLE pointer, ++ * AVIC LOGICAL_TABLE pointer ++ */ ++ VMCB_DIRTY_MAX, ++}; ++ ++/* TPR and CR2 are always written before VMRUN */ ++#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) ++ ++#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL ++ ++static unsigned int max_sev_asid; ++static unsigned int min_sev_asid; ++static unsigned long *sev_asid_bitmap; ++#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) ++ ++struct enc_region { ++ struct list_head list; ++ unsigned long npages; ++ struct page **pages; ++ unsigned long uaddr; ++ unsigned long size; ++}; ++ ++ ++static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) ++{ ++ return container_of(kvm, struct kvm_svm, kvm); ++} ++ ++static inline bool svm_sev_enabled(void) ++{ ++ return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0; ++} ++ ++static inline bool sev_guest(struct kvm *kvm) ++{ ++#ifdef CONFIG_KVM_AMD_SEV ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ ++ return sev->active; ++#else ++ return false; ++#endif ++} ++ ++static inline int sev_get_asid(struct kvm *kvm) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ ++ return sev->asid; ++} ++ ++static inline void mark_all_dirty(struct vmcb *vmcb) ++{ ++ vmcb->control.clean = 0; ++} ++ ++static inline void mark_all_clean(struct vmcb *vmcb) ++{ ++ vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1) ++ & ~VMCB_ALWAYS_DIRTY_MASK; ++} ++ ++static inline void mark_dirty(struct vmcb *vmcb, int bit) ++{ ++ vmcb->control.clean &= ~(1 << bit); ++} ++ ++static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) ++{ ++ return container_of(vcpu, struct vcpu_svm, vcpu); ++} ++ ++static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data) ++{ ++ svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK; ++ mark_dirty(svm->vmcb, VMCB_AVIC); ++} ++ ++static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u64 *entry = svm->avic_physical_id_cache; ++ ++ if (!entry) ++ return false; ++ ++ return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); ++} ++ ++static void recalc_intercepts(struct vcpu_svm *svm) ++{ ++ struct vmcb_control_area *c, *h; ++ struct nested_state *g; ++ ++ mark_dirty(svm->vmcb, VMCB_INTERCEPTS); ++ ++ if (!is_guest_mode(&svm->vcpu)) ++ return; ++ ++ c = &svm->vmcb->control; ++ h = &svm->nested.hsave->control; ++ g = &svm->nested; ++ ++ c->intercept_cr = h->intercept_cr | g->intercept_cr; ++ c->intercept_dr = h->intercept_dr | g->intercept_dr; ++ c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; ++ c->intercept = h->intercept | g->intercept; ++} ++ ++static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm) ++{ ++ if (is_guest_mode(&svm->vcpu)) ++ return svm->nested.hsave; ++ else ++ return svm->vmcb; ++} ++ ++static inline void set_cr_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_cr |= (1U << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_cr &= ~(1U << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ return vmcb->control.intercept_cr & (1U << bit); ++} ++ ++static inline 
void set_dr_intercepts(struct vcpu_svm *svm) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ) ++ | (1 << INTERCEPT_DR1_READ) ++ | (1 << INTERCEPT_DR2_READ) ++ | (1 << INTERCEPT_DR3_READ) ++ | (1 << INTERCEPT_DR4_READ) ++ | (1 << INTERCEPT_DR5_READ) ++ | (1 << INTERCEPT_DR6_READ) ++ | (1 << INTERCEPT_DR7_READ) ++ | (1 << INTERCEPT_DR0_WRITE) ++ | (1 << INTERCEPT_DR1_WRITE) ++ | (1 << INTERCEPT_DR2_WRITE) ++ | (1 << INTERCEPT_DR3_WRITE) ++ | (1 << INTERCEPT_DR4_WRITE) ++ | (1 << INTERCEPT_DR5_WRITE) ++ | (1 << INTERCEPT_DR6_WRITE) ++ | (1 << INTERCEPT_DR7_WRITE); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void clr_dr_intercepts(struct vcpu_svm *svm) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_dr = 0; ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void set_exception_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_exceptions |= (1U << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_exceptions &= ~(1U << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void set_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept |= (1ULL << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void clr_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept &= ~(1ULL << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline bool vgif_enabled(struct vcpu_svm *svm) ++{ ++ return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK); ++} ++ ++static inline void enable_gif(struct vcpu_svm *svm) ++{ ++ if (vgif_enabled(svm)) ++ svm->vmcb->control.int_ctl |= V_GIF_MASK; ++ else ++ svm->vcpu.arch.hflags |= HF_GIF_MASK; ++} ++ ++static inline void disable_gif(struct vcpu_svm *svm) ++{ ++ if (vgif_enabled(svm)) ++ svm->vmcb->control.int_ctl &= ~V_GIF_MASK; ++ else ++ svm->vcpu.arch.hflags &= ~HF_GIF_MASK; ++} ++ ++static inline bool gif_set(struct vcpu_svm *svm) ++{ ++ if (vgif_enabled(svm)) ++ return !!(svm->vmcb->control.int_ctl & V_GIF_MASK); ++ else ++ return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); ++} ++ ++static unsigned long iopm_base; ++ ++struct kvm_ldttss_desc { ++ u16 limit0; ++ u16 base0; ++ unsigned base1:8, type:5, dpl:2, p:1; ++ unsigned limit1:4, zero0:3, g:1, base2:8; ++ u32 base3; ++ u32 zero1; ++} __attribute__((packed)); ++ ++struct svm_cpu_data { ++ int cpu; ++ ++ u64 asid_generation; ++ u32 max_asid; ++ u32 next_asid; ++ u32 min_asid; ++ struct kvm_ldttss_desc *tss_desc; ++ ++ struct page *save_area; ++ struct vmcb *current_vmcb; ++ ++ /* index = sev_asid, value = vmcb pointer */ ++ struct vmcb **sev_vmcbs; ++}; ++ ++static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); ++ ++struct svm_init_data { ++ int cpu; ++ int r; ++}; ++ ++static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; ++ ++#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges) ++#define MSRS_RANGE_SIZE 2048 ++#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) ++ ++static u32 svm_msrpm_offset(u32 msr) ++{ ++ u32 offset; ++ int i; ++ ++ for (i = 0; i < NUM_MSR_MAPS; i++) { ++ if (msr < msrpm_ranges[i] || ++ msr >= msrpm_ranges[i] + MSRS_IN_RANGE) ++ continue; ++ ++ offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ ++ offset += (i * MSRS_RANGE_SIZE); /* add range 
offset */ ++ ++ /* Now we have the u8 offset - but need the u32 offset */ ++ return offset / 4; ++ } ++ ++ /* MSR not in any range */ ++ return MSR_INVALID; ++} ++ ++#define MAX_INST_SIZE 15 ++ ++static inline void clgi(void) ++{ ++ asm volatile (__ex(SVM_CLGI)); ++} ++ ++static inline void stgi(void) ++{ ++ asm volatile (__ex(SVM_STGI)); ++} ++ ++static inline void invlpga(unsigned long addr, u32 asid) ++{ ++ asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); ++} ++ ++static int get_npt_level(struct kvm_vcpu *vcpu) ++{ ++#ifdef CONFIG_X86_64 ++ return PT64_ROOT_4LEVEL; ++#else ++ return PT32E_ROOT_LEVEL; ++#endif ++} ++ ++static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) ++{ ++ vcpu->arch.efer = efer; ++ ++ if (!npt_enabled) { ++ /* Shadow paging assumes NX to be available. */ ++ efer |= EFER_NX; ++ ++ if (!(efer & EFER_LMA)) ++ efer &= ~EFER_LME; ++ } ++ ++ to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; ++ mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); ++} ++ ++static int is_external_interrupt(u32 info) ++{ ++ info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; ++ return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); ++} ++ ++static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u32 ret = 0; ++ ++ if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ++ ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; ++ return ret; ++} ++ ++static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (mask == 0) ++ svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; ++ else ++ svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; ++ ++} ++ ++static void skip_emulated_instruction(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (svm->vmcb->control.next_rip != 0) { ++ WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); ++ svm->next_rip = svm->vmcb->control.next_rip; ++ } ++ ++ if (!svm->next_rip) { ++ if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) != ++ EMULATE_DONE) ++ printk(KERN_DEBUG "%s: NOP\n", __func__); ++ return; ++ } ++ if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) ++ printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n", ++ __func__, kvm_rip_read(vcpu), svm->next_rip); ++ ++ kvm_rip_write(vcpu, svm->next_rip); ++ svm_set_interrupt_shadow(vcpu, 0); ++} ++ ++static void svm_queue_exception(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ unsigned nr = vcpu->arch.exception.nr; ++ bool has_error_code = vcpu->arch.exception.has_error_code; ++ bool reinject = vcpu->arch.exception.injected; ++ u32 error_code = vcpu->arch.exception.error_code; ++ ++ /* ++ * If we are within a nested VM we'd better #VMEXIT and let the guest ++ * handle the exception ++ */ ++ if (!reinject && ++ nested_svm_check_exception(svm, nr, has_error_code, error_code)) ++ return; ++ ++ if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { ++ unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); ++ ++ /* ++ * For guest debugging where we have to reinject #BP if some ++ * INT3 is guest-owned: ++ * Emulate nRIP by moving RIP forward. Will fail if injection ++ * raises a fault that is not intercepted. Still better than ++ * failing in all cases. ++ */ ++ skip_emulated_instruction(&svm->vcpu); ++ rip = kvm_rip_read(&svm->vcpu); ++ svm->int3_rip = rip + svm->vmcb->save.cs.base; ++ svm->int3_injected = rip - old_rip; ++ } ++ ++ svm->vmcb->control.event_inj = nr ++ | SVM_EVTINJ_VALID ++ | (has_error_code ? 
SVM_EVTINJ_VALID_ERR : 0) ++ | SVM_EVTINJ_TYPE_EXEPT; ++ svm->vmcb->control.event_inj_err = error_code; ++} ++ ++static void svm_init_erratum_383(void) ++{ ++ u32 low, high; ++ int err; ++ u64 val; ++ ++ if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH)) ++ return; ++ ++ /* Use _safe variants to not break nested virtualization */ ++ val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err); ++ if (err) ++ return; ++ ++ val |= (1ULL << 47); ++ ++ low = lower_32_bits(val); ++ high = upper_32_bits(val); ++ ++ native_write_msr_safe(MSR_AMD64_DC_CFG, low, high); ++ ++ erratum_383_found = true; ++} ++ ++static void svm_init_osvw(struct kvm_vcpu *vcpu) ++{ ++ /* ++ * Guests should see errata 400 and 415 as fixed (assuming that ++ * HLT and IO instructions are intercepted). ++ */ ++ vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3; ++ vcpu->arch.osvw.status = osvw_status & ~(6ULL); ++ ++ /* ++ * By increasing VCPU's osvw.length to 3 we are telling the guest that ++ * all osvw.status bits inside that length, including bit 0 (which is ++ * reserved for erratum 298), are valid. However, if host processor's ++ * osvw_len is 0 then osvw_status[0] carries no information. We need to ++ * be conservative here and therefore we tell the guest that erratum 298 ++ * is present (because we really don't know). ++ */ ++ if (osvw_len == 0 && boot_cpu_data.x86 == 0x10) ++ vcpu->arch.osvw.status |= 1; ++} ++ ++static int has_svm(void) ++{ ++ const char *msg; ++ ++ if (!cpu_has_svm(&msg)) { ++ printk(KERN_INFO "has_svm: %s\n", msg); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static void svm_hardware_disable(void) ++{ ++ /* Make sure we clean up behind us */ ++ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) ++ wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); ++ ++ cpu_svm_disable(); ++ ++ amd_pmu_disable_virt(); ++} ++ ++static int svm_hardware_enable(void) ++{ ++ ++ struct svm_cpu_data *sd; ++ uint64_t efer; ++ struct desc_struct *gdt; ++ int me = raw_smp_processor_id(); ++ ++ rdmsrl(MSR_EFER, efer); ++ if (efer & EFER_SVME) ++ return -EBUSY; ++ ++ if (!has_svm()) { ++ pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me); ++ return -EINVAL; ++ } ++ sd = per_cpu(svm_data, me); ++ if (!sd) { ++ pr_err("%s: svm_data is NULL on %d\n", __func__, me); ++ return -EINVAL; ++ } ++ ++ sd->asid_generation = 1; ++ sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; ++ sd->next_asid = sd->max_asid + 1; ++ sd->min_asid = max_sev_asid + 1; ++ ++ gdt = get_current_gdt_rw(); ++ sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); ++ ++ wrmsrl(MSR_EFER, efer | EFER_SVME); ++ ++ wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); ++ ++ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { ++ wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); ++ __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT); ++ } ++ ++ ++ /* ++ * Get OSVW bits. ++ * ++ * Note that it is possible to have a system with mixed processor ++ * revisions and therefore different OSVW bits. If bits are not the same ++ * on different processors then choose the worst case (i.e. if erratum ++ * is present on one processor and not on another then assume that the ++ * erratum is present everywhere). 
++ */ ++ if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) { ++ uint64_t len, status = 0; ++ int err; ++ ++ len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err); ++ if (!err) ++ status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, ++ &err); ++ ++ if (err) ++ osvw_status = osvw_len = 0; ++ else { ++ if (len < osvw_len) ++ osvw_len = len; ++ osvw_status |= status; ++ osvw_status &= (1ULL << osvw_len) - 1; ++ } ++ } else ++ osvw_status = osvw_len = 0; ++ ++ svm_init_erratum_383(); ++ ++ amd_pmu_enable_virt(); ++ ++ return 0; ++} ++ ++static void svm_cpu_uninit(int cpu) ++{ ++ struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id()); ++ ++ if (!sd) ++ return; ++ ++ per_cpu(svm_data, raw_smp_processor_id()) = NULL; ++ kfree(sd->sev_vmcbs); ++ __free_page(sd->save_area); ++ kfree(sd); ++} ++ ++static int svm_cpu_init(int cpu) ++{ ++ struct svm_cpu_data *sd; ++ ++ sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); ++ if (!sd) ++ return -ENOMEM; ++ sd->cpu = cpu; ++ sd->save_area = alloc_page(GFP_KERNEL); ++ if (!sd->save_area) ++ goto free_cpu_data; ++ ++ if (svm_sev_enabled()) { ++ sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1, ++ sizeof(void *), ++ GFP_KERNEL); ++ if (!sd->sev_vmcbs) ++ goto free_save_area; ++ } ++ ++ per_cpu(svm_data, cpu) = sd; ++ ++ return 0; ++ ++free_save_area: ++ __free_page(sd->save_area); ++free_cpu_data: ++ kfree(sd); ++ return -ENOMEM; ++ ++} ++ ++static bool valid_msr_intercept(u32 index) ++{ ++ int i; ++ ++ for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) ++ if (direct_access_msrs[i].index == index) ++ return true; ++ ++ return false; ++} ++ ++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) ++{ ++ u8 bit_write; ++ unsigned long tmp; ++ u32 offset; ++ u32 *msrpm; ++ ++ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: ++ to_svm(vcpu)->msrpm; ++ ++ offset = svm_msrpm_offset(msr); ++ bit_write = 2 * (msr & 0x0f) + 1; ++ tmp = msrpm[offset]; ++ ++ BUG_ON(offset == MSR_INVALID); ++ ++ return !!test_bit(bit_write, &tmp); ++} ++ ++static void set_msr_interception(u32 *msrpm, unsigned msr, ++ int read, int write) ++{ ++ u8 bit_read, bit_write; ++ unsigned long tmp; ++ u32 offset; ++ ++ /* ++ * If this warning triggers extend the direct_access_msrs list at the ++ * beginning of the file ++ */ ++ WARN_ON(!valid_msr_intercept(msr)); ++ ++ offset = svm_msrpm_offset(msr); ++ bit_read = 2 * (msr & 0x0f); ++ bit_write = 2 * (msr & 0x0f) + 1; ++ tmp = msrpm[offset]; ++ ++ BUG_ON(offset == MSR_INVALID); ++ ++ read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); ++ write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); ++ ++ msrpm[offset] = tmp; ++} ++ ++static void svm_vcpu_init_msrpm(u32 *msrpm) ++{ ++ int i; ++ ++ memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); ++ ++ for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { ++ if (!direct_access_msrs[i].always) ++ continue; ++ ++ set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); ++ } ++} ++ ++static void add_msr_offset(u32 offset) ++{ ++ int i; ++ ++ for (i = 0; i < MSRPM_OFFSETS; ++i) { ++ ++ /* Offset already in list? */ ++ if (msrpm_offsets[i] == offset) ++ return; ++ ++ /* Slot used by another offset? */ ++ if (msrpm_offsets[i] != MSR_INVALID) ++ continue; ++ ++ /* Add offset to list */ ++ msrpm_offsets[i] = offset; ++ ++ return; ++ } ++ ++ /* ++ * If this BUG triggers the msrpm_offsets table has an overflow. Just ++ * increase MSRPM_OFFSETS in this case. 
++ */ ++ BUG(); ++} ++ ++static void init_msrpm_offsets(void) ++{ ++ int i; ++ ++ memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets)); ++ ++ for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { ++ u32 offset; ++ ++ offset = svm_msrpm_offset(direct_access_msrs[i].index); ++ BUG_ON(offset == MSR_INVALID); ++ ++ add_msr_offset(offset); ++ } ++} ++ ++static void svm_enable_lbrv(struct vcpu_svm *svm) ++{ ++ u32 *msrpm = svm->msrpm; ++ ++ svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; ++ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); ++ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); ++ set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); ++ set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1); ++} ++ ++static void svm_disable_lbrv(struct vcpu_svm *svm) ++{ ++ u32 *msrpm = svm->msrpm; ++ ++ svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; ++ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0); ++ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0); ++ set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0); ++ set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); ++} ++ ++static void disable_nmi_singlestep(struct vcpu_svm *svm) ++{ ++ svm->nmi_singlestep = false; ++ ++ if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { ++ /* Clear our flags if they were not set by the guest */ ++ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) ++ svm->vmcb->save.rflags &= ~X86_EFLAGS_TF; ++ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF)) ++ svm->vmcb->save.rflags &= ~X86_EFLAGS_RF; ++ } ++} ++ ++/* Note: ++ * This hash table is used to map VM_ID to a struct kvm_svm, ++ * when handling AMD IOMMU GALOG notification to schedule in ++ * a particular vCPU. ++ */ ++#define SVM_VM_DATA_HASH_BITS 8 ++static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); ++static u32 next_vm_id = 0; ++static bool next_vm_id_wrapped = 0; ++static DEFINE_SPINLOCK(svm_vm_data_hash_lock); ++ ++/* Note: ++ * This function is called from IOMMU driver to notify ++ * SVM to schedule in a particular vCPU of a particular VM. ++ */ ++static int avic_ga_log_notifier(u32 ga_tag) ++{ ++ unsigned long flags; ++ struct kvm_svm *kvm_svm; ++ struct kvm_vcpu *vcpu = NULL; ++ u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag); ++ u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag); ++ ++ pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id); ++ ++ spin_lock_irqsave(&svm_vm_data_hash_lock, flags); ++ hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) { ++ if (kvm_svm->avic_vm_id != vm_id) ++ continue; ++ vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id); ++ break; ++ } ++ spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); ++ ++ /* Note: ++ * At this point, the IOMMU should have already set the pending ++ * bit in the vAPIC backing page. So, we just need to schedule ++ * in the vcpu. 
++ */ ++ if (vcpu) ++ kvm_vcpu_wake_up(vcpu); ++ ++ return 0; ++} ++ ++static __init int sev_hardware_setup(void) ++{ ++ struct sev_user_data_status *status; ++ int rc; ++ ++ /* Maximum number of encrypted guests supported simultaneously */ ++ max_sev_asid = cpuid_ecx(0x8000001F); ++ ++ if (!max_sev_asid) ++ return 1; ++ ++ /* Minimum ASID value that should be used for SEV guest */ ++ min_sev_asid = cpuid_edx(0x8000001F); ++ ++ /* Initialize SEV ASID bitmap */ ++ sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); ++ if (!sev_asid_bitmap) ++ return 1; ++ ++ status = kmalloc(sizeof(*status), GFP_KERNEL); ++ if (!status) ++ return 1; ++ ++ /* ++ * Check SEV platform status. ++ * ++ * PLATFORM_STATUS can be called in any state, if we failed to query ++ * the PLATFORM status then either PSP firmware does not support SEV ++ * feature or SEV firmware is dead. ++ */ ++ rc = sev_platform_status(status, NULL); ++ if (rc) ++ goto err; ++ ++ pr_info("SEV supported\n"); ++ ++err: ++ kfree(status); ++ return rc; ++} ++ ++static void grow_ple_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb_control_area *control = &svm->vmcb->control; ++ int old = control->pause_filter_count; ++ ++ control->pause_filter_count = __grow_ple_window(old, ++ pause_filter_count, ++ pause_filter_count_grow, ++ pause_filter_count_max); ++ ++ if (control->pause_filter_count != old) ++ mark_dirty(svm->vmcb, VMCB_INTERCEPTS); ++ ++ trace_kvm_ple_window_grow(vcpu->vcpu_id, ++ control->pause_filter_count, old); ++} ++ ++static void shrink_ple_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb_control_area *control = &svm->vmcb->control; ++ int old = control->pause_filter_count; ++ ++ control->pause_filter_count = ++ __shrink_ple_window(old, ++ pause_filter_count, ++ pause_filter_count_shrink, ++ pause_filter_count); ++ if (control->pause_filter_count != old) ++ mark_dirty(svm->vmcb, VMCB_INTERCEPTS); ++ ++ trace_kvm_ple_window_shrink(vcpu->vcpu_id, ++ control->pause_filter_count, old); ++} ++ ++static __init int svm_hardware_setup(void) ++{ ++ int cpu; ++ struct page *iopm_pages; ++ void *iopm_va; ++ int r; ++ ++ iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); ++ ++ if (!iopm_pages) ++ return -ENOMEM; ++ ++ iopm_va = page_address(iopm_pages); ++ memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); ++ iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; ++ ++ init_msrpm_offsets(); ++ ++ if (boot_cpu_has(X86_FEATURE_NX)) ++ kvm_enable_efer_bits(EFER_NX); ++ ++ if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) ++ kvm_enable_efer_bits(EFER_FFXSR); ++ ++ if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { ++ kvm_has_tsc_control = true; ++ kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX; ++ kvm_tsc_scaling_ratio_frac_bits = 32; ++ } ++ ++ /* Check for pause filtering support */ ++ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { ++ pause_filter_count = 0; ++ pause_filter_thresh = 0; ++ } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) { ++ pause_filter_thresh = 0; ++ } ++ ++ if (nested) { ++ printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); ++ kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); ++ } ++ ++ if (sev) { ++ if (boot_cpu_has(X86_FEATURE_SEV) && ++ IS_ENABLED(CONFIG_KVM_AMD_SEV)) { ++ r = sev_hardware_setup(); ++ if (r) ++ sev = false; ++ } else { ++ sev = false; ++ } ++ } ++ ++ for_each_possible_cpu(cpu) { ++ r = svm_cpu_init(cpu); ++ if (r) ++ goto err; ++ } ++ ++ if (!boot_cpu_has(X86_FEATURE_NPT)) ++ npt_enabled = false; ++ ++ if (npt_enabled && !npt) 
{ ++ printk(KERN_INFO "kvm: Nested Paging disabled\n"); ++ npt_enabled = false; ++ } ++ ++ if (npt_enabled) { ++ printk(KERN_INFO "kvm: Nested Paging enabled\n"); ++ kvm_enable_tdp(); ++ } else ++ kvm_disable_tdp(); ++ ++ if (avic) { ++ if (!npt_enabled || ++ !boot_cpu_has(X86_FEATURE_AVIC) || ++ !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) { ++ avic = false; ++ } else { ++ pr_info("AVIC enabled\n"); ++ ++ amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); ++ } ++ } ++ ++ if (vls) { ++ if (!npt_enabled || ++ !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) || ++ !IS_ENABLED(CONFIG_X86_64)) { ++ vls = false; ++ } else { ++ pr_info("Virtual VMLOAD VMSAVE supported\n"); ++ } ++ } ++ ++ if (vgif) { ++ if (!boot_cpu_has(X86_FEATURE_VGIF)) ++ vgif = false; ++ else ++ pr_info("Virtual GIF supported\n"); ++ } ++ ++ return 0; ++ ++err: ++ __free_pages(iopm_pages, IOPM_ALLOC_ORDER); ++ iopm_base = 0; ++ return r; ++} ++ ++static __exit void svm_hardware_unsetup(void) ++{ ++ int cpu; ++ ++ if (svm_sev_enabled()) ++ bitmap_free(sev_asid_bitmap); ++ ++ for_each_possible_cpu(cpu) ++ svm_cpu_uninit(cpu); ++ ++ __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); ++ iopm_base = 0; ++} ++ ++static void init_seg(struct vmcb_seg *seg) ++{ ++ seg->selector = 0; ++ seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | ++ SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ ++ seg->limit = 0xffff; ++ seg->base = 0; ++} ++ ++static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) ++{ ++ seg->selector = 0; ++ seg->attrib = SVM_SELECTOR_P_MASK | type; ++ seg->limit = 0xffff; ++ seg->base = 0; ++} ++ ++static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (is_guest_mode(vcpu)) ++ return svm->nested.hsave->control.tsc_offset; ++ ++ return vcpu->arch.tsc_offset; ++} ++ ++static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u64 g_tsc_offset = 0; ++ ++ if (is_guest_mode(vcpu)) { ++ /* Write L1's TSC offset. 
*/ ++ g_tsc_offset = svm->vmcb->control.tsc_offset - ++ svm->nested.hsave->control.tsc_offset; ++ svm->nested.hsave->control.tsc_offset = offset; ++ } else ++ trace_kvm_write_tsc_offset(vcpu->vcpu_id, ++ svm->vmcb->control.tsc_offset, ++ offset); ++ ++ svm->vmcb->control.tsc_offset = offset + g_tsc_offset; ++ ++ mark_dirty(svm->vmcb, VMCB_INTERCEPTS); ++ return svm->vmcb->control.tsc_offset; ++} ++ ++static void avic_init_vmcb(struct vcpu_svm *svm) ++{ ++ struct vmcb *vmcb = svm->vmcb; ++ struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm); ++ phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page)); ++ phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page)); ++ phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page)); ++ ++ vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; ++ vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; ++ vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK; ++ vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT; ++ vmcb->control.int_ctl |= AVIC_ENABLE_MASK; ++} ++ ++static void init_vmcb(struct vcpu_svm *svm) ++{ ++ struct vmcb_control_area *control = &svm->vmcb->control; ++ struct vmcb_save_area *save = &svm->vmcb->save; ++ ++ svm->vcpu.arch.hflags = 0; ++ ++ set_cr_intercept(svm, INTERCEPT_CR0_READ); ++ set_cr_intercept(svm, INTERCEPT_CR3_READ); ++ set_cr_intercept(svm, INTERCEPT_CR4_READ); ++ set_cr_intercept(svm, INTERCEPT_CR0_WRITE); ++ set_cr_intercept(svm, INTERCEPT_CR3_WRITE); ++ set_cr_intercept(svm, INTERCEPT_CR4_WRITE); ++ if (!kvm_vcpu_apicv_active(&svm->vcpu)) ++ set_cr_intercept(svm, INTERCEPT_CR8_WRITE); ++ ++ set_dr_intercepts(svm); ++ ++ set_exception_intercept(svm, PF_VECTOR); ++ set_exception_intercept(svm, UD_VECTOR); ++ set_exception_intercept(svm, MC_VECTOR); ++ set_exception_intercept(svm, AC_VECTOR); ++ set_exception_intercept(svm, DB_VECTOR); ++ /* ++ * Guest access to VMware backdoor ports could legitimately ++ * trigger #GP because of TSS I/O permission bitmap. ++ * We intercept those #GP and allow access to them anyway ++ * as VMware does. 
++ */ ++ if (enable_vmware_backdoor) ++ set_exception_intercept(svm, GP_VECTOR); ++ ++ set_intercept(svm, INTERCEPT_INTR); ++ set_intercept(svm, INTERCEPT_NMI); ++ set_intercept(svm, INTERCEPT_SMI); ++ set_intercept(svm, INTERCEPT_SELECTIVE_CR0); ++ set_intercept(svm, INTERCEPT_RDPMC); ++ set_intercept(svm, INTERCEPT_CPUID); ++ set_intercept(svm, INTERCEPT_INVD); ++ set_intercept(svm, INTERCEPT_INVLPG); ++ set_intercept(svm, INTERCEPT_INVLPGA); ++ set_intercept(svm, INTERCEPT_IOIO_PROT); ++ set_intercept(svm, INTERCEPT_MSR_PROT); ++ set_intercept(svm, INTERCEPT_TASK_SWITCH); ++ set_intercept(svm, INTERCEPT_SHUTDOWN); ++ set_intercept(svm, INTERCEPT_VMRUN); ++ set_intercept(svm, INTERCEPT_VMMCALL); ++ set_intercept(svm, INTERCEPT_VMLOAD); ++ set_intercept(svm, INTERCEPT_VMSAVE); ++ set_intercept(svm, INTERCEPT_STGI); ++ set_intercept(svm, INTERCEPT_CLGI); ++ set_intercept(svm, INTERCEPT_SKINIT); ++ set_intercept(svm, INTERCEPT_WBINVD); ++ set_intercept(svm, INTERCEPT_XSETBV); ++ set_intercept(svm, INTERCEPT_RSM); ++ ++ if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { ++ set_intercept(svm, INTERCEPT_MONITOR); ++ set_intercept(svm, INTERCEPT_MWAIT); ++ } ++ ++ if (!kvm_hlt_in_guest(svm->vcpu.kvm)) ++ set_intercept(svm, INTERCEPT_HLT); ++ ++ control->iopm_base_pa = __sme_set(iopm_base); ++ control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); ++ control->int_ctl = V_INTR_MASKING_MASK; ++ ++ init_seg(&save->es); ++ init_seg(&save->ss); ++ init_seg(&save->ds); ++ init_seg(&save->fs); ++ init_seg(&save->gs); ++ ++ save->cs.selector = 0xf000; ++ save->cs.base = 0xffff0000; ++ /* Executable/Readable Code Segment */ ++ save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | ++ SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; ++ save->cs.limit = 0xffff; ++ ++ save->gdtr.limit = 0xffff; ++ save->idtr.limit = 0xffff; ++ ++ init_sys_seg(&save->ldtr, SEG_TYPE_LDT); ++ init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); ++ ++ svm_set_efer(&svm->vcpu, 0); ++ save->dr6 = 0xffff0ff0; ++ kvm_set_rflags(&svm->vcpu, 2); ++ save->rip = 0x0000fff0; ++ svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; ++ ++ /* ++ * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. ++ * It also updates the guest-visible cr0 value. ++ */ ++ svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); ++ kvm_mmu_reset_context(&svm->vcpu); ++ ++ save->cr4 = X86_CR4_PAE; ++ /* rdx = ?? */ ++ ++ if (npt_enabled) { ++ /* Setup VMCB for Nested Paging */ ++ control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; ++ clr_intercept(svm, INTERCEPT_INVLPG); ++ clr_exception_intercept(svm, PF_VECTOR); ++ clr_cr_intercept(svm, INTERCEPT_CR3_READ); ++ clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); ++ save->g_pat = svm->vcpu.arch.pat; ++ save->cr3 = 0; ++ save->cr4 = 0; ++ } ++ svm->asid_generation = 0; ++ ++ svm->nested.vmcb = 0; ++ svm->vcpu.arch.hflags = 0; ++ ++ if (pause_filter_count) { ++ control->pause_filter_count = pause_filter_count; ++ if (pause_filter_thresh) ++ control->pause_filter_thresh = pause_filter_thresh; ++ set_intercept(svm, INTERCEPT_PAUSE); ++ } else { ++ clr_intercept(svm, INTERCEPT_PAUSE); ++ } ++ ++ if (kvm_vcpu_apicv_active(&svm->vcpu)) ++ avic_init_vmcb(svm); ++ ++ /* ++ * If hardware supports Virtual VMLOAD VMSAVE then enable it ++ * in VMCB and clear intercepts to avoid #VMEXIT. 
++ */ ++ if (vls) { ++ clr_intercept(svm, INTERCEPT_VMLOAD); ++ clr_intercept(svm, INTERCEPT_VMSAVE); ++ svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; ++ } ++ ++ if (vgif) { ++ clr_intercept(svm, INTERCEPT_STGI); ++ clr_intercept(svm, INTERCEPT_CLGI); ++ svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; ++ } ++ ++ if (sev_guest(svm->vcpu.kvm)) { ++ svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; ++ clr_exception_intercept(svm, UD_VECTOR); ++ } ++ ++ mark_all_dirty(svm->vmcb); ++ ++ enable_gif(svm); ++ ++} ++ ++static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, ++ unsigned int index) ++{ ++ u64 *avic_physical_id_table; ++ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); ++ ++ if (index >= AVIC_MAX_PHYSICAL_ID_COUNT) ++ return NULL; ++ ++ avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page); ++ ++ return &avic_physical_id_table[index]; ++} ++ ++/** ++ * Note: ++ * AVIC hardware walks the nested page table to check permissions, ++ * but does not use the SPA address specified in the leaf page ++ * table entry since it uses address in the AVIC_BACKING_PAGE pointer ++ * field of the VMCB. Therefore, we set up the ++ * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here. ++ */ ++static int avic_init_access_page(struct kvm_vcpu *vcpu) ++{ ++ struct kvm *kvm = vcpu->kvm; ++ int ret = 0; ++ ++ mutex_lock(&kvm->slots_lock); ++ if (kvm->arch.apic_access_page_done) ++ goto out; ++ ++ ret = __x86_set_memory_region(kvm, ++ APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, ++ APIC_DEFAULT_PHYS_BASE, ++ PAGE_SIZE); ++ if (ret) ++ goto out; ++ ++ kvm->arch.apic_access_page_done = true; ++out: ++ mutex_unlock(&kvm->slots_lock); ++ return ret; ++} ++ ++static int avic_init_backing_page(struct kvm_vcpu *vcpu) ++{ ++ int ret; ++ u64 *entry, new_entry; ++ int id = vcpu->vcpu_id; ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ ret = avic_init_access_page(vcpu); ++ if (ret) ++ return ret; ++ ++ if (id >= AVIC_MAX_PHYSICAL_ID_COUNT) ++ return -EINVAL; ++ ++ if (!svm->vcpu.arch.apic->regs) ++ return -EINVAL; ++ ++ svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs); ++ ++ /* Setting AVIC backing page address in the phy APIC ID table */ ++ entry = avic_get_physical_id_entry(vcpu, id); ++ if (!entry) ++ return -EINVAL; ++ ++ new_entry = READ_ONCE(*entry); ++ new_entry = __sme_set((page_to_phys(svm->avic_backing_page) & ++ AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | ++ AVIC_PHYSICAL_ID_ENTRY_VALID_MASK); ++ WRITE_ONCE(*entry, new_entry); ++ ++ svm->avic_physical_id_cache = entry; ++ ++ return 0; ++} ++ ++static void __sev_asid_free(int asid) ++{ ++ struct svm_cpu_data *sd; ++ int cpu, pos; ++ ++ pos = asid - 1; ++ clear_bit(pos, sev_asid_bitmap); ++ ++ for_each_possible_cpu(cpu) { ++ sd = per_cpu(svm_data, cpu); ++ sd->sev_vmcbs[pos] = NULL; ++ } ++} ++ ++static void sev_asid_free(struct kvm *kvm) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ ++ __sev_asid_free(sev->asid); ++} ++ ++static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) ++{ ++ struct sev_data_decommission *decommission; ++ struct sev_data_deactivate *data; ++ ++ if (!handle) ++ return; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return; ++ ++ /* deactivate handle */ ++ data->handle = handle; ++ sev_guest_deactivate(data, NULL); ++ ++ wbinvd_on_all_cpus(); ++ sev_guest_df_flush(NULL); ++ kfree(data); ++ ++ decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); ++ if (!decommission) ++ return; ++ ++ /* decommission handle */ ++ decommission->handle 
= handle; ++ sev_guest_decommission(decommission, NULL); ++ ++ kfree(decommission); ++} ++ ++static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, ++ unsigned long ulen, unsigned long *n, ++ int write) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ unsigned long npages, npinned, size; ++ unsigned long locked, lock_limit; ++ struct page **pages; ++ unsigned long first, last; ++ ++ if (ulen == 0 || uaddr + ulen < uaddr) ++ return NULL; ++ ++ /* Calculate number of pages. */ ++ first = (uaddr & PAGE_MASK) >> PAGE_SHIFT; ++ last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT; ++ npages = (last - first + 1); ++ ++ locked = sev->pages_locked + npages; ++ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; ++ if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { ++ pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit); ++ return NULL; ++ } ++ ++ /* Avoid using vmalloc for smaller buffers. */ ++ size = npages * sizeof(struct page *); ++ if (size > PAGE_SIZE) ++ pages = vmalloc(size); ++ else ++ pages = kmalloc(size, GFP_KERNEL); ++ ++ if (!pages) ++ return NULL; ++ ++ /* Pin the user virtual address. */ ++ npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages); ++ if (npinned != npages) { ++ pr_err("SEV: Failure locking %lu pages.\n", npages); ++ goto err; ++ } ++ ++ *n = npages; ++ sev->pages_locked = locked; ++ ++ return pages; ++ ++err: ++ if (npinned > 0) ++ release_pages(pages, npinned); ++ ++ kvfree(pages); ++ return NULL; ++} ++ ++static void sev_unpin_memory(struct kvm *kvm, struct page **pages, ++ unsigned long npages) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ ++ release_pages(pages, npages); ++ kvfree(pages); ++ sev->pages_locked -= npages; ++} ++ ++static void sev_clflush_pages(struct page *pages[], unsigned long npages) ++{ ++ uint8_t *page_virtual; ++ unsigned long i; ++ ++ if (npages == 0 || pages == NULL) ++ return; ++ ++ for (i = 0; i < npages; i++) { ++ page_virtual = kmap_atomic(pages[i]); ++ clflush_cache_range(page_virtual, PAGE_SIZE); ++ kunmap_atomic(page_virtual); ++ } ++} ++ ++static void __unregister_enc_region_locked(struct kvm *kvm, ++ struct enc_region *region) ++{ ++ /* ++ * The guest may change the memory encryption attribute from C=0 -> C=1 ++ * or vice versa for this memory range. Lets make sure caches are ++ * flushed to ensure that guest data gets written into memory with ++ * correct C-bit. ++ */ ++ sev_clflush_pages(region->pages, region->npages); ++ ++ sev_unpin_memory(kvm, region->pages, region->npages); ++ list_del(®ion->list); ++ kfree(region); ++} ++ ++static struct kvm *svm_vm_alloc(void) ++{ ++ struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm)); ++ return &kvm_svm->kvm; ++} ++ ++static void svm_vm_free(struct kvm *kvm) ++{ ++ vfree(to_kvm_svm(kvm)); ++} ++ ++static void sev_vm_destroy(struct kvm *kvm) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct list_head *head = &sev->regions_list; ++ struct list_head *pos, *q; ++ ++ if (!sev_guest(kvm)) ++ return; ++ ++ mutex_lock(&kvm->lock); ++ ++ /* ++ * if userspace was terminated before unregistering the memory regions ++ * then lets unpin all the registered memory. 
++ */ ++ if (!list_empty(head)) { ++ list_for_each_safe(pos, q, head) { ++ __unregister_enc_region_locked(kvm, ++ list_entry(pos, struct enc_region, list)); ++ } ++ } ++ ++ mutex_unlock(&kvm->lock); ++ ++ sev_unbind_asid(kvm, sev->handle); ++ sev_asid_free(kvm); ++} ++ ++static void avic_vm_destroy(struct kvm *kvm) ++{ ++ unsigned long flags; ++ struct kvm_svm *kvm_svm = to_kvm_svm(kvm); ++ ++ if (!avic) ++ return; ++ ++ if (kvm_svm->avic_logical_id_table_page) ++ __free_page(kvm_svm->avic_logical_id_table_page); ++ if (kvm_svm->avic_physical_id_table_page) ++ __free_page(kvm_svm->avic_physical_id_table_page); ++ ++ spin_lock_irqsave(&svm_vm_data_hash_lock, flags); ++ hash_del(&kvm_svm->hnode); ++ spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); ++} ++ ++static void svm_vm_destroy(struct kvm *kvm) ++{ ++ avic_vm_destroy(kvm); ++ sev_vm_destroy(kvm); ++} ++ ++static int avic_vm_init(struct kvm *kvm) ++{ ++ unsigned long flags; ++ int err = -ENOMEM; ++ struct kvm_svm *kvm_svm = to_kvm_svm(kvm); ++ struct kvm_svm *k2; ++ struct page *p_page; ++ struct page *l_page; ++ u32 vm_id; ++ ++ if (!avic) ++ return 0; ++ ++ /* Allocating physical APIC ID table (4KB) */ ++ p_page = alloc_page(GFP_KERNEL); ++ if (!p_page) ++ goto free_avic; ++ ++ kvm_svm->avic_physical_id_table_page = p_page; ++ clear_page(page_address(p_page)); ++ ++ /* Allocating logical APIC ID table (4KB) */ ++ l_page = alloc_page(GFP_KERNEL); ++ if (!l_page) ++ goto free_avic; ++ ++ kvm_svm->avic_logical_id_table_page = l_page; ++ clear_page(page_address(l_page)); ++ ++ spin_lock_irqsave(&svm_vm_data_hash_lock, flags); ++ again: ++ vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK; ++ if (vm_id == 0) { /* id is 1-based, zero is not okay */ ++ next_vm_id_wrapped = 1; ++ goto again; ++ } ++ /* Is it still in use? Only possible if wrapped at least once */ ++ if (next_vm_id_wrapped) { ++ hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) { ++ if (k2->avic_vm_id == vm_id) ++ goto again; ++ } ++ } ++ kvm_svm->avic_vm_id = vm_id; ++ hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id); ++ spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); ++ ++ return 0; ++ ++free_avic: ++ avic_vm_destroy(kvm); ++ return err; ++} ++ ++static inline int ++avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) ++{ ++ int ret = 0; ++ unsigned long flags; ++ struct amd_svm_iommu_ir *ir; ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (!kvm_arch_has_assigned_device(vcpu->kvm)) ++ return 0; ++ ++ /* ++ * Here, we go through the per-vcpu ir_list to update all existing ++ * interrupt remapping table entry targeting this vcpu. ++ */ ++ spin_lock_irqsave(&svm->ir_list_lock, flags); ++ ++ if (list_empty(&svm->ir_list)) ++ goto out; ++ ++ list_for_each_entry(ir, &svm->ir_list, node) { ++ ret = amd_iommu_update_ga(cpu, r, ir->data); ++ if (ret) ++ break; ++ } ++out: ++ spin_unlock_irqrestore(&svm->ir_list_lock, flags); ++ return ret; ++} ++ ++static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ++{ ++ u64 entry; ++ /* ID = 0xff (broadcast), ID > 0xff (reserved) */ ++ int h_physical_id = kvm_cpu_get_apicid(cpu); ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (!kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ /* ++ * Since the host physical APIC id is 8 bits, ++ * we can support host APIC ID upto 255. 
++ */ ++ if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) ++ return; ++ ++ entry = READ_ONCE(*(svm->avic_physical_id_cache)); ++ WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); ++ ++ entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK; ++ entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK); ++ ++ entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; ++ if (svm->avic_is_running) ++ entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; ++ ++ WRITE_ONCE(*(svm->avic_physical_id_cache), entry); ++ avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, ++ svm->avic_is_running); ++} ++ ++static void avic_vcpu_put(struct kvm_vcpu *vcpu) ++{ ++ u64 entry; ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (!kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ entry = READ_ONCE(*(svm->avic_physical_id_cache)); ++ if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) ++ avic_update_iommu_vcpu_affinity(vcpu, -1, 0); ++ ++ entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; ++ WRITE_ONCE(*(svm->avic_physical_id_cache), entry); ++} ++ ++/** ++ * This function is called during VCPU halt/unhalt. ++ */ ++static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->avic_is_running = is_run; ++ if (is_run) ++ avic_vcpu_load(vcpu, vcpu->cpu); ++ else ++ avic_vcpu_put(vcpu); ++} ++ ++static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u32 dummy; ++ u32 eax = 1; ++ ++ vcpu->arch.microcode_version = 0x01000065; ++ svm->spec_ctrl = 0; ++ svm->virt_spec_ctrl = 0; ++ ++ if (!init_event) { ++ svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | ++ MSR_IA32_APICBASE_ENABLE; ++ if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) ++ svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; ++ } ++ init_vmcb(svm); ++ ++ kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); ++ kvm_register_write(vcpu, VCPU_REGS_RDX, eax); ++ ++ if (kvm_vcpu_apicv_active(vcpu) && !init_event) ++ avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE); ++} ++ ++static int avic_init_vcpu(struct vcpu_svm *svm) ++{ ++ int ret; ++ ++ if (!kvm_vcpu_apicv_active(&svm->vcpu)) ++ return 0; ++ ++ ret = avic_init_backing_page(&svm->vcpu); ++ if (ret) ++ return ret; ++ ++ INIT_LIST_HEAD(&svm->ir_list); ++ spin_lock_init(&svm->ir_list_lock); ++ ++ return ret; ++} ++ ++static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) ++{ ++ struct vcpu_svm *svm; ++ struct page *page; ++ struct page *msrpm_pages; ++ struct page *hsave_page; ++ struct page *nested_msrpm_pages; ++ int err; ++ ++ svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); ++ if (!svm) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ err = kvm_vcpu_init(&svm->vcpu, kvm, id); ++ if (err) ++ goto free_svm; ++ ++ err = -ENOMEM; ++ page = alloc_page(GFP_KERNEL); ++ if (!page) ++ goto uninit; ++ ++ msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); ++ if (!msrpm_pages) ++ goto free_page1; ++ ++ nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); ++ if (!nested_msrpm_pages) ++ goto free_page2; ++ ++ hsave_page = alloc_page(GFP_KERNEL); ++ if (!hsave_page) ++ goto free_page3; ++ ++ err = avic_init_vcpu(svm); ++ if (err) ++ goto free_page4; ++ ++ /* We initialize this flag to true to make sure that the is_running ++ * bit would be set the first time the vcpu is loaded. 
++ */ ++ svm->avic_is_running = true; ++ ++ svm->nested.hsave = page_address(hsave_page); ++ ++ svm->msrpm = page_address(msrpm_pages); ++ svm_vcpu_init_msrpm(svm->msrpm); ++ ++ svm->nested.msrpm = page_address(nested_msrpm_pages); ++ svm_vcpu_init_msrpm(svm->nested.msrpm); ++ ++ svm->vmcb = page_address(page); ++ clear_page(svm->vmcb); ++ svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT); ++ svm->asid_generation = 0; ++ init_vmcb(svm); ++ ++ svm_init_osvw(&svm->vcpu); ++ ++ return &svm->vcpu; ++ ++free_page4: ++ __free_page(hsave_page); ++free_page3: ++ __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); ++free_page2: ++ __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); ++free_page1: ++ __free_page(page); ++uninit: ++ kvm_vcpu_uninit(&svm->vcpu); ++free_svm: ++ kmem_cache_free(kvm_vcpu_cache, svm); ++out: ++ return ERR_PTR(err); ++} ++ ++static void svm_clear_current_vmcb(struct vmcb *vmcb) ++{ ++ int i; ++ ++ for_each_online_cpu(i) ++ cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); ++} ++ ++static void svm_free_vcpu(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ /* ++ * The vmcb page can be recycled, causing a false negative in ++ * svm_vcpu_load(). So, ensure that no logical CPU has this ++ * vmcb page recorded as its current vmcb. ++ */ ++ svm_clear_current_vmcb(svm->vmcb); ++ ++ __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); ++ __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); ++ __free_page(virt_to_page(svm->nested.hsave)); ++ __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); ++ kvm_vcpu_uninit(vcpu); ++ kmem_cache_free(kvm_vcpu_cache, svm); ++} ++ ++static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); ++ int i; ++ ++ if (unlikely(cpu != vcpu->cpu)) { ++ svm->asid_generation = 0; ++ mark_all_dirty(svm->vmcb); ++ } ++ ++#ifdef CONFIG_X86_64 ++ rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base); ++#endif ++ savesegment(fs, svm->host.fs); ++ savesegment(gs, svm->host.gs); ++ svm->host.ldt = kvm_read_ldt(); ++ ++ for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) ++ rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); ++ ++ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { ++ u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio; ++ if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) { ++ __this_cpu_write(current_tsc_ratio, tsc_ratio); ++ wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); ++ } ++ } ++ /* This assumes that the kernel never uses MSR_TSC_AUX */ ++ if (static_cpu_has(X86_FEATURE_RDTSCP)) ++ wrmsrl(MSR_TSC_AUX, svm->tsc_aux); ++ ++ if (sd->current_vmcb != svm->vmcb) { ++ sd->current_vmcb = svm->vmcb; ++ indirect_branch_prediction_barrier(); ++ } ++ avic_vcpu_load(vcpu, cpu); ++} ++ ++static void svm_vcpu_put(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ int i; ++ ++ avic_vcpu_put(vcpu); ++ ++ ++vcpu->stat.host_state_reload; ++ kvm_load_ldt(svm->host.ldt); ++#ifdef CONFIG_X86_64 ++ loadsegment(fs, svm->host.fs); ++ wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase); ++ load_gs_index(svm->host.gs); ++#else ++#ifdef CONFIG_X86_32_LAZY_GS ++ loadsegment(gs, svm->host.gs); ++#endif ++#endif ++ for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) ++ wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); ++} ++ ++static void svm_vcpu_blocking(struct kvm_vcpu *vcpu) ++{ ++ avic_set_running(vcpu, false); ++} ++ ++static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu) ++{ ++ avic_set_running(vcpu, true); ++} ++ 
++static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ unsigned long rflags = svm->vmcb->save.rflags; ++ ++ if (svm->nmi_singlestep) { ++ /* Hide our flags if they were not set by the guest */ ++ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) ++ rflags &= ~X86_EFLAGS_TF; ++ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF)) ++ rflags &= ~X86_EFLAGS_RF; ++ } ++ return rflags; ++} ++ ++static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) ++{ ++ if (to_svm(vcpu)->nmi_singlestep) ++ rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); ++ ++ /* ++ * Any change of EFLAGS.VM is accompanied by a reload of SS ++ * (caused by either a task switch or an inter-privilege IRET), ++ * so we do not need to update the CPL here. ++ */ ++ to_svm(vcpu)->vmcb->save.rflags = rflags; ++} ++ ++static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) ++{ ++ switch (reg) { ++ case VCPU_EXREG_PDPTR: ++ BUG_ON(!npt_enabled); ++ load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++static void svm_set_vintr(struct vcpu_svm *svm) ++{ ++ set_intercept(svm, INTERCEPT_VINTR); ++} ++ ++static void svm_clear_vintr(struct vcpu_svm *svm) ++{ ++ clr_intercept(svm, INTERCEPT_VINTR); ++} ++ ++static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) ++{ ++ struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; ++ ++ switch (seg) { ++ case VCPU_SREG_CS: return &save->cs; ++ case VCPU_SREG_DS: return &save->ds; ++ case VCPU_SREG_ES: return &save->es; ++ case VCPU_SREG_FS: return &save->fs; ++ case VCPU_SREG_GS: return &save->gs; ++ case VCPU_SREG_SS: return &save->ss; ++ case VCPU_SREG_TR: return &save->tr; ++ case VCPU_SREG_LDTR: return &save->ldtr; ++ } ++ BUG(); ++ return NULL; ++} ++ ++static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg) ++{ ++ struct vmcb_seg *s = svm_seg(vcpu, seg); ++ ++ return s->base; ++} ++ ++static void svm_get_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ struct vmcb_seg *s = svm_seg(vcpu, seg); ++ ++ var->base = s->base; ++ var->limit = s->limit; ++ var->selector = s->selector; ++ var->type = s->attrib & SVM_SELECTOR_TYPE_MASK; ++ var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1; ++ var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; ++ var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1; ++ var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1; ++ var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; ++ var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; ++ ++ /* ++ * AMD CPUs circa 2014 track the G bit for all segments except CS. ++ * However, the SVM spec states that the G bit is not observed by the ++ * CPU, and some VMware virtual CPUs drop the G bit for all segments. ++ * So let's synthesize a legal G bit for all segments, this helps ++ * running KVM nested. It also helps cross-vendor migration, because ++ * Intel's vmentry has a check on the 'G' bit. 
++ */ ++ var->g = s->limit > 0xfffff; ++ ++ /* ++ * AMD's VMCB does not have an explicit unusable field, so emulate it ++ * for cross vendor migration purposes by "not present" ++ */ ++ var->unusable = !var->present; ++ ++ switch (seg) { ++ case VCPU_SREG_TR: ++ /* ++ * Work around a bug where the busy flag in the tr selector ++ * isn't exposed ++ */ ++ var->type |= 0x2; ++ break; ++ case VCPU_SREG_DS: ++ case VCPU_SREG_ES: ++ case VCPU_SREG_FS: ++ case VCPU_SREG_GS: ++ /* ++ * The accessed bit must always be set in the segment ++ * descriptor cache, although it can be cleared in the ++ * descriptor, the cached bit always remains at 1. Since ++ * Intel has a check on this, set it here to support ++ * cross-vendor migration. ++ */ ++ if (!var->unusable) ++ var->type |= 0x1; ++ break; ++ case VCPU_SREG_SS: ++ /* ++ * On AMD CPUs sometimes the DB bit in the segment ++ * descriptor is left as 1, although the whole segment has ++ * been made unusable. Clear it here to pass an Intel VMX ++ * entry check when cross vendor migrating. ++ */ ++ if (var->unusable) ++ var->db = 0; ++ /* This is symmetric with svm_set_segment() */ ++ var->dpl = to_svm(vcpu)->vmcb->save.cpl; ++ break; ++ } ++} ++ ++static int svm_get_cpl(struct kvm_vcpu *vcpu) ++{ ++ struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; ++ ++ return save->cpl; ++} ++ ++static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ dt->size = svm->vmcb->save.idtr.limit; ++ dt->address = svm->vmcb->save.idtr.base; ++} ++ ++static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.idtr.limit = dt->size; ++ svm->vmcb->save.idtr.base = dt->address ; ++ mark_dirty(svm->vmcb, VMCB_DT); ++} ++ ++static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ dt->size = svm->vmcb->save.gdtr.limit; ++ dt->address = svm->vmcb->save.gdtr.base; ++} ++ ++static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.gdtr.limit = dt->size; ++ svm->vmcb->save.gdtr.base = dt->address ; ++ mark_dirty(svm->vmcb, VMCB_DT); ++} ++ ++static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) ++{ ++} ++ ++static void svm_decache_cr3(struct kvm_vcpu *vcpu) ++{ ++} ++ ++static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) ++{ ++} ++ ++static void update_cr0_intercept(struct vcpu_svm *svm) ++{ ++ ulong gcr0 = svm->vcpu.arch.cr0; ++ u64 *hcr0 = &svm->vmcb->save.cr0; ++ ++ *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) ++ | (gcr0 & SVM_CR0_SELECTIVE_MASK); ++ ++ mark_dirty(svm->vmcb, VMCB_CR); ++ ++ if (gcr0 == *hcr0) { ++ clr_cr_intercept(svm, INTERCEPT_CR0_READ); ++ clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); ++ } else { ++ set_cr_intercept(svm, INTERCEPT_CR0_READ); ++ set_cr_intercept(svm, INTERCEPT_CR0_WRITE); ++ } ++} ++ ++static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++#ifdef CONFIG_X86_64 ++ if (vcpu->arch.efer & EFER_LME) { ++ if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { ++ vcpu->arch.efer |= EFER_LMA; ++ svm->vmcb->save.efer |= EFER_LMA | EFER_LME; ++ } ++ ++ if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { ++ vcpu->arch.efer &= ~EFER_LMA; ++ svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); ++ } ++ } ++#endif ++ vcpu->arch.cr0 = cr0; ++ ++ if (!npt_enabled) ++ cr0 |= X86_CR0_PG | X86_CR0_WP; ++ ++ /* ++ * re-enable caching here 
because the QEMU bios ++ * does not do it - this results in some delay at ++ * reboot ++ */ ++ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) ++ cr0 &= ~(X86_CR0_CD | X86_CR0_NW); ++ svm->vmcb->save.cr0 = cr0; ++ mark_dirty(svm->vmcb, VMCB_CR); ++ update_cr0_intercept(svm); ++} ++ ++static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++{ ++ unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; ++ unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; ++ ++ if (cr4 & X86_CR4_VMXE) ++ return 1; ++ ++ if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) ++ svm_flush_tlb(vcpu, true); ++ ++ vcpu->arch.cr4 = cr4; ++ if (!npt_enabled) ++ cr4 |= X86_CR4_PAE; ++ cr4 |= host_cr4_mce; ++ to_svm(vcpu)->vmcb->save.cr4 = cr4; ++ mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); ++ return 0; ++} ++ ++static void svm_set_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb_seg *s = svm_seg(vcpu, seg); ++ ++ s->base = var->base; ++ s->limit = var->limit; ++ s->selector = var->selector; ++ s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); ++ s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; ++ s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; ++ s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT; ++ s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; ++ s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; ++ s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; ++ s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; ++ ++ /* ++ * This is always accurate, except if SYSRET returned to a segment ++ * with SS.DPL != 3. Intel does not have this quirk, and always ++ * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it ++ * would entail passing the CPL to userspace and back. 
++ */ ++ if (seg == VCPU_SREG_SS) ++ /* This is symmetric with svm_get_segment() */ ++ svm->vmcb->save.cpl = (var->dpl & 3); ++ ++ mark_dirty(svm->vmcb, VMCB_SEG); ++} ++ ++static void update_bp_intercept(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ clr_exception_intercept(svm, BP_VECTOR); ++ ++ if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) ++ set_exception_intercept(svm, BP_VECTOR); ++ } else ++ vcpu->guest_debug = 0; ++} ++ ++static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) ++{ ++ if (sd->next_asid > sd->max_asid) { ++ ++sd->asid_generation; ++ sd->next_asid = sd->min_asid; ++ svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; ++ } ++ ++ svm->asid_generation = sd->asid_generation; ++ svm->vmcb->control.asid = sd->next_asid++; ++ ++ mark_dirty(svm->vmcb, VMCB_ASID); ++} ++ ++static u64 svm_get_dr6(struct kvm_vcpu *vcpu) ++{ ++ return to_svm(vcpu)->vmcb->save.dr6; ++} ++ ++static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.dr6 = value; ++ mark_dirty(svm->vmcb, VMCB_DR); ++} ++ ++static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ get_debugreg(vcpu->arch.db[0], 0); ++ get_debugreg(vcpu->arch.db[1], 1); ++ get_debugreg(vcpu->arch.db[2], 2); ++ get_debugreg(vcpu->arch.db[3], 3); ++ vcpu->arch.dr6 = svm_get_dr6(vcpu); ++ vcpu->arch.dr7 = svm->vmcb->save.dr7; ++ ++ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; ++ set_dr_intercepts(svm); ++} ++ ++static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.dr7 = value; ++ mark_dirty(svm->vmcb, VMCB_DR); ++} ++ ++static int pf_interception(struct vcpu_svm *svm) ++{ ++ u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2); ++ u64 error_code = svm->vmcb->control.exit_info_1; ++ ++ return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address, ++ static_cpu_has(X86_FEATURE_DECODEASSISTS) ? ++ svm->vmcb->control.insn_bytes : NULL, ++ svm->vmcb->control.insn_len); ++} ++ ++static int npf_interception(struct vcpu_svm *svm) ++{ ++ u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2); ++ u64 error_code = svm->vmcb->control.exit_info_1; ++ ++ trace_kvm_page_fault(fault_address, error_code); ++ return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, ++ static_cpu_has(X86_FEATURE_DECODEASSISTS) ? 
++ svm->vmcb->control.insn_bytes : NULL, ++ svm->vmcb->control.insn_len); ++} ++ ++static int db_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_run *kvm_run = svm->vcpu.run; ++ struct kvm_vcpu *vcpu = &svm->vcpu; ++ ++ if (!(svm->vcpu.guest_debug & ++ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && ++ !svm->nmi_singlestep) { ++ kvm_queue_exception(&svm->vcpu, DB_VECTOR); ++ return 1; ++ } ++ ++ if (svm->nmi_singlestep) { ++ disable_nmi_singlestep(svm); ++ /* Make sure we check for pending NMIs upon entry */ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ } ++ ++ if (svm->vcpu.guest_debug & ++ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { ++ kvm_run->exit_reason = KVM_EXIT_DEBUG; ++ kvm_run->debug.arch.pc = ++ svm->vmcb->save.cs.base + svm->vmcb->save.rip; ++ kvm_run->debug.arch.exception = DB_VECTOR; ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static int bp_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_run *kvm_run = svm->vcpu.run; ++ ++ kvm_run->exit_reason = KVM_EXIT_DEBUG; ++ kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; ++ kvm_run->debug.arch.exception = BP_VECTOR; ++ return 0; ++} ++ ++static int ud_interception(struct vcpu_svm *svm) ++{ ++ return handle_ud(&svm->vcpu); ++} ++ ++static int ac_interception(struct vcpu_svm *svm) ++{ ++ kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0); ++ return 1; ++} ++ ++static int gp_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_vcpu *vcpu = &svm->vcpu; ++ u32 error_code = svm->vmcb->control.exit_info_1; ++ int er; ++ ++ WARN_ON_ONCE(!enable_vmware_backdoor); ++ ++ er = kvm_emulate_instruction(vcpu, ++ EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); ++ if (er == EMULATE_USER_EXIT) ++ return 0; ++ else if (er != EMULATE_DONE) ++ kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); ++ return 1; ++} ++ ++static bool is_erratum_383(void) ++{ ++ int err, i; ++ u64 value; ++ ++ if (!erratum_383_found) ++ return false; ++ ++ value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err); ++ if (err) ++ return false; ++ ++ /* Bit 62 may or may not be set for this mce */ ++ value &= ~(1ULL << 62); ++ ++ if (value != 0xb600000000010015ULL) ++ return false; ++ ++ /* Clear MCi_STATUS registers */ ++ for (i = 0; i < 6; ++i) ++ native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0); ++ ++ value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err); ++ if (!err) { ++ u32 low, high; ++ ++ value &= ~(1ULL << 2); ++ low = lower_32_bits(value); ++ high = upper_32_bits(value); ++ ++ native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high); ++ } ++ ++ /* Flush tlb to evict multi-match entries */ ++ __flush_tlb_all(); ++ ++ return true; ++} ++ ++static void svm_handle_mce(struct vcpu_svm *svm) ++{ ++ if (is_erratum_383()) { ++ /* ++ * Erratum 383 triggered. Guest state is corrupt so kill the ++ * guest. ++ */ ++ pr_err("KVM: Guest triggered AMD Erratum 383\n"); ++ ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu); ++ ++ return; ++ } ++ ++ /* ++ * On an #MC intercept the MCE handler is not called automatically in ++ * the host. So do it by hand here. ++ */ ++ asm volatile ( ++ "int $0x12\n"); ++ /* not sure if we ever come back to this point */ ++ ++ return; ++} ++ ++static int mc_interception(struct vcpu_svm *svm) ++{ ++ return 1; ++} ++ ++static int shutdown_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_run *kvm_run = svm->vcpu.run; ++ ++ /* ++ * VMCB is undefined after a SHUTDOWN intercept ++ * so reinitialize it. 
++ */ ++ clear_page(svm->vmcb); ++ init_vmcb(svm); ++ ++ kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; ++ return 0; ++} ++ ++static int io_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_vcpu *vcpu = &svm->vcpu; ++ u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ ++ int size, in, string; ++ unsigned port; ++ ++ ++svm->vcpu.stat.io_exits; ++ string = (io_info & SVM_IOIO_STR_MASK) != 0; ++ in = (io_info & SVM_IOIO_TYPE_MASK) != 0; ++ if (string) ++ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; ++ ++ port = io_info >> 16; ++ size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; ++ svm->next_rip = svm->vmcb->control.exit_info_2; ++ ++ return kvm_fast_pio(&svm->vcpu, size, port, in); ++} ++ ++static int nmi_interception(struct vcpu_svm *svm) ++{ ++ return 1; ++} ++ ++static int intr_interception(struct vcpu_svm *svm) ++{ ++ ++svm->vcpu.stat.irq_exits; ++ return 1; ++} ++ ++static int nop_on_interception(struct vcpu_svm *svm) ++{ ++ return 1; ++} ++ ++static int halt_interception(struct vcpu_svm *svm) ++{ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; ++ return kvm_emulate_halt(&svm->vcpu); ++} ++ ++static int vmmcall_interception(struct vcpu_svm *svm) ++{ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ return kvm_emulate_hypercall(&svm->vcpu); ++} ++ ++static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ return svm->nested.nested_cr3; ++} ++ ++static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u64 cr3 = svm->nested.nested_cr3; ++ u64 pdpte; ++ int ret; ++ ++ ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte, ++ offset_in_page(cr3) + index * 8, 8); ++ if (ret) ++ return 0; ++ return pdpte; ++} ++ ++static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, ++ unsigned long root) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->control.nested_cr3 = __sme_set(root); ++ mark_dirty(svm->vmcb, VMCB_NPT); ++} ++ ++static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, ++ struct x86_exception *fault) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) { ++ /* ++ * TODO: track the cause of the nested page fault, and ++ * correctly fill in the high bits of exit_info_1. ++ */ ++ svm->vmcb->control.exit_code = SVM_EXIT_NPF; ++ svm->vmcb->control.exit_code_hi = 0; ++ svm->vmcb->control.exit_info_1 = (1ULL << 32); ++ svm->vmcb->control.exit_info_2 = fault->address; ++ } ++ ++ svm->vmcb->control.exit_info_1 &= ~0xffffffffULL; ++ svm->vmcb->control.exit_info_1 |= fault->error_code; ++ ++ /* ++ * The present bit is always zero for page structure faults on real ++ * hardware. 
++ */ ++ if (svm->vmcb->control.exit_info_1 & (2ULL << 32)) ++ svm->vmcb->control.exit_info_1 &= ~1; ++ ++ nested_svm_vmexit(svm); ++} ++ ++static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) ++{ ++ WARN_ON(mmu_is_nested(vcpu)); ++ kvm_init_shadow_mmu(vcpu); ++ vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; ++ vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; ++ vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; ++ vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; ++ vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu); ++ reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); ++ vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; ++} ++ ++static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.walk_mmu = &vcpu->arch.mmu; ++} ++ ++static int nested_svm_check_permissions(struct vcpu_svm *svm) ++{ ++ if (!(svm->vcpu.arch.efer & EFER_SVME) || ++ !is_paging(&svm->vcpu)) { ++ kvm_queue_exception(&svm->vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ if (svm->vmcb->save.cpl) { ++ kvm_inject_gp(&svm->vcpu, 0); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, ++ bool has_error_code, u32 error_code) ++{ ++ int vmexit; ++ ++ if (!is_guest_mode(&svm->vcpu)) ++ return 0; ++ ++ vmexit = nested_svm_intercept(svm); ++ if (vmexit != NESTED_EXIT_DONE) ++ return 0; ++ ++ svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; ++ svm->vmcb->control.exit_code_hi = 0; ++ svm->vmcb->control.exit_info_1 = error_code; ++ ++ /* ++ * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception. ++ * The fix is to add the ancillary datum (CR2 or DR6) to structs ++ * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be ++ * written only when inject_pending_event runs (DR6 would written here ++ * too). This should be conditional on a new capability---if the ++ * capability is disabled, kvm_multiple_exception would write the ++ * ancillary information to CR2 or DR6, for backwards ABI-compatibility. ++ */ ++ if (svm->vcpu.arch.exception.nested_apf) ++ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; ++ else ++ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; ++ ++ svm->nested.exit_required = true; ++ return vmexit; ++} ++ ++/* This function returns true if it is save to enable the irq window */ ++static inline bool nested_svm_intr(struct vcpu_svm *svm) ++{ ++ if (!is_guest_mode(&svm->vcpu)) ++ return true; ++ ++ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) ++ return true; ++ ++ if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) ++ return false; ++ ++ /* ++ * if vmexit was already requested (by intercepted exception ++ * for instance) do not overwrite it with "external interrupt" ++ * vmexit. ++ */ ++ if (svm->nested.exit_required) ++ return false; ++ ++ svm->vmcb->control.exit_code = SVM_EXIT_INTR; ++ svm->vmcb->control.exit_info_1 = 0; ++ svm->vmcb->control.exit_info_2 = 0; ++ ++ if (svm->nested.intercept & 1ULL) { ++ /* ++ * The #vmexit can't be emulated here directly because this ++ * code path runs with irqs and preemption disabled. A ++ * #vmexit emulation might sleep. Only signal request for ++ * the #vmexit here. 
++ */ ++ svm->nested.exit_required = true; ++ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* This function returns true if it is save to enable the nmi window */ ++static inline bool nested_svm_nmi(struct vcpu_svm *svm) ++{ ++ if (!is_guest_mode(&svm->vcpu)) ++ return true; ++ ++ if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) ++ return true; ++ ++ svm->vmcb->control.exit_code = SVM_EXIT_NMI; ++ svm->nested.exit_required = true; ++ ++ return false; ++} ++ ++static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) ++{ ++ struct page *page; ++ ++ might_sleep(); ++ ++ page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT); ++ if (is_error_page(page)) ++ goto error; ++ ++ *_page = page; ++ ++ return kmap(page); ++ ++error: ++ kvm_inject_gp(&svm->vcpu, 0); ++ ++ return NULL; ++} ++ ++static void nested_svm_unmap(struct page *page) ++{ ++ kunmap(page); ++ kvm_release_page_dirty(page); ++} ++ ++static int nested_svm_intercept_ioio(struct vcpu_svm *svm) ++{ ++ unsigned port, size, iopm_len; ++ u16 val, mask; ++ u8 start_bit; ++ u64 gpa; ++ ++ if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) ++ return NESTED_EXIT_HOST; ++ ++ port = svm->vmcb->control.exit_info_1 >> 16; ++ size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >> ++ SVM_IOIO_SIZE_SHIFT; ++ gpa = svm->nested.vmcb_iopm + (port / 8); ++ start_bit = port % 8; ++ iopm_len = (start_bit + size > 8) ? 2 : 1; ++ mask = (0xf >> (4 - size)) << start_bit; ++ val = 0; ++ ++ if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len)) ++ return NESTED_EXIT_DONE; ++ ++ return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; ++} ++ ++static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) ++{ ++ u32 offset, msr, value; ++ int write, mask; ++ ++ if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) ++ return NESTED_EXIT_HOST; ++ ++ msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; ++ offset = svm_msrpm_offset(msr); ++ write = svm->vmcb->control.exit_info_1 & 1; ++ mask = 1 << ((2 * (msr & 0xf)) + write); ++ ++ if (offset == MSR_INVALID) ++ return NESTED_EXIT_DONE; ++ ++ /* Offset is in 32 bit units but need in 8 bit units */ ++ offset *= 4; ++ ++ if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4)) ++ return NESTED_EXIT_DONE; ++ ++ return (value & mask) ? 
NESTED_EXIT_DONE : NESTED_EXIT_HOST; ++} ++ ++/* DB exceptions for our internal use must not cause vmexit */ ++static int nested_svm_intercept_db(struct vcpu_svm *svm) ++{ ++ unsigned long dr6; ++ ++ /* if we're not singlestepping, it's not ours */ ++ if (!svm->nmi_singlestep) ++ return NESTED_EXIT_DONE; ++ ++ /* if it's not a singlestep exception, it's not ours */ ++ if (kvm_get_dr(&svm->vcpu, 6, &dr6)) ++ return NESTED_EXIT_DONE; ++ if (!(dr6 & DR6_BS)) ++ return NESTED_EXIT_DONE; ++ ++ /* if the guest is singlestepping, it should get the vmexit */ ++ if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) { ++ disable_nmi_singlestep(svm); ++ return NESTED_EXIT_DONE; ++ } ++ ++ /* it's ours, the nested hypervisor must not see this one */ ++ return NESTED_EXIT_HOST; ++} ++ ++static int nested_svm_exit_special(struct vcpu_svm *svm) ++{ ++ u32 exit_code = svm->vmcb->control.exit_code; ++ ++ switch (exit_code) { ++ case SVM_EXIT_INTR: ++ case SVM_EXIT_NMI: ++ case SVM_EXIT_EXCP_BASE + MC_VECTOR: ++ return NESTED_EXIT_HOST; ++ case SVM_EXIT_NPF: ++ /* For now we are always handling NPFs when using them */ ++ if (npt_enabled) ++ return NESTED_EXIT_HOST; ++ break; ++ case SVM_EXIT_EXCP_BASE + PF_VECTOR: ++ /* When we're shadowing, trap PFs, but not async PF */ ++ if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0) ++ return NESTED_EXIT_HOST; ++ break; ++ default: ++ break; ++ } ++ ++ return NESTED_EXIT_CONTINUE; ++} ++ ++/* ++ * If this function returns true, this #vmexit was already handled ++ */ ++static int nested_svm_intercept(struct vcpu_svm *svm) ++{ ++ u32 exit_code = svm->vmcb->control.exit_code; ++ int vmexit = NESTED_EXIT_HOST; ++ ++ switch (exit_code) { ++ case SVM_EXIT_MSR: ++ vmexit = nested_svm_exit_handled_msr(svm); ++ break; ++ case SVM_EXIT_IOIO: ++ vmexit = nested_svm_intercept_ioio(svm); ++ break; ++ case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { ++ u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0); ++ if (svm->nested.intercept_cr & bit) ++ vmexit = NESTED_EXIT_DONE; ++ break; ++ } ++ case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { ++ u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0); ++ if (svm->nested.intercept_dr & bit) ++ vmexit = NESTED_EXIT_DONE; ++ break; ++ } ++ case SVM_EXIT_EXCP_BASE ... 
SVM_EXIT_EXCP_BASE + 0x1f: { ++ u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); ++ if (svm->nested.intercept_exceptions & excp_bits) { ++ if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR) ++ vmexit = nested_svm_intercept_db(svm); ++ else ++ vmexit = NESTED_EXIT_DONE; ++ } ++ /* async page fault always cause vmexit */ ++ else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && ++ svm->vcpu.arch.exception.nested_apf != 0) ++ vmexit = NESTED_EXIT_DONE; ++ break; ++ } ++ case SVM_EXIT_ERR: { ++ vmexit = NESTED_EXIT_DONE; ++ break; ++ } ++ default: { ++ u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); ++ if (svm->nested.intercept & exit_bits) ++ vmexit = NESTED_EXIT_DONE; ++ } ++ } ++ ++ return vmexit; ++} ++ ++static int nested_svm_exit_handled(struct vcpu_svm *svm) ++{ ++ int vmexit; ++ ++ vmexit = nested_svm_intercept(svm); ++ ++ if (vmexit == NESTED_EXIT_DONE) ++ nested_svm_vmexit(svm); ++ ++ return vmexit; ++} ++ ++static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb) ++{ ++ struct vmcb_control_area *dst = &dst_vmcb->control; ++ struct vmcb_control_area *from = &from_vmcb->control; ++ ++ dst->intercept_cr = from->intercept_cr; ++ dst->intercept_dr = from->intercept_dr; ++ dst->intercept_exceptions = from->intercept_exceptions; ++ dst->intercept = from->intercept; ++ dst->iopm_base_pa = from->iopm_base_pa; ++ dst->msrpm_base_pa = from->msrpm_base_pa; ++ dst->tsc_offset = from->tsc_offset; ++ dst->asid = from->asid; ++ dst->tlb_ctl = from->tlb_ctl; ++ dst->int_ctl = from->int_ctl; ++ dst->int_vector = from->int_vector; ++ dst->int_state = from->int_state; ++ dst->exit_code = from->exit_code; ++ dst->exit_code_hi = from->exit_code_hi; ++ dst->exit_info_1 = from->exit_info_1; ++ dst->exit_info_2 = from->exit_info_2; ++ dst->exit_int_info = from->exit_int_info; ++ dst->exit_int_info_err = from->exit_int_info_err; ++ dst->nested_ctl = from->nested_ctl; ++ dst->event_inj = from->event_inj; ++ dst->event_inj_err = from->event_inj_err; ++ dst->nested_cr3 = from->nested_cr3; ++ dst->virt_ext = from->virt_ext; ++} ++ ++static int nested_svm_vmexit(struct vcpu_svm *svm) ++{ ++ struct vmcb *nested_vmcb; ++ struct vmcb *hsave = svm->nested.hsave; ++ struct vmcb *vmcb = svm->vmcb; ++ struct page *page; ++ ++ trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, ++ vmcb->control.exit_info_1, ++ vmcb->control.exit_info_2, ++ vmcb->control.exit_int_info, ++ vmcb->control.exit_int_info_err, ++ KVM_ISA_SVM); ++ ++ nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); ++ if (!nested_vmcb) ++ return 1; ++ ++ /* Exit Guest-Mode */ ++ leave_guest_mode(&svm->vcpu); ++ svm->nested.vmcb = 0; ++ ++ /* Give the current vmcb to the guest */ ++ disable_gif(svm); ++ ++ nested_vmcb->save.es = vmcb->save.es; ++ nested_vmcb->save.cs = vmcb->save.cs; ++ nested_vmcb->save.ss = vmcb->save.ss; ++ nested_vmcb->save.ds = vmcb->save.ds; ++ nested_vmcb->save.gdtr = vmcb->save.gdtr; ++ nested_vmcb->save.idtr = vmcb->save.idtr; ++ nested_vmcb->save.efer = svm->vcpu.arch.efer; ++ nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); ++ nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); ++ nested_vmcb->save.cr2 = vmcb->save.cr2; ++ nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; ++ nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); ++ nested_vmcb->save.rip = vmcb->save.rip; ++ nested_vmcb->save.rsp = vmcb->save.rsp; ++ nested_vmcb->save.rax = vmcb->save.rax; ++ nested_vmcb->save.dr7 = vmcb->save.dr7; ++ nested_vmcb->save.dr6 = vmcb->save.dr6; ++ nested_vmcb->save.cpl = vmcb->save.cpl; ++ 
++ nested_vmcb->control.int_ctl = vmcb->control.int_ctl; ++ nested_vmcb->control.int_vector = vmcb->control.int_vector; ++ nested_vmcb->control.int_state = vmcb->control.int_state; ++ nested_vmcb->control.exit_code = vmcb->control.exit_code; ++ nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi; ++ nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1; ++ nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; ++ nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; ++ nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; ++ ++ if (svm->nrips_enabled) ++ nested_vmcb->control.next_rip = vmcb->control.next_rip; ++ ++ /* ++ * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have ++ * to make sure that we do not lose injected events. So check event_inj ++ * here and copy it to exit_int_info if it is valid. ++ * Exit_int_info and event_inj can't be both valid because the case ++ * below only happens on a VMRUN instruction intercept which has ++ * no valid exit_int_info set. ++ */ ++ if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { ++ struct vmcb_control_area *nc = &nested_vmcb->control; ++ ++ nc->exit_int_info = vmcb->control.event_inj; ++ nc->exit_int_info_err = vmcb->control.event_inj_err; ++ } ++ ++ nested_vmcb->control.tlb_ctl = 0; ++ nested_vmcb->control.event_inj = 0; ++ nested_vmcb->control.event_inj_err = 0; ++ ++ /* We always set V_INTR_MASKING and remember the old value in hflags */ ++ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) ++ nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; ++ ++ /* Restore the original control entries */ ++ copy_vmcb_control_area(vmcb, hsave); ++ ++ svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset; ++ kvm_clear_exception_queue(&svm->vcpu); ++ kvm_clear_interrupt_queue(&svm->vcpu); ++ ++ svm->nested.nested_cr3 = 0; ++ ++ /* Restore selected save entries */ ++ svm->vmcb->save.es = hsave->save.es; ++ svm->vmcb->save.cs = hsave->save.cs; ++ svm->vmcb->save.ss = hsave->save.ss; ++ svm->vmcb->save.ds = hsave->save.ds; ++ svm->vmcb->save.gdtr = hsave->save.gdtr; ++ svm->vmcb->save.idtr = hsave->save.idtr; ++ kvm_set_rflags(&svm->vcpu, hsave->save.rflags); ++ svm_set_efer(&svm->vcpu, hsave->save.efer); ++ svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); ++ svm_set_cr4(&svm->vcpu, hsave->save.cr4); ++ if (npt_enabled) { ++ svm->vmcb->save.cr3 = hsave->save.cr3; ++ svm->vcpu.arch.cr3 = hsave->save.cr3; ++ } else { ++ (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); ++ } ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip); ++ svm->vmcb->save.dr7 = 0; ++ svm->vmcb->save.cpl = 0; ++ svm->vmcb->control.exit_int_info = 0; ++ ++ mark_all_dirty(svm->vmcb); ++ ++ nested_svm_unmap(page); ++ ++ nested_svm_uninit_mmu_context(&svm->vcpu); ++ kvm_mmu_reset_context(&svm->vcpu); ++ kvm_mmu_load(&svm->vcpu); ++ ++ /* ++ * Drop what we picked up for L2 via svm_complete_interrupts() so it ++ * doesn't end up in L1. ++ */ ++ svm->vcpu.arch.nmi_injected = false; ++ kvm_clear_exception_queue(&svm->vcpu); ++ kvm_clear_interrupt_queue(&svm->vcpu); ++ ++ return 0; ++} ++ ++static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) ++{ ++ /* ++ * This function merges the msr permission bitmaps of kvm and the ++ * nested vmcb. 
It is optimized in that it only merges the parts where ++ * the kvm msr permission bitmap may contain zero bits ++ */ ++ int i; ++ ++ if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) ++ return true; ++ ++ for (i = 0; i < MSRPM_OFFSETS; i++) { ++ u32 value, p; ++ u64 offset; ++ ++ if (msrpm_offsets[i] == 0xffffffff) ++ break; ++ ++ p = msrpm_offsets[i]; ++ offset = svm->nested.vmcb_msrpm + (p * 4); ++ ++ if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4)) ++ return false; ++ ++ svm->nested.msrpm[p] = svm->msrpm[p] | value; ++ } ++ ++ svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm)); ++ ++ return true; ++} ++ ++static bool nested_vmcb_checks(struct vmcb *vmcb) ++{ ++ if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0) ++ return false; ++ ++ if (vmcb->control.asid == 0) ++ return false; ++ ++ if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && ++ !npt_enabled) ++ return false; ++ ++ return true; ++} ++ ++static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, ++ struct vmcb *nested_vmcb, struct page *page) ++{ ++ if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) ++ svm->vcpu.arch.hflags |= HF_HIF_MASK; ++ else ++ svm->vcpu.arch.hflags &= ~HF_HIF_MASK; ++ ++ if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) { ++ kvm_mmu_unload(&svm->vcpu); ++ svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3; ++ nested_svm_init_mmu_context(&svm->vcpu); ++ } ++ ++ /* Load the nested guest state */ ++ svm->vmcb->save.es = nested_vmcb->save.es; ++ svm->vmcb->save.cs = nested_vmcb->save.cs; ++ svm->vmcb->save.ss = nested_vmcb->save.ss; ++ svm->vmcb->save.ds = nested_vmcb->save.ds; ++ svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; ++ svm->vmcb->save.idtr = nested_vmcb->save.idtr; ++ kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); ++ svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); ++ svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); ++ svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); ++ if (npt_enabled) { ++ svm->vmcb->save.cr3 = nested_vmcb->save.cr3; ++ svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; ++ } else ++ (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); ++ ++ /* Guest paging mode is active - reset mmu */ ++ kvm_mmu_reset_context(&svm->vcpu); ++ ++ svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); ++ ++ /* In case we don't even reach vcpu_run, the fields are not updated */ ++ svm->vmcb->save.rax = nested_vmcb->save.rax; ++ svm->vmcb->save.rsp = nested_vmcb->save.rsp; ++ svm->vmcb->save.rip = nested_vmcb->save.rip; ++ svm->vmcb->save.dr7 = nested_vmcb->save.dr7; ++ svm->vmcb->save.dr6 = nested_vmcb->save.dr6; ++ svm->vmcb->save.cpl = nested_vmcb->save.cpl; ++ ++ svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; ++ svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; ++ ++ /* cache intercepts */ ++ svm->nested.intercept_cr = nested_vmcb->control.intercept_cr; ++ svm->nested.intercept_dr = nested_vmcb->control.intercept_dr; ++ svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; ++ svm->nested.intercept = nested_vmcb->control.intercept; ++ ++ svm_flush_tlb(&svm->vcpu, true); ++ svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; ++ if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) ++ 
svm->vcpu.arch.hflags |= HF_VINTR_MASK; ++ else ++ svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; ++ ++ if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { ++ /* We only want the cr8 intercept bits of the guest */ ++ clr_cr_intercept(svm, INTERCEPT_CR8_READ); ++ clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); ++ } ++ ++ /* We don't want to see VMMCALLs from a nested guest */ ++ clr_intercept(svm, INTERCEPT_VMMCALL); ++ ++ svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset; ++ svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset; ++ ++ svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext; ++ svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; ++ svm->vmcb->control.int_state = nested_vmcb->control.int_state; ++ svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; ++ svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; ++ ++ nested_svm_unmap(page); ++ ++ /* Enter Guest-Mode */ ++ enter_guest_mode(&svm->vcpu); ++ ++ /* ++ * Merge guest and host intercepts - must be called with vcpu in ++ * guest-mode to take affect here ++ */ ++ recalc_intercepts(svm); ++ ++ svm->nested.vmcb = vmcb_gpa; ++ ++ enable_gif(svm); ++ ++ mark_all_dirty(svm->vmcb); ++} ++ ++static bool nested_svm_vmrun(struct vcpu_svm *svm) ++{ ++ struct vmcb *nested_vmcb; ++ struct vmcb *hsave = svm->nested.hsave; ++ struct vmcb *vmcb = svm->vmcb; ++ struct page *page; ++ u64 vmcb_gpa; ++ ++ vmcb_gpa = svm->vmcb->save.rax; ++ ++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); ++ if (!nested_vmcb) ++ return false; ++ ++ if (!nested_vmcb_checks(nested_vmcb)) { ++ nested_vmcb->control.exit_code = SVM_EXIT_ERR; ++ nested_vmcb->control.exit_code_hi = 0; ++ nested_vmcb->control.exit_info_1 = 0; ++ nested_vmcb->control.exit_info_2 = 0; ++ ++ nested_svm_unmap(page); ++ ++ return false; ++ } ++ ++ trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa, ++ nested_vmcb->save.rip, ++ nested_vmcb->control.int_ctl, ++ nested_vmcb->control.event_inj, ++ nested_vmcb->control.nested_ctl); ++ ++ trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff, ++ nested_vmcb->control.intercept_cr >> 16, ++ nested_vmcb->control.intercept_exceptions, ++ nested_vmcb->control.intercept); ++ ++ /* Clear internal status */ ++ kvm_clear_exception_queue(&svm->vcpu); ++ kvm_clear_interrupt_queue(&svm->vcpu); ++ ++ /* ++ * Save the old vmcb, so we don't need to pick what we save, but can ++ * restore everything when a VMEXIT occurs ++ */ ++ hsave->save.es = vmcb->save.es; ++ hsave->save.cs = vmcb->save.cs; ++ hsave->save.ss = vmcb->save.ss; ++ hsave->save.ds = vmcb->save.ds; ++ hsave->save.gdtr = vmcb->save.gdtr; ++ hsave->save.idtr = vmcb->save.idtr; ++ hsave->save.efer = svm->vcpu.arch.efer; ++ hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); ++ hsave->save.cr4 = svm->vcpu.arch.cr4; ++ hsave->save.rflags = kvm_get_rflags(&svm->vcpu); ++ hsave->save.rip = kvm_rip_read(&svm->vcpu); ++ hsave->save.rsp = vmcb->save.rsp; ++ hsave->save.rax = vmcb->save.rax; ++ if (npt_enabled) ++ hsave->save.cr3 = vmcb->save.cr3; ++ else ++ hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); ++ ++ copy_vmcb_control_area(hsave, vmcb); ++ ++ enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, page); ++ ++ return true; ++} ++ ++static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) ++{ ++ to_vmcb->save.fs = from_vmcb->save.fs; ++ to_vmcb->save.gs = from_vmcb->save.gs; ++ to_vmcb->save.tr = from_vmcb->save.tr; ++ to_vmcb->save.ldtr = from_vmcb->save.ldtr; ++ to_vmcb->save.kernel_gs_base = 
from_vmcb->save.kernel_gs_base; ++ to_vmcb->save.star = from_vmcb->save.star; ++ to_vmcb->save.lstar = from_vmcb->save.lstar; ++ to_vmcb->save.cstar = from_vmcb->save.cstar; ++ to_vmcb->save.sfmask = from_vmcb->save.sfmask; ++ to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs; ++ to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp; ++ to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; ++} ++ ++static int vmload_interception(struct vcpu_svm *svm) ++{ ++ struct vmcb *nested_vmcb; ++ struct page *page; ++ int ret; ++ ++ if (nested_svm_check_permissions(svm)) ++ return 1; ++ ++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); ++ if (!nested_vmcb) ++ return 1; ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ ret = kvm_skip_emulated_instruction(&svm->vcpu); ++ ++ nested_svm_vmloadsave(nested_vmcb, svm->vmcb); ++ nested_svm_unmap(page); ++ ++ return ret; ++} ++ ++static int vmsave_interception(struct vcpu_svm *svm) ++{ ++ struct vmcb *nested_vmcb; ++ struct page *page; ++ int ret; ++ ++ if (nested_svm_check_permissions(svm)) ++ return 1; ++ ++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); ++ if (!nested_vmcb) ++ return 1; ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ ret = kvm_skip_emulated_instruction(&svm->vcpu); ++ ++ nested_svm_vmloadsave(svm->vmcb, nested_vmcb); ++ nested_svm_unmap(page); ++ ++ return ret; ++} ++ ++static int vmrun_interception(struct vcpu_svm *svm) ++{ ++ if (nested_svm_check_permissions(svm)) ++ return 1; ++ ++ /* Save rip after vmrun instruction */ ++ kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3); ++ ++ if (!nested_svm_vmrun(svm)) ++ return 1; ++ ++ if (!nested_svm_vmrun_msrpm(svm)) ++ goto failed; ++ ++ return 1; ++ ++failed: ++ ++ svm->vmcb->control.exit_code = SVM_EXIT_ERR; ++ svm->vmcb->control.exit_code_hi = 0; ++ svm->vmcb->control.exit_info_1 = 0; ++ svm->vmcb->control.exit_info_2 = 0; ++ ++ nested_svm_vmexit(svm); ++ ++ return 1; ++} ++ ++static int stgi_interception(struct vcpu_svm *svm) ++{ ++ int ret; ++ ++ if (nested_svm_check_permissions(svm)) ++ return 1; ++ ++ /* ++ * If VGIF is enabled, the STGI intercept is only added to ++ * detect the opening of the SMI/NMI window; remove it now. ++ */ ++ if (vgif_enabled(svm)) ++ clr_intercept(svm, INTERCEPT_STGI); ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ ret = kvm_skip_emulated_instruction(&svm->vcpu); ++ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); ++ ++ enable_gif(svm); ++ ++ return ret; ++} ++ ++static int clgi_interception(struct vcpu_svm *svm) ++{ ++ int ret; ++ ++ if (nested_svm_check_permissions(svm)) ++ return 1; ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ ret = kvm_skip_emulated_instruction(&svm->vcpu); ++ ++ disable_gif(svm); ++ ++ /* After a CLGI no interrupts should come */ ++ if (!kvm_vcpu_apicv_active(&svm->vcpu)) { ++ svm_clear_vintr(svm); ++ svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; ++ mark_dirty(svm->vmcb, VMCB_INTR); ++ } ++ ++ return ret; ++} ++ ++static int invlpga_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_vcpu *vcpu = &svm->vcpu; ++ ++ trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX), ++ kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); ++ ++ /* Let's treat INVLPGA the same as INVLPG (can be optimized!) 
*/ ++ kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ return kvm_skip_emulated_instruction(&svm->vcpu); ++} ++ ++static int skinit_interception(struct vcpu_svm *svm) ++{ ++ trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); ++ ++ kvm_queue_exception(&svm->vcpu, UD_VECTOR); ++ return 1; ++} ++ ++static int wbinvd_interception(struct vcpu_svm *svm) ++{ ++ return kvm_emulate_wbinvd(&svm->vcpu); ++} ++ ++static int xsetbv_interception(struct vcpu_svm *svm) ++{ ++ u64 new_bv = kvm_read_edx_eax(&svm->vcpu); ++ u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); ++ ++ if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ return kvm_skip_emulated_instruction(&svm->vcpu); ++ } ++ ++ return 1; ++} ++ ++static int task_switch_interception(struct vcpu_svm *svm) ++{ ++ u16 tss_selector; ++ int reason; ++ int int_type = svm->vmcb->control.exit_int_info & ++ SVM_EXITINTINFO_TYPE_MASK; ++ int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK; ++ uint32_t type = ++ svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; ++ uint32_t idt_v = ++ svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; ++ bool has_error_code = false; ++ u32 error_code = 0; ++ ++ tss_selector = (u16)svm->vmcb->control.exit_info_1; ++ ++ if (svm->vmcb->control.exit_info_2 & ++ (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) ++ reason = TASK_SWITCH_IRET; ++ else if (svm->vmcb->control.exit_info_2 & ++ (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) ++ reason = TASK_SWITCH_JMP; ++ else if (idt_v) ++ reason = TASK_SWITCH_GATE; ++ else ++ reason = TASK_SWITCH_CALL; ++ ++ if (reason == TASK_SWITCH_GATE) { ++ switch (type) { ++ case SVM_EXITINTINFO_TYPE_NMI: ++ svm->vcpu.arch.nmi_injected = false; ++ break; ++ case SVM_EXITINTINFO_TYPE_EXEPT: ++ if (svm->vmcb->control.exit_info_2 & ++ (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { ++ has_error_code = true; ++ error_code = ++ (u32)svm->vmcb->control.exit_info_2; ++ } ++ kvm_clear_exception_queue(&svm->vcpu); ++ break; ++ case SVM_EXITINTINFO_TYPE_INTR: ++ kvm_clear_interrupt_queue(&svm->vcpu); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ if (reason != TASK_SWITCH_GATE || ++ int_type == SVM_EXITINTINFO_TYPE_SOFT || ++ (int_type == SVM_EXITINTINFO_TYPE_EXEPT && ++ (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) ++ skip_emulated_instruction(&svm->vcpu); ++ ++ if (int_type != SVM_EXITINTINFO_TYPE_SOFT) ++ int_vec = -1; ++ ++ if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, ++ has_error_code, error_code) == EMULATE_FAIL) { ++ svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; ++ svm->vcpu.run->internal.ndata = 0; ++ return 0; ++ } ++ return 1; ++} ++ ++static int cpuid_interception(struct vcpu_svm *svm) ++{ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; ++ return kvm_emulate_cpuid(&svm->vcpu); ++} ++ ++static int iret_interception(struct vcpu_svm *svm) ++{ ++ ++svm->vcpu.stat.nmi_window_exits; ++ clr_intercept(svm, INTERCEPT_IRET); ++ svm->vcpu.arch.hflags |= HF_IRET_MASK; ++ svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); ++ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); ++ return 1; ++} ++ ++static int invlpg_interception(struct vcpu_svm *svm) ++{ ++ if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) ++ return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; ++ ++ kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); 
++ return kvm_skip_emulated_instruction(&svm->vcpu); ++} ++ ++static int emulate_on_interception(struct vcpu_svm *svm) ++{ ++ return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; ++} ++ ++static int rsm_interception(struct vcpu_svm *svm) ++{ ++ return kvm_emulate_instruction_from_buffer(&svm->vcpu, ++ rsm_ins_bytes, 2) == EMULATE_DONE; ++} ++ ++static int rdpmc_interception(struct vcpu_svm *svm) ++{ ++ int err; ++ ++ if (!static_cpu_has(X86_FEATURE_NRIPS)) ++ return emulate_on_interception(svm); ++ ++ err = kvm_rdpmc(&svm->vcpu); ++ return kvm_complete_insn_gp(&svm->vcpu, err); ++} ++ ++static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, ++ unsigned long val) ++{ ++ unsigned long cr0 = svm->vcpu.arch.cr0; ++ bool ret = false; ++ u64 intercept; ++ ++ intercept = svm->nested.intercept; ++ ++ if (!is_guest_mode(&svm->vcpu) || ++ (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) ++ return false; ++ ++ cr0 &= ~SVM_CR0_SELECTIVE_MASK; ++ val &= ~SVM_CR0_SELECTIVE_MASK; ++ ++ if (cr0 ^ val) { ++ svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; ++ ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); ++ } ++ ++ return ret; ++} ++ ++#define CR_VALID (1ULL << 63) ++ ++static int cr_interception(struct vcpu_svm *svm) ++{ ++ int reg, cr; ++ unsigned long val; ++ int err; ++ ++ if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) ++ return emulate_on_interception(svm); ++ ++ if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0)) ++ return emulate_on_interception(svm); ++ ++ reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; ++ if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE) ++ cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0; ++ else ++ cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; ++ ++ err = 0; ++ if (cr >= 16) { /* mov to cr */ ++ cr -= 16; ++ val = kvm_register_read(&svm->vcpu, reg); ++ switch (cr) { ++ case 0: ++ if (!check_selective_cr0_intercepted(svm, val)) ++ err = kvm_set_cr0(&svm->vcpu, val); ++ else ++ return 1; ++ ++ break; ++ case 3: ++ err = kvm_set_cr3(&svm->vcpu, val); ++ break; ++ case 4: ++ err = kvm_set_cr4(&svm->vcpu, val); ++ break; ++ case 8: ++ err = kvm_set_cr8(&svm->vcpu, val); ++ break; ++ default: ++ WARN(1, "unhandled write to CR%d", cr); ++ kvm_queue_exception(&svm->vcpu, UD_VECTOR); ++ return 1; ++ } ++ } else { /* mov from cr */ ++ switch (cr) { ++ case 0: ++ val = kvm_read_cr0(&svm->vcpu); ++ break; ++ case 2: ++ val = svm->vcpu.arch.cr2; ++ break; ++ case 3: ++ val = kvm_read_cr3(&svm->vcpu); ++ break; ++ case 4: ++ val = kvm_read_cr4(&svm->vcpu); ++ break; ++ case 8: ++ val = kvm_get_cr8(&svm->vcpu); ++ break; ++ default: ++ WARN(1, "unhandled read from CR%d", cr); ++ kvm_queue_exception(&svm->vcpu, UD_VECTOR); ++ return 1; ++ } ++ kvm_register_write(&svm->vcpu, reg, val); ++ } ++ return kvm_complete_insn_gp(&svm->vcpu, err); ++} ++ ++static int dr_interception(struct vcpu_svm *svm) ++{ ++ int reg, dr; ++ unsigned long val; ++ ++ if (svm->vcpu.guest_debug == 0) { ++ /* ++ * No more DR vmexits; force a reload of the debug registers ++ * and reenter on this instruction. The next vmexit will ++ * retrieve the full state of the debug registers. 
++ */ ++ clr_dr_intercepts(svm); ++ svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; ++ return 1; ++ } ++ ++ if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) ++ return emulate_on_interception(svm); ++ ++ reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; ++ dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; ++ ++ if (dr >= 16) { /* mov to DRn */ ++ if (!kvm_require_dr(&svm->vcpu, dr - 16)) ++ return 1; ++ val = kvm_register_read(&svm->vcpu, reg); ++ kvm_set_dr(&svm->vcpu, dr - 16, val); ++ } else { ++ if (!kvm_require_dr(&svm->vcpu, dr)) ++ return 1; ++ kvm_get_dr(&svm->vcpu, dr, &val); ++ kvm_register_write(&svm->vcpu, reg, val); ++ } ++ ++ return kvm_skip_emulated_instruction(&svm->vcpu); ++} ++ ++static int cr8_write_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_run *kvm_run = svm->vcpu.run; ++ int r; ++ ++ u8 cr8_prev = kvm_get_cr8(&svm->vcpu); ++ /* instruction emulation calls kvm_set_cr8() */ ++ r = cr_interception(svm); ++ if (lapic_in_kernel(&svm->vcpu)) ++ return r; ++ if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) ++ return r; ++ kvm_run->exit_reason = KVM_EXIT_SET_TPR; ++ return 0; ++} ++ ++static int svm_get_msr_feature(struct kvm_msr_entry *msr) ++{ ++ msr->data = 0; ++ ++ switch (msr->index) { ++ case MSR_F10H_DECFG: ++ if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) ++ msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; ++ break; ++ default: ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ switch (msr_info->index) { ++ case MSR_STAR: ++ msr_info->data = svm->vmcb->save.star; ++ break; ++#ifdef CONFIG_X86_64 ++ case MSR_LSTAR: ++ msr_info->data = svm->vmcb->save.lstar; ++ break; ++ case MSR_CSTAR: ++ msr_info->data = svm->vmcb->save.cstar; ++ break; ++ case MSR_KERNEL_GS_BASE: ++ msr_info->data = svm->vmcb->save.kernel_gs_base; ++ break; ++ case MSR_SYSCALL_MASK: ++ msr_info->data = svm->vmcb->save.sfmask; ++ break; ++#endif ++ case MSR_IA32_SYSENTER_CS: ++ msr_info->data = svm->vmcb->save.sysenter_cs; ++ break; ++ case MSR_IA32_SYSENTER_EIP: ++ msr_info->data = svm->sysenter_eip; ++ break; ++ case MSR_IA32_SYSENTER_ESP: ++ msr_info->data = svm->sysenter_esp; ++ break; ++ case MSR_TSC_AUX: ++ if (!boot_cpu_has(X86_FEATURE_RDTSCP)) ++ return 1; ++ msr_info->data = svm->tsc_aux; ++ break; ++ /* ++ * Nobody will change the following 5 values in the VMCB so we can ++ * safely return them on rdmsr. They will always be 0 until LBRV is ++ * implemented. 
++ */ ++ case MSR_IA32_DEBUGCTLMSR: ++ msr_info->data = svm->vmcb->save.dbgctl; ++ break; ++ case MSR_IA32_LASTBRANCHFROMIP: ++ msr_info->data = svm->vmcb->save.br_from; ++ break; ++ case MSR_IA32_LASTBRANCHTOIP: ++ msr_info->data = svm->vmcb->save.br_to; ++ break; ++ case MSR_IA32_LASTINTFROMIP: ++ msr_info->data = svm->vmcb->save.last_excp_from; ++ break; ++ case MSR_IA32_LASTINTTOIP: ++ msr_info->data = svm->vmcb->save.last_excp_to; ++ break; ++ case MSR_VM_HSAVE_PA: ++ msr_info->data = svm->nested.hsave_msr; ++ break; ++ case MSR_VM_CR: ++ msr_info->data = svm->nested.vm_cr_msr; ++ break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) ++ return 1; ++ ++ msr_info->data = svm->spec_ctrl; ++ break; ++ case MSR_AMD64_VIRT_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) ++ return 1; ++ ++ msr_info->data = svm->virt_spec_ctrl; ++ break; ++ case MSR_F15H_IC_CFG: { ++ ++ int family, model; ++ ++ family = guest_cpuid_family(vcpu); ++ model = guest_cpuid_model(vcpu); ++ ++ if (family < 0 || model < 0) ++ return kvm_get_msr_common(vcpu, msr_info); ++ ++ msr_info->data = 0; ++ ++ if (family == 0x15 && ++ (model >= 0x2 && model < 0x20)) ++ msr_info->data = 0x1E; ++ } ++ break; ++ case MSR_F10H_DECFG: ++ msr_info->data = svm->msr_decfg; ++ break; ++ default: ++ return kvm_get_msr_common(vcpu, msr_info); ++ } ++ return 0; ++} ++ ++static int rdmsr_interception(struct vcpu_svm *svm) ++{ ++ u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); ++ struct msr_data msr_info; ++ ++ msr_info.index = ecx; ++ msr_info.host_initiated = false; ++ if (svm_get_msr(&svm->vcpu, &msr_info)) { ++ trace_kvm_msr_read_ex(ecx); ++ kvm_inject_gp(&svm->vcpu, 0); ++ return 1; ++ } else { ++ trace_kvm_msr_read(ecx, msr_info.data); ++ ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, ++ msr_info.data & 0xffffffff); ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, ++ msr_info.data >> 32); ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; ++ return kvm_skip_emulated_instruction(&svm->vcpu); ++ } ++} ++ ++static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ int svm_dis, chg_mask; ++ ++ if (data & ~SVM_VM_CR_VALID_MASK) ++ return 1; ++ ++ chg_mask = SVM_VM_CR_VALID_MASK; ++ ++ if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) ++ chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); ++ ++ svm->nested.vm_cr_msr &= ~chg_mask; ++ svm->nested.vm_cr_msr |= (data & chg_mask); ++ ++ svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; ++ ++ /* check for svm_disable while efer.svme is set */ ++ if (svm_dis && (vcpu->arch.efer & EFER_SVME)) ++ return 1; ++ ++ return 0; ++} ++ ++static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ u32 ecx = msr->index; ++ u64 data = msr->data; ++ switch (ecx) { ++ case MSR_IA32_CR_PAT: ++ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) ++ return 1; ++ vcpu->arch.pat = data; ++ svm->vmcb->save.g_pat = data; ++ mark_dirty(svm->vmcb, VMCB_NPT); ++ break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) ++ return 1; ++ ++ /* The STIBP bit doesn't fault even if it's not advertised */ ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) ++ return 1; ++ ++ svm->spec_ctrl = data; ++ ++ if 
(!data) ++ break; ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. ++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_svm_vmrun_msrpm. ++ * We update the L1 MSR bit as well since it will end up ++ * touching the MSR anyway now. ++ */ ++ set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); ++ break; ++ case MSR_IA32_PRED_CMD: ++ if (!msr->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) ++ return 1; ++ ++ if (data & ~PRED_CMD_IBPB) ++ return 1; ++ ++ if (!data) ++ break; ++ ++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); ++ if (is_guest_mode(vcpu)) ++ break; ++ set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); ++ break; ++ case MSR_AMD64_VIRT_SPEC_CTRL: ++ if (!msr->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) ++ return 1; ++ ++ if (data & ~SPEC_CTRL_SSBD) ++ return 1; ++ ++ svm->virt_spec_ctrl = data; ++ break; ++ case MSR_STAR: ++ svm->vmcb->save.star = data; ++ break; ++#ifdef CONFIG_X86_64 ++ case MSR_LSTAR: ++ svm->vmcb->save.lstar = data; ++ break; ++ case MSR_CSTAR: ++ svm->vmcb->save.cstar = data; ++ break; ++ case MSR_KERNEL_GS_BASE: ++ svm->vmcb->save.kernel_gs_base = data; ++ break; ++ case MSR_SYSCALL_MASK: ++ svm->vmcb->save.sfmask = data; ++ break; ++#endif ++ case MSR_IA32_SYSENTER_CS: ++ svm->vmcb->save.sysenter_cs = data; ++ break; ++ case MSR_IA32_SYSENTER_EIP: ++ svm->sysenter_eip = data; ++ svm->vmcb->save.sysenter_eip = data; ++ break; ++ case MSR_IA32_SYSENTER_ESP: ++ svm->sysenter_esp = data; ++ svm->vmcb->save.sysenter_esp = data; ++ break; ++ case MSR_TSC_AUX: ++ if (!boot_cpu_has(X86_FEATURE_RDTSCP)) ++ return 1; ++ ++ /* ++ * This is rare, so we update the MSR here instead of using ++ * direct_access_msrs. Doing that would require a rdmsr in ++ * svm_vcpu_put. 
++ */ ++ svm->tsc_aux = data; ++ wrmsrl(MSR_TSC_AUX, svm->tsc_aux); ++ break; ++ case MSR_IA32_DEBUGCTLMSR: ++ if (!boot_cpu_has(X86_FEATURE_LBRV)) { ++ vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", ++ __func__, data); ++ break; ++ } ++ if (data & DEBUGCTL_RESERVED_BITS) ++ return 1; ++ ++ svm->vmcb->save.dbgctl = data; ++ mark_dirty(svm->vmcb, VMCB_LBR); ++ if (data & (1ULL<<0)) ++ svm_enable_lbrv(svm); ++ else ++ svm_disable_lbrv(svm); ++ break; ++ case MSR_VM_HSAVE_PA: ++ svm->nested.hsave_msr = data; ++ break; ++ case MSR_VM_CR: ++ return svm_set_vm_cr(vcpu, data); ++ case MSR_VM_IGNNE: ++ vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); ++ break; ++ case MSR_F10H_DECFG: { ++ struct kvm_msr_entry msr_entry; ++ ++ msr_entry.index = msr->index; ++ if (svm_get_msr_feature(&msr_entry)) ++ return 1; ++ ++ /* Check the supported bits */ ++ if (data & ~msr_entry.data) ++ return 1; ++ ++ /* Don't allow the guest to change a bit, #GP */ ++ if (!msr->host_initiated && (data ^ msr_entry.data)) ++ return 1; ++ ++ svm->msr_decfg = data; ++ break; ++ } ++ case MSR_IA32_APICBASE: ++ if (kvm_vcpu_apicv_active(vcpu)) ++ avic_update_vapic_bar(to_svm(vcpu), data); ++ /* Follow through */ ++ default: ++ return kvm_set_msr_common(vcpu, msr); ++ } ++ return 0; ++} ++ ++static int wrmsr_interception(struct vcpu_svm *svm) ++{ ++ struct msr_data msr; ++ u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); ++ u64 data = kvm_read_edx_eax(&svm->vcpu); ++ ++ msr.data = data; ++ msr.index = ecx; ++ msr.host_initiated = false; ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; ++ if (kvm_set_msr(&svm->vcpu, &msr)) { ++ trace_kvm_msr_write_ex(ecx, data); ++ kvm_inject_gp(&svm->vcpu, 0); ++ return 1; ++ } else { ++ trace_kvm_msr_write(ecx, data); ++ return kvm_skip_emulated_instruction(&svm->vcpu); ++ } ++} ++ ++static int msr_interception(struct vcpu_svm *svm) ++{ ++ if (svm->vmcb->control.exit_info_1) ++ return wrmsr_interception(svm); ++ else ++ return rdmsr_interception(svm); ++} ++ ++static int interrupt_window_interception(struct vcpu_svm *svm) ++{ ++ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); ++ svm_clear_vintr(svm); ++ svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; ++ mark_dirty(svm->vmcb, VMCB_INTR); ++ ++svm->vcpu.stat.irq_window_exits; ++ return 1; ++} ++ ++static int pause_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_vcpu *vcpu = &svm->vcpu; ++ bool in_kernel = (svm_get_cpl(vcpu) == 0); ++ ++ if (pause_filter_thresh) ++ grow_ple_window(vcpu); ++ ++ kvm_vcpu_on_spin(vcpu, in_kernel); ++ return 1; ++} ++ ++static int nop_interception(struct vcpu_svm *svm) ++{ ++ return kvm_skip_emulated_instruction(&(svm->vcpu)); ++} ++ ++static int monitor_interception(struct vcpu_svm *svm) ++{ ++ printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); ++ return nop_interception(svm); ++} ++ ++static int mwait_interception(struct vcpu_svm *svm) ++{ ++ printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); ++ return nop_interception(svm); ++} ++ ++enum avic_ipi_failure_cause { ++ AVIC_IPI_FAILURE_INVALID_INT_TYPE, ++ AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, ++ AVIC_IPI_FAILURE_INVALID_TARGET, ++ AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, ++}; ++ ++static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) ++{ ++ u32 icrh = svm->vmcb->control.exit_info_1 >> 32; ++ u32 icrl = svm->vmcb->control.exit_info_1; ++ u32 id = svm->vmcb->control.exit_info_2 >> 32; ++ u32 index = svm->vmcb->control.exit_info_2 & 0xFF; ++ struct kvm_lapic *apic = 
svm->vcpu.arch.apic; ++ ++ trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index); ++ ++ switch (id) { ++ case AVIC_IPI_FAILURE_INVALID_INT_TYPE: ++ /* ++ * AVIC hardware handles the generation of ++ * IPIs when the specified Message Type is Fixed ++ * (also known as fixed delivery mode) and ++ * the Trigger Mode is edge-triggered. The hardware ++ * also supports self and broadcast delivery modes ++ * specified via the Destination Shorthand(DSH) ++ * field of the ICRL. Logical and physical APIC ID ++ * formats are supported. All other IPI types cause ++ * a #VMEXIT, which needs to emulated. ++ */ ++ kvm_lapic_reg_write(apic, APIC_ICR2, icrh); ++ kvm_lapic_reg_write(apic, APIC_ICR, icrl); ++ break; ++ case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: { ++ int i; ++ struct kvm_vcpu *vcpu; ++ struct kvm *kvm = svm->vcpu.kvm; ++ struct kvm_lapic *apic = svm->vcpu.arch.apic; ++ ++ /* ++ * At this point, we expect that the AVIC HW has already ++ * set the appropriate IRR bits on the valid target ++ * vcpus. So, we just need to kick the appropriate vcpu. ++ */ ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ bool m = kvm_apic_match_dest(vcpu, apic, ++ icrl & KVM_APIC_SHORT_MASK, ++ GET_APIC_DEST_FIELD(icrh), ++ icrl & KVM_APIC_DEST_MASK); ++ ++ if (m && !avic_vcpu_is_running(vcpu)) ++ kvm_vcpu_wake_up(vcpu); ++ } ++ break; ++ } ++ case AVIC_IPI_FAILURE_INVALID_TARGET: ++ break; ++ case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: ++ WARN_ONCE(1, "Invalid backing page\n"); ++ break; ++ default: ++ pr_err("Unknown IPI interception\n"); ++ } ++ ++ return 1; ++} ++ ++static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) ++{ ++ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); ++ int index; ++ u32 *logical_apic_id_table; ++ int dlid = GET_APIC_LOGICAL_ID(ldr); ++ ++ if (!dlid) ++ return NULL; ++ ++ if (flat) { /* flat */ ++ index = ffs(dlid) - 1; ++ if (index > 7) ++ return NULL; ++ } else { /* cluster */ ++ int cluster = (dlid & 0xf0) >> 4; ++ int apic = ffs(dlid & 0x0f) - 1; ++ ++ if ((apic < 0) || (apic > 7) || ++ (cluster >= 0xf)) ++ return NULL; ++ index = (cluster << 2) + apic; ++ } ++ ++ logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page); ++ ++ return &logical_apic_id_table[index]; ++} ++ ++static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr, ++ bool valid) ++{ ++ bool flat; ++ u32 *entry, new_entry; ++ ++ flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT; ++ entry = avic_get_logical_id_entry(vcpu, ldr, flat); ++ if (!entry) ++ return -EINVAL; ++ ++ new_entry = READ_ONCE(*entry); ++ new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; ++ new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK); ++ if (valid) ++ new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; ++ else ++ new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK; ++ WRITE_ONCE(*entry, new_entry); ++ ++ return 0; ++} ++ ++static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) ++{ ++ int ret; ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); ++ ++ if (!ldr) ++ return 1; ++ ++ ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true); ++ if (ret && svm->ldr_reg) { ++ avic_ldr_write(vcpu, 0, svm->ldr_reg, false); ++ svm->ldr_reg = 0; ++ } else { ++ svm->ldr_reg = ldr; ++ } ++ return ret; ++} ++ ++static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) ++{ ++ u64 *old, *new; ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID); 
++ u32 id = (apic_id_reg >> 24) & 0xff; ++ ++ if (vcpu->vcpu_id == id) ++ return 0; ++ ++ old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id); ++ new = avic_get_physical_id_entry(vcpu, id); ++ if (!new || !old) ++ return 1; ++ ++ /* We need to move physical_id_entry to new offset */ ++ *new = *old; ++ *old = 0ULL; ++ to_svm(vcpu)->avic_physical_id_cache = new; ++ ++ /* ++ * Also update the guest physical APIC ID in the logical ++ * APIC ID table entry if already setup the LDR. ++ */ ++ if (svm->ldr_reg) ++ avic_handle_ldr_update(vcpu); ++ ++ return 0; ++} ++ ++static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); ++ u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); ++ u32 mod = (dfr >> 28) & 0xf; ++ ++ /* ++ * We assume that all local APICs are using the same type. ++ * If this changes, we need to flush the AVIC logical ++ * APID id table. ++ */ ++ if (kvm_svm->ldr_mode == mod) ++ return 0; ++ ++ clear_page(page_address(kvm_svm->avic_logical_id_table_page)); ++ kvm_svm->ldr_mode = mod; ++ ++ if (svm->ldr_reg) ++ avic_handle_ldr_update(vcpu); ++ return 0; ++} ++ ++static int avic_unaccel_trap_write(struct vcpu_svm *svm) ++{ ++ struct kvm_lapic *apic = svm->vcpu.arch.apic; ++ u32 offset = svm->vmcb->control.exit_info_1 & ++ AVIC_UNACCEL_ACCESS_OFFSET_MASK; ++ ++ switch (offset) { ++ case APIC_ID: ++ if (avic_handle_apic_id_update(&svm->vcpu)) ++ return 0; ++ break; ++ case APIC_LDR: ++ if (avic_handle_ldr_update(&svm->vcpu)) ++ return 0; ++ break; ++ case APIC_DFR: ++ avic_handle_dfr_update(&svm->vcpu); ++ break; ++ default: ++ break; ++ } ++ ++ kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); ++ ++ return 1; ++} ++ ++static bool is_avic_unaccelerated_access_trap(u32 offset) ++{ ++ bool ret = false; ++ ++ switch (offset) { ++ case APIC_ID: ++ case APIC_EOI: ++ case APIC_RRR: ++ case APIC_LDR: ++ case APIC_DFR: ++ case APIC_SPIV: ++ case APIC_ESR: ++ case APIC_ICR: ++ case APIC_LVTT: ++ case APIC_LVTTHMR: ++ case APIC_LVTPC: ++ case APIC_LVT0: ++ case APIC_LVT1: ++ case APIC_LVTERR: ++ case APIC_TMICT: ++ case APIC_TDCR: ++ ret = true; ++ break; ++ default: ++ break; ++ } ++ return ret; ++} ++ ++static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) ++{ ++ int ret = 0; ++ u32 offset = svm->vmcb->control.exit_info_1 & ++ AVIC_UNACCEL_ACCESS_OFFSET_MASK; ++ u32 vector = svm->vmcb->control.exit_info_2 & ++ AVIC_UNACCEL_ACCESS_VECTOR_MASK; ++ bool write = (svm->vmcb->control.exit_info_1 >> 32) & ++ AVIC_UNACCEL_ACCESS_WRITE_MASK; ++ bool trap = is_avic_unaccelerated_access_trap(offset); ++ ++ trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset, ++ trap, write, vector); ++ if (trap) { ++ /* Handling Trap */ ++ WARN_ONCE(!write, "svm: Handling trap read.\n"); ++ ret = avic_unaccel_trap_write(svm); ++ } else { ++ /* Handling Fault */ ++ ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); ++ } ++ ++ return ret; ++} ++ ++static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { ++ [SVM_EXIT_READ_CR0] = cr_interception, ++ [SVM_EXIT_READ_CR3] = cr_interception, ++ [SVM_EXIT_READ_CR4] = cr_interception, ++ [SVM_EXIT_READ_CR8] = cr_interception, ++ [SVM_EXIT_CR0_SEL_WRITE] = cr_interception, ++ [SVM_EXIT_WRITE_CR0] = cr_interception, ++ [SVM_EXIT_WRITE_CR3] = cr_interception, ++ [SVM_EXIT_WRITE_CR4] = cr_interception, ++ [SVM_EXIT_WRITE_CR8] = cr8_write_interception, ++ [SVM_EXIT_READ_DR0] = dr_interception, ++ [SVM_EXIT_READ_DR1] = 
dr_interception, ++ [SVM_EXIT_READ_DR2] = dr_interception, ++ [SVM_EXIT_READ_DR3] = dr_interception, ++ [SVM_EXIT_READ_DR4] = dr_interception, ++ [SVM_EXIT_READ_DR5] = dr_interception, ++ [SVM_EXIT_READ_DR6] = dr_interception, ++ [SVM_EXIT_READ_DR7] = dr_interception, ++ [SVM_EXIT_WRITE_DR0] = dr_interception, ++ [SVM_EXIT_WRITE_DR1] = dr_interception, ++ [SVM_EXIT_WRITE_DR2] = dr_interception, ++ [SVM_EXIT_WRITE_DR3] = dr_interception, ++ [SVM_EXIT_WRITE_DR4] = dr_interception, ++ [SVM_EXIT_WRITE_DR5] = dr_interception, ++ [SVM_EXIT_WRITE_DR6] = dr_interception, ++ [SVM_EXIT_WRITE_DR7] = dr_interception, ++ [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, ++ [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, ++ [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, ++ [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, ++ [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, ++ [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, ++ [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception, ++ [SVM_EXIT_INTR] = intr_interception, ++ [SVM_EXIT_NMI] = nmi_interception, ++ [SVM_EXIT_SMI] = nop_on_interception, ++ [SVM_EXIT_INIT] = nop_on_interception, ++ [SVM_EXIT_VINTR] = interrupt_window_interception, ++ [SVM_EXIT_RDPMC] = rdpmc_interception, ++ [SVM_EXIT_CPUID] = cpuid_interception, ++ [SVM_EXIT_IRET] = iret_interception, ++ [SVM_EXIT_INVD] = emulate_on_interception, ++ [SVM_EXIT_PAUSE] = pause_interception, ++ [SVM_EXIT_HLT] = halt_interception, ++ [SVM_EXIT_INVLPG] = invlpg_interception, ++ [SVM_EXIT_INVLPGA] = invlpga_interception, ++ [SVM_EXIT_IOIO] = io_interception, ++ [SVM_EXIT_MSR] = msr_interception, ++ [SVM_EXIT_TASK_SWITCH] = task_switch_interception, ++ [SVM_EXIT_SHUTDOWN] = shutdown_interception, ++ [SVM_EXIT_VMRUN] = vmrun_interception, ++ [SVM_EXIT_VMMCALL] = vmmcall_interception, ++ [SVM_EXIT_VMLOAD] = vmload_interception, ++ [SVM_EXIT_VMSAVE] = vmsave_interception, ++ [SVM_EXIT_STGI] = stgi_interception, ++ [SVM_EXIT_CLGI] = clgi_interception, ++ [SVM_EXIT_SKINIT] = skinit_interception, ++ [SVM_EXIT_WBINVD] = wbinvd_interception, ++ [SVM_EXIT_MONITOR] = monitor_interception, ++ [SVM_EXIT_MWAIT] = mwait_interception, ++ [SVM_EXIT_XSETBV] = xsetbv_interception, ++ [SVM_EXIT_NPF] = npf_interception, ++ [SVM_EXIT_RSM] = rsm_interception, ++ [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, ++ [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, ++}; ++ ++static void dump_vmcb(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb_control_area *control = &svm->vmcb->control; ++ struct vmcb_save_area *save = &svm->vmcb->save; ++ ++ pr_err("VMCB Control Area:\n"); ++ pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); ++ pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); ++ pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); ++ pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); ++ pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); ++ pr_err("%-20s%016llx\n", "intercepts:", control->intercept); ++ pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); ++ pr_err("%-20s%d\n", "pause filter threshold:", ++ control->pause_filter_thresh); ++ pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); ++ pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); ++ pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); ++ pr_err("%-20s%d\n", "asid:", control->asid); ++ pr_err("%-20s%d\n", "tlb_ctl:", 
control->tlb_ctl); ++ pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); ++ pr_err("%-20s%08x\n", "int_vector:", control->int_vector); ++ pr_err("%-20s%08x\n", "int_state:", control->int_state); ++ pr_err("%-20s%08x\n", "exit_code:", control->exit_code); ++ pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); ++ pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); ++ pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); ++ pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); ++ pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); ++ pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); ++ pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar); ++ pr_err("%-20s%08x\n", "event_inj:", control->event_inj); ++ pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); ++ pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext); ++ pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); ++ pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page); ++ pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id); ++ pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id); ++ pr_err("VMCB State Save Area:\n"); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "es:", ++ save->es.selector, save->es.attrib, ++ save->es.limit, save->es.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "cs:", ++ save->cs.selector, save->cs.attrib, ++ save->cs.limit, save->cs.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "ss:", ++ save->ss.selector, save->ss.attrib, ++ save->ss.limit, save->ss.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "ds:", ++ save->ds.selector, save->ds.attrib, ++ save->ds.limit, save->ds.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "fs:", ++ save->fs.selector, save->fs.attrib, ++ save->fs.limit, save->fs.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "gs:", ++ save->gs.selector, save->gs.attrib, ++ save->gs.limit, save->gs.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "gdtr:", ++ save->gdtr.selector, save->gdtr.attrib, ++ save->gdtr.limit, save->gdtr.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "ldtr:", ++ save->ldtr.selector, save->ldtr.attrib, ++ save->ldtr.limit, save->ldtr.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "idtr:", ++ save->idtr.selector, save->idtr.attrib, ++ save->idtr.limit, save->idtr.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "tr:", ++ save->tr.selector, save->tr.attrib, ++ save->tr.limit, save->tr.base); ++ pr_err("cpl: %d efer: %016llx\n", ++ save->cpl, save->efer); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "cr0:", save->cr0, "cr2:", save->cr2); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "cr3:", save->cr3, "cr4:", save->cr4); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "dr6:", save->dr6, "dr7:", save->dr7); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "rip:", save->rip, "rflags:", save->rflags); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "rsp:", save->rsp, "rax:", save->rax); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "star:", save->star, "lstar:", save->lstar); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "cstar:", save->cstar, "sfmask:", save->sfmask); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "kernel_gs_base:", save->kernel_gs_base, ++ "sysenter_cs:", save->sysenter_cs); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "sysenter_esp:", 
save->sysenter_esp, ++ "sysenter_eip:", save->sysenter_eip); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "gpat:", save->g_pat, "dbgctl:", save->dbgctl); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "br_from:", save->br_from, "br_to:", save->br_to); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "excp_from:", save->last_excp_from, ++ "excp_to:", save->last_excp_to); ++} ++ ++static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) ++{ ++ struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; ++ ++ *info1 = control->exit_info_1; ++ *info2 = control->exit_info_2; ++} ++ ++static int handle_exit(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct kvm_run *kvm_run = vcpu->run; ++ u32 exit_code = svm->vmcb->control.exit_code; ++ ++ trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); ++ ++ if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) ++ vcpu->arch.cr0 = svm->vmcb->save.cr0; ++ if (npt_enabled) ++ vcpu->arch.cr3 = svm->vmcb->save.cr3; ++ ++ if (unlikely(svm->nested.exit_required)) { ++ nested_svm_vmexit(svm); ++ svm->nested.exit_required = false; ++ ++ return 1; ++ } ++ ++ if (is_guest_mode(vcpu)) { ++ int vmexit; ++ ++ trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, ++ svm->vmcb->control.exit_info_1, ++ svm->vmcb->control.exit_info_2, ++ svm->vmcb->control.exit_int_info, ++ svm->vmcb->control.exit_int_info_err, ++ KVM_ISA_SVM); ++ ++ vmexit = nested_svm_exit_special(svm); ++ ++ if (vmexit == NESTED_EXIT_CONTINUE) ++ vmexit = nested_svm_exit_handled(svm); ++ ++ if (vmexit == NESTED_EXIT_DONE) ++ return 1; ++ } ++ ++ svm_complete_interrupts(svm); ++ ++ if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { ++ kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; ++ kvm_run->fail_entry.hardware_entry_failure_reason ++ = svm->vmcb->control.exit_code; ++ pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); ++ dump_vmcb(vcpu); ++ return 0; ++ } ++ ++ if (is_external_interrupt(svm->vmcb->control.exit_int_info) && ++ exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && ++ exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && ++ exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) ++ printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " ++ "exit_code 0x%x\n", ++ __func__, svm->vmcb->control.exit_int_info, ++ exit_code); ++ ++ if (exit_code >= ARRAY_SIZE(svm_exit_handlers) ++ || !svm_exit_handlers[exit_code]) { ++ WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code); ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ return svm_exit_handlers[exit_code](svm); ++} ++ ++static void reload_tss(struct kvm_vcpu *vcpu) ++{ ++ int cpu = raw_smp_processor_id(); ++ ++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); ++ sd->tss_desc->type = 9; /* available 32/64-bit TSS */ ++ load_TR_desc(); ++} ++ ++static void pre_sev_run(struct vcpu_svm *svm, int cpu) ++{ ++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); ++ int asid = sev_get_asid(svm->vcpu.kvm); ++ ++ /* Assign the asid allocated with this SEV guest */ ++ svm->vmcb->control.asid = asid; ++ ++ /* ++ * Flush guest TLB: ++ * ++ * 1) when different VMCB for the same ASID is to be run on the same host CPU. ++ * 2) or this VMCB was executed on different host CPU in previous VMRUNs. 
++ */ ++ if (sd->sev_vmcbs[asid] == svm->vmcb && ++ svm->last_cpu == cpu) ++ return; ++ ++ svm->last_cpu = cpu; ++ sd->sev_vmcbs[asid] = svm->vmcb; ++ svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; ++ mark_dirty(svm->vmcb, VMCB_ASID); ++} ++ ++static void pre_svm_run(struct vcpu_svm *svm) ++{ ++ int cpu = raw_smp_processor_id(); ++ ++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); ++ ++ if (sev_guest(svm->vcpu.kvm)) ++ return pre_sev_run(svm, cpu); ++ ++ /* FIXME: handle wraparound of asid_generation */ ++ if (svm->asid_generation != sd->asid_generation) ++ new_asid(svm, sd); ++} ++ ++static void svm_inject_nmi(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; ++ vcpu->arch.hflags |= HF_NMI_MASK; ++ set_intercept(svm, INTERCEPT_IRET); ++ ++vcpu->stat.nmi_injections; ++} ++ ++static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) ++{ ++ struct vmcb_control_area *control; ++ ++ /* The following fields are ignored when AVIC is enabled */ ++ control = &svm->vmcb->control; ++ control->int_vector = irq; ++ control->int_ctl &= ~V_INTR_PRIO_MASK; ++ control->int_ctl |= V_IRQ_MASK | ++ ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); ++ mark_dirty(svm->vmcb, VMCB_INTR); ++} ++ ++static void svm_set_irq(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ BUG_ON(!(gif_set(svm))); ++ ++ trace_kvm_inj_virq(vcpu->arch.interrupt.nr); ++ ++vcpu->stat.irq_injections; ++ ++ svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | ++ SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; ++} ++ ++static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu) ++{ ++ return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK); ++} ++ ++static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (svm_nested_virtualize_tpr(vcpu) || ++ kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); ++ ++ if (irr == -1) ++ return; ++ ++ if (tpr >= irr) ++ set_cr_intercept(svm, INTERCEPT_CR8_WRITE); ++} ++ ++static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) ++{ ++ return; ++} ++ ++static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu) ++{ ++ return avic && irqchip_split(vcpu->kvm); ++} ++ ++static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) ++{ ++} ++ ++static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) ++{ ++} ++ ++/* Note: Currently only used by Hyper-V. 
*/ ++static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb *vmcb = svm->vmcb; ++ ++ if (!kvm_vcpu_apicv_active(&svm->vcpu)) ++ return; ++ ++ vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; ++ mark_dirty(vmcb, VMCB_INTR); ++} ++ ++static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) ++{ ++ return; ++} ++ ++static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) ++{ ++ kvm_lapic_set_irr(vec, vcpu->arch.apic); ++ smp_mb__after_atomic(); ++ ++ if (avic_vcpu_is_running(vcpu)) ++ wrmsrl(SVM_AVIC_DOORBELL, ++ kvm_cpu_get_apicid(vcpu->cpu)); ++ else ++ kvm_vcpu_wake_up(vcpu); ++} ++ ++static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) ++{ ++ return false; ++} ++ ++static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) ++{ ++ unsigned long flags; ++ struct amd_svm_iommu_ir *cur; ++ ++ spin_lock_irqsave(&svm->ir_list_lock, flags); ++ list_for_each_entry(cur, &svm->ir_list, node) { ++ if (cur->data != pi->ir_data) ++ continue; ++ list_del(&cur->node); ++ kfree(cur); ++ break; ++ } ++ spin_unlock_irqrestore(&svm->ir_list_lock, flags); ++} ++ ++static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) ++{ ++ int ret = 0; ++ unsigned long flags; ++ struct amd_svm_iommu_ir *ir; ++ ++ /** ++ * In some cases, the existing irte is updaed and re-set, ++ * so we need to check here if it's already been * added ++ * to the ir_list. ++ */ ++ if (pi->ir_data && (pi->prev_ga_tag != 0)) { ++ struct kvm *kvm = svm->vcpu.kvm; ++ u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag); ++ struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); ++ struct vcpu_svm *prev_svm; ++ ++ if (!prev_vcpu) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ prev_svm = to_svm(prev_vcpu); ++ svm_ir_list_del(prev_svm, pi); ++ } ++ ++ /** ++ * Allocating new amd_iommu_pi_data, which will get ++ * add to the per-vcpu ir_list. ++ */ ++ ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL); ++ if (!ir) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ir->data = pi->ir_data; ++ ++ spin_lock_irqsave(&svm->ir_list_lock, flags); ++ list_add(&ir->node, &svm->ir_list); ++ spin_unlock_irqrestore(&svm->ir_list_lock, flags); ++out: ++ return ret; ++} ++ ++/** ++ * Note: ++ * The HW cannot support posting multicast/broadcast ++ * interrupts to a vCPU. So, we still use legacy interrupt ++ * remapping for these kind of interrupts. ++ * ++ * For lowest-priority interrupts, we only support ++ * those with single CPU as the destination, e.g. user ++ * configures the interrupts via /proc/irq or uses ++ * irqbalance to make the interrupts single-CPU. 
++ */ ++static int ++get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, ++ struct vcpu_data *vcpu_info, struct vcpu_svm **svm) ++{ ++ struct kvm_lapic_irq irq; ++ struct kvm_vcpu *vcpu = NULL; ++ ++ kvm_set_msi_irq(kvm, e, &irq); ++ ++ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { ++ pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n", ++ __func__, irq.vector); ++ return -1; ++ } ++ ++ pr_debug("SVM: %s: use GA mode for irq %u\n", __func__, ++ irq.vector); ++ *svm = to_svm(vcpu); ++ vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page)); ++ vcpu_info->vector = irq.vector; ++ ++ return 0; ++} ++ ++/* ++ * svm_update_pi_irte - set IRTE for Posted-Interrupts ++ * ++ * @kvm: kvm ++ * @host_irq: host irq of the interrupt ++ * @guest_irq: gsi of the interrupt ++ * @set: set or unset PI ++ * returns 0 on success, < 0 on failure ++ */ ++static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, ++ uint32_t guest_irq, bool set) ++{ ++ struct kvm_kernel_irq_routing_entry *e; ++ struct kvm_irq_routing_table *irq_rt; ++ int idx, ret = -EINVAL; ++ ++ if (!kvm_arch_has_assigned_device(kvm) || ++ !irq_remapping_cap(IRQ_POSTING_CAP)) ++ return 0; ++ ++ pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n", ++ __func__, host_irq, guest_irq, set); ++ ++ idx = srcu_read_lock(&kvm->irq_srcu); ++ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); ++ WARN_ON(guest_irq >= irq_rt->nr_rt_entries); ++ ++ hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { ++ struct vcpu_data vcpu_info; ++ struct vcpu_svm *svm = NULL; ++ ++ if (e->type != KVM_IRQ_ROUTING_MSI) ++ continue; ++ ++ /** ++ * Here, we setup with legacy mode in the following cases: ++ * 1. When cannot target interrupt to a specific vcpu. ++ * 2. Unsetting posted interrupt. ++ * 3. APIC virtialization is disabled for the vcpu. ++ */ ++ if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set && ++ kvm_vcpu_apicv_active(&svm->vcpu)) { ++ struct amd_iommu_pi_data pi; ++ ++ /* Try to enable guest_mode in IRTE */ ++ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & ++ AVIC_HPA_MASK); ++ pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, ++ svm->vcpu.vcpu_id); ++ pi.is_guest_mode = true; ++ pi.vcpu_data = &vcpu_info; ++ ret = irq_set_vcpu_affinity(host_irq, &pi); ++ ++ /** ++ * Here, we successfully setting up vcpu affinity in ++ * IOMMU guest mode. Now, we need to store the posted ++ * interrupt information in a per-vcpu ir_list so that ++ * we can reference to them directly when we update vcpu ++ * scheduling information in IOMMU irte. ++ */ ++ if (!ret && pi.is_guest_mode) ++ svm_ir_list_add(svm, &pi); ++ } else { ++ /* Use legacy mode in IRTE */ ++ struct amd_iommu_pi_data pi; ++ ++ /** ++ * Here, pi is used to: ++ * - Tell IOMMU to use legacy mode for this interrupt. ++ * - Retrieve ga_tag of prior interrupt remapping data. ++ */ ++ pi.is_guest_mode = false; ++ ret = irq_set_vcpu_affinity(host_irq, &pi); ++ ++ /** ++ * Check if the posted interrupt was previously ++ * setup with the guest_mode by checking if the ga_tag ++ * was cached. If so, we need to clean up the per-vcpu ++ * ir_list. 
++ */ ++ if (!ret && pi.prev_ga_tag) { ++ int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); ++ struct kvm_vcpu *vcpu; ++ ++ vcpu = kvm_get_vcpu_by_id(kvm, id); ++ if (vcpu) ++ svm_ir_list_del(to_svm(vcpu), &pi); ++ } ++ } ++ ++ if (!ret && svm) { ++ trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id, ++ e->gsi, vcpu_info.vector, ++ vcpu_info.pi_desc_addr, set); ++ } ++ ++ if (ret < 0) { ++ pr_err("%s: failed to update PI IRTE\n", __func__); ++ goto out; ++ } ++ } ++ ++ ret = 0; ++out: ++ srcu_read_unlock(&kvm->irq_srcu, idx); ++ return ret; ++} ++ ++static int svm_nmi_allowed(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb *vmcb = svm->vmcb; ++ int ret; ++ ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && ++ !(svm->vcpu.arch.hflags & HF_NMI_MASK); ++ ret = ret && gif_set(svm) && nested_svm_nmi(svm); ++ ++ return ret; ++} ++ ++static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); ++} ++ ++static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (masked) { ++ svm->vcpu.arch.hflags |= HF_NMI_MASK; ++ set_intercept(svm, INTERCEPT_IRET); ++ } else { ++ svm->vcpu.arch.hflags &= ~HF_NMI_MASK; ++ clr_intercept(svm, INTERCEPT_IRET); ++ } ++} ++ ++static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb *vmcb = svm->vmcb; ++ int ret; ++ ++ if (!gif_set(svm) || ++ (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) ++ return 0; ++ ++ ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); ++ ++ if (is_guest_mode(vcpu)) ++ return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); ++ ++ return ret; ++} ++ ++static void enable_irq_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ /* ++ * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes ++ * 1, because that's a separate STGI/VMRUN intercept. The next time we ++ * get that intercept, this function will be called again though and ++ * we'll get the vintr intercept. However, if the vGIF feature is ++ * enabled, the STGI interception will not occur. Enable the irq ++ * window under the assumption that the hardware will set the GIF. ++ */ ++ if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) { ++ svm_set_vintr(svm); ++ svm_inject_irq(svm, 0x0); ++ } ++} ++ ++static void enable_nmi_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) ++ == HF_NMI_MASK) ++ return; /* IRET will cause a vm exit */ ++ ++ if (!gif_set(svm)) { ++ if (vgif_enabled(svm)) ++ set_intercept(svm, INTERCEPT_STGI); ++ return; /* STGI will cause a vm exit */ ++ } ++ ++ if (svm->nested.exit_required) ++ return; /* we're not going to run the guest yet */ ++ ++ /* ++ * Something prevents NMI from been injected. 
Single step over possible ++ * problem (IRET or exception injection or interrupt shadow) ++ */ ++ svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu); ++ svm->nmi_singlestep = true; ++ svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); ++} ++ ++static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) ++{ ++ return 0; ++} ++ ++static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) ++{ ++ return 0; ++} ++ ++static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (static_cpu_has(X86_FEATURE_FLUSHBYASID)) ++ svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; ++ else ++ svm->asid_generation--; ++} ++ ++static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ invlpga(gva, svm->vmcb->control.asid); ++} ++ ++static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) ++{ ++} ++ ++static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (svm_nested_virtualize_tpr(vcpu)) ++ return; ++ ++ if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { ++ int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; ++ kvm_set_cr8(vcpu, cr8); ++ } ++} ++ ++static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u64 cr8; ++ ++ if (svm_nested_virtualize_tpr(vcpu) || ++ kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ cr8 = kvm_get_cr8(vcpu); ++ svm->vmcb->control.int_ctl &= ~V_TPR_MASK; ++ svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; ++} ++ ++static void svm_complete_interrupts(struct vcpu_svm *svm) ++{ ++ u8 vector; ++ int type; ++ u32 exitintinfo = svm->vmcb->control.exit_int_info; ++ unsigned int3_injected = svm->int3_injected; ++ ++ svm->int3_injected = 0; ++ ++ /* ++ * If we've made progress since setting HF_IRET_MASK, we've ++ * executed an IRET and can allow NMI injection. ++ */ ++ if ((svm->vcpu.arch.hflags & HF_IRET_MASK) ++ && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { ++ svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); ++ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); ++ } ++ ++ svm->vcpu.arch.nmi_injected = false; ++ kvm_clear_exception_queue(&svm->vcpu); ++ kvm_clear_interrupt_queue(&svm->vcpu); ++ ++ if (!(exitintinfo & SVM_EXITINTINFO_VALID)) ++ return; ++ ++ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); ++ ++ vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK; ++ type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK; ++ ++ switch (type) { ++ case SVM_EXITINTINFO_TYPE_NMI: ++ svm->vcpu.arch.nmi_injected = true; ++ break; ++ case SVM_EXITINTINFO_TYPE_EXEPT: ++ /* ++ * In case of software exceptions, do not reinject the vector, ++ * but re-execute the instruction instead. Rewind RIP first ++ * if we emulated INT3 before. 
++ */ ++ if (kvm_exception_is_soft(vector)) { ++ if (vector == BP_VECTOR && int3_injected && ++ kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) ++ kvm_rip_write(&svm->vcpu, ++ kvm_rip_read(&svm->vcpu) - ++ int3_injected); ++ break; ++ } ++ if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { ++ u32 err = svm->vmcb->control.exit_int_info_err; ++ kvm_requeue_exception_e(&svm->vcpu, vector, err); ++ ++ } else ++ kvm_requeue_exception(&svm->vcpu, vector); ++ break; ++ case SVM_EXITINTINFO_TYPE_INTR: ++ kvm_queue_interrupt(&svm->vcpu, vector, false); ++ break; ++ default: ++ break; ++ } ++} ++ ++static void svm_cancel_injection(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb_control_area *control = &svm->vmcb->control; ++ ++ control->exit_int_info = control->event_inj; ++ control->exit_int_info_err = control->event_inj_err; ++ control->event_inj = 0; ++ svm_complete_interrupts(svm); ++} ++ ++static void svm_vcpu_run(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; ++ svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; ++ svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; ++ ++ /* ++ * A vmexit emulation is required before the vcpu can be executed ++ * again. ++ */ ++ if (unlikely(svm->nested.exit_required)) ++ return; ++ ++ /* ++ * Disable singlestep if we're injecting an interrupt/exception. ++ * We don't want our modified rflags to be pushed on the stack where ++ * we might not be able to easily reset them if we disabled NMI ++ * singlestep later. ++ */ ++ if (svm->nmi_singlestep && svm->vmcb->control.event_inj) { ++ /* ++ * Event injection happens before external interrupts cause a ++ * vmexit and interrupts are disabled here, so smp_send_reschedule ++ * is enough to force an immediate vmexit. ++ */ ++ disable_nmi_singlestep(svm); ++ smp_send_reschedule(vcpu->cpu); ++ } ++ ++ pre_svm_run(svm); ++ ++ sync_lapic_to_cr8(vcpu); ++ ++ svm->vmcb->save.cr2 = vcpu->arch.cr2; ++ ++ clgi(); ++ kvm_load_guest_xcr0(vcpu); ++ ++ /* ++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if ++ * it's non-zero. Since vmentry is serialising on affected CPUs, there ++ * is no need to worry about the conditional branch over the wrmsr ++ * being speculatively taken. 
++ */ ++ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); ++ ++ local_irq_enable(); ++ ++ asm volatile ( ++ "push %%" _ASM_BP "; \n\t" ++ "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" ++ "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t" ++ "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t" ++ "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t" ++ "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t" ++ "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t" ++#ifdef CONFIG_X86_64 ++ "mov %c[r8](%[svm]), %%r8 \n\t" ++ "mov %c[r9](%[svm]), %%r9 \n\t" ++ "mov %c[r10](%[svm]), %%r10 \n\t" ++ "mov %c[r11](%[svm]), %%r11 \n\t" ++ "mov %c[r12](%[svm]), %%r12 \n\t" ++ "mov %c[r13](%[svm]), %%r13 \n\t" ++ "mov %c[r14](%[svm]), %%r14 \n\t" ++ "mov %c[r15](%[svm]), %%r15 \n\t" ++#endif ++ ++ /* Enter guest mode */ ++ "push %%" _ASM_AX " \n\t" ++ "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t" ++ __ex(SVM_VMLOAD) "\n\t" ++ __ex(SVM_VMRUN) "\n\t" ++ __ex(SVM_VMSAVE) "\n\t" ++ "pop %%" _ASM_AX " \n\t" ++ ++ /* Save guest registers, load host registers */ ++ "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t" ++ "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t" ++ "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t" ++ "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t" ++ "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t" ++ "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t" ++#ifdef CONFIG_X86_64 ++ "mov %%r8, %c[r8](%[svm]) \n\t" ++ "mov %%r9, %c[r9](%[svm]) \n\t" ++ "mov %%r10, %c[r10](%[svm]) \n\t" ++ "mov %%r11, %c[r11](%[svm]) \n\t" ++ "mov %%r12, %c[r12](%[svm]) \n\t" ++ "mov %%r13, %c[r13](%[svm]) \n\t" ++ "mov %%r14, %c[r14](%[svm]) \n\t" ++ "mov %%r15, %c[r15](%[svm]) \n\t" ++#endif ++ /* ++ * Clear host registers marked as clobbered to prevent ++ * speculative use. ++ */ ++ "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" ++ "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" ++ "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" ++ "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" ++ "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" ++#ifdef CONFIG_X86_64 ++ "xor %%r8, %%r8 \n\t" ++ "xor %%r9, %%r9 \n\t" ++ "xor %%r10, %%r10 \n\t" ++ "xor %%r11, %%r11 \n\t" ++ "xor %%r12, %%r12 \n\t" ++ "xor %%r13, %%r13 \n\t" ++ "xor %%r14, %%r14 \n\t" ++ "xor %%r15, %%r15 \n\t" ++#endif ++ "pop %%" _ASM_BP ++ : ++ : [svm]"a"(svm), ++ [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), ++ [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])), ++ [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])), ++ [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])), ++ [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])), ++ [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])), ++ [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP])) ++#ifdef CONFIG_X86_64 ++ , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])), ++ [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])), ++ [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])), ++ [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])), ++ [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])), ++ [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])), ++ [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])), ++ [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) ++#endif ++ : "cc", "memory" ++#ifdef CONFIG_X86_64 ++ , "rbx", "rcx", "rdx", "rsi", "rdi" ++ , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" ++#else ++ , "ebx", "ecx", "edx", "esi", "edi" ++#endif ++ ); ++ ++ /* Eliminate branch target predictions from guest mode */ ++ 
vmexit_fill_RSB(); ++ ++#ifdef CONFIG_X86_64 ++ wrmsrl(MSR_GS_BASE, svm->host.gs_base); ++#else ++ loadsegment(fs, svm->host.fs); ++#ifndef CONFIG_X86_32_LAZY_GS ++ loadsegment(gs, svm->host.gs); ++#endif ++#endif ++ ++ /* ++ * We do not use IBRS in the kernel. If this vCPU has used the ++ * SPEC_CTRL MSR it may have left it on; save the value and ++ * turn it off. This is much more efficient than blindly adding ++ * it to the atomic save/restore list. Especially as the former ++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. ++ * ++ * For non-nested case: ++ * If the L01 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ * ++ * For nested case: ++ * If the L02 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ */ ++ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) ++ svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); ++ ++ reload_tss(vcpu); ++ ++ local_irq_disable(); ++ ++ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); ++ ++ vcpu->arch.cr2 = svm->vmcb->save.cr2; ++ vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; ++ vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; ++ vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; ++ ++ if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) ++ kvm_before_interrupt(&svm->vcpu); ++ ++ kvm_put_guest_xcr0(vcpu); ++ stgi(); ++ ++ /* Any pending NMI will happen here */ ++ ++ if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) ++ kvm_after_interrupt(&svm->vcpu); ++ ++ sync_cr8_to_lapic(vcpu); ++ ++ svm->next_rip = 0; ++ ++ svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; ++ ++ /* if exit due to PF check for async PF */ ++ if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) ++ svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); ++ ++ if (npt_enabled) { ++ vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); ++ vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); ++ } ++ ++ /* ++ * We need to handle MC intercepts here before the vcpu has a chance to ++ * change the physical cpu ++ */ ++ if (unlikely(svm->vmcb->control.exit_code == ++ SVM_EXIT_EXCP_BASE + MC_VECTOR)) ++ svm_handle_mce(svm); ++ ++ mark_all_clean(svm->vmcb); ++} ++STACK_FRAME_NON_STANDARD(svm_vcpu_run); ++ ++static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.cr3 = __sme_set(root); ++ mark_dirty(svm->vmcb, VMCB_CR); ++} ++ ++static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->control.nested_cr3 = __sme_set(root); ++ mark_dirty(svm->vmcb, VMCB_NPT); ++ ++ /* Also sync guest cr3 here in case we live migrate */ ++ svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); ++ mark_dirty(svm->vmcb, VMCB_CR); ++} ++ ++static int is_disabled(void) ++{ ++ u64 vm_cr; ++ ++ rdmsrl(MSR_VM_CR, vm_cr); ++ if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE)) ++ return 1; ++ ++ return 0; ++} ++ ++static void ++svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) ++{ ++ /* ++ * Patch in the VMMCALL instruction: ++ */ ++ hypercall[0] = 0x0f; ++ hypercall[1] = 0x01; ++ hypercall[2] = 0xd9; ++} ++ ++static void svm_check_processor_compat(void *rtn) ++{ ++ *(int *)rtn = 0; ++} ++ ++static bool svm_cpu_has_accelerated_tpr(void) ++{ ++ return false; ++} ++ ++static bool svm_has_emulated_msr(int index) ++{ ++ switch (index) { ++ case MSR_IA32_MCG_EXT_CTL: ++ return false; ++ default: ++ break; ++ } ++ ++ return true; ++} ++ 
++static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) ++{ ++ return 0; ++} ++ ++static void svm_cpuid_update(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ /* Update nrips enabled cache */ ++ svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); ++ ++ if (!kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); ++} ++ ++static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) ++{ ++ switch (func) { ++ case 0x1: ++ if (avic) ++ entry->ecx &= ~bit(X86_FEATURE_X2APIC); ++ break; ++ case 0x80000001: ++ if (nested) ++ entry->ecx |= (1 << 2); /* Set SVM bit */ ++ break; ++ case 0x8000000A: ++ entry->eax = 1; /* SVM revision 1 */ ++ entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper ++ ASID emulation to nested SVM */ ++ entry->ecx = 0; /* Reserved */ ++ entry->edx = 0; /* Per default do not support any ++ additional features */ ++ ++ /* Support next_rip if host supports it */ ++ if (boot_cpu_has(X86_FEATURE_NRIPS)) ++ entry->edx |= SVM_FEATURE_NRIP; ++ ++ /* Support NPT for the guest if enabled */ ++ if (npt_enabled) ++ entry->edx |= SVM_FEATURE_NPT; ++ ++ break; ++ case 0x8000001F: ++ /* Support memory encryption cpuid if host supports it */ ++ if (boot_cpu_has(X86_FEATURE_SEV)) ++ cpuid(0x8000001f, &entry->eax, &entry->ebx, ++ &entry->ecx, &entry->edx); ++ ++ } ++} ++ ++static int svm_get_lpage_level(void) ++{ ++ return PT_PDPE_LEVEL; ++} ++ ++static bool svm_rdtscp_supported(void) ++{ ++ return boot_cpu_has(X86_FEATURE_RDTSCP); ++} ++ ++static bool svm_invpcid_supported(void) ++{ ++ return false; ++} ++ ++static bool svm_mpx_supported(void) ++{ ++ return false; ++} ++ ++static bool svm_xsaves_supported(void) ++{ ++ return false; ++} ++ ++static bool svm_umip_emulated(void) ++{ ++ return false; ++} ++ ++static bool svm_has_wbinvd_exit(void) ++{ ++ return true; ++} ++ ++#define PRE_EX(exit) { .exit_code = (exit), \ ++ .stage = X86_ICPT_PRE_EXCEPT, } ++#define POST_EX(exit) { .exit_code = (exit), \ ++ .stage = X86_ICPT_POST_EXCEPT, } ++#define POST_MEM(exit) { .exit_code = (exit), \ ++ .stage = X86_ICPT_POST_MEMACCESS, } ++ ++static const struct __x86_intercept { ++ u32 exit_code; ++ enum x86_intercept_stage stage; ++} x86_intercept_map[] = { ++ [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), ++ [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), ++ [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), ++ [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), ++ [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), ++ [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), ++ [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), ++ [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ), ++ [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ), ++ [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE), ++ [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE), ++ [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ), ++ [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), ++ [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE), ++ [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), ++ [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN), ++ [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL), ++ [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD), ++ [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE), ++ [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI), ++ [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), ++ [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), ++ [x86_intercept_invlpga] = 
POST_EX(SVM_EXIT_INVLPGA), ++ [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), ++ [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), ++ [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), ++ [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG), ++ [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD), ++ [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD), ++ [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR), ++ [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC), ++ [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR), ++ [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), ++ [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), ++ [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), ++ [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE), ++ [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF), ++ [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF), ++ [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT), ++ [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), ++ [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), ++ [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), ++ [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO), ++ [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), ++ [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), ++ [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), ++}; ++ ++#undef PRE_EX ++#undef POST_EX ++#undef POST_MEM ++ ++static int svm_check_intercept(struct kvm_vcpu *vcpu, ++ struct x86_instruction_info *info, ++ enum x86_intercept_stage stage) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ int vmexit, ret = X86EMUL_CONTINUE; ++ struct __x86_intercept icpt_info; ++ struct vmcb *vmcb = svm->vmcb; ++ ++ if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) ++ goto out; ++ ++ icpt_info = x86_intercept_map[info->intercept]; ++ ++ if (stage != icpt_info.stage) ++ goto out; ++ ++ switch (icpt_info.exit_code) { ++ case SVM_EXIT_READ_CR0: ++ if (info->intercept == x86_intercept_cr_read) ++ icpt_info.exit_code += info->modrm_reg; ++ break; ++ case SVM_EXIT_WRITE_CR0: { ++ unsigned long cr0, val; ++ u64 intercept; ++ ++ if (info->intercept == x86_intercept_cr_write) ++ icpt_info.exit_code += info->modrm_reg; ++ ++ if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 || ++ info->intercept == x86_intercept_clts) ++ break; ++ ++ intercept = svm->nested.intercept; ++ ++ if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) ++ break; ++ ++ cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; ++ val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; ++ ++ if (info->intercept == x86_intercept_lmsw) { ++ cr0 &= 0xfUL; ++ val &= 0xfUL; ++ /* lmsw can't clear PE - catch this here */ ++ if (cr0 & X86_CR0_PE) ++ val |= X86_CR0_PE; ++ } ++ ++ if (cr0 ^ val) ++ icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; ++ ++ break; ++ } ++ case SVM_EXIT_READ_DR0: ++ case SVM_EXIT_WRITE_DR0: ++ icpt_info.exit_code += info->modrm_reg; ++ break; ++ case SVM_EXIT_MSR: ++ if (info->intercept == x86_intercept_wrmsr) ++ vmcb->control.exit_info_1 = 1; ++ else ++ vmcb->control.exit_info_1 = 0; ++ break; ++ case SVM_EXIT_PAUSE: ++ /* ++ * We get this for NOP only, but pause ++ * is rep not, check this here ++ */ ++ if (info->rep_prefix != REPE_PREFIX) ++ goto out; ++ break; ++ case SVM_EXIT_IOIO: { ++ u64 exit_info; ++ u32 bytes; ++ ++ if (info->intercept == x86_intercept_in || ++ info->intercept == x86_intercept_ins) { ++ exit_info = ((info->src_val & 0xffff) << 16) | ++ SVM_IOIO_TYPE_MASK; ++ bytes = info->dst_bytes; ++ } else { ++ exit_info = (info->dst_val & 0xffff) << 16; ++ bytes = info->src_bytes; ++ } ++ ++ if (info->intercept == x86_intercept_outs || ++ info->intercept == x86_intercept_ins) ++ exit_info |= SVM_IOIO_STR_MASK; ++ 
++ if (info->rep_prefix) ++ exit_info |= SVM_IOIO_REP_MASK; ++ ++ bytes = min(bytes, 4u); ++ ++ exit_info |= bytes << SVM_IOIO_SIZE_SHIFT; ++ ++ exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1); ++ ++ vmcb->control.exit_info_1 = exit_info; ++ vmcb->control.exit_info_2 = info->next_rip; ++ ++ break; ++ } ++ default: ++ break; ++ } ++ ++ /* TODO: Advertise NRIPS to guest hypervisor unconditionally */ ++ if (static_cpu_has(X86_FEATURE_NRIPS)) ++ vmcb->control.next_rip = info->next_rip; ++ vmcb->control.exit_code = icpt_info.exit_code; ++ vmexit = nested_svm_exit_handled(svm); ++ ++ ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED ++ : X86EMUL_CONTINUE; ++ ++out: ++ return ret; ++} ++ ++static void svm_handle_external_intr(struct kvm_vcpu *vcpu) ++{ ++ local_irq_enable(); ++ /* ++ * We must have an instruction with interrupts enabled, so ++ * the timer interrupt isn't delayed by the interrupt shadow. ++ */ ++ asm("nop"); ++ local_irq_disable(); ++} ++ ++static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) ++{ ++ if (pause_filter_thresh) ++ shrink_ple_window(vcpu); ++} ++ ++static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) ++{ ++ if (avic_handle_apic_id_update(vcpu) != 0) ++ return; ++ if (avic_handle_dfr_update(vcpu) != 0) ++ return; ++ avic_handle_ldr_update(vcpu); ++} ++ ++static void svm_setup_mce(struct kvm_vcpu *vcpu) ++{ ++ /* [63:9] are reserved. */ ++ vcpu->arch.mcg_cap &= 0x1ff; ++} ++ ++static int svm_smi_allowed(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ /* Per APM Vol.2 15.22.2 "Response to SMI" */ ++ if (!gif_set(svm)) ++ return 0; ++ ++ if (is_guest_mode(&svm->vcpu) && ++ svm->nested.intercept & (1ULL << INTERCEPT_SMI)) { ++ /* TODO: Might need to set exit_info_1 and exit_info_2 here */ ++ svm->vmcb->control.exit_code = SVM_EXIT_SMI; ++ svm->nested.exit_required = true; ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ int ret; ++ ++ if (is_guest_mode(vcpu)) { ++ /* FED8h - SVM Guest */ ++ put_smstate(u64, smstate, 0x7ed8, 1); ++ /* FEE0h - SVM Guest VMCB Physical Address */ ++ put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb); ++ ++ svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; ++ svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; ++ svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; ++ ++ ret = nested_svm_vmexit(svm); ++ if (ret) ++ return ret; ++ } ++ return 0; ++} ++ ++static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb *nested_vmcb; ++ struct page *page; ++ struct { ++ u64 guest; ++ u64 vmcb; ++ } svm_state_save; ++ int ret; ++ ++ ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save, ++ sizeof(svm_state_save)); ++ if (ret) ++ return ret; ++ ++ if (svm_state_save.guest) { ++ vcpu->arch.hflags &= ~HF_SMM_MASK; ++ nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page); ++ if (nested_vmcb) ++ enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page); ++ else ++ ret = 1; ++ vcpu->arch.hflags |= HF_SMM_MASK; ++ } ++ return ret; ++} ++ ++static int enable_smi_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (!gif_set(svm)) { ++ if (vgif_enabled(svm)) ++ set_intercept(svm, INTERCEPT_STGI); ++ /* STGI will cause a vm exit */ ++ return 1; ++ } ++ return 0; ++} ++ ++static int sev_asid_new(void) ++{ ++ int pos; ++ ++ /* ++ * SEV-enabled guest must use asid from 
min_sev_asid to max_sev_asid. ++ */ ++ pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1); ++ if (pos >= max_sev_asid) ++ return -EBUSY; ++ ++ set_bit(pos, sev_asid_bitmap); ++ return pos + 1; ++} ++ ++static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ int asid, ret; ++ ++ ret = -EBUSY; ++ if (unlikely(sev->active)) ++ return ret; ++ ++ asid = sev_asid_new(); ++ if (asid < 0) ++ return ret; ++ ++ ret = sev_platform_init(&argp->error); ++ if (ret) ++ goto e_free; ++ ++ sev->active = true; ++ sev->asid = asid; ++ INIT_LIST_HEAD(&sev->regions_list); ++ ++ return 0; ++ ++e_free: ++ __sev_asid_free(asid); ++ return ret; ++} ++ ++static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) ++{ ++ struct sev_data_activate *data; ++ int asid = sev_get_asid(kvm); ++ int ret; ++ ++ wbinvd_on_all_cpus(); ++ ++ ret = sev_guest_df_flush(error); ++ if (ret) ++ return ret; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ /* activate ASID on the given handle */ ++ data->handle = handle; ++ data->asid = asid; ++ ret = sev_guest_activate(data, error); ++ kfree(data); ++ ++ return ret; ++} ++ ++static int __sev_issue_cmd(int fd, int id, void *data, int *error) ++{ ++ struct fd f; ++ int ret; ++ ++ f = fdget(fd); ++ if (!f.file) ++ return -EBADF; ++ ++ ret = sev_issue_cmd_external_user(f.file, id, data, error); ++ ++ fdput(f); ++ return ret; ++} ++ ++static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ ++ return __sev_issue_cmd(sev->fd, id, data, error); ++} ++ ++static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct sev_data_launch_start *start; ++ struct kvm_sev_launch_start params; ++ void *dh_blob, *session_blob; ++ int *error = &argp->error; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) ++ return -EFAULT; ++ ++ start = kzalloc(sizeof(*start), GFP_KERNEL); ++ if (!start) ++ return -ENOMEM; ++ ++ dh_blob = NULL; ++ if (params.dh_uaddr) { ++ dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len); ++ if (IS_ERR(dh_blob)) { ++ ret = PTR_ERR(dh_blob); ++ goto e_free; ++ } ++ ++ start->dh_cert_address = __sme_set(__pa(dh_blob)); ++ start->dh_cert_len = params.dh_len; ++ } ++ ++ session_blob = NULL; ++ if (params.session_uaddr) { ++ session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len); ++ if (IS_ERR(session_blob)) { ++ ret = PTR_ERR(session_blob); ++ goto e_free_dh; ++ } ++ ++ start->session_address = __sme_set(__pa(session_blob)); ++ start->session_len = params.session_len; ++ } ++ ++ start->handle = params.handle; ++ start->policy = params.policy; ++ ++ /* create memory encryption context */ ++ ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error); ++ if (ret) ++ goto e_free_session; ++ ++ /* Bind ASID to this guest */ ++ ret = sev_bind_asid(kvm, start->handle, error); ++ if (ret) ++ goto e_free_session; ++ ++ /* return handle to userspace */ ++ params.handle = start->handle; ++ if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) { ++ sev_unbind_asid(kvm, start->handle); ++ ret = -EFAULT; ++ goto e_free_session; ++ } ++ ++ sev->handle = start->handle; ++ sev->fd = argp->sev_fd; ++ ++e_free_session: ++ 
kfree(session_blob); ++e_free_dh: ++ kfree(dh_blob); ++e_free: ++ kfree(start); ++ return ret; ++} ++ ++static unsigned long get_num_contig_pages(unsigned long idx, ++ struct page **inpages, unsigned long npages) ++{ ++ unsigned long paddr, next_paddr; ++ unsigned long i = idx + 1, pages = 1; ++ ++ /* find the number of contiguous pages starting from idx */ ++ paddr = __sme_page_pa(inpages[idx]); ++ while (i < npages) { ++ next_paddr = __sme_page_pa(inpages[i++]); ++ if ((paddr + PAGE_SIZE) == next_paddr) { ++ pages++; ++ paddr = next_paddr; ++ continue; ++ } ++ break; ++ } ++ ++ return pages; ++} ++ ++static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i; ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct kvm_sev_launch_update_data params; ++ struct sev_data_launch_update_data *data; ++ struct page **inpages; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) ++ return -EFAULT; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ vaddr = params.uaddr; ++ size = params.len; ++ vaddr_end = vaddr + size; ++ ++ /* Lock the user memory. */ ++ inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1); ++ if (!inpages) { ++ ret = -ENOMEM; ++ goto e_free; ++ } ++ ++ /* ++ * The LAUNCH_UPDATE command will perform in-place encryption of the ++ * memory content (i.e it will write the same memory region with C=1). ++ * It's possible that the cache may contain the data with C=0, i.e., ++ * unencrypted so invalidate it first. ++ */ ++ sev_clflush_pages(inpages, npages); ++ ++ for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) { ++ int offset, len; ++ ++ /* ++ * If the user buffer is not page-aligned, calculate the offset ++ * within the page. ++ */ ++ offset = vaddr & (PAGE_SIZE - 1); ++ ++ /* Calculate the number of pages that can be encrypted in one go. 
*/ ++ pages = get_num_contig_pages(i, inpages, npages); ++ ++ len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size); ++ ++ data->handle = sev->handle; ++ data->len = len; ++ data->address = __sme_page_pa(inpages[i]) + offset; ++ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error); ++ if (ret) ++ goto e_unpin; ++ ++ size -= len; ++ next_vaddr = vaddr + len; ++ } ++ ++e_unpin: ++ /* content of memory is updated, mark pages dirty */ ++ for (i = 0; i < npages; i++) { ++ set_page_dirty_lock(inpages[i]); ++ mark_page_accessed(inpages[i]); ++ } ++ /* unlock the user pages */ ++ sev_unpin_memory(kvm, inpages, npages); ++e_free: ++ kfree(data); ++ return ret; ++} ++ ++static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ void __user *measure = (void __user *)(uintptr_t)argp->data; ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct sev_data_launch_measure *data; ++ struct kvm_sev_launch_measure params; ++ void __user *p = NULL; ++ void *blob = NULL; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (copy_from_user(¶ms, measure, sizeof(params))) ++ return -EFAULT; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ /* User wants to query the blob length */ ++ if (!params.len) ++ goto cmd; ++ ++ p = (void __user *)(uintptr_t)params.uaddr; ++ if (p) { ++ if (params.len > SEV_FW_BLOB_MAX_SIZE) { ++ ret = -EINVAL; ++ goto e_free; ++ } ++ ++ ret = -ENOMEM; ++ blob = kmalloc(params.len, GFP_KERNEL); ++ if (!blob) ++ goto e_free; ++ ++ data->address = __psp_pa(blob); ++ data->len = params.len; ++ } ++ ++cmd: ++ data->handle = sev->handle; ++ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error); ++ ++ /* ++ * If we query the session length, FW responded with expected data. 
++ */ ++ if (!params.len) ++ goto done; ++ ++ if (ret) ++ goto e_free_blob; ++ ++ if (blob) { ++ if (copy_to_user(p, blob, params.len)) ++ ret = -EFAULT; ++ } ++ ++done: ++ params.len = data->len; ++ if (copy_to_user(measure, ¶ms, sizeof(params))) ++ ret = -EFAULT; ++e_free_blob: ++ kfree(blob); ++e_free: ++ kfree(data); ++ return ret; ++} ++ ++static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct sev_data_launch_finish *data; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ data->handle = sev->handle; ++ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error); ++ ++ kfree(data); ++ return ret; ++} ++ ++static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct kvm_sev_guest_status params; ++ struct sev_data_guest_status *data; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ data->handle = sev->handle; ++ ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error); ++ if (ret) ++ goto e_free; ++ ++ params.policy = data->policy; ++ params.state = data->state; ++ params.handle = data->handle; ++ ++ if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) ++ ret = -EFAULT; ++e_free: ++ kfree(data); ++ return ret; ++} ++ ++static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, ++ unsigned long dst, int size, ++ int *error, bool enc) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct sev_data_dbg *data; ++ int ret; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ data->handle = sev->handle; ++ data->dst_addr = dst; ++ data->src_addr = src; ++ data->len = size; ++ ++ ret = sev_issue_cmd(kvm, ++ enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT, ++ data, error); ++ kfree(data); ++ return ret; ++} ++ ++static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, ++ unsigned long dst_paddr, int sz, int *err) ++{ ++ int offset; ++ ++ /* ++ * Its safe to read more than we are asked, caller should ensure that ++ * destination has enough space. 
++ */ ++ src_paddr = round_down(src_paddr, 16); ++ offset = src_paddr & 15; ++ sz = round_up(sz + offset, 16); ++ ++ return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false); ++} ++ ++static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, ++ unsigned long __user dst_uaddr, ++ unsigned long dst_paddr, ++ int size, int *err) ++{ ++ struct page *tpage = NULL; ++ int ret, offset; ++ ++ /* if inputs are not 16-byte then use intermediate buffer */ ++ if (!IS_ALIGNED(dst_paddr, 16) || ++ !IS_ALIGNED(paddr, 16) || ++ !IS_ALIGNED(size, 16)) { ++ tpage = (void *)alloc_page(GFP_KERNEL); ++ if (!tpage) ++ return -ENOMEM; ++ ++ dst_paddr = __sme_page_pa(tpage); ++ } ++ ++ ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err); ++ if (ret) ++ goto e_free; ++ ++ if (tpage) { ++ offset = paddr & 15; ++ if (copy_to_user((void __user *)(uintptr_t)dst_uaddr, ++ page_address(tpage) + offset, size)) ++ ret = -EFAULT; ++ } ++ ++e_free: ++ if (tpage) ++ __free_page(tpage); ++ ++ return ret; ++} ++ ++static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, ++ unsigned long __user vaddr, ++ unsigned long dst_paddr, ++ unsigned long __user dst_vaddr, ++ int size, int *error) ++{ ++ struct page *src_tpage = NULL; ++ struct page *dst_tpage = NULL; ++ int ret, len = size; ++ ++ /* If source buffer is not aligned then use an intermediate buffer */ ++ if (!IS_ALIGNED(vaddr, 16)) { ++ src_tpage = alloc_page(GFP_KERNEL); ++ if (!src_tpage) ++ return -ENOMEM; ++ ++ if (copy_from_user(page_address(src_tpage), ++ (void __user *)(uintptr_t)vaddr, size)) { ++ __free_page(src_tpage); ++ return -EFAULT; ++ } ++ ++ paddr = __sme_page_pa(src_tpage); ++ } ++ ++ /* ++ * If destination buffer or length is not aligned then do read-modify-write: ++ * - decrypt destination in an intermediate buffer ++ * - copy the source buffer in an intermediate buffer ++ * - use the intermediate buffer as source buffer ++ */ ++ if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { ++ int dst_offset; ++ ++ dst_tpage = alloc_page(GFP_KERNEL); ++ if (!dst_tpage) { ++ ret = -ENOMEM; ++ goto e_free; ++ } ++ ++ ret = __sev_dbg_decrypt(kvm, dst_paddr, ++ __sme_page_pa(dst_tpage), size, error); ++ if (ret) ++ goto e_free; ++ ++ /* ++ * If source is kernel buffer then use memcpy() otherwise ++ * copy_from_user(). 
++ */ ++ dst_offset = dst_paddr & 15; ++ ++ if (src_tpage) ++ memcpy(page_address(dst_tpage) + dst_offset, ++ page_address(src_tpage), size); ++ else { ++ if (copy_from_user(page_address(dst_tpage) + dst_offset, ++ (void __user *)(uintptr_t)vaddr, size)) { ++ ret = -EFAULT; ++ goto e_free; ++ } ++ } ++ ++ paddr = __sme_page_pa(dst_tpage); ++ dst_paddr = round_down(dst_paddr, 16); ++ len = round_up(size, 16); ++ } ++ ++ ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true); ++ ++e_free: ++ if (src_tpage) ++ __free_page(src_tpage); ++ if (dst_tpage) ++ __free_page(dst_tpage); ++ return ret; ++} ++ ++static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) ++{ ++ unsigned long vaddr, vaddr_end, next_vaddr; ++ unsigned long dst_vaddr; ++ struct page **src_p, **dst_p; ++ struct kvm_sev_dbg debug; ++ unsigned long n; ++ unsigned int size; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug))) ++ return -EFAULT; ++ ++ if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr) ++ return -EINVAL; ++ if (!debug.dst_uaddr) ++ return -EINVAL; ++ ++ vaddr = debug.src_uaddr; ++ size = debug.len; ++ vaddr_end = vaddr + size; ++ dst_vaddr = debug.dst_uaddr; ++ ++ for (; vaddr < vaddr_end; vaddr = next_vaddr) { ++ int len, s_off, d_off; ++ ++ /* lock userspace source and destination page */ ++ src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0); ++ if (!src_p) ++ return -EFAULT; ++ ++ dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1); ++ if (!dst_p) { ++ sev_unpin_memory(kvm, src_p, n); ++ return -EFAULT; ++ } ++ ++ /* ++ * The DBG_{DE,EN}CRYPT commands will perform {dec,en}cryption of the ++ * memory content (i.e it will write the same memory region with C=1). ++ * It's possible that the cache may contain the data with C=0, i.e., ++ * unencrypted so invalidate it first. ++ */ ++ sev_clflush_pages(src_p, 1); ++ sev_clflush_pages(dst_p, 1); ++ ++ /* ++ * Since user buffer may not be page aligned, calculate the ++ * offset within the page. ++ */ ++ s_off = vaddr & ~PAGE_MASK; ++ d_off = dst_vaddr & ~PAGE_MASK; ++ len = min_t(size_t, (PAGE_SIZE - s_off), size); ++ ++ if (dec) ++ ret = __sev_dbg_decrypt_user(kvm, ++ __sme_page_pa(src_p[0]) + s_off, ++ dst_vaddr, ++ __sme_page_pa(dst_p[0]) + d_off, ++ len, &argp->error); ++ else ++ ret = __sev_dbg_encrypt_user(kvm, ++ __sme_page_pa(src_p[0]) + s_off, ++ vaddr, ++ __sme_page_pa(dst_p[0]) + d_off, ++ dst_vaddr, ++ len, &argp->error); ++ ++ sev_unpin_memory(kvm, src_p, n); ++ sev_unpin_memory(kvm, dst_p, n); ++ ++ if (ret) ++ goto err; ++ ++ next_vaddr = vaddr + len; ++ dst_vaddr = dst_vaddr + len; ++ size -= len; ++ } ++err: ++ return ret; ++} ++ ++static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct sev_data_launch_secret *data; ++ struct kvm_sev_launch_secret params; ++ struct page **pages; ++ void *blob, *hdr; ++ unsigned long n; ++ int ret, offset; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) ++ return -EFAULT; ++ ++ pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1); ++ if (!pages) ++ return -ENOMEM; ++ ++ /* ++ * The secret must be copied into contiguous memory region, lets verify ++ * that userspace memory pages are contiguous before we issue command. 
++ */ ++ if (get_num_contig_pages(0, pages, n) != n) { ++ ret = -EINVAL; ++ goto e_unpin_memory; ++ } ++ ++ ret = -ENOMEM; ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ goto e_unpin_memory; ++ ++ offset = params.guest_uaddr & (PAGE_SIZE - 1); ++ data->guest_address = __sme_page_pa(pages[0]) + offset; ++ data->guest_len = params.guest_len; ++ ++ blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); ++ if (IS_ERR(blob)) { ++ ret = PTR_ERR(blob); ++ goto e_free; ++ } ++ ++ data->trans_address = __psp_pa(blob); ++ data->trans_len = params.trans_len; ++ ++ hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len); ++ if (IS_ERR(hdr)) { ++ ret = PTR_ERR(hdr); ++ goto e_free_blob; ++ } ++ data->hdr_address = __psp_pa(hdr); ++ data->hdr_len = params.hdr_len; ++ ++ data->handle = sev->handle; ++ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); ++ ++ kfree(hdr); ++ ++e_free_blob: ++ kfree(blob); ++e_free: ++ kfree(data); ++e_unpin_memory: ++ sev_unpin_memory(kvm, pages, n); ++ return ret; ++} ++ ++static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) ++{ ++ struct kvm_sev_cmd sev_cmd; ++ int r; ++ ++ if (!svm_sev_enabled()) ++ return -ENOTTY; ++ ++ if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd))) ++ return -EFAULT; ++ ++ mutex_lock(&kvm->lock); ++ ++ switch (sev_cmd.id) { ++ case KVM_SEV_INIT: ++ r = sev_guest_init(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_LAUNCH_START: ++ r = sev_launch_start(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_LAUNCH_UPDATE_DATA: ++ r = sev_launch_update_data(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_LAUNCH_MEASURE: ++ r = sev_launch_measure(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_LAUNCH_FINISH: ++ r = sev_launch_finish(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_GUEST_STATUS: ++ r = sev_guest_status(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_DBG_DECRYPT: ++ r = sev_dbg_crypt(kvm, &sev_cmd, true); ++ break; ++ case KVM_SEV_DBG_ENCRYPT: ++ r = sev_dbg_crypt(kvm, &sev_cmd, false); ++ break; ++ case KVM_SEV_LAUNCH_SECRET: ++ r = sev_launch_secret(kvm, &sev_cmd); ++ break; ++ default: ++ r = -EINVAL; ++ goto out; ++ } ++ ++ if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd))) ++ r = -EFAULT; ++ ++out: ++ mutex_unlock(&kvm->lock); ++ return r; ++} ++ ++static int svm_register_enc_region(struct kvm *kvm, ++ struct kvm_enc_region *range) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct enc_region *region; ++ int ret = 0; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (range->addr > ULONG_MAX || range->size > ULONG_MAX) ++ return -EINVAL; ++ ++ region = kzalloc(sizeof(*region), GFP_KERNEL); ++ if (!region) ++ return -ENOMEM; ++ ++ region->pages = sev_pin_memory(kvm, range->addr, range->size, ®ion->npages, 1); ++ if (!region->pages) { ++ ret = -ENOMEM; ++ goto e_free; ++ } ++ ++ /* ++ * The guest may change the memory encryption attribute from C=0 -> C=1 ++ * or vice versa for this memory range. Lets make sure caches are ++ * flushed to ensure that guest data gets written into memory with ++ * correct C-bit. 
++ */ ++ sev_clflush_pages(region->pages, region->npages); ++ ++ region->uaddr = range->addr; ++ region->size = range->size; ++ ++ mutex_lock(&kvm->lock); ++ list_add_tail(®ion->list, &sev->regions_list); ++ mutex_unlock(&kvm->lock); ++ ++ return ret; ++ ++e_free: ++ kfree(region); ++ return ret; ++} ++ ++static struct enc_region * ++find_enc_region(struct kvm *kvm, struct kvm_enc_region *range) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct list_head *head = &sev->regions_list; ++ struct enc_region *i; ++ ++ list_for_each_entry(i, head, list) { ++ if (i->uaddr == range->addr && ++ i->size == range->size) ++ return i; ++ } ++ ++ return NULL; ++} ++ ++ ++static int svm_unregister_enc_region(struct kvm *kvm, ++ struct kvm_enc_region *range) ++{ ++ struct enc_region *region; ++ int ret; ++ ++ mutex_lock(&kvm->lock); ++ ++ if (!sev_guest(kvm)) { ++ ret = -ENOTTY; ++ goto failed; ++ } ++ ++ region = find_enc_region(kvm, range); ++ if (!region) { ++ ret = -EINVAL; ++ goto failed; ++ } ++ ++ __unregister_enc_region_locked(kvm, region); ++ ++ mutex_unlock(&kvm->lock); ++ return 0; ++ ++failed: ++ mutex_unlock(&kvm->lock); ++ return ret; ++} ++ ++static struct kvm_x86_ops svm_x86_ops __ro_after_init = { ++ .cpu_has_kvm_support = has_svm, ++ .disabled_by_bios = is_disabled, ++ .hardware_setup = svm_hardware_setup, ++ .hardware_unsetup = svm_hardware_unsetup, ++ .check_processor_compatibility = svm_check_processor_compat, ++ .hardware_enable = svm_hardware_enable, ++ .hardware_disable = svm_hardware_disable, ++ .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, ++ .has_emulated_msr = svm_has_emulated_msr, ++ ++ .vcpu_create = svm_create_vcpu, ++ .vcpu_free = svm_free_vcpu, ++ .vcpu_reset = svm_vcpu_reset, ++ ++ .vm_alloc = svm_vm_alloc, ++ .vm_free = svm_vm_free, ++ .vm_init = avic_vm_init, ++ .vm_destroy = svm_vm_destroy, ++ ++ .prepare_guest_switch = svm_prepare_guest_switch, ++ .vcpu_load = svm_vcpu_load, ++ .vcpu_put = svm_vcpu_put, ++ .vcpu_blocking = svm_vcpu_blocking, ++ .vcpu_unblocking = svm_vcpu_unblocking, ++ ++ .update_bp_intercept = update_bp_intercept, ++ .get_msr_feature = svm_get_msr_feature, ++ .get_msr = svm_get_msr, ++ .set_msr = svm_set_msr, ++ .get_segment_base = svm_get_segment_base, ++ .get_segment = svm_get_segment, ++ .set_segment = svm_set_segment, ++ .get_cpl = svm_get_cpl, ++ .get_cs_db_l_bits = kvm_get_cs_db_l_bits, ++ .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, ++ .decache_cr3 = svm_decache_cr3, ++ .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, ++ .set_cr0 = svm_set_cr0, ++ .set_cr3 = svm_set_cr3, ++ .set_cr4 = svm_set_cr4, ++ .set_efer = svm_set_efer, ++ .get_idt = svm_get_idt, ++ .set_idt = svm_set_idt, ++ .get_gdt = svm_get_gdt, ++ .set_gdt = svm_set_gdt, ++ .get_dr6 = svm_get_dr6, ++ .set_dr6 = svm_set_dr6, ++ .set_dr7 = svm_set_dr7, ++ .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, ++ .cache_reg = svm_cache_reg, ++ .get_rflags = svm_get_rflags, ++ .set_rflags = svm_set_rflags, ++ ++ .tlb_flush = svm_flush_tlb, ++ .tlb_flush_gva = svm_flush_tlb_gva, ++ ++ .run = svm_vcpu_run, ++ .handle_exit = handle_exit, ++ .skip_emulated_instruction = skip_emulated_instruction, ++ .set_interrupt_shadow = svm_set_interrupt_shadow, ++ .get_interrupt_shadow = svm_get_interrupt_shadow, ++ .patch_hypercall = svm_patch_hypercall, ++ .set_irq = svm_set_irq, ++ .set_nmi = svm_inject_nmi, ++ .queue_exception = svm_queue_exception, ++ .cancel_injection = svm_cancel_injection, ++ .interrupt_allowed = svm_interrupt_allowed, ++ .nmi_allowed = 
svm_nmi_allowed, ++ .get_nmi_mask = svm_get_nmi_mask, ++ .set_nmi_mask = svm_set_nmi_mask, ++ .enable_nmi_window = enable_nmi_window, ++ .enable_irq_window = enable_irq_window, ++ .update_cr8_intercept = update_cr8_intercept, ++ .set_virtual_apic_mode = svm_set_virtual_apic_mode, ++ .get_enable_apicv = svm_get_enable_apicv, ++ .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, ++ .load_eoi_exitmap = svm_load_eoi_exitmap, ++ .hwapic_irr_update = svm_hwapic_irr_update, ++ .hwapic_isr_update = svm_hwapic_isr_update, ++ .sync_pir_to_irr = kvm_lapic_find_highest_irr, ++ .apicv_post_state_restore = avic_post_state_restore, ++ ++ .set_tss_addr = svm_set_tss_addr, ++ .set_identity_map_addr = svm_set_identity_map_addr, ++ .get_tdp_level = get_npt_level, ++ .get_mt_mask = svm_get_mt_mask, ++ ++ .get_exit_info = svm_get_exit_info, ++ ++ .get_lpage_level = svm_get_lpage_level, ++ ++ .cpuid_update = svm_cpuid_update, ++ ++ .rdtscp_supported = svm_rdtscp_supported, ++ .invpcid_supported = svm_invpcid_supported, ++ .mpx_supported = svm_mpx_supported, ++ .xsaves_supported = svm_xsaves_supported, ++ .umip_emulated = svm_umip_emulated, ++ ++ .set_supported_cpuid = svm_set_supported_cpuid, ++ ++ .has_wbinvd_exit = svm_has_wbinvd_exit, ++ ++ .read_l1_tsc_offset = svm_read_l1_tsc_offset, ++ .write_l1_tsc_offset = svm_write_l1_tsc_offset, ++ ++ .set_tdp_cr3 = set_tdp_cr3, ++ ++ .check_intercept = svm_check_intercept, ++ .handle_external_intr = svm_handle_external_intr, ++ ++ .request_immediate_exit = __kvm_request_immediate_exit, ++ ++ .sched_in = svm_sched_in, ++ ++ .pmu_ops = &amd_pmu_ops, ++ .deliver_posted_interrupt = svm_deliver_avic_intr, ++ .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, ++ .update_pi_irte = svm_update_pi_irte, ++ .setup_mce = svm_setup_mce, ++ ++ .smi_allowed = svm_smi_allowed, ++ .pre_enter_smm = svm_pre_enter_smm, ++ .pre_leave_smm = svm_pre_leave_smm, ++ .enable_smi_window = enable_smi_window, ++ ++ .mem_enc_op = svm_mem_enc_op, ++ .mem_enc_reg_region = svm_register_enc_region, ++ .mem_enc_unreg_region = svm_unregister_enc_region, ++}; ++ ++static int __init svm_init(void) ++{ ++ return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), ++ __alignof__(struct vcpu_svm), THIS_MODULE); ++} ++ ++static void __exit svm_exit(void) ++{ ++ kvm_exit(); ++} ++ ++module_init(svm_init) ++module_exit(svm_exit) +diff -uprN kernel/arch/x86/kvm/vmx.c kernel_new/arch/x86/kvm/vmx.c +--- kernel/arch/x86/kvm/vmx.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kvm/vmx.c 2021-04-01 18:28:07.658863284 +0800 +@@ -2986,19 +2986,23 @@ static void vmx_prepare_switch_to_host(s + #ifdef CONFIG_X86_64 + static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) + { +- preempt_disable(); ++ unsigned long flags; ++ ++ flags = hard_preempt_disable(); + if (vmx->loaded_cpu_state) + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); +- preempt_enable(); ++ hard_preempt_enable(flags); + return vmx->msr_guest_kernel_gs_base; + } + + static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) + { +- preempt_disable(); ++ unsigned long flags; ++ ++ flags = hard_preempt_disable(); + if (vmx->loaded_cpu_state) + wrmsrl(MSR_KERNEL_GS_BASE, data); +- preempt_enable(); ++ hard_preempt_enable(flags); + vmx->msr_guest_kernel_gs_base = data; + } + #endif +@@ -3392,6 +3396,7 @@ static void setup_msrs(struct vcpu_vmx * + { + int save_nmsrs, index; + ++ hard_cond_local_irq_disable(); + save_nmsrs = 0; + #ifdef CONFIG_X86_64 + if (is_long_mode(&vmx->vcpu)) { +@@ 
-3422,6 +3427,7 @@ static void setup_msrs(struct vcpu_vmx * + + vmx->save_nmsrs = save_nmsrs; + vmx->guest_msrs_dirty = true; ++ hard_cond_local_irq_enable(); + + if (cpu_has_vmx_msr_bitmap()) + vmx_update_msr_bitmap(&vmx->vcpu); +@@ -4329,9 +4335,22 @@ static int vmx_set_msr(struct kvm_vcpu * + u64 old_msr_data = msr->data; + msr->data = data; + if (msr - vmx->guest_msrs < vmx->save_nmsrs) { ++ unsigned long flags; ++ + preempt_disable(); ++ flags = hard_cond_local_irq_save(); ++ /* ++ * This may be called without a ipipe notifier ++ * registered, i.e. outside of vcpu_run. In ++ * that case, shared MSRs may be set to guest ++ * state while I-pipe will have no chance to ++ * restore them when interrupting afterwards. ++ * Therefore register the notifier. ++ */ ++ __ipipe_enter_vm(&vcpu->ipipe_notifier); + ret = kvm_set_shared_msr(msr->index, msr->data, + msr->mask); ++ hard_cond_local_irq_restore(flags); + preempt_enable(); + if (ret) + msr->data = old_msr_data; +@@ -11113,7 +11132,9 @@ static struct kvm_vcpu *vmx_create_vcpu( + vmx_vcpu_load(&vmx->vcpu, cpu); + vmx->vcpu.cpu = cpu; + vmx_vcpu_setup(vmx); ++ hard_cond_local_irq_disable(); + vmx_vcpu_put(&vmx->vcpu); ++ hard_cond_local_irq_enable(); + put_cpu(); + if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { + err = alloc_apic_access_page(kvm); +diff -uprN kernel/arch/x86/kvm/vmx.c.orig kernel_new/arch/x86/kvm/vmx.c.orig +--- kernel/arch/x86/kvm/vmx.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kvm/vmx.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,14627 @@ ++/* ++ * Kernel-based Virtual Machine driver for Linux ++ * ++ * This module enables machines with Intel VT-x extensions to run virtual ++ * machines without emulation or binary translation. ++ * ++ * Copyright (C) 2006 Qumranet, Inc. ++ * Copyright 2010 Red Hat, Inc. and/or its affiliates. ++ * ++ * Authors: ++ * Avi Kivity ++ * Yaniv Kamay ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. 
++ * ++ */ ++ ++#include "irq.h" ++#include "mmu.h" ++#include "cpuid.h" ++#include "lapic.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "kvm_cache_regs.h" ++#include "x86.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "trace.h" ++#include "pmu.h" ++#include "vmx_evmcs.h" ++ ++#define __ex(x) __kvm_handle_fault_on_reboot(x) ++#define __ex_clear(x, reg) \ ++ ____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg) ++ ++MODULE_AUTHOR("Qumranet"); ++MODULE_LICENSE("GPL"); ++ ++static const struct x86_cpu_id vmx_cpu_id[] = { ++ X86_FEATURE_MATCH(X86_FEATURE_VMX), ++ {} ++}; ++MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); ++ ++static bool __read_mostly enable_vpid = 1; ++module_param_named(vpid, enable_vpid, bool, 0444); ++ ++static bool __read_mostly enable_vnmi = 1; ++module_param_named(vnmi, enable_vnmi, bool, S_IRUGO); ++ ++static bool __read_mostly flexpriority_enabled = 1; ++module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); ++ ++static bool __read_mostly enable_ept = 1; ++module_param_named(ept, enable_ept, bool, S_IRUGO); ++ ++static bool __read_mostly enable_unrestricted_guest = 1; ++module_param_named(unrestricted_guest, ++ enable_unrestricted_guest, bool, S_IRUGO); ++ ++static bool __read_mostly enable_ept_ad_bits = 1; ++module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); ++ ++static bool __read_mostly emulate_invalid_guest_state = true; ++module_param(emulate_invalid_guest_state, bool, S_IRUGO); ++ ++static bool __read_mostly fasteoi = 1; ++module_param(fasteoi, bool, S_IRUGO); ++ ++static bool __read_mostly enable_apicv = 1; ++module_param(enable_apicv, bool, S_IRUGO); ++ ++static bool __read_mostly enable_shadow_vmcs = 1; ++module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); ++/* ++ * If nested=1, nested virtualization is supported, i.e., guests may use ++ * VMX and be a hypervisor for its own guests. If nested=0, guests may not ++ * use VMX instructions. ++ */ ++static bool __read_mostly nested = 0; ++module_param(nested, bool, S_IRUGO); ++ ++static u64 __read_mostly host_xss; ++ ++static bool __read_mostly enable_pml = 1; ++module_param_named(pml, enable_pml, bool, S_IRUGO); ++ ++#define MSR_TYPE_R 1 ++#define MSR_TYPE_W 2 ++#define MSR_TYPE_RW 3 ++ ++#define MSR_BITMAP_MODE_X2APIC 1 ++#define MSR_BITMAP_MODE_X2APIC_APICV 2 ++ ++#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL ++ ++/* Guest_tsc -> host_tsc conversion requires 64-bit division. 
*/ ++static int __read_mostly cpu_preemption_timer_multi; ++static bool __read_mostly enable_preemption_timer = 1; ++#ifdef CONFIG_X86_64 ++module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); ++#endif ++ ++#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) ++#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE ++#define KVM_VM_CR0_ALWAYS_ON \ ++ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ ++ X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) ++#define KVM_CR4_GUEST_OWNED_BITS \ ++ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ ++ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) ++ ++#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE ++#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) ++#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) ++ ++#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) ++ ++#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5 ++ ++/* ++ * Hyper-V requires all of these, so mark them as supported even though ++ * they are just treated the same as all-context. ++ */ ++#define VMX_VPID_EXTENT_SUPPORTED_MASK \ ++ (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \ ++ VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \ ++ VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \ ++ VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT) ++ ++/* ++ * These 2 parameters are used to config the controls for Pause-Loop Exiting: ++ * ple_gap: upper bound on the amount of time between two successive ++ * executions of PAUSE in a loop. Also indicate if ple enabled. ++ * According to test, this time is usually smaller than 128 cycles. ++ * ple_window: upper bound on the amount of time a guest is allowed to execute ++ * in a PAUSE loop. Tests indicate that most spinlocks are held for ++ * less than 2^12 cycles ++ * Time is measured based on a counter that runs at the same rate as the TSC, ++ * refer SDM volume 3b section 21.6.13 & 22.1.3. ++ */ ++static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; ++module_param(ple_gap, uint, 0444); ++ ++static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; ++module_param(ple_window, uint, 0444); ++ ++/* Default doubles per-vcpu window every exit. */ ++static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW; ++module_param(ple_window_grow, uint, 0444); ++ ++/* Default resets per-vcpu window every exit to ple_window. */ ++static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; ++module_param(ple_window_shrink, uint, 0444); ++ ++/* Default is to compute the maximum so we can never overflow. 
*/ ++static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; ++module_param(ple_window_max, uint, 0444); ++ ++extern const ulong vmx_return; ++ ++static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); ++static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); ++static DEFINE_MUTEX(vmx_l1d_flush_mutex); ++ ++/* Storage for pre module init parameter parsing */ ++static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO; ++ ++static const struct { ++ const char *option; ++ bool for_parse; ++} vmentry_l1d_param[] = { ++ [VMENTER_L1D_FLUSH_AUTO] = {"auto", true}, ++ [VMENTER_L1D_FLUSH_NEVER] = {"never", true}, ++ [VMENTER_L1D_FLUSH_COND] = {"cond", true}, ++ [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true}, ++ [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false}, ++ [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false}, ++}; ++ ++#define L1D_CACHE_ORDER 4 ++static void *vmx_l1d_flush_pages; ++ ++static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) ++{ ++ struct page *page; ++ unsigned int i; ++ ++ if (!enable_ept) { ++ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; ++ return 0; ++ } ++ ++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { ++ u64 msr; ++ ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); ++ if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { ++ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; ++ return 0; ++ } ++ } ++ ++ /* If set to auto use the default l1tf mitigation method */ ++ if (l1tf == VMENTER_L1D_FLUSH_AUTO) { ++ switch (l1tf_mitigation) { ++ case L1TF_MITIGATION_OFF: ++ l1tf = VMENTER_L1D_FLUSH_NEVER; ++ break; ++ case L1TF_MITIGATION_FLUSH_NOWARN: ++ case L1TF_MITIGATION_FLUSH: ++ case L1TF_MITIGATION_FLUSH_NOSMT: ++ l1tf = VMENTER_L1D_FLUSH_COND; ++ break; ++ case L1TF_MITIGATION_FULL: ++ case L1TF_MITIGATION_FULL_FORCE: ++ l1tf = VMENTER_L1D_FLUSH_ALWAYS; ++ break; ++ } ++ } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) { ++ l1tf = VMENTER_L1D_FLUSH_ALWAYS; ++ } ++ ++ if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && ++ !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { ++ page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); ++ if (!page) ++ return -ENOMEM; ++ vmx_l1d_flush_pages = page_address(page); ++ ++ /* ++ * Initialize each page with a different pattern in ++ * order to protect against KSM in the nested ++ * virtualization case. ++ */ ++ for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) { ++ memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1, ++ PAGE_SIZE); ++ } ++ } ++ ++ l1tf_vmx_mitigation = l1tf; ++ ++ if (l1tf != VMENTER_L1D_FLUSH_NEVER) ++ static_branch_enable(&vmx_l1d_should_flush); ++ else ++ static_branch_disable(&vmx_l1d_should_flush); ++ ++ if (l1tf == VMENTER_L1D_FLUSH_COND) ++ static_branch_enable(&vmx_l1d_flush_cond); ++ else ++ static_branch_disable(&vmx_l1d_flush_cond); ++ return 0; ++} ++ ++static int vmentry_l1d_flush_parse(const char *s) ++{ ++ unsigned int i; ++ ++ if (s) { ++ for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { ++ if (vmentry_l1d_param[i].for_parse && ++ sysfs_streq(s, vmentry_l1d_param[i].option)) ++ return i; ++ } ++ } ++ return -EINVAL; ++} ++ ++static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) ++{ ++ int l1tf, ret; ++ ++ l1tf = vmentry_l1d_flush_parse(s); ++ if (l1tf < 0) ++ return l1tf; ++ ++ if (!boot_cpu_has(X86_BUG_L1TF)) ++ return 0; ++ ++ /* ++ * Has vmx_init() run already? If not then this is the pre init ++ * parameter parsing. 
In that case just store the value and let ++ * vmx_init() do the proper setup after enable_ept has been ++ * established. ++ */ ++ if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) { ++ vmentry_l1d_flush_param = l1tf; ++ return 0; ++ } ++ ++ mutex_lock(&vmx_l1d_flush_mutex); ++ ret = vmx_setup_l1d_flush(l1tf); ++ mutex_unlock(&vmx_l1d_flush_mutex); ++ return ret; ++} ++ ++static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) ++{ ++ if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))) ++ return sprintf(s, "???\n"); ++ ++ return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); ++} ++ ++static const struct kernel_param_ops vmentry_l1d_flush_ops = { ++ .set = vmentry_l1d_flush_set, ++ .get = vmentry_l1d_flush_get, ++}; ++module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644); ++ ++enum ept_pointers_status { ++ EPT_POINTERS_CHECK = 0, ++ EPT_POINTERS_MATCH = 1, ++ EPT_POINTERS_MISMATCH = 2 ++}; ++ ++struct kvm_vmx { ++ struct kvm kvm; ++ ++ unsigned int tss_addr; ++ bool ept_identity_pagetable_done; ++ gpa_t ept_identity_map_addr; ++ ++ enum ept_pointers_status ept_pointers_match; ++ spinlock_t ept_pointer_lock; ++}; ++ ++#define NR_AUTOLOAD_MSRS 8 ++ ++struct vmcs_hdr { ++ u32 revision_id:31; ++ u32 shadow_vmcs:1; ++}; ++ ++struct vmcs { ++ struct vmcs_hdr hdr; ++ u32 abort; ++ char data[0]; ++}; ++ ++/* ++ * vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT ++ * and whose values change infrequently, but are not constant. I.e. this is ++ * used as a write-through cache of the corresponding VMCS fields. ++ */ ++struct vmcs_host_state { ++ unsigned long cr3; /* May not match real cr3 */ ++ unsigned long cr4; /* May not match real cr4 */ ++ unsigned long gs_base; ++ unsigned long fs_base; ++ ++ u16 fs_sel, gs_sel, ldt_sel; ++#ifdef CONFIG_X86_64 ++ u16 ds_sel, es_sel; ++#endif ++}; ++ ++/* ++ * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also ++ * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs ++ * loaded on this CPU (so we can clear them if the CPU goes down). ++ */ ++struct loaded_vmcs { ++ struct vmcs *vmcs; ++ struct vmcs *shadow_vmcs; ++ int cpu; ++ bool launched; ++ bool nmi_known_unmasked; ++ bool hv_timer_armed; ++ /* Support for vnmi-less CPUs */ ++ int soft_vnmi_blocked; ++ ktime_t entry_time; ++ s64 vnmi_blocked_time; ++ unsigned long *msr_bitmap; ++ struct list_head loaded_vmcss_on_cpu_link; ++ struct vmcs_host_state host_state; ++}; ++ ++struct shared_msr_entry { ++ unsigned index; ++ u64 data; ++ u64 mask; ++}; ++ ++/* ++ * struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a ++ * single nested guest (L2), hence the name vmcs12. Any VMX implementation has ++ * a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is ++ * stored in guest memory specified by VMPTRLD, but is opaque to the guest, ++ * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. ++ * More than one of these structures may exist, if L1 runs multiple L2 guests. ++ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the ++ * underlying hardware which will be used to run L2. ++ * This structure is packed to ensure that its layout is identical across ++ * machines (necessary for live migration). ++ * ++ * IMPORTANT: Changing the layout of existing fields in this structure ++ * will break save/restore compatibility with older kvm releases. 
When ++ * adding new fields, either use space in the reserved padding* arrays ++ * or add the new fields to the end of the structure. ++ */ ++typedef u64 natural_width; ++struct __packed vmcs12 { ++ /* According to the Intel spec, a VMCS region must start with the ++ * following two fields. Then follow implementation-specific data. ++ */ ++ struct vmcs_hdr hdr; ++ u32 abort; ++ ++ u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */ ++ u32 padding[7]; /* room for future expansion */ ++ ++ u64 io_bitmap_a; ++ u64 io_bitmap_b; ++ u64 msr_bitmap; ++ u64 vm_exit_msr_store_addr; ++ u64 vm_exit_msr_load_addr; ++ u64 vm_entry_msr_load_addr; ++ u64 tsc_offset; ++ u64 virtual_apic_page_addr; ++ u64 apic_access_addr; ++ u64 posted_intr_desc_addr; ++ u64 ept_pointer; ++ u64 eoi_exit_bitmap0; ++ u64 eoi_exit_bitmap1; ++ u64 eoi_exit_bitmap2; ++ u64 eoi_exit_bitmap3; ++ u64 xss_exit_bitmap; ++ u64 guest_physical_address; ++ u64 vmcs_link_pointer; ++ u64 guest_ia32_debugctl; ++ u64 guest_ia32_pat; ++ u64 guest_ia32_efer; ++ u64 guest_ia32_perf_global_ctrl; ++ u64 guest_pdptr0; ++ u64 guest_pdptr1; ++ u64 guest_pdptr2; ++ u64 guest_pdptr3; ++ u64 guest_bndcfgs; ++ u64 host_ia32_pat; ++ u64 host_ia32_efer; ++ u64 host_ia32_perf_global_ctrl; ++ u64 vmread_bitmap; ++ u64 vmwrite_bitmap; ++ u64 vm_function_control; ++ u64 eptp_list_address; ++ u64 pml_address; ++ u64 padding64[3]; /* room for future expansion */ ++ /* ++ * To allow migration of L1 (complete with its L2 guests) between ++ * machines of different natural widths (32 or 64 bit), we cannot have ++ * unsigned long fields with no explict size. We use u64 (aliased ++ * natural_width) instead. Luckily, x86 is little-endian. ++ */ ++ natural_width cr0_guest_host_mask; ++ natural_width cr4_guest_host_mask; ++ natural_width cr0_read_shadow; ++ natural_width cr4_read_shadow; ++ natural_width cr3_target_value0; ++ natural_width cr3_target_value1; ++ natural_width cr3_target_value2; ++ natural_width cr3_target_value3; ++ natural_width exit_qualification; ++ natural_width guest_linear_address; ++ natural_width guest_cr0; ++ natural_width guest_cr3; ++ natural_width guest_cr4; ++ natural_width guest_es_base; ++ natural_width guest_cs_base; ++ natural_width guest_ss_base; ++ natural_width guest_ds_base; ++ natural_width guest_fs_base; ++ natural_width guest_gs_base; ++ natural_width guest_ldtr_base; ++ natural_width guest_tr_base; ++ natural_width guest_gdtr_base; ++ natural_width guest_idtr_base; ++ natural_width guest_dr7; ++ natural_width guest_rsp; ++ natural_width guest_rip; ++ natural_width guest_rflags; ++ natural_width guest_pending_dbg_exceptions; ++ natural_width guest_sysenter_esp; ++ natural_width guest_sysenter_eip; ++ natural_width host_cr0; ++ natural_width host_cr3; ++ natural_width host_cr4; ++ natural_width host_fs_base; ++ natural_width host_gs_base; ++ natural_width host_tr_base; ++ natural_width host_gdtr_base; ++ natural_width host_idtr_base; ++ natural_width host_ia32_sysenter_esp; ++ natural_width host_ia32_sysenter_eip; ++ natural_width host_rsp; ++ natural_width host_rip; ++ natural_width paddingl[8]; /* room for future expansion */ ++ u32 pin_based_vm_exec_control; ++ u32 cpu_based_vm_exec_control; ++ u32 exception_bitmap; ++ u32 page_fault_error_code_mask; ++ u32 page_fault_error_code_match; ++ u32 cr3_target_count; ++ u32 vm_exit_controls; ++ u32 vm_exit_msr_store_count; ++ u32 vm_exit_msr_load_count; ++ u32 vm_entry_controls; ++ u32 vm_entry_msr_load_count; ++ u32 vm_entry_intr_info_field; ++ u32 
vm_entry_exception_error_code; ++ u32 vm_entry_instruction_len; ++ u32 tpr_threshold; ++ u32 secondary_vm_exec_control; ++ u32 vm_instruction_error; ++ u32 vm_exit_reason; ++ u32 vm_exit_intr_info; ++ u32 vm_exit_intr_error_code; ++ u32 idt_vectoring_info_field; ++ u32 idt_vectoring_error_code; ++ u32 vm_exit_instruction_len; ++ u32 vmx_instruction_info; ++ u32 guest_es_limit; ++ u32 guest_cs_limit; ++ u32 guest_ss_limit; ++ u32 guest_ds_limit; ++ u32 guest_fs_limit; ++ u32 guest_gs_limit; ++ u32 guest_ldtr_limit; ++ u32 guest_tr_limit; ++ u32 guest_gdtr_limit; ++ u32 guest_idtr_limit; ++ u32 guest_es_ar_bytes; ++ u32 guest_cs_ar_bytes; ++ u32 guest_ss_ar_bytes; ++ u32 guest_ds_ar_bytes; ++ u32 guest_fs_ar_bytes; ++ u32 guest_gs_ar_bytes; ++ u32 guest_ldtr_ar_bytes; ++ u32 guest_tr_ar_bytes; ++ u32 guest_interruptibility_info; ++ u32 guest_activity_state; ++ u32 guest_sysenter_cs; ++ u32 host_ia32_sysenter_cs; ++ u32 vmx_preemption_timer_value; ++ u32 padding32[7]; /* room for future expansion */ ++ u16 virtual_processor_id; ++ u16 posted_intr_nv; ++ u16 guest_es_selector; ++ u16 guest_cs_selector; ++ u16 guest_ss_selector; ++ u16 guest_ds_selector; ++ u16 guest_fs_selector; ++ u16 guest_gs_selector; ++ u16 guest_ldtr_selector; ++ u16 guest_tr_selector; ++ u16 guest_intr_status; ++ u16 host_es_selector; ++ u16 host_cs_selector; ++ u16 host_ss_selector; ++ u16 host_ds_selector; ++ u16 host_fs_selector; ++ u16 host_gs_selector; ++ u16 host_tr_selector; ++ u16 guest_pml_index; ++}; ++ ++/* ++ * For save/restore compatibility, the vmcs12 field offsets must not change. ++ */ ++#define CHECK_OFFSET(field, loc) \ ++ BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc), \ ++ "Offset of " #field " in struct vmcs12 has changed.") ++ ++static inline void vmx_check_vmcs12_offsets(void) { ++ CHECK_OFFSET(hdr, 0); ++ CHECK_OFFSET(abort, 4); ++ CHECK_OFFSET(launch_state, 8); ++ CHECK_OFFSET(io_bitmap_a, 40); ++ CHECK_OFFSET(io_bitmap_b, 48); ++ CHECK_OFFSET(msr_bitmap, 56); ++ CHECK_OFFSET(vm_exit_msr_store_addr, 64); ++ CHECK_OFFSET(vm_exit_msr_load_addr, 72); ++ CHECK_OFFSET(vm_entry_msr_load_addr, 80); ++ CHECK_OFFSET(tsc_offset, 88); ++ CHECK_OFFSET(virtual_apic_page_addr, 96); ++ CHECK_OFFSET(apic_access_addr, 104); ++ CHECK_OFFSET(posted_intr_desc_addr, 112); ++ CHECK_OFFSET(ept_pointer, 120); ++ CHECK_OFFSET(eoi_exit_bitmap0, 128); ++ CHECK_OFFSET(eoi_exit_bitmap1, 136); ++ CHECK_OFFSET(eoi_exit_bitmap2, 144); ++ CHECK_OFFSET(eoi_exit_bitmap3, 152); ++ CHECK_OFFSET(xss_exit_bitmap, 160); ++ CHECK_OFFSET(guest_physical_address, 168); ++ CHECK_OFFSET(vmcs_link_pointer, 176); ++ CHECK_OFFSET(guest_ia32_debugctl, 184); ++ CHECK_OFFSET(guest_ia32_pat, 192); ++ CHECK_OFFSET(guest_ia32_efer, 200); ++ CHECK_OFFSET(guest_ia32_perf_global_ctrl, 208); ++ CHECK_OFFSET(guest_pdptr0, 216); ++ CHECK_OFFSET(guest_pdptr1, 224); ++ CHECK_OFFSET(guest_pdptr2, 232); ++ CHECK_OFFSET(guest_pdptr3, 240); ++ CHECK_OFFSET(guest_bndcfgs, 248); ++ CHECK_OFFSET(host_ia32_pat, 256); ++ CHECK_OFFSET(host_ia32_efer, 264); ++ CHECK_OFFSET(host_ia32_perf_global_ctrl, 272); ++ CHECK_OFFSET(vmread_bitmap, 280); ++ CHECK_OFFSET(vmwrite_bitmap, 288); ++ CHECK_OFFSET(vm_function_control, 296); ++ CHECK_OFFSET(eptp_list_address, 304); ++ CHECK_OFFSET(pml_address, 312); ++ CHECK_OFFSET(cr0_guest_host_mask, 344); ++ CHECK_OFFSET(cr4_guest_host_mask, 352); ++ CHECK_OFFSET(cr0_read_shadow, 360); ++ CHECK_OFFSET(cr4_read_shadow, 368); ++ CHECK_OFFSET(cr3_target_value0, 376); ++ CHECK_OFFSET(cr3_target_value1, 384); ++ 
CHECK_OFFSET(cr3_target_value2, 392); ++ CHECK_OFFSET(cr3_target_value3, 400); ++ CHECK_OFFSET(exit_qualification, 408); ++ CHECK_OFFSET(guest_linear_address, 416); ++ CHECK_OFFSET(guest_cr0, 424); ++ CHECK_OFFSET(guest_cr3, 432); ++ CHECK_OFFSET(guest_cr4, 440); ++ CHECK_OFFSET(guest_es_base, 448); ++ CHECK_OFFSET(guest_cs_base, 456); ++ CHECK_OFFSET(guest_ss_base, 464); ++ CHECK_OFFSET(guest_ds_base, 472); ++ CHECK_OFFSET(guest_fs_base, 480); ++ CHECK_OFFSET(guest_gs_base, 488); ++ CHECK_OFFSET(guest_ldtr_base, 496); ++ CHECK_OFFSET(guest_tr_base, 504); ++ CHECK_OFFSET(guest_gdtr_base, 512); ++ CHECK_OFFSET(guest_idtr_base, 520); ++ CHECK_OFFSET(guest_dr7, 528); ++ CHECK_OFFSET(guest_rsp, 536); ++ CHECK_OFFSET(guest_rip, 544); ++ CHECK_OFFSET(guest_rflags, 552); ++ CHECK_OFFSET(guest_pending_dbg_exceptions, 560); ++ CHECK_OFFSET(guest_sysenter_esp, 568); ++ CHECK_OFFSET(guest_sysenter_eip, 576); ++ CHECK_OFFSET(host_cr0, 584); ++ CHECK_OFFSET(host_cr3, 592); ++ CHECK_OFFSET(host_cr4, 600); ++ CHECK_OFFSET(host_fs_base, 608); ++ CHECK_OFFSET(host_gs_base, 616); ++ CHECK_OFFSET(host_tr_base, 624); ++ CHECK_OFFSET(host_gdtr_base, 632); ++ CHECK_OFFSET(host_idtr_base, 640); ++ CHECK_OFFSET(host_ia32_sysenter_esp, 648); ++ CHECK_OFFSET(host_ia32_sysenter_eip, 656); ++ CHECK_OFFSET(host_rsp, 664); ++ CHECK_OFFSET(host_rip, 672); ++ CHECK_OFFSET(pin_based_vm_exec_control, 744); ++ CHECK_OFFSET(cpu_based_vm_exec_control, 748); ++ CHECK_OFFSET(exception_bitmap, 752); ++ CHECK_OFFSET(page_fault_error_code_mask, 756); ++ CHECK_OFFSET(page_fault_error_code_match, 760); ++ CHECK_OFFSET(cr3_target_count, 764); ++ CHECK_OFFSET(vm_exit_controls, 768); ++ CHECK_OFFSET(vm_exit_msr_store_count, 772); ++ CHECK_OFFSET(vm_exit_msr_load_count, 776); ++ CHECK_OFFSET(vm_entry_controls, 780); ++ CHECK_OFFSET(vm_entry_msr_load_count, 784); ++ CHECK_OFFSET(vm_entry_intr_info_field, 788); ++ CHECK_OFFSET(vm_entry_exception_error_code, 792); ++ CHECK_OFFSET(vm_entry_instruction_len, 796); ++ CHECK_OFFSET(tpr_threshold, 800); ++ CHECK_OFFSET(secondary_vm_exec_control, 804); ++ CHECK_OFFSET(vm_instruction_error, 808); ++ CHECK_OFFSET(vm_exit_reason, 812); ++ CHECK_OFFSET(vm_exit_intr_info, 816); ++ CHECK_OFFSET(vm_exit_intr_error_code, 820); ++ CHECK_OFFSET(idt_vectoring_info_field, 824); ++ CHECK_OFFSET(idt_vectoring_error_code, 828); ++ CHECK_OFFSET(vm_exit_instruction_len, 832); ++ CHECK_OFFSET(vmx_instruction_info, 836); ++ CHECK_OFFSET(guest_es_limit, 840); ++ CHECK_OFFSET(guest_cs_limit, 844); ++ CHECK_OFFSET(guest_ss_limit, 848); ++ CHECK_OFFSET(guest_ds_limit, 852); ++ CHECK_OFFSET(guest_fs_limit, 856); ++ CHECK_OFFSET(guest_gs_limit, 860); ++ CHECK_OFFSET(guest_ldtr_limit, 864); ++ CHECK_OFFSET(guest_tr_limit, 868); ++ CHECK_OFFSET(guest_gdtr_limit, 872); ++ CHECK_OFFSET(guest_idtr_limit, 876); ++ CHECK_OFFSET(guest_es_ar_bytes, 880); ++ CHECK_OFFSET(guest_cs_ar_bytes, 884); ++ CHECK_OFFSET(guest_ss_ar_bytes, 888); ++ CHECK_OFFSET(guest_ds_ar_bytes, 892); ++ CHECK_OFFSET(guest_fs_ar_bytes, 896); ++ CHECK_OFFSET(guest_gs_ar_bytes, 900); ++ CHECK_OFFSET(guest_ldtr_ar_bytes, 904); ++ CHECK_OFFSET(guest_tr_ar_bytes, 908); ++ CHECK_OFFSET(guest_interruptibility_info, 912); ++ CHECK_OFFSET(guest_activity_state, 916); ++ CHECK_OFFSET(guest_sysenter_cs, 920); ++ CHECK_OFFSET(host_ia32_sysenter_cs, 924); ++ CHECK_OFFSET(vmx_preemption_timer_value, 928); ++ CHECK_OFFSET(virtual_processor_id, 960); ++ CHECK_OFFSET(posted_intr_nv, 962); ++ CHECK_OFFSET(guest_es_selector, 964); ++ CHECK_OFFSET(guest_cs_selector, 966); ++ 
CHECK_OFFSET(guest_ss_selector, 968); ++ CHECK_OFFSET(guest_ds_selector, 970); ++ CHECK_OFFSET(guest_fs_selector, 972); ++ CHECK_OFFSET(guest_gs_selector, 974); ++ CHECK_OFFSET(guest_ldtr_selector, 976); ++ CHECK_OFFSET(guest_tr_selector, 978); ++ CHECK_OFFSET(guest_intr_status, 980); ++ CHECK_OFFSET(host_es_selector, 982); ++ CHECK_OFFSET(host_cs_selector, 984); ++ CHECK_OFFSET(host_ss_selector, 986); ++ CHECK_OFFSET(host_ds_selector, 988); ++ CHECK_OFFSET(host_fs_selector, 990); ++ CHECK_OFFSET(host_gs_selector, 992); ++ CHECK_OFFSET(host_tr_selector, 994); ++ CHECK_OFFSET(guest_pml_index, 996); ++} ++ ++/* ++ * VMCS12_REVISION is an arbitrary id that should be changed if the content or ++ * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and ++ * VMPTRLD verifies that the VMCS region that L1 is loading contains this id. ++ * ++ * IMPORTANT: Changing this value will break save/restore compatibility with ++ * older kvm releases. ++ */ ++#define VMCS12_REVISION 0x11e57ed0 ++ ++/* ++ * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region ++ * and any VMCS region. Although only sizeof(struct vmcs12) are used by the ++ * current implementation, 4K are reserved to avoid future complications. ++ */ ++#define VMCS12_SIZE 0x1000 ++ ++/* ++ * VMCS12_MAX_FIELD_INDEX is the highest index value used in any ++ * supported VMCS12 field encoding. ++ */ ++#define VMCS12_MAX_FIELD_INDEX 0x17 ++ ++struct nested_vmx_msrs { ++ /* ++ * We only store the "true" versions of the VMX capability MSRs. We ++ * generate the "non-true" versions by setting the must-be-1 bits ++ * according to the SDM. ++ */ ++ u32 procbased_ctls_low; ++ u32 procbased_ctls_high; ++ u32 secondary_ctls_low; ++ u32 secondary_ctls_high; ++ u32 pinbased_ctls_low; ++ u32 pinbased_ctls_high; ++ u32 exit_ctls_low; ++ u32 exit_ctls_high; ++ u32 entry_ctls_low; ++ u32 entry_ctls_high; ++ u32 misc_low; ++ u32 misc_high; ++ u32 ept_caps; ++ u32 vpid_caps; ++ u64 basic; ++ u64 cr0_fixed0; ++ u64 cr0_fixed1; ++ u64 cr4_fixed0; ++ u64 cr4_fixed1; ++ u64 vmcs_enum; ++ u64 vmfunc_controls; ++}; ++ ++/* ++ * The nested_vmx structure is part of vcpu_vmx, and holds information we need ++ * for correct emulation of VMX (i.e., nested VMX) on this vcpu. ++ */ ++struct nested_vmx { ++ /* Has the level1 guest done vmxon? */ ++ bool vmxon; ++ gpa_t vmxon_ptr; ++ bool pml_full; ++ ++ /* The guest-physical address of the current VMCS L1 keeps for L2 */ ++ gpa_t current_vmptr; ++ /* ++ * Cache of the guest's VMCS, existing outside of guest memory. ++ * Loaded from guest memory during VMPTRLD. Flushed to guest ++ * memory during VMCLEAR and VMPTRLD. ++ */ ++ struct vmcs12 *cached_vmcs12; ++ /* ++ * Cache of the guest's shadow VMCS, existing outside of guest ++ * memory. Loaded from guest memory during VM entry. Flushed ++ * to guest memory during VM exit. ++ */ ++ struct vmcs12 *cached_shadow_vmcs12; ++ /* ++ * Indicates if the shadow vmcs must be updated with the ++ * data hold by vmcs12 ++ */ ++ bool sync_shadow_vmcs; ++ bool dirty_vmcs12; ++ ++ bool change_vmcs01_virtual_apic_mode; ++ ++ /* L2 must run next, and mustn't decide to exit to L1. */ ++ bool nested_run_pending; ++ ++ struct loaded_vmcs vmcs02; ++ ++ /* ++ * Guest pages referred to in the vmcs02 with host-physical ++ * pointers, so we must keep them pinned while L2 runs. 
++ */ ++ struct page *apic_access_page; ++ struct page *virtual_apic_page; ++ struct page *pi_desc_page; ++ struct pi_desc *pi_desc; ++ bool pi_pending; ++ u16 posted_intr_nv; ++ ++ struct hrtimer preemption_timer; ++ bool preemption_timer_expired; ++ ++ /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ ++ u64 vmcs01_debugctl; ++ u64 vmcs01_guest_bndcfgs; ++ ++ u16 vpid02; ++ u16 last_vpid; ++ ++ struct nested_vmx_msrs msrs; ++ ++ /* SMM related state */ ++ struct { ++ /* in VMX operation on SMM entry? */ ++ bool vmxon; ++ /* in guest mode on SMM entry? */ ++ bool guest_mode; ++ } smm; ++}; ++ ++#define POSTED_INTR_ON 0 ++#define POSTED_INTR_SN 1 ++ ++/* Posted-Interrupt Descriptor */ ++struct pi_desc { ++ u32 pir[8]; /* Posted interrupt requested */ ++ union { ++ struct { ++ /* bit 256 - Outstanding Notification */ ++ u16 on : 1, ++ /* bit 257 - Suppress Notification */ ++ sn : 1, ++ /* bit 271:258 - Reserved */ ++ rsvd_1 : 14; ++ /* bit 279:272 - Notification Vector */ ++ u8 nv; ++ /* bit 287:280 - Reserved */ ++ u8 rsvd_2; ++ /* bit 319:288 - Notification Destination */ ++ u32 ndst; ++ }; ++ u64 control; ++ }; ++ u32 rsvd[6]; ++} __aligned(64); ++ ++static bool pi_test_and_set_on(struct pi_desc *pi_desc) ++{ ++ return test_and_set_bit(POSTED_INTR_ON, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static bool pi_test_and_clear_on(struct pi_desc *pi_desc) ++{ ++ return test_and_clear_bit(POSTED_INTR_ON, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) ++{ ++ return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); ++} ++ ++static inline void pi_clear_sn(struct pi_desc *pi_desc) ++{ ++ return clear_bit(POSTED_INTR_SN, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static inline void pi_set_sn(struct pi_desc *pi_desc) ++{ ++ return set_bit(POSTED_INTR_SN, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static inline void pi_clear_on(struct pi_desc *pi_desc) ++{ ++ clear_bit(POSTED_INTR_ON, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static inline int pi_test_on(struct pi_desc *pi_desc) ++{ ++ return test_bit(POSTED_INTR_ON, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static inline int pi_test_sn(struct pi_desc *pi_desc) ++{ ++ return test_bit(POSTED_INTR_SN, ++ (unsigned long *)&pi_desc->control); ++} ++ ++struct vmx_msrs { ++ unsigned int nr; ++ struct vmx_msr_entry val[NR_AUTOLOAD_MSRS]; ++}; ++ ++struct vcpu_vmx { ++ struct kvm_vcpu vcpu; ++ unsigned long host_rsp; ++ u8 fail; ++ u8 msr_bitmap_mode; ++ u32 exit_intr_info; ++ u32 idt_vectoring_info; ++ ulong rflags; ++ struct shared_msr_entry *guest_msrs; ++ int nmsrs; ++ int save_nmsrs; ++ bool guest_msrs_dirty; ++ unsigned long host_idt_base; ++#ifdef CONFIG_X86_64 ++ u64 msr_host_kernel_gs_base; ++ u64 msr_guest_kernel_gs_base; ++#endif ++ ++ u64 spec_ctrl; ++ ++ u32 vm_entry_controls_shadow; ++ u32 vm_exit_controls_shadow; ++ u32 secondary_exec_control; ++ ++ /* ++ * loaded_vmcs points to the VMCS currently used in this vcpu. For a ++ * non-nested (L1) guest, it always points to vmcs01. For a nested ++ * guest (L2), it points to a different VMCS. loaded_cpu_state points ++ * to the VMCS whose state is loaded into the CPU registers that only ++ * need to be switched when transitioning to/from the kernel; a NULL ++ * value indicates that host state is loaded. 
++ */ ++ struct loaded_vmcs vmcs01; ++ struct loaded_vmcs *loaded_vmcs; ++ struct loaded_vmcs *loaded_cpu_state; ++ bool __launched; /* temporary, used in vmx_vcpu_run */ ++ struct msr_autoload { ++ struct vmx_msrs guest; ++ struct vmx_msrs host; ++ } msr_autoload; ++ ++ struct { ++ int vm86_active; ++ ulong save_rflags; ++ struct kvm_segment segs[8]; ++ } rmode; ++ struct { ++ u32 bitmask; /* 4 bits per segment (1 bit per field) */ ++ struct kvm_save_segment { ++ u16 selector; ++ unsigned long base; ++ u32 limit; ++ u32 ar; ++ } seg[8]; ++ } segment_cache; ++ int vpid; ++ bool emulation_required; ++ ++ u32 exit_reason; ++ ++ /* Posted interrupt descriptor */ ++ struct pi_desc pi_desc; ++ ++ /* Support for a guest hypervisor (nested VMX) */ ++ struct nested_vmx nested; ++ ++ /* Dynamic PLE window. */ ++ int ple_window; ++ bool ple_window_dirty; ++ ++ bool req_immediate_exit; ++ ++ /* Support for PML */ ++#define PML_ENTITY_NUM 512 ++ struct page *pml_pg; ++ ++ /* apic deadline value in host tsc */ ++ u64 hv_deadline_tsc; ++ ++ u64 current_tsc_ratio; ++ ++ u32 host_pkru; ++ ++ unsigned long host_debugctlmsr; ++ ++ /* ++ * Only bits masked by msr_ia32_feature_control_valid_bits can be set in ++ * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included ++ * in msr_ia32_feature_control_valid_bits. ++ */ ++ u64 msr_ia32_feature_control; ++ u64 msr_ia32_feature_control_valid_bits; ++ u64 ept_pointer; ++}; ++ ++enum segment_cache_field { ++ SEG_FIELD_SEL = 0, ++ SEG_FIELD_BASE = 1, ++ SEG_FIELD_LIMIT = 2, ++ SEG_FIELD_AR = 3, ++ ++ SEG_FIELD_NR = 4 ++}; ++ ++static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) ++{ ++ return container_of(kvm, struct kvm_vmx, kvm); ++} ++ ++static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) ++{ ++ return container_of(vcpu, struct vcpu_vmx, vcpu); ++} ++ ++static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) ++{ ++ return &(to_vmx(vcpu)->pi_desc); ++} ++ ++#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) ++#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) ++#define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name) ++#define FIELD64(number, name) \ ++ FIELD(number, name), \ ++ [ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32) ++ ++ ++static u16 shadow_read_only_fields[] = { ++#define SHADOW_FIELD_RO(x) x, ++#include "vmx_shadow_fields.h" ++}; ++static int max_shadow_read_only_fields = ++ ARRAY_SIZE(shadow_read_only_fields); ++ ++static u16 shadow_read_write_fields[] = { ++#define SHADOW_FIELD_RW(x) x, ++#include "vmx_shadow_fields.h" ++}; ++static int max_shadow_read_write_fields = ++ ARRAY_SIZE(shadow_read_write_fields); ++ ++static const unsigned short vmcs_field_to_offset_table[] = { ++ FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), ++ FIELD(POSTED_INTR_NV, posted_intr_nv), ++ FIELD(GUEST_ES_SELECTOR, guest_es_selector), ++ FIELD(GUEST_CS_SELECTOR, guest_cs_selector), ++ FIELD(GUEST_SS_SELECTOR, guest_ss_selector), ++ FIELD(GUEST_DS_SELECTOR, guest_ds_selector), ++ FIELD(GUEST_FS_SELECTOR, guest_fs_selector), ++ FIELD(GUEST_GS_SELECTOR, guest_gs_selector), ++ FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector), ++ FIELD(GUEST_TR_SELECTOR, guest_tr_selector), ++ FIELD(GUEST_INTR_STATUS, guest_intr_status), ++ FIELD(GUEST_PML_INDEX, guest_pml_index), ++ FIELD(HOST_ES_SELECTOR, host_es_selector), ++ FIELD(HOST_CS_SELECTOR, host_cs_selector), ++ FIELD(HOST_SS_SELECTOR, host_ss_selector), ++ FIELD(HOST_DS_SELECTOR, host_ds_selector), ++ FIELD(HOST_FS_SELECTOR, host_fs_selector), 
++ FIELD(HOST_GS_SELECTOR, host_gs_selector), ++ FIELD(HOST_TR_SELECTOR, host_tr_selector), ++ FIELD64(IO_BITMAP_A, io_bitmap_a), ++ FIELD64(IO_BITMAP_B, io_bitmap_b), ++ FIELD64(MSR_BITMAP, msr_bitmap), ++ FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr), ++ FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr), ++ FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr), ++ FIELD64(PML_ADDRESS, pml_address), ++ FIELD64(TSC_OFFSET, tsc_offset), ++ FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), ++ FIELD64(APIC_ACCESS_ADDR, apic_access_addr), ++ FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr), ++ FIELD64(VM_FUNCTION_CONTROL, vm_function_control), ++ FIELD64(EPT_POINTER, ept_pointer), ++ FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), ++ FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), ++ FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), ++ FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), ++ FIELD64(EPTP_LIST_ADDRESS, eptp_list_address), ++ FIELD64(VMREAD_BITMAP, vmread_bitmap), ++ FIELD64(VMWRITE_BITMAP, vmwrite_bitmap), ++ FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), ++ FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), ++ FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), ++ FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), ++ FIELD64(GUEST_IA32_PAT, guest_ia32_pat), ++ FIELD64(GUEST_IA32_EFER, guest_ia32_efer), ++ FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl), ++ FIELD64(GUEST_PDPTR0, guest_pdptr0), ++ FIELD64(GUEST_PDPTR1, guest_pdptr1), ++ FIELD64(GUEST_PDPTR2, guest_pdptr2), ++ FIELD64(GUEST_PDPTR3, guest_pdptr3), ++ FIELD64(GUEST_BNDCFGS, guest_bndcfgs), ++ FIELD64(HOST_IA32_PAT, host_ia32_pat), ++ FIELD64(HOST_IA32_EFER, host_ia32_efer), ++ FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), ++ FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control), ++ FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control), ++ FIELD(EXCEPTION_BITMAP, exception_bitmap), ++ FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask), ++ FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match), ++ FIELD(CR3_TARGET_COUNT, cr3_target_count), ++ FIELD(VM_EXIT_CONTROLS, vm_exit_controls), ++ FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count), ++ FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count), ++ FIELD(VM_ENTRY_CONTROLS, vm_entry_controls), ++ FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count), ++ FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field), ++ FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code), ++ FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len), ++ FIELD(TPR_THRESHOLD, tpr_threshold), ++ FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control), ++ FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error), ++ FIELD(VM_EXIT_REASON, vm_exit_reason), ++ FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info), ++ FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code), ++ FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field), ++ FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code), ++ FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len), ++ FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info), ++ FIELD(GUEST_ES_LIMIT, guest_es_limit), ++ FIELD(GUEST_CS_LIMIT, guest_cs_limit), ++ FIELD(GUEST_SS_LIMIT, guest_ss_limit), ++ FIELD(GUEST_DS_LIMIT, guest_ds_limit), ++ FIELD(GUEST_FS_LIMIT, guest_fs_limit), ++ FIELD(GUEST_GS_LIMIT, guest_gs_limit), ++ FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit), ++ FIELD(GUEST_TR_LIMIT, guest_tr_limit), ++ FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit), ++ FIELD(GUEST_IDTR_LIMIT, 
guest_idtr_limit), ++ FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes), ++ FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes), ++ FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes), ++ FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes), ++ FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes), ++ FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes), ++ FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes), ++ FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes), ++ FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info), ++ FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), ++ FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), ++ FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), ++ FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value), ++ FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), ++ FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), ++ FIELD(CR0_READ_SHADOW, cr0_read_shadow), ++ FIELD(CR4_READ_SHADOW, cr4_read_shadow), ++ FIELD(CR3_TARGET_VALUE0, cr3_target_value0), ++ FIELD(CR3_TARGET_VALUE1, cr3_target_value1), ++ FIELD(CR3_TARGET_VALUE2, cr3_target_value2), ++ FIELD(CR3_TARGET_VALUE3, cr3_target_value3), ++ FIELD(EXIT_QUALIFICATION, exit_qualification), ++ FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address), ++ FIELD(GUEST_CR0, guest_cr0), ++ FIELD(GUEST_CR3, guest_cr3), ++ FIELD(GUEST_CR4, guest_cr4), ++ FIELD(GUEST_ES_BASE, guest_es_base), ++ FIELD(GUEST_CS_BASE, guest_cs_base), ++ FIELD(GUEST_SS_BASE, guest_ss_base), ++ FIELD(GUEST_DS_BASE, guest_ds_base), ++ FIELD(GUEST_FS_BASE, guest_fs_base), ++ FIELD(GUEST_GS_BASE, guest_gs_base), ++ FIELD(GUEST_LDTR_BASE, guest_ldtr_base), ++ FIELD(GUEST_TR_BASE, guest_tr_base), ++ FIELD(GUEST_GDTR_BASE, guest_gdtr_base), ++ FIELD(GUEST_IDTR_BASE, guest_idtr_base), ++ FIELD(GUEST_DR7, guest_dr7), ++ FIELD(GUEST_RSP, guest_rsp), ++ FIELD(GUEST_RIP, guest_rip), ++ FIELD(GUEST_RFLAGS, guest_rflags), ++ FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), ++ FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), ++ FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), ++ FIELD(HOST_CR0, host_cr0), ++ FIELD(HOST_CR3, host_cr3), ++ FIELD(HOST_CR4, host_cr4), ++ FIELD(HOST_FS_BASE, host_fs_base), ++ FIELD(HOST_GS_BASE, host_gs_base), ++ FIELD(HOST_TR_BASE, host_tr_base), ++ FIELD(HOST_GDTR_BASE, host_gdtr_base), ++ FIELD(HOST_IDTR_BASE, host_idtr_base), ++ FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp), ++ FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip), ++ FIELD(HOST_RSP, host_rsp), ++ FIELD(HOST_RIP, host_rip), ++}; ++ ++static inline short vmcs_field_to_offset(unsigned long field) ++{ ++ const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); ++ unsigned short offset; ++ unsigned index; ++ ++ if (field >> 15) ++ return -ENOENT; ++ ++ index = ROL16(field, 6); ++ if (index >= size) ++ return -ENOENT; ++ ++ index = array_index_nospec(index, size); ++ offset = vmcs_field_to_offset_table[index]; ++ if (offset == 0) ++ return -ENOENT; ++ return offset; ++} ++ ++static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.cached_vmcs12; ++} ++ ++static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.cached_shadow_vmcs12; ++} ++ ++static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); ++static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); ++static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); ++static bool vmx_xsaves_supported(void); ++static void vmx_set_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg); ++static void vmx_get_segment(struct 
kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg); ++static bool guest_state_valid(struct kvm_vcpu *vcpu); ++static u32 vmx_segment_access_rights(struct kvm_segment *var); ++static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); ++static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); ++static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); ++static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, ++ u16 error_code); ++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); ++static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type); ++ ++static DEFINE_PER_CPU(struct vmcs *, vmxarea); ++static DEFINE_PER_CPU(struct vmcs *, current_vmcs); ++/* ++ * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed ++ * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it. ++ */ ++static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); ++ ++/* ++ * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we ++ * can find which vCPU should be waken up. ++ */ ++static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); ++static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); ++ ++enum { ++ VMX_VMREAD_BITMAP, ++ VMX_VMWRITE_BITMAP, ++ VMX_BITMAP_NR ++}; ++ ++static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; ++ ++#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) ++#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) ++ ++static bool cpu_has_load_ia32_efer; ++static bool cpu_has_load_perf_global_ctrl; ++ ++static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); ++static DEFINE_SPINLOCK(vmx_vpid_lock); ++ ++static struct vmcs_config { ++ int size; ++ int order; ++ u32 basic_cap; ++ u32 revision_id; ++ u32 pin_based_exec_ctrl; ++ u32 cpu_based_exec_ctrl; ++ u32 cpu_based_2nd_exec_ctrl; ++ u32 vmexit_ctrl; ++ u32 vmentry_ctrl; ++ struct nested_vmx_msrs nested; ++} vmcs_config; ++ ++static struct vmx_capability { ++ u32 ept; ++ u32 vpid; ++} vmx_capability; ++ ++#define VMX_SEGMENT_FIELD(seg) \ ++ [VCPU_SREG_##seg] = { \ ++ .selector = GUEST_##seg##_SELECTOR, \ ++ .base = GUEST_##seg##_BASE, \ ++ .limit = GUEST_##seg##_LIMIT, \ ++ .ar_bytes = GUEST_##seg##_AR_BYTES, \ ++ } ++ ++static const struct kvm_vmx_segment_field { ++ unsigned selector; ++ unsigned base; ++ unsigned limit; ++ unsigned ar_bytes; ++} kvm_vmx_segment_fields[] = { ++ VMX_SEGMENT_FIELD(CS), ++ VMX_SEGMENT_FIELD(DS), ++ VMX_SEGMENT_FIELD(ES), ++ VMX_SEGMENT_FIELD(FS), ++ VMX_SEGMENT_FIELD(GS), ++ VMX_SEGMENT_FIELD(SS), ++ VMX_SEGMENT_FIELD(TR), ++ VMX_SEGMENT_FIELD(LDTR), ++}; ++ ++static u64 host_efer; ++ ++static void ept_save_pdptrs(struct kvm_vcpu *vcpu); ++ ++/* ++ * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it ++ * away by decrementing the array size. 
++ */ ++static const u32 vmx_msr_index[] = { ++#ifdef CONFIG_X86_64 ++ MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, ++#endif ++ MSR_EFER, MSR_TSC_AUX, MSR_STAR, ++}; ++ ++DEFINE_STATIC_KEY_FALSE(enable_evmcs); ++ ++#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs)) ++ ++#define KVM_EVMCS_VERSION 1 ++ ++#if IS_ENABLED(CONFIG_HYPERV) ++static bool __read_mostly enlightened_vmcs = true; ++module_param(enlightened_vmcs, bool, 0444); ++ ++static inline void evmcs_write64(unsigned long field, u64 value) ++{ ++ u16 clean_field; ++ int offset = get_evmcs_offset(field, &clean_field); ++ ++ if (offset < 0) ++ return; ++ ++ *(u64 *)((char *)current_evmcs + offset) = value; ++ ++ current_evmcs->hv_clean_fields &= ~clean_field; ++} ++ ++static inline void evmcs_write32(unsigned long field, u32 value) ++{ ++ u16 clean_field; ++ int offset = get_evmcs_offset(field, &clean_field); ++ ++ if (offset < 0) ++ return; ++ ++ *(u32 *)((char *)current_evmcs + offset) = value; ++ current_evmcs->hv_clean_fields &= ~clean_field; ++} ++ ++static inline void evmcs_write16(unsigned long field, u16 value) ++{ ++ u16 clean_field; ++ int offset = get_evmcs_offset(field, &clean_field); ++ ++ if (offset < 0) ++ return; ++ ++ *(u16 *)((char *)current_evmcs + offset) = value; ++ current_evmcs->hv_clean_fields &= ~clean_field; ++} ++ ++static inline u64 evmcs_read64(unsigned long field) ++{ ++ int offset = get_evmcs_offset(field, NULL); ++ ++ if (offset < 0) ++ return 0; ++ ++ return *(u64 *)((char *)current_evmcs + offset); ++} ++ ++static inline u32 evmcs_read32(unsigned long field) ++{ ++ int offset = get_evmcs_offset(field, NULL); ++ ++ if (offset < 0) ++ return 0; ++ ++ return *(u32 *)((char *)current_evmcs + offset); ++} ++ ++static inline u16 evmcs_read16(unsigned long field) ++{ ++ int offset = get_evmcs_offset(field, NULL); ++ ++ if (offset < 0) ++ return 0; ++ ++ return *(u16 *)((char *)current_evmcs + offset); ++} ++ ++static inline void evmcs_touch_msr_bitmap(void) ++{ ++ if (unlikely(!current_evmcs)) ++ return; ++ ++ if (current_evmcs->hv_enlightenments_control.msr_bitmap) ++ current_evmcs->hv_clean_fields &= ++ ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; ++} ++ ++static void evmcs_load(u64 phys_addr) ++{ ++ struct hv_vp_assist_page *vp_ap = ++ hv_get_vp_assist_page(smp_processor_id()); ++ ++ vp_ap->current_nested_vmcs = phys_addr; ++ vp_ap->enlighten_vmentry = 1; ++} ++ ++static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) ++{ ++ /* ++ * Enlightened VMCSv1 doesn't support these: ++ * ++ * POSTED_INTR_NV = 0x00000002, ++ * GUEST_INTR_STATUS = 0x00000810, ++ * APIC_ACCESS_ADDR = 0x00002014, ++ * POSTED_INTR_DESC_ADDR = 0x00002016, ++ * EOI_EXIT_BITMAP0 = 0x0000201c, ++ * EOI_EXIT_BITMAP1 = 0x0000201e, ++ * EOI_EXIT_BITMAP2 = 0x00002020, ++ * EOI_EXIT_BITMAP3 = 0x00002022, ++ */ ++ vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ++ ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ++ ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ++ ~SECONDARY_EXEC_APIC_REGISTER_VIRT; ++ ++ /* ++ * GUEST_PML_INDEX = 0x00000812, ++ * PML_ADDRESS = 0x0000200e, ++ */ ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML; ++ ++ /* VM_FUNCTION_CONTROL = 0x00002018, */ ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC; ++ ++ /* ++ * EPTP_LIST_ADDRESS = 0x00002024, ++ * VMREAD_BITMAP = 0x00002026, ++ * VMWRITE_BITMAP = 0x00002028, ++ */ ++ 
vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS; ++ ++ /* ++ * TSC_MULTIPLIER = 0x00002032, ++ */ ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING; ++ ++ /* ++ * PLE_GAP = 0x00004020, ++ * PLE_WINDOW = 0x00004022, ++ */ ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; ++ ++ /* ++ * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, ++ */ ++ vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; ++ ++ /* ++ * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, ++ * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, ++ */ ++ vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; ++ vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; ++ ++ /* ++ * Currently unsupported in KVM: ++ * GUEST_IA32_RTIT_CTL = 0x00002814, ++ */ ++} ++ ++/* check_ept_pointer() should be under protection of ept_pointer_lock. */ ++static void check_ept_pointer_match(struct kvm *kvm) ++{ ++ struct kvm_vcpu *vcpu; ++ u64 tmp_eptp = INVALID_PAGE; ++ int i; ++ ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ if (!VALID_PAGE(tmp_eptp)) { ++ tmp_eptp = to_vmx(vcpu)->ept_pointer; ++ } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) { ++ to_kvm_vmx(kvm)->ept_pointers_match ++ = EPT_POINTERS_MISMATCH; ++ return; ++ } ++ } ++ ++ to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH; ++} ++ ++static int vmx_hv_remote_flush_tlb(struct kvm *kvm) ++{ ++ int ret; ++ ++ spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); ++ ++ if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK) ++ check_ept_pointer_match(kvm); ++ ++ if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { ++ ret = -ENOTSUPP; ++ goto out; ++ } ++ ++ /* ++ * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the ++ * base of EPT PML4 table, strip off EPT configuration information. 
++ */ ++ ret = hyperv_flush_guest_mapping( ++ to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK); ++ ++out: ++ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); ++ return ret; ++} ++#else /* !IS_ENABLED(CONFIG_HYPERV) */ ++static inline void evmcs_write64(unsigned long field, u64 value) {} ++static inline void evmcs_write32(unsigned long field, u32 value) {} ++static inline void evmcs_write16(unsigned long field, u16 value) {} ++static inline u64 evmcs_read64(unsigned long field) { return 0; } ++static inline u32 evmcs_read32(unsigned long field) { return 0; } ++static inline u16 evmcs_read16(unsigned long field) { return 0; } ++static inline void evmcs_load(u64 phys_addr) {} ++static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} ++static inline void evmcs_touch_msr_bitmap(void) {} ++#endif /* IS_ENABLED(CONFIG_HYPERV) */ ++ ++static inline bool is_exception_n(u32 intr_info, u8 vector) ++{ ++ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | ++ INTR_INFO_VALID_MASK)) == ++ (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK); ++} ++ ++static inline bool is_debug(u32 intr_info) ++{ ++ return is_exception_n(intr_info, DB_VECTOR); ++} ++ ++static inline bool is_breakpoint(u32 intr_info) ++{ ++ return is_exception_n(intr_info, BP_VECTOR); ++} ++ ++static inline bool is_page_fault(u32 intr_info) ++{ ++ return is_exception_n(intr_info, PF_VECTOR); ++} ++ ++static inline bool is_no_device(u32 intr_info) ++{ ++ return is_exception_n(intr_info, NM_VECTOR); ++} ++ ++static inline bool is_invalid_opcode(u32 intr_info) ++{ ++ return is_exception_n(intr_info, UD_VECTOR); ++} ++ ++static inline bool is_gp_fault(u32 intr_info) ++{ ++ return is_exception_n(intr_info, GP_VECTOR); ++} ++ ++static inline bool is_external_interrupt(u32 intr_info) ++{ ++ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) ++ == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); ++} ++ ++static inline bool is_machine_check(u32 intr_info) ++{ ++ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | ++ INTR_INFO_VALID_MASK)) == ++ (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); ++} ++ ++/* Undocumented: icebp/int1 */ ++static inline bool is_icebp(u32 intr_info) ++{ ++ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) ++ == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); ++} ++ ++static inline bool cpu_has_vmx_msr_bitmap(void) ++{ ++ return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; ++} ++ ++static inline bool cpu_has_vmx_tpr_shadow(void) ++{ ++ return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; ++} ++ ++static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu) ++{ ++ return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu); ++} ++ ++static inline bool cpu_has_secondary_exec_ctrls(void) ++{ ++ return vmcs_config.cpu_based_exec_ctrl & ++ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; ++} ++ ++static inline bool cpu_has_vmx_virtualize_apic_accesses(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; ++} ++ ++static inline bool cpu_has_vmx_virtualize_x2apic_mode(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; ++} ++ ++static inline bool cpu_has_vmx_apic_register_virt(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_APIC_REGISTER_VIRT; ++} ++ ++static inline bool cpu_has_vmx_virtual_intr_delivery(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & 
++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; ++} ++ ++static inline bool cpu_has_vmx_encls_vmexit(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_ENCLS_EXITING; ++} ++ ++/* ++ * Comment's format: document - errata name - stepping - processor name. ++ * Refer from ++ * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp ++ */ ++static u32 vmx_preemption_cpu_tfms[] = { ++/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */ ++0x000206E6, ++/* 323056.pdf - AAX65 - C2 - Xeon L3406 */ ++/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */ ++/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */ ++0x00020652, ++/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */ ++0x00020655, ++/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */ ++/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */ ++/* ++ * 320767.pdf - AAP86 - B1 - ++ * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile ++ */ ++0x000106E5, ++/* 321333.pdf - AAM126 - C0 - Xeon 3500 */ ++0x000106A0, ++/* 321333.pdf - AAM126 - C1 - Xeon 3500 */ ++0x000106A1, ++/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */ ++0x000106A4, ++ /* 321333.pdf - AAM126 - D0 - Xeon 3500 */ ++ /* 321324.pdf - AAK139 - D0 - Xeon 5500 */ ++ /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */ ++0x000106A5, ++}; ++ ++static inline bool cpu_has_broken_vmx_preemption_timer(void) ++{ ++ u32 eax = cpuid_eax(0x00000001), i; ++ ++ /* Clear the reserved bits */ ++ eax &= ~(0x3U << 14 | 0xfU << 28); ++ for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++) ++ if (eax == vmx_preemption_cpu_tfms[i]) ++ return true; ++ ++ return false; ++} ++ ++static inline bool cpu_has_vmx_preemption_timer(void) ++{ ++ return vmcs_config.pin_based_exec_ctrl & ++ PIN_BASED_VMX_PREEMPTION_TIMER; ++} ++ ++static inline bool cpu_has_vmx_posted_intr(void) ++{ ++ return IS_ENABLED(CONFIG_X86_LOCAL_APIC) && ++ vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; ++} ++ ++static inline bool cpu_has_vmx_apicv(void) ++{ ++ return cpu_has_vmx_apic_register_virt() && ++ cpu_has_vmx_virtual_intr_delivery() && ++ cpu_has_vmx_posted_intr(); ++} ++ ++static inline bool cpu_has_vmx_flexpriority(void) ++{ ++ return cpu_has_vmx_tpr_shadow() && ++ cpu_has_vmx_virtualize_apic_accesses(); ++} ++ ++static inline bool cpu_has_vmx_ept_execute_only(void) ++{ ++ return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_2m_page(void) ++{ ++ return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_1g_page(void) ++{ ++ return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_4levels(void) ++{ ++ return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_mt_wb(void) ++{ ++ return vmx_capability.ept & VMX_EPTP_WB_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_5levels(void) ++{ ++ return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_ad_bits(void) ++{ ++ return vmx_capability.ept & VMX_EPT_AD_BIT; ++} ++ ++static inline bool cpu_has_vmx_invept_context(void) ++{ ++ return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; ++} ++ ++static inline bool cpu_has_vmx_invept_global(void) ++{ ++ return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; ++} ++ ++static inline bool cpu_has_vmx_invvpid_individual_addr(void) ++{ ++ return vmx_capability.vpid & VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT; ++} ++ ++static 
inline bool cpu_has_vmx_invvpid_single(void) ++{ ++ return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT; ++} ++ ++static inline bool cpu_has_vmx_invvpid_global(void) ++{ ++ return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; ++} ++ ++static inline bool cpu_has_vmx_invvpid(void) ++{ ++ return vmx_capability.vpid & VMX_VPID_INVVPID_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_ENABLE_EPT; ++} ++ ++static inline bool cpu_has_vmx_unrestricted_guest(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_UNRESTRICTED_GUEST; ++} ++ ++static inline bool cpu_has_vmx_ple(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_PAUSE_LOOP_EXITING; ++} ++ ++static inline bool cpu_has_vmx_basic_inout(void) ++{ ++ return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT); ++} ++ ++static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu) ++{ ++ return flexpriority_enabled && lapic_in_kernel(vcpu); ++} ++ ++static inline bool cpu_has_vmx_vpid(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_ENABLE_VPID; ++} ++ ++static inline bool cpu_has_vmx_rdtscp(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_RDTSCP; ++} ++ ++static inline bool cpu_has_vmx_invpcid(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_ENABLE_INVPCID; ++} ++ ++static inline bool cpu_has_virtual_nmis(void) ++{ ++ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; ++} ++ ++static inline bool cpu_has_vmx_wbinvd_exit(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_WBINVD_EXITING; ++} ++ ++static inline bool cpu_has_vmx_shadow_vmcs(void) ++{ ++ u64 vmx_msr; ++ rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); ++ /* check if the cpu supports writing r/o exit information fields */ ++ if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) ++ return false; ++ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_SHADOW_VMCS; ++} ++ ++static inline bool cpu_has_vmx_pml(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML; ++} ++ ++static inline bool cpu_has_vmx_tsc_scaling(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_TSC_SCALING; ++} ++ ++static inline bool cpu_has_vmx_vmfunc(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_ENABLE_VMFUNC; ++} ++ ++static bool vmx_umip_emulated(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_DESC; ++} ++ ++static inline bool report_flexpriority(void) ++{ ++ return flexpriority_enabled; ++} ++ ++static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) ++{ ++ return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low); ++} ++ ++/* ++ * Do the virtual VMX capability MSRs specify that L1 can use VMWRITE ++ * to modify any valid field of the VMCS, or are the VM-exit ++ * information fields read-only? 
++ */ ++static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.msrs.misc_low & ++ MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; ++} ++ ++static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS; ++} ++ ++static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.msrs.procbased_ctls_high & ++ CPU_BASED_MONITOR_TRAP_FLAG; ++} ++ ++static inline bool nested_cpu_has_vmx_shadow_vmcs(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.msrs.secondary_ctls_high & ++ SECONDARY_EXEC_SHADOW_VMCS; ++} ++ ++static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) ++{ ++ return vmcs12->cpu_based_vm_exec_control & bit; ++} ++ ++static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit) ++{ ++ return (vmcs12->cpu_based_vm_exec_control & ++ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && ++ (vmcs12->secondary_vm_exec_control & bit); ++} ++ ++static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12) ++{ ++ return vmcs12->pin_based_vm_exec_control & ++ PIN_BASED_VMX_PREEMPTION_TIMER; ++} ++ ++static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12) ++{ ++ return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING; ++} ++ ++static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) ++{ ++ return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; ++} ++ ++static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); ++} ++ ++static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); ++} ++ ++static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML); ++} ++ ++static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); ++} ++ ++static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID); ++} ++ ++static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT); ++} ++ ++static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); ++} ++ ++static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12) ++{ ++ return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; ++} ++ ++static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC); ++} ++ ++static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has_vmfunc(vmcs12) && ++ (vmcs12->vm_function_control & ++ VMX_VMFUNC_EPTP_SWITCHING); ++} ++ ++static inline bool nested_cpu_has_shadow_vmcs(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS); ++} ++ ++static inline bool is_nmi(u32 intr_info) ++{ ++ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) ++ == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK); ++} ++ ++static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, ++ u32 exit_intr_info, ++ unsigned long exit_qualification); ++static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, ++ struct 
vmcs12 *vmcs12, ++ u32 reason, unsigned long qualification); ++ ++static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) ++{ ++ int i; ++ ++ for (i = 0; i < vmx->nmsrs; ++i) ++ if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) ++ return i; ++ return -1; ++} ++ ++static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) ++{ ++ struct { ++ u64 vpid : 16; ++ u64 rsvd : 48; ++ u64 gva; ++ } operand = { vpid, 0, gva }; ++ bool error; ++ ++ asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na) ++ : CC_OUT(na) (error) : "a"(&operand), "c"(ext) ++ : "memory"); ++ BUG_ON(error); ++} ++ ++static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) ++{ ++ struct { ++ u64 eptp, gpa; ++ } operand = {eptp, gpa}; ++ bool error; ++ ++ asm volatile (__ex(ASM_VMX_INVEPT) CC_SET(na) ++ : CC_OUT(na) (error) : "a" (&operand), "c" (ext) ++ : "memory"); ++ BUG_ON(error); ++} ++ ++static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) ++{ ++ int i; ++ ++ i = __find_msr_index(vmx, msr); ++ if (i >= 0) ++ return &vmx->guest_msrs[i]; ++ return NULL; ++} ++ ++static void vmcs_clear(struct vmcs *vmcs) ++{ ++ u64 phys_addr = __pa(vmcs); ++ bool error; ++ ++ asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) CC_SET(na) ++ : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr) ++ : "memory"); ++ if (unlikely(error)) ++ printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", ++ vmcs, phys_addr); ++} ++ ++static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) ++{ ++ vmcs_clear(loaded_vmcs->vmcs); ++ if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) ++ vmcs_clear(loaded_vmcs->shadow_vmcs); ++ loaded_vmcs->cpu = -1; ++ loaded_vmcs->launched = 0; ++} ++ ++static void vmcs_load(struct vmcs *vmcs) ++{ ++ u64 phys_addr = __pa(vmcs); ++ bool error; ++ ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_load(phys_addr); ++ ++ asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na) ++ : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr) ++ : "memory"); ++ if (unlikely(error)) ++ printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n", ++ vmcs, phys_addr); ++} ++ ++#ifdef CONFIG_KEXEC_CORE ++/* ++ * This bitmap is used to indicate whether the vmclear ++ * operation is enabled on all cpus. All disabled by ++ * default. 
++ */ ++static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE; ++ ++static inline void crash_enable_local_vmclear(int cpu) ++{ ++ cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap); ++} ++ ++static inline void crash_disable_local_vmclear(int cpu) ++{ ++ cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap); ++} ++ ++static inline int crash_local_vmclear_enabled(int cpu) ++{ ++ return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap); ++} ++ ++static void crash_vmclear_local_loaded_vmcss(void) ++{ ++ int cpu = raw_smp_processor_id(); ++ struct loaded_vmcs *v; ++ ++ if (!crash_local_vmclear_enabled(cpu)) ++ return; ++ ++ list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), ++ loaded_vmcss_on_cpu_link) ++ vmcs_clear(v->vmcs); ++} ++#else ++static inline void crash_enable_local_vmclear(int cpu) { } ++static inline void crash_disable_local_vmclear(int cpu) { } ++#endif /* CONFIG_KEXEC_CORE */ ++ ++static void __loaded_vmcs_clear(void *arg) ++{ ++ struct loaded_vmcs *loaded_vmcs = arg; ++ int cpu = raw_smp_processor_id(); ++ ++ if (loaded_vmcs->cpu != cpu) ++ return; /* vcpu migration can race with cpu offline */ ++ if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) ++ per_cpu(current_vmcs, cpu) = NULL; ++ crash_disable_local_vmclear(cpu); ++ list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); ++ ++ /* ++ * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link ++ * is before setting loaded_vmcs->vcpu to -1 which is done in ++ * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist ++ * then adds the vmcs into percpu list before it is deleted. ++ */ ++ smp_wmb(); ++ ++ loaded_vmcs_init(loaded_vmcs); ++ crash_enable_local_vmclear(cpu); ++} ++ ++static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) ++{ ++ int cpu = loaded_vmcs->cpu; ++ ++ if (cpu != -1) ++ smp_call_function_single(cpu, ++ __loaded_vmcs_clear, loaded_vmcs, 1); ++} ++ ++static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr) ++{ ++ if (vpid == 0) ++ return true; ++ ++ if (cpu_has_vmx_invvpid_individual_addr()) { ++ __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr); ++ return true; ++ } ++ ++ return false; ++} ++ ++static inline void vpid_sync_vcpu_single(int vpid) ++{ ++ if (vpid == 0) ++ return; ++ ++ if (cpu_has_vmx_invvpid_single()) ++ __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0); ++} ++ ++static inline void vpid_sync_vcpu_global(void) ++{ ++ if (cpu_has_vmx_invvpid_global()) ++ __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); ++} ++ ++static inline void vpid_sync_context(int vpid) ++{ ++ if (cpu_has_vmx_invvpid_single()) ++ vpid_sync_vcpu_single(vpid); ++ else ++ vpid_sync_vcpu_global(); ++} ++ ++static inline void ept_sync_global(void) ++{ ++ __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); ++} ++ ++static inline void ept_sync_context(u64 eptp) ++{ ++ if (cpu_has_vmx_invept_context()) ++ __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); ++ else ++ ept_sync_global(); ++} ++ ++static __always_inline void vmcs_check16(unsigned long field) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, ++ "16-bit accessor invalid for 64-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, ++ "16-bit accessor invalid for 64-bit high field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, ++ "16-bit accessor invalid for 32-bit high field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, ++ "16-bit accessor invalid for natural width field"); ++} ++ ++static 
__always_inline void vmcs_check32(unsigned long field) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, ++ "32-bit accessor invalid for 16-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, ++ "32-bit accessor invalid for natural width field"); ++} ++ ++static __always_inline void vmcs_check64(unsigned long field) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, ++ "64-bit accessor invalid for 16-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, ++ "64-bit accessor invalid for 64-bit high field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, ++ "64-bit accessor invalid for 32-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, ++ "64-bit accessor invalid for natural width field"); ++} ++ ++static __always_inline void vmcs_checkl(unsigned long field) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, ++ "Natural width accessor invalid for 16-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, ++ "Natural width accessor invalid for 64-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, ++ "Natural width accessor invalid for 64-bit high field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, ++ "Natural width accessor invalid for 32-bit field"); ++} ++ ++static __always_inline unsigned long __vmcs_readl(unsigned long field) ++{ ++ unsigned long value; ++ ++ asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0") ++ : "=a"(value) : "d"(field) : "cc"); ++ return value; ++} ++ ++static __always_inline u16 vmcs_read16(unsigned long field) ++{ ++ vmcs_check16(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_read16(field); ++ return __vmcs_readl(field); ++} ++ ++static __always_inline u32 vmcs_read32(unsigned long field) ++{ ++ vmcs_check32(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_read32(field); ++ return __vmcs_readl(field); ++} ++ ++static __always_inline u64 vmcs_read64(unsigned long field) ++{ ++ vmcs_check64(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_read64(field); ++#ifdef CONFIG_X86_64 ++ return __vmcs_readl(field); ++#else ++ return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32); ++#endif ++} ++ ++static __always_inline unsigned long vmcs_readl(unsigned long field) ++{ ++ vmcs_checkl(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_read64(field); ++ return __vmcs_readl(field); ++} ++ ++static noinline void vmwrite_error(unsigned long field, unsigned long value) ++{ ++ printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", ++ field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); ++ dump_stack(); ++} ++ ++static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) ++{ ++ bool error; ++ ++ asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) CC_SET(na) ++ : CC_OUT(na) (error) : "a"(value), "d"(field)); ++ if (unlikely(error)) ++ vmwrite_error(field, value); ++} ++ ++static __always_inline void vmcs_write16(unsigned long field, u16 value) ++{ ++ vmcs_check16(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write16(field, value); ++ ++ __vmcs_writel(field, value); ++} ++ ++static __always_inline void vmcs_write32(unsigned long field, u32 value) ++{ ++ vmcs_check32(field); ++ if 
(static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write32(field, value); ++ ++ __vmcs_writel(field, value); ++} ++ ++static __always_inline void vmcs_write64(unsigned long field, u64 value) ++{ ++ vmcs_check64(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write64(field, value); ++ ++ __vmcs_writel(field, value); ++#ifndef CONFIG_X86_64 ++ asm volatile (""); ++ __vmcs_writel(field+1, value >> 32); ++#endif ++} ++ ++static __always_inline void vmcs_writel(unsigned long field, unsigned long value) ++{ ++ vmcs_checkl(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write64(field, value); ++ ++ __vmcs_writel(field, value); ++} ++ ++static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, ++ "vmcs_clear_bits does not support 64-bit fields"); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write32(field, evmcs_read32(field) & ~mask); ++ ++ __vmcs_writel(field, __vmcs_readl(field) & ~mask); ++} ++ ++static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, ++ "vmcs_set_bits does not support 64-bit fields"); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write32(field, evmcs_read32(field) | mask); ++ ++ __vmcs_writel(field, __vmcs_readl(field) | mask); ++} ++ ++static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx) ++{ ++ vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS); ++} ++ ++static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) ++{ ++ vmcs_write32(VM_ENTRY_CONTROLS, val); ++ vmx->vm_entry_controls_shadow = val; ++} ++ ++static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val) ++{ ++ if (vmx->vm_entry_controls_shadow != val) ++ vm_entry_controls_init(vmx, val); ++} ++ ++static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx) ++{ ++ return vmx->vm_entry_controls_shadow; ++} ++ ++ ++static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val) ++{ ++ vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val); ++} ++ ++static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val) ++{ ++ vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val); ++} ++ ++static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx) ++{ ++ vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS); ++} ++ ++static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val) ++{ ++ vmcs_write32(VM_EXIT_CONTROLS, val); ++ vmx->vm_exit_controls_shadow = val; ++} ++ ++static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val) ++{ ++ if (vmx->vm_exit_controls_shadow != val) ++ vm_exit_controls_init(vmx, val); ++} ++ ++static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx) ++{ ++ return vmx->vm_exit_controls_shadow; ++} ++ ++ ++static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val) ++{ ++ vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val); ++} ++ ++static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val) ++{ ++ vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val); ++} ++ ++static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) ++{ ++ vmx->segment_cache.bitmask = 0; ++} ++ ++static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, ++ unsigned field) ++{ ++ bool ret; ++ u32 mask = 1 << (seg * SEG_FIELD_NR + field); ++ ++ if 
(!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { ++ vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); ++ vmx->segment_cache.bitmask = 0; ++ } ++ ret = vmx->segment_cache.bitmask & mask; ++ vmx->segment_cache.bitmask |= mask; ++ return ret; ++} ++ ++static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) ++{ ++ u16 *p = &vmx->segment_cache.seg[seg].selector; ++ ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) ++ *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); ++ return *p; ++} ++ ++static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) ++{ ++ ulong *p = &vmx->segment_cache.seg[seg].base; ++ ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) ++ *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); ++ return *p; ++} ++ ++static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) ++{ ++ u32 *p = &vmx->segment_cache.seg[seg].limit; ++ ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) ++ *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); ++ return *p; ++} ++ ++static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) ++{ ++ u32 *p = &vmx->segment_cache.seg[seg].ar; ++ ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) ++ *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); ++ return *p; ++} ++ ++static void update_exception_bitmap(struct kvm_vcpu *vcpu) ++{ ++ u32 eb; ++ ++ eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | ++ (1u << DB_VECTOR) | (1u << AC_VECTOR); ++ /* ++ * Guest access to VMware backdoor ports could legitimately ++ * trigger #GP because of TSS I/O permission bitmap. ++ * We intercept those #GP and allow access to them anyway ++ * as VMware does. ++ */ ++ if (enable_vmware_backdoor) ++ eb |= (1u << GP_VECTOR); ++ if ((vcpu->guest_debug & ++ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == ++ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ++ eb |= 1u << BP_VECTOR; ++ if (to_vmx(vcpu)->rmode.vm86_active) ++ eb = ~0; ++ if (enable_ept) ++ eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ ++ ++ /* When we are running a nested L2 guest and L1 specified for it a ++ * certain exception bitmap, we must trap the same exceptions and pass ++ * them to L1. When running L2, we will only handle the exceptions ++ * specified above if L1 did not want them. ++ */ ++ if (is_guest_mode(vcpu)) ++ eb |= get_vmcs12(vcpu)->exception_bitmap; ++ ++ vmcs_write32(EXCEPTION_BITMAP, eb); ++} ++ ++/* ++ * Check if MSR is intercepted for currently loaded MSR bitmap. ++ */ ++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) ++{ ++ unsigned long *msr_bitmap; ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return true; ++ ++ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; ++ ++ if (msr <= 0x1fff) { ++ return !!test_bit(msr, msr_bitmap + 0x800 / f); ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ return !!test_bit(msr, msr_bitmap + 0xc00 / f); ++ } ++ ++ return true; ++} ++ ++/* ++ * Check if MSR is intercepted for L01 MSR bitmap. 
++ */ ++static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) ++{ ++ unsigned long *msr_bitmap; ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return true; ++ ++ msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; ++ ++ if (msr <= 0x1fff) { ++ return !!test_bit(msr, msr_bitmap + 0x800 / f); ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ return !!test_bit(msr, msr_bitmap + 0xc00 / f); ++ } ++ ++ return true; ++} ++ ++static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, ++ unsigned long entry, unsigned long exit) ++{ ++ vm_entry_controls_clearbit(vmx, entry); ++ vm_exit_controls_clearbit(vmx, exit); ++} ++ ++static int find_msr(struct vmx_msrs *m, unsigned int msr) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < m->nr; ++i) { ++ if (m->val[i].index == msr) ++ return i; ++ } ++ return -ENOENT; ++} ++ ++static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) ++{ ++ int i; ++ struct msr_autoload *m = &vmx->msr_autoload; ++ ++ switch (msr) { ++ case MSR_EFER: ++ if (cpu_has_load_ia32_efer) { ++ clear_atomic_switch_msr_special(vmx, ++ VM_ENTRY_LOAD_IA32_EFER, ++ VM_EXIT_LOAD_IA32_EFER); ++ return; ++ } ++ break; ++ case MSR_CORE_PERF_GLOBAL_CTRL: ++ if (cpu_has_load_perf_global_ctrl) { ++ clear_atomic_switch_msr_special(vmx, ++ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, ++ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); ++ return; ++ } ++ break; ++ } ++ i = find_msr(&m->guest, msr); ++ if (i < 0) ++ goto skip_guest; ++ --m->guest.nr; ++ m->guest.val[i] = m->guest.val[m->guest.nr]; ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); ++ ++skip_guest: ++ i = find_msr(&m->host, msr); ++ if (i < 0) ++ return; ++ ++ --m->host.nr; ++ m->host.val[i] = m->host.val[m->host.nr]; ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); ++} ++ ++static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, ++ unsigned long entry, unsigned long exit, ++ unsigned long guest_val_vmcs, unsigned long host_val_vmcs, ++ u64 guest_val, u64 host_val) ++{ ++ vmcs_write64(guest_val_vmcs, guest_val); ++ vmcs_write64(host_val_vmcs, host_val); ++ vm_entry_controls_setbit(vmx, entry); ++ vm_exit_controls_setbit(vmx, exit); ++} ++ ++static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, ++ u64 guest_val, u64 host_val, bool entry_only) ++{ ++ int i, j = 0; ++ struct msr_autoload *m = &vmx->msr_autoload; ++ ++ switch (msr) { ++ case MSR_EFER: ++ if (cpu_has_load_ia32_efer) { ++ add_atomic_switch_msr_special(vmx, ++ VM_ENTRY_LOAD_IA32_EFER, ++ VM_EXIT_LOAD_IA32_EFER, ++ GUEST_IA32_EFER, ++ HOST_IA32_EFER, ++ guest_val, host_val); ++ return; ++ } ++ break; ++ case MSR_CORE_PERF_GLOBAL_CTRL: ++ if (cpu_has_load_perf_global_ctrl) { ++ add_atomic_switch_msr_special(vmx, ++ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, ++ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, ++ GUEST_IA32_PERF_GLOBAL_CTRL, ++ HOST_IA32_PERF_GLOBAL_CTRL, ++ guest_val, host_val); ++ return; ++ } ++ break; ++ case MSR_IA32_PEBS_ENABLE: ++ /* PEBS needs a quiescent period after being disabled (to write ++ * a record). Disabling PEBS through VMX MSR swapping doesn't ++ * provide that period, so a CPU could write host's record into ++ * guest's memory. ++ */ ++ wrmsrl(MSR_IA32_PEBS_ENABLE, 0); ++ } ++ ++ i = find_msr(&m->guest, msr); ++ if (!entry_only) ++ j = find_msr(&m->host, msr); ++ ++ if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) || ++ (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) { ++ printk_once(KERN_WARNING "Not enough msr switch entries. 
" ++ "Can't add msr %x\n", msr); ++ return; ++ } ++ if (i < 0) { ++ i = m->guest.nr++; ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); ++ } ++ m->guest.val[i].index = msr; ++ m->guest.val[i].value = guest_val; ++ ++ if (entry_only) ++ return; ++ ++ if (j < 0) { ++ j = m->host.nr++; ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); ++ } ++ m->host.val[j].index = msr; ++ m->host.val[j].value = host_val; ++} ++ ++static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) ++{ ++ u64 guest_efer = vmx->vcpu.arch.efer; ++ u64 ignore_bits = 0; ++ ++ /* Shadow paging assumes NX to be available. */ ++ if (!enable_ept) ++ guest_efer |= EFER_NX; ++ ++ /* ++ * LMA and LME handled by hardware; SCE meaningless outside long mode. ++ */ ++ ignore_bits |= EFER_SCE; ++#ifdef CONFIG_X86_64 ++ ignore_bits |= EFER_LMA | EFER_LME; ++ /* SCE is meaningful only in long mode on Intel */ ++ if (guest_efer & EFER_LMA) ++ ignore_bits &= ~(u64)EFER_SCE; ++#endif ++ ++ clear_atomic_switch_msr(vmx, MSR_EFER); ++ ++ /* ++ * On EPT, we can't emulate NX, so we must switch EFER atomically. ++ * On CPUs that support "load IA32_EFER", always switch EFER ++ * atomically, since it's faster than switching it manually. ++ */ ++ if (cpu_has_load_ia32_efer || ++ (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { ++ if (!(guest_efer & EFER_LMA)) ++ guest_efer &= ~EFER_LME; ++ if (guest_efer != host_efer) ++ add_atomic_switch_msr(vmx, MSR_EFER, ++ guest_efer, host_efer, false); ++ return false; ++ } else { ++ guest_efer &= ~ignore_bits; ++ guest_efer |= host_efer & ignore_bits; ++ ++ vmx->guest_msrs[efer_offset].data = guest_efer; ++ vmx->guest_msrs[efer_offset].mask = ~ignore_bits; ++ ++ return true; ++ } ++} ++ ++#ifdef CONFIG_X86_32 ++/* ++ * On 32-bit kernels, VM exits still load the FS and GS bases from the ++ * VMCS rather than the segment table. KVM uses this helper to figure ++ * out the current bases to poke them into the VMCS before entry. ++ */ ++static unsigned long segment_base(u16 selector) ++{ ++ struct desc_struct *table; ++ unsigned long v; ++ ++ if (!(selector & ~SEGMENT_RPL_MASK)) ++ return 0; ++ ++ table = get_current_gdt_ro(); ++ ++ if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) { ++ u16 ldt_selector = kvm_read_ldt(); ++ ++ if (!(ldt_selector & ~SEGMENT_RPL_MASK)) ++ return 0; ++ ++ table = (struct desc_struct *)segment_base(ldt_selector); ++ } ++ v = get_desc_base(&table[selector >> 3]); ++ return v; ++} ++#endif ++ ++static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs_host_state *host_state; ++#ifdef CONFIG_X86_64 ++ int cpu = raw_smp_processor_id(); ++#endif ++ unsigned long fs_base, gs_base; ++ u16 fs_sel, gs_sel; ++ int i; ++ ++ vmx->req_immediate_exit = false; ++ ++ /* ++ * Note that guest MSRs to be saved/restored can also be changed ++ * when guest state is loaded. This happens when guest transitions ++ * to/from long-mode by setting MSR_EFER.LMA. ++ */ ++ if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) { ++ vmx->guest_msrs_dirty = false; ++ for (i = 0; i < vmx->save_nmsrs; ++i) ++ kvm_set_shared_msr(vmx->guest_msrs[i].index, ++ vmx->guest_msrs[i].data, ++ vmx->guest_msrs[i].mask); ++ ++ } ++ ++ if (vmx->loaded_cpu_state) ++ return; ++ ++ vmx->loaded_cpu_state = vmx->loaded_vmcs; ++ host_state = &vmx->loaded_cpu_state->host_state; ++ ++ /* ++ * Set host fs and gs selectors. Unfortunately, 22.2.3 does not ++ * allow segment selectors with cpl > 0 or ti == 1. 
++ */ ++ host_state->ldt_sel = kvm_read_ldt(); ++ ++#ifdef CONFIG_X86_64 ++ savesegment(ds, host_state->ds_sel); ++ savesegment(es, host_state->es_sel); ++ ++ gs_base = cpu_kernelmode_gs_base(cpu); ++ if (likely(is_64bit_mm(current->mm))) { ++ save_fsgs_for_kvm(); ++ fs_sel = current->thread.fsindex; ++ gs_sel = current->thread.gsindex; ++ fs_base = current->thread.fsbase; ++ vmx->msr_host_kernel_gs_base = current->thread.gsbase; ++ } else { ++ savesegment(fs, fs_sel); ++ savesegment(gs, gs_sel); ++ fs_base = read_msr(MSR_FS_BASE); ++ vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); ++ } ++ ++ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); ++#else ++ savesegment(fs, fs_sel); ++ savesegment(gs, gs_sel); ++ fs_base = segment_base(fs_sel); ++ gs_base = segment_base(gs_sel); ++#endif ++ ++ if (unlikely(fs_sel != host_state->fs_sel)) { ++ if (!(fs_sel & 7)) ++ vmcs_write16(HOST_FS_SELECTOR, fs_sel); ++ else ++ vmcs_write16(HOST_FS_SELECTOR, 0); ++ host_state->fs_sel = fs_sel; ++ } ++ if (unlikely(gs_sel != host_state->gs_sel)) { ++ if (!(gs_sel & 7)) ++ vmcs_write16(HOST_GS_SELECTOR, gs_sel); ++ else ++ vmcs_write16(HOST_GS_SELECTOR, 0); ++ host_state->gs_sel = gs_sel; ++ } ++ if (unlikely(fs_base != host_state->fs_base)) { ++ vmcs_writel(HOST_FS_BASE, fs_base); ++ host_state->fs_base = fs_base; ++ } ++ if (unlikely(gs_base != host_state->gs_base)) { ++ vmcs_writel(HOST_GS_BASE, gs_base); ++ host_state->gs_base = gs_base; ++ } ++} ++ ++static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) ++{ ++ struct vmcs_host_state *host_state; ++ ++ if (!vmx->loaded_cpu_state) ++ return; ++ ++ WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs); ++ host_state = &vmx->loaded_cpu_state->host_state; ++ ++ ++vmx->vcpu.stat.host_state_reload; ++ vmx->loaded_cpu_state = NULL; ++ ++#ifdef CONFIG_X86_64 ++ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); ++#endif ++ if (host_state->ldt_sel || (host_state->gs_sel & 7)) { ++ kvm_load_ldt(host_state->ldt_sel); ++#ifdef CONFIG_X86_64 ++ load_gs_index(host_state->gs_sel); ++#else ++ loadsegment(gs, host_state->gs_sel); ++#endif ++ } ++ if (host_state->fs_sel & 7) ++ loadsegment(fs, host_state->fs_sel); ++#ifdef CONFIG_X86_64 ++ if (unlikely(host_state->ds_sel | host_state->es_sel)) { ++ loadsegment(ds, host_state->ds_sel); ++ loadsegment(es, host_state->es_sel); ++ } ++#endif ++ invalidate_tss_limit(); ++#ifdef CONFIG_X86_64 ++ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); ++#endif ++ load_fixmap_gdt(raw_smp_processor_id()); ++} ++ ++#ifdef CONFIG_X86_64 ++static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) ++{ ++ preempt_disable(); ++ if (vmx->loaded_cpu_state) ++ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); ++ preempt_enable(); ++ return vmx->msr_guest_kernel_gs_base; ++} ++ ++static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) ++{ ++ preempt_disable(); ++ if (vmx->loaded_cpu_state) ++ wrmsrl(MSR_KERNEL_GS_BASE, data); ++ preempt_enable(); ++ vmx->msr_guest_kernel_gs_base = data; ++} ++#endif ++ ++static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) ++{ ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ++ struct pi_desc old, new; ++ unsigned int dest; ++ ++ /* ++ * In case of hot-plug or hot-unplug, we may have to undo ++ * vmx_vcpu_pi_put even if there is no assigned device. And we ++ * always keep PI.NDST up to date for simplicity: it makes the ++ * code easier, and CPU migration is not a fast path. 
++ */ ++ if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) ++ return; ++ ++ /* ++ * First handle the simple case where no cmpxchg is necessary; just ++ * allow posting non-urgent interrupts. ++ * ++ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change ++ * PI.NDST: pi_post_block will do it for us and the wakeup_handler ++ * expects the VCPU to be on the blocked_vcpu_list that matches ++ * PI.NDST. ++ */ ++ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || ++ vcpu->cpu == cpu) { ++ pi_clear_sn(pi_desc); ++ return; ++ } ++ ++ /* The full case. */ ++ do { ++ old.control = new.control = pi_desc->control; ++ ++ dest = cpu_physical_id(cpu); ++ ++ if (x2apic_enabled()) ++ new.ndst = dest; ++ else ++ new.ndst = (dest << 8) & 0xFF00; ++ ++ new.sn = 0; ++ } while (cmpxchg64(&pi_desc->control, old.control, ++ new.control) != old.control); ++} ++ ++static void decache_tsc_multiplier(struct vcpu_vmx *vmx) ++{ ++ vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio; ++ vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); ++} ++ ++/* ++ * Switches to specified vcpu, until a matching vcpu_put(), but assumes ++ * vcpu mutex is already taken. ++ */ ++static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ bool already_loaded = vmx->loaded_vmcs->cpu == cpu; ++ ++ if (!already_loaded) { ++ loaded_vmcs_clear(vmx->loaded_vmcs); ++ local_irq_disable(); ++ crash_disable_local_vmclear(cpu); ++ ++ /* ++ * Read loaded_vmcs->cpu should be before fetching ++ * loaded_vmcs->loaded_vmcss_on_cpu_link. ++ * See the comments in __loaded_vmcs_clear(). ++ */ ++ smp_rmb(); ++ ++ list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, ++ &per_cpu(loaded_vmcss_on_cpu, cpu)); ++ crash_enable_local_vmclear(cpu); ++ local_irq_enable(); ++ } ++ ++ if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { ++ per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; ++ vmcs_load(vmx->loaded_vmcs->vmcs); ++ indirect_branch_prediction_barrier(); ++ } ++ ++ if (!already_loaded) { ++ void *gdt = get_current_gdt_ro(); ++ unsigned long sysenter_esp; ++ ++ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++ ++ /* ++ * Linux uses per-cpu TSS and GDT, so set these when switching ++ * processors. See 22.2.4. ++ */ ++ vmcs_writel(HOST_TR_BASE, ++ (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); ++ vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ ++ ++ /* ++ * VM exits change the host TR limit to 0x67 after a VM ++ * exit. This is okay, since 0x67 covers everything except ++ * the IO bitmap and have have code to handle the IO bitmap ++ * being lost after a VM exit. 
++ */ ++ BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67); ++ ++ rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); ++ vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ ++ ++ vmx->loaded_vmcs->cpu = cpu; ++ } ++ ++ /* Setup TSC multiplier */ ++ if (kvm_has_tsc_control && ++ vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) ++ decache_tsc_multiplier(vmx); ++ ++ vmx_vcpu_pi_load(vcpu, cpu); ++ vmx->host_pkru = read_pkru(); ++ vmx->host_debugctlmsr = get_debugctlmsr(); ++} ++ ++static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) ++{ ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ++ ++ if (!kvm_arch_has_assigned_device(vcpu->kvm) || ++ !irq_remapping_cap(IRQ_POSTING_CAP) || ++ !kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ /* Set SN when the vCPU is preempted */ ++ if (vcpu->preempted) ++ pi_set_sn(pi_desc); ++} ++ ++static void vmx_vcpu_put(struct kvm_vcpu *vcpu) ++{ ++ vmx_vcpu_pi_put(vcpu); ++ ++ vmx_prepare_switch_to_host(to_vmx(vcpu)); ++} ++ ++static bool emulation_required(struct kvm_vcpu *vcpu) ++{ ++ return emulate_invalid_guest_state && !guest_state_valid(vcpu); ++} ++ ++static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); ++ ++/* ++ * Return the cr0 value that a nested guest would read. This is a combination ++ * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by ++ * its hypervisor (cr0_read_shadow). ++ */ ++static inline unsigned long nested_read_cr0(struct vmcs12 *fields) ++{ ++ return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) | ++ (fields->cr0_read_shadow & fields->cr0_guest_host_mask); ++} ++static inline unsigned long nested_read_cr4(struct vmcs12 *fields) ++{ ++ return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) | ++ (fields->cr4_read_shadow & fields->cr4_guest_host_mask); ++} ++ ++static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) ++{ ++ unsigned long rflags, save_rflags; ++ ++ if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { ++ __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); ++ rflags = vmcs_readl(GUEST_RFLAGS); ++ if (to_vmx(vcpu)->rmode.vm86_active) { ++ rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; ++ save_rflags = to_vmx(vcpu)->rmode.save_rflags; ++ rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; ++ } ++ to_vmx(vcpu)->rflags = rflags; ++ } ++ return to_vmx(vcpu)->rflags; ++} ++ ++static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) ++{ ++ unsigned long old_rflags = vmx_get_rflags(vcpu); ++ ++ __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); ++ to_vmx(vcpu)->rflags = rflags; ++ if (to_vmx(vcpu)->rmode.vm86_active) { ++ to_vmx(vcpu)->rmode.save_rflags = rflags; ++ rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; ++ } ++ vmcs_writel(GUEST_RFLAGS, rflags); ++ ++ if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM) ++ to_vmx(vcpu)->emulation_required = emulation_required(vcpu); ++} ++ ++static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) ++{ ++ u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); ++ int ret = 0; ++ ++ if (interruptibility & GUEST_INTR_STATE_STI) ++ ret |= KVM_X86_SHADOW_INT_STI; ++ if (interruptibility & GUEST_INTR_STATE_MOV_SS) ++ ret |= KVM_X86_SHADOW_INT_MOV_SS; ++ ++ return ret; ++} ++ ++static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) ++{ ++ u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); ++ u32 interruptibility = interruptibility_old; ++ ++ interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); ++ ++ if (mask & KVM_X86_SHADOW_INT_MOV_SS) 
++ interruptibility |= GUEST_INTR_STATE_MOV_SS; ++ else if (mask & KVM_X86_SHADOW_INT_STI) ++ interruptibility |= GUEST_INTR_STATE_STI; ++ ++ if ((interruptibility != interruptibility_old)) ++ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); ++} ++ ++static void skip_emulated_instruction(struct kvm_vcpu *vcpu) ++{ ++ unsigned long rip; ++ ++ rip = kvm_rip_read(vcpu); ++ rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); ++ kvm_rip_write(vcpu, rip); ++ ++ /* skipping an emulated instruction also counts */ ++ vmx_set_interrupt_shadow(vcpu, 0); ++} ++ ++static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, ++ unsigned long exit_qual) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ unsigned int nr = vcpu->arch.exception.nr; ++ u32 intr_info = nr | INTR_INFO_VALID_MASK; ++ ++ if (vcpu->arch.exception.has_error_code) { ++ vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; ++ intr_info |= INTR_INFO_DELIVER_CODE_MASK; ++ } ++ ++ if (kvm_exception_is_soft(nr)) ++ intr_info |= INTR_TYPE_SOFT_EXCEPTION; ++ else ++ intr_info |= INTR_TYPE_HARD_EXCEPTION; ++ ++ if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) && ++ vmx_get_nmi_mask(vcpu)) ++ intr_info |= INTR_INFO_UNBLOCK_NMI; ++ ++ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual); ++} ++ ++/* ++ * KVM wants to inject page-faults which it got to the guest. This function ++ * checks whether in a nested guest, we need to inject them to L1 or L2. ++ */ ++static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ unsigned int nr = vcpu->arch.exception.nr; ++ ++ if (nr == PF_VECTOR) { ++ if (vcpu->arch.exception.nested_apf) { ++ *exit_qual = vcpu->arch.apf.nested_apf_token; ++ return 1; ++ } ++ /* ++ * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception. ++ * The fix is to add the ancillary datum (CR2 or DR6) to structs ++ * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 ++ * can be written only when inject_pending_event runs. This should be ++ * conditional on a new capability---if the capability is disabled, ++ * kvm_multiple_exception would write the ancillary information to ++ * CR2 or DR6, for backwards ABI-compatibility. ++ */ ++ if (nested_vmx_is_page_fault_vmexit(vmcs12, ++ vcpu->arch.exception.error_code)) { ++ *exit_qual = vcpu->arch.cr2; ++ return 1; ++ } ++ } else { ++ if (vmcs12->exception_bitmap & (1u << nr)) { ++ if (nr == DB_VECTOR) { ++ *exit_qual = vcpu->arch.dr6; ++ *exit_qual &= ~(DR6_FIXED_1 | DR6_BT); ++ *exit_qual ^= DR6_RTM; ++ } else { ++ *exit_qual = 0; ++ } ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++ ++static void vmx_clear_hlt(struct kvm_vcpu *vcpu) ++{ ++ /* ++ * Ensure that we clear the HLT state in the VMCS. We don't need to ++ * explicitly skip the instruction because if the HLT state is set, ++ * then the instruction is already executing and RIP has already been ++ * advanced. 
++ */ ++ if (kvm_hlt_in_guest(vcpu->kvm) && ++ vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) ++ vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); ++} ++ ++static void vmx_queue_exception(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned nr = vcpu->arch.exception.nr; ++ bool has_error_code = vcpu->arch.exception.has_error_code; ++ u32 error_code = vcpu->arch.exception.error_code; ++ u32 intr_info = nr | INTR_INFO_VALID_MASK; ++ ++ if (has_error_code) { ++ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); ++ intr_info |= INTR_INFO_DELIVER_CODE_MASK; ++ } ++ ++ if (vmx->rmode.vm86_active) { ++ int inc_eip = 0; ++ if (kvm_exception_is_soft(nr)) ++ inc_eip = vcpu->arch.event_exit_inst_len; ++ if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ return; ++ } ++ ++ WARN_ON_ONCE(vmx->emulation_required); ++ ++ if (kvm_exception_is_soft(nr)) { ++ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, ++ vmx->vcpu.arch.event_exit_inst_len); ++ intr_info |= INTR_TYPE_SOFT_EXCEPTION; ++ } else ++ intr_info |= INTR_TYPE_HARD_EXCEPTION; ++ ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); ++ ++ vmx_clear_hlt(vcpu); ++} ++ ++static bool vmx_rdtscp_supported(void) ++{ ++ return cpu_has_vmx_rdtscp(); ++} ++ ++static bool vmx_invpcid_supported(void) ++{ ++ return cpu_has_vmx_invpcid(); ++} ++ ++/* ++ * Swap MSR entry in host/guest MSR entry array. ++ */ ++static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) ++{ ++ struct shared_msr_entry tmp; ++ ++ tmp = vmx->guest_msrs[to]; ++ vmx->guest_msrs[to] = vmx->guest_msrs[from]; ++ vmx->guest_msrs[from] = tmp; ++} ++ ++/* ++ * Set up the vmcs to automatically save and restore system ++ * msrs. Don't touch the 64-bit msrs if the guest is in legacy ++ * mode, as fiddling with msrs is very expensive. ++ */ ++static void setup_msrs(struct vcpu_vmx *vmx) ++{ ++ int save_nmsrs, index; ++ ++ save_nmsrs = 0; ++#ifdef CONFIG_X86_64 ++ if (is_long_mode(&vmx->vcpu)) { ++ index = __find_msr_index(vmx, MSR_SYSCALL_MASK); ++ if (index >= 0) ++ move_msr_up(vmx, index, save_nmsrs++); ++ index = __find_msr_index(vmx, MSR_LSTAR); ++ if (index >= 0) ++ move_msr_up(vmx, index, save_nmsrs++); ++ index = __find_msr_index(vmx, MSR_CSTAR); ++ if (index >= 0) ++ move_msr_up(vmx, index, save_nmsrs++); ++ /* ++ * MSR_STAR is only needed on long mode guests, and only ++ * if efer.sce is enabled. 
++ */ ++ index = __find_msr_index(vmx, MSR_STAR); ++ if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE)) ++ move_msr_up(vmx, index, save_nmsrs++); ++ } ++#endif ++ index = __find_msr_index(vmx, MSR_EFER); ++ if (index >= 0 && update_transition_efer(vmx, index)) ++ move_msr_up(vmx, index, save_nmsrs++); ++ index = __find_msr_index(vmx, MSR_TSC_AUX); ++ if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) ++ move_msr_up(vmx, index, save_nmsrs++); ++ ++ vmx->save_nmsrs = save_nmsrs; ++ vmx->guest_msrs_dirty = true; ++ ++ if (cpu_has_vmx_msr_bitmap()) ++ vmx_update_msr_bitmap(&vmx->vcpu); ++} ++ ++static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ if (is_guest_mode(vcpu) && ++ (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)) ++ return vcpu->arch.tsc_offset - vmcs12->tsc_offset; ++ ++ return vcpu->arch.tsc_offset; ++} ++ ++static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) ++{ ++ u64 active_offset = offset; ++ if (is_guest_mode(vcpu)) { ++ /* ++ * We're here if L1 chose not to trap WRMSR to TSC. According ++ * to the spec, this should set L1's TSC; The offset that L1 ++ * set for L2 remains unchanged, and still needs to be added ++ * to the newly set TSC to get L2's TSC. ++ */ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING)) ++ active_offset += vmcs12->tsc_offset; ++ } else { ++ trace_kvm_write_tsc_offset(vcpu->vcpu_id, ++ vmcs_read64(TSC_OFFSET), offset); ++ } ++ ++ vmcs_write64(TSC_OFFSET, active_offset); ++ return active_offset; ++} ++ ++/* ++ * nested_vmx_allowed() checks whether a guest should be allowed to use VMX ++ * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for ++ * all guests if the "nested" module option is off, and can also be disabled ++ * for a single guest by disabling its VMX cpuid bit. ++ */ ++static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) ++{ ++ return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); ++} ++ ++/* ++ * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be ++ * returned for the various VMX controls MSRs when nested VMX is enabled. ++ * The same values should also be used to verify that vmcs12 control fields are ++ * valid during nested entry from L1 to L2. ++ * Each of these control msrs has a low and high 32-bit half: A low bit is on ++ * if the corresponding bit in the (32-bit) control field *must* be on, and a ++ * bit in the high half is on if the corresponding bit in the control field ++ * may be on. See also vmx_control_verify(). ++ */ ++static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) ++{ ++ if (!nested) { ++ memset(msrs, 0, sizeof(*msrs)); ++ return; ++ } ++ ++ /* ++ * Note that as a general rule, the high half of the MSRs (bits in ++ * the control fields which may be 1) should be initialized by the ++ * intersection of the underlying hardware's MSR (i.e., features which ++ * can be supported) and the list of features we want to expose - ++ * because they are known to be properly supported in our code. ++ * Also, usually, the low half of the MSRs (bits which must be 1) can ++ * be set to 0, meaning that L1 may turn off any of these bits. 
The ++ * reason is that if one of these bits is necessary, it will appear ++ * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control ++ * fields of vmcs01 and vmcs02, will turn these bits off - and ++ * nested_vmx_exit_reflected() will not pass related exits to L1. ++ * These rules have exceptions below. ++ */ ++ ++ /* pin-based controls */ ++ rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ++ msrs->pinbased_ctls_low, ++ msrs->pinbased_ctls_high); ++ msrs->pinbased_ctls_low |= ++ PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; ++ msrs->pinbased_ctls_high &= ++ PIN_BASED_EXT_INTR_MASK | ++ PIN_BASED_NMI_EXITING | ++ PIN_BASED_VIRTUAL_NMIS | ++ (apicv ? PIN_BASED_POSTED_INTR : 0); ++ msrs->pinbased_ctls_high |= ++ PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | ++ PIN_BASED_VMX_PREEMPTION_TIMER; ++ ++ /* exit controls */ ++ rdmsr(MSR_IA32_VMX_EXIT_CTLS, ++ msrs->exit_ctls_low, ++ msrs->exit_ctls_high); ++ msrs->exit_ctls_low = ++ VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; ++ ++ msrs->exit_ctls_high &= ++#ifdef CONFIG_X86_64 ++ VM_EXIT_HOST_ADDR_SPACE_SIZE | ++#endif ++ VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; ++ msrs->exit_ctls_high |= ++ VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | ++ VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | ++ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; ++ ++ /* We support free control of debug control saving. */ ++ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; ++ ++ /* entry controls */ ++ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, ++ msrs->entry_ctls_low, ++ msrs->entry_ctls_high); ++ msrs->entry_ctls_low = ++ VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; ++ msrs->entry_ctls_high &= ++#ifdef CONFIG_X86_64 ++ VM_ENTRY_IA32E_MODE | ++#endif ++ VM_ENTRY_LOAD_IA32_PAT; ++ msrs->entry_ctls_high |= ++ (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); ++ ++ /* We support free control of debug control loading. */ ++ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; ++ ++ /* cpu-based controls */ ++ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, ++ msrs->procbased_ctls_low, ++ msrs->procbased_ctls_high); ++ msrs->procbased_ctls_low = ++ CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; ++ msrs->procbased_ctls_high &= ++ CPU_BASED_VIRTUAL_INTR_PENDING | ++ CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | ++ CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | ++ CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_CR3_STORE_EXITING | ++#ifdef CONFIG_X86_64 ++ CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING | ++#endif ++ CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | ++ CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG | ++ CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING | ++ CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING | ++ CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; ++ /* ++ * We can allow some features even when not supported by the ++ * hardware. For example, L1 can specify an MSR bitmap - and we ++ * can use it to avoid exits to L1 - even when L0 runs L2 ++ * without MSR bitmaps. ++ */ ++ msrs->procbased_ctls_high |= ++ CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | ++ CPU_BASED_USE_MSR_BITMAPS; ++ ++ /* We support free control of CR3 access interception. */ ++ msrs->procbased_ctls_low &= ++ ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); ++ ++ /* ++ * secondary cpu-based controls. Do not include those that ++ * depend on CPUID bits, they are added later by vmx_cpuid_update. 
++ */ ++ if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ++ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, ++ msrs->secondary_ctls_low, ++ msrs->secondary_ctls_high); ++ ++ msrs->secondary_ctls_low = 0; ++ msrs->secondary_ctls_high &= ++ SECONDARY_EXEC_DESC | ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ SECONDARY_EXEC_WBINVD_EXITING; ++ ++ /* ++ * We can emulate "VMCS shadowing," even if the hardware ++ * doesn't support it. ++ */ ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_SHADOW_VMCS; ++ ++ if (enable_ept) { ++ /* nested EPT: emulate EPT also to L1 */ ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_ENABLE_EPT; ++ msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT | ++ VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; ++ if (cpu_has_vmx_ept_execute_only()) ++ msrs->ept_caps |= ++ VMX_EPT_EXECUTE_ONLY_BIT; ++ msrs->ept_caps &= vmx_capability.ept; ++ msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | ++ VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | ++ VMX_EPT_1GB_PAGE_BIT; ++ if (enable_ept_ad_bits) { ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_ENABLE_PML; ++ msrs->ept_caps |= VMX_EPT_AD_BIT; ++ } ++ } ++ ++ if (cpu_has_vmx_vmfunc()) { ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_ENABLE_VMFUNC; ++ /* ++ * Advertise EPTP switching unconditionally ++ * since we emulate it ++ */ ++ if (enable_ept) ++ msrs->vmfunc_controls = ++ VMX_VMFUNC_EPTP_SWITCHING; ++ } ++ ++ /* ++ * Old versions of KVM use the single-context version without ++ * checking for support, so declare that it is supported even ++ * though it is treated as global context. The alternative is ++ * not failing the single-context invvpid, and it is worse. ++ */ ++ if (enable_vpid) { ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_ENABLE_VPID; ++ msrs->vpid_caps = VMX_VPID_INVVPID_BIT | ++ VMX_VPID_EXTENT_SUPPORTED_MASK; ++ } ++ ++ if (enable_unrestricted_guest) ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_UNRESTRICTED_GUEST; ++ ++ if (flexpriority_enabled) ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; ++ ++ /* miscellaneous data */ ++ rdmsr(MSR_IA32_VMX_MISC, ++ msrs->misc_low, ++ msrs->misc_high); ++ msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA; ++ msrs->misc_low |= ++ MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | ++ VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | ++ VMX_MISC_ACTIVITY_HLT; ++ msrs->misc_high = 0; ++ ++ /* ++ * This MSR reports some information about VMX support. We ++ * should return information about the VMX we emulate for the ++ * guest, and the VMCS structure we give it - not about the ++ * VMX support of the underlying hardware. ++ */ ++ msrs->basic = ++ VMCS12_REVISION | ++ VMX_BASIC_TRUE_CTLS | ++ ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | ++ (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); ++ ++ if (cpu_has_vmx_basic_inout()) ++ msrs->basic |= VMX_BASIC_INOUT; ++ ++ /* ++ * These MSRs specify bits which the guest must keep fixed on ++ * while L1 is in VMXON mode (in L1's root mode, or running an L2). ++ * We picked the standard core2 setting. ++ */ ++#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE) ++#define VMXON_CR4_ALWAYSON X86_CR4_VMXE ++ msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON; ++ msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON; ++ ++ /* These MSRs specify bits which the guest must keep fixed off. 
*/ ++ rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1); ++ rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1); ++ ++ /* highest index: VMX_PREEMPTION_TIMER_VALUE */ ++ msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1; ++} ++ ++/* ++ * if fixed0[i] == 1: val[i] must be 1 ++ * if fixed1[i] == 0: val[i] must be 0 ++ */ ++static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1) ++{ ++ return ((val & fixed1) | fixed0) == val; ++} ++ ++static inline bool vmx_control_verify(u32 control, u32 low, u32 high) ++{ ++ return fixed_bits_valid(control, low, high); ++} ++ ++static inline u64 vmx_control_msr(u32 low, u32 high) ++{ ++ return low | ((u64)high << 32); ++} ++ ++static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) ++{ ++ superset &= mask; ++ subset &= mask; ++ ++ return (superset | subset) == superset; ++} ++ ++static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) ++{ ++ const u64 feature_and_reserved = ++ /* feature (except bit 48; see below) */ ++ BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | ++ /* reserved */ ++ BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); ++ u64 vmx_basic = vmx->nested.msrs.basic; ++ ++ if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) ++ return -EINVAL; ++ ++ /* ++ * KVM does not emulate a version of VMX that constrains physical ++ * addresses of VMX structures (e.g. VMCS) to 32-bits. ++ */ ++ if (data & BIT_ULL(48)) ++ return -EINVAL; ++ ++ if (vmx_basic_vmcs_revision_id(vmx_basic) != ++ vmx_basic_vmcs_revision_id(data)) ++ return -EINVAL; ++ ++ if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data)) ++ return -EINVAL; ++ ++ vmx->nested.msrs.basic = data; ++ return 0; ++} ++ ++static int ++vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++{ ++ u64 supported; ++ u32 *lowp, *highp; ++ ++ switch (msr_index) { ++ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: ++ lowp = &vmx->nested.msrs.pinbased_ctls_low; ++ highp = &vmx->nested.msrs.pinbased_ctls_high; ++ break; ++ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: ++ lowp = &vmx->nested.msrs.procbased_ctls_low; ++ highp = &vmx->nested.msrs.procbased_ctls_high; ++ break; ++ case MSR_IA32_VMX_TRUE_EXIT_CTLS: ++ lowp = &vmx->nested.msrs.exit_ctls_low; ++ highp = &vmx->nested.msrs.exit_ctls_high; ++ break; ++ case MSR_IA32_VMX_TRUE_ENTRY_CTLS: ++ lowp = &vmx->nested.msrs.entry_ctls_low; ++ highp = &vmx->nested.msrs.entry_ctls_high; ++ break; ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ lowp = &vmx->nested.msrs.secondary_ctls_low; ++ highp = &vmx->nested.msrs.secondary_ctls_high; ++ break; ++ default: ++ BUG(); ++ } ++ ++ supported = vmx_control_msr(*lowp, *highp); ++ ++ /* Check must-be-1 bits are still 1. */ ++ if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0))) ++ return -EINVAL; ++ ++ /* Check must-be-0 bits are still 0. 
*/ ++ if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32))) ++ return -EINVAL; ++ ++ *lowp = data; ++ *highp = data >> 32; ++ return 0; ++} ++ ++static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) ++{ ++ const u64 feature_and_reserved_bits = ++ /* feature */ ++ BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) | ++ BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | ++ /* reserved */ ++ GENMASK_ULL(13, 9) | BIT_ULL(31); ++ u64 vmx_misc; ++ ++ vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, ++ vmx->nested.msrs.misc_high); ++ ++ if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) ++ return -EINVAL; ++ ++ if ((vmx->nested.msrs.pinbased_ctls_high & ++ PIN_BASED_VMX_PREEMPTION_TIMER) && ++ vmx_misc_preemption_timer_rate(data) != ++ vmx_misc_preemption_timer_rate(vmx_misc)) ++ return -EINVAL; ++ ++ if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc)) ++ return -EINVAL; ++ ++ if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc)) ++ return -EINVAL; ++ ++ if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc)) ++ return -EINVAL; ++ ++ vmx->nested.msrs.misc_low = data; ++ vmx->nested.msrs.misc_high = data >> 32; ++ ++ /* ++ * If L1 has read-only VM-exit information fields, use the ++ * less permissive vmx_vmwrite_bitmap to specify write ++ * permissions for the shadow VMCS. ++ */ ++ if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu)) ++ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); ++ ++ return 0; ++} ++ ++static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) ++{ ++ u64 vmx_ept_vpid_cap; ++ ++ vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps, ++ vmx->nested.msrs.vpid_caps); ++ ++ /* Every bit is either reserved or a feature bit. */ ++ if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) ++ return -EINVAL; ++ ++ vmx->nested.msrs.ept_caps = data; ++ vmx->nested.msrs.vpid_caps = data >> 32; ++ return 0; ++} ++ ++static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++{ ++ u64 *msr; ++ ++ switch (msr_index) { ++ case MSR_IA32_VMX_CR0_FIXED0: ++ msr = &vmx->nested.msrs.cr0_fixed0; ++ break; ++ case MSR_IA32_VMX_CR4_FIXED0: ++ msr = &vmx->nested.msrs.cr4_fixed0; ++ break; ++ default: ++ BUG(); ++ } ++ ++ /* ++ * 1 bits (which indicates bits which "must-be-1" during VMX operation) ++ * must be 1 in the restored value. ++ */ ++ if (!is_bitwise_subset(data, *msr, -1ULL)) ++ return -EINVAL; ++ ++ *msr = data; ++ return 0; ++} ++ ++/* ++ * Called when userspace is restoring VMX MSRs. ++ * ++ * Returns 0 on success, non-0 otherwise. ++ */ ++static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ /* ++ * Don't allow changes to the VMX capability MSRs while the vCPU ++ * is in VMX operation. ++ */ ++ if (vmx->nested.vmxon) ++ return -EBUSY; ++ ++ switch (msr_index) { ++ case MSR_IA32_VMX_BASIC: ++ return vmx_restore_vmx_basic(vmx, data); ++ case MSR_IA32_VMX_PINBASED_CTLS: ++ case MSR_IA32_VMX_PROCBASED_CTLS: ++ case MSR_IA32_VMX_EXIT_CTLS: ++ case MSR_IA32_VMX_ENTRY_CTLS: ++ /* ++ * The "non-true" VMX capability MSRs are generated from the ++ * "true" MSRs, so we do not support restoring them directly. ++ * ++ * If userspace wants to emulate VMX_BASIC[55]=0, userspace ++ * should restore the "true" MSRs with the must-be-1 bits ++ * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND ++ * DEFAULT SETTINGS". 
++ */ ++ return -EINVAL; ++ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: ++ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: ++ case MSR_IA32_VMX_TRUE_EXIT_CTLS: ++ case MSR_IA32_VMX_TRUE_ENTRY_CTLS: ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ return vmx_restore_control_msr(vmx, msr_index, data); ++ case MSR_IA32_VMX_MISC: ++ return vmx_restore_vmx_misc(vmx, data); ++ case MSR_IA32_VMX_CR0_FIXED0: ++ case MSR_IA32_VMX_CR4_FIXED0: ++ return vmx_restore_fixed0_msr(vmx, msr_index, data); ++ case MSR_IA32_VMX_CR0_FIXED1: ++ case MSR_IA32_VMX_CR4_FIXED1: ++ /* ++ * These MSRs are generated based on the vCPU's CPUID, so we ++ * do not support restoring them directly. ++ */ ++ return -EINVAL; ++ case MSR_IA32_VMX_EPT_VPID_CAP: ++ return vmx_restore_vmx_ept_vpid_cap(vmx, data); ++ case MSR_IA32_VMX_VMCS_ENUM: ++ vmx->nested.msrs.vmcs_enum = data; ++ return 0; ++ default: ++ /* ++ * The rest of the VMX capability MSRs do not support restore. ++ */ ++ return -EINVAL; ++ } ++} ++ ++/* Returns 0 on success, non-0 otherwise. */ ++static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata) ++{ ++ switch (msr_index) { ++ case MSR_IA32_VMX_BASIC: ++ *pdata = msrs->basic; ++ break; ++ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: ++ case MSR_IA32_VMX_PINBASED_CTLS: ++ *pdata = vmx_control_msr( ++ msrs->pinbased_ctls_low, ++ msrs->pinbased_ctls_high); ++ if (msr_index == MSR_IA32_VMX_PINBASED_CTLS) ++ *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; ++ break; ++ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: ++ case MSR_IA32_VMX_PROCBASED_CTLS: ++ *pdata = vmx_control_msr( ++ msrs->procbased_ctls_low, ++ msrs->procbased_ctls_high); ++ if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS) ++ *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; ++ break; ++ case MSR_IA32_VMX_TRUE_EXIT_CTLS: ++ case MSR_IA32_VMX_EXIT_CTLS: ++ *pdata = vmx_control_msr( ++ msrs->exit_ctls_low, ++ msrs->exit_ctls_high); ++ if (msr_index == MSR_IA32_VMX_EXIT_CTLS) ++ *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; ++ break; ++ case MSR_IA32_VMX_TRUE_ENTRY_CTLS: ++ case MSR_IA32_VMX_ENTRY_CTLS: ++ *pdata = vmx_control_msr( ++ msrs->entry_ctls_low, ++ msrs->entry_ctls_high); ++ if (msr_index == MSR_IA32_VMX_ENTRY_CTLS) ++ *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; ++ break; ++ case MSR_IA32_VMX_MISC: ++ *pdata = vmx_control_msr( ++ msrs->misc_low, ++ msrs->misc_high); ++ break; ++ case MSR_IA32_VMX_CR0_FIXED0: ++ *pdata = msrs->cr0_fixed0; ++ break; ++ case MSR_IA32_VMX_CR0_FIXED1: ++ *pdata = msrs->cr0_fixed1; ++ break; ++ case MSR_IA32_VMX_CR4_FIXED0: ++ *pdata = msrs->cr4_fixed0; ++ break; ++ case MSR_IA32_VMX_CR4_FIXED1: ++ *pdata = msrs->cr4_fixed1; ++ break; ++ case MSR_IA32_VMX_VMCS_ENUM: ++ *pdata = msrs->vmcs_enum; ++ break; ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ *pdata = vmx_control_msr( ++ msrs->secondary_ctls_low, ++ msrs->secondary_ctls_high); ++ break; ++ case MSR_IA32_VMX_EPT_VPID_CAP: ++ *pdata = msrs->ept_caps | ++ ((u64)msrs->vpid_caps << 32); ++ break; ++ case MSR_IA32_VMX_VMFUNC: ++ *pdata = msrs->vmfunc_controls; ++ break; ++ default: ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, ++ uint64_t val) ++{ ++ uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits; ++ ++ return !(val & ~valid_bits); ++} ++ ++static int vmx_get_msr_feature(struct kvm_msr_entry *msr) ++{ ++ switch (msr->index) { ++ case MSR_IA32_VMX_BASIC ... 
MSR_IA32_VMX_VMFUNC: ++ if (!nested) ++ return 1; ++ return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); ++ default: ++ return 1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Reads an msr value (of 'msr_index') into 'pdata'. ++ * Returns 0 on success, non-0 otherwise. ++ * Assumes vcpu_load() was already called. ++ */ ++static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct shared_msr_entry *msr; ++ ++ switch (msr_info->index) { ++#ifdef CONFIG_X86_64 ++ case MSR_FS_BASE: ++ msr_info->data = vmcs_readl(GUEST_FS_BASE); ++ break; ++ case MSR_GS_BASE: ++ msr_info->data = vmcs_readl(GUEST_GS_BASE); ++ break; ++ case MSR_KERNEL_GS_BASE: ++ msr_info->data = vmx_read_guest_kernel_gs_base(vmx); ++ break; ++#endif ++ case MSR_EFER: ++ return kvm_get_msr_common(vcpu, msr_info); ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) ++ return 1; ++ ++ msr_info->data = to_vmx(vcpu)->spec_ctrl; ++ break; ++ case MSR_IA32_SYSENTER_CS: ++ msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); ++ break; ++ case MSR_IA32_SYSENTER_EIP: ++ msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP); ++ break; ++ case MSR_IA32_SYSENTER_ESP: ++ msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); ++ break; ++ case MSR_IA32_BNDCFGS: ++ if (!kvm_mpx_supported() || ++ (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) ++ return 1; ++ msr_info->data = vmcs_read64(GUEST_BNDCFGS); ++ break; ++ case MSR_IA32_MCG_EXT_CTL: ++ if (!msr_info->host_initiated && ++ !(vmx->msr_ia32_feature_control & ++ FEATURE_CONTROL_LMCE)) ++ return 1; ++ msr_info->data = vcpu->arch.mcg_ext_ctl; ++ break; ++ case MSR_IA32_FEATURE_CONTROL: ++ msr_info->data = vmx->msr_ia32_feature_control; ++ break; ++ case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: ++ if (!nested_vmx_allowed(vcpu)) ++ return 1; ++ return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, ++ &msr_info->data); ++ case MSR_IA32_XSS: ++ if (!vmx_xsaves_supported() || ++ (!msr_info->host_initiated && ++ !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && ++ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) ++ return 1; ++ msr_info->data = vcpu->arch.ia32_xss; ++ break; ++ case MSR_TSC_AUX: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) ++ return 1; ++ /* Otherwise falls through */ ++ default: ++ msr = find_msr_entry(vmx, msr_info->index); ++ if (msr) { ++ msr_info->data = msr->data; ++ break; ++ } ++ return kvm_get_msr_common(vcpu, msr_info); ++ } ++ ++ return 0; ++} ++ ++static void vmx_leave_nested(struct kvm_vcpu *vcpu); ++ ++/* ++ * Writes msr value into into the appropriate "register". ++ * Returns 0 on success, non-0 otherwise. ++ * Assumes vcpu_load() was already called. 
++ */ ++static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct shared_msr_entry *msr; ++ int ret = 0; ++ u32 msr_index = msr_info->index; ++ u64 data = msr_info->data; ++ ++ switch (msr_index) { ++ case MSR_EFER: ++ ret = kvm_set_msr_common(vcpu, msr_info); ++ break; ++#ifdef CONFIG_X86_64 ++ case MSR_FS_BASE: ++ vmx_segment_cache_clear(vmx); ++ vmcs_writel(GUEST_FS_BASE, data); ++ break; ++ case MSR_GS_BASE: ++ vmx_segment_cache_clear(vmx); ++ vmcs_writel(GUEST_GS_BASE, data); ++ break; ++ case MSR_KERNEL_GS_BASE: ++ vmx_write_guest_kernel_gs_base(vmx, data); ++ break; ++#endif ++ case MSR_IA32_SYSENTER_CS: ++ vmcs_write32(GUEST_SYSENTER_CS, data); ++ break; ++ case MSR_IA32_SYSENTER_EIP: ++ vmcs_writel(GUEST_SYSENTER_EIP, data); ++ break; ++ case MSR_IA32_SYSENTER_ESP: ++ vmcs_writel(GUEST_SYSENTER_ESP, data); ++ break; ++ case MSR_IA32_BNDCFGS: ++ if (!kvm_mpx_supported() || ++ (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) ++ return 1; ++ if (is_noncanonical_address(data & PAGE_MASK, vcpu) || ++ (data & MSR_IA32_BNDCFGS_RSVD)) ++ return 1; ++ vmcs_write64(GUEST_BNDCFGS, data); ++ break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) ++ return 1; ++ ++ /* The STIBP bit doesn't fault even if it's not advertised */ ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) ++ return 1; ++ ++ vmx->spec_ctrl = data; ++ ++ if (!data) ++ break; ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. ++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_vmx_merge_msr_bitmap. We should not touch the ++ * vmcs02.msr_bitmap here since it gets completely overwritten ++ * in the merging. We update the vmcs01 here for L1 as well ++ * since it will end up touching the MSR anyway now. ++ */ ++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, ++ MSR_IA32_SPEC_CTRL, ++ MSR_TYPE_RW); ++ break; ++ case MSR_IA32_PRED_CMD: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) ++ return 1; ++ ++ if (data & ~PRED_CMD_IBPB) ++ return 1; ++ ++ if (!data) ++ break; ++ ++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. ++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_vmx_merge_msr_bitmap. We should not touch the ++ * vmcs02.msr_bitmap here since it gets completely overwritten ++ * in the merging. 
++ */ ++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, ++ MSR_TYPE_W); ++ break; ++ case MSR_IA32_CR_PAT: ++ if (!kvm_pat_valid(data)) ++ return 1; ++ ++ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { ++ vmcs_write64(GUEST_IA32_PAT, data); ++ vcpu->arch.pat = data; ++ break; ++ } ++ ret = kvm_set_msr_common(vcpu, msr_info); ++ break; ++ case MSR_IA32_TSC_ADJUST: ++ ret = kvm_set_msr_common(vcpu, msr_info); ++ break; ++ case MSR_IA32_MCG_EXT_CTL: ++ if ((!msr_info->host_initiated && ++ !(to_vmx(vcpu)->msr_ia32_feature_control & ++ FEATURE_CONTROL_LMCE)) || ++ (data & ~MCG_EXT_CTL_LMCE_EN)) ++ return 1; ++ vcpu->arch.mcg_ext_ctl = data; ++ break; ++ case MSR_IA32_FEATURE_CONTROL: ++ if (!vmx_feature_control_msr_valid(vcpu, data) || ++ (to_vmx(vcpu)->msr_ia32_feature_control & ++ FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) ++ return 1; ++ vmx->msr_ia32_feature_control = data; ++ if (msr_info->host_initiated && data == 0) ++ vmx_leave_nested(vcpu); ++ break; ++ case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: ++ if (!msr_info->host_initiated) ++ return 1; /* they are read-only */ ++ if (!nested_vmx_allowed(vcpu)) ++ return 1; ++ return vmx_set_vmx_msr(vcpu, msr_index, data); ++ case MSR_IA32_XSS: ++ if (!vmx_xsaves_supported() || ++ (!msr_info->host_initiated && ++ !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && ++ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) ++ return 1; ++ /* ++ * The only supported bit as of Skylake is bit 8, but ++ * it is not supported on KVM. ++ */ ++ if (data != 0) ++ return 1; ++ vcpu->arch.ia32_xss = data; ++ if (vcpu->arch.ia32_xss != host_xss) ++ add_atomic_switch_msr(vmx, MSR_IA32_XSS, ++ vcpu->arch.ia32_xss, host_xss, false); ++ else ++ clear_atomic_switch_msr(vmx, MSR_IA32_XSS); ++ break; ++ case MSR_TSC_AUX: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) ++ return 1; ++ /* Check reserved bit, higher 32 bits should be zero */ ++ if ((data >> 32) != 0) ++ return 1; ++ /* Otherwise falls through */ ++ default: ++ msr = find_msr_entry(vmx, msr_index); ++ if (msr) { ++ u64 old_msr_data = msr->data; ++ msr->data = data; ++ if (msr - vmx->guest_msrs < vmx->save_nmsrs) { ++ preempt_disable(); ++ ret = kvm_set_shared_msr(msr->index, msr->data, ++ msr->mask); ++ preempt_enable(); ++ if (ret) ++ msr->data = old_msr_data; ++ } ++ break; ++ } ++ ret = kvm_set_msr_common(vcpu, msr_info); ++ } ++ ++ return ret; ++} ++ ++static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) ++{ ++ __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); ++ switch (reg) { ++ case VCPU_REGS_RSP: ++ vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); ++ break; ++ case VCPU_REGS_RIP: ++ vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); ++ break; ++ case VCPU_EXREG_PDPTR: ++ if (enable_ept) ++ ept_save_pdptrs(vcpu); ++ break; ++ default: ++ break; ++ } ++} ++ ++static __init int cpu_has_kvm_support(void) ++{ ++ return cpu_has_vmx(); ++} ++ ++static __init int vmx_disabled_by_bios(void) ++{ ++ u64 msr; ++ ++ rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); ++ if (msr & FEATURE_CONTROL_LOCKED) { ++ /* launched w/ TXT and VMX disabled */ ++ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) ++ && tboot_enabled()) ++ return 1; ++ /* launched w/o TXT and VMX only enabled w/ TXT */ ++ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) ++ && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) ++ && !tboot_enabled()) { ++ printk(KERN_WARNING "kvm: disable TXT in the BIOS or " ++ "activate TXT before enabling 
KVM\n"); ++ return 1; ++ } ++ /* launched w/o TXT and VMX disabled */ ++ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) ++ && !tboot_enabled()) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static void kvm_cpu_vmxon(u64 addr) ++{ ++ cr4_set_bits(X86_CR4_VMXE); ++ intel_pt_handle_vmx(1); ++ ++ asm volatile (ASM_VMX_VMXON_RAX ++ : : "a"(&addr), "m"(addr) ++ : "memory", "cc"); ++} ++ ++static int hardware_enable(void) ++{ ++ int cpu = raw_smp_processor_id(); ++ u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); ++ u64 old, test_bits; ++ ++ if (cr4_read_shadow() & X86_CR4_VMXE) ++ return -EBUSY; ++ ++ /* ++ * This can happen if we hot-added a CPU but failed to allocate ++ * VP assist page for it. ++ */ ++ if (static_branch_unlikely(&enable_evmcs) && ++ !hv_get_vp_assist_page(cpu)) ++ return -EFAULT; ++ ++ INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); ++ INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); ++ spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); ++ ++ /* ++ * Now we can enable the vmclear operation in kdump ++ * since the loaded_vmcss_on_cpu list on this cpu ++ * has been initialized. ++ * ++ * Though the cpu is not in VMX operation now, there ++ * is no problem to enable the vmclear operation ++ * for the loaded_vmcss_on_cpu list is empty! ++ */ ++ crash_enable_local_vmclear(cpu); ++ ++ rdmsrl(MSR_IA32_FEATURE_CONTROL, old); ++ ++ test_bits = FEATURE_CONTROL_LOCKED; ++ test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; ++ if (tboot_enabled()) ++ test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; ++ ++ if ((old & test_bits) != test_bits) { ++ /* enable and lock */ ++ wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); ++ } ++ kvm_cpu_vmxon(phys_addr); ++ if (enable_ept) ++ ept_sync_global(); ++ ++ return 0; ++} ++ ++static void vmclear_local_loaded_vmcss(void) ++{ ++ int cpu = raw_smp_processor_id(); ++ struct loaded_vmcs *v, *n; ++ ++ list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu), ++ loaded_vmcss_on_cpu_link) ++ __loaded_vmcs_clear(v); ++} ++ ++ ++/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() ++ * tricks. ++ */ ++static void kvm_cpu_vmxoff(void) ++{ ++ asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); ++ ++ intel_pt_handle_vmx(0); ++ cr4_clear_bits(X86_CR4_VMXE); ++} ++ ++static void hardware_disable(void) ++{ ++ vmclear_local_loaded_vmcss(); ++ kvm_cpu_vmxoff(); ++} ++ ++static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, ++ u32 msr, u32 *result) ++{ ++ u32 vmx_msr_low, vmx_msr_high; ++ u32 ctl = ctl_min | ctl_opt; ++ ++ rdmsr(msr, vmx_msr_low, vmx_msr_high); ++ ++ ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */ ++ ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */ ++ ++ /* Ensure minimum (required) set of control bits are supported. 
*/ ++ if (ctl_min & ~ctl) ++ return -EIO; ++ ++ *result = ctl; ++ return 0; ++} ++ ++static __init bool allow_1_setting(u32 msr, u32 ctl) ++{ ++ u32 vmx_msr_low, vmx_msr_high; ++ ++ rdmsr(msr, vmx_msr_low, vmx_msr_high); ++ return vmx_msr_high & ctl; ++} ++ ++static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) ++{ ++ u32 vmx_msr_low, vmx_msr_high; ++ u32 min, opt, min2, opt2; ++ u32 _pin_based_exec_control = 0; ++ u32 _cpu_based_exec_control = 0; ++ u32 _cpu_based_2nd_exec_control = 0; ++ u32 _vmexit_control = 0; ++ u32 _vmentry_control = 0; ++ ++ memset(vmcs_conf, 0, sizeof(*vmcs_conf)); ++ min = CPU_BASED_HLT_EXITING | ++#ifdef CONFIG_X86_64 ++ CPU_BASED_CR8_LOAD_EXITING | ++ CPU_BASED_CR8_STORE_EXITING | ++#endif ++ CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_CR3_STORE_EXITING | ++ CPU_BASED_UNCOND_IO_EXITING | ++ CPU_BASED_MOV_DR_EXITING | ++ CPU_BASED_USE_TSC_OFFSETING | ++ CPU_BASED_MWAIT_EXITING | ++ CPU_BASED_MONITOR_EXITING | ++ CPU_BASED_INVLPG_EXITING | ++ CPU_BASED_RDPMC_EXITING; ++ ++ opt = CPU_BASED_TPR_SHADOW | ++ CPU_BASED_USE_MSR_BITMAPS | ++ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; ++ if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, ++ &_cpu_based_exec_control) < 0) ++ return -EIO; ++#ifdef CONFIG_X86_64 ++ if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) ++ _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING & ++ ~CPU_BASED_CR8_STORE_EXITING; ++#endif ++ if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { ++ min2 = 0; ++ opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ SECONDARY_EXEC_WBINVD_EXITING | ++ SECONDARY_EXEC_ENABLE_VPID | ++ SECONDARY_EXEC_ENABLE_EPT | ++ SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ SECONDARY_EXEC_PAUSE_LOOP_EXITING | ++ SECONDARY_EXEC_DESC | ++ SECONDARY_EXEC_RDTSCP | ++ SECONDARY_EXEC_ENABLE_INVPCID | ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ SECONDARY_EXEC_SHADOW_VMCS | ++ SECONDARY_EXEC_XSAVES | ++ SECONDARY_EXEC_RDSEED_EXITING | ++ SECONDARY_EXEC_RDRAND_EXITING | ++ SECONDARY_EXEC_ENABLE_PML | ++ SECONDARY_EXEC_TSC_SCALING | ++ SECONDARY_EXEC_ENABLE_VMFUNC | ++ SECONDARY_EXEC_ENCLS_EXITING; ++ if (adjust_vmx_controls(min2, opt2, ++ MSR_IA32_VMX_PROCBASED_CTLS2, ++ &_cpu_based_2nd_exec_control) < 0) ++ return -EIO; ++ } ++#ifndef CONFIG_X86_64 ++ if (!(_cpu_based_2nd_exec_control & ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) ++ _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; ++#endif ++ ++ if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) ++ _cpu_based_2nd_exec_control &= ~( ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); ++ ++ rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, ++ &vmx_capability.ept, &vmx_capability.vpid); ++ ++ if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { ++ /* CR3 accesses and invlpg don't need to cause VM Exits when EPT ++ enabled */ ++ _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_CR3_STORE_EXITING | ++ CPU_BASED_INVLPG_EXITING); ++ } else if (vmx_capability.ept) { ++ vmx_capability.ept = 0; ++ pr_warn_once("EPT CAP should not exist if not support " ++ "1-setting enable EPT VM-execution control\n"); ++ } ++ if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) && ++ vmx_capability.vpid) { ++ vmx_capability.vpid = 0; ++ pr_warn_once("VPID CAP should not exist if not support " ++ "1-setting enable VPID VM-execution control\n"); ++ } ++ ++ min = 
VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT; ++#ifdef CONFIG_X86_64 ++ min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; ++#endif ++ opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | ++ VM_EXIT_CLEAR_BNDCFGS; ++ if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, ++ &_vmexit_control) < 0) ++ return -EIO; ++ ++ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; ++ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | ++ PIN_BASED_VMX_PREEMPTION_TIMER; ++ if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, ++ &_pin_based_exec_control) < 0) ++ return -EIO; ++ ++ if (cpu_has_broken_vmx_preemption_timer()) ++ _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; ++ if (!(_cpu_based_2nd_exec_control & ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)) ++ _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; ++ ++ min = VM_ENTRY_LOAD_DEBUG_CONTROLS; ++ opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; ++ if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, ++ &_vmentry_control) < 0) ++ return -EIO; ++ ++ rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); ++ ++ /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ ++ if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) ++ return -EIO; ++ ++#ifdef CONFIG_X86_64 ++ /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ ++ if (vmx_msr_high & (1u<<16)) ++ return -EIO; ++#endif ++ ++ /* Require Write-Back (WB) memory type for VMCS accesses. */ ++ if (((vmx_msr_high >> 18) & 15) != 6) ++ return -EIO; ++ ++ vmcs_conf->size = vmx_msr_high & 0x1fff; ++ vmcs_conf->order = get_order(vmcs_conf->size); ++ vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; ++ ++ vmcs_conf->revision_id = vmx_msr_low; ++ ++ vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; ++ vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; ++ vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; ++ vmcs_conf->vmexit_ctrl = _vmexit_control; ++ vmcs_conf->vmentry_ctrl = _vmentry_control; ++ ++ if (static_branch_unlikely(&enable_evmcs)) ++ evmcs_sanitize_exec_ctrls(vmcs_conf); ++ ++ cpu_has_load_ia32_efer = ++ allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, ++ VM_ENTRY_LOAD_IA32_EFER) ++ && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS, ++ VM_EXIT_LOAD_IA32_EFER); ++ ++ cpu_has_load_perf_global_ctrl = ++ allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, ++ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) ++ && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS, ++ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); ++ ++ /* ++ * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL ++ * but due to errata below it can't be used. Workaround is to use ++ * msr load mechanism to switch IA32_PERF_GLOBAL_CTRL. ++ * ++ * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32] ++ * ++ * AAK155 (model 26) ++ * AAP115 (model 30) ++ * AAT100 (model 37) ++ * BC86,AAY89,BD102 (model 44) ++ * BA97 (model 46) ++ * ++ */ ++ if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) { ++ switch (boot_cpu_data.x86_model) { ++ case 26: ++ case 30: ++ case 37: ++ case 44: ++ case 46: ++ cpu_has_load_perf_global_ctrl = false; ++ printk_once(KERN_WARNING"kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " ++ "does not work properly. 
Using workaround\n"); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ if (boot_cpu_has(X86_FEATURE_XSAVES)) ++ rdmsrl(MSR_IA32_XSS, host_xss); ++ ++ return 0; ++} ++ ++static struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu) ++{ ++ int node = cpu_to_node(cpu); ++ struct page *pages; ++ struct vmcs *vmcs; ++ ++ pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order); ++ if (!pages) ++ return NULL; ++ vmcs = page_address(pages); ++ memset(vmcs, 0, vmcs_config.size); ++ ++ /* KVM supports Enlightened VMCS v1 only */ ++ if (static_branch_unlikely(&enable_evmcs)) ++ vmcs->hdr.revision_id = KVM_EVMCS_VERSION; ++ else ++ vmcs->hdr.revision_id = vmcs_config.revision_id; ++ ++ if (shadow) ++ vmcs->hdr.shadow_vmcs = 1; ++ return vmcs; ++} ++ ++static void free_vmcs(struct vmcs *vmcs) ++{ ++ free_pages((unsigned long)vmcs, vmcs_config.order); ++} ++ ++/* ++ * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded ++ */ ++static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) ++{ ++ if (!loaded_vmcs->vmcs) ++ return; ++ loaded_vmcs_clear(loaded_vmcs); ++ free_vmcs(loaded_vmcs->vmcs); ++ loaded_vmcs->vmcs = NULL; ++ if (loaded_vmcs->msr_bitmap) ++ free_page((unsigned long)loaded_vmcs->msr_bitmap); ++ WARN_ON(loaded_vmcs->shadow_vmcs != NULL); ++} ++ ++static struct vmcs *alloc_vmcs(bool shadow) ++{ ++ return alloc_vmcs_cpu(shadow, raw_smp_processor_id()); ++} ++ ++static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) ++{ ++ loaded_vmcs->vmcs = alloc_vmcs(false); ++ if (!loaded_vmcs->vmcs) ++ return -ENOMEM; ++ ++ loaded_vmcs->shadow_vmcs = NULL; ++ loaded_vmcs_init(loaded_vmcs); ++ ++ if (cpu_has_vmx_msr_bitmap()) { ++ loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); ++ if (!loaded_vmcs->msr_bitmap) ++ goto out_vmcs; ++ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); ++ ++ if (IS_ENABLED(CONFIG_HYPERV) && ++ static_branch_unlikely(&enable_evmcs) && ++ (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { ++ struct hv_enlightened_vmcs *evmcs = ++ (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs; ++ ++ evmcs->hv_enlightenments_control.msr_bitmap = 1; ++ } ++ } ++ ++ memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); ++ ++ return 0; ++ ++out_vmcs: ++ free_loaded_vmcs(loaded_vmcs); ++ return -ENOMEM; ++} ++ ++static void free_kvm_area(void) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ free_vmcs(per_cpu(vmxarea, cpu)); ++ per_cpu(vmxarea, cpu) = NULL; ++ } ++} ++ ++enum vmcs_field_width { ++ VMCS_FIELD_WIDTH_U16 = 0, ++ VMCS_FIELD_WIDTH_U64 = 1, ++ VMCS_FIELD_WIDTH_U32 = 2, ++ VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3 ++}; ++ ++static inline int vmcs_field_width(unsigned long field) ++{ ++ if (0x1 & field) /* the *_HIGH fields are all 32 bit */ ++ return VMCS_FIELD_WIDTH_U32; ++ return (field >> 13) & 0x3 ; ++} ++ ++static inline int vmcs_field_readonly(unsigned long field) ++{ ++ return (((field >> 10) & 0x3) == 1); ++} ++ ++static void init_vmcs_shadow_fields(void) ++{ ++ int i, j; ++ ++ for (i = j = 0; i < max_shadow_read_only_fields; i++) { ++ u16 field = shadow_read_only_fields[i]; ++ if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && ++ (i + 1 == max_shadow_read_only_fields || ++ shadow_read_only_fields[i + 1] != field + 1)) ++ pr_err("Missing field from shadow_read_only_field %x\n", ++ field + 1); ++ ++ clear_bit(field, vmx_vmread_bitmap); ++#ifdef CONFIG_X86_64 ++ if (field & 1) ++ continue; ++#endif ++ if (j < i) ++ shadow_read_only_fields[j] = field; ++ j++; ++ } ++ max_shadow_read_only_fields = j; ++ 
++ for (i = j = 0; i < max_shadow_read_write_fields; i++) { ++ u16 field = shadow_read_write_fields[i]; ++ if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && ++ (i + 1 == max_shadow_read_write_fields || ++ shadow_read_write_fields[i + 1] != field + 1)) ++ pr_err("Missing field from shadow_read_write_field %x\n", ++ field + 1); ++ ++ /* ++ * PML and the preemption timer can be emulated, but the ++ * processor cannot vmwrite to fields that don't exist ++ * on bare metal. ++ */ ++ switch (field) { ++ case GUEST_PML_INDEX: ++ if (!cpu_has_vmx_pml()) ++ continue; ++ break; ++ case VMX_PREEMPTION_TIMER_VALUE: ++ if (!cpu_has_vmx_preemption_timer()) ++ continue; ++ break; ++ case GUEST_INTR_STATUS: ++ if (!cpu_has_vmx_apicv()) ++ continue; ++ break; ++ default: ++ break; ++ } ++ ++ clear_bit(field, vmx_vmwrite_bitmap); ++ clear_bit(field, vmx_vmread_bitmap); ++#ifdef CONFIG_X86_64 ++ if (field & 1) ++ continue; ++#endif ++ if (j < i) ++ shadow_read_write_fields[j] = field; ++ j++; ++ } ++ max_shadow_read_write_fields = j; ++} ++ ++static __init int alloc_kvm_area(void) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ struct vmcs *vmcs; ++ ++ vmcs = alloc_vmcs_cpu(false, cpu); ++ if (!vmcs) { ++ free_kvm_area(); ++ return -ENOMEM; ++ } ++ ++ /* ++ * When eVMCS is enabled, alloc_vmcs_cpu() sets ++ * vmcs->revision_id to KVM_EVMCS_VERSION instead of ++ * revision_id reported by MSR_IA32_VMX_BASIC. ++ * ++ * However, even though not explictly documented by ++ * TLFS, VMXArea passed as VMXON argument should ++ * still be marked with revision_id reported by ++ * physical CPU. ++ */ ++ if (static_branch_unlikely(&enable_evmcs)) ++ vmcs->hdr.revision_id = vmcs_config.revision_id; ++ ++ per_cpu(vmxarea, cpu) = vmcs; ++ } ++ return 0; ++} ++ ++static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, ++ struct kvm_segment *save) ++{ ++ if (!emulate_invalid_guest_state) { ++ /* ++ * CS and SS RPL should be equal during guest entry according ++ * to VMX spec, but in reality it is not always so. Since vcpu ++ * is in the middle of the transition from real mode to ++ * protected mode it is safe to assume that RPL 0 is a good ++ * default value. ++ */ ++ if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) ++ save->selector &= ~SEGMENT_RPL_MASK; ++ save->dpl = save->selector & SEGMENT_RPL_MASK; ++ save->s = 1; ++ } ++ vmx_set_segment(vcpu, save, seg); ++} ++ ++static void enter_pmode(struct kvm_vcpu *vcpu) ++{ ++ unsigned long flags; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ /* ++ * Update real mode segment cache. It may be not up-to-date if sement ++ * register was written while vcpu was in a guest mode. 
++ */ ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); ++ ++ vmx->rmode.vm86_active = 0; ++ ++ vmx_segment_cache_clear(vmx); ++ ++ vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); ++ ++ flags = vmcs_readl(GUEST_RFLAGS); ++ flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; ++ flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; ++ vmcs_writel(GUEST_RFLAGS, flags); ++ ++ vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | ++ (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME)); ++ ++ update_exception_bitmap(vcpu); ++ ++ fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); ++ fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); ++ fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); ++ fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); ++ fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); ++ fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); ++} ++ ++static void fix_rmode_seg(int seg, struct kvm_segment *save) ++{ ++ const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; ++ struct kvm_segment var = *save; ++ ++ var.dpl = 0x3; ++ if (seg == VCPU_SREG_CS) ++ var.type = 0x3; ++ ++ if (!emulate_invalid_guest_state) { ++ var.selector = var.base >> 4; ++ var.base = var.base & 0xffff0; ++ var.limit = 0xffff; ++ var.g = 0; ++ var.db = 0; ++ var.present = 1; ++ var.s = 1; ++ var.l = 0; ++ var.unusable = 0; ++ var.type = 0x3; ++ var.avl = 0; ++ if (save->base & 0xf) ++ printk_once(KERN_WARNING "kvm: segment base is not " ++ "paragraph aligned when entering " ++ "protected mode (seg=%d)", seg); ++ } ++ ++ vmcs_write16(sf->selector, var.selector); ++ vmcs_writel(sf->base, var.base); ++ vmcs_write32(sf->limit, var.limit); ++ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); ++} ++ ++static void enter_rmode(struct kvm_vcpu *vcpu) ++{ ++ unsigned long flags; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm); ++ ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); ++ ++ vmx->rmode.vm86_active = 1; ++ ++ /* ++ * Very old userspace does not call KVM_SET_TSS_ADDR before entering ++ * vcpu. Warn the user that an update is overdue. 
++ */ ++ if (!kvm_vmx->tss_addr) ++ printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " ++ "called before entering vcpu\n"); ++ ++ vmx_segment_cache_clear(vmx); ++ ++ vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr); ++ vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); ++ vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); ++ ++ flags = vmcs_readl(GUEST_RFLAGS); ++ vmx->rmode.save_rflags = flags; ++ ++ flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; ++ ++ vmcs_writel(GUEST_RFLAGS, flags); ++ vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); ++ update_exception_bitmap(vcpu); ++ ++ fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); ++ fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); ++ fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); ++ fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); ++ fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); ++ fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); ++ ++ kvm_mmu_reset_context(vcpu); ++} ++ ++static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); ++ ++ if (!msr) ++ return; ++ ++ vcpu->arch.efer = efer; ++ if (efer & EFER_LMA) { ++ vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); ++ msr->data = efer; ++ } else { ++ vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); ++ ++ msr->data = efer & ~EFER_LME; ++ } ++ setup_msrs(vmx); ++} ++ ++#ifdef CONFIG_X86_64 ++ ++static void enter_lmode(struct kvm_vcpu *vcpu) ++{ ++ u32 guest_tr_ar; ++ ++ vmx_segment_cache_clear(to_vmx(vcpu)); ++ ++ guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); ++ if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) { ++ pr_debug_ratelimited("%s: tss fixup for long mode. \n", ++ __func__); ++ vmcs_write32(GUEST_TR_AR_BYTES, ++ (guest_tr_ar & ~VMX_AR_TYPE_MASK) ++ | VMX_AR_TYPE_BUSY_64_TSS); ++ } ++ vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); ++} ++ ++static void exit_lmode(struct kvm_vcpu *vcpu) ++{ ++ vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); ++ vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); ++} ++ ++#endif ++ ++static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid, ++ bool invalidate_gpa) ++{ ++ if (enable_ept && (invalidate_gpa || !enable_vpid)) { ++ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) ++ return; ++ ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa)); ++ } else { ++ vpid_sync_context(vpid); ++ } ++} ++ ++static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) ++{ ++ __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa); ++} ++ ++static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) ++{ ++ int vpid = to_vmx(vcpu)->vpid; ++ ++ if (!vpid_sync_vcpu_addr(vpid, addr)) ++ vpid_sync_context(vpid); ++ ++ /* ++ * If VPIDs are not supported or enabled, then the above is a no-op. ++ * But we don't really need a TLB flush in that case anyway, because ++ * each VM entry/exit includes an implicit flush when VPID is 0. 
++ */ ++} ++ ++static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) ++{ ++ ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; ++ ++ vcpu->arch.cr0 &= ~cr0_guest_owned_bits; ++ vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; ++} ++ ++static void vmx_decache_cr3(struct kvm_vcpu *vcpu) ++{ ++ if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) ++ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); ++ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); ++} ++ ++static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) ++{ ++ ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; ++ ++ vcpu->arch.cr4 &= ~cr4_guest_owned_bits; ++ vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; ++} ++ ++static void ept_load_pdptrs(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_mmu *mmu = vcpu->arch.walk_mmu; ++ ++ if (!test_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_dirty)) ++ return; ++ ++ if (is_pae_paging(vcpu)) { ++ vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); ++ vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); ++ vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); ++ vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]); ++ } ++} ++ ++static void ept_save_pdptrs(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_mmu *mmu = vcpu->arch.walk_mmu; ++ ++ if (is_pae_paging(vcpu)) { ++ mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); ++ mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); ++ mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); ++ mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); ++ } ++ ++ __set_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_avail); ++ __set_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_dirty); ++} ++ ++static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; ++ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high & ++ SECONDARY_EXEC_UNRESTRICTED_GUEST && ++ nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) ++ fixed0 &= ~(X86_CR0_PE | X86_CR0_PG); ++ ++ return fixed_bits_valid(val, fixed0, fixed1); ++} ++ ++static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; ++ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; ++ ++ return fixed_bits_valid(val, fixed0, fixed1); ++} ++ ++static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0; ++ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1; ++ ++ return fixed_bits_valid(val, fixed0, fixed1); ++} ++ ++/* No difference in the restrictions on guest and host CR4 in VMX operation. 
*/ ++#define nested_guest_cr4_valid nested_cr4_valid ++#define nested_host_cr4_valid nested_cr4_valid ++ ++static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); ++ ++static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, ++ unsigned long cr0, ++ struct kvm_vcpu *vcpu) ++{ ++ if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) ++ vmx_decache_cr3(vcpu); ++ if (!(cr0 & X86_CR0_PG)) { ++ /* From paging/starting to nonpaging */ ++ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, ++ vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) | ++ (CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_CR3_STORE_EXITING)); ++ vcpu->arch.cr0 = cr0; ++ vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); ++ } else if (!is_paging(vcpu)) { ++ /* From nonpaging to paging */ ++ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, ++ vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & ++ ~(CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_CR3_STORE_EXITING)); ++ vcpu->arch.cr0 = cr0; ++ vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); ++ } ++ ++ if (!(cr0 & X86_CR0_WP)) ++ *hw_cr0 &= ~X86_CR0_WP; ++} ++ ++static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long hw_cr0; ++ ++ hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK); ++ if (enable_unrestricted_guest) ++ hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; ++ else { ++ hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; ++ ++ if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) ++ enter_pmode(vcpu); ++ ++ if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) ++ enter_rmode(vcpu); ++ } ++ ++#ifdef CONFIG_X86_64 ++ if (vcpu->arch.efer & EFER_LME) { ++ if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) ++ enter_lmode(vcpu); ++ if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) ++ exit_lmode(vcpu); ++ } ++#endif ++ ++ if (enable_ept && !enable_unrestricted_guest) ++ ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); ++ ++ vmcs_writel(CR0_READ_SHADOW, cr0); ++ vmcs_writel(GUEST_CR0, hw_cr0); ++ vcpu->arch.cr0 = cr0; ++ ++ /* depends on vcpu->arch.cr0 to be set to a new value */ ++ vmx->emulation_required = emulation_required(vcpu); ++} ++ ++static int get_ept_level(struct kvm_vcpu *vcpu) ++{ ++ if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) ++ return 5; ++ return 4; ++} ++ ++static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) ++{ ++ u64 eptp = VMX_EPTP_MT_WB; ++ ++ eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4; ++ ++ if (enable_ept_ad_bits && ++ (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) ++ eptp |= VMX_EPTP_AD_ENABLE_BIT; ++ eptp |= (root_hpa & PAGE_MASK); ++ ++ return eptp; ++} ++ ++static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) ++{ ++ struct kvm *kvm = vcpu->kvm; ++ unsigned long guest_cr3; ++ u64 eptp; ++ ++ guest_cr3 = cr3; ++ if (enable_ept) { ++ eptp = construct_eptp(vcpu, cr3); ++ vmcs_write64(EPT_POINTER, eptp); ++ ++ if (kvm_x86_ops->tlb_remote_flush) { ++ spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); ++ to_vmx(vcpu)->ept_pointer = eptp; ++ to_kvm_vmx(kvm)->ept_pointers_match ++ = EPT_POINTERS_CHECK; ++ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); ++ } ++ ++ if (enable_unrestricted_guest || is_paging(vcpu) || ++ is_guest_mode(vcpu)) ++ guest_cr3 = kvm_read_cr3(vcpu); ++ else ++ guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; ++ ept_load_pdptrs(vcpu); ++ } ++ ++ vmcs_writel(GUEST_CR3, guest_cr3); ++} ++ ++static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++{ ++ /* ++ * Pass through host's Machine Check Enable value to hw_cr4, which ++ * is in force while we are in guest mode. 
Do not let guests control ++ * this bit, even if host CR4.MCE == 0. ++ */ ++ unsigned long hw_cr4; ++ ++ hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); ++ if (enable_unrestricted_guest) ++ hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; ++ else if (to_vmx(vcpu)->rmode.vm86_active) ++ hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; ++ else ++ hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; ++ ++ if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { ++ if (cr4 & X86_CR4_UMIP) { ++ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_DESC); ++ hw_cr4 &= ~X86_CR4_UMIP; ++ } else if (!is_guest_mode(vcpu) || ++ !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) ++ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_DESC); ++ } ++ ++ if (cr4 & X86_CR4_VMXE) { ++ /* ++ * To use VMXON (and later other VMX instructions), a guest ++ * must first be able to turn on cr4.VMXE (see handle_vmon()). ++ * So basically the check on whether to allow nested VMX ++ * is here. We operate under the default treatment of SMM, ++ * so VMX cannot be enabled under SMM. ++ */ ++ if (!nested_vmx_allowed(vcpu) || is_smm(vcpu)) ++ return 1; ++ } ++ ++ if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) ++ return 1; ++ ++ vcpu->arch.cr4 = cr4; ++ ++ if (!enable_unrestricted_guest) { ++ if (enable_ept) { ++ if (!is_paging(vcpu)) { ++ hw_cr4 &= ~X86_CR4_PAE; ++ hw_cr4 |= X86_CR4_PSE; ++ } else if (!(cr4 & X86_CR4_PAE)) { ++ hw_cr4 &= ~X86_CR4_PAE; ++ } ++ } ++ ++ /* ++ * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in ++ * hardware. To emulate this behavior, SMEP/SMAP/PKU needs ++ * to be manually disabled when guest switches to non-paging ++ * mode. ++ * ++ * If !enable_unrestricted_guest, the CPU is always running ++ * with CR0.PG=1 and CR4 needs to be modified. ++ * If enable_unrestricted_guest, the CPU automatically ++ * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0. ++ */ ++ if (!is_paging(vcpu)) ++ hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); ++ } ++ ++ vmcs_writel(CR4_READ_SHADOW, cr4); ++ vmcs_writel(GUEST_CR4, hw_cr4); ++ return 0; ++} ++ ++static void vmx_get_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 ar; ++ ++ if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { ++ *var = vmx->rmode.segs[seg]; ++ if (seg == VCPU_SREG_TR ++ || var->selector == vmx_read_guest_seg_selector(vmx, seg)) ++ return; ++ var->base = vmx_read_guest_seg_base(vmx, seg); ++ var->selector = vmx_read_guest_seg_selector(vmx, seg); ++ return; ++ } ++ var->base = vmx_read_guest_seg_base(vmx, seg); ++ var->limit = vmx_read_guest_seg_limit(vmx, seg); ++ var->selector = vmx_read_guest_seg_selector(vmx, seg); ++ ar = vmx_read_guest_seg_ar(vmx, seg); ++ var->unusable = (ar >> 16) & 1; ++ var->type = ar & 15; ++ var->s = (ar >> 4) & 1; ++ var->dpl = (ar >> 5) & 3; ++ /* ++ * Some userspaces do not preserve unusable property. Since usable ++ * segment has to be present according to VMX spec we can use present ++ * property to amend userspace bug by making unusable segment always ++ * nonpresent. vmx_segment_access_rights() already marks nonpresent ++ * segment as unusable. 
++ */ ++ var->present = !var->unusable; ++ var->avl = (ar >> 12) & 1; ++ var->l = (ar >> 13) & 1; ++ var->db = (ar >> 14) & 1; ++ var->g = (ar >> 15) & 1; ++} ++ ++static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) ++{ ++ struct kvm_segment s; ++ ++ if (to_vmx(vcpu)->rmode.vm86_active) { ++ vmx_get_segment(vcpu, &s, seg); ++ return s.base; ++ } ++ return vmx_read_guest_seg_base(to_vmx(vcpu), seg); ++} ++ ++static int vmx_get_cpl(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (unlikely(vmx->rmode.vm86_active)) ++ return 0; ++ else { ++ int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); ++ return VMX_AR_DPL(ar); ++ } ++} ++ ++static u32 vmx_segment_access_rights(struct kvm_segment *var) ++{ ++ u32 ar; ++ ++ if (var->unusable || !var->present) ++ ar = 1 << 16; ++ else { ++ ar = var->type & 15; ++ ar |= (var->s & 1) << 4; ++ ar |= (var->dpl & 3) << 5; ++ ar |= (var->present & 1) << 7; ++ ar |= (var->avl & 1) << 12; ++ ar |= (var->l & 1) << 13; ++ ar |= (var->db & 1) << 14; ++ ar |= (var->g & 1) << 15; ++ } ++ ++ return ar; ++} ++ ++static void vmx_set_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; ++ ++ vmx_segment_cache_clear(vmx); ++ ++ if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { ++ vmx->rmode.segs[seg] = *var; ++ if (seg == VCPU_SREG_TR) ++ vmcs_write16(sf->selector, var->selector); ++ else if (var->s) ++ fix_rmode_seg(seg, &vmx->rmode.segs[seg]); ++ goto out; ++ } ++ ++ vmcs_writel(sf->base, var->base); ++ vmcs_write32(sf->limit, var->limit); ++ vmcs_write16(sf->selector, var->selector); ++ ++ /* ++ * Fix the "Accessed" bit in AR field of segment registers for older ++ * qemu binaries. ++ * IA32 arch specifies that at the time of processor reset the ++ * "Accessed" bit in the AR field of segment registers is 1. And qemu ++ * is setting it to 0 in the userland code. This causes invalid guest ++ * state vmexit when "unrestricted guest" mode is turned on. ++ * Fix for this setup issue in cpu_reset is being pushed in the qemu ++ * tree. Newer qemu binaries with that qemu fix would not need this ++ * kvm hack. 
++ */ ++ if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) ++ var->type |= 0x1; /* Accessed */ ++ ++ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); ++ ++out: ++ vmx->emulation_required = emulation_required(vcpu); ++} ++ ++static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) ++{ ++ u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); ++ ++ *db = (ar >> 14) & 1; ++ *l = (ar >> 13) & 1; ++} ++ ++static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ dt->size = vmcs_read32(GUEST_IDTR_LIMIT); ++ dt->address = vmcs_readl(GUEST_IDTR_BASE); ++} ++ ++static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ vmcs_write32(GUEST_IDTR_LIMIT, dt->size); ++ vmcs_writel(GUEST_IDTR_BASE, dt->address); ++} ++ ++static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ dt->size = vmcs_read32(GUEST_GDTR_LIMIT); ++ dt->address = vmcs_readl(GUEST_GDTR_BASE); ++} ++ ++static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ vmcs_write32(GUEST_GDTR_LIMIT, dt->size); ++ vmcs_writel(GUEST_GDTR_BASE, dt->address); ++} ++ ++static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) ++{ ++ struct kvm_segment var; ++ u32 ar; ++ ++ vmx_get_segment(vcpu, &var, seg); ++ var.dpl = 0x3; ++ if (seg == VCPU_SREG_CS) ++ var.type = 0x3; ++ ar = vmx_segment_access_rights(&var); ++ ++ if (var.base != (var.selector << 4)) ++ return false; ++ if (var.limit != 0xffff) ++ return false; ++ if (ar != 0xf3) ++ return false; ++ ++ return true; ++} ++ ++static bool code_segment_valid(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment cs; ++ unsigned int cs_rpl; ++ ++ vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); ++ cs_rpl = cs.selector & SEGMENT_RPL_MASK; ++ ++ if (cs.unusable) ++ return false; ++ if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK)) ++ return false; ++ if (!cs.s) ++ return false; ++ if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) { ++ if (cs.dpl > cs_rpl) ++ return false; ++ } else { ++ if (cs.dpl != cs_rpl) ++ return false; ++ } ++ if (!cs.present) ++ return false; ++ ++ /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */ ++ return true; ++} ++ ++static bool stack_segment_valid(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment ss; ++ unsigned int ss_rpl; ++ ++ vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); ++ ss_rpl = ss.selector & SEGMENT_RPL_MASK; ++ ++ if (ss.unusable) ++ return true; ++ if (ss.type != 3 && ss.type != 7) ++ return false; ++ if (!ss.s) ++ return false; ++ if (ss.dpl != ss_rpl) /* DPL != RPL */ ++ return false; ++ if (!ss.present) ++ return false; ++ ++ return true; ++} ++ ++static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) ++{ ++ struct kvm_segment var; ++ unsigned int rpl; ++ ++ vmx_get_segment(vcpu, &var, seg); ++ rpl = var.selector & SEGMENT_RPL_MASK; ++ ++ if (var.unusable) ++ return true; ++ if (!var.s) ++ return false; ++ if (!var.present) ++ return false; ++ if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) { ++ if (var.dpl < rpl) /* DPL < RPL */ ++ return false; ++ } ++ ++ /* TODO: Add other members to kvm_segment_field to allow checking for other access ++ * rights flags ++ */ ++ return true; ++} ++ ++static bool tr_valid(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment tr; ++ ++ vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); ++ ++ if (tr.unusable) ++ return false; ++ if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */ ++ return false; ++ if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in 
IA32e mode */ ++ return false; ++ if (!tr.present) ++ return false; ++ ++ return true; ++} ++ ++static bool ldtr_valid(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment ldtr; ++ ++ vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); ++ ++ if (ldtr.unusable) ++ return true; ++ if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */ ++ return false; ++ if (ldtr.type != 2) ++ return false; ++ if (!ldtr.present) ++ return false; ++ ++ return true; ++} ++ ++static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment cs, ss; ++ ++ vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); ++ vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); ++ ++ return ((cs.selector & SEGMENT_RPL_MASK) == ++ (ss.selector & SEGMENT_RPL_MASK)); ++} ++ ++static bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, ++ unsigned int port, int size); ++static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ unsigned long exit_qualification; ++ unsigned short port; ++ int size; ++ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) ++ return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ ++ port = exit_qualification >> 16; ++ size = (exit_qualification & 7) + 1; ++ ++ return nested_vmx_check_io_bitmaps(vcpu, port, size); ++} ++ ++/* ++ * Check if guest state is valid. Returns true if valid, false if ++ * not. ++ * We assume that registers are always usable ++ */ ++static bool guest_state_valid(struct kvm_vcpu *vcpu) ++{ ++ if (enable_unrestricted_guest) ++ return true; ++ ++ /* real mode guest state checks */ ++ if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) ++ return false; ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) ++ return false; ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_DS)) ++ return false; ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_ES)) ++ return false; ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_FS)) ++ return false; ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_GS)) ++ return false; ++ } else { ++ /* protected mode guest state checks */ ++ if (!cs_ss_rpl_check(vcpu)) ++ return false; ++ if (!code_segment_valid(vcpu)) ++ return false; ++ if (!stack_segment_valid(vcpu)) ++ return false; ++ if (!data_segment_valid(vcpu, VCPU_SREG_DS)) ++ return false; ++ if (!data_segment_valid(vcpu, VCPU_SREG_ES)) ++ return false; ++ if (!data_segment_valid(vcpu, VCPU_SREG_FS)) ++ return false; ++ if (!data_segment_valid(vcpu, VCPU_SREG_GS)) ++ return false; ++ if (!tr_valid(vcpu)) ++ return false; ++ if (!ldtr_valid(vcpu)) ++ return false; ++ } ++ /* TODO: ++ * - Add checks on RIP ++ * - Add checks on RFLAGS ++ */ ++ ++ return true; ++} ++ ++static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa) ++{ ++ return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu)); ++} ++ ++static int init_rmode_tss(struct kvm *kvm) ++{ ++ gfn_t fn; ++ u16 data = 0; ++ int idx, r; ++ ++ idx = srcu_read_lock(&kvm->srcu); ++ fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT; ++ r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); ++ if (r < 0) ++ goto out; ++ data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; ++ r = kvm_write_guest_page(kvm, fn++, &data, ++ TSS_IOPB_BASE_OFFSET, sizeof(u16)); ++ if (r < 0) ++ goto out; ++ r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); ++ if (r < 0) ++ goto out; ++ r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); ++ if (r < 0) ++ goto out; ++ data = ~0; ++ r = kvm_write_guest_page(kvm, fn, &data, ++ RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, ++ 
sizeof(u8)); ++out: ++ srcu_read_unlock(&kvm->srcu, idx); ++ return r; ++} ++ ++static int init_rmode_identity_map(struct kvm *kvm) ++{ ++ struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); ++ int i, idx, r = 0; ++ kvm_pfn_t identity_map_pfn; ++ u32 tmp; ++ ++ /* Protect kvm_vmx->ept_identity_pagetable_done. */ ++ mutex_lock(&kvm->slots_lock); ++ ++ if (likely(kvm_vmx->ept_identity_pagetable_done)) ++ goto out2; ++ ++ if (!kvm_vmx->ept_identity_map_addr) ++ kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; ++ identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT; ++ ++ r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, ++ kvm_vmx->ept_identity_map_addr, PAGE_SIZE); ++ if (r < 0) ++ goto out2; ++ ++ idx = srcu_read_lock(&kvm->srcu); ++ r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); ++ if (r < 0) ++ goto out; ++ /* Set up identity-mapping pagetable for EPT in real mode */ ++ for (i = 0; i < PT32_ENT_PER_PAGE; i++) { ++ tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | ++ _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); ++ r = kvm_write_guest_page(kvm, identity_map_pfn, ++ &tmp, i * sizeof(tmp), sizeof(tmp)); ++ if (r < 0) ++ goto out; ++ } ++ kvm_vmx->ept_identity_pagetable_done = true; ++ ++out: ++ srcu_read_unlock(&kvm->srcu, idx); ++ ++out2: ++ mutex_unlock(&kvm->slots_lock); ++ return r; ++} ++ ++static void seg_setup(int seg) ++{ ++ const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; ++ unsigned int ar; ++ ++ vmcs_write16(sf->selector, 0); ++ vmcs_writel(sf->base, 0); ++ vmcs_write32(sf->limit, 0xffff); ++ ar = 0x93; ++ if (seg == VCPU_SREG_CS) ++ ar |= 0x08; /* code segment */ ++ ++ vmcs_write32(sf->ar_bytes, ar); ++} ++ ++static int alloc_apic_access_page(struct kvm *kvm) ++{ ++ struct page *page; ++ int r = 0; ++ ++ mutex_lock(&kvm->slots_lock); ++ if (kvm->arch.apic_access_page_done) ++ goto out; ++ r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, ++ APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); ++ if (r) ++ goto out; ++ ++ page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); ++ if (is_error_page(page)) { ++ r = -EFAULT; ++ goto out; ++ } ++ ++ /* ++ * Do not pin the page in memory, so that memory hot-unplug ++ * is able to migrate it. ++ */ ++ put_page(page); ++ kvm->arch.apic_access_page_done = true; ++out: ++ mutex_unlock(&kvm->slots_lock); ++ return r; ++} ++ ++static int allocate_vpid(void) ++{ ++ int vpid; ++ ++ if (!enable_vpid) ++ return 0; ++ spin_lock(&vmx_vpid_lock); ++ vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); ++ if (vpid < VMX_NR_VPIDS) ++ __set_bit(vpid, vmx_vpid_bitmap); ++ else ++ vpid = 0; ++ spin_unlock(&vmx_vpid_lock); ++ return vpid; ++} ++ ++static void free_vpid(int vpid) ++{ ++ if (!enable_vpid || vpid == 0) ++ return; ++ spin_lock(&vmx_vpid_lock); ++ __clear_bit(vpid, vmx_vpid_bitmap); ++ spin_unlock(&vmx_vpid_lock); ++} ++ ++static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type) ++{ ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return; ++ ++ if (static_branch_unlikely(&enable_evmcs)) ++ evmcs_touch_msr_bitmap(); ++ ++ /* ++ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals ++ * have the write-low and read-high bitmap offsets the wrong way round. ++ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 
++ */ ++ if (msr <= 0x1fff) { ++ if (type & MSR_TYPE_R) ++ /* read-low */ ++ __clear_bit(msr, msr_bitmap + 0x000 / f); ++ ++ if (type & MSR_TYPE_W) ++ /* write-low */ ++ __clear_bit(msr, msr_bitmap + 0x800 / f); ++ ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ if (type & MSR_TYPE_R) ++ /* read-high */ ++ __clear_bit(msr, msr_bitmap + 0x400 / f); ++ ++ if (type & MSR_TYPE_W) ++ /* write-high */ ++ __clear_bit(msr, msr_bitmap + 0xc00 / f); ++ ++ } ++} ++ ++static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type) ++{ ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return; ++ ++ if (static_branch_unlikely(&enable_evmcs)) ++ evmcs_touch_msr_bitmap(); ++ ++ /* ++ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals ++ * have the write-low and read-high bitmap offsets the wrong way round. ++ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. ++ */ ++ if (msr <= 0x1fff) { ++ if (type & MSR_TYPE_R) ++ /* read-low */ ++ __set_bit(msr, msr_bitmap + 0x000 / f); ++ ++ if (type & MSR_TYPE_W) ++ /* write-low */ ++ __set_bit(msr, msr_bitmap + 0x800 / f); ++ ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ if (type & MSR_TYPE_R) ++ /* read-high */ ++ __set_bit(msr, msr_bitmap + 0x400 / f); ++ ++ if (type & MSR_TYPE_W) ++ /* write-high */ ++ __set_bit(msr, msr_bitmap + 0xc00 / f); ++ ++ } ++} ++ ++static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type, bool value) ++{ ++ if (value) ++ vmx_enable_intercept_for_msr(msr_bitmap, msr, type); ++ else ++ vmx_disable_intercept_for_msr(msr_bitmap, msr, type); ++} ++ ++/* ++ * If a msr is allowed by L0, we should check whether it is allowed by L1. ++ * The corresponding bit will be cleared unless both of L0 and L1 allow it. ++ */ ++static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, ++ unsigned long *msr_bitmap_nested, ++ u32 msr, int type) ++{ ++ int f = sizeof(unsigned long); ++ ++ /* ++ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals ++ * have the write-low and read-high bitmap offsets the wrong way round. ++ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 
++ */ ++ if (msr <= 0x1fff) { ++ if (type & MSR_TYPE_R && ++ !test_bit(msr, msr_bitmap_l1 + 0x000 / f)) ++ /* read-low */ ++ __clear_bit(msr, msr_bitmap_nested + 0x000 / f); ++ ++ if (type & MSR_TYPE_W && ++ !test_bit(msr, msr_bitmap_l1 + 0x800 / f)) ++ /* write-low */ ++ __clear_bit(msr, msr_bitmap_nested + 0x800 / f); ++ ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ if (type & MSR_TYPE_R && ++ !test_bit(msr, msr_bitmap_l1 + 0x400 / f)) ++ /* read-high */ ++ __clear_bit(msr, msr_bitmap_nested + 0x400 / f); ++ ++ if (type & MSR_TYPE_W && ++ !test_bit(msr, msr_bitmap_l1 + 0xc00 / f)) ++ /* write-high */ ++ __clear_bit(msr, msr_bitmap_nested + 0xc00 / f); ++ ++ } ++} ++ ++static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) ++{ ++ u8 mode = 0; ++ ++ if (cpu_has_secondary_exec_ctrls() && ++ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { ++ mode |= MSR_BITMAP_MODE_X2APIC; ++ if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) ++ mode |= MSR_BITMAP_MODE_X2APIC_APICV; ++ } ++ ++ return mode; ++} ++ ++#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) ++ ++static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, ++ u8 mode) ++{ ++ int msr; ++ ++ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { ++ unsigned word = msr / BITS_PER_LONG; ++ msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; ++ msr_bitmap[word + (0x800 / sizeof(long))] = ~0; ++ } ++ ++ if (mode & MSR_BITMAP_MODE_X2APIC) { ++ /* ++ * TPR reads and writes can be virtualized even if virtual interrupt ++ * delivery is not in use. ++ */ ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); ++ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { ++ vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); ++ } ++ } ++} ++ ++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; ++ u8 mode = vmx_msr_bitmap_mode(vcpu); ++ u8 changed = mode ^ vmx->msr_bitmap_mode; ++ ++ if (!changed) ++ return; ++ ++ if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) ++ vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); ++ ++ vmx->msr_bitmap_mode = mode; ++} ++ ++static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) ++{ ++ return enable_apicv; ++} ++ ++static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ gfn_t gfn; ++ ++ /* ++ * Don't need to mark the APIC access page dirty; it is never ++ * written to by the CPU during APIC virtualization. 
++ */ ++ ++ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { ++ gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; ++ kvm_vcpu_mark_page_dirty(vcpu, gfn); ++ } ++ ++ if (nested_cpu_has_posted_intr(vmcs12)) { ++ gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; ++ kvm_vcpu_mark_page_dirty(vcpu, gfn); ++ } ++} ++ ++ ++static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int max_irr; ++ void *vapic_page; ++ u16 status; ++ ++ if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) ++ return; ++ ++ vmx->nested.pi_pending = false; ++ if (!pi_test_and_clear_on(vmx->nested.pi_desc)) ++ return; ++ ++ max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); ++ if (max_irr != 256) { ++ vapic_page = kmap(vmx->nested.virtual_apic_page); ++ __kvm_apic_update_irr(vmx->nested.pi_desc->pir, ++ vapic_page, &max_irr); ++ kunmap(vmx->nested.virtual_apic_page); ++ ++ status = vmcs_read16(GUEST_INTR_STATUS); ++ if ((u8)max_irr > ((u8)status & 0xff)) { ++ status &= ~0xff; ++ status |= (u8)max_irr; ++ vmcs_write16(GUEST_INTR_STATUS, status); ++ } ++ } ++ ++ nested_mark_vmcs12_pages_dirty(vcpu); ++} ++ ++static u8 vmx_get_rvi(void) ++{ ++ return vmcs_read16(GUEST_INTR_STATUS) & 0xff; ++} ++ ++static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ void *vapic_page; ++ u32 vppr; ++ int rvi; ++ ++ if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || ++ !nested_cpu_has_vid(get_vmcs12(vcpu)) || ++ WARN_ON_ONCE(!vmx->nested.virtual_apic_page)) ++ return false; ++ ++ rvi = vmx_get_rvi(); ++ ++ vapic_page = kmap(vmx->nested.virtual_apic_page); ++ vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); ++ kunmap(vmx->nested.virtual_apic_page); ++ ++ return ((rvi & 0xf0) > (vppr & 0xf0)); ++} ++ ++static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, ++ bool nested) ++{ ++#ifdef CONFIG_SMP ++ int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR; ++ ++ if (vcpu->mode == IN_GUEST_MODE) { ++ /* ++ * The vector of interrupt to be delivered to vcpu had ++ * been set in PIR before this function. ++ * ++ * Following cases will be reached in this block, and ++ * we always send a notification event in all cases as ++ * explained below. ++ * ++ * Case 1: vcpu keeps in non-root mode. Sending a ++ * notification event posts the interrupt to vcpu. ++ * ++ * Case 2: vcpu exits to root mode and is still ++ * runnable. PIR will be synced to vIRR before the ++ * next vcpu entry. Sending a notification event in ++ * this case has no effect, as vcpu is not in root ++ * mode. ++ * ++ * Case 3: vcpu exits to root mode and is blocked. ++ * vcpu_block() has already synced PIR to vIRR and ++ * never blocks vcpu if vIRR is not cleared. Therefore, ++ * a blocked vcpu here does not wait for any requested ++ * interrupts in PIR, and sending a notification event ++ * which has no effect is safe here. ++ */ ++ ++ apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); ++ return true; ++ } ++#endif ++ return false; ++} ++ ++static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, ++ int vector) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (is_guest_mode(vcpu) && ++ vector == vmx->nested.posted_intr_nv) { ++ /* ++ * If a posted intr is not recognized by hardware, ++ * we will accomplish it in the next vmentry. ++ */ ++ vmx->nested.pi_pending = true; ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ /* the PIR and ON have been set by L1. 
*/ ++ if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true)) ++ kvm_vcpu_kick(vcpu); ++ return 0; ++ } ++ return -1; ++} ++/* ++ * Send interrupt to vcpu via posted interrupt way. ++ * 1. If target vcpu is running(non-root mode), send posted interrupt ++ * notification to vcpu and hardware will sync PIR to vIRR atomically. ++ * 2. If target vcpu isn't running(root mode), kick it to pick up the ++ * interrupt from PIR in next vmentry. ++ */ ++static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int r; ++ ++ r = vmx_deliver_nested_posted_interrupt(vcpu, vector); ++ if (!r) ++ return; ++ ++ if (pi_test_and_set_pir(vector, &vmx->pi_desc)) ++ return; ++ ++ /* If a previous notification has sent the IPI, nothing to do. */ ++ if (pi_test_and_set_on(&vmx->pi_desc)) ++ return; ++ ++ if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) ++ kvm_vcpu_kick(vcpu); ++} ++ ++/* ++ * Set up the vmcs's constant host-state fields, i.e., host-state fields that ++ * will not change in the lifetime of the guest. ++ * Note that host-state that does change is set elsewhere. E.g., host-state ++ * that is set differently for each CPU is set in vmx_vcpu_load(), not here. ++ */ ++static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) ++{ ++ u32 low32, high32; ++ unsigned long tmpl; ++ struct desc_ptr dt; ++ unsigned long cr0, cr3, cr4; ++ ++ cr0 = read_cr0(); ++ WARN_ON(cr0 & X86_CR0_TS); ++ vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */ ++ ++ /* ++ * Save the most likely value for this task's CR3 in the VMCS. ++ * We can't use __get_current_cr3_fast() because we're not atomic. ++ */ ++ cr3 = __read_cr3(); ++ vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ ++ vmx->loaded_vmcs->host_state.cr3 = cr3; ++ ++ /* Save the most likely value for this task's CR4 in the VMCS. */ ++ cr4 = cr4_read_shadow(); ++ vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ ++ vmx->loaded_vmcs->host_state.cr4 = cr4; ++ ++ vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ ++#ifdef CONFIG_X86_64 ++ /* ++ * Load null selectors, so we can avoid reloading them in ++ * vmx_prepare_switch_to_host(), in case userspace uses ++ * the null selectors too (the expected case). 
++ */ ++ vmcs_write16(HOST_DS_SELECTOR, 0); ++ vmcs_write16(HOST_ES_SELECTOR, 0); ++#else ++ vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ ++ vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ ++#endif ++ vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ ++ vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ ++ ++ store_idt(&dt); ++ vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ ++ vmx->host_idt_base = dt.address; ++ ++ vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ ++ ++ rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); ++ vmcs_write32(HOST_IA32_SYSENTER_CS, low32); ++ rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl); ++ vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */ ++ ++ if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { ++ rdmsr(MSR_IA32_CR_PAT, low32, high32); ++ vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32)); ++ } ++} ++ ++static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) ++{ ++ vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; ++ if (enable_ept) ++ vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; ++ if (is_guest_mode(&vmx->vcpu)) ++ vmx->vcpu.arch.cr4_guest_owned_bits &= ++ ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask; ++ vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); ++} ++ ++static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) ++{ ++ u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; ++ ++ if (!kvm_vcpu_apicv_active(&vmx->vcpu)) ++ pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; ++ ++ if (!enable_vnmi) ++ pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; ++ ++ /* Enable the preemption timer dynamically */ ++ pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; ++ return pin_based_exec_ctrl; ++} ++ ++static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); ++ if (cpu_has_secondary_exec_ctrls()) { ++ if (kvm_vcpu_apicv_active(vcpu)) ++ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); ++ else ++ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); ++ } ++ ++ if (cpu_has_vmx_msr_bitmap()) ++ vmx_update_msr_bitmap(vcpu); ++} ++ ++static u32 vmx_exec_control(struct vcpu_vmx *vmx) ++{ ++ u32 exec_control = vmcs_config.cpu_based_exec_ctrl; ++ ++ if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) ++ exec_control &= ~CPU_BASED_MOV_DR_EXITING; ++ ++ if (!cpu_need_tpr_shadow(&vmx->vcpu)) { ++ exec_control &= ~CPU_BASED_TPR_SHADOW; ++#ifdef CONFIG_X86_64 ++ exec_control |= CPU_BASED_CR8_STORE_EXITING | ++ CPU_BASED_CR8_LOAD_EXITING; ++#endif ++ } ++ if (!enable_ept) ++ exec_control |= CPU_BASED_CR3_STORE_EXITING | ++ CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_INVLPG_EXITING; ++ if (kvm_mwait_in_guest(vmx->vcpu.kvm)) ++ exec_control &= ~(CPU_BASED_MWAIT_EXITING | ++ CPU_BASED_MONITOR_EXITING); ++ if (kvm_hlt_in_guest(vmx->vcpu.kvm)) ++ exec_control &= ~CPU_BASED_HLT_EXITING; ++ return exec_control; ++} ++ ++static bool vmx_rdrand_supported(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_RDRAND_EXITING; ++} ++ ++static bool vmx_rdseed_supported(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_RDSEED_EXITING; ++} ++ ++static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) ++{ ++ struct kvm_vcpu *vcpu = &vmx->vcpu; ++ ++ u32 exec_control = 
vmcs_config.cpu_based_2nd_exec_ctrl; ++ ++ if (!cpu_need_virtualize_apic_accesses(vcpu)) ++ exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; ++ if (vmx->vpid == 0) ++ exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; ++ if (!enable_ept) { ++ exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; ++ enable_unrestricted_guest = 0; ++ } ++ if (!enable_unrestricted_guest) ++ exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; ++ if (kvm_pause_in_guest(vmx->vcpu.kvm)) ++ exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; ++ if (!kvm_vcpu_apicv_active(vcpu)) ++ exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); ++ exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; ++ ++ /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP, ++ * in vmx_set_cr4. */ ++ exec_control &= ~SECONDARY_EXEC_DESC; ++ ++ /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD ++ (handle_vmptrld). ++ We can NOT enable shadow_vmcs here because we don't have yet ++ a current VMCS12 ++ */ ++ exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; ++ ++ if (!enable_pml) ++ exec_control &= ~SECONDARY_EXEC_ENABLE_PML; ++ ++ if (vmx_xsaves_supported()) { ++ /* Exposing XSAVES only when XSAVE is exposed */ ++ bool xsaves_enabled = ++ guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && ++ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); ++ ++ if (!xsaves_enabled) ++ exec_control &= ~SECONDARY_EXEC_XSAVES; ++ ++ if (nested) { ++ if (xsaves_enabled) ++ vmx->nested.msrs.secondary_ctls_high |= ++ SECONDARY_EXEC_XSAVES; ++ else ++ vmx->nested.msrs.secondary_ctls_high &= ++ ~SECONDARY_EXEC_XSAVES; ++ } ++ } ++ ++ if (vmx_rdtscp_supported()) { ++ bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP); ++ if (!rdtscp_enabled) ++ exec_control &= ~SECONDARY_EXEC_RDTSCP; ++ ++ if (nested) { ++ if (rdtscp_enabled) ++ vmx->nested.msrs.secondary_ctls_high |= ++ SECONDARY_EXEC_RDTSCP; ++ else ++ vmx->nested.msrs.secondary_ctls_high &= ++ ~SECONDARY_EXEC_RDTSCP; ++ } ++ } ++ ++ if (vmx_invpcid_supported()) { ++ /* Exposing INVPCID only when PCID is exposed */ ++ bool invpcid_enabled = ++ guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) && ++ guest_cpuid_has(vcpu, X86_FEATURE_PCID); ++ ++ if (!invpcid_enabled) { ++ exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; ++ guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID); ++ } ++ ++ if (nested) { ++ if (invpcid_enabled) ++ vmx->nested.msrs.secondary_ctls_high |= ++ SECONDARY_EXEC_ENABLE_INVPCID; ++ else ++ vmx->nested.msrs.secondary_ctls_high &= ++ ~SECONDARY_EXEC_ENABLE_INVPCID; ++ } ++ } ++ ++ if (vmx_rdrand_supported()) { ++ bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND); ++ if (rdrand_enabled) ++ exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING; ++ ++ if (nested) { ++ if (rdrand_enabled) ++ vmx->nested.msrs.secondary_ctls_high |= ++ SECONDARY_EXEC_RDRAND_EXITING; ++ else ++ vmx->nested.msrs.secondary_ctls_high &= ++ ~SECONDARY_EXEC_RDRAND_EXITING; ++ } ++ } ++ ++ if (vmx_rdseed_supported()) { ++ bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED); ++ if (rdseed_enabled) ++ exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING; ++ ++ if (nested) { ++ if (rdseed_enabled) ++ vmx->nested.msrs.secondary_ctls_high |= ++ SECONDARY_EXEC_RDSEED_EXITING; ++ else ++ vmx->nested.msrs.secondary_ctls_high &= ++ ~SECONDARY_EXEC_RDSEED_EXITING; ++ } ++ } ++ ++ vmx->secondary_exec_control = exec_control; ++} ++ ++static void ept_set_mmio_spte_mask(void) ++{ ++ /* ++ * EPT Misconfigurations can be generated if the value of bits 2:0 ++ * of an EPT paging-structure 
entry is 110b (write/execute). ++ */ ++ kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, ++ VMX_EPT_MISCONFIG_WX_VALUE); ++} ++ ++#define VMX_XSS_EXIT_BITMAP 0 ++/* ++ * Sets up the vmcs for emulated real mode. ++ */ ++static void vmx_vcpu_setup(struct vcpu_vmx *vmx) ++{ ++ int i; ++ ++ if (enable_shadow_vmcs) { ++ /* ++ * At vCPU creation, "VMWRITE to any supported field ++ * in the VMCS" is supported, so use the more ++ * permissive vmx_vmread_bitmap to specify both read ++ * and write permissions for the shadow VMCS. ++ */ ++ vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); ++ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap)); ++ } ++ if (cpu_has_vmx_msr_bitmap()) ++ vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); ++ ++ vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ ++ ++ /* Control */ ++ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); ++ vmx->hv_deadline_tsc = -1; ++ ++ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); ++ ++ if (cpu_has_secondary_exec_ctrls()) { ++ vmx_compute_secondary_exec_control(vmx); ++ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, ++ vmx->secondary_exec_control); ++ } ++ ++ if (kvm_vcpu_apicv_active(&vmx->vcpu)) { ++ vmcs_write64(EOI_EXIT_BITMAP0, 0); ++ vmcs_write64(EOI_EXIT_BITMAP1, 0); ++ vmcs_write64(EOI_EXIT_BITMAP2, 0); ++ vmcs_write64(EOI_EXIT_BITMAP3, 0); ++ ++ vmcs_write16(GUEST_INTR_STATUS, 0); ++ ++ vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); ++ vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); ++ } ++ ++ if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { ++ vmcs_write32(PLE_GAP, ple_gap); ++ vmx->ple_window = ple_window; ++ vmx->ple_window_dirty = true; ++ } ++ ++ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); ++ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); ++ vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ ++ ++ vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ ++ vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ ++ vmx_set_constant_host_state(vmx); ++ vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */ ++ vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ ++ ++ if (cpu_has_vmx_vmfunc()) ++ vmcs_write64(VM_FUNCTION_CONTROL, 0); ++ ++ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); ++ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); ++ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); ++ ++ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) ++ vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); ++ ++ for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { ++ u32 index = vmx_msr_index[i]; ++ u32 data_low, data_high; ++ int j = vmx->nmsrs; ++ ++ if (rdmsr_safe(index, &data_low, &data_high) < 0) ++ continue; ++ if (wrmsr_safe(index, data_low, data_high) < 0) ++ continue; ++ vmx->guest_msrs[j].index = i; ++ vmx->guest_msrs[j].data = 0; ++ vmx->guest_msrs[j].mask = -1ull; ++ ++vmx->nmsrs; ++ } ++ ++ vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); ++ ++ /* 22.2.1, 20.8.1 */ ++ vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); ++ ++ vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; ++ vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); ++ ++ set_cr4_guest_host_mask(vmx); ++ ++ if (vmx_xsaves_supported()) ++ vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); ++ ++ if (enable_pml) { ++ ASSERT(vmx->pml_pg); ++ vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); ++ vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); ++ } ++ ++ if (cpu_has_vmx_encls_vmexit()) ++ vmcs_write64(ENCLS_EXITING_BITMAP, 
-1ull); ++} ++ ++static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct msr_data apic_base_msr; ++ u64 cr0; ++ ++ vmx->rmode.vm86_active = 0; ++ vmx->spec_ctrl = 0; ++ ++ vcpu->arch.microcode_version = 0x100000000ULL; ++ vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); ++ kvm_set_cr8(vcpu, 0); ++ ++ if (!init_event) { ++ apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | ++ MSR_IA32_APICBASE_ENABLE; ++ if (kvm_vcpu_is_reset_bsp(vcpu)) ++ apic_base_msr.data |= MSR_IA32_APICBASE_BSP; ++ apic_base_msr.host_initiated = true; ++ kvm_set_apic_base(vcpu, &apic_base_msr); ++ } ++ ++ vmx_segment_cache_clear(vmx); ++ ++ seg_setup(VCPU_SREG_CS); ++ vmcs_write16(GUEST_CS_SELECTOR, 0xf000); ++ vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); ++ ++ seg_setup(VCPU_SREG_DS); ++ seg_setup(VCPU_SREG_ES); ++ seg_setup(VCPU_SREG_FS); ++ seg_setup(VCPU_SREG_GS); ++ seg_setup(VCPU_SREG_SS); ++ ++ vmcs_write16(GUEST_TR_SELECTOR, 0); ++ vmcs_writel(GUEST_TR_BASE, 0); ++ vmcs_write32(GUEST_TR_LIMIT, 0xffff); ++ vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); ++ ++ vmcs_write16(GUEST_LDTR_SELECTOR, 0); ++ vmcs_writel(GUEST_LDTR_BASE, 0); ++ vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); ++ vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); ++ ++ if (!init_event) { ++ vmcs_write32(GUEST_SYSENTER_CS, 0); ++ vmcs_writel(GUEST_SYSENTER_ESP, 0); ++ vmcs_writel(GUEST_SYSENTER_EIP, 0); ++ vmcs_write64(GUEST_IA32_DEBUGCTL, 0); ++ } ++ ++ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); ++ kvm_rip_write(vcpu, 0xfff0); ++ ++ vmcs_writel(GUEST_GDTR_BASE, 0); ++ vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); ++ ++ vmcs_writel(GUEST_IDTR_BASE, 0); ++ vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); ++ ++ vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); ++ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); ++ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); ++ if (kvm_mpx_supported()) ++ vmcs_write64(GUEST_BNDCFGS, 0); ++ ++ setup_msrs(vmx); ++ ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ ++ ++ if (cpu_has_vmx_tpr_shadow() && !init_event) { ++ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); ++ if (cpu_need_tpr_shadow(vcpu)) ++ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, ++ __pa(vcpu->arch.apic->regs)); ++ vmcs_write32(TPR_THRESHOLD, 0); ++ } ++ ++ kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); ++ ++ if (vmx->vpid != 0) ++ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); ++ ++ cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; ++ vmx->vcpu.arch.cr0 = cr0; ++ vmx_set_cr0(vcpu, cr0); /* enter rmode */ ++ vmx_set_cr4(vcpu, 0); ++ vmx_set_efer(vcpu, 0); ++ ++ update_exception_bitmap(vcpu); ++ ++ vpid_sync_context(vmx->vpid); ++ if (init_event) ++ vmx_clear_hlt(vcpu); ++} ++ ++/* ++ * In nested virtualization, check if L1 asked to exit on external interrupts. ++ * For most existing hypervisors, this will always return true. 
++ */ ++static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) ++{ ++ return get_vmcs12(vcpu)->pin_based_vm_exec_control & ++ PIN_BASED_EXT_INTR_MASK; ++} ++ ++/* ++ * In nested virtualization, check if L1 has set ++ * VM_EXIT_ACK_INTR_ON_EXIT ++ */ ++static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu) ++{ ++ return get_vmcs12(vcpu)->vm_exit_controls & ++ VM_EXIT_ACK_INTR_ON_EXIT; ++} ++ ++static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) ++{ ++ return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu)); ++} ++ ++static void enable_irq_window(struct kvm_vcpu *vcpu) ++{ ++ vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_VIRTUAL_INTR_PENDING); ++} ++ ++static void enable_nmi_window(struct kvm_vcpu *vcpu) ++{ ++ if (!enable_vnmi || ++ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { ++ enable_irq_window(vcpu); ++ return; ++ } ++ ++ vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_VIRTUAL_NMI_PENDING); ++} ++ ++static void vmx_inject_irq(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ uint32_t intr; ++ int irq = vcpu->arch.interrupt.nr; ++ ++ trace_kvm_inj_virq(irq); ++ ++ ++vcpu->stat.irq_injections; ++ if (vmx->rmode.vm86_active) { ++ int inc_eip = 0; ++ if (vcpu->arch.interrupt.soft) ++ inc_eip = vcpu->arch.event_exit_inst_len; ++ if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ return; ++ } ++ intr = irq | INTR_INFO_VALID_MASK; ++ if (vcpu->arch.interrupt.soft) { ++ intr |= INTR_TYPE_SOFT_INTR; ++ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, ++ vmx->vcpu.arch.event_exit_inst_len); ++ } else ++ intr |= INTR_TYPE_EXT_INTR; ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); ++ ++ vmx_clear_hlt(vcpu); ++} ++ ++static void vmx_inject_nmi(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (!enable_vnmi) { ++ /* ++ * Tracking the NMI-blocked state in software is built upon ++ * finding the next open IRQ window. This, in turn, depends on ++ * well-behaving guests: They have to keep IRQs disabled at ++ * least as long as the NMI handler runs. Otherwise we may ++ * cause NMI nesting, maybe breaking the guest. But as this is ++ * highly unlikely, we can live with the residual risk. 
++ */ ++ vmx->loaded_vmcs->soft_vnmi_blocked = 1; ++ vmx->loaded_vmcs->vnmi_blocked_time = 0; ++ } ++ ++ ++vcpu->stat.nmi_injections; ++ vmx->loaded_vmcs->nmi_known_unmasked = false; ++ ++ if (vmx->rmode.vm86_active) { ++ if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ return; ++ } ++ ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, ++ INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); ++ ++ vmx_clear_hlt(vcpu); ++} ++ ++static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ bool masked; ++ ++ if (!enable_vnmi) ++ return vmx->loaded_vmcs->soft_vnmi_blocked; ++ if (vmx->loaded_vmcs->nmi_known_unmasked) ++ return false; ++ masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; ++ vmx->loaded_vmcs->nmi_known_unmasked = !masked; ++ return masked; ++} ++ ++static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (!enable_vnmi) { ++ if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) { ++ vmx->loaded_vmcs->soft_vnmi_blocked = masked; ++ vmx->loaded_vmcs->vnmi_blocked_time = 0; ++ } ++ } else { ++ vmx->loaded_vmcs->nmi_known_unmasked = !masked; ++ if (masked) ++ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, ++ GUEST_INTR_STATE_NMI); ++ else ++ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, ++ GUEST_INTR_STATE_NMI); ++ } ++} ++ ++static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) ++{ ++ if (to_vmx(vcpu)->nested.nested_run_pending) ++ return 0; ++ ++ if (!enable_vnmi && ++ to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) ++ return 0; ++ ++ return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & ++ (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI ++ | GUEST_INTR_STATE_NMI)); ++} ++ ++static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) ++{ ++ return (!to_vmx(vcpu)->nested.nested_run_pending && ++ vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && ++ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & ++ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); ++} ++ ++static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) ++{ ++ int ret; ++ ++ if (enable_unrestricted_guest) ++ return 0; ++ ++ ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, ++ PAGE_SIZE * 3); ++ if (ret) ++ return ret; ++ to_kvm_vmx(kvm)->tss_addr = addr; ++ return init_rmode_tss(kvm); ++} ++ ++static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) ++{ ++ to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; ++ return 0; ++} ++ ++static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) ++{ ++ switch (vec) { ++ case BP_VECTOR: ++ /* ++ * Update instruction length as we may reinject the exception ++ * from user space while in guest debugging mode. ++ */ ++ to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = ++ vmcs_read32(VM_EXIT_INSTRUCTION_LEN); ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) ++ return false; ++ /* fall through */ ++ case DB_VECTOR: ++ if (vcpu->guest_debug & ++ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) ++ return false; ++ /* fall through */ ++ case DE_VECTOR: ++ case OF_VECTOR: ++ case BR_VECTOR: ++ case UD_VECTOR: ++ case DF_VECTOR: ++ case SS_VECTOR: ++ case GP_VECTOR: ++ case MF_VECTOR: ++ return true; ++ break; ++ } ++ return false; ++} ++ ++static int handle_rmode_exception(struct kvm_vcpu *vcpu, ++ int vec, u32 err_code) ++{ ++ /* ++ * Instruction with address size override prefix opcode 0x67 ++ * Cause the #SS fault with 0 error code in VM86 mode. 
++ */ ++ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { ++ if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) { ++ if (vcpu->arch.halt_request) { ++ vcpu->arch.halt_request = 0; ++ return kvm_vcpu_halt(vcpu); ++ } ++ return 1; ++ } ++ return 0; ++ } ++ ++ /* ++ * Forward all other exceptions that are valid in real mode. ++ * FIXME: Breaks guest debugging in real mode, needs to be fixed with ++ * the required debugging infrastructure rework. ++ */ ++ kvm_queue_exception(vcpu, vec); ++ return 1; ++} ++ ++/* ++ * Trigger machine check on the host. We assume all the MSRs are already set up ++ * by the CPU and that we still run on the same CPU as the MCE occurred on. ++ * We pass a fake environment to the machine check handler because we want ++ * the guest to be always treated like user space, no matter what context ++ * it used internally. ++ */ ++static void kvm_machine_check(void) ++{ ++#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) ++ struct pt_regs regs = { ++ .cs = 3, /* Fake ring 3 no matter what the guest ran on */ ++ .flags = X86_EFLAGS_IF, ++ }; ++ ++ do_machine_check(®s, 0); ++#endif ++} ++ ++static int handle_machine_check(struct kvm_vcpu *vcpu) ++{ ++ /* already handled by vcpu_run */ ++ return 1; ++} ++ ++static int handle_exception(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct kvm_run *kvm_run = vcpu->run; ++ u32 intr_info, ex_no, error_code; ++ unsigned long cr2, rip, dr6; ++ u32 vect_info; ++ enum emulation_result er; ++ ++ vect_info = vmx->idt_vectoring_info; ++ intr_info = vmx->exit_intr_info; ++ ++ if (is_machine_check(intr_info)) ++ return handle_machine_check(vcpu); ++ ++ if (is_nmi(intr_info)) ++ return 1; /* already handled by vmx_vcpu_run() */ ++ ++ if (is_invalid_opcode(intr_info)) ++ return handle_ud(vcpu); ++ ++ error_code = 0; ++ if (intr_info & INTR_INFO_DELIVER_CODE_MASK) ++ error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); ++ ++ if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { ++ WARN_ON_ONCE(!enable_vmware_backdoor); ++ er = kvm_emulate_instruction(vcpu, ++ EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); ++ if (er == EMULATE_USER_EXIT) ++ return 0; ++ else if (er != EMULATE_DONE) ++ kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); ++ return 1; ++ } ++ ++ /* ++ * The #PF with PFEC.RSVD = 1 indicates the guest is accessing ++ * MMIO, it is better to report an internal error. ++ * See the comments in vmx_handle_exit. 
++ */ ++ if ((vect_info & VECTORING_INFO_VALID_MASK) && ++ !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; ++ vcpu->run->internal.ndata = 3; ++ vcpu->run->internal.data[0] = vect_info; ++ vcpu->run->internal.data[1] = intr_info; ++ vcpu->run->internal.data[2] = error_code; ++ return 0; ++ } ++ ++ if (is_page_fault(intr_info)) { ++ cr2 = vmcs_readl(EXIT_QUALIFICATION); ++ /* EPT won't cause page fault directly */ ++ WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept); ++ return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0); ++ } ++ ++ ex_no = intr_info & INTR_INFO_VECTOR_MASK; ++ ++ if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no)) ++ return handle_rmode_exception(vcpu, ex_no, error_code); ++ ++ switch (ex_no) { ++ case AC_VECTOR: ++ kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); ++ return 1; ++ case DB_VECTOR: ++ dr6 = vmcs_readl(EXIT_QUALIFICATION); ++ if (!(vcpu->guest_debug & ++ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { ++ vcpu->arch.dr6 &= ~15; ++ vcpu->arch.dr6 |= dr6 | DR6_RTM; ++ if (is_icebp(intr_info)) ++ skip_emulated_instruction(vcpu); ++ ++ kvm_queue_exception(vcpu, DB_VECTOR); ++ return 1; ++ } ++ kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; ++ kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); ++ /* fall through */ ++ case BP_VECTOR: ++ /* ++ * Update instruction length as we may reinject #BP from ++ * user space while in guest debugging mode. Reading it for ++ * #DB as well causes no harm, it is not used in that case. ++ */ ++ vmx->vcpu.arch.event_exit_inst_len = ++ vmcs_read32(VM_EXIT_INSTRUCTION_LEN); ++ kvm_run->exit_reason = KVM_EXIT_DEBUG; ++ rip = kvm_rip_read(vcpu); ++ kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; ++ kvm_run->debug.arch.exception = ex_no; ++ break; ++ default: ++ kvm_run->exit_reason = KVM_EXIT_EXCEPTION; ++ kvm_run->ex.exception = ex_no; ++ kvm_run->ex.error_code = error_code; ++ break; ++ } ++ return 0; ++} ++ ++static int handle_external_interrupt(struct kvm_vcpu *vcpu) ++{ ++ ++vcpu->stat.irq_exits; ++ return 1; ++} ++ ++static int handle_triple_fault(struct kvm_vcpu *vcpu) ++{ ++ vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; ++ vcpu->mmio_needed = 0; ++ return 0; ++} ++ ++static int handle_io(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification; ++ int size, in, string; ++ unsigned port; ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ string = (exit_qualification & 16) != 0; ++ ++ ++vcpu->stat.io_exits; ++ ++ if (string) ++ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; ++ ++ port = exit_qualification >> 16; ++ size = (exit_qualification & 7) + 1; ++ in = (exit_qualification & 8) != 0; ++ ++ return kvm_fast_pio(vcpu, size, port, in); ++} ++ ++static void ++vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) ++{ ++ /* ++ * Patch in the VMCALL instruction: ++ */ ++ hypercall[0] = 0x0f; ++ hypercall[1] = 0x01; ++ hypercall[2] = 0xc1; ++} ++ ++/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ ++static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ if (is_guest_mode(vcpu)) { ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ unsigned long orig_val = val; ++ ++ /* ++ * We get here when L2 changed cr0 in a way that did not change ++ * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), ++ * but did change L0 shadowed bits. 
So we first calculate the ++ * effective cr0 value that L1 would like to write into the ++ * hardware. It consists of the L2-owned bits from the new ++ * value combined with the L1-owned bits from L1's guest_cr0. ++ */ ++ val = (val & ~vmcs12->cr0_guest_host_mask) | ++ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); ++ ++ if (!nested_guest_cr0_valid(vcpu, val)) ++ return 1; ++ ++ if (kvm_set_cr0(vcpu, val)) ++ return 1; ++ vmcs_writel(CR0_READ_SHADOW, orig_val); ++ return 0; ++ } else { ++ if (to_vmx(vcpu)->nested.vmxon && ++ !nested_host_cr0_valid(vcpu, val)) ++ return 1; ++ ++ return kvm_set_cr0(vcpu, val); ++ } ++} ++ ++static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ if (is_guest_mode(vcpu)) { ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ unsigned long orig_val = val; ++ ++ /* analogously to handle_set_cr0 */ ++ val = (val & ~vmcs12->cr4_guest_host_mask) | ++ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); ++ if (kvm_set_cr4(vcpu, val)) ++ return 1; ++ vmcs_writel(CR4_READ_SHADOW, orig_val); ++ return 0; ++ } else ++ return kvm_set_cr4(vcpu, val); ++} ++ ++static int handle_desc(struct kvm_vcpu *vcpu) ++{ ++ WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); ++ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; ++} ++ ++static int handle_cr(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification, val; ++ int cr; ++ int reg; ++ int err; ++ int ret; ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ cr = exit_qualification & 15; ++ reg = (exit_qualification >> 8) & 15; ++ switch ((exit_qualification >> 4) & 3) { ++ case 0: /* mov to cr */ ++ val = kvm_register_readl(vcpu, reg); ++ trace_kvm_cr_write(cr, val); ++ switch (cr) { ++ case 0: ++ err = handle_set_cr0(vcpu, val); ++ return kvm_complete_insn_gp(vcpu, err); ++ case 3: ++ WARN_ON_ONCE(enable_unrestricted_guest); ++ err = kvm_set_cr3(vcpu, val); ++ return kvm_complete_insn_gp(vcpu, err); ++ case 4: ++ err = handle_set_cr4(vcpu, val); ++ return kvm_complete_insn_gp(vcpu, err); ++ case 8: { ++ u8 cr8_prev = kvm_get_cr8(vcpu); ++ u8 cr8 = (u8)val; ++ err = kvm_set_cr8(vcpu, cr8); ++ ret = kvm_complete_insn_gp(vcpu, err); ++ if (lapic_in_kernel(vcpu)) ++ return ret; ++ if (cr8_prev <= cr8) ++ return ret; ++ /* ++ * TODO: we might be squashing a ++ * KVM_GUESTDBG_SINGLESTEP-triggered ++ * KVM_EXIT_DEBUG here. 
++ */ ++ vcpu->run->exit_reason = KVM_EXIT_SET_TPR; ++ return 0; ++ } ++ } ++ break; ++ case 2: /* clts */ ++ WARN_ONCE(1, "Guest should always own CR0.TS"); ++ vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); ++ trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); ++ return kvm_skip_emulated_instruction(vcpu); ++ case 1: /*mov from cr*/ ++ switch (cr) { ++ case 3: ++ WARN_ON_ONCE(enable_unrestricted_guest); ++ val = kvm_read_cr3(vcpu); ++ kvm_register_write(vcpu, reg, val); ++ trace_kvm_cr_read(cr, val); ++ return kvm_skip_emulated_instruction(vcpu); ++ case 8: ++ val = kvm_get_cr8(vcpu); ++ kvm_register_write(vcpu, reg, val); ++ trace_kvm_cr_read(cr, val); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ break; ++ case 3: /* lmsw */ ++ val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; ++ trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); ++ kvm_lmsw(vcpu, val); ++ ++ return kvm_skip_emulated_instruction(vcpu); ++ default: ++ break; ++ } ++ vcpu->run->exit_reason = 0; ++ vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n", ++ (int)(exit_qualification >> 4) & 3, cr); ++ return 0; ++} ++ ++static int handle_dr(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification; ++ int dr, dr7, reg; ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ dr = exit_qualification & DEBUG_REG_ACCESS_NUM; ++ ++ /* First, if DR does not exist, trigger UD */ ++ if (!kvm_require_dr(vcpu, dr)) ++ return 1; ++ ++ /* Do not handle if the CPL > 0, will trigger GP on re-entry */ ++ if (!kvm_require_cpl(vcpu, 0)) ++ return 1; ++ dr7 = vmcs_readl(GUEST_DR7); ++ if (dr7 & DR7_GD) { ++ /* ++ * As the vm-exit takes precedence over the debug trap, we ++ * need to emulate the latter, either for the host or the ++ * guest debugging itself. ++ */ ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { ++ vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; ++ vcpu->run->debug.arch.dr7 = dr7; ++ vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); ++ vcpu->run->debug.arch.exception = DB_VECTOR; ++ vcpu->run->exit_reason = KVM_EXIT_DEBUG; ++ return 0; ++ } else { ++ vcpu->arch.dr6 &= ~15; ++ vcpu->arch.dr6 |= DR6_BD | DR6_RTM; ++ kvm_queue_exception(vcpu, DB_VECTOR); ++ return 1; ++ } ++ } ++ ++ if (vcpu->guest_debug == 0) { ++ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_MOV_DR_EXITING); ++ ++ /* ++ * No more DR vmexits; force a reload of the debug registers ++ * and reenter on this instruction. The next vmexit will ++ * retrieve the full state of the debug registers. 
++ */ ++ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; ++ return 1; ++ } ++ ++ reg = DEBUG_REG_ACCESS_REG(exit_qualification); ++ if (exit_qualification & TYPE_MOV_FROM_DR) { ++ unsigned long val; ++ ++ if (kvm_get_dr(vcpu, dr, &val)) ++ return 1; ++ kvm_register_write(vcpu, reg, val); ++ } else ++ if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg))) ++ return 1; ++ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) ++{ ++ return vcpu->arch.dr6; ++} ++ ++static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++} ++ ++static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) ++{ ++ get_debugreg(vcpu->arch.db[0], 0); ++ get_debugreg(vcpu->arch.db[1], 1); ++ get_debugreg(vcpu->arch.db[2], 2); ++ get_debugreg(vcpu->arch.db[3], 3); ++ get_debugreg(vcpu->arch.dr6, 6); ++ vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); ++ ++ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; ++ vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING); ++} ++ ++static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ vmcs_writel(GUEST_DR7, val); ++} ++ ++static int handle_cpuid(struct kvm_vcpu *vcpu) ++{ ++ return kvm_emulate_cpuid(vcpu); ++} ++ ++static int handle_rdmsr(struct kvm_vcpu *vcpu) ++{ ++ u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; ++ struct msr_data msr_info; ++ ++ msr_info.index = ecx; ++ msr_info.host_initiated = false; ++ if (vmx_get_msr(vcpu, &msr_info)) { ++ trace_kvm_msr_read_ex(ecx); ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ trace_kvm_msr_read(ecx, msr_info.data); ++ ++ /* FIXME: handling of bits 32:63 of rax, rdx */ ++ vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u; ++ vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u; ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_wrmsr(struct kvm_vcpu *vcpu) ++{ ++ struct msr_data msr; ++ u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; ++ u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) ++ | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); ++ ++ msr.data = data; ++ msr.index = ecx; ++ msr.host_initiated = false; ++ if (kvm_set_msr(vcpu, &msr) != 0) { ++ trace_kvm_msr_write_ex(ecx, data); ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ trace_kvm_msr_write(ecx, data); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) ++{ ++ kvm_apic_update_ppr(vcpu); ++ return 1; ++} ++ ++static int handle_interrupt_window(struct kvm_vcpu *vcpu) ++{ ++ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_VIRTUAL_INTR_PENDING); ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ ++vcpu->stat.irq_window_exits; ++ return 1; ++} ++ ++static int handle_halt(struct kvm_vcpu *vcpu) ++{ ++ return kvm_emulate_halt(vcpu); ++} ++ ++static int handle_vmcall(struct kvm_vcpu *vcpu) ++{ ++ return kvm_emulate_hypercall(vcpu); ++} ++ ++static int handle_invd(struct kvm_vcpu *vcpu) ++{ ++ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; ++} ++ ++static int handle_invlpg(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ ++ kvm_mmu_invlpg(vcpu, exit_qualification); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_rdpmc(struct kvm_vcpu *vcpu) ++{ ++ int err; ++ ++ err = kvm_rdpmc(vcpu); ++ return kvm_complete_insn_gp(vcpu, err); ++} ++ ++static int handle_wbinvd(struct kvm_vcpu *vcpu) ++{ ++ return kvm_emulate_wbinvd(vcpu); ++} ++ ++static int handle_xsetbv(struct kvm_vcpu *vcpu) ++{ ++ u64 new_bv 
= kvm_read_edx_eax(vcpu); ++ u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX); ++ ++ if (kvm_set_xcr(vcpu, index, new_bv) == 0) ++ return kvm_skip_emulated_instruction(vcpu); ++ return 1; ++} ++ ++static int handle_xsaves(struct kvm_vcpu *vcpu) ++{ ++ kvm_skip_emulated_instruction(vcpu); ++ WARN(1, "this should never happen\n"); ++ return 1; ++} ++ ++static int handle_xrstors(struct kvm_vcpu *vcpu) ++{ ++ kvm_skip_emulated_instruction(vcpu); ++ WARN(1, "this should never happen\n"); ++ return 1; ++} ++ ++static int handle_apic_access(struct kvm_vcpu *vcpu) ++{ ++ if (likely(fasteoi)) { ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ int access_type, offset; ++ ++ access_type = exit_qualification & APIC_ACCESS_TYPE; ++ offset = exit_qualification & APIC_ACCESS_OFFSET; ++ /* ++ * Sane guest uses MOV to write EOI, with written value ++ * not cared. So make a short-circuit here by avoiding ++ * heavy instruction emulation. ++ */ ++ if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) && ++ (offset == APIC_EOI)) { ++ kvm_lapic_set_eoi(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ } ++ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; ++} ++ ++static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ int vector = exit_qualification & 0xff; ++ ++ /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ ++ kvm_apic_set_eoi_accelerated(vcpu, vector); ++ return 1; ++} ++ ++static int handle_apic_write(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ u32 offset = exit_qualification & 0xfff; ++ ++ /* APIC-write VM exit is trap-like and thus no need to adjust IP */ ++ kvm_apic_write_nodecode(vcpu, offset); ++ return 1; ++} ++ ++static int handle_task_switch(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long exit_qualification; ++ bool has_error_code = false; ++ u32 error_code = 0; ++ u16 tss_selector; ++ int reason, type, idt_v, idt_index; ++ ++ idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); ++ idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK); ++ type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ ++ reason = (u32)exit_qualification >> 30; ++ if (reason == TASK_SWITCH_GATE && idt_v) { ++ switch (type) { ++ case INTR_TYPE_NMI_INTR: ++ vcpu->arch.nmi_injected = false; ++ vmx_set_nmi_mask(vcpu, true); ++ break; ++ case INTR_TYPE_EXT_INTR: ++ case INTR_TYPE_SOFT_INTR: ++ kvm_clear_interrupt_queue(vcpu); ++ break; ++ case INTR_TYPE_HARD_EXCEPTION: ++ if (vmx->idt_vectoring_info & ++ VECTORING_INFO_DELIVER_CODE_MASK) { ++ has_error_code = true; ++ error_code = ++ vmcs_read32(IDT_VECTORING_ERROR_CODE); ++ } ++ /* fall through */ ++ case INTR_TYPE_SOFT_EXCEPTION: ++ kvm_clear_exception_queue(vcpu); ++ break; ++ default: ++ break; ++ } ++ } ++ tss_selector = exit_qualification; ++ ++ if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && ++ type != INTR_TYPE_EXT_INTR && ++ type != INTR_TYPE_NMI_INTR)) ++ skip_emulated_instruction(vcpu); ++ ++ if (kvm_task_switch(vcpu, tss_selector, ++ type == INTR_TYPE_SOFT_INTR ? 
idt_index : -1, reason, ++ has_error_code, error_code) == EMULATE_FAIL) { ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; ++ vcpu->run->internal.ndata = 0; ++ return 0; ++ } ++ ++ /* ++ * TODO: What about debug traps on tss switch? ++ * Are we supposed to inject them and update dr6? ++ */ ++ ++ return 1; ++} ++ ++static int handle_ept_violation(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification; ++ gpa_t gpa; ++ u64 error_code; ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ ++ /* ++ * EPT violation happened while executing iret from NMI, ++ * "blocked by NMI" bit has to be set before next VM entry. ++ * There are errata that may cause this bit to not be set: ++ * AAK134, BY25. ++ */ ++ if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && ++ enable_vnmi && ++ (exit_qualification & INTR_INFO_UNBLOCK_NMI)) ++ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); ++ ++ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); ++ trace_kvm_page_fault(gpa, exit_qualification); ++ ++ /* Is it a read fault? */ ++ error_code = (exit_qualification & EPT_VIOLATION_ACC_READ) ++ ? PFERR_USER_MASK : 0; ++ /* Is it a write fault? */ ++ error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE) ++ ? PFERR_WRITE_MASK : 0; ++ /* Is it a fetch fault? */ ++ error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR) ++ ? PFERR_FETCH_MASK : 0; ++ /* ept page table entry is present? */ ++ error_code |= (exit_qualification & ++ (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE | ++ EPT_VIOLATION_EXECUTABLE)) ++ ? PFERR_PRESENT_MASK : 0; ++ ++ error_code |= (exit_qualification & 0x100) != 0 ? ++ PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; ++ ++ vcpu->arch.exit_qualification = exit_qualification; ++ return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); ++} ++ ++static int handle_ept_misconfig(struct kvm_vcpu *vcpu) ++{ ++ gpa_t gpa; ++ ++ /* ++ * A nested guest cannot optimize MMIO vmexits, because we have an ++ * nGPA here instead of the required GPA. ++ */ ++ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); ++ if (!is_guest_mode(vcpu) && ++ !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { ++ trace_kvm_fast_mmio(gpa); ++ /* ++ * Doing kvm_skip_emulated_instruction() depends on undefined ++ * behavior: Intel's manual doesn't mandate ++ * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG ++ * occurs and while on real hardware it was observed to be set, ++ * other hypervisors (namely Hyper-V) don't set it, we end up ++ * advancing IP with some random value. Disable fast mmio when ++ * running nested and keep it for real hardware in hope that ++ * VM_EXIT_INSTRUCTION_LEN will always be set correctly. 
++ */ ++ if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) ++ return kvm_skip_emulated_instruction(vcpu); ++ else ++ return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) == ++ EMULATE_DONE; ++ } ++ ++ return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); ++} ++ ++static int handle_nmi_window(struct kvm_vcpu *vcpu) ++{ ++ WARN_ON_ONCE(!enable_vnmi); ++ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_VIRTUAL_NMI_PENDING); ++ ++vcpu->stat.nmi_window_exits; ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ return 1; ++} ++ ++static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ enum emulation_result err = EMULATE_DONE; ++ int ret = 1; ++ u32 cpu_exec_ctrl; ++ bool intr_window_requested; ++ unsigned count = 130; ++ ++ /* ++ * We should never reach the point where we are emulating L2 ++ * due to invalid guest state as that means we incorrectly ++ * allowed a nested VMEntry with an invalid vmcs12. ++ */ ++ WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); ++ ++ cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); ++ intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; ++ ++ while (vmx->emulation_required && count-- != 0) { ++ if (intr_window_requested && vmx_interrupt_allowed(vcpu)) ++ return handle_interrupt_window(&vmx->vcpu); ++ ++ if (kvm_test_request(KVM_REQ_EVENT, vcpu)) ++ return 1; ++ ++ err = kvm_emulate_instruction(vcpu, 0); ++ ++ if (err == EMULATE_USER_EXIT) { ++ ++vcpu->stat.mmio_exits; ++ ret = 0; ++ goto out; ++ } ++ ++ if (err != EMULATE_DONE) ++ goto emulation_error; ++ ++ if (vmx->emulation_required && !vmx->rmode.vm86_active && ++ vcpu->arch.exception.pending) ++ goto emulation_error; ++ ++ if (vcpu->arch.halt_request) { ++ vcpu->arch.halt_request = 0; ++ ret = kvm_vcpu_halt(vcpu); ++ goto out; ++ } ++ ++ if (signal_pending(current)) ++ goto out; ++ if (need_resched()) ++ schedule(); ++ } ++ ++out: ++ return ret; ++ ++emulation_error: ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; ++ vcpu->run->internal.ndata = 0; ++ return 0; ++} ++ ++static void grow_ple_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int old = vmx->ple_window; ++ ++ vmx->ple_window = __grow_ple_window(old, ple_window, ++ ple_window_grow, ++ ple_window_max); ++ ++ if (vmx->ple_window != old) ++ vmx->ple_window_dirty = true; ++ ++ trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old); ++} ++ ++static void shrink_ple_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int old = vmx->ple_window; ++ ++ vmx->ple_window = __shrink_ple_window(old, ple_window, ++ ple_window_shrink, ++ ple_window); ++ ++ if (vmx->ple_window != old) ++ vmx->ple_window_dirty = true; ++ ++ trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old); ++} ++ ++/* ++ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. ++ */ ++static void wakeup_handler(void) ++{ ++ struct kvm_vcpu *vcpu; ++ int cpu = smp_processor_id(); ++ ++ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); ++ list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), ++ blocked_vcpu_list) { ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ++ ++ if (pi_test_on(pi_desc) == 1) ++ kvm_vcpu_kick(vcpu); ++ } ++ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); ++} ++ ++static void vmx_enable_tdp(void) ++{ ++ kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, ++ enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, ++ enable_ept_ad_bits ? 
VMX_EPT_DIRTY_BIT : 0ull, ++ 0ull, VMX_EPT_EXECUTABLE_MASK, ++ cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, ++ VMX_EPT_RWX_MASK, 0ull); ++ ++ ept_set_mmio_spte_mask(); ++ kvm_enable_tdp(); ++} ++ ++static __init int hardware_setup(void) ++{ ++ unsigned long host_bndcfgs; ++ int r = -ENOMEM, i; ++ ++ rdmsrl_safe(MSR_EFER, &host_efer); ++ ++ for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) ++ kvm_define_shared_msr(i, vmx_msr_index[i]); ++ ++ for (i = 0; i < VMX_BITMAP_NR; i++) { ++ vmx_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL); ++ if (!vmx_bitmap[i]) ++ goto out; ++ } ++ ++ memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); ++ memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); ++ ++ if (setup_vmcs_config(&vmcs_config) < 0) { ++ r = -EIO; ++ goto out; ++ } ++ ++ if (boot_cpu_has(X86_FEATURE_NX)) ++ kvm_enable_efer_bits(EFER_NX); ++ ++ if (boot_cpu_has(X86_FEATURE_MPX)) { ++ rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs); ++ WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); ++ } ++ ++ if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || ++ !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) ++ enable_vpid = 0; ++ ++ if (!cpu_has_vmx_ept() || ++ !cpu_has_vmx_ept_4levels() || ++ !cpu_has_vmx_ept_mt_wb() || ++ !cpu_has_vmx_invept_global()) ++ enable_ept = 0; ++ ++ if (!cpu_has_vmx_ept_ad_bits() || !enable_ept) ++ enable_ept_ad_bits = 0; ++ ++ if (!cpu_has_vmx_unrestricted_guest() || !enable_ept) ++ enable_unrestricted_guest = 0; ++ ++ if (!cpu_has_vmx_flexpriority()) ++ flexpriority_enabled = 0; ++ ++ if (!cpu_has_virtual_nmis()) ++ enable_vnmi = 0; ++ ++ /* ++ * set_apic_access_page_addr() is used to reload apic access ++ * page upon invalidation. No need to do anything if not ++ * using the APIC_ACCESS_ADDR VMCS field. ++ */ ++ if (!flexpriority_enabled) ++ kvm_x86_ops->set_apic_access_page_addr = NULL; ++ ++ if (!cpu_has_vmx_tpr_shadow()) ++ kvm_x86_ops->update_cr8_intercept = NULL; ++ ++ if (enable_ept && !cpu_has_vmx_ept_2m_page()) ++ kvm_disable_largepages(); ++ ++#if IS_ENABLED(CONFIG_HYPERV) ++ if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH ++ && enable_ept) ++ kvm_x86_ops->tlb_remote_flush = vmx_hv_remote_flush_tlb; ++#endif ++ ++ if (!cpu_has_vmx_ple()) { ++ ple_gap = 0; ++ ple_window = 0; ++ ple_window_grow = 0; ++ ple_window_max = 0; ++ ple_window_shrink = 0; ++ } ++ ++ if (!cpu_has_vmx_apicv()) { ++ enable_apicv = 0; ++ kvm_x86_ops->sync_pir_to_irr = NULL; ++ } ++ ++ if (cpu_has_vmx_tsc_scaling()) { ++ kvm_has_tsc_control = true; ++ kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; ++ kvm_tsc_scaling_ratio_frac_bits = 48; ++ } ++ ++ set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ ++ ++ if (enable_ept) ++ vmx_enable_tdp(); ++ else ++ kvm_disable_tdp(); ++ ++ if (!nested) { ++ kvm_x86_ops->get_nested_state = NULL; ++ kvm_x86_ops->set_nested_state = NULL; ++ } ++ ++ /* ++ * Only enable PML when hardware supports PML feature, and both EPT ++ * and EPT A/D bit features are enabled -- PML depends on them to work. 
++ */ ++ if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml()) ++ enable_pml = 0; ++ ++ if (!enable_pml) { ++ kvm_x86_ops->slot_enable_log_dirty = NULL; ++ kvm_x86_ops->slot_disable_log_dirty = NULL; ++ kvm_x86_ops->flush_log_dirty = NULL; ++ kvm_x86_ops->enable_log_dirty_pt_masked = NULL; ++ } ++ ++ if (!cpu_has_vmx_preemption_timer()) ++ kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; ++ ++ if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) { ++ u64 vmx_msr; ++ ++ rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); ++ cpu_preemption_timer_multi = ++ vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; ++ } else { ++ kvm_x86_ops->set_hv_timer = NULL; ++ kvm_x86_ops->cancel_hv_timer = NULL; ++ } ++ ++ if (!cpu_has_vmx_shadow_vmcs()) ++ enable_shadow_vmcs = 0; ++ if (enable_shadow_vmcs) ++ init_vmcs_shadow_fields(); ++ ++ kvm_set_posted_intr_wakeup_handler(wakeup_handler); ++ nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv); ++ ++ kvm_mce_cap_supported |= MCG_LMCE_P; ++ ++ r = alloc_kvm_area(); ++ if (r) ++ goto out; ++ return 0; ++ ++out: ++ for (i = 0; i < VMX_BITMAP_NR; i++) ++ free_page((unsigned long)vmx_bitmap[i]); ++ ++ return r; ++} ++ ++static __exit void hardware_unsetup(void) ++{ ++ int i; ++ ++ for (i = 0; i < VMX_BITMAP_NR; i++) ++ free_page((unsigned long)vmx_bitmap[i]); ++ ++ free_kvm_area(); ++} ++ ++/* ++ * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE ++ * exiting, so only get here on cpu with PAUSE-Loop-Exiting. ++ */ ++static int handle_pause(struct kvm_vcpu *vcpu) ++{ ++ if (!kvm_pause_in_guest(vcpu->kvm)) ++ grow_ple_window(vcpu); ++ ++ /* ++ * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting" ++ * VM-execution control is ignored if CPL > 0. OTOH, KVM ++ * never set PAUSE_EXITING and just set PLE if supported, ++ * so the vcpu must be CPL=0 if it gets a PAUSE exit. ++ */ ++ kvm_vcpu_on_spin(vcpu, true); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_nop(struct kvm_vcpu *vcpu) ++{ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_mwait(struct kvm_vcpu *vcpu) ++{ ++ printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); ++ return handle_nop(vcpu); ++} ++ ++static int handle_invalid_op(struct kvm_vcpu *vcpu) ++{ ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++} ++ ++static int handle_monitor_trap(struct kvm_vcpu *vcpu) ++{ ++ return 1; ++} ++ ++static int handle_monitor(struct kvm_vcpu *vcpu) ++{ ++ printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); ++ return handle_nop(vcpu); ++} ++ ++/* ++ * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), ++ * set the success or error code of an emulated VMX instruction, as specified ++ * by Vol 2B, VMX Instruction Reference, "Conventions". ++ */ ++static void nested_vmx_succeed(struct kvm_vcpu *vcpu) ++{ ++ vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) ++ & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | ++ X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); ++} ++ ++static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) ++{ ++ vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) ++ & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | ++ X86_EFLAGS_SF | X86_EFLAGS_OF)) ++ | X86_EFLAGS_CF); ++} ++ ++static void nested_vmx_failValid(struct kvm_vcpu *vcpu, ++ u32 vm_instruction_error) ++{ ++ if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { ++ /* ++ * failValid writes the error number to the current VMCS, which ++ * can't be done there isn't a current VMCS. 
++ */ ++ nested_vmx_failInvalid(vcpu); ++ return; ++ } ++ vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) ++ & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | ++ X86_EFLAGS_SF | X86_EFLAGS_OF)) ++ | X86_EFLAGS_ZF); ++ get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; ++ /* ++ * We don't need to force a shadow sync because ++ * VM_INSTRUCTION_ERROR is not shadowed ++ */ ++} ++ ++static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) ++{ ++ /* TODO: not to reset guest simply here. */ ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator); ++} ++ ++static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) ++{ ++ struct vcpu_vmx *vmx = ++ container_of(timer, struct vcpu_vmx, nested.preemption_timer); ++ ++ vmx->nested.preemption_timer_expired = true; ++ kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); ++ kvm_vcpu_kick(&vmx->vcpu); ++ ++ return HRTIMER_NORESTART; ++} ++ ++/* ++ * Decode the memory-address operand of a vmx instruction, as recorded on an ++ * exit caused by such an instruction (run by a guest hypervisor). ++ * On success, returns 0. When the operand is invalid, returns 1 and throws ++ * #UD or #GP. ++ */ ++static int get_vmx_mem_address(struct kvm_vcpu *vcpu, ++ unsigned long exit_qualification, ++ u32 vmx_instruction_info, bool wr, gva_t *ret) ++{ ++ gva_t off; ++ bool exn; ++ struct kvm_segment s; ++ ++ /* ++ * According to Vol. 3B, "Information for VM Exits Due to Instruction ++ * Execution", on an exit, vmx_instruction_info holds most of the ++ * addressing components of the operand. Only the displacement part ++ * is put in exit_qualification (see 3B, "Basic VM-Exit Information"). ++ * For how an actual address is calculated from all these components, ++ * refer to Vol. 1, "Operand Addressing". ++ */ ++ int scaling = vmx_instruction_info & 3; ++ int addr_size = (vmx_instruction_info >> 7) & 7; ++ bool is_reg = vmx_instruction_info & (1u << 10); ++ int seg_reg = (vmx_instruction_info >> 15) & 7; ++ int index_reg = (vmx_instruction_info >> 18) & 0xf; ++ bool index_is_valid = !(vmx_instruction_info & (1u << 22)); ++ int base_reg = (vmx_instruction_info >> 23) & 0xf; ++ bool base_is_valid = !(vmx_instruction_info & (1u << 27)); ++ ++ if (is_reg) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ /* Addr = segment_base + offset */ ++ /* offset = base + [index * scale] + displacement */ ++ off = exit_qualification; /* holds the displacement */ ++ if (addr_size == 1) ++ off = (gva_t)sign_extend64(off, 31); ++ else if (addr_size == 0) ++ off = (gva_t)sign_extend64(off, 15); ++ if (base_is_valid) ++ off += kvm_register_read(vcpu, base_reg); ++ if (index_is_valid) ++ off += kvm_register_read(vcpu, index_reg)< s.limit); ++ } ++ if (exn) { ++ kvm_queue_exception_e(vcpu, ++ seg_reg == VCPU_SREG_SS ? ++ SS_VECTOR : GP_VECTOR, ++ 0); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) ++{ ++ gva_t gva; ++ struct x86_exception e; ++ ++ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), ++ vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) ++ return 1; ++ ++ if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Allocate a shadow VMCS and associate it with the currently loaded ++ * VMCS, unless such a shadow VMCS already exists. 
The newly allocated ++ * VMCS is also VMCLEARed, so that it is ready for use. ++ */ ++static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs; ++ ++ /* ++ * We should allocate a shadow vmcs for vmcs01 only when L1 ++ * executes VMXON and free it when L1 executes VMXOFF. ++ * As it is invalid to execute VMXON twice, we shouldn't reach ++ * here when vmcs01 already have an allocated shadow vmcs. ++ */ ++ WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs); ++ ++ if (!loaded_vmcs->shadow_vmcs) { ++ loaded_vmcs->shadow_vmcs = alloc_vmcs(true); ++ if (loaded_vmcs->shadow_vmcs) ++ vmcs_clear(loaded_vmcs->shadow_vmcs); ++ } ++ return loaded_vmcs->shadow_vmcs; ++} ++ ++static int enter_vmx_operation(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int r; ++ ++ r = alloc_loaded_vmcs(&vmx->nested.vmcs02); ++ if (r < 0) ++ goto out_vmcs02; ++ ++ vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); ++ if (!vmx->nested.cached_vmcs12) ++ goto out_cached_vmcs12; ++ ++ vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); ++ if (!vmx->nested.cached_shadow_vmcs12) ++ goto out_cached_shadow_vmcs12; ++ ++ if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu)) ++ goto out_shadow_vmcs; ++ ++ hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL_PINNED); ++ vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; ++ ++ vmx->nested.vpid02 = allocate_vpid(); ++ ++ vmx->nested.vmxon = true; ++ return 0; ++ ++out_shadow_vmcs: ++ kfree(vmx->nested.cached_shadow_vmcs12); ++ ++out_cached_shadow_vmcs12: ++ kfree(vmx->nested.cached_vmcs12); ++ ++out_cached_vmcs12: ++ free_loaded_vmcs(&vmx->nested.vmcs02); ++ ++out_vmcs02: ++ return -ENOMEM; ++} ++ ++/* ++ * Emulate the VMXON instruction. ++ * Currently, we just remember that VMX is active, and do not save or even ++ * inspect the argument to VMXON (the so-called "VMXON pointer") because we ++ * do not currently need to store anything in that guest-allocated memory ++ * region. Consequently, VMCLEAR and VMPTRLD also do not verify that the their ++ * argument is different from the VMXON pointer (which the spec says they do). ++ */ ++static int handle_vmon(struct kvm_vcpu *vcpu) ++{ ++ int ret; ++ gpa_t vmptr; ++ struct page *page; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED ++ | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; ++ ++ /* ++ * The Intel VMX Instruction Reference lists a bunch of bits that are ++ * prerequisite to running VMXON, most notably cr4.VMXE must be set to ++ * 1 (see vmx_set_cr4() for when we allow the guest to set this). ++ * Otherwise, we should fail with #UD. But most faulting conditions ++ * have already been checked by hardware, prior to the VM-exit for ++ * VMXON. We do test guest cr4.VMXE because processor CR4 always has ++ * that bit set to 1 in non-root mode. ++ */ ++ if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ /* CPL=0 must be checked manually. 
*/ ++ if (vmx_get_cpl(vcpu)) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ if (vmx->nested.vmxon) { ++ nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES) ++ != VMXON_NEEDED_FEATURES) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ if (nested_vmx_get_vmptr(vcpu, &vmptr)) ++ return 1; ++ ++ /* ++ * SDM 3: 24.11.5 ++ * The first 4 bytes of VMXON region contain the supported ++ * VMCS revision identifier ++ * ++ * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; ++ * which replaces physical address width with 32 ++ */ ++ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ page = kvm_vcpu_gpa_to_page(vcpu, vmptr); ++ if (is_error_page(page)) { ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ if (*(u32 *)kmap(page) != VMCS12_REVISION) { ++ kunmap(page); ++ kvm_release_page_clean(page); ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ kunmap(page); ++ kvm_release_page_clean(page); ++ ++ vmx->nested.vmxon_ptr = vmptr; ++ ret = enter_vmx_operation(vcpu); ++ if (ret) ++ return ret; ++ ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++/* ++ * Intel's VMX Instruction Reference specifies a common set of prerequisites ++ * for running VMX instructions (except VMXON, whose prerequisites are ++ * slightly different). It also specifies what exception to inject otherwise. ++ * Note that many of these exceptions have priority over VM exits, so they ++ * don't have to be checked again here. ++ */ ++static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) ++{ ++ if (!to_vmx(vcpu)->nested.vmxon) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 0; ++ } ++ ++ if (vmx_get_cpl(vcpu)) { ++ kvm_inject_gp(vcpu, 0); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) ++{ ++ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); ++ vmcs_write64(VMCS_LINK_POINTER, -1ull); ++ vmx->nested.sync_shadow_vmcs = false; ++} ++ ++static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) ++{ ++ if (vmx->nested.current_vmptr == -1ull) ++ return; ++ ++ if (enable_shadow_vmcs) { ++ /* copy to memory all shadowed fields in case ++ they were modified */ ++ copy_shadow_to_vmcs12(vmx); ++ vmx_disable_shadow_vmcs(vmx); ++ } ++ vmx->nested.posted_intr_nv = -1; ++ ++ /* Flush VMCS12 to guest memory */ ++ kvm_vcpu_write_guest_page(&vmx->vcpu, ++ vmx->nested.current_vmptr >> PAGE_SHIFT, ++ vmx->nested.cached_vmcs12, 0, VMCS12_SIZE); ++ ++ vmx->nested.current_vmptr = -1ull; ++} ++ ++/* ++ * Free whatever needs to be freed from vmx->nested when L1 goes down, or ++ * just stops using VMX. 
++ */ ++static void free_nested(struct vcpu_vmx *vmx) ++{ ++ if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) ++ return; ++ ++ kvm_clear_request(KVM_REQ_GET_VMCS12_PAGES, &vmx->vcpu); ++ ++ hrtimer_cancel(&vmx->nested.preemption_timer); ++ vmx->nested.vmxon = false; ++ vmx->nested.smm.vmxon = false; ++ free_vpid(vmx->nested.vpid02); ++ vmx->nested.posted_intr_nv = -1; ++ vmx->nested.current_vmptr = -1ull; ++ if (enable_shadow_vmcs) { ++ vmx_disable_shadow_vmcs(vmx); ++ vmcs_clear(vmx->vmcs01.shadow_vmcs); ++ free_vmcs(vmx->vmcs01.shadow_vmcs); ++ vmx->vmcs01.shadow_vmcs = NULL; ++ } ++ kfree(vmx->nested.cached_vmcs12); ++ kfree(vmx->nested.cached_shadow_vmcs12); ++ /* Unpin physical memory we referred to in the vmcs02 */ ++ if (vmx->nested.apic_access_page) { ++ kvm_release_page_dirty(vmx->nested.apic_access_page); ++ vmx->nested.apic_access_page = NULL; ++ } ++ if (vmx->nested.virtual_apic_page) { ++ kvm_release_page_dirty(vmx->nested.virtual_apic_page); ++ vmx->nested.virtual_apic_page = NULL; ++ } ++ if (vmx->nested.pi_desc_page) { ++ kunmap(vmx->nested.pi_desc_page); ++ kvm_release_page_dirty(vmx->nested.pi_desc_page); ++ vmx->nested.pi_desc_page = NULL; ++ vmx->nested.pi_desc = NULL; ++ } ++ ++ free_loaded_vmcs(&vmx->nested.vmcs02); ++} ++ ++/* Emulate the VMXOFF instruction */ ++static int handle_vmoff(struct kvm_vcpu *vcpu) ++{ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ free_nested(to_vmx(vcpu)); ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++/* Emulate the VMCLEAR instruction */ ++static int handle_vmclear(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 zero = 0; ++ gpa_t vmptr; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (nested_vmx_get_vmptr(vcpu, &vmptr)) ++ return 1; ++ ++ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { ++ nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if (vmptr == vmx->nested.vmxon_ptr) { ++ nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if (vmptr == vmx->nested.current_vmptr) ++ nested_release_vmcs12(vmx); ++ ++ kvm_vcpu_write_guest(vcpu, ++ vmptr + offsetof(struct vmcs12, launch_state), ++ &zero, sizeof(zero)); ++ ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch); ++ ++/* Emulate the VMLAUNCH instruction */ ++static int handle_vmlaunch(struct kvm_vcpu *vcpu) ++{ ++ return nested_vmx_run(vcpu, true); ++} ++ ++/* Emulate the VMRESUME instruction */ ++static int handle_vmresume(struct kvm_vcpu *vcpu) ++{ ++ ++ return nested_vmx_run(vcpu, false); ++} ++ ++/* ++ * Read a vmcs12 field. Since these can have varying lengths and we return ++ * one type, we chose the biggest type (u64) and zero-extend the return value ++ * to that size. Note that the caller, handle_vmread, might need to use only ++ * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of ++ * 64-bit fields are to be returned). 
++ */ ++static inline int vmcs12_read_any(struct vmcs12 *vmcs12, ++ unsigned long field, u64 *ret) ++{ ++ short offset = vmcs_field_to_offset(field); ++ char *p; ++ ++ if (offset < 0) ++ return offset; ++ ++ p = (char *)vmcs12 + offset; ++ ++ switch (vmcs_field_width(field)) { ++ case VMCS_FIELD_WIDTH_NATURAL_WIDTH: ++ *ret = *((natural_width *)p); ++ return 0; ++ case VMCS_FIELD_WIDTH_U16: ++ *ret = *((u16 *)p); ++ return 0; ++ case VMCS_FIELD_WIDTH_U32: ++ *ret = *((u32 *)p); ++ return 0; ++ case VMCS_FIELD_WIDTH_U64: ++ *ret = *((u64 *)p); ++ return 0; ++ default: ++ WARN_ON(1); ++ return -ENOENT; ++ } ++} ++ ++ ++static inline int vmcs12_write_any(struct vmcs12 *vmcs12, ++ unsigned long field, u64 field_value){ ++ short offset = vmcs_field_to_offset(field); ++ char *p = (char *)vmcs12 + offset; ++ if (offset < 0) ++ return offset; ++ ++ switch (vmcs_field_width(field)) { ++ case VMCS_FIELD_WIDTH_U16: ++ *(u16 *)p = field_value; ++ return 0; ++ case VMCS_FIELD_WIDTH_U32: ++ *(u32 *)p = field_value; ++ return 0; ++ case VMCS_FIELD_WIDTH_U64: ++ *(u64 *)p = field_value; ++ return 0; ++ case VMCS_FIELD_WIDTH_NATURAL_WIDTH: ++ *(natural_width *)p = field_value; ++ return 0; ++ default: ++ WARN_ON(1); ++ return -ENOENT; ++ } ++ ++} ++ ++/* ++ * Copy the writable VMCS shadow fields back to the VMCS12, in case ++ * they have been modified by the L1 guest. Note that the "read-only" ++ * VM-exit information fields are actually writable if the vCPU is ++ * configured to support "VMWRITE to any supported field in the VMCS." ++ */ ++static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) ++{ ++ const u16 *fields[] = { ++ shadow_read_write_fields, ++ shadow_read_only_fields ++ }; ++ const int max_fields[] = { ++ max_shadow_read_write_fields, ++ max_shadow_read_only_fields ++ }; ++ int i, q; ++ unsigned long field; ++ u64 field_value; ++ struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; ++ ++ if (WARN_ON(!shadow_vmcs)) ++ return; ++ ++ preempt_disable(); ++ ++ vmcs_load(shadow_vmcs); ++ ++ for (q = 0; q < ARRAY_SIZE(fields); q++) { ++ for (i = 0; i < max_fields[q]; i++) { ++ field = fields[q][i]; ++ field_value = __vmcs_readl(field); ++ vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value); ++ } ++ /* ++ * Skip the VM-exit information fields if they are read-only. ++ */ ++ if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu)) ++ break; ++ } ++ ++ vmcs_clear(shadow_vmcs); ++ vmcs_load(vmx->loaded_vmcs->vmcs); ++ ++ preempt_enable(); ++} ++ ++static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) ++{ ++ const u16 *fields[] = { ++ shadow_read_write_fields, ++ shadow_read_only_fields ++ }; ++ const int max_fields[] = { ++ max_shadow_read_write_fields, ++ max_shadow_read_only_fields ++ }; ++ int i, q; ++ unsigned long field; ++ u64 field_value = 0; ++ struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; ++ ++ if (WARN_ON(!shadow_vmcs)) ++ return; ++ ++ vmcs_load(shadow_vmcs); ++ ++ for (q = 0; q < ARRAY_SIZE(fields); q++) { ++ for (i = 0; i < max_fields[q]; i++) { ++ field = fields[q][i]; ++ vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value); ++ __vmcs_writel(field, field_value); ++ } ++ } ++ ++ vmcs_clear(shadow_vmcs); ++ vmcs_load(vmx->loaded_vmcs->vmcs); ++} ++ ++/* ++ * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was ++ * used before) all generate the same failure when it is missing. 
++ */ ++static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ if (vmx->nested.current_vmptr == -1ull) { ++ nested_vmx_failInvalid(vcpu); ++ return 0; ++ } ++ return 1; ++} ++ ++static int handle_vmread(struct kvm_vcpu *vcpu) ++{ ++ unsigned long field; ++ u64 field_value; ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ gva_t gva = 0; ++ struct vmcs12 *vmcs12; ++ struct x86_exception e; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (!nested_vmx_check_vmcs12(vcpu)) ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ if (!is_guest_mode(vcpu)) ++ vmcs12 = get_vmcs12(vcpu); ++ else { ++ /* ++ * When vmcs->vmcs_link_pointer is -1ull, any VMREAD ++ * to shadowed-field sets the ALU flags for VMfailInvalid. ++ */ ++ if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) { ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ vmcs12 = get_shadow_vmcs12(vcpu); ++ } ++ ++ /* Decode instruction info and find the field to read */ ++ field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); ++ /* Read the field, zero-extended to a u64 field_value */ ++ if (vmcs12_read_any(vmcs12, field, &field_value) < 0) { ++ nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ /* ++ * Now copy part of this value to register or memory, as requested. ++ * Note that the number of bits actually copied is 32 or 64 depending ++ * on the guest's mode (32 or 64 bit), not on the given field's length. ++ */ ++ if (vmx_instruction_info & (1u << 10)) { ++ kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf), ++ field_value); ++ } else { ++ if (get_vmx_mem_address(vcpu, exit_qualification, ++ vmx_instruction_info, true, &gva)) ++ return 1; ++ /* _system ok, nested_vmx_check_permission has verified cpl=0 */ ++ if (kvm_write_guest_virt_system(vcpu, gva, &field_value, ++ (is_long_mode(vcpu) ? 8 : 4), ++ &e)) ++ kvm_inject_page_fault(vcpu, &e); ++ } ++ ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++ ++static int handle_vmwrite(struct kvm_vcpu *vcpu) ++{ ++ unsigned long field; ++ gva_t gva; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ ++ /* The value to write might be 32 or 64 bits, depending on L1's long ++ * mode, and eventually we need to write that into a field of several ++ * possible lengths. The code below first zero-extends the value to 64 ++ * bit (field_value), and then copies only the appropriate number of ++ * bits into the vmcs12 field. ++ */ ++ u64 field_value = 0; ++ struct x86_exception e; ++ struct vmcs12 *vmcs12; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (!nested_vmx_check_vmcs12(vcpu)) ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ if (vmx_instruction_info & (1u << 10)) ++ field_value = kvm_register_readl(vcpu, ++ (((vmx_instruction_info) >> 3) & 0xf)); ++ else { ++ if (get_vmx_mem_address(vcpu, exit_qualification, ++ vmx_instruction_info, false, &gva)) ++ return 1; ++ if (kvm_read_guest_virt(vcpu, gva, &field_value, ++ (is_64_bit_mode(vcpu) ? 
8 : 4), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ } ++ ++ ++ field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); ++ /* ++ * If the vCPU supports "VMWRITE to any supported field in the ++ * VMCS," then the "read-only" fields are actually read/write. ++ */ ++ if (vmcs_field_readonly(field) && ++ !nested_cpu_has_vmwrite_any_field(vcpu)) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if (!is_guest_mode(vcpu)) ++ vmcs12 = get_vmcs12(vcpu); ++ else { ++ /* ++ * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE ++ * to shadowed-field sets the ALU flags for VMfailInvalid. ++ */ ++ if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) { ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ vmcs12 = get_shadow_vmcs12(vcpu); ++ ++ } ++ ++ if (vmcs12_write_any(vmcs12, field, field_value) < 0) { ++ nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ /* ++ * Do not track vmcs12 dirty-state if in guest-mode ++ * as we actually dirty shadow vmcs12 instead of vmcs12. ++ */ ++ if (!is_guest_mode(vcpu)) { ++ switch (field) { ++#define SHADOW_FIELD_RW(x) case x: ++#include "vmx_shadow_fields.h" ++ /* ++ * The fields that can be updated by L1 without a vmexit are ++ * always updated in the vmcs02, the others go down the slow ++ * path of prepare_vmcs02. ++ */ ++ break; ++ default: ++ vmx->nested.dirty_vmcs12 = true; ++ break; ++ } ++ } ++ ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) ++{ ++ vmx->nested.current_vmptr = vmptr; ++ if (enable_shadow_vmcs) { ++ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_SHADOW_VMCS); ++ vmcs_write64(VMCS_LINK_POINTER, ++ __pa(vmx->vmcs01.shadow_vmcs)); ++ vmx->nested.sync_shadow_vmcs = true; ++ } ++ vmx->nested.dirty_vmcs12 = true; ++} ++ ++/* Emulate the VMPTRLD instruction */ ++static int handle_vmptrld(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ gpa_t vmptr; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (nested_vmx_get_vmptr(vcpu, &vmptr)) ++ return 1; ++ ++ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { ++ nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if (vmptr == vmx->nested.vmxon_ptr) { ++ nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if (vmx->nested.current_vmptr != vmptr) { ++ struct vmcs12 *new_vmcs12; ++ struct page *page; ++ page = kvm_vcpu_gpa_to_page(vcpu, vmptr); ++ if (is_error_page(page)) { ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ new_vmcs12 = kmap(page); ++ if (new_vmcs12->hdr.revision_id != VMCS12_REVISION || ++ (new_vmcs12->hdr.shadow_vmcs && ++ !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { ++ kunmap(page); ++ kvm_release_page_clean(page); ++ nested_vmx_failValid(vcpu, ++ VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ nested_release_vmcs12(vmx); ++ /* ++ * Load VMCS12 from guest memory since it is not already ++ * cached. 
++ */ ++ memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); ++ kunmap(page); ++ kvm_release_page_clean(page); ++ ++ set_current_vmptr(vmx, vmptr); ++ } ++ ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++/* Emulate the VMPTRST instruction */ ++static int handle_vmptrst(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION); ++ u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; ++ struct x86_exception e; ++ gva_t gva; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva)) ++ return 1; ++ /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ ++ if (kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, ++ sizeof(gpa_t), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++/* Emulate the INVEPT instruction */ ++static int handle_invept(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 vmx_instruction_info, types; ++ unsigned long type; ++ gva_t gva; ++ struct x86_exception e; ++ struct { ++ u64 eptp, gpa; ++ } operand; ++ ++ if (!(vmx->nested.msrs.secondary_ctls_high & ++ SECONDARY_EXEC_ENABLE_EPT) || ++ !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); ++ ++ types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; ++ ++ if (type >= 32 || !(types & (1 << type))) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ /* According to the Intel VMX instruction reference, the memory ++ * operand is read even if it isn't needed (e.g., for type==global) ++ */ ++ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), ++ vmx_instruction_info, false, &gva)) ++ return 1; ++ if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ ++ switch (type) { ++ case VMX_EPT_EXTENT_GLOBAL: ++ /* ++ * TODO: track mappings and invalidate ++ * single context requests appropriately ++ */ ++ case VMX_EPT_EXTENT_CONTEXT: ++ kvm_mmu_sync_roots(vcpu); ++ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++ nested_vmx_succeed(vcpu); ++ break; ++ default: ++ BUG_ON(1); ++ break; ++ } ++ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_invvpid(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 vmx_instruction_info; ++ unsigned long type, types; ++ gva_t gva; ++ struct x86_exception e; ++ struct { ++ u64 vpid; ++ u64 gla; ++ } operand; ++ ++ if (!(vmx->nested.msrs.secondary_ctls_high & ++ SECONDARY_EXEC_ENABLE_VPID) || ++ !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); ++ ++ types = (vmx->nested.msrs.vpid_caps & ++ VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; ++ ++ if (type >= 32 || !(types & (1 << type))) { ++ nested_vmx_failValid(vcpu, ++ 
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ /* according to the intel vmx instruction reference, the memory ++ * operand is read even if it isn't needed (e.g., for type==global) ++ */ ++ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), ++ vmx_instruction_info, false, &gva)) ++ return 1; ++ if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ if (operand.vpid >> 16) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ switch (type) { ++ case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: ++ if (!operand.vpid || ++ is_noncanonical_address(operand.gla, vcpu)) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ if (cpu_has_vmx_invvpid_individual_addr() && ++ vmx->nested.vpid02) { ++ __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, ++ vmx->nested.vpid02, operand.gla); ++ } else ++ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); ++ break; ++ case VMX_VPID_EXTENT_SINGLE_CONTEXT: ++ case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: ++ if (!operand.vpid) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); ++ break; ++ case VMX_VPID_EXTENT_ALL_CONTEXT: ++ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); ++ break; ++ default: ++ WARN_ON_ONCE(1); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ nested_vmx_succeed(vcpu); ++ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_invpcid(struct kvm_vcpu *vcpu) ++{ ++ u32 vmx_instruction_info; ++ unsigned long type; ++ bool pcid_enabled; ++ gva_t gva; ++ struct x86_exception e; ++ unsigned i; ++ unsigned long roots_to_free = 0; ++ struct { ++ u64 pcid; ++ u64 gla; ++ } operand; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); ++ ++ if (type > 3) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ /* According to the Intel instruction reference, the memory operand ++ * is read even if it isn't needed (e.g., for type==all) ++ */ ++ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), ++ vmx_instruction_info, false, &gva)) ++ return 1; ++ ++ if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ ++ if (operand.pcid >> 12 != 0) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); ++ ++ switch (type) { ++ case INVPCID_TYPE_INDIV_ADDR: ++ if ((!pcid_enabled && (operand.pcid != 0)) || ++ is_noncanonical_address(operand.gla, vcpu)) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid); ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ case INVPCID_TYPE_SINGLE_CTXT: ++ if (!pcid_enabled && (operand.pcid != 0)) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ if (kvm_get_active_pcid(vcpu) == operand.pcid) { ++ kvm_mmu_sync_roots(vcpu); ++ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++ } ++ ++ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) ++ if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3) ++ == operand.pcid) ++ roots_to_free |= 
KVM_MMU_ROOT_PREVIOUS(i); ++ ++ kvm_mmu_free_roots(vcpu, roots_to_free); ++ /* ++ * If neither the current cr3 nor any of the prev_roots use the ++ * given PCID, then nothing needs to be done here because a ++ * resync will happen anyway before switching to any other CR3. ++ */ ++ ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ case INVPCID_TYPE_ALL_NON_GLOBAL: ++ /* ++ * Currently, KVM doesn't mark global entries in the shadow ++ * page tables, so a non-global flush just degenerates to a ++ * global flush. If needed, we could optimize this later by ++ * keeping track of global entries in shadow page tables. ++ */ ++ ++ /* fall-through */ ++ case INVPCID_TYPE_ALL_INCL_GLOBAL: ++ kvm_mmu_unload(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ default: ++ BUG(); /* We have already checked above that type <= 3 */ ++ } ++} ++ ++static int handle_pml_full(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification; ++ ++ trace_kvm_pml_full(vcpu->vcpu_id); ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ ++ /* ++ * PML buffer FULL happened while executing iret from NMI, ++ * "blocked by NMI" bit has to be set before next VM entry. ++ */ ++ if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && ++ enable_vnmi && ++ (exit_qualification & INTR_INFO_UNBLOCK_NMI)) ++ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, ++ GUEST_INTR_STATE_NMI); ++ ++ /* ++ * PML buffer already flushed at beginning of VMEXIT. Nothing to do ++ * here.., and there's no userspace involvement needed for PML. ++ */ ++ return 1; ++} ++ ++static int handle_preemption_timer(struct kvm_vcpu *vcpu) ++{ ++ if (!to_vmx(vcpu)->req_immediate_exit) ++ kvm_lapic_expired_hv_timer(vcpu); ++ return 1; ++} ++ ++static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int maxphyaddr = cpuid_maxphyaddr(vcpu); ++ ++ /* Check for memory type validity */ ++ switch (address & VMX_EPTP_MT_MASK) { ++ case VMX_EPTP_MT_UC: ++ if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)) ++ return false; ++ break; ++ case VMX_EPTP_MT_WB: ++ if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)) ++ return false; ++ break; ++ default: ++ return false; ++ } ++ ++ /* only 4 levels page-walk length are valid */ ++ if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4) ++ return false; ++ ++ /* Reserved bits should not be set */ ++ if (address >> maxphyaddr || ((address >> 7) & 0x1f)) ++ return false; ++ ++ /* AD, if set, should be supported */ ++ if (address & VMX_EPTP_AD_ENABLE_BIT) { ++ if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)) ++ return false; ++ } ++ ++ return true; ++} ++ ++static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; ++ u64 address; ++ bool accessed_dirty; ++ struct kvm_mmu *mmu = vcpu->arch.walk_mmu; ++ ++ if (!nested_cpu_has_eptp_switching(vmcs12) || ++ !nested_cpu_has_ept(vmcs12)) ++ return 1; ++ ++ if (index >= VMFUNC_EPTP_ENTRIES) ++ return 1; ++ ++ ++ if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT, ++ &address, index * 8, 8)) ++ return 1; ++ ++ accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT); ++ ++ /* ++ * If the (L2) guest does a vmfunc to the currently ++ * active ept pointer, we don't have to do anything else ++ */ ++ if (vmcs12->ept_pointer != address) { ++ if (!valid_ept_address(vcpu, address)) ++ return 1; ++ ++ kvm_mmu_unload(vcpu); ++ mmu->ept_ad = accessed_dirty; ++ mmu->base_role.ad_disabled = !accessed_dirty; ++ 
vmcs12->ept_pointer = address; ++ /* ++ * TODO: Check what's the correct approach in case ++ * mmu reload fails. Currently, we just let the next ++ * reload potentially fail ++ */ ++ kvm_mmu_reload(vcpu); ++ } ++ ++ return 0; ++} ++ ++static int handle_vmfunc(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs12 *vmcs12; ++ u32 function = vcpu->arch.regs[VCPU_REGS_RAX]; ++ ++ /* ++ * VMFUNC is only supported for nested guests, but we always enable the ++ * secondary control for simplicity; for non-nested mode, fake that we ++ * didn't by injecting #UD. ++ */ ++ if (!is_guest_mode(vcpu)) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ vmcs12 = get_vmcs12(vcpu); ++ if ((vmcs12->vm_function_control & (1 << function)) == 0) ++ goto fail; ++ ++ switch (function) { ++ case 0: ++ if (nested_vmx_eptp_switching(vcpu, vmcs12)) ++ goto fail; ++ break; ++ default: ++ goto fail; ++ } ++ return kvm_skip_emulated_instruction(vcpu); ++ ++fail: ++ nested_vmx_vmexit(vcpu, vmx->exit_reason, ++ vmcs_read32(VM_EXIT_INTR_INFO), ++ vmcs_readl(EXIT_QUALIFICATION)); ++ return 1; ++} ++ ++static int handle_encls(struct kvm_vcpu *vcpu) ++{ ++ /* ++ * SGX virtualization is not yet supported. There is no software ++ * enable bit for SGX, so we have to trap ENCLS and inject a #UD ++ * to prevent the guest from executing ENCLS. ++ */ ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++} ++ ++/* ++ * The exit handlers return 1 if the exit was handled fully and guest execution ++ * may resume. Otherwise they set the kvm_run parameter to indicate what needs ++ * to be done to userspace and return 0. ++ */ ++static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { ++ [EXIT_REASON_EXCEPTION_NMI] = handle_exception, ++ [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, ++ [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, ++ [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, ++ [EXIT_REASON_IO_INSTRUCTION] = handle_io, ++ [EXIT_REASON_CR_ACCESS] = handle_cr, ++ [EXIT_REASON_DR_ACCESS] = handle_dr, ++ [EXIT_REASON_CPUID] = handle_cpuid, ++ [EXIT_REASON_MSR_READ] = handle_rdmsr, ++ [EXIT_REASON_MSR_WRITE] = handle_wrmsr, ++ [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, ++ [EXIT_REASON_HLT] = handle_halt, ++ [EXIT_REASON_INVD] = handle_invd, ++ [EXIT_REASON_INVLPG] = handle_invlpg, ++ [EXIT_REASON_RDPMC] = handle_rdpmc, ++ [EXIT_REASON_VMCALL] = handle_vmcall, ++ [EXIT_REASON_VMCLEAR] = handle_vmclear, ++ [EXIT_REASON_VMLAUNCH] = handle_vmlaunch, ++ [EXIT_REASON_VMPTRLD] = handle_vmptrld, ++ [EXIT_REASON_VMPTRST] = handle_vmptrst, ++ [EXIT_REASON_VMREAD] = handle_vmread, ++ [EXIT_REASON_VMRESUME] = handle_vmresume, ++ [EXIT_REASON_VMWRITE] = handle_vmwrite, ++ [EXIT_REASON_VMOFF] = handle_vmoff, ++ [EXIT_REASON_VMON] = handle_vmon, ++ [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, ++ [EXIT_REASON_APIC_ACCESS] = handle_apic_access, ++ [EXIT_REASON_APIC_WRITE] = handle_apic_write, ++ [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, ++ [EXIT_REASON_WBINVD] = handle_wbinvd, ++ [EXIT_REASON_XSETBV] = handle_xsetbv, ++ [EXIT_REASON_TASK_SWITCH] = handle_task_switch, ++ [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, ++ [EXIT_REASON_GDTR_IDTR] = handle_desc, ++ [EXIT_REASON_LDTR_TR] = handle_desc, ++ [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, ++ [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, ++ [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, ++ [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, ++ 
[EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap, ++ [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, ++ [EXIT_REASON_INVEPT] = handle_invept, ++ [EXIT_REASON_INVVPID] = handle_invvpid, ++ [EXIT_REASON_RDRAND] = handle_invalid_op, ++ [EXIT_REASON_RDSEED] = handle_invalid_op, ++ [EXIT_REASON_XSAVES] = handle_xsaves, ++ [EXIT_REASON_XRSTORS] = handle_xrstors, ++ [EXIT_REASON_PML_FULL] = handle_pml_full, ++ [EXIT_REASON_INVPCID] = handle_invpcid, ++ [EXIT_REASON_VMFUNC] = handle_vmfunc, ++ [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, ++ [EXIT_REASON_ENCLS] = handle_encls, ++}; ++ ++static const int kvm_vmx_max_exit_handlers = ++ ARRAY_SIZE(kvm_vmx_exit_handlers); ++ ++/* ++ * Return true if an IO instruction with the specified port and size should cause ++ * a VM-exit into L1. ++ */ ++bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, ++ int size) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ gpa_t bitmap, last_bitmap; ++ u8 b; ++ ++ last_bitmap = (gpa_t)-1; ++ b = -1; ++ ++ while (size > 0) { ++ if (port < 0x8000) ++ bitmap = vmcs12->io_bitmap_a; ++ else if (port < 0x10000) ++ bitmap = vmcs12->io_bitmap_b; ++ else ++ return true; ++ bitmap += (port & 0x7fff) / 8; ++ ++ if (last_bitmap != bitmap) ++ if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1)) ++ return true; ++ if (b & (1 << (port & 7))) ++ return true; ++ ++ port++; ++ size--; ++ last_bitmap = bitmap; ++ } ++ ++ return false; ++} ++ ++/* ++ * Return 1 if we should exit from L2 to L1 to handle an MSR access access, ++ * rather than handle it ourselves in L0. I.e., check whether L1 expressed ++ * disinterest in the current event (read or write a specific MSR) by using an ++ * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps. ++ */ ++static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12, u32 exit_reason) ++{ ++ u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; ++ gpa_t bitmap; ++ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) ++ return true; ++ ++ /* ++ * The MSR_BITMAP page is divided into four 1024-byte bitmaps, ++ * for the four combinations of read/write and low/high MSR numbers. ++ * First we need to figure out which of the four to use: ++ */ ++ bitmap = vmcs12->msr_bitmap; ++ if (exit_reason == EXIT_REASON_MSR_WRITE) ++ bitmap += 2048; ++ if (msr_index >= 0xc0000000) { ++ msr_index -= 0xc0000000; ++ bitmap += 1024; ++ } ++ ++ /* Then read the msr_index'th bit from this bitmap: */ ++ if (msr_index < 1024*8) { ++ unsigned char b; ++ if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1)) ++ return true; ++ return 1 & (b >> (msr_index & 7)); ++ } else ++ return true; /* let L1 handle the wrong parameter */ ++} ++ ++/* ++ * Return 1 if we should exit from L2 to L1 to handle a CR access exit, ++ * rather than handle it ourselves in L0. I.e., check if L1 wanted to ++ * intercept (via guest_host_mask etc.) the current event. 
++ */ ++static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ int cr = exit_qualification & 15; ++ int reg; ++ unsigned long val; ++ ++ switch ((exit_qualification >> 4) & 3) { ++ case 0: /* mov to cr */ ++ reg = (exit_qualification >> 8) & 15; ++ val = kvm_register_readl(vcpu, reg); ++ switch (cr) { ++ case 0: ++ if (vmcs12->cr0_guest_host_mask & ++ (val ^ vmcs12->cr0_read_shadow)) ++ return true; ++ break; ++ case 3: ++ if ((vmcs12->cr3_target_count >= 1 && ++ vmcs12->cr3_target_value0 == val) || ++ (vmcs12->cr3_target_count >= 2 && ++ vmcs12->cr3_target_value1 == val) || ++ (vmcs12->cr3_target_count >= 3 && ++ vmcs12->cr3_target_value2 == val) || ++ (vmcs12->cr3_target_count >= 4 && ++ vmcs12->cr3_target_value3 == val)) ++ return false; ++ if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) ++ return true; ++ break; ++ case 4: ++ if (vmcs12->cr4_guest_host_mask & ++ (vmcs12->cr4_read_shadow ^ val)) ++ return true; ++ break; ++ case 8: ++ if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING)) ++ return true; ++ break; ++ } ++ break; ++ case 2: /* clts */ ++ if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) && ++ (vmcs12->cr0_read_shadow & X86_CR0_TS)) ++ return true; ++ break; ++ case 1: /* mov from cr */ ++ switch (cr) { ++ case 3: ++ if (vmcs12->cpu_based_vm_exec_control & ++ CPU_BASED_CR3_STORE_EXITING) ++ return true; ++ break; ++ case 8: ++ if (vmcs12->cpu_based_vm_exec_control & ++ CPU_BASED_CR8_STORE_EXITING) ++ return true; ++ break; ++ } ++ break; ++ case 3: /* lmsw */ ++ /* ++ * lmsw can change bits 1..3 of cr0, and only set bit 0 of ++ * cr0. Other attempted changes are ignored, with no exit. ++ */ ++ val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; ++ if (vmcs12->cr0_guest_host_mask & 0xe & ++ (val ^ vmcs12->cr0_read_shadow)) ++ return true; ++ if ((vmcs12->cr0_guest_host_mask & 0x1) && ++ !(vmcs12->cr0_read_shadow & 0x1) && ++ (val & 0x1)) ++ return true; ++ break; ++ } ++ return false; ++} ++ ++static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12, gpa_t bitmap) ++{ ++ u32 vmx_instruction_info; ++ unsigned long field; ++ u8 b; ++ ++ if (!nested_cpu_has_shadow_vmcs(vmcs12)) ++ return true; ++ ++ /* Decode instruction info and find the field to access */ ++ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); ++ ++ /* Out-of-range fields always cause a VM exit from L2 to L1 */ ++ if (field >> 15) ++ return true; ++ ++ if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1)) ++ return true; ++ ++ return 1 & (b >> (field & 7)); ++} ++ ++/* ++ * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we ++ * should handle it ourselves in L0 (and then continue L2). Only call this ++ * when in is_guest_mode (L2). ++ */ ++static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) ++{ ++ u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ if (vmx->nested.nested_run_pending) ++ return false; ++ ++ if (unlikely(vmx->fail)) { ++ pr_info_ratelimited("%s failed vm entry %x\n", __func__, ++ vmcs_read32(VM_INSTRUCTION_ERROR)); ++ return true; ++ } ++ ++ /* ++ * The host physical addresses of some pages of guest memory ++ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC ++ * Page). 
The CPU may write to these pages via their host ++ * physical address while L2 is running, bypassing any ++ * address-translation-based dirty tracking (e.g. EPT write ++ * protection). ++ * ++ * Mark them dirty on every exit from L2 to prevent them from ++ * getting out of sync with dirty tracking. ++ */ ++ nested_mark_vmcs12_pages_dirty(vcpu); ++ ++ trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, ++ vmcs_readl(EXIT_QUALIFICATION), ++ vmx->idt_vectoring_info, ++ intr_info, ++ vmcs_read32(VM_EXIT_INTR_ERROR_CODE), ++ KVM_ISA_VMX); ++ ++ switch (exit_reason) { ++ case EXIT_REASON_EXCEPTION_NMI: ++ if (is_nmi(intr_info)) ++ return false; ++ else if (is_page_fault(intr_info)) ++ return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept; ++ else if (is_no_device(intr_info) && ++ !(vmcs12->guest_cr0 & X86_CR0_TS)) ++ return false; ++ else if (is_debug(intr_info) && ++ vcpu->guest_debug & ++ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) ++ return false; ++ else if (is_breakpoint(intr_info) && ++ vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) ++ return false; ++ return vmcs12->exception_bitmap & ++ (1u << (intr_info & INTR_INFO_VECTOR_MASK)); ++ case EXIT_REASON_EXTERNAL_INTERRUPT: ++ return false; ++ case EXIT_REASON_TRIPLE_FAULT: ++ return true; ++ case EXIT_REASON_PENDING_INTERRUPT: ++ return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); ++ case EXIT_REASON_NMI_WINDOW: ++ return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); ++ case EXIT_REASON_TASK_SWITCH: ++ return true; ++ case EXIT_REASON_CPUID: ++ return true; ++ case EXIT_REASON_HLT: ++ return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); ++ case EXIT_REASON_INVD: ++ return true; ++ case EXIT_REASON_INVLPG: ++ return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); ++ case EXIT_REASON_RDPMC: ++ return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); ++ case EXIT_REASON_RDRAND: ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING); ++ case EXIT_REASON_RDSEED: ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING); ++ case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: ++ return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); ++ case EXIT_REASON_VMREAD: ++ return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12, ++ vmcs12->vmread_bitmap); ++ case EXIT_REASON_VMWRITE: ++ return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12, ++ vmcs12->vmwrite_bitmap); ++ case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: ++ case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: ++ case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME: ++ case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: ++ case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID: ++ /* ++ * VMX instructions trap unconditionally. This allows L1 to ++ * emulate them for its L2 guest, i.e., allows 3-level nesting! 
++ */ ++ return true; ++ case EXIT_REASON_CR_ACCESS: ++ return nested_vmx_exit_handled_cr(vcpu, vmcs12); ++ case EXIT_REASON_DR_ACCESS: ++ return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); ++ case EXIT_REASON_IO_INSTRUCTION: ++ return nested_vmx_exit_handled_io(vcpu, vmcs12); ++ case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR: ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC); ++ case EXIT_REASON_MSR_READ: ++ case EXIT_REASON_MSR_WRITE: ++ return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); ++ case EXIT_REASON_INVALID_STATE: ++ return true; ++ case EXIT_REASON_MWAIT_INSTRUCTION: ++ return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); ++ case EXIT_REASON_MONITOR_TRAP_FLAG: ++ return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG); ++ case EXIT_REASON_MONITOR_INSTRUCTION: ++ return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING); ++ case EXIT_REASON_PAUSE_INSTRUCTION: ++ return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) || ++ nested_cpu_has2(vmcs12, ++ SECONDARY_EXEC_PAUSE_LOOP_EXITING); ++ case EXIT_REASON_MCE_DURING_VMENTRY: ++ return false; ++ case EXIT_REASON_TPR_BELOW_THRESHOLD: ++ return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); ++ case EXIT_REASON_APIC_ACCESS: ++ case EXIT_REASON_APIC_WRITE: ++ case EXIT_REASON_EOI_INDUCED: ++ /* ++ * The controls for "virtualize APIC accesses," "APIC- ++ * register virtualization," and "virtual-interrupt ++ * delivery" only come from vmcs12. ++ */ ++ return true; ++ case EXIT_REASON_EPT_VIOLATION: ++ /* ++ * L0 always deals with the EPT violation. If nested EPT is ++ * used, and the nested mmu code discovers that the address is ++ * missing in the guest EPT table (EPT12), the EPT violation ++ * will be injected with nested_ept_inject_page_fault() ++ */ ++ return false; ++ case EXIT_REASON_EPT_MISCONFIG: ++ /* ++ * L2 never uses directly L1's EPT, but rather L0's own EPT ++ * table (shadow on EPT) or a merged EPT table that L0 built ++ * (EPT on EPT). So any problems with the structure of the ++ * table is L0's fault. ++ */ ++ return false; ++ case EXIT_REASON_INVPCID: ++ return ++ nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) && ++ nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); ++ case EXIT_REASON_WBINVD: ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); ++ case EXIT_REASON_XSETBV: ++ return true; ++ case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS: ++ /* ++ * This should never happen, since it is not possible to ++ * set XSS to a non-zero value---neither in L1 nor in L2. ++ * If if it were, XSS would have to be checked against ++ * the XSS exit bitmap in vmcs12. ++ */ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); ++ case EXIT_REASON_PREEMPTION_TIMER: ++ return false; ++ case EXIT_REASON_PML_FULL: ++ /* We emulate PML support to L1. */ ++ return false; ++ case EXIT_REASON_VMFUNC: ++ /* VM functions are emulated through L2->L0 vmexits. */ ++ return false; ++ case EXIT_REASON_ENCLS: ++ /* SGX is never exposed to L1 */ ++ return false; ++ default: ++ return true; ++ } ++} ++ ++static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason) ++{ ++ u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); ++ ++ /* ++ * At this point, the exit interruption info in exit_intr_info ++ * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT ++ * we need to query the in-kernel LAPIC. 
++ */ ++ WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT); ++ if ((exit_intr_info & ++ (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == ++ (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ vmcs12->vm_exit_intr_error_code = ++ vmcs_read32(VM_EXIT_INTR_ERROR_CODE); ++ } ++ ++ nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, ++ vmcs_readl(EXIT_QUALIFICATION)); ++ return 1; ++} ++ ++static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) ++{ ++ *info1 = vmcs_readl(EXIT_QUALIFICATION); ++ *info2 = vmcs_read32(VM_EXIT_INTR_INFO); ++} ++ ++static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) ++{ ++ if (vmx->pml_pg) { ++ __free_page(vmx->pml_pg); ++ vmx->pml_pg = NULL; ++ } ++} ++ ++static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u64 *pml_buf; ++ u16 pml_idx; ++ ++ pml_idx = vmcs_read16(GUEST_PML_INDEX); ++ ++ /* Do nothing if PML buffer is empty */ ++ if (pml_idx == (PML_ENTITY_NUM - 1)) ++ return; ++ ++ /* PML index always points to next available PML buffer entity */ ++ if (pml_idx >= PML_ENTITY_NUM) ++ pml_idx = 0; ++ else ++ pml_idx++; ++ ++ pml_buf = page_address(vmx->pml_pg); ++ for (; pml_idx < PML_ENTITY_NUM; pml_idx++) { ++ u64 gpa; ++ ++ gpa = pml_buf[pml_idx]; ++ WARN_ON(gpa & (PAGE_SIZE - 1)); ++ kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); ++ } ++ ++ /* reset PML index */ ++ vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); ++} ++ ++/* ++ * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap. ++ * Called before reporting dirty_bitmap to userspace. ++ */ ++static void kvm_flush_pml_buffers(struct kvm *kvm) ++{ ++ int i; ++ struct kvm_vcpu *vcpu; ++ /* ++ * We only need to kick vcpu out of guest mode here, as PML buffer ++ * is flushed at beginning of all VMEXITs, and it's obvious that only ++ * vcpus running in guest are possible to have unflushed GPAs in PML ++ * buffer. 
++ */ ++ kvm_for_each_vcpu(i, vcpu, kvm) ++ kvm_vcpu_kick(vcpu); ++} ++ ++static void vmx_dump_sel(char *name, uint32_t sel) ++{ ++ pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", ++ name, vmcs_read16(sel), ++ vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), ++ vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), ++ vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); ++} ++ ++static void vmx_dump_dtsel(char *name, uint32_t limit) ++{ ++ pr_err("%s limit=0x%08x, base=0x%016lx\n", ++ name, vmcs_read32(limit), ++ vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); ++} ++ ++static void dump_vmcs(void) ++{ ++ u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); ++ u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); ++ u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); ++ u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); ++ u32 secondary_exec_control = 0; ++ unsigned long cr4 = vmcs_readl(GUEST_CR4); ++ u64 efer = vmcs_read64(GUEST_IA32_EFER); ++ int i, n; ++ ++ if (cpu_has_secondary_exec_ctrls()) ++ secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); ++ ++ pr_err("*** Guest State ***\n"); ++ pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", ++ vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), ++ vmcs_readl(CR0_GUEST_HOST_MASK)); ++ pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", ++ cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK)); ++ pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3)); ++ if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && ++ (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) ++ { ++ pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", ++ vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1)); ++ pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", ++ vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); ++ } ++ pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", ++ vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); ++ pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n", ++ vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7)); ++ pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", ++ vmcs_readl(GUEST_SYSENTER_ESP), ++ vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP)); ++ vmx_dump_sel("CS: ", GUEST_CS_SELECTOR); ++ vmx_dump_sel("DS: ", GUEST_DS_SELECTOR); ++ vmx_dump_sel("SS: ", GUEST_SS_SELECTOR); ++ vmx_dump_sel("ES: ", GUEST_ES_SELECTOR); ++ vmx_dump_sel("FS: ", GUEST_FS_SELECTOR); ++ vmx_dump_sel("GS: ", GUEST_GS_SELECTOR); ++ vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT); ++ vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR); ++ vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT); ++ vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); ++ if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || ++ (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) ++ pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", ++ efer, vmcs_read64(GUEST_IA32_PAT)); ++ pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", ++ vmcs_read64(GUEST_IA32_DEBUGCTL), ++ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); ++ if (cpu_has_load_perf_global_ctrl && ++ vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) ++ pr_err("PerfGlobCtl = 0x%016llx\n", ++ vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); ++ if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) ++ pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); ++ pr_err("Interruptibility = %08x ActivityState = %08x\n", ++ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), ++ vmcs_read32(GUEST_ACTIVITY_STATE)); ++ if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ++ 
pr_err("InterruptStatus = %04x\n", ++ vmcs_read16(GUEST_INTR_STATUS)); ++ ++ pr_err("*** Host State ***\n"); ++ pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", ++ vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); ++ pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n", ++ vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR), ++ vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR), ++ vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR), ++ vmcs_read16(HOST_TR_SELECTOR)); ++ pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n", ++ vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE), ++ vmcs_readl(HOST_TR_BASE)); ++ pr_err("GDTBase=%016lx IDTBase=%016lx\n", ++ vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE)); ++ pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n", ++ vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3), ++ vmcs_readl(HOST_CR4)); ++ pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", ++ vmcs_readl(HOST_IA32_SYSENTER_ESP), ++ vmcs_read32(HOST_IA32_SYSENTER_CS), ++ vmcs_readl(HOST_IA32_SYSENTER_EIP)); ++ if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) ++ pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", ++ vmcs_read64(HOST_IA32_EFER), ++ vmcs_read64(HOST_IA32_PAT)); ++ if (cpu_has_load_perf_global_ctrl && ++ vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) ++ pr_err("PerfGlobCtl = 0x%016llx\n", ++ vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); ++ ++ pr_err("*** Control State ***\n"); ++ pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", ++ pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control); ++ pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); ++ pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", ++ vmcs_read32(EXCEPTION_BITMAP), ++ vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK), ++ vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH)); ++ pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", ++ vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), ++ vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE), ++ vmcs_read32(VM_ENTRY_INSTRUCTION_LEN)); ++ pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n", ++ vmcs_read32(VM_EXIT_INTR_INFO), ++ vmcs_read32(VM_EXIT_INTR_ERROR_CODE), ++ vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); ++ pr_err(" reason=%08x qualification=%016lx\n", ++ vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION)); ++ pr_err("IDTVectoring: info=%08x errcode=%08x\n", ++ vmcs_read32(IDT_VECTORING_INFO_FIELD), ++ vmcs_read32(IDT_VECTORING_ERROR_CODE)); ++ pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); ++ if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) ++ pr_err("TSC Multiplier = 0x%016llx\n", ++ vmcs_read64(TSC_MULTIPLIER)); ++ if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) ++ pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); ++ if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) ++ pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); ++ if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) ++ pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); ++ n = vmcs_read32(CR3_TARGET_COUNT); ++ for (i = 0; i + 1 < n; i += 4) ++ pr_err("CR3 target%u=%016lx target%u=%016lx\n", ++ i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2), ++ i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2)); ++ if (i < n) ++ pr_err("CR3 target%u=%016lx\n", ++ i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2)); ++ if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) ++ pr_err("PLE Gap=%08x Window=%08x\n", ++ vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); ++ if (secondary_exec_control & 
SECONDARY_EXEC_ENABLE_VPID) ++ pr_err("Virtual processor ID = 0x%04x\n", ++ vmcs_read16(VIRTUAL_PROCESSOR_ID)); ++} ++ ++/* ++ * The guest has exited. See if we can fix it or if we need userspace ++ * assistance. ++ */ ++static int vmx_handle_exit(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 exit_reason = vmx->exit_reason; ++ u32 vectoring_info = vmx->idt_vectoring_info; ++ ++ trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); ++ ++ /* ++ * Flush logged GPAs PML buffer, this will make dirty_bitmap more ++ * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before ++ * querying dirty_bitmap, we only need to kick all vcpus out of guest ++ * mode as if vcpus is in root mode, the PML buffer must has been ++ * flushed already. ++ */ ++ if (enable_pml) ++ vmx_flush_pml_buffer(vcpu); ++ ++ /* If guest state is invalid, start emulating */ ++ if (vmx->emulation_required) ++ return handle_invalid_guest_state(vcpu); ++ ++ if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason)) ++ return nested_vmx_reflect_vmexit(vcpu, exit_reason); ++ ++ if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { ++ dump_vmcs(); ++ vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; ++ vcpu->run->fail_entry.hardware_entry_failure_reason ++ = exit_reason; ++ return 0; ++ } ++ ++ if (unlikely(vmx->fail)) { ++ vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; ++ vcpu->run->fail_entry.hardware_entry_failure_reason ++ = vmcs_read32(VM_INSTRUCTION_ERROR); ++ return 0; ++ } ++ ++ /* ++ * Note: ++ * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by ++ * delivery event since it indicates guest is accessing MMIO. ++ * The vm-exit can be triggered again after return to guest that ++ * will cause infinite loop. ++ */ ++ if ((vectoring_info & VECTORING_INFO_VALID_MASK) && ++ (exit_reason != EXIT_REASON_EXCEPTION_NMI && ++ exit_reason != EXIT_REASON_EPT_VIOLATION && ++ exit_reason != EXIT_REASON_PML_FULL && ++ exit_reason != EXIT_REASON_TASK_SWITCH)) { ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; ++ vcpu->run->internal.ndata = 3; ++ vcpu->run->internal.data[0] = vectoring_info; ++ vcpu->run->internal.data[1] = exit_reason; ++ vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; ++ if (exit_reason == EXIT_REASON_EPT_MISCONFIG) { ++ vcpu->run->internal.ndata++; ++ vcpu->run->internal.data[3] = ++ vmcs_read64(GUEST_PHYSICAL_ADDRESS); ++ } ++ return 0; ++ } ++ ++ if (unlikely(!enable_vnmi && ++ vmx->loaded_vmcs->soft_vnmi_blocked)) { ++ if (vmx_interrupt_allowed(vcpu)) { ++ vmx->loaded_vmcs->soft_vnmi_blocked = 0; ++ } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && ++ vcpu->arch.nmi_pending) { ++ /* ++ * This CPU don't support us in finding the end of an ++ * NMI-blocked window if the guest runs with IRQs ++ * disabled. So we pull the trigger after 1 s of ++ * futile waiting, but inform the user about this. 
++ */ ++ printk(KERN_WARNING "%s: Breaking out of NMI-blocked " ++ "state on VCPU %d after 1 s timeout\n", ++ __func__, vcpu->vcpu_id); ++ vmx->loaded_vmcs->soft_vnmi_blocked = 0; ++ } ++ } ++ ++ if (exit_reason < kvm_vmx_max_exit_handlers ++ && kvm_vmx_exit_handlers[exit_reason]) ++ return kvm_vmx_exit_handlers[exit_reason](vcpu); ++ else { ++ vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", ++ exit_reason); ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++} ++ ++/* ++ * Software based L1D cache flush which is used when microcode providing ++ * the cache control MSR is not loaded. ++ * ++ * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to ++ * flush it is required to read in 64 KiB because the replacement algorithm ++ * is not exactly LRU. This could be sized at runtime via topology ++ * information but as all relevant affected CPUs have 32KiB L1D cache size ++ * there is no point in doing so. ++ */ ++static void vmx_l1d_flush(struct kvm_vcpu *vcpu) ++{ ++ int size = PAGE_SIZE << L1D_CACHE_ORDER; ++ ++ /* ++ * This code is only executed when the the flush mode is 'cond' or ++ * 'always' ++ */ ++ if (static_branch_likely(&vmx_l1d_flush_cond)) { ++ bool flush_l1d; ++ ++ /* ++ * Clear the per-vcpu flush bit, it gets set again ++ * either from vcpu_run() or from one of the unsafe ++ * VMEXIT handlers. ++ */ ++ flush_l1d = vcpu->arch.l1tf_flush_l1d; ++ vcpu->arch.l1tf_flush_l1d = false; ++ ++ /* ++ * Clear the per-cpu flush bit, it gets set again from ++ * the interrupt handlers. ++ */ ++ flush_l1d |= kvm_get_cpu_l1tf_flush_l1d(); ++ kvm_clear_cpu_l1tf_flush_l1d(); ++ ++ if (!flush_l1d) ++ return; ++ } ++ ++ vcpu->stat.l1d_flush++; ++ ++ if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { ++ wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); ++ return; ++ } ++ ++ asm volatile( ++ /* First ensure the pages are in the TLB */ ++ "xorl %%eax, %%eax\n" ++ ".Lpopulate_tlb:\n\t" ++ "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" ++ "addl $4096, %%eax\n\t" ++ "cmpl %%eax, %[size]\n\t" ++ "jne .Lpopulate_tlb\n\t" ++ "xorl %%eax, %%eax\n\t" ++ "cpuid\n\t" ++ /* Now fill the cache */ ++ "xorl %%eax, %%eax\n" ++ ".Lfill_cache:\n" ++ "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" ++ "addl $64, %%eax\n\t" ++ "cmpl %%eax, %[size]\n\t" ++ "jne .Lfill_cache\n\t" ++ "lfence\n" ++ :: [flush_pages] "r" (vmx_l1d_flush_pages), ++ [size] "r" (size) ++ : "eax", "ebx", "ecx", "edx"); ++} ++ ++static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ if (is_guest_mode(vcpu) && ++ nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) ++ return; ++ ++ if (irr == -1 || tpr < irr) { ++ vmcs_write32(TPR_THRESHOLD, 0); ++ return; ++ } ++ ++ vmcs_write32(TPR_THRESHOLD, irr); ++} ++ ++static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) ++{ ++ u32 sec_exec_control; ++ ++ if (!lapic_in_kernel(vcpu)) ++ return; ++ ++ if (!flexpriority_enabled && ++ !cpu_has_vmx_virtualize_x2apic_mode()) ++ return; ++ ++ /* Postpone execution until vmcs01 is the current VMCS. 
*/ ++ if (is_guest_mode(vcpu)) { ++ to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; ++ return; ++ } ++ ++ sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); ++ sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); ++ ++ switch (kvm_get_apic_mode(vcpu)) { ++ case LAPIC_MODE_INVALID: ++ WARN_ONCE(true, "Invalid local APIC state"); ++ case LAPIC_MODE_DISABLED: ++ break; ++ case LAPIC_MODE_XAPIC: ++ if (flexpriority_enabled) { ++ sec_exec_control |= ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; ++ vmx_flush_tlb(vcpu, true); ++ } ++ break; ++ case LAPIC_MODE_X2APIC: ++ if (cpu_has_vmx_virtualize_x2apic_mode()) ++ sec_exec_control |= ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; ++ break; ++ } ++ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); ++ ++ vmx_update_msr_bitmap(vcpu); ++} ++ ++static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) ++{ ++ if (!is_guest_mode(vcpu)) { ++ vmcs_write64(APIC_ACCESS_ADDR, hpa); ++ vmx_flush_tlb(vcpu, true); ++ } ++} ++ ++static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) ++{ ++ u16 status; ++ u8 old; ++ ++ if (max_isr == -1) ++ max_isr = 0; ++ ++ status = vmcs_read16(GUEST_INTR_STATUS); ++ old = status >> 8; ++ if (max_isr != old) { ++ status &= 0xff; ++ status |= max_isr << 8; ++ vmcs_write16(GUEST_INTR_STATUS, status); ++ } ++} ++ ++static void vmx_set_rvi(int vector) ++{ ++ u16 status; ++ u8 old; ++ ++ if (vector == -1) ++ vector = 0; ++ ++ status = vmcs_read16(GUEST_INTR_STATUS); ++ old = (u8)status & 0xff; ++ if ((u8)vector != old) { ++ status &= ~0xff; ++ status |= (u8)vector; ++ vmcs_write16(GUEST_INTR_STATUS, status); ++ } ++} ++ ++static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) ++{ ++ /* ++ * When running L2, updating RVI is only relevant when ++ * vmcs12 virtual-interrupt-delivery enabled. ++ * However, it can be enabled only when L1 also ++ * intercepts external-interrupts and in that case ++ * we should not update vmcs02 RVI but instead intercept ++ * interrupt. Therefore, do nothing when running L2. ++ */ ++ if (!is_guest_mode(vcpu)) ++ vmx_set_rvi(max_irr); ++} ++ ++static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int max_irr; ++ bool max_irr_updated; ++ ++ WARN_ON(!vcpu->arch.apicv_active); ++ if (pi_test_on(&vmx->pi_desc)) { ++ pi_clear_on(&vmx->pi_desc); ++ /* ++ * IOMMU can write to PIR.ON, so the barrier matters even on UP. ++ * But on x86 this is just a compiler barrier anyway. ++ */ ++ smp_mb__after_atomic(); ++ max_irr_updated = ++ kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); ++ ++ /* ++ * If we are running L2 and L1 has a new pending interrupt ++ * which can be injected, we should re-evaluate ++ * what should be done with this new L1 interrupt. ++ * If L1 intercepts external-interrupts, we should ++ * exit from L2 to L1. Otherwise, interrupt should be ++ * delivered directly to L2. 
++ */ ++ if (is_guest_mode(vcpu) && max_irr_updated) { ++ if (nested_exit_on_intr(vcpu)) ++ kvm_vcpu_exiting_guest_mode(vcpu); ++ else ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ } ++ } else { ++ max_irr = kvm_lapic_find_highest_irr(vcpu); ++ } ++ vmx_hwapic_irr_update(vcpu, max_irr); ++ return max_irr; ++} ++ ++static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) ++{ ++ u8 rvi = vmx_get_rvi(); ++ u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); ++ ++ return ((rvi & 0xf0) > (vppr & 0xf0)); ++} ++ ++static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) ++{ ++ return pi_test_on(vcpu_to_pi_desc(vcpu)); ++} ++ ++static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) ++{ ++ if (!kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); ++ vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); ++ vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); ++ vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); ++} ++ ++static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ pi_clear_on(&vmx->pi_desc); ++ memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); ++} ++ ++static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) ++{ ++ if (vmx->exit_reason != EXIT_REASON_EXCEPTION_NMI) ++ return; ++ ++ vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); ++ ++ /* if exit due to PF check for async PF */ ++ if (is_page_fault(vmx->exit_intr_info)) ++ vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); ++ ++ /* Handle machine checks before interrupts are enabled */ ++ if (is_machine_check(vmx->exit_intr_info)) ++ kvm_machine_check(); ++ ++ /* We need to handle NMIs before interrupts are enabled */ ++ if (is_nmi(vmx->exit_intr_info)) { ++ kvm_before_interrupt(&vmx->vcpu); ++ asm("int $2"); ++ kvm_after_interrupt(&vmx->vcpu); ++ } ++} ++ ++static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) ++{ ++ u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); ++ ++ if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) ++ == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { ++ unsigned int vector; ++ unsigned long entry; ++ gate_desc *desc; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++#ifdef CONFIG_X86_64 ++ unsigned long tmp; ++#endif ++ ++ vector = exit_intr_info & INTR_INFO_VECTOR_MASK; ++ desc = (gate_desc *)vmx->host_idt_base + vector; ++ entry = gate_offset(desc); ++ asm volatile( ++#ifdef CONFIG_X86_64 ++ "mov %%" _ASM_SP ", %[sp]\n\t" ++ "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" ++ "push $%c[ss]\n\t" ++ "push %[sp]\n\t" ++#endif ++ "pushf\n\t" ++ __ASM_SIZE(push) " $%c[cs]\n\t" ++ CALL_NOSPEC ++ : ++#ifdef CONFIG_X86_64 ++ [sp]"=&r"(tmp), ++#endif ++ ASM_CALL_CONSTRAINT ++ : ++ THUNK_TARGET(entry), ++ [ss]"i"(__KERNEL_DS), ++ [cs]"i"(__KERNEL_CS) ++ ); ++ } ++} ++STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); ++ ++static bool vmx_has_emulated_msr(int index) ++{ ++ switch (index) { ++ case MSR_IA32_SMBASE: ++ /* ++ * We cannot do SMM unless we can run the guest in big ++ * real mode. ++ */ ++ return enable_unrestricted_guest || emulate_invalid_guest_state; ++ case MSR_AMD64_VIRT_SPEC_CTRL: ++ /* This is AMD only. 
*/ ++ return false; ++ default: ++ return true; ++ } ++} ++ ++static bool vmx_mpx_supported(void) ++{ ++ return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) && ++ (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS); ++} ++ ++static bool vmx_xsaves_supported(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_XSAVES; ++} ++ ++static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) ++{ ++ u32 exit_intr_info; ++ bool unblock_nmi; ++ u8 vector; ++ bool idtv_info_valid; ++ ++ idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; ++ ++ if (enable_vnmi) { ++ if (vmx->loaded_vmcs->nmi_known_unmasked) ++ return; ++ /* ++ * Can't use vmx->exit_intr_info since we're not sure what ++ * the exit reason is. ++ */ ++ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); ++ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; ++ vector = exit_intr_info & INTR_INFO_VECTOR_MASK; ++ /* ++ * SDM 3: 27.7.1.2 (September 2008) ++ * Re-set bit "block by NMI" before VM entry if vmexit caused by ++ * a guest IRET fault. ++ * SDM 3: 23.2.2 (September 2008) ++ * Bit 12 is undefined in any of the following cases: ++ * If the VM exit sets the valid bit in the IDT-vectoring ++ * information field. ++ * If the VM exit is due to a double fault. ++ */ ++ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && ++ vector != DF_VECTOR && !idtv_info_valid) ++ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, ++ GUEST_INTR_STATE_NMI); ++ else ++ vmx->loaded_vmcs->nmi_known_unmasked = ++ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) ++ & GUEST_INTR_STATE_NMI); ++ } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked)) ++ vmx->loaded_vmcs->vnmi_blocked_time += ++ ktime_to_ns(ktime_sub(ktime_get(), ++ vmx->loaded_vmcs->entry_time)); ++} ++ ++static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, ++ u32 idt_vectoring_info, ++ int instr_len_field, ++ int error_code_field) ++{ ++ u8 vector; ++ int type; ++ bool idtv_info_valid; ++ ++ idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; ++ ++ vcpu->arch.nmi_injected = false; ++ kvm_clear_exception_queue(vcpu); ++ kvm_clear_interrupt_queue(vcpu); ++ ++ if (!idtv_info_valid) ++ return; ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; ++ type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; ++ ++ switch (type) { ++ case INTR_TYPE_NMI_INTR: ++ vcpu->arch.nmi_injected = true; ++ /* ++ * SDM 3: 27.7.1.2 (September 2008) ++ * Clear bit "block by NMI" before VM entry if a NMI ++ * delivery faulted. 
++ */ ++ vmx_set_nmi_mask(vcpu, false); ++ break; ++ case INTR_TYPE_SOFT_EXCEPTION: ++ vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); ++ /* fall through */ ++ case INTR_TYPE_HARD_EXCEPTION: ++ if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { ++ u32 err = vmcs_read32(error_code_field); ++ kvm_requeue_exception_e(vcpu, vector, err); ++ } else ++ kvm_requeue_exception(vcpu, vector); ++ break; ++ case INTR_TYPE_SOFT_INTR: ++ vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); ++ /* fall through */ ++ case INTR_TYPE_EXT_INTR: ++ kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); ++ break; ++ default: ++ break; ++ } ++} ++ ++static void vmx_complete_interrupts(struct vcpu_vmx *vmx) ++{ ++ __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, ++ VM_EXIT_INSTRUCTION_LEN, ++ IDT_VECTORING_ERROR_CODE); ++} ++ ++static void vmx_cancel_injection(struct kvm_vcpu *vcpu) ++{ ++ __vmx_complete_interrupts(vcpu, ++ vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), ++ VM_ENTRY_INSTRUCTION_LEN, ++ VM_ENTRY_EXCEPTION_ERROR_CODE); ++ ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); ++} ++ ++static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) ++{ ++ int i, nr_msrs; ++ struct perf_guest_switch_msr *msrs; ++ ++ msrs = perf_guest_get_msrs(&nr_msrs); ++ ++ if (!msrs) ++ return; ++ ++ for (i = 0; i < nr_msrs; i++) ++ if (msrs[i].host == msrs[i].guest) ++ clear_atomic_switch_msr(vmx, msrs[i].msr); ++ else ++ add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest, ++ msrs[i].host, false); ++} ++ ++static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val) ++{ ++ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val); ++ if (!vmx->loaded_vmcs->hv_timer_armed) ++ vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, ++ PIN_BASED_VMX_PREEMPTION_TIMER); ++ vmx->loaded_vmcs->hv_timer_armed = true; ++} ++ ++static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u64 tscl; ++ u32 delta_tsc; ++ ++ if (vmx->req_immediate_exit) { ++ vmx_arm_hv_timer(vmx, 0); ++ return; ++ } ++ ++ if (vmx->hv_deadline_tsc != -1) { ++ tscl = rdtsc(); ++ if (vmx->hv_deadline_tsc > tscl) ++ /* set_hv_timer ensures the delta fits in 32-bits */ ++ delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> ++ cpu_preemption_timer_multi); ++ else ++ delta_tsc = 0; ++ ++ vmx_arm_hv_timer(vmx, delta_tsc); ++ return; ++ } ++ ++ if (vmx->loaded_vmcs->hv_timer_armed) ++ vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, ++ PIN_BASED_VMX_PREEMPTION_TIMER); ++ vmx->loaded_vmcs->hv_timer_armed = false; ++} ++ ++static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long cr3, cr4, evmcs_rsp; ++ ++ /* Record the guest's net vcpu time for enforced NMI injections. 
*/ ++ if (unlikely(!enable_vnmi && ++ vmx->loaded_vmcs->soft_vnmi_blocked)) ++ vmx->loaded_vmcs->entry_time = ktime_get(); ++ ++ /* Don't enter VMX if guest state is invalid, let the exit handler ++ start emulation until we arrive back to a valid state */ ++ if (vmx->emulation_required) ++ return; ++ ++ if (vmx->ple_window_dirty) { ++ vmx->ple_window_dirty = false; ++ vmcs_write32(PLE_WINDOW, vmx->ple_window); ++ } ++ ++ if (vmx->nested.sync_shadow_vmcs) { ++ copy_vmcs12_to_shadow(vmx); ++ vmx->nested.sync_shadow_vmcs = false; ++ } ++ ++ if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) ++ vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); ++ if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) ++ vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); ++ ++ cr3 = __get_current_cr3_fast(); ++ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { ++ vmcs_writel(HOST_CR3, cr3); ++ vmx->loaded_vmcs->host_state.cr3 = cr3; ++ } ++ ++ cr4 = cr4_read_shadow(); ++ if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { ++ vmcs_writel(HOST_CR4, cr4); ++ vmx->loaded_vmcs->host_state.cr4 = cr4; ++ } ++ ++ /* When single-stepping over STI and MOV SS, we must clear the ++ * corresponding interruptibility bits in the guest state. Otherwise ++ * vmentry fails as it then expects bit 14 (BS) in pending debug ++ * exceptions being set, but that's not correct for the guest debugging ++ * case. */ ++ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) ++ vmx_set_interrupt_shadow(vcpu, 0); ++ ++ kvm_load_guest_xcr0(vcpu); ++ ++ if (static_cpu_has(X86_FEATURE_PKU) && ++ kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && ++ vcpu->arch.pkru != vmx->host_pkru) ++ __write_pkru(vcpu->arch.pkru); ++ ++ atomic_switch_perf_msrs(vmx); ++ ++ vmx_update_hv_timer(vcpu); ++ ++ /* ++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if ++ * it's non-zero. Since vmentry is serialising on affected CPUs, there ++ * is no need to worry about the conditional branch over the wrmsr ++ * being speculatively taken. ++ */ ++ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); ++ ++ vmx->__launched = vmx->loaded_vmcs->launched; ++ ++ evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? ++ (unsigned long)¤t_evmcs->host_rsp : 0; ++ ++ /* L1D Flush includes CPU buffer clear to mitigate MDS */ ++ if (static_branch_unlikely(&vmx_l1d_should_flush)) ++ vmx_l1d_flush(vcpu); ++ else if (static_branch_unlikely(&mds_user_clear)) ++ mds_clear_cpu_buffers(); ++ ++ asm( ++ /* Store host registers */ ++ "push %%" _ASM_DX "; push %%" _ASM_BP ";" ++ "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */ ++ "push %%" _ASM_CX " \n\t" ++ "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t" ++ "je 1f \n\t" ++ "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t" ++ /* Avoid VMWRITE when Enlightened VMCS is in use */ ++ "test %%" _ASM_SI ", %%" _ASM_SI " \n\t" ++ "jz 2f \n\t" ++ "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t" ++ "jmp 1f \n\t" ++ "2: \n\t" ++ __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" ++ "1: \n\t" ++ /* Reload cr2 if changed */ ++ "mov %c[cr2](%0), %%" _ASM_AX " \n\t" ++ "mov %%cr2, %%" _ASM_DX " \n\t" ++ "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" ++ "je 3f \n\t" ++ "mov %%" _ASM_AX", %%cr2 \n\t" ++ "3: \n\t" ++ /* Check if vmlaunch of vmresume is needed */ ++ "cmpb $0, %c[launched](%0) \n\t" ++ /* Load guest registers. Don't clobber flags. 
*/ ++ "mov %c[rax](%0), %%" _ASM_AX " \n\t" ++ "mov %c[rbx](%0), %%" _ASM_BX " \n\t" ++ "mov %c[rdx](%0), %%" _ASM_DX " \n\t" ++ "mov %c[rsi](%0), %%" _ASM_SI " \n\t" ++ "mov %c[rdi](%0), %%" _ASM_DI " \n\t" ++ "mov %c[rbp](%0), %%" _ASM_BP " \n\t" ++#ifdef CONFIG_X86_64 ++ "mov %c[r8](%0), %%r8 \n\t" ++ "mov %c[r9](%0), %%r9 \n\t" ++ "mov %c[r10](%0), %%r10 \n\t" ++ "mov %c[r11](%0), %%r11 \n\t" ++ "mov %c[r12](%0), %%r12 \n\t" ++ "mov %c[r13](%0), %%r13 \n\t" ++ "mov %c[r14](%0), %%r14 \n\t" ++ "mov %c[r15](%0), %%r15 \n\t" ++#endif ++ "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */ ++ ++ /* Enter guest mode */ ++ "jne 1f \n\t" ++ __ex(ASM_VMX_VMLAUNCH) "\n\t" ++ "jmp 2f \n\t" ++ "1: " __ex(ASM_VMX_VMRESUME) "\n\t" ++ "2: " ++ /* Save guest registers, load host registers, keep flags */ ++ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" ++ "pop %0 \n\t" ++ "setbe %c[fail](%0)\n\t" ++ "mov %%" _ASM_AX ", %c[rax](%0) \n\t" ++ "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" ++ __ASM_SIZE(pop) " %c[rcx](%0) \n\t" ++ "mov %%" _ASM_DX ", %c[rdx](%0) \n\t" ++ "mov %%" _ASM_SI ", %c[rsi](%0) \n\t" ++ "mov %%" _ASM_DI ", %c[rdi](%0) \n\t" ++ "mov %%" _ASM_BP ", %c[rbp](%0) \n\t" ++#ifdef CONFIG_X86_64 ++ "mov %%r8, %c[r8](%0) \n\t" ++ "mov %%r9, %c[r9](%0) \n\t" ++ "mov %%r10, %c[r10](%0) \n\t" ++ "mov %%r11, %c[r11](%0) \n\t" ++ "mov %%r12, %c[r12](%0) \n\t" ++ "mov %%r13, %c[r13](%0) \n\t" ++ "mov %%r14, %c[r14](%0) \n\t" ++ "mov %%r15, %c[r15](%0) \n\t" ++ "xor %%r8d, %%r8d \n\t" ++ "xor %%r9d, %%r9d \n\t" ++ "xor %%r10d, %%r10d \n\t" ++ "xor %%r11d, %%r11d \n\t" ++ "xor %%r12d, %%r12d \n\t" ++ "xor %%r13d, %%r13d \n\t" ++ "xor %%r14d, %%r14d \n\t" ++ "xor %%r15d, %%r15d \n\t" ++#endif ++ "mov %%cr2, %%" _ASM_AX " \n\t" ++ "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" ++ ++ "xor %%eax, %%eax \n\t" ++ "xor %%ebx, %%ebx \n\t" ++ "xor %%esi, %%esi \n\t" ++ "xor %%edi, %%edi \n\t" ++ "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" ++ ".pushsection .rodata \n\t" ++ ".global vmx_return \n\t" ++ "vmx_return: " _ASM_PTR " 2b \n\t" ++ ".popsection" ++ : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp), ++ [launched]"i"(offsetof(struct vcpu_vmx, __launched)), ++ [fail]"i"(offsetof(struct vcpu_vmx, fail)), ++ [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), ++ [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), ++ [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), ++ [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])), ++ [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])), ++ [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])), ++ [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])), ++ [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])), ++#ifdef CONFIG_X86_64 ++ [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])), ++ [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])), ++ [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])), ++ [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])), ++ [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])), ++ [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])), ++ [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), ++ [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), ++#endif ++ [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), ++ [wordsize]"i"(sizeof(ulong)) ++ : "cc", "memory" ++#ifdef CONFIG_X86_64 ++ , "rax", "rbx", "rdi" ++ , "r8", "r9", "r10", 
"r11", "r12", "r13", "r14", "r15" ++#else ++ , "eax", "ebx", "edi" ++#endif ++ ); ++ ++ /* ++ * We do not use IBRS in the kernel. If this vCPU has used the ++ * SPEC_CTRL MSR it may have left it on; save the value and ++ * turn it off. This is much more efficient than blindly adding ++ * it to the atomic save/restore list. Especially as the former ++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. ++ * ++ * For non-nested case: ++ * If the L01 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ * ++ * For nested case: ++ * If the L02 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ */ ++ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) ++ vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); ++ ++ x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); ++ ++ /* Eliminate branch target predictions from guest mode */ ++ vmexit_fill_RSB(); ++ ++ /* All fields are clean at this point */ ++ if (static_branch_unlikely(&enable_evmcs)) ++ current_evmcs->hv_clean_fields |= ++ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; ++ ++ /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ ++ if (vmx->host_debugctlmsr) ++ update_debugctlmsr(vmx->host_debugctlmsr); ++ ++#ifndef CONFIG_X86_64 ++ /* ++ * The sysexit path does not restore ds/es, so we must set them to ++ * a reasonable value ourselves. ++ * ++ * We can't defer this to vmx_prepare_switch_to_host() since that ++ * function may be executed in interrupt context, which saves and ++ * restore segments around it, nullifying its effect. ++ */ ++ loadsegment(ds, __USER_DS); ++ loadsegment(es, __USER_DS); ++#endif ++ ++ vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) ++ | (1 << VCPU_EXREG_RFLAGS) ++ | (1 << VCPU_EXREG_PDPTR) ++ | (1 << VCPU_EXREG_SEGMENTS) ++ | (1 << VCPU_EXREG_CR3)); ++ vcpu->arch.regs_dirty = 0; ++ ++ /* ++ * eager fpu is enabled if PKEY is supported and CR4 is switched ++ * back on host, so it is safe to read guest PKRU from current ++ * XSAVE. ++ */ ++ if (static_cpu_has(X86_FEATURE_PKU) && ++ kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { ++ vcpu->arch.pkru = __read_pkru(); ++ if (vcpu->arch.pkru != vmx->host_pkru) ++ __write_pkru(vmx->host_pkru); ++ } ++ ++ kvm_put_guest_xcr0(vcpu); ++ ++ vmx->nested.nested_run_pending = 0; ++ vmx->idt_vectoring_info = 0; ++ ++ vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON); ++ if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) ++ kvm_machine_check(); ++ ++ if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) ++ return; ++ ++ vmx->loaded_vmcs->launched = 1; ++ vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); ++ ++ vmx_complete_atomic_exit(vmx); ++ vmx_recover_nmi_blocking(vmx); ++ vmx_complete_interrupts(vmx); ++} ++STACK_FRAME_NON_STANDARD(vmx_vcpu_run); ++ ++static struct kvm *vmx_vm_alloc(void) ++{ ++ struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx)); ++ return &kvm_vmx->kvm; ++} ++ ++static void vmx_vm_free(struct kvm *kvm) ++{ ++ vfree(to_kvm_vmx(kvm)); ++} ++ ++static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int cpu; ++ ++ if (vmx->loaded_vmcs == vmcs) ++ return; ++ ++ cpu = get_cpu(); ++ vmx_vcpu_put(vcpu); ++ vmx->loaded_vmcs = vmcs; ++ vmx_vcpu_load(vcpu, cpu); ++ put_cpu(); ++} ++ ++/* ++ * Ensure that the current vmcs of the logical processor is the ++ * vmcs01 of the vcpu before calling free_nested(). 
++ */ ++static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ vcpu_load(vcpu); ++ vmx_switch_vmcs(vcpu, &vmx->vmcs01); ++ free_nested(vmx); ++ vcpu_put(vcpu); ++} ++ ++static void vmx_free_vcpu(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (enable_pml) ++ vmx_destroy_pml_buffer(vmx); ++ free_vpid(vmx->vpid); ++ leave_guest_mode(vcpu); ++ vmx_free_vcpu_nested(vcpu); ++ free_loaded_vmcs(vmx->loaded_vmcs); ++ kfree(vmx->guest_msrs); ++ kvm_vcpu_uninit(vcpu); ++ kmem_cache_free(kvm_vcpu_cache, vmx); ++} ++ ++static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) ++{ ++ int err; ++ struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); ++ unsigned long *msr_bitmap; ++ int cpu; ++ ++ if (!vmx) ++ return ERR_PTR(-ENOMEM); ++ ++ vmx->vpid = allocate_vpid(); ++ ++ err = kvm_vcpu_init(&vmx->vcpu, kvm, id); ++ if (err) ++ goto free_vcpu; ++ ++ err = -ENOMEM; ++ ++ /* ++ * If PML is turned on, failure on enabling PML just results in failure ++ * of creating the vcpu, therefore we can simplify PML logic (by ++ * avoiding dealing with cases, such as enabling PML partially on vcpus ++ * for the guest, etc. ++ */ ++ if (enable_pml) { ++ vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); ++ if (!vmx->pml_pg) ++ goto uninit_vcpu; ++ } ++ ++ vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) ++ > PAGE_SIZE); ++ ++ if (!vmx->guest_msrs) ++ goto free_pml; ++ ++ err = alloc_loaded_vmcs(&vmx->vmcs01); ++ if (err < 0) ++ goto free_msrs; ++ ++ msr_bitmap = vmx->vmcs01.msr_bitmap; ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); ++ vmx->msr_bitmap_mode = 0; ++ ++ vmx->loaded_vmcs = &vmx->vmcs01; ++ cpu = get_cpu(); ++ vmx_vcpu_load(&vmx->vcpu, cpu); ++ vmx->vcpu.cpu = cpu; ++ vmx_vcpu_setup(vmx); ++ vmx_vcpu_put(&vmx->vcpu); ++ put_cpu(); ++ if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { ++ err = alloc_apic_access_page(kvm); ++ if (err) ++ goto free_vmcs; ++ } ++ ++ if (enable_ept && !enable_unrestricted_guest) { ++ err = init_rmode_identity_map(kvm); ++ if (err) ++ goto free_vmcs; ++ } ++ ++ if (nested) ++ nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, ++ kvm_vcpu_apicv_active(&vmx->vcpu)); ++ ++ vmx->nested.posted_intr_nv = -1; ++ vmx->nested.current_vmptr = -1ull; ++ ++ vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; ++ ++ /* ++ * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR ++ * or POSTED_INTR_WAKEUP_VECTOR. ++ */ ++ vmx->pi_desc.nv = POSTED_INTR_VECTOR; ++ vmx->pi_desc.sn = 1; ++ ++ return &vmx->vcpu; ++ ++free_vmcs: ++ free_loaded_vmcs(vmx->loaded_vmcs); ++free_msrs: ++ kfree(vmx->guest_msrs); ++free_pml: ++ vmx_destroy_pml_buffer(vmx); ++uninit_vcpu: ++ kvm_vcpu_uninit(&vmx->vcpu); ++free_vcpu: ++ free_vpid(vmx->vpid); ++ kmem_cache_free(kvm_vcpu_cache, vmx); ++ return ERR_PTR(err); ++} ++ ++#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. 
See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" ++#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" ++ ++static int vmx_vm_init(struct kvm *kvm) ++{ ++ spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock); ++ ++ if (!ple_gap) ++ kvm->arch.pause_in_guest = true; ++ ++ if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) { ++ switch (l1tf_mitigation) { ++ case L1TF_MITIGATION_OFF: ++ case L1TF_MITIGATION_FLUSH_NOWARN: ++ /* 'I explicitly don't care' is set */ ++ break; ++ case L1TF_MITIGATION_FLUSH: ++ case L1TF_MITIGATION_FLUSH_NOSMT: ++ case L1TF_MITIGATION_FULL: ++ /* ++ * Warn upon starting the first VM in a potentially ++ * insecure environment. ++ */ ++ if (sched_smt_active()) ++ pr_warn_once(L1TF_MSG_SMT); ++ if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) ++ pr_warn_once(L1TF_MSG_L1D); ++ break; ++ case L1TF_MITIGATION_FULL_FORCE: ++ /* Flush is enforced */ ++ break; ++ } ++ } ++ return 0; ++} ++ ++static void __init vmx_check_processor_compat(void *rtn) ++{ ++ struct vmcs_config vmcs_conf; ++ ++ *(int *)rtn = 0; ++ if (setup_vmcs_config(&vmcs_conf) < 0) ++ *(int *)rtn = -EIO; ++ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv); ++ if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { ++ printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", ++ smp_processor_id()); ++ *(int *)rtn = -EIO; ++ } ++} ++ ++static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) ++{ ++ u8 cache; ++ u64 ipat = 0; ++ ++ /* For VT-d and EPT combination ++ * 1. MMIO: always map as UC ++ * 2. EPT with VT-d: ++ * a. VT-d without snooping control feature: can't guarantee the ++ * result, try to trust guest. ++ * b. VT-d with snooping control feature: snooping control feature of ++ * VT-d engine can guarantee the cache correctness. Just set it ++ * to WB to keep consistent with host. So the same as item 3. ++ * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep ++ * consistent with host MTRR ++ */ ++ if (is_mmio) { ++ cache = MTRR_TYPE_UNCACHABLE; ++ goto exit; ++ } ++ ++ if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { ++ ipat = VMX_EPT_IPAT_BIT; ++ cache = MTRR_TYPE_WRBACK; ++ goto exit; ++ } ++ ++ if (kvm_read_cr0(vcpu) & X86_CR0_CD) { ++ ipat = VMX_EPT_IPAT_BIT; ++ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) ++ cache = MTRR_TYPE_WRBACK; ++ else ++ cache = MTRR_TYPE_UNCACHABLE; ++ goto exit; ++ } ++ ++ cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn); ++ ++exit: ++ return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat; ++} ++ ++static int vmx_get_lpage_level(void) ++{ ++ if (enable_ept && !cpu_has_vmx_ept_1g_page()) ++ return PT_DIRECTORY_LEVEL; ++ else ++ /* For shadow and EPT supported 1GB page */ ++ return PT_PDPE_LEVEL; ++} ++ ++static void vmcs_set_secondary_exec_control(u32 new_ctl) ++{ ++ /* ++ * These bits in the secondary execution controls field ++ * are dynamic, the others are mostly based on the hypervisor ++ * architecture and the guest's CPUID. Do not touch the ++ * dynamic bits. 
++ */ ++ u32 mask = ++ SECONDARY_EXEC_SHADOW_VMCS | ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ SECONDARY_EXEC_DESC; ++ ++ u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); ++ ++ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, ++ (new_ctl & ~mask) | (cur_ctl & mask)); ++} ++ ++/* ++ * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits ++ * (indicating "allowed-1") if they are supported in the guest's CPUID. ++ */ ++static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct kvm_cpuid_entry2 *entry; ++ ++ vmx->nested.msrs.cr0_fixed1 = 0xffffffff; ++ vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE; ++ ++#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \ ++ if (entry && (entry->_reg & (_cpuid_mask))) \ ++ vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \ ++} while (0) ++ ++ entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); ++ cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME)); ++ cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME)); ++ cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC)); ++ cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE)); ++ cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE)); ++ cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE)); ++ cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE)); ++ cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE)); ++ cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR)); ++ cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM)); ++ cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX)); ++ cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX)); ++ cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID)); ++ cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE)); ++ ++ entry = kvm_find_cpuid_entry(vcpu, 0x7, 0); ++ cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE)); ++ cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); ++ cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); ++ cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); ++ cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); ++ ++#undef cr4_fixed1_update ++} ++ ++static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (kvm_mpx_supported()) { ++ bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX); ++ ++ if (mpx_enabled) { ++ vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; ++ vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; ++ } else { ++ vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; ++ vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; ++ } ++ } ++} ++ ++static void vmx_cpuid_update(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (cpu_has_secondary_exec_ctrls()) { ++ vmx_compute_secondary_exec_control(vmx); ++ vmcs_set_secondary_exec_control(vmx->secondary_exec_control); ++ } ++ ++ if (nested_vmx_allowed(vcpu)) ++ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= ++ FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; ++ else ++ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= ++ ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; ++ ++ if (nested_vmx_allowed(vcpu)) { ++ nested_vmx_cr_fixed1_bits_update(vcpu); ++ nested_vmx_entry_exit_ctls_update(vcpu); ++ } ++} ++ ++static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) ++{ ++ if (func == 1 && nested) ++ entry->ecx |= 
bit(X86_FEATURE_VMX); ++} ++ ++static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, ++ struct x86_exception *fault) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 exit_reason; ++ unsigned long exit_qualification = vcpu->arch.exit_qualification; ++ ++ if (vmx->nested.pml_full) { ++ exit_reason = EXIT_REASON_PML_FULL; ++ vmx->nested.pml_full = false; ++ exit_qualification &= INTR_INFO_UNBLOCK_NMI; ++ } else if (fault->error_code & PFERR_RSVD_MASK) ++ exit_reason = EXIT_REASON_EPT_MISCONFIG; ++ else ++ exit_reason = EXIT_REASON_EPT_VIOLATION; ++ ++ nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification); ++ vmcs12->guest_physical_address = fault->address; ++} ++ ++static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) ++{ ++ return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT; ++} ++ ++/* Callbacks for nested_ept_init_mmu_context: */ ++ ++static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) ++{ ++ /* return the page table to be shadowed - in our case, EPT12 */ ++ return get_vmcs12(vcpu)->ept_pointer; ++} ++ ++static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) ++{ ++ WARN_ON(mmu_is_nested(vcpu)); ++ if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu))) ++ return 1; ++ ++ kvm_init_shadow_ept_mmu(vcpu, ++ to_vmx(vcpu)->nested.msrs.ept_caps & ++ VMX_EPT_EXECUTE_ONLY_BIT, ++ nested_ept_ad_enabled(vcpu), ++ nested_ept_get_cr3(vcpu)); ++ vcpu->arch.mmu.set_cr3 = vmx_set_cr3; ++ vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; ++ vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; ++ ++ vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; ++ return 0; ++} ++ ++static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.walk_mmu = &vcpu->arch.mmu; ++} ++ ++static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, ++ u16 error_code) ++{ ++ bool inequality, bit; ++ ++ bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0; ++ inequality = ++ (error_code & vmcs12->page_fault_error_code_mask) != ++ vmcs12->page_fault_error_code_match; ++ return inequality ^ bit; ++} ++ ++static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, ++ struct x86_exception *fault) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ WARN_ON(!is_guest_mode(vcpu)); ++ ++ if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) && ++ !to_vmx(vcpu)->nested.nested_run_pending) { ++ vmcs12->vm_exit_intr_error_code = fault->error_code; ++ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, ++ PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | ++ INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, ++ fault->address); ++ } else { ++ kvm_inject_page_fault(vcpu, fault); ++ } ++} ++ ++static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12); ++ ++static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct page *page; ++ u64 hpa; ++ ++ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { ++ /* ++ * Translate L1 physical address to host physical ++ * address for vmcs02. Keep the page pinned, so this ++ * physical address remains valid. We keep a reference ++ * to it so we can release it later. 
++ */ ++ if (vmx->nested.apic_access_page) { /* shouldn't happen */ ++ kvm_release_page_dirty(vmx->nested.apic_access_page); ++ vmx->nested.apic_access_page = NULL; ++ } ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); ++ /* ++ * If translation failed, no matter: This feature asks ++ * to exit when accessing the given address, and if it ++ * can never be accessed, this feature won't do ++ * anything anyway. ++ */ ++ if (!is_error_page(page)) { ++ vmx->nested.apic_access_page = page; ++ hpa = page_to_phys(vmx->nested.apic_access_page); ++ vmcs_write64(APIC_ACCESS_ADDR, hpa); ++ } else { ++ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); ++ } ++ } ++ ++ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { ++ if (vmx->nested.virtual_apic_page) { /* shouldn't happen */ ++ kvm_release_page_dirty(vmx->nested.virtual_apic_page); ++ vmx->nested.virtual_apic_page = NULL; ++ } ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr); ++ ++ /* ++ * If translation failed, VM entry will fail because ++ * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. ++ * Failing the vm entry is _not_ what the processor ++ * does but it's basically the only possibility we ++ * have. We could still enter the guest if CR8 load ++ * exits are enabled, CR8 store exits are enabled, and ++ * virtualize APIC access is disabled; in this case ++ * the processor would never use the TPR shadow and we ++ * could simply clear the bit from the execution ++ * control. But such a configuration is useless, so ++ * let's keep the code simple. ++ */ ++ if (!is_error_page(page)) { ++ vmx->nested.virtual_apic_page = page; ++ hpa = page_to_phys(vmx->nested.virtual_apic_page); ++ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); ++ } ++ } ++ ++ if (nested_cpu_has_posted_intr(vmcs12)) { ++ if (vmx->nested.pi_desc_page) { /* shouldn't happen */ ++ kunmap(vmx->nested.pi_desc_page); ++ kvm_release_page_dirty(vmx->nested.pi_desc_page); ++ vmx->nested.pi_desc_page = NULL; ++ vmx->nested.pi_desc = NULL; ++ vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull); ++ } ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr); ++ if (is_error_page(page)) ++ return; ++ vmx->nested.pi_desc_page = page; ++ vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page); ++ vmx->nested.pi_desc = ++ (struct pi_desc *)((void *)vmx->nested.pi_desc + ++ (unsigned long)(vmcs12->posted_intr_desc_addr & ++ (PAGE_SIZE - 1))); ++ vmcs_write64(POSTED_INTR_DESC_ADDR, ++ page_to_phys(vmx->nested.pi_desc_page) + ++ (unsigned long)(vmcs12->posted_intr_desc_addr & ++ (PAGE_SIZE - 1))); ++ } ++ if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) ++ vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_USE_MSR_BITMAPS); ++ else ++ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_USE_MSR_BITMAPS); ++} ++ ++static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) ++{ ++ u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ /* ++ * A timer value of zero is architecturally guaranteed to cause ++ * a VMExit prior to executing any instructions in the guest. 
++ */ ++ if (preemption_timeout == 0) { ++ vmx_preemption_timer_fn(&vmx->nested.preemption_timer); ++ return; ++ } ++ ++ if (vcpu->arch.virtual_tsc_khz == 0) ++ return; ++ ++ preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; ++ preemption_timeout *= 1000000; ++ do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz); ++ hrtimer_start(&vmx->nested.preemption_timer, ++ ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL); ++} ++ ++static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) ++ return 0; ++ ++ if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) || ++ !page_address_valid(vcpu, vmcs12->io_bitmap_b)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) ++ return 0; ++ ++ if (!page_address_valid(vcpu, vmcs12->msr_bitmap)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) ++ return 0; ++ ++ if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) { ++ int msr; ++ ++ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { ++ unsigned word = msr / BITS_PER_LONG; ++ ++ msr_bitmap[word] = ~0; ++ msr_bitmap[word + (0x800 / sizeof(long))] = ~0; ++ } ++} ++ ++/* ++ * Merge L0's and L1's MSR bitmap, return false to indicate that ++ * we do not use the hardware. ++ */ ++static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ int msr; ++ struct page *page; ++ unsigned long *msr_bitmap_l1; ++ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; ++ /* ++ * pred_cmd & spec_ctrl are trying to verify two things: ++ * ++ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This ++ * ensures that we do not accidentally generate an L02 MSR bitmap ++ * from the L12 MSR bitmap that is too permissive. ++ * 2. That L1 or L2s have actually used the MSR. This avoids ++ * unnecessarily merging of the bitmap if the MSR is unused. This ++ * works properly because we only update the L01 MSR bitmap lazily. ++ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only ++ * updated to reflect this when L1 (or its L2s) actually write to ++ * the MSR. ++ */ ++ bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); ++ bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); ++ ++ /* Nothing to do if the MSR bitmap is not in use. */ ++ if (!cpu_has_vmx_msr_bitmap() || ++ !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) ++ return false; ++ ++ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && ++ !pred_cmd && !spec_ctrl) ++ return false; ++ ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); ++ if (is_error_page(page)) ++ return false; ++ ++ msr_bitmap_l1 = (unsigned long *)kmap(page); ++ ++ /* ++ * To keep the control flow simple, pay eight 8-byte writes (sixteen ++ * 4-byte writes on 32-bit systems) up front to enable intercepts for ++ * the x2APIC MSR range and selectively disable them below. 
++ */ ++ enable_x2apic_msr_intercepts(msr_bitmap_l0); ++ ++ if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { ++ if (nested_cpu_has_apic_reg_virt(vmcs12)) { ++ /* ++ * L0 need not intercept reads for MSRs between 0x800 ++ * and 0x8ff, it just lets the processor take the value ++ * from the virtual-APIC page; take those 256 bits ++ * directly from the L1 bitmap. ++ */ ++ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { ++ unsigned word = msr / BITS_PER_LONG; ++ ++ msr_bitmap_l0[word] = msr_bitmap_l1[word]; ++ } ++ } ++ ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ X2APIC_MSR(APIC_TASKPRI), ++ MSR_TYPE_R | MSR_TYPE_W); ++ ++ if (nested_cpu_has_vid(vmcs12)) { ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ X2APIC_MSR(APIC_EOI), ++ MSR_TYPE_W); ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ X2APIC_MSR(APIC_SELF_IPI), ++ MSR_TYPE_W); ++ } ++ } ++ ++ if (spec_ctrl) ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ MSR_IA32_SPEC_CTRL, ++ MSR_TYPE_R | MSR_TYPE_W); ++ ++ if (pred_cmd) ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ MSR_IA32_PRED_CMD, ++ MSR_TYPE_W); ++ ++ kunmap(page); ++ kvm_release_page_clean(page); ++ ++ return true; ++} ++ ++static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ struct vmcs12 *shadow; ++ struct page *page; ++ ++ if (!nested_cpu_has_shadow_vmcs(vmcs12) || ++ vmcs12->vmcs_link_pointer == -1ull) ++ return; ++ ++ shadow = get_shadow_vmcs12(vcpu); ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); ++ ++ memcpy(shadow, kmap(page), VMCS12_SIZE); ++ ++ kunmap(page); ++ kvm_release_page_clean(page); ++} ++ ++static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (!nested_cpu_has_shadow_vmcs(vmcs12) || ++ vmcs12->vmcs_link_pointer == -1ull) ++ return; ++ ++ kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer, ++ get_shadow_vmcs12(vcpu), VMCS12_SIZE); ++} ++ ++static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && ++ !page_address_valid(vcpu, vmcs12->apic_access_addr)) ++ return -EINVAL; ++ else ++ return 0; ++} ++ ++static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && ++ !nested_cpu_has_apic_reg_virt(vmcs12) && ++ !nested_cpu_has_vid(vmcs12) && ++ !nested_cpu_has_posted_intr(vmcs12)) ++ return 0; ++ ++ /* ++ * If virtualize x2apic mode is enabled, ++ * virtualize apic access must be disabled. ++ */ ++ if (nested_cpu_has_virt_x2apic_mode(vmcs12) && ++ nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) ++ return -EINVAL; ++ ++ /* ++ * If virtual interrupt delivery is enabled, ++ * we must exit on external interrupts. ++ */ ++ if (nested_cpu_has_vid(vmcs12) && ++ !nested_exit_on_intr(vcpu)) ++ return -EINVAL; ++ ++ /* ++ * bits 15:8 should be zero in posted_intr_nv, ++ * the descriptor address has been already checked ++ * in nested_get_vmcs12_pages. ++ * ++ * bits 5:0 of posted_intr_desc_addr should be zero. 
++ */ ++ if (nested_cpu_has_posted_intr(vmcs12) && ++ (!nested_cpu_has_vid(vmcs12) || ++ !nested_exit_intr_ack_set(vcpu) || ++ (vmcs12->posted_intr_nv & 0xff00) || ++ (vmcs12->posted_intr_desc_addr & 0x3f) || ++ (vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))) ++ return -EINVAL; ++ ++ /* tpr shadow is needed by all apicv features. */ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, ++ unsigned long count_field, ++ unsigned long addr_field) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ int maxphyaddr; ++ u64 count, addr; ++ ++ if (vmcs12_read_any(vmcs12, count_field, &count) || ++ vmcs12_read_any(vmcs12, addr_field, &addr)) { ++ WARN_ON(1); ++ return -EINVAL; ++ } ++ if (count == 0) ++ return 0; ++ maxphyaddr = cpuid_maxphyaddr(vcpu); ++ if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr || ++ (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) { ++ pr_debug_ratelimited( ++ "nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)", ++ addr_field, maxphyaddr, count, addr); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (vmcs12->vm_exit_msr_load_count == 0 && ++ vmcs12->vm_exit_msr_store_count == 0 && ++ vmcs12->vm_entry_msr_load_count == 0) ++ return 0; /* Fast path */ ++ if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT, ++ VM_EXIT_MSR_LOAD_ADDR) || ++ nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT, ++ VM_EXIT_MSR_STORE_ADDR) || ++ nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT, ++ VM_ENTRY_MSR_LOAD_ADDR)) ++ return -EINVAL; ++ return 0; ++} ++ ++static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ u64 address = vmcs12->pml_address; ++ int maxphyaddr = cpuid_maxphyaddr(vcpu); ++ ++ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) { ++ if (!nested_cpu_has_ept(vmcs12) || ++ !IS_ALIGNED(address, 4096) || ++ address >> maxphyaddr) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has_shadow_vmcs(vmcs12)) ++ return 0; ++ ++ if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) || ++ !page_address_valid(vcpu, vmcs12->vmwrite_bitmap)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, ++ struct vmx_msr_entry *e) ++{ ++ /* x2APIC MSR accesses are not allowed */ ++ if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8) ++ return -EINVAL; ++ if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */ ++ e->index == MSR_IA32_UCODE_REV) ++ return -EINVAL; ++ if (e->reserved != 0) ++ return -EINVAL; ++ return 0; ++} ++ ++static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu, ++ struct vmx_msr_entry *e) ++{ ++ if (e->index == MSR_FS_BASE || ++ e->index == MSR_GS_BASE || ++ e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */ ++ nested_vmx_msr_check_common(vcpu, e)) ++ return -EINVAL; ++ return 0; ++} ++ ++static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu, ++ struct vmx_msr_entry *e) ++{ ++ if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */ ++ nested_vmx_msr_check_common(vcpu, e)) ++ return -EINVAL; ++ return 0; ++} ++ ++/* ++ * Load guest's/host's msr at nested entry/exit. ++ * return 0 for success, entry index for failure. 
++ */ ++static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) ++{ ++ u32 i; ++ struct vmx_msr_entry e; ++ struct msr_data msr; ++ ++ msr.host_initiated = false; ++ for (i = 0; i < count; i++) { ++ if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), ++ &e, sizeof(e))) { ++ pr_debug_ratelimited( ++ "%s cannot read MSR entry (%u, 0x%08llx)\n", ++ __func__, i, gpa + i * sizeof(e)); ++ goto fail; ++ } ++ if (nested_vmx_load_msr_check(vcpu, &e)) { ++ pr_debug_ratelimited( ++ "%s check failed (%u, 0x%x, 0x%x)\n", ++ __func__, i, e.index, e.reserved); ++ goto fail; ++ } ++ msr.index = e.index; ++ msr.data = e.value; ++ if (kvm_set_msr(vcpu, &msr)) { ++ pr_debug_ratelimited( ++ "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", ++ __func__, i, e.index, e.value); ++ goto fail; ++ } ++ } ++ return 0; ++fail: ++ return i + 1; ++} ++ ++static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) ++{ ++ u32 i; ++ struct vmx_msr_entry e; ++ ++ for (i = 0; i < count; i++) { ++ struct msr_data msr_info; ++ if (kvm_vcpu_read_guest(vcpu, ++ gpa + i * sizeof(e), ++ &e, 2 * sizeof(u32))) { ++ pr_debug_ratelimited( ++ "%s cannot read MSR entry (%u, 0x%08llx)\n", ++ __func__, i, gpa + i * sizeof(e)); ++ return -EINVAL; ++ } ++ if (nested_vmx_store_msr_check(vcpu, &e)) { ++ pr_debug_ratelimited( ++ "%s check failed (%u, 0x%x, 0x%x)\n", ++ __func__, i, e.index, e.reserved); ++ return -EINVAL; ++ } ++ msr_info.host_initiated = false; ++ msr_info.index = e.index; ++ if (kvm_get_msr(vcpu, &msr_info)) { ++ pr_debug_ratelimited( ++ "%s cannot read MSR (%u, 0x%x)\n", ++ __func__, i, e.index); ++ return -EINVAL; ++ } ++ if (kvm_vcpu_write_guest(vcpu, ++ gpa + i * sizeof(e) + ++ offsetof(struct vmx_msr_entry, value), ++ &msr_info.data, sizeof(msr_info.data))) { ++ pr_debug_ratelimited( ++ "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", ++ __func__, i, e.index, msr_info.data); ++ return -EINVAL; ++ } ++ } ++ return 0; ++} ++ ++static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ unsigned long invalid_mask; ++ ++ invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu); ++ return (val & invalid_mask) == 0; ++} ++ ++/* ++ * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are ++ * emulating VM entry into a guest with EPT enabled. ++ * Returns 0 on success, 1 on failure. Invalid state exit qualification code ++ * is assigned to entry_failure_code on failure. ++ */ ++static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, ++ u32 *entry_failure_code) ++{ ++ if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { ++ if (!nested_cr3_valid(vcpu, cr3)) { ++ *entry_failure_code = ENTRY_FAIL_DEFAULT; ++ return 1; ++ } ++ ++ /* ++ * If PAE paging and EPT are both on, CR3 is not used by the CPU and ++ * must not be dereferenced. 
++ */ ++ if (is_pae_paging(vcpu) && !nested_ept) { ++ if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { ++ *entry_failure_code = ENTRY_FAIL_PDPTE; ++ return 1; ++ } ++ } ++ } ++ ++ if (!nested_ept) ++ kvm_mmu_new_cr3(vcpu, cr3, false); ++ ++ vcpu->arch.cr3 = cr3; ++ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); ++ ++ kvm_init_mmu(vcpu, false); ++ ++ return 0; ++} ++ ++static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); ++ vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector); ++ vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector); ++ vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector); ++ vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector); ++ vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector); ++ vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector); ++ vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit); ++ vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit); ++ vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit); ++ vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit); ++ vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit); ++ vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit); ++ vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit); ++ vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit); ++ vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit); ++ vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes); ++ vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); ++ vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes); ++ vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes); ++ vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes); ++ vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes); ++ vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes); ++ vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base); ++ vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base); ++ vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base); ++ vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base); ++ vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base); ++ vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base); ++ vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); ++ vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); ++ ++ vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); ++ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, ++ vmcs12->guest_pending_dbg_exceptions); ++ vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); ++ vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); ++ ++ if (nested_cpu_has_xsaves(vmcs12)) ++ vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap); ++ vmcs_write64(VMCS_LINK_POINTER, -1ull); ++ ++ if (cpu_has_vmx_posted_intr()) ++ vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR); ++ ++ /* ++ * Whether page-faults are trapped is determined by a combination of ++ * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. ++ * If enable_ept, L0 doesn't care about page faults and we should ++ * set all of these to L1's desires. However, if !enable_ept, L0 does ++ * care about (at least some) page faults, and because it is not easy ++ * (if at all possible?) to merge L0 and L1's desires, we simply ask ++ * to exit on each and every L2 page fault. This is done by setting ++ * MASK=MATCH=0 and (see below) EB.PF=1. 
++ * Note that below we don't need special code to set EB.PF beyond the ++ * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, ++ * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when ++ * !enable_ept, EB.PF is 1, so the "or" will always be 1. ++ */ ++ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, ++ enable_ept ? vmcs12->page_fault_error_code_mask : 0); ++ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, ++ enable_ept ? vmcs12->page_fault_error_code_match : 0); ++ ++ /* All VMFUNCs are currently emulated through L0 vmexits. */ ++ if (cpu_has_vmx_vmfunc()) ++ vmcs_write64(VM_FUNCTION_CONTROL, 0); ++ ++ if (cpu_has_vmx_apicv()) { ++ vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0); ++ vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1); ++ vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2); ++ vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3); ++ } ++ ++ /* ++ * Set host-state according to L0's settings (vmcs12 is irrelevant here) ++ * Some constant fields are set here by vmx_set_constant_host_state(). ++ * Other fields are different per CPU, and will be set later when ++ * vmx_vcpu_load() is called, and when vmx_prepare_switch_to_guest() ++ * is called. ++ */ ++ vmx_set_constant_host_state(vmx); ++ ++ /* ++ * Set the MSR load/store lists to match L0's settings. ++ */ ++ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); ++ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); ++ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); ++ ++ set_cr4_guest_host_mask(vmx); ++ ++ if (kvm_mpx_supported()) { ++ if (vmx->nested.nested_run_pending && ++ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) ++ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); ++ else ++ vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); ++ } ++ ++ if (enable_vpid) { ++ if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) ++ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); ++ else ++ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); ++ } ++ ++ /* ++ * L1 may access the L2's PDPTR, so save them to construct vmcs12 ++ */ ++ if (enable_ept) { ++ vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); ++ vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); ++ vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); ++ vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); ++ } ++ ++ if (cpu_has_vmx_msr_bitmap()) ++ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); ++} ++ ++/* ++ * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested ++ * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it ++ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 ++ * guest in a way that will both be appropriate to L1's requests, and our ++ * needs. In addition to modifying the active vmcs (which is vmcs02), this ++ * function also has additional necessary side-effects, like setting various ++ * vcpu->arch fields. ++ * Returns 0 on success, 1 on failure. Invalid state exit qualification code ++ * is assigned to entry_failure_code on failure. 
++ */ ++static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, ++ u32 *entry_failure_code) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 exec_control, vmcs12_exec_ctrl; ++ ++ if (vmx->nested.dirty_vmcs12) { ++ prepare_vmcs02_full(vcpu, vmcs12); ++ vmx->nested.dirty_vmcs12 = false; ++ } ++ ++ /* ++ * First, the fields that are shadowed. This must be kept in sync ++ * with vmx_shadow_fields.h. ++ */ ++ ++ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); ++ vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit); ++ vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); ++ vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base); ++ vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base); ++ ++ if (vmx->nested.nested_run_pending && ++ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { ++ kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); ++ vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); ++ } else { ++ kvm_set_dr(vcpu, 7, vcpu->arch.dr7); ++ vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); ++ } ++ if (vmx->nested.nested_run_pending) { ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, ++ vmcs12->vm_entry_intr_info_field); ++ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, ++ vmcs12->vm_entry_exception_error_code); ++ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, ++ vmcs12->vm_entry_instruction_len); ++ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, ++ vmcs12->guest_interruptibility_info); ++ vmx->loaded_vmcs->nmi_known_unmasked = ++ !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI); ++ } else { ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); ++ } ++ vmx_set_rflags(vcpu, vmcs12->guest_rflags); ++ ++ exec_control = vmcs12->pin_based_vm_exec_control; ++ ++ /* Preemption timer setting is computed directly in vmx_vcpu_run. */ ++ exec_control |= vmcs_config.pin_based_exec_ctrl; ++ exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; ++ vmx->loaded_vmcs->hv_timer_armed = false; ++ ++ /* Posted interrupts setting is only taken from vmcs12. */ ++ if (nested_cpu_has_posted_intr(vmcs12)) { ++ vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; ++ vmx->nested.pi_pending = false; ++ } else { ++ exec_control &= ~PIN_BASED_POSTED_INTR; ++ } ++ ++ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); ++ ++ vmx->nested.preemption_timer_expired = false; ++ if (nested_cpu_has_preemption_timer(vmcs12)) ++ vmx_start_preemption_timer(vcpu); ++ ++ if (cpu_has_secondary_exec_ctrls()) { ++ exec_control = vmx->secondary_exec_control; ++ ++ /* Take the following fields only from vmcs12 */ ++ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ SECONDARY_EXEC_ENABLE_INVPCID | ++ SECONDARY_EXEC_RDTSCP | ++ SECONDARY_EXEC_XSAVES | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_ENABLE_VMFUNC); ++ if (nested_cpu_has(vmcs12, ++ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { ++ vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & ++ ~SECONDARY_EXEC_ENABLE_PML; ++ exec_control |= vmcs12_exec_ctrl; ++ } ++ ++ /* VMCS shadowing for L2 is emulated for now */ ++ exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; ++ ++ if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ++ vmcs_write16(GUEST_INTR_STATUS, ++ vmcs12->guest_intr_status); ++ ++ /* ++ * Write an illegal value to APIC_ACCESS_ADDR. Later, ++ * nested_get_vmcs12_pages will either fix it up or ++ * remove the VM execution control. 
++ */ ++ if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) ++ vmcs_write64(APIC_ACCESS_ADDR, -1ull); ++ ++ if (exec_control & SECONDARY_EXEC_ENCLS_EXITING) ++ vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); ++ ++ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); ++ } ++ ++ /* ++ * HOST_RSP is normally set correctly in vmx_vcpu_run() just before ++ * entry, but only if the current (host) sp changed from the value ++ * we wrote last (vmx->host_rsp). This cache is no longer relevant ++ * if we switch vmcs, and rather than hold a separate cache per vmcs, ++ * here we just force the write to happen on entry. ++ */ ++ vmx->host_rsp = 0; ++ ++ exec_control = vmx_exec_control(vmx); /* L0's desires */ ++ exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; ++ exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; ++ exec_control &= ~CPU_BASED_TPR_SHADOW; ++ exec_control |= vmcs12->cpu_based_vm_exec_control; ++ ++ /* ++ * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if ++ * nested_get_vmcs12_pages can't fix it up, the illegal value ++ * will result in a VM entry failure. ++ */ ++ if (exec_control & CPU_BASED_TPR_SHADOW) { ++ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); ++ vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); ++ } else { ++#ifdef CONFIG_X86_64 ++ exec_control |= CPU_BASED_CR8_LOAD_EXITING | ++ CPU_BASED_CR8_STORE_EXITING; ++#endif ++ } ++ ++ /* ++ * A vmexit (to either L1 hypervisor or L0 userspace) is always needed ++ * for I/O port accesses. ++ */ ++ exec_control &= ~CPU_BASED_USE_IO_BITMAPS; ++ exec_control |= CPU_BASED_UNCOND_IO_EXITING; ++ ++ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); ++ ++ /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the ++ * bitwise-or of what L1 wants to trap for L2, and what we want to ++ * trap. Note that CR0.TS also needs updating - we do this later. ++ */ ++ update_exception_bitmap(vcpu); ++ vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; ++ vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); ++ ++ /* L2->L1 exit controls are emulated - the hardware exit is to L0 so ++ * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER ++ * bits are further modified by vmx_set_efer() below. ++ */ ++ vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); ++ ++ /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are ++ * emulated by vmx_set_efer(), below. ++ */ ++ vm_entry_controls_init(vmx, ++ (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & ++ ~VM_ENTRY_IA32E_MODE) | ++ (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); ++ ++ if (vmx->nested.nested_run_pending && ++ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) { ++ vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); ++ vcpu->arch.pat = vmcs12->guest_ia32_pat; ++ } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { ++ vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); ++ } ++ ++ vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); ++ ++ if (kvm_has_tsc_control) ++ decache_tsc_multiplier(vmx); ++ ++ if (enable_vpid) { ++ /* ++ * There is no direct mapping between vpid02 and vpid12, the ++ * vpid02 is per-vCPU for L0 and reused while the value of ++ * vpid12 is changed w/ one invvpid during nested vmentry. ++ * The vpid12 is allocated by L1 for L2, so it will not ++ * influence global bitmap(for vpid01 and vpid02 allocation) ++ * even if spawn a lot of nested vCPUs. 
++ */ ++ if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) { ++ if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) { ++ vmx->nested.last_vpid = vmcs12->virtual_processor_id; ++ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); ++ } ++ } else { ++ vmx_flush_tlb(vcpu, true); ++ } ++ } ++ ++ if (enable_pml) { ++ /* ++ * Conceptually we want to copy the PML address and index from ++ * vmcs01 here, and then back to vmcs01 on nested vmexit. But, ++ * since we always flush the log on each vmexit, this happens ++ * to be equivalent to simply resetting the fields in vmcs02. ++ */ ++ ASSERT(vmx->pml_pg); ++ vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); ++ vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); ++ } ++ ++ if (nested_cpu_has_ept(vmcs12)) { ++ if (nested_ept_init_mmu_context(vcpu)) { ++ *entry_failure_code = ENTRY_FAIL_DEFAULT; ++ return 1; ++ } ++ } else if (nested_cpu_has2(vmcs12, ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { ++ vmx_flush_tlb(vcpu, true); ++ } ++ ++ /* ++ * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those ++ * bits which we consider mandatory enabled. ++ * The CR0_READ_SHADOW is what L2 should have expected to read given ++ * the specifications by L1; It's not enough to take ++ * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we we ++ * have more bits than L1 expected. ++ */ ++ vmx_set_cr0(vcpu, vmcs12->guest_cr0); ++ vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12)); ++ ++ vmx_set_cr4(vcpu, vmcs12->guest_cr4); ++ vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); ++ ++ if (vmx->nested.nested_run_pending && ++ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) ++ vcpu->arch.efer = vmcs12->guest_ia32_efer; ++ else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ++ vcpu->arch.efer |= (EFER_LMA | EFER_LME); ++ else ++ vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); ++ /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ ++ vmx_set_efer(vcpu, vcpu->arch.efer); ++ ++ /* ++ * Guest state is invalid and unrestricted guest is disabled, ++ * which means L1 attempted VMEntry to L2 with invalid state. ++ * Fail the VMEntry. ++ */ ++ if (vmx->emulation_required) { ++ *entry_failure_code = ENTRY_FAIL_DEFAULT; ++ return 1; ++ } ++ ++ /* Shadow page tables on either EPT or shadow page tables. 
*/ ++ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), ++ entry_failure_code)) ++ return 1; ++ ++ if (!enable_ept) ++ vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; ++ ++ kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); ++ kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); ++ return 0; ++} ++ ++static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has_nmi_exiting(vmcs12) && ++ nested_cpu_has_virtual_nmis(vmcs12)) ++ return -EINVAL; ++ ++ if (!nested_cpu_has_virtual_nmis(vmcs12) && ++ nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && ++ vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_apic_access_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_pml_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, ++ vmx->nested.msrs.procbased_ctls_low, ++ vmx->nested.msrs.procbased_ctls_high) || ++ (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && ++ !vmx_control_verify(vmcs12->secondary_vm_exec_control, ++ vmx->nested.msrs.secondary_ctls_low, ++ vmx->nested.msrs.secondary_ctls_high)) || ++ !vmx_control_verify(vmcs12->pin_based_vm_exec_control, ++ vmx->nested.msrs.pinbased_ctls_low, ++ vmx->nested.msrs.pinbased_ctls_high) || ++ !vmx_control_verify(vmcs12->vm_exit_controls, ++ vmx->nested.msrs.exit_ctls_low, ++ vmx->nested.msrs.exit_ctls_high) || ++ !vmx_control_verify(vmcs12->vm_entry_controls, ++ vmx->nested.msrs.entry_ctls_low, ++ vmx->nested.msrs.entry_ctls_high)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_nmi_controls(vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_cpu_has_vmfunc(vmcs12)) { ++ if (vmcs12->vm_function_control & ++ ~vmx->nested.msrs.vmfunc_controls) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_cpu_has_eptp_switching(vmcs12)) { ++ if (!nested_cpu_has_ept(vmcs12) || ++ !page_address_valid(vcpu, vmcs12->eptp_list_address)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ } ++ } ++ ++ if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || ++ !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || ++ !nested_cr3_valid(vcpu, vmcs12->host_cr3)) ++ return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; ++ ++ /* ++ * 
From the Intel SDM, volume 3: ++ * Fields relevant to VM-entry event injection must be set properly. ++ * These fields are the VM-entry interruption-information field, the ++ * VM-entry exception error code, and the VM-entry instruction length. ++ */ ++ if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) { ++ u32 intr_info = vmcs12->vm_entry_intr_info_field; ++ u8 vector = intr_info & INTR_INFO_VECTOR_MASK; ++ u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK; ++ bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK; ++ bool should_have_error_code; ++ bool urg = nested_cpu_has2(vmcs12, ++ SECONDARY_EXEC_UNRESTRICTED_GUEST); ++ bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE; ++ ++ /* VM-entry interruption-info field: interruption type */ ++ if (intr_type == INTR_TYPE_RESERVED || ++ (intr_type == INTR_TYPE_OTHER_EVENT && ++ !nested_cpu_supports_monitor_trap_flag(vcpu))) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ /* VM-entry interruption-info field: vector */ ++ if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) || ++ (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) || ++ (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ /* VM-entry interruption-info field: deliver error code */ ++ should_have_error_code = ++ intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode && ++ x86_exception_has_error_code(vector); ++ if (has_error_code != should_have_error_code) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ /* VM-entry exception error code */ ++ if (has_error_code && ++ vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ /* VM-entry interruption-info field: reserved bits */ ++ if (intr_info & INTR_INFO_RESVD_BITS_MASK) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ /* VM-entry instruction length */ ++ switch (intr_type) { ++ case INTR_TYPE_SOFT_EXCEPTION: ++ case INTR_TYPE_SOFT_INTR: ++ case INTR_TYPE_PRIV_SW_EXCEPTION: ++ if ((vmcs12->vm_entry_instruction_len > 15) || ++ (vmcs12->vm_entry_instruction_len == 0 && ++ !nested_cpu_has_zero_length_injection(vcpu))) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ } ++ } ++ ++ return 0; ++} ++ ++static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ int r; ++ struct page *page; ++ struct vmcs12 *shadow; ++ ++ if (vmcs12->vmcs_link_pointer == -1ull) ++ return 0; ++ ++ if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)) ++ return -EINVAL; ++ ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); ++ if (is_error_page(page)) ++ return -EINVAL; ++ ++ r = 0; ++ shadow = kmap(page); ++ if (shadow->hdr.revision_id != VMCS12_REVISION || ++ shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)) ++ r = -EINVAL; ++ kunmap(page); ++ kvm_release_page_clean(page); ++ return r; ++} ++ ++static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, ++ u32 *exit_qual) ++{ ++ bool ia32e; ++ ++ *exit_qual = ENTRY_FAIL_DEFAULT; ++ ++ if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || ++ !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) ++ return 1; ++ ++ if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { ++ *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; ++ return 1; ++ } ++ ++ /* ++ * If the load IA32_EFER VM-entry control is 1, the following checks ++ * are performed on the field for the IA32_EFER MSR: ++ * - Bits reserved in the IA32_EFER MSR must be 0. 
++ * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of ++ * the IA-32e mode guest VM-exit control. It must also be identical ++ * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to ++ * CR0.PG) is 1. ++ */ ++ if (to_vmx(vcpu)->nested.nested_run_pending && ++ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { ++ ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; ++ if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || ++ ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || ++ ((vmcs12->guest_cr0 & X86_CR0_PG) && ++ ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) ++ return 1; ++ } ++ ++ /* ++ * If the load IA32_EFER VM-exit control is 1, bits reserved in the ++ * IA32_EFER MSR must be 0 in the field for that register. In addition, ++ * the values of the LMA and LME bits in the field must each be that of ++ * the host address-space size VM-exit control. ++ */ ++ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { ++ ia32e = (vmcs12->vm_exit_controls & ++ VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; ++ if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || ++ ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || ++ ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) ++ return 1; ++ } ++ ++ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && ++ (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || ++ (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * If exit_qual is NULL, this is being called from state restore (either RSM ++ * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume. ++ */ ++static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ bool from_vmentry = !!exit_qual; ++ u32 dummy_exit_qual; ++ bool evaluate_pending_interrupts; ++ int r = 0; ++ ++ evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & ++ (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); ++ if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) ++ evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); ++ ++ enter_guest_mode(vcpu); ++ ++ if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) ++ vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); ++ if (kvm_mpx_supported() && ++ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) ++ vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); ++ ++ vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); ++ vmx_segment_cache_clear(vmx); ++ ++ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) ++ vcpu->arch.tsc_offset += vmcs12->tsc_offset; ++ ++ r = EXIT_REASON_INVALID_STATE; ++ if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? exit_qual : &dummy_exit_qual)) ++ goto fail; ++ ++ if (from_vmentry) { ++ nested_get_vmcs12_pages(vcpu); ++ ++ r = EXIT_REASON_MSR_LOAD_FAIL; ++ *exit_qual = nested_vmx_load_msr(vcpu, ++ vmcs12->vm_entry_msr_load_addr, ++ vmcs12->vm_entry_msr_load_count); ++ if (*exit_qual) ++ goto fail; ++ } else { ++ /* ++ * The MMU is not initialized to point at the right entities yet and ++ * "get pages" would need to read data from the guest (i.e. we will ++ * need to perform gpa to hpa translation). Request a call ++ * to nested_get_vmcs12_pages before the next VM-entry. The MSRs ++ * have already been set at vmentry time and should not be reset. 
++ */ ++ kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu); ++ } ++ ++ /* ++ * If L1 had a pending IRQ/NMI until it executed ++ * VMLAUNCH/VMRESUME which wasn't delivered because it was ++ * disallowed (e.g. interrupts disabled), L0 needs to ++ * evaluate if this pending event should cause an exit from L2 ++ * to L1 or delivered directly to L2 (e.g. In case L1 don't ++ * intercept EXTERNAL_INTERRUPT). ++ * ++ * Usually this would be handled by the processor noticing an ++ * IRQ/NMI window request, or checking RVI during evaluation of ++ * pending virtual interrupts. However, this setting was done ++ * on VMCS01 and now VMCS02 is active instead. Thus, we force L0 ++ * to perform pending event evaluation by requesting a KVM_REQ_EVENT. ++ */ ++ if (unlikely(evaluate_pending_interrupts)) ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ /* ++ * Note no nested_vmx_succeed or nested_vmx_fail here. At this point ++ * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet ++ * returned as far as L1 is concerned. It will only return (and set ++ * the success flag) when L2 exits (see nested_vmx_vmexit()). ++ */ ++ return 0; ++ ++fail: ++ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) ++ vcpu->arch.tsc_offset -= vmcs12->tsc_offset; ++ leave_guest_mode(vcpu); ++ vmx_switch_vmcs(vcpu, &vmx->vmcs01); ++ return r; ++} ++ ++/* ++ * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 ++ * for running an L2 nested guest. ++ */ ++static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) ++{ ++ struct vmcs12 *vmcs12; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu); ++ u32 exit_qual; ++ int ret; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (!nested_vmx_check_vmcs12(vcpu)) ++ goto out; ++ ++ vmcs12 = get_vmcs12(vcpu); ++ ++ /* ++ * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact ++ * that there *is* a valid VMCS pointer, RFLAGS.CF is set ++ * rather than RFLAGS.ZF, and no error number is stored to the ++ * VM-instruction error field. ++ */ ++ if (vmcs12->hdr.shadow_vmcs) { ++ nested_vmx_failInvalid(vcpu); ++ goto out; ++ } ++ ++ if (enable_shadow_vmcs) ++ copy_shadow_to_vmcs12(vmx); ++ ++ /* ++ * The nested entry process starts with enforcing various prerequisites ++ * on vmcs12 as required by the Intel SDM, and act appropriately when ++ * they fail: As the SDM explains, some conditions should cause the ++ * instruction to fail, while others will cause the instruction to seem ++ * to succeed, but return an EXIT_REASON_INVALID_STATE. ++ * To speed up the normal (success) code path, we should avoid checking ++ * for misconfigurations which will anyway be caught by the processor ++ * when using the merged vmcs02. ++ */ ++ if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS); ++ goto out; ++ } ++ ++ if (vmcs12->launch_state == launch) { ++ nested_vmx_failValid(vcpu, ++ launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS ++ : VMXERR_VMRESUME_NONLAUNCHED_VMCS); ++ goto out; ++ } ++ ++ ret = check_vmentry_prereqs(vcpu, vmcs12); ++ if (ret) { ++ nested_vmx_failValid(vcpu, ret); ++ goto out; ++ } ++ ++ /* ++ * After this point, the trap flag no longer triggers a singlestep trap ++ * on the vm entry instructions; don't call kvm_skip_emulated_instruction. ++ * This is not 100% correct; for performance reasons, we delegate most ++ * of the checks on host state to the processor. If those fail, ++ * the singlestep trap is missed. 
++ */ ++ skip_emulated_instruction(vcpu); ++ ++ ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual); ++ if (ret) { ++ nested_vmx_entry_failure(vcpu, vmcs12, ++ EXIT_REASON_INVALID_STATE, exit_qual); ++ return 1; ++ } ++ ++ /* ++ * We're finally done with prerequisite checking, and can start with ++ * the nested entry. ++ */ ++ ++ vmx->nested.nested_run_pending = 1; ++ ret = enter_vmx_non_root_mode(vcpu, &exit_qual); ++ if (ret) { ++ nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual); ++ vmx->nested.nested_run_pending = 0; ++ return 1; ++ } ++ ++ /* Hide L1D cache contents from the nested guest. */ ++ vmx->vcpu.arch.l1tf_flush_l1d = true; ++ ++ /* ++ * Must happen outside of enter_vmx_non_root_mode() as it will ++ * also be used as part of restoring nVMX state for ++ * snapshot restore (migration). ++ * ++ * In this flow, it is assumed that vmcs12 cache was ++ * trasferred as part of captured nVMX state and should ++ * therefore not be read from guest memory (which may not ++ * exist on destination host yet). ++ */ ++ nested_cache_shadow_vmcs12(vcpu, vmcs12); ++ ++ /* ++ * If we're entering a halted L2 vcpu and the L2 vcpu won't be ++ * awakened by event injection or by an NMI-window VM-exit or ++ * by an interrupt-window VM-exit, halt the vcpu. ++ */ ++ if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && ++ !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && ++ !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_NMI_PENDING) && ++ !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING) && ++ (vmcs12->guest_rflags & X86_EFLAGS_IF))) { ++ vmx->nested.nested_run_pending = 0; ++ return kvm_vcpu_halt(vcpu); ++ } ++ return 1; ++ ++out: ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++/* ++ * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date ++ * because L2 may have changed some cr0 bits directly (CRO_GUEST_HOST_MASK). ++ * This function returns the new value we should put in vmcs12.guest_cr0. ++ * It's not enough to just return the vmcs02 GUEST_CR0. Rather, ++ * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now ++ * available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0 ++ * didn't trap the bit, because if L1 did, so would L0). ++ * 2. Bits that L1 asked to trap (and therefore L0 also did) could not have ++ * been modified by L2, and L1 knows it. So just leave the old value of ++ * the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0 ++ * isn't relevant, because if L0 traps this bit it can set it to anything. ++ * 3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have ++ * changed these bits, and therefore they need to be updated, but L0 ++ * didn't necessarily allow them to be changed in GUEST_CR0 - and rather ++ * put them in vmcs02 CR0_READ_SHADOW. So take these bits from there. 
++ */ ++static inline unsigned long ++vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) ++{ ++ return ++ /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) | ++ /*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) | ++ /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask | ++ vcpu->arch.cr0_guest_owned_bits)); ++} ++ ++static inline unsigned long ++vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) ++{ ++ return ++ /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) | ++ /*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) | ++ /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask | ++ vcpu->arch.cr4_guest_owned_bits)); ++} ++ ++static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ u32 idt_vectoring; ++ unsigned int nr; ++ ++ if (vcpu->arch.exception.injected) { ++ nr = vcpu->arch.exception.nr; ++ idt_vectoring = nr | VECTORING_INFO_VALID_MASK; ++ ++ if (kvm_exception_is_soft(nr)) { ++ vmcs12->vm_exit_instruction_len = ++ vcpu->arch.event_exit_inst_len; ++ idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION; ++ } else ++ idt_vectoring |= INTR_TYPE_HARD_EXCEPTION; ++ ++ if (vcpu->arch.exception.has_error_code) { ++ idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK; ++ vmcs12->idt_vectoring_error_code = ++ vcpu->arch.exception.error_code; ++ } ++ ++ vmcs12->idt_vectoring_info_field = idt_vectoring; ++ } else if (vcpu->arch.nmi_injected) { ++ vmcs12->idt_vectoring_info_field = ++ INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; ++ } else if (vcpu->arch.interrupt.injected) { ++ nr = vcpu->arch.interrupt.nr; ++ idt_vectoring = nr | VECTORING_INFO_VALID_MASK; ++ ++ if (vcpu->arch.interrupt.soft) { ++ idt_vectoring |= INTR_TYPE_SOFT_INTR; ++ vmcs12->vm_entry_instruction_len = ++ vcpu->arch.event_exit_inst_len; ++ } else ++ idt_vectoring |= INTR_TYPE_EXT_INTR; ++ ++ vmcs12->idt_vectoring_info_field = idt_vectoring; ++ } ++} ++ ++static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long exit_qual; ++ bool block_nested_events = ++ vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); ++ ++ if (vcpu->arch.exception.pending && ++ nested_vmx_check_exception(vcpu, &exit_qual)) { ++ if (block_nested_events) ++ return -EBUSY; ++ nested_vmx_inject_exception_vmexit(vcpu, exit_qual); ++ return 0; ++ } ++ ++ if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && ++ vmx->nested.preemption_timer_expired) { ++ if (block_nested_events) ++ return -EBUSY; ++ nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); ++ return 0; ++ } ++ ++ if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { ++ if (block_nested_events) ++ return -EBUSY; ++ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, ++ NMI_VECTOR | INTR_TYPE_NMI_INTR | ++ INTR_INFO_VALID_MASK, 0); ++ /* ++ * The NMI-triggered VM exit counts as injection: ++ * clear this one and block further NMIs. 
++ */ ++ vcpu->arch.nmi_pending = 0; ++ vmx_set_nmi_mask(vcpu, true); ++ return 0; ++ } ++ ++ if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && ++ nested_exit_on_intr(vcpu)) { ++ if (block_nested_events) ++ return -EBUSY; ++ nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); ++ return 0; ++ } ++ ++ vmx_complete_nested_posted_interrupt(vcpu); ++ return 0; ++} ++ ++static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) ++{ ++ to_vmx(vcpu)->req_immediate_exit = true; ++} ++ ++static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) ++{ ++ ktime_t remaining = ++ hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer); ++ u64 value; ++ ++ if (ktime_to_ns(remaining) <= 0) ++ return 0; ++ ++ value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz; ++ do_div(value, 1000000); ++ return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; ++} ++ ++/* ++ * Update the guest state fields of vmcs12 to reflect changes that ++ * occurred while L2 was running. (The "IA-32e mode guest" bit of the ++ * VM-entry controls is also updated, since this is really a guest ++ * state bit.) ++ */ ++static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) ++{ ++ vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); ++ vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); ++ ++ vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); ++ vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); ++ vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); ++ ++ vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); ++ vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR); ++ vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR); ++ vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR); ++ vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR); ++ vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR); ++ vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR); ++ vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR); ++ vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT); ++ vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT); ++ vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT); ++ vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT); ++ vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT); ++ vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT); ++ vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT); ++ vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT); ++ vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT); ++ vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT); ++ vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES); ++ vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); ++ vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); ++ vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES); ++ vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES); ++ vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES); ++ vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES); ++ vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES); ++ vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE); ++ vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE); ++ vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE); ++ vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE); ++ vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE); ++ vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE); ++ vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE); ++ vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE); ++ vmcs12->guest_gdtr_base 
= vmcs_readl(GUEST_GDTR_BASE); ++ vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); ++ ++ vmcs12->guest_interruptibility_info = ++ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); ++ vmcs12->guest_pending_dbg_exceptions = ++ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); ++ if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) ++ vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; ++ else ++ vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; ++ ++ if (nested_cpu_has_preemption_timer(vmcs12)) { ++ if (vmcs12->vm_exit_controls & ++ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) ++ vmcs12->vmx_preemption_timer_value = ++ vmx_get_preemption_timer_value(vcpu); ++ hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); ++ } ++ ++ /* ++ * In some cases (usually, nested EPT), L2 is allowed to change its ++ * own CR3 without exiting. If it has changed it, we must keep it. ++ * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined ++ * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12. ++ * ++ * Additionally, restore L2's PDPTR to vmcs12. ++ */ ++ if (enable_ept) { ++ vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3); ++ vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); ++ vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); ++ vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); ++ vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); ++ } ++ ++ vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); ++ ++ if (nested_cpu_has_vid(vmcs12)) ++ vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS); ++ ++ vmcs12->vm_entry_controls = ++ (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | ++ (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); ++ ++ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) { ++ kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); ++ vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); ++ } ++ ++ /* TODO: These cannot have changed unless we have MSR bitmaps and ++ * the relevant bit asks not to trap the change */ ++ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) ++ vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); ++ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) ++ vmcs12->guest_ia32_efer = vcpu->arch.efer; ++ vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); ++ vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); ++ vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); ++ if (kvm_mpx_supported()) ++ vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); ++} ++ ++/* ++ * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits ++ * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), ++ * and this function updates it to reflect the changes to the guest state while ++ * L2 was running (and perhaps made some exits which were handled directly by L0 ++ * without going back to L1), and to reflect the exit reason. ++ * Note that we do not have to copy here all VMCS fields, just those that ++ * could have changed by the L2 guest or the exit - i.e., the guest-state and ++ * exit-information fields only. Other fields are modified by L1 with VMWRITE, ++ * which already writes to vmcs12 directly. 
++ */ ++static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, ++ u32 exit_reason, u32 exit_intr_info, ++ unsigned long exit_qualification) ++{ ++ /* update guest state fields: */ ++ sync_vmcs12(vcpu, vmcs12); ++ ++ /* update exit information fields: */ ++ ++ vmcs12->vm_exit_reason = exit_reason; ++ vmcs12->exit_qualification = exit_qualification; ++ vmcs12->vm_exit_intr_info = exit_intr_info; ++ ++ vmcs12->idt_vectoring_info_field = 0; ++ vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); ++ vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ ++ if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { ++ vmcs12->launch_state = 1; ++ ++ /* vm_entry_intr_info_field is cleared on exit. Emulate this ++ * instead of reading the real value. */ ++ vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; ++ ++ /* ++ * Transfer the event that L0 or L1 may wanted to inject into ++ * L2 to IDT_VECTORING_INFO_FIELD. ++ */ ++ vmcs12_save_pending_event(vcpu, vmcs12); ++ } ++ ++ /* ++ * Drop what we picked up for L2 via vmx_complete_interrupts. It is ++ * preserved above and would only end up incorrectly in L1. ++ */ ++ vcpu->arch.nmi_injected = false; ++ kvm_clear_exception_queue(vcpu); ++ kvm_clear_interrupt_queue(vcpu); ++} ++ ++/* ++ * A part of what we need to when the nested L2 guest exits and we want to ++ * run its L1 parent, is to reset L1's guest state to the host state specified ++ * in vmcs12. ++ * This function is to be called not only on normal nested exit, but also on ++ * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry ++ * Failures During or After Loading Guest State"). ++ * This function should be called when the active VMCS is L1's (vmcs01). ++ */ ++static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ struct kvm_segment seg; ++ u32 entry_failure_code; ++ ++ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) ++ vcpu->arch.efer = vmcs12->host_ia32_efer; ++ else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ++ vcpu->arch.efer |= (EFER_LMA | EFER_LME); ++ else ++ vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); ++ vmx_set_efer(vcpu, vcpu->arch.efer); ++ ++ kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); ++ kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); ++ vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); ++ /* ++ * Note that calling vmx_set_cr0 is important, even if cr0 hasn't ++ * actually changed, because vmx_set_cr0 refers to efer set above. ++ * ++ * CR0_GUEST_HOST_MASK is already set in the original vmcs01 ++ * (KVM doesn't change it); ++ */ ++ vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; ++ vmx_set_cr0(vcpu, vmcs12->host_cr0); ++ ++ /* Same as above - no reason to call set_cr4_guest_host_mask(). */ ++ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); ++ vmx_set_cr4(vcpu, vmcs12->host_cr4); ++ ++ nested_ept_uninit_mmu_context(vcpu); ++ ++ /* ++ * Only PDPTE load can fail as the value of cr3 was checked on entry and ++ * couldn't have changed. ++ */ ++ if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) ++ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); ++ ++ if (!enable_ept) ++ vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; ++ ++ /* ++ * If vmcs01 don't use VPID, CPU flushes TLB on every ++ * VMEntry/VMExit. Thus, no need to flush TLB. 
++ * ++ * If vmcs12 uses VPID, TLB entries populated by L2 are ++ * tagged with vmx->nested.vpid02 while L1 entries are tagged ++ * with vmx->vpid. Thus, no need to flush TLB. ++ * ++ * Therefore, flush TLB only in case vmcs01 uses VPID and ++ * vmcs12 don't use VPID as in this case L1 & L2 TLB entries ++ * are both tagged with vmx->vpid. ++ */ ++ if (enable_vpid && ++ !(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02)) { ++ vmx_flush_tlb(vcpu, true); ++ } ++ ++ vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs); ++ vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp); ++ vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); ++ vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); ++ vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); ++ vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF); ++ vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF); ++ ++ /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */ ++ if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) ++ vmcs_write64(GUEST_BNDCFGS, 0); ++ ++ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { ++ vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); ++ vcpu->arch.pat = vmcs12->host_ia32_pat; ++ } ++ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) ++ vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, ++ vmcs12->host_ia32_perf_global_ctrl); ++ ++ /* Set L1 segment info according to Intel SDM ++ 27.5.2 Loading Host Segment and Descriptor-Table Registers */ ++ seg = (struct kvm_segment) { ++ .base = 0, ++ .limit = 0xFFFFFFFF, ++ .selector = vmcs12->host_cs_selector, ++ .type = 11, ++ .present = 1, ++ .s = 1, ++ .g = 1 ++ }; ++ if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ++ seg.l = 1; ++ else ++ seg.db = 1; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_CS); ++ seg = (struct kvm_segment) { ++ .base = 0, ++ .limit = 0xFFFFFFFF, ++ .type = 3, ++ .present = 1, ++ .s = 1, ++ .db = 1, ++ .g = 1 ++ }; ++ seg.selector = vmcs12->host_ds_selector; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_DS); ++ seg.selector = vmcs12->host_es_selector; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_ES); ++ seg.selector = vmcs12->host_ss_selector; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_SS); ++ seg.selector = vmcs12->host_fs_selector; ++ seg.base = vmcs12->host_fs_base; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_FS); ++ seg.selector = vmcs12->host_gs_selector; ++ seg.base = vmcs12->host_gs_base; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_GS); ++ seg = (struct kvm_segment) { ++ .base = vmcs12->host_tr_base, ++ .limit = 0x67, ++ .selector = vmcs12->host_tr_selector, ++ .type = 11, ++ .present = 1 ++ }; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); ++ ++ kvm_set_dr(vcpu, 7, 0x400); ++ vmcs_write64(GUEST_IA32_DEBUGCTL, 0); ++ ++ if (cpu_has_vmx_msr_bitmap()) ++ vmx_update_msr_bitmap(vcpu); ++ ++ if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, ++ vmcs12->vm_exit_msr_load_count)) ++ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); ++} ++ ++static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx) ++{ ++ struct shared_msr_entry *efer_msr; ++ unsigned int i; ++ ++ if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER) ++ return vmcs_read64(GUEST_IA32_EFER); ++ ++ if (cpu_has_load_ia32_efer) ++ return host_efer; ++ ++ for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) { ++ if (vmx->msr_autoload.guest.val[i].index == MSR_EFER) ++ return vmx->msr_autoload.guest.val[i].value; ++ } ++ ++ efer_msr = find_msr_entry(vmx, MSR_EFER); ++ if (efer_msr) ++ return efer_msr->data; ++ ++ return host_efer; 
++} ++ ++static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmx_msr_entry g, h; ++ struct msr_data msr; ++ gpa_t gpa; ++ u32 i, j; ++ ++ vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT); ++ ++ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { ++ /* ++ * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set ++ * as vmcs01.GUEST_DR7 contains a userspace defined value ++ * and vcpu->arch.dr7 is not squirreled away before the ++ * nested VMENTER (not worth adding a variable in nested_vmx). ++ */ ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) ++ kvm_set_dr(vcpu, 7, DR7_FIXED_1); ++ else ++ WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7))); ++ } ++ ++ /* ++ * Note that calling vmx_set_{efer,cr0,cr4} is important as they ++ * handle a variety of side effects to KVM's software model. ++ */ ++ vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx)); ++ ++ vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; ++ vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW)); ++ ++ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); ++ vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); ++ ++ nested_ept_uninit_mmu_context(vcpu); ++ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); ++ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); ++ ++ /* ++ * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs ++ * from vmcs01 (if necessary). The PDPTRs are not loaded on ++ * VMFail, like everything else we just need to ensure our ++ * software model is up-to-date. ++ */ ++ ept_save_pdptrs(vcpu); ++ ++ kvm_mmu_reset_context(vcpu); ++ ++ if (cpu_has_vmx_msr_bitmap()) ++ vmx_update_msr_bitmap(vcpu); ++ ++ /* ++ * This nasty bit of open coding is a compromise between blindly ++ * loading L1's MSRs using the exit load lists (incorrect emulation ++ * of VMFail), leaving the nested VM's MSRs in the software model ++ * (incorrect behavior) and snapshotting the modified MSRs (too ++ * expensive since the lists are unbound by hardware). For each ++ * MSR that was (prematurely) loaded from the nested VMEntry load ++ * list, reload it from the exit load list if it exists and differs ++ * from the guest value. The intent is to stuff host state as ++ * silently as possible, not to fully process the exit load list. 
++ */ ++ msr.host_initiated = false; ++ for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) { ++ gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g)); ++ if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) { ++ pr_debug_ratelimited( ++ "%s read MSR index failed (%u, 0x%08llx)\n", ++ __func__, i, gpa); ++ goto vmabort; ++ } ++ ++ for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) { ++ gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h)); ++ if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) { ++ pr_debug_ratelimited( ++ "%s read MSR failed (%u, 0x%08llx)\n", ++ __func__, j, gpa); ++ goto vmabort; ++ } ++ if (h.index != g.index) ++ continue; ++ if (h.value == g.value) ++ break; ++ ++ if (nested_vmx_load_msr_check(vcpu, &h)) { ++ pr_debug_ratelimited( ++ "%s check failed (%u, 0x%x, 0x%x)\n", ++ __func__, j, h.index, h.reserved); ++ goto vmabort; ++ } ++ ++ msr.index = h.index; ++ msr.data = h.value; ++ if (kvm_set_msr(vcpu, &msr)) { ++ pr_debug_ratelimited( ++ "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", ++ __func__, j, h.index, h.value); ++ goto vmabort; ++ } ++ } ++ } ++ ++ return; ++ ++vmabort: ++ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); ++} ++ ++/* ++ * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1 ++ * and modify vmcs12 to make it see what it would expect to see there if ++ * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) ++ */ ++static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, ++ u32 exit_intr_info, ++ unsigned long exit_qualification) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ /* trying to cancel vmlaunch/vmresume is a bug */ ++ WARN_ON_ONCE(vmx->nested.nested_run_pending); ++ ++ /* ++ * The only expected VM-instruction error is "VM entry with ++ * invalid control field(s)." Anything else indicates a ++ * problem with L0. ++ */ ++ WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) != ++ VMXERR_ENTRY_INVALID_CONTROL_FIELD)); ++ ++ leave_guest_mode(vcpu); ++ ++ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) ++ vcpu->arch.tsc_offset -= vmcs12->tsc_offset; ++ ++ if (likely(!vmx->fail)) { ++ if (exit_reason == -1) ++ sync_vmcs12(vcpu, vmcs12); ++ else ++ prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, ++ exit_qualification); ++ ++ /* ++ * Must happen outside of sync_vmcs12() as it will ++ * also be used to capture vmcs12 cache as part of ++ * capturing nVMX state for snapshot (migration). ++ * ++ * Otherwise, this flush will dirty guest memory at a ++ * point it is already assumed by user-space to be ++ * immutable. 
++ */ ++ nested_flush_cached_shadow_vmcs12(vcpu, vmcs12); ++ ++ if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr, ++ vmcs12->vm_exit_msr_store_count)) ++ nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL); ++ } ++ ++ vmx_switch_vmcs(vcpu, &vmx->vmcs01); ++ vm_entry_controls_reset_shadow(vmx); ++ vm_exit_controls_reset_shadow(vmx); ++ vmx_segment_cache_clear(vmx); ++ ++ /* Update any VMCS fields that might have changed while L2 ran */ ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); ++ vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); ++ ++ if (kvm_has_tsc_control) ++ decache_tsc_multiplier(vmx); ++ ++ if (vmx->nested.change_vmcs01_virtual_apic_mode) { ++ vmx->nested.change_vmcs01_virtual_apic_mode = false; ++ vmx_set_virtual_apic_mode(vcpu); ++ } else if (!nested_cpu_has_ept(vmcs12) && ++ nested_cpu_has2(vmcs12, ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { ++ vmx_flush_tlb(vcpu, true); ++ } ++ ++ /* This is needed for same reason as it was needed in prepare_vmcs02 */ ++ vmx->host_rsp = 0; ++ ++ /* Unpin physical memory we referred to in vmcs02 */ ++ if (vmx->nested.apic_access_page) { ++ kvm_release_page_dirty(vmx->nested.apic_access_page); ++ vmx->nested.apic_access_page = NULL; ++ } ++ if (vmx->nested.virtual_apic_page) { ++ kvm_release_page_dirty(vmx->nested.virtual_apic_page); ++ vmx->nested.virtual_apic_page = NULL; ++ } ++ if (vmx->nested.pi_desc_page) { ++ kunmap(vmx->nested.pi_desc_page); ++ kvm_release_page_dirty(vmx->nested.pi_desc_page); ++ vmx->nested.pi_desc_page = NULL; ++ vmx->nested.pi_desc = NULL; ++ } ++ ++ /* ++ * We are now running in L2, mmu_notifier will force to reload the ++ * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1. ++ */ ++ kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); ++ ++ if (enable_shadow_vmcs && exit_reason != -1) ++ vmx->nested.sync_shadow_vmcs = true; ++ ++ /* in case we halted in L2 */ ++ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; ++ ++ if (likely(!vmx->fail)) { ++ /* ++ * TODO: SDM says that with acknowledge interrupt on ++ * exit, bit 31 of the VM-exit interrupt information ++ * (valid interrupt) is always set to 1 on ++ * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't ++ * need kvm_cpu_has_interrupt(). See the commit ++ * message for details. ++ */ ++ if (nested_exit_intr_ack_set(vcpu) && ++ exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && ++ kvm_cpu_has_interrupt(vcpu)) { ++ int irq = kvm_cpu_get_interrupt(vcpu); ++ WARN_ON(irq < 0); ++ vmcs12->vm_exit_intr_info = irq | ++ INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR; ++ } ++ ++ if (exit_reason != -1) ++ trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, ++ vmcs12->exit_qualification, ++ vmcs12->idt_vectoring_info_field, ++ vmcs12->vm_exit_intr_info, ++ vmcs12->vm_exit_intr_error_code, ++ KVM_ISA_VMX); ++ ++ load_vmcs12_host_state(vcpu, vmcs12); ++ ++ return; ++ } ++ ++ /* ++ * After an early L2 VM-entry failure, we're now back ++ * in L1 which thinks it just finished a VMLAUNCH or ++ * VMRESUME instruction, so we need to set the failure ++ * flag and the VM-instruction error field of the VMCS ++ * accordingly. ++ */ ++ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); ++ ++ /* ++ * Restore L1's host state to KVM's software model. We're here ++ * because a consistency check was caught by hardware, which ++ * means some amount of guest state has been propagated to KVM's ++ * model and needs to be unwound to the host's state. 
++ */ ++ nested_vmx_restore_host_state(vcpu); ++ ++ /* ++ * The emulated instruction was already skipped in ++ * nested_vmx_run, but the updated RIP was never ++ * written back to the vmcs01. ++ */ ++ skip_emulated_instruction(vcpu); ++ vmx->fail = 0; ++} ++ ++/* ++ * Forcibly leave nested mode in order to be able to reset the VCPU later on. ++ */ ++static void vmx_leave_nested(struct kvm_vcpu *vcpu) ++{ ++ if (is_guest_mode(vcpu)) { ++ to_vmx(vcpu)->nested.nested_run_pending = 0; ++ nested_vmx_vmexit(vcpu, -1, 0, 0); ++ } ++ free_nested(to_vmx(vcpu)); ++} ++ ++/* ++ * L1's failure to enter L2 is a subset of a normal exit, as explained in ++ * 23.7 "VM-entry failures during or after loading guest state" (this also ++ * lists the acceptable exit-reason and exit-qualification parameters). ++ * It should only be called before L2 actually succeeded to run, and when ++ * vmcs01 is current (it doesn't leave_guest_mode() or switch vmcss). ++ */ ++static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12, ++ u32 reason, unsigned long qualification) ++{ ++ load_vmcs12_host_state(vcpu, vmcs12); ++ vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; ++ vmcs12->exit_qualification = qualification; ++ nested_vmx_succeed(vcpu); ++ if (enable_shadow_vmcs) ++ to_vmx(vcpu)->nested.sync_shadow_vmcs = true; ++} ++ ++static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, ++ struct x86_instruction_info *info) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ unsigned short port; ++ bool intercept; ++ int size; ++ ++ if (info->intercept == x86_intercept_in || ++ info->intercept == x86_intercept_ins) { ++ port = info->src_val; ++ size = info->dst_bytes; ++ } else { ++ port = info->dst_val; ++ size = info->src_bytes; ++ } ++ ++ /* ++ * If the 'use IO bitmaps' VM-execution control is 0, IO instruction ++ * VM-exits depend on the 'unconditional IO exiting' VM-execution ++ * control. ++ * ++ * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps. ++ */ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) ++ intercept = nested_cpu_has(vmcs12, ++ CPU_BASED_UNCOND_IO_EXITING); ++ else ++ intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); ++ ++ return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; ++} ++ ++static int vmx_check_intercept(struct kvm_vcpu *vcpu, ++ struct x86_instruction_info *info, ++ enum x86_intercept_stage stage) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ ++ switch (info->intercept) { ++ /* ++ * RDPID causes #UD if disabled through secondary execution controls. ++ * Because it is marked as EmulateOnUD, we need to intercept it here. ++ */ ++ case x86_intercept_rdtscp: ++ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { ++ ctxt->exception.vector = UD_VECTOR; ++ ctxt->exception.error_code_valid = false; ++ return X86EMUL_PROPAGATE_FAULT; ++ } ++ break; ++ ++ case x86_intercept_in: ++ case x86_intercept_ins: ++ case x86_intercept_out: ++ case x86_intercept_outs: ++ return vmx_check_intercept_io(vcpu, info); ++ ++ /* TODO: check more intercepts... 
*/ ++ default: ++ break; ++ } ++ ++ return X86EMUL_UNHANDLEABLE; ++} ++ ++#ifdef CONFIG_X86_64 ++/* (a << shift) / divisor, return 1 if overflow otherwise 0 */ ++static inline int u64_shl_div_u64(u64 a, unsigned int shift, ++ u64 divisor, u64 *result) ++{ ++ u64 low = a << shift, high = a >> (64 - shift); ++ ++ /* To avoid the overflow on divq */ ++ if (high >= divisor) ++ return 1; ++ ++ /* Low hold the result, high hold rem which is discarded */ ++ asm("divq %2\n\t" : "=a" (low), "=d" (high) : ++ "rm" (divisor), "0" (low), "1" (high)); ++ *result = low; ++ ++ return 0; ++} ++ ++static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) ++{ ++ struct vcpu_vmx *vmx; ++ u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; ++ ++ if (kvm_mwait_in_guest(vcpu->kvm)) ++ return -EOPNOTSUPP; ++ ++ vmx = to_vmx(vcpu); ++ tscl = rdtsc(); ++ guest_tscl = kvm_read_l1_tsc(vcpu, tscl); ++ delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; ++ lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns); ++ ++ if (delta_tsc > lapic_timer_advance_cycles) ++ delta_tsc -= lapic_timer_advance_cycles; ++ else ++ delta_tsc = 0; ++ ++ /* Convert to host delta tsc if tsc scaling is enabled */ ++ if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && ++ u64_shl_div_u64(delta_tsc, ++ kvm_tsc_scaling_ratio_frac_bits, ++ vcpu->arch.tsc_scaling_ratio, ++ &delta_tsc)) ++ return -ERANGE; ++ ++ /* ++ * If the delta tsc can't fit in the 32 bit after the multi shift, ++ * we can't use the preemption timer. ++ * It's possible that it fits on later vmentries, but checking ++ * on every vmentry is costly so we just use an hrtimer. ++ */ ++ if (delta_tsc >> (cpu_preemption_timer_multi + 32)) ++ return -ERANGE; ++ ++ vmx->hv_deadline_tsc = tscl + delta_tsc; ++ return delta_tsc == 0; ++} ++ ++static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) ++{ ++ to_vmx(vcpu)->hv_deadline_tsc = -1; ++} ++#endif ++ ++static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) ++{ ++ if (!kvm_pause_in_guest(vcpu->kvm)) ++ shrink_ple_window(vcpu); ++} ++ ++static void vmx_slot_enable_log_dirty(struct kvm *kvm, ++ struct kvm_memory_slot *slot) ++{ ++ kvm_mmu_slot_leaf_clear_dirty(kvm, slot); ++ kvm_mmu_slot_largepage_remove_write_access(kvm, slot); ++} ++ ++static void vmx_slot_disable_log_dirty(struct kvm *kvm, ++ struct kvm_memory_slot *slot) ++{ ++ kvm_mmu_slot_set_dirty(kvm, slot); ++} ++ ++static void vmx_flush_log_dirty(struct kvm *kvm) ++{ ++ kvm_flush_pml_buffers(kvm); ++} ++ ++static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ gpa_t gpa; ++ struct page *page = NULL; ++ u64 *pml_address; ++ ++ if (is_guest_mode(vcpu)) { ++ WARN_ON_ONCE(vmx->nested.pml_full); ++ ++ /* ++ * Check if PML is enabled for the nested guest. ++ * Whether eptp bit 6 is set is already checked ++ * as part of A/D emulation. 
++ */ ++ vmcs12 = get_vmcs12(vcpu); ++ if (!nested_cpu_has_pml(vmcs12)) ++ return 0; ++ ++ if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) { ++ vmx->nested.pml_full = true; ++ return 1; ++ } ++ ++ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; ++ ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address); ++ if (is_error_page(page)) ++ return 0; ++ ++ pml_address = kmap(page); ++ pml_address[vmcs12->guest_pml_index--] = gpa; ++ kunmap(page); ++ kvm_release_page_clean(page); ++ } ++ ++ return 0; ++} ++ ++static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, ++ struct kvm_memory_slot *memslot, ++ gfn_t offset, unsigned long mask) ++{ ++ kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); ++} ++ ++static void __pi_post_block(struct kvm_vcpu *vcpu) ++{ ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ++ struct pi_desc old, new; ++ unsigned int dest; ++ ++ do { ++ old.control = new.control = pi_desc->control; ++ WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, ++ "Wakeup handler not enabled while the VCPU is blocked\n"); ++ ++ dest = cpu_physical_id(vcpu->cpu); ++ ++ if (x2apic_enabled()) ++ new.ndst = dest; ++ else ++ new.ndst = (dest << 8) & 0xFF00; ++ ++ /* set 'NV' to 'notification vector' */ ++ new.nv = POSTED_INTR_VECTOR; ++ } while (cmpxchg64(&pi_desc->control, old.control, ++ new.control) != old.control); ++ ++ if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { ++ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ list_del(&vcpu->blocked_vcpu_list); ++ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ vcpu->pre_pcpu = -1; ++ } ++} ++ ++/* ++ * This routine does the following things for vCPU which is going ++ * to be blocked if VT-d PI is enabled. ++ * - Store the vCPU to the wakeup list, so when interrupts happen ++ * we can find the right vCPU to wake up. ++ * - Change the Posted-interrupt descriptor as below: ++ * 'NDST' <-- vcpu->pre_pcpu ++ * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR ++ * - If 'ON' is set during this process, which means at least one ++ * interrupt is posted for this vCPU, we cannot block it, in ++ * this case, return 1, otherwise, return 0. ++ * ++ */ ++static int pi_pre_block(struct kvm_vcpu *vcpu) ++{ ++ unsigned int dest; ++ struct pi_desc old, new; ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ++ ++ if (!kvm_arch_has_assigned_device(vcpu->kvm) || ++ !irq_remapping_cap(IRQ_POSTING_CAP) || ++ !kvm_vcpu_apicv_active(vcpu)) ++ return 0; ++ ++ WARN_ON(irqs_disabled()); ++ local_irq_disable(); ++ if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { ++ vcpu->pre_pcpu = vcpu->cpu; ++ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ list_add_tail(&vcpu->blocked_vcpu_list, ++ &per_cpu(blocked_vcpu_on_cpu, ++ vcpu->pre_pcpu)); ++ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ } ++ ++ do { ++ old.control = new.control = pi_desc->control; ++ ++ WARN((pi_desc->sn == 1), ++ "Warning: SN field of posted-interrupts " ++ "is set before blocking\n"); ++ ++ /* ++ * Since vCPU can be preempted during this process, ++ * vcpu->cpu could be different with pre_pcpu, we ++ * need to set pre_pcpu as the destination of wakeup ++ * notification event, then we can find the right vCPU ++ * to wakeup in wakeup handler if interrupts happen ++ * when the vCPU is in blocked state. 
++ */ ++ dest = cpu_physical_id(vcpu->pre_pcpu); ++ ++ if (x2apic_enabled()) ++ new.ndst = dest; ++ else ++ new.ndst = (dest << 8) & 0xFF00; ++ ++ /* set 'NV' to 'wakeup vector' */ ++ new.nv = POSTED_INTR_WAKEUP_VECTOR; ++ } while (cmpxchg64(&pi_desc->control, old.control, ++ new.control) != old.control); ++ ++ /* We should not block the vCPU if an interrupt is posted for it. */ ++ if (pi_test_on(pi_desc) == 1) ++ __pi_post_block(vcpu); ++ ++ local_irq_enable(); ++ return (vcpu->pre_pcpu == -1); ++} ++ ++static int vmx_pre_block(struct kvm_vcpu *vcpu) ++{ ++ if (pi_pre_block(vcpu)) ++ return 1; ++ ++ if (kvm_lapic_hv_timer_in_use(vcpu)) ++ kvm_lapic_switch_to_sw_timer(vcpu); ++ ++ return 0; ++} ++ ++static void pi_post_block(struct kvm_vcpu *vcpu) ++{ ++ if (vcpu->pre_pcpu == -1) ++ return; ++ ++ WARN_ON(irqs_disabled()); ++ local_irq_disable(); ++ __pi_post_block(vcpu); ++ local_irq_enable(); ++} ++ ++static void vmx_post_block(struct kvm_vcpu *vcpu) ++{ ++ if (kvm_x86_ops->set_hv_timer) ++ kvm_lapic_switch_to_hv_timer(vcpu); ++ ++ pi_post_block(vcpu); ++} ++ ++/* ++ * vmx_update_pi_irte - set IRTE for Posted-Interrupts ++ * ++ * @kvm: kvm ++ * @host_irq: host irq of the interrupt ++ * @guest_irq: gsi of the interrupt ++ * @set: set or unset PI ++ * returns 0 on success, < 0 on failure ++ */ ++static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, ++ uint32_t guest_irq, bool set) ++{ ++ struct kvm_kernel_irq_routing_entry *e; ++ struct kvm_irq_routing_table *irq_rt; ++ struct kvm_lapic_irq irq; ++ struct kvm_vcpu *vcpu; ++ struct vcpu_data vcpu_info; ++ int idx, ret = 0; ++ ++ if (!kvm_arch_has_assigned_device(kvm) || ++ !irq_remapping_cap(IRQ_POSTING_CAP) || ++ !kvm_vcpu_apicv_active(kvm->vcpus[0])) ++ return 0; ++ ++ idx = srcu_read_lock(&kvm->irq_srcu); ++ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); ++ if (guest_irq >= irq_rt->nr_rt_entries || ++ hlist_empty(&irq_rt->map[guest_irq])) { ++ pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n", ++ guest_irq, irq_rt->nr_rt_entries); ++ goto out; ++ } ++ ++ hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { ++ if (e->type != KVM_IRQ_ROUTING_MSI) ++ continue; ++ /* ++ * VT-d PI cannot support posting multicast/broadcast ++ * interrupts to a vCPU, we still use interrupt remapping ++ * for these kind of interrupts. ++ * ++ * For lowest-priority interrupts, we only support ++ * those with single CPU as the destination, e.g. user ++ * configures the interrupts via /proc/irq or uses ++ * irqbalance to make the interrupts single-CPU. ++ * ++ * We will support full lowest-priority interrupt later. ++ */ ++ ++ kvm_set_msi_irq(kvm, e, &irq); ++ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { ++ /* ++ * Make sure the IRTE is in remapped mode if ++ * we don't handle it in posted mode. 
++ */ ++ ret = irq_set_vcpu_affinity(host_irq, NULL); ++ if (ret < 0) { ++ printk(KERN_INFO ++ "failed to back to remapped mode, irq: %u\n", ++ host_irq); ++ goto out; ++ } ++ ++ continue; ++ } ++ ++ vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); ++ vcpu_info.vector = irq.vector; ++ ++ trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, ++ vcpu_info.vector, vcpu_info.pi_desc_addr, set); ++ ++ if (set) ++ ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); ++ else ++ ret = irq_set_vcpu_affinity(host_irq, NULL); ++ ++ if (ret < 0) { ++ printk(KERN_INFO "%s: failed to update PI IRTE\n", ++ __func__); ++ goto out; ++ } ++ } ++ ++ ret = 0; ++out: ++ srcu_read_unlock(&kvm->irq_srcu, idx); ++ return ret; ++} ++ ++static void vmx_setup_mce(struct kvm_vcpu *vcpu) ++{ ++ if (vcpu->arch.mcg_cap & MCG_LMCE_P) ++ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= ++ FEATURE_CONTROL_LMCE; ++ else ++ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= ++ ~FEATURE_CONTROL_LMCE; ++} ++ ++static int vmx_smi_allowed(struct kvm_vcpu *vcpu) ++{ ++ /* we need a nested vmexit to enter SMM, postpone if run is pending */ ++ if (to_vmx(vcpu)->nested.nested_run_pending) ++ return 0; ++ return 1; ++} ++ ++static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ vmx->nested.smm.guest_mode = is_guest_mode(vcpu); ++ if (vmx->nested.smm.guest_mode) ++ nested_vmx_vmexit(vcpu, -1, 0, 0); ++ ++ vmx->nested.smm.vmxon = vmx->nested.vmxon; ++ vmx->nested.vmxon = false; ++ vmx_clear_hlt(vcpu); ++ return 0; ++} ++ ++static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int ret; ++ ++ if (vmx->nested.smm.vmxon) { ++ vmx->nested.vmxon = true; ++ vmx->nested.smm.vmxon = false; ++ } ++ ++ if (vmx->nested.smm.guest_mode) { ++ vcpu->arch.hflags &= ~HF_SMM_MASK; ++ ret = enter_vmx_non_root_mode(vcpu, NULL); ++ vcpu->arch.hflags |= HF_SMM_MASK; ++ if (ret) ++ return ret; ++ ++ vmx->nested.smm.guest_mode = false; ++ } ++ return 0; ++} ++ ++static int enable_smi_window(struct kvm_vcpu *vcpu) ++{ ++ return 0; ++} ++ ++static int vmx_get_nested_state(struct kvm_vcpu *vcpu, ++ struct kvm_nested_state __user *user_kvm_nested_state, ++ u32 user_data_size) ++{ ++ struct vcpu_vmx *vmx; ++ struct vmcs12 *vmcs12; ++ struct kvm_nested_state kvm_state = { ++ .flags = 0, ++ .format = 0, ++ .size = sizeof(kvm_state), ++ .vmx.vmxon_pa = -1ull, ++ .vmx.vmcs_pa = -1ull, ++ }; ++ ++ if (!vcpu) ++ return kvm_state.size + 2 * VMCS12_SIZE; ++ ++ vmx = to_vmx(vcpu); ++ vmcs12 = get_vmcs12(vcpu); ++ if (nested_vmx_allowed(vcpu) && ++ (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { ++ kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr; ++ kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr; ++ ++ if (vmx->nested.current_vmptr != -1ull) { ++ kvm_state.size += VMCS12_SIZE; ++ ++ if (is_guest_mode(vcpu) && ++ nested_cpu_has_shadow_vmcs(vmcs12) && ++ vmcs12->vmcs_link_pointer != -1ull) ++ kvm_state.size += VMCS12_SIZE; ++ } ++ ++ if (vmx->nested.smm.vmxon) ++ kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON; ++ ++ if (vmx->nested.smm.guest_mode) ++ kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE; ++ ++ if (is_guest_mode(vcpu)) { ++ kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; ++ ++ if (vmx->nested.nested_run_pending) ++ kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; ++ } ++ } ++ ++ if (user_data_size < kvm_state.size) ++ goto out; ++ ++ if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state))) ++ return 
-EFAULT; ++ ++ if (vmx->nested.current_vmptr == -1ull) ++ goto out; ++ ++ /* ++ * When running L2, the authoritative vmcs12 state is in the ++ * vmcs02. When running L1, the authoritative vmcs12 state is ++ * in the shadow vmcs linked to vmcs01, unless ++ * sync_shadow_vmcs is set, in which case, the authoritative ++ * vmcs12 state is in the vmcs12 already. ++ */ ++ if (is_guest_mode(vcpu)) ++ sync_vmcs12(vcpu, vmcs12); ++ else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs) ++ copy_shadow_to_vmcs12(vmx); ++ ++ /* ++ * Copy over the full allocated size of vmcs12 rather than just the size ++ * of the struct. ++ */ ++ if (copy_to_user(user_kvm_nested_state->data, vmcs12, VMCS12_SIZE)) ++ return -EFAULT; ++ ++ if (nested_cpu_has_shadow_vmcs(vmcs12) && ++ vmcs12->vmcs_link_pointer != -1ull) { ++ if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE, ++ get_shadow_vmcs12(vcpu), VMCS12_SIZE)) ++ return -EFAULT; ++ } ++ ++out: ++ return kvm_state.size; ++} ++ ++static int vmx_set_nested_state(struct kvm_vcpu *vcpu, ++ struct kvm_nested_state __user *user_kvm_nested_state, ++ struct kvm_nested_state *kvm_state) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs12 *vmcs12; ++ u32 exit_qual; ++ int ret; ++ ++ if (kvm_state->format != 0) ++ return -EINVAL; ++ ++ if (!nested_vmx_allowed(vcpu)) ++ return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL; ++ ++ if (kvm_state->vmx.vmxon_pa == -1ull) { ++ if (kvm_state->vmx.smm.flags) ++ return -EINVAL; ++ ++ if (kvm_state->vmx.vmcs_pa != -1ull) ++ return -EINVAL; ++ ++ vmx_leave_nested(vcpu); ++ return 0; ++ } ++ ++ if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa)) ++ return -EINVAL; ++ ++ if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && ++ (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) ++ return -EINVAL; ++ ++ if (kvm_state->vmx.smm.flags & ++ ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON)) ++ return -EINVAL; ++ ++ /* ++ * SMM temporarily disables VMX, so we cannot be in guest mode, ++ * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags ++ * must be zero. ++ */ ++ if (is_smm(vcpu) ? 
kvm_state->flags : kvm_state->vmx.smm.flags) ++ return -EINVAL; ++ ++ if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && ++ !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) ++ return -EINVAL; ++ ++ vmx_leave_nested(vcpu); ++ if (kvm_state->vmx.vmxon_pa == -1ull) ++ return 0; ++ ++ vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa; ++ ret = enter_vmx_operation(vcpu); ++ if (ret) ++ return ret; ++ ++ /* Empty 'VMXON' state is permitted */ ++ if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) ++ return 0; ++ ++ if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa || ++ !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa)) ++ return -EINVAL; ++ ++ set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa); ++ ++ if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) { ++ vmx->nested.smm.vmxon = true; ++ vmx->nested.vmxon = false; ++ ++ if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) ++ vmx->nested.smm.guest_mode = true; ++ } ++ ++ vmcs12 = get_vmcs12(vcpu); ++ if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12))) ++ return -EFAULT; ++ ++ if (vmcs12->hdr.revision_id != VMCS12_REVISION) ++ return -EINVAL; ++ ++ if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) ++ return 0; ++ ++ vmx->nested.nested_run_pending = ++ !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); ++ ++ if (nested_cpu_has_shadow_vmcs(vmcs12) && ++ vmcs12->vmcs_link_pointer != -1ull) { ++ struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); ++ if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12)) ++ return -EINVAL; ++ ++ if (copy_from_user(shadow_vmcs12, ++ user_kvm_nested_state->data + VMCS12_SIZE, ++ sizeof(*vmcs12))) ++ return -EFAULT; ++ ++ if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || ++ !shadow_vmcs12->hdr.shadow_vmcs) ++ return -EINVAL; ++ } ++ ++ if (check_vmentry_prereqs(vcpu, vmcs12) || ++ check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) ++ return -EINVAL; ++ ++ vmx->nested.dirty_vmcs12 = true; ++ ret = enter_vmx_non_root_mode(vcpu, NULL); ++ if (ret) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { ++ .cpu_has_kvm_support = cpu_has_kvm_support, ++ .disabled_by_bios = vmx_disabled_by_bios, ++ .hardware_setup = hardware_setup, ++ .hardware_unsetup = hardware_unsetup, ++ .check_processor_compatibility = vmx_check_processor_compat, ++ .hardware_enable = hardware_enable, ++ .hardware_disable = hardware_disable, ++ .cpu_has_accelerated_tpr = report_flexpriority, ++ .has_emulated_msr = vmx_has_emulated_msr, ++ ++ .vm_init = vmx_vm_init, ++ .vm_alloc = vmx_vm_alloc, ++ .vm_free = vmx_vm_free, ++ ++ .vcpu_create = vmx_create_vcpu, ++ .vcpu_free = vmx_free_vcpu, ++ .vcpu_reset = vmx_vcpu_reset, ++ ++ .prepare_guest_switch = vmx_prepare_switch_to_guest, ++ .vcpu_load = vmx_vcpu_load, ++ .vcpu_put = vmx_vcpu_put, ++ ++ .update_bp_intercept = update_exception_bitmap, ++ .get_msr_feature = vmx_get_msr_feature, ++ .get_msr = vmx_get_msr, ++ .set_msr = vmx_set_msr, ++ .get_segment_base = vmx_get_segment_base, ++ .get_segment = vmx_get_segment, ++ .set_segment = vmx_set_segment, ++ .get_cpl = vmx_get_cpl, ++ .get_cs_db_l_bits = vmx_get_cs_db_l_bits, ++ .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, ++ .decache_cr3 = vmx_decache_cr3, ++ .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, ++ .set_cr0 = vmx_set_cr0, ++ .set_cr3 = vmx_set_cr3, ++ .set_cr4 = vmx_set_cr4, ++ .set_efer = vmx_set_efer, ++ .get_idt = vmx_get_idt, ++ .set_idt = vmx_set_idt, ++ .get_gdt = vmx_get_gdt, ++ .set_gdt 
= vmx_set_gdt, ++ .get_dr6 = vmx_get_dr6, ++ .set_dr6 = vmx_set_dr6, ++ .set_dr7 = vmx_set_dr7, ++ .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, ++ .cache_reg = vmx_cache_reg, ++ .get_rflags = vmx_get_rflags, ++ .set_rflags = vmx_set_rflags, ++ ++ .tlb_flush = vmx_flush_tlb, ++ .tlb_flush_gva = vmx_flush_tlb_gva, ++ ++ .run = vmx_vcpu_run, ++ .handle_exit = vmx_handle_exit, ++ .skip_emulated_instruction = skip_emulated_instruction, ++ .set_interrupt_shadow = vmx_set_interrupt_shadow, ++ .get_interrupt_shadow = vmx_get_interrupt_shadow, ++ .patch_hypercall = vmx_patch_hypercall, ++ .set_irq = vmx_inject_irq, ++ .set_nmi = vmx_inject_nmi, ++ .queue_exception = vmx_queue_exception, ++ .cancel_injection = vmx_cancel_injection, ++ .interrupt_allowed = vmx_interrupt_allowed, ++ .nmi_allowed = vmx_nmi_allowed, ++ .get_nmi_mask = vmx_get_nmi_mask, ++ .set_nmi_mask = vmx_set_nmi_mask, ++ .enable_nmi_window = enable_nmi_window, ++ .enable_irq_window = enable_irq_window, ++ .update_cr8_intercept = update_cr8_intercept, ++ .set_virtual_apic_mode = vmx_set_virtual_apic_mode, ++ .set_apic_access_page_addr = vmx_set_apic_access_page_addr, ++ .get_enable_apicv = vmx_get_enable_apicv, ++ .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, ++ .load_eoi_exitmap = vmx_load_eoi_exitmap, ++ .apicv_post_state_restore = vmx_apicv_post_state_restore, ++ .hwapic_irr_update = vmx_hwapic_irr_update, ++ .hwapic_isr_update = vmx_hwapic_isr_update, ++ .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, ++ .sync_pir_to_irr = vmx_sync_pir_to_irr, ++ .deliver_posted_interrupt = vmx_deliver_posted_interrupt, ++ .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt, ++ ++ .set_tss_addr = vmx_set_tss_addr, ++ .set_identity_map_addr = vmx_set_identity_map_addr, ++ .get_tdp_level = get_ept_level, ++ .get_mt_mask = vmx_get_mt_mask, ++ ++ .get_exit_info = vmx_get_exit_info, ++ ++ .get_lpage_level = vmx_get_lpage_level, ++ ++ .cpuid_update = vmx_cpuid_update, ++ ++ .rdtscp_supported = vmx_rdtscp_supported, ++ .invpcid_supported = vmx_invpcid_supported, ++ ++ .set_supported_cpuid = vmx_set_supported_cpuid, ++ ++ .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, ++ ++ .read_l1_tsc_offset = vmx_read_l1_tsc_offset, ++ .write_l1_tsc_offset = vmx_write_l1_tsc_offset, ++ ++ .set_tdp_cr3 = vmx_set_cr3, ++ ++ .check_intercept = vmx_check_intercept, ++ .handle_external_intr = vmx_handle_external_intr, ++ .mpx_supported = vmx_mpx_supported, ++ .xsaves_supported = vmx_xsaves_supported, ++ .umip_emulated = vmx_umip_emulated, ++ ++ .check_nested_events = vmx_check_nested_events, ++ .request_immediate_exit = vmx_request_immediate_exit, ++ ++ .sched_in = vmx_sched_in, ++ ++ .slot_enable_log_dirty = vmx_slot_enable_log_dirty, ++ .slot_disable_log_dirty = vmx_slot_disable_log_dirty, ++ .flush_log_dirty = vmx_flush_log_dirty, ++ .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, ++ .write_log_dirty = vmx_write_pml_buffer, ++ ++ .pre_block = vmx_pre_block, ++ .post_block = vmx_post_block, ++ ++ .pmu_ops = &intel_pmu_ops, ++ ++ .update_pi_irte = vmx_update_pi_irte, ++ ++#ifdef CONFIG_X86_64 ++ .set_hv_timer = vmx_set_hv_timer, ++ .cancel_hv_timer = vmx_cancel_hv_timer, ++#endif ++ ++ .setup_mce = vmx_setup_mce, ++ ++ .get_nested_state = vmx_get_nested_state, ++ .set_nested_state = vmx_set_nested_state, ++ .get_vmcs12_pages = nested_get_vmcs12_pages, ++ ++ .smi_allowed = vmx_smi_allowed, ++ .pre_enter_smm = vmx_pre_enter_smm, ++ .pre_leave_smm = vmx_pre_leave_smm, ++ .enable_smi_window = enable_smi_window, ++}; ++ 
++static void vmx_cleanup_l1d_flush(void) ++{ ++ if (vmx_l1d_flush_pages) { ++ free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER); ++ vmx_l1d_flush_pages = NULL; ++ } ++ /* Restore state so sysfs ignores VMX */ ++ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; ++} ++ ++static void vmx_exit(void) ++{ ++#ifdef CONFIG_KEXEC_CORE ++ RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); ++ synchronize_rcu(); ++#endif ++ ++ kvm_exit(); ++ ++#if IS_ENABLED(CONFIG_HYPERV) ++ if (static_branch_unlikely(&enable_evmcs)) { ++ int cpu; ++ struct hv_vp_assist_page *vp_ap; ++ /* ++ * Reset everything to support using non-enlightened VMCS ++ * access later (e.g. when we reload the module with ++ * enlightened_vmcs=0) ++ */ ++ for_each_online_cpu(cpu) { ++ vp_ap = hv_get_vp_assist_page(cpu); ++ ++ if (!vp_ap) ++ continue; ++ ++ vp_ap->current_nested_vmcs = 0; ++ vp_ap->enlighten_vmentry = 0; ++ } ++ ++ static_branch_disable(&enable_evmcs); ++ } ++#endif ++ vmx_cleanup_l1d_flush(); ++} ++module_exit(vmx_exit); ++ ++static int __init vmx_init(void) ++{ ++ int r; ++ ++#if IS_ENABLED(CONFIG_HYPERV) ++ /* ++ * Enlightened VMCS usage should be recommended and the host needs ++ * to support eVMCS v1 or above. We can also disable eVMCS support ++ * with module parameter. ++ */ ++ if (enlightened_vmcs && ++ ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && ++ (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= ++ KVM_EVMCS_VERSION) { ++ int cpu; ++ ++ /* Check that we have assist pages on all online CPUs */ ++ for_each_online_cpu(cpu) { ++ if (!hv_get_vp_assist_page(cpu)) { ++ enlightened_vmcs = false; ++ break; ++ } ++ } ++ ++ if (enlightened_vmcs) { ++ pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); ++ static_branch_enable(&enable_evmcs); ++ } ++ } else { ++ enlightened_vmcs = false; ++ } ++#endif ++ ++ r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), ++ __alignof__(struct vcpu_vmx), THIS_MODULE); ++ if (r) ++ return r; ++ ++ /* ++ * Must be called after kvm_init() so enable_ept is properly set ++ * up. Hand the parameter mitigation value in which was stored in ++ * the pre module init parser. If no parameter was given, it will ++ * contain 'auto' which will be turned into the default 'cond' ++ * mitigation mode. 
++ */ ++ if (boot_cpu_has(X86_BUG_L1TF)) { ++ r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); ++ if (r) { ++ vmx_exit(); ++ return r; ++ } ++ } ++ ++#ifdef CONFIG_KEXEC_CORE ++ rcu_assign_pointer(crash_vmclear_loaded_vmcss, ++ crash_vmclear_local_loaded_vmcss); ++#endif ++ vmx_check_vmcs12_offsets(); ++ ++ return 0; ++} ++module_init(vmx_init); +diff -uprN kernel/arch/x86/kvm/x86.c kernel_new/arch/x86/kvm/x86.c +--- kernel/arch/x86/kvm/x86.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kvm/x86.c 2021-04-01 18:28:07.659863283 +0800 +@@ -42,6 +42,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -160,6 +161,7 @@ struct kvm_shared_msrs_global { + struct kvm_shared_msrs { + struct user_return_notifier urn; + bool registered; ++ bool dirty; + struct kvm_shared_msr_values { + u64 host; + u64 curr; +@@ -228,12 +230,31 @@ static inline void kvm_async_pf_hash_res + vcpu->arch.apf.gfns[i] = ~0; + } + ++static void kvm_restore_shared_msrs(struct kvm_shared_msrs *locals) ++{ ++ struct kvm_shared_msr_values *values; ++ unsigned long flags; ++ unsigned int slot; ++ ++ flags = hard_cond_local_irq_save(); ++ if (locals->dirty) { ++ for (slot = 0; slot < shared_msrs_global.nr; ++slot) { ++ values = &locals->values[slot]; ++ if (values->host != values->curr) { ++ wrmsrl(shared_msrs_global.msrs[slot], ++ values->host); ++ values->curr = values->host; ++ } ++ } ++ locals->dirty = false; ++ } ++ hard_cond_local_irq_restore(flags); ++} ++ + static void kvm_on_user_return(struct user_return_notifier *urn) + { +- unsigned slot; + struct kvm_shared_msrs *locals + = container_of(urn, struct kvm_shared_msrs, urn); +- struct kvm_shared_msr_values *values; + unsigned long flags; + + /* +@@ -246,13 +267,8 @@ static void kvm_on_user_return(struct us + user_return_notifier_unregister(urn); + } + local_irq_restore(flags); +- for (slot = 0; slot < shared_msrs_global.nr; ++slot) { +- values = &locals->values[slot]; +- if (values->host != values->curr) { +- wrmsrl(shared_msrs_global.msrs[slot], values->host); +- values->curr = values->host; +- } +- } ++ kvm_restore_shared_msrs(locals); ++ __ipipe_exit_vm(); + } + + static void shared_msr_update(unsigned slot, u32 msr) +@@ -302,6 +318,7 @@ int kvm_set_shared_msr(unsigned slot, u6 + if (err) + return 1; + ++ smsr->dirty = true; + smsr->values[slot].curr = value; + if (!smsr->registered) { + smsr->urn.on_user_return = kvm_on_user_return; +@@ -3252,11 +3269,25 @@ static void kvm_steal_time_set_preempted + + void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) + { ++ unsigned int cpu = smp_processor_id(); ++ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); ++ unsigned long flags; + int idx; + + if (vcpu->preempted) + vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu); + ++ flags = hard_cond_local_irq_save(); ++ ++ /* ++ * Do not update steal time accounting while running over the head ++ * domain as this may introduce high latencies and will also issue ++ * context violation reports. The code will be executed when kvm does ++ * the regular kvm_arch_vcpu_put, after returning from the head domain. ++ */ ++ if (!ipipe_root_p) ++ goto skip_steal_time_update; ++ + /* + * Disable page faults because we're in atomic context here. 
+ * kvm_write_guest_offset_cached() would call might_fault() +@@ -3274,6 +3305,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu * + kvm_steal_time_set_preempted(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + pagefault_enable(); ++skip_steal_time_update: + kvm_x86_ops->vcpu_put(vcpu); + vcpu->arch.last_host_tsc = rdtsc(); + /* +@@ -3282,7 +3314,42 @@ void kvm_arch_vcpu_put(struct kvm_vcpu * + * guest. do_debug expects dr6 to be cleared after it runs, do the same. + */ + set_debugreg(0, 6); ++ ++#ifdef CONFIG_IPIPE ++ vcpu->ipipe_put_vcpu = false; ++ if (!smsr->dirty) ++ __ipipe_exit_vm(); ++#endif ++ ++ hard_cond_local_irq_restore(flags); ++} ++ ++#ifdef CONFIG_IPIPE ++ ++void __ipipe_handle_vm_preemption(struct ipipe_vm_notifier *nfy) ++{ ++ unsigned int cpu = raw_smp_processor_id(); ++ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); ++ struct kvm_vcpu *vcpu; ++ ++ vcpu = container_of(nfy, struct kvm_vcpu, ipipe_notifier); ++ ++ /* ++ * We may leave kvm_arch_vcpu_put with the ipipe notifier still ++ * registered in case shared MSRs are still active. If a VM preemption ++ * hits us after that point but before the user return notifier fired, ++ * we may run kvm_arch_vcpu_put again from here. Do not rely on this ++ * being harmless and rather use a flag to decide if the run is needed. ++ */ ++ if (vcpu->ipipe_put_vcpu) ++ kvm_arch_vcpu_put(vcpu); ++ ++ kvm_restore_shared_msrs(smsr); ++ __ipipe_exit_vm(); + } ++EXPORT_SYMBOL_GPL(__ipipe_handle_vm_preemption); ++ ++#endif + + static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s) +@@ -7682,6 +7749,13 @@ static int vcpu_enter_guest(struct kvm_v + } + + preempt_disable(); ++ local_irq_disable(); ++ hard_cond_local_irq_disable(); ++ ++#ifdef CONFIG_IPIPE ++ __ipipe_enter_vm(&vcpu->ipipe_notifier); ++ vcpu->ipipe_put_vcpu = true; ++#endif + + kvm_x86_ops->prepare_guest_switch(vcpu); + +@@ -7690,7 +7764,6 @@ static int vcpu_enter_guest(struct kvm_v + * IPI are then delayed after guest entry, which ensures that they + * result in virtual interrupt delivery. + */ +- local_irq_disable(); + vcpu->mode = IN_GUEST_MODE; + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); +@@ -7720,6 +7793,7 @@ static int vcpu_enter_guest(struct kvm_v + || need_resched() || signal_pending(current)) { + vcpu->mode = OUTSIDE_GUEST_MODE; + smp_wmb(); ++ hard_cond_local_irq_enable(); + local_irq_enable(); + preempt_enable(); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); +@@ -7787,6 +7861,7 @@ static int vcpu_enter_guest(struct kvm_v + + guest_exit_irqoff(); + ++ hard_cond_local_irq_enable(); + local_irq_enable(); + preempt_enable(); + +@@ -8608,6 +8683,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st + "guest TSC will not be reliable\n"); + + vcpu = kvm_x86_ops->vcpu_create(kvm, id); ++#ifdef CONFIG_IPIPE ++ vcpu->ipipe_notifier.handler = __ipipe_handle_vm_preemption; ++#endif + + return vcpu; + } +diff -uprN kernel/arch/x86/kvm/x86.c.orig kernel_new/arch/x86/kvm/x86.c.orig +--- kernel/arch/x86/kvm/x86.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kvm/x86.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,9763 @@ ++/* ++ * Kernel-based Virtual Machine driver for Linux ++ * ++ * derived from drivers/kvm/kvm_main.c ++ * ++ * Copyright (C) 2006 Qumranet, Inc. ++ * Copyright (C) 2008 Qumranet, Inc. ++ * Copyright IBM Corporation, 2008 ++ * Copyright 2010 Red Hat, Inc. and/or its affiliates. 
++ * ++ * Authors: ++ * Avi Kivity ++ * Yaniv Kamay ++ * Amit Shah ++ * Ben-Ami Yassour ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. ++ * ++ */ ++ ++#include ++#include "irq.h" ++#include "mmu.h" ++#include "i8254.h" ++#include "tss.h" ++#include "kvm_cache_regs.h" ++#include "x86.h" ++#include "cpuid.h" ++#include "pmu.h" ++#include "hyperv.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include /* Ugh! */ ++#include ++#include ++#include ++#include ++#include ++ ++#define CREATE_TRACE_POINTS ++#include "trace.h" ++ ++#define MAX_IO_MSRS 256 ++#define KVM_MAX_MCE_BANKS 32 ++u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P; ++EXPORT_SYMBOL_GPL(kvm_mce_cap_supported); ++ ++#define emul_to_vcpu(ctxt) \ ++ container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) ++ ++/* EFER defaults: ++ * - enable syscall per default because its emulated by KVM ++ * - enable LME and LMA per default on 64 bit KVM ++ */ ++#ifdef CONFIG_X86_64 ++static ++u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); ++#else ++static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); ++#endif ++ ++#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ ++#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ ++ ++#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ ++ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) ++ ++static void update_cr8_intercept(struct kvm_vcpu *vcpu); ++static void process_nmi(struct kvm_vcpu *vcpu); ++static void enter_smm(struct kvm_vcpu *vcpu); ++static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); ++static void store_regs(struct kvm_vcpu *vcpu); ++static int sync_regs(struct kvm_vcpu *vcpu); ++ ++struct kvm_x86_ops *kvm_x86_ops __read_mostly; ++EXPORT_SYMBOL_GPL(kvm_x86_ops); ++ ++static bool __read_mostly ignore_msrs = 0; ++module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); ++ ++static bool __read_mostly report_ignored_msrs = true; ++module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); ++ ++unsigned int min_timer_period_us = 200; ++module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); ++ ++static bool __read_mostly kvmclock_periodic_sync = true; ++module_param(kvmclock_periodic_sync, bool, S_IRUGO); ++ ++bool __read_mostly kvm_has_tsc_control; ++EXPORT_SYMBOL_GPL(kvm_has_tsc_control); ++u32 __read_mostly kvm_max_guest_tsc_khz; ++EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); ++u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits; ++EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits); ++u64 __read_mostly kvm_max_tsc_scaling_ratio; ++EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio); ++u64 __read_mostly kvm_default_tsc_scaling_ratio; ++EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio); ++ ++/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ ++static u32 __read_mostly tsc_tolerance_ppm = 250; ++module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); ++ ++/* lapic timer advance (tscdeadline mode only) in nanoseconds */ ++unsigned int __read_mostly lapic_timer_advance_ns = 0; ++module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); 
++EXPORT_SYMBOL_GPL(lapic_timer_advance_ns); ++ ++static bool __read_mostly vector_hashing = true; ++module_param(vector_hashing, bool, S_IRUGO); ++ ++bool __read_mostly enable_vmware_backdoor = false; ++module_param(enable_vmware_backdoor, bool, S_IRUGO); ++EXPORT_SYMBOL_GPL(enable_vmware_backdoor); ++ ++static bool __read_mostly force_emulation_prefix = false; ++module_param(force_emulation_prefix, bool, S_IRUGO); ++ ++#define KVM_NR_SHARED_MSRS 16 ++ ++struct kvm_shared_msrs_global { ++ int nr; ++ u32 msrs[KVM_NR_SHARED_MSRS]; ++}; ++ ++struct kvm_shared_msrs { ++ struct user_return_notifier urn; ++ bool registered; ++ struct kvm_shared_msr_values { ++ u64 host; ++ u64 curr; ++ } values[KVM_NR_SHARED_MSRS]; ++}; ++ ++static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; ++static struct kvm_shared_msrs __percpu *shared_msrs; ++ ++struct kvm_stats_debugfs_item debugfs_entries[] = { ++ { "pf_fixed", VCPU_STAT(pf_fixed) }, ++ { "pf_guest", VCPU_STAT(pf_guest) }, ++ { "tlb_flush", VCPU_STAT(tlb_flush) }, ++ { "invlpg", VCPU_STAT(invlpg) }, ++ { "exits", VCPU_STAT(exits) }, ++ { "io_exits", VCPU_STAT(io_exits) }, ++ { "mmio_exits", VCPU_STAT(mmio_exits) }, ++ { "signal_exits", VCPU_STAT(signal_exits) }, ++ { "irq_window", VCPU_STAT(irq_window_exits) }, ++ { "nmi_window", VCPU_STAT(nmi_window_exits) }, ++ { "halt_exits", VCPU_STAT(halt_exits) }, ++ { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, ++ { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, ++ { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, ++ { "halt_wakeup", VCPU_STAT(halt_wakeup) }, ++ { "hypercalls", VCPU_STAT(hypercalls) }, ++ { "request_irq", VCPU_STAT(request_irq_exits) }, ++ { "irq_exits", VCPU_STAT(irq_exits) }, ++ { "host_state_reload", VCPU_STAT(host_state_reload) }, ++ { "fpu_reload", VCPU_STAT(fpu_reload) }, ++ { "insn_emulation", VCPU_STAT(insn_emulation) }, ++ { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, ++ { "irq_injections", VCPU_STAT(irq_injections) }, ++ { "nmi_injections", VCPU_STAT(nmi_injections) }, ++ { "req_event", VCPU_STAT(req_event) }, ++ { "l1d_flush", VCPU_STAT(l1d_flush) }, ++ { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, ++ { "mmu_pte_write", VM_STAT(mmu_pte_write) }, ++ { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, ++ { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) }, ++ { "mmu_flooded", VM_STAT(mmu_flooded) }, ++ { "mmu_recycled", VM_STAT(mmu_recycled) }, ++ { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, ++ { "mmu_unsync", VM_STAT(mmu_unsync) }, ++ { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, ++ { "largepages", VM_STAT(lpages, .mode = 0444) }, ++ { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) }, ++ { "max_mmu_page_hash_collisions", ++ VM_STAT(max_mmu_page_hash_collisions) }, ++ { NULL } ++}; ++ ++/* debugfs entries of Detail For vcpu stat EXtension */ ++struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { ++ { NULL } ++}; ++ ++u64 __read_mostly host_xcr0; ++ ++static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); ++ ++static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) ++{ ++ int i; ++ for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++) ++ vcpu->arch.apf.gfns[i] = ~0; ++} ++ ++static void kvm_on_user_return(struct user_return_notifier *urn) ++{ ++ unsigned slot; ++ struct kvm_shared_msrs *locals ++ = container_of(urn, struct kvm_shared_msrs, urn); ++ struct kvm_shared_msr_values *values; ++ unsigned long flags; ++ ++ /* ++ * Disabling irqs at this point since the following 
code could be ++ * interrupted and executed through kvm_arch_hardware_disable() ++ */ ++ local_irq_save(flags); ++ if (locals->registered) { ++ locals->registered = false; ++ user_return_notifier_unregister(urn); ++ } ++ local_irq_restore(flags); ++ for (slot = 0; slot < shared_msrs_global.nr; ++slot) { ++ values = &locals->values[slot]; ++ if (values->host != values->curr) { ++ wrmsrl(shared_msrs_global.msrs[slot], values->host); ++ values->curr = values->host; ++ } ++ } ++} ++ ++static void shared_msr_update(unsigned slot, u32 msr) ++{ ++ u64 value; ++ unsigned int cpu = smp_processor_id(); ++ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); ++ ++ /* only read, and nobody should modify it at this time, ++ * so don't need lock */ ++ if (slot >= shared_msrs_global.nr) { ++ printk(KERN_ERR "kvm: invalid MSR slot!"); ++ return; ++ } ++ rdmsrl_safe(msr, &value); ++ smsr->values[slot].host = value; ++ smsr->values[slot].curr = value; ++} ++ ++void kvm_define_shared_msr(unsigned slot, u32 msr) ++{ ++ BUG_ON(slot >= KVM_NR_SHARED_MSRS); ++ shared_msrs_global.msrs[slot] = msr; ++ if (slot >= shared_msrs_global.nr) ++ shared_msrs_global.nr = slot + 1; ++} ++EXPORT_SYMBOL_GPL(kvm_define_shared_msr); ++ ++static void kvm_shared_msr_cpu_online(void) ++{ ++ unsigned i; ++ ++ for (i = 0; i < shared_msrs_global.nr; ++i) ++ shared_msr_update(i, shared_msrs_global.msrs[i]); ++} ++ ++int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); ++ int err; ++ ++ value = (value & mask) | (smsr->values[slot].host & ~mask); ++ if (value == smsr->values[slot].curr) ++ return 0; ++ err = wrmsrl_safe(shared_msrs_global.msrs[slot], value); ++ if (err) ++ return 1; ++ ++ smsr->values[slot].curr = value; ++ if (!smsr->registered) { ++ smsr->urn.on_user_return = kvm_on_user_return; ++ user_return_notifier_register(&smsr->urn); ++ smsr->registered = true; ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_shared_msr); ++ ++static void drop_user_return_notifiers(void) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); ++ ++ if (smsr->registered) ++ kvm_on_user_return(&smsr->urn); ++} ++ ++u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) ++{ ++ return vcpu->arch.apic_base; ++} ++EXPORT_SYMBOL_GPL(kvm_get_apic_base); ++ ++enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) ++{ ++ return kvm_apic_mode(kvm_get_apic_base(vcpu)); ++} ++EXPORT_SYMBOL_GPL(kvm_get_apic_mode); ++ ++int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ enum lapic_mode old_mode = kvm_get_apic_mode(vcpu); ++ enum lapic_mode new_mode = kvm_apic_mode(msr_info->data); ++ u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff | ++ (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE); ++ ++ if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID) ++ return 1; ++ if (!msr_info->host_initiated) { ++ if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC) ++ return 1; ++ if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC) ++ return 1; ++ } ++ ++ kvm_lapic_set_base(vcpu, msr_info->data); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_apic_base); ++ ++asmlinkage __visible void kvm_spurious_fault(void) ++{ ++ /* Fault while not rebooting. We want the trace. 
*/ ++ BUG(); ++} ++EXPORT_SYMBOL_GPL(kvm_spurious_fault); ++ ++#define EXCPT_BENIGN 0 ++#define EXCPT_CONTRIBUTORY 1 ++#define EXCPT_PF 2 ++ ++static int exception_class(int vector) ++{ ++ switch (vector) { ++ case PF_VECTOR: ++ return EXCPT_PF; ++ case DE_VECTOR: ++ case TS_VECTOR: ++ case NP_VECTOR: ++ case SS_VECTOR: ++ case GP_VECTOR: ++ return EXCPT_CONTRIBUTORY; ++ default: ++ break; ++ } ++ return EXCPT_BENIGN; ++} ++ ++#define EXCPT_FAULT 0 ++#define EXCPT_TRAP 1 ++#define EXCPT_ABORT 2 ++#define EXCPT_INTERRUPT 3 ++ ++static int exception_type(int vector) ++{ ++ unsigned int mask; ++ ++ if (WARN_ON(vector > 31 || vector == NMI_VECTOR)) ++ return EXCPT_INTERRUPT; ++ ++ mask = 1 << vector; ++ ++ /* #DB is trap, as instruction watchpoints are handled elsewhere */ ++ if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR))) ++ return EXCPT_TRAP; ++ ++ if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR))) ++ return EXCPT_ABORT; ++ ++ /* Reserved exceptions will result in fault */ ++ return EXCPT_FAULT; ++} ++ ++static void kvm_multiple_exception(struct kvm_vcpu *vcpu, ++ unsigned nr, bool has_error, u32 error_code, ++ bool reinject) ++{ ++ u32 prev_nr; ++ int class1, class2; ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) { ++ queue: ++ if (has_error && !is_protmode(vcpu)) ++ has_error = false; ++ if (reinject) { ++ /* ++ * On vmentry, vcpu->arch.exception.pending is only ++ * true if an event injection was blocked by ++ * nested_run_pending. In that case, however, ++ * vcpu_enter_guest requests an immediate exit, ++ * and the guest shouldn't proceed far enough to ++ * need reinjection. ++ */ ++ WARN_ON_ONCE(vcpu->arch.exception.pending); ++ vcpu->arch.exception.injected = true; ++ } else { ++ vcpu->arch.exception.pending = true; ++ vcpu->arch.exception.injected = false; ++ } ++ vcpu->arch.exception.has_error_code = has_error; ++ vcpu->arch.exception.nr = nr; ++ vcpu->arch.exception.error_code = error_code; ++ return; ++ } ++ ++ /* to check exception */ ++ prev_nr = vcpu->arch.exception.nr; ++ if (prev_nr == DF_VECTOR) { ++ /* triple fault -> shutdown */ ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ return; ++ } ++ class1 = exception_class(prev_nr); ++ class2 = exception_class(nr); ++ if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) ++ || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { ++ /* ++ * Generate double fault per SDM Table 5-5. Set ++ * exception.pending = true so that the double fault ++ * can trigger a nested vmexit. 
++ */ ++ vcpu->arch.exception.pending = true; ++ vcpu->arch.exception.injected = false; ++ vcpu->arch.exception.has_error_code = true; ++ vcpu->arch.exception.nr = DF_VECTOR; ++ vcpu->arch.exception.error_code = 0; ++ } else ++ /* replace previous exception with a new one in a hope ++ that instruction re-execution will regenerate lost ++ exception */ ++ goto queue; ++} ++ ++void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) ++{ ++ kvm_multiple_exception(vcpu, nr, false, 0, false); ++} ++EXPORT_SYMBOL_GPL(kvm_queue_exception); ++ ++void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) ++{ ++ kvm_multiple_exception(vcpu, nr, false, 0, true); ++} ++EXPORT_SYMBOL_GPL(kvm_requeue_exception); ++ ++int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) ++{ ++ if (err) ++ kvm_inject_gp(vcpu, 0); ++ else ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ return 1; ++} ++EXPORT_SYMBOL_GPL(kvm_complete_insn_gp); ++ ++void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) ++{ ++ ++vcpu->stat.pf_guest; ++ vcpu->arch.exception.nested_apf = ++ is_guest_mode(vcpu) && fault->async_page_fault; ++ if (vcpu->arch.exception.nested_apf) ++ vcpu->arch.apf.nested_apf_token = fault->address; ++ else ++ vcpu->arch.cr2 = fault->address; ++ kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); ++} ++EXPORT_SYMBOL_GPL(kvm_inject_page_fault); ++ ++static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) ++{ ++ if (mmu_is_nested(vcpu) && !fault->nested_page_fault) ++ vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); ++ else ++ vcpu->arch.mmu.inject_page_fault(vcpu, fault); ++ ++ return fault->nested_page_fault; ++} ++ ++void kvm_inject_nmi(struct kvm_vcpu *vcpu) ++{ ++ atomic_inc(&vcpu->arch.nmi_queued); ++ kvm_make_request(KVM_REQ_NMI, vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_inject_nmi); ++ ++void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) ++{ ++ kvm_multiple_exception(vcpu, nr, true, error_code, false); ++} ++EXPORT_SYMBOL_GPL(kvm_queue_exception_e); ++ ++void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) ++{ ++ kvm_multiple_exception(vcpu, nr, true, error_code, true); ++} ++EXPORT_SYMBOL_GPL(kvm_requeue_exception_e); ++ ++/* ++ * Checks if cpl <= required_cpl; if true, return true. Otherwise queue ++ * a #GP and return false. ++ */ ++bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) ++{ ++ if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl) ++ return true; ++ kvm_queue_exception_e(vcpu, GP_VECTOR, 0); ++ return false; ++} ++EXPORT_SYMBOL_GPL(kvm_require_cpl); ++ ++bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) ++{ ++ if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE)) ++ return true; ++ ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return false; ++} ++EXPORT_SYMBOL_GPL(kvm_require_dr); ++ ++/* ++ * This function will be used to read from the physical memory of the currently ++ * running guest. The difference to kvm_vcpu_read_guest_page is that this function ++ * can read from guest physical or from the guest's guest physical memory. 
++ */ ++int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, ++ gfn_t ngfn, void *data, int offset, int len, ++ u32 access) ++{ ++ struct x86_exception exception; ++ gfn_t real_gfn; ++ gpa_t ngpa; ++ ++ ngpa = gfn_to_gpa(ngfn); ++ real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception); ++ if (real_gfn == UNMAPPED_GVA) ++ return -EFAULT; ++ ++ real_gfn = gpa_to_gfn(real_gfn); ++ ++ return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len); ++} ++EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu); ++ ++static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, ++ void *data, int offset, int len, u32 access) ++{ ++ return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn, ++ data, offset, len, access); ++} ++ ++static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu) ++{ ++ return rsvd_bits(cpuid_maxphyaddr(vcpu), 63) | rsvd_bits(5, 8) | ++ rsvd_bits(1, 2); ++} ++ ++/* ++ * Load the pae pdptrs. Return 1 if they are all valid, 0 otherwise. ++ */ ++int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) ++{ ++ gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; ++ unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; ++ int i; ++ int ret; ++ u64 pdpte[ARRAY_SIZE(mmu->pdptrs)]; ++ ++ ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte, ++ offset * sizeof(u64), sizeof(pdpte), ++ PFERR_USER_MASK|PFERR_WRITE_MASK); ++ if (ret < 0) { ++ ret = 0; ++ goto out; ++ } ++ for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { ++ if ((pdpte[i] & PT_PRESENT_MASK) && ++ (pdpte[i] & pdptr_rsvd_bits(vcpu))) { ++ ret = 0; ++ goto out; ++ } ++ } ++ ret = 1; ++ ++ memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)); ++ __set_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_avail); ++ __set_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_dirty); ++out: ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(load_pdptrs); ++ ++bool pdptrs_changed(struct kvm_vcpu *vcpu) ++{ ++ u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)]; ++ bool changed = true; ++ int offset; ++ gfn_t gfn; ++ int r; ++ ++ if (!is_pae_paging(vcpu)) ++ return false; ++ ++ if (!test_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_avail)) ++ return true; ++ ++ gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT; ++ offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1); ++ r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), ++ PFERR_USER_MASK | PFERR_WRITE_MASK); ++ if (r < 0) ++ goto out; ++ changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0; ++out: ++ ++ return changed; ++} ++EXPORT_SYMBOL_GPL(pdptrs_changed); ++ ++int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) ++{ ++ unsigned long old_cr0 = kvm_read_cr0(vcpu); ++ unsigned long update_bits = X86_CR0_PG | X86_CR0_WP; ++ ++ cr0 |= X86_CR0_ET; ++ ++#ifdef CONFIG_X86_64 ++ if (cr0 & 0xffffffff00000000UL) ++ return 1; ++#endif ++ ++ cr0 &= ~CR0_RESERVED_BITS; ++ ++ if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) ++ return 1; ++ ++ if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) ++ return 1; ++ ++ if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { ++#ifdef CONFIG_X86_64 ++ if ((vcpu->arch.efer & EFER_LME)) { ++ int cs_db, cs_l; ++ ++ if (!is_pae(vcpu)) ++ return 1; ++ kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ++ if (cs_l) ++ return 1; ++ } else ++#endif ++ if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, ++ kvm_read_cr3(vcpu))) ++ return 1; ++ } ++ ++ if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) ++ return 1; ++ ++ kvm_x86_ops->set_cr0(vcpu, 
cr0); ++ ++ if ((cr0 ^ old_cr0) & X86_CR0_PG) { ++ kvm_clear_async_pf_completion_queue(vcpu); ++ kvm_async_pf_hash_reset(vcpu); ++ } ++ ++ if ((cr0 ^ old_cr0) & update_bits) ++ kvm_mmu_reset_context(vcpu); ++ ++ if (((cr0 ^ old_cr0) & X86_CR0_CD) && ++ kvm_arch_has_noncoherent_dma(vcpu->kvm) && ++ !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) ++ kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_cr0); ++ ++void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) ++{ ++ (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); ++} ++EXPORT_SYMBOL_GPL(kvm_lmsw); ++ ++void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) ++{ ++ if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && ++ !vcpu->guest_xcr0_loaded) { ++ /* kvm_set_xcr() also depends on this */ ++ if (vcpu->arch.xcr0 != host_xcr0) ++ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); ++ vcpu->guest_xcr0_loaded = 1; ++ } ++} ++EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); ++ ++void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) ++{ ++ if (vcpu->guest_xcr0_loaded) { ++ if (vcpu->arch.xcr0 != host_xcr0) ++ xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); ++ vcpu->guest_xcr0_loaded = 0; ++ } ++} ++EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0); ++ ++static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) ++{ ++ u64 xcr0 = xcr; ++ u64 old_xcr0 = vcpu->arch.xcr0; ++ u64 valid_bits; ++ ++ /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ ++ if (index != XCR_XFEATURE_ENABLED_MASK) ++ return 1; ++ if (!(xcr0 & XFEATURE_MASK_FP)) ++ return 1; ++ if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE)) ++ return 1; ++ ++ /* ++ * Do not allow the guest to set bits that we do not support ++ * saving. However, xcr0 bit 0 is always set, even if the ++ * emulated CPU does not support XSAVE (see fx_init). 
++ */ ++ valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP; ++ if (xcr0 & ~valid_bits) ++ return 1; ++ ++ if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) != ++ (!(xcr0 & XFEATURE_MASK_BNDCSR))) ++ return 1; ++ ++ if (xcr0 & XFEATURE_MASK_AVX512) { ++ if (!(xcr0 & XFEATURE_MASK_YMM)) ++ return 1; ++ if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) ++ return 1; ++ } ++ vcpu->arch.xcr0 = xcr0; ++ ++ if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) ++ kvm_update_cpuid(vcpu); ++ return 0; ++} ++ ++int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) ++{ ++ if (kvm_x86_ops->get_cpl(vcpu) != 0 || ++ __kvm_set_xcr(vcpu, index, xcr)) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_xcr); ++ ++static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++{ ++ if (cr4 & CR4_RESERVED_BITS) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++{ ++ unsigned long old_cr4 = kvm_read_cr4(vcpu); ++ unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | ++ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; ++ ++ if (kvm_valid_cr4(vcpu, cr4)) ++ return 1; ++ ++ if (is_long_mode(vcpu)) { ++ if (!(cr4 & X86_CR4_PAE)) ++ return 1; ++ } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) ++ && ((cr4 ^ old_cr4) & pdptr_bits) ++ && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, ++ kvm_read_cr3(vcpu))) ++ return 1; ++ ++ if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID)) ++ return 1; ++ ++ /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ ++ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) ++ return 1; ++ } ++ ++ if (kvm_x86_ops->set_cr4(vcpu, cr4)) ++ return 1; ++ ++ if (((cr4 ^ old_cr4) & pdptr_bits) || ++ (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) ++ kvm_mmu_reset_context(vcpu); ++ ++ if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) ++ kvm_update_cpuid(vcpu); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_cr4); ++ ++int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) ++{ ++ bool skip_tlb_flush = false; ++#ifdef CONFIG_X86_64 ++ bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); ++ ++ if (pcid_enabled) { ++ skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH; ++ cr3 &= ~X86_CR3_PCID_NOFLUSH; ++ } ++#endif ++ ++ if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { ++ if (!skip_tlb_flush) { ++ kvm_mmu_sync_roots(vcpu); ++ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++ } ++ return 0; ++ } ++ ++ if (is_long_mode(vcpu) && ++ (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))) ++ return 1; ++ else if (is_pae_paging(vcpu) && ++ !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) ++ return 1; ++ ++ kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush); ++ vcpu->arch.cr3 = cr3; ++ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); ++ ++ return 
0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_cr3); ++ ++int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) ++{ ++ if (cr8 & CR8_RESERVED_BITS) ++ return 1; ++ if (lapic_in_kernel(vcpu)) ++ kvm_lapic_set_tpr(vcpu, cr8); ++ else ++ vcpu->arch.cr8 = cr8; ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_cr8); ++ ++unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) ++{ ++ if (lapic_in_kernel(vcpu)) ++ return kvm_lapic_get_cr8(vcpu); ++ else ++ return vcpu->arch.cr8; ++} ++EXPORT_SYMBOL_GPL(kvm_get_cr8); ++ ++static void kvm_update_dr0123(struct kvm_vcpu *vcpu) ++{ ++ int i; ++ ++ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { ++ for (i = 0; i < KVM_NR_DB_REGS; i++) ++ vcpu->arch.eff_db[i] = vcpu->arch.db[i]; ++ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; ++ } ++} ++ ++static void kvm_update_dr6(struct kvm_vcpu *vcpu) ++{ ++ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) ++ kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6); ++} ++ ++static void kvm_update_dr7(struct kvm_vcpu *vcpu) ++{ ++ unsigned long dr7; ++ ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) ++ dr7 = vcpu->arch.guest_debug_dr7; ++ else ++ dr7 = vcpu->arch.dr7; ++ kvm_x86_ops->set_dr7(vcpu, dr7); ++ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED; ++ if (dr7 & DR7_BP_EN_MASK) ++ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; ++} ++ ++static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) ++{ ++ u64 fixed = DR6_FIXED_1; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) ++ fixed |= DR6_RTM; ++ return fixed; ++} ++ ++static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) ++{ ++ switch (dr) { ++ case 0 ... 3: ++ vcpu->arch.db[dr] = val; ++ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) ++ vcpu->arch.eff_db[dr] = val; ++ break; ++ case 4: ++ /* fall through */ ++ case 6: ++ if (val & 0xffffffff00000000ULL) ++ return -1; /* #GP */ ++ vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu); ++ kvm_update_dr6(vcpu); ++ break; ++ case 5: ++ /* fall through */ ++ default: /* 7 */ ++ if (val & 0xffffffff00000000ULL) ++ return -1; /* #GP */ ++ vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; ++ kvm_update_dr7(vcpu); ++ break; ++ } ++ ++ return 0; ++} ++ ++int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) ++{ ++ if (__kvm_set_dr(vcpu, dr, val)) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_dr); ++ ++int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) ++{ ++ switch (dr) { ++ case 0 ... 3: ++ *val = vcpu->arch.db[dr]; ++ break; ++ case 4: ++ /* fall through */ ++ case 6: ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) ++ *val = vcpu->arch.dr6; ++ else ++ *val = kvm_x86_ops->get_dr6(vcpu); ++ break; ++ case 5: ++ /* fall through */ ++ default: /* 7 */ ++ *val = vcpu->arch.dr7; ++ break; ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_get_dr); ++ ++bool kvm_rdpmc(struct kvm_vcpu *vcpu) ++{ ++ u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); ++ u64 data; ++ int err; ++ ++ err = kvm_pmu_rdpmc(vcpu, ecx, &data); ++ if (err) ++ return err; ++ kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data); ++ kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32); ++ return err; ++} ++EXPORT_SYMBOL_GPL(kvm_rdpmc); ++ ++/* ++ * List of msr numbers which we expose to userspace through KVM_GET_MSRS ++ * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. ++ * ++ * This list is modified at module load time to reflect the ++ * capabilities of the host cpu. This capabilities test skips MSRs that are ++ * kvm-specific. 
Those are put in emulated_msrs; filtering of emulated_msrs ++ * may depend on host virtualization features rather than host cpu features. ++ */ ++ ++static u32 msrs_to_save[] = { ++ MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, ++ MSR_STAR, ++#ifdef CONFIG_X86_64 ++ MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, ++#endif ++ MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, ++ MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, ++ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES ++}; ++ ++static unsigned num_msrs_to_save; ++ ++static u32 emulated_msrs[] = { ++ MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, ++ MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, ++ HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, ++ HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, ++ HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY, ++ HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, ++ HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, ++ HV_X64_MSR_RESET, ++ HV_X64_MSR_VP_INDEX, ++ HV_X64_MSR_VP_RUNTIME, ++ HV_X64_MSR_SCONTROL, ++ HV_X64_MSR_STIMER0_CONFIG, ++ HV_X64_MSR_VP_ASSIST_PAGE, ++ HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, ++ HV_X64_MSR_TSC_EMULATION_STATUS, ++ ++ MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, ++ MSR_KVM_PV_EOI_EN, ++ ++ MSR_IA32_TSC_ADJUST, ++ MSR_IA32_TSCDEADLINE, ++ MSR_IA32_MISC_ENABLE, ++ MSR_IA32_MCG_STATUS, ++ MSR_IA32_MCG_CTL, ++ MSR_IA32_MCG_EXT_CTL, ++ MSR_IA32_SMBASE, ++ MSR_SMI_COUNT, ++ MSR_PLATFORM_INFO, ++ MSR_MISC_FEATURES_ENABLES, ++ MSR_AMD64_VIRT_SPEC_CTRL, ++}; ++ ++static unsigned num_emulated_msrs; ++ ++/* ++ * List of msr numbers which are used to expose MSR-based features that ++ * can be used by a hypervisor to validate requested CPU features. ++ */ ++static u32 msr_based_features[] = { ++ MSR_IA32_VMX_BASIC, ++ MSR_IA32_VMX_TRUE_PINBASED_CTLS, ++ MSR_IA32_VMX_PINBASED_CTLS, ++ MSR_IA32_VMX_TRUE_PROCBASED_CTLS, ++ MSR_IA32_VMX_PROCBASED_CTLS, ++ MSR_IA32_VMX_TRUE_EXIT_CTLS, ++ MSR_IA32_VMX_EXIT_CTLS, ++ MSR_IA32_VMX_TRUE_ENTRY_CTLS, ++ MSR_IA32_VMX_ENTRY_CTLS, ++ MSR_IA32_VMX_MISC, ++ MSR_IA32_VMX_CR0_FIXED0, ++ MSR_IA32_VMX_CR0_FIXED1, ++ MSR_IA32_VMX_CR4_FIXED0, ++ MSR_IA32_VMX_CR4_FIXED1, ++ MSR_IA32_VMX_VMCS_ENUM, ++ MSR_IA32_VMX_PROCBASED_CTLS2, ++ MSR_IA32_VMX_EPT_VPID_CAP, ++ MSR_IA32_VMX_VMFUNC, ++ ++ MSR_F10H_DECFG, ++ MSR_IA32_UCODE_REV, ++ MSR_IA32_ARCH_CAPABILITIES, ++}; ++ ++static unsigned int num_msr_based_features; ++ ++u64 kvm_get_arch_capabilities(void) ++{ ++ u64 data; ++ ++ rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data); ++ ++ /* ++ * If nx_huge_pages is enabled, KVM's shadow paging will ensure that ++ * the nested hypervisor runs with NX huge pages. If it is not, ++ * L1 is anyway vulnerable to ITLB_MULTIHIT explots from other ++ * L1 guests, so it need not worry about its own (L2) guests. ++ */ ++ data |= ARCH_CAP_PSCHANGE_MC_NO; ++ ++ /* ++ * If we're doing cache flushes (either "always" or "cond") ++ * we will do one whenever the guest does a vmlaunch/vmresume. ++ * If an outer hypervisor is doing the cache flush for us ++ * (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that ++ * capability to the guest too, and if EPT is disabled we're not ++ * vulnerable. Overall, only VMENTER_L1D_FLUSH_NEVER will ++ * require a nested hypervisor to do a flush of its own. 
++ */ ++ if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) ++ data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; ++ ++ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) ++ data |= ARCH_CAP_RDCL_NO; ++ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) ++ data |= ARCH_CAP_SSB_NO; ++ if (!boot_cpu_has_bug(X86_BUG_MDS)) ++ data |= ARCH_CAP_MDS_NO; ++ ++ /* ++ * On TAA affected systems, export MDS_NO=0 when: ++ * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1. ++ * - Updated microcode is present. This is detected by ++ * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures ++ * that VERW clears CPU buffers. ++ * ++ * When MDS_NO=0 is exported, guests deploy clear CPU buffer ++ * mitigation and don't complain: ++ * ++ * "Vulnerable: Clear CPU buffers attempted, no microcode" ++ * ++ * If TSX is disabled on the system, guests are also mitigated against ++ * TAA and clear CPU buffer mitigation is not required for guests. ++ */ ++ if (!boot_cpu_has(X86_FEATURE_RTM)) ++ data &= ~ARCH_CAP_TAA_NO; ++ else if (!boot_cpu_has_bug(X86_BUG_TAA)) ++ data |= ARCH_CAP_TAA_NO; ++ else if (data & ARCH_CAP_TSX_CTRL_MSR) ++ data &= ~ARCH_CAP_MDS_NO; ++ ++ /* KVM does not emulate MSR_IA32_TSX_CTRL. */ ++ data &= ~ARCH_CAP_TSX_CTRL_MSR; ++ return data; ++} ++ ++EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); ++ ++static int kvm_get_msr_feature(struct kvm_msr_entry *msr) ++{ ++ switch (msr->index) { ++ case MSR_IA32_ARCH_CAPABILITIES: ++ msr->data = kvm_get_arch_capabilities(); ++ break; ++ case MSR_IA32_UCODE_REV: ++ rdmsrl_safe(msr->index, &msr->data); ++ break; ++ default: ++ if (kvm_x86_ops->get_msr_feature(msr)) ++ return 1; ++ } ++ return 0; ++} ++ ++static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) ++{ ++ struct kvm_msr_entry msr; ++ int r; ++ ++ msr.index = index; ++ r = kvm_get_msr_feature(&msr); ++ if (r) ++ return r; ++ ++ *data = msr.data; ++ ++ return 0; ++} ++ ++static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) ++{ ++ if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) ++ return false; ++ ++ if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) ++ return false; ++ ++ return true; ++ ++} ++bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) ++{ ++ if (efer & efer_reserved_bits) ++ return false; ++ ++ return __kvm_valid_efer(vcpu, efer); ++} ++EXPORT_SYMBOL_GPL(kvm_valid_efer); ++ ++static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ u64 old_efer = vcpu->arch.efer; ++ u64 efer = msr_info->data; ++ ++ if (efer & efer_reserved_bits) ++ return 1; ++ ++ if (!msr_info->host_initiated) { ++ if (!__kvm_valid_efer(vcpu, efer)) ++ return 1; ++ ++ if (is_paging(vcpu) && ++ (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) ++ return 1; ++ } ++ ++ efer &= ~EFER_LMA; ++ efer |= vcpu->arch.efer & EFER_LMA; ++ ++ kvm_x86_ops->set_efer(vcpu, efer); ++ ++ /* Update reserved bits */ ++ if ((efer ^ old_efer) & EFER_NX) ++ kvm_mmu_reset_context(vcpu); ++ ++ return 0; ++} ++ ++void kvm_enable_efer_bits(u64 mask) ++{ ++ efer_reserved_bits &= ~mask; ++} ++EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); ++ ++/* ++ * Writes msr value into into the appropriate "register". ++ * Returns 0 on success, non-0 otherwise. ++ * Assumes vcpu_load() was already called. 
++ */ ++int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) ++{ ++ switch (msr->index) { ++ case MSR_FS_BASE: ++ case MSR_GS_BASE: ++ case MSR_KERNEL_GS_BASE: ++ case MSR_CSTAR: ++ case MSR_LSTAR: ++ if (is_noncanonical_address(msr->data, vcpu)) ++ return 1; ++ break; ++ case MSR_IA32_SYSENTER_EIP: ++ case MSR_IA32_SYSENTER_ESP: ++ /* ++ * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if ++ * non-canonical address is written on Intel but not on ++ * AMD (which ignores the top 32-bits, because it does ++ * not implement 64-bit SYSENTER). ++ * ++ * 64-bit code should hence be able to write a non-canonical ++ * value on AMD. Making the address canonical ensures that ++ * vmentry does not fail on Intel after writing a non-canonical ++ * value, and that something deterministic happens if the guest ++ * invokes 64-bit SYSENTER. ++ */ ++ msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu)); ++ } ++ return kvm_x86_ops->set_msr(vcpu, msr); ++} ++EXPORT_SYMBOL_GPL(kvm_set_msr); ++ ++/* ++ * Adapt set_msr() to msr_io()'s calling convention ++ */ ++static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) ++{ ++ struct msr_data msr; ++ int r; ++ ++ msr.index = index; ++ msr.host_initiated = true; ++ r = kvm_get_msr(vcpu, &msr); ++ if (r) ++ return r; ++ ++ *data = msr.data; ++ return 0; ++} ++ ++static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) ++{ ++ struct msr_data msr; ++ ++ msr.data = *data; ++ msr.index = index; ++ msr.host_initiated = true; ++ return kvm_set_msr(vcpu, &msr); ++} ++ ++#ifdef CONFIG_X86_64 ++struct pvclock_gtod_data { ++ seqcount_t seq; ++ ++ struct { /* extract of a clocksource struct */ ++ int vclock_mode; ++ u64 cycle_last; ++ u64 mask; ++ u32 mult; ++ u32 shift; ++ } clock; ++ ++ u64 boot_ns; ++ u64 nsec_base; ++ u64 wall_time_sec; ++}; ++ ++static struct pvclock_gtod_data pvclock_gtod_data; ++ ++static void update_pvclock_gtod(struct timekeeper *tk) ++{ ++ struct pvclock_gtod_data *vdata = &pvclock_gtod_data; ++ u64 boot_ns; ++ ++ boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot)); ++ ++ write_seqcount_begin(&vdata->seq); ++ ++ /* copy pvclock gtod data */ ++ vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; ++ vdata->clock.cycle_last = tk->tkr_mono.cycle_last; ++ vdata->clock.mask = tk->tkr_mono.mask; ++ vdata->clock.mult = tk->tkr_mono.mult; ++ vdata->clock.shift = tk->tkr_mono.shift; ++ ++ vdata->boot_ns = boot_ns; ++ vdata->nsec_base = tk->tkr_mono.xtime_nsec; ++ ++ vdata->wall_time_sec = tk->xtime_sec; ++ ++ write_seqcount_end(&vdata->seq); ++} ++#endif ++ ++void kvm_set_pending_timer(struct kvm_vcpu *vcpu) ++{ ++ /* ++ * Note: KVM_REQ_PENDING_TIMER is implicitly checked in ++ * vcpu_enter_guest. This function is only called from ++ * the physical CPU that is running vcpu. ++ */ ++ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); ++} ++ ++static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) ++{ ++ int version; ++ int r; ++ struct pvclock_wall_clock wc; ++ struct timespec64 boot; ++ ++ if (!wall_clock) ++ return; ++ ++ r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version)); ++ if (r) ++ return; ++ ++ if (version & 1) ++ ++version; /* first time write, random junk */ ++ ++ ++version; ++ ++ if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version))) ++ return; ++ ++ /* ++ * The guest calculates current wall clock time by adding ++ * system time (updated by kvm_guest_time_update below) to the ++ * wall clock specified here. 
guest system time equals host ++ * system time for us, thus we must fill in host boot time here. ++ */ ++ getboottime64(&boot); ++ ++ if (kvm->arch.kvmclock_offset) { ++ struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset); ++ boot = timespec64_sub(boot, ts); ++ } ++ wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */ ++ wc.nsec = boot.tv_nsec; ++ wc.version = version; ++ ++ kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); ++ ++ version++; ++ kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); ++} ++ ++static uint32_t div_frac(uint32_t dividend, uint32_t divisor) ++{ ++ do_shl32_div32(dividend, divisor); ++ return dividend; ++} ++ ++static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz, ++ s8 *pshift, u32 *pmultiplier) ++{ ++ uint64_t scaled64; ++ int32_t shift = 0; ++ uint64_t tps64; ++ uint32_t tps32; ++ ++ tps64 = base_hz; ++ scaled64 = scaled_hz; ++ while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) { ++ tps64 >>= 1; ++ shift--; ++ } ++ ++ tps32 = (uint32_t)tps64; ++ while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) { ++ if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000) ++ scaled64 >>= 1; ++ else ++ tps32 <<= 1; ++ shift++; ++ } ++ ++ *pshift = shift; ++ *pmultiplier = div_frac(scaled64, tps32); ++ ++ pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n", ++ __func__, base_hz, scaled_hz, shift, *pmultiplier); ++} ++ ++#ifdef CONFIG_X86_64 ++static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); ++#endif ++ ++static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); ++static unsigned long max_tsc_khz; ++ ++static u32 adjust_tsc_khz(u32 khz, s32 ppm) ++{ ++ u64 v = (u64)khz * (1000000 + ppm); ++ do_div(v, 1000000); ++ return v; ++} ++ ++static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) ++{ ++ u64 ratio; ++ ++ /* Guest TSC same frequency as host TSC? */ ++ if (!scale) { ++ vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; ++ return 0; ++ } ++ ++ /* TSC scaling supported? */ ++ if (!kvm_has_tsc_control) { ++ if (user_tsc_khz > tsc_khz) { ++ vcpu->arch.tsc_catchup = 1; ++ vcpu->arch.tsc_always_catchup = 1; ++ return 0; ++ } else { ++ pr_warn_ratelimited("user requested TSC rate below hardware speed\n"); ++ return -1; ++ } ++ } ++ ++ /* TSC scaling required - calculate ratio */ ++ ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits, ++ user_tsc_khz, tsc_khz); ++ ++ if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { ++ pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", ++ user_tsc_khz); ++ return -1; ++ } ++ ++ vcpu->arch.tsc_scaling_ratio = ratio; ++ return 0; ++} ++ ++static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) ++{ ++ u32 thresh_lo, thresh_hi; ++ int use_scaling = 0; ++ ++ /* tsc_khz can be zero if TSC calibration fails */ ++ if (user_tsc_khz == 0) { ++ /* set tsc_scaling_ratio to a safe value */ ++ vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; ++ return -1; ++ } ++ ++ /* Compute a scale to convert nanoseconds in TSC cycles */ ++ kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC, ++ &vcpu->arch.virtual_tsc_shift, ++ &vcpu->arch.virtual_tsc_mult); ++ vcpu->arch.virtual_tsc_khz = user_tsc_khz; ++ ++ /* ++ * Compute the variation in TSC rate which is acceptable ++ * within the range of tolerance and decide if the ++ * rate being applied is within that bounds of the hardware ++ * rate. If so, no scaling or compensation need be done. 
++ */ ++ thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); ++ thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); ++ if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) { ++ pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi); ++ use_scaling = 1; ++ } ++ return set_tsc_khz(vcpu, user_tsc_khz, use_scaling); ++} ++ ++static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) ++{ ++ u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec, ++ vcpu->arch.virtual_tsc_mult, ++ vcpu->arch.virtual_tsc_shift); ++ tsc += vcpu->arch.this_tsc_write; ++ return tsc; ++} ++ ++static inline int gtod_is_based_on_tsc(int mode) ++{ ++ return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK; ++} ++ ++static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) ++{ ++#ifdef CONFIG_X86_64 ++ bool vcpus_matched; ++ struct kvm_arch *ka = &vcpu->kvm->arch; ++ struct pvclock_gtod_data *gtod = &pvclock_gtod_data; ++ ++ vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == ++ atomic_read(&vcpu->kvm->online_vcpus)); ++ ++ /* ++ * Once the masterclock is enabled, always perform request in ++ * order to update it. ++ * ++ * In order to enable masterclock, the host clocksource must be TSC ++ * and the vcpus need to have matched TSCs. When that happens, ++ * perform request to enable masterclock. ++ */ ++ if (ka->use_master_clock || ++ (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched)) ++ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ++ ++ trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc, ++ atomic_read(&vcpu->kvm->online_vcpus), ++ ka->use_master_clock, gtod->clock.vclock_mode); ++#endif ++} ++ ++static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) ++{ ++ u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); ++ vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; ++} ++ ++/* ++ * Multiply tsc by a fixed point number represented by ratio. ++ * ++ * The most significant 64-N bits (mult) of ratio represent the ++ * integral part of the fixed point number; the remaining N bits ++ * (frac) represent the fractional part, ie. ratio represents a fixed ++ * point number (mult + frac * 2^(-N)). ++ * ++ * N equals to kvm_tsc_scaling_ratio_frac_bits. ++ */ ++static inline u64 __scale_tsc(u64 ratio, u64 tsc) ++{ ++ return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits); ++} ++ ++u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) ++{ ++ u64 _tsc = tsc; ++ u64 ratio = vcpu->arch.tsc_scaling_ratio; ++ ++ if (ratio != kvm_default_tsc_scaling_ratio) ++ _tsc = __scale_tsc(ratio, tsc); ++ ++ return _tsc; ++} ++EXPORT_SYMBOL_GPL(kvm_scale_tsc); ++ ++static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) ++{ ++ u64 tsc; ++ ++ tsc = kvm_scale_tsc(vcpu, rdtsc()); ++ ++ return target_tsc - tsc; ++} ++ ++u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) ++{ ++ u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); ++ ++ return tsc_offset + kvm_scale_tsc(vcpu, host_tsc); ++} ++EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); ++ ++static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) ++{ ++ vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset); ++} ++ ++static inline bool kvm_check_tsc_unstable(void) ++{ ++#ifdef CONFIG_X86_64 ++ /* ++ * TSC is marked unstable when we're running on Hyper-V, ++ * 'TSC page' clocksource is good. 
++ */ ++ if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK) ++ return false; ++#endif ++ return check_tsc_unstable(); ++} ++ ++void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) ++{ ++ struct kvm *kvm = vcpu->kvm; ++ u64 offset, ns, elapsed; ++ unsigned long flags; ++ bool matched; ++ bool already_matched; ++ u64 data = msr->data; ++ bool synchronizing = false; ++ ++ raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); ++ offset = kvm_compute_tsc_offset(vcpu, data); ++ ns = ktime_get_boot_ns(); ++ elapsed = ns - kvm->arch.last_tsc_nsec; ++ ++ if (vcpu->arch.virtual_tsc_khz) { ++ if (data == 0 && msr->host_initiated) { ++ /* ++ * detection of vcpu initialization -- need to sync ++ * with other vCPUs. This particularly helps to keep ++ * kvm_clock stable after CPU hotplug ++ */ ++ synchronizing = true; ++ } else { ++ u64 tsc_exp = kvm->arch.last_tsc_write + ++ nsec_to_cycles(vcpu, elapsed); ++ u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL; ++ /* ++ * Special case: TSC write with a small delta (1 second) ++ * of virtual cycle time against real time is ++ * interpreted as an attempt to synchronize the CPU. ++ */ ++ synchronizing = data < tsc_exp + tsc_hz && ++ data + tsc_hz > tsc_exp; ++ } ++ } ++ ++ /* ++ * For a reliable TSC, we can match TSC offsets, and for an unstable ++ * TSC, we add elapsed time in this computation. We could let the ++ * compensation code attempt to catch up if we fall behind, but ++ * it's better to try to match offsets from the beginning. ++ */ ++ if (synchronizing && ++ vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { ++ if (!kvm_check_tsc_unstable()) { ++ offset = kvm->arch.cur_tsc_offset; ++ pr_debug("kvm: matched tsc offset for %llu\n", data); ++ } else { ++ u64 delta = nsec_to_cycles(vcpu, elapsed); ++ data += delta; ++ offset = kvm_compute_tsc_offset(vcpu, data); ++ pr_debug("kvm: adjusted tsc offset by %llu\n", delta); ++ } ++ matched = true; ++ already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation); ++ } else { ++ /* ++ * We split periods of matched TSC writes into generations. ++ * For each generation, we track the original measured ++ * nanosecond time, offset, and write, so if TSCs are in ++ * sync, we can match exact offset, and if not, we can match ++ * exact software computation in compute_guest_tsc() ++ * ++ * These values are tracked in kvm->arch.cur_xxx variables. ++ */ ++ kvm->arch.cur_tsc_generation++; ++ kvm->arch.cur_tsc_nsec = ns; ++ kvm->arch.cur_tsc_write = data; ++ kvm->arch.cur_tsc_offset = offset; ++ matched = false; ++ pr_debug("kvm: new tsc generation %llu, clock %llu\n", ++ kvm->arch.cur_tsc_generation, data); ++ } ++ ++ /* ++ * We also track th most recent recorded KHZ, write and time to ++ * allow the matching interval to be extended at each write. 
++ */ ++ kvm->arch.last_tsc_nsec = ns; ++ kvm->arch.last_tsc_write = data; ++ kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; ++ ++ vcpu->arch.last_guest_tsc = data; ++ ++ /* Keep track of which generation this VCPU has synchronized to */ ++ vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation; ++ vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; ++ vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; ++ ++ if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) ++ update_ia32_tsc_adjust_msr(vcpu, offset); ++ ++ kvm_vcpu_write_tsc_offset(vcpu, offset); ++ raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); ++ ++ spin_lock(&kvm->arch.pvclock_gtod_sync_lock); ++ if (!matched) { ++ kvm->arch.nr_vcpus_matched_tsc = 0; ++ } else if (!already_matched) { ++ kvm->arch.nr_vcpus_matched_tsc++; ++ } ++ ++ kvm_track_tsc_matching(vcpu); ++ spin_unlock(&kvm->arch.pvclock_gtod_sync_lock); ++} ++ ++EXPORT_SYMBOL_GPL(kvm_write_tsc); ++ ++static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, ++ s64 adjustment) ++{ ++ u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); ++ kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment); ++} ++ ++static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) ++{ ++ if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) ++ WARN_ON(adjustment < 0); ++ adjustment = kvm_scale_tsc(vcpu, (u64) adjustment); ++ adjust_tsc_offset_guest(vcpu, adjustment); ++} ++ ++#ifdef CONFIG_X86_64 ++ ++static u64 read_tsc(void) ++{ ++ u64 ret = (u64)rdtsc_ordered(); ++ u64 last = pvclock_gtod_data.clock.cycle_last; ++ ++ if (likely(ret >= last)) ++ return ret; ++ ++ /* ++ * GCC likes to generate cmov here, but this branch is extremely ++ * predictable (it's just a function of time and the likely is ++ * very likely) and there's a data dependence, so force GCC ++ * to generate a branch instead. I don't barrier() because ++ * we don't actually need a barrier, and if this function ++ * ever gets inlined it will generate worse code. 
++ */ ++ asm volatile (""); ++ return last; ++} ++ ++static inline u64 vgettsc(u64 *tsc_timestamp, int *mode) ++{ ++ long v; ++ struct pvclock_gtod_data *gtod = &pvclock_gtod_data; ++ u64 tsc_pg_val; ++ ++ switch (gtod->clock.vclock_mode) { ++ case VCLOCK_HVCLOCK: ++ tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(), ++ tsc_timestamp); ++ if (tsc_pg_val != U64_MAX) { ++ /* TSC page valid */ ++ *mode = VCLOCK_HVCLOCK; ++ v = (tsc_pg_val - gtod->clock.cycle_last) & ++ gtod->clock.mask; ++ } else { ++ /* TSC page invalid */ ++ *mode = VCLOCK_NONE; ++ } ++ break; ++ case VCLOCK_TSC: ++ *mode = VCLOCK_TSC; ++ *tsc_timestamp = read_tsc(); ++ v = (*tsc_timestamp - gtod->clock.cycle_last) & ++ gtod->clock.mask; ++ break; ++ default: ++ *mode = VCLOCK_NONE; ++ } ++ ++ if (*mode == VCLOCK_NONE) ++ *tsc_timestamp = v = 0; ++ ++ return v * gtod->clock.mult; ++} ++ ++static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp) ++{ ++ struct pvclock_gtod_data *gtod = &pvclock_gtod_data; ++ unsigned long seq; ++ int mode; ++ u64 ns; ++ ++ do { ++ seq = read_seqcount_begin(>od->seq); ++ ns = gtod->nsec_base; ++ ns += vgettsc(tsc_timestamp, &mode); ++ ns >>= gtod->clock.shift; ++ ns += gtod->boot_ns; ++ } while (unlikely(read_seqcount_retry(>od->seq, seq))); ++ *t = ns; ++ ++ return mode; ++} ++ ++static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp) ++{ ++ struct pvclock_gtod_data *gtod = &pvclock_gtod_data; ++ unsigned long seq; ++ int mode; ++ u64 ns; ++ ++ do { ++ seq = read_seqcount_begin(>od->seq); ++ ts->tv_sec = gtod->wall_time_sec; ++ ns = gtod->nsec_base; ++ ns += vgettsc(tsc_timestamp, &mode); ++ ns >>= gtod->clock.shift; ++ } while (unlikely(read_seqcount_retry(>od->seq, seq))); ++ ++ ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); ++ ts->tv_nsec = ns; ++ ++ return mode; ++} ++ ++/* returns true if host is using TSC based clocksource */ ++static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp) ++{ ++ /* checked again under seqlock below */ ++ if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) ++ return false; ++ ++ return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns, ++ tsc_timestamp)); ++} ++ ++/* returns true if host is using TSC based clocksource */ ++static bool kvm_get_walltime_and_clockread(struct timespec64 *ts, ++ u64 *tsc_timestamp) ++{ ++ /* checked again under seqlock below */ ++ if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) ++ return false; ++ ++ return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp)); ++} ++#endif ++ ++/* ++ * ++ * Assuming a stable TSC across physical CPUS, and a stable TSC ++ * across virtual CPUs, the following condition is possible. ++ * Each numbered line represents an event visible to both ++ * CPUs at the next numbered event. ++ * ++ * "timespecX" represents host monotonic time. "tscX" represents ++ * RDTSC value. ++ * ++ * VCPU0 on CPU0 | VCPU1 on CPU1 ++ * ++ * 1. read timespec0,tsc0 ++ * 2. | timespec1 = timespec0 + N ++ * | tsc1 = tsc0 + M ++ * 3. transition to guest | transition to guest ++ * 4. ret0 = timespec0 + (rdtsc - tsc0) | ++ * 5. | ret1 = timespec1 + (rdtsc - tsc1) ++ * | ret1 = timespec0 + N + (rdtsc - (tsc0 + M)) ++ * ++ * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity: ++ * ++ * - ret0 < ret1 ++ * - timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M)) ++ * ... ++ * - 0 < N - M => M < N ++ * ++ * That is, when timespec0 != timespec1, M < N. 
Unfortunately that is not ++ * always the case (the difference between two distinct xtime instances ++ * might be smaller then the difference between corresponding TSC reads, ++ * when updating guest vcpus pvclock areas). ++ * ++ * To avoid that problem, do not allow visibility of distinct ++ * system_timestamp/tsc_timestamp values simultaneously: use a master ++ * copy of host monotonic time values. Update that master copy ++ * in lockstep. ++ * ++ * Rely on synchronization of host TSCs and guest TSCs for monotonicity. ++ * ++ */ ++ ++static void pvclock_update_vm_gtod_copy(struct kvm *kvm) ++{ ++#ifdef CONFIG_X86_64 ++ struct kvm_arch *ka = &kvm->arch; ++ int vclock_mode; ++ bool host_tsc_clocksource, vcpus_matched; ++ ++ vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == ++ atomic_read(&kvm->online_vcpus)); ++ ++ /* ++ * If the host uses TSC clock, then passthrough TSC as stable ++ * to the guest. ++ */ ++ host_tsc_clocksource = kvm_get_time_and_clockread( ++ &ka->master_kernel_ns, ++ &ka->master_cycle_now); ++ ++ ka->use_master_clock = host_tsc_clocksource && vcpus_matched ++ && !ka->backwards_tsc_observed ++ && !ka->boot_vcpu_runs_old_kvmclock; ++ ++ if (ka->use_master_clock) ++ atomic_set(&kvm_guest_has_master_clock, 1); ++ ++ vclock_mode = pvclock_gtod_data.clock.vclock_mode; ++ trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode, ++ vcpus_matched); ++#endif ++} ++ ++void kvm_make_mclock_inprogress_request(struct kvm *kvm) ++{ ++ kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); ++} ++ ++static void kvm_gen_update_masterclock(struct kvm *kvm) ++{ ++#ifdef CONFIG_X86_64 ++ int i; ++ struct kvm_vcpu *vcpu; ++ struct kvm_arch *ka = &kvm->arch; ++ ++ spin_lock(&ka->pvclock_gtod_sync_lock); ++ kvm_make_mclock_inprogress_request(kvm); ++ /* no guest entries from this point */ ++ pvclock_update_vm_gtod_copy(kvm); ++ ++ kvm_for_each_vcpu(i, vcpu, kvm) ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ ++ /* guest entries allowed */ ++ kvm_for_each_vcpu(i, vcpu, kvm) ++ kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu); ++ ++ spin_unlock(&ka->pvclock_gtod_sync_lock); ++#endif ++} ++ ++u64 get_kvmclock_ns(struct kvm *kvm) ++{ ++ struct kvm_arch *ka = &kvm->arch; ++ struct pvclock_vcpu_time_info hv_clock; ++ u64 ret; ++ ++ spin_lock(&ka->pvclock_gtod_sync_lock); ++ if (!ka->use_master_clock) { ++ spin_unlock(&ka->pvclock_gtod_sync_lock); ++ return ktime_get_boot_ns() + ka->kvmclock_offset; ++ } ++ ++ hv_clock.tsc_timestamp = ka->master_cycle_now; ++ hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; ++ spin_unlock(&ka->pvclock_gtod_sync_lock); ++ ++ /* both __this_cpu_read() and rdtsc() should be on the same cpu */ ++ get_cpu(); ++ ++ if (__this_cpu_read(cpu_tsc_khz)) { ++ kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, ++ &hv_clock.tsc_shift, ++ &hv_clock.tsc_to_system_mul); ++ ret = __pvclock_read_cycles(&hv_clock, rdtsc()); ++ } else ++ ret = ktime_get_boot_ns() + ka->kvmclock_offset; ++ ++ put_cpu(); ++ ++ return ret; ++} ++ ++static void kvm_setup_pvclock_page(struct kvm_vcpu *v) ++{ ++ struct kvm_vcpu_arch *vcpu = &v->arch; ++ struct pvclock_vcpu_time_info guest_hv_clock; ++ ++ if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, ++ &guest_hv_clock, sizeof(guest_hv_clock)))) ++ return; ++ ++ /* This VCPU is paused, but it's legal for a guest to read another ++ * VCPU's kvmclock, so we really have to follow the specification where ++ * it says that version is odd if data is being modified, and even after ++ * it is 
consistent. ++ * ++ * Version field updates must be kept separate. This is because ++ * kvm_write_guest_cached might use a "rep movs" instruction, and ++ * writes within a string instruction are weakly ordered. So there ++ * are three writes overall. ++ * ++ * As a small optimization, only write the version field in the first ++ * and third write. The vcpu->pv_time cache is still valid, because the ++ * version field is the first in the struct. ++ */ ++ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); ++ ++ if (guest_hv_clock.version & 1) ++ ++guest_hv_clock.version; /* first time write, random junk */ ++ ++ vcpu->hv_clock.version = guest_hv_clock.version + 1; ++ kvm_write_guest_cached(v->kvm, &vcpu->pv_time, ++ &vcpu->hv_clock, ++ sizeof(vcpu->hv_clock.version)); ++ ++ smp_wmb(); ++ ++ /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ ++ vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); ++ ++ if (vcpu->pvclock_set_guest_stopped_request) { ++ vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED; ++ vcpu->pvclock_set_guest_stopped_request = false; ++ } ++ ++ trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); ++ ++ kvm_write_guest_cached(v->kvm, &vcpu->pv_time, ++ &vcpu->hv_clock, ++ sizeof(vcpu->hv_clock)); ++ ++ smp_wmb(); ++ ++ vcpu->hv_clock.version++; ++ kvm_write_guest_cached(v->kvm, &vcpu->pv_time, ++ &vcpu->hv_clock, ++ sizeof(vcpu->hv_clock.version)); ++} ++ ++static int kvm_guest_time_update(struct kvm_vcpu *v) ++{ ++ unsigned long flags, tgt_tsc_khz; ++ struct kvm_vcpu_arch *vcpu = &v->arch; ++ struct kvm_arch *ka = &v->kvm->arch; ++ s64 kernel_ns; ++ u64 tsc_timestamp, host_tsc; ++ u8 pvclock_flags; ++ bool use_master_clock; ++ ++ kernel_ns = 0; ++ host_tsc = 0; ++ ++ /* ++ * If the host uses TSC clock, then passthrough TSC as stable ++ * to the guest. ++ */ ++ spin_lock(&ka->pvclock_gtod_sync_lock); ++ use_master_clock = ka->use_master_clock; ++ if (use_master_clock) { ++ host_tsc = ka->master_cycle_now; ++ kernel_ns = ka->master_kernel_ns; ++ } ++ spin_unlock(&ka->pvclock_gtod_sync_lock); ++ ++ /* Keep irq disabled to prevent changes to the clock */ ++ local_irq_save(flags); ++ tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz); ++ if (unlikely(tgt_tsc_khz == 0)) { ++ local_irq_restore(flags); ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); ++ return 1; ++ } ++ if (!use_master_clock) { ++ host_tsc = rdtsc(); ++ kernel_ns = ktime_get_boot_ns(); ++ } ++ ++ tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); ++ ++ /* ++ * We may have to catch up the TSC to match elapsed wall clock ++ * time for two reasons, even if kvmclock is used. ++ * 1) CPU could have been running below the maximum TSC rate ++ * 2) Broken TSC compensation resets the base at each VCPU ++ * entry to avoid unknown leaps of TSC even when running ++ * again on the same CPU. This may cause apparent elapsed ++ * time to disappear, and the guest to stand still or run ++ * very slowly. 
++ */ ++ if (vcpu->tsc_catchup) { ++ u64 tsc = compute_guest_tsc(v, kernel_ns); ++ if (tsc > tsc_timestamp) { ++ adjust_tsc_offset_guest(v, tsc - tsc_timestamp); ++ tsc_timestamp = tsc; ++ } ++ } ++ ++ local_irq_restore(flags); ++ ++ /* With all the info we got, fill in the values */ ++ ++ if (kvm_has_tsc_control) ++ tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz); ++ ++ if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { ++ kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, ++ &vcpu->hv_clock.tsc_shift, ++ &vcpu->hv_clock.tsc_to_system_mul); ++ vcpu->hw_tsc_khz = tgt_tsc_khz; ++ } ++ ++ vcpu->hv_clock.tsc_timestamp = tsc_timestamp; ++ vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; ++ vcpu->last_guest_tsc = tsc_timestamp; ++ ++ /* If the host uses TSC clocksource, then it is stable */ ++ pvclock_flags = 0; ++ if (use_master_clock) ++ pvclock_flags |= PVCLOCK_TSC_STABLE_BIT; ++ ++ vcpu->hv_clock.flags = pvclock_flags; ++ ++ if (vcpu->pv_time_enabled) ++ kvm_setup_pvclock_page(v); ++ if (v == kvm_get_vcpu(v->kvm, 0)) ++ kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); ++ return 0; ++} ++ ++/* ++ * kvmclock updates which are isolated to a given vcpu, such as ++ * vcpu->cpu migration, should not allow system_timestamp from ++ * the rest of the vcpus to remain static. Otherwise ntp frequency ++ * correction applies to one vcpu's system_timestamp but not ++ * the others. ++ * ++ * So in those cases, request a kvmclock update for all vcpus. ++ * We need to rate-limit these requests though, as they can ++ * considerably slow guests that have a large number of vcpus. ++ * The time for a remote vcpu to update its kvmclock is bound ++ * by the delay we use to rate-limit the updates. ++ */ ++ ++#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100) ++ ++static void kvmclock_update_fn(struct work_struct *work) ++{ ++ int i; ++ struct delayed_work *dwork = to_delayed_work(work); ++ struct kvm_arch *ka = container_of(dwork, struct kvm_arch, ++ kvmclock_update_work); ++ struct kvm *kvm = container_of(ka, struct kvm, arch); ++ struct kvm_vcpu *vcpu; ++ ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ kvm_vcpu_kick(vcpu); ++ } ++} ++ ++static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) ++{ ++ struct kvm *kvm = v->kvm; ++ ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); ++ schedule_delayed_work(&kvm->arch.kvmclock_update_work, ++ KVMCLOCK_UPDATE_DELAY); ++} ++ ++#define KVMCLOCK_SYNC_PERIOD (300 * HZ) ++ ++static void kvmclock_sync_fn(struct work_struct *work) ++{ ++ struct delayed_work *dwork = to_delayed_work(work); ++ struct kvm_arch *ka = container_of(dwork, struct kvm_arch, ++ kvmclock_sync_work); ++ struct kvm *kvm = container_of(ka, struct kvm, arch); ++ ++ if (!kvmclock_periodic_sync) ++ return; ++ ++ schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0); ++ schedule_delayed_work(&kvm->arch.kvmclock_sync_work, ++ KVMCLOCK_SYNC_PERIOD); ++} ++ ++static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ u64 mcg_cap = vcpu->arch.mcg_cap; ++ unsigned bank_num = mcg_cap & 0xff; ++ u32 msr = msr_info->index; ++ u64 data = msr_info->data; ++ ++ switch (msr) { ++ case MSR_IA32_MCG_STATUS: ++ vcpu->arch.mcg_status = data; ++ break; ++ case MSR_IA32_MCG_CTL: ++ if (!(mcg_cap & MCG_CTL_P) && ++ (data || !msr_info->host_initiated)) ++ return 1; ++ if (data != 0 && data != ~(u64)0) ++ return 1; ++ vcpu->arch.mcg_ctl = data; ++ break; ++ default: ++ if (msr >= MSR_IA32_MC0_CTL && ++ msr < MSR_IA32_MCx_CTL(bank_num)) { ++ u32 
offset = msr - MSR_IA32_MC0_CTL; ++ /* only 0 or all 1s can be written to IA32_MCi_CTL ++ * some Linux kernels though clear bit 10 in bank 4 to ++ * workaround a BIOS/GART TBL issue on AMD K8s, ignore ++ * this to avoid an uncatched #GP in the guest ++ */ ++ if ((offset & 0x3) == 0 && ++ data != 0 && (data | (1 << 10)) != ~(u64)0) ++ return -1; ++ if (!msr_info->host_initiated && ++ (offset & 0x3) == 1 && data != 0) ++ return -1; ++ vcpu->arch.mce_banks[offset] = data; ++ break; ++ } ++ return 1; ++ } ++ return 0; ++} ++ ++static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) ++{ ++ struct kvm *kvm = vcpu->kvm; ++ int lm = is_long_mode(vcpu); ++ u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64 ++ : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32; ++ u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 ++ : kvm->arch.xen_hvm_config.blob_size_32; ++ u32 page_num = data & ~PAGE_MASK; ++ u64 page_addr = data & PAGE_MASK; ++ u8 *page; ++ int r; ++ ++ r = -E2BIG; ++ if (page_num >= blob_size) ++ goto out; ++ r = -ENOMEM; ++ page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE); ++ if (IS_ERR(page)) { ++ r = PTR_ERR(page); ++ goto out; ++ } ++ if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) ++ goto out_free; ++ r = 0; ++out_free: ++ kfree(page); ++out: ++ return r; ++} ++ ++static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) ++{ ++ gpa_t gpa = data & ~0x3f; ++ ++ /* Bits 3:5 are reserved, Should be zero */ ++ if (data & 0x38) ++ return 1; ++ ++ vcpu->arch.apf.msr_val = data; ++ ++ if (!(data & KVM_ASYNC_PF_ENABLED)) { ++ kvm_clear_async_pf_completion_queue(vcpu); ++ kvm_async_pf_hash_reset(vcpu); ++ return 0; ++ } ++ ++ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, ++ sizeof(u32))) ++ return 1; ++ ++ vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); ++ vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; ++ kvm_async_pf_wakeup_all(vcpu); ++ return 0; ++} ++ ++static void kvmclock_reset(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.pv_time_enabled = false; ++} ++ ++static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) ++{ ++ ++vcpu->stat.tlb_flush; ++ kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa); ++} ++ ++static void record_steal_time(struct kvm_vcpu *vcpu) ++{ ++ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) ++ return; ++ ++ if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, ++ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) ++ return; ++ ++ /* ++ * Doing a TLB flush here, on the guest's behalf, can avoid ++ * expensive IPIs. 
++ */ ++ if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB) ++ kvm_vcpu_flush_tlb(vcpu, false); ++ ++ if (vcpu->arch.st.steal.version & 1) ++ vcpu->arch.st.steal.version += 1; /* first time write, random junk */ ++ ++ vcpu->arch.st.steal.version += 1; ++ ++ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, ++ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); ++ ++ smp_wmb(); ++ ++ vcpu->arch.st.steal.steal += current->sched_info.run_delay - ++ vcpu->arch.st.last_steal; ++ vcpu->arch.st.last_steal = current->sched_info.run_delay; ++ ++ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, ++ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); ++ ++ smp_wmb(); ++ ++ vcpu->arch.st.steal.version += 1; ++ ++ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, ++ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); ++} ++ ++int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ bool pr = false; ++ u32 msr = msr_info->index; ++ u64 data = msr_info->data; ++ ++ switch (msr) { ++ case MSR_AMD64_NB_CFG: ++ case MSR_IA32_UCODE_WRITE: ++ case MSR_VM_HSAVE_PA: ++ case MSR_AMD64_PATCH_LOADER: ++ case MSR_AMD64_BU_CFG2: ++ case MSR_AMD64_DC_CFG: ++ case MSR_F15H_EX_CFG: ++ break; ++ ++ case MSR_IA32_UCODE_REV: ++ if (msr_info->host_initiated) ++ vcpu->arch.microcode_version = data; ++ break; ++ case MSR_IA32_ARCH_CAPABILITIES: ++ if (!msr_info->host_initiated) ++ return 1; ++ vcpu->arch.arch_capabilities = data; ++ break; ++ case MSR_EFER: ++ return set_efer(vcpu, msr_info); ++ case MSR_K7_HWCR: ++ data &= ~(u64)0x40; /* ignore flush filter disable */ ++ data &= ~(u64)0x100; /* ignore ignne emulation enable */ ++ data &= ~(u64)0x8; /* ignore TLB cache disable */ ++ data &= ~(u64)0x40000; /* ignore Mc status write enable */ ++ if (data != 0) { ++ vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", ++ data); ++ return 1; ++ } ++ break; ++ case MSR_FAM10H_MMIO_CONF_BASE: ++ if (data != 0) { ++ vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: " ++ "0x%llx\n", data); ++ return 1; ++ } ++ break; ++ case MSR_IA32_DEBUGCTLMSR: ++ if (!data) { ++ /* We support the non-activated case already */ ++ break; ++ } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) { ++ /* Values other than LBR and BTF are vendor-specific, ++ thus reserved and should throw a #GP */ ++ return 1; ++ } ++ vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", ++ __func__, data); ++ break; ++ case 0x200 ... 0x2ff: ++ return kvm_mtrr_set_msr(vcpu, msr, data); ++ case MSR_IA32_APICBASE: ++ return kvm_set_apic_base(vcpu, msr_info); ++ case APIC_BASE_MSR ... 
APIC_BASE_MSR + 0x3ff: ++ return kvm_x2apic_msr_write(vcpu, msr, data); ++ case MSR_IA32_TSCDEADLINE: ++ kvm_set_lapic_tscdeadline_msr(vcpu, data); ++ break; ++ case MSR_IA32_TSC_ADJUST: ++ if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) { ++ if (!msr_info->host_initiated) { ++ s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; ++ adjust_tsc_offset_guest(vcpu, adj); ++ } ++ vcpu->arch.ia32_tsc_adjust_msr = data; ++ } ++ break; ++ case MSR_IA32_MISC_ENABLE: ++ vcpu->arch.ia32_misc_enable_msr = data; ++ break; ++ case MSR_IA32_SMBASE: ++ if (!msr_info->host_initiated) ++ return 1; ++ vcpu->arch.smbase = data; ++ break; ++ case MSR_IA32_TSC: ++ kvm_write_tsc(vcpu, msr_info); ++ break; ++ case MSR_SMI_COUNT: ++ if (!msr_info->host_initiated) ++ return 1; ++ vcpu->arch.smi_count = data; ++ break; ++ case MSR_KVM_WALL_CLOCK_NEW: ++ case MSR_KVM_WALL_CLOCK: ++ vcpu->kvm->arch.wall_clock = data; ++ kvm_write_wall_clock(vcpu->kvm, data); ++ break; ++ case MSR_KVM_SYSTEM_TIME_NEW: ++ case MSR_KVM_SYSTEM_TIME: { ++ struct kvm_arch *ka = &vcpu->kvm->arch; ++ ++ kvmclock_reset(vcpu); ++ ++ if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) { ++ bool tmp = (msr == MSR_KVM_SYSTEM_TIME); ++ ++ if (ka->boot_vcpu_runs_old_kvmclock != tmp) ++ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ++ ++ ka->boot_vcpu_runs_old_kvmclock = tmp; ++ } ++ ++ vcpu->arch.time = data; ++ kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); ++ ++ /* we verify if the enable bit is set... */ ++ if (!(data & 1)) ++ break; ++ ++ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ++ &vcpu->arch.pv_time, data & ~1ULL, ++ sizeof(struct pvclock_vcpu_time_info))) ++ vcpu->arch.pv_time_enabled = false; ++ else ++ vcpu->arch.pv_time_enabled = true; ++ ++ break; ++ } ++ case MSR_KVM_ASYNC_PF_EN: ++ if (kvm_pv_enable_async_pf(vcpu, data)) ++ return 1; ++ break; ++ case MSR_KVM_STEAL_TIME: ++ ++ if (unlikely(!sched_info_on())) ++ return 1; ++ ++ if (data & KVM_STEAL_RESERVED_MASK) ++ return 1; ++ ++ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, ++ data & KVM_STEAL_VALID_BITS, ++ sizeof(struct kvm_steal_time))) ++ return 1; ++ ++ vcpu->arch.st.msr_val = data; ++ ++ if (!(data & KVM_MSR_ENABLED)) ++ break; ++ ++ kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); ++ ++ break; ++ case MSR_KVM_PV_EOI_EN: ++ if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8))) ++ return 1; ++ break; ++ ++ case MSR_IA32_MCG_CTL: ++ case MSR_IA32_MCG_STATUS: ++ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: ++ return set_msr_mce(vcpu, msr_info); ++ ++ case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: ++ case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: ++ pr = true; /* fall through */ ++ case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: ++ case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1: ++ if (kvm_pmu_is_valid_msr(vcpu, msr)) ++ return kvm_pmu_set_msr(vcpu, msr_info); ++ ++ if (pr || data != 0) ++ vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " ++ "0x%x data 0x%llx\n", msr, data); ++ break; ++ case MSR_K7_CLK_CTL: ++ /* ++ * Ignore all writes to this no longer documented MSR. ++ * Writes are only relevant for old K7 processors, ++ * all pre-dating SVM, but a recommended workaround from ++ * AMD for these chips. It is possible to specify the ++ * affected processor models on the command line, hence ++ * the need to ignore the workaround. ++ */ ++ break; ++ case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: ++ case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: ++ case HV_X64_MSR_CRASH_CTL: ++ case HV_X64_MSR_STIMER0_CONFIG ... 
HV_X64_MSR_STIMER3_COUNT: ++ case HV_X64_MSR_REENLIGHTENMENT_CONTROL: ++ case HV_X64_MSR_TSC_EMULATION_CONTROL: ++ case HV_X64_MSR_TSC_EMULATION_STATUS: ++ return kvm_hv_set_msr_common(vcpu, msr, data, ++ msr_info->host_initiated); ++ case MSR_IA32_BBL_CR_CTL3: ++ /* Drop writes to this legacy MSR -- see rdmsr ++ * counterpart for further detail. ++ */ ++ if (report_ignored_msrs) ++ vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", ++ msr, data); ++ break; ++ case MSR_AMD64_OSVW_ID_LENGTH: ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) ++ return 1; ++ vcpu->arch.osvw.length = data; ++ break; ++ case MSR_AMD64_OSVW_STATUS: ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) ++ return 1; ++ vcpu->arch.osvw.status = data; ++ break; ++ case MSR_PLATFORM_INFO: ++ if (!msr_info->host_initiated || ++ (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) && ++ cpuid_fault_enabled(vcpu))) ++ return 1; ++ vcpu->arch.msr_platform_info = data; ++ break; ++ case MSR_MISC_FEATURES_ENABLES: ++ if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT || ++ (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT && ++ !supports_cpuid_fault(vcpu))) ++ return 1; ++ vcpu->arch.msr_misc_features_enables = data; ++ break; ++ default: ++ if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) ++ return xen_hvm_config(vcpu, data); ++ if (kvm_pmu_is_valid_msr(vcpu, msr)) ++ return kvm_pmu_set_msr(vcpu, msr_info); ++ if (!ignore_msrs) { ++ vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n", ++ msr, data); ++ return 1; ++ } else { ++ if (report_ignored_msrs) ++ vcpu_unimpl(vcpu, ++ "ignored wrmsr: 0x%x data 0x%llx\n", ++ msr, data); ++ break; ++ } ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_msr_common); ++ ++ ++/* ++ * Reads an msr value (of 'msr_index') into 'pdata'. ++ * Returns 0 on success, non-0 otherwise. ++ * Assumes vcpu_load() was already called. ++ */ ++int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) ++{ ++ return kvm_x86_ops->get_msr(vcpu, msr); ++} ++EXPORT_SYMBOL_GPL(kvm_get_msr); ++ ++static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) ++{ ++ u64 data; ++ u64 mcg_cap = vcpu->arch.mcg_cap; ++ unsigned bank_num = mcg_cap & 0xff; ++ ++ switch (msr) { ++ case MSR_IA32_P5_MC_ADDR: ++ case MSR_IA32_P5_MC_TYPE: ++ data = 0; ++ break; ++ case MSR_IA32_MCG_CAP: ++ data = vcpu->arch.mcg_cap; ++ break; ++ case MSR_IA32_MCG_CTL: ++ if (!(mcg_cap & MCG_CTL_P) && !host) ++ return 1; ++ data = vcpu->arch.mcg_ctl; ++ break; ++ case MSR_IA32_MCG_STATUS: ++ data = vcpu->arch.mcg_status; ++ break; ++ default: ++ if (msr >= MSR_IA32_MC0_CTL && ++ msr < MSR_IA32_MCx_CTL(bank_num)) { ++ u32 offset = msr - MSR_IA32_MC0_CTL; ++ data = vcpu->arch.mce_banks[offset]; ++ break; ++ } ++ return 1; ++ } ++ *pdata = data; ++ return 0; ++} ++ ++int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ switch (msr_info->index) { ++ case MSR_IA32_PLATFORM_ID: ++ case MSR_IA32_EBL_CR_POWERON: ++ case MSR_IA32_DEBUGCTLMSR: ++ case MSR_IA32_LASTBRANCHFROMIP: ++ case MSR_IA32_LASTBRANCHTOIP: ++ case MSR_IA32_LASTINTFROMIP: ++ case MSR_IA32_LASTINTTOIP: ++ case MSR_K8_SYSCFG: ++ case MSR_K8_TSEG_ADDR: ++ case MSR_K8_TSEG_MASK: ++ case MSR_K7_HWCR: ++ case MSR_VM_HSAVE_PA: ++ case MSR_K8_INT_PENDING_MSG: ++ case MSR_AMD64_NB_CFG: ++ case MSR_FAM10H_MMIO_CONF_BASE: ++ case MSR_AMD64_BU_CFG2: ++ case MSR_IA32_PERF_CTL: ++ case MSR_AMD64_DC_CFG: ++ case MSR_F15H_EX_CFG: ++ msr_info->data = 0; ++ break; ++ case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: ++ case MSR_K7_EVNTSEL0 ... 
MSR_K7_EVNTSEL3: ++ case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: ++ case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: ++ case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1: ++ if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) ++ return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); ++ msr_info->data = 0; ++ break; ++ case MSR_IA32_UCODE_REV: ++ msr_info->data = vcpu->arch.microcode_version; ++ break; ++ case MSR_IA32_ARCH_CAPABILITIES: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) ++ return 1; ++ msr_info->data = vcpu->arch.arch_capabilities; ++ break; ++ case MSR_IA32_TSC: ++ msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; ++ break; ++ case MSR_MTRRcap: ++ case 0x200 ... 0x2ff: ++ return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data); ++ case 0xcd: /* fsb frequency */ ++ msr_info->data = 3; ++ break; ++ /* ++ * MSR_EBC_FREQUENCY_ID ++ * Conservative value valid for even the basic CPU models. ++ * Models 0,1: 000 in bits 23:21 indicating a bus speed of ++ * 100MHz, model 2 000 in bits 18:16 indicating 100MHz, ++ * and 266MHz for model 3, or 4. Set Core Clock ++ * Frequency to System Bus Frequency Ratio to 1 (bits ++ * 31:24) even though these are only valid for CPU ++ * models > 2, however guests may end up dividing or ++ * multiplying by zero otherwise. ++ */ ++ case MSR_EBC_FREQUENCY_ID: ++ msr_info->data = 1 << 24; ++ break; ++ case MSR_IA32_APICBASE: ++ msr_info->data = kvm_get_apic_base(vcpu); ++ break; ++ case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: ++ return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data); ++ break; ++ case MSR_IA32_TSCDEADLINE: ++ msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu); ++ break; ++ case MSR_IA32_TSC_ADJUST: ++ msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr; ++ break; ++ case MSR_IA32_MISC_ENABLE: ++ msr_info->data = vcpu->arch.ia32_misc_enable_msr; ++ break; ++ case MSR_IA32_SMBASE: ++ if (!msr_info->host_initiated) ++ return 1; ++ msr_info->data = vcpu->arch.smbase; ++ break; ++ case MSR_SMI_COUNT: ++ msr_info->data = vcpu->arch.smi_count; ++ break; ++ case MSR_IA32_PERF_STATUS: ++ /* TSC increment by tick */ ++ msr_info->data = 1000ULL; ++ /* CPU multiplier */ ++ msr_info->data |= (((uint64_t)4ULL) << 40); ++ break; ++ case MSR_EFER: ++ msr_info->data = vcpu->arch.efer; ++ break; ++ case MSR_KVM_WALL_CLOCK: ++ case MSR_KVM_WALL_CLOCK_NEW: ++ msr_info->data = vcpu->kvm->arch.wall_clock; ++ break; ++ case MSR_KVM_SYSTEM_TIME: ++ case MSR_KVM_SYSTEM_TIME_NEW: ++ msr_info->data = vcpu->arch.time; ++ break; ++ case MSR_KVM_ASYNC_PF_EN: ++ msr_info->data = vcpu->arch.apf.msr_val; ++ break; ++ case MSR_KVM_STEAL_TIME: ++ msr_info->data = vcpu->arch.st.msr_val; ++ break; ++ case MSR_KVM_PV_EOI_EN: ++ msr_info->data = vcpu->arch.pv_eoi.msr_val; ++ break; ++ case MSR_IA32_P5_MC_ADDR: ++ case MSR_IA32_P5_MC_TYPE: ++ case MSR_IA32_MCG_CAP: ++ case MSR_IA32_MCG_CTL: ++ case MSR_IA32_MCG_STATUS: ++ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: ++ return get_msr_mce(vcpu, msr_info->index, &msr_info->data, ++ msr_info->host_initiated); ++ case MSR_K7_CLK_CTL: ++ /* ++ * Provide expected ramp-up count for K7. All other ++ * are set to zero, indicating minimum divisors for ++ * every field. ++ * ++ * This prevents guest kernels on AMD host with CPU ++ * type 6, model 8 and higher from exploding due to ++ * the rdmsr failing. ++ */ ++ msr_info->data = 0x20000000; ++ break; ++ case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: ++ case HV_X64_MSR_CRASH_P0 ... 
HV_X64_MSR_CRASH_P4: ++ case HV_X64_MSR_CRASH_CTL: ++ case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: ++ case HV_X64_MSR_REENLIGHTENMENT_CONTROL: ++ case HV_X64_MSR_TSC_EMULATION_CONTROL: ++ case HV_X64_MSR_TSC_EMULATION_STATUS: ++ return kvm_hv_get_msr_common(vcpu, ++ msr_info->index, &msr_info->data, ++ msr_info->host_initiated); ++ break; ++ case MSR_IA32_BBL_CR_CTL3: ++ /* This legacy MSR exists but isn't fully documented in current ++ * silicon. It is however accessed by winxp in very narrow ++ * scenarios where it sets bit #19, itself documented as ++ * a "reserved" bit. Best effort attempt to source coherent ++ * read data here should the balance of the register be ++ * interpreted by the guest: ++ * ++ * L2 cache control register 3: 64GB range, 256KB size, ++ * enabled, latency 0x1, configured ++ */ ++ msr_info->data = 0xbe702111; ++ break; ++ case MSR_AMD64_OSVW_ID_LENGTH: ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) ++ return 1; ++ msr_info->data = vcpu->arch.osvw.length; ++ break; ++ case MSR_AMD64_OSVW_STATUS: ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) ++ return 1; ++ msr_info->data = vcpu->arch.osvw.status; ++ break; ++ case MSR_PLATFORM_INFO: ++ if (!msr_info->host_initiated && ++ !vcpu->kvm->arch.guest_can_read_msr_platform_info) ++ return 1; ++ msr_info->data = vcpu->arch.msr_platform_info; ++ break; ++ case MSR_MISC_FEATURES_ENABLES: ++ msr_info->data = vcpu->arch.msr_misc_features_enables; ++ break; ++ default: ++ if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) ++ return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); ++ if (!ignore_msrs) { ++ vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n", ++ msr_info->index); ++ return 1; ++ } else { ++ if (report_ignored_msrs) ++ vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", ++ msr_info->index); ++ msr_info->data = 0; ++ } ++ break; ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_get_msr_common); ++ ++/* ++ * Read or write a bunch of msrs. All parameters are kernel addresses. ++ * ++ * @return number of msrs set successfully. ++ */ ++static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, ++ struct kvm_msr_entry *entries, ++ int (*do_msr)(struct kvm_vcpu *vcpu, ++ unsigned index, u64 *data)) ++{ ++ int i; ++ ++ for (i = 0; i < msrs->nmsrs; ++i) ++ if (do_msr(vcpu, entries[i].index, &entries[i].data)) ++ break; ++ ++ return i; ++} ++ ++/* ++ * Read or write a bunch of msrs. Parameters are user addresses. ++ * ++ * @return number of msrs set successfully. 
++ */ ++static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, ++ int (*do_msr)(struct kvm_vcpu *vcpu, ++ unsigned index, u64 *data), ++ int writeback) ++{ ++ struct kvm_msrs msrs; ++ struct kvm_msr_entry *entries; ++ int r, n; ++ unsigned size; ++ ++ r = -EFAULT; ++ if (copy_from_user(&msrs, user_msrs, sizeof msrs)) ++ goto out; ++ ++ r = -E2BIG; ++ if (msrs.nmsrs >= MAX_IO_MSRS) ++ goto out; ++ ++ size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; ++ entries = memdup_user(user_msrs->entries, size); ++ if (IS_ERR(entries)) { ++ r = PTR_ERR(entries); ++ goto out; ++ } ++ ++ r = n = __msr_io(vcpu, &msrs, entries, do_msr); ++ if (r < 0) ++ goto out_free; ++ ++ r = -EFAULT; ++ if (writeback && copy_to_user(user_msrs->entries, entries, size)) ++ goto out_free; ++ ++ r = n; ++ ++out_free: ++ kfree(entries); ++out: ++ return r; ++} ++ ++static inline bool kvm_can_mwait_in_guest(void) ++{ ++ return boot_cpu_has(X86_FEATURE_MWAIT) && ++ !boot_cpu_has_bug(X86_BUG_MONITOR) && ++ boot_cpu_has(X86_FEATURE_ARAT); ++} ++ ++int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ++{ ++ int r = 0; ++ ++ switch (ext) { ++ case KVM_CAP_IRQCHIP: ++ case KVM_CAP_HLT: ++ case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: ++ case KVM_CAP_SET_TSS_ADDR: ++ case KVM_CAP_EXT_CPUID: ++ case KVM_CAP_EXT_EMUL_CPUID: ++ case KVM_CAP_CLOCKSOURCE: ++ case KVM_CAP_PIT: ++ case KVM_CAP_NOP_IO_DELAY: ++ case KVM_CAP_MP_STATE: ++ case KVM_CAP_SYNC_MMU: ++ case KVM_CAP_USER_NMI: ++ case KVM_CAP_REINJECT_CONTROL: ++ case KVM_CAP_IRQ_INJECT_STATUS: ++ case KVM_CAP_IOEVENTFD: ++ case KVM_CAP_IOEVENTFD_NO_LENGTH: ++ case KVM_CAP_PIT2: ++ case KVM_CAP_PIT_STATE2: ++ case KVM_CAP_SET_IDENTITY_MAP_ADDR: ++ case KVM_CAP_XEN_HVM: ++ case KVM_CAP_VCPU_EVENTS: ++ case KVM_CAP_HYPERV: ++ case KVM_CAP_HYPERV_VAPIC: ++ case KVM_CAP_HYPERV_SPIN: ++ case KVM_CAP_HYPERV_SYNIC: ++ case KVM_CAP_HYPERV_SYNIC2: ++ case KVM_CAP_HYPERV_VP_INDEX: ++ case KVM_CAP_HYPERV_EVENTFD: ++ case KVM_CAP_HYPERV_TLBFLUSH: ++ case KVM_CAP_PCI_SEGMENT: ++ case KVM_CAP_DEBUGREGS: ++ case KVM_CAP_X86_ROBUST_SINGLESTEP: ++ case KVM_CAP_XSAVE: ++ case KVM_CAP_ASYNC_PF: ++ case KVM_CAP_GET_TSC_KHZ: ++ case KVM_CAP_KVMCLOCK_CTRL: ++ case KVM_CAP_READONLY_MEM: ++ case KVM_CAP_HYPERV_TIME: ++ case KVM_CAP_IOAPIC_POLARITY_IGNORED: ++ case KVM_CAP_TSC_DEADLINE_TIMER: ++ case KVM_CAP_ENABLE_CAP_VM: ++ case KVM_CAP_DISABLE_QUIRKS: ++ case KVM_CAP_SET_BOOT_CPU_ID: ++ case KVM_CAP_SPLIT_IRQCHIP: ++ case KVM_CAP_IMMEDIATE_EXIT: ++ case KVM_CAP_GET_MSR_FEATURES: ++ case KVM_CAP_MSR_PLATFORM_INFO: ++ r = 1; ++ break; ++ case KVM_CAP_SYNC_REGS: ++ r = KVM_SYNC_X86_VALID_FIELDS; ++ break; ++ case KVM_CAP_ADJUST_CLOCK: ++ r = KVM_CLOCK_TSC_STABLE; ++ break; ++ case KVM_CAP_X86_DISABLE_EXITS: ++ r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE; ++ if(kvm_can_mwait_in_guest()) ++ r |= KVM_X86_DISABLE_EXITS_MWAIT; ++ break; ++ case KVM_CAP_X86_SMM: ++ /* SMBASE is usually relocated above 1M on modern chipsets, ++ * and SMM handlers might indeed rely on 4G segment limits, ++ * so do not report SMM to be available if real mode is ++ * emulated via vm86 mode. Still, do not go to great lengths ++ * to avoid userspace's usage of the feature, because it is a ++ * fringe case that is not enabled except via specific settings ++ * of the module parameters. 
++ */ ++ r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); ++ break; ++ case KVM_CAP_VAPIC: ++ r = !kvm_x86_ops->cpu_has_accelerated_tpr(); ++ break; ++ case KVM_CAP_NR_VCPUS: ++ r = KVM_SOFT_MAX_VCPUS; ++ break; ++ case KVM_CAP_MAX_VCPUS: ++ r = KVM_MAX_VCPUS; ++ break; ++ case KVM_CAP_MAX_VCPU_ID: ++ r = KVM_MAX_VCPU_ID; ++ break; ++ case KVM_CAP_NR_MEMSLOTS: ++ r = KVM_USER_MEM_SLOTS; ++ break; ++ case KVM_CAP_PV_MMU: /* obsolete */ ++ r = 0; ++ break; ++ case KVM_CAP_MCE: ++ r = KVM_MAX_MCE_BANKS; ++ break; ++ case KVM_CAP_XCRS: ++ r = boot_cpu_has(X86_FEATURE_XSAVE); ++ break; ++ case KVM_CAP_TSC_CONTROL: ++ r = kvm_has_tsc_control; ++ break; ++ case KVM_CAP_X2APIC_API: ++ r = KVM_X2APIC_API_VALID_FLAGS; ++ break; ++ case KVM_CAP_NESTED_STATE: ++ r = kvm_x86_ops->get_nested_state ? ++ kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0; ++ break; ++ default: ++ break; ++ } ++ return r; ++ ++} ++ ++long kvm_arch_dev_ioctl(struct file *filp, ++ unsigned int ioctl, unsigned long arg) ++{ ++ void __user *argp = (void __user *)arg; ++ long r; ++ ++ switch (ioctl) { ++ case KVM_GET_MSR_INDEX_LIST: { ++ struct kvm_msr_list __user *user_msr_list = argp; ++ struct kvm_msr_list msr_list; ++ unsigned n; ++ ++ r = -EFAULT; ++ if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) ++ goto out; ++ n = msr_list.nmsrs; ++ msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs; ++ if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) ++ goto out; ++ r = -E2BIG; ++ if (n < msr_list.nmsrs) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(user_msr_list->indices, &msrs_to_save, ++ num_msrs_to_save * sizeof(u32))) ++ goto out; ++ if (copy_to_user(user_msr_list->indices + num_msrs_to_save, ++ &emulated_msrs, ++ num_emulated_msrs * sizeof(u32))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_GET_SUPPORTED_CPUID: ++ case KVM_GET_EMULATED_CPUID: { ++ struct kvm_cpuid2 __user *cpuid_arg = argp; ++ struct kvm_cpuid2 cpuid; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) ++ goto out; ++ ++ r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, ++ ioctl); ++ if (r) ++ goto out; ++ ++ r = -EFAULT; ++ if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_X86_GET_MCE_CAP_SUPPORTED: { ++ r = -EFAULT; ++ if (copy_to_user(argp, &kvm_mce_cap_supported, ++ sizeof(kvm_mce_cap_supported))) ++ goto out; ++ r = 0; ++ break; ++ case KVM_GET_MSR_FEATURE_INDEX_LIST: { ++ struct kvm_msr_list __user *user_msr_list = argp; ++ struct kvm_msr_list msr_list; ++ unsigned int n; ++ ++ r = -EFAULT; ++ if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list))) ++ goto out; ++ n = msr_list.nmsrs; ++ msr_list.nmsrs = num_msr_based_features; ++ if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list))) ++ goto out; ++ r = -E2BIG; ++ if (n < msr_list.nmsrs) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(user_msr_list->indices, &msr_based_features, ++ num_msr_based_features * sizeof(u32))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_GET_MSRS: ++ r = msr_io(NULL, argp, do_get_msr_feature, 1); ++ break; ++ } ++ default: ++ r = -EINVAL; ++ } ++out: ++ return r; ++} ++ ++static void wbinvd_ipi(void *garbage) ++{ ++ wbinvd(); ++} ++ ++static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) ++{ ++ return kvm_arch_has_noncoherent_dma(vcpu->kvm); ++} ++ ++void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ++{ ++ /* Address WBINVD may be executed by guest */ ++ if (need_emulate_wbinvd(vcpu)) { ++ if (kvm_x86_ops->has_wbinvd_exit()) ++ 
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); ++ else if (vcpu->cpu != -1 && vcpu->cpu != cpu) ++ smp_call_function_single(vcpu->cpu, ++ wbinvd_ipi, NULL, 1); ++ } ++ ++ kvm_x86_ops->vcpu_load(vcpu, cpu); ++ ++ /* Apply any externally detected TSC adjustments (due to suspend) */ ++ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { ++ adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); ++ vcpu->arch.tsc_offset_adjustment = 0; ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ } ++ ++ if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) { ++ s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : ++ rdtsc() - vcpu->arch.last_host_tsc; ++ if (tsc_delta < 0) ++ mark_tsc_unstable("KVM discovered backwards TSC"); ++ ++ if (kvm_check_tsc_unstable()) { ++ u64 offset = kvm_compute_tsc_offset(vcpu, ++ vcpu->arch.last_guest_tsc); ++ kvm_vcpu_write_tsc_offset(vcpu, offset); ++ vcpu->arch.tsc_catchup = 1; ++ } ++ ++ if (kvm_lapic_hv_timer_in_use(vcpu)) ++ kvm_lapic_restart_hv_timer(vcpu); ++ ++ /* ++ * On a host with synchronized TSC, there is no need to update ++ * kvmclock on vcpu->cpu migration ++ */ ++ if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) ++ kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); ++ if (vcpu->cpu != cpu) ++ kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu); ++ vcpu->cpu = cpu; ++ } ++ ++ kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); ++} ++ ++static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) ++{ ++ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) ++ return; ++ ++ if (vcpu->arch.st.steal.preempted) ++ return; ++ ++ vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED; ++ ++ kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, ++ &vcpu->arch.st.steal.preempted, ++ offsetof(struct kvm_steal_time, preempted), ++ sizeof(vcpu->arch.st.steal.preempted)); ++} ++ ++void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) ++{ ++ int idx; ++ ++ if (vcpu->preempted) ++ vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu); ++ ++ /* ++ * Disable page faults because we're in atomic context here. ++ * kvm_write_guest_offset_cached() would call might_fault() ++ * that relies on pagefault_disable() to tell if there's a ++ * bug. NOTE: the write to guest memory may not go through if ++ * during postcopy live migration or if there's heavy guest ++ * paging. ++ */ ++ pagefault_disable(); ++ /* ++ * kvm_memslots() will be called by ++ * kvm_write_guest_offset_cached() so take the srcu lock. ++ */ ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ kvm_steal_time_set_preempted(vcpu); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ pagefault_enable(); ++ kvm_x86_ops->vcpu_put(vcpu); ++ vcpu->arch.last_host_tsc = rdtsc(); ++ /* ++ * If userspace has set any breakpoints or watchpoints, dr6 is restored ++ * on every vmexit, but if not, we might have a stale dr6 from the ++ * guest. do_debug expects dr6 to be cleared after it runs, do the same. 
++ */ ++ set_debugreg(0, 6); ++} ++ ++static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, ++ struct kvm_lapic_state *s) ++{ ++ if (vcpu->arch.apicv_active) ++ kvm_x86_ops->sync_pir_to_irr(vcpu); ++ ++ return kvm_apic_get_state(vcpu, s); ++} ++ ++static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, ++ struct kvm_lapic_state *s) ++{ ++ int r; ++ ++ r = kvm_apic_set_state(vcpu, s); ++ if (r) ++ return r; ++ update_cr8_intercept(vcpu); ++ ++ return 0; ++} ++ ++static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) ++{ ++ return (!lapic_in_kernel(vcpu) || ++ kvm_apic_accept_pic_intr(vcpu)); ++} ++ ++/* ++ * if userspace requested an interrupt window, check that the ++ * interrupt window is open. ++ * ++ * No need to exit to userspace if we already have an interrupt queued. ++ */ ++static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) ++{ ++ return kvm_arch_interrupt_allowed(vcpu) && ++ !kvm_cpu_has_interrupt(vcpu) && ++ !kvm_event_needs_reinjection(vcpu) && ++ kvm_cpu_accept_dm_intr(vcpu); ++} ++ ++static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, ++ struct kvm_interrupt *irq) ++{ ++ if (irq->irq >= KVM_NR_INTERRUPTS) ++ return -EINVAL; ++ ++ if (!irqchip_in_kernel(vcpu->kvm)) { ++ kvm_queue_interrupt(vcpu, irq->irq, false); ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ return 0; ++ } ++ ++ /* ++ * With in-kernel LAPIC, we only use this to inject EXTINT, so ++ * fail for in-kernel 8259. ++ */ ++ if (pic_in_kernel(vcpu->kvm)) ++ return -ENXIO; ++ ++ if (vcpu->arch.pending_external_vector != -1) ++ return -EEXIST; ++ ++ vcpu->arch.pending_external_vector = irq->irq; ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ return 0; ++} ++ ++static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) ++{ ++ kvm_inject_nmi(vcpu); ++ ++ return 0; ++} ++ ++static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu) ++{ ++ kvm_make_request(KVM_REQ_SMI, vcpu); ++ ++ return 0; ++} ++ ++static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, ++ struct kvm_tpr_access_ctl *tac) ++{ ++ if (tac->flags) ++ return -EINVAL; ++ vcpu->arch.tpr_access_reporting = !!tac->enabled; ++ return 0; ++} ++ ++static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, ++ u64 mcg_cap) ++{ ++ int r; ++ unsigned bank_num = mcg_cap & 0xff, bank; ++ ++ r = -EINVAL; ++ if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) ++ goto out; ++ if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000)) ++ goto out; ++ r = 0; ++ vcpu->arch.mcg_cap = mcg_cap; ++ /* Init IA32_MCG_CTL to all 1s */ ++ if (mcg_cap & MCG_CTL_P) ++ vcpu->arch.mcg_ctl = ~(u64)0; ++ /* Init IA32_MCi_CTL to all 1s */ ++ for (bank = 0; bank < bank_num; bank++) ++ vcpu->arch.mce_banks[bank*4] = ~(u64)0; ++ ++ if (kvm_x86_ops->setup_mce) ++ kvm_x86_ops->setup_mce(vcpu); ++out: ++ return r; ++} ++ ++static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, ++ struct kvm_x86_mce *mce) ++{ ++ u64 mcg_cap = vcpu->arch.mcg_cap; ++ unsigned bank_num = mcg_cap & 0xff; ++ u64 *banks = vcpu->arch.mce_banks; ++ ++ if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL)) ++ return -EINVAL; ++ /* ++ * if IA32_MCG_CTL is not all 1s, the uncorrected error ++ * reporting is disabled ++ */ ++ if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) && ++ vcpu->arch.mcg_ctl != ~(u64)0) ++ return 0; ++ banks += 4 * mce->bank; ++ /* ++ * if IA32_MCi_CTL is not all 1s, the uncorrected error ++ * reporting is disabled for the bank ++ */ ++ if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0) ++ return 0; ++ if (mce->status & MCI_STATUS_UC) { ++ if 
((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || ++ !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ return 0; ++ } ++ if (banks[1] & MCI_STATUS_VAL) ++ mce->status |= MCI_STATUS_OVER; ++ banks[2] = mce->addr; ++ banks[3] = mce->misc; ++ vcpu->arch.mcg_status = mce->mcg_status; ++ banks[1] = mce->status; ++ kvm_queue_exception(vcpu, MC_VECTOR); ++ } else if (!(banks[1] & MCI_STATUS_VAL) ++ || !(banks[1] & MCI_STATUS_UC)) { ++ if (banks[1] & MCI_STATUS_VAL) ++ mce->status |= MCI_STATUS_OVER; ++ banks[2] = mce->addr; ++ banks[3] = mce->misc; ++ banks[1] = mce->status; ++ } else ++ banks[1] |= MCI_STATUS_OVER; ++ return 0; ++} ++ ++static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, ++ struct kvm_vcpu_events *events) ++{ ++ process_nmi(vcpu); ++ /* ++ * FIXME: pass injected and pending separately. This is only ++ * needed for nested virtualization, whose state cannot be ++ * migrated yet. For now we can combine them. ++ */ ++ events->exception.injected = ++ (vcpu->arch.exception.pending || ++ vcpu->arch.exception.injected) && ++ !kvm_exception_is_soft(vcpu->arch.exception.nr); ++ events->exception.nr = vcpu->arch.exception.nr; ++ events->exception.has_error_code = vcpu->arch.exception.has_error_code; ++ events->exception.pad = 0; ++ events->exception.error_code = vcpu->arch.exception.error_code; ++ ++ events->interrupt.injected = ++ vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; ++ events->interrupt.nr = vcpu->arch.interrupt.nr; ++ events->interrupt.soft = 0; ++ events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); ++ ++ events->nmi.injected = vcpu->arch.nmi_injected; ++ events->nmi.pending = vcpu->arch.nmi_pending != 0; ++ events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); ++ events->nmi.pad = 0; ++ ++ events->sipi_vector = 0; /* never valid when reporting to user space */ ++ ++ events->smi.smm = is_smm(vcpu); ++ events->smi.pending = vcpu->arch.smi_pending; ++ events->smi.smm_inside_nmi = ++ !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK); ++ events->smi.latched_init = kvm_lapic_latched_init(vcpu); ++ ++ events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING ++ | KVM_VCPUEVENT_VALID_SHADOW ++ | KVM_VCPUEVENT_VALID_SMM); ++ memset(&events->reserved, 0, sizeof(events->reserved)); ++} ++ ++static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags); ++ ++static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, ++ struct kvm_vcpu_events *events) ++{ ++ if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING ++ | KVM_VCPUEVENT_VALID_SIPI_VECTOR ++ | KVM_VCPUEVENT_VALID_SHADOW ++ | KVM_VCPUEVENT_VALID_SMM)) ++ return -EINVAL; ++ ++ if (events->exception.injected && ++ (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || ++ is_guest_mode(vcpu))) ++ return -EINVAL; ++ ++ /* INITs are latched while in SMM */ ++ if (events->flags & KVM_VCPUEVENT_VALID_SMM && ++ (events->smi.smm || events->smi.pending) && ++ vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) ++ return -EINVAL; ++ ++ process_nmi(vcpu); ++ vcpu->arch.exception.injected = false; ++ vcpu->arch.exception.pending = events->exception.injected; ++ vcpu->arch.exception.nr = events->exception.nr; ++ vcpu->arch.exception.has_error_code = events->exception.has_error_code; ++ vcpu->arch.exception.error_code = events->exception.error_code; ++ ++ vcpu->arch.interrupt.injected = events->interrupt.injected; ++ vcpu->arch.interrupt.nr = events->interrupt.nr; ++ vcpu->arch.interrupt.soft = events->interrupt.soft; ++ if (events->flags & 
KVM_VCPUEVENT_VALID_SHADOW) ++ kvm_x86_ops->set_interrupt_shadow(vcpu, ++ events->interrupt.shadow); ++ ++ vcpu->arch.nmi_injected = events->nmi.injected; ++ if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) ++ vcpu->arch.nmi_pending = events->nmi.pending; ++ kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); ++ ++ if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && ++ lapic_in_kernel(vcpu)) ++ vcpu->arch.apic->sipi_vector = events->sipi_vector; ++ ++ if (events->flags & KVM_VCPUEVENT_VALID_SMM) { ++ u32 hflags = vcpu->arch.hflags; ++ if (events->smi.smm) ++ hflags |= HF_SMM_MASK; ++ else ++ hflags &= ~HF_SMM_MASK; ++ kvm_set_hflags(vcpu, hflags); ++ ++ vcpu->arch.smi_pending = events->smi.pending; ++ ++ if (events->smi.smm) { ++ if (events->smi.smm_inside_nmi) ++ vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; ++ else ++ vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; ++ if (lapic_in_kernel(vcpu)) { ++ if (events->smi.latched_init) ++ set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); ++ else ++ clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); ++ } ++ } ++ } ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ return 0; ++} ++ ++static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, ++ struct kvm_debugregs *dbgregs) ++{ ++ unsigned long val; ++ ++ memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); ++ kvm_get_dr(vcpu, 6, &val); ++ dbgregs->dr6 = val; ++ dbgregs->dr7 = vcpu->arch.dr7; ++ dbgregs->flags = 0; ++ memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); ++} ++ ++static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, ++ struct kvm_debugregs *dbgregs) ++{ ++ if (dbgregs->flags) ++ return -EINVAL; ++ ++ if (dbgregs->dr6 & ~0xffffffffull) ++ return -EINVAL; ++ if (dbgregs->dr7 & ~0xffffffffull) ++ return -EINVAL; ++ ++ memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); ++ kvm_update_dr0123(vcpu); ++ vcpu->arch.dr6 = dbgregs->dr6; ++ kvm_update_dr6(vcpu); ++ vcpu->arch.dr7 = dbgregs->dr7; ++ kvm_update_dr7(vcpu); ++ ++ return 0; ++} ++ ++#define XSTATE_COMPACTION_ENABLED (1ULL << 63) ++ ++static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) ++{ ++ struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; ++ u64 xstate_bv = xsave->header.xfeatures; ++ u64 valid; ++ ++ /* ++ * Copy legacy XSAVE area, to avoid complications with CPUID ++ * leaves 0 and 1 in the loop below. ++ */ ++ memcpy(dest, xsave, XSAVE_HDR_OFFSET); ++ ++ /* Set XSTATE_BV */ ++ xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE; ++ *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv; ++ ++ /* ++ * Copy each region from the possibly compacted offset to the ++ * non-compacted offset. ++ */ ++ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; ++ while (valid) { ++ u64 feature = valid & -valid; ++ int index = fls64(feature) - 1; ++ void *src = get_xsave_addr(xsave, feature); ++ ++ if (src) { ++ u32 size, offset, ecx, edx; ++ cpuid_count(XSTATE_CPUID, index, ++ &size, &offset, &ecx, &edx); ++ if (feature == XFEATURE_MASK_PKRU) ++ memcpy(dest + offset, &vcpu->arch.pkru, ++ sizeof(vcpu->arch.pkru)); ++ else ++ memcpy(dest + offset, src, size); ++ ++ } ++ ++ valid -= feature; ++ } ++} ++ ++static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) ++{ ++ struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; ++ u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); ++ u64 valid; ++ ++ /* ++ * Copy legacy XSAVE area, to avoid complications with CPUID ++ * leaves 0 and 1 in the loop below. 
++ */ ++ memcpy(xsave, src, XSAVE_HDR_OFFSET); ++ ++ /* Set XSTATE_BV and possibly XCOMP_BV. */ ++ xsave->header.xfeatures = xstate_bv; ++ if (boot_cpu_has(X86_FEATURE_XSAVES)) ++ xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; ++ ++ /* ++ * Copy each region from the non-compacted offset to the ++ * possibly compacted offset. ++ */ ++ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; ++ while (valid) { ++ u64 feature = valid & -valid; ++ int index = fls64(feature) - 1; ++ void *dest = get_xsave_addr(xsave, feature); ++ ++ if (dest) { ++ u32 size, offset, ecx, edx; ++ cpuid_count(XSTATE_CPUID, index, ++ &size, &offset, &ecx, &edx); ++ if (feature == XFEATURE_MASK_PKRU) ++ memcpy(&vcpu->arch.pkru, src + offset, ++ sizeof(vcpu->arch.pkru)); ++ else ++ memcpy(dest, src + offset, size); ++ } ++ ++ valid -= feature; ++ } ++} ++ ++static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, ++ struct kvm_xsave *guest_xsave) ++{ ++ if (boot_cpu_has(X86_FEATURE_XSAVE)) { ++ memset(guest_xsave, 0, sizeof(struct kvm_xsave)); ++ fill_xsave((u8 *) guest_xsave->region, vcpu); ++ } else { ++ memcpy(guest_xsave->region, ++ &vcpu->arch.guest_fpu.state.fxsave, ++ sizeof(struct fxregs_state)); ++ *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = ++ XFEATURE_MASK_FPSSE; ++ } ++} ++ ++#define XSAVE_MXCSR_OFFSET 24 ++ ++static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, ++ struct kvm_xsave *guest_xsave) ++{ ++ u64 xstate_bv = ++ *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; ++ u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)]; ++ ++ if (boot_cpu_has(X86_FEATURE_XSAVE)) { ++ /* ++ * Here we allow setting states that are not present in ++ * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility ++ * with old userspace. ++ */ ++ if (xstate_bv & ~kvm_supported_xcr0() || ++ mxcsr & ~mxcsr_feature_mask) ++ return -EINVAL; ++ load_xsave(vcpu, (u8 *)guest_xsave->region); ++ } else { ++ if (xstate_bv & ~XFEATURE_MASK_FPSSE || ++ mxcsr & ~mxcsr_feature_mask) ++ return -EINVAL; ++ memcpy(&vcpu->arch.guest_fpu.state.fxsave, ++ guest_xsave->region, sizeof(struct fxregs_state)); ++ } ++ return 0; ++} ++ ++static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, ++ struct kvm_xcrs *guest_xcrs) ++{ ++ if (!boot_cpu_has(X86_FEATURE_XSAVE)) { ++ guest_xcrs->nr_xcrs = 0; ++ return; ++ } ++ ++ guest_xcrs->nr_xcrs = 1; ++ guest_xcrs->flags = 0; ++ guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK; ++ guest_xcrs->xcrs[0].value = vcpu->arch.xcr0; ++} ++ ++static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, ++ struct kvm_xcrs *guest_xcrs) ++{ ++ int i, r = 0; ++ ++ if (!boot_cpu_has(X86_FEATURE_XSAVE)) ++ return -EINVAL; ++ ++ if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags) ++ return -EINVAL; ++ ++ for (i = 0; i < guest_xcrs->nr_xcrs; i++) ++ /* Only support XCR0 currently */ ++ if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) { ++ r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, ++ guest_xcrs->xcrs[i].value); ++ break; ++ } ++ if (r) ++ r = -EINVAL; ++ return r; ++} ++ ++/* ++ * kvm_set_guest_paused() indicates to the guest kernel that it has been ++ * stopped by the hypervisor. This function will be called from the host only. ++ * EINVAL is returned when the host attempts to set the flag for a guest that ++ * does not support pv clocks. 
++ */ ++static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) ++{ ++ if (!vcpu->arch.pv_time_enabled) ++ return -EINVAL; ++ vcpu->arch.pvclock_set_guest_stopped_request = true; ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ return 0; ++} ++ ++static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, ++ struct kvm_enable_cap *cap) ++{ ++ if (cap->flags) ++ return -EINVAL; ++ ++ switch (cap->cap) { ++ case KVM_CAP_HYPERV_SYNIC2: ++ if (cap->args[0]) ++ return -EINVAL; ++ case KVM_CAP_HYPERV_SYNIC: ++ if (!irqchip_in_kernel(vcpu->kvm)) ++ return -EINVAL; ++ return kvm_hv_activate_synic(vcpu, cap->cap == ++ KVM_CAP_HYPERV_SYNIC2); ++ default: ++ return -EINVAL; ++ } ++} ++ ++long kvm_arch_vcpu_ioctl(struct file *filp, ++ unsigned int ioctl, unsigned long arg) ++{ ++ struct kvm_vcpu *vcpu = filp->private_data; ++ void __user *argp = (void __user *)arg; ++ int r; ++ union { ++ struct kvm_lapic_state *lapic; ++ struct kvm_xsave *xsave; ++ struct kvm_xcrs *xcrs; ++ void *buffer; ++ } u; ++ ++ vcpu_load(vcpu); ++ ++ u.buffer = NULL; ++ switch (ioctl) { ++ case KVM_GET_LAPIC: { ++ r = -EINVAL; ++ if (!lapic_in_kernel(vcpu)) ++ goto out; ++ u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); ++ ++ r = -ENOMEM; ++ if (!u.lapic) ++ goto out; ++ r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic); ++ if (r) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_SET_LAPIC: { ++ r = -EINVAL; ++ if (!lapic_in_kernel(vcpu)) ++ goto out; ++ u.lapic = memdup_user(argp, sizeof(*u.lapic)); ++ if (IS_ERR(u.lapic)) { ++ r = PTR_ERR(u.lapic); ++ goto out_nofree; ++ } ++ ++ r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); ++ break; ++ } ++ case KVM_INTERRUPT: { ++ struct kvm_interrupt irq; ++ ++ r = -EFAULT; ++ if (copy_from_user(&irq, argp, sizeof irq)) ++ goto out; ++ r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); ++ break; ++ } ++ case KVM_NMI: { ++ r = kvm_vcpu_ioctl_nmi(vcpu); ++ break; ++ } ++ case KVM_SMI: { ++ r = kvm_vcpu_ioctl_smi(vcpu); ++ break; ++ } ++ case KVM_SET_CPUID: { ++ struct kvm_cpuid __user *cpuid_arg = argp; ++ struct kvm_cpuid cpuid; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) ++ goto out; ++ r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); ++ break; ++ } ++ case KVM_SET_CPUID2: { ++ struct kvm_cpuid2 __user *cpuid_arg = argp; ++ struct kvm_cpuid2 cpuid; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) ++ goto out; ++ r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, ++ cpuid_arg->entries); ++ break; ++ } ++ case KVM_GET_CPUID2: { ++ struct kvm_cpuid2 __user *cpuid_arg = argp; ++ struct kvm_cpuid2 cpuid; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) ++ goto out; ++ r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, ++ cpuid_arg->entries); ++ if (r) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_GET_MSRS: { ++ int idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = msr_io(vcpu, argp, do_get_msr, 1); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ break; ++ } ++ case KVM_SET_MSRS: { ++ int idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = msr_io(vcpu, argp, do_set_msr, 0); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ break; ++ } ++ case KVM_TPR_ACCESS_REPORTING: { ++ struct kvm_tpr_access_ctl tac; ++ ++ r = -EFAULT; ++ if (copy_from_user(&tac, argp, sizeof tac)) ++ goto out; ++ r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac); ++ if (r) 
++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(argp, &tac, sizeof tac)) ++ goto out; ++ r = 0; ++ break; ++ }; ++ case KVM_SET_VAPIC_ADDR: { ++ struct kvm_vapic_addr va; ++ int idx; ++ ++ r = -EINVAL; ++ if (!lapic_in_kernel(vcpu)) ++ goto out; ++ r = -EFAULT; ++ if (copy_from_user(&va, argp, sizeof va)) ++ goto out; ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ break; ++ } ++ case KVM_X86_SETUP_MCE: { ++ u64 mcg_cap; ++ ++ r = -EFAULT; ++ if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap)) ++ goto out; ++ r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap); ++ break; ++ } ++ case KVM_X86_SET_MCE: { ++ struct kvm_x86_mce mce; ++ ++ r = -EFAULT; ++ if (copy_from_user(&mce, argp, sizeof mce)) ++ goto out; ++ r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); ++ break; ++ } ++ case KVM_GET_VCPU_EVENTS: { ++ struct kvm_vcpu_events events; ++ ++ kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); ++ ++ r = -EFAULT; ++ if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) ++ break; ++ r = 0; ++ break; ++ } ++ case KVM_SET_VCPU_EVENTS: { ++ struct kvm_vcpu_events events; ++ ++ r = -EFAULT; ++ if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) ++ break; ++ ++ r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); ++ break; ++ } ++ case KVM_GET_DEBUGREGS: { ++ struct kvm_debugregs dbgregs; ++ ++ kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); ++ ++ r = -EFAULT; ++ if (copy_to_user(argp, &dbgregs, ++ sizeof(struct kvm_debugregs))) ++ break; ++ r = 0; ++ break; ++ } ++ case KVM_SET_DEBUGREGS: { ++ struct kvm_debugregs dbgregs; ++ ++ r = -EFAULT; ++ if (copy_from_user(&dbgregs, argp, ++ sizeof(struct kvm_debugregs))) ++ break; ++ ++ r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); ++ break; ++ } ++ case KVM_GET_XSAVE: { ++ u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); ++ r = -ENOMEM; ++ if (!u.xsave) ++ break; ++ ++ kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); ++ ++ r = -EFAULT; ++ if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave))) ++ break; ++ r = 0; ++ break; ++ } ++ case KVM_SET_XSAVE: { ++ u.xsave = memdup_user(argp, sizeof(*u.xsave)); ++ if (IS_ERR(u.xsave)) { ++ r = PTR_ERR(u.xsave); ++ goto out_nofree; ++ } ++ ++ r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); ++ break; ++ } ++ case KVM_GET_XCRS: { ++ u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); ++ r = -ENOMEM; ++ if (!u.xcrs) ++ break; ++ ++ kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); ++ ++ r = -EFAULT; ++ if (copy_to_user(argp, u.xcrs, ++ sizeof(struct kvm_xcrs))) ++ break; ++ r = 0; ++ break; ++ } ++ case KVM_SET_XCRS: { ++ u.xcrs = memdup_user(argp, sizeof(*u.xcrs)); ++ if (IS_ERR(u.xcrs)) { ++ r = PTR_ERR(u.xcrs); ++ goto out_nofree; ++ } ++ ++ r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); ++ break; ++ } ++ case KVM_SET_TSC_KHZ: { ++ u32 user_tsc_khz; ++ ++ r = -EINVAL; ++ user_tsc_khz = (u32)arg; ++ ++ if (user_tsc_khz >= kvm_max_guest_tsc_khz) ++ goto out; ++ ++ if (user_tsc_khz == 0) ++ user_tsc_khz = tsc_khz; ++ ++ if (!kvm_set_tsc_khz(vcpu, user_tsc_khz)) ++ r = 0; ++ ++ goto out; ++ } ++ case KVM_GET_TSC_KHZ: { ++ r = vcpu->arch.virtual_tsc_khz; ++ goto out; ++ } ++ case KVM_KVMCLOCK_CTRL: { ++ r = kvm_set_guest_paused(vcpu); ++ goto out; ++ } ++ case KVM_ENABLE_CAP: { ++ struct kvm_enable_cap cap; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cap, argp, sizeof(cap))) ++ goto out; ++ r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); ++ break; ++ } ++ case KVM_GET_NESTED_STATE: { ++ struct 
kvm_nested_state __user *user_kvm_nested_state = argp; ++ u32 user_data_size; ++ ++ r = -EINVAL; ++ if (!kvm_x86_ops->get_nested_state) ++ break; ++ ++ BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size)); ++ r = -EFAULT; ++ if (get_user(user_data_size, &user_kvm_nested_state->size)) ++ break; ++ ++ r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state, ++ user_data_size); ++ if (r < 0) ++ break; ++ ++ if (r > user_data_size) { ++ if (put_user(r, &user_kvm_nested_state->size)) ++ r = -EFAULT; ++ else ++ r = -E2BIG; ++ break; ++ } ++ ++ r = 0; ++ break; ++ } ++ case KVM_SET_NESTED_STATE: { ++ struct kvm_nested_state __user *user_kvm_nested_state = argp; ++ struct kvm_nested_state kvm_state; ++ int idx; ++ ++ r = -EINVAL; ++ if (!kvm_x86_ops->set_nested_state) ++ break; ++ ++ r = -EFAULT; ++ if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state))) ++ break; ++ ++ r = -EINVAL; ++ if (kvm_state.size < sizeof(kvm_state)) ++ break; ++ ++ if (kvm_state.flags & ++ ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE)) ++ break; ++ ++ /* nested_run_pending implies guest_mode. */ ++ if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING) ++ break; ++ ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ break; ++ } ++ default: ++ r = -EINVAL; ++ } ++out: ++ kfree(u.buffer); ++out_nofree: ++ vcpu_put(vcpu); ++ return r; ++} ++ ++vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) ++{ ++ return VM_FAULT_SIGBUS; ++} ++ ++static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) ++{ ++ int ret; ++ ++ if (addr > (unsigned int)(-3 * PAGE_SIZE)) ++ return -EINVAL; ++ ret = kvm_x86_ops->set_tss_addr(kvm, addr); ++ return ret; ++} ++ ++static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, ++ u64 ident_addr) ++{ ++ return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr); ++} ++ ++static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, ++ unsigned long kvm_nr_mmu_pages) ++{ ++ if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) ++ return -EINVAL; ++ ++ mutex_lock(&kvm->slots_lock); ++ ++ kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); ++ kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; ++ ++ mutex_unlock(&kvm->slots_lock); ++ return 0; ++} ++ ++static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) ++{ ++ return kvm->arch.n_max_mmu_pages; ++} ++ ++static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) ++{ ++ struct kvm_pic *pic = kvm->arch.vpic; ++ int r; ++ ++ r = 0; ++ switch (chip->chip_id) { ++ case KVM_IRQCHIP_PIC_MASTER: ++ memcpy(&chip->chip.pic, &pic->pics[0], ++ sizeof(struct kvm_pic_state)); ++ break; ++ case KVM_IRQCHIP_PIC_SLAVE: ++ memcpy(&chip->chip.pic, &pic->pics[1], ++ sizeof(struct kvm_pic_state)); ++ break; ++ case KVM_IRQCHIP_IOAPIC: ++ kvm_get_ioapic(kvm, &chip->chip.ioapic); ++ break; ++ default: ++ r = -EINVAL; ++ break; ++ } ++ return r; ++} ++ ++static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) ++{ ++ struct kvm_pic *pic = kvm->arch.vpic; ++ int r; ++ ++ r = 0; ++ switch (chip->chip_id) { ++ case KVM_IRQCHIP_PIC_MASTER: ++ spin_lock(&pic->lock); ++ memcpy(&pic->pics[0], &chip->chip.pic, ++ sizeof(struct kvm_pic_state)); ++ spin_unlock(&pic->lock); ++ break; ++ case KVM_IRQCHIP_PIC_SLAVE: ++ spin_lock(&pic->lock); ++ memcpy(&pic->pics[1], &chip->chip.pic, ++ sizeof(struct kvm_pic_state)); ++ 
spin_unlock(&pic->lock); ++ break; ++ case KVM_IRQCHIP_IOAPIC: ++ kvm_set_ioapic(kvm, &chip->chip.ioapic); ++ break; ++ default: ++ r = -EINVAL; ++ break; ++ } ++ kvm_pic_update_irq(pic); ++ return r; ++} ++ ++static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) ++{ ++ struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state; ++ ++ BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels)); ++ ++ mutex_lock(&kps->lock); ++ memcpy(ps, &kps->channels, sizeof(*ps)); ++ mutex_unlock(&kps->lock); ++ return 0; ++} ++ ++static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) ++{ ++ int i; ++ struct kvm_pit *pit = kvm->arch.vpit; ++ ++ mutex_lock(&pit->pit_state.lock); ++ memcpy(&pit->pit_state.channels, ps, sizeof(*ps)); ++ for (i = 0; i < 3; i++) ++ kvm_pit_load_count(pit, i, ps->channels[i].count, 0); ++ mutex_unlock(&pit->pit_state.lock); ++ return 0; ++} ++ ++static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) ++{ ++ mutex_lock(&kvm->arch.vpit->pit_state.lock); ++ memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels, ++ sizeof(ps->channels)); ++ ps->flags = kvm->arch.vpit->pit_state.flags; ++ mutex_unlock(&kvm->arch.vpit->pit_state.lock); ++ memset(&ps->reserved, 0, sizeof(ps->reserved)); ++ return 0; ++} ++ ++static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) ++{ ++ int start = 0; ++ int i; ++ u32 prev_legacy, cur_legacy; ++ struct kvm_pit *pit = kvm->arch.vpit; ++ ++ mutex_lock(&pit->pit_state.lock); ++ prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; ++ cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY; ++ if (!prev_legacy && cur_legacy) ++ start = 1; ++ memcpy(&pit->pit_state.channels, &ps->channels, ++ sizeof(pit->pit_state.channels)); ++ pit->pit_state.flags = ps->flags; ++ for (i = 0; i < 3; i++) ++ kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count, ++ start && i == 0); ++ mutex_unlock(&pit->pit_state.lock); ++ return 0; ++} ++ ++static int kvm_vm_ioctl_reinject(struct kvm *kvm, ++ struct kvm_reinject_control *control) ++{ ++ struct kvm_pit *pit = kvm->arch.vpit; ++ ++ if (!pit) ++ return -ENXIO; ++ ++ /* pit->pit_state.lock was overloaded to prevent userspace from getting ++ * an inconsistent state after running multiple KVM_REINJECT_CONTROL ++ * ioctls in parallel. Use a separate lock if that ioctl isn't rare. ++ */ ++ mutex_lock(&pit->pit_state.lock); ++ kvm_pit_set_reinject(pit, control->pit_reinject); ++ mutex_unlock(&pit->pit_state.lock); ++ ++ return 0; ++} ++ ++/** ++ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot ++ * @kvm: kvm instance ++ * @log: slot id and address to which we copy the log ++ * ++ * Steps 1-4 below provide general overview of dirty page logging. See ++ * kvm_get_dirty_log_protect() function description for additional details. ++ * ++ * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we ++ * always flush the TLB (step 4) even if previous step failed and the dirty ++ * bitmap may be corrupt. Regardless of previous outcome the KVM logging API ++ * does not preclude user space subsequent dirty log read. Flushing TLB ensures ++ * writes will be marked dirty for next log read. ++ * ++ * 1. Take a snapshot of the bit and clear it if needed. ++ * 2. Write protect the corresponding page. ++ * 3. Copy the snapshot to the userspace. ++ * 4. Flush TLB's if needed. 
++ */ ++int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) ++{ ++ bool is_dirty = false; ++ int r; ++ ++ mutex_lock(&kvm->slots_lock); ++ ++ /* ++ * Flush potentially hardware-cached dirty pages to dirty_bitmap. ++ */ ++ if (kvm_x86_ops->flush_log_dirty) ++ kvm_x86_ops->flush_log_dirty(kvm); ++ ++ r = kvm_get_dirty_log_protect(kvm, log, &is_dirty); ++ ++ /* ++ * All the TLBs can be flushed out of mmu lock, see the comments in ++ * kvm_mmu_slot_remove_write_access(). ++ */ ++ lockdep_assert_held(&kvm->slots_lock); ++ if (is_dirty) ++ kvm_flush_remote_tlbs(kvm); ++ ++ mutex_unlock(&kvm->slots_lock); ++ return r; ++} ++ ++int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, ++ bool line_status) ++{ ++ if (!irqchip_in_kernel(kvm)) ++ return -ENXIO; ++ ++ irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, ++ irq_event->irq, irq_event->level, ++ line_status); ++ return 0; ++} ++ ++static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, ++ struct kvm_enable_cap *cap) ++{ ++ int r; ++ ++ if (cap->flags) ++ return -EINVAL; ++ ++ switch (cap->cap) { ++ case KVM_CAP_DISABLE_QUIRKS: ++ kvm->arch.disabled_quirks = cap->args[0]; ++ r = 0; ++ break; ++ case KVM_CAP_SPLIT_IRQCHIP: { ++ mutex_lock(&kvm->lock); ++ r = -EINVAL; ++ if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS) ++ goto split_irqchip_unlock; ++ r = -EEXIST; ++ if (irqchip_in_kernel(kvm)) ++ goto split_irqchip_unlock; ++ if (kvm->created_vcpus) ++ goto split_irqchip_unlock; ++ r = kvm_setup_empty_irq_routing(kvm); ++ if (r) ++ goto split_irqchip_unlock; ++ /* Pairs with irqchip_in_kernel. */ ++ smp_wmb(); ++ kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT; ++ kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; ++ r = 0; ++split_irqchip_unlock: ++ mutex_unlock(&kvm->lock); ++ break; ++ } ++ case KVM_CAP_X2APIC_API: ++ r = -EINVAL; ++ if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS) ++ break; ++ ++ if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS) ++ kvm->arch.x2apic_format = true; ++ if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) ++ kvm->arch.x2apic_broadcast_quirk_disabled = true; ++ ++ r = 0; ++ break; ++ case KVM_CAP_X86_DISABLE_EXITS: ++ r = -EINVAL; ++ if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) ++ break; ++ ++ if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && ++ kvm_can_mwait_in_guest()) ++ kvm->arch.mwait_in_guest = true; ++ if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) ++ kvm->arch.hlt_in_guest = true; ++ if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) ++ kvm->arch.pause_in_guest = true; ++ r = 0; ++ break; ++ case KVM_CAP_MSR_PLATFORM_INFO: ++ kvm->arch.guest_can_read_msr_platform_info = cap->args[0]; ++ r = 0; ++ break; ++ default: ++ r = -EINVAL; ++ break; ++ } ++ return r; ++} ++ ++long kvm_arch_vm_ioctl(struct file *filp, ++ unsigned int ioctl, unsigned long arg) ++{ ++ struct kvm *kvm = filp->private_data; ++ void __user *argp = (void __user *)arg; ++ int r = -ENOTTY; ++ /* ++ * This union makes it completely explicit to gcc-3.x ++ * that these two variables' stack usage should be ++ * combined, not added together. 
++ */ ++ union { ++ struct kvm_pit_state ps; ++ struct kvm_pit_state2 ps2; ++ struct kvm_pit_config pit_config; ++ } u; ++ ++ switch (ioctl) { ++ case KVM_SET_TSS_ADDR: ++ r = kvm_vm_ioctl_set_tss_addr(kvm, arg); ++ break; ++ case KVM_SET_IDENTITY_MAP_ADDR: { ++ u64 ident_addr; ++ ++ mutex_lock(&kvm->lock); ++ r = -EINVAL; ++ if (kvm->created_vcpus) ++ goto set_identity_unlock; ++ r = -EFAULT; ++ if (copy_from_user(&ident_addr, argp, sizeof ident_addr)) ++ goto set_identity_unlock; ++ r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr); ++set_identity_unlock: ++ mutex_unlock(&kvm->lock); ++ break; ++ } ++ case KVM_SET_NR_MMU_PAGES: ++ r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); ++ break; ++ case KVM_GET_NR_MMU_PAGES: ++ r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); ++ break; ++ case KVM_CREATE_IRQCHIP: { ++ mutex_lock(&kvm->lock); ++ ++ r = -EEXIST; ++ if (irqchip_in_kernel(kvm)) ++ goto create_irqchip_unlock; ++ ++ r = -EINVAL; ++ if (kvm->created_vcpus) ++ goto create_irqchip_unlock; ++ ++ r = kvm_pic_init(kvm); ++ if (r) ++ goto create_irqchip_unlock; ++ ++ r = kvm_ioapic_init(kvm); ++ if (r) { ++ kvm_pic_destroy(kvm); ++ goto create_irqchip_unlock; ++ } ++ ++ r = kvm_setup_default_irq_routing(kvm); ++ if (r) { ++ kvm_ioapic_destroy(kvm); ++ kvm_pic_destroy(kvm); ++ goto create_irqchip_unlock; ++ } ++ /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ ++ smp_wmb(); ++ kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL; ++ create_irqchip_unlock: ++ mutex_unlock(&kvm->lock); ++ break; ++ } ++ case KVM_CREATE_PIT: ++ u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; ++ goto create_pit; ++ case KVM_CREATE_PIT2: ++ r = -EFAULT; ++ if (copy_from_user(&u.pit_config, argp, ++ sizeof(struct kvm_pit_config))) ++ goto out; ++ create_pit: ++ mutex_lock(&kvm->lock); ++ r = -EEXIST; ++ if (kvm->arch.vpit) ++ goto create_pit_unlock; ++ r = -ENOMEM; ++ kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags); ++ if (kvm->arch.vpit) ++ r = 0; ++ create_pit_unlock: ++ mutex_unlock(&kvm->lock); ++ break; ++ case KVM_GET_IRQCHIP: { ++ /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ ++ struct kvm_irqchip *chip; ++ ++ chip = memdup_user(argp, sizeof(*chip)); ++ if (IS_ERR(chip)) { ++ r = PTR_ERR(chip); ++ goto out; ++ } ++ ++ r = -ENXIO; ++ if (!irqchip_kernel(kvm)) ++ goto get_irqchip_out; ++ r = kvm_vm_ioctl_get_irqchip(kvm, chip); ++ if (r) ++ goto get_irqchip_out; ++ r = -EFAULT; ++ if (copy_to_user(argp, chip, sizeof *chip)) ++ goto get_irqchip_out; ++ r = 0; ++ get_irqchip_out: ++ kfree(chip); ++ break; ++ } ++ case KVM_SET_IRQCHIP: { ++ /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ ++ struct kvm_irqchip *chip; ++ ++ chip = memdup_user(argp, sizeof(*chip)); ++ if (IS_ERR(chip)) { ++ r = PTR_ERR(chip); ++ goto out; ++ } ++ ++ r = -ENXIO; ++ if (!irqchip_kernel(kvm)) ++ goto set_irqchip_out; ++ r = kvm_vm_ioctl_set_irqchip(kvm, chip); ++ if (r) ++ goto set_irqchip_out; ++ r = 0; ++ set_irqchip_out: ++ kfree(chip); ++ break; ++ } ++ case KVM_GET_PIT: { ++ r = -EFAULT; ++ if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state))) ++ goto out; ++ r = -ENXIO; ++ if (!kvm->arch.vpit) ++ goto out; ++ r = kvm_vm_ioctl_get_pit(kvm, &u.ps); ++ if (r) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_SET_PIT: { ++ r = -EFAULT; ++ if (copy_from_user(&u.ps, argp, sizeof u.ps)) ++ goto out; ++ r = -ENXIO; ++ if (!kvm->arch.vpit) ++ goto out; ++ r = kvm_vm_ioctl_set_pit(kvm, &u.ps); ++ break; ++ } ++ case KVM_GET_PIT2: { ++ r = 
-ENXIO; ++ if (!kvm->arch.vpit) ++ goto out; ++ r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2); ++ if (r) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(argp, &u.ps2, sizeof(u.ps2))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_SET_PIT2: { ++ r = -EFAULT; ++ if (copy_from_user(&u.ps2, argp, sizeof(u.ps2))) ++ goto out; ++ r = -ENXIO; ++ if (!kvm->arch.vpit) ++ goto out; ++ r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2); ++ break; ++ } ++ case KVM_REINJECT_CONTROL: { ++ struct kvm_reinject_control control; ++ r = -EFAULT; ++ if (copy_from_user(&control, argp, sizeof(control))) ++ goto out; ++ r = kvm_vm_ioctl_reinject(kvm, &control); ++ break; ++ } ++ case KVM_SET_BOOT_CPU_ID: ++ r = 0; ++ mutex_lock(&kvm->lock); ++ if (kvm->created_vcpus) ++ r = -EBUSY; ++ else ++ kvm->arch.bsp_vcpu_id = arg; ++ mutex_unlock(&kvm->lock); ++ break; ++ case KVM_XEN_HVM_CONFIG: { ++ struct kvm_xen_hvm_config xhc; ++ r = -EFAULT; ++ if (copy_from_user(&xhc, argp, sizeof(xhc))) ++ goto out; ++ r = -EINVAL; ++ if (xhc.flags) ++ goto out; ++ memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc)); ++ r = 0; ++ break; ++ } ++ case KVM_SET_CLOCK: { ++ struct kvm_clock_data user_ns; ++ u64 now_ns; ++ ++ r = -EFAULT; ++ if (copy_from_user(&user_ns, argp, sizeof(user_ns))) ++ goto out; ++ ++ r = -EINVAL; ++ if (user_ns.flags) ++ goto out; ++ ++ r = 0; ++ /* ++ * TODO: userspace has to take care of races with VCPU_RUN, so ++ * kvm_gen_update_masterclock() can be cut down to locked ++ * pvclock_update_vm_gtod_copy(). ++ */ ++ kvm_gen_update_masterclock(kvm); ++ now_ns = get_kvmclock_ns(kvm); ++ kvm->arch.kvmclock_offset += user_ns.clock - now_ns; ++ kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); ++ break; ++ } ++ case KVM_GET_CLOCK: { ++ struct kvm_clock_data user_ns; ++ u64 now_ns; ++ ++ now_ns = get_kvmclock_ns(kvm); ++ user_ns.clock = now_ns; ++ user_ns.flags = kvm->arch.use_master_clock ? 
KVM_CLOCK_TSC_STABLE : 0; ++ memset(&user_ns.pad, 0, sizeof(user_ns.pad)); ++ ++ r = -EFAULT; ++ if (copy_to_user(argp, &user_ns, sizeof(user_ns))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_ENABLE_CAP: { ++ struct kvm_enable_cap cap; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cap, argp, sizeof(cap))) ++ goto out; ++ r = kvm_vm_ioctl_enable_cap(kvm, &cap); ++ break; ++ } ++ case KVM_MEMORY_ENCRYPT_OP: { ++ r = -ENOTTY; ++ if (kvm_x86_ops->mem_enc_op) ++ r = kvm_x86_ops->mem_enc_op(kvm, argp); ++ break; ++ } ++ case KVM_MEMORY_ENCRYPT_REG_REGION: { ++ struct kvm_enc_region region; ++ ++ r = -EFAULT; ++ if (copy_from_user(®ion, argp, sizeof(region))) ++ goto out; ++ ++ r = -ENOTTY; ++ if (kvm_x86_ops->mem_enc_reg_region) ++ r = kvm_x86_ops->mem_enc_reg_region(kvm, ®ion); ++ break; ++ } ++ case KVM_MEMORY_ENCRYPT_UNREG_REGION: { ++ struct kvm_enc_region region; ++ ++ r = -EFAULT; ++ if (copy_from_user(®ion, argp, sizeof(region))) ++ goto out; ++ ++ r = -ENOTTY; ++ if (kvm_x86_ops->mem_enc_unreg_region) ++ r = kvm_x86_ops->mem_enc_unreg_region(kvm, ®ion); ++ break; ++ } ++ case KVM_HYPERV_EVENTFD: { ++ struct kvm_hyperv_eventfd hvevfd; ++ ++ r = -EFAULT; ++ if (copy_from_user(&hvevfd, argp, sizeof(hvevfd))) ++ goto out; ++ r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); ++ break; ++ } ++ default: ++ r = -ENOTTY; ++ } ++out: ++ return r; ++} ++ ++static void kvm_init_msr_list(void) ++{ ++ u32 dummy[2]; ++ unsigned i, j; ++ ++ for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { ++ if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) ++ continue; ++ ++ /* ++ * Even MSRs that are valid in the host may not be exposed ++ * to the guests in some cases. ++ */ ++ switch (msrs_to_save[i]) { ++ case MSR_IA32_BNDCFGS: ++ if (!kvm_mpx_supported()) ++ continue; ++ break; ++ case MSR_TSC_AUX: ++ if (!kvm_x86_ops->rdtscp_supported()) ++ continue; ++ break; ++ default: ++ break; ++ } ++ ++ if (j < i) ++ msrs_to_save[j] = msrs_to_save[i]; ++ j++; ++ } ++ num_msrs_to_save = j; ++ ++ for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { ++ if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) ++ continue; ++ ++ if (j < i) ++ emulated_msrs[j] = emulated_msrs[i]; ++ j++; ++ } ++ num_emulated_msrs = j; ++ ++ for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { ++ struct kvm_msr_entry msr; ++ ++ msr.index = msr_based_features[i]; ++ if (kvm_get_msr_feature(&msr)) ++ continue; ++ ++ if (j < i) ++ msr_based_features[j] = msr_based_features[i]; ++ j++; ++ } ++ num_msr_based_features = j; ++} ++ ++static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, ++ const void *v) ++{ ++ int handled = 0; ++ int n; ++ ++ do { ++ n = min(len, 8); ++ if (!(lapic_in_kernel(vcpu) && ++ !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v)) ++ && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v)) ++ break; ++ handled += n; ++ addr += n; ++ len -= n; ++ v += n; ++ } while (len); ++ ++ return handled; ++} ++ ++static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) ++{ ++ int handled = 0; ++ int n; ++ ++ do { ++ n = min(len, 8); ++ if (!(lapic_in_kernel(vcpu) && ++ !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev, ++ addr, n, v)) ++ && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) ++ break; ++ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v); ++ handled += n; ++ addr += n; ++ len -= n; ++ v += n; ++ } while (len); ++ ++ return handled; ++} ++ ++static void kvm_set_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ kvm_x86_ops->set_segment(vcpu, var, seg); ++} 
++ ++void kvm_get_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ kvm_x86_ops->get_segment(vcpu, var, seg); ++} ++ ++gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, ++ struct x86_exception *exception) ++{ ++ gpa_t t_gpa; ++ ++ BUG_ON(!mmu_is_nested(vcpu)); ++ ++ /* NPT walks are always user-walks */ ++ access |= PFERR_USER_MASK; ++ t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception); ++ ++ return t_gpa; ++} ++ ++gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, ++ struct x86_exception *exception) ++{ ++ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; ++ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); ++} ++ ++ gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, ++ struct x86_exception *exception) ++{ ++ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; ++ access |= PFERR_FETCH_MASK; ++ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); ++} ++ ++gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, ++ struct x86_exception *exception) ++{ ++ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; ++ access |= PFERR_WRITE_MASK; ++ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); ++} ++ ++/* uses this to access any guest's mapped memory without checking CPL */ ++gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, ++ struct x86_exception *exception) ++{ ++ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception); ++} ++ ++static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, ++ struct kvm_vcpu *vcpu, u32 access, ++ struct x86_exception *exception) ++{ ++ void *data = val; ++ int r = X86EMUL_CONTINUE; ++ ++ while (bytes) { ++ gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access, ++ exception); ++ unsigned offset = addr & (PAGE_SIZE-1); ++ unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); ++ int ret; ++ ++ if (gpa == UNMAPPED_GVA) ++ return X86EMUL_PROPAGATE_FAULT; ++ ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data, ++ offset, toread); ++ if (ret < 0) { ++ r = X86EMUL_IO_NEEDED; ++ goto out; ++ } ++ ++ bytes -= toread; ++ data += toread; ++ addr += toread; ++ } ++out: ++ return r; ++} ++ ++/* used for instruction fetching */ ++static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, ++ gva_t addr, void *val, unsigned int bytes, ++ struct x86_exception *exception) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; ++ unsigned offset; ++ int ret; ++ ++ /* Inline kvm_read_guest_virt_helper for speed. */ ++ gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK, ++ exception); ++ if (unlikely(gpa == UNMAPPED_GVA)) ++ return X86EMUL_PROPAGATE_FAULT; ++ ++ offset = addr & (PAGE_SIZE-1); ++ if (WARN_ON(offset + bytes > PAGE_SIZE)) ++ bytes = (unsigned)PAGE_SIZE - offset; ++ ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val, ++ offset, bytes); ++ if (unlikely(ret < 0)) ++ return X86EMUL_IO_NEEDED; ++ ++ return X86EMUL_CONTINUE; ++} ++ ++int kvm_read_guest_virt(struct kvm_vcpu *vcpu, ++ gva_t addr, void *val, unsigned int bytes, ++ struct x86_exception *exception) ++{ ++ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? 
PFERR_USER_MASK : 0; ++ ++ /* ++ * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED ++ * is returned, but our callers are not ready for that and they blindly ++ * call kvm_inject_page_fault. Ensure that they at least do not leak ++ * uninitialized kernel stack memory into cr2 and error code. ++ */ ++ memset(exception, 0, sizeof(*exception)); ++ return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, ++ exception); ++} ++EXPORT_SYMBOL_GPL(kvm_read_guest_virt); ++ ++static int emulator_read_std(struct x86_emulate_ctxt *ctxt, ++ gva_t addr, void *val, unsigned int bytes, ++ struct x86_exception *exception, bool system) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ u32 access = 0; ++ ++ if (!system && kvm_x86_ops->get_cpl(vcpu) == 3) ++ access |= PFERR_USER_MASK; ++ ++ return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); ++} ++ ++static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, ++ unsigned long addr, void *val, unsigned int bytes) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes); ++ ++ return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE; ++} ++ ++static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, ++ struct kvm_vcpu *vcpu, u32 access, ++ struct x86_exception *exception) ++{ ++ void *data = val; ++ int r = X86EMUL_CONTINUE; ++ ++ while (bytes) { ++ gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, ++ access, ++ exception); ++ unsigned offset = addr & (PAGE_SIZE-1); ++ unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); ++ int ret; ++ ++ if (gpa == UNMAPPED_GVA) ++ return X86EMUL_PROPAGATE_FAULT; ++ ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite); ++ if (ret < 0) { ++ r = X86EMUL_IO_NEEDED; ++ goto out; ++ } ++ ++ bytes -= towrite; ++ data += towrite; ++ addr += towrite; ++ } ++out: ++ return r; ++} ++ ++static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, ++ unsigned int bytes, struct x86_exception *exception, ++ bool system) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ u32 access = PFERR_WRITE_MASK; ++ ++ if (!system && kvm_x86_ops->get_cpl(vcpu) == 3) ++ access |= PFERR_USER_MASK; ++ ++ return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, ++ access, exception); ++} ++ ++int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, ++ unsigned int bytes, struct x86_exception *exception) ++{ ++ /* kvm_write_guest_virt_system can pull in tons of pages. 
*/ ++ vcpu->arch.l1tf_flush_l1d = true; ++ ++ return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, ++ PFERR_WRITE_MASK, exception); ++} ++EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); ++ ++int handle_ud(struct kvm_vcpu *vcpu) ++{ ++ int emul_type = EMULTYPE_TRAP_UD; ++ enum emulation_result er; ++ char sig[5]; /* ud2; .ascii "kvm" */ ++ struct x86_exception e; ++ ++ if (force_emulation_prefix && ++ kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu), ++ sig, sizeof(sig), &e) == 0 && ++ memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) { ++ kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig)); ++ emul_type = 0; ++ } ++ ++ er = kvm_emulate_instruction(vcpu, emul_type); ++ if (er == EMULATE_USER_EXIT) ++ return 0; ++ if (er != EMULATE_DONE) ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++} ++EXPORT_SYMBOL_GPL(handle_ud); ++ ++static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva, ++ gpa_t gpa, bool write) ++{ ++ /* For APIC access vmexit */ ++ if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) ++ return 1; ++ ++ if (vcpu_match_mmio_gpa(vcpu, gpa)) { ++ trace_vcpu_match_mmio(gva, gpa, write, true); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, ++ gpa_t *gpa, struct x86_exception *exception, ++ bool write) ++{ ++ u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0) ++ | (write ? PFERR_WRITE_MASK : 0); ++ ++ /* ++ * currently PKRU is only applied to ept enabled guest so ++ * there is no pkey in EPT page table for L1 guest or EPT ++ * shadow page table for L2 guest. ++ */ ++ if (vcpu_match_mmio_gva(vcpu, gva) ++ && !permission_fault(vcpu, vcpu->arch.walk_mmu, ++ vcpu->arch.access, 0, access)) { ++ *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | ++ (gva & (PAGE_SIZE - 1)); ++ trace_vcpu_match_mmio(gva, *gpa, write, false); ++ return 1; ++ } ++ ++ *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); ++ ++ if (*gpa == UNMAPPED_GVA) ++ return -1; ++ ++ return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write); ++} ++ ++int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, ++ const void *val, int bytes) ++{ ++ int ret; ++ ++ ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes); ++ if (ret < 0) ++ return 0; ++ kvm_page_track_write(vcpu, gpa, val, bytes); ++ return 1; ++} ++ ++struct read_write_emulator_ops { ++ int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val, ++ int bytes); ++ int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa, ++ void *val, int bytes); ++ int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa, ++ int bytes, void *val); ++ int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa, ++ void *val, int bytes); ++ bool write; ++}; ++ ++static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) ++{ ++ if (vcpu->mmio_read_completed) { ++ trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, ++ vcpu->mmio_fragments[0].gpa, val); ++ vcpu->mmio_read_completed = 0; ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, ++ void *val, int bytes) ++{ ++ return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes); ++} ++ ++static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, ++ void *val, int bytes) ++{ ++ return emulator_write_phys(vcpu, gpa, val, bytes); ++} ++ ++static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) ++{ ++ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val); ++ return vcpu_mmio_write(vcpu, gpa, bytes, val); ++} ++ ++static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, ++ 
void *val, int bytes) ++{ ++ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL); ++ return X86EMUL_IO_NEEDED; ++} ++ ++static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, ++ void *val, int bytes) ++{ ++ struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0]; ++ ++ memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len)); ++ return X86EMUL_CONTINUE; ++} ++ ++static const struct read_write_emulator_ops read_emultor = { ++ .read_write_prepare = read_prepare, ++ .read_write_emulate = read_emulate, ++ .read_write_mmio = vcpu_mmio_read, ++ .read_write_exit_mmio = read_exit_mmio, ++}; ++ ++static const struct read_write_emulator_ops write_emultor = { ++ .read_write_emulate = write_emulate, ++ .read_write_mmio = write_mmio, ++ .read_write_exit_mmio = write_exit_mmio, ++ .write = true, ++}; ++ ++static int emulator_read_write_onepage(unsigned long addr, void *val, ++ unsigned int bytes, ++ struct x86_exception *exception, ++ struct kvm_vcpu *vcpu, ++ const struct read_write_emulator_ops *ops) ++{ ++ gpa_t gpa; ++ int handled, ret; ++ bool write = ops->write; ++ struct kvm_mmio_fragment *frag; ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ ++ /* ++ * If the exit was due to a NPF we may already have a GPA. ++ * If the GPA is present, use it to avoid the GVA to GPA table walk. ++ * Note, this cannot be used on string operations since string ++ * operation using rep will only have the initial GPA from the NPF ++ * occurred. ++ */ ++ if (vcpu->arch.gpa_available && ++ emulator_can_use_gpa(ctxt) && ++ (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) { ++ gpa = vcpu->arch.gpa_val; ++ ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write); ++ } else { ++ ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); ++ if (ret < 0) ++ return X86EMUL_PROPAGATE_FAULT; ++ } ++ ++ if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes)) ++ return X86EMUL_CONTINUE; ++ ++ /* ++ * Is this MMIO handled locally? ++ */ ++ handled = ops->read_write_mmio(vcpu, gpa, bytes, val); ++ if (handled == bytes) ++ return X86EMUL_CONTINUE; ++ ++ gpa += handled; ++ bytes -= handled; ++ val += handled; ++ ++ WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS); ++ frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++]; ++ frag->gpa = gpa; ++ frag->data = val; ++ frag->len = bytes; ++ return X86EMUL_CONTINUE; ++} ++ ++static int emulator_read_write(struct x86_emulate_ctxt *ctxt, ++ unsigned long addr, ++ void *val, unsigned int bytes, ++ struct x86_exception *exception, ++ const struct read_write_emulator_ops *ops) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ gpa_t gpa; ++ int rc; ++ ++ if (ops->read_write_prepare && ++ ops->read_write_prepare(vcpu, val, bytes)) ++ return X86EMUL_CONTINUE; ++ ++ vcpu->mmio_nr_fragments = 0; ++ ++ /* Crossing a page boundary? 
*/ ++ if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { ++ int now; ++ ++ now = -addr & ~PAGE_MASK; ++ rc = emulator_read_write_onepage(addr, val, now, exception, ++ vcpu, ops); ++ ++ if (rc != X86EMUL_CONTINUE) ++ return rc; ++ addr += now; ++ if (ctxt->mode != X86EMUL_MODE_PROT64) ++ addr = (u32)addr; ++ val += now; ++ bytes -= now; ++ } ++ ++ rc = emulator_read_write_onepage(addr, val, bytes, exception, ++ vcpu, ops); ++ if (rc != X86EMUL_CONTINUE) ++ return rc; ++ ++ if (!vcpu->mmio_nr_fragments) ++ return rc; ++ ++ gpa = vcpu->mmio_fragments[0].gpa; ++ ++ vcpu->mmio_needed = 1; ++ vcpu->mmio_cur_fragment = 0; ++ ++ vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len); ++ vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write; ++ vcpu->run->exit_reason = KVM_EXIT_MMIO; ++ vcpu->run->mmio.phys_addr = gpa; ++ ++ return ops->read_write_exit_mmio(vcpu, gpa, val, bytes); ++} ++ ++static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, ++ unsigned long addr, ++ void *val, ++ unsigned int bytes, ++ struct x86_exception *exception) ++{ ++ return emulator_read_write(ctxt, addr, val, bytes, ++ exception, &read_emultor); ++} ++ ++static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, ++ unsigned long addr, ++ const void *val, ++ unsigned int bytes, ++ struct x86_exception *exception) ++{ ++ return emulator_read_write(ctxt, addr, (void *)val, bytes, ++ exception, &write_emultor); ++} ++ ++#define CMPXCHG_TYPE(t, ptr, old, new) \ ++ (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) ++ ++#ifdef CONFIG_X86_64 ++# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) ++#else ++# define CMPXCHG64(ptr, old, new) \ ++ (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) ++#endif ++ ++static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, ++ unsigned long addr, ++ const void *old, ++ const void *new, ++ unsigned int bytes, ++ struct x86_exception *exception) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ gpa_t gpa; ++ struct page *page; ++ char *kaddr; ++ bool exchanged; ++ ++ /* guests cmpxchg8b have to be emulated atomically */ ++ if (bytes > 8 || (bytes & (bytes - 1))) ++ goto emul_write; ++ ++ gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); ++ ++ if (gpa == UNMAPPED_GVA || ++ (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) ++ goto emul_write; ++ ++ if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) ++ goto emul_write; ++ ++ page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); ++ if (is_error_page(page)) ++ goto emul_write; ++ ++ kaddr = kmap_atomic(page); ++ kaddr += offset_in_page(gpa); ++ switch (bytes) { ++ case 1: ++ exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); ++ break; ++ case 2: ++ exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); ++ break; ++ case 4: ++ exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); ++ break; ++ case 8: ++ exchanged = CMPXCHG64(kaddr, old, new); ++ break; ++ default: ++ BUG(); ++ } ++ kunmap_atomic(kaddr); ++ kvm_release_page_dirty(page); ++ ++ if (!exchanged) ++ return X86EMUL_CMPXCHG_FAILED; ++ ++ kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); ++ kvm_page_track_write(vcpu, gpa, new, bytes); ++ ++ return X86EMUL_CONTINUE; ++ ++emul_write: ++ printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); ++ ++ return emulator_write_emulated(ctxt, addr, new, bytes, exception); ++} ++ ++static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) ++{ ++ int r = 0, i; ++ ++ for (i = 0; i < vcpu->arch.pio.count; i++) { ++ if (vcpu->arch.pio.in) ++ r = kvm_io_bus_read(vcpu, 
KVM_PIO_BUS, vcpu->arch.pio.port, ++ vcpu->arch.pio.size, pd); ++ else ++ r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, ++ vcpu->arch.pio.port, vcpu->arch.pio.size, ++ pd); ++ if (r) ++ break; ++ pd += vcpu->arch.pio.size; ++ } ++ return r; ++} ++ ++static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, ++ unsigned short port, void *val, ++ unsigned int count, bool in) ++{ ++ vcpu->arch.pio.port = port; ++ vcpu->arch.pio.in = in; ++ vcpu->arch.pio.count = count; ++ vcpu->arch.pio.size = size; ++ ++ if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { ++ vcpu->arch.pio.count = 0; ++ return 1; ++ } ++ ++ vcpu->run->exit_reason = KVM_EXIT_IO; ++ vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; ++ vcpu->run->io.size = size; ++ vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; ++ vcpu->run->io.count = count; ++ vcpu->run->io.port = port; ++ ++ return 0; ++} ++ ++static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, ++ int size, unsigned short port, void *val, ++ unsigned int count) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ int ret; ++ ++ if (vcpu->arch.pio.count) ++ goto data_avail; ++ ++ memset(vcpu->arch.pio_data, 0, size * count); ++ ++ ret = emulator_pio_in_out(vcpu, size, port, val, count, true); ++ if (ret) { ++data_avail: ++ memcpy(val, vcpu->arch.pio_data, size * count); ++ trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data); ++ vcpu->arch.pio.count = 0; ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, ++ int size, unsigned short port, ++ const void *val, unsigned int count) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ ++ memcpy(vcpu->arch.pio_data, val, size * count); ++ trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data); ++ return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); ++} ++ ++static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) ++{ ++ return kvm_x86_ops->get_segment_base(vcpu, seg); ++} ++ ++static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) ++{ ++ kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); ++} ++ ++static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu) ++{ ++ if (!need_emulate_wbinvd(vcpu)) ++ return X86EMUL_CONTINUE; ++ ++ if (kvm_x86_ops->has_wbinvd_exit()) { ++ int cpu = get_cpu(); ++ ++ cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); ++ smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, ++ wbinvd_ipi, NULL, 1); ++ put_cpu(); ++ cpumask_clear(vcpu->arch.wbinvd_dirty_mask); ++ } else ++ wbinvd(); ++ return X86EMUL_CONTINUE; ++} ++ ++int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) ++{ ++ kvm_emulate_wbinvd_noskip(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); ++ ++ ++ ++static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) ++{ ++ kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt)); ++} ++ ++static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, ++ unsigned long *dest) ++{ ++ return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); ++} ++ ++static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, ++ unsigned long value) ++{ ++ ++ return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); ++} ++ ++static u64 mk_cr_64(u64 curr_cr, u32 new_val) ++{ ++ return (curr_cr & ~((1ULL << 32) - 1)) | new_val; ++} ++ ++static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ unsigned long value; ++ ++ switch (cr) { ++ case 0: ++ value = 
kvm_read_cr0(vcpu); ++ break; ++ case 2: ++ value = vcpu->arch.cr2; ++ break; ++ case 3: ++ value = kvm_read_cr3(vcpu); ++ break; ++ case 4: ++ value = kvm_read_cr4(vcpu); ++ break; ++ case 8: ++ value = kvm_get_cr8(vcpu); ++ break; ++ default: ++ kvm_err("%s: unexpected cr %u\n", __func__, cr); ++ return 0; ++ } ++ ++ return value; ++} ++ ++static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ int res = 0; ++ ++ switch (cr) { ++ case 0: ++ res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); ++ break; ++ case 2: ++ vcpu->arch.cr2 = val; ++ break; ++ case 3: ++ res = kvm_set_cr3(vcpu, val); ++ break; ++ case 4: ++ res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); ++ break; ++ case 8: ++ res = kvm_set_cr8(vcpu, val); ++ break; ++ default: ++ kvm_err("%s: unexpected cr %u\n", __func__, cr); ++ res = -1; ++ } ++ ++ return res; ++} ++ ++static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) ++{ ++ return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); ++} ++ ++static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) ++{ ++ kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt); ++} ++ ++static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) ++{ ++ kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); ++} ++ ++static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) ++{ ++ kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt); ++} ++ ++static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) ++{ ++ kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt); ++} ++ ++static unsigned long emulator_get_cached_segment_base( ++ struct x86_emulate_ctxt *ctxt, int seg) ++{ ++ return get_segment_base(emul_to_vcpu(ctxt), seg); ++} ++ ++static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, ++ struct desc_struct *desc, u32 *base3, ++ int seg) ++{ ++ struct kvm_segment var; ++ ++ kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); ++ *selector = var.selector; ++ ++ if (var.unusable) { ++ memset(desc, 0, sizeof(*desc)); ++ if (base3) ++ *base3 = 0; ++ return false; ++ } ++ ++ if (var.g) ++ var.limit >>= 12; ++ set_desc_limit(desc, var.limit); ++ set_desc_base(desc, (unsigned long)var.base); ++#ifdef CONFIG_X86_64 ++ if (base3) ++ *base3 = var.base >> 32; ++#endif ++ desc->type = var.type; ++ desc->s = var.s; ++ desc->dpl = var.dpl; ++ desc->p = var.present; ++ desc->avl = var.avl; ++ desc->l = var.l; ++ desc->d = var.db; ++ desc->g = var.g; ++ ++ return true; ++} ++ ++static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, ++ struct desc_struct *desc, u32 base3, ++ int seg) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ struct kvm_segment var; ++ ++ var.selector = selector; ++ var.base = get_desc_base(desc); ++#ifdef CONFIG_X86_64 ++ var.base |= ((u64)base3) << 32; ++#endif ++ var.limit = get_desc_limit(desc); ++ if (desc->g) ++ var.limit = (var.limit << 12) | 0xfff; ++ var.type = desc->type; ++ var.dpl = desc->dpl; ++ var.db = desc->d; ++ var.s = desc->s; ++ var.l = desc->l; ++ var.g = desc->g; ++ var.avl = desc->avl; ++ var.present = desc->p; ++ var.unusable = !var.present; ++ var.padding = 0; ++ ++ kvm_set_segment(vcpu, &var, seg); ++ return; ++} ++ ++static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, ++ u32 msr_index, u64 *pdata) ++{ ++ struct msr_data msr; ++ int r; ++ ++ msr.index = msr_index; ++ msr.host_initiated = false; ++ r = kvm_get_msr(emul_to_vcpu(ctxt), &msr); ++ if (r) ++ return r; ++ ++ 
*pdata = msr.data; ++ return 0; ++} ++ ++static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, ++ u32 msr_index, u64 data) ++{ ++ struct msr_data msr; ++ ++ msr.data = data; ++ msr.index = msr_index; ++ msr.host_initiated = false; ++ return kvm_set_msr(emul_to_vcpu(ctxt), &msr); ++} ++ ++static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ ++ return vcpu->arch.smbase; ++} ++ ++static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ ++ vcpu->arch.smbase = smbase; ++} ++ ++static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, ++ u32 pmc) ++{ ++ return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc); ++} ++ ++static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, ++ u32 pmc, u64 *pdata) ++{ ++ return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata); ++} ++ ++static void emulator_halt(struct x86_emulate_ctxt *ctxt) ++{ ++ emul_to_vcpu(ctxt)->arch.halt_request = 1; ++} ++ ++static int emulator_intercept(struct x86_emulate_ctxt *ctxt, ++ struct x86_instruction_info *info, ++ enum x86_intercept_stage stage) ++{ ++ return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); ++} ++ ++static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, ++ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit) ++{ ++ return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit); ++} ++ ++static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) ++{ ++ return kvm_register_read(emul_to_vcpu(ctxt), reg); ++} ++ ++static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val) ++{ ++ kvm_register_write(emul_to_vcpu(ctxt), reg, val); ++} ++ ++static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked) ++{ ++ kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked); ++} ++ ++static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) ++{ ++ return emul_to_vcpu(ctxt)->arch.hflags; ++} ++ ++static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) ++{ ++ kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags); ++} ++ ++static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase) ++{ ++ return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase); ++} ++ ++static const struct x86_emulate_ops emulate_ops = { ++ .read_gpr = emulator_read_gpr, ++ .write_gpr = emulator_write_gpr, ++ .read_std = emulator_read_std, ++ .write_std = emulator_write_std, ++ .read_phys = kvm_read_guest_phys_system, ++ .fetch = kvm_fetch_guest_virt, ++ .read_emulated = emulator_read_emulated, ++ .write_emulated = emulator_write_emulated, ++ .cmpxchg_emulated = emulator_cmpxchg_emulated, ++ .invlpg = emulator_invlpg, ++ .pio_in_emulated = emulator_pio_in_emulated, ++ .pio_out_emulated = emulator_pio_out_emulated, ++ .get_segment = emulator_get_segment, ++ .set_segment = emulator_set_segment, ++ .get_cached_segment_base = emulator_get_cached_segment_base, ++ .get_gdt = emulator_get_gdt, ++ .get_idt = emulator_get_idt, ++ .set_gdt = emulator_set_gdt, ++ .set_idt = emulator_set_idt, ++ .get_cr = emulator_get_cr, ++ .set_cr = emulator_set_cr, ++ .cpl = emulator_get_cpl, ++ .get_dr = emulator_get_dr, ++ .set_dr = emulator_set_dr, ++ .get_smbase = emulator_get_smbase, ++ .set_smbase = emulator_set_smbase, ++ .set_msr = emulator_set_msr, ++ .get_msr = emulator_get_msr, ++ .check_pmc = emulator_check_pmc, ++ .read_pmc = emulator_read_pmc, ++ .halt = emulator_halt, ++ .wbinvd = 
emulator_wbinvd, ++ .fix_hypercall = emulator_fix_hypercall, ++ .intercept = emulator_intercept, ++ .get_cpuid = emulator_get_cpuid, ++ .set_nmi_mask = emulator_set_nmi_mask, ++ .get_hflags = emulator_get_hflags, ++ .set_hflags = emulator_set_hflags, ++ .pre_leave_smm = emulator_pre_leave_smm, ++}; ++ ++static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) ++{ ++ u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); ++ /* ++ * an sti; sti; sequence only disable interrupts for the first ++ * instruction. So, if the last instruction, be it emulated or ++ * not, left the system with the INT_STI flag enabled, it ++ * means that the last instruction is an sti. We should not ++ * leave the flag on in this case. The same goes for mov ss ++ */ ++ if (int_shadow & mask) ++ mask = 0; ++ if (unlikely(int_shadow || mask)) { ++ kvm_x86_ops->set_interrupt_shadow(vcpu, mask); ++ if (!mask) ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ } ++} ++ ++static bool inject_emulated_exception(struct kvm_vcpu *vcpu) ++{ ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ if (ctxt->exception.vector == PF_VECTOR) ++ return kvm_propagate_fault(vcpu, &ctxt->exception); ++ ++ if (ctxt->exception.error_code_valid) ++ kvm_queue_exception_e(vcpu, ctxt->exception.vector, ++ ctxt->exception.error_code); ++ else ++ kvm_queue_exception(vcpu, ctxt->exception.vector); ++ return false; ++} ++ ++static void init_emulate_ctxt(struct kvm_vcpu *vcpu) ++{ ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ int cs_db, cs_l; ++ ++ kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ++ ++ ctxt->eflags = kvm_get_rflags(vcpu); ++ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; ++ ++ ctxt->eip = kvm_rip_read(vcpu); ++ ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : ++ (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : ++ (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 : ++ cs_db ? 
X86EMUL_MODE_PROT32 : ++ X86EMUL_MODE_PROT16; ++ BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK); ++ BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); ++ BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK); ++ ++ init_decode_cache(ctxt); ++ vcpu->arch.emulate_regs_need_sync_from_vcpu = false; ++} ++ ++int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) ++{ ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ int ret; ++ ++ init_emulate_ctxt(vcpu); ++ ++ ctxt->op_bytes = 2; ++ ctxt->ad_bytes = 2; ++ ctxt->_eip = ctxt->eip + inc_eip; ++ ret = emulate_int_real(ctxt, irq); ++ ++ if (ret != X86EMUL_CONTINUE) ++ return EMULATE_FAIL; ++ ++ ctxt->eip = ctxt->_eip; ++ kvm_rip_write(vcpu, ctxt->eip); ++ kvm_set_rflags(vcpu, ctxt->eflags); ++ ++ return EMULATE_DONE; ++} ++EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); ++ ++static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) ++{ ++ int r = EMULATE_DONE; ++ ++ ++vcpu->stat.insn_emulation_fail; ++ trace_kvm_emulate_insn_failed(vcpu); ++ ++ if (emulation_type & EMULTYPE_NO_UD_ON_FAIL) ++ return EMULATE_FAIL; ++ ++ if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; ++ vcpu->run->internal.ndata = 0; ++ r = EMULATE_USER_EXIT; ++ } ++ ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ ++ return r; ++} ++ ++static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, ++ bool write_fault_to_shadow_pgtable, ++ int emulation_type) ++{ ++ gpa_t gpa = cr2; ++ kvm_pfn_t pfn; ++ ++ if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) ++ return false; ++ ++ if (WARN_ON_ONCE(is_guest_mode(vcpu))) ++ return false; ++ ++ if (!vcpu->arch.mmu.direct_map) { ++ /* ++ * Write permission should be allowed since only ++ * write access need to be emulated. ++ */ ++ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); ++ ++ /* ++ * If the mapping is invalid in guest, let cpu retry ++ * it to generate fault. ++ */ ++ if (gpa == UNMAPPED_GVA) ++ return true; ++ } ++ ++ /* ++ * Do not retry the unhandleable instruction if it faults on the ++ * readonly host memory, otherwise it will goto a infinite loop: ++ * retry instruction -> write #PF -> emulation fail -> retry ++ * instruction -> ... ++ */ ++ pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); ++ ++ /* ++ * If the instruction failed on the error pfn, it can not be fixed, ++ * report the error to userspace. ++ */ ++ if (is_error_noslot_pfn(pfn)) ++ return false; ++ ++ kvm_release_pfn_clean(pfn); ++ ++ /* The instructions are well-emulated on direct mmu. */ ++ if (vcpu->arch.mmu.direct_map) { ++ unsigned int indirect_shadow_pages; ++ ++ spin_lock(&vcpu->kvm->mmu_lock); ++ indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages; ++ spin_unlock(&vcpu->kvm->mmu_lock); ++ ++ if (indirect_shadow_pages) ++ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); ++ ++ return true; ++ } ++ ++ /* ++ * if emulation was due to access to shadowed page table ++ * and it failed try to unshadow page and re-enter the ++ * guest to let CPU execute the instruction. ++ */ ++ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); ++ ++ /* ++ * If the access faults on its page table, it can not ++ * be fixed by unprotecting shadow page and it should ++ * be reported to userspace. 
++ */ ++ return !write_fault_to_shadow_pgtable; ++} ++ ++static bool retry_instruction(struct x86_emulate_ctxt *ctxt, ++ unsigned long cr2, int emulation_type) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ unsigned long last_retry_eip, last_retry_addr, gpa = cr2; ++ ++ last_retry_eip = vcpu->arch.last_retry_eip; ++ last_retry_addr = vcpu->arch.last_retry_addr; ++ ++ /* ++ * If the emulation is caused by #PF and it is non-page_table ++ * writing instruction, it means the VM-EXIT is caused by shadow ++ * page protected, we can zap the shadow page and retry this ++ * instruction directly. ++ * ++ * Note: if the guest uses a non-page-table modifying instruction ++ * on the PDE that points to the instruction, then we will unmap ++ * the instruction and go to an infinite loop. So, we cache the ++ * last retried eip and the last fault address, if we meet the eip ++ * and the address again, we can break out of the potential infinite ++ * loop. ++ */ ++ vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0; ++ ++ if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) ++ return false; ++ ++ if (WARN_ON_ONCE(is_guest_mode(vcpu))) ++ return false; ++ ++ if (x86_page_table_writing_insn(ctxt)) ++ return false; ++ ++ if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) ++ return false; ++ ++ vcpu->arch.last_retry_eip = ctxt->eip; ++ vcpu->arch.last_retry_addr = cr2; ++ ++ if (!vcpu->arch.mmu.direct_map) ++ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); ++ ++ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); ++ ++ return true; ++} ++ ++static int complete_emulated_mmio(struct kvm_vcpu *vcpu); ++static int complete_emulated_pio(struct kvm_vcpu *vcpu); ++ ++static void kvm_smm_changed(struct kvm_vcpu *vcpu) ++{ ++ if (!(vcpu->arch.hflags & HF_SMM_MASK)) { ++ /* This is a good place to trace that we are exiting SMM. */ ++ trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false); ++ ++ /* Process a latched INIT or SMI, if any. */ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ } ++ ++ kvm_mmu_reset_context(vcpu); ++} ++ ++static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags) ++{ ++ unsigned changed = vcpu->arch.hflags ^ emul_flags; ++ ++ vcpu->arch.hflags = emul_flags; ++ ++ if (changed & HF_SMM_MASK) ++ kvm_smm_changed(vcpu); ++} ++ ++static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, ++ unsigned long *db) ++{ ++ u32 dr6 = 0; ++ int i; ++ u32 enable, rwlen; ++ ++ enable = dr7; ++ rwlen = dr7 >> 16; ++ for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4) ++ if ((enable & 3) && (rwlen & 15) == type && db[i] == addr) ++ dr6 |= (1 << i); ++ return dr6; ++} ++ ++static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) ++{ ++ struct kvm_run *kvm_run = vcpu->run; ++ ++ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { ++ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; ++ kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; ++ kvm_run->debug.arch.exception = DB_VECTOR; ++ kvm_run->exit_reason = KVM_EXIT_DEBUG; ++ *r = EMULATE_USER_EXIT; ++ } else { ++ /* ++ * "Certain debug exceptions may clear bit 0-3. The ++ * remaining contents of the DR6 register are never ++ * cleared by the processor". 
++ */ ++ vcpu->arch.dr6 &= ~15; ++ vcpu->arch.dr6 |= DR6_BS | DR6_RTM; ++ kvm_queue_exception(vcpu, DB_VECTOR); ++ } ++} ++ ++int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) ++{ ++ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); ++ int r = EMULATE_DONE; ++ ++ kvm_x86_ops->skip_emulated_instruction(vcpu); ++ ++ /* ++ * rflags is the old, "raw" value of the flags. The new value has ++ * not been saved yet. ++ * ++ * This is correct even for TF set by the guest, because "the ++ * processor will not generate this exception after the instruction ++ * that sets the TF flag". ++ */ ++ if (unlikely(rflags & X86_EFLAGS_TF)) ++ kvm_vcpu_do_singlestep(vcpu, &r); ++ return r == EMULATE_DONE; ++} ++EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); ++ ++static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) ++{ ++ if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && ++ (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { ++ struct kvm_run *kvm_run = vcpu->run; ++ unsigned long eip = kvm_get_linear_rip(vcpu); ++ u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, ++ vcpu->arch.guest_debug_dr7, ++ vcpu->arch.eff_db); ++ ++ if (dr6 != 0) { ++ kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; ++ kvm_run->debug.arch.pc = eip; ++ kvm_run->debug.arch.exception = DB_VECTOR; ++ kvm_run->exit_reason = KVM_EXIT_DEBUG; ++ *r = EMULATE_USER_EXIT; ++ return true; ++ } ++ } ++ ++ if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) && ++ !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) { ++ unsigned long eip = kvm_get_linear_rip(vcpu); ++ u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, ++ vcpu->arch.dr7, ++ vcpu->arch.db); ++ ++ if (dr6 != 0) { ++ vcpu->arch.dr6 &= ~15; ++ vcpu->arch.dr6 |= dr6 | DR6_RTM; ++ kvm_queue_exception(vcpu, DB_VECTOR); ++ *r = EMULATE_DONE; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) ++{ ++ switch (ctxt->opcode_len) { ++ case 1: ++ switch (ctxt->b) { ++ case 0xe4: /* IN */ ++ case 0xe5: ++ case 0xec: ++ case 0xed: ++ case 0xe6: /* OUT */ ++ case 0xe7: ++ case 0xee: ++ case 0xef: ++ case 0x6c: /* INS */ ++ case 0x6d: ++ case 0x6e: /* OUTS */ ++ case 0x6f: ++ return true; ++ } ++ break; ++ case 2: ++ switch (ctxt->b) { ++ case 0x33: /* RDPMC */ ++ return true; ++ } ++ break; ++ } ++ ++ return false; ++} ++ ++int x86_emulate_instruction(struct kvm_vcpu *vcpu, ++ unsigned long cr2, ++ int emulation_type, ++ void *insn, ++ int insn_len) ++{ ++ int r; ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ bool writeback = true; ++ bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; ++ ++ vcpu->arch.l1tf_flush_l1d = true; ++ ++ /* ++ * Clear write_fault_to_shadow_pgtable here to ensure it is ++ * never reused. ++ */ ++ vcpu->arch.write_fault_to_shadow_pgtable = false; ++ kvm_clear_exception_queue(vcpu); ++ ++ if (!(emulation_type & EMULTYPE_NO_DECODE)) { ++ init_emulate_ctxt(vcpu); ++ ++ /* ++ * We will reenter on the same instruction since ++ * we do not set complete_userspace_io. This does not ++ * handle watchpoints yet, those would be handled in ++ * the emulate_ops. 
++ */ ++ if (!(emulation_type & EMULTYPE_SKIP) && ++ kvm_vcpu_check_breakpoint(vcpu, &r)) ++ return r; ++ ++ ctxt->interruptibility = 0; ++ ctxt->have_exception = false; ++ ctxt->exception.vector = -1; ++ ctxt->perm_ok = false; ++ ++ ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; ++ ++ r = x86_decode_insn(ctxt, insn, insn_len); ++ ++ trace_kvm_emulate_insn_start(vcpu); ++ ++vcpu->stat.insn_emulation; ++ if (r != EMULATION_OK) { ++ if (emulation_type & EMULTYPE_TRAP_UD) ++ return EMULATE_FAIL; ++ if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, ++ emulation_type)) ++ return EMULATE_DONE; ++ if (ctxt->have_exception) { ++ /* ++ * #UD should result in just EMULATION_FAILED, and trap-like ++ * exception should not be encountered during decode. ++ */ ++ WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR || ++ exception_type(ctxt->exception.vector) == EXCPT_TRAP); ++ inject_emulated_exception(vcpu); ++ return EMULATE_DONE; ++ } ++ if (emulation_type & EMULTYPE_SKIP) ++ return EMULATE_FAIL; ++ return handle_emulation_failure(vcpu, emulation_type); ++ } ++ } ++ ++ if ((emulation_type & EMULTYPE_VMWARE) && ++ !is_vmware_backdoor_opcode(ctxt)) ++ return EMULATE_FAIL; ++ ++ if (emulation_type & EMULTYPE_SKIP) { ++ kvm_rip_write(vcpu, ctxt->_eip); ++ if (ctxt->eflags & X86_EFLAGS_RF) ++ kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); ++ return EMULATE_DONE; ++ } ++ ++ if (retry_instruction(ctxt, cr2, emulation_type)) ++ return EMULATE_DONE; ++ ++ /* this is needed for vmware backdoor interface to work since it ++ changes registers values during IO operation */ ++ if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { ++ vcpu->arch.emulate_regs_need_sync_from_vcpu = false; ++ emulator_invalidate_register_cache(ctxt); ++ } ++ ++restart: ++ /* Save the faulting GPA (cr2) in the address field */ ++ ctxt->exception.address = cr2; ++ ++ r = x86_emulate_insn(ctxt); ++ ++ if (r == EMULATION_INTERCEPTED) ++ return EMULATE_DONE; ++ ++ if (r == EMULATION_FAILED) { ++ if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, ++ emulation_type)) ++ return EMULATE_DONE; ++ ++ return handle_emulation_failure(vcpu, emulation_type); ++ } ++ ++ if (ctxt->have_exception) { ++ r = EMULATE_DONE; ++ if (inject_emulated_exception(vcpu)) ++ return r; ++ } else if (vcpu->arch.pio.count) { ++ if (!vcpu->arch.pio.in) { ++ /* FIXME: return into emulator if single-stepping. */ ++ vcpu->arch.pio.count = 0; ++ } else { ++ writeback = false; ++ vcpu->arch.complete_userspace_io = complete_emulated_pio; ++ } ++ r = EMULATE_USER_EXIT; ++ } else if (vcpu->mmio_needed) { ++ if (!vcpu->mmio_is_write) ++ writeback = false; ++ r = EMULATE_USER_EXIT; ++ vcpu->arch.complete_userspace_io = complete_emulated_mmio; ++ } else if (r == EMULATION_RESTART) ++ goto restart; ++ else ++ r = EMULATE_DONE; ++ ++ if (writeback) { ++ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); ++ toggle_interruptibility(vcpu, ctxt->interruptibility); ++ vcpu->arch.emulate_regs_need_sync_to_vcpu = false; ++ if (!ctxt->have_exception || ++ exception_type(ctxt->exception.vector) == EXCPT_TRAP) { ++ kvm_rip_write(vcpu, ctxt->eip); ++ if (r == EMULATE_DONE && ctxt->tf) ++ kvm_vcpu_do_singlestep(vcpu, &r); ++ __kvm_set_rflags(vcpu, ctxt->eflags); ++ } ++ ++ /* ++ * For STI, interrupts are shadowed; so KVM_REQ_EVENT will ++ * do nothing, and it will be requested again as soon as ++ * the shadow expires. But we still need to check here, ++ * because POPF has no interrupt shadow. 
++ */ ++ if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF)) ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ } else ++ vcpu->arch.emulate_regs_need_sync_to_vcpu = true; ++ ++ return r; ++} ++ ++int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) ++{ ++ return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); ++} ++EXPORT_SYMBOL_GPL(kvm_emulate_instruction); ++ ++int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, ++ void *insn, int insn_len) ++{ ++ return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len); ++} ++EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); ++ ++static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.pio.count = 0; ++ return 1; ++} ++ ++static int complete_fast_pio_out(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.pio.count = 0; ++ ++ if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) ++ return 1; ++ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, ++ unsigned short port) ++{ ++ unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); ++ int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, ++ size, port, &val, 1); ++ if (ret) ++ return ret; ++ ++ /* ++ * Workaround userspace that relies on old KVM behavior of %rip being ++ * incremented prior to exiting to userspace to handle "OUT 0x7e". ++ */ ++ if (port == 0x7e && ++ kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) { ++ vcpu->arch.complete_userspace_io = ++ complete_fast_pio_out_port_0x7e; ++ kvm_skip_emulated_instruction(vcpu); ++ } else { ++ vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); ++ vcpu->arch.complete_userspace_io = complete_fast_pio_out; ++ } ++ return 0; ++} ++ ++static int complete_fast_pio_in(struct kvm_vcpu *vcpu) ++{ ++ unsigned long val; ++ ++ /* We should only ever be called with arch.pio.count equal to 1 */ ++ BUG_ON(vcpu->arch.pio.count != 1); ++ ++ if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) { ++ vcpu->arch.pio.count = 0; ++ return 1; ++ } ++ ++ /* For size less than 4 we merge, else we zero extend */ ++ val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) ++ : 0; ++ ++ /* ++ * Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform ++ * the copy and tracing ++ */ ++ emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size, ++ vcpu->arch.pio.port, &val, 1); ++ kvm_register_write(vcpu, VCPU_REGS_RAX, val); ++ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, ++ unsigned short port) ++{ ++ unsigned long val; ++ int ret; ++ ++ /* For size less than 4 we merge, else we zero extend */ ++ val = (size < 4) ? 
kvm_register_read(vcpu, VCPU_REGS_RAX) : 0; ++ ++ ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port, ++ &val, 1); ++ if (ret) { ++ kvm_register_write(vcpu, VCPU_REGS_RAX, val); ++ return ret; ++ } ++ ++ vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); ++ vcpu->arch.complete_userspace_io = complete_fast_pio_in; ++ ++ return 0; ++} ++ ++int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) ++{ ++ int ret; ++ ++ if (in) ++ ret = kvm_fast_pio_in(vcpu, size, port); ++ else ++ ret = kvm_fast_pio_out(vcpu, size, port); ++ return ret && kvm_skip_emulated_instruction(vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_fast_pio); ++ ++static int kvmclock_cpu_down_prep(unsigned int cpu) ++{ ++ __this_cpu_write(cpu_tsc_khz, 0); ++ return 0; ++} ++ ++static void tsc_khz_changed(void *data) ++{ ++ struct cpufreq_freqs *freq = data; ++ unsigned long khz = 0; ++ ++ if (data) ++ khz = freq->new; ++ else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) ++ khz = cpufreq_quick_get(raw_smp_processor_id()); ++ if (!khz) ++ khz = tsc_khz; ++ __this_cpu_write(cpu_tsc_khz, khz); ++} ++ ++#ifdef CONFIG_X86_64 ++static void kvm_hyperv_tsc_notifier(void) ++{ ++ struct kvm *kvm; ++ struct kvm_vcpu *vcpu; ++ int cpu; ++ ++ mutex_lock(&kvm_lock); ++ list_for_each_entry(kvm, &vm_list, vm_list) ++ kvm_make_mclock_inprogress_request(kvm); ++ ++ hyperv_stop_tsc_emulation(); ++ ++ /* TSC frequency always matches when on Hyper-V */ ++ for_each_present_cpu(cpu) ++ per_cpu(cpu_tsc_khz, cpu) = tsc_khz; ++ kvm_max_guest_tsc_khz = tsc_khz; ++ ++ list_for_each_entry(kvm, &vm_list, vm_list) { ++ struct kvm_arch *ka = &kvm->arch; ++ ++ spin_lock(&ka->pvclock_gtod_sync_lock); ++ ++ pvclock_update_vm_gtod_copy(kvm); ++ ++ kvm_for_each_vcpu(cpu, vcpu, kvm) ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ ++ kvm_for_each_vcpu(cpu, vcpu, kvm) ++ kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu); ++ ++ spin_unlock(&ka->pvclock_gtod_sync_lock); ++ } ++ mutex_unlock(&kvm_lock); ++} ++#endif ++ ++static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, ++ void *data) ++{ ++ struct cpufreq_freqs *freq = data; ++ struct kvm *kvm; ++ struct kvm_vcpu *vcpu; ++ int i, send_ipi = 0; ++ ++ /* ++ * We allow guests to temporarily run on slowing clocks, ++ * provided we notify them after, or to run on accelerating ++ * clocks, provided we notify them before. Thus time never ++ * goes backwards. ++ * ++ * However, we have a problem. We can't atomically update ++ * the frequency of a given CPU from this function; it is ++ * merely a notifier, which can be called from any CPU. ++ * Changing the TSC frequency at arbitrary points in time ++ * requires a recomputation of local variables related to ++ * the TSC for each VCPU. We must flag these local variables ++ * to be updated and be sure the update takes place with the ++ * new frequency before any guests proceed. ++ * ++ * Unfortunately, the combination of hotplug CPU and frequency ++ * change creates an intractable locking scenario; the order ++ * of when these callouts happen is undefined with respect to ++ * CPU hotplug, and they can race with each other. As such, ++ * merely setting per_cpu(cpu_tsc_khz) = X during a hotadd is ++ * undefined; you can actually have a CPU frequency change take ++ * place in between the computation of X and the setting of the ++ * variable. 
To protect against this problem, all updates of ++ * the per_cpu tsc_khz variable are done in an interrupt ++ * protected IPI, and all callers wishing to update the value ++ * must wait for a synchronous IPI to complete (which is trivial ++ * if the caller is on the CPU already). This establishes the ++ * necessary total order on variable updates. ++ * ++ * Note that because a guest time update may take place ++ * anytime after the setting of the VCPU's request bit, the ++ * correct TSC value must be set before the request. However, ++ * to ensure the update actually makes it to any guest which ++ * starts running in hardware virtualization between the set ++ * and the acquisition of the spinlock, we must also ping the ++ * CPU after setting the request bit. ++ * ++ */ ++ ++ if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) ++ return 0; ++ if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) ++ return 0; ++ ++ smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); ++ ++ mutex_lock(&kvm_lock); ++ list_for_each_entry(kvm, &vm_list, vm_list) { ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ if (vcpu->cpu != freq->cpu) ++ continue; ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ if (vcpu->cpu != raw_smp_processor_id()) ++ send_ipi = 1; ++ } ++ } ++ mutex_unlock(&kvm_lock); ++ ++ if (freq->old < freq->new && send_ipi) { ++ /* ++ * We upscale the frequency. Must make the guest ++ * doesn't see old kvmclock values while running with ++ * the new frequency, otherwise we risk the guest sees ++ * time go backwards. ++ * ++ * In case we update the frequency for another cpu ++ * (which might be in guest context) send an interrupt ++ * to kick the cpu out of guest context. Next time ++ * guest context is entered kvmclock will be updated, ++ * so the guest will not see stale values. 
++ */ ++ smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); ++ } ++ return 0; ++} ++ ++static struct notifier_block kvmclock_cpufreq_notifier_block = { ++ .notifier_call = kvmclock_cpufreq_notifier ++}; ++ ++static int kvmclock_cpu_online(unsigned int cpu) ++{ ++ tsc_khz_changed(NULL); ++ return 0; ++} ++ ++static void kvm_timer_init(void) ++{ ++ max_tsc_khz = tsc_khz; ++ ++ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { ++#ifdef CONFIG_CPU_FREQ ++ struct cpufreq_policy policy; ++ int cpu; ++ ++ memset(&policy, 0, sizeof(policy)); ++ cpu = get_cpu(); ++ cpufreq_get_policy(&policy, cpu); ++ if (policy.cpuinfo.max_freq) ++ max_tsc_khz = policy.cpuinfo.max_freq; ++ put_cpu(); ++#endif ++ cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, ++ CPUFREQ_TRANSITION_NOTIFIER); ++ } ++ pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz); ++ ++ cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online", ++ kvmclock_cpu_online, kvmclock_cpu_down_prep); ++} ++ ++DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); ++EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu); ++ ++int kvm_is_in_guest(void) ++{ ++ return __this_cpu_read(current_vcpu) != NULL; ++} ++ ++static int kvm_is_user_mode(void) ++{ ++ int user_mode = 3; ++ ++ if (__this_cpu_read(current_vcpu)) ++ user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu)); ++ ++ return user_mode != 0; ++} ++ ++static unsigned long kvm_get_guest_ip(void) ++{ ++ unsigned long ip = 0; ++ ++ if (__this_cpu_read(current_vcpu)) ++ ip = kvm_rip_read(__this_cpu_read(current_vcpu)); ++ ++ return ip; ++} ++ ++static struct perf_guest_info_callbacks kvm_guest_cbs = { ++ .is_in_guest = kvm_is_in_guest, ++ .is_user_mode = kvm_is_user_mode, ++ .get_guest_ip = kvm_get_guest_ip, ++}; ++ ++static void kvm_set_mmio_spte_mask(void) ++{ ++ u64 mask; ++ int maxphyaddr = boot_cpu_data.x86_phys_bits; ++ ++ /* ++ * Set the reserved bits and the present bit of an paging-structure ++ * entry to generate page fault with PFER.RSV = 1. ++ */ ++ ++ /* ++ * Mask the uppermost physical address bit, which would be reserved as ++ * long as the supported physical address width is less than 52. ++ */ ++ mask = 1ull << 51; ++ ++ /* Set the present bit. */ ++ mask |= 1ull; ++ ++ /* ++ * If reserved bit is not supported, clear the present bit to disable ++ * mmio page fault. ++ */ ++ if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52) ++ mask &= ~1ull; ++ ++ kvm_mmu_set_mmio_spte_mask(mask, mask); ++} ++ ++#ifdef CONFIG_X86_64 ++static void pvclock_gtod_update_fn(struct work_struct *work) ++{ ++ struct kvm *kvm; ++ ++ struct kvm_vcpu *vcpu; ++ int i; ++ ++ mutex_lock(&kvm_lock); ++ list_for_each_entry(kvm, &vm_list, vm_list) ++ kvm_for_each_vcpu(i, vcpu, kvm) ++ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ++ atomic_set(&kvm_guest_has_master_clock, 0); ++ mutex_unlock(&kvm_lock); ++} ++ ++static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); ++ ++/* ++ * Notification about pvclock gtod data update. ++ */ ++static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, ++ void *priv) ++{ ++ struct pvclock_gtod_data *gtod = &pvclock_gtod_data; ++ struct timekeeper *tk = priv; ++ ++ update_pvclock_gtod(tk); ++ ++ /* disable master clock if host does not trust, or does not ++ * use, TSC based clocksource. 
++ */ ++ if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) && ++ atomic_read(&kvm_guest_has_master_clock) != 0) ++ queue_work(system_long_wq, &pvclock_gtod_work); ++ ++ return 0; ++} ++ ++static struct notifier_block pvclock_gtod_notifier = { ++ .notifier_call = pvclock_gtod_notify, ++}; ++#endif ++ ++int kvm_arch_init(void *opaque) ++{ ++ int r; ++ struct kvm_x86_ops *ops = opaque; ++ ++ if (kvm_x86_ops) { ++ printk(KERN_ERR "kvm: already loaded the other module\n"); ++ r = -EEXIST; ++ goto out; ++ } ++ ++ if (!ops->cpu_has_kvm_support()) { ++ printk(KERN_ERR "kvm: no hardware support\n"); ++ r = -EOPNOTSUPP; ++ goto out; ++ } ++ if (ops->disabled_by_bios()) { ++ printk(KERN_ERR "kvm: disabled by bios\n"); ++ r = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ r = -ENOMEM; ++ shared_msrs = alloc_percpu(struct kvm_shared_msrs); ++ if (!shared_msrs) { ++ printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n"); ++ goto out; ++ } ++ ++ r = kvm_mmu_module_init(); ++ if (r) ++ goto out_free_percpu; ++ ++ kvm_set_mmio_spte_mask(); ++ ++ kvm_x86_ops = ops; ++ ++ kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, ++ PT_DIRTY_MASK, PT64_NX_MASK, 0, ++ PT_PRESENT_MASK, 0, sme_me_mask); ++ kvm_timer_init(); ++ ++ perf_register_guest_info_callbacks(&kvm_guest_cbs); ++ ++ if (boot_cpu_has(X86_FEATURE_XSAVE)) ++ host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); ++ ++ kvm_lapic_init(); ++#ifdef CONFIG_X86_64 ++ pvclock_gtod_register_notifier(&pvclock_gtod_notifier); ++ ++ if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) ++ set_hv_tscchange_cb(kvm_hyperv_tsc_notifier); ++#endif ++ ++ return 0; ++ ++out_free_percpu: ++ free_percpu(shared_msrs); ++out: ++ return r; ++} ++ ++void kvm_arch_exit(void) ++{ ++#ifdef CONFIG_X86_64 ++ if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) ++ clear_hv_tscchange_cb(); ++#endif ++ kvm_lapic_exit(); ++ perf_unregister_guest_info_callbacks(&kvm_guest_cbs); ++ ++ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) ++ cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, ++ CPUFREQ_TRANSITION_NOTIFIER); ++ cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); ++#ifdef CONFIG_X86_64 ++ pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); ++#endif ++ kvm_x86_ops = NULL; ++ kvm_mmu_module_exit(); ++ free_percpu(shared_msrs); ++} ++ ++int kvm_vcpu_halt(struct kvm_vcpu *vcpu) ++{ ++ ++vcpu->stat.halt_exits; ++ if (lapic_in_kernel(vcpu)) { ++ vcpu->arch.mp_state = KVM_MP_STATE_HALTED; ++ return 1; ++ } else { ++ vcpu->run->exit_reason = KVM_EXIT_HLT; ++ return 0; ++ } ++} ++EXPORT_SYMBOL_GPL(kvm_vcpu_halt); ++ ++int kvm_emulate_halt(struct kvm_vcpu *vcpu) ++{ ++ int ret = kvm_skip_emulated_instruction(vcpu); ++ /* ++ * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered ++ * KVM_EXIT_DEBUG here. 
++ */ ++ return kvm_vcpu_halt(vcpu) && ret; ++} ++EXPORT_SYMBOL_GPL(kvm_emulate_halt); ++ ++#ifdef CONFIG_X86_64 ++static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, ++ unsigned long clock_type) ++{ ++ struct kvm_clock_pairing clock_pairing; ++ struct timespec64 ts; ++ u64 cycle; ++ int ret; ++ ++ if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK) ++ return -KVM_EOPNOTSUPP; ++ ++ if (kvm_get_walltime_and_clockread(&ts, &cycle) == false) ++ return -KVM_EOPNOTSUPP; ++ ++ clock_pairing.sec = ts.tv_sec; ++ clock_pairing.nsec = ts.tv_nsec; ++ clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle); ++ clock_pairing.flags = 0; ++ memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad)); ++ ++ ret = 0; ++ if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing, ++ sizeof(struct kvm_clock_pairing))) ++ ret = -KVM_EFAULT; ++ ++ return ret; ++} ++#endif ++ ++/* ++ * kvm_pv_kick_cpu_op: Kick a vcpu. ++ * ++ * @apicid - apicid of vcpu to be kicked. ++ */ ++static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) ++{ ++ struct kvm_lapic_irq lapic_irq; ++ ++ lapic_irq.shorthand = 0; ++ lapic_irq.dest_mode = 0; ++ lapic_irq.level = 0; ++ lapic_irq.dest_id = apicid; ++ lapic_irq.msi_redir_hint = false; ++ ++ lapic_irq.delivery_mode = APIC_DM_REMRD; ++ kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); ++} ++ ++void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.apicv_active = false; ++ kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); ++} ++ ++int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) ++{ ++ unsigned long nr, a0, a1, a2, a3, ret; ++ int op_64_bit; ++ ++ if (kvm_hv_hypercall_enabled(vcpu->kvm)) ++ return kvm_hv_hypercall(vcpu); ++ ++ nr = kvm_register_read(vcpu, VCPU_REGS_RAX); ++ a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); ++ a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); ++ a2 = kvm_register_read(vcpu, VCPU_REGS_RDX); ++ a3 = kvm_register_read(vcpu, VCPU_REGS_RSI); ++ ++ trace_kvm_hypercall(nr, a0, a1, a2, a3); ++ ++ op_64_bit = is_64_bit_mode(vcpu); ++ if (!op_64_bit) { ++ nr &= 0xFFFFFFFF; ++ a0 &= 0xFFFFFFFF; ++ a1 &= 0xFFFFFFFF; ++ a2 &= 0xFFFFFFFF; ++ a3 &= 0xFFFFFFFF; ++ } ++ ++ if (kvm_x86_ops->get_cpl(vcpu) != 0) { ++ ret = -KVM_EPERM; ++ goto out; ++ } ++ ++ switch (nr) { ++ case KVM_HC_VAPIC_POLL_IRQ: ++ ret = 0; ++ break; ++ case KVM_HC_KICK_CPU: ++ kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); ++ ret = 0; ++ break; ++#ifdef CONFIG_X86_64 ++ case KVM_HC_CLOCK_PAIRING: ++ ret = kvm_pv_clock_pairing(vcpu, a0, a1); ++ break; ++#endif ++ case KVM_HC_SEND_IPI: ++ ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit); ++ break; ++ default: ++ ret = -KVM_ENOSYS; ++ break; ++ } ++out: ++ if (!op_64_bit) ++ ret = (u32)ret; ++ kvm_register_write(vcpu, VCPU_REGS_RAX, ret); ++ ++ ++vcpu->stat.hypercalls; ++ return kvm_skip_emulated_instruction(vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); ++ ++static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ char instruction[3]; ++ unsigned long rip = kvm_rip_read(vcpu); ++ ++ kvm_x86_ops->patch_hypercall(vcpu, instruction); ++ ++ return emulator_write_emulated(ctxt, rip, instruction, 3, ++ &ctxt->exception); ++} ++ ++static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) ++{ ++ return vcpu->run->request_interrupt_window && ++ likely(!pic_in_kernel(vcpu->kvm)); ++} ++ ++static void post_kvm_run_save(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_run *kvm_run = vcpu->run; ++ ++ kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; ++ 
kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; ++ kvm_run->cr8 = kvm_get_cr8(vcpu); ++ kvm_run->apic_base = kvm_get_apic_base(vcpu); ++ kvm_run->ready_for_interrupt_injection = ++ pic_in_kernel(vcpu->kvm) || ++ kvm_vcpu_ready_for_interrupt_injection(vcpu); ++} ++ ++static void update_cr8_intercept(struct kvm_vcpu *vcpu) ++{ ++ int max_irr, tpr; ++ ++ if (!kvm_x86_ops->update_cr8_intercept) ++ return; ++ ++ if (!lapic_in_kernel(vcpu)) ++ return; ++ ++ if (vcpu->arch.apicv_active) ++ return; ++ ++ if (!vcpu->arch.apic->vapic_addr) ++ max_irr = kvm_lapic_find_highest_irr(vcpu); ++ else ++ max_irr = -1; ++ ++ if (max_irr != -1) ++ max_irr >>= 4; ++ ++ tpr = kvm_lapic_get_cr8(vcpu); ++ ++ kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); ++} ++ ++static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) ++{ ++ int r; ++ ++ /* try to reinject previous events if any */ ++ ++ if (vcpu->arch.exception.injected) ++ kvm_x86_ops->queue_exception(vcpu); ++ /* ++ * Do not inject an NMI or interrupt if there is a pending ++ * exception. Exceptions and interrupts are recognized at ++ * instruction boundaries, i.e. the start of an instruction. ++ * Trap-like exceptions, e.g. #DB, have higher priority than ++ * NMIs and interrupts, i.e. traps are recognized before an ++ * NMI/interrupt that's pending on the same instruction. ++ * Fault-like exceptions, e.g. #GP and #PF, are the lowest ++ * priority, but are only generated (pended) during instruction ++ * execution, i.e. a pending fault-like exception means the ++ * fault occurred on the *previous* instruction and must be ++ * serviced prior to recognizing any new events in order to ++ * fully complete the previous instruction. ++ */ ++ else if (!vcpu->arch.exception.pending) { ++ if (vcpu->arch.nmi_injected) ++ kvm_x86_ops->set_nmi(vcpu); ++ else if (vcpu->arch.interrupt.injected) ++ kvm_x86_ops->set_irq(vcpu); ++ } ++ ++ /* ++ * Call check_nested_events() even if we reinjected a previous event ++ * in order for caller to determine if it should require immediate-exit ++ * from L2 to L1 due to pending L1 events which require exit ++ * from L2 to L1. 
++ */ ++ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { ++ r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); ++ if (r != 0) ++ return r; ++ } ++ ++ /* try to inject new event if pending */ ++ if (vcpu->arch.exception.pending) { ++ trace_kvm_inj_exception(vcpu->arch.exception.nr, ++ vcpu->arch.exception.has_error_code, ++ vcpu->arch.exception.error_code); ++ ++ WARN_ON_ONCE(vcpu->arch.exception.injected); ++ vcpu->arch.exception.pending = false; ++ vcpu->arch.exception.injected = true; ++ ++ if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) ++ __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | ++ X86_EFLAGS_RF); ++ ++ if (vcpu->arch.exception.nr == DB_VECTOR && ++ (vcpu->arch.dr7 & DR7_GD)) { ++ vcpu->arch.dr7 &= ~DR7_GD; ++ kvm_update_dr7(vcpu); ++ } ++ ++ kvm_x86_ops->queue_exception(vcpu); ++ } ++ ++ /* Don't consider new event if we re-injected an event */ ++ if (kvm_event_needs_reinjection(vcpu)) ++ return 0; ++ ++ if (vcpu->arch.smi_pending && !is_smm(vcpu) && ++ kvm_x86_ops->smi_allowed(vcpu)) { ++ vcpu->arch.smi_pending = false; ++ ++vcpu->arch.smi_count; ++ enter_smm(vcpu); ++ } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { ++ --vcpu->arch.nmi_pending; ++ vcpu->arch.nmi_injected = true; ++ kvm_x86_ops->set_nmi(vcpu); ++ } else if (kvm_cpu_has_injectable_intr(vcpu)) { ++ /* ++ * Because interrupts can be injected asynchronously, we are ++ * calling check_nested_events again here to avoid a race condition. ++ * See https://lkml.org/lkml/2014/7/2/60 for discussion about this ++ * proposal and current concerns. Perhaps we should be setting ++ * KVM_REQ_EVENT only on certain events and not unconditionally? ++ */ ++ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { ++ r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); ++ if (r != 0) ++ return r; ++ } ++ if (kvm_x86_ops->interrupt_allowed(vcpu)) { ++ kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), ++ false); ++ kvm_x86_ops->set_irq(vcpu); ++ } ++ } ++ ++ return 0; ++} ++ ++static void process_nmi(struct kvm_vcpu *vcpu) ++{ ++ unsigned limit = 2; ++ ++ /* ++ * x86 is limited to one NMI running, and one NMI pending after it. ++ * If an NMI is already in progress, limit further NMIs to just one. ++ * Otherwise, allow two (and we'll inject the first one immediately). 
++ */ ++ if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected) ++ limit = 1; ++ ++ vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0); ++ vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit); ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++} ++ ++static u32 enter_smm_get_segment_flags(struct kvm_segment *seg) ++{ ++ u32 flags = 0; ++ flags |= seg->g << 23; ++ flags |= seg->db << 22; ++ flags |= seg->l << 21; ++ flags |= seg->avl << 20; ++ flags |= seg->present << 15; ++ flags |= seg->dpl << 13; ++ flags |= seg->s << 12; ++ flags |= seg->type << 8; ++ return flags; ++} ++ ++static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) ++{ ++ struct kvm_segment seg; ++ int offset; ++ ++ kvm_get_segment(vcpu, &seg, n); ++ put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector); ++ ++ if (n < 3) ++ offset = 0x7f84 + n * 12; ++ else ++ offset = 0x7f2c + (n - 3) * 12; ++ ++ put_smstate(u32, buf, offset + 8, seg.base); ++ put_smstate(u32, buf, offset + 4, seg.limit); ++ put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg)); ++} ++ ++#ifdef CONFIG_X86_64 ++static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) ++{ ++ struct kvm_segment seg; ++ int offset; ++ u16 flags; ++ ++ kvm_get_segment(vcpu, &seg, n); ++ offset = 0x7e00 + n * 16; ++ ++ flags = enter_smm_get_segment_flags(&seg) >> 8; ++ put_smstate(u16, buf, offset, seg.selector); ++ put_smstate(u16, buf, offset + 2, flags); ++ put_smstate(u32, buf, offset + 4, seg.limit); ++ put_smstate(u64, buf, offset + 8, seg.base); ++} ++#endif ++ ++static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) ++{ ++ struct desc_ptr dt; ++ struct kvm_segment seg; ++ unsigned long val; ++ int i; ++ ++ put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); ++ put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu)); ++ put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); ++ put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); ++ ++ for (i = 0; i < 8; i++) ++ put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i)); ++ ++ kvm_get_dr(vcpu, 6, &val); ++ put_smstate(u32, buf, 0x7fcc, (u32)val); ++ kvm_get_dr(vcpu, 7, &val); ++ put_smstate(u32, buf, 0x7fc8, (u32)val); ++ ++ kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); ++ put_smstate(u32, buf, 0x7fc4, seg.selector); ++ put_smstate(u32, buf, 0x7f64, seg.base); ++ put_smstate(u32, buf, 0x7f60, seg.limit); ++ put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg)); ++ ++ kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); ++ put_smstate(u32, buf, 0x7fc0, seg.selector); ++ put_smstate(u32, buf, 0x7f80, seg.base); ++ put_smstate(u32, buf, 0x7f7c, seg.limit); ++ put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg)); ++ ++ kvm_x86_ops->get_gdt(vcpu, &dt); ++ put_smstate(u32, buf, 0x7f74, dt.address); ++ put_smstate(u32, buf, 0x7f70, dt.size); ++ ++ kvm_x86_ops->get_idt(vcpu, &dt); ++ put_smstate(u32, buf, 0x7f58, dt.address); ++ put_smstate(u32, buf, 0x7f54, dt.size); ++ ++ for (i = 0; i < 6; i++) ++ enter_smm_save_seg_32(vcpu, buf, i); ++ ++ put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); ++ ++ /* revision id */ ++ put_smstate(u32, buf, 0x7efc, 0x00020000); ++ put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase); ++} ++ ++#ifdef CONFIG_X86_64 ++static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) ++{ ++ struct desc_ptr dt; ++ struct kvm_segment seg; ++ unsigned long val; ++ int i; ++ ++ for (i = 0; i < 16; i++) ++ put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i)); ++ ++ put_smstate(u64, buf, 0x7f78, 
kvm_rip_read(vcpu)); ++ put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); ++ ++ kvm_get_dr(vcpu, 6, &val); ++ put_smstate(u64, buf, 0x7f68, val); ++ kvm_get_dr(vcpu, 7, &val); ++ put_smstate(u64, buf, 0x7f60, val); ++ ++ put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); ++ put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); ++ put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); ++ ++ put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase); ++ ++ /* revision id */ ++ put_smstate(u32, buf, 0x7efc, 0x00020064); ++ ++ put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer); ++ ++ kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); ++ put_smstate(u16, buf, 0x7e90, seg.selector); ++ put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8); ++ put_smstate(u32, buf, 0x7e94, seg.limit); ++ put_smstate(u64, buf, 0x7e98, seg.base); ++ ++ kvm_x86_ops->get_idt(vcpu, &dt); ++ put_smstate(u32, buf, 0x7e84, dt.size); ++ put_smstate(u64, buf, 0x7e88, dt.address); ++ ++ kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); ++ put_smstate(u16, buf, 0x7e70, seg.selector); ++ put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8); ++ put_smstate(u32, buf, 0x7e74, seg.limit); ++ put_smstate(u64, buf, 0x7e78, seg.base); ++ ++ kvm_x86_ops->get_gdt(vcpu, &dt); ++ put_smstate(u32, buf, 0x7e64, dt.size); ++ put_smstate(u64, buf, 0x7e68, dt.address); ++ ++ for (i = 0; i < 6; i++) ++ enter_smm_save_seg_64(vcpu, buf, i); ++} ++#endif ++ ++static void enter_smm(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment cs, ds; ++ struct desc_ptr dt; ++ char buf[512]; ++ u32 cr0; ++ ++ trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); ++ memset(buf, 0, 512); ++#ifdef CONFIG_X86_64 ++ if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) ++ enter_smm_save_state_64(vcpu, buf); ++ else ++#endif ++ enter_smm_save_state_32(vcpu, buf); ++ ++ /* ++ * Give pre_enter_smm() a chance to make ISA-specific changes to the ++ * vCPU state (e.g. leave guest mode) after we've saved the state into ++ * the SMM state-save area. ++ */ ++ kvm_x86_ops->pre_enter_smm(vcpu, buf); ++ ++ vcpu->arch.hflags |= HF_SMM_MASK; ++ kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)); ++ ++ if (kvm_x86_ops->get_nmi_mask(vcpu)) ++ vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; ++ else ++ kvm_x86_ops->set_nmi_mask(vcpu, true); ++ ++ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); ++ kvm_rip_write(vcpu, 0x8000); ++ ++ cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG); ++ kvm_x86_ops->set_cr0(vcpu, cr0); ++ vcpu->arch.cr0 = cr0; ++ ++ kvm_x86_ops->set_cr4(vcpu, 0); ++ ++ /* Undocumented: IDT limit is set to zero on entry to SMM. 
*/ ++ dt.address = dt.size = 0; ++ kvm_x86_ops->set_idt(vcpu, &dt); ++ ++ __kvm_set_dr(vcpu, 7, DR7_FIXED_1); ++ ++ cs.selector = (vcpu->arch.smbase >> 4) & 0xffff; ++ cs.base = vcpu->arch.smbase; ++ ++ ds.selector = 0; ++ ds.base = 0; ++ ++ cs.limit = ds.limit = 0xffffffff; ++ cs.type = ds.type = 0x3; ++ cs.dpl = ds.dpl = 0; ++ cs.db = ds.db = 0; ++ cs.s = ds.s = 1; ++ cs.l = ds.l = 0; ++ cs.g = ds.g = 1; ++ cs.avl = ds.avl = 0; ++ cs.present = ds.present = 1; ++ cs.unusable = ds.unusable = 0; ++ cs.padding = ds.padding = 0; ++ ++ kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); ++ kvm_set_segment(vcpu, &ds, VCPU_SREG_DS); ++ kvm_set_segment(vcpu, &ds, VCPU_SREG_ES); ++ kvm_set_segment(vcpu, &ds, VCPU_SREG_FS); ++ kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); ++ kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); ++ ++#ifdef CONFIG_X86_64 ++ if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) ++ kvm_x86_ops->set_efer(vcpu, 0); ++#endif ++ ++ kvm_update_cpuid(vcpu); ++ kvm_mmu_reset_context(vcpu); ++} ++ ++static void process_smi(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.smi_pending = true; ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++} ++ ++void kvm_make_scan_ioapic_request(struct kvm *kvm) ++{ ++ kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); ++} ++ ++static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) ++{ ++ if (!kvm_apic_present(vcpu)) ++ return; ++ ++ bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256); ++ ++ if (irqchip_split(vcpu->kvm)) ++ kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); ++ else { ++ if (vcpu->arch.apicv_active) ++ kvm_x86_ops->sync_pir_to_irr(vcpu); ++ if (ioapic_in_kernel(vcpu->kvm)) ++ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); ++ } ++ ++ if (is_guest_mode(vcpu)) ++ vcpu->arch.load_eoi_exitmap_pending = true; ++ else ++ kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); ++} ++ ++static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) ++{ ++ u64 eoi_exit_bitmap[4]; ++ ++ if (!kvm_apic_hw_enabled(vcpu->arch.apic)) ++ return; ++ ++ bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, ++ vcpu_to_synic(vcpu)->vec_bitmap, 256); ++ kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); ++} ++ ++int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, ++ unsigned long start, unsigned long end, ++ bool blockable) ++{ ++ unsigned long apic_address; ++ ++ /* ++ * The physical address of apic access page is stored in the VMCS. ++ * Update it when it becomes invalid. ++ */ ++ apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); ++ if (start <= apic_address && apic_address < end) ++ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); ++ ++ return 0; ++} ++ ++void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) ++{ ++ struct page *page = NULL; ++ ++ if (!lapic_in_kernel(vcpu)) ++ return; ++ ++ if (!kvm_x86_ops->set_apic_access_page_addr) ++ return; ++ ++ page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); ++ if (is_error_page(page)) ++ return; ++ kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); ++ ++ /* ++ * Do not pin apic access page in memory, the MMU notifier ++ * will call us again if it is migrated or swapped out. ++ */ ++ put_page(page); ++} ++EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); ++ ++void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) ++{ ++ smp_send_reschedule(vcpu->cpu); ++} ++EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); ++ ++/* ++ * Returns 1 to let vcpu_run() continue the guest execution loop without ++ * exiting to the userspace. 
Otherwise, the value will be returned to the ++ * userspace. ++ */ ++static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ++{ ++ int r; ++ bool req_int_win = ++ dm_request_for_irq_injection(vcpu) && ++ kvm_cpu_accept_dm_intr(vcpu); ++ ++ bool req_immediate_exit = false; ++ ++ if (kvm_request_pending(vcpu)) { ++ if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) ++ kvm_x86_ops->get_vmcs12_pages(vcpu); ++ if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) ++ kvm_mmu_unload(vcpu); ++ if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) ++ __kvm_migrate_timers(vcpu); ++ if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu)) ++ kvm_gen_update_masterclock(vcpu->kvm); ++ if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu)) ++ kvm_gen_kvmclock_update(vcpu); ++ if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) { ++ r = kvm_guest_time_update(vcpu); ++ if (unlikely(r)) ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) ++ kvm_mmu_sync_roots(vcpu); ++ if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu)) ++ kvm_mmu_load_cr3(vcpu); ++ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) ++ kvm_vcpu_flush_tlb(vcpu, true); ++ if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { ++ vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; ++ r = 0; ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { ++ vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; ++ vcpu->mmio_needed = 0; ++ r = 0; ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { ++ /* Page is swapped out. Do synthetic halt */ ++ vcpu->arch.apf.halted = true; ++ r = 1; ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) ++ record_steal_time(vcpu); ++ if (kvm_check_request(KVM_REQ_SMI, vcpu)) ++ process_smi(vcpu); ++ if (kvm_check_request(KVM_REQ_NMI, vcpu)) ++ process_nmi(vcpu); ++ if (kvm_check_request(KVM_REQ_PMU, vcpu)) ++ kvm_pmu_handle_event(vcpu); ++ if (kvm_check_request(KVM_REQ_PMI, vcpu)) ++ kvm_pmu_deliver_pmi(vcpu); ++ if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { ++ BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); ++ if (test_bit(vcpu->arch.pending_ioapic_eoi, ++ vcpu->arch.ioapic_handled_vectors)) { ++ vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; ++ vcpu->run->eoi.vector = ++ vcpu->arch.pending_ioapic_eoi; ++ r = 0; ++ goto out; ++ } ++ } ++ if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) ++ vcpu_scan_ioapic(vcpu); ++ if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu)) ++ vcpu_load_eoi_exitmap(vcpu); ++ if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) ++ kvm_vcpu_reload_apic_access_page(vcpu); ++ if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { ++ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; ++ vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH; ++ r = 0; ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) { ++ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; ++ vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET; ++ r = 0; ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) { ++ vcpu->run->exit_reason = KVM_EXIT_HYPERV; ++ vcpu->run->hyperv = vcpu->arch.hyperv.exit; ++ r = 0; ++ goto out; ++ } ++ ++ /* ++ * KVM_REQ_HV_STIMER has to be processed after ++ * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers ++ * depend on the guest clock being up-to-date ++ */ ++ if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) ++ kvm_hv_process_stimers(vcpu); ++ } ++ ++ if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { ++ ++vcpu->stat.req_event; ++ kvm_apic_accept_events(vcpu); ++ if (vcpu->arch.mp_state == 
KVM_MP_STATE_INIT_RECEIVED) { ++ r = 1; ++ goto out; ++ } ++ ++ if (inject_pending_event(vcpu, req_int_win) != 0) ++ req_immediate_exit = true; ++ else { ++ /* Enable SMI/NMI/IRQ window open exits if needed. ++ * ++ * SMIs have three cases: ++ * 1) They can be nested, and then there is nothing to ++ * do here because RSM will cause a vmexit anyway. ++ * 2) There is an ISA-specific reason why SMI cannot be ++ * injected, and the moment when this changes can be ++ * intercepted. ++ * 3) Or the SMI can be pending because ++ * inject_pending_event has completed the injection ++ * of an IRQ or NMI from the previous vmexit, and ++ * then we request an immediate exit to inject the ++ * SMI. ++ */ ++ if (vcpu->arch.smi_pending && !is_smm(vcpu)) ++ if (!kvm_x86_ops->enable_smi_window(vcpu)) ++ req_immediate_exit = true; ++ if (vcpu->arch.nmi_pending) ++ kvm_x86_ops->enable_nmi_window(vcpu); ++ if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) ++ kvm_x86_ops->enable_irq_window(vcpu); ++ WARN_ON(vcpu->arch.exception.pending); ++ } ++ ++ if (kvm_lapic_enabled(vcpu)) { ++ update_cr8_intercept(vcpu); ++ kvm_lapic_sync_to_vapic(vcpu); ++ } ++ } ++ ++ r = kvm_mmu_reload(vcpu); ++ if (unlikely(r)) { ++ goto cancel_injection; ++ } ++ ++ preempt_disable(); ++ ++ kvm_x86_ops->prepare_guest_switch(vcpu); ++ ++ /* ++ * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt ++ * IPI are then delayed after guest entry, which ensures that they ++ * result in virtual interrupt delivery. ++ */ ++ local_irq_disable(); ++ vcpu->mode = IN_GUEST_MODE; ++ ++ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); ++ ++ /* ++ * 1) We should set ->mode before checking ->requests. Please see ++ * the comment in kvm_vcpu_exiting_guest_mode(). ++ * ++ * 2) For APICv, we should set ->mode before checking PIR.ON. This ++ * pairs with the memory barrier implicit in pi_test_and_set_on ++ * (see vmx_deliver_posted_interrupt). ++ * ++ * 3) This also orders the write to mode from any reads to the page ++ * tables done while the VCPU is running. Please see the comment ++ * in kvm_flush_remote_tlbs. ++ */ ++ smp_mb__after_srcu_read_unlock(); ++ ++ /* ++ * This handles the case where a posted interrupt was ++ * notified with kvm_vcpu_kick. ++ */ ++ if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) ++ kvm_x86_ops->sync_pir_to_irr(vcpu); ++ ++ if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ++ || need_resched() || signal_pending(current)) { ++ vcpu->mode = OUTSIDE_GUEST_MODE; ++ smp_wmb(); ++ local_irq_enable(); ++ preempt_enable(); ++ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = 1; ++ goto cancel_injection; ++ } ++ ++ if (req_immediate_exit) { ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ kvm_x86_ops->request_immediate_exit(vcpu); ++ } ++ ++ trace_kvm_entry(vcpu->vcpu_id); ++ if (lapic_timer_advance_ns) ++ wait_lapic_expire(vcpu); ++ guest_enter_irqoff(); ++ ++ if (unlikely(vcpu->arch.switch_db_regs)) { ++ set_debugreg(0, 7); ++ set_debugreg(vcpu->arch.eff_db[0], 0); ++ set_debugreg(vcpu->arch.eff_db[1], 1); ++ set_debugreg(vcpu->arch.eff_db[2], 2); ++ set_debugreg(vcpu->arch.eff_db[3], 3); ++ set_debugreg(vcpu->arch.dr6, 6); ++ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; ++ } ++ ++ kvm_x86_ops->run(vcpu); ++ ++ /* ++ * Do this here before restoring debug registers on the host. And ++ * since we do this before handling the vmexit, a DR access vmexit ++ * can (a) read the correct value of the debug registers, (b) set ++ * KVM_DEBUGREG_WONT_EXIT again. 
++ */ ++ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) { ++ WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); ++ kvm_x86_ops->sync_dirty_debug_regs(vcpu); ++ kvm_update_dr0123(vcpu); ++ kvm_update_dr6(vcpu); ++ kvm_update_dr7(vcpu); ++ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; ++ } ++ ++ /* ++ * If the guest has used debug registers, at least dr7 ++ * will be disabled while returning to the host. ++ * If we don't have active breakpoints in the host, we don't ++ * care about the messed up debug address registers. But if ++ * we have some of them active, restore the old state. ++ */ ++ if (hw_breakpoint_active()) ++ hw_breakpoint_restore(); ++ ++ vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); ++ ++ vcpu->mode = OUTSIDE_GUEST_MODE; ++ smp_wmb(); ++ ++ kvm_before_interrupt(vcpu); ++ kvm_x86_ops->handle_external_intr(vcpu); ++ kvm_after_interrupt(vcpu); ++ ++ ++vcpu->stat.exits; ++ ++ guest_exit_irqoff(); ++ ++ local_irq_enable(); ++ preempt_enable(); ++ ++ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ++ ++ /* ++ * Profile KVM exit RIPs: ++ */ ++ if (unlikely(prof_on == KVM_PROFILING)) { ++ unsigned long rip = kvm_rip_read(vcpu); ++ profile_hit(KVM_PROFILING, (void *)rip); ++ } ++ ++ if (unlikely(vcpu->arch.tsc_always_catchup)) ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ ++ if (vcpu->arch.apic_attention) ++ kvm_lapic_sync_from_vapic(vcpu); ++ ++ vcpu->arch.gpa_available = false; ++ r = kvm_x86_ops->handle_exit(vcpu); ++ return r; ++ ++cancel_injection: ++ kvm_x86_ops->cancel_injection(vcpu); ++ if (unlikely(vcpu->arch.apic_attention)) ++ kvm_lapic_sync_from_vapic(vcpu); ++out: ++ return r; ++} ++ ++static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) ++{ ++ if (!kvm_arch_vcpu_runnable(vcpu) && ++ (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) { ++ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); ++ kvm_vcpu_block(vcpu); ++ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ++ ++ if (kvm_x86_ops->post_block) ++ kvm_x86_ops->post_block(vcpu); ++ ++ if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) ++ return 1; ++ } ++ ++ kvm_apic_accept_events(vcpu); ++ switch(vcpu->arch.mp_state) { ++ case KVM_MP_STATE_HALTED: ++ vcpu->arch.pv.pv_unhalted = false; ++ vcpu->arch.mp_state = ++ KVM_MP_STATE_RUNNABLE; ++ case KVM_MP_STATE_RUNNABLE: ++ vcpu->arch.apf.halted = false; ++ break; ++ case KVM_MP_STATE_INIT_RECEIVED: ++ break; ++ default: ++ return -EINTR; ++ break; ++ } ++ return 1; ++} ++ ++static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) ++{ ++ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) ++ kvm_x86_ops->check_nested_events(vcpu, false); ++ ++ return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && ++ !vcpu->arch.apf.halted); ++} ++ ++static int vcpu_run(struct kvm_vcpu *vcpu) ++{ ++ int r; ++ struct kvm *kvm = vcpu->kvm; ++ ++ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ++ vcpu->arch.l1tf_flush_l1d = true; ++ ++ for (;;) { ++ if (kvm_vcpu_running(vcpu)) { ++ r = vcpu_enter_guest(vcpu); ++ } else { ++ r = vcpu_block(kvm, vcpu); ++ } ++ ++ if (r <= 0) ++ break; ++ ++ kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu); ++ if (kvm_cpu_has_pending_timer(vcpu)) ++ kvm_inject_pending_timer_irqs(vcpu); ++ ++ if (dm_request_for_irq_injection(vcpu) && ++ kvm_vcpu_ready_for_interrupt_injection(vcpu)) { ++ r = 0; ++ vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; ++ ++vcpu->stat.request_irq_exits; ++ break; ++ } ++ ++ kvm_check_async_pf_completion(vcpu); ++ ++ if (signal_pending(current)) { ++ r = -EINTR; ++ 
vcpu->run->exit_reason = KVM_EXIT_INTR; ++ ++vcpu->stat.signal_exits; ++ break; ++ } ++ if (need_resched()) { ++ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); ++ cond_resched(); ++ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ++ } ++ } ++ ++ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); ++ ++ return r; ++} ++ ++static inline int complete_emulated_io(struct kvm_vcpu *vcpu) ++{ ++ int r; ++ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); ++ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); ++ if (r != EMULATE_DONE) ++ return 0; ++ return 1; ++} ++ ++static int complete_emulated_pio(struct kvm_vcpu *vcpu) ++{ ++ BUG_ON(!vcpu->arch.pio.count); ++ ++ return complete_emulated_io(vcpu); ++} ++ ++/* ++ * Implements the following, as a state machine: ++ * ++ * read: ++ * for each fragment ++ * for each mmio piece in the fragment ++ * write gpa, len ++ * exit ++ * copy data ++ * execute insn ++ * ++ * write: ++ * for each fragment ++ * for each mmio piece in the fragment ++ * write gpa, len ++ * copy data ++ * exit ++ */ ++static int complete_emulated_mmio(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_run *run = vcpu->run; ++ struct kvm_mmio_fragment *frag; ++ unsigned len; ++ ++ BUG_ON(!vcpu->mmio_needed); ++ ++ /* Complete previous fragment */ ++ frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment]; ++ len = min(8u, frag->len); ++ if (!vcpu->mmio_is_write) ++ memcpy(frag->data, run->mmio.data, len); ++ ++ if (frag->len <= 8) { ++ /* Switch to the next fragment. */ ++ frag++; ++ vcpu->mmio_cur_fragment++; ++ } else { ++ /* Go forward to the next mmio piece. */ ++ frag->data += len; ++ frag->gpa += len; ++ frag->len -= len; ++ } ++ ++ if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) { ++ vcpu->mmio_needed = 0; ++ ++ /* FIXME: return into emulator if single-stepping. */ ++ if (vcpu->mmio_is_write) ++ return 1; ++ vcpu->mmio_read_completed = 1; ++ return complete_emulated_io(vcpu); ++ } ++ ++ run->exit_reason = KVM_EXIT_MMIO; ++ run->mmio.phys_addr = frag->gpa; ++ if (vcpu->mmio_is_write) ++ memcpy(run->mmio.data, frag->data, min(8u, frag->len)); ++ run->mmio.len = min(8u, frag->len); ++ run->mmio.is_write = vcpu->mmio_is_write; ++ vcpu->arch.complete_userspace_io = complete_emulated_mmio; ++ return 0; ++} ++ ++/* Swap (qemu) user FPU context for the guest FPU context. */ ++static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) ++{ ++ preempt_disable(); ++ copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); ++ /* PKRU is separately restored in kvm_x86_ops->run. */ ++ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, ++ ~XFEATURE_MASK_PKRU); ++ preempt_enable(); ++ trace_kvm_fpu(1); ++} ++ ++/* When vcpu_run ends, restore user space FPU context. 
*/ ++static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) ++{ ++ preempt_disable(); ++ copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); ++ copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); ++ preempt_enable(); ++ ++vcpu->stat.fpu_reload; ++ trace_kvm_fpu(0); ++} ++ ++int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ++{ ++ int r; ++ ++ vcpu_load(vcpu); ++ kvm_sigset_activate(vcpu); ++ kvm_load_guest_fpu(vcpu); ++ ++ if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { ++ if (kvm_run->immediate_exit) { ++ r = -EINTR; ++ goto out; ++ } ++ kvm_vcpu_block(vcpu); ++ kvm_apic_accept_events(vcpu); ++ kvm_clear_request(KVM_REQ_UNHALT, vcpu); ++ r = -EAGAIN; ++ if (signal_pending(current)) { ++ r = -EINTR; ++ vcpu->run->exit_reason = KVM_EXIT_INTR; ++ ++vcpu->stat.signal_exits; ++ } ++ goto out; ++ } ++ ++ if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) { ++ r = -EINVAL; ++ goto out; ++ } ++ ++ if (vcpu->run->kvm_dirty_regs) { ++ r = sync_regs(vcpu); ++ if (r != 0) ++ goto out; ++ } ++ ++ /* re-sync apic's tpr */ ++ if (!lapic_in_kernel(vcpu)) { ++ if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { ++ r = -EINVAL; ++ goto out; ++ } ++ } ++ ++ if (unlikely(vcpu->arch.complete_userspace_io)) { ++ int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; ++ vcpu->arch.complete_userspace_io = NULL; ++ r = cui(vcpu); ++ if (r <= 0) ++ goto out; ++ } else ++ WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); ++ ++ if (kvm_run->immediate_exit) ++ r = -EINTR; ++ else ++ r = vcpu_run(vcpu); ++ ++out: ++ kvm_put_guest_fpu(vcpu); ++ if (vcpu->run->kvm_valid_regs) ++ store_regs(vcpu); ++ post_kvm_run_save(vcpu); ++ kvm_sigset_deactivate(vcpu); ++ ++ vcpu_put(vcpu); ++ return r; ++} ++ ++static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ++{ ++ if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { ++ /* ++ * We are here if userspace calls get_regs() in the middle of ++ * instruction emulation. Registers state needs to be copied ++ * back from emulation context to vcpu. 
Userspace shouldn't do ++ * that usually, but some bad designed PV devices (vmware ++ * backdoor interface) need this to work ++ */ ++ emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt); ++ vcpu->arch.emulate_regs_need_sync_to_vcpu = false; ++ } ++ regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); ++ regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); ++ regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); ++ regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX); ++ regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI); ++ regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI); ++ regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); ++ regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP); ++#ifdef CONFIG_X86_64 ++ regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8); ++ regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9); ++ regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10); ++ regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11); ++ regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12); ++ regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13); ++ regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14); ++ regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15); ++#endif ++ ++ regs->rip = kvm_rip_read(vcpu); ++ regs->rflags = kvm_get_rflags(vcpu); ++} ++ ++int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ++{ ++ vcpu_load(vcpu); ++ __get_regs(vcpu, regs); ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ++{ ++ vcpu->arch.emulate_regs_need_sync_from_vcpu = true; ++ vcpu->arch.emulate_regs_need_sync_to_vcpu = false; ++ ++ kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); ++ kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); ++ kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); ++ kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx); ++ kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi); ++ kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi); ++ kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp); ++ kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp); ++#ifdef CONFIG_X86_64 ++ kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8); ++ kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9); ++ kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10); ++ kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11); ++ kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12); ++ kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); ++ kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); ++ kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); ++#endif ++ ++ kvm_rip_write(vcpu, regs->rip); ++ kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED); ++ ++ vcpu->arch.exception.pending = false; ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++} ++ ++int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ++{ ++ vcpu_load(vcpu); ++ __set_regs(vcpu, regs); ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) ++{ ++ struct kvm_segment cs; ++ ++ kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); ++ *db = cs.db; ++ *l = cs.l; ++} ++EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); ++ ++static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) ++{ ++ struct desc_ptr dt; ++ ++ kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); ++ kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); ++ kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); ++ kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); ++ kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); ++ kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); ++ ++ kvm_get_segment(vcpu, 
&sregs->tr, VCPU_SREG_TR); ++ kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); ++ ++ kvm_x86_ops->get_idt(vcpu, &dt); ++ sregs->idt.limit = dt.size; ++ sregs->idt.base = dt.address; ++ kvm_x86_ops->get_gdt(vcpu, &dt); ++ sregs->gdt.limit = dt.size; ++ sregs->gdt.base = dt.address; ++ ++ sregs->cr0 = kvm_read_cr0(vcpu); ++ sregs->cr2 = vcpu->arch.cr2; ++ sregs->cr3 = kvm_read_cr3(vcpu); ++ sregs->cr4 = kvm_read_cr4(vcpu); ++ sregs->cr8 = kvm_get_cr8(vcpu); ++ sregs->efer = vcpu->arch.efer; ++ sregs->apic_base = kvm_get_apic_base(vcpu); ++ ++ memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); ++ ++ if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft) ++ set_bit(vcpu->arch.interrupt.nr, ++ (unsigned long *)sregs->interrupt_bitmap); ++} ++ ++int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, ++ struct kvm_sregs *sregs) ++{ ++ vcpu_load(vcpu); ++ __get_sregs(vcpu, sregs); ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, ++ struct kvm_mp_state *mp_state) ++{ ++ vcpu_load(vcpu); ++ ++ kvm_apic_accept_events(vcpu); ++ if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && ++ vcpu->arch.pv.pv_unhalted) ++ mp_state->mp_state = KVM_MP_STATE_RUNNABLE; ++ else ++ mp_state->mp_state = vcpu->arch.mp_state; ++ ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, ++ struct kvm_mp_state *mp_state) ++{ ++ int ret = -EINVAL; ++ ++ vcpu_load(vcpu); ++ ++ if (!lapic_in_kernel(vcpu) && ++ mp_state->mp_state != KVM_MP_STATE_RUNNABLE) ++ goto out; ++ ++ /* INITs are latched while in SMM */ ++ if ((is_smm(vcpu) || vcpu->arch.smi_pending) && ++ (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED || ++ mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED)) ++ goto out; ++ ++ if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { ++ vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; ++ set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); ++ } else ++ vcpu->arch.mp_state = mp_state->mp_state; ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ ret = 0; ++out: ++ vcpu_put(vcpu); ++ return ret; ++} ++ ++int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, ++ int reason, bool has_error_code, u32 error_code) ++{ ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ int ret; ++ ++ init_emulate_ctxt(vcpu); ++ ++ ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason, ++ has_error_code, error_code); ++ ++ if (ret) ++ return EMULATE_FAIL; ++ ++ kvm_rip_write(vcpu, ctxt->eip); ++ kvm_set_rflags(vcpu, ctxt->eflags); ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ return EMULATE_DONE; ++} ++EXPORT_SYMBOL_GPL(kvm_task_switch); ++ ++static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) ++{ ++ if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { ++ /* ++ * When EFER.LME and CR0.PG are set, the processor is in ++ * 64-bit mode (though maybe in a 32-bit code segment). ++ * CR4.PAE and EFER.LMA must be set. ++ */ ++ if (!(sregs->cr4 & X86_CR4_PAE) ++ || !(sregs->efer & EFER_LMA)) ++ return -EINVAL; ++ } else { ++ /* ++ * Not in 64-bit mode: EFER.LMA is clear and the code ++ * segment cannot be 64-bit. 
++ */ ++ if (sregs->efer & EFER_LMA || sregs->cs.l) ++ return -EINVAL; ++ } ++ ++ return kvm_valid_cr4(vcpu, sregs->cr4); ++} ++ ++static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) ++{ ++ struct msr_data apic_base_msr; ++ int mmu_reset_needed = 0; ++ int cpuid_update_needed = 0; ++ int pending_vec, max_bits, idx; ++ struct desc_ptr dt; ++ int ret = -EINVAL; ++ ++ if (kvm_valid_sregs(vcpu, sregs)) ++ goto out; ++ ++ apic_base_msr.data = sregs->apic_base; ++ apic_base_msr.host_initiated = true; ++ if (kvm_set_apic_base(vcpu, &apic_base_msr)) ++ goto out; ++ ++ dt.size = sregs->idt.limit; ++ dt.address = sregs->idt.base; ++ kvm_x86_ops->set_idt(vcpu, &dt); ++ dt.size = sregs->gdt.limit; ++ dt.address = sregs->gdt.base; ++ kvm_x86_ops->set_gdt(vcpu, &dt); ++ ++ vcpu->arch.cr2 = sregs->cr2; ++ mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3; ++ vcpu->arch.cr3 = sregs->cr3; ++ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); ++ ++ kvm_set_cr8(vcpu, sregs->cr8); ++ ++ mmu_reset_needed |= vcpu->arch.efer != sregs->efer; ++ kvm_x86_ops->set_efer(vcpu, sregs->efer); ++ ++ mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; ++ kvm_x86_ops->set_cr0(vcpu, sregs->cr0); ++ vcpu->arch.cr0 = sregs->cr0; ++ ++ mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; ++ cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & ++ (X86_CR4_OSXSAVE | X86_CR4_PKE)); ++ kvm_x86_ops->set_cr4(vcpu, sregs->cr4); ++ if (cpuid_update_needed) ++ kvm_update_cpuid(vcpu); ++ ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ if (is_pae_paging(vcpu)) { ++ load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); ++ mmu_reset_needed = 1; ++ } ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ ++ if (mmu_reset_needed) ++ kvm_mmu_reset_context(vcpu); ++ ++ max_bits = KVM_NR_INTERRUPTS; ++ pending_vec = find_first_bit( ++ (const unsigned long *)sregs->interrupt_bitmap, max_bits); ++ if (pending_vec < max_bits) { ++ kvm_queue_interrupt(vcpu, pending_vec, false); ++ pr_debug("Set back pending irq %d\n", pending_vec); ++ } ++ ++ kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); ++ kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); ++ kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES); ++ kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); ++ kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); ++ kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); ++ ++ kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); ++ kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); ++ ++ update_cr8_intercept(vcpu); ++ ++ /* Older userspace won't unhalt the vcpu on reset. 
*/ ++ if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && ++ sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && ++ !is_protmode(vcpu)) ++ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, ++ struct kvm_sregs *sregs) ++{ ++ int ret; ++ ++ vcpu_load(vcpu); ++ ret = __set_sregs(vcpu, sregs); ++ vcpu_put(vcpu); ++ return ret; ++} ++ ++int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, ++ struct kvm_guest_debug *dbg) ++{ ++ unsigned long rflags; ++ int i, r; ++ ++ vcpu_load(vcpu); ++ ++ if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { ++ r = -EBUSY; ++ if (vcpu->arch.exception.pending) ++ goto out; ++ if (dbg->control & KVM_GUESTDBG_INJECT_DB) ++ kvm_queue_exception(vcpu, DB_VECTOR); ++ else ++ kvm_queue_exception(vcpu, BP_VECTOR); ++ } ++ ++ /* ++ * Read rflags as long as potentially injected trace flags are still ++ * filtered out. ++ */ ++ rflags = kvm_get_rflags(vcpu); ++ ++ vcpu->guest_debug = dbg->control; ++ if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) ++ vcpu->guest_debug = 0; ++ ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { ++ for (i = 0; i < KVM_NR_DB_REGS; ++i) ++ vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; ++ vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7]; ++ } else { ++ for (i = 0; i < KVM_NR_DB_REGS; i++) ++ vcpu->arch.eff_db[i] = vcpu->arch.db[i]; ++ } ++ kvm_update_dr7(vcpu); ++ ++ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) ++ vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + ++ get_segment_base(vcpu, VCPU_SREG_CS); ++ ++ /* ++ * Trigger an rflags update that will inject or remove the trace ++ * flags. ++ */ ++ kvm_set_rflags(vcpu, rflags); ++ ++ kvm_x86_ops->update_bp_intercept(vcpu); ++ ++ r = 0; ++ ++out: ++ vcpu_put(vcpu); ++ return r; ++} ++ ++/* ++ * Translate a guest virtual address to a guest physical address. 
++ */ ++int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, ++ struct kvm_translation *tr) ++{ ++ unsigned long vaddr = tr->linear_address; ++ gpa_t gpa; ++ int idx; ++ ++ vcpu_load(vcpu); ++ ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ tr->physical_address = gpa; ++ tr->valid = gpa != UNMAPPED_GVA; ++ tr->writeable = 1; ++ tr->usermode = 0; ++ ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) ++{ ++ struct fxregs_state *fxsave; ++ ++ vcpu_load(vcpu); ++ ++ fxsave = &vcpu->arch.guest_fpu.state.fxsave; ++ memcpy(fpu->fpr, fxsave->st_space, 128); ++ fpu->fcw = fxsave->cwd; ++ fpu->fsw = fxsave->swd; ++ fpu->ftwx = fxsave->twd; ++ fpu->last_opcode = fxsave->fop; ++ fpu->last_ip = fxsave->rip; ++ fpu->last_dp = fxsave->rdp; ++ memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); ++ ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) ++{ ++ struct fxregs_state *fxsave; ++ ++ vcpu_load(vcpu); ++ ++ fxsave = &vcpu->arch.guest_fpu.state.fxsave; ++ ++ memcpy(fxsave->st_space, fpu->fpr, 128); ++ fxsave->cwd = fpu->fcw; ++ fxsave->swd = fpu->fsw; ++ fxsave->twd = fpu->ftwx; ++ fxsave->fop = fpu->last_opcode; ++ fxsave->rip = fpu->last_ip; ++ fxsave->rdp = fpu->last_dp; ++ memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); ++ ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++static void store_regs(struct kvm_vcpu *vcpu) ++{ ++ BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES); ++ ++ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS) ++ __get_regs(vcpu, &vcpu->run->s.regs.regs); ++ ++ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS) ++ __get_sregs(vcpu, &vcpu->run->s.regs.sregs); ++ ++ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS) ++ kvm_vcpu_ioctl_x86_get_vcpu_events( ++ vcpu, &vcpu->run->s.regs.events); ++} ++ ++static int sync_regs(struct kvm_vcpu *vcpu) ++{ ++ if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS) ++ return -EINVAL; ++ ++ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) { ++ __set_regs(vcpu, &vcpu->run->s.regs.regs); ++ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS; ++ } ++ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) { ++ if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs)) ++ return -EINVAL; ++ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS; ++ } ++ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) { ++ if (kvm_vcpu_ioctl_x86_set_vcpu_events( ++ vcpu, &vcpu->run->s.regs.events)) ++ return -EINVAL; ++ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS; ++ } ++ ++ return 0; ++} ++ ++static void fx_init(struct kvm_vcpu *vcpu) ++{ ++ fpstate_init(&vcpu->arch.guest_fpu.state); ++ if (boot_cpu_has(X86_FEATURE_XSAVES)) ++ vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv = ++ host_xcr0 | XSTATE_COMPACTION_ENABLED; ++ ++ /* ++ * Ensure guest xcr0 is valid for loading ++ */ ++ vcpu->arch.xcr0 = XFEATURE_MASK_FP; ++ ++ vcpu->arch.cr0 |= X86_CR0_ET; ++} ++ ++void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) ++{ ++ void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; ++ ++ kvmclock_reset(vcpu); ++ ++ kvm_x86_ops->vcpu_free(vcpu); ++ free_cpumask_var(wbinvd_dirty_mask); ++} ++ ++struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, ++ unsigned int id) ++{ ++ struct kvm_vcpu *vcpu; ++ ++ if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) ++ printk_once(KERN_WARNING ++ "kvm: SMP vm created 
on host with unstable TSC; " ++ "guest TSC will not be reliable\n"); ++ ++ vcpu = kvm_x86_ops->vcpu_create(kvm, id); ++ ++ return vcpu; ++} ++ ++int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); ++ kvm_vcpu_mtrr_init(vcpu); ++ vcpu_load(vcpu); ++ kvm_vcpu_reset(vcpu, false); ++ kvm_mmu_setup(vcpu); ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) ++{ ++ struct msr_data msr; ++ struct kvm *kvm = vcpu->kvm; ++ ++ kvm_hv_vcpu_postcreate(vcpu); ++ ++ if (mutex_lock_killable(&vcpu->mutex)) ++ return; ++ vcpu_load(vcpu); ++ msr.data = 0x0; ++ msr.index = MSR_IA32_TSC; ++ msr.host_initiated = true; ++ kvm_write_tsc(vcpu, &msr); ++ vcpu_put(vcpu); ++ mutex_unlock(&vcpu->mutex); ++ ++ if (!kvmclock_periodic_sync) ++ return; ++ ++ schedule_delayed_work(&kvm->arch.kvmclock_sync_work, ++ KVMCLOCK_SYNC_PERIOD); ++} ++ ++void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.apf.msr_val = 0; ++ ++ vcpu_load(vcpu); ++ kvm_mmu_unload(vcpu); ++ vcpu_put(vcpu); ++ ++ kvm_x86_ops->vcpu_free(vcpu); ++} ++ ++void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) ++{ ++ kvm_lapic_reset(vcpu, init_event); ++ ++ vcpu->arch.hflags = 0; ++ ++ vcpu->arch.smi_pending = 0; ++ vcpu->arch.smi_count = 0; ++ atomic_set(&vcpu->arch.nmi_queued, 0); ++ vcpu->arch.nmi_pending = 0; ++ vcpu->arch.nmi_injected = false; ++ kvm_clear_interrupt_queue(vcpu); ++ kvm_clear_exception_queue(vcpu); ++ vcpu->arch.exception.pending = false; ++ ++ memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); ++ kvm_update_dr0123(vcpu); ++ vcpu->arch.dr6 = DR6_INIT; ++ kvm_update_dr6(vcpu); ++ vcpu->arch.dr7 = DR7_FIXED_1; ++ kvm_update_dr7(vcpu); ++ ++ vcpu->arch.cr2 = 0; ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ vcpu->arch.apf.msr_val = 0; ++ vcpu->arch.st.msr_val = 0; ++ ++ kvmclock_reset(vcpu); ++ ++ kvm_clear_async_pf_completion_queue(vcpu); ++ kvm_async_pf_hash_reset(vcpu); ++ vcpu->arch.apf.halted = false; ++ ++ if (kvm_mpx_supported()) { ++ void *mpx_state_buffer; ++ ++ /* ++ * To avoid have the INIT path from kvm_apic_has_events() that be ++ * called with loaded FPU and does not let userspace fix the state. 
++ */ ++ if (init_event) ++ kvm_put_guest_fpu(vcpu); ++ mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, ++ XFEATURE_MASK_BNDREGS); ++ if (mpx_state_buffer) ++ memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state)); ++ mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, ++ XFEATURE_MASK_BNDCSR); ++ if (mpx_state_buffer) ++ memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); ++ if (init_event) ++ kvm_load_guest_fpu(vcpu); ++ } ++ ++ if (!init_event) { ++ kvm_pmu_reset(vcpu); ++ vcpu->arch.smbase = 0x30000; ++ ++ vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; ++ vcpu->arch.msr_misc_features_enables = 0; ++ ++ vcpu->arch.xcr0 = XFEATURE_MASK_FP; ++ } ++ ++ memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); ++ vcpu->arch.regs_avail = ~0; ++ vcpu->arch.regs_dirty = ~0; ++ ++ vcpu->arch.ia32_xss = 0; ++ ++ kvm_x86_ops->vcpu_reset(vcpu, init_event); ++} ++ ++void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) ++{ ++ struct kvm_segment cs; ++ ++ kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); ++ cs.selector = vector << 8; ++ cs.base = vector << 12; ++ kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); ++ kvm_rip_write(vcpu, 0); ++} ++ ++int kvm_arch_hardware_enable(void) ++{ ++ struct kvm *kvm; ++ struct kvm_vcpu *vcpu; ++ int i; ++ int ret; ++ u64 local_tsc; ++ u64 max_tsc = 0; ++ bool stable, backwards_tsc = false; ++ ++ kvm_shared_msr_cpu_online(); ++ ret = kvm_x86_ops->hardware_enable(); ++ if (ret != 0) ++ return ret; ++ ++ local_tsc = rdtsc(); ++ stable = !kvm_check_tsc_unstable(); ++ list_for_each_entry(kvm, &vm_list, vm_list) { ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ if (!stable && vcpu->cpu == smp_processor_id()) ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ if (stable && vcpu->arch.last_host_tsc > local_tsc) { ++ backwards_tsc = true; ++ if (vcpu->arch.last_host_tsc > max_tsc) ++ max_tsc = vcpu->arch.last_host_tsc; ++ } ++ } ++ } ++ ++ /* ++ * Sometimes, even reliable TSCs go backwards. This happens on ++ * platforms that reset TSC during suspend or hibernate actions, but ++ * maintain synchronization. We must compensate. Fortunately, we can ++ * detect that condition here, which happens early in CPU bringup, ++ * before any KVM threads can be running. Unfortunately, we can't ++ * bring the TSCs fully up to date with real time, as we aren't yet far ++ * enough into CPU bringup that we know how much real time has actually ++ * elapsed; our helper function, ktime_get_boot_ns() will be using boot ++ * variables that haven't been updated yet. ++ * ++ * So we simply find the maximum observed TSC above, then record the ++ * adjustment to TSC in each VCPU. When the VCPU later gets loaded, ++ * the adjustment will be applied. Note that we accumulate ++ * adjustments, in case multiple suspend cycles happen before some VCPU ++ * gets a chance to run again. In the event that no KVM threads get a ++ * chance to run, we will miss the entire elapsed period, as we'll have ++ * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may ++ * loose cycle time. This isn't too big a deal, since the loss will be ++ * uniform across all VCPUs (not to mention the scenario is extremely ++ * unlikely). It is possible that a second hibernate recovery happens ++ * much faster than a first, causing the observed TSC here to be ++ * smaller; this would require additional padding adjustment, which is ++ * why we set last_host_tsc to the local tsc observed here. ++ * ++ * N.B. 
- this code below runs only on platforms with reliable TSC, ++ * as that is the only way backwards_tsc is set above. Also note ++ * that this runs for ALL vcpus, which is not a bug; all VCPUs should ++ * have the same delta_cyc adjustment applied if backwards_tsc ++ * is detected. Note further, this adjustment is only done once, ++ * as we reset last_host_tsc on all VCPUs to stop this from being ++ * called multiple times (one for each physical CPU bringup). ++ * ++ * Platforms with unreliable TSCs don't have to deal with this, they ++ * will be compensated by the logic in vcpu_load, which sets the TSC to ++ * catchup mode. This will catchup all VCPUs to real time, but cannot ++ * guarantee that they stay in perfect synchronization. ++ */ ++ if (backwards_tsc) { ++ u64 delta_cyc = max_tsc - local_tsc; ++ list_for_each_entry(kvm, &vm_list, vm_list) { ++ kvm->arch.backwards_tsc_observed = true; ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ vcpu->arch.tsc_offset_adjustment += delta_cyc; ++ vcpu->arch.last_host_tsc = local_tsc; ++ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ++ } ++ ++ /* ++ * We have to disable TSC offset matching.. if you were ++ * booting a VM while issuing an S4 host suspend.... ++ * you may have some problem. Solving this issue is ++ * left as an exercise to the reader. ++ */ ++ kvm->arch.last_tsc_nsec = 0; ++ kvm->arch.last_tsc_write = 0; ++ } ++ ++ } ++ return 0; ++} ++ ++void kvm_arch_hardware_disable(void) ++{ ++ kvm_x86_ops->hardware_disable(); ++ drop_user_return_notifiers(); ++} ++ ++int kvm_arch_hardware_setup(void) ++{ ++ int r; ++ ++ r = kvm_x86_ops->hardware_setup(); ++ if (r != 0) ++ return r; ++ ++ if (kvm_has_tsc_control) { ++ /* ++ * Make sure the user can only configure tsc_khz values that ++ * fit into a signed integer. ++ * A min value is not calculated because it will always ++ * be 1 on all machines. 
++ */ ++ u64 max = min(0x7fffffffULL, ++ __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz)); ++ kvm_max_guest_tsc_khz = max; ++ ++ kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; ++ } ++ ++ kvm_init_msr_list(); ++ return 0; ++} ++ ++void kvm_arch_hardware_unsetup(void) ++{ ++ kvm_x86_ops->hardware_unsetup(); ++} ++ ++void kvm_arch_check_processor_compat(void *rtn) ++{ ++ kvm_x86_ops->check_processor_compatibility(rtn); ++} ++ ++bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) ++{ ++ return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; ++} ++EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp); ++ ++bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) ++{ ++ return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; ++} ++ ++struct static_key kvm_no_apic_vcpu __read_mostly; ++EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu); ++ ++int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) ++{ ++ struct page *page; ++ int r; ++ ++ vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); ++ vcpu->arch.emulate_ctxt.ops = &emulate_ops; ++ if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) ++ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; ++ else ++ vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; ++ ++ page = alloc_page(GFP_KERNEL | __GFP_ZERO); ++ if (!page) { ++ r = -ENOMEM; ++ goto fail; ++ } ++ vcpu->arch.pio_data = page_address(page); ++ ++ kvm_set_tsc_khz(vcpu, max_tsc_khz); ++ ++ r = kvm_mmu_create(vcpu); ++ if (r < 0) ++ goto fail_free_pio_data; ++ ++ if (irqchip_in_kernel(vcpu->kvm)) { ++ r = kvm_create_lapic(vcpu); ++ if (r < 0) ++ goto fail_mmu_destroy; ++ } else ++ static_key_slow_inc(&kvm_no_apic_vcpu); ++ ++ vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, ++ GFP_KERNEL); ++ if (!vcpu->arch.mce_banks) { ++ r = -ENOMEM; ++ goto fail_free_lapic; ++ } ++ vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; ++ ++ if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) { ++ r = -ENOMEM; ++ goto fail_free_mce_banks; ++ } ++ ++ fx_init(vcpu); ++ ++ vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; ++ ++ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); ++ ++ vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; ++ ++ kvm_async_pf_hash_reset(vcpu); ++ kvm_pmu_init(vcpu); ++ ++ vcpu->arch.pending_external_vector = -1; ++ vcpu->arch.preempted_in_kernel = false; ++ ++ kvm_hv_vcpu_init(vcpu); ++ ++ return 0; ++ ++fail_free_mce_banks: ++ kfree(vcpu->arch.mce_banks); ++fail_free_lapic: ++ kvm_free_lapic(vcpu); ++fail_mmu_destroy: ++ kvm_mmu_destroy(vcpu); ++fail_free_pio_data: ++ free_page((unsigned long)vcpu->arch.pio_data); ++fail: ++ return r; ++} ++ ++void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) ++{ ++ int idx; ++ ++ kvm_hv_vcpu_uninit(vcpu); ++ kvm_pmu_destroy(vcpu); ++ kfree(vcpu->arch.mce_banks); ++ kvm_free_lapic(vcpu); ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ kvm_mmu_destroy(vcpu); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ free_page((unsigned long)vcpu->arch.pio_data); ++ if (!lapic_in_kernel(vcpu)) ++ static_key_slow_dec(&kvm_no_apic_vcpu); ++} ++ ++void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) ++{ ++ vcpu->arch.l1tf_flush_l1d = true; ++ kvm_x86_ops->sched_in(vcpu, cpu); ++} ++ ++int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ++{ ++ if (type) ++ return -EINVAL; ++ ++ INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); ++ INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); ++ INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); ++ INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); ++ INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); ++ 
atomic_set(&kvm->arch.noncoherent_dma_count, 0); ++ ++ /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ ++ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); ++ /* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */ ++ set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, ++ &kvm->arch.irq_sources_bitmap); ++ ++ raw_spin_lock_init(&kvm->arch.tsc_write_lock); ++ mutex_init(&kvm->arch.apic_map_lock); ++ spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); ++ ++ kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); ++ pvclock_update_vm_gtod_copy(kvm); ++ ++ kvm->arch.guest_can_read_msr_platform_info = true; ++ ++ INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); ++ INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); ++ ++ kvm_hv_init_vm(kvm); ++ kvm_page_track_init(kvm); ++ kvm_mmu_init_vm(kvm); ++ ++ if (kvm_x86_ops->vm_init) ++ return kvm_x86_ops->vm_init(kvm); ++ ++ return 0; ++} ++ ++int kvm_arch_post_init_vm(struct kvm *kvm) ++{ ++ return kvm_mmu_post_init_vm(kvm); ++} ++ ++static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) ++{ ++ vcpu_load(vcpu); ++ kvm_mmu_unload(vcpu); ++ vcpu_put(vcpu); ++} ++ ++static void kvm_free_vcpus(struct kvm *kvm) ++{ ++ unsigned int i; ++ struct kvm_vcpu *vcpu; ++ ++ /* ++ * Unpin any mmu pages first. ++ */ ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ kvm_clear_async_pf_completion_queue(vcpu); ++ kvm_unload_vcpu_mmu(vcpu); ++ } ++ kvm_for_each_vcpu(i, vcpu, kvm) ++ kvm_arch_vcpu_free(vcpu); ++ ++ mutex_lock(&kvm->lock); ++ for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) ++ kvm->vcpus[i] = NULL; ++ ++ atomic_set(&kvm->online_vcpus, 0); ++ mutex_unlock(&kvm->lock); ++} ++ ++void kvm_arch_sync_events(struct kvm *kvm) ++{ ++ cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work); ++ cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work); ++ kvm_free_pit(kvm); ++} ++ ++int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) ++{ ++ int i, r; ++ unsigned long hva; ++ struct kvm_memslots *slots = kvm_memslots(kvm); ++ struct kvm_memory_slot *slot, old; ++ ++ /* Called with kvm->slots_lock held. */ ++ if (WARN_ON(id >= KVM_MEM_SLOTS_NUM)) ++ return -EINVAL; ++ ++ slot = id_to_memslot(slots, id); ++ if (size) { ++ if (slot->npages) ++ return -EEXIST; ++ ++ /* ++ * MAP_SHARED to prevent internal slot pages from being moved ++ * by fork()/COW. 
++ */ ++ hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE, ++ MAP_SHARED | MAP_ANONYMOUS, 0); ++ if (IS_ERR((void *)hva)) ++ return PTR_ERR((void *)hva); ++ } else { ++ if (!slot->npages) ++ return 0; ++ ++ hva = 0; ++ } ++ ++ old = *slot; ++ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { ++ struct kvm_userspace_memory_region m; ++ ++ m.slot = id | (i << 16); ++ m.flags = 0; ++ m.guest_phys_addr = gpa; ++ m.userspace_addr = hva; ++ m.memory_size = size; ++ r = __kvm_set_memory_region(kvm, &m); ++ if (r < 0) ++ return r; ++ } ++ ++ if (!size) ++ vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(__x86_set_memory_region); ++ ++int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) ++{ ++ int r; ++ ++ mutex_lock(&kvm->slots_lock); ++ r = __x86_set_memory_region(kvm, id, gpa, size); ++ mutex_unlock(&kvm->slots_lock); ++ ++ return r; ++} ++EXPORT_SYMBOL_GPL(x86_set_memory_region); ++ ++void kvm_arch_pre_destroy_vm(struct kvm *kvm) ++{ ++ kvm_mmu_pre_destroy_vm(kvm); ++} ++ ++void kvm_arch_destroy_vm(struct kvm *kvm) ++{ ++ if (current->mm == kvm->mm) { ++ /* ++ * Free memory regions allocated on behalf of userspace, ++ * unless the the memory map has changed due to process exit ++ * or fd copying. ++ */ ++ x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0); ++ x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0); ++ x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); ++ } ++ if (kvm_x86_ops->vm_destroy) ++ kvm_x86_ops->vm_destroy(kvm); ++ kvm_pic_destroy(kvm); ++ kvm_ioapic_destroy(kvm); ++ kvm_free_vcpus(kvm); ++ kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); ++ kvm_mmu_uninit_vm(kvm); ++ kvm_page_track_cleanup(kvm); ++ kvm_hv_destroy_vm(kvm); ++} ++ ++void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, ++ struct kvm_memory_slot *dont) ++{ ++ int i; ++ ++ for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { ++ if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) { ++ kvfree(free->arch.rmap[i]); ++ free->arch.rmap[i] = NULL; ++ } ++ if (i == 0) ++ continue; ++ ++ if (!dont || free->arch.lpage_info[i - 1] != ++ dont->arch.lpage_info[i - 1]) { ++ kvfree(free->arch.lpage_info[i - 1]); ++ free->arch.lpage_info[i - 1] = NULL; ++ } ++ } ++ ++ kvm_page_track_free_memslot(free, dont); ++} ++ ++int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, ++ unsigned long npages) ++{ ++ int i; ++ ++ for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { ++ struct kvm_lpage_info *linfo; ++ unsigned long ugfn; ++ int lpages; ++ int level = i + 1; ++ ++ lpages = gfn_to_index(slot->base_gfn + npages - 1, ++ slot->base_gfn, level) + 1; ++ ++ slot->arch.rmap[i] = ++ kvcalloc(lpages, sizeof(*slot->arch.rmap[i]), ++ GFP_KERNEL); ++ if (!slot->arch.rmap[i]) ++ goto out_free; ++ if (i == 0) ++ continue; ++ ++ linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL); ++ if (!linfo) ++ goto out_free; ++ ++ slot->arch.lpage_info[i - 1] = linfo; ++ ++ if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) ++ linfo[0].disallow_lpage = 1; ++ if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) ++ linfo[lpages - 1].disallow_lpage = 1; ++ ugfn = slot->userspace_addr >> PAGE_SHIFT; ++ /* ++ * If the gfn and userspace address are not aligned wrt each ++ * other, or if explicitly asked to, disable large page ++ * support for this slot ++ */ ++ if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || ++ !kvm_largepages_enabled()) { ++ unsigned long j; ++ ++ for (j = 0; j < lpages; ++j) ++ 
linfo[j].disallow_lpage = 1; ++ } ++ } ++ ++ if (kvm_page_track_create_memslot(slot, npages)) ++ goto out_free; ++ ++ return 0; ++ ++out_free: ++ for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { ++ kvfree(slot->arch.rmap[i]); ++ slot->arch.rmap[i] = NULL; ++ if (i == 0) ++ continue; ++ ++ kvfree(slot->arch.lpage_info[i - 1]); ++ slot->arch.lpage_info[i - 1] = NULL; ++ } ++ return -ENOMEM; ++} ++ ++void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) ++{ ++ /* ++ * memslots->generation has been incremented. ++ * mmio generation may have reached its maximum value. ++ */ ++ kvm_mmu_invalidate_mmio_sptes(kvm, gen); ++} ++ ++int kvm_arch_prepare_memory_region(struct kvm *kvm, ++ struct kvm_memory_slot *memslot, ++ const struct kvm_userspace_memory_region *mem, ++ enum kvm_mr_change change) ++{ ++ return 0; ++} ++ ++static void kvm_mmu_slot_apply_flags(struct kvm *kvm, ++ struct kvm_memory_slot *new) ++{ ++ /* Still write protect RO slot */ ++ if (new->flags & KVM_MEM_READONLY) { ++ kvm_mmu_slot_remove_write_access(kvm, new); ++ return; ++ } ++ ++ /* ++ * Call kvm_x86_ops dirty logging hooks when they are valid. ++ * ++ * kvm_x86_ops->slot_disable_log_dirty is called when: ++ * ++ * - KVM_MR_CREATE with dirty logging is disabled ++ * - KVM_MR_FLAGS_ONLY with dirty logging is disabled in new flag ++ * ++ * The reason is, in case of PML, we need to set D-bit for any slots ++ * with dirty logging disabled in order to eliminate unnecessary GPA ++ * logging in PML buffer (and potential PML buffer full VMEXT). This ++ * guarantees leaving PML enabled during guest's lifetime won't have ++ * any additonal overhead from PML when guest is running with dirty ++ * logging disabled for memory slots. ++ * ++ * kvm_x86_ops->slot_enable_log_dirty is called when switching new slot ++ * to dirty logging mode. ++ * ++ * If kvm_x86_ops dirty logging hooks are invalid, use write protect. ++ * ++ * In case of write protect: ++ * ++ * Write protect all pages for dirty logging. ++ * ++ * All the sptes including the large sptes which point to this ++ * slot are set to readonly. We can not create any new large ++ * spte on this slot until the end of the logging. ++ * ++ * See the comments in fast_page_fault(). ++ */ ++ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { ++ if (kvm_x86_ops->slot_enable_log_dirty) ++ kvm_x86_ops->slot_enable_log_dirty(kvm, new); ++ else ++ kvm_mmu_slot_remove_write_access(kvm, new); ++ } else { ++ if (kvm_x86_ops->slot_disable_log_dirty) ++ kvm_x86_ops->slot_disable_log_dirty(kvm, new); ++ } ++} ++ ++void kvm_arch_commit_memory_region(struct kvm *kvm, ++ const struct kvm_userspace_memory_region *mem, ++ const struct kvm_memory_slot *old, ++ const struct kvm_memory_slot *new, ++ enum kvm_mr_change change) ++{ ++ int nr_mmu_pages = 0; ++ ++ if (!kvm->arch.n_requested_mmu_pages) ++ nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); ++ ++ if (nr_mmu_pages) ++ kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); ++ ++ /* ++ * Dirty logging tracks sptes in 4k granularity, meaning that large ++ * sptes have to be split. If live migration is successful, the guest ++ * in the source machine will be destroyed and large sptes will be ++ * created in the destination. However, if the guest continues to run ++ * in the source machine (for example if live migration fails), small ++ * sptes will remain around and cause bad performance. ++ * ++ * Scan sptes if dirty logging has been stopped, dropping those ++ * which can be collapsed into a single large-page spte. Later ++ * page faults will create the large-page sptes. 
++ */ ++ if ((change != KVM_MR_DELETE) && ++ (old->flags & KVM_MEM_LOG_DIRTY_PAGES) && ++ !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) ++ kvm_mmu_zap_collapsible_sptes(kvm, new); ++ ++ /* ++ * Set up write protection and/or dirty logging for the new slot. ++ * ++ * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have ++ * been zapped so no dirty logging staff is needed for old slot. For ++ * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the ++ * new and it's also covered when dealing with the new slot. ++ * ++ * FIXME: const-ify all uses of struct kvm_memory_slot. ++ */ ++ if (change != KVM_MR_DELETE) ++ kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new); ++} ++ ++void kvm_arch_flush_shadow_all(struct kvm *kvm) ++{ ++ kvm_mmu_invalidate_zap_all_pages(kvm); ++} ++ ++void kvm_arch_flush_shadow_memslot(struct kvm *kvm, ++ struct kvm_memory_slot *slot) ++{ ++ kvm_page_track_flush_slot(kvm, slot); ++} ++ ++static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) ++{ ++ return (is_guest_mode(vcpu) && ++ kvm_x86_ops->guest_apic_has_interrupt && ++ kvm_x86_ops->guest_apic_has_interrupt(vcpu)); ++} ++ ++static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) ++{ ++ if (!list_empty_careful(&vcpu->async_pf.done)) ++ return true; ++ ++ if (kvm_apic_has_events(vcpu)) ++ return true; ++ ++ if (vcpu->arch.pv.pv_unhalted) ++ return true; ++ ++ if (vcpu->arch.exception.pending) ++ return true; ++ ++ if (kvm_test_request(KVM_REQ_NMI, vcpu) || ++ (vcpu->arch.nmi_pending && ++ kvm_x86_ops->nmi_allowed(vcpu))) ++ return true; ++ ++ if (kvm_test_request(KVM_REQ_SMI, vcpu) || ++ (vcpu->arch.smi_pending && !is_smm(vcpu))) ++ return true; ++ ++ if (kvm_arch_interrupt_allowed(vcpu) && ++ (kvm_cpu_has_interrupt(vcpu) || ++ kvm_guest_apic_has_interrupt(vcpu))) ++ return true; ++ ++ if (kvm_hv_has_stimer_pending(vcpu)) ++ return true; ++ ++ return false; ++} ++ ++int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) ++{ ++ return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); ++} ++ ++bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) ++{ ++ if (READ_ONCE(vcpu->arch.pv.pv_unhalted)) ++ return true; ++ ++ if (kvm_test_request(KVM_REQ_NMI, vcpu) || ++ kvm_test_request(KVM_REQ_SMI, vcpu) || ++ kvm_test_request(KVM_REQ_EVENT, vcpu)) ++ return true; ++ ++ if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu)) ++ return true; ++ ++ return false; ++} ++ ++bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) ++{ ++ return vcpu->arch.preempted_in_kernel; ++} ++ ++int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) ++{ ++ return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; ++} ++ ++int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) ++{ ++ return kvm_x86_ops->interrupt_allowed(vcpu); ++} ++ ++unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) ++{ ++ if (is_64_bit_mode(vcpu)) ++ return kvm_rip_read(vcpu); ++ return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) + ++ kvm_rip_read(vcpu)); ++} ++EXPORT_SYMBOL_GPL(kvm_get_linear_rip); ++ ++bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) ++{ ++ return kvm_get_linear_rip(vcpu) == linear_rip; ++} ++EXPORT_SYMBOL_GPL(kvm_is_linear_rip); ++ ++unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) ++{ ++ unsigned long rflags; ++ ++ rflags = kvm_x86_ops->get_rflags(vcpu); ++ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) ++ rflags &= ~X86_EFLAGS_TF; ++ return rflags; ++} ++EXPORT_SYMBOL_GPL(kvm_get_rflags); ++ ++static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned 
long rflags) ++{ ++ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && ++ kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) ++ rflags |= X86_EFLAGS_TF; ++ kvm_x86_ops->set_rflags(vcpu, rflags); ++} ++ ++void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) ++{ ++ __kvm_set_rflags(vcpu, rflags); ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_set_rflags); ++ ++void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) ++{ ++ int r; ++ ++ if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || ++ work->wakeup_all) ++ return; ++ ++ r = kvm_mmu_reload(vcpu); ++ if (unlikely(r)) ++ return; ++ ++ if (!vcpu->arch.mmu.direct_map && ++ work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu)) ++ return; ++ ++ vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true); ++} ++ ++static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) ++{ ++ return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU)); ++} ++ ++static inline u32 kvm_async_pf_next_probe(u32 key) ++{ ++ return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1); ++} ++ ++static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) ++{ ++ u32 key = kvm_async_pf_hash_fn(gfn); ++ ++ while (vcpu->arch.apf.gfns[key] != ~0) ++ key = kvm_async_pf_next_probe(key); ++ ++ vcpu->arch.apf.gfns[key] = gfn; ++} ++ ++static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn) ++{ ++ int i; ++ u32 key = kvm_async_pf_hash_fn(gfn); ++ ++ for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) && ++ (vcpu->arch.apf.gfns[key] != gfn && ++ vcpu->arch.apf.gfns[key] != ~0); i++) ++ key = kvm_async_pf_next_probe(key); ++ ++ return key; ++} ++ ++bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) ++{ ++ return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn; ++} ++ ++static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) ++{ ++ u32 i, j, k; ++ ++ i = j = kvm_async_pf_gfn_slot(vcpu, gfn); ++ while (true) { ++ vcpu->arch.apf.gfns[i] = ~0; ++ do { ++ j = kvm_async_pf_next_probe(j); ++ if (vcpu->arch.apf.gfns[j] == ~0) ++ return; ++ k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]); ++ /* ++ * k lies cyclically in ]i,j] ++ * | i.k.j | ++ * |....j i.k.| or |.k..j i...| ++ */ ++ } while ((i <= j) ? 
(i < k && k <= j) : (i < k || k <= j)); ++ vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j]; ++ i = j; ++ } ++} ++ ++static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) ++{ ++ ++ return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, ++ sizeof(val)); ++} ++ ++static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val) ++{ ++ ++ return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val, ++ sizeof(u32)); ++} ++ ++void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, ++ struct kvm_async_pf *work) ++{ ++ struct x86_exception fault; ++ ++ trace_kvm_async_pf_not_present(work->arch.token, work->gva); ++ kvm_add_async_pf_gfn(vcpu, work->arch.gfn); ++ ++ if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) || ++ (vcpu->arch.apf.send_user_only && ++ kvm_x86_ops->get_cpl(vcpu) == 0)) ++ kvm_make_request(KVM_REQ_APF_HALT, vcpu); ++ else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { ++ fault.vector = PF_VECTOR; ++ fault.error_code_valid = true; ++ fault.error_code = 0; ++ fault.nested_page_fault = false; ++ fault.address = work->arch.token; ++ fault.async_page_fault = true; ++ kvm_inject_page_fault(vcpu, &fault); ++ } ++} ++ ++void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, ++ struct kvm_async_pf *work) ++{ ++ struct x86_exception fault; ++ u32 val; ++ ++ if (work->wakeup_all) ++ work->arch.token = ~0; /* broadcast wakeup */ ++ else ++ kvm_del_async_pf_gfn(vcpu, work->arch.gfn); ++ trace_kvm_async_pf_ready(work->arch.token, work->gva); ++ ++ if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED && ++ !apf_get_user(vcpu, &val)) { ++ if (val == KVM_PV_REASON_PAGE_NOT_PRESENT && ++ vcpu->arch.exception.pending && ++ vcpu->arch.exception.nr == PF_VECTOR && ++ !apf_put_user(vcpu, 0)) { ++ vcpu->arch.exception.injected = false; ++ vcpu->arch.exception.pending = false; ++ vcpu->arch.exception.nr = 0; ++ vcpu->arch.exception.has_error_code = false; ++ vcpu->arch.exception.error_code = 0; ++ } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { ++ fault.vector = PF_VECTOR; ++ fault.error_code_valid = true; ++ fault.error_code = 0; ++ fault.nested_page_fault = false; ++ fault.address = work->arch.token; ++ fault.async_page_fault = true; ++ kvm_inject_page_fault(vcpu, &fault); ++ } ++ } ++ vcpu->arch.apf.halted = false; ++ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; ++} ++ ++bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) ++{ ++ if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED)) ++ return true; ++ else ++ return kvm_can_do_async_pf(vcpu); ++} ++ ++void kvm_arch_start_assignment(struct kvm *kvm) ++{ ++ atomic_inc(&kvm->arch.assigned_device_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); ++ ++void kvm_arch_end_assignment(struct kvm *kvm) ++{ ++ atomic_dec(&kvm->arch.assigned_device_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); ++ ++bool kvm_arch_has_assigned_device(struct kvm *kvm) ++{ ++ return atomic_read(&kvm->arch.assigned_device_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); ++ ++void kvm_arch_register_noncoherent_dma(struct kvm *kvm) ++{ ++ atomic_inc(&kvm->arch.noncoherent_dma_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma); ++ ++void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) ++{ ++ atomic_dec(&kvm->arch.noncoherent_dma_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma); ++ ++bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) ++{ ++ return atomic_read(&kvm->arch.noncoherent_dma_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); ++ ++bool 
kvm_arch_has_irq_bypass(void) ++{ ++ return kvm_x86_ops->update_pi_irte != NULL; ++} ++ ++int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, ++ struct irq_bypass_producer *prod) ++{ ++ struct kvm_kernel_irqfd *irqfd = ++ container_of(cons, struct kvm_kernel_irqfd, consumer); ++ ++ irqfd->producer = prod; ++ ++ return kvm_x86_ops->update_pi_irte(irqfd->kvm, ++ prod->irq, irqfd->gsi, 1); ++} ++ ++void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, ++ struct irq_bypass_producer *prod) ++{ ++ int ret; ++ struct kvm_kernel_irqfd *irqfd = ++ container_of(cons, struct kvm_kernel_irqfd, consumer); ++ ++ WARN_ON(irqfd->producer != prod); ++ irqfd->producer = NULL; ++ ++ /* ++ * When producer of consumer is unregistered, we change back to ++ * remapped mode, so we can re-use the current implementation ++ * when the irq is masked/disabled or the consumer side (KVM ++ * int this case doesn't want to receive the interrupts. ++ */ ++ ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0); ++ if (ret) ++ printk(KERN_INFO "irq bypass consumer (token %p) unregistration" ++ " fails: %d\n", irqfd->consumer.token, ret); ++} ++ ++int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, ++ uint32_t guest_irq, bool set) ++{ ++ if (!kvm_x86_ops->update_pi_irte) ++ return -EINVAL; ++ ++ return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set); ++} ++ ++bool kvm_vector_hashing_enabled(void) ++{ ++ return vector_hashing; ++} ++EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled); ++ ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi); +diff -uprN kernel/arch/x86/lib/mmx_32.c kernel_new/arch/x86/lib/mmx_32.c +--- kernel/arch/x86/lib/mmx_32.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/lib/mmx_32.c 2021-04-01 18:28:07.659863283 +0800 +@@ -31,7 +31,7 @@ void *_mmx_memcpy(void *to, const void * + void *p; + int i; + +- if (unlikely(in_interrupt())) ++ if (unlikely(!ipipe_root_p || in_interrupt())) + return __memcpy(to, from, len); + + p = to; +diff -uprN kernel/arch/x86/lib/usercopy.c kernel_new/arch/x86/lib/usercopy.c +--- kernel/arch/x86/lib/usercopy.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/lib/usercopy.c 2021-04-01 18:28:07.659863283 +0800 +@@ -5,6 +5,7 @@ + */ + + #include ++#include + #include + + #include +@@ -18,7 +19,7 @@ copy_from_user_nmi(void *to, const void + { + unsigned long ret; + +- if (__range_not_ok(from, n, TASK_SIZE)) ++ if (!ipipe_root_p || __range_not_ok(from, n, TASK_SIZE)) + return n; + + if (!nmi_uaccess_okay()) +diff -uprN kernel/arch/x86/mm/fault.c kernel_new/arch/x86/mm/fault.c +--- 
kernel/arch/x86/mm/fault.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/mm/fault.c 2021-04-01 18:28:07.659863283 +0800 +@@ -1235,6 +1235,12 @@ __do_page_fault(struct pt_regs *regs, un + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + u32 pkey; + ++#ifdef CONFIG_IPIPE ++ if (ipipe_root_domain != ipipe_head_domain) { ++ trace_hardirqs_on(); ++ hard_local_irq_enable(); ++ } ++#endif + tsk = current; + mm = tsk->mm; + +@@ -1488,3 +1494,50 @@ do_page_fault(struct pt_regs *regs, unsi + exception_exit(prev_state); + } + NOKPROBE_SYMBOL(do_page_fault); ++ ++#ifdef CONFIG_IPIPE ++ ++void __ipipe_pin_mapping_globally(unsigned long start, unsigned long end) ++{ ++#ifdef CONFIG_X86_32 ++ unsigned long next, addr = start; ++ ++ do { ++ unsigned long flags; ++ struct page *page; ++ ++ next = pgd_addr_end(addr, end); ++ spin_lock_irqsave(&pgd_lock, flags); ++ list_for_each_entry(page, &pgd_list, lru) ++ vmalloc_sync_one(page_address(page), addr); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ ++ } while (addr = next, addr != end); ++#else ++ unsigned long next, addr = start; ++ pgd_t *pgd, *pgd_ref; ++ struct page *page; ++ ++ if (!(start >= VMALLOC_START && start < VMALLOC_END)) ++ return; ++ ++ do { ++ next = pgd_addr_end(addr, end); ++ pgd_ref = pgd_offset_k(addr); ++ if (pgd_none(*pgd_ref)) ++ continue; ++ spin_lock(&pgd_lock); ++ list_for_each_entry(page, &pgd_list, lru) { ++ pgd = page_address(page) + pgd_index(addr); ++ if (pgd_none(*pgd)) ++ set_pgd(pgd, *pgd_ref); ++ } ++ spin_unlock(&pgd_lock); ++ addr = next; ++ } while (addr != end); ++ ++ arch_flush_lazy_mmu_mode(); ++#endif ++} ++ ++#endif /* CONFIG_IPIPE */ +diff -uprN kernel/arch/x86/mm/fault.c.orig kernel_new/arch/x86/mm/fault.c.orig +--- kernel/arch/x86/mm/fault.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/mm/fault.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,1490 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 1995 Linus Torvalds ++ * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. ++ * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar ++ */ ++#include /* test_thread_flag(), ... */ ++#include /* task_stack_*(), ... */ ++#include /* oops_begin/end, ... */ ++#include /* search_exception_tables */ ++#include /* max_low_pfn */ ++#include /* NOKPROBE_SYMBOL, ... */ ++#include /* kmmio_handler, ... */ ++#include /* perf_sw_event */ ++#include /* hstate_index_to_shift */ ++#include /* prefetchw */ ++#include /* exception_enter(), ... */ ++#include /* faulthandler_disabled() */ ++#include ++ ++#include /* boot_cpu_has, ... */ ++#include /* dotraplinkage, ... */ ++#include /* pgd_*(), ... 
*/ ++#include /* VSYSCALL_ADDR */ ++#include /* emulate_vsyscall */ ++#include /* struct vm86 */ ++#include /* vma_pkey() */ ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++/* ++ * Returns 0 if mmiotrace is disabled, or if the fault is not ++ * handled by mmiotrace: ++ */ ++static nokprobe_inline int ++kmmio_fault(struct pt_regs *regs, unsigned long addr) ++{ ++ if (unlikely(is_kmmio_active())) ++ if (kmmio_handler(regs, addr) == 1) ++ return -1; ++ return 0; ++} ++ ++static nokprobe_inline int kprobes_fault(struct pt_regs *regs) ++{ ++ int ret = 0; ++ ++ /* kprobe_running() needs smp_processor_id() */ ++ if (kprobes_built_in() && !user_mode(regs)) { ++ preempt_disable(); ++ if (kprobe_running() && kprobe_fault_handler(regs, 14)) ++ ret = 1; ++ preempt_enable(); ++ } ++ ++ return ret; ++} ++ ++/* ++ * Prefetch quirks: ++ * ++ * 32-bit mode: ++ * ++ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. ++ * Check that here and ignore it. ++ * ++ * 64-bit mode: ++ * ++ * Sometimes the CPU reports invalid exceptions on prefetch. ++ * Check that here and ignore it. ++ * ++ * Opcode checker based on code by Richard Brunner. ++ */ ++static inline int ++check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, ++ unsigned char opcode, int *prefetch) ++{ ++ unsigned char instr_hi = opcode & 0xf0; ++ unsigned char instr_lo = opcode & 0x0f; ++ ++ switch (instr_hi) { ++ case 0x20: ++ case 0x30: ++ /* ++ * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. ++ * In X86_64 long mode, the CPU will signal invalid ++ * opcode if some of these prefixes are present so ++ * X86_64 will never get here anyway ++ */ ++ return ((instr_lo & 7) == 0x6); ++#ifdef CONFIG_X86_64 ++ case 0x40: ++ /* ++ * In AMD64 long mode 0x40..0x4F are valid REX prefixes ++ * Need to figure out under what instruction mode the ++ * instruction was issued. Could check the LDT for lm, ++ * but for now it's good enough to assume that long ++ * mode only uses well known segments or kernel. ++ */ ++ return (!user_mode(regs) || user_64bit_mode(regs)); ++#endif ++ case 0x60: ++ /* 0x64 thru 0x67 are valid prefixes in all modes. */ ++ return (instr_lo & 0xC) == 0x4; ++ case 0xF0: ++ /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ ++ return !instr_lo || (instr_lo>>1) == 1; ++ case 0x00: ++ /* Prefetch instruction is 0x0F0D or 0x0F18 */ ++ if (probe_kernel_address(instr, opcode)) ++ return 0; ++ ++ *prefetch = (instr_lo == 0xF) && ++ (opcode == 0x0D || opcode == 0x18); ++ return 0; ++ default: ++ return 0; ++ } ++} ++ ++static int ++is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) ++{ ++ unsigned char *max_instr; ++ unsigned char *instr; ++ int prefetch = 0; ++ ++ /* ++ * If it was a exec (instruction fetch) fault on NX page, then ++ * do not ignore the fault: ++ */ ++ if (error_code & X86_PF_INSTR) ++ return 0; ++ ++ instr = (void *)convert_ip_to_linear(current, regs); ++ max_instr = instr + 15; ++ ++ if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX) ++ return 0; ++ ++ while (instr < max_instr) { ++ unsigned char opcode; ++ ++ if (probe_kernel_address(instr, opcode)) ++ break; ++ ++ instr++; ++ ++ if (!check_prefetch_opcode(regs, instr, opcode, &prefetch)) ++ break; ++ } ++ return prefetch; ++} ++ ++/* ++ * A protection key fault means that the PKRU value did not allow ++ * access to some PTE. 
Userspace can figure out what PKRU was ++ * from the XSAVE state, and this function fills out a field in ++ * siginfo so userspace can discover which protection key was set ++ * on the PTE. ++ * ++ * If we get here, we know that the hardware signaled a X86_PF_PK ++ * fault and that there was a VMA once we got in the fault ++ * handler. It does *not* guarantee that the VMA we find here ++ * was the one that we faulted on. ++ * ++ * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); ++ * 2. T1 : set PKRU to deny access to pkey=4, touches page ++ * 3. T1 : faults... ++ * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); ++ * 5. T1 : enters fault handler, takes mmap_sem, etc... ++ * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really ++ * faulted on a pte with its pkey=4. ++ */ ++static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info, ++ u32 *pkey) ++{ ++ /* This is effectively an #ifdef */ ++ if (!boot_cpu_has(X86_FEATURE_OSPKE)) ++ return; ++ ++ /* Fault not from Protection Keys: nothing to do */ ++ if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV)) ++ return; ++ /* ++ * force_sig_info_fault() is called from a number of ++ * contexts, some of which have a VMA and some of which ++ * do not. The X86_PF_PK handing happens after we have a ++ * valid VMA, so we should never reach this without a ++ * valid VMA. ++ */ ++ if (!pkey) { ++ WARN_ONCE(1, "PKU fault with no VMA passed in"); ++ info->si_pkey = 0; ++ return; ++ } ++ /* ++ * si_pkey should be thought of as a strong hint, but not ++ * absolutely guranteed to be 100% accurate because of ++ * the race explained above. ++ */ ++ info->si_pkey = *pkey; ++} ++ ++static void ++force_sig_info_fault(int si_signo, int si_code, unsigned long address, ++ struct task_struct *tsk, u32 *pkey, int fault) ++{ ++ unsigned lsb = 0; ++ siginfo_t info; ++ ++ clear_siginfo(&info); ++ info.si_signo = si_signo; ++ info.si_errno = 0; ++ info.si_code = si_code; ++ info.si_addr = (void __user *)address; ++ if (fault & VM_FAULT_HWPOISON_LARGE) ++ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); ++ if (fault & VM_FAULT_HWPOISON) ++ lsb = PAGE_SHIFT; ++ info.si_addr_lsb = lsb; ++ ++ fill_sig_info_pkey(si_signo, si_code, &info, pkey); ++ ++ force_sig_info(si_signo, &info, tsk); ++} ++ ++DEFINE_SPINLOCK(pgd_lock); ++LIST_HEAD(pgd_list); ++ ++#ifdef CONFIG_X86_32 ++static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) ++{ ++ unsigned index = pgd_index(address); ++ pgd_t *pgd_k; ++ p4d_t *p4d, *p4d_k; ++ pud_t *pud, *pud_k; ++ pmd_t *pmd, *pmd_k; ++ ++ pgd += index; ++ pgd_k = init_mm.pgd + index; ++ ++ if (!pgd_present(*pgd_k)) ++ return NULL; ++ ++ /* ++ * set_pgd(pgd, *pgd_k); here would be useless on PAE ++ * and redundant with the set_pmd() on non-PAE. As would ++ * set_p4d/set_pud. 
++ */ ++ p4d = p4d_offset(pgd, address); ++ p4d_k = p4d_offset(pgd_k, address); ++ if (!p4d_present(*p4d_k)) ++ return NULL; ++ ++ pud = pud_offset(p4d, address); ++ pud_k = pud_offset(p4d_k, address); ++ if (!pud_present(*pud_k)) ++ return NULL; ++ ++ pmd = pmd_offset(pud, address); ++ pmd_k = pmd_offset(pud_k, address); ++ ++ if (pmd_present(*pmd) != pmd_present(*pmd_k)) ++ set_pmd(pmd, *pmd_k); ++ ++ if (!pmd_present(*pmd_k)) ++ return NULL; ++ else ++ BUG_ON(pmd_pfn(*pmd) != pmd_pfn(*pmd_k)); ++ ++ return pmd_k; ++} ++ ++static void vmalloc_sync(void) ++{ ++ unsigned long address; ++ ++ if (SHARED_KERNEL_PMD) ++ return; ++ ++ for (address = VMALLOC_START & PMD_MASK; ++ address >= TASK_SIZE_MAX && address < VMALLOC_END; ++ address += PMD_SIZE) { ++ struct page *page; ++ ++ spin_lock(&pgd_lock); ++ list_for_each_entry(page, &pgd_list, lru) { ++ spinlock_t *pgt_lock; ++ ++ /* the pgt_lock only for Xen */ ++ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; ++ ++ spin_lock(pgt_lock); ++ vmalloc_sync_one(page_address(page), address); ++ spin_unlock(pgt_lock); ++ } ++ spin_unlock(&pgd_lock); ++ } ++} ++ ++void vmalloc_sync_mappings(void) ++{ ++ vmalloc_sync(); ++} ++ ++void vmalloc_sync_unmappings(void) ++{ ++ vmalloc_sync(); ++} ++ ++/* ++ * 32-bit: ++ * ++ * Handle a fault on the vmalloc or module mapping area ++ */ ++static noinline int vmalloc_fault(unsigned long address) ++{ ++ unsigned long pgd_paddr; ++ pmd_t *pmd_k; ++ pte_t *pte_k; ++ ++ /* Make sure we are in vmalloc area: */ ++ if (!(address >= VMALLOC_START && address < VMALLOC_END)) ++ return -1; ++ ++ /* ++ * Synchronize this task's top level page-table ++ * with the 'reference' page table. ++ * ++ * Do _not_ use "current" here. We might be inside ++ * an interrupt in the middle of a task switch.. ++ */ ++ pgd_paddr = read_cr3_pa(); ++ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); ++ if (!pmd_k) ++ return -1; ++ ++ if (pmd_large(*pmd_k)) ++ return 0; ++ ++ pte_k = pte_offset_kernel(pmd_k, address); ++ if (!pte_present(*pte_k)) ++ return -1; ++ ++ return 0; ++} ++NOKPROBE_SYMBOL(vmalloc_fault); ++ ++/* ++ * Did it hit the DOS screen memory VA from vm86 mode? ++ */ ++static inline void ++check_v8086_mode(struct pt_regs *regs, unsigned long address, ++ struct task_struct *tsk) ++{ ++#ifdef CONFIG_VM86 ++ unsigned long bit; ++ ++ if (!v8086_mode(regs) || !tsk->thread.vm86) ++ return; ++ ++ bit = (address - 0xA0000) >> PAGE_SHIFT; ++ if (bit < 32) ++ tsk->thread.vm86->screen_bitmap |= 1 << bit; ++#endif ++} ++ ++static bool low_pfn(unsigned long pfn) ++{ ++ return pfn < max_low_pfn; ++} ++ ++static void dump_pagetable(unsigned long address) ++{ ++ pgd_t *base = __va(read_cr3_pa()); ++ pgd_t *pgd = &base[pgd_index(address)]; ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++#ifdef CONFIG_X86_PAE ++ pr_info("*pdpt = %016Lx ", pgd_val(*pgd)); ++ if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) ++ goto out; ++#define pr_pde pr_cont ++#else ++#define pr_pde pr_info ++#endif ++ p4d = p4d_offset(pgd, address); ++ pud = pud_offset(p4d, address); ++ pmd = pmd_offset(pud, address); ++ pr_pde("*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); ++#undef pr_pde ++ ++ /* ++ * We must not directly access the pte in the highpte ++ * case if the page table is located in highmem. 
++ * And let's rather not kmap-atomic the pte, just in case ++ * it's allocated already: ++ */ ++ if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd)) ++ goto out; ++ ++ pte = pte_offset_kernel(pmd, address); ++ pr_cont("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); ++out: ++ pr_cont("\n"); ++} ++ ++#else /* CONFIG_X86_64: */ ++ ++void vmalloc_sync_mappings(void) ++{ ++ /* ++ * 64-bit mappings might allocate new p4d/pud pages ++ * that need to be propagated to all tasks' PGDs. ++ */ ++ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); ++} ++ ++void vmalloc_sync_unmappings(void) ++{ ++ /* ++ * Unmappings never allocate or free p4d/pud pages. ++ * No work is required here. ++ */ ++} ++ ++/* ++ * 64-bit: ++ * ++ * Handle a fault on the vmalloc area ++ */ ++static noinline int vmalloc_fault(unsigned long address) ++{ ++ pgd_t *pgd, *pgd_k; ++ p4d_t *p4d, *p4d_k; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++ /* Make sure we are in vmalloc area: */ ++ if (!(address >= VMALLOC_START && address < VMALLOC_END)) ++ return -1; ++ ++ /* ++ * Copy kernel mappings over when needed. This can also ++ * happen within a race in page table update. In the later ++ * case just flush: ++ */ ++ pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address); ++ pgd_k = pgd_offset_k(address); ++ if (pgd_none(*pgd_k)) ++ return -1; ++ ++ if (pgtable_l5_enabled()) { ++ if (pgd_none(*pgd)) { ++ set_pgd(pgd, *pgd_k); ++ arch_flush_lazy_mmu_mode(); ++ } else { ++ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k)); ++ } ++ } ++ ++ /* With 4-level paging, copying happens on the p4d level. */ ++ p4d = p4d_offset(pgd, address); ++ p4d_k = p4d_offset(pgd_k, address); ++ if (p4d_none(*p4d_k)) ++ return -1; ++ ++ if (p4d_none(*p4d) && !pgtable_l5_enabled()) { ++ set_p4d(p4d, *p4d_k); ++ arch_flush_lazy_mmu_mode(); ++ } else { ++ BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k)); ++ } ++ ++ BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4); ++ ++ pud = pud_offset(p4d, address); ++ if (pud_none(*pud)) ++ return -1; ++ ++ if (pud_large(*pud)) ++ return 0; ++ ++ pmd = pmd_offset(pud, address); ++ if (pmd_none(*pmd)) ++ return -1; ++ ++ if (pmd_large(*pmd)) ++ return 0; ++ ++ pte = pte_offset_kernel(pmd, address); ++ if (!pte_present(*pte)) ++ return -1; ++ ++ return 0; ++} ++NOKPROBE_SYMBOL(vmalloc_fault); ++ ++#ifdef CONFIG_CPU_SUP_AMD ++static const char errata93_warning[] = ++KERN_ERR ++"******* Your BIOS seems to not contain a fix for K8 errata #93\n" ++"******* Working around it, but it may cause SEGVs or burn power.\n" ++"******* Please consider a BIOS update.\n" ++"******* Disabling USB legacy in the BIOS may also help.\n"; ++#endif ++ ++/* ++ * No vm86 mode in 64-bit mode: ++ */ ++static inline void ++check_v8086_mode(struct pt_regs *regs, unsigned long address, ++ struct task_struct *tsk) ++{ ++} ++ ++static int bad_address(void *p) ++{ ++ unsigned long dummy; ++ ++ return probe_kernel_address((unsigned long *)p, dummy); ++} ++ ++static void dump_pagetable(unsigned long address) ++{ ++ pgd_t *base = __va(read_cr3_pa()); ++ pgd_t *pgd = base + pgd_index(address); ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++ if (bad_address(pgd)) ++ goto bad; ++ ++ pr_info("PGD %lx ", pgd_val(*pgd)); ++ ++ if (!pgd_present(*pgd)) ++ goto out; ++ ++ p4d = p4d_offset(pgd, address); ++ if (bad_address(p4d)) ++ goto bad; ++ ++ pr_cont("P4D %lx ", p4d_val(*p4d)); ++ if (!p4d_present(*p4d) || p4d_large(*p4d)) ++ goto out; ++ ++ pud = pud_offset(p4d, address); ++ if (bad_address(pud)) ++ goto bad; ++ ++ 
pr_cont("PUD %lx ", pud_val(*pud)); ++ if (!pud_present(*pud) || pud_large(*pud)) ++ goto out; ++ ++ pmd = pmd_offset(pud, address); ++ if (bad_address(pmd)) ++ goto bad; ++ ++ pr_cont("PMD %lx ", pmd_val(*pmd)); ++ if (!pmd_present(*pmd) || pmd_large(*pmd)) ++ goto out; ++ ++ pte = pte_offset_kernel(pmd, address); ++ if (bad_address(pte)) ++ goto bad; ++ ++ pr_cont("PTE %lx", pte_val(*pte)); ++out: ++ pr_cont("\n"); ++ return; ++bad: ++ pr_info("BAD\n"); ++} ++ ++#endif /* CONFIG_X86_64 */ ++ ++/* ++ * Workaround for K8 erratum #93 & buggy BIOS. ++ * ++ * BIOS SMM functions are required to use a specific workaround ++ * to avoid corruption of the 64bit RIP register on C stepping K8. ++ * ++ * A lot of BIOS that didn't get tested properly miss this. ++ * ++ * The OS sees this as a page fault with the upper 32bits of RIP cleared. ++ * Try to work around it here. ++ * ++ * Note we only handle faults in kernel here. ++ * Does nothing on 32-bit. ++ */ ++static int is_errata93(struct pt_regs *regs, unsigned long address) ++{ ++#if defined(CONFIG_X86_64) && defined(CONFIG_CPU_SUP_AMD) ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD ++ || boot_cpu_data.x86 != 0xf) ++ return 0; ++ ++ if (address != regs->ip) ++ return 0; ++ ++ if ((address >> 32) != 0) ++ return 0; ++ ++ address |= 0xffffffffUL << 32; ++ if ((address >= (u64)_stext && address <= (u64)_etext) || ++ (address >= MODULES_VADDR && address <= MODULES_END)) { ++ printk_once(errata93_warning); ++ regs->ip = address; ++ return 1; ++ } ++#endif ++ return 0; ++} ++ ++/* ++ * Work around K8 erratum #100 K8 in compat mode occasionally jumps ++ * to illegal addresses >4GB. ++ * ++ * We catch this in the page fault handler because these addresses ++ * are not reachable. Just detect this case and return. Any code ++ * segment in LDT is compatibility mode. ++ */ ++static int is_errata100(struct pt_regs *regs, unsigned long address) ++{ ++#ifdef CONFIG_X86_64 ++ if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32)) ++ return 1; ++#endif ++ return 0; ++} ++ ++static int is_f00f_bug(struct pt_regs *regs, unsigned long address) ++{ ++#ifdef CONFIG_X86_F00F_BUG ++ unsigned long nr; ++ ++ /* ++ * Pentium F0 0F C7 C8 bug workaround: ++ */ ++ if (boot_cpu_has_bug(X86_BUG_F00F)) { ++ nr = (address - idt_descr.address) >> 3; ++ ++ if (nr == 6) { ++ do_invalid_op(regs, 0); ++ return 1; ++ } ++ } ++#endif ++ return 0; ++} ++ ++static void ++show_fault_oops(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ if (!oops_may_print()) ++ return; ++ ++ if (error_code & X86_PF_INSTR) { ++ unsigned int level; ++ pgd_t *pgd; ++ pte_t *pte; ++ ++ pgd = __va(read_cr3_pa()); ++ pgd += pgd_index(address); ++ ++ pte = lookup_address_in_pgd(pgd, address, &level); ++ ++ if (pte && pte_present(*pte) && !pte_exec(*pte)) ++ pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", ++ from_kuid(&init_user_ns, current_uid())); ++ if (pte && pte_present(*pte) && pte_exec(*pte) && ++ (pgd_flags(*pgd) & _PAGE_USER) && ++ (__read_cr4() & X86_CR4_SMEP)) ++ pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n", ++ from_kuid(&init_user_ns, current_uid())); ++ } ++ ++ pr_alert("BUG: unable to handle kernel %s at %px\n", ++ address < PAGE_SIZE ? 
"NULL pointer dereference" : "paging request", ++ (void *)address); ++ ++ dump_pagetable(address); ++} ++ ++static noinline void ++pgtable_bad(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ struct task_struct *tsk; ++ unsigned long flags; ++ int sig; ++ ++ flags = oops_begin(); ++ tsk = current; ++ sig = SIGKILL; ++ ++ printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", ++ tsk->comm, address); ++ dump_pagetable(address); ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.trap_nr = X86_TRAP_PF; ++ tsk->thread.error_code = error_code; ++ ++ if (__die("Bad pagetable", regs, error_code)) ++ sig = 0; ++ ++ oops_end(flags, regs, sig); ++} ++ ++static noinline void ++no_context(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, int signal, int si_code) ++{ ++ struct task_struct *tsk = current; ++ unsigned long flags; ++ int sig; ++ ++ /* Are we prepared to handle this kernel fault? */ ++ if (fixup_exception(regs, X86_TRAP_PF)) { ++ /* ++ * Any interrupt that takes a fault gets the fixup. This makes ++ * the below recursive fault logic only apply to a faults from ++ * task context. ++ */ ++ if (in_interrupt()) ++ return; ++ ++ /* ++ * Per the above we're !in_interrupt(), aka. task context. ++ * ++ * In this case we need to make sure we're not recursively ++ * faulting through the emulate_vsyscall() logic. ++ */ ++ if (current->thread.sig_on_uaccess_err && signal) { ++ tsk->thread.trap_nr = X86_TRAP_PF; ++ tsk->thread.error_code = error_code | X86_PF_USER; ++ tsk->thread.cr2 = address; ++ ++ /* XXX: hwpoison faults will set the wrong code. */ ++ force_sig_info_fault(signal, si_code, address, ++ tsk, NULL, 0); ++ } ++ ++ /* ++ * Barring that, we can do the fixup and be happy. ++ */ ++ return; ++ } ++ ++#ifdef CONFIG_VMAP_STACK ++ /* ++ * Stack overflow? During boot, we can fault near the initial ++ * stack in the direct map, but that's not an overflow -- check ++ * that we're in vmalloc space to avoid this. ++ */ ++ if (is_vmalloc_addr((void *)address) && ++ (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || ++ address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { ++ unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *); ++ /* ++ * We're likely to be running with very little stack space ++ * left. It's plausible that we'd hit this condition but ++ * double-fault even before we get this far, in which case ++ * we're fine: the double-fault handler will deal with it. ++ * ++ * We don't want to make it all the way into the oops code ++ * and then double-fault, though, because we're likely to ++ * break the console driver and lose most of the stack dump. ++ */ ++ asm volatile ("movq %[stack], %%rsp\n\t" ++ "call handle_stack_overflow\n\t" ++ "1: jmp 1b" ++ : ASM_CALL_CONSTRAINT ++ : "D" ("kernel stack overflow (page fault)"), ++ "S" (regs), "d" (address), ++ [stack] "rm" (stack)); ++ unreachable(); ++ } ++#endif ++ ++ /* ++ * 32-bit: ++ * ++ * Valid to do another page fault here, because if this fault ++ * had been triggered by is_prefetch fixup_exception would have ++ * handled it. ++ * ++ * 64-bit: ++ * ++ * Hall of shame of CPU/BIOS bugs. ++ */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ if (is_errata93(regs, address)) ++ return; ++ ++ /* ++ * Oops. The kernel tried to access some bad page. 
We'll have to ++ * terminate things with extreme prejudice: ++ */ ++ flags = oops_begin(); ++ ++ show_fault_oops(regs, error_code, address); ++ ++ if (task_stack_end_corrupted(tsk)) ++ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.trap_nr = X86_TRAP_PF; ++ tsk->thread.error_code = error_code; ++ ++ sig = SIGKILL; ++ if (__die("Oops", regs, error_code)) ++ sig = 0; ++ ++ /* Executive summary in case the body of the oops scrolled away */ ++ printk(KERN_DEFAULT "CR2: %016lx\n", address); ++ ++ oops_end(flags, regs, sig); ++} ++ ++/* ++ * Print out info about fatal segfaults, if the show_unhandled_signals ++ * sysctl is set: ++ */ ++static inline void ++show_signal_msg(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, struct task_struct *tsk) ++{ ++ const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG; ++ ++ if (!unhandled_signal(tsk, SIGSEGV)) ++ return; ++ ++ if (!printk_ratelimit()) ++ return; ++ ++ printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", ++ loglvl, tsk->comm, task_pid_nr(tsk), address, ++ (void *)regs->ip, (void *)regs->sp, error_code); ++ ++ print_vma_addr(KERN_CONT " in ", regs->ip); ++ ++ printk(KERN_CONT "\n"); ++ ++ show_opcodes(regs, loglvl); ++} ++ ++static void ++__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, u32 *pkey, int si_code) ++{ ++ struct task_struct *tsk = current; ++ ++ /* User mode accesses just cause a SIGSEGV */ ++ if (error_code & X86_PF_USER) { ++ /* ++ * It's possible to have interrupts off here: ++ */ ++ local_irq_enable(); ++ ++ /* ++ * Valid to do another page fault here because this one came ++ * from user space: ++ */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ if (is_errata100(regs, address)) ++ return; ++ ++#ifdef CONFIG_X86_64 ++ /* ++ * Instruction fetch faults in the vsyscall page might need ++ * emulation. ++ */ ++ if (unlikely((error_code & X86_PF_INSTR) && ++ ((address & ~0xfff) == VSYSCALL_ADDR))) { ++ if (emulate_vsyscall(regs, address)) ++ return; ++ } ++#endif ++ ++ /* ++ * To avoid leaking information about the kernel page table ++ * layout, pretend that user-mode accesses to kernel addresses ++ * are always protection faults. ++ */ ++ if (address >= TASK_SIZE_MAX) ++ error_code |= X86_PF_PROT; ++ ++ if (likely(show_unhandled_signals)) ++ show_signal_msg(regs, error_code, address, tsk); ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_nr = X86_TRAP_PF; ++ ++ force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0); ++ ++ return; ++ } ++ ++ if (is_f00f_bug(regs, address)) ++ return; ++ ++ no_context(regs, error_code, address, SIGSEGV, si_code); ++} ++ ++static noinline void ++bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, u32 *pkey) ++{ ++ __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR); ++} ++ ++static void ++__bad_area(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, struct vm_area_struct *vma, int si_code) ++{ ++ struct mm_struct *mm = current->mm; ++ u32 pkey; ++ ++ if (vma) ++ pkey = vma_pkey(vma); ++ ++ /* ++ * Something tried to access memory that isn't in our memory map.. ++ * Fix it, but check if it's kernel or user first.. ++ */ ++ up_read(&mm->mmap_sem); ++ ++ __bad_area_nosemaphore(regs, error_code, address, ++ (vma) ? 
&pkey : NULL, si_code); ++} ++ ++static noinline void ++bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) ++{ ++ __bad_area(regs, error_code, address, NULL, SEGV_MAPERR); ++} ++ ++static inline bool bad_area_access_from_pkeys(unsigned long error_code, ++ struct vm_area_struct *vma) ++{ ++ /* This code is always called on the current mm */ ++ bool foreign = false; ++ ++ if (!boot_cpu_has(X86_FEATURE_OSPKE)) ++ return false; ++ if (error_code & X86_PF_PK) ++ return true; ++ /* this checks permission keys on the VMA: */ ++ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), ++ (error_code & X86_PF_INSTR), foreign)) ++ return true; ++ return false; ++} ++ ++static noinline void ++bad_area_access_error(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, struct vm_area_struct *vma) ++{ ++ /* ++ * This OSPKE check is not strictly necessary at runtime. ++ * But, doing it this way allows compiler optimizations ++ * if pkeys are compiled out. ++ */ ++ if (bad_area_access_from_pkeys(error_code, vma)) ++ __bad_area(regs, error_code, address, vma, SEGV_PKUERR); ++ else ++ __bad_area(regs, error_code, address, vma, SEGV_ACCERR); ++} ++ ++static void ++do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, ++ u32 *pkey, unsigned int fault) ++{ ++ struct task_struct *tsk = current; ++ int code = BUS_ADRERR; ++ ++ /* Kernel mode? Handle exceptions or die: */ ++ if (!(error_code & X86_PF_USER)) { ++ no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); ++ return; ++ } ++ ++ /* User-space => ok to do another page fault: */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_nr = X86_TRAP_PF; ++ ++#ifdef CONFIG_MEMORY_FAILURE ++ if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { ++ printk(KERN_ERR ++ "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", ++ tsk->comm, tsk->pid, address); ++ code = BUS_MCEERR_AR; ++ } ++#endif ++ force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault); ++} ++ ++static noinline void ++mm_fault_error(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, u32 *pkey, vm_fault_t fault) ++{ ++ if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) { ++ no_context(regs, error_code, address, 0, 0); ++ return; ++ } ++ ++ if (fault & VM_FAULT_OOM) { ++ /* Kernel mode? Handle exceptions or die: */ ++ if (!(error_code & X86_PF_USER)) { ++ no_context(regs, error_code, address, ++ SIGSEGV, SEGV_MAPERR); ++ return; ++ } ++ ++ /* ++ * We ran out of memory, call the OOM killer, and return the ++ * userspace (which will retry the fault, or kill us if we got ++ * oom-killed): ++ */ ++ pagefault_out_of_memory(); ++ } else { ++ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| ++ VM_FAULT_HWPOISON_LARGE)) ++ do_sigbus(regs, error_code, address, pkey, fault); ++ else if (fault & VM_FAULT_SIGSEGV) ++ bad_area_nosemaphore(regs, error_code, address, pkey); ++ else ++ BUG(); ++ } ++} ++ ++static int spurious_fault_check(unsigned long error_code, pte_t *pte) ++{ ++ if ((error_code & X86_PF_WRITE) && !pte_write(*pte)) ++ return 0; ++ ++ if ((error_code & X86_PF_INSTR) && !pte_exec(*pte)) ++ return 0; ++ /* ++ * Note: We do not do lazy flushing on protection key ++ * changes, so no spurious fault will ever set X86_PF_PK. ++ */ ++ if ((error_code & X86_PF_PK)) ++ return 1; ++ ++ return 1; ++} ++ ++/* ++ * Handle a spurious fault caused by a stale TLB entry. 
++ * ++ * This allows us to lazily refresh the TLB when increasing the ++ * permissions of a kernel page (RO -> RW or NX -> X). Doing it ++ * eagerly is very expensive since that implies doing a full ++ * cross-processor TLB flush, even if no stale TLB entries exist ++ * on other processors. ++ * ++ * Spurious faults may only occur if the TLB contains an entry with ++ * fewer permission than the page table entry. Non-present (P = 0) ++ * and reserved bit (R = 1) faults are never spurious. ++ * ++ * There are no security implications to leaving a stale TLB when ++ * increasing the permissions on a page. ++ * ++ * Returns non-zero if a spurious fault was handled, zero otherwise. ++ * ++ * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3 ++ * (Optional Invalidation). ++ */ ++static noinline int ++spurious_fault(unsigned long error_code, unsigned long address) ++{ ++ pgd_t *pgd; ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ int ret; ++ ++ /* ++ * Only writes to RO or instruction fetches from NX may cause ++ * spurious faults. ++ * ++ * These could be from user or supervisor accesses but the TLB ++ * is only lazily flushed after a kernel mapping protection ++ * change, so user accesses are not expected to cause spurious ++ * faults. ++ */ ++ if (error_code != (X86_PF_WRITE | X86_PF_PROT) && ++ error_code != (X86_PF_INSTR | X86_PF_PROT)) ++ return 0; ++ ++ pgd = init_mm.pgd + pgd_index(address); ++ if (!pgd_present(*pgd)) ++ return 0; ++ ++ p4d = p4d_offset(pgd, address); ++ if (!p4d_present(*p4d)) ++ return 0; ++ ++ if (p4d_large(*p4d)) ++ return spurious_fault_check(error_code, (pte_t *) p4d); ++ ++ pud = pud_offset(p4d, address); ++ if (!pud_present(*pud)) ++ return 0; ++ ++ if (pud_large(*pud)) ++ return spurious_fault_check(error_code, (pte_t *) pud); ++ ++ pmd = pmd_offset(pud, address); ++ if (!pmd_present(*pmd)) ++ return 0; ++ ++ if (pmd_large(*pmd)) ++ return spurious_fault_check(error_code, (pte_t *) pmd); ++ ++ pte = pte_offset_kernel(pmd, address); ++ if (!pte_present(*pte)) ++ return 0; ++ ++ ret = spurious_fault_check(error_code, pte); ++ if (!ret) ++ return 0; ++ ++ /* ++ * Make sure we have permissions in PMD. ++ * If not, then there's a bug in the page tables: ++ */ ++ ret = spurious_fault_check(error_code, (pte_t *) pmd); ++ WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); ++ ++ return ret; ++} ++NOKPROBE_SYMBOL(spurious_fault); ++ ++int show_unhandled_signals = 1; ++ ++static inline int ++access_error(unsigned long error_code, struct vm_area_struct *vma) ++{ ++ /* This is only called for the current mm, so: */ ++ bool foreign = false; ++ ++ /* ++ * Read or write was blocked by protection keys. This is ++ * always an unconditional error and can never result in ++ * a follow-up action to resolve the fault, like a COW. ++ */ ++ if (error_code & X86_PF_PK) ++ return 1; ++ ++ /* ++ * Make sure to check the VMA so that we do not perform ++ * faults just to hit a X86_PF_PK as soon as we fill in a ++ * page. 
++ */ ++ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), ++ (error_code & X86_PF_INSTR), foreign)) ++ return 1; ++ ++ if (error_code & X86_PF_WRITE) { ++ /* write, present and write, not present: */ ++ if (unlikely(!(vma->vm_flags & VM_WRITE))) ++ return 1; ++ return 0; ++ } ++ ++ /* read, present: */ ++ if (unlikely(error_code & X86_PF_PROT)) ++ return 1; ++ ++ /* read, not present: */ ++ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) ++ return 1; ++ ++ return 0; ++} ++ ++static int fault_in_kernel_space(unsigned long address) ++{ ++ return address >= TASK_SIZE_MAX; ++} ++ ++static inline bool smap_violation(int error_code, struct pt_regs *regs) ++{ ++ if (!IS_ENABLED(CONFIG_X86_SMAP)) ++ return false; ++ ++ if (!static_cpu_has(X86_FEATURE_SMAP)) ++ return false; ++ ++ if (error_code & X86_PF_USER) ++ return false; ++ ++ if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) ++ return false; ++ ++ return true; ++} ++ ++/* ++ * This routine handles page faults. It determines the address, ++ * and the problem, and then passes it off to one of the appropriate ++ * routines. ++ */ ++static noinline void ++__do_page_fault(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ struct vm_area_struct *vma; ++ struct task_struct *tsk; ++ struct mm_struct *mm; ++ vm_fault_t fault, major = 0; ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; ++ u32 pkey; ++ ++ tsk = current; ++ mm = tsk->mm; ++ ++ prefetchw(&mm->mmap_sem); ++ ++ if (unlikely(kmmio_fault(regs, address))) ++ return; ++ ++ /* ++ * We fault-in kernel-space virtual memory on-demand. The ++ * 'reference' page table is init_mm.pgd. ++ * ++ * NOTE! We MUST NOT take any locks for this case. We may ++ * be in an interrupt or a critical region, and should ++ * only copy the information from the master page table, ++ * nothing more. ++ * ++ * This verifies that the fault happens in kernel space ++ * (error_code & 4) == 0, and that the fault was not a ++ * protection error (error_code & 9) == 0. ++ */ ++ if (unlikely(fault_in_kernel_space(address))) { ++ if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { ++ if (vmalloc_fault(address) >= 0) ++ return; ++ } ++ ++ /* Can handle a stale RO->RW TLB: */ ++ if (spurious_fault(error_code, address)) ++ return; ++ ++ /* kprobes don't want to hook the spurious faults: */ ++ if (kprobes_fault(regs)) ++ return; ++ /* ++ * Don't take the mm semaphore here. If we fixup a prefetch ++ * fault we could otherwise deadlock: ++ */ ++ bad_area_nosemaphore(regs, error_code, address, NULL); ++ ++ return; ++ } ++ ++ /* kprobes don't want to hook the spurious faults: */ ++ if (unlikely(kprobes_fault(regs))) ++ return; ++ ++ if (unlikely(error_code & X86_PF_RSVD)) ++ pgtable_bad(regs, error_code, address); ++ ++ if (unlikely(smap_violation(error_code, regs))) { ++ bad_area_nosemaphore(regs, error_code, address, NULL); ++ return; ++ } ++ ++ /* ++ * If we're in an interrupt, have no user context or are running ++ * in a region with pagefaults disabled then we must not take the fault ++ */ ++ if (unlikely(faulthandler_disabled() || !mm)) { ++ bad_area_nosemaphore(regs, error_code, address, NULL); ++ return; ++ } ++ ++ /* ++ * It's safe to allow irq's after cr2 has been saved and the ++ * vmalloc fault has been handled. 
++ * ++ * User-mode registers count as a user access even for any ++ * potential system fault or CPU buglet: ++ */ ++ if (user_mode(regs)) { ++ local_irq_enable(); ++ error_code |= X86_PF_USER; ++ flags |= FAULT_FLAG_USER; ++ } else { ++ if (regs->flags & X86_EFLAGS_IF) ++ local_irq_enable(); ++ } ++ ++ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); ++ ++ if (error_code & X86_PF_WRITE) ++ flags |= FAULT_FLAG_WRITE; ++ if (error_code & X86_PF_INSTR) ++ flags |= FAULT_FLAG_INSTRUCTION; ++ ++ /* ++ * When running in the kernel we expect faults to occur only to ++ * addresses in user space. All other faults represent errors in ++ * the kernel and should generate an OOPS. Unfortunately, in the ++ * case of an erroneous fault occurring in a code path which already ++ * holds mmap_sem we will deadlock attempting to validate the fault ++ * against the address space. Luckily the kernel only validly ++ * references user space from well defined areas of code, which are ++ * listed in the exceptions table. ++ * ++ * As the vast majority of faults will be valid we will only perform ++ * the source reference check when there is a possibility of a ++ * deadlock. Attempt to lock the address space, if we cannot we then ++ * validate the source. If this is invalid we can skip the address ++ * space check, thus avoiding the deadlock: ++ */ ++ if (unlikely(!down_read_trylock(&mm->mmap_sem))) { ++ if (!(error_code & X86_PF_USER) && ++ !search_exception_tables(regs->ip)) { ++ bad_area_nosemaphore(regs, error_code, address, NULL); ++ return; ++ } ++retry: ++ down_read(&mm->mmap_sem); ++ } else { ++ /* ++ * The above down_read_trylock() might have succeeded in ++ * which case we'll have missed the might_sleep() from ++ * down_read(): ++ */ ++ might_sleep(); ++ } ++ ++ vma = find_vma(mm, address); ++ if (unlikely(!vma)) { ++ bad_area(regs, error_code, address); ++ return; ++ } ++ if (likely(vma->vm_start <= address)) ++ goto good_area; ++ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { ++ bad_area(regs, error_code, address); ++ return; ++ } ++ if (error_code & X86_PF_USER) { ++ /* ++ * Accessing the stack below %sp is always a bug. ++ * The large cushion allows instructions like enter ++ * and pusha to work. ("enter $65535, $31" pushes ++ * 32 pointers and then decrements %sp by 65535.) ++ */ ++ if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { ++ bad_area(regs, error_code, address); ++ return; ++ } ++ } ++ if (unlikely(expand_stack(vma, address))) { ++ bad_area(regs, error_code, address); ++ return; ++ } ++ ++ /* ++ * Ok, we have a good vm_area for this memory access, so ++ * we can handle it.. ++ */ ++good_area: ++ if (unlikely(access_error(error_code, vma))) { ++ bad_area_access_error(regs, error_code, address, vma); ++ return; ++ } ++ ++ /* ++ * If for any reason at all we couldn't handle the fault, ++ * make sure we exit gracefully rather than endlessly redo ++ * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if ++ * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. ++ * ++ * Note that handle_userfault() may also release and reacquire mmap_sem ++ * (and not return with VM_FAULT_RETRY), when returning to userland to ++ * repeat the page fault later with a VM_FAULT_NOPAGE retval ++ * (potentially after handling any pending signal during the return to ++ * userland). The return to userland is identified whenever ++ * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. 
++ * Thus we have to be careful about not touching vma after handling the ++ * fault, so we read the pkey beforehand. ++ */ ++ pkey = vma_pkey(vma); ++ fault = handle_mm_fault(vma, address, flags); ++ major |= fault & VM_FAULT_MAJOR; ++ ++ /* ++ * If we need to retry the mmap_sem has already been released, ++ * and if there is a fatal signal pending there is no guarantee ++ * that we made any progress. Handle this case first. ++ */ ++ if (unlikely(fault & VM_FAULT_RETRY)) { ++ /* Retry at most once */ ++ if (flags & FAULT_FLAG_ALLOW_RETRY) { ++ flags &= ~FAULT_FLAG_ALLOW_RETRY; ++ flags |= FAULT_FLAG_TRIED; ++ if (!fatal_signal_pending(tsk)) ++ goto retry; ++ } ++ ++ /* User mode? Just return to handle the fatal exception */ ++ if (flags & FAULT_FLAG_USER) ++ return; ++ ++ /* Not returning to user mode? Handle exceptions or die: */ ++ no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); ++ return; ++ } ++ ++ up_read(&mm->mmap_sem); ++ if (unlikely(fault & VM_FAULT_ERROR)) { ++ mm_fault_error(regs, error_code, address, &pkey, fault); ++ return; ++ } ++ ++ /* ++ * Major/minor page fault accounting. If any of the events ++ * returned VM_FAULT_MAJOR, we account it as a major fault. ++ */ ++ if (major) { ++ tsk->maj_flt++; ++ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); ++ } else { ++ tsk->min_flt++; ++ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); ++ } ++ ++ check_v8086_mode(regs, address, tsk); ++} ++NOKPROBE_SYMBOL(__do_page_fault); ++ ++static nokprobe_inline void ++trace_page_fault_entries(unsigned long address, struct pt_regs *regs, ++ unsigned long error_code) ++{ ++ if (user_mode(regs)) ++ trace_page_fault_user(address, regs, error_code); ++ else ++ trace_page_fault_kernel(address, regs, error_code); ++} ++ ++/* ++ * We must have this function blacklisted from kprobes, tagged with notrace ++ * and call read_cr2() before calling anything else. To avoid calling any ++ * kind of tracing machinery before we've observed the CR2 value. ++ * ++ * exception_{enter,exit}() contains all sorts of tracepoints. ++ */ ++dotraplinkage void notrace ++do_page_fault(struct pt_regs *regs, unsigned long error_code) ++{ ++ unsigned long address = read_cr2(); /* Get the faulting address */ ++ enum ctx_state prev_state; ++ ++ prev_state = exception_enter(); ++ if (trace_pagefault_enabled()) ++ trace_page_fault_entries(address, regs, error_code); ++ ++ __do_page_fault(regs, error_code, address); ++ exception_exit(prev_state); ++} ++NOKPROBE_SYMBOL(do_page_fault); +diff -uprN kernel/arch/x86/mm/tlb.c kernel_new/arch/x86/mm/tlb.c +--- kernel/arch/x86/mm/tlb.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/mm/tlb.c 2021-04-01 18:28:07.660863282 +0800 +@@ -153,9 +153,9 @@ void switch_mm(struct mm_struct *prev, s + { + unsigned long flags; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + switch_mm_irqs_off(prev, next, tsk); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static void sync_current_stack_to_mm(struct mm_struct *mm) +@@ -274,7 +274,7 @@ void switch_mm_irqs_off(struct mm_struct + { + struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm); + u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); +- unsigned cpu = smp_processor_id(); ++ unsigned cpu = raw_smp_processor_id(); + u64 next_tlb_gen; + + /* +@@ -286,8 +286,11 @@ void switch_mm_irqs_off(struct mm_struct + * NB: leave_mm() calls us with prev == NULL and tsk == NULL. 
+ */ + ++ WARN_ON_ONCE(IS_ENABLED(CONFIG_IPIPE_DEBUG_INTERNAL) && ++ !hard_irqs_disabled()); ++ + /* We don't want flush_tlb_func_* to run concurrently with us. */ +- if (IS_ENABLED(CONFIG_PROVE_LOCKING)) ++ if (!IS_ENABLED(CONFIG_IPIPE) && IS_ENABLED(CONFIG_PROVE_LOCKING)) + WARN_ON_ONCE(!irqs_disabled()); + + /* +@@ -413,7 +416,7 @@ void switch_mm_irqs_off(struct mm_struct + this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); + } + +- load_mm_cr4(next); ++ load_mm_cr4_irqsoff(next); + switch_ldt(real_prev, next); + } + +@@ -519,6 +522,7 @@ static void flush_tlb_func_common(const + u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen); + u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen); ++ unsigned long flags; + + /* This code cannot presently handle being reentered. */ + VM_WARN_ON(!irqs_disabled()); +@@ -536,7 +540,9 @@ static void flush_tlb_func_common(const + * garbage into our TLB. Since switching to init_mm is barely + * slower than a minimal flush, just switch to init_mm. + */ ++ flags = hard_cond_local_irq_save(); + switch_mm_irqs_off(NULL, &init_mm, NULL); ++ hard_cond_local_irq_restore(flags); + return; + } + +diff -uprN kernel/Documentation/ipipe.rst kernel_new/Documentation/ipipe.rst +--- kernel/Documentation/ipipe.rst 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/Documentation/ipipe.rst 2021-04-01 18:28:07.660863282 +0800 +@@ -0,0 +1,924 @@ ++.. include:: ++ ++=================================== ++The Interrupt Pipeline (aka I-pipe) ++=================================== ++ ++:Copyright: |copy| 2018: Philippe Gerum ++ ++Purpose ++======= ++ ++Using Linux as a host for lightweight software cores specialized in ++delivering very short and bounded response times has been a popular ++way of supporting real-time applications in the embedded space over ++the years. ++ ++This design - known as the *dual kernel* approach - introduces a small ++real-time infrastructure which schedules time-critical activities ++independently from the main kernel. Application threads co-managed by ++this infrastructure still benefit from the ancillary kernel services ++such as virtual memory management, and can also leverage the rich GPOS ++feature set Linux provides such as networking, data storage or GUIs. ++ ++Although the real-time infrastructure has to present specific driver ++stack and API implementations to applications, there are nonetheless ++significant upsides to keeping the real-time core separate from the ++GPOS infrastructure: ++ ++- because the two kernels are independent, real-time activities are ++ not serialized with GPOS operations internally, removing potential ++ delays which might be induced by the non time-critical ++ work. Likewise, there is no requirement for keeping the GPOS ++ operations fine-grained and highly preemptible at any time, which ++ would otherwise induce noticeable overhead on low-end hardware, due ++ to the requirement for pervasive task priority inheritance and IRQ ++ threading. ++ ++- the functional isolation of the real-time infrastructure from the ++ rest of the kernel code restricts common bug hunting to the scope of ++ the smaller kernel, excluding most interactions with the very large ++ GPOS kernel base. 
++ ++- with a dedicated infrastructure providing a specific, well-defined ++ set of real-time services, applications can unambiguously figure out ++ which API calls are available for supporting time-critical work, ++ excluding all the rest as being potentially non-deterministic with ++ respect to response time. ++ ++To support such a *dual kernel system*, we need the kernel to exhibit ++a high-priority execution context, for running out-of-band real-time ++duties concurrently to the regular operations. ++ ++.. NOTE:: The I-pipe only introduces the basic mechanisms for hosting ++such a real-time core, enabling the common programming model for its ++applications in user-space. It does *not* implement the real-time core ++per se, which should be provided by a separate kernel component. ++ ++The issue of interrupt response time ++==================================== ++ ++The real-time core has to act upon device interrupts with no delay, ++regardless of the regular kernel operations which may be ongoing when ++the interrupt is received by the CPU. ++ ++However, to protect from deadlocks and maintain data integrity, Linux ++normally hard disables interrupts around any critical section of code ++which must not be preempted by interrupt handlers on the same CPU, ++enforcing a strictly serialized execution among those contexts. ++ ++The unpredictable delay this may cause before external events can be ++handled is a major roadblock for kernel components requiring ++predictable and very short response times to external events, in the ++range of a few microseconds. ++ ++Therefore, there is a basic requirement for prioritizing interrupt ++masking and delivery between the real-time core and GPOS operations, ++while maintaining consistent internal serialization for the kernel. ++ ++To address this issue, the I-pipe implements a mechanism called ++*interrupt pipelining* turns all device IRQs into NMIs, only to run ++NMI-safe interrupt handlers from the perspective of the regular kernel ++activities. ++ ++Two-stage IRQ pipeline ++====================== ++ ++.. _pipeline ++Interrupt pipelining is a lightweight approach based on the ++introduction of a separate, high-priority execution stage for running ++out-of-band interrupt handlers immediately upon IRQ receipt, which ++cannot be delayed by the in-band, regular kernel work even if the ++latter serializes the execution by - seemingly - disabling interrupts. ++ ++IRQs which have no handlers in the high priority stage may be deferred ++on the receiving CPU until the out-of-band activity has quiesced on ++that CPU. Eventually, the preempted in-band code can resume normally, ++which may involve handling the deferred interrupts. ++ ++In other words, interrupts are flowing down from the out-of-band to ++the in-band interrupt stages, which form a two-stage pipeline for ++prioritizing interrupt delivery. ++ ++The runtime context of the out-of-band interrupt handlers is known as ++the *head stage* of the pipeline, as opposed to the in-band kernel ++activities sitting on the *root stage*:: ++ ++ Out-of-band In-band ++ IRQ handlers() IRQ handlers() ++ __________ _______________________ ______ ++ . / / . . / / . ++ . / / . . / / . ++ . / / . . / / . ++ ___/ /______________________/ / . ++ [IRQ] -----> _______________________________/ . ++ . . . . ++ . Head . . Root . ++ . Stage . . Stage . 
++ _____________________________________________ ++ ++ ++A software core may base its own activities on the head stage, ++interposing on specific IRQ events, for delivering real-time ++capabilities to a particular set of applications. Meanwhile, the ++regular kernel operations keep going over the root stage unaffected, ++only delayed by short preemption times for running the out-of-band ++work. ++ ++.. NOTE:: Interrupt pipelining is a partial implementation of [#f2]_, ++ in which an interrupt *stage* is a limited form of an ++ operating system *domain*. ++ ++Virtual interrupt flag ++---------------------- ++ ++.. _flag: ++As hinted earlier, predictable response time of out-of-band handlers ++to IRQ receipts requires the in-band kernel work not to be allowed to ++delay them by masking interrupts in the CPU. ++ ++However, critical sections delimited this way by the in-band code must ++still be enforced for the *root stage*, so that system integrity is ++not at risk. This means that although out-of-band IRQ handlers may run ++at any time while the *head stage* is accepting interrupts, in-band ++IRQ handlers should be allowed to run only when the root stage is ++accepting interrupts too. ++ ++So we need to decouple the interrupt masking and delivery logic which ++applies to the head stage from the one in effect on the root stage, by ++implementing a dual interrupt control mechanism. ++ ++To this end, a software logic managing a virtual interrupt flag (aka ++*IPIPE_STALL_FLAG*) is introduced by the interrupt pipeline between ++the hardware and the generic IRQ management layer. This logic can mask ++IRQs from the perspective of the regular kernel work when ++:c:func:`local_irq_save`, :c:func:`local_irq_disable` or any ++lock-controlled masking operations like :c:func:`spin_lock_irqsave` is ++called, while still accepting IRQs from the CPU for immediate delivery ++to out-of-band handlers. ++ ++The head stage protects from interrupts by disabling them in the CPU's ++status register, while the root stage disables interrupts only ++virtually. A stage for which interrupts are disabled is said to be ++*stalled*. Conversely, *unstalling* a stage means re-enabling ++interrupts for it. ++ ++Obviously, stalling the head stage implicitly means disabling ++further IRQ receipts for the root stage too. ++ ++Interrupt deferral for the *root stage* ++--------------------------------------- ++ ++.. _deferral: ++.. _deferred: ++When the root stage is stalled by setting the virtual interrupt flag, ++the occurrence of any incoming IRQ which was not delivered to the ++*head stage* is recorded into a per-CPU log, postponing its actual ++delivery to the root stage. ++ ++The delivery of the interrupt event to the corresponding in-band IRQ ++handler is deferred until the in-band kernel code clears the virtual ++interrupt flag by calling :c:func:`local_irq_enable` or any of its ++variants, which unstalls the root stage. When this happens, the ++interrupt state is resynchronized by playing the log, firing the ++in-band handlers for which an IRQ was set pending. ++ ++:: ++ /* Both stages unstalled on entry */ ++ local_irq_save(flags); ++ ++ (pipeline logs IRQx event) ++ ... ++ local_irq_restore(flags); ++ (pipeline plays IRQx event) ++ handle_IRQx_interrupt(); ++ ++If the root stage is unstalled at the time of the IRQ receipt, the ++in-band handler is immediately invoked, just like with the ++non-pipelined IRQ model. ++ ++.. 
NOTE:: The principle of deferring interrupt delivery based on a ++ software flag coupled to an event log has been originally ++ described as "Optimistic interrupt protection" in [#f1]_. ++ ++Device interrupts virtually turned into NMIs ++-------------------------------------------- ++ ++From the standpoint of the in-band kernel code (i.e. the one running ++over the *root* interrupt stage) , the interrupt pipelining logic ++virtually turns all device IRQs into NMIs, for running out-of-band ++handlers. ++ ++.. _re-entry: ++For this reason, out-of-band code may generally **NOT** re-enter ++in-band code, for preventing creepy situations like this one:: ++ ++ /* in-band context */ ++ spin_lock_irqsave(&lock, flags); ++ ++ handle_oob_event(); ++ /* attempted re-entry to in-band from out-of-band. */ ++ in_band_routine(); ++ spin_lock_irqsave(&lock, flags); ++ ++ ... ++ ... ++ ... ++ ... ++ spin_unlock irqrestore(&lock, flags); ++ ++Even in absence of any attempt to get a spinlock recursively, the ++outer in-band code in the example above is entitled to assume that no ++access race can occur on the current CPU while interrupts are ++masked. Re-entering in-band code from an out-of-band handler would ++invalidate this assumption. ++ ++In rare cases, we may need to fix up the in-band kernel routines in ++order to allow out-of-band handlers to call them. Typically, atomic_ ++helpers are such routines, which serialize in-band and out-of-band ++callers. ++ ++Virtual/Synthetic interrupt vectors ++----------------------------------- ++ ++.. _synthetic: ++.. _virtual: ++The pipeline introduces an additional type of interrupts, which are ++purely software-originated, with no hardware involvement. These IRQs ++can be triggered by any kernel code. So-called virtual IRQs are ++inherently per-CPU events. ++ ++Because the common pipeline flow_ applies to virtual interrupts, it ++is possible to attach them to out-of-band and/or in-band handlers, ++just like device interrupts. ++ ++.. NOTE:: virtual interrupts and regular softirqs differ in essence: ++ the latter only exist in the in-band context, and therefore ++ cannot trigger out-of-band activities. ++ ++Virtual interrupt vectors are allocated by a call to ++:c:func:`ipipe_alloc_virq`, and conversely released with ++:c:func:`ipipe_free_virq`. ++ ++For instance, a virtual interrupt can be used for triggering an ++in-band activity on the root stage from the head stage as follows:: ++ ++ #include ++ ++ static void virq_handler(unsigned int virq, void *cookie) ++ { ++ do_in_band_work(); ++ } ++ ++ void install_virq(void) ++ { ++ unsigned int virq; ++ ... ++ virq = ipipe_alloc_virq(); ++ ... ++ ipipe_request_irq(ipipe_root_domain, virq, virq_handler, ++ handler_arg, NULL); ++ } ++ ++An out-of-band handler can schedule the execution of ++:c:func:`virq_handler` like this:: ++ ++ ipipe_post_irq_root(virq); ++ ++Conversely, a virtual interrupt can be handled from the out-of-band ++context:: ++ ++ static void virq_oob_handler(unsigned int virq, void *cookie) ++ { ++ do_oob_work(); ++ } ++ ++ void install_virq(void) ++ { ++ unsigned int virq; ++ ... ++ virq = ipipe_alloc_virq(); ++ ... ++ ipipe_request_irq(ipipe_head_domain, virq, virq_oob_handler, ++ handler_arg, NULL); ++ } ++ ++Any in-band code can trigger the immediate execution of ++:c:func:`virq_oob_handler` on the head stage as follows:: ++ ++ ipipe_post_irq_head(virq); ++ ++Pipelined interrupt flow ++------------------------ ++ ++.. 
_flow: ++When interrupt pipelining is enabled, IRQs are first delivered to the ++pipeline entry point via a call to the generic ++:c:func:`__ipipe_dispatch_irq` routine. Before this happens, the event ++has been propagated through the arch-specific code for handling an IRQ:: ++ ++ asm_irq_entry ++ -> irqchip_handle_irq() ++ -> ipipe_handle_domain_irq() ++ -> __ipipe_grab_irq() ++ -> __ipipe_dispatch_irq() ++ -> irq_flow_handler() ++ ++ ++Contrary to the non-pipelined model, the generic IRQ flow handler does ++*not* call the in-band interrupt handler immediately, but only runs ++the irqchip-specific handler for acknowledging the incoming IRQ event ++in the hardware. ++ ++.. _Holding interrupt lines: ++If the interrupt is either of the *level-triggered*, *fasteoi* or ++*percpu* type, the irqchip is given a chance to hold the interrupt ++line, typically by masking it, until either of the out-of-band or ++in-band handler have run. This addresses the following scenario, which ++happens for a similar reason while an IRQ thread waits for being ++scheduled in, requiring the same kind of provision:: ++ ++ /* root stage stalled on entry */ ++ asm_irq_entry ++ ... ++ -> __ipipe_dispatch_irq() ++ ... ++ ++ asm_irq_exit ++ /* ++ * CPU allowed to accept interrupts again with IRQ cause not ++ * acknowledged in device yet => **IRQ storm**. ++ */ ++ asm_irq_entry ++ ... ++ asm_irq_exit ++ asm_irq_entry ++ ... ++ asm_irq_exit ++ ++IRQ delivery logic ++------------------ ++ ++If an out-of-band handler exists for the interrupt received, ++:c:func:`__ipipe_dispatch_irq` invokes it immediately, after switching ++the execution context to the head stage if not current yet. ++ ++Otherwise, if the execution context is currently over the root stage ++and unstalled, the pipeline core delivers it immediately to the ++in-band handler. ++ ++In all other cases, the interrupt is only set pending into the per-CPU ++log, then the interrupt frame is left. ++ ++Alternate scheduling ++==================== ++ ++The I-pipe promotes the idea that a *dual kernel* system should keep ++the functional overlap between the kernel and the real-time core ++minimal. To this end, a real-time thread should be merely seen as a ++regular task with additional scheduling capabilities guaranteeing very ++low response times. ++ ++To support such idea, the I-pipe enables kthreads and regular user ++tasks to run alternatively in the out-of-band execution context ++introduced by the interrupt pipeline_ (aka *head* stage), or the ++common in-band kernel context for GPOS operations (aka *root* stage). ++ ++As a result, real-time core applications in user-space benefit from ++the common Linux programming model - including virtual memory ++protection -, and still have access to the regular Linux services for ++carrying out non time-critical work. ++ ++Task migration to the head stage ++-------------------------------- ++ ++Low latency response time to events can be achieved when Linux tasks ++wait for them from the out-of-band execution context. The real-time ++core is responsible for switching a task to such a context as part of ++its task management rules; the I-pipe facilitates this migration with ++dedicated services. ++ ++The migration process of a task from the GPOS/in-band context to the ++high-priority, out-of-band context is as follows: ++ ++1. :c:func:`__ipipe_migrate_head` is invoked from the migrating task ++ context, with the same prerequisites than for calling ++ :c:func:`schedule` (preemption enabled, interrupts on). ++ ++.. 
_`in-band sleep operation`: ++2. the caller is put to interruptible sleep state (S). ++ ++3. before resuming in-band operations, the next task picked by the ++ (regular kernel) scheduler on the same CPU for replacing the ++ migrating task fires :c:func:`ipipe_migration_hook` which the ++ real-time core should override (*__weak* binding). Before the call, ++ the head stage is stalled, interrupts are disabled in the CPU. The ++ root execution stage is still current though. ++ ++4. the real-time core's implementation of ++ :c:func:`ipipe_migration_hook` is passed a pointer to the ++ task_struct descriptor of the migrating task. This routine is expected ++ to perform the necessary steps for taking control over the task on ++ behalf of the real-time core, re-scheduling its code appropriately ++ over the head stage. This typically involves resuming it from the ++ `out-of-band suspended state`_ applied during the converse migration ++ path. ++ ++5. at some point later, when the migrated task is picked by the ++ real-time scheduler, it resumes execution on the head stage with ++ the register file previously saved by the kernel scheduler in ++ :c:func:`switch_to` at step 1. ++ ++Task migration to the root stage ++-------------------------------- ++ ++Sometimes, a real-time thread may want to leave the out-of-band ++context, continuing execution from the in-band context instead, so as ++to: ++ ++- run non time-critical (in-band) work involving regular system calls ++ handled by the kernel, ++ ++- recover from CPU exceptions, such as handling major memory access ++ faults, for which there is no point in caring for response time, and ++ therefore makes no sense to duplicate in the real-time core anyway. ++ ++.. NOTE: The discussion about exception_ handling covers the last ++ point in details. ++ ++The migration process of a task from the high-priority, out-of-band ++context to the GPOS/in-band context is as follows:: ++ ++1. the real-time core schedules an in-band handler for execution which ++ should call :c:func:`wake_up_process` to unblock the migrating task ++ from the standpoint of the kernel scheduler. This is the ++ counterpart of the :ref:`in-band sleep operation ` from the converse migration path. A virtual_ IRQ can be ++ used for scheduling such event from the out-of-band context. ++ ++.. _`out-of-band suspended state`: ++2. the real-time core suspends execution of the current task from its ++ own standpoint. The real-time scheduler is assumed to be using the ++ common :c:func:`switch_to` routine for switching task contexts. ++ ++3. at some point later, the out-of-band context is exited by the ++ current CPU when no more high-priority work is left, causing the ++ preempted in-band kernel code to resume execution on the root ++ stage. The handler scheduled at step 1 eventually runs, waking up ++ the migrating task from the standpoint of the kernel. ++ ++4. the migrating task resumes from the tail scheduling code of the ++ real-time scheduler, where it suspended in step 2. Noticing the ++ migration, the real-time core eventually calls ++ :c:func:`__ipipe_reenter_root` for finalizing the transition of the ++ incoming task to the root stage. ++ ++Binding to the real-time core ++----------------------------- ++ ++.. _binding: ++The I-pipe facilitates fine-grained per-thread management from the ++real-time core, as opposed to per-process. 
For this reason, the ++real-time core should at least implement a mechanism for turning a ++regular task into a real-time thread with extended capabilities, ++binding it to the core. ++ ++The real-time core should inform the kernel about its intent to ++receive notifications about that task, by calling ++:c:func::`ipipe_enable_notifier` when such task is current. ++ ++For this reason, the binding operation is usually carried out by a ++dedicated system call exposed by the real-time core, which a regular ++task would invoke. ++ ++.. NOTE:: Whether there should be distinct procedures for binding ++ processes *and* threads to the real-time core, or only a ++ thread binding procedure is up to the real-time core ++ implementation. ++ ++Notifications ++------------- ++ ++Exception handling ++~~~~~~~~~~~~~~~~~~ ++ ++.. _exception ++If a processor exception is raised while the CPU is busy running a ++real-time thread in the out-of-band context (e.g. due to some invalid ++memory access, bad instruction, FPU or alignment error etc), the task ++may have to leave such context immediately if the fault handler is not ++protected against out-of-band interrupts, and therefore cannot be ++properly serialized with out-of-band code. ++ ++The I-pipe notifies the real-time core about incoming exceptions early ++from the low-level fault handlers, but only when some out-of-band code ++was running when the exception was taken. The real-time core may then ++take action, such as reconciling the current task's execution context ++with the kernel's expectations before the task may traverse the ++regular fault handling code. ++ ++.. HINT:: Enabling debuggers to trace real-time thread involves ++ dealing with debug traps the former may poke into the ++ debuggee's code for breakpointing duties. ++ ++The notification is issued by a call to :c:func:`__ipipe_notify_trap` ++which in turn invokes the :c:func:`ipipe_trap_hook` routine the ++real-time core should override for receiving those events (*__weak* ++binding). Interrupts are **disabled** in the CPU when ++:c:func:`ipipe_trap_hook` is called.:: ++ ++ /* out-of-band code running */ ++ *bad_pointer = 42; ++ [ACCESS EXCEPTION] ++ /* low-level fault handler in arch//mm */ ++ -> do_page_fault() ++ -> __ipipe_notify_trap(...) ++ /* real-time core */ ++ -> ipipe_trap_hook(...) ++ -> forced task migration to root stage ++ ... ++ -> handle_mm_fault() ++ ++.. NOTE:: handling minor memory access faults only requiring quick PTE ++ fixups should not involve switching the current task to the ++ in-band context though. Instead, the fixup code should be ++ made atomic_ for serializing accesses from any context. ++ ++System calls ++~~~~~~~~~~~~ ++ ++A real-time core interfaced with the kernel via the I-pipe may ++introduce its own set of system calls. From the standpoint of the ++kernel, this is a foreign set of calls, which can be distinguished ++unambiguously from regular ones based on an arch-specific marker. ++ ++.. HINT:: Syscall numbers from this set might have a different base, ++ and/or some high-order bit set which regular syscall numbers ++ would not have. ++ ++If a task bound to the real-time core issues any system call, ++regardless of which of the kernel or real-time core should handle it, ++the latter must be given the opportunity to: ++ ++- perform the service directly, possibly switching the caller to ++ out-of-band context first would the request require it. 
++ ++- pass the request downward to the normal system call path on the root ++ stage, possibly switching the caller to in-band context if needed. ++ ++If a regular task (i.e. *not* known from the real-time core [yet]) ++issues any foreign system call, the real-time core is given a chance ++to handle it. This way, a foreign system call which would initially ++bind a regular task to the real-time core would be delivered to the ++real-time core as expected (see binding_). ++ ++The I-pipe intercepts system calls early in the kernel entry code, ++delivering them to the proper handler according to the following ++logic:: ++ ++ is_foreign(syscall_nr)? ++ Y: is_bound(task) ++ Y: -> ipipe_fastcall_hook() ++ N: -> ipipe_syscall_hook() ++ N: is_bound(task) ++ Y: -> ipipe_syscall_hook() ++ N: -> normal syscall handling ++ ++:c:func:`ipipe_fastcall_hook` is the fast path for handling foreign ++system calls from tasks already running in out-of-band context. ++ ++:c:func:`ipipe_syscall_hook` is a slower path for handling requests ++which might require the caller to switch to the out-of-band context ++first before proceeding. ++ ++Kernel events ++~~~~~~~~~~~~~ ++ ++The last set of notifications involves pure kernel events which the ++real-time core may need to know about, as they may affect its own task ++management. Except for IPIPE_KEVT_CLEANUP which is called for *any* ++exiting user-space task, all other notifications are only issued for ++tasks bound to the real-time core (which may involve kthreads). ++ ++The notification is issued by a call to :c:func:`__ipipe_notify_kevent` ++which in turn invokes the :c:func:`ipipe_kevent_hook` routine the ++real-time core should override for receiving those events (*__weak* ++binding). Interrupts are **enabled** in the CPU when ++:c:func:`ipipe_kevent_hook` is called. ++ ++The notification hook is given the event type code, and a single ++pointer argument which relates to the event type. ++ ++The following events are defined (include/linux/ipipe_domain.h): ++ ++- IPIPE_KEVT_SCHEDULE(struct task_struct *next) ++ ++ sent in preparation of a context switch, right before the memory ++ context is switched to *next*. ++ ++- IPIPE_KEVT_SIGWAKE(struct task_struct *target) ++ ++ sent when *target* is about to receive a signal. The real-time core ++ may decide to schedule a transition of the recipient to the root ++ stage in order to have it handle that signal asap, which is commonly ++ required for keeping the kernel sane. This notification is always ++ sent from the context of the issuer. ++ ++- IPIPE_KEVT_SETAFFINITY(struct ipipe_migration_data *p) ++ ++ sent when p->task is about to move to CPU p->dest_cpu. ++ ++- IPIPE_KEVT_EXIT(struct task_struct *current) ++ ++ sent from :c:func:`do_exit` before the current task has dropped the ++ files and mappings it owns. ++ ++- IPIPE_KEVT_CLEANUP(struct mm_struct *mm) ++ ++ sent before *mm* is entirely dropped, before the mappings are ++ exited. Per-process resources which might be maintained by the ++ real-time core could be released there, as all threads have exited. ++ ++ ..NOTE:: IPIPE_KEVT_SETSCHED is deprecated, and should not be used. 
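
A minimal sketch of the kernel event hook described above may help; it is not drawn from the patch itself. It assumes the prototype implied by the text (an integer event code plus a single ``void *`` argument, delivered through the *__weak* :c:func:`ipipe_kevent_hook` binding) and uses only the IPIPE_KEVT_* codes listed above::

    /*
     * Illustrative sketch only: a real-time core overriding the __weak
     * ipipe_kevent_hook binding. The prototype is an assumption drawn
     * from the description (event type code + one pointer argument).
     */
    #include <linux/ipipe_domain.h>
    #include <linux/sched.h>

    int ipipe_kevent_hook(int kevent, void *data)
    {
        switch (kevent) {
        case IPIPE_KEVT_SCHEDULE:
            /* data is struct task_struct *next: a context switch to
             * that task is about to happen on this CPU. */
            break;
        case IPIPE_KEVT_SIGWAKE:
            /* data is struct task_struct *target: a signal is about
             * to be delivered; the core may move the target back to
             * the root stage so it can handle the signal promptly. */
            break;
        case IPIPE_KEVT_SETAFFINITY:
            /* data is struct ipipe_migration_data *p: follow the task
             * to p->dest_cpu in the core's own scheduler. */
            break;
        case IPIPE_KEVT_EXIT:
            /* current is exiting: release per-thread core resources. */
            break;
        case IPIPE_KEVT_CLEANUP:
            /* data is struct mm_struct *mm: drop per-process state
             * before the mm is finally dropped. */
            break;
        default:
            break;
        }
        return 0;
    }

As noted above, interrupts are enabled in the CPU when this hook runs, so it may take regular in-band locks if needed.
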
++ ++Prerequisites ++============= ++ ++The interrupt pipeline requires the following features to be available ++from the target kernel: ++ ++- Generic IRQ handling ++- Clock event abstraction ++ ++Implementation ++============== ++ ++The following kernel areas are involved in interrupt pipelining: ++ ++- Generic IRQ core ++ ++ * IRQ flow handlers ++ ++ Generic flow handlers acknowledge the incoming IRQ event in the ++ hardware by calling the appropriate irqchip-specific ++ handler. However, the generic flow_ handlers do not immediately ++ invoke the in-band interrupt handlers, but leave this decision to ++ the pipeline core which calls them, according to the pipelined ++ delivery logic. ++ ++- Arch-specific bits ++ ++ * CPU interrupt mask handling ++ ++ The architecture-specific code which manipulates the interrupt ++ flag in the CPU's state register ++ (i.e. arch//include/asm/irqflags.h) is split between real ++ and virtual interrupt control: ++ ++ + the *hard_local_irq* level helpers affect the hardware state in ++ the CPU. ++ ++ + the *arch_* level helpers affect the virtual interrupt flag_ ++ implemented by the pipeline core for controlling the root stage ++ protection against interrupts. ++ ++ This means that generic helpers from such as ++ :c:func:`local_irq_disable` and :c:func:`local_irq_enable` ++ actually refer to the virtual protection scheme when interrupts ++ are pipelined, implementing interrupt deferral_ for the protected ++ in-band code running over the root stage. ++ ++ * Assembly-level IRQ, exception paths ++ ++ Since interrupts are only virtually masked by the in-band code, ++ IRQs can still be taken by the CPU although they should not be ++ visible from the root stage when they happen in the following ++ situations: ++ ++ + when the virtual protection flag_ is raised, meaning the root ++ stage does not accept IRQs, in which case interrupt _deferral ++ happens. ++ ++ + when the CPU runs out-of-band code, regardless of the state of ++ the virtual protection flag. ++ ++ In both cases, the low-level assembly code handling incoming IRQs ++ takes a fast exit path unwinding the interrupt frame early, ++ instead of running the common in-band epilogue which checks for ++ task rescheduling opportunities and pending signals. ++ ++ Likewise, the low-level fault/exception handling code also takes a ++ fast exit path under the same circumstances. Typically, an ++ out-of-band handler causing a minor page fault should benefit from ++ a lightweight PTE fixup performed by the high-level fault handler, ++ but is not allowed to traverse the rescheduling logic upon return ++ from exception. ++ ++- Scheduler core ++ ++ * CPUIDLE support ++ ++ The logic of the CPUIDLE framework has to account for those ++ specific issues the interrupt pipelining introduces: ++ ++ - the kernel might be idle in the sense that no in-band activity ++ is scheduled yet, and planning to shut down the timer device ++ suffering the C3STOP (mis)feature. However, at the same time, ++ some out-of-band code might wait for a tick event already ++ programmed in the timer hardware controlled by some out-of-band ++ code via the timer_ interposition mechanism. ++ ++ - switching the CPU to a power saving state may incur a ++ significant latency, particularly for waking it up before it can ++ handle an incoming IRQ, which is at odds with the purpose of ++ interrupt pipelining. 
++ ++ Obviously, we don't want the CPUIDLE logic to turn off the ++ hardware timer when C3STOP is in effect for the timer device, ++ which would cause the pending out-of-band event to be ++ lost. ++ ++ Likewise, the wake up latency induced by entering a sleep state on ++ a particular hardware may not always be acceptable. ++ ++ Since the in-band kernel code does not know about the out-of-band ++ code plans by design, CPUIDLE calls :c:func:`ipipe_cpuidle_control` ++ to figure out whether the out-of-band system is fine with entering ++ the idle state as well. This routine should be overriden by the ++ out-of-band code for receiving such notification (*__weak* ++ binding). ++ ++ If this hook returns a boolean *true* value, CPUIDLE proceeds as ++ normally. Otherwise, the CPU is simply denied from entering the ++ idle state, leaving the timer hardware enabled. ++ ++ ..CAUTION:: If some out-of-band code waiting for an external event ++ cannot bear with the latency that might be induced by the default ++ architecture-specific CPU idling code, then CPUIDLE is not usable ++ and should be disabled at build time. ++ ++ * Kernel preemption control (PREEMPT) ++ ++ :c:func:`__preempt_schedule_irq` reconciles the virtual interrupt ++ state - which has not been touched by the assembly level code upon ++ kernel entry - with basic assumptions made by the scheduler core, ++ such as entering with interrupts disabled. It should be called by ++ the arch-specific assembly code in replacement of ++ :c:func:`preempt_schedule_irq`, from the call site dealing with ++ kernel preemption upon return from IRQ or system call. ++ ++- Timer management ++ ++ * Timer interposition ++ ++.. _timer: ++ The timer interposition mechanism is designed for handing over ++ control of the hardware tick device in use by the kernel to an ++ out-of-band timing logic. Typically, a real-time co-kernel would ++ make good use of this feature, for grabbing control over the timer ++ hardware. ++ ++ Once some out-of-band logic has grabbed control over the timer ++ device by calling :c:func:`ipipe_select_timers`, it can install ++ its own out-of-band handlers using :c:func:`ipipe_timer_start`. ++ From that point, it must carry out the timing requests from the ++ in-band timer core (e.g. hrtimers) in addition to its own timing ++ duties. ++ ++ In other words, once the interposition is set up, the ++ functionality of the tick device is shared between the in-band and ++ out-of-band contexts, with only the latter actually programming ++ the hardware. ++ ++ This mechanism is based on the clock event abstraction (`struct ++ clock_event_device`). Clock event devices which may be controlled ++ by this way need their drivers to be specifically adapted for such ++ use: ++ ++ + the interrupt handler receiving tick IRQs must be check with ++ :c:func:`clockevent_ipipe_stolen` whether they actually control ++ the hardware. A non-zero return from this routine means that it ++ does not, and therefore should skip the timer acknowledge ++ code, which would have run earlier in that case. ++ ++- Generic locking & atomic ++ ++ * Generic atomic ops ++ ++.. _atomic: ++ The effect of virtualizing interrupt protection must be reversed ++ for atomic helpers in and ++ , so that no interrupt can preempt ++ their execution, regardless of the stage their caller live ++ on. ++ ++ This is required to keep those helpers usable on data which ++ might be accessed concurrently from both stages. 
++ ++ The usual way to revert such virtualization consists of delimiting ++ the protected section with :c:func:`hard_local_irq_save`, ++ :c:func:`hard_local_irq_restore` calls, in replacement for ++ :c:func:`local_irq_save`, :c:func:`local_irq_restore` ++ respectively. ++ ++ * Hard spinlocks ++ ++ The pipeline core introduces one more spinlock type: ++ ++ + *hard* spinlocks manipulate the CPU interrupt mask, and don't ++ affect the kernel preemption state in locking/unlocking ++ operations. ++ ++ This type of spinlock is useful for implementing a critical ++ section to serialize concurrent accesses from both in-band and ++ out-of-band contexts, i.e. from root and head stages. Obviously, ++ sleeping into a critical section protected by a hard spinlock ++ would be a very bad idea. ++ ++ In other words, hard spinlocks are not subject to virtual ++ interrupt masking, therefore can be used to serialize with ++ out-of-band activities, including from the in-band kernel ++ code. At any rate, those sections ought to be quite short, for ++ keeping latency low. ++ ++- Drivers ++ ++ * IRQ chip drivers ++ ++ .. _irqchip: ++ irqchip drivers need to be specifically adapted for supporting the ++ pipelined interrupt model. The irqchip descriptor gains additional ++ handlers: ++ ++ + irq_chip.irq_hold is an optional handler called by the pipeline ++ core upon events from *level-triggered*, *fasteoi* and *percpu* ++ types. See Holding_ interrupt lines. ++ ++ When specified in the descriptor, irq_chip.irq_hold should ++ perform as follows, depending on the hardware acknowledge logic: ++ ++ + level -> mask[+ack] ++ + percpu -> mask[+ack][+eoi] ++ + fasteoi -> mask+eoi ++ ++ .. CAUTION:: proper acknowledge and/or EOI is important when ++ holding a line, as those operations may also ++ decrease the current interrupt priority level for ++ the CPU, allowing same or lower priority ++ out-of-band interrupts to be taken while the ++ initial IRQ might be deferred_ for the root stage. ++ ++ + irq_chip.irq_release is the converse operation to ++ irq_chip.irq_hold, releasing an interrupt line from the held ++ state. ++ ++ The :c:func:`ipipe_end_irq` routine invokes the available ++ handler for releasing the interrupt line. The pipeline core ++ calls :c:func:`irq_release` automatically for each IRQ which has ++ been accepted by an in-band handler (`IRQ_HANDLED` status). This ++ routine should be called explicitly by out-of-band handlers ++ before returning to their caller. ++ ++ `IRQCHIP_PIPELINE_SAFE` must be added to `struct irqchip::flags` ++ member of a pipeline-aware irqchip driver. ++ ++ .. NOTE:: :c:func:`irq_set_chip` will complain loudly with a ++ kernel warning whenever the irqchip descriptor passed ++ does not bear the `IRQCHIP_PIPELINE_SAFE` flag and ++ CONFIG_IPIPE is enabled. ++ ++- Misc ++ ++ * :c:func:`printk` ++ ++ :c:func:`printk` may be called by out-of-band code safely, without ++ encurring extra latency. The output is delayed until the in-band ++ code resumes, and the console driver(s) can handle it. ++ ++ * Tracing core ++ ++ Tracepoints can be traversed by out-of-band code safely. Dynamic ++ tracing is available to a kernel running the pipelined interrupt ++ model too. ++ ++Terminology ++=========== ++ ++.. 
_terminology: ++====================== ======================================================= ++ Term Definition ++====================== ======================================================= ++Head stage high-priority execution context trigged by out-of-band IRQs ++Root stage regular kernel context performing GPOS work ++Out-of-band code code running over the head stage ++In-band code code running over the root stage ++Scheduler the regular, Linux kernel scheduler ++Real-time scheduler the out-of-band task scheduling logic implemented on top of the I-pipe ++ ++Resources ++========= ++ ++.. [#f1] Stodolsky, Chen & Bershad; "Fast Interrupt Priority Management in Operating System Kernels" ++ https://www.usenix.org/legacy/publications/library/proceedings/micro93/full_papers/stodolsky.txt ++.. [#f2] Yaghmour, Karim; "ADEOS - Adaptive Domain Environment for Operating Systems" ++ https://www.opersys.com/ftp/pub/Adeos/adeos.pdf +diff -uprN kernel/drivers/base/core.c kernel_new/drivers/base/core.c +--- kernel/drivers/base/core.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/base/core.c 2021-04-01 18:28:07.660863282 +0800 +@@ -3267,6 +3267,17 @@ EXPORT_SYMBOL(dev_printk_emit); + static void __dev_printk(const char *level, const struct device *dev, + struct va_format *vaf) + { ++#ifdef CONFIG_IPIPE ++ /* ++ * Console logging only if hard locked, or over the head ++ * stage. ++ */ ++ if (hard_irqs_disabled() || !ipipe_root_p) { ++ __ipipe_log_printk(vaf->fmt, *vaf->va); ++ return; ++ } ++#endif ++ + if (dev) + dev_printk_emit(level[1] - '0', dev, "%s %s: %pV", + dev_driver_string(dev), dev_name(dev), vaf); +diff -uprN kernel/drivers/base/core.c.orig kernel_new/drivers/base/core.c.orig +--- kernel/drivers/base/core.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/base/core.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,3392 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * drivers/base/core.c - core driver model code (device registration, etc) ++ * ++ * Copyright (c) 2002-3 Patrick Mochel ++ * Copyright (c) 2002-3 Open Source Development Labs ++ * Copyright (c) 2006 Greg Kroah-Hartman ++ * Copyright (c) 2006 Novell, Inc. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "base.h" ++#include "power/power.h" ++ ++#ifdef CONFIG_SYSFS_DEPRECATED ++#ifdef CONFIG_SYSFS_DEPRECATED_V2 ++long sysfs_deprecated = 1; ++#else ++long sysfs_deprecated = 0; ++#endif ++static int __init sysfs_deprecated_setup(char *arg) ++{ ++ return kstrtol(arg, 10, &sysfs_deprecated); ++} ++early_param("sysfs.deprecated", sysfs_deprecated_setup); ++#endif ++ ++/* Device links support. 
*/ ++ ++#ifdef CONFIG_SRCU ++static DEFINE_MUTEX(device_links_lock); ++DEFINE_STATIC_SRCU(device_links_srcu); ++ ++static inline void device_links_write_lock(void) ++{ ++ mutex_lock(&device_links_lock); ++} ++ ++static inline void device_links_write_unlock(void) ++{ ++ mutex_unlock(&device_links_lock); ++} ++ ++int device_links_read_lock(void) ++{ ++ return srcu_read_lock(&device_links_srcu); ++} ++ ++void device_links_read_unlock(int idx) ++{ ++ srcu_read_unlock(&device_links_srcu, idx); ++} ++#else /* !CONFIG_SRCU */ ++static DECLARE_RWSEM(device_links_lock); ++ ++static inline void device_links_write_lock(void) ++{ ++ down_write(&device_links_lock); ++} ++ ++static inline void device_links_write_unlock(void) ++{ ++ up_write(&device_links_lock); ++} ++ ++int device_links_read_lock(void) ++{ ++ down_read(&device_links_lock); ++ return 0; ++} ++ ++void device_links_read_unlock(int not_used) ++{ ++ up_read(&device_links_lock); ++} ++#endif /* !CONFIG_SRCU */ ++ ++/** ++ * device_is_dependent - Check if one device depends on another one ++ * @dev: Device to check dependencies for. ++ * @target: Device to check against. ++ * ++ * Check if @target depends on @dev or any device dependent on it (its child or ++ * its consumer etc). Return 1 if that is the case or 0 otherwise. ++ */ ++static int device_is_dependent(struct device *dev, void *target) ++{ ++ struct device_link *link; ++ int ret; ++ ++ if (dev == target) ++ return 1; ++ ++ ret = device_for_each_child(dev, target, device_is_dependent); ++ if (ret) ++ return ret; ++ ++ list_for_each_entry(link, &dev->links.consumers, s_node) { ++ if (link->consumer == target) ++ return 1; ++ ++ ret = device_is_dependent(link->consumer, target); ++ if (ret) ++ break; ++ } ++ return ret; ++} ++ ++static void device_link_init_status(struct device_link *link, ++ struct device *consumer, ++ struct device *supplier) ++{ ++ switch (supplier->links.status) { ++ case DL_DEV_PROBING: ++ switch (consumer->links.status) { ++ case DL_DEV_PROBING: ++ /* ++ * A consumer driver can create a link to a supplier ++ * that has not completed its probing yet as long as it ++ * knows that the supplier is already functional (for ++ * example, it has just acquired some resources from the ++ * supplier). ++ */ ++ link->status = DL_STATE_CONSUMER_PROBE; ++ break; ++ default: ++ link->status = DL_STATE_DORMANT; ++ break; ++ } ++ break; ++ case DL_DEV_DRIVER_BOUND: ++ switch (consumer->links.status) { ++ case DL_DEV_PROBING: ++ link->status = DL_STATE_CONSUMER_PROBE; ++ break; ++ case DL_DEV_DRIVER_BOUND: ++ link->status = DL_STATE_ACTIVE; ++ break; ++ default: ++ link->status = DL_STATE_AVAILABLE; ++ break; ++ } ++ break; ++ case DL_DEV_UNBINDING: ++ link->status = DL_STATE_SUPPLIER_UNBIND; ++ break; ++ default: ++ link->status = DL_STATE_DORMANT; ++ break; ++ } ++} ++ ++static int device_reorder_to_tail(struct device *dev, void *not_used) ++{ ++ struct device_link *link; ++ ++ /* ++ * Devices that have not been registered yet will be put to the ends ++ * of the lists during the registration, so skip them here. 
++ */ ++ if (device_is_registered(dev)) ++ devices_kset_move_last(dev); ++ ++ if (device_pm_initialized(dev)) ++ device_pm_move_last(dev); ++ ++ device_for_each_child(dev, NULL, device_reorder_to_tail); ++ list_for_each_entry(link, &dev->links.consumers, s_node) ++ device_reorder_to_tail(link->consumer, NULL); ++ ++ return 0; ++} ++ ++/** ++ * device_pm_move_to_tail - Move set of devices to the end of device lists ++ * @dev: Device to move ++ * ++ * This is a device_reorder_to_tail() wrapper taking the requisite locks. ++ * ++ * It moves the @dev along with all of its children and all of its consumers ++ * to the ends of the device_kset and dpm_list, recursively. ++ */ ++void device_pm_move_to_tail(struct device *dev) ++{ ++ int idx; ++ ++ idx = device_links_read_lock(); ++ device_pm_lock(); ++ device_reorder_to_tail(dev, NULL); ++ device_pm_unlock(); ++ device_links_read_unlock(idx); ++} ++ ++#define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \ ++ DL_FLAG_AUTOREMOVE_SUPPLIER | \ ++ DL_FLAG_AUTOPROBE_CONSUMER) ++ ++#define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \ ++ DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE) ++ ++/** ++ * device_link_add - Create a link between two devices. ++ * @consumer: Consumer end of the link. ++ * @supplier: Supplier end of the link. ++ * @flags: Link flags. ++ * ++ * The caller is responsible for the proper synchronization of the link creation ++ * with runtime PM. First, setting the DL_FLAG_PM_RUNTIME flag will cause the ++ * runtime PM framework to take the link into account. Second, if the ++ * DL_FLAG_RPM_ACTIVE flag is set in addition to it, the supplier devices will ++ * be forced into the active metastate and reference-counted upon the creation ++ * of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be ++ * ignored. ++ * ++ * If DL_FLAG_STATELESS is set in @flags, the caller of this function is ++ * expected to release the link returned by it directly with the help of either ++ * device_link_del() or device_link_remove(). ++ * ++ * If that flag is not set, however, the caller of this function is handing the ++ * management of the link over to the driver core entirely and its return value ++ * can only be used to check whether or not the link is present. In that case, ++ * the DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_AUTOREMOVE_SUPPLIER device link ++ * flags can be used to indicate to the driver core when the link can be safely ++ * deleted. Namely, setting one of them in @flags indicates to the driver core ++ * that the link is not going to be used (by the given caller of this function) ++ * after unbinding the consumer or supplier driver, respectively, from its ++ * device, so the link can be deleted at that point. If none of them is set, ++ * the link will be maintained until one of the devices pointed to by it (either ++ * the consumer or the supplier) is unregistered. ++ * ++ * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and ++ * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent ++ * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can ++ * be used to request the driver core to automaticall probe for a consmer ++ * driver after successfully binding a driver to the supplier device. ++ * ++ * The combination of DL_FLAG_STATELESS and one of DL_FLAG_AUTOREMOVE_CONSUMER, ++ * DL_FLAG_AUTOREMOVE_SUPPLIER, or DL_FLAG_AUTOPROBE_CONSUMER set in @flags at ++ * the same time is invalid and will cause NULL to be returned upfront. 
++ * However, if a device link between the given @consumer and @supplier pair ++ * exists already when this function is called for them, the existing link will ++ * be returned regardless of its current type and status (the link's flags may ++ * be modified then). The caller of this function is then expected to treat ++ * the link as though it has just been created, so (in particular) if ++ * DL_FLAG_STATELESS was passed in @flags, the link needs to be released ++ * explicitly when not needed any more (as stated above). ++ * ++ * A side effect of the link creation is re-ordering of dpm_list and the ++ * devices_kset list by moving the consumer device and all devices depending ++ * on it to the ends of these lists (that does not happen to devices that have ++ * not been registered when this function is called). ++ * ++ * The supplier device is required to be registered when this function is called ++ * and NULL will be returned if that is not the case. The consumer device need ++ * not be registered, however. ++ */ ++struct device_link *device_link_add(struct device *consumer, ++ struct device *supplier, u32 flags) ++{ ++ struct device_link *link; ++ ++ if (!consumer || !supplier || flags & ~DL_ADD_VALID_FLAGS || ++ (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || ++ (flags & DL_FLAG_AUTOPROBE_CONSUMER && ++ flags & (DL_FLAG_AUTOREMOVE_CONSUMER | ++ DL_FLAG_AUTOREMOVE_SUPPLIER))) ++ return NULL; ++ ++ if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { ++ if (pm_runtime_get_sync(supplier) < 0) { ++ pm_runtime_put_noidle(supplier); ++ return NULL; ++ } ++ } ++ ++ if (!(flags & DL_FLAG_STATELESS)) ++ flags |= DL_FLAG_MANAGED; ++ ++ device_links_write_lock(); ++ device_pm_lock(); ++ ++ /* ++ * If the supplier has not been fully registered yet or there is a ++ * reverse dependency between the consumer and the supplier already in ++ * the graph, return NULL. ++ */ ++ if (!device_pm_initialized(supplier) ++ || device_is_dependent(consumer, supplier)) { ++ link = NULL; ++ goto out; ++ } ++ ++ /* ++ * DL_FLAG_AUTOREMOVE_SUPPLIER indicates that the link will be needed ++ * longer than for DL_FLAG_AUTOREMOVE_CONSUMER and setting them both ++ * together doesn't make sense, so prefer DL_FLAG_AUTOREMOVE_SUPPLIER. ++ */ ++ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) ++ flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; ++ ++ list_for_each_entry(link, &supplier->links.consumers, s_node) { ++ if (link->consumer != consumer) ++ continue; ++ ++ if (flags & DL_FLAG_PM_RUNTIME) { ++ if (!(link->flags & DL_FLAG_PM_RUNTIME)) { ++ pm_runtime_new_link(consumer); ++ link->flags |= DL_FLAG_PM_RUNTIME; ++ } ++ if (flags & DL_FLAG_RPM_ACTIVE) ++ refcount_inc(&link->rpm_active); ++ } ++ ++ if (flags & DL_FLAG_STATELESS) { ++ link->flags |= DL_FLAG_STATELESS; ++ kref_get(&link->kref); ++ goto out; ++ } ++ ++ /* ++ * If the life time of the link following from the new flags is ++ * longer than indicated by the flags of the existing link, ++ * update the existing link to stay around longer. 
++ */ ++ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) { ++ if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { ++ link->flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; ++ link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; ++ } ++ } else if (!(flags & DL_FLAG_AUTOREMOVE_CONSUMER)) { ++ link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER | ++ DL_FLAG_AUTOREMOVE_SUPPLIER); ++ } ++ if (!(link->flags & DL_FLAG_MANAGED)) { ++ kref_get(&link->kref); ++ link->flags |= DL_FLAG_MANAGED; ++ device_link_init_status(link, consumer, supplier); ++ } ++ goto out; ++ } ++ ++ link = kzalloc(sizeof(*link), GFP_KERNEL); ++ if (!link) ++ goto out; ++ ++ refcount_set(&link->rpm_active, 1); ++ ++ if (flags & DL_FLAG_PM_RUNTIME) { ++ if (flags & DL_FLAG_RPM_ACTIVE) ++ refcount_inc(&link->rpm_active); ++ ++ pm_runtime_new_link(consumer); ++ } ++ ++ get_device(supplier); ++ link->supplier = supplier; ++ INIT_LIST_HEAD(&link->s_node); ++ get_device(consumer); ++ link->consumer = consumer; ++ INIT_LIST_HEAD(&link->c_node); ++ link->flags = flags; ++ kref_init(&link->kref); ++ ++ /* Determine the initial link state. */ ++ if (flags & DL_FLAG_STATELESS) ++ link->status = DL_STATE_NONE; ++ else ++ device_link_init_status(link, consumer, supplier); ++ ++ /* ++ * Some callers expect the link creation during consumer driver probe to ++ * resume the supplier even without DL_FLAG_RPM_ACTIVE. ++ */ ++ if (link->status == DL_STATE_CONSUMER_PROBE && ++ flags & DL_FLAG_PM_RUNTIME) ++ pm_runtime_resume(supplier); ++ ++ /* ++ * Move the consumer and all of the devices depending on it to the end ++ * of dpm_list and the devices_kset list. ++ * ++ * It is necessary to hold dpm_list locked throughout all that or else ++ * we may end up suspending with a wrong ordering of it. ++ */ ++ device_reorder_to_tail(consumer, NULL); ++ ++ list_add_tail_rcu(&link->s_node, &supplier->links.consumers); ++ list_add_tail_rcu(&link->c_node, &consumer->links.suppliers); ++ ++ dev_info(consumer, "Linked as a consumer to %s\n", dev_name(supplier)); ++ ++ out: ++ device_pm_unlock(); ++ device_links_write_unlock(); ++ ++ if ((flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) && !link) ++ pm_runtime_put(supplier); ++ ++ return link; ++} ++EXPORT_SYMBOL_GPL(device_link_add); ++ ++static void device_link_free(struct device_link *link) ++{ ++ while (refcount_dec_not_one(&link->rpm_active)) ++ pm_runtime_put(link->supplier); ++ ++ put_device(link->consumer); ++ put_device(link->supplier); ++ kfree(link); ++} ++ ++#ifdef CONFIG_SRCU ++static void __device_link_free_srcu(struct rcu_head *rhead) ++{ ++ device_link_free(container_of(rhead, struct device_link, rcu_head)); ++} ++ ++static void __device_link_del(struct kref *kref) ++{ ++ struct device_link *link = container_of(kref, struct device_link, kref); ++ ++ dev_info(link->consumer, "Dropping the link to %s\n", ++ dev_name(link->supplier)); ++ ++ if (link->flags & DL_FLAG_PM_RUNTIME) ++ pm_runtime_drop_link(link->consumer); ++ ++ list_del_rcu(&link->s_node); ++ list_del_rcu(&link->c_node); ++ call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu); ++} ++#else /* !CONFIG_SRCU */ ++static void __device_link_del(struct kref *kref) ++{ ++ struct device_link *link = container_of(kref, struct device_link, kref); ++ ++ dev_info(link->consumer, "Dropping the link to %s\n", ++ dev_name(link->supplier)); ++ ++ if (link->flags & DL_FLAG_PM_RUNTIME) ++ pm_runtime_drop_link(link->consumer); ++ ++ list_del(&link->s_node); ++ list_del(&link->c_node); ++ device_link_free(link); ++} ++#endif /* !CONFIG_SRCU */ ++ ++static void 
device_link_put_kref(struct device_link *link) ++{ ++ if (link->flags & DL_FLAG_STATELESS) ++ kref_put(&link->kref, __device_link_del); ++ else ++ WARN(1, "Unable to drop a managed device link reference\n"); ++} ++ ++/** ++ * device_link_del - Delete a stateless link between two devices. ++ * @link: Device link to delete. ++ * ++ * The caller must ensure proper synchronization of this function with runtime ++ * PM. If the link was added multiple times, it needs to be deleted as often. ++ * Care is required for hotplugged devices: Their links are purged on removal ++ * and calling device_link_del() is then no longer allowed. ++ */ ++void device_link_del(struct device_link *link) ++{ ++ device_links_write_lock(); ++ device_pm_lock(); ++ device_link_put_kref(link); ++ device_pm_unlock(); ++ device_links_write_unlock(); ++} ++EXPORT_SYMBOL_GPL(device_link_del); ++ ++/** ++ * device_link_remove - Delete a stateless link between two devices. ++ * @consumer: Consumer end of the link. ++ * @supplier: Supplier end of the link. ++ * ++ * The caller must ensure proper synchronization of this function with runtime ++ * PM. ++ */ ++void device_link_remove(void *consumer, struct device *supplier) ++{ ++ struct device_link *link; ++ ++ if (WARN_ON(consumer == supplier)) ++ return; ++ ++ device_links_write_lock(); ++ device_pm_lock(); ++ ++ list_for_each_entry(link, &supplier->links.consumers, s_node) { ++ if (link->consumer == consumer) { ++ device_link_put_kref(link); ++ break; ++ } ++ } ++ ++ device_pm_unlock(); ++ device_links_write_unlock(); ++} ++EXPORT_SYMBOL_GPL(device_link_remove); ++ ++static void device_links_missing_supplier(struct device *dev) ++{ ++ struct device_link *link; ++ ++ list_for_each_entry(link, &dev->links.suppliers, c_node) ++ if (link->status == DL_STATE_CONSUMER_PROBE) ++ WRITE_ONCE(link->status, DL_STATE_AVAILABLE); ++} ++ ++/** ++ * device_links_check_suppliers - Check presence of supplier drivers. ++ * @dev: Consumer device. ++ * ++ * Check links from this device to any suppliers. Walk the list of the device's ++ * links to suppliers and see if all of them are available. If not, simply ++ * return -EPROBE_DEFER. ++ * ++ * We need to guarantee that the supplier will not go away after the check has ++ * been positive here. It only can go away in __device_release_driver() and ++ * that function checks the device's links to consumers. This means we need to ++ * mark the link as "consumer probe in progress" to make the supplier removal ++ * wait for us to complete (or bad things may happen). ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++int device_links_check_suppliers(struct device *dev) ++{ ++ struct device_link *link; ++ int ret = 0; ++ ++ device_links_write_lock(); ++ ++ list_for_each_entry(link, &dev->links.suppliers, c_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ if (link->status != DL_STATE_AVAILABLE) { ++ device_links_missing_supplier(dev); ++ ret = -EPROBE_DEFER; ++ break; ++ } ++ WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE); ++ } ++ dev->links.status = DL_DEV_PROBING; ++ ++ device_links_write_unlock(); ++ return ret; ++} ++ ++/** ++ * device_links_driver_bound - Update device links after probing its driver. ++ * @dev: Device to update the links for. ++ * ++ * The probe has been successful, so update links from this device to any ++ * consumers by changing their status to "available". ++ * ++ * Also change the status of @dev's links to suppliers to "active". 
++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++void device_links_driver_bound(struct device *dev) ++{ ++ struct device_link *link; ++ ++ device_links_write_lock(); ++ ++ list_for_each_entry(link, &dev->links.consumers, s_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ /* ++ * Links created during consumer probe may be in the "consumer ++ * probe" state to start with if the supplier is still probing ++ * when they are created and they may become "active" if the ++ * consumer probe returns first. Skip them here. ++ */ ++ if (link->status == DL_STATE_CONSUMER_PROBE || ++ link->status == DL_STATE_ACTIVE) ++ continue; ++ ++ WARN_ON(link->status != DL_STATE_DORMANT); ++ WRITE_ONCE(link->status, DL_STATE_AVAILABLE); ++ ++ if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER) ++ driver_deferred_probe_add(link->consumer); ++ } ++ ++ list_for_each_entry(link, &dev->links.suppliers, c_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ WARN_ON(link->status != DL_STATE_CONSUMER_PROBE); ++ WRITE_ONCE(link->status, DL_STATE_ACTIVE); ++ } ++ ++ dev->links.status = DL_DEV_DRIVER_BOUND; ++ ++ device_links_write_unlock(); ++} ++ ++static void device_link_drop_managed(struct device_link *link) ++{ ++ link->flags &= ~DL_FLAG_MANAGED; ++ WRITE_ONCE(link->status, DL_STATE_NONE); ++ kref_put(&link->kref, __device_link_del); ++} ++ ++/** ++ * __device_links_no_driver - Update links of a device without a driver. ++ * @dev: Device without a drvier. ++ * ++ * Delete all non-persistent links from this device to any suppliers. ++ * ++ * Persistent links stay around, but their status is changed to "available", ++ * unless they already are in the "supplier unbind in progress" state in which ++ * case they need not be updated. ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++static void __device_links_no_driver(struct device *dev) ++{ ++ struct device_link *link, *ln; ++ ++ list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) ++ device_link_drop_managed(link); ++ else if (link->status == DL_STATE_CONSUMER_PROBE || ++ link->status == DL_STATE_ACTIVE) ++ WRITE_ONCE(link->status, DL_STATE_AVAILABLE); ++ } ++ ++ dev->links.status = DL_DEV_NO_DRIVER; ++} ++ ++/** ++ * device_links_no_driver - Update links after failing driver probe. ++ * @dev: Device whose driver has just failed to probe. ++ * ++ * Clean up leftover links to consumers for @dev and invoke ++ * %__device_links_no_driver() to update links to suppliers for it as ++ * appropriate. ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++void device_links_no_driver(struct device *dev) ++{ ++ struct device_link *link; ++ ++ device_links_write_lock(); ++ ++ list_for_each_entry(link, &dev->links.consumers, s_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ /* ++ * The probe has failed, so if the status of the link is ++ * "consumer probe" or "active", it must have been added by ++ * a probing consumer while this device was still probing. ++ * Change its state to "dormant", as it represents a valid ++ * relationship, but it is not functionally meaningful. 
++ */ ++ if (link->status == DL_STATE_CONSUMER_PROBE || ++ link->status == DL_STATE_ACTIVE) ++ WRITE_ONCE(link->status, DL_STATE_DORMANT); ++ } ++ ++ __device_links_no_driver(dev); ++ ++ device_links_write_unlock(); ++} ++ ++/** ++ * device_links_driver_cleanup - Update links after driver removal. ++ * @dev: Device whose driver has just gone away. ++ * ++ * Update links to consumers for @dev by changing their status to "dormant" and ++ * invoke %__device_links_no_driver() to update links to suppliers for it as ++ * appropriate. ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++void device_links_driver_cleanup(struct device *dev) ++{ ++ struct device_link *link, *ln; ++ ++ device_links_write_lock(); ++ ++ list_for_each_entry_safe(link, ln, &dev->links.consumers, s_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER); ++ WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND); ++ ++ /* ++ * autoremove the links between this @dev and its consumer ++ * devices that are not active, i.e. where the link state ++ * has moved to DL_STATE_SUPPLIER_UNBIND. ++ */ ++ if (link->status == DL_STATE_SUPPLIER_UNBIND && ++ link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) ++ device_link_drop_managed(link); ++ ++ WRITE_ONCE(link->status, DL_STATE_DORMANT); ++ } ++ ++ __device_links_no_driver(dev); ++ ++ device_links_write_unlock(); ++} ++ ++/** ++ * device_links_busy - Check if there are any busy links to consumers. ++ * @dev: Device to check. ++ * ++ * Check each consumer of the device and return 'true' if its link's status ++ * is one of "consumer probe" or "active" (meaning that the given consumer is ++ * probing right now or its driver is present). Otherwise, change the link ++ * state to "supplier unbind" to prevent the consumer from being probed ++ * successfully going forward. ++ * ++ * Return 'false' if there are no probing or active consumers. ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++bool device_links_busy(struct device *dev) ++{ ++ struct device_link *link; ++ bool ret = false; ++ ++ device_links_write_lock(); ++ ++ list_for_each_entry(link, &dev->links.consumers, s_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ if (link->status == DL_STATE_CONSUMER_PROBE ++ || link->status == DL_STATE_ACTIVE) { ++ ret = true; ++ break; ++ } ++ WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); ++ } ++ ++ dev->links.status = DL_DEV_UNBINDING; ++ ++ device_links_write_unlock(); ++ return ret; ++} ++ ++/** ++ * device_links_unbind_consumers - Force unbind consumers of the given device. ++ * @dev: Device to unbind the consumers of. ++ * ++ * Walk the list of links to consumers for @dev and if any of them is in the ++ * "consumer probe" state, wait for all device probes in progress to complete ++ * and start over. ++ * ++ * If that's not the case, change the status of the link to "supplier unbind" ++ * and check if the link was in the "active" state. If so, force the consumer ++ * driver to unbind and start over (the consumer will not re-probe as we have ++ * changed the state of the link already). ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. 
++ */ ++void device_links_unbind_consumers(struct device *dev) ++{ ++ struct device_link *link; ++ ++ start: ++ device_links_write_lock(); ++ ++ list_for_each_entry(link, &dev->links.consumers, s_node) { ++ enum device_link_state status; ++ ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ status = link->status; ++ if (status == DL_STATE_CONSUMER_PROBE) { ++ device_links_write_unlock(); ++ ++ wait_for_device_probe(); ++ goto start; ++ } ++ WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); ++ if (status == DL_STATE_ACTIVE) { ++ struct device *consumer = link->consumer; ++ ++ get_device(consumer); ++ ++ device_links_write_unlock(); ++ ++ device_release_driver_internal(consumer, NULL, ++ consumer->parent); ++ put_device(consumer); ++ goto start; ++ } ++ } ++ ++ device_links_write_unlock(); ++} ++ ++/** ++ * device_links_purge - Delete existing links to other devices. ++ * @dev: Target device. ++ */ ++static void device_links_purge(struct device *dev) ++{ ++ struct device_link *link, *ln; ++ ++ /* ++ * Delete all of the remaining links from this device to any other ++ * devices (either consumers or suppliers). ++ */ ++ device_links_write_lock(); ++ ++ list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { ++ WARN_ON(link->status == DL_STATE_ACTIVE); ++ __device_link_del(&link->kref); ++ } ++ ++ list_for_each_entry_safe_reverse(link, ln, &dev->links.consumers, s_node) { ++ WARN_ON(link->status != DL_STATE_DORMANT && ++ link->status != DL_STATE_NONE); ++ __device_link_del(&link->kref); ++ } ++ ++ device_links_write_unlock(); ++} ++ ++/* Device links support end. */ ++ ++int (*platform_notify)(struct device *dev) = NULL; ++int (*platform_notify_remove)(struct device *dev) = NULL; ++static struct kobject *dev_kobj; ++struct kobject *sysfs_dev_char_kobj; ++struct kobject *sysfs_dev_block_kobj; ++ ++static DEFINE_MUTEX(device_hotplug_lock); ++ ++void lock_device_hotplug(void) ++{ ++ mutex_lock(&device_hotplug_lock); ++} ++ ++void unlock_device_hotplug(void) ++{ ++ mutex_unlock(&device_hotplug_lock); ++} ++ ++int lock_device_hotplug_sysfs(void) ++{ ++ if (mutex_trylock(&device_hotplug_lock)) ++ return 0; ++ ++ /* Avoid busy looping (5 ms of sleep should do). */ ++ msleep(5); ++ return restart_syscall(); ++} ++ ++#ifdef CONFIG_BLOCK ++static inline int device_is_not_partition(struct device *dev) ++{ ++ return !(dev->type == &part_type); ++} ++#else ++static inline int device_is_not_partition(struct device *dev) ++{ ++ return 1; ++} ++#endif ++ ++/** ++ * dev_driver_string - Return a device's driver name, if at all possible ++ * @dev: struct device to get the name of ++ * ++ * Will return the device's driver's name if it is bound to a device. If ++ * the device is not bound to a driver, it will return the name of the bus ++ * it is attached to. If it is not attached to a bus either, an empty ++ * string will be returned. ++ */ ++const char *dev_driver_string(const struct device *dev) ++{ ++ struct device_driver *drv; ++ ++ /* dev->driver can change to NULL underneath us because of unbinding, ++ * so be careful about accessing it. dev->bus and dev->class should ++ * never change once they are set, so they don't need special care. ++ */ ++ drv = READ_ONCE(dev->driver); ++ return drv ? drv->name : ++ (dev->bus ? dev->bus->name : ++ (dev->class ? 
dev->class->name : "")); ++} ++EXPORT_SYMBOL(dev_driver_string); ++ ++#define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) ++ ++static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr, ++ char *buf) ++{ ++ struct device_attribute *dev_attr = to_dev_attr(attr); ++ struct device *dev = kobj_to_dev(kobj); ++ ssize_t ret = -EIO; ++ ++ if (dev_attr->show) ++ ret = dev_attr->show(dev, dev_attr, buf); ++ if (ret >= (ssize_t)PAGE_SIZE) { ++ printk("dev_attr_show: %pS returned bad count\n", ++ dev_attr->show); ++ } ++ return ret; ++} ++ ++static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr, ++ const char *buf, size_t count) ++{ ++ struct device_attribute *dev_attr = to_dev_attr(attr); ++ struct device *dev = kobj_to_dev(kobj); ++ ssize_t ret = -EIO; ++ ++ if (dev_attr->store) ++ ret = dev_attr->store(dev, dev_attr, buf, count); ++ return ret; ++} ++ ++static const struct sysfs_ops dev_sysfs_ops = { ++ .show = dev_attr_show, ++ .store = dev_attr_store, ++}; ++ ++#define to_ext_attr(x) container_of(x, struct dev_ext_attribute, attr) ++ ++ssize_t device_store_ulong(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t size) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ char *end; ++ unsigned long new = simple_strtoul(buf, &end, 0); ++ if (end == buf) ++ return -EINVAL; ++ *(unsigned long *)(ea->var) = new; ++ /* Always return full write size even if we didn't consume all */ ++ return size; ++} ++EXPORT_SYMBOL_GPL(device_store_ulong); ++ ++ssize_t device_show_ulong(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ return snprintf(buf, PAGE_SIZE, "%lx\n", *(unsigned long *)(ea->var)); ++} ++EXPORT_SYMBOL_GPL(device_show_ulong); ++ ++ssize_t device_store_int(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t size) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ char *end; ++ long new = simple_strtol(buf, &end, 0); ++ if (end == buf || new > INT_MAX || new < INT_MIN) ++ return -EINVAL; ++ *(int *)(ea->var) = new; ++ /* Always return full write size even if we didn't consume all */ ++ return size; ++} ++EXPORT_SYMBOL_GPL(device_store_int); ++ ++ssize_t device_show_int(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ ++ return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)(ea->var)); ++} ++EXPORT_SYMBOL_GPL(device_show_int); ++ ++ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, ++ const char *buf, size_t size) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ ++ if (strtobool(buf, ea->var) < 0) ++ return -EINVAL; ++ ++ return size; ++} ++EXPORT_SYMBOL_GPL(device_store_bool); ++ ++ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ ++ return snprintf(buf, PAGE_SIZE, "%d\n", *(bool *)(ea->var)); ++} ++EXPORT_SYMBOL_GPL(device_show_bool); ++ ++/** ++ * device_release - free device structure. ++ * @kobj: device's kobject. ++ * ++ * This is called once the reference count for the object ++ * reaches 0. We forward the call to the device's release ++ * method, which should handle actually freeing the structure. 
++ */ ++static void device_release(struct kobject *kobj) ++{ ++ struct device *dev = kobj_to_dev(kobj); ++ struct device_private *p = dev->p; ++ ++ /* ++ * Some platform devices are driven without driver attached ++ * and managed resources may have been acquired. Make sure ++ * all resources are released. ++ * ++ * Drivers still can add resources into device after device ++ * is deleted but alive, so release devres here to avoid ++ * possible memory leak. ++ */ ++ devres_release_all(dev); ++ ++ if (dev->release) ++ dev->release(dev); ++ else if (dev->type && dev->type->release) ++ dev->type->release(dev); ++ else if (dev->class && dev->class->dev_release) ++ dev->class->dev_release(dev); ++ else ++ WARN(1, KERN_ERR "Device '%s' does not have a release() " ++ "function, it is broken and must be fixed.\n", ++ dev_name(dev)); ++ kfree(p); ++} ++ ++static const void *device_namespace(struct kobject *kobj) ++{ ++ struct device *dev = kobj_to_dev(kobj); ++ const void *ns = NULL; ++ ++ if (dev->class && dev->class->ns_type) ++ ns = dev->class->namespace(dev); ++ ++ return ns; ++} ++ ++static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) ++{ ++ struct device *dev = kobj_to_dev(kobj); ++ ++ if (dev->class && dev->class->get_ownership) ++ dev->class->get_ownership(dev, uid, gid); ++} ++ ++static struct kobj_type device_ktype = { ++ .release = device_release, ++ .sysfs_ops = &dev_sysfs_ops, ++ .namespace = device_namespace, ++ .get_ownership = device_get_ownership, ++}; ++ ++ ++static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) ++{ ++ struct kobj_type *ktype = get_ktype(kobj); ++ ++ if (ktype == &device_ktype) { ++ struct device *dev = kobj_to_dev(kobj); ++ if (dev->bus) ++ return 1; ++ if (dev->class) ++ return 1; ++ } ++ return 0; ++} ++ ++static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj) ++{ ++ struct device *dev = kobj_to_dev(kobj); ++ ++ if (dev->bus) ++ return dev->bus->name; ++ if (dev->class) ++ return dev->class->name; ++ return NULL; ++} ++ ++static int dev_uevent(struct kset *kset, struct kobject *kobj, ++ struct kobj_uevent_env *env) ++{ ++ struct device *dev = kobj_to_dev(kobj); ++ int retval = 0; ++ ++ /* add device node properties if present */ ++ if (MAJOR(dev->devt)) { ++ const char *tmp; ++ const char *name; ++ umode_t mode = 0; ++ kuid_t uid = GLOBAL_ROOT_UID; ++ kgid_t gid = GLOBAL_ROOT_GID; ++ ++ add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt)); ++ add_uevent_var(env, "MINOR=%u", MINOR(dev->devt)); ++ name = device_get_devnode(dev, &mode, &uid, &gid, &tmp); ++ if (name) { ++ add_uevent_var(env, "DEVNAME=%s", name); ++ if (mode) ++ add_uevent_var(env, "DEVMODE=%#o", mode & 0777); ++ if (!uid_eq(uid, GLOBAL_ROOT_UID)) ++ add_uevent_var(env, "DEVUID=%u", from_kuid(&init_user_ns, uid)); ++ if (!gid_eq(gid, GLOBAL_ROOT_GID)) ++ add_uevent_var(env, "DEVGID=%u", from_kgid(&init_user_ns, gid)); ++ kfree(tmp); ++ } ++ } ++ ++ if (dev->type && dev->type->name) ++ add_uevent_var(env, "DEVTYPE=%s", dev->type->name); ++ ++ if (dev->driver) ++ add_uevent_var(env, "DRIVER=%s", dev->driver->name); ++ ++ /* Add common DT information about the device */ ++ of_device_uevent(dev, env); ++ ++ /* have the bus specific function add its stuff */ ++ if (dev->bus && dev->bus->uevent) { ++ retval = dev->bus->uevent(dev, env); ++ if (retval) ++ pr_debug("device: '%s': %s: bus uevent() returned %d\n", ++ dev_name(dev), __func__, retval); ++ } ++ ++ /* have the class specific function add its stuff */ ++ if (dev->class && 
dev->class->dev_uevent) { ++ retval = dev->class->dev_uevent(dev, env); ++ if (retval) ++ pr_debug("device: '%s': %s: class uevent() " ++ "returned %d\n", dev_name(dev), ++ __func__, retval); ++ } ++ ++ /* have the device type specific function add its stuff */ ++ if (dev->type && dev->type->uevent) { ++ retval = dev->type->uevent(dev, env); ++ if (retval) ++ pr_debug("device: '%s': %s: dev_type uevent() " ++ "returned %d\n", dev_name(dev), ++ __func__, retval); ++ } ++ ++ return retval; ++} ++ ++static const struct kset_uevent_ops device_uevent_ops = { ++ .filter = dev_uevent_filter, ++ .name = dev_uevent_name, ++ .uevent = dev_uevent, ++}; ++ ++static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ struct kobject *top_kobj; ++ struct kset *kset; ++ struct kobj_uevent_env *env = NULL; ++ int i; ++ size_t count = 0; ++ int retval; ++ ++ /* search the kset, the device belongs to */ ++ top_kobj = &dev->kobj; ++ while (!top_kobj->kset && top_kobj->parent) ++ top_kobj = top_kobj->parent; ++ if (!top_kobj->kset) ++ goto out; ++ ++ kset = top_kobj->kset; ++ if (!kset->uevent_ops || !kset->uevent_ops->uevent) ++ goto out; ++ ++ /* respect filter */ ++ if (kset->uevent_ops && kset->uevent_ops->filter) ++ if (!kset->uevent_ops->filter(kset, &dev->kobj)) ++ goto out; ++ ++ env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); ++ if (!env) ++ return -ENOMEM; ++ ++ /* let the kset specific function add its keys */ ++ retval = kset->uevent_ops->uevent(kset, &dev->kobj, env); ++ if (retval) ++ goto out; ++ ++ /* copy keys to file */ ++ for (i = 0; i < env->envp_idx; i++) ++ count += sprintf(&buf[count], "%s\n", env->envp[i]); ++out: ++ kfree(env); ++ return count; ++} ++ ++static ssize_t uevent_store(struct device *dev, struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int rc; ++ ++ rc = kobject_synth_uevent(&dev->kobj, buf, count); ++ ++ if (rc) { ++ dev_err(dev, "uevent: failed to send synthetic uevent\n"); ++ return rc; ++ } ++ ++ return count; ++} ++static DEVICE_ATTR_RW(uevent); ++ ++static ssize_t online_show(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ bool val; ++ ++ device_lock(dev); ++ val = !dev->offline; ++ device_unlock(dev); ++ return sprintf(buf, "%u\n", val); ++} ++ ++static ssize_t online_store(struct device *dev, struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ bool val; ++ int ret; ++ ++ ret = strtobool(buf, &val); ++ if (ret < 0) ++ return ret; ++ ++ ret = lock_device_hotplug_sysfs(); ++ if (ret) ++ return ret; ++ ++ ret = val ? device_online(dev) : device_offline(dev); ++ unlock_device_hotplug(); ++ return ret < 0 ? 
ret : count; ++} ++static DEVICE_ATTR_RW(online); ++ ++int device_add_groups(struct device *dev, const struct attribute_group **groups) ++{ ++ return sysfs_create_groups(&dev->kobj, groups); ++} ++EXPORT_SYMBOL_GPL(device_add_groups); ++ ++void device_remove_groups(struct device *dev, ++ const struct attribute_group **groups) ++{ ++ sysfs_remove_groups(&dev->kobj, groups); ++} ++EXPORT_SYMBOL_GPL(device_remove_groups); ++ ++union device_attr_group_devres { ++ const struct attribute_group *group; ++ const struct attribute_group **groups; ++}; ++ ++static int devm_attr_group_match(struct device *dev, void *res, void *data) ++{ ++ return ((union device_attr_group_devres *)res)->group == data; ++} ++ ++static void devm_attr_group_remove(struct device *dev, void *res) ++{ ++ union device_attr_group_devres *devres = res; ++ const struct attribute_group *group = devres->group; ++ ++ dev_dbg(dev, "%s: removing group %p\n", __func__, group); ++ sysfs_remove_group(&dev->kobj, group); ++} ++ ++static void devm_attr_groups_remove(struct device *dev, void *res) ++{ ++ union device_attr_group_devres *devres = res; ++ const struct attribute_group **groups = devres->groups; ++ ++ dev_dbg(dev, "%s: removing groups %p\n", __func__, groups); ++ sysfs_remove_groups(&dev->kobj, groups); ++} ++ ++/** ++ * devm_device_add_group - given a device, create a managed attribute group ++ * @dev: The device to create the group for ++ * @grp: The attribute group to create ++ * ++ * This function creates a group for the first time. It will explicitly ++ * warn and error if any of the attribute files being created already exist. ++ * ++ * Returns 0 on success or error code on failure. ++ */ ++int devm_device_add_group(struct device *dev, const struct attribute_group *grp) ++{ ++ union device_attr_group_devres *devres; ++ int error; ++ ++ devres = devres_alloc(devm_attr_group_remove, ++ sizeof(*devres), GFP_KERNEL); ++ if (!devres) ++ return -ENOMEM; ++ ++ error = sysfs_create_group(&dev->kobj, grp); ++ if (error) { ++ devres_free(devres); ++ return error; ++ } ++ ++ devres->group = grp; ++ devres_add(dev, devres); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(devm_device_add_group); ++ ++/** ++ * devm_device_remove_group: remove a managed group from a device ++ * @dev: device to remove the group from ++ * @grp: group to remove ++ * ++ * This function removes a group of attributes from a device. The attributes ++ * previously have to have been created for this group, otherwise it will fail. ++ */ ++void devm_device_remove_group(struct device *dev, ++ const struct attribute_group *grp) ++{ ++ WARN_ON(devres_release(dev, devm_attr_group_remove, ++ devm_attr_group_match, ++ /* cast away const */ (void *)grp)); ++} ++EXPORT_SYMBOL_GPL(devm_device_remove_group); ++ ++/** ++ * devm_device_add_groups - create a bunch of managed attribute groups ++ * @dev: The device to create the group for ++ * @groups: The attribute groups to create, NULL terminated ++ * ++ * This function creates a bunch of managed attribute groups. If an error ++ * occurs when creating a group, all previously created groups will be ++ * removed, unwinding everything back to the original state when this ++ * function was called. It will explicitly warn and error if any of the ++ * attribute files being created already exist. ++ * ++ * Returns 0 on success or error code from sysfs_create_group on failure. 
++ */ ++int devm_device_add_groups(struct device *dev, ++ const struct attribute_group **groups) ++{ ++ union device_attr_group_devres *devres; ++ int error; ++ ++ devres = devres_alloc(devm_attr_groups_remove, ++ sizeof(*devres), GFP_KERNEL); ++ if (!devres) ++ return -ENOMEM; ++ ++ error = sysfs_create_groups(&dev->kobj, groups); ++ if (error) { ++ devres_free(devres); ++ return error; ++ } ++ ++ devres->groups = groups; ++ devres_add(dev, devres); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(devm_device_add_groups); ++ ++/** ++ * devm_device_remove_groups - remove a list of managed groups ++ * ++ * @dev: The device for the groups to be removed from ++ * @groups: NULL terminated list of groups to be removed ++ * ++ * If groups is not NULL, remove the specified groups from the device. ++ */ ++void devm_device_remove_groups(struct device *dev, ++ const struct attribute_group **groups) ++{ ++ WARN_ON(devres_release(dev, devm_attr_groups_remove, ++ devm_attr_group_match, ++ /* cast away const */ (void *)groups)); ++} ++EXPORT_SYMBOL_GPL(devm_device_remove_groups); ++ ++static int device_add_attrs(struct device *dev) ++{ ++ struct class *class = dev->class; ++ const struct device_type *type = dev->type; ++ int error; ++ ++ if (class) { ++ error = device_add_groups(dev, class->dev_groups); ++ if (error) ++ return error; ++ } ++ ++ if (type) { ++ error = device_add_groups(dev, type->groups); ++ if (error) ++ goto err_remove_class_groups; ++ } ++ ++ error = device_add_groups(dev, dev->groups); ++ if (error) ++ goto err_remove_type_groups; ++ ++ if (device_supports_offline(dev) && !dev->offline_disabled) { ++ error = device_create_file(dev, &dev_attr_online); ++ if (error) ++ goto err_remove_dev_groups; ++ } ++ ++ return 0; ++ ++ err_remove_dev_groups: ++ device_remove_groups(dev, dev->groups); ++ err_remove_type_groups: ++ if (type) ++ device_remove_groups(dev, type->groups); ++ err_remove_class_groups: ++ if (class) ++ device_remove_groups(dev, class->dev_groups); ++ ++ return error; ++} ++ ++static void device_remove_attrs(struct device *dev) ++{ ++ struct class *class = dev->class; ++ const struct device_type *type = dev->type; ++ ++ device_remove_file(dev, &dev_attr_online); ++ device_remove_groups(dev, dev->groups); ++ ++ if (type) ++ device_remove_groups(dev, type->groups); ++ ++ if (class) ++ device_remove_groups(dev, class->dev_groups); ++} ++ ++static ssize_t dev_show(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ return print_dev_t(buf, dev->devt); ++} ++static DEVICE_ATTR_RO(dev); ++ ++/* /sys/devices/ */ ++struct kset *devices_kset; ++ ++/** ++ * devices_kset_move_before - Move device in the devices_kset's list. ++ * @deva: Device to move. ++ * @devb: Device @deva should come before. ++ */ ++static void devices_kset_move_before(struct device *deva, struct device *devb) ++{ ++ if (!devices_kset) ++ return; ++ pr_debug("devices_kset: Moving %s before %s\n", ++ dev_name(deva), dev_name(devb)); ++ spin_lock(&devices_kset->list_lock); ++ list_move_tail(&deva->kobj.entry, &devb->kobj.entry); ++ spin_unlock(&devices_kset->list_lock); ++} ++ ++/** ++ * devices_kset_move_after - Move device in the devices_kset's list. ++ * @deva: Device to move ++ * @devb: Device @deva should come after. 
++ */ ++static void devices_kset_move_after(struct device *deva, struct device *devb) ++{ ++ if (!devices_kset) ++ return; ++ pr_debug("devices_kset: Moving %s after %s\n", ++ dev_name(deva), dev_name(devb)); ++ spin_lock(&devices_kset->list_lock); ++ list_move(&deva->kobj.entry, &devb->kobj.entry); ++ spin_unlock(&devices_kset->list_lock); ++} ++ ++/** ++ * devices_kset_move_last - move the device to the end of devices_kset's list. ++ * @dev: device to move ++ */ ++void devices_kset_move_last(struct device *dev) ++{ ++ if (!devices_kset) ++ return; ++ pr_debug("devices_kset: Moving %s to end of list\n", dev_name(dev)); ++ spin_lock(&devices_kset->list_lock); ++ list_move_tail(&dev->kobj.entry, &devices_kset->list); ++ spin_unlock(&devices_kset->list_lock); ++} ++ ++/** ++ * device_create_file - create sysfs attribute file for device. ++ * @dev: device. ++ * @attr: device attribute descriptor. ++ */ ++int device_create_file(struct device *dev, ++ const struct device_attribute *attr) ++{ ++ int error = 0; ++ ++ if (dev) { ++ WARN(((attr->attr.mode & S_IWUGO) && !attr->store), ++ "Attribute %s: write permission without 'store'\n", ++ attr->attr.name); ++ WARN(((attr->attr.mode & S_IRUGO) && !attr->show), ++ "Attribute %s: read permission without 'show'\n", ++ attr->attr.name); ++ error = sysfs_create_file(&dev->kobj, &attr->attr); ++ } ++ ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_create_file); ++ ++/** ++ * device_remove_file - remove sysfs attribute file. ++ * @dev: device. ++ * @attr: device attribute descriptor. ++ */ ++void device_remove_file(struct device *dev, ++ const struct device_attribute *attr) ++{ ++ if (dev) ++ sysfs_remove_file(&dev->kobj, &attr->attr); ++} ++EXPORT_SYMBOL_GPL(device_remove_file); ++ ++/** ++ * device_remove_file_self - remove sysfs attribute file from its own method. ++ * @dev: device. ++ * @attr: device attribute descriptor. ++ * ++ * See kernfs_remove_self() for details. ++ */ ++bool device_remove_file_self(struct device *dev, ++ const struct device_attribute *attr) ++{ ++ if (dev) ++ return sysfs_remove_file_self(&dev->kobj, &attr->attr); ++ else ++ return false; ++} ++EXPORT_SYMBOL_GPL(device_remove_file_self); ++ ++/** ++ * device_create_bin_file - create sysfs binary attribute file for device. ++ * @dev: device. ++ * @attr: device binary attribute descriptor. ++ */ ++int device_create_bin_file(struct device *dev, ++ const struct bin_attribute *attr) ++{ ++ int error = -EINVAL; ++ if (dev) ++ error = sysfs_create_bin_file(&dev->kobj, attr); ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_create_bin_file); ++ ++/** ++ * device_remove_bin_file - remove sysfs binary attribute file ++ * @dev: device. ++ * @attr: device binary attribute descriptor. ++ */ ++void device_remove_bin_file(struct device *dev, ++ const struct bin_attribute *attr) ++{ ++ if (dev) ++ sysfs_remove_bin_file(&dev->kobj, attr); ++} ++EXPORT_SYMBOL_GPL(device_remove_bin_file); ++ ++static void klist_children_get(struct klist_node *n) ++{ ++ struct device_private *p = to_device_private_parent(n); ++ struct device *dev = p->device; ++ ++ get_device(dev); ++} ++ ++static void klist_children_put(struct klist_node *n) ++{ ++ struct device_private *p = to_device_private_parent(n); ++ struct device *dev = p->device; ++ ++ put_device(dev); ++} ++ ++/** ++ * device_initialize - init device structure. ++ * @dev: device. ++ * ++ * This prepares the device for use by other layers by initializing ++ * its fields. 
++ * It is the first half of device_register(), if called by ++ * that function, though it can also be called separately, so one ++ * may use @dev's fields. In particular, get_device()/put_device() ++ * may be used for reference counting of @dev after calling this ++ * function. ++ * ++ * All fields in @dev must be initialized by the caller to 0, except ++ * for those explicitly set to some other value. The simplest ++ * approach is to use kzalloc() to allocate the structure containing ++ * @dev. ++ * ++ * NOTE: Use put_device() to give up your reference instead of freeing ++ * @dev directly once you have called this function. ++ */ ++void device_initialize(struct device *dev) ++{ ++ dev->kobj.kset = devices_kset; ++ kobject_init(&dev->kobj, &device_ktype); ++ INIT_LIST_HEAD(&dev->dma_pools); ++ mutex_init(&dev->mutex); ++ lockdep_set_novalidate_class(&dev->mutex); ++ spin_lock_init(&dev->devres_lock); ++ INIT_LIST_HEAD(&dev->devres_head); ++ device_pm_init(dev); ++ set_dev_node(dev, -1); ++#ifdef CONFIG_GENERIC_MSI_IRQ ++ INIT_LIST_HEAD(&dev->msi_list); ++#endif ++ INIT_LIST_HEAD(&dev->links.consumers); ++ INIT_LIST_HEAD(&dev->links.suppliers); ++ dev->links.status = DL_DEV_NO_DRIVER; ++} ++EXPORT_SYMBOL_GPL(device_initialize); ++ ++struct kobject *virtual_device_parent(struct device *dev) ++{ ++ static struct kobject *virtual_dir = NULL; ++ ++ if (!virtual_dir) ++ virtual_dir = kobject_create_and_add("virtual", ++ &devices_kset->kobj); ++ ++ return virtual_dir; ++} ++ ++struct class_dir { ++ struct kobject kobj; ++ struct class *class; ++}; ++ ++#define to_class_dir(obj) container_of(obj, struct class_dir, kobj) ++ ++static void class_dir_release(struct kobject *kobj) ++{ ++ struct class_dir *dir = to_class_dir(kobj); ++ kfree(dir); ++} ++ ++static const ++struct kobj_ns_type_operations *class_dir_child_ns_type(struct kobject *kobj) ++{ ++ struct class_dir *dir = to_class_dir(kobj); ++ return dir->class->ns_type; ++} ++ ++static struct kobj_type class_dir_ktype = { ++ .release = class_dir_release, ++ .sysfs_ops = &kobj_sysfs_ops, ++ .child_ns_type = class_dir_child_ns_type ++}; ++ ++static struct kobject * ++class_dir_create_and_add(struct class *class, struct kobject *parent_kobj) ++{ ++ struct class_dir *dir; ++ int retval; ++ ++ dir = kzalloc(sizeof(*dir), GFP_KERNEL); ++ if (!dir) ++ return ERR_PTR(-ENOMEM); ++ ++ dir->class = class; ++ kobject_init(&dir->kobj, &class_dir_ktype); ++ ++ dir->kobj.kset = &class->p->glue_dirs; ++ ++ retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name); ++ if (retval < 0) { ++ kobject_put(&dir->kobj); ++ return ERR_PTR(retval); ++ } ++ return &dir->kobj; ++} ++ ++static DEFINE_MUTEX(gdp_mutex); ++ ++static struct kobject *get_device_parent(struct device *dev, ++ struct device *parent) ++{ ++ if (dev->class) { ++ struct kobject *kobj = NULL; ++ struct kobject *parent_kobj; ++ struct kobject *k; ++ ++#ifdef CONFIG_BLOCK ++ /* block disks show up in /sys/block */ ++ if (sysfs_deprecated && dev->class == &block_class) { ++ if (parent && parent->class == &block_class) ++ return &parent->kobj; ++ return &block_class.p->subsys.kobj; ++ } ++#endif ++ ++ /* ++ * If we have no parent, we live in "virtual". ++ * Class-devices with a non class-device as parent, live ++ * in a "glue" directory to prevent namespace collisions. 
++ */ ++ if (parent == NULL) ++ parent_kobj = virtual_device_parent(dev); ++ else if (parent->class && !dev->class->ns_type) ++ return &parent->kobj; ++ else ++ parent_kobj = &parent->kobj; ++ ++ mutex_lock(&gdp_mutex); ++ ++ /* find our class-directory at the parent and reference it */ ++ spin_lock(&dev->class->p->glue_dirs.list_lock); ++ list_for_each_entry(k, &dev->class->p->glue_dirs.list, entry) ++ if (k->parent == parent_kobj) { ++ kobj = kobject_get(k); ++ break; ++ } ++ spin_unlock(&dev->class->p->glue_dirs.list_lock); ++ if (kobj) { ++ mutex_unlock(&gdp_mutex); ++ return kobj; ++ } ++ ++ /* or create a new class-directory at the parent device */ ++ k = class_dir_create_and_add(dev->class, parent_kobj); ++ /* do not emit an uevent for this simple "glue" directory */ ++ mutex_unlock(&gdp_mutex); ++ return k; ++ } ++ ++ /* subsystems can specify a default root directory for their devices */ ++ if (!parent && dev->bus && dev->bus->dev_root) ++ return &dev->bus->dev_root->kobj; ++ ++ if (parent) ++ return &parent->kobj; ++ return NULL; ++} ++ ++static inline bool live_in_glue_dir(struct kobject *kobj, ++ struct device *dev) ++{ ++ if (!kobj || !dev->class || ++ kobj->kset != &dev->class->p->glue_dirs) ++ return false; ++ return true; ++} ++ ++static inline struct kobject *get_glue_dir(struct device *dev) ++{ ++ return dev->kobj.parent; ++} ++ ++/* ++ * make sure cleaning up dir as the last step, we need to make ++ * sure .release handler of kobject is run with holding the ++ * global lock ++ */ ++static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) ++{ ++ unsigned int ref; ++ ++ /* see if we live in a "glue" directory */ ++ if (!live_in_glue_dir(glue_dir, dev)) ++ return; ++ ++ mutex_lock(&gdp_mutex); ++ /** ++ * There is a race condition between removing glue directory ++ * and adding a new device under the glue directory. ++ * ++ * CPU1: CPU2: ++ * ++ * device_add() ++ * get_device_parent() ++ * class_dir_create_and_add() ++ * kobject_add_internal() ++ * create_dir() // create glue_dir ++ * ++ * device_add() ++ * get_device_parent() ++ * kobject_get() // get glue_dir ++ * ++ * device_del() ++ * cleanup_glue_dir() ++ * kobject_del(glue_dir) ++ * ++ * kobject_add() ++ * kobject_add_internal() ++ * create_dir() // in glue_dir ++ * sysfs_create_dir_ns() ++ * kernfs_create_dir_ns(sd) ++ * ++ * sysfs_remove_dir() // glue_dir->sd=NULL ++ * sysfs_put() // free glue_dir->sd ++ * ++ * // sd is freed ++ * kernfs_new_node(sd) ++ * kernfs_get(glue_dir) ++ * kernfs_add_one() ++ * kernfs_put() ++ * ++ * Before CPU1 remove last child device under glue dir, if CPU2 add ++ * a new device under glue dir, the glue_dir kobject reference count ++ * will be increase to 2 in kobject_get(k). And CPU2 has been called ++ * kernfs_create_dir_ns(). Meanwhile, CPU1 call sysfs_remove_dir() ++ * and sysfs_put(). This result in glue_dir->sd is freed. ++ * ++ * Then the CPU2 will see a stale "empty" but still potentially used ++ * glue dir around in kernfs_new_node(). ++ * ++ * In order to avoid this happening, we also should make sure that ++ * kernfs_node for glue_dir is released in CPU1 only when refcount ++ * for glue_dir kobj is 1. 
++ */ ++ ref = kref_read(&glue_dir->kref); ++ if (!kobject_has_children(glue_dir) && !--ref) ++ kobject_del(glue_dir); ++ kobject_put(glue_dir); ++ mutex_unlock(&gdp_mutex); ++} ++ ++static int device_add_class_symlinks(struct device *dev) ++{ ++ struct device_node *of_node = dev_of_node(dev); ++ int error; ++ ++ if (of_node) { ++ error = sysfs_create_link(&dev->kobj, of_node_kobj(of_node), "of_node"); ++ if (error) ++ dev_warn(dev, "Error %d creating of_node link\n",error); ++ /* An error here doesn't warrant bringing down the device */ ++ } ++ ++ if (!dev->class) ++ return 0; ++ ++ error = sysfs_create_link(&dev->kobj, ++ &dev->class->p->subsys.kobj, ++ "subsystem"); ++ if (error) ++ goto out_devnode; ++ ++ if (dev->parent && device_is_not_partition(dev)) { ++ error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, ++ "device"); ++ if (error) ++ goto out_subsys; ++ } ++ ++#ifdef CONFIG_BLOCK ++ /* /sys/block has directories and does not need symlinks */ ++ if (sysfs_deprecated && dev->class == &block_class) ++ return 0; ++#endif ++ ++ /* link in the class directory pointing to the device */ ++ error = sysfs_create_link(&dev->class->p->subsys.kobj, ++ &dev->kobj, dev_name(dev)); ++ if (error) ++ goto out_device; ++ ++ return 0; ++ ++out_device: ++ sysfs_remove_link(&dev->kobj, "device"); ++ ++out_subsys: ++ sysfs_remove_link(&dev->kobj, "subsystem"); ++out_devnode: ++ sysfs_remove_link(&dev->kobj, "of_node"); ++ return error; ++} ++ ++static void device_remove_class_symlinks(struct device *dev) ++{ ++ if (dev_of_node(dev)) ++ sysfs_remove_link(&dev->kobj, "of_node"); ++ ++ if (!dev->class) ++ return; ++ ++ if (dev->parent && device_is_not_partition(dev)) ++ sysfs_remove_link(&dev->kobj, "device"); ++ sysfs_remove_link(&dev->kobj, "subsystem"); ++#ifdef CONFIG_BLOCK ++ if (sysfs_deprecated && dev->class == &block_class) ++ return; ++#endif ++ sysfs_delete_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev)); ++} ++ ++/** ++ * dev_set_name - set a device name ++ * @dev: device ++ * @fmt: format string for the device's name ++ */ ++int dev_set_name(struct device *dev, const char *fmt, ...) ++{ ++ va_list vargs; ++ int err; ++ ++ va_start(vargs, fmt); ++ err = kobject_set_name_vargs(&dev->kobj, fmt, vargs); ++ va_end(vargs); ++ return err; ++} ++EXPORT_SYMBOL_GPL(dev_set_name); ++ ++/** ++ * device_to_dev_kobj - select a /sys/dev/ directory for the device ++ * @dev: device ++ * ++ * By default we select char/ for new entries. Setting class->dev_obj ++ * to NULL prevents an entry from being created. class->dev_kobj must ++ * be set (or cleared) before any devices are registered to the class ++ * otherwise device_create_sys_dev_entry() and ++ * device_remove_sys_dev_entry() will disagree about the presence of ++ * the link. 
++ */ ++static struct kobject *device_to_dev_kobj(struct device *dev) ++{ ++ struct kobject *kobj; ++ ++ if (dev->class) ++ kobj = dev->class->dev_kobj; ++ else ++ kobj = sysfs_dev_char_kobj; ++ ++ return kobj; ++} ++ ++static int device_create_sys_dev_entry(struct device *dev) ++{ ++ struct kobject *kobj = device_to_dev_kobj(dev); ++ int error = 0; ++ char devt_str[15]; ++ ++ if (kobj) { ++ format_dev_t(devt_str, dev->devt); ++ error = sysfs_create_link(kobj, &dev->kobj, devt_str); ++ } ++ ++ return error; ++} ++ ++static void device_remove_sys_dev_entry(struct device *dev) ++{ ++ struct kobject *kobj = device_to_dev_kobj(dev); ++ char devt_str[15]; ++ ++ if (kobj) { ++ format_dev_t(devt_str, dev->devt); ++ sysfs_remove_link(kobj, devt_str); ++ } ++} ++ ++static int device_private_init(struct device *dev) ++{ ++ dev->p = kzalloc(sizeof(*dev->p), GFP_KERNEL); ++ if (!dev->p) ++ return -ENOMEM; ++ dev->p->device = dev; ++ klist_init(&dev->p->klist_children, klist_children_get, ++ klist_children_put); ++ INIT_LIST_HEAD(&dev->p->deferred_probe); ++ return 0; ++} ++ ++/** ++ * device_add - add device to device hierarchy. ++ * @dev: device. ++ * ++ * This is part 2 of device_register(), though may be called ++ * separately _iff_ device_initialize() has been called separately. ++ * ++ * This adds @dev to the kobject hierarchy via kobject_add(), adds it ++ * to the global and sibling lists for the device, then ++ * adds it to the other relevant subsystems of the driver model. ++ * ++ * Do not call this routine or device_register() more than once for ++ * any device structure. The driver model core is not designed to work ++ * with devices that get unregistered and then spring back to life. ++ * (Among other things, it's very hard to guarantee that all references ++ * to the previous incarnation of @dev have been dropped.) Allocate ++ * and register a fresh new struct device instead. ++ * ++ * NOTE: _Never_ directly free @dev after calling this function, even ++ * if it returned an error! Always use put_device() to give up your ++ * reference instead. ++ */ ++int device_add(struct device *dev) ++{ ++ struct device *parent; ++ struct kobject *kobj; ++ struct class_interface *class_intf; ++ int error = -EINVAL; ++ struct kobject *glue_dir = NULL; ++ ++ dev = get_device(dev); ++ if (!dev) ++ goto done; ++ ++ if (!dev->p) { ++ error = device_private_init(dev); ++ if (error) ++ goto done; ++ } ++ ++ /* ++ * for statically allocated devices, which should all be converted ++ * some day, we need to initialize the name. We prevent reading back ++ * the name, and force the use of dev_name() ++ */ ++ if (dev->init_name) { ++ dev_set_name(dev, "%s", dev->init_name); ++ dev->init_name = NULL; ++ } ++ ++ /* subsystems can specify simple device enumeration */ ++ if (!dev_name(dev) && dev->bus && dev->bus->dev_name) ++ dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id); ++ ++ if (!dev_name(dev)) { ++ error = -EINVAL; ++ goto name_error; ++ } ++ ++ pr_debug("device: '%s': %s\n", dev_name(dev), __func__); ++ ++ parent = get_device(dev->parent); ++ kobj = get_device_parent(dev, parent); ++ if (IS_ERR(kobj)) { ++ error = PTR_ERR(kobj); ++ goto parent_error; ++ } ++ if (kobj) ++ dev->kobj.parent = kobj; ++ ++ /* use parent numa_node */ ++ if (parent && (dev_to_node(dev) == NUMA_NO_NODE)) ++ set_dev_node(dev, dev_to_node(parent)); ++ ++ /* first, register with generic layer. 
*/ ++ /* we require the name to be set before, and pass NULL */ ++ error = kobject_add(&dev->kobj, dev->kobj.parent, NULL); ++ if (error) { ++ glue_dir = get_glue_dir(dev); ++ goto Error; ++ } ++ ++ /* notify platform of device entry */ ++ if (platform_notify) ++ platform_notify(dev); ++ ++ error = device_create_file(dev, &dev_attr_uevent); ++ if (error) ++ goto attrError; ++ ++ error = device_add_class_symlinks(dev); ++ if (error) ++ goto SymlinkError; ++ error = device_add_attrs(dev); ++ if (error) ++ goto AttrsError; ++ error = bus_add_device(dev); ++ if (error) ++ goto BusError; ++ error = dpm_sysfs_add(dev); ++ if (error) ++ goto DPMError; ++ device_pm_add(dev); ++ ++ if (MAJOR(dev->devt)) { ++ error = device_create_file(dev, &dev_attr_dev); ++ if (error) ++ goto DevAttrError; ++ ++ error = device_create_sys_dev_entry(dev); ++ if (error) ++ goto SysEntryError; ++ ++ devtmpfs_create_node(dev); ++ } ++ ++ /* Notify clients of device addition. This call must come ++ * after dpm_sysfs_add() and before kobject_uevent(). ++ */ ++ if (dev->bus) ++ blocking_notifier_call_chain(&dev->bus->p->bus_notifier, ++ BUS_NOTIFY_ADD_DEVICE, dev); ++ ++ kobject_uevent(&dev->kobj, KOBJ_ADD); ++ bus_probe_device(dev); ++ if (parent) ++ klist_add_tail(&dev->p->knode_parent, ++ &parent->p->klist_children); ++ ++ if (dev->class) { ++ mutex_lock(&dev->class->p->mutex); ++ /* tie the class to the device */ ++ klist_add_tail(&dev->knode_class, ++ &dev->class->p->klist_devices); ++ ++ /* notify any interfaces that the device is here */ ++ list_for_each_entry(class_intf, ++ &dev->class->p->interfaces, node) ++ if (class_intf->add_dev) ++ class_intf->add_dev(dev, class_intf); ++ mutex_unlock(&dev->class->p->mutex); ++ } ++done: ++ put_device(dev); ++ return error; ++ SysEntryError: ++ if (MAJOR(dev->devt)) ++ device_remove_file(dev, &dev_attr_dev); ++ DevAttrError: ++ device_pm_remove(dev); ++ dpm_sysfs_remove(dev); ++ DPMError: ++ bus_remove_device(dev); ++ BusError: ++ device_remove_attrs(dev); ++ AttrsError: ++ device_remove_class_symlinks(dev); ++ SymlinkError: ++ device_remove_file(dev, &dev_attr_uevent); ++ attrError: ++ kobject_uevent(&dev->kobj, KOBJ_REMOVE); ++ glue_dir = get_glue_dir(dev); ++ kobject_del(&dev->kobj); ++ Error: ++ cleanup_glue_dir(dev, glue_dir); ++parent_error: ++ put_device(parent); ++name_error: ++ kfree(dev->p); ++ dev->p = NULL; ++ goto done; ++} ++EXPORT_SYMBOL_GPL(device_add); ++ ++/** ++ * device_register - register a device with the system. ++ * @dev: pointer to the device structure ++ * ++ * This happens in two clean steps - initialize the device ++ * and add it to the system. The two steps can be called ++ * separately, but this is the easiest and most common. ++ * I.e. you should only call the two helpers separately if ++ * have a clearly defined need to use and refcount the device ++ * before it is added to the hierarchy. ++ * ++ * For more information, see the kerneldoc for device_initialize() ++ * and device_add(). ++ * ++ * NOTE: _Never_ directly free @dev after calling this function, even ++ * if it returned an error! Always use put_device() to give up the ++ * reference initialized in this function instead. ++ */ ++int device_register(struct device *dev) ++{ ++ device_initialize(dev); ++ return device_add(dev); ++} ++EXPORT_SYMBOL_GPL(device_register); ++ ++/** ++ * get_device - increment reference count for device. ++ * @dev: device. 
++ * ++ * This simply forwards the call to kobject_get(), though ++ * we do take care to provide for the case that we get a NULL ++ * pointer passed in. ++ */ ++struct device *get_device(struct device *dev) ++{ ++ return dev ? kobj_to_dev(kobject_get(&dev->kobj)) : NULL; ++} ++EXPORT_SYMBOL_GPL(get_device); ++ ++/** ++ * put_device - decrement reference count. ++ * @dev: device in question. ++ */ ++void put_device(struct device *dev) ++{ ++ /* might_sleep(); */ ++ if (dev) ++ kobject_put(&dev->kobj); ++} ++EXPORT_SYMBOL_GPL(put_device); ++ ++bool kill_device(struct device *dev) ++{ ++ /* ++ * Require the device lock and set the "dead" flag to guarantee that ++ * the update behavior is consistent with the other bitfields near ++ * it and that we cannot have an asynchronous probe routine trying ++ * to run while we are tearing out the bus/class/sysfs from ++ * underneath the device. ++ */ ++ lockdep_assert_held(&dev->mutex); ++ ++ if (dev->p->dead) ++ return false; ++ dev->p->dead = true; ++ return true; ++} ++EXPORT_SYMBOL_GPL(kill_device); ++ ++/** ++ * device_del - delete device from system. ++ * @dev: device. ++ * ++ * This is the first part of the device unregistration ++ * sequence. This removes the device from the lists we control ++ * from here, has it removed from the other driver model ++ * subsystems it was added to in device_add(), and removes it ++ * from the kobject hierarchy. ++ * ++ * NOTE: this should be called manually _iff_ device_add() was ++ * also called manually. ++ */ ++void device_del(struct device *dev) ++{ ++ struct device *parent = dev->parent; ++ struct kobject *glue_dir = NULL; ++ struct class_interface *class_intf; ++ ++ device_lock(dev); ++ kill_device(dev); ++ device_unlock(dev); ++ ++ /* Notify clients of device removal. This call must come ++ * before dpm_sysfs_remove(). ++ */ ++ if (dev->bus) ++ blocking_notifier_call_chain(&dev->bus->p->bus_notifier, ++ BUS_NOTIFY_DEL_DEVICE, dev); ++ ++ dpm_sysfs_remove(dev); ++ if (parent) ++ klist_del(&dev->p->knode_parent); ++ if (MAJOR(dev->devt)) { ++ devtmpfs_delete_node(dev); ++ device_remove_sys_dev_entry(dev); ++ device_remove_file(dev, &dev_attr_dev); ++ } ++ if (dev->class) { ++ device_remove_class_symlinks(dev); ++ ++ mutex_lock(&dev->class->p->mutex); ++ /* notify any interfaces that the device is now gone */ ++ list_for_each_entry(class_intf, ++ &dev->class->p->interfaces, node) ++ if (class_intf->remove_dev) ++ class_intf->remove_dev(dev, class_intf); ++ /* remove the device from the class list */ ++ klist_del(&dev->knode_class); ++ mutex_unlock(&dev->class->p->mutex); ++ } ++ device_remove_file(dev, &dev_attr_uevent); ++ device_remove_attrs(dev); ++ bus_remove_device(dev); ++ device_pm_remove(dev); ++ driver_deferred_probe_del(dev); ++ device_remove_properties(dev); ++ device_links_purge(dev); ++ ++ /* Notify the platform of the removal, in case they ++ * need to do anything... ++ */ ++ if (platform_notify_remove) ++ platform_notify_remove(dev); ++ if (dev->bus) ++ blocking_notifier_call_chain(&dev->bus->p->bus_notifier, ++ BUS_NOTIFY_REMOVED_DEVICE, dev); ++ kobject_uevent(&dev->kobj, KOBJ_REMOVE); ++ glue_dir = get_glue_dir(dev); ++ kobject_del(&dev->kobj); ++ cleanup_glue_dir(dev, glue_dir); ++ put_device(parent); ++} ++EXPORT_SYMBOL_GPL(device_del); ++ ++/** ++ * device_unregister - unregister device from system. ++ * @dev: device going away. ++ * ++ * We do this in two parts, like we do device_register(). 
First, ++ * we remove it from all the subsystems with device_del(), then ++ * we decrement the reference count via put_device(). If that ++ * is the final reference count, the device will be cleaned up ++ * via device_release() above. Otherwise, the structure will ++ * stick around until the final reference to the device is dropped. ++ */ ++void device_unregister(struct device *dev) ++{ ++ pr_debug("device: '%s': %s\n", dev_name(dev), __func__); ++ device_del(dev); ++ put_device(dev); ++} ++EXPORT_SYMBOL_GPL(device_unregister); ++ ++static struct device *prev_device(struct klist_iter *i) ++{ ++ struct klist_node *n = klist_prev(i); ++ struct device *dev = NULL; ++ struct device_private *p; ++ ++ if (n) { ++ p = to_device_private_parent(n); ++ dev = p->device; ++ } ++ return dev; ++} ++ ++static struct device *next_device(struct klist_iter *i) ++{ ++ struct klist_node *n = klist_next(i); ++ struct device *dev = NULL; ++ struct device_private *p; ++ ++ if (n) { ++ p = to_device_private_parent(n); ++ dev = p->device; ++ } ++ return dev; ++} ++ ++/** ++ * device_get_devnode - path of device node file ++ * @dev: device ++ * @mode: returned file access mode ++ * @uid: returned file owner ++ * @gid: returned file group ++ * @tmp: possibly allocated string ++ * ++ * Return the relative path of a possible device node. ++ * Non-default names may need to allocate a memory to compose ++ * a name. This memory is returned in tmp and needs to be ++ * freed by the caller. ++ */ ++const char *device_get_devnode(struct device *dev, ++ umode_t *mode, kuid_t *uid, kgid_t *gid, ++ const char **tmp) ++{ ++ char *s; ++ ++ *tmp = NULL; ++ ++ /* the device type may provide a specific name */ ++ if (dev->type && dev->type->devnode) ++ *tmp = dev->type->devnode(dev, mode, uid, gid); ++ if (*tmp) ++ return *tmp; ++ ++ /* the class may provide a specific name */ ++ if (dev->class && dev->class->devnode) ++ *tmp = dev->class->devnode(dev, mode); ++ if (*tmp) ++ return *tmp; ++ ++ /* return name without allocation, tmp == NULL */ ++ if (strchr(dev_name(dev), '!') == NULL) ++ return dev_name(dev); ++ ++ /* replace '!' in the name with '/' */ ++ s = kstrdup(dev_name(dev), GFP_KERNEL); ++ if (!s) ++ return NULL; ++ strreplace(s, '!', '/'); ++ return *tmp = s; ++} ++ ++/** ++ * device_for_each_child - device child iterator. ++ * @parent: parent struct device. ++ * @fn: function to be called for each device. ++ * @data: data for the callback. ++ * ++ * Iterate over @parent's child devices, and call @fn for each, ++ * passing it @data. ++ * ++ * We check the return of @fn each time. If it returns anything ++ * other than 0, we break out and return that value. ++ */ ++int device_for_each_child(struct device *parent, void *data, ++ int (*fn)(struct device *dev, void *data)) ++{ ++ struct klist_iter i; ++ struct device *child; ++ int error = 0; ++ ++ if (!parent->p) ++ return 0; ++ ++ klist_iter_init(&parent->p->klist_children, &i); ++ while (!error && (child = next_device(&i))) ++ error = fn(child, data); ++ klist_iter_exit(&i); ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_for_each_child); ++ ++/** ++ * device_for_each_child_reverse - device child iterator in reversed order. ++ * @parent: parent struct device. ++ * @fn: function to be called for each device. ++ * @data: data for the callback. ++ * ++ * Iterate over @parent's child devices, and call @fn for each, ++ * passing it @data. ++ * ++ * We check the return of @fn each time. If it returns anything ++ * other than 0, we break out and return that value. 
++ */ ++int device_for_each_child_reverse(struct device *parent, void *data, ++ int (*fn)(struct device *dev, void *data)) ++{ ++ struct klist_iter i; ++ struct device *child; ++ int error = 0; ++ ++ if (!parent->p) ++ return 0; ++ ++ klist_iter_init(&parent->p->klist_children, &i); ++ while ((child = prev_device(&i)) && !error) ++ error = fn(child, data); ++ klist_iter_exit(&i); ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_for_each_child_reverse); ++ ++/** ++ * device_find_child - device iterator for locating a particular device. ++ * @parent: parent struct device ++ * @match: Callback function to check device ++ * @data: Data to pass to match function ++ * ++ * This is similar to the device_for_each_child() function above, but it ++ * returns a reference to a device that is 'found' for later use, as ++ * determined by the @match callback. ++ * ++ * The callback should return 0 if the device doesn't match and non-zero ++ * if it does. If the callback returns non-zero and a reference to the ++ * current device can be obtained, this function will return to the caller ++ * and not iterate over any more devices. ++ * ++ * NOTE: you will need to drop the reference with put_device() after use. ++ */ ++struct device *device_find_child(struct device *parent, void *data, ++ int (*match)(struct device *dev, void *data)) ++{ ++ struct klist_iter i; ++ struct device *child; ++ ++ if (!parent) ++ return NULL; ++ ++ klist_iter_init(&parent->p->klist_children, &i); ++ while ((child = next_device(&i))) ++ if (match(child, data) && get_device(child)) ++ break; ++ klist_iter_exit(&i); ++ return child; ++} ++EXPORT_SYMBOL_GPL(device_find_child); ++ ++int __init devices_init(void) ++{ ++ devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL); ++ if (!devices_kset) ++ return -ENOMEM; ++ dev_kobj = kobject_create_and_add("dev", NULL); ++ if (!dev_kobj) ++ goto dev_kobj_err; ++ sysfs_dev_block_kobj = kobject_create_and_add("block", dev_kobj); ++ if (!sysfs_dev_block_kobj) ++ goto block_kobj_err; ++ sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); ++ if (!sysfs_dev_char_kobj) ++ goto char_kobj_err; ++ ++ return 0; ++ ++ char_kobj_err: ++ kobject_put(sysfs_dev_block_kobj); ++ block_kobj_err: ++ kobject_put(dev_kobj); ++ dev_kobj_err: ++ kset_unregister(devices_kset); ++ return -ENOMEM; ++} ++ ++static int device_check_offline(struct device *dev, void *not_used) ++{ ++ int ret; ++ ++ ret = device_for_each_child(dev, NULL, device_check_offline); ++ if (ret) ++ return ret; ++ ++ return device_supports_offline(dev) && !dev->offline ? -EBUSY : 0; ++} ++ ++/** ++ * device_offline - Prepare the device for hot-removal. ++ * @dev: Device to be put offline. ++ * ++ * Execute the device bus type's .offline() callback, if present, to prepare ++ * the device for a subsequent hot-removal. If that succeeds, the device must ++ * not be used until either it is removed or its bus type's .online() callback ++ * is executed. ++ * ++ * Call under device_hotplug_lock. 
++ */ ++int device_offline(struct device *dev) ++{ ++ int ret; ++ ++ if (dev->offline_disabled) ++ return -EPERM; ++ ++ ret = device_for_each_child(dev, NULL, device_check_offline); ++ if (ret) ++ return ret; ++ ++ device_lock(dev); ++ if (device_supports_offline(dev)) { ++ if (dev->offline) { ++ ret = 1; ++ } else { ++ ret = dev->bus->offline(dev); ++ if (!ret) { ++ kobject_uevent(&dev->kobj, KOBJ_OFFLINE); ++ dev->offline = true; ++ } ++ } ++ } ++ device_unlock(dev); ++ ++ return ret; ++} ++ ++/** ++ * device_online - Put the device back online after successful device_offline(). ++ * @dev: Device to be put back online. ++ * ++ * If device_offline() has been successfully executed for @dev, but the device ++ * has not been removed subsequently, execute its bus type's .online() callback ++ * to indicate that the device can be used again. ++ * ++ * Call under device_hotplug_lock. ++ */ ++int device_online(struct device *dev) ++{ ++ int ret = 0; ++ ++ device_lock(dev); ++ if (device_supports_offline(dev)) { ++ if (dev->offline) { ++ ret = dev->bus->online(dev); ++ if (!ret) { ++ kobject_uevent(&dev->kobj, KOBJ_ONLINE); ++ dev->offline = false; ++ } ++ } else { ++ ret = 1; ++ } ++ } ++ device_unlock(dev); ++ ++ return ret; ++} ++ ++struct root_device { ++ struct device dev; ++ struct module *owner; ++}; ++ ++static inline struct root_device *to_root_device(struct device *d) ++{ ++ return container_of(d, struct root_device, dev); ++} ++ ++static void root_device_release(struct device *dev) ++{ ++ kfree(to_root_device(dev)); ++} ++ ++/** ++ * __root_device_register - allocate and register a root device ++ * @name: root device name ++ * @owner: owner module of the root device, usually THIS_MODULE ++ * ++ * This function allocates a root device and registers it ++ * using device_register(). In order to free the returned ++ * device, use root_device_unregister(). ++ * ++ * Root devices are dummy devices which allow other devices ++ * to be grouped under /sys/devices. Use this function to ++ * allocate a root device and then use it as the parent of ++ * any device which should appear under /sys/devices/{name} ++ * ++ * The /sys/devices/{name} directory will also contain a ++ * 'module' symlink which points to the @owner directory ++ * in sysfs. ++ * ++ * Returns &struct device pointer on success, or ERR_PTR() on error. ++ * ++ * Note: You probably want to use root_device_register(). ++ */ ++struct device *__root_device_register(const char *name, struct module *owner) ++{ ++ struct root_device *root; ++ int err = -ENOMEM; ++ ++ root = kzalloc(sizeof(struct root_device), GFP_KERNEL); ++ if (!root) ++ return ERR_PTR(err); ++ ++ err = dev_set_name(&root->dev, "%s", name); ++ if (err) { ++ kfree(root); ++ return ERR_PTR(err); ++ } ++ ++ root->dev.release = root_device_release; ++ ++ err = device_register(&root->dev); ++ if (err) { ++ put_device(&root->dev); ++ return ERR_PTR(err); ++ } ++ ++#ifdef CONFIG_MODULES /* gotta find a "cleaner" way to do this */ ++ if (owner) { ++ struct module_kobject *mk = &owner->mkobj; ++ ++ err = sysfs_create_link(&root->dev.kobj, &mk->kobj, "module"); ++ if (err) { ++ device_unregister(&root->dev); ++ return ERR_PTR(err); ++ } ++ root->owner = owner; ++ } ++#endif ++ ++ return &root->dev; ++} ++EXPORT_SYMBOL_GPL(__root_device_register); ++ ++/** ++ * root_device_unregister - unregister and free a root device ++ * @dev: device going away ++ * ++ * This function unregisters and cleans up a device that was created by ++ * root_device_register(). 
++ */ ++void root_device_unregister(struct device *dev) ++{ ++ struct root_device *root = to_root_device(dev); ++ ++ if (root->owner) ++ sysfs_remove_link(&root->dev.kobj, "module"); ++ ++ device_unregister(dev); ++} ++EXPORT_SYMBOL_GPL(root_device_unregister); ++ ++ ++static void device_create_release(struct device *dev) ++{ ++ pr_debug("device: '%s': %s\n", dev_name(dev), __func__); ++ kfree(dev); ++} ++ ++static __printf(6, 0) struct device * ++device_create_groups_vargs(struct class *class, struct device *parent, ++ dev_t devt, void *drvdata, ++ const struct attribute_group **groups, ++ const char *fmt, va_list args) ++{ ++ struct device *dev = NULL; ++ int retval = -ENODEV; ++ ++ if (class == NULL || IS_ERR(class)) ++ goto error; ++ ++ dev = kzalloc(sizeof(*dev), GFP_KERNEL); ++ if (!dev) { ++ retval = -ENOMEM; ++ goto error; ++ } ++ ++ device_initialize(dev); ++ dev->devt = devt; ++ dev->class = class; ++ dev->parent = parent; ++ dev->groups = groups; ++ dev->release = device_create_release; ++ dev_set_drvdata(dev, drvdata); ++ ++ retval = kobject_set_name_vargs(&dev->kobj, fmt, args); ++ if (retval) ++ goto error; ++ ++ retval = device_add(dev); ++ if (retval) ++ goto error; ++ ++ return dev; ++ ++error: ++ put_device(dev); ++ return ERR_PTR(retval); ++} ++ ++/** ++ * device_create_vargs - creates a device and registers it with sysfs ++ * @class: pointer to the struct class that this device should be registered to ++ * @parent: pointer to the parent struct device of this new device, if any ++ * @devt: the dev_t for the char device to be added ++ * @drvdata: the data to be added to the device for callbacks ++ * @fmt: string for the device's name ++ * @args: va_list for the device's name ++ * ++ * This function can be used by char device classes. A struct device ++ * will be created in sysfs, registered to the specified class. ++ * ++ * A "dev" file will be created, showing the dev_t for the device, if ++ * the dev_t is not 0,0. ++ * If a pointer to a parent struct device is passed in, the newly created ++ * struct device will be a child of that device in sysfs. ++ * The pointer to the struct device will be returned from the call. ++ * Any further sysfs files that might be required can be created using this ++ * pointer. ++ * ++ * Returns &struct device pointer on success, or ERR_PTR() on error. ++ * ++ * Note: the struct class passed to this function must have previously ++ * been created with a call to class_create(). ++ */ ++struct device *device_create_vargs(struct class *class, struct device *parent, ++ dev_t devt, void *drvdata, const char *fmt, ++ va_list args) ++{ ++ return device_create_groups_vargs(class, parent, devt, drvdata, NULL, ++ fmt, args); ++} ++EXPORT_SYMBOL_GPL(device_create_vargs); ++ ++/** ++ * device_create - creates a device and registers it with sysfs ++ * @class: pointer to the struct class that this device should be registered to ++ * @parent: pointer to the parent struct device of this new device, if any ++ * @devt: the dev_t for the char device to be added ++ * @drvdata: the data to be added to the device for callbacks ++ * @fmt: string for the device's name ++ * ++ * This function can be used by char device classes. A struct device ++ * will be created in sysfs, registered to the specified class. ++ * ++ * A "dev" file will be created, showing the dev_t for the device, if ++ * the dev_t is not 0,0. ++ * If a pointer to a parent struct device is passed in, the newly created ++ * struct device will be a child of that device in sysfs. 
++ * The pointer to the struct device will be returned from the call. ++ * Any further sysfs files that might be required can be created using this ++ * pointer. ++ * ++ * Returns &struct device pointer on success, or ERR_PTR() on error. ++ * ++ * Note: the struct class passed to this function must have previously ++ * been created with a call to class_create(). ++ */ ++struct device *device_create(struct class *class, struct device *parent, ++ dev_t devt, void *drvdata, const char *fmt, ...) ++{ ++ va_list vargs; ++ struct device *dev; ++ ++ va_start(vargs, fmt); ++ dev = device_create_vargs(class, parent, devt, drvdata, fmt, vargs); ++ va_end(vargs); ++ return dev; ++} ++EXPORT_SYMBOL_GPL(device_create); ++ ++/** ++ * device_create_with_groups - creates a device and registers it with sysfs ++ * @class: pointer to the struct class that this device should be registered to ++ * @parent: pointer to the parent struct device of this new device, if any ++ * @devt: the dev_t for the char device to be added ++ * @drvdata: the data to be added to the device for callbacks ++ * @groups: NULL-terminated list of attribute groups to be created ++ * @fmt: string for the device's name ++ * ++ * This function can be used by char device classes. A struct device ++ * will be created in sysfs, registered to the specified class. ++ * Additional attributes specified in the groups parameter will also ++ * be created automatically. ++ * ++ * A "dev" file will be created, showing the dev_t for the device, if ++ * the dev_t is not 0,0. ++ * If a pointer to a parent struct device is passed in, the newly created ++ * struct device will be a child of that device in sysfs. ++ * The pointer to the struct device will be returned from the call. ++ * Any further sysfs files that might be required can be created using this ++ * pointer. ++ * ++ * Returns &struct device pointer on success, or ERR_PTR() on error. ++ * ++ * Note: the struct class passed to this function must have previously ++ * been created with a call to class_create(). ++ */ ++struct device *device_create_with_groups(struct class *class, ++ struct device *parent, dev_t devt, ++ void *drvdata, ++ const struct attribute_group **groups, ++ const char *fmt, ...) ++{ ++ va_list vargs; ++ struct device *dev; ++ ++ va_start(vargs, fmt); ++ dev = device_create_groups_vargs(class, parent, devt, drvdata, groups, ++ fmt, vargs); ++ va_end(vargs); ++ return dev; ++} ++EXPORT_SYMBOL_GPL(device_create_with_groups); ++ ++static int __match_devt(struct device *dev, const void *data) ++{ ++ const dev_t *devt = data; ++ ++ return dev->devt == *devt; ++} ++ ++/** ++ * device_destroy - removes a device that was created with device_create() ++ * @class: pointer to the struct class that this device was registered with ++ * @devt: the dev_t of the device that was previously registered ++ * ++ * This call unregisters and cleans up a device that was created with a ++ * call to device_create(). 
++ */ ++void device_destroy(struct class *class, dev_t devt) ++{ ++ struct device *dev; ++ ++ dev = class_find_device(class, NULL, &devt, __match_devt); ++ if (dev) { ++ put_device(dev); ++ device_unregister(dev); ++ } ++} ++EXPORT_SYMBOL_GPL(device_destroy); ++ ++/** ++ * device_rename - renames a device ++ * @dev: the pointer to the struct device to be renamed ++ * @new_name: the new name of the device ++ * ++ * It is the responsibility of the caller to provide mutual ++ * exclusion between two different calls of device_rename ++ * on the same device to ensure that new_name is valid and ++ * won't conflict with other devices. ++ * ++ * Note: Don't call this function. Currently, the networking layer calls this ++ * function, but that will change. The following text from Kay Sievers offers ++ * some insight: ++ * ++ * Renaming devices is racy at many levels, symlinks and other stuff are not ++ * replaced atomically, and you get a "move" uevent, but it's not easy to ++ * connect the event to the old and new device. Device nodes are not renamed at ++ * all, there isn't even support for that in the kernel now. ++ * ++ * In the meantime, during renaming, your target name might be taken by another ++ * driver, creating conflicts. Or the old name is taken directly after you ++ * renamed it -- then you get events for the same DEVPATH, before you even see ++ * the "move" event. It's just a mess, and nothing new should ever rely on ++ * kernel device renaming. Besides that, it's not even implemented now for ++ * other things than (driver-core wise very simple) network devices. ++ * ++ * We are currently about to change network renaming in udev to completely ++ * disallow renaming of devices in the same namespace as the kernel uses, ++ * because we can't solve the problems properly, that arise with swapping names ++ * of multiple interfaces without races. Means, renaming of eth[0-9]* will only ++ * be allowed to some other name than eth[0-9]*, for the aforementioned ++ * reasons. ++ * ++ * Make up a "real" name in the driver before you register anything, or add ++ * some other attributes for userspace to find the device, or use udev to add ++ * symlinks -- but never rename kernel devices later, it's a complete mess. We ++ * don't even want to get into that and try to implement the missing pieces in ++ * the core. We really have other pieces to fix in the driver core mess. 
:) ++ */ ++int device_rename(struct device *dev, const char *new_name) ++{ ++ struct kobject *kobj = &dev->kobj; ++ char *old_device_name = NULL; ++ int error; ++ ++ dev = get_device(dev); ++ if (!dev) ++ return -EINVAL; ++ ++ dev_dbg(dev, "renaming to %s\n", new_name); ++ ++ old_device_name = kstrdup(dev_name(dev), GFP_KERNEL); ++ if (!old_device_name) { ++ error = -ENOMEM; ++ goto out; ++ } ++ ++ if (dev->class) { ++ error = sysfs_rename_link_ns(&dev->class->p->subsys.kobj, ++ kobj, old_device_name, ++ new_name, kobject_namespace(kobj)); ++ if (error) ++ goto out; ++ } ++ ++ error = kobject_rename(kobj, new_name); ++ if (error) ++ goto out; ++ ++out: ++ put_device(dev); ++ ++ kfree(old_device_name); ++ ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_rename); ++ ++static int device_move_class_links(struct device *dev, ++ struct device *old_parent, ++ struct device *new_parent) ++{ ++ int error = 0; ++ ++ if (old_parent) ++ sysfs_remove_link(&dev->kobj, "device"); ++ if (new_parent) ++ error = sysfs_create_link(&dev->kobj, &new_parent->kobj, ++ "device"); ++ return error; ++} ++ ++/** ++ * device_move - moves a device to a new parent ++ * @dev: the pointer to the struct device to be moved ++ * @new_parent: the new parent of the device (can be NULL) ++ * @dpm_order: how to reorder the dpm_list ++ */ ++int device_move(struct device *dev, struct device *new_parent, ++ enum dpm_order dpm_order) ++{ ++ int error; ++ struct device *old_parent; ++ struct kobject *new_parent_kobj; ++ ++ dev = get_device(dev); ++ if (!dev) ++ return -EINVAL; ++ ++ device_pm_lock(); ++ new_parent = get_device(new_parent); ++ new_parent_kobj = get_device_parent(dev, new_parent); ++ if (IS_ERR(new_parent_kobj)) { ++ error = PTR_ERR(new_parent_kobj); ++ put_device(new_parent); ++ goto out; ++ } ++ ++ pr_debug("device: '%s': %s: moving to '%s'\n", dev_name(dev), ++ __func__, new_parent ? dev_name(new_parent) : ""); ++ error = kobject_move(&dev->kobj, new_parent_kobj); ++ if (error) { ++ cleanup_glue_dir(dev, new_parent_kobj); ++ put_device(new_parent); ++ goto out; ++ } ++ old_parent = dev->parent; ++ dev->parent = new_parent; ++ if (old_parent) ++ klist_remove(&dev->p->knode_parent); ++ if (new_parent) { ++ klist_add_tail(&dev->p->knode_parent, ++ &new_parent->p->klist_children); ++ set_dev_node(dev, dev_to_node(new_parent)); ++ } ++ ++ if (dev->class) { ++ error = device_move_class_links(dev, old_parent, new_parent); ++ if (error) { ++ /* We ignore errors on cleanup since we're hosed anyway... 
*/ ++ device_move_class_links(dev, new_parent, old_parent); ++ if (!kobject_move(&dev->kobj, &old_parent->kobj)) { ++ if (new_parent) ++ klist_remove(&dev->p->knode_parent); ++ dev->parent = old_parent; ++ if (old_parent) { ++ klist_add_tail(&dev->p->knode_parent, ++ &old_parent->p->klist_children); ++ set_dev_node(dev, dev_to_node(old_parent)); ++ } ++ } ++ cleanup_glue_dir(dev, new_parent_kobj); ++ put_device(new_parent); ++ goto out; ++ } ++ } ++ switch (dpm_order) { ++ case DPM_ORDER_NONE: ++ break; ++ case DPM_ORDER_DEV_AFTER_PARENT: ++ device_pm_move_after(dev, new_parent); ++ devices_kset_move_after(dev, new_parent); ++ break; ++ case DPM_ORDER_PARENT_BEFORE_DEV: ++ device_pm_move_before(new_parent, dev); ++ devices_kset_move_before(new_parent, dev); ++ break; ++ case DPM_ORDER_DEV_LAST: ++ device_pm_move_last(dev); ++ devices_kset_move_last(dev); ++ break; ++ } ++ ++ put_device(old_parent); ++out: ++ device_pm_unlock(); ++ put_device(dev); ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_move); ++ ++/** ++ * device_shutdown - call ->shutdown() on each device to shutdown. ++ */ ++void device_shutdown(void) ++{ ++ struct device *dev, *parent; ++ ++ wait_for_device_probe(); ++ device_block_probing(); ++ ++ cpufreq_suspend(); ++ ++ spin_lock(&devices_kset->list_lock); ++ /* ++ * Walk the devices list backward, shutting down each in turn. ++ * Beware that device unplug events may also start pulling ++ * devices offline, even as the system is shutting down. ++ */ ++ while (!list_empty(&devices_kset->list)) { ++ dev = list_entry(devices_kset->list.prev, struct device, ++ kobj.entry); ++ ++ /* ++ * hold reference count of device's parent to ++ * prevent it from being freed because parent's ++ * lock is to be held ++ */ ++ parent = get_device(dev->parent); ++ get_device(dev); ++ /* ++ * Make sure the device is off the kset list, in the ++ * event that dev->*->shutdown() doesn't remove it. 
++ */ ++ list_del_init(&dev->kobj.entry); ++ spin_unlock(&devices_kset->list_lock); ++ ++ /* hold lock to avoid race with probe/release */ ++ if (parent) ++ device_lock(parent); ++ device_lock(dev); ++ ++ /* Don't allow any more runtime suspends */ ++ pm_runtime_get_noresume(dev); ++ pm_runtime_barrier(dev); ++ ++ if (dev->class && dev->class->shutdown_pre) { ++ if (initcall_debug) ++ dev_info(dev, "shutdown_pre\n"); ++ dev->class->shutdown_pre(dev); ++ } ++ if (dev->bus && dev->bus->shutdown) { ++ if (initcall_debug) ++ dev_info(dev, "shutdown\n"); ++ dev->bus->shutdown(dev); ++ } else if (dev->driver && dev->driver->shutdown) { ++ if (initcall_debug) ++ dev_info(dev, "shutdown\n"); ++ dev->driver->shutdown(dev); ++ } ++ ++ device_unlock(dev); ++ if (parent) ++ device_unlock(parent); ++ ++ put_device(dev); ++ put_device(parent); ++ ++ spin_lock(&devices_kset->list_lock); ++ } ++ spin_unlock(&devices_kset->list_lock); ++} ++ ++/* ++ * Device logging functions ++ */ ++ ++#ifdef CONFIG_PRINTK ++static int ++create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen) ++{ ++ const char *subsys; ++ size_t pos = 0; ++ ++ if (dev->class) ++ subsys = dev->class->name; ++ else if (dev->bus) ++ subsys = dev->bus->name; ++ else ++ return 0; ++ ++ pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys); ++ if (pos >= hdrlen) ++ goto overflow; ++ ++ /* ++ * Add device identifier DEVICE=: ++ * b12:8 block dev_t ++ * c127:3 char dev_t ++ * n8 netdev ifindex ++ * +sound:card0 subsystem:devname ++ */ ++ if (MAJOR(dev->devt)) { ++ char c; ++ ++ if (strcmp(subsys, "block") == 0) ++ c = 'b'; ++ else ++ c = 'c'; ++ pos++; ++ pos += snprintf(hdr + pos, hdrlen - pos, ++ "DEVICE=%c%u:%u", ++ c, MAJOR(dev->devt), MINOR(dev->devt)); ++ } else if (strcmp(subsys, "net") == 0) { ++ struct net_device *net = to_net_dev(dev); ++ ++ pos++; ++ pos += snprintf(hdr + pos, hdrlen - pos, ++ "DEVICE=n%u", net->ifindex); ++ } else { ++ pos++; ++ pos += snprintf(hdr + pos, hdrlen - pos, ++ "DEVICE=+%s:%s", subsys, dev_name(dev)); ++ } ++ ++ if (pos >= hdrlen) ++ goto overflow; ++ ++ return pos; ++ ++overflow: ++ dev_WARN(dev, "device/subsystem name too long"); ++ return 0; ++} ++ ++int dev_vprintk_emit(int level, const struct device *dev, ++ const char *fmt, va_list args) ++{ ++ char hdr[128]; ++ size_t hdrlen; ++ ++ hdrlen = create_syslog_header(dev, hdr, sizeof(hdr)); ++ ++ return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen, fmt, args); ++} ++EXPORT_SYMBOL(dev_vprintk_emit); ++ ++int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...) ++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ ++ r = dev_vprintk_emit(level, dev, fmt, args); ++ ++ va_end(args); ++ ++ return r; ++} ++EXPORT_SYMBOL(dev_printk_emit); ++ ++static void __dev_printk(const char *level, const struct device *dev, ++ struct va_format *vaf) ++{ ++ if (dev) ++ dev_printk_emit(level[1] - '0', dev, "%s %s: %pV", ++ dev_driver_string(dev), dev_name(dev), vaf); ++ else ++ printk("%s(NULL device *): %pV", level, vaf); ++} ++ ++void dev_printk(const char *level, const struct device *dev, ++ const char *fmt, ...) ++{ ++ struct va_format vaf; ++ va_list args; ++ ++ va_start(args, fmt); ++ ++ vaf.fmt = fmt; ++ vaf.va = &args; ++ ++ __dev_printk(level, dev, &vaf); ++ ++ va_end(args); ++} ++EXPORT_SYMBOL(dev_printk); ++ ++#define define_dev_printk_level(func, kern_level) \ ++void func(const struct device *dev, const char *fmt, ...) 
\ ++{ \ ++ struct va_format vaf; \ ++ va_list args; \ ++ \ ++ va_start(args, fmt); \ ++ \ ++ vaf.fmt = fmt; \ ++ vaf.va = &args; \ ++ \ ++ __dev_printk(kern_level, dev, &vaf); \ ++ \ ++ va_end(args); \ ++} \ ++EXPORT_SYMBOL(func); ++ ++define_dev_printk_level(_dev_emerg, KERN_EMERG); ++define_dev_printk_level(_dev_alert, KERN_ALERT); ++define_dev_printk_level(_dev_crit, KERN_CRIT); ++define_dev_printk_level(_dev_err, KERN_ERR); ++define_dev_printk_level(_dev_warn, KERN_WARNING); ++define_dev_printk_level(_dev_notice, KERN_NOTICE); ++define_dev_printk_level(_dev_info, KERN_INFO); ++ ++#endif ++ ++static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) ++{ ++ return fwnode && !IS_ERR(fwnode->secondary); ++} ++ ++/** ++ * set_primary_fwnode - Change the primary firmware node of a given device. ++ * @dev: Device to handle. ++ * @fwnode: New primary firmware node of the device. ++ * ++ * Set the device's firmware node pointer to @fwnode, but if a secondary ++ * firmware node of the device is present, preserve it. ++ */ ++void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) ++{ ++ struct fwnode_handle *fn = dev->fwnode; ++ ++ if (fwnode) { ++ if (fwnode_is_primary(fn)) ++ fn = fn->secondary; ++ ++ if (fn) { ++ WARN_ON(fwnode->secondary); ++ fwnode->secondary = fn; ++ } ++ dev->fwnode = fwnode; ++ } else { ++ if (fwnode_is_primary(fn)) { ++ dev->fwnode = fn->secondary; ++ fn->secondary = NULL; ++ } else { ++ dev->fwnode = NULL; ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(set_primary_fwnode); ++ ++/** ++ * set_secondary_fwnode - Change the secondary firmware node of a given device. ++ * @dev: Device to handle. ++ * @fwnode: New secondary firmware node of the device. ++ * ++ * If a primary firmware node of the device is present, set its secondary ++ * pointer to @fwnode. Otherwise, set the device's firmware node pointer to ++ * @fwnode. ++ */ ++void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode) ++{ ++ if (fwnode) ++ fwnode->secondary = ERR_PTR(-ENODEV); ++ ++ if (fwnode_is_primary(dev->fwnode)) ++ dev->fwnode->secondary = fwnode; ++ else ++ dev->fwnode = fwnode; ++} ++ ++/** ++ * device_set_of_node_from_dev - reuse device-tree node of another device ++ * @dev: device whose device-tree node is being set ++ * @dev2: device whose device-tree node is being reused ++ * ++ * Takes another reference to the new device-tree node after first dropping ++ * any reference held to the old node. 
++ */ ++void device_set_of_node_from_dev(struct device *dev, const struct device *dev2) ++{ ++ of_node_put(dev->of_node); ++ dev->of_node = of_node_get(dev2->of_node); ++ dev->of_node_reused = true; ++} ++EXPORT_SYMBOL_GPL(device_set_of_node_from_dev); +diff -uprN kernel/drivers/base/regmap/regmap-irq.c kernel_new/drivers/base/regmap/regmap-irq.c +--- kernel/drivers/base/regmap/regmap-irq.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/base/regmap/regmap-irq.c 2021-04-01 18:28:07.660863282 +0800 +@@ -197,8 +197,11 @@ static void regmap_irq_enable(struct irq + struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); + struct regmap *map = d->map; + const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); ++ unsigned long flags; + ++ flags = hard_cond_local_irq_save(); + d->mask_buf[irq_data->reg_offset / map->reg_stride] &= ~irq_data->mask; ++ hard_cond_local_irq_restore(flags); + } + + static void regmap_irq_disable(struct irq_data *data) +@@ -206,8 +209,11 @@ static void regmap_irq_disable(struct ir + struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); + struct regmap *map = d->map; + const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); ++ unsigned long flags; + ++ flags = hard_cond_local_irq_save(); + d->mask_buf[irq_data->reg_offset / map->reg_stride] |= irq_data->mask; ++ hard_cond_local_irq_restore(flags); + } + + static int regmap_irq_set_type(struct irq_data *data, unsigned int type) +@@ -270,6 +276,7 @@ static const struct irq_chip regmap_irq_ + .irq_enable = regmap_irq_enable, + .irq_set_type = regmap_irq_set_type, + .irq_set_wake = regmap_irq_set_wake, ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static irqreturn_t regmap_irq_thread(int irq, void *d) +diff -uprN kernel/drivers/clocksource/arm_arch_timer.c kernel_new/drivers/clocksource/arm_arch_timer.c +--- kernel/drivers/clocksource/arm_arch_timer.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/arm_arch_timer.c 2021-04-01 18:28:07.660863282 +0800 +@@ -20,6 +20,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -633,8 +635,7 @@ static bool arch_timer_this_cpu_has_cntv + #define arch_timer_this_cpu_has_cntvct_wa() ({false;}) + #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ + +-static __always_inline irqreturn_t timer_handler(const int access, +- struct clock_event_device *evt) ++static int arch_timer_ack(const int access, struct clock_event_device *evt) + { + unsigned long ctrl; + +@@ -642,6 +643,52 @@ static __always_inline irqreturn_t timer + if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { + ctrl |= ARCH_TIMER_CTRL_IT_MASK; + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt); ++ return 1; ++ } ++ return 0; ++} ++ ++#ifdef CONFIG_IPIPE ++static DEFINE_PER_CPU(struct ipipe_timer, arch_itimer); ++static struct __ipipe_tscinfo tsc_info = { ++ .type = IPIPE_TSC_TYPE_FREERUNNING_ARCH, ++ .u = { ++ { ++ .mask = 0xffffffffffffffff, ++ }, ++ }, ++}; ++ ++static void arch_itimer_ack_phys(void) ++{ ++ struct clock_event_device *evt = this_cpu_ptr(arch_timer_evt); ++ arch_timer_ack(ARCH_TIMER_PHYS_ACCESS, evt); ++} ++ ++static void arch_itimer_ack_virt(void) ++{ ++ struct clock_event_device *evt = this_cpu_ptr(arch_timer_evt); ++ arch_timer_ack(ARCH_TIMER_VIRT_ACCESS, evt); ++} ++#endif /* CONFIG_IPIPE */ ++ ++static inline irqreturn_t timer_handler(int irq, const int access, ++ struct clock_event_device *evt) ++{ ++ if (clockevent_ipipe_stolen(evt)) ++ goto stolen; ++ ++ if 
(arch_timer_ack(access, evt)) { ++#ifdef CONFIG_IPIPE ++ struct ipipe_timer *itimer = raw_cpu_ptr(&arch_itimer); ++ if (itimer->irq != irq) ++ itimer->irq = irq; ++#endif /* CONFIG_IPIPE */ ++ stolen: ++ /* ++ * This is a 64bit clock source, no need for TSC ++ * update. ++ */ + evt->event_handler(evt); + return IRQ_HANDLED; + } +@@ -653,28 +700,28 @@ static irqreturn_t arch_timer_handler_vi + { + struct clock_event_device *evt = dev_id; + +- return timer_handler(ARCH_TIMER_VIRT_ACCESS, evt); ++ return timer_handler(irq, ARCH_TIMER_VIRT_ACCESS, evt); + } + + static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) + { + struct clock_event_device *evt = dev_id; + +- return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); ++ return timer_handler(irq, ARCH_TIMER_PHYS_ACCESS, evt); + } + + static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id) + { + struct clock_event_device *evt = dev_id; + +- return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt); ++ return timer_handler(irq, ARCH_TIMER_MEM_PHYS_ACCESS, evt); + } + + static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id) + { + struct clock_event_device *evt = dev_id; + +- return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt); ++ return timer_handler(irq, ARCH_TIMER_MEM_VIRT_ACCESS, evt); + } + + static __always_inline int timer_shutdown(const int access, +@@ -788,6 +835,17 @@ static void __arch_timer_setup(unsigned + } + + arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); ++#ifdef CONFIG_IPIPE ++ clk->ipipe_timer = raw_cpu_ptr(&arch_itimer); ++ if (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) { ++ clk->ipipe_timer->irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; ++ clk->ipipe_timer->ack = arch_itimer_ack_virt; ++ } else { ++ clk->ipipe_timer->irq = arch_timer_ppi[ARCH_TIMER_PHYS_SECURE_PPI]; ++ clk->ipipe_timer->ack = arch_itimer_ack_phys; ++ } ++ clk->ipipe_timer->freq = arch_timer_rate; ++#endif + } else { + clk->features |= CLOCK_EVT_FEAT_DYNIRQ; + clk->name = "arch_mem_timer"; +@@ -862,6 +920,9 @@ static void arch_counter_set_user_access + else + cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; + ++#ifdef CONFIG_IPIPE ++ cntkctl |= ARCH_TIMER_USR_PCT_ACCESS_EN; ++#endif + arch_timer_set_cntkctl(cntkctl); + } + +@@ -997,6 +1058,10 @@ static void __init arch_counter_register + arch_timer_read_counter = arch_counter_get_cntvct_mem; + } + ++#ifdef CONFIG_IPIPE ++ tsc_info.freq = arch_timer_rate; ++ __ipipe_tsc_register(&tsc_info); ++#endif /* CONFIG_IPIPE */ + if (!arch_counter_suspend_stop) + clocksource_counter.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; + start_count = arch_timer_read_counter(); +diff -uprN kernel/drivers/clocksource/arm_arch_timer.c.orig kernel_new/drivers/clocksource/arm_arch_timer.c.orig +--- kernel/drivers/clocksource/arm_arch_timer.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/clocksource/arm_arch_timer.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,1631 @@ ++/* ++ * linux/drivers/clocksource/arm_arch_timer.c ++ * ++ * Copyright (C) 2011 ARM Ltd. ++ * All Rights Reserved ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#define pr_fmt(fmt) "arm_arch_timer: " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++ ++#undef pr_fmt ++#define pr_fmt(fmt) "arch_timer: " fmt ++ ++#define CNTTIDR 0x08 ++#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4)) ++ ++#define CNTACR(n) (0x40 + ((n) * 4)) ++#define CNTACR_RPCT BIT(0) ++#define CNTACR_RVCT BIT(1) ++#define CNTACR_RFRQ BIT(2) ++#define CNTACR_RVOFF BIT(3) ++#define CNTACR_RWVT BIT(4) ++#define CNTACR_RWPT BIT(5) ++ ++#define CNTVCT_LO 0x08 ++#define CNTVCT_HI 0x0c ++#define CNTFRQ 0x10 ++#define CNTP_TVAL 0x28 ++#define CNTP_CTL 0x2c ++#define CNTV_TVAL 0x38 ++#define CNTV_CTL 0x3c ++ ++static unsigned arch_timers_present __initdata; ++ ++static void __iomem *arch_counter_base; ++ ++struct arch_timer { ++ void __iomem *base; ++ struct clock_event_device evt; ++}; ++ ++#define to_arch_timer(e) container_of(e, struct arch_timer, evt) ++ ++static u32 arch_timer_rate; ++static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI]; ++ ++static struct clock_event_device __percpu *arch_timer_evt; ++ ++static enum arch_timer_ppi_nr arch_timer_uses_ppi = ARCH_TIMER_VIRT_PPI; ++static bool arch_timer_c3stop; ++static bool arch_timer_mem_use_virtual; ++static bool arch_counter_suspend_stop; ++static bool vdso_default = true; ++static bool vdso_fix; ++ ++static cpumask_t evtstrm_available = CPU_MASK_NONE; ++static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM); ++ ++static int __init early_evtstrm_cfg(char *buf) ++{ ++ return strtobool(buf, &evtstrm_enable); ++} ++early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg); ++ ++/* ++ * Architected system timer support. 
++ */ ++ ++static __always_inline ++void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val, ++ struct clock_event_device *clk) ++{ ++ if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { ++ struct arch_timer *timer = to_arch_timer(clk); ++ switch (reg) { ++ case ARCH_TIMER_REG_CTRL: ++ writel_relaxed(val, timer->base + CNTP_CTL); ++ break; ++ case ARCH_TIMER_REG_TVAL: ++ writel_relaxed(val, timer->base + CNTP_TVAL); ++ break; ++ } ++ } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { ++ struct arch_timer *timer = to_arch_timer(clk); ++ switch (reg) { ++ case ARCH_TIMER_REG_CTRL: ++ writel_relaxed(val, timer->base + CNTV_CTL); ++ break; ++ case ARCH_TIMER_REG_TVAL: ++ writel_relaxed(val, timer->base + CNTV_TVAL); ++ break; ++ } ++ } else { ++ arch_timer_reg_write_cp15(access, reg, val); ++ } ++} ++ ++static __always_inline ++u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, ++ struct clock_event_device *clk) ++{ ++ u32 val; ++ ++ if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { ++ struct arch_timer *timer = to_arch_timer(clk); ++ switch (reg) { ++ case ARCH_TIMER_REG_CTRL: ++ val = readl_relaxed(timer->base + CNTP_CTL); ++ break; ++ case ARCH_TIMER_REG_TVAL: ++ val = readl_relaxed(timer->base + CNTP_TVAL); ++ break; ++ } ++ } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { ++ struct arch_timer *timer = to_arch_timer(clk); ++ switch (reg) { ++ case ARCH_TIMER_REG_CTRL: ++ val = readl_relaxed(timer->base + CNTV_CTL); ++ break; ++ case ARCH_TIMER_REG_TVAL: ++ val = readl_relaxed(timer->base + CNTV_TVAL); ++ break; ++ } ++ } else { ++ val = arch_timer_reg_read_cp15(access, reg); ++ } ++ ++ return val; ++} ++ ++/* ++ * Default to cp15 based access because arm64 uses this function for ++ * sched_clock() before DT is probed and the cp15 method is guaranteed ++ * to exist on arm64. arm doesn't use this before DT is probed so even ++ * if we don't have the cp15 accessors we won't have a problem. ++ */ ++u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct; ++EXPORT_SYMBOL_GPL(arch_timer_read_counter); ++ ++static u64 arch_counter_read(struct clocksource *cs) ++{ ++ return arch_timer_read_counter(); ++} ++ ++static u64 arch_counter_read_cc(const struct cyclecounter *cc) ++{ ++ return arch_timer_read_counter(); ++} ++ ++static struct clocksource clocksource_counter = { ++ .name = "arch_sys_counter", ++ .rating = 400, ++ .read = arch_counter_read, ++ .mask = CLOCKSOURCE_MASK(56), ++ .flags = CLOCK_SOURCE_IS_CONTINUOUS, ++}; ++ ++static struct cyclecounter cyclecounter __ro_after_init = { ++ .read = arch_counter_read_cc, ++ .mask = CLOCKSOURCE_MASK(56), ++}; ++ ++struct ate_acpi_oem_info { ++ char oem_id[ACPI_OEM_ID_SIZE + 1]; ++ char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; ++ u32 oem_revision; ++}; ++ ++#ifdef CONFIG_FSL_ERRATUM_A008585 ++/* ++ * The number of retries is an arbitrary value well beyond the highest number ++ * of iterations the loop has been observed to take. 
++ */ ++#define __fsl_a008585_read_reg(reg) ({ \ ++ u64 _old, _new; \ ++ int _retries = 200; \ ++ \ ++ do { \ ++ _old = read_sysreg(reg); \ ++ _new = read_sysreg(reg); \ ++ _retries--; \ ++ } while (unlikely(_old != _new) && _retries); \ ++ \ ++ WARN_ON_ONCE(!_retries); \ ++ _new; \ ++}) ++ ++static u32 notrace fsl_a008585_read_cntp_tval_el0(void) ++{ ++ return __fsl_a008585_read_reg(cntp_tval_el0); ++} ++ ++static u32 notrace fsl_a008585_read_cntv_tval_el0(void) ++{ ++ return __fsl_a008585_read_reg(cntv_tval_el0); ++} ++ ++static u64 notrace fsl_a008585_read_cntpct_el0(void) ++{ ++ return __fsl_a008585_read_reg(cntpct_el0); ++} ++ ++static u64 notrace fsl_a008585_read_cntvct_el0(void) ++{ ++ return __fsl_a008585_read_reg(cntvct_el0); ++} ++#endif ++ ++#ifdef CONFIG_HISILICON_ERRATUM_161010101 ++/* ++ * Verify whether the value of the second read is larger than the first by ++ * less than 32 is the only way to confirm the value is correct, so clear the ++ * lower 5 bits to check whether the difference is greater than 32 or not. ++ * Theoretically the erratum should not occur more than twice in succession ++ * when reading the system counter, but it is possible that some interrupts ++ * may lead to more than twice read errors, triggering the warning, so setting ++ * the number of retries far beyond the number of iterations the loop has been ++ * observed to take. ++ */ ++#define __hisi_161010101_read_reg(reg) ({ \ ++ u64 _old, _new; \ ++ int _retries = 50; \ ++ \ ++ do { \ ++ _old = read_sysreg(reg); \ ++ _new = read_sysreg(reg); \ ++ _retries--; \ ++ } while (unlikely((_new - _old) >> 5) && _retries); \ ++ \ ++ WARN_ON_ONCE(!_retries); \ ++ _new; \ ++}) ++ ++static u32 notrace hisi_161010101_read_cntp_tval_el0(void) ++{ ++ return __hisi_161010101_read_reg(cntp_tval_el0); ++} ++ ++static u32 notrace hisi_161010101_read_cntv_tval_el0(void) ++{ ++ return __hisi_161010101_read_reg(cntv_tval_el0); ++} ++ ++static u64 notrace hisi_161010101_read_cntpct_el0(void) ++{ ++ return __hisi_161010101_read_reg(cntpct_el0); ++} ++ ++static u64 notrace hisi_161010101_read_cntvct_el0(void) ++{ ++ return __hisi_161010101_read_reg(cntvct_el0); ++} ++ ++static struct ate_acpi_oem_info hisi_161010101_oem_info[] = { ++ /* ++ * Note that trailing spaces are required to properly match ++ * the OEM table information. ++ */ ++ { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP05 ", ++ .oem_revision = 0, ++ }, ++ { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP06 ", ++ .oem_revision = 0, ++ }, ++ { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP07 ", ++ .oem_revision = 0, ++ }, ++ { /* Sentinel indicating the end of the OEM array */ }, ++}; ++#endif ++ ++#ifdef CONFIG_ARM64_ERRATUM_858921 ++static u64 notrace arm64_858921_read_cntpct_el0(void) ++{ ++ u64 old, new; ++ ++ old = read_sysreg(cntpct_el0); ++ new = read_sysreg(cntpct_el0); ++ return (((old ^ new) >> 32) & 1) ? old : new; ++} ++ ++static u64 notrace arm64_858921_read_cntvct_el0(void) ++{ ++ u64 old, new; ++ ++ old = read_sysreg(cntvct_el0); ++ new = read_sysreg(cntvct_el0); ++ return (((old ^ new) >> 32) & 1) ? old : new; ++} ++#endif ++ ++#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 ++/* ++ * The low bits of the counter registers are indeterminate while bit 10 or ++ * greater is rolling over. Since the counter value can jump both backward ++ * (7ff -> 000 -> 800) and forward (7ff -> fff -> 800), ignore register values ++ * with all ones or all zeros in the low bits. Bound the loop by the maximum ++ * number of CPU cycles in 3 consecutive 24 MHz counter periods. 
++ */ ++#define __sun50i_a64_read_reg(reg) ({ \ ++ u64 _val; \ ++ int _retries = 150; \ ++ \ ++ do { \ ++ _val = read_sysreg(reg); \ ++ _retries--; \ ++ } while (((_val + 1) & GENMASK(9, 0)) <= 1 && _retries); \ ++ \ ++ WARN_ON_ONCE(!_retries); \ ++ _val; \ ++}) ++ ++static u64 notrace sun50i_a64_read_cntpct_el0(void) ++{ ++ return __sun50i_a64_read_reg(cntpct_el0); ++} ++ ++static u64 notrace sun50i_a64_read_cntvct_el0(void) ++{ ++ return __sun50i_a64_read_reg(cntvct_el0); ++} ++ ++static u32 notrace sun50i_a64_read_cntp_tval_el0(void) ++{ ++ return read_sysreg(cntp_cval_el0) - sun50i_a64_read_cntpct_el0(); ++} ++ ++static u32 notrace sun50i_a64_read_cntv_tval_el0(void) ++{ ++ return read_sysreg(cntv_cval_el0) - sun50i_a64_read_cntvct_el0(); ++} ++#endif ++ ++#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND ++DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); ++EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); ++ ++DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled); ++EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled); ++ ++static void erratum_set_next_event_tval_generic(const int access, unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ unsigned long ctrl; ++ u64 cval; ++ ++ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ++ ctrl |= ARCH_TIMER_CTRL_ENABLE; ++ ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; ++ ++ if (access == ARCH_TIMER_PHYS_ACCESS) { ++ cval = evt + arch_counter_get_cntpct(); ++ write_sysreg(cval, cntp_cval_el0); ++ } else { ++ cval = evt + arch_counter_get_cntvct(); ++ write_sysreg(cval, cntv_cval_el0); ++ } ++ ++ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); ++} ++ ++static __maybe_unused int erratum_set_next_event_tval_virt(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ erratum_set_next_event_tval_generic(ARCH_TIMER_VIRT_ACCESS, evt, clk); ++ return 0; ++} ++ ++static __maybe_unused int erratum_set_next_event_tval_phys(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ erratum_set_next_event_tval_generic(ARCH_TIMER_PHYS_ACCESS, evt, clk); ++ return 0; ++} ++ ++static const struct arch_timer_erratum_workaround ool_workarounds[] = { ++#ifdef CONFIG_FSL_ERRATUM_A008585 ++ { ++ .match_type = ate_match_dt, ++ .id = "fsl,erratum-a008585", ++ .desc = "Freescale erratum a005858", ++ .read_cntp_tval_el0 = fsl_a008585_read_cntp_tval_el0, ++ .read_cntv_tval_el0 = fsl_a008585_read_cntv_tval_el0, ++ .read_cntpct_el0 = fsl_a008585_read_cntpct_el0, ++ .read_cntvct_el0 = fsl_a008585_read_cntvct_el0, ++ .set_next_event_phys = erratum_set_next_event_tval_phys, ++ .set_next_event_virt = erratum_set_next_event_tval_virt, ++ }, ++#endif ++#ifdef CONFIG_HISILICON_ERRATUM_161010101 ++ { ++ .match_type = ate_match_dt, ++ .id = "hisilicon,erratum-161010101", ++ .desc = "HiSilicon erratum 161010101", ++ .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0, ++ .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0, ++ .read_cntpct_el0 = hisi_161010101_read_cntpct_el0, ++ .read_cntvct_el0 = hisi_161010101_read_cntvct_el0, ++ .set_next_event_phys = erratum_set_next_event_tval_phys, ++ .set_next_event_virt = erratum_set_next_event_tval_virt, ++ }, ++ { ++ .match_type = ate_match_acpi_oem_info, ++ .id = hisi_161010101_oem_info, ++ .desc = "HiSilicon erratum 161010101", ++ .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0, ++ .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0, ++ .read_cntpct_el0 = hisi_161010101_read_cntpct_el0, ++ .read_cntvct_el0 = hisi_161010101_read_cntvct_el0, ++ 
.set_next_event_phys = erratum_set_next_event_tval_phys, ++ .set_next_event_virt = erratum_set_next_event_tval_virt, ++ }, ++#endif ++#ifdef CONFIG_ARM64_ERRATUM_858921 ++ { ++ .match_type = ate_match_local_cap_id, ++ .id = (void *)ARM64_WORKAROUND_858921, ++ .desc = "ARM erratum 858921", ++ .read_cntpct_el0 = arm64_858921_read_cntpct_el0, ++ .read_cntvct_el0 = arm64_858921_read_cntvct_el0, ++ }, ++#endif ++#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 ++ { ++ .match_type = ate_match_dt, ++ .id = "allwinner,erratum-unknown1", ++ .desc = "Allwinner erratum UNKNOWN1", ++ .read_cntp_tval_el0 = sun50i_a64_read_cntp_tval_el0, ++ .read_cntv_tval_el0 = sun50i_a64_read_cntv_tval_el0, ++ .read_cntpct_el0 = sun50i_a64_read_cntpct_el0, ++ .read_cntvct_el0 = sun50i_a64_read_cntvct_el0, ++ .set_next_event_phys = erratum_set_next_event_tval_phys, ++ .set_next_event_virt = erratum_set_next_event_tval_virt, ++ }, ++#endif ++}; ++ ++typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *, ++ const void *); ++ ++static ++bool arch_timer_check_dt_erratum(const struct arch_timer_erratum_workaround *wa, ++ const void *arg) ++{ ++ const struct device_node *np = arg; ++ ++ return of_property_read_bool(np, wa->id); ++} ++ ++static ++bool arch_timer_check_local_cap_erratum(const struct arch_timer_erratum_workaround *wa, ++ const void *arg) ++{ ++ return this_cpu_has_cap((uintptr_t)wa->id); ++} ++ ++ ++static ++bool arch_timer_check_acpi_oem_erratum(const struct arch_timer_erratum_workaround *wa, ++ const void *arg) ++{ ++ static const struct ate_acpi_oem_info empty_oem_info = {}; ++ const struct ate_acpi_oem_info *info = wa->id; ++ const struct acpi_table_header *table = arg; ++ ++ /* Iterate over the ACPI OEM info array, looking for a match */ ++ while (memcmp(info, &empty_oem_info, sizeof(*info))) { ++ if (!memcmp(info->oem_id, table->oem_id, ACPI_OEM_ID_SIZE) && ++ !memcmp(info->oem_table_id, table->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && ++ info->oem_revision == table->oem_revision) ++ return true; ++ ++ info++; ++ } ++ ++ return false; ++} ++ ++static const struct arch_timer_erratum_workaround * ++arch_timer_iterate_errata(enum arch_timer_erratum_match_type type, ++ ate_match_fn_t match_fn, ++ void *arg) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(ool_workarounds); i++) { ++ if (ool_workarounds[i].match_type != type) ++ continue; ++ ++ if (match_fn(&ool_workarounds[i], arg)) ++ return &ool_workarounds[i]; ++ } ++ ++ return NULL; ++} ++ ++static ++void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa, ++ bool local) ++{ ++ int i; ++ ++ if (local) { ++ __this_cpu_write(timer_unstable_counter_workaround, wa); ++ } else { ++ for_each_possible_cpu(i) ++ per_cpu(timer_unstable_counter_workaround, i) = wa; ++ } ++ ++ /* ++ * Use the locked version, as we're called from the CPU ++ * hotplug framework. Otherwise, we end-up in deadlock-land. ++ */ ++ static_branch_enable_cpuslocked(&arch_timer_read_ool_enabled); ++ ++ /* ++ * Don't use the vdso fastpath if errata require using the ++ * out-of-line counter accessor. We may change our mind pretty ++ * late in the game (with a per-CPU erratum, for example), so ++ * change both the default value and the vdso itself. 
++ */ ++ if (wa->read_cntvct_el0) { ++ clocksource_counter.archdata.vdso_direct = true; ++ vdso_default = true; ++ vdso_fix = true; ++ } ++} ++ ++static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type type, ++ void *arg) ++{ ++ const struct arch_timer_erratum_workaround *wa; ++ ate_match_fn_t match_fn = NULL; ++ bool local = false; ++ ++ switch (type) { ++ case ate_match_dt: ++ match_fn = arch_timer_check_dt_erratum; ++ break; ++ case ate_match_local_cap_id: ++ match_fn = arch_timer_check_local_cap_erratum; ++ local = true; ++ break; ++ case ate_match_acpi_oem_info: ++ match_fn = arch_timer_check_acpi_oem_erratum; ++ break; ++ default: ++ WARN_ON(1); ++ return; ++ } ++ ++ wa = arch_timer_iterate_errata(type, match_fn, arg); ++ if (!wa) ++ return; ++ ++ if (needs_unstable_timer_counter_workaround()) { ++ const struct arch_timer_erratum_workaround *__wa; ++ __wa = __this_cpu_read(timer_unstable_counter_workaround); ++ if (__wa && wa != __wa) ++ pr_warn("Can't enable workaround for %s (clashes with %s\n)", ++ wa->desc, __wa->desc); ++ ++ if (__wa) ++ return; ++ } ++ ++ arch_timer_enable_workaround(wa, local); ++ pr_info("Enabling %s workaround for %s\n", ++ local ? "local" : "global", wa->desc); ++} ++ ++#define erratum_handler(fn, r, ...) \ ++({ \ ++ bool __val; \ ++ if (needs_unstable_timer_counter_workaround()) { \ ++ const struct arch_timer_erratum_workaround *__wa; \ ++ __wa = __this_cpu_read(timer_unstable_counter_workaround); \ ++ if (__wa && __wa->fn) { \ ++ r = __wa->fn(__VA_ARGS__); \ ++ __val = true; \ ++ } else { \ ++ __val = false; \ ++ } \ ++ } else { \ ++ __val = false; \ ++ } \ ++ __val; \ ++}) ++ ++static bool arch_timer_this_cpu_has_cntvct_wa(void) ++{ ++ const struct arch_timer_erratum_workaround *wa; ++ ++ wa = __this_cpu_read(timer_unstable_counter_workaround); ++ return wa && wa->read_cntvct_el0; ++} ++#else ++#define arch_timer_check_ool_workaround(t,a) do { } while(0) ++#define erratum_set_next_event_tval_virt(...) ({BUG(); 0;}) ++#define erratum_set_next_event_tval_phys(...) ({BUG(); 0;}) ++#define erratum_handler(fn, r, ...) 
({false;}) ++#define arch_timer_this_cpu_has_cntvct_wa() ({false;}) ++#endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ ++ ++static __always_inline irqreturn_t timer_handler(const int access, ++ struct clock_event_device *evt) ++{ ++ unsigned long ctrl; ++ ++ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, evt); ++ if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { ++ ctrl |= ARCH_TIMER_CTRL_IT_MASK; ++ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt); ++ evt->event_handler(evt); ++ return IRQ_HANDLED; ++ } ++ ++ return IRQ_NONE; ++} ++ ++static irqreturn_t arch_timer_handler_virt(int irq, void *dev_id) ++{ ++ struct clock_event_device *evt = dev_id; ++ ++ return timer_handler(ARCH_TIMER_VIRT_ACCESS, evt); ++} ++ ++static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) ++{ ++ struct clock_event_device *evt = dev_id; ++ ++ return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); ++} ++ ++static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id) ++{ ++ struct clock_event_device *evt = dev_id; ++ ++ return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt); ++} ++ ++static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id) ++{ ++ struct clock_event_device *evt = dev_id; ++ ++ return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt); ++} ++ ++static __always_inline int timer_shutdown(const int access, ++ struct clock_event_device *clk) ++{ ++ unsigned long ctrl; ++ ++ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ++ ctrl &= ~ARCH_TIMER_CTRL_ENABLE; ++ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); ++ ++ return 0; ++} ++ ++static int arch_timer_shutdown_virt(struct clock_event_device *clk) ++{ ++ return timer_shutdown(ARCH_TIMER_VIRT_ACCESS, clk); ++} ++ ++static int arch_timer_shutdown_phys(struct clock_event_device *clk) ++{ ++ return timer_shutdown(ARCH_TIMER_PHYS_ACCESS, clk); ++} ++ ++static int arch_timer_shutdown_virt_mem(struct clock_event_device *clk) ++{ ++ return timer_shutdown(ARCH_TIMER_MEM_VIRT_ACCESS, clk); ++} ++ ++static int arch_timer_shutdown_phys_mem(struct clock_event_device *clk) ++{ ++ return timer_shutdown(ARCH_TIMER_MEM_PHYS_ACCESS, clk); ++} ++ ++static __always_inline void set_next_event(const int access, unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ unsigned long ctrl; ++ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ++ ctrl |= ARCH_TIMER_CTRL_ENABLE; ++ ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; ++ arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt, clk); ++ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); ++} ++ ++static int arch_timer_set_next_event_virt(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ int ret; ++ ++ if (erratum_handler(set_next_event_virt, ret, evt, clk)) ++ return ret; ++ ++ set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk); ++ return 0; ++} ++ ++static int arch_timer_set_next_event_phys(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ int ret; ++ ++ if (erratum_handler(set_next_event_phys, ret, evt, clk)) ++ return ret; ++ ++ set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk); ++ return 0; ++} ++ ++static int arch_timer_set_next_event_virt_mem(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ set_next_event(ARCH_TIMER_MEM_VIRT_ACCESS, evt, clk); ++ return 0; ++} ++ ++static int arch_timer_set_next_event_phys_mem(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ set_next_event(ARCH_TIMER_MEM_PHYS_ACCESS, evt, clk); ++ return 0; ++} ++ ++static void __arch_timer_setup(unsigned type, ++ struct 
clock_event_device *clk) ++{ ++ clk->features = CLOCK_EVT_FEAT_ONESHOT; ++ ++ if (type == ARCH_TIMER_TYPE_CP15) { ++ if (arch_timer_c3stop) ++ clk->features |= CLOCK_EVT_FEAT_C3STOP; ++ clk->name = "arch_sys_timer"; ++ clk->rating = 450; ++ clk->cpumask = cpumask_of(smp_processor_id()); ++ clk->irq = arch_timer_ppi[arch_timer_uses_ppi]; ++ switch (arch_timer_uses_ppi) { ++ case ARCH_TIMER_VIRT_PPI: ++ clk->set_state_shutdown = arch_timer_shutdown_virt; ++ clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; ++ clk->set_next_event = arch_timer_set_next_event_virt; ++ break; ++ case ARCH_TIMER_PHYS_SECURE_PPI: ++ case ARCH_TIMER_PHYS_NONSECURE_PPI: ++ case ARCH_TIMER_HYP_PPI: ++ clk->set_state_shutdown = arch_timer_shutdown_phys; ++ clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; ++ clk->set_next_event = arch_timer_set_next_event_phys; ++ break; ++ default: ++ BUG(); ++ } ++ ++ arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); ++ } else { ++ clk->features |= CLOCK_EVT_FEAT_DYNIRQ; ++ clk->name = "arch_mem_timer"; ++ clk->rating = 400; ++ clk->cpumask = cpu_possible_mask; ++ if (arch_timer_mem_use_virtual) { ++ clk->set_state_shutdown = arch_timer_shutdown_virt_mem; ++ clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem; ++ clk->set_next_event = ++ arch_timer_set_next_event_virt_mem; ++ } else { ++ clk->set_state_shutdown = arch_timer_shutdown_phys_mem; ++ clk->set_state_oneshot_stopped = arch_timer_shutdown_phys_mem; ++ clk->set_next_event = ++ arch_timer_set_next_event_phys_mem; ++ } ++ } ++ ++ clk->set_state_shutdown(clk); ++ ++ clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fffffff); ++} ++ ++static void arch_timer_evtstrm_enable(int divider) ++{ ++ u32 cntkctl = arch_timer_get_cntkctl(); ++ ++ cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK; ++ /* Set the divider and enable virtual event stream */ ++ cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT) ++ | ARCH_TIMER_VIRT_EVT_EN; ++ arch_timer_set_cntkctl(cntkctl); ++ elf_hwcap |= HWCAP_EVTSTRM; ++#ifdef CONFIG_AARCH32_EL0 ++ a32_elf_hwcap |= COMPAT_HWCAP_EVTSTRM; ++#endif ++ cpumask_set_cpu(smp_processor_id(), &evtstrm_available); ++} ++ ++static void arch_timer_configure_evtstream(void) ++{ ++ int evt_stream_div, pos; ++ ++ /* Find the closest power of two to the divisor */ ++ evt_stream_div = arch_timer_rate / ARCH_TIMER_EVT_STREAM_FREQ; ++ pos = fls(evt_stream_div); ++ if (pos > 1 && !(evt_stream_div & (1 << (pos - 2)))) ++ pos--; ++ /* enable event stream */ ++ arch_timer_evtstrm_enable(min(pos, 15)); ++} ++ ++static void arch_counter_set_user_access(void) ++{ ++ u32 cntkctl = arch_timer_get_cntkctl(); ++ ++ /* Disable user access to the timers and both counters */ ++ /* Also disable virtual event stream */ ++ cntkctl &= ~(ARCH_TIMER_USR_PT_ACCESS_EN ++ | ARCH_TIMER_USR_VT_ACCESS_EN ++ | ARCH_TIMER_USR_VCT_ACCESS_EN ++ | ARCH_TIMER_VIRT_EVT_EN ++ | ARCH_TIMER_USR_PCT_ACCESS_EN); ++ ++ /* ++ * Enable user access to the virtual counter if it doesn't ++ * need to be workaround. The vdso may have been already ++ * disabled though. 
++ */ ++ if (arch_timer_this_cpu_has_cntvct_wa() && !vdso_fix) ++ pr_info("CPU%d: Trapping CNTVCT access\n", smp_processor_id()); ++ else ++ cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; ++ ++ arch_timer_set_cntkctl(cntkctl); ++} ++ ++static bool arch_timer_has_nonsecure_ppi(void) ++{ ++ return (arch_timer_uses_ppi == ARCH_TIMER_PHYS_SECURE_PPI && ++ arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]); ++} ++ ++static u32 check_ppi_trigger(int irq) ++{ ++ u32 flags = irq_get_trigger_type(irq); ++ ++ if (flags != IRQF_TRIGGER_HIGH && flags != IRQF_TRIGGER_LOW) { ++ pr_warn("WARNING: Invalid trigger for IRQ%d, assuming level low\n", irq); ++ pr_warn("WARNING: Please fix your firmware\n"); ++ flags = IRQF_TRIGGER_LOW; ++ } ++ ++ return flags; ++} ++ ++static int arch_timer_starting_cpu(unsigned int cpu) ++{ ++ struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt); ++ u32 flags; ++ ++ __arch_timer_setup(ARCH_TIMER_TYPE_CP15, clk); ++ ++ flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]); ++ enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags); ++ ++ if (arch_timer_has_nonsecure_ppi()) { ++ flags = check_ppi_trigger(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]); ++ enable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI], ++ flags); ++ } ++ ++ arch_counter_set_user_access(); ++ if (evtstrm_enable) ++ arch_timer_configure_evtstream(); ++ ++ return 0; ++} ++ ++/* ++ * For historical reasons, when probing with DT we use whichever (non-zero) ++ * rate was probed first, and don't verify that others match. If the first node ++ * probed has a clock-frequency property, this overrides the HW register. ++ */ ++static void arch_timer_of_configure_rate(u32 rate, struct device_node *np) ++{ ++ /* Who has more than one independent system counter? */ ++ if (arch_timer_rate) ++ return; ++ ++ if (of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) ++ arch_timer_rate = rate; ++ ++ /* Check the timer frequency. */ ++ if (arch_timer_rate == 0) ++ pr_warn("frequency not available\n"); ++} ++ ++static void arch_timer_banner(unsigned type) ++{ ++ pr_info("%s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n", ++ type & ARCH_TIMER_TYPE_CP15 ? "cp15" : "", ++ type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ? ++ " and " : "", ++ type & ARCH_TIMER_TYPE_MEM ? "mmio" : "", ++ (unsigned long)arch_timer_rate / 1000000, ++ (unsigned long)(arch_timer_rate / 10000) % 100, ++ type & ARCH_TIMER_TYPE_CP15 ? ++ (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ? "virt" : "phys" : ++ "", ++ type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ? "/" : "", ++ type & ARCH_TIMER_TYPE_MEM ? ++ arch_timer_mem_use_virtual ? "virt" : "phys" : ++ ""); ++} ++ ++u32 arch_timer_get_rate(void) ++{ ++ return arch_timer_rate; ++} ++ ++bool arch_timer_evtstrm_available(void) ++{ ++ /* ++ * We might get called from a preemptible context. This is fine ++ * because availability of the event stream should be always the same ++ * for a preemptible context and context where we might resume a task. 
++ */ ++ return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available); ++} ++ ++static u64 arch_counter_get_cntvct_mem(void) ++{ ++ u32 vct_lo, vct_hi, tmp_hi; ++ ++ do { ++ vct_hi = readl_relaxed(arch_counter_base + CNTVCT_HI); ++ vct_lo = readl_relaxed(arch_counter_base + CNTVCT_LO); ++ tmp_hi = readl_relaxed(arch_counter_base + CNTVCT_HI); ++ } while (vct_hi != tmp_hi); ++ ++ return ((u64) vct_hi << 32) | vct_lo; ++} ++ ++static struct arch_timer_kvm_info arch_timer_kvm_info; ++ ++struct arch_timer_kvm_info *arch_timer_get_kvm_info(void) ++{ ++ return &arch_timer_kvm_info; ++} ++ ++static void __init arch_counter_register(unsigned type) ++{ ++ u64 start_count; ++ ++ /* Register the CP15 based counter if we have one */ ++ if (type & ARCH_TIMER_TYPE_CP15) { ++ if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) || ++ arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ++ arch_timer_read_counter = arch_counter_get_cntvct; ++ else ++ arch_timer_read_counter = arch_counter_get_cntpct; ++ ++ clocksource_counter.archdata.vdso_direct = vdso_default; ++ clocksource_counter.archdata.vdso_fix = vdso_fix; ++ } else { ++ arch_timer_read_counter = arch_counter_get_cntvct_mem; ++ } ++ ++ if (!arch_counter_suspend_stop) ++ clocksource_counter.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; ++ start_count = arch_timer_read_counter(); ++ clocksource_register_hz(&clocksource_counter, arch_timer_rate); ++ cyclecounter.mult = clocksource_counter.mult; ++ cyclecounter.shift = clocksource_counter.shift; ++ timecounter_init(&arch_timer_kvm_info.timecounter, ++ &cyclecounter, start_count); ++ ++ /* 56 bits minimum, so we assume worst case rollover */ ++ sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate); ++} ++ ++static void arch_timer_stop(struct clock_event_device *clk) ++{ ++ pr_debug("disable IRQ%d cpu #%d\n", clk->irq, smp_processor_id()); ++ ++ disable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi]); ++ if (arch_timer_has_nonsecure_ppi()) ++ disable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]); ++ ++ clk->set_state_shutdown(clk); ++} ++ ++static int arch_timer_dying_cpu(unsigned int cpu) ++{ ++ struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt); ++ ++ cpumask_clear_cpu(smp_processor_id(), &evtstrm_available); ++ ++ arch_timer_stop(clk); ++ return 0; ++} ++ ++#ifdef CONFIG_CPU_PM ++static DEFINE_PER_CPU(unsigned long, saved_cntkctl); ++static int arch_timer_cpu_pm_notify(struct notifier_block *self, ++ unsigned long action, void *hcpu) ++{ ++ if (action == CPU_PM_ENTER) { ++ __this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl()); ++ ++ cpumask_clear_cpu(smp_processor_id(), &evtstrm_available); ++ } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) { ++ arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl)); ++ ++ if (elf_hwcap & HWCAP_EVTSTRM) ++ cpumask_set_cpu(smp_processor_id(), &evtstrm_available); ++ } ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block arch_timer_cpu_pm_notifier = { ++ .notifier_call = arch_timer_cpu_pm_notify, ++}; ++ ++static int __init arch_timer_cpu_pm_init(void) ++{ ++ return cpu_pm_register_notifier(&arch_timer_cpu_pm_notifier); ++} ++ ++static void __init arch_timer_cpu_pm_deinit(void) ++{ ++ WARN_ON(cpu_pm_unregister_notifier(&arch_timer_cpu_pm_notifier)); ++} ++ ++#else ++static int __init arch_timer_cpu_pm_init(void) ++{ ++ return 0; ++} ++ ++static void __init arch_timer_cpu_pm_deinit(void) ++{ ++} ++#endif ++ ++static int __init arch_timer_register(void) ++{ ++ int err; ++ int ppi; ++ ++ arch_timer_evt = 
alloc_percpu(struct clock_event_device); ++ if (!arch_timer_evt) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ ppi = arch_timer_ppi[arch_timer_uses_ppi]; ++ switch (arch_timer_uses_ppi) { ++ case ARCH_TIMER_VIRT_PPI: ++ err = request_percpu_irq(ppi, arch_timer_handler_virt, ++ "arch_timer", arch_timer_evt); ++ break; ++ case ARCH_TIMER_PHYS_SECURE_PPI: ++ case ARCH_TIMER_PHYS_NONSECURE_PPI: ++ err = request_percpu_irq(ppi, arch_timer_handler_phys, ++ "arch_timer", arch_timer_evt); ++ if (!err && arch_timer_has_nonsecure_ppi()) { ++ ppi = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]; ++ err = request_percpu_irq(ppi, arch_timer_handler_phys, ++ "arch_timer", arch_timer_evt); ++ if (err) ++ free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_SECURE_PPI], ++ arch_timer_evt); ++ } ++ break; ++ case ARCH_TIMER_HYP_PPI: ++ err = request_percpu_irq(ppi, arch_timer_handler_phys, ++ "arch_timer", arch_timer_evt); ++ break; ++ default: ++ BUG(); ++ } ++ ++ if (err) { ++ pr_err("can't register interrupt %d (%d)\n", ppi, err); ++ goto out_free; ++ } ++ ++ err = arch_timer_cpu_pm_init(); ++ if (err) ++ goto out_unreg_notify; ++ ++ /* Register and immediately configure the timer on the boot CPU */ ++ err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING, ++ "clockevents/arm/arch_timer:starting", ++ arch_timer_starting_cpu, arch_timer_dying_cpu); ++ if (err) ++ goto out_unreg_cpupm; ++ return 0; ++ ++out_unreg_cpupm: ++ arch_timer_cpu_pm_deinit(); ++ ++out_unreg_notify: ++ free_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], arch_timer_evt); ++ if (arch_timer_has_nonsecure_ppi()) ++ free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI], ++ arch_timer_evt); ++ ++out_free: ++ free_percpu(arch_timer_evt); ++out: ++ return err; ++} ++ ++static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq) ++{ ++ int ret; ++ irq_handler_t func; ++ struct arch_timer *t; ++ ++ t = kzalloc(sizeof(*t), GFP_KERNEL); ++ if (!t) ++ return -ENOMEM; ++ ++ t->base = base; ++ t->evt.irq = irq; ++ __arch_timer_setup(ARCH_TIMER_TYPE_MEM, &t->evt); ++ ++ if (arch_timer_mem_use_virtual) ++ func = arch_timer_handler_virt_mem; ++ else ++ func = arch_timer_handler_phys_mem; ++ ++ ret = request_irq(irq, func, IRQF_TIMER, "arch_mem_timer", &t->evt); ++ if (ret) { ++ pr_err("Failed to request mem timer irq\n"); ++ kfree(t); ++ } ++ ++ return ret; ++} ++ ++static const struct of_device_id arch_timer_of_match[] __initconst = { ++ { .compatible = "arm,armv7-timer", }, ++ { .compatible = "arm,armv8-timer", }, ++ {}, ++}; ++ ++static const struct of_device_id arch_timer_mem_of_match[] __initconst = { ++ { .compatible = "arm,armv7-timer-mem", }, ++ {}, ++}; ++ ++static bool __init arch_timer_needs_of_probing(void) ++{ ++ struct device_node *dn; ++ bool needs_probing = false; ++ unsigned int mask = ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM; ++ ++ /* We have two timers, and both device-tree nodes are probed. */ ++ if ((arch_timers_present & mask) == mask) ++ return false; ++ ++ /* ++ * Only one type of timer is probed, ++ * check if we have another type of timer node in device-tree. 
++ */ ++ if (arch_timers_present & ARCH_TIMER_TYPE_CP15) ++ dn = of_find_matching_node(NULL, arch_timer_mem_of_match); ++ else ++ dn = of_find_matching_node(NULL, arch_timer_of_match); ++ ++ if (dn && of_device_is_available(dn)) ++ needs_probing = true; ++ ++ of_node_put(dn); ++ ++ return needs_probing; ++} ++ ++static int __init arch_timer_common_init(void) ++{ ++ arch_timer_banner(arch_timers_present); ++ arch_counter_register(arch_timers_present); ++ return arch_timer_arch_init(); ++} ++ ++/** ++ * arch_timer_select_ppi() - Select suitable PPI for the current system. ++ * ++ * If HYP mode is available, we know that the physical timer ++ * has been configured to be accessible from PL1. Use it, so ++ * that a guest can use the virtual timer instead. ++ * ++ * On ARMv8.1 with VH extensions, the kernel runs in HYP. VHE ++ * accesses to CNTP_*_EL1 registers are silently redirected to ++ * their CNTHP_*_EL2 counterparts, and use a different PPI ++ * number. ++ * ++ * If no interrupt provided for virtual timer, we'll have to ++ * stick to the physical timer. It'd better be accessible... ++ * For arm64 we never use the secure interrupt. ++ * ++ * Return: a suitable PPI type for the current system. ++ */ ++static enum arch_timer_ppi_nr __init arch_timer_select_ppi(void) ++{ ++ if (is_kernel_in_hyp_mode()) ++ return ARCH_TIMER_HYP_PPI; ++ ++ if (!is_hyp_mode_available() && arch_timer_ppi[ARCH_TIMER_VIRT_PPI]) ++ return ARCH_TIMER_VIRT_PPI; ++ ++ if (IS_ENABLED(CONFIG_ARM64)) ++ return ARCH_TIMER_PHYS_NONSECURE_PPI; ++ ++ return ARCH_TIMER_PHYS_SECURE_PPI; ++} ++ ++static int __init arch_timer_of_init(struct device_node *np) ++{ ++ int i, ret; ++ u32 rate; ++ ++ if (arch_timers_present & ARCH_TIMER_TYPE_CP15) { ++ pr_warn("multiple nodes in dt, skipping\n"); ++ return 0; ++ } ++ ++ arch_timers_present |= ARCH_TIMER_TYPE_CP15; ++ for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) ++ arch_timer_ppi[i] = irq_of_parse_and_map(np, i); ++ ++ arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; ++ ++ rate = arch_timer_get_cntfrq(); ++ arch_timer_of_configure_rate(rate, np); ++ ++ arch_timer_c3stop = !of_property_read_bool(np, "always-on"); ++ ++ /* Check for globally applicable workarounds */ ++ arch_timer_check_ool_workaround(ate_match_dt, np); ++ ++ /* ++ * If we cannot rely on firmware initializing the timer registers then ++ * we should use the physical timers instead. ++ */ ++ if (IS_ENABLED(CONFIG_ARM) && ++ of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) ++ arch_timer_uses_ppi = ARCH_TIMER_PHYS_SECURE_PPI; ++ else ++ arch_timer_uses_ppi = arch_timer_select_ppi(); ++ ++ if (!arch_timer_ppi[arch_timer_uses_ppi]) { ++ pr_err("No interrupt available, giving up\n"); ++ return -EINVAL; ++ } ++ ++ /* On some systems, the counter stops ticking when in suspend. 
*/ ++ arch_counter_suspend_stop = of_property_read_bool(np, ++ "arm,no-tick-in-suspend"); ++ ++ ret = arch_timer_register(); ++ if (ret) ++ return ret; ++ ++ if (arch_timer_needs_of_probing()) ++ return 0; ++ ++ return arch_timer_common_init(); ++} ++TIMER_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init); ++TIMER_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init); ++ ++static u32 __init ++arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame) ++{ ++ void __iomem *base; ++ u32 rate; ++ ++ base = ioremap(frame->cntbase, frame->size); ++ if (!base) { ++ pr_err("Unable to map frame @ %pa\n", &frame->cntbase); ++ return 0; ++ } ++ ++ rate = readl_relaxed(base + CNTFRQ); ++ ++ iounmap(base); ++ ++ return rate; ++} ++ ++static struct arch_timer_mem_frame * __init ++arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem) ++{ ++ struct arch_timer_mem_frame *frame, *best_frame = NULL; ++ void __iomem *cntctlbase; ++ u32 cnttidr; ++ int i; ++ ++ cntctlbase = ioremap(timer_mem->cntctlbase, timer_mem->size); ++ if (!cntctlbase) { ++ pr_err("Can't map CNTCTLBase @ %pa\n", ++ &timer_mem->cntctlbase); ++ return NULL; ++ } ++ ++ cnttidr = readl_relaxed(cntctlbase + CNTTIDR); ++ ++ /* ++ * Try to find a virtual capable frame. Otherwise fall back to a ++ * physical capable frame. ++ */ ++ for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) { ++ u32 cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT | ++ CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT; ++ ++ frame = &timer_mem->frame[i]; ++ if (!frame->valid) ++ continue; ++ ++ /* Try enabling everything, and see what sticks */ ++ writel_relaxed(cntacr, cntctlbase + CNTACR(i)); ++ cntacr = readl_relaxed(cntctlbase + CNTACR(i)); ++ ++ if ((cnttidr & CNTTIDR_VIRT(i)) && ++ !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT))) { ++ best_frame = frame; ++ arch_timer_mem_use_virtual = true; ++ break; ++ } ++ ++ if (~cntacr & (CNTACR_RWPT | CNTACR_RPCT)) ++ continue; ++ ++ best_frame = frame; ++ } ++ ++ iounmap(cntctlbase); ++ ++ return best_frame; ++} ++ ++static int __init ++arch_timer_mem_frame_register(struct arch_timer_mem_frame *frame) ++{ ++ void __iomem *base; ++ int ret, irq = 0; ++ ++ if (arch_timer_mem_use_virtual) ++ irq = frame->virt_irq; ++ else ++ irq = frame->phys_irq; ++ ++ if (!irq) { ++ pr_err("Frame missing %s irq.\n", ++ arch_timer_mem_use_virtual ? 
"virt" : "phys"); ++ return -EINVAL; ++ } ++ ++ if (!request_mem_region(frame->cntbase, frame->size, ++ "arch_mem_timer")) ++ return -EBUSY; ++ ++ base = ioremap(frame->cntbase, frame->size); ++ if (!base) { ++ pr_err("Can't map frame's registers\n"); ++ return -ENXIO; ++ } ++ ++ ret = arch_timer_mem_register(base, irq); ++ if (ret) { ++ iounmap(base); ++ return ret; ++ } ++ ++ arch_counter_base = base; ++ arch_timers_present |= ARCH_TIMER_TYPE_MEM; ++ ++ return 0; ++} ++ ++static int __init arch_timer_mem_of_init(struct device_node *np) ++{ ++ struct arch_timer_mem *timer_mem; ++ struct arch_timer_mem_frame *frame; ++ struct device_node *frame_node; ++ struct resource res; ++ int ret = -EINVAL; ++ u32 rate; ++ ++ timer_mem = kzalloc(sizeof(*timer_mem), GFP_KERNEL); ++ if (!timer_mem) ++ return -ENOMEM; ++ ++ if (of_address_to_resource(np, 0, &res)) ++ goto out; ++ timer_mem->cntctlbase = res.start; ++ timer_mem->size = resource_size(&res); ++ ++ for_each_available_child_of_node(np, frame_node) { ++ u32 n; ++ struct arch_timer_mem_frame *frame; ++ ++ if (of_property_read_u32(frame_node, "frame-number", &n)) { ++ pr_err(FW_BUG "Missing frame-number.\n"); ++ of_node_put(frame_node); ++ goto out; ++ } ++ if (n >= ARCH_TIMER_MEM_MAX_FRAMES) { ++ pr_err(FW_BUG "Wrong frame-number, only 0-%u are permitted.\n", ++ ARCH_TIMER_MEM_MAX_FRAMES - 1); ++ of_node_put(frame_node); ++ goto out; ++ } ++ frame = &timer_mem->frame[n]; ++ ++ if (frame->valid) { ++ pr_err(FW_BUG "Duplicated frame-number.\n"); ++ of_node_put(frame_node); ++ goto out; ++ } ++ ++ if (of_address_to_resource(frame_node, 0, &res)) { ++ of_node_put(frame_node); ++ goto out; ++ } ++ frame->cntbase = res.start; ++ frame->size = resource_size(&res); ++ ++ frame->virt_irq = irq_of_parse_and_map(frame_node, ++ ARCH_TIMER_VIRT_SPI); ++ frame->phys_irq = irq_of_parse_and_map(frame_node, ++ ARCH_TIMER_PHYS_SPI); ++ ++ frame->valid = true; ++ } ++ ++ frame = arch_timer_mem_find_best_frame(timer_mem); ++ if (!frame) { ++ pr_err("Unable to find a suitable frame in timer @ %pa\n", ++ &timer_mem->cntctlbase); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ rate = arch_timer_mem_frame_get_cntfrq(frame); ++ arch_timer_of_configure_rate(rate, np); ++ ++ ret = arch_timer_mem_frame_register(frame); ++ if (!ret && !arch_timer_needs_of_probing()) ++ ret = arch_timer_common_init(); ++out: ++ kfree(timer_mem); ++ return ret; ++} ++TIMER_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem", ++ arch_timer_mem_of_init); ++ ++#ifdef CONFIG_ACPI_GTDT ++static int __init ++arch_timer_mem_verify_cntfrq(struct arch_timer_mem *timer_mem) ++{ ++ struct arch_timer_mem_frame *frame; ++ u32 rate; ++ int i; ++ ++ for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) { ++ frame = &timer_mem->frame[i]; ++ ++ if (!frame->valid) ++ continue; ++ ++ rate = arch_timer_mem_frame_get_cntfrq(frame); ++ if (rate == arch_timer_rate) ++ continue; ++ ++ pr_err(FW_BUG "CNTFRQ mismatch: frame @ %pa: (0x%08lx), CPU: (0x%08lx)\n", ++ &frame->cntbase, ++ (unsigned long)rate, (unsigned long)arch_timer_rate); ++ ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int __init arch_timer_mem_acpi_init(int platform_timer_count) ++{ ++ struct arch_timer_mem *timers, *timer; ++ struct arch_timer_mem_frame *frame, *best_frame = NULL; ++ int timer_count, i, ret = 0; ++ ++ timers = kcalloc(platform_timer_count, sizeof(*timers), ++ GFP_KERNEL); ++ if (!timers) ++ return -ENOMEM; ++ ++ ret = acpi_arch_timer_mem_init(timers, &timer_count); ++ if (ret || !timer_count) ++ goto out; ++ ++ /* ++ * While 
unlikely, it's theoretically possible that none of the frames ++ * in a timer expose the combination of feature we want. ++ */ ++ for (i = 0; i < timer_count; i++) { ++ timer = &timers[i]; ++ ++ frame = arch_timer_mem_find_best_frame(timer); ++ if (!best_frame) ++ best_frame = frame; ++ ++ ret = arch_timer_mem_verify_cntfrq(timer); ++ if (ret) { ++ pr_err("Disabling MMIO timers due to CNTFRQ mismatch\n"); ++ goto out; ++ } ++ ++ if (!best_frame) /* implies !frame */ ++ /* ++ * Only complain about missing suitable frames if we ++ * haven't already found one in a previous iteration. ++ */ ++ pr_err("Unable to find a suitable frame in timer @ %pa\n", ++ &timer->cntctlbase); ++ } ++ ++ if (best_frame) ++ ret = arch_timer_mem_frame_register(best_frame); ++out: ++ kfree(timers); ++ return ret; ++} ++ ++/* Initialize per-processor generic timer and memory-mapped timer(if present) */ ++static int __init arch_timer_acpi_init(struct acpi_table_header *table) ++{ ++ int ret, platform_timer_count; ++ ++ if (arch_timers_present & ARCH_TIMER_TYPE_CP15) { ++ pr_warn("already initialized, skipping\n"); ++ return -EINVAL; ++ } ++ ++ arch_timers_present |= ARCH_TIMER_TYPE_CP15; ++ ++ ret = acpi_gtdt_init(table, &platform_timer_count); ++ if (ret) { ++ pr_err("Failed to init GTDT table.\n"); ++ return ret; ++ } ++ ++ arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI] = ++ acpi_gtdt_map_ppi(ARCH_TIMER_PHYS_NONSECURE_PPI); ++ ++ arch_timer_ppi[ARCH_TIMER_VIRT_PPI] = ++ acpi_gtdt_map_ppi(ARCH_TIMER_VIRT_PPI); ++ ++ arch_timer_ppi[ARCH_TIMER_HYP_PPI] = ++ acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI); ++ ++ arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; ++ ++ /* ++ * When probing via ACPI, we have no mechanism to override the sysreg ++ * CNTFRQ value. This *must* be correct. ++ */ ++ arch_timer_rate = arch_timer_get_cntfrq(); ++ if (!arch_timer_rate) { ++ pr_err(FW_BUG "frequency not available.\n"); ++ return -EINVAL; ++ } ++ ++ arch_timer_uses_ppi = arch_timer_select_ppi(); ++ if (!arch_timer_ppi[arch_timer_uses_ppi]) { ++ pr_err("No interrupt available, giving up\n"); ++ return -EINVAL; ++ } ++ ++ /* Always-on capability */ ++ arch_timer_c3stop = acpi_gtdt_c3stop(arch_timer_uses_ppi); ++ ++ /* Check for globally applicable workarounds */ ++ arch_timer_check_ool_workaround(ate_match_acpi_oem_info, table); ++ ++ ret = arch_timer_register(); ++ if (ret) ++ return ret; ++ ++ if (platform_timer_count && ++ arch_timer_mem_acpi_init(platform_timer_count)) ++ pr_err("Failed to initialize memory-mapped timer.\n"); ++ ++ return arch_timer_common_init(); ++} ++TIMER_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init); ++#endif +diff -uprN kernel/drivers/clocksource/arm_global_timer.c kernel_new/drivers/clocksource/arm_global_timer.c +--- kernel/drivers/clocksource/arm_global_timer.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/arm_global_timer.c 2021-04-01 18:28:07.661863280 +0800 +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + + #include + +@@ -49,10 +50,69 @@ + * the units for all operations. 
+ */ + static void __iomem *gt_base; ++static unsigned long gt_pbase; ++static struct clk *gt_clk; + static unsigned long gt_clk_rate; + static int gt_ppi; + static struct clock_event_device __percpu *gt_evt; + ++#ifdef CONFIG_IPIPE ++ ++static struct clocksource gt_clocksource; ++ ++static int gt_clockevent_ack(struct clock_event_device *evt); ++ ++static DEFINE_PER_CPU(struct ipipe_timer, gt_itimer); ++ ++static unsigned int refresh_gt_freq(void) ++{ ++ gt_clk_rate = clk_get_rate(gt_clk); ++ ++ __clocksource_update_freq_hz(>_clocksource, gt_clk_rate); ++ ++ return gt_clk_rate; ++} ++ ++static inline void gt_ipipe_cs_setup(void) ++{ ++ struct __ipipe_tscinfo tsc_info = { ++ .type = IPIPE_TSC_TYPE_FREERUNNING, ++ .freq = gt_clk_rate, ++ .counter_vaddr = (unsigned long)gt_base, ++ .u = { ++ { ++ .counter_paddr = gt_pbase, ++ .mask = 0xffffffff, ++ } ++ }, ++ .refresh_freq = refresh_gt_freq, ++ }; ++ ++ __ipipe_tsc_register(&tsc_info); ++} ++ ++static void gt_itimer_ack(void) ++{ ++ struct clock_event_device *evt = this_cpu_ptr(gt_evt); ++ gt_clockevent_ack(evt); ++} ++ ++static inline void gt_ipipe_evt_setup(struct clock_event_device *evt) ++{ ++ evt->ipipe_timer = this_cpu_ptr(>_itimer); ++ evt->ipipe_timer->irq = evt->irq; ++ evt->ipipe_timer->ack = gt_itimer_ack; ++ evt->ipipe_timer->freq = gt_clk_rate; ++} ++ ++#else ++ ++static inline void gt_ipipe_cs_setup(void) { } ++ ++static inline void gt_ipipe_evt_setup(struct clock_event_device *evt) { } ++ ++#endif /* CONFIG_IPIPE */ ++ + /* + * To get the value from the Global Timer Counter register proceed as follows: + * 1. Read the upper 32-bit timer counter register +@@ -137,13 +197,11 @@ static int gt_clockevent_set_next_event( + return 0; + } + +-static irqreturn_t gt_clockevent_interrupt(int irq, void *dev_id) ++static int gt_clockevent_ack(struct clock_event_device *evt) + { +- struct clock_event_device *evt = dev_id; +- + if (!(readl_relaxed(gt_base + GT_INT_STATUS) & + GT_INT_STATUS_EVENT_FLAG)) +- return IRQ_NONE; ++ return IS_ENABLED(CONFIG_IPIPE); + + /** + * ERRATA 740657( Global Timer can send 2 interrupts for +@@ -156,10 +214,23 @@ static irqreturn_t gt_clockevent_interru + * the Global Timer flag _after_ having incremented + * the Comparator register value to a higher value. 
+ */ +- if (clockevent_state_oneshot(evt)) ++ if (clockevent_ipipe_stolen(evt) || clockevent_state_oneshot(evt)) + gt_compare_set(ULONG_MAX, 0); + + writel_relaxed(GT_INT_STATUS_EVENT_FLAG, gt_base + GT_INT_STATUS); ++ ++ return 1; ++} ++ ++static irqreturn_t gt_clockevent_interrupt(int irq, void *dev_id) ++{ ++ struct clock_event_device *evt = dev_id; ++ ++ if (!clockevent_ipipe_stolen(evt)) { ++ if (!gt_clockevent_ack(evt)) ++ return IRQ_NONE; ++ } ++ + evt->event_handler(evt); + + return IRQ_HANDLED; +@@ -180,6 +251,7 @@ static int gt_starting_cpu(unsigned int + clk->cpumask = cpumask_of(cpu); + clk->rating = 300; + clk->irq = gt_ppi; ++ gt_ipipe_evt_setup(clk); + clockevents_config_and_register(clk, gt_clk_rate, + 1, 0xffffffff); + enable_percpu_irq(clk->irq, IRQ_TYPE_NONE); +@@ -252,13 +324,14 @@ static int __init gt_clocksource_init(vo + #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK + sched_clock_register(gt_sched_clock_read, 64, gt_clk_rate); + #endif ++ gt_ipipe_cs_setup(); + return clocksource_register_hz(>_clocksource, gt_clk_rate); + } + + static int __init global_timer_of_register(struct device_node *np) + { +- struct clk *gt_clk; + int err = 0; ++ struct resource res; + + /* + * In A9 r2p0 the comparators for each processor with the global timer +@@ -283,6 +356,11 @@ static int __init global_timer_of_regist + return -ENXIO; + } + ++ if (of_address_to_resource(np, 0, &res)) ++ res.start = 0; ++ ++ gt_pbase = res.start; ++ + gt_clk = of_clk_get(np, 0); + if (!IS_ERR(gt_clk)) { + err = clk_prepare_enable(gt_clk); +diff -uprN kernel/drivers/clocksource/bcm2835_timer.c kernel_new/drivers/clocksource/bcm2835_timer.c +--- kernel/drivers/clocksource/bcm2835_timer.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/bcm2835_timer.c 2021-04-01 18:28:07.661863280 +0800 +@@ -29,6 +29,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + +@@ -39,6 +42,7 @@ + #define MAX_TIMER 3 + #define DEFAULT_TIMER 3 + ++ + struct bcm2835_timer { + void __iomem *control; + void __iomem *compare; +@@ -46,9 +50,53 @@ struct bcm2835_timer { + struct clock_event_device evt; + struct irqaction act; + }; +- + static void __iomem *system_clock __read_mostly; + ++#ifdef CONFIG_IPIPE ++ ++static void __iomem *t_base; ++static unsigned long t_pbase; ++ ++static inline void bcm2835_ipipe_cs_setup(unsigned int freq) ++{ ++ struct __ipipe_tscinfo tsc_info = { ++ .type = IPIPE_TSC_TYPE_FREERUNNING, ++ .freq = freq, ++ .counter_vaddr = (unsigned long)t_base + 0x04, ++ .u = { ++ { ++ .counter_paddr = t_pbase + 0x04, ++ .mask = 0xffffffff, ++ } ++ }, ++ }; ++ ++ __ipipe_tsc_register(&tsc_info); ++} ++ ++static struct ipipe_timer bcm2835_itimer; ++ ++static void bcm2835_itimer_ack(void) ++{ ++ struct bcm2835_timer *timer = container_of(bcm2835_itimer.host_timer, ++ struct bcm2835_timer, evt); ++ writel(timer->match_mask, timer->control); ++} ++ ++static inline void bcm2835_ipipe_evt_setup(struct clock_event_device *evt, ++ int freq) ++{ ++ evt->ipipe_timer = &bcm2835_itimer; ++ evt->ipipe_timer->irq = evt->irq; ++ evt->ipipe_timer->ack = bcm2835_itimer_ack; ++ evt->ipipe_timer->freq = freq; ++} ++ ++#else ++static inline void bcm2835_ipipe_cs_setup(void) { } ++static inline void bcm2835_ipipe_evt_setup(struct clock_event_device *evt) { } ++#endif /* CONFIG_IPIPE */ ++ + static u64 notrace bcm2835_sched_read(void) + { + return readl_relaxed(system_clock); +@@ -59,8 +107,7 @@ static int bcm2835_time_set_next_event(u + { + struct bcm2835_timer *timer = 
container_of(evt_dev, + struct bcm2835_timer, evt); +- writel_relaxed(readl_relaxed(system_clock) + event, +- timer->compare); ++ writel_relaxed(readl_relaxed(system_clock) + event, timer->compare); + return 0; + } + +@@ -68,9 +115,13 @@ static irqreturn_t bcm2835_time_interrup + { + struct bcm2835_timer *timer = dev_id; + void (*event_handler)(struct clock_event_device *); ++ ++ if (clockevent_ipipe_stolen(&timer->evt)) { ++ goto handle; ++ } + if (readl_relaxed(timer->control) & timer->match_mask) { + writel_relaxed(timer->match_mask, timer->control); +- ++ handle: + event_handler = READ_ONCE(timer->evt.event_handler); + if (event_handler) + event_handler(&timer->evt); +@@ -93,6 +144,17 @@ static int __init bcm2835_timer_init(str + return -ENXIO; + } + ++ if (IS_ENABLED(CONFIG_IPIPE)) { ++ struct resource res; ++ int ret; ++ ++ ret = of_address_to_resource(node, 0, &res); ++ if (ret) ++ res.start = 0; ++ t_base = base; ++ t_pbase = res.start; ++ } ++ + ret = of_property_read_u32(node, "clock-frequency", &freq); + if (ret) { + pr_err("Can't read clock-frequency\n"); +@@ -127,11 +189,22 @@ static int __init bcm2835_timer_init(str + timer->evt.set_next_event = bcm2835_time_set_next_event; + timer->evt.cpumask = cpumask_of(0); + timer->act.name = node->name; +- timer->act.flags = IRQF_TIMER | IRQF_SHARED; ++ timer->act.flags = IRQF_TIMER; + timer->act.dev_id = timer; + timer->act.handler = bcm2835_time_interrupt; + +- ret = setup_irq(irq, &timer->act); ++ if (IS_ENABLED(CONFIG_IPIPE)) { ++ bcm2835_ipipe_cs_setup(freq); ++ bcm2835_ipipe_evt_setup(&timer->evt, freq); ++ timer->evt.ipipe_timer = &bcm2835_itimer; ++ timer->evt.ipipe_timer->irq = irq; ++ timer->evt.ipipe_timer->ack = bcm2835_itimer_ack; ++ timer->evt.ipipe_timer->freq = freq; ++ } else { ++ timer->act.flags |= IRQF_SHARED; ++ } ++ ++ ret = setup_irq(irq, &timer->act); + if (ret) { + pr_err("Can't set up timer IRQ\n"); + goto err_iounmap; +diff -uprN kernel/drivers/clocksource/dw_apb_timer.c kernel_new/drivers/clocksource/dw_apb_timer.c +--- kernel/drivers/clocksource/dw_apb_timer.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/dw_apb_timer.c 2021-04-01 18:28:07.661863280 +0800 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -384,7 +385,7 @@ static void apbt_restart_clocksource(str + */ + struct dw_apb_clocksource * + dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base, +- unsigned long freq) ++ unsigned long phys, unsigned long freq) + { + struct dw_apb_clocksource *dw_cs = kzalloc(sizeof(*dw_cs), GFP_KERNEL); + +@@ -399,10 +400,22 @@ dw_apb_clocksource_init(unsigned rating, + dw_cs->cs.mask = CLOCKSOURCE_MASK(32); + dw_cs->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; + dw_cs->cs.resume = apbt_restart_clocksource; ++ dw_cs->phys = phys; + + return dw_cs; + } + ++#ifdef CONFIG_IPIPE ++static struct __ipipe_tscinfo apb_tsc_info = { ++ .type = IPIPE_TSC_TYPE_FREERUNNING_COUNTDOWN, ++ .u = { ++ .dec = { ++ .mask = 0xffffffffU, ++ }, ++ }, ++}; ++#endif ++ + /** + * dw_apb_clocksource_register() - register the APB clocksource. 
+ * +@@ -411,6 +424,12 @@ dw_apb_clocksource_init(unsigned rating, + void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs) + { + clocksource_register_hz(&dw_cs->cs, dw_cs->timer.freq); ++#ifdef CONFIG_IPIPE ++ apb_tsc_info.u.dec.counter = (void *)(dw_cs->phys + APBTMR_N_CURRENT_VALUE); ++ apb_tsc_info.counter_vaddr = (unsigned long)dw_cs->timer.base + APBTMR_N_CURRENT_VALUE; ++ apb_tsc_info.freq = dw_cs->timer.freq; ++ __ipipe_tsc_register(&apb_tsc_info); ++#endif + } + + /** +diff -uprN kernel/drivers/clocksource/dw_apb_timer_of.c kernel_new/drivers/clocksource/dw_apb_timer_of.c +--- kernel/drivers/clocksource/dw_apb_timer_of.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/dw_apb_timer_of.c 2021-04-01 18:28:07.661863280 +0800 +@@ -25,16 +25,20 @@ + #include + + static void __init timer_get_base_and_rate(struct device_node *np, +- void __iomem **base, u32 *rate) ++ void __iomem **base, unsigned long *phys, ++ u32 *rate) + { + struct clk *timer_clk; ++ struct resource res; + struct clk *pclk; + + *base = of_iomap(np, 0); + +- if (!*base) ++ if (!*base || of_address_to_resource(np, 0, &res)) + panic("Unable to map regs for %s", np->name); + ++ *phys = res.start; ++ + /* + * Not all implementations use a periphal clock, so don't panic + * if it's not present +@@ -64,13 +68,14 @@ static void __init add_clockevent(struct + { + void __iomem *iobase; + struct dw_apb_clock_event_device *ced; ++ unsigned long phys; + u32 irq, rate; + + irq = irq_of_parse_and_map(event_timer, 0); + if (irq == 0) + panic("No IRQ for clock event timer"); + +- timer_get_base_and_rate(event_timer, &iobase, &rate); ++ timer_get_base_and_rate(event_timer, &iobase, &phys, &rate); + + ced = dw_apb_clockevent_init(0, event_timer->name, 300, iobase, irq, + rate); +@@ -87,11 +92,12 @@ static void __init add_clocksource(struc + { + void __iomem *iobase; + struct dw_apb_clocksource *cs; ++ unsigned long phys; + u32 rate; + +- timer_get_base_and_rate(source_timer, &iobase, &rate); ++ timer_get_base_and_rate(source_timer, &iobase, &phys, &rate); + +- cs = dw_apb_clocksource_init(300, source_timer->name, iobase, rate); ++ cs = dw_apb_clocksource_init(300, source_timer->name, iobase, phys, rate); + if (!cs) + panic("Unable to initialise clocksource device"); + +@@ -120,11 +126,12 @@ static const struct of_device_id sptimer + static void __init init_sched_clock(void) + { + struct device_node *sched_timer; ++ unsigned long phys; + + sched_timer = of_find_matching_node(NULL, sptimer_ids); + if (sched_timer) { + timer_get_base_and_rate(sched_timer, &sched_io_base, +- &sched_rate); ++ &phys, &sched_rate); + of_node_put(sched_timer); + } + +diff -uprN kernel/drivers/clocksource/timer-imx-gpt.c kernel_new/drivers/clocksource/timer-imx-gpt.c +--- kernel/drivers/clocksource/timer-imx-gpt.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/timer-imx-gpt.c 2021-04-01 18:28:07.661863280 +0800 +@@ -16,6 +16,8 @@ + #include + #include + #include ++#include ++#include + #include + + /* +@@ -61,6 +63,9 @@ + + struct imx_timer { + enum imx_gpt_type type; ++#ifdef CONFIG_IPIPE ++ unsigned long pbase; ++#endif + void __iomem *base; + int irq; + struct clk *clk_per; +@@ -265,6 +270,30 @@ static int mxc_set_oneshot(struct clock_ + return 0; + } + ++#ifdef CONFIG_IPIPE ++ ++static struct imx_timer *global_imx_timer; ++ ++static void mxc_timer_ack(void) ++{ ++ global_imx_timer->gpt->gpt_irq_acknowledge(global_imx_timer); ++} ++ ++static struct __ipipe_tscinfo tsc_info = { ++ .type = 
IPIPE_TSC_TYPE_FREERUNNING, ++ .u = { ++ { ++ .mask = 0xffffffff, ++ }, ++ }, ++}; ++ ++static struct ipipe_timer mxc_itimer = { ++ .ack = mxc_timer_ack, ++}; ++ ++#endif ++ + /* + * IRQ handler for the timer + */ +@@ -276,7 +305,8 @@ static irqreturn_t mxc_timer_interrupt(i + + tstat = readl_relaxed(imxtm->base + imxtm->gpt->reg_tstat); + +- imxtm->gpt->gpt_irq_acknowledge(imxtm); ++ if (!clockevent_ipipe_stolen(ced)) ++ imxtm->gpt->gpt_irq_acknowledge(imxtm); + + ced->event_handler(ced); + +@@ -297,6 +327,9 @@ static int __init mxc_clockevent_init(st + ced->rating = 200; + ced->cpumask = cpumask_of(0); + ced->irq = imxtm->irq; ++#ifdef CONFIG_IPIPE ++ ced->ipipe_timer = &mxc_itimer; ++#endif + clockevents_config_and_register(ced, clk_get_rate(imxtm->clk_per), + 0xff, 0xfffffffe); + +@@ -436,6 +469,17 @@ static int __init _mxc_timer_init(struct + if (ret) + return ret; + ++#ifdef CONFIG_IPIPE ++ tsc_info.u.counter_paddr = imxtm->pbase + imxtm->gpt->reg_tcn; ++ tsc_info.counter_vaddr = (unsigned long)imxtm->base + imxtm->gpt->reg_tcn; ++ tsc_info.freq = clk_get_rate(imxtm->clk_per); ++ __ipipe_tsc_register(&tsc_info); ++ mxc_itimer.irq = imxtm->irq; ++ mxc_itimer.freq = clk_get_rate(imxtm->clk_per); ++ mxc_itimer.min_delay_ticks = ipipe_timer_ns2ticks(&mxc_itimer, 2000); ++ global_imx_timer = imxtm; ++#endif /* CONFIG_IPIPE */ ++ + return mxc_clockevent_init(imxtm); + } + +@@ -451,6 +495,9 @@ void __init mxc_timer_init(unsigned long + + imxtm->base = ioremap(pbase, SZ_4K); + BUG_ON(!imxtm->base); ++#ifdef CONFIG_IPIPE ++ imxtm->pbase = pbase; ++#endif + + imxtm->type = type; + imxtm->irq = irq; +@@ -462,6 +509,7 @@ static int __init mxc_timer_init_dt(stru + { + struct imx_timer *imxtm; + static int initialized; ++ struct resource res; + int ret; + + /* Support one instance only */ +@@ -480,6 +528,13 @@ static int __init mxc_timer_init_dt(stru + if (imxtm->irq <= 0) + return -EINVAL; + ++ if (of_address_to_resource(np, 0, &res)) ++ res.start = 0; ++ ++#ifdef CONFIG_IPIPE ++ imxtm->pbase = res.start; ++#endif ++ + imxtm->clk_ipg = of_clk_get_by_name(np, "ipg"); + + /* Try osc_per first, and fall back to per otherwise */ +diff -uprN kernel/drivers/clocksource/timer-sp804.c kernel_new/drivers/clocksource/timer-sp804.c +--- kernel/drivers/clocksource/timer-sp804.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/timer-sp804.c 2021-04-01 18:28:07.661863280 +0800 +@@ -30,11 +30,25 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + + #include "timer-sp.h" + ++#ifdef CONFIG_IPIPE ++static struct __ipipe_tscinfo tsc_info = { ++ .type = IPIPE_TSC_TYPE_FREERUNNING_COUNTDOWN, ++ .u = { ++ { ++ .mask = 0xffffffff, ++ }, ++ }, ++}; ++#endif /* CONFIG_IPIPE */ ++ + static long __init sp804_get_clock_rate(struct clk *clk) + { + long rate; +@@ -79,6 +93,7 @@ void __init sp804_timer_disable(void __i + } + + int __init __sp804_clocksource_and_sched_clock_init(void __iomem *base, ++ unsigned long phys, + const char *name, + struct clk *clk, + int use_sched_clock) +@@ -113,6 +128,12 @@ int __init __sp804_clocksource_and_sche + sched_clock_register(sp804_read, 32, rate); + } + ++#ifdef CONFIG_IPIPE ++ tsc_info.freq = rate; ++ tsc_info.counter_vaddr = (unsigned long)base + TIMER_VALUE; ++ tsc_info.u.counter_paddr = phys + TIMER_VALUE; ++ __ipipe_tsc_register(&tsc_info); ++#endif + return 0; + } + +@@ -227,6 +248,7 @@ static int __init sp804_of_init(struct d + u32 irq_num = 0; + struct clk *clk1, *clk2; + const char *name = of_get_property(np, "compatible", 
NULL); ++ struct resource res; + + base = of_iomap(np, 0); + if (!base) +@@ -260,6 +282,9 @@ static int __init sp804_of_init(struct d + if (irq <= 0) + goto err; + ++ if (of_address_to_resource(np, 0, &res)) ++ res.start = 0; ++ + of_property_read_u32(np, "arm,sp804-has-irq", &irq_num); + if (irq_num == 2) { + +@@ -267,7 +292,7 @@ static int __init sp804_of_init(struct d + if (ret) + goto err; + +- ret = __sp804_clocksource_and_sched_clock_init(base, name, clk1, 1); ++ ret = __sp804_clocksource_and_sched_clock_init(base, res.start, name, clk1, 1); + if (ret) + goto err; + } else { +@@ -277,7 +302,7 @@ static int __init sp804_of_init(struct d + goto err; + + ret =__sp804_clocksource_and_sched_clock_init(base + TIMER_2_BASE, +- name, clk2, 1); ++ res.start, name, clk2, 1); + if (ret) + goto err; + } +@@ -297,6 +322,7 @@ static int __init integrator_cp_of_init( + int irq, ret = -EINVAL; + const char *name = of_get_property(np, "compatible", NULL); + struct clk *clk; ++ struct resource res; + + base = of_iomap(np, 0); + if (!base) { +@@ -316,8 +342,11 @@ static int __init integrator_cp_of_init( + if (init_count == 2 || !of_device_is_available(np)) + goto err; + ++ if (of_address_to_resource(np, 0, &res)) ++ res.start = 0; ++ + if (!init_count) { +- ret = __sp804_clocksource_and_sched_clock_init(base, name, clk, 0); ++ ret = __sp804_clocksource_and_sched_clock_init(base, res.start, name, clk, 0); + if (ret) + goto err; + } else { +diff -uprN kernel/drivers/cpuidle/cpuidle.c kernel_new/drivers/cpuidle/cpuidle.c +--- kernel/drivers/cpuidle/cpuidle.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/cpuidle/cpuidle.c 2021-04-01 18:28:07.661863280 +0800 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -206,6 +207,19 @@ int cpuidle_enter_state(struct cpuidle_d + s64 diff; + + /* ++ * A co-kernel running on the head stage of the IRQ pipeline ++ * may deny switching to a deeper C-state. If so, call the ++ * default idle routine instead. If the co-kernel cannot bear ++ * with the latency induced by the default idling operation, ++ * then CPUIDLE is not usable and should be disabled at build ++ * time. ++ */ ++ if (!ipipe_enter_cpuidle(dev, target_state)) { ++ default_idle_call(); ++ return -EBUSY; ++ } ++ ++ /* + * Tell the time framework to switch to a broadcast timer because our + * local timer will be shut down. If a local timer is used from another + * CPU as a broadcast timer, this call may fail if it is not available. +@@ -229,6 +243,7 @@ int cpuidle_enter_state(struct cpuidle_d + + stop_critical_timings(); + entered_state = target_state->enter(dev, drv, index); ++ hard_cond_local_irq_enable(); + start_critical_timings(); + + sched_clock_idle_wakeup_event(); +diff -uprN kernel/drivers/cpuidle/cpuidle.c.orig kernel_new/drivers/cpuidle/cpuidle.c.orig +--- kernel/drivers/cpuidle/cpuidle.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/cpuidle/cpuidle.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,706 @@ ++/* ++ * cpuidle.c - core cpuidle infrastructure ++ * ++ * (C) 2006-2007 Venkatesh Pallipadi ++ * Shaohua Li ++ * Adam Belay ++ * ++ * This code is licenced under the GPL. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "cpuidle.h" ++ ++DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices); ++DEFINE_PER_CPU(struct cpuidle_device, cpuidle_dev); ++ ++DEFINE_MUTEX(cpuidle_lock); ++LIST_HEAD(cpuidle_detected_devices); ++ ++static int enabled_devices; ++static int off __read_mostly; ++static int initialized __read_mostly; ++ ++int cpuidle_disabled(void) ++{ ++ return off; ++} ++void disable_cpuidle(void) ++{ ++ off = 1; ++} ++ ++bool cpuidle_not_available(struct cpuidle_driver *drv, ++ struct cpuidle_device *dev) ++{ ++ return off || !initialized || !drv || !dev || !dev->enabled; ++} ++ ++/** ++ * cpuidle_play_dead - cpu off-lining ++ * ++ * Returns in case of an error or no driver ++ */ ++int cpuidle_play_dead(void) ++{ ++ struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); ++ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); ++ int i; ++ ++ if (!drv) ++ return -ENODEV; ++ ++ /* Find lowest-power state that supports long-term idle */ ++ for (i = drv->state_count - 1; i >= 0; i--) ++ if (drv->states[i].enter_dead) ++ return drv->states[i].enter_dead(dev, i); ++ ++ return -ENODEV; ++} ++ ++static int find_deepest_state(struct cpuidle_driver *drv, ++ struct cpuidle_device *dev, ++ unsigned int max_latency, ++ unsigned int forbidden_flags, ++ bool s2idle) ++{ ++ unsigned int latency_req = 0; ++ int i, ret = 0; ++ ++ for (i = 1; i < drv->state_count; i++) { ++ struct cpuidle_state *s = &drv->states[i]; ++ struct cpuidle_state_usage *su = &dev->states_usage[i]; ++ ++ if (s->disabled || su->disable || s->exit_latency <= latency_req ++ || s->exit_latency > max_latency ++ || (s->flags & forbidden_flags) ++ || (s2idle && !s->enter_s2idle)) ++ continue; ++ ++ latency_req = s->exit_latency; ++ ret = i; ++ } ++ return ret; ++} ++ ++/** ++ * cpuidle_use_deepest_state - Set/clear governor override flag. ++ * @enable: New value of the flag. ++ * ++ * Set/unset the current CPU to use the deepest idle state (override governors ++ * going forward if set). ++ */ ++void cpuidle_use_deepest_state(bool enable) ++{ ++ struct cpuidle_device *dev; ++ ++ preempt_disable(); ++ dev = cpuidle_get_device(); ++ if (dev) ++ dev->use_deepest_state = enable; ++ preempt_enable(); ++} ++ ++/** ++ * cpuidle_find_deepest_state - Find the deepest available idle state. ++ * @drv: cpuidle driver for the given CPU. ++ * @dev: cpuidle device for the given CPU. ++ */ ++int cpuidle_find_deepest_state(struct cpuidle_driver *drv, ++ struct cpuidle_device *dev) ++{ ++ return find_deepest_state(drv, dev, UINT_MAX, 0, false); ++} ++ ++#ifdef CONFIG_SUSPEND ++static void enter_s2idle_proper(struct cpuidle_driver *drv, ++ struct cpuidle_device *dev, int index) ++{ ++ ktime_t time_start, time_end; ++ ++ time_start = ns_to_ktime(local_clock()); ++ ++ /* ++ * trace_suspend_resume() called by tick_freeze() for the last CPU ++ * executing it contains RCU usage regarded as invalid in the idle ++ * context, so tell RCU about that. ++ */ ++ RCU_NONIDLE(tick_freeze()); ++ /* ++ * The state used here cannot be a "coupled" one, because the "coupled" ++ * cpuidle mechanism enables interrupts and doing that with timekeeping ++ * suspended is generally unsafe. 
++ */ ++ stop_critical_timings(); ++ drv->states[index].enter_s2idle(dev, drv, index); ++ if (WARN_ON_ONCE(!irqs_disabled())) ++ local_irq_disable(); ++ /* ++ * timekeeping_resume() that will be called by tick_unfreeze() for the ++ * first CPU executing it calls functions containing RCU read-side ++ * critical sections, so tell RCU about that. ++ */ ++ RCU_NONIDLE(tick_unfreeze()); ++ start_critical_timings(); ++ ++ time_end = ns_to_ktime(local_clock()); ++ ++ dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start); ++ dev->states_usage[index].s2idle_usage++; ++} ++ ++/** ++ * cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle. ++ * @drv: cpuidle driver for the given CPU. ++ * @dev: cpuidle device for the given CPU. ++ * ++ * If there are states with the ->enter_s2idle callback, find the deepest of ++ * them and enter it with frozen tick. ++ */ ++int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) ++{ ++ int index; ++ ++ /* ++ * Find the deepest state with ->enter_s2idle present, which guarantees ++ * that interrupts won't be enabled when it exits and allows the tick to ++ * be frozen safely. ++ */ ++ index = find_deepest_state(drv, dev, UINT_MAX, 0, true); ++ if (index > 0) ++ enter_s2idle_proper(drv, dev, index); ++ ++ return index; ++} ++#endif /* CONFIG_SUSPEND */ ++ ++/** ++ * cpuidle_enter_state - enter the state and update stats ++ * @dev: cpuidle device for this cpu ++ * @drv: cpuidle driver for this cpu ++ * @index: index into the states table in @drv of the state to enter ++ */ ++int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, ++ int index) ++{ ++ int entered_state; ++ ++ struct cpuidle_state *target_state = &drv->states[index]; ++ bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP); ++ ktime_t time_start, time_end; ++ s64 diff; ++ ++ /* ++ * Tell the time framework to switch to a broadcast timer because our ++ * local timer will be shut down. If a local timer is used from another ++ * CPU as a broadcast timer, this call may fail if it is not available. ++ */ ++ if (broadcast && tick_broadcast_enter()) { ++ index = find_deepest_state(drv, dev, target_state->exit_latency, ++ CPUIDLE_FLAG_TIMER_STOP, false); ++ if (index < 0) { ++ default_idle_call(); ++ return -EBUSY; ++ } ++ target_state = &drv->states[index]; ++ broadcast = false; ++ } ++ ++ /* Take note of the planned idle state. */ ++ sched_idle_set_state(target_state); ++ ++ trace_cpu_idle_rcuidle(index, dev->cpu); ++ time_start = ns_to_ktime(local_clock()); ++ ++ stop_critical_timings(); ++ entered_state = target_state->enter(dev, drv, index); ++ start_critical_timings(); ++ ++ sched_clock_idle_wakeup_event(); ++ time_end = ns_to_ktime(local_clock()); ++ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); ++ ++ /* The cpu is no longer idle or about to enter idle. */ ++ sched_idle_set_state(NULL); ++ ++ if (broadcast) { ++ if (WARN_ON_ONCE(!irqs_disabled())) ++ local_irq_disable(); ++ ++ tick_broadcast_exit(); ++ } ++ ++ if (!cpuidle_state_is_coupled(drv, index)) ++ local_irq_enable(); ++ ++ diff = ktime_us_delta(time_end, time_start); ++ if (diff > INT_MAX) ++ diff = INT_MAX; ++ ++ dev->last_residency = (int) diff; ++ ++ if (entered_state >= 0) { ++ /* Update cpuidle counters */ ++ /* This can be moved to within driver enter routine ++ * but that results in multiple copies of same code. 
++ */ ++ dev->states_usage[entered_state].time += dev->last_residency; ++ dev->states_usage[entered_state].usage++; ++ } else { ++ dev->last_residency = 0; ++ } ++ ++ return entered_state; ++} ++ ++/** ++ * cpuidle_select - ask the cpuidle framework to choose an idle state ++ * ++ * @drv: the cpuidle driver ++ * @dev: the cpuidle device ++ * @stop_tick: indication on whether or not to stop the tick ++ * ++ * Returns the index of the idle state. The return value must not be negative. ++ * ++ * The memory location pointed to by @stop_tick is expected to be written the ++ * 'false' boolean value if the scheduler tick should not be stopped before ++ * entering the returned state. ++ */ ++int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, ++ bool *stop_tick) ++{ ++ return cpuidle_curr_governor->select(drv, dev, stop_tick); ++} ++ ++/** ++ * cpuidle_enter - enter into the specified idle state ++ * ++ * @drv: the cpuidle driver tied with the cpu ++ * @dev: the cpuidle device ++ * @index: the index in the idle state table ++ * ++ * Returns the index in the idle state, < 0 in case of error. ++ * The error code depends on the backend driver ++ */ ++int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, ++ int index) ++{ ++ if (cpuidle_state_is_coupled(drv, index)) ++ return cpuidle_enter_state_coupled(dev, drv, index); ++ return cpuidle_enter_state(dev, drv, index); ++} ++ ++/** ++ * cpuidle_reflect - tell the underlying governor what was the state ++ * we were in ++ * ++ * @dev : the cpuidle device ++ * @index: the index in the idle state table ++ * ++ */ ++void cpuidle_reflect(struct cpuidle_device *dev, int index) ++{ ++ if (cpuidle_curr_governor->reflect && index >= 0) ++ cpuidle_curr_governor->reflect(dev, index); ++} ++ ++/** ++ * cpuidle_install_idle_handler - installs the cpuidle idle loop handler ++ */ ++void cpuidle_install_idle_handler(void) ++{ ++ if (enabled_devices) { ++ /* Make sure all changes finished before we switch to new idle */ ++ smp_wmb(); ++ initialized = 1; ++ } ++} ++ ++/** ++ * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler ++ */ ++void cpuidle_uninstall_idle_handler(void) ++{ ++ if (enabled_devices) { ++ initialized = 0; ++ wake_up_all_idle_cpus(); ++ } ++ ++ /* ++ * Make sure external observers (such as the scheduler) ++ * are done looking at pointed idle states. ++ */ ++ synchronize_rcu(); ++} ++ ++/** ++ * cpuidle_pause_and_lock - temporarily disables CPUIDLE ++ */ ++void cpuidle_pause_and_lock(void) ++{ ++ mutex_lock(&cpuidle_lock); ++ cpuidle_uninstall_idle_handler(); ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock); ++ ++/** ++ * cpuidle_resume_and_unlock - resumes CPUIDLE operation ++ */ ++void cpuidle_resume_and_unlock(void) ++{ ++ cpuidle_install_idle_handler(); ++ mutex_unlock(&cpuidle_lock); ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock); ++ ++/* Currently used in suspend/resume path to suspend cpuidle */ ++void cpuidle_pause(void) ++{ ++ mutex_lock(&cpuidle_lock); ++ cpuidle_uninstall_idle_handler(); ++ mutex_unlock(&cpuidle_lock); ++} ++ ++/* Currently used in suspend/resume path to resume cpuidle */ ++void cpuidle_resume(void) ++{ ++ mutex_lock(&cpuidle_lock); ++ cpuidle_install_idle_handler(); ++ mutex_unlock(&cpuidle_lock); ++} ++ ++/** ++ * cpuidle_enable_device - enables idle PM for a CPU ++ * @dev: the CPU ++ * ++ * This function must be called between cpuidle_pause_and_lock and ++ * cpuidle_resume_and_unlock when used externally. 
++ */ ++int cpuidle_enable_device(struct cpuidle_device *dev) ++{ ++ int ret; ++ struct cpuidle_driver *drv; ++ ++ if (!dev) ++ return -EINVAL; ++ ++ if (dev->enabled) ++ return 0; ++ ++ if (!cpuidle_curr_governor) ++ return -EIO; ++ ++ drv = cpuidle_get_cpu_driver(dev); ++ ++ if (!drv) ++ return -EIO; ++ ++ if (!dev->registered) ++ return -EINVAL; ++ ++ ret = cpuidle_add_device_sysfs(dev); ++ if (ret) ++ return ret; ++ ++ if (cpuidle_curr_governor->enable) { ++ ret = cpuidle_curr_governor->enable(drv, dev); ++ if (ret) ++ goto fail_sysfs; ++ } ++ ++ smp_wmb(); ++ ++ dev->enabled = 1; ++ ++ enabled_devices++; ++ return 0; ++ ++fail_sysfs: ++ cpuidle_remove_device_sysfs(dev); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_enable_device); ++ ++/** ++ * cpuidle_disable_device - disables idle PM for a CPU ++ * @dev: the CPU ++ * ++ * This function must be called between cpuidle_pause_and_lock and ++ * cpuidle_resume_and_unlock when used externally. ++ */ ++void cpuidle_disable_device(struct cpuidle_device *dev) ++{ ++ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); ++ ++ if (!dev || !dev->enabled) ++ return; ++ ++ if (!drv || !cpuidle_curr_governor) ++ return; ++ ++ dev->enabled = 0; ++ ++ if (cpuidle_curr_governor->disable) ++ cpuidle_curr_governor->disable(drv, dev); ++ ++ cpuidle_remove_device_sysfs(dev); ++ enabled_devices--; ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_disable_device); ++ ++static void __cpuidle_unregister_device(struct cpuidle_device *dev) ++{ ++ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); ++ ++ list_del(&dev->device_list); ++ per_cpu(cpuidle_devices, dev->cpu) = NULL; ++ module_put(drv->owner); ++ ++ dev->registered = 0; ++} ++ ++static void __cpuidle_device_init(struct cpuidle_device *dev) ++{ ++ memset(dev->states_usage, 0, sizeof(dev->states_usage)); ++ dev->last_residency = 0; ++} ++ ++/** ++ * __cpuidle_register_device - internal register function called before register ++ * and enable routines ++ * @dev: the cpu ++ * ++ * cpuidle_lock mutex must be held before this is called ++ */ ++static int __cpuidle_register_device(struct cpuidle_device *dev) ++{ ++ int ret; ++ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); ++ ++ if (!try_module_get(drv->owner)) ++ return -EINVAL; ++ ++ per_cpu(cpuidle_devices, dev->cpu) = dev; ++ list_add(&dev->device_list, &cpuidle_detected_devices); ++ ++ ret = cpuidle_coupled_register_device(dev); ++ if (ret) ++ __cpuidle_unregister_device(dev); ++ else ++ dev->registered = 1; ++ ++ return ret; ++} ++ ++/** ++ * cpuidle_register_device - registers a CPU's idle PM feature ++ * @dev: the cpu ++ */ ++int cpuidle_register_device(struct cpuidle_device *dev) ++{ ++ int ret = -EBUSY; ++ ++ if (!dev) ++ return -EINVAL; ++ ++ mutex_lock(&cpuidle_lock); ++ ++ if (dev->registered) ++ goto out_unlock; ++ ++ __cpuidle_device_init(dev); ++ ++ ret = __cpuidle_register_device(dev); ++ if (ret) ++ goto out_unlock; ++ ++ ret = cpuidle_add_sysfs(dev); ++ if (ret) ++ goto out_unregister; ++ ++ ret = cpuidle_enable_device(dev); ++ if (ret) ++ goto out_sysfs; ++ ++ cpuidle_install_idle_handler(); ++ ++out_unlock: ++ mutex_unlock(&cpuidle_lock); ++ ++ return ret; ++ ++out_sysfs: ++ cpuidle_remove_sysfs(dev); ++out_unregister: ++ __cpuidle_unregister_device(dev); ++ goto out_unlock; ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_register_device); ++ ++/** ++ * cpuidle_unregister_device - unregisters a CPU's idle PM feature ++ * @dev: the cpu ++ */ ++void cpuidle_unregister_device(struct cpuidle_device *dev) ++{ ++ if (!dev || dev->registered == 
0) ++ return; ++ ++ cpuidle_pause_and_lock(); ++ ++ cpuidle_disable_device(dev); ++ ++ cpuidle_remove_sysfs(dev); ++ ++ __cpuidle_unregister_device(dev); ++ ++ cpuidle_coupled_unregister_device(dev); ++ ++ cpuidle_resume_and_unlock(); ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_unregister_device); ++ ++/** ++ * cpuidle_unregister: unregister a driver and the devices. This function ++ * can be used only if the driver has been previously registered through ++ * the cpuidle_register function. ++ * ++ * @drv: a valid pointer to a struct cpuidle_driver ++ */ ++void cpuidle_unregister(struct cpuidle_driver *drv) ++{ ++ int cpu; ++ struct cpuidle_device *device; ++ ++ for_each_cpu(cpu, drv->cpumask) { ++ device = &per_cpu(cpuidle_dev, cpu); ++ cpuidle_unregister_device(device); ++ } ++ ++ cpuidle_unregister_driver(drv); ++} ++EXPORT_SYMBOL_GPL(cpuidle_unregister); ++ ++/** ++ * cpuidle_register: registers the driver and the cpu devices with the ++ * coupled_cpus passed as parameter. This function is used for all common ++ * initialization pattern there are in the arch specific drivers. The ++ * devices is globally defined in this file. ++ * ++ * @drv : a valid pointer to a struct cpuidle_driver ++ * @coupled_cpus: a cpumask for the coupled states ++ * ++ * Returns 0 on success, < 0 otherwise ++ */ ++int cpuidle_register(struct cpuidle_driver *drv, ++ const struct cpumask *const coupled_cpus) ++{ ++ int ret, cpu; ++ struct cpuidle_device *device; ++ ++ ret = cpuidle_register_driver(drv); ++ if (ret) { ++ pr_err("failed to register cpuidle driver\n"); ++ return ret; ++ } ++ ++ for_each_cpu(cpu, drv->cpumask) { ++ device = &per_cpu(cpuidle_dev, cpu); ++ device->cpu = cpu; ++ ++#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED ++ /* ++ * On multiplatform for ARM, the coupled idle states could be ++ * enabled in the kernel even if the cpuidle driver does not ++ * use it. Note, coupled_cpus is a struct copy. ++ */ ++ if (coupled_cpus) ++ device->coupled_cpus = *coupled_cpus; ++#endif ++ ret = cpuidle_register_device(device); ++ if (!ret) ++ continue; ++ ++ pr_err("Failed to register cpuidle device for cpu%d\n", cpu); ++ ++ cpuidle_unregister(drv); ++ break; ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(cpuidle_register); ++ ++#ifdef CONFIG_SMP ++ ++/* ++ * This function gets called when a part of the kernel has a new latency ++ * requirement. This means we need to get all processors out of their C-state, ++ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that ++ * wakes them all right up. 
++ */ ++static int cpuidle_latency_notify(struct notifier_block *b, ++ unsigned long l, void *v) ++{ ++ wake_up_all_idle_cpus(); ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block cpuidle_latency_notifier = { ++ .notifier_call = cpuidle_latency_notify, ++}; ++ ++static inline void latency_notifier_init(struct notifier_block *n) ++{ ++ pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n); ++} ++ ++#else /* CONFIG_SMP */ ++ ++#define latency_notifier_init(x) do { } while (0) ++ ++#endif /* CONFIG_SMP */ ++ ++/** ++ * cpuidle_init - core initializer ++ */ ++static int __init cpuidle_init(void) ++{ ++ int ret; ++ ++ if (cpuidle_disabled()) ++ return -ENODEV; ++ ++ ret = cpuidle_add_interface(cpu_subsys.dev_root); ++ if (ret) ++ return ret; ++ ++ latency_notifier_init(&cpuidle_latency_notifier); ++ ++ return 0; ++} ++ ++module_param(off, int, 0444); ++core_initcall(cpuidle_init); +diff -uprN kernel/drivers/gpio/gpio-davinci.c kernel_new/drivers/gpio/gpio-davinci.c +--- kernel/drivers/gpio/gpio-davinci.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-davinci.c 2021-04-01 18:28:07.662863279 +0800 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + struct davinci_gpio_regs { + u32 dir; +@@ -327,7 +328,7 @@ static struct irq_chip gpio_irqchip = { + .irq_enable = gpio_irq_enable, + .irq_disable = gpio_irq_disable, + .irq_set_type = gpio_irq_type, +- .flags = IRQCHIP_SET_TYPE_MASKED, ++ .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_PIPELINE_SAFE, + }; + + static void gpio_irq_handler(struct irq_desc *desc) +@@ -370,7 +371,7 @@ static void gpio_irq_handler(struct irq_ + */ + hw_irq = (bank_num / 2) * 32 + bit; + +- generic_handle_irq( ++ ipipe_handle_demuxed_irq( + irq_find_mapping(d->irq_domain, hw_irq)); + } + } +diff -uprN kernel/drivers/gpio/gpio-mvebu.c kernel_new/drivers/gpio/gpio-mvebu.c +--- kernel/drivers/gpio/gpio-mvebu.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-mvebu.c 2021-04-01 18:28:07.662863279 +0800 +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + + /* + * GPIO unit register offsets. 
+@@ -391,10 +392,11 @@ static void mvebu_gpio_irq_ack(struct ir + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct mvebu_gpio_chip *mvchip = gc->private; + u32 mask = d->mask; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + mvebu_gpio_write_edge_cause(mvchip, ~mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static void mvebu_gpio_edge_irq_mask(struct irq_data *d) +@@ -403,11 +405,12 @@ static void mvebu_gpio_edge_irq_mask(str + struct mvebu_gpio_chip *mvchip = gc->private; + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + ct->mask_cache_priv &= ~mask; + mvebu_gpio_write_edge_mask(mvchip, ct->mask_cache_priv); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static void mvebu_gpio_edge_irq_unmask(struct irq_data *d) +@@ -416,11 +419,12 @@ static void mvebu_gpio_edge_irq_unmask(s + struct mvebu_gpio_chip *mvchip = gc->private; + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + ct->mask_cache_priv |= mask; + mvebu_gpio_write_edge_mask(mvchip, ct->mask_cache_priv); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static void mvebu_gpio_level_irq_mask(struct irq_data *d) +@@ -429,11 +433,12 @@ static void mvebu_gpio_level_irq_mask(st + struct mvebu_gpio_chip *mvchip = gc->private; + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + ct->mask_cache_priv &= ~mask; + mvebu_gpio_write_level_mask(mvchip, ct->mask_cache_priv); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static void mvebu_gpio_level_irq_unmask(struct irq_data *d) +@@ -442,11 +447,12 @@ static void mvebu_gpio_level_irq_unmask( + struct mvebu_gpio_chip *mvchip = gc->private; + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + ct->mask_cache_priv |= mask; + mvebu_gpio_write_level_mask(mvchip, ct->mask_cache_priv); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + /***************************************************************************** +@@ -580,7 +586,7 @@ static void mvebu_gpio_irq_handler(struc + polarity); + } + +- generic_handle_irq(irq); ++ ipipe_handle_demuxed_irq(irq); + } + + chained_irq_exit(chip, desc); +@@ -1224,6 +1230,7 @@ static int mvebu_gpio_probe(struct platf + ct->chip.irq_unmask = mvebu_gpio_level_irq_unmask; + ct->chip.irq_set_type = mvebu_gpio_irq_set_type; + ct->chip.name = mvchip->chip.label; ++ ct->chip.flags = IRQCHIP_PIPELINE_SAFE; + + ct = &gc->chip_types[1]; + ct->type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING; +@@ -1233,6 +1240,7 @@ static int mvebu_gpio_probe(struct platf + ct->chip.irq_set_type = mvebu_gpio_irq_set_type; + ct->handler = handle_edge_irq; + ct->chip.name = mvchip->chip.label; ++ ct->chip.flags = IRQCHIP_PIPELINE_SAFE; + + /* + * Setup the interrupt handlers. 
Each chip can have up to 4 +diff -uprN kernel/drivers/gpio/gpio-mxc.c kernel_new/drivers/gpio/gpio-mxc.c +--- kernel/drivers/gpio/gpio-mxc.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-mxc.c 2021-04-01 18:28:07.662863279 +0800 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + enum mxc_gpio_hwtype { + IMX1_GPIO, /* runs on i.mx1 */ +@@ -266,7 +267,7 @@ static void mxc_gpio_irq_handler(struct + if (port->both_edges & (1 << irqoffset)) + mxc_flip_edge(port, irqoffset); + +- generic_handle_irq(irq_find_mapping(port->domain, irqoffset)); ++ ipipe_handle_demuxed_irq(irq_find_mapping(port->domain, irqoffset)); + + irq_stat &= ~(1 << irqoffset); + } +@@ -359,7 +360,7 @@ static int mxc_gpio_init_gc(struct mxc_g + ct->chip.irq_unmask = irq_gc_mask_set_bit; + ct->chip.irq_set_type = gpio_set_irq_type; + ct->chip.irq_set_wake = gpio_set_wake_irq; +- ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND; ++ ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE; + ct->regs.ack = GPIO_ISR; + ct->regs.mask = GPIO_IMR; + +diff -uprN kernel/drivers/gpio/gpio-omap.c kernel_new/drivers/gpio/gpio-omap.c +--- kernel/drivers/gpio/gpio-omap.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-omap.c 2021-04-01 18:28:07.662863279 +0800 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + + #define OFF_MODE 1 +@@ -58,7 +59,11 @@ struct gpio_bank { + u32 saved_datain; + u32 level_mask; + u32 toggle_mask; ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t lock; ++#else + raw_spinlock_t lock; ++#endif + raw_spinlock_t wa_lock; + struct gpio_chip chip; + struct clk *dbck; +@@ -737,20 +742,17 @@ static void omap_gpio_free(struct gpio_c + * line's interrupt handler has been run, we may miss some nested + * interrupts. 
+ */ +-static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) ++static void __omap_gpio_irq_handler(struct gpio_bank *bank) + { + void __iomem *isr_reg = NULL; + u32 enabled, isr, level_mask; + unsigned int bit; +- struct gpio_bank *bank = gpiobank; + unsigned long wa_lock_flags; + unsigned long lock_flags; + + isr_reg = bank->base + bank->regs->irqstatus; + if (WARN_ON(!isr_reg)) +- goto exit; +- +- pm_runtime_get_sync(bank->chip.parent); ++ return; + + while (1) { + raw_spin_lock_irqsave(&bank->lock, lock_flags); +@@ -793,18 +795,38 @@ static irqreturn_t omap_gpio_irq_handler + + raw_spin_lock_irqsave(&bank->wa_lock, wa_lock_flags); + +- generic_handle_irq(irq_find_mapping(bank->chip.irq.domain, ++ ipipe_handle_demuxed_irq(irq_find_mapping(bank->chip.irq.domain, + bit)); + + raw_spin_unlock_irqrestore(&bank->wa_lock, + wa_lock_flags); + } + } +-exit: ++} ++ ++#ifdef CONFIG_IPIPE ++ ++static void omap_gpio_irq_handler(struct irq_desc *d) ++{ ++ struct gpio_bank *bank = irq_desc_get_handler_data(d); ++ __omap_gpio_irq_handler(bank); ++} ++ ++#else ++ ++static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) ++{ ++ struct gpio_bank *bank = gpiobank; ++ ++ pm_runtime_get_sync(bank->chip.parent); ++ __omap_gpio_irq_handler(bank); + pm_runtime_put(bank->chip.parent); ++ + return IRQ_HANDLED; + } + ++#endif ++ + static unsigned int omap_gpio_irq_startup(struct irq_data *d) + { + struct gpio_bank *bank = omap_irq_data_get_bank(d); +@@ -886,6 +908,19 @@ static void omap_gpio_mask_irq(struct ir + raw_spin_unlock_irqrestore(&bank->lock, flags); + } + ++static void omap_gpio_mask_ack_irq(struct irq_data *d) ++{ ++ struct gpio_bank *bank = omap_irq_data_get_bank(d); ++ unsigned offset = d->hwirq; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&bank->lock, flags); ++ omap_set_gpio_irqenable(bank, offset, 0); ++ omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE); ++ omap_clear_gpio_irqstatus(bank, offset); ++ raw_spin_unlock_irqrestore(&bank->lock, flags); ++} ++ + static void omap_gpio_unmask_irq(struct irq_data *d) + { + struct gpio_bank *bank = omap_irq_data_get_bank(d); +@@ -1218,11 +1253,16 @@ static int omap_gpio_chip_init(struct gp + return ret; + } + ++#ifdef CONFIG_IPIPE ++ irq_set_chained_handler_and_data(bank->irq, ++ omap_gpio_irq_handler, bank); ++#else + ret = devm_request_irq(bank->chip.parent, bank->irq, + omap_gpio_irq_handler, + 0, dev_name(bank->chip.parent), bank); + if (ret) + gpiochip_remove(&bank->chip); ++#endif + + if (!bank->is_mpuio) + gpio += bank->width; +@@ -1261,13 +1301,14 @@ static int omap_gpio_probe(struct platfo + irqc->irq_shutdown = omap_gpio_irq_shutdown, + irqc->irq_ack = omap_gpio_ack_irq, + irqc->irq_mask = omap_gpio_mask_irq, ++ irqc->irq_mask_ack = omap_gpio_mask_ack_irq, + irqc->irq_unmask = omap_gpio_unmask_irq, + irqc->irq_set_type = omap_gpio_irq_type, + irqc->irq_set_wake = omap_gpio_wake_enable, + irqc->irq_bus_lock = omap_gpio_irq_bus_lock, + irqc->irq_bus_sync_unlock = gpio_irq_bus_sync_unlock, + irqc->name = dev_name(&pdev->dev); +- irqc->flags = IRQCHIP_MASK_ON_SUSPEND; ++ irqc->flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE; + + bank->irq = platform_get_irq(pdev, 0); + if (bank->irq <= 0) { +diff -uprN kernel/drivers/gpio/gpio-pl061.c kernel_new/drivers/gpio/gpio-pl061.c +--- kernel/drivers/gpio/gpio-pl061.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-pl061.c 2021-04-01 18:28:07.662863279 +0800 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #define GPIODIR 0x400 + 
#define GPIOIS 0x404 +@@ -50,7 +51,11 @@ struct pl061_context_save_regs { + #endif + + struct pl061 { ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t lock; ++#else + raw_spinlock_t lock; ++#endif + + void __iomem *base; + struct gpio_chip gc; +@@ -222,8 +227,8 @@ static void pl061_irq_handler(struct irq + pending = readb(pl061->base + GPIOMIS); + if (pending) { + for_each_set_bit(offset, &pending, PL061_GPIO_NR) +- generic_handle_irq(irq_find_mapping(gc->irq.domain, +- offset)); ++ ipipe_handle_demuxed_irq(irq_find_mapping(gc->irq.domain, ++ offset)); + } + + chained_irq_exit(irqchip, desc); +@@ -234,6 +239,22 @@ static void pl061_irq_mask(struct irq_da + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct pl061 *pl061 = gpiochip_get_data(gc); + u8 mask = BIT(irqd_to_hwirq(d) % PL061_GPIO_NR); ++ unsigned long flags; ++ u8 gpioie; ++ ++ raw_spin_lock_irqsave(&pl061->lock, flags); ++ gpioie = readb(pl061->base + GPIOIE) & ~mask; ++ writeb(gpioie, pl061->base + GPIOIE); ++ ipipe_lock_irq(d->irq); ++ raw_spin_unlock_irqrestore(&pl061->lock, flags); ++} ++ ++#ifdef CONFIG_IPIPE ++static void pl061_irq_mask_ack(struct irq_data *d) ++{ ++ struct gpio_chip *gc = irq_data_get_irq_chip_data(d); ++ struct pl061 *pl061 = gpiochip_get_data(gc); ++ u8 mask = BIT(irqd_to_hwirq(d) % PL061_GPIO_NR); + u8 gpioie; + + raw_spin_lock(&pl061->lock); +@@ -241,6 +262,7 @@ static void pl061_irq_mask(struct irq_da + writeb(gpioie, pl061->base + GPIOIE); + raw_spin_unlock(&pl061->lock); + } ++#endif + + static void pl061_irq_unmask(struct irq_data *d) + { +@@ -326,6 +348,10 @@ static int pl061_probe(struct amba_devic + pl061->irq_chip.irq_unmask = pl061_irq_unmask; + pl061->irq_chip.irq_set_type = pl061_irq_type; + pl061->irq_chip.irq_set_wake = pl061_irq_set_wake; ++#ifdef CONFIG_IPIPE ++ pl061->irq_chip.irq_mask_ack = pl061_irq_mask_ack; ++ pl061->irq_chip.flags = IRQCHIP_PIPELINE_SAFE; ++#endif + + writeb(0, pl061->base + GPIOIE); /* disable irqs */ + irq = adev->irq[0]; +diff -uprN kernel/drivers/gpio/gpio-zynq.c kernel_new/drivers/gpio/gpio-zynq.c +--- kernel/drivers/gpio/gpio-zynq.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-zynq.c 2021-04-01 18:28:07.662863279 +0800 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -130,6 +131,8 @@ struct zynq_gpio { + struct gpio_regs context; + }; + ++static IPIPE_DEFINE_RAW_SPINLOCK(zynq_gpio_lock); ++ + /** + * struct zynq_platform_data - zynq gpio platform data structure + * @label: string to store in gpio->label +@@ -302,6 +305,7 @@ static int zynq_gpio_dir_in(struct gpio_ + u32 reg; + unsigned int bank_num, bank_pin_num; + struct zynq_gpio *gpio = gpiochip_get_data(chip); ++ unsigned long flags; + + zynq_gpio_get_bank_pin(pin, &bank_num, &bank_pin_num, gpio); + +@@ -313,10 +317,12 @@ static int zynq_gpio_dir_in(struct gpio_ + (bank_pin_num == 7 || bank_pin_num == 8)) + return -EINVAL; + ++ raw_spin_lock_irqsave(&zynq_gpio_lock, flags); + /* clear the bit in direction mode reg to set the pin as input */ + reg = readl_relaxed(gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num)); + reg &= ~BIT(bank_pin_num); + writel_relaxed(reg, gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num)); ++ raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); + + return 0; + } +@@ -339,9 +345,11 @@ static int zynq_gpio_dir_out(struct gpio + u32 reg; + unsigned int bank_num, bank_pin_num; + struct zynq_gpio *gpio = gpiochip_get_data(chip); ++ unsigned long flags; + + zynq_gpio_get_bank_pin(pin, &bank_num, 
&bank_pin_num, gpio); + ++ raw_spin_lock_irqsave(&zynq_gpio_lock, flags); + /* set the GPIO pin as output */ + reg = readl_relaxed(gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num)); + reg |= BIT(bank_pin_num); +@@ -351,6 +359,7 @@ static int zynq_gpio_dir_out(struct gpio + reg = readl_relaxed(gpio->base_addr + ZYNQ_GPIO_OUTEN_OFFSET(bank_num)); + reg |= BIT(bank_pin_num); + writel_relaxed(reg, gpio->base_addr + ZYNQ_GPIO_OUTEN_OFFSET(bank_num)); ++ raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); + + /* set the state of the pin */ + zynq_gpio_set_value(chip, pin, state); +@@ -370,11 +379,15 @@ static void zynq_gpio_irq_mask(struct ir + unsigned int device_pin_num, bank_num, bank_pin_num; + struct zynq_gpio *gpio = + gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); ++ unsigned long flags; + + device_pin_num = irq_data->hwirq; + zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); ++ raw_spin_lock_irqsave(&zynq_gpio_lock, flags); ++ ipipe_lock_irq(irq_data->irq); + writel_relaxed(BIT(bank_pin_num), + gpio->base_addr + ZYNQ_GPIO_INTDIS_OFFSET(bank_num)); ++ raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); + } + + /** +@@ -391,11 +404,15 @@ static void zynq_gpio_irq_unmask(struct + unsigned int device_pin_num, bank_num, bank_pin_num; + struct zynq_gpio *gpio = + gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); ++ unsigned long flags; + + device_pin_num = irq_data->hwirq; + zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); ++ raw_spin_lock_irqsave(&zynq_gpio_lock, flags); + writel_relaxed(BIT(bank_pin_num), + gpio->base_addr + ZYNQ_GPIO_INTEN_OFFSET(bank_num)); ++ ipipe_unlock_irq(irq_data->irq); ++ raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); + } + + /** +@@ -533,28 +550,68 @@ static int zynq_gpio_set_wake(struct irq + return 0; + } + ++#ifdef CONFIG_IPIPE ++ ++static void zynq_gpio_hold_irq(struct irq_data *irq_data) ++{ ++ unsigned int device_pin_num, bank_num, bank_pin_num; ++ struct zynq_gpio *gpio = ++ gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); ++ ++ device_pin_num = irq_data->hwirq; ++ zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); ++ raw_spin_lock(&zynq_gpio_lock); ++ writel_relaxed(BIT(bank_pin_num), ++ gpio->base_addr + ZYNQ_GPIO_INTDIS_OFFSET(bank_num)); ++ writel_relaxed(BIT(bank_pin_num), ++ gpio->base_addr + ZYNQ_GPIO_INTSTS_OFFSET(bank_num)); ++ raw_spin_unlock(&zynq_gpio_lock); ++} ++ ++static void zynq_gpio_release_irq(struct irq_data *irq_data) ++{ ++ unsigned int device_pin_num, bank_num, bank_pin_num; ++ struct zynq_gpio *gpio = ++ gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); ++ ++ device_pin_num = irq_data->hwirq; ++ zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); ++ writel_relaxed(BIT(bank_pin_num), ++ gpio->base_addr + ZYNQ_GPIO_INTEN_OFFSET(bank_num)); ++} ++ ++#endif /* CONFIG_IPIPE */ ++ + /* irq chip descriptor */ + static struct irq_chip zynq_gpio_level_irqchip = { +- .name = DRIVER_NAME, ++ .name = DRIVER_NAME "-level", + .irq_enable = zynq_gpio_irq_enable, + .irq_eoi = zynq_gpio_irq_ack, ++#ifdef CONFIG_IPIPE ++ .irq_hold = zynq_gpio_hold_irq, ++ .irq_release = zynq_gpio_release_irq, ++#endif + .irq_mask = zynq_gpio_irq_mask, + .irq_unmask = zynq_gpio_irq_unmask, + .irq_set_type = zynq_gpio_set_irq_type, + .irq_set_wake = zynq_gpio_set_wake, + .flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED | +- IRQCHIP_MASK_ON_SUSPEND, ++ IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE, + }; + + static struct irq_chip 
zynq_gpio_edge_irqchip = { +- .name = DRIVER_NAME, ++ .name = DRIVER_NAME "-edge", + .irq_enable = zynq_gpio_irq_enable, ++#ifdef CONFIG_IPIPE ++ .irq_mask_ack = zynq_gpio_hold_irq, ++#else + .irq_ack = zynq_gpio_irq_ack, ++#endif + .irq_mask = zynq_gpio_irq_mask, + .irq_unmask = zynq_gpio_irq_unmask, + .irq_set_type = zynq_gpio_set_irq_type, + .irq_set_wake = zynq_gpio_set_wake, +- .flags = IRQCHIP_MASK_ON_SUSPEND, ++ .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE, + }; + + static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio, +@@ -572,7 +629,7 @@ static void zynq_gpio_handle_bank_irq(st + unsigned int gpio_irq; + + gpio_irq = irq_find_mapping(irqdomain, offset + bank_offset); +- generic_handle_irq(gpio_irq); ++ ipipe_handle_demuxed_irq(gpio_irq); + } + } + +diff -uprN kernel/drivers/gpu/ipu-v3/ipu-common.c kernel_new/drivers/gpu/ipu-v3/ipu-common.c +--- kernel/drivers/gpu/ipu-v3/ipu-common.c 2020-12-21 21:59:18.000000000 +0800 ++++ kernel_new/drivers/gpu/ipu-v3/ipu-common.c 2021-04-01 18:28:07.663863278 +0800 +@@ -1084,7 +1084,7 @@ static void ipu_irq_handle(struct ipu_so + irq = irq_linear_revmap(ipu->domain, + regs[i] * 32 + bit); + if (irq) +- generic_handle_irq(irq); ++ ipipe_handle_demuxed_irq(irq); + } + } + } +@@ -1308,6 +1308,7 @@ static int ipu_irq_init(struct ipu_soc * + ct->chip.irq_ack = irq_gc_ack_set_bit; + ct->chip.irq_mask = irq_gc_mask_clr_bit; + ct->chip.irq_unmask = irq_gc_mask_set_bit; ++ ct->chip.flags = IRQCHIP_PIPELINE_SAFE; + ct->regs.ack = IPU_INT_STAT(i / 32); + ct->regs.mask = IPU_INT_CTRL(i / 32); + } +diff -uprN kernel/drivers/gpu/ipu-v3/ipu-prv.h kernel_new/drivers/gpu/ipu-v3/ipu-prv.h +--- kernel/drivers/gpu/ipu-v3/ipu-prv.h 2020-12-21 21:59:18.000000000 +0800 ++++ kernel_new/drivers/gpu/ipu-v3/ipu-prv.h 2021-04-01 18:28:07.663863278 +0800 +@@ -179,7 +179,7 @@ struct ipu_soc { + struct device *dev; + const struct ipu_devtype *devtype; + enum ipuv3_type ipu_type; +- spinlock_t lock; ++ ipipe_spinlock_t lock; + struct mutex channel_lock; + struct list_head channels; + +diff -uprN kernel/drivers/irqchip/irq-atmel-aic5.c kernel_new/drivers/irqchip/irq-atmel-aic5.c +--- kernel/drivers/irqchip/irq-atmel-aic5.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-atmel-aic5.c 2021-04-01 18:28:07.663863278 +0800 +@@ -80,7 +80,7 @@ aic5_handle(struct pt_regs *regs) + if (!irqstat) + irq_reg_writel(bgc, 0, AT91_AIC5_EOICR); + else +- handle_domain_irq(aic5_domain, irqnr, regs); ++ ipipe_handle_domain_irq(aic5_domain, irqnr, regs); + } + + static void aic5_mask(struct irq_data *d) +@@ -88,16 +88,18 @@ static void aic5_mask(struct irq_data *d + struct irq_domain *domain = d->domain; + struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ unsigned long flags; + + /* + * Disable interrupt on AIC5. We always take the lock of the + * first irq chip as all chips share the same registers. 
+ */ +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); ++ ipipe_lock_irq(d->irq); + irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); + irq_reg_writel(gc, 1, AT91_AIC5_IDCR); + gc->mask_cache &= ~d->mask; +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + } + + static void aic5_unmask(struct irq_data *d) +@@ -105,28 +107,59 @@ static void aic5_unmask(struct irq_data + struct irq_domain *domain = d->domain; + struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ unsigned long flags; + + /* + * Enable interrupt on AIC5. We always take the lock of the + * first irq chip as all chips share the same registers. + */ +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); + irq_reg_writel(gc, 1, AT91_AIC5_IECR); + gc->mask_cache |= d->mask; +- irq_gc_unlock(bgc); ++ ipipe_unlock_irq(d->irq); ++ irq_gc_unlock(bgc, flags); ++} ++ ++#ifdef CONFIG_IPIPE ++ ++static void aic5_hold(struct irq_data *d) ++{ ++ struct irq_domain *domain = d->domain; ++ struct irq_domain_chip_generic *dgc = domain->gc; ++ struct irq_chip_generic *gc = dgc->gc[0]; ++ ++ irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); ++ irq_reg_writel(gc, 1, AT91_AIC5_IDCR); ++ irq_reg_writel(gc, 0, AT91_AIC5_EOICR); ++} ++ ++static void aic5_release(struct irq_data *d) ++{ ++ struct irq_domain *domain = d->domain; ++ struct irq_domain_chip_generic *dgc = domain->gc; ++ struct irq_chip_generic *gc = dgc->gc[0]; ++ unsigned long flags; ++ ++ flags = irq_gc_lock(gc); ++ irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); ++ irq_reg_writel(gc, 1, AT91_AIC5_IECR); ++ irq_gc_unlock(gc, flags); + } + ++#endif ++ + static int aic5_retrigger(struct irq_data *d) + { + struct irq_domain *domain = d->domain; + struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); ++ unsigned long flags; + + /* Enable interrupt on AIC5 */ +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + irq_reg_writel(bgc, d->hwirq, AT91_AIC5_SSR); + irq_reg_writel(bgc, 1, AT91_AIC5_ISCR); +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + + return 0; + } +@@ -135,16 +168,17 @@ static int aic5_set_type(struct irq_data + { + struct irq_domain *domain = d->domain; + struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); ++ unsigned long flags; + unsigned int smr; + int ret; + +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + irq_reg_writel(bgc, d->hwirq, AT91_AIC5_SSR); + smr = irq_reg_readl(bgc, AT91_AIC5_SMR); + ret = aic_common_set_type(d, type, &smr); + if (!ret) + irq_reg_writel(bgc, smr, AT91_AIC5_SMR); +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + + return ret; + } +@@ -160,6 +194,7 @@ static void aic5_suspend(struct irq_data + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + int i; + u32 mask; ++ unsigned long flags; + + if (smr_cache) + for (i = 0; i < domain->revmap_size; i++) { +@@ -167,7 +202,7 @@ static void aic5_suspend(struct irq_data + smr_cache[i] = irq_reg_readl(bgc, AT91_AIC5_SMR); + } + +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + for (i = 0; i < dgc->irqs_per_chip; i++) { + mask = 1 << i; + if ((mask & gc->mask_cache) == (mask & gc->wake_active)) +@@ -179,7 +214,7 @@ static void aic5_suspend(struct irq_data + else + irq_reg_writel(bgc, 1, AT91_AIC5_IDCR); + } +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + } + + static void aic5_resume(struct irq_data *d) +@@ -190,8 +225,9 @@ static void aic5_resume(struct irq_data + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + 
int i; + u32 mask; ++ unsigned long flags; + +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + + if (smr_cache) { + irq_reg_writel(bgc, 0xffffffff, AT91_AIC5_SPU); +@@ -215,7 +251,7 @@ static void aic5_resume(struct irq_data + else + irq_reg_writel(bgc, 1, AT91_AIC5_IDCR); + } +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + } + + static void aic5_pm_shutdown(struct irq_data *d) +@@ -224,15 +260,16 @@ static void aic5_pm_shutdown(struct irq_ + struct irq_domain_chip_generic *dgc = domain->gc; + struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ unsigned long flags; + int i; + +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + for (i = 0; i < dgc->irqs_per_chip; i++) { + irq_reg_writel(bgc, i + gc->irq_base, AT91_AIC5_SSR); + irq_reg_writel(bgc, 1, AT91_AIC5_IDCR); + irq_reg_writel(bgc, 1, AT91_AIC5_ICCR); + } +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + } + #else + #define aic5_suspend NULL +@@ -349,6 +386,11 @@ static int __init aic5_of_init(struct de + gc->chip_types[0].chip.irq_suspend = aic5_suspend; + gc->chip_types[0].chip.irq_resume = aic5_resume; + gc->chip_types[0].chip.irq_pm_shutdown = aic5_pm_shutdown; ++#ifdef CONFIG_IPIPE ++ gc->chip_types[0].chip.irq_hold = aic5_hold; ++ gc->chip_types[0].chip.irq_release = aic5_release; ++ gc->chip_types[0].chip.flags = IRQCHIP_PIPELINE_SAFE; ++#endif + } + + aic5_hw_init(domain); +diff -uprN kernel/drivers/irqchip/irq-atmel-aic.c kernel_new/drivers/irqchip/irq-atmel-aic.c +--- kernel/drivers/irqchip/irq-atmel-aic.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-atmel-aic.c 1970-01-01 08:00:00.000000000 +0800 +@@ -1,274 +0,0 @@ +-/* +- * Atmel AT91 AIC (Advanced Interrupt Controller) driver +- * +- * Copyright (C) 2004 SAN People +- * Copyright (C) 2004 ATMEL +- * Copyright (C) Rick Bronson +- * Copyright (C) 2014 Free Electrons +- * +- * Author: Boris BREZILLON +- * +- * This file is licensed under the terms of the GNU General Public +- * License version 2. This program is licensed "as is" without any +- * warranty of any kind, whether express or implied. 
+- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-#include "irq-atmel-aic-common.h" +- +-/* Number of irq lines managed by AIC */ +-#define NR_AIC_IRQS 32 +- +-#define AT91_AIC_SMR(n) ((n) * 4) +- +-#define AT91_AIC_SVR(n) (0x80 + ((n) * 4)) +-#define AT91_AIC_IVR 0x100 +-#define AT91_AIC_FVR 0x104 +-#define AT91_AIC_ISR 0x108 +- +-#define AT91_AIC_IPR 0x10c +-#define AT91_AIC_IMR 0x110 +-#define AT91_AIC_CISR 0x114 +- +-#define AT91_AIC_IECR 0x120 +-#define AT91_AIC_IDCR 0x124 +-#define AT91_AIC_ICCR 0x128 +-#define AT91_AIC_ISCR 0x12c +-#define AT91_AIC_EOICR 0x130 +-#define AT91_AIC_SPU 0x134 +-#define AT91_AIC_DCR 0x138 +- +-static struct irq_domain *aic_domain; +- +-static asmlinkage void __exception_irq_entry +-aic_handle(struct pt_regs *regs) +-{ +- struct irq_domain_chip_generic *dgc = aic_domain->gc; +- struct irq_chip_generic *gc = dgc->gc[0]; +- u32 irqnr; +- u32 irqstat; +- +- irqnr = irq_reg_readl(gc, AT91_AIC_IVR); +- irqstat = irq_reg_readl(gc, AT91_AIC_ISR); +- +- if (!irqstat) +- irq_reg_writel(gc, 0, AT91_AIC_EOICR); +- else +- handle_domain_irq(aic_domain, irqnr, regs); +-} +- +-static int aic_retrigger(struct irq_data *d) +-{ +- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); +- +- /* Enable interrupt on AIC5 */ +- irq_gc_lock(gc); +- irq_reg_writel(gc, d->mask, AT91_AIC_ISCR); +- irq_gc_unlock(gc); +- +- return 0; +-} +- +-static int aic_set_type(struct irq_data *d, unsigned type) +-{ +- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); +- unsigned int smr; +- int ret; +- +- smr = irq_reg_readl(gc, AT91_AIC_SMR(d->hwirq)); +- ret = aic_common_set_type(d, type, &smr); +- if (ret) +- return ret; +- +- irq_reg_writel(gc, smr, AT91_AIC_SMR(d->hwirq)); +- +- return 0; +-} +- +-#ifdef CONFIG_PM +-static void aic_suspend(struct irq_data *d) +-{ +- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); +- +- irq_gc_lock(gc); +- irq_reg_writel(gc, gc->mask_cache, AT91_AIC_IDCR); +- irq_reg_writel(gc, gc->wake_active, AT91_AIC_IECR); +- irq_gc_unlock(gc); +-} +- +-static void aic_resume(struct irq_data *d) +-{ +- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); +- +- irq_gc_lock(gc); +- irq_reg_writel(gc, gc->wake_active, AT91_AIC_IDCR); +- irq_reg_writel(gc, gc->mask_cache, AT91_AIC_IECR); +- irq_gc_unlock(gc); +-} +- +-static void aic_pm_shutdown(struct irq_data *d) +-{ +- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); +- +- irq_gc_lock(gc); +- irq_reg_writel(gc, 0xffffffff, AT91_AIC_IDCR); +- irq_reg_writel(gc, 0xffffffff, AT91_AIC_ICCR); +- irq_gc_unlock(gc); +-} +-#else +-#define aic_suspend NULL +-#define aic_resume NULL +-#define aic_pm_shutdown NULL +-#endif /* CONFIG_PM */ +- +-static void __init aic_hw_init(struct irq_domain *domain) +-{ +- struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0); +- int i; +- +- /* +- * Perform 8 End Of Interrupt Command to make sure AIC +- * will not Lock out nIRQ +- */ +- for (i = 0; i < 8; i++) +- irq_reg_writel(gc, 0, AT91_AIC_EOICR); +- +- /* +- * Spurious Interrupt ID in Spurious Vector Register. 
+- * When there is no current interrupt, the IRQ Vector Register +- * reads the value stored in AIC_SPU +- */ +- irq_reg_writel(gc, 0xffffffff, AT91_AIC_SPU); +- +- /* No debugging in AIC: Debug (Protect) Control Register */ +- irq_reg_writel(gc, 0, AT91_AIC_DCR); +- +- /* Disable and clear all interrupts initially */ +- irq_reg_writel(gc, 0xffffffff, AT91_AIC_IDCR); +- irq_reg_writel(gc, 0xffffffff, AT91_AIC_ICCR); +- +- for (i = 0; i < 32; i++) +- irq_reg_writel(gc, i, AT91_AIC_SVR(i)); +-} +- +-static int aic_irq_domain_xlate(struct irq_domain *d, +- struct device_node *ctrlr, +- const u32 *intspec, unsigned int intsize, +- irq_hw_number_t *out_hwirq, +- unsigned int *out_type) +-{ +- struct irq_domain_chip_generic *dgc = d->gc; +- struct irq_chip_generic *gc; +- unsigned long flags; +- unsigned smr; +- int idx; +- int ret; +- +- if (!dgc) +- return -EINVAL; +- +- ret = aic_common_irq_domain_xlate(d, ctrlr, intspec, intsize, +- out_hwirq, out_type); +- if (ret) +- return ret; +- +- idx = intspec[0] / dgc->irqs_per_chip; +- if (idx >= dgc->num_chips) +- return -EINVAL; +- +- gc = dgc->gc[idx]; +- +- irq_gc_lock_irqsave(gc, flags); +- smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq)); +- aic_common_set_priority(intspec[2], &smr); +- irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq)); +- irq_gc_unlock_irqrestore(gc, flags); +- +- return ret; +-} +- +-static const struct irq_domain_ops aic_irq_ops = { +- .map = irq_map_generic_chip, +- .xlate = aic_irq_domain_xlate, +-}; +- +-static void __init at91rm9200_aic_irq_fixup(void) +-{ +- aic_common_rtc_irq_fixup(); +-} +- +-static void __init at91sam9260_aic_irq_fixup(void) +-{ +- aic_common_rtt_irq_fixup(); +-} +- +-static void __init at91sam9g45_aic_irq_fixup(void) +-{ +- aic_common_rtc_irq_fixup(); +- aic_common_rtt_irq_fixup(); +-} +- +-static const struct of_device_id aic_irq_fixups[] __initconst = { +- { .compatible = "atmel,at91rm9200", .data = at91rm9200_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9g45", .data = at91sam9g45_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9n12", .data = at91rm9200_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9rl", .data = at91sam9g45_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9x5", .data = at91rm9200_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9260", .data = at91sam9260_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9261", .data = at91sam9260_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9263", .data = at91sam9260_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9g20", .data = at91sam9260_aic_irq_fixup }, +- { /* sentinel */ }, +-}; +- +-static int __init aic_of_init(struct device_node *node, +- struct device_node *parent) +-{ +- struct irq_chip_generic *gc; +- struct irq_domain *domain; +- +- if (aic_domain) +- return -EEXIST; +- +- domain = aic_common_of_init(node, &aic_irq_ops, "atmel-aic", +- NR_AIC_IRQS, aic_irq_fixups); +- if (IS_ERR(domain)) +- return PTR_ERR(domain); +- +- aic_domain = domain; +- gc = irq_get_domain_generic_chip(domain, 0); +- +- gc->chip_types[0].regs.eoi = AT91_AIC_EOICR; +- gc->chip_types[0].regs.enable = AT91_AIC_IECR; +- gc->chip_types[0].regs.disable = AT91_AIC_IDCR; +- gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg; +- gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg; +- gc->chip_types[0].chip.irq_retrigger = aic_retrigger; +- gc->chip_types[0].chip.irq_set_type = aic_set_type; +- gc->chip_types[0].chip.irq_suspend = aic_suspend; +- gc->chip_types[0].chip.irq_resume = aic_resume; +- 
gc->chip_types[0].chip.irq_pm_shutdown = aic_pm_shutdown; +- +- aic_hw_init(domain); +- set_handle_irq(aic_handle); +- +- return 0; +-} +-IRQCHIP_DECLARE(at91rm9200_aic, "atmel,at91rm9200-aic", aic_of_init); +diff -uprN kernel/drivers/irqchip/irq-bcm2835.c kernel_new/drivers/irqchip/irq-bcm2835.c +--- kernel/drivers/irqchip/irq-bcm2835.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-bcm2835.c 2021-04-01 18:28:07.663863278 +0800 +@@ -110,7 +110,12 @@ static void armctrl_unmask_irq(struct ir + static struct irq_chip armctrl_chip = { + .name = "ARMCTRL-level", + .irq_mask = armctrl_mask_irq, +- .irq_unmask = armctrl_unmask_irq ++ .irq_unmask = armctrl_unmask_irq, ++#ifdef CONFIG_IPIPE ++ .irq_hold = armctrl_mask_irq, ++ .irq_release = armctrl_unmask_irq, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static int armctrl_xlate(struct irq_domain *d, struct device_node *ctrlr, +@@ -240,7 +245,7 @@ static void __exception_irq_entry bcm283 + u32 hwirq; + + while ((hwirq = get_next_armctrl_hwirq()) != ~0) +- handle_domain_irq(intc.domain, hwirq, regs); ++ ipipe_handle_domain_irq(intc.domain, hwirq, regs); + } + + static void bcm2836_chained_handle_irq(struct irq_desc *desc) +@@ -248,7 +253,7 @@ static void bcm2836_chained_handle_irq(s + u32 hwirq; + + while ((hwirq = get_next_armctrl_hwirq()) != ~0) +- generic_handle_irq(irq_linear_revmap(intc.domain, hwirq)); ++ ipipe_handle_demuxed_irq(irq_linear_revmap(intc.domain, hwirq)); + } + + IRQCHIP_DECLARE(bcm2835_armctrl_ic, "brcm,bcm2835-armctrl-ic", +diff -uprN kernel/drivers/irqchip/irq-bcm2836.c kernel_new/drivers/irqchip/irq-bcm2836.c +--- kernel/drivers/irqchip/irq-bcm2836.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-bcm2836.c 2021-04-01 18:28:07.663863278 +0800 +@@ -48,40 +48,68 @@ static void bcm2836_arm_irqchip_unmask_p + writel(readl(reg) | BIT(bit), reg); + } + +-static void bcm2836_arm_irqchip_mask_timer_irq(struct irq_data *d) ++static void __bcm2836_arm_irqchip_mask_timer_irq(struct irq_data *d) + { + bcm2836_arm_irqchip_mask_per_cpu_irq(LOCAL_TIMER_INT_CONTROL0, + d->hwirq - LOCAL_IRQ_CNTPSIRQ, +- smp_processor_id()); ++ raw_smp_processor_id()); + } + +-static void bcm2836_arm_irqchip_unmask_timer_irq(struct irq_data *d) ++static void bcm2836_arm_irqchip_mask_timer_irq(struct irq_data *d) ++{ ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ __bcm2836_arm_irqchip_mask_timer_irq(d); ++ hard_local_irq_restore(flags); ++} ++ ++static void __bcm2836_arm_irqchip_unmask_timer_irq(struct irq_data *d) + { + bcm2836_arm_irqchip_unmask_per_cpu_irq(LOCAL_TIMER_INT_CONTROL0, + d->hwirq - LOCAL_IRQ_CNTPSIRQ, +- smp_processor_id()); ++ raw_smp_processor_id()); ++} ++ ++static void bcm2836_arm_irqchip_unmask_timer_irq(struct irq_data *d) ++{ ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ __bcm2836_arm_irqchip_unmask_timer_irq(d); ++ hard_local_irq_restore(flags); + } + + static struct irq_chip bcm2836_arm_irqchip_timer = { + .name = "bcm2836-timer", + .irq_mask = bcm2836_arm_irqchip_mask_timer_irq, + .irq_unmask = bcm2836_arm_irqchip_unmask_timer_irq, ++#ifdef CONFIG_IPIPE ++ .irq_hold = __bcm2836_arm_irqchip_mask_timer_irq, ++ .irq_release = __bcm2836_arm_irqchip_unmask_timer_irq, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static void bcm2836_arm_irqchip_mask_pmu_irq(struct irq_data *d) + { +- writel(1 << smp_processor_id(), intc.base + LOCAL_PM_ROUTING_CLR); ++ writel(1 << raw_smp_processor_id(), intc.base + LOCAL_PM_ROUTING_CLR); + } + + static void 
bcm2836_arm_irqchip_unmask_pmu_irq(struct irq_data *d) + { +- writel(1 << smp_processor_id(), intc.base + LOCAL_PM_ROUTING_SET); ++ writel(1 << raw_smp_processor_id(), intc.base + LOCAL_PM_ROUTING_SET); + } + + static struct irq_chip bcm2836_arm_irqchip_pmu = { + .name = "bcm2836-pmu", + .irq_mask = bcm2836_arm_irqchip_mask_pmu_irq, + .irq_unmask = bcm2836_arm_irqchip_unmask_pmu_irq, ++#ifdef CONFIG_IPIPE ++ .irq_hold = bcm2836_arm_irqchip_mask_pmu_irq, ++ .irq_release = bcm2836_arm_irqchip_unmask_pmu_irq, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static void bcm2836_arm_irqchip_mask_gpu_irq(struct irq_data *d) +@@ -96,6 +124,11 @@ static struct irq_chip bcm2836_arm_irqch + .name = "bcm2836-gpu", + .irq_mask = bcm2836_arm_irqchip_mask_gpu_irq, + .irq_unmask = bcm2836_arm_irqchip_unmask_gpu_irq, ++#ifdef CONFIG_IPIPE ++ .irq_hold = bcm2836_arm_irqchip_mask_gpu_irq, ++ .irq_release = bcm2836_arm_irqchip_unmask_gpu_irq, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static int bcm2836_map(struct irq_domain *d, unsigned int irq, +@@ -132,7 +165,7 @@ static int bcm2836_map(struct irq_domain + static void + __exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs) + { +- int cpu = smp_processor_id(); ++ int cpu = raw_smp_processor_id(); + u32 stat; + + stat = readl_relaxed(intc.base + LOCAL_IRQ_PENDING0 + 4 * cpu); +@@ -144,12 +177,12 @@ __exception_irq_entry bcm2836_arm_irqchi + u32 ipi = ffs(mbox_val) - 1; + + writel(1 << ipi, mailbox0); +- handle_IPI(ipi, regs); ++ ipipe_handle_multi_ipi(ipi, regs); + #endif + } else if (stat) { + u32 hwirq = ffs(stat) - 1; + +- handle_domain_irq(intc.domain, hwirq, regs); ++ ipipe_handle_domain_irq(intc.domain, hwirq, regs); + } + } + +diff -uprN kernel/drivers/irqchip/irq-bcm7120-l2.c kernel_new/drivers/irqchip/irq-bcm7120-l2.c +--- kernel/drivers/irqchip/irq-bcm7120-l2.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-bcm7120-l2.c 2021-04-01 18:28:07.663863278 +0800 +@@ -61,6 +61,7 @@ static void bcm7120_l2_intc_irq_handle(s + struct bcm7120_l2_intc_data *b = data->b; + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int idx; ++ unsigned long flags; + + chained_irq_enter(chip, desc); + +@@ -71,11 +72,11 @@ static void bcm7120_l2_intc_irq_handle(s + unsigned long pending; + int hwirq; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + pending = irq_reg_readl(gc, b->stat_offset[idx]) & + gc->mask_cache & + data->irq_map_mask[idx]; +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + + for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) { + generic_handle_irq(irq_find_mapping(b->domain, +@@ -90,22 +91,24 @@ static void bcm7120_l2_intc_suspend(stru + { + struct bcm7120_l2_intc_data *b = gc->private; + struct irq_chip_type *ct = gc->chip_types; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + if (b->can_wake) + irq_reg_writel(gc, gc->mask_cache | gc->wake_active, + ct->regs.mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static void bcm7120_l2_intc_resume(struct irq_chip_generic *gc) + { + struct irq_chip_type *ct = gc->chip_types; ++ unsigned long flags; + + /* Restore the saved mask */ +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, gc->mask_cache, ct->regs.mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static int bcm7120_l2_intc_init_one(struct device_node *dn, +diff -uprN kernel/drivers/irqchip/irq-brcmstb-l2.c kernel_new/drivers/irqchip/irq-brcmstb-l2.c +--- kernel/drivers/irqchip/irq-brcmstb-l2.c 
2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-brcmstb-l2.c 2021-04-01 18:28:07.663863278 +0800 +@@ -131,7 +131,7 @@ static void brcmstb_l2_intc_suspend(stru + struct brcmstb_l2_intc_data *b = gc->private; + unsigned long flags; + +- irq_gc_lock_irqsave(gc, flags); ++ flags = irq_gc_lock(gc); + /* Save the current mask */ + b->saved_mask = irq_reg_readl(gc, ct->regs.mask); + +@@ -140,7 +140,7 @@ static void brcmstb_l2_intc_suspend(stru + irq_reg_writel(gc, ~gc->wake_active, ct->regs.disable); + irq_reg_writel(gc, gc->wake_active, ct->regs.enable); + } +- irq_gc_unlock_irqrestore(gc, flags); ++ irq_gc_unlock(gc, flags); + } + + static void brcmstb_l2_intc_resume(struct irq_data *d) +@@ -150,7 +150,7 @@ static void brcmstb_l2_intc_resume(struc + struct brcmstb_l2_intc_data *b = gc->private; + unsigned long flags; + +- irq_gc_lock_irqsave(gc, flags); ++ flags = irq_gc_lock(gc); + if (ct->chip.irq_ack) { + /* Clear unmasked non-wakeup interrupts */ + irq_reg_writel(gc, ~b->saved_mask & ~gc->wake_active, +@@ -160,7 +160,7 @@ static void brcmstb_l2_intc_resume(struc + /* Restore the saved mask */ + irq_reg_writel(gc, b->saved_mask, ct->regs.disable); + irq_reg_writel(gc, ~b->saved_mask, ct->regs.enable); +- irq_gc_unlock_irqrestore(gc, flags); ++ irq_gc_unlock(gc, flags); + } + + static int __init brcmstb_l2_intc_of_init(struct device_node *np, +diff -uprN kernel/drivers/irqchip/irq-crossbar.c kernel_new/drivers/irqchip/irq-crossbar.c +--- kernel/drivers/irqchip/irq-crossbar.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-crossbar.c 2021-04-01 18:28:07.663863278 +0800 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #define IRQ_FREE -1 + #define IRQ_RESERVED -2 +@@ -69,10 +70,15 @@ static struct irq_chip crossbar_chip = { + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_type = irq_chip_set_type_parent, + .flags = IRQCHIP_MASK_ON_SUSPEND | +- IRQCHIP_SKIP_SET_WAKE, ++ IRQCHIP_SKIP_SET_WAKE | ++ IRQCHIP_PIPELINE_SAFE, + #ifdef CONFIG_SMP + .irq_set_affinity = irq_chip_set_affinity_parent, + #endif ++#ifdef CONFIG_IPIPE ++ .irq_hold = irq_chip_hold_parent, ++ .irq_release = irq_chip_release_parent, ++#endif + }; + + static int allocate_gic_irq(struct irq_domain *domain, unsigned virq, +diff -uprN kernel/drivers/irqchip/irq-dw-apb-ictl.c kernel_new/drivers/irqchip/irq-dw-apb-ictl.c +--- kernel/drivers/irqchip/irq-dw-apb-ictl.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-dw-apb-ictl.c 2021-04-01 18:28:07.664863277 +0800 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #define APB_INT_ENABLE_L 0x00 + #define APB_INT_ENABLE_H 0x04 +@@ -42,7 +43,7 @@ static void dw_apb_ictl_handler(struct i + u32 hwirq = ffs(stat) - 1; + u32 virq = irq_find_mapping(d, gc->irq_base + hwirq); + +- generic_handle_irq(virq); ++ ipipe_handle_demuxed_irq(virq); + stat &= ~(1 << hwirq); + } + } +@@ -55,11 +56,12 @@ static void dw_apb_ictl_resume(struct ir + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + writel_relaxed(~0, gc->reg_base + ct->regs.enable); + writel_relaxed(*ct->mask_cache, gc->reg_base + ct->regs.mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + #else + #define dw_apb_ictl_resume NULL +@@ -144,6 +146,7 @@ static int __init dw_apb_ictl_init(struc + gc->chip_types[0].chip.irq_mask = irq_gc_mask_set_bit; + 
gc->chip_types[0].chip.irq_unmask = irq_gc_mask_clr_bit; + gc->chip_types[0].chip.irq_resume = dw_apb_ictl_resume; ++ gc->chip_types[0].chip.flags |= IRQCHIP_PIPELINE_SAFE; + } + + irq_set_chained_handler_and_data(irq, dw_apb_ictl_handler, domain); +diff -uprN kernel/drivers/irqchip/irq-gic.c kernel_new/drivers/irqchip/irq-gic.c +--- kernel/drivers/irqchip/irq-gic.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-gic.c 2021-04-01 18:28:07.664863277 +0800 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -91,9 +92,17 @@ struct gic_chip_data { + #endif + }; + ++#ifdef CONFIG_IPIPE ++#define pipeline_lock(__flags) do { (__flags) = hard_local_irq_save(); } while (0) ++#define pipeline_unlock(__flags) hard_local_irq_restore(__flags) ++#else ++#define pipeline_lock(__flags) do { (void)__flags; } while (0) ++#define pipeline_unlock(__flags) do { (void)__flags; } while (0) ++#endif ++ + #ifdef CONFIG_BL_SWITCHER + +-static DEFINE_RAW_SPINLOCK(cpu_map_lock); ++static IPIPE_DEFINE_RAW_SPINLOCK(cpu_map_lock); + + #define gic_lock_irqsave(f) \ + raw_spin_lock_irqsave(&cpu_map_lock, (f)) +@@ -204,7 +213,12 @@ static int gic_peek_irq(struct irq_data + + static void gic_mask_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ pipeline_lock(flags); ++ ipipe_lock_irq(d->irq); + gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR); ++ pipeline_unlock(flags); + } + + static void gic_eoimode1_mask_irq(struct irq_data *d) +@@ -224,7 +238,12 @@ static void gic_eoimode1_mask_irq(struct + + static void gic_unmask_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ pipeline_lock(flags); + gic_poke_irq(d, GIC_DIST_ENABLE_SET); ++ ipipe_unlock_irq(d->irq); ++ pipeline_unlock(flags); + } + + static void gic_eoi_irq(struct irq_data *d) +@@ -241,6 +260,27 @@ static void gic_eoimode1_eoi_irq(struct + writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE); + } + ++#ifdef CONFIG_IPIPE ++static void gic_hold_irq(struct irq_data *d) ++{ ++ struct irq_chip *chip = irq_data_get_irq_chip(d); ++ ++ gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR); ++ ++ if (chip->irq_eoi == gic_eoimode1_eoi_irq) { ++ if (irqd_is_forwarded_to_vcpu(d)) ++ gic_poke_irq(d, GIC_DIST_ACTIVE_CLEAR); ++ gic_eoimode1_eoi_irq(d); ++ } else ++ gic_eoi_irq(d); ++} ++ ++static void gic_release_irq(struct irq_data *d) ++{ ++ gic_poke_irq(d, GIC_DIST_ENABLE_SET); ++} ++#endif /* CONFIG_IPIPE */ ++ + static int gic_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, bool val) + { +@@ -364,7 +404,7 @@ static void __exception_irq_entry gic_ha + if (static_branch_likely(&supports_deactivate_key)) + writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI); + isb(); +- handle_domain_irq(gic->domain, irqnr, regs); ++ ipipe_handle_domain_irq(gic->domain, irqnr, regs); + continue; + } + if (irqnr < 16) { +@@ -380,7 +420,7 @@ static void __exception_irq_entry gic_ha + * Pairs with the write barrier in gic_raise_softirq + */ + smp_rmb(); +- handle_IPI(irqnr, regs); ++ ipipe_handle_multi_ipi(irqnr, regs); + #endif + continue; + } +@@ -408,7 +448,7 @@ static void gic_handle_cascade_irq(struc + handle_bad_irq(desc); + } else { + isb(); +- generic_handle_irq(cascade_irq); ++ ipipe_handle_demuxed_irq(cascade_irq); + } + + out: +@@ -420,11 +460,16 @@ static const struct irq_chip gic_chip = + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoi_irq, + .irq_set_type = gic_set_type, ++#ifdef CONFIG_IPIPE ++ .irq_hold = gic_hold_irq, ++ .irq_release = gic_release_irq, ++#endif + 
.irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | +- IRQCHIP_MASK_ON_SUSPEND, ++ IRQCHIP_MASK_ON_SUSPEND | ++ IRQCHIP_PIPELINE_SAFE, + }; + + void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq) +@@ -482,7 +527,6 @@ static void gic_cpu_if_up(struct gic_chi + writel_relaxed(bypass | mode | GICC_ENABLE, cpu_base + GIC_CPU_CTRL); + } + +- + static void gic_dist_init(struct gic_chip_data *gic) + { + unsigned int i; +diff -uprN kernel/drivers/irqchip/irq-gic-v2m.c kernel_new/drivers/irqchip/irq-gic-v2m.c +--- kernel/drivers/irqchip/irq-gic-v2m.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-gic-v2m.c 2021-04-01 18:28:07.664863277 +0800 +@@ -74,14 +74,22 @@ struct v2m_data { + + static void gicv2m_mask_msi_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ flags = hard_cond_local_irq_save(); + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); ++ hard_cond_local_irq_restore(flags); + } + + static void gicv2m_unmask_msi_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ flags = hard_cond_local_irq_save(); + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); ++ hard_cond_local_irq_restore(flags); + } + + static struct irq_chip gicv2m_msi_irq_chip = { +@@ -90,6 +98,11 @@ static struct irq_chip gicv2m_msi_irq_ch + .irq_unmask = gicv2m_unmask_msi_irq, + .irq_eoi = irq_chip_eoi_parent, + .irq_write_msi_msg = pci_msi_domain_write_msg, ++#ifdef CONFIG_IPIPE ++ .irq_hold = irq_chip_hold_parent, ++ .irq_release = irq_chip_release_parent, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static struct msi_domain_info gicv2m_msi_domain_info = { +@@ -120,6 +133,11 @@ static struct irq_chip gicv2m_irq_chip = + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_compose_msi_msg = gicv2m_compose_msi_msg, ++#ifdef CONFIG_IPIPE ++ .irq_hold = irq_chip_hold_parent, ++ .irq_release = irq_chip_release_parent, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static int gicv2m_irq_gic_domain_alloc(struct irq_domain *domain, +@@ -236,6 +254,7 @@ static bool is_msi_spi_valid(u32 base, u + + static struct irq_chip gicv2m_pmsi_irq_chip = { + .name = "pMSI", ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static struct msi_domain_ops gicv2m_pmsi_ops = { +diff -uprN kernel/drivers/irqchip/irq-gic-v3.c kernel_new/drivers/irqchip/irq-gic-v3.c +--- kernel/drivers/irqchip/irq-gic-v3.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-gic-v3.c 2021-04-02 09:09:37.322234347 +0800 +@@ -243,7 +243,12 @@ static void gic_poke_irq(struct irq_data + + static void gic_mask_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ flags = hard_cond_local_irq_save(); ++ ipipe_lock_irq(d->irq); + gic_poke_irq(d, GICD_ICENABLER); ++ hard_cond_local_irq_restore(flags); + } + + static void gic_eoimode1_mask_irq(struct irq_data *d) +@@ -263,7 +268,12 @@ static void gic_eoimode1_mask_irq(struct + + static void gic_unmask_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ flags = hard_cond_local_irq_save(); + gic_poke_irq(d, GICD_ISENABLER); ++ ipipe_unlock_irq(d->irq); ++ hard_cond_local_irq_restore(flags); + } + + static inline bool gic_supports_nmi(void) +@@ -421,6 +431,27 @@ static void gic_eoimode1_eoi_irq(struct + gic_write_dir(gic_irq(d)); + } + ++#ifdef CONFIG_IPIPE ++static void gic_hold_irq(struct irq_data *d) ++{ ++ struct irq_chip *chip = irq_data_get_irq_chip(d); ++ ++ gic_poke_irq(d, 
GICD_ICENABLER); ++ ++ if (chip->irq_eoi == gic_eoimode1_eoi_irq) { ++ if (irqd_is_forwarded_to_vcpu(d)) ++ gic_poke_irq(d, GICD_ICACTIVER); ++ gic_eoimode1_eoi_irq(d); ++ } else ++ gic_eoi_irq(d); ++} ++ ++static void gic_release_irq(struct irq_data *d) ++{ ++ gic_poke_irq(d, GICD_ISENABLER); ++} ++#endif /* CONFIG_IPIPE */ ++ + static int gic_set_type(struct irq_data *d, unsigned int type) + { + unsigned int irq = gic_irq(d); +@@ -537,7 +568,7 @@ static asmlinkage void __exception_irq_e + else + isb(); + +- err = handle_domain_irq(gic_data.domain, irqnr, regs); ++ err = ipipe_handle_domain_irq(gic_data.domain, irqnr, regs); + if (err) { + WARN_ONCE(true, "Unexpected interrupt received!\n"); + gic_deactivate_unhandled(irqnr); +@@ -556,7 +587,7 @@ static asmlinkage void __exception_irq_e + * that any shared data read by handle_IPI will + * be read after the ACK. + */ +- handle_IPI(irqnr, regs); ++ ipipe_handle_multi_ipi(irqnr, regs); + #else + WARN_ONCE(true, "Unexpected SGI received!\n"); + #endif +@@ -1270,6 +1301,10 @@ static struct irq_chip gic_chip = { + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoi_irq, + .irq_set_type = gic_set_type, ++#ifdef CONFIG_IPIPE ++ .irq_hold = gic_hold_irq, ++ .irq_release = gic_release_irq, ++#endif + .irq_set_affinity = gic_set_affinity, + .irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, +@@ -1277,6 +1312,7 @@ static struct irq_chip gic_chip = { + .irq_nmi_teardown = gic_irq_nmi_teardown, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | ++ IRQCHIP_PIPELINE_SAFE | + IRQCHIP_MASK_ON_SUSPEND, + }; + +@@ -1286,6 +1322,10 @@ static struct irq_chip gic_eoimode1_chip + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoimode1_eoi_irq, + .irq_set_type = gic_set_type, ++#ifdef CONFIG_IPIPE ++ .irq_hold = gic_hold_irq, ++ .irq_release = gic_release_irq, ++#endif + .irq_set_affinity = gic_set_affinity, + .irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, +@@ -1294,6 +1334,7 @@ static struct irq_chip gic_eoimode1_chip + .irq_nmi_teardown = gic_irq_nmi_teardown, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | ++ IRQCHIP_PIPELINE_SAFE | + IRQCHIP_MASK_ON_SUSPEND, + }; + +diff -uprN kernel/drivers/irqchip/irq-gic-v3.c.orig kernel_new/drivers/irqchip/irq-gic-v3.c.orig +--- kernel/drivers/irqchip/irq-gic-v3.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-gic-v3.c.orig 2020-12-21 21:59:19.000000000 +0800 +@@ -0,0 +1,2118 @@ ++/* ++ * Copyright (C) 2013-2017 ARM Limited, All Rights Reserved. ++ * Author: Marc Zyngier ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . 
++ */ ++ ++#define pr_fmt(fmt) "GICv3: " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include "irq-gic-common.h" ++ ++#define GICD_INT_NMI_PRI (GICD_INT_DEF_PRI & ~0x80) ++ ++struct redist_region { ++ void __iomem *redist_base; ++ phys_addr_t phys_base; ++ bool single_redist; ++}; ++ ++struct gic_chip_data { ++ struct fwnode_handle *fwnode; ++ void __iomem *dist_base; ++ struct redist_region *redist_regions; ++ struct rdists rdists; ++ struct irq_domain *domain; ++ u64 redist_stride; ++ u32 nr_redist_regions; ++ bool has_rss; ++ unsigned int irq_nr; ++ struct partition_desc *ppi_descs[16]; ++}; ++ ++static struct gic_chip_data gic_data __read_mostly; ++static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); ++ ++/* ++ * The behaviours of RPR and PMR registers differ depending on the value of ++ * SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the ++ * distributor and redistributors depends on whether security is enabled in the ++ * GIC. ++ * ++ * When security is enabled, non-secure priority values from the (re)distributor ++ * are presented to the GIC CPUIF as follow: ++ * (GIC_(R)DIST_PRI[irq] >> 1) | 0x80; ++ * ++ * If SCR_EL3.FIQ == 1, the values writen to/read from PMR and RPR at non-secure ++ * EL1 are subject to a similar operation thus matching the priorities presented ++ * from the (re)distributor when security is enabled. ++ * ++ * see GICv3/GICv4 Architecture Specification (IHI0069D): ++ * - section 4.8.1 Non-secure accesses to register fields for Secure interrupt ++ * priorities. ++ * - Figure 4-7 Secure read of the priority field for a Non-secure Group 1 ++ * interrupt. ++ * ++ * For now, we only support pseudo-NMIs if we have non-secure view of ++ * priorities. ++ */ ++static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); ++ ++/* ++ * Global static key controlling whether an update to PMR allowing more ++ * interrupts requires to be propagated to the redistributor (DSB SY). ++ * And this needs to be exported for modules to be able to enable ++ * interrupts... ++ */ ++DEFINE_STATIC_KEY_FALSE(gic_pmr_sync); ++EXPORT_SYMBOL(gic_pmr_sync); ++ ++/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ ++static refcount_t ppi_nmi_refs[16]; ++ ++static struct gic_kvm_info gic_v3_kvm_info; ++static DEFINE_PER_CPU(bool, has_rss); ++ ++#define MPIDR_RS(mpidr) (((mpidr) & 0xF0UL) >> 4) ++#define gic_data_rdist() (this_cpu_ptr(gic_data.rdists.rdist)) ++#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) ++#define gic_data_rdist_sgi_base() (gic_data_rdist_rd_base() + SZ_64K) ++ ++/* Our default, arbitrary priority value. Linux only uses one anyway. */ ++#define DEFAULT_PMR_VALUE 0xf0 ++ ++static inline unsigned int gic_irq(struct irq_data *d) ++{ ++ return d->hwirq; ++} ++ ++static inline int gic_irq_in_rdist(struct irq_data *d) ++{ ++ return gic_irq(d) < 32; ++} ++ ++static inline void __iomem *gic_dist_base(struct irq_data *d) ++{ ++ if (gic_irq_in_rdist(d)) /* SGI+PPI -> SGI_base for this CPU */ ++ return gic_data_rdist_sgi_base(); ++ ++ if (d->hwirq <= 1023) /* SPI -> dist_base */ ++ return gic_data.dist_base; ++ ++ return NULL; ++} ++ ++static void gic_do_wait_for_rwp(void __iomem *base) ++{ ++ u32 count = 1000000; /* 1s! 
*/ ++ ++ while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { ++ count--; ++ if (!count) { ++ pr_err_ratelimited("RWP timeout, gone fishing\n"); ++ return; ++ } ++ cpu_relax(); ++ udelay(1); ++ }; ++} ++ ++/* Wait for completion of a distributor change */ ++static void gic_dist_wait_for_rwp(void) ++{ ++ gic_do_wait_for_rwp(gic_data.dist_base); ++} ++ ++/* Wait for completion of a redistributor change */ ++static void gic_redist_wait_for_rwp(void) ++{ ++ gic_do_wait_for_rwp(gic_data_rdist_rd_base()); ++} ++ ++#ifdef CONFIG_ARM64 ++ ++static u64 __maybe_unused gic_read_iar(void) ++{ ++ if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154)) ++ return gic_read_iar_cavium_thunderx(); ++ else ++ return gic_read_iar_common(); ++} ++#endif ++ ++static void gic_enable_redist(bool enable) ++{ ++ void __iomem *rbase; ++ u32 count = 1000000; /* 1s! */ ++ u32 val; ++ ++ rbase = gic_data_rdist_rd_base(); ++ ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (enable) ++ /* Wake up this CPU redistributor */ ++ val &= ~GICR_WAKER_ProcessorSleep; ++ else ++ val |= GICR_WAKER_ProcessorSleep; ++ writel_relaxed(val, rbase + GICR_WAKER); ++ ++ if (!enable) { /* Check that GICR_WAKER is writeable */ ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (!(val & GICR_WAKER_ProcessorSleep)) ++ return; /* No PM support in this redistributor */ ++ } ++ ++ while (--count) { ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep)) ++ break; ++ cpu_relax(); ++ udelay(1); ++ }; ++ if (!count) ++ pr_err_ratelimited("redistributor failed to %s...\n", ++ enable ? "wakeup" : "sleep"); ++} ++ ++/* ++ * Routines to disable, enable, EOI and route interrupts ++ */ ++static int gic_peek_irq(struct irq_data *d, u32 offset) ++{ ++ u32 mask = 1 << (gic_irq(d) % 32); ++ void __iomem *base; ++ ++ if (gic_irq_in_rdist(d)) ++ base = gic_data_rdist_sgi_base(); ++ else ++ base = gic_data.dist_base; ++ ++ return !!(readl_relaxed(base + offset + (gic_irq(d) / 32) * 4) & mask); ++} ++ ++static void gic_poke_irq(struct irq_data *d, u32 offset) ++{ ++ u32 mask = 1 << (gic_irq(d) % 32); ++ void (*rwp_wait)(void); ++ void __iomem *base; ++ ++ if (gic_irq_in_rdist(d)) { ++ base = gic_data_rdist_sgi_base(); ++ rwp_wait = gic_redist_wait_for_rwp; ++ } else { ++ base = gic_data.dist_base; ++ rwp_wait = gic_dist_wait_for_rwp; ++ } ++ ++ writel_relaxed(mask, base + offset + (gic_irq(d) / 32) * 4); ++ rwp_wait(); ++} ++ ++static void gic_mask_irq(struct irq_data *d) ++{ ++ gic_poke_irq(d, GICD_ICENABLER); ++} ++ ++static void gic_eoimode1_mask_irq(struct irq_data *d) ++{ ++ gic_mask_irq(d); ++ /* ++ * When masking a forwarded interrupt, make sure it is ++ * deactivated as well. ++ * ++ * This ensures that an interrupt that is getting ++ * disabled/masked will not get "stuck", because there is ++ * noone to deactivate it (guest is being terminated). 
++ */ ++ if (irqd_is_forwarded_to_vcpu(d)) ++ gic_poke_irq(d, GICD_ICACTIVER); ++} ++ ++static void gic_unmask_irq(struct irq_data *d) ++{ ++ gic_poke_irq(d, GICD_ISENABLER); ++} ++ ++static inline bool gic_supports_nmi(void) ++{ ++ return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && ++ static_branch_likely(&supports_pseudo_nmis); ++} ++ ++bool gic_supports_pseudo_nmis(void) ++{ ++ return gic_supports_nmi(); ++} ++ ++static int gic_irq_set_irqchip_state(struct irq_data *d, ++ enum irqchip_irq_state which, bool val) ++{ ++ u32 reg; ++ ++ if (d->hwirq >= gic_data.irq_nr) /* PPI/SPI only */ ++ return -EINVAL; ++ ++ switch (which) { ++ case IRQCHIP_STATE_PENDING: ++ reg = val ? GICD_ISPENDR : GICD_ICPENDR; ++ break; ++ ++ case IRQCHIP_STATE_ACTIVE: ++ reg = val ? GICD_ISACTIVER : GICD_ICACTIVER; ++ break; ++ ++ case IRQCHIP_STATE_MASKED: ++ reg = val ? GICD_ICENABLER : GICD_ISENABLER; ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ gic_poke_irq(d, reg); ++ return 0; ++} ++ ++static int gic_irq_get_irqchip_state(struct irq_data *d, ++ enum irqchip_irq_state which, bool *val) ++{ ++ if (d->hwirq >= gic_data.irq_nr) /* PPI/SPI only */ ++ return -EINVAL; ++ ++ switch (which) { ++ case IRQCHIP_STATE_PENDING: ++ *val = gic_peek_irq(d, GICD_ISPENDR); ++ break; ++ ++ case IRQCHIP_STATE_ACTIVE: ++ *val = gic_peek_irq(d, GICD_ISACTIVER); ++ break; ++ ++ case IRQCHIP_STATE_MASKED: ++ *val = !gic_peek_irq(d, GICD_ISENABLER); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static void gic_irq_set_prio(struct irq_data *d, u8 prio) ++{ ++ void __iomem *base = gic_dist_base(d); ++ ++ writeb_relaxed(prio, base + GICD_IPRIORITYR + gic_irq(d)); ++} ++ ++static int gic_irq_nmi_setup(struct irq_data *d) ++{ ++ struct irq_desc *desc = irq_to_desc(d->irq); ++ ++ if (!gic_supports_nmi()) ++ return -EINVAL; ++ ++ if (gic_peek_irq(d, GICD_ISENABLER)) { ++ pr_err("Cannot set NMI property of enabled IRQ %u\n", d->irq); ++ return -EINVAL; ++ } ++ ++ /* ++ * A secondary irq_chip should be in charge of LPI request, ++ * it should not be possible to get there ++ */ ++ if (WARN_ON(gic_irq(d) >= 8192)) ++ return -EINVAL; ++ ++ /* desc lock should already be held */ ++ if (gic_irq(d) < 32) { ++ /* Setting up PPI as NMI, only switch handler for first NMI */ ++ if (!refcount_inc_not_zero(&ppi_nmi_refs[gic_irq(d) - 16])) { ++ refcount_set(&ppi_nmi_refs[gic_irq(d) - 16], 1); ++ desc->handle_irq = handle_percpu_devid_fasteoi_nmi; ++ } ++ } else { ++ desc->handle_irq = handle_fasteoi_nmi; ++ } ++ ++ gic_irq_set_prio(d, GICD_INT_NMI_PRI); ++ ++ return 0; ++} ++ ++static void gic_irq_nmi_teardown(struct irq_data *d) ++{ ++ struct irq_desc *desc = irq_to_desc(d->irq); ++ ++ if (WARN_ON(!gic_supports_nmi())) ++ return; ++ ++ if (gic_peek_irq(d, GICD_ISENABLER)) { ++ pr_err("Cannot set NMI property of enabled IRQ %u\n", d->irq); ++ return; ++ } ++ ++ /* ++ * A secondary irq_chip should be in charge of LPI request, ++ * it should not be possible to get there ++ */ ++ if (WARN_ON(gic_irq(d) >= 8192)) ++ return; ++ ++ /* desc lock should already be held */ ++ if (gic_irq(d) < 32) { ++ /* Tearing down NMI, only switch handler for last NMI */ ++ if (refcount_dec_and_test(&ppi_nmi_refs[gic_irq(d) - 16])) ++ desc->handle_irq = handle_percpu_devid_irq; ++ } else { ++ desc->handle_irq = handle_fasteoi_irq; ++ } ++ ++ gic_irq_set_prio(d, GICD_INT_DEF_PRI); ++} ++ ++static void gic_eoi_irq(struct irq_data *d) ++{ ++ gic_write_eoir(gic_irq(d)); ++} ++ ++static void gic_eoimode1_eoi_irq(struct irq_data *d) ++{ ++ /* ++ * No 
need to deactivate an LPI, or an interrupt that ++ * is is getting forwarded to a vcpu. ++ */ ++ if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d)) ++ return; ++ gic_write_dir(gic_irq(d)); ++} ++ ++static int gic_set_type(struct irq_data *d, unsigned int type) ++{ ++ unsigned int irq = gic_irq(d); ++ void (*rwp_wait)(void); ++ void __iomem *base; ++ ++ /* Interrupt configuration for SGIs can't be changed */ ++ if (irq < 16) ++ return -EINVAL; ++ ++ /* SPIs have restrictions on the supported types */ ++ if (irq >= 32 && type != IRQ_TYPE_LEVEL_HIGH && ++ type != IRQ_TYPE_EDGE_RISING) ++ return -EINVAL; ++ ++ if (gic_irq_in_rdist(d)) { ++ base = gic_data_rdist_sgi_base(); ++ rwp_wait = gic_redist_wait_for_rwp; ++ } else { ++ base = gic_data.dist_base; ++ rwp_wait = gic_dist_wait_for_rwp; ++ } ++ ++ return gic_configure_irq(irq, type, base, rwp_wait); ++} ++ ++static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) ++{ ++ if (vcpu) ++ irqd_set_forwarded_to_vcpu(d); ++ else ++ irqd_clr_forwarded_to_vcpu(d); ++ return 0; ++} ++ ++static u64 gic_mpidr_to_affinity(unsigned long mpidr) ++{ ++ u64 aff; ++ ++ aff = ((u64)MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 0)); ++ ++ return aff; ++} ++ ++static void gic_deactivate_unhandled(u32 irqnr) ++{ ++ if (static_branch_likely(&supports_deactivate_key)) { ++ if (irqnr < 8192) ++ gic_write_dir(irqnr); ++ } else { ++ gic_write_eoir(irqnr); ++ } ++} ++ ++static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) ++{ ++ bool irqs_enabled = interrupts_enabled(regs); ++ int err; ++ ++ if (unlikely(irqnr < 16)) { ++ gic_write_eoir(irqnr); ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_write_dir(irqnr); ++#ifdef CONFIG_SMP ++ handle_IPI(irqnr, regs); ++#endif ++ return; ++ } ++ ++ if (irqs_enabled) ++ nmi_enter(); ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_write_eoir(irqnr); ++ /* ++ * Leave the PSR.I bit set to prevent other NMIs to be ++ * received while handling this one. ++ * PSR.I will be restored when we ERET to the ++ * interrupted context. ++ */ ++ err = handle_domain_nmi(gic_data.domain, irqnr, regs); ++ if (err) ++ gic_deactivate_unhandled(irqnr); ++ ++ if (irqs_enabled) ++ nmi_exit(); ++} ++ ++static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) ++{ ++ u32 irqnr; ++ ++ irqnr = gic_read_iar(); ++ ++ if (gic_supports_nmi() && ++ unlikely(gic_read_rpr() == GICD_INT_NMI_PRI)) { ++ gic_handle_nmi(irqnr, regs); ++ return; ++ } ++ ++ if (gic_prio_masking_enabled()) { ++ gic_pmr_mask_irqs(); ++ gic_arch_enable_irqs(); ++ } ++ ++ if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) { ++ int err; ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_write_eoir(irqnr); ++ else ++ isb(); ++ ++ err = handle_domain_irq(gic_data.domain, irqnr, regs); ++ if (err) { ++ WARN_ONCE(true, "Unexpected interrupt received!\n"); ++ gic_deactivate_unhandled(irqnr); ++ } ++ return; ++ } ++ if (irqnr < 16) { ++ gic_write_eoir(irqnr); ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_write_dir(irqnr); ++#ifdef CONFIG_SMP ++ /* ++ * Unlike GICv2, we don't need an smp_rmb() here. ++ * The control dependency from gic_read_iar to ++ * the ISB in gic_write_eoir is enough to ensure ++ * that any shared data read by handle_IPI will ++ * be read after the ACK. 
++ */ ++ handle_IPI(irqnr, regs); ++#else ++ WARN_ONCE(true, "Unexpected SGI received!\n"); ++#endif ++ } ++} ++ ++static u32 gic_get_pribits(void) ++{ ++ u32 pribits; ++ ++ pribits = gic_read_ctlr(); ++ pribits &= ICC_CTLR_EL1_PRI_BITS_MASK; ++ pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT; ++ pribits++; ++ ++ return pribits; ++} ++ ++static bool gic_has_group0(void) ++{ ++ u32 val; ++ u32 old_pmr; ++ ++ old_pmr = gic_read_pmr(); ++ ++ /* ++ * Let's find out if Group0 is under control of EL3 or not by ++ * setting the highest possible, non-zero priority in PMR. ++ * ++ * If SCR_EL3.FIQ is set, the priority gets shifted down in ++ * order for the CPU interface to set bit 7, and keep the ++ * actual priority in the non-secure range. In the process, it ++ * looses the least significant bit and the actual priority ++ * becomes 0x80. Reading it back returns 0, indicating that ++ * we're don't have access to Group0. ++ */ ++ gic_write_pmr(BIT(8 - gic_get_pribits())); ++ val = gic_read_pmr(); ++ ++ gic_write_pmr(old_pmr); ++ ++ return val != 0; ++} ++ ++static void __init gic_dist_init(void) ++{ ++ unsigned int i; ++ u64 affinity; ++ void __iomem *base = gic_data.dist_base; ++ ++ /* Disable the distributor */ ++ writel_relaxed(0, base + GICD_CTLR); ++ gic_dist_wait_for_rwp(); ++ ++ /* ++ * Configure SPIs as non-secure Group-1. This will only matter ++ * if the GIC only has a single security state. This will not ++ * do the right thing if the kernel is running in secure mode, ++ * but that's not the intended use case anyway. ++ */ ++ for (i = 32; i < gic_data.irq_nr; i += 32) ++ writel_relaxed(~0, base + GICD_IGROUPR + i / 8); ++ ++ gic_dist_config(base, gic_data.irq_nr, gic_dist_wait_for_rwp); ++ ++ /* Enable distributor with ARE, Group1 */ ++ writel_relaxed(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1, ++ base + GICD_CTLR); ++ ++ /* ++ * Set all global interrupts to the boot CPU only. ARE must be ++ * enabled. ++ */ ++ affinity = gic_mpidr_to_affinity(cpu_logical_map(smp_processor_id())); ++ for (i = 32; i < gic_data.irq_nr; i++) ++ gic_write_irouter(affinity, base + GICD_IROUTER + i * 8); ++} ++ ++static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *)) ++{ ++ int ret = -ENODEV; ++ int i; ++ ++ for (i = 0; i < gic_data.nr_redist_regions; i++) { ++ void __iomem *ptr = gic_data.redist_regions[i].redist_base; ++ u64 typer; ++ u32 reg; ++ ++ reg = readl_relaxed(ptr + GICR_PIDR2) & GIC_PIDR2_ARCH_MASK; ++ if (reg != GIC_PIDR2_ARCH_GICv3 && ++ reg != GIC_PIDR2_ARCH_GICv4) { /* We're in trouble... */ ++ pr_warn("No redistributor present @%p\n", ptr); ++ break; ++ } ++ ++ do { ++ typer = gic_read_typer(ptr + GICR_TYPER); ++ ret = fn(gic_data.redist_regions + i, ptr); ++ if (!ret) ++ return 0; ++ ++ if (gic_data.redist_regions[i].single_redist) ++ break; ++ ++ if (gic_data.redist_stride) { ++ ptr += gic_data.redist_stride; ++ } else { ++ ptr += SZ_64K * 2; /* Skip RD_base + SGI_base */ ++ if (typer & GICR_TYPER_VLPIS) ++ ptr += SZ_64K * 2; /* Skip VLPI_base + reserved page */ ++ } ++ } while (!(typer & GICR_TYPER_LAST)); ++ } ++ ++ return ret ? -ENODEV : 0; ++} ++ ++static int __gic_populate_rdist(struct redist_region *region, void __iomem *ptr) ++{ ++ unsigned long mpidr = cpu_logical_map(smp_processor_id()); ++ u64 typer; ++ u32 aff; ++ ++ /* ++ * Convert affinity to a 32bit value that can be matched to ++ * GICR_TYPER bits [63:32]. 
++ */ ++ aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 0)); ++ ++ typer = gic_read_typer(ptr + GICR_TYPER); ++ if ((typer >> 32) == aff) { ++ u64 offset = ptr - region->redist_base; ++ gic_data_rdist_rd_base() = ptr; ++ gic_data_rdist()->phys_base = region->phys_base + offset; ++ ++ pr_info("CPU%d: found redistributor %lx region %d:%pa\n", ++ smp_processor_id(), mpidr, ++ (int)(region - gic_data.redist_regions), ++ &gic_data_rdist()->phys_base); ++ return 0; ++ } ++ ++ /* Try next one */ ++ return 1; ++} ++ ++static int gic_populate_rdist(void) ++{ ++ if (gic_iterate_rdists(__gic_populate_rdist) == 0) ++ return 0; ++ ++ /* We couldn't even deal with ourselves... */ ++ WARN(true, "CPU%d: mpidr %lx has no re-distributor!\n", ++ smp_processor_id(), ++ (unsigned long)cpu_logical_map(smp_processor_id())); ++ return -ENODEV; ++} ++ ++static int __gic_update_vlpi_properties(struct redist_region *region, ++ void __iomem *ptr) ++{ ++ u64 typer = gic_read_typer(ptr + GICR_TYPER); ++ gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS); ++ gic_data.rdists.has_direct_lpi &= !!(typer & GICR_TYPER_DirectLPIS); ++ ++ return 1; ++} ++ ++static void gic_update_vlpi_properties(void) ++{ ++ gic_iterate_rdists(__gic_update_vlpi_properties); ++ pr_info("%sVLPI support, %sdirect LPI support\n", ++ !gic_data.rdists.has_vlpis ? "no " : "", ++ !gic_data.rdists.has_direct_lpi ? "no " : ""); ++} ++ ++/* Check whether it's single security state view */ ++static inline bool gic_dist_security_disabled(void) ++{ ++ return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS; ++} ++ ++static void gic_cpu_sys_reg_init(void) ++{ ++ int i, cpu = smp_processor_id(); ++ u64 mpidr = cpu_logical_map(cpu); ++ u64 need_rss = MPIDR_RS(mpidr); ++ bool group0; ++ u32 pribits; ++ ++ /* ++ * Need to check that the SRE bit has actually been set. If ++ * not, it means that SRE is disabled at EL2. We're going to ++ * die painfully, and there is nothing we can do about it. ++ * ++ * Kindly inform the luser. ++ */ ++ if (!gic_enable_sre()) ++ pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); ++ ++ pribits = gic_get_pribits(); ++ ++ group0 = gic_has_group0(); ++ ++ /* Set priority mask register */ ++ if (!gic_prio_masking_enabled()) { ++ write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1); ++ } else { ++ /* ++ * Mismatch configuration with boot CPU, the system is likely ++ * to die as interrupt masking will not work properly on all ++ * CPUs ++ */ ++ WARN_ON(gic_supports_nmi() && group0 && ++ !gic_dist_security_disabled()); ++ } ++ ++ /* ++ * Some firmwares hand over to the kernel with the BPR changed from ++ * its reset value (and with a value large enough to prevent ++ * any pre-emptive interrupts from working at all). Writing a zero ++ * to BPR restores is reset value. 
++ */ ++ gic_write_bpr1(0); ++ ++ if (static_branch_likely(&supports_deactivate_key)) { ++ /* EOI drops priority only (mode 1) */ ++ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop); ++ } else { ++ /* EOI deactivates interrupt too (mode 0) */ ++ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir); ++ } ++ ++ /* Always whack Group0 before Group1 */ ++ if (group0) { ++ switch(pribits) { ++ case 8: ++ case 7: ++ write_gicreg(0, ICC_AP0R3_EL1); ++ write_gicreg(0, ICC_AP0R2_EL1); ++ case 6: ++ write_gicreg(0, ICC_AP0R1_EL1); ++ case 5: ++ case 4: ++ write_gicreg(0, ICC_AP0R0_EL1); ++ } ++ ++ isb(); ++ } ++ ++ switch(pribits) { ++ case 8: ++ case 7: ++ write_gicreg(0, ICC_AP1R3_EL1); ++ write_gicreg(0, ICC_AP1R2_EL1); ++ case 6: ++ write_gicreg(0, ICC_AP1R1_EL1); ++ case 5: ++ case 4: ++ write_gicreg(0, ICC_AP1R0_EL1); ++ } ++ ++ isb(); ++ ++ /* ... and let's hit the road... */ ++ gic_write_grpen1(1); ++ ++ /* Keep the RSS capability status in per_cpu variable */ ++ per_cpu(has_rss, cpu) = !!(gic_read_ctlr() & ICC_CTLR_EL1_RSS); ++ ++ /* Check all the CPUs have capable of sending SGIs to other CPUs */ ++ for_each_online_cpu(i) { ++ bool have_rss = per_cpu(has_rss, i) && per_cpu(has_rss, cpu); ++ ++ need_rss |= MPIDR_RS(cpu_logical_map(i)); ++ if (need_rss && (!have_rss)) ++ pr_crit("CPU%d (%lx) can't SGI CPU%d (%lx), no RSS\n", ++ cpu, (unsigned long)mpidr, ++ i, (unsigned long)cpu_logical_map(i)); ++ } ++ ++ /** ++ * GIC spec says, when ICC_CTLR_EL1.RSS==1 and GICD_TYPER.RSS==0, ++ * writing ICC_ASGI1R_EL1 register with RS != 0 is a CONSTRAINED ++ * UNPREDICTABLE choice of : ++ * - The write is ignored. ++ * - The RS field is treated as 0. ++ */ ++ if (need_rss && (!gic_data.has_rss)) ++ pr_crit_once("RSS is required but GICD doesn't support it\n"); ++} ++ ++static bool gicv3_nolpi; ++ ++static int __init gicv3_nolpi_cfg(char *buf) ++{ ++ return strtobool(buf, &gicv3_nolpi); ++} ++early_param("irqchip.gicv3_nolpi", gicv3_nolpi_cfg); ++ ++static int gic_dist_supports_lpis(void) ++{ ++ return (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && ++ !!(readl_relaxed(gic_data.dist_base + GICD_TYPER) & GICD_TYPER_LPIS) && ++ !gicv3_nolpi); ++} ++ ++static void gic_cpu_init(void) ++{ ++ void __iomem *rbase; ++ ++ /* Register ourselves with the rest of the world */ ++ if (gic_populate_rdist()) ++ return; ++ ++ gic_enable_redist(true); ++ ++ rbase = gic_data_rdist_sgi_base(); ++ ++ /* Configure SGIs/PPIs as non-secure Group-1 */ ++ writel_relaxed(~0, rbase + GICR_IGROUPR0); ++ ++ gic_cpu_config(rbase, gic_redist_wait_for_rwp); ++ ++ if (gic_supports_nmi()) ++ ipi_set_nmi_prio(rbase, GICD_INT_NMI_PRI); ++ ++ /* initialise system registers */ ++ gic_cpu_sys_reg_init(); ++} ++ ++#ifdef CONFIG_ASCEND_INIT_ALL_GICR ++struct workaround_oem_info { ++ char oem_id[ACPI_OEM_ID_SIZE + 1]; ++ char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; ++ u32 oem_revision; ++}; ++ ++static struct workaround_oem_info gicr_wkrd_info[] = { ++ { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x300, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x301, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x400, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x401, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x402, ++ } ++}; ++ ++static void gic_check_hisi_workaround(void) ++{ ++ struct acpi_table_header *tbl; ++ acpi_status status = AE_OK; ++ int i; ++ ++ status = acpi_get_table(ACPI_SIG_MADT, 0, &tbl); ++ if 
(ACPI_FAILURE(status) || !tbl) ++ return; ++ ++ for (i = 0; i < ARRAY_SIZE(gicr_wkrd_info); i++) { ++ if (!memcmp(gicr_wkrd_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) && ++ !memcmp(gicr_wkrd_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && ++ gicr_wkrd_info[i].oem_revision == tbl->oem_revision) { ++ its_enable_init_all_gicr(); ++ break; ++ } ++ } ++ ++ acpi_put_table(tbl); ++} ++ ++static void gic_compute_nr_gicr(void) ++{ ++ int i; ++ int sum = 0; ++ ++ for (i = 0; i < gic_data.nr_redist_regions; i++) { ++ u64 typer; ++ void __iomem *ptr = gic_data.redist_regions[i].redist_base; ++ ++ do { ++ typer = gic_read_typer(ptr + GICR_TYPER); ++ sum++; ++ ++ if (gic_data.redist_regions[i].single_redist) ++ break; ++ ++ if (gic_data.redist_stride) { ++ ptr += gic_data.redist_stride; ++ } else { ++ ptr += SZ_64K * 2; /* Skip RD_base + SGI_base */ ++ if (typer & GICR_TYPER_VLPIS) ++ /* Skip VLPI_base + reserved page */ ++ ptr += SZ_64K * 2; ++ } ++ } while (!(typer & GICR_TYPER_LAST)); ++ } ++ ++ its_set_gicr_nr(sum); ++} ++ ++static void gic_enable_redist_others(void __iomem *rbase, bool enable) ++{ ++ u32 count = 1000000; /* 1s! */ ++ u32 val; ++ ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (enable) ++ /* Wake up this CPU redistributor */ ++ val &= ~GICR_WAKER_ProcessorSleep; ++ else ++ val |= GICR_WAKER_ProcessorSleep; ++ writel_relaxed(val, rbase + GICR_WAKER); ++ ++ if (!enable) { /* Check that GICR_WAKER is writeable */ ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (!(val & GICR_WAKER_ProcessorSleep)) ++ return; /* No PM support in this redistributor */ ++ } ++ ++ while (--count) { ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep)) ++ break; ++ cpu_relax(); ++ udelay(1); ++ }; ++ if (!count) ++ pr_err_ratelimited("redistributor failed to %s...\n", ++ enable ? "wakeup" : "sleep"); ++} ++ ++static int gic_rdist_cpu(void __iomem *ptr, unsigned int cpu) ++{ ++ unsigned long mpidr = cpu_logical_map(cpu); ++ u64 typer; ++ u32 aff; ++ ++ /* ++ * Convert affinity to a 32bit value that can be matched to ++ * GICR_TYPER bits [63:32]. 
++ */ ++ aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 0)); ++ ++ typer = gic_read_typer(ptr + GICR_TYPER); ++ if ((typer >> 32) == aff) ++ return 0; ++ ++ return 1; ++} ++ ++static int gic_rdist_cpus(void __iomem *ptr) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < nr_cpu_ids; i++) { ++ if (gic_rdist_cpu(ptr, i) == 0) ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static void gic_cpu_init_others(void) ++{ ++ int i, cpu = nr_cpu_ids; ++ int gicr_nr = its_gicr_nr(); ++ ++ if (!its_init_all_gicr()) ++ return; ++ ++ for (i = 0; i < gic_data.nr_redist_regions; i++) { ++ u64 typer; ++ void __iomem *redist_base = ++ gic_data.redist_regions[i].redist_base; ++ phys_addr_t phys_base = gic_data.redist_regions[i].phys_base; ++ ++ do { ++ typer = gic_read_typer(redist_base + GICR_TYPER); ++ ++ if (gic_rdist_cpus(redist_base) == 1) { ++ if (cpu >= gicr_nr) { ++ pr_err("CPU over GICR number.\n"); ++ break; ++ } ++ gic_enable_redist_others(redist_base, true); ++ ++ if (gic_dist_supports_lpis()) ++ its_cpu_init_others(redist_base, phys_base, cpu); ++ cpu++; ++ } ++ ++ if (gic_data.redist_regions[i].single_redist) ++ break; ++ ++ if (gic_data.redist_stride) { ++ redist_base += gic_data.redist_stride; ++ phys_base += gic_data.redist_stride; ++ } else { ++ /* Skip RD_base + SGI_base */ ++ redist_base += SZ_64K * 2; ++ phys_base += SZ_64K * 2; ++ if (typer & GICR_TYPER_VLPIS) { ++ /* Skip VLPI_base + reserved page */ ++ redist_base += SZ_64K * 2; ++ phys_base += SZ_64K * 2; ++ } ++ } ++ } while (!(typer & GICR_TYPER_LAST)); ++ } ++} ++#else ++static inline void gic_check_hisi_workaround(void) {} ++ ++static inline void gic_compute_nr_gicr(void) {} ++ ++static inline void gic_cpu_init_others(void) {} ++#endif ++ ++#ifdef CONFIG_SMP ++ ++#define MPIDR_TO_SGI_RS(mpidr) (MPIDR_RS(mpidr) << ICC_SGI1R_RS_SHIFT) ++#define MPIDR_TO_SGI_CLUSTER_ID(mpidr) ((mpidr) & ~0xFUL) ++ ++static int gic_starting_cpu(unsigned int cpu) ++{ ++ gic_cpu_init(); ++ ++ if (gic_dist_supports_lpis()) ++ its_cpu_init(); ++ ++ return 0; ++} ++ ++static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, ++ unsigned long cluster_id) ++{ ++ int next_cpu, cpu = *base_cpu; ++ unsigned long mpidr = cpu_logical_map(cpu); ++ u16 tlist = 0; ++ ++ while (cpu < nr_cpu_ids) { ++ tlist |= 1 << (mpidr & 0xf); ++ ++ next_cpu = cpumask_next(cpu, mask); ++ if (next_cpu >= nr_cpu_ids) ++ goto out; ++ cpu = next_cpu; ++ ++ mpidr = cpu_logical_map(cpu); ++ ++ if (cluster_id != MPIDR_TO_SGI_CLUSTER_ID(mpidr)) { ++ cpu--; ++ goto out; ++ } ++ } ++out: ++ *base_cpu = cpu; ++ return tlist; ++} ++ ++#define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \ ++ (MPIDR_AFFINITY_LEVEL(cluster_id, level) \ ++ << ICC_SGI1R_AFFINITY_## level ##_SHIFT) ++ ++static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq) ++{ ++ u64 val; ++ ++ val = (MPIDR_TO_SGI_AFFINITY(cluster_id, 3) | ++ MPIDR_TO_SGI_AFFINITY(cluster_id, 2) | ++ irq << ICC_SGI1R_SGI_ID_SHIFT | ++ MPIDR_TO_SGI_AFFINITY(cluster_id, 1) | ++ MPIDR_TO_SGI_RS(cluster_id) | ++ tlist << ICC_SGI1R_TARGET_LIST_SHIFT); ++ ++ pr_devel("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val); ++ gic_write_sgi1r(val); ++} ++ ++static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) ++{ ++ int cpu; ++ ++ if (WARN_ON(irq >= 16)) ++ return; ++ ++ /* ++ * Ensure that stores to Normal memory are visible to the ++ * other CPUs before issuing the IPI. 
++ */ ++ wmb(); ++ ++ for_each_cpu(cpu, mask) { ++ u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu)); ++ u16 tlist; ++ ++ tlist = gic_compute_target_list(&cpu, mask, cluster_id); ++ gic_send_sgi(cluster_id, tlist, irq); ++ } ++ ++ /* Force the above writes to ICC_SGI1R_EL1 to be executed */ ++ isb(); ++} ++ ++static void gic_smp_init(void) ++{ ++ set_smp_cross_call(gic_raise_softirq); ++ cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING, ++ "irqchip/arm/gicv3:starting", ++ gic_starting_cpu, NULL); ++} ++ ++static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, ++ bool force) ++{ ++ unsigned int cpu; ++ void __iomem *reg; ++ int enabled; ++ u64 val; ++ ++ if (force) ++ cpu = cpumask_first(mask_val); ++ else ++ cpu = cpumask_any_and(mask_val, cpu_online_mask); ++ ++ if (cpu >= nr_cpu_ids) ++ return -EINVAL; ++ ++ if (gic_irq_in_rdist(d)) ++ return -EINVAL; ++ ++ /* If interrupt was enabled, disable it first */ ++ enabled = gic_peek_irq(d, GICD_ISENABLER); ++ if (enabled) ++ gic_mask_irq(d); ++ ++ reg = gic_dist_base(d) + GICD_IROUTER + (gic_irq(d) * 8); ++ val = gic_mpidr_to_affinity(cpu_logical_map(cpu)); ++ ++ gic_write_irouter(val, reg); ++ ++ /* ++ * If the interrupt was enabled, enabled it again. Otherwise, ++ * just wait for the distributor to have digested our changes. ++ */ ++ if (enabled) ++ gic_unmask_irq(d); ++ else ++ gic_dist_wait_for_rwp(); ++ ++ irq_data_update_effective_affinity(d, cpumask_of(cpu)); ++ ++ return IRQ_SET_MASK_OK_DONE; ++} ++#else ++#define gic_set_affinity NULL ++#define gic_smp_init() do { } while(0) ++#endif ++ ++#ifdef CONFIG_CPU_PM ++static int gic_cpu_pm_notifier(struct notifier_block *self, ++ unsigned long cmd, void *v) ++{ ++ if (cmd == CPU_PM_EXIT) { ++ if (gic_dist_security_disabled()) ++ gic_enable_redist(true); ++ gic_cpu_sys_reg_init(); ++ } else if (cmd == CPU_PM_ENTER && gic_dist_security_disabled()) { ++ gic_write_grpen1(0); ++ gic_enable_redist(false); ++ } ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block gic_cpu_pm_notifier_block = { ++ .notifier_call = gic_cpu_pm_notifier, ++}; ++ ++static void gic_cpu_pm_init(void) ++{ ++ cpu_pm_register_notifier(&gic_cpu_pm_notifier_block); ++} ++ ++#else ++static inline void gic_cpu_pm_init(void) { } ++#endif /* CONFIG_CPU_PM */ ++ ++static struct irq_chip gic_chip = { ++ .name = "GICv3", ++ .irq_mask = gic_mask_irq, ++ .irq_unmask = gic_unmask_irq, ++ .irq_eoi = gic_eoi_irq, ++ .irq_set_type = gic_set_type, ++ .irq_set_affinity = gic_set_affinity, ++ .irq_get_irqchip_state = gic_irq_get_irqchip_state, ++ .irq_set_irqchip_state = gic_irq_set_irqchip_state, ++ .irq_nmi_setup = gic_irq_nmi_setup, ++ .irq_nmi_teardown = gic_irq_nmi_teardown, ++ .flags = IRQCHIP_SET_TYPE_MASKED | ++ IRQCHIP_SKIP_SET_WAKE | ++ IRQCHIP_MASK_ON_SUSPEND, ++}; ++ ++static struct irq_chip gic_eoimode1_chip = { ++ .name = "GICv3", ++ .irq_mask = gic_eoimode1_mask_irq, ++ .irq_unmask = gic_unmask_irq, ++ .irq_eoi = gic_eoimode1_eoi_irq, ++ .irq_set_type = gic_set_type, ++ .irq_set_affinity = gic_set_affinity, ++ .irq_get_irqchip_state = gic_irq_get_irqchip_state, ++ .irq_set_irqchip_state = gic_irq_set_irqchip_state, ++ .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity, ++ .irq_nmi_setup = gic_irq_nmi_setup, ++ .irq_nmi_teardown = gic_irq_nmi_teardown, ++ .flags = IRQCHIP_SET_TYPE_MASKED | ++ IRQCHIP_SKIP_SET_WAKE | ++ IRQCHIP_MASK_ON_SUSPEND, ++}; ++ ++#define GIC_ID_NR (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer)) ++ ++static int gic_irq_domain_map(struct irq_domain *d, 
unsigned int irq, ++ irq_hw_number_t hw) ++{ ++ struct irq_chip *chip = &gic_chip; ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ chip = &gic_eoimode1_chip; ++ ++ /* SGIs are private to the core kernel */ ++ if (hw < 16) ++ return -EPERM; ++ /* Nothing here */ ++ if (hw >= gic_data.irq_nr && hw < 8192) ++ return -EPERM; ++ /* Off limits */ ++ if (hw >= GIC_ID_NR) ++ return -EPERM; ++ ++ /* PPIs */ ++ if (hw < 32) { ++ irq_set_percpu_devid(irq); ++ irq_domain_set_info(d, irq, hw, chip, d->host_data, ++ handle_percpu_devid_irq, NULL, NULL); ++ irq_set_status_flags(irq, IRQ_NOAUTOEN); ++ } ++ /* SPIs */ ++ if (hw >= 32 && hw < gic_data.irq_nr) { ++ irq_domain_set_info(d, irq, hw, chip, d->host_data, ++ handle_fasteoi_irq, NULL, NULL); ++ irq_set_probe(irq); ++ irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq))); ++ } ++ /* LPIs */ ++ if (hw >= 8192 && hw < GIC_ID_NR) { ++ if (!gic_dist_supports_lpis()) ++ return -EPERM; ++ irq_domain_set_info(d, irq, hw, chip, d->host_data, ++ handle_fasteoi_irq, NULL, NULL); ++ } ++ ++ return 0; ++} ++ ++#define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) ++ ++static int gic_irq_domain_translate(struct irq_domain *d, ++ struct irq_fwspec *fwspec, ++ unsigned long *hwirq, ++ unsigned int *type) ++{ ++ if (is_of_node(fwspec->fwnode)) { ++ if (fwspec->param_count < 3) ++ return -EINVAL; ++ ++ switch (fwspec->param[0]) { ++ case 0: /* SPI */ ++ *hwirq = fwspec->param[1] + 32; ++ break; ++ case 1: /* PPI */ ++ case GIC_IRQ_TYPE_PARTITION: ++ *hwirq = fwspec->param[1] + 16; ++ break; ++ case GIC_IRQ_TYPE_LPI: /* LPI */ ++ *hwirq = fwspec->param[1]; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; ++ ++ /* ++ * Make it clear that broken DTs are... broken. ++ * Partitionned PPIs are an unfortunate exception. ++ */ ++ WARN_ON(*type == IRQ_TYPE_NONE && ++ fwspec->param[0] != GIC_IRQ_TYPE_PARTITION); ++ return 0; ++ } ++ ++ if (is_fwnode_irqchip(fwspec->fwnode)) { ++ if(fwspec->param_count != 2) ++ return -EINVAL; ++ ++ *hwirq = fwspec->param[0]; ++ *type = fwspec->param[1]; ++ ++ WARN_ON(*type == IRQ_TYPE_NONE); ++ return 0; ++ } ++ ++ return -EINVAL; ++} ++ ++static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, ++ unsigned int nr_irqs, void *arg) ++{ ++ int i, ret; ++ irq_hw_number_t hwirq; ++ unsigned int type = IRQ_TYPE_NONE; ++ struct irq_fwspec *fwspec = arg; ++ ++ ret = gic_irq_domain_translate(domain, fwspec, &hwirq, &type); ++ if (ret) ++ return ret; ++ ++ for (i = 0; i < nr_irqs; i++) { ++ ret = gic_irq_domain_map(domain, virq + i, hwirq + i); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void gic_irq_domain_free(struct irq_domain *domain, unsigned int virq, ++ unsigned int nr_irqs) ++{ ++ int i; ++ ++ for (i = 0; i < nr_irqs; i++) { ++ struct irq_data *d = irq_domain_get_irq_data(domain, virq + i); ++ irq_set_handler(virq + i, NULL); ++ irq_domain_reset_irq_data(d); ++ } ++} ++ ++static int gic_irq_domain_select(struct irq_domain *d, ++ struct irq_fwspec *fwspec, ++ enum irq_domain_bus_token bus_token) ++{ ++ /* Not for us */ ++ if (fwspec->fwnode != d->fwnode) ++ return 0; ++ ++ /* If this is not DT, then we have a single domain */ ++ if (!is_of_node(fwspec->fwnode)) ++ return 1; ++ ++ /* ++ * If this is a PPI and we have a 4th (non-null) parameter, ++ * then we need to match the partition domain. 
++ */ ++ if (fwspec->param_count >= 4 && ++ fwspec->param[0] == 1 && fwspec->param[3] != 0) ++ return d == partition_get_domain(gic_data.ppi_descs[fwspec->param[1]]); ++ ++ return d == gic_data.domain; ++} ++ ++static const struct irq_domain_ops gic_irq_domain_ops = { ++ .translate = gic_irq_domain_translate, ++ .alloc = gic_irq_domain_alloc, ++ .free = gic_irq_domain_free, ++ .select = gic_irq_domain_select, ++}; ++ ++static int partition_domain_translate(struct irq_domain *d, ++ struct irq_fwspec *fwspec, ++ unsigned long *hwirq, ++ unsigned int *type) ++{ ++ struct device_node *np; ++ int ret; ++ ++ np = of_find_node_by_phandle(fwspec->param[3]); ++ if (WARN_ON(!np)) ++ return -EINVAL; ++ ++ ret = partition_translate_id(gic_data.ppi_descs[fwspec->param[1]], ++ of_node_to_fwnode(np)); ++ if (ret < 0) ++ return ret; ++ ++ *hwirq = ret; ++ *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; ++ ++ return 0; ++} ++ ++static const struct irq_domain_ops partition_domain_ops = { ++ .translate = partition_domain_translate, ++ .select = gic_irq_domain_select, ++}; ++ ++static void gic_enable_nmi_support(void) ++{ ++ int i; ++ ++ for (i = 0; i < 16; i++) ++ refcount_set(&ppi_nmi_refs[i], 0); ++ ++ /* ++ * Linux itself doesn't use 1:N distribution, so has no need to ++ * set PMHE. The only reason to have it set is if EL3 requires it ++ * (and we can't change it). ++ */ ++ if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) ++ static_branch_enable(&gic_pmr_sync); ++ ++ pr_info("%s ICC_PMR_EL1 synchronisation\n", ++ static_branch_unlikely(&gic_pmr_sync) ? "Forcing" : "Relaxing"); ++ ++ static_branch_enable(&supports_pseudo_nmis); ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_eoimode1_chip.flags |= IRQCHIP_SUPPORTS_NMI; ++ else ++ gic_chip.flags |= IRQCHIP_SUPPORTS_NMI; ++} ++ ++static int __init gic_init_bases(void __iomem *dist_base, ++ struct redist_region *rdist_regs, ++ u32 nr_redist_regions, ++ u64 redist_stride, ++ struct fwnode_handle *handle) ++{ ++ u32 typer; ++ int gic_irqs; ++ int err; ++ ++ if (!is_hyp_mode_available()) ++ static_branch_disable(&supports_deactivate_key); ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ pr_info("GIC: Using split EOI/Deactivate mode\n"); ++ ++ gic_data.fwnode = handle; ++ gic_data.dist_base = dist_base; ++ gic_data.redist_regions = rdist_regs; ++ gic_data.nr_redist_regions = nr_redist_regions; ++ gic_data.redist_stride = redist_stride; ++ ++ /* ++ * Find out how many interrupts are supported. ++ * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI) ++ */ ++ typer = readl_relaxed(gic_data.dist_base + GICD_TYPER); ++ gic_data.rdists.gicd_typer = typer; ++ gic_irqs = GICD_TYPER_IRQS(typer); ++ if (gic_irqs > 1020) ++ gic_irqs = 1020; ++ gic_data.irq_nr = gic_irqs; ++ ++ gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops, ++ &gic_data); ++ irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED); ++ gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist)); ++ gic_data.rdists.has_vlpis = true; ++ gic_data.rdists.has_direct_lpi = true; ++ gic_check_hisi_workaround(); ++ gic_compute_nr_gicr(); ++ ++ if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ ++ gic_data.has_rss = !!(typer & GICD_TYPER_RSS); ++ pr_info("Distributor has %sRange Selector support\n", ++ gic_data.has_rss ? 
"" : "no "); ++ ++ if (typer & GICD_TYPER_MBIS) { ++ err = mbi_init(handle, gic_data.domain); ++ if (err) ++ pr_err("Failed to initialize MBIs\n"); ++ } ++ ++ set_handle_irq(gic_handle_irq); ++ ++ gic_update_vlpi_properties(); ++ ++ /* ++ * NMI backtrace DFX need check nmi support, this should be ++ * called before enable NMI backtrace DFX. ++ */ ++ if (gic_prio_masking_enabled()) { ++ if (!gic_has_group0() || gic_dist_security_disabled()) ++ gic_enable_nmi_support(); ++ else ++ pr_warn("SCR_EL3.FIQ is cleared, cannot enable use of pseudo-NMIs\n"); ++ } ++ ++ gic_smp_init(); ++ gic_dist_init(); ++ gic_cpu_init(); ++ gic_cpu_pm_init(); ++ ++ if (gic_dist_supports_lpis()) { ++ its_init(handle, &gic_data.rdists, gic_data.domain); ++ its_cpu_init(); ++ } ++ ++ gic_cpu_init_others(); ++ ++ return 0; ++ ++out_free: ++ if (gic_data.domain) ++ irq_domain_remove(gic_data.domain); ++ free_percpu(gic_data.rdists.rdist); ++ return err; ++} ++ ++static int __init gic_validate_dist_version(void __iomem *dist_base) ++{ ++ u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; ++ ++ if (reg != GIC_PIDR2_ARCH_GICv3 && reg != GIC_PIDR2_ARCH_GICv4) ++ return -ENODEV; ++ ++ return 0; ++} ++ ++/* Create all possible partitions at boot time */ ++static void __init gic_populate_ppi_partitions(struct device_node *gic_node) ++{ ++ struct device_node *parts_node, *child_part; ++ int part_idx = 0, i; ++ int nr_parts; ++ struct partition_affinity *parts; ++ ++ parts_node = of_get_child_by_name(gic_node, "ppi-partitions"); ++ if (!parts_node) ++ return; ++ ++ nr_parts = of_get_child_count(parts_node); ++ ++ if (!nr_parts) ++ goto out_put_node; ++ ++ parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL); ++ if (WARN_ON(!parts)) ++ goto out_put_node; ++ ++ for_each_child_of_node(parts_node, child_part) { ++ struct partition_affinity *part; ++ int n; ++ ++ part = &parts[part_idx]; ++ ++ part->partition_id = of_node_to_fwnode(child_part); ++ ++ pr_info("GIC: PPI partition %s[%d] { ", ++ child_part->name, part_idx); ++ ++ n = of_property_count_elems_of_size(child_part, "affinity", ++ sizeof(u32)); ++ WARN_ON(n <= 0); ++ ++ for (i = 0; i < n; i++) { ++ int err, cpu; ++ u32 cpu_phandle; ++ struct device_node *cpu_node; ++ ++ err = of_property_read_u32_index(child_part, "affinity", ++ i, &cpu_phandle); ++ if (WARN_ON(err)) ++ continue; ++ ++ cpu_node = of_find_node_by_phandle(cpu_phandle); ++ if (WARN_ON(!cpu_node)) ++ continue; ++ ++ cpu = of_cpu_node_to_id(cpu_node); ++ if (WARN_ON(cpu < 0)) ++ continue; ++ ++ pr_cont("%pOF[%d] ", cpu_node, cpu); ++ ++ cpumask_set_cpu(cpu, &part->mask); ++ } ++ ++ pr_cont("}\n"); ++ part_idx++; ++ } ++ ++ for (i = 0; i < 16; i++) { ++ unsigned int irq; ++ struct partition_desc *desc; ++ struct irq_fwspec ppi_fwspec = { ++ .fwnode = gic_data.fwnode, ++ .param_count = 3, ++ .param = { ++ [0] = GIC_IRQ_TYPE_PARTITION, ++ [1] = i, ++ [2] = IRQ_TYPE_NONE, ++ }, ++ }; ++ ++ irq = irq_create_fwspec_mapping(&ppi_fwspec); ++ if (WARN_ON(!irq)) ++ continue; ++ desc = partition_create_desc(gic_data.fwnode, parts, nr_parts, ++ irq, &partition_domain_ops); ++ if (WARN_ON(!desc)) ++ continue; ++ ++ gic_data.ppi_descs[i] = desc; ++ } ++ ++out_put_node: ++ of_node_put(parts_node); ++} ++ ++static void __init gic_of_setup_kvm_info(struct device_node *node) ++{ ++ int ret; ++ struct resource r; ++ u32 gicv_idx; ++ ++ gic_v3_kvm_info.type = GIC_V3; ++ ++ gic_v3_kvm_info.maint_irq = irq_of_parse_and_map(node, 0); ++ if (!gic_v3_kvm_info.maint_irq) ++ return; ++ ++ if (of_property_read_u32(node, 
"#redistributor-regions", ++ &gicv_idx)) ++ gicv_idx = 1; ++ ++ gicv_idx += 3; /* Also skip GICD, GICC, GICH */ ++ ret = of_address_to_resource(node, gicv_idx, &r); ++ if (!ret) ++ gic_v3_kvm_info.vcpu = r; ++ ++ gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; ++ gic_set_kvm_info(&gic_v3_kvm_info); ++} ++ ++static int __init gic_of_init(struct device_node *node, struct device_node *parent) ++{ ++ void __iomem *dist_base; ++ struct redist_region *rdist_regs; ++ u64 redist_stride; ++ u32 nr_redist_regions; ++ int err, i; ++ ++ dist_base = of_iomap(node, 0); ++ if (!dist_base) { ++ pr_err("%pOF: unable to map gic dist registers\n", node); ++ return -ENXIO; ++ } ++ ++ err = gic_validate_dist_version(dist_base); ++ if (err) { ++ pr_err("%pOF: no distributor detected, giving up\n", node); ++ goto out_unmap_dist; ++ } ++ ++ if (of_property_read_u32(node, "#redistributor-regions", &nr_redist_regions)) ++ nr_redist_regions = 1; ++ ++ rdist_regs = kcalloc(nr_redist_regions, sizeof(*rdist_regs), ++ GFP_KERNEL); ++ if (!rdist_regs) { ++ err = -ENOMEM; ++ goto out_unmap_dist; ++ } ++ ++ for (i = 0; i < nr_redist_regions; i++) { ++ struct resource res; ++ int ret; ++ ++ ret = of_address_to_resource(node, 1 + i, &res); ++ rdist_regs[i].redist_base = of_iomap(node, 1 + i); ++ if (ret || !rdist_regs[i].redist_base) { ++ pr_err("%pOF: couldn't map region %d\n", node, i); ++ err = -ENODEV; ++ goto out_unmap_rdist; ++ } ++ rdist_regs[i].phys_base = res.start; ++ } ++ ++ if (of_property_read_u64(node, "redistributor-stride", &redist_stride)) ++ redist_stride = 0; ++ ++ err = gic_init_bases(dist_base, rdist_regs, nr_redist_regions, ++ redist_stride, &node->fwnode); ++ if (err) ++ goto out_unmap_rdist; ++ ++ gic_populate_ppi_partitions(node); ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_of_setup_kvm_info(node); ++ return 0; ++ ++out_unmap_rdist: ++ for (i = 0; i < nr_redist_regions; i++) ++ if (rdist_regs[i].redist_base) ++ iounmap(rdist_regs[i].redist_base); ++ kfree(rdist_regs); ++out_unmap_dist: ++ iounmap(dist_base); ++ return err; ++} ++ ++IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init); ++ ++#ifdef CONFIG_ACPI ++static struct ++{ ++ void __iomem *dist_base; ++ struct redist_region *redist_regs; ++ u32 nr_redist_regions; ++ bool single_redist; ++ int enabled_rdists; ++ u32 maint_irq; ++ int maint_irq_mode; ++ phys_addr_t vcpu_base; ++} acpi_data __initdata; ++ ++static void __init ++gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base) ++{ ++ static int count = 0; ++ ++ acpi_data.redist_regs[count].phys_base = phys_base; ++ acpi_data.redist_regs[count].redist_base = redist_base; ++ acpi_data.redist_regs[count].single_redist = acpi_data.single_redist; ++ count++; ++} ++ ++static int __init ++gic_acpi_parse_madt_redist(struct acpi_subtable_header *header, ++ const unsigned long end) ++{ ++ struct acpi_madt_generic_redistributor *redist = ++ (struct acpi_madt_generic_redistributor *)header; ++ void __iomem *redist_base; ++ ++ redist_base = ioremap(redist->base_address, redist->length); ++ if (!redist_base) { ++ pr_err("Couldn't map GICR region @%llx\n", redist->base_address); ++ return -ENOMEM; ++ } ++ ++ gic_acpi_register_redist(redist->base_address, redist_base); ++ return 0; ++} ++ ++static int __init ++gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header, ++ const unsigned long end) ++{ ++ struct acpi_madt_generic_interrupt *gicc = ++ (struct acpi_madt_generic_interrupt *)header; ++ u32 reg = readl_relaxed(acpi_data.dist_base + GICD_PIDR2) & 
GIC_PIDR2_ARCH_MASK; ++ u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2; ++ void __iomem *redist_base; ++ ++ /* GICC entry which has !ACPI_MADT_ENABLED is not unusable so skip */ ++ if (!(gicc->flags & ACPI_MADT_ENABLED)) ++ return 0; ++ ++ redist_base = ioremap(gicc->gicr_base_address, size); ++ if (!redist_base) ++ return -ENOMEM; ++ ++ gic_acpi_register_redist(gicc->gicr_base_address, redist_base); ++ return 0; ++} ++ ++static int __init gic_acpi_collect_gicr_base(void) ++{ ++ acpi_tbl_entry_handler redist_parser; ++ enum acpi_madt_type type; ++ ++ if (acpi_data.single_redist) { ++ type = ACPI_MADT_TYPE_GENERIC_INTERRUPT; ++ redist_parser = gic_acpi_parse_madt_gicc; ++ } else { ++ type = ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR; ++ redist_parser = gic_acpi_parse_madt_redist; ++ } ++ ++ /* Collect redistributor base addresses in GICR entries */ ++ if (acpi_table_parse_madt(type, redist_parser, 0) > 0) ++ return 0; ++ ++ pr_info("No valid GICR entries exist\n"); ++ return -ENODEV; ++} ++ ++static int __init gic_acpi_match_gicr(struct acpi_subtable_header *header, ++ const unsigned long end) ++{ ++ /* Subtable presence means that redist exists, that's it */ ++ return 0; ++} ++ ++static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header, ++ const unsigned long end) ++{ ++ struct acpi_madt_generic_interrupt *gicc = ++ (struct acpi_madt_generic_interrupt *)header; ++ ++ /* ++ * If GICC is enabled and has valid gicr base address, then it means ++ * GICR base is presented via GICC ++ */ ++ if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) { ++ acpi_data.enabled_rdists++; ++ return 0; ++ } ++ ++ /* ++ * It's perfectly valid firmware can pass disabled GICC entry, driver ++ * should not treat as errors, skip the entry instead of probe fail. ++ */ ++ if (!(gicc->flags & ACPI_MADT_ENABLED)) ++ return 0; ++ ++ return -ENODEV; ++} ++ ++static int __init gic_acpi_count_gicr_regions(void) ++{ ++ int count; ++ ++ /* ++ * Count how many redistributor regions we have. It is not allowed ++ * to mix redistributor description, GICR and GICC subtables have to be ++ * mutually exclusive. ++ */ ++ count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR, ++ gic_acpi_match_gicr, 0); ++ if (count > 0) { ++ acpi_data.single_redist = false; ++ return count; ++ } ++ ++ count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, ++ gic_acpi_match_gicc, 0); ++ if (count > 0) { ++ acpi_data.single_redist = true; ++ count = acpi_data.enabled_rdists; ++ } ++ ++ return count; ++} ++ ++static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header, ++ struct acpi_probe_entry *ape) ++{ ++ struct acpi_madt_generic_distributor *dist; ++ int count; ++ ++ dist = (struct acpi_madt_generic_distributor *)header; ++ if (dist->version != ape->driver_data) ++ return false; ++ ++ /* We need to do that exercise anyway, the sooner the better */ ++ count = gic_acpi_count_gicr_regions(); ++ if (count <= 0) ++ return false; ++ ++ acpi_data.nr_redist_regions = count; ++ return true; ++} ++ ++static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header, ++ const unsigned long end) ++{ ++ struct acpi_madt_generic_interrupt *gicc = ++ (struct acpi_madt_generic_interrupt *)header; ++ int maint_irq_mode; ++ static int first_madt = true; ++ ++ /* Skip unusable CPUs */ ++ if (!(gicc->flags & ACPI_MADT_ENABLED)) ++ return 0; ++ ++ maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ? 
++ ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE; ++ ++ if (first_madt) { ++ first_madt = false; ++ ++ acpi_data.maint_irq = gicc->vgic_interrupt; ++ acpi_data.maint_irq_mode = maint_irq_mode; ++ acpi_data.vcpu_base = gicc->gicv_base_address; ++ ++ return 0; ++ } ++ ++ /* ++ * The maintenance interrupt and GICV should be the same for every CPU ++ */ ++ if ((acpi_data.maint_irq != gicc->vgic_interrupt) || ++ (acpi_data.maint_irq_mode != maint_irq_mode) || ++ (acpi_data.vcpu_base != gicc->gicv_base_address)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static bool __init gic_acpi_collect_virt_info(void) ++{ ++ int count; ++ ++ count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, ++ gic_acpi_parse_virt_madt_gicc, 0); ++ ++ return (count > 0); ++} ++ ++#define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K) ++#define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K) ++#define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K) ++ ++static void __init gic_acpi_setup_kvm_info(void) ++{ ++ int irq; ++ ++ if (!gic_acpi_collect_virt_info()) { ++ pr_warn("Unable to get hardware information used for virtualization\n"); ++ return; ++ } ++ ++ gic_v3_kvm_info.type = GIC_V3; ++ ++ irq = acpi_register_gsi(NULL, acpi_data.maint_irq, ++ acpi_data.maint_irq_mode, ++ ACPI_ACTIVE_HIGH); ++ if (irq <= 0) ++ return; ++ ++ gic_v3_kvm_info.maint_irq = irq; ++ ++ if (acpi_data.vcpu_base) { ++ struct resource *vcpu = &gic_v3_kvm_info.vcpu; ++ ++ vcpu->flags = IORESOURCE_MEM; ++ vcpu->start = acpi_data.vcpu_base; ++ vcpu->end = vcpu->start + ACPI_GICV2_VCPU_MEM_SIZE - 1; ++ } ++ ++ gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; ++ gic_set_kvm_info(&gic_v3_kvm_info); ++} ++ ++static int __init ++gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end) ++{ ++ struct acpi_madt_generic_distributor *dist; ++ struct fwnode_handle *domain_handle; ++ size_t size; ++ int i, err; ++ ++ /* Get distributor base address */ ++ dist = (struct acpi_madt_generic_distributor *)header; ++ acpi_data.dist_base = ioremap(dist->base_address, ++ ACPI_GICV3_DIST_MEM_SIZE); ++ if (!acpi_data.dist_base) { ++ pr_err("Unable to map GICD registers\n"); ++ return -ENOMEM; ++ } ++ ++ err = gic_validate_dist_version(acpi_data.dist_base); ++ if (err) { ++ pr_err("No distributor detected at @%p, giving up\n", ++ acpi_data.dist_base); ++ goto out_dist_unmap; ++ } ++ ++ size = sizeof(*acpi_data.redist_regs) * acpi_data.nr_redist_regions; ++ acpi_data.redist_regs = kzalloc(size, GFP_KERNEL); ++ if (!acpi_data.redist_regs) { ++ err = -ENOMEM; ++ goto out_dist_unmap; ++ } ++ ++ err = gic_acpi_collect_gicr_base(); ++ if (err) ++ goto out_redist_unmap; ++ ++ domain_handle = irq_domain_alloc_fwnode(acpi_data.dist_base); ++ if (!domain_handle) { ++ err = -ENOMEM; ++ goto out_redist_unmap; ++ } ++ ++ err = gic_init_bases(acpi_data.dist_base, acpi_data.redist_regs, ++ acpi_data.nr_redist_regions, 0, domain_handle); ++ if (err) ++ goto out_fwhandle_free; ++ ++ acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle); ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_acpi_setup_kvm_info(); ++ ++ return 0; ++ ++out_fwhandle_free: ++ irq_domain_free_fwnode(domain_handle); ++out_redist_unmap: ++ for (i = 0; i < acpi_data.nr_redist_regions; i++) ++ if (acpi_data.redist_regs[i].redist_base) ++ iounmap(acpi_data.redist_regs[i].redist_base); ++ kfree(acpi_data.redist_regs); ++out_dist_unmap: ++ iounmap(acpi_data.dist_base); ++ return err; ++} ++IRQCHIP_ACPI_DECLARE(gic_v3, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, ++ acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_V3, ++ 
gic_acpi_init); ++IRQCHIP_ACPI_DECLARE(gic_v4, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, ++ acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_V4, ++ gic_acpi_init); ++IRQCHIP_ACPI_DECLARE(gic_v3_or_v4, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, ++ acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_NONE, ++ gic_acpi_init); ++#endif +diff -uprN kernel/drivers/irqchip/irq-gic-v3.c.rej kernel_new/drivers/irqchip/irq-gic-v3.c.rej +--- kernel/drivers/irqchip/irq-gic-v3.c.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-gic-v3.c.rej 2021-04-01 18:28:07.664863277 +0800 +@@ -0,0 +1,55 @@ ++--- drivers/irqchip/irq-gic-v3.c 2019-12-18 03:36:04.000000000 +0800 +++++ drivers/irqchip/irq-gic-v3.c 2021-03-22 09:21:43.205415349 +0800 ++@@ -390,7 +421,7 @@ static asmlinkage void __exception_irq_e ++ else ++ isb(); ++ ++- err = handle_domain_irq(gic_data.domain, irqnr, regs); +++ err = ipipe_handle_domain_irq(gic_data.domain, irqnr, regs); ++ if (err) { ++ WARN_ONCE(true, "Unexpected interrupt received!\n"); ++ if (static_branch_likely(&supports_deactivate_key)) { ++@@ -414,7 +445,7 @@ static asmlinkage void __exception_irq_e ++ * that any shared data read by handle_IPI will ++ * be read after the ACK. ++ */ ++- handle_IPI(irqnr, regs); +++ ipipe_handle_multi_ipi(irqnr, regs); ++ #else ++ WARN_ONCE(true, "Unexpected SGI received!\n"); ++ #endif ++@@ -889,11 +920,16 @@ static struct irq_chip gic_chip = { ++ .irq_unmask = gic_unmask_irq, ++ .irq_eoi = gic_eoi_irq, ++ .irq_set_type = gic_set_type, +++#ifdef CONFIG_IPIPE +++ .irq_hold = gic_hold_irq, +++ .irq_release = gic_release_irq, +++#endif ++ .irq_set_affinity = gic_set_affinity, ++ .irq_get_irqchip_state = gic_irq_get_irqchip_state, ++ .irq_set_irqchip_state = gic_irq_set_irqchip_state, ++ .flags = IRQCHIP_SET_TYPE_MASKED | ++ IRQCHIP_SKIP_SET_WAKE | +++ IRQCHIP_PIPELINE_SAFE | ++ IRQCHIP_MASK_ON_SUSPEND, ++ }; ++ ++@@ -903,12 +939,17 @@ static struct irq_chip gic_eoimode1_chip ++ .irq_unmask = gic_unmask_irq, ++ .irq_eoi = gic_eoimode1_eoi_irq, ++ .irq_set_type = gic_set_type, +++#ifdef CONFIG_IPIPE +++ .irq_hold = gic_hold_irq, +++ .irq_release = gic_release_irq, +++#endif ++ .irq_set_affinity = gic_set_affinity, ++ .irq_get_irqchip_state = gic_irq_get_irqchip_state, ++ .irq_set_irqchip_state = gic_irq_set_irqchip_state, ++ .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity, ++ .flags = IRQCHIP_SET_TYPE_MASKED | ++ IRQCHIP_SKIP_SET_WAKE | +++ IRQCHIP_PIPELINE_SAFE | ++ IRQCHIP_MASK_ON_SUSPEND, ++ }; ++ +diff -uprN kernel/drivers/irqchip/irq-imx-gpcv2.c kernel_new/drivers/irqchip/irq-imx-gpcv2.c +--- kernel/drivers/irqchip/irq-imx-gpcv2.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-imx-gpcv2.c 2021-04-01 18:28:07.664863277 +0800 +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + + #define IMR_NUM 4 +@@ -19,7 +20,11 @@ + #define GPC_IMR1_CORE1 0x40 + + struct gpcv2_irqchip_data { ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t rlock; ++#else + struct raw_spinlock rlock; ++#endif + void __iomem *gpc_base; + u32 wakeup_sources[IMR_NUM]; + u32 saved_irq_mask[IMR_NUM]; +@@ -31,6 +36,7 @@ static struct gpcv2_irqchip_data *imx_gp + static int gpcv2_wakeup_source_save(void) + { + struct gpcv2_irqchip_data *cd; ++ unsigned long flags; + void __iomem *reg; + int i; + +@@ -40,8 +46,10 @@ static int gpcv2_wakeup_source_save(void + + for (i = 0; i < IMR_NUM; i++) { + reg = cd->gpc_base + cd->cpu2wakeup + i * 4; ++ flags = hard_cond_local_irq_save(); + cd->saved_irq_mask[i] = readl_relaxed(reg); + 
writel_relaxed(cd->wakeup_sources[i], reg); ++ hard_cond_local_irq_restore(flags); + } + + return 0; +@@ -50,6 +58,7 @@ static int gpcv2_wakeup_source_save(void + static void gpcv2_wakeup_source_restore(void) + { + struct gpcv2_irqchip_data *cd; ++ unsigned long flags; + void __iomem *reg; + int i; + +@@ -58,8 +67,10 @@ static void gpcv2_wakeup_source_restore( + return; + + for (i = 0; i < IMR_NUM; i++) { ++ flags = hard_cond_local_irq_save(); + reg = cd->gpc_base + cd->cpu2wakeup + i * 4; + writel_relaxed(cd->saved_irq_mask[i], reg); ++ hard_cond_local_irq_restore(flags); + } + } + +@@ -92,38 +103,77 @@ static int imx_gpcv2_irq_set_wake(struct + return 0; + } + +-static void imx_gpcv2_irq_unmask(struct irq_data *d) ++static void __imx_gpcv2_irq_unmask(struct irq_data *d) + { + struct gpcv2_irqchip_data *cd = d->chip_data; + void __iomem *reg; + u32 val; + +- raw_spin_lock(&cd->rlock); + reg = cd->gpc_base + cd->cpu2wakeup + d->hwirq / 32 * 4; + val = readl_relaxed(reg); + val &= ~(1 << d->hwirq % 32); + writel_relaxed(val, reg); +- raw_spin_unlock(&cd->rlock); ++} ++ ++static void imx_gpcv2_irq_unmask(struct irq_data *d) ++{ ++ struct gpcv2_irqchip_data *cd = d->chip_data; ++ unsigned long flags; + ++ raw_spin_lock_irqsave(&cd->rlock, flags); ++ __imx_gpcv2_irq_unmask(d); ++ raw_spin_unlock_irqrestore(&cd->rlock, flags); + irq_chip_unmask_parent(d); + } + +-static void imx_gpcv2_irq_mask(struct irq_data *d) ++static void __imx_gpcv2_irq_mask(struct irq_data *d) + { + struct gpcv2_irqchip_data *cd = d->chip_data; + void __iomem *reg; + u32 val; + +- raw_spin_lock(&cd->rlock); + reg = cd->gpc_base + cd->cpu2wakeup + d->hwirq / 32 * 4; + val = readl_relaxed(reg); + val |= 1 << (d->hwirq % 32); + writel_relaxed(val, reg); +- raw_spin_unlock(&cd->rlock); ++} + ++static void imx_gpcv2_irq_mask(struct irq_data *d) ++{ ++ struct gpcv2_irqchip_data *cd = d->chip_data; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&cd->rlock, flags); ++ __imx_gpcv2_irq_mask(d); ++ raw_spin_unlock_irqrestore(&cd->rlock, flags); + irq_chip_mask_parent(d); + } + ++#ifdef CONFIG_IPIPE ++ ++static void imx_gpc_hold_irq(struct irq_data *d) ++{ ++ struct gpcv2_irqchip_data *cd = d->chip_data; ++ ++ raw_spin_lock(&cd->rlock); ++ __imx_gpcv2_irq_mask(d); ++ raw_spin_unlock(&cd->rlock); ++ irq_chip_hold_parent(d); ++} ++ ++static void imx_gpc_release_irq(struct irq_data *d) ++{ ++ struct gpcv2_irqchip_data *cd = d->chip_data; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&cd->rlock, flags); ++ __imx_gpcv2_irq_unmask(d); ++ raw_spin_unlock_irqrestore(&cd->rlock, flags); ++ irq_chip_release_parent(d); ++} ++ ++#endif /* CONFIG_IPIPE */ ++ + static struct irq_chip gpcv2_irqchip_data_chip = { + .name = "GPCv2", + .irq_eoi = irq_chip_eoi_parent, +@@ -135,6 +185,11 @@ static struct irq_chip gpcv2_irqchip_dat + #ifdef CONFIG_SMP + .irq_set_affinity = irq_chip_set_affinity_parent, + #endif ++#ifdef CONFIG_IPIPE ++ .irq_hold = imx_gpc_hold_irq, ++ .irq_release = imx_gpc_release_irq, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static int imx_gpcv2_domain_translate(struct irq_domain *d, +diff -uprN kernel/drivers/irqchip/irq-omap-intc.c kernel_new/drivers/irqchip/irq-omap-intc.c +--- kernel/drivers/irqchip/irq-omap-intc.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-omap-intc.c 2021-04-01 18:28:07.664863277 +0800 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -39,6 +40,7 @@ + #define INTC_MIR_CLEAR0 0x0088 + #define INTC_MIR_SET0 0x008c + 
#define INTC_PENDING_IRQ0 0x0098 ++#define INTC_PRIO 0x0100 + #define INTC_PENDING_IRQ1 0x00b8 + #define INTC_PENDING_IRQ2 0x00d8 + #define INTC_PENDING_IRQ3 0x00f8 +@@ -49,6 +51,12 @@ + #define INTCPS_NR_ILR_REGS 128 + #define INTCPS_NR_MIR_REGS 4 + ++#if !defined(MULTI_OMAP1) && !defined(MULTI_OMAP2) ++#define inline_single inline ++#else ++#define inline_single ++#endif ++ + #define INTC_IDLE_FUNCIDLE (1 << 0) + #define INTC_IDLE_TURBO (1 << 1) + +@@ -69,12 +77,12 @@ static void __iomem *omap_irq_base; + static int omap_nr_pending; + static int omap_nr_irqs; + +-static void intc_writel(u32 reg, u32 val) ++static inline_single void intc_writel(u32 reg, u32 val) + { + writel_relaxed(val, omap_irq_base + reg); + } + +-static u32 intc_readl(u32 reg) ++static inline_single u32 intc_readl(u32 reg) + { + return readl_relaxed(omap_irq_base + reg); + } +@@ -137,9 +145,10 @@ void omap3_intc_resume_idle(void) + } + + /* XXX: FIQ and additional INTC support (only MPU at the moment) */ +-static void omap_ack_irq(struct irq_data *d) ++static inline_single void omap_ack_irq(struct irq_data *d) + { + intc_writel(INTC_CONTROL, 0x1); ++ dsb(); + } + + static void omap_mask_ack_irq(struct irq_data *d) +@@ -164,8 +173,14 @@ static void __init omap_irq_soft_reset(v + while (!(intc_readl(INTC_SYSSTATUS) & 0x1)) + /* Wait for reset to complete */; + ++#ifndef CONFIG_IPIPE + /* Enable autoidle */ + intc_writel(INTC_SYSCONFIG, 1 << 0); ++#else /* CONFIG_IPIPE */ ++ /* Disable autoidle */ ++ intc_writel(INTC_SYSCONFIG, 0); ++ intc_writel(INTC_IDLE, 0x1); ++#endif /* CONFIG_IPIPE */ + } + + int omap_irq_pending(void) +@@ -211,7 +226,7 @@ static int __init omap_alloc_gc_of(struc + ct->chip.irq_mask = irq_gc_mask_disable_reg; + ct->chip.irq_unmask = irq_gc_unmask_enable_reg; + +- ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE; ++ ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE; + + ct->regs.enable = INTC_MIR_CLEAR0 + 32 * i; + ct->regs.disable = INTC_MIR_SET0 + 32 * i; +@@ -231,8 +246,11 @@ static void __init omap_alloc_gc_legacy( + ct = gc->chip_types; + ct->chip.irq_ack = omap_mask_ack_irq; + ct->chip.irq_mask = irq_gc_mask_disable_reg; ++#ifdef CONFIG_IPIPE ++ ct->chip.irq_mask_ack = omap_mask_ack_irq; ++#endif + ct->chip.irq_unmask = irq_gc_unmask_enable_reg; +- ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE; ++ ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE; + + ct->regs.enable = INTC_MIR_CLEAR0; + ct->regs.disable = INTC_MIR_SET0; +@@ -357,7 +375,7 @@ omap_intc_handle_irq(struct pt_regs *reg + } + + irqnr &= ACTIVEIRQ_MASK; +- handle_domain_irq(domain, irqnr, regs); ++ ipipe_handle_domain_irq(domain, irqnr, regs); + } + + static int __init intc_of_init(struct device_node *node, +@@ -387,6 +405,28 @@ static int __init intc_of_init(struct de + return 0; + } + ++#if defined(CONFIG_IPIPE) && defined(CONFIG_ARCH_OMAP2PLUS) ++#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) ++void omap3_intc_mute(void) ++{ ++ intc_writel(INTC_THRESHOLD, 0x1); ++ intc_writel(INTC_CONTROL, 0x1); ++} ++ ++void omap3_intc_unmute(void) ++{ ++ intc_writel(INTC_THRESHOLD, 0xff); ++} ++ ++void omap3_intc_set_irq_prio(int irq, int hi) ++{ ++ if (irq >= INTCPS_NR_MIR_REGS * 32) ++ return; ++ intc_writel(INTC_PRIO + 4 * irq, hi ? 
0 : 0xfc); ++} ++#endif /* CONFIG_ARCH_OMAP3 */ ++#endif /* CONFIG_IPIPE && ARCH_OMAP2PLUS */ ++ + IRQCHIP_DECLARE(omap2_intc, "ti,omap2-intc", intc_of_init); + IRQCHIP_DECLARE(omap3_intc, "ti,omap3-intc", intc_of_init); + IRQCHIP_DECLARE(dm814x_intc, "ti,dm814-intc", intc_of_init); +diff -uprN kernel/drivers/irqchip/irq-sunxi-nmi.c kernel_new/drivers/irqchip/irq-sunxi-nmi.c +--- kernel/drivers/irqchip/irq-sunxi-nmi.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-sunxi-nmi.c 2021-04-01 18:28:07.665863276 +0800 +@@ -115,8 +115,9 @@ static int sunxi_sc_nmi_set_type(struct + u32 ctrl_off = ct->regs.type; + unsigned int src_type; + unsigned int i; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + + switch (flow_type & IRQF_TRIGGER_MASK) { + case IRQ_TYPE_EDGE_FALLING: +@@ -133,7 +134,7 @@ static int sunxi_sc_nmi_set_type(struct + src_type = SUNXI_SRC_TYPE_LEVEL_LOW; + break; + default: +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + pr_err("Cannot assign multiple trigger modes to IRQ %d.\n", + data->irq); + return -EBADR; +@@ -151,7 +152,7 @@ static int sunxi_sc_nmi_set_type(struct + src_type_reg |= src_type; + sunxi_sc_nmi_write(gc, ctrl_off, src_type_reg); + +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + + return IRQ_SET_MASK_OK; + } +@@ -200,7 +201,7 @@ static int __init sunxi_sc_nmi_irq_init( + gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; + gc->chip_types[0].chip.irq_eoi = irq_gc_ack_set_bit; + gc->chip_types[0].chip.irq_set_type = sunxi_sc_nmi_set_type; +- gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED; ++ gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED | IRQCHIP_PIPELINE_SAFE; + gc->chip_types[0].regs.ack = reg_offs->pend; + gc->chip_types[0].regs.mask = reg_offs->enable; + gc->chip_types[0].regs.type = reg_offs->ctrl; +@@ -211,6 +212,7 @@ static int __init sunxi_sc_nmi_irq_init( + gc->chip_types[1].chip.irq_mask = irq_gc_mask_clr_bit; + gc->chip_types[1].chip.irq_unmask = irq_gc_mask_set_bit; + gc->chip_types[1].chip.irq_set_type = sunxi_sc_nmi_set_type; ++ gc->chip_types[1].chip.flags = IRQCHIP_PIPELINE_SAFE; + gc->chip_types[1].regs.ack = reg_offs->pend; + gc->chip_types[1].regs.mask = reg_offs->enable; + gc->chip_types[1].regs.type = reg_offs->ctrl; +diff -uprN kernel/drivers/irqchip/irq-versatile-fpga.c kernel_new/drivers/irqchip/irq-versatile-fpga.c +--- kernel/drivers/irqchip/irq-versatile-fpga.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-versatile-fpga.c 2021-04-01 18:28:07.665863276 +0800 +@@ -80,7 +80,7 @@ static void fpga_irq_handle(struct irq_d + unsigned int irq = ffs(status) - 1; + + status &= ~(1 << irq); +- generic_handle_irq(irq_find_mapping(f->domain, irq)); ++ ipipe_handle_demuxed_irq(irq_find_mapping(f->domain, irq)); + } while (status); + } + +@@ -97,7 +97,7 @@ static int handle_one_fpga(struct fpga_i + + while ((status = readl(f->base + IRQ_STATUS))) { + irq = ffs(status) - 1; +- handle_domain_irq(f->domain, irq, regs); ++ ipipe_handle_domain_irq(f->domain, irq, regs); + handled = 1; + } + +@@ -153,7 +153,11 @@ void __init fpga_irq_init(void __iomem * + f->chip.name = name; + f->chip.irq_ack = fpga_irq_mask; + f->chip.irq_mask = fpga_irq_mask; ++#ifdef CONFIG_IPIPE ++ f->chip.irq_mask_ack = fpga_irq_mask; ++#endif + f->chip.irq_unmask = fpga_irq_unmask; ++ f->chip.flags = IRQCHIP_PIPELINE_SAFE; + f->valid = valid; + + if (parent_irq != -1) { +diff -uprN kernel/drivers/irqchip/irq-vic.c 
kernel_new/drivers/irqchip/irq-vic.c +--- kernel/drivers/irqchip/irq-vic.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-vic.c 2021-04-01 18:28:07.665863276 +0800 +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -218,7 +219,7 @@ static int handle_one_vic(struct vic_dev + + while ((stat = readl_relaxed(vic->base + VIC_IRQ_STATUS))) { + irq = ffs(stat) - 1; +- handle_domain_irq(vic->domain, irq, regs); ++ ipipe_handle_domain_irq(vic->domain, irq, regs); + handled = 1; + } + +@@ -235,7 +236,7 @@ static void vic_handle_irq_cascaded(stru + + while ((stat = readl_relaxed(vic->base + VIC_IRQ_STATUS))) { + hwirq = ffs(stat) - 1; +- generic_handle_irq(irq_find_mapping(vic->domain, hwirq)); ++ ipipe_handle_demuxed_irq(irq_find_mapping(vic->domain, hwirq)); + } + + chained_irq_exit(host_chip, desc); +@@ -339,7 +340,7 @@ static void vic_unmask_irq(struct irq_da + #if defined(CONFIG_PM) + static struct vic_device *vic_from_irq(unsigned int irq) + { +- struct vic_device *v = vic_devices; ++ struct vic_device *v = vic_devices; + unsigned int base_irq = irq & ~31; + int id; + +@@ -378,8 +379,12 @@ static struct irq_chip vic_chip = { + .name = "VIC", + .irq_ack = vic_ack_irq, + .irq_mask = vic_mask_irq, ++#ifdef CONFIG_IPIPE ++ .irq_mask_ack = vic_ack_irq, ++#endif /* CONFIG_IPIPE */ + .irq_unmask = vic_unmask_irq, + .irq_set_wake = vic_set_wake, ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static void __init vic_disable(void __iomem *base) +diff -uprN kernel/drivers/memory/omap-gpmc.c kernel_new/drivers/memory/omap-gpmc.c +--- kernel/drivers/memory/omap-gpmc.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/memory/omap-gpmc.c 2021-04-01 18:28:07.665863276 +0800 +@@ -1261,12 +1261,15 @@ int gpmc_get_client_irq(unsigned irq_con + + static int gpmc_irq_endis(unsigned long hwirq, bool endis) + { ++ unsigned long flags; + u32 regval; + + /* bits GPMC_NR_NAND_IRQS to 8 are reserved */ + if (hwirq >= GPMC_NR_NAND_IRQS) + hwirq += 8 - GPMC_NR_NAND_IRQS; + ++ flags = hard_local_irq_save(); ++ + regval = gpmc_read_reg(GPMC_IRQENABLE); + if (endis) + regval |= BIT(hwirq); +@@ -1274,6 +1277,8 @@ static int gpmc_irq_endis(unsigned long + regval &= ~BIT(hwirq); + gpmc_write_reg(GPMC_IRQENABLE, regval); + ++ hard_local_irq_restore(flags); ++ + return 0; + } + +@@ -1299,6 +1304,7 @@ static void gpmc_irq_unmask(struct irq_d + + static void gpmc_irq_edge_config(unsigned long hwirq, bool rising_edge) + { ++ unsigned long flags; + u32 regval; + + /* NAND IRQs polarity is not configurable */ +@@ -1308,6 +1314,8 @@ static void gpmc_irq_edge_config(unsigne + /* WAITPIN starts at BIT 8 */ + hwirq += 8 - GPMC_NR_NAND_IRQS; + ++ flags = hard_local_irq_save(); ++ + regval = gpmc_read_reg(GPMC_CONFIG); + if (rising_edge) + regval &= ~BIT(hwirq); +@@ -1315,6 +1323,8 @@ static void gpmc_irq_edge_config(unsigne + regval |= BIT(hwirq); + + gpmc_write_reg(GPMC_CONFIG, regval); ++ ++ hard_local_irq_restore(flags); + } + + static void gpmc_irq_ack(struct irq_data *d) +@@ -1394,7 +1404,7 @@ static irqreturn_t gpmc_handle_irq(int i + hwirq, virq); + } + +- generic_handle_irq(virq); ++ ipipe_handle_demuxed_irq(virq); + } + } + +@@ -1422,6 +1432,7 @@ static int gpmc_setup_irq(struct gpmc_de + gpmc->irq_chip.irq_mask = gpmc_irq_mask; + gpmc->irq_chip.irq_unmask = gpmc_irq_unmask; + gpmc->irq_chip.irq_set_type = gpmc_irq_set_type; ++ gpmc->irq_chip.flags |= IRQCHIP_PIPELINE_SAFE; + + gpmc_irq_domain = irq_domain_add_linear(gpmc->dev->of_node, + gpmc->nirqs, +diff 
-uprN kernel/drivers/pci/controller/dwc/pcie-designware-host.c kernel_new/drivers/pci/controller/dwc/pcie-designware-host.c +--- kernel/drivers/pci/controller/dwc/pcie-designware-host.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pci/controller/dwc/pcie-designware-host.c 2021-04-01 18:28:07.665863276 +0800 +@@ -66,6 +66,7 @@ static struct irq_chip dw_pcie_msi_irq_c + .irq_ack = dw_msi_ack_irq, + .irq_mask = dw_msi_mask_irq, + .irq_unmask = dw_msi_unmask_irq, ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static struct msi_domain_info dw_pcie_msi_domain_info = { +diff -uprN kernel/drivers/pci/controller/pcie-altera.c kernel_new/drivers/pci/controller/pcie-altera.c +--- kernel/drivers/pci/controller/pcie-altera.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pci/controller/pcie-altera.c 2021-04-01 18:28:07.665863276 +0800 +@@ -477,7 +477,7 @@ static void altera_pcie_isr(struct irq_d + + virq = irq_find_mapping(pcie->irq_domain, bit); + if (virq) +- generic_handle_irq(virq); ++ ipipe_handle_demuxed_irq(virq); + else + dev_err(dev, "unexpected IRQ, INT%d\n", bit); + } +diff -uprN kernel/drivers/pinctrl/bcm/pinctrl-bcm2835.c kernel_new/drivers/pinctrl/bcm/pinctrl-bcm2835.c +--- kernel/drivers/pinctrl/bcm/pinctrl-bcm2835.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pinctrl/bcm/pinctrl-bcm2835.c 2021-04-01 18:28:07.665863276 +0800 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -88,7 +89,11 @@ struct bcm2835_pinctrl { + struct gpio_chip gpio_chip; + struct pinctrl_gpio_range gpio_range; + ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t irq_lock[BCM2835_NUM_BANKS]; ++#else + raw_spinlock_t irq_lock[BCM2835_NUM_BANKS]; ++#endif + }; + + /* pins are just named GPIO0..GPIO53 */ +@@ -367,7 +372,7 @@ static void bcm2835_gpio_irq_handle_bank + events &= pc->enabled_irq_map[bank]; + for_each_set_bit(offset, &events, 32) { + gpio = (32 * bank) + offset; +- generic_handle_irq(irq_linear_revmap(pc->gpio_chip.irq.domain, ++ ipipe_handle_demuxed_irq(irq_linear_revmap(pc->gpio_chip.irq.domain, + gpio)); + } + } +@@ -462,6 +467,7 @@ static void bcm2835_gpio_irq_enable(stru + raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); + set_bit(offset, &pc->enabled_irq_map[bank]); + bcm2835_gpio_irq_config(pc, gpio, true); ++ ipipe_unlock_irq(data->irq); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); + } + +@@ -479,6 +485,7 @@ static void bcm2835_gpio_irq_disable(str + /* Clear events that were latched prior to clearing event sources */ + bcm2835_gpio_set_bit(pc, GPEDS0, gpio); + clear_bit(offset, &pc->enabled_irq_map[bank]); ++ ipipe_lock_irq(data->irq); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); + } + +@@ -608,6 +615,39 @@ static void bcm2835_gpio_irq_ack(struct + bcm2835_gpio_set_bit(pc, GPEDS0, gpio); + } + ++#ifdef CONFIG_IPIPE ++ ++static void bcm2835_gpio_irq_hold(struct irq_data *data) ++{ ++ struct bcm2835_pinctrl *pc = irq_data_get_irq_chip_data(data); ++ unsigned gpio = irqd_to_hwirq(data); ++ unsigned offset = GPIO_REG_SHIFT(gpio); ++ unsigned bank = GPIO_REG_OFFSET(gpio); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); ++ bcm2835_gpio_irq_config(pc, gpio, false); ++ bcm2835_gpio_set_bit(pc, GPEDS0, gpio); ++ clear_bit(offset, &pc->enabled_irq_map[bank]); ++ raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); ++} ++ ++static void bcm2835_gpio_irq_release(struct irq_data *data) ++{ ++ struct bcm2835_pinctrl *pc = irq_data_get_irq_chip_data(data); ++ unsigned 
gpio = irqd_to_hwirq(data); ++ unsigned offset = GPIO_REG_SHIFT(gpio); ++ unsigned bank = GPIO_REG_OFFSET(gpio); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); ++ set_bit(offset, &pc->enabled_irq_map[bank]); ++ bcm2835_gpio_irq_config(pc, gpio, true); ++ raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); ++} ++ ++#endif ++ + static struct irq_chip bcm2835_gpio_irq_chip = { + .name = MODULE_NAME, + .irq_enable = bcm2835_gpio_irq_enable, +@@ -616,6 +656,11 @@ static struct irq_chip bcm2835_gpio_irq_ + .irq_ack = bcm2835_gpio_irq_ack, + .irq_mask = bcm2835_gpio_irq_disable, + .irq_unmask = bcm2835_gpio_irq_enable, ++#ifdef CONFIG_IPIPE ++ .irq_hold = bcm2835_gpio_irq_hold, ++ .irq_release = bcm2835_gpio_irq_release, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static int bcm2835_pctl_get_groups_count(struct pinctrl_dev *pctldev) +diff -uprN kernel/drivers/pinctrl/pinctrl-rockchip.c kernel_new/drivers/pinctrl/pinctrl-rockchip.c +--- kernel/drivers/pinctrl/pinctrl-rockchip.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pinctrl/pinctrl-rockchip.c 2021-04-01 18:28:07.796863130 +0800 +@@ -2868,7 +2868,7 @@ static int rockchip_irq_set_type(struct + u32 polarity; + u32 level; + u32 data; +- unsigned long flags; ++ unsigned long flags, flags2; + int ret; + + /* make sure the pin is configured as gpio input */ +@@ -2891,7 +2891,7 @@ static int rockchip_irq_set_type(struct + irq_set_handler_locked(d, handle_level_irq); + + raw_spin_lock_irqsave(&bank->slock, flags); +- irq_gc_lock(gc); ++ flags2 = irq_gc_lock(gc); + + level = readl_relaxed(gc->reg_base + GPIO_INTTYPE_LEVEL); + polarity = readl_relaxed(gc->reg_base + GPIO_INT_POLARITY); +@@ -2932,7 +2932,7 @@ static int rockchip_irq_set_type(struct + polarity &= ~mask; + break; + default: +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags2); + raw_spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); + return -EINVAL; +@@ -2941,7 +2941,7 @@ static int rockchip_irq_set_type(struct + writel_relaxed(level, gc->reg_base + GPIO_INTTYPE_LEVEL); + writel_relaxed(polarity, gc->reg_base + GPIO_INT_POLARITY); + +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags2); + raw_spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); + +diff -uprN kernel/drivers/pinctrl/pinctrl-single.c kernel_new/drivers/pinctrl/pinctrl-single.c +--- kernel/drivers/pinctrl/pinctrl-single.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pinctrl/pinctrl-single.c 2021-04-01 18:28:07.797863129 +0800 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #include + +@@ -185,7 +186,11 @@ struct pcs_device { + #define PCS_FEAT_PINCONF (1 << 0) + struct property *missing_nr_pinctrl_cells; + struct pcs_soc_data socdata; ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t lock; ++#else /* !IPIPE */ + raw_spinlock_t lock; ++#endif /* !IPIPE */ + struct mutex mutex; + unsigned width; + unsigned fmask; +@@ -1460,7 +1465,7 @@ static int pcs_irq_handle(struct pcs_soc + mask = pcs->read(pcswi->reg); + raw_spin_unlock(&pcs->lock); + if (mask & pcs_soc->irq_status_mask) { +- generic_handle_irq(irq_find_mapping(pcs->domain, ++ ipipe_handle_demuxed_irq(irq_find_mapping(pcs->domain, + pcswi->hwirq)); + count++; + } +@@ -1480,8 +1485,14 @@ static int pcs_irq_handle(struct pcs_soc + static irqreturn_t pcs_irq_handler(int irq, void *d) + { + struct pcs_soc_data *pcs_soc = d; ++ unsigned long flags; ++ irqreturn_t ret; + +- return pcs_irq_handle(pcs_soc) ? 
IRQ_HANDLED : IRQ_NONE; ++ flags = hard_cond_local_irq_save(); ++ ret = pcs_irq_handle(pcs_soc) ? IRQ_HANDLED : IRQ_NONE; ++ hard_cond_local_irq_restore(flags); ++ ++ return ret; + } + + /** +diff -uprN kernel/drivers/pinctrl/sunxi/pinctrl-sunxi.c kernel_new/drivers/pinctrl/sunxi/pinctrl-sunxi.c +--- kernel/drivers/pinctrl/sunxi/pinctrl-sunxi.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pinctrl/sunxi/pinctrl-sunxi.c 2021-04-01 18:28:07.797863129 +0800 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -937,14 +938,33 @@ static struct irq_chip sunxi_pinctrl_edg + .irq_request_resources = sunxi_pinctrl_irq_request_resources, + .irq_release_resources = sunxi_pinctrl_irq_release_resources, + .irq_set_type = sunxi_pinctrl_irq_set_type, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + ++#ifdef CONFIG_IPIPE ++ ++static void sunxi_pinctrl_irq_hold(struct irq_data *d) ++{ ++ sunxi_pinctrl_irq_mask(d); ++ sunxi_pinctrl_irq_ack(d); ++} ++ ++static void sunxi_pinctrl_irq_release(struct irq_data *d) ++{ ++ sunxi_pinctrl_irq_unmask(d); ++} ++ ++#endif ++ + static struct irq_chip sunxi_pinctrl_level_irq_chip = { + .name = "sunxi_pio_level", + .irq_eoi = sunxi_pinctrl_irq_ack, + .irq_mask = sunxi_pinctrl_irq_mask, + .irq_unmask = sunxi_pinctrl_irq_unmask, ++#ifdef CONFIG_IPIPE ++ .irq_hold = sunxi_pinctrl_irq_hold, ++ .irq_release = sunxi_pinctrl_irq_release, ++#endif + /* Define irq_enable / disable to avoid spurious irqs for drivers + * using these to suppress irqs while they clear the irq source */ + .irq_enable = sunxi_pinctrl_irq_ack_unmask, +@@ -953,7 +973,7 @@ static struct irq_chip sunxi_pinctrl_lev + .irq_release_resources = sunxi_pinctrl_irq_release_resources, + .irq_set_type = sunxi_pinctrl_irq_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_EOI_THREADED | +- IRQCHIP_EOI_IF_HANDLED, ++ IRQCHIP_EOI_IF_HANDLED | IRQCHIP_PIPELINE_SAFE, + }; + + static int sunxi_pinctrl_irq_of_xlate(struct irq_domain *d, +@@ -1011,7 +1031,7 @@ static void sunxi_pinctrl_irq_handler(st + for_each_set_bit(irqoffset, &val, IRQ_PER_BANK) { + int pin_irq = irq_find_mapping(pctl->domain, + bank * IRQ_PER_BANK + irqoffset); +- generic_handle_irq(pin_irq); ++ ipipe_handle_demuxed_irq(pin_irq); + } + chained_irq_exit(chip, desc); + } +diff -uprN kernel/drivers/pinctrl/sunxi/pinctrl-sunxi.h kernel_new/drivers/pinctrl/sunxi/pinctrl-sunxi.h +--- kernel/drivers/pinctrl/sunxi/pinctrl-sunxi.h 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pinctrl/sunxi/pinctrl-sunxi.h 2021-04-01 18:28:07.797863129 +0800 +@@ -138,7 +138,11 @@ struct sunxi_pinctrl { + unsigned ngroups; + int *irq; + unsigned *irq_array; ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t lock; ++#else + raw_spinlock_t lock; ++#endif + struct pinctrl_dev *pctl_dev; + unsigned long variant; + }; +diff -uprN kernel/drivers/soc/dove/pmu.c kernel_new/drivers/soc/dove/pmu.c +--- kernel/drivers/soc/dove/pmu.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/soc/dove/pmu.c 2021-04-01 18:28:07.797863129 +0800 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #define NR_PMU_IRQS 7 + +@@ -231,6 +232,7 @@ static void pmu_irq_handler(struct irq_d + void __iomem *base = gc->reg_base; + u32 stat = readl_relaxed(base + PMC_IRQ_CAUSE) & gc->mask_cache; + u32 done = ~0; ++ unsigned long flags; + + if (stat == 0) { + handle_bad_irq(desc); +@@ -243,7 +245,7 @@ static void pmu_irq_handler(struct irq_d + stat &= ~(1 << hwirq); + done &= 
~(1 << hwirq); + +- generic_handle_irq(irq_find_mapping(domain, hwirq)); ++ ipipe_handle_demuxed_irq(irq_find_mapping(domain, hwirq)); + } + + /* +@@ -257,10 +259,10 @@ static void pmu_irq_handler(struct irq_d + * So, let's structure the code so that the window is as small as + * possible. + */ +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + done &= readl_relaxed(base + PMC_IRQ_CAUSE); + writel_relaxed(done, base + PMC_IRQ_CAUSE); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static int __init dove_init_pmu_irq(struct pmu_data *pmu, int irq) +@@ -296,6 +298,7 @@ static int __init dove_init_pmu_irq(stru + gc->chip_types[0].regs.mask = PMC_IRQ_MASK; + gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit; + gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; ++ gc->chip_types[0].chip.flags |= IRQCHIP_PIPELINE_SAFE; + + pmu->irq_domain = domain; + pmu->irq_gc = gc; +diff -uprN kernel/drivers/tty/serial/8250/8250_core.c kernel_new/drivers/tty/serial/8250/8250_core.c +--- kernel/drivers/tty/serial/8250/8250_core.c 2020-12-21 21:59:21.000000000 +0800 ++++ kernel_new/drivers/tty/serial/8250/8250_core.c 2021-04-01 18:28:07.797863129 +0800 +@@ -590,6 +590,48 @@ static void univ8250_console_write(struc + serial8250_console_write(up, s, count); + } + ++#ifdef CONFIG_RAW_PRINTK ++ ++static void raw_write_char(struct uart_8250_port *up, int c) ++{ ++ unsigned int status, tmout = 10000; ++ ++ for (;;) { ++ status = serial_in(up, UART_LSR); ++ up->lsr_saved_flags |= status & LSR_SAVE_FLAGS; ++ if ((status & UART_LSR_THRE) == UART_LSR_THRE) ++ break; ++ if (--tmout == 0) ++ break; ++ cpu_relax(); ++ } ++ serial_port_out(&up->port, UART_TX, c); ++} ++ ++static void univ8250_console_write_raw(struct console *co, const char *s, ++ unsigned int count) ++{ ++ struct uart_8250_port *up = &serial8250_ports[co->index]; ++ unsigned int ier; ++ ++ ier = serial_in(up, UART_IER); ++ ++ if (up->capabilities & UART_CAP_UUE) ++ serial_out(up, UART_IER, UART_IER_UUE); ++ else ++ serial_out(up, UART_IER, 0); ++ ++ while (count-- > 0) { ++ if (*s == '\n') ++ raw_write_char(up, '\r'); ++ raw_write_char(up, *s++); ++ } ++ ++ serial_out(up, UART_IER, ier); ++} ++ ++#endif ++ + static int univ8250_console_setup(struct console *co, char *options) + { + struct uart_port *port; +@@ -671,7 +713,12 @@ static struct console univ8250_console = + .device = uart_console_device, + .setup = univ8250_console_setup, + .match = univ8250_console_match, ++#ifdef CONFIG_RAW_PRINTK ++ .write_raw = univ8250_console_write_raw, ++ .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_RAW, ++#else + .flags = CON_PRINTBUFFER | CON_ANYTIME, ++#endif + .index = -1, + .data = &serial8250_reg, + }; +diff -uprN kernel/drivers/tty/serial/amba-pl011.c kernel_new/drivers/tty/serial/amba-pl011.c +--- kernel/drivers/tty/serial/amba-pl011.c 2020-12-21 21:59:21.000000000 +0800 ++++ kernel_new/drivers/tty/serial/amba-pl011.c 2021-04-01 18:28:07.797863129 +0800 +@@ -2270,6 +2270,42 @@ static void pl011_console_putchar(struct + pl011_write(ch, uap, REG_DR); + } + ++#ifdef CONFIG_RAW_PRINTK ++ ++#define pl011_clk_setup(clk) clk_prepare_enable(clk) ++#define pl011_clk_enable(clk) do { } while (0) ++#define pl011_clk_disable(clk) do { } while (0) ++ ++static void ++pl011_console_write_raw(struct console *co, const char *s, unsigned int count) ++{ ++ struct uart_amba_port *uap = amba_ports[co->index]; ++ unsigned int old_cr, new_cr, status; ++ ++ old_cr = readw(uap->port.membase + UART011_CR); ++ new_cr = old_cr & ~UART011_CR_CTSEN; ++ new_cr |= 
UART01x_CR_UARTEN | UART011_CR_TXE; ++ writew(new_cr, uap->port.membase + UART011_CR); ++ ++ while (count-- > 0) { ++ if (*s == '\n') ++ pl011_console_putchar(&uap->port, '\r'); ++ pl011_console_putchar(&uap->port, *s++); ++ } ++ do ++ status = readw(uap->port.membase + UART01x_FR); ++ while (status & UART01x_FR_BUSY); ++ writew(old_cr, uap->port.membase + UART011_CR); ++} ++ ++#else /* !CONFIG_RAW_PRINTK */ ++ ++#define pl011_clk_setup(clk) clk_prepare(clk) ++#define pl011_clk_enable(clk) clk_enable(clk) ++#define pl011_clk_disable(clk) clk_disable(clk) ++ ++#endif /* !CONFIG_RAW_PRINTK */ ++ + static void + pl011_console_write(struct console *co, const char *s, unsigned int count) + { +@@ -2278,7 +2314,7 @@ pl011_console_write(struct console *co, + unsigned long flags; + int locked = 1; + +- clk_enable(uap->clk); ++ pl011_clk_enable(uap->clk); + + local_irq_save(flags); + if (uap->port.sysrq) +@@ -2315,7 +2351,7 @@ pl011_console_write(struct console *co, + spin_unlock(&uap->port.lock); + local_irq_restore(flags); + +- clk_disable(uap->clk); ++ pl011_clk_disable(uap->clk); + } + + static void pl011_console_get_options(struct uart_amba_port *uap, int *baud, +@@ -2375,7 +2411,7 @@ static int pl011_console_setup(struct co + /* Allow pins to be muxed in and configured */ + pinctrl_pm_select_default_state(uap->port.dev); + +- ret = clk_prepare(uap->clk); ++ ret = pl011_clk_setup(uap->clk); + if (ret) + return ret; + +@@ -2469,7 +2505,12 @@ static struct console amba_console = { + .device = uart_console_device, + .setup = pl011_console_setup, + .match = pl011_console_match, ++#ifdef CONFIG_RAW_PRINTK ++ .write_raw = pl011_console_write_raw, ++ .flags = CON_PRINTBUFFER | CON_RAW | CON_ANYTIME, ++#else + .flags = CON_PRINTBUFFER | CON_ANYTIME, ++#endif + .index = -1, + .data = &amba_reg, + }; +diff -uprN kernel/drivers/tty/serial/amba-pl011.c.orig kernel_new/drivers/tty/serial/amba-pl011.c.orig +--- kernel/drivers/tty/serial/amba-pl011.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/tty/serial/amba-pl011.c.orig 2020-12-21 21:59:21.000000000 +0800 +@@ -0,0 +1,2910 @@ ++// SPDX-License-Identifier: GPL-2.0+ ++/* ++ * Driver for AMBA serial ports ++ * ++ * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. ++ * ++ * Copyright 1999 ARM Limited ++ * Copyright (C) 2000 Deep Blue Solutions Ltd. ++ * Copyright (C) 2010 ST-Ericsson SA ++ * ++ * This is a generic driver for ARM AMBA-type serial ports. They ++ * have a lot of 16550-like features, but are not register compatible. ++ * Note that although they do have CTS, DCD and DSR inputs, they do ++ * not have an RI input, nor do they have DTR or RTS outputs. If ++ * required, these have to be supplied via some other means (eg, GPIO) ++ * and hooked into this driver. 
++ */ ++ ++ ++#if defined(CONFIG_SERIAL_AMBA_PL011_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) ++#define SUPPORT_SYSRQ ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "amba-pl011.h" ++ ++#define UART_NR 14 ++ ++#define SERIAL_AMBA_MAJOR 204 ++#define SERIAL_AMBA_MINOR 64 ++#define SERIAL_AMBA_NR UART_NR ++ ++#define AMBA_ISR_PASS_LIMIT 256 ++ ++#define UART_DR_ERROR (UART011_DR_OE|UART011_DR_BE|UART011_DR_PE|UART011_DR_FE) ++#define UART_DUMMY_DR_RX (1 << 16) ++ ++static u16 pl011_std_offsets[REG_ARRAY_SIZE] = { ++ [REG_DR] = UART01x_DR, ++ [REG_FR] = UART01x_FR, ++ [REG_LCRH_RX] = UART011_LCRH, ++ [REG_LCRH_TX] = UART011_LCRH, ++ [REG_IBRD] = UART011_IBRD, ++ [REG_FBRD] = UART011_FBRD, ++ [REG_CR] = UART011_CR, ++ [REG_IFLS] = UART011_IFLS, ++ [REG_IMSC] = UART011_IMSC, ++ [REG_RIS] = UART011_RIS, ++ [REG_MIS] = UART011_MIS, ++ [REG_ICR] = UART011_ICR, ++ [REG_DMACR] = UART011_DMACR, ++}; ++ ++/* There is by now at least one vendor with differing details, so handle it */ ++struct vendor_data { ++ const u16 *reg_offset; ++ unsigned int ifls; ++ unsigned int fr_busy; ++ unsigned int fr_dsr; ++ unsigned int fr_cts; ++ unsigned int fr_ri; ++ unsigned int inv_fr; ++ bool access_32b; ++ bool oversampling; ++ bool dma_threshold; ++ bool cts_event_workaround; ++ bool always_enabled; ++ bool fixed_options; ++ ++ unsigned int (*get_fifosize)(struct amba_device *dev); ++}; ++ ++static unsigned int get_fifosize_arm(struct amba_device *dev) ++{ ++ return amba_rev(dev) < 3 ? 16 : 32; ++} ++ ++static struct vendor_data vendor_arm = { ++ .reg_offset = pl011_std_offsets, ++ .ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8, ++ .fr_busy = UART01x_FR_BUSY, ++ .fr_dsr = UART01x_FR_DSR, ++ .fr_cts = UART01x_FR_CTS, ++ .fr_ri = UART011_FR_RI, ++ .oversampling = false, ++ .dma_threshold = false, ++ .cts_event_workaround = false, ++ .always_enabled = false, ++ .fixed_options = false, ++ .get_fifosize = get_fifosize_arm, ++}; ++ ++static const struct vendor_data vendor_sbsa = { ++ .reg_offset = pl011_std_offsets, ++ .fr_busy = UART01x_FR_BUSY, ++ .fr_dsr = UART01x_FR_DSR, ++ .fr_cts = UART01x_FR_CTS, ++ .fr_ri = UART011_FR_RI, ++ .access_32b = true, ++ .oversampling = false, ++ .dma_threshold = false, ++ .cts_event_workaround = false, ++ .always_enabled = true, ++ .fixed_options = true, ++}; ++ ++#ifdef CONFIG_ACPI_SPCR_TABLE ++static const struct vendor_data vendor_qdt_qdf2400_e44 = { ++ .reg_offset = pl011_std_offsets, ++ .fr_busy = UART011_FR_TXFE, ++ .fr_dsr = UART01x_FR_DSR, ++ .fr_cts = UART01x_FR_CTS, ++ .fr_ri = UART011_FR_RI, ++ .inv_fr = UART011_FR_TXFE, ++ .access_32b = true, ++ .oversampling = false, ++ .dma_threshold = false, ++ .cts_event_workaround = false, ++ .always_enabled = true, ++ .fixed_options = true, ++}; ++#endif ++ ++static u16 pl011_st_offsets[REG_ARRAY_SIZE] = { ++ [REG_DR] = UART01x_DR, ++ [REG_ST_DMAWM] = ST_UART011_DMAWM, ++ [REG_ST_TIMEOUT] = ST_UART011_TIMEOUT, ++ [REG_FR] = UART01x_FR, ++ [REG_LCRH_RX] = ST_UART011_LCRH_RX, ++ [REG_LCRH_TX] = ST_UART011_LCRH_TX, ++ [REG_IBRD] = UART011_IBRD, ++ [REG_FBRD] = UART011_FBRD, ++ [REG_CR] = UART011_CR, ++ [REG_IFLS] = UART011_IFLS, ++ [REG_IMSC] = UART011_IMSC, ++ [REG_RIS] = UART011_RIS, ++ [REG_MIS] = UART011_MIS, ++ [REG_ICR] = UART011_ICR, ++ [REG_DMACR] = UART011_DMACR, ++ [REG_ST_XFCR] = 
ST_UART011_XFCR, ++ [REG_ST_XON1] = ST_UART011_XON1, ++ [REG_ST_XON2] = ST_UART011_XON2, ++ [REG_ST_XOFF1] = ST_UART011_XOFF1, ++ [REG_ST_XOFF2] = ST_UART011_XOFF2, ++ [REG_ST_ITCR] = ST_UART011_ITCR, ++ [REG_ST_ITIP] = ST_UART011_ITIP, ++ [REG_ST_ABCR] = ST_UART011_ABCR, ++ [REG_ST_ABIMSC] = ST_UART011_ABIMSC, ++}; ++ ++static unsigned int get_fifosize_st(struct amba_device *dev) ++{ ++ return 64; ++} ++ ++static struct vendor_data vendor_st = { ++ .reg_offset = pl011_st_offsets, ++ .ifls = UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF, ++ .fr_busy = UART01x_FR_BUSY, ++ .fr_dsr = UART01x_FR_DSR, ++ .fr_cts = UART01x_FR_CTS, ++ .fr_ri = UART011_FR_RI, ++ .oversampling = true, ++ .dma_threshold = true, ++ .cts_event_workaround = true, ++ .always_enabled = false, ++ .fixed_options = false, ++ .get_fifosize = get_fifosize_st, ++}; ++ ++static const u16 pl011_zte_offsets[REG_ARRAY_SIZE] = { ++ [REG_DR] = ZX_UART011_DR, ++ [REG_FR] = ZX_UART011_FR, ++ [REG_LCRH_RX] = ZX_UART011_LCRH, ++ [REG_LCRH_TX] = ZX_UART011_LCRH, ++ [REG_IBRD] = ZX_UART011_IBRD, ++ [REG_FBRD] = ZX_UART011_FBRD, ++ [REG_CR] = ZX_UART011_CR, ++ [REG_IFLS] = ZX_UART011_IFLS, ++ [REG_IMSC] = ZX_UART011_IMSC, ++ [REG_RIS] = ZX_UART011_RIS, ++ [REG_MIS] = ZX_UART011_MIS, ++ [REG_ICR] = ZX_UART011_ICR, ++ [REG_DMACR] = ZX_UART011_DMACR, ++}; ++ ++static unsigned int get_fifosize_zte(struct amba_device *dev) ++{ ++ return 16; ++} ++ ++static struct vendor_data vendor_zte = { ++ .reg_offset = pl011_zte_offsets, ++ .access_32b = true, ++ .ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8, ++ .fr_busy = ZX_UART01x_FR_BUSY, ++ .fr_dsr = ZX_UART01x_FR_DSR, ++ .fr_cts = ZX_UART01x_FR_CTS, ++ .fr_ri = ZX_UART011_FR_RI, ++ .get_fifosize = get_fifosize_zte, ++}; ++ ++/* Deals with DMA transactions */ ++ ++struct pl011_sgbuf { ++ struct scatterlist sg; ++ char *buf; ++}; ++ ++struct pl011_dmarx_data { ++ struct dma_chan *chan; ++ struct completion complete; ++ bool use_buf_b; ++ struct pl011_sgbuf sgbuf_a; ++ struct pl011_sgbuf sgbuf_b; ++ dma_cookie_t cookie; ++ bool running; ++ struct timer_list timer; ++ unsigned int last_residue; ++ unsigned long last_jiffies; ++ bool auto_poll_rate; ++ unsigned int poll_rate; ++ unsigned int poll_timeout; ++}; ++ ++struct pl011_dmatx_data { ++ struct dma_chan *chan; ++ struct scatterlist sg; ++ char *buf; ++ bool queued; ++}; ++ ++/* ++ * We wrap our port structure around the generic uart_port. ++ */ ++struct uart_amba_port { ++ struct uart_port port; ++ const u16 *reg_offset; ++ struct clk *clk; ++ const struct vendor_data *vendor; ++ unsigned int dmacr; /* dma control reg */ ++ unsigned int im; /* interrupt mask */ ++ unsigned int old_status; ++ unsigned int fifosize; /* vendor-specific */ ++ unsigned int old_cr; /* state during shutdown */ ++ unsigned int fixed_baud; /* vendor-set fixed baud rate */ ++ char type[12]; ++#ifdef CONFIG_DMA_ENGINE ++ /* DMA stuff */ ++ bool using_tx_dma; ++ bool using_rx_dma; ++ struct pl011_dmarx_data dmarx; ++ struct pl011_dmatx_data dmatx; ++ bool dma_probed; ++#endif ++}; ++ ++static unsigned int pl011_reg_to_offset(const struct uart_amba_port *uap, ++ unsigned int reg) ++{ ++ return uap->reg_offset[reg]; ++} ++ ++static unsigned int pl011_read(const struct uart_amba_port *uap, ++ unsigned int reg) ++{ ++ void __iomem *addr = uap->port.membase + pl011_reg_to_offset(uap, reg); ++ ++ return (uap->port.iotype == UPIO_MEM32) ? 
++ readl_relaxed(addr) : readw_relaxed(addr); ++} ++ ++static void pl011_write(unsigned int val, const struct uart_amba_port *uap, ++ unsigned int reg) ++{ ++ void __iomem *addr = uap->port.membase + pl011_reg_to_offset(uap, reg); ++ ++ if (uap->port.iotype == UPIO_MEM32) ++ writel_relaxed(val, addr); ++ else ++ writew_relaxed(val, addr); ++} ++ ++/* ++ * Reads up to 256 characters from the FIFO or until it's empty and ++ * inserts them into the TTY layer. Returns the number of characters ++ * read from the FIFO. ++ */ ++static int pl011_fifo_to_tty(struct uart_amba_port *uap) ++{ ++ u16 status; ++ unsigned int ch, flag, fifotaken; ++ ++ for (fifotaken = 0; fifotaken != 256; fifotaken++) { ++ status = pl011_read(uap, REG_FR); ++ if (status & UART01x_FR_RXFE) ++ break; ++ ++ /* Take chars from the FIFO and update status */ ++ ch = pl011_read(uap, REG_DR) | UART_DUMMY_DR_RX; ++ flag = TTY_NORMAL; ++ uap->port.icount.rx++; ++ ++ if (unlikely(ch & UART_DR_ERROR)) { ++ if (ch & UART011_DR_BE) { ++ ch &= ~(UART011_DR_FE | UART011_DR_PE); ++ uap->port.icount.brk++; ++ if (uart_handle_break(&uap->port)) ++ continue; ++ } else if (ch & UART011_DR_PE) ++ uap->port.icount.parity++; ++ else if (ch & UART011_DR_FE) ++ uap->port.icount.frame++; ++ if (ch & UART011_DR_OE) ++ uap->port.icount.overrun++; ++ ++ ch &= uap->port.read_status_mask; ++ ++ if (ch & UART011_DR_BE) ++ flag = TTY_BREAK; ++ else if (ch & UART011_DR_PE) ++ flag = TTY_PARITY; ++ else if (ch & UART011_DR_FE) ++ flag = TTY_FRAME; ++ } ++ ++ if (uart_handle_sysrq_char(&uap->port, ch & 255)) ++ continue; ++ ++ uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag); ++ } ++ ++ return fifotaken; ++} ++ ++ ++/* ++ * All the DMA operation mode stuff goes inside this ifdef. ++ * This assumes that you have a generic DMA device interface, ++ * no custom DMA interfaces are supported. 
++ */ ++#ifdef CONFIG_DMA_ENGINE ++ ++#define PL011_DMA_BUFFER_SIZE PAGE_SIZE ++ ++static int pl011_sgbuf_init(struct dma_chan *chan, struct pl011_sgbuf *sg, ++ enum dma_data_direction dir) ++{ ++ dma_addr_t dma_addr; ++ ++ sg->buf = dma_alloc_coherent(chan->device->dev, ++ PL011_DMA_BUFFER_SIZE, &dma_addr, GFP_KERNEL); ++ if (!sg->buf) ++ return -ENOMEM; ++ ++ sg_init_table(&sg->sg, 1); ++ sg_set_page(&sg->sg, phys_to_page(dma_addr), ++ PL011_DMA_BUFFER_SIZE, offset_in_page(dma_addr)); ++ sg_dma_address(&sg->sg) = dma_addr; ++ sg_dma_len(&sg->sg) = PL011_DMA_BUFFER_SIZE; ++ ++ return 0; ++} ++ ++static void pl011_sgbuf_free(struct dma_chan *chan, struct pl011_sgbuf *sg, ++ enum dma_data_direction dir) ++{ ++ if (sg->buf) { ++ dma_free_coherent(chan->device->dev, ++ PL011_DMA_BUFFER_SIZE, sg->buf, ++ sg_dma_address(&sg->sg)); ++ } ++} ++ ++static void pl011_dma_probe(struct uart_amba_port *uap) ++{ ++ /* DMA is the sole user of the platform data right now */ ++ struct amba_pl011_data *plat = dev_get_platdata(uap->port.dev); ++ struct device *dev = uap->port.dev; ++ struct dma_slave_config tx_conf = { ++ .dst_addr = uap->port.mapbase + ++ pl011_reg_to_offset(uap, REG_DR), ++ .dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE, ++ .direction = DMA_MEM_TO_DEV, ++ .dst_maxburst = uap->fifosize >> 1, ++ .device_fc = false, ++ }; ++ struct dma_chan *chan; ++ dma_cap_mask_t mask; ++ ++ uap->dma_probed = true; ++ chan = dma_request_slave_channel_reason(dev, "tx"); ++ if (IS_ERR(chan)) { ++ if (PTR_ERR(chan) == -EPROBE_DEFER) { ++ uap->dma_probed = false; ++ return; ++ } ++ ++ /* We need platform data */ ++ if (!plat || !plat->dma_filter) { ++ dev_info(uap->port.dev, "no DMA platform data\n"); ++ return; ++ } ++ ++ /* Try to acquire a generic DMA engine slave TX channel */ ++ dma_cap_zero(mask); ++ dma_cap_set(DMA_SLAVE, mask); ++ ++ chan = dma_request_channel(mask, plat->dma_filter, ++ plat->dma_tx_param); ++ if (!chan) { ++ dev_err(uap->port.dev, "no TX DMA channel!\n"); ++ return; ++ } ++ } ++ ++ dmaengine_slave_config(chan, &tx_conf); ++ uap->dmatx.chan = chan; ++ ++ dev_info(uap->port.dev, "DMA channel TX %s\n", ++ dma_chan_name(uap->dmatx.chan)); ++ ++ /* Optionally make use of an RX channel as well */ ++ chan = dma_request_slave_channel(dev, "rx"); ++ ++ if (!chan && plat && plat->dma_rx_param) { ++ chan = dma_request_channel(mask, plat->dma_filter, plat->dma_rx_param); ++ ++ if (!chan) { ++ dev_err(uap->port.dev, "no RX DMA channel!\n"); ++ return; ++ } ++ } ++ ++ if (chan) { ++ struct dma_slave_config rx_conf = { ++ .src_addr = uap->port.mapbase + ++ pl011_reg_to_offset(uap, REG_DR), ++ .src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE, ++ .direction = DMA_DEV_TO_MEM, ++ .src_maxburst = uap->fifosize >> 2, ++ .device_fc = false, ++ }; ++ struct dma_slave_caps caps; ++ ++ /* ++ * Some DMA controllers provide information on their capabilities. ++ * If the controller does, check for suitable residue processing ++ * otherwise assime all is well. ++ */ ++ if (0 == dma_get_slave_caps(chan, &caps)) { ++ if (caps.residue_granularity == ++ DMA_RESIDUE_GRANULARITY_DESCRIPTOR) { ++ dma_release_channel(chan); ++ dev_info(uap->port.dev, ++ "RX DMA disabled - no residue processing\n"); ++ return; ++ } ++ } ++ dmaengine_slave_config(chan, &rx_conf); ++ uap->dmarx.chan = chan; ++ ++ uap->dmarx.auto_poll_rate = false; ++ if (plat && plat->dma_rx_poll_enable) { ++ /* Set poll rate if specified. 
*/ ++ if (plat->dma_rx_poll_rate) { ++ uap->dmarx.auto_poll_rate = false; ++ uap->dmarx.poll_rate = plat->dma_rx_poll_rate; ++ } else { ++ /* ++ * 100 ms defaults to poll rate if not ++ * specified. This will be adjusted with ++ * the baud rate at set_termios. ++ */ ++ uap->dmarx.auto_poll_rate = true; ++ uap->dmarx.poll_rate = 100; ++ } ++ /* 3 secs defaults poll_timeout if not specified. */ ++ if (plat->dma_rx_poll_timeout) ++ uap->dmarx.poll_timeout = ++ plat->dma_rx_poll_timeout; ++ else ++ uap->dmarx.poll_timeout = 3000; ++ } else if (!plat && dev->of_node) { ++ uap->dmarx.auto_poll_rate = of_property_read_bool( ++ dev->of_node, "auto-poll"); ++ if (uap->dmarx.auto_poll_rate) { ++ u32 x; ++ ++ if (0 == of_property_read_u32(dev->of_node, ++ "poll-rate-ms", &x)) ++ uap->dmarx.poll_rate = x; ++ else ++ uap->dmarx.poll_rate = 100; ++ if (0 == of_property_read_u32(dev->of_node, ++ "poll-timeout-ms", &x)) ++ uap->dmarx.poll_timeout = x; ++ else ++ uap->dmarx.poll_timeout = 3000; ++ } ++ } ++ dev_info(uap->port.dev, "DMA channel RX %s\n", ++ dma_chan_name(uap->dmarx.chan)); ++ } ++} ++ ++static void pl011_dma_remove(struct uart_amba_port *uap) ++{ ++ if (uap->dmatx.chan) ++ dma_release_channel(uap->dmatx.chan); ++ if (uap->dmarx.chan) ++ dma_release_channel(uap->dmarx.chan); ++} ++ ++/* Forward declare these for the refill routine */ ++static int pl011_dma_tx_refill(struct uart_amba_port *uap); ++static void pl011_start_tx_pio(struct uart_amba_port *uap); ++ ++/* ++ * The current DMA TX buffer has been sent. ++ * Try to queue up another DMA buffer. ++ */ ++static void pl011_dma_tx_callback(void *data) ++{ ++ struct uart_amba_port *uap = data; ++ struct pl011_dmatx_data *dmatx = &uap->dmatx; ++ unsigned long flags; ++ u16 dmacr; ++ ++ spin_lock_irqsave(&uap->port.lock, flags); ++ if (uap->dmatx.queued) ++ dma_unmap_sg(dmatx->chan->device->dev, &dmatx->sg, 1, ++ DMA_TO_DEVICE); ++ ++ dmacr = uap->dmacr; ++ uap->dmacr = dmacr & ~UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ ++ /* ++ * If TX DMA was disabled, it means that we've stopped the DMA for ++ * some reason (eg, XOFF received, or we want to send an X-char.) ++ * ++ * Note: we need to be careful here of a potential race between DMA ++ * and the rest of the driver - if the driver disables TX DMA while ++ * a TX buffer completing, we must update the tx queued status to ++ * get further refills (hence we check dmacr). ++ */ ++ if (!(dmacr & UART011_TXDMAE) || uart_tx_stopped(&uap->port) || ++ uart_circ_empty(&uap->port.state->xmit)) { ++ uap->dmatx.queued = false; ++ spin_unlock_irqrestore(&uap->port.lock, flags); ++ return; ++ } ++ ++ if (pl011_dma_tx_refill(uap) <= 0) ++ /* ++ * We didn't queue a DMA buffer for some reason, but we ++ * have data pending to be sent. Re-enable the TX IRQ. ++ */ ++ pl011_start_tx_pio(uap); ++ ++ spin_unlock_irqrestore(&uap->port.lock, flags); ++} ++ ++/* ++ * Try to refill the TX DMA buffer. ++ * Locking: called with port lock held and IRQs disabled. ++ * Returns: ++ * 1 if we queued up a TX DMA buffer. 
++ * 0 if we didn't want to handle this by DMA ++ * <0 on error ++ */ ++static int pl011_dma_tx_refill(struct uart_amba_port *uap) ++{ ++ struct pl011_dmatx_data *dmatx = &uap->dmatx; ++ struct dma_chan *chan = dmatx->chan; ++ struct dma_device *dma_dev = chan->device; ++ struct dma_async_tx_descriptor *desc; ++ struct circ_buf *xmit = &uap->port.state->xmit; ++ unsigned int count; ++ ++ /* ++ * Try to avoid the overhead involved in using DMA if the ++ * transaction fits in the first half of the FIFO, by using ++ * the standard interrupt handling. This ensures that we ++ * issue a uart_write_wakeup() at the appropriate time. ++ */ ++ count = uart_circ_chars_pending(xmit); ++ if (count < (uap->fifosize >> 1)) { ++ uap->dmatx.queued = false; ++ return 0; ++ } ++ ++ /* ++ * Bodge: don't send the last character by DMA, as this ++ * will prevent XON from notifying us to restart DMA. ++ */ ++ count -= 1; ++ ++ /* Else proceed to copy the TX chars to the DMA buffer and fire DMA */ ++ if (count > PL011_DMA_BUFFER_SIZE) ++ count = PL011_DMA_BUFFER_SIZE; ++ ++ if (xmit->tail < xmit->head) ++ memcpy(&dmatx->buf[0], &xmit->buf[xmit->tail], count); ++ else { ++ size_t first = UART_XMIT_SIZE - xmit->tail; ++ size_t second; ++ ++ if (first > count) ++ first = count; ++ second = count - first; ++ ++ memcpy(&dmatx->buf[0], &xmit->buf[xmit->tail], first); ++ if (second) ++ memcpy(&dmatx->buf[first], &xmit->buf[0], second); ++ } ++ ++ dmatx->sg.length = count; ++ ++ if (dma_map_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE) != 1) { ++ uap->dmatx.queued = false; ++ dev_dbg(uap->port.dev, "unable to map TX DMA\n"); ++ return -EBUSY; ++ } ++ ++ desc = dmaengine_prep_slave_sg(chan, &dmatx->sg, 1, DMA_MEM_TO_DEV, ++ DMA_PREP_INTERRUPT | DMA_CTRL_ACK); ++ if (!desc) { ++ dma_unmap_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE); ++ uap->dmatx.queued = false; ++ /* ++ * If DMA cannot be used right now, we complete this ++ * transaction via IRQ and let the TTY layer retry. ++ */ ++ dev_dbg(uap->port.dev, "TX DMA busy\n"); ++ return -EBUSY; ++ } ++ ++ /* Some data to go along to the callback */ ++ desc->callback = pl011_dma_tx_callback; ++ desc->callback_param = uap; ++ ++ /* All errors should happen at prepare time */ ++ dmaengine_submit(desc); ++ ++ /* Fire the DMA transaction */ ++ dma_dev->device_issue_pending(chan); ++ ++ uap->dmacr |= UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ uap->dmatx.queued = true; ++ ++ /* ++ * Now we know that DMA will fire, so advance the ring buffer ++ * with the stuff we just dispatched. ++ */ ++ xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); ++ uap->port.icount.tx += count; ++ ++ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) ++ uart_write_wakeup(&uap->port); ++ ++ return 1; ++} ++ ++/* ++ * We received a transmit interrupt without a pending X-char but with ++ * pending characters. ++ * Locking: called with port lock held and IRQs disabled. ++ * Returns: ++ * false if we want to use PIO to transmit ++ * true if we queued a DMA buffer ++ */ ++static bool pl011_dma_tx_irq(struct uart_amba_port *uap) ++{ ++ if (!uap->using_tx_dma) ++ return false; ++ ++ /* ++ * If we already have a TX buffer queued, but received a ++ * TX interrupt, it will be because we've just sent an X-char. ++ * Ensure the TX DMA is enabled and the TX IRQ is disabled. 
++ */ ++ if (uap->dmatx.queued) { ++ uap->dmacr |= UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ uap->im &= ~UART011_TXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ return true; ++ } ++ ++ /* ++ * We don't have a TX buffer queued, so try to queue one. ++ * If we successfully queued a buffer, mask the TX IRQ. ++ */ ++ if (pl011_dma_tx_refill(uap) > 0) { ++ uap->im &= ~UART011_TXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ return true; ++ } ++ return false; ++} ++ ++/* ++ * Stop the DMA transmit (eg, due to received XOFF). ++ * Locking: called with port lock held and IRQs disabled. ++ */ ++static inline void pl011_dma_tx_stop(struct uart_amba_port *uap) ++{ ++ if (uap->dmatx.queued) { ++ uap->dmacr &= ~UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ } ++} ++ ++/* ++ * Try to start a DMA transmit, or in the case of an XON/OFF ++ * character queued for send, try to get that character out ASAP. ++ * Locking: called with port lock held and IRQs disabled. ++ * Returns: ++ * false if we want the TX IRQ to be enabled ++ * true if we have a buffer queued ++ */ ++static inline bool pl011_dma_tx_start(struct uart_amba_port *uap) ++{ ++ u16 dmacr; ++ ++ if (!uap->using_tx_dma) ++ return false; ++ ++ if (!uap->port.x_char) { ++ /* no X-char, try to push chars out in DMA mode */ ++ bool ret = true; ++ ++ if (!uap->dmatx.queued) { ++ if (pl011_dma_tx_refill(uap) > 0) { ++ uap->im &= ~UART011_TXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ } else ++ ret = false; ++ } else if (!(uap->dmacr & UART011_TXDMAE)) { ++ uap->dmacr |= UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ } ++ return ret; ++ } ++ ++ /* ++ * We have an X-char to send. Disable DMA to prevent it loading ++ * the TX fifo, and then see if we can stuff it into the FIFO. ++ */ ++ dmacr = uap->dmacr; ++ uap->dmacr &= ~UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ ++ if (pl011_read(uap, REG_FR) & UART01x_FR_TXFF) { ++ /* ++ * No space in the FIFO, so enable the transmit interrupt ++ * so we know when there is space. Note that once we've ++ * loaded the character, we should just re-enable DMA. ++ */ ++ return false; ++ } ++ ++ pl011_write(uap->port.x_char, uap, REG_DR); ++ uap->port.icount.tx++; ++ uap->port.x_char = 0; ++ ++ /* Success - restore the DMA state */ ++ uap->dmacr = dmacr; ++ pl011_write(dmacr, uap, REG_DMACR); ++ ++ return true; ++} ++ ++/* ++ * Flush the transmit buffer. ++ * Locking: called with port lock held and IRQs disabled. ++ */ ++static void pl011_dma_flush_buffer(struct uart_port *port) ++__releases(&uap->port.lock) ++__acquires(&uap->port.lock) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ if (!uap->using_tx_dma) ++ return; ++ ++ dmaengine_terminate_async(uap->dmatx.chan); ++ ++ if (uap->dmatx.queued) { ++ dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, ++ DMA_TO_DEVICE); ++ uap->dmatx.queued = false; ++ uap->dmacr &= ~UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ } ++} ++ ++static void pl011_dma_rx_callback(void *data); ++ ++static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap) ++{ ++ struct dma_chan *rxchan = uap->dmarx.chan; ++ struct pl011_dmarx_data *dmarx = &uap->dmarx; ++ struct dma_async_tx_descriptor *desc; ++ struct pl011_sgbuf *sgbuf; ++ ++ if (!rxchan) ++ return -EIO; ++ ++ /* Start the RX DMA job */ ++ sgbuf = uap->dmarx.use_buf_b ? 
++ &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; ++ desc = dmaengine_prep_slave_sg(rxchan, &sgbuf->sg, 1, ++ DMA_DEV_TO_MEM, ++ DMA_PREP_INTERRUPT | DMA_CTRL_ACK); ++ /* ++ * If the DMA engine is busy and cannot prepare a ++ * channel, no big deal, the driver will fall back ++ * to interrupt mode as a result of this error code. ++ */ ++ if (!desc) { ++ uap->dmarx.running = false; ++ dmaengine_terminate_all(rxchan); ++ return -EBUSY; ++ } ++ ++ /* Some data to go along to the callback */ ++ desc->callback = pl011_dma_rx_callback; ++ desc->callback_param = uap; ++ dmarx->cookie = dmaengine_submit(desc); ++ dma_async_issue_pending(rxchan); ++ ++ uap->dmacr |= UART011_RXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ uap->dmarx.running = true; ++ ++ uap->im &= ~UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ ++ return 0; ++} ++ ++/* ++ * This is called when either the DMA job is complete, or ++ * the FIFO timeout interrupt occurred. This must be called ++ * with the port spinlock uap->port.lock held. ++ */ ++static void pl011_dma_rx_chars(struct uart_amba_port *uap, ++ u32 pending, bool use_buf_b, ++ bool readfifo) ++{ ++ struct tty_port *port = &uap->port.state->port; ++ struct pl011_sgbuf *sgbuf = use_buf_b ? ++ &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; ++ int dma_count = 0; ++ u32 fifotaken = 0; /* only used for vdbg() */ ++ ++ struct pl011_dmarx_data *dmarx = &uap->dmarx; ++ int dmataken = 0; ++ ++ if (uap->dmarx.poll_rate) { ++ /* The data can be taken by polling */ ++ dmataken = sgbuf->sg.length - dmarx->last_residue; ++ /* Recalculate the pending size */ ++ if (pending >= dmataken) ++ pending -= dmataken; ++ } ++ ++ /* Pick the remain data from the DMA */ ++ if (pending) { ++ ++ /* ++ * First take all chars in the DMA pipe, then look in the FIFO. ++ * Note that tty_insert_flip_buf() tries to take as many chars ++ * as it can. ++ */ ++ dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, ++ pending); ++ ++ uap->port.icount.rx += dma_count; ++ if (dma_count < pending) ++ dev_warn(uap->port.dev, ++ "couldn't insert all characters (TTY is full?)\n"); ++ } ++ ++ /* Reset the last_residue for Rx DMA poll */ ++ if (uap->dmarx.poll_rate) ++ dmarx->last_residue = sgbuf->sg.length; ++ ++ /* ++ * Only continue with trying to read the FIFO if all DMA chars have ++ * been taken first. ++ */ ++ if (dma_count == pending && readfifo) { ++ /* Clear any error flags */ ++ pl011_write(UART011_OEIS | UART011_BEIS | UART011_PEIS | ++ UART011_FEIS, uap, REG_ICR); ++ ++ /* ++ * If we read all the DMA'd characters, and we had an ++ * incomplete buffer, that could be due to an rx error, or ++ * maybe we just timed out. Read any pending chars and check ++ * the error status. ++ * ++ * Error conditions will only occur in the FIFO, these will ++ * trigger an immediate interrupt and stop the DMA job, so we ++ * will always find the error in the FIFO, never in the DMA ++ * buffer. ++ */ ++ fifotaken = pl011_fifo_to_tty(uap); ++ } ++ ++ spin_unlock(&uap->port.lock); ++ dev_vdbg(uap->port.dev, ++ "Took %d chars from DMA buffer and %d chars from the FIFO\n", ++ dma_count, fifotaken); ++ tty_flip_buffer_push(port); ++ spin_lock(&uap->port.lock); ++} ++ ++static void pl011_dma_rx_irq(struct uart_amba_port *uap) ++{ ++ struct pl011_dmarx_data *dmarx = &uap->dmarx; ++ struct dma_chan *rxchan = dmarx->chan; ++ struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? 
++ &dmarx->sgbuf_b : &dmarx->sgbuf_a; ++ size_t pending; ++ struct dma_tx_state state; ++ enum dma_status dmastat; ++ ++ /* ++ * Pause the transfer so we can trust the current counter, ++ * do this before we pause the PL011 block, else we may ++ * overflow the FIFO. ++ */ ++ if (dmaengine_pause(rxchan)) ++ dev_err(uap->port.dev, "unable to pause DMA transfer\n"); ++ dmastat = rxchan->device->device_tx_status(rxchan, ++ dmarx->cookie, &state); ++ if (dmastat != DMA_PAUSED) ++ dev_err(uap->port.dev, "unable to pause DMA transfer\n"); ++ ++ /* Disable RX DMA - incoming data will wait in the FIFO */ ++ uap->dmacr &= ~UART011_RXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ uap->dmarx.running = false; ++ ++ pending = sgbuf->sg.length - state.residue; ++ BUG_ON(pending > PL011_DMA_BUFFER_SIZE); ++ /* Then we terminate the transfer - we now know our residue */ ++ dmaengine_terminate_all(rxchan); ++ ++ /* ++ * This will take the chars we have so far and insert ++ * into the framework. ++ */ ++ pl011_dma_rx_chars(uap, pending, dmarx->use_buf_b, true); ++ ++ /* Switch buffer & re-trigger DMA job */ ++ dmarx->use_buf_b = !dmarx->use_buf_b; ++ if (pl011_dma_rx_trigger_dma(uap)) { ++ dev_dbg(uap->port.dev, "could not retrigger RX DMA job " ++ "fall back to interrupt mode\n"); ++ uap->im |= UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ } ++} ++ ++static void pl011_dma_rx_callback(void *data) ++{ ++ struct uart_amba_port *uap = data; ++ struct pl011_dmarx_data *dmarx = &uap->dmarx; ++ struct dma_chan *rxchan = dmarx->chan; ++ bool lastbuf = dmarx->use_buf_b; ++ struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? ++ &dmarx->sgbuf_b : &dmarx->sgbuf_a; ++ size_t pending; ++ struct dma_tx_state state; ++ int ret; ++ ++ /* ++ * This completion interrupt occurs typically when the ++ * RX buffer is totally stuffed but no timeout has yet ++ * occurred. When that happens, we just want the RX ++ * routine to flush out the secondary DMA buffer while ++ * we immediately trigger the next DMA job. ++ */ ++ spin_lock_irq(&uap->port.lock); ++ /* ++ * Rx data can be taken by the UART interrupts during ++ * the DMA irq handler. So we check the residue here. ++ */ ++ rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); ++ pending = sgbuf->sg.length - state.residue; ++ BUG_ON(pending > PL011_DMA_BUFFER_SIZE); ++ /* Then we terminate the transfer - we now know our residue */ ++ dmaengine_terminate_all(rxchan); ++ ++ uap->dmarx.running = false; ++ dmarx->use_buf_b = !lastbuf; ++ ret = pl011_dma_rx_trigger_dma(uap); ++ ++ pl011_dma_rx_chars(uap, pending, lastbuf, false); ++ spin_unlock_irq(&uap->port.lock); ++ /* ++ * Do this check after we picked the DMA chars so we don't ++ * get some IRQ immediately from RX. ++ */ ++ if (ret) { ++ dev_dbg(uap->port.dev, "could not retrigger RX DMA job " ++ "fall back to interrupt mode\n"); ++ uap->im |= UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ } ++} ++ ++/* ++ * Stop accepting received characters, when we're shutting down or ++ * suspending this port. ++ * Locking: called with port lock held and IRQs disabled. ++ */ ++static inline void pl011_dma_rx_stop(struct uart_amba_port *uap) ++{ ++ /* FIXME. Just disable the DMA enable */ ++ uap->dmacr &= ~UART011_RXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++} ++ ++/* ++ * Timer handler for Rx DMA polling. ++ * Every polling, It checks the residue in the dma buffer and transfer ++ * data to the tty. Also, last_residue is updated for the next polling. 
++ */ ++static void pl011_dma_rx_poll(struct timer_list *t) ++{ ++ struct uart_amba_port *uap = from_timer(uap, t, dmarx.timer); ++ struct tty_port *port = &uap->port.state->port; ++ struct pl011_dmarx_data *dmarx = &uap->dmarx; ++ struct dma_chan *rxchan = uap->dmarx.chan; ++ unsigned long flags = 0; ++ unsigned int dmataken = 0; ++ unsigned int size = 0; ++ struct pl011_sgbuf *sgbuf; ++ int dma_count; ++ struct dma_tx_state state; ++ ++ sgbuf = dmarx->use_buf_b ? &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; ++ rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); ++ if (likely(state.residue < dmarx->last_residue)) { ++ dmataken = sgbuf->sg.length - dmarx->last_residue; ++ size = dmarx->last_residue - state.residue; ++ dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, ++ size); ++ if (dma_count == size) ++ dmarx->last_residue = state.residue; ++ dmarx->last_jiffies = jiffies; ++ } ++ tty_flip_buffer_push(port); ++ ++ /* ++ * If no data is received in poll_timeout, the driver will fall back ++ * to interrupt mode. We will retrigger DMA at the first interrupt. ++ */ ++ if (jiffies_to_msecs(jiffies - dmarx->last_jiffies) ++ > uap->dmarx.poll_timeout) { ++ ++ spin_lock_irqsave(&uap->port.lock, flags); ++ pl011_dma_rx_stop(uap); ++ uap->im |= UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ spin_unlock_irqrestore(&uap->port.lock, flags); ++ ++ uap->dmarx.running = false; ++ dmaengine_terminate_all(rxchan); ++ del_timer(&uap->dmarx.timer); ++ } else { ++ mod_timer(&uap->dmarx.timer, ++ jiffies + msecs_to_jiffies(uap->dmarx.poll_rate)); ++ } ++} ++ ++static void pl011_dma_startup(struct uart_amba_port *uap) ++{ ++ int ret; ++ ++ if (!uap->dma_probed) ++ pl011_dma_probe(uap); ++ ++ if (!uap->dmatx.chan) ++ return; ++ ++ uap->dmatx.buf = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL | __GFP_DMA); ++ if (!uap->dmatx.buf) { ++ dev_err(uap->port.dev, "no memory for DMA TX buffer\n"); ++ uap->port.fifosize = uap->fifosize; ++ return; ++ } ++ ++ sg_init_one(&uap->dmatx.sg, uap->dmatx.buf, PL011_DMA_BUFFER_SIZE); ++ ++ /* The DMA buffer is now the FIFO the TTY subsystem can use */ ++ uap->port.fifosize = PL011_DMA_BUFFER_SIZE; ++ uap->using_tx_dma = true; ++ ++ if (!uap->dmarx.chan) ++ goto skip_rx; ++ ++ /* Allocate and map DMA RX buffers */ ++ ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_a, ++ DMA_FROM_DEVICE); ++ if (ret) { ++ dev_err(uap->port.dev, "failed to init DMA %s: %d\n", ++ "RX buffer A", ret); ++ goto skip_rx; ++ } ++ ++ ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_b, ++ DMA_FROM_DEVICE); ++ if (ret) { ++ dev_err(uap->port.dev, "failed to init DMA %s: %d\n", ++ "RX buffer B", ret); ++ pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, ++ DMA_FROM_DEVICE); ++ goto skip_rx; ++ } ++ ++ uap->using_rx_dma = true; ++ ++skip_rx: ++ /* Turn on DMA error (RX/TX will be enabled on demand) */ ++ uap->dmacr |= UART011_DMAONERR; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ ++ /* ++ * ST Micro variants has some specific dma burst threshold ++ * compensation. Set this to 16 bytes, so burst will only ++ * be issued above/below 16 bytes. 
++ */ ++ if (uap->vendor->dma_threshold) ++ pl011_write(ST_UART011_DMAWM_RX_16 | ST_UART011_DMAWM_TX_16, ++ uap, REG_ST_DMAWM); ++ ++ if (uap->using_rx_dma) { ++ if (pl011_dma_rx_trigger_dma(uap)) ++ dev_dbg(uap->port.dev, "could not trigger initial " ++ "RX DMA job, fall back to interrupt mode\n"); ++ if (uap->dmarx.poll_rate) { ++ timer_setup(&uap->dmarx.timer, pl011_dma_rx_poll, 0); ++ mod_timer(&uap->dmarx.timer, ++ jiffies + ++ msecs_to_jiffies(uap->dmarx.poll_rate)); ++ uap->dmarx.last_residue = PL011_DMA_BUFFER_SIZE; ++ uap->dmarx.last_jiffies = jiffies; ++ } ++ } ++} ++ ++static void pl011_dma_shutdown(struct uart_amba_port *uap) ++{ ++ if (!(uap->using_tx_dma || uap->using_rx_dma)) ++ return; ++ ++ /* Disable RX and TX DMA */ ++ while (pl011_read(uap, REG_FR) & uap->vendor->fr_busy) ++ cpu_relax(); ++ ++ spin_lock_irq(&uap->port.lock); ++ uap->dmacr &= ~(UART011_DMAONERR | UART011_RXDMAE | UART011_TXDMAE); ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ spin_unlock_irq(&uap->port.lock); ++ ++ if (uap->using_tx_dma) { ++ /* In theory, this should already be done by pl011_dma_flush_buffer */ ++ dmaengine_terminate_all(uap->dmatx.chan); ++ if (uap->dmatx.queued) { ++ dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, ++ DMA_TO_DEVICE); ++ uap->dmatx.queued = false; ++ } ++ ++ kfree(uap->dmatx.buf); ++ uap->using_tx_dma = false; ++ } ++ ++ if (uap->using_rx_dma) { ++ dmaengine_terminate_all(uap->dmarx.chan); ++ /* Clean up the RX DMA */ ++ pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, DMA_FROM_DEVICE); ++ pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_b, DMA_FROM_DEVICE); ++ if (uap->dmarx.poll_rate) ++ del_timer_sync(&uap->dmarx.timer); ++ uap->using_rx_dma = false; ++ } ++} ++ ++static inline bool pl011_dma_rx_available(struct uart_amba_port *uap) ++{ ++ return uap->using_rx_dma; ++} ++ ++static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) ++{ ++ return uap->using_rx_dma && uap->dmarx.running; ++} ++ ++#else ++/* Blank functions if the DMA engine is not available */ ++static inline void pl011_dma_probe(struct uart_amba_port *uap) ++{ ++} ++ ++static inline void pl011_dma_remove(struct uart_amba_port *uap) ++{ ++} ++ ++static inline void pl011_dma_startup(struct uart_amba_port *uap) ++{ ++} ++ ++static inline void pl011_dma_shutdown(struct uart_amba_port *uap) ++{ ++} ++ ++static inline bool pl011_dma_tx_irq(struct uart_amba_port *uap) ++{ ++ return false; ++} ++ ++static inline void pl011_dma_tx_stop(struct uart_amba_port *uap) ++{ ++} ++ ++static inline bool pl011_dma_tx_start(struct uart_amba_port *uap) ++{ ++ return false; ++} ++ ++static inline void pl011_dma_rx_irq(struct uart_amba_port *uap) ++{ ++} ++ ++static inline void pl011_dma_rx_stop(struct uart_amba_port *uap) ++{ ++} ++ ++static inline int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap) ++{ ++ return -EIO; ++} ++ ++static inline bool pl011_dma_rx_available(struct uart_amba_port *uap) ++{ ++ return false; ++} ++ ++static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) ++{ ++ return false; ++} ++ ++#define pl011_dma_flush_buffer NULL ++#endif ++ ++static void pl011_stop_tx(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ uap->im &= ~UART011_TXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ pl011_dma_tx_stop(uap); ++} ++ ++static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq); ++ ++/* Start TX with programmed I/O only (no DMA) */ ++static void pl011_start_tx_pio(struct 
uart_amba_port *uap) ++{ ++ if (pl011_tx_chars(uap, false)) { ++ uap->im |= UART011_TXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ } ++} ++ ++static void pl011_start_tx(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ if (!pl011_dma_tx_start(uap)) ++ pl011_start_tx_pio(uap); ++} ++ ++static void pl011_stop_rx(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ uap->im &= ~(UART011_RXIM|UART011_RTIM|UART011_FEIM| ++ UART011_PEIM|UART011_BEIM|UART011_OEIM); ++ pl011_write(uap->im, uap, REG_IMSC); ++ ++ pl011_dma_rx_stop(uap); ++} ++ ++static void pl011_enable_ms(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ uap->im |= UART011_RIMIM|UART011_CTSMIM|UART011_DCDMIM|UART011_DSRMIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++} ++ ++static void pl011_rx_chars(struct uart_amba_port *uap) ++__releases(&uap->port.lock) ++__acquires(&uap->port.lock) ++{ ++ pl011_fifo_to_tty(uap); ++ ++ spin_unlock(&uap->port.lock); ++ tty_flip_buffer_push(&uap->port.state->port); ++ /* ++ * If we were temporarily out of DMA mode for a while, ++ * attempt to switch back to DMA mode again. ++ */ ++ if (pl011_dma_rx_available(uap)) { ++ if (pl011_dma_rx_trigger_dma(uap)) { ++ dev_dbg(uap->port.dev, "could not trigger RX DMA job " ++ "fall back to interrupt mode again\n"); ++ uap->im |= UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ } else { ++#ifdef CONFIG_DMA_ENGINE ++ /* Start Rx DMA poll */ ++ if (uap->dmarx.poll_rate) { ++ uap->dmarx.last_jiffies = jiffies; ++ uap->dmarx.last_residue = PL011_DMA_BUFFER_SIZE; ++ mod_timer(&uap->dmarx.timer, ++ jiffies + ++ msecs_to_jiffies(uap->dmarx.poll_rate)); ++ } ++#endif ++ } ++ } ++ spin_lock(&uap->port.lock); ++} ++ ++static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c, ++ bool from_irq) ++{ ++ if (unlikely(!from_irq) && ++ pl011_read(uap, REG_FR) & UART01x_FR_TXFF) ++ return false; /* unable to transmit character */ ++ ++ pl011_write(c, uap, REG_DR); ++ uap->port.icount.tx++; ++ ++ return true; ++} ++ ++/* Returns true if tx interrupts have to be (kept) enabled */ ++static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) ++{ ++ struct circ_buf *xmit = &uap->port.state->xmit; ++ int count = uap->fifosize >> 1; ++ ++ if (uap->port.x_char) { ++ if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) ++ return true; ++ uap->port.x_char = 0; ++ --count; ++ } ++ if (uart_circ_empty(xmit) || uart_tx_stopped(&uap->port)) { ++ pl011_stop_tx(&uap->port); ++ return false; ++ } ++ ++ /* If we are using DMA mode, try to send some characters. 
*/ ++ if (pl011_dma_tx_irq(uap)) ++ return true; ++ ++ do { ++ if (likely(from_irq) && count-- == 0) ++ break; ++ ++ if (!pl011_tx_char(uap, xmit->buf[xmit->tail], from_irq)) ++ break; ++ ++ xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); ++ } while (!uart_circ_empty(xmit)); ++ ++ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) ++ uart_write_wakeup(&uap->port); ++ ++ if (uart_circ_empty(xmit)) { ++ pl011_stop_tx(&uap->port); ++ return false; ++ } ++ return true; ++} ++ ++static void pl011_modem_status(struct uart_amba_port *uap) ++{ ++ unsigned int status, delta; ++ ++ status = pl011_read(uap, REG_FR) & UART01x_FR_MODEM_ANY; ++ ++ delta = status ^ uap->old_status; ++ uap->old_status = status; ++ ++ if (!delta) ++ return; ++ ++ if (delta & UART01x_FR_DCD) ++ uart_handle_dcd_change(&uap->port, status & UART01x_FR_DCD); ++ ++ if (delta & uap->vendor->fr_dsr) ++ uap->port.icount.dsr++; ++ ++ if (delta & uap->vendor->fr_cts) ++ uart_handle_cts_change(&uap->port, ++ status & uap->vendor->fr_cts); ++ ++ wake_up_interruptible(&uap->port.state->port.delta_msr_wait); ++} ++ ++static void check_apply_cts_event_workaround(struct uart_amba_port *uap) ++{ ++ unsigned int dummy_read; ++ ++ if (!uap->vendor->cts_event_workaround) ++ return; ++ ++ /* workaround to make sure that all bits are unlocked.. */ ++ pl011_write(0x00, uap, REG_ICR); ++ ++ /* ++ * WA: introduce 26ns(1 uart clk) delay before W1C; ++ * single apb access will incur 2 pclk(133.12Mhz) delay, ++ * so add 2 dummy reads ++ */ ++ dummy_read = pl011_read(uap, REG_ICR); ++ dummy_read = pl011_read(uap, REG_ICR); ++} ++ ++#ifdef CONFIG_SERIAL_ATTACHED_MBIGEN ++struct workaround_oem_info { ++ char oem_id[ACPI_OEM_ID_SIZE + 1]; ++ char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; ++ u32 oem_revision; ++}; ++ ++static bool pl011_enable_hisi_wkrd; ++static struct workaround_oem_info pl011_wkrd_info[] = { ++ { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x300, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x301, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x400, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x401, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x402, ++ } ++}; ++ ++static void pl011_check_hisi_workaround(void) ++{ ++ struct acpi_table_header *tbl; ++ acpi_status status = AE_OK; ++ int i; ++ ++ status = acpi_get_table(ACPI_SIG_MADT, 0, &tbl); ++ if (ACPI_FAILURE(status) || !tbl) ++ return; ++ ++ for (i = 0; i < ARRAY_SIZE(pl011_wkrd_info); i++) { ++ if (!memcmp(pl011_wkrd_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) && ++ !memcmp(pl011_wkrd_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && ++ pl011_wkrd_info[i].oem_revision == tbl->oem_revision) { ++ pl011_enable_hisi_wkrd = true; ++ break; ++ } ++ } ++ ++ acpi_put_table(tbl); ++} ++ ++#else ++#define pl011_enable_hisi_wkrd 0 ++static inline void pl011_check_hisi_workaround(void){ } ++#endif ++ ++static irqreturn_t pl011_int(int irq, void *dev_id) ++{ ++ struct uart_amba_port *uap = dev_id; ++ unsigned long flags; ++ unsigned int status, pass_counter = AMBA_ISR_PASS_LIMIT; ++ int handled = 0; ++ ++ spin_lock_irqsave(&uap->port.lock, flags); ++ status = pl011_read(uap, REG_RIS) & uap->im; ++ if (status) { ++ do { ++ check_apply_cts_event_workaround(uap); ++ ++ pl011_write(status & ~(UART011_TXIS|UART011_RTIS| ++ UART011_RXIS), ++ uap, REG_ICR); ++ ++ if (status & (UART011_RTIS|UART011_RXIS)) { ++ if 
(pl011_dma_rx_running(uap)) ++ pl011_dma_rx_irq(uap); ++ else ++ pl011_rx_chars(uap); ++ } ++ if (status & (UART011_DSRMIS|UART011_DCDMIS| ++ UART011_CTSMIS|UART011_RIMIS)) ++ pl011_modem_status(uap); ++ if (status & UART011_TXIS) ++ pl011_tx_chars(uap, true); ++ ++ if (pass_counter-- == 0) ++ break; ++ ++ status = pl011_read(uap, REG_RIS) & uap->im; ++ } while (status != 0); ++ handled = 1; ++ } ++ ++ if (pl011_enable_hisi_wkrd) { ++ pl011_write(0, uap, REG_IMSC); ++ pl011_write(uap->im, uap, REG_IMSC); ++ } ++ ++ spin_unlock_irqrestore(&uap->port.lock, flags); ++ ++ return IRQ_RETVAL(handled); ++} ++ ++static unsigned int pl011_tx_empty(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ /* Allow feature register bits to be inverted to work around errata */ ++ unsigned int status = pl011_read(uap, REG_FR) ^ uap->vendor->inv_fr; ++ ++ return status & (uap->vendor->fr_busy | UART01x_FR_TXFF) ? ++ 0 : TIOCSER_TEMT; ++} ++ ++static unsigned int pl011_get_mctrl(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned int result = 0; ++ unsigned int status = pl011_read(uap, REG_FR); ++ ++#define TIOCMBIT(uartbit, tiocmbit) \ ++ if (status & uartbit) \ ++ result |= tiocmbit ++ ++ TIOCMBIT(UART01x_FR_DCD, TIOCM_CAR); ++ TIOCMBIT(uap->vendor->fr_dsr, TIOCM_DSR); ++ TIOCMBIT(uap->vendor->fr_cts, TIOCM_CTS); ++ TIOCMBIT(uap->vendor->fr_ri, TIOCM_RNG); ++#undef TIOCMBIT ++ return result; ++} ++ ++static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned int cr; ++ ++ cr = pl011_read(uap, REG_CR); ++ ++#define TIOCMBIT(tiocmbit, uartbit) \ ++ if (mctrl & tiocmbit) \ ++ cr |= uartbit; \ ++ else \ ++ cr &= ~uartbit ++ ++ TIOCMBIT(TIOCM_RTS, UART011_CR_RTS); ++ TIOCMBIT(TIOCM_DTR, UART011_CR_DTR); ++ TIOCMBIT(TIOCM_OUT1, UART011_CR_OUT1); ++ TIOCMBIT(TIOCM_OUT2, UART011_CR_OUT2); ++ TIOCMBIT(TIOCM_LOOP, UART011_CR_LBE); ++ ++ if (port->status & UPSTAT_AUTORTS) { ++ /* We need to disable auto-RTS if we want to turn RTS off */ ++ TIOCMBIT(TIOCM_RTS, UART011_CR_RTSEN); ++ } ++#undef TIOCMBIT ++ ++ pl011_write(cr, uap, REG_CR); ++} ++ ++static void pl011_break_ctl(struct uart_port *port, int break_state) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned long flags; ++ unsigned int lcr_h; ++ ++ spin_lock_irqsave(&uap->port.lock, flags); ++ lcr_h = pl011_read(uap, REG_LCRH_TX); ++ if (break_state == -1) ++ lcr_h |= UART01x_LCRH_BRK; ++ else ++ lcr_h &= ~UART01x_LCRH_BRK; ++ pl011_write(lcr_h, uap, REG_LCRH_TX); ++ spin_unlock_irqrestore(&uap->port.lock, flags); ++} ++ ++#ifdef CONFIG_CONSOLE_POLL ++ ++static void pl011_quiesce_irqs(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ pl011_write(pl011_read(uap, REG_MIS), uap, REG_ICR); ++ /* ++ * There is no way to clear TXIM as this is "ready to transmit IRQ", so ++ * we simply mask it. start_tx() will unmask it. ++ * ++ * Note we can race with start_tx(), and if the race happens, the ++ * polling user might get another interrupt just after we clear it. ++ * But it should be OK and can happen even w/o the race, e.g. ++ * controller immediately got some new data and raised the IRQ. 
++ * ++ * And whoever uses polling routines assumes that it manages the device ++ * (including tx queue), so we're also fine with start_tx()'s caller ++ * side. ++ */ ++ pl011_write(pl011_read(uap, REG_IMSC) & ~UART011_TXIM, uap, ++ REG_IMSC); ++} ++ ++static int pl011_get_poll_char(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned int status; ++ ++ /* ++ * The caller might need IRQs lowered, e.g. if used with KDB NMI ++ * debugger. ++ */ ++ pl011_quiesce_irqs(port); ++ ++ status = pl011_read(uap, REG_FR); ++ if (status & UART01x_FR_RXFE) ++ return NO_POLL_CHAR; ++ ++ return pl011_read(uap, REG_DR); ++} ++ ++static void pl011_put_poll_char(struct uart_port *port, ++ unsigned char ch) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ while (pl011_read(uap, REG_FR) & UART01x_FR_TXFF) ++ cpu_relax(); ++ ++ pl011_write(ch, uap, REG_DR); ++} ++ ++#endif /* CONFIG_CONSOLE_POLL */ ++ ++static int pl011_hwinit(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ int retval; ++ ++ /* Optionaly enable pins to be muxed in and configured */ ++ pinctrl_pm_select_default_state(port->dev); ++ ++ /* ++ * Try to enable the clock producer. ++ */ ++ retval = clk_prepare_enable(uap->clk); ++ if (retval) ++ return retval; ++ ++ uap->port.uartclk = clk_get_rate(uap->clk); ++ ++ /* Clear pending error and receive interrupts */ ++ pl011_write(UART011_OEIS | UART011_BEIS | UART011_PEIS | ++ UART011_FEIS | UART011_RTIS | UART011_RXIS, ++ uap, REG_ICR); ++ ++ /* ++ * Save interrupts enable mask, and enable RX interrupts in case if ++ * the interrupt is used for NMI entry. ++ */ ++ uap->im = pl011_read(uap, REG_IMSC); ++ pl011_write(UART011_RTIM | UART011_RXIM, uap, REG_IMSC); ++ ++ if (dev_get_platdata(uap->port.dev)) { ++ struct amba_pl011_data *plat; ++ ++ plat = dev_get_platdata(uap->port.dev); ++ if (plat->init) ++ plat->init(); ++ } ++ ++ pl011_check_hisi_workaround(); ++ return 0; ++} ++ ++static bool pl011_split_lcrh(const struct uart_amba_port *uap) ++{ ++ return pl011_reg_to_offset(uap, REG_LCRH_RX) != ++ pl011_reg_to_offset(uap, REG_LCRH_TX); ++} ++ ++static void pl011_write_lcr_h(struct uart_amba_port *uap, unsigned int lcr_h) ++{ ++ pl011_write(lcr_h, uap, REG_LCRH_RX); ++ if (pl011_split_lcrh(uap)) { ++ int i; ++ /* ++ * Wait 10 PCLKs before writing LCRH_TX register, ++ * to get this delay write read only register 10 times ++ */ ++ for (i = 0; i < 10; ++i) ++ pl011_write(0xff, uap, REG_MIS); ++ pl011_write(lcr_h, uap, REG_LCRH_TX); ++ } ++} ++ ++static int pl011_allocate_irq(struct uart_amba_port *uap) ++{ ++ pl011_write(uap->im, uap, REG_IMSC); ++ ++ return request_irq(uap->port.irq, pl011_int, 0, "uart-pl011", uap); ++} ++ ++/* ++ * Enable interrupts, only timeouts when using DMA ++ * if initial RX DMA job failed, start in interrupt mode ++ * as well. ++ */ ++static void pl011_enable_interrupts(struct uart_amba_port *uap) ++{ ++ unsigned int i; ++ ++ spin_lock_irq(&uap->port.lock); ++ ++ /* Clear out any spuriously appearing RX interrupts */ ++ pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR); ++ ++ /* ++ * RXIS is asserted only when the RX FIFO transitions from below ++ * to above the trigger threshold. If the RX FIFO is already ++ * full to the threshold this can't happen and RXIS will now be ++ * stuck off. 
Drain the RX FIFO explicitly to fix this: ++ */ ++ for (i = 0; i < uap->fifosize * 2; ++i) { ++ if (pl011_read(uap, REG_FR) & UART01x_FR_RXFE) ++ break; ++ ++ pl011_read(uap, REG_DR); ++ } ++ ++ uap->im = UART011_RTIM; ++ if (!pl011_dma_rx_running(uap)) ++ uap->im |= UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ spin_unlock_irq(&uap->port.lock); ++} ++ ++static int pl011_startup(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned int cr; ++ int retval; ++ ++ retval = pl011_hwinit(port); ++ if (retval) ++ goto clk_dis; ++ ++ retval = pl011_allocate_irq(uap); ++ if (retval) ++ goto clk_dis; ++ ++ pl011_write(uap->vendor->ifls, uap, REG_IFLS); ++ ++ spin_lock_irq(&uap->port.lock); ++ ++ /* restore RTS and DTR */ ++ cr = uap->old_cr & (UART011_CR_RTS | UART011_CR_DTR); ++ cr |= UART01x_CR_UARTEN | UART011_CR_RXE | UART011_CR_TXE; ++ pl011_write(cr, uap, REG_CR); ++ ++ spin_unlock_irq(&uap->port.lock); ++ ++ /* ++ * initialise the old status of the modem signals ++ */ ++ uap->old_status = pl011_read(uap, REG_FR) & UART01x_FR_MODEM_ANY; ++ ++ /* Startup DMA */ ++ pl011_dma_startup(uap); ++ ++ pl011_enable_interrupts(uap); ++ ++ return 0; ++ ++ clk_dis: ++ clk_disable_unprepare(uap->clk); ++ return retval; ++} ++ ++static int sbsa_uart_startup(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ int retval; ++ ++ retval = pl011_hwinit(port); ++ if (retval) ++ return retval; ++ ++ retval = pl011_allocate_irq(uap); ++ if (retval) ++ return retval; ++ ++ /* The SBSA UART does not support any modem status lines. */ ++ uap->old_status = 0; ++ ++ pl011_enable_interrupts(uap); ++ ++ return 0; ++} ++ ++static void pl011_shutdown_channel(struct uart_amba_port *uap, ++ unsigned int lcrh) ++{ ++ unsigned long val; ++ ++ val = pl011_read(uap, lcrh); ++ val &= ~(UART01x_LCRH_BRK | UART01x_LCRH_FEN); ++ pl011_write(val, uap, lcrh); ++} ++ ++/* ++ * disable the port. It should not disable RTS and DTR. ++ * Also RTS and DTR state should be preserved to restore ++ * it during startup(). 
++ */ ++static void pl011_disable_uart(struct uart_amba_port *uap) ++{ ++ unsigned int cr; ++ ++ uap->port.status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS); ++ spin_lock_irq(&uap->port.lock); ++ cr = pl011_read(uap, REG_CR); ++ uap->old_cr = cr; ++ cr &= UART011_CR_RTS | UART011_CR_DTR; ++ cr |= UART01x_CR_UARTEN | UART011_CR_TXE; ++ pl011_write(cr, uap, REG_CR); ++ spin_unlock_irq(&uap->port.lock); ++ ++ /* ++ * disable break condition and fifos ++ */ ++ pl011_shutdown_channel(uap, REG_LCRH_RX); ++ if (pl011_split_lcrh(uap)) ++ pl011_shutdown_channel(uap, REG_LCRH_TX); ++} ++ ++static void pl011_disable_interrupts(struct uart_amba_port *uap) ++{ ++ spin_lock_irq(&uap->port.lock); ++ ++ /* mask all interrupts and clear all pending ones */ ++ uap->im = 0; ++ pl011_write(uap->im, uap, REG_IMSC); ++ pl011_write(0xffff, uap, REG_ICR); ++ ++ spin_unlock_irq(&uap->port.lock); ++} ++ ++static void pl011_shutdown(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ pl011_disable_interrupts(uap); ++ ++ pl011_dma_shutdown(uap); ++ ++ free_irq(uap->port.irq, uap); ++ ++ pl011_disable_uart(uap); ++ ++ /* ++ * Shut down the clock producer ++ */ ++ clk_disable_unprepare(uap->clk); ++ /* Optionally let pins go into sleep states */ ++ pinctrl_pm_select_sleep_state(port->dev); ++ ++ if (dev_get_platdata(uap->port.dev)) { ++ struct amba_pl011_data *plat; ++ ++ plat = dev_get_platdata(uap->port.dev); ++ if (plat->exit) ++ plat->exit(); ++ } ++ ++ if (uap->port.ops->flush_buffer) ++ uap->port.ops->flush_buffer(port); ++} ++ ++static void sbsa_uart_shutdown(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ pl011_disable_interrupts(uap); ++ ++ free_irq(uap->port.irq, uap); ++ ++ if (uap->port.ops->flush_buffer) ++ uap->port.ops->flush_buffer(port); ++} ++ ++static void ++pl011_setup_status_masks(struct uart_port *port, struct ktermios *termios) ++{ ++ port->read_status_mask = UART011_DR_OE | 255; ++ if (termios->c_iflag & INPCK) ++ port->read_status_mask |= UART011_DR_FE | UART011_DR_PE; ++ if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) ++ port->read_status_mask |= UART011_DR_BE; ++ ++ /* ++ * Characters to ignore ++ */ ++ port->ignore_status_mask = 0; ++ if (termios->c_iflag & IGNPAR) ++ port->ignore_status_mask |= UART011_DR_FE | UART011_DR_PE; ++ if (termios->c_iflag & IGNBRK) { ++ port->ignore_status_mask |= UART011_DR_BE; ++ /* ++ * If we're ignoring parity and break indicators, ++ * ignore overruns too (for real raw support). ++ */ ++ if (termios->c_iflag & IGNPAR) ++ port->ignore_status_mask |= UART011_DR_OE; ++ } ++ ++ /* ++ * Ignore all characters if CREAD is not set. ++ */ ++ if ((termios->c_cflag & CREAD) == 0) ++ port->ignore_status_mask |= UART_DUMMY_DR_RX; ++} ++ ++static void ++pl011_set_termios(struct uart_port *port, struct ktermios *termios, ++ struct ktermios *old) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned int lcr_h, old_cr; ++ unsigned long flags; ++ unsigned int baud, quot, clkdiv; ++ ++ if (uap->vendor->oversampling) ++ clkdiv = 8; ++ else ++ clkdiv = 16; ++ ++ /* ++ * Ask the core to calculate the divisor for us. ++ */ ++ baud = uart_get_baud_rate(port, termios, old, 0, ++ port->uartclk / clkdiv); ++#ifdef CONFIG_DMA_ENGINE ++ /* ++ * Adjust RX DMA polling rate with baud rate if not specified. 
++ */ ++ if (uap->dmarx.auto_poll_rate) ++ uap->dmarx.poll_rate = DIV_ROUND_UP(10000000, baud); ++#endif ++ ++ if (baud > port->uartclk/16) ++ quot = DIV_ROUND_CLOSEST(port->uartclk * 8, baud); ++ else ++ quot = DIV_ROUND_CLOSEST(port->uartclk * 4, baud); ++ ++ switch (termios->c_cflag & CSIZE) { ++ case CS5: ++ lcr_h = UART01x_LCRH_WLEN_5; ++ break; ++ case CS6: ++ lcr_h = UART01x_LCRH_WLEN_6; ++ break; ++ case CS7: ++ lcr_h = UART01x_LCRH_WLEN_7; ++ break; ++ default: // CS8 ++ lcr_h = UART01x_LCRH_WLEN_8; ++ break; ++ } ++ if (termios->c_cflag & CSTOPB) ++ lcr_h |= UART01x_LCRH_STP2; ++ if (termios->c_cflag & PARENB) { ++ lcr_h |= UART01x_LCRH_PEN; ++ if (!(termios->c_cflag & PARODD)) ++ lcr_h |= UART01x_LCRH_EPS; ++ if (termios->c_cflag & CMSPAR) ++ lcr_h |= UART011_LCRH_SPS; ++ } ++ if (uap->fifosize > 1) ++ lcr_h |= UART01x_LCRH_FEN; ++ ++ spin_lock_irqsave(&port->lock, flags); ++ ++ /* ++ * Update the per-port timeout. ++ */ ++ uart_update_timeout(port, termios->c_cflag, baud); ++ ++ pl011_setup_status_masks(port, termios); ++ ++ if (UART_ENABLE_MS(port, termios->c_cflag)) ++ pl011_enable_ms(port); ++ ++ /* first, disable everything */ ++ old_cr = pl011_read(uap, REG_CR); ++ pl011_write(0, uap, REG_CR); ++ ++ if (termios->c_cflag & CRTSCTS) { ++ if (old_cr & UART011_CR_RTS) ++ old_cr |= UART011_CR_RTSEN; ++ ++ old_cr |= UART011_CR_CTSEN; ++ port->status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS; ++ } else { ++ old_cr &= ~(UART011_CR_CTSEN | UART011_CR_RTSEN); ++ port->status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS); ++ } ++ ++ if (uap->vendor->oversampling) { ++ if (baud > port->uartclk / 16) ++ old_cr |= ST_UART011_CR_OVSFACT; ++ else ++ old_cr &= ~ST_UART011_CR_OVSFACT; ++ } ++ ++ /* ++ * Workaround for the ST Micro oversampling variants to ++ * increase the bitrate slightly, by lowering the divisor, ++ * to avoid delayed sampling of start bit at high speeds, ++ * else we see data corruption. ++ */ ++ if (uap->vendor->oversampling) { ++ if ((baud >= 3000000) && (baud < 3250000) && (quot > 1)) ++ quot -= 1; ++ else if ((baud > 3250000) && (quot > 2)) ++ quot -= 2; ++ } ++ /* Set baud rate */ ++ pl011_write(quot & 0x3f, uap, REG_FBRD); ++ pl011_write(quot >> 6, uap, REG_IBRD); ++ ++ /* ++ * ----------v----------v----------v----------v----- ++ * NOTE: REG_LCRH_TX and REG_LCRH_RX MUST BE WRITTEN AFTER ++ * REG_FBRD & REG_IBRD. ++ * ----------^----------^----------^----------^----- ++ */ ++ pl011_write_lcr_h(uap, lcr_h); ++ pl011_write(old_cr, uap, REG_CR); ++ ++ spin_unlock_irqrestore(&port->lock, flags); ++} ++ ++static void ++sbsa_uart_set_termios(struct uart_port *port, struct ktermios *termios, ++ struct ktermios *old) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned long flags; ++ ++ tty_termios_encode_baud_rate(termios, uap->fixed_baud, uap->fixed_baud); ++ ++ /* The SBSA UART only supports 8n1 without hardware flow control. */ ++ termios->c_cflag &= ~(CSIZE | CSTOPB | PARENB | PARODD); ++ termios->c_cflag &= ~(CMSPAR | CRTSCTS); ++ termios->c_cflag |= CS8 | CLOCAL; ++ ++ spin_lock_irqsave(&port->lock, flags); ++ uart_update_timeout(port, CS8, uap->fixed_baud); ++ pl011_setup_status_masks(port, termios); ++ spin_unlock_irqrestore(&port->lock, flags); ++} ++ ++static const char *pl011_type(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ return uap->port.type == PORT_AMBA ? 
uap->type : NULL; ++} ++ ++/* ++ * Release the memory region(s) being used by 'port' ++ */ ++static void pl011_release_port(struct uart_port *port) ++{ ++ release_mem_region(port->mapbase, SZ_4K); ++} ++ ++/* ++ * Request the memory region(s) being used by 'port' ++ */ ++static int pl011_request_port(struct uart_port *port) ++{ ++ return request_mem_region(port->mapbase, SZ_4K, "uart-pl011") ++ != NULL ? 0 : -EBUSY; ++} ++ ++/* ++ * Configure/autoconfigure the port. ++ */ ++static void pl011_config_port(struct uart_port *port, int flags) ++{ ++ if (flags & UART_CONFIG_TYPE) { ++ port->type = PORT_AMBA; ++ pl011_request_port(port); ++ } ++} ++ ++/* ++ * verify the new serial_struct (for TIOCSSERIAL). ++ */ ++static int pl011_verify_port(struct uart_port *port, struct serial_struct *ser) ++{ ++ int ret = 0; ++ if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA) ++ ret = -EINVAL; ++ if (ser->irq < 0 || ser->irq >= nr_irqs) ++ ret = -EINVAL; ++ if (ser->baud_base < 9600) ++ ret = -EINVAL; ++ return ret; ++} ++ ++static const struct uart_ops amba_pl011_pops = { ++ .tx_empty = pl011_tx_empty, ++ .set_mctrl = pl011_set_mctrl, ++ .get_mctrl = pl011_get_mctrl, ++ .stop_tx = pl011_stop_tx, ++ .start_tx = pl011_start_tx, ++ .stop_rx = pl011_stop_rx, ++ .enable_ms = pl011_enable_ms, ++ .break_ctl = pl011_break_ctl, ++ .startup = pl011_startup, ++ .shutdown = pl011_shutdown, ++ .flush_buffer = pl011_dma_flush_buffer, ++ .set_termios = pl011_set_termios, ++ .type = pl011_type, ++ .release_port = pl011_release_port, ++ .request_port = pl011_request_port, ++ .config_port = pl011_config_port, ++ .verify_port = pl011_verify_port, ++#ifdef CONFIG_CONSOLE_POLL ++ .poll_init = pl011_hwinit, ++ .poll_get_char = pl011_get_poll_char, ++ .poll_put_char = pl011_put_poll_char, ++#endif ++}; ++ ++static void sbsa_uart_set_mctrl(struct uart_port *port, unsigned int mctrl) ++{ ++} ++ ++static unsigned int sbsa_uart_get_mctrl(struct uart_port *port) ++{ ++ return 0; ++} ++ ++static const struct uart_ops sbsa_uart_pops = { ++ .tx_empty = pl011_tx_empty, ++ .set_mctrl = sbsa_uart_set_mctrl, ++ .get_mctrl = sbsa_uart_get_mctrl, ++ .stop_tx = pl011_stop_tx, ++ .start_tx = pl011_start_tx, ++ .stop_rx = pl011_stop_rx, ++ .startup = sbsa_uart_startup, ++ .shutdown = sbsa_uart_shutdown, ++ .set_termios = sbsa_uart_set_termios, ++ .type = pl011_type, ++ .release_port = pl011_release_port, ++ .request_port = pl011_request_port, ++ .config_port = pl011_config_port, ++ .verify_port = pl011_verify_port, ++#ifdef CONFIG_CONSOLE_POLL ++ .poll_init = pl011_hwinit, ++ .poll_get_char = pl011_get_poll_char, ++ .poll_put_char = pl011_put_poll_char, ++#endif ++}; ++ ++static struct uart_amba_port *amba_ports[UART_NR]; ++ ++#ifdef CONFIG_SERIAL_AMBA_PL011_CONSOLE ++ ++static void pl011_console_putchar(struct uart_port *port, int ch) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ while (pl011_read(uap, REG_FR) & UART01x_FR_TXFF) ++ cpu_relax(); ++ pl011_write(ch, uap, REG_DR); ++} ++ ++static void ++pl011_console_write(struct console *co, const char *s, unsigned int count) ++{ ++ struct uart_amba_port *uap = amba_ports[co->index]; ++ unsigned int old_cr = 0, new_cr; ++ unsigned long flags; ++ int locked = 1; ++ ++ clk_enable(uap->clk); ++ ++ local_irq_save(flags); ++ if (uap->port.sysrq) ++ locked = 0; ++ else if (oops_in_progress) ++ locked = spin_trylock(&uap->port.lock); ++ else ++ spin_lock(&uap->port.lock); ++ ++ /* ++ * First save the CR then disable the interrupts ++ */ ++ if 
(!uap->vendor->always_enabled) { ++ old_cr = pl011_read(uap, REG_CR); ++ new_cr = old_cr & ~UART011_CR_CTSEN; ++ new_cr |= UART01x_CR_UARTEN | UART011_CR_TXE; ++ pl011_write(new_cr, uap, REG_CR); ++ } ++ ++ uart_console_write(&uap->port, s, count, pl011_console_putchar); ++ ++ /* ++ * Finally, wait for transmitter to become empty and restore the ++ * TCR. Allow feature register bits to be inverted to work around ++ * errata. ++ */ ++ while ((pl011_read(uap, REG_FR) ^ uap->vendor->inv_fr) ++ & uap->vendor->fr_busy) ++ cpu_relax(); ++ if (!uap->vendor->always_enabled) ++ pl011_write(old_cr, uap, REG_CR); ++ ++ if (locked) ++ spin_unlock(&uap->port.lock); ++ local_irq_restore(flags); ++ ++ clk_disable(uap->clk); ++} ++ ++static void pl011_console_get_options(struct uart_amba_port *uap, int *baud, ++ int *parity, int *bits) ++{ ++ if (pl011_read(uap, REG_CR) & UART01x_CR_UARTEN) { ++ unsigned int lcr_h, ibrd, fbrd; ++ ++ lcr_h = pl011_read(uap, REG_LCRH_TX); ++ ++ *parity = 'n'; ++ if (lcr_h & UART01x_LCRH_PEN) { ++ if (lcr_h & UART01x_LCRH_EPS) ++ *parity = 'e'; ++ else ++ *parity = 'o'; ++ } ++ ++ if ((lcr_h & 0x60) == UART01x_LCRH_WLEN_7) ++ *bits = 7; ++ else ++ *bits = 8; ++ ++ ibrd = pl011_read(uap, REG_IBRD); ++ fbrd = pl011_read(uap, REG_FBRD); ++ ++ *baud = uap->port.uartclk * 4 / (64 * ibrd + fbrd); ++ ++ if (uap->vendor->oversampling) { ++ if (pl011_read(uap, REG_CR) ++ & ST_UART011_CR_OVSFACT) ++ *baud *= 2; ++ } ++ } ++} ++ ++static int pl011_console_setup(struct console *co, char *options) ++{ ++ struct uart_amba_port *uap; ++ int baud = 38400; ++ int bits = 8; ++ int parity = 'n'; ++ int flow = 'n'; ++ int ret; ++ ++ /* ++ * Check whether an invalid uart number has been specified, and ++ * if so, search for the first available port that does have ++ * console support. ++ */ ++ if (co->index >= UART_NR) ++ co->index = 0; ++ uap = amba_ports[co->index]; ++ if (!uap) ++ return -ENODEV; ++ ++ /* Allow pins to be muxed in and configured */ ++ pinctrl_pm_select_default_state(uap->port.dev); ++ ++ ret = clk_prepare(uap->clk); ++ if (ret) ++ return ret; ++ ++ if (dev_get_platdata(uap->port.dev)) { ++ struct amba_pl011_data *plat; ++ ++ plat = dev_get_platdata(uap->port.dev); ++ if (plat->init) ++ plat->init(); ++ } ++ ++ uap->port.uartclk = clk_get_rate(uap->clk); ++ ++ if (uap->vendor->fixed_options) { ++ baud = uap->fixed_baud; ++ } else { ++ if (options) ++ uart_parse_options(options, ++ &baud, &parity, &bits, &flow); ++ else ++ pl011_console_get_options(uap, &baud, &parity, &bits); ++ } ++ ++ return uart_set_options(&uap->port, co, baud, parity, bits, flow); ++} ++ ++/** ++ * pl011_console_match - non-standard console matching ++ * @co: registering console ++ * @name: name from console command line ++ * @idx: index from console command line ++ * @options: ptr to option string from console command line ++ * ++ * Only attempts to match console command lines of the form: ++ * console=pl011,mmio|mmio32,[,] ++ * console=pl011,0x[,] ++ * This form is used to register an initial earlycon boot console and ++ * replace it with the amba_console at pl011 driver init. ++ * ++ * Performs console setup for a match (as required by interface) ++ * If no are specified, then assume the h/w is already setup. 
++ * ++ * Returns 0 if console matches; otherwise non-zero to use default matching ++ */ ++static int pl011_console_match(struct console *co, char *name, int idx, ++ char *options) ++{ ++ unsigned char iotype; ++ resource_size_t addr; ++ int i; ++ ++ /* ++ * Systems affected by the Qualcomm Technologies QDF2400 E44 erratum ++ * have a distinct console name, so make sure we check for that. ++ * The actual implementation of the erratum occurs in the probe ++ * function. ++ */ ++ if ((strcmp(name, "qdf2400_e44") != 0) && (strcmp(name, "pl011") != 0)) ++ return -ENODEV; ++ ++ if (uart_parse_earlycon(options, &iotype, &addr, &options)) ++ return -ENODEV; ++ ++ if (iotype != UPIO_MEM && iotype != UPIO_MEM32) ++ return -ENODEV; ++ ++ /* try to match the port specified on the command line */ ++ for (i = 0; i < ARRAY_SIZE(amba_ports); i++) { ++ struct uart_port *port; ++ ++ if (!amba_ports[i]) ++ continue; ++ ++ port = &amba_ports[i]->port; ++ ++ if (port->mapbase != addr) ++ continue; ++ ++ co->index = i; ++ port->cons = co; ++ return pl011_console_setup(co, options); ++ } ++ ++ return -ENODEV; ++} ++ ++static struct uart_driver amba_reg; ++static struct console amba_console = { ++ .name = "ttyAMA", ++ .write = pl011_console_write, ++ .device = uart_console_device, ++ .setup = pl011_console_setup, ++ .match = pl011_console_match, ++ .flags = CON_PRINTBUFFER | CON_ANYTIME, ++ .index = -1, ++ .data = &amba_reg, ++}; ++ ++#define AMBA_CONSOLE (&amba_console) ++ ++static void qdf2400_e44_putc(struct uart_port *port, int c) ++{ ++ while (readl(port->membase + UART01x_FR) & UART01x_FR_TXFF) ++ cpu_relax(); ++ writel(c, port->membase + UART01x_DR); ++ while (!(readl(port->membase + UART01x_FR) & UART011_FR_TXFE)) ++ cpu_relax(); ++} ++ ++static void qdf2400_e44_early_write(struct console *con, const char *s, unsigned n) ++{ ++ struct earlycon_device *dev = con->data; ++ ++ uart_console_write(&dev->port, s, n, qdf2400_e44_putc); ++} ++ ++static void pl011_putc(struct uart_port *port, int c) ++{ ++ while (readl(port->membase + UART01x_FR) & UART01x_FR_TXFF) ++ cpu_relax(); ++ if (port->iotype == UPIO_MEM32) ++ writel(c, port->membase + UART01x_DR); ++ else ++ writeb(c, port->membase + UART01x_DR); ++ while (readl(port->membase + UART01x_FR) & UART01x_FR_BUSY) ++ cpu_relax(); ++} ++ ++static void pl011_early_write(struct console *con, const char *s, unsigned n) ++{ ++ struct earlycon_device *dev = con->data; ++ ++ uart_console_write(&dev->port, s, n, pl011_putc); ++} ++ ++/* ++ * On non-ACPI systems, earlycon is enabled by specifying ++ * "earlycon=pl011,
" on the kernel command line. ++ * ++ * On ACPI ARM64 systems, an "early" console is enabled via the SPCR table, ++ * by specifying only "earlycon" on the command line. Because it requires ++ * SPCR, the console starts after ACPI is parsed, which is later than a ++ * traditional early console. ++ * ++ * To get the traditional early console that starts before ACPI is parsed, ++ * specify the full "earlycon=pl011,
" option. ++ */ ++static int __init pl011_early_console_setup(struct earlycon_device *device, ++ const char *opt) ++{ ++ if (!device->port.membase) ++ return -ENODEV; ++ ++ device->con->write = pl011_early_write; ++ ++ return 0; ++} ++OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup); ++OF_EARLYCON_DECLARE(pl011, "arm,sbsa-uart", pl011_early_console_setup); ++ ++/* ++ * On Qualcomm Datacenter Technologies QDF2400 SOCs affected by ++ * Erratum 44, traditional earlycon can be enabled by specifying ++ * "earlycon=qdf2400_e44,
". Any options are ignored. ++ * ++ * Alternatively, you can just specify "earlycon", and the early console ++ * will be enabled with the information from the SPCR table. In this ++ * case, the SPCR code will detect the need for the E44 work-around, ++ * and set the console name to "qdf2400_e44". ++ */ ++static int __init ++qdf2400_e44_early_console_setup(struct earlycon_device *device, ++ const char *opt) ++{ ++ if (!device->port.membase) ++ return -ENODEV; ++ ++ device->con->write = qdf2400_e44_early_write; ++ return 0; ++} ++EARLYCON_DECLARE(qdf2400_e44, qdf2400_e44_early_console_setup); ++ ++#else ++#define AMBA_CONSOLE NULL ++#endif ++ ++static struct uart_driver amba_reg = { ++ .owner = THIS_MODULE, ++ .driver_name = "ttyAMA", ++ .dev_name = "ttyAMA", ++ .major = SERIAL_AMBA_MAJOR, ++ .minor = SERIAL_AMBA_MINOR, ++ .nr = UART_NR, ++ .cons = AMBA_CONSOLE, ++}; ++ ++static int pl011_probe_dt_alias(int index, struct device *dev) ++{ ++ struct device_node *np; ++ static bool seen_dev_with_alias = false; ++ static bool seen_dev_without_alias = false; ++ int ret = index; ++ ++ if (!IS_ENABLED(CONFIG_OF)) ++ return ret; ++ ++ np = dev->of_node; ++ if (!np) ++ return ret; ++ ++ ret = of_alias_get_id(np, "serial"); ++ if (ret < 0) { ++ seen_dev_without_alias = true; ++ ret = index; ++ } else { ++ seen_dev_with_alias = true; ++ if (ret >= ARRAY_SIZE(amba_ports) || amba_ports[ret] != NULL) { ++ dev_warn(dev, "requested serial port %d not available.\n", ret); ++ ret = index; ++ } ++ } ++ ++ if (seen_dev_with_alias && seen_dev_without_alias) ++ dev_warn(dev, "aliased and non-aliased serial devices found in device tree. Serial port enumeration may be unpredictable.\n"); ++ ++ return ret; ++} ++ ++/* unregisters the driver also if no more ports are left */ ++static void pl011_unregister_port(struct uart_amba_port *uap) ++{ ++ int i; ++ bool busy = false; ++ ++ for (i = 0; i < ARRAY_SIZE(amba_ports); i++) { ++ if (amba_ports[i] == uap) ++ amba_ports[i] = NULL; ++ else if (amba_ports[i]) ++ busy = true; ++ } ++ pl011_dma_remove(uap); ++ if (!busy) ++ uart_unregister_driver(&amba_reg); ++} ++ ++static int pl011_find_free_port(void) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(amba_ports); i++) ++ if (amba_ports[i] == NULL) ++ return i; ++ ++ return -EBUSY; ++} ++ ++static int pl011_setup_port(struct device *dev, struct uart_amba_port *uap, ++ struct resource *mmiobase, int index) ++{ ++ void __iomem *base; ++ ++ base = devm_ioremap_resource(dev, mmiobase); ++ if (IS_ERR(base)) ++ return PTR_ERR(base); ++ ++ index = pl011_probe_dt_alias(index, dev); ++ ++ uap->old_cr = 0; ++ uap->port.dev = dev; ++ uap->port.mapbase = mmiobase->start; ++ uap->port.membase = base; ++ uap->port.fifosize = uap->fifosize; ++ uap->port.flags = UPF_BOOT_AUTOCONF; ++ uap->port.line = index; ++ spin_lock_init(&uap->port.lock); ++ ++ amba_ports[index] = uap; ++ ++ return 0; ++} ++ ++static int pl011_register_port(struct uart_amba_port *uap) ++{ ++ int ret, i; ++ ++ /* Ensure interrupts from this UART are masked and cleared */ ++ pl011_write(0, uap, REG_IMSC); ++ pl011_write(0xffff, uap, REG_ICR); ++ ++ if (!amba_reg.state) { ++ ret = uart_register_driver(&amba_reg); ++ if (ret < 0) { ++ dev_err(uap->port.dev, ++ "Failed to register AMBA-PL011 driver\n"); ++ for (i = 0; i < ARRAY_SIZE(amba_ports); i++) ++ if (amba_ports[i] == uap) ++ amba_ports[i] = NULL; ++ return ret; ++ } ++ } ++ ++ ret = uart_add_one_port(&amba_reg, &uap->port); ++ if (ret) ++ pl011_unregister_port(uap); ++ ++ return ret; ++} ++ ++static int 
pl011_probe(struct amba_device *dev, const struct amba_id *id) ++{ ++ struct uart_amba_port *uap; ++ struct vendor_data *vendor = id->data; ++ int portnr, ret; ++ ++ portnr = pl011_find_free_port(); ++ if (portnr < 0) ++ return portnr; ++ ++ uap = devm_kzalloc(&dev->dev, sizeof(struct uart_amba_port), ++ GFP_KERNEL); ++ if (!uap) ++ return -ENOMEM; ++ ++ uap->clk = devm_clk_get(&dev->dev, NULL); ++ if (IS_ERR(uap->clk)) ++ return PTR_ERR(uap->clk); ++ ++ uap->reg_offset = vendor->reg_offset; ++ uap->vendor = vendor; ++ uap->fifosize = vendor->get_fifosize(dev); ++ uap->port.iotype = vendor->access_32b ? UPIO_MEM32 : UPIO_MEM; ++ uap->port.irq = dev->irq[0]; ++ uap->port.ops = &amba_pl011_pops; ++ ++ snprintf(uap->type, sizeof(uap->type), "PL011 rev%u", amba_rev(dev)); ++ ++ ret = pl011_setup_port(&dev->dev, uap, &dev->res, portnr); ++ if (ret) ++ return ret; ++ ++ amba_set_drvdata(dev, uap); ++ ++ return pl011_register_port(uap); ++} ++ ++static int pl011_remove(struct amba_device *dev) ++{ ++ struct uart_amba_port *uap = amba_get_drvdata(dev); ++ ++ uart_remove_one_port(&amba_reg, &uap->port); ++ pl011_unregister_port(uap); ++ return 0; ++} ++ ++#ifdef CONFIG_PM_SLEEP ++static int pl011_suspend(struct device *dev) ++{ ++ struct uart_amba_port *uap = dev_get_drvdata(dev); ++ ++ if (!uap) ++ return -EINVAL; ++ ++ return uart_suspend_port(&amba_reg, &uap->port); ++} ++ ++static int pl011_resume(struct device *dev) ++{ ++ struct uart_amba_port *uap = dev_get_drvdata(dev); ++ ++ if (!uap) ++ return -EINVAL; ++ ++ return uart_resume_port(&amba_reg, &uap->port); ++} ++#endif ++ ++static SIMPLE_DEV_PM_OPS(pl011_dev_pm_ops, pl011_suspend, pl011_resume); ++ ++static int sbsa_uart_probe(struct platform_device *pdev) ++{ ++ struct uart_amba_port *uap; ++ struct resource *r; ++ int portnr, ret; ++ int baudrate; ++ ++ /* ++ * Check the mandatory baud rate parameter in the DT node early ++ * so that we can easily exit with the error. ++ */ ++ if (pdev->dev.of_node) { ++ struct device_node *np = pdev->dev.of_node; ++ ++ ret = of_property_read_u32(np, "current-speed", &baudrate); ++ if (ret) ++ return ret; ++ } else { ++ baudrate = 115200; ++ } ++ ++ portnr = pl011_find_free_port(); ++ if (portnr < 0) ++ return portnr; ++ ++ uap = devm_kzalloc(&pdev->dev, sizeof(struct uart_amba_port), ++ GFP_KERNEL); ++ if (!uap) ++ return -ENOMEM; ++ ++ ret = platform_get_irq(pdev, 0); ++ if (ret < 0) { ++ if (ret != -EPROBE_DEFER) ++ dev_err(&pdev->dev, "cannot obtain irq\n"); ++ return ret; ++ } ++ uap->port.irq = ret; ++ ++#ifdef CONFIG_ACPI_SPCR_TABLE ++ if (qdf2400_e44_present) { ++ dev_info(&pdev->dev, "working around QDF2400 SoC erratum 44\n"); ++ uap->vendor = &vendor_qdt_qdf2400_e44; ++ } else ++#endif ++ uap->vendor = &vendor_sbsa; ++ ++ uap->reg_offset = uap->vendor->reg_offset; ++ uap->fifosize = 32; ++ uap->port.iotype = uap->vendor->access_32b ? 
UPIO_MEM32 : UPIO_MEM; ++ uap->port.ops = &sbsa_uart_pops; ++ uap->fixed_baud = baudrate; ++ ++ snprintf(uap->type, sizeof(uap->type), "SBSA"); ++ ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ ++ ret = pl011_setup_port(&pdev->dev, uap, r, portnr); ++ if (ret) ++ return ret; ++ ++ platform_set_drvdata(pdev, uap); ++ ++ return pl011_register_port(uap); ++} ++ ++static int sbsa_uart_remove(struct platform_device *pdev) ++{ ++ struct uart_amba_port *uap = platform_get_drvdata(pdev); ++ ++ uart_remove_one_port(&amba_reg, &uap->port); ++ pl011_unregister_port(uap); ++ return 0; ++} ++ ++static const struct of_device_id sbsa_uart_of_match[] = { ++ { .compatible = "arm,sbsa-uart", }, ++ {}, ++}; ++MODULE_DEVICE_TABLE(of, sbsa_uart_of_match); ++ ++static const struct acpi_device_id sbsa_uart_acpi_match[] = { ++ { "ARMH0011", 0 }, ++ {}, ++}; ++MODULE_DEVICE_TABLE(acpi, sbsa_uart_acpi_match); ++ ++static struct platform_driver arm_sbsa_uart_platform_driver = { ++ .probe = sbsa_uart_probe, ++ .remove = sbsa_uart_remove, ++ .driver = { ++ .name = "sbsa-uart", ++ .of_match_table = of_match_ptr(sbsa_uart_of_match), ++ .acpi_match_table = ACPI_PTR(sbsa_uart_acpi_match), ++ .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_AMBA_PL011), ++ }, ++}; ++ ++static const struct amba_id pl011_ids[] = { ++ { ++ .id = 0x00041011, ++ .mask = 0x000fffff, ++ .data = &vendor_arm, ++ }, ++ { ++ .id = 0x00380802, ++ .mask = 0x00ffffff, ++ .data = &vendor_st, ++ }, ++ { ++ .id = AMBA_LINUX_ID(0x00, 0x1, 0xffe), ++ .mask = 0x00ffffff, ++ .data = &vendor_zte, ++ }, ++ { 0, 0 }, ++}; ++ ++MODULE_DEVICE_TABLE(amba, pl011_ids); ++ ++static struct amba_driver pl011_driver = { ++ .drv = { ++ .name = "uart-pl011", ++ .pm = &pl011_dev_pm_ops, ++ .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_AMBA_PL011), ++ }, ++ .id_table = pl011_ids, ++ .probe = pl011_probe, ++ .remove = pl011_remove, ++}; ++ ++static int __init pl011_init(void) ++{ ++ printk(KERN_INFO "Serial: AMBA PL011 UART driver\n"); ++ ++ if (platform_driver_register(&arm_sbsa_uart_platform_driver)) ++ pr_warn("could not register SBSA UART platform driver\n"); ++ return amba_driver_register(&pl011_driver); ++} ++ ++static void __exit pl011_exit(void) ++{ ++ platform_driver_unregister(&arm_sbsa_uart_platform_driver); ++ amba_driver_unregister(&pl011_driver); ++} ++ ++/* ++ * While this can be a module, if builtin it's most likely the console ++ * So let's leave module_exit but move module_init to an earlier place ++ */ ++arch_initcall(pl011_init); ++module_exit(pl011_exit); ++ ++MODULE_AUTHOR("ARM Ltd/Deep Blue Solutions Ltd"); ++MODULE_DESCRIPTION("ARM AMBA serial port driver"); ++MODULE_LICENSE("GPL"); +diff -uprN kernel/drivers/tty/serial/xilinx_uartps.c kernel_new/drivers/tty/serial/xilinx_uartps.c +--- kernel/drivers/tty/serial/xilinx_uartps.c 2020-12-21 21:59:21.000000000 +0800 ++++ kernel_new/drivers/tty/serial/xilinx_uartps.c 2021-04-01 18:28:07.798863128 +0800 +@@ -1216,6 +1216,34 @@ static void cdns_uart_console_write(stru + spin_unlock_irqrestore(&port->lock, flags); + } + ++#ifdef CONFIG_RAW_PRINTK ++ ++static void cdns_uart_console_write_raw(struct console *co, const char *s, ++ unsigned int count) ++{ ++ struct uart_port *port = &cdns_uart_port[co->index]; ++ unsigned int imr, ctrl; ++ ++ imr = readl(port->membase + CDNS_UART_IMR); ++ writel(imr, port->membase + CDNS_UART_IDR); ++ ++ ctrl = readl(port->membase + CDNS_UART_CR); ++ ctrl &= ~CDNS_UART_CR_TX_DIS; ++ ctrl |= CDNS_UART_CR_TX_EN; ++ writel(ctrl, port->membase + CDNS_UART_CR); ++ ++ while 
(count-- > 0) { ++ if (*s == '\n') ++ writel('\r', port->membase + CDNS_UART_FIFO); ++ writel(*s++, port->membase + CDNS_UART_FIFO); ++ } ++ ++ writel(ctrl, port->membase + CDNS_UART_CR); ++ writel(imr, port->membase + CDNS_UART_IER); ++} ++ ++#endif ++ + /** + * cdns_uart_console_setup - Initialize the uart to default config + * @co: Console handle +@@ -1251,7 +1279,12 @@ static struct console cdns_uart_console + .write = cdns_uart_console_write, + .device = uart_console_device, + .setup = cdns_uart_console_setup, ++#ifdef CONFIG_RAW_PRINTK ++ .write_raw = cdns_uart_console_write_raw, ++ .flags = CON_PRINTBUFFER | CON_RAW, ++#else + .flags = CON_PRINTBUFFER, ++#endif + .index = -1, /* Specified on the cmdline (e.g. console=ttyPS ) */ + .data = &cdns_uart_uart_driver, + }; +diff -uprN kernel/fs/exec.c kernel_new/fs/exec.c +--- kernel/fs/exec.c 2020-12-21 21:59:21.000000000 +0800 ++++ kernel_new/fs/exec.c 2021-04-01 18:28:07.798863128 +0800 +@@ -49,6 +49,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1007,6 +1008,7 @@ static int exec_mmap(struct mm_struct *m + { + struct task_struct *tsk; + struct mm_struct *old_mm, *active_mm; ++ unsigned long flags; + + /* Notify parent that we're no longer interested in the old VM */ + tsk = current; +@@ -1031,8 +1033,10 @@ static int exec_mmap(struct mm_struct *m + active_mm = tsk->active_mm; + membarrier_exec_mmap(mm); + tsk->mm = mm; ++ ipipe_mm_switch_protect(flags); + tsk->active_mm = mm; + activate_mm(active_mm, mm); ++ ipipe_mm_switch_unprotect(flags); + tsk->mm->vmacache_seqnum = 0; + vmacache_flush(tsk); + task_unlock(tsk); +diff -uprN kernel/fs/exec.c.orig kernel_new/fs/exec.c.orig +--- kernel/fs/exec.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/fs/exec.c.orig 2020-12-21 21:59:21.000000000 +0800 +@@ -0,0 +1,2001 @@ ++/* ++ * linux/fs/exec.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ */ ++ ++/* ++ * #!-checking implemented by tytso. ++ */ ++/* ++ * Demand-loading implemented 01.12.91 - no need to read anything but ++ * the header into memory. The inode of the executable is put into ++ * "current->executable", and page faults do the actual loading. Clean. ++ * ++ * Once more I can proudly say that linux stood up to being changed: it ++ * was less than 2 hours work to get demand-loading completely implemented. ++ * ++ * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead, ++ * current->executable is only used by the procfs. This allows a dispatch ++ * table to check for several different types of binary formats. We keep ++ * trying until we recognize the file or we run out of supported binary ++ * formats. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include "internal.h" ++ ++#include ++ ++int suid_dumpable = 0; ++ ++static LIST_HEAD(formats); ++static DEFINE_RWLOCK(binfmt_lock); ++ ++void __register_binfmt(struct linux_binfmt * fmt, int insert) ++{ ++ BUG_ON(!fmt); ++ if (WARN_ON(!fmt->load_binary)) ++ return; ++ write_lock(&binfmt_lock); ++ insert ? 
list_add(&fmt->lh, &formats) : ++ list_add_tail(&fmt->lh, &formats); ++ write_unlock(&binfmt_lock); ++} ++ ++EXPORT_SYMBOL(__register_binfmt); ++ ++void unregister_binfmt(struct linux_binfmt * fmt) ++{ ++ write_lock(&binfmt_lock); ++ list_del(&fmt->lh); ++ write_unlock(&binfmt_lock); ++} ++ ++EXPORT_SYMBOL(unregister_binfmt); ++ ++static inline void put_binfmt(struct linux_binfmt * fmt) ++{ ++ module_put(fmt->module); ++} ++ ++bool path_noexec(const struct path *path) ++{ ++ return (path->mnt->mnt_flags & MNT_NOEXEC) || ++ (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); ++} ++ ++#ifdef CONFIG_USELIB ++/* ++ * Note that a shared library must be both readable and executable due to ++ * security reasons. ++ * ++ * Also note that we take the address to load from from the file itself. ++ */ ++SYSCALL_DEFINE1(uselib, const char __user *, library) ++{ ++ struct linux_binfmt *fmt; ++ struct file *file; ++ struct filename *tmp = getname(library); ++ int error = PTR_ERR(tmp); ++ static const struct open_flags uselib_flags = { ++ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, ++ .acc_mode = MAY_READ | MAY_EXEC, ++ .intent = LOOKUP_OPEN, ++ .lookup_flags = LOOKUP_FOLLOW, ++ }; ++ ++ if (IS_ERR(tmp)) ++ goto out; ++ ++ file = do_filp_open(AT_FDCWD, tmp, &uselib_flags); ++ putname(tmp); ++ error = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ ++ error = -EINVAL; ++ if (!S_ISREG(file_inode(file)->i_mode)) ++ goto exit; ++ ++ error = -EACCES; ++ if (path_noexec(&file->f_path)) ++ goto exit; ++ ++ fsnotify_open(file); ++ ++ error = -ENOEXEC; ++ ++ read_lock(&binfmt_lock); ++ list_for_each_entry(fmt, &formats, lh) { ++ if (!fmt->load_shlib) ++ continue; ++ if (!try_module_get(fmt->module)) ++ continue; ++ read_unlock(&binfmt_lock); ++ error = fmt->load_shlib(file); ++ read_lock(&binfmt_lock); ++ put_binfmt(fmt); ++ if (error != -ENOEXEC) ++ break; ++ } ++ read_unlock(&binfmt_lock); ++exit: ++ fput(file); ++out: ++ return error; ++} ++#endif /* #ifdef CONFIG_USELIB */ ++ ++#ifdef CONFIG_MMU ++/* ++ * The nascent bprm->mm is not visible until exec_mmap() but it can ++ * use a lot of memory, account these pages in current->mm temporary ++ * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we ++ * change the counter back via acct_arg_size(0). ++ */ ++static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) ++{ ++ struct mm_struct *mm = current->mm; ++ long diff = (long)(pages - bprm->vma_pages); ++ ++ if (!mm || !diff) ++ return; ++ ++ bprm->vma_pages = pages; ++ add_mm_counter(mm, MM_ANONPAGES, diff); ++} ++ ++static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, ++ int write) ++{ ++ struct page *page; ++ int ret; ++ unsigned int gup_flags = FOLL_FORCE; ++ ++#ifdef CONFIG_STACK_GROWSUP ++ if (write) { ++ ret = expand_downwards(bprm->vma, pos); ++ if (ret < 0) ++ return NULL; ++ } ++#endif ++ ++ if (write) ++ gup_flags |= FOLL_WRITE; ++ ++ /* ++ * We are doing an exec(). 'current' is the process ++ * doing the exec and bprm->mm is the new process's mm. ++ */ ++ ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags, ++ &page, NULL, NULL); ++ if (ret <= 0) ++ return NULL; ++ ++ if (write) { ++ unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; ++ unsigned long ptr_size, limit; ++ ++ /* ++ * Since the stack will hold pointers to the strings, we ++ * must account for them as well. 
++ * ++ * The size calculation is the entire vma while each arg page is ++ * built, so each time we get here it's calculating how far it ++ * is currently (rather than each call being just the newly ++ * added size from the arg page). As a result, we need to ++ * always add the entire size of the pointers, so that on the ++ * last call to get_arg_page() we'll actually have the entire ++ * correct size. ++ */ ++ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); ++ if (ptr_size > ULONG_MAX - size) ++ goto fail; ++ size += ptr_size; ++ ++ acct_arg_size(bprm, size / PAGE_SIZE); ++ ++ /* ++ * We've historically supported up to 32 pages (ARG_MAX) ++ * of argument strings even with small stacks ++ */ ++ if (size <= ARG_MAX) ++ return page; ++ ++ /* ++ * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM ++ * (whichever is smaller) for the argv+env strings. ++ * This ensures that: ++ * - the remaining binfmt code will not run out of stack space, ++ * - the program will have a reasonable amount of stack left ++ * to work from. ++ */ ++ limit = _STK_LIM / 4 * 3; ++ limit = min(limit, bprm->rlim_stack.rlim_cur / 4); ++ if (size > limit) ++ goto fail; ++ } ++ ++ return page; ++ ++fail: ++ put_page(page); ++ return NULL; ++} ++ ++static void put_arg_page(struct page *page) ++{ ++ put_page(page); ++} ++ ++static void free_arg_pages(struct linux_binprm *bprm) ++{ ++} ++ ++static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos, ++ struct page *page) ++{ ++ flush_cache_page(bprm->vma, pos, page_to_pfn(page)); ++} ++ ++static int __bprm_mm_init(struct linux_binprm *bprm) ++{ ++ int err; ++ struct vm_area_struct *vma = NULL; ++ struct mm_struct *mm = bprm->mm; ++ ++ bprm->vma = vma = vm_area_alloc(mm); ++ if (!vma) ++ return -ENOMEM; ++ vma_set_anonymous(vma); ++ ++ if (down_write_killable(&mm->mmap_sem)) { ++ err = -EINTR; ++ goto err_free; ++ } ++ ++ /* ++ * Place the stack at the largest stack address the architecture ++ * supports. Later, we'll move this to an appropriate place. We don't ++ * use STACK_TOP because that can depend on attributes which aren't ++ * configured yet. 
++ */ ++ BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); ++ vma->vm_end = STACK_TOP_MAX; ++ vma->vm_start = vma->vm_end - PAGE_SIZE; ++ vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; ++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); ++ ++ err = insert_vm_struct(mm, vma); ++ if (err) ++ goto err; ++ ++ mm->stack_vm = mm->total_vm = 1; ++ arch_bprm_mm_init(mm, vma); ++ up_write(&mm->mmap_sem); ++ bprm->p = vma->vm_end - sizeof(void *); ++ return 0; ++err: ++ up_write(&mm->mmap_sem); ++err_free: ++ bprm->vma = NULL; ++ vm_area_free(vma); ++ return err; ++} ++ ++static bool valid_arg_len(struct linux_binprm *bprm, long len) ++{ ++ return len <= MAX_ARG_STRLEN; ++} ++ ++#else ++ ++static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) ++{ ++} ++ ++static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, ++ int write) ++{ ++ struct page *page; ++ ++ page = bprm->page[pos / PAGE_SIZE]; ++ if (!page && write) { ++ page = alloc_page(GFP_HIGHUSER|__GFP_ZERO); ++ if (!page) ++ return NULL; ++ bprm->page[pos / PAGE_SIZE] = page; ++ } ++ ++ return page; ++} ++ ++static void put_arg_page(struct page *page) ++{ ++} ++ ++static void free_arg_page(struct linux_binprm *bprm, int i) ++{ ++ if (bprm->page[i]) { ++ __free_page(bprm->page[i]); ++ bprm->page[i] = NULL; ++ } ++} ++ ++static void free_arg_pages(struct linux_binprm *bprm) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_ARG_PAGES; i++) ++ free_arg_page(bprm, i); ++} ++ ++static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos, ++ struct page *page) ++{ ++} ++ ++static int __bprm_mm_init(struct linux_binprm *bprm) ++{ ++ bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *); ++ return 0; ++} ++ ++static bool valid_arg_len(struct linux_binprm *bprm, long len) ++{ ++ return len <= bprm->p; ++} ++ ++#endif /* CONFIG_MMU */ ++ ++/* ++ * Create a new mm_struct and populate it with a temporary stack ++ * vm_area_struct. We don't have enough context at this point to set the stack ++ * flags, permissions, and offset, so we use temporary values. We'll update ++ * them later in setup_arg_pages(). ++ */ ++static int bprm_mm_init(struct linux_binprm *bprm) ++{ ++ int err; ++ struct mm_struct *mm = NULL; ++ ++ bprm->mm = mm = mm_alloc(); ++ err = -ENOMEM; ++ if (!mm) ++ goto err; ++ ++ /* Save current stack limit for all calculations made during exec. */ ++ task_lock(current->group_leader); ++ bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK]; ++ task_unlock(current->group_leader); ++ ++ err = __bprm_mm_init(bprm); ++ if (err) ++ goto err; ++ ++ return 0; ++ ++err: ++ if (mm) { ++ bprm->mm = NULL; ++ mmdrop(mm); ++ } ++ ++ return err; ++} ++ ++struct user_arg_ptr { ++#ifdef CONFIG_COMPAT ++ bool is_compat; ++#endif ++ union { ++ const char __user *const __user *native; ++#ifdef CONFIG_COMPAT ++ const compat_uptr_t __user *compat; ++#endif ++ } ptr; ++}; ++ ++static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr) ++{ ++ const char __user *native; ++ ++#ifdef CONFIG_COMPAT ++ if (unlikely(argv.is_compat)) { ++ compat_uptr_t compat; ++ ++ if (get_user(compat, argv.ptr.compat + nr)) ++ return ERR_PTR(-EFAULT); ++ ++ return compat_ptr(compat); ++ } ++#endif ++ ++ if (get_user(native, argv.ptr.native + nr)) ++ return ERR_PTR(-EFAULT); ++ ++ return native; ++} ++ ++/* ++ * count() counts the number of strings in array ARGV. 
++ */ ++static int count(struct user_arg_ptr argv, int max) ++{ ++ int i = 0; ++ ++ if (argv.ptr.native != NULL) { ++ for (;;) { ++ const char __user *p = get_user_arg_ptr(argv, i); ++ ++ if (!p) ++ break; ++ ++ if (IS_ERR(p)) ++ return -EFAULT; ++ ++ if (i >= max) ++ return -E2BIG; ++ ++i; ++ ++ if (fatal_signal_pending(current)) ++ return -ERESTARTNOHAND; ++ cond_resched(); ++ } ++ } ++ return i; ++} ++ ++/* ++ * 'copy_strings()' copies argument/environment strings from the old ++ * processes's memory to the new process's stack. The call to get_user_pages() ++ * ensures the destination page is created and not swapped out. ++ */ ++static int copy_strings(int argc, struct user_arg_ptr argv, ++ struct linux_binprm *bprm) ++{ ++ struct page *kmapped_page = NULL; ++ char *kaddr = NULL; ++ unsigned long kpos = 0; ++ int ret; ++ ++ while (argc-- > 0) { ++ const char __user *str; ++ int len; ++ unsigned long pos; ++ ++ ret = -EFAULT; ++ str = get_user_arg_ptr(argv, argc); ++ if (IS_ERR(str)) ++ goto out; ++ ++ len = strnlen_user(str, MAX_ARG_STRLEN); ++ if (!len) ++ goto out; ++ ++ ret = -E2BIG; ++ if (!valid_arg_len(bprm, len)) ++ goto out; ++ ++ /* We're going to work our way backwords. */ ++ pos = bprm->p; ++ str += len; ++ bprm->p -= len; ++ ++ while (len > 0) { ++ int offset, bytes_to_copy; ++ ++ if (fatal_signal_pending(current)) { ++ ret = -ERESTARTNOHAND; ++ goto out; ++ } ++ cond_resched(); ++ ++ offset = pos % PAGE_SIZE; ++ if (offset == 0) ++ offset = PAGE_SIZE; ++ ++ bytes_to_copy = offset; ++ if (bytes_to_copy > len) ++ bytes_to_copy = len; ++ ++ offset -= bytes_to_copy; ++ pos -= bytes_to_copy; ++ str -= bytes_to_copy; ++ len -= bytes_to_copy; ++ ++ if (!kmapped_page || kpos != (pos & PAGE_MASK)) { ++ struct page *page; ++ ++ page = get_arg_page(bprm, pos, 1); ++ if (!page) { ++ ret = -E2BIG; ++ goto out; ++ } ++ ++ if (kmapped_page) { ++ flush_kernel_dcache_page(kmapped_page); ++ kunmap(kmapped_page); ++ put_arg_page(kmapped_page); ++ } ++ kmapped_page = page; ++ kaddr = kmap(kmapped_page); ++ kpos = pos & PAGE_MASK; ++ flush_arg_page(bprm, kpos, kmapped_page); ++ } ++ if (copy_from_user(kaddr+offset, str, bytes_to_copy)) { ++ ret = -EFAULT; ++ goto out; ++ } ++ } ++ } ++ ret = 0; ++out: ++ if (kmapped_page) { ++ flush_kernel_dcache_page(kmapped_page); ++ kunmap(kmapped_page); ++ put_arg_page(kmapped_page); ++ } ++ return ret; ++} ++ ++/* ++ * Like copy_strings, but get argv and its values from kernel memory. ++ */ ++int copy_strings_kernel(int argc, const char *const *__argv, ++ struct linux_binprm *bprm) ++{ ++ int r; ++ mm_segment_t oldfs = get_fs(); ++ struct user_arg_ptr argv = { ++ .ptr.native = (const char __user *const __user *)__argv, ++ }; ++ ++ set_fs(KERNEL_DS); ++ r = copy_strings(argc, argv, bprm); ++ set_fs(oldfs); ++ ++ return r; ++} ++EXPORT_SYMBOL(copy_strings_kernel); ++ ++#ifdef CONFIG_MMU ++ ++/* ++ * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once ++ * the binfmt code determines where the new stack should reside, we shift it to ++ * its final location. The process proceeds as follows: ++ * ++ * 1) Use shift to calculate the new vma endpoints. ++ * 2) Extend vma to cover both the old and new ranges. This ensures the ++ * arguments passed to subsequent functions are consistent. ++ * 3) Move vma's page tables to the new range. ++ * 4) Free up any cleared pgd range. ++ * 5) Shrink the vma to cover only the new range. 
++ */ ++static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long old_start = vma->vm_start; ++ unsigned long old_end = vma->vm_end; ++ unsigned long length = old_end - old_start; ++ unsigned long new_start = old_start - shift; ++ unsigned long new_end = old_end - shift; ++ struct mmu_gather tlb; ++ ++ BUG_ON(new_start > new_end); ++ ++ /* ++ * ensure there are no vmas between where we want to go ++ * and where we are ++ */ ++ if (vma != find_vma(mm, new_start)) ++ return -EFAULT; ++ ++ /* ++ * cover the whole range: [new_start, old_end) ++ */ ++ if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL)) ++ return -ENOMEM; ++ ++ /* ++ * move the page tables downwards, on failure we rely on ++ * process cleanup to remove whatever mess we made. ++ */ ++ if (length != move_page_tables(vma, old_start, ++ vma, new_start, length, false)) ++ return -ENOMEM; ++ ++ lru_add_drain(); ++ tlb_gather_mmu(&tlb, mm, old_start, old_end); ++ if (new_end > old_start) { ++ /* ++ * when the old and new regions overlap clear from new_end. ++ */ ++ free_pgd_range(&tlb, new_end, old_end, new_end, ++ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); ++ } else { ++ /* ++ * otherwise, clean from old_start; this is done to not touch ++ * the address space in [new_end, old_start) some architectures ++ * have constraints on va-space that make this illegal (IA64) - ++ * for the others its just a little faster. ++ */ ++ free_pgd_range(&tlb, old_start, old_end, new_end, ++ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); ++ } ++ tlb_finish_mmu(&tlb, old_start, old_end); ++ ++ /* ++ * Shrink the vma to just the new range. Always succeeds. ++ */ ++ vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL); ++ ++ return 0; ++} ++ ++/* ++ * Finalizes the stack vm_area_struct. The flags and permissions are updated, ++ * the stack is optionally relocated, and some extra space is added. ++ */ ++int setup_arg_pages(struct linux_binprm *bprm, ++ unsigned long stack_top, ++ int executable_stack) ++{ ++ unsigned long ret; ++ unsigned long stack_shift; ++ struct mm_struct *mm = current->mm; ++ struct vm_area_struct *vma = bprm->vma; ++ struct vm_area_struct *prev = NULL; ++ unsigned long vm_flags; ++ unsigned long stack_base; ++ unsigned long stack_size; ++ unsigned long stack_expand; ++ unsigned long rlim_stack; ++ ++#ifdef CONFIG_STACK_GROWSUP ++ /* Limit stack size */ ++ stack_base = bprm->rlim_stack.rlim_max; ++ if (stack_base > STACK_SIZE_MAX) ++ stack_base = STACK_SIZE_MAX; ++ ++ /* Add space for stack randomization. */ ++ stack_base += (STACK_RND_MASK << PAGE_SHIFT); ++ ++ /* Make sure we didn't let the argument array grow too large. 
*/ ++ if (vma->vm_end - vma->vm_start > stack_base) ++ return -ENOMEM; ++ ++ stack_base = PAGE_ALIGN(stack_top - stack_base); ++ ++ stack_shift = vma->vm_start - stack_base; ++ mm->arg_start = bprm->p - stack_shift; ++ bprm->p = vma->vm_end - stack_shift; ++#else ++ stack_top = arch_align_stack(stack_top); ++ stack_top = PAGE_ALIGN(stack_top); ++ ++ if (unlikely(stack_top < mmap_min_addr) || ++ unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr)) ++ return -ENOMEM; ++ ++ stack_shift = vma->vm_end - stack_top; ++ ++ bprm->p -= stack_shift; ++ mm->arg_start = bprm->p; ++#endif ++ ++ if (bprm->loader) ++ bprm->loader -= stack_shift; ++ bprm->exec -= stack_shift; ++ ++ if (down_write_killable(&mm->mmap_sem)) ++ return -EINTR; ++ ++ vm_flags = VM_STACK_FLAGS; ++ ++ /* ++ * Adjust stack execute permissions; explicitly enable for ++ * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone ++ * (arch default) otherwise. ++ */ ++ if (unlikely(executable_stack == EXSTACK_ENABLE_X)) ++ vm_flags |= VM_EXEC; ++ else if (executable_stack == EXSTACK_DISABLE_X) ++ vm_flags &= ~VM_EXEC; ++ vm_flags |= mm->def_flags; ++ vm_flags |= VM_STACK_INCOMPLETE_SETUP; ++ ++ ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, ++ vm_flags); ++ if (ret) ++ goto out_unlock; ++ BUG_ON(prev != vma); ++ ++ /* Move stack pages down in memory. */ ++ if (stack_shift) { ++ ret = shift_arg_pages(vma, stack_shift); ++ if (ret) ++ goto out_unlock; ++ } ++ ++ /* mprotect_fixup is overkill to remove the temporary stack flags */ ++ vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP; ++ ++ stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */ ++ stack_size = vma->vm_end - vma->vm_start; ++ /* ++ * Align this down to a page boundary as expand_stack ++ * will align it up. ++ */ ++ rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK; ++#ifdef CONFIG_STACK_GROWSUP ++ if (stack_size + stack_expand > rlim_stack) ++ stack_base = vma->vm_start + rlim_stack; ++ else ++ stack_base = vma->vm_end + stack_expand; ++#else ++ if (stack_size + stack_expand > rlim_stack) ++ stack_base = vma->vm_end - rlim_stack; ++ else ++ stack_base = vma->vm_start - stack_expand; ++#endif ++ current->mm->start_stack = bprm->p; ++ ret = expand_stack(vma, stack_base); ++ if (ret) ++ ret = -EFAULT; ++ ++out_unlock: ++ up_write(&mm->mmap_sem); ++ return ret; ++} ++EXPORT_SYMBOL(setup_arg_pages); ++ ++#else ++ ++/* ++ * Transfer the program arguments and environment from the holding pages ++ * onto the stack. The provided stack pointer is adjusted accordingly. ++ */ ++int transfer_args_to_stack(struct linux_binprm *bprm, ++ unsigned long *sp_location) ++{ ++ unsigned long index, stop, sp; ++ int ret = 0; ++ ++ stop = bprm->p >> PAGE_SHIFT; ++ sp = *sp_location; ++ ++ for (index = MAX_ARG_PAGES - 1; index >= stop; index--) { ++ unsigned int offset = index == stop ? 
bprm->p & ~PAGE_MASK : 0; ++ char *src = kmap(bprm->page[index]) + offset; ++ sp -= PAGE_SIZE - offset; ++ if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0) ++ ret = -EFAULT; ++ kunmap(bprm->page[index]); ++ if (ret) ++ goto out; ++ } ++ ++ *sp_location = sp; ++ ++out: ++ return ret; ++} ++EXPORT_SYMBOL(transfer_args_to_stack); ++ ++#endif /* CONFIG_MMU */ ++ ++static struct file *do_open_execat(int fd, struct filename *name, int flags) ++{ ++ struct file *file; ++ int err; ++ struct open_flags open_exec_flags = { ++ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, ++ .acc_mode = MAY_EXEC, ++ .intent = LOOKUP_OPEN, ++ .lookup_flags = LOOKUP_FOLLOW, ++ }; ++ ++ if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) ++ return ERR_PTR(-EINVAL); ++ if (flags & AT_SYMLINK_NOFOLLOW) ++ open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW; ++ if (flags & AT_EMPTY_PATH) ++ open_exec_flags.lookup_flags |= LOOKUP_EMPTY; ++ ++ file = do_filp_open(fd, name, &open_exec_flags); ++ if (IS_ERR(file)) ++ goto out; ++ ++ err = -EACCES; ++ if (!S_ISREG(file_inode(file)->i_mode)) ++ goto exit; ++ ++ if (path_noexec(&file->f_path)) ++ goto exit; ++ ++ err = deny_write_access(file); ++ if (err) ++ goto exit; ++ ++ if (name->name[0] != '\0') ++ fsnotify_open(file); ++ ++out: ++ return file; ++ ++exit: ++ fput(file); ++ return ERR_PTR(err); ++} ++ ++struct file *open_exec(const char *name) ++{ ++ struct filename *filename = getname_kernel(name); ++ struct file *f = ERR_CAST(filename); ++ ++ if (!IS_ERR(filename)) { ++ f = do_open_execat(AT_FDCWD, filename, 0); ++ putname(filename); ++ } ++ return f; ++} ++EXPORT_SYMBOL(open_exec); ++ ++int kernel_read_file(struct file *file, void **buf, loff_t *size, ++ loff_t max_size, enum kernel_read_file_id id) ++{ ++ loff_t i_size, pos; ++ ssize_t bytes = 0; ++ int ret; ++ ++ if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0) ++ return -EINVAL; ++ ++ ret = deny_write_access(file); ++ if (ret) ++ return ret; ++ ++ ret = security_kernel_read_file(file, id); ++ if (ret) ++ goto out; ++ ++ i_size = i_size_read(file_inode(file)); ++ if (max_size > 0 && i_size > max_size) { ++ ret = -EFBIG; ++ goto out; ++ } ++ if (i_size <= 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (id != READING_FIRMWARE_PREALLOC_BUFFER) ++ *buf = vmalloc(i_size); ++ if (!*buf) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ pos = 0; ++ while (pos < i_size) { ++ bytes = kernel_read(file, *buf + pos, i_size - pos, &pos); ++ if (bytes < 0) { ++ ret = bytes; ++ goto out_free; ++ } ++ ++ if (bytes == 0) ++ break; ++ } ++ ++ if (pos != i_size) { ++ ret = -EIO; ++ goto out_free; ++ } ++ ++ ret = security_kernel_post_read_file(file, *buf, i_size, id); ++ if (!ret) ++ *size = pos; ++ ++out_free: ++ if (ret < 0) { ++ if (id != READING_FIRMWARE_PREALLOC_BUFFER) { ++ vfree(*buf); ++ *buf = NULL; ++ } ++ } ++ ++out: ++ allow_write_access(file); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kernel_read_file); ++ ++int kernel_read_file_from_path(const char *path, void **buf, loff_t *size, ++ loff_t max_size, enum kernel_read_file_id id) ++{ ++ struct file *file; ++ int ret; ++ ++ if (!path || !*path) ++ return -EINVAL; ++ ++ file = filp_open(path, O_RDONLY, 0); ++ if (IS_ERR(file)) ++ return PTR_ERR(file); ++ ++ ret = kernel_read_file(file, buf, size, max_size, id); ++ fput(file); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kernel_read_file_from_path); ++ ++int kernel_read_file_from_fd(int fd, void **buf, loff_t *size, loff_t max_size, ++ enum kernel_read_file_id id) ++{ ++ struct fd f = fdget(fd); ++ int ret = 
-EBADF; ++ ++ if (!f.file) ++ goto out; ++ ++ ret = kernel_read_file(f.file, buf, size, max_size, id); ++out: ++ fdput(f); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kernel_read_file_from_fd); ++ ++ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) ++{ ++ ssize_t res = vfs_read(file, (void __user *)addr, len, &pos); ++ if (res > 0) ++ flush_icache_range(addr, addr + len); ++ return res; ++} ++EXPORT_SYMBOL(read_code); ++ ++static int exec_mmap(struct mm_struct *mm) ++{ ++ struct task_struct *tsk; ++ struct mm_struct *old_mm, *active_mm; ++ ++ /* Notify parent that we're no longer interested in the old VM */ ++ tsk = current; ++ old_mm = current->mm; ++ mm_release(tsk, old_mm); ++ ++ if (old_mm) { ++ sync_mm_rss(old_mm); ++ /* ++ * Make sure that if there is a core dump in progress ++ * for the old mm, we get out and die instead of going ++ * through with the exec. We must hold mmap_sem around ++ * checking core_state and changing tsk->mm. ++ */ ++ down_read(&old_mm->mmap_sem); ++ if (unlikely(old_mm->core_state)) { ++ up_read(&old_mm->mmap_sem); ++ return -EINTR; ++ } ++ } ++ task_lock(tsk); ++ active_mm = tsk->active_mm; ++ membarrier_exec_mmap(mm); ++ tsk->mm = mm; ++ tsk->active_mm = mm; ++ activate_mm(active_mm, mm); ++ tsk->mm->vmacache_seqnum = 0; ++ vmacache_flush(tsk); ++ task_unlock(tsk); ++ if (old_mm) { ++ up_read(&old_mm->mmap_sem); ++ BUG_ON(active_mm != old_mm); ++ setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm); ++ mm_update_next_owner(old_mm); ++ mmput(old_mm); ++ return 0; ++ } ++ mmdrop(active_mm); ++ return 0; ++} ++ ++/* ++ * This function makes sure the current process has its own signal table, ++ * so that flush_signal_handlers can later reset the handlers without ++ * disturbing other processes. (Other processes might share the signal ++ * table via the CLONE_SIGHAND option to clone().) ++ */ ++static int de_thread(struct task_struct *tsk) ++{ ++ struct signal_struct *sig = tsk->signal; ++ struct sighand_struct *oldsighand = tsk->sighand; ++ spinlock_t *lock = &oldsighand->siglock; ++ ++ if (thread_group_empty(tsk)) ++ goto no_thread_group; ++ ++ /* ++ * Kill all other threads in the thread group. ++ */ ++ spin_lock_irq(lock); ++ if (signal_group_exit(sig)) { ++ /* ++ * Another group action in progress, just ++ * return so that the signal is processed. 
++ */ ++ spin_unlock_irq(lock); ++ return -EAGAIN; ++ } ++ ++ sig->group_exit_task = tsk; ++ sig->notify_count = zap_other_threads(tsk); ++ if (!thread_group_leader(tsk)) ++ sig->notify_count--; ++ ++ while (sig->notify_count) { ++ __set_current_state(TASK_KILLABLE); ++ spin_unlock_irq(lock); ++ schedule(); ++ if (unlikely(__fatal_signal_pending(tsk))) ++ goto killed; ++ spin_lock_irq(lock); ++ } ++ spin_unlock_irq(lock); ++ ++ /* ++ * At this point all other threads have exited, all we have to ++ * do is to wait for the thread group leader to become inactive, ++ * and to assume its PID: ++ */ ++ if (!thread_group_leader(tsk)) { ++ struct task_struct *leader = tsk->group_leader; ++ ++ for (;;) { ++ cgroup_threadgroup_change_begin(tsk); ++ write_lock_irq(&tasklist_lock); ++ /* ++ * Do this under tasklist_lock to ensure that ++ * exit_notify() can't miss ->group_exit_task ++ */ ++ sig->notify_count = -1; ++ if (likely(leader->exit_state)) ++ break; ++ __set_current_state(TASK_KILLABLE); ++ write_unlock_irq(&tasklist_lock); ++ cgroup_threadgroup_change_end(tsk); ++ schedule(); ++ if (unlikely(__fatal_signal_pending(tsk))) ++ goto killed; ++ } ++ ++ /* ++ * The only record we have of the real-time age of a ++ * process, regardless of execs it's done, is start_time. ++ * All the past CPU time is accumulated in signal_struct ++ * from sister threads now dead. But in this non-leader ++ * exec, nothing survives from the original leader thread, ++ * whose birth marks the true age of this process now. ++ * When we take on its identity by switching to its PID, we ++ * also take its birthdate (always earlier than our own). ++ */ ++ tsk->start_time = leader->start_time; ++ tsk->real_start_time = leader->real_start_time; ++ ++ BUG_ON(!same_thread_group(leader, tsk)); ++ BUG_ON(has_group_leader_pid(tsk)); ++ /* ++ * An exec() starts a new thread group with the ++ * TGID of the previous thread group. Rehash the ++ * two threads with a switched PID, and release ++ * the former thread group leader: ++ */ ++ ++ /* Become a process group leader with the old leader's pid. ++ * The old leader becomes a thread of the this thread group. ++ * Note: The old leader also uses this pid until release_task ++ * is called. Odd but simple and correct. ++ */ ++ tsk->pid = leader->pid; ++ change_pid(tsk, PIDTYPE_PID, task_pid(leader)); ++ transfer_pid(leader, tsk, PIDTYPE_TGID); ++ transfer_pid(leader, tsk, PIDTYPE_PGID); ++ transfer_pid(leader, tsk, PIDTYPE_SID); ++ ++ list_replace_rcu(&leader->tasks, &tsk->tasks); ++ list_replace_init(&leader->sibling, &tsk->sibling); ++ ++ tsk->group_leader = tsk; ++ leader->group_leader = tsk; ++ ++ tsk->exit_signal = SIGCHLD; ++ leader->exit_signal = -1; ++ ++ BUG_ON(leader->exit_state != EXIT_ZOMBIE); ++ leader->exit_state = EXIT_DEAD; ++ ++ /* ++ * We are going to release_task()->ptrace_unlink() silently, ++ * the tracer can sleep in do_wait(). EXIT_DEAD guarantees ++ * the tracer wont't block again waiting for this thread. 
++ */ ++ if (unlikely(leader->ptrace)) ++ __wake_up_parent(leader, leader->parent); ++ write_unlock_irq(&tasklist_lock); ++ cgroup_threadgroup_change_end(tsk); ++ ++ release_task(leader); ++ } ++ ++ sig->group_exit_task = NULL; ++ sig->notify_count = 0; ++ ++no_thread_group: ++ /* we have changed execution domain */ ++ tsk->exit_signal = SIGCHLD; ++ ++#ifdef CONFIG_POSIX_TIMERS ++ exit_itimers(sig); ++ flush_itimer_signals(); ++#endif ++ ++ if (atomic_read(&oldsighand->count) != 1) { ++ struct sighand_struct *newsighand; ++ /* ++ * This ->sighand is shared with the CLONE_SIGHAND ++ * but not CLONE_THREAD task, switch to the new one. ++ */ ++ newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); ++ if (!newsighand) ++ return -ENOMEM; ++ ++ atomic_set(&newsighand->count, 1); ++ memcpy(newsighand->action, oldsighand->action, ++ sizeof(newsighand->action)); ++ ++ write_lock_irq(&tasklist_lock); ++ spin_lock(&oldsighand->siglock); ++ rcu_assign_pointer(tsk->sighand, newsighand); ++ spin_unlock(&oldsighand->siglock); ++ write_unlock_irq(&tasklist_lock); ++ ++ __cleanup_sighand(oldsighand); ++ } ++ ++ BUG_ON(!thread_group_leader(tsk)); ++ return 0; ++ ++killed: ++ /* protects against exit_notify() and __exit_signal() */ ++ read_lock(&tasklist_lock); ++ sig->group_exit_task = NULL; ++ sig->notify_count = 0; ++ read_unlock(&tasklist_lock); ++ return -EAGAIN; ++} ++ ++char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk) ++{ ++ task_lock(tsk); ++ strncpy(buf, tsk->comm, buf_size); ++ task_unlock(tsk); ++ return buf; ++} ++EXPORT_SYMBOL_GPL(__get_task_comm); ++ ++/* ++ * These functions flushes out all traces of the currently running executable ++ * so that a new one can be started ++ */ ++ ++void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) ++{ ++ task_lock(tsk); ++ trace_task_rename(tsk, buf); ++ strlcpy(tsk->comm, buf, sizeof(tsk->comm)); ++ task_unlock(tsk); ++ perf_event_comm(tsk, exec); ++} ++ ++/* ++ * Calling this is the point of no return. None of the failures will be ++ * seen by userspace since either the process is already taking a fatal ++ * signal (via de_thread() or coredump), or will have SEGV raised ++ * (after exec_mmap()) by search_binary_handlers (see below). ++ */ ++int flush_old_exec(struct linux_binprm * bprm) ++{ ++ int retval; ++ ++ /* ++ * Make sure we have a private signal table and that ++ * we are unassociated from the previous thread group. ++ */ ++ retval = de_thread(current); ++ if (retval) ++ goto out; ++ ++ /* ++ * Must be called _before_ exec_mmap() as bprm->mm is ++ * not visibile until then. This also enables the update ++ * to be lockless. ++ */ ++ set_mm_exe_file(bprm->mm, bprm->file); ++ ++ would_dump(bprm, bprm->file); ++ ++ /* ++ * Release all of the old mmap stuff ++ */ ++ acct_arg_size(bprm, 0); ++ retval = exec_mmap(bprm->mm); ++ if (retval) ++ goto out; ++ ++ /* ++ * After clearing bprm->mm (to mark that current is using the ++ * prepared mm now), we have nothing left of the original ++ * process. If anything from here on returns an error, the check ++ * in search_binary_handler() will SEGV current. 
++ */ ++ bprm->mm = NULL; ++ ++ set_fs(USER_DS); ++ current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | ++ PF_NOFREEZE | PF_NO_SETAFFINITY); ++ flush_thread(); ++ current->personality &= ~bprm->per_clear; ++ ++ /* ++ * We have to apply CLOEXEC before we change whether the process is ++ * dumpable (in setup_new_exec) to avoid a race with a process in userspace ++ * trying to access the should-be-closed file descriptors of a process ++ * undergoing exec(2). ++ */ ++ do_close_on_exec(current->files); ++ return 0; ++ ++out: ++ return retval; ++} ++EXPORT_SYMBOL(flush_old_exec); ++ ++void would_dump(struct linux_binprm *bprm, struct file *file) ++{ ++ struct inode *inode = file_inode(file); ++ if (inode_permission(inode, MAY_READ) < 0) { ++ struct user_namespace *old, *user_ns; ++ bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; ++ ++ /* Ensure mm->user_ns contains the executable */ ++ user_ns = old = bprm->mm->user_ns; ++ while ((user_ns != &init_user_ns) && ++ !privileged_wrt_inode_uidgid(user_ns, inode)) ++ user_ns = user_ns->parent; ++ ++ if (old != user_ns) { ++ bprm->mm->user_ns = get_user_ns(user_ns); ++ put_user_ns(old); ++ } ++ } ++} ++EXPORT_SYMBOL(would_dump); ++ ++void setup_new_exec(struct linux_binprm * bprm) ++{ ++ /* ++ * Once here, prepare_binrpm() will not be called any more, so ++ * the final state of setuid/setgid/fscaps can be merged into the ++ * secureexec flag. ++ */ ++ bprm->secureexec |= bprm->cap_elevated; ++ ++ if (bprm->secureexec) { ++ /* Make sure parent cannot signal privileged process. */ ++ current->pdeath_signal = 0; ++ ++ /* ++ * For secureexec, reset the stack limit to sane default to ++ * avoid bad behavior from the prior rlimits. This has to ++ * happen before arch_pick_mmap_layout(), which examines ++ * RLIMIT_STACK, but after the point of no return to avoid ++ * needing to clean up the change on failure. ++ */ ++ if (bprm->rlim_stack.rlim_cur > _STK_LIM) ++ bprm->rlim_stack.rlim_cur = _STK_LIM; ++ } ++ ++ arch_pick_mmap_layout(current->mm, &bprm->rlim_stack); ++ ++ current->sas_ss_sp = current->sas_ss_size = 0; ++ ++ /* ++ * Figure out dumpability. Note that this checking only of current ++ * is wrong, but userspace depends on it. This should be testing ++ * bprm->secureexec instead. ++ */ ++ if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP || ++ !(uid_eq(current_euid(), current_uid()) && ++ gid_eq(current_egid(), current_gid()))) ++ set_dumpable(current->mm, suid_dumpable); ++ else ++ set_dumpable(current->mm, SUID_DUMP_USER); ++ ++ arch_setup_new_exec(); ++ perf_event_exec(); ++ __set_task_comm(current, kbasename(bprm->filename), true); ++ ++ /* Set the new mm task size. We have to do that late because it may ++ * depend on TIF_32BIT which is only updated in flush_thread() on ++ * some architectures like powerpc ++ */ ++ current->mm->task_size = TASK_SIZE; ++ ++ /* An exec changes our domain. We are no longer part of the thread ++ group */ ++ WRITE_ONCE(current->self_exec_id, current->self_exec_id + 1); ++ WRITE_ONCE(current->self_exec_id_u64, current->self_exec_id_u64 + 1); ++ flush_signal_handlers(current, 0); ++} ++EXPORT_SYMBOL(setup_new_exec); ++ ++/* Runs immediately before start_thread() takes over. */ ++void finalize_exec(struct linux_binprm *bprm) ++{ ++ /* Store any stack rlimit changes before starting thread. 
*/ ++ task_lock(current->group_leader); ++ current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack; ++ task_unlock(current->group_leader); ++} ++EXPORT_SYMBOL(finalize_exec); ++ ++/* ++ * Prepare credentials and lock ->cred_guard_mutex. ++ * install_exec_creds() commits the new creds and drops the lock. ++ * Or, if exec fails before, free_bprm() should release ->cred and ++ * and unlock. ++ */ ++int prepare_bprm_creds(struct linux_binprm *bprm) ++{ ++ if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex)) ++ return -ERESTARTNOINTR; ++ ++ bprm->cred = prepare_exec_creds(); ++ if (likely(bprm->cred)) ++ return 0; ++ ++ mutex_unlock(¤t->signal->cred_guard_mutex); ++ return -ENOMEM; ++} ++ ++static void free_bprm(struct linux_binprm *bprm) ++{ ++ free_arg_pages(bprm); ++ if (bprm->cred) { ++ mutex_unlock(¤t->signal->cred_guard_mutex); ++ abort_creds(bprm->cred); ++ } ++ if (bprm->file) { ++ allow_write_access(bprm->file); ++ fput(bprm->file); ++ } ++ /* If a binfmt changed the interp, free it. */ ++ if (bprm->interp != bprm->filename) ++ kfree(bprm->interp); ++ kfree(bprm); ++} ++ ++int bprm_change_interp(const char *interp, struct linux_binprm *bprm) ++{ ++ /* If a binfmt changed the interp, free it first. */ ++ if (bprm->interp != bprm->filename) ++ kfree(bprm->interp); ++ bprm->interp = kstrdup(interp, GFP_KERNEL); ++ if (!bprm->interp) ++ return -ENOMEM; ++ return 0; ++} ++EXPORT_SYMBOL(bprm_change_interp); ++ ++/* ++ * install the new credentials for this executable ++ */ ++void install_exec_creds(struct linux_binprm *bprm) ++{ ++ security_bprm_committing_creds(bprm); ++ ++ commit_creds(bprm->cred); ++ bprm->cred = NULL; ++ ++ /* ++ * Disable monitoring for regular users ++ * when executing setuid binaries. Must ++ * wait until new credentials are committed ++ * by commit_creds() above ++ */ ++ if (get_dumpable(current->mm) != SUID_DUMP_USER) ++ perf_event_exit_task(current); ++ /* ++ * cred_guard_mutex must be held at least to this point to prevent ++ * ptrace_attach() from altering our determination of the task's ++ * credentials; any time after this it may be unlocked. ++ */ ++ security_bprm_committed_creds(bprm); ++ mutex_unlock(¤t->signal->cred_guard_mutex); ++} ++EXPORT_SYMBOL(install_exec_creds); ++ ++/* ++ * determine how safe it is to execute the proposed program ++ * - the caller must hold ->cred_guard_mutex to protect against ++ * PTRACE_ATTACH or seccomp thread-sync ++ */ ++static void check_unsafe_exec(struct linux_binprm *bprm) ++{ ++ struct task_struct *p = current, *t; ++ unsigned n_fs; ++ ++ if (p->ptrace) ++ bprm->unsafe |= LSM_UNSAFE_PTRACE; ++ ++ /* ++ * This isn't strictly necessary, but it makes it harder for LSMs to ++ * mess up. ++ */ ++ if (task_no_new_privs(current)) ++ bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; ++ ++ t = p; ++ n_fs = 1; ++ spin_lock(&p->fs->lock); ++ rcu_read_lock(); ++ while_each_thread(p, t) { ++ if (t->fs == p->fs) ++ n_fs++; ++ } ++ rcu_read_unlock(); ++ ++ if (p->fs->users > n_fs) ++ bprm->unsafe |= LSM_UNSAFE_SHARE; ++ else ++ p->fs->in_exec = 1; ++ spin_unlock(&p->fs->lock); ++} ++ ++static void bprm_fill_uid(struct linux_binprm *bprm) ++{ ++ struct inode *inode; ++ unsigned int mode; ++ kuid_t uid; ++ kgid_t gid; ++ ++ /* ++ * Since this can be called multiple times (via prepare_binprm), ++ * we must clear any previous work done when setting set[ug]id ++ * bits from any earlier bprm->file uses (for example when run ++ * first for a setuid script then again for its interpreter). 
++ */ ++ bprm->cred->euid = current_euid(); ++ bprm->cred->egid = current_egid(); ++ ++ if (!mnt_may_suid(bprm->file->f_path.mnt)) ++ return; ++ ++ if (task_no_new_privs(current)) ++ return; ++ ++ inode = bprm->file->f_path.dentry->d_inode; ++ mode = READ_ONCE(inode->i_mode); ++ if (!(mode & (S_ISUID|S_ISGID))) ++ return; ++ ++ /* Be careful if suid/sgid is set */ ++ inode_lock(inode); ++ ++ /* reload atomically mode/uid/gid now that lock held */ ++ mode = inode->i_mode; ++ uid = inode->i_uid; ++ gid = inode->i_gid; ++ inode_unlock(inode); ++ ++ /* We ignore suid/sgid if there are no mappings for them in the ns */ ++ if (!kuid_has_mapping(bprm->cred->user_ns, uid) || ++ !kgid_has_mapping(bprm->cred->user_ns, gid)) ++ return; ++ ++ if (mode & S_ISUID) { ++ bprm->per_clear |= PER_CLEAR_ON_SETID; ++ bprm->cred->euid = uid; ++ } ++ ++ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { ++ bprm->per_clear |= PER_CLEAR_ON_SETID; ++ bprm->cred->egid = gid; ++ } ++} ++ ++/* ++ * Fill the binprm structure from the inode. ++ * Check permissions, then read the first BINPRM_BUF_SIZE bytes ++ * ++ * This may be called multiple times for binary chains (scripts for example). ++ */ ++int prepare_binprm(struct linux_binprm *bprm) ++{ ++ int retval; ++ loff_t pos = 0; ++ ++ bprm_fill_uid(bprm); ++ ++ /* fill in binprm security blob */ ++ retval = security_bprm_set_creds(bprm); ++ if (retval) ++ return retval; ++ bprm->called_set_creds = 1; ++ ++ memset(bprm->buf, 0, BINPRM_BUF_SIZE); ++ return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos); ++} ++ ++EXPORT_SYMBOL(prepare_binprm); ++ ++/* ++ * Arguments are '\0' separated strings found at the location bprm->p ++ * points to; chop off the first by relocating brpm->p to right after ++ * the first '\0' encountered. ++ */ ++int remove_arg_zero(struct linux_binprm *bprm) ++{ ++ int ret = 0; ++ unsigned long offset; ++ char *kaddr; ++ struct page *page; ++ ++ if (!bprm->argc) ++ return 0; ++ ++ do { ++ offset = bprm->p & ~PAGE_MASK; ++ page = get_arg_page(bprm, bprm->p, 0); ++ if (!page) { ++ ret = -EFAULT; ++ goto out; ++ } ++ kaddr = kmap_atomic(page); ++ ++ for (; offset < PAGE_SIZE && kaddr[offset]; ++ offset++, bprm->p++) ++ ; ++ ++ kunmap_atomic(kaddr); ++ put_arg_page(page); ++ } while (offset == PAGE_SIZE); ++ ++ bprm->p++; ++ bprm->argc--; ++ ret = 0; ++ ++out: ++ return ret; ++} ++EXPORT_SYMBOL(remove_arg_zero); ++ ++#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) ++/* ++ * cycle the list of binary formats handler, until one recognizes the image ++ */ ++int search_binary_handler(struct linux_binprm *bprm) ++{ ++ bool need_retry = IS_ENABLED(CONFIG_MODULES); ++ struct linux_binfmt *fmt; ++ int retval; ++ ++ /* This allows 4 levels of binfmt rewrites before failing hard. 
*/ ++ if (bprm->recursion_depth > 5) ++ return -ELOOP; ++ ++ retval = security_bprm_check(bprm); ++ if (retval) ++ return retval; ++ ++ retval = -ENOENT; ++ retry: ++ read_lock(&binfmt_lock); ++ list_for_each_entry(fmt, &formats, lh) { ++ if (!try_module_get(fmt->module)) ++ continue; ++ read_unlock(&binfmt_lock); ++ bprm->recursion_depth++; ++ retval = fmt->load_binary(bprm); ++ read_lock(&binfmt_lock); ++ put_binfmt(fmt); ++ bprm->recursion_depth--; ++ if (retval < 0 && !bprm->mm) { ++ /* we got to flush_old_exec() and failed after it */ ++ read_unlock(&binfmt_lock); ++ force_sigsegv(SIGSEGV, current); ++ return retval; ++ } ++ if (retval != -ENOEXEC || !bprm->file) { ++ read_unlock(&binfmt_lock); ++ return retval; ++ } ++ } ++ read_unlock(&binfmt_lock); ++ ++ if (need_retry) { ++ if (printable(bprm->buf[0]) && printable(bprm->buf[1]) && ++ printable(bprm->buf[2]) && printable(bprm->buf[3])) ++ return retval; ++ if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0) ++ return retval; ++ need_retry = false; ++ goto retry; ++ } ++ ++ return retval; ++} ++EXPORT_SYMBOL(search_binary_handler); ++ ++static int exec_binprm(struct linux_binprm *bprm) ++{ ++ pid_t old_pid, old_vpid; ++ int ret; ++ ++ /* Need to fetch pid before load_binary changes it */ ++ old_pid = current->pid; ++ rcu_read_lock(); ++ old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); ++ rcu_read_unlock(); ++ ++ ret = search_binary_handler(bprm); ++ if (ret >= 0) { ++ audit_bprm(bprm); ++ trace_sched_process_exec(current, old_pid, bprm); ++ ptrace_event(PTRACE_EVENT_EXEC, old_vpid); ++ proc_exec_connector(current); ++ } ++ ++ return ret; ++} ++ ++/* ++ * sys_execve() executes a new program. ++ */ ++static int __do_execve_file(int fd, struct filename *filename, ++ struct user_arg_ptr argv, ++ struct user_arg_ptr envp, ++ int flags, struct file *file) ++{ ++ char *pathbuf = NULL; ++ struct linux_binprm *bprm; ++ struct files_struct *displaced; ++ int retval; ++ ++ if (IS_ERR(filename)) ++ return PTR_ERR(filename); ++ ++ /* ++ * We move the actual failure in case of RLIMIT_NPROC excess from ++ * set*uid() to execve() because too many poorly written programs ++ * don't check setuid() return code. Here we additionally recheck ++ * whether NPROC limit is still exceeded. ++ */ ++ if ((current->flags & PF_NPROC_EXCEEDED) && ++ atomic_read(¤t_user()->processes) > rlimit(RLIMIT_NPROC)) { ++ retval = -EAGAIN; ++ goto out_ret; ++ } ++ ++ /* We're below the limit (still or again), so we don't want to make ++ * further execve() calls fail. 
*/ ++ current->flags &= ~PF_NPROC_EXCEEDED; ++ ++ retval = unshare_files(&displaced); ++ if (retval) ++ goto out_ret; ++ ++ retval = -ENOMEM; ++ bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); ++ if (!bprm) ++ goto out_files; ++ ++ retval = prepare_bprm_creds(bprm); ++ if (retval) ++ goto out_free; ++ ++ check_unsafe_exec(bprm); ++ current->in_execve = 1; ++ ++ if (!file) ++ file = do_open_execat(fd, filename, flags); ++ retval = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out_unmark; ++ ++ sched_exec(); ++ ++ bprm->file = file; ++ if (!filename) { ++ bprm->filename = "none"; ++ } else if (fd == AT_FDCWD || filename->name[0] == '/') { ++ bprm->filename = filename->name; ++ } else { ++ if (filename->name[0] == '\0') ++ pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd); ++ else ++ pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s", ++ fd, filename->name); ++ if (!pathbuf) { ++ retval = -ENOMEM; ++ goto out_unmark; ++ } ++ /* ++ * Record that a name derived from an O_CLOEXEC fd will be ++ * inaccessible after exec. Relies on having exclusive access to ++ * current->files (due to unshare_files above). ++ */ ++ if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) ++ bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; ++ bprm->filename = pathbuf; ++ } ++ bprm->interp = bprm->filename; ++ ++ retval = bprm_mm_init(bprm); ++ if (retval) ++ goto out_unmark; ++ ++ bprm->argc = count(argv, MAX_ARG_STRINGS); ++ if ((retval = bprm->argc) < 0) ++ goto out; ++ ++ bprm->envc = count(envp, MAX_ARG_STRINGS); ++ if ((retval = bprm->envc) < 0) ++ goto out; ++ ++ retval = prepare_binprm(bprm); ++ if (retval < 0) ++ goto out; ++ ++ retval = copy_strings_kernel(1, &bprm->filename, bprm); ++ if (retval < 0) ++ goto out; ++ ++ bprm->exec = bprm->p; ++ retval = copy_strings(bprm->envc, envp, bprm); ++ if (retval < 0) ++ goto out; ++ ++ retval = copy_strings(bprm->argc, argv, bprm); ++ if (retval < 0) ++ goto out; ++ ++ retval = exec_binprm(bprm); ++ if (retval < 0) ++ goto out; ++ ++ /* execve succeeded */ ++ current->fs->in_exec = 0; ++ current->in_execve = 0; ++ rseq_execve(current); ++ acct_update_integrals(current); ++ task_numa_free(current, false); ++ free_bprm(bprm); ++ kfree(pathbuf); ++ if (filename) ++ putname(filename); ++ if (displaced) ++ put_files_struct(displaced); ++ return retval; ++ ++out: ++ if (bprm->mm) { ++ acct_arg_size(bprm, 0); ++ mmput(bprm->mm); ++ } ++ ++out_unmark: ++ current->fs->in_exec = 0; ++ current->in_execve = 0; ++ ++out_free: ++ free_bprm(bprm); ++ kfree(pathbuf); ++ ++out_files: ++ if (displaced) ++ reset_files_struct(displaced); ++out_ret: ++ if (filename) ++ putname(filename); ++ return retval; ++} ++ ++static int do_execveat_common(int fd, struct filename *filename, ++ struct user_arg_ptr argv, ++ struct user_arg_ptr envp, ++ int flags) ++{ ++ return __do_execve_file(fd, filename, argv, envp, flags, NULL); ++} ++ ++int do_execve_file(struct file *file, void *__argv, void *__envp) ++{ ++ struct user_arg_ptr argv = { .ptr.native = __argv }; ++ struct user_arg_ptr envp = { .ptr.native = __envp }; ++ ++ return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file); ++} ++ ++int do_execve(struct filename *filename, ++ const char __user *const __user *__argv, ++ const char __user *const __user *__envp) ++{ ++ struct user_arg_ptr argv = { .ptr.native = __argv }; ++ struct user_arg_ptr envp = { .ptr.native = __envp }; ++ return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); ++} ++ ++int do_execveat(int fd, struct filename *filename, ++ const char __user *const __user 
*__argv, ++ const char __user *const __user *__envp, ++ int flags) ++{ ++ struct user_arg_ptr argv = { .ptr.native = __argv }; ++ struct user_arg_ptr envp = { .ptr.native = __envp }; ++ ++ return do_execveat_common(fd, filename, argv, envp, flags); ++} ++ ++#ifdef CONFIG_COMPAT ++static int compat_do_execve(struct filename *filename, ++ const compat_uptr_t __user *__argv, ++ const compat_uptr_t __user *__envp) ++{ ++ struct user_arg_ptr argv = { ++ .is_compat = true, ++ .ptr.compat = __argv, ++ }; ++ struct user_arg_ptr envp = { ++ .is_compat = true, ++ .ptr.compat = __envp, ++ }; ++ return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); ++} ++ ++static int compat_do_execveat(int fd, struct filename *filename, ++ const compat_uptr_t __user *__argv, ++ const compat_uptr_t __user *__envp, ++ int flags) ++{ ++ struct user_arg_ptr argv = { ++ .is_compat = true, ++ .ptr.compat = __argv, ++ }; ++ struct user_arg_ptr envp = { ++ .is_compat = true, ++ .ptr.compat = __envp, ++ }; ++ return do_execveat_common(fd, filename, argv, envp, flags); ++} ++#endif ++ ++void set_binfmt(struct linux_binfmt *new) ++{ ++ struct mm_struct *mm = current->mm; ++ ++ if (mm->binfmt) ++ module_put(mm->binfmt->module); ++ ++ mm->binfmt = new; ++ if (new) ++ __module_get(new->module); ++} ++EXPORT_SYMBOL(set_binfmt); ++ ++/* ++ * set_dumpable stores three-value SUID_DUMP_* into mm->flags. ++ */ ++void set_dumpable(struct mm_struct *mm, int value) ++{ ++ unsigned long old, new; ++ ++ if (WARN_ON((unsigned)value > SUID_DUMP_ROOT)) ++ return; ++ ++ do { ++ old = READ_ONCE(mm->flags); ++ new = (old & ~MMF_DUMPABLE_MASK) | value; ++ } while (cmpxchg(&mm->flags, old, new) != old); ++} ++ ++SYSCALL_DEFINE3(execve, ++ const char __user *, filename, ++ const char __user *const __user *, argv, ++ const char __user *const __user *, envp) ++{ ++ return do_execve(getname(filename), argv, envp); ++} ++ ++SYSCALL_DEFINE5(execveat, ++ int, fd, const char __user *, filename, ++ const char __user *const __user *, argv, ++ const char __user *const __user *, envp, ++ int, flags) ++{ ++ int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; ++ ++ return do_execveat(fd, ++ getname_flags(filename, lookup_flags, NULL), ++ argv, envp, flags); ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, ++ const compat_uptr_t __user *, argv, ++ const compat_uptr_t __user *, envp) ++{ ++ return compat_do_execve(getname(filename), argv, envp); ++} ++ ++COMPAT_SYSCALL_DEFINE5(execveat, int, fd, ++ const char __user *, filename, ++ const compat_uptr_t __user *, argv, ++ const compat_uptr_t __user *, envp, ++ int, flags) ++{ ++ int lookup_flags = (flags & AT_EMPTY_PATH) ? 
LOOKUP_EMPTY : 0; ++ ++ return compat_do_execveat(fd, ++ getname_flags(filename, lookup_flags, NULL), ++ argv, envp, flags); ++} ++#endif +diff -uprN kernel/include/asm-generic/atomic.h kernel_new/include/asm-generic/atomic.h +--- kernel/include/asm-generic/atomic.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/asm-generic/atomic.h 2021-04-01 18:28:07.798863128 +0800 +@@ -80,9 +80,9 @@ static inline void atomic_##op(int i, at + { \ + unsigned long flags; \ + \ +- raw_local_irq_save(flags); \ ++ flags = hard_local_irq_save(); \ + v->counter = v->counter c_op i; \ +- raw_local_irq_restore(flags); \ ++ hard_local_irq_restore(flags); \ + } + + #define ATOMIC_OP_RETURN(op, c_op) \ +@@ -91,9 +91,9 @@ static inline int atomic_##op##_return(i + unsigned long flags; \ + int ret; \ + \ +- raw_local_irq_save(flags); \ ++ flags = hard_local_irq_save(); \ + ret = (v->counter = v->counter c_op i); \ +- raw_local_irq_restore(flags); \ ++ hard_local_irq_restore(flags); \ + \ + return ret; \ + } +@@ -104,10 +104,10 @@ static inline int atomic_fetch_##op(int + unsigned long flags; \ + int ret; \ + \ +- raw_local_irq_save(flags); \ ++ flags = hard_local_irq_save(flags); \ + ret = v->counter; \ + v->counter = v->counter c_op i; \ +- raw_local_irq_restore(flags); \ ++ hard_local_irq_restore(flags); \ + \ + return ret; \ + } +diff -uprN kernel/include/asm-generic/cmpxchg-local.h kernel_new/include/asm-generic/cmpxchg-local.h +--- kernel/include/asm-generic/cmpxchg-local.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/asm-generic/cmpxchg-local.h 2021-04-01 18:28:07.798863128 +0800 +@@ -4,6 +4,7 @@ + + #include + #include ++#include + + extern unsigned long wrong_size_cmpxchg(volatile void *ptr) + __noreturn; +@@ -23,7 +24,7 @@ static inline unsigned long __cmpxchg_lo + if (size == 8 && sizeof(unsigned long) != 8) + wrong_size_cmpxchg(ptr); + +- raw_local_irq_save(flags); ++ flags = hard_local_irq_save(); + switch (size) { + case 1: prev = *(u8 *)ptr; + if (prev == old) +@@ -44,7 +45,7 @@ static inline unsigned long __cmpxchg_lo + default: + wrong_size_cmpxchg(ptr); + } +- raw_local_irq_restore(flags); ++ hard_local_irq_restore(flags); + return prev; + } + +@@ -57,11 +58,11 @@ static inline u64 __cmpxchg64_local_gene + u64 prev; + unsigned long flags; + +- raw_local_irq_save(flags); ++ flags = hard_local_irq_save(); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; +- raw_local_irq_restore(flags); ++ hard_local_irq_restore(flags); + return prev; + } + +diff -uprN kernel/include/asm-generic/ipipe.h kernel_new/include/asm-generic/ipipe.h +--- kernel/include/asm-generic/ipipe.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/asm-generic/ipipe.h 2021-04-01 18:28:07.798863128 +0800 +@@ -0,0 +1,93 @@ ++/* -*- linux-c -*- ++ * include/asm-generic/ipipe.h ++ * ++ * Copyright (C) 2002-2017 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. 
++ */ ++#ifndef __ASM_GENERIC_IPIPE_H ++#define __ASM_GENERIC_IPIPE_H ++ ++#ifdef CONFIG_IPIPE ++ ++#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \ ++ defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT) ++void __ipipe_uaccess_might_fault(void); ++#else ++#define __ipipe_uaccess_might_fault() might_fault() ++#endif ++ ++#define hard_cond_local_irq_enable() hard_local_irq_enable() ++#define hard_cond_local_irq_disable() hard_local_irq_disable() ++#define hard_cond_local_irq_save() hard_local_irq_save() ++#define hard_cond_local_irq_restore(flags) hard_local_irq_restore(flags) ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++void ipipe_root_only(void); ++#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++static inline void ipipe_root_only(void) { } ++#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++void ipipe_stall_root(void); ++ ++void ipipe_unstall_root(void); ++ ++unsigned long ipipe_test_and_stall_root(void); ++ ++unsigned long ipipe_test_root(void); ++ ++void ipipe_restore_root(unsigned long x); ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define hard_local_irq_save_notrace() \ ++ ({ \ ++ unsigned long __flags; \ ++ raw_local_irq_save(__flags); \ ++ __flags; \ ++ }) ++ ++#define hard_local_irq_restore_notrace(__flags) \ ++ raw_local_irq_restore(__flags) ++ ++#define hard_local_irq_enable_notrace() \ ++ raw_local_irq_enable() ++ ++#define hard_local_irq_disable_notrace() \ ++ raw_local_irq_disable() ++ ++#define hard_local_irq_save() \ ++ ({ \ ++ unsigned long __flags; \ ++ local_irq_save(__flags); \ ++ __flags; \ ++ }) ++#define hard_local_irq_restore(__flags) local_irq_restore(__flags) ++#define hard_local_irq_enable() local_irq_enable() ++#define hard_local_irq_disable() local_irq_disable() ++#define hard_irqs_disabled() irqs_disabled() ++ ++#define hard_cond_local_irq_enable() do { } while(0) ++#define hard_cond_local_irq_disable() do { } while(0) ++#define hard_cond_local_irq_save() 0 ++#define hard_cond_local_irq_restore(__flags) do { (void)(__flags); } while(0) ++ ++#define __ipipe_uaccess_might_fault() might_fault() ++ ++static inline void ipipe_root_only(void) { } ++ ++#endif /* !CONFIG_IPIPE */ ++ ++#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) ++#define hard_smp_local_irq_save() hard_local_irq_save() ++#define hard_smp_local_irq_restore(__flags) hard_local_irq_restore(__flags) ++#else /* !CONFIG_SMP */ ++#define hard_smp_local_irq_save() 0 ++#define hard_smp_local_irq_restore(__flags) do { (void)(__flags); } while(0) ++#endif /* CONFIG_SMP */ ++ ++#endif +diff -uprN kernel/include/asm-generic/percpu.h kernel_new/include/asm-generic/percpu.h +--- kernel/include/asm-generic/percpu.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/asm-generic/percpu.h 2021-04-01 18:28:07.798863128 +0800 +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_SMP + +@@ -44,11 +45,29 @@ extern unsigned long __per_cpu_offset[NR + #define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) + #endif + ++#ifdef CONFIG_IPIPE ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++unsigned long __ipipe_cpu_get_offset(void); ++#define __ipipe_cpu_offset __ipipe_cpu_get_offset() ++#else ++#define __ipipe_cpu_offset __my_cpu_offset ++#endif ++#ifndef __ipipe_raw_cpu_ptr ++#define __ipipe_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __ipipe_cpu_offset) ++#endif ++#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var))) ++#endif /* CONFIG_IPIPE */ ++ + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA + extern void 
setup_per_cpu_areas(void); + #endif + +-#endif /* SMP */ ++#else /* !SMP */ ++ ++#define __ipipe_raw_cpu_ptr(ptr) VERIFY_PERCPU_PTR(ptr) ++#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var))) ++ ++#endif /* !SMP */ + + #ifndef PER_CPU_BASE_SECTION + #ifdef CONFIG_SMP +@@ -148,9 +167,9 @@ do { \ + #define this_cpu_generic_to_op(pcp, val, op) \ + do { \ + unsigned long __flags; \ +- raw_local_irq_save(__flags); \ ++ __flags = hard_local_irq_save(); \ + raw_cpu_generic_to_op(pcp, val, op); \ +- raw_local_irq_restore(__flags); \ ++ hard_local_irq_restore(__flags); \ + } while (0) + + +@@ -158,9 +177,9 @@ do { \ + ({ \ + typeof(pcp) __ret; \ + unsigned long __flags; \ +- raw_local_irq_save(__flags); \ ++ __flags = hard_local_irq_save(); \ + __ret = raw_cpu_generic_add_return(pcp, val); \ +- raw_local_irq_restore(__flags); \ ++ hard_local_irq_restore(__flags); \ + __ret; \ + }) + +@@ -168,9 +187,9 @@ do { \ + ({ \ + typeof(pcp) __ret; \ + unsigned long __flags; \ +- raw_local_irq_save(__flags); \ ++ __flags = hard_local_irq_save(); \ + __ret = raw_cpu_generic_xchg(pcp, nval); \ +- raw_local_irq_restore(__flags); \ ++ hard_local_irq_restore(__flags); \ + __ret; \ + }) + +@@ -178,9 +197,9 @@ do { \ + ({ \ + typeof(pcp) __ret; \ + unsigned long __flags; \ +- raw_local_irq_save(__flags); \ ++ __flags = hard_local_irq_save(); \ + __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \ +- raw_local_irq_restore(__flags); \ ++ hard_local_irq_restore(__flags); \ + __ret; \ + }) + +@@ -188,10 +207,10 @@ do { \ + ({ \ + int __ret; \ + unsigned long __flags; \ +- raw_local_irq_save(__flags); \ ++ __flags = hard_local_irq_save(); \ + __ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ + oval1, oval2, nval1, nval2); \ +- raw_local_irq_restore(__flags); \ ++ hard_local_irq_restore(__flags); \ + __ret; \ + }) + +diff -uprN kernel/include/asm-generic/switch_to.h kernel_new/include/asm-generic/switch_to.h +--- kernel/include/asm-generic/switch_to.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/asm-generic/switch_to.h 2021-04-01 18:28:07.798863128 +0800 +@@ -21,10 +21,17 @@ + */ + extern struct task_struct *__switch_to(struct task_struct *, + struct task_struct *); +- ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + #define switch_to(prev, next, last) \ + do { \ ++ hard_cond_local_irq_disable(); \ + ((last) = __switch_to((prev), (next))); \ ++ hard_cond_local_irq_enable(); \ + } while (0) +- ++#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++#define switch_to(prev, next, last) \ ++ do { \ ++ ((last) = __switch_to((prev), (next))); \ ++ } while (0) ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + #endif /* __ASM_GENERIC_SWITCH_TO_H */ +diff -uprN kernel/include/clocksource/timer-sp804.h kernel_new/include/clocksource/timer-sp804.h +--- kernel/include/clocksource/timer-sp804.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/clocksource/timer-sp804.h 2021-04-01 18:28:07.799863127 +0800 +@@ -5,20 +5,23 @@ + struct clk; + + int __sp804_clocksource_and_sched_clock_init(void __iomem *, ++ unsigned long phys, + const char *, struct clk *, int); + int __sp804_clockevents_init(void __iomem *, unsigned int, + struct clk *, const char *); + void sp804_timer_disable(void __iomem *); + +-static inline void sp804_clocksource_init(void __iomem *base, const char *name) ++static inline void sp804_clocksource_init(void __iomem *base, unsigned long phys, ++ const char *name) + { +- __sp804_clocksource_and_sched_clock_init(base, name, NULL, 0); ++ 
__sp804_clocksource_and_sched_clock_init(base, phys, name, NULL, 0); + } + + static inline void sp804_clocksource_and_sched_clock_init(void __iomem *base, ++ unsigned long phys, + const char *name) + { +- __sp804_clocksource_and_sched_clock_init(base, name, NULL, 1); ++ __sp804_clocksource_and_sched_clock_init(base, phys, name, NULL, 1); + } + + static inline void sp804_clockevents_init(void __iomem *base, unsigned int irq, const char *name) +diff -uprN kernel/include/ipipe/setup.h kernel_new/include/ipipe/setup.h +--- kernel/include/ipipe/setup.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/ipipe/setup.h 2021-04-01 18:28:07.799863127 +0800 +@@ -0,0 +1,10 @@ ++#ifndef _IPIPE_SETUP_H ++#define _IPIPE_SETUP_H ++ ++/* ++ * Placeholders for setup hooks defined by client domains. ++ */ ++ ++static inline void __ipipe_early_client_setup(void) { } ++ ++#endif /* !_IPIPE_SETUP_H */ +diff -uprN kernel/include/ipipe/thread_info.h kernel_new/include/ipipe/thread_info.h +--- kernel/include/ipipe/thread_info.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/ipipe/thread_info.h 2021-04-01 18:28:07.799863127 +0800 +@@ -0,0 +1,14 @@ ++#ifndef _IPIPE_THREAD_INFO_H ++#define _IPIPE_THREAD_INFO_H ++ ++/* ++ * Placeholder for private thread information defined by client ++ * domains. ++ */ ++ ++struct ipipe_threadinfo { ++}; ++ ++#define __ipipe_init_threadinfo(__p) do { } while (0) ++ ++#endif /* !_IPIPE_THREAD_INFO_H */ +diff -uprN kernel/include/linux/clockchips.h kernel_new/include/linux/clockchips.h +--- kernel/include/linux/clockchips.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/clockchips.h 2021-04-01 18:28:07.799863127 +0800 +@@ -129,6 +129,15 @@ struct clock_event_device { + const struct cpumask *cpumask; + struct list_head list; + struct module *owner; ++ ++#ifdef CONFIG_IPIPE ++ struct ipipe_timer *ipipe_timer; ++ unsigned ipipe_stolen; ++ ++#define clockevent_ipipe_stolen(evt) ((evt)->ipipe_stolen) ++#else ++#define clockevent_ipipe_stolen(evt) (0) ++#endif /* !CONFIG_IPIPE */ + } ____cacheline_aligned; + + /* Helpers to verify state of a clockevent device */ +diff -uprN kernel/include/linux/console.h kernel_new/include/linux/console.h +--- kernel/include/linux/console.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/console.h 2021-04-01 18:28:07.799863127 +0800 +@@ -141,10 +141,12 @@ static inline int con_debug_leave(void) + #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ + #define CON_BRL (32) /* Used for a braille device */ + #define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ ++#define CON_RAW (128) /* Supports raw write mode */ + + struct console { + char name[16]; + void (*write)(struct console *, const char *, unsigned); ++ void (*write_raw)(struct console *, const char *, unsigned); + int (*read)(struct console *, char *, unsigned); + struct tty_driver *(*device)(struct console *, int *); + void (*unblank)(void); +diff -uprN kernel/include/linux/dw_apb_timer.h kernel_new/include/linux/dw_apb_timer.h +--- kernel/include/linux/dw_apb_timer.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/dw_apb_timer.h 2021-04-01 18:28:07.799863127 +0800 +@@ -35,6 +35,7 @@ struct dw_apb_clock_event_device { + struct dw_apb_clocksource { + struct dw_apb_timer timer; + struct clocksource cs; ++ unsigned long phys; + }; + + void dw_apb_clockevent_register(struct dw_apb_clock_event_device *dw_ced); +@@ -47,7 +48,7 @@ dw_apb_clockevent_init(int cpu, const ch + void __iomem *base, int 
irq, unsigned long freq); + struct dw_apb_clocksource * + dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base, +- unsigned long freq); ++ unsigned long phys, unsigned long freq); + void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs); + void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs); + u64 dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs); +diff -uprN kernel/include/linux/ftrace.h kernel_new/include/linux/ftrace.h +--- kernel/include/linux/ftrace.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/ftrace.h 2021-04-01 18:28:07.799863127 +0800 +@@ -160,6 +160,7 @@ enum { + FTRACE_OPS_FL_PID = 1 << 13, + FTRACE_OPS_FL_RCU = 1 << 14, + FTRACE_OPS_FL_TRACE_ARRAY = 1 << 15, ++ FTRACE_OPS_FL_IPIPE_EXCLUSIVE = 1 << 17, + }; + + #ifdef CONFIG_DYNAMIC_FTRACE +diff -uprN kernel/include/linux/gpio/driver.h kernel_new/include/linux/gpio/driver.h +--- kernel/include/linux/gpio/driver.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/gpio/driver.h 2021-04-01 18:28:07.799863127 +0800 +@@ -284,7 +284,7 @@ struct gpio_chip { + void __iomem *reg_dir; + bool bgpio_dir_inverted; + int bgpio_bits; +- spinlock_t bgpio_lock; ++ ipipe_spinlock_t bgpio_lock; + unsigned long bgpio_data; + unsigned long bgpio_dir; + #endif +diff -uprN kernel/include/linux/gpio/driver.h.orig kernel_new/include/linux/gpio/driver.h.orig +--- kernel/include/linux/gpio/driver.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/gpio/driver.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,602 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __LINUX_GPIO_DRIVER_H ++#define __LINUX_GPIO_DRIVER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct gpio_desc; ++struct of_phandle_args; ++struct device_node; ++struct seq_file; ++struct gpio_device; ++struct module; ++ ++#ifdef CONFIG_GPIOLIB ++ ++#ifdef CONFIG_GPIOLIB_IRQCHIP ++/** ++ * struct gpio_irq_chip - GPIO interrupt controller ++ */ ++struct gpio_irq_chip { ++ /** ++ * @chip: ++ * ++ * GPIO IRQ chip implementation, provided by GPIO driver. ++ */ ++ struct irq_chip *chip; ++ ++ /** ++ * @domain: ++ * ++ * Interrupt translation domain; responsible for mapping between GPIO ++ * hwirq number and Linux IRQ number. ++ */ ++ struct irq_domain *domain; ++ ++ /** ++ * @domain_ops: ++ * ++ * Table of interrupt domain operations for this IRQ chip. ++ */ ++ const struct irq_domain_ops *domain_ops; ++ ++ /** ++ * @handler: ++ * ++ * The IRQ handler to use (often a predefined IRQ core function) for ++ * GPIO IRQs, provided by GPIO driver. ++ */ ++ irq_flow_handler_t handler; ++ ++ /** ++ * @default_type: ++ * ++ * Default IRQ triggering type applied during GPIO driver ++ * initialization, provided by GPIO driver. ++ */ ++ unsigned int default_type; ++ ++ /** ++ * @lock_key: ++ * ++ * Per GPIO IRQ chip lockdep class for IRQ lock. ++ */ ++ struct lock_class_key *lock_key; ++ ++ /** ++ * @request_key: ++ * ++ * Per GPIO IRQ chip lockdep class for IRQ request. ++ */ ++ struct lock_class_key *request_key; ++ ++ /** ++ * @parent_handler: ++ * ++ * The interrupt handler for the GPIO chip's parent interrupts, may be ++ * NULL if the parent interrupts are nested rather than cascaded. ++ */ ++ irq_flow_handler_t parent_handler; ++ ++ /** ++ * @parent_handler_data: ++ * ++ * Data associated, and passed to, the handler for the parent ++ * interrupt. 
++ */ ++ void *parent_handler_data; ++ ++ /** ++ * @num_parents: ++ * ++ * The number of interrupt parents of a GPIO chip. ++ */ ++ unsigned int num_parents; ++ ++ /** ++ * @parent_irq: ++ * ++ * For use by gpiochip_set_cascaded_irqchip() ++ */ ++ unsigned int parent_irq; ++ ++ /** ++ * @parents: ++ * ++ * A list of interrupt parents of a GPIO chip. This is owned by the ++ * driver, so the core will only reference this list, not modify it. ++ */ ++ unsigned int *parents; ++ ++ /** ++ * @map: ++ * ++ * A list of interrupt parents for each line of a GPIO chip. ++ */ ++ unsigned int *map; ++ ++ /** ++ * @threaded: ++ * ++ * True if set the interrupt handling uses nested threads. ++ */ ++ bool threaded; ++ ++ /** ++ * @need_valid_mask: ++ * ++ * If set core allocates @valid_mask with all bits set to one. ++ */ ++ bool need_valid_mask; ++ ++ /** ++ * @valid_mask: ++ * ++ * If not %NULL holds bitmask of GPIOs which are valid to be included ++ * in IRQ domain of the chip. ++ */ ++ unsigned long *valid_mask; ++ ++ /** ++ * @first: ++ * ++ * Required for static IRQ allocation. If set, irq_domain_add_simple() ++ * will allocate and map all IRQs during initialization. ++ */ ++ unsigned int first; ++}; ++#endif ++ ++/** ++ * struct gpio_chip - abstract a GPIO controller ++ * @label: a functional name for the GPIO device, such as a part ++ * number or the name of the SoC IP-block implementing it. ++ * @gpiodev: the internal state holder, opaque struct ++ * @parent: optional parent device providing the GPIOs ++ * @owner: helps prevent removal of modules exporting active GPIOs ++ * @request: optional hook for chip-specific activation, such as ++ * enabling module power and clock; may sleep ++ * @free: optional hook for chip-specific deactivation, such as ++ * disabling module power and clock; may sleep ++ * @get_direction: returns direction for signal "offset", 0=out, 1=in, ++ * (same as GPIOF_DIR_XXX), or negative error ++ * @direction_input: configures signal "offset" as input, or returns error ++ * @direction_output: configures signal "offset" as output, or returns error ++ * @get: returns value for signal "offset", 0=low, 1=high, or negative error ++ * @get_multiple: reads values for multiple signals defined by "mask" and ++ * stores them in "bits", returns 0 on success or negative error ++ * @set: assigns output value for signal "offset" ++ * @set_multiple: assigns output values for multiple signals defined by "mask" ++ * @set_config: optional hook for all kinds of settings. Uses the same ++ * packed config format as generic pinconf. ++ * @to_irq: optional hook supporting non-static gpio_to_irq() mappings; ++ * implementation may not sleep ++ * @dbg_show: optional routine to show contents in debugfs; default code ++ * will be used when this is omitted, but custom code can show extra ++ * state (such as pullup/pulldown configuration). ++ * @init_valid_mask: optional routine to initialize @valid_mask, to be used if ++ * not all GPIOs are valid. ++ * @base: identifies the first GPIO number handled by this chip; ++ * or, if negative during registration, requests dynamic ID allocation. ++ * DEPRECATION: providing anything non-negative and nailing the base ++ * offset of GPIO chips is deprecated. Please pass -1 as base to ++ * let gpiolib select the chip base in all possible cases. We want to ++ * get rid of the static GPIO number space in the long run. ++ * @ngpio: the number of GPIOs handled by this controller; the last GPIO ++ * handled is (base + ngpio - 1). 
++ * @names: if set, must be an array of strings to use as alternative ++ * names for the GPIOs in this chip. Any entry in the array ++ * may be NULL if there is no alias for the GPIO, however the ++ * array must be @ngpio entries long. A name can include a single printk ++ * format specifier for an unsigned int. It is substituted by the actual ++ * number of the gpio. ++ * @can_sleep: flag must be set iff get()/set() methods sleep, as they ++ * must while accessing GPIO expander chips over I2C or SPI. This ++ * implies that if the chip supports IRQs, these IRQs need to be threaded ++ * as the chip access may sleep when e.g. reading out the IRQ status ++ * registers. ++ * @read_reg: reader function for generic GPIO ++ * @write_reg: writer function for generic GPIO ++ * @be_bits: if the generic GPIO has big endian bit order (bit 31 is representing ++ * line 0, bit 30 is line 1 ... bit 0 is line 31) this is set to true by the ++ * generic GPIO core. It is for internal housekeeping only. ++ * @reg_dat: data (in) register for generic GPIO ++ * @reg_set: output set register (out=high) for generic GPIO ++ * @reg_clr: output clear register (out=low) for generic GPIO ++ * @reg_dir: direction setting register for generic GPIO ++ * @bgpio_dir_inverted: indicates that the direction register is inverted ++ * (gpiolib private state variable) ++ * @bgpio_bits: number of register bits used for a generic GPIO i.e. ++ * * 8 ++ * @bgpio_lock: used to lock chip->bgpio_data. Also, this is needed to keep ++ * shadowed and real data registers writes together. ++ * @bgpio_data: shadowed data register for generic GPIO to clear/set bits ++ * safely. ++ * @bgpio_dir: shadowed direction register for generic GPIO to clear/set ++ * direction safely. ++ * ++ * A gpio_chip can help platforms abstract various sources of GPIOs so ++ * they can all be accessed through a common programing interface. ++ * Example sources would be SOC controllers, FPGAs, multifunction ++ * chips, dedicated GPIO expanders, and so on. ++ * ++ * Each chip controls a number of signals, identified in method calls ++ * by "offset" values in the range 0..(@ngpio - 1). When those signals ++ * are referenced through calls like gpio_get_value(gpio), the offset ++ * is calculated by subtracting @base from the gpio number. 
++ */ ++struct gpio_chip { ++ const char *label; ++ struct gpio_device *gpiodev; ++ struct device *parent; ++ struct module *owner; ++ ++ int (*request)(struct gpio_chip *chip, ++ unsigned offset); ++ void (*free)(struct gpio_chip *chip, ++ unsigned offset); ++ int (*get_direction)(struct gpio_chip *chip, ++ unsigned offset); ++ int (*direction_input)(struct gpio_chip *chip, ++ unsigned offset); ++ int (*direction_output)(struct gpio_chip *chip, ++ unsigned offset, int value); ++ int (*get)(struct gpio_chip *chip, ++ unsigned offset); ++ int (*get_multiple)(struct gpio_chip *chip, ++ unsigned long *mask, ++ unsigned long *bits); ++ void (*set)(struct gpio_chip *chip, ++ unsigned offset, int value); ++ void (*set_multiple)(struct gpio_chip *chip, ++ unsigned long *mask, ++ unsigned long *bits); ++ int (*set_config)(struct gpio_chip *chip, ++ unsigned offset, ++ unsigned long config); ++ int (*to_irq)(struct gpio_chip *chip, ++ unsigned offset); ++ ++ void (*dbg_show)(struct seq_file *s, ++ struct gpio_chip *chip); ++ ++ int (*init_valid_mask)(struct gpio_chip *chip); ++ ++ int base; ++ u16 ngpio; ++ const char *const *names; ++ bool can_sleep; ++ ++#if IS_ENABLED(CONFIG_GPIO_GENERIC) ++ unsigned long (*read_reg)(void __iomem *reg); ++ void (*write_reg)(void __iomem *reg, unsigned long data); ++ bool be_bits; ++ void __iomem *reg_dat; ++ void __iomem *reg_set; ++ void __iomem *reg_clr; ++ void __iomem *reg_dir; ++ bool bgpio_dir_inverted; ++ int bgpio_bits; ++ spinlock_t bgpio_lock; ++ unsigned long bgpio_data; ++ unsigned long bgpio_dir; ++#endif ++ ++#ifdef CONFIG_GPIOLIB_IRQCHIP ++ /* ++ * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib ++ * to handle IRQs for most practical cases. ++ */ ++ ++ /** ++ * @irq: ++ * ++ * Integrates interrupt chip functionality with the GPIO chip. Can be ++ * used to handle IRQs for most practical cases. ++ */ ++ struct gpio_irq_chip irq; ++#endif ++ ++ /** ++ * @need_valid_mask: ++ * ++ * If set core allocates @valid_mask with all its values initialized ++ * with init_valid_mask() or set to one if init_valid_mask() is not ++ * defined ++ */ ++ bool need_valid_mask; ++ ++ /** ++ * @valid_mask: ++ * ++ * If not %NULL holds bitmask of GPIOs which are valid to be used ++ * from the chip. ++ */ ++ unsigned long *valid_mask; ++ ++#if defined(CONFIG_OF_GPIO) ++ /* ++ * If CONFIG_OF is enabled, then all GPIO controllers described in the ++ * device tree automatically may have an OF translation ++ */ ++ ++ /** ++ * @of_node: ++ * ++ * Pointer to a device tree node representing this GPIO controller. ++ */ ++ struct device_node *of_node; ++ ++ /** ++ * @of_gpio_n_cells: ++ * ++ * Number of cells used to form the GPIO specifier. ++ */ ++ unsigned int of_gpio_n_cells; ++ ++ /** ++ * @of_xlate: ++ * ++ * Callback to translate a device tree GPIO specifier into a chip- ++ * relative GPIO number and flags. 
++ */ ++ int (*of_xlate)(struct gpio_chip *gc, ++ const struct of_phandle_args *gpiospec, u32 *flags); ++#endif ++}; ++ ++extern const char *gpiochip_is_requested(struct gpio_chip *chip, ++ unsigned offset); ++ ++/* add/remove chips */ ++extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, ++ struct lock_class_key *lock_key, ++ struct lock_class_key *request_key); ++ ++/** ++ * gpiochip_add_data() - register a gpio_chip ++ * @chip: the chip to register, with chip->base initialized ++ * @data: driver-private data associated with this chip ++ * ++ * Context: potentially before irqs will work ++ * ++ * When gpiochip_add_data() is called very early during boot, so that GPIOs ++ * can be freely used, the chip->parent device must be registered before ++ * the gpio framework's arch_initcall(). Otherwise sysfs initialization ++ * for GPIOs will fail rudely. ++ * ++ * gpiochip_add_data() must only be called after gpiolib initialization, ++ * ie after core_initcall(). ++ * ++ * If chip->base is negative, this requests dynamic assignment of ++ * a range of valid GPIOs. ++ * ++ * Returns: ++ * A negative errno if the chip can't be registered, such as because the ++ * chip->base is invalid or already associated with a different chip. ++ * Otherwise it returns zero as a success code. ++ */ ++#ifdef CONFIG_LOCKDEP ++#define gpiochip_add_data(chip, data) ({ \ ++ static struct lock_class_key lock_key; \ ++ static struct lock_class_key request_key; \ ++ gpiochip_add_data_with_key(chip, data, &lock_key, \ ++ &request_key); \ ++ }) ++#else ++#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL) ++#endif ++ ++static inline int gpiochip_add(struct gpio_chip *chip) ++{ ++ return gpiochip_add_data(chip, NULL); ++} ++extern void gpiochip_remove(struct gpio_chip *chip); ++extern int devm_gpiochip_add_data(struct device *dev, struct gpio_chip *chip, ++ void *data); ++extern void devm_gpiochip_remove(struct device *dev, struct gpio_chip *chip); ++ ++extern struct gpio_chip *gpiochip_find(void *data, ++ int (*match)(struct gpio_chip *chip, void *data)); ++ ++/* lock/unlock as IRQ */ ++int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset); ++void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); ++bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset); ++ ++/* Line status inquiry for drivers */ ++bool gpiochip_line_is_open_drain(struct gpio_chip *chip, unsigned int offset); ++bool gpiochip_line_is_open_source(struct gpio_chip *chip, unsigned int offset); ++ ++/* Sleep persistence inquiry for drivers */ ++bool gpiochip_line_is_persistent(struct gpio_chip *chip, unsigned int offset); ++bool gpiochip_line_is_valid(const struct gpio_chip *chip, unsigned int offset); ++ ++/* get driver data */ ++void *gpiochip_get_data(struct gpio_chip *chip); ++ ++struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); ++ ++struct bgpio_pdata { ++ const char *label; ++ int base; ++ int ngpio; ++}; ++ ++#if IS_ENABLED(CONFIG_GPIO_GENERIC) ++ ++int bgpio_init(struct gpio_chip *gc, struct device *dev, ++ unsigned long sz, void __iomem *dat, void __iomem *set, ++ void __iomem *clr, void __iomem *dirout, void __iomem *dirin, ++ unsigned long flags); ++ ++#define BGPIOF_BIG_ENDIAN BIT(0) ++#define BGPIOF_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ ++#define BGPIOF_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ ++#define BGPIOF_BIG_ENDIAN_BYTE_ORDER BIT(3) ++#define BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores 
output value */ ++#define BGPIOF_NO_OUTPUT BIT(5) /* only input */ ++ ++#endif ++ ++#ifdef CONFIG_GPIOLIB_IRQCHIP ++ ++int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, ++ irq_hw_number_t hwirq); ++void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq); ++ ++void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int parent_irq, ++ irq_flow_handler_t parent_handler); ++ ++void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int parent_irq); ++ ++int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int first_irq, ++ irq_flow_handler_t handler, ++ unsigned int type, ++ bool threaded, ++ struct lock_class_key *lock_key, ++ struct lock_class_key *request_key); ++ ++bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gpiochip, ++ unsigned int offset); ++ ++#ifdef CONFIG_LOCKDEP ++ ++/* ++ * Lockdep requires that each irqchip instance be created with a ++ * unique key so as to avoid unnecessary warnings. This upfront ++ * boilerplate static inlines provides such a key for each ++ * unique instance. ++ */ ++static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int first_irq, ++ irq_flow_handler_t handler, ++ unsigned int type) ++{ ++ static struct lock_class_key lock_key; ++ static struct lock_class_key request_key; ++ ++ return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, ++ handler, type, false, ++ &lock_key, &request_key); ++} ++ ++static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int first_irq, ++ irq_flow_handler_t handler, ++ unsigned int type) ++{ ++ ++ static struct lock_class_key lock_key; ++ static struct lock_class_key request_key; ++ ++ return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, ++ handler, type, true, ++ &lock_key, &request_key); ++} ++#else ++static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int first_irq, ++ irq_flow_handler_t handler, ++ unsigned int type) ++{ ++ return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, ++ handler, type, false, NULL, NULL); ++} ++ ++static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int first_irq, ++ irq_flow_handler_t handler, ++ unsigned int type) ++{ ++ return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, ++ handler, type, true, NULL, NULL); ++} ++#endif /* CONFIG_LOCKDEP */ ++ ++#endif /* CONFIG_GPIOLIB_IRQCHIP */ ++ ++int gpiochip_generic_request(struct gpio_chip *chip, unsigned offset); ++void gpiochip_generic_free(struct gpio_chip *chip, unsigned offset); ++int gpiochip_generic_config(struct gpio_chip *chip, unsigned offset, ++ unsigned long config); ++ ++#ifdef CONFIG_PINCTRL ++ ++/** ++ * struct gpio_pin_range - pin range controlled by a gpio chip ++ * @node: list for maintaining set of pin ranges, used internally ++ * @pctldev: pinctrl device which handles corresponding pins ++ * @range: actual range of pins controlled by a gpio controller ++ */ ++struct gpio_pin_range { ++ struct list_head node; ++ struct pinctrl_dev *pctldev; ++ struct pinctrl_gpio_range range; ++}; ++ ++int gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, ++ unsigned int gpio_offset, unsigned int pin_offset, ++ unsigned int npins); ++int gpiochip_add_pingroup_range(struct gpio_chip *chip, ++ struct 
pinctrl_dev *pctldev, ++ unsigned int gpio_offset, const char *pin_group); ++void gpiochip_remove_pin_ranges(struct gpio_chip *chip); ++ ++#else ++ ++static inline int ++gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, ++ unsigned int gpio_offset, unsigned int pin_offset, ++ unsigned int npins) ++{ ++ return 0; ++} ++static inline int ++gpiochip_add_pingroup_range(struct gpio_chip *chip, ++ struct pinctrl_dev *pctldev, ++ unsigned int gpio_offset, const char *pin_group) ++{ ++ return 0; ++} ++ ++static inline void ++gpiochip_remove_pin_ranges(struct gpio_chip *chip) ++{ ++} ++ ++#endif /* CONFIG_PINCTRL */ ++ ++struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, ++ const char *label); ++void gpiochip_free_own_desc(struct gpio_desc *desc); ++ ++#else /* CONFIG_GPIOLIB */ ++ ++static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) ++{ ++ /* GPIO can never have been requested */ ++ WARN_ON(1); ++ return ERR_PTR(-ENODEV); ++} ++ ++#endif /* CONFIG_GPIOLIB */ ++ ++#endif +diff -uprN kernel/include/linux/hardirq.h kernel_new/include/linux/hardirq.h +--- kernel/include/linux/hardirq.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/hardirq.h 2021-04-01 18:28:07.799863127 +0800 +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + #include + + +@@ -67,6 +68,7 @@ extern void irq_exit(void); + + #define nmi_enter() \ + do { \ ++ __ipipe_nmi_enter(); \ + arch_nmi_enter(); \ + printk_nmi_enter(); \ + lockdep_off(); \ +@@ -87,6 +89,7 @@ extern void irq_exit(void); + lockdep_on(); \ + printk_nmi_exit(); \ + arch_nmi_exit(); \ ++ __ipipe_nmi_exit(); \ + } while (0) + + #endif /* LINUX_HARDIRQ_H */ +diff -uprN kernel/include/linux/interrupt.h kernel_new/include/linux/interrupt.h +--- kernel/include/linux/interrupt.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/interrupt.h 2021-04-01 18:28:07.800863126 +0800 +@@ -472,6 +472,23 @@ extern bool force_irqthreads; + #define hard_irq_disable() do { } while(0) + #endif + ++/* ++ * Unlike other virtualized interrupt disabling schemes may assume, we ++ * can't expect local_irq_restore() to turn hard interrupts on when ++ * pipelining. hard_irq_enable() is introduced to be paired with ++ * hard_irq_disable(), for unconditionally turning them on. The only ++ * sane sequence mixing virtual and real disable state manipulation ++ * is: ++ * ++ * 1. local_irq_save/disable ++ * 2. hard_irq_disable ++ * 3. hard_irq_enable ++ * 4. local_irq_restore/enable ++ */ ++#ifndef hard_irq_enable ++#define hard_irq_enable() hard_cond_local_irq_enable() ++#endif ++ + /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high + frequency threaded job scheduling. For almost all the purposes + tasklets are more than enough. F.e. all serial device BHs et +diff -uprN kernel/include/linux/interrupt.h.orig kernel_new/include/linux/interrupt.h.orig +--- kernel/include/linux/interrupt.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/interrupt.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,746 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* interrupt.h */ ++#ifndef _LINUX_INTERRUPT_H ++#define _LINUX_INTERRUPT_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * These correspond to the IORESOURCE_IRQ_* defines in ++ * linux/ioport.h to select the interrupt line behaviour. 
When ++ * requesting an interrupt without specifying a IRQF_TRIGGER, the ++ * setting should be assumed to be "as already configured", which ++ * may be as per machine or firmware initialisation. ++ */ ++#define IRQF_TRIGGER_NONE 0x00000000 ++#define IRQF_TRIGGER_RISING 0x00000001 ++#define IRQF_TRIGGER_FALLING 0x00000002 ++#define IRQF_TRIGGER_HIGH 0x00000004 ++#define IRQF_TRIGGER_LOW 0x00000008 ++#define IRQF_TRIGGER_MASK (IRQF_TRIGGER_HIGH | IRQF_TRIGGER_LOW | \ ++ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING) ++#define IRQF_TRIGGER_PROBE 0x00000010 ++ ++/* ++ * These flags used only by the kernel as part of the ++ * irq handling routines. ++ * ++ * IRQF_SHARED - allow sharing the irq among several devices ++ * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur ++ * IRQF_TIMER - Flag to mark this interrupt as timer interrupt ++ * IRQF_PERCPU - Interrupt is per cpu ++ * IRQF_NOBALANCING - Flag to exclude this interrupt from irq balancing ++ * IRQF_IRQPOLL - Interrupt is used for polling (only the interrupt that is ++ * registered first in an shared interrupt is considered for ++ * performance reasons) ++ * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished. ++ * Used by threaded interrupts which need to keep the ++ * irq line disabled until the threaded handler has been run. ++ * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend. Does not guarantee ++ * that this interrupt will wake the system from a suspended ++ * state. See Documentation/power/suspend-and-interrupts.txt ++ * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set ++ * IRQF_NO_THREAD - Interrupt cannot be threaded ++ * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device ++ * resume time. ++ * IRQF_COND_SUSPEND - If the IRQ is shared with a NO_SUSPEND user, execute this ++ * interrupt handler after suspending interrupts. For system ++ * wakeup devices users need to implement wakeup detection in ++ * their interrupt handlers. ++ */ ++#define IRQF_SHARED 0x00000080 ++#define IRQF_PROBE_SHARED 0x00000100 ++#define __IRQF_TIMER 0x00000200 ++#define IRQF_PERCPU 0x00000400 ++#define IRQF_NOBALANCING 0x00000800 ++#define IRQF_IRQPOLL 0x00001000 ++#define IRQF_ONESHOT 0x00002000 ++#define IRQF_NO_SUSPEND 0x00004000 ++#define IRQF_FORCE_RESUME 0x00008000 ++#define IRQF_NO_THREAD 0x00010000 ++#define IRQF_EARLY_RESUME 0x00020000 ++#define IRQF_COND_SUSPEND 0x00040000 ++ ++#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) ++ ++/* ++ * These values can be returned by request_any_context_irq() and ++ * describe the context the interrupt will be run in. 
++ * ++ * IRQC_IS_HARDIRQ - interrupt runs in hardirq context ++ * IRQC_IS_NESTED - interrupt runs in a nested threaded context ++ */ ++enum { ++ IRQC_IS_HARDIRQ = 0, ++ IRQC_IS_NESTED, ++}; ++ ++typedef irqreturn_t (*irq_handler_t)(int, void *); ++ ++/** ++ * struct irqaction - per interrupt action descriptor ++ * @handler: interrupt handler function ++ * @name: name of the device ++ * @dev_id: cookie to identify the device ++ * @percpu_dev_id: cookie to identify the device ++ * @next: pointer to the next irqaction for shared interrupts ++ * @irq: interrupt number ++ * @flags: flags (see IRQF_* above) ++ * @thread_fn: interrupt handler function for threaded interrupts ++ * @thread: thread pointer for threaded interrupts ++ * @secondary: pointer to secondary irqaction (force threading) ++ * @thread_flags: flags related to @thread ++ * @thread_mask: bitmask for keeping track of @thread activity ++ * @dir: pointer to the proc/irq/NN/name entry ++ */ ++struct irqaction { ++ irq_handler_t handler; ++ void *dev_id; ++ void __percpu *percpu_dev_id; ++ struct irqaction *next; ++ irq_handler_t thread_fn; ++ struct task_struct *thread; ++ struct irqaction *secondary; ++ unsigned int irq; ++ unsigned int flags; ++ unsigned long thread_flags; ++ unsigned long thread_mask; ++ const char *name; ++ struct proc_dir_entry *dir; ++} ____cacheline_internodealigned_in_smp; ++ ++extern irqreturn_t no_action(int cpl, void *dev_id); ++ ++/* ++ * If a (PCI) device interrupt is not connected we set dev->irq to ++ * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we ++ * can distingiush that case from other error returns. ++ * ++ * 0x80000000 is guaranteed to be outside the available range of interrupts ++ * and easy to distinguish from other possible incorrect values. 
++ */ ++#define IRQ_NOTCONNECTED (1U << 31) ++ ++extern int __must_check ++request_threaded_irq(unsigned int irq, irq_handler_t handler, ++ irq_handler_t thread_fn, ++ unsigned long flags, const char *name, void *dev); ++ ++static inline int __must_check ++request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, ++ const char *name, void *dev) ++{ ++ return request_threaded_irq(irq, handler, NULL, flags, name, dev); ++} ++ ++extern int __must_check ++request_any_context_irq(unsigned int irq, irq_handler_t handler, ++ unsigned long flags, const char *name, void *dev_id); ++ ++extern int __must_check ++__request_percpu_irq(unsigned int irq, irq_handler_t handler, ++ unsigned long flags, const char *devname, ++ void __percpu *percpu_dev_id); ++ ++extern int __must_check ++request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags, ++ const char *name, void *dev); ++ ++static inline int __must_check ++request_percpu_irq(unsigned int irq, irq_handler_t handler, ++ const char *devname, void __percpu *percpu_dev_id) ++{ ++ return __request_percpu_irq(irq, handler, 0, ++ devname, percpu_dev_id); ++} ++ ++extern int __must_check ++request_percpu_nmi(unsigned int irq, irq_handler_t handler, ++ const char *devname, void __percpu *dev); ++ ++extern const void *free_irq(unsigned int, void *); ++extern void free_percpu_irq(unsigned int, void __percpu *); ++ ++extern const void *free_nmi(unsigned int irq, void *dev_id); ++extern void free_percpu_nmi(unsigned int irq, void __percpu *percpu_dev_id); ++ ++struct device; ++ ++extern int __must_check ++devm_request_threaded_irq(struct device *dev, unsigned int irq, ++ irq_handler_t handler, irq_handler_t thread_fn, ++ unsigned long irqflags, const char *devname, ++ void *dev_id); ++ ++static inline int __must_check ++devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler, ++ unsigned long irqflags, const char *devname, void *dev_id) ++{ ++ return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags, ++ devname, dev_id); ++} ++ ++extern int __must_check ++devm_request_any_context_irq(struct device *dev, unsigned int irq, ++ irq_handler_t handler, unsigned long irqflags, ++ const char *devname, void *dev_id); ++ ++extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); ++ ++/* ++ * On lockdep we dont want to enable hardirqs in hardirq ++ * context. Use local_irq_enable_in_hardirq() to annotate ++ * kernel code that has to do this nevertheless (pretty much ++ * the only valid case is for old/broken hardware that is ++ * insanely slow). ++ * ++ * NOTE: in theory this might break fragile code that relies ++ * on hardirq delivery - in practice we dont seem to have such ++ * places left. So the only effect should be slightly increased ++ * irqs-off latencies. 
++ */ ++#ifdef CONFIG_LOCKDEP ++# define local_irq_enable_in_hardirq() do { } while (0) ++#else ++# define local_irq_enable_in_hardirq() local_irq_enable() ++#endif ++ ++extern void disable_irq_nosync(unsigned int irq); ++extern bool disable_hardirq(unsigned int irq); ++extern void disable_irq(unsigned int irq); ++extern void disable_percpu_irq(unsigned int irq); ++extern void enable_irq(unsigned int irq); ++extern void enable_percpu_irq(unsigned int irq, unsigned int type); ++extern bool irq_percpu_is_enabled(unsigned int irq); ++extern void irq_wake_thread(unsigned int irq, void *dev_id); ++ ++extern void disable_nmi_nosync(unsigned int irq); ++extern void disable_percpu_nmi(unsigned int irq); ++extern void enable_nmi(unsigned int irq); ++extern void enable_percpu_nmi(unsigned int irq, unsigned int type); ++extern int prepare_percpu_nmi(unsigned int irq); ++extern void teardown_percpu_nmi(unsigned int irq); ++ ++/* The following three functions are for the core kernel use only. */ ++extern void suspend_device_irqs(void); ++extern void resume_device_irqs(void); ++ ++/** ++ * struct irq_affinity_notify - context for notification of IRQ affinity changes ++ * @irq: Interrupt to which notification applies ++ * @kref: Reference count, for internal use ++ * @work: Work item, for internal use ++ * @notify: Function to be called on change. This will be ++ * called in process context. ++ * @release: Function to be called on release. This will be ++ * called in process context. Once registered, the ++ * structure must only be freed when this function is ++ * called or later. ++ */ ++struct irq_affinity_notify { ++ unsigned int irq; ++ struct kref kref; ++ struct work_struct work; ++ void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); ++ void (*release)(struct kref *ref); ++}; ++ ++/** ++ * struct irq_affinity - Description for automatic irq affinity assignements ++ * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of ++ * the MSI(-X) vector space ++ * @post_vectors: Don't apply affinity to @post_vectors at end of ++ * the MSI(-X) vector space ++ */ ++struct irq_affinity { ++ int pre_vectors; ++ int post_vectors; ++}; ++ ++#if defined(CONFIG_SMP) ++ ++extern cpumask_var_t irq_default_affinity; ++ ++/* Internal implementation. Use the helpers below */ ++extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask, ++ bool force); ++ ++/** ++ * irq_set_affinity - Set the irq affinity of a given irq ++ * @irq: Interrupt to set affinity ++ * @cpumask: cpumask ++ * ++ * Fails if cpumask does not contain an online CPU ++ */ ++static inline int ++irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) ++{ ++ return __irq_set_affinity(irq, cpumask, false); ++} ++ ++/** ++ * irq_force_affinity - Force the irq affinity of a given irq ++ * @irq: Interrupt to set affinity ++ * @cpumask: cpumask ++ * ++ * Same as irq_set_affinity, but without checking the mask against ++ * online cpus. ++ * ++ * Solely for low level cpu hotplug code, where we need to make per ++ * cpu interrupts affine before the cpu becomes online. 
++ */ ++static inline int ++irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) ++{ ++ return __irq_set_affinity(irq, cpumask, true); ++} ++ ++extern int irq_can_set_affinity(unsigned int irq); ++extern int irq_select_affinity(unsigned int irq); ++ ++extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); ++ ++extern int ++irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); ++ ++struct cpumask *irq_create_affinity_masks(int nvec, const struct irq_affinity *affd); ++int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd); ++ ++#else /* CONFIG_SMP */ ++ ++static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) ++{ ++ return -EINVAL; ++} ++ ++static inline int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) ++{ ++ return 0; ++} ++ ++static inline int irq_can_set_affinity(unsigned int irq) ++{ ++ return 0; ++} ++ ++static inline int irq_select_affinity(unsigned int irq) { return 0; } ++ ++static inline int irq_set_affinity_hint(unsigned int irq, ++ const struct cpumask *m) ++{ ++ return -EINVAL; ++} ++ ++static inline int ++irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) ++{ ++ return 0; ++} ++ ++static inline struct cpumask * ++irq_create_affinity_masks(int nvec, const struct irq_affinity *affd) ++{ ++ return NULL; ++} ++ ++static inline int ++irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd) ++{ ++ return maxvec; ++} ++ ++#endif /* CONFIG_SMP */ ++ ++/* ++ * Special lockdep variants of irq disabling/enabling. ++ * These should be used for locking constructs that ++ * know that a particular irq context which is disabled, ++ * and which is the only irq-context user of a lock, ++ * that it's safe to take the lock in the irq-disabled ++ * section without disabling hardirqs. ++ * ++ * On !CONFIG_LOCKDEP they are equivalent to the normal ++ * irq disable/enable methods. ++ */ ++static inline void disable_irq_nosync_lockdep(unsigned int irq) ++{ ++ disable_irq_nosync(irq); ++#ifdef CONFIG_LOCKDEP ++ local_irq_disable(); ++#endif ++} ++ ++static inline void disable_irq_nosync_lockdep_irqsave(unsigned int irq, unsigned long *flags) ++{ ++ disable_irq_nosync(irq); ++#ifdef CONFIG_LOCKDEP ++ local_irq_save(*flags); ++#endif ++} ++ ++static inline void disable_irq_lockdep(unsigned int irq) ++{ ++ disable_irq(irq); ++#ifdef CONFIG_LOCKDEP ++ local_irq_disable(); ++#endif ++} ++ ++static inline void enable_irq_lockdep(unsigned int irq) ++{ ++#ifdef CONFIG_LOCKDEP ++ local_irq_enable(); ++#endif ++ enable_irq(irq); ++} ++ ++static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long *flags) ++{ ++#ifdef CONFIG_LOCKDEP ++ local_irq_restore(*flags); ++#endif ++ enable_irq(irq); ++} ++ ++/* IRQ wakeup (PM) control: */ ++extern int irq_set_irq_wake(unsigned int irq, unsigned int on); ++ ++static inline int enable_irq_wake(unsigned int irq) ++{ ++ return irq_set_irq_wake(irq, 1); ++} ++ ++static inline int disable_irq_wake(unsigned int irq) ++{ ++ return irq_set_irq_wake(irq, 0); ++} ++ ++/* ++ * irq_get_irqchip_state/irq_set_irqchip_state specific flags ++ */ ++enum irqchip_irq_state { ++ IRQCHIP_STATE_PENDING, /* Is interrupt pending? */ ++ IRQCHIP_STATE_ACTIVE, /* Is interrupt in progress? */ ++ IRQCHIP_STATE_MASKED, /* Is interrupt masked? */ ++ IRQCHIP_STATE_LINE_LEVEL, /* Is IRQ line high? 
*/ ++}; ++ ++extern int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, ++ bool *state); ++extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, ++ bool state); ++ ++#ifdef CONFIG_IRQ_FORCED_THREADING ++extern bool force_irqthreads; ++#else ++#define force_irqthreads (0) ++#endif ++ ++#ifndef local_softirq_pending ++ ++#ifndef local_softirq_pending_ref ++#define local_softirq_pending_ref irq_stat.__softirq_pending ++#endif ++ ++#define local_softirq_pending() (__this_cpu_read(local_softirq_pending_ref)) ++#define set_softirq_pending(x) (__this_cpu_write(local_softirq_pending_ref, (x))) ++#define or_softirq_pending(x) (__this_cpu_or(local_softirq_pending_ref, (x))) ++ ++#endif /* local_softirq_pending */ ++ ++/* Some architectures might implement lazy enabling/disabling of ++ * interrupts. In some cases, such as stop_machine, we might want ++ * to ensure that after a local_irq_disable(), interrupts have ++ * really been disabled in hardware. Such architectures need to ++ * implement the following hook. ++ */ ++#ifndef hard_irq_disable ++#define hard_irq_disable() do { } while(0) ++#endif ++ ++/* PLEASE, avoid to allocate new softirqs, if you need not _really_ high ++ frequency threaded job scheduling. For almost all the purposes ++ tasklets are more than enough. F.e. all serial device BHs et ++ al. should be converted to tasklets, not to softirqs. ++ */ ++ ++enum ++{ ++ HI_SOFTIRQ=0, ++ TIMER_SOFTIRQ, ++ NET_TX_SOFTIRQ, ++ NET_RX_SOFTIRQ, ++ BLOCK_SOFTIRQ, ++ IRQ_POLL_SOFTIRQ, ++ TASKLET_SOFTIRQ, ++ SCHED_SOFTIRQ, ++ HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the ++ numbering. Sigh! */ ++ RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ ++ ++ NR_SOFTIRQS ++}; ++ ++#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) ++ ++/* map softirq index to softirq name. update 'softirq_to_name' in ++ * kernel/softirq.c when adding a new softirq. ++ */ ++extern const char * const softirq_to_name[NR_SOFTIRQS]; ++ ++/* softirq mask and active fields moved to irq_cpustat_t in ++ * asm/hardirq.h to get better cache usage. KAO ++ */ ++ ++struct softirq_action ++{ ++ void (*action)(struct softirq_action *); ++}; ++ ++asmlinkage void do_softirq(void); ++asmlinkage void __do_softirq(void); ++ ++#ifdef __ARCH_HAS_DO_SOFTIRQ ++void do_softirq_own_stack(void); ++#else ++static inline void do_softirq_own_stack(void) ++{ ++ __do_softirq(); ++} ++#endif ++ ++extern void open_softirq(int nr, void (*action)(struct softirq_action *)); ++extern void softirq_init(void); ++extern void __raise_softirq_irqoff(unsigned int nr); ++ ++extern void raise_softirq_irqoff(unsigned int nr); ++extern void raise_softirq(unsigned int nr); ++ ++DECLARE_PER_CPU(struct task_struct *, ksoftirqd); ++ ++static inline struct task_struct *this_cpu_ksoftirqd(void) ++{ ++ return this_cpu_read(ksoftirqd); ++} ++ ++/* Tasklets --- multithreaded analogue of BHs. ++ ++ Main feature differing them of generic softirqs: tasklet ++ is running only on one CPU simultaneously. ++ ++ Main feature differing them of BHs: different tasklets ++ may be run simultaneously on different CPUs. ++ ++ Properties: ++ * If tasklet_schedule() is called, then tasklet is guaranteed ++ to be executed on some cpu at least once after this. ++ * If the tasklet is already scheduled, but its execution is still not ++ started, it will be executed only once. ++ * If this tasklet is already running on another CPU (or schedule is called ++ from tasklet itself), it is rescheduled for later. 
++ * Tasklet is strictly serialized wrt itself, but not ++ wrt another tasklets. If client needs some intertask synchronization, ++ he makes it with spinlocks. ++ */ ++ ++struct tasklet_struct ++{ ++ struct tasklet_struct *next; ++ unsigned long state; ++ atomic_t count; ++ void (*func)(unsigned long); ++ unsigned long data; ++}; ++ ++#define DECLARE_TASKLET(name, func, data) \ ++struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data } ++ ++#define DECLARE_TASKLET_DISABLED(name, func, data) \ ++struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } ++ ++ ++enum ++{ ++ TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ ++ TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ ++}; ++ ++#ifdef CONFIG_SMP ++static inline int tasklet_trylock(struct tasklet_struct *t) ++{ ++ return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); ++} ++ ++static inline void tasklet_unlock(struct tasklet_struct *t) ++{ ++ smp_mb__before_atomic(); ++ clear_bit(TASKLET_STATE_RUN, &(t)->state); ++} ++ ++static inline void tasklet_unlock_wait(struct tasklet_struct *t) ++{ ++ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } ++} ++#else ++#define tasklet_trylock(t) 1 ++#define tasklet_unlock_wait(t) do { } while (0) ++#define tasklet_unlock(t) do { } while (0) ++#endif ++ ++extern void __tasklet_schedule(struct tasklet_struct *t); ++ ++static inline void tasklet_schedule(struct tasklet_struct *t) ++{ ++ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) ++ __tasklet_schedule(t); ++} ++ ++extern void __tasklet_hi_schedule(struct tasklet_struct *t); ++ ++static inline void tasklet_hi_schedule(struct tasklet_struct *t) ++{ ++ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) ++ __tasklet_hi_schedule(t); ++} ++ ++static inline void tasklet_disable_nosync(struct tasklet_struct *t) ++{ ++ atomic_inc(&t->count); ++ smp_mb__after_atomic(); ++} ++ ++static inline void tasklet_disable(struct tasklet_struct *t) ++{ ++ tasklet_disable_nosync(t); ++ tasklet_unlock_wait(t); ++ smp_mb(); ++} ++ ++static inline void tasklet_enable(struct tasklet_struct *t) ++{ ++ smp_mb__before_atomic(); ++ atomic_dec(&t->count); ++} ++ ++extern void tasklet_kill(struct tasklet_struct *t); ++extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); ++extern void tasklet_init(struct tasklet_struct *t, ++ void (*func)(unsigned long), unsigned long data); ++ ++struct tasklet_hrtimer { ++ struct hrtimer timer; ++ struct tasklet_struct tasklet; ++ enum hrtimer_restart (*function)(struct hrtimer *); ++}; ++ ++extern void ++tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, ++ enum hrtimer_restart (*function)(struct hrtimer *), ++ clockid_t which_clock, enum hrtimer_mode mode); ++ ++static inline ++void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, ++ const enum hrtimer_mode mode) ++{ ++ hrtimer_start(&ttimer->timer, time, mode); ++} ++ ++static inline ++void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) ++{ ++ hrtimer_cancel(&ttimer->timer); ++ tasklet_kill(&ttimer->tasklet); ++} ++ ++/* ++ * Autoprobing for irqs: ++ * ++ * probe_irq_on() and probe_irq_off() provide robust primitives ++ * for accurate IRQ probing during kernel initialization. They are ++ * reasonably simple to use, are not "fooled" by spurious interrupts, ++ * and, unlike other attempts at IRQ probing, they do not get hung on ++ * stuck interrupts (such as unused PS2 mouse interfaces on ASUS boards). 
++ * ++ * For reasonably foolproof probing, use them as follows: ++ * ++ * 1. clear and/or mask the device's internal interrupt. ++ * 2. sti(); ++ * 3. irqs = probe_irq_on(); // "take over" all unassigned idle IRQs ++ * 4. enable the device and cause it to trigger an interrupt. ++ * 5. wait for the device to interrupt, using non-intrusive polling or a delay. ++ * 6. irq = probe_irq_off(irqs); // get IRQ number, 0=none, negative=multiple ++ * 7. service the device to clear its pending interrupt. ++ * 8. loop again if paranoia is required. ++ * ++ * probe_irq_on() returns a mask of allocated irq's. ++ * ++ * probe_irq_off() takes the mask as a parameter, ++ * and returns the irq number which occurred, ++ * or zero if none occurred, or a negative irq number ++ * if more than one irq occurred. ++ */ ++ ++#if !defined(CONFIG_GENERIC_IRQ_PROBE) ++static inline unsigned long probe_irq_on(void) ++{ ++ return 0; ++} ++static inline int probe_irq_off(unsigned long val) ++{ ++ return 0; ++} ++static inline unsigned int probe_irq_mask(unsigned long val) ++{ ++ return 0; ++} ++#else ++extern unsigned long probe_irq_on(void); /* returns 0 on failure */ ++extern int probe_irq_off(unsigned long); /* returns 0 or negative on failure */ ++extern unsigned int probe_irq_mask(unsigned long); /* returns mask of ISA interrupts */ ++#endif ++ ++#ifdef CONFIG_PROC_FS ++/* Initialize /proc/irq/ */ ++extern void init_irq_proc(void); ++#else ++static inline void init_irq_proc(void) ++{ ++} ++#endif ++ ++#ifdef CONFIG_IRQ_TIMINGS ++void irq_timings_enable(void); ++void irq_timings_disable(void); ++u64 irq_timings_next_event(u64 now); ++#endif ++ ++struct seq_file; ++int show_interrupts(struct seq_file *p, void *v); ++int arch_show_interrupts(struct seq_file *p, int prec); ++ ++extern int early_irq_init(void); ++extern int arch_probe_nr_irqs(void); ++extern int arch_early_irq_init(void); ++ ++/* ++ * We want to know which function is an entrypoint of a hardirq or a softirq. ++ */ ++#define __irq_entry __attribute__((__section__(".irqentry.text"))) ++#define __softirq_entry \ ++ __attribute__((__section__(".softirqentry.text"))) ++ ++#endif +diff -uprN kernel/include/linux/ipipe_debug.h kernel_new/include/linux/ipipe_debug.h +--- kernel/include/linux/ipipe_debug.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe_debug.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,100 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_debug.h ++ * ++ * Copyright (C) 2012 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __LINUX_IPIPE_DEBUG_H ++#define __LINUX_IPIPE_DEBUG_H ++ ++#include ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ ++#include ++ ++static inline int ipipe_disable_context_check(void) ++{ ++ return xchg(raw_cpu_ptr(&ipipe_percpu.context_check), 0); ++} ++ ++static inline void ipipe_restore_context_check(int old_state) ++{ ++ __this_cpu_write(ipipe_percpu.context_check, old_state); ++} ++ ++static inline void ipipe_context_check_off(void) ++{ ++ int cpu; ++ for_each_online_cpu(cpu) ++ per_cpu(ipipe_percpu, cpu).context_check = 0; ++} ++ ++static inline void ipipe_save_context_nmi(void) ++{ ++ int state = ipipe_disable_context_check(); ++ __this_cpu_write(ipipe_percpu.context_check_saved, state); ++} ++ ++static inline void ipipe_restore_context_nmi(void) ++{ ++ ipipe_restore_context_check(__this_cpu_read(ipipe_percpu.context_check_saved)); ++} ++ ++#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++static inline int ipipe_disable_context_check(void) ++{ ++ return 0; ++} ++ ++static inline void ipipe_restore_context_check(int old_state) { } ++ ++static inline void ipipe_context_check_off(void) { } ++ ++static inline void ipipe_save_context_nmi(void) { } ++ ++static inline void ipipe_restore_context_nmi(void) { } ++ ++#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++#ifdef CONFIG_IPIPE_DEBUG ++ ++#define ipipe_check_irqoff() \ ++ do { \ ++ if (WARN_ON_ONCE(!hard_irqs_disabled())) \ ++ hard_local_irq_disable(); \ ++ } while (0) ++ ++#else /* !CONFIG_IPIPE_DEBUG */ ++ ++static inline void ipipe_check_irqoff(void) { } ++ ++#endif /* !CONFIG_IPIPE_DEBUG */ ++ ++#ifdef CONFIG_IPIPE_DEBUG_INTERNAL ++#define IPIPE_WARN(c) WARN_ON(c) ++#define IPIPE_WARN_ONCE(c) WARN_ON_ONCE(c) ++#define IPIPE_BUG_ON(c) BUG_ON(c) ++#else ++#define IPIPE_WARN(c) do { (void)(c); } while (0) ++#define IPIPE_WARN_ONCE(c) do { (void)(c); } while (0) ++#define IPIPE_BUG_ON(c) do { (void)(c); } while (0) ++#endif ++ ++#endif /* !__LINUX_IPIPE_DEBUG_H */ +diff -uprN kernel/include/linux/ipipe_domain.h kernel_new/include/linux/ipipe_domain.h +--- kernel/include/linux/ipipe_domain.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe_domain.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,368 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_domain.h ++ * ++ * Copyright (C) 2007-2012 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __LINUX_IPIPE_DOMAIN_H ++#define __LINUX_IPIPE_DOMAIN_H ++ ++#ifdef CONFIG_IPIPE ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct task_struct; ++struct mm_struct; ++struct irq_desc; ++struct ipipe_vm_notifier; ++ ++#define __bpl_up(x) (((x)+(BITS_PER_LONG-1)) & ~(BITS_PER_LONG-1)) ++/* Number of virtual IRQs (must be a multiple of BITS_PER_LONG) */ ++#define IPIPE_NR_VIRQS BITS_PER_LONG ++/* First virtual IRQ # (must be aligned on BITS_PER_LONG) */ ++#define IPIPE_VIRQ_BASE __bpl_up(IPIPE_NR_XIRQS) ++/* Total number of IRQ slots */ ++#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE+IPIPE_NR_VIRQS) ++ ++#define IPIPE_IRQ_MAPSZ (IPIPE_NR_IRQS / BITS_PER_LONG) ++#define IPIPE_IRQ_1MAPSZ BITS_PER_LONG ++#if IPIPE_IRQ_MAPSZ > BITS_PER_LONG * BITS_PER_LONG ++/* ++ * We need a 4-level mapping, up to 16M IRQs (64bit long, MAXSMP ++ * defines 512K IRQs). ++ */ ++#define __IPIPE_IRQMAP_LEVELS 4 ++#define IPIPE_IRQ_2MAPSZ (BITS_PER_LONG * BITS_PER_LONG) ++#elif IPIPE_IRQ_MAPSZ > BITS_PER_LONG ++/* ++ * 3-level mapping. Up to 256K IRQs (64 bit long). ++ */ ++#define __IPIPE_IRQMAP_LEVELS 3 ++#else ++/* ++ * 2-level mapping is enough. Up to 4K IRQs (64 bit long). ++ */ ++#define __IPIPE_IRQMAP_LEVELS 2 ++#endif ++ ++/* Per-cpu pipeline status */ ++#define IPIPE_STALL_FLAG 0 /* interrupts (virtually) disabled. */ ++#define IPIPE_STALL_MASK (1L << IPIPE_STALL_FLAG) ++ ++/* Interrupt control bits */ ++#define IPIPE_HANDLE_FLAG 0 ++#define IPIPE_STICKY_FLAG 1 ++#define IPIPE_LOCK_FLAG 2 ++#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG) ++#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG) ++#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG) ++ ++#define __IPIPE_SYSCALL_P 0 ++#define __IPIPE_TRAP_P 1 ++#define __IPIPE_KEVENT_P 2 ++#define __IPIPE_SYSCALL_E (1 << __IPIPE_SYSCALL_P) ++#define __IPIPE_TRAP_E (1 << __IPIPE_TRAP_P) ++#define __IPIPE_KEVENT_E (1 << __IPIPE_KEVENT_P) ++#define __IPIPE_ALL_E 0x7 ++#define __IPIPE_SYSCALL_R (8 << __IPIPE_SYSCALL_P) ++#define __IPIPE_TRAP_R (8 << __IPIPE_TRAP_P) ++#define __IPIPE_KEVENT_R (8 << __IPIPE_KEVENT_P) ++#define __IPIPE_SHIFT_R 3 ++#define __IPIPE_ALL_R (__IPIPE_ALL_E << __IPIPE_SHIFT_R) ++ ++#define IPIPE_KEVT_SCHEDULE 0 ++#define IPIPE_KEVT_SIGWAKE 1 ++#define IPIPE_KEVT_SETSCHED 2 ++#define IPIPE_KEVT_SETAFFINITY 3 ++#define IPIPE_KEVT_EXIT 4 ++#define IPIPE_KEVT_CLEANUP 5 ++#define IPIPE_KEVT_HOSTRT 6 ++#define IPIPE_KEVT_CLOCKFREQ 7 ++#define IPIPE_KEVT_USERINTRET 8 ++#define IPIPE_KEVT_PTRESUME 9 ++ ++typedef void (*ipipe_irq_ackfn_t)(struct irq_desc *desc); ++ ++typedef void (*ipipe_irq_handler_t)(unsigned int irq, ++ void *cookie); ++ ++struct ipipe_domain { ++ int context_offset; ++ struct ipipe_irqdesc { ++ unsigned long control; ++ ipipe_irq_ackfn_t ackfn; ++ ipipe_irq_handler_t handler; ++ void *cookie; ++ } ____cacheline_aligned irqs[IPIPE_NR_IRQS]; ++ const char *name; ++ struct mutex mutex; ++}; ++ ++static inline void * ++__ipipe_irq_cookie(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ return ipd->irqs[irq].cookie; ++} ++ ++static inline ipipe_irq_handler_t ++__ipipe_irq_handler(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ return ipd->irqs[irq].handler; ++} ++ ++extern struct ipipe_domain ipipe_root; ++ ++#define ipipe_root_domain (&ipipe_root) ++ ++extern struct ipipe_domain *ipipe_head_domain; ++ ++struct ipipe_percpu_domain_data { ++ unsigned long status; /* <= Must be first in struct. 
*/ ++ unsigned long irqpend_0map; ++#if __IPIPE_IRQMAP_LEVELS >= 3 ++ unsigned long irqpend_1map[IPIPE_IRQ_1MAPSZ]; ++#if __IPIPE_IRQMAP_LEVELS >= 4 ++ unsigned long irqpend_2map[IPIPE_IRQ_2MAPSZ]; ++#endif ++#endif ++ unsigned long irqpend_map[IPIPE_IRQ_MAPSZ]; ++ unsigned long irqheld_map[IPIPE_IRQ_MAPSZ]; ++ unsigned long irqall[IPIPE_NR_IRQS]; ++ struct ipipe_domain *domain; ++ int coflags; ++}; ++ ++struct ipipe_percpu_data { ++ struct ipipe_percpu_domain_data root; ++ struct ipipe_percpu_domain_data head; ++ struct ipipe_percpu_domain_data *curr; ++ struct pt_regs tick_regs; ++ int hrtimer_irq; ++ struct task_struct *task_hijacked; ++ struct task_struct *rqlock_owner; ++ struct ipipe_vm_notifier *vm_notifier; ++ unsigned long nmi_state; ++ struct mm_struct *active_mm; ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ int context_check; ++ int context_check_saved; ++#endif ++}; ++ ++/* ++ * CAREFUL: all accessors based on __ipipe_raw_cpu_ptr() you may find ++ * in this file should be used only while hw interrupts are off, to ++ * prevent from CPU migration regardless of the running domain. ++ */ ++DECLARE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu); ++ ++static inline struct ipipe_percpu_domain_data * ++__context_of(struct ipipe_percpu_data *p, struct ipipe_domain *ipd) ++{ ++ return (void *)p + ipd->context_offset; ++} ++ ++/** ++ * ipipe_percpu_context - return the address of the pipeline context ++ * data for a domain on a given CPU. ++ * ++ * NOTE: this is the slowest accessor, use it carefully. Prefer ++ * ipipe_this_cpu_context() for requests targeted at the current ++ * CPU. Additionally, if the target domain is known at build time, ++ * consider ipipe_this_cpu_{root, head}_context(). ++ */ ++static inline struct ipipe_percpu_domain_data * ++ipipe_percpu_context(struct ipipe_domain *ipd, int cpu) ++{ ++ return __context_of(&per_cpu(ipipe_percpu, cpu), ipd); ++} ++ ++/** ++ * ipipe_this_cpu_context - return the address of the pipeline context ++ * data for a domain on the current CPU. hw IRQs must be off. ++ * ++ * NOTE: this accessor is a bit faster, but since we don't know which ++ * one of "root" or "head" ipd refers to, we still need to compute the ++ * context address from its offset. ++ */ ++static inline struct ipipe_percpu_domain_data * ++ipipe_this_cpu_context(struct ipipe_domain *ipd) ++{ ++ return __context_of(__ipipe_raw_cpu_ptr(&ipipe_percpu), ipd); ++} ++ ++/** ++ * ipipe_this_cpu_root_context - return the address of the pipeline ++ * context data for the root domain on the current CPU. hw IRQs must ++ * be off. ++ * ++ * NOTE: this accessor is recommended when the domain we refer to is ++ * known at build time to be the root one. ++ */ ++static inline struct ipipe_percpu_domain_data * ++ipipe_this_cpu_root_context(void) ++{ ++ return __ipipe_raw_cpu_ptr(&ipipe_percpu.root); ++} ++ ++/** ++ * ipipe_this_cpu_head_context - return the address of the pipeline ++ * context data for the registered head domain on the current CPU. hw ++ * IRQs must be off. ++ * ++ * NOTE: this accessor is recommended when the domain we refer to is ++ * known at build time to be the registered head domain. This address ++ * is always different from the context data of the root domain in ++ * absence of registered head domain. To get the address of the ++ * context data for the domain leading the pipeline at the time of the ++ * call (which may be root in absence of registered head domain), use ++ * ipipe_this_cpu_leading_context() instead. 
++ */ ++static inline struct ipipe_percpu_domain_data * ++ipipe_this_cpu_head_context(void) ++{ ++ return __ipipe_raw_cpu_ptr(&ipipe_percpu.head); ++} ++ ++/** ++ * ipipe_this_cpu_leading_context - return the address of the pipeline ++ * context data for the domain leading the pipeline on the current ++ * CPU. hw IRQs must be off. ++ * ++ * NOTE: this accessor is required when either root or a registered ++ * head domain may be the final target of this call, depending on ++ * whether the high priority domain was installed via ++ * ipipe_register_head(). ++ */ ++static inline struct ipipe_percpu_domain_data * ++ipipe_this_cpu_leading_context(void) ++{ ++ return ipipe_this_cpu_context(ipipe_head_domain); ++} ++ ++/** ++ * __ipipe_get_current_context() - return the address of the pipeline ++ * context data of the domain running on the current CPU. hw IRQs must ++ * be off. ++ */ ++static inline struct ipipe_percpu_domain_data *__ipipe_get_current_context(void) ++{ ++ return __ipipe_raw_cpu_read(ipipe_percpu.curr); ++} ++ ++#define __ipipe_current_context __ipipe_get_current_context() ++ ++/** ++ * __ipipe_set_current_context() - switch the current CPU to the ++ * specified domain context. hw IRQs must be off. ++ * ++ * NOTE: this is the only way to change the current domain for the ++ * current CPU. Don't bypass. ++ */ ++static inline ++void __ipipe_set_current_context(struct ipipe_percpu_domain_data *pd) ++{ ++ struct ipipe_percpu_data *p; ++ p = __ipipe_raw_cpu_ptr(&ipipe_percpu); ++ p->curr = pd; ++} ++ ++/** ++ * __ipipe_set_current_domain() - switch the current CPU to the ++ * specified domain. This is equivalent to calling ++ * __ipipe_set_current_context() with the context data of that ++ * domain. hw IRQs must be off. ++ */ ++static inline void __ipipe_set_current_domain(struct ipipe_domain *ipd) ++{ ++ struct ipipe_percpu_data *p; ++ p = __ipipe_raw_cpu_ptr(&ipipe_percpu); ++ p->curr = __context_of(p, ipd); ++} ++ ++static inline struct ipipe_percpu_domain_data *ipipe_current_context(void) ++{ ++ struct ipipe_percpu_domain_data *pd; ++ unsigned long flags; ++ ++ flags = hard_smp_local_irq_save(); ++ pd = __ipipe_get_current_context(); ++ hard_smp_local_irq_restore(flags); ++ ++ return pd; ++} ++ ++static inline struct ipipe_domain *__ipipe_get_current_domain(void) ++{ ++ return __ipipe_get_current_context()->domain; ++} ++ ++#define __ipipe_current_domain __ipipe_get_current_domain() ++ ++/** ++ * __ipipe_get_current_domain() - return the address of the pipeline ++ * domain running on the current CPU. hw IRQs must be off. ++ */ ++static inline struct ipipe_domain *ipipe_get_current_domain(void) ++{ ++ struct ipipe_domain *ipd; ++ unsigned long flags; ++ ++ flags = hard_smp_local_irq_save(); ++ ipd = __ipipe_get_current_domain(); ++ hard_smp_local_irq_restore(flags); ++ ++ return ipd; ++} ++ ++#define ipipe_current_domain ipipe_get_current_domain() ++ ++#define __ipipe_root_p (__ipipe_current_domain == ipipe_root_domain) ++#define ipipe_root_p (ipipe_current_domain == ipipe_root_domain) ++ ++#ifdef CONFIG_SMP ++#define __ipipe_root_status (ipipe_this_cpu_root_context()->status) ++#else ++extern unsigned long __ipipe_root_status; ++#endif ++ ++#define __ipipe_head_status (ipipe_this_cpu_head_context()->status) ++ ++/** ++ * __ipipe_ipending_p() - Whether we have interrupts pending ++ * (i.e. logged) for the given domain context on the current CPU. hw ++ * IRQs must be off. 
++ */ ++static inline int __ipipe_ipending_p(struct ipipe_percpu_domain_data *pd) ++{ ++ return pd->irqpend_0map != 0; ++} ++ ++static inline unsigned long ++__ipipe_cpudata_irq_hits(struct ipipe_domain *ipd, int cpu, unsigned int irq) ++{ ++ return ipipe_percpu_context(ipd, cpu)->irqall[irq]; ++} ++ ++#endif /* CONFIG_IPIPE */ ++ ++#endif /* !__LINUX_IPIPE_DOMAIN_H */ +diff -uprN kernel/include/linux/ipipe.h kernel_new/include/linux/ipipe.h +--- kernel/include/linux/ipipe.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,721 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe.h ++ * ++ * Copyright (C) 2002-2014 Philippe Gerum. ++ * 2007 Jan Kiszka. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_H ++#define __LINUX_IPIPE_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_HAVE_IPIPE_SUPPORT ++#include ++#endif ++ ++struct cpuidle_device; ++struct cpuidle_state; ++struct kvm_vcpu; ++struct ipipe_vm_notifier; ++struct irq_desc; ++struct task_struct; ++struct mm_struct; ++ ++#ifdef CONFIG_IPIPE ++ ++#include ++ ++#define IPIPE_CORE_APIREV CONFIG_IPIPE_CORE_APIREV ++ ++#include ++#include ++#include ++#include ++ ++struct pt_regs; ++struct ipipe_domain; ++ ++struct ipipe_vm_notifier { ++ void (*handler)(struct ipipe_vm_notifier *nfy); ++}; ++ ++static inline int ipipe_virtual_irq_p(unsigned int irq) ++{ ++ return irq >= IPIPE_VIRQ_BASE && irq < IPIPE_NR_IRQS; ++} ++ ++void __ipipe_init_early(void); ++ ++void __ipipe_init(void); ++ ++#ifdef CONFIG_PROC_FS ++void __ipipe_init_proc(void); ++#ifdef CONFIG_IPIPE_TRACE ++void __ipipe_init_tracer(void); ++#else /* !CONFIG_IPIPE_TRACE */ ++static inline void __ipipe_init_tracer(void) { } ++#endif /* CONFIG_IPIPE_TRACE */ ++#else /* !CONFIG_PROC_FS */ ++static inline void __ipipe_init_proc(void) { } ++#endif /* CONFIG_PROC_FS */ ++ ++void __ipipe_restore_root_nosync(unsigned long x); ++ ++#define IPIPE_IRQF_NOACK 0x1 ++#define IPIPE_IRQF_NOSYNC 0x2 ++ ++void __ipipe_dispatch_irq(unsigned int irq, int flags); ++ ++void __ipipe_do_sync_stage(void); ++ ++void __ipipe_do_sync_pipeline(struct ipipe_domain *top); ++ ++void __ipipe_lock_irq(unsigned int irq); ++ ++void __ipipe_unlock_irq(unsigned int irq); ++ ++void __ipipe_do_critical_sync(unsigned int irq, void *cookie); ++ ++void __ipipe_ack_edge_irq(struct irq_desc *desc); ++ ++void __ipipe_nop_irq(struct irq_desc *desc); ++ ++static inline void __ipipe_idle(void) ++{ ++ ipipe_unstall_root(); ++} ++ ++#ifndef __ipipe_sync_check ++#define __ipipe_sync_check 1 ++#endif ++ ++static inline void __ipipe_sync_stage(void) ++{ ++ if (likely(__ipipe_sync_check)) ++ __ipipe_do_sync_stage(); ++} ++ ++#ifndef __ipipe_run_irqtail 
++#define __ipipe_run_irqtail(irq) do { } while(0) ++#endif ++ ++int __ipipe_log_printk(const char *fmt, va_list args); ++void __ipipe_flush_printk(unsigned int irq, void *cookie); ++ ++#define __ipipe_get_cpu(flags) ({ (flags) = hard_preempt_disable(); ipipe_processor_id(); }) ++#define __ipipe_put_cpu(flags) hard_preempt_enable(flags) ++ ++int __ipipe_notify_kevent(int event, void *data); ++ ++#define __ipipe_report_sigwake(p) \ ++ do { \ ++ if (ipipe_notifier_enabled_p(p)) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_SIGWAKE, p); \ ++ } while (0) ++ ++struct ipipe_cpu_migration_data { ++ struct task_struct *task; ++ int dest_cpu; ++}; ++ ++#define __ipipe_report_setaffinity(__p, __dest_cpu) \ ++ do { \ ++ struct ipipe_cpu_migration_data d = { \ ++ .task = (__p), \ ++ .dest_cpu = (__dest_cpu), \ ++ }; \ ++ if (ipipe_notifier_enabled_p(__p)) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_SETAFFINITY, &d); \ ++ } while (0) ++ ++#define __ipipe_report_exit(p) \ ++ do { \ ++ if (ipipe_notifier_enabled_p(p)) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_EXIT, p); \ ++ } while (0) ++ ++#define __ipipe_report_setsched(p) \ ++ do { \ ++ if (ipipe_notifier_enabled_p(p)) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_SETSCHED, p); \ ++ } while (0) ++ ++#define __ipipe_report_schedule(prev, next) \ ++do { \ ++ if (ipipe_notifier_enabled_p(next) || \ ++ ipipe_notifier_enabled_p(prev)) { \ ++ __this_cpu_write(ipipe_percpu.rqlock_owner, prev); \ ++ __ipipe_notify_kevent(IPIPE_KEVT_SCHEDULE, next); \ ++ } \ ++} while (0) ++ ++#define __ipipe_report_cleanup(mm) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_CLEANUP, mm) ++ ++#define __ipipe_report_clockfreq_update(freq) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_CLOCKFREQ, &(freq)) ++ ++struct ipipe_ptrace_resume_data { ++ struct task_struct *task; ++ long request; ++}; ++ ++#define __ipipe_report_ptrace_resume(__p, __request) \ ++ do { \ ++ struct ipipe_ptrace_resume_data d = { \ ++ .task = (__p), \ ++ .request = (__request), \ ++ }; \ ++ if (ipipe_notifier_enabled_p(__p)) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_PTRESUME, &d); \ ++ } while (0) ++ ++int __ipipe_notify_syscall(struct pt_regs *regs); ++ ++int __ipipe_notify_trap(int exception, struct pt_regs *regs); ++ ++#define __ipipe_report_trap(exception, regs) \ ++ __ipipe_notify_trap(exception, regs) ++ ++void __ipipe_call_mayday(struct pt_regs *regs); ++ ++int __ipipe_notify_user_intreturn(void); ++ ++#define __ipipe_serial_debug(__fmt, __args...) 
raw_printk(__fmt, ##__args) ++ ++struct ipipe_trap_data { ++ int exception; ++ struct pt_regs *regs; ++}; ++ ++/* ipipe_set_hooks(..., enables) */ ++#define IPIPE_SYSCALL __IPIPE_SYSCALL_E ++#define IPIPE_TRAP __IPIPE_TRAP_E ++#define IPIPE_KEVENT __IPIPE_KEVENT_E ++ ++struct ipipe_sysinfo { ++ int sys_nr_cpus; /* Number of CPUs on board */ ++ int sys_hrtimer_irq; /* hrtimer device IRQ */ ++ u64 sys_hrtimer_freq; /* hrtimer device frequency */ ++ u64 sys_hrclock_freq; /* hrclock device frequency */ ++ u64 sys_cpu_freq; /* CPU frequency (Hz) */ ++ struct ipipe_arch_sysinfo arch; ++}; ++ ++struct ipipe_work_header { ++ size_t size; ++ void (*handler)(struct ipipe_work_header *work); ++}; ++ ++extern unsigned int __ipipe_printk_virq; ++ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq); ++ ++void __ipipe_complete_domain_migration(void); ++ ++int __ipipe_switch_tail(void); ++ ++int __ipipe_migrate_head(void); ++ ++void __ipipe_reenter_root(void); ++ ++void __ipipe_share_current(int flags); ++ ++void __ipipe_arch_share_current(int flags); ++ ++int __ipipe_disable_ondemand_mappings(struct task_struct *p); ++ ++int __ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma); ++ ++/* ++ * Obsolete - no arch implements PIC muting anymore. Null helpers are ++ * kept for building legacy co-kernel releases. ++ */ ++static inline void ipipe_mute_pic(void) { } ++static inline void ipipe_unmute_pic(void) { } ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ ++#define prepare_arch_switch(next) \ ++ do { \ ++ hard_local_irq_enable(); \ ++ __ipipe_report_schedule(current, next); \ ++ } while(0) ++ ++#ifndef ipipe_get_active_mm ++static inline struct mm_struct *ipipe_get_active_mm(void) ++{ ++ return __this_cpu_read(ipipe_percpu.active_mm); ++} ++#define ipipe_get_active_mm ipipe_get_active_mm ++#endif ++ ++#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++#define prepare_arch_switch(next) \ ++ do { \ ++ __ipipe_report_schedule(current, next); \ ++ hard_local_irq_disable(); \ ++ } while(0) ++ ++#ifndef ipipe_get_active_mm ++#define ipipe_get_active_mm() (current->active_mm) ++#endif ++ ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++static inline bool __ipipe_hrclock_ok(void) ++{ ++ return __ipipe_hrclock_freq != 0; ++} ++ ++static inline void __ipipe_nmi_enter(void) ++{ ++ __this_cpu_write(ipipe_percpu.nmi_state, __ipipe_root_status); ++ __set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); ++ ipipe_save_context_nmi(); ++} ++ ++static inline void __ipipe_nmi_exit(void) ++{ ++ ipipe_restore_context_nmi(); ++ if (!test_bit(IPIPE_STALL_FLAG, raw_cpu_ptr(&ipipe_percpu.nmi_state))) ++ __clear_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); ++} ++ ++/* KVM-side calls, hw IRQs off. */ ++static inline void __ipipe_enter_vm(struct ipipe_vm_notifier *vmf) ++{ ++ struct ipipe_percpu_data *p; ++ ++ p = raw_cpu_ptr(&ipipe_percpu); ++ p->vm_notifier = vmf; ++ barrier(); ++} ++ ++static inline void __ipipe_exit_vm(void) ++{ ++ struct ipipe_percpu_data *p; ++ ++ p = raw_cpu_ptr(&ipipe_percpu); ++ p->vm_notifier = NULL; ++ barrier(); ++} ++ ++/* Client-side call, hw IRQs off. 
*/ ++void __ipipe_notify_vm_preemption(void); ++ ++static inline void __ipipe_sync_pipeline(struct ipipe_domain *top) ++{ ++ if (__ipipe_current_domain != top) { ++ __ipipe_do_sync_pipeline(top); ++ return; ++ } ++ if (!test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(top)->status)) ++ __ipipe_sync_stage(); ++} ++ ++void ipipe_register_head(struct ipipe_domain *ipd, ++ const char *name); ++ ++void ipipe_unregister_head(struct ipipe_domain *ipd); ++ ++int ipipe_request_irq(struct ipipe_domain *ipd, ++ unsigned int irq, ++ ipipe_irq_handler_t handler, ++ void *cookie, ++ ipipe_irq_ackfn_t ackfn); ++ ++void ipipe_free_irq(struct ipipe_domain *ipd, ++ unsigned int irq); ++ ++void ipipe_raise_irq(unsigned int irq); ++ ++void ipipe_set_hooks(struct ipipe_domain *ipd, ++ int enables); ++ ++int ipipe_handle_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs); ++ ++unsigned int ipipe_alloc_virq(void); ++ ++void ipipe_free_virq(unsigned int virq); ++ ++static inline void ipipe_post_irq_head(unsigned int irq) ++{ ++ __ipipe_set_irq_pending(ipipe_head_domain, irq); ++} ++ ++static inline void ipipe_post_irq_root(unsigned int irq) ++{ ++ __ipipe_set_irq_pending(&ipipe_root, irq); ++} ++ ++static inline void ipipe_stall_head(void) ++{ ++ hard_local_irq_disable(); ++ __set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); ++} ++ ++static inline unsigned long ipipe_test_and_stall_head(void) ++{ ++ hard_local_irq_disable(); ++ return __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); ++} ++ ++static inline unsigned long ipipe_test_head(void) ++{ ++ unsigned long flags, ret; ++ ++ flags = hard_smp_local_irq_save(); ++ ret = test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); ++ hard_smp_local_irq_restore(flags); ++ ++ return ret; ++} ++ ++void ipipe_unstall_head(void); ++ ++void __ipipe_restore_head(unsigned long x); ++ ++static inline void ipipe_restore_head(unsigned long x) ++{ ++ ipipe_check_irqoff(); ++ if ((x ^ test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status)) & 1) ++ __ipipe_restore_head(x); ++} ++ ++void __ipipe_post_work_root(struct ipipe_work_header *work); ++ ++#define ipipe_post_work_root(p, header) \ ++ do { \ ++ void header_not_at_start(void); \ ++ if (offsetof(typeof(*(p)), header)) { \ ++ header_not_at_start(); \ ++ } \ ++ __ipipe_post_work_root(&(p)->header); \ ++ } while (0) ++ ++int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo); ++ ++unsigned long ipipe_critical_enter(void (*syncfn)(void)); ++ ++void ipipe_critical_exit(unsigned long flags); ++ ++void ipipe_prepare_panic(void); ++ ++#ifdef CONFIG_SMP ++#ifndef ipipe_smp_p ++#define ipipe_smp_p (1) ++#endif ++int ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask); ++void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask); ++#else /* !CONFIG_SMP */ ++#define ipipe_smp_p (0) ++static inline ++int ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask) { return 0; } ++static inline void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask) { } ++static inline void ipipe_disable_smp(void) { } ++#endif /* CONFIG_SMP */ ++ ++static inline void ipipe_restore_root_nosync(unsigned long x) ++{ ++ unsigned long flags; ++ ++ flags = hard_smp_local_irq_save(); ++ __ipipe_restore_root_nosync(x); ++ hard_smp_local_irq_restore(flags); ++} ++ ++/* Must be called hw IRQs off. */ ++static inline void ipipe_lock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = __ipipe_current_domain; ++ if (ipd == ipipe_root_domain) ++ __ipipe_lock_irq(irq); ++} ++ ++/* Must be called hw IRQs off. 
*/ ++static inline void ipipe_unlock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = __ipipe_current_domain; ++ if (ipd == ipipe_root_domain) ++ __ipipe_unlock_irq(irq); ++} ++ ++static inline struct ipipe_threadinfo *ipipe_current_threadinfo(void) ++{ ++ return ¤t_thread_info()->ipipe_data; ++} ++ ++#define ipipe_task_threadinfo(p) (&task_thread_info(p)->ipipe_data) ++ ++int ipipe_enable_irq(unsigned int irq); ++ ++static inline void ipipe_disable_irq(unsigned int irq) ++{ ++ struct irq_desc *desc; ++ struct irq_chip *chip; ++ ++ desc = irq_to_desc(irq); ++ if (desc == NULL) ++ return; ++ ++ chip = irq_desc_get_chip(desc); ++ ++ if (WARN_ON_ONCE(chip->irq_disable == NULL && chip->irq_mask == NULL)) ++ return; ++ ++ if (chip->irq_disable) ++ chip->irq_disable(&desc->irq_data); ++ else ++ chip->irq_mask(&desc->irq_data); ++} ++ ++static inline void ipipe_end_irq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc) ++ desc->ipipe_end(desc); ++} ++ ++static inline int ipipe_chained_irq_p(struct irq_desc *desc) ++{ ++ void __ipipe_chained_irq(struct irq_desc *desc); ++ ++ return desc->handle_irq == __ipipe_chained_irq; ++} ++ ++static inline void ipipe_handle_demuxed_irq(unsigned int cascade_irq) ++{ ++ ipipe_trace_irq_entry(cascade_irq); ++ __ipipe_dispatch_irq(cascade_irq, IPIPE_IRQF_NOSYNC); ++ ipipe_trace_irq_exit(cascade_irq); ++} ++ ++static inline void __ipipe_init_threadflags(struct thread_info *ti) ++{ ++ ti->ipipe_flags = 0; ++} ++ ++static inline ++void ipipe_set_ti_thread_flag(struct thread_info *ti, int flag) ++{ ++ set_bit(flag, &ti->ipipe_flags); ++} ++ ++static inline ++void ipipe_clear_ti_thread_flag(struct thread_info *ti, int flag) ++{ ++ clear_bit(flag, &ti->ipipe_flags); ++} ++ ++static inline ++void ipipe_test_and_clear_ti_thread_flag(struct thread_info *ti, int flag) ++{ ++ test_and_clear_bit(flag, &ti->ipipe_flags); ++} ++ ++static inline ++int ipipe_test_ti_thread_flag(struct thread_info *ti, int flag) ++{ ++ return test_bit(flag, &ti->ipipe_flags); ++} ++ ++#define ipipe_set_thread_flag(flag) \ ++ ipipe_set_ti_thread_flag(current_thread_info(), flag) ++ ++#define ipipe_clear_thread_flag(flag) \ ++ ipipe_clear_ti_thread_flag(current_thread_info(), flag) ++ ++#define ipipe_test_and_clear_thread_flag(flag) \ ++ ipipe_test_and_clear_ti_thread_flag(current_thread_info(), flag) ++ ++#define ipipe_test_thread_flag(flag) \ ++ ipipe_test_ti_thread_flag(current_thread_info(), flag) ++ ++#define ipipe_enable_notifier(p) \ ++ ipipe_set_ti_thread_flag(task_thread_info(p), TIP_NOTIFY) ++ ++#define ipipe_disable_notifier(p) \ ++ do { \ ++ struct thread_info *ti = task_thread_info(p); \ ++ ipipe_clear_ti_thread_flag(ti, TIP_NOTIFY); \ ++ ipipe_clear_ti_thread_flag(ti, TIP_MAYDAY); \ ++ } while (0) ++ ++#define ipipe_notifier_enabled_p(p) \ ++ ipipe_test_ti_thread_flag(task_thread_info(p), TIP_NOTIFY) ++ ++#define ipipe_raise_mayday(p) \ ++ do { \ ++ struct thread_info *ti = task_thread_info(p); \ ++ ipipe_check_irqoff(); \ ++ if (ipipe_test_ti_thread_flag(ti, TIP_NOTIFY)) \ ++ ipipe_set_ti_thread_flag(ti, TIP_MAYDAY); \ ++ } while (0) ++ ++#define ipipe_enable_user_intret_notifier() \ ++ ipipe_set_thread_flag(TIP_USERINTRET) ++ ++#define ipipe_disable_user_intret_notifier() \ ++ ipipe_clear_thread_flag(TIP_USERINTRET) ++ ++#define ipipe_user_intret_notifier_enabled(ti) \ ++ ipipe_test_ti_thread_flag(ti, TIP_USERINTRET) ++ ++#ifdef CONFIG_IPIPE_TRACE ++void __ipipe_tracer_hrclock_initialized(void); ++#else /* !CONFIG_IPIPE_TRACE */ ++#define 
__ipipe_tracer_hrclock_initialized() do { } while(0) ++#endif /* !CONFIG_IPIPE_TRACE */ ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++#define ipipe_mm_switch_protect(__flags) do { (void)(__flags); } while (0) ++#define ipipe_mm_switch_unprotect(__flags) do { (void)(__flags); } while (0) ++#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++#define ipipe_mm_switch_protect(__flags) \ ++ do { \ ++ (__flags) = hard_local_irq_save(); \ ++ } while (0) ++#define ipipe_mm_switch_unprotect(__flags) \ ++ do { \ ++ hard_local_irq_restore(__flags); \ ++ } while (0) ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++bool ipipe_enter_cpuidle(struct cpuidle_device *dev, ++ struct cpuidle_state *state); ++ ++#else /* !CONFIG_IPIPE */ ++ ++static inline void __ipipe_init_early(void) { } ++ ++static inline void __ipipe_init(void) { } ++ ++static inline void __ipipe_init_proc(void) { } ++ ++static inline void __ipipe_idle(void) { } ++ ++static inline void __ipipe_report_sigwake(struct task_struct *p) { } ++ ++static inline void __ipipe_report_setaffinity(struct task_struct *p, ++ int dest_cpu) { } ++ ++static inline void __ipipe_report_setsched(struct task_struct *p) { } ++ ++static inline void __ipipe_report_exit(struct task_struct *p) { } ++ ++static inline void __ipipe_report_cleanup(struct mm_struct *mm) { } ++ ++static inline void __ipipe_report_ptrace_resume(struct task_struct *p, ++ long request) { } ++ ++#define __ipipe_report_trap(exception, regs) 0 ++ ++#define hard_preempt_disable() ({ preempt_disable(); 0; }) ++#define hard_preempt_enable(flags) ({ preempt_enable(); (void)(flags); }) ++ ++#define __ipipe_get_cpu(flags) ({ (void)(flags); get_cpu(); }) ++#define __ipipe_put_cpu(flags) \ ++ do { \ ++ (void)(flags); \ ++ put_cpu(); \ ++ } while (0) ++ ++#define __ipipe_root_tick_p(regs) 1 ++ ++#define ipipe_handle_domain_irq(__domain, __hwirq, __regs) \ ++ handle_domain_irq(__domain, __hwirq, __regs) ++ ++#define ipipe_handle_demuxed_irq(irq) generic_handle_irq(irq) ++ ++#define __ipipe_enter_vm(vmf) do { } while (0) ++ ++static inline void __ipipe_exit_vm(void) { } ++ ++static inline void __ipipe_notify_vm_preemption(void) { } ++ ++#define __ipipe_notify_user_intreturn() 0 ++ ++#define __ipipe_serial_debug(__fmt, __args...) 
do { } while (0) ++ ++#define __ipipe_root_p 1 ++#define ipipe_root_p 1 ++ ++#define ipipe_mm_switch_protect(__flags) do { (void)(__flags); } while (0) ++#define ipipe_mm_switch_unprotect(__flags) do { (void)(__flags); } while (0) ++ ++static inline void __ipipe_init_threadflags(struct thread_info *ti) { } ++ ++static inline void __ipipe_complete_domain_migration(void) { } ++ ++static inline int __ipipe_switch_tail(void) ++{ ++ return 0; ++} ++ ++static inline void __ipipe_nmi_enter(void) { } ++ ++static inline void __ipipe_nmi_exit(void) { } ++ ++#define ipipe_processor_id() smp_processor_id() ++ ++static inline void ipipe_lock_irq(unsigned int irq) { } ++ ++static inline void ipipe_unlock_irq(unsigned int irq) { } ++ ++static inline ++int ipipe_handle_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs) ++{ ++ return 0; ++} ++ ++static inline ++bool ipipe_enter_cpuidle(struct cpuidle_device *dev, ++ struct cpuidle_state *state) ++{ ++ return true; ++} ++ ++#define ipipe_user_intret_notifier_enabled(ti) 0 ++ ++#endif /* !CONFIG_IPIPE */ ++ ++#ifdef CONFIG_IPIPE_WANT_PTE_PINNING ++void __ipipe_pin_mapping_globally(unsigned long start, ++ unsigned long end); ++#else ++static inline void __ipipe_pin_mapping_globally(unsigned long start, ++ unsigned long end) ++{ } ++#endif ++ ++#ifndef ipipe_root_nr_syscalls ++#define ipipe_root_nr_syscalls(ti) NR_syscalls ++#endif ++ ++#endif /* !__LINUX_IPIPE_H */ +diff -uprN kernel/include/linux/ipipe_lock.h kernel_new/include/linux/ipipe_lock.h +--- kernel/include/linux/ipipe_lock.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe_lock.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,329 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_lock.h ++ * ++ * Copyright (C) 2009 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __LINUX_IPIPE_LOCK_H ++#define __LINUX_IPIPE_LOCK_H ++ ++#include ++ ++typedef struct { ++ arch_spinlock_t arch_lock; ++} __ipipe_spinlock_t; ++ ++#define ipipe_spinlock(lock) ((__ipipe_spinlock_t *)(lock)) ++#define ipipe_spinlock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *) || \ ++ __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t []) ++ ++#define std_spinlock_raw(lock) ((raw_spinlock_t *)(lock)) ++#define std_spinlock_raw_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), raw_spinlock_t *) || \ ++ __builtin_types_compatible_p(typeof(lock), raw_spinlock_t []) ++ ++#ifdef CONFIG_PREEMPT_RT_FULL ++ ++#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ ++ do { \ ++ if (ipipe_spinlock_p(lock)) \ ++ (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \ ++ else __bad_lock_type(); \ ++ } while (0) ++ ++#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags) \ ++ ({ \ ++ int __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \ ++ else __bad_lock_type(); \ ++ __ret__; \ ++ }) ++ ++#define PICK_SPINTRYLOCK_IRQ(lock) \ ++ ({ \ ++ int __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \ ++ else __bad_lock_type(); \ ++ __ret__; \ ++ }) ++ ++#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ ++ do { \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \ ++ else if (std_spinlock_raw_p(lock)) { \ ++ __ipipe_spin_unlock_debug(flags); \ ++ __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \ ++ } else __bad_lock_type(); \ ++ } while (0) ++ ++#define PICK_SPINOP(op, lock) \ ++ ({ \ ++ if (ipipe_spinlock_p(lock)) \ ++ arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __real_raw_spin##op(std_spinlock_raw(lock)); \ ++ else __bad_lock_type(); \ ++ (void)0; \ ++ }) ++ ++#define PICK_SPINOP_RET(op, lock, type) \ ++ ({ \ ++ type __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \ ++ else { __ret__ = -1; __bad_lock_type(); } \ ++ __ret__; \ ++ }) ++ ++#else /* !CONFIG_PREEMPT_RT_FULL */ ++ ++#define std_spinlock(lock) ((spinlock_t *)(lock)) ++#define std_spinlock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), spinlock_t *) || \ ++ __builtin_types_compatible_p(typeof(lock), spinlock_t []) ++ ++#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ ++ do { \ ++ if (ipipe_spinlock_p(lock)) \ ++ (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \ ++ else if (std_spinlock_p(lock)) \ ++ __real_raw_spin_lock_irqsave(&std_spinlock(lock)->rlock, flags); \ ++ else __bad_lock_type(); \ ++ } while (0) ++ ++#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags) \ ++ ({ \ ++ int __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = 
__real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \ ++ else if (std_spinlock_p(lock)) \ ++ __ret__ = __real_raw_spin_trylock_irqsave(&std_spinlock(lock)->rlock, flags); \ ++ else __bad_lock_type(); \ ++ __ret__; \ ++ }) ++ ++#define PICK_SPINTRYLOCK_IRQ(lock) \ ++ ({ \ ++ int __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \ ++ else if (std_spinlock_p(lock)) \ ++ __ret__ = __real_raw_spin_trylock_irq(&std_spinlock(lock)->rlock); \ ++ else __bad_lock_type(); \ ++ __ret__; \ ++ }) ++ ++#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ ++ do { \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \ ++ else { \ ++ __ipipe_spin_unlock_debug(flags); \ ++ if (std_spinlock_raw_p(lock)) \ ++ __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \ ++ else if (std_spinlock_p(lock)) \ ++ __real_raw_spin_unlock_irqrestore(&std_spinlock(lock)->rlock, flags); \ ++ } \ ++ } while (0) ++ ++#define PICK_SPINOP(op, lock) \ ++ ({ \ ++ if (ipipe_spinlock_p(lock)) \ ++ arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __real_raw_spin##op(std_spinlock_raw(lock)); \ ++ else if (std_spinlock_p(lock)) \ ++ __real_raw_spin##op(&std_spinlock(lock)->rlock); \ ++ else __bad_lock_type(); \ ++ (void)0; \ ++ }) ++ ++#define PICK_SPINOP_RET(op, lock, type) \ ++ ({ \ ++ type __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \ ++ else if (std_spinlock_p(lock)) \ ++ __ret__ = __real_raw_spin##op(&std_spinlock(lock)->rlock); \ ++ else { __ret__ = -1; __bad_lock_type(); } \ ++ __ret__; \ ++ }) ++ ++#endif /* !CONFIG_PREEMPT_RT_FULL */ ++ ++#define arch_spin_lock_init(lock) \ ++ do { \ ++ IPIPE_DEFINE_SPINLOCK(__lock__); \ ++ *((ipipe_spinlock_t *)lock) = __lock__; \ ++ } while (0) ++ ++#define arch_spin_lock_irq(lock) \ ++ do { \ ++ hard_local_irq_disable(); \ ++ arch_spin_lock(lock); \ ++ } while (0) ++ ++#define arch_spin_unlock_irq(lock) \ ++ do { \ ++ arch_spin_unlock(lock); \ ++ hard_local_irq_enable(); \ ++ } while (0) ++ ++typedef struct { ++ arch_rwlock_t arch_lock; ++} __ipipe_rwlock_t; ++ ++#define ipipe_rwlock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), __ipipe_rwlock_t *) ++ ++#define std_rwlock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), rwlock_t *) ++ ++#define ipipe_rwlock(lock) ((__ipipe_rwlock_t *)(lock)) ++#define std_rwlock(lock) ((rwlock_t *)(lock)) ++ ++#define PICK_RWOP(op, lock) \ ++ do { \ ++ if (ipipe_rwlock_p(lock)) \ ++ arch##op(&ipipe_rwlock(lock)->arch_lock); \ ++ else if (std_rwlock_p(lock)) \ ++ _raw##op(std_rwlock(lock)); \ ++ else __bad_lock_type(); \ ++ } while (0) ++ ++extern int __bad_lock_type(void); ++ ++#ifdef CONFIG_IPIPE ++ ++#define ipipe_spinlock_t __ipipe_spinlock_t ++#define IPIPE_DEFINE_RAW_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED ++#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern ipipe_spinlock_t x ++#define IPIPE_DEFINE_SPINLOCK(x) IPIPE_DEFINE_RAW_SPINLOCK(x) ++#define IPIPE_DECLARE_SPINLOCK(x) IPIPE_DECLARE_RAW_SPINLOCK(x) ++ ++#define IPIPE_SPIN_LOCK_UNLOCKED \ ++ (__ipipe_spinlock_t) { .arch_lock = __ARCH_SPIN_LOCK_UNLOCKED } ++ ++#define spin_lock_irqsave_cond(lock, flags) \ ++ spin_lock_irqsave(lock, flags) ++ 
++#define spin_unlock_irqrestore_cond(lock, flags) \ ++ spin_unlock_irqrestore(lock, flags) ++ ++#define raw_spin_lock_irqsave_cond(lock, flags) \ ++ raw_spin_lock_irqsave(lock, flags) ++ ++#define raw_spin_unlock_irqrestore_cond(lock, flags) \ ++ raw_spin_unlock_irqrestore(lock, flags) ++ ++void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock); ++ ++int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock); ++ ++void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock); ++ ++unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock); ++ ++int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock, ++ unsigned long *x); ++ ++void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock, ++ unsigned long x); ++ ++void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock); ++ ++void __ipipe_spin_unlock_irqcomplete(unsigned long x); ++ ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++void __ipipe_spin_unlock_debug(unsigned long flags); ++#else ++#define __ipipe_spin_unlock_debug(flags) do { } while (0) ++#endif ++ ++#define ipipe_rwlock_t __ipipe_rwlock_t ++#define IPIPE_DEFINE_RWLOCK(x) ipipe_rwlock_t x = IPIPE_RW_LOCK_UNLOCKED ++#define IPIPE_DECLARE_RWLOCK(x) extern ipipe_rwlock_t x ++ ++#define IPIPE_RW_LOCK_UNLOCKED \ ++ (__ipipe_rwlock_t) { .arch_lock = __ARCH_RW_LOCK_UNLOCKED } ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define ipipe_spinlock_t spinlock_t ++#define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x) ++#define IPIPE_DECLARE_SPINLOCK(x) extern spinlock_t x ++#define IPIPE_SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(unknown) ++#define IPIPE_DEFINE_RAW_SPINLOCK(x) DEFINE_RAW_SPINLOCK(x) ++#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern raw_spinlock_t x ++ ++#define spin_lock_irqsave_cond(lock, flags) \ ++ do { \ ++ (void)(flags); \ ++ spin_lock(lock); \ ++ } while(0) ++ ++#define spin_unlock_irqrestore_cond(lock, flags) \ ++ spin_unlock(lock) ++ ++#define raw_spin_lock_irqsave_cond(lock, flags) \ ++ do { \ ++ (void)(flags); \ ++ raw_spin_lock(lock); \ ++ } while(0) ++ ++#define raw_spin_unlock_irqrestore_cond(lock, flags) \ ++ raw_spin_unlock(lock) ++ ++#define __ipipe_spin_lock_irq(lock) do { } while (0) ++#define __ipipe_spin_unlock_irq(lock) do { } while (0) ++#define __ipipe_spin_lock_irqsave(lock) 0 ++#define __ipipe_spin_trylock_irq(lock) 1 ++#define __ipipe_spin_trylock_irqsave(lock, x) ({ (void)(x); 1; }) ++#define __ipipe_spin_unlock_irqrestore(lock, x) do { (void)(x); } while (0) ++#define __ipipe_spin_unlock_irqbegin(lock) spin_unlock(lock) ++#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while (0) ++#define __ipipe_spin_unlock_debug(flags) do { } while (0) ++ ++#define ipipe_rwlock_t rwlock_t ++#define IPIPE_DEFINE_RWLOCK(x) DEFINE_RWLOCK(x) ++#define IPIPE_DECLARE_RWLOCK(x) extern rwlock_t x ++#define IPIPE_RW_LOCK_UNLOCKED RW_LOCK_UNLOCKED ++ ++#endif /* !CONFIG_IPIPE */ ++ ++#endif /* !__LINUX_IPIPE_LOCK_H */ +diff -uprN kernel/include/linux/ipipe_tickdev.h kernel_new/include/linux/ipipe_tickdev.h +--- kernel/include/linux/ipipe_tickdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe_tickdev.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,167 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_tickdev.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. 
++ * Copyright (C) 2012 Gilles Chanteperdrix ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_TICKDEV_H ++#define __LINUX_IPIPE_TICKDEV_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_IPIPE ++ ++struct clock_event_device; ++ ++struct ipipe_hostrt_data { ++ short live; ++ seqcount_t seqcount; ++ time_t wall_time_sec; ++ u32 wall_time_nsec; ++ struct timespec wall_to_monotonic; ++ u64 cycle_last; ++ u64 mask; ++ u32 mult; ++ u32 shift; ++}; ++ ++enum clock_event_mode { ++ CLOCK_EVT_MODE_PERIODIC, ++ CLOCK_EVT_MODE_ONESHOT, ++ CLOCK_EVT_MODE_UNUSED, ++ CLOCK_EVT_MODE_SHUTDOWN, ++}; ++ ++struct ipipe_timer { ++ int irq; ++ void (*request)(struct ipipe_timer *timer, int steal); ++ int (*set)(unsigned long ticks, void *timer); ++ void (*ack)(void); ++ void (*release)(struct ipipe_timer *timer); ++ ++ /* Only if registering a timer directly */ ++ const char *name; ++ unsigned rating; ++ unsigned long freq; ++ unsigned long min_delay_ticks; ++ unsigned long max_delay_ticks; ++ const struct cpumask *cpumask; ++ ++ /* For internal use */ ++ void *timer_set; /* pointer passed to ->set() callback */ ++ struct clock_event_device *host_timer; ++ struct list_head link; ++ ++ /* Conversions between clock frequency and timer frequency */ ++ unsigned c2t_integ; ++ unsigned c2t_frac; ++ ++ /* For clockevent interception */ ++ u32 real_mult; ++ u32 real_shift; ++ void (*mode_handler)(enum clock_event_mode mode, ++ struct clock_event_device *); ++ int orig_mode; ++ int (*orig_set_state_periodic)(struct clock_event_device *); ++ int (*orig_set_state_oneshot)(struct clock_event_device *); ++ int (*orig_set_state_oneshot_stopped)(struct clock_event_device *); ++ int (*orig_set_state_shutdown)(struct clock_event_device *); ++ int (*orig_set_next_event)(unsigned long evt, ++ struct clock_event_device *cdev); ++ unsigned int (*refresh_freq)(void); ++}; ++ ++#define __ipipe_hrtimer_irq __ipipe_raw_cpu_read(ipipe_percpu.hrtimer_irq) ++ ++extern unsigned long __ipipe_hrtimer_freq; ++ ++/* ++ * Called by clockevents_register_device, to register a piggybacked ++ * ipipe timer, if there is one ++ */ ++void ipipe_host_timer_register(struct clock_event_device *clkevt); ++ ++/* ++ * Called by tick_cleanup_dead_cpu, to drop per-CPU timer devices ++ */ ++void ipipe_host_timer_cleanup(struct clock_event_device *clkevt); ++ ++/* ++ * Register a standalone ipipe timer ++ */ ++void ipipe_timer_register(struct ipipe_timer *timer); ++ ++/* ++ * Chooses the best timer for each cpu. Take over its handling. 
++ */ ++int ipipe_select_timers(const struct cpumask *mask); ++ ++/* ++ * Release the per-cpu timers ++ */ ++void ipipe_timers_release(void); ++ ++/* ++ * Start handling the per-cpu timer irq, and intercepting the linux clockevent ++ * device callbacks. ++ */ ++int ipipe_timer_start(void (*tick_handler)(void), ++ void (*emumode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev), ++ int (*emutick)(unsigned long evt, ++ struct clock_event_device *cdev), ++ unsigned cpu); ++ ++/* ++ * Stop handling a per-cpu timer ++ */ ++void ipipe_timer_stop(unsigned cpu); ++ ++/* ++ * Program the timer ++ */ ++void ipipe_timer_set(unsigned long delay); ++ ++const char *ipipe_timer_name(void); ++ ++unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns); ++ ++void __ipipe_timer_refresh_freq(unsigned int hrclock_freq); ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define ipipe_host_timer_register(clkevt) do { } while (0) ++ ++#define ipipe_host_timer_cleanup(clkevt) do { } while (0) ++ ++#endif /* !CONFIG_IPIPE */ ++ ++#ifdef CONFIG_IPIPE_HAVE_HOSTRT ++void ipipe_update_hostrt(struct timekeeper *tk); ++#else ++static inline void ++ipipe_update_hostrt(struct timekeeper *tk) {} ++#endif ++ ++#endif /* __LINUX_IPIPE_TICKDEV_H */ +diff -uprN kernel/include/linux/ipipe_trace.h kernel_new/include/linux/ipipe_trace.h +--- kernel/include/linux/ipipe_trace.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe_trace.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,78 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_trace.h ++ * ++ * Copyright (C) 2005 Luotao Fu. ++ * 2005-2007 Jan Kiszka. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _LINUX_IPIPE_TRACE_H ++#define _LINUX_IPIPE_TRACE_H ++ ++#ifdef CONFIG_IPIPE_TRACE ++ ++#include ++ ++struct pt_regs; ++ ++void ipipe_trace_begin(unsigned long v); ++void ipipe_trace_end(unsigned long v); ++void ipipe_trace_freeze(unsigned long v); ++void ipipe_trace_special(unsigned char special_id, unsigned long v); ++void ipipe_trace_pid(pid_t pid, short prio); ++void ipipe_trace_event(unsigned char id, unsigned long delay_tsc); ++int ipipe_trace_max_reset(void); ++int ipipe_trace_frozen_reset(void); ++void ipipe_trace_irqbegin(int irq, struct pt_regs *regs); ++void ipipe_trace_irqend(int irq, struct pt_regs *regs); ++ ++#else /* !CONFIG_IPIPE_TRACE */ ++ ++#define ipipe_trace_begin(v) do { (void)(v); } while(0) ++#define ipipe_trace_end(v) do { (void)(v); } while(0) ++#define ipipe_trace_freeze(v) do { (void)(v); } while(0) ++#define ipipe_trace_special(id, v) do { (void)(id); (void)(v); } while(0) ++#define ipipe_trace_pid(pid, prio) do { (void)(pid); (void)(prio); } while(0) ++#define ipipe_trace_event(id, delay_tsc) do { (void)(id); (void)(delay_tsc); } while(0) ++#define ipipe_trace_max_reset() ({ 0; }) ++#define ipipe_trace_frozen_reset() ({ 0; }) ++#define ipipe_trace_irqbegin(irq, regs) do { } while(0) ++#define ipipe_trace_irqend(irq, regs) do { } while(0) ++ ++#endif /* !CONFIG_IPIPE_TRACE */ ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++void ipipe_trace_panic_freeze(void); ++void ipipe_trace_panic_dump(void); ++#else ++static inline void ipipe_trace_panic_freeze(void) { } ++static inline void ipipe_trace_panic_dump(void) { } ++#endif ++ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++#define ipipe_trace_irq_entry(irq) ipipe_trace_begin(irq) ++#define ipipe_trace_irq_exit(irq) ipipe_trace_end(irq) ++#define ipipe_trace_irqsoff() ipipe_trace_begin(0x80000000UL) ++#define ipipe_trace_irqson() ipipe_trace_end(0x80000000UL) ++#else ++#define ipipe_trace_irq_entry(irq) do { (void)(irq);} while(0) ++#define ipipe_trace_irq_exit(irq) do { (void)(irq);} while(0) ++#define ipipe_trace_irqsoff() do { } while(0) ++#define ipipe_trace_irqson() do { } while(0) ++#endif ++ ++#endif /* !__LINUX_IPIPE_TRACE_H */ +diff -uprN kernel/include/linux/irqchip/arm-gic.h kernel_new/include/linux/irqchip/arm-gic.h +--- kernel/include/linux/irqchip/arm-gic.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/irqchip/arm-gic.h 2021-04-02 09:12:21.831373736 +0800 +@@ -65,6 +65,11 @@ + #define GICD_INT_EN_CLR_X32 0xffffffff + #define GICD_INT_EN_SET_SGI 0x0000ffff + #define GICD_INT_EN_CLR_PPI 0xffff0000 ++#ifndef CONFIG_IPIPE ++#define GICD_INT_DEF_PRI 0xa0 ++#else ++#define GICD_INT_DEF_PRI 0x10 ++#endif + + #define GICD_IIDR_IMPLEMENTER_SHIFT 0 + #define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) +diff -uprN kernel/include/linux/irqchip/arm-gic.h.orig kernel_new/include/linux/irqchip/arm-gic.h.orig +--- kernel/include/linux/irqchip/arm-gic.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/irqchip/arm-gic.h.orig 2021-04-01 18:28:07.801863125 +0800 +@@ -0,0 +1,173 @@ ++/* ++ * include/linux/irqchip/arm-gic.h ++ * ++ * Copyright (C) 2002 ARM Limited, All Rights Reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++#ifndef __LINUX_IRQCHIP_ARM_GIC_H ++#define __LINUX_IRQCHIP_ARM_GIC_H ++ ++#define GIC_CPU_CTRL 0x00 ++#define GIC_CPU_PRIMASK 0x04 ++#define GIC_CPU_BINPOINT 0x08 ++#define GIC_CPU_INTACK 0x0c ++#define GIC_CPU_EOI 0x10 ++#define GIC_CPU_RUNNINGPRI 0x14 ++#define GIC_CPU_HIGHPRI 0x18 ++#define GIC_CPU_ALIAS_BINPOINT 0x1c ++#define GIC_CPU_ACTIVEPRIO 0xd0 ++#define GIC_CPU_IDENT 0xfc ++#define GIC_CPU_DEACTIVATE 0x1000 ++ ++#define GICC_ENABLE 0x1 ++#define GICC_INT_PRI_THRESHOLD 0xf0 ++ ++#define GIC_CPU_CTRL_EnableGrp0_SHIFT 0 ++#define GIC_CPU_CTRL_EnableGrp0 (1 << GIC_CPU_CTRL_EnableGrp0_SHIFT) ++#define GIC_CPU_CTRL_EnableGrp1_SHIFT 1 ++#define GIC_CPU_CTRL_EnableGrp1 (1 << GIC_CPU_CTRL_EnableGrp1_SHIFT) ++#define GIC_CPU_CTRL_AckCtl_SHIFT 2 ++#define GIC_CPU_CTRL_AckCtl (1 << GIC_CPU_CTRL_AckCtl_SHIFT) ++#define GIC_CPU_CTRL_FIQEn_SHIFT 3 ++#define GIC_CPU_CTRL_FIQEn (1 << GIC_CPU_CTRL_FIQEn_SHIFT) ++#define GIC_CPU_CTRL_CBPR_SHIFT 4 ++#define GIC_CPU_CTRL_CBPR (1 << GIC_CPU_CTRL_CBPR_SHIFT) ++#define GIC_CPU_CTRL_EOImodeNS_SHIFT 9 ++#define GIC_CPU_CTRL_EOImodeNS (1 << GIC_CPU_CTRL_EOImodeNS_SHIFT) ++ ++#define GICC_IAR_INT_ID_MASK 0x3ff ++#define GICC_INT_SPURIOUS 1023 ++#define GICC_DIS_BYPASS_MASK 0x1e0 ++ ++#define GIC_DIST_CTRL 0x000 ++#define GIC_DIST_CTR 0x004 ++#define GIC_DIST_IIDR 0x008 ++#define GIC_DIST_IGROUP 0x080 ++#define GIC_DIST_ENABLE_SET 0x100 ++#define GIC_DIST_ENABLE_CLEAR 0x180 ++#define GIC_DIST_PENDING_SET 0x200 ++#define GIC_DIST_PENDING_CLEAR 0x280 ++#define GIC_DIST_ACTIVE_SET 0x300 ++#define GIC_DIST_ACTIVE_CLEAR 0x380 ++#define GIC_DIST_PRI 0x400 ++#define GIC_DIST_TARGET 0x800 ++#define GIC_DIST_CONFIG 0xc00 ++#define GIC_DIST_SOFTINT 0xf00 ++#define GIC_DIST_SGI_PENDING_CLEAR 0xf10 ++#define GIC_DIST_SGI_PENDING_SET 0xf20 ++ ++#define GICD_ENABLE 0x1 ++#define GICD_DISABLE 0x0 ++#define GICD_INT_ACTLOW_LVLTRIG 0x0 ++#define GICD_INT_EN_CLR_X32 0xffffffff ++#define GICD_INT_EN_SET_SGI 0x0000ffff ++#define GICD_INT_EN_CLR_PPI 0xffff0000 ++ ++#define GICD_IIDR_IMPLEMENTER_SHIFT 0 ++#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) ++#define GICD_IIDR_REVISION_SHIFT 12 ++#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT) ++#define GICD_IIDR_VARIANT_SHIFT 16 ++#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT) ++#define GICD_IIDR_PRODUCT_ID_SHIFT 24 ++#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT) ++ ++ ++#define GICH_HCR 0x0 ++#define GICH_VTR 0x4 ++#define GICH_VMCR 0x8 ++#define GICH_MISR 0x10 ++#define GICH_EISR0 0x20 ++#define GICH_EISR1 0x24 ++#define GICH_ELRSR0 0x30 ++#define GICH_ELRSR1 0x34 ++#define GICH_APR 0xf0 ++#define GICH_LR0 0x100 ++ ++#define GICH_HCR_EN (1 << 0) ++#define GICH_HCR_UIE (1 << 1) ++#define GICH_HCR_NPIE (1 << 3) ++ ++#define GICH_LR_VIRTUALID (0x3ff << 0) ++#define GICH_LR_PHYSID_CPUID_SHIFT (10) ++#define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT) ++#define GICH_LR_PRIORITY_SHIFT 23 ++#define GICH_LR_STATE (3 << 28) ++#define GICH_LR_PENDING_BIT (1 << 28) ++#define GICH_LR_ACTIVE_BIT (1 << 29) ++#define GICH_LR_EOI (1 << 19) ++#define GICH_LR_GROUP1 (1 << 30) ++#define GICH_LR_HW (1 << 31) ++ ++#define GICH_VMCR_ENABLE_GRP0_SHIFT 0 ++#define GICH_VMCR_ENABLE_GRP0_MASK (1 << GICH_VMCR_ENABLE_GRP0_SHIFT) ++#define GICH_VMCR_ENABLE_GRP1_SHIFT 1 ++#define GICH_VMCR_ENABLE_GRP1_MASK (1 << GICH_VMCR_ENABLE_GRP1_SHIFT) ++#define GICH_VMCR_ACK_CTL_SHIFT 2 ++#define GICH_VMCR_ACK_CTL_MASK (1 << GICH_VMCR_ACK_CTL_SHIFT) ++#define 
GICH_VMCR_FIQ_EN_SHIFT 3 ++#define GICH_VMCR_FIQ_EN_MASK (1 << GICH_VMCR_FIQ_EN_SHIFT) ++#define GICH_VMCR_CBPR_SHIFT 4 ++#define GICH_VMCR_CBPR_MASK (1 << GICH_VMCR_CBPR_SHIFT) ++#define GICH_VMCR_EOI_MODE_SHIFT 9 ++#define GICH_VMCR_EOI_MODE_MASK (1 << GICH_VMCR_EOI_MODE_SHIFT) ++ ++#define GICH_VMCR_PRIMASK_SHIFT 27 ++#define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT) ++#define GICH_VMCR_BINPOINT_SHIFT 21 ++#define GICH_VMCR_BINPOINT_MASK (0x7 << GICH_VMCR_BINPOINT_SHIFT) ++#define GICH_VMCR_ALIAS_BINPOINT_SHIFT 18 ++#define GICH_VMCR_ALIAS_BINPOINT_MASK (0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT) ++ ++#define GICH_MISR_EOI (1 << 0) ++#define GICH_MISR_U (1 << 1) ++ ++#define GICV_PMR_PRIORITY_SHIFT 3 ++#define GICV_PMR_PRIORITY_MASK (0x1f << GICV_PMR_PRIORITY_SHIFT) ++ ++#ifndef __ASSEMBLY__ ++ ++#include ++ ++struct device_node; ++struct gic_chip_data; ++ ++void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); ++int gic_cpu_if_down(unsigned int gic_nr); ++void gic_cpu_save(struct gic_chip_data *gic); ++void gic_cpu_restore(struct gic_chip_data *gic); ++void gic_dist_save(struct gic_chip_data *gic); ++void gic_dist_restore(struct gic_chip_data *gic); ++ ++/* ++ * Subdrivers that need some preparatory work can initialize their ++ * chips and call this to register their GICs. ++ */ ++int gic_of_init(struct device_node *node, struct device_node *parent); ++ ++/* ++ * Initialises and registers a non-root or child GIC chip. Memory for ++ * the gic_chip_data structure is dynamically allocated. ++ */ ++int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq); ++ ++/* ++ * Legacy platforms not converted to DT yet must use this to init ++ * their GIC ++ */ ++void gic_init(unsigned int nr, int start, ++ void __iomem *dist , void __iomem *cpu); ++ ++int gicv2m_init(struct fwnode_handle *parent_handle, ++ struct irq_domain *parent); ++ ++void gic_send_sgi(unsigned int cpu_id, unsigned int irq); ++int gic_get_cpu_id(unsigned int cpu); ++void gic_migrate_target(unsigned int new_cpu_id); ++unsigned long gic_get_sgir_physaddr(void); ++ ++#endif /* __ASSEMBLY */ ++#endif +diff -uprN kernel/include/linux/irqchip/arm-gic.h.rej kernel_new/include/linux/irqchip/arm-gic.h.rej +--- kernel/include/linux/irqchip/arm-gic.h.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/irqchip/arm-gic.h.rej 2021-04-01 18:28:07.801863125 +0800 +@@ -0,0 +1,14 @@ ++--- include/linux/irqchip/arm-gic.h 2019-12-18 03:36:04.000000000 +0800 +++++ include/linux/irqchip/arm-gic.h 2021-03-22 09:21:43.212415388 +0800 ++@@ -65,7 +65,11 @@ ++ #define GICD_INT_EN_CLR_X32 0xffffffff ++ #define GICD_INT_EN_SET_SGI 0x0000ffff ++ #define GICD_INT_EN_CLR_PPI 0xffff0000 +++#ifndef CONFIG_IPIPE ++ #define GICD_INT_DEF_PRI 0xa0 +++#else +++#define GICD_INT_DEF_PRI 0x10 +++#endif ++ #define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ ++ (GICD_INT_DEF_PRI << 16) |\ ++ (GICD_INT_DEF_PRI << 8) |\ +diff -uprN kernel/include/linux/irqdesc.h kernel_new/include/linux/irqdesc.h +--- kernel/include/linux/irqdesc.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/irqdesc.h 2021-04-01 18:28:07.801863125 +0800 +@@ -56,6 +56,10 @@ struct irq_desc { + struct irq_common_data irq_common_data; + struct irq_data irq_data; + unsigned int __percpu *kstat_irqs; ++#ifdef CONFIG_IPIPE ++ void (*ipipe_ack)(struct irq_desc *desc); ++ void (*ipipe_end)(struct irq_desc *desc); ++#endif /* CONFIG_IPIPE */ + irq_flow_handler_t handle_irq; + #ifdef CONFIG_IRQ_PREFLOW_FASTEOI + irq_preflow_handler_t 
preflow_handler; +@@ -185,6 +189,10 @@ static inline int irq_desc_has_action(st + return desc->action != NULL; + } + ++irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, ++ int is_chained); ++ + static inline int irq_has_action(unsigned int irq) + { + return irq_desc_has_action(irq_to_desc(irq)); +diff -uprN kernel/include/linux/irqdesc.h.orig kernel_new/include/linux/irqdesc.h.orig +--- kernel/include/linux/irqdesc.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/irqdesc.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,281 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_IRQDESC_H ++#define _LINUX_IRQDESC_H ++ ++#include ++#include ++#include ++ ++/* ++ * Core internal functions to deal with irq descriptors ++ */ ++ ++struct irq_affinity_notify; ++struct proc_dir_entry; ++struct module; ++struct irq_desc; ++struct irq_domain; ++struct pt_regs; ++ ++/** ++ * struct irq_desc - interrupt descriptor ++ * @irq_common_data: per irq and chip data passed down to chip functions ++ * @kstat_irqs: irq stats per cpu ++ * @handle_irq: highlevel irq-events handler ++ * @preflow_handler: handler called before the flow handler (currently used by sparc) ++ * @action: the irq action chain ++ * @status: status information ++ * @core_internal_state__do_not_mess_with_it: core internal status information ++ * @depth: disable-depth, for nested irq_disable() calls ++ * @wake_depth: enable depth, for multiple irq_set_irq_wake() callers ++ * @irq_count: stats field to detect stalled irqs ++ * @last_unhandled: aging timer for unhandled count ++ * @irqs_unhandled: stats field for spurious unhandled interrupts ++ * @threads_handled: stats field for deferred spurious detection of threaded handlers ++ * @threads_handled_last: comparator field for deferred spurious detection of theraded handlers ++ * @lock: locking for SMP ++ * @affinity_hint: hint to user space for preferred irq affinity ++ * @affinity_notify: context for notification of affinity changes ++ * @pending_mask: pending rebalanced interrupts ++ * @threads_oneshot: bitfield to handle shared oneshot threads ++ * @threads_active: number of irqaction threads currently running ++ * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers ++ * @nr_actions: number of installed actions on this descriptor ++ * @no_suspend_depth: number of irqactions on a irq descriptor with ++ * IRQF_NO_SUSPEND set ++ * @force_resume_depth: number of irqactions on a irq descriptor with ++ * IRQF_FORCE_RESUME set ++ * @rcu: rcu head for delayed free ++ * @kobj: kobject used to represent this struct in sysfs ++ * @request_mutex: mutex to protect request/free before locking desc->lock ++ * @dir: /proc/irq/ procfs entry ++ * @debugfs_file: dentry for the debugfs file ++ * @name: flow handler name for /proc/interrupts output ++ */ ++struct irq_desc { ++ struct irq_common_data irq_common_data; ++ struct irq_data irq_data; ++ unsigned int __percpu *kstat_irqs; ++ irq_flow_handler_t handle_irq; ++#ifdef CONFIG_IRQ_PREFLOW_FASTEOI ++ irq_preflow_handler_t preflow_handler; ++#endif ++ struct irqaction *action; /* IRQ action list */ ++ unsigned int status_use_accessors; ++ unsigned int core_internal_state__do_not_mess_with_it; ++ unsigned int depth; /* nested irq disables */ ++ unsigned int wake_depth; /* nested wake enables */ ++ unsigned int tot_count; ++ unsigned int irq_count; /* For detecting broken IRQs */ ++ unsigned long last_unhandled; /* Aging timer for unhandled count */ ++ unsigned int 
irqs_unhandled; ++ atomic_t threads_handled; ++ int threads_handled_last; ++ raw_spinlock_t lock; ++ struct cpumask *percpu_enabled; ++ const struct cpumask *percpu_affinity; ++#ifdef CONFIG_SMP ++ const struct cpumask *affinity_hint; ++ struct irq_affinity_notify *affinity_notify; ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++ cpumask_var_t pending_mask; ++#endif ++#endif ++ unsigned long threads_oneshot; ++ atomic_t threads_active; ++ wait_queue_head_t wait_for_threads; ++#ifdef CONFIG_PM_SLEEP ++ unsigned int nr_actions; ++ unsigned int no_suspend_depth; ++ unsigned int cond_suspend_depth; ++ unsigned int force_resume_depth; ++#endif ++#ifdef CONFIG_PROC_FS ++ struct proc_dir_entry *dir; ++#endif ++#ifdef CONFIG_GENERIC_IRQ_DEBUGFS ++ struct dentry *debugfs_file; ++ const char *dev_name; ++#endif ++#ifdef CONFIG_SPARSE_IRQ ++ struct rcu_head rcu; ++ struct kobject kobj; ++#endif ++ struct mutex request_mutex; ++ int parent_irq; ++ struct module *owner; ++ const char *name; ++} ____cacheline_internodealigned_in_smp; ++ ++#ifdef CONFIG_SPARSE_IRQ ++extern void irq_lock_sparse(void); ++extern void irq_unlock_sparse(void); ++#else ++static inline void irq_lock_sparse(void) { } ++static inline void irq_unlock_sparse(void) { } ++extern struct irq_desc irq_desc[NR_IRQS]; ++#endif ++ ++static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) ++{ ++ return container_of(data->common, struct irq_desc, irq_common_data); ++} ++ ++static inline unsigned int irq_desc_get_irq(struct irq_desc *desc) ++{ ++ return desc->irq_data.irq; ++} ++ ++static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc) ++{ ++ return &desc->irq_data; ++} ++ ++static inline struct irq_chip *irq_desc_get_chip(struct irq_desc *desc) ++{ ++ return desc->irq_data.chip; ++} ++ ++static inline void *irq_desc_get_chip_data(struct irq_desc *desc) ++{ ++ return desc->irq_data.chip_data; ++} ++ ++static inline void *irq_desc_get_handler_data(struct irq_desc *desc) ++{ ++ return desc->irq_common_data.handler_data; ++} ++ ++/* ++ * Architectures call this to let the generic IRQ layer ++ * handle an interrupt. ++ */ ++static inline void generic_handle_irq_desc(struct irq_desc *desc) ++{ ++ desc->handle_irq(desc); ++} ++ ++int generic_handle_irq(unsigned int irq); ++ ++#ifdef CONFIG_HANDLE_DOMAIN_IRQ ++/* ++ * Convert a HW interrupt number to a logical one using a IRQ domain, ++ * and handle the result interrupt number. Return -EINVAL if ++ * conversion failed. Providing a NULL domain indicates that the ++ * conversion has already been done. 
++ */ ++int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, ++ bool lookup, struct pt_regs *regs); ++ ++static inline int handle_domain_irq(struct irq_domain *domain, ++ unsigned int hwirq, struct pt_regs *regs) ++{ ++ return __handle_domain_irq(domain, hwirq, true, regs); ++} ++ ++#ifdef CONFIG_IRQ_DOMAIN ++int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, ++ struct pt_regs *regs); ++#endif ++#endif ++ ++/* Test to see if a driver has successfully requested an irq */ ++static inline int irq_desc_has_action(struct irq_desc *desc) ++{ ++ return desc->action != NULL; ++} ++ ++static inline int irq_has_action(unsigned int irq) ++{ ++ return irq_desc_has_action(irq_to_desc(irq)); ++} ++ ++/** ++ * irq_set_handler_locked - Set irq handler from a locked region ++ * @data: Pointer to the irq_data structure which identifies the irq ++ * @handler: Flow control handler function for this interrupt ++ * ++ * Sets the handler in the irq descriptor associated to @data. ++ * ++ * Must be called with irq_desc locked and valid parameters. Typical ++ * call site is the irq_set_type() callback. ++ */ ++static inline void irq_set_handler_locked(struct irq_data *data, ++ irq_flow_handler_t handler) ++{ ++ struct irq_desc *desc = irq_data_to_desc(data); ++ ++ desc->handle_irq = handler; ++} ++ ++/** ++ * irq_set_chip_handler_name_locked - Set chip, handler and name from a locked region ++ * @data: Pointer to the irq_data structure for which the chip is set ++ * @chip: Pointer to the new irq chip ++ * @handler: Flow control handler function for this interrupt ++ * @name: Name of the interrupt ++ * ++ * Replace the irq chip at the proper hierarchy level in @data and ++ * sets the handler and name in the associated irq descriptor. ++ * ++ * Must be called with irq_desc locked and valid parameters. 
++ */ ++static inline void ++irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, ++ irq_flow_handler_t handler, const char *name) ++{ ++ struct irq_desc *desc = irq_data_to_desc(data); ++ ++ desc->handle_irq = handler; ++ desc->name = name; ++ data->chip = chip; ++} ++ ++static inline bool irq_balancing_disabled(unsigned int irq) ++{ ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ return desc->status_use_accessors & IRQ_NO_BALANCING_MASK; ++} ++ ++static inline bool irq_is_percpu(unsigned int irq) ++{ ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ return desc->status_use_accessors & IRQ_PER_CPU; ++} ++ ++static inline bool irq_is_percpu_devid(unsigned int irq) ++{ ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ return desc->status_use_accessors & IRQ_PER_CPU_DEVID; ++} ++ ++static inline void ++irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, ++ struct lock_class_key *request_class) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc) { ++ lockdep_set_class(&desc->lock, lock_class); ++ lockdep_set_class(&desc->request_mutex, request_class); ++ } ++} ++ ++#ifdef CONFIG_IRQ_PREFLOW_FASTEOI ++static inline void ++__irq_set_preflow_handler(unsigned int irq, irq_preflow_handler_t handler) ++{ ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ desc->preflow_handler = handler; ++} ++#endif ++ ++#endif +diff -uprN kernel/include/linux/irqflags.h kernel_new/include/linux/irqflags.h +--- kernel/include/linux/irqflags.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/irqflags.h 2021-04-01 18:28:07.801863125 +0800 +@@ -148,6 +148,18 @@ do { \ + + #endif /* CONFIG_TRACE_IRQFLAGS */ + ++#ifdef CONFIG_IPIPE ++#define local_irq_enable_full() local_irq_enable() ++#define local_irq_disable_full() \ ++ do { \ ++ local_irq_disable(); \ ++ hard_local_irq_disable(); \ ++ } while (0) ++#else ++#define local_irq_enable_full() local_irq_enable() ++#define local_irq_disable_full() local_irq_disable() ++#endif ++ + #define local_save_flags(flags) raw_local_save_flags(flags) + + /* +diff -uprN kernel/include/linux/irq.h kernel_new/include/linux/irq.h +--- kernel/include/linux/irq.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/irq.h 2021-04-02 09:15:38.162510091 +0800 +@@ -497,6 +497,11 @@ struct irq_chip { + + void (*irq_bus_lock)(struct irq_data *data); + void (*irq_bus_sync_unlock)(struct irq_data *data); ++#ifdef CONFIG_IPIPE ++ void (*irq_move)(struct irq_data *data); ++ void (*irq_hold)(struct irq_data *data); ++ void (*irq_release)(struct irq_data *data); ++#endif /* CONFIG_IPIPE */ + + void (*irq_cpu_online)(struct irq_data *data); + void (*irq_cpu_offline)(struct irq_data *data); +@@ -541,6 +546,7 @@ struct irq_chip { + * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode + * IRQCHIP_SUPPORTS_LEVEL_MSI Chip can provide two doorbells for Level MSIs + * IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips ++ * IRQCHIP_PIPELINE_SAFE: Chip can work in pipelined mode + */ + enum { + IRQCHIP_SET_TYPE_MASKED = (1 << 0), +@@ -551,6 +557,7 @@ enum { + IRQCHIP_ONESHOT_SAFE = (1 << 5), + IRQCHIP_EOI_THREADED = (1 << 6), + IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), ++ IRQCHIP_PIPELINE_SAFE = (1 << 7), + IRQCHIP_SUPPORTS_NMI = (1 << 8), + }; + +@@ -647,6 +654,11 @@ extern int irq_chip_retrigger_hierarchy( + extern void irq_chip_mask_parent(struct irq_data *data); + extern void irq_chip_unmask_parent(struct irq_data *data); + extern void 
irq_chip_eoi_parent(struct irq_data *data); ++#ifdef CONFIG_IPIPE ++extern void irq_chip_hold_parent(struct irq_data *data); ++extern void irq_chip_release_parent(struct irq_data *data); ++#endif ++ + extern int irq_chip_set_affinity_parent(struct irq_data *data, + const struct cpumask *dest, + bool force); +@@ -771,7 +783,14 @@ extern int irq_set_irq_type(unsigned int + extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); + extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, + struct msi_desc *entry); +-extern struct irq_data *irq_get_irq_data(unsigned int irq); ++ ++static inline __attribute__((const)) struct irq_data * ++irq_get_irq_data(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ return desc ? &desc->irq_data : NULL; ++} + + static inline struct irq_chip *irq_get_chip(unsigned int irq) + { +@@ -1013,7 +1032,11 @@ struct irq_chip_type { + * different flow mechanisms (level/edge) for it. + */ + struct irq_chip_generic { ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t lock; ++#else + raw_spinlock_t lock; ++#endif + void __iomem *reg_base; + u32 (*reg_readl)(void __iomem *addr); + void (*reg_writel)(u32 val, void __iomem *addr); +@@ -1141,18 +1164,28 @@ static inline struct irq_chip_type *irq_ + #define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX) + + #ifdef CONFIG_SMP +-static inline void irq_gc_lock(struct irq_chip_generic *gc) ++static inline unsigned long irq_gc_lock(struct irq_chip_generic *gc) + { +- raw_spin_lock(&gc->lock); ++ unsigned long flags = 0; ++ raw_spin_lock_irqsave_cond(&gc->lock, flags); ++ return flags; + } + +-static inline void irq_gc_unlock(struct irq_chip_generic *gc) ++static inline void ++irq_gc_unlock(struct irq_chip_generic *gc, unsigned long flags) + { +- raw_spin_unlock(&gc->lock); ++ raw_spin_unlock_irqrestore_cond(&gc->lock, flags); + } + #else +-static inline void irq_gc_lock(struct irq_chip_generic *gc) { } +-static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } ++static inline unsigned long irq_gc_lock(struct irq_chip_generic *gc) ++{ ++ return hard_cond_local_irq_save(); ++} ++static inline void ++irq_gc_unlock(struct irq_chip_generic *gc, unsigned long flags) ++{ ++ hard_cond_local_irq_restore(flags); ++} + #endif + + /* +diff -uprN kernel/include/linux/irq.h.orig kernel_new/include/linux/irq.h.orig +--- kernel/include/linux/irq.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/irq.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1235 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_IRQ_H ++#define _LINUX_IRQ_H ++ ++/* ++ * Please do not include this file in generic code. There is currently ++ * no requirement for any architecture to implement anything held ++ * within this file. ++ * ++ * Thanks. --rmk ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++struct seq_file; ++struct module; ++struct msi_msg; ++enum irqchip_irq_state; ++ ++/* ++ * IRQ line status. 
++ * ++ * Bits 0-7 are the same as the IRQF_* bits in linux/interrupt.h ++ * ++ * IRQ_TYPE_NONE - default, unspecified type ++ * IRQ_TYPE_EDGE_RISING - rising edge triggered ++ * IRQ_TYPE_EDGE_FALLING - falling edge triggered ++ * IRQ_TYPE_EDGE_BOTH - rising and falling edge triggered ++ * IRQ_TYPE_LEVEL_HIGH - high level triggered ++ * IRQ_TYPE_LEVEL_LOW - low level triggered ++ * IRQ_TYPE_LEVEL_MASK - Mask to filter out the level bits ++ * IRQ_TYPE_SENSE_MASK - Mask for all the above bits ++ * IRQ_TYPE_DEFAULT - For use by some PICs to ask irq_set_type ++ * to setup the HW to a sane default (used ++ * by irqdomain map() callbacks to synchronize ++ * the HW state and SW flags for a newly ++ * allocated descriptor). ++ * ++ * IRQ_TYPE_PROBE - Special flag for probing in progress ++ * ++ * Bits which can be modified via irq_set/clear/modify_status_flags() ++ * IRQ_LEVEL - Interrupt is level type. Will be also ++ * updated in the code when the above trigger ++ * bits are modified via irq_set_irq_type() ++ * IRQ_PER_CPU - Mark an interrupt PER_CPU. Will protect ++ * it from affinity setting ++ * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing ++ * IRQ_NOREQUEST - Interrupt cannot be requested via ++ * request_irq() ++ * IRQ_NOTHREAD - Interrupt cannot be threaded ++ * IRQ_NOAUTOEN - Interrupt is not automatically enabled in ++ * request/setup_irq() ++ * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set) ++ * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context ++ * IRQ_NESTED_THREAD - Interrupt nests into another thread ++ * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable ++ * IRQ_IS_POLLED - Always polled by another interrupt. Exclude ++ * it from the spurious interrupt detection ++ * mechanism and from core side polling. ++ * IRQ_DISABLE_UNLAZY - Disable lazy irq disable ++ */ ++enum { ++ IRQ_TYPE_NONE = 0x00000000, ++ IRQ_TYPE_EDGE_RISING = 0x00000001, ++ IRQ_TYPE_EDGE_FALLING = 0x00000002, ++ IRQ_TYPE_EDGE_BOTH = (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING), ++ IRQ_TYPE_LEVEL_HIGH = 0x00000004, ++ IRQ_TYPE_LEVEL_LOW = 0x00000008, ++ IRQ_TYPE_LEVEL_MASK = (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH), ++ IRQ_TYPE_SENSE_MASK = 0x0000000f, ++ IRQ_TYPE_DEFAULT = IRQ_TYPE_SENSE_MASK, ++ ++ IRQ_TYPE_PROBE = 0x00000010, ++ ++ IRQ_LEVEL = (1 << 8), ++ IRQ_PER_CPU = (1 << 9), ++ IRQ_NOPROBE = (1 << 10), ++ IRQ_NOREQUEST = (1 << 11), ++ IRQ_NOAUTOEN = (1 << 12), ++ IRQ_NO_BALANCING = (1 << 13), ++ IRQ_MOVE_PCNTXT = (1 << 14), ++ IRQ_NESTED_THREAD = (1 << 15), ++ IRQ_NOTHREAD = (1 << 16), ++ IRQ_PER_CPU_DEVID = (1 << 17), ++ IRQ_IS_POLLED = (1 << 18), ++ IRQ_DISABLE_UNLAZY = (1 << 19), ++}; ++ ++#define IRQF_MODIFY_MASK \ ++ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ ++ IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ ++ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ ++ IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) ++ ++#define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) ++ ++/* ++ * Return value for chip->irq_set_affinity() ++ * ++ * IRQ_SET_MASK_OK - OK, core updates irq_common_data.affinity ++ * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity ++ * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to ++ * support stacked irqchips, which indicates skipping ++ * all descendent irqchips. 
++ */ ++enum { ++ IRQ_SET_MASK_OK = 0, ++ IRQ_SET_MASK_OK_NOCOPY, ++ IRQ_SET_MASK_OK_DONE, ++}; ++ ++struct msi_desc; ++struct irq_domain; ++ ++/** ++ * struct irq_common_data - per irq data shared by all irqchips ++ * @state_use_accessors: status information for irq chip functions. ++ * Use accessor functions to deal with it ++ * @node: node index useful for balancing ++ * @handler_data: per-IRQ data for the irq_chip methods ++ * @affinity: IRQ affinity on SMP. If this is an IPI ++ * related irq, then this is the mask of the ++ * CPUs to which an IPI can be sent. ++ * @effective_affinity: The effective IRQ affinity on SMP as some irq ++ * chips do not allow multi CPU destinations. ++ * A subset of @affinity. ++ * @msi_desc: MSI descriptor ++ * @ipi_offset: Offset of first IPI target cpu in @affinity. Optional. ++ */ ++struct irq_common_data { ++ unsigned int __private state_use_accessors; ++#ifdef CONFIG_NUMA ++ unsigned int node; ++#endif ++ void *handler_data; ++ struct msi_desc *msi_desc; ++ cpumask_var_t affinity; ++#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK ++ cpumask_var_t effective_affinity; ++#endif ++#ifdef CONFIG_GENERIC_IRQ_IPI ++ unsigned int ipi_offset; ++#endif ++}; ++ ++/** ++ * struct irq_data - per irq chip data passed down to chip functions ++ * @mask: precomputed bitmask for accessing the chip registers ++ * @irq: interrupt number ++ * @hwirq: hardware interrupt number, local to the interrupt domain ++ * @common: point to data shared by all irqchips ++ * @chip: low level interrupt hardware access ++ * @domain: Interrupt translation domain; responsible for mapping ++ * between hwirq number and linux irq number. ++ * @parent_data: pointer to parent struct irq_data to support hierarchy ++ * irq_domain ++ * @chip_data: platform-specific per-chip private data for the chip ++ * methods, to allow shared chip implementations ++ */ ++struct irq_data { ++ u32 mask; ++ unsigned int irq; ++ unsigned long hwirq; ++ struct irq_common_data *common; ++ struct irq_chip *chip; ++ struct irq_domain *domain; ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ struct irq_data *parent_data; ++#endif ++ void *chip_data; ++}; ++ ++/* ++ * Bit masks for irq_common_data.state_use_accessors ++ * ++ * IRQD_TRIGGER_MASK - Mask for the trigger type bits ++ * IRQD_SETAFFINITY_PENDING - Affinity setting is pending ++ * IRQD_ACTIVATED - Interrupt has already been activated ++ * IRQD_NO_BALANCING - Balancing disabled for this IRQ ++ * IRQD_PER_CPU - Interrupt is per cpu ++ * IRQD_AFFINITY_SET - Interrupt affinity was set ++ * IRQD_LEVEL - Interrupt is level triggered ++ * IRQD_WAKEUP_STATE - Interrupt is configured for wakeup ++ * from suspend ++ * IRDQ_MOVE_PCNTXT - Interrupt can be moved in process ++ * context ++ * IRQD_IRQ_DISABLED - Disabled state of the interrupt ++ * IRQD_IRQ_MASKED - Masked state of the interrupt ++ * IRQD_IRQ_INPROGRESS - In progress state of the interrupt ++ * IRQD_WAKEUP_ARMED - Wakeup mode armed ++ * IRQD_FORWARDED_TO_VCPU - The interrupt is forwarded to a VCPU ++ * IRQD_AFFINITY_MANAGED - Affinity is auto-managed by the kernel ++ * IRQD_IRQ_STARTED - Startup state of the interrupt ++ * IRQD_MANAGED_SHUTDOWN - Interrupt was shutdown due to empty affinity ++ * mask. Applies only to affinity managed irqs. 
++ * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target ++ * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set ++ * IRQD_CAN_RESERVE - Can use reservation mode ++ * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change ++ * required ++ * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call ++ * irq_chip::irq_set_affinity() when deactivated. ++ */ ++enum { ++ IRQD_TRIGGER_MASK = 0xf, ++ IRQD_SETAFFINITY_PENDING = (1 << 8), ++ IRQD_ACTIVATED = (1 << 9), ++ IRQD_NO_BALANCING = (1 << 10), ++ IRQD_PER_CPU = (1 << 11), ++ IRQD_AFFINITY_SET = (1 << 12), ++ IRQD_LEVEL = (1 << 13), ++ IRQD_WAKEUP_STATE = (1 << 14), ++ IRQD_MOVE_PCNTXT = (1 << 15), ++ IRQD_IRQ_DISABLED = (1 << 16), ++ IRQD_IRQ_MASKED = (1 << 17), ++ IRQD_IRQ_INPROGRESS = (1 << 18), ++ IRQD_WAKEUP_ARMED = (1 << 19), ++ IRQD_FORWARDED_TO_VCPU = (1 << 20), ++ IRQD_AFFINITY_MANAGED = (1 << 21), ++ IRQD_IRQ_STARTED = (1 << 22), ++ IRQD_MANAGED_SHUTDOWN = (1 << 23), ++ IRQD_SINGLE_TARGET = (1 << 24), ++ IRQD_DEFAULT_TRIGGER_SET = (1 << 25), ++ IRQD_CAN_RESERVE = (1 << 26), ++ IRQD_MSI_NOMASK_QUIRK = (1 << 27), ++ IRQD_AFFINITY_ON_ACTIVATE = (1 << 29), ++}; ++ ++#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) ++ ++static inline bool irqd_is_setaffinity_pending(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_SETAFFINITY_PENDING; ++} ++ ++static inline bool irqd_is_per_cpu(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_PER_CPU; ++} ++ ++static inline bool irqd_can_balance(struct irq_data *d) ++{ ++ return !(__irqd_to_state(d) & (IRQD_PER_CPU | IRQD_NO_BALANCING)); ++} ++ ++static inline bool irqd_affinity_was_set(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_AFFINITY_SET; ++} ++ ++static inline void irqd_mark_affinity_was_set(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_AFFINITY_SET; ++} ++ ++static inline bool irqd_trigger_type_was_set(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_DEFAULT_TRIGGER_SET; ++} ++ ++static inline u32 irqd_get_trigger_type(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_TRIGGER_MASK; ++} ++ ++/* ++ * Must only be called inside irq_chip.irq_set_type() functions or ++ * from the DT/ACPI setup code. ++ */ ++static inline void irqd_set_trigger_type(struct irq_data *d, u32 type) ++{ ++ __irqd_to_state(d) &= ~IRQD_TRIGGER_MASK; ++ __irqd_to_state(d) |= type & IRQD_TRIGGER_MASK; ++ __irqd_to_state(d) |= IRQD_DEFAULT_TRIGGER_SET; ++} ++ ++static inline bool irqd_is_level_type(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_LEVEL; ++} ++ ++/* ++ * Must only be called of irqchip.irq_set_affinity() or low level ++ * hieararchy domain allocation functions. 
++ */ ++static inline void irqd_set_single_target(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_SINGLE_TARGET; ++} ++ ++static inline bool irqd_is_single_target(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_SINGLE_TARGET; ++} ++ ++static inline bool irqd_is_wakeup_set(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_WAKEUP_STATE; ++} ++ ++static inline bool irqd_can_move_in_process_context(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_MOVE_PCNTXT; ++} ++ ++static inline bool irqd_irq_disabled(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_IRQ_DISABLED; ++} ++ ++static inline bool irqd_irq_masked(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_IRQ_MASKED; ++} ++ ++static inline bool irqd_irq_inprogress(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_IRQ_INPROGRESS; ++} ++ ++static inline bool irqd_is_wakeup_armed(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_WAKEUP_ARMED; ++} ++ ++static inline bool irqd_is_forwarded_to_vcpu(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_FORWARDED_TO_VCPU; ++} ++ ++static inline void irqd_set_forwarded_to_vcpu(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_FORWARDED_TO_VCPU; ++} ++ ++static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; ++} ++ ++static inline bool irqd_affinity_is_managed(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_AFFINITY_MANAGED; ++} ++ ++static inline bool irqd_is_activated(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_ACTIVATED; ++} ++ ++static inline void irqd_set_activated(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_ACTIVATED; ++} ++ ++static inline void irqd_clr_activated(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_ACTIVATED; ++} ++ ++static inline bool irqd_is_started(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_IRQ_STARTED; ++} ++ ++static inline bool irqd_is_managed_and_shutdown(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; ++} ++ ++static inline void irqd_set_can_reserve(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_CAN_RESERVE; ++} ++ ++static inline void irqd_clr_can_reserve(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_CAN_RESERVE; ++} ++ ++static inline bool irqd_can_reserve(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_CAN_RESERVE; ++} ++ ++static inline void irqd_set_msi_nomask_quirk(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK; ++} ++ ++static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK; ++} ++ ++static inline bool irqd_msi_nomask_quirk(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; ++} ++ ++static inline void irqd_set_affinity_on_activate(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; ++} ++ ++static inline bool irqd_affinity_on_activate(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE; ++} ++ ++#undef __irqd_to_state ++ ++static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) ++{ ++ return d->hwirq; ++} ++ ++/** ++ * struct irq_chip - hardware interrupt chip descriptor ++ * ++ * @parent_device: pointer to parent device for irqchip ++ * @name: name for /proc/interrupts ++ * @irq_startup: start up the interrupt (defaults to ->enable if NULL) ++ * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) ++ * 
@irq_enable: enable the interrupt (defaults to chip->unmask if NULL) ++ * @irq_disable: disable the interrupt ++ * @irq_ack: start of a new interrupt ++ * @irq_mask: mask an interrupt source ++ * @irq_mask_ack: ack and mask an interrupt source ++ * @irq_unmask: unmask an interrupt source ++ * @irq_eoi: end of interrupt ++ * @irq_set_affinity: Set the CPU affinity on SMP machines. If the force ++ * argument is true, it tells the driver to ++ * unconditionally apply the affinity setting. Sanity ++ * checks against the supplied affinity mask are not ++ * required. This is used for CPU hotplug where the ++ * target CPU is not yet set in the cpu_online_mask. ++ * @irq_retrigger: resend an IRQ to the CPU ++ * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ ++ * @irq_set_wake: enable/disable power-management wake-on of an IRQ ++ * @irq_bus_lock: function to lock access to slow bus (i2c) chips ++ * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips ++ * @irq_cpu_online: configure an interrupt source for a secondary CPU ++ * @irq_cpu_offline: un-configure an interrupt source for a secondary CPU ++ * @irq_suspend: function called from core code on suspend once per ++ * chip, when one or more interrupts are installed ++ * @irq_resume: function called from core code on resume once per chip, ++ * when one ore more interrupts are installed ++ * @irq_pm_shutdown: function called from core code on shutdown once per chip ++ * @irq_calc_mask: Optional function to set irq_data.mask for special cases ++ * @irq_print_chip: optional to print special chip info in show_interrupts ++ * @irq_request_resources: optional to request resources before calling ++ * any other callback related to this irq ++ * @irq_release_resources: optional to release resources acquired with ++ * irq_request_resources ++ * @irq_compose_msi_msg: optional to compose message content for MSI ++ * @irq_write_msi_msg: optional to write message content for MSI ++ * @irq_get_irqchip_state: return the internal state of an interrupt ++ * @irq_set_irqchip_state: set the internal state of a interrupt ++ * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine ++ * @ipi_send_single: send a single IPI to destination cpus ++ * @ipi_send_mask: send an IPI to destination cpus in cpumask ++ * @irq_nmi_setup: function called from core code before enabling an NMI ++ * @irq_nmi_teardown: function called from core code after disabling an NMI ++ * @flags: chip specific flags ++ */ ++struct irq_chip { ++ struct device *parent_device; ++ const char *name; ++ unsigned int (*irq_startup)(struct irq_data *data); ++ void (*irq_shutdown)(struct irq_data *data); ++ void (*irq_enable)(struct irq_data *data); ++ void (*irq_disable)(struct irq_data *data); ++ ++ void (*irq_ack)(struct irq_data *data); ++ void (*irq_mask)(struct irq_data *data); ++ void (*irq_mask_ack)(struct irq_data *data); ++ void (*irq_unmask)(struct irq_data *data); ++ void (*irq_eoi)(struct irq_data *data); ++ ++ int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force); ++ int (*irq_retrigger)(struct irq_data *data); ++ int (*irq_set_type)(struct irq_data *data, unsigned int flow_type); ++ int (*irq_set_wake)(struct irq_data *data, unsigned int on); ++ ++ void (*irq_bus_lock)(struct irq_data *data); ++ void (*irq_bus_sync_unlock)(struct irq_data *data); ++ ++ void (*irq_cpu_online)(struct irq_data *data); ++ void (*irq_cpu_offline)(struct irq_data *data); ++ ++ void (*irq_suspend)(struct irq_data *data); ++ void 
(*irq_resume)(struct irq_data *data); ++ void (*irq_pm_shutdown)(struct irq_data *data); ++ ++ void (*irq_calc_mask)(struct irq_data *data); ++ ++ void (*irq_print_chip)(struct irq_data *data, struct seq_file *p); ++ int (*irq_request_resources)(struct irq_data *data); ++ void (*irq_release_resources)(struct irq_data *data); ++ ++ void (*irq_compose_msi_msg)(struct irq_data *data, struct msi_msg *msg); ++ void (*irq_write_msi_msg)(struct irq_data *data, struct msi_msg *msg); ++ ++ int (*irq_get_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool *state); ++ int (*irq_set_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool state); ++ ++ int (*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info); ++ ++ void (*ipi_send_single)(struct irq_data *data, unsigned int cpu); ++ void (*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest); ++ ++ int (*irq_nmi_setup)(struct irq_data *data); ++ void (*irq_nmi_teardown)(struct irq_data *data); ++ ++ unsigned long flags; ++}; ++ ++/* ++ * irq_chip specific flags ++ * ++ * IRQCHIP_SET_TYPE_MASKED: Mask before calling chip.irq_set_type() ++ * IRQCHIP_EOI_IF_HANDLED: Only issue irq_eoi() when irq was handled ++ * IRQCHIP_MASK_ON_SUSPEND: Mask non wake irqs in the suspend path ++ * IRQCHIP_ONOFFLINE_ENABLED: Only call irq_on/off_line callbacks ++ * when irq enabled ++ * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip ++ * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask ++ * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode ++ * IRQCHIP_SUPPORTS_LEVEL_MSI Chip can provide two doorbells for Level MSIs ++ * IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips ++ */ ++enum { ++ IRQCHIP_SET_TYPE_MASKED = (1 << 0), ++ IRQCHIP_EOI_IF_HANDLED = (1 << 1), ++ IRQCHIP_MASK_ON_SUSPEND = (1 << 2), ++ IRQCHIP_ONOFFLINE_ENABLED = (1 << 3), ++ IRQCHIP_SKIP_SET_WAKE = (1 << 4), ++ IRQCHIP_ONESHOT_SAFE = (1 << 5), ++ IRQCHIP_EOI_THREADED = (1 << 6), ++ IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), ++ IRQCHIP_SUPPORTS_NMI = (1 << 8), ++}; ++ ++#include ++ ++/* ++ * Pick up the arch-dependent methods: ++ */ ++#include ++ ++#ifndef NR_IRQS_LEGACY ++# define NR_IRQS_LEGACY 0 ++#endif ++ ++#ifndef ARCH_IRQ_INIT_FLAGS ++# define ARCH_IRQ_INIT_FLAGS 0 ++#endif ++ ++#define IRQ_DEFAULT_INIT_FLAGS ARCH_IRQ_INIT_FLAGS ++ ++struct irqaction; ++extern int setup_irq(unsigned int irq, struct irqaction *new); ++extern void remove_irq(unsigned int irq, struct irqaction *act); ++extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); ++extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); ++ ++extern void irq_cpu_online(void); ++extern void irq_cpu_offline(void); ++extern int irq_set_affinity_locked(struct irq_data *data, ++ const struct cpumask *cpumask, bool force); ++extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); ++ ++#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_IRQ_MIGRATION) ++extern void irq_migrate_all_off_this_cpu(void); ++extern int irq_affinity_online_cpu(unsigned int cpu); ++#else ++# define irq_affinity_online_cpu NULL ++#endif ++ ++#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) ++void __irq_move_irq(struct irq_data *data); ++static inline void irq_move_irq(struct irq_data *data) ++{ ++ if (unlikely(irqd_is_setaffinity_pending(data))) ++ __irq_move_irq(data); ++} ++void irq_move_masked_irq(struct irq_data *data); ++void irq_force_complete_move(struct irq_desc *desc); ++#else ++static 
inline void irq_move_irq(struct irq_data *data) { } ++static inline void irq_move_masked_irq(struct irq_data *data) { } ++static inline void irq_force_complete_move(struct irq_desc *desc) { } ++#endif ++ ++extern int no_irq_affinity; ++ ++#ifdef CONFIG_HARDIRQS_SW_RESEND ++int irq_set_parent(int irq, int parent_irq); ++#else ++static inline int irq_set_parent(int irq, int parent_irq) ++{ ++ return 0; ++} ++#endif ++ ++/* ++ * Built-in IRQ handlers for various IRQ types, ++ * callable via desc->handle_irq() ++ */ ++extern void handle_level_irq(struct irq_desc *desc); ++extern void handle_fasteoi_irq(struct irq_desc *desc); ++extern void handle_edge_irq(struct irq_desc *desc); ++extern void handle_edge_eoi_irq(struct irq_desc *desc); ++extern void handle_simple_irq(struct irq_desc *desc); ++extern void handle_untracked_irq(struct irq_desc *desc); ++extern void handle_percpu_irq(struct irq_desc *desc); ++extern void handle_percpu_devid_irq(struct irq_desc *desc); ++extern void handle_bad_irq(struct irq_desc *desc); ++extern void handle_nested_irq(unsigned int irq); ++ ++extern void handle_fasteoi_nmi(struct irq_desc *desc); ++extern void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc); ++ ++extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); ++extern int irq_chip_pm_get(struct irq_data *data); ++extern int irq_chip_pm_put(struct irq_data *data); ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++extern void handle_fasteoi_ack_irq(struct irq_desc *desc); ++extern void handle_fasteoi_mask_irq(struct irq_desc *desc); ++extern void irq_chip_enable_parent(struct irq_data *data); ++extern void irq_chip_disable_parent(struct irq_data *data); ++extern void irq_chip_ack_parent(struct irq_data *data); ++extern int irq_chip_retrigger_hierarchy(struct irq_data *data); ++extern void irq_chip_mask_parent(struct irq_data *data); ++extern void irq_chip_unmask_parent(struct irq_data *data); ++extern void irq_chip_eoi_parent(struct irq_data *data); ++extern int irq_chip_set_affinity_parent(struct irq_data *data, ++ const struct cpumask *dest, ++ bool force); ++extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on); ++extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, ++ void *vcpu_info); ++extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type); ++#endif ++ ++/* Handling of unhandled and spurious interrupts: */ ++extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); ++ ++ ++/* Enable/disable irq debugging output: */ ++extern int noirqdebug_setup(char *str); ++ ++/* Checks whether the interrupt can be requested by request_irq(): */ ++extern int can_request_irq(unsigned int irq, unsigned long irqflags); ++ ++/* Dummy irq-chip implementations: */ ++extern struct irq_chip no_irq_chip; ++extern struct irq_chip dummy_irq_chip; ++ ++extern void ++irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, ++ irq_flow_handler_t handle, const char *name); ++ ++static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip, ++ irq_flow_handler_t handle) ++{ ++ irq_set_chip_and_handler_name(irq, chip, handle, NULL); ++} ++ ++extern int irq_set_percpu_devid(unsigned int irq); ++extern int irq_set_percpu_devid_partition(unsigned int irq, ++ const struct cpumask *affinity); ++extern int irq_get_percpu_devid_partition(unsigned int irq, ++ struct cpumask *affinity); ++ ++extern void ++__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, ++ const char *name); ++ 
++static inline void ++irq_set_handler(unsigned int irq, irq_flow_handler_t handle) ++{ ++ __irq_set_handler(irq, handle, 0, NULL); ++} ++ ++/* ++ * Set a highlevel chained flow handler for a given IRQ. ++ * (a chained handler is automatically enabled and set to ++ * IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD) ++ */ ++static inline void ++irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle) ++{ ++ __irq_set_handler(irq, handle, 1, NULL); ++} ++ ++/* ++ * Set a highlevel chained flow handler and its data for a given IRQ. ++ * (a chained handler is automatically enabled and set to ++ * IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD) ++ */ ++void ++irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, ++ void *data); ++ ++void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set); ++ ++static inline void irq_set_status_flags(unsigned int irq, unsigned long set) ++{ ++ irq_modify_status(irq, 0, set); ++} ++ ++static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr) ++{ ++ irq_modify_status(irq, clr, 0); ++} ++ ++static inline void irq_set_noprobe(unsigned int irq) ++{ ++ irq_modify_status(irq, 0, IRQ_NOPROBE); ++} ++ ++static inline void irq_set_probe(unsigned int irq) ++{ ++ irq_modify_status(irq, IRQ_NOPROBE, 0); ++} ++ ++static inline void irq_set_nothread(unsigned int irq) ++{ ++ irq_modify_status(irq, 0, IRQ_NOTHREAD); ++} ++ ++static inline void irq_set_thread(unsigned int irq) ++{ ++ irq_modify_status(irq, IRQ_NOTHREAD, 0); ++} ++ ++static inline void irq_set_nested_thread(unsigned int irq, bool nest) ++{ ++ if (nest) ++ irq_set_status_flags(irq, IRQ_NESTED_THREAD); ++ else ++ irq_clear_status_flags(irq, IRQ_NESTED_THREAD); ++} ++ ++static inline void irq_set_percpu_devid_flags(unsigned int irq) ++{ ++ irq_set_status_flags(irq, ++ IRQ_NOAUTOEN | IRQ_PER_CPU | IRQ_NOTHREAD | ++ IRQ_NOPROBE | IRQ_PER_CPU_DEVID); ++} ++ ++/* Set/get chip/data for an IRQ: */ ++extern int irq_set_chip(unsigned int irq, struct irq_chip *chip); ++extern int irq_set_handler_data(unsigned int irq, void *data); ++extern int irq_set_chip_data(unsigned int irq, void *data); ++extern int irq_set_irq_type(unsigned int irq, unsigned int type); ++extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); ++extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, ++ struct msi_desc *entry); ++extern struct irq_data *irq_get_irq_data(unsigned int irq); ++ ++static inline struct irq_chip *irq_get_chip(unsigned int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ return d ? d->chip : NULL; ++} ++ ++static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d) ++{ ++ return d->chip; ++} ++ ++static inline void *irq_get_chip_data(unsigned int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ return d ? d->chip_data : NULL; ++} ++ ++static inline void *irq_data_get_irq_chip_data(struct irq_data *d) ++{ ++ return d->chip_data; ++} ++ ++static inline void *irq_get_handler_data(unsigned int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ return d ? d->common->handler_data : NULL; ++} ++ ++static inline void *irq_data_get_irq_handler_data(struct irq_data *d) ++{ ++ return d->common->handler_data; ++} ++ ++static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ return d ? 
d->common->msi_desc : NULL; ++} ++ ++static inline struct msi_desc *irq_data_get_msi_desc(struct irq_data *d) ++{ ++ return d->common->msi_desc; ++} ++ ++static inline u32 irq_get_trigger_type(unsigned int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ return d ? irqd_get_trigger_type(d) : 0; ++} ++ ++static inline int irq_common_data_get_node(struct irq_common_data *d) ++{ ++#ifdef CONFIG_NUMA ++ return d->node; ++#else ++ return 0; ++#endif ++} ++ ++static inline int irq_data_get_node(struct irq_data *d) ++{ ++ return irq_common_data_get_node(d->common); ++} ++ ++static inline struct cpumask *irq_get_affinity_mask(int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ ++ return d ? d->common->affinity : NULL; ++} ++ ++static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) ++{ ++ return d->common->affinity; ++} ++ ++#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK ++static inline ++struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) ++{ ++ return d->common->effective_affinity; ++} ++static inline void irq_data_update_effective_affinity(struct irq_data *d, ++ const struct cpumask *m) ++{ ++ cpumask_copy(d->common->effective_affinity, m); ++} ++#else ++static inline void irq_data_update_effective_affinity(struct irq_data *d, ++ const struct cpumask *m) ++{ ++} ++static inline ++struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) ++{ ++ return d->common->affinity; ++} ++#endif ++ ++unsigned int arch_dynirq_lower_bound(unsigned int from); ++ ++int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, ++ struct module *owner, const struct cpumask *affinity); ++ ++int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from, ++ unsigned int cnt, int node, struct module *owner, ++ const struct cpumask *affinity); ++ ++/* use macros to avoid needing export.h for THIS_MODULE */ ++#define irq_alloc_descs(irq, from, cnt, node) \ ++ __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE, NULL) ++ ++#define irq_alloc_desc(node) \ ++ irq_alloc_descs(-1, 0, 1, node) ++ ++#define irq_alloc_desc_at(at, node) \ ++ irq_alloc_descs(at, at, 1, node) ++ ++#define irq_alloc_desc_from(from, node) \ ++ irq_alloc_descs(-1, from, 1, node) ++ ++#define irq_alloc_descs_from(from, cnt, node) \ ++ irq_alloc_descs(-1, from, cnt, node) ++ ++#define devm_irq_alloc_descs(dev, irq, from, cnt, node) \ ++ __devm_irq_alloc_descs(dev, irq, from, cnt, node, THIS_MODULE, NULL) ++ ++#define devm_irq_alloc_desc(dev, node) \ ++ devm_irq_alloc_descs(dev, -1, 0, 1, node) ++ ++#define devm_irq_alloc_desc_at(dev, at, node) \ ++ devm_irq_alloc_descs(dev, at, at, 1, node) ++ ++#define devm_irq_alloc_desc_from(dev, from, node) \ ++ devm_irq_alloc_descs(dev, -1, from, 1, node) ++ ++#define devm_irq_alloc_descs_from(dev, from, cnt, node) \ ++ devm_irq_alloc_descs(dev, -1, from, cnt, node) ++ ++void irq_free_descs(unsigned int irq, unsigned int cnt); ++static inline void irq_free_desc(unsigned int irq) ++{ ++ irq_free_descs(irq, 1); ++} ++ ++#ifdef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ ++unsigned int irq_alloc_hwirqs(int cnt, int node); ++static inline unsigned int irq_alloc_hwirq(int node) ++{ ++ return irq_alloc_hwirqs(1, node); ++} ++void irq_free_hwirqs(unsigned int from, int cnt); ++static inline void irq_free_hwirq(unsigned int irq) ++{ ++ return irq_free_hwirqs(irq, 1); ++} ++int arch_setup_hwirq(unsigned int irq, int node); ++void arch_teardown_hwirq(unsigned int irq); ++#endif ++ ++#ifdef CONFIG_GENERIC_IRQ_LEGACY ++void 
irq_init_desc(unsigned int irq); ++#endif ++ ++/** ++ * struct irq_chip_regs - register offsets for struct irq_gci ++ * @enable: Enable register offset to reg_base ++ * @disable: Disable register offset to reg_base ++ * @mask: Mask register offset to reg_base ++ * @ack: Ack register offset to reg_base ++ * @eoi: Eoi register offset to reg_base ++ * @type: Type configuration register offset to reg_base ++ * @polarity: Polarity configuration register offset to reg_base ++ */ ++struct irq_chip_regs { ++ unsigned long enable; ++ unsigned long disable; ++ unsigned long mask; ++ unsigned long ack; ++ unsigned long eoi; ++ unsigned long type; ++ unsigned long polarity; ++}; ++ ++/** ++ * struct irq_chip_type - Generic interrupt chip instance for a flow type ++ * @chip: The real interrupt chip which provides the callbacks ++ * @regs: Register offsets for this chip ++ * @handler: Flow handler associated with this chip ++ * @type: Chip can handle these flow types ++ * @mask_cache_priv: Cached mask register private to the chip type ++ * @mask_cache: Pointer to cached mask register ++ * ++ * A irq_generic_chip can have several instances of irq_chip_type when ++ * it requires different functions and register offsets for different ++ * flow types. ++ */ ++struct irq_chip_type { ++ struct irq_chip chip; ++ struct irq_chip_regs regs; ++ irq_flow_handler_t handler; ++ u32 type; ++ u32 mask_cache_priv; ++ u32 *mask_cache; ++}; ++ ++/** ++ * struct irq_chip_generic - Generic irq chip data structure ++ * @lock: Lock to protect register and cache data access ++ * @reg_base: Register base address (virtual) ++ * @reg_readl: Alternate I/O accessor (defaults to readl if NULL) ++ * @reg_writel: Alternate I/O accessor (defaults to writel if NULL) ++ * @suspend: Function called from core code on suspend once per ++ * chip; can be useful instead of irq_chip::suspend to ++ * handle chip details even when no interrupts are in use ++ * @resume: Function called from core code on resume once per chip; ++ * can be useful instead of irq_chip::suspend to handle ++ * chip details even when no interrupts are in use ++ * @irq_base: Interrupt base nr for this chip ++ * @irq_cnt: Number of interrupts handled by this chip ++ * @mask_cache: Cached mask register shared between all chip types ++ * @type_cache: Cached type register ++ * @polarity_cache: Cached polarity register ++ * @wake_enabled: Interrupt can wakeup from suspend ++ * @wake_active: Interrupt is marked as an wakeup from suspend source ++ * @num_ct: Number of available irq_chip_type instances (usually 1) ++ * @private: Private data for non generic chip callbacks ++ * @installed: bitfield to denote installed interrupts ++ * @unused: bitfield to denote unused interrupts ++ * @domain: irq domain pointer ++ * @list: List head for keeping track of instances ++ * @chip_types: Array of interrupt irq_chip_types ++ * ++ * Note, that irq_chip_generic can have multiple irq_chip_type ++ * implementations which can be associated to a particular irq line of ++ * an irq_chip_generic instance. That allows to share and protect ++ * state in an irq_chip_generic instance when we need to implement ++ * different flow mechanisms (level/edge) for it. 
++ */ ++struct irq_chip_generic { ++ raw_spinlock_t lock; ++ void __iomem *reg_base; ++ u32 (*reg_readl)(void __iomem *addr); ++ void (*reg_writel)(u32 val, void __iomem *addr); ++ void (*suspend)(struct irq_chip_generic *gc); ++ void (*resume)(struct irq_chip_generic *gc); ++ unsigned int irq_base; ++ unsigned int irq_cnt; ++ u32 mask_cache; ++ u32 type_cache; ++ u32 polarity_cache; ++ u32 wake_enabled; ++ u32 wake_active; ++ unsigned int num_ct; ++ void *private; ++ unsigned long installed; ++ unsigned long unused; ++ struct irq_domain *domain; ++ struct list_head list; ++ struct irq_chip_type chip_types[0]; ++}; ++ ++/** ++ * enum irq_gc_flags - Initialization flags for generic irq chips ++ * @IRQ_GC_INIT_MASK_CACHE: Initialize the mask_cache by reading mask reg ++ * @IRQ_GC_INIT_NESTED_LOCK: Set the lock class of the irqs to nested for ++ * irq chips which need to call irq_set_wake() on ++ * the parent irq. Usually GPIO implementations ++ * @IRQ_GC_MASK_CACHE_PER_TYPE: Mask cache is chip type private ++ * @IRQ_GC_NO_MASK: Do not calculate irq_data->mask ++ * @IRQ_GC_BE_IO: Use big-endian register accesses (default: LE) ++ */ ++enum irq_gc_flags { ++ IRQ_GC_INIT_MASK_CACHE = 1 << 0, ++ IRQ_GC_INIT_NESTED_LOCK = 1 << 1, ++ IRQ_GC_MASK_CACHE_PER_TYPE = 1 << 2, ++ IRQ_GC_NO_MASK = 1 << 3, ++ IRQ_GC_BE_IO = 1 << 4, ++}; ++ ++/* ++ * struct irq_domain_chip_generic - Generic irq chip data structure for irq domains ++ * @irqs_per_chip: Number of interrupts per chip ++ * @num_chips: Number of chips ++ * @irq_flags_to_set: IRQ* flags to set on irq setup ++ * @irq_flags_to_clear: IRQ* flags to clear on irq setup ++ * @gc_flags: Generic chip specific setup flags ++ * @gc: Array of pointers to generic interrupt chips ++ */ ++struct irq_domain_chip_generic { ++ unsigned int irqs_per_chip; ++ unsigned int num_chips; ++ unsigned int irq_flags_to_clear; ++ unsigned int irq_flags_to_set; ++ enum irq_gc_flags gc_flags; ++ struct irq_chip_generic *gc[0]; ++}; ++ ++/* Generic chip callback functions */ ++void irq_gc_noop(struct irq_data *d); ++void irq_gc_mask_disable_reg(struct irq_data *d); ++void irq_gc_mask_set_bit(struct irq_data *d); ++void irq_gc_mask_clr_bit(struct irq_data *d); ++void irq_gc_unmask_enable_reg(struct irq_data *d); ++void irq_gc_ack_set_bit(struct irq_data *d); ++void irq_gc_ack_clr_bit(struct irq_data *d); ++void irq_gc_mask_disable_and_ack_set(struct irq_data *d); ++void irq_gc_eoi(struct irq_data *d); ++int irq_gc_set_wake(struct irq_data *d, unsigned int on); ++ ++/* Setup functions for irq_chip_generic */ ++int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, ++ irq_hw_number_t hw_irq); ++struct irq_chip_generic * ++irq_alloc_generic_chip(const char *name, int nr_ct, unsigned int irq_base, ++ void __iomem *reg_base, irq_flow_handler_t handler); ++void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, ++ enum irq_gc_flags flags, unsigned int clr, ++ unsigned int set); ++int irq_setup_alt_chip(struct irq_data *d, unsigned int type); ++void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, ++ unsigned int clr, unsigned int set); ++ ++struct irq_chip_generic * ++devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct, ++ unsigned int irq_base, void __iomem *reg_base, ++ irq_flow_handler_t handler); ++int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, ++ u32 msk, enum irq_gc_flags flags, ++ unsigned int clr, unsigned int set); ++ ++struct irq_chip_generic *irq_get_domain_generic_chip(struct 
irq_domain *d, unsigned int hw_irq); ++ ++int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, ++ int num_ct, const char *name, ++ irq_flow_handler_t handler, ++ unsigned int clr, unsigned int set, ++ enum irq_gc_flags flags); ++ ++#define irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name, \ ++ handler, clr, set, flags) \ ++({ \ ++ MAYBE_BUILD_BUG_ON(irqs_per_chip > 32); \ ++ __irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name,\ ++ handler, clr, set, flags); \ ++}) ++ ++static inline void irq_free_generic_chip(struct irq_chip_generic *gc) ++{ ++ kfree(gc); ++} ++ ++static inline void irq_destroy_generic_chip(struct irq_chip_generic *gc, ++ u32 msk, unsigned int clr, ++ unsigned int set) ++{ ++ irq_remove_generic_chip(gc, msk, clr, set); ++ irq_free_generic_chip(gc); ++} ++ ++static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) ++{ ++ return container_of(d->chip, struct irq_chip_type, chip); ++} ++ ++#define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX) ++ ++#ifdef CONFIG_SMP ++static inline void irq_gc_lock(struct irq_chip_generic *gc) ++{ ++ raw_spin_lock(&gc->lock); ++} ++ ++static inline void irq_gc_unlock(struct irq_chip_generic *gc) ++{ ++ raw_spin_unlock(&gc->lock); ++} ++#else ++static inline void irq_gc_lock(struct irq_chip_generic *gc) { } ++static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } ++#endif ++ ++/* ++ * The irqsave variants are for usage in non interrupt code. Do not use ++ * them in irq_chip callbacks. Use irq_gc_lock() instead. ++ */ ++#define irq_gc_lock_irqsave(gc, flags) \ ++ raw_spin_lock_irqsave(&(gc)->lock, flags) ++ ++#define irq_gc_unlock_irqrestore(gc, flags) \ ++ raw_spin_unlock_irqrestore(&(gc)->lock, flags) ++ ++static inline void irq_reg_writel(struct irq_chip_generic *gc, ++ u32 val, int reg_offset) ++{ ++ if (gc->reg_writel) ++ gc->reg_writel(val, gc->reg_base + reg_offset); ++ else ++ writel(val, gc->reg_base + reg_offset); ++} ++ ++static inline u32 irq_reg_readl(struct irq_chip_generic *gc, ++ int reg_offset) ++{ ++ if (gc->reg_readl) ++ return gc->reg_readl(gc->reg_base + reg_offset); ++ else ++ return readl(gc->reg_base + reg_offset); ++} ++ ++struct irq_matrix; ++struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits, ++ unsigned int alloc_start, ++ unsigned int alloc_end); ++void irq_matrix_online(struct irq_matrix *m); ++void irq_matrix_offline(struct irq_matrix *m); ++void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit, bool replace); ++int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk); ++void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk); ++int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk, ++ unsigned int *mapped_cpu); ++void irq_matrix_reserve(struct irq_matrix *m); ++void irq_matrix_remove_reserved(struct irq_matrix *m); ++int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, ++ bool reserved, unsigned int *mapped_cpu); ++void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, ++ unsigned int bit, bool managed); ++void irq_matrix_assign(struct irq_matrix *m, unsigned int bit); ++unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown); ++unsigned int irq_matrix_allocated(struct irq_matrix *m); ++unsigned int irq_matrix_reserved(struct irq_matrix *m); ++void irq_matrix_debug_show(struct seq_file *sf, struct irq_matrix *m, int ind); ++ ++/* Contrary to Linux irqs, for hardware irqs the irq number 0 is 
valid */ ++#define INVALID_HWIRQ (~0UL) ++irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu); ++int __ipi_send_single(struct irq_desc *desc, unsigned int cpu); ++int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest); ++int ipi_send_single(unsigned int virq, unsigned int cpu); ++int ipi_send_mask(unsigned int virq, const struct cpumask *dest); ++ ++#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER ++/* ++ * Registers a generic IRQ handling function as the top-level IRQ handler in ++ * the system, which is generally the first C code called from an assembly ++ * architecture-specific interrupt handler. ++ * ++ * Returns 0 on success, or -EBUSY if an IRQ handler has already been ++ * registered. ++ */ ++int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)); ++ ++/* ++ * Allows interrupt handlers to find the irqchip that's been registered as the ++ * top-level IRQ handler. ++ */ ++extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; ++#endif ++ ++#endif /* _LINUX_IRQ_H */ +diff -uprN kernel/include/linux/irq.h.rej kernel_new/include/linux/irq.h.rej +--- kernel/include/linux/irq.h.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/irq.h.rej 2021-04-01 18:28:07.802863124 +0800 +@@ -0,0 +1,18 @@ ++--- include/linux/irq.h 2019-12-18 03:36:04.000000000 +0800 +++++ include/linux/irq.h 2021-03-22 09:21:43.212415388 +0800 ++@@ -509,6 +514,7 @@ struct irq_chip { ++ * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask ++ * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode ++ * IRQCHIP_SUPPORTS_LEVEL_MSI Chip can provide two doorbells for Level MSIs +++ * IRQCHIP_PIPELINE_SAFE: Chip can work in pipelined mode ++ */ ++ enum { ++ IRQCHIP_SET_TYPE_MASKED = (1 << 0), ++@@ -519,6 +525,7 @@ enum { ++ IRQCHIP_ONESHOT_SAFE = (1 << 5), ++ IRQCHIP_EOI_THREADED = (1 << 6), ++ IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), +++ IRQCHIP_PIPELINE_SAFE = (1 << 7), ++ }; ++ ++ #include +diff -uprN kernel/include/linux/irqnr.h kernel_new/include/linux/irqnr.h +--- kernel/include/linux/irqnr.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/irqnr.h 2021-04-01 18:28:07.802863124 +0800 +@@ -6,7 +6,11 @@ + + + extern int nr_irqs; ++#if !defined(CONFIG_IPIPE) || defined(CONFIG_SPARSE_IRQ) + extern struct irq_desc *irq_to_desc(unsigned int irq); ++#else ++#define irq_to_desc(irq) ({ ipipe_virtual_irq_p(irq) ? 
NULL : &irq_desc[irq]; }) ++#endif + unsigned int irq_get_next_irq(unsigned int offset); + + # define for_each_irq_desc(irq, desc) \ +diff -uprN kernel/include/linux/kernel.h kernel_new/include/linux/kernel.h +--- kernel/include/linux/kernel.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/kernel.h 2021-04-01 18:28:07.802863124 +0800 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + + #define USHRT_MAX ((u16)(~0U)) +@@ -240,9 +241,12 @@ struct user; + + #ifdef CONFIG_PREEMPT_VOLUNTARY + extern int _cond_resched(void); +-# define might_resched() _cond_resched() ++# define might_resched() do { \ ++ ipipe_root_only(); \ ++ _cond_resched(); \ ++ } while (0) + #else +-# define might_resched() do { } while (0) ++# define might_resched() ipipe_root_only() + #endif + + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP +diff -uprN kernel/include/linux/kvm_host.h kernel_new/include/linux/kvm_host.h +--- kernel/include/linux/kvm_host.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/kvm_host.h 2021-04-01 18:28:07.802863124 +0800 +@@ -221,6 +221,10 @@ struct kvm_vcpu { + #ifdef CONFIG_PREEMPT_NOTIFIERS + struct preempt_notifier preempt_notifier; + #endif ++#ifdef CONFIG_IPIPE ++ struct ipipe_vm_notifier ipipe_notifier; ++ bool ipipe_put_vcpu; ++#endif + int cpu; + int vcpu_id; + int srcu_idx; +diff -uprN kernel/include/linux/preempt.h kernel_new/include/linux/preempt.h +--- kernel/include/linux/preempt.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/preempt.h 2021-04-01 18:28:07.802863124 +0800 +@@ -255,7 +255,28 @@ do { \ + + #endif /* CONFIG_PREEMPT_COUNT */ + +-#ifdef MODULE ++#ifdef CONFIG_IPIPE ++#define hard_preempt_disable() \ ++ ({ \ ++ unsigned long __flags__; \ ++ __flags__ = hard_local_irq_save(); \ ++ if (__ipipe_root_p) \ ++ preempt_disable(); \ ++ __flags__; \ ++ }) ++ ++#define hard_preempt_enable(__flags__) \ ++ do { \ ++ if (__ipipe_root_p) { \ ++ preempt_enable_no_resched(); \ ++ hard_local_irq_restore(__flags__); \ ++ if (!hard_irqs_disabled_flags(__flags__)) \ ++ preempt_check_resched(); \ ++ } else \ ++ hard_local_irq_restore(__flags__); \ ++ } while (0) ++ ++#elif defined(MODULE) + /* + * Modules have no business playing preemption tricks. + */ +@@ -263,7 +284,7 @@ do { \ + #undef preempt_enable_no_resched + #undef preempt_enable_no_resched_notrace + #undef preempt_check_resched +-#endif ++#endif /* !IPIPE && MODULE */ + + #define preempt_set_need_resched() \ + do { \ +diff -uprN kernel/include/linux/printk.h kernel_new/include/linux/printk.h +--- kernel/include/linux/printk.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/printk.h 2021-04-01 18:28:07.802863124 +0800 +@@ -157,6 +157,17 @@ static inline void printk_nmi_direct_ent + static inline void printk_nmi_direct_exit(void) { } + #endif /* PRINTK_NMI */ + ++#ifdef CONFIG_RAW_PRINTK ++void raw_vprintk(const char *fmt, va_list ap); ++asmlinkage __printf(1, 2) ++void raw_printk(const char *fmt, ...); ++#else ++static inline __cold ++void raw_vprintk(const char *s, va_list ap) { } ++static inline __printf(1, 2) __cold ++void raw_printk(const char *s, ...) 
{ } ++#endif ++ + #ifdef CONFIG_PRINTK + extern void printk_safe_enter(void); + extern void printk_safe_exit(void); +diff -uprN kernel/include/linux/printk.h.orig kernel_new/include/linux/printk.h.orig +--- kernel/include/linux/printk.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/printk.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,573 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __KERNEL_PRINTK__ ++#define __KERNEL_PRINTK__ ++ ++#include ++#include ++#include ++#include ++#include ++ ++extern const char linux_banner[]; ++extern const char linux_proc_banner[]; ++ ++#define PRINTK_MAX_SINGLE_HEADER_LEN 2 ++ ++static inline int printk_get_level(const char *buffer) ++{ ++ if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { ++ switch (buffer[1]) { ++ case '0' ... '7': ++ case 'd': /* KERN_DEFAULT */ ++ case 'c': /* KERN_CONT */ ++ return buffer[1]; ++ } ++ } ++ return 0; ++} ++ ++static inline const char *printk_skip_level(const char *buffer) ++{ ++ if (printk_get_level(buffer)) ++ return buffer + 2; ++ ++ return buffer; ++} ++ ++static inline const char *printk_skip_headers(const char *buffer) ++{ ++ while (printk_get_level(buffer)) ++ buffer = printk_skip_level(buffer); ++ ++ return buffer; ++} ++ ++#define CONSOLE_EXT_LOG_MAX 8192 ++ ++/* printk's without a loglevel use this.. */ ++#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT ++ ++/* We show everything that is MORE important than this.. */ ++#define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ ++#define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ ++#define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ ++#define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ ++ ++/* ++ * Default used to be hard-coded at 7, quiet used to be hardcoded at 4, ++ * we're now allowing both to be set from kernel config. ++ */ ++#define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT ++#define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET ++ ++extern int console_printk[]; ++ ++#define console_loglevel (console_printk[0]) ++#define default_message_loglevel (console_printk[1]) ++#define minimum_console_loglevel (console_printk[2]) ++#define default_console_loglevel (console_printk[3]) ++ ++static inline void console_silent(void) ++{ ++ console_loglevel = CONSOLE_LOGLEVEL_SILENT; ++} ++ ++static inline void console_verbose(void) ++{ ++ if (console_loglevel) ++ console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; ++} ++ ++/* strlen("ratelimit") + 1 */ ++#define DEVKMSG_STR_MAX_SIZE 10 ++extern char devkmsg_log_str[]; ++struct ctl_table; ++ ++struct va_format { ++ const char *fmt; ++ va_list *va; ++}; ++ ++/* ++ * FW_BUG ++ * Add this to a message where you are sure the firmware is buggy or behaves ++ * really stupid or out of spec. Be aware that the responsible BIOS developer ++ * should be able to fix this issue or at least get a concrete idea of the ++ * problem by reading your message without the need of looking at the kernel ++ * code. ++ * ++ * Use it for definite and high priority BIOS bugs. ++ * ++ * FW_WARN ++ * Use it for not that clear (e.g. could the kernel messed up things already?) ++ * and medium priority BIOS bugs. ++ * ++ * FW_INFO ++ * Use this one if you want to tell the user or vendor about something ++ * suspicious, but generally harmless related to the firmware. ++ * ++ * Use it for information or very low priority BIOS bugs. 
++ */ ++#define FW_BUG "[Firmware Bug]: " ++#define FW_WARN "[Firmware Warn]: " ++#define FW_INFO "[Firmware Info]: " ++ ++/* ++ * HW_ERR ++ * Add this to a message for hardware errors, so that user can report ++ * it to hardware vendor instead of LKML or software vendor. ++ */ ++#define HW_ERR "[Hardware Error]: " ++ ++/* ++ * DEPRECATED ++ * Add this to a message whenever you want to warn user space about the use ++ * of a deprecated aspect of an API so they can stop using it ++ */ ++#define DEPRECATED "[Deprecated]: " ++ ++/* ++ * Dummy printk for disabled debugging statements to use whilst maintaining ++ * gcc's format checking. ++ */ ++#define no_printk(fmt, ...) \ ++({ \ ++ if (0) \ ++ printk(fmt, ##__VA_ARGS__); \ ++ 0; \ ++}) ++ ++#ifdef CONFIG_EARLY_PRINTK ++extern asmlinkage __printf(1, 2) ++void early_printk(const char *fmt, ...); ++#else ++static inline __printf(1, 2) __cold ++void early_printk(const char *s, ...) { } ++#endif ++ ++#ifdef CONFIG_PRINTK_NMI ++extern void printk_nmi_enter(void); ++extern void printk_nmi_exit(void); ++extern void printk_nmi_direct_enter(void); ++extern void printk_nmi_direct_exit(void); ++#else ++static inline void printk_nmi_enter(void) { } ++static inline void printk_nmi_exit(void) { } ++static inline void printk_nmi_direct_enter(void) { } ++static inline void printk_nmi_direct_exit(void) { } ++#endif /* PRINTK_NMI */ ++ ++#ifdef CONFIG_PRINTK ++extern void printk_safe_enter(void); ++extern void printk_safe_exit(void); ++ ++#define printk_safe_enter_irqsave(flags) \ ++ do { \ ++ local_irq_save(flags); \ ++ printk_safe_enter(); \ ++ } while (0) ++ ++#define printk_safe_exit_irqrestore(flags) \ ++ do { \ ++ printk_safe_exit(); \ ++ local_irq_restore(flags); \ ++ } while (0) ++ ++#define printk_safe_enter_irq() \ ++ do { \ ++ local_irq_disable(); \ ++ printk_safe_enter(); \ ++ } while (0) ++ ++#define printk_safe_exit_irq() \ ++ do { \ ++ printk_safe_exit(); \ ++ local_irq_enable(); \ ++ } while (0) ++#else ++/* ++ * On !PRINTK builds we still export console output related locks ++ * and some functions (console_unlock()/tty/etc.), so printk-safe ++ * must preserve the existing local IRQ guarantees. ++ */ ++#define printk_safe_enter_irqsave(flags) local_irq_save(flags) ++#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) ++ ++#define printk_safe_enter_irq() local_irq_disable() ++#define printk_safe_exit_irq() local_irq_enable() ++#endif ++ ++#ifdef CONFIG_PRINTK ++asmlinkage __printf(5, 0) ++int vprintk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, va_list args); ++ ++asmlinkage __printf(1, 0) ++int vprintk(const char *fmt, va_list args); ++ ++asmlinkage __printf(5, 6) __cold ++int printk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, ...); ++ ++asmlinkage __printf(1, 2) __cold ++int printk(const char *fmt, ...); ++ ++/* ++ * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! ++ */ ++__printf(1, 2) __cold int printk_deferred(const char *fmt, ...); ++ ++/* ++ * Please don't use printk_ratelimit(), because it shares ratelimiting state ++ * with all other unrelated printk_ratelimit() callsites. Instead use ++ * printk_ratelimited() or plain old __ratelimit(). 
++ */ ++extern int __printk_ratelimit(const char *func); ++#define printk_ratelimit() __printk_ratelimit(__func__) ++extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, ++ unsigned int interval_msec); ++ ++extern int printk_delay_msec; ++extern int dmesg_restrict; ++ ++extern int ++devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void __user *buf, ++ size_t *lenp, loff_t *ppos); ++ ++extern void wake_up_klogd(void); ++ ++char *log_buf_addr_get(void); ++u32 log_buf_len_get(void); ++void log_buf_vmcoreinfo_setup(void); ++void __init setup_log_buf(int early); ++__printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); ++void dump_stack_print_info(const char *log_lvl); ++void show_regs_print_info(const char *log_lvl); ++extern asmlinkage void dump_stack(void) __cold; ++extern void printk_safe_flush(void); ++extern void printk_safe_flush_on_panic(void); ++extern void zap_locks(void); ++#else ++static inline __printf(1, 0) ++int vprintk(const char *s, va_list args) ++{ ++ return 0; ++} ++static inline __printf(1, 2) __cold ++int printk(const char *s, ...) ++{ ++ return 0; ++} ++static inline __printf(1, 2) __cold ++int printk_deferred(const char *s, ...) ++{ ++ return 0; ++} ++static inline int printk_ratelimit(void) ++{ ++ return 0; ++} ++static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, ++ unsigned int interval_msec) ++{ ++ return false; ++} ++ ++static inline void wake_up_klogd(void) ++{ ++} ++ ++static inline char *log_buf_addr_get(void) ++{ ++ return NULL; ++} ++ ++static inline u32 log_buf_len_get(void) ++{ ++ return 0; ++} ++ ++static inline void log_buf_vmcoreinfo_setup(void) ++{ ++} ++ ++static inline void setup_log_buf(int early) ++{ ++} ++ ++static inline __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...) ++{ ++} ++ ++static inline void dump_stack_print_info(const char *log_lvl) ++{ ++} ++ ++static inline void show_regs_print_info(const char *log_lvl) ++{ ++} ++ ++static inline asmlinkage void dump_stack(void) ++{ ++} ++ ++static inline void printk_safe_flush(void) ++{ ++} ++ ++static inline void printk_safe_flush_on_panic(void) ++{ ++} ++ ++static inline void zap_locks(void) ++{ ++} ++#endif ++ ++extern int kptr_restrict; ++ ++#ifndef pr_fmt ++#define pr_fmt(fmt) fmt ++#endif ++ ++/* ++ * These can be used to print at the various log levels. ++ * All of these will print unconditionally, although note that pr_debug() ++ * and other debug macros are compiled out unless either DEBUG is defined ++ * or CONFIG_DYNAMIC_DEBUG is set. ++ */ ++#define pr_emerg(fmt, ...) \ ++ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_alert(fmt, ...) \ ++ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_crit(fmt, ...) \ ++ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_err(fmt, ...) \ ++ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_warning(fmt, ...) \ ++ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_warn pr_warning ++#define pr_notice(fmt, ...) \ ++ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_info(fmt, ...) \ ++ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) ++/* ++ * Like KERN_CONT, pr_cont() should only be used when continuing ++ * a line with no newline ('\n') enclosed. Otherwise it defaults ++ * back to KERN_DEFAULT. ++ */ ++#define pr_cont(fmt, ...) \ ++ printk(KERN_CONT fmt, ##__VA_ARGS__) ++ ++/* pr_devel() should produce zero code unless DEBUG is defined */ ++#ifdef DEBUG ++#define pr_devel(fmt, ...) 
\ ++ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_devel(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++ ++/* If you are writing a driver, please use dev_dbg instead */ ++#if defined(CONFIG_DYNAMIC_DEBUG) ++#include ++ ++/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */ ++#define pr_debug(fmt, ...) \ ++ dynamic_pr_debug(fmt, ##__VA_ARGS__) ++#elif defined(DEBUG) ++#define pr_debug(fmt, ...) \ ++ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_debug(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++/* ++ * Print a one-time message (analogous to WARN_ONCE() et al): ++ */ ++ ++#ifdef CONFIG_PRINTK ++#define printk_once(fmt, ...) \ ++({ \ ++ static bool __print_once __read_mostly; \ ++ bool __ret_print_once = !__print_once; \ ++ \ ++ if (!__print_once) { \ ++ __print_once = true; \ ++ printk(fmt, ##__VA_ARGS__); \ ++ } \ ++ unlikely(__ret_print_once); \ ++}) ++#define printk_deferred_once(fmt, ...) \ ++({ \ ++ static bool __print_once __read_mostly; \ ++ bool __ret_print_once = !__print_once; \ ++ \ ++ if (!__print_once) { \ ++ __print_once = true; \ ++ printk_deferred(fmt, ##__VA_ARGS__); \ ++ } \ ++ unlikely(__ret_print_once); \ ++}) ++#else ++#define printk_once(fmt, ...) \ ++ no_printk(fmt, ##__VA_ARGS__) ++#define printk_deferred_once(fmt, ...) \ ++ no_printk(fmt, ##__VA_ARGS__) ++#endif ++ ++#define pr_emerg_once(fmt, ...) \ ++ printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_alert_once(fmt, ...) \ ++ printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_crit_once(fmt, ...) \ ++ printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_err_once(fmt, ...) \ ++ printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_warn_once(fmt, ...) \ ++ printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_notice_once(fmt, ...) \ ++ printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_info_once(fmt, ...) \ ++ printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_cont_once(fmt, ...) \ ++ printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__) ++ ++#if defined(DEBUG) ++#define pr_devel_once(fmt, ...) \ ++ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_devel_once(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++/* If you are writing a driver, please use dev_dbg instead */ ++#if defined(DEBUG) ++#define pr_debug_once(fmt, ...) \ ++ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_debug_once(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++/* ++ * ratelimited messages with local ratelimit_state, ++ * no local ratelimit_state used in the !PRINTK case ++ */ ++#ifdef CONFIG_PRINTK ++#define printk_ratelimited(fmt, ...) \ ++({ \ ++ static DEFINE_RATELIMIT_STATE(_rs, \ ++ DEFAULT_RATELIMIT_INTERVAL, \ ++ DEFAULT_RATELIMIT_BURST); \ ++ \ ++ if (__ratelimit(&_rs)) \ ++ printk(fmt, ##__VA_ARGS__); \ ++}) ++#else ++#define printk_ratelimited(fmt, ...) \ ++ no_printk(fmt, ##__VA_ARGS__) ++#endif ++ ++#define pr_emerg_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_alert_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_crit_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_err_ratelimited(fmt, ...) 
\ ++ printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_warn_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_notice_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_info_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) ++/* no pr_cont_ratelimited, don't do that... */ ++ ++#if defined(DEBUG) ++#define pr_devel_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_devel_ratelimited(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++/* If you are writing a driver, please use dev_dbg instead */ ++#if defined(CONFIG_DYNAMIC_DEBUG) ++/* descriptor check is first to prevent flooding with "callbacks suppressed" */ ++#define pr_debug_ratelimited(fmt, ...) \ ++do { \ ++ static DEFINE_RATELIMIT_STATE(_rs, \ ++ DEFAULT_RATELIMIT_INTERVAL, \ ++ DEFAULT_RATELIMIT_BURST); \ ++ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt)); \ ++ if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ ++ __ratelimit(&_rs)) \ ++ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__); \ ++} while (0) ++#elif defined(DEBUG) ++#define pr_debug_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_debug_ratelimited(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++extern const struct file_operations kmsg_fops; ++ ++enum { ++ DUMP_PREFIX_NONE, ++ DUMP_PREFIX_ADDRESS, ++ DUMP_PREFIX_OFFSET ++}; ++extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, ++ int groupsize, char *linebuf, size_t linebuflen, ++ bool ascii); ++#ifdef CONFIG_PRINTK ++extern void print_hex_dump(const char *level, const char *prefix_str, ++ int prefix_type, int rowsize, int groupsize, ++ const void *buf, size_t len, bool ascii); ++#if defined(CONFIG_DYNAMIC_DEBUG) ++#define print_hex_dump_bytes(prefix_str, prefix_type, buf, len) \ ++ dynamic_hex_dump(prefix_str, prefix_type, 16, 1, buf, len, true) ++#else ++extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type, ++ const void *buf, size_t len); ++#endif /* defined(CONFIG_DYNAMIC_DEBUG) */ ++#else ++static inline void print_hex_dump(const char *level, const char *prefix_str, ++ int prefix_type, int rowsize, int groupsize, ++ const void *buf, size_t len, bool ascii) ++{ ++} ++static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type, ++ const void *buf, size_t len) ++{ ++} ++ ++#endif ++ ++#if defined(CONFIG_DYNAMIC_DEBUG) ++#define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ ++ groupsize, buf, len, ascii) \ ++ dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ ++ groupsize, buf, len, ascii) ++#elif defined(DEBUG) ++#define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ ++ groupsize, buf, len, ascii) \ ++ print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ ++ groupsize, buf, len, ascii) ++#else ++static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, ++ int rowsize, int groupsize, ++ const void *buf, size_t len, bool ascii) ++{ ++} ++#endif ++ ++#endif +diff -uprN kernel/include/linux/rwlock_api_smp.h kernel_new/include/linux/rwlock_api_smp.h +--- kernel/include/linux/rwlock_api_smp.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/rwlock_api_smp.h 2021-04-01 18:28:07.803863123 +0800 +@@ -141,7 +141,9 @@ static inline int __raw_write_trylock(rw + * 
even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) ++#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ ++ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ ++ defined(CONFIG_IPIPE) + + static inline void __raw_read_lock(rwlock_t *lock) + { +diff -uprN kernel/include/linux/rwlock.h kernel_new/include/linux/rwlock.h +--- kernel/include/linux/rwlock.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/rwlock.h 2021-04-01 18:28:07.803863123 +0800 +@@ -67,8 +67,8 @@ do { \ + #define read_trylock(lock) __cond_lock(lock, _raw_read_trylock(lock)) + #define write_trylock(lock) __cond_lock(lock, _raw_write_trylock(lock)) + +-#define write_lock(lock) _raw_write_lock(lock) +-#define read_lock(lock) _raw_read_lock(lock) ++#define write_lock(lock) PICK_RWOP(_write_lock, lock) ++#define read_lock(lock) PICK_RWOP(_read_lock, lock) + + #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + +@@ -102,8 +102,8 @@ do { \ + #define read_lock_bh(lock) _raw_read_lock_bh(lock) + #define write_lock_irq(lock) _raw_write_lock_irq(lock) + #define write_lock_bh(lock) _raw_write_lock_bh(lock) +-#define read_unlock(lock) _raw_read_unlock(lock) +-#define write_unlock(lock) _raw_write_unlock(lock) ++#define read_unlock(lock) PICK_RWOP(_read_unlock, lock) ++#define write_unlock(lock) PICK_RWOP(_write_unlock, lock) + #define read_unlock_irq(lock) _raw_read_unlock_irq(lock) + #define write_unlock_irq(lock) _raw_write_unlock_irq(lock) + +diff -uprN kernel/include/linux/sched/coredump.h kernel_new/include/linux/sched/coredump.h +--- kernel/include/linux/sched/coredump.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/sched/coredump.h 2021-04-01 18:28:07.803863123 +0800 +@@ -74,6 +74,7 @@ static inline int get_dumpable(struct mm + #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ + #define MMF_MULTIPROCESS 27 /* mm is shared between processes */ + #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) ++#define MMF_VM_PINNED 31 /* ondemand load up and COW disabled */ + + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ + MMF_DISABLE_THP_MASK) +diff -uprN kernel/include/linux/sched/coredump.h.orig kernel_new/include/linux/sched/coredump.h.orig +--- kernel/include/linux/sched/coredump.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/sched/coredump.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,81 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_SCHED_COREDUMP_H ++#define _LINUX_SCHED_COREDUMP_H ++ ++#include ++ ++#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ ++#define SUID_DUMP_USER 1 /* Dump as user of process */ ++#define SUID_DUMP_ROOT 2 /* Dump as root */ ++ ++/* mm flags */ ++ ++/* for SUID_DUMP_* above */ ++#define MMF_DUMPABLE_BITS 2 ++#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) ++ ++extern void set_dumpable(struct mm_struct *mm, int value); ++/* ++ * This returns the actual value of the suid_dumpable flag. For things ++ * that are using this for checking for privilege transitions, it must ++ * test against SUID_DUMP_USER rather than treating it as a boolean ++ * value. 
++ */ ++static inline int __get_dumpable(unsigned long mm_flags) ++{ ++ return mm_flags & MMF_DUMPABLE_MASK; ++} ++ ++static inline int get_dumpable(struct mm_struct *mm) ++{ ++ return __get_dumpable(mm->flags); ++} ++ ++/* coredump filter bits */ ++#define MMF_DUMP_ANON_PRIVATE 2 ++#define MMF_DUMP_ANON_SHARED 3 ++#define MMF_DUMP_MAPPED_PRIVATE 4 ++#define MMF_DUMP_MAPPED_SHARED 5 ++#define MMF_DUMP_ELF_HEADERS 6 ++#define MMF_DUMP_HUGETLB_PRIVATE 7 ++#define MMF_DUMP_HUGETLB_SHARED 8 ++#define MMF_DUMP_DAX_PRIVATE 9 ++#define MMF_DUMP_DAX_SHARED 10 ++ ++#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS ++#define MMF_DUMP_FILTER_BITS 9 ++#define MMF_DUMP_FILTER_MASK \ ++ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) ++#define MMF_DUMP_FILTER_DEFAULT \ ++ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ ++ (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) ++ ++#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS ++# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) ++#else ++# define MMF_DUMP_MASK_DEFAULT_ELF 0 ++#endif ++ /* leave room for more dump flags */ ++#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ ++#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ ++/* ++ * This one-shot flag is dropped due to necessity of changing exe once again ++ * on NFS restore ++ */ ++//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ ++ ++#define MMF_HAS_UPROBES 19 /* has uprobes */ ++#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ ++#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ ++#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ ++#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ ++#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ ++#define MMF_OOM_VICTIM 25 /* mm is the oom victim */ ++#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ ++#define MMF_MULTIPROCESS 27 /* mm is shared between processes */ ++#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) ++ ++#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ ++ MMF_DISABLE_THP_MASK) ++ ++#endif /* _LINUX_SCHED_COREDUMP_H */ +diff -uprN kernel/include/linux/sched.h kernel_new/include/linux/sched.h +--- kernel/include/linux/sched.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/sched.h 2021-04-01 18:28:07.803863123 +0800 +@@ -84,7 +84,9 @@ struct task_group; + #define TASK_WAKING 0x0200 + #define TASK_NOLOAD 0x0400 + #define TASK_NEW 0x0800 +-#define TASK_STATE_MAX 0x1000 ++#define TASK_HARDENING 0x1000 ++#define TASK_NOWAKEUP 0x2000 ++#define TASK_STATE_MAX 0x4000 + + /* Convenience macros for the sake of set_current_state: */ + #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +diff -uprN kernel/include/linux/sched.h.orig kernel_new/include/linux/sched.h.orig +--- kernel/include/linux/sched.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/sched.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1931 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_SCHED_H ++#define _LINUX_SCHED_H ++ ++/* ++ * Define 'struct task_struct' and provide the main scheduler ++ * APIs (schedule(), wakeup variants, etc.) 
++ */ ++ ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* task_struct member predeclarations (sorted alphabetically): */ ++struct audit_context; ++struct backing_dev_info; ++struct bio_list; ++struct blk_plug; ++struct cfs_rq; ++struct fs_struct; ++struct futex_pi_state; ++struct io_context; ++struct mempolicy; ++struct nameidata; ++struct nsproxy; ++struct perf_event_context; ++struct pid_namespace; ++struct pipe_inode_info; ++struct rcu_node; ++struct reclaim_state; ++struct robust_list_head; ++struct sched_attr; ++struct sched_param; ++struct seq_file; ++struct sighand_struct; ++struct signal_struct; ++struct task_delay_info; ++struct task_group; ++ ++/* ++ * Task state bitmask. NOTE! These bits are also ++ * encoded in fs/proc/array.c: get_task_state(). ++ * ++ * We have two separate sets of flags: task->state ++ * is about runnability, while task->exit_state are ++ * about the task exiting. Confusing, but this way ++ * modifying one set can't modify the other one by ++ * mistake. ++ */ ++ ++/* Used in tsk->state: */ ++#define TASK_RUNNING 0x0000 ++#define TASK_INTERRUPTIBLE 0x0001 ++#define TASK_UNINTERRUPTIBLE 0x0002 ++#define __TASK_STOPPED 0x0004 ++#define __TASK_TRACED 0x0008 ++/* Used in tsk->exit_state: */ ++#define EXIT_DEAD 0x0010 ++#define EXIT_ZOMBIE 0x0020 ++#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) ++/* Used in tsk->state again: */ ++#define TASK_PARKED 0x0040 ++#define TASK_DEAD 0x0080 ++#define TASK_WAKEKILL 0x0100 ++#define TASK_WAKING 0x0200 ++#define TASK_NOLOAD 0x0400 ++#define TASK_NEW 0x0800 ++#define TASK_STATE_MAX 0x1000 ++ ++/* Convenience macros for the sake of set_current_state: */ ++#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) ++#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) ++#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) ++ ++#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) ++ ++/* Convenience macros for the sake of wake_up(): */ ++#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) ++ ++/* get_task_state(): */ ++#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ ++ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ ++ __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ ++ TASK_PARKED) ++ ++#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) ++ ++#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) ++ ++#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) ++ ++#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ ++ (task->flags & PF_FROZEN) == 0 && \ ++ (task->state & TASK_NOLOAD) == 0) ++ ++#ifdef CONFIG_DEBUG_ATOMIC_SLEEP ++ ++/* ++ * Special states are those that do not use the normal wait-loop pattern. See ++ * the comment with set_special_state(). 
++ */ ++#define is_special_task_state(state) \ ++ ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) ++ ++#define __set_current_state(state_value) \ ++ do { \ ++ WARN_ON_ONCE(is_special_task_state(state_value));\ ++ current->task_state_change = _THIS_IP_; \ ++ current->state = (state_value); \ ++ } while (0) ++ ++#define set_current_state(state_value) \ ++ do { \ ++ WARN_ON_ONCE(is_special_task_state(state_value));\ ++ current->task_state_change = _THIS_IP_; \ ++ smp_store_mb(current->state, (state_value)); \ ++ } while (0) ++ ++#define set_special_state(state_value) \ ++ do { \ ++ unsigned long flags; /* may shadow */ \ ++ WARN_ON_ONCE(!is_special_task_state(state_value)); \ ++ raw_spin_lock_irqsave(¤t->pi_lock, flags); \ ++ current->task_state_change = _THIS_IP_; \ ++ current->state = (state_value); \ ++ raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ ++ } while (0) ++#else ++/* ++ * set_current_state() includes a barrier so that the write of current->state ++ * is correctly serialised wrt the caller's subsequent test of whether to ++ * actually sleep: ++ * ++ * for (;;) { ++ * set_current_state(TASK_UNINTERRUPTIBLE); ++ * if (!need_sleep) ++ * break; ++ * ++ * schedule(); ++ * } ++ * __set_current_state(TASK_RUNNING); ++ * ++ * If the caller does not need such serialisation (because, for instance, the ++ * condition test and condition change and wakeup are under the same lock) then ++ * use __set_current_state(). ++ * ++ * The above is typically ordered against the wakeup, which does: ++ * ++ * need_sleep = false; ++ * wake_up_state(p, TASK_UNINTERRUPTIBLE); ++ * ++ * where wake_up_state() executes a full memory barrier before accessing the ++ * task state. ++ * ++ * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is, ++ * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a ++ * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). ++ * ++ * However, with slightly different timing the wakeup TASK_RUNNING store can ++ * also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not ++ * a problem either because that will result in one extra go around the loop ++ * and our @cond test will save the day. ++ * ++ * Also see the comments of try_to_wake_up(). ++ */ ++#define __set_current_state(state_value) \ ++ current->state = (state_value) ++ ++#define set_current_state(state_value) \ ++ smp_store_mb(current->state, (state_value)) ++ ++/* ++ * set_special_state() should be used for those states when the blocking task ++ * can not use the regular condition based wait-loop. In that case we must ++ * serialize against wakeups such that any possible in-flight TASK_RUNNING stores ++ * will not collide with our state change. 
++ */ ++#define set_special_state(state_value) \ ++ do { \ ++ unsigned long flags; /* may shadow */ \ ++ raw_spin_lock_irqsave(¤t->pi_lock, flags); \ ++ current->state = (state_value); \ ++ raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ ++ } while (0) ++ ++#endif ++ ++/* Task command name length: */ ++#define TASK_COMM_LEN 16 ++ ++extern void scheduler_tick(void); ++ ++#define MAX_SCHEDULE_TIMEOUT LONG_MAX ++ ++extern long schedule_timeout(long timeout); ++extern long schedule_timeout_interruptible(long timeout); ++extern long schedule_timeout_killable(long timeout); ++extern long schedule_timeout_uninterruptible(long timeout); ++extern long schedule_timeout_idle(long timeout); ++asmlinkage void schedule(void); ++extern void schedule_preempt_disabled(void); ++ ++extern int __must_check io_schedule_prepare(void); ++extern void io_schedule_finish(int token); ++extern long io_schedule_timeout(long timeout); ++extern void io_schedule(void); ++ ++/** ++ * struct prev_cputime - snapshot of system and user cputime ++ * @utime: time spent in user mode ++ * @stime: time spent in system mode ++ * @lock: protects the above two fields ++ * ++ * Stores previous user/system time values such that we can guarantee ++ * monotonicity. ++ */ ++struct prev_cputime { ++#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE ++ u64 utime; ++ u64 stime; ++ raw_spinlock_t lock; ++#endif ++}; ++ ++/** ++ * struct task_cputime - collected CPU time counts ++ * @utime: time spent in user mode, in nanoseconds ++ * @stime: time spent in kernel mode, in nanoseconds ++ * @sum_exec_runtime: total time spent on the CPU, in nanoseconds ++ * ++ * This structure groups together three kinds of CPU time that are tracked for ++ * threads and thread groups. Most things considering CPU time want to group ++ * these counts together and treat all three of them in parallel. ++ */ ++struct task_cputime { ++ u64 utime; ++ u64 stime; ++ unsigned long long sum_exec_runtime; ++}; ++ ++/* Alternate field names when used on cache expirations: */ ++#define virt_exp utime ++#define prof_exp stime ++#define sched_exp sum_exec_runtime ++ ++enum vtime_state { ++ /* Task is sleeping or running in a CPU with VTIME inactive: */ ++ VTIME_INACTIVE = 0, ++ /* Task runs in userspace in a CPU with VTIME active: */ ++ VTIME_USER, ++ /* Task runs in kernelspace in a CPU with VTIME active: */ ++ VTIME_SYS, ++}; ++ ++struct vtime { ++ seqcount_t seqcount; ++ unsigned long long starttime; ++ enum vtime_state state; ++ u64 utime; ++ u64 stime; ++ u64 gtime; ++}; ++ ++struct sched_info { ++#ifdef CONFIG_SCHED_INFO ++ /* Cumulative counters: */ ++ ++ /* # of times we have run on this CPU: */ ++ unsigned long pcount; ++ ++ /* Time spent waiting on a runqueue: */ ++ unsigned long long run_delay; ++ ++ /* Timestamps: */ ++ ++ /* When did we last run on a CPU? */ ++ unsigned long long last_arrival; ++ ++ /* When were we last queued to run? */ ++ unsigned long long last_queued; ++ ++#endif /* CONFIG_SCHED_INFO */ ++}; ++ ++/* ++ * Integer metrics need fixed point arithmetic, e.g., sched/fair ++ * has a few: load, load_avg, util_avg, freq, and capacity. ++ * ++ * We define a basic fixed point arithmetic range, and then formalize ++ * all these metrics based on that basic range. 
++ */ ++# define SCHED_FIXEDPOINT_SHIFT 10 ++# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) ++ ++struct load_weight { ++ unsigned long weight; ++ u32 inv_weight; ++}; ++ ++/** ++ * struct util_est - Estimation utilization of FAIR tasks ++ * @enqueued: instantaneous estimated utilization of a task/cpu ++ * @ewma: the Exponential Weighted Moving Average (EWMA) ++ * utilization of a task ++ * ++ * Support data structure to track an Exponential Weighted Moving Average ++ * (EWMA) of a FAIR task's utilization. New samples are added to the moving ++ * average each time a task completes an activation. Sample's weight is chosen ++ * so that the EWMA will be relatively insensitive to transient changes to the ++ * task's workload. ++ * ++ * The enqueued attribute has a slightly different meaning for tasks and cpus: ++ * - task: the task's util_avg at last task dequeue time ++ * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU ++ * Thus, the util_est.enqueued of a task represents the contribution on the ++ * estimated utilization of the CPU where that task is currently enqueued. ++ * ++ * Only for tasks we track a moving average of the past instantaneous ++ * estimated utilization. This allows to absorb sporadic drops in utilization ++ * of an otherwise almost periodic task. ++ */ ++struct util_est { ++ unsigned int enqueued; ++ unsigned int ewma; ++#define UTIL_EST_WEIGHT_SHIFT 2 ++} __attribute__((__aligned__(sizeof(u64)))); ++ ++/* ++ * The load_avg/util_avg accumulates an infinite geometric series ++ * (see __update_load_avg() in kernel/sched/fair.c). ++ * ++ * [load_avg definition] ++ * ++ * load_avg = runnable% * scale_load_down(load) ++ * ++ * where runnable% is the time ratio that a sched_entity is runnable. ++ * For cfs_rq, it is the aggregated load_avg of all runnable and ++ * blocked sched_entities. ++ * ++ * load_avg may also take frequency scaling into account: ++ * ++ * load_avg = runnable% * scale_load_down(load) * freq% ++ * ++ * where freq% is the CPU frequency normalized to the highest frequency. ++ * ++ * [util_avg definition] ++ * ++ * util_avg = running% * SCHED_CAPACITY_SCALE ++ * ++ * where running% is the time ratio that a sched_entity is running on ++ * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable ++ * and blocked sched_entities. ++ * ++ * util_avg may also factor frequency scaling and CPU capacity scaling: ++ * ++ * util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity% ++ * ++ * where freq% is the same as above, and capacity% is the CPU capacity ++ * normalized to the greatest capacity (due to uarch differences, etc). ++ * ++ * N.B., the above ratios (runnable%, running%, freq%, and capacity%) ++ * themselves are in the range of [0, 1]. To do fixed point arithmetics, ++ * we therefore scale them to as large a range as necessary. This is for ++ * example reflected by util_avg's SCHED_CAPACITY_SCALE. ++ * ++ * [Overflow issue] ++ * ++ * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities ++ * with the highest load (=88761), always runnable on a single cfs_rq, ++ * and should not overflow as the number already hits PID_MAX_LIMIT. ++ * ++ * For all other cases (including 32-bit kernels), struct load_weight's ++ * weight will overflow first before we do, because: ++ * ++ * Max(load_avg) <= Max(load.weight) ++ * ++ * Then it is the load_weight's responsibility to consider overflow ++ * issues. 
++ */ ++struct sched_avg { ++ u64 last_update_time; ++ u64 load_sum; ++ u64 runnable_load_sum; ++ u32 util_sum; ++ u32 period_contrib; ++ unsigned long load_avg; ++ unsigned long runnable_load_avg; ++ unsigned long util_avg; ++ struct util_est util_est; ++} ____cacheline_aligned; ++ ++struct sched_statistics { ++#ifdef CONFIG_SCHEDSTATS ++ u64 wait_start; ++ u64 wait_max; ++ u64 wait_count; ++ u64 wait_sum; ++ u64 iowait_count; ++ u64 iowait_sum; ++ ++ u64 sleep_start; ++ u64 sleep_max; ++ s64 sum_sleep_runtime; ++ ++ u64 block_start; ++ u64 block_max; ++ u64 exec_max; ++ u64 slice_max; ++ ++ u64 nr_migrations_cold; ++ u64 nr_failed_migrations_affine; ++ u64 nr_failed_migrations_running; ++ u64 nr_failed_migrations_hot; ++ u64 nr_forced_migrations; ++ ++ u64 nr_wakeups; ++ u64 nr_wakeups_sync; ++ u64 nr_wakeups_migrate; ++ u64 nr_wakeups_local; ++ u64 nr_wakeups_remote; ++ u64 nr_wakeups_affine; ++ u64 nr_wakeups_affine_attempts; ++ u64 nr_wakeups_passive; ++ u64 nr_wakeups_idle; ++#endif ++}; ++ ++struct sched_entity { ++ /* For load-balancing: */ ++ struct load_weight load; ++ unsigned long runnable_weight; ++ struct rb_node run_node; ++ struct list_head group_node; ++ unsigned int on_rq; ++ ++ u64 exec_start; ++ u64 sum_exec_runtime; ++ u64 vruntime; ++ u64 prev_sum_exec_runtime; ++ ++ u64 nr_migrations; ++ ++ struct sched_statistics statistics; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ int depth; ++ struct sched_entity *parent; ++ /* rq on which this entity is (to be) queued: */ ++ struct cfs_rq *cfs_rq; ++ /* rq "owned" by this entity/group: */ ++ struct cfs_rq *my_q; ++#endif ++ ++#ifdef CONFIG_SMP ++ /* ++ * Per entity load average tracking. ++ * ++ * Put into separate cache line so it does not ++ * collide with read-mostly values above. ++ */ ++ struct sched_avg avg; ++#endif ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) ++}; ++ ++struct sched_rt_entity { ++ struct list_head run_list; ++ unsigned long timeout; ++ unsigned long watchdog_stamp; ++ unsigned int time_slice; ++ unsigned short on_rq; ++ unsigned short on_list; ++ ++ struct sched_rt_entity *back; ++#ifdef CONFIG_RT_GROUP_SCHED ++ struct sched_rt_entity *parent; ++ /* rq on which this entity is (to be) queued: */ ++ struct rt_rq *rt_rq; ++ /* rq "owned" by this entity/group: */ ++ struct rt_rq *my_q; ++#endif ++} __randomize_layout; ++ ++struct sched_dl_entity { ++ struct rb_node rb_node; ++ ++ /* ++ * Original scheduling parameters. Copied here from sched_attr ++ * during sched_setattr(), they will remain the same until ++ * the next sched_setattr(). ++ */ ++ u64 dl_runtime; /* Maximum runtime for each instance */ ++ u64 dl_deadline; /* Relative deadline of each instance */ ++ u64 dl_period; /* Separation of two instances (period) */ ++ u64 dl_bw; /* dl_runtime / dl_period */ ++ u64 dl_density; /* dl_runtime / dl_deadline */ ++ ++ /* ++ * Actual scheduling parameters. Initialized with the values above, ++ * they are continously updated during task execution. Note that ++ * the remaining runtime could be < 0 in case we are in overrun. ++ */ ++ s64 runtime; /* Remaining runtime for this instance */ ++ u64 deadline; /* Absolute deadline for this instance */ ++ unsigned int flags; /* Specifying the scheduler behaviour */ ++ ++ /* ++ * Some bool flags: ++ * ++ * @dl_throttled tells if we exhausted the runtime. If so, the ++ * task has to wait for a replenishment to be performed at the ++ * next firing of dl_timer. ++ * ++ * @dl_boosted tells if we are boosted due to DI. 
If so we are ++ * outside bandwidth enforcement mechanism (but only until we ++ * exit the critical section); ++ * ++ * @dl_yielded tells if task gave up the CPU before consuming ++ * all its available runtime during the last job. ++ * ++ * @dl_non_contending tells if the task is inactive while still ++ * contributing to the active utilization. In other words, it ++ * indicates if the inactive timer has been armed and its handler ++ * has not been executed yet. This flag is useful to avoid race ++ * conditions between the inactive timer handler and the wakeup ++ * code. ++ * ++ * @dl_overrun tells if the task asked to be informed about runtime ++ * overruns. ++ */ ++ unsigned int dl_throttled : 1; ++ unsigned int dl_boosted : 1; ++ unsigned int dl_yielded : 1; ++ unsigned int dl_non_contending : 1; ++ unsigned int dl_overrun : 1; ++ ++ /* ++ * Bandwidth enforcement timer. Each -deadline task has its ++ * own bandwidth to be enforced, thus we need one timer per task. ++ */ ++ struct hrtimer dl_timer; ++ ++ /* ++ * Inactive timer, responsible for decreasing the active utilization ++ * at the "0-lag time". When a -deadline task blocks, it contributes ++ * to GRUB's active utilization until the "0-lag time", hence a ++ * timer is needed to decrease the active utilization at the correct ++ * time. ++ */ ++ struct hrtimer inactive_timer; ++}; ++ ++union rcu_special { ++ struct { ++ u8 blocked; ++ u8 need_qs; ++ u8 exp_need_qs; ++ ++ /* Otherwise the compiler can store garbage here: */ ++ u8 pad; ++ } b; /* Bits. */ ++ u32 s; /* Set of bits. */ ++}; ++ ++enum perf_event_task_context { ++ perf_invalid_context = -1, ++ perf_hw_context = 0, ++ perf_sw_context, ++ perf_nr_task_contexts, ++}; ++ ++struct wake_q_node { ++ struct wake_q_node *next; ++}; ++ ++struct task_struct { ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ /* ++ * For reasons of header soup (see current_thread_info()), this ++ * must be the first element of task_struct. ++ */ ++ struct thread_info thread_info; ++#endif ++ /* -1 unrunnable, 0 runnable, >0 stopped: */ ++ volatile long state; ++ ++ /* ++ * This begins the randomizable portion of task_struct. Only ++ * scheduling-critical items should be added above here. ++ */ ++ randomized_struct_fields_start ++ ++ void *stack; ++ atomic_t usage; ++ /* Per task flags (PF_*), defined further below: */ ++ unsigned int flags; ++ unsigned int ptrace; ++ ++#ifdef CONFIG_SMP ++ struct llist_node wake_entry; ++ int on_cpu; ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ /* Current CPU: */ ++ unsigned int cpu; ++#endif ++ unsigned int wakee_flips; ++ unsigned long wakee_flip_decay_ts; ++ struct task_struct *last_wakee; ++ ++ /* ++ * recent_used_cpu is initially set as the last CPU used by a task ++ * that wakes affine another task. Waker/wakee relationships can ++ * push tasks around a CPU where each wakeup moves to the next one. ++ * Tracking a recently used CPU allows a quick search for a recently ++ * used CPU that may be idle. 
++ */ ++ int recent_used_cpu; ++ int wake_cpu; ++#endif ++ int on_rq; ++ ++ int prio; ++ int static_prio; ++ int normal_prio; ++ unsigned int rt_priority; ++ ++ const struct sched_class *sched_class; ++ struct sched_entity se; ++ struct sched_rt_entity rt; ++#ifdef CONFIG_CGROUP_SCHED ++ struct task_group *sched_task_group; ++#endif ++ struct sched_dl_entity dl; ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ /* List of struct preempt_notifier: */ ++ struct hlist_head preempt_notifiers; ++#endif ++ ++#ifdef CONFIG_BLK_DEV_IO_TRACE ++ unsigned int btrace_seq; ++#endif ++ ++ unsigned int policy; ++ int nr_cpus_allowed; ++ cpumask_t cpus_allowed; ++ ++#ifdef CONFIG_PREEMPT_RCU ++ int rcu_read_lock_nesting; ++ union rcu_special rcu_read_unlock_special; ++ struct list_head rcu_node_entry; ++ struct rcu_node *rcu_blocked_node; ++#endif /* #ifdef CONFIG_PREEMPT_RCU */ ++ ++#ifdef CONFIG_TASKS_RCU ++ unsigned long rcu_tasks_nvcsw; ++ u8 rcu_tasks_holdout; ++ u8 rcu_tasks_idx; ++ int rcu_tasks_idle_cpu; ++ struct list_head rcu_tasks_holdout_list; ++#endif /* #ifdef CONFIG_TASKS_RCU */ ++ ++ struct sched_info sched_info; ++ ++ struct list_head tasks; ++#ifdef CONFIG_SMP ++ struct plist_node pushable_tasks; ++ struct rb_node pushable_dl_tasks; ++#endif ++ ++ struct mm_struct *mm; ++ struct mm_struct *active_mm; ++ ++ /* Per-thread vma caching: */ ++ struct vmacache vmacache; ++ ++#ifdef SPLIT_RSS_COUNTING ++ struct task_rss_stat rss_stat; ++#endif ++ int exit_state; ++ int exit_code; ++ int exit_signal; ++ /* The signal sent when the parent dies: */ ++ int pdeath_signal; ++ /* JOBCTL_*, siglock protected: */ ++ unsigned long jobctl; ++ ++ /* Used for emulating ABI behavior of previous Linux versions: */ ++ unsigned int personality; ++ ++ /* Scheduler bits, serialized by scheduler locks: */ ++ unsigned sched_reset_on_fork:1; ++ unsigned sched_contributes_to_load:1; ++ unsigned sched_migrated:1; ++ unsigned sched_remote_wakeup:1; ++ /* Force alignment to the next boundary: */ ++ unsigned :0; ++ ++ /* Unserialized, strictly 'current' */ ++ ++ /* Bit to tell LSMs we're in execve(): */ ++ unsigned in_execve:1; ++ unsigned in_iowait:1; ++#ifndef TIF_RESTORE_SIGMASK ++ unsigned restore_sigmask:1; ++#endif ++#ifdef CONFIG_MEMCG ++ unsigned in_user_fault:1; ++#ifdef CONFIG_MEMCG_KMEM ++ unsigned memcg_kmem_skip_account:1; ++#endif ++#endif ++#ifdef CONFIG_COMPAT_BRK ++ unsigned brk_randomized:1; ++#endif ++#ifdef CONFIG_CGROUPS ++ /* disallow userland-initiated cgroup migration */ ++ unsigned no_cgroup_migration:1; ++#endif ++#ifdef CONFIG_BLK_CGROUP ++ /* to be used once the psi infrastructure lands upstream. */ ++ unsigned use_memdelay:1; ++#endif ++ ++ unsigned long atomic_flags; /* Flags requiring atomic access. */ ++ ++ struct restart_block restart_block; ++ ++ pid_t pid; ++ pid_t tgid; ++ ++#ifdef CONFIG_STACKPROTECTOR ++ /* Canary value for the -fstack-protector GCC feature: */ ++ unsigned long stack_canary; ++#endif ++ /* ++ * Pointers to the (original) parent process, youngest child, younger sibling, ++ * older sibling, respectively. (p->father can be replaced with ++ * p->real_parent->pid) ++ */ ++ ++ /* Real parent process: */ ++ struct task_struct __rcu *real_parent; ++ ++ /* Recipient of SIGCHLD, wait4() reports: */ ++ struct task_struct __rcu *parent; ++ ++ /* ++ * Children/sibling form the list of natural children: ++ */ ++ struct list_head children; ++ struct list_head sibling; ++ struct task_struct *group_leader; ++ ++ /* ++ * 'ptraced' is the list of tasks this task is using ptrace() on. 
++ * ++ * This includes both natural children and PTRACE_ATTACH targets. ++ * 'ptrace_entry' is this task's link on the p->parent->ptraced list. ++ */ ++ struct list_head ptraced; ++ struct list_head ptrace_entry; ++ ++ /* PID/PID hash table linkage. */ ++ struct pid *thread_pid; ++ struct hlist_node pid_links[PIDTYPE_MAX]; ++ struct list_head thread_group; ++ struct list_head thread_node; ++ ++ struct completion *vfork_done; ++ ++ /* CLONE_CHILD_SETTID: */ ++ int __user *set_child_tid; ++ ++ /* CLONE_CHILD_CLEARTID: */ ++ int __user *clear_child_tid; ++ ++ u64 utime; ++ u64 stime; ++#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME ++ u64 utimescaled; ++ u64 stimescaled; ++#endif ++ u64 gtime; ++ struct prev_cputime prev_cputime; ++#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN ++ struct vtime vtime; ++#endif ++ ++#ifdef CONFIG_NO_HZ_FULL ++ atomic_t tick_dep_mask; ++#endif ++ /* Context switch counts: */ ++ unsigned long nvcsw; ++ unsigned long nivcsw; ++ ++ /* Monotonic time in nsecs: */ ++ u64 start_time; ++ ++ /* Boot based time in nsecs: */ ++ u64 real_start_time; ++ ++ /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ ++ unsigned long min_flt; ++ unsigned long maj_flt; ++ ++#ifdef CONFIG_POSIX_TIMERS ++ struct task_cputime cputime_expires; ++ struct list_head cpu_timers[3]; ++#endif ++ ++ /* Process credentials: */ ++ ++ /* Tracer's credentials at attach: */ ++ const struct cred __rcu *ptracer_cred; ++ ++ /* Objective and real subjective task credentials (COW): */ ++ const struct cred __rcu *real_cred; ++ ++ /* Effective (overridable) subjective task credentials (COW): */ ++ const struct cred __rcu *cred; ++ ++ /* ++ * executable name, excluding path. ++ * ++ * - normally initialized setup_new_exec() ++ * - access it with [gs]et_task_comm() ++ * - lock it with task_lock() ++ */ ++ char comm[TASK_COMM_LEN]; ++ ++ struct nameidata *nameidata; ++ ++#ifdef CONFIG_SYSVIPC ++ struct sysv_sem sysvsem; ++ struct sysv_shm sysvshm; ++#endif ++#ifdef CONFIG_DETECT_HUNG_TASK ++ unsigned long last_switch_count; ++ unsigned long last_switch_time; ++#endif ++ /* Filesystem information: */ ++ struct fs_struct *fs; ++ ++ /* Open file information: */ ++ struct files_struct *files; ++ ++ /* Namespaces: */ ++ struct nsproxy *nsproxy; ++ ++ /* Signal handlers: */ ++ struct signal_struct *signal; ++ struct sighand_struct *sighand; ++ sigset_t blocked; ++ sigset_t real_blocked; ++ /* Restored if set_restore_sigmask() was used: */ ++ sigset_t saved_sigmask; ++ struct sigpending pending; ++ unsigned long sas_ss_sp; ++ size_t sas_ss_size; ++ unsigned int sas_ss_flags; ++ ++ struct callback_head *task_works; ++ ++ struct audit_context *audit_context; ++#ifdef CONFIG_AUDITSYSCALL ++ kuid_t loginuid; ++ unsigned int sessionid; ++#endif ++ struct seccomp seccomp; ++ ++ /* Thread group tracking: */ ++ u32 parent_exec_id; ++ u32 self_exec_id; ++ ++ /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ ++ spinlock_t alloc_lock; ++ ++ /* Protection of the PI data structures: */ ++ raw_spinlock_t pi_lock; ++ ++ struct wake_q_node wake_q; ++ ++#ifdef CONFIG_RT_MUTEXES ++ /* PI waiters blocked on a rt_mutex held by this task: */ ++ struct rb_root_cached pi_waiters; ++ /* Updated under owner's pi_lock and rq lock */ ++ struct task_struct *pi_top_task; ++ /* Deadlock detection and priority inheritance handling: */ ++ struct rt_mutex_waiter *pi_blocked_on; ++#endif ++ ++#ifdef CONFIG_DEBUG_MUTEXES ++ /* Mutex deadlock detection: */ ++ struct 
mutex_waiter *blocked_on; ++#endif ++ ++#ifdef CONFIG_TRACE_IRQFLAGS ++ unsigned int irq_events; ++ unsigned long hardirq_enable_ip; ++ unsigned long hardirq_disable_ip; ++ unsigned int hardirq_enable_event; ++ unsigned int hardirq_disable_event; ++ int hardirqs_enabled; ++ int hardirq_context; ++ unsigned long softirq_disable_ip; ++ unsigned long softirq_enable_ip; ++ unsigned int softirq_disable_event; ++ unsigned int softirq_enable_event; ++ int softirqs_enabled; ++ int softirq_context; ++#endif ++ ++#ifdef CONFIG_LOCKDEP ++# define MAX_LOCK_DEPTH 48UL ++ u64 curr_chain_key; ++ int lockdep_depth; ++ unsigned int lockdep_recursion; ++ struct held_lock held_locks[MAX_LOCK_DEPTH]; ++#endif ++ ++#ifdef CONFIG_UBSAN ++ unsigned int in_ubsan; ++#endif ++ ++ /* Journalling filesystem info: */ ++ void *journal_info; ++ ++ /* Stacked block device info: */ ++ struct bio_list *bio_list; ++ ++#ifdef CONFIG_BLOCK ++ /* Stack plugging: */ ++ struct blk_plug *plug; ++#endif ++ ++ /* VM state: */ ++ struct reclaim_state *reclaim_state; ++ ++ struct backing_dev_info *backing_dev_info; ++ ++ struct io_context *io_context; ++ ++ /* Ptrace state: */ ++ unsigned long ptrace_message; ++ siginfo_t *last_siginfo; ++ ++ struct task_io_accounting ioac; ++#ifdef CONFIG_TASK_XACCT ++ /* Accumulated RSS usage: */ ++ u64 acct_rss_mem1; ++ /* Accumulated virtual memory usage: */ ++ u64 acct_vm_mem1; ++ /* stime + utime since last update: */ ++ u64 acct_timexpd; ++#endif ++#ifdef CONFIG_CPUSETS ++ /* Protected by ->alloc_lock: */ ++ nodemask_t mems_allowed; ++ /* Seqence number to catch updates: */ ++ seqcount_t mems_allowed_seq; ++ int cpuset_mem_spread_rotor; ++ int cpuset_slab_spread_rotor; ++#endif ++#ifdef CONFIG_CGROUPS ++ /* Control Group info protected by css_set_lock: */ ++ struct css_set __rcu *cgroups; ++ /* cg_list protected by css_set_lock and tsk->alloc_lock: */ ++ struct list_head cg_list; ++#endif ++#if defined(CONFIG_RESCTRL) || defined(CONFIG_INTEL_RDT) ++ u32 closid; ++ u32 rmid; ++#endif ++#ifdef CONFIG_FUTEX ++ struct robust_list_head __user *robust_list; ++#ifdef CONFIG_COMPAT ++ struct compat_robust_list_head __user *compat_robust_list; ++#endif ++ struct list_head pi_state_list; ++ struct futex_pi_state *pi_state_cache; ++#endif ++#ifdef CONFIG_PERF_EVENTS ++ struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; ++ struct mutex perf_event_mutex; ++ struct list_head perf_event_list; ++#endif ++#ifdef CONFIG_DEBUG_PREEMPT ++ unsigned long preempt_disable_ip; ++#endif ++#ifdef CONFIG_NUMA ++ /* Protected by alloc_lock: */ ++ struct mempolicy *mempolicy; ++ short il_prev; ++ short pref_node_fork; ++#endif ++#ifdef CONFIG_NUMA_BALANCING ++ int numa_scan_seq; ++ unsigned int numa_scan_period; ++ unsigned int numa_scan_period_max; ++ int numa_preferred_nid; ++ unsigned long numa_migrate_retry; ++ /* Migration stamp: */ ++ u64 node_stamp; ++ u64 last_task_numa_placement; ++ u64 last_sum_exec_runtime; ++ struct callback_head numa_work; ++ ++ /* ++ * This pointer is only modified for current in syscall and ++ * pagefault context (and for tasks being destroyed), so it can be read ++ * from any of the following contexts: ++ * - RCU read-side critical section ++ * - current->numa_group from everywhere ++ * - task's runqueue locked, task not running ++ */ ++ struct numa_group __rcu *numa_group; ++ ++ /* ++ * numa_faults is an array split into four regions: ++ * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer ++ * in this precise order. 
++ * ++ * faults_memory: Exponential decaying average of faults on a per-node ++ * basis. Scheduling placement decisions are made based on these ++ * counts. The values remain static for the duration of a PTE scan. ++ * faults_cpu: Track the nodes the process was running on when a NUMA ++ * hinting fault was incurred. ++ * faults_memory_buffer and faults_cpu_buffer: Record faults per node ++ * during the current scan window. When the scan completes, the counts ++ * in faults_memory and faults_cpu decay and these values are copied. ++ */ ++ unsigned long *numa_faults; ++ unsigned long total_numa_faults; ++ ++ /* ++ * numa_faults_locality tracks if faults recorded during the last ++ * scan window were remote/local or failed to migrate. The task scan ++ * period is adapted based on the locality of the faults with different ++ * weights depending on whether they were shared or private faults ++ */ ++ unsigned long numa_faults_locality[3]; ++ ++ unsigned long numa_pages_migrated; ++#endif /* CONFIG_NUMA_BALANCING */ ++ ++#ifdef CONFIG_RSEQ ++ struct rseq __user *rseq; ++ u32 rseq_len; ++ u32 rseq_sig; ++ /* ++ * RmW on rseq_event_mask must be performed atomically ++ * with respect to preemption. ++ */ ++ unsigned long rseq_event_mask; ++#endif ++ ++ struct tlbflush_unmap_batch tlb_ubc; ++ ++ struct rcu_head rcu; ++ ++ /* Cache last used pipe for splice(): */ ++ struct pipe_inode_info *splice_pipe; ++ ++ struct page_frag task_frag; ++ ++#ifdef CONFIG_TASK_DELAY_ACCT ++ struct task_delay_info *delays; ++#endif ++ ++#ifdef CONFIG_FAULT_INJECTION ++ int make_it_fail; ++ unsigned int fail_nth; ++#endif ++ /* ++ * When (nr_dirtied >= nr_dirtied_pause), it's time to call ++ * balance_dirty_pages() for a dirty throttling pause: ++ */ ++ int nr_dirtied; ++ int nr_dirtied_pause; ++ /* Start of a write-and-pause period: */ ++ unsigned long dirty_paused_when; ++ ++#ifdef CONFIG_LATENCYTOP ++ int latency_record_count; ++ struct latency_record latency_record[LT_SAVECOUNT]; ++#endif ++ /* ++ * Time slack values; these are used to round up poll() and ++ * select() etc timeout values. These are in nanoseconds. 
++ */ ++ u64 timer_slack_ns; ++ u64 default_timer_slack_ns; ++ ++#ifdef CONFIG_KASAN ++ unsigned int kasan_depth; ++#endif ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ /* Index of current stored address in ret_stack: */ ++ int curr_ret_stack; ++ int curr_ret_depth; ++ ++ /* Stack of return addresses for return function tracing: */ ++ struct ftrace_ret_stack *ret_stack; ++ ++ /* Timestamp for last schedule: */ ++ unsigned long long ftrace_timestamp; ++ ++ /* ++ * Number of functions that haven't been traced ++ * because of depth overrun: ++ */ ++ atomic_t trace_overrun; ++ ++ /* Pause tracing: */ ++ atomic_t tracing_graph_pause; ++#endif ++ ++#ifdef CONFIG_TRACING ++ /* State flags for use by tracers: */ ++ unsigned long trace; ++ ++ /* Bitmask and counter of trace recursion: */ ++ unsigned long trace_recursion; ++#endif /* CONFIG_TRACING */ ++ ++#ifdef CONFIG_KCOV ++ /* Coverage collection mode enabled for this task (0 if disabled): */ ++ unsigned int kcov_mode; ++ ++ /* Size of the kcov_area: */ ++ unsigned int kcov_size; ++ ++ /* Buffer for coverage collection: */ ++ void *kcov_area; ++ ++ /* KCOV descriptor wired with this task or NULL: */ ++ struct kcov *kcov; ++#endif ++ ++#ifdef CONFIG_MEMCG ++ struct mem_cgroup *memcg_in_oom; ++ gfp_t memcg_oom_gfp_mask; ++ int memcg_oom_order; ++ ++ /* Number of pages to reclaim on returning to userland: */ ++ unsigned int memcg_nr_pages_over_high; ++ ++ /* Used by memcontrol for targeted memcg charge: */ ++ struct mem_cgroup *active_memcg; ++#endif ++ ++#ifdef CONFIG_BLK_CGROUP ++ struct request_queue *throttle_queue; ++#endif ++ ++#ifdef CONFIG_UPROBES ++ struct uprobe_task *utask; ++#endif ++#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) ++ unsigned int sequential_io; ++ unsigned int sequential_io_avg; ++#endif ++#ifdef CONFIG_DEBUG_ATOMIC_SLEEP ++ unsigned long task_state_change; ++#endif ++ int pagefault_disabled; ++#ifdef CONFIG_MMU ++ struct task_struct *oom_reaper_list; ++#endif ++#ifdef CONFIG_VMAP_STACK ++ struct vm_struct *stack_vm_area; ++#endif ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ /* A live task holds one reference: */ ++ atomic_t stack_refcount; ++#endif ++#ifdef CONFIG_LIVEPATCH ++ int patch_state; ++#endif ++#ifdef CONFIG_SECURITY ++ /* Used by LSM modules for access restriction: */ ++ void *security; ++#endif ++ ++ /* ++ * New fields for task_struct should be added above here, so that ++ * they are included in the randomized portion of task_struct. ++ */ ++ randomized_struct_fields_end ++ ++#ifndef __GENKSYMS__ ++ u64 parent_exec_id_u64; ++ u64 self_exec_id_u64; ++#else ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++#endif ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) ++ KABI_RESERVE(5) ++ KABI_RESERVE(6) ++ KABI_RESERVE(7) ++ KABI_RESERVE(8) ++ ++ /* CPU-specific state of this task: */ ++ struct thread_struct thread; ++ ++ /* ++ * WARNING: on x86, 'thread_struct' contains a variable-sized ++ * structure. It *MUST* be at the end of 'task_struct'. ++ * ++ * Do not put anything below here! ++ */ ++}; ++ ++static inline struct pid *task_pid(struct task_struct *task) ++{ ++ return task->thread_pid; ++} ++ ++/* ++ * the helpers to get the task's different pids as they are seen ++ * from various namespaces ++ * ++ * task_xid_nr() : global id, i.e. the id seen from the init namespace; ++ * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of ++ * current. 
++ * task_xid_nr_ns() : id seen from the ns specified; ++ * ++ * see also pid_nr() etc in include/linux/pid.h ++ */ ++pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); ++ ++static inline pid_t task_pid_nr(struct task_struct *tsk) ++{ ++ return tsk->pid; ++} ++ ++static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); ++} ++ ++static inline pid_t task_pid_vnr(struct task_struct *tsk) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); ++} ++ ++ ++static inline pid_t task_tgid_nr(struct task_struct *tsk) ++{ ++ return tsk->tgid; ++} ++ ++/** ++ * pid_alive - check that a task structure is not stale ++ * @p: Task structure to be checked. ++ * ++ * Test if a process is not yet dead (at most zombie state) ++ * If pid_alive fails, then pointers within the task structure ++ * can be stale and must not be dereferenced. ++ * ++ * Return: 1 if the process is alive. 0 otherwise. ++ */ ++static inline int pid_alive(const struct task_struct *p) ++{ ++ return p->thread_pid != NULL; ++} ++ ++static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); ++} ++ ++static inline pid_t task_pgrp_vnr(struct task_struct *tsk) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); ++} ++ ++ ++static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); ++} ++ ++static inline pid_t task_session_vnr(struct task_struct *tsk) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); ++} ++ ++static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns); ++} ++ ++static inline pid_t task_tgid_vnr(struct task_struct *tsk) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL); ++} ++ ++static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) ++{ ++ pid_t pid = 0; ++ ++ rcu_read_lock(); ++ if (pid_alive(tsk)) ++ pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); ++ rcu_read_unlock(); ++ ++ return pid; ++} ++ ++static inline pid_t task_ppid_nr(const struct task_struct *tsk) ++{ ++ return task_ppid_nr_ns(tsk, &init_pid_ns); ++} ++ ++/* Obsolete, do not use: */ ++static inline pid_t task_pgrp_nr(struct task_struct *tsk) ++{ ++ return task_pgrp_nr_ns(tsk, &init_pid_ns); ++} ++ ++#define TASK_REPORT_IDLE (TASK_REPORT + 1) ++#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) ++ ++static inline unsigned int task_state_index(struct task_struct *tsk) ++{ ++ unsigned int tsk_state = READ_ONCE(tsk->state); ++ unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; ++ ++ BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); ++ ++ if (tsk_state == TASK_IDLE) ++ state = TASK_REPORT_IDLE; ++ ++ return fls(state); ++} ++ ++static inline char task_index_to_char(unsigned int state) ++{ ++ static const char state_char[] = "RSDTtXZPI"; ++ ++ BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); ++ ++ return state_char[state]; ++} ++ ++static inline char task_state_to_char(struct task_struct *tsk) ++{ ++ return task_index_to_char(task_state_index(tsk)); ++} ++ ++/** ++ * is_global_init - check if a task structure is init. Since init ++ * is free to have sub-threads we need to check tgid. ++ * @tsk: Task structure to be checked. ++ * ++ * Check if a task structure is the first user space task the kernel created. 
++ * ++ * Return: 1 if the task structure is init. 0 otherwise. ++ */ ++static inline int is_global_init(struct task_struct *tsk) ++{ ++ return task_tgid_nr(tsk) == 1; ++} ++ ++extern struct pid *cad_pid; ++ ++/* ++ * Per process flags ++ */ ++#define PF_IDLE 0x00000002 /* I am an IDLE thread */ ++#define PF_EXITING 0x00000004 /* Getting shut down */ ++#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ ++#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ ++#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ ++#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ ++#define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */ ++#define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ ++#define PF_DUMPCORE 0x00000200 /* Dumped core */ ++#define PF_SIGNALED 0x00000400 /* Killed by a signal */ ++#define PF_MEMALLOC 0x00000800 /* Allocating memory */ ++#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ ++#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ ++#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ ++#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ ++#define PF_FROZEN 0x00010000 /* Frozen for system suspend */ ++#define PF_KSWAPD 0x00020000 /* I am kswapd */ ++#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */ ++#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */ ++#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ ++#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ ++#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ ++#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ ++#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ ++#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ ++#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ ++#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ ++#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ ++ ++/* ++ * Only the _current_ task can read/write to tsk->flags, but other ++ * tasks can access tsk->flags in readonly mode for example ++ * with tsk_used_math (like during threaded core dumping). ++ * There is however an exception to this rule during ptrace ++ * or during fork: the ptracer task is allowed to write to the ++ * child->flags of its traced child (same goes for fork, the parent ++ * can write to the child->flags), because we're guaranteed the ++ * child is not running and in turn not changing child->flags ++ * at the same time the parent does it. ++ */ ++#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) ++#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) ++#define clear_used_math() clear_stopped_child_used_math(current) ++#define set_used_math() set_stopped_child_used_math(current) ++ ++#define conditional_stopped_child_used_math(condition, child) \ ++ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? 
PF_USED_MATH : 0; } while (0) ++ ++#define conditional_used_math(condition) conditional_stopped_child_used_math(condition, current) ++ ++#define copy_to_stopped_child_used_math(child) \ ++ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) ++ ++/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ ++#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) ++#define used_math() tsk_used_math(current) ++ ++static inline bool is_percpu_thread(void) ++{ ++#ifdef CONFIG_SMP ++ return (current->flags & PF_NO_SETAFFINITY) && ++ (current->nr_cpus_allowed == 1); ++#else ++ return true; ++#endif ++} ++ ++/* Per-process atomic flags. */ ++#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ ++#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ ++#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ ++#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ ++#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ ++#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ ++#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ ++ ++#define TASK_PFA_TEST(name, func) \ ++ static inline bool task_##func(struct task_struct *p) \ ++ { return test_bit(PFA_##name, &p->atomic_flags); } ++ ++#define TASK_PFA_SET(name, func) \ ++ static inline void task_set_##func(struct task_struct *p) \ ++ { set_bit(PFA_##name, &p->atomic_flags); } ++ ++#define TASK_PFA_CLEAR(name, func) \ ++ static inline void task_clear_##func(struct task_struct *p) \ ++ { clear_bit(PFA_##name, &p->atomic_flags); } ++ ++TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs) ++TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs) ++ ++TASK_PFA_TEST(SPREAD_PAGE, spread_page) ++TASK_PFA_SET(SPREAD_PAGE, spread_page) ++TASK_PFA_CLEAR(SPREAD_PAGE, spread_page) ++ ++TASK_PFA_TEST(SPREAD_SLAB, spread_slab) ++TASK_PFA_SET(SPREAD_SLAB, spread_slab) ++TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) ++ ++TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) ++TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) ++TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) ++ ++TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) ++TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) ++ ++TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) ++TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) ++TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) ++ ++TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) ++TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) ++ ++static inline void ++current_restore_flags(unsigned long orig_flags, unsigned long flags) ++{ ++ current->flags &= ~flags; ++ current->flags |= orig_flags & flags; ++} ++ ++extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); ++extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); ++#ifdef CONFIG_SMP ++extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); ++extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); ++#else ++static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) ++{ ++} ++static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ if (!cpumask_test_cpu(0, new_mask)) ++ return -EINVAL; ++ return 0; ++} ++#endif ++ ++#ifndef cpu_relax_yield ++#define cpu_relax_yield() cpu_relax() ++#endif ++ ++extern int 
yield_to(struct task_struct *p, bool preempt); ++extern void set_user_nice(struct task_struct *p, long nice); ++extern int task_prio(const struct task_struct *p); ++ ++/** ++ * task_nice - return the nice value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The nice value [ -20 ... 0 ... 19 ]. ++ */ ++static inline int task_nice(const struct task_struct *p) ++{ ++ return PRIO_TO_NICE((p)->static_prio); ++} ++ ++extern int can_nice(const struct task_struct *p, const int nice); ++extern int task_curr(const struct task_struct *p); ++extern int idle_cpu(int cpu); ++extern int available_idle_cpu(int cpu); ++extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); ++extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); ++extern int sched_setattr(struct task_struct *, const struct sched_attr *); ++extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *); ++extern struct task_struct *idle_task(int cpu); ++ ++/** ++ * is_idle_task - is the specified task an idle task? ++ * @p: the task in question. ++ * ++ * Return: 1 if @p is an idle task. 0 otherwise. ++ */ ++static inline bool is_idle_task(const struct task_struct *p) ++{ ++ return !!(p->flags & PF_IDLE); ++} ++ ++extern struct task_struct *curr_task(int cpu); ++extern void ia64_set_curr_task(int cpu, struct task_struct *p); ++ ++void yield(void); ++ ++union thread_union { ++#ifndef CONFIG_ARCH_TASK_STRUCT_ON_STACK ++ struct task_struct task; ++#endif ++#ifndef CONFIG_THREAD_INFO_IN_TASK ++ struct thread_info thread_info; ++#endif ++ unsigned long stack[THREAD_SIZE/sizeof(long)]; ++}; ++ ++#ifndef CONFIG_THREAD_INFO_IN_TASK ++extern struct thread_info init_thread_info; ++#endif ++ ++extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; ++ ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++static inline struct thread_info *task_thread_info(struct task_struct *task) ++{ ++ return &task->thread_info; ++} ++#elif !defined(__HAVE_THREAD_FUNCTIONS) ++# define task_thread_info(task) ((struct thread_info *)(task)->stack) ++#endif ++ ++/* ++ * find a task by one of its numerical ids ++ * ++ * find_task_by_pid_ns(): ++ * finds a task by its pid in the specified namespace ++ * find_task_by_vpid(): ++ * finds a task by its virtual pid ++ * ++ * see also find_vpid() etc in include/linux/pid.h ++ */ ++ ++extern struct task_struct *find_task_by_vpid(pid_t nr); ++extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); ++ ++/* ++ * find a task by its virtual pid and get the task struct ++ */ ++extern struct task_struct *find_get_task_by_vpid(pid_t nr); ++ ++extern int wake_up_state(struct task_struct *tsk, unsigned int state); ++extern int wake_up_process(struct task_struct *tsk); ++extern void wake_up_new_task(struct task_struct *tsk); ++ ++#ifdef CONFIG_SMP ++extern void kick_process(struct task_struct *tsk); ++#else ++static inline void kick_process(struct task_struct *tsk) { } ++#endif ++ ++extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); ++ ++static inline void set_task_comm(struct task_struct *tsk, const char *from) ++{ ++ __set_task_comm(tsk, from, false); ++} ++ ++extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk); ++#define get_task_comm(buf, tsk) ({ \ ++ BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \ ++ __get_task_comm(buf, sizeof(buf), tsk); \ ++}) ++ ++#ifdef CONFIG_SMP ++void scheduler_ipi(void); ++extern unsigned long wait_task_inactive(struct 
task_struct *, long match_state); ++#else ++static inline void scheduler_ipi(void) { } ++static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) ++{ ++ return 1; ++} ++#endif ++ ++/* ++ * Set thread flags in other task's structures. ++ * See asm/thread_info.h for TIF_xxxx flags available: ++ */ ++static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) ++{ ++ set_ti_thread_flag(task_thread_info(tsk), flag); ++} ++ ++static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) ++{ ++ clear_ti_thread_flag(task_thread_info(tsk), flag); ++} ++ ++static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, ++ bool value) ++{ ++ update_ti_thread_flag(task_thread_info(tsk), flag, value); ++} ++ ++static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) ++{ ++ return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); ++} ++ ++static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) ++{ ++ return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); ++} ++ ++static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) ++{ ++ return test_ti_thread_flag(task_thread_info(tsk), flag); ++} ++ ++static inline void set_tsk_need_resched(struct task_struct *tsk) ++{ ++ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); ++} ++ ++static inline void clear_tsk_need_resched(struct task_struct *tsk) ++{ ++ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); ++} ++ ++static inline int test_tsk_need_resched(struct task_struct *tsk) ++{ ++ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); ++} ++ ++/* ++ * cond_resched() and cond_resched_lock(): latency reduction via ++ * explicit rescheduling in places that are safe. The return ++ * value indicates whether a reschedule was done in fact. ++ * cond_resched_lock() will drop the spinlock before scheduling, ++ */ ++#ifndef CONFIG_PREEMPT ++extern int _cond_resched(void); ++#else ++static inline int _cond_resched(void) { return 0; } ++#endif ++ ++#define cond_resched() ({ \ ++ ___might_sleep(__FILE__, __LINE__, 0); \ ++ _cond_resched(); \ ++}) ++ ++extern int __cond_resched_lock(spinlock_t *lock); ++ ++#define cond_resched_lock(lock) ({ \ ++ ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ ++ __cond_resched_lock(lock); \ ++}) ++ ++static inline void cond_resched_rcu(void) ++{ ++#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) ++ rcu_read_unlock(); ++ cond_resched(); ++ rcu_read_lock(); ++#endif ++} ++ ++/* ++ * Does a critical section need to be broken due to another ++ * task waiting?: (technically does not depend on CONFIG_PREEMPT, ++ * but a general need for low latency) ++ */ ++static inline int spin_needbreak(spinlock_t *lock) ++{ ++#ifdef CONFIG_PREEMPT ++ return spin_is_contended(lock); ++#else ++ return 0; ++#endif ++} ++ ++static __always_inline bool need_resched(void) ++{ ++ return unlikely(tif_need_resched()); ++} ++ ++/* ++ * Wrappers for p->thread_info->cpu access. No-op on UP. 
++ */ ++#ifdef CONFIG_SMP ++ ++static inline unsigned int task_cpu(const struct task_struct *p) ++{ ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ return READ_ONCE(p->cpu); ++#else ++ return READ_ONCE(task_thread_info(p)->cpu); ++#endif ++} ++ ++extern void set_task_cpu(struct task_struct *p, unsigned int cpu); ++ ++#else ++ ++static inline unsigned int task_cpu(const struct task_struct *p) ++{ ++ return 0; ++} ++ ++static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) ++{ ++} ++ ++#endif /* CONFIG_SMP */ ++ ++/* ++ * In order to reduce various lock holder preemption latencies provide an ++ * interface to see if a vCPU is currently running or not. ++ * ++ * This allows us to terminate optimistic spin loops and block, analogous to ++ * the native optimistic spin heuristic of testing if the lock owner task is ++ * running or not. ++ */ ++#ifndef vcpu_is_preempted ++# define vcpu_is_preempted(cpu) false ++#endif ++ ++extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); ++extern long sched_getaffinity(pid_t pid, struct cpumask *mask); ++ ++#ifndef TASK_SIZE_OF ++#define TASK_SIZE_OF(tsk) TASK_SIZE ++#endif ++ ++#ifdef CONFIG_RSEQ ++ ++/* ++ * Map the event mask on the user-space ABI enum rseq_cs_flags ++ * for direct mask checks. ++ */ ++enum rseq_event_mask_bits { ++ RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT, ++ RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT, ++ RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT, ++}; ++ ++enum rseq_event_mask { ++ RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT), ++ RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT), ++ RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT), ++}; ++ ++static inline void rseq_set_notify_resume(struct task_struct *t) ++{ ++ if (t->rseq) ++ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); ++} ++ ++void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs); ++ ++static inline void rseq_handle_notify_resume(struct ksignal *ksig, ++ struct pt_regs *regs) ++{ ++ if (current->rseq) ++ __rseq_handle_notify_resume(ksig, regs); ++} ++ ++static inline void rseq_signal_deliver(struct ksignal *ksig, ++ struct pt_regs *regs) ++{ ++ preempt_disable(); ++ __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); ++ preempt_enable(); ++ rseq_handle_notify_resume(ksig, regs); ++} ++ ++/* rseq_preempt() requires preemption to be disabled. */ ++static inline void rseq_preempt(struct task_struct *t) ++{ ++ __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask); ++ rseq_set_notify_resume(t); ++} ++ ++/* rseq_migrate() requires preemption to be disabled. */ ++static inline void rseq_migrate(struct task_struct *t) ++{ ++ __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask); ++ rseq_set_notify_resume(t); ++} ++ ++/* ++ * If parent process has a registered restartable sequences area, the ++ * child inherits. Only applies when forking a process, not a thread. 
++ */ ++static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) ++{ ++ if (clone_flags & CLONE_THREAD) { ++ t->rseq = NULL; ++ t->rseq_len = 0; ++ t->rseq_sig = 0; ++ t->rseq_event_mask = 0; ++ } else { ++ t->rseq = current->rseq; ++ t->rseq_len = current->rseq_len; ++ t->rseq_sig = current->rseq_sig; ++ t->rseq_event_mask = current->rseq_event_mask; ++ } ++} ++ ++static inline void rseq_execve(struct task_struct *t) ++{ ++ t->rseq = NULL; ++ t->rseq_len = 0; ++ t->rseq_sig = 0; ++ t->rseq_event_mask = 0; ++} ++ ++#else ++ ++static inline void rseq_set_notify_resume(struct task_struct *t) ++{ ++} ++static inline void rseq_handle_notify_resume(struct ksignal *ksig, ++ struct pt_regs *regs) ++{ ++} ++static inline void rseq_signal_deliver(struct ksignal *ksig, ++ struct pt_regs *regs) ++{ ++} ++static inline void rseq_preempt(struct task_struct *t) ++{ ++} ++static inline void rseq_migrate(struct task_struct *t) ++{ ++} ++static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) ++{ ++} ++static inline void rseq_execve(struct task_struct *t) ++{ ++} ++ ++#endif ++ ++#ifdef CONFIG_DEBUG_RSEQ ++ ++void rseq_syscall(struct pt_regs *regs); ++ ++#else ++ ++static inline void rseq_syscall(struct pt_regs *regs) ++{ ++} ++ ++#endif ++ ++#endif +diff -uprN kernel/include/linux/spinlock_api_smp.h kernel_new/include/linux/spinlock_api_smp.h +--- kernel/include/linux/spinlock_api_smp.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/spinlock_api_smp.h 2021-04-01 18:28:07.803863123 +0800 +@@ -99,7 +99,9 @@ static inline int __raw_spin_trylock(raw + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) ++#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ ++ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ ++ defined(CONFIG_IPIPE) + + static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock) + { +@@ -113,7 +115,7 @@ static inline unsigned long __raw_spin_l + * do_raw_spin_lock_flags() code, because lockdep assumes + * that interrupts are not re-enabled during lock-acquire: + */ +-#ifdef CONFIG_LOCKDEP ++#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE) + LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); + #else + do_raw_spin_lock_flags(lock, &flags); +diff -uprN kernel/include/linux/spinlock.h kernel_new/include/linux/spinlock.h +--- kernel/include/linux/spinlock.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/spinlock.h 2021-04-01 18:28:07.803863123 +0800 +@@ -90,10 +90,12 @@ + # include + #endif + ++#include ++ + #ifdef CONFIG_DEBUG_SPINLOCK + extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, + struct lock_class_key *key); +-# define raw_spin_lock_init(lock) \ ++# define __real_raw_spin_lock_init(lock) \ + do { \ + static struct lock_class_key __key; \ + \ +@@ -101,11 +103,14 @@ do { \ + } while (0) + + #else +-# define raw_spin_lock_init(lock) \ ++# define __real_raw_spin_lock_init(lock) \ + do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0) + #endif ++#define raw_spin_lock_init(lock) PICK_SPINOP(_lock_init, lock) + +-#define raw_spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock) ++#define __real_raw_spin_is_locked(lock) \ ++ arch_spin_is_locked(&(lock)->raw_lock) ++#define raw_spin_is_locked(lock) PICK_SPINOP_RET(_is_locked, lock, int) + + #ifdef arch_spin_is_contended + #define 
raw_spin_is_contended(lock) arch_spin_is_contended(&(lock)->raw_lock) +@@ -209,9 +214,11 @@ static inline void do_raw_spin_unlock(ra + * various methods are defined as nops in the case they are not + * required. + */ +-#define raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) ++#define __real_raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) ++#define raw_spin_trylock(lock) PICK_SPINOP_RET(_trylock, lock, int) + +-#define raw_spin_lock(lock) _raw_spin_lock(lock) ++#define __real_raw_spin_lock(lock) _raw_spin_lock(lock) ++#define raw_spin_lock(lock) PICK_SPINOP(_lock, lock) + + #ifdef CONFIG_DEBUG_LOCK_ALLOC + # define raw_spin_lock_nested(lock, subclass) \ +@@ -235,7 +242,7 @@ static inline void do_raw_spin_unlock(ra + + #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + +-#define raw_spin_lock_irqsave(lock, flags) \ ++#define __real_raw_spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _raw_spin_lock_irqsave(lock); \ +@@ -257,7 +264,7 @@ static inline void do_raw_spin_unlock(ra + + #else + +-#define raw_spin_lock_irqsave(lock, flags) \ ++#define __real_raw_spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _raw_spin_lock_irqsave(lock, flags); \ +@@ -268,34 +275,46 @@ static inline void do_raw_spin_unlock(ra + + #endif + +-#define raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock) ++#define raw_spin_lock_irqsave(lock, flags) \ ++ PICK_SPINLOCK_IRQSAVE(lock, flags) ++ ++#define __real_raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock) ++#define raw_spin_lock_irq(lock) PICK_SPINOP(_lock_irq, lock) + #define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock) +-#define raw_spin_unlock(lock) _raw_spin_unlock(lock) +-#define raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock) ++#define __real_raw_spin_unlock(lock) _raw_spin_unlock(lock) ++#define raw_spin_unlock(lock) PICK_SPINOP(_unlock, lock) ++#define __real_raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock) ++#define raw_spin_unlock_irq(lock) PICK_SPINOP(_unlock_irq, lock) + +-#define raw_spin_unlock_irqrestore(lock, flags) \ ++#define __real_raw_spin_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _raw_spin_unlock_irqrestore(lock, flags); \ + } while (0) ++#define raw_spin_unlock_irqrestore(lock, flags) \ ++ PICK_SPINUNLOCK_IRQRESTORE(lock, flags) ++ + #define raw_spin_unlock_bh(lock) _raw_spin_unlock_bh(lock) + + #define raw_spin_trylock_bh(lock) \ + __cond_lock(lock, _raw_spin_trylock_bh(lock)) + +-#define raw_spin_trylock_irq(lock) \ ++#define __real_raw_spin_trylock_irq(lock) \ + ({ \ + local_irq_disable(); \ +- raw_spin_trylock(lock) ? \ ++ __real_raw_spin_trylock(lock) ? \ + 1 : ({ local_irq_enable(); 0; }); \ + }) ++#define raw_spin_trylock_irq(lock) PICK_SPINTRYLOCK_IRQ(lock) + +-#define raw_spin_trylock_irqsave(lock, flags) \ ++#define __real_raw_spin_trylock_irqsave(lock, flags) \ + ({ \ + local_irq_save(flags); \ + raw_spin_trylock(lock) ? 
\ + 1 : ({ local_irq_restore(flags); 0; }); \ + }) ++#define raw_spin_trylock_irqsave(lock, flags) \ ++ PICK_SPINTRYLOCK_IRQSAVE(lock, flags) + + /* Include rwlock functions */ + #include +@@ -320,24 +339,17 @@ static __always_inline raw_spinlock_t *s + + #define spin_lock_init(_lock) \ + do { \ +- spinlock_check(_lock); \ +- raw_spin_lock_init(&(_lock)->rlock); \ ++ raw_spin_lock_init(_lock); \ + } while (0) + +-static __always_inline void spin_lock(spinlock_t *lock) +-{ +- raw_spin_lock(&lock->rlock); +-} ++#define spin_lock(lock) raw_spin_lock(lock) + + static __always_inline void spin_lock_bh(spinlock_t *lock) + { + raw_spin_lock_bh(&lock->rlock); + } + +-static __always_inline int spin_trylock(spinlock_t *lock) +-{ +- return raw_spin_trylock(&lock->rlock); +-} ++#define spin_trylock(lock) raw_spin_trylock(lock) + + #define spin_lock_nested(lock, subclass) \ + do { \ +@@ -349,14 +361,11 @@ do { \ + raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ + } while (0) + +-static __always_inline void spin_lock_irq(spinlock_t *lock) +-{ +- raw_spin_lock_irq(&lock->rlock); +-} ++#define spin_lock_irq(lock) raw_spin_lock_irq(lock) + + #define spin_lock_irqsave(lock, flags) \ + do { \ +- raw_spin_lock_irqsave(spinlock_check(lock), flags); \ ++ raw_spin_lock_irqsave(lock, flags); \ + } while (0) + + #define spin_lock_irqsave_nested(lock, flags, subclass) \ +@@ -364,39 +373,28 @@ do { \ + raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \ + } while (0) + +-static __always_inline void spin_unlock(spinlock_t *lock) +-{ +- raw_spin_unlock(&lock->rlock); +-} ++#define spin_unlock(lock) raw_spin_unlock(lock) + + static __always_inline void spin_unlock_bh(spinlock_t *lock) + { + raw_spin_unlock_bh(&lock->rlock); + } + +-static __always_inline void spin_unlock_irq(spinlock_t *lock) +-{ +- raw_spin_unlock_irq(&lock->rlock); +-} ++#define spin_unlock_irq(lock) raw_spin_unlock_irq(lock) + +-static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) +-{ +- raw_spin_unlock_irqrestore(&lock->rlock, flags); +-} ++#define spin_unlock_irqrestore(lock, flags) \ ++ raw_spin_unlock_irqrestore(lock, flags) + + static __always_inline int spin_trylock_bh(spinlock_t *lock) + { + return raw_spin_trylock_bh(&lock->rlock); + } + +-static __always_inline int spin_trylock_irq(spinlock_t *lock) +-{ +- return raw_spin_trylock_irq(&lock->rlock); +-} ++#define spin_trylock_irq(lock) raw_spin_trylock_irq(lock) + + #define spin_trylock_irqsave(lock, flags) \ + ({ \ +- raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ ++ raw_spin_trylock_irqsave(lock, flags); \ + }) + + /** +diff -uprN kernel/include/linux/spinlock_up.h kernel_new/include/linux/spinlock_up.h +--- kernel/include/linux/spinlock_up.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/spinlock_up.h 2021-04-01 18:28:07.804863121 +0800 +@@ -48,16 +48,6 @@ static inline void arch_spin_unlock(arch + lock->slock = 1; + } + +-/* +- * Read-write spinlocks. No debug version. 
+- */ +-#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) +-#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) +-#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) +-#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) +-#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) +-#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) +- + #else /* DEBUG_SPINLOCK */ + #define arch_spin_is_locked(lock) ((void)(lock), 0) + /* for sched/core.c and kernel_lock.c: */ +@@ -67,6 +57,13 @@ static inline void arch_spin_unlock(arch + # define arch_spin_trylock(lock) ({ barrier(); (void)(lock); 1; }) + #endif /* DEBUG_SPINLOCK */ + ++#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) ++#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) ++#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) ++#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) ++#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) ++#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) ++ + #define arch_spin_is_contended(lock) (((void)(lock), 0)) + + #endif /* __LINUX_SPINLOCK_UP_H */ +diff -uprN kernel/include/linux/stop_machine.h kernel_new/include/linux/stop_machine.h +--- kernel/include/linux/stop_machine.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/stop_machine.h 2021-04-01 18:28:07.804863121 +0800 +@@ -138,13 +138,17 @@ int stop_machine_from_inactive_cpu(cpu_s + const struct cpumask *cpus); + #else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ + ++#include ++ + static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, + const struct cpumask *cpus) + { + unsigned long flags; + int ret; + local_irq_save(flags); ++ hard_irq_disable(); + ret = fn(data); ++ hard_irq_enable(); + local_irq_restore(flags); + return ret; + } +diff -uprN kernel/init/Kconfig kernel_new/init/Kconfig +--- kernel/init/Kconfig 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/init/Kconfig 2021-04-01 18:28:07.804863121 +0800 +@@ -1291,6 +1291,18 @@ config PRINTK_NMI + depends on PRINTK + depends on HAVE_NMI + ++config RAW_PRINTK ++ bool "Enable support for raw printk" ++ default n ++ help ++ This option enables a printk variant called raw_printk() for ++ writing all output unmodified to a raw console channel ++ immediately, without any header or preparation whatsoever, ++ usable from any context. ++ ++ Unlike early_printk() console devices, raw_printk() devices ++ can live past the boot sequence. 
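(Usage illustration for the RAW_PRINTK option described above: a minimal sketch of a driver calling raw_printk(), assuming the printf-style prototype that the patched kernel headers expose when CONFIG_RAW_PRINTK=y; the interrupt handler name and IRQ wiring below are hypothetical and only show the calling context, they are not taken from the patch.)

/*
 * Minimal sketch: raw_printk() from an interrupt handler.
 * Assumes CONFIG_RAW_PRINTK=y and the printf-style raw_printk()
 * declaration provided by the patched kernel headers; the handler
 * is hypothetical and exists only to illustrate that the call is
 * legal from contexts where the regular printk() path may not be.
 */
#include <linux/interrupt.h>
#include <linux/printk.h>

static irqreturn_t demo_irq_handler(int irq, void *dev_id)
{
	/*
	 * Output is written unmodified and immediately to the raw
	 * console channel, without log-level prefixes or deferred
	 * buffering, matching the behavior described in the help text.
	 */
	raw_printk("demo: irq %d fired\n", irq);
	return IRQ_HANDLED;
}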
++ + config BUG + bool "BUG() support" if EXPERT + default y +diff -uprN kernel/init/Kconfig.orig kernel_new/init/Kconfig.orig +--- kernel/init/Kconfig.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/init/Kconfig.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2046 @@ ++config DEFCONFIG_LIST ++ string ++ depends on !UML ++ option defconfig_list ++ default "/lib/modules/$(shell,uname -r)/.config" ++ default "/etc/kernel-config" ++ default "/boot/config-$(shell,uname -r)" ++ default ARCH_DEFCONFIG ++ default "arch/$(ARCH)/defconfig" ++ ++config CC_IS_GCC ++ def_bool $(success,$(CC) --version | head -n 1 | grep -q gcc) ++ ++config GCC_VERSION ++ int ++ default $(shell,$(srctree)/scripts/gcc-version.sh -p $(CC) | sed 's/^0*//') if CC_IS_GCC ++ default 0 ++ ++config CC_IS_CLANG ++ def_bool $(success,$(CC) --version | head -n 1 | grep -q clang) ++ ++config CLANG_VERSION ++ int ++ default $(shell,$(srctree)/scripts/clang-version.sh $(CC)) ++ ++config CC_HAS_ASM_GOTO ++ def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) ++ ++config CONSTRUCTORS ++ bool ++ depends on !UML ++ ++config IRQ_WORK ++ bool ++ ++config BUILDTIME_EXTABLE_SORT ++ bool ++ ++config THREAD_INFO_IN_TASK ++ bool ++ help ++ Select this to move thread_info off the stack into task_struct. To ++ make this work, an arch will need to remove all thread_info fields ++ except flags and fix any runtime bugs. ++ ++ One subtle change that will be needed is to use try_get_task_stack() ++ and put_task_stack() in save_thread_stack_tsk() and get_wchan(). ++ ++menu "General setup" ++ ++config BROKEN ++ bool ++ ++config BROKEN_ON_SMP ++ bool ++ depends on BROKEN || !SMP ++ default y ++ ++config INIT_ENV_ARG_LIMIT ++ int ++ default 32 if !UML ++ default 128 if UML ++ help ++ Maximum of each of the number of arguments and environment ++ variables passed to init from the kernel command line. ++ ++config COMPILE_TEST ++ bool "Compile also drivers which will not load" ++ depends on !UML ++ default n ++ help ++ Some drivers can be compiled on a different platform than they are ++ intended to be run on. Despite they cannot be loaded there (or even ++ when they load they cannot be used due to missing HW support), ++ developers still, opposing to distributors, might want to build such ++ drivers to compile-test them. ++ ++ If you are a developer and want to build everything available, say Y ++ here. If you are a user/distributor, say N here to exclude useless ++ drivers to be distributed. ++ ++config LOCALVERSION ++ string "Local version - append to kernel release" ++ help ++ Append an extra string to the end of your kernel version. ++ This will show up when you type uname, for example. ++ The string you set here will be appended after the contents of ++ any files with a filename matching localversion* in your ++ object and source tree, in that order. Your total string can ++ be a maximum of 64 characters. ++ ++config LOCALVERSION_AUTO ++ bool "Automatically append version information to the version string" ++ default y ++ depends on !COMPILE_TEST ++ help ++ This will try to automatically determine if the current tree is a ++ release tree by looking for git tags that belong to the current ++ top of tree revision. ++ ++ A string of the format -gxxxxxxxx will be added to the localversion ++ if a git-based tree is found. The string generated by this will be ++ appended after any matching localversion* files, and after the value ++ set in CONFIG_LOCALVERSION. 
++ ++ (The actual string used here is the first eight characters produced ++ by running the command: ++ ++ $ git rev-parse --verify HEAD ++ ++ which is done within the script "scripts/setlocalversion".) ++ ++config BUILD_SALT ++ string "Build ID Salt" ++ default "" ++ help ++ The build ID is used to link binaries and their debug info. Setting ++ this option will use the value in the calculation of the build id. ++ This is mostly useful for distributions which want to ensure the ++ build is unique between builds. It's safe to leave the default. ++ ++config HAVE_KERNEL_GZIP ++ bool ++ ++config HAVE_KERNEL_BZIP2 ++ bool ++ ++config HAVE_KERNEL_LZMA ++ bool ++ ++config HAVE_KERNEL_XZ ++ bool ++ ++config HAVE_KERNEL_LZO ++ bool ++ ++config HAVE_KERNEL_LZ4 ++ bool ++ ++config HAVE_KERNEL_UNCOMPRESSED ++ bool ++ ++choice ++ prompt "Kernel compression mode" ++ default KERNEL_GZIP ++ depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_UNCOMPRESSED ++ help ++ The linux kernel is a kind of self-extracting executable. ++ Several compression algorithms are available, which differ ++ in efficiency, compression and decompression speed. ++ Compression speed is only relevant when building a kernel. ++ Decompression speed is relevant at each boot. ++ ++ If you have any problems with bzip2 or lzma compressed ++ kernels, mail me (Alain Knaff) . (An older ++ version of this functionality (bzip2 only), for 2.4, was ++ supplied by Christian Ludwig) ++ ++ High compression options are mostly useful for users, who ++ are low on disk space (embedded systems), but for whom ram ++ size matters less. ++ ++ If in doubt, select 'gzip' ++ ++config KERNEL_GZIP ++ bool "Gzip" ++ depends on HAVE_KERNEL_GZIP ++ help ++ The old and tried gzip compression. It provides a good balance ++ between compression ratio and decompression speed. ++ ++config KERNEL_BZIP2 ++ bool "Bzip2" ++ depends on HAVE_KERNEL_BZIP2 ++ help ++ Its compression ratio and speed is intermediate. ++ Decompression speed is slowest among the choices. The kernel ++ size is about 10% smaller with bzip2, in comparison to gzip. ++ Bzip2 uses a large amount of memory. For modern kernels you ++ will need at least 8MB RAM or more for booting. ++ ++config KERNEL_LZMA ++ bool "LZMA" ++ depends on HAVE_KERNEL_LZMA ++ help ++ This compression algorithm's ratio is best. Decompression speed ++ is between gzip and bzip2. Compression is slowest. ++ The kernel size is about 33% smaller with LZMA in comparison to gzip. ++ ++config KERNEL_XZ ++ bool "XZ" ++ depends on HAVE_KERNEL_XZ ++ help ++ XZ uses the LZMA2 algorithm and instruction set specific ++ BCJ filters which can improve compression ratio of executable ++ code. The size of the kernel is about 30% smaller with XZ in ++ comparison to gzip. On architectures for which there is a BCJ ++ filter (i386, x86_64, ARM, IA-64, PowerPC, and SPARC), XZ ++ will create a few percent smaller kernel than plain LZMA. ++ ++ The speed is about the same as with LZMA: The decompression ++ speed of XZ is better than that of bzip2 but worse than gzip ++ and LZO. Compression is slow. ++ ++config KERNEL_LZO ++ bool "LZO" ++ depends on HAVE_KERNEL_LZO ++ help ++ Its compression ratio is the poorest among the choices. The kernel ++ size is about 10% bigger than gzip; however its speed ++ (both compression and decompression) is the fastest. 
++ ++config KERNEL_LZ4 ++ bool "LZ4" ++ depends on HAVE_KERNEL_LZ4 ++ help ++ LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding. ++ A preliminary version of LZ4 de/compression tool is available at ++ . ++ ++ Its compression ratio is worse than LZO. The size of the kernel ++ is about 8% bigger than LZO. But the decompression speed is ++ faster than LZO. ++ ++config KERNEL_UNCOMPRESSED ++ bool "None" ++ depends on HAVE_KERNEL_UNCOMPRESSED ++ help ++ Produce uncompressed kernel image. This option is usually not what ++ you want. It is useful for debugging the kernel in slow simulation ++ environments, where decompressing and moving the kernel is awfully ++ slow. This option allows early boot code to skip the decompressor ++ and jump right at uncompressed kernel image. ++ ++endchoice ++ ++config DEFAULT_HOSTNAME ++ string "Default hostname" ++ default "(none)" ++ help ++ This option determines the default system hostname before userspace ++ calls sethostname(2). The kernel traditionally uses "(none)" here, ++ but you may wish to use a different default here to make a minimal ++ system more usable with less configuration. ++ ++# ++# For some reason microblaze and nios2 hard code SWAP=n. Hopefully we can ++# add proper SWAP support to them, in which case this can be remove. ++# ++config ARCH_NO_SWAP ++ bool ++ ++config SWAP ++ bool "Support for paging of anonymous memory (swap)" ++ depends on MMU && BLOCK && !ARCH_NO_SWAP ++ default y ++ help ++ This option allows you to choose whether you want to have support ++ for so called swap devices or swap files in your kernel that are ++ used to provide more virtual memory than the actual RAM present ++ in your computer. If unsure say Y. ++ ++config SYSVIPC ++ bool "System V IPC" ++ ---help--- ++ Inter Process Communication is a suite of library functions and ++ system calls which let processes (running programs) synchronize and ++ exchange information. It is generally considered to be a good thing, ++ and some programs won't run unless you say Y here. In particular, if ++ you want to run the DOS emulator dosemu under Linux (read the ++ DOSEMU-HOWTO, available from ), ++ you'll need to say Y here. ++ ++ You can find documentation about IPC with "info ipc" and also in ++ section 6.4 of the Linux Programmer's Guide, available from ++ . ++ ++config SYSVIPC_SYSCTL ++ bool ++ depends on SYSVIPC ++ depends on SYSCTL ++ default y ++ ++config POSIX_MQUEUE ++ bool "POSIX Message Queues" ++ depends on NET ++ ---help--- ++ POSIX variant of message queues is a part of IPC. In POSIX message ++ queues every message has a priority which decides about succession ++ of receiving it by a process. If you want to compile and run ++ programs written e.g. for Solaris with use of its POSIX message ++ queues (functions mq_*) say Y here. ++ ++ POSIX message queues are visible as a filesystem called 'mqueue' ++ and can be mounted somewhere if you want to do filesystem ++ operations on message queues. ++ ++ If unsure, say Y. ++ ++config POSIX_MQUEUE_SYSCTL ++ bool ++ depends on POSIX_MQUEUE ++ depends on SYSCTL ++ default y ++ ++config CROSS_MEMORY_ATTACH ++ bool "Enable process_vm_readv/writev syscalls" ++ depends on MMU ++ default y ++ help ++ Enabling this option adds the system calls process_vm_readv and ++ process_vm_writev which allow a process with the correct privileges ++ to directly read from or write to another process' address space. ++ See the man page for more details. 
++ ++config USELIB ++ bool "uselib syscall" ++ def_bool ALPHA || M68K || SPARC || X86_32 || IA32_EMULATION ++ help ++ This option enables the uselib syscall, a system call used in the ++ dynamic linker from libc5 and earlier. glibc does not use this ++ system call. If you intend to run programs built on libc5 or ++ earlier, you may need to enable this syscall. Current systems ++ running glibc can safely disable this. ++ ++config AUDIT ++ bool "Auditing support" ++ depends on NET ++ help ++ Enable auditing infrastructure that can be used with another ++ kernel subsystem, such as SELinux (which requires this for ++ logging of avc messages output). System call auditing is included ++ on architectures which support it. ++ ++config HAVE_ARCH_AUDITSYSCALL ++ bool ++ ++config AUDITSYSCALL ++ def_bool y ++ depends on AUDIT && HAVE_ARCH_AUDITSYSCALL ++ ++config AUDIT_WATCH ++ def_bool y ++ depends on AUDITSYSCALL ++ select FSNOTIFY ++ ++config AUDIT_TREE ++ def_bool y ++ depends on AUDITSYSCALL ++ select FSNOTIFY ++ ++config KTASK ++ bool "Multithread CPU-intensive kernel work" ++ depends on SMP ++ default y ++ help ++ Parallelize CPU-intensive kernel work. This feature is designed for ++ big machines that can take advantage of their extra CPUs to speed up ++ large kernel tasks. When enabled, kworker threads may occupy more ++ CPU time during these kernel tasks, but these threads are throttled ++ when other tasks on the system need CPU time. ++ ++source "kernel/irq/Kconfig" ++source "kernel/time/Kconfig" ++source "kernel/Kconfig.preempt" ++ ++menu "CPU/Task time and stats accounting" ++ ++config VIRT_CPU_ACCOUNTING ++ bool ++ ++choice ++ prompt "Cputime accounting" ++ default TICK_CPU_ACCOUNTING if !PPC64 ++ default VIRT_CPU_ACCOUNTING_NATIVE if PPC64 ++ ++# Kind of a stub config for the pure tick based cputime accounting ++config TICK_CPU_ACCOUNTING ++ bool "Simple tick based cputime accounting" ++ depends on !S390 && !NO_HZ_FULL ++ help ++ This is the basic tick based cputime accounting that maintains ++ statistics about user, system and idle time spent on per jiffies ++ granularity. ++ ++ If unsure, say Y. ++ ++config VIRT_CPU_ACCOUNTING_NATIVE ++ bool "Deterministic task and CPU time accounting" ++ depends on HAVE_VIRT_CPU_ACCOUNTING && !NO_HZ_FULL ++ select VIRT_CPU_ACCOUNTING ++ help ++ Select this option to enable more accurate task and CPU time ++ accounting. This is done by reading a CPU counter on each ++ kernel entry and exit and on transitions within the kernel ++ between system, softirq and hardirq state, so there is a ++ small performance impact. In the case of s390 or IBM POWER > 5, ++ this also enables accounting of stolen time on logically-partitioned ++ systems. ++ ++config VIRT_CPU_ACCOUNTING_GEN ++ bool "Full dynticks CPU time accounting" ++ depends on HAVE_CONTEXT_TRACKING ++ depends on HAVE_VIRT_CPU_ACCOUNTING_GEN ++ select VIRT_CPU_ACCOUNTING ++ select CONTEXT_TRACKING ++ help ++ Select this option to enable task and CPU time accounting on full ++ dynticks systems. This accounting is implemented by watching every ++ kernel-user boundaries using the context tracking subsystem. ++ The accounting is thus performed at the expense of some significant ++ overhead. ++ ++ For now this is only useful if you are working on the full ++ dynticks subsystem development. ++ ++ If unsure, say N. 
++ ++endchoice ++ ++config IRQ_TIME_ACCOUNTING ++ bool "Fine granularity task level IRQ time accounting" ++ depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE ++ help ++ Select this option to enable fine granularity task irq time ++ accounting. This is done by reading a timestamp on each ++ transitions between softirq and hardirq state, so there can be a ++ small performance impact. ++ ++ If in doubt, say N here. ++ ++config HAVE_SCHED_AVG_IRQ ++ def_bool y ++ depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING ++ depends on SMP ++ ++config BSD_PROCESS_ACCT ++ bool "BSD Process Accounting" ++ depends on MULTIUSER ++ help ++ If you say Y here, a user level program will be able to instruct the ++ kernel (via a special system call) to write process accounting ++ information to a file: whenever a process exits, information about ++ that process will be appended to the file by the kernel. The ++ information includes things such as creation time, owning user, ++ command name, memory usage, controlling terminal etc. (the complete ++ list is in the struct acct in ). It is ++ up to the user level program to do useful things with this ++ information. This is generally a good idea, so say Y. ++ ++config BSD_PROCESS_ACCT_V3 ++ bool "BSD Process Accounting version 3 file format" ++ depends on BSD_PROCESS_ACCT ++ default n ++ help ++ If you say Y here, the process accounting information is written ++ in a new file format that also logs the process IDs of each ++ process and its parent. Note that this file format is incompatible ++ with previous v0/v1/v2 file formats, so you will need updated tools ++ for processing it. A preliminary version of these tools is available ++ at . ++ ++config TASKSTATS ++ bool "Export task/process statistics through netlink" ++ depends on NET ++ depends on MULTIUSER ++ default n ++ help ++ Export selected statistics for tasks/processes through the ++ generic netlink interface. Unlike BSD process accounting, the ++ statistics are available during the lifetime of tasks/processes as ++ responses to commands. Like BSD accounting, they are sent to user ++ space on task exit. ++ ++ Say N if unsure. ++ ++config TASK_DELAY_ACCT ++ bool "Enable per-task delay accounting" ++ depends on TASKSTATS ++ select SCHED_INFO ++ help ++ Collect information on time spent by a task waiting for system ++ resources like cpu, synchronous block I/O completion and swapping ++ in pages. Such statistics can help in setting a task's priorities ++ relative to other tasks for cpu, io, rss limits etc. ++ ++ Say N if unsure. ++ ++config TASK_XACCT ++ bool "Enable extended accounting over taskstats" ++ depends on TASKSTATS ++ help ++ Collect extended task accounting data and send the data ++ to userland for processing over the taskstats interface. ++ ++ Say N if unsure. ++ ++config TASK_IO_ACCOUNTING ++ bool "Enable per-task storage I/O accounting" ++ depends on TASK_XACCT ++ help ++ Collect information on the number of bytes of storage I/O which this ++ task has caused. ++ ++ Say N if unsure. ++ ++endmenu # "CPU/Task time and stats accounting" ++ ++config CPU_ISOLATION ++ bool "CPU isolation" ++ depends on SMP || COMPILE_TEST ++ default y ++ help ++ Make sure that CPUs running critical tasks are not disturbed by ++ any source of "noise" such as unbound workqueues, timers, kthreads... ++ Unbound jobs get offloaded to housekeeping CPUs. This is driven by ++ the "isolcpus=" boot parameter. ++ ++ Say Y if unsure. 
++ ++source "kernel/rcu/Kconfig" ++ ++config BUILD_BIN2C ++ bool ++ default n ++ ++config IKCONFIG ++ tristate "Kernel .config support" ++ select BUILD_BIN2C ++ ---help--- ++ This option enables the complete Linux kernel ".config" file ++ contents to be saved in the kernel. It provides documentation ++ of which kernel options are used in a running kernel or in an ++ on-disk kernel. This information can be extracted from the kernel ++ image file with the script scripts/extract-ikconfig and used as ++ input to rebuild the current kernel or to build another kernel. ++ It can also be extracted from a running kernel by reading ++ /proc/config.gz if enabled (below). ++ ++config IKCONFIG_PROC ++ bool "Enable access to .config through /proc/config.gz" ++ depends on IKCONFIG && PROC_FS ++ ---help--- ++ This option enables access to the kernel configuration file ++ through /proc/config.gz. ++ ++config LOG_BUF_SHIFT ++ int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" ++ range 12 25 ++ default 17 ++ depends on PRINTK ++ help ++ Select the minimal kernel log buffer size as a power of 2. ++ The final size is affected by LOG_CPU_MAX_BUF_SHIFT config ++ parameter, see below. Any higher size also might be forced ++ by "log_buf_len" boot parameter. ++ ++ Examples: ++ 17 => 128 KB ++ 16 => 64 KB ++ 15 => 32 KB ++ 14 => 16 KB ++ 13 => 8 KB ++ 12 => 4 KB ++ ++config LOG_CPU_MAX_BUF_SHIFT ++ int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)" ++ depends on SMP ++ range 0 21 ++ default 12 if !BASE_SMALL ++ default 0 if BASE_SMALL ++ depends on PRINTK ++ help ++ This option allows to increase the default ring buffer size ++ according to the number of CPUs. The value defines the contribution ++ of each CPU as a power of 2. The used space is typically only few ++ lines however it might be much more when problems are reported, ++ e.g. backtraces. ++ ++ The increased size means that a new buffer has to be allocated and ++ the original static one is unused. It makes sense only on systems ++ with more CPUs. Therefore this value is used only when the sum of ++ contributions is greater than the half of the default kernel ring ++ buffer as defined by LOG_BUF_SHIFT. The default values are set ++ so that more than 64 CPUs are needed to trigger the allocation. ++ ++ Also this option is ignored when "log_buf_len" kernel parameter is ++ used as it forces an exact (power of two) size of the ring buffer. ++ ++ The number of possible CPUs is used for this computation ignoring ++ hotplugging making the computation optimal for the worst case ++ scenario while allowing a simple algorithm to be used from bootup. ++ ++ Examples shift values and their meaning: ++ 17 => 128 KB for each CPU ++ 16 => 64 KB for each CPU ++ 15 => 32 KB for each CPU ++ 14 => 16 KB for each CPU ++ 13 => 8 KB for each CPU ++ 12 => 4 KB for each CPU ++ ++config PRINTK_SAFE_LOG_BUF_SHIFT ++ int "Temporary per-CPU printk log buffer size (12 => 4KB, 13 => 8KB)" ++ range 10 21 ++ default 13 ++ depends on PRINTK ++ help ++ Select the size of an alternate printk per-CPU buffer where messages ++ printed from usafe contexts are temporary stored. One example would ++ be NMI messages, another one - printk recursion. The messages are ++ copied to the main log buffer in a safe context to avoid a deadlock. ++ The value defines the size as a power of 2. ++ ++ Those messages are rare and limited. The largest one is when ++ a backtrace is printed. It usually fits into 4KB. Select ++ 8KB if you want to be on the safe side. 
++ ++ Examples: ++ 17 => 128 KB for each CPU ++ 16 => 64 KB for each CPU ++ 15 => 32 KB for each CPU ++ 14 => 16 KB for each CPU ++ 13 => 8 KB for each CPU ++ 12 => 4 KB for each CPU ++ ++# ++# Architectures with an unreliable sched_clock() should select this: ++# ++config HAVE_UNSTABLE_SCHED_CLOCK ++ bool ++ ++config GENERIC_SCHED_CLOCK ++ bool ++ ++# ++# For architectures that want to enable the support for NUMA-affine scheduler ++# balancing logic: ++# ++config ARCH_SUPPORTS_NUMA_BALANCING ++ bool ++ ++# ++# For architectures that prefer to flush all TLBs after a number of pages ++# are unmapped instead of sending one IPI per page to flush. The architecture ++# must provide guarantees on what happens if a clean TLB cache entry is ++# written after the unmap. Details are in mm/rmap.c near the check for ++# should_defer_flush. The architecture should also consider if the full flush ++# and the refill costs are offset by the savings of sending fewer IPIs. ++config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH ++ bool ++ ++# ++# For architectures that know their GCC __int128 support is sound ++# ++config ARCH_SUPPORTS_INT128 ++ bool ++ ++# For architectures that (ab)use NUMA to represent different memory regions ++# all cpu-local but of different latencies, such as SuperH. ++# ++config ARCH_WANT_NUMA_VARIABLE_LOCALITY ++ bool ++ ++config NUMA_BALANCING ++ bool "Memory placement aware NUMA scheduler" ++ depends on ARCH_SUPPORTS_NUMA_BALANCING ++ depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY ++ depends on SMP && NUMA && MIGRATION ++ help ++ This option adds support for automatic NUMA aware memory/task placement. ++ The mechanism is quite primitive and is based on migrating memory when ++ it has references to the node the task is running on. ++ ++ This system will be inactive on UMA systems. ++ ++config NUMA_BALANCING_DEFAULT_ENABLED ++ bool "Automatically enable NUMA aware memory/task placement" ++ default y ++ depends on NUMA_BALANCING ++ help ++ If set, automatic NUMA balancing will be enabled if running on a NUMA ++ machine. ++ ++menuconfig CGROUPS ++ bool "Control Group support" ++ select KERNFS ++ help ++ This option adds support for grouping sets of processes together, for ++ use with process control subsystems such as Cpusets, CFS, memory ++ controls or device isolation. ++ See ++ - Documentation/scheduler/sched-design-CFS.txt (CFS) ++ - Documentation/cgroup-v1/ (features for grouping, isolation ++ and resource control) ++ ++ Say N if unsure. ++ ++if CGROUPS ++ ++config PAGE_COUNTER ++ bool ++ ++config MEMCG ++ bool "Memory controller" ++ select PAGE_COUNTER ++ select EVENTFD ++ help ++ Provides control over the memory footprint of tasks in a cgroup. ++ ++config MEMCG_SWAP ++ bool "Swap controller" ++ depends on MEMCG && SWAP ++ help ++ Provides control over the swap space consumed by tasks in a cgroup. ++ ++config MEMCG_SWAP_ENABLED ++ bool "Swap controller enabled by default" ++ depends on MEMCG_SWAP ++ default y ++ help ++ Memory Resource Controller Swap Extension comes with its price in ++ a bigger memory consumption. General purpose distribution kernels ++ which want to enable the feature but keep it disabled by default ++ and let the user enable it by swapaccount=1 boot command line ++ parameter should have this option unselected. ++ For those who want to have the feature enabled by default should ++ select this option (if, for some reason, they need to disable it ++ then swapaccount=0 does the trick). 
++ ++config MEMCG_KMEM ++ bool ++ depends on MEMCG && !SLOB ++ default y ++ ++config BLK_CGROUP ++ bool "IO controller" ++ depends on BLOCK ++ default n ++ ---help--- ++ Generic block IO controller cgroup interface. This is the common ++ cgroup interface which should be used by various IO controlling ++ policies. ++ ++ Currently, CFQ IO scheduler uses it to recognize task groups and ++ control disk bandwidth allocation (proportional time slice allocation) ++ to such task groups. It is also used by bio throttling logic in ++ block layer to implement upper limit in IO rates on a device. ++ ++ This option only enables generic Block IO controller infrastructure. ++ One needs to also enable actual IO controlling logic/policy. For ++ enabling proportional weight division of disk bandwidth in CFQ, set ++ CONFIG_CFQ_GROUP_IOSCHED=y; for enabling throttling policy, set ++ CONFIG_BLK_DEV_THROTTLING=y. ++ ++ See Documentation/cgroup-v1/blkio-controller.txt for more information. ++ ++config DEBUG_BLK_CGROUP ++ bool "IO controller debugging" ++ depends on BLK_CGROUP ++ default n ++ ---help--- ++ Enable some debugging help. Currently it exports additional stat ++ files in a cgroup which can be useful for debugging. ++ ++config CGROUP_WRITEBACK ++ bool ++ depends on MEMCG && BLK_CGROUP ++ default y ++ ++menuconfig CGROUP_SCHED ++ bool "CPU controller" ++ default n ++ help ++ This feature lets CPU scheduler recognize task groups and control CPU ++ bandwidth allocation to such task groups. It uses cgroups to group ++ tasks. ++ ++if CGROUP_SCHED ++config FAIR_GROUP_SCHED ++ bool "Group scheduling for SCHED_OTHER" ++ depends on CGROUP_SCHED ++ default CGROUP_SCHED ++ ++config CFS_BANDWIDTH ++ bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" ++ depends on FAIR_GROUP_SCHED ++ default n ++ help ++ This option allows users to define CPU bandwidth rates (limits) for ++ tasks running within the fair group scheduler. Groups with no limit ++ set are considered to be unconstrained and will run with no ++ restriction. ++ See Documentation/scheduler/sched-bwc.txt for more information. ++ ++config RT_GROUP_SCHED ++ bool "Group scheduling for SCHED_RR/FIFO" ++ depends on CGROUP_SCHED ++ default n ++ help ++ This feature lets you explicitly allocate real CPU bandwidth ++ to task groups. If enabled, it will also make it impossible to ++ schedule realtime tasks for non-root users until you allocate ++ realtime bandwidth for them. ++ See Documentation/scheduler/sched-rt-group.txt for more information. ++ ++endif #CGROUP_SCHED ++ ++config CGROUP_PIDS ++ bool "PIDs controller" ++ help ++ Provides enforcement of process number limits in the scope of a ++ cgroup. Any attempt to fork more processes than is allowed in the ++ cgroup will fail. PIDs are fundamentally a global resource because it ++ is fairly trivial to reach PID exhaustion before you reach even a ++ conservative kmemcg limit. As a result, it is possible to grind a ++ system to halt without being limited by other cgroup policies. The ++ PIDs controller is designed to stop this from happening. ++ ++ It should be noted that organisational operations (such as attaching ++ to a cgroup hierarchy will *not* be blocked by the PIDs controller), ++ since the PIDs limit only affects a process's ability to fork, not to ++ attach to a cgroup. ++ ++config CGROUP_RDMA ++ bool "RDMA controller" ++ help ++ Provides enforcement of RDMA resources defined by IB stack. 
++ It is fairly easy for consumers to exhaust RDMA resources, which ++ can result into resource unavailability to other consumers. ++ RDMA controller is designed to stop this from happening. ++ Attaching processes with active RDMA resources to the cgroup ++ hierarchy is allowed even if can cross the hierarchy's limit. ++ ++config CGROUP_FREEZER ++ bool "Freezer controller" ++ help ++ Provides a way to freeze and unfreeze all tasks in a ++ cgroup. ++ ++ This option affects the ORIGINAL cgroup interface. The cgroup2 memory ++ controller includes important in-kernel memory consumers per default. ++ ++ If you're using cgroup2, say N. ++ ++config CGROUP_HUGETLB ++ bool "HugeTLB controller" ++ depends on HUGETLB_PAGE ++ select PAGE_COUNTER ++ default n ++ help ++ Provides a cgroup controller for HugeTLB pages. ++ When you enable this, you can put a per cgroup limit on HugeTLB usage. ++ The limit is enforced during page fault. Since HugeTLB doesn't ++ support page reclaim, enforcing the limit at page fault time implies ++ that, the application will get SIGBUS signal if it tries to access ++ HugeTLB pages beyond its limit. This requires the application to know ++ beforehand how much HugeTLB pages it would require for its use. The ++ control group is tracked in the third page lru pointer. This means ++ that we cannot use the controller with huge page less than 3 pages. ++ ++config CPUSETS ++ bool "Cpuset controller" ++ depends on SMP ++ help ++ This option will let you create and manage CPUSETs which ++ allow dynamically partitioning a system into sets of CPUs and ++ Memory Nodes and assigning tasks to run only within those sets. ++ This is primarily useful on large SMP or NUMA systems. ++ ++ Say N if unsure. ++ ++config PROC_PID_CPUSET ++ bool "Include legacy /proc//cpuset file" ++ depends on CPUSETS ++ default y ++ ++config CGROUP_DEVICE ++ bool "Device controller" ++ help ++ Provides a cgroup controller implementing whitelists for ++ devices which a process in the cgroup can mknod or open. ++ ++config CGROUP_CPUACCT ++ bool "Simple CPU accounting controller" ++ help ++ Provides a simple controller for monitoring the ++ total CPU consumed by the tasks in a cgroup. ++ ++config CGROUP_PERF ++ bool "Perf controller" ++ depends on PERF_EVENTS ++ help ++ This option extends the perf per-cpu mode to restrict monitoring ++ to threads which belong to the cgroup specified and run on the ++ designated cpu. ++ ++ Say N if unsure. ++ ++config CGROUP_BPF ++ bool "Support for eBPF programs attached to cgroups" ++ depends on BPF_SYSCALL ++ select SOCK_CGROUP_DATA ++ help ++ Allow attaching eBPF programs to a cgroup using the bpf(2) ++ syscall command BPF_PROG_ATTACH. ++ ++ In which context these programs are accessed depends on the type ++ of attachment. For instance, programs that are attached using ++ BPF_CGROUP_INET_INGRESS will be executed on the ingress path of ++ inet sockets. ++ ++config CGROUP_DEBUG ++ bool "Debug controller" ++ default n ++ depends on DEBUG_KERNEL ++ help ++ This option enables a simple controller that exports ++ debugging information about the cgroups framework. This ++ controller is for control cgroup debugging only. Its ++ interfaces are not stable. ++ ++ Say N. ++ ++config SOCK_CGROUP_DATA ++ bool ++ default n ++ ++config CGROUP_FILES ++ bool "Files Resource Controller for Control Groups" ++ select PAGE_COUNTER ++ default n ++ help ++ Provides a cgroup resource controller that limits number of open ++ file handles within a cgroup. 
++ This supports catching misbehaving processes and ++ return EMFILE instead of ENOMEM for kernel memory limits. ++ ++endif # CGROUPS ++ ++menuconfig NAMESPACES ++ bool "Namespaces support" if EXPERT ++ depends on MULTIUSER ++ default !EXPERT ++ help ++ Provides the way to make tasks work with different objects using ++ the same id. For example same IPC id may refer to different objects ++ or same user id or pid may refer to different tasks when used in ++ different namespaces. ++ ++if NAMESPACES ++ ++config UTS_NS ++ bool "UTS namespace" ++ default y ++ help ++ In this namespace tasks see different info provided with the ++ uname() system call ++ ++config IPC_NS ++ bool "IPC namespace" ++ depends on (SYSVIPC || POSIX_MQUEUE) ++ default y ++ help ++ In this namespace tasks work with IPC ids which correspond to ++ different IPC objects in different namespaces. ++ ++config USER_NS ++ bool "User namespace" ++ default n ++ help ++ This allows containers, i.e. vservers, to use user namespaces ++ to provide different user info for different servers. ++ ++ When user namespaces are enabled in the kernel it is ++ recommended that the MEMCG option also be enabled and that ++ user-space use the memory control groups to limit the amount ++ of memory a memory unprivileged users can use. ++ ++ If unsure, say N. ++ ++config PID_NS ++ bool "PID Namespaces" ++ default y ++ help ++ Support process id namespaces. This allows having multiple ++ processes with the same pid as long as they are in different ++ pid namespaces. This is a building block of containers. ++ ++config NET_NS ++ bool "Network namespace" ++ depends on NET ++ default y ++ help ++ Allow user space to create what appear to be multiple instances ++ of the network stack. ++ ++endif # NAMESPACES ++ ++config CHECKPOINT_RESTORE ++ bool "Checkpoint/restore support" ++ select PROC_CHILDREN ++ default n ++ help ++ Enables additional kernel features in a sake of checkpoint/restore. ++ In particular it adds auxiliary prctl codes to setup process text, ++ data and heap segment sizes, and a few additional /proc filesystem ++ entries. ++ ++ If unsure, say N here. ++ ++config SCHED_AUTOGROUP ++ bool "Automatic process group scheduling" ++ select CGROUPS ++ select CGROUP_SCHED ++ select FAIR_GROUP_SCHED ++ help ++ This option optimizes the scheduler for common desktop workloads by ++ automatically creating and populating task groups. This separation ++ of workloads isolates aggressive CPU burners (like build jobs) from ++ desktop applications. Task group autogeneration is currently based ++ upon task session. ++ ++config SYSFS_DEPRECATED ++ bool "Enable deprecated sysfs features to support old userspace tools" ++ depends on SYSFS ++ default n ++ help ++ This option adds code that switches the layout of the "block" class ++ devices, to not show up in /sys/class/block/, but only in ++ /sys/block/. ++ ++ This switch is only active when the sysfs.deprecated=1 boot option is ++ passed or the SYSFS_DEPRECATED_V2 option is set. ++ ++ This option allows new kernels to run on old distributions and tools, ++ which might get confused by /sys/class/block/. Since 2007/2008 all ++ major distributions and tools handle this just fine. ++ ++ Recent distributions and userspace tools after 2009/2010 depend on ++ the existence of /sys/class/block/, and will not work with this ++ option enabled. ++ ++ Only if you are using a new kernel on an old distribution, you might ++ need to say Y here. 
++ ++config SYSFS_DEPRECATED_V2 ++ bool "Enable deprecated sysfs features by default" ++ default n ++ depends on SYSFS ++ depends on SYSFS_DEPRECATED ++ help ++ Enable deprecated sysfs by default. ++ ++ See the CONFIG_SYSFS_DEPRECATED option for more details about this ++ option. ++ ++ Only if you are using a new kernel on an old distribution, you might ++ need to say Y here. Even then, odds are you would not need it ++ enabled, you can always pass the boot option if absolutely necessary. ++ ++config RELAY ++ bool "Kernel->user space relay support (formerly relayfs)" ++ select IRQ_WORK ++ help ++ This option enables support for relay interface support in ++ certain file systems (such as debugfs). ++ It is designed to provide an efficient mechanism for tools and ++ facilities to relay large amounts of data from kernel space to ++ user space. ++ ++ If unsure, say N. ++ ++config BLK_DEV_INITRD ++ bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support" ++ help ++ The initial RAM filesystem is a ramfs which is loaded by the ++ boot loader (loadlin or lilo) and that is mounted as root ++ before the normal boot procedure. It is typically used to ++ load modules needed to mount the "real" root file system, ++ etc. See for details. ++ ++ If RAM disk support (BLK_DEV_RAM) is also included, this ++ also enables initial RAM disk (initrd) support and adds ++ 15 Kbytes (more on some other architectures) to the kernel size. ++ ++ If unsure say Y. ++ ++if BLK_DEV_INITRD ++ ++source "usr/Kconfig" ++ ++endif ++ ++choice ++ prompt "Compiler optimization level" ++ default CC_OPTIMIZE_FOR_PERFORMANCE ++ ++config CC_OPTIMIZE_FOR_PERFORMANCE ++ bool "Optimize for performance" ++ help ++ This is the default optimization level for the kernel, building ++ with the "-O2" compiler flag for best performance and most ++ helpful compile-time warnings. ++ ++config CC_OPTIMIZE_FOR_SIZE ++ bool "Optimize for size" ++ help ++ Enabling this option will pass "-Os" instead of "-O2" to ++ your compiler resulting in a smaller kernel. ++ ++ If unsure, say N. ++ ++endchoice ++ ++config HAVE_LD_DEAD_CODE_DATA_ELIMINATION ++ bool ++ help ++ This requires that the arch annotates or otherwise protects ++ its external entry points from being discarded. Linker scripts ++ must also merge .text.*, .data.*, and .bss.* correctly into ++ output sections. Care must be taken not to pull in unrelated ++ sections (e.g., '.text.init'). Typically '.' in section names ++ is used to distinguish them from label names / C identifiers. ++ ++config LD_DEAD_CODE_DATA_ELIMINATION ++ bool "Dead code and data elimination (EXPERIMENTAL)" ++ depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION ++ depends on EXPERT ++ depends on !(FUNCTION_TRACER && CC_IS_GCC && GCC_VERSION < 40800) ++ depends on $(cc-option,-ffunction-sections -fdata-sections) ++ depends on $(ld-option,--gc-sections) ++ help ++ Enable this if you want to do dead code and data elimination with ++ the linker by compiling with -ffunction-sections -fdata-sections, ++ and linking with --gc-sections. ++ ++ This can reduce on disk and in-memory size of the kernel ++ code and static data, particularly for small configs and ++ on small systems. This has the possibility of introducing ++ silently broken kernel if the required annotations are not ++ present. This option is not well tested yet, so use at your ++ own risk. 
++ ++config SYSCTL ++ bool ++ ++config ANON_INODES ++ bool ++ ++config HAVE_UID16 ++ bool ++ ++config SYSCTL_EXCEPTION_TRACE ++ bool ++ help ++ Enable support for /proc/sys/debug/exception-trace. ++ ++config SYSCTL_ARCH_UNALIGN_NO_WARN ++ bool ++ help ++ Enable support for /proc/sys/kernel/ignore-unaligned-usertrap ++ Allows arch to define/use @no_unaligned_warning to possibly warn ++ about unaligned access emulation going on under the hood. ++ ++config SYSCTL_ARCH_UNALIGN_ALLOW ++ bool ++ help ++ Enable support for /proc/sys/kernel/unaligned-trap ++ Allows arches to define/use @unaligned_enabled to runtime toggle ++ the unaligned access emulation. ++ see arch/parisc/kernel/unaligned.c for reference ++ ++config HAVE_PCSPKR_PLATFORM ++ bool ++ ++# interpreter that classic socket filters depend on ++config BPF ++ bool ++ ++menuconfig EXPERT ++ bool "Configure standard kernel features (expert users)" ++ # Unhide debug options, to make the on-by-default options visible ++ select DEBUG_KERNEL ++ help ++ This option allows certain base kernel options and settings ++ to be disabled or tweaked. This is for specialized ++ environments which can tolerate a "non-standard" kernel. ++ Only use this if you really know what you are doing. ++ ++config UID16 ++ bool "Enable 16-bit UID system calls" if EXPERT ++ depends on HAVE_UID16 && MULTIUSER ++ default y ++ help ++ This enables the legacy 16-bit UID syscall wrappers. ++ ++config MULTIUSER ++ bool "Multiple users, groups and capabilities support" if EXPERT ++ default y ++ help ++ This option enables support for non-root users, groups and ++ capabilities. ++ ++ If you say N here, all processes will run with UID 0, GID 0, and all ++ possible capabilities. Saying N here also compiles out support for ++ system calls related to UIDs, GIDs, and capabilities, such as setuid, ++ setgid, and capset. ++ ++ If unsure, say Y here. ++ ++config SGETMASK_SYSCALL ++ bool "sgetmask/ssetmask syscalls support" if EXPERT ++ def_bool PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH ++ ---help--- ++ sys_sgetmask and sys_ssetmask are obsolete system calls ++ no longer supported in libc but still enabled by default in some ++ architectures. ++ ++ If unsure, leave the default option here. ++ ++config SYSFS_SYSCALL ++ bool "Sysfs syscall support" if EXPERT ++ default y ++ ---help--- ++ sys_sysfs is an obsolete system call no longer supported in libc. ++ Note that disabling this option is more secure but might break ++ compatibility with some systems. ++ ++ If unsure say Y here. ++ ++config SYSCTL_SYSCALL ++ bool "Sysctl syscall support" if EXPERT ++ depends on PROC_SYSCTL ++ default n ++ select SYSCTL ++ ---help--- ++ sys_sysctl uses binary paths that have been found challenging ++ to properly maintain and use. The interface in /proc/sys ++ using paths with ascii names is now the primary path to this ++ information. ++ ++ Almost nothing using the binary sysctl interface so if you are ++ trying to save some space it is probably safe to disable this, ++ making your kernel marginally smaller. ++ ++ If unsure say N here. ++ ++config FHANDLE ++ bool "open by fhandle syscalls" if EXPERT ++ select EXPORTFS ++ default y ++ help ++ If you say Y here, a user level program will be able to map ++ file names to handle and then later use the handle for ++ different file system operations. This is useful in implementing ++ userspace file servers, which now track files using handles instead ++ of names. The handle would remain the same even if file names ++ get renamed. 
Enables open_by_handle_at(2) and name_to_handle_at(2) ++ syscalls. ++ ++config POSIX_TIMERS ++ bool "Posix Clocks & timers" if EXPERT ++ default y ++ help ++ This includes native support for POSIX timers to the kernel. ++ Some embedded systems have no use for them and therefore they ++ can be configured out to reduce the size of the kernel image. ++ ++ When this option is disabled, the following syscalls won't be ++ available: timer_create, timer_gettime: timer_getoverrun, ++ timer_settime, timer_delete, clock_adjtime, getitimer, ++ setitimer, alarm. Furthermore, the clock_settime, clock_gettime, ++ clock_getres and clock_nanosleep syscalls will be limited to ++ CLOCK_REALTIME, CLOCK_MONOTONIC and CLOCK_BOOTTIME only. ++ ++ If unsure say y. ++ ++config PRINTK ++ default y ++ bool "Enable support for printk" if EXPERT ++ select IRQ_WORK ++ help ++ This option enables normal printk support. Removing it ++ eliminates most of the message strings from the kernel image ++ and makes the kernel more or less silent. As this makes it ++ very difficult to diagnose system problems, saying N here is ++ strongly discouraged. ++ ++config PRINTK_NMI ++ def_bool y ++ depends on PRINTK ++ depends on HAVE_NMI ++ ++config BUG ++ bool "BUG() support" if EXPERT ++ default y ++ help ++ Disabling this option eliminates support for BUG and WARN, reducing ++ the size of your kernel image and potentially quietly ignoring ++ numerous fatal conditions. You should only consider disabling this ++ option for embedded systems with no facilities for reporting errors. ++ Just say Y. ++ ++config ELF_CORE ++ depends on COREDUMP ++ default y ++ bool "Enable ELF core dumps" if EXPERT ++ help ++ Enable support for generating core dumps. Disabling saves about 4k. ++ ++ ++config PCSPKR_PLATFORM ++ bool "Enable PC-Speaker support" if EXPERT ++ depends on HAVE_PCSPKR_PLATFORM ++ select I8253_LOCK ++ default y ++ help ++ This option allows to disable the internal PC-Speaker ++ support, saving some memory. ++ ++config BASE_FULL ++ default y ++ bool "Enable full-sized data structures for core" if EXPERT ++ help ++ Disabling this option reduces the size of miscellaneous core ++ kernel data structures. This saves memory on small machines, ++ but may reduce performance. ++ ++config FUTEX ++ bool "Enable futex support" if EXPERT ++ default y ++ imply RT_MUTEXES ++ help ++ Disabling this option will cause the kernel to be built without ++ support for "fast userspace mutexes". The resulting kernel may not ++ run glibc-based applications correctly. ++ ++config FUTEX_PI ++ bool ++ depends on FUTEX && RT_MUTEXES ++ default y ++ ++config HAVE_FUTEX_CMPXCHG ++ bool ++ depends on FUTEX ++ help ++ Architectures should select this if futex_atomic_cmpxchg_inatomic() ++ is implemented and always working. This removes a couple of runtime ++ checks. ++ ++config EPOLL ++ bool "Enable eventpoll support" if EXPERT ++ default y ++ select ANON_INODES ++ help ++ Disabling this option will cause the kernel to be built without ++ support for epoll family of system calls. ++ ++config SIGNALFD ++ bool "Enable signalfd() system call" if EXPERT ++ select ANON_INODES ++ default y ++ help ++ Enable the signalfd() system call that allows to receive signals ++ on a file descriptor. ++ ++ If unsure, say Y. ++ ++config TIMERFD ++ bool "Enable timerfd() system call" if EXPERT ++ select ANON_INODES ++ default y ++ help ++ Enable the timerfd() system call that allows to receive timer ++ events on a file descriptor. ++ ++ If unsure, say Y. 
++ ++config EVENTFD ++ bool "Enable eventfd() system call" if EXPERT ++ select ANON_INODES ++ default y ++ help ++ Enable the eventfd() system call that allows to receive both ++ kernel notification (ie. KAIO) or userspace notifications. ++ ++ If unsure, say Y. ++ ++config SHMEM ++ bool "Use full shmem filesystem" if EXPERT ++ default y ++ depends on MMU ++ help ++ The shmem is an internal filesystem used to manage shared memory. ++ It is backed by swap and manages resource limits. It is also exported ++ to userspace as tmpfs if TMPFS is enabled. Disabling this ++ option replaces shmem and tmpfs with the much simpler ramfs code, ++ which may be appropriate on small systems without swap. ++ ++config AIO ++ bool "Enable AIO support" if EXPERT ++ default y ++ help ++ This option enables POSIX asynchronous I/O which may by used ++ by some high performance threaded applications. Disabling ++ this option saves about 7k. ++ ++config ADVISE_SYSCALLS ++ bool "Enable madvise/fadvise syscalls" if EXPERT ++ default y ++ help ++ This option enables the madvise and fadvise syscalls, used by ++ applications to advise the kernel about their future memory or file ++ usage, improving performance. If building an embedded system where no ++ applications use these syscalls, you can disable this option to save ++ space. ++ ++config MEMBARRIER ++ bool "Enable membarrier() system call" if EXPERT ++ default y ++ help ++ Enable the membarrier() system call that allows issuing memory ++ barriers across all running threads, which can be used to distribute ++ the cost of user-space memory barriers asymmetrically by transforming ++ pairs of memory barriers into pairs consisting of membarrier() and a ++ compiler barrier. ++ ++ If unsure, say Y. ++ ++config KALLSYMS ++ bool "Load all symbols for debugging/ksymoops" if EXPERT ++ default y ++ help ++ Say Y here to let the kernel print out symbolic crash information and ++ symbolic stack backtraces. This increases the size of the kernel ++ somewhat, as all symbols have to be loaded into the kernel image. ++ ++config KALLSYMS_ALL ++ bool "Include all symbols in kallsyms" ++ depends on DEBUG_KERNEL && KALLSYMS ++ help ++ Normally kallsyms only contains the symbols of functions for nicer ++ OOPS messages and backtraces (i.e., symbols from the text and inittext ++ sections). This is sufficient for most cases. And only in very rare ++ cases (e.g., when a debugger is used) all symbols are required (e.g., ++ names of variables from the data sections, etc). ++ ++ This option makes sure that all symbols are loaded into the kernel ++ image (i.e., symbols from all sections) in cost of increased kernel ++ size (depending on the kernel configuration, it may be 300KiB or ++ something like this). ++ ++ Say N unless you really need all symbols. ++ ++config KALLSYMS_ABSOLUTE_PERCPU ++ bool ++ depends on KALLSYMS ++ default X86_64 && SMP ++ ++config KALLSYMS_BASE_RELATIVE ++ bool ++ depends on KALLSYMS ++ default !IA64 ++ help ++ Instead of emitting them as absolute values in the native word size, ++ emit the symbol references in the kallsyms table as 32-bit entries, ++ each containing a relative value in the range [base, base + U32_MAX] ++ or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either ++ an absolute value in the range [0, S32_MAX] or a relative value in the ++ range [base, base + S32_MAX], where base is the lowest relative symbol ++ address encountered in the image. 
++ ++ On 64-bit builds, this reduces the size of the address table by 50%, ++ but more importantly, it results in entries whose values are build ++ time constants, and no relocation pass is required at runtime to fix ++ up the entries based on the runtime load address of the kernel. ++ ++# end of the "standard kernel features (expert users)" menu ++ ++# syscall, maps, verifier ++config BPF_SYSCALL ++ bool "Enable bpf() system call" ++ select ANON_INODES ++ select BPF ++ select IRQ_WORK ++ default n ++ help ++ Enable the bpf() system call that allows to manipulate eBPF ++ programs and maps via file descriptors. ++ ++config BPF_JIT_ALWAYS_ON ++ bool "Permanently enable BPF JIT and remove BPF interpreter" ++ depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT ++ help ++ Enables BPF JIT and removes BPF interpreter to avoid ++ speculative execution of BPF instructions by the interpreter ++ ++config USERFAULTFD ++ bool "Enable userfaultfd() system call" ++ select ANON_INODES ++ depends on MMU ++ help ++ Enable the userfaultfd() system call that allows to intercept and ++ handle page faults in userland. ++ ++config ARCH_HAS_MEMBARRIER_CALLBACKS ++ bool ++ ++config ARCH_HAS_MEMBARRIER_SYNC_CORE ++ bool ++ ++config RSEQ ++ bool "Enable rseq() system call" if EXPERT ++ default y ++ depends on HAVE_RSEQ ++ select MEMBARRIER ++ help ++ Enable the restartable sequences system call. It provides a ++ user-space cache for the current CPU number value, which ++ speeds up getting the current CPU number from user-space, ++ as well as an ABI to speed up user-space operations on ++ per-CPU data. ++ ++ If unsure, say Y. ++ ++config DEBUG_RSEQ ++ default n ++ bool "Enabled debugging of rseq() system call" if EXPERT ++ depends on RSEQ && DEBUG_KERNEL ++ help ++ Enable extra debugging checks for the rseq system call. ++ ++ If unsure, say N. ++ ++config EMBEDDED ++ bool "Embedded system" ++ option allnoconfig_y ++ select EXPERT ++ help ++ This option should be enabled if compiling the kernel for ++ an embedded system so certain expert options are available ++ for configuration. ++ ++config HAVE_PERF_EVENTS ++ bool ++ help ++ See tools/perf/design.txt for details. ++ ++config PERF_USE_VMALLOC ++ bool ++ help ++ See tools/perf/design.txt for details ++ ++config PC104 ++ bool "PC/104 support" if EXPERT ++ help ++ Expose PC/104 form factor device drivers and options available for ++ selection and configuration. Enable this option if your target ++ machine has a PC/104 bus. ++ ++menu "Kernel Performance Events And Counters" ++ ++config PERF_EVENTS ++ bool "Kernel performance events and counters" ++ default y if PROFILING ++ depends on HAVE_PERF_EVENTS ++ select ANON_INODES ++ select IRQ_WORK ++ select SRCU ++ help ++ Enable kernel support for various performance events provided ++ by software and hardware. ++ ++ Software events are supported either built-in or via the ++ use of generic tracepoints. ++ ++ Most modern CPUs support performance events via performance ++ counter registers. These registers count the number of certain ++ types of hw events: such as instructions executed, cachemisses ++ suffered, or branches mis-predicted - without slowing down the ++ kernel or applications. These registers can also trigger interrupts ++ when a threshold number of events have passed - and can thus be ++ used to profile the code that runs on that CPU. 
++ ++ The Linux Performance Event subsystem provides an abstraction of ++ these software and hardware event capabilities, available via a ++ system call and used by the "perf" utility in tools/perf/. It ++ provides per task and per CPU counters, and it provides event ++ capabilities on top of those. ++ ++ Say Y if unsure. ++ ++config DEBUG_PERF_USE_VMALLOC ++ default n ++ bool "Debug: use vmalloc to back perf mmap() buffers" ++ depends on PERF_EVENTS && DEBUG_KERNEL && !PPC ++ select PERF_USE_VMALLOC ++ help ++ Use vmalloc memory to back perf mmap() buffers. ++ ++ Mostly useful for debugging the vmalloc code on platforms ++ that don't require it. ++ ++ Say N if unsure. ++ ++endmenu ++ ++config VM_EVENT_COUNTERS ++ default y ++ bool "Enable VM event counters for /proc/vmstat" if EXPERT ++ help ++ VM event counters are needed for event counts to be shown. ++ This option allows the disabling of the VM event counters ++ on EXPERT systems. /proc/vmstat will only show page counts ++ if VM event counters are disabled. ++ ++config SLUB_DEBUG ++ default y ++ bool "Enable SLUB debugging support" if EXPERT ++ depends on SLUB && SYSFS ++ help ++ SLUB has extensive debug support features. Disabling these can ++ result in significant savings in code size. This also disables ++ SLUB sysfs support. /sys/slab will not exist and there will be ++ no support for cache validation etc. ++ ++config SLUB_MEMCG_SYSFS_ON ++ default n ++ bool "Enable memcg SLUB sysfs support by default" if EXPERT ++ depends on SLUB && SYSFS && MEMCG ++ help ++ SLUB creates a directory under /sys/kernel/slab for each ++ allocation cache to host info and debug files. If memory ++ cgroup is enabled, each cache can have per memory cgroup ++ caches. SLUB can create the same sysfs directories for these ++ caches under /sys/kernel/slab/CACHE/cgroup but it can lead ++ to a very high number of debug files being created. This is ++ controlled by slub_memcg_sysfs boot parameter and this ++ config option determines the parameter's default value. ++ ++config COMPAT_BRK ++ bool "Disable heap randomization" ++ default y ++ help ++ Randomizing heap placement makes heap exploits harder, but it ++ also breaks ancient binaries (including anything libc5 based). ++ This option changes the bootup default to heap randomization ++ disabled, and can be overridden at runtime by setting ++ /proc/sys/kernel/randomize_va_space to 2. ++ ++ On non-ancient distros (post-2000 ones) N is usually a safe choice. ++ ++choice ++ prompt "Choose SLAB allocator" ++ default SLUB ++ help ++ This option allows to select a slab allocator. ++ ++config SLAB ++ bool "SLAB" ++ select HAVE_HARDENED_USERCOPY_ALLOCATOR ++ help ++ The regular slab allocator that is established and known to work ++ well in all environments. It organizes cache hot objects in ++ per cpu and per node queues. ++ ++config SLUB ++ bool "SLUB (Unqueued Allocator)" ++ select HAVE_HARDENED_USERCOPY_ALLOCATOR ++ help ++ SLUB is a slab allocator that minimizes cache line usage ++ instead of managing queues of cached objects (SLAB approach). ++ Per cpu caching is realized using slabs of objects instead ++ of queues of objects. SLUB can use memory efficiently ++ and has enhanced diagnostics. SLUB is the default choice for ++ a slab allocator. ++ ++config SLOB ++ depends on EXPERT ++ bool "SLOB (Simple Allocator)" ++ help ++ SLOB replaces the stock allocator with a drastically simpler ++ allocator. SLOB is generally more space efficient but ++ does not perform as well on large systems. 
++ ++endchoice ++ ++config SLAB_MERGE_DEFAULT ++ bool "Allow slab caches to be merged" ++ default y ++ help ++ For reduced kernel memory fragmentation, slab caches can be ++ merged when they share the same size and other characteristics. ++ This carries a risk of kernel heap overflows being able to ++ overwrite objects from merged caches (and more easily control ++ cache layout), which makes such heap attacks easier to exploit ++ by attackers. By keeping caches unmerged, these kinds of exploits ++ can usually only damage objects in the same cache. To disable ++ merging at runtime, "slab_nomerge" can be passed on the kernel ++ command line. ++ ++config SLAB_FREELIST_RANDOM ++ default n ++ depends on SLAB || SLUB ++ bool "SLAB freelist randomization" ++ help ++ Randomizes the freelist order used on creating new pages. This ++ security feature reduces the predictability of the kernel slab ++ allocator against heap overflows. ++ ++config SLAB_FREELIST_HARDENED ++ bool "Harden slab freelist metadata" ++ depends on SLUB ++ help ++ Many kernel heap attacks try to target slab cache metadata and ++ other infrastructure. This options makes minor performance ++ sacrifies to harden the kernel slab allocator against common ++ freelist exploit methods. ++ ++config SLUB_CPU_PARTIAL ++ default y ++ depends on SLUB && SMP ++ bool "SLUB per cpu partial cache" ++ help ++ Per cpu partial caches accellerate objects allocation and freeing ++ that is local to a processor at the price of more indeterminism ++ in the latency of the free. On overflow these caches will be cleared ++ which requires the taking of locks that may cause latency spikes. ++ Typically one would choose no for a realtime system. ++ ++config MMAP_ALLOW_UNINITIALIZED ++ bool "Allow mmapped anonymous memory to be uninitialized" ++ depends on EXPERT && !MMU ++ default n ++ help ++ Normally, and according to the Linux spec, anonymous memory obtained ++ from mmap() has its contents cleared before it is passed to ++ userspace. Enabling this config option allows you to request that ++ mmap() skip that if it is given an MAP_UNINITIALIZED flag, thus ++ providing a huge performance boost. If this option is not enabled, ++ then the flag will be ignored. ++ ++ This is taken advantage of by uClibc's malloc(), and also by ++ ELF-FDPIC binfmt's brk and stack allocator. ++ ++ Because of the obvious security issues, this option should only be ++ enabled on embedded devices where you control what is run in ++ userspace. Since that isn't generally a problem on no-MMU systems, ++ it is normally safe to say Y here. ++ ++ See Documentation/nommu-mmap.txt for more information. ++ ++config SYSTEM_DATA_VERIFICATION ++ def_bool n ++ select SYSTEM_TRUSTED_KEYRING ++ select KEYS ++ select CRYPTO ++ select CRYPTO_RSA ++ select ASYMMETRIC_KEY_TYPE ++ select ASYMMETRIC_PUBLIC_KEY_SUBTYPE ++ select ASN1 ++ select OID_REGISTRY ++ select X509_CERTIFICATE_PARSER ++ select PKCS7_MESSAGE_PARSER ++ help ++ Provide PKCS#7 message verification using the contents of the system ++ trusted keyring to provide public keys. This then can be used for ++ module verification, kexec image verification and firmware blob ++ verification. ++ ++config PROFILING ++ bool "Profiling support" ++ help ++ Say Y here to enable the extended profiling support mechanisms used ++ by profilers such as OProfile. ++ ++# ++# Place an empty function call at each tracepoint site. Can be ++# dynamically changed for a probe function. 
++# ++config TRACEPOINTS ++ bool ++ ++endmenu # General setup ++ ++source "arch/Kconfig" ++ ++config RT_MUTEXES ++ bool ++ ++config BASE_SMALL ++ int ++ default 0 if BASE_FULL ++ default 1 if !BASE_FULL ++ ++menuconfig MODULES ++ bool "Enable loadable module support" ++ option modules ++ help ++ Kernel modules are small pieces of compiled code which can ++ be inserted in the running kernel, rather than being ++ permanently built into the kernel. You use the "modprobe" ++ tool to add (and sometimes remove) them. If you say Y here, ++ many parts of the kernel can be built as modules (by ++ answering M instead of Y where indicated): this is most ++ useful for infrequently used options which are not required ++ for booting. For more information, see the man pages for ++ modprobe, lsmod, modinfo, insmod and rmmod. ++ ++ If you say Y here, you will need to run "make ++ modules_install" to put the modules under /lib/modules/ ++ where modprobe can find them (you may need to be root to do ++ this). ++ ++ If unsure, say Y. ++ ++if MODULES ++ ++config MODULE_FORCE_LOAD ++ bool "Forced module loading" ++ default n ++ help ++ Allow loading of modules without version information (ie. modprobe ++ --force). Forced module loading sets the 'F' (forced) taint flag and ++ is usually a really bad idea. ++ ++config MODULE_UNLOAD ++ bool "Module unloading" ++ help ++ Without this option you will not be able to unload any ++ modules (note that some modules may not be unloadable ++ anyway), which makes your kernel smaller, faster ++ and simpler. If unsure, say Y. ++ ++config MODULE_FORCE_UNLOAD ++ bool "Forced module unloading" ++ depends on MODULE_UNLOAD ++ help ++ This option allows you to force a module to unload, even if the ++ kernel believes it is unsafe: the kernel will remove the module ++ without waiting for anyone to stop using it (using the -f option to ++ rmmod). This is mainly for kernel developers and desperate users. ++ If unsure, say N. ++ ++config MODVERSIONS ++ bool "Module versioning support" ++ help ++ Usually, you have to use modules compiled with your kernel. ++ Saying Y here makes it sometimes possible to use modules ++ compiled for different kernels, by adding enough information ++ to the modules to (hopefully) spot any changes which would ++ make them incompatible with the kernel you are running. If ++ unsure, say N. ++ ++config MODULE_REL_CRCS ++ bool ++ depends on MODVERSIONS ++ ++config MODULE_SRCVERSION_ALL ++ bool "Source checksum for all modules" ++ help ++ Modules which contain a MODULE_VERSION get an extra "srcversion" ++ field inserted into their modinfo section, which contains a ++ sum of the source files which made it. This helps maintainers ++ see exactly which source was used to build a module (since ++ others sometimes change the module source without updating ++ the version). With this option, such a "srcversion" field ++ will be created for all modules. If unsure, say N. ++ ++config MODULE_SIG ++ bool "Module signature verification" ++ depends on MODULES ++ select SYSTEM_DATA_VERIFICATION ++ help ++ Check modules for valid signatures upon load: the signature ++ is simply appended to the module. For more information see ++ . ++ ++ Note that this option adds the OpenSSL development packages as a ++ kernel build dependency so that the signing tool can use its crypto ++ library. ++ ++ !!!WARNING!!! If you enable this option, you MUST make sure that the ++ module DOES NOT get stripped after being signed. 
This includes the ++ debuginfo strip done by some packagers (such as rpmbuild) and ++ inclusion into an initramfs that wants the module size reduced. ++ ++config MODULE_SIG_FORCE ++ bool "Require modules to be validly signed" ++ depends on MODULE_SIG ++ help ++ Reject unsigned modules or signed modules for which we don't have a ++ key. Without this, such modules will simply taint the kernel. ++ ++config MODULE_SIG_ALL ++ bool "Automatically sign all modules" ++ default y ++ depends on MODULE_SIG ++ help ++ Sign all modules during make modules_install. Without this option, ++ modules must be signed manually, using the scripts/sign-file tool. ++ ++comment "Do not forget to sign required modules with scripts/sign-file" ++ depends on MODULE_SIG_FORCE && !MODULE_SIG_ALL ++ ++choice ++ prompt "Which hash algorithm should modules be signed with?" ++ depends on MODULE_SIG ++ help ++ This determines which sort of hashing algorithm will be used during ++ signature generation. This algorithm _must_ be built into the kernel ++ directly so that signature verification can take place. It is not ++ possible to load a signed module containing the algorithm to check ++ the signature on that module. ++ ++config MODULE_SIG_SHA1 ++ bool "Sign modules with SHA-1" ++ select CRYPTO_SHA1 ++ ++config MODULE_SIG_SHA224 ++ bool "Sign modules with SHA-224" ++ select CRYPTO_SHA256 ++ ++config MODULE_SIG_SHA256 ++ bool "Sign modules with SHA-256" ++ select CRYPTO_SHA256 ++ ++config MODULE_SIG_SHA384 ++ bool "Sign modules with SHA-384" ++ select CRYPTO_SHA512 ++ ++config MODULE_SIG_SHA512 ++ bool "Sign modules with SHA-512" ++ select CRYPTO_SHA512 ++ ++endchoice ++ ++config MODULE_SIG_HASH ++ string ++ depends on MODULE_SIG ++ default "sha1" if MODULE_SIG_SHA1 ++ default "sha224" if MODULE_SIG_SHA224 ++ default "sha256" if MODULE_SIG_SHA256 ++ default "sha384" if MODULE_SIG_SHA384 ++ default "sha512" if MODULE_SIG_SHA512 ++ ++config MODULE_COMPRESS ++ bool "Compress modules on installation" ++ depends on MODULES ++ help ++ ++ Compresses kernel modules when 'make modules_install' is run; gzip or ++ xz depending on "Compression algorithm" below. ++ ++ module-init-tools MAY support gzip, and kmod MAY support gzip and xz. ++ ++ Out-of-tree kernel modules installed using Kbuild will also be ++ compressed upon installation. ++ ++ Note: for modules inside an initrd or initramfs, it's more efficient ++ to compress the whole initrd or initramfs instead. ++ ++ Note: This is fully compatible with signed modules. ++ ++ If in doubt, say N. ++ ++choice ++ prompt "Compression algorithm" ++ depends on MODULE_COMPRESS ++ default MODULE_COMPRESS_GZIP ++ help ++ This determines which sort of compression will be used during ++ 'make modules_install'. ++ ++ GZIP (default) and XZ are supported. ++ ++config MODULE_COMPRESS_GZIP ++ bool "GZIP" ++ ++config MODULE_COMPRESS_XZ ++ bool "XZ" ++ ++endchoice ++ ++config TRIM_UNUSED_KSYMS ++ bool "Trim unused exported kernel symbols" ++ depends on MODULES && !UNUSED_SYMBOLS ++ help ++ The kernel and some modules make many symbols available for ++ other modules to use via EXPORT_SYMBOL() and variants. Depending ++ on the set of modules being selected in your kernel configuration, ++ many of those exported symbols might never be used. ++ ++ This option allows for unused exported symbols to be dropped from ++ the build. In turn, this provides the compiler more opportunities ++ (especially when using LTO) for optimizing the code and reducing ++ binary size. This might have some security advantages as well. 
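(For context only, with hypothetical names: an "exported symbol" here is simply a function or variable published with EXPORT_SYMBOL()/EXPORT_SYMBOL_GPL(); with this option enabled, exports that no module in the configured build references can be dropped from the final image.)

    #include <linux/module.h>

    /* Hypothetical helper made visible to other modules. */
    int demo_add(int a, int b)
    {
        return a + b;
    }
    EXPORT_SYMBOL_GPL(demo_add);

    MODULE_LICENSE("GPL");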
++ ++ If unsure, or if you need to build out-of-tree modules, say N. ++ ++endif # MODULES ++ ++config MODULES_TREE_LOOKUP ++ def_bool y ++ depends on PERF_EVENTS || TRACING ++ ++config INIT_ALL_POSSIBLE ++ bool ++ help ++ Back when each arch used to define their own cpu_online_mask and ++ cpu_possible_mask, some of them chose to initialize cpu_possible_mask ++ with all 1s, and others with all 0s. When they were centralised, ++ it was better to provide this option than to break all the archs ++ and have several arch maintainers pursuing me down dark alleys. ++ ++source "block/Kconfig" ++ ++config PREEMPT_NOTIFIERS ++ bool ++ ++config PADATA ++ depends on SMP ++ bool ++ ++config ASN1 ++ tristate ++ help ++ Build a simple ASN.1 grammar compiler that produces a bytecode output ++ that can be interpreted by the ASN.1 stream decoder and used to ++ inform it as to what tags are to be expected in a stream and what ++ functions to call on what tags. ++ ++source "kernel/Kconfig.locks" ++ ++config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE ++ bool ++ ++# It may be useful for an architecture to override the definitions of the ++# SYSCALL_DEFINE() and __SYSCALL_DEFINEx() macros in ++# and the COMPAT_ variants in , in particular to use a ++# different calling convention for syscalls. They can also override the ++# macros for not-implemented syscalls in kernel/sys_ni.c and ++# kernel/time/posix-stubs.c. All these overrides need to be available in ++# . ++config ARCH_HAS_SYSCALL_WRAPPER ++ def_bool n +diff -uprN kernel/init/main.c kernel_new/init/main.c +--- kernel/init/main.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/init/main.c 2021-04-02 09:18:43.444844752 +0800 +@@ -45,6 +45,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -539,7 +540,7 @@ asmlinkage __visible void __init start_k + + cgroup_init_early(); + +- local_irq_disable(); ++ hard_local_irq_disable(); + early_boot_irqs_disabled = true; + + /* +@@ -586,6 +587,7 @@ asmlinkage __visible void __init start_k + setup_log_buf(0); + vfs_caches_init_early(); + sort_main_extable(); ++ __ipipe_init_early(); + trap_init(); + mm_init(); + +@@ -642,6 +644,11 @@ asmlinkage __visible void __init start_k + softirq_init(); + timekeeping_init(); + time_init(); ++ /* ++ * We need to wait for the interrupt and time subsystems to be ++ * initialized before enabling the pipeline. ++ */ ++ __ipipe_init(); + perf_event_init(); + profile_init(); + call_function_init(); +@@ -972,6 +979,7 @@ static void __init do_basic_setup(void) + shmem_init(); + driver_init(); + init_irq_proc(); ++ __ipipe_init_proc(); + do_ctors(); + usermodehelper_enable(); + do_initcalls(); +diff -uprN kernel/init/main.c.orig kernel_new/init/main.c.orig +--- kernel/init/main.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/init/main.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1179 @@ ++/* ++ * linux/init/main.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ * ++ * GK 2/5/95 - Changed to support mounting root fs via NFS ++ * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96 ++ * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96 ++ * Simplified starting of init: Michael A. 
Griffith ++ */ ++ ++#define DEBUG /* Enable initcall_debug */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++static int kernel_init(void *); ++ ++extern void init_IRQ(void); ++extern void radix_tree_init(void); ++ ++/* ++ * Debug helper: via this flag we know that we are in 'early bootup code' ++ * where only the boot processor is running with IRQ disabled. This means ++ * two things - IRQ must not be enabled before the flag is cleared and some ++ * operations which are not allowed with IRQ disabled are allowed while the ++ * flag is set. ++ */ ++bool early_boot_irqs_disabled __read_mostly; ++ ++enum system_states system_state __read_mostly; ++EXPORT_SYMBOL(system_state); ++ ++/* ++ * Boot command-line arguments ++ */ ++#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT ++#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT ++ ++extern void time_init(void); ++/* Default late time init is NULL. archs can override this later. */ ++void (*__initdata late_time_init)(void); ++ ++/* Untouched command line saved by arch-specific code. */ ++char __initdata boot_command_line[COMMAND_LINE_SIZE]; ++/* Untouched saved command line (eg. for /proc) */ ++char *saved_command_line; ++/* Command line for parameter parsing */ ++static char *static_command_line; ++/* Command line for per-initcall parameter parsing */ ++static char *initcall_command_line; ++ ++static char *execute_command; ++static char *ramdisk_execute_command; ++ ++/* ++ * Used to generate warnings if static_key manipulation functions are used ++ * before jump_label_init is called. ++ */ ++bool static_key_initialized __read_mostly; ++EXPORT_SYMBOL_GPL(static_key_initialized); ++ ++/* ++ * If set, this is an indication to the drivers that reset the underlying ++ * device before going ahead with the initialization otherwise driver might ++ * rely on the BIOS and skip the reset operation. ++ * ++ * This is useful if kernel is booting in an unreliable environment. ++ * For ex. kdump situation where previous kernel has crashed, BIOS has been ++ * skipped and devices will be in unknown state. 
++ */ ++unsigned int reset_devices; ++EXPORT_SYMBOL(reset_devices); ++ ++static int __init set_reset_devices(char *str) ++{ ++ reset_devices = 1; ++ return 1; ++} ++ ++__setup("reset_devices", set_reset_devices); ++ ++static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; ++const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; ++static const char *panic_later, *panic_param; ++ ++extern const struct obs_kernel_param __setup_start[], __setup_end[]; ++ ++static bool __init obsolete_checksetup(char *line) ++{ ++ const struct obs_kernel_param *p; ++ bool had_early_param = false; ++ ++ p = __setup_start; ++ do { ++ int n = strlen(p->str); ++ if (parameqn(line, p->str, n)) { ++ if (p->early) { ++ /* Already done in parse_early_param? ++ * (Needs exact match on param part). ++ * Keep iterating, as we can have early ++ * params and __setups of same names 8( */ ++ if (line[n] == '\0' || line[n] == '=') ++ had_early_param = true; ++ } else if (!p->setup_func) { ++ pr_warn("Parameter %s is obsolete, ignored\n", ++ p->str); ++ return true; ++ } else if (p->setup_func(line + n)) ++ return true; ++ } ++ p++; ++ } while (p < __setup_end); ++ ++ return had_early_param; ++} ++ ++/* ++ * This should be approx 2 Bo*oMips to start (note initial shift), and will ++ * still work even if initially too large, it will just take slightly longer ++ */ ++unsigned long loops_per_jiffy = (1<<12); ++EXPORT_SYMBOL(loops_per_jiffy); ++ ++static int __init debug_kernel(char *str) ++{ ++ console_loglevel = CONSOLE_LOGLEVEL_DEBUG; ++ return 0; ++} ++ ++static int __init quiet_kernel(char *str) ++{ ++ console_loglevel = CONSOLE_LOGLEVEL_QUIET; ++ return 0; ++} ++ ++early_param("debug", debug_kernel); ++early_param("quiet", quiet_kernel); ++ ++static int __init loglevel(char *str) ++{ ++ int newlevel; ++ ++ /* ++ * Only update loglevel value when a correct setting was passed, ++ * to prevent blind crashes (when loglevel being set to 0) that ++ * are quite hard to debug ++ */ ++ if (get_option(&str, &newlevel)) { ++ console_loglevel = newlevel; ++ return 0; ++ } ++ ++ return -EINVAL; ++} ++ ++early_param("loglevel", loglevel); ++ ++/* Change NUL term back to "=", to make "param" the whole string. */ ++static int __init repair_env_string(char *param, char *val, ++ const char *unused, void *arg) ++{ ++ if (val) { ++ /* param=val or param="val"? */ ++ if (val == param+strlen(param)+1) ++ val[-1] = '='; ++ else if (val == param+strlen(param)+2) { ++ val[-2] = '='; ++ memmove(val-1, val, strlen(val)+1); ++ val--; ++ } else ++ BUG(); ++ } ++ return 0; ++} ++ ++/* Anything after -- gets handed straight to init. */ ++static int __init set_init_arg(char *param, char *val, ++ const char *unused, void *arg) ++{ ++ unsigned int i; ++ ++ if (panic_later) ++ return 0; ++ ++ repair_env_string(param, val, unused, NULL); ++ ++ for (i = 0; argv_init[i]; i++) { ++ if (i == MAX_INIT_ARGS) { ++ panic_later = "init"; ++ panic_param = param; ++ return 0; ++ } ++ } ++ argv_init[i] = param; ++ return 0; ++} ++ ++/* ++ * Unknown boot options get handed to init, unless they look like ++ * unused parameters (modprobe will find them in /proc/cmdline). ++ */ ++static int __init unknown_bootoption(char *param, char *val, ++ const char *unused, void *arg) ++{ ++ repair_env_string(param, val, unused, NULL); ++ ++ /* Handle obsolete-style parameters */ ++ if (obsolete_checksetup(param)) ++ return 0; ++ ++ /* Unused module parameter. 
*/ ++ if (strchr(param, '.') && (!val || strchr(param, '.') < val)) ++ return 0; ++ ++ if (panic_later) ++ return 0; ++ ++ if (val) { ++ /* Environment option */ ++ unsigned int i; ++ for (i = 0; envp_init[i]; i++) { ++ if (i == MAX_INIT_ENVS) { ++ panic_later = "env"; ++ panic_param = param; ++ } ++ if (!strncmp(param, envp_init[i], val - param)) ++ break; ++ } ++ envp_init[i] = param; ++ } else { ++ /* Command line option */ ++ unsigned int i; ++ for (i = 0; argv_init[i]; i++) { ++ if (i == MAX_INIT_ARGS) { ++ panic_later = "init"; ++ panic_param = param; ++ } ++ } ++ argv_init[i] = param; ++ } ++ return 0; ++} ++ ++static int __init init_setup(char *str) ++{ ++ unsigned int i; ++ ++ execute_command = str; ++ /* ++ * In case LILO is going to boot us with default command line, ++ * it prepends "auto" before the whole cmdline which makes ++ * the shell think it should execute a script with such name. ++ * So we ignore all arguments entered _before_ init=... [MJ] ++ */ ++ for (i = 1; i < MAX_INIT_ARGS; i++) ++ argv_init[i] = NULL; ++ return 1; ++} ++__setup("init=", init_setup); ++ ++static int __init rdinit_setup(char *str) ++{ ++ unsigned int i; ++ ++ ramdisk_execute_command = str; ++ /* See "auto" comment in init_setup */ ++ for (i = 1; i < MAX_INIT_ARGS; i++) ++ argv_init[i] = NULL; ++ return 1; ++} ++__setup("rdinit=", rdinit_setup); ++ ++#ifndef CONFIG_SMP ++static const unsigned int setup_max_cpus = NR_CPUS; ++static inline void setup_nr_cpu_ids(void) { } ++static inline void smp_prepare_cpus(unsigned int maxcpus) { } ++#endif ++ ++/* ++ * We need to store the untouched command line for future reference. ++ * We also need to store the touched command line since the parameter ++ * parsing is performed in place, and we should allow a component to ++ * store reference of name/value for future reference. ++ */ ++static void __init setup_command_line(char *command_line) ++{ ++ saved_command_line = ++ memblock_virt_alloc(strlen(boot_command_line) + 1, 0); ++ initcall_command_line = ++ memblock_virt_alloc(strlen(boot_command_line) + 1, 0); ++ static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0); ++ strcpy(saved_command_line, boot_command_line); ++ strcpy(static_command_line, command_line); ++} ++ ++/* ++ * We need to finalize in a non-__init function or else race conditions ++ * between the root thread and the init thread may cause start_kernel to ++ * be reaped by free_initmem before the root thread has proceeded to ++ * cpu_idle. ++ * ++ * gcc-3.4 accidentally inlines this function, so use noinline. ++ */ ++ ++static __initdata DECLARE_COMPLETION(kthreadd_done); ++ ++static noinline void __ref rest_init(void) ++{ ++ struct task_struct *tsk; ++ int pid; ++ ++ rcu_scheduler_starting(); ++ /* ++ * We need to spawn init first so that it obtains pid 1, however ++ * the init task will end up wanting to create kthreads, which, if ++ * we schedule it before we create kthreadd, will OOPS. ++ */ ++ pid = kernel_thread(kernel_init, NULL, CLONE_FS); ++ /* ++ * Pin init on the boot CPU. Task migration is not properly working ++ * until sched_init_smp() has been run. It will set the allowed ++ * CPUs for init to the non isolated CPUs. 
++ */ ++ rcu_read_lock(); ++ tsk = find_task_by_pid_ns(pid, &init_pid_ns); ++ set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id())); ++ rcu_read_unlock(); ++ ++ numa_default_policy(); ++ pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); ++ rcu_read_lock(); ++ kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); ++ rcu_read_unlock(); ++ ++ /* ++ * Enable might_sleep() and smp_processor_id() checks. ++ * They cannot be enabled earlier because with CONFIG_PREEMPT=y ++ * kernel_thread() would trigger might_sleep() splats. With ++ * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled ++ * already, but it's stuck on the kthreadd_done completion. ++ */ ++ system_state = SYSTEM_SCHEDULING; ++ ++ complete(&kthreadd_done); ++ ++ /* ++ * The boot idle thread must execute schedule() ++ * at least once to get things moving: ++ */ ++ schedule_preempt_disabled(); ++ /* Call into cpu_idle with preempt disabled */ ++ cpu_startup_entry(CPUHP_ONLINE); ++} ++ ++/* Check for early params. */ ++static int __init do_early_param(char *param, char *val, ++ const char *unused, void *arg) ++{ ++ const struct obs_kernel_param *p; ++ ++ for (p = __setup_start; p < __setup_end; p++) { ++ if ((p->early && parameq(param, p->str)) || ++ (strcmp(param, "console") == 0 && ++ strcmp(p->str, "earlycon") == 0) ++ ) { ++ if (p->setup_func(val) != 0) ++ pr_warn("Malformed early option '%s'\n", param); ++ } ++ } ++ /* We accept everything at this stage. */ ++ return 0; ++} ++ ++void __init parse_early_options(char *cmdline) ++{ ++ parse_args("early options", cmdline, NULL, 0, 0, 0, NULL, ++ do_early_param); ++} ++ ++/* Arch code calls this early on, or if not, just before other parsing. */ ++void __init parse_early_param(void) ++{ ++ static int done __initdata; ++ static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata; ++ ++ if (done) ++ return; ++ ++ /* All fall through to do_early_param. */ ++ strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE); ++ parse_early_options(tmp_cmdline); ++ done = 1; ++} ++ ++void __init __weak arch_post_acpi_subsys_init(void) { } ++ ++void __init __weak smp_setup_processor_id(void) ++{ ++} ++ ++# if THREAD_SIZE >= PAGE_SIZE ++void __init __weak thread_stack_cache_init(void) ++{ ++} ++#endif ++ ++void __init __weak mem_encrypt_init(void) { } ++ ++bool initcall_debug; ++core_param(initcall_debug, initcall_debug, bool, 0644); ++ ++#ifdef TRACEPOINTS_ENABLED ++static void __init initcall_debug_enable(void); ++#else ++static inline void initcall_debug_enable(void) ++{ ++} ++#endif ++ ++/* ++ * Set up kernel memory allocators ++ */ ++static void __init mm_init(void) ++{ ++ /* ++ * page_ext requires contiguous pages, ++ * bigger than MAX_ORDER unless SPARSEMEM. ++ */ ++ page_ext_init_flatmem(); ++ mem_init(); ++ kmem_cache_init(); ++ pgtable_init(); ++ vmalloc_init(); ++ ioremap_huge_init(); ++ /* Should be run before the first non-init thread is created */ ++ init_espfix_bsp(); ++ /* Should be run after espfix64 is set up. */ ++ pti_init(); ++} ++ ++asmlinkage __visible void __init start_kernel(void) ++{ ++ char *command_line; ++ char *after_dashes; ++ ++ set_task_stack_end_magic(&init_task); ++ smp_setup_processor_id(); ++ debug_objects_early_init(); ++ ++ cgroup_init_early(); ++ ++ local_irq_disable(); ++ early_boot_irqs_disabled = true; ++ ++ /* ++ * Interrupts are still disabled. Do necessary setups, then ++ * enable them. 
++ */ ++ boot_cpu_init(); ++ page_address_init(); ++ pr_notice("%s", linux_banner); ++ setup_arch(&command_line); ++ /* ++ * Set up the the initial canary and entropy after arch ++ * and after adding latent and command line entropy. ++ */ ++ add_latent_entropy(); ++ add_device_randomness(command_line, strlen(command_line)); ++ boot_init_stack_canary(); ++ mm_init_cpumask(&init_mm); ++ setup_command_line(command_line); ++ setup_nr_cpu_ids(); ++ setup_per_cpu_areas(); ++ smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ ++ boot_cpu_hotplug_init(); ++ ++ build_all_zonelists(NULL); ++ page_alloc_init(); ++ ++ pr_notice("Kernel command line: %s\n", boot_command_line); ++ /* parameters may set static keys */ ++ jump_label_init(); ++ parse_early_param(); ++ after_dashes = parse_args("Booting kernel", ++ static_command_line, __start___param, ++ __stop___param - __start___param, ++ -1, -1, NULL, &unknown_bootoption); ++ if (!IS_ERR_OR_NULL(after_dashes)) ++ parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, ++ NULL, set_init_arg); ++ ++ /* ++ * These use large bootmem allocations and must precede ++ * kmem_cache_init() ++ */ ++ setup_log_buf(0); ++ vfs_caches_init_early(); ++ sort_main_extable(); ++ trap_init(); ++ mm_init(); ++ ++ ftrace_init(); ++ ++ /* trace_printk can be enabled here */ ++ early_trace_init(); ++ ++ /* ++ * Set up the scheduler prior starting any interrupts (such as the ++ * timer interrupt). Full topology setup happens at smp_init() ++ * time - but meanwhile we still have a functioning scheduler. ++ */ ++ sched_init(); ++ /* ++ * Disable preemption - early bootup scheduling is extremely ++ * fragile until we cpu_idle() for the first time. ++ */ ++ preempt_disable(); ++ if (WARN(!irqs_disabled(), ++ "Interrupts were enabled *very* early, fixing it\n")) ++ local_irq_disable(); ++ radix_tree_init(); ++ ++ /* ++ * Set up housekeeping before setting up workqueues to allow the unbound ++ * workqueue to take non-housekeeping into account. ++ */ ++ housekeeping_init(); ++ ++ /* ++ * Allow workqueue creation and work item queueing/cancelling ++ * early. Work item execution depends on kthreads and starts after ++ * workqueue_init(). ++ */ ++ workqueue_init_early(); ++ ++ rcu_init(); ++ ++ /* Trace events are available after this */ ++ trace_init(); ++ ++ if (initcall_debug) ++ initcall_debug_enable(); ++ ++ context_tracking_init(); ++ /* init some links before init_ISA_irqs() */ ++ early_irq_init(); ++ init_IRQ(); ++ tick_init(); ++ rcu_init_nohz(); ++ init_timers(); ++ hrtimers_init(); ++ softirq_init(); ++ timekeeping_init(); ++ time_init(); ++ perf_event_init(); ++ profile_init(); ++ call_function_init(); ++ WARN(!irqs_disabled(), "Interrupts were enabled early\n"); ++ ++ early_boot_irqs_disabled = false; ++ local_irq_enable(); ++ ++ kmem_cache_init_late(); ++ ++ /* ++ * HACK ALERT! This is early. We're enabling the console before ++ * we've done PCI setups etc, and console_init() must be aware of ++ * this. But we do want output early, in case something goes wrong. ++ */ ++ console_init(); ++ if (panic_later) ++ panic("Too many boot %s vars at `%s'", panic_later, ++ panic_param); ++ ++ lockdep_init(); ++ ++ /* ++ * Need to run this when irqs are enabled, because it wants ++ * to self-test [hard/soft]-irqs on/off lock inversion bugs ++ * too: ++ */ ++ locking_selftest(); ++ ++ /* ++ * This needs to be called before any devices perform DMA ++ * operations that might use the SWIOTLB bounce buffers. 
It will ++ * mark the bounce buffers as decrypted so that their usage will ++ * not cause "plain-text" data to be decrypted when accessed. ++ */ ++ mem_encrypt_init(); ++ ++#ifdef CONFIG_BLK_DEV_INITRD ++ if (initrd_start && !initrd_below_start_ok && ++ page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { ++ pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n", ++ page_to_pfn(virt_to_page((void *)initrd_start)), ++ min_low_pfn); ++ initrd_start = 0; ++ } ++#endif ++ kmemleak_init(); ++ debug_objects_mem_init(); ++ setup_per_cpu_pageset(); ++ numa_policy_init(); ++ acpi_early_init(); ++ if (late_time_init) ++ late_time_init(); ++ sched_clock_init(); ++ calibrate_delay(); ++ pid_idr_init(); ++ anon_vma_init(); ++#ifdef CONFIG_X86 ++ if (efi_enabled(EFI_RUNTIME_SERVICES)) ++ efi_enter_virtual_mode(); ++#endif ++ thread_stack_cache_init(); ++ cred_init(); ++ fork_init(); ++ proc_caches_init(); ++ uts_ns_init(); ++ buffer_init(); ++ key_init(); ++ security_init(); ++ dbg_late_init(); ++ vfs_caches_init(); ++ pagecache_init(); ++ signals_init(); ++ seq_file_init(); ++ proc_root_init(); ++ nsfs_init(); ++ cpuset_init(); ++ cgroup_init(); ++ taskstats_init_early(); ++ delayacct_init(); ++ ++ check_bugs(); ++ ++ acpi_subsystem_init(); ++ arch_post_acpi_subsys_init(); ++ sfi_init_late(); ++ ++ if (efi_enabled(EFI_RUNTIME_SERVICES)) { ++ efi_free_boot_services(); ++ } ++ ++ /* Do the rest non-__init'ed, we're now alive */ ++ rest_init(); ++} ++ ++/* Call all constructor functions linked into the kernel. */ ++static void __init do_ctors(void) ++{ ++#ifdef CONFIG_CONSTRUCTORS ++ ctor_fn_t *fn = (ctor_fn_t *) __ctors_start; ++ ++ for (; fn < (ctor_fn_t *) __ctors_end; fn++) ++ (*fn)(); ++#endif ++} ++ ++#ifdef CONFIG_KALLSYMS ++struct blacklist_entry { ++ struct list_head next; ++ char *buf; ++}; ++ ++static __initdata_or_module LIST_HEAD(blacklisted_initcalls); ++ ++static int __init initcall_blacklist(char *str) ++{ ++ char *str_entry; ++ struct blacklist_entry *entry; ++ ++ /* str argument is a comma-separated list of functions */ ++ do { ++ str_entry = strsep(&str, ","); ++ if (str_entry) { ++ pr_debug("blacklisting initcall %s\n", str_entry); ++ entry = alloc_bootmem(sizeof(*entry)); ++ entry->buf = alloc_bootmem(strlen(str_entry) + 1); ++ strcpy(entry->buf, str_entry); ++ list_add(&entry->next, &blacklisted_initcalls); ++ } ++ } while (str_entry); ++ ++ return 0; ++} ++ ++static bool __init_or_module initcall_blacklisted(initcall_t fn) ++{ ++ struct blacklist_entry *entry; ++ char fn_name[KSYM_SYMBOL_LEN]; ++ unsigned long addr; ++ ++ if (list_empty(&blacklisted_initcalls)) ++ return false; ++ ++ addr = (unsigned long) dereference_function_descriptor(fn); ++ sprint_symbol_no_offset(fn_name, addr); ++ ++ /* ++ * fn will be "function_name [module_name]" where [module_name] is not ++ * displayed for built-in init functions. Strip off the [module_name]. 
++ */ ++ strreplace(fn_name, ' ', '\0'); ++ ++ list_for_each_entry(entry, &blacklisted_initcalls, next) { ++ if (!strcmp(fn_name, entry->buf)) { ++ pr_debug("initcall %s blacklisted\n", fn_name); ++ return true; ++ } ++ } ++ ++ return false; ++} ++#else ++static int __init initcall_blacklist(char *str) ++{ ++ pr_warn("initcall_blacklist requires CONFIG_KALLSYMS\n"); ++ return 0; ++} ++ ++static bool __init_or_module initcall_blacklisted(initcall_t fn) ++{ ++ return false; ++} ++#endif ++__setup("initcall_blacklist=", initcall_blacklist); ++ ++static __init_or_module void ++trace_initcall_start_cb(void *data, initcall_t fn) ++{ ++ ktime_t *calltime = (ktime_t *)data; ++ ++ printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); ++ *calltime = ktime_get(); ++} ++ ++static __init_or_module void ++trace_initcall_finish_cb(void *data, initcall_t fn, int ret) ++{ ++ ktime_t *calltime = (ktime_t *)data; ++ ktime_t delta, rettime; ++ unsigned long long duration; ++ ++ rettime = ktime_get(); ++ delta = ktime_sub(rettime, *calltime); ++ duration = (unsigned long long) ktime_to_ns(delta) >> 10; ++ printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", ++ fn, ret, duration); ++} ++ ++static ktime_t initcall_calltime; ++ ++#ifdef TRACEPOINTS_ENABLED ++static void __init initcall_debug_enable(void) ++{ ++ int ret; ++ ++ ret = register_trace_initcall_start(trace_initcall_start_cb, ++ &initcall_calltime); ++ ret |= register_trace_initcall_finish(trace_initcall_finish_cb, ++ &initcall_calltime); ++ WARN(ret, "Failed to register initcall tracepoints\n"); ++} ++# define do_trace_initcall_start trace_initcall_start ++# define do_trace_initcall_finish trace_initcall_finish ++#else ++static inline void do_trace_initcall_start(initcall_t fn) ++{ ++ if (!initcall_debug) ++ return; ++ trace_initcall_start_cb(&initcall_calltime, fn); ++} ++static inline void do_trace_initcall_finish(initcall_t fn, int ret) ++{ ++ if (!initcall_debug) ++ return; ++ trace_initcall_finish_cb(&initcall_calltime, fn, ret); ++} ++#endif /* !TRACEPOINTS_ENABLED */ ++ ++int __init_or_module do_one_initcall(initcall_t fn) ++{ ++ int count = preempt_count(); ++ char msgbuf[64]; ++ int ret; ++ ++ if (initcall_blacklisted(fn)) ++ return -EPERM; ++ ++ do_trace_initcall_start(fn); ++ ret = fn(); ++ do_trace_initcall_finish(fn, ret); ++ ++ msgbuf[0] = 0; ++ ++ if (preempt_count() != count) { ++ sprintf(msgbuf, "preemption imbalance "); ++ preempt_count_set(count); ++ } ++ if (irqs_disabled()) { ++ strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf)); ++ local_irq_enable(); ++ } ++ WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf); ++ ++ add_latent_entropy(); ++ return ret; ++} ++ ++ ++extern initcall_entry_t __initcall_start[]; ++extern initcall_entry_t __initcall0_start[]; ++extern initcall_entry_t __initcall1_start[]; ++extern initcall_entry_t __initcall2_start[]; ++extern initcall_entry_t __initcall3_start[]; ++extern initcall_entry_t __initcall4_start[]; ++extern initcall_entry_t __initcall5_start[]; ++extern initcall_entry_t __initcall6_start[]; ++extern initcall_entry_t __initcall7_start[]; ++extern initcall_entry_t __initcall_end[]; ++ ++static initcall_entry_t *initcall_levels[] __initdata = { ++ __initcall0_start, ++ __initcall1_start, ++ __initcall2_start, ++ __initcall3_start, ++ __initcall4_start, ++ __initcall5_start, ++ __initcall6_start, ++ __initcall7_start, ++ __initcall_end, ++}; ++ ++/* Keep these in sync with initcalls in include/linux/init.h */ ++static char *initcall_level_names[] 
__initdata = { ++ "pure", ++ "core", ++ "postcore", ++ "arch", ++ "subsys", ++ "fs", ++ "device", ++ "late", ++}; ++ ++static void __init do_initcall_level(int level) ++{ ++ initcall_entry_t *fn; ++ ++ strcpy(initcall_command_line, saved_command_line); ++ parse_args(initcall_level_names[level], ++ initcall_command_line, __start___param, ++ __stop___param - __start___param, ++ level, level, ++ NULL, &repair_env_string); ++ ++ trace_initcall_level(initcall_level_names[level]); ++ for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) ++ do_one_initcall(initcall_from_entry(fn)); ++} ++ ++static void __init do_initcalls(void) ++{ ++ int level; ++ ++ for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) ++ do_initcall_level(level); ++} ++ ++/* ++ * Ok, the machine is now initialized. None of the devices ++ * have been touched yet, but the CPU subsystem is up and ++ * running, and memory and process management works. ++ * ++ * Now we can finally start doing some real work.. ++ */ ++static void __init do_basic_setup(void) ++{ ++ cpuset_init_smp(); ++ shmem_init(); ++ driver_init(); ++ init_irq_proc(); ++ do_ctors(); ++ usermodehelper_enable(); ++ do_initcalls(); ++} ++ ++static void __init do_pre_smp_initcalls(void) ++{ ++ initcall_entry_t *fn; ++ ++ trace_initcall_level("early"); ++ for (fn = __initcall_start; fn < __initcall0_start; fn++) ++ do_one_initcall(initcall_from_entry(fn)); ++} ++ ++/* ++ * This function requests modules which should be loaded by default and is ++ * called twice right after initrd is mounted and right before init is ++ * exec'd. If such modules are on either initrd or rootfs, they will be ++ * loaded before control is passed to userland. ++ */ ++void __init load_default_modules(void) ++{ ++ load_default_elevator_module(); ++} ++ ++static int run_init_process(const char *init_filename) ++{ ++ argv_init[0] = init_filename; ++ pr_info("Run %s as init process\n", init_filename); ++ return do_execve(getname_kernel(init_filename), ++ (const char __user *const __user *)argv_init, ++ (const char __user *const __user *)envp_init); ++} ++ ++static int try_to_run_init_process(const char *init_filename) ++{ ++ int ret; ++ ++ ret = run_init_process(init_filename); ++ ++ if (ret && ret != -ENOENT) { ++ pr_err("Starting init: %s exists but couldn't execute it (error %d)\n", ++ init_filename, ret); ++ } ++ ++ return ret; ++} ++ ++static noinline void __init kernel_init_freeable(void); ++ ++#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) ++bool rodata_enabled __ro_after_init = true; ++static int __init set_debug_rodata(char *str) ++{ ++ return strtobool(str, &rodata_enabled); ++} ++__setup("rodata=", set_debug_rodata); ++#endif ++ ++#ifdef CONFIG_STRICT_KERNEL_RWX ++static void mark_readonly(void) ++{ ++ if (rodata_enabled) { ++ /* ++ * load_module() results in W+X mappings, which are cleaned up ++ * with call_rcu_sched(). Let's make sure that queued work is ++ * flushed so that we don't hit false positives looking for ++ * insecure pages which are W+X. 
++ */ ++ rcu_barrier_sched(); ++ mark_rodata_ro(); ++ rodata_test(); ++ } else ++ pr_info("Kernel memory protection disabled.\n"); ++} ++#else ++static inline void mark_readonly(void) ++{ ++ pr_warn("This architecture does not have kernel memory protection.\n"); ++} ++#endif ++ ++static int __ref kernel_init(void *unused) ++{ ++ int ret; ++ ++ kernel_init_freeable(); ++ /* need to finish all async __init code before freeing the memory */ ++ async_synchronize_full(); ++ ftrace_free_init_mem(); ++ jump_label_invalidate_initmem(); ++ free_initmem(); ++ mark_readonly(); ++ ++ /* ++ * Kernel mappings are now finalized - update the userspace page-table ++ * to finalize PTI. ++ */ ++ pti_finalize(); ++ ++ system_state = SYSTEM_RUNNING; ++ numa_default_policy(); ++ ++ rcu_end_inkernel_boot(); ++ ++ if (ramdisk_execute_command) { ++ ret = run_init_process(ramdisk_execute_command); ++ if (!ret) ++ return 0; ++ pr_err("Failed to execute %s (error %d)\n", ++ ramdisk_execute_command, ret); ++ } ++ ++ /* ++ * We try each of these until one succeeds. ++ * ++ * The Bourne shell can be used instead of init if we are ++ * trying to recover a really broken machine. ++ */ ++ if (execute_command) { ++ ret = run_init_process(execute_command); ++ if (!ret) ++ return 0; ++ panic("Requested init %s failed (error %d).", ++ execute_command, ret); ++ } ++ if (!try_to_run_init_process("/sbin/init") || ++ !try_to_run_init_process("/etc/init") || ++ !try_to_run_init_process("/bin/init") || ++ !try_to_run_init_process("/bin/sh")) ++ return 0; ++ ++ panic("No working init found. Try passing init= option to kernel. " ++ "See Linux Documentation/admin-guide/init.rst for guidance."); ++} ++ ++static noinline void __init kernel_init_freeable(void) ++{ ++ /* ++ * Wait until kthreadd is all set-up. ++ */ ++ wait_for_completion(&kthreadd_done); ++ ++ /* Now the scheduler is fully set up and can do blocking allocations */ ++ gfp_allowed_mask = __GFP_BITS_MASK; ++ ++ /* ++ * init can allocate pages on any node ++ */ ++ set_mems_allowed(node_states[N_MEMORY]); ++ ++ cad_pid = task_pid(current); ++ ++ smp_prepare_cpus(setup_max_cpus); ++ ++ workqueue_init(); ++ ++ init_mm_internals(); ++ ++ do_pre_smp_initcalls(); ++ ++ smp_init(); ++ sched_init_smp(); ++ ktask_init(); ++ ++ page_alloc_init_late(); ++ /* Initialize page ext after all struct pages are initialized. */ ++ page_ext_init(); ++ ++ do_basic_setup(); ++ ++ lockup_detector_init(); ++ ++ /* Open the /dev/console on the rootfs, this should never fail */ ++ if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) ++ pr_err("Warning: unable to open an initial console.\n"); ++ ++ (void) ksys_dup(0); ++ (void) ksys_dup(0); ++ /* ++ * check if there is an early userspace init. If yes, let it do all ++ * the work ++ */ ++ ++ if (!ramdisk_execute_command) ++ ramdisk_execute_command = "/init"; ++ ++ if (ksys_access((const char __user *) ++ ramdisk_execute_command, 0) != 0) { ++ ramdisk_execute_command = NULL; ++ prepare_namespace(); ++ } ++ ++ /* ++ * Ok, we have completed the initial bootup, and ++ * we're essentially up and running. Get rid of the ++ * initmem segments and start the user-mode stuff.. 
++ * ++ * rootfs is available now, try loading the public keys ++ * and default modules ++ */ ++ ++ integrity_load_keys(); ++ load_default_modules(); ++} +diff -uprN kernel/init/main.c.rej kernel_new/init/main.c.rej +--- kernel/init/main.c.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/init/main.c.rej 2021-04-01 18:28:07.804863121 +0800 +@@ -0,0 +1,14 @@ ++--- init/main.c 2019-12-18 03:36:04.000000000 +0800 +++++ init/main.c 2021-03-22 09:21:43.215415405 +0800 ++@@ -643,6 +645,11 @@ asmlinkage __visible void __init start_k ++ softirq_init(); ++ timekeeping_init(); ++ time_init(); +++ /* +++ * We need to wait for the interrupt and time subsystems to be +++ * initialized before enabling the pipeline. +++ */ +++ __ipipe_init(); ++ printk_safe_init(); ++ perf_event_init(); ++ profile_init(); +diff -uprN kernel/kernel/context_tracking.c kernel_new/kernel/context_tracking.c +--- kernel/kernel/context_tracking.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/context_tracking.c 2021-04-01 18:28:07.804863121 +0800 +@@ -113,7 +113,7 @@ void context_tracking_enter(enum ctx_sta + * helpers are enough to protect RCU uses inside the exception. So + * just return immediately if we detect we are in an IRQ. + */ +- if (in_interrupt()) ++ if (!ipipe_root_p || in_interrupt()) + return; + + local_irq_save(flags); +@@ -169,7 +169,7 @@ void context_tracking_exit(enum ctx_stat + { + unsigned long flags; + +- if (in_interrupt()) ++ if (!ipipe_root_p || in_interrupt()) + return; + + local_irq_save(flags); +diff -uprN kernel/kernel/debug/debug_core.c kernel_new/kernel/debug/debug_core.c +--- kernel/kernel/debug/debug_core.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/debug/debug_core.c 2021-04-01 18:28:07.804863121 +0800 +@@ -119,8 +119,8 @@ static struct kgdb_bkpt kgdb_break[KGDB + */ + atomic_t kgdb_active = ATOMIC_INIT(-1); + EXPORT_SYMBOL_GPL(kgdb_active); +-static DEFINE_RAW_SPINLOCK(dbg_master_lock); +-static DEFINE_RAW_SPINLOCK(dbg_slave_lock); ++static IPIPE_DEFINE_RAW_SPINLOCK(dbg_master_lock); ++static IPIPE_DEFINE_RAW_SPINLOCK(dbg_slave_lock); + + /* + * We use NR_CPUs not PERCPU, in case kgdb is used to debug early +@@ -461,7 +461,9 @@ static int kgdb_reenter_check(struct kgd + static void dbg_touch_watchdogs(void) + { + touch_softlockup_watchdog_sync(); ++#ifndef CONFIG_IPIPE + clocksource_touch_watchdog(); ++#endif + rcu_cpu_stall_reset(); + } + +@@ -492,7 +494,7 @@ acquirelock: + * Interrupts will be restored by the 'trap return' code, except when + * single stepping. 
+ */ +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + cpu = ks->cpu; + kgdb_info[cpu].debuggerinfo = regs; +@@ -543,7 +545,7 @@ return_normal: + smp_mb__before_atomic(); + atomic_dec(&slaves_in_kgdb); + dbg_touch_watchdogs(); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + return 0; + } + cpu_relax(); +@@ -561,7 +563,7 @@ return_normal: + atomic_set(&kgdb_active, -1); + raw_spin_unlock(&dbg_master_lock); + dbg_touch_watchdogs(); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + goto acquirelock; + } +@@ -680,7 +682,7 @@ kgdb_restore: + atomic_set(&kgdb_active, -1); + raw_spin_unlock(&dbg_master_lock); + dbg_touch_watchdogs(); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + return kgdb_info[cpu].ret_state; + } +@@ -799,9 +801,9 @@ static void kgdb_console_write(struct co + if (!kgdb_connected || atomic_read(&kgdb_active) != -1 || dbg_kdb_mode) + return; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + gdbstub_msg_write(s, count); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static struct console kgdbcons = { +diff -uprN kernel/kernel/exit.c kernel_new/kernel/exit.c +--- kernel/kernel/exit.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/exit.c 2021-04-01 18:28:07.805863120 +0800 +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -827,6 +828,7 @@ void __noreturn do_exit(long code) + */ + raw_spin_lock_irq(&tsk->pi_lock); + raw_spin_unlock_irq(&tsk->pi_lock); ++ __ipipe_report_exit(tsk); + + if (unlikely(in_atomic())) { + pr_info("note: %s[%d] exited with preempt_count %d\n", +diff -uprN kernel/kernel/exit.c.orig kernel_new/kernel/exit.c.orig +--- kernel/kernel/exit.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/exit.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1771 @@ ++/* ++ * linux/kernel/exit.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* for audit_free() */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++static void __unhash_process(struct task_struct *p, bool group_dead) ++{ ++ nr_threads--; ++ detach_pid(p, PIDTYPE_PID); ++ if (group_dead) { ++ detach_pid(p, PIDTYPE_TGID); ++ detach_pid(p, PIDTYPE_PGID); ++ detach_pid(p, PIDTYPE_SID); ++ ++ list_del_rcu(&p->tasks); ++ list_del_init(&p->sibling); ++ __this_cpu_dec(process_counts); ++ } ++ list_del_rcu(&p->thread_group); ++ list_del_rcu(&p->thread_node); ++} ++ ++/* ++ * This function expects the tasklist_lock write-locked. 
++ */ ++static void __exit_signal(struct task_struct *tsk) ++{ ++ struct signal_struct *sig = tsk->signal; ++ bool group_dead = thread_group_leader(tsk); ++ struct sighand_struct *sighand; ++ struct tty_struct *uninitialized_var(tty); ++ u64 utime, stime; ++ ++ sighand = rcu_dereference_check(tsk->sighand, ++ lockdep_tasklist_lock_is_held()); ++ spin_lock(&sighand->siglock); ++ ++#ifdef CONFIG_POSIX_TIMERS ++ posix_cpu_timers_exit(tsk); ++ if (group_dead) { ++ posix_cpu_timers_exit_group(tsk); ++ } else { ++ /* ++ * This can only happen if the caller is de_thread(). ++ * FIXME: this is the temporary hack, we should teach ++ * posix-cpu-timers to handle this case correctly. ++ */ ++ if (unlikely(has_group_leader_pid(tsk))) ++ posix_cpu_timers_exit_group(tsk); ++ } ++#endif ++ ++ if (group_dead) { ++ tty = sig->tty; ++ sig->tty = NULL; ++ } else { ++ /* ++ * If there is any task waiting for the group exit ++ * then notify it: ++ */ ++ if (sig->notify_count > 0 && !--sig->notify_count) ++ wake_up_process(sig->group_exit_task); ++ ++ if (tsk == sig->curr_target) ++ sig->curr_target = next_thread(tsk); ++ } ++ ++ add_device_randomness((const void*) &tsk->se.sum_exec_runtime, ++ sizeof(unsigned long long)); ++ ++ /* ++ * Accumulate here the counters for all threads as they die. We could ++ * skip the group leader because it is the last user of signal_struct, ++ * but we want to avoid the race with thread_group_cputime() which can ++ * see the empty ->thread_head list. ++ */ ++ task_cputime(tsk, &utime, &stime); ++ write_seqlock(&sig->stats_lock); ++ sig->utime += utime; ++ sig->stime += stime; ++ sig->gtime += task_gtime(tsk); ++ sig->min_flt += tsk->min_flt; ++ sig->maj_flt += tsk->maj_flt; ++ sig->nvcsw += tsk->nvcsw; ++ sig->nivcsw += tsk->nivcsw; ++ sig->inblock += task_io_get_inblock(tsk); ++ sig->oublock += task_io_get_oublock(tsk); ++ task_io_accounting_add(&sig->ioac, &tsk->ioac); ++ sig->sum_sched_runtime += tsk->se.sum_exec_runtime; ++ sig->nr_threads--; ++ __unhash_process(tsk, group_dead); ++ write_sequnlock(&sig->stats_lock); ++ ++ /* ++ * Do this under ->siglock, we can race with another thread ++ * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. ++ */ ++ flush_sigqueue(&tsk->pending); ++ tsk->sighand = NULL; ++ spin_unlock(&sighand->siglock); ++ ++ __cleanup_sighand(sighand); ++ clear_tsk_thread_flag(tsk, TIF_SIGPENDING); ++ if (group_dead) { ++ flush_sigqueue(&sig->shared_pending); ++ tty_kref_put(tty); ++ } ++} ++ ++static void delayed_put_task_struct(struct rcu_head *rhp) ++{ ++ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); ++ ++ perf_event_delayed_put(tsk); ++ trace_sched_process_free(tsk); ++ put_task_struct(tsk); ++} ++ ++ ++void release_task(struct task_struct *p) ++{ ++ struct task_struct *leader; ++ int zap_leader; ++repeat: ++ /* don't need to get the RCU readlock here - the process is dead and ++ * can't be modifying its own credentials. But shut RCU-lockdep up */ ++ rcu_read_lock(); ++ atomic_dec(&__task_cred(p)->user->processes); ++ rcu_read_unlock(); ++ ++ proc_flush_task(p); ++ cgroup_release(p); ++ ++ write_lock_irq(&tasklist_lock); ++ ptrace_release_task(p); ++ __exit_signal(p); ++ ++ /* ++ * If we are the last non-leader member of the thread ++ * group, and the leader is zombie, then notify the ++ * group leader's parent process. (if it wants notification.) 
++ */ ++ zap_leader = 0; ++ leader = p->group_leader; ++ if (leader != p && thread_group_empty(leader) ++ && leader->exit_state == EXIT_ZOMBIE) { ++ /* ++ * If we were the last child thread and the leader has ++ * exited already, and the leader's parent ignores SIGCHLD, ++ * then we are the one who should release the leader. ++ */ ++ zap_leader = do_notify_parent(leader, leader->exit_signal); ++ if (zap_leader) ++ leader->exit_state = EXIT_DEAD; ++ } ++ ++ write_unlock_irq(&tasklist_lock); ++ release_thread(p); ++ call_rcu(&p->rcu, delayed_put_task_struct); ++ ++ p = leader; ++ if (unlikely(zap_leader)) ++ goto repeat; ++} ++ ++/* ++ * Note that if this function returns a valid task_struct pointer (!NULL) ++ * task->usage must remain >0 for the duration of the RCU critical section. ++ */ ++struct task_struct *task_rcu_dereference(struct task_struct **ptask) ++{ ++ struct sighand_struct *sighand; ++ struct task_struct *task; ++ ++ /* ++ * We need to verify that release_task() was not called and thus ++ * delayed_put_task_struct() can't run and drop the last reference ++ * before rcu_read_unlock(). We check task->sighand != NULL, ++ * but we can read the already freed and reused memory. ++ */ ++retry: ++ task = rcu_dereference(*ptask); ++ if (!task) ++ return NULL; ++ ++ probe_kernel_address(&task->sighand, sighand); ++ ++ /* ++ * Pairs with atomic_dec_and_test() in put_task_struct(). If this task ++ * was already freed we can not miss the preceding update of this ++ * pointer. ++ */ ++ smp_rmb(); ++ if (unlikely(task != READ_ONCE(*ptask))) ++ goto retry; ++ ++ /* ++ * We've re-checked that "task == *ptask", now we have two different ++ * cases: ++ * ++ * 1. This is actually the same task/task_struct. In this case ++ * sighand != NULL tells us it is still alive. ++ * ++ * 2. This is another task which got the same memory for task_struct. ++ * We can't know this of course, and we can not trust ++ * sighand != NULL. ++ * ++ * In this case we actually return a random value, but this is ++ * correct. ++ * ++ * If we return NULL - we can pretend that we actually noticed that ++ * *ptask was updated when the previous task has exited. Or pretend ++ * that probe_slab_address(&sighand) reads NULL. ++ * ++ * If we return the new task (because sighand is not NULL for any ++ * reason) - this is fine too. This (new) task can't go away before ++ * another gp pass. ++ * ++ * And note: We could even eliminate the false positive if re-read ++ * task->sighand once again to avoid the falsely NULL. But this case ++ * is very unlikely so we don't care. ++ */ ++ if (!sighand) ++ return NULL; ++ ++ return task; ++} ++ ++void rcuwait_wake_up(struct rcuwait *w) ++{ ++ struct task_struct *task; ++ ++ rcu_read_lock(); ++ ++ /* ++ * Order condition vs @task, such that everything prior to the load ++ * of @task is visible. This is the condition as to why the user called ++ * rcuwait_trywake() in the first place. Pairs with set_current_state() ++ * barrier (A) in rcuwait_wait_event(). ++ * ++ * WAIT WAKE ++ * [S] tsk = current [S] cond = true ++ * MB (A) MB (B) ++ * [L] cond [L] tsk ++ */ ++ smp_mb(); /* (B) */ ++ ++ /* ++ * Avoid using task_rcu_dereference() magic as long as we are careful, ++ * see comment in rcuwait_wait_event() regarding ->exit_state. ++ */ ++ task = rcu_dereference(w->task); ++ if (task) ++ wake_up_process(task); ++ rcu_read_unlock(); ++} ++ ++/* ++ * Determine if a process group is "orphaned", according to the POSIX ++ * definition in 2.2.2.52. 
Orphaned process groups are not to be affected ++ * by terminal-generated stop signals. Newly orphaned process groups are ++ * to receive a SIGHUP and a SIGCONT. ++ * ++ * "I ask you, have you ever known what it is to be an orphan?" ++ */ ++static int will_become_orphaned_pgrp(struct pid *pgrp, ++ struct task_struct *ignored_task) ++{ ++ struct task_struct *p; ++ ++ do_each_pid_task(pgrp, PIDTYPE_PGID, p) { ++ if ((p == ignored_task) || ++ (p->exit_state && thread_group_empty(p)) || ++ is_global_init(p->real_parent)) ++ continue; ++ ++ if (task_pgrp(p->real_parent) != pgrp && ++ task_session(p->real_parent) == task_session(p)) ++ return 0; ++ } while_each_pid_task(pgrp, PIDTYPE_PGID, p); ++ ++ return 1; ++} ++ ++int is_current_pgrp_orphaned(void) ++{ ++ int retval; ++ ++ read_lock(&tasklist_lock); ++ retval = will_become_orphaned_pgrp(task_pgrp(current), NULL); ++ read_unlock(&tasklist_lock); ++ ++ return retval; ++} ++ ++static bool has_stopped_jobs(struct pid *pgrp) ++{ ++ struct task_struct *p; ++ ++ do_each_pid_task(pgrp, PIDTYPE_PGID, p) { ++ if (p->signal->flags & SIGNAL_STOP_STOPPED) ++ return true; ++ } while_each_pid_task(pgrp, PIDTYPE_PGID, p); ++ ++ return false; ++} ++ ++/* ++ * Check to see if any process groups have become orphaned as ++ * a result of our exiting, and if they have any stopped jobs, ++ * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) ++ */ ++static void ++kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) ++{ ++ struct pid *pgrp = task_pgrp(tsk); ++ struct task_struct *ignored_task = tsk; ++ ++ if (!parent) ++ /* exit: our father is in a different pgrp than ++ * we are and we were the only connection outside. ++ */ ++ parent = tsk->real_parent; ++ else ++ /* reparent: our child is in a different pgrp than ++ * we are, and it was the only connection outside. ++ */ ++ ignored_task = NULL; ++ ++ if (task_pgrp(parent) != pgrp && ++ task_session(parent) == task_session(tsk) && ++ will_become_orphaned_pgrp(pgrp, ignored_task) && ++ has_stopped_jobs(pgrp)) { ++ __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); ++ __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); ++ } ++} ++ ++#ifdef CONFIG_MEMCG ++/* ++ * A task is exiting. If it owned this mm, find a new owner for the mm. ++ */ ++void mm_update_next_owner(struct mm_struct *mm) ++{ ++ struct task_struct *c, *g, *p = current; ++ ++retry: ++ /* ++ * If the exiting or execing task is not the owner, it's ++ * someone else's problem. ++ */ ++ if (mm->owner != p) ++ return; ++ /* ++ * The current owner is exiting/execing and there are no other ++ * candidates. Do not leave the mm pointing to a possibly ++ * freed task structure. ++ */ ++ if (atomic_read(&mm->mm_users) <= 1) { ++ WRITE_ONCE(mm->owner, NULL); ++ return; ++ } ++ ++ read_lock(&tasklist_lock); ++ /* ++ * Search in the children ++ */ ++ list_for_each_entry(c, &p->children, sibling) { ++ if (c->mm == mm) ++ goto assign_new_owner; ++ } ++ ++ /* ++ * Search in the siblings ++ */ ++ list_for_each_entry(c, &p->real_parent->children, sibling) { ++ if (c->mm == mm) ++ goto assign_new_owner; ++ } ++ ++ /* ++ * Search through everything else, we should not get here often. 
++ */ ++ for_each_process(g) { ++ if (g->flags & PF_KTHREAD) ++ continue; ++ for_each_thread(g, c) { ++ if (c->mm == mm) ++ goto assign_new_owner; ++ if (c->mm) ++ break; ++ } ++ } ++ read_unlock(&tasklist_lock); ++ /* ++ * We found no owner yet mm_users > 1: this implies that we are ++ * most likely racing with swapoff (try_to_unuse()) or /proc or ++ * ptrace or page migration (get_task_mm()). Mark owner as NULL. ++ */ ++ WRITE_ONCE(mm->owner, NULL); ++ return; ++ ++assign_new_owner: ++ BUG_ON(c == p); ++ get_task_struct(c); ++ /* ++ * The task_lock protects c->mm from changing. ++ * We always want mm->owner->mm == mm ++ */ ++ task_lock(c); ++ /* ++ * Delay read_unlock() till we have the task_lock() ++ * to ensure that c does not slip away underneath us ++ */ ++ read_unlock(&tasklist_lock); ++ if (c->mm != mm) { ++ task_unlock(c); ++ put_task_struct(c); ++ goto retry; ++ } ++ WRITE_ONCE(mm->owner, c); ++ task_unlock(c); ++ put_task_struct(c); ++} ++#endif /* CONFIG_MEMCG */ ++ ++/* ++ * Turn us into a lazy TLB process if we ++ * aren't already.. ++ */ ++static void exit_mm(void) ++{ ++ struct mm_struct *mm = current->mm; ++ struct core_state *core_state; ++ ++ mm_release(current, mm); ++ if (!mm) ++ return; ++ sync_mm_rss(mm); ++ /* ++ * Serialize with any possible pending coredump. ++ * We must hold mmap_sem around checking core_state ++ * and clearing tsk->mm. The core-inducing thread ++ * will increment ->nr_threads for each thread in the ++ * group with ->mm != NULL. ++ */ ++ down_read(&mm->mmap_sem); ++ core_state = mm->core_state; ++ if (core_state) { ++ struct core_thread self; ++ ++ up_read(&mm->mmap_sem); ++ ++ self.task = current; ++ self.next = xchg(&core_state->dumper.next, &self); ++ /* ++ * Implies mb(), the result of xchg() must be visible ++ * to core_state->dumper. ++ */ ++ if (atomic_dec_and_test(&core_state->nr_threads)) ++ complete(&core_state->startup); ++ ++ for (;;) { ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ if (!self.task) /* see coredump_finish() */ ++ break; ++ freezable_schedule(); ++ } ++ __set_current_state(TASK_RUNNING); ++ down_read(&mm->mmap_sem); ++ } ++ mmgrab(mm); ++ BUG_ON(mm != current->active_mm); ++ /* more a memory barrier than a real lock */ ++ task_lock(current); ++ current->mm = NULL; ++ up_read(&mm->mmap_sem); ++ enter_lazy_tlb(mm, current); ++ task_unlock(current); ++ mm_update_next_owner(mm); ++ mmput(mm); ++ if (test_thread_flag(TIF_MEMDIE)) ++ exit_oom_victim(); ++} ++ ++static struct task_struct *find_alive_thread(struct task_struct *p) ++{ ++ struct task_struct *t; ++ ++ for_each_thread(p, t) { ++ if (!(t->flags & PF_EXITING)) ++ return t; ++ } ++ return NULL; ++} ++ ++static struct task_struct *find_child_reaper(struct task_struct *father, ++ struct list_head *dead) ++ __releases(&tasklist_lock) ++ __acquires(&tasklist_lock) ++{ ++ struct pid_namespace *pid_ns = task_active_pid_ns(father); ++ struct task_struct *reaper = pid_ns->child_reaper; ++ struct task_struct *p, *n; ++ ++ if (likely(reaper != father)) ++ return reaper; ++ ++ reaper = find_alive_thread(father); ++ if (reaper) { ++ pid_ns->child_reaper = reaper; ++ return reaper; ++ } ++ ++ write_unlock_irq(&tasklist_lock); ++ ++ list_for_each_entry_safe(p, n, dead, ptrace_entry) { ++ list_del_init(&p->ptrace_entry); ++ release_task(p); ++ } ++ ++ zap_pid_ns_processes(pid_ns); ++ write_lock_irq(&tasklist_lock); ++ ++ return father; ++} ++ ++/* ++ * When we die, we re-parent all our children, and try to: ++ * 1. 
give them to another thread in our thread group, if such a member exists ++ * 2. give it to the first ancestor process which prctl'd itself as a ++ * child_subreaper for its children (like a service manager) ++ * 3. give it to the init process (PID 1) in our pid namespace ++ */ ++static struct task_struct *find_new_reaper(struct task_struct *father, ++ struct task_struct *child_reaper) ++{ ++ struct task_struct *thread, *reaper; ++ ++ thread = find_alive_thread(father); ++ if (thread) ++ return thread; ++ ++ if (father->signal->has_child_subreaper) { ++ unsigned int ns_level = task_pid(father)->level; ++ /* ++ * Find the first ->is_child_subreaper ancestor in our pid_ns. ++ * We can't check reaper != child_reaper to ensure we do not ++ * cross the namespaces, the exiting parent could be injected ++ * by setns() + fork(). ++ * We check pid->level, this is slightly more efficient than ++ * task_active_pid_ns(reaper) != task_active_pid_ns(father). ++ */ ++ for (reaper = father->real_parent; ++ task_pid(reaper)->level == ns_level; ++ reaper = reaper->real_parent) { ++ if (reaper == &init_task) ++ break; ++ if (!reaper->signal->is_child_subreaper) ++ continue; ++ thread = find_alive_thread(reaper); ++ if (thread) ++ return thread; ++ } ++ } ++ ++ return child_reaper; ++} ++ ++/* ++* Any that need to be release_task'd are put on the @dead list. ++ */ ++static void reparent_leader(struct task_struct *father, struct task_struct *p, ++ struct list_head *dead) ++{ ++ if (unlikely(p->exit_state == EXIT_DEAD)) ++ return; ++ ++ /* We don't want people slaying init. */ ++ p->exit_signal = SIGCHLD; ++ ++ /* If it has exited notify the new parent about this child's death. */ ++ if (!p->ptrace && ++ p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { ++ if (do_notify_parent(p, p->exit_signal)) { ++ p->exit_state = EXIT_DEAD; ++ list_add(&p->ptrace_entry, dead); ++ } ++ } ++ ++ kill_orphaned_pgrp(p, father); ++} ++ ++/* ++ * This does two things: ++ * ++ * A. Make init inherit all the child processes ++ * B. Check to see if any process groups have become orphaned ++ * as a result of our exiting, and if they have any stopped ++ * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) ++ */ ++static void forget_original_parent(struct task_struct *father, ++ struct list_head *dead) ++{ ++ struct task_struct *p, *t, *reaper; ++ ++ if (unlikely(!list_empty(&father->ptraced))) ++ exit_ptrace(father, dead); ++ ++ /* Can drop and reacquire tasklist_lock */ ++ reaper = find_child_reaper(father, dead); ++ if (list_empty(&father->children)) ++ return; ++ ++ reaper = find_new_reaper(father, reaper); ++ list_for_each_entry(p, &father->children, sibling) { ++ for_each_thread(p, t) { ++ t->real_parent = reaper; ++ BUG_ON((!t->ptrace) != (t->parent == father)); ++ if (likely(!t->ptrace)) ++ t->parent = t->real_parent; ++ if (t->pdeath_signal) ++ group_send_sig_info(t->pdeath_signal, ++ SEND_SIG_NOINFO, t, ++ PIDTYPE_TGID); ++ } ++ /* ++ * If this is a threaded reparent there is no need to ++ * notify anyone anything has happened. ++ */ ++ if (!same_thread_group(reaper, father)) ++ reparent_leader(father, p, dead); ++ } ++ list_splice_tail_init(&father->children, &reaper->children); ++} ++ ++/* ++ * Send signals to all our closest relatives so that they know ++ * to properly mourn us.. 
++ */ ++static void exit_notify(struct task_struct *tsk, int group_dead) ++{ ++ bool autoreap; ++ struct task_struct *p, *n; ++ LIST_HEAD(dead); ++ ++ write_lock_irq(&tasklist_lock); ++ forget_original_parent(tsk, &dead); ++ ++ if (group_dead) ++ kill_orphaned_pgrp(tsk->group_leader, NULL); ++ ++ if (unlikely(tsk->ptrace)) { ++ int sig = thread_group_leader(tsk) && ++ thread_group_empty(tsk) && ++ !ptrace_reparented(tsk) ? ++ tsk->exit_signal : SIGCHLD; ++ autoreap = do_notify_parent(tsk, sig); ++ } else if (thread_group_leader(tsk)) { ++ autoreap = thread_group_empty(tsk) && ++ do_notify_parent(tsk, tsk->exit_signal); ++ } else { ++ autoreap = true; ++ } ++ ++ tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; ++ if (tsk->exit_state == EXIT_DEAD) ++ list_add(&tsk->ptrace_entry, &dead); ++ ++ /* mt-exec, de_thread() is waiting for group leader */ ++ if (unlikely(tsk->signal->notify_count < 0)) ++ wake_up_process(tsk->signal->group_exit_task); ++ write_unlock_irq(&tasklist_lock); ++ ++ list_for_each_entry_safe(p, n, &dead, ptrace_entry) { ++ list_del_init(&p->ptrace_entry); ++ release_task(p); ++ } ++} ++ ++#ifdef CONFIG_DEBUG_STACK_USAGE ++static void check_stack_usage(void) ++{ ++ static DEFINE_SPINLOCK(low_water_lock); ++ static int lowest_to_date = THREAD_SIZE; ++ unsigned long free; ++ ++ free = stack_not_used(current); ++ ++ if (free >= lowest_to_date) ++ return; ++ ++ spin_lock(&low_water_lock); ++ if (free < lowest_to_date) { ++ pr_info("%s (%d) used greatest stack depth: %lu bytes left\n", ++ current->comm, task_pid_nr(current), free); ++ lowest_to_date = free; ++ } ++ spin_unlock(&low_water_lock); ++} ++#else ++static inline void check_stack_usage(void) {} ++#endif ++ ++void __noreturn do_exit(long code) ++{ ++ struct task_struct *tsk = current; ++ int group_dead; ++ ++ profile_task_exit(tsk); ++ kcov_task_exit(tsk); ++ ++ WARN_ON(blk_needs_flush_plug(tsk)); ++ ++ if (unlikely(in_interrupt())) ++ panic("Aiee, killing interrupt handler!"); ++ if (unlikely(!tsk->pid)) ++ panic("Attempted to kill the idle task!"); ++ ++ /* ++ * If do_exit is called because this processes oopsed, it's possible ++ * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before ++ * continuing. Amongst other possible reasons, this is to prevent ++ * mm_release()->clear_child_tid() from writing to a user-controlled ++ * kernel address. ++ */ ++ set_fs(USER_DS); ++ ++ ptrace_event(PTRACE_EVENT_EXIT, code); ++ ++ validate_creds_for_do_exit(tsk); ++ ++ /* ++ * We're taking recursive faults here in do_exit. Safest is to just ++ * leave this task alone and wait for reboot. ++ */ ++ if (unlikely(tsk->flags & PF_EXITING)) { ++ pr_alert("Fixing recursive fault but reboot is needed!\n"); ++ /* ++ * We can do this unlocked here. The futex code uses ++ * this flag just to verify whether the pi state ++ * cleanup has been done or not. In the worst case it ++ * loops once more. We pretend that the cleanup was ++ * done as there is no way to return. Either the ++ * OWNER_DIED bit is set by now or we push the blocked ++ * task into the wait for ever nirwana as well. ++ */ ++ tsk->flags |= PF_EXITPIDONE; ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule(); ++ } ++ ++ exit_signals(tsk); /* sets PF_EXITING */ ++ /* ++ * Ensure that all new tsk->pi_lock acquisitions must observe ++ * PF_EXITING. Serializes against futex.c:attach_to_pi_owner(). ++ */ ++ smp_mb(); ++ /* ++ * Ensure that we must observe the pi_state in exit_mm() -> ++ * mm_release() -> exit_pi_state_list(). 
++ */ ++ raw_spin_lock_irq(&tsk->pi_lock); ++ raw_spin_unlock_irq(&tsk->pi_lock); ++ ++ if (unlikely(in_atomic())) { ++ pr_info("note: %s[%d] exited with preempt_count %d\n", ++ current->comm, task_pid_nr(current), ++ preempt_count()); ++ preempt_count_set(PREEMPT_ENABLED); ++ } ++ ++ /* sync mm's RSS info before statistics gathering */ ++ if (tsk->mm) ++ sync_mm_rss(tsk->mm); ++ acct_update_integrals(tsk); ++ group_dead = atomic_dec_and_test(&tsk->signal->live); ++ if (group_dead) { ++ /* ++ * If the last thread of global init has exited, panic ++ * immediately to get a useable coredump. ++ */ ++ if (unlikely(is_global_init(tsk))) ++ panic("Attempted to kill init! exitcode=0x%08x\n", ++ tsk->signal->group_exit_code ?: (int)code); ++ ++#ifdef CONFIG_POSIX_TIMERS ++ hrtimer_cancel(&tsk->signal->real_timer); ++ exit_itimers(tsk->signal); ++#endif ++ if (tsk->mm) ++ setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); ++ } ++ acct_collect(code, group_dead); ++ if (group_dead) ++ tty_audit_exit(); ++ audit_free(tsk); ++ ++ tsk->exit_code = code; ++ taskstats_exit(tsk, group_dead); ++ ++ exit_mm(); ++ ++ if (group_dead) ++ acct_process(); ++ trace_sched_process_exit(tsk); ++ ++ exit_sem(tsk); ++ exit_shm(tsk); ++ exit_files(tsk); ++ exit_fs(tsk); ++ if (group_dead) ++ disassociate_ctty(1); ++ exit_task_namespaces(tsk); ++ exit_task_work(tsk); ++ exit_thread(tsk); ++ ++ /* ++ * Flush inherited counters to the parent - before the parent ++ * gets woken up by child-exit notifications. ++ * ++ * because of cgroup mode, must be called before cgroup_exit() ++ */ ++ perf_event_exit_task(tsk); ++ ++ sched_autogroup_exit_task(tsk); ++ cgroup_exit(tsk); ++ ++ /* ++ * FIXME: do that only when needed, using sched_exit tracepoint ++ */ ++ flush_ptrace_hw_breakpoint(tsk); ++ ++ exit_tasks_rcu_start(); ++ exit_notify(tsk, group_dead); ++ proc_exit_connector(tsk); ++ mpol_put_task_policy(tsk); ++#ifdef CONFIG_FUTEX ++ if (unlikely(current->pi_state_cache)) ++ kfree(current->pi_state_cache); ++#endif ++ /* ++ * Make sure we are holding no locks: ++ */ ++ debug_check_no_locks_held(); ++ /* ++ * We can do this unlocked here. The futex code uses this flag ++ * just to verify whether the pi state cleanup has been done ++ * or not. In the worst case it loops once more. ++ */ ++ tsk->flags |= PF_EXITPIDONE; ++ ++ if (tsk->io_context) ++ exit_io_context(tsk); ++ ++ if (tsk->splice_pipe) ++ free_pipe_info(tsk->splice_pipe); ++ ++ if (tsk->task_frag.page) ++ put_page(tsk->task_frag.page); ++ ++ validate_creds_for_do_exit(tsk); ++ ++ check_stack_usage(); ++ preempt_disable(); ++ if (tsk->nr_dirtied) ++ __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); ++ exit_rcu(); ++ exit_tasks_rcu_finish(); ++ ++ lockdep_free_task(tsk); ++ do_task_dead(); ++} ++EXPORT_SYMBOL_GPL(do_exit); ++ ++void complete_and_exit(struct completion *comp, long code) ++{ ++ if (comp) ++ complete(comp); ++ ++ do_exit(code); ++} ++EXPORT_SYMBOL(complete_and_exit); ++ ++SYSCALL_DEFINE1(exit, int, error_code) ++{ ++ do_exit((error_code&0xff)<<8); ++} ++ ++/* ++ * Take down every thread in the group. This is called by fatal signals ++ * as well as by sys_exit_group (below). 
++ */ ++void ++do_group_exit(int exit_code) ++{ ++ struct signal_struct *sig = current->signal; ++ ++ BUG_ON(exit_code & 0x80); /* core dumps don't get here */ ++ ++ if (signal_group_exit(sig)) ++ exit_code = sig->group_exit_code; ++ else if (!thread_group_empty(current)) { ++ struct sighand_struct *const sighand = current->sighand; ++ ++ spin_lock_irq(&sighand->siglock); ++ if (signal_group_exit(sig)) ++ /* Another thread got here before we took the lock. */ ++ exit_code = sig->group_exit_code; ++ else { ++ sig->group_exit_code = exit_code; ++ sig->flags = SIGNAL_GROUP_EXIT; ++ zap_other_threads(current); ++ } ++ spin_unlock_irq(&sighand->siglock); ++ } ++ ++ do_exit(exit_code); ++ /* NOTREACHED */ ++} ++ ++/* ++ * this kills every thread in the thread group. Note that any externally ++ * wait4()-ing process will get the correct exit code - even if this ++ * thread is not the thread group leader. ++ */ ++SYSCALL_DEFINE1(exit_group, int, error_code) ++{ ++ do_group_exit((error_code & 0xff) << 8); ++ /* NOTREACHED */ ++ return 0; ++} ++ ++struct waitid_info { ++ pid_t pid; ++ uid_t uid; ++ int status; ++ int cause; ++}; ++ ++struct wait_opts { ++ enum pid_type wo_type; ++ int wo_flags; ++ struct pid *wo_pid; ++ ++ struct waitid_info *wo_info; ++ int wo_stat; ++ struct rusage *wo_rusage; ++ ++ wait_queue_entry_t child_wait; ++ int notask_error; ++}; ++ ++static int eligible_pid(struct wait_opts *wo, struct task_struct *p) ++{ ++ return wo->wo_type == PIDTYPE_MAX || ++ task_pid_type(p, wo->wo_type) == wo->wo_pid; ++} ++ ++static int ++eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p) ++{ ++ if (!eligible_pid(wo, p)) ++ return 0; ++ ++ /* ++ * Wait for all children (clone and not) if __WALL is set or ++ * if it is traced by us. ++ */ ++ if (ptrace || (wo->wo_flags & __WALL)) ++ return 1; ++ ++ /* ++ * Otherwise, wait for clone children *only* if __WCLONE is set; ++ * otherwise, wait for non-clone children *only*. ++ * ++ * Note: a "clone" child here is one that reports to its parent ++ * using a signal other than SIGCHLD, or a non-leader thread which ++ * we can only see if it is traced by us. ++ */ ++ if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) ++ return 0; ++ ++ return 1; ++} ++ ++/* ++ * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold ++ * read_lock(&tasklist_lock) on entry. If we return zero, we still hold ++ * the lock and this task is uninteresting. If we return nonzero, we have ++ * released the lock and the system call should return. ++ */ ++static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) ++{ ++ int state, status; ++ pid_t pid = task_pid_vnr(p); ++ uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); ++ struct waitid_info *infop; ++ ++ if (!likely(wo->wo_flags & WEXITED)) ++ return 0; ++ ++ if (unlikely(wo->wo_flags & WNOWAIT)) { ++ status = p->exit_code; ++ get_task_struct(p); ++ read_unlock(&tasklist_lock); ++ sched_annotate_sleep(); ++ if (wo->wo_rusage) ++ getrusage(p, RUSAGE_BOTH, wo->wo_rusage); ++ put_task_struct(p); ++ goto out_info; ++ } ++ /* ++ * Move the task's state to DEAD/TRACE, only one thread can do this. ++ */ ++ state = (ptrace_reparented(p) && thread_group_leader(p)) ? ++ EXIT_TRACE : EXIT_DEAD; ++ if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE) ++ return 0; ++ /* ++ * We own this thread, nobody else can reap it. ++ */ ++ read_unlock(&tasklist_lock); ++ sched_annotate_sleep(); ++ ++ /* ++ * Check thread_group_leader() to exclude the traced sub-threads. 
++ */ ++ if (state == EXIT_DEAD && thread_group_leader(p)) { ++ struct signal_struct *sig = p->signal; ++ struct signal_struct *psig = current->signal; ++ unsigned long maxrss; ++ u64 tgutime, tgstime; ++ ++ /* ++ * The resource counters for the group leader are in its ++ * own task_struct. Those for dead threads in the group ++ * are in its signal_struct, as are those for the child ++ * processes it has previously reaped. All these ++ * accumulate in the parent's signal_struct c* fields. ++ * ++ * We don't bother to take a lock here to protect these ++ * p->signal fields because the whole thread group is dead ++ * and nobody can change them. ++ * ++ * psig->stats_lock also protects us from our sub-theads ++ * which can reap other children at the same time. Until ++ * we change k_getrusage()-like users to rely on this lock ++ * we have to take ->siglock as well. ++ * ++ * We use thread_group_cputime_adjusted() to get times for ++ * the thread group, which consolidates times for all threads ++ * in the group including the group leader. ++ */ ++ thread_group_cputime_adjusted(p, &tgutime, &tgstime); ++ spin_lock_irq(¤t->sighand->siglock); ++ write_seqlock(&psig->stats_lock); ++ psig->cutime += tgutime + sig->cutime; ++ psig->cstime += tgstime + sig->cstime; ++ psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; ++ psig->cmin_flt += ++ p->min_flt + sig->min_flt + sig->cmin_flt; ++ psig->cmaj_flt += ++ p->maj_flt + sig->maj_flt + sig->cmaj_flt; ++ psig->cnvcsw += ++ p->nvcsw + sig->nvcsw + sig->cnvcsw; ++ psig->cnivcsw += ++ p->nivcsw + sig->nivcsw + sig->cnivcsw; ++ psig->cinblock += ++ task_io_get_inblock(p) + ++ sig->inblock + sig->cinblock; ++ psig->coublock += ++ task_io_get_oublock(p) + ++ sig->oublock + sig->coublock; ++ maxrss = max(sig->maxrss, sig->cmaxrss); ++ if (psig->cmaxrss < maxrss) ++ psig->cmaxrss = maxrss; ++ task_io_accounting_add(&psig->ioac, &p->ioac); ++ task_io_accounting_add(&psig->ioac, &sig->ioac); ++ write_sequnlock(&psig->stats_lock); ++ spin_unlock_irq(¤t->sighand->siglock); ++ } ++ ++ if (wo->wo_rusage) ++ getrusage(p, RUSAGE_BOTH, wo->wo_rusage); ++ status = (p->signal->flags & SIGNAL_GROUP_EXIT) ++ ? p->signal->group_exit_code : p->exit_code; ++ wo->wo_stat = status; ++ ++ if (state == EXIT_TRACE) { ++ write_lock_irq(&tasklist_lock); ++ /* We dropped tasklist, ptracer could die and untrace */ ++ ptrace_unlink(p); ++ ++ /* If parent wants a zombie, don't release it now */ ++ state = EXIT_ZOMBIE; ++ if (do_notify_parent(p, p->exit_signal)) ++ state = EXIT_DEAD; ++ p->exit_state = state; ++ write_unlock_irq(&tasklist_lock); ++ } ++ if (state == EXIT_DEAD) ++ release_task(p); ++ ++out_info: ++ infop = wo->wo_info; ++ if (infop) { ++ if ((status & 0x7f) == 0) { ++ infop->cause = CLD_EXITED; ++ infop->status = status >> 8; ++ } else { ++ infop->cause = (status & 0x80) ? CLD_DUMPED : CLD_KILLED; ++ infop->status = status & 0x7f; ++ } ++ infop->pid = pid; ++ infop->uid = uid; ++ } ++ ++ return pid; ++} ++ ++static int *task_stopped_code(struct task_struct *p, bool ptrace) ++{ ++ if (ptrace) { ++ if (task_is_traced(p) && !(p->jobctl & JOBCTL_LISTENING)) ++ return &p->exit_code; ++ } else { ++ if (p->signal->flags & SIGNAL_STOP_STOPPED) ++ return &p->signal->group_exit_code; ++ } ++ return NULL; ++} ++ ++/** ++ * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED ++ * @wo: wait options ++ * @ptrace: is the wait for ptrace ++ * @p: task to wait for ++ * ++ * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED. 
++ * ++ * CONTEXT: ++ * read_lock(&tasklist_lock), which is released if return value is ++ * non-zero. Also, grabs and releases @p->sighand->siglock. ++ * ++ * RETURNS: ++ * 0 if wait condition didn't exist and search for other wait conditions ++ * should continue. Non-zero return, -errno on failure and @p's pid on ++ * success, implies that tasklist_lock is released and wait condition ++ * search should terminate. ++ */ ++static int wait_task_stopped(struct wait_opts *wo, ++ int ptrace, struct task_struct *p) ++{ ++ struct waitid_info *infop; ++ int exit_code, *p_code, why; ++ uid_t uid = 0; /* unneeded, required by compiler */ ++ pid_t pid; ++ ++ /* ++ * Traditionally we see ptrace'd stopped tasks regardless of options. ++ */ ++ if (!ptrace && !(wo->wo_flags & WUNTRACED)) ++ return 0; ++ ++ if (!task_stopped_code(p, ptrace)) ++ return 0; ++ ++ exit_code = 0; ++ spin_lock_irq(&p->sighand->siglock); ++ ++ p_code = task_stopped_code(p, ptrace); ++ if (unlikely(!p_code)) ++ goto unlock_sig; ++ ++ exit_code = *p_code; ++ if (!exit_code) ++ goto unlock_sig; ++ ++ if (!unlikely(wo->wo_flags & WNOWAIT)) ++ *p_code = 0; ++ ++ uid = from_kuid_munged(current_user_ns(), task_uid(p)); ++unlock_sig: ++ spin_unlock_irq(&p->sighand->siglock); ++ if (!exit_code) ++ return 0; ++ ++ /* ++ * Now we are pretty sure this task is interesting. ++ * Make sure it doesn't get reaped out from under us while we ++ * give up the lock and then examine it below. We don't want to ++ * keep holding onto the tasklist_lock while we call getrusage and ++ * possibly take page faults for user memory. ++ */ ++ get_task_struct(p); ++ pid = task_pid_vnr(p); ++ why = ptrace ? CLD_TRAPPED : CLD_STOPPED; ++ read_unlock(&tasklist_lock); ++ sched_annotate_sleep(); ++ if (wo->wo_rusage) ++ getrusage(p, RUSAGE_BOTH, wo->wo_rusage); ++ put_task_struct(p); ++ ++ if (likely(!(wo->wo_flags & WNOWAIT))) ++ wo->wo_stat = (exit_code << 8) | 0x7f; ++ ++ infop = wo->wo_info; ++ if (infop) { ++ infop->cause = why; ++ infop->status = exit_code; ++ infop->pid = pid; ++ infop->uid = uid; ++ } ++ return pid; ++} ++ ++/* ++ * Handle do_wait work for one task in a live, non-stopped state. ++ * read_lock(&tasklist_lock) on entry. If we return zero, we still hold ++ * the lock and this task is uninteresting. If we return nonzero, we have ++ * released the lock and the system call should return. ++ */ ++static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) ++{ ++ struct waitid_info *infop; ++ pid_t pid; ++ uid_t uid; ++ ++ if (!unlikely(wo->wo_flags & WCONTINUED)) ++ return 0; ++ ++ if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) ++ return 0; ++ ++ spin_lock_irq(&p->sighand->siglock); ++ /* Re-check with the lock held. */ ++ if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) { ++ spin_unlock_irq(&p->sighand->siglock); ++ return 0; ++ } ++ if (!unlikely(wo->wo_flags & WNOWAIT)) ++ p->signal->flags &= ~SIGNAL_STOP_CONTINUED; ++ uid = from_kuid_munged(current_user_ns(), task_uid(p)); ++ spin_unlock_irq(&p->sighand->siglock); ++ ++ pid = task_pid_vnr(p); ++ get_task_struct(p); ++ read_unlock(&tasklist_lock); ++ sched_annotate_sleep(); ++ if (wo->wo_rusage) ++ getrusage(p, RUSAGE_BOTH, wo->wo_rusage); ++ put_task_struct(p); ++ ++ infop = wo->wo_info; ++ if (!infop) { ++ wo->wo_stat = 0xffff; ++ } else { ++ infop->cause = CLD_CONTINUED; ++ infop->pid = pid; ++ infop->uid = uid; ++ infop->status = SIGCONT; ++ } ++ return pid; ++} ++ ++/* ++ * Consider @p for a wait by @parent. 
++ * ++ * -ECHILD should be in ->notask_error before the first call. ++ * Returns nonzero for a final return, when we have unlocked tasklist_lock. ++ * Returns zero if the search for a child should continue; ++ * then ->notask_error is 0 if @p is an eligible child, ++ * or still -ECHILD. ++ */ ++static int wait_consider_task(struct wait_opts *wo, int ptrace, ++ struct task_struct *p) ++{ ++ /* ++ * We can race with wait_task_zombie() from another thread. ++ * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition ++ * can't confuse the checks below. ++ */ ++ int exit_state = READ_ONCE(p->exit_state); ++ int ret; ++ ++ if (unlikely(exit_state == EXIT_DEAD)) ++ return 0; ++ ++ ret = eligible_child(wo, ptrace, p); ++ if (!ret) ++ return ret; ++ ++ if (unlikely(exit_state == EXIT_TRACE)) { ++ /* ++ * ptrace == 0 means we are the natural parent. In this case ++ * we should clear notask_error, debugger will notify us. ++ */ ++ if (likely(!ptrace)) ++ wo->notask_error = 0; ++ return 0; ++ } ++ ++ if (likely(!ptrace) && unlikely(p->ptrace)) { ++ /* ++ * If it is traced by its real parent's group, just pretend ++ * the caller is ptrace_do_wait() and reap this child if it ++ * is zombie. ++ * ++ * This also hides group stop state from real parent; otherwise ++ * a single stop can be reported twice as group and ptrace stop. ++ * If a ptracer wants to distinguish these two events for its ++ * own children it should create a separate process which takes ++ * the role of real parent. ++ */ ++ if (!ptrace_reparented(p)) ++ ptrace = 1; ++ } ++ ++ /* slay zombie? */ ++ if (exit_state == EXIT_ZOMBIE) { ++ /* we don't reap group leaders with subthreads */ ++ if (!delay_group_leader(p)) { ++ /* ++ * A zombie ptracee is only visible to its ptracer. ++ * Notification and reaping will be cascaded to the ++ * real parent when the ptracer detaches. ++ */ ++ if (unlikely(ptrace) || likely(!p->ptrace)) ++ return wait_task_zombie(wo, p); ++ } ++ ++ /* ++ * Allow access to stopped/continued state via zombie by ++ * falling through. Clearing of notask_error is complex. ++ * ++ * When !@ptrace: ++ * ++ * If WEXITED is set, notask_error should naturally be ++ * cleared. If not, subset of WSTOPPED|WCONTINUED is set, ++ * so, if there are live subthreads, there are events to ++ * wait for. If all subthreads are dead, it's still safe ++ * to clear - this function will be called again in finite ++ * amount time once all the subthreads are released and ++ * will then return without clearing. ++ * ++ * When @ptrace: ++ * ++ * Stopped state is per-task and thus can't change once the ++ * target task dies. Only continued and exited can happen. ++ * Clear notask_error if WCONTINUED | WEXITED. ++ */ ++ if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) ++ wo->notask_error = 0; ++ } else { ++ /* ++ * @p is alive and it's gonna stop, continue or exit, so ++ * there always is something to wait for. ++ */ ++ wo->notask_error = 0; ++ } ++ ++ /* ++ * Wait for stopped. Depending on @ptrace, different stopped state ++ * is used and the two don't interact with each other. ++ */ ++ ret = wait_task_stopped(wo, ptrace, p); ++ if (ret) ++ return ret; ++ ++ /* ++ * Wait for continued. There's only one continued state and the ++ * ptracer can consume it which can confuse the real parent. Don't ++ * use WCONTINUED from ptracer. You don't need or want it. ++ */ ++ return wait_task_continued(wo, p); ++} ++ ++/* ++ * Do the work of do_wait() for one thread in the group, @tsk. 
++ * ++ * -ECHILD should be in ->notask_error before the first call. ++ * Returns nonzero for a final return, when we have unlocked tasklist_lock. ++ * Returns zero if the search for a child should continue; then ++ * ->notask_error is 0 if there were any eligible children, ++ * or still -ECHILD. ++ */ ++static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) ++{ ++ struct task_struct *p; ++ ++ list_for_each_entry(p, &tsk->children, sibling) { ++ int ret = wait_consider_task(wo, 0, p); ++ ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) ++{ ++ struct task_struct *p; ++ ++ list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { ++ int ret = wait_consider_task(wo, 1, p); ++ ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode, ++ int sync, void *key) ++{ ++ struct wait_opts *wo = container_of(wait, struct wait_opts, ++ child_wait); ++ struct task_struct *p = key; ++ ++ if (!eligible_pid(wo, p)) ++ return 0; ++ ++ if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent) ++ return 0; ++ ++ return default_wake_function(wait, mode, sync, key); ++} ++ ++void __wake_up_parent(struct task_struct *p, struct task_struct *parent) ++{ ++ __wake_up_sync_key(&parent->signal->wait_chldexit, ++ TASK_INTERRUPTIBLE, 1, p); ++} ++ ++static long do_wait(struct wait_opts *wo) ++{ ++ struct task_struct *tsk; ++ int retval; ++ ++ trace_sched_process_wait(wo->wo_pid); ++ ++ init_waitqueue_func_entry(&wo->child_wait, child_wait_callback); ++ wo->child_wait.private = current; ++ add_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); ++repeat: ++ /* ++ * If there is nothing that can match our criteria, just get out. ++ * We will clear ->notask_error to zero if we see any child that ++ * might later match our criteria, even if we are not able to reap ++ * it yet. 
++ */ ++ wo->notask_error = -ECHILD; ++ if ((wo->wo_type < PIDTYPE_MAX) && ++ (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type]))) ++ goto notask; ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ read_lock(&tasklist_lock); ++ tsk = current; ++ do { ++ retval = do_wait_thread(wo, tsk); ++ if (retval) ++ goto end; ++ ++ retval = ptrace_do_wait(wo, tsk); ++ if (retval) ++ goto end; ++ ++ if (wo->wo_flags & __WNOTHREAD) ++ break; ++ } while_each_thread(current, tsk); ++ read_unlock(&tasklist_lock); ++ ++notask: ++ retval = wo->notask_error; ++ if (!retval && !(wo->wo_flags & WNOHANG)) { ++ retval = -ERESTARTSYS; ++ if (!signal_pending(current)) { ++ schedule(); ++ goto repeat; ++ } ++ } ++end: ++ __set_current_state(TASK_RUNNING); ++ remove_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); ++ return retval; ++} ++ ++static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, ++ int options, struct rusage *ru) ++{ ++ struct wait_opts wo; ++ struct pid *pid = NULL; ++ enum pid_type type; ++ long ret; ++ ++ if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED| ++ __WNOTHREAD|__WCLONE|__WALL)) ++ return -EINVAL; ++ if (!(options & (WEXITED|WSTOPPED|WCONTINUED))) ++ return -EINVAL; ++ ++ switch (which) { ++ case P_ALL: ++ type = PIDTYPE_MAX; ++ break; ++ case P_PID: ++ type = PIDTYPE_PID; ++ if (upid <= 0) ++ return -EINVAL; ++ break; ++ case P_PGID: ++ type = PIDTYPE_PGID; ++ if (upid <= 0) ++ return -EINVAL; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ if (type < PIDTYPE_MAX) ++ pid = find_get_pid(upid); ++ ++ wo.wo_type = type; ++ wo.wo_pid = pid; ++ wo.wo_flags = options; ++ wo.wo_info = infop; ++ wo.wo_rusage = ru; ++ ret = do_wait(&wo); ++ ++ put_pid(pid); ++ return ret; ++} ++ ++SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, ++ infop, int, options, struct rusage __user *, ru) ++{ ++ struct rusage r; ++ struct waitid_info info = {.status = 0}; ++ long err = kernel_waitid(which, upid, &info, options, ru ? 
&r : NULL); ++ int signo = 0; ++ ++ if (err > 0) { ++ signo = SIGCHLD; ++ err = 0; ++ if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) ++ return -EFAULT; ++ } ++ if (!infop) ++ return err; ++ ++ if (!user_access_begin(infop, sizeof(*infop))) ++ return -EFAULT; ++ ++ unsafe_put_user(signo, &infop->si_signo, Efault); ++ unsafe_put_user(0, &infop->si_errno, Efault); ++ unsafe_put_user(info.cause, &infop->si_code, Efault); ++ unsafe_put_user(info.pid, &infop->si_pid, Efault); ++ unsafe_put_user(info.uid, &infop->si_uid, Efault); ++ unsafe_put_user(info.status, &infop->si_status, Efault); ++ user_access_end(); ++ return err; ++Efault: ++ user_access_end(); ++ return -EFAULT; ++} ++ ++long kernel_wait4(pid_t upid, int __user *stat_addr, int options, ++ struct rusage *ru) ++{ ++ struct wait_opts wo; ++ struct pid *pid = NULL; ++ enum pid_type type; ++ long ret; ++ ++ if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| ++ __WNOTHREAD|__WCLONE|__WALL)) ++ return -EINVAL; ++ ++ /* -INT_MIN is not defined */ ++ if (upid == INT_MIN) ++ return -ESRCH; ++ ++ if (upid == -1) ++ type = PIDTYPE_MAX; ++ else if (upid < 0) { ++ type = PIDTYPE_PGID; ++ pid = find_get_pid(-upid); ++ } else if (upid == 0) { ++ type = PIDTYPE_PGID; ++ pid = get_task_pid(current, PIDTYPE_PGID); ++ } else /* upid > 0 */ { ++ type = PIDTYPE_PID; ++ pid = find_get_pid(upid); ++ } ++ ++ wo.wo_type = type; ++ wo.wo_pid = pid; ++ wo.wo_flags = options | WEXITED; ++ wo.wo_info = NULL; ++ wo.wo_stat = 0; ++ wo.wo_rusage = ru; ++ ret = do_wait(&wo); ++ put_pid(pid); ++ if (ret > 0 && stat_addr && put_user(wo.wo_stat, stat_addr)) ++ ret = -EFAULT; ++ ++ return ret; ++} ++ ++SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, ++ int, options, struct rusage __user *, ru) ++{ ++ struct rusage r; ++ long err = kernel_wait4(upid, stat_addr, options, ru ? &r : NULL); ++ ++ if (err > 0) { ++ if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) ++ return -EFAULT; ++ } ++ return err; ++} ++ ++#ifdef __ARCH_WANT_SYS_WAITPID ++ ++/* ++ * sys_waitpid() remains for compatibility. waitpid() should be ++ * implemented by calling sys_wait4() from libc.a. ++ */ ++SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) ++{ ++ return kernel_wait4(pid, stat_addr, options, NULL); ++} ++ ++#endif ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE4(wait4, ++ compat_pid_t, pid, ++ compat_uint_t __user *, stat_addr, ++ int, options, ++ struct compat_rusage __user *, ru) ++{ ++ struct rusage r; ++ long err = kernel_wait4(pid, stat_addr, options, ru ? &r : NULL); ++ if (err > 0) { ++ if (ru && put_compat_rusage(&r, ru)) ++ return -EFAULT; ++ } ++ return err; ++} ++ ++COMPAT_SYSCALL_DEFINE5(waitid, ++ int, which, compat_pid_t, pid, ++ struct compat_siginfo __user *, infop, int, options, ++ struct compat_rusage __user *, uru) ++{ ++ struct rusage ru; ++ struct waitid_info info = {.status = 0}; ++ long err = kernel_waitid(which, pid, &info, options, uru ? 
&ru : NULL); ++ int signo = 0; ++ if (err > 0) { ++ signo = SIGCHLD; ++ err = 0; ++ if (uru) { ++ /* kernel_waitid() overwrites everything in ru */ ++ if (COMPAT_USE_64BIT_TIME) ++ err = copy_to_user(uru, &ru, sizeof(ru)); ++ else ++ err = put_compat_rusage(&ru, uru); ++ if (err) ++ return -EFAULT; ++ } ++ } ++ ++ if (!infop) ++ return err; ++ ++ if (!user_access_begin(infop, sizeof(*infop))) ++ return -EFAULT; ++ ++ unsafe_put_user(signo, &infop->si_signo, Efault); ++ unsafe_put_user(0, &infop->si_errno, Efault); ++ unsafe_put_user(info.cause, &infop->si_code, Efault); ++ unsafe_put_user(info.pid, &infop->si_pid, Efault); ++ unsafe_put_user(info.uid, &infop->si_uid, Efault); ++ unsafe_put_user(info.status, &infop->si_status, Efault); ++ user_access_end(); ++ return err; ++Efault: ++ user_access_end(); ++ return -EFAULT; ++} ++#endif ++ ++__weak void abort(void) ++{ ++ BUG(); ++ ++ /* if that doesn't kill us, halt */ ++ panic("Oops failed to kill thread"); ++} ++EXPORT_SYMBOL(abort); +diff -uprN kernel/kernel/fork.c kernel_new/kernel/fork.c +--- kernel/kernel/fork.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/fork.c 2021-04-01 18:28:07.805863120 +0800 +@@ -54,6 +54,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -91,6 +92,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -886,6 +888,8 @@ static struct task_struct *dup_task_stru + #endif + + setup_thread_stack(tsk, orig); ++ __ipipe_init_threadflags(task_thread_info(tsk)); ++ __ipipe_init_threadinfo(&task_thread_info(tsk)->ipipe_data); + clear_user_return_notifier(tsk); + clear_tsk_need_resched(tsk); + set_task_stack_end_magic(tsk); +@@ -1055,6 +1059,7 @@ static inline void __mmput(struct mm_str + exit_aio(mm); + ksm_exit(mm); + khugepaged_exit(mm); /* must run before exit_mmap */ ++ __ipipe_report_cleanup(mm); + exit_mmap(mm); + mm_put_huge_zero_page(mm); + set_mm_exe_file(mm, NULL); +diff -uprN kernel/kernel/fork.c.orig kernel_new/kernel/fork.c.orig +--- kernel/kernel/fork.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/fork.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2730 @@ ++/* ++ * linux/kernel/fork.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ */ ++ ++/* ++ * 'fork.c' contains the help-routines for the 'fork' system call ++ * (see also entry.S and others). ++ * Fork is rather simple, once you get the hang of it, but the memory ++ * management can be a bitch. 
See 'mm/memory.c': 'copy_page_range()' ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++/* ++ * Minimum number of threads to boot the kernel ++ */ ++#define MIN_THREADS 20 ++ ++/* ++ * Maximum number of threads ++ */ ++#define MAX_THREADS FUTEX_TID_MASK ++ ++/* ++ * Protected counters by write_lock_irq(&tasklist_lock) ++ */ ++unsigned long total_forks; /* Handle normal Linux uptimes. */ ++int nr_threads; /* The idle threads do not count.. */ ++ ++int max_threads; /* tunable limit on nr_threads */ ++ ++DEFINE_PER_CPU(unsigned long, process_counts) = 0; ++ ++__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ ++ ++#ifdef CONFIG_PROVE_RCU ++int lockdep_tasklist_lock_is_held(void) ++{ ++ return lockdep_is_held(&tasklist_lock); ++} ++EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held); ++#endif /* #ifdef CONFIG_PROVE_RCU */ ++ ++int nr_processes(void) ++{ ++ int cpu; ++ int total = 0; ++ ++ for_each_possible_cpu(cpu) ++ total += per_cpu(process_counts, cpu); ++ ++ return total; ++} ++ ++void __weak arch_release_task_struct(struct task_struct *tsk) ++{ ++} ++ ++#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR ++static struct kmem_cache *task_struct_cachep; ++ ++static inline struct task_struct *alloc_task_struct_node(int node) ++{ ++ return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node); ++} ++ ++static inline void free_task_struct(struct task_struct *tsk) ++{ ++ kmem_cache_free(task_struct_cachep, tsk); ++} ++#endif ++ ++#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR ++ ++/* ++ * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a ++ * kmemcache based allocator. ++ */ ++# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) ++ ++#ifdef CONFIG_VMAP_STACK ++/* ++ * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB ++ * flush. Try to minimize the number of calls by caching stacks. 
++ */ ++#define NR_CACHED_STACKS 2 ++static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); ++ ++static int free_vm_stack_cache(unsigned int cpu) ++{ ++ struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu); ++ int i; ++ ++ for (i = 0; i < NR_CACHED_STACKS; i++) { ++ struct vm_struct *vm_stack = cached_vm_stacks[i]; ++ ++ if (!vm_stack) ++ continue; ++ ++ vfree(vm_stack->addr); ++ cached_vm_stacks[i] = NULL; ++ } ++ ++ return 0; ++} ++#endif ++ ++static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) ++{ ++#ifdef CONFIG_VMAP_STACK ++ void *stack; ++ int i; ++ ++ for (i = 0; i < NR_CACHED_STACKS; i++) { ++ struct vm_struct *s; ++ ++ s = this_cpu_xchg(cached_stacks[i], NULL); ++ ++ if (!s) ++ continue; ++ ++ /* Clear stale pointers from reused stack. */ ++ memset(s->addr, 0, THREAD_SIZE); ++ ++ tsk->stack_vm_area = s; ++ tsk->stack = s->addr; ++ return s->addr; ++ } ++ ++ /* ++ * Allocated stacks are cached and later reused by new threads, ++ * so memcg accounting is performed manually on assigning/releasing ++ * stacks to tasks. Drop __GFP_ACCOUNT. ++ */ ++ stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, ++ VMALLOC_START, VMALLOC_END, ++ THREADINFO_GFP & ~__GFP_ACCOUNT, ++ PAGE_KERNEL, ++ 0, node, __builtin_return_address(0)); ++ ++ /* ++ * We can't call find_vm_area() in interrupt context, and ++ * free_thread_stack() can be called in interrupt context, ++ * so cache the vm_struct. ++ */ ++ if (stack) { ++ tsk->stack_vm_area = find_vm_area(stack); ++ tsk->stack = stack; ++ } ++ return stack; ++#else ++ struct page *page = alloc_pages_node(node, THREADINFO_GFP, ++ THREAD_SIZE_ORDER); ++ ++ if (likely(page)) { ++ tsk->stack = page_address(page); ++ return tsk->stack; ++ } ++ return NULL; ++#endif ++} ++ ++static inline void free_thread_stack(struct task_struct *tsk) ++{ ++#ifdef CONFIG_VMAP_STACK ++ struct vm_struct *vm = task_stack_vm_area(tsk); ++ ++ if (vm) { ++ int i; ++ ++ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { ++ mod_memcg_page_state(vm->pages[i], ++ MEMCG_KERNEL_STACK_KB, ++ -(int)(PAGE_SIZE / 1024)); ++ ++ memcg_kmem_uncharge(vm->pages[i], 0); ++ } ++ ++ for (i = 0; i < NR_CACHED_STACKS; i++) { ++ if (this_cpu_cmpxchg(cached_stacks[i], ++ NULL, tsk->stack_vm_area) != NULL) ++ continue; ++ ++ return; ++ } ++ ++ vfree_atomic(tsk->stack); ++ return; ++ } ++#endif ++ ++ __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); ++} ++# else ++static struct kmem_cache *thread_stack_cache; ++ ++static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, ++ int node) ++{ ++ unsigned long *stack; ++ stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); ++ tsk->stack = stack; ++ return stack; ++} ++ ++static void free_thread_stack(struct task_struct *tsk) ++{ ++ kmem_cache_free(thread_stack_cache, tsk->stack); ++} ++ ++void thread_stack_cache_init(void) ++{ ++ thread_stack_cache = kmem_cache_create_usercopy("thread_stack", ++ THREAD_SIZE, THREAD_SIZE, 0, 0, ++ THREAD_SIZE, NULL); ++ BUG_ON(thread_stack_cache == NULL); ++} ++# endif ++#endif ++ ++/* SLAB cache for signal_struct structures (tsk->signal) */ ++static struct kmem_cache *signal_cachep; ++ ++/* SLAB cache for sighand_struct structures (tsk->sighand) */ ++struct kmem_cache *sighand_cachep; ++ ++/* SLAB cache for files_struct structures (tsk->files) */ ++struct kmem_cache *files_cachep; ++ ++/* SLAB cache for fs_struct structures (tsk->fs) */ ++struct kmem_cache *fs_cachep; ++ ++/* SLAB cache for vm_area_struct structures */ 
++static struct kmem_cache *vm_area_cachep; ++ ++/* SLAB cache for mm_struct structures (tsk->mm) */ ++static struct kmem_cache *mm_cachep; ++ ++struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) ++{ ++ struct vm_area_struct *vma; ++ ++ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); ++ if (vma) ++ vma_init(vma, mm); ++ return vma; ++} ++ ++struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) ++{ ++ struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); ++ ++ if (new) { ++ *new = *orig; ++ INIT_LIST_HEAD(&new->anon_vma_chain); ++ } ++ return new; ++} ++ ++void vm_area_free(struct vm_area_struct *vma) ++{ ++ kmem_cache_free(vm_area_cachep, vma); ++} ++ ++static void account_kernel_stack(struct task_struct *tsk, int account) ++{ ++ void *stack = task_stack_page(tsk); ++ struct vm_struct *vm = task_stack_vm_area(tsk); ++ ++ BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); ++ ++ if (vm) { ++ int i; ++ ++ BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); ++ ++ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { ++ mod_zone_page_state(page_zone(vm->pages[i]), ++ NR_KERNEL_STACK_KB, ++ PAGE_SIZE / 1024 * account); ++ } ++ } else { ++ /* ++ * All stack pages are in the same zone and belong to the ++ * same memcg. ++ */ ++ struct page *first_page = virt_to_page(stack); ++ ++ mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, ++ THREAD_SIZE / 1024 * account); ++ ++ mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB, ++ account * (THREAD_SIZE / 1024)); ++ } ++} ++ ++static int memcg_charge_kernel_stack(struct task_struct *tsk) ++{ ++#ifdef CONFIG_VMAP_STACK ++ struct vm_struct *vm = task_stack_vm_area(tsk); ++ int ret; ++ ++ if (vm) { ++ int i; ++ ++ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { ++ /* ++ * If memcg_kmem_charge() fails, page->mem_cgroup ++ * pointer is NULL, and both memcg_kmem_uncharge() ++ * and mod_memcg_page_state() in free_thread_stack() ++ * will ignore this page. So it's safe. ++ */ ++ ret = memcg_kmem_charge(vm->pages[i], GFP_KERNEL, 0); ++ if (ret) ++ return ret; ++ ++ mod_memcg_page_state(vm->pages[i], ++ MEMCG_KERNEL_STACK_KB, ++ PAGE_SIZE / 1024); ++ } ++ } ++#endif ++ return 0; ++} ++ ++static void release_task_stack(struct task_struct *tsk) ++{ ++ if (WARN_ON(tsk->state != TASK_DEAD)) ++ return; /* Better to leak the stack than to free prematurely */ ++ ++ account_kernel_stack(tsk, -1); ++ free_thread_stack(tsk); ++ tsk->stack = NULL; ++#ifdef CONFIG_VMAP_STACK ++ tsk->stack_vm_area = NULL; ++#endif ++} ++ ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++void put_task_stack(struct task_struct *tsk) ++{ ++ if (atomic_dec_and_test(&tsk->stack_refcount)) ++ release_task_stack(tsk); ++} ++#endif ++ ++void free_task(struct task_struct *tsk) ++{ ++#ifndef CONFIG_THREAD_INFO_IN_TASK ++ /* ++ * The task is finally done with both the stack and thread_info, ++ * so free both. ++ */ ++ release_task_stack(tsk); ++#else ++ /* ++ * If the task had a separate stack allocation, it should be gone ++ * by now. 
++ */ ++ WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0); ++#endif ++ rt_mutex_debug_task_free(tsk); ++ ftrace_graph_exit_task(tsk); ++ put_seccomp_filter(tsk); ++ arch_release_task_struct(tsk); ++ if (tsk->flags & PF_KTHREAD) ++ free_kthread_struct(tsk); ++ free_task_struct(tsk); ++} ++EXPORT_SYMBOL(free_task); ++ ++#ifdef CONFIG_MMU ++static __latent_entropy int dup_mmap(struct mm_struct *mm, ++ struct mm_struct *oldmm) ++{ ++ struct vm_area_struct *mpnt, *tmp, *prev, **pprev; ++ struct rb_node **rb_link, *rb_parent; ++ int retval; ++ unsigned long charge; ++ LIST_HEAD(uf); ++ ++ uprobe_start_dup_mmap(); ++ if (down_write_killable(&oldmm->mmap_sem)) { ++ retval = -EINTR; ++ goto fail_uprobe_end; ++ } ++ flush_cache_dup_mm(oldmm); ++ uprobe_dup_mmap(oldmm, mm); ++ /* ++ * Not linked in yet - no deadlock potential: ++ */ ++ down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); ++ ++ /* No ordering required: file already has been exposed. */ ++ RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); ++ ++ mm->total_vm = oldmm->total_vm; ++ mm->data_vm = oldmm->data_vm; ++ mm->exec_vm = oldmm->exec_vm; ++ mm->stack_vm = oldmm->stack_vm; ++ ++ rb_link = &mm->mm_rb.rb_node; ++ rb_parent = NULL; ++ pprev = &mm->mmap; ++ retval = ksm_fork(mm, oldmm); ++ if (retval) ++ goto out; ++ retval = khugepaged_fork(mm, oldmm); ++ if (retval) ++ goto out; ++ ++ prev = NULL; ++ for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { ++ struct file *file; ++ ++ if (mpnt->vm_flags & VM_DONTCOPY) { ++ vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt)); ++ continue; ++ } ++ charge = 0; ++ /* ++ * Don't duplicate many vmas if we've been oom-killed (for ++ * example) ++ */ ++ if (fatal_signal_pending(current)) { ++ retval = -EINTR; ++ goto out; ++ } ++ if (mpnt->vm_flags & VM_ACCOUNT) { ++ unsigned long len = vma_pages(mpnt); ++ ++ if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ ++ goto fail_nomem; ++ charge = len; ++ } ++ tmp = vm_area_dup(mpnt); ++ if (!tmp) ++ goto fail_nomem; ++ retval = vma_dup_policy(mpnt, tmp); ++ if (retval) ++ goto fail_nomem_policy; ++ tmp->vm_mm = mm; ++ retval = dup_userfaultfd(tmp, &uf); ++ if (retval) ++ goto fail_nomem_anon_vma_fork; ++ if (tmp->vm_flags & VM_WIPEONFORK) { ++ /* VM_WIPEONFORK gets a clean slate in the child. */ ++ tmp->anon_vma = NULL; ++ if (anon_vma_prepare(tmp)) ++ goto fail_nomem_anon_vma_fork; ++ } else if (anon_vma_fork(tmp, mpnt)) ++ goto fail_nomem_anon_vma_fork; ++ tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); ++ tmp->vm_next = tmp->vm_prev = NULL; ++ file = tmp->vm_file; ++ if (file) { ++ struct inode *inode = file_inode(file); ++ struct address_space *mapping = file->f_mapping; ++ ++ get_file(file); ++ if (tmp->vm_flags & VM_DENYWRITE) ++ atomic_dec(&inode->i_writecount); ++ i_mmap_lock_write(mapping); ++ if (tmp->vm_flags & VM_SHARED) ++ atomic_inc(&mapping->i_mmap_writable); ++ flush_dcache_mmap_lock(mapping); ++ /* insert tmp into the share list, just after mpnt */ ++ vma_interval_tree_insert_after(tmp, mpnt, ++ &mapping->i_mmap); ++ flush_dcache_mmap_unlock(mapping); ++ i_mmap_unlock_write(mapping); ++ } ++ ++ /* ++ * Clear hugetlb-related page reserves for children. This only ++ * affects MAP_PRIVATE mappings. Faults generated by the child ++ * are not guaranteed to succeed, even if read-only ++ */ ++ if (is_vm_hugetlb_page(tmp)) ++ reset_vma_resv_huge_pages(tmp); ++ ++ /* ++ * Link in the new vma and copy the page table entries. 
++ */ ++ *pprev = tmp; ++ pprev = &tmp->vm_next; ++ tmp->vm_prev = prev; ++ prev = tmp; ++ ++ __vma_link_rb(mm, tmp, rb_link, rb_parent); ++ rb_link = &tmp->vm_rb.rb_right; ++ rb_parent = &tmp->vm_rb; ++ ++ mm->map_count++; ++ if (!(tmp->vm_flags & VM_WIPEONFORK)) ++ retval = copy_page_range(mm, oldmm, mpnt); ++ ++ if (tmp->vm_ops && tmp->vm_ops->open) ++ tmp->vm_ops->open(tmp); ++ ++ if (retval) ++ goto out; ++ } ++ /* a new mm has just been created */ ++ retval = arch_dup_mmap(oldmm, mm); ++out: ++ up_write(&mm->mmap_sem); ++ flush_tlb_mm(oldmm); ++ up_write(&oldmm->mmap_sem); ++ dup_userfaultfd_complete(&uf); ++fail_uprobe_end: ++ uprobe_end_dup_mmap(); ++ return retval; ++fail_nomem_anon_vma_fork: ++ mpol_put(vma_policy(tmp)); ++fail_nomem_policy: ++ vm_area_free(tmp); ++fail_nomem: ++ retval = -ENOMEM; ++ vm_unacct_memory(charge); ++ goto out; ++} ++ ++static inline int mm_alloc_pgd(struct mm_struct *mm) ++{ ++ mm->pgd = pgd_alloc(mm); ++ if (unlikely(!mm->pgd)) ++ return -ENOMEM; ++ return 0; ++} ++ ++static inline void mm_free_pgd(struct mm_struct *mm) ++{ ++ pgd_free(mm, mm->pgd); ++} ++#else ++static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) ++{ ++ down_write(&oldmm->mmap_sem); ++ RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); ++ up_write(&oldmm->mmap_sem); ++ return 0; ++} ++#define mm_alloc_pgd(mm) (0) ++#define mm_free_pgd(mm) ++#endif /* CONFIG_MMU */ ++ ++static void check_mm(struct mm_struct *mm) ++{ ++ int i; ++ ++ for (i = 0; i < NR_MM_COUNTERS; i++) { ++ long x = atomic_long_read(&mm->rss_stat.count[i]); ++ ++ if (unlikely(x)) ++ printk(KERN_ALERT "BUG: Bad rss-counter state " ++ "mm:%p idx:%d val:%ld\n", mm, i, x); ++ } ++ ++ if (mm_pgtables_bytes(mm)) ++ pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n", ++ mm_pgtables_bytes(mm)); ++ ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS ++ VM_BUG_ON_MM(mm->pmd_huge_pte, mm); ++#endif ++} ++ ++#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) ++#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) ++ ++/* ++ * Called when the last reference to the mm ++ * is dropped: either by a lazy thread or by ++ * mmput. Free the page directory and the mm. 
++ */ ++void __mmdrop(struct mm_struct *mm) ++{ ++ BUG_ON(mm == &init_mm); ++ WARN_ON_ONCE(mm == current->mm); ++ WARN_ON_ONCE(mm == current->active_mm); ++ mm_free_pgd(mm); ++ destroy_context(mm); ++ hmm_mm_destroy(mm); ++ mmu_notifier_mm_destroy(mm); ++ check_mm(mm); ++ put_user_ns(mm->user_ns); ++ free_mm(mm); ++} ++EXPORT_SYMBOL_GPL(__mmdrop); ++ ++static void mmdrop_async_fn(struct work_struct *work) ++{ ++ struct mm_struct *mm; ++ ++ mm = container_of(work, struct mm_struct, async_put_work); ++ __mmdrop(mm); ++} ++ ++static void mmdrop_async(struct mm_struct *mm) ++{ ++ if (unlikely(atomic_dec_and_test(&mm->mm_count))) { ++ INIT_WORK(&mm->async_put_work, mmdrop_async_fn); ++ schedule_work(&mm->async_put_work); ++ } ++} ++ ++static inline void free_signal_struct(struct signal_struct *sig) ++{ ++ taskstats_tgid_free(sig); ++ sched_autogroup_exit(sig); ++ /* ++ * __mmdrop is not safe to call from softirq context on x86 due to ++ * pgd_dtor so postpone it to the async context ++ */ ++ if (sig->oom_mm) ++ mmdrop_async(sig->oom_mm); ++ kmem_cache_free(signal_cachep, sig); ++} ++ ++static inline void put_signal_struct(struct signal_struct *sig) ++{ ++ if (atomic_dec_and_test(&sig->sigcnt)) ++ free_signal_struct(sig); ++} ++ ++void __put_task_struct(struct task_struct *tsk) ++{ ++ WARN_ON(!tsk->exit_state); ++ WARN_ON(atomic_read(&tsk->usage)); ++ WARN_ON(tsk == current); ++ ++ cgroup_free(tsk); ++ task_numa_free(tsk, true); ++ security_task_free(tsk); ++ exit_creds(tsk); ++ delayacct_tsk_free(tsk); ++ put_signal_struct(tsk->signal); ++ ++ if (!profile_handoff_task(tsk)) ++ free_task(tsk); ++} ++EXPORT_SYMBOL_GPL(__put_task_struct); ++ ++void __init __weak arch_task_cache_init(void) { } ++ ++/* ++ * set_max_threads ++ */ ++static void set_max_threads(unsigned int max_threads_suggested) ++{ ++ u64 threads; ++ ++ /* ++ * The number of threads shall be limited such that the thread ++ * structures may only consume a small part of the available memory. ++ */ ++ if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64) ++ threads = MAX_THREADS; ++ else ++ threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, ++ (u64) THREAD_SIZE * 8UL); ++ ++ if (threads > max_threads_suggested) ++ threads = max_threads_suggested; ++ ++ max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); ++} ++ ++#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT ++/* Initialized by the architecture: */ ++int arch_task_struct_size __read_mostly; ++#endif ++ ++static void task_struct_whitelist(unsigned long *offset, unsigned long *size) ++{ ++ /* Fetch thread_struct whitelist for the architecture. */ ++ arch_thread_struct_whitelist(offset, size); ++ ++ /* ++ * Handle zero-sized whitelist or empty thread_struct, otherwise ++ * adjust offset to position of thread_struct in task_struct. 
++ */ ++ if (unlikely(*size == 0)) ++ *offset = 0; ++ else ++ *offset += offsetof(struct task_struct, thread); ++} ++ ++void __init fork_init(void) ++{ ++ int i; ++#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR ++#ifndef ARCH_MIN_TASKALIGN ++#define ARCH_MIN_TASKALIGN 0 ++#endif ++ int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); ++ unsigned long useroffset, usersize; ++ ++ /* create a slab on which task_structs can be allocated */ ++ task_struct_whitelist(&useroffset, &usersize); ++ task_struct_cachep = kmem_cache_create_usercopy("task_struct", ++ arch_task_struct_size, align, ++ SLAB_PANIC|SLAB_ACCOUNT, ++ useroffset, usersize, NULL); ++#endif ++ ++ /* do the arch specific task caches init */ ++ arch_task_cache_init(); ++ ++ set_max_threads(MAX_THREADS); ++ ++ init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; ++ init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; ++ init_task.signal->rlim[RLIMIT_SIGPENDING] = ++ init_task.signal->rlim[RLIMIT_NPROC]; ++ ++ for (i = 0; i < UCOUNT_COUNTS; i++) { ++ init_user_ns.ucount_max[i] = max_threads/2; ++ } ++ ++#ifdef CONFIG_VMAP_STACK ++ cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", ++ NULL, free_vm_stack_cache); ++#endif ++ ++ lockdep_init_task(&init_task); ++} ++ ++int __weak arch_dup_task_struct(struct task_struct *dst, ++ struct task_struct *src) ++{ ++ *dst = *src; ++ return 0; ++} ++ ++void set_task_stack_end_magic(struct task_struct *tsk) ++{ ++ unsigned long *stackend; ++ ++ stackend = end_of_stack(tsk); ++ *stackend = STACK_END_MAGIC; /* for overflow detection */ ++} ++ ++static struct task_struct *dup_task_struct(struct task_struct *orig, int node) ++{ ++ struct task_struct *tsk; ++ unsigned long *stack; ++ struct vm_struct *stack_vm_area; ++ int err; ++ ++ if (node == NUMA_NO_NODE) ++ node = tsk_fork_get_node(orig); ++ tsk = alloc_task_struct_node(node); ++ if (!tsk) ++ return NULL; ++ ++ stack = alloc_thread_stack_node(tsk, node); ++ if (!stack) ++ goto free_tsk; ++ ++ if (memcg_charge_kernel_stack(tsk)) ++ goto free_stack; ++ ++ stack_vm_area = task_stack_vm_area(tsk); ++ ++ err = arch_dup_task_struct(tsk, orig); ++ ++ /* ++ * arch_dup_task_struct() clobbers the stack-related fields. Make ++ * sure they're properly initialized before using any stack-related ++ * functions again. ++ */ ++ tsk->stack = stack; ++#ifdef CONFIG_VMAP_STACK ++ tsk->stack_vm_area = stack_vm_area; ++#endif ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ atomic_set(&tsk->stack_refcount, 1); ++#endif ++ ++ if (err) ++ goto free_stack; ++ ++#ifdef CONFIG_SECCOMP ++ /* ++ * We must handle setting up seccomp filters once we're under ++ * the sighand lock in case orig has changed between now and ++ * then. Until then, filter must be NULL to avoid messing up ++ * the usage counts on the error path calling free_task. 
++ */ ++ tsk->seccomp.filter = NULL; ++#endif ++ ++ setup_thread_stack(tsk, orig); ++ clear_user_return_notifier(tsk); ++ clear_tsk_need_resched(tsk); ++ set_task_stack_end_magic(tsk); ++ ++#ifdef CONFIG_STACKPROTECTOR ++ tsk->stack_canary = get_random_canary(); ++#endif ++ ++ /* ++ * One for us, one for whoever does the "release_task()" (usually ++ * parent) ++ */ ++ atomic_set(&tsk->usage, 2); ++#ifdef CONFIG_BLK_DEV_IO_TRACE ++ tsk->btrace_seq = 0; ++#endif ++ tsk->splice_pipe = NULL; ++ tsk->task_frag.page = NULL; ++ tsk->wake_q.next = NULL; ++ ++ account_kernel_stack(tsk, 1); ++ ++ kcov_task_init(tsk); ++ ++#ifdef CONFIG_FAULT_INJECTION ++ tsk->fail_nth = 0; ++#endif ++ ++#ifdef CONFIG_BLK_CGROUP ++ tsk->throttle_queue = NULL; ++ tsk->use_memdelay = 0; ++#endif ++ ++#ifdef CONFIG_MEMCG ++ tsk->active_memcg = NULL; ++#endif ++ return tsk; ++ ++free_stack: ++ free_thread_stack(tsk); ++free_tsk: ++ free_task_struct(tsk); ++ return NULL; ++} ++ ++__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); ++ ++static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; ++ ++static int __init coredump_filter_setup(char *s) ++{ ++ default_dump_filter = ++ (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & ++ MMF_DUMP_FILTER_MASK; ++ return 1; ++} ++ ++__setup("coredump_filter=", coredump_filter_setup); ++ ++#include ++ ++static void mm_init_aio(struct mm_struct *mm) ++{ ++#ifdef CONFIG_AIO ++ spin_lock_init(&mm->ioctx_lock); ++ mm->ioctx_table = NULL; ++#endif ++} ++ ++static __always_inline void mm_clear_owner(struct mm_struct *mm, ++ struct task_struct *p) ++{ ++#ifdef CONFIG_MEMCG ++ if (mm->owner == p) ++ WRITE_ONCE(mm->owner, NULL); ++#endif ++} ++ ++static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) ++{ ++#ifdef CONFIG_MEMCG ++ mm->owner = p; ++#endif ++} ++ ++static void mm_init_uprobes_state(struct mm_struct *mm) ++{ ++#ifdef CONFIG_UPROBES ++ mm->uprobes_state.xol_area = NULL; ++#endif ++} ++ ++static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, ++ struct user_namespace *user_ns) ++{ ++ mm->mmap = NULL; ++ mm->mm_rb = RB_ROOT; ++ mm->vmacache_seqnum = 0; ++ atomic_set(&mm->mm_users, 1); ++ atomic_set(&mm->mm_count, 1); ++ init_rwsem(&mm->mmap_sem); ++ INIT_LIST_HEAD(&mm->mmlist); ++ mm->core_state = NULL; ++ mm_pgtables_bytes_init(mm); ++ mm->map_count = 0; ++ atomic_long_set(&mm->locked_vm, 0); ++ mm->pinned_vm = 0; ++ memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); ++ spin_lock_init(&mm->page_table_lock); ++ spin_lock_init(&mm->arg_lock); ++ mm_init_cpumask(mm); ++ mm_init_aio(mm); ++ mm_init_owner(mm, p); ++ RCU_INIT_POINTER(mm->exe_file, NULL); ++ mmu_notifier_mm_init(mm); ++ hmm_mm_init(mm); ++ init_tlb_flush_pending(mm); ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS ++ mm->pmd_huge_pte = NULL; ++#endif ++ mm_init_uprobes_state(mm); ++ ++ if (current->mm) { ++ mm->flags = current->mm->flags & MMF_INIT_MASK; ++ mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; ++ } else { ++ mm->flags = default_dump_filter; ++ mm->def_flags = 0; ++ } ++ ++ if (mm_alloc_pgd(mm)) ++ goto fail_nopgd; ++ ++ if (init_new_context(p, mm)) ++ goto fail_nocontext; ++ ++ mm->user_ns = get_user_ns(user_ns); ++ return mm; ++ ++fail_nocontext: ++ mm_free_pgd(mm); ++fail_nopgd: ++ free_mm(mm); ++ return NULL; ++} ++ ++/* ++ * Allocate and initialize an mm_struct. 
++ */ ++struct mm_struct *mm_alloc(void) ++{ ++ struct mm_struct *mm; ++ ++ mm = allocate_mm(); ++ if (!mm) ++ return NULL; ++ ++ memset(mm, 0, sizeof(*mm)); ++ return mm_init(mm, current, current_user_ns()); ++} ++ ++static inline void __mmput(struct mm_struct *mm) ++{ ++ VM_BUG_ON(atomic_read(&mm->mm_users)); ++ ++ uprobe_clear_state(mm); ++ exit_aio(mm); ++ ksm_exit(mm); ++ khugepaged_exit(mm); /* must run before exit_mmap */ ++ exit_mmap(mm); ++ mm_put_huge_zero_page(mm); ++ set_mm_exe_file(mm, NULL); ++ if (!list_empty(&mm->mmlist)) { ++ spin_lock(&mmlist_lock); ++ list_del(&mm->mmlist); ++ spin_unlock(&mmlist_lock); ++ } ++ if (mm->binfmt) ++ module_put(mm->binfmt->module); ++ mmdrop(mm); ++} ++ ++/* ++ * Decrement the use count and release all resources for an mm. ++ */ ++void mmput(struct mm_struct *mm) ++{ ++ might_sleep(); ++ ++ if (atomic_dec_and_test(&mm->mm_users)) ++ __mmput(mm); ++} ++EXPORT_SYMBOL_GPL(mmput); ++ ++#ifdef CONFIG_MMU ++static void mmput_async_fn(struct work_struct *work) ++{ ++ struct mm_struct *mm = container_of(work, struct mm_struct, ++ async_put_work); ++ ++ __mmput(mm); ++} ++ ++void mmput_async(struct mm_struct *mm) ++{ ++ if (atomic_dec_and_test(&mm->mm_users)) { ++ INIT_WORK(&mm->async_put_work, mmput_async_fn); ++ schedule_work(&mm->async_put_work); ++ } ++} ++EXPORT_SYMBOL_GPL(mmput_async); ++#endif ++ ++/** ++ * set_mm_exe_file - change a reference to the mm's executable file ++ * ++ * This changes mm's executable file (shown as symlink /proc/[pid]/exe). ++ * ++ * Main users are mmput() and sys_execve(). Callers prevent concurrent ++ * invocations: in mmput() nobody alive left, in execve task is single ++ * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the ++ * mm->exe_file, but does so without using set_mm_exe_file() in order ++ * to do avoid the need for any locks. ++ */ ++void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) ++{ ++ struct file *old_exe_file; ++ ++ /* ++ * It is safe to dereference the exe_file without RCU as ++ * this function is only called if nobody else can access ++ * this mm -- see comment above for justification. ++ */ ++ old_exe_file = rcu_dereference_raw(mm->exe_file); ++ ++ if (new_exe_file) ++ get_file(new_exe_file); ++ rcu_assign_pointer(mm->exe_file, new_exe_file); ++ if (old_exe_file) ++ fput(old_exe_file); ++} ++ ++/** ++ * get_mm_exe_file - acquire a reference to the mm's executable file ++ * ++ * Returns %NULL if mm has no associated executable file. ++ * User must release file via fput(). ++ */ ++struct file *get_mm_exe_file(struct mm_struct *mm) ++{ ++ struct file *exe_file; ++ ++ rcu_read_lock(); ++ exe_file = rcu_dereference(mm->exe_file); ++ if (exe_file && !get_file_rcu(exe_file)) ++ exe_file = NULL; ++ rcu_read_unlock(); ++ return exe_file; ++} ++EXPORT_SYMBOL(get_mm_exe_file); ++ ++/** ++ * get_task_exe_file - acquire a reference to the task's executable file ++ * ++ * Returns %NULL if task's mm (if any) has no associated executable file or ++ * this is a kernel thread with borrowed mm (see the comment above get_task_mm). ++ * User must release file via fput(). 
++ */ ++struct file *get_task_exe_file(struct task_struct *task) ++{ ++ struct file *exe_file = NULL; ++ struct mm_struct *mm; ++ ++ task_lock(task); ++ mm = task->mm; ++ if (mm) { ++ if (!(task->flags & PF_KTHREAD)) ++ exe_file = get_mm_exe_file(mm); ++ } ++ task_unlock(task); ++ return exe_file; ++} ++EXPORT_SYMBOL(get_task_exe_file); ++ ++/** ++ * get_task_mm - acquire a reference to the task's mm ++ * ++ * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning ++ * this kernel workthread has transiently adopted a user mm with use_mm, ++ * to do its AIO) is not set and if so returns a reference to it, after ++ * bumping up the use count. User must release the mm via mmput() ++ * after use. Typically used by /proc and ptrace. ++ */ ++struct mm_struct *get_task_mm(struct task_struct *task) ++{ ++ struct mm_struct *mm; ++ ++ task_lock(task); ++ mm = task->mm; ++ if (mm) { ++ if (task->flags & PF_KTHREAD) ++ mm = NULL; ++ else ++ mmget(mm); ++ } ++ task_unlock(task); ++ return mm; ++} ++EXPORT_SYMBOL_GPL(get_task_mm); ++ ++/** ++ * mm_access - check access permission to a task and and acquire a reference to ++ * its mm. ++ * @task: target task ++ * @mode: selects type of access and caller credentials ++ * ++ * Return the task's mm on success, or %NULL if it cannot be accessed. ++ * ++ * Check if the caller is allowed to read or write the target task's pages. ++ * @mode describes the access mode and credentials using ptrace access flags. ++ * See ptrace_may_access() for more details. On success, a reference to the mm ++ * is taken. ++ */ ++struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) ++{ ++ struct mm_struct *mm; ++ int err; ++ ++ err = mutex_lock_killable(&task->signal->cred_guard_mutex); ++ if (err) ++ return ERR_PTR(err); ++ ++ mm = get_task_mm(task); ++ if (mm && mm != current->mm && ++ !ptrace_may_access(task, mode)) { ++ mmput(mm); ++ mm = ERR_PTR(-EACCES); ++ } ++ mutex_unlock(&task->signal->cred_guard_mutex); ++ ++ return mm; ++} ++EXPORT_SYMBOL_GPL(mm_access); ++ ++static void complete_vfork_done(struct task_struct *tsk) ++{ ++ struct completion *vfork; ++ ++ task_lock(tsk); ++ vfork = tsk->vfork_done; ++ if (likely(vfork)) { ++ tsk->vfork_done = NULL; ++ complete(vfork); ++ } ++ task_unlock(tsk); ++} ++ ++static int wait_for_vfork_done(struct task_struct *child, ++ struct completion *vfork) ++{ ++ int killed; ++ ++ freezer_do_not_count(); ++ killed = wait_for_completion_killable(vfork); ++ freezer_count(); ++ ++ if (killed) { ++ task_lock(child); ++ child->vfork_done = NULL; ++ task_unlock(child); ++ } ++ ++ put_task_struct(child); ++ return killed; ++} ++ ++/* Please note the differences between mmput and mm_release. ++ * mmput is called whenever we stop holding onto a mm_struct, ++ * error success whatever. ++ * ++ * mm_release is called after a mm_struct has been removed ++ * from the current process. ++ * ++ * This difference is important for error handling, when we ++ * only half set up a mm_struct for a new process and need to restore ++ * the old one. Because we mmput the new mm_struct before ++ * restoring the old one. . . 
++ * Eric Biederman 10 January 1998 ++ */ ++void mm_release(struct task_struct *tsk, struct mm_struct *mm) ++{ ++ /* Get rid of any futexes when releasing the mm */ ++#ifdef CONFIG_FUTEX ++ if (unlikely(tsk->robust_list)) { ++ exit_robust_list(tsk); ++ tsk->robust_list = NULL; ++ } ++#ifdef CONFIG_COMPAT ++ if (unlikely(tsk->compat_robust_list)) { ++ compat_exit_robust_list(tsk); ++ tsk->compat_robust_list = NULL; ++ } ++#endif ++ if (unlikely(!list_empty(&tsk->pi_state_list))) ++ exit_pi_state_list(tsk); ++#endif ++ ++ uprobe_free_utask(tsk); ++ ++ /* Get rid of any cached register state */ ++ deactivate_mm(tsk, mm); ++ ++ /* ++ * Signal userspace if we're not exiting with a core dump ++ * because we want to leave the value intact for debugging ++ * purposes. ++ */ ++ if (tsk->clear_child_tid) { ++ if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) && ++ atomic_read(&mm->mm_users) > 1) { ++ /* ++ * We don't check the error code - if userspace has ++ * not set up a proper pointer then tough luck. ++ */ ++ put_user(0, tsk->clear_child_tid); ++ do_futex(tsk->clear_child_tid, FUTEX_WAKE, ++ 1, NULL, NULL, 0, 0); ++ } ++ tsk->clear_child_tid = NULL; ++ } ++ ++ /* ++ * All done, finally we can wake up parent and return this mm to him. ++ * Also kthread_stop() uses this completion for synchronization. ++ */ ++ if (tsk->vfork_done) ++ complete_vfork_done(tsk); ++} ++ ++/* ++ * Allocate a new mm structure and copy contents from the ++ * mm structure of the passed in task structure. ++ */ ++static struct mm_struct *dup_mm(struct task_struct *tsk) ++{ ++ struct mm_struct *mm, *oldmm = current->mm; ++ int err; ++ ++ mm = allocate_mm(); ++ if (!mm) ++ goto fail_nomem; ++ ++ memcpy(mm, oldmm, sizeof(*mm)); ++ ++ if (!mm_init(mm, tsk, mm->user_ns)) ++ goto fail_nomem; ++ ++ err = dup_mmap(mm, oldmm); ++ if (err) ++ goto free_pt; ++ ++ mm->hiwater_rss = get_mm_rss(mm); ++ mm->hiwater_vm = mm->total_vm; ++ ++ if (mm->binfmt && !try_module_get(mm->binfmt->module)) ++ goto free_pt; ++ ++ return mm; ++ ++free_pt: ++ /* don't put binfmt in mmput, we haven't got module yet */ ++ mm->binfmt = NULL; ++ mm_init_owner(mm, NULL); ++ mmput(mm); ++ ++fail_nomem: ++ return NULL; ++} ++ ++static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) ++{ ++ struct mm_struct *mm, *oldmm; ++ int retval; ++ ++ tsk->min_flt = tsk->maj_flt = 0; ++ tsk->nvcsw = tsk->nivcsw = 0; ++#ifdef CONFIG_DETECT_HUNG_TASK ++ tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; ++ tsk->last_switch_time = 0; ++#endif ++ ++ tsk->mm = NULL; ++ tsk->active_mm = NULL; ++ ++ /* ++ * Are we cloning a kernel thread? ++ * ++ * We need to steal a active VM for that.. 
++ */ ++ oldmm = current->mm; ++ if (!oldmm) ++ return 0; ++ ++ /* initialize the new vmacache entries */ ++ vmacache_flush(tsk); ++ ++ if (clone_flags & CLONE_VM) { ++ mmget(oldmm); ++ mm = oldmm; ++ goto good_mm; ++ } ++ ++ retval = -ENOMEM; ++ mm = dup_mm(tsk); ++ if (!mm) ++ goto fail_nomem; ++ ++good_mm: ++ tsk->mm = mm; ++ tsk->active_mm = mm; ++ return 0; ++ ++fail_nomem: ++ return retval; ++} ++ ++static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) ++{ ++ struct fs_struct *fs = current->fs; ++ if (clone_flags & CLONE_FS) { ++ /* tsk->fs is already what we want */ ++ spin_lock(&fs->lock); ++ if (fs->in_exec) { ++ spin_unlock(&fs->lock); ++ return -EAGAIN; ++ } ++ fs->users++; ++ spin_unlock(&fs->lock); ++ return 0; ++ } ++ tsk->fs = copy_fs_struct(fs); ++ if (!tsk->fs) ++ return -ENOMEM; ++ return 0; ++} ++ ++static int copy_files(unsigned long clone_flags, struct task_struct *tsk) ++{ ++ struct files_struct *oldf, *newf; ++ int error = 0; ++ ++ /* ++ * A background process may not have any files ... ++ */ ++ oldf = current->files; ++ if (!oldf) ++ goto out; ++ ++ if (clone_flags & CLONE_FILES) { ++ atomic_inc(&oldf->count); ++ goto out; ++ } ++ ++ newf = dup_fd(oldf, &error); ++ if (!newf) ++ goto out; ++ ++ tsk->files = newf; ++ error = 0; ++out: ++ return error; ++} ++ ++static int copy_io(unsigned long clone_flags, struct task_struct *tsk) ++{ ++#ifdef CONFIG_BLOCK ++ struct io_context *ioc = current->io_context; ++ struct io_context *new_ioc; ++ ++ if (!ioc) ++ return 0; ++ /* ++ * Share io context with parent, if CLONE_IO is set ++ */ ++ if (clone_flags & CLONE_IO) { ++ ioc_task_link(ioc); ++ tsk->io_context = ioc; ++ } else if (ioprio_valid(ioc->ioprio)) { ++ new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE); ++ if (unlikely(!new_ioc)) ++ return -ENOMEM; ++ ++ new_ioc->ioprio = ioc->ioprio; ++ put_io_context(new_ioc); ++ } ++#endif ++ return 0; ++} ++ ++static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) ++{ ++ struct sighand_struct *sig; ++ ++ if (clone_flags & CLONE_SIGHAND) { ++ atomic_inc(¤t->sighand->count); ++ return 0; ++ } ++ sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); ++ rcu_assign_pointer(tsk->sighand, sig); ++ if (!sig) ++ return -ENOMEM; ++ ++ atomic_set(&sig->count, 1); ++ spin_lock_irq(¤t->sighand->siglock); ++ memcpy(sig->action, current->sighand->action, sizeof(sig->action)); ++ spin_unlock_irq(¤t->sighand->siglock); ++ return 0; ++} ++ ++void __cleanup_sighand(struct sighand_struct *sighand) ++{ ++ if (atomic_dec_and_test(&sighand->count)) { ++ signalfd_cleanup(sighand); ++ /* ++ * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it ++ * without an RCU grace period, see __lock_task_sighand(). ++ */ ++ kmem_cache_free(sighand_cachep, sighand); ++ } ++} ++ ++#ifdef CONFIG_POSIX_TIMERS ++/* ++ * Initialize POSIX timer handling for a thread group. ++ */ ++static void posix_cpu_timers_init_group(struct signal_struct *sig) ++{ ++ unsigned long cpu_limit; ++ ++ cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); ++ if (cpu_limit != RLIM_INFINITY) { ++ sig->cputime_expires.prof_exp = cpu_limit * NSEC_PER_SEC; ++ sig->cputimer.running = true; ++ } ++ ++ /* The timer lists. 
*/ ++ INIT_LIST_HEAD(&sig->cpu_timers[0]); ++ INIT_LIST_HEAD(&sig->cpu_timers[1]); ++ INIT_LIST_HEAD(&sig->cpu_timers[2]); ++} ++#else ++static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { } ++#endif ++ ++static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) ++{ ++ struct signal_struct *sig; ++ ++ if (clone_flags & CLONE_THREAD) ++ return 0; ++ ++ sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL); ++ tsk->signal = sig; ++ if (!sig) ++ return -ENOMEM; ++ ++ sig->nr_threads = 1; ++ atomic_set(&sig->live, 1); ++ atomic_set(&sig->sigcnt, 1); ++ ++ /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */ ++ sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); ++ tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head); ++ ++ init_waitqueue_head(&sig->wait_chldexit); ++ sig->curr_target = tsk; ++ init_sigpending(&sig->shared_pending); ++ INIT_HLIST_HEAD(&sig->multiprocess); ++ seqlock_init(&sig->stats_lock); ++ prev_cputime_init(&sig->prev_cputime); ++ ++#ifdef CONFIG_POSIX_TIMERS ++ INIT_LIST_HEAD(&sig->posix_timers); ++ hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ sig->real_timer.function = it_real_fn; ++#endif ++ ++ task_lock(current->group_leader); ++ memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); ++ task_unlock(current->group_leader); ++ ++ posix_cpu_timers_init_group(sig); ++ ++ tty_audit_fork(sig); ++ sched_autogroup_fork(sig); ++ ++ sig->oom_score_adj = current->signal->oom_score_adj; ++ sig->oom_score_adj_min = current->signal->oom_score_adj_min; ++ ++ mutex_init(&sig->cred_guard_mutex); ++ ++ return 0; ++} ++ ++static void copy_seccomp(struct task_struct *p) ++{ ++#ifdef CONFIG_SECCOMP ++ /* ++ * Must be called with sighand->lock held, which is common to ++ * all threads in the group. Holding cred_guard_mutex is not ++ * needed because this new task is not yet running and cannot ++ * be racing exec. ++ */ ++ assert_spin_locked(¤t->sighand->siglock); ++ ++ /* Ref-count the new filter user, and assign it. */ ++ get_seccomp_filter(current); ++ p->seccomp = current->seccomp; ++ ++ /* ++ * Explicitly enable no_new_privs here in case it got set ++ * between the task_struct being duplicated and holding the ++ * sighand lock. The seccomp state and nnp must be in sync. ++ */ ++ if (task_no_new_privs(current)) ++ task_set_no_new_privs(p); ++ ++ /* ++ * If the parent gained a seccomp mode after copying thread ++ * flags and between before we held the sighand lock, we have ++ * to manually enable the seccomp thread flag here. ++ */ ++ if (p->seccomp.mode != SECCOMP_MODE_DISABLED) ++ set_tsk_thread_flag(p, TIF_SECCOMP); ++#endif ++} ++ ++SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) ++{ ++ current->clear_child_tid = tidptr; ++ ++ return task_pid_vnr(current); ++} ++ ++static void rt_mutex_init_task(struct task_struct *p) ++{ ++ raw_spin_lock_init(&p->pi_lock); ++#ifdef CONFIG_RT_MUTEXES ++ p->pi_waiters = RB_ROOT_CACHED; ++ p->pi_top_task = NULL; ++ p->pi_blocked_on = NULL; ++#endif ++} ++ ++#ifdef CONFIG_POSIX_TIMERS ++/* ++ * Initialize POSIX timer handling for a single task. 
++ */ ++static void posix_cpu_timers_init(struct task_struct *tsk) ++{ ++ tsk->cputime_expires.prof_exp = 0; ++ tsk->cputime_expires.virt_exp = 0; ++ tsk->cputime_expires.sched_exp = 0; ++ INIT_LIST_HEAD(&tsk->cpu_timers[0]); ++ INIT_LIST_HEAD(&tsk->cpu_timers[1]); ++ INIT_LIST_HEAD(&tsk->cpu_timers[2]); ++} ++#else ++static inline void posix_cpu_timers_init(struct task_struct *tsk) { } ++#endif ++ ++static inline void init_task_pid_links(struct task_struct *task) ++{ ++ enum pid_type type; ++ ++ for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { ++ INIT_HLIST_NODE(&task->pid_links[type]); ++ } ++} ++ ++static inline void ++init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) ++{ ++ if (type == PIDTYPE_PID) ++ task->thread_pid = pid; ++ else ++ task->signal->pids[type] = pid; ++} ++ ++static inline void rcu_copy_process(struct task_struct *p) ++{ ++#ifdef CONFIG_PREEMPT_RCU ++ p->rcu_read_lock_nesting = 0; ++ p->rcu_read_unlock_special.s = 0; ++ p->rcu_blocked_node = NULL; ++ INIT_LIST_HEAD(&p->rcu_node_entry); ++#endif /* #ifdef CONFIG_PREEMPT_RCU */ ++#ifdef CONFIG_TASKS_RCU ++ p->rcu_tasks_holdout = false; ++ INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); ++ p->rcu_tasks_idle_cpu = -1; ++#endif /* #ifdef CONFIG_TASKS_RCU */ ++} ++ ++#ifdef CONFIG_MEMCG ++static void __delayed_free_task(struct rcu_head *rhp) ++{ ++ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); ++ ++ free_task(tsk); ++} ++#endif /* CONFIG_MEMCG */ ++ ++static __always_inline void delayed_free_task(struct task_struct *tsk) ++{ ++#ifdef CONFIG_MEMCG ++ call_rcu(&tsk->rcu, __delayed_free_task); ++#else /* CONFIG_MEMCG */ ++ free_task(tsk); ++#endif /* CONFIG_MEMCG */ ++} ++ ++static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk) ++{ ++ /* Skip if kernel thread */ ++ if (!tsk->mm) ++ return; ++ ++ /* Skip if spawning a thread or using vfork */ ++ if ((clone_flags & (CLONE_VM | CLONE_THREAD | CLONE_VFORK)) != CLONE_VM) ++ return; ++ ++ /* We need to synchronize with __set_oom_adj */ ++ mutex_lock(&oom_adj_mutex); ++ set_bit(MMF_MULTIPROCESS, &tsk->mm->flags); ++ /* Update the values in case they were changed after copy_signal */ ++ tsk->signal->oom_score_adj = current->signal->oom_score_adj; ++ tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min; ++ mutex_unlock(&oom_adj_mutex); ++} ++ ++/* ++ * This creates a new process as a copy of the old one, ++ * but does not actually start it yet. ++ * ++ * It copies the registers, and all the appropriate ++ * parts of the process environment (as per the clone ++ * flags). The actual kick-off is left to the caller. ++ */ ++static __latent_entropy struct task_struct *copy_process( ++ unsigned long clone_flags, ++ unsigned long stack_start, ++ unsigned long stack_size, ++ int __user *child_tidptr, ++ struct pid *pid, ++ int trace, ++ unsigned long tls, ++ int node) ++{ ++ int retval; ++ struct task_struct *p; ++ struct multiprocess_signals delayed; ++ ++ /* ++ * Don't allow sharing the root directory with processes in a different ++ * namespace ++ */ ++ if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) ++ return ERR_PTR(-EINVAL); ++ ++ if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * Thread groups must share signals as well, and detached threads ++ * can only be started up within the thread group. 
++ */ ++ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * Shared signal handlers imply shared VM. By way of the above, ++ * thread groups also imply shared VM. Blocking this case allows ++ * for various simplifications in other code. ++ */ ++ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * Siblings of global init remain as zombies on exit since they are ++ * not reaped by their parent (swapper). To solve this and to avoid ++ * multi-rooted process trees, prevent global and container-inits ++ * from creating siblings. ++ */ ++ if ((clone_flags & CLONE_PARENT) && ++ current->signal->flags & SIGNAL_UNKILLABLE) ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * If the new process will be in a different pid or user namespace ++ * do not allow it to share a thread group with the forking task. ++ */ ++ if (clone_flags & CLONE_THREAD) { ++ if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || ++ (task_active_pid_ns(current) != ++ current->nsproxy->pid_ns_for_children)) ++ return ERR_PTR(-EINVAL); ++ } ++ ++ /* ++ * Force any signals received before this point to be delivered ++ * before the fork happens. Collect up signals sent to multiple ++ * processes that happen during the fork and delay them so that ++ * they appear to happen after the fork. ++ */ ++ sigemptyset(&delayed.signal); ++ INIT_HLIST_NODE(&delayed.node); ++ ++ spin_lock_irq(¤t->sighand->siglock); ++ if (!(clone_flags & CLONE_THREAD)) ++ hlist_add_head(&delayed.node, ¤t->signal->multiprocess); ++ recalc_sigpending(); ++ spin_unlock_irq(¤t->sighand->siglock); ++ retval = -ERESTARTNOINTR; ++ if (signal_pending(current)) ++ goto fork_out; ++ ++ retval = -ENOMEM; ++ p = dup_task_struct(current, node); ++ if (!p) ++ goto fork_out; ++ ++ /* ++ * This _must_ happen before we call free_task(), i.e. before we jump ++ * to any of the bad_fork_* labels. This is to avoid freeing ++ * p->set_child_tid which is (ab)used as a kthread's data pointer for ++ * kernel threads (PF_KTHREAD). ++ */ ++ p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; ++ /* ++ * Clear TID on mm_release()? ++ */ ++ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; ++ ++ ftrace_graph_init_task(p); ++ ++ rt_mutex_init_task(p); ++ ++#ifdef CONFIG_PROVE_LOCKING ++ DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); ++ DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); ++#endif ++ retval = -EAGAIN; ++ if (atomic_read(&p->real_cred->user->processes) >= ++ task_rlimit(p, RLIMIT_NPROC)) { ++ if (p->real_cred->user != INIT_USER && ++ !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) ++ goto bad_fork_free; ++ } ++ current->flags &= ~PF_NPROC_EXCEEDED; ++ ++ retval = copy_creds(p, clone_flags); ++ if (retval < 0) ++ goto bad_fork_free; ++ ++ /* ++ * If multiple threads are within copy_process(), then this check ++ * triggers too late. This doesn't hurt, the check is only there ++ * to stop root fork bombs. 
++ */ ++ retval = -EAGAIN; ++ if (nr_threads >= max_threads) ++ goto bad_fork_cleanup_count; ++ ++ delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ ++ p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE); ++ p->flags |= PF_FORKNOEXEC; ++ INIT_LIST_HEAD(&p->children); ++ INIT_LIST_HEAD(&p->sibling); ++ rcu_copy_process(p); ++ p->vfork_done = NULL; ++ spin_lock_init(&p->alloc_lock); ++ ++ init_sigpending(&p->pending); ++ ++ p->utime = p->stime = p->gtime = 0; ++#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME ++ p->utimescaled = p->stimescaled = 0; ++#endif ++ prev_cputime_init(&p->prev_cputime); ++ ++#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN ++ seqcount_init(&p->vtime.seqcount); ++ p->vtime.starttime = 0; ++ p->vtime.state = VTIME_INACTIVE; ++#endif ++ ++#if defined(SPLIT_RSS_COUNTING) ++ memset(&p->rss_stat, 0, sizeof(p->rss_stat)); ++#endif ++ ++ p->default_timer_slack_ns = current->timer_slack_ns; ++ ++ task_io_accounting_init(&p->ioac); ++ acct_clear_integrals(p); ++ ++ posix_cpu_timers_init(p); ++ ++ p->io_context = NULL; ++ audit_set_context(p, NULL); ++ cgroup_fork(p); ++#ifdef CONFIG_NUMA ++ p->mempolicy = mpol_dup(p->mempolicy); ++ if (IS_ERR(p->mempolicy)) { ++ retval = PTR_ERR(p->mempolicy); ++ p->mempolicy = NULL; ++ goto bad_fork_cleanup_threadgroup_lock; ++ } ++#endif ++#ifdef CONFIG_CPUSETS ++ p->cpuset_mem_spread_rotor = NUMA_NO_NODE; ++ p->cpuset_slab_spread_rotor = NUMA_NO_NODE; ++ seqcount_init(&p->mems_allowed_seq); ++#endif ++#ifdef CONFIG_TRACE_IRQFLAGS ++ p->irq_events = 0; ++ p->hardirqs_enabled = 0; ++ p->hardirq_enable_ip = 0; ++ p->hardirq_enable_event = 0; ++ p->hardirq_disable_ip = _THIS_IP_; ++ p->hardirq_disable_event = 0; ++ p->softirqs_enabled = 1; ++ p->softirq_enable_ip = _THIS_IP_; ++ p->softirq_enable_event = 0; ++ p->softirq_disable_ip = 0; ++ p->softirq_disable_event = 0; ++ p->hardirq_context = 0; ++ p->softirq_context = 0; ++#endif ++ ++ p->pagefault_disabled = 0; ++ ++#ifdef CONFIG_LOCKDEP ++ p->lockdep_depth = 0; /* no locks held yet */ ++ p->curr_chain_key = 0; ++ p->lockdep_recursion = 0; ++ lockdep_init_task(p); ++#endif ++ ++#ifdef CONFIG_DEBUG_MUTEXES ++ p->blocked_on = NULL; /* not blocked yet */ ++#endif ++#ifdef CONFIG_BCACHE ++ p->sequential_io = 0; ++ p->sequential_io_avg = 0; ++#endif ++ ++ /* Perform scheduler related setup. Assign this task to a CPU. 
*/ ++ retval = sched_fork(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_policy; ++ ++ retval = perf_event_init_task(p); ++ if (retval) ++ goto bad_fork_cleanup_policy; ++ retval = audit_alloc(p); ++ if (retval) ++ goto bad_fork_cleanup_perf; ++ /* copy all the process information */ ++ shm_init_task(p); ++ retval = security_task_alloc(p, clone_flags); ++ if (retval) ++ goto bad_fork_cleanup_audit; ++ retval = copy_semundo(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_security; ++ retval = copy_files(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_semundo; ++ retval = copy_fs(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_files; ++ retval = copy_sighand(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_fs; ++ retval = copy_signal(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_sighand; ++ retval = copy_mm(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_signal; ++ retval = copy_namespaces(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_mm; ++ retval = copy_io(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_namespaces; ++ retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls); ++ if (retval) ++ goto bad_fork_cleanup_io; ++ ++ if (pid != &init_struct_pid) { ++ pid = alloc_pid(p->nsproxy->pid_ns_for_children); ++ if (IS_ERR(pid)) { ++ retval = PTR_ERR(pid); ++ goto bad_fork_cleanup_thread; ++ } ++ } ++ ++#ifdef CONFIG_BLOCK ++ p->plug = NULL; ++#endif ++#ifdef CONFIG_FUTEX ++ p->robust_list = NULL; ++#ifdef CONFIG_COMPAT ++ p->compat_robust_list = NULL; ++#endif ++ INIT_LIST_HEAD(&p->pi_state_list); ++ p->pi_state_cache = NULL; ++#endif ++ /* ++ * sigaltstack should be cleared when sharing the same VM ++ */ ++ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) ++ sas_ss_reset(p); ++ ++ /* ++ * Syscall tracing and stepping should be turned off in the ++ * child regardless of CLONE_PTRACE. ++ */ ++ user_disable_single_step(p); ++ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); ++#ifdef TIF_SYSCALL_EMU ++ clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); ++#endif ++ clear_all_latency_tracing(p); ++ ++ /* ok, now we should be set up.. */ ++ p->pid = pid_nr(pid); ++ if (clone_flags & CLONE_THREAD) { ++ p->exit_signal = -1; ++ p->group_leader = current->group_leader; ++ p->tgid = current->tgid; ++ } else { ++ if (clone_flags & CLONE_PARENT) ++ p->exit_signal = current->group_leader->exit_signal; ++ else ++ p->exit_signal = (clone_flags & CSIGNAL); ++ p->group_leader = p; ++ p->tgid = p->pid; ++ } ++ ++ p->nr_dirtied = 0; ++ p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); ++ p->dirty_paused_when = 0; ++ ++ p->pdeath_signal = 0; ++ INIT_LIST_HEAD(&p->thread_group); ++ p->task_works = NULL; ++ ++ cgroup_threadgroup_change_begin(current); ++ /* ++ * Ensure that the cgroup subsystem policies allow the new process to be ++ * forked. It should be noted the the new process's css_set can be changed ++ * between here and cgroup_post_fork() if an organisation operation is in ++ * progress. ++ */ ++ retval = cgroup_can_fork(p); ++ if (retval) ++ goto bad_fork_free_pid; ++ ++ /* ++ * From this point on we must avoid any synchronous user-space ++ * communication until we take the tasklist-lock. In particular, we do ++ * not want user-space to be able to predict the process start-time by ++ * stalling fork(2) after we recorded the start_time but before it is ++ * visible to the system. 
++ */ ++ ++ p->start_time = ktime_get_ns(); ++ p->real_start_time = ktime_get_boot_ns(); ++ ++ /* ++ * Make it visible to the rest of the system, but dont wake it up yet. ++ * Need tasklist lock for parent etc handling! ++ */ ++ write_lock_irq(&tasklist_lock); ++ ++ /* CLONE_PARENT re-uses the old parent */ ++ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { ++ p->real_parent = current->real_parent; ++ p->parent_exec_id = current->parent_exec_id; ++ p->parent_exec_id_u64 = current->parent_exec_id_u64; ++ } else { ++ p->real_parent = current; ++ p->parent_exec_id = current->self_exec_id; ++ p->parent_exec_id_u64 = current->self_exec_id_u64; ++ } ++ ++ klp_copy_process(p); ++ ++ spin_lock(¤t->sighand->siglock); ++ ++ /* ++ * Copy seccomp details explicitly here, in case they were changed ++ * before holding sighand lock. ++ */ ++ copy_seccomp(p); ++ ++ rseq_fork(p, clone_flags); ++ ++ /* Don't start children in a dying pid namespace */ ++ if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) { ++ retval = -ENOMEM; ++ goto bad_fork_cancel_cgroup; ++ } ++ ++ /* Let kill terminate clone/fork in the middle */ ++ if (fatal_signal_pending(current)) { ++ retval = -EINTR; ++ goto bad_fork_cancel_cgroup; ++ } ++ ++ ++ init_task_pid_links(p); ++ if (likely(p->pid)) { ++ ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); ++ ++ init_task_pid(p, PIDTYPE_PID, pid); ++ if (thread_group_leader(p)) { ++ init_task_pid(p, PIDTYPE_TGID, pid); ++ init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); ++ init_task_pid(p, PIDTYPE_SID, task_session(current)); ++ ++ if (is_child_reaper(pid)) { ++ ns_of_pid(pid)->child_reaper = p; ++ p->signal->flags |= SIGNAL_UNKILLABLE; ++ } ++ p->signal->shared_pending.signal = delayed.signal; ++ p->signal->tty = tty_kref_get(current->signal->tty); ++ /* ++ * Inherit has_child_subreaper flag under the same ++ * tasklist_lock with adding child to the process tree ++ * for propagate_has_child_subreaper optimization. 
++ */ ++ p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || ++ p->real_parent->signal->is_child_subreaper; ++ list_add_tail(&p->sibling, &p->real_parent->children); ++ list_add_tail_rcu(&p->tasks, &init_task.tasks); ++ attach_pid(p, PIDTYPE_TGID); ++ attach_pid(p, PIDTYPE_PGID); ++ attach_pid(p, PIDTYPE_SID); ++ __this_cpu_inc(process_counts); ++ } else { ++ current->signal->nr_threads++; ++ atomic_inc(¤t->signal->live); ++ atomic_inc(¤t->signal->sigcnt); ++ task_join_group_stop(p); ++ list_add_tail_rcu(&p->thread_group, ++ &p->group_leader->thread_group); ++ list_add_tail_rcu(&p->thread_node, ++ &p->signal->thread_head); ++ } ++ attach_pid(p, PIDTYPE_PID); ++ nr_threads++; ++ } ++ total_forks++; ++ hlist_del_init(&delayed.node); ++ spin_unlock(¤t->sighand->siglock); ++ syscall_tracepoint_update(p); ++ write_unlock_irq(&tasklist_lock); ++ ++ proc_fork_connector(p); ++ cgroup_post_fork(p); ++ cgroup_threadgroup_change_end(current); ++ perf_event_fork(p); ++ ++ trace_task_newtask(p, clone_flags); ++ uprobe_copy_process(p, clone_flags); ++ ++ copy_oom_score_adj(clone_flags, p); ++ ++ return p; ++ ++bad_fork_cancel_cgroup: ++ spin_unlock(¤t->sighand->siglock); ++ write_unlock_irq(&tasklist_lock); ++ cgroup_cancel_fork(p); ++bad_fork_free_pid: ++ cgroup_threadgroup_change_end(current); ++ if (pid != &init_struct_pid) ++ free_pid(pid); ++bad_fork_cleanup_thread: ++ exit_thread(p); ++bad_fork_cleanup_io: ++ if (p->io_context) ++ exit_io_context(p); ++bad_fork_cleanup_namespaces: ++ exit_task_namespaces(p); ++bad_fork_cleanup_mm: ++ if (p->mm) { ++ mm_clear_owner(p->mm, p); ++ mmput(p->mm); ++ } ++bad_fork_cleanup_signal: ++ if (!(clone_flags & CLONE_THREAD)) ++ free_signal_struct(p->signal); ++bad_fork_cleanup_sighand: ++ __cleanup_sighand(p->sighand); ++bad_fork_cleanup_fs: ++ exit_fs(p); /* blocking */ ++bad_fork_cleanup_files: ++ exit_files(p); /* blocking */ ++bad_fork_cleanup_semundo: ++ exit_sem(p); ++bad_fork_cleanup_security: ++ security_task_free(p); ++bad_fork_cleanup_audit: ++ audit_free(p); ++bad_fork_cleanup_perf: ++ perf_event_free_task(p); ++bad_fork_cleanup_policy: ++ lockdep_free_task(p); ++#ifdef CONFIG_NUMA ++ mpol_put(p->mempolicy); ++bad_fork_cleanup_threadgroup_lock: ++#endif ++ delayacct_tsk_free(p); ++bad_fork_cleanup_count: ++ atomic_dec(&p->cred->user->processes); ++ exit_creds(p); ++bad_fork_free: ++ p->state = TASK_DEAD; ++ put_task_stack(p); ++ delayed_free_task(p); ++fork_out: ++ spin_lock_irq(¤t->sighand->siglock); ++ hlist_del_init(&delayed.node); ++ spin_unlock_irq(¤t->sighand->siglock); ++ return ERR_PTR(retval); ++} ++ ++static inline void init_idle_pids(struct task_struct *idle) ++{ ++ enum pid_type type; ++ ++ for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { ++ INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */ ++ init_task_pid(idle, type, &init_struct_pid); ++ } ++} ++ ++struct task_struct *fork_idle(int cpu) ++{ ++ struct task_struct *task; ++ task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0, ++ cpu_to_node(cpu)); ++ if (!IS_ERR(task)) { ++ init_idle_pids(task); ++ init_idle(task, cpu); ++ } ++ ++ return task; ++} ++ ++/* ++ * Ok, this is the main fork-routine. ++ * ++ * It copies the process, and if successful kick-starts ++ * it and waits for it to finish using the VM if required. 
++ */ ++long _do_fork(unsigned long clone_flags, ++ unsigned long stack_start, ++ unsigned long stack_size, ++ int __user *parent_tidptr, ++ int __user *child_tidptr, ++ unsigned long tls) ++{ ++ struct completion vfork; ++ struct pid *pid; ++ struct task_struct *p; ++ int trace = 0; ++ long nr; ++ ++ /* ++ * Determine whether and which event to report to ptracer. When ++ * called from kernel_thread or CLONE_UNTRACED is explicitly ++ * requested, no event is reported; otherwise, report if the event ++ * for the type of forking is enabled. ++ */ ++ if (!(clone_flags & CLONE_UNTRACED)) { ++ if (clone_flags & CLONE_VFORK) ++ trace = PTRACE_EVENT_VFORK; ++ else if ((clone_flags & CSIGNAL) != SIGCHLD) ++ trace = PTRACE_EVENT_CLONE; ++ else ++ trace = PTRACE_EVENT_FORK; ++ ++ if (likely(!ptrace_event_enabled(current, trace))) ++ trace = 0; ++ } ++ ++ p = copy_process(clone_flags, stack_start, stack_size, ++ child_tidptr, NULL, trace, tls, NUMA_NO_NODE); ++ add_latent_entropy(); ++ ++ if (IS_ERR(p)) ++ return PTR_ERR(p); ++ ++ /* ++ * Do this prior waking up the new thread - the thread pointer ++ * might get invalid after that point, if the thread exits quickly. ++ */ ++ trace_sched_process_fork(current, p); ++ ++ pid = get_task_pid(p, PIDTYPE_PID); ++ nr = pid_vnr(pid); ++ ++ if (clone_flags & CLONE_PARENT_SETTID) ++ put_user(nr, parent_tidptr); ++ ++ if (clone_flags & CLONE_VFORK) { ++ p->vfork_done = &vfork; ++ init_completion(&vfork); ++ get_task_struct(p); ++ } ++ ++ wake_up_new_task(p); ++ ++ /* forking complete and child started to run, tell ptracer */ ++ if (unlikely(trace)) ++ ptrace_event_pid(trace, pid); ++ ++ if (clone_flags & CLONE_VFORK) { ++ if (!wait_for_vfork_done(p, &vfork)) ++ ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid); ++ } ++ ++ put_pid(pid); ++ return nr; ++} ++ ++#ifndef CONFIG_HAVE_COPY_THREAD_TLS ++/* For compatibility with architectures that call do_fork directly rather than ++ * using the syscall entry points below. */ ++long do_fork(unsigned long clone_flags, ++ unsigned long stack_start, ++ unsigned long stack_size, ++ int __user *parent_tidptr, ++ int __user *child_tidptr) ++{ ++ return _do_fork(clone_flags, stack_start, stack_size, ++ parent_tidptr, child_tidptr, 0); ++} ++#endif ++ ++/* ++ * Create a kernel thread. 
++ */ ++pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) ++{ ++ return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, ++ (unsigned long)arg, NULL, NULL, 0); ++} ++ ++#ifdef __ARCH_WANT_SYS_FORK ++SYSCALL_DEFINE0(fork) ++{ ++#ifdef CONFIG_MMU ++ return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0); ++#else ++ /* can not support in nommu mode */ ++ return -EINVAL; ++#endif ++} ++#endif ++ ++#ifdef __ARCH_WANT_SYS_VFORK ++SYSCALL_DEFINE0(vfork) ++{ ++ return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, ++ 0, NULL, NULL, 0); ++} ++#endif ++ ++#ifdef __ARCH_WANT_SYS_CLONE ++#ifdef CONFIG_CLONE_BACKWARDS ++SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, ++ int __user *, parent_tidptr, ++ unsigned long, tls, ++ int __user *, child_tidptr) ++#elif defined(CONFIG_CLONE_BACKWARDS2) ++SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, ++ int __user *, parent_tidptr, ++ int __user *, child_tidptr, ++ unsigned long, tls) ++#elif defined(CONFIG_CLONE_BACKWARDS3) ++SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, ++ int, stack_size, ++ int __user *, parent_tidptr, ++ int __user *, child_tidptr, ++ unsigned long, tls) ++#else ++SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, ++ int __user *, parent_tidptr, ++ int __user *, child_tidptr, ++ unsigned long, tls) ++#endif ++{ ++ return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls); ++} ++#endif ++ ++void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data) ++{ ++ struct task_struct *leader, *parent, *child; ++ int res; ++ ++ read_lock(&tasklist_lock); ++ leader = top = top->group_leader; ++down: ++ for_each_thread(leader, parent) { ++ list_for_each_entry(child, &parent->children, sibling) { ++ res = visitor(child, data); ++ if (res) { ++ if (res < 0) ++ goto out; ++ leader = child; ++ goto down; ++ } ++up: ++ ; ++ } ++ } ++ ++ if (leader != top) { ++ child = leader; ++ parent = child->real_parent; ++ leader = parent->group_leader; ++ goto up; ++ } ++out: ++ read_unlock(&tasklist_lock); ++} ++ ++#ifndef ARCH_MIN_MMSTRUCT_ALIGN ++#define ARCH_MIN_MMSTRUCT_ALIGN 0 ++#endif ++ ++static void sighand_ctor(void *data) ++{ ++ struct sighand_struct *sighand = data; ++ ++ spin_lock_init(&sighand->siglock); ++ init_waitqueue_head(&sighand->signalfd_wqh); ++} ++ ++void __init proc_caches_init(void) ++{ ++ unsigned int mm_size; ++ ++ sighand_cachep = kmem_cache_create("sighand_cache", ++ sizeof(struct sighand_struct), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| ++ SLAB_ACCOUNT, sighand_ctor); ++ signal_cachep = kmem_cache_create("signal_cache", ++ sizeof(struct signal_struct), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, ++ NULL); ++ files_cachep = kmem_cache_create("files_cache", ++ sizeof(struct files_struct), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, ++ NULL); ++ fs_cachep = kmem_cache_create("fs_cache", ++ sizeof(struct fs_struct), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, ++ NULL); ++ ++ /* ++ * The mm_cpumask is located at the end of mm_struct, and is ++ * dynamically sized based on the maximum CPU number this system ++ * can have, taking hotplug into account (nr_cpu_ids). 
++ */ ++ mm_size = sizeof(struct mm_struct) + cpumask_size(); ++ ++ mm_cachep = kmem_cache_create_usercopy("mm_struct", ++ mm_size, ARCH_MIN_MMSTRUCT_ALIGN, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, ++ offsetof(struct mm_struct, saved_auxv), ++ sizeof_field(struct mm_struct, saved_auxv), ++ NULL); ++ vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); ++ mmap_init(); ++ nsproxy_cache_init(); ++} ++ ++/* ++ * Check constraints on flags passed to the unshare system call. ++ */ ++static int check_unshare_flags(unsigned long unshare_flags) ++{ ++ if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| ++ CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| ++ CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| ++ CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP)) ++ return -EINVAL; ++ /* ++ * Not implemented, but pretend it works if there is nothing ++ * to unshare. Note that unsharing the address space or the ++ * signal handlers also need to unshare the signal queues (aka ++ * CLONE_THREAD). ++ */ ++ if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { ++ if (!thread_group_empty(current)) ++ return -EINVAL; ++ } ++ if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { ++ if (atomic_read(¤t->sighand->count) > 1) ++ return -EINVAL; ++ } ++ if (unshare_flags & CLONE_VM) { ++ if (!current_is_single_threaded()) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Unshare the filesystem structure if it is being shared ++ */ ++static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) ++{ ++ struct fs_struct *fs = current->fs; ++ ++ if (!(unshare_flags & CLONE_FS) || !fs) ++ return 0; ++ ++ /* don't need lock here; in the worst case we'll do useless copy */ ++ if (fs->users == 1) ++ return 0; ++ ++ *new_fsp = copy_fs_struct(fs); ++ if (!*new_fsp) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/* ++ * Unshare file descriptor table if it is being shared ++ */ ++static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) ++{ ++ struct files_struct *fd = current->files; ++ int error = 0; ++ ++ if ((unshare_flags & CLONE_FILES) && ++ (fd && atomic_read(&fd->count) > 1)) { ++ *new_fdp = dup_fd(fd, &error); ++ if (!*new_fdp) ++ return error; ++ } ++ ++ return 0; ++} ++ ++/* ++ * unshare allows a process to 'unshare' part of the process ++ * context which was originally shared using clone. copy_* ++ * functions used by do_fork() cannot be used here directly ++ * because they modify an inactive task_struct that is being ++ * constructed. Here we are modifying the current, active, ++ * task_struct. ++ */ ++int ksys_unshare(unsigned long unshare_flags) ++{ ++ struct fs_struct *fs, *new_fs = NULL; ++ struct files_struct *fd, *new_fd = NULL; ++ struct cred *new_cred = NULL; ++ struct nsproxy *new_nsproxy = NULL; ++ int do_sysvsem = 0; ++ int err; ++ ++ /* ++ * If unsharing a user namespace must also unshare the thread group ++ * and unshare the filesystem root and working directories. ++ */ ++ if (unshare_flags & CLONE_NEWUSER) ++ unshare_flags |= CLONE_THREAD | CLONE_FS; ++ /* ++ * If unsharing vm, must also unshare signal handlers. ++ */ ++ if (unshare_flags & CLONE_VM) ++ unshare_flags |= CLONE_SIGHAND; ++ /* ++ * If unsharing a signal handlers, must also unshare the signal queues. ++ */ ++ if (unshare_flags & CLONE_SIGHAND) ++ unshare_flags |= CLONE_THREAD; ++ /* ++ * If unsharing namespace, must also unshare filesystem information. 
++ */ ++ if (unshare_flags & CLONE_NEWNS) ++ unshare_flags |= CLONE_FS; ++ ++ err = check_unshare_flags(unshare_flags); ++ if (err) ++ goto bad_unshare_out; ++ /* ++ * CLONE_NEWIPC must also detach from the undolist: after switching ++ * to a new ipc namespace, the semaphore arrays from the old ++ * namespace are unreachable. ++ */ ++ if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) ++ do_sysvsem = 1; ++ err = unshare_fs(unshare_flags, &new_fs); ++ if (err) ++ goto bad_unshare_out; ++ err = unshare_fd(unshare_flags, &new_fd); ++ if (err) ++ goto bad_unshare_cleanup_fs; ++ err = unshare_userns(unshare_flags, &new_cred); ++ if (err) ++ goto bad_unshare_cleanup_fd; ++ err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, ++ new_cred, new_fs); ++ if (err) ++ goto bad_unshare_cleanup_cred; ++ ++ if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { ++ if (do_sysvsem) { ++ /* ++ * CLONE_SYSVSEM is equivalent to sys_exit(). ++ */ ++ exit_sem(current); ++ } ++ if (unshare_flags & CLONE_NEWIPC) { ++ /* Orphan segments in old ns (see sem above). */ ++ exit_shm(current); ++ shm_init_task(current); ++ } ++ ++ if (new_nsproxy) ++ switch_task_namespaces(current, new_nsproxy); ++ ++ task_lock(current); ++ ++ if (new_fs) { ++ fs = current->fs; ++ spin_lock(&fs->lock); ++ current->fs = new_fs; ++ if (--fs->users) ++ new_fs = NULL; ++ else ++ new_fs = fs; ++ spin_unlock(&fs->lock); ++ } ++ ++ if (new_fd) { ++ fd = current->files; ++ current->files = new_fd; ++ new_fd = fd; ++ } ++ ++ task_unlock(current); ++ ++ if (new_cred) { ++ /* Install the new user namespace */ ++ commit_creds(new_cred); ++ new_cred = NULL; ++ } ++ } ++ ++ perf_event_namespaces(current); ++ ++bad_unshare_cleanup_cred: ++ if (new_cred) ++ put_cred(new_cred); ++bad_unshare_cleanup_fd: ++ if (new_fd) ++ put_files_struct(new_fd); ++ ++bad_unshare_cleanup_fs: ++ if (new_fs) ++ free_fs_struct(new_fs); ++ ++bad_unshare_out: ++ return err; ++} ++ ++SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) ++{ ++ return ksys_unshare(unshare_flags); ++} ++ ++/* ++ * Helper to unshare the files of the current task. ++ * We don't want to expose copy_files internals to ++ * the exec layer of the kernel. ++ */ ++ ++int unshare_files(struct files_struct **displaced) ++{ ++ struct task_struct *task = current; ++ struct files_struct *copy = NULL; ++ int error; ++ ++ error = unshare_fd(CLONE_FILES, ©); ++ if (error || !copy) { ++ *displaced = NULL; ++ return error; ++ } ++ *displaced = task->files; ++ task_lock(task); ++ task->files = copy; ++ task_unlock(task); ++ return 0; ++} ++ ++int sysctl_max_threads(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ struct ctl_table t; ++ int ret; ++ int threads = max_threads; ++ int min = 1; ++ int max = MAX_THREADS; ++ ++ t = *table; ++ t.data = &threads; ++ t.extra1 = &min; ++ t.extra2 = &max; ++ ++ ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); ++ if (ret || !write) ++ return ret; ++ ++ max_threads = threads; ++ ++ return 0; ++} +diff -uprN kernel/kernel/ipipe/core.c kernel_new/kernel/ipipe/core.c +--- kernel/kernel/ipipe/core.c 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/core.c 2021-04-01 18:28:07.805863120 +0800 +@@ -0,0 +1,2117 @@ ++/* -*- linux-c -*- ++ * linux/kernel/ipipe/core.c ++ * ++ * Copyright (C) 2002-2012 Philippe Gerum. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Architecture-independent I-PIPE core support. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_PROC_FS ++#include ++#include ++#endif /* CONFIG_PROC_FS */ ++#include ++#include ++#include ++#include ++#include ++ ++struct ipipe_domain ipipe_root; ++EXPORT_SYMBOL_GPL(ipipe_root); ++ ++struct ipipe_domain *ipipe_head_domain = &ipipe_root; ++EXPORT_SYMBOL_GPL(ipipe_head_domain); ++ ++#ifdef CONFIG_SMP ++static __initdata struct ipipe_percpu_domain_data bootup_context = { ++ .status = IPIPE_STALL_MASK, ++ .domain = &ipipe_root, ++}; ++#else ++#define bootup_context ipipe_percpu.root ++#endif /* !CONFIG_SMP */ ++ ++DEFINE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu) = { ++ .root = { ++ .status = IPIPE_STALL_MASK, ++ .domain = &ipipe_root, ++ }, ++ .curr = &bootup_context, ++ .hrtimer_irq = -1, ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ .context_check = 1, ++#endif ++}; ++EXPORT_PER_CPU_SYMBOL(ipipe_percpu); ++ ++/* Up to 2k of pending work data per CPU. */ ++#define WORKBUF_SIZE 2048 ++static DEFINE_PER_CPU_ALIGNED(unsigned char[WORKBUF_SIZE], work_buf); ++static DEFINE_PER_CPU(void *, work_tail); ++static unsigned int __ipipe_work_virq; ++ ++static void __ipipe_do_work(unsigned int virq, void *cookie); ++ ++#ifdef CONFIG_SMP ++ ++#define IPIPE_CRITICAL_TIMEOUT 1000000 ++static cpumask_t __ipipe_cpu_sync_map; ++static cpumask_t __ipipe_cpu_lock_map; ++static cpumask_t __ipipe_cpu_pass_map; ++static unsigned long __ipipe_critical_lock; ++static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier); ++static atomic_t __ipipe_critical_count = ATOMIC_INIT(0); ++static void (*__ipipe_cpu_sync) (void); ++ ++#else /* !CONFIG_SMP */ ++/* ++ * Create an alias to the unique root status, so that arch-dep code ++ * may get fast access to this percpu variable including from ++ * assembly. A hard-coded assumption is that root.status appears at ++ * offset #0 of the ipipe_percpu struct. 
++ */ ++extern unsigned long __ipipe_root_status ++__attribute__((alias(__stringify(ipipe_percpu)))); ++EXPORT_SYMBOL(__ipipe_root_status); ++ ++#endif /* !CONFIG_SMP */ ++ ++IPIPE_DEFINE_SPINLOCK(__ipipe_lock); ++ ++static unsigned long __ipipe_virtual_irq_map; ++ ++#ifdef CONFIG_PRINTK ++unsigned int __ipipe_printk_virq; ++int __ipipe_printk_bypass; ++#endif /* CONFIG_PRINTK */ ++ ++#ifdef CONFIG_PROC_FS ++ ++struct proc_dir_entry *ipipe_proc_root; ++ ++static int __ipipe_version_info_show(struct seq_file *p, void *data) ++{ ++ seq_printf(p, "%d\n", IPIPE_CORE_RELEASE); ++ return 0; ++} ++ ++static int __ipipe_version_info_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, __ipipe_version_info_show, NULL); ++} ++ ++static const struct file_operations __ipipe_version_proc_ops = { ++ .open = __ipipe_version_info_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++static int __ipipe_common_info_show(struct seq_file *p, void *data) ++{ ++ struct ipipe_domain *ipd = (struct ipipe_domain *)p->private; ++ char handling, lockbit, virtuality; ++ unsigned long ctlbits; ++ unsigned int irq; ++ ++ seq_printf(p, " +--- Handled\n"); ++ seq_printf(p, " |+-- Locked\n"); ++ seq_printf(p, " ||+- Virtual\n"); ++ seq_printf(p, " [IRQ] ||| Handler\n"); ++ ++ mutex_lock(&ipd->mutex); ++ ++ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { ++ ctlbits = ipd->irqs[irq].control; ++ /* ++ * There might be a hole between the last external IRQ ++ * and the first virtual one; skip it. ++ */ ++ if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)) ++ continue; ++ ++ if (ipipe_virtual_irq_p(irq) ++ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)) ++ /* Non-allocated virtual IRQ; skip it. */ ++ continue; ++ ++ if (ctlbits & IPIPE_HANDLE_MASK) ++ handling = 'H'; ++ else ++ handling = '.'; ++ ++ if (ctlbits & IPIPE_LOCK_MASK) ++ lockbit = 'L'; ++ else ++ lockbit = '.'; ++ ++ if (ipipe_virtual_irq_p(irq)) ++ virtuality = 'V'; ++ else ++ virtuality = '.'; ++ ++ if (ctlbits & IPIPE_HANDLE_MASK) ++ seq_printf(p, " %4u: %c%c%c %pf\n", ++ irq, handling, lockbit, virtuality, ++ ipd->irqs[irq].handler); ++ else ++ seq_printf(p, " %4u: %c%c%c\n", ++ irq, handling, lockbit, virtuality); ++ } ++ ++ mutex_unlock(&ipd->mutex); ++ ++ return 0; ++} ++ ++static int __ipipe_common_info_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, __ipipe_common_info_show, PDE_DATA(inode)); ++} ++ ++static const struct file_operations __ipipe_info_proc_ops = { ++ .owner = THIS_MODULE, ++ .open = __ipipe_common_info_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++void add_domain_proc(struct ipipe_domain *ipd) ++{ ++ proc_create_data(ipd->name, 0444, ipipe_proc_root, ++ &__ipipe_info_proc_ops, ipd); ++} ++ ++void remove_domain_proc(struct ipipe_domain *ipd) ++{ ++ remove_proc_entry(ipd->name, ipipe_proc_root); ++} ++ ++void __init __ipipe_init_proc(void) ++{ ++ ipipe_proc_root = proc_mkdir("ipipe", NULL); ++ proc_create("version", 0444, ipipe_proc_root, ++ &__ipipe_version_proc_ops); ++ add_domain_proc(ipipe_root_domain); ++ ++ __ipipe_init_tracer(); ++} ++ ++#else ++ ++static inline void add_domain_proc(struct ipipe_domain *ipd) ++{ ++} ++ ++static inline void remove_domain_proc(struct ipipe_domain *ipd) ++{ ++} ++ ++#endif /* CONFIG_PROC_FS */ ++ ++static void init_stage(struct ipipe_domain *ipd) ++{ ++ memset(&ipd->irqs, 0, sizeof(ipd->irqs)); ++ mutex_init(&ipd->mutex); ++ __ipipe_hook_critical_ipi(ipd); 
++} ++ ++static inline int root_context_offset(void) ++{ ++ void root_context_not_at_start_of_ipipe_percpu(void); ++ ++ /* ipipe_percpu.root must be found at offset #0. */ ++ ++ if (offsetof(struct ipipe_percpu_data, root)) ++ root_context_not_at_start_of_ipipe_percpu(); ++ ++ return 0; ++} ++ ++#ifdef CONFIG_SMP ++ ++static inline void fixup_percpu_data(void) ++{ ++ struct ipipe_percpu_data *p; ++ int cpu; ++ ++ /* ++ * ipipe_percpu.curr cannot be assigned statically to ++ * &ipipe_percpu.root, due to the dynamic nature of percpu ++ * data. So we make ipipe_percpu.curr refer to a temporary ++ * boot up context in static memory, until we can fixup all ++ * context pointers in this routine, after per-cpu areas have ++ * been eventually set up. The temporary context data is ++ * copied to per_cpu(ipipe_percpu, 0).root in the same move. ++ * ++ * Obviously, this code must run over the boot CPU, before SMP ++ * operations start. ++ */ ++ BUG_ON(smp_processor_id() || !irqs_disabled()); ++ ++ per_cpu(ipipe_percpu, 0).root = bootup_context; ++ ++ for_each_possible_cpu(cpu) { ++ p = &per_cpu(ipipe_percpu, cpu); ++ p->curr = &p->root; ++ } ++} ++ ++#else /* !CONFIG_SMP */ ++ ++static inline void fixup_percpu_data(void) { } ++ ++#endif /* CONFIG_SMP */ ++ ++void __init __ipipe_init_early(void) ++{ ++ struct ipipe_domain *ipd = &ipipe_root; ++ int cpu; ++ ++ fixup_percpu_data(); ++ ++ /* ++ * A lightweight registration code for the root domain. We are ++ * running on the boot CPU, hw interrupts are off, and ++ * secondary CPUs are still lost in space. ++ */ ++ ipd->name = "Linux"; ++ ipd->context_offset = root_context_offset(); ++ init_stage(ipd); ++ ++ /* ++ * Do the early init stuff. First we do the per-arch pipeline ++ * core setup, then we run the per-client setup code. At this ++ * point, the kernel does not provide much services yet: be ++ * careful. ++ */ ++ __ipipe_early_core_setup(); ++ __ipipe_early_client_setup(); ++ ++#ifdef CONFIG_PRINTK ++ __ipipe_printk_virq = ipipe_alloc_virq(); ++ ipd->irqs[__ipipe_printk_virq].handler = __ipipe_flush_printk; ++ ipd->irqs[__ipipe_printk_virq].cookie = NULL; ++ ipd->irqs[__ipipe_printk_virq].ackfn = NULL; ++ ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK; ++#endif /* CONFIG_PRINTK */ ++ ++ __ipipe_work_virq = ipipe_alloc_virq(); ++ ipd->irqs[__ipipe_work_virq].handler = __ipipe_do_work; ++ ipd->irqs[__ipipe_work_virq].cookie = NULL; ++ ipd->irqs[__ipipe_work_virq].ackfn = NULL; ++ ipd->irqs[__ipipe_work_virq].control = IPIPE_HANDLE_MASK; ++ ++ for_each_possible_cpu(cpu) ++ per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu); ++} ++ ++void __init __ipipe_init(void) ++{ ++ /* Now we may engage the pipeline. */ ++ __ipipe_enable_pipeline(); ++ ++ pr_info("Interrupt pipeline (release #%d)\n", IPIPE_CORE_RELEASE); ++} ++ ++static inline void init_head_stage(struct ipipe_domain *ipd) ++{ ++ struct ipipe_percpu_domain_data *p; ++ int cpu; ++ ++ /* Must be set first, used in ipipe_percpu_context(). 
*/ ++ ipd->context_offset = offsetof(struct ipipe_percpu_data, head); ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpu_context(ipd, cpu); ++ memset(p, 0, sizeof(*p)); ++ p->domain = ipd; ++ } ++ ++ init_stage(ipd); ++} ++ ++void ipipe_register_head(struct ipipe_domain *ipd, const char *name) ++{ ++ BUG_ON(!ipipe_root_p || ipd == &ipipe_root); ++ ++ ipd->name = name; ++ init_head_stage(ipd); ++ barrier(); ++ ipipe_head_domain = ipd; ++ add_domain_proc(ipd); ++ ++ pr_info("I-pipe: head domain %s registered.\n", name); ++} ++EXPORT_SYMBOL_GPL(ipipe_register_head); ++ ++void ipipe_unregister_head(struct ipipe_domain *ipd) ++{ ++ BUG_ON(!ipipe_root_p || ipd != ipipe_head_domain); ++ ++ ipipe_head_domain = &ipipe_root; ++ smp_mb(); ++ mutex_lock(&ipd->mutex); ++ remove_domain_proc(ipd); ++ mutex_unlock(&ipd->mutex); ++ ++ pr_info("I-pipe: head domain %s unregistered.\n", ipd->name); ++} ++EXPORT_SYMBOL_GPL(ipipe_unregister_head); ++ ++void ipipe_stall_root(void) ++{ ++ unsigned long flags; ++ ++ ipipe_root_only(); ++ flags = hard_smp_local_irq_save(); ++ __set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); ++ hard_smp_local_irq_restore(flags); ++} ++EXPORT_SYMBOL(ipipe_stall_root); ++ ++unsigned long ipipe_test_and_stall_root(void) ++{ ++ unsigned long flags; ++ int x; ++ ++ ipipe_root_only(); ++ flags = hard_smp_local_irq_save(); ++ x = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); ++ hard_smp_local_irq_restore(flags); ++ ++ return x; ++} ++EXPORT_SYMBOL(ipipe_test_and_stall_root); ++ ++unsigned long ipipe_test_root(void) ++{ ++ unsigned long flags; ++ int x; ++ ++ flags = hard_smp_local_irq_save(); ++ x = test_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); ++ hard_smp_local_irq_restore(flags); ++ ++ return x; ++} ++EXPORT_SYMBOL(ipipe_test_root); ++ ++void ipipe_unstall_root(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ ++ hard_local_irq_disable(); ++ ++ /* This helps catching bad usage from assembly call sites. */ ++ ipipe_root_only(); ++ ++ p = ipipe_this_cpu_root_context(); ++ ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (unlikely(__ipipe_ipending_p(p))) ++ __ipipe_sync_stage(); ++ ++ hard_local_irq_enable(); ++} ++EXPORT_SYMBOL(ipipe_unstall_root); ++ ++void ipipe_restore_root(unsigned long x) ++{ ++ ipipe_root_only(); ++ ++ if (x) ++ ipipe_stall_root(); ++ else ++ ipipe_unstall_root(); ++} ++EXPORT_SYMBOL(ipipe_restore_root); ++ ++void __ipipe_restore_root_nosync(unsigned long x) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_root_context(); ++ ++ if (raw_irqs_disabled_flags(x)) { ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ trace_hardirqs_off(); ++ } else { ++ trace_hardirqs_on(); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_restore_root_nosync); ++ ++void ipipe_unstall_head(void) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(); ++ ++ hard_local_irq_disable(); ++ ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (unlikely(__ipipe_ipending_p(p))) ++ __ipipe_sync_pipeline(ipipe_head_domain); ++ ++ hard_local_irq_enable(); ++} ++EXPORT_SYMBOL_GPL(ipipe_unstall_head); ++ ++void __ipipe_restore_head(unsigned long x) /* hw interrupt off */ ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(); ++ ++ if (x) { ++#ifdef CONFIG_DEBUG_KERNEL ++ static int warned; ++ if (!warned && ++ __test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) { ++ /* ++ * Already stalled albeit ipipe_restore_head() ++ * should have detected it? Send a warning once. 
++ */ ++ hard_local_irq_enable(); ++ warned = 1; ++ pr_warning("I-pipe: ipipe_restore_head() " ++ "optimization failed.\n"); ++ dump_stack(); ++ hard_local_irq_disable(); ++ } ++#else /* !CONFIG_DEBUG_KERNEL */ ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++#endif /* CONFIG_DEBUG_KERNEL */ ++ } else { ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ if (unlikely(__ipipe_ipending_p(p))) ++ __ipipe_sync_pipeline(ipipe_head_domain); ++ hard_local_irq_enable(); ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_restore_head); ++ ++void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock) ++{ ++ hard_local_irq_disable(); ++ if (ipipe_smp_p) ++ arch_spin_lock(&lock->arch_lock); ++ __set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irq); ++ ++void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock) ++{ ++ if (ipipe_smp_p) ++ arch_spin_unlock(&lock->arch_lock); ++ __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ hard_local_irq_enable(); ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irq); ++ ++unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock) ++{ ++ unsigned long flags; ++ int s; ++ ++ flags = hard_local_irq_save(); ++ if (ipipe_smp_p) ++ arch_spin_lock(&lock->arch_lock); ++ s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ ++ return arch_mangle_irq_bits(s, flags); ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irqsave); ++ ++int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock, ++ unsigned long *x) ++{ ++ unsigned long flags; ++ int s; ++ ++ flags = hard_local_irq_save(); ++ if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) { ++ hard_local_irq_restore(flags); ++ return 0; ++ } ++ s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ *x = arch_mangle_irq_bits(s, flags); ++ ++ return 1; ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irqsave); ++ ++void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock, ++ unsigned long x) ++{ ++ if (ipipe_smp_p) ++ arch_spin_unlock(&lock->arch_lock); ++ if (!arch_demangle_irq_bits(&x)) ++ __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ hard_local_irq_restore(x); ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irqrestore); ++ ++int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock) ++{ ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) { ++ hard_local_irq_restore(flags); ++ return 0; ++ } ++ __set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ ++ return 1; ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irq); ++ ++void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock) ++{ ++ if (ipipe_smp_p) ++ arch_spin_unlock(&lock->arch_lock); ++} ++ ++void __ipipe_spin_unlock_irqcomplete(unsigned long x) ++{ ++ if (!arch_demangle_irq_bits(&x)) ++ __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ hard_local_irq_restore(x); ++} ++ ++/* Must be called hw IRQs off. */ ++static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, ++ unsigned int irq) ++{ ++ __set_bit(irq, p->irqheld_map); ++ p->irqall[irq]++; ++} ++ ++#if __IPIPE_IRQMAP_LEVELS == 4 ++ ++/* Must be called hw IRQs off. 
*/ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd); ++ int l0b, l1b, l2b; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l2b = irq / BITS_PER_LONG; ++ ++ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { ++ __set_bit(l0b, &p->irqpend_0map); ++ __set_bit(l1b, p->irqpend_1map); ++ __set_bit(l2b, p->irqpend_2map); ++ __set_bit(irq, p->irqpend_map); ++ } else ++ __set_bit(irq, p->irqheld_map); ++ ++ p->irqall[irq]++; ++} ++EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending); ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_lock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_root_domain; ++ struct ipipe_percpu_domain_data *p; ++ int l0b, l1b, l2b; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ /* ++ * Interrupts requested by a registered head domain cannot be ++ * locked, since this would make no sense: interrupts are ++ * globally masked at CPU level when the head domain is ++ * stalled, so there is no way we could encounter the ++ * situation IRQ locks are handling. ++ */ ++ if (test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ return; ++ ++ p = ipipe_this_cpu_context(ipd); ++ if (__test_and_clear_bit(irq, p->irqpend_map)) { ++ __set_bit(irq, p->irqheld_map); ++ l2b = irq / BITS_PER_LONG; ++ if (p->irqpend_map[l2b] == 0) { ++ __clear_bit(l2b, p->irqpend_2map); ++ l1b = l2b / BITS_PER_LONG; ++ if (p->irqpend_2map[l1b] == 0) { ++ __clear_bit(l1b, p->irqpend_1map); ++ l0b = l1b / BITS_PER_LONG; ++ if (p->irqpend_1map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ } ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_lock_irq); ++ ++/* Must be called hw IRQs off. 
*/ ++void __ipipe_unlock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_root_domain; ++ struct ipipe_percpu_domain_data *p; ++ int l0b, l1b, l2b, cpu; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ return; ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l2b = irq / BITS_PER_LONG; ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_this_cpu_root_context(); ++ if (test_and_clear_bit(irq, p->irqheld_map)) { ++ /* We need atomic ops here: */ ++ set_bit(irq, p->irqpend_map); ++ set_bit(l2b, p->irqpend_2map); ++ set_bit(l1b, p->irqpend_1map); ++ set_bit(l0b, &p->irqpend_0map); ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_unlock_irq); ++ ++#define wmul1(__n) ((__n) * BITS_PER_LONG) ++#define wmul2(__n) (wmul1(__n) * BITS_PER_LONG) ++#define wmul3(__n) (wmul2(__n) * BITS_PER_LONG) ++ ++static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) ++{ ++ unsigned long l0m, l1m, l2m, l3m; ++ int l0b, l1b, l2b, l3b; ++ unsigned int irq; ++ ++ l0m = p->irqpend_0map; ++ if (unlikely(l0m == 0)) ++ return -1; ++ l0b = __ipipe_ffnz(l0m); ++ irq = wmul3(l0b); ++ ++ l1m = p->irqpend_1map[l0b]; ++ if (unlikely(l1m == 0)) ++ return -1; ++ l1b = __ipipe_ffnz(l1m); ++ irq += wmul2(l1b); ++ ++ l2m = p->irqpend_2map[wmul1(l0b) + l1b]; ++ if (unlikely(l2m == 0)) ++ return -1; ++ l2b = __ipipe_ffnz(l2m); ++ irq += wmul1(l2b); ++ ++ l3m = p->irqpend_map[wmul2(l0b) + wmul1(l1b) + l2b]; ++ if (unlikely(l3m == 0)) ++ return -1; ++ l3b = __ipipe_ffnz(l3m); ++ irq += l3b; ++ ++ __clear_bit(irq, p->irqpend_map); ++ if (p->irqpend_map[irq / BITS_PER_LONG] == 0) { ++ __clear_bit(l2b, &p->irqpend_2map[wmul1(l0b) + l1b]); ++ if (p->irqpend_2map[wmul1(l0b) + l1b] == 0) { ++ __clear_bit(l1b, &p->irqpend_1map[l0b]); ++ if (p->irqpend_1map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ } ++ } ++ ++ return irq; ++} ++ ++#elif __IPIPE_IRQMAP_LEVELS == 3 ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd); ++ int l0b, l1b; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / BITS_PER_LONG; ++ ++ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { ++ __set_bit(irq, p->irqpend_map); ++ __set_bit(l1b, p->irqpend_1map); ++ __set_bit(l0b, &p->irqpend_0map); ++ } else ++ __set_bit(irq, p->irqheld_map); ++ ++ p->irqall[irq]++; ++} ++EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending); ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_lock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_root_domain; ++ struct ipipe_percpu_domain_data *p; ++ int l0b, l1b; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ /* ++ * Interrupts requested by a registered head domain cannot be ++ * locked, since this would make no sense: interrupts are ++ * globally masked at CPU level when the head domain is ++ * stalled, so there is no way we could encounter the ++ * situation IRQ locks are handling. 
++ */ ++ if (test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ return; ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / BITS_PER_LONG; ++ ++ p = ipipe_this_cpu_context(ipd); ++ if (__test_and_clear_bit(irq, p->irqpend_map)) { ++ __set_bit(irq, p->irqheld_map); ++ if (p->irqpend_map[l1b] == 0) { ++ __clear_bit(l1b, p->irqpend_1map); ++ if (p->irqpend_1map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_lock_irq); ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_unlock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_root_domain; ++ struct ipipe_percpu_domain_data *p; ++ int l0b, l1b, cpu; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ return; ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / BITS_PER_LONG; ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_this_cpu_root_context(); ++ if (test_and_clear_bit(irq, p->irqheld_map)) { ++ /* We need atomic ops here: */ ++ set_bit(irq, p->irqpend_map); ++ set_bit(l1b, p->irqpend_1map); ++ set_bit(l0b, &p->irqpend_0map); ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_unlock_irq); ++ ++static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) ++{ ++ int l0b, l1b, l2b; ++ unsigned long l0m, l1m, l2m; ++ unsigned int irq; ++ ++ l0m = p->irqpend_0map; ++ if (unlikely(l0m == 0)) ++ return -1; ++ ++ l0b = __ipipe_ffnz(l0m); ++ l1m = p->irqpend_1map[l0b]; ++ if (unlikely(l1m == 0)) ++ return -1; ++ ++ l1b = __ipipe_ffnz(l1m) + l0b * BITS_PER_LONG; ++ l2m = p->irqpend_map[l1b]; ++ if (unlikely(l2m == 0)) ++ return -1; ++ ++ l2b = __ipipe_ffnz(l2m); ++ irq = l1b * BITS_PER_LONG + l2b; ++ ++ __clear_bit(irq, p->irqpend_map); ++ if (p->irqpend_map[l1b] == 0) { ++ __clear_bit(l1b, p->irqpend_1map); ++ if (p->irqpend_1map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ } ++ ++ return irq; ++} ++ ++#else /* __IPIPE_IRQMAP_LEVELS == 2 */ ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd); ++ int l0b = irq / BITS_PER_LONG; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { ++ __set_bit(irq, p->irqpend_map); ++ __set_bit(l0b, &p->irqpend_0map); ++ } else ++ __set_bit(irq, p->irqheld_map); ++ ++ p->irqall[irq]++; ++} ++EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending); ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_lock_irq(unsigned int irq) ++{ ++ struct ipipe_percpu_domain_data *p; ++ int l0b = irq / BITS_PER_LONG; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ if (test_and_set_bit(IPIPE_LOCK_FLAG, ++ &ipipe_root_domain->irqs[irq].control)) ++ return; ++ ++ p = ipipe_this_cpu_root_context(); ++ if (__test_and_clear_bit(irq, p->irqpend_map)) { ++ __set_bit(irq, p->irqheld_map); ++ if (p->irqpend_map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_lock_irq); ++ ++/* Must be called hw IRQs off. 
*/ ++void __ipipe_unlock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_root_domain; ++ struct ipipe_percpu_domain_data *p; ++ int l0b = irq / BITS_PER_LONG, cpu; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ return; ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpu_context(ipd, cpu); ++ if (test_and_clear_bit(irq, p->irqheld_map)) { ++ /* We need atomic ops here: */ ++ set_bit(irq, p->irqpend_map); ++ set_bit(l0b, &p->irqpend_0map); ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_unlock_irq); ++ ++static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) ++{ ++ unsigned long l0m, l1m; ++ int l0b, l1b; ++ ++ l0m = p->irqpend_0map; ++ if (unlikely(l0m == 0)) ++ return -1; ++ ++ l0b = __ipipe_ffnz(l0m); ++ l1m = p->irqpend_map[l0b]; ++ if (unlikely(l1m == 0)) ++ return -1; ++ ++ l1b = __ipipe_ffnz(l1m); ++ __clear_bit(l1b, &p->irqpend_map[l0b]); ++ if (p->irqpend_map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ ++ return l0b * BITS_PER_LONG + l1b; ++} ++ ++#endif ++ ++void __ipipe_do_sync_pipeline(struct ipipe_domain *top) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_domain *ipd; ++ ++ /* We must enter over the root domain. */ ++ IPIPE_WARN_ONCE(__ipipe_current_domain != ipipe_root_domain); ++ ipd = top; ++next: ++ p = ipipe_this_cpu_context(ipd); ++ if (test_bit(IPIPE_STALL_FLAG, &p->status)) ++ return; ++ ++ if (__ipipe_ipending_p(p)) { ++ if (ipd == ipipe_root_domain) ++ __ipipe_sync_stage(); ++ else { ++ /* Switching to head. */ ++ p->coflags &= ~__IPIPE_ALL_R; ++ __ipipe_set_current_context(p); ++ __ipipe_sync_stage(); ++ __ipipe_set_current_domain(ipipe_root_domain); ++ } ++ } ++ ++ if (ipd != ipipe_root_domain) { ++ ipd = ipipe_root_domain; ++ goto next; ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_do_sync_pipeline); ++ ++unsigned int ipipe_alloc_virq(void) ++{ ++ unsigned long flags, irq = 0; ++ int ipos; ++ ++ raw_spin_lock_irqsave(&__ipipe_lock, flags); ++ ++ if (__ipipe_virtual_irq_map != ~0) { ++ ipos = ffz(__ipipe_virtual_irq_map); ++ set_bit(ipos, &__ipipe_virtual_irq_map); ++ irq = ipos + IPIPE_VIRQ_BASE; ++ } ++ ++ raw_spin_unlock_irqrestore(&__ipipe_lock, flags); ++ ++ return irq; ++} ++EXPORT_SYMBOL_GPL(ipipe_alloc_virq); ++ ++void ipipe_free_virq(unsigned int virq) ++{ ++ clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map); ++ smp_mb__after_atomic(); ++} ++EXPORT_SYMBOL_GPL(ipipe_free_virq); ++ ++int ipipe_request_irq(struct ipipe_domain *ipd, ++ unsigned int irq, ++ ipipe_irq_handler_t handler, ++ void *cookie, ++ ipipe_irq_ackfn_t ackfn) ++{ ++ unsigned long flags; ++ int ret = 0; ++ ++ ipipe_root_only(); ++ ++ if (handler == NULL || ++ (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq))) ++ return -EINVAL; ++ ++ raw_spin_lock_irqsave(&__ipipe_lock, flags); ++ ++ if (ipd->irqs[irq].handler) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ if (ackfn == NULL) ++ ackfn = ipipe_root_domain->irqs[irq].ackfn; ++ ++ ipd->irqs[irq].handler = handler; ++ ipd->irqs[irq].cookie = cookie; ++ ipd->irqs[irq].ackfn = ackfn; ++ ipd->irqs[irq].control = IPIPE_HANDLE_MASK; ++out: ++ raw_spin_unlock_irqrestore(&__ipipe_lock, flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(ipipe_request_irq); ++ ++void ipipe_free_irq(struct ipipe_domain *ipd, ++ unsigned int irq) ++{ ++ unsigned long flags; ++ ++ ipipe_root_only(); ++ ++ raw_spin_lock_irqsave(&__ipipe_lock, flags); ++ ++ if (ipd->irqs[irq].handler == NULL) ++ goto out; ++ ++ ipd->irqs[irq].handler = NULL; ++ 
ipd->irqs[irq].cookie = NULL; ++ ipd->irqs[irq].ackfn = NULL; ++ ipd->irqs[irq].control = 0; ++out: ++ raw_spin_unlock_irqrestore(&__ipipe_lock, flags); ++} ++EXPORT_SYMBOL_GPL(ipipe_free_irq); ++ ++void ipipe_set_hooks(struct ipipe_domain *ipd, int enables) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ int cpu, wait; ++ ++ if (ipd == ipipe_root_domain) { ++ IPIPE_WARN(enables & __IPIPE_TRAP_E); ++ enables &= ~__IPIPE_TRAP_E; ++ } else { ++ IPIPE_WARN(enables & __IPIPE_KEVENT_E); ++ enables &= ~__IPIPE_KEVENT_E; ++ } ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpu_context(ipd, cpu); ++ p->coflags &= ~__IPIPE_ALL_E; ++ p->coflags |= enables; ++ } ++ ++ wait = (enables ^ __IPIPE_ALL_E) << __IPIPE_SHIFT_R; ++ if (wait == 0 || !__ipipe_root_p) { ++ ipipe_critical_exit(flags); ++ return; ++ } ++ ++ ipipe_this_cpu_context(ipd)->coflags &= ~wait; ++ ++ ipipe_critical_exit(flags); ++ ++ /* ++ * In case we cleared some hooks over the root domain, we have ++ * to wait for any ongoing execution to finish, since our ++ * caller might subsequently unmap the target domain code. ++ * ++ * We synchronize with the relevant __ipipe_notify_*() ++ * helpers, disabling all hooks before we start waiting for ++ * completion on all CPUs. ++ */ ++ for_each_online_cpu(cpu) { ++ while (ipipe_percpu_context(ipd, cpu)->coflags & wait) ++ schedule_timeout_interruptible(HZ / 50); ++ } ++} ++EXPORT_SYMBOL_GPL(ipipe_set_hooks); ++ ++int __weak ipipe_fastcall_hook(struct pt_regs *regs) ++{ ++ return -1; /* i.e. fall back to slow path. */ ++} ++ ++int __weak ipipe_syscall_hook(struct ipipe_domain *ipd, struct pt_regs *regs) ++{ ++ return 0; ++} ++ ++static inline void sync_root_irqs(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ ++ p = ipipe_this_cpu_root_context(); ++ if (unlikely(__ipipe_ipending_p(p))) ++ __ipipe_sync_stage(); ++ ++ hard_local_irq_restore(flags); ++} ++ ++int ipipe_handle_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs) ++{ ++ unsigned long local_flags = READ_ONCE(ti->ipipe_flags); ++ unsigned int nr_syscalls = ipipe_root_nr_syscalls(ti); ++ int ret; ++ ++ /* ++ * NOTE: This is a backport from the DOVETAIL syscall ++ * redirector to the older pipeline implementation. ++ * ++ * == ++ * ++ * If the syscall # is out of bounds and the current IRQ stage ++ * is not the root one, this has to be a non-native system ++ * call handled by some co-kernel on the head stage. Hand it ++ * over to the head stage via the fast syscall handler. ++ * ++ * Otherwise, if the system call is out of bounds or the ++ * current thread is shared with a co-kernel, hand the syscall ++ * over to the latter through the pipeline stages. This ++ * allows: ++ * ++ * - the co-kernel to receive the initial - foreign - syscall ++ * a thread should send for enabling syscall handling by the ++ * co-kernel. ++ * ++ * - the co-kernel to manipulate the current execution stage ++ * for handling the request, which includes switching the ++ * current thread back to the root stage if the syscall is a ++ * native one, or promoting it to the head stage if handling ++ * the foreign syscall requires this. ++ * ++ * Native syscalls from regular (non-pipeline) threads are ++ * ignored by this routine, and flow down to the regular ++ * system call handler. 
++ */ ++ ++ if (nr >= nr_syscalls && (local_flags & _TIP_HEAD)) { ++ ipipe_fastcall_hook(regs); ++ local_flags = READ_ONCE(ti->ipipe_flags); ++ if (local_flags & _TIP_HEAD) { ++ if (local_flags & _TIP_MAYDAY) ++ __ipipe_call_mayday(regs); ++ return 1; /* don't pass down, no tail work. */ ++ } else { ++ sync_root_irqs(); ++ return -1; /* don't pass down, do tail work. */ ++ } ++ } ++ ++ if ((local_flags & _TIP_NOTIFY) || nr >= nr_syscalls) { ++ ret =__ipipe_notify_syscall(regs); ++ local_flags = READ_ONCE(ti->ipipe_flags); ++ if (local_flags & _TIP_HEAD) ++ return 1; /* don't pass down, no tail work. */ ++ if (ret) ++ return -1; /* don't pass down, do tail work. */ ++ } ++ ++ return 0; /* pass syscall down to the host. */ ++} ++ ++int __ipipe_notify_syscall(struct pt_regs *regs) ++{ ++ struct ipipe_domain *caller_domain, *this_domain, *ipd; ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ int ret = 0; ++ ++ /* ++ * We should definitely not pipeline a syscall with IRQs off. ++ */ ++ IPIPE_WARN_ONCE(hard_irqs_disabled()); ++ ++ flags = hard_local_irq_save(); ++ caller_domain = this_domain = __ipipe_current_domain; ++ ipd = ipipe_head_domain; ++next: ++ p = ipipe_this_cpu_context(ipd); ++ if (likely(p->coflags & __IPIPE_SYSCALL_E)) { ++ __ipipe_set_current_context(p); ++ p->coflags |= __IPIPE_SYSCALL_R; ++ hard_local_irq_restore(flags); ++ ret = ipipe_syscall_hook(caller_domain, regs); ++ flags = hard_local_irq_save(); ++ p->coflags &= ~__IPIPE_SYSCALL_R; ++ if (__ipipe_current_domain != ipd) ++ /* Account for domain migration. */ ++ this_domain = __ipipe_current_domain; ++ else ++ __ipipe_set_current_domain(this_domain); ++ } ++ ++ if (this_domain == ipipe_root_domain) { ++ if (ipd != ipipe_root_domain && ret == 0) { ++ ipd = ipipe_root_domain; ++ goto next; ++ } ++ /* ++ * Careful: we may have migrated from head->root, so p ++ * would be ipipe_this_cpu_context(head). ++ */ ++ p = ipipe_this_cpu_root_context(); ++ if (__ipipe_ipending_p(p)) ++ __ipipe_sync_stage(); ++ } else if (ipipe_test_thread_flag(TIP_MAYDAY)) ++ __ipipe_call_mayday(regs); ++ ++ hard_local_irq_restore(flags); ++ ++ return ret; ++} ++ ++int __weak ipipe_trap_hook(struct ipipe_trap_data *data) ++{ ++ return 0; ++} ++ ++int __ipipe_notify_trap(int exception, struct pt_regs *regs) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_trap_data data; ++ unsigned long flags; ++ int ret = 0; ++ ++ flags = hard_local_irq_save(); ++ ++ /* ++ * We send a notification about all traps raised over a ++ * registered head domain only. 
++ */ ++ if (__ipipe_root_p) ++ goto out; ++ ++ p = ipipe_this_cpu_head_context(); ++ if (likely(p->coflags & __IPIPE_TRAP_E)) { ++ p->coflags |= __IPIPE_TRAP_R; ++ hard_local_irq_restore(flags); ++ data.exception = exception; ++ data.regs = regs; ++ ret = ipipe_trap_hook(&data); ++ flags = hard_local_irq_save(); ++ p->coflags &= ~__IPIPE_TRAP_R; ++ } ++out: ++ hard_local_irq_restore(flags); ++ ++ return ret; ++} ++ ++int __ipipe_notify_user_intreturn(void) ++{ ++ __ipipe_notify_kevent(IPIPE_KEVT_USERINTRET, current); ++ ++ return !ipipe_root_p; ++} ++ ++int __weak ipipe_kevent_hook(int kevent, void *data) ++{ ++ return 0; ++} ++ ++int __ipipe_notify_kevent(int kevent, void *data) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ int ret = 0; ++ ++ ipipe_root_only(); ++ ++ flags = hard_local_irq_save(); ++ ++ p = ipipe_this_cpu_root_context(); ++ if (likely(p->coflags & __IPIPE_KEVENT_E)) { ++ p->coflags |= __IPIPE_KEVENT_R; ++ hard_local_irq_restore(flags); ++ ret = ipipe_kevent_hook(kevent, data); ++ flags = hard_local_irq_save(); ++ p->coflags &= ~__IPIPE_KEVENT_R; ++ } ++ ++ hard_local_irq_restore(flags); ++ ++ return ret; ++} ++ ++void __weak ipipe_migration_hook(struct task_struct *p) ++{ ++} ++ ++static void complete_domain_migration(void) /* hw IRQs off */ ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_percpu_data *pd; ++ struct task_struct *t; ++ ++ ipipe_root_only(); ++ pd = raw_cpu_ptr(&ipipe_percpu); ++ t = pd->task_hijacked; ++ if (t == NULL) ++ return; ++ ++ pd->task_hijacked = NULL; ++ t->state &= ~TASK_HARDENING; ++ if (t->state != TASK_INTERRUPTIBLE) ++ /* Migration aborted (by signal). */ ++ return; ++ ++ ipipe_set_ti_thread_flag(task_thread_info(t), TIP_HEAD); ++ p = ipipe_this_cpu_head_context(); ++ IPIPE_WARN_ONCE(test_bit(IPIPE_STALL_FLAG, &p->status)); ++ /* ++ * hw IRQs are disabled, but the completion hook assumes the ++ * head domain is logically stalled: fix it up. ++ */ ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ ipipe_migration_hook(t); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ if (__ipipe_ipending_p(p)) ++ __ipipe_sync_pipeline(p->domain); ++} ++ ++void __ipipe_complete_domain_migration(void) ++{ ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ complete_domain_migration(); ++ hard_local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(__ipipe_complete_domain_migration); ++ ++int __ipipe_switch_tail(void) ++{ ++ int x; ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ hard_local_irq_disable(); ++#endif ++ x = __ipipe_root_p; ++ if (x) ++ complete_domain_migration(); ++ ++#ifndef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ if (x) ++#endif ++ hard_local_irq_enable(); ++ ++ return !x; ++} ++ ++void __ipipe_notify_vm_preemption(void) ++{ ++ struct ipipe_vm_notifier *vmf; ++ struct ipipe_percpu_data *p; ++ ++ ipipe_check_irqoff(); ++ p = __ipipe_raw_cpu_ptr(&ipipe_percpu); ++ vmf = p->vm_notifier; ++ if (unlikely(vmf)) ++ vmf->handler(vmf); ++} ++EXPORT_SYMBOL_GPL(__ipipe_notify_vm_preemption); ++ ++static void dispatch_irq_head(unsigned int irq) /* hw interrupts off */ ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(), *old; ++ struct ipipe_domain *head = p->domain; ++ ++ if (unlikely(test_bit(IPIPE_STALL_FLAG, &p->status))) { ++ __ipipe_set_irq_pending(head, irq); ++ return; ++ } ++ ++ /* Switch to the head domain if not current. 
*/ ++ old = __ipipe_current_context; ++ if (old != p) ++ __ipipe_set_current_context(p); ++ ++ p->irqall[irq]++; ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ barrier(); ++ head->irqs[irq].handler(irq, head->irqs[irq].cookie); ++ __ipipe_run_irqtail(irq); ++ hard_local_irq_disable(); ++ p = ipipe_this_cpu_head_context(); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ /* Are we still running in the head domain? */ ++ if (likely(__ipipe_current_context == p)) { ++ /* Did we enter this code over the head domain? */ ++ if (old->domain == head) { ++ /* Yes, do immediate synchronization. */ ++ if (__ipipe_ipending_p(p)) ++ __ipipe_sync_stage(); ++ return; ++ } ++ __ipipe_set_current_context(ipipe_this_cpu_root_context()); ++ } ++ ++ /* ++ * We must be running over the root domain, synchronize ++ * the pipeline for high priority IRQs (slow path). ++ */ ++ __ipipe_do_sync_pipeline(head); ++} ++ ++void __ipipe_dispatch_irq(unsigned int irq, int flags) /* hw interrupts off */ ++{ ++ struct ipipe_domain *ipd; ++ struct irq_desc *desc; ++ unsigned long control; ++ int chained_irq; ++ ++ /* ++ * Survival kit when reading this code: ++ * ++ * - we have two main situations, leading to three cases for ++ * handling interrupts: ++ * ++ * a) the root domain is alone, no registered head domain ++ * => all interrupts go through the interrupt log ++ * b) a head domain is registered ++ * => head domain IRQs go through the fast dispatcher ++ * => root domain IRQs go through the interrupt log ++ * ++ * - when no head domain is registered, ipipe_head_domain == ++ * ipipe_root_domain == &ipipe_root. ++ * ++ * - the caller tells us whether we should acknowledge this ++ * IRQ. Even virtual IRQs may require acknowledge on some ++ * platforms (e.g. arm/SMP). ++ * ++ * - the caller tells us whether we may try to run the IRQ log ++ * syncer. Typically, demuxed IRQs won't be synced ++ * immediately. ++ * ++ * - multiplex IRQs most likely have a valid acknowledge ++ * handler and we may not be called with IPIPE_IRQF_NOACK ++ * for them. The ack handler for the multiplex IRQ actually ++ * decodes the demuxed interrupts. ++ */ ++ ++#ifdef CONFIG_IPIPE_DEBUG ++ if (irq >= IPIPE_NR_IRQS) { ++ pr_err("I-pipe: spurious interrupt %u\n", irq); ++ return; ++ } ++#endif ++ /* ++ * CAUTION: on some archs, virtual IRQs may have acknowledge ++ * handlers. Multiplex IRQs should have one too. ++ */ ++ if (unlikely(irq >= IPIPE_NR_XIRQS)) { ++ desc = NULL; ++ chained_irq = 0; ++ } else { ++ desc = irq_to_desc(irq); ++ chained_irq = desc ? ipipe_chained_irq_p(desc) : 0; ++ } ++ if (flags & IPIPE_IRQF_NOACK) ++ IPIPE_WARN_ONCE(chained_irq); ++ else { ++ ipd = ipipe_head_domain; ++ control = ipd->irqs[irq].control; ++ if ((control & IPIPE_HANDLE_MASK) == 0) ++ ipd = ipipe_root_domain; ++ if (ipd->irqs[irq].ackfn) ++ ipd->irqs[irq].ackfn(desc); ++ if (chained_irq) { ++ if ((flags & IPIPE_IRQF_NOSYNC) == 0) ++ /* Run demuxed IRQ handlers. */ ++ goto sync; ++ return; ++ } ++ } ++ ++ /* ++ * Sticky interrupts must be handled early and separately, so ++ * that we always process them on the current domain. ++ */ ++ ipd = __ipipe_current_domain; ++ control = ipd->irqs[irq].control; ++ if (control & IPIPE_STICKY_MASK) ++ goto log; ++ ++ /* ++ * In case we have no registered head domain ++ * (i.e. ipipe_head_domain == &ipipe_root), we always go ++ * through the interrupt log, and leave the dispatching work ++ * ultimately to __ipipe_sync_pipeline(). 
++ */ ++ ipd = ipipe_head_domain; ++ control = ipd->irqs[irq].control; ++ if (ipd == ipipe_root_domain) ++ /* ++ * The root domain must handle all interrupts, so ++ * testing the HANDLE bit would be pointless. ++ */ ++ goto log; ++ ++ if (control & IPIPE_HANDLE_MASK) { ++ if (unlikely(flags & IPIPE_IRQF_NOSYNC)) ++ __ipipe_set_irq_pending(ipd, irq); ++ else ++ dispatch_irq_head(irq); ++ return; ++ } ++ ++ ipd = ipipe_root_domain; ++log: ++ __ipipe_set_irq_pending(ipd, irq); ++ ++ if (flags & IPIPE_IRQF_NOSYNC) ++ return; ++ ++ /* ++ * Optimize if we preempted a registered high priority head ++ * domain: we don't need to synchronize the pipeline unless ++ * there is a pending interrupt for it. ++ */ ++ if (!__ipipe_root_p && ++ !__ipipe_ipending_p(ipipe_this_cpu_head_context())) ++ return; ++sync: ++ __ipipe_sync_pipeline(ipipe_head_domain); ++} ++ ++void ipipe_raise_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_head_domain; ++ unsigned long flags, control; ++ ++ flags = hard_local_irq_save(); ++ ++ /* ++ * Fast path: raising a virtual IRQ handled by the head ++ * domain. ++ */ ++ if (likely(ipipe_virtual_irq_p(irq) && ipd != ipipe_root_domain)) { ++ control = ipd->irqs[irq].control; ++ if (likely(control & IPIPE_HANDLE_MASK)) { ++ dispatch_irq_head(irq); ++ goto out; ++ } ++ } ++ ++ /* Emulate regular device IRQ receipt. */ ++ __ipipe_dispatch_irq(irq, IPIPE_IRQF_NOACK); ++out: ++ hard_local_irq_restore(flags); ++ ++} ++EXPORT_SYMBOL_GPL(ipipe_raise_irq); ++ ++#ifdef CONFIG_PREEMPT ++ ++void preempt_schedule_irq(void); ++ ++void __sched __ipipe_preempt_schedule_irq(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ ++ if (WARN_ON_ONCE(!hard_irqs_disabled())) ++ hard_local_irq_disable(); ++ ++ local_irq_save(flags); ++ hard_local_irq_enable(); ++ preempt_schedule_irq(); /* Ok, may reschedule now. */ ++ hard_local_irq_disable(); ++ ++ /* ++ * Flush any pending interrupt that may have been logged after ++ * preempt_schedule_irq() stalled the root stage before ++ * returning to us, and now. ++ */ ++ p = ipipe_this_cpu_root_context(); ++ if (unlikely(__ipipe_ipending_p(p))) { ++ trace_hardirqs_on(); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ __ipipe_sync_stage(); ++ } ++ ++ __ipipe_restore_root_nosync(flags); ++} ++ ++#else /* !CONFIG_PREEMPT */ ++ ++#define __ipipe_preempt_schedule_irq() do { } while (0) ++ ++#endif /* !CONFIG_PREEMPT */ ++ ++#ifdef CONFIG_TRACE_IRQFLAGS ++#define root_stall_after_handler() local_irq_disable() ++#else ++#define root_stall_after_handler() do { } while (0) ++#endif ++ ++/* ++ * __ipipe_do_sync_stage() -- Flush the pending IRQs for the current ++ * domain (and processor). This routine flushes the interrupt log (see ++ * "Optimistic interrupt protection" from D. Stodolsky et al. for more ++ * on the deferred interrupt scheme). Every interrupt that occurred ++ * while the pipeline was stalled gets played. ++ * ++ * WARNING: CPU migration may occur over this routine. ++ */ ++void __ipipe_do_sync_stage(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_domain *ipd; ++ int irq; ++ ++ p = __ipipe_current_context; ++respin: ++ ipd = p->domain; ++ ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ smp_wmb(); ++ ++ if (ipd == ipipe_root_domain) ++ trace_hardirqs_off(); ++ ++ for (;;) { ++ irq = __ipipe_next_irq(p); ++ if (irq < 0) ++ break; ++ /* ++ * Make sure the compiler does not reorder wrongly, so ++ * that all updates to maps are done before the ++ * handler gets called. 
++ */ ++ barrier(); ++ ++ if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ continue; ++ ++ if (ipd != ipipe_head_domain) ++ hard_local_irq_enable(); ++ ++ if (likely(ipd != ipipe_root_domain)) { ++ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); ++ __ipipe_run_irqtail(irq); ++ hard_local_irq_disable(); ++ } else if (ipipe_virtual_irq_p(irq)) { ++ irq_enter(); ++ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); ++ irq_exit(); ++ root_stall_after_handler(); ++ hard_local_irq_disable(); ++ } else { ++ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); ++ root_stall_after_handler(); ++ hard_local_irq_disable(); ++ } ++ ++ /* ++ * We may have migrated to a different CPU (1) upon ++ * return from the handler, or downgraded from the ++ * head domain to the root one (2), the opposite way ++ * is NOT allowed though. ++ * ++ * (1) reload the current per-cpu context pointer, so ++ * that we further pull pending interrupts from the ++ * proper per-cpu log. ++ * ++ * (2) check the stall bit to know whether we may ++ * dispatch any interrupt pending for the root domain, ++ * and respin the entire dispatch loop if ++ * so. Otherwise, immediately return to the caller, ++ * _without_ affecting the stall state for the root ++ * domain, since we do not own it at this stage. This ++ * case is basically reflecting what may happen in ++ * dispatch_irq_head() for the fast path. ++ */ ++ p = __ipipe_current_context; ++ if (p->domain != ipd) { ++ IPIPE_BUG_ON(ipd == ipipe_root_domain); ++ if (test_bit(IPIPE_STALL_FLAG, &p->status)) ++ return; ++ goto respin; ++ } ++ } ++ ++ if (ipd == ipipe_root_domain) ++ trace_hardirqs_on(); ++ ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++} ++ ++void __ipipe_call_mayday(struct pt_regs *regs) ++{ ++ unsigned long flags; ++ ++ ipipe_clear_thread_flag(TIP_MAYDAY); ++ flags = hard_local_irq_save(); ++ __ipipe_notify_trap(IPIPE_TRAP_MAYDAY, regs); ++ hard_local_irq_restore(flags); ++} ++ ++#ifdef CONFIG_SMP ++ ++/* Always called with hw interrupts off. */ ++void __ipipe_do_critical_sync(unsigned int irq, void *cookie) ++{ ++ int cpu = ipipe_processor_id(); ++ ++ cpumask_set_cpu(cpu, &__ipipe_cpu_sync_map); ++ ++ /* ++ * Now we are in sync with the lock requestor running on ++ * another CPU. Enter a spinning wait until he releases the ++ * global lock. ++ */ ++ raw_spin_lock(&__ipipe_cpu_barrier); ++ ++ /* Got it. Now get out. */ ++ ++ /* Call the sync routine if any. 
*/ ++ if (__ipipe_cpu_sync) ++ __ipipe_cpu_sync(); ++ ++ cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map); ++ ++ raw_spin_unlock(&__ipipe_cpu_barrier); ++ ++ cpumask_clear_cpu(cpu, &__ipipe_cpu_sync_map); ++} ++#endif /* CONFIG_SMP */ ++ ++unsigned long ipipe_critical_enter(void (*syncfn)(void)) ++{ ++ static cpumask_t allbutself __maybe_unused, online __maybe_unused; ++ int cpu __maybe_unused, n __maybe_unused; ++ unsigned long flags, loops __maybe_unused; ++ ++ flags = hard_local_irq_save(); ++ ++ if (num_online_cpus() == 1) ++ return flags; ++ ++#ifdef CONFIG_SMP ++ ++ cpu = ipipe_processor_id(); ++ if (!cpumask_test_and_set_cpu(cpu, &__ipipe_cpu_lock_map)) { ++ while (test_and_set_bit(0, &__ipipe_critical_lock)) { ++ n = 0; ++ hard_local_irq_enable(); ++ ++ do ++ cpu_relax(); ++ while (++n < cpu); ++ ++ hard_local_irq_disable(); ++ } ++restart: ++ online = *cpu_online_mask; ++ raw_spin_lock(&__ipipe_cpu_barrier); ++ ++ __ipipe_cpu_sync = syncfn; ++ ++ cpumask_clear(&__ipipe_cpu_pass_map); ++ cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map); ++ ++ /* ++ * Send the sync IPI to all processors but the current ++ * one. ++ */ ++ cpumask_andnot(&allbutself, &online, &__ipipe_cpu_pass_map); ++ ipipe_send_ipi(IPIPE_CRITICAL_IPI, allbutself); ++ loops = IPIPE_CRITICAL_TIMEOUT; ++ ++ while (!cpumask_equal(&__ipipe_cpu_sync_map, &allbutself)) { ++ if (--loops > 0) { ++ cpu_relax(); ++ continue; ++ } ++ /* ++ * We ran into a deadlock due to a contended ++ * rwlock. Cancel this round and retry. ++ */ ++ __ipipe_cpu_sync = NULL; ++ ++ raw_spin_unlock(&__ipipe_cpu_barrier); ++ /* ++ * Ensure all CPUs consumed the IPI to avoid ++ * running __ipipe_cpu_sync prematurely. This ++ * usually resolves the deadlock reason too. ++ */ ++ while (!cpumask_equal(&online, &__ipipe_cpu_pass_map)) ++ cpu_relax(); ++ ++ goto restart; ++ } ++ } ++ ++ atomic_inc(&__ipipe_critical_count); ++ ++#endif /* CONFIG_SMP */ ++ ++ return flags; ++} ++EXPORT_SYMBOL_GPL(ipipe_critical_enter); ++ ++void ipipe_critical_exit(unsigned long flags) ++{ ++ if (num_online_cpus() == 1) { ++ hard_local_irq_restore(flags); ++ return; ++ } ++ ++#ifdef CONFIG_SMP ++ if (atomic_dec_and_test(&__ipipe_critical_count)) { ++ raw_spin_unlock(&__ipipe_cpu_barrier); ++ while (!cpumask_empty(&__ipipe_cpu_sync_map)) ++ cpu_relax(); ++ cpumask_clear_cpu(ipipe_processor_id(), &__ipipe_cpu_lock_map); ++ clear_bit(0, &__ipipe_critical_lock); ++ smp_mb__after_atomic(); ++ } ++#endif /* CONFIG_SMP */ ++ ++ hard_local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(ipipe_critical_exit); ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ ++void ipipe_root_only(void) ++{ ++ struct ipipe_domain *this_domain; ++ unsigned long flags; ++ ++ flags = hard_smp_local_irq_save(); ++ ++ this_domain = __ipipe_current_domain; ++ if (likely(this_domain == ipipe_root_domain && ++ !test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status))) { ++ hard_smp_local_irq_restore(flags); ++ return; ++ } ++ ++ if (!__this_cpu_read(ipipe_percpu.context_check)) { ++ hard_smp_local_irq_restore(flags); ++ return; ++ } ++ ++ hard_smp_local_irq_restore(flags); ++ ++ ipipe_prepare_panic(); ++ ipipe_trace_panic_freeze(); ++ ++ if (this_domain != ipipe_root_domain) ++ pr_err("I-pipe: Detected illicit call from head domain '%s'\n" ++ " into a regular Linux service\n", ++ this_domain->name); ++ else ++ pr_err("I-pipe: Detected stalled head domain, " ++ "probably caused by a bug.\n" ++ " A critical section may have been " ++ "left unterminated.\n"); ++ dump_stack(); ++ ipipe_trace_panic_dump(); ++} 
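++
++/*
++ * A minimal usage sketch of ipipe_critical_enter()/ipipe_critical_exit(),
++ * mirroring the calling pattern found elsewhere in this patch (e.g.
++ * ipipe_set_hooks() above, ipipe_select_timers() in timer.c below). The
++ * example_* names are illustrative only and not part of the I-pipe API;
++ * the block is kept under #if 0 so it is never built.
++ */
++#if 0
++static void example_sync_handler(void)
++{
++	/* Runs on each remote CPU, hw IRQs off, while it spins on the barrier. */
++}
++
++static void example_global_update(void)
++{
++	unsigned long flags;
++
++	/* Quiesce all other online CPUs; they spin with hw IRQs off. */
++	flags = ipipe_critical_enter(example_sync_handler);
++	example_sync_handler();	/* syncfn only runs remotely; run it locally too */
++	ipipe_critical_exit(flags);	/* release the CPUs, restore hw IRQ state */
++}
++#endif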
++EXPORT_SYMBOL(ipipe_root_only); ++ ++#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++ ++unsigned long notrace __ipipe_cpu_get_offset(void) ++{ ++ struct ipipe_domain *this_domain; ++ unsigned long flags; ++ bool bad = false; ++ ++ flags = hard_local_irq_save_notrace(); ++ if (raw_irqs_disabled_flags(flags)) ++ goto out; ++ ++ /* ++ * Only the root domain may implement preemptive CPU migration ++ * of tasks, so anything above in the pipeline should be fine. ++ * CAUTION: we want open coded access to the current domain, ++ * don't use __ipipe_current_domain here, this would recurse ++ * indefinitely. ++ */ ++ this_domain = raw_cpu_read(ipipe_percpu.curr)->domain; ++ if (this_domain != ipipe_root_domain) ++ goto out; ++ ++ /* ++ * Since we run on the root stage with hard irqs enabled, we ++ * need preemption to be disabled. Otherwise, our caller may ++ * end up accessing the wrong per-cpu variable instance due to ++ * CPU migration, complain loudly. ++ */ ++ if (preempt_count() == 0 && !irqs_disabled()) ++ bad = true; ++out: ++ hard_local_irq_restore_notrace(flags); ++ ++ WARN_ON_ONCE(bad); ++ ++ return __my_cpu_offset; ++} ++EXPORT_SYMBOL(__ipipe_cpu_get_offset); ++ ++void __ipipe_spin_unlock_debug(unsigned long flags) ++{ ++ /* ++ * We catch a nasty issue where spin_unlock_irqrestore() on a ++ * regular kernel spinlock is about to re-enable hw interrupts ++ * in a section entered with hw irqs off. This is clearly the ++ * sign of a massive breakage coming. Usual suspect is a ++ * regular spinlock which was overlooked, used within a ++ * section which must run with hw irqs disabled. ++ */ ++ IPIPE_WARN_ONCE(!raw_irqs_disabled_flags(flags) && hard_irqs_disabled()); ++} ++EXPORT_SYMBOL(__ipipe_spin_unlock_debug); ++ ++#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */ ++ ++void ipipe_prepare_panic(void) ++{ ++#ifdef CONFIG_PRINTK ++ __ipipe_printk_bypass = 1; ++#endif ++ ipipe_context_check_off(); ++} ++EXPORT_SYMBOL_GPL(ipipe_prepare_panic); ++ ++static void __ipipe_do_work(unsigned int virq, void *cookie) ++{ ++ struct ipipe_work_header *work; ++ unsigned long flags; ++ void *curr, *tail; ++ int cpu; ++ ++ /* ++ * Work is dispatched in enqueuing order. This interrupt ++ * context can't migrate to another CPU. ++ */ ++ cpu = smp_processor_id(); ++ curr = per_cpu(work_buf, cpu); ++ ++ for (;;) { ++ flags = hard_local_irq_save(); ++ tail = per_cpu(work_tail, cpu); ++ if (curr == tail) { ++ per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu); ++ hard_local_irq_restore(flags); ++ return; ++ } ++ work = curr; ++ curr += work->size; ++ hard_local_irq_restore(flags); ++ work->handler(work); ++ } ++} ++ ++void __ipipe_post_work_root(struct ipipe_work_header *work) ++{ ++ unsigned long flags; ++ void *tail; ++ int cpu; ++ ++ /* ++ * Subtle: we want to use the head stall/unstall operators, ++ * not the hard_* routines to protect against races. This way, ++ * we ensure that a root-based caller will trigger the virq ++ * handling immediately when unstalling the head stage, as a ++ * result of calling __ipipe_sync_pipeline() under the hood. ++ */ ++ flags = ipipe_test_and_stall_head(); ++ cpu = ipipe_processor_id(); ++ tail = per_cpu(work_tail, cpu); ++ ++ if (WARN_ON_ONCE((unsigned char *)tail + work->size >= ++ per_cpu(work_buf, cpu) + WORKBUF_SIZE)) ++ goto out; ++ ++ /* Work handling is deferred, so data has to be copied. 
*/ ++ memcpy(tail, work, work->size); ++ per_cpu(work_tail, cpu) = tail + work->size; ++ ipipe_post_irq_root(__ipipe_work_virq); ++out: ++ ipipe_restore_head(flags); ++} ++EXPORT_SYMBOL_GPL(__ipipe_post_work_root); ++ ++void __weak __ipipe_arch_share_current(int flags) ++{ ++} ++ ++void __ipipe_share_current(int flags) ++{ ++ ipipe_root_only(); ++ ++ __ipipe_arch_share_current(flags); ++} ++EXPORT_SYMBOL_GPL(__ipipe_share_current); ++ ++bool __weak ipipe_cpuidle_control(struct cpuidle_device *dev, ++ struct cpuidle_state *state) ++{ ++ /* ++ * By default, always deny entering sleep state if this ++ * entails stopping the timer (i.e. C3STOP misfeature), ++ * Xenomai could not deal with this case. ++ */ ++ if (state && (state->flags & CPUIDLE_FLAG_TIMER_STOP)) ++ return false; ++ ++ /* Otherwise, allow switching to idle state. */ ++ return true; ++} ++ ++bool ipipe_enter_cpuidle(struct cpuidle_device *dev, ++ struct cpuidle_state *state) ++{ ++ struct ipipe_percpu_domain_data *p; ++ ++ WARN_ON_ONCE(!irqs_disabled()); ++ ++ hard_local_irq_disable(); ++ p = ipipe_this_cpu_root_context(); ++ ++ /* ++ * Pending IRQ(s) waiting for delivery to the root stage, or ++ * the arbitrary decision of a co-kernel may deny the ++ * transition to a deeper C-state. Note that we return from ++ * this call with hard irqs off, so that we won't allow any ++ * interrupt to sneak into the IRQ log until we reach the ++ * processor idling code, or leave the CPU idle framework ++ * without sleeping. ++ */ ++ return !__ipipe_ipending_p(p) && ipipe_cpuidle_control(dev, state); ++} ++ ++#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \ ++ defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT) ++void __ipipe_uaccess_might_fault(void) ++{ ++ struct ipipe_percpu_domain_data *pdd; ++ struct ipipe_domain *ipd; ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ ipd = __ipipe_current_domain; ++ if (ipd == ipipe_root_domain) { ++ hard_local_irq_restore(flags); ++ might_fault(); ++ return; ++ } ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ pdd = ipipe_this_cpu_context(ipd); ++ WARN_ON_ONCE(hard_irqs_disabled_flags(flags) ++ || test_bit(IPIPE_STALL_FLAG, &pdd->status)); ++#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ (void)pdd; ++#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ hard_local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(__ipipe_uaccess_might_fault); ++#endif +diff -uprN kernel/kernel/ipipe/Kconfig kernel_new/kernel/ipipe/Kconfig +--- kernel/kernel/ipipe/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/Kconfig 2021-04-01 18:28:07.805863120 +0800 +@@ -0,0 +1,47 @@ ++ ++config HAVE_IPIPE_SUPPORT ++ depends on GENERIC_CLOCKEVENTS ++ bool ++ ++config IPIPE ++ bool "Interrupt pipeline" ++ depends on HAVE_IPIPE_SUPPORT ++ default n ++ ---help--- ++ Activate this option if you want the interrupt pipeline to be ++ compiled in. ++ ++config IPIPE_CORE ++ def_bool y if IPIPE ++ ++config IPIPE_WANT_PTE_PINNING ++ bool ++ ++config IPIPE_CORE_APIREV ++ int ++ depends on IPIPE ++ default 2 ++ ---help--- ++ The API revision level we implement. ++ ++config IPIPE_WANT_APIREV_2 ++ bool ++ ++config IPIPE_TARGET_APIREV ++ int ++ depends on IPIPE ++ default IPIPE_CORE_APIREV ++ ---help--- ++ The API revision level the we want (must be <= ++ IPIPE_CORE_APIREV). 
++ ++config IPIPE_HAVE_HOSTRT ++ bool ++ ++config IPIPE_HAVE_EAGER_FPU ++ bool ++ ++if IPIPE && ARM && RAW_PRINTK && !DEBUG_LL ++comment "CAUTION: DEBUG_LL must be selected, and properly configured for" ++comment "RAW_PRINTK to work. Otherwise, you will get no output on raw_printk()" ++endif +diff -uprN kernel/kernel/ipipe/Kconfig.debug kernel_new/kernel/ipipe/Kconfig.debug +--- kernel/kernel/ipipe/Kconfig.debug 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/Kconfig.debug 2021-04-01 18:28:07.805863120 +0800 +@@ -0,0 +1,100 @@ ++config IPIPE_DEBUG ++ bool "I-pipe debugging" ++ depends on IPIPE ++ select RAW_PRINTK ++ ++config IPIPE_DEBUG_CONTEXT ++ bool "Check for illicit cross-domain calls" ++ depends on IPIPE_DEBUG ++ default y ++ ---help--- ++ Enable this feature to arm checkpoints in the kernel that ++ verify the correct invocation context. On entry of critical ++ Linux services a warning is issued if the caller is not ++ running over the root domain. ++ ++config IPIPE_DEBUG_INTERNAL ++ bool "Enable internal debug checks" ++ depends on IPIPE_DEBUG ++ default y ++ ---help--- ++ When this feature is enabled, I-pipe will perform internal ++ consistency checks of its subsystems, e.g. on per-cpu variable ++ access. ++ ++config HAVE_IPIPE_TRACER_SUPPORT ++ bool ++ ++config IPIPE_TRACE ++ bool "Latency tracing" ++ depends on HAVE_IPIPE_TRACER_SUPPORT ++ depends on IPIPE_DEBUG ++ select CONFIG_FTRACE ++ select CONFIG_FUNCTION_TRACER ++ select KALLSYMS ++ select PROC_FS ++ ---help--- ++ Activate this option if you want to use per-function tracing of ++ the kernel. The tracer will collect data via instrumentation ++ features like the one below or with the help of explicite calls ++ of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the ++ in-kernel tracing API. The collected data and runtime control ++ is available via /proc/ipipe/trace/*. ++ ++if IPIPE_TRACE ++ ++config IPIPE_TRACE_ENABLE ++ bool "Enable tracing on boot" ++ default y ++ ---help--- ++ Disable this option if you want to arm the tracer after booting ++ manually ("echo 1 > /proc/ipipe/tracer/enable"). This can reduce ++ boot time on slow embedded devices due to the tracer overhead. ++ ++config IPIPE_TRACE_MCOUNT ++ bool "Instrument function entries" ++ default y ++ select FTRACE ++ select FUNCTION_TRACER ++ ---help--- ++ When enabled, records every kernel function entry in the tracer ++ log. While this slows down the system noticeably, it provides ++ the highest level of information about the flow of events. ++ However, it can be switch off in order to record only explicit ++ I-pipe trace points. ++ ++config IPIPE_TRACE_IRQSOFF ++ bool "Trace IRQs-off times" ++ default y ++ ---help--- ++ Activate this option if I-pipe shall trace the longest path ++ with hard-IRQs switched off. ++ ++config IPIPE_TRACE_SHIFT ++ int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)" ++ range 10 18 ++ default 14 ++ ---help--- ++ The number of trace points to hold tracing data for each ++ trace path, as a power of 2. ++ ++config IPIPE_TRACE_VMALLOC ++ bool "Use vmalloc'ed trace buffer" ++ default y if EMBEDDED ++ ---help--- ++ Instead of reserving static kernel data, the required buffer ++ is allocated via vmalloc during boot-up when this option is ++ enabled. This can help to start systems that are low on memory, ++ but it slightly degrades overall performance. Try this option ++ when a traced kernel hangs unexpectedly at boot time. 
++ ++config IPIPE_TRACE_PANIC ++ bool "Enable panic back traces" ++ default y ++ ---help--- ++ Provides services to freeze and dump a back trace on panic ++ situations. This is used on IPIPE_DEBUG_CONTEXT exceptions ++ as well as ordinary kernel oopses. You can control the number ++ of printed back trace points via /proc/ipipe/trace. ++ ++endif +diff -uprN kernel/kernel/ipipe/Makefile kernel_new/kernel/ipipe/Makefile +--- kernel/kernel/ipipe/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/Makefile 2021-04-01 18:28:07.805863120 +0800 +@@ -0,0 +1,2 @@ ++obj-$(CONFIG_IPIPE) += core.o timer.o ++obj-$(CONFIG_IPIPE_TRACE) += tracer.o +diff -uprN kernel/kernel/ipipe/timer.c kernel_new/kernel/ipipe/timer.c +--- kernel/kernel/ipipe/timer.c 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/timer.c 2021-04-01 18:28:07.806863119 +0800 +@@ -0,0 +1,656 @@ ++/* -*- linux-c -*- ++ * linux/kernel/ipipe/timer.c ++ * ++ * Copyright (C) 2012 Gilles Chanteperdrix ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * I-pipe timer request interface. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned long __ipipe_hrtimer_freq; ++ ++static LIST_HEAD(timers); ++static IPIPE_DEFINE_SPINLOCK(lock); ++ ++static DEFINE_PER_CPU(struct ipipe_timer *, percpu_timer); ++ ++/* ++ * Default request method: switch to oneshot mode if supported. ++ */ ++static void ipipe_timer_default_request(struct ipipe_timer *timer, int steal) ++{ ++ struct clock_event_device *evtdev = timer->host_timer; ++ ++ if (!(evtdev->features & CLOCK_EVT_FEAT_ONESHOT)) ++ return; ++ ++ if (clockevent_state_oneshot(evtdev) || ++ clockevent_state_oneshot_stopped(evtdev)) ++ timer->orig_mode = CLOCK_EVT_MODE_ONESHOT; ++ else { ++ if (clockevent_state_periodic(evtdev)) ++ timer->orig_mode = CLOCK_EVT_MODE_PERIODIC; ++ else if (clockevent_state_shutdown(evtdev)) ++ timer->orig_mode = CLOCK_EVT_MODE_SHUTDOWN; ++ else ++ timer->orig_mode = CLOCK_EVT_MODE_UNUSED; ++ evtdev->set_state_oneshot(evtdev); ++ evtdev->set_next_event(timer->freq / HZ, evtdev); ++ } ++} ++ ++/* ++ * Default release method: return the timer to the mode it had when ++ * starting. 
++ */ ++static void ipipe_timer_default_release(struct ipipe_timer *timer) ++{ ++ struct clock_event_device *evtdev = timer->host_timer; ++ ++ switch (timer->orig_mode) { ++ case CLOCK_EVT_MODE_SHUTDOWN: ++ evtdev->set_state_shutdown(evtdev); ++ break; ++ case CLOCK_EVT_MODE_PERIODIC: ++ evtdev->set_state_periodic(evtdev); ++ case CLOCK_EVT_MODE_ONESHOT: ++ evtdev->set_next_event(timer->freq / HZ, evtdev); ++ break; ++ } ++} ++ ++static int get_dev_mode(struct clock_event_device *evtdev) ++{ ++ if (clockevent_state_oneshot(evtdev) || ++ clockevent_state_oneshot_stopped(evtdev)) ++ return CLOCK_EVT_MODE_ONESHOT; ++ ++ if (clockevent_state_periodic(evtdev)) ++ return CLOCK_EVT_MODE_PERIODIC; ++ ++ if (clockevent_state_shutdown(evtdev)) ++ return CLOCK_EVT_MODE_SHUTDOWN; ++ ++ return CLOCK_EVT_MODE_UNUSED; ++} ++ ++void ipipe_host_timer_register(struct clock_event_device *evtdev) ++{ ++ struct ipipe_timer *timer = evtdev->ipipe_timer; ++ ++ if (timer == NULL) ++ return; ++ ++ timer->orig_mode = CLOCK_EVT_MODE_UNUSED; ++ ++ if (timer->request == NULL) ++ timer->request = ipipe_timer_default_request; ++ ++ /* ++ * By default, use the same method as linux timer, on ARM at ++ * least, most set_next_event methods are safe to be called ++ * from Xenomai domain anyway. ++ */ ++ if (timer->set == NULL) { ++ timer->timer_set = evtdev; ++ timer->set = (typeof(timer->set))evtdev->set_next_event; ++ } ++ ++ if (timer->release == NULL) ++ timer->release = ipipe_timer_default_release; ++ ++ if (timer->name == NULL) ++ timer->name = evtdev->name; ++ ++ if (timer->rating == 0) ++ timer->rating = evtdev->rating; ++ ++ timer->freq = (1000000000ULL * evtdev->mult) >> evtdev->shift; ++ ++ if (timer->min_delay_ticks == 0) ++ timer->min_delay_ticks = ++ (evtdev->min_delta_ns * evtdev->mult) >> evtdev->shift; ++ ++ if (timer->max_delay_ticks == 0) ++ timer->max_delay_ticks = ++ (evtdev->max_delta_ns * evtdev->mult) >> evtdev->shift; ++ ++ if (timer->cpumask == NULL) ++ timer->cpumask = evtdev->cpumask; ++ ++ timer->host_timer = evtdev; ++ ++ ipipe_timer_register(timer); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++void ipipe_host_timer_cleanup(struct clock_event_device *evtdev) ++{ ++ struct ipipe_timer *timer = evtdev->ipipe_timer; ++ unsigned long flags; ++ ++ if (timer == NULL) ++ return; ++ ++ raw_spin_lock_irqsave(&lock, flags); ++ list_del(&timer->link); ++ raw_spin_unlock_irqrestore(&lock, flags); ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++/* ++ * register a timer: maintain them in a list sorted by rating ++ */ ++void ipipe_timer_register(struct ipipe_timer *timer) ++{ ++ struct ipipe_timer *t; ++ unsigned long flags; ++ ++ if (timer->timer_set == NULL) ++ timer->timer_set = timer; ++ ++ if (timer->cpumask == NULL) ++ timer->cpumask = cpumask_of(smp_processor_id()); ++ ++ raw_spin_lock_irqsave(&lock, flags); ++ ++ list_for_each_entry(t, &timers, link) { ++ if (t->rating <= timer->rating) { ++ __list_add(&timer->link, t->link.prev, &t->link); ++ goto done; ++ } ++ } ++ list_add_tail(&timer->link, &timers); ++ done: ++ raw_spin_unlock_irqrestore(&lock, flags); ++} ++ ++static void ipipe_timer_request_sync(void) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ struct clock_event_device *evtdev; ++ int steal; ++ ++ if (!timer) ++ return; ++ ++ evtdev = timer->host_timer; ++ steal = evtdev != NULL && !clockevent_state_detached(evtdev); ++ timer->request(timer, steal); ++} ++ ++static void config_pcpu_timer(struct ipipe_timer *t, unsigned hrclock_freq) ++{ ++ unsigned long long tmp; ++ unsigned 
hrtimer_freq; ++ ++ if (__ipipe_hrtimer_freq != t->freq) ++ __ipipe_hrtimer_freq = t->freq; ++ ++ hrtimer_freq = t->freq; ++ if (__ipipe_hrclock_freq > UINT_MAX) ++ hrtimer_freq /= 1000; ++ ++ t->c2t_integ = hrtimer_freq / hrclock_freq; ++ tmp = (((unsigned long long) ++ (hrtimer_freq % hrclock_freq)) << 32) ++ + hrclock_freq - 1; ++ do_div(tmp, hrclock_freq); ++ t->c2t_frac = tmp; ++} ++ ++/* Set up a timer as per-cpu timer for ipipe */ ++static void install_pcpu_timer(unsigned cpu, unsigned hrclock_freq, ++ struct ipipe_timer *t) ++{ ++ per_cpu(ipipe_percpu.hrtimer_irq, cpu) = t->irq; ++ per_cpu(percpu_timer, cpu) = t; ++ config_pcpu_timer(t, hrclock_freq); ++} ++ ++static void select_root_only_timer(unsigned cpu, unsigned hrclock_khz, ++ const struct cpumask *mask, ++ struct ipipe_timer *t) { ++ unsigned icpu; ++ struct clock_event_device *evtdev; ++ ++ /* ++ * If no ipipe-supported CPU shares an interrupt with the ++ * timer, we do not need to care about it. ++ */ ++ for_each_cpu(icpu, mask) { ++ if (t->irq == per_cpu(ipipe_percpu.hrtimer_irq, icpu)) { ++ evtdev = t->host_timer; ++ if (evtdev && clockevent_state_shutdown(evtdev)) ++ continue; ++ goto found; ++ } ++ } ++ ++ return; ++ ++found: ++ install_pcpu_timer(cpu, hrclock_khz, t); ++} ++ ++/* ++ * Choose per-cpu timers with the highest rating by traversing the ++ * rating-sorted list for each CPU. ++ */ ++int ipipe_select_timers(const struct cpumask *mask) ++{ ++ unsigned hrclock_freq; ++ unsigned long long tmp; ++ struct ipipe_timer *t; ++ struct clock_event_device *evtdev; ++ unsigned long flags; ++ unsigned cpu; ++ cpumask_var_t fixup; ++ ++ if (!__ipipe_hrclock_ok()) { ++ printk("I-pipe: high-resolution clock not working\n"); ++ return -ENODEV; ++ } ++ ++ if (__ipipe_hrclock_freq > UINT_MAX) { ++ tmp = __ipipe_hrclock_freq; ++ do_div(tmp, 1000); ++ hrclock_freq = tmp; ++ } else ++ hrclock_freq = __ipipe_hrclock_freq; ++ ++ ++ if (!zalloc_cpumask_var(&fixup, GFP_KERNEL)) { ++ WARN_ON(1); ++ return -ENODEV; ++ } ++ ++ raw_spin_lock_irqsave(&lock, flags); ++ ++ /* First, choose timers for the CPUs handled by ipipe */ ++ for_each_cpu(cpu, mask) { ++ list_for_each_entry(t, &timers, link) { ++ if (!cpumask_test_cpu(cpu, t->cpumask)) ++ continue; ++ ++ evtdev = t->host_timer; ++ if (evtdev && clockevent_state_shutdown(evtdev)) ++ continue; ++ goto found; ++ } ++ ++ printk("I-pipe: could not find timer for cpu #%d\n", ++ cpu); ++ goto err_remove_all; ++found: ++ install_pcpu_timer(cpu, hrclock_freq, t); ++ } ++ ++ /* ++ * Second, check if we need to fix up any CPUs not supported ++ * by ipipe (but by Linux) whose interrupt may need to be ++ * forwarded because they have the same IRQ as an ipipe-enabled ++ * timer. 
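++ * Such CPUs are the online ones left out of 'mask'; they are collected
++ * into the 'fixup' set and handed to select_root_only_timer() below.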
++ */ ++ cpumask_andnot(fixup, cpu_online_mask, mask); ++ ++ for_each_cpu(cpu, fixup) { ++ list_for_each_entry(t, &timers, link) { ++ if (!cpumask_test_cpu(cpu, t->cpumask)) ++ continue; ++ ++ select_root_only_timer(cpu, hrclock_freq, mask, t); ++ } ++ } ++ ++ raw_spin_unlock_irqrestore(&lock, flags); ++ ++ free_cpumask_var(fixup); ++ flags = ipipe_critical_enter(ipipe_timer_request_sync); ++ ipipe_timer_request_sync(); ++ ipipe_critical_exit(flags); ++ ++ return 0; ++ ++err_remove_all: ++ raw_spin_unlock_irqrestore(&lock, flags); ++ free_cpumask_var(fixup); ++ ++ for_each_cpu(cpu, mask) { ++ per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1; ++ per_cpu(percpu_timer, cpu) = NULL; ++ } ++ __ipipe_hrtimer_freq = 0; ++ ++ return -ENODEV; ++} ++ ++static void ipipe_timer_release_sync(void) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ if (timer) ++ timer->release(timer); ++} ++ ++void ipipe_timers_release(void) ++{ ++ unsigned long flags; ++ unsigned cpu; ++ ++ flags = ipipe_critical_enter(ipipe_timer_release_sync); ++ ipipe_timer_release_sync(); ++ ipipe_critical_exit(flags); ++ ++ for_each_online_cpu(cpu) { ++ per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1; ++ per_cpu(percpu_timer, cpu) = NULL; ++ __ipipe_hrtimer_freq = 0; ++ } ++} ++ ++static void __ipipe_ack_hrtimer_irq(struct irq_desc *desc) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ /* ++ * Pseudo-IRQs like pipelined IPIs have no descriptor, we have ++ * to check for this. ++ */ ++ if (desc) ++ desc->ipipe_ack(desc); ++ ++ if (timer->ack) ++ timer->ack(); ++ ++ if (desc) ++ desc->ipipe_end(desc); ++} ++ ++static int do_set_oneshot(struct clock_event_device *cdev) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ timer->orig_set_state_oneshot(cdev); ++ timer->mode_handler(CLOCK_EVT_MODE_ONESHOT, cdev); ++ ++ return 0; ++} ++ ++static int do_set_oneshot_stopped(struct clock_event_device *cdev) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ timer->mode_handler(CLOCK_EVT_MODE_SHUTDOWN, cdev); ++ ++ return 0; ++} ++ ++static int do_set_periodic(struct clock_event_device *cdev) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ timer->mode_handler(CLOCK_EVT_MODE_PERIODIC, cdev); ++ ++ return 0; ++} ++ ++static int do_set_shutdown(struct clock_event_device *cdev) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ timer->mode_handler(CLOCK_EVT_MODE_SHUTDOWN, cdev); ++ ++ return 0; ++} ++ ++int clockevents_program_event(struct clock_event_device *dev, ++ ktime_t expires, bool force); ++ ++struct grab_timer_data { ++ void (*tick_handler)(void); ++ void (*emumode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev); ++ int (*emutick)(unsigned long evt, ++ struct clock_event_device *cdev); ++ int retval; ++}; ++ ++static void grab_timer(void *arg) ++{ ++ struct grab_timer_data *data = arg; ++ struct clock_event_device *evtdev; ++ struct ipipe_timer *timer; ++ struct irq_desc *desc; ++ unsigned long flags; ++ int steal, ret; ++ ++ flags = hard_local_irq_save(); ++ ++ timer = this_cpu_read(percpu_timer); ++ evtdev = timer->host_timer; ++ ret = ipipe_request_irq(ipipe_head_domain, timer->irq, ++ (ipipe_irq_handler_t)data->tick_handler, ++ NULL, __ipipe_ack_hrtimer_irq); ++ if (ret < 0 && ret != -EBUSY) { ++ hard_local_irq_restore(flags); ++ data->retval = ret; ++ return; ++ } ++ ++ steal = !clockevent_state_detached(evtdev); ++ if (steal && evtdev->ipipe_stolen == 0) { ++ 
timer->real_mult = evtdev->mult; ++ timer->real_shift = evtdev->shift; ++ timer->orig_set_state_periodic = evtdev->set_state_periodic; ++ timer->orig_set_state_oneshot = evtdev->set_state_oneshot; ++ timer->orig_set_state_oneshot_stopped = evtdev->set_state_oneshot_stopped; ++ timer->orig_set_state_shutdown = evtdev->set_state_shutdown; ++ timer->orig_set_next_event = evtdev->set_next_event; ++ timer->mode_handler = data->emumode; ++ evtdev->mult = 1; ++ evtdev->shift = 0; ++ evtdev->max_delta_ns = UINT_MAX; ++ if (timer->orig_set_state_periodic) ++ evtdev->set_state_periodic = do_set_periodic; ++ if (timer->orig_set_state_oneshot) ++ evtdev->set_state_oneshot = do_set_oneshot; ++ if (timer->orig_set_state_oneshot_stopped) ++ evtdev->set_state_oneshot_stopped = do_set_oneshot_stopped; ++ if (timer->orig_set_state_shutdown) ++ evtdev->set_state_shutdown = do_set_shutdown; ++ evtdev->set_next_event = data->emutick; ++ evtdev->ipipe_stolen = 1; ++ } ++ ++ hard_local_irq_restore(flags); ++ ++ data->retval = get_dev_mode(evtdev); ++ ++ desc = irq_to_desc(timer->irq); ++ if (desc && irqd_irq_disabled(&desc->irq_data)) ++ ipipe_enable_irq(timer->irq); ++ ++ if (evtdev->ipipe_stolen && clockevent_state_oneshot(evtdev)) { ++ ret = clockevents_program_event(evtdev, ++ evtdev->next_event, true); ++ if (ret) ++ data->retval = ret; ++ } ++} ++ ++int ipipe_timer_start(void (*tick_handler)(void), ++ void (*emumode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev), ++ int (*emutick)(unsigned long evt, ++ struct clock_event_device *cdev), ++ unsigned int cpu) ++{ ++ struct grab_timer_data data; ++ int ret; ++ ++ data.tick_handler = tick_handler; ++ data.emutick = emutick; ++ data.emumode = emumode; ++ data.retval = -EINVAL; ++ ret = smp_call_function_single(cpu, grab_timer, &data, true); ++ ++ return ret ?: data.retval; ++} ++ ++static void release_timer(void *arg) ++{ ++ struct clock_event_device *evtdev; ++ struct ipipe_timer *timer; ++ struct irq_desc *desc; ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ ++ timer = this_cpu_read(percpu_timer); ++ ++ desc = irq_to_desc(timer->irq); ++ if (desc && irqd_irq_disabled(&desc->irq_data)) ++ ipipe_disable_irq(timer->irq); ++ ++ ipipe_free_irq(ipipe_head_domain, timer->irq); ++ ++ evtdev = timer->host_timer; ++ if (evtdev && evtdev->ipipe_stolen) { ++ evtdev->mult = timer->real_mult; ++ evtdev->shift = timer->real_shift; ++ evtdev->set_state_periodic = timer->orig_set_state_periodic; ++ evtdev->set_state_oneshot = timer->orig_set_state_oneshot; ++ evtdev->set_state_oneshot_stopped = timer->orig_set_state_oneshot_stopped; ++ evtdev->set_state_shutdown = timer->orig_set_state_shutdown; ++ evtdev->set_next_event = timer->orig_set_next_event; ++ evtdev->ipipe_stolen = 0; ++ hard_local_irq_restore(flags); ++ if (clockevent_state_oneshot(evtdev)) ++ clockevents_program_event(evtdev, ++ evtdev->next_event, true); ++ } else ++ hard_local_irq_restore(flags); ++} ++ ++void ipipe_timer_stop(unsigned int cpu) ++{ ++ smp_call_function_single(cpu, release_timer, NULL, true); ++} ++ ++void ipipe_timer_set(unsigned long cdelay) ++{ ++ unsigned long tdelay; ++ struct ipipe_timer *t; ++ ++ t = __ipipe_raw_cpu_read(percpu_timer); ++ ++ /* ++ * Even though some architectures may use a 64 bits delay ++ * here, we voluntarily limit to 32 bits, 4 billions ticks ++ * should be enough for now. Would a timer needs more, an ++ * extra call to the tick handler would simply occur after 4 ++ * billions ticks. 
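++ * The clamped count is then scaled to hardware ticks using the
++ * c2t_integ/c2t_frac factors computed by config_pcpu_timer(), and
++ * bounded by the timer's min/max programmable delay.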
++ */ ++ if (cdelay > UINT_MAX) ++ cdelay = UINT_MAX; ++ ++ tdelay = cdelay; ++ if (t->c2t_integ != 1) ++ tdelay *= t->c2t_integ; ++ if (t->c2t_frac) ++ tdelay += ((unsigned long long)cdelay * t->c2t_frac) >> 32; ++ if (tdelay < t->min_delay_ticks) ++ tdelay = t->min_delay_ticks; ++ if (tdelay > t->max_delay_ticks) ++ tdelay = t->max_delay_ticks; ++ ++ if (t->set(tdelay, t->timer_set) < 0) ++ ipipe_raise_irq(t->irq); ++} ++EXPORT_SYMBOL_GPL(ipipe_timer_set); ++ ++const char *ipipe_timer_name(void) ++{ ++ return per_cpu(percpu_timer, 0)->name; ++} ++EXPORT_SYMBOL_GPL(ipipe_timer_name); ++ ++unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns) ++{ ++ unsigned long long tmp; ++ BUG_ON(!timer->freq); ++ tmp = (unsigned long long)ns * timer->freq; ++ do_div(tmp, 1000000000); ++ return tmp; ++} ++ ++#ifdef CONFIG_IPIPE_HAVE_HOSTRT ++/* ++ * NOTE: The architecture specific code must only call this function ++ * when a clocksource suitable for CLOCK_HOST_REALTIME is enabled. ++ * The event receiver is responsible for providing proper locking. ++ */ ++void ipipe_update_hostrt(struct timekeeper *tk) ++{ ++ struct tk_read_base *tkr = &tk->tkr_mono; ++ struct clocksource *clock = tkr->clock; ++ struct ipipe_hostrt_data data; ++ struct timespec xt; ++ ++ xt.tv_sec = tk->xtime_sec; ++ xt.tv_nsec = (long)(tkr->xtime_nsec >> tkr->shift); ++ ipipe_root_only(); ++ data.live = 1; ++ data.cycle_last = tkr->cycle_last; ++ data.mask = clock->mask; ++ data.mult = tkr->mult; ++ data.shift = tkr->shift; ++ data.wall_time_sec = xt.tv_sec; ++ data.wall_time_nsec = xt.tv_nsec; ++ data.wall_to_monotonic.tv_sec = tk->wall_to_monotonic.tv_sec; ++ data.wall_to_monotonic.tv_nsec = tk->wall_to_monotonic.tv_nsec; ++ __ipipe_notify_kevent(IPIPE_KEVT_HOSTRT, &data); ++} ++ ++#endif /* CONFIG_IPIPE_HAVE_HOSTRT */ ++ ++int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, ++ bool force); ++ ++void __ipipe_timer_refresh_freq(unsigned int hrclock_freq) ++{ ++ struct ipipe_timer *t = __ipipe_raw_cpu_read(percpu_timer); ++ unsigned long flags; ++ ++ if (t && t->refresh_freq) { ++ t->freq = t->refresh_freq(); ++ flags = hard_local_irq_save(); ++ config_pcpu_timer(t, hrclock_freq); ++ hard_local_irq_restore(flags); ++ clockevents_program_event(t->host_timer, ++ t->host_timer->next_event, false); ++ } ++} +diff -uprN kernel/kernel/ipipe/tracer.c kernel_new/kernel/ipipe/tracer.c +--- kernel/kernel/ipipe/tracer.c 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/tracer.c 2021-04-01 18:28:07.806863119 +0800 +@@ -0,0 +1,1486 @@ ++/* -*- linux-c -*- ++ * kernel/ipipe/tracer.c ++ * ++ * Copyright (C) 2005 Luotao Fu. ++ * 2005-2008 Jan Kiszka. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define IPIPE_TRACE_PATHS 4 /* Do not lower below 3 */ ++#define IPIPE_DEFAULT_ACTIVE 0 ++#define IPIPE_DEFAULT_MAX 1 ++#define IPIPE_DEFAULT_FROZEN 2 ++ ++#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT) ++#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1)) ++ ++#define IPIPE_DEFAULT_PRE_TRACE 10 ++#define IPIPE_DEFAULT_POST_TRACE 10 ++#define IPIPE_DEFAULT_BACK_TRACE 100 ++ ++#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */ ++#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */ ++ ++#define IPIPE_TFLG_NMI_LOCK 0x0001 ++#define IPIPE_TFLG_NMI_HIT 0x0002 ++#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004 ++ ++#define IPIPE_TFLG_HWIRQ_OFF 0x0100 ++#define IPIPE_TFLG_FREEZING 0x0200 ++#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */ ++#define IPIPE_TFLG_CURRDOM_MASK 0x0C00 ++#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? */ ++#define IPIPE_TFLG_DOMSTATE_BITS 1 ++ ++#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \ ++ (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT))) ++#define IPIPE_TFLG_CURRENT_DOMAIN(point) \ ++ ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT) ++ ++struct ipipe_trace_point { ++ short type; ++ short flags; ++ unsigned long eip; ++ unsigned long parent_eip; ++ unsigned long v; ++ unsigned long long timestamp; ++}; ++ ++struct ipipe_trace_path { ++ volatile int flags; ++ int dump_lock; /* separated from flags due to cross-cpu access */ ++ int trace_pos; /* next point to fill */ ++ int begin, end; /* finalised path begin and end */ ++ int post_trace; /* non-zero when in post-trace phase */ ++ unsigned long long length; /* max path length in cycles */ ++ unsigned long nmi_saved_eip; /* for deferred requests from NMIs */ ++ unsigned long nmi_saved_parent_eip; ++ unsigned long nmi_saved_v; ++ struct ipipe_trace_point point[IPIPE_TRACE_POINTS]; ++} ____cacheline_aligned_in_smp; ++ ++enum ipipe_trace_type ++{ ++ IPIPE_TRACE_FUNC = 0, ++ IPIPE_TRACE_BEGIN, ++ IPIPE_TRACE_END, ++ IPIPE_TRACE_FREEZE, ++ IPIPE_TRACE_SPECIAL, ++ IPIPE_TRACE_PID, ++ IPIPE_TRACE_EVENT, ++}; ++ ++#define IPIPE_TYPE_MASK 0x0007 ++#define IPIPE_TYPE_BITS 3 ++ ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path); ++#else /* !CONFIG_IPIPE_TRACE_VMALLOC */ ++static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) = ++ { [0 ... 
IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } }; ++#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ ++ ++int ipipe_trace_enable = 0; ++ ++static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE }; ++static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX }; ++static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN }; ++static IPIPE_DEFINE_SPINLOCK(global_path_lock); ++static int pre_trace = IPIPE_DEFAULT_PRE_TRACE; ++static int post_trace = IPIPE_DEFAULT_POST_TRACE; ++static int back_trace = IPIPE_DEFAULT_BACK_TRACE; ++static int verbose_trace = 1; ++static unsigned long trace_overhead; ++ ++static unsigned long trigger_begin; ++static unsigned long trigger_end; ++ ++static DEFINE_MUTEX(out_mutex); ++static struct ipipe_trace_path *print_path; ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++static struct ipipe_trace_path *panic_path; ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++static int print_pre_trace; ++static int print_post_trace; ++ ++ ++static long __ipipe_signed_tsc2us(long long tsc); ++static void ++__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point); ++static void __ipipe_print_symname(struct seq_file *m, unsigned long eip); ++ ++static inline void store_states(struct ipipe_domain *ipd, ++ struct ipipe_trace_point *point, int pos) ++{ ++ if (test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(ipd)->status)) ++ point->flags |= 1 << (pos + IPIPE_TFLG_DOMSTATE_SHIFT); ++ ++ if (ipd == __ipipe_current_domain) ++ point->flags |= pos << IPIPE_TFLG_CURRDOM_SHIFT; ++} ++ ++static notrace void ++__ipipe_store_domain_states(struct ipipe_trace_point *point) ++{ ++ store_states(ipipe_root_domain, point, 0); ++ if (ipipe_head_domain != ipipe_root_domain) ++ store_states(ipipe_head_domain, point, 1); ++} ++ ++static notrace int __ipipe_get_free_trace_path(int old, int cpu) ++{ ++ int new_active = old; ++ struct ipipe_trace_path *tp; ++ ++ do { ++ if (++new_active == IPIPE_TRACE_PATHS) ++ new_active = 0; ++ tp = &per_cpu(trace_path, cpu)[new_active]; ++ } while (new_active == per_cpu(max_path, cpu) || ++ new_active == per_cpu(frozen_path, cpu) || ++ tp->dump_lock); ++ ++ return new_active; ++} ++ ++static notrace void ++__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp, ++ struct ipipe_trace_path *old_tp, int old_pos) ++{ ++ int i; ++ ++ new_tp->trace_pos = pre_trace+1; ++ ++ for (i = new_tp->trace_pos; i > 0; i--) ++ memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)], ++ &old_tp->point[WRAP_POINT_NO(old_pos-i)], ++ sizeof(struct ipipe_trace_point)); ++ ++ /* mark the end (i.e. the point before point[0]) invalid */ ++ new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0; ++} ++ ++static notrace struct ipipe_trace_path * ++__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos) ++{ ++ struct ipipe_trace_path *old_tp = tp; ++ long active = per_cpu(active_path, cpu); ++ unsigned long long length; ++ ++ /* do we have a new worst case? 
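++ * The begin-to-end length of the path that just closed is compared
++ * with the per-CPU maximum recorded so far; a longer path becomes the
++ * new max_path.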
*/ ++ length = tp->point[tp->end].timestamp - ++ tp->point[tp->begin].timestamp; ++ if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) { ++ /* we need protection here against other cpus trying ++ to start a proc dump */ ++ raw_spin_lock(&global_path_lock); ++ ++ /* active path holds new worst case */ ++ tp->length = length; ++ per_cpu(max_path, cpu) = active; ++ ++ /* find next unused trace path */ ++ active = __ipipe_get_free_trace_path(active, cpu); ++ ++ raw_spin_unlock(&global_path_lock); ++ ++ tp = &per_cpu(trace_path, cpu)[active]; ++ ++ /* migrate last entries for pre-tracing */ ++ __ipipe_migrate_pre_trace(tp, old_tp, pos); ++ } ++ ++ return tp; ++} ++ ++static notrace struct ipipe_trace_path * ++__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos) ++{ ++ struct ipipe_trace_path *old_tp = tp; ++ long active = per_cpu(active_path, cpu); ++ int n; ++ ++ /* frozen paths have no core (begin=end) */ ++ tp->begin = tp->end; ++ ++ /* we need protection here against other cpus trying ++ * to set their frozen path or to start a proc dump */ ++ raw_spin_lock(&global_path_lock); ++ ++ per_cpu(frozen_path, cpu) = active; ++ ++ /* find next unused trace path */ ++ active = __ipipe_get_free_trace_path(active, cpu); ++ ++ /* check if this is the first frozen path */ ++ for_each_possible_cpu(n) { ++ if (n != cpu && ++ per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0) ++ tp->end = -1; ++ } ++ ++ raw_spin_unlock(&global_path_lock); ++ ++ tp = &per_cpu(trace_path, cpu)[active]; ++ ++ /* migrate last entries for pre-tracing */ ++ __ipipe_migrate_pre_trace(tp, old_tp, pos); ++ ++ return tp; ++} ++ ++void notrace ++__ipipe_trace(enum ipipe_trace_type type, unsigned long eip, ++ unsigned long parent_eip, unsigned long v) ++{ ++ struct ipipe_trace_path *tp, *old_tp; ++ int pos, next_pos, begin; ++ struct ipipe_trace_point *point; ++ unsigned long flags; ++ int cpu; ++ ++ flags = hard_local_irq_save_notrace(); ++ ++ cpu = ipipe_processor_id(); ++ restart: ++ tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ /* here starts a race window with NMIs - catched below */ ++ ++ /* check for NMI recursion */ ++ if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) { ++ tp->flags |= IPIPE_TFLG_NMI_HIT; ++ ++ /* first freeze request from NMI context? */ ++ if ((type == IPIPE_TRACE_FREEZE) && ++ !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) { ++ /* save arguments and mark deferred freezing */ ++ tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ; ++ tp->nmi_saved_eip = eip; ++ tp->nmi_saved_parent_eip = parent_eip; ++ tp->nmi_saved_v = v; ++ } ++ return; /* no need for restoring flags inside IRQ */ ++ } ++ ++ /* clear NMI events and set lock (atomically per cpu) */ ++ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | ++ IPIPE_TFLG_NMI_FREEZE_REQ)) ++ | IPIPE_TFLG_NMI_LOCK; ++ ++ /* check active_path again - some nasty NMI may have switched ++ * it meanwhile */ ++ if (unlikely(tp != ++ &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) { ++ /* release lock on wrong path and restart */ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* there is no chance that the NMI got deferred ++ * => no need to check for pending freeze requests */ ++ goto restart; ++ } ++ ++ /* get the point buffer */ ++ pos = tp->trace_pos; ++ point = &tp->point[pos]; ++ ++ /* store all trace point data */ ++ point->type = type; ++ point->flags = hard_irqs_disabled_flags(flags) ? 
IPIPE_TFLG_HWIRQ_OFF : 0; ++ point->eip = eip; ++ point->parent_eip = parent_eip; ++ point->v = v; ++ ipipe_read_tsc(point->timestamp); ++ ++ __ipipe_store_domain_states(point); ++ ++ /* forward to next point buffer */ ++ next_pos = WRAP_POINT_NO(pos+1); ++ tp->trace_pos = next_pos; ++ ++ /* only mark beginning if we haven't started yet */ ++ begin = tp->begin; ++ if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0)) ++ tp->begin = pos; ++ ++ /* end of critical path, start post-trace if not already started */ ++ if (unlikely(type == IPIPE_TRACE_END) && ++ (begin >= 0) && !tp->post_trace) ++ tp->post_trace = post_trace + 1; ++ ++ /* freeze only if the slot is free and we are not already freezing */ ++ if ((unlikely(type == IPIPE_TRACE_FREEZE) || ++ (unlikely(eip >= trigger_begin && eip <= trigger_end) && ++ type == IPIPE_TRACE_FUNC)) && ++ per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 && ++ !(tp->flags & IPIPE_TFLG_FREEZING)) { ++ tp->post_trace = post_trace + 1; ++ tp->flags |= IPIPE_TFLG_FREEZING; ++ } ++ ++ /* enforce end of trace in case of overflow */ ++ if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) { ++ tp->end = pos; ++ goto enforce_end; ++ } ++ ++ /* stop tracing this path if we are in post-trace and ++ * a) that phase is over now or ++ * b) a new TRACE_BEGIN came in but we are not freezing this path */ ++ if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) || ++ ((type == IPIPE_TRACE_BEGIN) && ++ !(tp->flags & IPIPE_TFLG_FREEZING))))) { ++ /* store the path's end (i.e. excluding post-trace) */ ++ tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace); ++ ++ enforce_end: ++ if (tp->flags & IPIPE_TFLG_FREEZING) ++ tp = __ipipe_trace_freeze(cpu, tp, pos); ++ else ++ tp = __ipipe_trace_end(cpu, tp, pos); ++ ++ /* reset the active path, maybe already start a new one */ ++ tp->begin = (type == IPIPE_TRACE_BEGIN) ? 
++ WRAP_POINT_NO(tp->trace_pos - 1) : -1; ++ tp->end = -1; ++ tp->post_trace = 0; ++ tp->flags = 0; ++ ++ /* update active_path not earlier to avoid races with NMIs */ ++ per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu); ++ } ++ ++ /* we still have old_tp and point, ++ * let's reset NMI lock and check for catches */ ++ old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) { ++ /* well, this late tagging may not immediately be visible for ++ * other cpus already dumping this path - a minor issue */ ++ point->flags |= IPIPE_TFLG_NMI_HIT; ++ ++ /* handle deferred freezing from NMI context */ ++ if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) ++ __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip, ++ old_tp->nmi_saved_parent_eip, ++ old_tp->nmi_saved_v); ++ } ++ ++ hard_local_irq_restore_notrace(flags); ++} ++ ++static unsigned long __ipipe_global_path_lock(void) ++{ ++ unsigned long flags; ++ int cpu; ++ struct ipipe_trace_path *tp; ++ ++ raw_spin_lock_irqsave(&global_path_lock, flags); ++ ++ cpu = ipipe_processor_id(); ++ restart: ++ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ /* here is small race window with NMIs - catched below */ ++ ++ /* clear NMI events and set lock (atomically per cpu) */ ++ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | ++ IPIPE_TFLG_NMI_FREEZE_REQ)) ++ | IPIPE_TFLG_NMI_LOCK; ++ ++ /* check active_path again - some nasty NMI may have switched ++ * it meanwhile */ ++ if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) { ++ /* release lock on wrong path and restart */ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* there is no chance that the NMI got deferred ++ * => no need to check for pending freeze requests */ ++ goto restart; ++ } ++ ++ return flags; ++} ++ ++static void __ipipe_global_path_unlock(unsigned long flags) ++{ ++ int cpu; ++ struct ipipe_trace_path *tp; ++ ++ /* release spinlock first - it's not involved in the NMI issue */ ++ __ipipe_spin_unlock_irqbegin(&global_path_lock); ++ ++ cpu = ipipe_processor_id(); ++ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* handle deferred freezing from NMI context */ ++ if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) ++ __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip, ++ tp->nmi_saved_parent_eip, tp->nmi_saved_v); ++ ++ /* See __ipipe_spin_lock_irqsave() and friends. 
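++ * __ipipe_spin_unlock_irqcomplete() restores the interrupt state that
++ * was saved when the global path lock was taken, now that any deferred
++ * NMI freeze request has been replayed.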
*/ ++ __ipipe_spin_unlock_irqcomplete(flags); ++} ++ ++void notrace asmlinkage ++ipipe_trace_asm(enum ipipe_trace_type type, unsigned long eip, ++ unsigned long parent_eip, unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(type, eip, parent_eip, v); ++} ++ ++void notrace ipipe_trace_begin(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_BEGIN, CALLER_ADDR0, ++ CALLER_ADDR1, v); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_begin); ++ ++void notrace ipipe_trace_end(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_END, CALLER_ADDR0, ++ CALLER_ADDR1, v); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_end); ++ ++void notrace ipipe_trace_irqbegin(int irq, struct pt_regs *regs) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_BEGIN, instruction_pointer(regs), ++ CALLER_ADDR1, irq); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_irqbegin); ++ ++void notrace ipipe_trace_irqend(int irq, struct pt_regs *regs) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_END, instruction_pointer(regs), ++ CALLER_ADDR1, irq); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_irqend); ++ ++void notrace ipipe_trace_freeze(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_FREEZE, CALLER_ADDR0, ++ CALLER_ADDR1, v); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_freeze); ++ ++void notrace ipipe_trace_special(unsigned char id, unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS), ++ CALLER_ADDR0, ++ CALLER_ADDR1, v); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_special); ++ ++void notrace ipipe_trace_pid(pid_t pid, short prio) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS), ++ CALLER_ADDR0, ++ CALLER_ADDR1, pid); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_pid); ++ ++void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS), ++ CALLER_ADDR0, ++ CALLER_ADDR1, delay_tsc); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_event); ++ ++int ipipe_trace_max_reset(void) ++{ ++ int cpu; ++ unsigned long flags; ++ struct ipipe_trace_path *path; ++ int ret = 0; ++ ++ flags = __ipipe_global_path_lock(); ++ ++ for_each_possible_cpu(cpu) { ++ path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; ++ ++ if (path->dump_lock) { ++ ret = -EBUSY; ++ break; ++ } ++ ++ path->begin = -1; ++ path->end = -1; ++ path->trace_pos = 0; ++ path->length = 0; ++ } ++ ++ __ipipe_global_path_unlock(flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_max_reset); ++ ++int ipipe_trace_frozen_reset(void) ++{ ++ int cpu; ++ unsigned long flags; ++ struct ipipe_trace_path *path; ++ int ret = 0; ++ ++ flags = __ipipe_global_path_lock(); ++ ++ for_each_online_cpu(cpu) { ++ path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; ++ ++ if (path->dump_lock) { ++ ret = -EBUSY; ++ break; ++ } ++ ++ path->begin = -1; ++ path->end = -1; ++ path->trace_pos = 0; ++ path->length = 0; ++ } ++ ++ __ipipe_global_path_unlock(flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_frozen_reset); ++ ++static void ++__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point, ++ int trylock) ++{ ++ struct task_struct *task = NULL; ++ char buf[8]; ++ int i; ++ int locked = 1; ++ ++ if (trylock) { ++ if (!read_trylock(&tasklist_lock)) ++ locked = 0; ++ } else ++ read_lock(&tasklist_lock); ++ ++ if 
(locked) ++ task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns); ++ ++ if (task) ++ strncpy(task_info, task->comm, 11); ++ else ++ strcpy(task_info, "--"); ++ ++ if (locked) ++ read_unlock(&tasklist_lock); ++ ++ for (i = strlen(task_info); i < 11; i++) ++ task_info[i] = ' '; ++ ++ sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS); ++ strcpy(task_info + (11 - strlen(buf)), buf); ++} ++ ++static void ++__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path, ++ struct ipipe_trace_point *point) ++{ ++ long time; ++ int type; ++ ++ time = __ipipe_signed_tsc2us(point->timestamp - ++ path->point[path->begin].timestamp + point->v); ++ type = point->type >> IPIPE_TYPE_BITS; ++ ++ if (type == 0) ++ /* ++ * Event type #0 is predefined, stands for the next ++ * timer tick. ++ */ ++ sprintf(buf, "tick@%-6ld", time); ++ else ++ sprintf(buf, "%3d@%-7ld", type, time); ++} ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++ ++void ipipe_trace_panic_freeze(void) ++{ ++ unsigned long flags; ++ int cpu; ++ ++ if (!ipipe_trace_enable) ++ return; ++ ++ ipipe_trace_enable = 0; ++ flags = hard_local_irq_save_notrace(); ++ ++ cpu = ipipe_processor_id(); ++ ++ panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ hard_local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_panic_freeze); ++ ++void ipipe_trace_panic_dump(void) ++{ ++ int cnt = back_trace; ++ int start, pos; ++ char buf[16]; ++ ++ if (!panic_path) ++ return; ++ ++ ipipe_context_check_off(); ++ ++ printk(KERN_CONT "I-pipe tracer log (%d points):\n", cnt); ++ ++ start = pos = WRAP_POINT_NO(panic_path->trace_pos-1); ++ ++ while (cnt-- > 0) { ++ struct ipipe_trace_point *point = &panic_path->point[pos]; ++ long time; ++ char info[16]; ++ int i; ++ ++ printk(KERN_CONT " %c", ++ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); ++ ++ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) ++ printk(KERN_CONT "%c", ++ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? ++ '#' : '+') : ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? 
++ '*' : ' ')); ++ ++ if (!point->eip) ++ printk(KERN_CONT "--\n"); ++ else { ++ __ipipe_trace_point_type(buf, point); ++ printk(KERN_CONT "%s", buf); ++ ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ printk(KERN_CONT " "); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ __ipipe_get_task_info(info, ++ point, 1); ++ printk(KERN_CONT "%s", info); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ __ipipe_get_event_date(info, ++ panic_path, point); ++ printk(KERN_CONT "%s", info); ++ break; ++ ++ default: ++ printk(KERN_CONT "0x%08lx ", point->v); ++ } ++ ++ time = __ipipe_signed_tsc2us(point->timestamp - ++ panic_path->point[start].timestamp); ++ printk(KERN_CONT " %5ld ", time); ++ ++ __ipipe_print_symname(NULL, point->eip); ++ printk(KERN_CONT " ("); ++ __ipipe_print_symname(NULL, point->parent_eip); ++ printk(KERN_CONT ")\n"); ++ } ++ pos = WRAP_POINT_NO(pos - 1); ++ } ++ ++ panic_path = NULL; ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_panic_dump); ++ ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++ ++ ++/* --- /proc output --- */ ++ ++static notrace int __ipipe_in_critical_trpath(long point_no) ++{ ++ return ((WRAP_POINT_NO(point_no-print_path->begin) < ++ WRAP_POINT_NO(print_path->end-print_path->begin)) || ++ ((print_path->end == print_path->begin) && ++ (WRAP_POINT_NO(point_no-print_path->end) > ++ print_post_trace))); ++} ++ ++static long __ipipe_signed_tsc2us(long long tsc) ++{ ++ unsigned long long abs_tsc; ++ long us; ++ ++ if (!__ipipe_hrclock_ok()) ++ return 0; ++ ++ /* ipipe_tsc2us works on unsigned => handle sign separately */ ++ abs_tsc = (tsc >= 0) ? tsc : -tsc; ++ us = ipipe_tsc2us(abs_tsc); ++ if (tsc < 0) ++ return -us; ++ else ++ return us; ++} ++ ++static void ++__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point) ++{ ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ strcpy(buf, "func "); ++ break; ++ ++ case IPIPE_TRACE_BEGIN: ++ strcpy(buf, "begin "); ++ break; ++ ++ case IPIPE_TRACE_END: ++ strcpy(buf, "end "); ++ break; ++ ++ case IPIPE_TRACE_FREEZE: ++ strcpy(buf, "freeze "); ++ break; ++ ++ case IPIPE_TRACE_SPECIAL: ++ sprintf(buf, "(0x%02x) ", ++ point->type >> IPIPE_TYPE_BITS); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ sprintf(buf, "[%5d] ", (pid_t)point->v); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ sprintf(buf, "event "); ++ break; ++ } ++} ++ ++static void ++__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point) ++{ ++ char mark = ' '; ++ int point_no = point - print_path->point; ++ int i; ++ ++ if (print_path->end == point_no) ++ mark = '<'; ++ else if (print_path->begin == point_no) ++ mark = '>'; ++ else if (__ipipe_in_critical_trpath(point_no)) ++ mark = ':'; ++ seq_printf(m, "%c%c", mark, ++ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); ++ ++ if (!verbose_trace) ++ return; ++ ++ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) ++ seq_printf(m, "%c", ++ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? ++ '#' : '+') : ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' ')); ++} ++ ++static void ++__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point) ++{ ++ unsigned long delay = 0; ++ int next; ++ char *mark = " "; ++ ++ next = WRAP_POINT_NO(point+1 - print_path->point); ++ ++ if (next != print_path->trace_pos) ++ delay = ipipe_tsc2ns(print_path->point[next].timestamp - ++ point->timestamp); ++ ++ if (__ipipe_in_critical_trpath(point - print_path->point)) { ++ if (delay > IPIPE_DELAY_WARN) ++ mark = "! 
"; ++ else if (delay > IPIPE_DELAY_NOTE) ++ mark = "+ "; ++ } ++ seq_puts(m, mark); ++ ++ if (verbose_trace) ++ seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000, ++ (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' '); ++ else ++ seq_puts(m, " "); ++} ++ ++static void __ipipe_print_symname(struct seq_file *m, unsigned long eip) ++{ ++ char namebuf[KSYM_NAME_LEN+1]; ++ unsigned long size, offset; ++ const char *sym_name; ++ char *modname; ++ ++ sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++ if (!m) { ++ /* panic dump */ ++ if (sym_name) { ++ printk(KERN_CONT "%s+0x%lx", sym_name, offset); ++ if (modname) ++ printk(KERN_CONT " [%s]", modname); ++ } else ++ printk(KERN_CONT "<%08lx>", eip); ++ } else ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++ { ++ if (sym_name) { ++ if (verbose_trace) { ++ seq_printf(m, "%s+0x%lx", sym_name, offset); ++ if (modname) ++ seq_printf(m, " [%s]", modname); ++ } else ++ seq_puts(m, sym_name); ++ } else ++ seq_printf(m, "<%08lx>", eip); ++ } ++} ++ ++static void __ipipe_print_headline(struct seq_file *m) ++{ ++ const char *name[2]; ++ ++ seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu " ++ "us\n\n", trace_overhead/1000, trace_overhead%1000); ++ ++ if (verbose_trace) { ++ name[0] = ipipe_root_domain->name; ++ if (ipipe_head_domain != ipipe_root_domain) ++ name[1] = ipipe_head_domain->name; ++ else ++ name[1] = ""; ++ ++ seq_printf(m, ++ " +----- Hard IRQs ('|': locked)\n" ++ " |+-- %s\n" ++ " ||+- %s%s\n" ++ " ||| +---------- " ++ "Delay flag ('+': > %d us, '!': > %d us)\n" ++ " ||| | +- " ++ "NMI noise ('N')\n" ++ " ||| | |\n" ++ " Type User Val. Time Delay Function " ++ "(Parent)\n", ++ name[1], name[0], ++ " ('*': domain stalled, '+': current, " ++ "'#': current+stalled)", ++ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); ++ } else ++ seq_printf(m, ++ " +--------------- Hard IRQs ('|': locked)\n" ++ " | +- Delay flag " ++ "('+': > %d us, '!': > %d us)\n" ++ " | |\n" ++ " Type Time Function (Parent)\n", ++ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); ++} ++ ++static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t n = *pos; ++ ++ mutex_lock(&out_mutex); ++ ++ if (!n) { ++ struct ipipe_trace_path *tp; ++ unsigned long length_usecs; ++ int points, cpu; ++ unsigned long flags; ++ ++ /* protect against max_path/frozen_path updates while we ++ * haven't locked our target path, also avoid recursively ++ * taking global_path_lock from NMI context */ ++ flags = __ipipe_global_path_lock(); ++ ++ /* find the longest of all per-cpu paths */ ++ print_path = NULL; ++ for_each_online_cpu(cpu) { ++ tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; ++ if ((print_path == NULL) || ++ (tp->length > print_path->length)) { ++ print_path = tp; ++ break; ++ } ++ } ++ print_path->dump_lock = 1; ++ ++ __ipipe_global_path_unlock(flags); ++ ++ if (!__ipipe_hrclock_ok()) { ++ seq_printf(m, "No hrclock available, dumping traces disabled\n"); ++ return NULL; ++ } ++ ++ /* does this path actually contain data? 
*/ ++ if (print_path->end == print_path->begin) ++ return NULL; ++ ++ /* number of points inside the critical path */ ++ points = WRAP_POINT_NO(print_path->end-print_path->begin+1); ++ ++ /* pre- and post-tracing length, post-trace length was frozen ++ in __ipipe_trace, pre-trace may have to be reduced due to ++ buffer overrun */ ++ print_pre_trace = pre_trace; ++ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - ++ print_path->end - 1); ++ if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) ++ print_pre_trace = IPIPE_TRACE_POINTS - 1 - points - ++ print_post_trace; ++ ++ length_usecs = ipipe_tsc2us(print_path->length); ++ seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe release #%d\n" ++ "-------------------------------------------------------------\n", ++ UTS_RELEASE, IPIPE_CORE_RELEASE); ++ seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: " ++ "%d (-%d/+%d), Length: %lu us\n", ++ cpu, print_path->point[print_path->begin].timestamp, ++ points, print_pre_trace, print_post_trace, length_usecs); ++ __ipipe_print_headline(m); ++ } ++ ++ /* check if we are inside the trace range */ ++ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + ++ print_pre_trace + print_post_trace)) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin - ++ print_pre_trace + n)]; ++} ++ ++static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos) ++{ ++ loff_t n = ++*pos; ++ ++ /* check if we are inside the trace range with the next entry */ ++ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + ++ print_pre_trace + print_post_trace)) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin - ++ print_pre_trace + *pos)]; ++} ++ ++static void __ipipe_prtrace_stop(struct seq_file *m, void *p) ++{ ++ if (print_path) ++ print_path->dump_lock = 0; ++ mutex_unlock(&out_mutex); ++} ++ ++static int __ipipe_prtrace_show(struct seq_file *m, void *p) ++{ ++ long time; ++ struct ipipe_trace_point *point = p; ++ char buf[16]; ++ ++ if (!point->eip) { ++ seq_puts(m, "--\n"); ++ return 0; ++ } ++ ++ __ipipe_print_pathmark(m, point); ++ __ipipe_trace_point_type(buf, point); ++ seq_puts(m, buf); ++ if (verbose_trace) ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ seq_puts(m, " "); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ __ipipe_get_task_info(buf, point, 0); ++ seq_puts(m, buf); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ __ipipe_get_event_date(buf, print_path, point); ++ seq_puts(m, buf); ++ break; ++ ++ default: ++ seq_printf(m, "0x%08lx ", point->v); ++ } ++ ++ time = __ipipe_signed_tsc2us(point->timestamp - ++ print_path->point[print_path->begin].timestamp); ++ seq_printf(m, "%5ld", time); ++ ++ __ipipe_print_delay(m, point); ++ __ipipe_print_symname(m, point->eip); ++ seq_puts(m, " ("); ++ __ipipe_print_symname(m, point->parent_eip); ++ seq_puts(m, ")\n"); ++ ++ return 0; ++} ++ ++static struct seq_operations __ipipe_max_ptrace_ops = { ++ .start = __ipipe_max_prtrace_start, ++ .next = __ipipe_prtrace_next, ++ .stop = __ipipe_prtrace_stop, ++ .show = __ipipe_prtrace_show ++}; ++ ++static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &__ipipe_max_ptrace_ops); ++} ++ ++static ssize_t ++__ipipe_max_reset(struct file *file, const char __user *pbuffer, ++ size_t count, loff_t *data) ++{ ++ mutex_lock(&out_mutex); ++ ipipe_trace_max_reset(); ++ 
mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static const struct file_operations __ipipe_max_prtrace_fops = { ++ .open = __ipipe_max_prtrace_open, ++ .read = seq_read, ++ .write = __ipipe_max_reset, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t n = *pos; ++ ++ mutex_lock(&out_mutex); ++ ++ if (!n) { ++ struct ipipe_trace_path *tp; ++ int cpu; ++ unsigned long flags; ++ ++ /* protect against max_path/frozen_path updates while we ++ * haven't locked our target path, also avoid recursively ++ * taking global_path_lock from NMI context */ ++ flags = __ipipe_global_path_lock(); ++ ++ /* find the first of all per-cpu frozen paths */ ++ print_path = NULL; ++ for_each_online_cpu(cpu) { ++ tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; ++ if (tp->end >= 0) { ++ print_path = tp; ++ break; ++ } ++ } ++ if (print_path) ++ print_path->dump_lock = 1; ++ ++ __ipipe_global_path_unlock(flags); ++ ++ if (!print_path) ++ return NULL; ++ ++ if (!__ipipe_hrclock_ok()) { ++ seq_printf(m, "No hrclock available, dumping traces disabled\n"); ++ return NULL; ++ } ++ ++ /* back- and post-tracing length, post-trace length was frozen ++ in __ipipe_trace, back-trace may have to be reduced due to ++ buffer overrun */ ++ print_pre_trace = back_trace-1; /* substract freeze point */ ++ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - ++ print_path->end - 1); ++ if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) ++ print_pre_trace = IPIPE_TRACE_POINTS - 2 - ++ print_post_trace; ++ ++ seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe release #%d\n" ++ "------------------------------------------------------------\n", ++ UTS_RELEASE, IPIPE_CORE_RELEASE); ++ seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n", ++ cpu, print_path->point[print_path->begin].timestamp, ++ print_pre_trace+1, print_post_trace); ++ __ipipe_print_headline(m); ++ } ++ ++ /* check if we are inside the trace range */ ++ if (n >= print_pre_trace + 1 + print_post_trace) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin- ++ print_pre_trace+n)]; ++} ++ ++static struct seq_operations __ipipe_frozen_ptrace_ops = { ++ .start = __ipipe_frozen_prtrace_start, ++ .next = __ipipe_prtrace_next, ++ .stop = __ipipe_prtrace_stop, ++ .show = __ipipe_prtrace_show ++}; ++ ++static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &__ipipe_frozen_ptrace_ops); ++} ++ ++static ssize_t ++__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer, ++ size_t count, loff_t *data) ++{ ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? 
sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, pbuffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ ipipe_trace_frozen_reset(); ++ if (val > 0) ++ ipipe_trace_freeze(-1); ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static const struct file_operations __ipipe_frozen_prtrace_fops = { ++ .open = __ipipe_frozen_prtrace_open, ++ .read = seq_read, ++ .write = __ipipe_frozen_ctrl, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static int __ipipe_rd_proc_val(struct seq_file *p, void *data) ++{ ++ seq_printf(p, "%u\n", *(int *)p->private); ++ return 0; ++} ++ ++static ssize_t ++__ipipe_wr_proc_val(struct file *file, const char __user *buffer, ++ size_t count, loff_t *data) ++{ ++ struct seq_file *p = file->private_data; ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, buffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ *(int *)p->private = val; ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static int __ipipe_rw_proc_val_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, __ipipe_rd_proc_val, PDE_DATA(inode)); ++} ++ ++static const struct file_operations __ipipe_rw_proc_val_ops = { ++ .open = __ipipe_rw_proc_val_open, ++ .read = seq_read, ++ .write = __ipipe_wr_proc_val, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++static void __init ++__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir, ++ const char *name, int *value_ptr) ++{ ++ proc_create_data(name, 0644, trace_dir, &__ipipe_rw_proc_val_ops, ++ value_ptr); ++} ++ ++static int __ipipe_rd_trigger(struct seq_file *p, void *data) ++{ ++ char str[KSYM_SYMBOL_LEN]; ++ ++ if (trigger_begin) { ++ sprint_symbol(str, trigger_begin); ++ seq_printf(p, "%s\n", str); ++ } ++ return 0; ++} ++ ++static ssize_t ++__ipipe_wr_trigger(struct file *file, const char __user *buffer, ++ size_t count, loff_t *data) ++{ ++ char buf[KSYM_SYMBOL_LEN]; ++ unsigned long begin, end; ++ ++ if (count > sizeof(buf) - 1) ++ count = sizeof(buf) - 1; ++ if (copy_from_user(buf, buffer, count)) ++ return -EFAULT; ++ buf[count] = 0; ++ if (buf[count-1] == '\n') ++ buf[count-1] = 0; ++ ++ begin = kallsyms_lookup_name(buf); ++ if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL)) ++ return -ENOENT; ++ end += begin - 1; ++ ++ mutex_lock(&out_mutex); ++ /* invalidate the current range before setting a new one */ ++ trigger_end = 0; ++ wmb(); ++ ipipe_trace_frozen_reset(); ++ ++ /* set new range */ ++ trigger_begin = begin; ++ wmb(); ++ trigger_end = end; ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static int __ipipe_rw_trigger_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, __ipipe_rd_trigger, NULL); ++} ++ ++static const struct file_operations __ipipe_rw_trigger_ops = { ++ .open = __ipipe_rw_trigger_open, ++ .read = seq_read, ++ .write = __ipipe_wr_trigger, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++ ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++static void notrace ++ipipe_trace_function(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *regs) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ 
__ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0); ++} ++ ++static struct ftrace_ops ipipe_trace_ops = { ++ .func = ipipe_trace_function, ++ .flags = FTRACE_OPS_FL_IPIPE_EXCLUSIVE, ++}; ++ ++static ssize_t __ipipe_wr_enable(struct file *file, const char __user *buffer, ++ size_t count, loff_t *data) ++{ ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, buffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ ++ if (ipipe_trace_enable) { ++ if (!val) ++ unregister_ftrace_function(&ipipe_trace_ops); ++ } else if (val) ++ register_ftrace_function(&ipipe_trace_ops); ++ ++ ipipe_trace_enable = val; ++ ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static const struct file_operations __ipipe_rw_enable_ops = { ++ .open = __ipipe_rw_proc_val_open, ++ .read = seq_read, ++ .write = __ipipe_wr_enable, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ ++ ++extern struct proc_dir_entry *ipipe_proc_root; ++ ++void __init __ipipe_tracer_hrclock_initialized(void) ++{ ++ unsigned long long start, end, min = ULLONG_MAX; ++ int i; ++ ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++ if (!per_cpu(trace_path, 0)) ++ return; ++#endif ++ /* Calculate minimum overhead of __ipipe_trace() */ ++ hard_local_irq_disable(); ++ for (i = 0; i < 100; i++) { ++ ipipe_read_tsc(start); ++ __ipipe_trace(IPIPE_TRACE_FUNC, CALLER_ADDR0, ++ CALLER_ADDR1, 0); ++ ipipe_read_tsc(end); ++ ++ end -= start; ++ if (end < min) ++ min = end; ++ } ++ hard_local_irq_enable(); ++ trace_overhead = ipipe_tsc2ns(min); ++} ++ ++void __init __ipipe_init_tracer(void) ++{ ++ struct proc_dir_entry *trace_dir; ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++ int cpu, path; ++#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ ++ ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++ for_each_possible_cpu(cpu) { ++ struct ipipe_trace_path *tp_buf; ++ ++ tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) * ++ IPIPE_TRACE_PATHS, cpu_to_node(cpu)); ++ if (!tp_buf) { ++ pr_err("I-pipe: " ++ "insufficient memory for trace buffer.\n"); ++ return; ++ } ++ memset(tp_buf, 0, ++ sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); ++ for (path = 0; path < IPIPE_TRACE_PATHS; path++) { ++ tp_buf[path].begin = -1; ++ tp_buf[path].end = -1; ++ } ++ per_cpu(trace_path, cpu) = tp_buf; ++ } ++#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ ++ ++ if (__ipipe_hrclock_ok() && !trace_overhead) ++ __ipipe_tracer_hrclock_initialized(); ++ ++#ifdef CONFIG_IPIPE_TRACE_ENABLE ++ ipipe_trace_enable = 1; ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++ ftrace_enabled = 1; ++ register_ftrace_function(&ipipe_trace_ops); ++#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ ++#endif /* CONFIG_IPIPE_TRACE_ENABLE */ ++ ++ trace_dir = proc_mkdir("trace", ipipe_proc_root); ++ ++ proc_create("max", 0644, trace_dir, &__ipipe_max_prtrace_fops); ++ proc_create("frozen", 0644, trace_dir, &__ipipe_frozen_prtrace_fops); ++ ++ proc_create("trigger", 0644, trace_dir, &__ipipe_rw_trigger_ops); ++ ++ __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points", ++ &pre_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "post_trace_points", ++ &post_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "back_trace_points", ++ &back_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "verbose", ++ &verbose_trace); ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++ proc_create_data("enable", 0644, 
trace_dir, &__ipipe_rw_enable_ops, ++ &ipipe_trace_enable); ++#else /* !CONFIG_IPIPE_TRACE_MCOUNT */ ++ __ipipe_create_trace_proc_val(trace_dir, "enable", ++ &ipipe_trace_enable); ++#endif /* !CONFIG_IPIPE_TRACE_MCOUNT */ ++} +diff -uprN kernel/kernel/irq/chip.c kernel_new/kernel/irq/chip.c +--- kernel/kernel/irq/chip.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/chip.c 2021-04-02 10:23:55.498805866 +0800 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + #include + +@@ -48,6 +49,10 @@ int irq_set_chip(unsigned int irq, struc + + if (!chip) + chip = &no_irq_chip; ++ else ++ WARN_ONCE(IS_ENABLED(CONFIG_IPIPE) && ++ (chip->flags & IRQCHIP_PIPELINE_SAFE) == 0, ++ "irqchip %s is not pipeline-safe!", chip->name); + + desc->irq_data.chip = chip; + irq_put_desc_unlock(desc, flags); +@@ -155,14 +160,6 @@ int irq_set_chip_data(unsigned int irq, + } + EXPORT_SYMBOL(irq_set_chip_data); + +-struct irq_data *irq_get_irq_data(unsigned int irq) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- +- return desc ? &desc->irq_data : NULL; +-} +-EXPORT_SYMBOL_GPL(irq_get_irq_data); +- + static void irq_state_clr_disabled(struct irq_desc *desc) + { + irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED); +@@ -242,9 +239,14 @@ static int __irq_startup(struct irq_desc + WARN_ON_ONCE(!irqd_is_activated(d)); + + if (d->chip->irq_startup) { ++ unsigned long flags = hard_cond_local_irq_save(); + ret = d->chip->irq_startup(d); + irq_state_clr_disabled(desc); + irq_state_clr_masked(desc); ++ hard_cond_local_irq_restore(flags); ++#ifdef CONFIG_IPIPE ++ desc->istate &= ~IPIPE_IRQS_NEEDS_STARTUP; ++#endif + } else { + irq_enable(desc); + } +@@ -309,6 +311,9 @@ void irq_shutdown(struct irq_desc *desc) + desc->irq_data.chip->irq_shutdown(&desc->irq_data); + irq_state_set_disabled(desc); + irq_state_set_masked(desc); ++#ifdef CONFIG_IPIPE ++ desc->istate |= IPIPE_IRQS_NEEDS_STARTUP; ++#endif + } else { + __irq_disable(desc, true); + } +@@ -331,6 +336,8 @@ void irq_shutdown_and_deactivate(struct + + void irq_enable(struct irq_desc *desc) + { ++ unsigned long flags = hard_cond_local_irq_save(); ++ + if (!irqd_irq_disabled(&desc->irq_data)) { + unmask_irq(desc); + } else { +@@ -342,10 +349,14 @@ void irq_enable(struct irq_desc *desc) + unmask_irq(desc); + } + } ++ ++ hard_cond_local_irq_restore(flags); + } + + static void __irq_disable(struct irq_desc *desc, bool mask) + { ++ unsigned long flags = hard_cond_local_irq_save(); ++ + if (irqd_irq_disabled(&desc->irq_data)) { + if (mask) + mask_irq(desc); +@@ -358,6 +369,8 @@ static void __irq_disable(struct irq_des + mask_irq(desc); + } + } ++ ++ hard_cond_local_irq_restore(flags); + } + + /** +@@ -387,11 +400,13 @@ void irq_disable(struct irq_desc *desc) + + void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu) + { ++ unsigned long flags = hard_cond_local_irq_save(); + if (desc->irq_data.chip->irq_enable) + desc->irq_data.chip->irq_enable(&desc->irq_data); + else + desc->irq_data.chip->irq_unmask(&desc->irq_data); + cpumask_set_cpu(cpu, desc->percpu_enabled); ++ hard_cond_local_irq_restore(flags); + } + + void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu) +@@ -428,12 +443,16 @@ void mask_irq(struct irq_desc *desc) + + void unmask_irq(struct irq_desc *desc) + { ++ unsigned long flags; ++ + if (!irqd_irq_masked(&desc->irq_data)) + return; + + if (desc->irq_data.chip->irq_unmask) { ++ flags = hard_cond_local_irq_save(); + desc->irq_data.chip->irq_unmask(&desc->irq_data); + irq_state_clr_masked(desc); ++ 
hard_cond_local_irq_restore(flags); + } + } + +@@ -630,7 +649,9 @@ static void cond_unmask_irq(struct irq_d + void handle_level_irq(struct irq_desc *desc) + { + raw_spin_lock(&desc->lock); ++#ifndef CONFIG_IPIPE + mask_ack_irq(desc); ++#endif + + if (!irq_may_run(desc)) + goto out_unlock; +@@ -666,7 +687,16 @@ static inline void preflow_handler(struc + static inline void preflow_handler(struct irq_desc *desc) { } + #endif + +-static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) ++#ifdef CONFIG_IPIPE ++static void cond_release_fasteoi_irq(struct irq_desc *desc, ++ struct irq_chip *chip) ++{ ++ if (chip->irq_release && ++ !irqd_irq_disabled(&desc->irq_data) && !desc->threads_oneshot) ++ chip->irq_release(&desc->irq_data); ++} ++#else ++static inline void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) + { + if (!(desc->istate & IRQS_ONESHOT)) { + chip->irq_eoi(&desc->irq_data); +@@ -686,6 +716,7 @@ static void cond_unmask_eoi_irq(struct i + chip->irq_eoi(&desc->irq_data); + } + } ++#endif /* !CONFIG_IPIPE */ + + /** + * handle_fasteoi_irq - irq handler for transparent controllers +@@ -718,13 +749,23 @@ void handle_fasteoi_irq(struct irq_desc + } + + kstat_incr_irqs_this_cpu(desc); ++#ifndef CONFIG_IPIPE + if (desc->istate & IRQS_ONESHOT) + mask_irq(desc); ++#endif + + preflow_handler(desc); + handle_irq_event(desc); + ++#ifdef CONFIG_IPIPE ++ /* ++ * IRQCHIP_EOI_IF_HANDLED is ignored as the I-pipe always ++ * sends EOI. ++ */ ++ cond_release_fasteoi_irq(desc, chip); ++#else /* !CONFIG_IPIPE */ + cond_unmask_eoi_irq(desc, chip); ++#endif /* !CONFIG_IPIPE */ + + raw_spin_unlock(&desc->lock); + return; +@@ -807,7 +848,9 @@ void handle_edge_irq(struct irq_desc *de + kstat_incr_irqs_this_cpu(desc); + + /* Start handling the irq */ ++#ifndef CONFIG_IPIPE + desc->irq_data.chip->irq_ack(&desc->irq_data); ++#endif + + do { + if (unlikely(!desc->action)) { +@@ -899,6 +942,11 @@ void handle_percpu_irq(struct irq_desc * + */ + __kstat_incr_irqs_this_cpu(desc); + ++#ifdef CONFIG_IPIPE ++ (void)chip; ++ handle_irq_event_percpu(desc); ++ desc->ipipe_end(desc); ++#else + if (chip->irq_ack) + chip->irq_ack(&desc->irq_data); + +@@ -906,6 +954,7 @@ void handle_percpu_irq(struct irq_desc * + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); ++#endif + } + + /** +@@ -932,13 +981,20 @@ void handle_percpu_devid_irq(struct irq_ + */ + __kstat_incr_irqs_this_cpu(desc); + ++#ifndef CONFIG_IPIPE + if (chip->irq_ack) + chip->irq_ack(&desc->irq_data); ++#endif + + if (likely(action)) { + trace_irq_handler_entry(irq, action); + res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); + trace_irq_handler_exit(irq, action, res); ++#ifdef CONFIG_IPIPE ++ (void)chip; ++ desc->ipipe_end(desc); ++ return; ++#endif + } else { + unsigned int cpu = smp_processor_id(); + bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); +@@ -978,6 +1034,171 @@ void handle_percpu_devid_fasteoi_nmi(str + chip->irq_eoi(&desc->irq_data); + } + ++#ifdef CONFIG_IPIPE ++ ++void __ipipe_ack_level_irq(struct irq_desc *desc) ++{ ++ mask_ack_irq(desc); ++} ++ ++void __ipipe_end_level_irq(struct irq_desc *desc) ++{ ++ desc->irq_data.chip->irq_unmask(&desc->irq_data); ++} ++ ++void __ipipe_ack_fasteoi_irq(struct irq_desc *desc) ++{ ++ if (desc->irq_data.chip->irq_hold) ++ desc->irq_data.chip->irq_hold(&desc->irq_data); ++} ++ ++void __ipipe_end_fasteoi_irq(struct irq_desc *desc) ++{ ++ if (desc->irq_data.chip->irq_release) ++ desc->irq_data.chip->irq_release(&desc->irq_data); ++} ++ ++void 
__ipipe_ack_edge_irq(struct irq_desc *desc) ++{ ++ desc->irq_data.chip->irq_ack(&desc->irq_data); ++} ++ ++void __ipipe_ack_percpu_irq(struct irq_desc *desc) ++{ ++ if (desc->irq_data.chip->irq_ack) ++ desc->irq_data.chip->irq_ack(&desc->irq_data); ++ ++ if (desc->irq_data.chip->irq_eoi) ++ desc->irq_data.chip->irq_eoi(&desc->irq_data); ++} ++ ++void __ipipe_nop_irq(struct irq_desc *desc) ++{ ++} ++ ++void __ipipe_chained_irq(struct irq_desc *desc) ++{ ++ /* ++ * XXX: Do NOT fold this into __ipipe_nop_irq(), see ++ * ipipe_chained_irq_p(). ++ */ ++} ++ ++static void __ipipe_ack_bad_irq(struct irq_desc *desc) ++{ ++ handle_bad_irq(desc); ++ WARN_ON_ONCE(1); ++} ++ ++irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) ++{ ++ if (unlikely(handle == NULL)) { ++ desc->ipipe_ack = __ipipe_ack_bad_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } else { ++ if (is_chained) { ++ desc->ipipe_ack = handle; ++ desc->ipipe_end = __ipipe_nop_irq; ++ handle = __ipipe_chained_irq; ++ } else if (handle == handle_simple_irq) { ++ desc->ipipe_ack = __ipipe_nop_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } else if (handle == handle_level_irq) { ++ desc->ipipe_ack = __ipipe_ack_level_irq; ++ desc->ipipe_end = __ipipe_end_level_irq; ++ } else if (handle == handle_edge_irq) { ++ desc->ipipe_ack = __ipipe_ack_edge_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } else if (handle == handle_fasteoi_irq) { ++ desc->ipipe_ack = __ipipe_ack_fasteoi_irq; ++ desc->ipipe_end = __ipipe_end_fasteoi_irq; ++ } else if (handle == handle_percpu_irq || ++ handle == handle_percpu_devid_irq) { ++ if (irq_desc_get_chip(desc) && ++ irq_desc_get_chip(desc)->irq_hold) { ++ desc->ipipe_ack = __ipipe_ack_fasteoi_irq; ++ desc->ipipe_end = __ipipe_end_fasteoi_irq; ++ } else { ++ desc->ipipe_ack = __ipipe_ack_percpu_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } ++ } else if (irq_desc_get_chip(desc) == &no_irq_chip) { ++ desc->ipipe_ack = __ipipe_nop_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } else { ++ desc->ipipe_ack = __ipipe_ack_bad_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } ++ } ++ ++ /* ++ * We don't cope well with lazy disabling simply because we ++ * neither track nor update the descriptor state bits, which ++ * is badly wrong. ++ */ ++ irq_settings_clr_and_set(desc, 0, _IRQ_DISABLE_UNLAZY); ++ ++ /* Suppress intermediate trampoline routine. 
*/ ++ ipipe_root_domain->irqs[desc->irq_data.irq].ackfn = desc->ipipe_ack; ++ ++ return handle; ++} ++ ++int ipipe_enable_irq(unsigned int irq) ++{ ++ struct irq_desc *desc; ++ struct irq_chip *chip; ++ unsigned long flags; ++ int err; ++ ++ desc = irq_to_desc(irq); ++ if (desc == NULL) ++ return -EINVAL; ++ ++ chip = irq_desc_get_chip(desc); ++ ++ if (chip->irq_startup && (desc->istate & IPIPE_IRQS_NEEDS_STARTUP)) { ++ ++ ipipe_root_only(); ++ ++ err = irq_activate(desc); ++ if (err) ++ return err; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ if (desc->istate & IPIPE_IRQS_NEEDS_STARTUP) { ++ desc->istate &= ~IPIPE_IRQS_NEEDS_STARTUP; ++ chip->irq_startup(&desc->irq_data); ++ } ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ return 0; ++ } ++ ++ if (chip->irq_enable == NULL && chip->irq_unmask == NULL) ++ return -ENOSYS; ++ ++ if (chip->irq_enable) ++ chip->irq_enable(&desc->irq_data); ++ else ++ chip->irq_unmask(&desc->irq_data); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(ipipe_enable_irq); ++ ++#else /* !CONFIG_IPIPE */ ++ ++irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) ++{ ++ return handle; ++} ++ ++#endif /* !CONFIG_IPIPE */ ++EXPORT_SYMBOL_GPL(__fixup_irq_handler); ++ + static void + __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, + int is_chained, const char *name) +@@ -1012,6 +1233,8 @@ __irq_do_set_handler(struct irq_desc *de + return; + } + ++ handle = __fixup_irq_handler(desc, handle, is_chained); ++ + /* Uninstall? */ + if (handle == handle_bad_irq) { + if (desc->irq_data.chip != &no_irq_chip) +@@ -1347,6 +1570,20 @@ void irq_chip_mask_parent(struct irq_dat + } + EXPORT_SYMBOL_GPL(irq_chip_mask_parent); + ++#ifdef CONFIG_IPIPE ++void irq_chip_hold_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_hold(data); ++} ++ ++void irq_chip_release_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_release(data); ++} ++#endif ++ + /** + * irq_chip_unmask_parent - Unmask the parent interrupt + * @data: Pointer to interrupt specific data +diff -uprN kernel/kernel/irq/chip.c.orig kernel_new/kernel/irq/chip.c.orig +--- kernel/kernel/irq/chip.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/irq/chip.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1524 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar ++ * Copyright (C) 2005-2006, Thomas Gleixner, Russell King ++ * ++ * This file contains the core interrupt handling code, for irq-chip based ++ * architectures. Detailed information is available in ++ * Documentation/core-api/genericirq.rst ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "internals.h" ++ ++static irqreturn_t bad_chained_irq(int irq, void *dev_id) ++{ ++ WARN_ONCE(1, "Chained irq %d should not call an action\n", irq); ++ return IRQ_NONE; ++} ++ ++/* ++ * Chained handlers should never call action on their IRQ. This default ++ * action will emit warning if such thing happens. 
++ */ ++struct irqaction chained_action = { ++ .handler = bad_chained_irq, ++}; ++ ++/** ++ * irq_set_chip - set the irq chip for an irq ++ * @irq: irq number ++ * @chip: pointer to irq chip description structure ++ */ ++int irq_set_chip(unsigned int irq, struct irq_chip *chip) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ ++ if (!desc) ++ return -EINVAL; ++ ++ if (!chip) ++ chip = &no_irq_chip; ++ ++ desc->irq_data.chip = chip; ++ irq_put_desc_unlock(desc, flags); ++ /* ++ * For !CONFIG_SPARSE_IRQ make the irq show up in ++ * allocated_irqs. ++ */ ++ irq_mark_irq(irq); ++ return 0; ++} ++EXPORT_SYMBOL(irq_set_chip); ++ ++/** ++ * irq_set_type - set the irq trigger type for an irq ++ * @irq: irq number ++ * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h ++ */ ++int irq_set_irq_type(unsigned int irq, unsigned int type) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ int ret = 0; ++ ++ if (!desc) ++ return -EINVAL; ++ ++ ret = __irq_set_trigger(desc, type); ++ irq_put_desc_busunlock(desc, flags); ++ return ret; ++} ++EXPORT_SYMBOL(irq_set_irq_type); ++ ++/** ++ * irq_set_handler_data - set irq handler data for an irq ++ * @irq: Interrupt number ++ * @data: Pointer to interrupt specific data ++ * ++ * Set the hardware irq controller data for an irq ++ */ ++int irq_set_handler_data(unsigned int irq, void *data) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ ++ if (!desc) ++ return -EINVAL; ++ desc->irq_common_data.handler_data = data; ++ irq_put_desc_unlock(desc, flags); ++ return 0; ++} ++EXPORT_SYMBOL(irq_set_handler_data); ++ ++/** ++ * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset ++ * @irq_base: Interrupt number base ++ * @irq_offset: Interrupt number offset ++ * @entry: Pointer to MSI descriptor data ++ * ++ * Set the MSI descriptor entry for an irq at offset ++ */ ++int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, ++ struct msi_desc *entry) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ ++ if (!desc) ++ return -EINVAL; ++ desc->irq_common_data.msi_desc = entry; ++ if (entry && !irq_offset) ++ entry->irq = irq_base; ++ irq_put_desc_unlock(desc, flags); ++ return 0; ++} ++ ++/** ++ * irq_set_msi_desc - set MSI descriptor data for an irq ++ * @irq: Interrupt number ++ * @entry: Pointer to MSI descriptor data ++ * ++ * Set the MSI descriptor entry for an irq ++ */ ++int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) ++{ ++ return irq_set_msi_desc_off(irq, 0, entry); ++} ++ ++/** ++ * irq_set_chip_data - set irq chip data for an irq ++ * @irq: Interrupt number ++ * @data: Pointer to chip specific data ++ * ++ * Set the hardware irq chip data for an irq ++ */ ++int irq_set_chip_data(unsigned int irq, void *data) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ ++ if (!desc) ++ return -EINVAL; ++ desc->irq_data.chip_data = data; ++ irq_put_desc_unlock(desc, flags); ++ return 0; ++} ++EXPORT_SYMBOL(irq_set_chip_data); ++ ++struct irq_data *irq_get_irq_data(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ return desc ? 
&desc->irq_data : NULL; ++} ++EXPORT_SYMBOL_GPL(irq_get_irq_data); ++ ++static void irq_state_clr_disabled(struct irq_desc *desc) ++{ ++ irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED); ++} ++ ++static void irq_state_clr_masked(struct irq_desc *desc) ++{ ++ irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED); ++} ++ ++static void irq_state_clr_started(struct irq_desc *desc) ++{ ++ irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED); ++} ++ ++static void irq_state_set_started(struct irq_desc *desc) ++{ ++ irqd_set(&desc->irq_data, IRQD_IRQ_STARTED); ++} ++ ++enum { ++ IRQ_STARTUP_NORMAL, ++ IRQ_STARTUP_MANAGED, ++ IRQ_STARTUP_ABORT, ++}; ++ ++#ifdef CONFIG_SMP ++static int ++__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ ++ if (!irqd_affinity_is_managed(d)) ++ return IRQ_STARTUP_NORMAL; ++ ++ irqd_clr_managed_shutdown(d); ++ ++ if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) { ++ /* ++ * Catch code which fiddles with enable_irq() on a managed ++ * and potentially shutdown IRQ. Chained interrupt ++ * installment or irq auto probing should not happen on ++ * managed irqs either. ++ */ ++ if (WARN_ON_ONCE(force)) ++ return IRQ_STARTUP_ABORT; ++ /* ++ * The interrupt was requested, but there is no online CPU ++ * in it's affinity mask. Put it into managed shutdown ++ * state and let the cpu hotplug mechanism start it up once ++ * a CPU in the mask becomes available. ++ */ ++ return IRQ_STARTUP_ABORT; ++ } ++ /* ++ * Managed interrupts have reserved resources, so this should not ++ * happen. ++ */ ++ if (WARN_ON(irq_domain_activate_irq(d, false))) ++ return IRQ_STARTUP_ABORT; ++ return IRQ_STARTUP_MANAGED; ++} ++#else ++static __always_inline int ++__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) ++{ ++ return IRQ_STARTUP_NORMAL; ++} ++#endif ++ ++static int __irq_startup(struct irq_desc *desc) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ int ret = 0; ++ ++ /* Warn if this interrupt is not activated but try nevertheless */ ++ WARN_ON_ONCE(!irqd_is_activated(d)); ++ ++ if (d->chip->irq_startup) { ++ ret = d->chip->irq_startup(d); ++ irq_state_clr_disabled(desc); ++ irq_state_clr_masked(desc); ++ } else { ++ irq_enable(desc); ++ } ++ irq_state_set_started(desc); ++ return ret; ++} ++ ++int irq_startup(struct irq_desc *desc, bool resend, bool force) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ struct cpumask *aff = irq_data_get_affinity_mask(d); ++ int ret = 0; ++ ++ desc->depth = 0; ++ ++ if (irqd_is_started(d)) { ++ irq_enable(desc); ++ } else { ++ switch (__irq_startup_managed(desc, aff, force)) { ++ case IRQ_STARTUP_NORMAL: ++ ret = __irq_startup(desc); ++ irq_setup_affinity(desc); ++ break; ++ case IRQ_STARTUP_MANAGED: ++ irq_do_set_affinity(d, aff, false); ++ ret = __irq_startup(desc); ++ break; ++ case IRQ_STARTUP_ABORT: ++ irqd_set_managed_shutdown(d); ++ return 0; ++ } ++ } ++ if (resend) ++ check_irq_resend(desc); ++ ++ return ret; ++} ++ ++int irq_activate(struct irq_desc *desc) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ ++ if (!irqd_affinity_is_managed(d)) ++ return irq_domain_activate_irq(d, false); ++ return 0; ++} ++ ++int irq_activate_and_startup(struct irq_desc *desc, bool resend) ++{ ++ if (WARN_ON(irq_activate(desc))) ++ return 0; ++ return irq_startup(desc, resend, IRQ_START_FORCE); ++} ++ ++static void __irq_disable(struct irq_desc *desc, bool mask); ++ ++void irq_shutdown(struct irq_desc *desc) ++{ ++ if 
(irqd_is_started(&desc->irq_data)) { ++ desc->depth = 1; ++ if (desc->irq_data.chip->irq_shutdown) { ++ desc->irq_data.chip->irq_shutdown(&desc->irq_data); ++ irq_state_set_disabled(desc); ++ irq_state_set_masked(desc); ++ } else { ++ __irq_disable(desc, true); ++ } ++ irq_state_clr_started(desc); ++ } ++} ++ ++ ++void irq_shutdown_and_deactivate(struct irq_desc *desc) ++{ ++ irq_shutdown(desc); ++ /* ++ * This must be called even if the interrupt was never started up, ++ * because the activation can happen before the interrupt is ++ * available for request/startup. It has it's own state tracking so ++ * it's safe to call it unconditionally. ++ */ ++ irq_domain_deactivate_irq(&desc->irq_data); ++} ++ ++void irq_enable(struct irq_desc *desc) ++{ ++ if (!irqd_irq_disabled(&desc->irq_data)) { ++ unmask_irq(desc); ++ } else { ++ irq_state_clr_disabled(desc); ++ if (desc->irq_data.chip->irq_enable) { ++ desc->irq_data.chip->irq_enable(&desc->irq_data); ++ irq_state_clr_masked(desc); ++ } else { ++ unmask_irq(desc); ++ } ++ } ++} ++ ++static void __irq_disable(struct irq_desc *desc, bool mask) ++{ ++ if (irqd_irq_disabled(&desc->irq_data)) { ++ if (mask) ++ mask_irq(desc); ++ } else { ++ irq_state_set_disabled(desc); ++ if (desc->irq_data.chip->irq_disable) { ++ desc->irq_data.chip->irq_disable(&desc->irq_data); ++ irq_state_set_masked(desc); ++ } else if (mask) { ++ mask_irq(desc); ++ } ++ } ++} ++ ++/** ++ * irq_disable - Mark interrupt disabled ++ * @desc: irq descriptor which should be disabled ++ * ++ * If the chip does not implement the irq_disable callback, we ++ * use a lazy disable approach. That means we mark the interrupt ++ * disabled, but leave the hardware unmasked. That's an ++ * optimization because we avoid the hardware access for the ++ * common case where no interrupt happens after we marked it ++ * disabled. If an interrupt happens, then the interrupt flow ++ * handler masks the line at the hardware level and marks it ++ * pending. ++ * ++ * If the interrupt chip does not implement the irq_disable callback, ++ * a driver can disable the lazy approach for a particular irq line by ++ * calling 'irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY)'. This can ++ * be used for devices which cannot disable the interrupt at the ++ * device level under certain circumstances and have to use ++ * disable_irq[_nosync] instead. 
++ */ ++void irq_disable(struct irq_desc *desc) ++{ ++ __irq_disable(desc, irq_settings_disable_unlazy(desc)); ++} ++ ++void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu) ++{ ++ if (desc->irq_data.chip->irq_enable) ++ desc->irq_data.chip->irq_enable(&desc->irq_data); ++ else ++ desc->irq_data.chip->irq_unmask(&desc->irq_data); ++ cpumask_set_cpu(cpu, desc->percpu_enabled); ++} ++ ++void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu) ++{ ++ if (desc->irq_data.chip->irq_disable) ++ desc->irq_data.chip->irq_disable(&desc->irq_data); ++ else ++ desc->irq_data.chip->irq_mask(&desc->irq_data); ++ cpumask_clear_cpu(cpu, desc->percpu_enabled); ++} ++ ++static inline void mask_ack_irq(struct irq_desc *desc) ++{ ++ if (desc->irq_data.chip->irq_mask_ack) { ++ desc->irq_data.chip->irq_mask_ack(&desc->irq_data); ++ irq_state_set_masked(desc); ++ } else { ++ mask_irq(desc); ++ if (desc->irq_data.chip->irq_ack) ++ desc->irq_data.chip->irq_ack(&desc->irq_data); ++ } ++} ++ ++void mask_irq(struct irq_desc *desc) ++{ ++ if (irqd_irq_masked(&desc->irq_data)) ++ return; ++ ++ if (desc->irq_data.chip->irq_mask) { ++ desc->irq_data.chip->irq_mask(&desc->irq_data); ++ irq_state_set_masked(desc); ++ } ++} ++ ++void unmask_irq(struct irq_desc *desc) ++{ ++ if (!irqd_irq_masked(&desc->irq_data)) ++ return; ++ ++ if (desc->irq_data.chip->irq_unmask) { ++ desc->irq_data.chip->irq_unmask(&desc->irq_data); ++ irq_state_clr_masked(desc); ++ } ++} ++ ++void unmask_threaded_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = desc->irq_data.chip; ++ ++ if (chip->flags & IRQCHIP_EOI_THREADED) ++ chip->irq_eoi(&desc->irq_data); ++ ++ unmask_irq(desc); ++} ++ ++/* ++ * handle_nested_irq - Handle a nested irq from a irq thread ++ * @irq: the interrupt number ++ * ++ * Handle interrupts which are nested into a threaded interrupt ++ * handler. The handler function is called inside the calling ++ * threads context. ++ */ ++void handle_nested_irq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irqaction *action; ++ irqreturn_t action_ret; ++ ++ might_sleep(); ++ ++ raw_spin_lock_irq(&desc->lock); ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ action = desc->action; ++ if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ goto out_unlock; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); ++ raw_spin_unlock_irq(&desc->lock); ++ ++ action_ret = IRQ_NONE; ++ for_each_action_of_desc(desc, action) ++ action_ret |= action->thread_fn(action->irq, action->dev_id); ++ ++ if (!noirqdebug) ++ note_interrupt(desc, action_ret); ++ ++ raw_spin_lock_irq(&desc->lock); ++ irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); ++ ++out_unlock: ++ raw_spin_unlock_irq(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_nested_irq); ++ ++static bool irq_check_poll(struct irq_desc *desc) ++{ ++ if (!(desc->istate & IRQS_POLL_INPROGRESS)) ++ return false; ++ return irq_wait_for_poll(desc); ++} ++ ++static bool irq_may_run(struct irq_desc *desc) ++{ ++ unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED; ++ ++ /* ++ * If the interrupt is not in progress and is not an armed ++ * wakeup interrupt, proceed. ++ */ ++ if (!irqd_has_set(&desc->irq_data, mask)) ++ return true; ++ ++ /* ++ * If the interrupt is an armed wakeup source, mark it pending ++ * and suspended, disable it and notify the pm core about the ++ * event. 
++ */ ++ if (irq_pm_check_wakeup(desc)) ++ return false; ++ ++ /* ++ * Handle a potential concurrent poll on a different core. ++ */ ++ return irq_check_poll(desc); ++} ++ ++/** ++ * handle_simple_irq - Simple and software-decoded IRQs. ++ * @desc: the interrupt description structure for this irq ++ * ++ * Simple interrupts are either sent from a demultiplexing interrupt ++ * handler or come from hardware, where no interrupt hardware control ++ * is necessary. ++ * ++ * Note: The caller is expected to handle the ack, clear, mask and ++ * unmask issues if necessary. ++ */ ++void handle_simple_irq(struct irq_desc *desc) ++{ ++ raw_spin_lock(&desc->lock); ++ ++ if (!irq_may_run(desc)) ++ goto out_unlock; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ goto out_unlock; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ handle_irq_event(desc); ++ ++out_unlock: ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_simple_irq); ++ ++/** ++ * handle_untracked_irq - Simple and software-decoded IRQs. ++ * @desc: the interrupt description structure for this irq ++ * ++ * Untracked interrupts are sent from a demultiplexing interrupt ++ * handler when the demultiplexer does not know which device it its ++ * multiplexed irq domain generated the interrupt. IRQ's handled ++ * through here are not subjected to stats tracking, randomness, or ++ * spurious interrupt detection. ++ * ++ * Note: Like handle_simple_irq, the caller is expected to handle ++ * the ack, clear, mask and unmask issues if necessary. ++ */ ++void handle_untracked_irq(struct irq_desc *desc) ++{ ++ unsigned int flags = 0; ++ ++ raw_spin_lock(&desc->lock); ++ ++ if (!irq_may_run(desc)) ++ goto out_unlock; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ goto out_unlock; ++ } ++ ++ desc->istate &= ~IRQS_PENDING; ++ irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); ++ raw_spin_unlock(&desc->lock); ++ ++ __handle_irq_event_percpu(desc, &flags); ++ ++ raw_spin_lock(&desc->lock); ++ irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); ++ ++out_unlock: ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_untracked_irq); ++ ++/* ++ * Called unconditionally from handle_level_irq() and only for oneshot ++ * interrupts from handle_fasteoi_irq() ++ */ ++static void cond_unmask_irq(struct irq_desc *desc) ++{ ++ /* ++ * We need to unmask in the following cases: ++ * - Standard level irq (IRQF_ONESHOT is not set) ++ * - Oneshot irq which did not wake the thread (caused by a ++ * spurious interrupt or a primary handler handling it ++ * completely). ++ */ ++ if (!irqd_irq_disabled(&desc->irq_data) && ++ irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) ++ unmask_irq(desc); ++} ++ ++/** ++ * handle_level_irq - Level type irq handler ++ * @desc: the interrupt description structure for this irq ++ * ++ * Level type interrupts are active as long as the hardware line has ++ * the active level. This may require to mask the interrupt and unmask ++ * it after the associated handler has acknowledged the device, so the ++ * interrupt line is back to inactive. 
++ */ ++void handle_level_irq(struct irq_desc *desc) ++{ ++ raw_spin_lock(&desc->lock); ++ mask_ack_irq(desc); ++ ++ if (!irq_may_run(desc)) ++ goto out_unlock; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ /* ++ * If its disabled or no action available ++ * keep it masked and get out of here ++ */ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ goto out_unlock; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ handle_irq_event(desc); ++ ++ cond_unmask_irq(desc); ++ ++out_unlock: ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_level_irq); ++ ++#ifdef CONFIG_IRQ_PREFLOW_FASTEOI ++static inline void preflow_handler(struct irq_desc *desc) ++{ ++ if (desc->preflow_handler) ++ desc->preflow_handler(&desc->irq_data); ++} ++#else ++static inline void preflow_handler(struct irq_desc *desc) { } ++#endif ++ ++static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) ++{ ++ if (!(desc->istate & IRQS_ONESHOT)) { ++ chip->irq_eoi(&desc->irq_data); ++ return; ++ } ++ /* ++ * We need to unmask in the following cases: ++ * - Oneshot irq which did not wake the thread (caused by a ++ * spurious interrupt or a primary handler handling it ++ * completely). ++ */ ++ if (!irqd_irq_disabled(&desc->irq_data) && ++ irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) { ++ chip->irq_eoi(&desc->irq_data); ++ unmask_irq(desc); ++ } else if (!(chip->flags & IRQCHIP_EOI_THREADED)) { ++ chip->irq_eoi(&desc->irq_data); ++ } ++} ++ ++/** ++ * handle_fasteoi_irq - irq handler for transparent controllers ++ * @desc: the interrupt description structure for this irq ++ * ++ * Only a single callback will be issued to the chip: an ->eoi() ++ * call when the interrupt has been serviced. This enables support ++ * for modern forms of interrupt handlers, which handle the flow ++ * details in hardware, transparently. ++ */ ++void handle_fasteoi_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = desc->irq_data.chip; ++ ++ raw_spin_lock(&desc->lock); ++ ++ if (!irq_may_run(desc)) ++ goto out; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ /* ++ * If its disabled or no action available ++ * then mask it and get out of here: ++ */ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ mask_irq(desc); ++ goto out; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ if (desc->istate & IRQS_ONESHOT) ++ mask_irq(desc); ++ ++ preflow_handler(desc); ++ handle_irq_event(desc); ++ ++ cond_unmask_eoi_irq(desc, chip); ++ ++ raw_spin_unlock(&desc->lock); ++ return; ++out: ++ if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) ++ chip->irq_eoi(&desc->irq_data); ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_fasteoi_irq); ++ ++/** ++ * handle_fasteoi_nmi - irq handler for NMI interrupt lines ++ * @desc: the interrupt description structure for this irq ++ * ++ * A simple NMI-safe handler, considering the restrictions ++ * from request_nmi. ++ * ++ * Only a single callback will be issued to the chip: an ->eoi() ++ * call when the interrupt has been serviced. This enables support ++ * for modern forms of interrupt handlers, which handle the flow ++ * details in hardware, transparently. 
++ */ ++void handle_fasteoi_nmi(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = irq_desc_get_chip(desc); ++ struct irqaction *action = desc->action; ++ unsigned int irq = irq_desc_get_irq(desc); ++ irqreturn_t res; ++ ++ this_cpu_inc(*desc->kstat_irqs); ++ trace_irq_handler_entry(irq, action); ++ /* ++ * NMIs cannot be shared, there is only one action. ++ */ ++ res = action->handler(irq, action->dev_id); ++ trace_irq_handler_exit(irq, action, res); ++ ++ if (chip->irq_eoi) ++ chip->irq_eoi(&desc->irq_data); ++} ++EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); ++ ++/** ++ * handle_edge_irq - edge type IRQ handler ++ * @desc: the interrupt description structure for this irq ++ * ++ * Interrupt occures on the falling and/or rising edge of a hardware ++ * signal. The occurrence is latched into the irq controller hardware ++ * and must be acked in order to be reenabled. After the ack another ++ * interrupt can happen on the same source even before the first one ++ * is handled by the associated event handler. If this happens it ++ * might be necessary to disable (mask) the interrupt depending on the ++ * controller hardware. This requires to reenable the interrupt inside ++ * of the loop which handles the interrupts which have arrived while ++ * the handler was running. If all pending interrupts are handled, the ++ * loop is left. ++ */ ++void handle_edge_irq(struct irq_desc *desc) ++{ ++ raw_spin_lock(&desc->lock); ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ if (!irq_may_run(desc)) { ++ desc->istate |= IRQS_PENDING; ++ mask_ack_irq(desc); ++ goto out_unlock; ++ } ++ ++ /* ++ * If its disabled or no action available then mask it and get ++ * out of here. ++ */ ++ if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { ++ desc->istate |= IRQS_PENDING; ++ mask_ack_irq(desc); ++ goto out_unlock; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ ++ /* Start handling the irq */ ++ desc->irq_data.chip->irq_ack(&desc->irq_data); ++ ++ do { ++ if (unlikely(!desc->action)) { ++ mask_irq(desc); ++ goto out_unlock; ++ } ++ ++ /* ++ * When another irq arrived while we were handling ++ * one, we could have masked the irq. ++ * Renable it, if it was not disabled in meantime. ++ */ ++ if (unlikely(desc->istate & IRQS_PENDING)) { ++ if (!irqd_irq_disabled(&desc->irq_data) && ++ irqd_irq_masked(&desc->irq_data)) ++ unmask_irq(desc); ++ } ++ ++ handle_irq_event(desc); ++ ++ } while ((desc->istate & IRQS_PENDING) && ++ !irqd_irq_disabled(&desc->irq_data)); ++ ++out_unlock: ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL(handle_edge_irq); ++ ++#ifdef CONFIG_IRQ_EDGE_EOI_HANDLER ++/** ++ * handle_edge_eoi_irq - edge eoi type IRQ handler ++ * @desc: the interrupt description structure for this irq ++ * ++ * Similar as the above handle_edge_irq, but using eoi and w/o the ++ * mask/unmask logic. ++ */ ++void handle_edge_eoi_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = irq_desc_get_chip(desc); ++ ++ raw_spin_lock(&desc->lock); ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ if (!irq_may_run(desc)) { ++ desc->istate |= IRQS_PENDING; ++ goto out_eoi; ++ } ++ ++ /* ++ * If its disabled or no action available then mask it and get ++ * out of here. 
++ */ ++ if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { ++ desc->istate |= IRQS_PENDING; ++ goto out_eoi; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ ++ do { ++ if (unlikely(!desc->action)) ++ goto out_eoi; ++ ++ handle_irq_event(desc); ++ ++ } while ((desc->istate & IRQS_PENDING) && ++ !irqd_irq_disabled(&desc->irq_data)); ++ ++out_eoi: ++ chip->irq_eoi(&desc->irq_data); ++ raw_spin_unlock(&desc->lock); ++} ++#endif ++ ++/** ++ * handle_percpu_irq - Per CPU local irq handler ++ * @desc: the interrupt description structure for this irq ++ * ++ * Per CPU interrupts on SMP machines without locking requirements ++ */ ++void handle_percpu_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = irq_desc_get_chip(desc); ++ ++ /* ++ * PER CPU interrupts are not serialized. Do not touch ++ * desc->tot_count. ++ */ ++ __kstat_incr_irqs_this_cpu(desc); ++ ++ if (chip->irq_ack) ++ chip->irq_ack(&desc->irq_data); ++ ++ handle_irq_event_percpu(desc); ++ ++ if (chip->irq_eoi) ++ chip->irq_eoi(&desc->irq_data); ++} ++ ++/** ++ * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids ++ * @desc: the interrupt description structure for this irq ++ * ++ * Per CPU interrupts on SMP machines without locking requirements. Same as ++ * handle_percpu_irq() above but with the following extras: ++ * ++ * action->percpu_dev_id is a pointer to percpu variables which ++ * contain the real device id for the cpu on which this handler is ++ * called ++ */ ++void handle_percpu_devid_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = irq_desc_get_chip(desc); ++ struct irqaction *action = desc->action; ++ unsigned int irq = irq_desc_get_irq(desc); ++ irqreturn_t res; ++ ++ /* ++ * PER CPU interrupts are not serialized. Do not touch ++ * desc->tot_count. ++ */ ++ __kstat_incr_irqs_this_cpu(desc); ++ ++ if (chip->irq_ack) ++ chip->irq_ack(&desc->irq_data); ++ ++ if (likely(action)) { ++ trace_irq_handler_entry(irq, action); ++ res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); ++ trace_irq_handler_exit(irq, action, res); ++ } else { ++ unsigned int cpu = smp_processor_id(); ++ bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); ++ ++ if (enabled) ++ irq_percpu_disable(desc, cpu); ++ ++ pr_err_once("Spurious%s percpu IRQ%u on CPU%u\n", ++ enabled ? " and unmasked" : "", irq, cpu); ++ } ++ ++ if (chip->irq_eoi) ++ chip->irq_eoi(&desc->irq_data); ++} ++ ++/** ++ * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu ++ * dev ids ++ * @desc: the interrupt description structure for this irq ++ * ++ * Similar to handle_fasteoi_nmi, but handling the dev_id cookie ++ * as a percpu pointer. 
++ */ ++void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = irq_desc_get_chip(desc); ++ struct irqaction *action = desc->action; ++ unsigned int irq = irq_desc_get_irq(desc); ++ irqreturn_t res; ++ ++ this_cpu_inc(*desc->kstat_irqs); ++ trace_irq_handler_entry(irq, action); ++ res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); ++ trace_irq_handler_exit(irq, action, res); ++ ++ if (chip->irq_eoi) ++ chip->irq_eoi(&desc->irq_data); ++} ++ ++static void ++__irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, ++ int is_chained, const char *name) ++{ ++ if (!handle) { ++ handle = handle_bad_irq; ++ } else { ++ struct irq_data *irq_data = &desc->irq_data; ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ /* ++ * With hierarchical domains we might run into a ++ * situation where the outermost chip is not yet set ++ * up, but the inner chips are there. Instead of ++ * bailing we install the handler, but obviously we ++ * cannot enable/startup the interrupt at this point. ++ */ ++ while (irq_data) { ++ if (irq_data->chip != &no_irq_chip) ++ break; ++ /* ++ * Bail out if the outer chip is not set up ++ * and the interrrupt supposed to be started ++ * right away. ++ */ ++ if (WARN_ON(is_chained)) ++ return; ++ /* Try the parent */ ++ irq_data = irq_data->parent_data; ++ } ++#endif ++ if (WARN_ON(!irq_data || irq_data->chip == &no_irq_chip)) ++ return; ++ } ++ ++ /* Uninstall? */ ++ if (handle == handle_bad_irq) { ++ if (desc->irq_data.chip != &no_irq_chip) ++ mask_ack_irq(desc); ++ irq_state_set_disabled(desc); ++ if (is_chained) ++ desc->action = NULL; ++ desc->depth = 1; ++ } ++ desc->handle_irq = handle; ++ desc->name = name; ++ ++ if (handle != handle_bad_irq && is_chained) { ++ unsigned int type = irqd_get_trigger_type(&desc->irq_data); ++ ++ /* ++ * We're about to start this interrupt immediately, ++ * hence the need to set the trigger configuration. ++ * But the .set_type callback may have overridden the ++ * flow handler, ignoring that we're dealing with a ++ * chained interrupt. Reset it immediately because we ++ * do know better. 
++ */ ++ if (type != IRQ_TYPE_NONE) { ++ __irq_set_trigger(desc, type); ++ desc->handle_irq = handle; ++ } ++ ++ irq_settings_set_noprobe(desc); ++ irq_settings_set_norequest(desc); ++ irq_settings_set_nothread(desc); ++ desc->action = &chained_action; ++ irq_activate_and_startup(desc, IRQ_RESEND); ++ } ++} ++ ++void ++__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, ++ const char *name) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); ++ ++ if (!desc) ++ return; ++ ++ __irq_do_set_handler(desc, handle, is_chained, name); ++ irq_put_desc_busunlock(desc, flags); ++} ++EXPORT_SYMBOL_GPL(__irq_set_handler); ++ ++void ++irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, ++ void *data) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); ++ ++ if (!desc) ++ return; ++ ++ desc->irq_common_data.handler_data = data; ++ __irq_do_set_handler(desc, handle, 1, NULL); ++ ++ irq_put_desc_busunlock(desc, flags); ++} ++EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data); ++ ++void ++irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, ++ irq_flow_handler_t handle, const char *name) ++{ ++ irq_set_chip(irq, chip); ++ __irq_set_handler(irq, handle, 0, name); ++} ++EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name); ++ ++void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) ++{ ++ unsigned long flags, trigger, tmp; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ ++ if (!desc) ++ return; ++ ++ /* ++ * Warn when a driver sets the no autoenable flag on an already ++ * active interrupt. ++ */ ++ WARN_ON_ONCE(!desc->depth && (set & _IRQ_NOAUTOEN)); ++ ++ irq_settings_clr_and_set(desc, clr, set); ++ ++ trigger = irqd_get_trigger_type(&desc->irq_data); ++ ++ irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | ++ IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT); ++ if (irq_settings_has_no_balance_set(desc)) ++ irqd_set(&desc->irq_data, IRQD_NO_BALANCING); ++ if (irq_settings_is_per_cpu(desc)) ++ irqd_set(&desc->irq_data, IRQD_PER_CPU); ++ if (irq_settings_can_move_pcntxt(desc)) ++ irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT); ++ if (irq_settings_is_level(desc)) ++ irqd_set(&desc->irq_data, IRQD_LEVEL); ++ ++ tmp = irq_settings_get_trigger_mask(desc); ++ if (tmp != IRQ_TYPE_NONE) ++ trigger = tmp; ++ ++ irqd_set(&desc->irq_data, trigger); ++ ++ irq_put_desc_unlock(desc, flags); ++} ++EXPORT_SYMBOL_GPL(irq_modify_status); ++ ++/** ++ * irq_cpu_online - Invoke all irq_cpu_online functions. ++ * ++ * Iterate through all irqs and invoke the chip.irq_cpu_online() ++ * for each. ++ */ ++void irq_cpu_online(void) ++{ ++ struct irq_desc *desc; ++ struct irq_chip *chip; ++ unsigned long flags; ++ unsigned int irq; ++ ++ for_each_active_irq(irq) { ++ desc = irq_to_desc(irq); ++ if (!desc) ++ continue; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ chip = irq_data_get_irq_chip(&desc->irq_data); ++ if (chip && chip->irq_cpu_online && ++ (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || ++ !irqd_irq_disabled(&desc->irq_data))) ++ chip->irq_cpu_online(&desc->irq_data); ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ } ++} ++ ++/** ++ * irq_cpu_offline - Invoke all irq_cpu_offline functions. ++ * ++ * Iterate through all irqs and invoke the chip.irq_cpu_offline() ++ * for each. 
++ */ ++void irq_cpu_offline(void) ++{ ++ struct irq_desc *desc; ++ struct irq_chip *chip; ++ unsigned long flags; ++ unsigned int irq; ++ ++ for_each_active_irq(irq) { ++ desc = irq_to_desc(irq); ++ if (!desc) ++ continue; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ chip = irq_data_get_irq_chip(&desc->irq_data); ++ if (chip && chip->irq_cpu_offline && ++ (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || ++ !irqd_irq_disabled(&desc->irq_data))) ++ chip->irq_cpu_offline(&desc->irq_data); ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ } ++} ++ ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ ++#ifdef CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS ++/** ++ * handle_fasteoi_ack_irq - irq handler for edge hierarchy ++ * stacked on transparent controllers ++ * ++ * @desc: the interrupt description structure for this irq ++ * ++ * Like handle_fasteoi_irq(), but for use with hierarchy where ++ * the irq_chip also needs to have its ->irq_ack() function ++ * called. ++ */ ++void handle_fasteoi_ack_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = desc->irq_data.chip; ++ ++ raw_spin_lock(&desc->lock); ++ ++ if (!irq_may_run(desc)) ++ goto out; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ /* ++ * If its disabled or no action available ++ * then mask it and get out of here: ++ */ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ mask_irq(desc); ++ goto out; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ if (desc->istate & IRQS_ONESHOT) ++ mask_irq(desc); ++ ++ /* Start handling the irq */ ++ desc->irq_data.chip->irq_ack(&desc->irq_data); ++ ++ preflow_handler(desc); ++ handle_irq_event(desc); ++ ++ cond_unmask_eoi_irq(desc, chip); ++ ++ raw_spin_unlock(&desc->lock); ++ return; ++out: ++ if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) ++ chip->irq_eoi(&desc->irq_data); ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_fasteoi_ack_irq); ++ ++/** ++ * handle_fasteoi_mask_irq - irq handler for level hierarchy ++ * stacked on transparent controllers ++ * ++ * @desc: the interrupt description structure for this irq ++ * ++ * Like handle_fasteoi_irq(), but for use with hierarchy where ++ * the irq_chip also needs to have its ->irq_mask_ack() function ++ * called. 
++ */ ++void handle_fasteoi_mask_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = desc->irq_data.chip; ++ ++ raw_spin_lock(&desc->lock); ++ mask_ack_irq(desc); ++ ++ if (!irq_may_run(desc)) ++ goto out; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ /* ++ * If its disabled or no action available ++ * then mask it and get out of here: ++ */ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ mask_irq(desc); ++ goto out; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ if (desc->istate & IRQS_ONESHOT) ++ mask_irq(desc); ++ ++ preflow_handler(desc); ++ handle_irq_event(desc); ++ ++ cond_unmask_eoi_irq(desc, chip); ++ ++ raw_spin_unlock(&desc->lock); ++ return; ++out: ++ if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) ++ chip->irq_eoi(&desc->irq_data); ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq); ++ ++#endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */ ++ ++/** ++ * irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if ++ * NULL) ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_enable_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ if (data->chip->irq_enable) ++ data->chip->irq_enable(data); ++ else ++ data->chip->irq_unmask(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_enable_parent); ++ ++/** ++ * irq_chip_disable_parent - Disable the parent interrupt (defaults to mask if ++ * NULL) ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_disable_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ if (data->chip->irq_disable) ++ data->chip->irq_disable(data); ++ else ++ data->chip->irq_mask(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_disable_parent); ++ ++/** ++ * irq_chip_ack_parent - Acknowledge the parent interrupt ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_ack_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_ack(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_ack_parent); ++ ++/** ++ * irq_chip_mask_parent - Mask the parent interrupt ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_mask_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_mask(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_mask_parent); ++ ++/** ++ * irq_chip_unmask_parent - Unmask the parent interrupt ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_unmask_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_unmask(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_unmask_parent); ++ ++/** ++ * irq_chip_eoi_parent - Invoke EOI on the parent interrupt ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_eoi_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_eoi(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_eoi_parent); ++ ++/** ++ * irq_chip_set_affinity_parent - Set affinity on the parent interrupt ++ * @data: Pointer to interrupt specific data ++ * @dest: The affinity mask to set ++ * @force: Flag to enforce setting (disable online checks) ++ * ++ * Conditinal, as the underlying parent chip might not implement it. 
++ */ ++int irq_chip_set_affinity_parent(struct irq_data *data, ++ const struct cpumask *dest, bool force) ++{ ++ data = data->parent_data; ++ if (data->chip->irq_set_affinity) ++ return data->chip->irq_set_affinity(data, dest, force); ++ ++ return -ENOSYS; ++} ++EXPORT_SYMBOL_GPL(irq_chip_set_affinity_parent); ++ ++/** ++ * irq_chip_set_type_parent - Set IRQ type on the parent interrupt ++ * @data: Pointer to interrupt specific data ++ * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h ++ * ++ * Conditional, as the underlying parent chip might not implement it. ++ */ ++int irq_chip_set_type_parent(struct irq_data *data, unsigned int type) ++{ ++ data = data->parent_data; ++ ++ if (data->chip->irq_set_type) ++ return data->chip->irq_set_type(data, type); ++ ++ return -ENOSYS; ++} ++EXPORT_SYMBOL_GPL(irq_chip_set_type_parent); ++ ++/** ++ * irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware ++ * @data: Pointer to interrupt specific data ++ * ++ * Iterate through the domain hierarchy of the interrupt and check ++ * whether a hw retrigger function exists. If yes, invoke it. ++ */ ++int irq_chip_retrigger_hierarchy(struct irq_data *data) ++{ ++ for (data = data->parent_data; data; data = data->parent_data) ++ if (data->chip && data->chip->irq_retrigger) ++ return data->chip->irq_retrigger(data); ++ ++ return 0; ++} ++ ++/** ++ * irq_chip_set_vcpu_affinity_parent - Set vcpu affinity on the parent interrupt ++ * @data: Pointer to interrupt specific data ++ * @vcpu_info: The vcpu affinity information ++ */ ++int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info) ++{ ++ data = data->parent_data; ++ if (data->chip->irq_set_vcpu_affinity) ++ return data->chip->irq_set_vcpu_affinity(data, vcpu_info); ++ ++ return -ENOSYS; ++} ++ ++/** ++ * irq_chip_set_wake_parent - Set/reset wake-up on the parent interrupt ++ * @data: Pointer to interrupt specific data ++ * @on: Whether to set or reset the wake-up capability of this irq ++ * ++ * Conditional, as the underlying parent chip might not implement it. ++ */ ++int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) ++{ ++ data = data->parent_data; ++ ++ if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE) ++ return 0; ++ ++ if (data->chip->irq_set_wake) ++ return data->chip->irq_set_wake(data, on); ++ ++ return -ENOSYS; ++} ++#endif ++ ++/** ++ * irq_chip_compose_msi_msg - Componse msi message for a irq chip ++ * @data: Pointer to interrupt specific data ++ * @msg: Pointer to the MSI message ++ * ++ * For hierarchical domains we find the first chip in the hierarchy ++ * which implements the irq_compose_msi_msg callback. For non ++ * hierarchical we use the top level chip. ++ */ ++int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) ++{ ++ struct irq_data *pos = NULL; ++ ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ for (; data; data = data->parent_data) ++#endif ++ if (data->chip && data->chip->irq_compose_msi_msg) ++ pos = data; ++ if (!pos) ++ return -ENOSYS; ++ ++ pos->chip->irq_compose_msi_msg(pos, msg); ++ ++ return 0; ++} ++ ++/** ++ * irq_chip_pm_get - Enable power for an IRQ chip ++ * @data: Pointer to interrupt specific data ++ * ++ * Enable the power to the IRQ chip referenced by the interrupt data ++ * structure. 
++ */ ++int irq_chip_pm_get(struct irq_data *data) ++{ ++ int retval; ++ ++ if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) { ++ retval = pm_runtime_get_sync(data->chip->parent_device); ++ if (retval < 0) { ++ pm_runtime_put_noidle(data->chip->parent_device); ++ return retval; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * irq_chip_pm_put - Disable power for an IRQ chip ++ * @data: Pointer to interrupt specific data ++ * ++ * Disable the power to the IRQ chip referenced by the interrupt data ++ * structure, belongs. Note that power will only be disabled, once this ++ * function has been called for all IRQs that have called irq_chip_pm_get(). ++ */ ++int irq_chip_pm_put(struct irq_data *data) ++{ ++ int retval = 0; ++ ++ if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) ++ retval = pm_runtime_put(data->chip->parent_device); ++ ++ return (retval < 0) ? retval : 0; ++} +diff -uprN kernel/kernel/irq/dummychip.c kernel_new/kernel/irq/dummychip.c +--- kernel/kernel/irq/dummychip.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/dummychip.c 2021-04-01 18:28:07.806863119 +0800 +@@ -43,7 +43,7 @@ struct irq_chip no_irq_chip = { + .irq_enable = noop, + .irq_disable = noop, + .irq_ack = ack_bad, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + + /* +@@ -59,6 +59,6 @@ struct irq_chip dummy_irq_chip = { + .irq_ack = noop, + .irq_mask = noop, + .irq_unmask = noop, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + EXPORT_SYMBOL_GPL(dummy_irq_chip); +diff -uprN kernel/kernel/irq/generic-chip.c kernel_new/kernel/irq/generic-chip.c +--- kernel/kernel/irq/generic-chip.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/generic-chip.c 2021-04-01 18:28:07.806863119 +0800 +@@ -37,12 +37,13 @@ void irq_gc_mask_disable_reg(struct irq_ + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.disable); + *ct->mask_cache &= ~mask; +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + /** +@@ -56,12 +57,13 @@ void irq_gc_mask_set_bit(struct irq_data + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + *ct->mask_cache |= mask; + irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + EXPORT_SYMBOL_GPL(irq_gc_mask_set_bit); + +@@ -76,12 +78,13 @@ void irq_gc_mask_clr_bit(struct irq_data + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + *ct->mask_cache &= ~mask; + irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + EXPORT_SYMBOL_GPL(irq_gc_mask_clr_bit); + +@@ -96,12 +99,13 @@ void irq_gc_unmask_enable_reg(struct irq + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.enable); + *ct->mask_cache |= mask; +- irq_gc_unlock(gc); ++ 
irq_gc_unlock(gc, flags); + } + + /** +@@ -112,11 +116,12 @@ void irq_gc_ack_set_bit(struct irq_data + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.ack); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + EXPORT_SYMBOL_GPL(irq_gc_ack_set_bit); + +@@ -128,11 +133,12 @@ void irq_gc_ack_clr_bit(struct irq_data + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = ~d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.ack); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + /** +@@ -151,13 +157,14 @@ void irq_gc_mask_disable_and_ack_set(str + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.disable); + *ct->mask_cache &= ~mask; + irq_reg_writel(gc, mask, ct->regs.ack); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + /** +@@ -168,11 +175,12 @@ void irq_gc_eoi(struct irq_data *d) + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.eoi); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + /** +@@ -187,17 +195,18 @@ void irq_gc_eoi(struct irq_data *d) + int irq_gc_set_wake(struct irq_data *d, unsigned int on) + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ unsigned long flags; + u32 mask = d->mask; + + if (!(mask & gc->wake_enabled)) + return -EINVAL; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + if (on) + gc->wake_active |= mask; + else + gc->wake_active &= ~mask; +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + return 0; + } + +diff -uprN kernel/kernel/irq/internals.h kernel_new/kernel/irq/internals.h +--- kernel/kernel/irq/internals.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/internals.h 2021-04-02 09:20:04.679709490 +0800 +@@ -62,6 +62,7 @@ enum { + IRQS_SUSPENDED = 0x00000800, + IRQS_TIMINGS = 0x00001000, + IRQS_NMI = 0x00002000, ++ IPIPE_IRQS_NEEDS_STARTUP= 0x80000000, + }; + + #include "debug.h" +diff -uprN kernel/kernel/irq/internals.h.orig kernel_new/kernel/irq/internals.h.orig +--- kernel/kernel/irq/internals.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/irq/internals.h.orig 2021-04-01 18:28:07.806863119 +0800 +@@ -0,0 +1,492 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * IRQ subsystem internal functions and variables: ++ * ++ * Do not ever include this file from anything else than ++ * kernel/irq/. Do not even think about using any information outside ++ * of this file for your non core code. 
++ */ ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_SPARSE_IRQ ++# define IRQ_BITMAP_BITS (NR_IRQS + 8196) ++#else ++# define IRQ_BITMAP_BITS NR_IRQS ++#endif ++ ++#define istate core_internal_state__do_not_mess_with_it ++ ++extern bool noirqdebug; ++ ++extern struct irqaction chained_action; ++ ++/* ++ * Bits used by threaded handlers: ++ * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run ++ * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed ++ * IRQTF_AFFINITY - irq thread is requested to adjust affinity ++ * IRQTF_FORCED_THREAD - irq action is force threaded ++ */ ++enum { ++ IRQTF_RUNTHREAD, ++ IRQTF_WARNED, ++ IRQTF_AFFINITY, ++ IRQTF_FORCED_THREAD, ++}; ++ ++/* ++ * Bit masks for desc->core_internal_state__do_not_mess_with_it ++ * ++ * IRQS_AUTODETECT - autodetection in progress ++ * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt ++ * detection ++ * IRQS_POLL_INPROGRESS - polling in progress ++ * IRQS_ONESHOT - irq is not unmasked in primary handler ++ * IRQS_REPLAY - irq is replayed ++ * IRQS_WAITING - irq is waiting ++ * IRQS_PENDING - irq is pending and replayed later ++ * IRQS_SUSPENDED - irq is suspended ++ * IRQS_NMI - irq line is used to deliver NMIs ++ */ ++enum { ++ IRQS_AUTODETECT = 0x00000001, ++ IRQS_SPURIOUS_DISABLED = 0x00000002, ++ IRQS_POLL_INPROGRESS = 0x00000008, ++ IRQS_ONESHOT = 0x00000020, ++ IRQS_REPLAY = 0x00000040, ++ IRQS_WAITING = 0x00000080, ++ IRQS_PENDING = 0x00000200, ++ IRQS_SUSPENDED = 0x00000800, ++ IRQS_TIMINGS = 0x00001000, ++ IRQS_NMI = 0x00002000, ++}; ++ ++#include "debug.h" ++#include "settings.h" ++ ++extern int __irq_set_trigger(struct irq_desc *desc, unsigned long flags); ++extern void __disable_irq(struct irq_desc *desc); ++extern void __enable_irq(struct irq_desc *desc); ++ ++#define IRQ_RESEND true ++#define IRQ_NORESEND false ++ ++#define IRQ_START_FORCE true ++#define IRQ_START_COND false ++ ++extern int irq_activate(struct irq_desc *desc); ++extern int irq_activate_and_startup(struct irq_desc *desc, bool resend); ++extern int irq_startup(struct irq_desc *desc, bool resend, bool force); ++ ++extern void irq_shutdown(struct irq_desc *desc); ++extern void irq_shutdown_and_deactivate(struct irq_desc *desc); ++extern void irq_enable(struct irq_desc *desc); ++extern void irq_disable(struct irq_desc *desc); ++extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu); ++extern void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu); ++extern void mask_irq(struct irq_desc *desc); ++extern void unmask_irq(struct irq_desc *desc); ++extern void unmask_threaded_irq(struct irq_desc *desc); ++ ++#ifdef CONFIG_SPARSE_IRQ ++static inline void irq_mark_irq(unsigned int irq) { } ++#else ++extern void irq_mark_irq(unsigned int irq); ++#endif ++ ++extern int __irq_get_irqchip_state(struct irq_data *data, ++ enum irqchip_irq_state which, ++ bool *state); ++ ++extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); ++ ++irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags); ++irqreturn_t handle_irq_event_percpu(struct irq_desc *desc); ++irqreturn_t handle_irq_event(struct irq_desc *desc); ++ ++/* Resending of interrupts :*/ ++void check_irq_resend(struct irq_desc *desc); ++bool irq_wait_for_poll(struct irq_desc *desc); ++void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action); ++ ++#ifdef CONFIG_PROC_FS ++extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); ++extern void 
unregister_irq_proc(unsigned int irq, struct irq_desc *desc); ++extern void register_handler_proc(unsigned int irq, struct irqaction *action); ++extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); ++#else ++static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { } ++static inline void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) { } ++static inline void register_handler_proc(unsigned int irq, ++ struct irqaction *action) { } ++static inline void unregister_handler_proc(unsigned int irq, ++ struct irqaction *action) { } ++#endif ++ ++extern bool irq_can_set_affinity_usr(unsigned int irq); ++ ++extern void irq_set_thread_affinity(struct irq_desc *desc); ++ ++extern int irq_do_set_affinity(struct irq_data *data, ++ const struct cpumask *dest, bool force); ++ ++#ifdef CONFIG_SMP ++extern int irq_setup_affinity(struct irq_desc *desc); ++#else ++static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; } ++#endif ++ ++/* Inline functions for support of irq chips on slow busses */ ++static inline void chip_bus_lock(struct irq_desc *desc) ++{ ++ if (unlikely(desc->irq_data.chip->irq_bus_lock)) ++ desc->irq_data.chip->irq_bus_lock(&desc->irq_data); ++} ++ ++static inline void chip_bus_sync_unlock(struct irq_desc *desc) ++{ ++ if (unlikely(desc->irq_data.chip->irq_bus_sync_unlock)) ++ desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data); ++} ++ ++#define _IRQ_DESC_CHECK (1 << 0) ++#define _IRQ_DESC_PERCPU (1 << 1) ++ ++#define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK) ++#define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU) ++ ++#define for_each_action_of_desc(desc, act) \ ++ for (act = desc->action; act; act = act->next) ++ ++struct irq_desc * ++__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, ++ unsigned int check); ++void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus); ++ ++static inline struct irq_desc * ++irq_get_desc_buslock(unsigned int irq, unsigned long *flags, unsigned int check) ++{ ++ return __irq_get_desc_lock(irq, flags, true, check); ++} ++ ++static inline void ++irq_put_desc_busunlock(struct irq_desc *desc, unsigned long flags) ++{ ++ __irq_put_desc_unlock(desc, flags, true); ++} ++ ++static inline struct irq_desc * ++irq_get_desc_lock(unsigned int irq, unsigned long *flags, unsigned int check) ++{ ++ return __irq_get_desc_lock(irq, flags, false, check); ++} ++ ++static inline void ++irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags) ++{ ++ __irq_put_desc_unlock(desc, flags, false); ++} ++ ++#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) ++ ++static inline unsigned int irqd_get(struct irq_data *d) ++{ ++ return __irqd_to_state(d); ++} ++ ++/* ++ * Manipulation functions for irq_data.state ++ */ ++static inline void irqd_set_move_pending(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_SETAFFINITY_PENDING; ++} ++ ++static inline void irqd_clr_move_pending(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_SETAFFINITY_PENDING; ++} ++ ++static inline void irqd_set_managed_shutdown(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_MANAGED_SHUTDOWN; ++} ++ ++static inline void irqd_clr_managed_shutdown(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_MANAGED_SHUTDOWN; ++} ++ ++static inline void irqd_clear(struct irq_data *d, unsigned int mask) ++{ ++ __irqd_to_state(d) &= ~mask; ++} ++ ++static inline void irqd_set(struct irq_data *d, unsigned int mask) ++{ ++ 
__irqd_to_state(d) |= mask; ++} ++ ++static inline bool irqd_has_set(struct irq_data *d, unsigned int mask) ++{ ++ return __irqd_to_state(d) & mask; ++} ++ ++static inline void irq_state_set_disabled(struct irq_desc *desc) ++{ ++ irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); ++} ++ ++static inline void irq_state_set_masked(struct irq_desc *desc) ++{ ++ irqd_set(&desc->irq_data, IRQD_IRQ_MASKED); ++} ++ ++#undef __irqd_to_state ++ ++static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc) ++{ ++ __this_cpu_inc(*desc->kstat_irqs); ++ __this_cpu_inc(kstat.irqs_sum); ++} ++ ++static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) ++{ ++ __kstat_incr_irqs_this_cpu(desc); ++ desc->tot_count++; ++} ++ ++static inline int irq_desc_get_node(struct irq_desc *desc) ++{ ++ return irq_common_data_get_node(&desc->irq_common_data); ++} ++ ++static inline int irq_desc_is_chained(struct irq_desc *desc) ++{ ++ return (desc->action && desc->action == &chained_action); ++} ++ ++#ifdef CONFIG_PM_SLEEP ++bool irq_pm_check_wakeup(struct irq_desc *desc); ++void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); ++void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); ++#else ++static inline bool irq_pm_check_wakeup(struct irq_desc *desc) { return false; } ++static inline void ++irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } ++static inline void ++irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { } ++#endif ++ ++#ifdef CONFIG_IRQ_TIMINGS ++ ++#define IRQ_TIMINGS_SHIFT 5 ++#define IRQ_TIMINGS_SIZE (1 << IRQ_TIMINGS_SHIFT) ++#define IRQ_TIMINGS_MASK (IRQ_TIMINGS_SIZE - 1) ++ ++/** ++ * struct irq_timings - irq timings storing structure ++ * @values: a circular buffer of u64 encoded values ++ * @count: the number of elements in the array ++ */ ++struct irq_timings { ++ u64 values[IRQ_TIMINGS_SIZE]; ++ int count; ++}; ++ ++DECLARE_PER_CPU(struct irq_timings, irq_timings); ++ ++extern void irq_timings_free(int irq); ++extern int irq_timings_alloc(int irq); ++ ++static inline void irq_remove_timings(struct irq_desc *desc) ++{ ++ desc->istate &= ~IRQS_TIMINGS; ++ ++ irq_timings_free(irq_desc_get_irq(desc)); ++} ++ ++static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *act) ++{ ++ int irq = irq_desc_get_irq(desc); ++ int ret; ++ ++ /* ++ * We don't need the measurement because the idle code already ++ * knows the next expiry event. ++ */ ++ if (act->flags & __IRQF_TIMER) ++ return; ++ ++ /* ++ * In case the timing allocation fails, we just want to warn, ++ * not fail, so letting the system boot anyway. ++ */ ++ ret = irq_timings_alloc(irq); ++ if (ret) { ++ pr_warn("Failed to allocate irq timing stats for irq%d (%d)", ++ irq, ret); ++ return; ++ } ++ ++ desc->istate |= IRQS_TIMINGS; ++} ++ ++extern void irq_timings_enable(void); ++extern void irq_timings_disable(void); ++ ++DECLARE_STATIC_KEY_FALSE(irq_timing_enabled); ++ ++/* ++ * The interrupt number and the timestamp are encoded into a single ++ * u64 variable to optimize the size. ++ * 48 bit time stamp and 16 bit IRQ number is way sufficient. ++ * Who cares an IRQ after 78 hours of idle time? 
++ */ ++static inline u64 irq_timing_encode(u64 timestamp, int irq) ++{ ++ return (timestamp << 16) | irq; ++} ++ ++static inline int irq_timing_decode(u64 value, u64 *timestamp) ++{ ++ *timestamp = value >> 16; ++ return value & U16_MAX; ++} ++ ++/* ++ * The function record_irq_time is only called in one place in the ++ * interrupts handler. We want this function always inline so the code ++ * inside is embedded in the function and the static key branching ++ * code can act at the higher level. Without the explicit ++ * __always_inline we can end up with a function call and a small ++ * overhead in the hotpath for nothing. ++ */ ++static __always_inline void record_irq_time(struct irq_desc *desc) ++{ ++ if (!static_branch_likely(&irq_timing_enabled)) ++ return; ++ ++ if (desc->istate & IRQS_TIMINGS) { ++ struct irq_timings *timings = this_cpu_ptr(&irq_timings); ++ ++ timings->values[timings->count & IRQ_TIMINGS_MASK] = ++ irq_timing_encode(local_clock(), ++ irq_desc_get_irq(desc)); ++ ++ timings->count++; ++ } ++} ++#else ++static inline void irq_remove_timings(struct irq_desc *desc) {} ++static inline void irq_setup_timings(struct irq_desc *desc, ++ struct irqaction *act) {}; ++static inline void record_irq_time(struct irq_desc *desc) {} ++#endif /* CONFIG_IRQ_TIMINGS */ ++ ++ ++#ifdef CONFIG_GENERIC_IRQ_CHIP ++void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, ++ int num_ct, unsigned int irq_base, ++ void __iomem *reg_base, irq_flow_handler_t handler); ++#else ++static inline void ++irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, ++ int num_ct, unsigned int irq_base, ++ void __iomem *reg_base, irq_flow_handler_t handler) { } ++#endif /* CONFIG_GENERIC_IRQ_CHIP */ ++ ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++static inline bool irq_can_move_pcntxt(struct irq_data *data) ++{ ++ return irqd_can_move_in_process_context(data); ++} ++static inline bool irq_move_pending(struct irq_data *data) ++{ ++ return irqd_is_setaffinity_pending(data); ++} ++static inline void ++irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) ++{ ++ cpumask_copy(desc->pending_mask, mask); ++} ++static inline void ++irq_get_pending(struct cpumask *mask, struct irq_desc *desc) ++{ ++ cpumask_copy(mask, desc->pending_mask); ++} ++static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) ++{ ++ return desc->pending_mask; ++} ++bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); ++#else /* CONFIG_GENERIC_PENDING_IRQ */ ++static inline bool irq_can_move_pcntxt(struct irq_data *data) ++{ ++ return true; ++} ++static inline bool irq_move_pending(struct irq_data *data) ++{ ++ return false; ++} ++static inline void ++irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) ++{ ++} ++static inline void ++irq_get_pending(struct cpumask *mask, struct irq_desc *desc) ++{ ++} ++static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) ++{ ++ return NULL; ++} ++static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) ++{ ++ return false; ++} ++#endif /* !CONFIG_GENERIC_PENDING_IRQ */ ++ ++#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) ++static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve) ++{ ++ irqd_set_activated(data); ++ return 0; ++} ++static inline void irq_domain_deactivate_irq(struct irq_data *data) ++{ ++ irqd_clr_activated(data); ++} ++#endif ++ ++#ifdef CONFIG_GENERIC_IRQ_DEBUGFS ++#include ++ ++void 
irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc); ++static inline void irq_remove_debugfs_entry(struct irq_desc *desc) ++{ ++ debugfs_remove(desc->debugfs_file); ++ kfree(desc->dev_name); ++} ++void irq_debugfs_copy_devname(int irq, struct device *dev); ++# ifdef CONFIG_IRQ_DOMAIN ++void irq_domain_debugfs_init(struct dentry *root); ++# else ++static inline void irq_domain_debugfs_init(struct dentry *root) ++{ ++} ++# endif ++#else /* CONFIG_GENERIC_IRQ_DEBUGFS */ ++static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d) ++{ ++} ++static inline void irq_remove_debugfs_entry(struct irq_desc *d) ++{ ++} ++static inline void irq_debugfs_copy_devname(int irq, struct device *dev) ++{ ++} ++#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */ +diff -uprN kernel/kernel/irq/internals.h.rej kernel_new/kernel/irq/internals.h.rej +--- kernel/kernel/irq/internals.h.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/irq/internals.h.rej 2021-04-01 18:28:07.806863119 +0800 +@@ -0,0 +1,10 @@ ++--- kernel/irq/internals.h 2019-12-18 03:36:04.000000000 +0800 +++++ kernel/irq/internals.h 2021-03-22 09:21:43.218415421 +0800 ++@@ -60,6 +60,7 @@ enum { ++ IRQS_PENDING = 0x00000200, ++ IRQS_SUSPENDED = 0x00000800, ++ IRQS_TIMINGS = 0x00001000, +++ IPIPE_IRQS_NEEDS_STARTUP= 0x80000000, ++ }; ++ ++ #include "debug.h" +diff -uprN kernel/kernel/irq/irqdesc.c kernel_new/kernel/irq/irqdesc.c +--- kernel/kernel/irq/irqdesc.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/irqdesc.c 2021-04-01 18:28:07.807863118 +0800 +@@ -125,6 +125,9 @@ static void desc_set_defaults(unsigned i + for_each_possible_cpu(cpu) + *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; + desc_smp_init(desc, node, affinity); ++#ifdef CONFIG_IPIPE ++ desc->istate |= IPIPE_IRQS_NEEDS_STARTUP; ++#endif + } + + int nr_irqs = NR_IRQS; +@@ -573,11 +576,13 @@ int __init early_irq_init(void) + return arch_early_irq_init(); + } + ++#ifndef CONFIG_IPIPE + struct irq_desc *irq_to_desc(unsigned int irq) + { + return (irq < NR_IRQS) ? irq_desc + irq : NULL; + } + EXPORT_SYMBOL(irq_to_desc); ++#endif /* CONFIG_IPIPE */ + + static void free_desc(unsigned int irq) + { +diff -uprN kernel/kernel/irq/manage.c kernel_new/kernel/irq/manage.c +--- kernel/kernel/irq/manage.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/manage.c 2021-04-01 18:28:07.809863116 +0800 +@@ -957,9 +957,14 @@ again: + + desc->threads_oneshot &= ~action->thread_mask; + ++#ifndef CONFIG_IPIPE + if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) && + irqd_irq_masked(&desc->irq_data)) + unmask_threaded_irq(desc); ++#else /* CONFIG_IPIPE */ ++ if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data)) ++ desc->ipipe_end(desc); ++#endif /* CONFIG_IPIPE */ + + out_unlock: + raw_spin_unlock_irq(&desc->lock); +diff -uprN kernel/kernel/irq/manage.c.orig kernel_new/kernel/irq/manage.c.orig +--- kernel/kernel/irq/manage.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/irq/manage.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2748 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar ++ * Copyright (C) 2005-2006 Thomas Gleixner ++ * ++ * This file contains driver APIs to the irq subsystem. 
++ */ ++ ++#define pr_fmt(fmt) "genirq: " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "internals.h" ++ ++#ifdef CONFIG_IRQ_FORCED_THREADING ++__read_mostly bool force_irqthreads; ++EXPORT_SYMBOL_GPL(force_irqthreads); ++ ++static int __init setup_forced_irqthreads(char *arg) ++{ ++ force_irqthreads = true; ++ return 0; ++} ++early_param("threadirqs", setup_forced_irqthreads); ++#endif ++ ++static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip) ++{ ++ struct irq_data *irqd = irq_desc_get_irq_data(desc); ++ bool inprogress; ++ ++ do { ++ unsigned long flags; ++ ++ /* ++ * Wait until we're out of the critical section. This might ++ * give the wrong answer due to the lack of memory barriers. ++ */ ++ while (irqd_irq_inprogress(&desc->irq_data)) ++ cpu_relax(); ++ ++ /* Ok, that indicated we're done: double-check carefully. */ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ inprogress = irqd_irq_inprogress(&desc->irq_data); ++ ++ /* ++ * If requested and supported, check at the chip whether it ++ * is in flight at the hardware level, i.e. already pending ++ * in a CPU and waiting for service and acknowledge. ++ */ ++ if (!inprogress && sync_chip) { ++ /* ++ * Ignore the return code. inprogress is only updated ++ * when the chip supports it. ++ */ ++ __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE, ++ &inprogress); ++ } ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ /* Oops, that failed? */ ++ } while (inprogress); ++} ++ ++/** ++ * synchronize_hardirq - wait for pending hard IRQ handlers (on other CPUs) ++ * @irq: interrupt number to wait for ++ * ++ * This function waits for any pending hard IRQ handlers for this ++ * interrupt to complete before returning. If you use this ++ * function while holding a resource the IRQ handler may need you ++ * will deadlock. It does not take associated threaded handlers ++ * into account. ++ * ++ * Do not use this for shutdown scenarios where you must be sure ++ * that all parts (hardirq and threaded handler) have completed. ++ * ++ * Returns: false if a threaded handler is active. ++ * ++ * This function may be called - with care - from IRQ context. ++ * ++ * It does not check whether there is an interrupt in flight at the ++ * hardware level, but not serviced yet, as this might deadlock when ++ * called with interrupts disabled and the target CPU of the interrupt ++ * is the current CPU. ++ */ ++bool synchronize_hardirq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc) { ++ __synchronize_hardirq(desc, false); ++ return !atomic_read(&desc->threads_active); ++ } ++ ++ return true; ++} ++EXPORT_SYMBOL(synchronize_hardirq); ++ ++/** ++ * synchronize_irq - wait for pending IRQ handlers (on other CPUs) ++ * @irq: interrupt number to wait for ++ * ++ * This function waits for any pending IRQ handlers for this interrupt ++ * to complete before returning. If you use this function while ++ * holding a resource the IRQ handler may need you will deadlock. ++ * ++ * Can only be called from preemptible code as it might sleep when ++ * an interrupt thread is associated to @irq. ++ * ++ * It optionally makes sure (when the irq chip supports that method) ++ * that the interrupt is not pending in any CPU and waiting for ++ * service. 
++ */ ++void synchronize_irq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc) { ++ __synchronize_hardirq(desc, true); ++ /* ++ * We made sure that no hardirq handler is ++ * running. Now verify that no threaded handlers are ++ * active. ++ */ ++ wait_event(desc->wait_for_threads, ++ !atomic_read(&desc->threads_active)); ++ } ++} ++EXPORT_SYMBOL(synchronize_irq); ++ ++#ifdef CONFIG_SMP ++cpumask_var_t irq_default_affinity; ++ ++static bool __irq_can_set_affinity(struct irq_desc *desc) ++{ ++ if (!desc || !irqd_can_balance(&desc->irq_data) || ++ !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity) ++ return false; ++ return true; ++} ++ ++/** ++ * irq_can_set_affinity - Check if the affinity of a given irq can be set ++ * @irq: Interrupt to check ++ * ++ */ ++int irq_can_set_affinity(unsigned int irq) ++{ ++ return __irq_can_set_affinity(irq_to_desc(irq)); ++} ++ ++/** ++ * irq_can_set_affinity_usr - Check if affinity of a irq can be set from user space ++ * @irq: Interrupt to check ++ * ++ * Like irq_can_set_affinity() above, but additionally checks for the ++ * AFFINITY_MANAGED flag. ++ */ ++bool irq_can_set_affinity_usr(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ return __irq_can_set_affinity(desc) && ++ !irqd_affinity_is_managed(&desc->irq_data); ++} ++ ++/** ++ * irq_set_thread_affinity - Notify irq threads to adjust affinity ++ * @desc: irq descriptor which has affitnity changed ++ * ++ * We just set IRQTF_AFFINITY and delegate the affinity setting ++ * to the interrupt thread itself. We can not call ++ * set_cpus_allowed_ptr() here as we hold desc->lock and this ++ * code can be called from hard interrupt context. ++ */ ++void irq_set_thread_affinity(struct irq_desc *desc) ++{ ++ struct irqaction *action; ++ ++ for_each_action_of_desc(desc, action) ++ if (action->thread) ++ set_bit(IRQTF_AFFINITY, &action->thread_flags); ++} ++ ++#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK ++static void irq_validate_effective_affinity(struct irq_data *data) ++{ ++ const struct cpumask *m = irq_data_get_effective_affinity_mask(data); ++ struct irq_chip *chip = irq_data_get_irq_chip(data); ++ ++ if (!cpumask_empty(m)) ++ return; ++ pr_warn_once("irq_chip %s did not update eff. 
affinity mask of irq %u\n", ++ chip->name, data->irq); ++} ++ ++static inline void irq_init_effective_affinity(struct irq_data *data, ++ const struct cpumask *mask) ++{ ++ cpumask_copy(irq_data_get_effective_affinity_mask(data), mask); ++} ++#else ++static inline void irq_validate_effective_affinity(struct irq_data *data) { } ++static inline void irq_init_effective_affinity(struct irq_data *data, ++ const struct cpumask *mask) { } ++#endif ++ ++int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, ++ bool force) ++{ ++ struct irq_desc *desc = irq_data_to_desc(data); ++ struct irq_chip *chip = irq_data_get_irq_chip(data); ++ int ret; ++ ++ if (!chip || !chip->irq_set_affinity) ++ return -EINVAL; ++ ++ ret = chip->irq_set_affinity(data, mask, force); ++ switch (ret) { ++ case IRQ_SET_MASK_OK: ++ case IRQ_SET_MASK_OK_DONE: ++ cpumask_copy(desc->irq_common_data.affinity, mask); ++ case IRQ_SET_MASK_OK_NOCOPY: ++ irq_validate_effective_affinity(data); ++ irq_set_thread_affinity(desc); ++ ret = 0; ++ } ++ ++ return ret; ++} ++ ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++static inline int irq_set_affinity_pending(struct irq_data *data, ++ const struct cpumask *dest) ++{ ++ struct irq_desc *desc = irq_data_to_desc(data); ++ ++ irqd_set_move_pending(data); ++ irq_copy_pending(desc, dest); ++ return 0; ++} ++#else ++static inline int irq_set_affinity_pending(struct irq_data *data, ++ const struct cpumask *dest) ++{ ++ return -EBUSY; ++} ++#endif ++ ++static int irq_try_set_affinity(struct irq_data *data, ++ const struct cpumask *dest, bool force) ++{ ++ int ret = irq_do_set_affinity(data, dest, force); ++ ++ /* ++ * In case that the underlying vector management is busy and the ++ * architecture supports the generic pending mechanism then utilize ++ * this to avoid returning an error to user space. ++ */ ++ if (ret == -EBUSY && !force) ++ ret = irq_set_affinity_pending(data, dest); ++ return ret; ++} ++ ++static bool irq_set_affinity_deactivated(struct irq_data *data, ++ const struct cpumask *mask, bool force) ++{ ++ struct irq_desc *desc = irq_data_to_desc(data); ++ ++ /* ++ * Handle irq chips which can handle affinity only in activated ++ * state correctly ++ * ++ * If the interrupt is not yet activated, just store the affinity ++ * mask and do not call the chip driver at all. On activation the ++ * driver has to make sure anyway that the interrupt is in a ++ * useable state so startup works. 
++ */ ++ if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || ++ irqd_is_activated(data) || !irqd_affinity_on_activate(data)) ++ return false; ++ ++ cpumask_copy(desc->irq_common_data.affinity, mask); ++ irq_init_effective_affinity(data, mask); ++ irqd_set(data, IRQD_AFFINITY_SET); ++ return true; ++} ++ ++int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, ++ bool force) ++{ ++ struct irq_chip *chip = irq_data_get_irq_chip(data); ++ struct irq_desc *desc = irq_data_to_desc(data); ++ int ret = 0; ++ ++ if (!chip || !chip->irq_set_affinity) ++ return -EINVAL; ++ ++ if (irq_set_affinity_deactivated(data, mask, force)) ++ return 0; ++ ++ if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) { ++ ret = irq_try_set_affinity(data, mask, force); ++ } else { ++ irqd_set_move_pending(data); ++ irq_copy_pending(desc, mask); ++ } ++ ++ if (desc->affinity_notify) { ++ kref_get(&desc->affinity_notify->kref); ++ if (!schedule_work(&desc->affinity_notify->work)) { ++ /* Work was already scheduled, drop our extra ref */ ++ kref_put(&desc->affinity_notify->kref, ++ desc->affinity_notify->release); ++ } ++ } ++ irqd_set(data, IRQD_AFFINITY_SET); ++ ++ return ret; ++} ++ ++int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ unsigned long flags; ++ int ret; ++ ++ if (!desc) ++ return -EINVAL; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ return ret; ++} ++ ++int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ ++ if (!desc) ++ return -EINVAL; ++ desc->affinity_hint = m; ++ irq_put_desc_unlock(desc, flags); ++ /* set the initial affinity to prevent every interrupt being on CPU0 */ ++ if (m) ++ __irq_set_affinity(irq, m, false); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(irq_set_affinity_hint); ++ ++static void irq_affinity_notify(struct work_struct *work) ++{ ++ struct irq_affinity_notify *notify = ++ container_of(work, struct irq_affinity_notify, work); ++ struct irq_desc *desc = irq_to_desc(notify->irq); ++ cpumask_var_t cpumask; ++ unsigned long flags; ++ ++ if (!desc || !alloc_cpumask_var(&cpumask, GFP_KERNEL)) ++ goto out; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ if (irq_move_pending(&desc->irq_data)) ++ irq_get_pending(cpumask, desc); ++ else ++ cpumask_copy(cpumask, desc->irq_common_data.affinity); ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ notify->notify(notify, cpumask); ++ ++ free_cpumask_var(cpumask); ++out: ++ kref_put(¬ify->kref, notify->release); ++} ++ ++/** ++ * irq_set_affinity_notifier - control notification of IRQ affinity changes ++ * @irq: Interrupt for which to enable/disable notification ++ * @notify: Context for notification, or %NULL to disable ++ * notification. Function pointers must be initialised; ++ * the other fields will be initialised by this function. ++ * ++ * Must be called in process context. Notification may only be enabled ++ * after the IRQ is allocated and must be disabled before the IRQ is ++ * freed using free_irq(). 
++ */ ++int ++irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irq_affinity_notify *old_notify; ++ unsigned long flags; ++ ++ /* The release function is promised process context */ ++ might_sleep(); ++ ++ if (!desc || desc->istate & IRQS_NMI) ++ return -EINVAL; ++ ++ /* Complete initialisation of *notify */ ++ if (notify) { ++ notify->irq = irq; ++ kref_init(¬ify->kref); ++ INIT_WORK(¬ify->work, irq_affinity_notify); ++ } ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ old_notify = desc->affinity_notify; ++ desc->affinity_notify = notify; ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ if (old_notify) { ++ if (cancel_work_sync(&old_notify->work)) { ++ /* Pending work had a ref, put that one too */ ++ kref_put(&old_notify->kref, old_notify->release); ++ } ++ kref_put(&old_notify->kref, old_notify->release); ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); ++ ++#ifndef CONFIG_AUTO_IRQ_AFFINITY ++/* ++ * Generic version of the affinity autoselector. ++ */ ++int irq_setup_affinity(struct irq_desc *desc) ++{ ++ struct cpumask *set = irq_default_affinity; ++ int ret, node = irq_desc_get_node(desc); ++ static DEFINE_RAW_SPINLOCK(mask_lock); ++ static struct cpumask mask; ++ ++ /* Excludes PER_CPU and NO_BALANCE interrupts */ ++ if (!__irq_can_set_affinity(desc)) ++ return 0; ++ ++ raw_spin_lock(&mask_lock); ++ /* ++ * Preserve the managed affinity setting and a userspace affinity ++ * setup, but make sure that one of the targets is online. ++ */ ++ if (irqd_affinity_is_managed(&desc->irq_data) || ++ irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { ++ if (cpumask_intersects(desc->irq_common_data.affinity, ++ cpu_online_mask)) ++ set = desc->irq_common_data.affinity; ++ else ++ irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); ++ } ++ ++ cpumask_and(&mask, cpu_online_mask, set); ++ if (cpumask_empty(&mask)) ++ cpumask_copy(&mask, cpu_online_mask); ++ ++ if (node != NUMA_NO_NODE) { ++ const struct cpumask *nodemask = cpumask_of_node(node); ++ ++ /* make sure at least one of the cpus in nodemask is online */ ++ if (cpumask_intersects(&mask, nodemask)) ++ cpumask_and(&mask, &mask, nodemask); ++ } ++ ret = irq_do_set_affinity(&desc->irq_data, &mask, false); ++ raw_spin_unlock(&mask_lock); ++ return ret; ++} ++#else ++/* Wrapper for ALPHA specific affinity selector magic */ ++int irq_setup_affinity(struct irq_desc *desc) ++{ ++ return irq_select_affinity(irq_desc_get_irq(desc)); ++} ++#endif /* CONFIG_AUTO_IRQ_AFFINITY */ ++#endif /* CONFIG_SMP */ ++ ++ ++/** ++ * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt ++ * @irq: interrupt number to set affinity ++ * @vcpu_info: vCPU specific data or pointer to a percpu array of vCPU ++ * specific data for percpu_devid interrupts ++ * ++ * This function uses the vCPU specific data to set the vCPU ++ * affinity for an irq. The vCPU specific data is passed from ++ * outside, such as KVM. One example code path is as below: ++ * KVM -> IOMMU -> irq_set_vcpu_affinity(). 
++ */ ++int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ struct irq_data *data; ++ struct irq_chip *chip; ++ int ret = -ENOSYS; ++ ++ if (!desc) ++ return -EINVAL; ++ ++ data = irq_desc_get_irq_data(desc); ++ do { ++ chip = irq_data_get_irq_chip(data); ++ if (chip && chip->irq_set_vcpu_affinity) ++ break; ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ data = data->parent_data; ++#else ++ data = NULL; ++#endif ++ } while (data); ++ ++ if (data) ++ ret = chip->irq_set_vcpu_affinity(data, vcpu_info); ++ irq_put_desc_unlock(desc, flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity); ++ ++void __disable_irq(struct irq_desc *desc) ++{ ++ if (!desc->depth++) ++ irq_disable(desc); ++} ++ ++static int __disable_irq_nosync(unsigned int irq) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ ++ if (!desc) ++ return -EINVAL; ++ __disable_irq(desc); ++ irq_put_desc_busunlock(desc, flags); ++ return 0; ++} ++ ++/** ++ * disable_irq_nosync - disable an irq without waiting ++ * @irq: Interrupt to disable ++ * ++ * Disable the selected interrupt line. Disables and Enables are ++ * nested. ++ * Unlike disable_irq(), this function does not ensure existing ++ * instances of the IRQ handler have completed before returning. ++ * ++ * This function may be called from IRQ context. ++ */ ++void disable_irq_nosync(unsigned int irq) ++{ ++ __disable_irq_nosync(irq); ++} ++EXPORT_SYMBOL(disable_irq_nosync); ++ ++/** ++ * disable_irq - disable an irq and wait for completion ++ * @irq: Interrupt to disable ++ * ++ * Disable the selected interrupt line. Enables and Disables are ++ * nested. ++ * This function waits for any pending IRQ handlers for this interrupt ++ * to complete before returning. If you use this function while ++ * holding a resource the IRQ handler may need you will deadlock. ++ * ++ * This function may be called - with care - from IRQ context. ++ */ ++void disable_irq(unsigned int irq) ++{ ++ if (!__disable_irq_nosync(irq)) ++ synchronize_irq(irq); ++} ++EXPORT_SYMBOL(disable_irq); ++ ++/** ++ * disable_hardirq - disables an irq and waits for hardirq completion ++ * @irq: Interrupt to disable ++ * ++ * Disable the selected interrupt line. Enables and Disables are ++ * nested. ++ * This function waits for any pending hard IRQ handlers for this ++ * interrupt to complete before returning. If you use this function while ++ * holding a resource the hard IRQ handler may need you will deadlock. ++ * ++ * When used to optimistically disable an interrupt from atomic context ++ * the return value must be checked. ++ * ++ * Returns: false if a threaded handler is active. ++ * ++ * This function may be called - with care - from IRQ context. ++ */ ++bool disable_hardirq(unsigned int irq) ++{ ++ if (!__disable_irq_nosync(irq)) ++ return synchronize_hardirq(irq); ++ ++ return false; ++} ++EXPORT_SYMBOL_GPL(disable_hardirq); ++ ++/** ++ * disable_nmi_nosync - disable an nmi without waiting ++ * @irq: Interrupt to disable ++ * ++ * Disable the selected interrupt line. Disables and enables are ++ * nested. ++ * The interrupt to disable must have been requested through request_nmi. ++ * Unlike disable_nmi(), this function does not ensure existing ++ * instances of the IRQ handler have completed before returning. 
++ */ ++void disable_nmi_nosync(unsigned int irq) ++{ ++ disable_irq_nosync(irq); ++} ++ ++void __enable_irq(struct irq_desc *desc) ++{ ++ switch (desc->depth) { ++ case 0: ++ err_out: ++ WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", ++ irq_desc_get_irq(desc)); ++ break; ++ case 1: { ++ if (desc->istate & IRQS_SUSPENDED) ++ goto err_out; ++ /* Prevent probing on this irq: */ ++ irq_settings_set_noprobe(desc); ++ /* ++ * Call irq_startup() not irq_enable() here because the ++ * interrupt might be marked NOAUTOEN. So irq_startup() ++ * needs to be invoked when it gets enabled the first ++ * time. If it was already started up, then irq_startup() ++ * will invoke irq_enable() under the hood. ++ */ ++ irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); ++ break; ++ } ++ default: ++ desc->depth--; ++ } ++} ++ ++/** ++ * enable_irq - enable handling of an irq ++ * @irq: Interrupt to enable ++ * ++ * Undoes the effect of one call to disable_irq(). If this ++ * matches the last disable, processing of interrupts on this ++ * IRQ line is re-enabled. ++ * ++ * This function may be called from IRQ context only when ++ * desc->irq_data.chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! ++ */ ++void enable_irq(unsigned int irq) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ ++ if (!desc) ++ return; ++ if (WARN(!desc->irq_data.chip, ++ KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq)) ++ goto out; ++ ++ __enable_irq(desc); ++out: ++ irq_put_desc_busunlock(desc, flags); ++} ++EXPORT_SYMBOL(enable_irq); ++ ++/** ++ * enable_nmi - enable handling of an nmi ++ * @irq: Interrupt to enable ++ * ++ * The interrupt to enable must have been requested through request_nmi. ++ * Undoes the effect of one call to disable_nmi(). If this ++ * matches the last disable, processing of interrupts on this ++ * IRQ line is re-enabled. ++ */ ++void enable_nmi(unsigned int irq) ++{ ++ enable_irq(irq); ++} ++ ++static int set_irq_wake_real(unsigned int irq, unsigned int on) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ int ret = -ENXIO; ++ ++ if (irq_desc_get_chip(desc)->flags & IRQCHIP_SKIP_SET_WAKE) ++ return 0; ++ ++ if (desc->irq_data.chip->irq_set_wake) ++ ret = desc->irq_data.chip->irq_set_wake(&desc->irq_data, on); ++ ++ return ret; ++} ++ ++/** ++ * irq_set_irq_wake - control irq power management wakeup ++ * @irq: interrupt to control ++ * @on: enable/disable power management wakeup ++ * ++ * Enable/disable power management wakeup mode, which is ++ * disabled by default. Enables and disables must match, ++ * just as they match for non-wakeup mode support. ++ * ++ * Wakeup mode lets this IRQ wake the system from sleep ++ * states like "suspend to RAM". ++ */ ++int irq_set_irq_wake(unsigned int irq, unsigned int on) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ int ret = 0; ++ ++ if (!desc) ++ return -EINVAL; ++ ++ /* Don't use NMIs as wake up interrupts please */ ++ if (desc->istate & IRQS_NMI) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ /* wakeup-capable irqs can be shared between drivers that ++ * don't need to have the same sleep mode behaviors. 
++ */ ++ if (on) { ++ if (desc->wake_depth++ == 0) { ++ ret = set_irq_wake_real(irq, on); ++ if (ret) ++ desc->wake_depth = 0; ++ else ++ irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE); ++ } ++ } else { ++ if (desc->wake_depth == 0) { ++ WARN(1, "Unbalanced IRQ %d wake disable\n", irq); ++ } else if (--desc->wake_depth == 0) { ++ ret = set_irq_wake_real(irq, on); ++ if (ret) ++ desc->wake_depth = 1; ++ else ++ irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); ++ } ++ } ++ ++out_unlock: ++ irq_put_desc_busunlock(desc, flags); ++ return ret; ++} ++EXPORT_SYMBOL(irq_set_irq_wake); ++ ++/* ++ * Internal function that tells the architecture code whether a ++ * particular irq has been exclusively allocated or is available ++ * for driver use. ++ */ ++int can_request_irq(unsigned int irq, unsigned long irqflags) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ int canrequest = 0; ++ ++ if (!desc) ++ return 0; ++ ++ if (irq_settings_can_request(desc)) { ++ if (!desc->action || ++ irqflags & desc->action->flags & IRQF_SHARED) ++ canrequest = 1; ++ } ++ irq_put_desc_unlock(desc, flags); ++ return canrequest; ++} ++ ++int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) ++{ ++ struct irq_chip *chip = desc->irq_data.chip; ++ int ret, unmask = 0; ++ ++ if (!chip || !chip->irq_set_type) { ++ /* ++ * IRQF_TRIGGER_* but the PIC does not support multiple ++ * flow-types? ++ */ ++ pr_debug("No set_type function for IRQ %d (%s)\n", ++ irq_desc_get_irq(desc), ++ chip ? (chip->name ? : "unknown") : "unknown"); ++ return 0; ++ } ++ ++ if (chip->flags & IRQCHIP_SET_TYPE_MASKED) { ++ if (!irqd_irq_masked(&desc->irq_data)) ++ mask_irq(desc); ++ if (!irqd_irq_disabled(&desc->irq_data)) ++ unmask = 1; ++ } ++ ++ /* Mask all flags except trigger mode */ ++ flags &= IRQ_TYPE_SENSE_MASK; ++ ret = chip->irq_set_type(&desc->irq_data, flags); ++ ++ switch (ret) { ++ case IRQ_SET_MASK_OK: ++ case IRQ_SET_MASK_OK_DONE: ++ irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK); ++ irqd_set(&desc->irq_data, flags); ++ ++ case IRQ_SET_MASK_OK_NOCOPY: ++ flags = irqd_get_trigger_type(&desc->irq_data); ++ irq_settings_set_trigger_mask(desc, flags); ++ irqd_clear(&desc->irq_data, IRQD_LEVEL); ++ irq_settings_clr_level(desc); ++ if (flags & IRQ_TYPE_LEVEL_MASK) { ++ irq_settings_set_level(desc); ++ irqd_set(&desc->irq_data, IRQD_LEVEL); ++ } ++ ++ ret = 0; ++ break; ++ default: ++ pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n", ++ flags, irq_desc_get_irq(desc), chip->irq_set_type); ++ } ++ if (unmask) ++ unmask_irq(desc); ++ return ret; ++} ++ ++#ifdef CONFIG_HARDIRQS_SW_RESEND ++int irq_set_parent(int irq, int parent_irq) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ ++ if (!desc) ++ return -EINVAL; ++ ++ desc->parent_irq = parent_irq; ++ ++ irq_put_desc_unlock(desc, flags); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(irq_set_parent); ++#endif ++ ++/* ++ * Default primary interrupt handler for threaded interrupts. Is ++ * assigned as primary handler when request_threaded_irq is called ++ * with handler == NULL. Useful for oneshot interrupts. ++ */ ++static irqreturn_t irq_default_primary_handler(int irq, void *dev_id) ++{ ++ return IRQ_WAKE_THREAD; ++} ++ ++/* ++ * Primary handler for nested threaded interrupts. Should never be ++ * called. 
++ */ ++static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id) ++{ ++ WARN(1, "Primary handler called for nested irq %d\n", irq); ++ return IRQ_NONE; ++} ++ ++static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id) ++{ ++ WARN(1, "Secondary action handler called for irq %d\n", irq); ++ return IRQ_NONE; ++} ++ ++static int irq_wait_for_interrupt(struct irqaction *action) ++{ ++ for (;;) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ if (kthread_should_stop()) { ++ /* may need to run one last time */ ++ if (test_and_clear_bit(IRQTF_RUNTHREAD, ++ &action->thread_flags)) { ++ __set_current_state(TASK_RUNNING); ++ return 0; ++ } ++ __set_current_state(TASK_RUNNING); ++ return -1; ++ } ++ ++ if (test_and_clear_bit(IRQTF_RUNTHREAD, ++ &action->thread_flags)) { ++ __set_current_state(TASK_RUNNING); ++ return 0; ++ } ++ schedule(); ++ } ++} ++ ++/* ++ * Oneshot interrupts keep the irq line masked until the threaded ++ * handler finished. unmask if the interrupt has not been disabled and ++ * is marked MASKED. ++ */ ++static void irq_finalize_oneshot(struct irq_desc *desc, ++ struct irqaction *action) ++{ ++ if (!(desc->istate & IRQS_ONESHOT) || ++ action->handler == irq_forced_secondary_handler) ++ return; ++again: ++ chip_bus_lock(desc); ++ raw_spin_lock_irq(&desc->lock); ++ ++ /* ++ * Implausible though it may be we need to protect us against ++ * the following scenario: ++ * ++ * The thread is faster done than the hard interrupt handler ++ * on the other CPU. If we unmask the irq line then the ++ * interrupt can come in again and masks the line, leaves due ++ * to IRQS_INPROGRESS and the irq line is masked forever. ++ * ++ * This also serializes the state of shared oneshot handlers ++ * versus "desc->threads_onehsot |= action->thread_mask;" in ++ * irq_wake_thread(). See the comment there which explains the ++ * serialization. ++ */ ++ if (unlikely(irqd_irq_inprogress(&desc->irq_data))) { ++ raw_spin_unlock_irq(&desc->lock); ++ chip_bus_sync_unlock(desc); ++ cpu_relax(); ++ goto again; ++ } ++ ++ /* ++ * Now check again, whether the thread should run. Otherwise ++ * we would clear the threads_oneshot bit of this thread which ++ * was just set. ++ */ ++ if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) ++ goto out_unlock; ++ ++ desc->threads_oneshot &= ~action->thread_mask; ++ ++ if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) && ++ irqd_irq_masked(&desc->irq_data)) ++ unmask_threaded_irq(desc); ++ ++out_unlock: ++ raw_spin_unlock_irq(&desc->lock); ++ chip_bus_sync_unlock(desc); ++} ++ ++#ifdef CONFIG_SMP ++/* ++ * Check whether we need to change the affinity of the interrupt thread. ++ */ ++static void ++irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) ++{ ++ cpumask_var_t mask; ++ bool valid = true; ++ ++ if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) ++ return; ++ ++ /* ++ * In case we are out of memory we set IRQTF_AFFINITY again and ++ * try again next time ++ */ ++ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { ++ set_bit(IRQTF_AFFINITY, &action->thread_flags); ++ return; ++ } ++ ++ raw_spin_lock_irq(&desc->lock); ++ /* ++ * This code is triggered unconditionally. Check the affinity ++ * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. 
++ */ ++ if (cpumask_available(desc->irq_common_data.affinity)) { ++ const struct cpumask *m; ++ ++ m = irq_data_get_effective_affinity_mask(&desc->irq_data); ++ cpumask_copy(mask, m); ++ } else { ++ valid = false; ++ } ++ raw_spin_unlock_irq(&desc->lock); ++ ++ if (valid) ++ set_cpus_allowed_ptr(current, mask); ++ free_cpumask_var(mask); ++} ++#else ++static inline void ++irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } ++#endif ++ ++/* ++ * Interrupts which are not explicitely requested as threaded ++ * interrupts rely on the implicit bh/preempt disable of the hard irq ++ * context. So we need to disable bh here to avoid deadlocks and other ++ * side effects. ++ */ ++static irqreturn_t ++irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) ++{ ++ irqreturn_t ret; ++ ++ local_bh_disable(); ++ ret = action->thread_fn(action->irq, action->dev_id); ++ if (ret == IRQ_HANDLED) ++ atomic_inc(&desc->threads_handled); ++ ++ irq_finalize_oneshot(desc, action); ++ local_bh_enable(); ++ return ret; ++} ++ ++/* ++ * Interrupts explicitly requested as threaded interrupts want to be ++ * preemtible - many of them need to sleep and wait for slow busses to ++ * complete. ++ */ ++static irqreturn_t irq_thread_fn(struct irq_desc *desc, ++ struct irqaction *action) ++{ ++ irqreturn_t ret; ++ ++ ret = action->thread_fn(action->irq, action->dev_id); ++ if (ret == IRQ_HANDLED) ++ atomic_inc(&desc->threads_handled); ++ ++ irq_finalize_oneshot(desc, action); ++ return ret; ++} ++ ++static void wake_threads_waitq(struct irq_desc *desc) ++{ ++ if (atomic_dec_and_test(&desc->threads_active)) ++ wake_up(&desc->wait_for_threads); ++} ++ ++static void irq_thread_dtor(struct callback_head *unused) ++{ ++ struct task_struct *tsk = current; ++ struct irq_desc *desc; ++ struct irqaction *action; ++ ++ if (WARN_ON_ONCE(!(current->flags & PF_EXITING))) ++ return; ++ ++ action = kthread_data(tsk); ++ ++ pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", ++ tsk->comm, tsk->pid, action->irq); ++ ++ ++ desc = irq_to_desc(action->irq); ++ /* ++ * If IRQTF_RUNTHREAD is set, we need to decrement ++ * desc->threads_active and wake possible waiters. 
++ */ ++ if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags)) ++ wake_threads_waitq(desc); ++ ++ /* Prevent a stale desc->threads_oneshot */ ++ irq_finalize_oneshot(desc, action); ++} ++ ++static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) ++{ ++ struct irqaction *secondary = action->secondary; ++ ++ if (WARN_ON_ONCE(!secondary)) ++ return; ++ ++ raw_spin_lock_irq(&desc->lock); ++ __irq_wake_thread(desc, secondary); ++ raw_spin_unlock_irq(&desc->lock); ++} ++ ++/* ++ * Interrupt handler thread ++ */ ++static int irq_thread(void *data) ++{ ++ struct callback_head on_exit_work; ++ struct irqaction *action = data; ++ struct irq_desc *desc = irq_to_desc(action->irq); ++ irqreturn_t (*handler_fn)(struct irq_desc *desc, ++ struct irqaction *action); ++ ++ if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, ++ &action->thread_flags)) ++ handler_fn = irq_forced_thread_fn; ++ else ++ handler_fn = irq_thread_fn; ++ ++ init_task_work(&on_exit_work, irq_thread_dtor); ++ task_work_add(current, &on_exit_work, false); ++ ++ irq_thread_check_affinity(desc, action); ++ ++ while (!irq_wait_for_interrupt(action)) { ++ irqreturn_t action_ret; ++ ++ irq_thread_check_affinity(desc, action); ++ ++ action_ret = handler_fn(desc, action); ++ if (action_ret == IRQ_WAKE_THREAD) ++ irq_wake_secondary(desc, action); ++ ++ wake_threads_waitq(desc); ++ } ++ ++ /* ++ * This is the regular exit path. __free_irq() is stopping the ++ * thread via kthread_stop() after calling ++ * synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the ++ * oneshot mask bit can be set. ++ */ ++ task_work_cancel(current, irq_thread_dtor); ++ return 0; ++} ++ ++/** ++ * irq_wake_thread - wake the irq thread for the action identified by dev_id ++ * @irq: Interrupt line ++ * @dev_id: Device identity for which the thread should be woken ++ * ++ */ ++void irq_wake_thread(unsigned int irq, void *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irqaction *action; ++ unsigned long flags; ++ ++ if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ return; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ for_each_action_of_desc(desc, action) { ++ if (action->dev_id == dev_id) { ++ if (action->thread) ++ __irq_wake_thread(desc, action); ++ break; ++ } ++ } ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++} ++EXPORT_SYMBOL_GPL(irq_wake_thread); ++ ++static int irq_setup_forced_threading(struct irqaction *new) ++{ ++ if (!force_irqthreads) ++ return 0; ++ if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) ++ return 0; ++ ++ /* ++ * No further action required for interrupts which are requested as ++ * threaded interrupts already ++ */ ++ if (new->handler == irq_default_primary_handler) ++ return 0; ++ ++ new->flags |= IRQF_ONESHOT; ++ ++ /* ++ * Handle the case where we have a real primary handler and a ++ * thread handler. We force thread them as well by creating a ++ * secondary action. 
++ */ ++ if (new->handler && new->thread_fn) { ++ /* Allocate the secondary action */ ++ new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL); ++ if (!new->secondary) ++ return -ENOMEM; ++ new->secondary->handler = irq_forced_secondary_handler; ++ new->secondary->thread_fn = new->thread_fn; ++ new->secondary->dev_id = new->dev_id; ++ new->secondary->irq = new->irq; ++ new->secondary->name = new->name; ++ } ++ /* Deal with the primary handler */ ++ set_bit(IRQTF_FORCED_THREAD, &new->thread_flags); ++ new->thread_fn = new->handler; ++ new->handler = irq_default_primary_handler; ++ return 0; ++} ++ ++static int irq_request_resources(struct irq_desc *desc) ++{ ++ struct irq_data *d = &desc->irq_data; ++ struct irq_chip *c = d->chip; ++ ++ return c->irq_request_resources ? c->irq_request_resources(d) : 0; ++} ++ ++static void irq_release_resources(struct irq_desc *desc) ++{ ++ struct irq_data *d = &desc->irq_data; ++ struct irq_chip *c = d->chip; ++ ++ if (c->irq_release_resources) ++ c->irq_release_resources(d); ++} ++ ++static bool irq_supports_nmi(struct irq_desc *desc) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ /* Only IRQs directly managed by the root irqchip can be set as NMI */ ++ if (d->parent_data) ++ return false; ++#endif ++ /* Don't support NMIs for chips behind a slow bus */ ++ if (d->chip->irq_bus_lock || d->chip->irq_bus_sync_unlock) ++ return false; ++ ++ return d->chip->flags & IRQCHIP_SUPPORTS_NMI; ++} ++ ++static int irq_nmi_setup(struct irq_desc *desc) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ struct irq_chip *c = d->chip; ++ ++ return c->irq_nmi_setup ? c->irq_nmi_setup(d) : -EINVAL; ++} ++ ++static void irq_nmi_teardown(struct irq_desc *desc) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ struct irq_chip *c = d->chip; ++ ++ if (c->irq_nmi_teardown) ++ c->irq_nmi_teardown(d); ++} ++ ++static int ++setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) ++{ ++ struct task_struct *t; ++ struct sched_param param = { ++ .sched_priority = MAX_USER_RT_PRIO/2, ++ }; ++ ++ if (!secondary) { ++ t = kthread_create(irq_thread, new, "irq/%d-%s", irq, ++ new->name); ++ } else { ++ t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq, ++ new->name); ++ param.sched_priority -= 1; ++ } ++ ++ if (IS_ERR(t)) ++ return PTR_ERR(t); ++ ++ sched_setscheduler_nocheck(t, SCHED_FIFO, ¶m); ++ ++ /* ++ * We keep the reference to the task struct even if ++ * the thread dies to avoid that the interrupt code ++ * references an already freed task_struct. ++ */ ++ get_task_struct(t); ++ new->thread = t; ++ /* ++ * Tell the thread to set its affinity. This is ++ * important for shared interrupt handlers as we do ++ * not invoke setup_affinity() for the secondary ++ * handlers as everything is already set up. Even for ++ * interrupts marked with IRQF_NO_BALANCE this is ++ * correct as we want the thread to move to the cpu(s) ++ * on which the requesting code placed the interrupt. ++ */ ++ set_bit(IRQTF_AFFINITY, &new->thread_flags); ++ return 0; ++} ++ ++/* ++ * Internal function to register an irqaction - typically used to ++ * allocate special interrupts that are part of the architecture. 
++ * ++ * Locking rules: ++ * ++ * desc->request_mutex Provides serialization against a concurrent free_irq() ++ * chip_bus_lock Provides serialization for slow bus operations ++ * desc->lock Provides serialization against hard interrupts ++ * ++ * chip_bus_lock and desc->lock are sufficient for all other management and ++ * interrupt related functions. desc->request_mutex solely serializes ++ * request/free_irq(). ++ */ ++static int ++__setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) ++{ ++ struct irqaction *old, **old_ptr; ++ unsigned long flags, thread_mask = 0; ++ int ret, nested, shared = 0; ++ ++ if (!desc) ++ return -EINVAL; ++ ++ if (desc->irq_data.chip == &no_irq_chip) ++ return -ENOSYS; ++ if (!try_module_get(desc->owner)) ++ return -ENODEV; ++ ++ new->irq = irq; ++ ++ /* ++ * If the trigger type is not specified by the caller, ++ * then use the default for this interrupt. ++ */ ++ if (!(new->flags & IRQF_TRIGGER_MASK)) ++ new->flags |= irqd_get_trigger_type(&desc->irq_data); ++ ++ /* ++ * Check whether the interrupt nests into another interrupt ++ * thread. ++ */ ++ nested = irq_settings_is_nested_thread(desc); ++ if (nested) { ++ if (!new->thread_fn) { ++ ret = -EINVAL; ++ goto out_mput; ++ } ++ /* ++ * Replace the primary handler which was provided from ++ * the driver for non nested interrupt handling by the ++ * dummy function which warns when called. ++ */ ++ new->handler = irq_nested_primary_handler; ++ } else { ++ if (irq_settings_can_thread(desc)) { ++ ret = irq_setup_forced_threading(new); ++ if (ret) ++ goto out_mput; ++ } ++ } ++ ++ /* ++ * Create a handler thread when a thread function is supplied ++ * and the interrupt does not nest into another interrupt ++ * thread. ++ */ ++ if (new->thread_fn && !nested) { ++ ret = setup_irq_thread(new, irq, false); ++ if (ret) ++ goto out_mput; ++ if (new->secondary) { ++ ret = setup_irq_thread(new->secondary, irq, true); ++ if (ret) ++ goto out_thread; ++ } ++ } ++ ++ /* ++ * Drivers are often written to work w/o knowledge about the ++ * underlying irq chip implementation, so a request for a ++ * threaded irq without a primary hard irq context handler ++ * requires the ONESHOT flag to be set. Some irq chips like ++ * MSI based interrupts are per se one shot safe. Check the ++ * chip flags, so we can avoid the unmask dance at the end of ++ * the threaded handler for those. ++ */ ++ if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE) ++ new->flags &= ~IRQF_ONESHOT; ++ ++ /* ++ * Protects against a concurrent __free_irq() call which might wait ++ * for synchronize_hardirq() to complete without holding the optional ++ * chip bus lock and desc->lock. Also protects against handing out ++ * a recycled oneshot thread_mask bit while it's still in use by ++ * its previous owner. ++ */ ++ mutex_lock(&desc->request_mutex); ++ ++ /* ++ * Acquire bus lock as the irq_request_resources() callback below ++ * might rely on the serialization or the magic power management ++ * functions which are abusing the irq_bus_lock() callback, ++ */ ++ chip_bus_lock(desc); ++ ++ /* First installed action requests resources. 
*/ ++ if (!desc->action) { ++ ret = irq_request_resources(desc); ++ if (ret) { ++ pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n", ++ new->name, irq, desc->irq_data.chip->name); ++ goto out_bus_unlock; ++ } ++ } ++ ++ /* ++ * The following block of code has to be executed atomically ++ * protected against a concurrent interrupt and any of the other ++ * management calls which are not serialized via ++ * desc->request_mutex or the optional bus lock. ++ */ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ old_ptr = &desc->action; ++ old = *old_ptr; ++ if (old) { ++ /* ++ * Can't share interrupts unless both agree to and are ++ * the same type (level, edge, polarity). So both flag ++ * fields must have IRQF_SHARED set and the bits which ++ * set the trigger type must match. Also all must ++ * agree on ONESHOT. ++ * Interrupt lines used for NMIs cannot be shared. ++ */ ++ unsigned int oldtype; ++ ++ if (desc->istate & IRQS_NMI) { ++ pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n", ++ new->name, irq, desc->irq_data.chip->name); ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ /* ++ * If nobody did set the configuration before, inherit ++ * the one provided by the requester. ++ */ ++ if (irqd_trigger_type_was_set(&desc->irq_data)) { ++ oldtype = irqd_get_trigger_type(&desc->irq_data); ++ } else { ++ oldtype = new->flags & IRQF_TRIGGER_MASK; ++ irqd_set_trigger_type(&desc->irq_data, oldtype); ++ } ++ ++ if (!((old->flags & new->flags) & IRQF_SHARED) || ++ (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || ++ ((old->flags ^ new->flags) & IRQF_ONESHOT)) ++ goto mismatch; ++ ++ /* All handlers must agree on per-cpuness */ ++ if ((old->flags & IRQF_PERCPU) != ++ (new->flags & IRQF_PERCPU)) ++ goto mismatch; ++ ++ /* add new interrupt at end of irq queue */ ++ do { ++ /* ++ * Or all existing action->thread_mask bits, ++ * so we can find the next zero bit for this ++ * new action. ++ */ ++ thread_mask |= old->thread_mask; ++ old_ptr = &old->next; ++ old = *old_ptr; ++ } while (old); ++ shared = 1; ++ } ++ ++ /* ++ * Setup the thread mask for this irqaction for ONESHOT. For ++ * !ONESHOT irqs the thread mask is 0 so we can avoid a ++ * conditional in irq_wake_thread(). ++ */ ++ if (new->flags & IRQF_ONESHOT) { ++ /* ++ * Unlikely to have 32 resp 64 irqs sharing one line, ++ * but who knows. ++ */ ++ if (thread_mask == ~0UL) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } ++ /* ++ * The thread_mask for the action is or'ed to ++ * desc->thread_active to indicate that the ++ * IRQF_ONESHOT thread handler has been woken, but not ++ * yet finished. The bit is cleared when a thread ++ * completes. When all threads of a shared interrupt ++ * line have completed desc->threads_active becomes ++ * zero and the interrupt line is unmasked. See ++ * handle.c:irq_wake_thread() for further information. ++ * ++ * If no thread is woken by primary (hard irq context) ++ * interrupt handlers, then desc->threads_active is ++ * also checked for zero to unmask the irq line in the ++ * affected hard irq flow handlers ++ * (handle_[fasteoi|level]_irq). ++ * ++ * The new action gets the first zero bit of ++ * thread_mask assigned. See the loop above which or's ++ * all existing action->thread_mask bits. ++ */ ++ new->thread_mask = 1UL << ffz(thread_mask); ++ ++ } else if (new->handler == irq_default_primary_handler && ++ !(desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)) { ++ /* ++ * The interrupt was requested with handler = NULL, so ++ * we use the default primary handler for it. 
But it ++ * does not have the oneshot flag set. In combination ++ * with level interrupts this is deadly, because the ++ * default primary handler just wakes the thread, then ++ * the irq lines is reenabled, but the device still ++ * has the level irq asserted. Rinse and repeat.... ++ * ++ * While this works for edge type interrupts, we play ++ * it safe and reject unconditionally because we can't ++ * say for sure which type this interrupt really ++ * has. The type flags are unreliable as the ++ * underlying chip implementation can override them. ++ */ ++ pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n", ++ irq); ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ if (!shared) { ++ init_waitqueue_head(&desc->wait_for_threads); ++ ++ /* Setup the type (level, edge polarity) if configured: */ ++ if (new->flags & IRQF_TRIGGER_MASK) { ++ ret = __irq_set_trigger(desc, ++ new->flags & IRQF_TRIGGER_MASK); ++ ++ if (ret) ++ goto out_unlock; ++ } ++ ++ /* ++ * Activate the interrupt. That activation must happen ++ * independently of IRQ_NOAUTOEN. request_irq() can fail ++ * and the callers are supposed to handle ++ * that. enable_irq() of an interrupt requested with ++ * IRQ_NOAUTOEN is not supposed to fail. The activation ++ * keeps it in shutdown mode, it merily associates ++ * resources if necessary and if that's not possible it ++ * fails. Interrupts which are in managed shutdown mode ++ * will simply ignore that activation request. ++ */ ++ ret = irq_activate(desc); ++ if (ret) ++ goto out_unlock; ++ ++ desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ ++ IRQS_ONESHOT | IRQS_WAITING); ++ irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); ++ ++ if (new->flags & IRQF_PERCPU) { ++ irqd_set(&desc->irq_data, IRQD_PER_CPU); ++ irq_settings_set_per_cpu(desc); ++ } ++ ++ if (new->flags & IRQF_ONESHOT) ++ desc->istate |= IRQS_ONESHOT; ++ ++ /* Exclude IRQ from balancing if requested */ ++ if (new->flags & IRQF_NOBALANCING) { ++ irq_settings_set_no_balancing(desc); ++ irqd_set(&desc->irq_data, IRQD_NO_BALANCING); ++ } ++ ++ if (irq_settings_can_autoenable(desc)) { ++ irq_startup(desc, IRQ_RESEND, IRQ_START_COND); ++ } else { ++ /* ++ * Shared interrupts do not go well with disabling ++ * auto enable. The sharing interrupt might request ++ * it while it's still disabled and then wait for ++ * interrupts forever. ++ */ ++ WARN_ON_ONCE(new->flags & IRQF_SHARED); ++ /* Undo nested disables: */ ++ desc->depth = 1; ++ } ++ ++ } else if (new->flags & IRQF_TRIGGER_MASK) { ++ unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK; ++ unsigned int omsk = irqd_get_trigger_type(&desc->irq_data); ++ ++ if (nmsk != omsk) ++ /* hope the handler works with current trigger mode */ ++ pr_warn("irq %d uses trigger mode %u; requested %u\n", ++ irq, omsk, nmsk); ++ } ++ ++ *old_ptr = new; ++ ++ irq_pm_install_action(desc, new); ++ ++ /* Reset broken irq detection when installing new handler */ ++ desc->irq_count = 0; ++ desc->irqs_unhandled = 0; ++ ++ /* ++ * Check whether we disabled the irq via the spurious handler ++ * before. Reenable it and give it another chance. ++ */ ++ if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) { ++ desc->istate &= ~IRQS_SPURIOUS_DISABLED; ++ __enable_irq(desc); ++ } ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ chip_bus_sync_unlock(desc); ++ mutex_unlock(&desc->request_mutex); ++ ++ irq_setup_timings(desc, new); ++ ++ /* ++ * Strictly no need to wake it up, but hung_task complains ++ * when no hard interrupt wakes the thread up. 
++ */ ++ if (new->thread) ++ wake_up_process(new->thread); ++ if (new->secondary) ++ wake_up_process(new->secondary->thread); ++ ++ register_irq_proc(irq, desc); ++ new->dir = NULL; ++ register_handler_proc(irq, new); ++ return 0; ++ ++mismatch: ++ if (!(new->flags & IRQF_PROBE_SHARED)) { ++ pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n", ++ irq, new->flags, new->name, old->flags, old->name); ++#ifdef CONFIG_DEBUG_SHIRQ ++ dump_stack(); ++#endif ++ } ++ ret = -EBUSY; ++ ++out_unlock: ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ if (!desc->action) ++ irq_release_resources(desc); ++out_bus_unlock: ++ chip_bus_sync_unlock(desc); ++ mutex_unlock(&desc->request_mutex); ++ ++out_thread: ++ if (new->thread) { ++ struct task_struct *t = new->thread; ++ ++ new->thread = NULL; ++ kthread_stop(t); ++ put_task_struct(t); ++ } ++ if (new->secondary && new->secondary->thread) { ++ struct task_struct *t = new->secondary->thread; ++ ++ new->secondary->thread = NULL; ++ kthread_stop(t); ++ put_task_struct(t); ++ } ++out_mput: ++ module_put(desc->owner); ++ return ret; ++} ++ ++/** ++ * setup_irq - setup an interrupt ++ * @irq: Interrupt line to setup ++ * @act: irqaction for the interrupt ++ * ++ * Used to statically setup interrupts in the early boot process. ++ */ ++int setup_irq(unsigned int irq, struct irqaction *act) ++{ ++ int retval; ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ return -EINVAL; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) ++ return retval; ++ ++ retval = __setup_irq(irq, desc, act); ++ ++ if (retval) ++ irq_chip_pm_put(&desc->irq_data); ++ ++ return retval; ++} ++EXPORT_SYMBOL_GPL(setup_irq); ++ ++/* ++ * Internal function to unregister an irqaction - used to free ++ * regular and special interrupts that are part of the architecture. ++ */ ++static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) ++{ ++ unsigned irq = desc->irq_data.irq; ++ struct irqaction *action, **action_ptr; ++ unsigned long flags; ++ ++ WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); ++ ++ mutex_lock(&desc->request_mutex); ++ chip_bus_lock(desc); ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ /* ++ * There can be multiple actions per IRQ descriptor, find the right ++ * one based on the dev_id: ++ */ ++ action_ptr = &desc->action; ++ for (;;) { ++ action = *action_ptr; ++ ++ if (!action) { ++ WARN(1, "Trying to free already-free IRQ %d\n", irq); ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ chip_bus_sync_unlock(desc); ++ mutex_unlock(&desc->request_mutex); ++ return NULL; ++ } ++ ++ if (action->dev_id == dev_id) ++ break; ++ action_ptr = &action->next; ++ } ++ ++ /* Found it - now remove it from the list of entries: */ ++ *action_ptr = action->next; ++ ++ irq_pm_remove_action(desc, action); ++ ++ /* If this was the last handler, shut down the IRQ line: */ ++ if (!desc->action) { ++ irq_settings_clr_disable_unlazy(desc); ++ /* Only shutdown. Deactivate after synchronize_hardirq() */ ++ irq_shutdown(desc); ++ } ++ ++#ifdef CONFIG_SMP ++ /* make sure affinity_hint is cleaned up */ ++ if (WARN_ON_ONCE(desc->affinity_hint)) ++ desc->affinity_hint = NULL; ++#endif ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ /* ++ * Drop bus_lock here so the changes which were done in the chip ++ * callbacks above are synced out to the irq chips which hang ++ * behind a slow bus (I2C, SPI) before calling synchronize_hardirq(). 
++ * ++ * Aside of that the bus_lock can also be taken from the threaded ++ * handler in irq_finalize_oneshot() which results in a deadlock ++ * because kthread_stop() would wait forever for the thread to ++ * complete, which is blocked on the bus lock. ++ * ++ * The still held desc->request_mutex() protects against a ++ * concurrent request_irq() of this irq so the release of resources ++ * and timing data is properly serialized. ++ */ ++ chip_bus_sync_unlock(desc); ++ ++ unregister_handler_proc(irq, action); ++ ++ /* ++ * Make sure it's not being used on another CPU and if the chip ++ * supports it also make sure that there is no (not yet serviced) ++ * interrupt in flight at the hardware level. ++ */ ++ __synchronize_hardirq(desc, true); ++ ++#ifdef CONFIG_DEBUG_SHIRQ ++ /* ++ * It's a shared IRQ -- the driver ought to be prepared for an IRQ ++ * event to happen even now it's being freed, so let's make sure that ++ * is so by doing an extra call to the handler .... ++ * ++ * ( We do this after actually deregistering it, to make sure that a ++ * 'real' IRQ doesn't run in parallel with our fake. ) ++ */ ++ if (action->flags & IRQF_SHARED) { ++ local_irq_save(flags); ++ action->handler(irq, dev_id); ++ local_irq_restore(flags); ++ } ++#endif ++ ++ /* ++ * The action has already been removed above, but the thread writes ++ * its oneshot mask bit when it completes. Though request_mutex is ++ * held across this which prevents __setup_irq() from handing out ++ * the same bit to a newly requested action. ++ */ ++ if (action->thread) { ++ kthread_stop(action->thread); ++ put_task_struct(action->thread); ++ if (action->secondary && action->secondary->thread) { ++ kthread_stop(action->secondary->thread); ++ put_task_struct(action->secondary->thread); ++ } ++ } ++ ++ /* Last action releases resources */ ++ if (!desc->action) { ++ /* ++ * Reaquire bus lock as irq_release_resources() might ++ * require it to deallocate resources over the slow bus. ++ */ ++ chip_bus_lock(desc); ++ /* ++ * There is no interrupt on the fly anymore. Deactivate it ++ * completely. ++ */ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ irq_domain_deactivate_irq(&desc->irq_data); ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ irq_release_resources(desc); ++ chip_bus_sync_unlock(desc); ++ irq_remove_timings(desc); ++ } ++ ++ mutex_unlock(&desc->request_mutex); ++ ++ irq_chip_pm_put(&desc->irq_data); ++ module_put(desc->owner); ++ kfree(action->secondary); ++ return action; ++} ++ ++/** ++ * remove_irq - free an interrupt ++ * @irq: Interrupt line to free ++ * @act: irqaction for the interrupt ++ * ++ * Used to remove interrupts statically setup by the early boot process. ++ */ ++void remove_irq(unsigned int irq, struct irqaction *act) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc && !WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ __free_irq(desc, act->dev_id); ++} ++EXPORT_SYMBOL_GPL(remove_irq); ++ ++/** ++ * free_irq - free an interrupt allocated with request_irq ++ * @irq: Interrupt line to free ++ * @dev_id: Device identity to free ++ * ++ * Remove an interrupt handler. The handler is removed and if the ++ * interrupt line is no longer in use by any driver it is disabled. ++ * On a shared IRQ the caller must ensure the interrupt is disabled ++ * on the card it drives before calling this function. The function ++ * does not return until any executing interrupts for this IRQ ++ * have completed. ++ * ++ * This function must not be called from interrupt context. 
++ * ++ * Returns the devname argument passed to request_irq. ++ */ ++const void *free_irq(unsigned int irq, void *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irqaction *action; ++ const char *devname; ++ ++ if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ return NULL; ++ ++#ifdef CONFIG_SMP ++ if (WARN_ON(desc->affinity_notify)) ++ desc->affinity_notify = NULL; ++#endif ++ ++ action = __free_irq(desc, dev_id); ++ ++ if (!action) ++ return NULL; ++ ++ devname = action->name; ++ kfree(action); ++ return devname; ++} ++EXPORT_SYMBOL(free_irq); ++ ++/* This function must be called with desc->lock held */ ++static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc) ++{ ++ const char *devname = NULL; ++ ++ desc->istate &= ~IRQS_NMI; ++ ++ if (!WARN_ON(desc->action == NULL)) { ++ irq_pm_remove_action(desc, desc->action); ++ devname = desc->action->name; ++ unregister_handler_proc(irq, desc->action); ++ ++ kfree(desc->action); ++ desc->action = NULL; ++ } ++ ++ irq_settings_clr_disable_unlazy(desc); ++ irq_shutdown(desc); ++ ++ irq_release_resources(desc); ++ ++ irq_chip_pm_put(&desc->irq_data); ++ module_put(desc->owner); ++ ++ return devname; ++} ++ ++const void *free_nmi(unsigned int irq, void *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ unsigned long flags; ++ const void *devname; ++ ++ if (!desc || WARN_ON(!(desc->istate & IRQS_NMI))) ++ return NULL; ++ ++ if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ return NULL; ++ ++ /* NMI still enabled */ ++ if (WARN_ON(desc->depth == 0)) ++ disable_nmi_nosync(irq); ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ irq_nmi_teardown(desc); ++ devname = __cleanup_nmi(irq, desc); ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ return devname; ++} ++ ++/** ++ * request_threaded_irq - allocate an interrupt line ++ * @irq: Interrupt line to allocate ++ * @handler: Function to be called when the IRQ occurs. ++ * Primary handler for threaded interrupts ++ * If NULL and thread_fn != NULL the default ++ * primary handler is installed ++ * @thread_fn: Function called from the irq handler thread ++ * If NULL, no irq thread is created ++ * @irqflags: Interrupt type flags ++ * @devname: An ascii name for the claiming device ++ * @dev_id: A cookie passed back to the handler function ++ * ++ * This call allocates interrupt resources and enables the ++ * interrupt line and IRQ handling. From the point this ++ * call is made your handler function may be invoked. Since ++ * your handler function must clear any interrupt the board ++ * raises, you must take care both to initialise your hardware ++ * and to set up the interrupt handler in the right order. ++ * ++ * If you want to set up a threaded irq handler for your device ++ * then you need to supply @handler and @thread_fn. @handler is ++ * still called in hard interrupt context and has to check ++ * whether the interrupt originates from the device. If yes it ++ * needs to disable the interrupt on the device and return ++ * IRQ_WAKE_THREAD which will wake up the handler thread and run ++ * @thread_fn. This split handler design is necessary to support ++ * shared interrupts. ++ * ++ * Dev_id must be globally unique. Normally the address of the ++ * device data structure is used as the cookie. Since the handler ++ * receives this value it makes sense to use it. ++ * ++ * If your interrupt is shared you must pass a non NULL dev_id ++ * as this is required when freeing the interrupt. 
++ * ++ * Flags: ++ * ++ * IRQF_SHARED Interrupt is shared ++ * IRQF_TRIGGER_* Specify active edge(s) or level ++ * ++ */ ++int request_threaded_irq(unsigned int irq, irq_handler_t handler, ++ irq_handler_t thread_fn, unsigned long irqflags, ++ const char *devname, void *dev_id) ++{ ++ struct irqaction *action; ++ struct irq_desc *desc; ++ int retval; ++ ++ if (irq == IRQ_NOTCONNECTED) ++ return -ENOTCONN; ++ ++ /* ++ * Sanity-check: shared interrupts must pass in a real dev-ID, ++ * otherwise we'll have trouble later trying to figure out ++ * which interrupt is which (messes up the interrupt freeing ++ * logic etc). ++ * ++ * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and ++ * it cannot be set along with IRQF_NO_SUSPEND. ++ */ ++ if (((irqflags & IRQF_SHARED) && !dev_id) || ++ (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || ++ ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) ++ return -EINVAL; ++ ++ desc = irq_to_desc(irq); ++ if (!desc) ++ return -EINVAL; ++ ++ if (!irq_settings_can_request(desc) || ++ WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ return -EINVAL; ++ ++ if (!handler) { ++ if (!thread_fn) ++ return -EINVAL; ++ handler = irq_default_primary_handler; ++ } ++ ++ action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); ++ if (!action) ++ return -ENOMEM; ++ ++ action->handler = handler; ++ action->thread_fn = thread_fn; ++ action->flags = irqflags; ++ action->name = devname; ++ action->dev_id = dev_id; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) { ++ kfree(action); ++ return retval; ++ } ++ ++ retval = __setup_irq(irq, desc, action); ++ ++ if (retval) { ++ irq_chip_pm_put(&desc->irq_data); ++ kfree(action->secondary); ++ kfree(action); ++ } ++ ++#ifdef CONFIG_DEBUG_SHIRQ_FIXME ++ if (!retval && (irqflags & IRQF_SHARED)) { ++ /* ++ * It's a shared IRQ -- the driver ought to be prepared for it ++ * to happen immediately, so let's make sure.... ++ * We disable the irq to make sure that a 'real' IRQ doesn't ++ * run in parallel with our fake. ++ */ ++ unsigned long flags; ++ ++ disable_irq(irq); ++ local_irq_save(flags); ++ ++ handler(irq, dev_id); ++ ++ local_irq_restore(flags); ++ enable_irq(irq); ++ } ++#endif ++ return retval; ++} ++EXPORT_SYMBOL(request_threaded_irq); ++ ++/** ++ * request_any_context_irq - allocate an interrupt line ++ * @irq: Interrupt line to allocate ++ * @handler: Function to be called when the IRQ occurs. ++ * Threaded handler for threaded interrupts. ++ * @flags: Interrupt type flags ++ * @name: An ascii name for the claiming device ++ * @dev_id: A cookie passed back to the handler function ++ * ++ * This call allocates interrupt resources and enables the ++ * interrupt line and IRQ handling. It selects either a ++ * hardirq or threaded handling method depending on the ++ * context. ++ * ++ * On failure, it returns a negative value. On success, ++ * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED. ++ */ ++int request_any_context_irq(unsigned int irq, irq_handler_t handler, ++ unsigned long flags, const char *name, void *dev_id) ++{ ++ struct irq_desc *desc; ++ int ret; ++ ++ if (irq == IRQ_NOTCONNECTED) ++ return -ENOTCONN; ++ ++ desc = irq_to_desc(irq); ++ if (!desc) ++ return -EINVAL; ++ ++ if (irq_settings_is_nested_thread(desc)) { ++ ret = request_threaded_irq(irq, NULL, handler, ++ flags, name, dev_id); ++ return !ret ? IRQC_IS_NESTED : ret; ++ } ++ ++ ret = request_irq(irq, handler, flags, name, dev_id); ++ return !ret ? 
IRQC_IS_HARDIRQ : ret; ++} ++EXPORT_SYMBOL_GPL(request_any_context_irq); ++ ++/** ++ * request_nmi - allocate an interrupt line for NMI delivery ++ * @irq: Interrupt line to allocate ++ * @handler: Function to be called when the IRQ occurs. ++ * Threaded handler for threaded interrupts. ++ * @irqflags: Interrupt type flags ++ * @name: An ascii name for the claiming device ++ * @dev_id: A cookie passed back to the handler function ++ * ++ * This call allocates interrupt resources and enables the ++ * interrupt line and IRQ handling. It sets up the IRQ line ++ * to be handled as an NMI. ++ * ++ * An interrupt line delivering NMIs cannot be shared and IRQ handling ++ * cannot be threaded. ++ * ++ * Interrupt lines requested for NMI delivering must produce per cpu ++ * interrupts and have auto enabling setting disabled. ++ * ++ * Dev_id must be globally unique. Normally the address of the ++ * device data structure is used as the cookie. Since the handler ++ * receives this value it makes sense to use it. ++ * ++ * If the interrupt line cannot be used to deliver NMIs, function ++ * will fail and return a negative value. ++ */ ++int request_nmi(unsigned int irq, irq_handler_t handler, ++ unsigned long irqflags, const char *name, void *dev_id) ++{ ++ struct irqaction *action; ++ struct irq_desc *desc; ++ unsigned long flags; ++ int retval; ++ ++ if (irq == IRQ_NOTCONNECTED) ++ return -ENOTCONN; ++ ++ /* NMI cannot be shared, used for Polling */ ++ if (irqflags & (IRQF_SHARED | IRQF_COND_SUSPEND | IRQF_IRQPOLL)) ++ return -EINVAL; ++ ++ if (!(irqflags & IRQF_PERCPU)) ++ return -EINVAL; ++ ++ if (!handler) ++ return -EINVAL; ++ ++ desc = irq_to_desc(irq); ++ ++ if (!desc || irq_settings_can_autoenable(desc) || ++ !irq_settings_can_request(desc) || ++ WARN_ON(irq_settings_is_per_cpu_devid(desc)) || ++ !irq_supports_nmi(desc)) ++ return -EINVAL; ++ ++ action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); ++ if (!action) ++ return -ENOMEM; ++ ++ action->handler = handler; ++ action->flags = irqflags | IRQF_NO_THREAD | IRQF_NOBALANCING; ++ action->name = name; ++ action->dev_id = dev_id; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) ++ goto err_out; ++ ++ retval = __setup_irq(irq, desc, action); ++ if (retval) ++ goto err_irq_setup; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ /* Setup NMI state */ ++ desc->istate |= IRQS_NMI; ++ retval = irq_nmi_setup(desc); ++ if (retval) { ++ __cleanup_nmi(irq, desc); ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ return -EINVAL; ++ } ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ return 0; ++ ++err_irq_setup: ++ irq_chip_pm_put(&desc->irq_data); ++err_out: ++ kfree(action); ++ ++ return retval; ++} ++ ++void enable_percpu_irq(unsigned int irq, unsigned int type) ++{ ++ unsigned int cpu = smp_processor_id(); ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); ++ ++ if (!desc) ++ return; ++ ++ /* ++ * If the trigger type is not specified by the caller, then ++ * use the default for this interrupt. 
++ */ ++ type &= IRQ_TYPE_SENSE_MASK; ++ if (type == IRQ_TYPE_NONE) ++ type = irqd_get_trigger_type(&desc->irq_data); ++ ++ if (type != IRQ_TYPE_NONE) { ++ int ret; ++ ++ ret = __irq_set_trigger(desc, type); ++ ++ if (ret) { ++ WARN(1, "failed to set type for IRQ%d\n", irq); ++ goto out; ++ } ++ } ++ ++ irq_percpu_enable(desc, cpu); ++out: ++ irq_put_desc_unlock(desc, flags); ++} ++EXPORT_SYMBOL_GPL(enable_percpu_irq); ++ ++void enable_percpu_nmi(unsigned int irq, unsigned int type) ++{ ++ enable_percpu_irq(irq, type); ++} ++ ++/** ++ * irq_percpu_is_enabled - Check whether the per cpu irq is enabled ++ * @irq: Linux irq number to check for ++ * ++ * Must be called from a non migratable context. Returns the enable ++ * state of a per cpu interrupt on the current cpu. ++ */ ++bool irq_percpu_is_enabled(unsigned int irq) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct irq_desc *desc; ++ unsigned long flags; ++ bool is_enabled; ++ ++ desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); ++ if (!desc) ++ return false; ++ ++ is_enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); ++ irq_put_desc_unlock(desc, flags); ++ ++ return is_enabled; ++} ++EXPORT_SYMBOL_GPL(irq_percpu_is_enabled); ++ ++void disable_percpu_irq(unsigned int irq) ++{ ++ unsigned int cpu = smp_processor_id(); ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); ++ ++ if (!desc) ++ return; ++ ++ irq_percpu_disable(desc, cpu); ++ irq_put_desc_unlock(desc, flags); ++} ++EXPORT_SYMBOL_GPL(disable_percpu_irq); ++ ++void disable_percpu_nmi(unsigned int irq) ++{ ++ disable_percpu_irq(irq); ++} ++ ++/* ++ * Internal function to unregister a percpu irqaction. ++ */ ++static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irqaction *action; ++ unsigned long flags; ++ ++ WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); ++ ++ if (!desc) ++ return NULL; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ action = desc->action; ++ if (!action || action->percpu_dev_id != dev_id) { ++ WARN(1, "Trying to free already-free IRQ %d\n", irq); ++ goto bad; ++ } ++ ++ if (!cpumask_empty(desc->percpu_enabled)) { ++ WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", ++ irq, cpumask_first(desc->percpu_enabled)); ++ goto bad; ++ } ++ ++ /* Found it - now remove it from the list of entries: */ ++ desc->action = NULL; ++ ++ desc->istate &= ~IRQS_NMI; ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ unregister_handler_proc(irq, action); ++ ++ irq_chip_pm_put(&desc->irq_data); ++ module_put(desc->owner); ++ return action; ++ ++bad: ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ return NULL; ++} ++ ++/** ++ * remove_percpu_irq - free a per-cpu interrupt ++ * @irq: Interrupt line to free ++ * @act: irqaction for the interrupt ++ * ++ * Used to remove interrupts statically setup by the early boot process. ++ */ ++void remove_percpu_irq(unsigned int irq, struct irqaction *act) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc && irq_settings_is_per_cpu_devid(desc)) ++ __free_percpu_irq(irq, act->percpu_dev_id); ++} ++ ++/** ++ * free_percpu_irq - free an interrupt allocated with request_percpu_irq ++ * @irq: Interrupt line to free ++ * @dev_id: Device identity to free ++ * ++ * Remove a percpu interrupt handler. The handler is removed, but ++ * the interrupt line is not disabled. 
This must be done on each ++ * CPU before calling this function. The function does not return ++ * until any executing interrupts for this IRQ have completed. ++ * ++ * This function must not be called from interrupt context. ++ */ ++void free_percpu_irq(unsigned int irq, void __percpu *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (!desc || !irq_settings_is_per_cpu_devid(desc)) ++ return; ++ ++ chip_bus_lock(desc); ++ kfree(__free_percpu_irq(irq, dev_id)); ++ chip_bus_sync_unlock(desc); ++} ++EXPORT_SYMBOL_GPL(free_percpu_irq); ++ ++void free_percpu_nmi(unsigned int irq, void __percpu *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (!desc || !irq_settings_is_per_cpu_devid(desc)) ++ return; ++ ++ if (WARN_ON(!(desc->istate & IRQS_NMI))) ++ return; ++ ++ kfree(__free_percpu_irq(irq, dev_id)); ++} ++ ++/** ++ * setup_percpu_irq - setup a per-cpu interrupt ++ * @irq: Interrupt line to setup ++ * @act: irqaction for the interrupt ++ * ++ * Used to statically setup per-cpu interrupts in the early boot process. ++ */ ++int setup_percpu_irq(unsigned int irq, struct irqaction *act) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ int retval; ++ ++ if (!desc || !irq_settings_is_per_cpu_devid(desc)) ++ return -EINVAL; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) ++ return retval; ++ ++ retval = __setup_irq(irq, desc, act); ++ ++ if (retval) ++ irq_chip_pm_put(&desc->irq_data); ++ ++ return retval; ++} ++ ++/** ++ * __request_percpu_irq - allocate a percpu interrupt line ++ * @irq: Interrupt line to allocate ++ * @handler: Function to be called when the IRQ occurs. ++ * @flags: Interrupt type flags (IRQF_TIMER only) ++ * @devname: An ascii name for the claiming device ++ * @dev_id: A percpu cookie passed back to the handler function ++ * ++ * This call allocates interrupt resources and enables the ++ * interrupt on the local CPU. If the interrupt is supposed to be ++ * enabled on other CPUs, it has to be done on each CPU using ++ * enable_percpu_irq(). ++ * ++ * Dev_id must be globally unique. It is a per-cpu variable, and ++ * the handler gets called with the interrupted CPU's instance of ++ * that variable. 
++ */ ++int __request_percpu_irq(unsigned int irq, irq_handler_t handler, ++ unsigned long flags, const char *devname, ++ void __percpu *dev_id) ++{ ++ struct irqaction *action; ++ struct irq_desc *desc; ++ int retval; ++ ++ if (!dev_id) ++ return -EINVAL; ++ ++ desc = irq_to_desc(irq); ++ if (!desc || !irq_settings_can_request(desc) || ++ !irq_settings_is_per_cpu_devid(desc)) ++ return -EINVAL; ++ ++ if (flags && flags != IRQF_TIMER) ++ return -EINVAL; ++ ++ action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); ++ if (!action) ++ return -ENOMEM; ++ ++ action->handler = handler; ++ action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND; ++ action->name = devname; ++ action->percpu_dev_id = dev_id; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) { ++ kfree(action); ++ return retval; ++ } ++ ++ retval = __setup_irq(irq, desc, action); ++ ++ if (retval) { ++ irq_chip_pm_put(&desc->irq_data); ++ kfree(action); ++ } ++ ++ return retval; ++} ++EXPORT_SYMBOL_GPL(__request_percpu_irq); ++ ++int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, ++ bool *state) ++{ ++ struct irq_chip *chip; ++ int err = -EINVAL; ++ ++ do { ++ chip = irq_data_get_irq_chip(data); ++ if (chip->irq_get_irqchip_state) ++ break; ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ data = data->parent_data; ++#else ++ data = NULL; ++#endif ++ } while (data); ++ ++ if (data) ++ err = chip->irq_get_irqchip_state(data, which, state); ++ return err; ++} ++ ++/** ++ * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery ++ * @irq: Interrupt line to allocate ++ * @handler: Function to be called when the IRQ occurs. ++ * @name: An ascii name for the claiming device ++ * @dev_id: A percpu cookie passed back to the handler function ++ * ++ * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs ++ * have to be setup on each CPU by calling prepare_percpu_nmi() before being ++ * enabled on the same CPU by using enable_percpu_nmi(). ++ * ++ * Dev_id must be globally unique. It is a per-cpu variable, and ++ * the handler gets called with the interrupted CPU's instance of ++ * that variable. ++ * ++ * Interrupt lines requested for NMI delivering should have auto enabling ++ * setting disabled. ++ * ++ * If the interrupt line cannot be used to deliver NMIs, function ++ * will fail returning a negative value. 
++ */ ++int request_percpu_nmi(unsigned int irq, irq_handler_t handler, ++ const char *name, void __percpu *dev_id) ++{ ++ struct irqaction *action; ++ struct irq_desc *desc; ++ unsigned long flags; ++ int retval; ++ ++ if (!handler) ++ return -EINVAL; ++ ++ desc = irq_to_desc(irq); ++ ++ if (!desc || !irq_settings_can_request(desc) || ++ !irq_settings_is_per_cpu_devid(desc) || ++ irq_settings_can_autoenable(desc) || ++ !irq_supports_nmi(desc)) ++ return -EINVAL; ++ ++ /* The line cannot already be NMI */ ++ if (desc->istate & IRQS_NMI) ++ return -EINVAL; ++ ++ action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); ++ if (!action) ++ return -ENOMEM; ++ ++ action->handler = handler; ++ action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND | IRQF_NO_THREAD ++ | IRQF_NOBALANCING; ++ action->name = name; ++ action->percpu_dev_id = dev_id; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) ++ goto err_out; ++ ++ retval = __setup_irq(irq, desc, action); ++ if (retval) ++ goto err_irq_setup; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ desc->istate |= IRQS_NMI; ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ return 0; ++ ++err_irq_setup: ++ irq_chip_pm_put(&desc->irq_data); ++err_out: ++ kfree(action); ++ ++ return retval; ++} ++ ++/** ++ * prepare_percpu_nmi - performs CPU local setup for NMI delivery ++ * @irq: Interrupt line to prepare for NMI delivery ++ * ++ * This call prepares an interrupt line to deliver NMI on the current CPU, ++ * before that interrupt line gets enabled with enable_percpu_nmi(). ++ * ++ * As a CPU local operation, this should be called from non-preemptible ++ * context. ++ * ++ * If the interrupt line cannot be used to deliver NMIs, function ++ * will fail returning a negative value. ++ */ ++int prepare_percpu_nmi(unsigned int irq) ++{ ++ unsigned long flags; ++ struct irq_desc *desc; ++ int ret = 0; ++ ++ WARN_ON(preemptible()); ++ ++ desc = irq_get_desc_lock(irq, &flags, ++ IRQ_GET_DESC_CHECK_PERCPU); ++ if (!desc) ++ return -EINVAL; ++ ++ if (WARN(!(desc->istate & IRQS_NMI), ++ KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", ++ irq)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ ret = irq_nmi_setup(desc); ++ if (ret) { ++ pr_err("Failed to setup NMI delivery: irq %u\n", irq); ++ goto out; ++ } ++ ++out: ++ irq_put_desc_unlock(desc, flags); ++ return ret; ++} ++ ++/** ++ * teardown_percpu_nmi - undoes NMI setup of IRQ line ++ * @irq: Interrupt line from which CPU local NMI configuration should be ++ * removed ++ * ++ * This call undoes the setup done by prepare_percpu_nmi(). ++ * ++ * IRQ line should not be enabled for the current CPU. ++ * ++ * As a CPU local operation, this should be called from non-preemptible ++ * context. ++ */ ++void teardown_percpu_nmi(unsigned int irq) ++{ ++ unsigned long flags; ++ struct irq_desc *desc; ++ ++ WARN_ON(preemptible()); ++ ++ desc = irq_get_desc_lock(irq, &flags, ++ IRQ_GET_DESC_CHECK_PERCPU); ++ if (!desc) ++ return; ++ ++ if (WARN_ON(!(desc->istate & IRQS_NMI))) ++ goto out; ++ ++ irq_nmi_teardown(desc); ++out: ++ irq_put_desc_unlock(desc, flags); ++} ++ ++/** ++ * irq_get_irqchip_state - returns the irqchip state of a interrupt. 
++ * @irq: Interrupt line that is forwarded to a VM ++ * @which: One of IRQCHIP_STATE_* the caller wants to know about ++ * @state: a pointer to a boolean where the state is to be storeed ++ * ++ * This call snapshots the internal irqchip state of an ++ * interrupt, returning into @state the bit corresponding to ++ * stage @which ++ * ++ * This function should be called with preemption disabled if the ++ * interrupt controller has per-cpu registers. ++ */ ++int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, ++ bool *state) ++{ ++ struct irq_desc *desc; ++ struct irq_data *data; ++ unsigned long flags; ++ int err = -EINVAL; ++ ++ desc = irq_get_desc_buslock(irq, &flags, 0); ++ if (!desc) ++ return err; ++ ++ data = irq_desc_get_irq_data(desc); ++ ++ err = __irq_get_irqchip_state(data, which, state); ++ ++ irq_put_desc_busunlock(desc, flags); ++ return err; ++} ++EXPORT_SYMBOL_GPL(irq_get_irqchip_state); ++ ++/** ++ * irq_set_irqchip_state - set the state of a forwarded interrupt. ++ * @irq: Interrupt line that is forwarded to a VM ++ * @which: State to be restored (one of IRQCHIP_STATE_*) ++ * @val: Value corresponding to @which ++ * ++ * This call sets the internal irqchip state of an interrupt, ++ * depending on the value of @which. ++ * ++ * This function should be called with preemption disabled if the ++ * interrupt controller has per-cpu registers. ++ */ ++int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, ++ bool val) ++{ ++ struct irq_desc *desc; ++ struct irq_data *data; ++ struct irq_chip *chip; ++ unsigned long flags; ++ int err = -EINVAL; ++ ++ desc = irq_get_desc_buslock(irq, &flags, 0); ++ if (!desc) ++ return err; ++ ++ data = irq_desc_get_irq_data(desc); ++ ++ do { ++ chip = irq_data_get_irq_chip(data); ++ if (chip->irq_set_irqchip_state) ++ break; ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ data = data->parent_data; ++#else ++ data = NULL; ++#endif ++ } while (data); ++ ++ if (data) ++ err = chip->irq_set_irqchip_state(data, which, val); ++ ++ irq_put_desc_busunlock(desc, flags); ++ return err; ++} ++EXPORT_SYMBOL_GPL(irq_set_irqchip_state); +diff -uprN kernel/kernel/irq/msi.c kernel_new/kernel/irq/msi.c +--- kernel/kernel/irq/msi.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/msi.c 2021-04-01 18:28:07.809863116 +0800 +@@ -268,6 +268,9 @@ static void msi_domain_update_chip_ops(s + struct irq_chip *chip = info->chip; + + BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask); ++ WARN_ONCE(IS_ENABLED(CONFIG_IPIPE) && ++ (chip->flags & IRQCHIP_PIPELINE_SAFE) == 0, ++ "MSI domain irqchip %s is not pipeline-safe!", chip->name); + if (!chip->irq_set_affinity) + chip->irq_set_affinity = msi_domain_set_affinity; + } +diff -uprN kernel/kernel/locking/lockdep.c kernel_new/kernel/locking/lockdep.c +--- kernel/kernel/locking/lockdep.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/locking/lockdep.c 2021-04-01 18:28:07.809863116 +0800 +@@ -2859,7 +2859,7 @@ void lockdep_hardirqs_on(unsigned long i + * already enabled, yet we find the hardware thinks they are in fact + * enabled.. someone messed up their IRQ state tracing. 
+ */ +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) + return; + + /* +@@ -2885,7 +2885,9 @@ void lockdep_hardirqs_on(unsigned long i + */ + void lockdep_hardirqs_off(unsigned long ip) + { +- struct task_struct *curr = current; ++ struct task_struct *curr; ++ ++ curr = current; + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; +@@ -2894,7 +2896,7 @@ void lockdep_hardirqs_off(unsigned long + * So we're supposed to get called after you mask local IRQs, but for + * some reason the hardware doesn't quite think you did a proper job. + */ +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) + return; + + if (curr->hardirqs_enabled) { +@@ -2923,7 +2925,7 @@ void trace_softirqs_on(unsigned long ip) + * We fancy IRQs being disabled here, see softirq.c, avoids + * funny state and nesting things. + */ +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) + return; + + if (curr->softirqs_enabled) { +@@ -2962,7 +2964,7 @@ void trace_softirqs_off(unsigned long ip + /* + * We fancy IRQs being disabled here, see softirq.c + */ +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) + return; + + if (curr->softirqs_enabled) { +diff -uprN kernel/kernel/locking/lockdep_internals.h kernel_new/kernel/locking/lockdep_internals.h +--- kernel/kernel/locking/lockdep_internals.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/locking/lockdep_internals.h 2021-04-01 18:28:07.810863115 +0800 +@@ -160,12 +160,12 @@ DECLARE_PER_CPU(struct lockdep_stats, lo + this_cpu_inc(lockdep_stats.ptr); + + #define debug_atomic_inc(ptr) { \ +- WARN_ON_ONCE(!irqs_disabled()); \ ++ WARN_ON_ONCE(!hard_irqs_disabled() && !irqs_disabled()); \ + __this_cpu_inc(lockdep_stats.ptr); \ + } + + #define debug_atomic_dec(ptr) { \ +- WARN_ON_ONCE(!irqs_disabled()); \ ++ WARN_ON_ONCE(!hard_irqs_disabled() && !irqs_disabled());\ + __this_cpu_dec(lockdep_stats.ptr); \ + } + +diff -uprN kernel/kernel/locking/spinlock.c kernel_new/kernel/locking/spinlock.c +--- kernel/kernel/locking/spinlock.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/locking/spinlock.c 2021-04-01 18:28:07.810863115 +0800 +@@ -27,7 +27,9 @@ + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) ++#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ ++ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ ++ defined(CONFIG_IPIPE) + /* + * The __lock_function inlines are taken from + * spinlock : include/linux/spinlock_api_smp.h +diff -uprN kernel/kernel/Makefile kernel_new/kernel/Makefile +--- kernel/kernel/Makefile 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/Makefile 2021-04-01 18:28:07.810863115 +0800 +@@ -87,6 +87,7 @@ obj-$(CONFIG_LOCKUP_DETECTOR) += watchdo + obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o + obj-$(CONFIG_SECCOMP) += seccomp.o + obj-$(CONFIG_RELAY) += relay.o ++obj-$(CONFIG_IPIPE) += ipipe/ + obj-$(CONFIG_SYSCTL) += utsname_sysctl.o + obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o + obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o +diff -uprN kernel/kernel/Makefile.orig kernel_new/kernel/Makefile.orig +--- kernel/kernel/Makefile.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/Makefile.orig 
2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,127 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# ++# Makefile for the linux kernel. ++# ++ ++obj-y = fork.o exec_domain.o panic.o \ ++ cpu.o exit.o softirq.o resource.o \ ++ sysctl.o sysctl_binary.o capability.o ptrace.o user.o \ ++ signal.o sys.o umh.o workqueue.o pid.o task_work.o \ ++ extable.o params.o \ ++ kthread.o sys_ni.o nsproxy.o \ ++ notifier.o ksysfs.o cred.o reboot.o \ ++ async.o range.o smpboot.o ucount.o ktask.o ++ ++obj-$(CONFIG_MODULES) += kmod.o ++obj-$(CONFIG_MULTIUSER) += groups.o ++ ++ifdef CONFIG_FUNCTION_TRACER ++# Do not trace internal ftrace files ++CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE) ++endif ++ ++# Prevents flicker of uninteresting __do_softirq()/__local_bh_disable_ip() ++# in coverage traces. ++KCOV_INSTRUMENT_softirq.o := n ++# These are called from save_stack_trace() on slub debug path, ++# and produce insane amounts of uninteresting coverage. ++KCOV_INSTRUMENT_module.o := n ++KCOV_INSTRUMENT_extable.o := n ++# Don't self-instrument. ++KCOV_INSTRUMENT_kcov.o := n ++KASAN_SANITIZE_kcov.o := n ++CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) ++ ++# cond_syscall is currently not LTO compatible ++CFLAGS_sys_ni.o = $(DISABLE_LTO) ++ ++obj-y += sched/ ++obj-y += locking/ ++obj-y += power/ ++obj-y += printk/ ++obj-y += irq/ ++obj-y += rcu/ ++obj-y += livepatch/ ++obj-y += dma/ ++ ++obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o ++obj-$(CONFIG_FREEZER) += freezer.o ++obj-$(CONFIG_PROFILING) += profile.o ++obj-$(CONFIG_STACKTRACE) += stacktrace.o ++obj-y += time/ ++obj-$(CONFIG_FUTEX) += futex.o ++obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o ++obj-$(CONFIG_SMP) += smp.o ++ifneq ($(CONFIG_SMP),y) ++obj-y += up.o ++endif ++obj-$(CONFIG_UID16) += uid16.o ++obj-$(CONFIG_MODULES) += module.o ++obj-$(CONFIG_MODULE_SIG) += module_signing.o ++obj-$(CONFIG_KALLSYMS) += kallsyms.o ++obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o ++obj-$(CONFIG_CRASH_CORE) += crash_core.o ++obj-$(CONFIG_KEXEC_CORE) += kexec_core.o ++obj-$(CONFIG_KEXEC) += kexec.o ++obj-$(CONFIG_KEXEC_FILE) += kexec_file.o ++obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o ++obj-$(CONFIG_COMPAT) += compat.o ++obj-$(CONFIG_CGROUPS) += cgroup/ ++obj-$(CONFIG_UTS_NS) += utsname.o ++obj-$(CONFIG_USER_NS) += user_namespace.o ++obj-$(CONFIG_PID_NS) += pid_namespace.o ++obj-$(CONFIG_IKCONFIG) += configs.o ++obj-$(CONFIG_SMP) += stop_machine.o ++obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o ++obj-$(CONFIG_AUDIT) += audit.o auditfilter.o ++obj-$(CONFIG_AUDITSYSCALL) += auditsc.o ++obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_fsnotify.o ++obj-$(CONFIG_AUDIT_TREE) += audit_tree.o ++obj-$(CONFIG_GCOV_KERNEL) += gcov/ ++obj-$(CONFIG_KCOV) += kcov.o ++obj-$(CONFIG_KPROBES) += kprobes.o ++obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o ++obj-$(CONFIG_KGDB) += debug/ ++obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o ++obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o ++obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o ++obj-$(CONFIG_SECCOMP) += seccomp.o ++obj-$(CONFIG_RELAY) += relay.o ++obj-$(CONFIG_SYSCTL) += utsname_sysctl.o ++obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o ++obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o ++obj-$(CONFIG_TRACEPOINTS) += tracepoint.o ++obj-$(CONFIG_LATENCYTOP) += latencytop.o ++obj-$(CONFIG_ELFCORE) += elfcore.o ++obj-$(CONFIG_FUNCTION_TRACER) += trace/ ++obj-$(CONFIG_TRACING) += trace/ ++obj-$(CONFIG_TRACE_CLOCK) += trace/ ++obj-$(CONFIG_RING_BUFFER) += trace/ ++obj-$(CONFIG_TRACEPOINTS) += trace/ 
++obj-$(CONFIG_IRQ_WORK) += irq_work.o ++obj-$(CONFIG_CPU_PM) += cpu_pm.o ++obj-$(CONFIG_BPF) += bpf/ ++ ++obj-$(CONFIG_PERF_EVENTS) += events/ ++ ++obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o ++obj-$(CONFIG_PADATA) += padata.o ++obj-$(CONFIG_CRASH_DUMP) += crash_dump.o ++obj-$(CONFIG_JUMP_LABEL) += jump_label.o ++obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o ++obj-$(CONFIG_TORTURE_TEST) += torture.o ++ ++obj-$(CONFIG_HAS_IOMEM) += iomem.o ++obj-$(CONFIG_ZONE_DEVICE) += memremap.o ++obj-$(CONFIG_RSEQ) += rseq.o ++ ++$(obj)/configs.o: $(obj)/config_data.h ++ ++targets += config_data.gz ++$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE ++ $(call if_changed,gzip) ++ ++ filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") ++targets += config_data.h ++$(obj)/config_data.h: $(obj)/config_data.gz FORCE ++ $(call filechk,ikconfiggz) +diff -uprN kernel/kernel/module.c kernel_new/kernel/module.c +--- kernel/kernel/module.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/module.c 2021-04-01 18:28:07.810863115 +0800 +@@ -1112,7 +1112,7 @@ bool try_module_get(struct module *modul + bool ret = true; + + if (module) { +- preempt_disable(); ++ unsigned long flags = hard_preempt_disable(); + /* Note: here, we can fail to get a reference */ + if (likely(module_is_live(module) && + atomic_inc_not_zero(&module->refcnt) != 0)) +@@ -1120,7 +1120,7 @@ bool try_module_get(struct module *modul + else + ret = false; + +- preempt_enable(); ++ hard_preempt_enable(flags); + } + return ret; + } +@@ -1131,11 +1131,11 @@ void module_put(struct module *module) + int ret; + + if (module) { +- preempt_disable(); ++ unsigned long flags = hard_preempt_disable(); + ret = atomic_dec_if_positive(&module->refcnt); + WARN_ON(ret < 0); /* Failed to put refcount */ + trace_module_put(module, _RET_IP_); +- preempt_enable(); ++ hard_preempt_enable(flags); + } + } + EXPORT_SYMBOL(module_put); +diff -uprN kernel/kernel/notifier.c kernel_new/kernel/notifier.c +--- kernel/kernel/notifier.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/notifier.c 2021-04-01 18:28:07.810863115 +0800 +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + + /* + * Notifier list for kernel code which wants to be called +@@ -195,6 +196,9 @@ NOKPROBE_SYMBOL(__atomic_notifier_call_c + int atomic_notifier_call_chain(struct atomic_notifier_head *nh, + unsigned long val, void *v) + { ++ if (!ipipe_root_p) ++ return notifier_call_chain(&nh->head, val, v, -1, NULL); ++ + return __atomic_notifier_call_chain(nh, val, v, -1, NULL); + } + EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); +diff -uprN kernel/kernel/notifier.c.orig kernel_new/kernel/notifier.c.orig +--- kernel/kernel/notifier.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/notifier.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,569 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Notifier list for kernel code which wants to be called ++ * at shutdown. This is used to stop any idling DMA operations ++ * and the like. ++ */ ++BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); ++ ++/* ++ * Notifier chain core routines. The exported routines below ++ * are layered on top of these, with appropriate locking added. 
++ */ ++ ++static int notifier_chain_register(struct notifier_block **nl, ++ struct notifier_block *n) ++{ ++ while ((*nl) != NULL) { ++ if (unlikely((*nl) == n)) { ++ WARN(1, "double register detected"); ++ return 0; ++ } ++ ++ if (n->priority > (*nl)->priority) ++ break; ++ nl = &((*nl)->next); ++ } ++ n->next = *nl; ++ rcu_assign_pointer(*nl, n); ++ return 0; ++} ++ ++static int notifier_chain_cond_register(struct notifier_block **nl, ++ struct notifier_block *n) ++{ ++ while ((*nl) != NULL) { ++ if ((*nl) == n) ++ return 0; ++ if (n->priority > (*nl)->priority) ++ break; ++ nl = &((*nl)->next); ++ } ++ n->next = *nl; ++ rcu_assign_pointer(*nl, n); ++ return 0; ++} ++ ++static int notifier_chain_unregister(struct notifier_block **nl, ++ struct notifier_block *n) ++{ ++ while ((*nl) != NULL) { ++ if ((*nl) == n) { ++ rcu_assign_pointer(*nl, n->next); ++ return 0; ++ } ++ nl = &((*nl)->next); ++ } ++ return -ENOENT; ++} ++ ++/** ++ * notifier_call_chain - Informs the registered notifiers about an event. ++ * @nl: Pointer to head of the blocking notifier chain ++ * @val: Value passed unmodified to notifier function ++ * @v: Pointer passed unmodified to notifier function ++ * @nr_to_call: Number of notifier functions to be called. Don't care ++ * value of this parameter is -1. ++ * @nr_calls: Records the number of notifications sent. Don't care ++ * value of this field is NULL. ++ * @returns: notifier_call_chain returns the value returned by the ++ * last notifier function called. ++ */ ++static int notifier_call_chain(struct notifier_block **nl, ++ unsigned long val, void *v, ++ int nr_to_call, int *nr_calls) ++{ ++ int ret = NOTIFY_DONE; ++ struct notifier_block *nb, *next_nb; ++ ++ nb = rcu_dereference_raw(*nl); ++ ++ while (nb && nr_to_call) { ++ next_nb = rcu_dereference_raw(nb->next); ++ ++#ifdef CONFIG_DEBUG_NOTIFIERS ++ if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) { ++ WARN(1, "Invalid notifier called!"); ++ nb = next_nb; ++ continue; ++ } ++#endif ++ ret = nb->notifier_call(nb, val, v); ++ ++ if (nr_calls) ++ (*nr_calls)++; ++ ++ if (ret & NOTIFY_STOP_MASK) ++ break; ++ nb = next_nb; ++ nr_to_call--; ++ } ++ return ret; ++} ++NOKPROBE_SYMBOL(notifier_call_chain); ++ ++/* ++ * Atomic notifier chain routines. Registration and unregistration ++ * use a spinlock, and call_chain is synchronized by RCU (no locks). ++ */ ++ ++/** ++ * atomic_notifier_chain_register - Add notifier to an atomic notifier chain ++ * @nh: Pointer to head of the atomic notifier chain ++ * @n: New entry in notifier chain ++ * ++ * Adds a notifier to an atomic notifier chain. ++ * ++ * Currently always returns zero. ++ */ ++int atomic_notifier_chain_register(struct atomic_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ unsigned long flags; ++ int ret; ++ ++ spin_lock_irqsave(&nh->lock, flags); ++ ret = notifier_chain_register(&nh->head, n); ++ spin_unlock_irqrestore(&nh->lock, flags); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); ++ ++/** ++ * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain ++ * @nh: Pointer to head of the atomic notifier chain ++ * @n: Entry to remove from notifier chain ++ * ++ * Removes a notifier from an atomic notifier chain. ++ * ++ * Returns zero on success or %-ENOENT on failure. 
++ */ ++int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ unsigned long flags; ++ int ret; ++ ++ spin_lock_irqsave(&nh->lock, flags); ++ ret = notifier_chain_unregister(&nh->head, n); ++ spin_unlock_irqrestore(&nh->lock, flags); ++ synchronize_rcu(); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); ++ ++/** ++ * __atomic_notifier_call_chain - Call functions in an atomic notifier chain ++ * @nh: Pointer to head of the atomic notifier chain ++ * @val: Value passed unmodified to notifier function ++ * @v: Pointer passed unmodified to notifier function ++ * @nr_to_call: See the comment for notifier_call_chain. ++ * @nr_calls: See the comment for notifier_call_chain. ++ * ++ * Calls each function in a notifier chain in turn. The functions ++ * run in an atomic context, so they must not block. ++ * This routine uses RCU to synchronize with changes to the chain. ++ * ++ * If the return value of the notifier can be and'ed ++ * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain() ++ * will return immediately, with the return value of ++ * the notifier function which halted execution. ++ * Otherwise the return value is the return value ++ * of the last notifier function called. ++ */ ++int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, ++ unsigned long val, void *v, ++ int nr_to_call, int *nr_calls) ++{ ++ int ret; ++ ++ rcu_read_lock(); ++ ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); ++ rcu_read_unlock(); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain); ++NOKPROBE_SYMBOL(__atomic_notifier_call_chain); ++ ++int atomic_notifier_call_chain(struct atomic_notifier_head *nh, ++ unsigned long val, void *v) ++{ ++ return __atomic_notifier_call_chain(nh, val, v, -1, NULL); ++} ++EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); ++NOKPROBE_SYMBOL(atomic_notifier_call_chain); ++ ++/* ++ * Blocking notifier chain routines. All access to the chain is ++ * synchronized by an rwsem. ++ */ ++ ++/** ++ * blocking_notifier_chain_register - Add notifier to a blocking notifier chain ++ * @nh: Pointer to head of the blocking notifier chain ++ * @n: New entry in notifier chain ++ * ++ * Adds a notifier to a blocking notifier chain. ++ * Must be called in process context. ++ * ++ * Currently always returns zero. ++ */ ++int blocking_notifier_chain_register(struct blocking_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ int ret; ++ ++ /* ++ * This code gets used during boot-up, when task switching is ++ * not yet working and interrupts must remain disabled. At ++ * such times we must not call down_write(). ++ */ ++ if (unlikely(system_state == SYSTEM_BOOTING)) ++ return notifier_chain_register(&nh->head, n); ++ ++ down_write(&nh->rwsem); ++ ret = notifier_chain_register(&nh->head, n); ++ up_write(&nh->rwsem); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); ++ ++/** ++ * blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain ++ * @nh: Pointer to head of the blocking notifier chain ++ * @n: New entry in notifier chain ++ * ++ * Adds a notifier to a blocking notifier chain, only if not already ++ * present in the chain. ++ * Must be called in process context. ++ * ++ * Currently always returns zero. 
++ */ ++int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ int ret; ++ ++ down_write(&nh->rwsem); ++ ret = notifier_chain_cond_register(&nh->head, n); ++ up_write(&nh->rwsem); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register); ++ ++/** ++ * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain ++ * @nh: Pointer to head of the blocking notifier chain ++ * @n: Entry to remove from notifier chain ++ * ++ * Removes a notifier from a blocking notifier chain. ++ * Must be called from process context. ++ * ++ * Returns zero on success or %-ENOENT on failure. ++ */ ++int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ int ret; ++ ++ /* ++ * This code gets used during boot-up, when task switching is ++ * not yet working and interrupts must remain disabled. At ++ * such times we must not call down_write(). ++ */ ++ if (unlikely(system_state == SYSTEM_BOOTING)) ++ return notifier_chain_unregister(&nh->head, n); ++ ++ down_write(&nh->rwsem); ++ ret = notifier_chain_unregister(&nh->head, n); ++ up_write(&nh->rwsem); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); ++ ++/** ++ * __blocking_notifier_call_chain - Call functions in a blocking notifier chain ++ * @nh: Pointer to head of the blocking notifier chain ++ * @val: Value passed unmodified to notifier function ++ * @v: Pointer passed unmodified to notifier function ++ * @nr_to_call: See comment for notifier_call_chain. ++ * @nr_calls: See comment for notifier_call_chain. ++ * ++ * Calls each function in a notifier chain in turn. The functions ++ * run in a process context, so they are allowed to block. ++ * ++ * If the return value of the notifier can be and'ed ++ * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain() ++ * will return immediately, with the return value of ++ * the notifier function which halted execution. ++ * Otherwise the return value is the return value ++ * of the last notifier function called. ++ */ ++int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, ++ unsigned long val, void *v, ++ int nr_to_call, int *nr_calls) ++{ ++ int ret = NOTIFY_DONE; ++ ++ /* ++ * We check the head outside the lock, but if this access is ++ * racy then it does not matter what the result of the test ++ * is, we re-check the list after having taken the lock anyway: ++ */ ++ if (rcu_access_pointer(nh->head)) { ++ down_read(&nh->rwsem); ++ ret = notifier_call_chain(&nh->head, val, v, nr_to_call, ++ nr_calls); ++ up_read(&nh->rwsem); ++ } ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain); ++ ++int blocking_notifier_call_chain(struct blocking_notifier_head *nh, ++ unsigned long val, void *v) ++{ ++ return __blocking_notifier_call_chain(nh, val, v, -1, NULL); ++} ++EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); ++ ++/* ++ * Raw notifier chain routines. There is no protection; ++ * the caller must provide it. Use at your own risk! ++ */ ++ ++/** ++ * raw_notifier_chain_register - Add notifier to a raw notifier chain ++ * @nh: Pointer to head of the raw notifier chain ++ * @n: New entry in notifier chain ++ * ++ * Adds a notifier to a raw notifier chain. ++ * All locking must be provided by the caller. ++ * ++ * Currently always returns zero. 
++ */ ++int raw_notifier_chain_register(struct raw_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ return notifier_chain_register(&nh->head, n); ++} ++EXPORT_SYMBOL_GPL(raw_notifier_chain_register); ++ ++/** ++ * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain ++ * @nh: Pointer to head of the raw notifier chain ++ * @n: Entry to remove from notifier chain ++ * ++ * Removes a notifier from a raw notifier chain. ++ * All locking must be provided by the caller. ++ * ++ * Returns zero on success or %-ENOENT on failure. ++ */ ++int raw_notifier_chain_unregister(struct raw_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ return notifier_chain_unregister(&nh->head, n); ++} ++EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); ++ ++/** ++ * __raw_notifier_call_chain - Call functions in a raw notifier chain ++ * @nh: Pointer to head of the raw notifier chain ++ * @val: Value passed unmodified to notifier function ++ * @v: Pointer passed unmodified to notifier function ++ * @nr_to_call: See comment for notifier_call_chain. ++ * @nr_calls: See comment for notifier_call_chain ++ * ++ * Calls each function in a notifier chain in turn. The functions ++ * run in an undefined context. ++ * All locking must be provided by the caller. ++ * ++ * If the return value of the notifier can be and'ed ++ * with %NOTIFY_STOP_MASK then raw_notifier_call_chain() ++ * will return immediately, with the return value of ++ * the notifier function which halted execution. ++ * Otherwise the return value is the return value ++ * of the last notifier function called. ++ */ ++int __raw_notifier_call_chain(struct raw_notifier_head *nh, ++ unsigned long val, void *v, ++ int nr_to_call, int *nr_calls) ++{ ++ return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); ++} ++EXPORT_SYMBOL_GPL(__raw_notifier_call_chain); ++ ++int raw_notifier_call_chain(struct raw_notifier_head *nh, ++ unsigned long val, void *v) ++{ ++ return __raw_notifier_call_chain(nh, val, v, -1, NULL); ++} ++EXPORT_SYMBOL_GPL(raw_notifier_call_chain); ++ ++#ifdef CONFIG_SRCU ++/* ++ * SRCU notifier chain routines. Registration and unregistration ++ * use a mutex, and call_chain is synchronized by SRCU (no locks). ++ */ ++ ++/** ++ * srcu_notifier_chain_register - Add notifier to an SRCU notifier chain ++ * @nh: Pointer to head of the SRCU notifier chain ++ * @n: New entry in notifier chain ++ * ++ * Adds a notifier to an SRCU notifier chain. ++ * Must be called in process context. ++ * ++ * Currently always returns zero. ++ */ ++int srcu_notifier_chain_register(struct srcu_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ int ret; ++ ++ /* ++ * This code gets used during boot-up, when task switching is ++ * not yet working and interrupts must remain disabled. At ++ * such times we must not call mutex_lock(). ++ */ ++ if (unlikely(system_state == SYSTEM_BOOTING)) ++ return notifier_chain_register(&nh->head, n); ++ ++ mutex_lock(&nh->mutex); ++ ret = notifier_chain_register(&nh->head, n); ++ mutex_unlock(&nh->mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(srcu_notifier_chain_register); ++ ++/** ++ * srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain ++ * @nh: Pointer to head of the SRCU notifier chain ++ * @n: Entry to remove from notifier chain ++ * ++ * Removes a notifier from an SRCU notifier chain. ++ * Must be called from process context. ++ * ++ * Returns zero on success or %-ENOENT on failure. 
++ */ ++int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ int ret; ++ ++ /* ++ * This code gets used during boot-up, when task switching is ++ * not yet working and interrupts must remain disabled. At ++ * such times we must not call mutex_lock(). ++ */ ++ if (unlikely(system_state == SYSTEM_BOOTING)) ++ return notifier_chain_unregister(&nh->head, n); ++ ++ mutex_lock(&nh->mutex); ++ ret = notifier_chain_unregister(&nh->head, n); ++ mutex_unlock(&nh->mutex); ++ synchronize_srcu(&nh->srcu); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); ++ ++/** ++ * __srcu_notifier_call_chain - Call functions in an SRCU notifier chain ++ * @nh: Pointer to head of the SRCU notifier chain ++ * @val: Value passed unmodified to notifier function ++ * @v: Pointer passed unmodified to notifier function ++ * @nr_to_call: See comment for notifier_call_chain. ++ * @nr_calls: See comment for notifier_call_chain ++ * ++ * Calls each function in a notifier chain in turn. The functions ++ * run in a process context, so they are allowed to block. ++ * ++ * If the return value of the notifier can be and'ed ++ * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain() ++ * will return immediately, with the return value of ++ * the notifier function which halted execution. ++ * Otherwise the return value is the return value ++ * of the last notifier function called. ++ */ ++int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, ++ unsigned long val, void *v, ++ int nr_to_call, int *nr_calls) ++{ ++ int ret; ++ int idx; ++ ++ idx = srcu_read_lock(&nh->srcu); ++ ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); ++ srcu_read_unlock(&nh->srcu, idx); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain); ++ ++int srcu_notifier_call_chain(struct srcu_notifier_head *nh, ++ unsigned long val, void *v) ++{ ++ return __srcu_notifier_call_chain(nh, val, v, -1, NULL); ++} ++EXPORT_SYMBOL_GPL(srcu_notifier_call_chain); ++ ++/** ++ * srcu_init_notifier_head - Initialize an SRCU notifier head ++ * @nh: Pointer to head of the srcu notifier chain ++ * ++ * Unlike other sorts of notifier heads, SRCU notifier heads require ++ * dynamic initialization. Be sure to call this routine before ++ * calling any of the other SRCU notifier routines for this head. ++ * ++ * If an SRCU notifier head is deallocated, it must first be cleaned ++ * up by calling srcu_cleanup_notifier_head(). Otherwise the head's ++ * per-cpu data (used by the SRCU mechanism) will leak. 
++ */ ++void srcu_init_notifier_head(struct srcu_notifier_head *nh) ++{ ++ mutex_init(&nh->mutex); ++ if (init_srcu_struct(&nh->srcu) < 0) ++ BUG(); ++ nh->head = NULL; ++} ++EXPORT_SYMBOL_GPL(srcu_init_notifier_head); ++ ++#endif /* CONFIG_SRCU */ ++ ++static ATOMIC_NOTIFIER_HEAD(die_chain); ++ ++int notrace notify_die(enum die_val val, const char *str, ++ struct pt_regs *regs, long err, int trap, int sig) ++{ ++ struct die_args args = { ++ .regs = regs, ++ .str = str, ++ .err = err, ++ .trapnr = trap, ++ .signr = sig, ++ ++ }; ++ RCU_LOCKDEP_WARN(!rcu_is_watching(), ++ "notify_die called but RCU thinks we're quiescent"); ++ return atomic_notifier_call_chain(&die_chain, val, &args); ++} ++NOKPROBE_SYMBOL(notify_die); ++ ++int register_die_notifier(struct notifier_block *nb) ++{ ++ vmalloc_sync_mappings(); ++ return atomic_notifier_chain_register(&die_chain, nb); ++} ++EXPORT_SYMBOL_GPL(register_die_notifier); ++ ++int unregister_die_notifier(struct notifier_block *nb) ++{ ++ return atomic_notifier_chain_unregister(&die_chain, nb); ++} ++EXPORT_SYMBOL_GPL(unregister_die_notifier); +diff -uprN kernel/kernel/panic.c kernel_new/kernel/panic.c +--- kernel/kernel/panic.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/panic.c 2021-04-01 18:28:07.810863115 +0800 +@@ -20,8 +20,10 @@ + #include + #include + #include ++#include + #include + #include ++#include + #include + #include + #include +@@ -523,6 +525,8 @@ void oops_enter(void) + { + tracing_off(); + /* can't trust the integrity of the kernel anymore: */ ++ ipipe_trace_panic_freeze(); ++ ipipe_disable_context_check(); + debug_locks_off(); + do_oops_enter_exit(); + } +diff -uprN kernel/kernel/panic.c.orig kernel_new/kernel/panic.c.orig +--- kernel/kernel/panic.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/panic.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,725 @@ ++/* ++ * linux/kernel/panic.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ */ ++ ++/* ++ * This function is used through-out the kernel (including mm and fs) ++ * to indicate a major problem. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define PANIC_TIMER_STEP 100 ++#define PANIC_BLINK_SPD 18 ++ ++int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; ++static unsigned long tainted_mask = ++ IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? 
(1 << TAINT_RANDSTRUCT) : 0; ++static int pause_on_oops; ++static int pause_on_oops_flag; ++static DEFINE_SPINLOCK(pause_on_oops_lock); ++bool crash_kexec_post_notifiers; ++int panic_on_warn __read_mostly; ++ ++int panic_timeout = CONFIG_PANIC_TIMEOUT; ++EXPORT_SYMBOL_GPL(panic_timeout); ++ ++#define PANIC_PRINT_TASK_INFO 0x00000001 ++#define PANIC_PRINT_MEM_INFO 0x00000002 ++#define PANIC_PRINT_TIMER_INFO 0x00000004 ++#define PANIC_PRINT_LOCK_INFO 0x00000008 ++#define PANIC_PRINT_FTRACE_INFO 0x00000010 ++unsigned long panic_print; ++ ++ATOMIC_NOTIFIER_HEAD(panic_notifier_list); ++ ++EXPORT_SYMBOL(panic_notifier_list); ++ ++static long no_blink(int state) ++{ ++ return 0; ++} ++ ++/* Returns how long it waited in ms */ ++long (*panic_blink)(int state); ++EXPORT_SYMBOL(panic_blink); ++ ++/* ++ * Stop ourself in panic -- architecture code may override this ++ */ ++void __weak panic_smp_self_stop(void) ++{ ++ while (1) ++ cpu_relax(); ++} ++ ++/* ++ * Stop ourselves in NMI context if another CPU has already panicked. Arch code ++ * may override this to prepare for crash dumping, e.g. save regs info. ++ */ ++void __weak nmi_panic_self_stop(struct pt_regs *regs) ++{ ++ panic_smp_self_stop(); ++} ++ ++/* ++ * Stop other CPUs in panic. Architecture dependent code may override this ++ * with more suitable version. For example, if the architecture supports ++ * crash dump, it should save registers of each stopped CPU and disable ++ * per-CPU features such as virtualization extensions. ++ */ ++void __weak crash_smp_send_stop(void) ++{ ++ static int cpus_stopped; ++ ++ /* ++ * This function can be called twice in panic path, but obviously ++ * we execute this only once. ++ */ ++ if (cpus_stopped) ++ return; ++ ++ /* ++ * Note smp_send_stop is the usual smp shutdown function, which ++ * unfortunately means it may not be hardened to work in a panic ++ * situation. ++ */ ++ smp_send_stop(); ++ cpus_stopped = 1; ++} ++ ++atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); ++ ++/* ++ * A variant of panic() called from NMI context. We return if we've already ++ * panicked on this CPU. If another CPU already panicked, loop in ++ * nmi_panic_self_stop() which can provide architecture dependent code such ++ * as saving register state for crash dump. ++ */ ++void nmi_panic(struct pt_regs *regs, const char *msg) ++{ ++ int old_cpu, cpu; ++ ++ cpu = raw_smp_processor_id(); ++ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); ++ ++ if (old_cpu == PANIC_CPU_INVALID) ++ panic("%s", msg); ++ else if (old_cpu != cpu) ++ nmi_panic_self_stop(regs); ++} ++EXPORT_SYMBOL(nmi_panic); ++ ++static void panic_print_sys_info(void) ++{ ++ if (panic_print & PANIC_PRINT_TASK_INFO) ++ show_state(); ++ ++ if (panic_print & PANIC_PRINT_MEM_INFO) ++ show_mem(0, NULL); ++ ++ if (panic_print & PANIC_PRINT_TIMER_INFO) ++ sysrq_timer_list_show(); ++ ++ if (panic_print & PANIC_PRINT_LOCK_INFO) ++ debug_show_all_locks(); ++ ++ if (panic_print & PANIC_PRINT_FTRACE_INFO) ++ ftrace_dump(DUMP_ALL); ++} ++ ++/** ++ * panic - halt the system ++ * @fmt: The text string to print ++ * ++ * Display a message, then perform cleanups. ++ * ++ * This function never returns. ++ */ ++void panic(const char *fmt, ...) ++{ ++ static char buf[1024]; ++ va_list args; ++ long i, i_next = 0; ++ int state = 0; ++ int old_cpu, this_cpu; ++ bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; ++ ++ /* ++ * Disable local interrupts. 
This will prevent panic_smp_self_stop ++ * from deadlocking the first cpu that invokes the panic, since ++ * there is nothing to prevent an interrupt handler (that runs ++ * after setting panic_cpu) from invoking panic() again. ++ */ ++ local_irq_disable(); ++ preempt_disable_notrace(); ++ ++ /* ++ * It's possible to come here directly from a panic-assertion and ++ * not have preempt disabled. Some functions called from here want ++ * preempt to be disabled. No point enabling it later though... ++ * ++ * Only one CPU is allowed to execute the panic code from here. For ++ * multiple parallel invocations of panic, all other CPUs either ++ * stop themself or will wait until they are stopped by the 1st CPU ++ * with smp_send_stop(). ++ * ++ * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which ++ * comes here, so go ahead. ++ * `old_cpu == this_cpu' means we came from nmi_panic() which sets ++ * panic_cpu to this CPU. In this case, this is also the 1st CPU. ++ */ ++ this_cpu = raw_smp_processor_id(); ++ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); ++ ++ if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) ++ panic_smp_self_stop(); ++ ++ console_verbose(); ++ bust_spinlocks(1); ++ va_start(args, fmt); ++ vsnprintf(buf, sizeof(buf), fmt, args); ++ va_end(args); ++ pr_emerg("Kernel panic - not syncing: %s\n", buf); ++#ifdef CONFIG_DEBUG_BUGVERBOSE ++ /* ++ * Avoid nested stack-dumping if a panic occurs during oops processing ++ */ ++ if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) ++ dump_stack(); ++#endif ++ ++ /* ++ * If we have crashed and we have a crash kernel loaded let it handle ++ * everything else. ++ * If we want to run this after calling panic_notifiers, pass ++ * the "crash_kexec_post_notifiers" option to the kernel. ++ * ++ * Bypass the panic_cpu check and call __crash_kexec directly. ++ */ ++ if (!_crash_kexec_post_notifiers) { ++ printk_safe_flush_on_panic(); ++ __crash_kexec(NULL); ++ ++ /* ++ * Note smp_send_stop is the usual smp shutdown function, which ++ * unfortunately means it may not be hardened to work in a ++ * panic situation. ++ */ ++ smp_send_stop(); ++ } else { ++ /* ++ * If we want to do crash dump after notifier calls and ++ * kmsg_dump, we will need architecture dependent extra ++ * works in addition to stopping other CPUs. ++ */ ++ crash_smp_send_stop(); ++ } ++ ++ /* ++ * ZAP console related locks when nmi broadcast. If a crash is occurring, ++ * make sure we can't deadlock. And make sure that we print immediately. ++ * ++ * A deadlock caused by logbuf_lock can be occured when panic: ++ * a) Panic CPU is running in non-NMI context; ++ * b) Panic CPU sends out shutdown IPI via NMI vector; ++ * c) One of the CPUs that we bring down via NMI vector holded logbuf_lock; ++ * d) Panic CPU try to hold logbuf_lock, then deadlock occurs. ++ * ++ * At present, only try to solve this problem for the ARCH with NMI, ++ * by reinit lock, this situation is more complicated when NMI is not ++ * used. ++ * 1). Non-stopped CPUs are in unknown state, most likely in a busy loop. ++ * Nobody knows whether printk() is repeatedly called in the loop. ++ * When it was called, re-initializing any lock would cause double ++ * unlock and deadlock. ++ * ++ * 2). It would be possible to add some more hacks. One problem is that ++ * there are two groups of users. One prefer to risk a deadlock and ++ * have a chance to see the messages. Others prefer to always ++ * reach emergency_restart() and reboot the machine. 
++ */ ++#ifdef CONFIG_X86 ++ zap_locks(); ++#endif ++ ++ /* ++ * Run any panic handlers, including those that might need to ++ * add information to the kmsg dump output. ++ */ ++ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); ++ ++ /* Call flush even twice. It tries harder with a single online CPU */ ++ printk_safe_flush_on_panic(); ++ kmsg_dump(KMSG_DUMP_PANIC); ++ ++ /* ++ * If you doubt kdump always works fine in any situation, ++ * "crash_kexec_post_notifiers" offers you a chance to run ++ * panic_notifiers and dumping kmsg before kdump. ++ * Note: since some panic_notifiers can make crashed kernel ++ * more unstable, it can increase risks of the kdump failure too. ++ * ++ * Bypass the panic_cpu check and call __crash_kexec directly. ++ */ ++ if (_crash_kexec_post_notifiers) ++ __crash_kexec(NULL); ++ ++#ifdef CONFIG_VT ++ unblank_screen(); ++#endif ++ console_unblank(); ++ ++ /* ++ * We may have ended up stopping the CPU holding the lock (in ++ * smp_send_stop()) while still having some valuable data in the console ++ * buffer. Try to acquire the lock then release it regardless of the ++ * result. The release will also print the buffers out. Locks debug ++ * should be disabled to avoid reporting bad unlock balance when ++ * panic() is not being callled from OOPS. ++ */ ++ debug_locks_off(); ++ console_flush_on_panic(); ++ ++ panic_print_sys_info(); ++ ++ if (!panic_blink) ++ panic_blink = no_blink; ++ ++ if (panic_timeout > 0) { ++ /* ++ * Delay timeout seconds before rebooting the machine. ++ * We can't use the "normal" timers since we just panicked. ++ */ ++ pr_emerg("Rebooting in %d seconds..\n", panic_timeout); ++ ++ for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { ++ touch_nmi_watchdog(); ++ if (i >= i_next) { ++ i += panic_blink(state ^= 1); ++ i_next = i + 3600 / PANIC_BLINK_SPD; ++ } ++ mdelay(PANIC_TIMER_STEP); ++ } ++ } ++ if (panic_timeout != 0) { ++ /* ++ * This will not be a clean reboot, with everything ++ * shutting down. But if there is a chance of ++ * rebooting the system it will be rebooted. ++ */ ++ emergency_restart(); ++ } ++#ifdef __sparc__ ++ { ++ extern int stop_a_enabled; ++ /* Make sure the user can actually press Stop-A (L1-A) */ ++ stop_a_enabled = 1; ++ pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n" ++ "twice on console to return to the boot prom\n"); ++ } ++#endif ++#if defined(CONFIG_S390) ++ { ++ unsigned long caller; ++ ++ caller = (unsigned long)__builtin_return_address(0); ++ disabled_wait(caller); ++ } ++#endif ++ pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf); ++ local_irq_enable(); ++ for (i = 0; ; i += PANIC_TIMER_STEP) { ++ touch_softlockup_watchdog(); ++ if (i >= i_next) { ++ i += panic_blink(state ^= 1); ++ i_next = i + 3600 / PANIC_BLINK_SPD; ++ } ++ mdelay(PANIC_TIMER_STEP); ++ } ++} ++ ++EXPORT_SYMBOL(panic); ++ ++/* ++ * TAINT_FORCED_RMMOD could be a per-module flag but the module ++ * is being removed anyway. 
++ */ ++const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { ++ [ TAINT_PROPRIETARY_MODULE ] = { 'P', 'G', true }, ++ [ TAINT_FORCED_MODULE ] = { 'F', ' ', true }, ++ [ TAINT_CPU_OUT_OF_SPEC ] = { 'S', ' ', false }, ++ [ TAINT_FORCED_RMMOD ] = { 'R', ' ', false }, ++ [ TAINT_MACHINE_CHECK ] = { 'M', ' ', false }, ++ [ TAINT_BAD_PAGE ] = { 'B', ' ', false }, ++ [ TAINT_USER ] = { 'U', ' ', false }, ++ [ TAINT_DIE ] = { 'D', ' ', false }, ++ [ TAINT_OVERRIDDEN_ACPI_TABLE ] = { 'A', ' ', false }, ++ [ TAINT_WARN ] = { 'W', ' ', false }, ++ [ TAINT_CRAP ] = { 'C', ' ', true }, ++ [ TAINT_FIRMWARE_WORKAROUND ] = { 'I', ' ', false }, ++ [ TAINT_OOT_MODULE ] = { 'O', ' ', true }, ++ [ TAINT_UNSIGNED_MODULE ] = { 'E', ' ', true }, ++ [ TAINT_SOFTLOCKUP ] = { 'L', ' ', false }, ++ [ TAINT_LIVEPATCH ] = { 'K', ' ', true }, ++ [ TAINT_AUX ] = { 'X', ' ', true }, ++ [ TAINT_RANDSTRUCT ] = { 'T', ' ', true }, ++}; ++ ++/** ++ * print_tainted - return a string to represent the kernel taint state. ++ * ++ * For individual taint flag meanings, see Documentation/sysctl/kernel.txt ++ * ++ * The string is overwritten by the next call to print_tainted(), ++ * but is always NULL terminated. ++ */ ++const char *print_tainted(void) ++{ ++ static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")]; ++ ++ BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT); ++ ++ if (tainted_mask) { ++ char *s; ++ int i; ++ ++ s = buf + sprintf(buf, "Tainted: "); ++ for (i = 0; i < TAINT_FLAGS_COUNT; i++) { ++ const struct taint_flag *t = &taint_flags[i]; ++ *s++ = test_bit(i, &tainted_mask) ? ++ t->c_true : t->c_false; ++ } ++ *s = 0; ++ } else ++ snprintf(buf, sizeof(buf), "Not tainted"); ++ ++ return buf; ++} ++ ++int test_taint(unsigned flag) ++{ ++ return test_bit(flag, &tainted_mask); ++} ++EXPORT_SYMBOL(test_taint); ++ ++unsigned long get_taint(void) ++{ ++ return tainted_mask; ++} ++ ++/** ++ * add_taint: add a taint flag if not already set. ++ * @flag: one of the TAINT_* constants. ++ * @lockdep_ok: whether lock debugging is still OK. ++ * ++ * If something bad has gone wrong, you'll want @lockdebug_ok = false, but for ++ * some notewortht-but-not-corrupting cases, it can be set to true. ++ */ ++void add_taint(unsigned flag, enum lockdep_ok lockdep_ok) ++{ ++ if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off()) ++ pr_warn("Disabling lock debugging due to kernel taint\n"); ++ ++ set_bit(flag, &tainted_mask); ++} ++EXPORT_SYMBOL(add_taint); ++ ++static void spin_msec(int msecs) ++{ ++ int i; ++ ++ for (i = 0; i < msecs; i++) { ++ touch_nmi_watchdog(); ++ mdelay(1); ++ } ++} ++ ++/* ++ * It just happens that oops_enter() and oops_exit() are identically ++ * implemented... 
++ */ ++static void do_oops_enter_exit(void) ++{ ++ unsigned long flags; ++ static int spin_counter; ++ ++ if (!pause_on_oops) ++ return; ++ ++ spin_lock_irqsave(&pause_on_oops_lock, flags); ++ if (pause_on_oops_flag == 0) { ++ /* This CPU may now print the oops message */ ++ pause_on_oops_flag = 1; ++ } else { ++ /* We need to stall this CPU */ ++ if (!spin_counter) { ++ /* This CPU gets to do the counting */ ++ spin_counter = pause_on_oops; ++ do { ++ spin_unlock(&pause_on_oops_lock); ++ spin_msec(MSEC_PER_SEC); ++ spin_lock(&pause_on_oops_lock); ++ } while (--spin_counter); ++ pause_on_oops_flag = 0; ++ } else { ++ /* This CPU waits for a different one */ ++ while (spin_counter) { ++ spin_unlock(&pause_on_oops_lock); ++ spin_msec(1); ++ spin_lock(&pause_on_oops_lock); ++ } ++ } ++ } ++ spin_unlock_irqrestore(&pause_on_oops_lock, flags); ++} ++ ++/* ++ * Return true if the calling CPU is allowed to print oops-related info. ++ * This is a bit racy.. ++ */ ++int oops_may_print(void) ++{ ++ return pause_on_oops_flag == 0; ++} ++ ++/* ++ * Called when the architecture enters its oops handler, before it prints ++ * anything. If this is the first CPU to oops, and it's oopsing the first ++ * time then let it proceed. ++ * ++ * This is all enabled by the pause_on_oops kernel boot option. We do all ++ * this to ensure that oopses don't scroll off the screen. It has the ++ * side-effect of preventing later-oopsing CPUs from mucking up the display, ++ * too. ++ * ++ * It turns out that the CPU which is allowed to print ends up pausing for ++ * the right duration, whereas all the other CPUs pause for twice as long: ++ * once in oops_enter(), once in oops_exit(). ++ */ ++void oops_enter(void) ++{ ++ tracing_off(); ++ /* can't trust the integrity of the kernel anymore: */ ++ debug_locks_off(); ++ do_oops_enter_exit(); ++} ++ ++/* ++ * 64-bit random ID for oopses: ++ */ ++static u64 oops_id; ++ ++static int init_oops_id(void) ++{ ++ if (!oops_id) ++ get_random_bytes(&oops_id, sizeof(oops_id)); ++ else ++ oops_id++; ++ ++ return 0; ++} ++late_initcall(init_oops_id); ++ ++void print_oops_end_marker(void) ++{ ++ init_oops_id(); ++ pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id); ++} ++ ++/* ++ * Called when the architecture exits its oops handler, after printing ++ * everything. ++ */ ++void oops_exit(void) ++{ ++ do_oops_enter_exit(); ++ print_oops_end_marker(); ++ kmsg_dump(KMSG_DUMP_OOPS); ++} ++ ++struct warn_args { ++ const char *fmt; ++ va_list args; ++}; ++ ++void __warn(const char *file, int line, void *caller, unsigned taint, ++ struct pt_regs *regs, struct warn_args *args) ++{ ++ disable_trace_on_warning(); ++ ++ if (args) ++ pr_warn(CUT_HERE); ++ ++ if (file) ++ pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", ++ raw_smp_processor_id(), current->pid, file, line, ++ caller); ++ else ++ pr_warn("WARNING: CPU: %d PID: %d at %pS\n", ++ raw_smp_processor_id(), current->pid, caller); ++ ++ if (args) ++ vprintk(args->fmt, args->args); ++ ++ if (panic_on_warn) { ++ /* ++ * This thread may hit another WARN() in the panic path. ++ * Resetting this prevents additional WARN() from panicking the ++ * system on this thread. Other threads are blocked by the ++ * panic_mutex in panic(). ++ */ ++ panic_on_warn = 0; ++ panic("panic_on_warn set ...\n"); ++ } ++ ++ print_modules(); ++ ++ if (regs) ++ show_regs(regs); ++ else ++ dump_stack(); ++ ++ print_irqtrace_events(current); ++ ++ print_oops_end_marker(); ++ ++ /* Just a warning, don't kill lockdep. 
*/ ++ add_taint(taint, LOCKDEP_STILL_OK); ++} ++ ++#ifdef WANT_WARN_ON_SLOWPATH ++void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) ++{ ++ struct warn_args args; ++ ++ args.fmt = fmt; ++ va_start(args.args, fmt); ++ __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, ++ &args); ++ va_end(args.args); ++} ++EXPORT_SYMBOL(warn_slowpath_fmt); ++ ++void warn_slowpath_fmt_taint(const char *file, int line, ++ unsigned taint, const char *fmt, ...) ++{ ++ struct warn_args args; ++ ++ args.fmt = fmt; ++ va_start(args.args, fmt); ++ __warn(file, line, __builtin_return_address(0), taint, NULL, &args); ++ va_end(args.args); ++} ++EXPORT_SYMBOL(warn_slowpath_fmt_taint); ++ ++void warn_slowpath_null(const char *file, int line) ++{ ++ pr_warn(CUT_HERE); ++ __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL); ++} ++EXPORT_SYMBOL(warn_slowpath_null); ++#else ++void __warn_printk(const char *fmt, ...) ++{ ++ va_list args; ++ ++ pr_warn(CUT_HERE); ++ ++ va_start(args, fmt); ++ vprintk(fmt, args); ++ va_end(args); ++} ++EXPORT_SYMBOL(__warn_printk); ++#endif ++ ++#ifdef CONFIG_BUG ++ ++/* Support resetting WARN*_ONCE state */ ++ ++static int clear_warn_once_set(void *data, u64 val) ++{ ++ generic_bug_clear_once(); ++ memset(__start_once, 0, __end_once - __start_once); ++ return 0; ++} ++ ++DEFINE_SIMPLE_ATTRIBUTE(clear_warn_once_fops, ++ NULL, ++ clear_warn_once_set, ++ "%lld\n"); ++ ++static __init int register_warn_debugfs(void) ++{ ++ /* Don't care about failure */ ++ debugfs_create_file("clear_warn_once", 0200, NULL, ++ NULL, &clear_warn_once_fops); ++ return 0; ++} ++ ++device_initcall(register_warn_debugfs); ++#endif ++ ++#ifdef CONFIG_STACKPROTECTOR ++ ++/* ++ * Called when gcc's -fstack-protector feature is used, and ++ * gcc detects corruption of the on-stack canary value ++ */ ++__visible void __stack_chk_fail(void) ++{ ++ panic("stack-protector: Kernel stack is corrupted in: %pB", ++ __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(__stack_chk_fail); ++ ++#endif ++ ++#ifdef CONFIG_ARCH_HAS_REFCOUNT ++void refcount_error_report(struct pt_regs *regs, const char *err) ++{ ++ WARN_RATELIMIT(1, "refcount_t %s at %pB in %s[%d], uid/euid: %u/%u\n", ++ err, (void *)instruction_pointer(regs), ++ current->comm, task_pid_nr(current), ++ from_kuid_munged(&init_user_ns, current_uid()), ++ from_kuid_munged(&init_user_ns, current_euid())); ++} ++#endif ++ ++core_param(panic, panic_timeout, int, 0644); ++core_param(panic_print, panic_print, ulong, 0644); ++core_param(pause_on_oops, pause_on_oops, int, 0644); ++core_param(panic_on_warn, panic_on_warn, int, 0644); ++core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644); ++ ++static int __init oops_setup(char *s) ++{ ++ if (!s) ++ return -EINVAL; ++ if (!strcmp(s, "panic")) ++ panic_on_oops = 1; ++ return 0; ++} ++early_param("oops", oops_setup); +diff -uprN kernel/kernel/power/hibernate.c kernel_new/kernel/power/hibernate.c +--- kernel/kernel/power/hibernate.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/power/hibernate.c 2021-04-01 18:28:07.811863114 +0800 +@@ -300,6 +300,7 @@ static int create_image(int platform_mod + goto Enable_cpus; + + local_irq_disable(); ++ hard_cond_local_irq_disable(); + + system_state = SYSTEM_SUSPEND; + +@@ -467,6 +468,7 @@ static int resume_target_kernel(bool pla + + local_irq_disable(); + system_state = SYSTEM_SUSPEND; ++ hard_cond_local_irq_disable(); + + error = syscore_suspend(); + if (error) +@@ -588,6 +590,7 @@ int 
hibernation_platform_enter(void) + + local_irq_disable(); + system_state = SYSTEM_SUSPEND; ++ hard_cond_local_irq_disable(); + syscore_suspend(); + if (pm_wakeup_pending()) { + error = -EAGAIN; +diff -uprN kernel/kernel/power/hibernate.c.orig kernel_new/kernel/power/hibernate.c.orig +--- kernel/kernel/power/hibernate.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/power/hibernate.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1254 @@ ++/* ++ * kernel/power/hibernate.c - Hibernation (a.k.a suspend-to-disk) support. ++ * ++ * Copyright (c) 2003 Patrick Mochel ++ * Copyright (c) 2003 Open Source Development Lab ++ * Copyright (c) 2004 Pavel Machek ++ * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc. ++ * Copyright (C) 2012 Bojan Smojver ++ * ++ * This file is released under the GPLv2. ++ */ ++ ++#define pr_fmt(fmt) "PM: " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "power.h" ++ ++ ++static int nocompress; ++static int noresume; ++static int nohibernate; ++static int resume_wait; ++static unsigned int resume_delay; ++static char resume_file[256] = CONFIG_PM_STD_PARTITION; ++dev_t swsusp_resume_device; ++sector_t swsusp_resume_block; ++__visible int in_suspend __nosavedata; ++ ++enum { ++ HIBERNATION_INVALID, ++ HIBERNATION_PLATFORM, ++ HIBERNATION_SHUTDOWN, ++ HIBERNATION_REBOOT, ++#ifdef CONFIG_SUSPEND ++ HIBERNATION_SUSPEND, ++#endif ++ HIBERNATION_TEST_RESUME, ++ /* keep last */ ++ __HIBERNATION_AFTER_LAST ++}; ++#define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1) ++#define HIBERNATION_FIRST (HIBERNATION_INVALID + 1) ++ ++static int hibernation_mode = HIBERNATION_SHUTDOWN; ++ ++bool freezer_test_done; ++ ++static const struct platform_hibernation_ops *hibernation_ops; ++ ++bool hibernation_available(void) ++{ ++ return (nohibernate == 0); ++} ++ ++/** ++ * hibernation_set_ops - Set the global hibernate operations. ++ * @ops: Hibernation operations to use in subsequent hibernation transitions. 
++ */ ++void hibernation_set_ops(const struct platform_hibernation_ops *ops) ++{ ++ if (ops && !(ops->begin && ops->end && ops->pre_snapshot ++ && ops->prepare && ops->finish && ops->enter && ops->pre_restore ++ && ops->restore_cleanup && ops->leave)) { ++ WARN_ON(1); ++ return; ++ } ++ lock_system_sleep(); ++ hibernation_ops = ops; ++ if (ops) ++ hibernation_mode = HIBERNATION_PLATFORM; ++ else if (hibernation_mode == HIBERNATION_PLATFORM) ++ hibernation_mode = HIBERNATION_SHUTDOWN; ++ ++ unlock_system_sleep(); ++} ++EXPORT_SYMBOL_GPL(hibernation_set_ops); ++ ++static bool entering_platform_hibernation; ++ ++bool system_entering_hibernation(void) ++{ ++ return entering_platform_hibernation; ++} ++EXPORT_SYMBOL(system_entering_hibernation); ++ ++/* To let some devices or syscore know if system carrying out hibernation*/ ++static bool carry_out_hibernation; ++ ++bool system_in_hibernation(void) ++{ ++ return carry_out_hibernation; ++} ++EXPORT_SYMBOL(system_in_hibernation); ++ ++#ifdef CONFIG_PM_DEBUG ++static void hibernation_debug_sleep(void) ++{ ++ pr_info("hibernation debug: Waiting for 5 seconds.\n"); ++ mdelay(5000); ++} ++ ++static int hibernation_test(int level) ++{ ++ if (pm_test_level == level) { ++ hibernation_debug_sleep(); ++ return 1; ++ } ++ return 0; ++} ++#else /* !CONFIG_PM_DEBUG */ ++static int hibernation_test(int level) { return 0; } ++#endif /* !CONFIG_PM_DEBUG */ ++ ++/** ++ * platform_begin - Call platform to start hibernation. ++ * @platform_mode: Whether or not to use the platform driver. ++ */ ++static int platform_begin(int platform_mode) ++{ ++ return (platform_mode && hibernation_ops) ? ++ hibernation_ops->begin() : 0; ++} ++ ++/** ++ * platform_end - Call platform to finish transition to the working state. ++ * @platform_mode: Whether or not to use the platform driver. ++ */ ++static void platform_end(int platform_mode) ++{ ++ if (platform_mode && hibernation_ops) ++ hibernation_ops->end(); ++} ++ ++/** ++ * platform_pre_snapshot - Call platform to prepare the machine for hibernation. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Use the platform driver to prepare the system for creating a hibernate image, ++ * if so configured, and return an error code if that fails. ++ */ ++ ++static int platform_pre_snapshot(int platform_mode) ++{ ++ return (platform_mode && hibernation_ops) ? ++ hibernation_ops->pre_snapshot() : 0; ++} ++ ++/** ++ * platform_leave - Call platform to prepare a transition to the working state. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Use the platform driver prepare to prepare the machine for switching to the ++ * normal mode of operation. ++ * ++ * This routine is called on one CPU with interrupts disabled. ++ */ ++static void platform_leave(int platform_mode) ++{ ++ if (platform_mode && hibernation_ops) ++ hibernation_ops->leave(); ++} ++ ++/** ++ * platform_finish - Call platform to switch the system to the working state. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Use the platform driver to switch the machine to the normal mode of ++ * operation. ++ * ++ * This routine must be called after platform_prepare(). ++ */ ++static void platform_finish(int platform_mode) ++{ ++ if (platform_mode && hibernation_ops) ++ hibernation_ops->finish(); ++} ++ ++/** ++ * platform_pre_restore - Prepare for hibernate image restoration. ++ * @platform_mode: Whether or not to use the platform driver. 
++ * ++ * Use the platform driver to prepare the system for resume from a hibernation ++ * image. ++ * ++ * If the restore fails after this function has been called, ++ * platform_restore_cleanup() must be called. ++ */ ++static int platform_pre_restore(int platform_mode) ++{ ++ return (platform_mode && hibernation_ops) ? ++ hibernation_ops->pre_restore() : 0; ++} ++ ++/** ++ * platform_restore_cleanup - Switch to the working state after failing restore. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Use the platform driver to switch the system to the normal mode of operation ++ * after a failing restore. ++ * ++ * If platform_pre_restore() has been called before the failing restore, this ++ * function must be called too, regardless of the result of ++ * platform_pre_restore(). ++ */ ++static void platform_restore_cleanup(int platform_mode) ++{ ++ if (platform_mode && hibernation_ops) ++ hibernation_ops->restore_cleanup(); ++} ++ ++/** ++ * platform_recover - Recover from a failure to suspend devices. ++ * @platform_mode: Whether or not to use the platform driver. ++ */ ++static void platform_recover(int platform_mode) ++{ ++ if (platform_mode && hibernation_ops && hibernation_ops->recover) ++ hibernation_ops->recover(); ++} ++ ++/** ++ * swsusp_show_speed - Print time elapsed between two events during hibernation. ++ * @start: Starting event. ++ * @stop: Final event. ++ * @nr_pages: Number of memory pages processed between @start and @stop. ++ * @msg: Additional diagnostic message to print. ++ */ ++void swsusp_show_speed(ktime_t start, ktime_t stop, ++ unsigned nr_pages, char *msg) ++{ ++ ktime_t diff; ++ u64 elapsed_centisecs64; ++ unsigned int centisecs; ++ unsigned int k; ++ unsigned int kps; ++ ++ diff = ktime_sub(stop, start); ++ elapsed_centisecs64 = ktime_divns(diff, 10*NSEC_PER_MSEC); ++ centisecs = elapsed_centisecs64; ++ if (centisecs == 0) ++ centisecs = 1; /* avoid div-by-zero */ ++ k = nr_pages * (PAGE_SIZE / 1024); ++ kps = (k * 100) / centisecs; ++ pr_info("%s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n", ++ msg, k, centisecs / 100, centisecs % 100, kps / 1000, ++ (kps % 1000) / 10); ++} ++ ++__weak int arch_resume_nosmt(void) ++{ ++ return 0; ++} ++ ++/** ++ * create_image - Create a hibernation image. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Execute device drivers' "late" and "noirq" freeze callbacks, create a ++ * hibernation image and run the drivers' "noirq" and "early" thaw callbacks. ++ * ++ * Control reappears in this routine after the subsequent restore. 
++ */ ++static int create_image(int platform_mode) ++{ ++ int error; ++ ++ error = dpm_suspend_end(PMSG_FREEZE); ++ if (error) { ++ pr_err("Some devices failed to power down, aborting hibernation\n"); ++ return error; ++ } ++ ++ error = platform_pre_snapshot(platform_mode); ++ if (error || hibernation_test(TEST_PLATFORM)) ++ goto Platform_finish; ++ ++ error = disable_nonboot_cpus(); ++ if (error || hibernation_test(TEST_CPUS)) ++ goto Enable_cpus; ++ ++ local_irq_disable(); ++ ++ system_state = SYSTEM_SUSPEND; ++ ++ error = syscore_suspend(); ++ if (error) { ++ pr_err("Some system devices failed to power down, aborting hibernation\n"); ++ goto Enable_irqs; ++ } ++ ++ if (hibernation_test(TEST_CORE) || pm_wakeup_pending()) ++ goto Power_up; ++ ++ in_suspend = 1; ++ save_processor_state(); ++ trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true); ++ error = swsusp_arch_suspend(); ++ /* Restore control flow magically appears here */ ++ restore_processor_state(); ++ trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false); ++ if (error) ++ pr_err("Error %d creating hibernation image\n", error); ++ ++ if (!in_suspend) { ++ events_check_enabled = false; ++ clear_free_pages(); ++ } ++ ++ platform_leave(platform_mode); ++ ++ Power_up: ++ syscore_resume(); ++ ++ Enable_irqs: ++ system_state = SYSTEM_RUNNING; ++ local_irq_enable(); ++ ++ Enable_cpus: ++ enable_nonboot_cpus(); ++ ++ /* Allow architectures to do nosmt-specific post-resume dances */ ++ if (!in_suspend) ++ error = arch_resume_nosmt(); ++ ++ Platform_finish: ++ platform_finish(platform_mode); ++ ++ dpm_resume_start(in_suspend ? ++ (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); ++ ++ return error; ++} ++ ++/** ++ * hibernation_snapshot - Quiesce devices and create a hibernation image. ++ * @platform_mode: If set, use platform driver to prepare for the transition. ++ * ++ * This routine must be called with system_transition_mutex held. ++ */ ++int hibernation_snapshot(int platform_mode) ++{ ++ pm_message_t msg; ++ int error; ++ ++ pm_suspend_clear_flags(); ++ error = platform_begin(platform_mode); ++ if (error) ++ goto Close; ++ ++ /* Preallocate image memory before shutting down devices. */ ++ error = hibernate_preallocate_memory(); ++ if (error) ++ goto Close; ++ ++ error = freeze_kernel_threads(); ++ if (error) ++ goto Cleanup; ++ ++ if (hibernation_test(TEST_FREEZER)) { ++ ++ /* ++ * Indicate to the caller that we are returning due to a ++ * successful freezer test. ++ */ ++ freezer_test_done = true; ++ goto Thaw; ++ } ++ ++ error = dpm_prepare(PMSG_FREEZE); ++ if (error) { ++ dpm_complete(PMSG_RECOVER); ++ goto Thaw; ++ } ++ ++ suspend_console(); ++ pm_restrict_gfp_mask(); ++ ++ error = dpm_suspend(PMSG_FREEZE); ++ ++ if (error || hibernation_test(TEST_DEVICES)) ++ platform_recover(platform_mode); ++ else ++ error = create_image(platform_mode); ++ ++ /* ++ * In the case that we call create_image() above, the control ++ * returns here (1) after the image has been created or the ++ * image creation has failed and (2) after a successful restore. ++ */ ++ ++ /* We may need to release the preallocated image pages here. */ ++ if (error || !in_suspend) ++ swsusp_free(); ++ ++ msg = in_suspend ? (error ? 
PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE; ++ dpm_resume(msg); ++ ++ if (error || !in_suspend) ++ pm_restore_gfp_mask(); ++ ++ resume_console(); ++ dpm_complete(msg); ++ ++ Close: ++ platform_end(platform_mode); ++ return error; ++ ++ Thaw: ++ thaw_kernel_threads(); ++ Cleanup: ++ swsusp_free(); ++ goto Close; ++} ++ ++int __weak hibernate_resume_nonboot_cpu_disable(void) ++{ ++ return disable_nonboot_cpus(); ++} ++ ++/** ++ * resume_target_kernel - Restore system state from a hibernation image. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Execute device drivers' "noirq" and "late" freeze callbacks, restore the ++ * contents of highmem that have not been restored yet from the image and run ++ * the low-level code that will restore the remaining contents of memory and ++ * switch to the just restored target kernel. ++ */ ++static int resume_target_kernel(bool platform_mode) ++{ ++ int error; ++ ++ error = dpm_suspend_end(PMSG_QUIESCE); ++ if (error) { ++ pr_err("Some devices failed to power down, aborting resume\n"); ++ return error; ++ } ++ ++ error = platform_pre_restore(platform_mode); ++ if (error) ++ goto Cleanup; ++ ++ error = hibernate_resume_nonboot_cpu_disable(); ++ if (error) ++ goto Enable_cpus; ++ ++ local_irq_disable(); ++ system_state = SYSTEM_SUSPEND; ++ ++ error = syscore_suspend(); ++ if (error) ++ goto Enable_irqs; ++ ++ save_processor_state(); ++ error = restore_highmem(); ++ if (!error) { ++ error = swsusp_arch_resume(); ++ /* ++ * The code below is only ever reached in case of a failure. ++ * Otherwise, execution continues at the place where ++ * swsusp_arch_suspend() was called. ++ */ ++ BUG_ON(!error); ++ /* ++ * This call to restore_highmem() reverts the changes made by ++ * the previous one. ++ */ ++ restore_highmem(); ++ } ++ /* ++ * The only reason why swsusp_arch_resume() can fail is memory being ++ * very tight, so we have to free it as soon as we can to avoid ++ * subsequent failures. ++ */ ++ swsusp_free(); ++ restore_processor_state(); ++ touch_softlockup_watchdog(); ++ ++ syscore_resume(); ++ ++ Enable_irqs: ++ system_state = SYSTEM_RUNNING; ++ local_irq_enable(); ++ ++ Enable_cpus: ++ enable_nonboot_cpus(); ++ ++ Cleanup: ++ platform_restore_cleanup(platform_mode); ++ ++ dpm_resume_start(PMSG_RECOVER); ++ ++ return error; ++} ++ ++/** ++ * hibernation_restore - Quiesce devices and restore from a hibernation image. ++ * @platform_mode: If set, use platform driver to prepare for the transition. ++ * ++ * This routine must be called with system_transition_mutex held. If it is ++ * successful, control reappears in the restored target kernel in ++ * hibernation_snapshot(). ++ */ ++int hibernation_restore(int platform_mode) ++{ ++ int error; ++ ++ pm_prepare_console(); ++ suspend_console(); ++ pm_restrict_gfp_mask(); ++ error = dpm_suspend_start(PMSG_QUIESCE); ++ if (!error) { ++ error = resume_target_kernel(platform_mode); ++ /* ++ * The above should either succeed and jump to the new kernel, ++ * or return with an error. Otherwise things are just ++ * undefined, so let's be paranoid. ++ */ ++ BUG_ON(!error); ++ } ++ dpm_resume_end(PMSG_RECOVER); ++ pm_restore_gfp_mask(); ++ resume_console(); ++ pm_restore_console(); ++ return error; ++} ++ ++/** ++ * hibernation_platform_enter - Power off the system using the platform driver. 
++ */ ++int hibernation_platform_enter(void) ++{ ++ int error; ++ ++ if (!hibernation_ops) ++ return -ENOSYS; ++ ++ /* ++ * We have cancelled the power transition by running ++ * hibernation_ops->finish() before saving the image, so we should let ++ * the firmware know that we're going to enter the sleep state after all ++ */ ++ error = hibernation_ops->begin(); ++ if (error) ++ goto Close; ++ ++ entering_platform_hibernation = true; ++ suspend_console(); ++ error = dpm_suspend_start(PMSG_HIBERNATE); ++ if (error) { ++ if (hibernation_ops->recover) ++ hibernation_ops->recover(); ++ goto Resume_devices; ++ } ++ ++ error = dpm_suspend_end(PMSG_HIBERNATE); ++ if (error) ++ goto Resume_devices; ++ ++ error = hibernation_ops->prepare(); ++ if (error) ++ goto Platform_finish; ++ ++ error = disable_nonboot_cpus(); ++ if (error) ++ goto Enable_cpus; ++ ++ local_irq_disable(); ++ system_state = SYSTEM_SUSPEND; ++ syscore_suspend(); ++ if (pm_wakeup_pending()) { ++ error = -EAGAIN; ++ goto Power_up; ++ } ++ ++ hibernation_ops->enter(); ++ /* We should never get here */ ++ while (1); ++ ++ Power_up: ++ syscore_resume(); ++ system_state = SYSTEM_RUNNING; ++ local_irq_enable(); ++ ++ Enable_cpus: ++ enable_nonboot_cpus(); ++ ++ Platform_finish: ++ hibernation_ops->finish(); ++ ++ dpm_resume_start(PMSG_RESTORE); ++ ++ Resume_devices: ++ entering_platform_hibernation = false; ++ dpm_resume_end(PMSG_RESTORE); ++ resume_console(); ++ ++ Close: ++ hibernation_ops->end(); ++ ++ return error; ++} ++ ++/** ++ * power_down - Shut the machine down for hibernation. ++ * ++ * Use the platform driver, if configured, to put the system into the sleep ++ * state corresponding to hibernation, or try to power it off or reboot, ++ * depending on the value of hibernation_mode. ++ */ ++static void power_down(void) ++{ ++#ifdef CONFIG_SUSPEND ++ int error; ++ ++ if (hibernation_mode == HIBERNATION_SUSPEND) { ++ error = suspend_devices_and_enter(PM_SUSPEND_MEM); ++ if (error) { ++ hibernation_mode = hibernation_ops ? ++ HIBERNATION_PLATFORM : ++ HIBERNATION_SHUTDOWN; ++ } else { ++ /* Restore swap signature. */ ++ error = swsusp_unmark(); ++ if (error) ++ pr_err("Swap will be unusable! Try swapon -a.\n"); ++ ++ return; ++ } ++ } ++#endif ++ ++ switch (hibernation_mode) { ++ case HIBERNATION_REBOOT: ++ kernel_restart(NULL); ++ break; ++ case HIBERNATION_PLATFORM: ++ hibernation_platform_enter(); ++ /* Fall through */ ++ case HIBERNATION_SHUTDOWN: ++ if (pm_power_off) ++ kernel_power_off(); ++ break; ++ } ++ kernel_halt(); ++ /* ++ * Valid image is on the disk, if we continue we risk serious data ++ * corruption after resume. ++ */ ++ pr_crit("Power down manually\n"); ++ while (1) ++ cpu_relax(); ++} ++ ++static int load_image_and_restore(void) ++{ ++ int error; ++ unsigned int flags; ++ ++ pm_pr_dbg("Loading hibernation image.\n"); ++ ++ lock_device_hotplug(); ++ error = create_basic_memory_bitmaps(); ++ if (error) ++ goto Unlock; ++ ++ error = swsusp_read(&flags); ++ swsusp_close(FMODE_READ); ++ if (!error) ++ hibernation_restore(flags & SF_PLATFORM_MODE); ++ ++ pr_err("Failed to load hibernation image, recovering.\n"); ++ swsusp_free(); ++ free_basic_memory_bitmaps(); ++ Unlock: ++ unlock_device_hotplug(); ++ ++ return error; ++} ++ ++/** ++ * hibernate - Carry out system hibernation, including saving the image. 
++ */ ++int hibernate(void) ++{ ++ int error, nr_calls = 0; ++ bool snapshot_test = false; ++ ++ if (!hibernation_available()) { ++ pm_pr_dbg("Hibernation not available.\n"); ++ return -EPERM; ++ } ++ ++ lock_system_sleep(); ++ /* The snapshot device should not be opened while we're running */ ++ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { ++ error = -EBUSY; ++ goto Unlock; ++ } ++ ++ pr_info("hibernation entry\n"); ++ carry_out_hibernation = true; ++ pm_prepare_console(); ++ error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); ++ if (error) { ++ nr_calls--; ++ goto Exit; ++ } ++ ++ pr_info("Syncing filesystems ... \n"); ++ ksys_sync(); ++ pr_info("done.\n"); ++ ++ error = freeze_processes(); ++ if (error) ++ goto Exit; ++ ++ lock_device_hotplug(); ++ /* Allocate memory management structures */ ++ error = create_basic_memory_bitmaps(); ++ if (error) ++ goto Thaw; ++ ++ error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); ++ if (error || freezer_test_done) ++ goto Free_bitmaps; ++ ++ if (in_suspend) { ++ unsigned int flags = 0; ++ ++ if (hibernation_mode == HIBERNATION_PLATFORM) ++ flags |= SF_PLATFORM_MODE; ++ if (nocompress) ++ flags |= SF_NOCOMPRESS_MODE; ++ else ++ flags |= SF_CRC32_MODE; ++ ++ pm_pr_dbg("Writing image.\n"); ++ error = swsusp_write(flags); ++ swsusp_free(); ++ if (!error) { ++ if (hibernation_mode == HIBERNATION_TEST_RESUME) ++ snapshot_test = true; ++ else ++ power_down(); ++ } ++ in_suspend = 0; ++ pm_restore_gfp_mask(); ++ } else { ++ pm_pr_dbg("Image restored successfully.\n"); ++ } ++ ++ Free_bitmaps: ++ free_basic_memory_bitmaps(); ++ Thaw: ++ unlock_device_hotplug(); ++ if (snapshot_test) { ++ pm_pr_dbg("Checking hibernation image\n"); ++ error = swsusp_check(); ++ if (!error) ++ error = load_image_and_restore(); ++ } ++ thaw_processes(); ++ ++ /* Don't bother checking whether freezer_test_done is true */ ++ freezer_test_done = false; ++ Exit: ++ __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL); ++ pm_restore_console(); ++ atomic_inc(&snapshot_device_available); ++ Unlock: ++ unlock_system_sleep(); ++ carry_out_hibernation = false; ++ pr_info("hibernation exit\n"); ++ ++ return error; ++} ++ ++ ++/** ++ * software_resume - Resume from a saved hibernation image. ++ * ++ * This routine is called as a late initcall, when all devices have been ++ * discovered and initialized already. ++ * ++ * The image reading code is called to see if there is a hibernation image ++ * available for reading. If that is the case, devices are quiesced and the ++ * contents of memory is restored from the saved image. ++ * ++ * If this is successful, control reappears in the restored target kernel in ++ * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine ++ * attempts to recover gracefully and make the kernel return to the normal mode ++ * of operation. ++ */ ++static int software_resume(void) ++{ ++ int error, nr_calls = 0; ++ ++ /* ++ * If the user said "noresume".. bail out early. ++ */ ++ if (noresume || !hibernation_available()) ++ return 0; ++ ++ /* ++ * name_to_dev_t() below takes a sysfs buffer mutex when sysfs ++ * is configured into the kernel. Since the regular hibernate ++ * trigger path is via sysfs which takes a buffer mutex before ++ * calling hibernate functions (which take system_transition_mutex) ++ * this can cause lockdep to complain about a possible ABBA deadlock ++ * which cannot happen since we're in the boot code here and ++ * sysfs can't be invoked yet. 
Therefore, we use a subclass ++ * here to avoid lockdep complaining. ++ */ ++ mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING); ++ ++ if (swsusp_resume_device) ++ goto Check_image; ++ ++ if (!strlen(resume_file)) { ++ error = -ENOENT; ++ goto Unlock; ++ } ++ ++ pm_pr_dbg("Checking hibernation image partition %s\n", resume_file); ++ ++ if (resume_delay) { ++ pr_info("Waiting %dsec before reading resume device ...\n", ++ resume_delay); ++ ssleep(resume_delay); ++ } ++ ++ /* Check if the device is there */ ++ swsusp_resume_device = name_to_dev_t(resume_file); ++ ++ /* ++ * name_to_dev_t is ineffective to verify parition if resume_file is in ++ * integer format. (e.g. major:minor) ++ */ ++ if (isdigit(resume_file[0]) && resume_wait) { ++ int partno; ++ while (!get_gendisk(swsusp_resume_device, &partno)) ++ msleep(10); ++ } ++ ++ if (!swsusp_resume_device) { ++ /* ++ * Some device discovery might still be in progress; we need ++ * to wait for this to finish. ++ */ ++ wait_for_device_probe(); ++ ++ if (resume_wait) { ++ while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0) ++ msleep(10); ++ async_synchronize_full(); ++ } ++ ++ swsusp_resume_device = name_to_dev_t(resume_file); ++ if (!swsusp_resume_device) { ++ error = -ENODEV; ++ goto Unlock; ++ } ++ } ++ ++ Check_image: ++ pm_pr_dbg("Hibernation image partition %d:%d present\n", ++ MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); ++ ++ pm_pr_dbg("Looking for hibernation image.\n"); ++ error = swsusp_check(); ++ if (error) ++ goto Unlock; ++ ++ /* The snapshot device should not be opened while we're running */ ++ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { ++ error = -EBUSY; ++ swsusp_close(FMODE_READ); ++ goto Unlock; ++ } ++ ++ pr_info("resume from hibernation\n"); ++ pm_prepare_console(); ++ error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); ++ if (error) { ++ nr_calls--; ++ goto Close_Finish; ++ } ++ ++ pm_pr_dbg("Preparing processes for restore.\n"); ++ error = freeze_processes(); ++ if (error) ++ goto Close_Finish; ++ error = load_image_and_restore(); ++ thaw_processes(); ++ Finish: ++ __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); ++ pm_restore_console(); ++ pr_info("resume from hibernation failed (%d)\n", error); ++ atomic_inc(&snapshot_device_available); ++ /* For success case, the suspend path will release the lock */ ++ Unlock: ++ mutex_unlock(&system_transition_mutex); ++ pm_pr_dbg("Hibernation image not present or could not be loaded.\n"); ++ return error; ++ Close_Finish: ++ swsusp_close(FMODE_READ); ++ goto Finish; ++} ++ ++late_initcall_sync(software_resume); ++ ++ ++static const char * const hibernation_modes[] = { ++ [HIBERNATION_PLATFORM] = "platform", ++ [HIBERNATION_SHUTDOWN] = "shutdown", ++ [HIBERNATION_REBOOT] = "reboot", ++#ifdef CONFIG_SUSPEND ++ [HIBERNATION_SUSPEND] = "suspend", ++#endif ++ [HIBERNATION_TEST_RESUME] = "test_resume", ++}; ++ ++/* ++ * /sys/power/disk - Control hibernation mode. ++ * ++ * Hibernation can be handled in several ways. There are a few different ways ++ * to put the system into the sleep state: using the platform driver (e.g. ACPI ++ * or other hibernation_ops), powering it off or rebooting it (for testing ++ * mostly). ++ * ++ * The sysfs file /sys/power/disk provides an interface for selecting the ++ * hibernation mode to use. Reading from this file causes the available modes ++ * to be printed. 
There are 3 modes that can be supported: ++ * ++ * 'platform' ++ * 'shutdown' ++ * 'reboot' ++ * ++ * If a platform hibernation driver is in use, 'platform' will be supported ++ * and will be used by default. Otherwise, 'shutdown' will be used by default. ++ * The selected option (i.e. the one corresponding to the current value of ++ * hibernation_mode) is enclosed by a square bracket. ++ * ++ * To select a given hibernation mode it is necessary to write the mode's ++ * string representation (as returned by reading from /sys/power/disk) back ++ * into /sys/power/disk. ++ */ ++ ++static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ int i; ++ char *start = buf; ++ ++ if (!hibernation_available()) ++ return sprintf(buf, "[disabled]\n"); ++ ++ for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { ++ if (!hibernation_modes[i]) ++ continue; ++ switch (i) { ++ case HIBERNATION_SHUTDOWN: ++ case HIBERNATION_REBOOT: ++#ifdef CONFIG_SUSPEND ++ case HIBERNATION_SUSPEND: ++#endif ++ case HIBERNATION_TEST_RESUME: ++ break; ++ case HIBERNATION_PLATFORM: ++ if (hibernation_ops) ++ break; ++ /* not a valid mode, continue with loop */ ++ continue; ++ } ++ if (i == hibernation_mode) ++ buf += sprintf(buf, "[%s] ", hibernation_modes[i]); ++ else ++ buf += sprintf(buf, "%s ", hibernation_modes[i]); ++ } ++ buf += sprintf(buf, "\n"); ++ return buf-start; ++} ++ ++static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t n) ++{ ++ int error = 0; ++ int i; ++ int len; ++ char *p; ++ int mode = HIBERNATION_INVALID; ++ ++ if (!hibernation_available()) ++ return -EPERM; ++ ++ p = memchr(buf, '\n', n); ++ len = p ? p - buf : n; ++ ++ lock_system_sleep(); ++ for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { ++ if (len == strlen(hibernation_modes[i]) ++ && !strncmp(buf, hibernation_modes[i], len)) { ++ mode = i; ++ break; ++ } ++ } ++ if (mode != HIBERNATION_INVALID) { ++ switch (mode) { ++ case HIBERNATION_SHUTDOWN: ++ case HIBERNATION_REBOOT: ++#ifdef CONFIG_SUSPEND ++ case HIBERNATION_SUSPEND: ++#endif ++ case HIBERNATION_TEST_RESUME: ++ hibernation_mode = mode; ++ break; ++ case HIBERNATION_PLATFORM: ++ if (hibernation_ops) ++ hibernation_mode = mode; ++ else ++ error = -EINVAL; ++ } ++ } else ++ error = -EINVAL; ++ ++ if (!error) ++ pm_pr_dbg("Hibernation mode set to '%s'\n", ++ hibernation_modes[mode]); ++ unlock_system_sleep(); ++ return error ? 
error : n; ++} ++ ++power_attr(disk); ++ ++static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), ++ MINOR(swsusp_resume_device)); ++} ++ ++static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t n) ++{ ++ dev_t res; ++ int len = n; ++ char *name; ++ ++ if (len && buf[len-1] == '\n') ++ len--; ++ name = kstrndup(buf, len, GFP_KERNEL); ++ if (!name) ++ return -ENOMEM; ++ ++ res = name_to_dev_t(name); ++ kfree(name); ++ if (!res) ++ return -EINVAL; ++ ++ lock_system_sleep(); ++ swsusp_resume_device = res; ++ unlock_system_sleep(); ++ pm_pr_dbg("Configured resume from disk to %u\n", swsusp_resume_device); ++ noresume = 0; ++ software_resume(); ++ return n; ++} ++ ++power_attr(resume); ++ ++static ssize_t resume_offset_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%llu\n", (unsigned long long)swsusp_resume_block); ++} ++ ++static ssize_t resume_offset_store(struct kobject *kobj, ++ struct kobj_attribute *attr, const char *buf, ++ size_t n) ++{ ++ unsigned long long offset; ++ int rc; ++ ++ rc = kstrtoull(buf, 0, &offset); ++ if (rc) ++ return rc; ++ swsusp_resume_block = offset; ++ ++ return n; ++} ++ ++power_attr(resume_offset); ++ ++static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf, "%lu\n", image_size); ++} ++ ++static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t n) ++{ ++ unsigned long size; ++ ++ if (sscanf(buf, "%lu", &size) == 1) { ++ image_size = size; ++ return n; ++ } ++ ++ return -EINVAL; ++} ++ ++power_attr(image_size); ++ ++static ssize_t reserved_size_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", reserved_size); ++} ++ ++static ssize_t reserved_size_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t n) ++{ ++ unsigned long size; ++ ++ if (sscanf(buf, "%lu", &size) == 1) { ++ reserved_size = size; ++ return n; ++ } ++ ++ return -EINVAL; ++} ++ ++power_attr(reserved_size); ++ ++static struct attribute * g[] = { ++ &disk_attr.attr, ++ &resume_offset_attr.attr, ++ &resume_attr.attr, ++ &image_size_attr.attr, ++ &reserved_size_attr.attr, ++ NULL, ++}; ++ ++ ++static const struct attribute_group attr_group = { ++ .attrs = g, ++}; ++ ++ ++static int __init pm_disk_init(void) ++{ ++ return sysfs_create_group(power_kobj, &attr_group); ++} ++ ++core_initcall(pm_disk_init); ++ ++ ++static int __init resume_setup(char *str) ++{ ++ if (noresume) ++ return 1; ++ ++ strncpy( resume_file, str, 255 ); ++ return 1; ++} ++ ++static int __init resume_offset_setup(char *str) ++{ ++ unsigned long long offset; ++ ++ if (noresume) ++ return 1; ++ ++ if (sscanf(str, "%llu", &offset) == 1) ++ swsusp_resume_block = offset; ++ ++ return 1; ++} ++ ++static int __init hibernate_setup(char *str) ++{ ++ if (!strncmp(str, "noresume", 8)) { ++ noresume = 1; ++ } else if (!strncmp(str, "nocompress", 10)) { ++ nocompress = 1; ++ } else if (!strncmp(str, "no", 2)) { ++ noresume = 1; ++ nohibernate = 1; ++ } else if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ++ && !strncmp(str, "protect_image", 13)) { ++ enable_restore_image_protection(); ++ } ++ return 1; ++} ++ ++static int __init noresume_setup(char *str) ++{ ++ noresume = 1; ++ return 1; ++} ++ ++static int __init resumewait_setup(char *str) ++{ 
++ resume_wait = 1; ++ return 1; ++} ++ ++static int __init resumedelay_setup(char *str) ++{ ++ int rc = kstrtouint(str, 0, &resume_delay); ++ ++ if (rc) ++ return rc; ++ return 1; ++} ++ ++static int __init nohibernate_setup(char *str) ++{ ++ noresume = 1; ++ nohibernate = 1; ++ return 1; ++} ++ ++__setup("noresume", noresume_setup); ++__setup("resume_offset=", resume_offset_setup); ++__setup("resume=", resume_setup); ++__setup("hibernate=", hibernate_setup); ++__setup("resumewait", resumewait_setup); ++__setup("resumedelay=", resumedelay_setup); ++__setup("nohibernate", nohibernate_setup); +diff -uprN kernel/kernel/printk/printk.c kernel_new/kernel/printk/printk.c +--- kernel/kernel/printk/printk.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/printk/printk.c 2021-04-01 18:28:07.811863114 +0800 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1995,10 +1996,116 @@ asmlinkage int vprintk_emit(int facility + } + EXPORT_SYMBOL(vprintk_emit); + +-asmlinkage int vprintk(const char *fmt, va_list args) ++#ifdef CONFIG_IPIPE ++ ++extern int __ipipe_printk_bypass; ++ ++static IPIPE_DEFINE_SPINLOCK(__ipipe_printk_lock); ++ ++static int __ipipe_printk_fill; ++ ++static char __ipipe_printk_buf[__LOG_BUF_LEN]; ++ ++int __ipipe_log_printk(const char *fmt, va_list args) ++{ ++ int ret = 0, fbytes, oldcount; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&__ipipe_printk_lock, flags); ++ ++ oldcount = __ipipe_printk_fill; ++ fbytes = __LOG_BUF_LEN - oldcount; ++ if (fbytes > 1) { ++ ret = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill, ++ fbytes, fmt, args) + 1; ++ __ipipe_printk_fill += ret; ++ } ++ ++ raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++ ++ if (oldcount == 0) ++ ipipe_raise_irq(__ipipe_printk_virq); ++ ++ return ret; ++} ++ ++static void do_deferred_vprintk(const char *fmt, ...) 
++{ ++ va_list args; ++ ++ va_start(args, fmt); ++ vprintk_func(fmt, args); ++ va_end(args); ++} ++ ++void __ipipe_flush_printk (unsigned virq, void *cookie) ++{ ++ char *p = __ipipe_printk_buf; ++ int len, lmax, out = 0; ++ unsigned long flags; ++ ++ goto start; ++ do { ++ raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++start: ++ lmax = __ipipe_printk_fill; ++ while (out < lmax) { ++ len = strlen(p) + 1; ++ do_deferred_vprintk("%s", p); ++ p += len; ++ out += len; ++ } ++ raw_spin_lock_irqsave(&__ipipe_printk_lock, flags); ++ } while (__ipipe_printk_fill != lmax); ++ ++ __ipipe_printk_fill = 0; ++ ++ raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++} ++ ++static int do_vprintk(const char *fmt, va_list args) ++{ ++ int sprintk = 1, cs = -1; ++ unsigned long flags; ++ int ret; ++ ++ flags = hard_local_irq_save(); ++ ++ if (__ipipe_printk_bypass || oops_in_progress) ++ cs = ipipe_disable_context_check(); ++ else if (__ipipe_current_domain == ipipe_root_domain) { ++ if (ipipe_head_domain != ipipe_root_domain && ++ (raw_irqs_disabled_flags(flags) || ++ test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status))) ++ sprintk = 0; ++ } else ++ sprintk = 0; ++ ++ hard_local_irq_restore(flags); ++ ++ if (sprintk) { ++ ret = vprintk_func(fmt, args); ++ if (cs != -1) ++ ipipe_restore_context_check(cs); ++ } else ++ ret = __ipipe_log_printk(fmt, args); ++ ++ return ret; ++} ++ ++#else /* !CONFIG_IPIPE */ ++ ++static int do_vprintk(const char *fmt, va_list args) + { + return vprintk_func(fmt, args); + } ++ ++#endif /* !CONFIG_IPIPE */ ++ ++asmlinkage int vprintk(const char *fmt, va_list args) ++{ ++ return do_vprintk(fmt, args); ++} + EXPORT_SYMBOL(vprintk); + + asmlinkage int printk_emit(int facility, int level, +@@ -2060,7 +2167,7 @@ asmlinkage __visible int printk(const ch + int r; + + va_start(args, fmt); +- r = vprintk_func(fmt, args); ++ r = do_vprintk(fmt, args); + va_end(args); + + return r; +@@ -2121,6 +2228,63 @@ asmlinkage __visible void early_printk(c + } + #endif + ++#ifdef CONFIG_RAW_PRINTK ++static struct console *raw_console; ++static IPIPE_DEFINE_RAW_SPINLOCK(raw_console_lock); ++ ++void raw_vprintk(const char *fmt, va_list ap) ++{ ++ unsigned long flags; ++ char buf[256]; ++ int n; ++ ++ if (raw_console == NULL || console_suspended) ++ return; ++ ++ n = vscnprintf(buf, sizeof(buf), fmt, ap); ++ touch_nmi_watchdog(); ++ raw_spin_lock_irqsave(&raw_console_lock, flags); ++ if (raw_console) ++ raw_console->write_raw(raw_console, buf, n); ++ raw_spin_unlock_irqrestore(&raw_console_lock, flags); ++} ++ ++asmlinkage __visible void raw_printk(const char *fmt, ...) 
++{ ++ va_list ap; ++ ++ va_start(ap, fmt); ++ raw_vprintk(fmt, ap); ++ va_end(ap); ++} ++EXPORT_SYMBOL(raw_printk); ++ ++static inline void register_raw_console(struct console *newcon) ++{ ++ if ((newcon->flags & CON_RAW) != 0 && newcon->write_raw) ++ raw_console = newcon; ++} ++ ++static inline void unregister_raw_console(struct console *oldcon) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&raw_console_lock, flags); ++ if (oldcon == raw_console) ++ raw_console = NULL; ++ raw_spin_unlock_irqrestore(&raw_console_lock, flags); ++} ++ ++#else ++ ++static inline void register_raw_console(struct console *newcon) ++{ } ++ ++static inline void unregister_raw_console(struct console *oldcon) ++{ } ++ ++#endif ++ + static int __add_preferred_console(char *name, int idx, char *options, + char *brl_options) + { +@@ -2761,6 +2925,9 @@ void register_console(struct console *ne + console_drivers->next = newcon; + } + ++ /* The latest raw console to register is current. */ ++ register_raw_console(newcon); ++ + if (newcon->flags & CON_EXTENDED) + if (!nr_ext_console_drivers++) + pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n"); +@@ -2821,6 +2988,8 @@ int unregister_console(struct console *c + (console->flags & CON_BOOT) ? "boot" : "" , + console->name, console->index); + ++ unregister_raw_console(console); ++ + res = _braille_unregister_console(console); + if (res) + return res; +diff -uprN kernel/kernel/printk/printk.c.orig kernel_new/kernel/printk/printk.c.orig +--- kernel/kernel/printk/printk.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/printk/printk.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,3362 @@ ++/* ++ * linux/kernel/printk.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ * ++ * Modified to make sys_syslog() more flexible: added commands to ++ * return the last 4k of kernel messages, regardless of whether ++ * they've been read or not. Added option to suppress kernel printk's ++ * to the console. Added hook for sending the console messages ++ * elsewhere, in preparation for a serial line console (someday). ++ * Ted Ts'o, 2/11/93. ++ * Modified for sysctl support, 1/8/97, Chris Horn. ++ * Fixed SMP synchronization, 08/08/99, Manfred Spraul ++ * manfred@colorfullife.com ++ * Rewrote bits to get rid of console_lock ++ * 01Mar01 Andrew Morton ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++#define CREATE_TRACE_POINTS ++#include ++ ++#include "console_cmdline.h" ++#include "braille.h" ++#include "internal.h" ++ ++int console_printk[4] = { ++ CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ ++ MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */ ++ CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ ++ CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ ++}; ++ ++atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0); ++EXPORT_SYMBOL(ignore_console_lock_warning); ++ ++/* ++ * Low level drivers may need that to know if they can schedule in ++ * their unblank() callback or not. So let's export it. 
++ */ ++int oops_in_progress; ++EXPORT_SYMBOL(oops_in_progress); ++ ++/* ++ * console_sem protects the console_drivers list, and also ++ * provides serialisation for access to the entire console ++ * driver system. ++ */ ++static DEFINE_SEMAPHORE(console_sem); ++struct console *console_drivers; ++EXPORT_SYMBOL_GPL(console_drivers); ++ ++#ifdef CONFIG_LOCKDEP ++static struct lockdep_map console_lock_dep_map = { ++ .name = "console_lock" ++}; ++#endif ++ ++enum devkmsg_log_bits { ++ __DEVKMSG_LOG_BIT_ON = 0, ++ __DEVKMSG_LOG_BIT_OFF, ++ __DEVKMSG_LOG_BIT_LOCK, ++}; ++ ++enum devkmsg_log_masks { ++ DEVKMSG_LOG_MASK_ON = BIT(__DEVKMSG_LOG_BIT_ON), ++ DEVKMSG_LOG_MASK_OFF = BIT(__DEVKMSG_LOG_BIT_OFF), ++ DEVKMSG_LOG_MASK_LOCK = BIT(__DEVKMSG_LOG_BIT_LOCK), ++}; ++ ++/* Keep both the 'on' and 'off' bits clear, i.e. ratelimit by default: */ ++#define DEVKMSG_LOG_MASK_DEFAULT 0 ++ ++static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; ++ ++static int __control_devkmsg(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ if (!strncmp(str, "on", 2)) { ++ devkmsg_log = DEVKMSG_LOG_MASK_ON; ++ return 2; ++ } else if (!strncmp(str, "off", 3)) { ++ devkmsg_log = DEVKMSG_LOG_MASK_OFF; ++ return 3; ++ } else if (!strncmp(str, "ratelimit", 9)) { ++ devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; ++ return 9; ++ } ++ return -EINVAL; ++} ++ ++static int __init control_devkmsg(char *str) ++{ ++ if (__control_devkmsg(str) < 0) ++ return 1; ++ ++ /* ++ * Set sysctl string accordingly: ++ */ ++ if (devkmsg_log == DEVKMSG_LOG_MASK_ON) ++ strcpy(devkmsg_log_str, "on"); ++ else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF) ++ strcpy(devkmsg_log_str, "off"); ++ /* else "ratelimit" which is set by default. */ ++ ++ /* ++ * Sysctl cannot change it anymore. The kernel command line setting of ++ * this parameter is to force the setting to be permanent throughout the ++ * runtime of the system. This is a precation measure against userspace ++ * trying to be a smarta** and attempting to change it up on us. ++ */ ++ devkmsg_log |= DEVKMSG_LOG_MASK_LOCK; ++ ++ return 0; ++} ++__setup("printk.devkmsg=", control_devkmsg); ++ ++char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; ++ ++int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ char old_str[DEVKMSG_STR_MAX_SIZE]; ++ unsigned int old; ++ int err; ++ ++ if (write) { ++ if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK) ++ return -EINVAL; ++ ++ old = devkmsg_log; ++ strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE); ++ } ++ ++ err = proc_dostring(table, write, buffer, lenp, ppos); ++ if (err) ++ return err; ++ ++ if (write) { ++ err = __control_devkmsg(devkmsg_log_str); ++ ++ /* ++ * Do not accept an unknown string OR a known string with ++ * trailing crap... ++ */ ++ if (err < 0 || (err + 1 != *lenp)) { ++ ++ /* ... and restore old setting. */ ++ devkmsg_log = old; ++ strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE); ++ ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++/* ++ * Number of registered extended console drivers. ++ * ++ * If extended consoles are present, in-kernel cont reassembly is disabled ++ * and each fragment is stored as a separate log entry with proper ++ * continuation flag so that every emitted message has full metadata. This ++ * doesn't change the result for regular consoles or /proc/kmsg. For ++ * /dev/kmsg, as long as the reader concatenates messages according to ++ * consecutive continuation flags, the end result should be the same too. 
++ */ ++static int nr_ext_console_drivers; ++ ++/* ++ * Helper macros to handle lockdep when locking/unlocking console_sem. We use ++ * macros instead of functions so that _RET_IP_ contains useful information. ++ */ ++#define down_console_sem() do { \ ++ down(&console_sem);\ ++ mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\ ++} while (0) ++ ++static int __down_trylock_console_sem(unsigned long ip) ++{ ++ int lock_failed; ++ unsigned long flags; ++ ++ /* ++ * Here and in __up_console_sem() we need to be in safe mode, ++ * because spindump/WARN/etc from under console ->lock will ++ * deadlock in printk()->down_trylock_console_sem() otherwise. ++ */ ++ printk_safe_enter_irqsave(flags); ++ lock_failed = down_trylock(&console_sem); ++ printk_safe_exit_irqrestore(flags); ++ ++ if (lock_failed) ++ return 1; ++ mutex_acquire(&console_lock_dep_map, 0, 1, ip); ++ return 0; ++} ++#define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) ++ ++static void __up_console_sem(unsigned long ip) ++{ ++ unsigned long flags; ++ ++ mutex_release(&console_lock_dep_map, 1, ip); ++ ++ printk_safe_enter_irqsave(flags); ++ up(&console_sem); ++ printk_safe_exit_irqrestore(flags); ++} ++#define up_console_sem() __up_console_sem(_RET_IP_) ++ ++/* ++ * This is used for debugging the mess that is the VT code by ++ * keeping track if we have the console semaphore held. It's ++ * definitely not the perfect debug tool (we don't know if _WE_ ++ * hold it and are racing, but it helps tracking those weird code ++ * paths in the console code where we end up in places I want ++ * locked without the console sempahore held). ++ */ ++static int console_locked, console_suspended; ++ ++/* ++ * If exclusive_console is non-NULL then only this console is to be printed to. ++ */ ++static struct console *exclusive_console; ++ ++/* ++ * Array of consoles built from command line options (console=) ++ */ ++ ++#define MAX_CMDLINECONSOLES 8 ++ ++static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; ++ ++static int preferred_console = -1; ++int console_set_on_cmdline; ++EXPORT_SYMBOL(console_set_on_cmdline); ++ ++/* Flag: console code may call schedule() */ ++static int console_may_schedule; ++ ++enum con_msg_format_flags { ++ MSG_FORMAT_DEFAULT = 0, ++ MSG_FORMAT_SYSLOG = (1 << 0), ++}; ++ ++static int console_msg_format = MSG_FORMAT_DEFAULT; ++ ++/* ++ * The printk log buffer consists of a chain of concatenated variable ++ * length records. Every record starts with a record header, containing ++ * the overall length of the record. ++ * ++ * The heads to the first and last entry in the buffer, as well as the ++ * sequence numbers of these entries are maintained when messages are ++ * stored. ++ * ++ * If the heads indicate available messages, the length in the header ++ * tells the start next message. A length == 0 for the next message ++ * indicates a wrap-around to the beginning of the buffer. ++ * ++ * Every record carries the monotonic timestamp in microseconds, as well as ++ * the standard userspace syslog level and syslog facility. The usual ++ * kernel messages use LOG_KERN; userspace-injected messages always carry ++ * a matching syslog facility, by default LOG_USER. The origin of every ++ * message can be reliably determined that way. ++ * ++ * The human readable log message directly follows the message header. The ++ * length of the message text is stored in the header, the stored message ++ * is not terminated. 
++ * ++ * Optionally, a message can carry a dictionary of properties (key/value pairs), ++ * to provide userspace with a machine-readable message context. ++ * ++ * Examples for well-defined, commonly used property names are: ++ * DEVICE=b12:8 device identifier ++ * b12:8 block dev_t ++ * c127:3 char dev_t ++ * n8 netdev ifindex ++ * +sound:card0 subsystem:devname ++ * SUBSYSTEM=pci driver-core subsystem name ++ * ++ * Valid characters in property names are [a-zA-Z0-9.-_]. The plain text value ++ * follows directly after a '=' character. Every property is terminated by ++ * a '\0' character. The last property is not terminated. ++ * ++ * Example of a message structure: ++ * 0000 ff 8f 00 00 00 00 00 00 monotonic time in nsec ++ * 0008 34 00 record is 52 bytes long ++ * 000a 0b 00 text is 11 bytes long ++ * 000c 1f 00 dictionary is 23 bytes long ++ * 000e 03 00 LOG_KERN (facility) LOG_ERR (level) ++ * 0010 69 74 27 73 20 61 20 6c "it's a l" ++ * 69 6e 65 "ine" ++ * 001b 44 45 56 49 43 "DEVIC" ++ * 45 3d 62 38 3a 32 00 44 "E=b8:2\0D" ++ * 52 49 56 45 52 3d 62 75 "RIVER=bu" ++ * 67 "g" ++ * 0032 00 00 00 padding to next message header ++ * ++ * The 'struct printk_log' buffer header must never be directly exported to ++ * userspace, it is a kernel-private implementation detail that might ++ * need to be changed in the future, when the requirements change. ++ * ++ * /dev/kmsg exports the structured data in the following line format: ++ * ",,,[,additional_values, ... ];\n" ++ * ++ * Users of the export format should ignore possible additional values ++ * separated by ',', and find the message after the ';' character. ++ * ++ * The optional key/value pairs are attached as continuation lines starting ++ * with a space character and terminated by a newline. All possible ++ * non-prinatable characters are escaped in the "\xff" notation. ++ */ ++ ++enum log_flags { ++ LOG_NEWLINE = 2, /* text ended with a newline */ ++ LOG_PREFIX = 4, /* text started with a prefix */ ++ LOG_CONT = 8, /* text is a fragment of a continuation line */ ++}; ++ ++struct printk_log { ++ u64 ts_nsec; /* timestamp in nanoseconds */ ++ u16 len; /* length of entire record */ ++ u16 text_len; /* length of text buffer */ ++ u16 dict_len; /* length of dictionary buffer */ ++ u8 facility; /* syslog facility */ ++ u8 flags:5; /* internal record flags */ ++ u8 level:3; /* syslog level */ ++} ++#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS ++__packed __aligned(4) ++#endif ++; ++ ++/* ++ * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken ++ * within the scheduler's rq lock. It must be released before calling ++ * console_unlock() or anything else that might wake up a process. ++ */ ++DEFINE_RAW_SPINLOCK(logbuf_lock); ++ ++/* ++ * Helper macros to lock/unlock logbuf_lock and switch between ++ * printk-safe/unsafe modes. 
++ */ ++#define logbuf_lock_irq() \ ++ do { \ ++ printk_safe_enter_irq(); \ ++ raw_spin_lock(&logbuf_lock); \ ++ } while (0) ++ ++#define logbuf_unlock_irq() \ ++ do { \ ++ raw_spin_unlock(&logbuf_lock); \ ++ printk_safe_exit_irq(); \ ++ } while (0) ++ ++#define logbuf_lock_irqsave(flags) \ ++ do { \ ++ printk_safe_enter_irqsave(flags); \ ++ raw_spin_lock(&logbuf_lock); \ ++ } while (0) ++ ++#define logbuf_unlock_irqrestore(flags) \ ++ do { \ ++ raw_spin_unlock(&logbuf_lock); \ ++ printk_safe_exit_irqrestore(flags); \ ++ } while (0) ++ ++#ifdef CONFIG_PRINTK ++DECLARE_WAIT_QUEUE_HEAD(log_wait); ++/* the next printk record to read by syslog(READ) or /proc/kmsg */ ++static u64 syslog_seq; ++static u32 syslog_idx; ++static size_t syslog_partial; ++static bool syslog_time; ++ ++/* index and sequence number of the first record stored in the buffer */ ++static u64 log_first_seq; ++static u32 log_first_idx; ++ ++/* index and sequence number of the next record to store in the buffer */ ++static u64 log_next_seq; ++static u32 log_next_idx; ++ ++/* the next printk record to write to the console */ ++static u64 console_seq; ++static u32 console_idx; ++static u64 exclusive_console_stop_seq; ++ ++/* the next printk record to read after the last 'clear' command */ ++static u64 clear_seq; ++static u32 clear_idx; ++ ++#define PREFIX_MAX 32 ++#define LOG_LINE_MAX (1024 - PREFIX_MAX) ++ ++#define LOG_LEVEL(v) ((v) & 0x07) ++#define LOG_FACILITY(v) ((v) >> 3 & 0xff) ++ ++/* record buffer */ ++#define LOG_ALIGN __alignof__(struct printk_log) ++#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) ++#define LOG_BUF_LEN_MAX (u32)(1 << 31) ++static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); ++static char *log_buf = __log_buf; ++static u32 log_buf_len = __LOG_BUF_LEN; ++ ++/* ++ * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before ++ * per_cpu_areas are initialised. This variable is set to true when ++ * it's safe to access per-CPU data. ++ */ ++static bool __printk_percpu_data_ready __read_mostly; ++ ++bool printk_percpu_data_ready(void) ++{ ++ return __printk_percpu_data_ready; ++} ++ ++/* Return log buffer address */ ++char *log_buf_addr_get(void) ++{ ++ return log_buf; ++} ++EXPORT_SYMBOL_GPL(log_buf_addr_get); ++ ++/* Return log buffer size */ ++u32 log_buf_len_get(void) ++{ ++ return log_buf_len; ++} ++EXPORT_SYMBOL_GPL(log_buf_len_get); ++ ++/* human readable text of the record */ ++static char *log_text(const struct printk_log *msg) ++{ ++ return (char *)msg + sizeof(struct printk_log); ++} ++ ++/* optional key/value pair dictionary attached to the record */ ++static char *log_dict(const struct printk_log *msg) ++{ ++ return (char *)msg + sizeof(struct printk_log) + msg->text_len; ++} ++ ++/* get record by index; idx must point to valid msg */ ++static struct printk_log *log_from_idx(u32 idx) ++{ ++ struct printk_log *msg = (struct printk_log *)(log_buf + idx); ++ ++ /* ++ * A length == 0 record is the end of buffer marker. Wrap around and ++ * read the message at the start of the buffer. ++ */ ++ if (!msg->len) ++ return (struct printk_log *)log_buf; ++ return msg; ++} ++ ++/* get next record; idx must point to valid msg */ ++static u32 log_next(u32 idx) ++{ ++ struct printk_log *msg = (struct printk_log *)(log_buf + idx); ++ ++ /* length == 0 indicates the end of the buffer; wrap */ ++ /* ++ * A length == 0 record is the end of buffer marker. Wrap around and ++ * read the message at the start of the buffer as *this* one, and ++ * return the one after that. 
++ */ ++ if (!msg->len) { ++ msg = (struct printk_log *)log_buf; ++ return msg->len; ++ } ++ return idx + msg->len; ++} ++ ++/* ++ * Check whether there is enough free space for the given message. ++ * ++ * The same values of first_idx and next_idx mean that the buffer ++ * is either empty or full. ++ * ++ * If the buffer is empty, we must respect the position of the indexes. ++ * They cannot be reset to the beginning of the buffer. ++ */ ++static int logbuf_has_space(u32 msg_size, bool empty) ++{ ++ u32 free; ++ ++ if (log_next_idx > log_first_idx || empty) ++ free = max(log_buf_len - log_next_idx, log_first_idx); ++ else ++ free = log_first_idx - log_next_idx; ++ ++ /* ++ * We need space also for an empty header that signalizes wrapping ++ * of the buffer. ++ */ ++ return free >= msg_size + sizeof(struct printk_log); ++} ++ ++static int log_make_free_space(u32 msg_size) ++{ ++ while (log_first_seq < log_next_seq && ++ !logbuf_has_space(msg_size, false)) { ++ /* drop old messages until we have enough contiguous space */ ++ log_first_idx = log_next(log_first_idx); ++ log_first_seq++; ++ } ++ ++ if (clear_seq < log_first_seq) { ++ clear_seq = log_first_seq; ++ clear_idx = log_first_idx; ++ } ++ ++ /* sequence numbers are equal, so the log buffer is empty */ ++ if (logbuf_has_space(msg_size, log_first_seq == log_next_seq)) ++ return 0; ++ ++ return -ENOMEM; ++} ++ ++/* compute the message size including the padding bytes */ ++static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len) ++{ ++ u32 size; ++ ++ size = sizeof(struct printk_log) + text_len + dict_len; ++ *pad_len = (-size) & (LOG_ALIGN - 1); ++ size += *pad_len; ++ ++ return size; ++} ++ ++/* ++ * Define how much of the log buffer we could take at maximum. The value ++ * must be greater than two. Note that only half of the buffer is available ++ * when the index points to the middle. ++ */ ++#define MAX_LOG_TAKE_PART 4 ++static const char trunc_msg[] = ""; ++ ++static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len, ++ u16 *dict_len, u32 *pad_len) ++{ ++ /* ++ * The message should not take the whole buffer. Otherwise, it might ++ * get removed too soon. ++ */ ++ u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; ++ if (*text_len > max_text_len) ++ *text_len = max_text_len; ++ /* enable the warning message */ ++ *trunc_msg_len = strlen(trunc_msg); ++ /* disable the "dict" completely */ ++ *dict_len = 0; ++ /* compute the size again, count also the warning message */ ++ return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len); ++} ++ ++/* insert record into the buffer, discard old ones, update heads */ ++static int log_store(int facility, int level, ++ enum log_flags flags, u64 ts_nsec, ++ const char *dict, u16 dict_len, ++ const char *text, u16 text_len) ++{ ++ struct printk_log *msg; ++ u32 size, pad_len; ++ u16 trunc_msg_len = 0; ++ ++ /* number of '\0' padding bytes to next message */ ++ size = msg_used_size(text_len, dict_len, &pad_len); ++ ++ if (log_make_free_space(size)) { ++ /* truncate the message if it is too long for empty buffer */ ++ size = truncate_msg(&text_len, &trunc_msg_len, ++ &dict_len, &pad_len); ++ /* survive when the log buffer is too small for trunc_msg */ ++ if (log_make_free_space(size)) ++ return 0; ++ } ++ ++ if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) { ++ /* ++ * This message + an additional empty header does not fit ++ * at the end of the buffer. Add an empty header with len == 0 ++ * to signify a wrap around. 
++ */ ++ memset(log_buf + log_next_idx, 0, sizeof(struct printk_log)); ++ log_next_idx = 0; ++ } ++ ++ /* fill message */ ++ msg = (struct printk_log *)(log_buf + log_next_idx); ++ memcpy(log_text(msg), text, text_len); ++ msg->text_len = text_len; ++ if (trunc_msg_len) { ++ memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len); ++ msg->text_len += trunc_msg_len; ++ } ++ memcpy(log_dict(msg), dict, dict_len); ++ msg->dict_len = dict_len; ++ msg->facility = facility; ++ msg->level = level & 7; ++ msg->flags = flags & 0x1f; ++ if (ts_nsec > 0) ++ msg->ts_nsec = ts_nsec; ++ else ++ msg->ts_nsec = local_clock(); ++ memset(log_dict(msg) + dict_len, 0, pad_len); ++ msg->len = size; ++ ++ /* insert message */ ++ log_next_idx += msg->len; ++ log_next_seq++; ++ ++ return msg->text_len; ++} ++ ++int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT); ++ ++static int syslog_action_restricted(int type) ++{ ++ if (dmesg_restrict) ++ return 1; ++ /* ++ * Unless restricted, we allow "read all" and "get buffer size" ++ * for everybody. ++ */ ++ return type != SYSLOG_ACTION_READ_ALL && ++ type != SYSLOG_ACTION_SIZE_BUFFER; ++} ++ ++static int check_syslog_permissions(int type, int source) ++{ ++ /* ++ * If this is from /proc/kmsg and we've already opened it, then we've ++ * already done the capabilities checks at open time. ++ */ ++ if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN) ++ goto ok; ++ ++ if (syslog_action_restricted(type)) { ++ if (capable(CAP_SYSLOG)) ++ goto ok; ++ /* ++ * For historical reasons, accept CAP_SYS_ADMIN too, with ++ * a warning. ++ */ ++ if (capable(CAP_SYS_ADMIN)) { ++ pr_warn_once("%s (%d): Attempt to access syslog with " ++ "CAP_SYS_ADMIN but no CAP_SYSLOG " ++ "(deprecated).\n", ++ current->comm, task_pid_nr(current)); ++ goto ok; ++ } ++ return -EPERM; ++ } ++ok: ++ return security_syslog(type); ++} ++ ++static void append_char(char **pp, char *e, char c) ++{ ++ if (*pp < e) ++ *(*pp)++ = c; ++} ++ ++static ssize_t msg_print_ext_header(char *buf, size_t size, ++ struct printk_log *msg, u64 seq) ++{ ++ u64 ts_usec = msg->ts_nsec; ++ ++ do_div(ts_usec, 1000); ++ ++ return scnprintf(buf, size, "%u,%llu,%llu,%c;", ++ (msg->facility << 3) | msg->level, seq, ts_usec, ++ msg->flags & LOG_CONT ? 
'c' : '-'); ++} ++ ++static ssize_t msg_print_ext_body(char *buf, size_t size, ++ char *dict, size_t dict_len, ++ char *text, size_t text_len) ++{ ++ char *p = buf, *e = buf + size; ++ size_t i; ++ ++ /* escape non-printable characters */ ++ for (i = 0; i < text_len; i++) { ++ unsigned char c = text[i]; ++ ++ if (c < ' ' || c >= 127 || c == '\\') ++ p += scnprintf(p, e - p, "\\x%02x", c); ++ else ++ append_char(&p, e, c); ++ } ++ append_char(&p, e, '\n'); ++ ++ if (dict_len) { ++ bool line = true; ++ ++ for (i = 0; i < dict_len; i++) { ++ unsigned char c = dict[i]; ++ ++ if (line) { ++ append_char(&p, e, ' '); ++ line = false; ++ } ++ ++ if (c == '\0') { ++ append_char(&p, e, '\n'); ++ line = true; ++ continue; ++ } ++ ++ if (c < ' ' || c >= 127 || c == '\\') { ++ p += scnprintf(p, e - p, "\\x%02x", c); ++ continue; ++ } ++ ++ append_char(&p, e, c); ++ } ++ append_char(&p, e, '\n'); ++ } ++ ++ return p - buf; ++} ++ ++/* /dev/kmsg - userspace message inject/listen interface */ ++struct devkmsg_user { ++ u64 seq; ++ u32 idx; ++ struct ratelimit_state rs; ++ struct mutex lock; ++ char buf[CONSOLE_EXT_LOG_MAX]; ++}; ++ ++static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) ++{ ++ char *buf, *line; ++ int level = default_message_loglevel; ++ int facility = 1; /* LOG_USER */ ++ struct file *file = iocb->ki_filp; ++ struct devkmsg_user *user = file->private_data; ++ size_t len = iov_iter_count(from); ++ ssize_t ret = len; ++ ++ if (!user || len > LOG_LINE_MAX) ++ return -EINVAL; ++ ++ /* Ignore when user logging is disabled. */ ++ if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) ++ return len; ++ ++ /* Ratelimit when not explicitly enabled. */ ++ if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) { ++ if (!___ratelimit(&user->rs, current->comm)) ++ return ret; ++ } ++ ++ buf = kmalloc(len+1, GFP_KERNEL); ++ if (buf == NULL) ++ return -ENOMEM; ++ ++ buf[len] = '\0'; ++ if (!copy_from_iter_full(buf, len, from)) { ++ kfree(buf); ++ return -EFAULT; ++ } ++ ++ /* ++ * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace ++ * the decimal value represents 32bit, the lower 3 bit are the log ++ * level, the rest are the log facility. ++ * ++ * If no prefix or no userspace facility is specified, we ++ * enforce LOG_USER, to be able to reliably distinguish ++ * kernel-generated messages from userspace-injected ones. 
++ */ ++ line = buf; ++ if (line[0] == '<') { ++ char *endp = NULL; ++ unsigned int u; ++ ++ u = simple_strtoul(line + 1, &endp, 10); ++ if (endp && endp[0] == '>') { ++ level = LOG_LEVEL(u); ++ if (LOG_FACILITY(u) != 0) ++ facility = LOG_FACILITY(u); ++ endp++; ++ len -= endp - line; ++ line = endp; ++ } ++ } ++ ++ printk_emit(facility, level, NULL, 0, "%s", line); ++ kfree(buf); ++ return ret; ++} ++ ++static ssize_t devkmsg_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct devkmsg_user *user = file->private_data; ++ struct printk_log *msg; ++ size_t len; ++ ssize_t ret; ++ ++ if (!user) ++ return -EBADF; ++ ++ ret = mutex_lock_interruptible(&user->lock); ++ if (ret) ++ return ret; ++ ++ logbuf_lock_irq(); ++ while (user->seq == log_next_seq) { ++ if (file->f_flags & O_NONBLOCK) { ++ ret = -EAGAIN; ++ logbuf_unlock_irq(); ++ goto out; ++ } ++ ++ logbuf_unlock_irq(); ++ ret = wait_event_interruptible(log_wait, ++ user->seq != log_next_seq); ++ if (ret) ++ goto out; ++ logbuf_lock_irq(); ++ } ++ ++ if (user->seq < log_first_seq) { ++ /* our last seen message is gone, return error and reset */ ++ user->idx = log_first_idx; ++ user->seq = log_first_seq; ++ ret = -EPIPE; ++ logbuf_unlock_irq(); ++ goto out; ++ } ++ ++ msg = log_from_idx(user->idx); ++ len = msg_print_ext_header(user->buf, sizeof(user->buf), ++ msg, user->seq); ++ len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, ++ log_dict(msg), msg->dict_len, ++ log_text(msg), msg->text_len); ++ ++ user->idx = log_next(user->idx); ++ user->seq++; ++ logbuf_unlock_irq(); ++ ++ if (len > count) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (copy_to_user(buf, user->buf, len)) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ret = len; ++out: ++ mutex_unlock(&user->lock); ++ return ret; ++} ++ ++static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) ++{ ++ struct devkmsg_user *user = file->private_data; ++ loff_t ret = 0; ++ ++ if (!user) ++ return -EBADF; ++ if (offset) ++ return -ESPIPE; ++ ++ logbuf_lock_irq(); ++ switch (whence) { ++ case SEEK_SET: ++ /* the first record */ ++ user->idx = log_first_idx; ++ user->seq = log_first_seq; ++ break; ++ case SEEK_DATA: ++ /* ++ * The first record after the last SYSLOG_ACTION_CLEAR, ++ * like issued by 'dmesg -c'. Reading /dev/kmsg itself ++ * changes no global state, and does not clear anything. 
++ */ ++ user->idx = clear_idx; ++ user->seq = clear_seq; ++ break; ++ case SEEK_END: ++ /* after the last record */ ++ user->idx = log_next_idx; ++ user->seq = log_next_seq; ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ logbuf_unlock_irq(); ++ return ret; ++} ++ ++static __poll_t devkmsg_poll(struct file *file, poll_table *wait) ++{ ++ struct devkmsg_user *user = file->private_data; ++ __poll_t ret = 0; ++ ++ if (!user) ++ return EPOLLERR|EPOLLNVAL; ++ ++ poll_wait(file, &log_wait, wait); ++ ++ logbuf_lock_irq(); ++ if (user->seq < log_next_seq) { ++ /* return error when data has vanished underneath us */ ++ if (user->seq < log_first_seq) ++ ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; ++ else ++ ret = EPOLLIN|EPOLLRDNORM; ++ } ++ logbuf_unlock_irq(); ++ ++ return ret; ++} ++ ++static int devkmsg_open(struct inode *inode, struct file *file) ++{ ++ struct devkmsg_user *user; ++ int err; ++ ++ if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) ++ return -EPERM; ++ ++ /* write-only does not need any file context */ ++ if ((file->f_flags & O_ACCMODE) != O_WRONLY) { ++ err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, ++ SYSLOG_FROM_READER); ++ if (err) ++ return err; ++ } ++ ++ user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); ++ if (!user) ++ return -ENOMEM; ++ ++ ratelimit_default_init(&user->rs); ++ ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE); ++ ++ mutex_init(&user->lock); ++ ++ logbuf_lock_irq(); ++ user->idx = log_first_idx; ++ user->seq = log_first_seq; ++ logbuf_unlock_irq(); ++ ++ file->private_data = user; ++ return 0; ++} ++ ++static int devkmsg_release(struct inode *inode, struct file *file) ++{ ++ struct devkmsg_user *user = file->private_data; ++ ++ if (!user) ++ return 0; ++ ++ ratelimit_state_exit(&user->rs); ++ ++ mutex_destroy(&user->lock); ++ kfree(user); ++ return 0; ++} ++ ++const struct file_operations kmsg_fops = { ++ .open = devkmsg_open, ++ .read = devkmsg_read, ++ .write_iter = devkmsg_write, ++ .llseek = devkmsg_llseek, ++ .poll = devkmsg_poll, ++ .release = devkmsg_release, ++}; ++ ++#ifdef CONFIG_CRASH_CORE ++/* ++ * This appends the listed symbols to /proc/vmcore ++ * ++ * /proc/vmcore is used by various utilities, like crash and makedumpfile to ++ * obtain access to symbols that are otherwise very difficult to locate. These ++ * symbols are specifically used so that utilities can access and extract the ++ * dmesg log from a vmcore file after a crash. ++ */ ++void log_buf_vmcoreinfo_setup(void) ++{ ++ VMCOREINFO_SYMBOL(log_buf); ++ VMCOREINFO_SYMBOL(log_buf_len); ++ VMCOREINFO_SYMBOL(log_first_idx); ++ VMCOREINFO_SYMBOL(clear_idx); ++ VMCOREINFO_SYMBOL(log_next_idx); ++ /* ++ * Export struct printk_log size and field offsets. User space tools can ++ * parse it and detect any changes to structure down the line. 
++ */ ++ VMCOREINFO_STRUCT_SIZE(printk_log); ++ VMCOREINFO_OFFSET(printk_log, ts_nsec); ++ VMCOREINFO_OFFSET(printk_log, len); ++ VMCOREINFO_OFFSET(printk_log, text_len); ++ VMCOREINFO_OFFSET(printk_log, dict_len); ++} ++#endif ++ ++/* requested log_buf_len from kernel cmdline */ ++static unsigned long __initdata new_log_buf_len; ++ ++/* we practice scaling the ring buffer by powers of 2 */ ++static void __init log_buf_len_update(u64 size) ++{ ++ if (size > (u64)LOG_BUF_LEN_MAX) { ++ size = (u64)LOG_BUF_LEN_MAX; ++ pr_err("log_buf over 2G is not supported.\n"); ++ } ++ ++ if (size) ++ size = roundup_pow_of_two(size); ++ if (size > log_buf_len) ++ new_log_buf_len = (unsigned long)size; ++} ++ ++/* save requested log_buf_len since it's too early to process it */ ++static int __init log_buf_len_setup(char *str) ++{ ++ u64 size; ++ ++ if (!str) ++ return -EINVAL; ++ ++ size = memparse(str, &str); ++ ++ log_buf_len_update(size); ++ ++ return 0; ++} ++early_param("log_buf_len", log_buf_len_setup); ++ ++#ifdef CONFIG_SMP ++#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT) ++ ++static void __init log_buf_add_cpu(void) ++{ ++ unsigned int cpu_extra; ++ ++ /* ++ * archs should set up cpu_possible_bits properly with ++ * set_cpu_possible() after setup_arch() but just in ++ * case lets ensure this is valid. ++ */ ++ if (num_possible_cpus() == 1) ++ return; ++ ++ cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN; ++ ++ /* by default this will only continue through for large > 64 CPUs */ ++ if (cpu_extra <= __LOG_BUF_LEN / 2) ++ return; ++ ++ pr_info("log_buf_len individual max cpu contribution: %d bytes\n", ++ __LOG_CPU_MAX_BUF_LEN); ++ pr_info("log_buf_len total cpu_extra contributions: %d bytes\n", ++ cpu_extra); ++ pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN); ++ ++ log_buf_len_update(cpu_extra + __LOG_BUF_LEN); ++} ++#else /* !CONFIG_SMP */ ++static inline void log_buf_add_cpu(void) {} ++#endif /* CONFIG_SMP */ ++ ++static void __init set_percpu_data_ready(void) ++{ ++ printk_safe_init(); ++ /* Make sure we set this flag only after printk_safe() init is done */ ++ barrier(); ++ __printk_percpu_data_ready = true; ++} ++ ++void __init setup_log_buf(int early) ++{ ++ unsigned long flags; ++ char *new_log_buf; ++ unsigned int free; ++ ++ /* ++ * Some archs call setup_log_buf() multiple times - first is very ++ * early, e.g. from setup_arch(), and second - when percpu_areas ++ * are initialised. 
++ */ ++ if (!early) ++ set_percpu_data_ready(); ++ ++ if (log_buf != __log_buf) ++ return; ++ ++ if (!early && !new_log_buf_len) ++ log_buf_add_cpu(); ++ ++ if (!new_log_buf_len) ++ return; ++ ++ if (early) { ++ new_log_buf = ++ memblock_virt_alloc(new_log_buf_len, LOG_ALIGN); ++ } else { ++ new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, ++ LOG_ALIGN); ++ } ++ ++ if (unlikely(!new_log_buf)) { ++ pr_err("log_buf_len: %lu bytes not available\n", ++ new_log_buf_len); ++ return; ++ } ++ ++ logbuf_lock_irqsave(flags); ++ log_buf_len = new_log_buf_len; ++ log_buf = new_log_buf; ++ new_log_buf_len = 0; ++ free = __LOG_BUF_LEN - log_next_idx; ++ memcpy(log_buf, __log_buf, __LOG_BUF_LEN); ++ logbuf_unlock_irqrestore(flags); ++ ++ pr_info("log_buf_len: %u bytes\n", log_buf_len); ++ pr_info("early log buf free: %u(%u%%)\n", ++ free, (free * 100) / __LOG_BUF_LEN); ++} ++ ++static bool __read_mostly ignore_loglevel; ++ ++static int __init ignore_loglevel_setup(char *str) ++{ ++ ignore_loglevel = true; ++ pr_info("debug: ignoring loglevel setting.\n"); ++ ++ return 0; ++} ++ ++early_param("ignore_loglevel", ignore_loglevel_setup); ++module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); ++MODULE_PARM_DESC(ignore_loglevel, ++ "ignore loglevel setting (prints all kernel messages to the console)"); ++ ++static bool suppress_message_printing(int level) ++{ ++ return (level >= console_loglevel && !ignore_loglevel); ++} ++ ++#ifdef CONFIG_BOOT_PRINTK_DELAY ++ ++static int boot_delay; /* msecs delay after each printk during bootup */ ++static unsigned long long loops_per_msec; /* based on boot_delay */ ++ ++static int __init boot_delay_setup(char *str) ++{ ++ unsigned long lpj; ++ ++ lpj = preset_lpj ? preset_lpj : 1000000; /* some guess */ ++ loops_per_msec = (unsigned long long)lpj / 1000 * HZ; ++ ++ get_option(&str, &boot_delay); ++ if (boot_delay > 10 * 1000) ++ boot_delay = 0; ++ ++ pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, " ++ "HZ: %d, loops_per_msec: %llu\n", ++ boot_delay, preset_lpj, lpj, HZ, loops_per_msec); ++ return 0; ++} ++early_param("boot_delay", boot_delay_setup); ++ ++static void boot_delay_msec(int level) ++{ ++ unsigned long long k; ++ unsigned long timeout; ++ ++ if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) ++ || suppress_message_printing(level)) { ++ return; ++ } ++ ++ k = (unsigned long long)loops_per_msec * boot_delay; ++ ++ timeout = jiffies + msecs_to_jiffies(boot_delay); ++ while (k) { ++ k--; ++ cpu_relax(); ++ /* ++ * use (volatile) jiffies to prevent ++ * compiler reduction; loop termination via jiffies ++ * is secondary and may or may not happen. 
++ */ ++ if (time_after(jiffies, timeout)) ++ break; ++ touch_nmi_watchdog(); ++ } ++} ++#else ++static inline void boot_delay_msec(int level) ++{ ++} ++#endif ++ ++static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME); ++module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); ++ ++static size_t print_time(u64 ts, char *buf) ++{ ++ unsigned long rem_nsec = do_div(ts, 1000000000); ++ ++ if (!buf) ++ return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts); ++ ++ return sprintf(buf, "[%5lu.%06lu] ", ++ (unsigned long)ts, rem_nsec / 1000); ++} ++ ++static size_t print_prefix(const struct printk_log *msg, bool syslog, ++ bool time, char *buf) ++{ ++ size_t len = 0; ++ unsigned int prefix = (msg->facility << 3) | msg->level; ++ ++ if (syslog) { ++ if (buf) { ++ len += sprintf(buf, "<%u>", prefix); ++ } else { ++ len += 3; ++ if (prefix > 999) ++ len += 3; ++ else if (prefix > 99) ++ len += 2; ++ else if (prefix > 9) ++ len++; ++ } ++ } ++ ++ if (time) ++ len += print_time(msg->ts_nsec, buf ? buf + len : NULL); ++ return len; ++} ++ ++static size_t msg_print_text(const struct printk_log *msg, bool syslog, ++ bool time, char *buf, size_t size) ++{ ++ const char *text = log_text(msg); ++ size_t text_size = msg->text_len; ++ size_t len = 0; ++ ++ do { ++ const char *next = memchr(text, '\n', text_size); ++ size_t text_len; ++ ++ if (next) { ++ text_len = next - text; ++ next++; ++ text_size -= next - text; ++ } else { ++ text_len = text_size; ++ } ++ ++ if (buf) { ++ if (print_prefix(msg, syslog, time, NULL) + ++ text_len + 1 >= size - len) ++ break; ++ ++ len += print_prefix(msg, syslog, time, buf + len); ++ memcpy(buf + len, text, text_len); ++ len += text_len; ++ buf[len++] = '\n'; ++ } else { ++ /* SYSLOG_ACTION_* buffer size only calculation */ ++ len += print_prefix(msg, syslog, time, NULL); ++ len += text_len; ++ len++; ++ } ++ ++ text = next; ++ } while (text); ++ ++ return len; ++} ++ ++static int syslog_print(char __user *buf, int size) ++{ ++ char *text; ++ struct printk_log *msg; ++ int len = 0; ++ ++ text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); ++ if (!text) ++ return -ENOMEM; ++ ++ while (size > 0) { ++ size_t n; ++ size_t skip; ++ ++ logbuf_lock_irq(); ++ if (syslog_seq < log_first_seq) { ++ /* messages are gone, move to first one */ ++ syslog_seq = log_first_seq; ++ syslog_idx = log_first_idx; ++ syslog_partial = 0; ++ } ++ if (syslog_seq == log_next_seq) { ++ logbuf_unlock_irq(); ++ break; ++ } ++ ++ /* ++ * To keep reading/counting partial line consistent, ++ * use printk_time value as of the beginning of a line. 
++ */ ++ if (!syslog_partial) ++ syslog_time = printk_time; ++ ++ skip = syslog_partial; ++ msg = log_from_idx(syslog_idx); ++ n = msg_print_text(msg, true, syslog_time, text, ++ LOG_LINE_MAX + PREFIX_MAX); ++ if (n - syslog_partial <= size) { ++ /* message fits into buffer, move forward */ ++ syslog_idx = log_next(syslog_idx); ++ syslog_seq++; ++ n -= syslog_partial; ++ syslog_partial = 0; ++ } else if (!len){ ++ /* partial read(), remember position */ ++ n = size; ++ syslog_partial += n; ++ } else ++ n = 0; ++ logbuf_unlock_irq(); ++ ++ if (!n) ++ break; ++ ++ if (copy_to_user(buf, text + skip, n)) { ++ if (!len) ++ len = -EFAULT; ++ break; ++ } ++ ++ len += n; ++ size -= n; ++ buf += n; ++ } ++ ++ kfree(text); ++ return len; ++} ++ ++static int syslog_print_all(char __user *buf, int size, bool clear) ++{ ++ char *text; ++ int len = 0; ++ u64 next_seq; ++ u64 seq; ++ u32 idx; ++ bool time; ++ ++ text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); ++ if (!text) ++ return -ENOMEM; ++ ++ time = printk_time; ++ logbuf_lock_irq(); ++ /* ++ * Find first record that fits, including all following records, ++ * into the user-provided buffer for this dump. ++ */ ++ seq = clear_seq; ++ idx = clear_idx; ++ while (seq < log_next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ len += msg_print_text(msg, true, time, NULL, 0); ++ idx = log_next(idx); ++ seq++; ++ } ++ ++ /* move first record forward until length fits into the buffer */ ++ seq = clear_seq; ++ idx = clear_idx; ++ while (len > size && seq < log_next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ len -= msg_print_text(msg, true, time, NULL, 0); ++ idx = log_next(idx); ++ seq++; ++ } ++ ++ /* last message fitting into this dump */ ++ next_seq = log_next_seq; ++ ++ len = 0; ++ while (len >= 0 && seq < next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ int textlen = msg_print_text(msg, true, time, text, ++ LOG_LINE_MAX + PREFIX_MAX); ++ ++ idx = log_next(idx); ++ seq++; ++ ++ logbuf_unlock_irq(); ++ if (copy_to_user(buf + len, text, textlen)) ++ len = -EFAULT; ++ else ++ len += textlen; ++ logbuf_lock_irq(); ++ ++ if (seq < log_first_seq) { ++ /* messages are gone, move to next one */ ++ seq = log_first_seq; ++ idx = log_first_idx; ++ } ++ } ++ ++ if (clear) { ++ clear_seq = log_next_seq; ++ clear_idx = log_next_idx; ++ } ++ logbuf_unlock_irq(); ++ ++ kfree(text); ++ return len; ++} ++ ++static void syslog_clear(void) ++{ ++ logbuf_lock_irq(); ++ clear_seq = log_next_seq; ++ clear_idx = log_next_idx; ++ logbuf_unlock_irq(); ++} ++ ++int do_syslog(int type, char __user *buf, int len, int source) ++{ ++ bool clear = false; ++ static int saved_console_loglevel = LOGLEVEL_DEFAULT; ++ int error; ++ ++ error = check_syslog_permissions(type, source); ++ if (error) ++ return error; ++ ++ switch (type) { ++ case SYSLOG_ACTION_CLOSE: /* Close log */ ++ break; ++ case SYSLOG_ACTION_OPEN: /* Open log */ ++ break; ++ case SYSLOG_ACTION_READ: /* Read from log */ ++ if (!buf || len < 0) ++ return -EINVAL; ++ if (!len) ++ return 0; ++ if (!access_ok(buf, len)) ++ return -EFAULT; ++ error = wait_event_interruptible(log_wait, ++ syslog_seq != log_next_seq); ++ if (error) ++ return error; ++ error = syslog_print(buf, len); ++ break; ++ /* Read/clear last kernel messages */ ++ case SYSLOG_ACTION_READ_CLEAR: ++ clear = true; ++ /* FALL THRU */ ++ /* Read last kernel messages */ ++ case SYSLOG_ACTION_READ_ALL: ++ if (!buf || len < 0) ++ return -EINVAL; ++ if (!len) ++ return 0; ++ if (!access_ok(buf, len)) ++ return -EFAULT; 
++ error = syslog_print_all(buf, len, clear); ++ break; ++ /* Clear ring buffer */ ++ case SYSLOG_ACTION_CLEAR: ++ syslog_clear(); ++ break; ++ /* Disable logging to console */ ++ case SYSLOG_ACTION_CONSOLE_OFF: ++ if (saved_console_loglevel == LOGLEVEL_DEFAULT) ++ saved_console_loglevel = console_loglevel; ++ console_loglevel = minimum_console_loglevel; ++ break; ++ /* Enable logging to console */ ++ case SYSLOG_ACTION_CONSOLE_ON: ++ if (saved_console_loglevel != LOGLEVEL_DEFAULT) { ++ console_loglevel = saved_console_loglevel; ++ saved_console_loglevel = LOGLEVEL_DEFAULT; ++ } ++ break; ++ /* Set level of messages printed to console */ ++ case SYSLOG_ACTION_CONSOLE_LEVEL: ++ if (len < 1 || len > 8) ++ return -EINVAL; ++ if (len < minimum_console_loglevel) ++ len = minimum_console_loglevel; ++ console_loglevel = len; ++ /* Implicitly re-enable logging to console */ ++ saved_console_loglevel = LOGLEVEL_DEFAULT; ++ break; ++ /* Number of chars in the log buffer */ ++ case SYSLOG_ACTION_SIZE_UNREAD: ++ logbuf_lock_irq(); ++ if (syslog_seq < log_first_seq) { ++ /* messages are gone, move to first one */ ++ syslog_seq = log_first_seq; ++ syslog_idx = log_first_idx; ++ syslog_partial = 0; ++ } ++ if (source == SYSLOG_FROM_PROC) { ++ /* ++ * Short-cut for poll(/"proc/kmsg") which simply checks ++ * for pending data, not the size; return the count of ++ * records, not the length. ++ */ ++ error = log_next_seq - syslog_seq; ++ } else { ++ u64 seq = syslog_seq; ++ u32 idx = syslog_idx; ++ bool time = syslog_partial ? syslog_time : printk_time; ++ ++ while (seq < log_next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ error += msg_print_text(msg, true, time, NULL, ++ 0); ++ time = printk_time; ++ idx = log_next(idx); ++ seq++; ++ } ++ error -= syslog_partial; ++ } ++ logbuf_unlock_irq(); ++ break; ++ /* Size of the log buffer */ ++ case SYSLOG_ACTION_SIZE_BUFFER: ++ error = log_buf_len; ++ break; ++ default: ++ error = -EINVAL; ++ break; ++ } ++ ++ return error; ++} ++ ++SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) ++{ ++ return do_syslog(type, buf, len, SYSLOG_FROM_READER); ++} ++ ++/* ++ * Special console_lock variants that help to reduce the risk of soft-lockups. ++ * They allow to pass console_lock to another printk() call using a busy wait. ++ */ ++ ++#ifdef CONFIG_LOCKDEP ++static struct lockdep_map console_owner_dep_map = { ++ .name = "console_owner" ++}; ++#endif ++ ++static DEFINE_RAW_SPINLOCK(console_owner_lock); ++static struct task_struct *console_owner; ++static bool console_waiter; ++ ++void zap_locks(void) ++{ ++ if (raw_spin_is_locked(&logbuf_lock)) { ++ debug_locks_off(); ++ raw_spin_lock_init(&logbuf_lock); ++ } ++ ++ if (raw_spin_is_locked(&console_owner_lock)) { ++ raw_spin_lock_init(&console_owner_lock); ++ console_owner = NULL; ++ console_waiter = false; ++ } ++} ++ ++/** ++ * console_lock_spinning_enable - mark beginning of code where another ++ * thread might safely busy wait ++ * ++ * This basically converts console_lock into a spinlock. This marks ++ * the section where the console_lock owner can not sleep, because ++ * there may be a waiter spinning (like a spinlock). Also it must be ++ * ready to hand over the lock at the end of the section. 
++ */ ++static void console_lock_spinning_enable(void) ++{ ++ raw_spin_lock(&console_owner_lock); ++ console_owner = current; ++ raw_spin_unlock(&console_owner_lock); ++ ++ /* The waiter may spin on us after setting console_owner */ ++ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); ++} ++ ++/** ++ * console_lock_spinning_disable_and_check - mark end of code where another ++ * thread was able to busy wait and check if there is a waiter ++ * ++ * This is called at the end of the section where spinning is allowed. ++ * It has two functions. First, it is a signal that it is no longer ++ * safe to start busy waiting for the lock. Second, it checks if ++ * there is a busy waiter and passes the lock rights to her. ++ * ++ * Important: Callers lose the lock if there was a busy waiter. ++ * They must not touch items synchronized by console_lock ++ * in this case. ++ * ++ * Return: 1 if the lock rights were passed, 0 otherwise. ++ */ ++static int console_lock_spinning_disable_and_check(void) ++{ ++ int waiter; ++ ++ raw_spin_lock(&console_owner_lock); ++ waiter = READ_ONCE(console_waiter); ++ console_owner = NULL; ++ raw_spin_unlock(&console_owner_lock); ++ ++ if (!waiter) { ++ spin_release(&console_owner_dep_map, 1, _THIS_IP_); ++ return 0; ++ } ++ ++ /* The waiter is now free to continue */ ++ WRITE_ONCE(console_waiter, false); ++ ++ spin_release(&console_owner_dep_map, 1, _THIS_IP_); ++ ++ /* ++ * Hand off console_lock to waiter. The waiter will perform ++ * the up(). After this, the waiter is the console_lock owner. ++ */ ++ mutex_release(&console_lock_dep_map, 1, _THIS_IP_); ++ return 1; ++} ++ ++/** ++ * console_trylock_spinning - try to get console_lock by busy waiting ++ * ++ * This allows to busy wait for the console_lock when the current ++ * owner is running in specially marked sections. It means that ++ * the current owner is running and cannot reschedule until it ++ * is ready to lose the lock. ++ * ++ * Return: 1 if we got the lock, 0 othrewise ++ */ ++static int console_trylock_spinning(void) ++{ ++ struct task_struct *owner = NULL; ++ bool waiter; ++ bool spin = false; ++ unsigned long flags; ++ ++ if (console_trylock()) ++ return 1; ++ ++ printk_safe_enter_irqsave(flags); ++ ++ raw_spin_lock(&console_owner_lock); ++ owner = READ_ONCE(console_owner); ++ waiter = READ_ONCE(console_waiter); ++ if (!waiter && owner && owner != current) { ++ WRITE_ONCE(console_waiter, true); ++ spin = true; ++ } ++ raw_spin_unlock(&console_owner_lock); ++ ++ /* ++ * If there is an active printk() writing to the ++ * consoles, instead of having it write our data too, ++ * see if we can offload that load from the active ++ * printer, and do some printing ourselves. ++ * Go into a spin only if there isn't already a waiter ++ * spinning, and there is an active printer, and ++ * that active printer isn't us (recursive printk?). ++ */ ++ if (!spin) { ++ printk_safe_exit_irqrestore(flags); ++ return 0; ++ } ++ ++ /* We spin waiting for the owner to release us */ ++ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); ++ /* Owner will clear console_waiter on hand off */ ++ while (READ_ONCE(console_waiter)) ++ cpu_relax(); ++ spin_release(&console_owner_dep_map, 1, _THIS_IP_); ++ ++ printk_safe_exit_irqrestore(flags); ++ /* ++ * The owner passed the console lock to us. ++ * Since we did not spin on console lock, annotate ++ * this as a trylock. Otherwise lockdep will ++ * complain. 
++ */ ++ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); ++ ++ return 1; ++} ++ ++/* ++ * Call the console drivers, asking them to write out ++ * log_buf[start] to log_buf[end - 1]. ++ * The console_lock must be held. ++ */ ++static void call_console_drivers(const char *ext_text, size_t ext_len, ++ const char *text, size_t len) ++{ ++ struct console *con; ++ ++ trace_console_rcuidle(text, len); ++ ++ if (!console_drivers) ++ return; ++ ++ for_each_console(con) { ++ if (exclusive_console && con != exclusive_console) ++ continue; ++ if (!(con->flags & CON_ENABLED)) ++ continue; ++ if (!con->write) ++ continue; ++ if (!cpu_online(smp_processor_id()) && ++ !(con->flags & CON_ANYTIME)) ++ continue; ++ if (con->flags & CON_EXTENDED) ++ con->write(con, ext_text, ext_len); ++ else ++ con->write(con, text, len); ++ } ++} ++ ++int printk_delay_msec __read_mostly; ++ ++static inline void printk_delay(void) ++{ ++ if (unlikely(printk_delay_msec)) { ++ int m = printk_delay_msec; ++ ++ while (m--) { ++ mdelay(1); ++ touch_nmi_watchdog(); ++ } ++ } ++} ++ ++/* ++ * Continuation lines are buffered, and not committed to the record buffer ++ * until the line is complete, or a race forces it. The line fragments ++ * though, are printed immediately to the consoles to ensure everything has ++ * reached the console in case of a kernel crash. ++ */ ++static struct cont { ++ char buf[LOG_LINE_MAX]; ++ size_t len; /* length == 0 means unused buffer */ ++ struct task_struct *owner; /* task of first print*/ ++ u64 ts_nsec; /* time of first print */ ++ u8 level; /* log level of first message */ ++ u8 facility; /* log facility of first message */ ++ enum log_flags flags; /* prefix, newline flags */ ++} cont; ++ ++static void cont_flush(void) ++{ ++ if (cont.len == 0) ++ return; ++ ++ log_store(cont.facility, cont.level, cont.flags, cont.ts_nsec, ++ NULL, 0, cont.buf, cont.len); ++ cont.len = 0; ++} ++ ++static bool cont_add(int facility, int level, enum log_flags flags, const char *text, size_t len) ++{ ++ /* ++ * If ext consoles are present, flush and skip in-kernel ++ * continuation. See nr_ext_console_drivers definition. Also, if ++ * the line gets too long, split it up in separate records. ++ */ ++ if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) { ++ cont_flush(); ++ return false; ++ } ++ ++ if (!cont.len) { ++ cont.facility = facility; ++ cont.level = level; ++ cont.owner = current; ++ cont.ts_nsec = local_clock(); ++ cont.flags = flags; ++ } ++ ++ memcpy(cont.buf + cont.len, text, len); ++ cont.len += len; ++ ++ // The original flags come from the first line, ++ // but later continuations can add a newline. ++ if (flags & LOG_NEWLINE) { ++ cont.flags |= LOG_NEWLINE; ++ cont_flush(); ++ } ++ ++ if (cont.len > (sizeof(cont.buf) * 80) / 100) ++ cont_flush(); ++ ++ return true; ++} ++ ++static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len) ++{ ++ /* ++ * If an earlier line was buffered, and we're a continuation ++ * write from the same process, try to add it to the buffer. 
++ */ ++ if (cont.len) { ++ if (cont.owner == current && (lflags & LOG_CONT)) { ++ if (cont_add(facility, level, lflags, text, text_len)) ++ return text_len; ++ } ++ /* Otherwise, make sure it's flushed */ ++ cont_flush(); ++ } ++ ++ /* Skip empty continuation lines that couldn't be added - they just flush */ ++ if (!text_len && (lflags & LOG_CONT)) ++ return 0; ++ ++ /* If it doesn't end in a newline, try to buffer the current line */ ++ if (!(lflags & LOG_NEWLINE)) { ++ if (cont_add(facility, level, lflags, text, text_len)) ++ return text_len; ++ } ++ ++ /* Store it in the record log */ ++ return log_store(facility, level, lflags, 0, dict, dictlen, text, text_len); ++} ++ ++/* Must be called under logbuf_lock. */ ++int vprintk_store(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, va_list args) ++{ ++ static char textbuf[LOG_LINE_MAX]; ++ char *text = textbuf; ++ size_t text_len; ++ enum log_flags lflags = 0; ++ ++ /* ++ * The printf needs to come first; we need the syslog ++ * prefix which might be passed-in as a parameter. ++ */ ++ text_len = vscnprintf(text, sizeof(textbuf), fmt, args); ++ ++ /* mark and strip a trailing newline */ ++ if (text_len && text[text_len-1] == '\n') { ++ text_len--; ++ lflags |= LOG_NEWLINE; ++ } ++ ++ /* strip kernel syslog prefix and extract log level or control flags */ ++ if (facility == 0) { ++ int kern_level; ++ ++ while ((kern_level = printk_get_level(text)) != 0) { ++ switch (kern_level) { ++ case '0' ... '7': ++ if (level == LOGLEVEL_DEFAULT) ++ level = kern_level - '0'; ++ /* fallthrough */ ++ case 'd': /* KERN_DEFAULT */ ++ lflags |= LOG_PREFIX; ++ break; ++ case 'c': /* KERN_CONT */ ++ lflags |= LOG_CONT; ++ } ++ ++ text_len -= 2; ++ text += 2; ++ } ++ } ++ ++ if (level == LOGLEVEL_DEFAULT) ++ level = default_message_loglevel; ++ ++ if (dict) ++ lflags |= LOG_PREFIX|LOG_NEWLINE; ++ ++ return log_output(facility, level, lflags, ++ dict, dictlen, text, text_len); ++} ++ ++asmlinkage int vprintk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, va_list args) ++{ ++ int printed_len; ++ bool in_sched = false, pending_output; ++ unsigned long flags; ++ u64 curr_log_seq; ++ ++ if (level == LOGLEVEL_SCHED) { ++ level = LOGLEVEL_DEFAULT; ++ in_sched = true; ++ } ++ ++ boot_delay_msec(level); ++ printk_delay(); ++ ++ /* This stops the holder of console_sem just where we want him */ ++ logbuf_lock_irqsave(flags); ++ curr_log_seq = log_next_seq; ++ printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args); ++ pending_output = (curr_log_seq != log_next_seq); ++ logbuf_unlock_irqrestore(flags); ++ ++ /* If called from the scheduler, we can not call up(). */ ++ if (!in_sched && pending_output) { ++ /* ++ * Disable preemption to avoid being preempted while holding ++ * console_sem which would prevent anyone from printing to ++ * console ++ */ ++ preempt_disable(); ++ /* ++ * Try to acquire and then immediately release the console ++ * semaphore. The release will print out buffers and wake up ++ * /dev/kmsg and syslog() users. ++ */ ++ if (console_trylock_spinning()) ++ console_unlock(); ++ preempt_enable(); ++ } ++ ++ if (pending_output) ++ wake_up_klogd(); ++ return printed_len; ++} ++EXPORT_SYMBOL(vprintk_emit); ++ ++asmlinkage int vprintk(const char *fmt, va_list args) ++{ ++ return vprintk_func(fmt, args); ++} ++EXPORT_SYMBOL(vprintk); ++ ++asmlinkage int printk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, ...) 
++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ r = vprintk_emit(facility, level, dict, dictlen, fmt, args); ++ va_end(args); ++ ++ return r; ++} ++EXPORT_SYMBOL(printk_emit); ++ ++int vprintk_default(const char *fmt, va_list args) ++{ ++ int r; ++ ++#ifdef CONFIG_KGDB_KDB ++ /* Allow to pass printk() to kdb but avoid a recursion. */ ++ if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) { ++ r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); ++ return r; ++ } ++#endif ++ r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); ++ ++ return r; ++} ++EXPORT_SYMBOL_GPL(vprintk_default); ++ ++/** ++ * printk - print a kernel message ++ * @fmt: format string ++ * ++ * This is printk(). It can be called from any context. We want it to work. ++ * ++ * We try to grab the console_lock. If we succeed, it's easy - we log the ++ * output and call the console drivers. If we fail to get the semaphore, we ++ * place the output into the log buffer and return. The current holder of ++ * the console_sem will notice the new output in console_unlock(); and will ++ * send it to the consoles before releasing the lock. ++ * ++ * One effect of this deferred printing is that code which calls printk() and ++ * then changes console_loglevel may break. This is because console_loglevel ++ * is inspected when the actual printing occurs. ++ * ++ * See also: ++ * printf(3) ++ * ++ * See the vsnprintf() documentation for format string extensions over C99. ++ */ ++asmlinkage __visible int printk(const char *fmt, ...) ++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ r = vprintk_func(fmt, args); ++ va_end(args); ++ ++ return r; ++} ++EXPORT_SYMBOL(printk); ++ ++#else /* CONFIG_PRINTK */ ++ ++#define LOG_LINE_MAX 0 ++#define PREFIX_MAX 0 ++#define printk_time false ++ ++static u64 syslog_seq; ++static u32 syslog_idx; ++static u64 console_seq; ++static u32 console_idx; ++static u64 exclusive_console_stop_seq; ++static u64 log_first_seq; ++static u32 log_first_idx; ++static u64 log_next_seq; ++static char *log_text(const struct printk_log *msg) { return NULL; } ++static char *log_dict(const struct printk_log *msg) { return NULL; } ++static struct printk_log *log_from_idx(u32 idx) { return NULL; } ++static u32 log_next(u32 idx) { return 0; } ++static ssize_t msg_print_ext_header(char *buf, size_t size, ++ struct printk_log *msg, ++ u64 seq) { return 0; } ++static ssize_t msg_print_ext_body(char *buf, size_t size, ++ char *dict, size_t dict_len, ++ char *text, size_t text_len) { return 0; } ++static void console_lock_spinning_enable(void) { } ++static int console_lock_spinning_disable_and_check(void) { return 0; } ++static void call_console_drivers(const char *ext_text, size_t ext_len, ++ const char *text, size_t len) {} ++static size_t msg_print_text(const struct printk_log *msg, bool syslog, ++ bool time, char *buf, size_t size) { return 0; } ++static bool suppress_message_printing(int level) { return false; } ++ ++#endif /* CONFIG_PRINTK */ ++ ++#ifdef CONFIG_EARLY_PRINTK ++struct console *early_console; ++ ++asmlinkage __visible void early_printk(const char *fmt, ...) 
++{ ++ va_list ap; ++ char buf[512]; ++ int n; ++ ++ if (!early_console) ++ return; ++ ++ va_start(ap, fmt); ++ n = vscnprintf(buf, sizeof(buf), fmt, ap); ++ va_end(ap); ++ ++ early_console->write(early_console, buf, n); ++} ++#endif ++ ++static int __add_preferred_console(char *name, int idx, char *options, ++ char *brl_options) ++{ ++ struct console_cmdline *c; ++ int i; ++ ++ /* ++ * See if this tty is not yet registered, and ++ * if we have a slot free. ++ */ ++ for (i = 0, c = console_cmdline; ++ i < MAX_CMDLINECONSOLES && c->name[0]; ++ i++, c++) { ++ if (strcmp(c->name, name) == 0 && c->index == idx) { ++ if (!brl_options) ++ preferred_console = i; ++ return 0; ++ } ++ } ++ if (i == MAX_CMDLINECONSOLES) ++ return -E2BIG; ++ if (!brl_options) ++ preferred_console = i; ++ strlcpy(c->name, name, sizeof(c->name)); ++ c->options = options; ++ braille_set_options(c, brl_options); ++ ++ c->index = idx; ++ return 0; ++} ++ ++static int __init console_msg_format_setup(char *str) ++{ ++ if (!strcmp(str, "syslog")) ++ console_msg_format = MSG_FORMAT_SYSLOG; ++ if (!strcmp(str, "default")) ++ console_msg_format = MSG_FORMAT_DEFAULT; ++ return 1; ++} ++__setup("console_msg_format=", console_msg_format_setup); ++ ++/* ++ * Set up a console. Called via do_early_param() in init/main.c ++ * for each "console=" parameter in the boot command line. ++ */ ++static int __init console_setup(char *str) ++{ ++ char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */ ++ char *s, *options, *brl_options = NULL; ++ int idx; ++ ++ if (str[0] == 0) ++ return 1; ++ ++ if (_braille_console_setup(&str, &brl_options)) ++ return 1; ++ ++ /* ++ * Decode str into name, index, options. ++ */ ++ if (str[0] >= '0' && str[0] <= '9') { ++ strcpy(buf, "ttyS"); ++ strncpy(buf + 4, str, sizeof(buf) - 5); ++ } else { ++ strncpy(buf, str, sizeof(buf) - 1); ++ } ++ buf[sizeof(buf) - 1] = 0; ++ options = strchr(str, ','); ++ if (options) ++ *(options++) = 0; ++#ifdef __sparc__ ++ if (!strcmp(str, "ttya")) ++ strcpy(buf, "ttyS0"); ++ if (!strcmp(str, "ttyb")) ++ strcpy(buf, "ttyS1"); ++#endif ++ for (s = buf; *s; s++) ++ if (isdigit(*s) || *s == ',') ++ break; ++ idx = simple_strtoul(s, NULL, 10); ++ *s = 0; ++ ++ __add_preferred_console(buf, idx, options, brl_options); ++ console_set_on_cmdline = 1; ++ return 1; ++} ++__setup("console=", console_setup); ++ ++/** ++ * add_preferred_console - add a device to the list of preferred consoles. ++ * @name: device name ++ * @idx: device index ++ * @options: options for this console ++ * ++ * The last preferred console added will be used for kernel messages ++ * and stdin/out/err for init. Normally this is used by console_setup ++ * above to handle user-supplied console arguments; however it can also ++ * be used by arch-specific code either to override the user or more ++ * commonly to provide a default console (ie from PROM variables) when ++ * the user has not supplied one. 
++ */ ++int add_preferred_console(char *name, int idx, char *options) ++{ ++ return __add_preferred_console(name, idx, options, NULL); ++} ++ ++bool console_suspend_enabled = true; ++EXPORT_SYMBOL(console_suspend_enabled); ++ ++static int __init console_suspend_disable(char *str) ++{ ++ console_suspend_enabled = false; ++ return 1; ++} ++__setup("no_console_suspend", console_suspend_disable); ++module_param_named(console_suspend, console_suspend_enabled, ++ bool, S_IRUGO | S_IWUSR); ++MODULE_PARM_DESC(console_suspend, "suspend console during suspend" ++ " and hibernate operations"); ++ ++/** ++ * suspend_console - suspend the console subsystem ++ * ++ * This disables printk() while we go into suspend states ++ */ ++void suspend_console(void) ++{ ++ if (!console_suspend_enabled) ++ return; ++ pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); ++ console_lock(); ++ console_suspended = 1; ++ up_console_sem(); ++} ++ ++void resume_console(void) ++{ ++ if (!console_suspend_enabled) ++ return; ++ down_console_sem(); ++ console_suspended = 0; ++ console_unlock(); ++} ++ ++/** ++ * console_cpu_notify - print deferred console messages after CPU hotplug ++ * @cpu: unused ++ * ++ * If printk() is called from a CPU that is not online yet, the messages ++ * will be printed on the console only if there are CON_ANYTIME consoles. ++ * This function is called when a new CPU comes online (or fails to come ++ * up) or goes offline. ++ */ ++static int console_cpu_notify(unsigned int cpu) ++{ ++ if (!cpuhp_tasks_frozen) { ++ /* If trylock fails, someone else is doing the printing */ ++ if (console_trylock()) ++ console_unlock(); ++ } ++ return 0; ++} ++ ++/** ++ * console_lock - lock the console system for exclusive use. ++ * ++ * Acquires a lock which guarantees that the caller has ++ * exclusive access to the console system and the console_drivers list. ++ * ++ * Can sleep, returns nothing. ++ */ ++void console_lock(void) ++{ ++ might_sleep(); ++ ++ down_console_sem(); ++ if (console_suspended) ++ return; ++ console_locked = 1; ++ console_may_schedule = 1; ++} ++EXPORT_SYMBOL(console_lock); ++ ++/** ++ * console_trylock - try to lock the console system for exclusive use. ++ * ++ * Try to acquire a lock which guarantees that the caller has exclusive ++ * access to the console system and the console_drivers list. ++ * ++ * returns 1 on success, and 0 on failure to acquire the lock. ++ */ ++int console_trylock(void) ++{ ++ if (down_trylock_console_sem()) ++ return 0; ++ if (console_suspended) { ++ up_console_sem(); ++ return 0; ++ } ++ console_locked = 1; ++ console_may_schedule = 0; ++ return 1; ++} ++EXPORT_SYMBOL(console_trylock); ++ ++int is_console_locked(void) ++{ ++ return console_locked; ++} ++EXPORT_SYMBOL(is_console_locked); ++ ++/* ++ * Check if we have any console that is capable of printing while cpu is ++ * booting or shutting down. Requires console_sem. ++ */ ++static int have_callable_console(void) ++{ ++ struct console *con; ++ ++ for_each_console(con) ++ if ((con->flags & CON_ENABLED) && ++ (con->flags & CON_ANYTIME)) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * Can we actually use the console at this time on this cpu? ++ * ++ * Console drivers may assume that per-cpu resources have been allocated. So ++ * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't ++ * call them until this CPU is officially up. 
++ */ ++static inline int can_use_console(void) ++{ ++ return cpu_online(raw_smp_processor_id()) || have_callable_console(); ++} ++ ++/** ++ * console_unlock - unlock the console system ++ * ++ * Releases the console_lock which the caller holds on the console system ++ * and the console driver list. ++ * ++ * While the console_lock was held, console output may have been buffered ++ * by printk(). If this is the case, console_unlock(); emits ++ * the output prior to releasing the lock. ++ * ++ * If there is output waiting, we wake /dev/kmsg and syslog() users. ++ * ++ * console_unlock(); may be called from any context. ++ */ ++void console_unlock(void) ++{ ++ static char ext_text[CONSOLE_EXT_LOG_MAX]; ++ static char text[LOG_LINE_MAX + PREFIX_MAX]; ++ unsigned long flags; ++ bool do_cond_resched, retry; ++ ++ if (console_suspended) { ++ up_console_sem(); ++ return; ++ } ++ ++ /* ++ * Console drivers are called with interrupts disabled, so ++ * @console_may_schedule should be cleared before; however, we may ++ * end up dumping a lot of lines, for example, if called from ++ * console registration path, and should invoke cond_resched() ++ * between lines if allowable. Not doing so can cause a very long ++ * scheduling stall on a slow console leading to RCU stall and ++ * softlockup warnings which exacerbate the issue with more ++ * messages practically incapacitating the system. ++ * ++ * console_trylock() is not able to detect the preemptive ++ * context reliably. Therefore the value must be stored before ++ * and cleared after the the "again" goto label. ++ */ ++ do_cond_resched = console_may_schedule; ++again: ++ console_may_schedule = 0; ++ ++ /* ++ * We released the console_sem lock, so we need to recheck if ++ * cpu is online and (if not) is there at least one CON_ANYTIME ++ * console. ++ */ ++ if (!can_use_console()) { ++ console_locked = 0; ++ up_console_sem(); ++ return; ++ } ++ ++ for (;;) { ++ struct printk_log *msg; ++ size_t ext_len = 0; ++ size_t len; ++ ++ printk_safe_enter_irqsave(flags); ++ raw_spin_lock(&logbuf_lock); ++ if (console_seq < log_first_seq) { ++ len = sprintf(text, ++ "** %llu printk messages dropped **\n", ++ log_first_seq - console_seq); ++ ++ /* messages are gone, move to first one */ ++ console_seq = log_first_seq; ++ console_idx = log_first_idx; ++ } else { ++ len = 0; ++ } ++skip: ++ if (console_seq == log_next_seq) ++ break; ++ ++ msg = log_from_idx(console_idx); ++ if (suppress_message_printing(msg->level)) { ++ /* ++ * Skip record we have buffered and already printed ++ * directly to the console when we received it, and ++ * record that has level above the console loglevel. ++ */ ++ console_idx = log_next(console_idx); ++ console_seq++; ++ goto skip; ++ } ++ ++ /* Output to all consoles once old messages replayed. 
*/ ++ if (unlikely(exclusive_console && ++ console_seq >= exclusive_console_stop_seq)) { ++ exclusive_console = NULL; ++ } ++ ++ len += msg_print_text(msg, ++ console_msg_format & MSG_FORMAT_SYSLOG, ++ printk_time, text + len, sizeof(text) - len); ++ if (nr_ext_console_drivers) { ++ ext_len = msg_print_ext_header(ext_text, ++ sizeof(ext_text), ++ msg, console_seq); ++ ext_len += msg_print_ext_body(ext_text + ext_len, ++ sizeof(ext_text) - ext_len, ++ log_dict(msg), msg->dict_len, ++ log_text(msg), msg->text_len); ++ } ++ console_idx = log_next(console_idx); ++ console_seq++; ++ raw_spin_unlock(&logbuf_lock); ++ ++ /* ++ * While actively printing out messages, if another printk() ++ * were to occur on another CPU, it may wait for this one to ++ * finish. This task can not be preempted if there is a ++ * waiter waiting to take over. ++ */ ++ console_lock_spinning_enable(); ++ ++ stop_critical_timings(); /* don't trace print latency */ ++ call_console_drivers(ext_text, ext_len, text, len); ++ start_critical_timings(); ++ ++ if (console_lock_spinning_disable_and_check()) { ++ printk_safe_exit_irqrestore(flags); ++ return; ++ } ++ ++ printk_safe_exit_irqrestore(flags); ++ ++ if (do_cond_resched) ++ cond_resched(); ++ } ++ ++ console_locked = 0; ++ ++ raw_spin_unlock(&logbuf_lock); ++ ++ up_console_sem(); ++ ++ /* ++ * Someone could have filled up the buffer again, so re-check if there's ++ * something to flush. In case we cannot trylock the console_sem again, ++ * there's a new owner and the console_unlock() from them will do the ++ * flush, no worries. ++ */ ++ raw_spin_lock(&logbuf_lock); ++ retry = console_seq != log_next_seq; ++ raw_spin_unlock(&logbuf_lock); ++ printk_safe_exit_irqrestore(flags); ++ ++ if (retry && console_trylock()) ++ goto again; ++} ++EXPORT_SYMBOL(console_unlock); ++ ++/** ++ * console_conditional_schedule - yield the CPU if required ++ * ++ * If the console code is currently allowed to sleep, and ++ * if this CPU should yield the CPU to another task, do ++ * so here. ++ * ++ * Must be called within console_lock();. ++ */ ++void __sched console_conditional_schedule(void) ++{ ++ if (console_may_schedule) ++ cond_resched(); ++} ++EXPORT_SYMBOL(console_conditional_schedule); ++ ++void console_unblank(void) ++{ ++ struct console *c; ++ ++ /* ++ * console_unblank can no longer be called in interrupt context unless ++ * oops_in_progress is set to 1.. ++ */ ++ if (oops_in_progress) { ++ if (down_trylock_console_sem() != 0) ++ return; ++ } else ++ console_lock(); ++ ++ console_locked = 1; ++ console_may_schedule = 0; ++ for_each_console(c) ++ if ((c->flags & CON_ENABLED) && c->unblank) ++ c->unblank(); ++ console_unlock(); ++} ++ ++/** ++ * console_flush_on_panic - flush console content on panic ++ * ++ * Immediately output all pending messages no matter what. ++ */ ++void console_flush_on_panic(void) ++{ ++ /* ++ * If someone else is holding the console lock, trylock will fail ++ * and may_schedule may be set. Ignore and proceed to unlock so ++ * that messages are flushed out. As this can be called from any ++ * context and we don't want to get preempted while flushing, ++ * ensure may_schedule is cleared. 
++ */ ++ console_trylock(); ++ console_may_schedule = 0; ++ console_unlock(); ++} ++ ++/* ++ * Return the console tty driver structure and its associated index ++ */ ++struct tty_driver *console_device(int *index) ++{ ++ struct console *c; ++ struct tty_driver *driver = NULL; ++ ++ console_lock(); ++ for_each_console(c) { ++ if (!c->device) ++ continue; ++ driver = c->device(c, index); ++ if (driver) ++ break; ++ } ++ console_unlock(); ++ return driver; ++} ++ ++/* ++ * Prevent further output on the passed console device so that (for example) ++ * serial drivers can disable console output before suspending a port, and can ++ * re-enable output afterwards. ++ */ ++void console_stop(struct console *console) ++{ ++ console_lock(); ++ console->flags &= ~CON_ENABLED; ++ console_unlock(); ++} ++EXPORT_SYMBOL(console_stop); ++ ++void console_start(struct console *console) ++{ ++ console_lock(); ++ console->flags |= CON_ENABLED; ++ console_unlock(); ++} ++EXPORT_SYMBOL(console_start); ++ ++static int __read_mostly keep_bootcon; ++ ++static int __init keep_bootcon_setup(char *str) ++{ ++ keep_bootcon = 1; ++ pr_info("debug: skip boot console de-registration.\n"); ++ ++ return 0; ++} ++ ++early_param("keep_bootcon", keep_bootcon_setup); ++ ++/* ++ * The console driver calls this routine during kernel initialization ++ * to register the console printing procedure with printk() and to ++ * print any messages that were printed by the kernel before the ++ * console driver was initialized. ++ * ++ * This can happen pretty early during the boot process (because of ++ * early_printk) - sometimes before setup_arch() completes - be careful ++ * of what kernel features are used - they may not be initialised yet. ++ * ++ * There are two types of consoles - bootconsoles (early_printk) and ++ * "real" consoles (everything which is not a bootconsole) which are ++ * handled differently. ++ * - Any number of bootconsoles can be registered at any time. ++ * - As soon as a "real" console is registered, all bootconsoles ++ * will be unregistered automatically. ++ * - Once a "real" console is registered, any attempt to register a ++ * bootconsoles will be rejected ++ */ ++void register_console(struct console *newcon) ++{ ++ int i; ++ unsigned long flags; ++ struct console *bcon = NULL; ++ struct console_cmdline *c; ++ static bool has_preferred; ++ ++ if (console_drivers) ++ for_each_console(bcon) ++ if (WARN(bcon == newcon, ++ "console '%s%d' already registered\n", ++ bcon->name, bcon->index)) ++ return; ++ ++ /* ++ * before we register a new CON_BOOT console, make sure we don't ++ * already have a valid console ++ */ ++ if (console_drivers && newcon->flags & CON_BOOT) { ++ /* find the last or real console */ ++ for_each_console(bcon) { ++ if (!(bcon->flags & CON_BOOT)) { ++ pr_info("Too late to register bootconsole %s%d\n", ++ newcon->name, newcon->index); ++ return; ++ } ++ } ++ } ++ ++ if (console_drivers && console_drivers->flags & CON_BOOT) ++ bcon = console_drivers; ++ ++ if (!has_preferred || bcon || !console_drivers) ++ has_preferred = preferred_console >= 0; ++ ++ /* ++ * See if we want to use this console driver. If we ++ * didn't select a console we take the first one ++ * that registers here. 
++ */ ++ if (!has_preferred) { ++ if (newcon->index < 0) ++ newcon->index = 0; ++ if (newcon->setup == NULL || ++ newcon->setup(newcon, NULL) == 0) { ++ newcon->flags |= CON_ENABLED; ++ if (newcon->device) { ++ newcon->flags |= CON_CONSDEV; ++ has_preferred = true; ++ } ++ } ++ } ++ ++ /* ++ * See if this console matches one we selected on ++ * the command line. ++ */ ++ for (i = 0, c = console_cmdline; ++ i < MAX_CMDLINECONSOLES && c->name[0]; ++ i++, c++) { ++ if (!newcon->match || ++ newcon->match(newcon, c->name, c->index, c->options) != 0) { ++ /* default matching */ ++ BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); ++ if (strcmp(c->name, newcon->name) != 0) ++ continue; ++ if (newcon->index >= 0 && ++ newcon->index != c->index) ++ continue; ++ if (newcon->index < 0) ++ newcon->index = c->index; ++ ++ if (_braille_register_console(newcon, c)) ++ return; ++ ++ if (newcon->setup && ++ newcon->setup(newcon, c->options) != 0) ++ break; ++ } ++ ++ newcon->flags |= CON_ENABLED; ++ if (i == preferred_console) { ++ newcon->flags |= CON_CONSDEV; ++ has_preferred = true; ++ } ++ break; ++ } ++ ++ if (!(newcon->flags & CON_ENABLED)) ++ return; ++ ++ /* ++ * If we have a bootconsole, and are switching to a real console, ++ * don't print everything out again, since when the boot console, and ++ * the real console are the same physical device, it's annoying to ++ * see the beginning boot messages twice ++ */ ++ if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) ++ newcon->flags &= ~CON_PRINTBUFFER; ++ ++ /* ++ * Put this console in the list - keep the ++ * preferred driver at the head of the list. ++ */ ++ console_lock(); ++ if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) { ++ newcon->next = console_drivers; ++ console_drivers = newcon; ++ if (newcon->next) ++ newcon->next->flags &= ~CON_CONSDEV; ++ } else { ++ newcon->next = console_drivers->next; ++ console_drivers->next = newcon; ++ } ++ ++ if (newcon->flags & CON_EXTENDED) ++ if (!nr_ext_console_drivers++) ++ pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n"); ++ ++ if (newcon->flags & CON_PRINTBUFFER) { ++ /* ++ * console_unlock(); will print out the buffered messages ++ * for us. ++ */ ++ logbuf_lock_irqsave(flags); ++ console_seq = syslog_seq; ++ console_idx = syslog_idx; ++ /* ++ * We're about to replay the log buffer. Only do this to the ++ * just-registered console to avoid excessive message spam to ++ * the already-registered consoles. ++ * ++ * Set exclusive_console with disabled interrupts to reduce ++ * race window with eventual console_flush_on_panic() that ++ * ignores console_lock. ++ */ ++ exclusive_console = newcon; ++ exclusive_console_stop_seq = console_seq; ++ logbuf_unlock_irqrestore(flags); ++ } ++ console_unlock(); ++ console_sysfs_notify(); ++ ++ /* ++ * By unregistering the bootconsoles after we enable the real console ++ * we get the "console xxx enabled" message on all the consoles - ++ * boot consoles, real consoles, etc - this is to ensure that end ++ * users know there might be something in the kernel's log buffer that ++ * went to the bootconsole (that they do not see on the real console) ++ */ ++ pr_info("%sconsole [%s%d] enabled\n", ++ (newcon->flags & CON_BOOT) ? 
"boot" : "" , ++ newcon->name, newcon->index); ++ if (bcon && ++ ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && ++ !keep_bootcon) { ++ /* We need to iterate through all boot consoles, to make ++ * sure we print everything out, before we unregister them. ++ */ ++ for_each_console(bcon) ++ if (bcon->flags & CON_BOOT) ++ unregister_console(bcon); ++ } ++} ++EXPORT_SYMBOL(register_console); ++ ++int unregister_console(struct console *console) ++{ ++ struct console *a, *b; ++ int res; ++ ++ pr_info("%sconsole [%s%d] disabled\n", ++ (console->flags & CON_BOOT) ? "boot" : "" , ++ console->name, console->index); ++ ++ res = _braille_unregister_console(console); ++ if (res) ++ return res; ++ ++ res = 1; ++ console_lock(); ++ if (console_drivers == console) { ++ console_drivers=console->next; ++ res = 0; ++ } else if (console_drivers) { ++ for (a=console_drivers->next, b=console_drivers ; ++ a; b=a, a=b->next) { ++ if (a == console) { ++ b->next = a->next; ++ res = 0; ++ break; ++ } ++ } ++ } ++ ++ if (!res && (console->flags & CON_EXTENDED)) ++ nr_ext_console_drivers--; ++ ++ /* ++ * If this isn't the last console and it has CON_CONSDEV set, we ++ * need to set it on the next preferred console. ++ */ ++ if (console_drivers != NULL && console->flags & CON_CONSDEV) ++ console_drivers->flags |= CON_CONSDEV; ++ ++ console->flags &= ~CON_ENABLED; ++ console_unlock(); ++ console_sysfs_notify(); ++ return res; ++} ++EXPORT_SYMBOL(unregister_console); ++ ++/* ++ * Initialize the console device. This is called *early*, so ++ * we can't necessarily depend on lots of kernel help here. ++ * Just do some early initializations, and do the complex setup ++ * later. ++ */ ++void __init console_init(void) ++{ ++ int ret; ++ initcall_t call; ++ initcall_entry_t *ce; ++ ++ /* Setup the default TTY line discipline. */ ++ n_tty_init(); ++ ++ /* ++ * set up the console device so that later boot sequences can ++ * inform about problems etc.. ++ */ ++ ce = __con_initcall_start; ++ trace_initcall_level("console"); ++ while (ce < __con_initcall_end) { ++ call = initcall_from_entry(ce); ++ trace_initcall_start(call); ++ ret = call(); ++ trace_initcall_finish(call, ret); ++ ce++; ++ } ++} ++ ++/* ++ * Some boot consoles access data that is in the init section and which will ++ * be discarded after the initcalls have been run. To make sure that no code ++ * will access this data, unregister the boot consoles in a late initcall. ++ * ++ * If for some reason, such as deferred probe or the driver being a loadable ++ * module, the real console hasn't registered yet at this point, there will ++ * be a brief interval in which no messages are logged to the console, which ++ * makes it difficult to diagnose problems that occur during this time. ++ * ++ * To mitigate this problem somewhat, only unregister consoles whose memory ++ * intersects with the init section. Note that all other boot consoles will ++ * get unregistred when the real preferred console is registered. ++ */ ++static int __init printk_late_init(void) ++{ ++ struct console *con; ++ int ret; ++ ++ for_each_console(con) { ++ if (!(con->flags & CON_BOOT)) ++ continue; ++ ++ /* Check addresses that might be used for enabled consoles. 
*/ ++ if (init_section_intersects(con, sizeof(*con)) || ++ init_section_contains(con->write, 0) || ++ init_section_contains(con->read, 0) || ++ init_section_contains(con->device, 0) || ++ init_section_contains(con->unblank, 0) || ++ init_section_contains(con->data, 0)) { ++ /* ++ * Please, consider moving the reported consoles out ++ * of the init section. ++ */ ++ pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n", ++ con->name, con->index); ++ unregister_console(con); ++ } ++ } ++ ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, ++ console_cpu_notify); ++ WARN_ON(ret < 0); ++ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", ++ console_cpu_notify, NULL); ++ WARN_ON(ret < 0); ++ return 0; ++} ++late_initcall(printk_late_init); ++ ++#if defined CONFIG_PRINTK ++/* ++ * Delayed printk version, for scheduler-internal messages: ++ */ ++#define PRINTK_PENDING_WAKEUP 0x01 ++#define PRINTK_PENDING_OUTPUT 0x02 ++ ++static DEFINE_PER_CPU(int, printk_pending); ++ ++static void wake_up_klogd_work_func(struct irq_work *irq_work) ++{ ++ int pending = __this_cpu_xchg(printk_pending, 0); ++ ++ if (pending & PRINTK_PENDING_OUTPUT) { ++ /* If trylock fails, someone else is doing the printing */ ++ if (console_trylock()) ++ console_unlock(); ++ } ++ ++ if (pending & PRINTK_PENDING_WAKEUP) ++ wake_up_interruptible(&log_wait); ++} ++ ++static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { ++ .func = wake_up_klogd_work_func, ++ .flags = IRQ_WORK_LAZY, ++}; ++ ++void wake_up_klogd(void) ++{ ++ if (!printk_percpu_data_ready()) ++ return; ++ ++ preempt_disable(); ++ if (waitqueue_active(&log_wait)) { ++ this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); ++ irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); ++ } ++ preempt_enable(); ++} ++ ++void defer_console_output(void) ++{ ++ if (!printk_percpu_data_ready()) ++ return; ++ ++ preempt_disable(); ++ __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); ++ irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); ++ preempt_enable(); ++} ++ ++int vprintk_deferred(const char *fmt, va_list args) ++{ ++ int r; ++ ++ r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); ++ defer_console_output(); ++ ++ return r; ++} ++ ++int printk_deferred(const char *fmt, ...) ++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ r = vprintk_deferred(fmt, args); ++ va_end(args); ++ ++ return r; ++} ++ ++/* ++ * printk rate limiting, lifted from the networking subsystem. ++ * ++ * This enforces a rate limit: not more than 10 kernel messages ++ * every 5s to make a denial-of-service attack impossible. ++ */ ++DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); ++ ++int __printk_ratelimit(const char *func) ++{ ++ return ___ratelimit(&printk_ratelimit_state, func); ++} ++EXPORT_SYMBOL(__printk_ratelimit); ++ ++/** ++ * printk_timed_ratelimit - caller-controlled printk ratelimiting ++ * @caller_jiffies: pointer to caller's state ++ * @interval_msecs: minimum interval between prints ++ * ++ * printk_timed_ratelimit() returns true if more than @interval_msecs ++ * milliseconds have elapsed since the last time printk_timed_ratelimit() ++ * returned true. 
++ */ ++bool printk_timed_ratelimit(unsigned long *caller_jiffies, ++ unsigned int interval_msecs) ++{ ++ unsigned long elapsed = jiffies - *caller_jiffies; ++ ++ if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs)) ++ return false; ++ ++ *caller_jiffies = jiffies; ++ return true; ++} ++EXPORT_SYMBOL(printk_timed_ratelimit); ++ ++static DEFINE_SPINLOCK(dump_list_lock); ++static LIST_HEAD(dump_list); ++ ++/** ++ * kmsg_dump_register - register a kernel log dumper. ++ * @dumper: pointer to the kmsg_dumper structure ++ * ++ * Adds a kernel log dumper to the system. The dump callback in the ++ * structure will be called when the kernel oopses or panics and must be ++ * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise. ++ */ ++int kmsg_dump_register(struct kmsg_dumper *dumper) ++{ ++ unsigned long flags; ++ int err = -EBUSY; ++ ++ /* The dump callback needs to be set */ ++ if (!dumper->dump) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&dump_list_lock, flags); ++ /* Don't allow registering multiple times */ ++ if (!dumper->registered) { ++ dumper->registered = 1; ++ list_add_tail_rcu(&dumper->list, &dump_list); ++ err = 0; ++ } ++ spin_unlock_irqrestore(&dump_list_lock, flags); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(kmsg_dump_register); ++ ++/** ++ * kmsg_dump_unregister - unregister a kmsg dumper. ++ * @dumper: pointer to the kmsg_dumper structure ++ * ++ * Removes a dump device from the system. Returns zero on success and ++ * %-EINVAL otherwise. ++ */ ++int kmsg_dump_unregister(struct kmsg_dumper *dumper) ++{ ++ unsigned long flags; ++ int err = -EINVAL; ++ ++ spin_lock_irqsave(&dump_list_lock, flags); ++ if (dumper->registered) { ++ dumper->registered = 0; ++ list_del_rcu(&dumper->list); ++ err = 0; ++ } ++ spin_unlock_irqrestore(&dump_list_lock, flags); ++ synchronize_rcu(); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(kmsg_dump_unregister); ++ ++static bool always_kmsg_dump; ++module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); ++ ++/** ++ * kmsg_dump - dump kernel log to kernel message dumpers. ++ * @reason: the reason (oops, panic etc) for dumping ++ * ++ * Call each of the registered dumper's dump() callback, which can ++ * retrieve the kmsg records with kmsg_dump_get_line() or ++ * kmsg_dump_get_buffer(). ++ */ ++void kmsg_dump(enum kmsg_dump_reason reason) ++{ ++ struct kmsg_dumper *dumper; ++ unsigned long flags; ++ ++ if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) ++ return; ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(dumper, &dump_list, list) { ++ if (dumper->max_reason && reason > dumper->max_reason) ++ continue; ++ ++ /* initialize iterator with data about the stored records */ ++ dumper->active = true; ++ ++ logbuf_lock_irqsave(flags); ++ dumper->cur_seq = clear_seq; ++ dumper->cur_idx = clear_idx; ++ dumper->next_seq = log_next_seq; ++ dumper->next_idx = log_next_idx; ++ logbuf_unlock_irqrestore(flags); ++ ++ /* invoke dumper which will iterate over records */ ++ dumper->dump(dumper, reason); ++ ++ /* reset iterator */ ++ dumper->active = false; ++ } ++ rcu_read_unlock(); ++} ++ ++/** ++ * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) ++ * @dumper: registered kmsg dumper ++ * @syslog: include the "<4>" prefixes ++ * @line: buffer to copy the line to ++ * @size: maximum size of the buffer ++ * @len: length of line placed into buffer ++ * ++ * Start at the beginning of the kmsg buffer, with the oldest kmsg ++ * record, and copy one record into the provided buffer. 
++ * ++ * Consecutive calls will return the next available record moving ++ * towards the end of the buffer with the youngest messages. ++ * ++ * A return value of FALSE indicates that there are no more records to ++ * read. ++ * ++ * The function is similar to kmsg_dump_get_line(), but grabs no locks. ++ */ ++bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, ++ char *line, size_t size, size_t *len) ++{ ++ struct printk_log *msg; ++ size_t l = 0; ++ bool ret = false; ++ ++ if (!dumper->active) ++ goto out; ++ ++ if (dumper->cur_seq < log_first_seq) { ++ /* messages are gone, move to first available one */ ++ dumper->cur_seq = log_first_seq; ++ dumper->cur_idx = log_first_idx; ++ } ++ ++ /* last entry */ ++ if (dumper->cur_seq >= log_next_seq) ++ goto out; ++ ++ msg = log_from_idx(dumper->cur_idx); ++ l = msg_print_text(msg, syslog, printk_time, line, size); ++ ++ dumper->cur_idx = log_next(dumper->cur_idx); ++ dumper->cur_seq++; ++ ret = true; ++out: ++ if (len) ++ *len = l; ++ return ret; ++} ++ ++/** ++ * kmsg_dump_get_line - retrieve one kmsg log line ++ * @dumper: registered kmsg dumper ++ * @syslog: include the "<4>" prefixes ++ * @line: buffer to copy the line to ++ * @size: maximum size of the buffer ++ * @len: length of line placed into buffer ++ * ++ * Start at the beginning of the kmsg buffer, with the oldest kmsg ++ * record, and copy one record into the provided buffer. ++ * ++ * Consecutive calls will return the next available record moving ++ * towards the end of the buffer with the youngest messages. ++ * ++ * A return value of FALSE indicates that there are no more records to ++ * read. ++ */ ++bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, ++ char *line, size_t size, size_t *len) ++{ ++ unsigned long flags; ++ bool ret; ++ ++ logbuf_lock_irqsave(flags); ++ ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); ++ logbuf_unlock_irqrestore(flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kmsg_dump_get_line); ++ ++/** ++ * kmsg_dump_get_buffer - copy kmsg log lines ++ * @dumper: registered kmsg dumper ++ * @syslog: include the "<4>" prefixes ++ * @buf: buffer to copy the line to ++ * @size: maximum size of the buffer ++ * @len: length of line placed into buffer ++ * ++ * Start at the end of the kmsg buffer and fill the provided buffer ++ * with as many of the the *youngest* kmsg records that fit into it. ++ * If the buffer is large enough, all available kmsg records will be ++ * copied with a single call. ++ * ++ * Consecutive calls will fill the buffer with the next block of ++ * available older records, not including the earlier retrieved ones. ++ * ++ * A return value of FALSE indicates that there are no more records to ++ * read. 
++ */ ++bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, ++ char *buf, size_t size, size_t *len) ++{ ++ unsigned long flags; ++ u64 seq; ++ u32 idx; ++ u64 next_seq; ++ u32 next_idx; ++ size_t l = 0; ++ bool ret = false; ++ bool time = printk_time; ++ ++ if (!dumper->active) ++ goto out; ++ ++ logbuf_lock_irqsave(flags); ++ if (dumper->cur_seq < log_first_seq) { ++ /* messages are gone, move to first available one */ ++ dumper->cur_seq = log_first_seq; ++ dumper->cur_idx = log_first_idx; ++ } ++ ++ /* last entry */ ++ if (dumper->cur_seq >= dumper->next_seq) { ++ logbuf_unlock_irqrestore(flags); ++ goto out; ++ } ++ ++ /* calculate length of entire buffer */ ++ seq = dumper->cur_seq; ++ idx = dumper->cur_idx; ++ while (seq < dumper->next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ l += msg_print_text(msg, true, time, NULL, 0); ++ idx = log_next(idx); ++ seq++; ++ } ++ ++ /* move first record forward until length fits into the buffer */ ++ seq = dumper->cur_seq; ++ idx = dumper->cur_idx; ++ while (l >= size && seq < dumper->next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ l -= msg_print_text(msg, true, time, NULL, 0); ++ idx = log_next(idx); ++ seq++; ++ } ++ ++ /* last message in next interation */ ++ next_seq = seq; ++ next_idx = idx; ++ ++ l = 0; ++ while (seq < dumper->next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ l += msg_print_text(msg, syslog, time, buf + l, size - l); ++ idx = log_next(idx); ++ seq++; ++ } ++ ++ dumper->next_seq = next_seq; ++ dumper->next_idx = next_idx; ++ ret = true; ++ logbuf_unlock_irqrestore(flags); ++out: ++ if (len) ++ *len = l; ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); ++ ++/** ++ * kmsg_dump_rewind_nolock - reset the interator (unlocked version) ++ * @dumper: registered kmsg dumper ++ * ++ * Reset the dumper's iterator so that kmsg_dump_get_line() and ++ * kmsg_dump_get_buffer() can be called again and used multiple ++ * times within the same dumper.dump() callback. ++ * ++ * The function is similar to kmsg_dump_rewind(), but grabs no locks. ++ */ ++void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) ++{ ++ dumper->cur_seq = clear_seq; ++ dumper->cur_idx = clear_idx; ++ dumper->next_seq = log_next_seq; ++ dumper->next_idx = log_next_idx; ++} ++ ++/** ++ * kmsg_dump_rewind - reset the interator ++ * @dumper: registered kmsg dumper ++ * ++ * Reset the dumper's iterator so that kmsg_dump_get_line() and ++ * kmsg_dump_get_buffer() can be called again and used multiple ++ * times within the same dumper.dump() callback. 
++ */ ++void kmsg_dump_rewind(struct kmsg_dumper *dumper) ++{ ++ unsigned long flags; ++ ++ logbuf_lock_irqsave(flags); ++ kmsg_dump_rewind_nolock(dumper); ++ logbuf_unlock_irqrestore(flags); ++} ++EXPORT_SYMBOL_GPL(kmsg_dump_rewind); ++ ++#endif +diff -uprN kernel/kernel/ptrace.c kernel_new/kernel/ptrace.c +--- kernel/kernel/ptrace.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/ptrace.c 2021-04-01 18:28:07.811863114 +0800 +@@ -824,6 +824,8 @@ static int ptrace_resume(struct task_str + user_disable_single_step(child); + } + ++ __ipipe_report_ptrace_resume(child, request); ++ + /* + * Change ->exit_code and ->state under siglock to avoid the race + * with wait_task_stopped() in between; a non-zero ->exit_code will +diff -uprN kernel/kernel/rcu/Kconfig.debug kernel_new/kernel/rcu/Kconfig.debug +--- kernel/kernel/rcu/Kconfig.debug 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/rcu/Kconfig.debug 2021-04-01 18:28:07.811863114 +0800 +@@ -5,7 +5,7 @@ + menu "RCU Debugging" + + config PROVE_RCU +- def_bool PROVE_LOCKING ++ def_bool PROVE_LOCKING && !IPIPE + + config TORTURE_TEST + tristate +diff -uprN kernel/kernel/sched/core.c kernel_new/kernel/sched/core.c +--- kernel/kernel/sched/core.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/sched/core.c 2021-04-01 18:28:07.812863113 +0800 +@@ -1121,8 +1121,12 @@ static int __set_cpus_allowed_ptr(struct + } + + /* Can the task run on the task's current CPU? If so, we're done */ +- if (cpumask_test_cpu(task_cpu(p), new_mask)) ++ if (cpumask_test_cpu(task_cpu(p), new_mask)) { ++ __ipipe_report_setaffinity(p, task_cpu(p)); + goto out; ++ } ++ ++ __ipipe_report_setaffinity(p, dest_cpu); + + if (task_running(rq, p) || p->state == TASK_WAKING) { + struct migration_arg arg = { p, dest_cpu }; +@@ -1793,7 +1797,9 @@ void scheduler_ipi(void) + * however a fair share of IPIs are still resched only so this would + * somewhat pessimize the simple resched case. + */ ++#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI + irq_enter(); ++#endif + sched_ttwu_pending(); + + /* +@@ -1803,7 +1809,9 @@ void scheduler_ipi(void) + this_rq()->idle_balance = 1; + raise_softirq_irqoff(SCHED_SOFTIRQ); + } ++#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI + irq_exit(); ++#endif + } + + static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) +@@ -1985,7 +1993,8 @@ try_to_wake_up(struct task_struct *p, un + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); +- if (!(p->state & state)) ++ if (!(p->state & state) || ++ (p->state & (TASK_NOWAKEUP|TASK_HARDENING))) + goto out; + + trace_sched_waking(p); +@@ -2796,6 +2805,7 @@ asmlinkage __visible void schedule_tail( + * PREEMPT_COUNT kernels). + */ + ++ __ipipe_complete_domain_migration(); + rq = finish_task_switch(prev); + balance_callback(rq); + preempt_enable(); +@@ -2864,6 +2874,9 @@ context_switch(struct rq *rq, struct tas + switch_to(prev, next, prev); + barrier(); + ++ if (unlikely(__ipipe_switch_tail())) ++ return NULL; ++ + return finish_task_switch(prev); + } + +@@ -3361,6 +3374,7 @@ static noinline void __schedule_bug(stru + */ + static inline void schedule_debug(struct task_struct *prev) + { ++ ipipe_root_only(); + #ifdef CONFIG_SCHED_STACK_END_CHECK + if (task_stack_end_corrupted(prev)) + panic("corrupted stack end detected inside scheduler\n"); +@@ -3460,7 +3474,7 @@ again: + * + * WARNING: must be called with preemption disabled! 
+ */ +-static void __sched notrace __schedule(bool preempt) ++static bool __sched notrace __schedule(bool preempt) + { + struct task_struct *prev, *next; + unsigned long *switch_count; +@@ -3551,12 +3565,17 @@ static void __sched notrace __schedule(b + + /* Also unlocks the rq: */ + rq = context_switch(rq, prev, next, &rf); ++ if (rq == NULL) ++ return true; /* task hijacked by head domain */ + } else { ++ prev->state &= ~TASK_HARDENING; + rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); + rq_unlock_irq(rq, &rf); + } + + balance_callback(rq); ++ ++ return false; + } + + void __noreturn do_task_dead(void) +@@ -3594,7 +3613,8 @@ asmlinkage __visible void __sched schedu + sched_submit_work(tsk); + do { + preempt_disable(); +- __schedule(false); ++ if (__schedule(false)) ++ return; + sched_preempt_enable_no_resched(); + } while (need_resched()); + } +@@ -3674,7 +3694,8 @@ static void __sched notrace preempt_sche + */ + preempt_disable_notrace(); + preempt_latency_start(1); +- __schedule(true); ++ if (__schedule(true)) ++ return; + preempt_latency_stop(1); + preempt_enable_no_resched_notrace(); + +@@ -3697,7 +3718,7 @@ asmlinkage __visible void __sched notrac + * If there is a non-zero preempt_count or interrupts are disabled, + * we do not want to preempt the current task. Just return.. + */ +- if (likely(!preemptible())) ++ if (likely(!preemptible() || !ipipe_root_p)) + return; + + preempt_schedule_common(); +@@ -3723,7 +3744,7 @@ asmlinkage __visible void __sched notrac + { + enum ctx_state prev_ctx; + +- if (likely(!preemptible())) ++ if (likely(!preemptible() || !ipipe_root_p || hard_irqs_disabled())) + return; + + do { +@@ -4406,6 +4427,7 @@ change: + + prev_class = p->sched_class; + __setscheduler(rq, p, attr, pi); ++ __ipipe_report_setsched(p); + + if (queued) { + /* +@@ -5981,6 +6003,43 @@ int in_sched_functions(unsigned long add + && addr < (unsigned long)__sched_text_end); + } + ++#ifdef CONFIG_IPIPE ++ ++int __ipipe_migrate_head(void) ++{ ++ struct task_struct *p = current; ++ ++ preempt_disable(); ++ ++ IPIPE_WARN_ONCE(__this_cpu_read(ipipe_percpu.task_hijacked) != NULL); ++ ++ __this_cpu_write(ipipe_percpu.task_hijacked, p); ++ set_current_state(TASK_INTERRUPTIBLE | TASK_HARDENING); ++ sched_submit_work(p); ++ if (likely(__schedule(false))) ++ return 0; ++ ++ preempt_enable(); ++ return -ERESTARTSYS; ++} ++EXPORT_SYMBOL_GPL(__ipipe_migrate_head); ++ ++void __ipipe_reenter_root(void) ++{ ++ struct rq *rq; ++ struct task_struct *p; ++ ++ p = __this_cpu_read(ipipe_percpu.rqlock_owner); ++ BUG_ON(p == NULL); ++ ipipe_clear_thread_flag(TIP_HEAD); ++ rq = finish_task_switch(p); ++ balance_callback(rq); ++ preempt_enable_no_resched_notrace(); ++} ++EXPORT_SYMBOL_GPL(__ipipe_reenter_root); ++ ++#endif /* CONFIG_IPIPE */ ++ + #ifdef CONFIG_CGROUP_SCHED + /* + * Default task group. 
+diff -uprN kernel/kernel/sched/core.c.orig kernel_new/kernel/sched/core.c.orig +--- kernel/kernel/sched/core.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/sched/core.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,7161 @@ ++/* ++ * kernel/sched/core.c ++ * ++ * Core kernel scheduler code and related syscalls ++ * ++ * Copyright (C) 1991-2002 Linus Torvalds ++ */ ++#include "sched.h" ++ ++#include ++ ++#include ++ ++#include ++#include ++ ++#include "../workqueue_internal.h" ++#include "../smpboot.h" ++ ++#include "pelt.h" ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); ++ ++#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL) ++/* ++ * Debugging: various feature bits ++ * ++ * If SCHED_DEBUG is disabled, each compilation unit has its own copy of ++ * sysctl_sched_features, defined in sched.h, to allow constants propagation ++ * at compile time and compiler optimization based on features default. ++ */ ++#define SCHED_FEAT(name, enabled) \ ++ (1UL << __SCHED_FEAT_##name) * enabled | ++const_debug unsigned int sysctl_sched_features = ++#include "features.h" ++ 0; ++#undef SCHED_FEAT ++#endif ++ ++/* ++ * Number of tasks to iterate in a single balance run. ++ * Limited because this is done with IRQs disabled. ++ */ ++const_debug unsigned int sysctl_sched_nr_migrate = 32; ++ ++/* ++ * period over which we measure -rt task CPU usage in us. ++ * default: 1s ++ */ ++unsigned int sysctl_sched_rt_period = 1000000; ++ ++__read_mostly int scheduler_running; ++ ++/* ++ * part of the period that we allow rt tasks to run in us. ++ * default: 0.95s ++ */ ++int sysctl_sched_rt_runtime = 950000; ++ ++/* ++ * __task_rq_lock - lock the rq @p resides on. ++ */ ++struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ struct rq *rq; ++ ++ lockdep_assert_held(&p->pi_lock); ++ ++ for (;;) { ++ rq = task_rq(p); ++ raw_spin_lock(&rq->lock); ++ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { ++ rq_pin_lock(rq, rf); ++ return rq; ++ } ++ raw_spin_unlock(&rq->lock); ++ ++ while (unlikely(task_on_rq_migrating(p))) ++ cpu_relax(); ++ } ++} ++ ++/* ++ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. ++ */ ++struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(p->pi_lock) ++ __acquires(rq->lock) ++{ ++ struct rq *rq; ++ ++ for (;;) { ++ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); ++ rq = task_rq(p); ++ raw_spin_lock(&rq->lock); ++ /* ++ * move_queued_task() task_rq_lock() ++ * ++ * ACQUIRE (rq->lock) ++ * [S] ->on_rq = MIGRATING [L] rq = task_rq() ++ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); ++ * [S] ->cpu = new_cpu [L] task_rq() ++ * [L] ->on_rq ++ * RELEASE (rq->lock) ++ * ++ * If we observe the old CPU in task_rq_lock(), the acquire of ++ * the old rq->lock will fully serialize against the stores. ++ * ++ * If we observe the new CPU in task_rq_lock(), the address ++ * dependency headed by '[L] rq = task_rq()' and the acquire ++ * will pair with the WMB to ensure we then also see migrating. 
++ */ ++ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { ++ rq_pin_lock(rq, rf); ++ return rq; ++ } ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); ++ ++ while (unlikely(task_on_rq_migrating(p))) ++ cpu_relax(); ++ } ++} ++ ++/* ++ * RQ-clock updating methods: ++ */ ++bool account_irqtime_to_task __read_mostly; ++static int __init setup_account_irqtime(char *str) ++{ ++ account_irqtime_to_task = true; ++ ++ return 0; ++} ++__setup("account-irqtime-to-task", setup_account_irqtime); ++ ++static void update_rq_clock_task(struct rq *rq, s64 delta) ++{ ++/* ++ * In theory, the compile should just see 0 here, and optimize out the call ++ * to sched_rt_avg_update. But I don't trust it... ++ */ ++ s64 __maybe_unused steal = 0, irq_delta = 0; ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ if (account_irqtime_to_task) ++ goto out; ++ ++ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; ++ ++ /* ++ * Since irq_time is only updated on {soft,}irq_exit, we might run into ++ * this case when a previous update_rq_clock() happened inside a ++ * {soft,}irq region. ++ * ++ * When this happens, we stop ->clock_task and only update the ++ * prev_irq_time stamp to account for the part that fit, so that a next ++ * update will consume the rest. This ensures ->clock_task is ++ * monotonic. ++ * ++ * It does however cause some slight miss-attribution of {soft,}irq ++ * time, a more accurate solution would be to update the irq_time using ++ * the current rq->clock timestamp, except that would require using ++ * atomic ops. ++ */ ++ if (irq_delta > delta) ++ irq_delta = delta; ++ ++ rq->prev_irq_time += irq_delta; ++ delta -= irq_delta; ++ ++out: ++#endif ++#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING ++ if (static_key_false((¶virt_steal_rq_enabled))) { ++ steal = paravirt_steal_clock(cpu_of(rq)); ++ steal -= rq->prev_steal_time_rq; ++ ++ if (unlikely(steal > delta)) ++ steal = delta; ++ ++ rq->prev_steal_time_rq += steal; ++ delta -= steal; ++ } ++#endif ++ ++ rq->clock_task += delta; ++ ++#ifdef CONFIG_HAVE_SCHED_AVG_IRQ ++ if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) ++ update_irq_load_avg(rq, irq_delta + steal); ++#endif ++} ++ ++void update_rq_clock(struct rq *rq) ++{ ++ s64 delta; ++ ++ lockdep_assert_held(&rq->lock); ++ ++ if (rq->clock_update_flags & RQCF_ACT_SKIP) ++ return; ++ ++#ifdef CONFIG_SCHED_DEBUG ++ if (sched_feat(WARN_DOUBLE_CLOCK)) ++ SCHED_WARN_ON(rq->clock_update_flags & RQCF_UPDATED); ++ rq->clock_update_flags |= RQCF_UPDATED; ++#endif ++ ++ delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; ++ if (delta < 0) ++ return; ++ rq->clock += delta; ++ update_rq_clock_task(rq, delta); ++} ++ ++ ++#ifdef CONFIG_SCHED_HRTICK ++/* ++ * Use HR-timers to deliver accurate preemption points. ++ */ ++ ++static void hrtick_clear(struct rq *rq) ++{ ++ if (hrtimer_active(&rq->hrtick_timer)) ++ hrtimer_cancel(&rq->hrtick_timer); ++} ++ ++/* ++ * High-resolution timer tick. ++ * Runs from hardirq context with interrupts disabled. 
++ */ ++static enum hrtimer_restart hrtick(struct hrtimer *timer) ++{ ++ struct rq *rq = container_of(timer, struct rq, hrtick_timer); ++ struct rq_flags rf; ++ ++ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); ++ ++ rq_lock(rq, &rf); ++ update_rq_clock(rq); ++ rq->curr->sched_class->task_tick(rq, rq->curr, 1); ++ rq_unlock(rq, &rf); ++ ++ return HRTIMER_NORESTART; ++} ++ ++#ifdef CONFIG_SMP ++ ++static void __hrtick_restart(struct rq *rq) ++{ ++ struct hrtimer *timer = &rq->hrtick_timer; ++ ++ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); ++} ++ ++/* ++ * called from hardirq (IPI) context ++ */ ++static void __hrtick_start(void *arg) ++{ ++ struct rq *rq = arg; ++ struct rq_flags rf; ++ ++ rq_lock(rq, &rf); ++ __hrtick_restart(rq); ++ rq->hrtick_csd_pending = 0; ++ rq_unlock(rq, &rf); ++} ++ ++/* ++ * Called to set the hrtick timer state. ++ * ++ * called with rq->lock held and irqs disabled ++ */ ++void hrtick_start(struct rq *rq, u64 delay) ++{ ++ struct hrtimer *timer = &rq->hrtick_timer; ++ ktime_t time; ++ s64 delta; ++ ++ /* ++ * Don't schedule slices shorter than 10000ns, that just ++ * doesn't make sense and can cause timer DoS. ++ */ ++ delta = max_t(s64, delay, 10000LL); ++ time = ktime_add_ns(timer->base->get_time(), delta); ++ ++ hrtimer_set_expires(timer, time); ++ ++ if (rq == this_rq()) { ++ __hrtick_restart(rq); ++ } else if (!rq->hrtick_csd_pending) { ++ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); ++ rq->hrtick_csd_pending = 1; ++ } ++} ++ ++#else ++/* ++ * Called to set the hrtick timer state. ++ * ++ * called with rq->lock held and irqs disabled ++ */ ++void hrtick_start(struct rq *rq, u64 delay) ++{ ++ /* ++ * Don't schedule slices shorter than 10000ns, that just ++ * doesn't make sense. Rely on vruntime for fairness. ++ */ ++ delay = max_t(u64, delay, 10000LL); ++ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), ++ HRTIMER_MODE_REL_PINNED); ++} ++#endif /* CONFIG_SMP */ ++ ++static void hrtick_rq_init(struct rq *rq) ++{ ++#ifdef CONFIG_SMP ++ rq->hrtick_csd_pending = 0; ++ ++ rq->hrtick_csd.flags = 0; ++ rq->hrtick_csd.func = __hrtick_start; ++ rq->hrtick_csd.info = rq; ++#endif ++ ++ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ rq->hrtick_timer.function = hrtick; ++} ++#else /* CONFIG_SCHED_HRTICK */ ++static inline void hrtick_clear(struct rq *rq) ++{ ++} ++ ++static inline void hrtick_rq_init(struct rq *rq) ++{ ++} ++#endif /* CONFIG_SCHED_HRTICK */ ++ ++/* ++ * cmpxchg based fetch_or, macro so it works for different integer types ++ */ ++#define fetch_or(ptr, mask) \ ++ ({ \ ++ typeof(ptr) _ptr = (ptr); \ ++ typeof(mask) _mask = (mask); \ ++ typeof(*_ptr) _old, _val = *_ptr; \ ++ \ ++ for (;;) { \ ++ _old = cmpxchg(_ptr, _val, _val | _mask); \ ++ if (_old == _val) \ ++ break; \ ++ _val = _old; \ ++ } \ ++ _old; \ ++}) ++ ++#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) ++/* ++ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, ++ * this avoids any races wrt polling state changes and thereby avoids ++ * spurious IPIs. ++ */ ++static bool set_nr_and_not_polling(struct task_struct *p) ++{ ++ struct thread_info *ti = task_thread_info(p); ++ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); ++} ++ ++/* ++ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. ++ * ++ * If this returns true, then the idle task promises to call ++ * sched_ttwu_pending() and reschedule soon. 
++ */ ++static bool set_nr_if_polling(struct task_struct *p) ++{ ++ struct thread_info *ti = task_thread_info(p); ++ typeof(ti->flags) old, val = READ_ONCE(ti->flags); ++ ++ for (;;) { ++ if (!(val & _TIF_POLLING_NRFLAG)) ++ return false; ++ if (val & _TIF_NEED_RESCHED) ++ return true; ++ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); ++ if (old == val) ++ break; ++ val = old; ++ } ++ return true; ++} ++ ++#else ++static bool set_nr_and_not_polling(struct task_struct *p) ++{ ++ set_tsk_need_resched(p); ++ return true; ++} ++ ++#ifdef CONFIG_SMP ++static bool set_nr_if_polling(struct task_struct *p) ++{ ++ return false; ++} ++#endif ++#endif ++ ++/** ++ * wake_q_add() - queue a wakeup for 'later' waking. ++ * @head: the wake_q_head to add @task to ++ * @task: the task to queue for 'later' wakeup ++ * ++ * Queue a task for later wakeup, most likely by the wake_up_q() call in the ++ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come ++ * instantly. ++ * ++ * This function must be used as-if it were wake_up_process(); IOW the task ++ * must be ready to be woken at this location. ++ */ ++void wake_q_add(struct wake_q_head *head, struct task_struct *task) ++{ ++ struct wake_q_node *node = &task->wake_q; ++ ++ /* ++ * Atomically grab the task, if ->wake_q is !nil already it means ++ * its already queued (either by us or someone else) and will get the ++ * wakeup due to that. ++ * ++ * In order to ensure that a pending wakeup will observe our pending ++ * state, even in the failed case, an explicit smp_mb() must be used. ++ */ ++ smp_mb__before_atomic(); ++ if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)) ++ return; ++ ++ get_task_struct(task); ++ ++ /* ++ * The head is context local, there can be no concurrency. ++ */ ++ *head->lastp = node; ++ head->lastp = &node->next; ++} ++ ++void wake_up_q(struct wake_q_head *head) ++{ ++ struct wake_q_node *node = head->first; ++ ++ while (node != WAKE_Q_TAIL) { ++ struct task_struct *task; ++ ++ task = container_of(node, struct task_struct, wake_q); ++ BUG_ON(!task); ++ /* Task can safely be re-inserted now: */ ++ node = node->next; ++ task->wake_q.next = NULL; ++ ++ /* ++ * wake_up_process() executes a full barrier, which pairs with ++ * the queueing in wake_q_add() so as not to miss wakeups. ++ */ ++ wake_up_process(task); ++ put_task_struct(task); ++ } ++} ++ ++/* ++ * resched_curr - mark rq's current task 'to be rescheduled now'. ++ * ++ * On UP this means the setting of the need_resched flag, on SMP it ++ * might also involve a cross-CPU call to trigger the scheduler on ++ * the target CPU. ++ */ ++void resched_curr(struct rq *rq) ++{ ++ struct task_struct *curr = rq->curr; ++ int cpu; ++ ++ lockdep_assert_held(&rq->lock); ++ ++ if (test_tsk_need_resched(curr)) ++ return; ++ ++ cpu = cpu_of(rq); ++ ++ if (cpu == smp_processor_id()) { ++ set_tsk_need_resched(curr); ++ set_preempt_need_resched(); ++ return; ++ } ++ ++ if (set_nr_and_not_polling(curr)) ++ smp_send_reschedule(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++} ++ ++void resched_cpu(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ if (cpu_online(cpu) || cpu == smp_processor_id()) ++ resched_curr(rq); ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++} ++ ++#ifdef CONFIG_SMP ++#ifdef CONFIG_NO_HZ_COMMON ++/* ++ * In the semi idle case, use the nearest busy CPU for migrating timers ++ * from an idle CPU. This is good for power-savings. 
++ * ++ * We don't do similar optimization for completely idle system, as ++ * selecting an idle CPU will add more delays to the timers than intended ++ * (as that CPU's timer base may not be uptodate wrt jiffies etc). ++ */ ++int get_nohz_timer_target(void) ++{ ++ int i, cpu = smp_processor_id(); ++ struct sched_domain *sd; ++ ++ if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) ++ return cpu; ++ ++ rcu_read_lock(); ++ for_each_domain(cpu, sd) { ++ for_each_cpu(i, sched_domain_span(sd)) { ++ if (cpu == i) ++ continue; ++ ++ if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) { ++ cpu = i; ++ goto unlock; ++ } ++ } ++ } ++ ++ if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) ++ cpu = housekeeping_any_cpu(HK_FLAG_TIMER); ++unlock: ++ rcu_read_unlock(); ++ return cpu; ++} ++ ++/* ++ * When add_timer_on() enqueues a timer into the timer wheel of an ++ * idle CPU then this timer might expire before the next timer event ++ * which is scheduled to wake up that CPU. In case of a completely ++ * idle system the next event might even be infinite time into the ++ * future. wake_up_idle_cpu() ensures that the CPU is woken up and ++ * leaves the inner idle loop so the newly added timer is taken into ++ * account when the CPU goes back to idle and evaluates the timer ++ * wheel for the next timer event. ++ */ ++static void wake_up_idle_cpu(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (cpu == smp_processor_id()) ++ return; ++ ++ if (set_nr_and_not_polling(rq->idle)) ++ smp_send_reschedule(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++} ++ ++static bool wake_up_full_nohz_cpu(int cpu) ++{ ++ /* ++ * We just need the target to call irq_exit() and re-evaluate ++ * the next tick. The nohz full kick at least implies that. ++ * If needed we can still optimize that later with an ++ * empty IRQ. ++ */ ++ if (cpu_is_offline(cpu)) ++ return true; /* Don't try to wake offline CPUs. */ ++ if (tick_nohz_full_cpu(cpu)) { ++ if (cpu != smp_processor_id() || ++ tick_nohz_tick_stopped()) ++ tick_nohz_full_kick_cpu(cpu); ++ return true; ++ } ++ ++ return false; ++} ++ ++/* ++ * Wake up the specified CPU. If the CPU is going offline, it is the ++ * caller's responsibility to deal with the lost wakeup, for example, ++ * by hooking into the CPU_DEAD notifier like timers and hrtimers do. ++ */ ++void wake_up_nohz_cpu(int cpu) ++{ ++ if (!wake_up_full_nohz_cpu(cpu)) ++ wake_up_idle_cpu(cpu); ++} ++ ++static inline bool got_nohz_idle_kick(void) ++{ ++ int cpu = smp_processor_id(); ++ ++ if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK)) ++ return false; ++ ++ if (idle_cpu(cpu) && !need_resched()) ++ return true; ++ ++ /* ++ * We can't run Idle Load Balance on this CPU for this time so we ++ * cancel it and clear NOHZ_BALANCE_KICK ++ */ ++ atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); ++ return false; ++} ++ ++#else /* CONFIG_NO_HZ_COMMON */ ++ ++static inline bool got_nohz_idle_kick(void) ++{ ++ return false; ++} ++ ++#endif /* CONFIG_NO_HZ_COMMON */ ++ ++#ifdef CONFIG_NO_HZ_FULL ++bool sched_can_stop_tick(struct rq *rq) ++{ ++ int fifo_nr_running; ++ ++ /* Deadline tasks, even if single, need the tick */ ++ if (rq->dl.dl_nr_running) ++ return false; ++ ++ /* ++ * If there are more than one RR tasks, we need the tick to effect the ++ * actual RR behaviour. ++ */ ++ if (rq->rt.rr_nr_running) { ++ if (rq->rt.rr_nr_running == 1) ++ return true; ++ else ++ return false; ++ } ++ ++ /* ++ * If there's no RR tasks, but FIFO tasks, we can skip the tick, no ++ * forced preemption between FIFO tasks. 
++ */ ++ fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running; ++ if (fifo_nr_running) ++ return true; ++ ++ /* ++ * If there are no DL,RR/FIFO tasks, there must only be CFS tasks left; ++ * if there's more than one we need the tick for involuntary ++ * preemption. ++ */ ++ if (rq->nr_running > 1) ++ return false; ++ ++ return true; ++} ++#endif /* CONFIG_NO_HZ_FULL */ ++#endif /* CONFIG_SMP */ ++ ++#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \ ++ (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH))) ++/* ++ * Iterate task_group tree rooted at *from, calling @down when first entering a ++ * node and @up when leaving it for the final time. ++ * ++ * Caller must hold rcu_lock or sufficient equivalent. ++ */ ++int walk_tg_tree_from(struct task_group *from, ++ tg_visitor down, tg_visitor up, void *data) ++{ ++ struct task_group *parent, *child; ++ int ret; ++ ++ parent = from; ++ ++down: ++ ret = (*down)(parent, data); ++ if (ret) ++ goto out; ++ list_for_each_entry_rcu(child, &parent->children, siblings) { ++ parent = child; ++ goto down; ++ ++up: ++ continue; ++ } ++ ret = (*up)(parent, data); ++ if (ret || parent == from) ++ goto out; ++ ++ child = parent; ++ parent = parent->parent; ++ if (parent) ++ goto up; ++out: ++ return ret; ++} ++ ++int tg_nop(struct task_group *tg, void *data) ++{ ++ return 0; ++} ++#endif ++ ++static void set_load_weight(struct task_struct *p, bool update_load) ++{ ++ int prio = p->static_prio - MAX_RT_PRIO; ++ struct load_weight *load = &p->se.load; ++ ++ /* ++ * SCHED_IDLE tasks get minimal weight: ++ */ ++ if (idle_policy(p->policy)) { ++ load->weight = scale_load(WEIGHT_IDLEPRIO); ++ load->inv_weight = WMULT_IDLEPRIO; ++ return; ++ } ++ ++ /* ++ * SCHED_OTHER tasks have to update their load when changing their ++ * weight ++ */ ++ if (update_load && p->sched_class == &fair_sched_class) { ++ reweight_task(p, prio); ++ } else { ++ load->weight = scale_load(sched_prio_to_weight[prio]); ++ load->inv_weight = sched_prio_to_wmult[prio]; ++ } ++} ++ ++static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) ++{ ++ if (!(flags & ENQUEUE_NOCLOCK)) ++ update_rq_clock(rq); ++ ++ if (!(flags & ENQUEUE_RESTORE)) ++ sched_info_queued(rq, p); ++ ++ p->sched_class->enqueue_task(rq, p, flags); ++} ++ ++static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) ++{ ++ if (!(flags & DEQUEUE_NOCLOCK)) ++ update_rq_clock(rq); ++ ++ if (!(flags & DEQUEUE_SAVE)) ++ sched_info_dequeued(rq, p); ++ ++ p->sched_class->dequeue_task(rq, p, flags); ++} ++ ++void activate_task(struct rq *rq, struct task_struct *p, int flags) ++{ ++ if (task_contributes_to_load(p)) ++ rq->nr_uninterruptible--; ++ ++ enqueue_task(rq, p, flags); ++} ++ ++void deactivate_task(struct rq *rq, struct task_struct *p, int flags) ++{ ++ if (task_contributes_to_load(p)) ++ rq->nr_uninterruptible++; ++ ++ dequeue_task(rq, p, flags); ++} ++ ++/* ++ * __normal_prio - return the priority that is based on the static prio ++ */ ++static inline int __normal_prio(struct task_struct *p) ++{ ++ return p->static_prio; ++} ++ ++/* ++ * Calculate the expected normal priority: i.e. priority ++ * without taking RT-inheritance into account. Might be ++ * boosted by interactivity modifiers. Changes upon fork, ++ * setprio syscalls, and whenever the interactivity ++ * estimator recalculates. 
++ */ ++static inline int normal_prio(struct task_struct *p) ++{ ++ int prio; ++ ++ if (task_has_dl_policy(p)) ++ prio = MAX_DL_PRIO-1; ++ else if (task_has_rt_policy(p)) ++ prio = MAX_RT_PRIO-1 - p->rt_priority; ++ else ++ prio = __normal_prio(p); ++ return prio; ++} ++ ++/* ++ * Calculate the current priority, i.e. the priority ++ * taken into account by the scheduler. This value might ++ * be boosted by RT tasks, or might be boosted by ++ * interactivity modifiers. Will be RT if the task got ++ * RT-boosted. If not then it returns p->normal_prio. ++ */ ++static int effective_prio(struct task_struct *p) ++{ ++ p->normal_prio = normal_prio(p); ++ /* ++ * If we are RT tasks or we were boosted to RT priority, ++ * keep the priority unchanged. Otherwise, update priority ++ * to the normal priority: ++ */ ++ if (!rt_prio(p->prio)) ++ return p->normal_prio; ++ return p->prio; ++} ++ ++/** ++ * task_curr - is this task currently executing on a CPU? ++ * @p: the task in question. ++ * ++ * Return: 1 if the task is currently executing. 0 otherwise. ++ */ ++inline int task_curr(const struct task_struct *p) ++{ ++ return cpu_curr(task_cpu(p)) == p; ++} ++ ++/* ++ * switched_from, switched_to and prio_changed must _NOT_ drop rq->lock, ++ * use the balance_callback list if you want balancing. ++ * ++ * this means any call to check_class_changed() must be followed by a call to ++ * balance_callback(). ++ */ ++static inline void check_class_changed(struct rq *rq, struct task_struct *p, ++ const struct sched_class *prev_class, ++ int oldprio) ++{ ++ if (prev_class != p->sched_class) { ++ if (prev_class->switched_from) ++ prev_class->switched_from(rq, p); ++ ++ p->sched_class->switched_to(rq, p); ++ } else if (oldprio != p->prio || dl_task(p)) ++ p->sched_class->prio_changed(rq, p, oldprio); ++} ++ ++void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) ++{ ++ const struct sched_class *class; ++ ++ if (p->sched_class == rq->curr->sched_class) { ++ rq->curr->sched_class->check_preempt_curr(rq, p, flags); ++ } else { ++ for_each_class(class) { ++ if (class == rq->curr->sched_class) ++ break; ++ if (class == p->sched_class) { ++ resched_curr(rq); ++ break; ++ } ++ } ++ } ++ ++ /* ++ * A queue event has occurred, and we're going to schedule. In ++ * this case, we can save a useless back to back clock update. ++ */ ++ if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) ++ rq_clock_skip_update(rq); ++} ++ ++#ifdef CONFIG_SMP ++ ++static inline bool is_per_cpu_kthread(struct task_struct *p) ++{ ++ if (!(p->flags & PF_KTHREAD)) ++ return false; ++ ++ if (p->nr_cpus_allowed != 1) ++ return false; ++ ++ return true; ++} ++ ++/* ++ * Per-CPU kthreads are allowed to run on !actie && online CPUs, see ++ * __set_cpus_allowed_ptr() and select_fallback_rq(). ++ */ ++static inline bool is_cpu_allowed(struct task_struct *p, int cpu) ++{ ++ if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) ++ return false; ++ ++ if (is_per_cpu_kthread(p)) ++ return cpu_online(cpu); ++ ++ return cpu_active(cpu); ++} ++ ++/* ++ * This is how migration works: ++ * ++ * 1) we invoke migration_cpu_stop() on the target CPU using ++ * stop_one_cpu(). ++ * 2) stopper starts to run (implicitly forcing the migrated thread ++ * off the CPU) ++ * 3) it checks whether the migrated task is still in the wrong runqueue. ++ * 4) if it's in the wrong runqueue then the migration thread removes ++ * it and puts it into the right queue. ++ * 5) stopper completes and stop_one_cpu() returns and the migration ++ * is done. 
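The normal_prio()/effective_prio() helpers above collapse the three scheduling classes onto one prio scale. The short userspace sketch below is not part of the patch; it only replays that arithmetic, assuming the usual mainline constants for this kernel series (MAX_DL_PRIO == 0, MAX_RT_PRIO == 100, static_prio == 120 + nice), so a deadline task maps to -1, rt_priority 1..99 maps to 98..0, and nice -20..19 maps to 100..139.

```c
/*
 * Illustrative only, not part of the patch: the priority bands used by
 * normal_prio() above, with the usual mainline constants assumed.
 */
#include <stdio.h>

#define MAX_DL_PRIO 0
#define MAX_RT_PRIO 100
#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)

int main(void)
{
	/* SCHED_DEADLINE tasks all map below the RT range. */
	printf("deadline task   -> prio %d\n", MAX_DL_PRIO - 1);

	/* SCHED_FIFO/RR: a higher rt_priority gives a numerically lower prio. */
	for (int rt_priority = 1; rt_priority <= 99; rt_priority += 49)
		printf("rt_priority %2d  -> prio %d\n",
		       rt_priority, MAX_RT_PRIO - 1 - rt_priority);

	/* SCHED_NORMAL: prio is simply the nice-derived static_prio. */
	for (int nice = -20; nice <= 19; nice += 13)
		printf("nice %3d        -> prio %d\n", nice, NICE_TO_PRIO(nice));

	return 0;
}
```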
++ */ ++ ++/* ++ * move_queued_task - move a queued task to new rq. ++ * ++ * Returns (locked) new rq. Old rq's lock is released. ++ */ ++static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, ++ struct task_struct *p, int new_cpu) ++{ ++ lockdep_assert_held(&rq->lock); ++ ++ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); ++ dequeue_task(rq, p, DEQUEUE_NOCLOCK); ++ set_task_cpu(p, new_cpu); ++ rq_unlock(rq, rf); ++ ++ rq = cpu_rq(new_cpu); ++ ++ rq_lock(rq, rf); ++ BUG_ON(task_cpu(p) != new_cpu); ++ enqueue_task(rq, p, 0); ++ p->on_rq = TASK_ON_RQ_QUEUED; ++ check_preempt_curr(rq, p, 0); ++ ++ return rq; ++} ++ ++struct migration_arg { ++ struct task_struct *task; ++ int dest_cpu; ++}; ++ ++/* ++ * Move (not current) task off this CPU, onto the destination CPU. We're doing ++ * this because either it can't run here any more (set_cpus_allowed() ++ * away from this CPU, or CPU going down), or because we're ++ * attempting to rebalance this task on exec (sched_exec). ++ * ++ * So we race with normal scheduler movements, but that's OK, as long ++ * as the task is no longer on this CPU. ++ */ ++static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, ++ struct task_struct *p, int dest_cpu) ++{ ++ /* Affinity changed (again). */ ++ if (!is_cpu_allowed(p, dest_cpu)) ++ return rq; ++ ++ update_rq_clock(rq); ++ rq = move_queued_task(rq, rf, p, dest_cpu); ++ ++ return rq; ++} ++ ++/* ++ * migration_cpu_stop - this will be executed by a highprio stopper thread ++ * and performs thread migration by bumping thread off CPU then ++ * 'pushing' onto another runqueue. ++ */ ++static int migration_cpu_stop(void *data) ++{ ++ struct migration_arg *arg = data; ++ struct task_struct *p = arg->task; ++ struct rq *rq = this_rq(); ++ struct rq_flags rf; ++ ++ /* ++ * The original target CPU might have gone down and we might ++ * be on another CPU but it doesn't matter. ++ */ ++ local_irq_disable(); ++ /* ++ * We need to explicitly wake pending tasks before running ++ * __migrate_task() such that we will not miss enforcing cpus_allowed ++ * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. ++ */ ++ sched_ttwu_pending(); ++ ++ raw_spin_lock(&p->pi_lock); ++ rq_lock(rq, &rf); ++ /* ++ * If task_rq(p) != rq, it cannot be migrated here, because we're ++ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because ++ * we're holding p->pi_lock. ++ */ ++ if (task_rq(p) == rq) { ++ if (task_on_rq_queued(p)) ++ rq = __migrate_task(rq, &rf, p, arg->dest_cpu); ++ else ++ p->wake_cpu = arg->dest_cpu; ++ } ++ rq_unlock(rq, &rf); ++ raw_spin_unlock(&p->pi_lock); ++ ++ local_irq_enable(); ++ return 0; ++} ++ ++/* ++ * sched_class::set_cpus_allowed must do the below, but is not required to ++ * actually call this function. ++ */ ++void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ cpumask_copy(&p->cpus_allowed, new_mask); ++ p->nr_cpus_allowed = cpumask_weight(new_mask); ++} ++ ++void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ struct rq *rq = task_rq(p); ++ bool queued, running; ++ ++ lockdep_assert_held(&p->pi_lock); ++ ++ queued = task_on_rq_queued(p); ++ running = task_current(rq, p); ++ ++ if (queued) { ++ /* ++ * Because __kthread_bind() calls this on blocked tasks without ++ * holding rq->lock. 
++ */ ++ lockdep_assert_held(&rq->lock); ++ dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); ++ } ++ if (running) ++ put_prev_task(rq, p); ++ ++ p->sched_class->set_cpus_allowed(p, new_mask); ++ ++ if (queued) ++ enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); ++ if (running) ++ set_curr_task(rq, p); ++} ++ ++/* ++ * Change a given task's CPU affinity. Migrate the thread to a ++ * proper CPU and schedule it away if the CPU it's executing on ++ * is removed from the allowed bitmask. ++ * ++ * NOTE: the caller must have a valid reference to the task, the ++ * task must not exit() & deallocate itself prematurely. The ++ * call is not atomic; no spinlocks may be held. ++ */ ++static int __set_cpus_allowed_ptr(struct task_struct *p, ++ const struct cpumask *new_mask, bool check) ++{ ++ const struct cpumask *cpu_valid_mask = cpu_active_mask; ++ unsigned int dest_cpu; ++ struct rq_flags rf; ++ struct rq *rq; ++ int ret = 0; ++ ++ rq = task_rq_lock(p, &rf); ++ update_rq_clock(rq); ++ ++ if (p->flags & PF_KTHREAD) { ++ /* ++ * Kernel threads are allowed on online && !active CPUs ++ */ ++ cpu_valid_mask = cpu_online_mask; ++ } ++ ++ /* ++ * Must re-check here, to close a race against __kthread_bind(), ++ * sched_setaffinity() is not guaranteed to observe the flag. ++ */ ++ if (check && (p->flags & PF_NO_SETAFFINITY)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (cpumask_equal(&p->cpus_allowed, new_mask)) ++ goto out; ++ ++ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); ++ if (dest_cpu >= nr_cpu_ids) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ do_set_cpus_allowed(p, new_mask); ++ ++ if (p->flags & PF_KTHREAD) { ++ /* ++ * For kernel threads that do indeed end up on online && ++ * !active we want to ensure they are strict per-CPU threads. ++ */ ++ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && ++ !cpumask_intersects(new_mask, cpu_active_mask) && ++ p->nr_cpus_allowed != 1); ++ } ++ ++ /* Can the task run on the task's current CPU? If so, we're done */ ++ if (cpumask_test_cpu(task_cpu(p), new_mask)) ++ goto out; ++ ++ if (task_running(rq, p) || p->state == TASK_WAKING) { ++ struct migration_arg arg = { p, dest_cpu }; ++ /* Need help from migration thread: drop lock and wait. */ ++ task_rq_unlock(rq, p, &rf); ++ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); ++ tlb_migrate_finish(p->mm); ++ return 0; ++ } else if (task_on_rq_queued(p)) { ++ /* ++ * OK, since we're going to drop the lock immediately ++ * afterwards anyway. ++ */ ++ rq = move_queued_task(rq, &rf, p, dest_cpu); ++ } ++out: ++ task_rq_unlock(rq, p, &rf); ++ ++ return ret; ++} ++ ++int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ return __set_cpus_allowed_ptr(p, new_mask, false); ++} ++EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); ++ ++void set_task_cpu(struct task_struct *p, unsigned int new_cpu) ++{ ++#ifdef CONFIG_SCHED_DEBUG ++ /* ++ * We should never call set_task_cpu() on a blocked task, ++ * ttwu() will sort out the placement. ++ */ ++ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && ++ !p->on_rq); ++ ++ /* ++ * Migrating fair class task must have p->on_rq = TASK_ON_RQ_MIGRATING, ++ * because schedstat_wait_{start,end} rebase migrating task's wait_start ++ * time relying on p->on_rq. ++ */ ++ WARN_ON_ONCE(p->state == TASK_RUNNING && ++ p->sched_class == &fair_sched_class && ++ (p->on_rq && !task_on_rq_migrating(p))); ++ ++#ifdef CONFIG_LOCKDEP ++ /* ++ * The caller should hold either p->pi_lock or rq->lock, when changing ++ * a task's CPU. 
->pi_lock for waking tasks, rq->lock for runnable tasks. ++ * ++ * sched_move_task() holds both and thus holding either pins the cgroup, ++ * see task_group(). ++ * ++ * Furthermore, all task_rq users should acquire both locks, see ++ * task_rq_lock(). ++ */ ++ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || ++ lockdep_is_held(&task_rq(p)->lock))); ++#endif ++ /* ++ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. ++ */ ++ WARN_ON_ONCE(!cpu_online(new_cpu)); ++#endif ++ ++ trace_sched_migrate_task(p, new_cpu); ++ ++ if (task_cpu(p) != new_cpu) { ++ if (p->sched_class->migrate_task_rq) ++ p->sched_class->migrate_task_rq(p, new_cpu); ++ p->se.nr_migrations++; ++ rseq_migrate(p); ++ perf_event_task_migrate(p); ++ } ++ ++ __set_task_cpu(p, new_cpu); ++} ++ ++#ifdef CONFIG_NUMA_BALANCING ++static void __migrate_swap_task(struct task_struct *p, int cpu) ++{ ++ if (task_on_rq_queued(p)) { ++ struct rq *src_rq, *dst_rq; ++ struct rq_flags srf, drf; ++ ++ src_rq = task_rq(p); ++ dst_rq = cpu_rq(cpu); ++ ++ rq_pin_lock(src_rq, &srf); ++ rq_pin_lock(dst_rq, &drf); ++ ++ p->on_rq = TASK_ON_RQ_MIGRATING; ++ deactivate_task(src_rq, p, 0); ++ set_task_cpu(p, cpu); ++ activate_task(dst_rq, p, 0); ++ p->on_rq = TASK_ON_RQ_QUEUED; ++ check_preempt_curr(dst_rq, p, 0); ++ ++ rq_unpin_lock(dst_rq, &drf); ++ rq_unpin_lock(src_rq, &srf); ++ ++ } else { ++ /* ++ * Task isn't running anymore; make it appear like we migrated ++ * it before it went to sleep. This means on wakeup we make the ++ * previous CPU our target instead of where it really is. ++ */ ++ p->wake_cpu = cpu; ++ } ++} ++ ++struct migration_swap_arg { ++ struct task_struct *src_task, *dst_task; ++ int src_cpu, dst_cpu; ++}; ++ ++static int migrate_swap_stop(void *data) ++{ ++ struct migration_swap_arg *arg = data; ++ struct rq *src_rq, *dst_rq; ++ int ret = -EAGAIN; ++ ++ if (!cpu_active(arg->src_cpu) || !cpu_active(arg->dst_cpu)) ++ return -EAGAIN; ++ ++ src_rq = cpu_rq(arg->src_cpu); ++ dst_rq = cpu_rq(arg->dst_cpu); ++ ++ double_raw_lock(&arg->src_task->pi_lock, ++ &arg->dst_task->pi_lock); ++ double_rq_lock(src_rq, dst_rq); ++ ++ if (task_cpu(arg->dst_task) != arg->dst_cpu) ++ goto unlock; ++ ++ if (task_cpu(arg->src_task) != arg->src_cpu) ++ goto unlock; ++ ++ if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) ++ goto unlock; ++ ++ if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) ++ goto unlock; ++ ++ __migrate_swap_task(arg->src_task, arg->dst_cpu); ++ __migrate_swap_task(arg->dst_task, arg->src_cpu); ++ ++ ret = 0; ++ ++unlock: ++ double_rq_unlock(src_rq, dst_rq); ++ raw_spin_unlock(&arg->dst_task->pi_lock); ++ raw_spin_unlock(&arg->src_task->pi_lock); ++ ++ return ret; ++} ++ ++/* ++ * Cross migrate two tasks ++ */ ++int migrate_swap(struct task_struct *cur, struct task_struct *p, ++ int target_cpu, int curr_cpu) ++{ ++ struct migration_swap_arg arg; ++ int ret = -EINVAL; ++ ++ arg = (struct migration_swap_arg){ ++ .src_task = cur, ++ .src_cpu = curr_cpu, ++ .dst_task = p, ++ .dst_cpu = target_cpu, ++ }; ++ ++ if (arg.src_cpu == arg.dst_cpu) ++ goto out; ++ ++ /* ++ * These three tests are all lockless; this is OK since all of them ++ * will be re-checked with proper locks held further down the line. 
++ */ ++ if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) ++ goto out; ++ ++ if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) ++ goto out; ++ ++ if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) ++ goto out; ++ ++ trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); ++ ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg); ++ ++out: ++ return ret; ++} ++#endif /* CONFIG_NUMA_BALANCING */ ++ ++/* ++ * wait_task_inactive - wait for a thread to unschedule. ++ * ++ * If @match_state is nonzero, it's the @p->state value just checked and ++ * not expected to change. If it changes, i.e. @p might have woken up, ++ * then return zero. When we succeed in waiting for @p to be off its CPU, ++ * we return a positive number (its total switch count). If a second call ++ * a short while later returns the same number, the caller can be sure that ++ * @p has remained unscheduled the whole time. ++ * ++ * The caller must ensure that the task *will* unschedule sometime soon, ++ * else this function might spin for a *long* time. This function can't ++ * be called with interrupts off, or it may introduce deadlock with ++ * smp_call_function() if an IPI is sent by the same process we are ++ * waiting to become inactive. ++ */ ++unsigned long wait_task_inactive(struct task_struct *p, long match_state) ++{ ++ int running, queued; ++ struct rq_flags rf; ++ unsigned long ncsw; ++ struct rq *rq; ++ ++ for (;;) { ++ /* ++ * We do the initial early heuristics without holding ++ * any task-queue locks at all. We'll only try to get ++ * the runqueue lock when things look like they will ++ * work out! ++ */ ++ rq = task_rq(p); ++ ++ /* ++ * If the task is actively running on another CPU ++ * still, just relax and busy-wait without holding ++ * any locks. ++ * ++ * NOTE! Since we don't hold any locks, it's not ++ * even sure that "rq" stays as the right runqueue! ++ * But we don't care, since "task_running()" will ++ * return false if the runqueue has changed and p ++ * is actually now running somewhere else! ++ */ ++ while (task_running(rq, p)) { ++ if (match_state && unlikely(p->state != match_state)) ++ return 0; ++ cpu_relax(); ++ } ++ ++ /* ++ * Ok, time to look more closely! We need the rq ++ * lock now, to be *sure*. If we're wrong, we'll ++ * just go back and repeat. ++ */ ++ rq = task_rq_lock(p, &rf); ++ trace_sched_wait_task(p); ++ running = task_running(rq, p); ++ queued = task_on_rq_queued(p); ++ ncsw = 0; ++ if (!match_state || p->state == match_state) ++ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ ++ task_rq_unlock(rq, p, &rf); ++ ++ /* ++ * If it changed from the expected state, bail out now. ++ */ ++ if (unlikely(!ncsw)) ++ break; ++ ++ /* ++ * Was it really running after all now that we ++ * checked with the proper locks actually held? ++ * ++ * Oops. Go back and try again.. ++ */ ++ if (unlikely(running)) { ++ cpu_relax(); ++ continue; ++ } ++ ++ /* ++ * It's not enough that it's not actively running, ++ * it must be off the runqueue _entirely_, and not ++ * preempted! ++ * ++ * So if it was still runnable (but just not actively ++ * running right now), it's preempted, and we should ++ * yield - it could be a while. ++ */ ++ if (unlikely(queued)) { ++ ktime_t to = NSEC_PER_SEC / HZ; ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_hrtimeout(&to, HRTIMER_MODE_REL); ++ continue; ++ } ++ ++ /* ++ * Ahh, all good. It wasn't running, and it wasn't ++ * runnable, which means that it will never become ++ * running in the future either. 
We're all done! ++ */ ++ break; ++ } ++ ++ return ncsw; ++} ++ ++/*** ++ * kick_process - kick a running thread to enter/exit the kernel ++ * @p: the to-be-kicked thread ++ * ++ * Cause a process which is running on another CPU to enter ++ * kernel-mode, without any delay. (to get signals handled.) ++ * ++ * NOTE: this function doesn't have to take the runqueue lock, ++ * because all it wants to ensure is that the remote task enters ++ * the kernel. If the IPI races and the task has been migrated ++ * to another CPU then no harm is done and the purpose has been ++ * achieved as well. ++ */ ++void kick_process(struct task_struct *p) ++{ ++ int cpu; ++ ++ preempt_disable(); ++ cpu = task_cpu(p); ++ if ((cpu != smp_processor_id()) && task_curr(p)) ++ smp_send_reschedule(cpu); ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(kick_process); ++ ++/* ++ * ->cpus_allowed is protected by both rq->lock and p->pi_lock ++ * ++ * A few notes on cpu_active vs cpu_online: ++ * ++ * - cpu_active must be a subset of cpu_online ++ * ++ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, ++ * see __set_cpus_allowed_ptr(). At this point the newly online ++ * CPU isn't yet part of the sched domains, and balancing will not ++ * see it. ++ * ++ * - on CPU-down we clear cpu_active() to mask the sched domains and ++ * avoid the load balancer to place new tasks on the to be removed ++ * CPU. Existing tasks will remain running there and will be taken ++ * off. ++ * ++ * This means that fallback selection must not select !active CPUs. ++ * And can assume that any active CPU must be online. Conversely ++ * select_task_rq() below may allow selection of !active CPUs in order ++ * to satisfy the above rules. ++ */ ++static int select_fallback_rq(int cpu, struct task_struct *p) ++{ ++ int nid = cpu_to_node(cpu); ++ const struct cpumask *nodemask = NULL; ++ enum { cpuset, possible, fail } state = cpuset; ++ int dest_cpu; ++ ++ /* ++ * If the node that the CPU is on has been offlined, cpu_to_node() ++ * will return -1. There is no CPU on the node, and we should ++ * select the CPU on the other node. ++ */ ++ if (nid != -1) { ++ nodemask = cpumask_of_node(nid); ++ ++ /* Look for allowed, online CPU in same node. */ ++ for_each_cpu(dest_cpu, nodemask) { ++ if (!cpu_active(dest_cpu)) ++ continue; ++ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) ++ return dest_cpu; ++ } ++ } ++ ++ for (;;) { ++ /* Any allowed, online CPU? */ ++ for_each_cpu(dest_cpu, &p->cpus_allowed) { ++ if (!is_cpu_allowed(p, dest_cpu)) ++ continue; ++ ++ goto out; ++ } ++ ++ /* No more Mr. Nice Guy. */ ++ switch (state) { ++ case cpuset: ++ if (IS_ENABLED(CONFIG_CPUSETS)) { ++ cpuset_cpus_allowed_fallback(p); ++ state = possible; ++ break; ++ } ++ /* Fall-through */ ++ case possible: ++ do_set_cpus_allowed(p, cpu_possible_mask); ++ state = fail; ++ break; ++ ++ case fail: ++ BUG(); ++ break; ++ } ++ } ++ ++out: ++ if (state != cpuset) { ++ /* ++ * Don't tell them about moving exiting tasks or ++ * kernel threads (both mm NULL), since they never ++ * leave kernel. ++ */ ++ if (p->mm && printk_ratelimit()) { ++ printk_deferred("process %d (%s) no longer affine to cpu%d\n", ++ task_pid_nr(p), p->comm, cpu); ++ } ++ } ++ ++ return dest_cpu; ++} ++ ++/* ++ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. 
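The comment block above spells out the calling convention for wait_task_inactive(): a zero return means @match_state no longer held, and two calls returning the same non-zero switch count mean the task never ran in between. The kernel-style sketch below is not taken from the patch; the helper name stayed_unscheduled() is made up for illustration, and it assumes <linux/delay.h> for msleep().

```c
/*
 * Sketch of the documented wait_task_inactive() usage pattern
 * (hypothetical helper, for illustration only).
 */
static bool stayed_unscheduled(struct task_struct *p)
{
	unsigned long ncsw, ncsw2;

	ncsw = wait_task_inactive(p, TASK_UNINTERRUPTIBLE);
	if (!ncsw)
		return false;	/* state changed under us, e.g. the task woke up */

	msleep(10);		/* arbitrary delay, just for the example */

	ncsw2 = wait_task_inactive(p, TASK_UNINTERRUPTIBLE);
	return ncsw2 == ncsw;	/* same count: it never got scheduled in between */
}
```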
++ */ ++static inline ++int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) ++{ ++ lockdep_assert_held(&p->pi_lock); ++ ++ if (p->nr_cpus_allowed > 1) ++ cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); ++ else ++ cpu = cpumask_any(&p->cpus_allowed); ++ ++ /* ++ * In order not to call set_task_cpu() on a blocking task we need ++ * to rely on ttwu() to place the task on a valid ->cpus_allowed ++ * CPU. ++ * ++ * Since this is common to all placement strategies, this lives here. ++ * ++ * [ this allows ->select_task() to simply return task_cpu(p) and ++ * not worry about this generic constraint ] ++ */ ++ if (unlikely(!is_cpu_allowed(p, cpu))) ++ cpu = select_fallback_rq(task_cpu(p), p); ++ ++ return cpu; ++} ++ ++static void update_avg(u64 *avg, u64 sample) ++{ ++ s64 diff = sample - *avg; ++ *avg += diff >> 3; ++} ++ ++void sched_set_stop_task(int cpu, struct task_struct *stop) ++{ ++ struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; ++ struct task_struct *old_stop = cpu_rq(cpu)->stop; ++ ++ if (stop) { ++ /* ++ * Make it appear like a SCHED_FIFO task, its something ++ * userspace knows about and won't get confused about. ++ * ++ * Also, it will make PI more or less work without too ++ * much confusion -- but then, stop work should not ++ * rely on PI working anyway. ++ */ ++ sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m); ++ ++ stop->sched_class = &stop_sched_class; ++ } ++ ++ cpu_rq(cpu)->stop = stop; ++ ++ if (old_stop) { ++ /* ++ * Reset it back to a normal scheduling class so that ++ * it can die in pieces. ++ */ ++ old_stop->sched_class = &rt_sched_class; ++ } ++} ++ ++#else ++ ++static inline int __set_cpus_allowed_ptr(struct task_struct *p, ++ const struct cpumask *new_mask, bool check) ++{ ++ return set_cpus_allowed_ptr(p, new_mask); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++static void ++ttwu_stat(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq; ++ ++ if (!schedstat_enabled()) ++ return; ++ ++ rq = this_rq(); ++ ++#ifdef CONFIG_SMP ++ if (cpu == rq->cpu) { ++ __schedstat_inc(rq->ttwu_local); ++ __schedstat_inc(p->se.statistics.nr_wakeups_local); ++ } else { ++ struct sched_domain *sd; ++ ++ __schedstat_inc(p->se.statistics.nr_wakeups_remote); ++ rcu_read_lock(); ++ for_each_domain(rq->cpu, sd) { ++ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { ++ __schedstat_inc(sd->ttwu_wake_remote); ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ } ++ ++ if (wake_flags & WF_MIGRATED) ++ __schedstat_inc(p->se.statistics.nr_wakeups_migrate); ++#endif /* CONFIG_SMP */ ++ ++ __schedstat_inc(rq->ttwu_count); ++ __schedstat_inc(p->se.statistics.nr_wakeups); ++ ++ if (wake_flags & WF_SYNC) ++ __schedstat_inc(p->se.statistics.nr_wakeups_sync); ++} ++ ++static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) ++{ ++ activate_task(rq, p, en_flags); ++ p->on_rq = TASK_ON_RQ_QUEUED; ++ ++ /* If a worker is waking up, notify the workqueue: */ ++ if (p->flags & PF_WQ_WORKER) ++ wq_worker_waking_up(p, cpu_of(rq)); ++} ++ ++/* ++ * Mark the task runnable and perform wakeup-preemption. ++ */ ++static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags, ++ struct rq_flags *rf) ++{ ++ check_preempt_curr(rq, p, wake_flags); ++ p->state = TASK_RUNNING; ++ trace_sched_wakeup(p); ++ ++#ifdef CONFIG_SMP ++ if (p->sched_class->task_woken) { ++ /* ++ * Our task @p is fully woken up and running; so its safe to ++ * drop the rq->lock, hereafter rq is only used for statistics. 
++ */ ++ rq_unpin_lock(rq, rf); ++ p->sched_class->task_woken(rq, p); ++ rq_repin_lock(rq, rf); ++ } ++ ++ if (rq->idle_stamp) { ++ u64 delta = rq_clock(rq) - rq->idle_stamp; ++ u64 max = 2*rq->max_idle_balance_cost; ++ ++ update_avg(&rq->avg_idle, delta); ++ ++ if (rq->avg_idle > max) ++ rq->avg_idle = max; ++ ++ rq->idle_stamp = 0; ++ } ++#endif ++} ++ ++static void ++ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, ++ struct rq_flags *rf) ++{ ++ int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK; ++ ++ lockdep_assert_held(&rq->lock); ++ ++#ifdef CONFIG_SMP ++ if (p->sched_contributes_to_load) ++ rq->nr_uninterruptible--; ++ ++ if (wake_flags & WF_MIGRATED) ++ en_flags |= ENQUEUE_MIGRATED; ++#endif ++ ++ ttwu_activate(rq, p, en_flags); ++ ttwu_do_wakeup(rq, p, wake_flags, rf); ++} ++ ++/* ++ * Called in case the task @p isn't fully descheduled from its runqueue, ++ * in this case we must do a remote wakeup. Its a 'light' wakeup though, ++ * since all we need to do is flip p->state to TASK_RUNNING, since ++ * the task is still ->on_rq. ++ */ ++static int ttwu_remote(struct task_struct *p, int wake_flags) ++{ ++ struct rq_flags rf; ++ struct rq *rq; ++ int ret = 0; ++ ++ rq = __task_rq_lock(p, &rf); ++ if (task_on_rq_queued(p)) { ++ /* check_preempt_curr() may use rq clock */ ++ update_rq_clock(rq); ++ ttwu_do_wakeup(rq, p, wake_flags, &rf); ++ ret = 1; ++ } ++ __task_rq_unlock(rq, &rf); ++ ++ return ret; ++} ++ ++#ifdef CONFIG_SMP ++void sched_ttwu_pending(void) ++{ ++ struct rq *rq = this_rq(); ++ struct llist_node *llist = llist_del_all(&rq->wake_list); ++ struct task_struct *p, *t; ++ struct rq_flags rf; ++ ++ if (!llist) ++ return; ++ ++ rq_lock_irqsave(rq, &rf); ++ update_rq_clock(rq); ++ ++ llist_for_each_entry_safe(p, t, llist, wake_entry) ++ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf); ++ ++ rq_unlock_irqrestore(rq, &rf); ++} ++ ++void scheduler_ipi(void) ++{ ++ /* ++ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting ++ * TIF_NEED_RESCHED remotely (for the first time) will also send ++ * this IPI. ++ */ ++ preempt_fold_need_resched(); ++ ++ if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) ++ return; ++ ++ /* ++ * Not all reschedule IPI handlers call irq_enter/irq_exit, since ++ * traditionally all their work was done from the interrupt return ++ * path. Now that we actually do some work, we need to make sure ++ * we do call them. ++ * ++ * Some archs already do call them, luckily irq_enter/exit nest ++ * properly. ++ * ++ * Arguably we should visit all archs and update all handlers, ++ * however a fair share of IPIs are still resched only so this would ++ * somewhat pessimize the simple resched case. ++ */ ++ irq_enter(); ++ sched_ttwu_pending(); ++ ++ /* ++ * Check if someone kicked us for doing the nohz idle load balance. 
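update_avg() above is a 1/8-weight exponential moving average, and ttwu_do_wakeup() feeds it with the idle-time deltas that become rq->avg_idle. The standalone program below is not part of the patch; it just shows how a few samples pull the average, using the same shift-by-3 update.

```c
/* Illustrative only: the 1/8-weight moving average used for rq->avg_idle. */
#include <stdio.h>
#include <stdint.h>

static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = sample - *avg;

	*avg += diff >> 3;	/* move avg 1/8 of the way toward sample */
}

int main(void)
{
	uint64_t avg_idle = 0;
	uint64_t samples[] = { 800000, 800000, 800000, 100000, 100000 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		update_avg(&avg_idle, samples[i]);
		printf("sample %7llu -> avg_idle %7llu\n",
		       (unsigned long long)samples[i],
		       (unsigned long long)avg_idle);
	}
	return 0;
}
```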
++ */ ++ if (unlikely(got_nohz_idle_kick())) { ++ this_rq()->idle_balance = 1; ++ raise_softirq_irqoff(SCHED_SOFTIRQ); ++ } ++ irq_exit(); ++} ++ ++static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); ++ ++ if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { ++ if (!set_nr_if_polling(rq->idle)) ++ smp_send_reschedule(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++ } ++} ++ ++void wake_up_if_idle(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct rq_flags rf; ++ ++ rcu_read_lock(); ++ ++ if (!is_idle_task(rcu_dereference(rq->curr))) ++ goto out; ++ ++ if (set_nr_if_polling(rq->idle)) { ++ trace_sched_wake_idle_without_ipi(cpu); ++ } else { ++ rq_lock_irqsave(rq, &rf); ++ if (is_idle_task(rq->curr)) ++ smp_send_reschedule(cpu); ++ /* Else CPU is not idle, do nothing here: */ ++ rq_unlock_irqrestore(rq, &rf); ++ } ++ ++out: ++ rcu_read_unlock(); ++} ++ ++bool cpus_share_cache(int this_cpu, int that_cpu) ++{ ++ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); ++} ++#endif /* CONFIG_SMP */ ++ ++static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct rq_flags rf; ++ ++#if defined(CONFIG_SMP) ++ if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { ++ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ ++ ttwu_queue_remote(p, cpu, wake_flags); ++ return; ++ } ++#endif ++ ++ rq_lock(rq, &rf); ++ update_rq_clock(rq); ++ ttwu_do_activate(rq, p, wake_flags, &rf); ++ rq_unlock(rq, &rf); ++} ++ ++/* ++ * Notes on Program-Order guarantees on SMP systems. ++ * ++ * MIGRATION ++ * ++ * The basic program-order guarantee on SMP systems is that when a task [t] ++ * migrates, all its activity on its old CPU [c0] happens-before any subsequent ++ * execution on its new CPU [c1]. ++ * ++ * For migration (of runnable tasks) this is provided by the following means: ++ * ++ * A) UNLOCK of the rq(c0)->lock scheduling out task t ++ * B) migration for t is required to synchronize *both* rq(c0)->lock and ++ * rq(c1)->lock (if not at the same time, then in that order). ++ * C) LOCK of the rq(c1)->lock scheduling in task ++ * ++ * Release/acquire chaining guarantees that B happens after A and C after B. ++ * Note: the CPU doing B need not be c0 or c1 ++ * ++ * Example: ++ * ++ * CPU0 CPU1 CPU2 ++ * ++ * LOCK rq(0)->lock ++ * sched-out X ++ * sched-in Y ++ * UNLOCK rq(0)->lock ++ * ++ * LOCK rq(0)->lock // orders against CPU0 ++ * dequeue X ++ * UNLOCK rq(0)->lock ++ * ++ * LOCK rq(1)->lock ++ * enqueue X ++ * UNLOCK rq(1)->lock ++ * ++ * LOCK rq(1)->lock // orders against CPU2 ++ * sched-out Z ++ * sched-in X ++ * UNLOCK rq(1)->lock ++ * ++ * ++ * BLOCKING -- aka. SLEEP + WAKEUP ++ * ++ * For blocking we (obviously) need to provide the same guarantee as for ++ * migration. However the means are completely different as there is no lock ++ * chain to provide order. 
Instead we do: ++ * ++ * 1) smp_store_release(X->on_cpu, 0) ++ * 2) smp_cond_load_acquire(!X->on_cpu) ++ * ++ * Example: ++ * ++ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) ++ * ++ * LOCK rq(0)->lock LOCK X->pi_lock ++ * dequeue X ++ * sched-out X ++ * smp_store_release(X->on_cpu, 0); ++ * ++ * smp_cond_load_acquire(&X->on_cpu, !VAL); ++ * X->state = WAKING ++ * set_task_cpu(X,2) ++ * ++ * LOCK rq(2)->lock ++ * enqueue X ++ * X->state = RUNNING ++ * UNLOCK rq(2)->lock ++ * ++ * LOCK rq(2)->lock // orders against CPU1 ++ * sched-out Z ++ * sched-in X ++ * UNLOCK rq(2)->lock ++ * ++ * UNLOCK X->pi_lock ++ * UNLOCK rq(0)->lock ++ * ++ * ++ * However, for wakeups there is a second guarantee we must provide, namely we ++ * must ensure that CONDITION=1 done by the caller can not be reordered with ++ * accesses to the task state; see try_to_wake_up() and set_current_state(). ++ */ ++ ++/** ++ * try_to_wake_up - wake up a thread ++ * @p: the thread to be awakened ++ * @state: the mask of task states that can be woken ++ * @wake_flags: wake modifier flags (WF_*) ++ * ++ * If (@state & @p->state) @p->state = TASK_RUNNING. ++ * ++ * If the task was not queued/runnable, also place it back on a runqueue. ++ * ++ * Atomic against schedule() which would dequeue a task, also see ++ * set_current_state(). ++ * ++ * This function executes a full memory barrier before accessing the task ++ * state; see set_current_state(). ++ * ++ * Return: %true if @p->state changes (an actual wakeup was done), ++ * %false otherwise. ++ */ ++static int ++try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) ++{ ++ unsigned long flags; ++ int cpu, success = 0; ++ ++ /* ++ * If we are going to wake up a thread waiting for CONDITION we ++ * need to ensure that CONDITION=1 done by the caller can not be ++ * reordered with p->state check below. This pairs with mb() in ++ * set_current_state() the waiting thread does. ++ */ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ smp_mb__after_spinlock(); ++ if (!(p->state & state)) ++ goto out; ++ ++ trace_sched_waking(p); ++ ++ /* We're going to change ->state: */ ++ success = 1; ++ cpu = task_cpu(p); ++ ++ /* ++ * Ensure we load p->on_rq _after_ p->state, otherwise it would ++ * be possible to, falsely, observe p->on_rq == 0 and get stuck ++ * in smp_cond_load_acquire() below. ++ * ++ * sched_ttwu_pending() try_to_wake_up() ++ * STORE p->on_rq = 1 LOAD p->state ++ * UNLOCK rq->lock ++ * ++ * __schedule() (switch to task 'p') ++ * LOCK rq->lock smp_rmb(); ++ * smp_mb__after_spinlock(); ++ * UNLOCK rq->lock ++ * ++ * [task p] ++ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq ++ * ++ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in ++ * __schedule(). See the comment for smp_mb__after_spinlock(). ++ */ ++ smp_rmb(); ++ if (p->on_rq && ttwu_remote(p, wake_flags)) ++ goto stat; ++ ++#ifdef CONFIG_SMP ++ /* ++ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be ++ * possible to, falsely, observe p->on_cpu == 0. ++ * ++ * One must be running (->on_cpu == 1) in order to remove oneself ++ * from the runqueue. ++ * ++ * __schedule() (switch to task 'p') try_to_wake_up() ++ * STORE p->on_cpu = 1 LOAD p->on_rq ++ * UNLOCK rq->lock ++ * ++ * __schedule() (put 'p' to sleep) ++ * LOCK rq->lock smp_rmb(); ++ * smp_mb__after_spinlock(); ++ * STORE p->on_rq = 0 LOAD p->on_cpu ++ * ++ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in ++ * __schedule(). See the comment for smp_mb__after_spinlock(). 
++ */ ++ smp_rmb(); ++ ++ /* ++ * If the owning (remote) CPU is still in the middle of schedule() with ++ * this task as prev, wait until its done referencing the task. ++ * ++ * Pairs with the smp_store_release() in finish_task(). ++ * ++ * This ensures that tasks getting woken will be fully ordered against ++ * their previous state and preserve Program Order. ++ */ ++ smp_cond_load_acquire(&p->on_cpu, !VAL); ++ ++ p->sched_contributes_to_load = !!task_contributes_to_load(p); ++ p->state = TASK_WAKING; ++ ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } ++ ++ cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags); ++ if (task_cpu(p) != cpu) { ++ wake_flags |= WF_MIGRATED; ++ set_task_cpu(p, cpu); ++ } ++ ++#else /* CONFIG_SMP */ ++ ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } ++ ++#endif /* CONFIG_SMP */ ++ ++ ttwu_queue(p, cpu, wake_flags); ++stat: ++ ttwu_stat(p, cpu, wake_flags); ++out: ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++ return success; ++} ++ ++/** ++ * try_to_wake_up_local - try to wake up a local task with rq lock held ++ * @p: the thread to be awakened ++ * @rf: request-queue flags for pinning ++ * ++ * Put @p on the run-queue if it's not already there. The caller must ++ * ensure that this_rq() is locked, @p is bound to this_rq() and not ++ * the current task. ++ */ ++static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) ++{ ++ struct rq *rq = task_rq(p); ++ ++ if (WARN_ON_ONCE(rq != this_rq()) || ++ WARN_ON_ONCE(p == current)) ++ return; ++ ++ lockdep_assert_held(&rq->lock); ++ ++ if (!raw_spin_trylock(&p->pi_lock)) { ++ /* ++ * This is OK, because current is on_cpu, which avoids it being ++ * picked for load-balance and preemption/IRQs are still ++ * disabled avoiding further scheduler activity on it and we've ++ * not yet picked a replacement task. ++ */ ++ rq_unlock(rq, rf); ++ raw_spin_lock(&p->pi_lock); ++ rq_relock(rq, rf); ++ } ++ ++ if (!(p->state & TASK_NORMAL)) ++ goto out; ++ ++ trace_sched_waking(p); ++ ++ if (!task_on_rq_queued(p)) { ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&rq->nr_iowait); ++ } ++ ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK); ++ } ++ ++ ttwu_do_wakeup(rq, p, 0, rf); ++ ttwu_stat(p, smp_processor_id(), 0); ++out: ++ raw_spin_unlock(&p->pi_lock); ++} ++ ++/** ++ * wake_up_process - Wake up a specific process ++ * @p: The process to be woken up. ++ * ++ * Attempt to wake up the nominated process and move it to the set of runnable ++ * processes. ++ * ++ * Return: 1 if the process was woken up, 0 if it was already running. ++ * ++ * This function executes a full memory barrier before accessing the task state. ++ */ ++int wake_up_process(struct task_struct *p) ++{ ++ return try_to_wake_up(p, TASK_NORMAL, 0); ++} ++EXPORT_SYMBOL(wake_up_process); ++ ++int wake_up_state(struct task_struct *p, unsigned int state) ++{ ++ return try_to_wake_up(p, state, 0); ++} ++ ++/* ++ * Perform scheduler related setup for a newly forked process p. ++ * p is forked by current. 
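The ordering argument above comes down to one release/acquire pair: the CPU scheduling the task out publishes its state and then clears ->on_cpu with smp_store_release(), while the waker spins in smp_cond_load_acquire() until it observes that store. The program below is a userspace analogy, not part of the patch, using C11 atomics and pthreads to show why the waker is then guaranteed to see everything written before the release.

```c
/* Userspace analogy of the on_cpu release/acquire pairing described above. */
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_int on_cpu = 1;
static int prev_state;			/* stands in for the departing task's state */

static void *scheduler_side(void *arg)
{
	(void)arg;
	prev_state = 42;					/* plain store ...    */
	atomic_store_explicit(&on_cpu, 0, memory_order_release);	/* ... then published */
	return NULL;
}

static void *waker_side(void *arg)
{
	(void)arg;
	/* analogue of smp_cond_load_acquire(&p->on_cpu, !VAL) */
	while (atomic_load_explicit(&on_cpu, memory_order_acquire))
		;
	printf("waker sees prev_state=%d\n", prev_state);	/* guaranteed 42 */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, waker_side, NULL);
	pthread_create(&b, NULL, scheduler_side, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}
```

Built with `cc -pthread`, the waker always prints 42; without the release/acquire pair the read of prev_state would be a data race.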
++ * ++ * __sched_fork() is basic setup used by init_idle() too: ++ */ ++static void __sched_fork(unsigned long clone_flags, struct task_struct *p) ++{ ++ p->on_rq = 0; ++ ++ p->se.on_rq = 0; ++ p->se.exec_start = 0; ++ p->se.sum_exec_runtime = 0; ++ p->se.prev_sum_exec_runtime = 0; ++ p->se.nr_migrations = 0; ++ p->se.vruntime = 0; ++ INIT_LIST_HEAD(&p->se.group_node); ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ p->se.cfs_rq = NULL; ++#endif ++ ++#ifdef CONFIG_SCHEDSTATS ++ /* Even if schedstat is disabled, there should not be garbage */ ++ memset(&p->se.statistics, 0, sizeof(p->se.statistics)); ++#endif ++ ++ RB_CLEAR_NODE(&p->dl.rb_node); ++ init_dl_task_timer(&p->dl); ++ init_dl_inactive_task_timer(&p->dl); ++ __dl_clear_params(p); ++ ++ INIT_LIST_HEAD(&p->rt.run_list); ++ p->rt.timeout = 0; ++ p->rt.time_slice = sched_rr_timeslice; ++ p->rt.on_rq = 0; ++ p->rt.on_list = 0; ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ INIT_HLIST_HEAD(&p->preempt_notifiers); ++#endif ++ ++ init_numa_balancing(clone_flags, p); ++} ++ ++DEFINE_STATIC_KEY_FALSE(sched_numa_balancing); ++ ++#ifdef CONFIG_NUMA_BALANCING ++ ++void set_numabalancing_state(bool enabled) ++{ ++ if (enabled) ++ static_branch_enable(&sched_numa_balancing); ++ else ++ static_branch_disable(&sched_numa_balancing); ++} ++ ++#ifdef CONFIG_PROC_SYSCTL ++int sysctl_numa_balancing(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ struct ctl_table t; ++ int err; ++ int state = static_branch_likely(&sched_numa_balancing); ++ ++ if (write && !capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ t = *table; ++ t.data = &state; ++ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); ++ if (err < 0) ++ return err; ++ if (write) ++ set_numabalancing_state(state); ++ return err; ++} ++#endif ++#endif ++ ++#ifdef CONFIG_SCHEDSTATS ++ ++DEFINE_STATIC_KEY_FALSE(sched_schedstats); ++static bool __initdata __sched_schedstats = false; ++ ++static void set_schedstats(bool enabled) ++{ ++ if (enabled) ++ static_branch_enable(&sched_schedstats); ++ else ++ static_branch_disable(&sched_schedstats); ++} ++ ++void force_schedstat_enabled(void) ++{ ++ if (!schedstat_enabled()) { ++ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); ++ static_branch_enable(&sched_schedstats); ++ } ++} ++ ++static int __init setup_schedstats(char *str) ++{ ++ int ret = 0; ++ if (!str) ++ goto out; ++ ++ /* ++ * This code is called before jump labels have been set up, so we can't ++ * change the static branch directly just yet. Instead set a temporary ++ * variable so init_schedstats() can do it later. 
++ */ ++ if (!strcmp(str, "enable")) { ++ __sched_schedstats = true; ++ ret = 1; ++ } else if (!strcmp(str, "disable")) { ++ __sched_schedstats = false; ++ ret = 1; ++ } ++out: ++ if (!ret) ++ pr_warn("Unable to parse schedstats=\n"); ++ ++ return ret; ++} ++__setup("schedstats=", setup_schedstats); ++ ++static void __init init_schedstats(void) ++{ ++ set_schedstats(__sched_schedstats); ++} ++ ++#ifdef CONFIG_PROC_SYSCTL ++int sysctl_schedstats(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ struct ctl_table t; ++ int err; ++ int state = static_branch_likely(&sched_schedstats); ++ ++ if (write && !capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ t = *table; ++ t.data = &state; ++ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); ++ if (err < 0) ++ return err; ++ if (write) ++ set_schedstats(state); ++ return err; ++} ++#endif /* CONFIG_PROC_SYSCTL */ ++#else /* !CONFIG_SCHEDSTATS */ ++static inline void init_schedstats(void) {} ++#endif /* CONFIG_SCHEDSTATS */ ++ ++/* ++ * fork()/clone()-time setup: ++ */ ++int sched_fork(unsigned long clone_flags, struct task_struct *p) ++{ ++ unsigned long flags; ++ ++ __sched_fork(clone_flags, p); ++ /* ++ * We mark the process as NEW here. This guarantees that ++ * nobody will actually run it, and a signal or other external ++ * event cannot wake it up and insert it on the runqueue either. ++ */ ++ p->state = TASK_NEW; ++ ++ /* ++ * Make sure we do not leak PI boosting priority to the child. ++ */ ++ p->prio = current->normal_prio; ++ ++ /* ++ * Revert to default priority/policy on fork if requested. ++ */ ++ if (unlikely(p->sched_reset_on_fork)) { ++ if (task_has_dl_policy(p) || task_has_rt_policy(p)) { ++ p->policy = SCHED_NORMAL; ++ p->static_prio = NICE_TO_PRIO(0); ++ p->rt_priority = 0; ++ } else if (PRIO_TO_NICE(p->static_prio) < 0) ++ p->static_prio = NICE_TO_PRIO(0); ++ ++ p->prio = p->normal_prio = __normal_prio(p); ++ set_load_weight(p, false); ++ ++ /* ++ * We don't need the reset flag anymore after the fork. It has ++ * fulfilled its duty: ++ */ ++ p->sched_reset_on_fork = 0; ++ } ++ ++ if (dl_prio(p->prio)) ++ return -EAGAIN; ++ else if (rt_prio(p->prio)) ++ p->sched_class = &rt_sched_class; ++ else ++ p->sched_class = &fair_sched_class; ++ ++ init_entity_runnable_average(&p->se); ++ ++ /* ++ * The child is not yet in the pid-hash so no cgroup attach races, ++ * and the cgroup is pinned to this child due to cgroup_fork() ++ * is ran before sched_fork(). ++ * ++ * Silence PROVE_RCU. ++ */ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ rseq_migrate(p); ++ /* ++ * We're setting the CPU for the first time, we don't migrate, ++ * so use __set_task_cpu(). ++ */ ++ __set_task_cpu(p, smp_processor_id()); ++ if (p->sched_class->task_fork) ++ p->sched_class->task_fork(p); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++#ifdef CONFIG_SCHED_INFO ++ if (likely(sched_info_on())) ++ memset(&p->sched_info, 0, sizeof(p->sched_info)); ++#endif ++#if defined(CONFIG_SMP) ++ p->on_cpu = 0; ++#endif ++ init_task_preempt_count(p); ++#ifdef CONFIG_SMP ++ plist_node_init(&p->pushable_tasks, MAX_PRIO); ++ RB_CLEAR_NODE(&p->pushable_dl_tasks); ++#endif ++ return 0; ++} ++ ++unsigned long to_ratio(u64 period, u64 runtime) ++{ ++ if (runtime == RUNTIME_INF) ++ return BW_UNIT; ++ ++ /* ++ * Doing this here saves a lot of checks in all ++ * the calling paths, and returning zero seems ++ * safe for them anyway. 
++ */ ++ if (period == 0) ++ return 0; ++ ++ return div64_u64(runtime << BW_SHIFT, period); ++} ++ ++/* ++ * wake_up_new_task - wake up a newly created task for the first time. ++ * ++ * This function will do some initial scheduler statistics housekeeping ++ * that must be done for every newly created context, then puts the task ++ * on the runqueue and wakes it. ++ */ ++void wake_up_new_task(struct task_struct *p) ++{ ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, rf.flags); ++ p->state = TASK_RUNNING; ++#ifdef CONFIG_SMP ++ /* ++ * Fork balancing, do it here and not earlier because: ++ * - cpus_allowed can change in the fork path ++ * - any previously selected CPU might disappear through hotplug ++ * ++ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, ++ * as we're not fully set-up yet. ++ */ ++ p->recent_used_cpu = task_cpu(p); ++ rseq_migrate(p); ++ __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); ++#endif ++ rq = __task_rq_lock(p, &rf); ++ update_rq_clock(rq); ++ post_init_entity_util_avg(&p->se); ++ ++ activate_task(rq, p, ENQUEUE_NOCLOCK); ++ p->on_rq = TASK_ON_RQ_QUEUED; ++ trace_sched_wakeup_new(p); ++ check_preempt_curr(rq, p, WF_FORK); ++#ifdef CONFIG_SMP ++ if (p->sched_class->task_woken) { ++ /* ++ * Nothing relies on rq->lock after this, so its fine to ++ * drop it. ++ */ ++ rq_unpin_lock(rq, &rf); ++ p->sched_class->task_woken(rq, p); ++ rq_repin_lock(rq, &rf); ++ } ++#endif ++ task_rq_unlock(rq, p, &rf); ++} ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ ++static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); ++ ++void preempt_notifier_inc(void) ++{ ++ static_branch_inc(&preempt_notifier_key); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_inc); ++ ++void preempt_notifier_dec(void) ++{ ++ static_branch_dec(&preempt_notifier_key); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_dec); ++ ++/** ++ * preempt_notifier_register - tell me when current is being preempted & rescheduled ++ * @notifier: notifier struct to register ++ */ ++void preempt_notifier_register(struct preempt_notifier *notifier) ++{ ++ if (!static_branch_unlikely(&preempt_notifier_key)) ++ WARN(1, "registering preempt_notifier while notifiers disabled\n"); ++ ++ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_register); ++ ++/** ++ * preempt_notifier_unregister - no longer interested in preemption notifications ++ * @notifier: notifier struct to unregister ++ * ++ * This is *not* safe to call from within a preemption notifier. 
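to_ratio() above expresses a runtime/period bandwidth as a fixed-point fraction used by the bandwidth checks elsewhere in this file. The worked example below is standalone and not part of the patch; it assumes the usual mainline BW_SHIFT of 20 (so BW_UNIT is 2^20) and leaves out the RUNTIME_INF special case.

```c
/* Illustrative only: the fixed-point ratio computed by to_ratio(). */
#include <stdio.h>
#include <stdint.h>

#define BW_SHIFT 20
#define BW_UNIT  (1ULL << BW_SHIFT)

static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	if (period == 0)
		return 0;
	return (runtime << BW_SHIFT) / period;
}

int main(void)
{
	/* 10ms of runtime every 100ms -> roughly 10% of BW_UNIT */
	uint64_t r = to_ratio(100 * 1000 * 1000ULL, 10 * 1000 * 1000ULL);

	printf("ratio = %llu (%.1f%% of %llu)\n",
	       (unsigned long long)r, 100.0 * r / BW_UNIT,
	       (unsigned long long)BW_UNIT);
	return 0;
}
```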
++ */ ++void preempt_notifier_unregister(struct preempt_notifier *notifier) ++{ ++ hlist_del(¬ifier->link); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_unregister); ++ ++static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++ struct preempt_notifier *notifier; ++ ++ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) ++ notifier->ops->sched_in(notifier, raw_smp_processor_id()); ++} ++ ++static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++ if (static_branch_unlikely(&preempt_notifier_key)) ++ __fire_sched_in_preempt_notifiers(curr); ++} ++ ++static void ++__fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++ struct preempt_notifier *notifier; ++ ++ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) ++ notifier->ops->sched_out(notifier, next); ++} ++ ++static __always_inline void ++fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++ if (static_branch_unlikely(&preempt_notifier_key)) ++ __fire_sched_out_preempt_notifiers(curr, next); ++} ++ ++#else /* !CONFIG_PREEMPT_NOTIFIERS */ ++ ++static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++} ++ ++static inline void ++fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++} ++ ++#endif /* CONFIG_PREEMPT_NOTIFIERS */ ++ ++static inline void prepare_task(struct task_struct *next) ++{ ++#ifdef CONFIG_SMP ++ /* ++ * Claim the task as running, we do this before switching to it ++ * such that any running task will have this set. ++ */ ++ next->on_cpu = 1; ++#endif ++} ++ ++static inline void finish_task(struct task_struct *prev) ++{ ++#ifdef CONFIG_SMP ++ /* ++ * After ->on_cpu is cleared, the task can be moved to a different CPU. ++ * We must ensure this doesn't happen until the switch is completely ++ * finished. ++ * ++ * In particular, the load of prev->state in finish_task_switch() must ++ * happen before this. ++ * ++ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). ++ */ ++ smp_store_release(&prev->on_cpu, 0); ++#endif ++} ++ ++static inline void ++prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf) ++{ ++ /* ++ * Since the runqueue lock will be released by the next ++ * task (which is an invalid locking op but in the case ++ * of the scheduler it's an obvious special-case), so we ++ * do an early lockdep release here: ++ */ ++ rq_unpin_lock(rq, rf); ++ spin_release(&rq->lock.dep_map, 1, _THIS_IP_); ++#ifdef CONFIG_DEBUG_SPINLOCK ++ /* this is a valid case when another task releases the spinlock */ ++ rq->lock.owner = next; ++#endif ++} ++ ++static inline void finish_lock_switch(struct rq *rq) ++{ ++ /* ++ * If we are tracking spinlock dependencies then we have to ++ * fix up the runqueue lock - which gets 'carried over' from ++ * prev into current: ++ */ ++ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); ++ raw_spin_unlock_irq(&rq->lock); ++} ++ ++/* ++ * NOP if the arch has not defined these: ++ */ ++ ++#ifndef prepare_arch_switch ++# define prepare_arch_switch(next) do { } while (0) ++#endif ++ ++#ifndef finish_arch_post_lock_switch ++# define finish_arch_post_lock_switch() do { } while (0) ++#endif ++ ++/** ++ * prepare_task_switch - prepare to switch tasks ++ * @rq: the runqueue preparing to switch ++ * @prev: the current task that is being switched out ++ * @next: the task we are going to switch to. 
++ * ++ * This is called with the rq lock held and interrupts off. It must ++ * be paired with a subsequent finish_task_switch after the context ++ * switch. ++ * ++ * prepare_task_switch sets up locking and calls architecture specific ++ * hooks. ++ */ ++static inline void ++prepare_task_switch(struct rq *rq, struct task_struct *prev, ++ struct task_struct *next) ++{ ++ kcov_prepare_switch(prev); ++ sched_info_switch(rq, prev, next); ++ perf_event_task_sched_out(prev, next); ++ rseq_preempt(prev); ++ fire_sched_out_preempt_notifiers(prev, next); ++ prepare_task(next); ++ prepare_arch_switch(next); ++} ++ ++/** ++ * finish_task_switch - clean up after a task-switch ++ * @prev: the thread we just switched away from. ++ * ++ * finish_task_switch must be called after the context switch, paired ++ * with a prepare_task_switch call before the context switch. ++ * finish_task_switch will reconcile locking set up by prepare_task_switch, ++ * and do any other architecture-specific cleanup actions. ++ * ++ * Note that we may have delayed dropping an mm in context_switch(). If ++ * so, we finish that here outside of the runqueue lock. (Doing it ++ * with the lock held can cause deadlocks; see schedule() for ++ * details.) ++ * ++ * The context switch have flipped the stack from under us and restored the ++ * local variables which were saved when this task called schedule() in the ++ * past. prev == current is still correct but we need to recalculate this_rq ++ * because prev may have moved to another CPU. ++ */ ++static struct rq *finish_task_switch(struct task_struct *prev) ++ __releases(rq->lock) ++{ ++ struct rq *rq = this_rq(); ++ struct mm_struct *mm = rq->prev_mm; ++ long prev_state; ++ ++ /* ++ * The previous task will have left us with a preempt_count of 2 ++ * because it left us after: ++ * ++ * schedule() ++ * preempt_disable(); // 1 ++ * __schedule() ++ * raw_spin_lock_irq(&rq->lock) // 2 ++ * ++ * Also, see FORK_PREEMPT_COUNT. ++ */ ++ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, ++ "corrupted preempt_count: %s/%d/0x%x\n", ++ current->comm, current->pid, preempt_count())) ++ preempt_count_set(FORK_PREEMPT_COUNT); ++ ++ rq->prev_mm = NULL; ++ ++ /* ++ * A task struct has one reference for the use as "current". ++ * If a task dies, then it sets TASK_DEAD in tsk->state and calls ++ * schedule one last time. The schedule call will never return, and ++ * the scheduled task must drop that reference. ++ * ++ * We must observe prev->state before clearing prev->on_cpu (in ++ * finish_task), otherwise a concurrent wakeup can get prev ++ * running on another CPU and we could rave with its RUNNING -> DEAD ++ * transition, resulting in a double drop. ++ */ ++ prev_state = prev->state; ++ vtime_task_switch(prev); ++ perf_event_task_sched_in(prev, current); ++ finish_task(prev); ++ finish_lock_switch(rq); ++ finish_arch_post_lock_switch(); ++ kcov_finish_switch(current); ++ ++ fire_sched_in_preempt_notifiers(current); ++ /* ++ * When switching through a kernel thread, the loop in ++ * membarrier_{private,global}_expedited() may have observed that ++ * kernel thread and not issued an IPI. It is therefore possible to ++ * schedule between user->kernel->user threads without passing though ++ * switch_mm(). Membarrier requires a barrier after storing to ++ * rq->curr, before returning to userspace, so provide them here: ++ * ++ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly ++ * provided by mmdrop(), ++ * - a sync_core for SYNC_CORE. 
++ */ ++ if (mm) { ++ membarrier_mm_sync_core_before_usermode(mm); ++ mmdrop(mm); ++ } ++ if (unlikely(prev_state == TASK_DEAD)) { ++ if (prev->sched_class->task_dead) ++ prev->sched_class->task_dead(prev); ++ ++ /* ++ * Remove function-return probe instances associated with this ++ * task and put them back on the free list. ++ */ ++ kprobe_flush_task(prev); ++ ++ /* Task is done with its stack. */ ++ put_task_stack(prev); ++ ++ put_task_struct(prev); ++ } ++ ++ tick_nohz_task_switch(); ++ return rq; ++} ++ ++#ifdef CONFIG_SMP ++ ++/* rq->lock is NOT held, but preemption is disabled */ ++static void __balance_callback(struct rq *rq) ++{ ++ struct callback_head *head, *next; ++ void (*func)(struct rq *rq); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ head = rq->balance_callback; ++ rq->balance_callback = NULL; ++ while (head) { ++ func = (void (*)(struct rq *))head->func; ++ next = head->next; ++ head->next = NULL; ++ head = next; ++ ++ func(rq); ++ } ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++} ++ ++static inline void balance_callback(struct rq *rq) ++{ ++ if (unlikely(rq->balance_callback)) ++ __balance_callback(rq); ++} ++ ++#else ++ ++static inline void balance_callback(struct rq *rq) ++{ ++} ++ ++#endif ++ ++/** ++ * schedule_tail - first thing a freshly forked thread must call. ++ * @prev: the thread we just switched away from. ++ */ ++asmlinkage __visible void schedule_tail(struct task_struct *prev) ++ __releases(rq->lock) ++{ ++ struct rq *rq; ++ ++ /* ++ * New tasks start with FORK_PREEMPT_COUNT, see there and ++ * finish_task_switch() for details. ++ * ++ * finish_task_switch() will drop rq->lock() and lower preempt_count ++ * and the preempt_enable() will end up enabling preemption (on ++ * PREEMPT_COUNT kernels). ++ */ ++ ++ rq = finish_task_switch(prev); ++ balance_callback(rq); ++ preempt_enable(); ++ ++ if (current->set_child_tid) ++ put_user(task_pid_vnr(current), current->set_child_tid); ++ ++ calculate_sigpending(); ++} ++ ++/* ++ * context_switch - switch to the new MM and the new thread's register state. ++ */ ++static __always_inline struct rq * ++context_switch(struct rq *rq, struct task_struct *prev, ++ struct task_struct *next, struct rq_flags *rf) ++{ ++ prepare_task_switch(rq, prev, next); ++ ++ /* ++ * For paravirt, this is coupled with an exit in switch_to to ++ * combine the page table reload and the switch backend into ++ * one hypercall. ++ */ ++ arch_start_context_switch(prev); ++ ++ /* ++ * kernel -> kernel lazy + transfer active ++ * user -> kernel lazy + mmgrab() active ++ * ++ * kernel -> user switch + mmdrop() active ++ * user -> user switch ++ */ ++ if (!next->mm) { // to kernel ++ enter_lazy_tlb(prev->active_mm, next); ++ ++ next->active_mm = prev->active_mm; ++ if (prev->mm) // from user ++ mmgrab(prev->active_mm); ++ else ++ prev->active_mm = NULL; ++ } else { // to user ++ membarrier_switch_mm(rq, prev->active_mm, next->mm); ++ /* ++ * sys_membarrier() requires an smp_mb() between setting ++ * rq->curr / membarrier_switch_mm() and returning to userspace. ++ * ++ * The below provides this either through switch_mm(), or in ++ * case 'prev->active_mm == next->mm' through ++ * finish_task_switch()'s mmdrop(). ++ */ ++ switch_mm_irqs_off(prev->active_mm, next->mm, next); ++ ++ if (!prev->mm) { // from kernel ++ /* will mmdrop() in finish_task_switch(). 
*/ ++ rq->prev_mm = prev->active_mm; ++ prev->active_mm = NULL; ++ } ++ } ++ ++ rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); ++ ++ prepare_lock_switch(rq, next, rf); ++ ++ /* Here we just switch the register state and the stack. */ ++ switch_to(prev, next, prev); ++ barrier(); ++ ++ return finish_task_switch(prev); ++} ++ ++/* ++ * nr_running and nr_context_switches: ++ * ++ * externally visible scheduler statistics: current number of runnable ++ * threads, total number of context switches performed since bootup. ++ */ ++unsigned long nr_running(void) ++{ ++ unsigned long i, sum = 0; ++ ++ for_each_online_cpu(i) ++ sum += cpu_rq(i)->nr_running; ++ ++ return sum; ++} ++ ++/* ++ * Check if only the current task is running on the CPU. ++ * ++ * Caution: this function does not check that the caller has disabled ++ * preemption, thus the result might have a time-of-check-to-time-of-use ++ * race. The caller is responsible to use it correctly, for example: ++ * ++ * - from a non-preemptable section (of course) ++ * ++ * - from a thread that is bound to a single CPU ++ * ++ * - in a loop with very short iterations (e.g. a polling loop) ++ */ ++bool single_task_running(void) ++{ ++ return raw_rq()->nr_running == 1; ++} ++EXPORT_SYMBOL(single_task_running); ++ ++unsigned long long nr_context_switches(void) ++{ ++ int i; ++ unsigned long long sum = 0; ++ ++ for_each_possible_cpu(i) ++ sum += cpu_rq(i)->nr_switches; ++ ++ return sum; ++} ++ ++/* ++ * IO-wait accounting, and how its mostly bollocks (on SMP). ++ * ++ * The idea behind IO-wait account is to account the idle time that we could ++ * have spend running if it were not for IO. That is, if we were to improve the ++ * storage performance, we'd have a proportional reduction in IO-wait time. ++ * ++ * This all works nicely on UP, where, when a task blocks on IO, we account ++ * idle time as IO-wait, because if the storage were faster, it could've been ++ * running and we'd not be idle. ++ * ++ * This has been extended to SMP, by doing the same for each CPU. This however ++ * is broken. ++ * ++ * Imagine for instance the case where two tasks block on one CPU, only the one ++ * CPU will have IO-wait accounted, while the other has regular idle. Even ++ * though, if the storage were faster, both could've ran at the same time, ++ * utilising both CPUs. ++ * ++ * This means, that when looking globally, the current IO-wait accounting on ++ * SMP is a lower bound, by reason of under accounting. ++ * ++ * Worse, since the numbers are provided per CPU, they are sometimes ++ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly ++ * associated with any one particular CPU, it can wake to another CPU than it ++ * blocked on. This means the per CPU IO-wait number is meaningless. ++ * ++ * Task CPU affinities can make all that even more 'interesting'. ++ */ ++ ++unsigned long nr_iowait(void) ++{ ++ unsigned long i, sum = 0; ++ ++ for_each_possible_cpu(i) ++ sum += atomic_read(&cpu_rq(i)->nr_iowait); ++ ++ return sum; ++} ++ ++/* ++ * Consumers of these two interfaces, like for example the cpufreq menu ++ * governor are using nonsensical data. Boosting frequency for a CPU that has ++ * IO-wait which might not even end up running the task when it does become ++ * runnable. 
++ */ ++ ++unsigned long nr_iowait_cpu(int cpu) ++{ ++ struct rq *this = cpu_rq(cpu); ++ return atomic_read(&this->nr_iowait); ++} ++ ++void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) ++{ ++ struct rq *rq = this_rq(); ++ *nr_waiters = atomic_read(&rq->nr_iowait); ++ *load = rq->load.weight; ++} ++ ++#ifdef CONFIG_SMP ++ ++/* ++ * sched_exec - execve() is a valuable balancing opportunity, because at ++ * this point the task has the smallest effective memory and cache footprint. ++ */ ++void sched_exec(void) ++{ ++ struct task_struct *p = current; ++ unsigned long flags; ++ int dest_cpu; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0); ++ if (dest_cpu == smp_processor_id()) ++ goto unlock; ++ ++ if (likely(cpu_active(dest_cpu))) { ++ struct migration_arg arg = { p, dest_cpu }; ++ ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); ++ return; ++ } ++unlock: ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++} ++ ++#endif ++ ++DEFINE_PER_CPU(struct kernel_stat, kstat); ++DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); ++ ++EXPORT_PER_CPU_SYMBOL(kstat); ++EXPORT_PER_CPU_SYMBOL(kernel_cpustat); ++ ++/* ++ * The function fair_sched_class.update_curr accesses the struct curr ++ * and its field curr->exec_start; when called from task_sched_runtime(), ++ * we observe a high rate of cache misses in practice. ++ * Prefetching this data results in improved performance. ++ */ ++static inline void prefetch_curr_exec_start(struct task_struct *p) ++{ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ struct sched_entity *curr = (&p->se)->cfs_rq->curr; ++#else ++ struct sched_entity *curr = (&task_rq(p)->cfs)->curr; ++#endif ++ prefetch(curr); ++ prefetch(&curr->exec_start); ++} ++ ++/* ++ * Return accounted runtime for the task. ++ * In case the task is currently running, return the runtime plus current's ++ * pending runtime that have not been accounted yet. ++ */ ++unsigned long long task_sched_runtime(struct task_struct *p) ++{ ++ struct rq_flags rf; ++ struct rq *rq; ++ u64 ns; ++ ++#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) ++ /* ++ * 64-bit doesn't need locks to atomically read a 64-bit value. ++ * So we have a optimization chance when the task's delta_exec is 0. ++ * Reading ->on_cpu is racy, but this is ok. ++ * ++ * If we race with it leaving CPU, we'll take a lock. So we're correct. ++ * If we race with it entering CPU, unaccounted time is 0. This is ++ * indistinguishable from the read occurring a few cycles earlier. ++ * If we see ->on_cpu without ->on_rq, the task is leaving, and has ++ * been accounted, so we're correct here as well. ++ */ ++ if (!p->on_cpu || !task_on_rq_queued(p)) ++ return p->se.sum_exec_runtime; ++#endif ++ ++ rq = task_rq_lock(p, &rf); ++ /* ++ * Must be ->curr _and_ ->on_rq. If dequeued, we would ++ * project cycles that may never be accounted to this ++ * thread, breaking clock_gettime(). ++ */ ++ if (task_current(rq, p) && task_on_rq_queued(p)) { ++ prefetch_curr_exec_start(p); ++ update_rq_clock(rq); ++ p->sched_class->update_curr(rq); ++ } ++ ns = p->se.sum_exec_runtime; ++ task_rq_unlock(rq, p, &rf); ++ ++ return ns; ++} ++ ++/* ++ * This function gets called by the timer code, with HZ frequency. ++ * We call it with interrupts disabled. 
++ */ ++void scheduler_tick(void) ++{ ++ int cpu = smp_processor_id(); ++ struct rq *rq = cpu_rq(cpu); ++ struct task_struct *curr = rq->curr; ++ struct rq_flags rf; ++ ++ sched_clock_tick(); ++ ++ rq_lock(rq, &rf); ++ ++ update_rq_clock(rq); ++ curr->sched_class->task_tick(rq, curr, 0); ++ cpu_load_update_active(rq); ++ calc_global_load_tick(rq); ++ ++ rq_unlock(rq, &rf); ++ ++ perf_event_task_tick(); ++ ++#ifdef CONFIG_SMP ++ rq->idle_balance = idle_cpu(cpu); ++ trigger_load_balance(rq); ++#endif ++} ++ ++#ifdef CONFIG_NO_HZ_FULL ++ ++struct tick_work { ++ int cpu; ++ atomic_t state; ++ struct delayed_work work; ++}; ++/* Values for ->state, see diagram below. */ ++#define TICK_SCHED_REMOTE_OFFLINE 0 ++#define TICK_SCHED_REMOTE_OFFLINING 1 ++#define TICK_SCHED_REMOTE_RUNNING 2 ++ ++/* ++ * State diagram for ->state: ++ * ++ * ++ * TICK_SCHED_REMOTE_OFFLINE ++ * | ^ ++ * | | ++ * | | sched_tick_remote() ++ * | | ++ * | | ++ * +--TICK_SCHED_REMOTE_OFFLINING ++ * | ^ ++ * | | ++ * sched_tick_start() | | sched_tick_stop() ++ * | | ++ * V | ++ * TICK_SCHED_REMOTE_RUNNING ++ * ++ * ++ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() ++ * and sched_tick_start() are happy to leave the state in RUNNING. ++ */ ++ ++static struct tick_work __percpu *tick_work_cpu; ++ ++static void sched_tick_remote(struct work_struct *work) ++{ ++ struct delayed_work *dwork = to_delayed_work(work); ++ struct tick_work *twork = container_of(dwork, struct tick_work, work); ++ int cpu = twork->cpu; ++ struct rq *rq = cpu_rq(cpu); ++ struct task_struct *curr; ++ struct rq_flags rf; ++ u64 delta; ++ int os; ++ ++ /* ++ * Handle the tick only if it appears the remote CPU is running in full ++ * dynticks mode. The check is racy by nature, but missing a tick or ++ * having one too much is no big deal because the scheduler tick updates ++ * statistics and checks timeslices in a time-independent way, regardless ++ * of when exactly it is running. ++ */ ++ if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) ++ goto out_requeue; ++ ++ rq_lock_irq(rq, &rf); ++ curr = rq->curr; ++ if (is_idle_task(curr) || cpu_is_offline(cpu)) ++ goto out_unlock; ++ ++ update_rq_clock(rq); ++ delta = rq_clock_task(rq) - curr->se.exec_start; ++ ++ /* ++ * Make sure the next tick runs within a reasonable ++ * amount of time. ++ */ ++ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); ++ curr->sched_class->task_tick(rq, curr, 0); ++ ++out_unlock: ++ rq_unlock_irq(rq, &rf); ++ ++out_requeue: ++ /* ++ * Run the remote tick once per second (1Hz). This arbitrary ++ * frequency is large enough to avoid overload but short enough ++ * to keep scheduler internal stats reasonably up to date. But ++ * first update state to reflect hotplug activity if required. 
++ */ ++ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); ++ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); ++ if (os == TICK_SCHED_REMOTE_RUNNING) ++ queue_delayed_work(system_unbound_wq, dwork, HZ); ++} ++ ++static void sched_tick_start(int cpu) ++{ ++ int os; ++ struct tick_work *twork; ++ ++ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) ++ return; ++ ++ WARN_ON_ONCE(!tick_work_cpu); ++ ++ twork = per_cpu_ptr(tick_work_cpu, cpu); ++ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); ++ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); ++ if (os == TICK_SCHED_REMOTE_OFFLINE) { ++ twork->cpu = cpu; ++ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); ++ queue_delayed_work(system_unbound_wq, &twork->work, HZ); ++ } ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++static void sched_tick_stop(int cpu) ++{ ++ struct tick_work *twork; ++ int os; ++ ++ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) ++ return; ++ ++ WARN_ON_ONCE(!tick_work_cpu); ++ ++ twork = per_cpu_ptr(tick_work_cpu, cpu); ++ /* There cannot be competing actions, but don't rely on stop-machine. */ ++ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING); ++ WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING); ++ /* Don't cancel, as this would mess up the state machine. */ ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++int __init sched_tick_offload_init(void) ++{ ++ tick_work_cpu = alloc_percpu(struct tick_work); ++ BUG_ON(!tick_work_cpu); ++ return 0; ++} ++ ++#else /* !CONFIG_NO_HZ_FULL */ ++static inline void sched_tick_start(int cpu) { } ++static inline void sched_tick_stop(int cpu) { } ++#endif ++ ++#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ ++ defined(CONFIG_TRACE_PREEMPT_TOGGLE)) ++/* ++ * If the value passed in is equal to the current preempt count ++ * then we just disabled preemption. Start timing the latency. ++ */ ++static inline void preempt_latency_start(int val) ++{ ++ if (preempt_count() == val) { ++ unsigned long ip = get_lock_parent_ip(); ++#ifdef CONFIG_DEBUG_PREEMPT ++ current->preempt_disable_ip = ip; ++#endif ++ trace_preempt_off(CALLER_ADDR0, ip); ++ } ++} ++ ++void preempt_count_add(int val) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Underflow? ++ */ ++ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) ++ return; ++#endif ++ __preempt_count_add(val); ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Spinlock count overflowing soon? ++ */ ++ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= ++ PREEMPT_MASK - 10); ++#endif ++ preempt_latency_start(val); ++} ++EXPORT_SYMBOL(preempt_count_add); ++NOKPROBE_SYMBOL(preempt_count_add); ++ ++/* ++ * If the value passed in equals to the current preempt count ++ * then we just enabled preemption. Stop timing the latency. ++ */ ++static inline void preempt_latency_stop(int val) ++{ ++ if (preempt_count() == val) ++ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); ++} ++ ++void preempt_count_sub(int val) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Underflow? ++ */ ++ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) ++ return; ++ /* ++ * Is the spinlock portion underflowing? 
++ */ ++ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && ++ !(preempt_count() & PREEMPT_MASK))) ++ return; ++#endif ++ ++ preempt_latency_stop(val); ++ __preempt_count_sub(val); ++} ++EXPORT_SYMBOL(preempt_count_sub); ++NOKPROBE_SYMBOL(preempt_count_sub); ++ ++#else ++static inline void preempt_latency_start(int val) { } ++static inline void preempt_latency_stop(int val) { } ++#endif ++ ++static inline unsigned long get_preempt_disable_ip(struct task_struct *p) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ return p->preempt_disable_ip; ++#else ++ return 0; ++#endif ++} ++ ++/* ++ * Print scheduling while atomic bug: ++ */ ++static noinline void __schedule_bug(struct task_struct *prev) ++{ ++ /* Save this before calling printk(), since that will clobber it */ ++ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); ++ ++ if (oops_in_progress) ++ return; ++ ++ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", ++ prev->comm, prev->pid, preempt_count()); ++ ++ debug_show_held_locks(prev); ++ print_modules(); ++ if (irqs_disabled()) ++ print_irqtrace_events(prev); ++ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) ++ && in_atomic_preempt_off()) { ++ pr_err("Preemption disabled at:"); ++ print_ip_sym(preempt_disable_ip); ++ pr_cont("\n"); ++ } ++ if (panic_on_warn) ++ panic("scheduling while atomic\n"); ++ ++ dump_stack(); ++ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); ++} ++ ++/* ++ * Various schedule()-time debugging checks and statistics: ++ */ ++static inline void schedule_debug(struct task_struct *prev) ++{ ++#ifdef CONFIG_SCHED_STACK_END_CHECK ++ if (task_stack_end_corrupted(prev)) ++ panic("corrupted stack end detected inside scheduler\n"); ++#endif ++ ++ if (unlikely(in_atomic_preempt_off())) { ++ __schedule_bug(prev); ++ preempt_count_set(PREEMPT_DISABLED); ++ } ++ rcu_sleep_check(); ++ ++ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); ++ ++ schedstat_inc(this_rq()->sched_count); ++} ++ ++/* ++ * Pick up the highest-prio task: ++ */ ++static inline struct task_struct * ++pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) ++{ ++ const struct sched_class *class; ++ struct task_struct *p; ++ ++ /* ++ * Optimization: we know that if all tasks are in the fair class we can ++ * call that function directly, but only if the @prev task wasn't of a ++ * higher scheduling class, because otherwise those loose the ++ * opportunity to pull in more work from other CPUs. ++ */ ++ if (likely((prev->sched_class == &idle_sched_class || ++ prev->sched_class == &fair_sched_class) && ++ rq->nr_running == rq->cfs.h_nr_running)) { ++ ++ p = fair_sched_class.pick_next_task(rq, prev, rf); ++ if (unlikely(p == RETRY_TASK)) ++ goto again; ++ ++ /* Assumes fair_sched_class->next == idle_sched_class */ ++ if (unlikely(!p)) ++ p = idle_sched_class.pick_next_task(rq, prev, rf); ++ ++ return p; ++ } ++ ++again: ++ for_each_class(class) { ++ p = class->pick_next_task(rq, prev, rf); ++ if (p) { ++ if (unlikely(p == RETRY_TASK)) ++ goto again; ++ return p; ++ } ++ } ++ ++ /* The idle class should always have a runnable task: */ ++ BUG(); ++} ++ ++/* ++ * __schedule() is the main scheduler function. ++ * ++ * The main means of driving the scheduler and thus entering this function are: ++ * ++ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. ++ * ++ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return ++ * paths. For example, see arch/x86/entry_64.S. 
++ * ++ * To drive preemption between tasks, the scheduler sets the flag in timer ++ * interrupt handler scheduler_tick(). ++ * ++ * 3. Wakeups don't really cause entry into schedule(). They add a ++ * task to the run-queue and that's it. ++ * ++ * Now, if the new task added to the run-queue preempts the current ++ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets ++ * called on the nearest possible occasion: ++ * ++ * - If the kernel is preemptible (CONFIG_PREEMPT=y): ++ * ++ * - in syscall or exception context, at the next outmost ++ * preempt_enable(). (this might be as soon as the wake_up()'s ++ * spin_unlock()!) ++ * ++ * - in IRQ context, return from interrupt-handler to ++ * preemptible context ++ * ++ * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) ++ * then at the next: ++ * ++ * - cond_resched() call ++ * - explicit schedule() call ++ * - return from syscall or exception to user-space ++ * - return from interrupt-handler to user-space ++ * ++ * WARNING: must be called with preemption disabled! ++ */ ++static void __sched notrace __schedule(bool preempt) ++{ ++ struct task_struct *prev, *next; ++ unsigned long *switch_count; ++ struct rq_flags rf; ++ struct rq *rq; ++ int cpu; ++ ++ cpu = smp_processor_id(); ++ rq = cpu_rq(cpu); ++ prev = rq->curr; ++ ++ schedule_debug(prev); ++ ++ if (sched_feat(HRTICK)) ++ hrtick_clear(rq); ++ ++ local_irq_disable(); ++ rcu_note_context_switch(preempt); ++ ++ /* ++ * Make sure that signal_pending_state()->signal_pending() below ++ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) ++ * done by the caller to avoid the race with signal_wake_up(). ++ * ++ * The membarrier system call requires a full memory barrier ++ * after coming from user-space, before storing to rq->curr. ++ */ ++ rq_lock(rq, &rf); ++ smp_mb__after_spinlock(); ++ ++ /* Promote REQ to ACT */ ++ rq->clock_update_flags <<= 1; ++ update_rq_clock(rq); ++ ++ switch_count = &prev->nivcsw; ++ if (!preempt && prev->state) { ++ if (unlikely(signal_pending_state(prev->state, prev))) { ++ prev->state = TASK_RUNNING; ++ } else { ++ deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK); ++ prev->on_rq = 0; ++ ++ if (prev->in_iowait) { ++ atomic_inc(&rq->nr_iowait); ++ delayacct_blkio_start(); ++ } ++ ++ /* ++ * If a worker went to sleep, notify and ask workqueue ++ * whether it wants to wake up a task to maintain ++ * concurrency. ++ */ ++ if (prev->flags & PF_WQ_WORKER) { ++ struct task_struct *to_wakeup; ++ ++ to_wakeup = wq_worker_sleeping(prev); ++ if (to_wakeup) ++ try_to_wake_up_local(to_wakeup, &rf); ++ } ++ } ++ switch_count = &prev->nvcsw; ++ } ++ ++ next = pick_next_task(rq, prev, &rf); ++ clear_tsk_need_resched(prev); ++ clear_preempt_need_resched(); ++ ++ if (likely(prev != next)) { ++ rq->nr_switches++; ++ rq->curr = next; ++ /* ++ * The membarrier system call requires each architecture ++ * to have a full memory barrier after updating ++ * rq->curr, before returning to user-space. ++ * ++ * Here are the schemes providing that barrier on the ++ * various architectures: ++ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. ++ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
++ * - finish_lock_switch() for weakly-ordered ++ * architectures where spin_unlock is a full barrier, ++ * - switch_to() for arm64 (weakly-ordered, spin_unlock ++ * is a RELEASE barrier), ++ */ ++ ++*switch_count; ++ ++ trace_sched_switch(preempt, prev, next); ++ ++ /* Also unlocks the rq: */ ++ rq = context_switch(rq, prev, next, &rf); ++ } else { ++ rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); ++ rq_unlock_irq(rq, &rf); ++ } ++ ++ balance_callback(rq); ++} ++ ++void __noreturn do_task_dead(void) ++{ ++ /* Causes final put_task_struct in finish_task_switch(): */ ++ set_special_state(TASK_DEAD); ++ ++ /* Tell freezer to ignore us: */ ++ current->flags |= PF_NOFREEZE; ++ ++ __schedule(false); ++ BUG(); ++ ++ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ ++ for (;;) ++ cpu_relax(); ++} ++ ++static inline void sched_submit_work(struct task_struct *tsk) ++{ ++ if (!tsk->state || tsk_is_pi_blocked(tsk)) ++ return; ++ /* ++ * If we are going to sleep and we have plugged IO queued, ++ * make sure to submit it to avoid deadlocks. ++ */ ++ if (blk_needs_flush_plug(tsk)) ++ blk_schedule_flush_plug(tsk); ++} ++ ++asmlinkage __visible void __sched schedule(void) ++{ ++ struct task_struct *tsk = current; ++ ++ sched_submit_work(tsk); ++ do { ++ preempt_disable(); ++ __schedule(false); ++ sched_preempt_enable_no_resched(); ++ } while (need_resched()); ++} ++EXPORT_SYMBOL(schedule); ++ ++/* ++ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted ++ * state (have scheduled out non-voluntarily) by making sure that all ++ * tasks have either left the run queue or have gone into user space. ++ * As idle tasks do not do either, they must not ever be preempted ++ * (schedule out non-voluntarily). ++ * ++ * schedule_idle() is similar to schedule_preempt_disable() except that it ++ * never enables preemption because it does not call sched_submit_work(). ++ */ ++void __sched schedule_idle(void) ++{ ++ /* ++ * As this skips calling sched_submit_work(), which the idle task does ++ * regardless because that function is a nop when the task is in a ++ * TASK_RUNNING state, make sure this isn't used someplace that the ++ * current task can be in any other state. Note, idle is always in the ++ * TASK_RUNNING state. ++ */ ++ WARN_ON_ONCE(current->state); ++ do { ++ __schedule(false); ++ } while (need_resched()); ++} ++ ++#ifdef CONFIG_CONTEXT_TRACKING ++asmlinkage __visible void __sched schedule_user(void) ++{ ++ /* ++ * If we come here after a random call to set_need_resched(), ++ * or we have been woken up remotely but the IPI has not yet arrived, ++ * we haven't yet exited the RCU idle mode. Do it here manually until ++ * we find a better solution. ++ * ++ * NB: There are buggy callers of this function. Ideally we ++ * should warn if prev_state != CONTEXT_USER, but that will trigger ++ * too frequently to make sense yet. ++ */ ++ enum ctx_state prev_state = exception_enter(); ++ schedule(); ++ exception_exit(prev_state); ++} ++#endif ++ ++/** ++ * schedule_preempt_disabled - called with preemption disabled ++ * ++ * Returns with preemption disabled. 
Note: preempt_count must be 1 ++ */ ++void __sched schedule_preempt_disabled(void) ++{ ++ sched_preempt_enable_no_resched(); ++ schedule(); ++ preempt_disable(); ++} ++ ++static void __sched notrace preempt_schedule_common(void) ++{ ++ do { ++ /* ++ * Because the function tracer can trace preempt_count_sub() ++ * and it also uses preempt_enable/disable_notrace(), if ++ * NEED_RESCHED is set, the preempt_enable_notrace() called ++ * by the function tracer will call this function again and ++ * cause infinite recursion. ++ * ++ * Preemption must be disabled here before the function ++ * tracer can trace. Break up preempt_disable() into two ++ * calls. One to disable preemption without fear of being ++ * traced. The other to still record the preemption latency, ++ * which can also be traced by the function tracer. ++ */ ++ preempt_disable_notrace(); ++ preempt_latency_start(1); ++ __schedule(true); ++ preempt_latency_stop(1); ++ preempt_enable_no_resched_notrace(); ++ ++ /* ++ * Check again in case we missed a preemption opportunity ++ * between schedule and now. ++ */ ++ } while (need_resched()); ++} ++ ++#ifdef CONFIG_PREEMPT ++/* ++ * this is the entry point to schedule() from in-kernel preemption ++ * off of preempt_enable. Kernel preemptions off return from interrupt ++ * occur there and call schedule directly. ++ */ ++asmlinkage __visible void __sched notrace preempt_schedule(void) ++{ ++ /* ++ * If there is a non-zero preempt_count or interrupts are disabled, ++ * we do not want to preempt the current task. Just return.. ++ */ ++ if (likely(!preemptible())) ++ return; ++ ++ preempt_schedule_common(); ++} ++NOKPROBE_SYMBOL(preempt_schedule); ++EXPORT_SYMBOL(preempt_schedule); ++ ++/** ++ * preempt_schedule_notrace - preempt_schedule called by tracing ++ * ++ * The tracing infrastructure uses preempt_enable_notrace to prevent ++ * recursion and tracing preempt enabling caused by the tracing ++ * infrastructure itself. But as tracing can happen in areas coming ++ * from userspace or just about to enter userspace, a preempt enable ++ * can occur before user_exit() is called. This will cause the scheduler ++ * to be called when the system is still in usermode. ++ * ++ * To prevent this, the preempt_enable_notrace will use this function ++ * instead of preempt_schedule() to exit user context if needed before ++ * calling the scheduler. ++ */ ++asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) ++{ ++ enum ctx_state prev_ctx; ++ ++ if (likely(!preemptible())) ++ return; ++ ++ do { ++ /* ++ * Because the function tracer can trace preempt_count_sub() ++ * and it also uses preempt_enable/disable_notrace(), if ++ * NEED_RESCHED is set, the preempt_enable_notrace() called ++ * by the function tracer will call this function again and ++ * cause infinite recursion. ++ * ++ * Preemption must be disabled here before the function ++ * tracer can trace. Break up preempt_disable() into two ++ * calls. One to disable preemption without fear of being ++ * traced. The other to still record the preemption latency, ++ * which can also be traced by the function tracer. ++ */ ++ preempt_disable_notrace(); ++ preempt_latency_start(1); ++ /* ++ * Needs preempt disabled in case user_exit() is traced ++ * and the tracer calls preempt_enable_notrace() causing ++ * an infinite recursion. 
++ */ ++ prev_ctx = exception_enter(); ++ __schedule(true); ++ exception_exit(prev_ctx); ++ ++ preempt_latency_stop(1); ++ preempt_enable_no_resched_notrace(); ++ } while (need_resched()); ++} ++EXPORT_SYMBOL_GPL(preempt_schedule_notrace); ++ ++#endif /* CONFIG_PREEMPT */ ++ ++/* ++ * this is the entry point to schedule() from kernel preemption ++ * off of irq context. ++ * Note, that this is called and return with irqs disabled. This will ++ * protect us against recursive calling from irq. ++ */ ++asmlinkage __visible void __sched preempt_schedule_irq(void) ++{ ++ enum ctx_state prev_state; ++ ++ /* Catch callers which need to be fixed */ ++ BUG_ON(preempt_count() || !irqs_disabled()); ++ ++ prev_state = exception_enter(); ++ ++ do { ++ preempt_disable(); ++ local_irq_enable(); ++ __schedule(true); ++ local_irq_disable(); ++ sched_preempt_enable_no_resched(); ++ } while (need_resched()); ++ ++ exception_exit(prev_state); ++} ++ ++int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, ++ void *key) ++{ ++ return try_to_wake_up(curr->private, mode, wake_flags); ++} ++EXPORT_SYMBOL(default_wake_function); ++ ++#ifdef CONFIG_RT_MUTEXES ++ ++static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) ++{ ++ if (pi_task) ++ prio = min(prio, pi_task->prio); ++ ++ return prio; ++} ++ ++static inline int rt_effective_prio(struct task_struct *p, int prio) ++{ ++ struct task_struct *pi_task = rt_mutex_get_top_task(p); ++ ++ return __rt_effective_prio(pi_task, prio); ++} ++ ++/* ++ * rt_mutex_setprio - set the current priority of a task ++ * @p: task to boost ++ * @pi_task: donor task ++ * ++ * This function changes the 'effective' priority of a task. It does ++ * not touch ->normal_prio like __setscheduler(). ++ * ++ * Used by the rt_mutex code to implement priority inheritance ++ * logic. Call site only calls if the priority of the task changed. ++ */ ++void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) ++{ ++ int prio, oldprio, queued, running, queue_flag = ++ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; ++ const struct sched_class *prev_class; ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ /* XXX used to be waiter->prio, not waiter->task->prio */ ++ prio = __rt_effective_prio(pi_task, p->normal_prio); ++ ++ /* ++ * If nothing changed; bail early. ++ */ ++ if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio)) ++ return; ++ ++ rq = __task_rq_lock(p, &rf); ++ update_rq_clock(rq); ++ /* ++ * Set under pi_lock && rq->lock, such that the value can be used under ++ * either lock. ++ * ++ * Note that there is loads of tricky to make this pointer cache work ++ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to ++ * ensure a task is de-boosted (pi_task is set to NULL) before the ++ * task is allowed to run again (and can exit). This ensures the pointer ++ * points to a blocked task -- which guaratees the task is present. ++ */ ++ p->pi_top_task = pi_task; ++ ++ /* ++ * For FIFO/RR we only need to set prio, if that matches we're done. ++ */ ++ if (prio == p->prio && !dl_prio(prio)) ++ goto out_unlock; ++ ++ /* ++ * Idle task boosting is a nono in general. There is one ++ * exception, when PREEMPT_RT and NOHZ is active: ++ * ++ * The idle task calls get_next_timer_interrupt() and holds ++ * the timer wheel base->lock on the CPU and another CPU wants ++ * to access the timer (probably to cancel it). 
We can safely ++ * ignore the boosting request, as the idle CPU runs this code ++ * with interrupts disabled and will complete the lock ++ * protected section without being interrupted. So there is no ++ * real need to boost. ++ */ ++ if (unlikely(p == rq->idle)) { ++ WARN_ON(p != rq->curr); ++ WARN_ON(p->pi_blocked_on); ++ goto out_unlock; ++ } ++ ++ trace_sched_pi_setprio(p, pi_task); ++ oldprio = p->prio; ++ ++ if (oldprio == prio) ++ queue_flag &= ~DEQUEUE_MOVE; ++ ++ prev_class = p->sched_class; ++ queued = task_on_rq_queued(p); ++ running = task_current(rq, p); ++ if (queued) ++ dequeue_task(rq, p, queue_flag); ++ if (running) ++ put_prev_task(rq, p); ++ ++ /* ++ * Boosting condition are: ++ * 1. -rt task is running and holds mutex A ++ * --> -dl task blocks on mutex A ++ * ++ * 2. -dl task is running and holds mutex A ++ * --> -dl task blocks on mutex A and could preempt the ++ * running task ++ */ ++ if (dl_prio(prio)) { ++ if (!dl_prio(p->normal_prio) || ++ (pi_task && dl_prio(pi_task->prio) && ++ dl_entity_preempt(&pi_task->dl, &p->dl))) { ++ p->dl.dl_boosted = 1; ++ queue_flag |= ENQUEUE_REPLENISH; ++ } else ++ p->dl.dl_boosted = 0; ++ p->sched_class = &dl_sched_class; ++ } else if (rt_prio(prio)) { ++ if (dl_prio(oldprio)) ++ p->dl.dl_boosted = 0; ++ if (oldprio < prio) ++ queue_flag |= ENQUEUE_HEAD; ++ p->sched_class = &rt_sched_class; ++ } else { ++ if (dl_prio(oldprio)) ++ p->dl.dl_boosted = 0; ++ if (rt_prio(oldprio)) ++ p->rt.timeout = 0; ++ p->sched_class = &fair_sched_class; ++ } ++ ++ p->prio = prio; ++ ++ if (queued) ++ enqueue_task(rq, p, queue_flag); ++ if (running) ++ set_curr_task(rq, p); ++ ++ check_class_changed(rq, p, prev_class, oldprio); ++out_unlock: ++ /* Avoid rq from going away on us: */ ++ preempt_disable(); ++ __task_rq_unlock(rq, &rf); ++ ++ balance_callback(rq); ++ preempt_enable(); ++} ++#else ++static inline int rt_effective_prio(struct task_struct *p, int prio) ++{ ++ return prio; ++} ++#endif ++ ++void set_user_nice(struct task_struct *p, long nice) ++{ ++ bool queued, running; ++ int old_prio, delta; ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) ++ return; ++ /* ++ * We have to be careful, if called from sys_setpriority(), ++ * the task might be in the middle of scheduling on another CPU. 
++ */ ++ rq = task_rq_lock(p, &rf); ++ update_rq_clock(rq); ++ ++ /* ++ * The RT priorities are set via sched_setscheduler(), but we still ++ * allow the 'normal' nice value to be set - but as expected ++ * it wont have any effect on scheduling until the task is ++ * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR: ++ */ ++ if (task_has_dl_policy(p) || task_has_rt_policy(p)) { ++ p->static_prio = NICE_TO_PRIO(nice); ++ goto out_unlock; ++ } ++ queued = task_on_rq_queued(p); ++ running = task_current(rq, p); ++ if (queued) ++ dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); ++ if (running) ++ put_prev_task(rq, p); ++ ++ p->static_prio = NICE_TO_PRIO(nice); ++ set_load_weight(p, true); ++ old_prio = p->prio; ++ p->prio = effective_prio(p); ++ delta = p->prio - old_prio; ++ ++ if (queued) { ++ enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); ++ /* ++ * If the task increased its priority or is running and ++ * lowered its priority, then reschedule its CPU: ++ */ ++ if (delta < 0 || (delta > 0 && task_running(rq, p))) ++ resched_curr(rq); ++ } ++ if (running) ++ set_curr_task(rq, p); ++out_unlock: ++ task_rq_unlock(rq, p, &rf); ++} ++EXPORT_SYMBOL(set_user_nice); ++ ++/* ++ * can_nice - check if a task can reduce its nice value ++ * @p: task ++ * @nice: nice value ++ */ ++int can_nice(const struct task_struct *p, const int nice) ++{ ++ /* Convert nice value [19,-20] to rlimit style value [1,40]: */ ++ int nice_rlim = nice_to_rlimit(nice); ++ ++ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || ++ capable(CAP_SYS_NICE)); ++} ++ ++#ifdef __ARCH_WANT_SYS_NICE ++ ++/* ++ * sys_nice - change the priority of the current process. ++ * @increment: priority increment ++ * ++ * sys_setpriority is a more generic, but much slower function that ++ * does similar things. ++ */ ++SYSCALL_DEFINE1(nice, int, increment) ++{ ++ long nice, retval; ++ ++ /* ++ * Setpriority might change our priority at the same moment. ++ * We don't have to worry. Conceptually one call occurs first ++ * and we have a single winner. ++ */ ++ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); ++ nice = task_nice(current) + increment; ++ ++ nice = clamp_val(nice, MIN_NICE, MAX_NICE); ++ if (increment < 0 && !can_nice(current, nice)) ++ return -EPERM; ++ ++ retval = security_task_setnice(current, nice); ++ if (retval) ++ return retval; ++ ++ set_user_nice(current, nice); ++ return 0; ++} ++ ++#endif ++ ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The priority value as seen by users in /proc. ++ * RT tasks are offset by -200. Normal tasks are centered ++ * around 0, value goes from -16 to +15. ++ */ ++int task_prio(const struct task_struct *p) ++{ ++ return p->prio - MAX_RT_PRIO; ++} ++ ++/** ++ * idle_cpu - is a given CPU idle currently? ++ * @cpu: the processor in question. ++ * ++ * Return: 1 if the CPU is currently idle. 0 otherwise. ++ */ ++int idle_cpu(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (rq->curr != rq->idle) ++ return 0; ++ ++ if (rq->nr_running) ++ return 0; ++ ++#ifdef CONFIG_SMP ++ if (!llist_empty(&rq->wake_list)) ++ return 0; ++#endif ++ ++ return 1; ++} ++ ++/** ++ * available_idle_cpu - is a given CPU idle for enqueuing work. ++ * @cpu: the CPU in question. ++ * ++ * Return: 1 if the CPU is currently idle. 0 otherwise. ++ */ ++int available_idle_cpu(int cpu) ++{ ++ if (!idle_cpu(cpu)) ++ return 0; ++ ++ if (vcpu_is_preempted(cpu)) ++ return 0; ++ ++ return 1; ++} ++ ++/** ++ * idle_task - return the idle task for a given CPU. 
++ * @cpu: the processor in question. ++ * ++ * Return: The idle task for the CPU @cpu. ++ */ ++struct task_struct *idle_task(int cpu) ++{ ++ return cpu_rq(cpu)->idle; ++} ++ ++/** ++ * find_process_by_pid - find a process with a matching PID value. ++ * @pid: the pid in question. ++ * ++ * The task of @pid, if found. %NULL otherwise. ++ */ ++static struct task_struct *find_process_by_pid(pid_t pid) ++{ ++ return pid ? find_task_by_vpid(pid) : current; ++} ++ ++/* ++ * sched_setparam() passes in -1 for its policy, to let the functions ++ * it calls know not to change it. ++ */ ++#define SETPARAM_POLICY -1 ++ ++static void __setscheduler_params(struct task_struct *p, ++ const struct sched_attr *attr) ++{ ++ int policy = attr->sched_policy; ++ ++ if (policy == SETPARAM_POLICY) ++ policy = p->policy; ++ ++ p->policy = policy; ++ ++ if (dl_policy(policy)) ++ __setparam_dl(p, attr); ++ else if (fair_policy(policy)) ++ p->static_prio = NICE_TO_PRIO(attr->sched_nice); ++ ++ /* ++ * __sched_setscheduler() ensures attr->sched_priority == 0 when ++ * !rt_policy. Always setting this ensures that things like ++ * getparam()/getattr() don't report silly values for !rt tasks. ++ */ ++ p->rt_priority = attr->sched_priority; ++ p->normal_prio = normal_prio(p); ++ set_load_weight(p, true); ++} ++ ++/* Actually do priority change: must hold pi & rq lock. */ ++static void __setscheduler(struct rq *rq, struct task_struct *p, ++ const struct sched_attr *attr, bool keep_boost) ++{ ++ __setscheduler_params(p, attr); ++ ++ /* ++ * Keep a potential priority boosting if called from ++ * sched_setscheduler(). ++ */ ++ p->prio = normal_prio(p); ++ if (keep_boost) ++ p->prio = rt_effective_prio(p, p->prio); ++ ++ if (dl_prio(p->prio)) ++ p->sched_class = &dl_sched_class; ++ else if (rt_prio(p->prio)) ++ p->sched_class = &rt_sched_class; ++ else ++ p->sched_class = &fair_sched_class; ++} ++ ++/* ++ * Check the target process has a UID that matches the current process's: ++ */ ++static bool check_same_owner(struct task_struct *p) ++{ ++ const struct cred *cred = current_cred(), *pcred; ++ bool match; ++ ++ rcu_read_lock(); ++ pcred = __task_cred(p); ++ match = (uid_eq(cred->euid, pcred->euid) || ++ uid_eq(cred->euid, pcred->uid)); ++ rcu_read_unlock(); ++ return match; ++} ++ ++static int __sched_setscheduler(struct task_struct *p, ++ const struct sched_attr *attr, ++ bool user, bool pi) ++{ ++ int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : ++ MAX_RT_PRIO - 1 - attr->sched_priority; ++ int retval, oldprio, oldpolicy = -1, queued, running; ++ int new_effective_prio, policy = attr->sched_policy; ++ const struct sched_class *prev_class; ++ struct rq_flags rf; ++ int reset_on_fork; ++ int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; ++ struct rq *rq; ++ ++ /* The pi code expects interrupts enabled */ ++ BUG_ON(pi && in_interrupt()); ++recheck: ++ /* Double check policy once rq lock held: */ ++ if (policy < 0) { ++ reset_on_fork = p->sched_reset_on_fork; ++ policy = oldpolicy = p->policy; ++ } else { ++ reset_on_fork = !!(attr->sched_flags & SCHED_FLAG_RESET_ON_FORK); ++ ++ if (!valid_policy(policy)) ++ return -EINVAL; ++ } ++ ++ if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV)) ++ return -EINVAL; ++ ++ /* ++ * Valid priorities for SCHED_FIFO and SCHED_RR are ++ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL, ++ * SCHED_BATCH and SCHED_IDLE is 0. 
++ */ ++ if ((p->mm && attr->sched_priority > MAX_USER_RT_PRIO-1) || ++ (!p->mm && attr->sched_priority > MAX_RT_PRIO-1)) ++ return -EINVAL; ++ if ((dl_policy(policy) && !__checkparam_dl(attr)) || ++ (rt_policy(policy) != (attr->sched_priority != 0))) ++ return -EINVAL; ++ ++ /* ++ * Allow unprivileged RT tasks to decrease priority: ++ */ ++ if (user && !capable(CAP_SYS_NICE)) { ++ if (fair_policy(policy)) { ++ if (attr->sched_nice < task_nice(p) && ++ !can_nice(p, attr->sched_nice)) ++ return -EPERM; ++ } ++ ++ if (rt_policy(policy)) { ++ unsigned long rlim_rtprio = ++ task_rlimit(p, RLIMIT_RTPRIO); ++ ++ /* Can't set/change the rt policy: */ ++ if (policy != p->policy && !rlim_rtprio) ++ return -EPERM; ++ ++ /* Can't increase priority: */ ++ if (attr->sched_priority > p->rt_priority && ++ attr->sched_priority > rlim_rtprio) ++ return -EPERM; ++ } ++ ++ /* ++ * Can't set/change SCHED_DEADLINE policy at all for now ++ * (safest behavior); in the future we would like to allow ++ * unprivileged DL tasks to increase their relative deadline ++ * or reduce their runtime (both ways reducing utilization) ++ */ ++ if (dl_policy(policy)) ++ return -EPERM; ++ ++ /* ++ * Treat SCHED_IDLE as nice 20. Only allow a switch to ++ * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. ++ */ ++ if (idle_policy(p->policy) && !idle_policy(policy)) { ++ if (!can_nice(p, task_nice(p))) ++ return -EPERM; ++ } ++ ++ /* Can't change other user's priorities: */ ++ if (!check_same_owner(p)) ++ return -EPERM; ++ ++ /* Normal users shall not reset the sched_reset_on_fork flag: */ ++ if (p->sched_reset_on_fork && !reset_on_fork) ++ return -EPERM; ++ } ++ ++ if (user) { ++ if (attr->sched_flags & SCHED_FLAG_SUGOV) ++ return -EINVAL; ++ ++ retval = security_task_setscheduler(p); ++ if (retval) ++ return retval; ++ } ++ ++ /* ++ * Make sure no PI-waiters arrive (or leave) while we are ++ * changing the priority of the task: ++ * ++ * To be able to change p->policy safely, the appropriate ++ * runqueue lock must be held. ++ */ ++ rq = task_rq_lock(p, &rf); ++ update_rq_clock(rq); ++ ++ /* ++ * Changing the policy of the stop threads its a very bad idea: ++ */ ++ if (p == rq->stop) { ++ task_rq_unlock(rq, p, &rf); ++ return -EINVAL; ++ } ++ ++ /* ++ * If not changing anything there's no need to proceed further, ++ * but store a possible modification of reset_on_fork. ++ */ ++ if (unlikely(policy == p->policy)) { ++ if (fair_policy(policy) && attr->sched_nice != task_nice(p)) ++ goto change; ++ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) ++ goto change; ++ if (dl_policy(policy) && dl_param_changed(p, attr)) ++ goto change; ++ ++ p->sched_reset_on_fork = reset_on_fork; ++ task_rq_unlock(rq, p, &rf); ++ return 0; ++ } ++change: ++ ++ if (user) { ++#ifdef CONFIG_RT_GROUP_SCHED ++ /* ++ * Do not allow realtime tasks into groups that have no runtime ++ * assigned. ++ */ ++ if (rt_bandwidth_enabled() && rt_policy(policy) && ++ task_group(p)->rt_bandwidth.rt_runtime == 0 && ++ !task_group_is_autogroup(task_group(p))) { ++ task_rq_unlock(rq, p, &rf); ++ return -EPERM; ++ } ++#endif ++#ifdef CONFIG_SMP ++ if (dl_bandwidth_enabled() && dl_policy(policy) && ++ !(attr->sched_flags & SCHED_FLAG_SUGOV)) { ++ cpumask_t *span = rq->rd->span; ++ ++ /* ++ * Don't allow tasks with an affinity mask smaller than ++ * the entire root_domain to become SCHED_DEADLINE. We ++ * will also fail if there's no bandwidth available. 
++ */ ++ if (!cpumask_subset(span, &p->cpus_allowed) || ++ rq->rd->dl_bw.bw == 0) { ++ task_rq_unlock(rq, p, &rf); ++ return -EPERM; ++ } ++ } ++#endif ++ } ++ ++ /* Re-check policy now with rq lock held: */ ++ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { ++ policy = oldpolicy = -1; ++ task_rq_unlock(rq, p, &rf); ++ goto recheck; ++ } ++ ++ /* ++ * If setscheduling to SCHED_DEADLINE (or changing the parameters ++ * of a SCHED_DEADLINE task) we need to check if enough bandwidth ++ * is available. ++ */ ++ if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) { ++ task_rq_unlock(rq, p, &rf); ++ return -EBUSY; ++ } ++ ++ p->sched_reset_on_fork = reset_on_fork; ++ oldprio = p->prio; ++ ++ if (pi) { ++ /* ++ * Take priority boosted tasks into account. If the new ++ * effective priority is unchanged, we just store the new ++ * normal parameters and do not touch the scheduler class and ++ * the runqueue. This will be done when the task deboost ++ * itself. ++ */ ++ new_effective_prio = rt_effective_prio(p, newprio); ++ if (new_effective_prio == oldprio) ++ queue_flags &= ~DEQUEUE_MOVE; ++ } ++ ++ queued = task_on_rq_queued(p); ++ running = task_current(rq, p); ++ if (queued) ++ dequeue_task(rq, p, queue_flags); ++ if (running) ++ put_prev_task(rq, p); ++ ++ prev_class = p->sched_class; ++ __setscheduler(rq, p, attr, pi); ++ ++ if (queued) { ++ /* ++ * We enqueue to tail when the priority of a task is ++ * increased (user space view). ++ */ ++ if (oldprio < p->prio) ++ queue_flags |= ENQUEUE_HEAD; ++ ++ enqueue_task(rq, p, queue_flags); ++ } ++ if (running) ++ set_curr_task(rq, p); ++ ++ check_class_changed(rq, p, prev_class, oldprio); ++ ++ /* Avoid rq from going away on us: */ ++ preempt_disable(); ++ task_rq_unlock(rq, p, &rf); ++ ++ if (pi) ++ rt_mutex_adjust_pi(p); ++ ++ /* Run balance callbacks after we've adjusted the PI chain: */ ++ balance_callback(rq); ++ preempt_enable(); ++ ++ return 0; ++} ++ ++static int _sched_setscheduler(struct task_struct *p, int policy, ++ const struct sched_param *param, bool check) ++{ ++ struct sched_attr attr = { ++ .sched_policy = policy, ++ .sched_priority = param->sched_priority, ++ .sched_nice = PRIO_TO_NICE(p->static_prio), ++ }; ++ ++ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ ++ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { ++ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; ++ policy &= ~SCHED_RESET_ON_FORK; ++ attr.sched_policy = policy; ++ } ++ ++ return __sched_setscheduler(p, &attr, check, true); ++} ++/** ++ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. ++ * @p: the task in question. ++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ * ++ * Return: 0 on success. An error code otherwise. ++ * ++ * NOTE that the task may be already dead. ++ */ ++int sched_setscheduler(struct task_struct *p, int policy, ++ const struct sched_param *param) ++{ ++ return _sched_setscheduler(p, policy, param, true); ++} ++EXPORT_SYMBOL_GPL(sched_setscheduler); ++ ++int sched_setattr(struct task_struct *p, const struct sched_attr *attr) ++{ ++ return __sched_setscheduler(p, attr, true, true); ++} ++EXPORT_SYMBOL_GPL(sched_setattr); ++ ++int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) ++{ ++ return __sched_setscheduler(p, attr, false, true); ++} ++ ++/** ++ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. ++ * @p: the task in question. 
++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ * ++ * Just like sched_setscheduler, only don't bother checking if the ++ * current context has permission. For example, this is needed in ++ * stop_machine(): we create temporary high priority worker threads, ++ * but our caller might not have that capability. ++ * ++ * Return: 0 on success. An error code otherwise. ++ */ ++int sched_setscheduler_nocheck(struct task_struct *p, int policy, ++ const struct sched_param *param) ++{ ++ return _sched_setscheduler(p, policy, param, false); ++} ++EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); ++ ++static int ++do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) ++{ ++ struct sched_param lparam; ++ struct task_struct *p; ++ int retval; ++ ++ if (!param || pid < 0) ++ return -EINVAL; ++ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) ++ return -EFAULT; ++ ++ rcu_read_lock(); ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (p != NULL) ++ retval = sched_setscheduler(p, policy, &lparam); ++ rcu_read_unlock(); ++ ++ return retval; ++} ++ ++/* ++ * Mimics kernel/events/core.c perf_copy_attr(). ++ */ ++static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) ++{ ++ u32 size; ++ int ret; ++ ++ if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0)) ++ return -EFAULT; ++ ++ /* Zero the full structure, so that a short copy will be nice: */ ++ memset(attr, 0, sizeof(*attr)); ++ ++ ret = get_user(size, &uattr->size); ++ if (ret) ++ return ret; ++ ++ /* Bail out on silly large: */ ++ if (size > PAGE_SIZE) ++ goto err_size; ++ ++ /* ABI compatibility quirk: */ ++ if (!size) ++ size = SCHED_ATTR_SIZE_VER0; ++ ++ if (size < SCHED_ATTR_SIZE_VER0) ++ goto err_size; ++ ++ /* ++ * If we're handed a bigger struct than we know of, ++ * ensure all the unknown bits are 0 - i.e. new ++ * user-space does not rely on any kernel feature ++ * extensions we dont know about yet. ++ */ ++ if (size > sizeof(*attr)) { ++ unsigned char __user *addr; ++ unsigned char __user *end; ++ unsigned char val; ++ ++ addr = (void __user *)uattr + sizeof(*attr); ++ end = (void __user *)uattr + size; ++ ++ for (; addr < end; addr++) { ++ ret = get_user(val, addr); ++ if (ret) ++ return ret; ++ if (val) ++ goto err_size; ++ } ++ size = sizeof(*attr); ++ } ++ ++ ret = copy_from_user(attr, uattr, size); ++ if (ret) ++ return -EFAULT; ++ ++ /* ++ * XXX: Do we want to be lenient like existing syscalls; or do we want ++ * to be strict and return an error on out-of-bounds values? ++ */ ++ attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE); ++ ++ return 0; ++ ++err_size: ++ put_user(sizeof(*attr), &uattr->size); ++ return -E2BIG; ++} ++ ++/** ++ * sys_sched_setscheduler - set/change the scheduler policy and RT priority ++ * @pid: the pid in question. ++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ * ++ * Return: 0 on success. An error code otherwise. ++ */ ++SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) ++{ ++ if (policy < 0) ++ return -EINVAL; ++ ++ return do_sched_setscheduler(pid, policy, param); ++} ++ ++/** ++ * sys_sched_setparam - set/change the RT priority of a thread ++ * @pid: the pid in question. ++ * @param: structure containing the new RT priority. ++ * ++ * Return: 0 on success. An error code otherwise. 
++ */ ++SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) ++{ ++ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); ++} ++ ++/** ++ * sys_sched_setattr - same as above, but with extended sched_attr ++ * @pid: the pid in question. ++ * @uattr: structure containing the extended parameters. ++ * @flags: for future extension. ++ */ ++SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, ++ unsigned int, flags) ++{ ++ struct sched_attr attr; ++ struct task_struct *p; ++ int retval; ++ ++ if (!uattr || pid < 0 || flags) ++ return -EINVAL; ++ ++ retval = sched_copy_attr(uattr, &attr); ++ if (retval) ++ return retval; ++ ++ if ((int)attr.sched_policy < 0) ++ return -EINVAL; ++ ++ rcu_read_lock(); ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (p != NULL) ++ retval = sched_setattr(p, &attr); ++ rcu_read_unlock(); ++ ++ return retval; ++} ++ ++/** ++ * sys_sched_getscheduler - get the policy (scheduling class) of a thread ++ * @pid: the pid in question. ++ * ++ * Return: On success, the policy of the thread. Otherwise, a negative error ++ * code. ++ */ ++SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) ++{ ++ struct task_struct *p; ++ int retval; ++ ++ if (pid < 0) ++ return -EINVAL; ++ ++ retval = -ESRCH; ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ if (p) { ++ retval = security_task_getscheduler(p); ++ if (!retval) ++ retval = p->policy ++ | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); ++ } ++ rcu_read_unlock(); ++ return retval; ++} ++ ++/** ++ * sys_sched_getparam - get the RT priority of a thread ++ * @pid: the pid in question. ++ * @param: structure containing the RT priority. ++ * ++ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error ++ * code. ++ */ ++SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) ++{ ++ struct sched_param lp = { .sched_priority = 0 }; ++ struct task_struct *p; ++ int retval; ++ ++ if (!param || pid < 0) ++ return -EINVAL; ++ ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ retval = -ESRCH; ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ if (task_has_rt_policy(p)) ++ lp.sched_priority = p->rt_priority; ++ rcu_read_unlock(); ++ ++ /* ++ * This one might sleep, we cannot do it with a spinlock held ... ++ */ ++ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; ++ ++ return retval; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++static int sched_read_attr(struct sched_attr __user *uattr, ++ struct sched_attr *attr, ++ unsigned int usize) ++{ ++ int ret; ++ ++ if (!access_ok(uattr, usize)) ++ return -EFAULT; ++ ++ /* ++ * If we're handed a smaller struct than we know of, ++ * ensure all the unknown bits are 0 - i.e. old ++ * user-space does not get uncomplete information. ++ */ ++ if (usize < sizeof(*attr)) { ++ unsigned char *addr; ++ unsigned char *end; ++ ++ addr = (void *)attr + usize; ++ end = (void *)attr + sizeof(*attr); ++ ++ for (; addr < end; addr++) { ++ if (*addr) ++ return -EFBIG; ++ } ++ ++ attr->size = usize; ++ } ++ ++ ret = copy_to_user(uattr, attr, attr->size); ++ if (ret) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++/** ++ * sys_sched_getattr - similar to sched_getparam, but with sched_attr ++ * @pid: the pid in question. ++ * @uattr: structure containing the extended parameters. ++ * @size: sizeof(attr) for fwd/bwd comp. ++ * @flags: for future extension. 
++ */ ++SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, ++ unsigned int, size, unsigned int, flags) ++{ ++ struct sched_attr attr = { ++ .size = sizeof(struct sched_attr), ++ }; ++ struct task_struct *p; ++ int retval; ++ ++ if (!uattr || pid < 0 || size > PAGE_SIZE || ++ size < SCHED_ATTR_SIZE_VER0 || flags) ++ return -EINVAL; ++ ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ retval = -ESRCH; ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ attr.sched_policy = p->policy; ++ if (p->sched_reset_on_fork) ++ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; ++ if (task_has_dl_policy(p)) ++ __getparam_dl(p, &attr); ++ else if (task_has_rt_policy(p)) ++ attr.sched_priority = p->rt_priority; ++ else ++ attr.sched_nice = task_nice(p); ++ ++ rcu_read_unlock(); ++ ++ retval = sched_read_attr(uattr, &attr, size); ++ return retval; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) ++{ ++ cpumask_var_t cpus_allowed, new_mask; ++ struct task_struct *p; ++ int retval; ++ ++ rcu_read_lock(); ++ ++ p = find_process_by_pid(pid); ++ if (!p) { ++ rcu_read_unlock(); ++ return -ESRCH; ++ } ++ ++ /* Prevent p going away */ ++ get_task_struct(p); ++ rcu_read_unlock(); ++ ++ if (p->flags & PF_NO_SETAFFINITY) { ++ retval = -EINVAL; ++ goto out_put_task; ++ } ++ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_put_task; ++ } ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_free_cpus_allowed; ++ } ++ retval = -EPERM; ++ if (!check_same_owner(p)) { ++ rcu_read_lock(); ++ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { ++ rcu_read_unlock(); ++ goto out_free_new_mask; ++ } ++ rcu_read_unlock(); ++ } ++ ++ retval = security_task_setscheduler(p); ++ if (retval) ++ goto out_free_new_mask; ++ ++ ++ cpuset_cpus_allowed(p, cpus_allowed); ++ cpumask_and(new_mask, in_mask, cpus_allowed); ++ ++ /* ++ * Since bandwidth control happens on root_domain basis, ++ * if admission test is enabled, we only admit -deadline ++ * tasks allowed to run on all the CPUs in the task's ++ * root_domain. ++ */ ++#ifdef CONFIG_SMP ++ if (task_has_dl_policy(p) && dl_bandwidth_enabled()) { ++ rcu_read_lock(); ++ if (!cpumask_subset(task_rq(p)->rd->span, new_mask)) { ++ retval = -EBUSY; ++ rcu_read_unlock(); ++ goto out_free_new_mask; ++ } ++ rcu_read_unlock(); ++ } ++#endif ++again: ++ retval = __set_cpus_allowed_ptr(p, new_mask, true); ++ ++ if (!retval) { ++ cpuset_cpus_allowed(p, cpus_allowed); ++ if (!cpumask_subset(new_mask, cpus_allowed)) { ++ /* ++ * We must have raced with a concurrent cpuset ++ * update. Just reset the cpus_allowed to the ++ * cpuset's cpus_allowed ++ */ ++ cpumask_copy(new_mask, cpus_allowed); ++ goto again; ++ } ++ } ++out_free_new_mask: ++ free_cpumask_var(new_mask); ++out_free_cpus_allowed: ++ free_cpumask_var(cpus_allowed); ++out_put_task: ++ put_task_struct(p); ++ return retval; ++} ++ ++static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, ++ struct cpumask *new_mask) ++{ ++ if (len < cpumask_size()) ++ cpumask_clear(new_mask); ++ else if (len > cpumask_size()) ++ len = cpumask_size(); ++ ++ return copy_from_user(new_mask, user_mask_ptr, len) ? 
-EFAULT : 0; ++} ++ ++/** ++ * sys_sched_setaffinity - set the CPU affinity of a process ++ * @pid: pid of the process ++ * @len: length in bytes of the bitmask pointed to by user_mask_ptr ++ * @user_mask_ptr: user-space pointer to the new CPU mask ++ * ++ * Return: 0 on success. An error code otherwise. ++ */ ++SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, ++ unsigned long __user *, user_mask_ptr) ++{ ++ cpumask_var_t new_mask; ++ int retval; ++ ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); ++ if (retval == 0) ++ retval = sched_setaffinity(pid, new_mask); ++ free_cpumask_var(new_mask); ++ return retval; ++} ++ ++long sched_getaffinity(pid_t pid, struct cpumask *mask) ++{ ++ struct task_struct *p; ++ unsigned long flags; ++ int retval; ++ ++ rcu_read_lock(); ++ ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ cpumask_and(mask, &p->cpus_allowed, cpu_active_mask); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++out_unlock: ++ rcu_read_unlock(); ++ ++ return retval; ++} ++ ++/** ++ * sys_sched_getaffinity - get the CPU affinity of a process ++ * @pid: pid of the process ++ * @len: length in bytes of the bitmask pointed to by user_mask_ptr ++ * @user_mask_ptr: user-space pointer to hold the current CPU mask ++ * ++ * Return: size of CPU mask copied to user_mask_ptr on success. An ++ * error code otherwise. ++ */ ++SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, ++ unsigned long __user *, user_mask_ptr) ++{ ++ int ret; ++ cpumask_var_t mask; ++ ++ if ((len * BITS_PER_BYTE) < nr_cpu_ids) ++ return -EINVAL; ++ if (len & (sizeof(unsigned long)-1)) ++ return -EINVAL; ++ ++ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ ret = sched_getaffinity(pid, mask); ++ if (ret == 0) { ++ unsigned int retlen = min(len, cpumask_size()); ++ ++ if (copy_to_user(user_mask_ptr, mask, retlen)) ++ ret = -EFAULT; ++ else ++ ret = retlen; ++ } ++ free_cpumask_var(mask); ++ ++ return ret; ++} ++ ++/** ++ * sys_sched_yield - yield the current processor to other threads. ++ * ++ * This function yields the current CPU to other tasks. If there are no ++ * other threads running on this CPU then this function will return. ++ * ++ * Return: 0. ++ */ ++static void do_sched_yield(void) ++{ ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ local_irq_disable(); ++ rq = this_rq(); ++ rq_lock(rq, &rf); ++ ++ schedstat_inc(rq->yld_count); ++ current->sched_class->yield_task(rq); ++ ++ /* ++ * Since we are going to call schedule() anyway, there's ++ * no need to preempt or enable interrupts: ++ */ ++ preempt_disable(); ++ rq_unlock(rq, &rf); ++ sched_preempt_enable_no_resched(); ++ ++ schedule(); ++} ++ ++SYSCALL_DEFINE0(sched_yield) ++{ ++ do_sched_yield(); ++ return 0; ++} ++ ++#ifndef CONFIG_PREEMPT ++int __sched _cond_resched(void) ++{ ++ if (should_resched(0)) { ++ preempt_schedule_common(); ++ return 1; ++ } ++ rcu_all_qs(); ++ return 0; ++} ++EXPORT_SYMBOL(_cond_resched); ++#endif ++ ++/* ++ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, ++ * call schedule, and on return reacquire the lock. ++ * ++ * This works OK both with and without CONFIG_PREEMPT. We do strange low-level ++ * operations here to prevent schedule() from being called twice (once via ++ * spin_unlock(), once by hand). 
++ */ ++int __cond_resched_lock(spinlock_t *lock) ++{ ++ int resched = should_resched(PREEMPT_LOCK_OFFSET); ++ int ret = 0; ++ ++ lockdep_assert_held(lock); ++ ++ if (spin_needbreak(lock) || resched) { ++ spin_unlock(lock); ++ if (resched) ++ preempt_schedule_common(); ++ else ++ cpu_relax(); ++ ret = 1; ++ spin_lock(lock); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(__cond_resched_lock); ++ ++/** ++ * yield - yield the current processor to other threads. ++ * ++ * Do not ever use this function, there's a 99% chance you're doing it wrong. ++ * ++ * The scheduler is at all times free to pick the calling task as the most ++ * eligible task to run, if removing the yield() call from your code breaks ++ * it, its already broken. ++ * ++ * Typical broken usage is: ++ * ++ * while (!event) ++ * yield(); ++ * ++ * where one assumes that yield() will let 'the other' process run that will ++ * make event true. If the current task is a SCHED_FIFO task that will never ++ * happen. Never use yield() as a progress guarantee!! ++ * ++ * If you want to use yield() to wait for something, use wait_event(). ++ * If you want to use yield() to be 'nice' for others, use cond_resched(). ++ * If you still want to use yield(), do not! ++ */ ++void __sched yield(void) ++{ ++ set_current_state(TASK_RUNNING); ++ do_sched_yield(); ++} ++EXPORT_SYMBOL(yield); ++ ++/** ++ * yield_to - yield the current processor to another thread in ++ * your thread group, or accelerate that thread toward the ++ * processor it's on. ++ * @p: target task ++ * @preempt: whether task preemption is allowed or not ++ * ++ * It's the caller's job to ensure that the target task struct ++ * can't go away on us before we can do any checks. ++ * ++ * Return: ++ * true (>0) if we indeed boosted the target task. ++ * false (0) if we failed to boost the target. ++ * -ESRCH if there's no task to yield to. ++ */ ++int __sched yield_to(struct task_struct *p, bool preempt) ++{ ++ struct task_struct *curr = current; ++ struct rq *rq, *p_rq; ++ unsigned long flags; ++ int yielded = 0; ++ ++ local_irq_save(flags); ++ rq = this_rq(); ++ ++again: ++ p_rq = task_rq(p); ++ /* ++ * If we're the only runnable task on the rq and target rq also ++ * has only one task, there's absolutely no point in yielding. ++ */ ++ if (rq->nr_running == 1 && p_rq->nr_running == 1) { ++ yielded = -ESRCH; ++ goto out_irq; ++ } ++ ++ double_rq_lock(rq, p_rq); ++ if (task_rq(p) != p_rq) { ++ double_rq_unlock(rq, p_rq); ++ goto again; ++ } ++ ++ if (!curr->sched_class->yield_to_task) ++ goto out_unlock; ++ ++ if (curr->sched_class != p->sched_class) ++ goto out_unlock; ++ ++ if (task_running(p_rq, p) || p->state) ++ goto out_unlock; ++ ++ yielded = curr->sched_class->yield_to_task(rq, p, preempt); ++ if (yielded) { ++ schedstat_inc(rq->yld_count); ++ /* ++ * Make p's CPU reschedule; pick_next_entity takes care of ++ * fairness. ++ */ ++ if (preempt && rq != p_rq) ++ resched_curr(p_rq); ++ } ++ ++out_unlock: ++ double_rq_unlock(rq, p_rq); ++out_irq: ++ local_irq_restore(flags); ++ ++ if (yielded > 0) ++ schedule(); ++ ++ return yielded; ++} ++EXPORT_SYMBOL_GPL(yield_to); ++ ++int io_schedule_prepare(void) ++{ ++ int old_iowait = current->in_iowait; ++ ++ current->in_iowait = 1; ++ blk_schedule_flush_plug(current); ++ ++ return old_iowait; ++} ++ ++void io_schedule_finish(int token) ++{ ++ current->in_iowait = token; ++} ++ ++/* ++ * This task is about to go to sleep on IO. Increment rq->nr_iowait so ++ * that process accounting knows that this is a task in IO wait state. 
++ */ ++long __sched io_schedule_timeout(long timeout) ++{ ++ int token; ++ long ret; ++ ++ token = io_schedule_prepare(); ++ ret = schedule_timeout(timeout); ++ io_schedule_finish(token); ++ ++ return ret; ++} ++EXPORT_SYMBOL(io_schedule_timeout); ++ ++void __sched io_schedule(void) ++{ ++ int token; ++ ++ token = io_schedule_prepare(); ++ schedule(); ++ io_schedule_finish(token); ++} ++EXPORT_SYMBOL(io_schedule); ++ ++/** ++ * sys_sched_get_priority_max - return maximum RT priority. ++ * @policy: scheduling class. ++ * ++ * Return: On success, this syscall returns the maximum ++ * rt_priority that can be used by a given scheduling class. ++ * On failure, a negative error code is returned. ++ */ ++SYSCALL_DEFINE1(sched_get_priority_max, int, policy) ++{ ++ int ret = -EINVAL; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ ret = MAX_USER_RT_PRIO-1; ++ break; ++ case SCHED_DEADLINE: ++ case SCHED_NORMAL: ++ case SCHED_BATCH: ++ case SCHED_IDLE: ++ ret = 0; ++ break; ++ } ++ return ret; ++} ++ ++/** ++ * sys_sched_get_priority_min - return minimum RT priority. ++ * @policy: scheduling class. ++ * ++ * Return: On success, this syscall returns the minimum ++ * rt_priority that can be used by a given scheduling class. ++ * On failure, a negative error code is returned. ++ */ ++SYSCALL_DEFINE1(sched_get_priority_min, int, policy) ++{ ++ int ret = -EINVAL; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ ret = 1; ++ break; ++ case SCHED_DEADLINE: ++ case SCHED_NORMAL: ++ case SCHED_BATCH: ++ case SCHED_IDLE: ++ ret = 0; ++ } ++ return ret; ++} ++ ++static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) ++{ ++ struct task_struct *p; ++ unsigned int time_slice; ++ struct rq_flags rf; ++ struct rq *rq; ++ int retval; ++ ++ if (pid < 0) ++ return -EINVAL; ++ ++ retval = -ESRCH; ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ rq = task_rq_lock(p, &rf); ++ time_slice = 0; ++ if (p->sched_class->get_rr_interval) ++ time_slice = p->sched_class->get_rr_interval(rq, p); ++ task_rq_unlock(rq, p, &rf); ++ ++ rcu_read_unlock(); ++ jiffies_to_timespec64(time_slice, t); ++ return 0; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++/** ++ * sys_sched_rr_get_interval - return the default timeslice of a process. ++ * @pid: pid of the process. ++ * @interval: userspace pointer to the timeslice value. ++ * ++ * this syscall writes the default timeslice value of a given process ++ * into the user-space timespec buffer. A value of '0' means infinity. ++ * ++ * Return: On success, 0 and the timeslice is in @interval. Otherwise, ++ * an error code. 
++ */ ++SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, ++ struct timespec __user *, interval) ++{ ++ struct timespec64 t; ++ int retval = sched_rr_get_interval(pid, &t); ++ ++ if (retval == 0) ++ retval = put_timespec64(&t, interval); ++ ++ return retval; ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, ++ compat_pid_t, pid, ++ struct compat_timespec __user *, interval) ++{ ++ struct timespec64 t; ++ int retval = sched_rr_get_interval(pid, &t); ++ ++ if (retval == 0) ++ retval = compat_put_timespec64(&t, interval); ++ return retval; ++} ++#endif ++ ++void sched_show_task(struct task_struct *p) ++{ ++ unsigned long free = 0; ++ int ppid; ++ ++ if (!try_get_task_stack(p)) ++ return; ++ ++ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); ++ ++ if (p->state == TASK_RUNNING) ++ printk(KERN_CONT " running task "); ++#ifdef CONFIG_DEBUG_STACK_USAGE ++ free = stack_not_used(p); ++#endif ++ ppid = 0; ++ rcu_read_lock(); ++ if (pid_alive(p)) ++ ppid = task_pid_nr(rcu_dereference(p->real_parent)); ++ rcu_read_unlock(); ++ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, ++ task_pid_nr(p), ppid, ++ (unsigned long)task_thread_info(p)->flags); ++ ++ print_worker_info(KERN_INFO, p); ++ show_stack(p, NULL); ++ put_task_stack(p); ++} ++EXPORT_SYMBOL_GPL(sched_show_task); ++ ++static inline bool ++state_filter_match(unsigned long state_filter, struct task_struct *p) ++{ ++ /* no filter, everything matches */ ++ if (!state_filter) ++ return true; ++ ++ /* filter, but doesn't match */ ++ if (!(p->state & state_filter)) ++ return false; ++ ++ /* ++ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows ++ * TASK_KILLABLE). ++ */ ++ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) ++ return false; ++ ++ return true; ++} ++ ++ ++void show_state_filter(unsigned long state_filter) ++{ ++ struct task_struct *g, *p; ++ ++#if BITS_PER_LONG == 32 ++ printk(KERN_INFO ++ " task PC stack pid father\n"); ++#else ++ printk(KERN_INFO ++ " task PC stack pid father\n"); ++#endif ++ rcu_read_lock(); ++ for_each_process_thread(g, p) { ++ /* ++ * reset the NMI-timeout, listing all files on a slow ++ * console might take a lot of time: ++ * Also, reset softlockup watchdogs on all CPUs, because ++ * another CPU might be blocked waiting for us to process ++ * an IPI. ++ */ ++ touch_nmi_watchdog(); ++ touch_all_softlockup_watchdogs(); ++ if (state_filter_match(state_filter, p)) ++ sched_show_task(p); ++ } ++ ++#ifdef CONFIG_SCHED_DEBUG ++ if (!state_filter) ++ sysrq_sched_debug_show(); ++#endif ++ rcu_read_unlock(); ++ /* ++ * Only show locks if all tasks are dumped: ++ */ ++ if (!state_filter) ++ debug_show_all_locks(); ++} ++ ++/** ++ * init_idle - set up an idle thread for a given CPU ++ * @idle: task in question ++ * @cpu: CPU the idle task belongs to ++ * ++ * NOTE: this function does not set the idle thread's NEED_RESCHED ++ * flag, to make booting more robust. ++ */ ++void init_idle(struct task_struct *idle, int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++ __sched_fork(0, idle); ++ ++ raw_spin_lock_irqsave(&idle->pi_lock, flags); ++ raw_spin_lock(&rq->lock); ++ ++ idle->state = TASK_RUNNING; ++ idle->se.exec_start = sched_clock(); ++ idle->flags |= PF_IDLE; ++ ++ kasan_unpoison_task_stack(idle); ++ ++#ifdef CONFIG_SMP ++ /* ++ * Its possible that init_idle() gets called multiple times on a task, ++ * in that case do_set_cpus_allowed() will not do the right thing. ++ * ++ * And since this is boot we can forgo the serialization. 
++ */ ++ set_cpus_allowed_common(idle, cpumask_of(cpu)); ++#endif ++ /* ++ * We're having a chicken and egg problem, even though we are ++ * holding rq->lock, the CPU isn't yet set to this CPU so the ++ * lockdep check in task_group() will fail. ++ * ++ * Similar case to sched_fork(). / Alternatively we could ++ * use task_rq_lock() here and obtain the other rq->lock. ++ * ++ * Silence PROVE_RCU ++ */ ++ rcu_read_lock(); ++ __set_task_cpu(idle, cpu); ++ rcu_read_unlock(); ++ ++ rq->curr = rq->idle = idle; ++ idle->on_rq = TASK_ON_RQ_QUEUED; ++#ifdef CONFIG_SMP ++ idle->on_cpu = 1; ++#endif ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); ++ ++ /* Set the preempt count _outside_ the spinlocks! */ ++ init_idle_preempt_count(idle, cpu); ++ ++ /* ++ * The idle tasks have their own, simple scheduling class: ++ */ ++ idle->sched_class = &idle_sched_class; ++ ftrace_graph_init_idle_task(idle, cpu); ++ vtime_init_idle(idle, cpu); ++#ifdef CONFIG_SMP ++ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); ++#endif ++} ++ ++#ifdef CONFIG_SMP ++ ++int cpuset_cpumask_can_shrink(const struct cpumask *cur, ++ const struct cpumask *trial) ++{ ++ int ret = 1; ++ ++ if (!cpumask_weight(cur)) ++ return ret; ++ ++ ret = dl_cpuset_cpumask_can_shrink(cur, trial); ++ ++ return ret; ++} ++ ++int task_can_attach(struct task_struct *p, ++ const struct cpumask *cs_cpus_allowed) ++{ ++ int ret = 0; ++ ++ /* ++ * Kthreads which disallow setaffinity shouldn't be moved ++ * to a new cpuset; we don't want to change their CPU ++ * affinity and isolating such threads by their set of ++ * allowed nodes is unnecessary. Thus, cpusets are not ++ * applicable for such threads. This prevents checking for ++ * success of set_cpus_allowed_ptr() on all attached tasks ++ * before cpus_allowed may be changed. ++ */ ++ if (p->flags & PF_NO_SETAFFINITY) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, ++ cs_cpus_allowed)) ++ ret = dl_task_can_attach(p, cs_cpus_allowed); ++ ++out: ++ return ret; ++} ++ ++bool sched_smp_initialized __read_mostly; ++ ++#ifdef CONFIG_NUMA_BALANCING ++/* Migrate current task p to target_cpu */ ++int migrate_task_to(struct task_struct *p, int target_cpu) ++{ ++ struct migration_arg arg = { p, target_cpu }; ++ int curr_cpu = task_cpu(p); ++ ++ if (curr_cpu == target_cpu) ++ return 0; ++ ++ if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) ++ return -EINVAL; ++ ++ /* TODO: This is not properly updating schedstats */ ++ ++ trace_sched_move_numa(p, curr_cpu, target_cpu); ++ return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); ++} ++ ++/* ++ * Requeue a task on a given node and accurately track the number of NUMA ++ * tasks on the runqueues ++ */ ++void sched_setnuma(struct task_struct *p, int nid) ++{ ++ bool queued, running; ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ rq = task_rq_lock(p, &rf); ++ queued = task_on_rq_queued(p); ++ running = task_current(rq, p); ++ ++ if (queued) ++ dequeue_task(rq, p, DEQUEUE_SAVE); ++ if (running) ++ put_prev_task(rq, p); ++ ++ p->numa_preferred_nid = nid; ++ ++ if (queued) ++ enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); ++ if (running) ++ set_curr_task(rq, p); ++ task_rq_unlock(rq, p, &rf); ++} ++#endif /* CONFIG_NUMA_BALANCING */ ++ ++#ifdef CONFIG_HOTPLUG_CPU ++/* ++ * Ensure that the idle task is using init_mm right before its CPU goes ++ * offline. 
++ */ ++void idle_task_exit(void) ++{ ++ struct mm_struct *mm = current->active_mm; ++ ++ BUG_ON(cpu_online(smp_processor_id())); ++ BUG_ON(current != this_rq()->idle); ++ ++ if (mm != &init_mm) { ++ switch_mm(mm, &init_mm, current); ++ finish_arch_post_lock_switch(); ++ } ++ ++ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ ++} ++ ++/* ++ * Since this CPU is going 'away' for a while, fold any nr_active delta ++ * we might have. Assumes we're called after migrate_tasks() so that the ++ * nr_active count is stable. We need to take the teardown thread which ++ * is calling this into account, so we hand in adjust = 1 to the load ++ * calculation. ++ * ++ * Also see the comment "Global load-average calculations". ++ */ ++static void calc_load_migrate(struct rq *rq) ++{ ++ long delta = calc_load_fold_active(rq, 1); ++ if (delta) ++ atomic_long_add(delta, &calc_load_tasks); ++} ++ ++static void put_prev_task_fake(struct rq *rq, struct task_struct *prev) ++{ ++} ++ ++static const struct sched_class fake_sched_class = { ++ .put_prev_task = put_prev_task_fake, ++}; ++ ++static struct task_struct fake_task = { ++ /* ++ * Avoid pull_{rt,dl}_task() ++ */ ++ .prio = MAX_PRIO + 1, ++ .sched_class = &fake_sched_class, ++}; ++ ++/* ++ * Migrate all tasks from the rq, sleeping tasks will be migrated by ++ * try_to_wake_up()->select_task_rq(). ++ * ++ * Called with rq->lock held even though we'er in stop_machine() and ++ * there's no concurrency possible, we hold the required locks anyway ++ * because of lock validation efforts. ++ */ ++static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) ++{ ++ struct rq *rq = dead_rq; ++ struct task_struct *next, *stop = rq->stop; ++ struct rq_flags orf = *rf; ++ int dest_cpu; ++ ++ /* ++ * Fudge the rq selection such that the below task selection loop ++ * doesn't get stuck on the currently eligible stop task. ++ * ++ * We're currently inside stop_machine() and the rq is either stuck ++ * in the stop_machine_cpu_stop() loop, or we're executing this code, ++ * either way we should never end up calling schedule() until we're ++ * done here. ++ */ ++ rq->stop = NULL; ++ ++ /* ++ * put_prev_task() and pick_next_task() sched ++ * class method both need to have an up-to-date ++ * value of rq->clock[_task] ++ */ ++ update_rq_clock(rq); ++ ++ for (;;) { ++ /* ++ * There's this thread running, bail when that's the only ++ * remaining thread: ++ */ ++ if (rq->nr_running == 1) ++ break; ++ ++ /* ++ * pick_next_task() assumes pinned rq->lock: ++ */ ++ next = pick_next_task(rq, &fake_task, rf); ++ BUG_ON(!next); ++ put_prev_task(rq, next); ++ ++ /* ++ * Rules for changing task_struct::cpus_allowed are holding ++ * both pi_lock and rq->lock, such that holding either ++ * stabilizes the mask. ++ * ++ * Drop rq->lock is not quite as disastrous as it usually is ++ * because !cpu_active at this point, which means load-balance ++ * will not interfere. Also, stop-machine. ++ */ ++ rq_unlock(rq, rf); ++ raw_spin_lock(&next->pi_lock); ++ rq_relock(rq, rf); ++ ++ /* ++ * Since we're inside stop-machine, _nothing_ should have ++ * changed the task, WARN if weird stuff happened, because in ++ * that case the above rq->lock drop is a fail too. ++ */ ++ if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { ++ raw_spin_unlock(&next->pi_lock); ++ continue; ++ } ++ ++ /* Find suitable destination for @next, with force if needed. 
*/ ++ dest_cpu = select_fallback_rq(dead_rq->cpu, next); ++ rq = __migrate_task(rq, rf, next, dest_cpu); ++ if (rq != dead_rq) { ++ rq_unlock(rq, rf); ++ rq = dead_rq; ++ *rf = orf; ++ rq_relock(rq, rf); ++ } ++ raw_spin_unlock(&next->pi_lock); ++ } ++ ++ rq->stop = stop; ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++void set_rq_online(struct rq *rq) ++{ ++ if (!rq->online) { ++ const struct sched_class *class; ++ ++ cpumask_set_cpu(rq->cpu, rq->rd->online); ++ rq->online = 1; ++ ++ for_each_class(class) { ++ if (class->rq_online) ++ class->rq_online(rq); ++ } ++ } ++} ++ ++void set_rq_offline(struct rq *rq) ++{ ++ if (rq->online) { ++ const struct sched_class *class; ++ ++ for_each_class(class) { ++ if (class->rq_offline) ++ class->rq_offline(rq); ++ } ++ ++ cpumask_clear_cpu(rq->cpu, rq->rd->online); ++ rq->online = 0; ++ } ++} ++ ++/* ++ * used to mark begin/end of suspend/resume: ++ */ ++static int num_cpus_frozen; ++ ++/* ++ * Update cpusets according to cpu_active mask. If cpusets are ++ * disabled, cpuset_update_active_cpus() becomes a simple wrapper ++ * around partition_sched_domains(). ++ * ++ * If we come here as part of a suspend/resume, don't touch cpusets because we ++ * want to restore it back to its original state upon resume anyway. ++ */ ++static void cpuset_cpu_active(void) ++{ ++ if (cpuhp_tasks_frozen) { ++ /* ++ * num_cpus_frozen tracks how many CPUs are involved in suspend ++ * resume sequence. As long as this is not the last online ++ * operation in the resume sequence, just build a single sched ++ * domain, ignoring cpusets. ++ */ ++ partition_sched_domains(1, NULL, NULL); ++ if (--num_cpus_frozen) ++ return; ++ /* ++ * This is the last CPU online operation. So fall through and ++ * restore the original sched domains by considering the ++ * cpuset configurations. ++ */ ++ cpuset_force_rebuild(); ++ } ++ cpuset_update_active_cpus(); ++} ++ ++static int cpuset_cpu_inactive(unsigned int cpu) ++{ ++ if (!cpuhp_tasks_frozen) { ++ if (dl_cpu_busy(cpu)) ++ return -EBUSY; ++ cpuset_update_active_cpus(); ++ } else { ++ num_cpus_frozen++; ++ partition_sched_domains(1, NULL, NULL); ++ } ++ return 0; ++} ++ ++int sched_cpu_activate(unsigned int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct rq_flags rf; ++ ++#ifdef CONFIG_SCHED_SMT ++ /* ++ * When going up, increment the number of cores with SMT present. ++ */ ++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) ++ static_branch_inc_cpuslocked(&sched_smt_present); ++#endif ++ set_cpu_active(cpu, true); ++ ++ if (sched_smp_initialized) { ++ sched_domains_numa_masks_set(cpu); ++ cpuset_cpu_active(); ++ } ++ ++ /* ++ * Put the rq online, if not already. This happens: ++ * ++ * 1) In the early boot process, because we build the real domains ++ * after all CPUs have been brought up. ++ * ++ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the ++ * domains. ++ */ ++ rq_lock_irqsave(rq, &rf); ++ if (rq->rd) { ++ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); ++ set_rq_online(rq); ++ } ++ rq_unlock_irqrestore(rq, &rf); ++ ++ update_max_interval(); ++ ++ return 0; ++} ++ ++int sched_cpu_deactivate(unsigned int cpu) ++{ ++ int ret; ++ ++ set_cpu_active(cpu, false); ++ /* ++ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU ++ * users of this state to go away such that all new such users will ++ * observe it. ++ * ++ * Do sync before park smpboot threads to take care the rcu boost case. 
++ */ ++ synchronize_rcu_mult(call_rcu, call_rcu_sched); ++ ++#ifdef CONFIG_SCHED_SMT ++ /* ++ * When going down, decrement the number of cores with SMT present. ++ */ ++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) ++ static_branch_dec_cpuslocked(&sched_smt_present); ++#endif ++ ++ if (!sched_smp_initialized) ++ return 0; ++ ++ ret = cpuset_cpu_inactive(cpu); ++ if (ret) { ++ set_cpu_active(cpu, true); ++ return ret; ++ } ++ sched_domains_numa_masks_clear(cpu); ++ return 0; ++} ++ ++static void sched_rq_cpu_starting(unsigned int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ rq->calc_load_update = calc_load_update; ++ update_max_interval(); ++} ++ ++int sched_cpu_starting(unsigned int cpu) ++{ ++ sched_rq_cpu_starting(cpu); ++ sched_tick_start(cpu); ++ return 0; ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++int sched_cpu_dying(unsigned int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct rq_flags rf; ++ ++ /* Handle pending wakeups and then migrate everything off */ ++ sched_ttwu_pending(); ++ sched_tick_stop(cpu); ++ ++ rq_lock_irqsave(rq, &rf); ++ if (rq->rd) { ++ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); ++ set_rq_offline(rq); ++ } ++ migrate_tasks(rq, &rf); ++ BUG_ON(rq->nr_running != 1); ++ rq_unlock_irqrestore(rq, &rf); ++ ++ calc_load_migrate(rq); ++ update_max_interval(); ++ nohz_balance_exit_idle(rq); ++ hrtick_clear(rq); ++ return 0; ++} ++#endif ++ ++void __init sched_init_smp(void) ++{ ++ sched_init_numa(); ++ ++ /* ++ * There's no userspace yet to cause hotplug operations; hence all the ++ * CPU masks are stable and all blatant races in the below code cannot ++ * happen. The hotplug lock is nevertheless taken to satisfy lockdep, ++ * but there won't be any contention on it. ++ */ ++ cpus_read_lock(); ++ mutex_lock(&sched_domains_mutex); ++ sched_init_domains(cpu_active_mask); ++ mutex_unlock(&sched_domains_mutex); ++ cpus_read_unlock(); ++ ++ /* Move init over to a non-isolated CPU */ ++ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) ++ BUG(); ++ sched_init_granularity(); ++ ++ init_sched_rt_class(); ++ init_sched_dl_class(); ++ ++ sched_smp_initialized = true; ++} ++ ++static int __init migration_init(void) ++{ ++ sched_rq_cpu_starting(smp_processor_id()); ++ return 0; ++} ++early_initcall(migration_init); ++ ++#else ++void __init sched_init_smp(void) ++{ ++ sched_init_granularity(); ++} ++#endif /* CONFIG_SMP */ ++ ++int in_sched_functions(unsigned long addr) ++{ ++ return in_lock_functions(addr) || ++ (addr >= (unsigned long)__sched_text_start ++ && addr < (unsigned long)__sched_text_end); ++} ++ ++#ifdef CONFIG_CGROUP_SCHED ++/* ++ * Default task group. ++ * Every task in system belongs to this group at bootup. 
++ */ ++struct task_group root_task_group; ++LIST_HEAD(task_groups); ++ ++/* Cacheline aligned slab cache for task_group */ ++static struct kmem_cache *task_group_cache __read_mostly; ++#endif ++ ++DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); ++DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); ++ ++static __init void rq_cputime_init(void) ++{ ++ int cpu; ++ struct rq_cputime *rq_cputime; ++ ++ for_each_possible_cpu(cpu) { ++ rq_cputime = &per_cpu(rq_cputimes, cpu); ++ raw_spin_lock_init(&rq_cputime->lock); ++ } ++} ++ ++void __init sched_init(void) ++{ ++ int i, j; ++ unsigned long alloc_size = 0, ptr; ++ ++ wait_bit_init(); ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ alloc_size += 2 * nr_cpu_ids * sizeof(void **); ++#endif ++#ifdef CONFIG_RT_GROUP_SCHED ++ alloc_size += 2 * nr_cpu_ids * sizeof(void **); ++#endif ++ if (alloc_size) { ++ ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ root_task_group.se = (struct sched_entity **)ptr; ++ ptr += nr_cpu_ids * sizeof(void **); ++ ++ root_task_group.cfs_rq = (struct cfs_rq **)ptr; ++ ptr += nr_cpu_ids * sizeof(void **); ++ ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++#ifdef CONFIG_RT_GROUP_SCHED ++ root_task_group.rt_se = (struct sched_rt_entity **)ptr; ++ ptr += nr_cpu_ids * sizeof(void **); ++ ++ root_task_group.rt_rq = (struct rt_rq **)ptr; ++ ptr += nr_cpu_ids * sizeof(void **); ++ ++#endif /* CONFIG_RT_GROUP_SCHED */ ++ } ++#ifdef CONFIG_CPUMASK_OFFSTACK ++ for_each_possible_cpu(i) { ++ per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node( ++ cpumask_size(), GFP_KERNEL, cpu_to_node(i)); ++ per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node( ++ cpumask_size(), GFP_KERNEL, cpu_to_node(i)); ++ } ++#endif /* CONFIG_CPUMASK_OFFSTACK */ ++ ++ init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime()); ++ init_dl_bandwidth(&def_dl_bandwidth, global_rt_period(), global_rt_runtime()); ++ ++#ifdef CONFIG_SMP ++ init_defrootdomain(); ++#endif ++ ++#ifdef CONFIG_RT_GROUP_SCHED ++ init_rt_bandwidth(&root_task_group.rt_bandwidth, ++ global_rt_period(), global_rt_runtime()); ++#endif /* CONFIG_RT_GROUP_SCHED */ ++ ++#ifdef CONFIG_CGROUP_SCHED ++ task_group_cache = KMEM_CACHE(task_group, 0); ++ ++ list_add(&root_task_group.list, &task_groups); ++ INIT_LIST_HEAD(&root_task_group.children); ++ INIT_LIST_HEAD(&root_task_group.siblings); ++ autogroup_init(&init_task); ++#endif /* CONFIG_CGROUP_SCHED */ ++ ++ for_each_possible_cpu(i) { ++ struct rq *rq; ++ ++ rq = cpu_rq(i); ++ raw_spin_lock_init(&rq->lock); ++ rq->nr_running = 0; ++ rq->calc_load_active = 0; ++ rq->calc_load_update = jiffies + LOAD_FREQ; ++ init_cfs_rq(&rq->cfs); ++ init_rt_rq(&rq->rt); ++ init_dl_rq(&rq->dl); ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ root_task_group.shares = ROOT_TASK_GROUP_LOAD; ++ INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); ++ rq->tmp_alone_branch = &rq->leaf_cfs_rq_list; ++ /* ++ * How much CPU bandwidth does root_task_group get? ++ * ++ * In case of task-groups formed thr' the cgroup filesystem, it ++ * gets 100% of the CPU resources in the system. This overall ++ * system CPU resource is divided among the tasks of ++ * root_task_group and its child task-groups in a fair manner, ++ * based on each entity's (task or task-group's) weight ++ * (se->load.weight). 
++ * ++ * In other words, if root_task_group has 10 tasks of weight ++ * 1024) and two child groups A0 and A1 (of weight 1024 each), ++ * then A0's share of the CPU resource is: ++ * ++ * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% ++ * ++ * We achieve this by letting root_task_group's tasks sit ++ * directly in rq->cfs (i.e root_task_group->se[] = NULL). ++ */ ++ init_cfs_bandwidth(&root_task_group.cfs_bandwidth); ++ init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++ ++ rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; ++#ifdef CONFIG_RT_GROUP_SCHED ++ init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); ++#endif ++ ++ for (j = 0; j < CPU_LOAD_IDX_MAX; j++) ++ rq->cpu_load[j] = 0; ++ ++#ifdef CONFIG_SMP ++ rq->sd = NULL; ++ rq->rd = NULL; ++ rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE; ++ rq->balance_callback = NULL; ++ rq->active_balance = 0; ++ rq->next_balance = jiffies; ++ rq->push_cpu = 0; ++ rq->cpu = i; ++ rq->online = 0; ++ rq->idle_stamp = 0; ++ rq->avg_idle = 2*sysctl_sched_migration_cost; ++ rq->max_idle_balance_cost = sysctl_sched_migration_cost; ++ ++ INIT_LIST_HEAD(&rq->cfs_tasks); ++ ++ rq_attach_root(rq, &def_root_domain); ++#ifdef CONFIG_NO_HZ_COMMON ++ rq->last_load_update_tick = jiffies; ++ rq->last_blocked_load_update_tick = jiffies; ++ atomic_set(&rq->nohz_flags, 0); ++#endif ++#endif /* CONFIG_SMP */ ++ hrtick_rq_init(rq); ++ atomic_set(&rq->nr_iowait, 0); ++ } ++ ++ set_load_weight(&init_task, false); ++ ++ /* ++ * The boot idle thread does lazy MMU switching as well: ++ */ ++ mmgrab(&init_mm); ++ enter_lazy_tlb(&init_mm, current); ++ ++ /* ++ * Make us the idle thread. Technically, schedule() should not be ++ * called from this thread, however somewhere below it might be, ++ * but because we are the idle thread, we just pick up running again ++ * when this runqueue becomes "idle". ++ */ ++ init_idle(current, smp_processor_id()); ++ ++ calc_load_update = jiffies + LOAD_FREQ; ++ ++#ifdef CONFIG_SMP ++ idle_thread_set_boot_cpu(); ++#endif ++ init_sched_fair_class(); ++ ++ init_schedstats(); ++ ++ if (use_sched_idle_time) ++ rq_cputime_init(); ++ ++ scheduler_running = 1; ++} ++ ++#ifdef CONFIG_DEBUG_ATOMIC_SLEEP ++static inline int preempt_count_equals(int preempt_offset) ++{ ++ int nested = preempt_count() + rcu_preempt_depth(); ++ ++ return (nested == preempt_offset); ++} ++ ++void __might_sleep(const char *file, int line, int preempt_offset) ++{ ++ /* ++ * Blocking primitives will set (and therefore destroy) current->state, ++ * since we will exit with TASK_RUNNING make sure we enter with it, ++ * otherwise we will destroy state. 
++ */ ++ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, ++ "do not call blocking ops when !TASK_RUNNING; " ++ "state=%lx set at [<%p>] %pS\n", ++ current->state, ++ (void *)current->task_state_change, ++ (void *)current->task_state_change); ++ ++ ___might_sleep(file, line, preempt_offset); ++} ++EXPORT_SYMBOL(__might_sleep); ++ ++void ___might_sleep(const char *file, int line, int preempt_offset) ++{ ++ /* Ratelimiting timestamp: */ ++ static unsigned long prev_jiffy; ++ ++ unsigned long preempt_disable_ip; ++ ++ /* WARN_ON_ONCE() by default, no rate limit required: */ ++ rcu_sleep_check(); ++ ++ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && ++ !is_idle_task(current)) || ++ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || ++ oops_in_progress) ++ return; ++ ++ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) ++ return; ++ prev_jiffy = jiffies; ++ ++ /* Save this before calling printk(), since that will clobber it: */ ++ preempt_disable_ip = get_preempt_disable_ip(current); ++ ++ printk(KERN_ERR ++ "BUG: sleeping function called from invalid context at %s:%d\n", ++ file, line); ++ printk(KERN_ERR ++ "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), ++ current->pid, current->comm); ++ ++ if (task_stack_end_corrupted(current)) ++ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); ++ ++ debug_show_held_locks(current); ++ if (irqs_disabled()) ++ print_irqtrace_events(current); ++ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) ++ && !preempt_count_equals(preempt_offset)) { ++ pr_err("Preemption disabled at:"); ++ print_ip_sym(preempt_disable_ip); ++ pr_cont("\n"); ++ } ++ dump_stack(); ++ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); ++} ++EXPORT_SYMBOL(___might_sleep); ++#endif ++ ++#ifdef CONFIG_MAGIC_SYSRQ ++void normalize_rt_tasks(void) ++{ ++ struct task_struct *g, *p; ++ struct sched_attr attr = { ++ .sched_policy = SCHED_NORMAL, ++ }; ++ ++ read_lock(&tasklist_lock); ++ for_each_process_thread(g, p) { ++ /* ++ * Only normalize user tasks: ++ */ ++ if (p->flags & PF_KTHREAD) ++ continue; ++ ++ p->se.exec_start = 0; ++ schedstat_set(p->se.statistics.wait_start, 0); ++ schedstat_set(p->se.statistics.sleep_start, 0); ++ schedstat_set(p->se.statistics.block_start, 0); ++ ++ if (!dl_task(p) && !rt_task(p)) { ++ /* ++ * Renice negative nice level userspace ++ * tasks back to 0: ++ */ ++ if (task_nice(p) < 0) ++ set_user_nice(p, 0); ++ continue; ++ } ++ ++ __sched_setscheduler(p, &attr, false, false); ++ } ++ read_unlock(&tasklist_lock); ++} ++ ++#endif /* CONFIG_MAGIC_SYSRQ */ ++ ++#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) ++/* ++ * These functions are only useful for the IA64 MCA handling, or kdb. ++ * ++ * They can only be called when the whole system has been ++ * stopped - every CPU needs to be quiescent, and no scheduling ++ * activity can take place. Using them for anything else would ++ * be a serious bug, and as a result, they aren't even visible ++ * under any other configuration. ++ */ ++ ++/** ++ * curr_task - return the current task for a given CPU. ++ * @cpu: the processor in question. ++ * ++ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! ++ * ++ * Return: The current task for @cpu. ++ */ ++struct task_struct *curr_task(int cpu) ++{ ++ return cpu_curr(cpu); ++} ++ ++#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ ++ ++#ifdef CONFIG_IA64 ++/** ++ * set_curr_task - set the current task for a given CPU. ++ * @cpu: the processor in question. 
++ * @p: the task pointer to set. ++ * ++ * Description: This function must only be used when non-maskable interrupts ++ * are serviced on a separate stack. It allows the architecture to switch the ++ * notion of the current task on a CPU in a non-blocking manner. This function ++ * must be called with all CPU's synchronized, and interrupts disabled, the ++ * and caller must save the original value of the current task (see ++ * curr_task() above) and restore that value before reenabling interrupts and ++ * re-starting the system. ++ * ++ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! ++ */ ++void ia64_set_curr_task(int cpu, struct task_struct *p) ++{ ++ cpu_curr(cpu) = p; ++} ++ ++#endif ++ ++#ifdef CONFIG_CGROUP_SCHED ++/* task_group_lock serializes the addition/removal of task groups */ ++static DEFINE_SPINLOCK(task_group_lock); ++ ++static void sched_free_group(struct task_group *tg) ++{ ++ free_fair_sched_group(tg); ++ free_rt_sched_group(tg); ++ autogroup_free(tg); ++ kmem_cache_free(task_group_cache, tg); ++} ++ ++/* allocate runqueue etc for a new task group */ ++struct task_group *sched_create_group(struct task_group *parent) ++{ ++ struct task_group *tg; ++ ++ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); ++ if (!tg) ++ return ERR_PTR(-ENOMEM); ++ ++ if (!alloc_fair_sched_group(tg, parent)) ++ goto err; ++ ++ if (!alloc_rt_sched_group(tg, parent)) ++ goto err; ++ ++ return tg; ++ ++err: ++ sched_free_group(tg); ++ return ERR_PTR(-ENOMEM); ++} ++ ++void sched_online_group(struct task_group *tg, struct task_group *parent) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&task_group_lock, flags); ++ list_add_rcu(&tg->list, &task_groups); ++ ++ /* Root should already exist: */ ++ WARN_ON(!parent); ++ ++ tg->parent = parent; ++ INIT_LIST_HEAD(&tg->children); ++ list_add_rcu(&tg->siblings, &parent->children); ++ spin_unlock_irqrestore(&task_group_lock, flags); ++ ++ online_fair_sched_group(tg); ++} ++ ++/* rcu callback to free various structures associated with a task group */ ++static void sched_free_group_rcu(struct rcu_head *rhp) ++{ ++ /* Now it should be safe to free those cfs_rqs: */ ++ sched_free_group(container_of(rhp, struct task_group, rcu)); ++} ++ ++void sched_destroy_group(struct task_group *tg) ++{ ++ /* Wait for possible concurrent references to cfs_rqs complete: */ ++ call_rcu(&tg->rcu, sched_free_group_rcu); ++} ++ ++void sched_offline_group(struct task_group *tg) ++{ ++ unsigned long flags; ++ ++ /* End participation in shares distribution: */ ++ unregister_fair_sched_group(tg); ++ ++ spin_lock_irqsave(&task_group_lock, flags); ++ list_del_rcu(&tg->list); ++ list_del_rcu(&tg->siblings); ++ spin_unlock_irqrestore(&task_group_lock, flags); ++} ++ ++static void sched_change_group(struct task_struct *tsk, int type) ++{ ++ struct task_group *tg; ++ ++ /* ++ * All callers are synchronized by task_rq_lock(); we do not use RCU ++ * which is pointless here. Thus, we pass "true" to task_css_check() ++ * to prevent lockdep warnings. ++ */ ++ tg = container_of(task_css_check(tsk, cpu_cgrp_id, true), ++ struct task_group, css); ++ tg = autogroup_task_group(tsk, tg); ++ tsk->sched_task_group = tg; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ if (tsk->sched_class->task_change_group) ++ tsk->sched_class->task_change_group(tsk, type); ++ else ++#endif ++ set_task_rq(tsk, task_cpu(tsk)); ++} ++ ++/* ++ * Change task's runqueue when it moves between groups. ++ * ++ * The caller of this function should have put the task in its new group by ++ * now. 
This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect ++ * its new group. ++ */ ++void sched_move_task(struct task_struct *tsk) ++{ ++ int queued, running, queue_flags = ++ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ rq = task_rq_lock(tsk, &rf); ++ update_rq_clock(rq); ++ ++ running = task_current(rq, tsk); ++ queued = task_on_rq_queued(tsk); ++ ++ if (queued) ++ dequeue_task(rq, tsk, queue_flags); ++ if (running) ++ put_prev_task(rq, tsk); ++ ++ sched_change_group(tsk, TASK_MOVE_GROUP); ++ ++ if (queued) ++ enqueue_task(rq, tsk, queue_flags); ++ if (running) ++ set_curr_task(rq, tsk); ++ ++ task_rq_unlock(rq, tsk, &rf); ++} ++ ++static inline struct task_group *css_tg(struct cgroup_subsys_state *css) ++{ ++ return css ? container_of(css, struct task_group, css) : NULL; ++} ++ ++static struct cgroup_subsys_state * ++cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) ++{ ++ struct task_group *parent = css_tg(parent_css); ++ struct task_group *tg; ++ ++ if (!parent) { ++ /* This is early initialization for the top cgroup */ ++ return &root_task_group.css; ++ } ++ ++ tg = sched_create_group(parent); ++ if (IS_ERR(tg)) ++ return ERR_PTR(-ENOMEM); ++ ++ return &tg->css; ++} ++ ++/* Expose task group only after completing cgroup initialization */ ++static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) ++{ ++ struct task_group *tg = css_tg(css); ++ struct task_group *parent = css_tg(css->parent); ++ ++ if (parent) ++ sched_online_group(tg, parent); ++ return 0; ++} ++ ++static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) ++{ ++ struct task_group *tg = css_tg(css); ++ ++ sched_offline_group(tg); ++} ++ ++static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) ++{ ++ struct task_group *tg = css_tg(css); ++ ++ /* ++ * Relies on the RCU grace period between css_released() and this. ++ */ ++ sched_free_group(tg); ++} ++ ++/* ++ * This is called before wake_up_new_task(), therefore we really only ++ * have to set its group bits, all the other stuff does not apply. ++ */ ++static void cpu_cgroup_fork(struct task_struct *task) ++{ ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ rq = task_rq_lock(task, &rf); ++ ++ update_rq_clock(rq); ++ sched_change_group(task, TASK_SET_GROUP); ++ ++ task_rq_unlock(rq, task, &rf); ++} ++ ++static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) ++{ ++ struct task_struct *task; ++ struct cgroup_subsys_state *css; ++ int ret = 0; ++ ++ cgroup_taskset_for_each(task, css, tset) { ++#ifdef CONFIG_RT_GROUP_SCHED ++ if (!sched_rt_can_attach(css_tg(css), task)) ++ return -EINVAL; ++#endif ++ /* ++ * Serialize against wake_up_new_task() such that if its ++ * running, we're sure to observe its full state. ++ */ ++ raw_spin_lock_irq(&task->pi_lock); ++ /* ++ * Avoid calling sched_move_task() before wake_up_new_task() ++ * has happened. This would lead to problems with PELT, due to ++ * move wanting to detach+attach while we're not attached yet. 
++ */ ++ if (task->state == TASK_NEW) ++ ret = -EINVAL; ++ raw_spin_unlock_irq(&task->pi_lock); ++ ++ if (ret) ++ break; ++ } ++ return ret; ++} ++ ++static void cpu_cgroup_attach(struct cgroup_taskset *tset) ++{ ++ struct task_struct *task; ++ struct cgroup_subsys_state *css; ++ ++ cgroup_taskset_for_each(task, css, tset) ++ sched_move_task(task); ++} ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++static int cpu_shares_write_u64(struct cgroup_subsys_state *css, ++ struct cftype *cftype, u64 shareval) ++{ ++ if (shareval > scale_load_down(ULONG_MAX)) ++ shareval = MAX_SHARES; ++ return sched_group_set_shares(css_tg(css), scale_load(shareval)); ++} ++ ++static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ struct task_group *tg = css_tg(css); ++ ++ return (u64) scale_load_down(tg->shares); ++} ++ ++#ifdef CONFIG_CFS_BANDWIDTH ++static DEFINE_MUTEX(cfs_constraints_mutex); ++ ++const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ ++const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ ++ ++static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); ++ ++static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) ++{ ++ int i, ret = 0, runtime_enabled, runtime_was_enabled; ++ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; ++ ++ if (tg == &root_task_group) ++ return -EINVAL; ++ ++ /* ++ * Ensure we have at some amount of bandwidth every period. This is ++ * to prevent reaching a state of large arrears when throttled via ++ * entity_tick() resulting in prolonged exit starvation. ++ */ ++ if (quota < min_cfs_quota_period || period < min_cfs_quota_period) ++ return -EINVAL; ++ ++ /* ++ * Likewise, bound things on the otherside by preventing insane quota ++ * periods. This also allows us to normalize in computing quota ++ * feasibility. ++ */ ++ if (period > max_cfs_quota_period) ++ return -EINVAL; ++ ++ /* ++ * Prevent race between setting of cfs_rq->runtime_enabled and ++ * unthrottle_offline_cfs_rqs(). 
++ */ ++ get_online_cpus(); ++ mutex_lock(&cfs_constraints_mutex); ++ ret = __cfs_schedulable(tg, period, quota); ++ if (ret) ++ goto out_unlock; ++ ++ runtime_enabled = quota != RUNTIME_INF; ++ runtime_was_enabled = cfs_b->quota != RUNTIME_INF; ++ /* ++ * If we need to toggle cfs_bandwidth_used, off->on must occur ++ * before making related changes, and on->off must occur afterwards ++ */ ++ if (runtime_enabled && !runtime_was_enabled) ++ cfs_bandwidth_usage_inc(); ++ raw_spin_lock_irq(&cfs_b->lock); ++ cfs_b->period = ns_to_ktime(period); ++ cfs_b->quota = quota; ++ ++ __refill_cfs_bandwidth_runtime(cfs_b); ++ ++ /* Restart the period timer (if active) to handle new period expiry: */ ++ if (runtime_enabled) ++ start_cfs_bandwidth(cfs_b); ++ ++ raw_spin_unlock_irq(&cfs_b->lock); ++ ++ for_each_online_cpu(i) { ++ struct cfs_rq *cfs_rq = tg->cfs_rq[i]; ++ struct rq *rq = cfs_rq->rq; ++ struct rq_flags rf; ++ ++ rq_lock_irq(rq, &rf); ++ cfs_rq->runtime_enabled = runtime_enabled; ++ cfs_rq->runtime_remaining = 0; ++ ++ if (cfs_rq->throttled) ++ unthrottle_cfs_rq(cfs_rq); ++ rq_unlock_irq(rq, &rf); ++ } ++ if (runtime_was_enabled && !runtime_enabled) ++ cfs_bandwidth_usage_dec(); ++out_unlock: ++ mutex_unlock(&cfs_constraints_mutex); ++ put_online_cpus(); ++ ++ return ret; ++} ++ ++int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) ++{ ++ u64 quota, period; ++ ++ period = ktime_to_ns(tg->cfs_bandwidth.period); ++ if (cfs_quota_us < 0) ++ quota = RUNTIME_INF; ++ else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC) ++ quota = (u64)cfs_quota_us * NSEC_PER_USEC; ++ else ++ return -EINVAL; ++ ++ return tg_set_cfs_bandwidth(tg, period, quota); ++} ++ ++long tg_get_cfs_quota(struct task_group *tg) ++{ ++ u64 quota_us; ++ ++ if (tg->cfs_bandwidth.quota == RUNTIME_INF) ++ return -1; ++ ++ quota_us = tg->cfs_bandwidth.quota; ++ do_div(quota_us, NSEC_PER_USEC); ++ ++ return quota_us; ++} ++ ++int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) ++{ ++ u64 quota, period; ++ ++ if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC) ++ return -EINVAL; ++ ++ period = (u64)cfs_period_us * NSEC_PER_USEC; ++ quota = tg->cfs_bandwidth.quota; ++ ++ return tg_set_cfs_bandwidth(tg, period, quota); ++} ++ ++long tg_get_cfs_period(struct task_group *tg) ++{ ++ u64 cfs_period_us; ++ ++ cfs_period_us = ktime_to_ns(tg->cfs_bandwidth.period); ++ do_div(cfs_period_us, NSEC_PER_USEC); ++ ++ return cfs_period_us; ++} ++ ++static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ return tg_get_cfs_quota(css_tg(css)); ++} ++ ++static int cpu_cfs_quota_write_s64(struct cgroup_subsys_state *css, ++ struct cftype *cftype, s64 cfs_quota_us) ++{ ++ return tg_set_cfs_quota(css_tg(css), cfs_quota_us); ++} ++ ++static u64 cpu_cfs_period_read_u64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ return tg_get_cfs_period(css_tg(css)); ++} ++ ++static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css, ++ struct cftype *cftype, u64 cfs_period_us) ++{ ++ return tg_set_cfs_period(css_tg(css), cfs_period_us); ++} ++ ++struct cfs_schedulable_data { ++ struct task_group *tg; ++ u64 period, quota; ++}; ++ ++/* ++ * normalize group quota/period to be quota/max_period ++ * note: units are usecs ++ */ ++static u64 normalize_cfs_quota(struct task_group *tg, ++ struct cfs_schedulable_data *d) ++{ ++ u64 quota, period; ++ ++ if (tg == d->tg) { ++ period = d->period; ++ quota = d->quota; ++ } else { ++ period = tg_get_cfs_period(tg); ++ quota = tg_get_cfs_quota(tg); 
++ } ++ ++ /* note: these should typically be equivalent */ ++ if (quota == RUNTIME_INF || quota == -1) ++ return RUNTIME_INF; ++ ++ return to_ratio(period, quota); ++} ++ ++static int tg_cfs_schedulable_down(struct task_group *tg, void *data) ++{ ++ struct cfs_schedulable_data *d = data; ++ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; ++ s64 quota = 0, parent_quota = -1; ++ ++ if (!tg->parent) { ++ quota = RUNTIME_INF; ++ } else { ++ struct cfs_bandwidth *parent_b = &tg->parent->cfs_bandwidth; ++ ++ quota = normalize_cfs_quota(tg, d); ++ parent_quota = parent_b->hierarchical_quota; ++ ++ /* ++ * Ensure max(child_quota) <= parent_quota. On cgroup2, ++ * always take the min. On cgroup1, only inherit when no ++ * limit is set: ++ */ ++ if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) { ++ quota = min(quota, parent_quota); ++ } else { ++ if (quota == RUNTIME_INF) ++ quota = parent_quota; ++ else if (parent_quota != RUNTIME_INF && quota > parent_quota) ++ return -EINVAL; ++ } ++ } ++ cfs_b->hierarchical_quota = quota; ++ ++ return 0; ++} ++ ++static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) ++{ ++ int ret; ++ struct cfs_schedulable_data data = { ++ .tg = tg, ++ .period = period, ++ .quota = quota, ++ }; ++ ++ if (quota != RUNTIME_INF) { ++ do_div(data.period, NSEC_PER_USEC); ++ do_div(data.quota, NSEC_PER_USEC); ++ } ++ ++ rcu_read_lock(); ++ ret = walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data); ++ rcu_read_unlock(); ++ ++ return ret; ++} ++ ++static int cpu_cfs_stat_show(struct seq_file *sf, void *v) ++{ ++ struct task_group *tg = css_tg(seq_css(sf)); ++ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; ++ ++ seq_printf(sf, "nr_periods %d\n", cfs_b->nr_periods); ++ seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled); ++ seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time); ++ ++ if (schedstat_enabled() && tg != &root_task_group) { ++ u64 ws = 0; ++ int i; ++ ++ for_each_possible_cpu(i) ++ ws += schedstat_val(tg->se[i]->statistics.wait_sum); ++ ++ seq_printf(sf, "wait_sum %llu\n", ws); ++ } ++ ++ return 0; ++} ++#endif /* CONFIG_CFS_BANDWIDTH */ ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++ ++#ifdef CONFIG_RT_GROUP_SCHED ++static int cpu_rt_runtime_write(struct cgroup_subsys_state *css, ++ struct cftype *cft, s64 val) ++{ ++ return sched_group_set_rt_runtime(css_tg(css), val); ++} ++ ++static s64 cpu_rt_runtime_read(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ return sched_group_rt_runtime(css_tg(css)); ++} ++ ++static int cpu_rt_period_write_uint(struct cgroup_subsys_state *css, ++ struct cftype *cftype, u64 rt_period_us) ++{ ++ return sched_group_set_rt_period(css_tg(css), rt_period_us); ++} ++ ++static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ return sched_group_rt_period(css_tg(css)); ++} ++#endif /* CONFIG_RT_GROUP_SCHED */ ++ ++static struct cftype cpu_legacy_files[] = { ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ { ++ .name = "shares", ++ .read_u64 = cpu_shares_read_u64, ++ .write_u64 = cpu_shares_write_u64, ++ }, ++#endif ++#ifdef CONFIG_CFS_BANDWIDTH ++ { ++ .name = "cfs_quota_us", ++ .read_s64 = cpu_cfs_quota_read_s64, ++ .write_s64 = cpu_cfs_quota_write_s64, ++ }, ++ { ++ .name = "cfs_period_us", ++ .read_u64 = cpu_cfs_period_read_u64, ++ .write_u64 = cpu_cfs_period_write_u64, ++ }, ++ { ++ .name = "stat", ++ .seq_show = cpu_cfs_stat_show, ++ }, ++#endif ++#ifdef CONFIG_RT_GROUP_SCHED ++ { ++ .name = "rt_runtime_us", ++ .read_s64 = cpu_rt_runtime_read, ++ .write_s64 = 
cpu_rt_runtime_write, ++ }, ++ { ++ .name = "rt_period_us", ++ .read_u64 = cpu_rt_period_read_uint, ++ .write_u64 = cpu_rt_period_write_uint, ++ }, ++#endif ++ { } /* Terminate */ ++}; ++ ++static int cpu_extra_stat_show(struct seq_file *sf, ++ struct cgroup_subsys_state *css) ++{ ++#ifdef CONFIG_CFS_BANDWIDTH ++ { ++ struct task_group *tg = css_tg(css); ++ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; ++ u64 throttled_usec; ++ ++ throttled_usec = cfs_b->throttled_time; ++ do_div(throttled_usec, NSEC_PER_USEC); ++ ++ seq_printf(sf, "nr_periods %d\n" ++ "nr_throttled %d\n" ++ "throttled_usec %llu\n", ++ cfs_b->nr_periods, cfs_b->nr_throttled, ++ throttled_usec); ++ } ++#endif ++ return 0; ++} ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ struct task_group *tg = css_tg(css); ++ u64 weight = scale_load_down(tg->shares); ++ ++ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024); ++} ++ ++static int cpu_weight_write_u64(struct cgroup_subsys_state *css, ++ struct cftype *cft, u64 weight) ++{ ++ /* ++ * cgroup weight knobs should use the common MIN, DFL and MAX ++ * values which are 1, 100 and 10000 respectively. While it loses ++ * a bit of range on both ends, it maps pretty well onto the shares ++ * value used by scheduler and the round-trip conversions preserve ++ * the original value over the entire range. ++ */ ++ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX) ++ return -ERANGE; ++ ++ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL); ++ ++ return sched_group_set_shares(css_tg(css), scale_load(weight)); ++} ++ ++static s64 cpu_weight_nice_read_s64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ unsigned long weight = scale_load_down(css_tg(css)->shares); ++ int last_delta = INT_MAX; ++ int prio, delta; ++ ++ /* find the closest nice value to the current weight */ ++ for (prio = 0; prio < ARRAY_SIZE(sched_prio_to_weight); prio++) { ++ delta = abs(sched_prio_to_weight[prio] - weight); ++ if (delta >= last_delta) ++ break; ++ last_delta = delta; ++ } ++ ++ return PRIO_TO_NICE(prio - 1 + MAX_RT_PRIO); ++} ++ ++static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css, ++ struct cftype *cft, s64 nice) ++{ ++ unsigned long weight; ++ int idx; ++ ++ if (nice < MIN_NICE || nice > MAX_NICE) ++ return -ERANGE; ++ ++ idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO; ++ idx = array_index_nospec(idx, 40); ++ weight = sched_prio_to_weight[idx]; ++ ++ return sched_group_set_shares(css_tg(css), scale_load(weight)); ++} ++#endif ++ ++static void __maybe_unused cpu_period_quota_print(struct seq_file *sf, ++ long period, long quota) ++{ ++ if (quota < 0) ++ seq_puts(sf, "max"); ++ else ++ seq_printf(sf, "%ld", quota); ++ ++ seq_printf(sf, " %ld\n", period); ++} ++ ++/* caller should put the current value in *@periodp before calling */ ++static int __maybe_unused cpu_period_quota_parse(char *buf, ++ u64 *periodp, u64 *quotap) ++{ ++ char tok[21]; /* U64_MAX */ ++ ++ if (sscanf(buf, "%20s %llu", tok, periodp) < 1) ++ return -EINVAL; ++ ++ *periodp *= NSEC_PER_USEC; ++ ++ if (sscanf(tok, "%llu", quotap)) ++ *quotap *= NSEC_PER_USEC; ++ else if (!strcmp(tok, "max")) ++ *quotap = RUNTIME_INF; ++ else ++ return -EINVAL; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_CFS_BANDWIDTH ++static int cpu_max_show(struct seq_file *sf, void *v) ++{ ++ struct task_group *tg = css_tg(seq_css(sf)); ++ ++ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg)); ++ return 0; ++} 
++ ++static ssize_t cpu_max_write(struct kernfs_open_file *of, ++ char *buf, size_t nbytes, loff_t off) ++{ ++ struct task_group *tg = css_tg(of_css(of)); ++ u64 period = tg_get_cfs_period(tg); ++ u64 quota; ++ int ret; ++ ++ ret = cpu_period_quota_parse(buf, &period, "a); ++ if (!ret) ++ ret = tg_set_cfs_bandwidth(tg, period, quota); ++ return ret ?: nbytes; ++} ++#endif ++ ++static struct cftype cpu_files[] = { ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ { ++ .name = "weight", ++ .flags = CFTYPE_NOT_ON_ROOT, ++ .read_u64 = cpu_weight_read_u64, ++ .write_u64 = cpu_weight_write_u64, ++ }, ++ { ++ .name = "weight.nice", ++ .flags = CFTYPE_NOT_ON_ROOT, ++ .read_s64 = cpu_weight_nice_read_s64, ++ .write_s64 = cpu_weight_nice_write_s64, ++ }, ++#endif ++#ifdef CONFIG_CFS_BANDWIDTH ++ { ++ .name = "max", ++ .flags = CFTYPE_NOT_ON_ROOT, ++ .seq_show = cpu_max_show, ++ .write = cpu_max_write, ++ }, ++#endif ++ { } /* terminate */ ++}; ++ ++struct cgroup_subsys cpu_cgrp_subsys = { ++ .css_alloc = cpu_cgroup_css_alloc, ++ .css_online = cpu_cgroup_css_online, ++ .css_released = cpu_cgroup_css_released, ++ .css_free = cpu_cgroup_css_free, ++ .css_extra_stat_show = cpu_extra_stat_show, ++ .fork = cpu_cgroup_fork, ++ .can_attach = cpu_cgroup_can_attach, ++ .attach = cpu_cgroup_attach, ++ .legacy_cftypes = cpu_legacy_files, ++ .dfl_cftypes = cpu_files, ++ .early_init = true, ++ .threaded = true, ++}; ++ ++#endif /* CONFIG_CGROUP_SCHED */ ++ ++void dump_cpu_task(int cpu) ++{ ++ pr_info("Task dump for CPU %d:\n", cpu); ++ sched_show_task(cpu_curr(cpu)); ++} ++ ++/* ++ * Nice levels are multiplicative, with a gentle 10% change for every ++ * nice level changed. I.e. when a CPU-bound task goes from nice 0 to ++ * nice 1, it will get ~10% less CPU time than another CPU-bound task ++ * that remained on nice 0. ++ * ++ * The "10% effect" is relative and cumulative: from _any_ nice level, ++ * if you go up 1 level, it's -10% CPU usage, if you go down 1 level ++ * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25. ++ * If a task goes up by ~10% and another task goes down by ~10% then ++ * the relative distance between them is ~25%.) ++ */ ++const int sched_prio_to_weight[40] = { ++ /* -20 */ 88761, 71755, 56483, 46273, 36291, ++ /* -15 */ 29154, 23254, 18705, 14949, 11916, ++ /* -10 */ 9548, 7620, 6100, 4904, 3906, ++ /* -5 */ 3121, 2501, 1991, 1586, 1277, ++ /* 0 */ 1024, 820, 655, 526, 423, ++ /* 5 */ 335, 272, 215, 172, 137, ++ /* 10 */ 110, 87, 70, 56, 45, ++ /* 15 */ 36, 29, 23, 18, 15, ++}; ++ ++/* ++ * Inverse (2^32/x) values of the sched_prio_to_weight[] array, precalculated. 
++ * ++ * In cases where the weight does not change often, we can use the ++ * precalculated inverse to speed up arithmetics by turning divisions ++ * into multiplications: ++ */ ++const u32 sched_prio_to_wmult[40] = { ++ /* -20 */ 48388, 59856, 76040, 92818, 118348, ++ /* -15 */ 147320, 184698, 229616, 287308, 360437, ++ /* -10 */ 449829, 563644, 704093, 875809, 1099582, ++ /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326, ++ /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587, ++ /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126, ++ /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717, ++ /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, ++}; ++ ++#undef CREATE_TRACE_POINTS +diff -uprN kernel/kernel/sched/idle.c kernel_new/kernel/sched/idle.c +--- kernel/kernel/sched/idle.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/sched/idle.c 2021-04-01 18:28:07.812863113 +0800 +@@ -76,22 +76,29 @@ void __weak arch_cpu_idle_dead(void) { } + void __weak arch_cpu_idle(void) + { + cpu_idle_force_poll = 1; +- local_irq_enable(); ++ local_irq_enable_full(); + } + + /** + * default_idle_call - Default CPU idle routine. + * + * To use when the cpuidle framework cannot be used. ++ * ++ * When interrupts are pipelined, this call is entered with hard irqs ++ * on and the root stage stalled, returns with hard irqs on, and the ++ * root stage unstalled. + */ + void __cpuidle default_idle_call(void) + { + if (current_clr_polling_and_test()) { +- local_irq_enable(); ++ local_irq_enable_full(); + } else { +- stop_critical_timings(); +- arch_cpu_idle(); +- start_critical_timings(); ++ if (ipipe_enter_cpuidle(NULL, NULL)) { ++ stop_critical_timings(); ++ arch_cpu_idle(); ++ start_critical_timings(); ++ } else ++ local_irq_enable_full(); + } + } + +@@ -207,6 +214,15 @@ static void cpuidle_idle_call(void) + exit_idle: + __current_set_polling(); + ++#ifdef CONFIG_IPIPE ++ /* ++ * Catch mishandling of the CPU's interrupt disable flag when ++ * pipelining IRQs. 
++ */ ++ if (WARN_ON_ONCE(hard_irqs_disabled())) ++ hard_local_irq_enable(); ++#endif ++ + /* + * It is up to the idle functions to reenable local interrupts + */ +@@ -261,6 +277,9 @@ static void do_idle(void) + cpu_idle_poll(); + } else { + cpuidle_idle_call(); ++#ifdef CONFIG_IPIPE ++ WARN_ON_ONCE(hard_irqs_disabled()); ++#endif + } + arch_cpu_idle_exit(); + } +diff -uprN kernel/kernel/sched/sched.h kernel_new/kernel/sched/sched.h +--- kernel/kernel/sched/sched.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/sched/sched.h 2021-04-02 09:21:23.248426589 +0800 +@@ -64,6 +64,7 @@ + #include + #include + #include ++#include + #include + + #include +diff -uprN kernel/kernel/sched/sched.h.orig kernel_new/kernel/sched/sched.h.orig +--- kernel/kernel/sched/sched.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/sched/sched.h.orig 2021-04-01 18:28:07.812863113 +0800 +@@ -0,0 +1,2315 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Scheduler internal types and methods: ++ */ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#ifdef CONFIG_PARAVIRT ++# include ++#endif ++ ++#include "cpupri.h" ++#include "cpudeadline.h" ++ ++#ifdef CONFIG_SCHED_DEBUG ++# define SCHED_WARN_ON(x) WARN_ONCE(x, #x) ++#else ++# define SCHED_WARN_ON(x) ({ (void)(x), 0; }) ++#endif ++ ++struct rq; ++struct cpuidle_state; ++ ++/* task_struct::on_rq states: */ ++#define TASK_ON_RQ_QUEUED 1 ++#define TASK_ON_RQ_MIGRATING 2 ++ ++extern __read_mostly int scheduler_running; ++ ++extern unsigned long calc_load_update; ++extern atomic_long_t calc_load_tasks; ++ ++extern void calc_global_load_tick(struct rq *this_rq); ++extern long calc_load_fold_active(struct rq *this_rq, long adjust); ++ ++#ifdef CONFIG_SMP ++extern void cpu_load_update_active(struct rq *this_rq); ++#else ++static inline void cpu_load_update_active(struct rq *this_rq) { } ++#endif ++ ++/* ++ * Helpers for converting nanosecond timing to jiffy resolution ++ */ ++#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ)) ++ ++/* ++ * Increase resolution of nice-level calculations for 64-bit architectures. ++ * The extra resolution improves shares distribution and load balancing of ++ * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup ++ * hierarchies, especially on larger systems. This is not a user-visible change ++ * and does not change the user-interface for setting shares/weights. ++ * ++ * We increase resolution only if we have enough bits to allow this increased ++ * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit ++ * are pretty high and the returns do not justify the increased costs. ++ * ++ * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to ++ * increase coverage and consistency always enable it on 64-bit platforms. 
++ */ ++#ifdef CONFIG_64BIT ++# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) ++# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT) ++# define scale_load_down(w) \ ++({ \ ++ unsigned long __w = (w); \ ++ if (__w) \ ++ __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \ ++ __w; \ ++}) ++#else ++# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT) ++# define scale_load(w) (w) ++# define scale_load_down(w) (w) ++#endif ++ ++/* ++ * Task weight (visible to users) and its load (invisible to users) have ++ * independent resolution, but they should be well calibrated. We use ++ * scale_load() and scale_load_down(w) to convert between them. The ++ * following must be true: ++ * ++ * scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD ++ * ++ */ ++#define NICE_0_LOAD (1L << NICE_0_LOAD_SHIFT) ++ ++/* ++ * Single value that decides SCHED_DEADLINE internal math precision. ++ * 10 -> just above 1us ++ * 9 -> just above 0.5us ++ */ ++#define DL_SCALE 10 ++ ++/* ++ * Single value that denotes runtime == period, ie unlimited time. ++ */ ++#define RUNTIME_INF ((u64)~0ULL) ++ ++static inline int idle_policy(int policy) ++{ ++ return policy == SCHED_IDLE; ++} ++static inline int fair_policy(int policy) ++{ ++ return policy == SCHED_NORMAL || policy == SCHED_BATCH; ++} ++ ++static inline int rt_policy(int policy) ++{ ++ return policy == SCHED_FIFO || policy == SCHED_RR; ++} ++ ++static inline int dl_policy(int policy) ++{ ++ return policy == SCHED_DEADLINE; ++} ++static inline bool valid_policy(int policy) ++{ ++ return idle_policy(policy) || fair_policy(policy) || ++ rt_policy(policy) || dl_policy(policy); ++} ++ ++static inline int task_has_rt_policy(struct task_struct *p) ++{ ++ return rt_policy(p->policy); ++} ++ ++static inline int task_has_dl_policy(struct task_struct *p) ++{ ++ return dl_policy(p->policy); ++} ++ ++#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) ++ ++/* ++ * !! For sched_setattr_nocheck() (kernel) only !! ++ * ++ * This is actually gross. :( ++ * ++ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE ++ * tasks, but still be able to sleep. We need this on platforms that cannot ++ * atomically change clock frequency. Remove once fast switching will be ++ * available on such platforms. ++ * ++ * SUGOV stands for SchedUtil GOVernor. ++ */ ++#define SCHED_FLAG_SUGOV 0x10000000 ++ ++static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se) ++{ ++#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL ++ return unlikely(dl_se->flags & SCHED_FLAG_SUGOV); ++#else ++ return false; ++#endif ++} ++ ++/* ++ * Tells if entity @a should preempt entity @b. 
++ */ ++static inline bool ++dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b) ++{ ++ return dl_entity_is_special(a) || ++ dl_time_before(a->deadline, b->deadline); ++} ++ ++/* ++ * This is the priority-queue data structure of the RT scheduling class: ++ */ ++struct rt_prio_array { ++ DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */ ++ struct list_head queue[MAX_RT_PRIO]; ++}; ++ ++struct rt_bandwidth { ++ /* nests inside the rq lock: */ ++ raw_spinlock_t rt_runtime_lock; ++ ktime_t rt_period; ++ u64 rt_runtime; ++ struct hrtimer rt_period_timer; ++ unsigned int rt_period_active; ++}; ++ ++void __dl_clear_params(struct task_struct *p); ++ ++/* ++ * To keep the bandwidth of -deadline tasks and groups under control ++ * we need some place where: ++ * - store the maximum -deadline bandwidth of the system (the group); ++ * - cache the fraction of that bandwidth that is currently allocated. ++ * ++ * This is all done in the data structure below. It is similar to the ++ * one used for RT-throttling (rt_bandwidth), with the main difference ++ * that, since here we are only interested in admission control, we ++ * do not decrease any runtime while the group "executes", neither we ++ * need a timer to replenish it. ++ * ++ * With respect to SMP, the bandwidth is given on a per-CPU basis, ++ * meaning that: ++ * - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU; ++ * - dl_total_bw array contains, in the i-eth element, the currently ++ * allocated bandwidth on the i-eth CPU. ++ * Moreover, groups consume bandwidth on each CPU, while tasks only ++ * consume bandwidth on the CPU they're running on. ++ * Finally, dl_total_bw_cpu is used to cache the index of dl_total_bw ++ * that will be shown the next time the proc or cgroup controls will ++ * be red. It on its turn can be changed by writing on its own ++ * control. 
++ */ ++struct dl_bandwidth { ++ raw_spinlock_t dl_runtime_lock; ++ u64 dl_runtime; ++ u64 dl_period; ++}; ++ ++static inline int dl_bandwidth_enabled(void) ++{ ++ return sysctl_sched_rt_runtime >= 0; ++} ++ ++struct dl_bw { ++ raw_spinlock_t lock; ++ u64 bw; ++ u64 total_bw; ++}; ++ ++static inline void __dl_update(struct dl_bw *dl_b, s64 bw); ++ ++static inline ++void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus) ++{ ++ dl_b->total_bw -= tsk_bw; ++ __dl_update(dl_b, (s32)tsk_bw / cpus); ++} ++ ++static inline ++void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus) ++{ ++ dl_b->total_bw += tsk_bw; ++ __dl_update(dl_b, -((s32)tsk_bw / cpus)); ++} ++ ++static inline ++bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw) ++{ ++ return dl_b->bw != -1 && ++ dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw; ++} ++ ++extern void dl_change_utilization(struct task_struct *p, u64 new_bw); ++extern void init_dl_bw(struct dl_bw *dl_b); ++extern int sched_dl_global_validate(void); ++extern void sched_dl_do_global(void); ++extern int sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr); ++extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr); ++extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr); ++extern bool __checkparam_dl(const struct sched_attr *attr); ++extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); ++extern int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); ++extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); ++extern bool dl_cpu_busy(unsigned int cpu); ++ ++#ifdef CONFIG_CGROUP_SCHED ++ ++#include ++ ++struct cfs_rq; ++struct rt_rq; ++ ++extern struct list_head task_groups; ++ ++struct cfs_bandwidth { ++#ifdef CONFIG_CFS_BANDWIDTH ++ raw_spinlock_t lock; ++ ktime_t period; ++ u64 quota; ++ u64 runtime; ++ s64 hierarchical_quota; ++ ++ short idle; ++ short period_active; ++ struct hrtimer period_timer; ++ struct hrtimer slack_timer; ++ struct list_head throttled_cfs_rq; ++ ++ /* Statistics: */ ++ int nr_periods; ++ int nr_throttled; ++ u64 throttled_time; ++ ++ bool distribute_running; ++#endif ++}; ++ ++/* Task group related information */ ++struct task_group { ++ struct cgroup_subsys_state css; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ /* schedulable entities of this group on each CPU */ ++ struct sched_entity **se; ++ /* runqueue "owned" by this group on each CPU */ ++ struct cfs_rq **cfs_rq; ++ unsigned long shares; ++ ++#ifdef CONFIG_SMP ++ /* ++ * load_avg can be heavily contended at clock tick time, so put ++ * it in its own cacheline separated from the fields above which ++ * will also be accessed at each tick. ++ */ ++ atomic_long_t load_avg ____cacheline_aligned; ++#endif ++#endif ++ ++#ifdef CONFIG_RT_GROUP_SCHED ++ struct sched_rt_entity **rt_se; ++ struct rt_rq **rt_rq; ++ ++ struct rt_bandwidth rt_bandwidth; ++#endif ++ ++ struct rcu_head rcu; ++ struct list_head list; ++ ++ struct task_group *parent; ++ struct list_head siblings; ++ struct list_head children; ++ ++#ifdef CONFIG_SCHED_AUTOGROUP ++ struct autogroup *autogroup; ++#endif ++ ++ struct cfs_bandwidth cfs_bandwidth; ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++}; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD ++ ++/* ++ * A weight of 0 or 1 can cause arithmetics problems. 
++ * A weight of a cfs_rq is the sum of weights of which entities ++ * are queued on this cfs_rq, so a weight of a entity should not be ++ * too large, so as the shares value of a task group. ++ * (The default weight is 1024 - so there's no practical ++ * limitation from this.) ++ */ ++#define MIN_SHARES (1UL << 1) ++#define MAX_SHARES (1UL << 18) ++#endif ++ ++typedef int (*tg_visitor)(struct task_group *, void *); ++ ++extern int walk_tg_tree_from(struct task_group *from, ++ tg_visitor down, tg_visitor up, void *data); ++ ++/* ++ * Iterate the full tree, calling @down when first entering a node and @up when ++ * leaving it for the final time. ++ * ++ * Caller must hold rcu_lock or sufficient equivalent. ++ */ ++static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) ++{ ++ return walk_tg_tree_from(&root_task_group, down, up, data); ++} ++ ++extern int tg_nop(struct task_group *tg, void *data); ++ ++extern void free_fair_sched_group(struct task_group *tg); ++extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent); ++extern void online_fair_sched_group(struct task_group *tg); ++extern void unregister_fair_sched_group(struct task_group *tg); ++extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, ++ struct sched_entity *se, int cpu, ++ struct sched_entity *parent); ++extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b); ++ ++extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b); ++extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b); ++extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq); ++ ++extern void free_rt_sched_group(struct task_group *tg); ++extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent); ++extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, ++ struct sched_rt_entity *rt_se, int cpu, ++ struct sched_rt_entity *parent); ++extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us); ++extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us); ++extern long sched_group_rt_runtime(struct task_group *tg); ++extern long sched_group_rt_period(struct task_group *tg); ++extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); ++ ++extern struct task_group *sched_create_group(struct task_group *parent); ++extern void sched_online_group(struct task_group *tg, ++ struct task_group *parent); ++extern void sched_destroy_group(struct task_group *tg); ++extern void sched_offline_group(struct task_group *tg); ++ ++extern void sched_move_task(struct task_struct *tsk); ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); ++ ++#ifdef CONFIG_SMP ++extern void set_task_rq_fair(struct sched_entity *se, ++ struct cfs_rq *prev, struct cfs_rq *next); ++#else /* !CONFIG_SMP */ ++static inline void set_task_rq_fair(struct sched_entity *se, ++ struct cfs_rq *prev, struct cfs_rq *next) { } ++#endif /* CONFIG_SMP */ ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++ ++#else /* CONFIG_CGROUP_SCHED */ ++ ++struct cfs_bandwidth { }; ++ ++#endif /* CONFIG_CGROUP_SCHED */ ++ ++/* CFS-related fields in a runqueue */ ++struct cfs_rq { ++ struct load_weight load; ++ unsigned long runnable_weight; ++ unsigned int nr_running; ++ unsigned int h_nr_running; ++ ++ u64 exec_clock; ++ u64 min_vruntime; ++#ifndef CONFIG_64BIT ++ u64 min_vruntime_copy; ++#endif ++ ++ struct rb_root_cached tasks_timeline; ++ ++ /* ++ * 'curr' points to currently running 
entity on this cfs_rq. ++ * It is set to NULL otherwise (i.e when none are currently running). ++ */ ++ struct sched_entity *curr; ++ struct sched_entity *next; ++ struct sched_entity *last; ++ struct sched_entity *skip; ++ ++#ifdef CONFIG_SCHED_DEBUG ++ unsigned int nr_spread_over; ++#endif ++ ++#ifdef CONFIG_SMP ++ /* ++ * CFS load tracking ++ */ ++ struct sched_avg avg; ++#ifndef CONFIG_64BIT ++ u64 load_last_update_time_copy; ++#endif ++ struct { ++ raw_spinlock_t lock ____cacheline_aligned; ++ int nr; ++ unsigned long load_avg; ++ unsigned long util_avg; ++ unsigned long runnable_sum; ++ } removed; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ unsigned long tg_load_avg_contrib; ++ long propagate; ++ long prop_runnable_sum; ++ ++ /* ++ * h_load = weight * f(tg) ++ * ++ * Where f(tg) is the recursive weight fraction assigned to ++ * this group. ++ */ ++ unsigned long h_load; ++ u64 last_h_load_update; ++ struct sched_entity *h_load_next; ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++#endif /* CONFIG_SMP */ ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */ ++ ++ /* ++ * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in ++ * a hierarchy). Non-leaf lrqs hold other higher schedulable entities ++ * (like users, containers etc.) ++ * ++ * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU. ++ * This list is used during load balance. ++ */ ++ int on_list; ++ struct list_head leaf_cfs_rq_list; ++ struct task_group *tg; /* group that "owns" this runqueue */ ++ ++#ifdef CONFIG_CFS_BANDWIDTH ++ int runtime_enabled; ++ s64 runtime_remaining; ++ ++ u64 throttled_clock; ++ u64 throttled_clock_task; ++ u64 throttled_clock_task_time; ++ int throttled; ++ int throttle_count; ++ struct list_head throttled_list; ++#endif /* CONFIG_CFS_BANDWIDTH */ ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++}; ++ ++static inline int rt_bandwidth_enabled(void) ++{ ++ return sysctl_sched_rt_runtime >= 0; ++} ++ ++/* RT IPI pull logic requires IRQ_WORK */ ++#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP) ++# define HAVE_RT_PUSH_IPI ++#endif ++ ++/* Real-Time classes' related field in a runqueue: */ ++struct rt_rq { ++ struct rt_prio_array active; ++ unsigned int rt_nr_running; ++ unsigned int rr_nr_running; ++#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED ++ struct { ++ int curr; /* highest queued rt task prio */ ++#ifdef CONFIG_SMP ++ int next; /* next highest */ ++#endif ++ } highest_prio; ++#endif ++#ifdef CONFIG_SMP ++ unsigned long rt_nr_migratory; ++ unsigned long rt_nr_total; ++ int overloaded; ++ struct plist_head pushable_tasks; ++ ++#endif /* CONFIG_SMP */ ++ int rt_queued; ++ ++ int rt_throttled; ++ u64 rt_time; ++ u64 rt_runtime; ++ /* Nests inside the rq lock: */ ++ raw_spinlock_t rt_runtime_lock; ++ ++#ifdef CONFIG_RT_GROUP_SCHED ++ unsigned long rt_nr_boosted; ++ ++ struct rq *rq; ++ struct task_group *tg; ++#endif ++}; ++ ++static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq) ++{ ++ return rt_rq->rt_queued && rt_rq->rt_nr_running; ++} ++ ++/* Deadline class' related fields in a runqueue */ ++struct dl_rq { ++ /* runqueue is an rbtree, ordered by deadline */ ++ struct rb_root_cached root; ++ ++ unsigned long dl_nr_running; ++ ++#ifdef CONFIG_SMP ++ /* ++ * Deadline values of the currently executing and the ++ * earliest ready task on this rq. Caching these facilitates ++ * the decision wether or not a ready but not running task ++ * should migrate somewhere else. 
++ */ ++ struct { ++ u64 curr; ++ u64 next; ++ } earliest_dl; ++ ++ unsigned long dl_nr_migratory; ++ int overloaded; ++ ++ /* ++ * Tasks on this rq that can be pushed away. They are kept in ++ * an rb-tree, ordered by tasks' deadlines, with caching ++ * of the leftmost (earliest deadline) element. ++ */ ++ struct rb_root_cached pushable_dl_tasks_root; ++#else ++ struct dl_bw dl_bw; ++#endif ++ /* ++ * "Active utilization" for this runqueue: increased when a ++ * task wakes up (becomes TASK_RUNNING) and decreased when a ++ * task blocks ++ */ ++ u64 running_bw; ++ ++ /* ++ * Utilization of the tasks "assigned" to this runqueue (including ++ * the tasks that are in runqueue and the tasks that executed on this ++ * CPU and blocked). Increased when a task moves to this runqueue, and ++ * decreased when the task moves away (migrates, changes scheduling ++ * policy, or terminates). ++ * This is needed to compute the "inactive utilization" for the ++ * runqueue (inactive utilization = this_bw - running_bw). ++ */ ++ u64 this_bw; ++ u64 extra_bw; ++ ++ /* ++ * Inverse of the fraction of CPU utilization that can be reclaimed ++ * by the GRUB algorithm. ++ */ ++ u64 bw_ratio; ++}; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++/* An entity is a task if it doesn't "own" a runqueue */ ++#define entity_is_task(se) (!se->my_q) ++#else ++#define entity_is_task(se) 1 ++#endif ++ ++#ifdef CONFIG_SMP ++/* ++ * XXX we want to get rid of these helpers and use the full load resolution. ++ */ ++static inline long se_weight(struct sched_entity *se) ++{ ++ return scale_load_down(se->load.weight); ++} ++ ++static inline long se_runnable(struct sched_entity *se) ++{ ++ return scale_load_down(se->runnable_weight); ++} ++ ++static inline bool sched_asym_prefer(int a, int b) ++{ ++ return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b); ++} ++ ++/* ++ * We add the notion of a root-domain which will be used to define per-domain ++ * variables. Each exclusive cpuset essentially defines an island domain by ++ * fully partitioning the member CPUs from any other cpuset. Whenever a new ++ * exclusive cpuset is created, we also create and attach a new root-domain ++ * object. ++ * ++ */ ++struct root_domain { ++ atomic_t refcount; ++ atomic_t rto_count; ++ struct rcu_head rcu; ++ cpumask_var_t span; ++ cpumask_var_t online; ++ ++ /* Indicate more than one runnable task for any CPU */ ++ bool overload; ++ ++ /* ++ * The bit corresponding to a CPU gets set here if such CPU has more ++ * than one runnable -deadline task (as it is below for RT tasks). ++ */ ++ cpumask_var_t dlo_mask; ++ atomic_t dlo_count; ++ struct dl_bw dl_bw; ++ struct cpudl cpudl; ++ ++#ifdef HAVE_RT_PUSH_IPI ++ /* ++ * For IPI pull requests, loop across the rto_mask. ++ */ ++ struct irq_work rto_push_work; ++ raw_spinlock_t rto_lock; ++ /* These are only updated and read within rto_lock */ ++ int rto_loop; ++ int rto_cpu; ++ /* These atomics are updated outside of a lock */ ++ atomic_t rto_loop_next; ++ atomic_t rto_loop_start; ++#endif ++ /* ++ * The "RT overload" flag: it gets set if a CPU has more than ++ * one runnable RT task. 
++ */ ++ cpumask_var_t rto_mask; ++ struct cpupri cpupri; ++ ++ unsigned long max_cpu_capacity; ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) ++}; ++ ++extern struct root_domain def_root_domain; ++extern struct mutex sched_domains_mutex; ++ ++extern void init_defrootdomain(void); ++extern int sched_init_domains(const struct cpumask *cpu_map); ++extern void rq_attach_root(struct rq *rq, struct root_domain *rd); ++extern void sched_get_rd(struct root_domain *rd); ++extern void sched_put_rd(struct root_domain *rd); ++ ++#ifdef HAVE_RT_PUSH_IPI ++extern void rto_push_irq_work_func(struct irq_work *work); ++#endif ++#endif /* CONFIG_SMP */ ++ ++/* ++ * This is the main, per-CPU runqueue data structure. ++ * ++ * Locking rule: those places that want to lock multiple runqueues ++ * (such as the load balancing or the thread migration code), lock ++ * acquire operations must be ordered by ascending &runqueue. ++ */ ++struct rq { ++ /* runqueue lock: */ ++ raw_spinlock_t lock; ++ ++ /* ++ * nr_running and cpu_load should be in the same cacheline because ++ * remote CPUs use both these fields when doing load calculation. ++ */ ++ unsigned int nr_running; ++#ifdef CONFIG_NUMA_BALANCING ++ unsigned int nr_numa_running; ++ unsigned int nr_preferred_running; ++ unsigned int numa_migrate_on; ++#endif ++ #define CPU_LOAD_IDX_MAX 5 ++ unsigned long cpu_load[CPU_LOAD_IDX_MAX]; ++#ifdef CONFIG_NO_HZ_COMMON ++#ifdef CONFIG_SMP ++ unsigned long last_load_update_tick; ++ unsigned long last_blocked_load_update_tick; ++ unsigned int has_blocked_load; ++#endif /* CONFIG_SMP */ ++ unsigned int nohz_tick_stopped; ++ atomic_t nohz_flags; ++#endif /* CONFIG_NO_HZ_COMMON */ ++ ++ /* capture load from *all* tasks on this CPU: */ ++ struct load_weight load; ++ unsigned long nr_load_updates; ++ u64 nr_switches; ++ ++ struct cfs_rq cfs; ++ struct rt_rq rt; ++ struct dl_rq dl; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ /* list of leaf cfs_rq on this CPU: */ ++ struct list_head leaf_cfs_rq_list; ++ struct list_head *tmp_alone_branch; ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++ ++ /* ++ * This is part of a global counter where only the total sum ++ * over all CPUs matters. A task can increase this counter on ++ * one CPU and if it got migrated afterwards it may decrease ++ * it on another CPU. 
Always updated under the runqueue lock: ++ */ ++ unsigned long nr_uninterruptible; ++ ++ struct task_struct *curr; ++ struct task_struct *idle; ++ struct task_struct *stop; ++ unsigned long next_balance; ++ struct mm_struct *prev_mm; ++ ++ unsigned int clock_update_flags; ++ u64 clock; ++ u64 clock_task; ++ ++ atomic_t nr_iowait; ++ ++#ifdef CONFIG_MEMBARRIER ++ int membarrier_state; ++#endif ++ ++#ifdef CONFIG_SMP ++ struct root_domain *rd; ++ struct sched_domain *sd; ++ ++ unsigned long cpu_capacity; ++ unsigned long cpu_capacity_orig; ++ ++ struct callback_head *balance_callback; ++ ++ unsigned char idle_balance; ++ ++ /* For active balancing */ ++ int active_balance; ++ int push_cpu; ++ struct cpu_stop_work active_balance_work; ++ ++ /* CPU of this runqueue: */ ++ int cpu; ++ int online; ++ ++ struct list_head cfs_tasks; ++ ++ struct sched_avg avg_rt; ++ struct sched_avg avg_dl; ++#ifdef CONFIG_HAVE_SCHED_AVG_IRQ ++ struct sched_avg avg_irq; ++#endif ++ u64 idle_stamp; ++ u64 avg_idle; ++ ++ /* This is used to determine avg_idle's max value */ ++ u64 max_idle_balance_cost; ++#endif ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ u64 prev_irq_time; ++#endif ++#ifdef CONFIG_PARAVIRT ++ u64 prev_steal_time; ++#endif ++#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING ++ u64 prev_steal_time_rq; ++#endif ++ ++ /* calc_load related fields */ ++ unsigned long calc_load_update; ++ long calc_load_active; ++ ++#ifdef CONFIG_SCHED_HRTICK ++#ifdef CONFIG_SMP ++ int hrtick_csd_pending; ++ call_single_data_t hrtick_csd; ++#endif ++ struct hrtimer hrtick_timer; ++#endif ++ ++#ifdef CONFIG_SCHEDSTATS ++ /* latency stats */ ++ struct sched_info rq_sched_info; ++ unsigned long long rq_cpu_time; ++ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ ++ ++ /* sys_sched_yield() stats */ ++ unsigned int yld_count; ++ ++ /* schedule() stats */ ++ unsigned int sched_count; ++ unsigned int sched_goidle; ++ ++ /* try_to_wake_up() stats */ ++ unsigned int ttwu_count; ++ unsigned int ttwu_local; ++#endif ++ ++#ifdef CONFIG_SMP ++ struct llist_head wake_list; ++#endif ++ ++#ifdef CONFIG_CPU_IDLE ++ /* Must be inspected within a rcu lock section */ ++ struct cpuidle_state *idle_state; ++#endif ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++}; ++ ++struct cputime { ++ u64 utime; ++ u64 stime; ++}; ++ ++struct rq_cputime { ++ raw_spinlock_t lock; ++ unsigned long long sum_idle_time; ++ unsigned long long last_entry_idle; ++ struct cputime cpu_prev_time; ++ struct cputime cpu_last_time; ++}; ++ ++DECLARE_PER_CPU(struct rq_cputime, rq_cputimes); ++ ++static inline int cpu_of(struct rq *rq) ++{ ++#ifdef CONFIG_SMP ++ return rq->cpu; ++#else ++ return 0; ++#endif ++} ++ ++ ++#ifdef CONFIG_SCHED_SMT ++extern void __update_idle_core(struct rq *rq); ++ ++static inline void update_idle_core(struct rq *rq) ++{ ++ if (static_branch_unlikely(&sched_smt_present)) ++ __update_idle_core(rq); ++} ++ ++#else ++static inline void update_idle_core(struct rq *rq) { } ++#endif ++ ++DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); ++ ++#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) ++#define this_rq() this_cpu_ptr(&runqueues) ++#define task_rq(p) cpu_rq(task_cpu(p)) ++#define cpu_curr(cpu) (cpu_rq(cpu)->curr) ++#define raw_rq() raw_cpu_ptr(&runqueues) ++ ++static inline u64 __rq_clock_broken(struct rq *rq) ++{ ++ return READ_ONCE(rq->clock); ++} ++ ++/* ++ * rq::clock_update_flags bits ++ * ++ * %RQCF_REQ_SKIP - will request skipping of clock update on the next ++ * call to __schedule(). 
This is an optimisation to avoid ++ * neighbouring rq clock updates. ++ * ++ * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is ++ * in effect and calls to update_rq_clock() are being ignored. ++ * ++ * %RQCF_UPDATED - is a debug flag that indicates whether a call has been ++ * made to update_rq_clock() since the last time rq::lock was pinned. ++ * ++ * If inside of __schedule(), clock_update_flags will have been ++ * shifted left (a left shift is a cheap operation for the fast path ++ * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use, ++ * ++ * if (rq-clock_update_flags >= RQCF_UPDATED) ++ * ++ * to check if %RQCF_UPADTED is set. It'll never be shifted more than ++ * one position though, because the next rq_unpin_lock() will shift it ++ * back. ++ */ ++#define RQCF_REQ_SKIP 0x01 ++#define RQCF_ACT_SKIP 0x02 ++#define RQCF_UPDATED 0x04 ++ ++static inline void assert_clock_updated(struct rq *rq) ++{ ++ /* ++ * The only reason for not seeing a clock update since the ++ * last rq_pin_lock() is if we're currently skipping updates. ++ */ ++ SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP); ++} ++ ++static inline u64 rq_clock(struct rq *rq) ++{ ++ lockdep_assert_held(&rq->lock); ++ assert_clock_updated(rq); ++ ++ return rq->clock; ++} ++ ++static inline u64 rq_clock_task(struct rq *rq) ++{ ++ lockdep_assert_held(&rq->lock); ++ assert_clock_updated(rq); ++ ++ return rq->clock_task; ++} ++ ++static inline void rq_clock_skip_update(struct rq *rq) ++{ ++ lockdep_assert_held(&rq->lock); ++ rq->clock_update_flags |= RQCF_REQ_SKIP; ++} ++ ++/* ++ * See rt task throttling, which is the only time a skip ++ * request is cancelled. ++ */ ++static inline void rq_clock_cancel_skipupdate(struct rq *rq) ++{ ++ lockdep_assert_held(&rq->lock); ++ rq->clock_update_flags &= ~RQCF_REQ_SKIP; ++} ++ ++struct rq_flags { ++ unsigned long flags; ++ struct pin_cookie cookie; ++#ifdef CONFIG_SCHED_DEBUG ++ /* ++ * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the ++ * current pin context is stashed here in case it needs to be ++ * restored in rq_repin_lock(). ++ */ ++ unsigned int clock_update_flags; ++#endif ++}; ++ ++static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) ++{ ++ rf->cookie = lockdep_pin_lock(&rq->lock); ++ ++#ifdef CONFIG_SCHED_DEBUG ++ rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); ++ rf->clock_update_flags = 0; ++#endif ++} ++ ++static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf) ++{ ++#ifdef CONFIG_SCHED_DEBUG ++ if (rq->clock_update_flags > RQCF_ACT_SKIP) ++ rf->clock_update_flags = RQCF_UPDATED; ++#endif ++ ++ lockdep_unpin_lock(&rq->lock, rf->cookie); ++} ++ ++static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf) ++{ ++ lockdep_repin_lock(&rq->lock, rf->cookie); ++ ++#ifdef CONFIG_SCHED_DEBUG ++ /* ++ * Restore the value we stashed in @rf for this pin context. 
++ */ ++ rq->clock_update_flags |= rf->clock_update_flags; ++#endif ++} ++ ++#ifdef CONFIG_NUMA ++enum numa_topology_type { ++ NUMA_DIRECT, ++ NUMA_GLUELESS_MESH, ++ NUMA_BACKPLANE, ++}; ++extern enum numa_topology_type sched_numa_topology_type; ++extern int sched_max_numa_distance; ++extern bool find_numa_distance(int distance); ++#endif ++ ++#ifdef CONFIG_NUMA ++extern void sched_init_numa(void); ++extern void sched_domains_numa_masks_set(unsigned int cpu); ++extern void sched_domains_numa_masks_clear(unsigned int cpu); ++#else ++static inline void sched_init_numa(void) { } ++static inline void sched_domains_numa_masks_set(unsigned int cpu) { } ++static inline void sched_domains_numa_masks_clear(unsigned int cpu) { } ++#endif ++ ++#ifdef CONFIG_NUMA_BALANCING ++/* The regions in numa_faults array from task_struct */ ++enum numa_faults_stats { ++ NUMA_MEM = 0, ++ NUMA_CPU, ++ NUMA_MEMBUF, ++ NUMA_CPUBUF ++}; ++extern void sched_setnuma(struct task_struct *p, int node); ++extern int migrate_task_to(struct task_struct *p, int cpu); ++extern int migrate_swap(struct task_struct *p, struct task_struct *t, ++ int cpu, int scpu); ++extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p); ++#else ++static inline void ++init_numa_balancing(unsigned long clone_flags, struct task_struct *p) ++{ ++} ++#endif /* CONFIG_NUMA_BALANCING */ ++ ++#ifdef CONFIG_SMP ++ ++static inline void ++queue_balance_callback(struct rq *rq, ++ struct callback_head *head, ++ void (*func)(struct rq *rq)) ++{ ++ lockdep_assert_held(&rq->lock); ++ ++ if (unlikely(head->next)) ++ return; ++ ++ head->func = (void (*)(struct callback_head *))func; ++ head->next = rq->balance_callback; ++ rq->balance_callback = head; ++} ++ ++extern void sched_ttwu_pending(void); ++ ++#define rcu_dereference_check_sched_domain(p) \ ++ rcu_dereference_check((p), \ ++ lockdep_is_held(&sched_domains_mutex)) ++ ++/* ++ * The domain tree (rq->sd) is protected by RCU's quiescent state transition. ++ * See detach_destroy_domains: synchronize_sched for details. ++ * ++ * The domain tree of any CPU may only be accessed from within ++ * preempt-disabled sections. ++ */ ++#define for_each_domain(cpu, __sd) \ ++ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \ ++ __sd; __sd = __sd->parent) ++ ++#define for_each_lower_domain(sd) for (; sd; sd = sd->child) ++ ++/** ++ * highest_flag_domain - Return highest sched_domain containing flag. ++ * @cpu: The CPU whose highest level of sched domain is to ++ * be returned. ++ * @flag: The flag to check for the highest sched_domain ++ * for the given CPU. ++ * ++ * Returns the highest sched_domain of a CPU which contains the given flag. 
++ */ ++static inline struct sched_domain *highest_flag_domain(int cpu, int flag) ++{ ++ struct sched_domain *sd, *hsd = NULL; ++ ++ for_each_domain(cpu, sd) { ++ if (!(sd->flags & flag)) ++ break; ++ hsd = sd; ++ } ++ ++ return hsd; ++} ++ ++static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) ++{ ++ struct sched_domain *sd; ++ ++ for_each_domain(cpu, sd) { ++ if (sd->flags & flag) ++ break; ++ } ++ ++ return sd; ++} ++ ++DECLARE_PER_CPU(struct sched_domain *, sd_llc); ++DECLARE_PER_CPU(int, sd_llc_size); ++DECLARE_PER_CPU(int, sd_llc_id); ++DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); ++DECLARE_PER_CPU(struct sched_domain *, sd_numa); ++DECLARE_PER_CPU(struct sched_domain *, sd_asym); ++ ++struct sched_group_capacity { ++ atomic_t ref; ++ /* ++ * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity ++ * for a single CPU. ++ */ ++ unsigned long capacity; ++ unsigned long min_capacity; /* Min per-CPU capacity in group */ ++ unsigned long next_update; ++ int imbalance; /* XXX unrelated to capacity but shared group state */ ++ ++#ifdef CONFIG_SCHED_DEBUG ++ int id; ++#endif ++ ++ unsigned long cpumask[0]; /* Balance mask */ ++}; ++ ++struct sched_group { ++ struct sched_group *next; /* Must be a circular list */ ++ atomic_t ref; ++ ++ unsigned int group_weight; ++ struct sched_group_capacity *sgc; ++ int asym_prefer_cpu; /* CPU of highest priority in group */ ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ ++ /* ++ * The CPUs this group covers. ++ * ++ * NOTE: this field is variable length. (Allocated dynamically ++ * by attaching extra space to the end of the structure, ++ * depending on how many CPUs the kernel has booted up with) ++ */ ++ unsigned long cpumask[0]; ++}; ++ ++static inline struct cpumask *sched_group_span(struct sched_group *sg) ++{ ++ return to_cpumask(sg->cpumask); ++} ++ ++/* ++ * See build_balance_mask(). ++ */ ++static inline struct cpumask *group_balance_mask(struct sched_group *sg) ++{ ++ return to_cpumask(sg->sgc->cpumask); ++} ++ ++/** ++ * group_first_cpu - Returns the first CPU in the cpumask of a sched_group. ++ * @group: The group whose first CPU is to be returned. ++ */ ++static inline unsigned int group_first_cpu(struct sched_group *group) ++{ ++ return cpumask_first(sched_group_span(group)); ++} ++ ++extern int group_balance_cpu(struct sched_group *sg); ++ ++#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) ++void register_sched_domain_sysctl(void); ++void dirty_sched_domain_sysctl(int cpu); ++void unregister_sched_domain_sysctl(void); ++#else ++static inline void register_sched_domain_sysctl(void) ++{ ++} ++static inline void dirty_sched_domain_sysctl(int cpu) ++{ ++} ++static inline void unregister_sched_domain_sysctl(void) ++{ ++} ++#endif ++ ++#else ++ ++static inline void sched_ttwu_pending(void) { } ++ ++#endif /* CONFIG_SMP */ ++ ++#include "stats.h" ++#include "autogroup.h" ++ ++#ifdef CONFIG_CGROUP_SCHED ++ ++/* ++ * Return the group to which this tasks belongs. ++ * ++ * We cannot use task_css() and friends because the cgroup subsystem ++ * changes that value before the cgroup_subsys::attach() method is called, ++ * therefore we cannot pin it and might observe the wrong value. ++ * ++ * The same is true for autogroup's p->signal->autogroup->tg, the autogroup ++ * core changes this before calling sched_move_task(). ++ * ++ * Instead we use a 'copy' which is updated from sched_move_task() while ++ * holding both task_struct::pi_lock and rq::lock. 
++ */ ++static inline struct task_group *task_group(struct task_struct *p) ++{ ++ return p->sched_task_group; ++} ++ ++/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ ++static inline void set_task_rq(struct task_struct *p, unsigned int cpu) ++{ ++#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED) ++ struct task_group *tg = task_group(p); ++#endif ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]); ++ p->se.cfs_rq = tg->cfs_rq[cpu]; ++ p->se.parent = tg->se[cpu]; ++#endif ++ ++#ifdef CONFIG_RT_GROUP_SCHED ++ p->rt.rt_rq = tg->rt_rq[cpu]; ++ p->rt.parent = tg->rt_se[cpu]; ++#endif ++} ++ ++#else /* CONFIG_CGROUP_SCHED */ ++ ++static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } ++static inline struct task_group *task_group(struct task_struct *p) ++{ ++ return NULL; ++} ++ ++#endif /* CONFIG_CGROUP_SCHED */ ++ ++static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) ++{ ++ set_task_rq(p, cpu); ++#ifdef CONFIG_SMP ++ /* ++ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be ++ * successfuly executed on another CPU. We must ensure that updates of ++ * per-task data have been completed by this moment. ++ */ ++ smp_wmb(); ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ WRITE_ONCE(p->cpu, cpu); ++#else ++ WRITE_ONCE(task_thread_info(p)->cpu, cpu); ++#endif ++ p->wake_cpu = cpu; ++#endif ++} ++ ++/* ++ * Tunables that become constants when CONFIG_SCHED_DEBUG is off: ++ */ ++#ifdef CONFIG_SCHED_DEBUG ++# include ++# define const_debug __read_mostly ++#else ++# define const_debug const ++#endif ++ ++#define SCHED_FEAT(name, enabled) \ ++ __SCHED_FEAT_##name , ++ ++enum { ++#include "features.h" ++ __SCHED_FEAT_NR, ++}; ++ ++#undef SCHED_FEAT ++ ++#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL) ++ ++/* ++ * To support run-time toggling of sched features, all the translation units ++ * (but core.c) reference the sysctl_sched_features defined in core.c. ++ */ ++extern const_debug unsigned int sysctl_sched_features; ++ ++#define SCHED_FEAT(name, enabled) \ ++static __always_inline bool static_branch_##name(struct static_key *key) \ ++{ \ ++ return static_key_##enabled(key); \ ++} ++ ++#include "features.h" ++#undef SCHED_FEAT ++ ++extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; ++#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) ++ ++#else /* !(SCHED_DEBUG && CONFIG_JUMP_LABEL) */ ++ ++/* ++ * Each translation unit has its own copy of sysctl_sched_features to allow ++ * constants propagation at compile time and compiler optimization based on ++ * features default. 
++ */ ++#define SCHED_FEAT(name, enabled) \ ++ (1UL << __SCHED_FEAT_##name) * enabled | ++static const_debug __maybe_unused unsigned int sysctl_sched_features = ++#include "features.h" ++ 0; ++#undef SCHED_FEAT ++ ++#define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) ++ ++#endif /* SCHED_DEBUG && CONFIG_JUMP_LABEL */ ++ ++extern struct static_key_false sched_numa_balancing; ++extern struct static_key_false sched_schedstats; ++ ++static inline u64 global_rt_period(void) ++{ ++ return (u64)sysctl_sched_rt_period * NSEC_PER_USEC; ++} ++ ++static inline u64 global_rt_runtime(void) ++{ ++ if (sysctl_sched_rt_runtime < 0) ++ return RUNTIME_INF; ++ ++ return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; ++} ++ ++static inline int task_current(struct rq *rq, struct task_struct *p) ++{ ++ return rq->curr == p; ++} ++ ++static inline int task_running(struct rq *rq, struct task_struct *p) ++{ ++#ifdef CONFIG_SMP ++ return p->on_cpu; ++#else ++ return task_current(rq, p); ++#endif ++} ++ ++static inline int task_on_rq_queued(struct task_struct *p) ++{ ++ return p->on_rq == TASK_ON_RQ_QUEUED; ++} ++ ++static inline int task_on_rq_migrating(struct task_struct *p) ++{ ++ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; ++} ++ ++/* ++ * wake flags ++ */ ++#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */ ++#define WF_FORK 0x02 /* Child wakeup after fork */ ++#define WF_MIGRATED 0x4 /* Internal use, task got migrated */ ++ ++/* ++ * To aid in avoiding the subversion of "niceness" due to uneven distribution ++ * of tasks with abnormal "nice" values across CPUs the contribution that ++ * each task makes to its run queue's load is weighted according to its ++ * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a ++ * scaled version of the new time slice allocation that they receive on time ++ * slice expiry etc. ++ */ ++ ++#define WEIGHT_IDLEPRIO 3 ++#define WMULT_IDLEPRIO 1431655765 ++ ++extern const int sched_prio_to_weight[40]; ++extern const u32 sched_prio_to_wmult[40]; ++ ++/* ++ * {de,en}queue flags: ++ * ++ * DEQUEUE_SLEEP - task is no longer runnable ++ * ENQUEUE_WAKEUP - task just became runnable ++ * ++ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks ++ * are in a known state which allows modification. Such pairs ++ * should preserve as much state as possible. ++ * ++ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location ++ * in the runqueue. 
++ * ++ * ENQUEUE_HEAD - place at front of runqueue (tail if not specified) ++ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline) ++ * ENQUEUE_MIGRATED - the task was migrated during wakeup ++ * ++ */ ++ ++#define DEQUEUE_SLEEP 0x01 ++#define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */ ++#define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */ ++#define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */ ++ ++#define ENQUEUE_WAKEUP 0x01 ++#define ENQUEUE_RESTORE 0x02 ++#define ENQUEUE_MOVE 0x04 ++#define ENQUEUE_NOCLOCK 0x08 ++ ++#define ENQUEUE_HEAD 0x10 ++#define ENQUEUE_REPLENISH 0x20 ++#ifdef CONFIG_SMP ++#define ENQUEUE_MIGRATED 0x40 ++#else ++#define ENQUEUE_MIGRATED 0x00 ++#endif ++ ++#define RETRY_TASK ((void *)-1UL) ++ ++struct sched_class { ++ const struct sched_class *next; ++ ++ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); ++ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); ++ void (*yield_task) (struct rq *rq); ++ bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt); ++ ++ void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags); ++ ++ /* ++ * It is the responsibility of the pick_next_task() method that will ++ * return the next task to call put_prev_task() on the @prev task or ++ * something equivalent. ++ * ++ * May return RETRY_TASK when it finds a higher prio class has runnable ++ * tasks. ++ */ ++ struct task_struct * (*pick_next_task)(struct rq *rq, ++ struct task_struct *prev, ++ struct rq_flags *rf); ++ void (*put_prev_task)(struct rq *rq, struct task_struct *p); ++ ++#ifdef CONFIG_SMP ++ int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); ++ void (*migrate_task_rq)(struct task_struct *p, int new_cpu); ++ ++ void (*task_woken)(struct rq *this_rq, struct task_struct *task); ++ ++ void (*set_cpus_allowed)(struct task_struct *p, ++ const struct cpumask *newmask); ++ ++ void (*rq_online)(struct rq *rq); ++ void (*rq_offline)(struct rq *rq); ++#endif ++ ++ void (*set_curr_task)(struct rq *rq); ++ void (*task_tick)(struct rq *rq, struct task_struct *p, int queued); ++ void (*task_fork)(struct task_struct *p); ++ void (*task_dead)(struct task_struct *p); ++ ++ /* ++ * The switched_from() call is allowed to drop rq->lock, therefore we ++ * cannot assume the switched_from/switched_to pair is serliazed by ++ * rq->lock. They are however serialized by p->pi_lock. 
++ */ ++ void (*switched_from)(struct rq *this_rq, struct task_struct *task); ++ void (*switched_to) (struct rq *this_rq, struct task_struct *task); ++ void (*prio_changed) (struct rq *this_rq, struct task_struct *task, ++ int oldprio); ++ ++ unsigned int (*get_rr_interval)(struct rq *rq, ++ struct task_struct *task); ++ ++ void (*update_curr)(struct rq *rq); ++ ++#define TASK_SET_GROUP 0 ++#define TASK_MOVE_GROUP 1 ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ void (*task_change_group)(struct task_struct *p, int type); ++#endif ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++}; ++ ++static inline void put_prev_task(struct rq *rq, struct task_struct *prev) ++{ ++ prev->sched_class->put_prev_task(rq, prev); ++} ++ ++static inline void set_curr_task(struct rq *rq, struct task_struct *curr) ++{ ++ curr->sched_class->set_curr_task(rq); ++} ++ ++#ifdef CONFIG_SMP ++#define sched_class_highest (&stop_sched_class) ++#else ++#define sched_class_highest (&dl_sched_class) ++#endif ++#define for_each_class(class) \ ++ for (class = sched_class_highest; class; class = class->next) ++ ++extern const struct sched_class stop_sched_class; ++extern const struct sched_class dl_sched_class; ++extern const struct sched_class rt_sched_class; ++extern const struct sched_class fair_sched_class; ++extern const struct sched_class idle_sched_class; ++ ++ ++#ifdef CONFIG_SMP ++ ++extern void update_group_capacity(struct sched_domain *sd, int cpu); ++ ++extern void trigger_load_balance(struct rq *rq); ++ ++extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); ++ ++#endif ++ ++#ifdef CONFIG_CPU_IDLE ++static inline void idle_set_state(struct rq *rq, ++ struct cpuidle_state *idle_state) ++{ ++ rq->idle_state = idle_state; ++} ++ ++static inline struct cpuidle_state *idle_get_state(struct rq *rq) ++{ ++ SCHED_WARN_ON(!rcu_read_lock_held()); ++ ++ return rq->idle_state; ++} ++#else ++static inline void idle_set_state(struct rq *rq, ++ struct cpuidle_state *idle_state) ++{ ++} ++ ++static inline struct cpuidle_state *idle_get_state(struct rq *rq) ++{ ++ return NULL; ++} ++#endif ++ ++extern void schedule_idle(void); ++ ++extern void sysrq_sched_debug_show(void); ++extern void sched_init_granularity(void); ++extern void update_max_interval(void); ++ ++extern void init_sched_dl_class(void); ++extern void init_sched_rt_class(void); ++extern void init_sched_fair_class(void); ++ ++extern void reweight_task(struct task_struct *p, int prio); ++ ++extern void resched_curr(struct rq *rq); ++extern void resched_cpu(int cpu); ++ ++extern struct rt_bandwidth def_rt_bandwidth; ++extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); ++ ++extern struct dl_bandwidth def_dl_bandwidth; ++extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime); ++extern void init_dl_task_timer(struct sched_dl_entity *dl_se); ++extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se); ++extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq); ++ ++#define BW_SHIFT 20 ++#define BW_UNIT (1 << BW_SHIFT) ++#define RATIO_SHIFT 8 ++unsigned long to_ratio(u64 period, u64 runtime); ++ ++extern void init_entity_runnable_average(struct sched_entity *se); ++extern void post_init_entity_util_avg(struct sched_entity *se); ++ ++#ifdef CONFIG_NO_HZ_FULL ++extern bool sched_can_stop_tick(struct rq *rq); ++extern int __init sched_tick_offload_init(void); ++ ++/* ++ * Tick may be needed by tasks in the runqueue depending on their policy and ++ * requirements. 
If tick is needed, lets send the target an IPI to kick it out of ++ * nohz mode if necessary. ++ */ ++static inline void sched_update_tick_dependency(struct rq *rq) ++{ ++ int cpu; ++ ++ if (!tick_nohz_full_enabled()) ++ return; ++ ++ cpu = cpu_of(rq); ++ ++ if (!tick_nohz_full_cpu(cpu)) ++ return; ++ ++ if (sched_can_stop_tick(rq)) ++ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); ++ else ++ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); ++} ++#else ++static inline int sched_tick_offload_init(void) { return 0; } ++static inline void sched_update_tick_dependency(struct rq *rq) { } ++#endif ++ ++static inline void add_nr_running(struct rq *rq, unsigned count) ++{ ++ unsigned prev_nr = rq->nr_running; ++ ++ rq->nr_running = prev_nr + count; ++ ++ if (prev_nr < 2 && rq->nr_running >= 2) { ++#ifdef CONFIG_SMP ++ if (!rq->rd->overload) ++ rq->rd->overload = true; ++#endif ++ } ++ ++ sched_update_tick_dependency(rq); ++} ++ ++static inline void sub_nr_running(struct rq *rq, unsigned count) ++{ ++ rq->nr_running -= count; ++ /* Check if we still need preemption */ ++ sched_update_tick_dependency(rq); ++} ++ ++extern void update_rq_clock(struct rq *rq); ++ ++extern void activate_task(struct rq *rq, struct task_struct *p, int flags); ++extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); ++ ++extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); ++ ++extern const_debug unsigned int sysctl_sched_nr_migrate; ++extern const_debug unsigned int sysctl_sched_migration_cost; ++ ++#ifdef CONFIG_SCHED_HRTICK ++ ++/* ++ * Use hrtick when: ++ * - enabled by features ++ * - hrtimer is actually high res ++ */ ++static inline int hrtick_enabled(struct rq *rq) ++{ ++ if (!sched_feat(HRTICK)) ++ return 0; ++ if (!cpu_active(cpu_of(rq))) ++ return 0; ++ return hrtimer_is_hres_active(&rq->hrtick_timer); ++} ++ ++void hrtick_start(struct rq *rq, u64 delay); ++ ++#else ++ ++static inline int hrtick_enabled(struct rq *rq) ++{ ++ return 0; ++} ++ ++#endif /* CONFIG_SCHED_HRTICK */ ++ ++#ifndef arch_scale_freq_capacity ++static __always_inline ++unsigned long arch_scale_freq_capacity(int cpu) ++{ ++ return SCHED_CAPACITY_SCALE; ++} ++#endif ++ ++#ifdef CONFIG_SMP ++#ifndef arch_scale_cpu_capacity ++static __always_inline ++unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) ++{ ++ if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1)) ++ return sd->smt_gain / sd->span_weight; ++ ++ return SCHED_CAPACITY_SCALE; ++} ++#endif ++#else ++#ifndef arch_scale_cpu_capacity ++static __always_inline ++unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu) ++{ ++ return SCHED_CAPACITY_SCALE; ++} ++#endif ++#endif ++ ++struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(rq->lock); ++ ++struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(p->pi_lock) ++ __acquires(rq->lock); ++ ++static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ rq_unpin_lock(rq, rf); ++ raw_spin_unlock(&rq->lock); ++} ++ ++static inline void ++task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) ++ __releases(rq->lock) ++ __releases(p->pi_lock) ++{ ++ rq_unpin_lock(rq, rf); ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); ++} ++ ++static inline void ++rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock_irqsave(&rq->lock, rf->flags); ++ 
rq_pin_lock(rq, rf); ++} ++ ++static inline void ++rq_lock_irq(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock_irq(&rq->lock); ++ rq_pin_lock(rq, rf); ++} ++ ++static inline void ++rq_lock(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock(&rq->lock); ++ rq_pin_lock(rq, rf); ++} ++ ++static inline void ++rq_relock(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock(&rq->lock); ++ rq_repin_lock(rq, rf); ++} ++ ++static inline void ++rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ rq_unpin_lock(rq, rf); ++ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); ++} ++ ++static inline void ++rq_unlock_irq(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ rq_unpin_lock(rq, rf); ++ raw_spin_unlock_irq(&rq->lock); ++} ++ ++static inline void ++rq_unlock(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ rq_unpin_lock(rq, rf); ++ raw_spin_unlock(&rq->lock); ++} ++ ++#ifdef CONFIG_SMP ++#ifdef CONFIG_PREEMPT ++ ++static inline void double_rq_lock(struct rq *rq1, struct rq *rq2); ++ ++/* ++ * fair double_lock_balance: Safely acquires both rq->locks in a fair ++ * way at the expense of forcing extra atomic operations in all ++ * invocations. This assures that the double_lock is acquired using the ++ * same underlying policy as the spinlock_t on this architecture, which ++ * reduces latency compared to the unfair variant below. However, it ++ * also adds more overhead and therefore may reduce throughput. ++ */ ++static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) ++ __releases(this_rq->lock) ++ __acquires(busiest->lock) ++ __acquires(this_rq->lock) ++{ ++ raw_spin_unlock(&this_rq->lock); ++ double_rq_lock(this_rq, busiest); ++ ++ return 1; ++} ++ ++#else ++/* ++ * Unfair double_lock_balance: Optimizes throughput at the expense of ++ * latency by eliminating extra atomic operations when the locks are ++ * already in proper order on entry. This favors lower CPU-ids and will ++ * grant the double lock to lower CPUs over higher ids under contention, ++ * regardless of entry order into the function. ++ */ ++static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) ++ __releases(this_rq->lock) ++ __acquires(busiest->lock) ++ __acquires(this_rq->lock) ++{ ++ int ret = 0; ++ ++ if (unlikely(!raw_spin_trylock(&busiest->lock))) { ++ if (busiest < this_rq) { ++ raw_spin_unlock(&this_rq->lock); ++ raw_spin_lock(&busiest->lock); ++ raw_spin_lock_nested(&this_rq->lock, ++ SINGLE_DEPTH_NESTING); ++ ret = 1; ++ } else ++ raw_spin_lock_nested(&busiest->lock, ++ SINGLE_DEPTH_NESTING); ++ } ++ return ret; ++} ++ ++#endif /* CONFIG_PREEMPT */ ++ ++/* ++ * double_lock_balance - lock the busiest runqueue, this_rq is locked already. 
++ */ ++static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest) ++{ ++ if (unlikely(!irqs_disabled())) { ++ /* printk() doesn't work well under rq->lock */ ++ raw_spin_unlock(&this_rq->lock); ++ BUG_ON(1); ++ } ++ ++ return _double_lock_balance(this_rq, busiest); ++} ++ ++static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) ++ __releases(busiest->lock) ++{ ++ raw_spin_unlock(&busiest->lock); ++ lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); ++} ++ ++static inline void double_lock(spinlock_t *l1, spinlock_t *l2) ++{ ++ if (l1 > l2) ++ swap(l1, l2); ++ ++ spin_lock(l1); ++ spin_lock_nested(l2, SINGLE_DEPTH_NESTING); ++} ++ ++static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2) ++{ ++ if (l1 > l2) ++ swap(l1, l2); ++ ++ spin_lock_irq(l1); ++ spin_lock_nested(l2, SINGLE_DEPTH_NESTING); ++} ++ ++static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) ++{ ++ if (l1 > l2) ++ swap(l1, l2); ++ ++ raw_spin_lock(l1); ++ raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING); ++} ++ ++/* ++ * double_rq_lock - safely lock two runqueues ++ * ++ * Note this does not disable interrupts like task_rq_lock, ++ * you need to do so manually before calling. ++ */ ++static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) ++ __acquires(rq1->lock) ++ __acquires(rq2->lock) ++{ ++ BUG_ON(!irqs_disabled()); ++ if (rq1 == rq2) { ++ raw_spin_lock(&rq1->lock); ++ __acquire(rq2->lock); /* Fake it out ;) */ ++ } else { ++ if (rq1 < rq2) { ++ raw_spin_lock(&rq1->lock); ++ raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); ++ } else { ++ raw_spin_lock(&rq2->lock); ++ raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); ++ } ++ } ++} ++ ++/* ++ * double_rq_unlock - safely unlock two runqueues ++ * ++ * Note this does not restore interrupts like task_rq_unlock, ++ * you need to do so manually after calling. ++ */ ++static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) ++ __releases(rq1->lock) ++ __releases(rq2->lock) ++{ ++ raw_spin_unlock(&rq1->lock); ++ if (rq1 != rq2) ++ raw_spin_unlock(&rq2->lock); ++ else ++ __release(rq2->lock); ++} ++ ++extern void set_rq_online (struct rq *rq); ++extern void set_rq_offline(struct rq *rq); ++extern bool sched_smp_initialized; ++ ++#else /* CONFIG_SMP */ ++ ++/* ++ * double_rq_lock - safely lock two runqueues ++ * ++ * Note this does not disable interrupts like task_rq_lock, ++ * you need to do so manually before calling. ++ */ ++static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) ++ __acquires(rq1->lock) ++ __acquires(rq2->lock) ++{ ++ BUG_ON(!irqs_disabled()); ++ BUG_ON(rq1 != rq2); ++ raw_spin_lock(&rq1->lock); ++ __acquire(rq2->lock); /* Fake it out ;) */ ++} ++ ++/* ++ * double_rq_unlock - safely unlock two runqueues ++ * ++ * Note this does not restore interrupts like task_rq_unlock, ++ * you need to do so manually after calling. 
++ */ ++static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) ++ __releases(rq1->lock) ++ __releases(rq2->lock) ++{ ++ BUG_ON(rq1 != rq2); ++ raw_spin_unlock(&rq1->lock); ++ __release(rq2->lock); ++} ++ ++#endif ++ ++extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); ++extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); ++ ++#ifdef CONFIG_SCHED_DEBUG ++extern bool sched_debug_enabled; ++ ++extern void print_cfs_stats(struct seq_file *m, int cpu); ++extern void print_rt_stats(struct seq_file *m, int cpu); ++extern void print_dl_stats(struct seq_file *m, int cpu); ++extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); ++extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); ++extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq); ++#ifdef CONFIG_NUMA_BALANCING ++extern void ++show_numa_stats(struct task_struct *p, struct seq_file *m); ++extern void ++print_numa_stats(struct seq_file *m, int node, unsigned long tsf, ++ unsigned long tpf, unsigned long gsf, unsigned long gpf); ++#endif /* CONFIG_NUMA_BALANCING */ ++#endif /* CONFIG_SCHED_DEBUG */ ++ ++extern void init_cfs_rq(struct cfs_rq *cfs_rq); ++extern void init_rt_rq(struct rt_rq *rt_rq); ++extern void init_dl_rq(struct dl_rq *dl_rq); ++ ++extern void cfs_bandwidth_usage_inc(void); ++extern void cfs_bandwidth_usage_dec(void); ++ ++#ifdef CONFIG_NO_HZ_COMMON ++#define NOHZ_BALANCE_KICK_BIT 0 ++#define NOHZ_STATS_KICK_BIT 1 ++ ++#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) ++#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) ++ ++#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) ++ ++#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) ++ ++extern void nohz_balance_exit_idle(struct rq *rq); ++#else ++static inline void nohz_balance_exit_idle(struct rq *rq) { } ++#endif ++ ++ ++#ifdef CONFIG_SMP ++static inline ++void __dl_update(struct dl_bw *dl_b, s64 bw) ++{ ++ struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw); ++ int i; ++ ++ RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), ++ "sched RCU must be held"); ++ for_each_cpu_and(i, rd->span, cpu_active_mask) { ++ struct rq *rq = cpu_rq(i); ++ ++ rq->dl.extra_bw += bw; ++ } ++} ++#else ++static inline ++void __dl_update(struct dl_bw *dl_b, s64 bw) ++{ ++ struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw); ++ ++ dl->extra_bw += bw; ++} ++#endif ++ ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++struct irqtime { ++ u64 total; ++ u64 tick_delta; ++ u64 irq_start_time; ++ struct u64_stats_sync sync; ++}; ++ ++DECLARE_PER_CPU(struct irqtime, cpu_irqtime); ++ ++/* ++ * Returns the irqtime minus the softirq time computed by ksoftirqd. ++ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime ++ * and never move forward. ++ */ ++static inline u64 irq_time_read(int cpu) ++{ ++ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); ++ unsigned int seq; ++ u64 total; ++ ++ do { ++ seq = __u64_stats_fetch_begin(&irqtime->sync); ++ total = irqtime->total; ++ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); ++ ++ return total; ++} ++#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ ++ ++#ifdef CONFIG_CPU_FREQ ++DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); ++ ++/** ++ * cpufreq_update_util - Take a note about CPU utilization changes. ++ * @rq: Runqueue to carry out the update for. ++ * @flags: Update reason flags. ++ * ++ * This function is called by the scheduler on the CPU whose utilization is ++ * being updated. 
++ * ++ * It can only be called from RCU-sched read-side critical sections. ++ * ++ * The way cpufreq is currently arranged requires it to evaluate the CPU ++ * performance state (frequency/voltage) on a regular basis to prevent it from ++ * being stuck in a completely inadequate performance level for too long. ++ * That is not guaranteed to happen if the updates are only triggered from CFS ++ * and DL, though, because they may not be coming in if only RT tasks are ++ * active all the time (or there are RT tasks only). ++ * ++ * As a workaround for that issue, this function is called periodically by the ++ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, ++ * but that really is a band-aid. Going forward it should be replaced with ++ * solutions targeted more specifically at RT tasks. ++ */ ++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) ++{ ++ struct update_util_data *data; ++ ++ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, ++ cpu_of(rq))); ++ if (data) ++ data->func(data, rq_clock(rq), flags); ++} ++#else ++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} ++#endif /* CONFIG_CPU_FREQ */ ++ ++#ifdef arch_scale_freq_capacity ++# ifndef arch_scale_freq_invariant ++# define arch_scale_freq_invariant() true ++# endif ++#else ++# define arch_scale_freq_invariant() false ++#endif ++ ++#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL ++static inline unsigned long cpu_bw_dl(struct rq *rq) ++{ ++ return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; ++} ++ ++static inline unsigned long cpu_util_dl(struct rq *rq) ++{ ++ return READ_ONCE(rq->avg_dl.util_avg); ++} ++ ++static inline unsigned long cpu_util_cfs(struct rq *rq) ++{ ++ unsigned long util = READ_ONCE(rq->cfs.avg.util_avg); ++ ++ if (sched_feat(UTIL_EST)) { ++ util = max_t(unsigned long, util, ++ READ_ONCE(rq->cfs.avg.util_est.enqueued)); ++ } ++ ++ return util; ++} ++ ++static inline unsigned long cpu_util_rt(struct rq *rq) ++{ ++ return READ_ONCE(rq->avg_rt.util_avg); ++} ++#endif ++ ++#ifdef CONFIG_HAVE_SCHED_AVG_IRQ ++static inline unsigned long cpu_util_irq(struct rq *rq) ++{ ++ return rq->avg_irq.util_avg; ++} ++ ++static inline ++unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) ++{ ++ util *= (max - irq); ++ util /= max; ++ ++ return util; ++ ++} ++#else ++static inline unsigned long cpu_util_irq(struct rq *rq) ++{ ++ return 0; ++} ++ ++static inline ++unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) ++{ ++ return util; ++} ++#endif ++ ++#ifdef CONFIG_MEMBARRIER ++/* ++ * The scheduler provides memory barriers required by membarrier between: ++ * - prior user-space memory accesses and store to rq->membarrier_state, ++ * - store to rq->membarrier_state and following user-space memory accesses. ++ * In the same way it provides those guarantees around store to rq->curr. 
++ */ ++static inline void membarrier_switch_mm(struct rq *rq, ++ struct mm_struct *prev_mm, ++ struct mm_struct *next_mm) ++{ ++ int membarrier_state; ++ ++ if (prev_mm == next_mm) ++ return; ++ ++ membarrier_state = atomic_read(&next_mm->membarrier_state); ++ if (READ_ONCE(rq->membarrier_state) == membarrier_state) ++ return; ++ ++ WRITE_ONCE(rq->membarrier_state, membarrier_state); ++} ++#else ++static inline void membarrier_switch_mm(struct rq *rq, ++ struct mm_struct *prev_mm, ++ struct mm_struct *next_mm) ++{ ++} ++#endif +diff -uprN kernel/kernel/sched/sched.h.rej kernel_new/kernel/sched/sched.h.rej +--- kernel/kernel/sched/sched.h.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/sched/sched.h.rej 2021-04-01 18:28:07.812863113 +0800 +@@ -0,0 +1,10 @@ ++--- kernel/sched/sched.h 2019-12-18 03:36:04.000000000 +0800 +++++ kernel/sched/sched.h 2021-03-22 09:21:43.222415443 +0800 ++@@ -64,6 +64,7 @@ ++ #include ++ #include ++ #include +++#include ++ ++ #include ++ +diff -uprN kernel/kernel/sched/wait.c kernel_new/kernel/sched/wait.c +--- kernel/kernel/sched/wait.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/sched/wait.c 2021-04-01 18:28:07.812863113 +0800 +@@ -79,6 +79,8 @@ static int __wake_up_common(struct wait_ + } else + curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry); + ++ ipipe_root_only(); ++ + if (&curr->entry == &wq_head->head) + return nr_exclusive; + +diff -uprN kernel/kernel/signal.c kernel_new/kernel/signal.c +--- kernel/kernel/signal.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/signal.c 2021-04-01 18:28:07.812863113 +0800 +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -748,6 +749,10 @@ still_pending: + void signal_wake_up_state(struct task_struct *t, unsigned int state) + { + set_tsk_thread_flag(t, TIF_SIGPENDING); ++ ++ /* TIF_SIGPENDING must be prior to reporting. */ ++ __ipipe_report_sigwake(t); ++ + /* + * TASK_WAKEKILL also means wake it up in the stopped/traced/killable + * case. We don't check t->state here because there is a race with it +@@ -968,8 +973,11 @@ static inline bool wants_signal(int sig, + if (sig == SIGKILL) + return true; + +- if (task_is_stopped_or_traced(p)) ++ if (task_is_stopped_or_traced(p)) { ++ if (!signal_pending(p)) ++ __ipipe_report_sigwake(p); + return false; ++ } + + return task_curr(p) || !signal_pending(p); + } +diff -uprN kernel/kernel/signal.c.orig kernel_new/kernel/signal.c.orig +--- kernel/kernel/signal.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/signal.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,4089 @@ ++/* ++ * linux/kernel/signal.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ * ++ * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson ++ * ++ * 2003-06-02 Jim Houston - Concurrent Computer Corp. ++ * Changes to use preallocated sigqueue structures ++ * to allow signals to be sent reliably. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include "audit.h" /* audit_signal_info() */ ++ ++EXPORT_TRACEPOINT_SYMBOL(signal_generate); ++ ++/* ++ * SLAB caches for signal bits. 
++ */ ++ ++static struct kmem_cache *sigqueue_cachep; ++ ++int print_fatal_signals __read_mostly; ++ ++static void __user *sig_handler(struct task_struct *t, int sig) ++{ ++ return t->sighand->action[sig - 1].sa.sa_handler; ++} ++ ++static inline bool sig_handler_ignored(void __user *handler, int sig) ++{ ++ /* Is it explicitly or implicitly ignored? */ ++ return handler == SIG_IGN || ++ (handler == SIG_DFL && sig_kernel_ignore(sig)); ++} ++ ++static bool sig_task_ignored(struct task_struct *t, int sig, bool force) ++{ ++ void __user *handler; ++ ++ handler = sig_handler(t, sig); ++ ++ /* SIGKILL and SIGSTOP may not be sent to the global init */ ++ if (unlikely(is_global_init(t) && sig_kernel_only(sig))) ++ return true; ++ ++ if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && ++ handler == SIG_DFL && !(force && sig_kernel_only(sig))) ++ return true; ++ ++ return sig_handler_ignored(handler, sig); ++} ++ ++static bool sig_ignored(struct task_struct *t, int sig, bool force) ++{ ++ /* ++ * Blocked signals are never ignored, since the ++ * signal handler may change by the time it is ++ * unblocked. ++ */ ++ if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) ++ return false; ++ ++ /* ++ * Tracers may want to know about even ignored signal unless it ++ * is SIGKILL which can't be reported anyway but can be ignored ++ * by SIGNAL_UNKILLABLE task. ++ */ ++ if (t->ptrace && sig != SIGKILL) ++ return false; ++ ++ return sig_task_ignored(t, sig, force); ++} ++ ++/* ++ * Re-calculate pending state from the set of locally pending ++ * signals, globally pending signals, and blocked signals. ++ */ ++static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked) ++{ ++ unsigned long ready; ++ long i; ++ ++ switch (_NSIG_WORDS) { ++ default: ++ for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;) ++ ready |= signal->sig[i] &~ blocked->sig[i]; ++ break; ++ ++ case 4: ready = signal->sig[3] &~ blocked->sig[3]; ++ ready |= signal->sig[2] &~ blocked->sig[2]; ++ ready |= signal->sig[1] &~ blocked->sig[1]; ++ ready |= signal->sig[0] &~ blocked->sig[0]; ++ break; ++ ++ case 2: ready = signal->sig[1] &~ blocked->sig[1]; ++ ready |= signal->sig[0] &~ blocked->sig[0]; ++ break; ++ ++ case 1: ready = signal->sig[0] &~ blocked->sig[0]; ++ } ++ return ready != 0; ++} ++ ++#define PENDING(p,b) has_pending_signals(&(p)->signal, (b)) ++ ++static bool recalc_sigpending_tsk(struct task_struct *t) ++{ ++ if ((t->jobctl & JOBCTL_PENDING_MASK) || ++ PENDING(&t->pending, &t->blocked) || ++ PENDING(&t->signal->shared_pending, &t->blocked)) { ++ set_tsk_thread_flag(t, TIF_SIGPENDING); ++ return true; ++ } ++ ++ /* ++ * We must never clear the flag in another thread, or in current ++ * when it's possible the current syscall is returning -ERESTART*. ++ * So we don't clear it here, and only callers who know they should do. ++ */ ++ return false; ++} ++ ++/* ++ * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up. ++ * This is superfluous when called on current, the wakeup is a harmless no-op. ++ */ ++void recalc_sigpending_and_wake(struct task_struct *t) ++{ ++ if (recalc_sigpending_tsk(t)) ++ signal_wake_up(t, 0); ++} ++ ++void recalc_sigpending(void) ++{ ++ if (!recalc_sigpending_tsk(current) && !freezing(current) && ++ !klp_patch_pending(current)) ++ clear_thread_flag(TIF_SIGPENDING); ++ ++} ++ ++void calculate_sigpending(void) ++{ ++ /* Have any signals or users of TIF_SIGPENDING been delayed ++ * until after fork? 
++ */ ++ spin_lock_irq(¤t->sighand->siglock); ++ set_tsk_thread_flag(current, TIF_SIGPENDING); ++ recalc_sigpending(); ++ spin_unlock_irq(¤t->sighand->siglock); ++} ++ ++/* Given the mask, find the first available signal that should be serviced. */ ++ ++#define SYNCHRONOUS_MASK \ ++ (sigmask(SIGSEGV) | sigmask(SIGBUS) | sigmask(SIGILL) | \ ++ sigmask(SIGTRAP) | sigmask(SIGFPE) | sigmask(SIGSYS)) ++ ++int next_signal(struct sigpending *pending, sigset_t *mask) ++{ ++ unsigned long i, *s, *m, x; ++ int sig = 0; ++ ++ s = pending->signal.sig; ++ m = mask->sig; ++ ++ /* ++ * Handle the first word specially: it contains the ++ * synchronous signals that need to be dequeued first. ++ */ ++ x = *s &~ *m; ++ if (x) { ++ if (x & SYNCHRONOUS_MASK) ++ x &= SYNCHRONOUS_MASK; ++ sig = ffz(~x) + 1; ++ return sig; ++ } ++ ++ switch (_NSIG_WORDS) { ++ default: ++ for (i = 1; i < _NSIG_WORDS; ++i) { ++ x = *++s &~ *++m; ++ if (!x) ++ continue; ++ sig = ffz(~x) + i*_NSIG_BPW + 1; ++ break; ++ } ++ break; ++ ++ case 2: ++ x = s[1] &~ m[1]; ++ if (!x) ++ break; ++ sig = ffz(~x) + _NSIG_BPW + 1; ++ break; ++ ++ case 1: ++ /* Nothing to do */ ++ break; ++ } ++ ++ return sig; ++} ++ ++static inline void print_dropped_signal(int sig) ++{ ++ static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); ++ ++ if (!print_fatal_signals) ++ return; ++ ++ if (!__ratelimit(&ratelimit_state)) ++ return; ++ ++ pr_info("%s/%d: reached RLIMIT_SIGPENDING, dropped signal %d\n", ++ current->comm, current->pid, sig); ++} ++ ++/** ++ * task_set_jobctl_pending - set jobctl pending bits ++ * @task: target task ++ * @mask: pending bits to set ++ * ++ * Clear @mask from @task->jobctl. @mask must be subset of ++ * %JOBCTL_PENDING_MASK | %JOBCTL_STOP_CONSUME | %JOBCTL_STOP_SIGMASK | ++ * %JOBCTL_TRAPPING. If stop signo is being set, the existing signo is ++ * cleared. If @task is already being killed or exiting, this function ++ * becomes noop. ++ * ++ * CONTEXT: ++ * Must be called with @task->sighand->siglock held. ++ * ++ * RETURNS: ++ * %true if @mask is set, %false if made noop because @task was dying. ++ */ ++bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask) ++{ ++ BUG_ON(mask & ~(JOBCTL_PENDING_MASK | JOBCTL_STOP_CONSUME | ++ JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING)); ++ BUG_ON((mask & JOBCTL_TRAPPING) && !(mask & JOBCTL_PENDING_MASK)); ++ ++ if (unlikely(fatal_signal_pending(task) || (task->flags & PF_EXITING))) ++ return false; ++ ++ if (mask & JOBCTL_STOP_SIGMASK) ++ task->jobctl &= ~JOBCTL_STOP_SIGMASK; ++ ++ task->jobctl |= mask; ++ return true; ++} ++ ++/** ++ * task_clear_jobctl_trapping - clear jobctl trapping bit ++ * @task: target task ++ * ++ * If JOBCTL_TRAPPING is set, a ptracer is waiting for us to enter TRACED. ++ * Clear it and wake up the ptracer. Note that we don't need any further ++ * locking. @task->siglock guarantees that @task->parent points to the ++ * ptracer. ++ * ++ * CONTEXT: ++ * Must be called with @task->sighand->siglock held. ++ */ ++void task_clear_jobctl_trapping(struct task_struct *task) ++{ ++ if (unlikely(task->jobctl & JOBCTL_TRAPPING)) { ++ task->jobctl &= ~JOBCTL_TRAPPING; ++ smp_mb(); /* advised by wake_up_bit() */ ++ wake_up_bit(&task->jobctl, JOBCTL_TRAPPING_BIT); ++ } ++} ++ ++/** ++ * task_clear_jobctl_pending - clear jobctl pending bits ++ * @task: target task ++ * @mask: pending bits to clear ++ * ++ * Clear @mask from @task->jobctl. @mask must be subset of ++ * %JOBCTL_PENDING_MASK. 
If %JOBCTL_STOP_PENDING is being cleared, other ++ * STOP bits are cleared together. ++ * ++ * If clearing of @mask leaves no stop or trap pending, this function calls ++ * task_clear_jobctl_trapping(). ++ * ++ * CONTEXT: ++ * Must be called with @task->sighand->siglock held. ++ */ ++void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask) ++{ ++ BUG_ON(mask & ~JOBCTL_PENDING_MASK); ++ ++ if (mask & JOBCTL_STOP_PENDING) ++ mask |= JOBCTL_STOP_CONSUME | JOBCTL_STOP_DEQUEUED; ++ ++ task->jobctl &= ~mask; ++ ++ if (!(task->jobctl & JOBCTL_PENDING_MASK)) ++ task_clear_jobctl_trapping(task); ++} ++ ++/** ++ * task_participate_group_stop - participate in a group stop ++ * @task: task participating in a group stop ++ * ++ * @task has %JOBCTL_STOP_PENDING set and is participating in a group stop. ++ * Group stop states are cleared and the group stop count is consumed if ++ * %JOBCTL_STOP_CONSUME was set. If the consumption completes the group ++ * stop, the appropriate %SIGNAL_* flags are set. ++ * ++ * CONTEXT: ++ * Must be called with @task->sighand->siglock held. ++ * ++ * RETURNS: ++ * %true if group stop completion should be notified to the parent, %false ++ * otherwise. ++ */ ++static bool task_participate_group_stop(struct task_struct *task) ++{ ++ struct signal_struct *sig = task->signal; ++ bool consume = task->jobctl & JOBCTL_STOP_CONSUME; ++ ++ WARN_ON_ONCE(!(task->jobctl & JOBCTL_STOP_PENDING)); ++ ++ task_clear_jobctl_pending(task, JOBCTL_STOP_PENDING); ++ ++ if (!consume) ++ return false; ++ ++ if (!WARN_ON_ONCE(sig->group_stop_count == 0)) ++ sig->group_stop_count--; ++ ++ /* ++ * Tell the caller to notify completion iff we are entering into a ++ * fresh group stop. Read comment in do_signal_stop() for details. ++ */ ++ if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) { ++ signal_set_stop_flags(sig, SIGNAL_STOP_STOPPED); ++ return true; ++ } ++ return false; ++} ++ ++void task_join_group_stop(struct task_struct *task) ++{ ++ /* Have the new thread join an on-going signal group stop */ ++ unsigned long jobctl = current->jobctl; ++ if (jobctl & JOBCTL_STOP_PENDING) { ++ struct signal_struct *sig = current->signal; ++ unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK; ++ unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; ++ if (task_set_jobctl_pending(task, signr | gstop)) { ++ sig->group_stop_count++; ++ } ++ } ++} ++ ++/* ++ * allocate a new signal queue record ++ * - this may be called without locks if and only if t == current, otherwise an ++ * appropriate lock must be held to stop the target task from exiting ++ */ ++static struct sigqueue * ++__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) ++{ ++ struct sigqueue *q = NULL; ++ struct user_struct *user; ++ int sigpending; ++ ++ /* ++ * Protect access to @t credentials. This can go away when all ++ * callers hold rcu read lock. ++ * ++ * NOTE! A pending signal will hold on to the user refcount, ++ * and we get/put the refcount only when the sigpending count ++ * changes from/to zero. 
++ */ ++ rcu_read_lock(); ++ user = __task_cred(t)->user; ++ sigpending = atomic_inc_return(&user->sigpending); ++ if (sigpending == 1) ++ get_uid(user); ++ rcu_read_unlock(); ++ ++ if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { ++ q = kmem_cache_alloc(sigqueue_cachep, flags); ++ } else { ++ print_dropped_signal(sig); ++ } ++ ++ if (unlikely(q == NULL)) { ++ if (atomic_dec_and_test(&user->sigpending)) ++ free_uid(user); ++ } else { ++ INIT_LIST_HEAD(&q->list); ++ q->flags = 0; ++ q->user = user; ++ } ++ ++ return q; ++} ++ ++static void __sigqueue_free(struct sigqueue *q) ++{ ++ if (q->flags & SIGQUEUE_PREALLOC) ++ return; ++ if (atomic_dec_and_test(&q->user->sigpending)) ++ free_uid(q->user); ++ kmem_cache_free(sigqueue_cachep, q); ++} ++ ++void flush_sigqueue(struct sigpending *queue) ++{ ++ struct sigqueue *q; ++ ++ sigemptyset(&queue->signal); ++ while (!list_empty(&queue->list)) { ++ q = list_entry(queue->list.next, struct sigqueue , list); ++ list_del_init(&q->list); ++ __sigqueue_free(q); ++ } ++} ++ ++/* ++ * Flush all pending signals for this kthread. ++ */ ++void flush_signals(struct task_struct *t) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&t->sighand->siglock, flags); ++ clear_tsk_thread_flag(t, TIF_SIGPENDING); ++ flush_sigqueue(&t->pending); ++ flush_sigqueue(&t->signal->shared_pending); ++ spin_unlock_irqrestore(&t->sighand->siglock, flags); ++} ++ ++#ifdef CONFIG_POSIX_TIMERS ++static void __flush_itimer_signals(struct sigpending *pending) ++{ ++ sigset_t signal, retain; ++ struct sigqueue *q, *n; ++ ++ signal = pending->signal; ++ sigemptyset(&retain); ++ ++ list_for_each_entry_safe(q, n, &pending->list, list) { ++ int sig = q->info.si_signo; ++ ++ if (likely(q->info.si_code != SI_TIMER)) { ++ sigaddset(&retain, sig); ++ } else { ++ sigdelset(&signal, sig); ++ list_del_init(&q->list); ++ __sigqueue_free(q); ++ } ++ } ++ ++ sigorsets(&pending->signal, &signal, &retain); ++} ++ ++void flush_itimer_signals(void) ++{ ++ struct task_struct *tsk = current; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&tsk->sighand->siglock, flags); ++ __flush_itimer_signals(&tsk->pending); ++ __flush_itimer_signals(&tsk->signal->shared_pending); ++ spin_unlock_irqrestore(&tsk->sighand->siglock, flags); ++} ++#endif ++ ++void ignore_signals(struct task_struct *t) ++{ ++ int i; ++ ++ for (i = 0; i < _NSIG; ++i) ++ t->sighand->action[i].sa.sa_handler = SIG_IGN; ++ ++ flush_signals(t); ++} ++ ++/* ++ * Flush all handlers for a task. ++ */ ++ ++void ++flush_signal_handlers(struct task_struct *t, int force_default) ++{ ++ int i; ++ struct k_sigaction *ka = &t->sighand->action[0]; ++ for (i = _NSIG ; i != 0 ; i--) { ++ if (force_default || ka->sa.sa_handler != SIG_IGN) ++ ka->sa.sa_handler = SIG_DFL; ++ ka->sa.sa_flags = 0; ++#ifdef __ARCH_HAS_SA_RESTORER ++ ka->sa.sa_restorer = NULL; ++#endif ++ sigemptyset(&ka->sa.sa_mask); ++ ka++; ++ } ++} ++ ++bool unhandled_signal(struct task_struct *tsk, int sig) ++{ ++ void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler; ++ if (is_global_init(tsk)) ++ return true; ++ ++ if (handler != SIG_IGN && handler != SIG_DFL) ++ return false; ++ ++ /* if ptraced, let the tracer determine */ ++ return !tsk->ptrace; ++} ++ ++static void collect_signal(int sig, struct sigpending *list, siginfo_t *info, ++ bool *resched_timer) ++{ ++ struct sigqueue *q, *first = NULL; ++ ++ /* ++ * Collect the siginfo appropriate to this signal. Check if ++ * there is another siginfo for the same signal. 
++ */ ++ list_for_each_entry(q, &list->list, list) { ++ if (q->info.si_signo == sig) { ++ if (first) ++ goto still_pending; ++ first = q; ++ } ++ } ++ ++ sigdelset(&list->signal, sig); ++ ++ if (first) { ++still_pending: ++ list_del_init(&first->list); ++ copy_siginfo(info, &first->info); ++ ++ *resched_timer = ++ (first->flags & SIGQUEUE_PREALLOC) && ++ (info->si_code == SI_TIMER) && ++ (info->si_sys_private); ++ ++ __sigqueue_free(first); ++ } else { ++ /* ++ * Ok, it wasn't in the queue. This must be ++ * a fast-pathed signal or we must have been ++ * out of queue space. So zero out the info. ++ */ ++ clear_siginfo(info); ++ info->si_signo = sig; ++ info->si_errno = 0; ++ info->si_code = SI_USER; ++ info->si_pid = 0; ++ info->si_uid = 0; ++ } ++} ++ ++static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, ++ siginfo_t *info, bool *resched_timer) ++{ ++ int sig = next_signal(pending, mask); ++ ++ if (sig) ++ collect_signal(sig, pending, info, resched_timer); ++ return sig; ++} ++ ++/* ++ * Dequeue a signal and return the element to the caller, which is ++ * expected to free it. ++ * ++ * All callers have to hold the siglock. ++ */ ++int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) ++{ ++ bool resched_timer = false; ++ int signr; ++ ++ /* We only dequeue private signals from ourselves, we don't let ++ * signalfd steal them ++ */ ++ signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer); ++ if (!signr) { ++ signr = __dequeue_signal(&tsk->signal->shared_pending, ++ mask, info, &resched_timer); ++#ifdef CONFIG_POSIX_TIMERS ++ /* ++ * itimer signal ? ++ * ++ * itimers are process shared and we restart periodic ++ * itimers in the signal delivery path to prevent DoS ++ * attacks in the high resolution timer case. This is ++ * compliant with the old way of self-restarting ++ * itimers, as the SIGALRM is a legacy signal and only ++ * queued once. Changing the restart behaviour to ++ * restart the timer in the signal dequeue path is ++ * reducing the timer noise on heavy loaded !highres ++ * systems too. ++ */ ++ if (unlikely(signr == SIGALRM)) { ++ struct hrtimer *tmr = &tsk->signal->real_timer; ++ ++ if (!hrtimer_is_queued(tmr) && ++ tsk->signal->it_real_incr != 0) { ++ hrtimer_forward(tmr, tmr->base->get_time(), ++ tsk->signal->it_real_incr); ++ hrtimer_restart(tmr); ++ } ++ } ++#endif ++ } ++ ++ recalc_sigpending(); ++ if (!signr) ++ return 0; ++ ++ if (unlikely(sig_kernel_stop(signr))) { ++ /* ++ * Set a marker that we have dequeued a stop signal. Our ++ * caller might release the siglock and then the pending ++ * stop signal it is about to process is no longer in the ++ * pending bitmasks, but must still be cleared by a SIGCONT ++ * (and overruled by a SIGKILL). So those cases clear this ++ * shared flag after we've set it. Note that this flag may ++ * remain set after the signal we return is ignored or ++ * handled. That doesn't matter because its only purpose ++ * is to alert stop-signal processing code when another ++ * processor has come along and cleared the flag. ++ */ ++ current->jobctl |= JOBCTL_STOP_DEQUEUED; ++ } ++#ifdef CONFIG_POSIX_TIMERS ++ if (resched_timer) { ++ /* ++ * Release the siglock to ensure proper locking order ++ * of timer locks outside of siglocks. Note, we leave ++ * irqs disabled here, since the posix-timers code is ++ * about to disable them again anyway. 
++ */ ++ spin_unlock(&tsk->sighand->siglock); ++ posixtimer_rearm(info); ++ spin_lock(&tsk->sighand->siglock); ++ ++ /* Don't expose the si_sys_private value to userspace */ ++ info->si_sys_private = 0; ++ } ++#endif ++ return signr; ++} ++ ++static int dequeue_synchronous_signal(siginfo_t *info) ++{ ++ struct task_struct *tsk = current; ++ struct sigpending *pending = &tsk->pending; ++ struct sigqueue *q, *sync = NULL; ++ ++ /* ++ * Might a synchronous signal be in the queue? ++ */ ++ if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK)) ++ return 0; ++ ++ /* ++ * Return the first synchronous signal in the queue. ++ */ ++ list_for_each_entry(q, &pending->list, list) { ++ /* Synchronous signals have a postive si_code */ ++ if ((q->info.si_code > SI_USER) && ++ (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) { ++ sync = q; ++ goto next; ++ } ++ } ++ return 0; ++next: ++ /* ++ * Check if there is another siginfo for the same signal. ++ */ ++ list_for_each_entry_continue(q, &pending->list, list) { ++ if (q->info.si_signo == sync->info.si_signo) ++ goto still_pending; ++ } ++ ++ sigdelset(&pending->signal, sync->info.si_signo); ++ recalc_sigpending(); ++still_pending: ++ list_del_init(&sync->list); ++ copy_siginfo(info, &sync->info); ++ __sigqueue_free(sync); ++ return info->si_signo; ++} ++ ++/* ++ * Tell a process that it has a new active signal.. ++ * ++ * NOTE! we rely on the previous spin_lock to ++ * lock interrupts for us! We can only be called with ++ * "siglock" held, and the local interrupt must ++ * have been disabled when that got acquired! ++ * ++ * No need to set need_resched since signal event passing ++ * goes through ->blocked ++ */ ++void signal_wake_up_state(struct task_struct *t, unsigned int state) ++{ ++ set_tsk_thread_flag(t, TIF_SIGPENDING); ++ /* ++ * TASK_WAKEKILL also means wake it up in the stopped/traced/killable ++ * case. We don't check t->state here because there is a race with it ++ * executing another processor and just now entering stopped state. ++ * By using wake_up_state, we ensure the process will wake up and ++ * handle its death signal. ++ */ ++ if (!wake_up_state(t, state | TASK_INTERRUPTIBLE)) ++ kick_process(t); ++} ++ ++/* ++ * Remove signals in mask from the pending set and queue. ++ * Returns 1 if any signals were found. ++ * ++ * All callers must be holding the siglock. 
++ */ ++static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s) ++{ ++ struct sigqueue *q, *n; ++ sigset_t m; ++ ++ sigandsets(&m, mask, &s->signal); ++ if (sigisemptyset(&m)) ++ return; ++ ++ sigandnsets(&s->signal, &s->signal, mask); ++ list_for_each_entry_safe(q, n, &s->list, list) { ++ if (sigismember(mask, q->info.si_signo)) { ++ list_del_init(&q->list); ++ __sigqueue_free(q); ++ } ++ } ++} ++ ++static inline int is_si_special(const struct siginfo *info) ++{ ++ return info <= SEND_SIG_FORCED; ++} ++ ++static inline bool si_fromuser(const struct siginfo *info) ++{ ++ return info == SEND_SIG_NOINFO || ++ (!is_si_special(info) && SI_FROMUSER(info)); ++} ++ ++/* ++ * called with RCU read lock from check_kill_permission() ++ */ ++static bool kill_ok_by_cred(struct task_struct *t) ++{ ++ const struct cred *cred = current_cred(); ++ const struct cred *tcred = __task_cred(t); ++ ++ return uid_eq(cred->euid, tcred->suid) || ++ uid_eq(cred->euid, tcred->uid) || ++ uid_eq(cred->uid, tcred->suid) || ++ uid_eq(cred->uid, tcred->uid) || ++ ns_capable(tcred->user_ns, CAP_KILL); ++} ++ ++/* ++ * Bad permissions for sending the signal ++ * - the caller must hold the RCU read lock ++ */ ++static int check_kill_permission(int sig, struct siginfo *info, ++ struct task_struct *t) ++{ ++ struct pid *sid; ++ int error; ++ ++ if (!valid_signal(sig)) ++ return -EINVAL; ++ ++ if (!si_fromuser(info)) ++ return 0; ++ ++ error = audit_signal_info(sig, t); /* Let audit system see the signal */ ++ if (error) ++ return error; ++ ++ if (!same_thread_group(current, t) && ++ !kill_ok_by_cred(t)) { ++ switch (sig) { ++ case SIGCONT: ++ sid = task_session(t); ++ /* ++ * We don't return the error if sid == NULL. The ++ * task was unhashed, the caller must notice this. ++ */ ++ if (!sid || sid == task_session(current)) ++ break; ++ default: ++ return -EPERM; ++ } ++ } ++ ++ return security_task_kill(t, info, sig, NULL); ++} ++ ++/** ++ * ptrace_trap_notify - schedule trap to notify ptracer ++ * @t: tracee wanting to notify tracer ++ * ++ * This function schedules sticky ptrace trap which is cleared on the next ++ * TRAP_STOP to notify ptracer of an event. @t must have been seized by ++ * ptracer. ++ * ++ * If @t is running, STOP trap will be taken. If trapped for STOP and ++ * ptracer is listening for events, tracee is woken up so that it can ++ * re-trap for the new event. If trapped otherwise, STOP trap will be ++ * eventually taken without returning to userland after the existing traps ++ * are finished by PTRACE_CONT. ++ * ++ * CONTEXT: ++ * Must be called with @task->sighand->siglock held. ++ */ ++static void ptrace_trap_notify(struct task_struct *t) ++{ ++ WARN_ON_ONCE(!(t->ptrace & PT_SEIZED)); ++ assert_spin_locked(&t->sighand->siglock); ++ ++ task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); ++ ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); ++} ++ ++/* ++ * Handle magic process-wide effects of stop/continue signals. Unlike ++ * the signal actions, these happen immediately at signal-generation ++ * time regardless of blocking, ignoring, or handling. This does the ++ * actual continuing for SIGCONT, but not the actual stopping for stop ++ * signals. The process stop is done as a signal action for SIG_DFL. ++ * ++ * Returns true if the signal should be actually delivered, otherwise ++ * it should be dropped. 
++ */ ++static bool prepare_signal(int sig, struct task_struct *p, bool force) ++{ ++ struct signal_struct *signal = p->signal; ++ struct task_struct *t; ++ sigset_t flush; ++ ++ if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) { ++ if (!(signal->flags & SIGNAL_GROUP_EXIT)) ++ return sig == SIGKILL; ++ /* ++ * The process is in the middle of dying, nothing to do. ++ */ ++ } else if (sig_kernel_stop(sig)) { ++ /* ++ * This is a stop signal. Remove SIGCONT from all queues. ++ */ ++ siginitset(&flush, sigmask(SIGCONT)); ++ flush_sigqueue_mask(&flush, &signal->shared_pending); ++ for_each_thread(p, t) ++ flush_sigqueue_mask(&flush, &t->pending); ++ } else if (sig == SIGCONT) { ++ unsigned int why; ++ /* ++ * Remove all stop signals from all queues, wake all threads. ++ */ ++ siginitset(&flush, SIG_KERNEL_STOP_MASK); ++ flush_sigqueue_mask(&flush, &signal->shared_pending); ++ for_each_thread(p, t) { ++ flush_sigqueue_mask(&flush, &t->pending); ++ task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); ++ if (likely(!(t->ptrace & PT_SEIZED))) ++ wake_up_state(t, __TASK_STOPPED); ++ else ++ ptrace_trap_notify(t); ++ } ++ ++ /* ++ * Notify the parent with CLD_CONTINUED if we were stopped. ++ * ++ * If we were in the middle of a group stop, we pretend it ++ * was already finished, and then continued. Since SIGCHLD ++ * doesn't queue we report only CLD_STOPPED, as if the next ++ * CLD_CONTINUED was dropped. ++ */ ++ why = 0; ++ if (signal->flags & SIGNAL_STOP_STOPPED) ++ why |= SIGNAL_CLD_CONTINUED; ++ else if (signal->group_stop_count) ++ why |= SIGNAL_CLD_STOPPED; ++ ++ if (why) { ++ /* ++ * The first thread which returns from do_signal_stop() ++ * will take ->siglock, notice SIGNAL_CLD_MASK, and ++ * notify its parent. See get_signal_to_deliver(). ++ */ ++ signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED); ++ signal->group_stop_count = 0; ++ signal->group_exit_code = 0; ++ } ++ } ++ ++ return !sig_ignored(p, sig, force); ++} ++ ++/* ++ * Test if P wants to take SIG. After we've checked all threads with this, ++ * it's equivalent to finding no threads not blocking SIG. Any threads not ++ * blocking SIG were ruled out because they are not running and already ++ * have pending signals. Such threads will dequeue from the shared queue ++ * as soon as they're available, so putting the signal on the shared queue ++ * will be equivalent to sending it to one such thread. ++ */ ++static inline bool wants_signal(int sig, struct task_struct *p) ++{ ++ if (sigismember(&p->blocked, sig)) ++ return false; ++ ++ if (p->flags & PF_EXITING) ++ return false; ++ ++ if (sig == SIGKILL) ++ return true; ++ ++ if (task_is_stopped_or_traced(p)) ++ return false; ++ ++ return task_curr(p) || !signal_pending(p); ++} ++ ++static void complete_signal(int sig, struct task_struct *p, enum pid_type type) ++{ ++ struct signal_struct *signal = p->signal; ++ struct task_struct *t; ++ ++ /* ++ * Now find a thread we can wake up to take the signal off the queue. ++ * ++ * If the main thread wants the signal, it gets first crack. ++ * Probably the least surprising to the average bear. ++ */ ++ if (wants_signal(sig, p)) ++ t = p; ++ else if ((type == PIDTYPE_PID) || thread_group_empty(p)) ++ /* ++ * There is just one thread and it does not need to be woken. ++ * It will dequeue unblocked signals before it runs again. ++ */ ++ return; ++ else { ++ /* ++ * Otherwise try to find a suitable thread. 
++ */ ++ t = signal->curr_target; ++ while (!wants_signal(sig, t)) { ++ t = next_thread(t); ++ if (t == signal->curr_target) ++ /* ++ * No thread needs to be woken. ++ * Any eligible threads will see ++ * the signal in the queue soon. ++ */ ++ return; ++ } ++ signal->curr_target = t; ++ } ++ ++ /* ++ * Found a killable thread. If the signal will be fatal, ++ * then start taking the whole group down immediately. ++ */ ++ if (sig_fatal(p, sig) && ++ !(signal->flags & SIGNAL_GROUP_EXIT) && ++ !sigismember(&t->real_blocked, sig) && ++ (sig == SIGKILL || !p->ptrace)) { ++ /* ++ * This signal will be fatal to the whole group. ++ */ ++ if (!sig_kernel_coredump(sig)) { ++ /* ++ * Start a group exit and wake everybody up. ++ * This way we don't have other threads ++ * running and doing things after a slower ++ * thread has the fatal signal pending. ++ */ ++ signal->flags = SIGNAL_GROUP_EXIT; ++ signal->group_exit_code = sig; ++ signal->group_stop_count = 0; ++ t = p; ++ do { ++ task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); ++ sigaddset(&t->pending.signal, SIGKILL); ++ signal_wake_up(t, 1); ++ } while_each_thread(p, t); ++ return; ++ } ++ } ++ ++ /* ++ * The signal is already in the shared-pending queue. ++ * Tell the chosen thread to wake up and dequeue it. ++ */ ++ signal_wake_up(t, sig == SIGKILL); ++ return; ++} ++ ++static inline bool legacy_queue(struct sigpending *signals, int sig) ++{ ++ return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); ++} ++ ++#ifdef CONFIG_USER_NS ++static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t) ++{ ++ if (current_user_ns() == task_cred_xxx(t, user_ns)) ++ return; ++ ++ if (SI_FROMKERNEL(info)) ++ return; ++ ++ rcu_read_lock(); ++ info->si_uid = from_kuid_munged(task_cred_xxx(t, user_ns), ++ make_kuid(current_user_ns(), info->si_uid)); ++ rcu_read_unlock(); ++} ++#else ++static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t) ++{ ++ return; ++} ++#endif ++ ++static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, ++ enum pid_type type, int from_ancestor_ns) ++{ ++ struct sigpending *pending; ++ struct sigqueue *q; ++ int override_rlimit; ++ int ret = 0, result; ++ ++ assert_spin_locked(&t->sighand->siglock); ++ ++ result = TRACE_SIGNAL_IGNORED; ++ if (!prepare_signal(sig, t, ++ from_ancestor_ns || (info == SEND_SIG_PRIV) || (info == SEND_SIG_FORCED))) ++ goto ret; ++ ++ pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending; ++ /* ++ * Short-circuit ignored signals and support queuing ++ * exactly one non-rt signal, so that we can get more ++ * detailed information about the cause of the signal. ++ */ ++ result = TRACE_SIGNAL_ALREADY_PENDING; ++ if (legacy_queue(pending, sig)) ++ goto ret; ++ ++ result = TRACE_SIGNAL_DELIVERED; ++ /* ++ * fast-pathed signals for kernel-internal things like SIGSTOP ++ * or SIGKILL. ++ */ ++ if (info == SEND_SIG_FORCED) ++ goto out_set; ++ ++ /* ++ * Real-time signals must be queued if sent by sigqueue, or ++ * some other real-time mechanism. It is implementation ++ * defined whether kill() does so. We attempt to do so, on ++ * the principle of least surprise, but since kill is not ++ * allowed to fail with EAGAIN when low on memory we just ++ * make sure at least one signal gets delivered and don't ++ * pass on the info struct. 
++ */ ++ if (sig < SIGRTMIN) ++ override_rlimit = (is_si_special(info) || info->si_code >= 0); ++ else ++ override_rlimit = 0; ++ ++ q = __sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit); ++ if (q) { ++ list_add_tail(&q->list, &pending->list); ++ switch ((unsigned long) info) { ++ case (unsigned long) SEND_SIG_NOINFO: ++ clear_siginfo(&q->info); ++ q->info.si_signo = sig; ++ q->info.si_errno = 0; ++ q->info.si_code = SI_USER; ++ q->info.si_pid = task_tgid_nr_ns(current, ++ task_active_pid_ns(t)); ++ q->info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); ++ break; ++ case (unsigned long) SEND_SIG_PRIV: ++ clear_siginfo(&q->info); ++ q->info.si_signo = sig; ++ q->info.si_errno = 0; ++ q->info.si_code = SI_KERNEL; ++ q->info.si_pid = 0; ++ q->info.si_uid = 0; ++ break; ++ default: ++ copy_siginfo(&q->info, info); ++ if (from_ancestor_ns) ++ q->info.si_pid = 0; ++ break; ++ } ++ ++ userns_fixup_signal_uid(&q->info, t); ++ ++ } else if (!is_si_special(info)) { ++ if (sig >= SIGRTMIN && info->si_code != SI_USER) { ++ /* ++ * Queue overflow, abort. We may abort if the ++ * signal was rt and sent by user using something ++ * other than kill(). ++ */ ++ result = TRACE_SIGNAL_OVERFLOW_FAIL; ++ ret = -EAGAIN; ++ goto ret; ++ } else { ++ /* ++ * This is a silent loss of information. We still ++ * send the signal, but the *info bits are lost. ++ */ ++ result = TRACE_SIGNAL_LOSE_INFO; ++ } ++ } ++ ++out_set: ++ signalfd_notify(t, sig); ++ sigaddset(&pending->signal, sig); ++ ++ /* Let multiprocess signals appear after on-going forks */ ++ if (type > PIDTYPE_TGID) { ++ struct multiprocess_signals *delayed; ++ hlist_for_each_entry(delayed, &t->signal->multiprocess, node) { ++ sigset_t *signal = &delayed->signal; ++ /* Can't queue both a stop and a continue signal */ ++ if (sig == SIGCONT) ++ sigdelsetmask(signal, SIG_KERNEL_STOP_MASK); ++ else if (sig_kernel_stop(sig)) ++ sigdelset(signal, SIGCONT); ++ sigaddset(signal, sig); ++ } ++ } ++ ++ complete_signal(sig, t, type); ++ret: ++ trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result); ++ return ret; ++} ++ ++static int send_signal(int sig, struct siginfo *info, struct task_struct *t, ++ enum pid_type type) ++{ ++ int from_ancestor_ns = 0; ++ ++#ifdef CONFIG_PID_NS ++ from_ancestor_ns = si_fromuser(info) && ++ !task_pid_nr_ns(current, task_active_pid_ns(t)); ++#endif ++ ++ return __send_signal(sig, info, t, type, from_ancestor_ns); ++} ++ ++static void print_fatal_signal(int signr) ++{ ++ struct pt_regs *regs = signal_pt_regs(); ++ pr_info("potentially unexpected fatal signal %d.\n", signr); ++ ++#if defined(__i386__) && !defined(__arch_um__) ++ pr_info("code at %08lx: ", regs->ip); ++ { ++ int i; ++ for (i = 0; i < 16; i++) { ++ unsigned char insn; ++ ++ if (get_user(insn, (unsigned char *)(regs->ip + i))) ++ break; ++ pr_cont("%02x ", insn); ++ } ++ } ++ pr_cont("\n"); ++#endif ++ preempt_disable(); ++ show_regs(regs); ++ preempt_enable(); ++} ++ ++static int __init setup_print_fatal_signals(char *str) ++{ ++ get_option (&str, &print_fatal_signals); ++ ++ return 1; ++} ++ ++__setup("print-fatal-signals=", setup_print_fatal_signals); ++ ++int ++__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) ++{ ++ return send_signal(sig, info, p, PIDTYPE_TGID); ++} ++ ++static int ++specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) ++{ ++ return send_signal(sig, info, t, PIDTYPE_PID); ++} ++ ++int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, ++ enum pid_type 
type) ++{ ++ unsigned long flags; ++ int ret = -ESRCH; ++ ++ if (lock_task_sighand(p, &flags)) { ++ ret = send_signal(sig, info, p, type); ++ unlock_task_sighand(p, &flags); ++ } ++ ++ return ret; ++} ++ ++/* ++ * Force a signal that the process can't ignore: if necessary ++ * we unblock the signal and change any SIG_IGN to SIG_DFL. ++ * ++ * Note: If we unblock the signal, we always reset it to SIG_DFL, ++ * since we do not want to have a signal handler that was blocked ++ * be invoked when user space had explicitly blocked it. ++ * ++ * We don't want to have recursive SIGSEGV's etc, for example, ++ * that is why we also clear SIGNAL_UNKILLABLE. ++ */ ++int ++force_sig_info(int sig, struct siginfo *info, struct task_struct *t) ++{ ++ unsigned long int flags; ++ int ret, blocked, ignored; ++ struct k_sigaction *action; ++ ++ spin_lock_irqsave(&t->sighand->siglock, flags); ++ action = &t->sighand->action[sig-1]; ++ ignored = action->sa.sa_handler == SIG_IGN; ++ blocked = sigismember(&t->blocked, sig); ++ if (blocked || ignored) { ++ action->sa.sa_handler = SIG_DFL; ++ if (blocked) { ++ sigdelset(&t->blocked, sig); ++ recalc_sigpending_and_wake(t); ++ } ++ } ++ /* ++ * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect ++ * debugging to leave init killable. ++ */ ++ if (action->sa.sa_handler == SIG_DFL && !t->ptrace) ++ t->signal->flags &= ~SIGNAL_UNKILLABLE; ++ ret = specific_send_sig_info(sig, info, t); ++ spin_unlock_irqrestore(&t->sighand->siglock, flags); ++ ++ return ret; ++} ++ ++/* ++ * Nuke all other threads in the group. ++ */ ++int zap_other_threads(struct task_struct *p) ++{ ++ struct task_struct *t = p; ++ int count = 0; ++ ++ p->signal->group_stop_count = 0; ++ ++ while_each_thread(p, t) { ++ task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); ++ count++; ++ ++ /* Don't bother with already dead threads */ ++ if (t->exit_state) ++ continue; ++ sigaddset(&t->pending.signal, SIGKILL); ++ signal_wake_up(t, 1); ++ } ++ ++ return count; ++} ++ ++struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, ++ unsigned long *flags) ++{ ++ struct sighand_struct *sighand; ++ ++ rcu_read_lock(); ++ for (;;) { ++ sighand = rcu_dereference(tsk->sighand); ++ if (unlikely(sighand == NULL)) ++ break; ++ ++ /* ++ * This sighand can be already freed and even reused, but ++ * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which ++ * initializes ->siglock: this slab can't go away, it has ++ * the same object type, ->siglock can't be reinitialized. ++ * ++ * We need to ensure that tsk->sighand is still the same ++ * after we take the lock, we can race with de_thread() or ++ * __exit_signal(). In the latter case the next iteration ++ * must see ->sighand == NULL. 
++ */ ++ spin_lock_irqsave(&sighand->siglock, *flags); ++ if (likely(sighand == tsk->sighand)) ++ break; ++ spin_unlock_irqrestore(&sighand->siglock, *flags); ++ } ++ rcu_read_unlock(); ++ ++ return sighand; ++} ++ ++/* ++ * send signal info to all the members of a group ++ */ ++int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, ++ enum pid_type type) ++{ ++ int ret; ++ ++ rcu_read_lock(); ++ ret = check_kill_permission(sig, info, p); ++ rcu_read_unlock(); ++ ++ if (!ret && sig) ++ ret = do_send_sig_info(sig, info, p, type); ++ ++ return ret; ++} ++ ++/* ++ * __kill_pgrp_info() sends a signal to a process group: this is what the tty ++ * control characters do (^C, ^Z etc) ++ * - the caller must hold at least a readlock on tasklist_lock ++ */ ++int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) ++{ ++ struct task_struct *p = NULL; ++ int retval, success; ++ ++ success = 0; ++ retval = -ESRCH; ++ do_each_pid_task(pgrp, PIDTYPE_PGID, p) { ++ int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID); ++ success |= !err; ++ retval = err; ++ } while_each_pid_task(pgrp, PIDTYPE_PGID, p); ++ return success ? 0 : retval; ++} ++ ++int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) ++{ ++ int error = -ESRCH; ++ struct task_struct *p; ++ ++ for (;;) { ++ rcu_read_lock(); ++ p = pid_task(pid, PIDTYPE_PID); ++ if (p) ++ error = group_send_sig_info(sig, info, p, PIDTYPE_TGID); ++ rcu_read_unlock(); ++ if (likely(!p || error != -ESRCH)) ++ return error; ++ ++ /* ++ * The task was unhashed in between, try again. If it ++ * is dead, pid_task() will return NULL, if we race with ++ * de_thread() it will find the new leader. ++ */ ++ } ++} ++ ++static int kill_proc_info(int sig, struct siginfo *info, pid_t pid) ++{ ++ int error; ++ rcu_read_lock(); ++ error = kill_pid_info(sig, info, find_vpid(pid)); ++ rcu_read_unlock(); ++ return error; ++} ++ ++static inline bool kill_as_cred_perm(const struct cred *cred, ++ struct task_struct *target) ++{ ++ const struct cred *pcred = __task_cred(target); ++ ++ return uid_eq(cred->euid, pcred->suid) || ++ uid_eq(cred->euid, pcred->uid) || ++ uid_eq(cred->uid, pcred->suid) || ++ uid_eq(cred->uid, pcred->uid); ++} ++ ++/* like kill_pid_info(), but doesn't use uid/euid of "current" */ ++int kill_pid_info_as_cred(int sig, struct siginfo *info, struct pid *pid, ++ const struct cred *cred) ++{ ++ int ret = -EINVAL; ++ struct task_struct *p; ++ unsigned long flags; ++ ++ if (!valid_signal(sig)) ++ return ret; ++ ++ rcu_read_lock(); ++ p = pid_task(pid, PIDTYPE_PID); ++ if (!p) { ++ ret = -ESRCH; ++ goto out_unlock; ++ } ++ if (si_fromuser(info) && !kill_as_cred_perm(cred, p)) { ++ ret = -EPERM; ++ goto out_unlock; ++ } ++ ret = security_task_kill(p, info, sig, cred); ++ if (ret) ++ goto out_unlock; ++ ++ if (sig) { ++ if (lock_task_sighand(p, &flags)) { ++ ret = __send_signal(sig, info, p, PIDTYPE_TGID, 0); ++ unlock_task_sighand(p, &flags); ++ } else ++ ret = -ESRCH; ++ } ++out_unlock: ++ rcu_read_unlock(); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kill_pid_info_as_cred); ++ ++/* ++ * kill_something_info() interprets pid in interesting ways just like kill(2). ++ * ++ * POSIX specifies that kill(-1,sig) is unspecified, but what we have ++ * is probably wrong. Should make it like BSD or SYSV. 
++ */ ++ ++static int kill_something_info(int sig, struct siginfo *info, pid_t pid) ++{ ++ int ret; ++ ++ if (pid > 0) { ++ rcu_read_lock(); ++ ret = kill_pid_info(sig, info, find_vpid(pid)); ++ rcu_read_unlock(); ++ return ret; ++ } ++ ++ /* -INT_MIN is undefined. Exclude this case to avoid a UBSAN warning */ ++ if (pid == INT_MIN) ++ return -ESRCH; ++ ++ read_lock(&tasklist_lock); ++ if (pid != -1) { ++ ret = __kill_pgrp_info(sig, info, ++ pid ? find_vpid(-pid) : task_pgrp(current)); ++ } else { ++ int retval = 0, count = 0; ++ struct task_struct * p; ++ ++ for_each_process(p) { ++ if (task_pid_vnr(p) > 1 && ++ !same_thread_group(p, current)) { ++ int err = group_send_sig_info(sig, info, p, ++ PIDTYPE_MAX); ++ ++count; ++ if (err != -EPERM) ++ retval = err; ++ } ++ } ++ ret = count ? retval : -ESRCH; ++ } ++ read_unlock(&tasklist_lock); ++ ++ return ret; ++} ++ ++/* ++ * These are for backward compatibility with the rest of the kernel source. ++ */ ++ ++int send_sig_info(int sig, struct siginfo *info, struct task_struct *p) ++{ ++ /* ++ * Make sure legacy kernel users don't send in bad values ++ * (normal paths check this in check_kill_permission). ++ */ ++ if (!valid_signal(sig)) ++ return -EINVAL; ++ ++ return do_send_sig_info(sig, info, p, PIDTYPE_PID); ++} ++ ++#define __si_special(priv) \ ++ ((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO) ++ ++int ++send_sig(int sig, struct task_struct *p, int priv) ++{ ++ return send_sig_info(sig, __si_special(priv), p); ++} ++ ++void force_sig(int sig, struct task_struct *p) ++{ ++ force_sig_info(sig, SEND_SIG_PRIV, p); ++} ++ ++/* ++ * When things go south during signal handling, we ++ * will force a SIGSEGV. And if the signal that caused ++ * the problem was already a SIGSEGV, we'll want to ++ * make sure we don't even try to deliver the signal.. 
++ */ ++void force_sigsegv(int sig, struct task_struct *p) ++{ ++ if (sig == SIGSEGV) { ++ unsigned long flags; ++ spin_lock_irqsave(&p->sighand->siglock, flags); ++ p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL; ++ spin_unlock_irqrestore(&p->sighand->siglock, flags); ++ } ++ force_sig(SIGSEGV, p); ++} ++ ++int force_sig_fault(int sig, int code, void __user *addr ++ ___ARCH_SI_TRAPNO(int trapno) ++ ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) ++ , struct task_struct *t) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = sig; ++ info.si_errno = 0; ++ info.si_code = code; ++ info.si_addr = addr; ++#ifdef __ARCH_SI_TRAPNO ++ info.si_trapno = trapno; ++#endif ++#ifdef __ia64__ ++ info.si_imm = imm; ++ info.si_flags = flags; ++ info.si_isr = isr; ++#endif ++ return force_sig_info(info.si_signo, &info, t); ++} ++ ++int send_sig_fault(int sig, int code, void __user *addr ++ ___ARCH_SI_TRAPNO(int trapno) ++ ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) ++ , struct task_struct *t) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = sig; ++ info.si_errno = 0; ++ info.si_code = code; ++ info.si_addr = addr; ++#ifdef __ARCH_SI_TRAPNO ++ info.si_trapno = trapno; ++#endif ++#ifdef __ia64__ ++ info.si_imm = imm; ++ info.si_flags = flags; ++ info.si_isr = isr; ++#endif ++ return send_sig_info(info.si_signo, &info, t); ++} ++ ++int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t) ++{ ++ struct siginfo info; ++ ++ WARN_ON((code != BUS_MCEERR_AO) && (code != BUS_MCEERR_AR)); ++ clear_siginfo(&info); ++ info.si_signo = SIGBUS; ++ info.si_errno = 0; ++ info.si_code = code; ++ info.si_addr = addr; ++ info.si_addr_lsb = lsb; ++ return force_sig_info(info.si_signo, &info, t); ++} ++ ++int send_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t) ++{ ++ struct siginfo info; ++ ++ WARN_ON((code != BUS_MCEERR_AO) && (code != BUS_MCEERR_AR)); ++ clear_siginfo(&info); ++ info.si_signo = SIGBUS; ++ info.si_errno = 0; ++ info.si_code = code; ++ info.si_addr = addr; ++ info.si_addr_lsb = lsb; ++ return send_sig_info(info.si_signo, &info, t); ++} ++EXPORT_SYMBOL(send_sig_mceerr); ++ ++int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = SIGSEGV; ++ info.si_errno = 0; ++ info.si_code = SEGV_BNDERR; ++ info.si_addr = addr; ++ info.si_lower = lower; ++ info.si_upper = upper; ++ return force_sig_info(info.si_signo, &info, current); ++} ++ ++#ifdef SEGV_PKUERR ++int force_sig_pkuerr(void __user *addr, u32 pkey) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = SIGSEGV; ++ info.si_errno = 0; ++ info.si_code = SEGV_PKUERR; ++ info.si_addr = addr; ++ info.si_pkey = pkey; ++ return force_sig_info(info.si_signo, &info, current); ++} ++#endif ++ ++/* For the crazy architectures that include trap information in ++ * the errno field, instead of an actual errno value. 
++ */ ++int force_sig_ptrace_errno_trap(int errno, void __user *addr) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = SIGTRAP; ++ info.si_errno = errno; ++ info.si_code = TRAP_HWBKPT; ++ info.si_addr = addr; ++ return force_sig_info(info.si_signo, &info, current); ++} ++ ++int kill_pgrp(struct pid *pid, int sig, int priv) ++{ ++ int ret; ++ ++ read_lock(&tasklist_lock); ++ ret = __kill_pgrp_info(sig, __si_special(priv), pid); ++ read_unlock(&tasklist_lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL(kill_pgrp); ++ ++int kill_pid(struct pid *pid, int sig, int priv) ++{ ++ return kill_pid_info(sig, __si_special(priv), pid); ++} ++EXPORT_SYMBOL(kill_pid); ++ ++/* ++ * These functions support sending signals using preallocated sigqueue ++ * structures. This is needed "because realtime applications cannot ++ * afford to lose notifications of asynchronous events, like timer ++ * expirations or I/O completions". In the case of POSIX Timers ++ * we allocate the sigqueue structure from the timer_create. If this ++ * allocation fails we are able to report the failure to the application ++ * with an EAGAIN error. ++ */ ++struct sigqueue *sigqueue_alloc(void) ++{ ++ struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); ++ ++ if (q) ++ q->flags |= SIGQUEUE_PREALLOC; ++ ++ return q; ++} ++ ++void sigqueue_free(struct sigqueue *q) ++{ ++ unsigned long flags; ++ spinlock_t *lock = ¤t->sighand->siglock; ++ ++ BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); ++ /* ++ * We must hold ->siglock while testing q->list ++ * to serialize with collect_signal() or with ++ * __exit_signal()->flush_sigqueue(). ++ */ ++ spin_lock_irqsave(lock, flags); ++ q->flags &= ~SIGQUEUE_PREALLOC; ++ /* ++ * If it is queued it will be freed when dequeued, ++ * like the "regular" sigqueue. ++ */ ++ if (!list_empty(&q->list)) ++ q = NULL; ++ spin_unlock_irqrestore(lock, flags); ++ ++ if (q) ++ __sigqueue_free(q); ++} ++ ++int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type) ++{ ++ int sig = q->info.si_signo; ++ struct sigpending *pending; ++ struct task_struct *t; ++ unsigned long flags; ++ int ret, result; ++ ++ BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); ++ ++ ret = -1; ++ rcu_read_lock(); ++ t = pid_task(pid, type); ++ if (!t || !likely(lock_task_sighand(t, &flags))) ++ goto ret; ++ ++ ret = 1; /* the signal is ignored */ ++ result = TRACE_SIGNAL_IGNORED; ++ if (!prepare_signal(sig, t, false)) ++ goto out; ++ ++ ret = 0; ++ if (unlikely(!list_empty(&q->list))) { ++ /* ++ * If an SI_TIMER entry is already queue just increment ++ * the overrun count. ++ */ ++ BUG_ON(q->info.si_code != SI_TIMER); ++ q->info.si_overrun++; ++ result = TRACE_SIGNAL_ALREADY_PENDING; ++ goto out; ++ } ++ q->info.si_overrun = 0; ++ ++ signalfd_notify(t, sig); ++ pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending; ++ list_add_tail(&q->list, &pending->list); ++ sigaddset(&pending->signal, sig); ++ complete_signal(sig, t, type); ++ result = TRACE_SIGNAL_DELIVERED; ++out: ++ trace_signal_generate(sig, &q->info, t, type != PIDTYPE_PID, result); ++ unlock_task_sighand(t, &flags); ++ret: ++ rcu_read_unlock(); ++ return ret; ++} ++ ++/* ++ * Let a parent know about the death of a child. ++ * For a stopped/continued status change, use do_notify_parent_cldstop instead. ++ * ++ * Returns true if our parent ignored us and so we've switched to ++ * self-reaping. 
++ */ ++bool do_notify_parent(struct task_struct *tsk, int sig) ++{ ++ struct siginfo info; ++ unsigned long flags; ++ struct sighand_struct *psig; ++ bool autoreap = false; ++ u64 utime, stime; ++ ++ BUG_ON(sig == -1); ++ ++ /* do_notify_parent_cldstop should have been called instead. */ ++ BUG_ON(task_is_stopped_or_traced(tsk)); ++ ++ BUG_ON(!tsk->ptrace && ++ (tsk->group_leader != tsk || !thread_group_empty(tsk))); ++ ++ if (sig != SIGCHLD) { ++ /* ++ * This is only possible if parent == real_parent. ++ * Check if it has changed security domain. ++ */ ++ if (tsk->parent_exec_id_u64 != READ_ONCE(tsk->parent->self_exec_id_u64)) ++ sig = SIGCHLD; ++ } ++ ++ clear_siginfo(&info); ++ info.si_signo = sig; ++ info.si_errno = 0; ++ /* ++ * We are under tasklist_lock here so our parent is tied to ++ * us and cannot change. ++ * ++ * task_active_pid_ns will always return the same pid namespace ++ * until a task passes through release_task. ++ * ++ * write_lock() currently calls preempt_disable() which is the ++ * same as rcu_read_lock(), but according to Oleg, this is not ++ * correct to rely on this ++ */ ++ rcu_read_lock(); ++ info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent)); ++ info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns), ++ task_uid(tsk)); ++ rcu_read_unlock(); ++ ++ task_cputime(tsk, &utime, &stime); ++ info.si_utime = nsec_to_clock_t(utime + tsk->signal->utime); ++ info.si_stime = nsec_to_clock_t(stime + tsk->signal->stime); ++ ++ info.si_status = tsk->exit_code & 0x7f; ++ if (tsk->exit_code & 0x80) ++ info.si_code = CLD_DUMPED; ++ else if (tsk->exit_code & 0x7f) ++ info.si_code = CLD_KILLED; ++ else { ++ info.si_code = CLD_EXITED; ++ info.si_status = tsk->exit_code >> 8; ++ } ++ ++ psig = tsk->parent->sighand; ++ spin_lock_irqsave(&psig->siglock, flags); ++ if (!tsk->ptrace && sig == SIGCHLD && ++ (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || ++ (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { ++ /* ++ * We are exiting and our parent doesn't care. POSIX.1 ++ * defines special semantics for setting SIGCHLD to SIG_IGN ++ * or setting the SA_NOCLDWAIT flag: we should be reaped ++ * automatically and not left for our parent's wait4 call. ++ * Rather than having the parent do it as a magic kind of ++ * signal handler, we just set this to tell do_exit that we ++ * can be cleaned up without becoming a zombie. Note that ++ * we still call __wake_up_parent in this case, because a ++ * blocked sys_wait4 might now return -ECHILD. ++ * ++ * Whether we send SIGCHLD or not for SA_NOCLDWAIT ++ * is implementation-defined: we do (if you don't want ++ * it, just use SIG_IGN instead). ++ */ ++ autoreap = true; ++ if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) ++ sig = 0; ++ } ++ if (valid_signal(sig) && sig) ++ __group_send_sig_info(sig, &info, tsk->parent); ++ __wake_up_parent(tsk, tsk->parent); ++ spin_unlock_irqrestore(&psig->siglock, flags); ++ ++ return autoreap; ++} ++ ++/** ++ * do_notify_parent_cldstop - notify parent of stopped/continued state change ++ * @tsk: task reporting the state change ++ * @for_ptracer: the notification is for ptracer ++ * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report ++ * ++ * Notify @tsk's parent that the stopped/continued state has changed. If ++ * @for_ptracer is %false, @tsk's group leader notifies to its real parent. ++ * If %true, @tsk reports to @tsk->parent which should be the ptracer. ++ * ++ * CONTEXT: ++ * Must be called with tasklist_lock at least read locked. 
++ */ ++static void do_notify_parent_cldstop(struct task_struct *tsk, ++ bool for_ptracer, int why) ++{ ++ struct siginfo info; ++ unsigned long flags; ++ struct task_struct *parent; ++ struct sighand_struct *sighand; ++ u64 utime, stime; ++ ++ if (for_ptracer) { ++ parent = tsk->parent; ++ } else { ++ tsk = tsk->group_leader; ++ parent = tsk->real_parent; ++ } ++ ++ clear_siginfo(&info); ++ info.si_signo = SIGCHLD; ++ info.si_errno = 0; ++ /* ++ * see comment in do_notify_parent() about the following 4 lines ++ */ ++ rcu_read_lock(); ++ info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent)); ++ info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); ++ rcu_read_unlock(); ++ ++ task_cputime(tsk, &utime, &stime); ++ info.si_utime = nsec_to_clock_t(utime); ++ info.si_stime = nsec_to_clock_t(stime); ++ ++ info.si_code = why; ++ switch (why) { ++ case CLD_CONTINUED: ++ info.si_status = SIGCONT; ++ break; ++ case CLD_STOPPED: ++ info.si_status = tsk->signal->group_exit_code & 0x7f; ++ break; ++ case CLD_TRAPPED: ++ info.si_status = tsk->exit_code & 0x7f; ++ break; ++ default: ++ BUG(); ++ } ++ ++ sighand = parent->sighand; ++ spin_lock_irqsave(&sighand->siglock, flags); ++ if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && ++ !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) ++ __group_send_sig_info(SIGCHLD, &info, parent); ++ /* ++ * Even if SIGCHLD is not generated, we must wake up wait4 calls. ++ */ ++ __wake_up_parent(tsk, parent); ++ spin_unlock_irqrestore(&sighand->siglock, flags); ++} ++ ++static inline bool may_ptrace_stop(void) ++{ ++ if (!likely(current->ptrace)) ++ return false; ++ /* ++ * Are we in the middle of do_coredump? ++ * If so and our tracer is also part of the coredump stopping ++ * is a deadlock situation, and pointless because our tracer ++ * is dead so don't allow us to stop. ++ * If SIGKILL was already sent before the caller unlocked ++ * ->siglock we must see ->core_state != NULL. Otherwise it ++ * is safe to enter schedule(). ++ * ++ * This is almost outdated, a task with the pending SIGKILL can't ++ * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported ++ * after SIGKILL was already dequeued. ++ */ ++ if (unlikely(current->mm->core_state) && ++ unlikely(current->mm == current->parent->mm)) ++ return false; ++ ++ return true; ++} ++ ++/* ++ * Return non-zero if there is a SIGKILL that should be waking us up. ++ * Called with the siglock held. ++ */ ++static bool sigkill_pending(struct task_struct *tsk) ++{ ++ return sigismember(&tsk->pending.signal, SIGKILL) || ++ sigismember(&tsk->signal->shared_pending.signal, SIGKILL); ++} ++ ++/* ++ * This must be called with current->sighand->siglock held. ++ * ++ * This should be the path for all ptrace stops. ++ * We always set current->last_siginfo while stopped here. ++ * That makes it a way to test a stopped process for ++ * being ptrace-stopped vs being job-control-stopped. ++ * ++ * If we actually decide not to stop at all because the tracer ++ * is gone, we keep current->exit_code unless clear_code. ++ */ ++static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) ++ __releases(¤t->sighand->siglock) ++ __acquires(¤t->sighand->siglock) ++{ ++ bool gstop_done = false; ++ ++ if (arch_ptrace_stop_needed(exit_code, info)) { ++ /* ++ * The arch code has something special to do before a ++ * ptrace stop. This is allowed to block, e.g. for faults ++ * on user stack pages. 
We can't keep the siglock while ++ * calling arch_ptrace_stop, so we must release it now. ++ * To preserve proper semantics, we must do this before ++ * any signal bookkeeping like checking group_stop_count. ++ * Meanwhile, a SIGKILL could come in before we retake the ++ * siglock. That must prevent us from sleeping in TASK_TRACED. ++ * So after regaining the lock, we must check for SIGKILL. ++ */ ++ spin_unlock_irq(¤t->sighand->siglock); ++ arch_ptrace_stop(exit_code, info); ++ spin_lock_irq(¤t->sighand->siglock); ++ if (sigkill_pending(current)) ++ return; ++ } ++ ++ set_special_state(TASK_TRACED); ++ ++ /* ++ * We're committing to trapping. TRACED should be visible before ++ * TRAPPING is cleared; otherwise, the tracer might fail do_wait(). ++ * Also, transition to TRACED and updates to ->jobctl should be ++ * atomic with respect to siglock and should be done after the arch ++ * hook as siglock is released and regrabbed across it. ++ * ++ * TRACER TRACEE ++ * ++ * ptrace_attach() ++ * [L] wait_on_bit(JOBCTL_TRAPPING) [S] set_special_state(TRACED) ++ * do_wait() ++ * set_current_state() smp_wmb(); ++ * ptrace_do_wait() ++ * wait_task_stopped() ++ * task_stopped_code() ++ * [L] task_is_traced() [S] task_clear_jobctl_trapping(); ++ */ ++ smp_wmb(); ++ ++ current->last_siginfo = info; ++ current->exit_code = exit_code; ++ ++ /* ++ * If @why is CLD_STOPPED, we're trapping to participate in a group ++ * stop. Do the bookkeeping. Note that if SIGCONT was delievered ++ * across siglock relocks since INTERRUPT was scheduled, PENDING ++ * could be clear now. We act as if SIGCONT is received after ++ * TASK_TRACED is entered - ignore it. ++ */ ++ if (why == CLD_STOPPED && (current->jobctl & JOBCTL_STOP_PENDING)) ++ gstop_done = task_participate_group_stop(current); ++ ++ /* any trap clears pending STOP trap, STOP trap clears NOTIFY */ ++ task_clear_jobctl_pending(current, JOBCTL_TRAP_STOP); ++ if (info && info->si_code >> 8 == PTRACE_EVENT_STOP) ++ task_clear_jobctl_pending(current, JOBCTL_TRAP_NOTIFY); ++ ++ /* entering a trap, clear TRAPPING */ ++ task_clear_jobctl_trapping(current); ++ ++ spin_unlock_irq(¤t->sighand->siglock); ++ read_lock(&tasklist_lock); ++ if (may_ptrace_stop()) { ++ /* ++ * Notify parents of the stop. ++ * ++ * While ptraced, there are two parents - the ptracer and ++ * the real_parent of the group_leader. The ptracer should ++ * know about every stop while the real parent is only ++ * interested in the completion of group stop. The states ++ * for the two don't interact with each other. Notify ++ * separately unless they're gonna be duplicates. ++ */ ++ do_notify_parent_cldstop(current, true, why); ++ if (gstop_done && ptrace_reparented(current)) ++ do_notify_parent_cldstop(current, false, why); ++ ++ /* ++ * Don't want to allow preemption here, because ++ * sys_ptrace() needs this task to be inactive. ++ * ++ * XXX: implement read_unlock_no_resched(). ++ */ ++ preempt_disable(); ++ read_unlock(&tasklist_lock); ++ preempt_enable_no_resched(); ++ freezable_schedule(); ++ } else { ++ /* ++ * By the time we got the lock, our tracer went away. ++ * Don't drop the lock yet, another tracer may come. ++ * ++ * If @gstop_done, the ptracer went away between group stop ++ * completion and here. During detach, it would have set ++ * JOBCTL_STOP_PENDING on us and we'll re-enter ++ * TASK_STOPPED in do_signal_stop() on return, so notifying ++ * the real parent of the group stop completion is enough. 
++ */ ++ if (gstop_done) ++ do_notify_parent_cldstop(current, false, why); ++ ++ /* tasklist protects us from ptrace_freeze_traced() */ ++ __set_current_state(TASK_RUNNING); ++ if (clear_code) ++ current->exit_code = 0; ++ read_unlock(&tasklist_lock); ++ } ++ ++ /* ++ * We are back. Now reacquire the siglock before touching ++ * last_siginfo, so that we are sure to have synchronized with ++ * any signal-sending on another CPU that wants to examine it. ++ */ ++ spin_lock_irq(¤t->sighand->siglock); ++ current->last_siginfo = NULL; ++ ++ /* LISTENING can be set only during STOP traps, clear it */ ++ current->jobctl &= ~JOBCTL_LISTENING; ++ ++ /* ++ * Queued signals ignored us while we were stopped for tracing. ++ * So check for any that we should take before resuming user mode. ++ * This sets TIF_SIGPENDING, but never clears it. ++ */ ++ recalc_sigpending_tsk(current); ++} ++ ++static void ptrace_do_notify(int signr, int exit_code, int why) ++{ ++ siginfo_t info; ++ ++ clear_siginfo(&info); ++ info.si_signo = signr; ++ info.si_code = exit_code; ++ info.si_pid = task_pid_vnr(current); ++ info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); ++ ++ /* Let the debugger run. */ ++ ptrace_stop(exit_code, why, 1, &info); ++} ++ ++void ptrace_notify(int exit_code) ++{ ++ BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); ++ if (unlikely(current->task_works)) ++ task_work_run(); ++ ++ spin_lock_irq(¤t->sighand->siglock); ++ ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED); ++ spin_unlock_irq(¤t->sighand->siglock); ++} ++ ++/** ++ * do_signal_stop - handle group stop for SIGSTOP and other stop signals ++ * @signr: signr causing group stop if initiating ++ * ++ * If %JOBCTL_STOP_PENDING is not set yet, initiate group stop with @signr ++ * and participate in it. If already set, participate in the existing ++ * group stop. If participated in a group stop (and thus slept), %true is ++ * returned with siglock released. ++ * ++ * If ptraced, this function doesn't handle stop itself. Instead, ++ * %JOBCTL_TRAP_STOP is scheduled and %false is returned with siglock ++ * untouched. The caller must ensure that INTERRUPT trap handling takes ++ * places afterwards. ++ * ++ * CONTEXT: ++ * Must be called with @current->sighand->siglock held, which is released ++ * on %true return. ++ * ++ * RETURNS: ++ * %false if group stop is already cancelled or ptrace trap is scheduled. ++ * %true if participated in group stop. ++ */ ++static bool do_signal_stop(int signr) ++ __releases(¤t->sighand->siglock) ++{ ++ struct signal_struct *sig = current->signal; ++ ++ if (!(current->jobctl & JOBCTL_STOP_PENDING)) { ++ unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; ++ struct task_struct *t; ++ ++ /* signr will be recorded in task->jobctl for retries */ ++ WARN_ON_ONCE(signr & ~JOBCTL_STOP_SIGMASK); ++ ++ if (!likely(current->jobctl & JOBCTL_STOP_DEQUEUED) || ++ unlikely(signal_group_exit(sig))) ++ return false; ++ /* ++ * There is no group stop already in progress. We must ++ * initiate one now. ++ * ++ * While ptraced, a task may be resumed while group stop is ++ * still in effect and then receive a stop signal and ++ * initiate another group stop. This deviates from the ++ * usual behavior as two consecutive stop signals can't ++ * cause two group stops when !ptraced. That is why we ++ * also check !task_is_stopped(t) below. ++ * ++ * The condition can be distinguished by testing whether ++ * SIGNAL_STOP_STOPPED is already set. Don't generate ++ * group_exit_code in such case. 
++ * ++ * This is not necessary for SIGNAL_STOP_CONTINUED because ++ * an intervening stop signal is required to cause two ++ * continued events regardless of ptrace. ++ */ ++ if (!(sig->flags & SIGNAL_STOP_STOPPED)) ++ sig->group_exit_code = signr; ++ ++ sig->group_stop_count = 0; ++ ++ if (task_set_jobctl_pending(current, signr | gstop)) ++ sig->group_stop_count++; ++ ++ t = current; ++ while_each_thread(current, t) { ++ /* ++ * Setting state to TASK_STOPPED for a group ++ * stop is always done with the siglock held, ++ * so this check has no races. ++ */ ++ if (!task_is_stopped(t) && ++ task_set_jobctl_pending(t, signr | gstop)) { ++ sig->group_stop_count++; ++ if (likely(!(t->ptrace & PT_SEIZED))) ++ signal_wake_up(t, 0); ++ else ++ ptrace_trap_notify(t); ++ } ++ } ++ } ++ ++ if (likely(!current->ptrace)) { ++ int notify = 0; ++ ++ /* ++ * If there are no other threads in the group, or if there ++ * is a group stop in progress and we are the last to stop, ++ * report to the parent. ++ */ ++ if (task_participate_group_stop(current)) ++ notify = CLD_STOPPED; ++ ++ set_special_state(TASK_STOPPED); ++ spin_unlock_irq(¤t->sighand->siglock); ++ ++ /* ++ * Notify the parent of the group stop completion. Because ++ * we're not holding either the siglock or tasklist_lock ++ * here, ptracer may attach inbetween; however, this is for ++ * group stop and should always be delivered to the real ++ * parent of the group leader. The new ptracer will get ++ * its notification when this task transitions into ++ * TASK_TRACED. ++ */ ++ if (notify) { ++ read_lock(&tasklist_lock); ++ do_notify_parent_cldstop(current, false, notify); ++ read_unlock(&tasklist_lock); ++ } ++ ++ /* Now we don't run again until woken by SIGCONT or SIGKILL */ ++ freezable_schedule(); ++ return true; ++ } else { ++ /* ++ * While ptraced, group stop is handled by STOP trap. ++ * Schedule it and let the caller deal with it. ++ */ ++ task_set_jobctl_pending(current, JOBCTL_TRAP_STOP); ++ return false; ++ } ++} ++ ++/** ++ * do_jobctl_trap - take care of ptrace jobctl traps ++ * ++ * When PT_SEIZED, it's used for both group stop and explicit ++ * SEIZE/INTERRUPT traps. Both generate PTRACE_EVENT_STOP trap with ++ * accompanying siginfo. If stopped, lower eight bits of exit_code contain ++ * the stop signal; otherwise, %SIGTRAP. ++ * ++ * When !PT_SEIZED, it's used only for group stop trap with stop signal ++ * number as exit_code and no siginfo. ++ * ++ * CONTEXT: ++ * Must be called with @current->sighand->siglock held, which may be ++ * released and re-acquired before returning with intervening sleep. ++ */ ++static void do_jobctl_trap(void) ++{ ++ struct signal_struct *signal = current->signal; ++ int signr = current->jobctl & JOBCTL_STOP_SIGMASK; ++ ++ if (current->ptrace & PT_SEIZED) { ++ if (!signal->group_stop_count && ++ !(signal->flags & SIGNAL_STOP_STOPPED)) ++ signr = SIGTRAP; ++ WARN_ON_ONCE(!signr); ++ ptrace_do_notify(signr, signr | (PTRACE_EVENT_STOP << 8), ++ CLD_STOPPED); ++ } else { ++ WARN_ON_ONCE(!signr); ++ ptrace_stop(signr, CLD_STOPPED, 0, NULL); ++ current->exit_code = 0; ++ } ++} ++ ++static int ptrace_signal(int signr, siginfo_t *info) ++{ ++ /* ++ * We do not check sig_kernel_stop(signr) but set this marker ++ * unconditionally because we do not know whether debugger will ++ * change signr. This flag has no meaning unless we are going ++ * to stop after return from ptrace_stop(). 
In this case it will ++ * be checked in do_signal_stop(), we should only stop if it was ++ * not cleared by SIGCONT while we were sleeping. See also the ++ * comment in dequeue_signal(). ++ */ ++ current->jobctl |= JOBCTL_STOP_DEQUEUED; ++ ptrace_stop(signr, CLD_TRAPPED, 0, info); ++ ++ /* We're back. Did the debugger cancel the sig? */ ++ signr = current->exit_code; ++ if (signr == 0) ++ return signr; ++ ++ current->exit_code = 0; ++ ++ /* ++ * Update the siginfo structure if the signal has ++ * changed. If the debugger wanted something ++ * specific in the siginfo structure then it should ++ * have updated *info via PTRACE_SETSIGINFO. ++ */ ++ if (signr != info->si_signo) { ++ clear_siginfo(info); ++ info->si_signo = signr; ++ info->si_errno = 0; ++ info->si_code = SI_USER; ++ rcu_read_lock(); ++ info->si_pid = task_pid_vnr(current->parent); ++ info->si_uid = from_kuid_munged(current_user_ns(), ++ task_uid(current->parent)); ++ rcu_read_unlock(); ++ } ++ ++ /* If the (new) signal is now blocked, requeue it. */ ++ if (sigismember(¤t->blocked, signr)) { ++ specific_send_sig_info(signr, info, current); ++ signr = 0; ++ } ++ ++ return signr; ++} ++ ++bool get_signal(struct ksignal *ksig) ++{ ++ struct sighand_struct *sighand = current->sighand; ++ struct signal_struct *signal = current->signal; ++ int signr; ++ ++ if (unlikely(current->task_works)) ++ task_work_run(); ++ ++ if (unlikely(uprobe_deny_signal())) ++ return false; ++ ++ /* ++ * Do this once, we can't return to user-mode if freezing() == T. ++ * do_signal_stop() and ptrace_stop() do freezable_schedule() and ++ * thus do not need another check after return. ++ */ ++ try_to_freeze(); ++ ++relock: ++ spin_lock_irq(&sighand->siglock); ++ /* ++ * Every stopped thread goes here after wakeup. Check to see if ++ * we should notify the parent, prepare_signal(SIGCONT) encodes ++ * the CLD_ si_code into SIGNAL_CLD_MASK bits. ++ */ ++ if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { ++ int why; ++ ++ if (signal->flags & SIGNAL_CLD_CONTINUED) ++ why = CLD_CONTINUED; ++ else ++ why = CLD_STOPPED; ++ ++ signal->flags &= ~SIGNAL_CLD_MASK; ++ ++ spin_unlock_irq(&sighand->siglock); ++ ++ /* ++ * Notify the parent that we're continuing. This event is ++ * always per-process and doesn't make whole lot of sense ++ * for ptracers, who shouldn't consume the state via ++ * wait(2) either, but, for backward compatibility, notify ++ * the ptracer of the group leader too unless it's gonna be ++ * a duplicate. ++ */ ++ read_lock(&tasklist_lock); ++ do_notify_parent_cldstop(current, false, why); ++ ++ if (ptrace_reparented(current->group_leader)) ++ do_notify_parent_cldstop(current->group_leader, ++ true, why); ++ read_unlock(&tasklist_lock); ++ ++ goto relock; ++ } ++ ++ /* Has this task already been marked for death? 
*/ ++ if (signal_group_exit(signal)) { ++ ksig->info.si_signo = signr = SIGKILL; ++ sigdelset(¤t->pending.signal, SIGKILL); ++ trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO, ++ &sighand->action[SIGKILL - 1]); ++ recalc_sigpending(); ++ goto fatal; ++ } ++ ++ for (;;) { ++ struct k_sigaction *ka; ++ ++ if (unlikely(current->jobctl & JOBCTL_STOP_PENDING) && ++ do_signal_stop(0)) ++ goto relock; ++ ++ if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) { ++ do_jobctl_trap(); ++ spin_unlock_irq(&sighand->siglock); ++ goto relock; ++ } ++ ++ /* ++ * Signals generated by the execution of an instruction ++ * need to be delivered before any other pending signals ++ * so that the instruction pointer in the signal stack ++ * frame points to the faulting instruction. ++ */ ++ signr = dequeue_synchronous_signal(&ksig->info); ++ if (!signr) ++ signr = dequeue_signal(current, ¤t->blocked, &ksig->info); ++ ++ if (!signr) ++ break; /* will return 0 */ ++ ++ if (unlikely(current->ptrace) && signr != SIGKILL) { ++ signr = ptrace_signal(signr, &ksig->info); ++ if (!signr) ++ continue; ++ } ++ ++ ka = &sighand->action[signr-1]; ++ ++ /* Trace actually delivered signals. */ ++ trace_signal_deliver(signr, &ksig->info, ka); ++ ++ if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ ++ continue; ++ if (ka->sa.sa_handler != SIG_DFL) { ++ /* Run the handler. */ ++ ksig->ka = *ka; ++ ++ if (ka->sa.sa_flags & SA_ONESHOT) ++ ka->sa.sa_handler = SIG_DFL; ++ ++ break; /* will return non-zero "signr" value */ ++ } ++ ++ /* ++ * Now we are doing the default action for this signal. ++ */ ++ if (sig_kernel_ignore(signr)) /* Default is nothing. */ ++ continue; ++ ++ /* ++ * Global init gets no signals it doesn't want. ++ * Container-init gets no signals it doesn't want from same ++ * container. ++ * ++ * Note that if global/container-init sees a sig_kernel_only() ++ * signal here, the signal must have been generated internally ++ * or must have come from an ancestor namespace. In either ++ * case, the signal cannot be dropped. ++ */ ++ if (unlikely(signal->flags & SIGNAL_UNKILLABLE) && ++ !sig_kernel_only(signr)) ++ continue; ++ ++ if (sig_kernel_stop(signr)) { ++ /* ++ * The default action is to stop all threads in ++ * the thread group. The job control signals ++ * do nothing in an orphaned pgrp, but SIGSTOP ++ * always works. Note that siglock needs to be ++ * dropped during the call to is_orphaned_pgrp() ++ * because of lock ordering with tasklist_lock. ++ * This allows an intervening SIGCONT to be posted. ++ * We need to check for that and bail out if necessary. ++ */ ++ if (signr != SIGSTOP) { ++ spin_unlock_irq(&sighand->siglock); ++ ++ /* signals can be posted during this window */ ++ ++ if (is_current_pgrp_orphaned()) ++ goto relock; ++ ++ spin_lock_irq(&sighand->siglock); ++ } ++ ++ if (likely(do_signal_stop(ksig->info.si_signo))) { ++ /* It released the siglock. */ ++ goto relock; ++ } ++ ++ /* ++ * We didn't actually stop, due to a race ++ * with SIGCONT or something like that. ++ */ ++ continue; ++ } ++ ++ fatal: ++ spin_unlock_irq(&sighand->siglock); ++ ++ /* ++ * Anything else is fatal, maybe with a core dump. ++ */ ++ current->flags |= PF_SIGNALED; ++ ++ if (sig_kernel_coredump(signr)) { ++ if (print_fatal_signals) ++ print_fatal_signal(ksig->info.si_signo); ++ proc_coredump_connector(current); ++ /* ++ * If it was able to dump core, this kills all ++ * other threads in the group and synchronizes with ++ * their demise. 
If we lost the race with another ++ * thread getting here, it set group_exit_code ++ * first and our do_group_exit call below will use ++ * that value and ignore the one we pass it. ++ */ ++ do_coredump(&ksig->info); ++ } ++ ++ /* ++ * Death signals, no core dump. ++ */ ++ do_group_exit(ksig->info.si_signo); ++ /* NOTREACHED */ ++ } ++ spin_unlock_irq(&sighand->siglock); ++ ++ ksig->sig = signr; ++ return ksig->sig > 0; ++} ++ ++/** ++ * signal_delivered - ++ * @ksig: kernel signal struct ++ * @stepping: nonzero if debugger single-step or block-step in use ++ * ++ * This function should be called when a signal has successfully been ++ * delivered. It updates the blocked signals accordingly (@ksig->ka.sa.sa_mask ++ * is always blocked, and the signal itself is blocked unless %SA_NODEFER ++ * is set in @ksig->ka.sa.sa_flags. Tracing is notified. ++ */ ++static void signal_delivered(struct ksignal *ksig, int stepping) ++{ ++ sigset_t blocked; ++ ++ /* A signal was successfully delivered, and the ++ saved sigmask was stored on the signal frame, ++ and will be restored by sigreturn. So we can ++ simply clear the restore sigmask flag. */ ++ clear_restore_sigmask(); ++ ++ sigorsets(&blocked, ¤t->blocked, &ksig->ka.sa.sa_mask); ++ if (!(ksig->ka.sa.sa_flags & SA_NODEFER)) ++ sigaddset(&blocked, ksig->sig); ++ set_current_blocked(&blocked); ++ tracehook_signal_handler(stepping); ++} ++ ++void signal_setup_done(int failed, struct ksignal *ksig, int stepping) ++{ ++ if (failed) ++ force_sigsegv(ksig->sig, current); ++ else ++ signal_delivered(ksig, stepping); ++} ++ ++/* ++ * It could be that complete_signal() picked us to notify about the ++ * group-wide signal. Other threads should be notified now to take ++ * the shared signals in @which since we will not. ++ */ ++static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) ++{ ++ sigset_t retarget; ++ struct task_struct *t; ++ ++ sigandsets(&retarget, &tsk->signal->shared_pending.signal, which); ++ if (sigisemptyset(&retarget)) ++ return; ++ ++ t = tsk; ++ while_each_thread(tsk, t) { ++ if (t->flags & PF_EXITING) ++ continue; ++ ++ if (!has_pending_signals(&retarget, &t->blocked)) ++ continue; ++ /* Remove the signals this thread can handle. */ ++ sigandsets(&retarget, &retarget, &t->blocked); ++ ++ if (!signal_pending(t)) ++ signal_wake_up(t, 0); ++ ++ if (sigisemptyset(&retarget)) ++ break; ++ } ++} ++ ++void exit_signals(struct task_struct *tsk) ++{ ++ int group_stop = 0; ++ sigset_t unblocked; ++ ++ /* ++ * @tsk is about to have PF_EXITING set - lock out users which ++ * expect stable threadgroup. ++ */ ++ cgroup_threadgroup_change_begin(tsk); ++ ++ if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { ++ tsk->flags |= PF_EXITING; ++ cgroup_threadgroup_change_end(tsk); ++ return; ++ } ++ ++ spin_lock_irq(&tsk->sighand->siglock); ++ /* ++ * From now this task is not visible for group-wide signals, ++ * see wants_signal(), do_signal_stop(). ++ */ ++ tsk->flags |= PF_EXITING; ++ ++ cgroup_threadgroup_change_end(tsk); ++ ++ if (!signal_pending(tsk)) ++ goto out; ++ ++ unblocked = tsk->blocked; ++ signotset(&unblocked); ++ retarget_shared_pending(tsk, &unblocked); ++ ++ if (unlikely(tsk->jobctl & JOBCTL_STOP_PENDING) && ++ task_participate_group_stop(tsk)) ++ group_stop = CLD_STOPPED; ++out: ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ /* ++ * If group stop has completed, deliver the notification. This ++ * should always go to the real parent of the group leader. 
++ */ ++ if (unlikely(group_stop)) { ++ read_lock(&tasklist_lock); ++ do_notify_parent_cldstop(tsk, false, group_stop); ++ read_unlock(&tasklist_lock); ++ } ++} ++ ++EXPORT_SYMBOL(recalc_sigpending); ++EXPORT_SYMBOL_GPL(dequeue_signal); ++EXPORT_SYMBOL(flush_signals); ++EXPORT_SYMBOL(force_sig); ++EXPORT_SYMBOL(send_sig); ++EXPORT_SYMBOL(send_sig_info); ++EXPORT_SYMBOL(sigprocmask); ++ ++/* ++ * System call entry points. ++ */ ++ ++/** ++ * sys_restart_syscall - restart a system call ++ */ ++SYSCALL_DEFINE0(restart_syscall) ++{ ++ struct restart_block *restart = ¤t->restart_block; ++ return restart->fn(restart); ++} ++ ++long do_no_restart_syscall(struct restart_block *param) ++{ ++ return -EINTR; ++} ++ ++static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) ++{ ++ if (signal_pending(tsk) && !thread_group_empty(tsk)) { ++ sigset_t newblocked; ++ /* A set of now blocked but previously unblocked signals. */ ++ sigandnsets(&newblocked, newset, ¤t->blocked); ++ retarget_shared_pending(tsk, &newblocked); ++ } ++ tsk->blocked = *newset; ++ recalc_sigpending(); ++} ++ ++/** ++ * set_current_blocked - change current->blocked mask ++ * @newset: new mask ++ * ++ * It is wrong to change ->blocked directly, this helper should be used ++ * to ensure the process can't miss a shared signal we are going to block. ++ */ ++void set_current_blocked(sigset_t *newset) ++{ ++ sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP)); ++ __set_current_blocked(newset); ++} ++ ++void __set_current_blocked(const sigset_t *newset) ++{ ++ struct task_struct *tsk = current; ++ ++ /* ++ * In case the signal mask hasn't changed, there is nothing we need ++ * to do. The current->blocked shouldn't be modified by other task. ++ */ ++ if (sigequalsets(&tsk->blocked, newset)) ++ return; ++ ++ spin_lock_irq(&tsk->sighand->siglock); ++ __set_task_blocked(tsk, newset); ++ spin_unlock_irq(&tsk->sighand->siglock); ++} ++ ++/* ++ * This is also useful for kernel threads that want to temporarily ++ * (or permanently) block certain signals. ++ * ++ * NOTE! Unlike the user-mode sys_sigprocmask(), the kernel ++ * interface happily blocks "unblockable" signals like SIGKILL ++ * and friends. ++ */ ++int sigprocmask(int how, sigset_t *set, sigset_t *oldset) ++{ ++ struct task_struct *tsk = current; ++ sigset_t newset; ++ ++ /* Lockless, only current can change ->blocked, never from irq */ ++ if (oldset) ++ *oldset = tsk->blocked; ++ ++ switch (how) { ++ case SIG_BLOCK: ++ sigorsets(&newset, &tsk->blocked, set); ++ break; ++ case SIG_UNBLOCK: ++ sigandnsets(&newset, &tsk->blocked, set); ++ break; ++ case SIG_SETMASK: ++ newset = *set; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ __set_current_blocked(&newset); ++ return 0; ++} ++ ++/** ++ * sys_rt_sigprocmask - change the list of currently blocked signals ++ * @how: whether to add, remove, or set signals ++ * @nset: stores pending signals ++ * @oset: previous value of signal mask if non-null ++ * @sigsetsize: size of sigset_t type ++ */ ++SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset, ++ sigset_t __user *, oset, size_t, sigsetsize) ++{ ++ sigset_t old_set, new_set; ++ int error; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. 
*/ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ old_set = current->blocked; ++ ++ if (nset) { ++ if (copy_from_user(&new_set, nset, sizeof(sigset_t))) ++ return -EFAULT; ++ sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); ++ ++ error = sigprocmask(how, &new_set, NULL); ++ if (error) ++ return error; ++ } ++ ++ if (oset) { ++ if (copy_to_user(oset, &old_set, sizeof(sigset_t))) ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, ++ compat_sigset_t __user *, oset, compat_size_t, sigsetsize) ++{ ++ sigset_t old_set = current->blocked; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. */ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (nset) { ++ sigset_t new_set; ++ int error; ++ if (get_compat_sigset(&new_set, nset)) ++ return -EFAULT; ++ sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); ++ ++ error = sigprocmask(how, &new_set, NULL); ++ if (error) ++ return error; ++ } ++ return oset ? put_compat_sigset(oset, &old_set, sizeof(*oset)) : 0; ++} ++#endif ++ ++static void do_sigpending(sigset_t *set) ++{ ++ spin_lock_irq(¤t->sighand->siglock); ++ sigorsets(set, ¤t->pending.signal, ++ ¤t->signal->shared_pending.signal); ++ spin_unlock_irq(¤t->sighand->siglock); ++ ++ /* Outside the lock because only this thread touches it. */ ++ sigandsets(set, ¤t->blocked, set); ++} ++ ++/** ++ * sys_rt_sigpending - examine a pending signal that has been raised ++ * while blocked ++ * @uset: stores pending signals ++ * @sigsetsize: size of sigset_t type or larger ++ */ ++SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize) ++{ ++ sigset_t set; ++ ++ if (sigsetsize > sizeof(*uset)) ++ return -EINVAL; ++ ++ do_sigpending(&set); ++ ++ if (copy_to_user(uset, &set, sigsetsize)) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset, ++ compat_size_t, sigsetsize) ++{ ++ sigset_t set; ++ ++ if (sigsetsize > sizeof(*uset)) ++ return -EINVAL; ++ ++ do_sigpending(&set); ++ ++ return put_compat_sigset(uset, &set, sigsetsize); ++} ++#endif ++ ++enum siginfo_layout siginfo_layout(unsigned sig, int si_code) ++{ ++ enum siginfo_layout layout = SIL_KILL; ++ if ((si_code > SI_USER) && (si_code < SI_KERNEL)) { ++ static const struct { ++ unsigned char limit, layout; ++ } filter[] = { ++ [SIGILL] = { NSIGILL, SIL_FAULT }, ++ [SIGFPE] = { NSIGFPE, SIL_FAULT }, ++ [SIGSEGV] = { NSIGSEGV, SIL_FAULT }, ++ [SIGBUS] = { NSIGBUS, SIL_FAULT }, ++ [SIGTRAP] = { NSIGTRAP, SIL_FAULT }, ++#if defined(SIGEMT) && defined(NSIGEMT) ++ [SIGEMT] = { NSIGEMT, SIL_FAULT }, ++#endif ++ [SIGCHLD] = { NSIGCHLD, SIL_CHLD }, ++ [SIGPOLL] = { NSIGPOLL, SIL_POLL }, ++ [SIGSYS] = { NSIGSYS, SIL_SYS }, ++ }; ++ if ((sig < ARRAY_SIZE(filter)) && (si_code <= filter[sig].limit)) { ++ layout = filter[sig].layout; ++ /* Handle the exceptions */ ++ if ((sig == SIGBUS) && ++ (si_code >= BUS_MCEERR_AR) && (si_code <= BUS_MCEERR_AO)) ++ layout = SIL_FAULT_MCEERR; ++ else if ((sig == SIGSEGV) && (si_code == SEGV_BNDERR)) ++ layout = SIL_FAULT_BNDERR; ++#ifdef SEGV_PKUERR ++ else if ((sig == SIGSEGV) && (si_code == SEGV_PKUERR)) ++ layout = SIL_FAULT_PKUERR; ++#endif ++ } ++ else if (si_code <= NSIGPOLL) ++ layout = SIL_POLL; ++ } else { ++ if (si_code == SI_TIMER) ++ layout = SIL_TIMER; ++ else if (si_code == SI_SIGIO) ++ layout = SIL_POLL; ++ else if (si_code < 0) ++ layout = SIL_RT; ++ } ++ 
return layout; ++} ++ ++int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) ++{ ++ if (copy_to_user(to, from , sizeof(struct siginfo))) ++ return -EFAULT; ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++int copy_siginfo_to_user32(struct compat_siginfo __user *to, ++ const struct siginfo *from) ++#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) ++{ ++ return __copy_siginfo_to_user32(to, from, in_x32_syscall()); ++} ++int __copy_siginfo_to_user32(struct compat_siginfo __user *to, ++ const struct siginfo *from, bool x32_ABI) ++#endif ++{ ++ struct compat_siginfo new; ++ memset(&new, 0, sizeof(new)); ++ ++ new.si_signo = from->si_signo; ++ new.si_errno = from->si_errno; ++ new.si_code = from->si_code; ++ switch(siginfo_layout(from->si_signo, from->si_code)) { ++ case SIL_KILL: ++ new.si_pid = from->si_pid; ++ new.si_uid = from->si_uid; ++ break; ++ case SIL_TIMER: ++ new.si_tid = from->si_tid; ++ new.si_overrun = from->si_overrun; ++ new.si_int = from->si_int; ++ break; ++ case SIL_POLL: ++ new.si_band = from->si_band; ++ new.si_fd = from->si_fd; ++ break; ++ case SIL_FAULT: ++ new.si_addr = ptr_to_compat(from->si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ new.si_trapno = from->si_trapno; ++#endif ++ break; ++ case SIL_FAULT_MCEERR: ++ new.si_addr = ptr_to_compat(from->si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ new.si_trapno = from->si_trapno; ++#endif ++ new.si_addr_lsb = from->si_addr_lsb; ++ break; ++ case SIL_FAULT_BNDERR: ++ new.si_addr = ptr_to_compat(from->si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ new.si_trapno = from->si_trapno; ++#endif ++ new.si_lower = ptr_to_compat(from->si_lower); ++ new.si_upper = ptr_to_compat(from->si_upper); ++ break; ++ case SIL_FAULT_PKUERR: ++ new.si_addr = ptr_to_compat(from->si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ new.si_trapno = from->si_trapno; ++#endif ++ new.si_pkey = from->si_pkey; ++ break; ++ case SIL_CHLD: ++ new.si_pid = from->si_pid; ++ new.si_uid = from->si_uid; ++ new.si_status = from->si_status; ++#ifdef CONFIG_X86_X32_ABI ++ if (x32_ABI) { ++ new._sifields._sigchld_x32._utime = from->si_utime; ++ new._sifields._sigchld_x32._stime = from->si_stime; ++ } else ++#endif ++ { ++ new.si_utime = from->si_utime; ++ new.si_stime = from->si_stime; ++ } ++ break; ++ case SIL_RT: ++ new.si_pid = from->si_pid; ++ new.si_uid = from->si_uid; ++ new.si_int = from->si_int; ++ break; ++ case SIL_SYS: ++ new.si_call_addr = ptr_to_compat(from->si_call_addr); ++ new.si_syscall = from->si_syscall; ++ new.si_arch = from->si_arch; ++ break; ++ } ++ ++ if (copy_to_user(to, &new, sizeof(struct compat_siginfo))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++int copy_siginfo_from_user32(struct siginfo *to, ++ const struct compat_siginfo __user *ufrom) ++{ ++ struct compat_siginfo from; ++ ++ if (copy_from_user(&from, ufrom, sizeof(struct compat_siginfo))) ++ return -EFAULT; ++ ++ clear_siginfo(to); ++ to->si_signo = from.si_signo; ++ to->si_errno = from.si_errno; ++ to->si_code = from.si_code; ++ switch(siginfo_layout(from.si_signo, from.si_code)) { ++ case SIL_KILL: ++ to->si_pid = from.si_pid; ++ to->si_uid = from.si_uid; ++ break; ++ case SIL_TIMER: ++ to->si_tid = from.si_tid; ++ to->si_overrun = from.si_overrun; ++ to->si_int = from.si_int; ++ break; ++ case SIL_POLL: ++ to->si_band = from.si_band; ++ to->si_fd = from.si_fd; ++ break; ++ case SIL_FAULT: ++ to->si_addr = compat_ptr(from.si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ to->si_trapno = from.si_trapno; ++#endif ++ break; ++ case SIL_FAULT_MCEERR: ++ to->si_addr = compat_ptr(from.si_addr); 
++#ifdef __ARCH_SI_TRAPNO ++ to->si_trapno = from.si_trapno; ++#endif ++ to->si_addr_lsb = from.si_addr_lsb; ++ break; ++ case SIL_FAULT_BNDERR: ++ to->si_addr = compat_ptr(from.si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ to->si_trapno = from.si_trapno; ++#endif ++ to->si_lower = compat_ptr(from.si_lower); ++ to->si_upper = compat_ptr(from.si_upper); ++ break; ++ case SIL_FAULT_PKUERR: ++ to->si_addr = compat_ptr(from.si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ to->si_trapno = from.si_trapno; ++#endif ++ to->si_pkey = from.si_pkey; ++ break; ++ case SIL_CHLD: ++ to->si_pid = from.si_pid; ++ to->si_uid = from.si_uid; ++ to->si_status = from.si_status; ++#ifdef CONFIG_X86_X32_ABI ++ if (in_x32_syscall()) { ++ to->si_utime = from._sifields._sigchld_x32._utime; ++ to->si_stime = from._sifields._sigchld_x32._stime; ++ } else ++#endif ++ { ++ to->si_utime = from.si_utime; ++ to->si_stime = from.si_stime; ++ } ++ break; ++ case SIL_RT: ++ to->si_pid = from.si_pid; ++ to->si_uid = from.si_uid; ++ to->si_int = from.si_int; ++ break; ++ case SIL_SYS: ++ to->si_call_addr = compat_ptr(from.si_call_addr); ++ to->si_syscall = from.si_syscall; ++ to->si_arch = from.si_arch; ++ break; ++ } ++ return 0; ++} ++#endif /* CONFIG_COMPAT */ ++ ++/** ++ * do_sigtimedwait - wait for queued signals specified in @which ++ * @which: queued signals to wait for ++ * @info: if non-null, the signal's siginfo is returned here ++ * @ts: upper bound on process time suspension ++ */ ++static int do_sigtimedwait(const sigset_t *which, siginfo_t *info, ++ const struct timespec *ts) ++{ ++ ktime_t *to = NULL, timeout = KTIME_MAX; ++ struct task_struct *tsk = current; ++ sigset_t mask = *which; ++ int sig, ret = 0; ++ ++ if (ts) { ++ if (!timespec_valid(ts)) ++ return -EINVAL; ++ timeout = timespec_to_ktime(*ts); ++ to = &timeout; ++ } ++ ++ /* ++ * Invert the set of allowed signals to get those we want to block. ++ */ ++ sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); ++ signotset(&mask); ++ ++ spin_lock_irq(&tsk->sighand->siglock); ++ sig = dequeue_signal(tsk, &mask, info); ++ if (!sig && timeout) { ++ /* ++ * None ready, temporarily unblock those we're interested ++ * while we are sleeping in so that we'll be awakened when ++ * they arrive. Unblocking is always fine, we can avoid ++ * set_current_blocked(). ++ */ ++ tsk->real_blocked = tsk->blocked; ++ sigandsets(&tsk->blocked, &tsk->blocked, &mask); ++ recalc_sigpending(); ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ __set_current_state(TASK_INTERRUPTIBLE); ++ ret = freezable_schedule_hrtimeout_range(to, tsk->timer_slack_ns, ++ HRTIMER_MODE_REL); ++ spin_lock_irq(&tsk->sighand->siglock); ++ __set_task_blocked(tsk, &tsk->real_blocked); ++ sigemptyset(&tsk->real_blocked); ++ sig = dequeue_signal(tsk, &mask, info); ++ } ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ if (sig) ++ return sig; ++ return ret ? -EINTR : -EAGAIN; ++} ++ ++/** ++ * sys_rt_sigtimedwait - synchronously wait for queued signals specified ++ * in @uthese ++ * @uthese: queued signals to wait for ++ * @uinfo: if non-null, the signal's siginfo is returned here ++ * @uts: upper bound on process time suspension ++ * @sigsetsize: size of sigset_t type ++ */ ++SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, ++ siginfo_t __user *, uinfo, const struct timespec __user *, uts, ++ size_t, sigsetsize) ++{ ++ sigset_t these; ++ struct timespec ts; ++ siginfo_t info; ++ int ret; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. 
*/ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (copy_from_user(&these, uthese, sizeof(these))) ++ return -EFAULT; ++ ++ if (uts) { ++ if (copy_from_user(&ts, uts, sizeof(ts))) ++ return -EFAULT; ++ } ++ ++ ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL); ++ ++ if (ret > 0 && uinfo) { ++ if (copy_siginfo_to_user(uinfo, &info)) ++ ret = -EFAULT; ++ } ++ ++ return ret; ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese, ++ struct compat_siginfo __user *, uinfo, ++ struct compat_timespec __user *, uts, compat_size_t, sigsetsize) ++{ ++ sigset_t s; ++ struct timespec t; ++ siginfo_t info; ++ long ret; ++ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (get_compat_sigset(&s, uthese)) ++ return -EFAULT; ++ ++ if (uts) { ++ if (compat_get_timespec(&t, uts)) ++ return -EFAULT; ++ } ++ ++ ret = do_sigtimedwait(&s, &info, uts ? &t : NULL); ++ ++ if (ret > 0 && uinfo) { ++ if (copy_siginfo_to_user32(uinfo, &info)) ++ ret = -EFAULT; ++ } ++ ++ return ret; ++} ++#endif ++ ++/** ++ * sys_kill - send a signal to a process ++ * @pid: the PID of the process ++ * @sig: signal to be sent ++ */ ++SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = sig; ++ info.si_errno = 0; ++ info.si_code = SI_USER; ++ info.si_pid = task_tgid_vnr(current); ++ info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); ++ ++ return kill_something_info(sig, &info, pid); ++} ++ ++static int ++do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) ++{ ++ struct task_struct *p; ++ int error = -ESRCH; ++ ++ rcu_read_lock(); ++ p = find_task_by_vpid(pid); ++ if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { ++ error = check_kill_permission(sig, info, p); ++ /* ++ * The null signal is a permissions and process existence ++ * probe. No signal is actually delivered. ++ */ ++ if (!error && sig) { ++ error = do_send_sig_info(sig, info, p, PIDTYPE_PID); ++ /* ++ * If lock_task_sighand() failed we pretend the task ++ * dies after receiving the signal. The window is tiny, ++ * and the signal is private anyway. ++ */ ++ if (unlikely(error == -ESRCH)) ++ error = 0; ++ } ++ } ++ rcu_read_unlock(); ++ ++ return error; ++} ++ ++static int do_tkill(pid_t tgid, pid_t pid, int sig) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = sig; ++ info.si_errno = 0; ++ info.si_code = SI_TKILL; ++ info.si_pid = task_tgid_vnr(current); ++ info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); ++ ++ return do_send_specific(tgid, pid, sig, &info); ++} ++ ++/** ++ * sys_tgkill - send signal to one specific thread ++ * @tgid: the thread group ID of the thread ++ * @pid: the PID of the thread ++ * @sig: signal to be sent ++ * ++ * This syscall also checks the @tgid and returns -ESRCH even if the PID ++ * exists but it's not belonging to the target process anymore. This ++ * method solves the problem of threads exiting and PIDs getting reused. ++ */ ++SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig) ++{ ++ /* This is only valid for single tasks */ ++ if (pid <= 0 || tgid <= 0) ++ return -EINVAL; ++ ++ return do_tkill(tgid, pid, sig); ++} ++ ++/** ++ * sys_tkill - send signal to one specific task ++ * @pid: the PID of the task ++ * @sig: signal to be sent ++ * ++ * Send a signal to only one task, even if it's a CLONE_THREAD task. 
++ */ ++SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) ++{ ++ /* This is only valid for single tasks */ ++ if (pid <= 0) ++ return -EINVAL; ++ ++ return do_tkill(0, pid, sig); ++} ++ ++static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info) ++{ ++ /* Not even root can pretend to send signals from the kernel. ++ * Nor can they impersonate a kill()/tgkill(), which adds source info. ++ */ ++ if ((info->si_code >= 0 || info->si_code == SI_TKILL) && ++ (task_pid_vnr(current) != pid)) ++ return -EPERM; ++ ++ info->si_signo = sig; ++ ++ /* POSIX.1b doesn't mention process groups. */ ++ return kill_proc_info(sig, info, pid); ++} ++ ++/** ++ * sys_rt_sigqueueinfo - send signal information to a signal ++ * @pid: the PID of the thread ++ * @sig: signal to be sent ++ * @uinfo: signal info to be sent ++ */ ++SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, ++ siginfo_t __user *, uinfo) ++{ ++ siginfo_t info; ++ if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) ++ return -EFAULT; ++ return do_rt_sigqueueinfo(pid, sig, &info); ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, ++ compat_pid_t, pid, ++ int, sig, ++ struct compat_siginfo __user *, uinfo) ++{ ++ siginfo_t info; ++ int ret = copy_siginfo_from_user32(&info, uinfo); ++ if (unlikely(ret)) ++ return ret; ++ return do_rt_sigqueueinfo(pid, sig, &info); ++} ++#endif ++ ++static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) ++{ ++ /* This is only valid for single tasks */ ++ if (pid <= 0 || tgid <= 0) ++ return -EINVAL; ++ ++ /* Not even root can pretend to send signals from the kernel. ++ * Nor can they impersonate a kill()/tgkill(), which adds source info. ++ */ ++ if ((info->si_code >= 0 || info->si_code == SI_TKILL) && ++ (task_pid_vnr(current) != pid)) ++ return -EPERM; ++ ++ info->si_signo = sig; ++ ++ return do_send_specific(tgid, pid, sig, info); ++} ++ ++SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, ++ siginfo_t __user *, uinfo) ++{ ++ siginfo_t info; ++ ++ if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) ++ return -EFAULT; ++ ++ return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo, ++ compat_pid_t, tgid, ++ compat_pid_t, pid, ++ int, sig, ++ struct compat_siginfo __user *, uinfo) ++{ ++ siginfo_t info; ++ ++ if (copy_siginfo_from_user32(&info, uinfo)) ++ return -EFAULT; ++ return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); ++} ++#endif ++ ++/* ++ * For kthreads only, must not be used if cloned with CLONE_SIGHAND ++ */ ++void kernel_sigaction(int sig, __sighandler_t action) ++{ ++ spin_lock_irq(¤t->sighand->siglock); ++ current->sighand->action[sig - 1].sa.sa_handler = action; ++ if (action == SIG_IGN) { ++ sigset_t mask; ++ ++ sigemptyset(&mask); ++ sigaddset(&mask, sig); ++ ++ flush_sigqueue_mask(&mask, ¤t->signal->shared_pending); ++ flush_sigqueue_mask(&mask, ¤t->pending); ++ recalc_sigpending(); ++ } ++ spin_unlock_irq(¤t->sighand->siglock); ++} ++EXPORT_SYMBOL(kernel_sigaction); ++ ++void __weak sigaction_compat_abi(struct k_sigaction *act, ++ struct k_sigaction *oact) ++{ ++} ++ ++int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) ++{ ++ struct task_struct *p = current, *t; ++ struct k_sigaction *k; ++ sigset_t mask; ++ ++ if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig))) ++ return -EINVAL; ++ ++ k = &p->sighand->action[sig-1]; ++ ++ spin_lock_irq(&p->sighand->siglock); ++ if (oact) ++ *oact = *k; ++ ++ 
sigaction_compat_abi(act, oact); ++ ++ if (act) { ++ sigdelsetmask(&act->sa.sa_mask, ++ sigmask(SIGKILL) | sigmask(SIGSTOP)); ++ *k = *act; ++ /* ++ * POSIX 3.3.1.3: ++ * "Setting a signal action to SIG_IGN for a signal that is ++ * pending shall cause the pending signal to be discarded, ++ * whether or not it is blocked." ++ * ++ * "Setting a signal action to SIG_DFL for a signal that is ++ * pending and whose default action is to ignore the signal ++ * (for example, SIGCHLD), shall cause the pending signal to ++ * be discarded, whether or not it is blocked" ++ */ ++ if (sig_handler_ignored(sig_handler(p, sig), sig)) { ++ sigemptyset(&mask); ++ sigaddset(&mask, sig); ++ flush_sigqueue_mask(&mask, &p->signal->shared_pending); ++ for_each_thread(p, t) ++ flush_sigqueue_mask(&mask, &t->pending); ++ } ++ } ++ ++ spin_unlock_irq(&p->sighand->siglock); ++ return 0; ++} ++ ++static int ++do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, ++ size_t min_ss_size) ++{ ++ struct task_struct *t = current; ++ ++ if (oss) { ++ memset(oss, 0, sizeof(stack_t)); ++ oss->ss_sp = (void __user *) t->sas_ss_sp; ++ oss->ss_size = t->sas_ss_size; ++ oss->ss_flags = sas_ss_flags(sp) | ++ (current->sas_ss_flags & SS_FLAG_BITS); ++ } ++ ++ if (ss) { ++ void __user *ss_sp = ss->ss_sp; ++ size_t ss_size = ss->ss_size; ++ unsigned ss_flags = ss->ss_flags; ++ int ss_mode; ++ ++ if (unlikely(on_sig_stack(sp))) ++ return -EPERM; ++ ++ ss_mode = ss_flags & ~SS_FLAG_BITS; ++ if (unlikely(ss_mode != SS_DISABLE && ss_mode != SS_ONSTACK && ++ ss_mode != 0)) ++ return -EINVAL; ++ ++ if (ss_mode == SS_DISABLE) { ++ ss_size = 0; ++ ss_sp = NULL; ++ } else { ++ if (unlikely(ss_size < min_ss_size)) ++ return -ENOMEM; ++ } ++ ++ t->sas_ss_sp = (unsigned long) ss_sp; ++ t->sas_ss_size = ss_size; ++ t->sas_ss_flags = ss_flags; ++ } ++ return 0; ++} ++ ++SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) ++{ ++ stack_t new, old; ++ int err; ++ if (uss && copy_from_user(&new, uss, sizeof(stack_t))) ++ return -EFAULT; ++ err = do_sigaltstack(uss ? &new : NULL, uoss ? &old : NULL, ++ current_user_stack_pointer(), ++ MINSIGSTKSZ); ++ if (!err && uoss && copy_to_user(uoss, &old, sizeof(stack_t))) ++ err = -EFAULT; ++ return err; ++} ++ ++int restore_altstack(const stack_t __user *uss) ++{ ++ stack_t new; ++ if (copy_from_user(&new, uss, sizeof(stack_t))) ++ return -EFAULT; ++ (void)do_sigaltstack(&new, NULL, current_user_stack_pointer(), ++ MINSIGSTKSZ); ++ /* squash all but EFAULT for now */ ++ return 0; ++} ++ ++int __save_altstack(stack_t __user *uss, unsigned long sp) ++{ ++ struct task_struct *t = current; ++ int err = __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) | ++ __put_user(t->sas_ss_flags, &uss->ss_flags) | ++ __put_user(t->sas_ss_size, &uss->ss_size); ++ if (err) ++ return err; ++ if (t->sas_ss_flags & SS_AUTODISARM) ++ sas_ss_reset(t); ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++static int do_compat_sigaltstack(const compat_stack_t __user *uss_ptr, ++ compat_stack_t __user *uoss_ptr) ++{ ++ stack_t uss, uoss; ++ int ret; ++ ++ if (uss_ptr) { ++ compat_stack_t uss32; ++ if (copy_from_user(&uss32, uss_ptr, sizeof(compat_stack_t))) ++ return -EFAULT; ++ uss.ss_sp = compat_ptr(uss32.ss_sp); ++ uss.ss_flags = uss32.ss_flags; ++ uss.ss_size = uss32.ss_size; ++ } ++ ret = do_sigaltstack(uss_ptr ? 
&uss : NULL, &uoss, ++ compat_user_stack_pointer(), ++ COMPAT_MINSIGSTKSZ); ++ if (ret >= 0 && uoss_ptr) { ++ compat_stack_t old; ++ memset(&old, 0, sizeof(old)); ++ old.ss_sp = ptr_to_compat(uoss.ss_sp); ++ old.ss_flags = uoss.ss_flags; ++ old.ss_size = uoss.ss_size; ++ if (copy_to_user(uoss_ptr, &old, sizeof(compat_stack_t))) ++ ret = -EFAULT; ++ } ++ return ret; ++} ++ ++COMPAT_SYSCALL_DEFINE2(sigaltstack, ++ const compat_stack_t __user *, uss_ptr, ++ compat_stack_t __user *, uoss_ptr) ++{ ++ return do_compat_sigaltstack(uss_ptr, uoss_ptr); ++} ++ ++int compat_restore_altstack(const compat_stack_t __user *uss) ++{ ++ int err = do_compat_sigaltstack(uss, NULL); ++ /* squash all but -EFAULT for now */ ++ return err == -EFAULT ? err : 0; ++} ++ ++int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) ++{ ++ int err; ++ struct task_struct *t = current; ++ err = __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), ++ &uss->ss_sp) | ++ __put_user(t->sas_ss_flags, &uss->ss_flags) | ++ __put_user(t->sas_ss_size, &uss->ss_size); ++ if (err) ++ return err; ++ if (t->sas_ss_flags & SS_AUTODISARM) ++ sas_ss_reset(t); ++ return 0; ++} ++#endif ++ ++#ifdef __ARCH_WANT_SYS_SIGPENDING ++ ++/** ++ * sys_sigpending - examine pending signals ++ * @uset: where mask of pending signal is returned ++ */ ++SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset) ++{ ++ sigset_t set; ++ ++ if (sizeof(old_sigset_t) > sizeof(*uset)) ++ return -EINVAL; ++ ++ do_sigpending(&set); ++ ++ if (copy_to_user(uset, &set, sizeof(old_sigset_t))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32) ++{ ++ sigset_t set; ++ ++ do_sigpending(&set); ++ ++ return put_user(set.sig[0], set32); ++} ++#endif ++ ++#endif ++ ++#ifdef __ARCH_WANT_SYS_SIGPROCMASK ++/** ++ * sys_sigprocmask - examine and change blocked signals ++ * @how: whether to add, remove, or set signals ++ * @nset: signals to add or remove (if non-null) ++ * @oset: previous value of signal mask if non-null ++ * ++ * Some platforms have their own version with special arguments; ++ * others support only sys_rt_sigprocmask. ++ */ ++ ++SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, ++ old_sigset_t __user *, oset) ++{ ++ old_sigset_t old_set, new_set; ++ sigset_t new_blocked; ++ ++ old_set = current->blocked.sig[0]; ++ ++ if (nset) { ++ if (copy_from_user(&new_set, nset, sizeof(*nset))) ++ return -EFAULT; ++ ++ new_blocked = current->blocked; ++ ++ switch (how) { ++ case SIG_BLOCK: ++ sigaddsetmask(&new_blocked, new_set); ++ break; ++ case SIG_UNBLOCK: ++ sigdelsetmask(&new_blocked, new_set); ++ break; ++ case SIG_SETMASK: ++ new_blocked.sig[0] = new_set; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ set_current_blocked(&new_blocked); ++ } ++ ++ if (oset) { ++ if (copy_to_user(oset, &old_set, sizeof(*oset))) ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++#endif /* __ARCH_WANT_SYS_SIGPROCMASK */ ++ ++#ifndef CONFIG_ODD_RT_SIGACTION ++/** ++ * sys_rt_sigaction - alter an action taken by a process ++ * @sig: signal to be sent ++ * @act: new sigaction ++ * @oact: used to save the previous sigaction ++ * @sigsetsize: size of sigset_t type ++ */ ++SYSCALL_DEFINE4(rt_sigaction, int, sig, ++ const struct sigaction __user *, act, ++ struct sigaction __user *, oact, ++ size_t, sigsetsize) ++{ ++ struct k_sigaction new_sa, old_sa; ++ int ret; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. 
*/ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (act && copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa))) ++ return -EFAULT; ++ ++ ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL); ++ if (ret) ++ return ret; ++ ++ if (oact && copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa))) ++ return -EFAULT; ++ ++ return 0; ++} ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, ++ const struct compat_sigaction __user *, act, ++ struct compat_sigaction __user *, oact, ++ compat_size_t, sigsetsize) ++{ ++ struct k_sigaction new_ka, old_ka; ++#ifdef __ARCH_HAS_SA_RESTORER ++ compat_uptr_t restorer; ++#endif ++ int ret; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. */ ++ if (sigsetsize != sizeof(compat_sigset_t)) ++ return -EINVAL; ++ ++ if (act) { ++ compat_uptr_t handler; ++ ret = get_user(handler, &act->sa_handler); ++ new_ka.sa.sa_handler = compat_ptr(handler); ++#ifdef __ARCH_HAS_SA_RESTORER ++ ret |= get_user(restorer, &act->sa_restorer); ++ new_ka.sa.sa_restorer = compat_ptr(restorer); ++#endif ++ ret |= get_compat_sigset(&new_ka.sa.sa_mask, &act->sa_mask); ++ ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); ++ if (ret) ++ return -EFAULT; ++ } ++ ++ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); ++ if (!ret && oact) { ++ ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), ++ &oact->sa_handler); ++ ret |= put_compat_sigset(&oact->sa_mask, &old_ka.sa.sa_mask, ++ sizeof(oact->sa_mask)); ++ ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); ++#ifdef __ARCH_HAS_SA_RESTORER ++ ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), ++ &oact->sa_restorer); ++#endif ++ } ++ return ret; ++} ++#endif ++#endif /* !CONFIG_ODD_RT_SIGACTION */ ++ ++#ifdef CONFIG_OLD_SIGACTION ++SYSCALL_DEFINE3(sigaction, int, sig, ++ const struct old_sigaction __user *, act, ++ struct old_sigaction __user *, oact) ++{ ++ struct k_sigaction new_ka, old_ka; ++ int ret; ++ ++ if (act) { ++ old_sigset_t mask; ++ if (!access_ok(act, sizeof(*act)) || ++ __get_user(new_ka.sa.sa_handler, &act->sa_handler) || ++ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || ++ __get_user(new_ka.sa.sa_flags, &act->sa_flags) || ++ __get_user(mask, &act->sa_mask)) ++ return -EFAULT; ++#ifdef __ARCH_HAS_KA_RESTORER ++ new_ka.ka_restorer = NULL; ++#endif ++ siginitset(&new_ka.sa.sa_mask, mask); ++ } ++ ++ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); ++ ++ if (!ret && oact) { ++ if (!access_ok(oact, sizeof(*oact)) || ++ __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || ++ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || ++ __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || ++ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) ++ return -EFAULT; ++ } ++ ++ return ret; ++} ++#endif ++#ifdef CONFIG_COMPAT_OLD_SIGACTION ++COMPAT_SYSCALL_DEFINE3(sigaction, int, sig, ++ const struct compat_old_sigaction __user *, act, ++ struct compat_old_sigaction __user *, oact) ++{ ++ struct k_sigaction new_ka, old_ka; ++ int ret; ++ compat_old_sigset_t mask; ++ compat_uptr_t handler, restorer; ++ ++ if (act) { ++ if (!access_ok(act, sizeof(*act)) || ++ __get_user(handler, &act->sa_handler) || ++ __get_user(restorer, &act->sa_restorer) || ++ __get_user(new_ka.sa.sa_flags, &act->sa_flags) || ++ __get_user(mask, &act->sa_mask)) ++ return -EFAULT; ++ ++#ifdef __ARCH_HAS_KA_RESTORER ++ new_ka.ka_restorer = NULL; ++#endif ++ new_ka.sa.sa_handler = compat_ptr(handler); ++ new_ka.sa.sa_restorer = compat_ptr(restorer); ++ siginitset(&new_ka.sa.sa_mask, mask); ++ } ++ ++ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); ++ ++ if (!ret && oact) { ++ if (!access_ok(oact, sizeof(*oact)) || ++ __put_user(ptr_to_compat(old_ka.sa.sa_handler), ++ &oact->sa_handler) || ++ __put_user(ptr_to_compat(old_ka.sa.sa_restorer), ++ &oact->sa_restorer) || ++ __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || ++ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) ++ return -EFAULT; ++ } ++ return ret; ++} ++#endif ++ ++#ifdef CONFIG_SGETMASK_SYSCALL ++ ++/* ++ * For backwards compatibility. Functionality superseded by sigprocmask. ++ */ ++SYSCALL_DEFINE0(sgetmask) ++{ ++ /* SMP safe */ ++ return current->blocked.sig[0]; ++} ++ ++SYSCALL_DEFINE1(ssetmask, int, newmask) ++{ ++ int old = current->blocked.sig[0]; ++ sigset_t newset; ++ ++ siginitset(&newset, newmask); ++ set_current_blocked(&newset); ++ ++ return old; ++} ++#endif /* CONFIG_SGETMASK_SYSCALL */ ++ ++#ifdef __ARCH_WANT_SYS_SIGNAL ++/* ++ * For backwards compatibility. Functionality superseded by sigaction. ++ */ ++SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler) ++{ ++ struct k_sigaction new_sa, old_sa; ++ int ret; ++ ++ new_sa.sa.sa_handler = handler; ++ new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; ++ sigemptyset(&new_sa.sa.sa_mask); ++ ++ ret = do_sigaction(sig, &new_sa, &old_sa); ++ ++ return ret ? ret : (unsigned long)old_sa.sa.sa_handler; ++} ++#endif /* __ARCH_WANT_SYS_SIGNAL */ ++ ++#ifdef __ARCH_WANT_SYS_PAUSE ++ ++SYSCALL_DEFINE0(pause) ++{ ++ while (!signal_pending(current)) { ++ __set_current_state(TASK_INTERRUPTIBLE); ++ schedule(); ++ } ++ return -ERESTARTNOHAND; ++} ++ ++#endif ++ ++static int sigsuspend(sigset_t *set) ++{ ++ current->saved_sigmask = current->blocked; ++ set_current_blocked(set); ++ ++ while (!signal_pending(current)) { ++ __set_current_state(TASK_INTERRUPTIBLE); ++ schedule(); ++ } ++ set_restore_sigmask(); ++ return -ERESTARTNOHAND; ++} ++ ++/** ++ * sys_rt_sigsuspend - replace the signal mask for a value with the ++ * @unewset value until a signal is received ++ * @unewset: new signal mask value ++ * @sigsetsize: size of sigset_t type ++ */ ++SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) ++{ ++ sigset_t newset; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. 
*/ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (copy_from_user(&newset, unewset, sizeof(newset))) ++ return -EFAULT; ++ return sigsuspend(&newset); ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE2(rt_sigsuspend, compat_sigset_t __user *, unewset, compat_size_t, sigsetsize) ++{ ++ sigset_t newset; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. */ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (get_compat_sigset(&newset, unewset)) ++ return -EFAULT; ++ return sigsuspend(&newset); ++} ++#endif ++ ++#ifdef CONFIG_OLD_SIGSUSPEND ++SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask) ++{ ++ sigset_t blocked; ++ siginitset(&blocked, mask); ++ return sigsuspend(&blocked); ++} ++#endif ++#ifdef CONFIG_OLD_SIGSUSPEND3 ++SYSCALL_DEFINE3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask) ++{ ++ sigset_t blocked; ++ siginitset(&blocked, mask); ++ return sigsuspend(&blocked); ++} ++#endif ++ ++__weak const char *arch_vma_name(struct vm_area_struct *vma) ++{ ++ return NULL; ++} ++ ++void __init signals_init(void) ++{ ++ /* If this check fails, the __ARCH_SI_PREAMBLE_SIZE value is wrong! */ ++ BUILD_BUG_ON(__ARCH_SI_PREAMBLE_SIZE ++ != offsetof(struct siginfo, _sifields._pad)); ++ BUILD_BUG_ON(sizeof(struct siginfo) != SI_MAX_SIZE); ++ ++ sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC); ++} ++ ++#ifdef CONFIG_KGDB_KDB ++#include ++/* ++ * kdb_send_sig - Allows kdb to send signals without exposing ++ * signal internals. This function checks if the required locks are ++ * available before calling the main signal code, to avoid kdb ++ * deadlocks. ++ */ ++void kdb_send_sig(struct task_struct *t, int sig) ++{ ++ static struct task_struct *kdb_prev_t; ++ int new_t, ret; ++ if (!spin_trylock(&t->sighand->siglock)) { ++ kdb_printf("Can't do kill command now.\n" ++ "The sigmask lock is held somewhere else in " ++ "kernel, try again later\n"); ++ return; ++ } ++ new_t = kdb_prev_t != t; ++ kdb_prev_t = t; ++ if (t->state != TASK_RUNNING && new_t) { ++ spin_unlock(&t->sighand->siglock); ++ kdb_printf("Process is not RUNNING, sending a signal from " ++ "kdb risks deadlock\n" ++ "on the run queue locks. 
" ++ "The signal has _not_ been sent.\n" ++ "Reissue the kill command if you want to risk " ++ "the deadlock.\n"); ++ return; ++ } ++ ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); ++ spin_unlock(&t->sighand->siglock); ++ if (ret) ++ kdb_printf("Fail to deliver Signal %d to process %d.\n", ++ sig, t->pid); ++ else ++ kdb_printf("Signal %d is sent to process %d.\n", sig, t->pid); ++} ++#endif /* CONFIG_KGDB_KDB */ +diff -uprN kernel/kernel/stop_machine.c kernel_new/kernel/stop_machine.c +--- kernel/kernel/stop_machine.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/stop_machine.c 2021-04-02 09:22:23.752463220 +0800 +@@ -240,6 +240,7 @@ static int multi_cpu_stop(void *data) + sdei_unmask_local_cpu(); + gic_arch_restore_irqs(flags); + #endif ++ hard_irq_enable(); + local_irq_restore(flags); + return err; + } +@@ -619,6 +620,7 @@ int stop_machine_cpuslocked(cpu_stop_fn_ + local_irq_save(flags); + hard_irq_disable(); + ret = (*fn)(data); ++ hard_irq_enable(); + local_irq_restore(flags); + + return ret; +diff -uprN kernel/kernel/stop_machine.c.orig kernel_new/kernel/stop_machine.c.orig +--- kernel/kernel/stop_machine.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/stop_machine.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,695 @@ ++/* ++ * kernel/stop_machine.c ++ * ++ * Copyright (C) 2008, 2005 IBM Corporation. ++ * Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au ++ * Copyright (C) 2010 SUSE Linux Products GmbH ++ * Copyright (C) 2010 Tejun Heo ++ * ++ * This file is released under the GPLv2 and any later version. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_ARM64 ++#include ++#include ++#endif ++ ++/* ++ * Structure to determine completion condition and record errors. May ++ * be shared by works on different cpus. ++ */ ++struct cpu_stop_done { ++ atomic_t nr_todo; /* nr left to execute */ ++ int ret; /* collected return value */ ++ struct completion completion; /* fired if nr_todo reaches 0 */ ++}; ++ ++/* the actual stopper, one per every possible cpu, enabled on online cpus */ ++struct cpu_stopper { ++ struct task_struct *thread; ++ ++ raw_spinlock_t lock; ++ bool enabled; /* is this stopper enabled? */ ++ struct list_head works; /* list of pending works */ ++ ++ struct cpu_stop_work stop_work; /* for stop_cpus */ ++}; ++ ++static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); ++static bool stop_machine_initialized = false; ++ ++/* static data for stop_cpus */ ++static DEFINE_MUTEX(stop_cpus_mutex); ++static bool stop_cpus_in_progress; ++ ++static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) ++{ ++ memset(done, 0, sizeof(*done)); ++ atomic_set(&done->nr_todo, nr_todo); ++ init_completion(&done->completion); ++} ++ ++/* signal completion unless @done is NULL */ ++static void cpu_stop_signal_done(struct cpu_stop_done *done) ++{ ++ if (atomic_dec_and_test(&done->nr_todo)) ++ complete(&done->completion); ++} ++ ++static void __cpu_stop_queue_work(struct cpu_stopper *stopper, ++ struct cpu_stop_work *work, ++ struct wake_q_head *wakeq) ++{ ++ list_add_tail(&work->list, &stopper->works); ++ wake_q_add(wakeq, stopper->thread); ++} ++ ++/* queue @work to @stopper. 
if offline, @work is completed immediately */ ++static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ DEFINE_WAKE_Q(wakeq); ++ unsigned long flags; ++ bool enabled; ++ ++ preempt_disable(); ++ raw_spin_lock_irqsave(&stopper->lock, flags); ++ enabled = stopper->enabled; ++ if (enabled) ++ __cpu_stop_queue_work(stopper, work, &wakeq); ++ else if (work->done) ++ cpu_stop_signal_done(work->done); ++ raw_spin_unlock_irqrestore(&stopper->lock, flags); ++ ++ wake_up_q(&wakeq); ++ preempt_enable(); ++ ++ return enabled; ++} ++ ++/** ++ * stop_one_cpu - stop a cpu ++ * @cpu: cpu to stop ++ * @fn: function to execute ++ * @arg: argument to @fn ++ * ++ * Execute @fn(@arg) on @cpu. @fn is run in a process context with ++ * the highest priority preempting any task on the cpu and ++ * monopolizing it. This function returns after the execution is ++ * complete. ++ * ++ * This function doesn't guarantee @cpu stays online till @fn ++ * completes. If @cpu goes down in the middle, execution may happen ++ * partially or fully on different cpus. @fn should either be ready ++ * for that or the caller should ensure that @cpu stays online until ++ * this function completes. ++ * ++ * CONTEXT: ++ * Might sleep. ++ * ++ * RETURNS: ++ * -ENOENT if @fn(@arg) was not executed because @cpu was offline; ++ * otherwise, the return value of @fn. ++ */ ++int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) ++{ ++ struct cpu_stop_done done; ++ struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done }; ++ ++ cpu_stop_init_done(&done, 1); ++ if (!cpu_stop_queue_work(cpu, &work)) ++ return -ENOENT; ++ /* ++ * In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup ++ * cycle by doing a preemption: ++ */ ++ cond_resched(); ++ wait_for_completion(&done.completion); ++ return done.ret; ++} ++ ++/* This controls the threads on each CPU. */ ++enum multi_stop_state { ++ /* Dummy starting state for thread. */ ++ MULTI_STOP_NONE, ++ /* Awaiting everyone to be scheduled. */ ++ MULTI_STOP_PREPARE, ++ /* Disable interrupts. */ ++ MULTI_STOP_DISABLE_IRQ, ++ /* Run the function */ ++ MULTI_STOP_RUN, ++ /* Exit */ ++ MULTI_STOP_EXIT, ++}; ++ ++struct multi_stop_data { ++ cpu_stop_fn_t fn; ++ void *data; ++ /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ ++ unsigned int num_threads; ++ const struct cpumask *active_cpus; ++ ++ enum multi_stop_state state; ++ atomic_t thread_ack; ++}; ++ ++static void set_state(struct multi_stop_data *msdata, ++ enum multi_stop_state newstate) ++{ ++ /* Reset ack counter. */ ++ atomic_set(&msdata->thread_ack, msdata->num_threads); ++ smp_wmb(); ++ msdata->state = newstate; ++} ++ ++/* Last one to ack a state moves to the next state. */ ++static void ack_state(struct multi_stop_data *msdata) ++{ ++ if (atomic_dec_and_test(&msdata->thread_ack)) ++ set_state(msdata, msdata->state + 1); ++} ++ ++/* This is the cpu_stop function which stops the CPU. */ ++static int multi_cpu_stop(void *data) ++{ ++ struct multi_stop_data *msdata = data; ++ enum multi_stop_state curstate = MULTI_STOP_NONE; ++ int cpu = smp_processor_id(), err = 0; ++ unsigned long flags; ++ bool is_active; ++ ++ /* ++ * When called from stop_machine_from_inactive_cpu(), irq might ++ * already be disabled. Save the state and restore it on exit. 
++ */ ++ local_save_flags(flags); ++ ++ if (!msdata->active_cpus) ++ is_active = cpu == cpumask_first(cpu_online_mask); ++ else ++ is_active = cpumask_test_cpu(cpu, msdata->active_cpus); ++ ++ /* Simple state machine */ ++ do { ++ /* Chill out and ensure we re-read multi_stop_state. */ ++ cpu_relax_yield(); ++ if (msdata->state != curstate) { ++ curstate = msdata->state; ++ switch (curstate) { ++ case MULTI_STOP_DISABLE_IRQ: ++ local_irq_disable(); ++ hard_irq_disable(); ++#ifdef CONFIG_ARM64 ++ gic_arch_disable_irqs(); ++ sdei_mask_local_cpu(); ++#endif ++ break; ++ case MULTI_STOP_RUN: ++ if (is_active) ++ err = msdata->fn(msdata->data); ++ break; ++ default: ++ break; ++ } ++ ack_state(msdata); ++ } else if (curstate > MULTI_STOP_PREPARE) { ++ /* ++ * At this stage all other CPUs we depend on must spin ++ * in the same loop. Any reason for hard-lockup should ++ * be detected and reported on their side. ++ */ ++ touch_nmi_watchdog(); ++ } ++ } while (curstate != MULTI_STOP_EXIT); ++ ++#ifdef CONFIG_ARM64 ++ sdei_unmask_local_cpu(); ++ gic_arch_restore_irqs(flags); ++#endif ++ local_irq_restore(flags); ++ return err; ++} ++ ++static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, ++ int cpu2, struct cpu_stop_work *work2) ++{ ++ struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); ++ struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); ++ DEFINE_WAKE_Q(wakeq); ++ int err; ++ ++retry: ++ /* ++ * The waking up of stopper threads has to happen in the same ++ * scheduling context as the queueing. Otherwise, there is a ++ * possibility of one of the above stoppers being woken up by another ++ * CPU, and preempting us. This will cause us to not wake up the other ++ * stopper forever. ++ */ ++ preempt_disable(); ++ raw_spin_lock_irq(&stopper1->lock); ++ raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); ++ ++ if (!stopper1->enabled || !stopper2->enabled) { ++ err = -ENOENT; ++ goto unlock; ++ } ++ ++ /* ++ * Ensure that if we race with __stop_cpus() the stoppers won't get ++ * queued up in reverse order leading to system deadlock. ++ * ++ * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has ++ * queued a work on cpu1 but not on cpu2, we hold both locks. ++ * ++ * It can be falsely true but it is safe to spin until it is cleared, ++ * queue_stop_cpus_work() does everything under preempt_disable(). ++ */ ++ if (unlikely(stop_cpus_in_progress)) { ++ err = -EDEADLK; ++ goto unlock; ++ } ++ ++ err = 0; ++ __cpu_stop_queue_work(stopper1, work1, &wakeq); ++ __cpu_stop_queue_work(stopper2, work2, &wakeq); ++ ++unlock: ++ raw_spin_unlock(&stopper2->lock); ++ raw_spin_unlock_irq(&stopper1->lock); ++ ++ if (unlikely(err == -EDEADLK)) { ++ preempt_enable(); ++ ++ while (stop_cpus_in_progress) ++ cpu_relax(); ++ ++ goto retry; ++ } ++ ++ wake_up_q(&wakeq); ++ preempt_enable(); ++ ++ return err; ++} ++/** ++ * stop_two_cpus - stops two cpus ++ * @cpu1: the cpu to stop ++ * @cpu2: the other cpu to stop ++ * @fn: function to execute ++ * @arg: argument to @fn ++ * ++ * Stops both the current and specified CPU and runs @fn on one of them. ++ * ++ * returns when both are completed. 
++ */ ++int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg) ++{ ++ struct cpu_stop_done done; ++ struct cpu_stop_work work1, work2; ++ struct multi_stop_data msdata; ++ ++ msdata = (struct multi_stop_data){ ++ .fn = fn, ++ .data = arg, ++ .num_threads = 2, ++ .active_cpus = cpumask_of(cpu1), ++ }; ++ ++ work1 = work2 = (struct cpu_stop_work){ ++ .fn = multi_cpu_stop, ++ .arg = &msdata, ++ .done = &done ++ }; ++ ++ cpu_stop_init_done(&done, 2); ++ set_state(&msdata, MULTI_STOP_PREPARE); ++ ++ if (cpu1 > cpu2) ++ swap(cpu1, cpu2); ++ if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) ++ return -ENOENT; ++ ++ wait_for_completion(&done.completion); ++ return done.ret; ++} ++ ++/** ++ * stop_one_cpu_nowait - stop a cpu but don't wait for completion ++ * @cpu: cpu to stop ++ * @fn: function to execute ++ * @arg: argument to @fn ++ * @work_buf: pointer to cpu_stop_work structure ++ * ++ * Similar to stop_one_cpu() but doesn't wait for completion. The ++ * caller is responsible for ensuring @work_buf is currently unused ++ * and will remain untouched until stopper starts executing @fn. ++ * ++ * CONTEXT: ++ * Don't care. ++ * ++ * RETURNS: ++ * true if cpu_stop_work was queued successfully and @fn will be called, ++ * false otherwise. ++ */ ++bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, ++ struct cpu_stop_work *work_buf) ++{ ++ *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, }; ++ return cpu_stop_queue_work(cpu, work_buf); ++} ++ ++static bool queue_stop_cpus_work(const struct cpumask *cpumask, ++ cpu_stop_fn_t fn, void *arg, ++ struct cpu_stop_done *done) ++{ ++ struct cpu_stop_work *work; ++ unsigned int cpu; ++ bool queued = false; ++ ++ /* ++ * Disable preemption while queueing to avoid getting ++ * preempted by a stopper which might wait for other stoppers ++ * to enter @fn which can lead to deadlock. ++ */ ++ preempt_disable(); ++ stop_cpus_in_progress = true; ++ for_each_cpu(cpu, cpumask) { ++ work = &per_cpu(cpu_stopper.stop_work, cpu); ++ work->fn = fn; ++ work->arg = arg; ++ work->done = done; ++ if (cpu_stop_queue_work(cpu, work)) ++ queued = true; ++ } ++ stop_cpus_in_progress = false; ++ preempt_enable(); ++ ++ return queued; ++} ++ ++static int __stop_cpus(const struct cpumask *cpumask, ++ cpu_stop_fn_t fn, void *arg) ++{ ++ struct cpu_stop_done done; ++ ++ cpu_stop_init_done(&done, cpumask_weight(cpumask)); ++ if (!queue_stop_cpus_work(cpumask, fn, arg, &done)) ++ return -ENOENT; ++ wait_for_completion(&done.completion); ++ return done.ret; ++} ++ ++/** ++ * stop_cpus - stop multiple cpus ++ * @cpumask: cpus to stop ++ * @fn: function to execute ++ * @arg: argument to @fn ++ * ++ * Execute @fn(@arg) on online cpus in @cpumask. On each target cpu, ++ * @fn is run in a process context with the highest priority ++ * preempting any task on the cpu and monopolizing it. This function ++ * returns after all executions are complete. ++ * ++ * This function doesn't guarantee the cpus in @cpumask stay online ++ * till @fn completes. If some cpus go down in the middle, execution ++ * on the cpu may happen partially or fully on different cpus. @fn ++ * should either be ready for that or the caller should ensure that ++ * the cpus stay online until this function completes. ++ * ++ * All stop_cpus() calls are serialized making it safe for @fn to wait ++ * for all cpus to start executing it. ++ * ++ * CONTEXT: ++ * Might sleep. 
++ * ++ * RETURNS: ++ * -ENOENT if @fn(@arg) was not executed at all because all cpus in ++ * @cpumask were offline; otherwise, 0 if all executions of @fn ++ * returned 0, any non zero return value if any returned non zero. ++ */ ++int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) ++{ ++ int ret; ++ ++ /* static works are used, process one request at a time */ ++ mutex_lock(&stop_cpus_mutex); ++ ret = __stop_cpus(cpumask, fn, arg); ++ mutex_unlock(&stop_cpus_mutex); ++ return ret; ++} ++ ++/** ++ * try_stop_cpus - try to stop multiple cpus ++ * @cpumask: cpus to stop ++ * @fn: function to execute ++ * @arg: argument to @fn ++ * ++ * Identical to stop_cpus() except that it fails with -EAGAIN if ++ * someone else is already using the facility. ++ * ++ * CONTEXT: ++ * Might sleep. ++ * ++ * RETURNS: ++ * -EAGAIN if someone else is already stopping cpus, -ENOENT if ++ * @fn(@arg) was not executed at all because all cpus in @cpumask were ++ * offline; otherwise, 0 if all executions of @fn returned 0, any non ++ * zero return value if any returned non zero. ++ */ ++int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) ++{ ++ int ret; ++ ++ /* static works are used, process one request at a time */ ++ if (!mutex_trylock(&stop_cpus_mutex)) ++ return -EAGAIN; ++ ret = __stop_cpus(cpumask, fn, arg); ++ mutex_unlock(&stop_cpus_mutex); ++ return ret; ++} ++ ++static int cpu_stop_should_run(unsigned int cpu) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ unsigned long flags; ++ int run; ++ ++ raw_spin_lock_irqsave(&stopper->lock, flags); ++ run = !list_empty(&stopper->works); ++ raw_spin_unlock_irqrestore(&stopper->lock, flags); ++ return run; ++} ++ ++static void cpu_stopper_thread(unsigned int cpu) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ struct cpu_stop_work *work; ++ ++repeat: ++ work = NULL; ++ raw_spin_lock_irq(&stopper->lock); ++ if (!list_empty(&stopper->works)) { ++ work = list_first_entry(&stopper->works, ++ struct cpu_stop_work, list); ++ list_del_init(&work->list); ++ } ++ raw_spin_unlock_irq(&stopper->lock); ++ ++ if (work) { ++ cpu_stop_fn_t fn = work->fn; ++ void *arg = work->arg; ++ struct cpu_stop_done *done = work->done; ++ int ret; ++ ++ /* cpu stop callbacks must not sleep, make in_atomic() == T */ ++ preempt_count_inc(); ++ ret = fn(arg); ++ if (done) { ++ if (ret) ++ done->ret = ret; ++ cpu_stop_signal_done(done); ++ } ++ preempt_count_dec(); ++ WARN_ONCE(preempt_count(), ++ "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg); ++ goto repeat; ++ } ++} ++ ++void stop_machine_park(int cpu) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ /* ++ * Lockless. cpu_stopper_thread() will take stopper->lock and flush ++ * the pending works before it parks, until then it is fine to queue ++ * the new works. 
++ */ ++ stopper->enabled = false; ++ kthread_park(stopper->thread); ++} ++ ++extern void sched_set_stop_task(int cpu, struct task_struct *stop); ++ ++static void cpu_stop_create(unsigned int cpu) ++{ ++ sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu)); ++} ++ ++static void cpu_stop_park(unsigned int cpu) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ ++ WARN_ON(!list_empty(&stopper->works)); ++} ++ ++void stop_machine_unpark(int cpu) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ ++ stopper->enabled = true; ++ kthread_unpark(stopper->thread); ++} ++ ++static struct smp_hotplug_thread cpu_stop_threads = { ++ .store = &cpu_stopper.thread, ++ .thread_should_run = cpu_stop_should_run, ++ .thread_fn = cpu_stopper_thread, ++ .thread_comm = "migration/%u", ++ .create = cpu_stop_create, ++ .park = cpu_stop_park, ++ .selfparking = true, ++}; ++ ++static int __init cpu_stop_init(void) ++{ ++ unsigned int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ ++ raw_spin_lock_init(&stopper->lock); ++ INIT_LIST_HEAD(&stopper->works); ++ } ++ ++ BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads)); ++ stop_machine_unpark(raw_smp_processor_id()); ++ stop_machine_initialized = true; ++ return 0; ++} ++early_initcall(cpu_stop_init); ++ ++int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, ++ const struct cpumask *cpus) ++{ ++ struct multi_stop_data msdata = { ++ .fn = fn, ++ .data = data, ++ .num_threads = num_online_cpus(), ++ .active_cpus = cpus, ++ }; ++ ++ lockdep_assert_cpus_held(); ++ ++ if (!stop_machine_initialized) { ++ /* ++ * Handle the case where stop_machine() is called ++ * early in boot before stop_machine() has been ++ * initialized. ++ */ ++ unsigned long flags; ++ int ret; ++ ++ WARN_ON_ONCE(msdata.num_threads != 1); ++ ++ local_irq_save(flags); ++ hard_irq_disable(); ++ ret = (*fn)(data); ++ local_irq_restore(flags); ++ ++ return ret; ++ } ++ ++ /* Set the initial state and stop all online cpus. */ ++ set_state(&msdata, MULTI_STOP_PREPARE); ++ return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata); ++} ++ ++int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) ++{ ++ int ret; ++ ++ /* No CPUs can come up or down during this. */ ++ cpus_read_lock(); ++ ret = stop_machine_cpuslocked(fn, data, cpus); ++ cpus_read_unlock(); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(stop_machine); ++ ++/** ++ * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU ++ * @fn: the function to run ++ * @data: the data ptr for the @fn() ++ * @cpus: the cpus to run the @fn() on (NULL = any online cpu) ++ * ++ * This is identical to stop_machine() but can be called from a CPU which ++ * is not active. The local CPU is in the process of hotplug (so no other ++ * CPU hotplug can start) and not marked active and doesn't have enough ++ * context to sleep. ++ * ++ * This function provides stop_machine() functionality for such state by ++ * using busy-wait for synchronization and executing @fn directly for local ++ * CPU. ++ * ++ * CONTEXT: ++ * Local CPU is inactive. Temporarily stops all active CPUs. ++ * ++ * RETURNS: ++ * 0 if all executions of @fn returned 0, any non zero return value if any ++ * returned non zero. 
++ */ ++int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, ++ const struct cpumask *cpus) ++{ ++ struct multi_stop_data msdata = { .fn = fn, .data = data, ++ .active_cpus = cpus }; ++ struct cpu_stop_done done; ++ int ret; ++ ++ /* Local CPU must be inactive and CPU hotplug in progress. */ ++ BUG_ON(cpu_active(raw_smp_processor_id())); ++ msdata.num_threads = num_active_cpus() + 1; /* +1 for local */ ++ ++ /* No proper task established and can't sleep - busy wait for lock. */ ++ while (!mutex_trylock(&stop_cpus_mutex)) ++ cpu_relax(); ++ ++ /* Schedule work on other CPUs and execute directly for local CPU */ ++ set_state(&msdata, MULTI_STOP_PREPARE); ++ cpu_stop_init_done(&done, num_active_cpus()); ++ queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata, ++ &done); ++ ret = multi_cpu_stop(&msdata); ++ ++ /* Busy wait for completion. */ ++ while (!completion_done(&done.completion)) ++ cpu_relax(); ++ ++ mutex_unlock(&stop_cpus_mutex); ++ return ret ?: done.ret; ++} +diff -uprN kernel/kernel/stop_machine.c.rej kernel_new/kernel/stop_machine.c.rej +--- kernel/kernel/stop_machine.c.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/stop_machine.c.rej 2021-04-01 18:28:07.813863111 +0800 +@@ -0,0 +1,10 @@ ++--- kernel/stop_machine.c 2019-12-18 03:36:04.000000000 +0800 +++++ kernel/stop_machine.c 2021-03-22 09:21:43.223415449 +0800 ++@@ -227,6 +227,7 @@ static int multi_cpu_stop(void *data) ++ } ++ } while (curstate != MULTI_STOP_EXIT); ++ +++ hard_irq_enable(); ++ local_irq_restore(flags); ++ return err; ++ } +diff -uprN kernel/kernel/time/clockevents.c kernel_new/kernel/time/clockevents.c +--- kernel/kernel/time/clockevents.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/time/clockevents.c 2021-04-01 18:28:07.813863111 +0800 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #include "tick-internal.h" + +@@ -458,6 +459,8 @@ void clockevents_register_device(struct + /* Initialize state to DETACHED */ + clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); + ++ ipipe_host_timer_register(dev); ++ + if (!dev->cpumask) { + WARN_ON(num_possible_cpus() > 1); + dev->cpumask = cpumask_of(smp_processor_id()); +@@ -652,8 +655,10 @@ void tick_cleanup_dead_cpu(int cpu) + * Unregister the clock event devices which were + * released from the users in the notify chain. + */ +- list_for_each_entry_safe(dev, tmp, &clockevents_released, list) ++ list_for_each_entry_safe(dev, tmp, &clockevents_released, list) { + list_del(&dev->list); ++ ipipe_host_timer_cleanup(dev); ++ } + /* + * Now check whether the CPU has left unused per cpu devices + */ +@@ -663,6 +668,7 @@ void tick_cleanup_dead_cpu(int cpu) + !tick_is_broadcast_device(dev)) { + BUG_ON(!clockevent_state_detached(dev)); + list_del(&dev->list); ++ ipipe_host_timer_cleanup(dev); + } + } + raw_spin_unlock_irqrestore(&clockevents_lock, flags); +diff -uprN kernel/kernel/time/clockevents.c.orig kernel_new/kernel/time/clockevents.c.orig +--- kernel/kernel/time/clockevents.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/time/clockevents.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,788 @@ ++/* ++ * linux/kernel/time/clockevents.c ++ * ++ * This file contains functions which manage clock event devices. ++ * ++ * Copyright(C) 2005-2006, Thomas Gleixner ++ * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar ++ * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner ++ * ++ * This code is licenced under the GPL version 2. For details see ++ * kernel-base/COPYING. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "tick-internal.h" ++ ++/* The registered clock event devices */ ++static LIST_HEAD(clockevent_devices); ++static LIST_HEAD(clockevents_released); ++/* Protection for the above */ ++static DEFINE_RAW_SPINLOCK(clockevents_lock); ++/* Protection for unbind operations */ ++static DEFINE_MUTEX(clockevents_mutex); ++ ++struct ce_unbind { ++ struct clock_event_device *ce; ++ int res; ++}; ++ ++static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt, ++ bool ismax) ++{ ++ u64 clc = (u64) latch << evt->shift; ++ u64 rnd; ++ ++ if (unlikely(!evt->mult)) { ++ evt->mult = 1; ++ WARN_ON(1); ++ } ++ rnd = (u64) evt->mult - 1; ++ ++ /* ++ * Upper bound sanity check. If the backwards conversion is ++ * not equal latch, we know that the above shift overflowed. ++ */ ++ if ((clc >> evt->shift) != (u64)latch) ++ clc = ~0ULL; ++ ++ /* ++ * Scaled math oddities: ++ * ++ * For mult <= (1 << shift) we can safely add mult - 1 to ++ * prevent integer rounding loss. So the backwards conversion ++ * from nsec to device ticks will be correct. ++ * ++ * For mult > (1 << shift), i.e. device frequency is > 1GHz we ++ * need to be careful. Adding mult - 1 will result in a value ++ * which when converted back to device ticks can be larger ++ * than latch by up to (mult - 1) >> shift. For the min_delta ++ * calculation we still want to apply this in order to stay ++ * above the minimum device ticks limit. For the upper limit ++ * we would end up with a latch value larger than the upper ++ * limit of the device, so we omit the add to stay below the ++ * device upper boundary. ++ * ++ * Also omit the add if it would overflow the u64 boundary. ++ */ ++ if ((~0ULL - clc > rnd) && ++ (!ismax || evt->mult <= (1ULL << evt->shift))) ++ clc += rnd; ++ ++ do_div(clc, evt->mult); ++ ++ /* Deltas less than 1usec are pointless noise */ ++ return clc > 1000 ? clc : 1000; ++} ++ ++/** ++ * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds ++ * @latch: value to convert ++ * @evt: pointer to clock event device descriptor ++ * ++ * Math helper, returns latch value converted to nanoseconds (bound checked) ++ */ ++u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) ++{ ++ return cev_delta2ns(latch, evt, false); ++} ++EXPORT_SYMBOL_GPL(clockevent_delta2ns); ++ ++static int __clockevents_switch_state(struct clock_event_device *dev, ++ enum clock_event_state state) ++{ ++ if (dev->features & CLOCK_EVT_FEAT_DUMMY) ++ return 0; ++ ++ /* Transition with new state-specific callbacks */ ++ switch (state) { ++ case CLOCK_EVT_STATE_DETACHED: ++ /* The clockevent device is getting replaced. Shut it down. 
*/ ++ ++ case CLOCK_EVT_STATE_SHUTDOWN: ++ if (dev->set_state_shutdown) ++ return dev->set_state_shutdown(dev); ++ return 0; ++ ++ case CLOCK_EVT_STATE_PERIODIC: ++ /* Core internal bug */ ++ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) ++ return -ENOSYS; ++ if (dev->set_state_periodic) ++ return dev->set_state_periodic(dev); ++ return 0; ++ ++ case CLOCK_EVT_STATE_ONESHOT: ++ /* Core internal bug */ ++ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) ++ return -ENOSYS; ++ if (dev->set_state_oneshot) ++ return dev->set_state_oneshot(dev); ++ return 0; ++ ++ case CLOCK_EVT_STATE_ONESHOT_STOPPED: ++ /* Core internal bug */ ++ if (WARN_ONCE(!clockevent_state_oneshot(dev), ++ "Current state: %d\n", ++ clockevent_get_state(dev))) ++ return -EINVAL; ++ ++ if (dev->set_state_oneshot_stopped) ++ return dev->set_state_oneshot_stopped(dev); ++ else ++ return -ENOSYS; ++ ++ default: ++ return -ENOSYS; ++ } ++} ++ ++/** ++ * clockevents_switch_state - set the operating state of a clock event device ++ * @dev: device to modify ++ * @state: new state ++ * ++ * Must be called with interrupts disabled ! ++ */ ++void clockevents_switch_state(struct clock_event_device *dev, ++ enum clock_event_state state) ++{ ++ if (clockevent_get_state(dev) != state) { ++ if (__clockevents_switch_state(dev, state)) ++ return; ++ ++ clockevent_set_state(dev, state); ++ ++ /* ++ * A nsec2cyc multiplicator of 0 is invalid and we'd crash ++ * on it, so fix it up and emit a warning: ++ */ ++ if (clockevent_state_oneshot(dev)) { ++ if (unlikely(!dev->mult)) { ++ dev->mult = 1; ++ WARN_ON(1); ++ } ++ } ++ } ++} ++ ++/** ++ * clockevents_shutdown - shutdown the device and clear next_event ++ * @dev: device to shutdown ++ */ ++void clockevents_shutdown(struct clock_event_device *dev) ++{ ++ clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); ++ dev->next_event = KTIME_MAX; ++} ++ ++/** ++ * clockevents_tick_resume - Resume the tick device before using it again ++ * @dev: device to resume ++ */ ++int clockevents_tick_resume(struct clock_event_device *dev) ++{ ++ int ret = 0; ++ ++ if (dev->tick_resume) ++ ret = dev->tick_resume(dev); ++ ++ return ret; ++} ++ ++#ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST ++ ++/* Limit min_delta to a jiffie */ ++#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) ++ ++/** ++ * clockevents_increase_min_delta - raise minimum delta of a clock event device ++ * @dev: device to increase the minimum delta ++ * ++ * Returns 0 on success, -ETIME when the minimum delta reached the limit. ++ */ ++static int clockevents_increase_min_delta(struct clock_event_device *dev) ++{ ++ /* Nothing to do if we already reached the limit */ ++ if (dev->min_delta_ns >= MIN_DELTA_LIMIT) { ++ printk_deferred(KERN_WARNING ++ "CE: Reprogramming failure. Giving up\n"); ++ dev->next_event = KTIME_MAX; ++ return -ETIME; ++ } ++ ++ if (dev->min_delta_ns < 5000) ++ dev->min_delta_ns = 5000; ++ else ++ dev->min_delta_ns += dev->min_delta_ns >> 1; ++ ++ if (dev->min_delta_ns > MIN_DELTA_LIMIT) ++ dev->min_delta_ns = MIN_DELTA_LIMIT; ++ ++ printk_deferred(KERN_WARNING ++ "CE: %s increased min_delta_ns to %llu nsec\n", ++ dev->name ? dev->name : "?", ++ (unsigned long long) dev->min_delta_ns); ++ return 0; ++} ++ ++/** ++ * clockevents_program_min_delta - Set clock event device to the minimum delay. ++ * @dev: device to program ++ * ++ * Returns 0 on success, -ETIME when the retry loop failed. 
++ */ ++static int clockevents_program_min_delta(struct clock_event_device *dev) ++{ ++ unsigned long long clc; ++ int64_t delta; ++ int i; ++ ++ for (i = 0;;) { ++ delta = dev->min_delta_ns; ++ dev->next_event = ktime_add_ns(ktime_get(), delta); ++ ++ if (clockevent_state_shutdown(dev)) ++ return 0; ++ ++ dev->retries++; ++ clc = ((unsigned long long) delta * dev->mult) >> dev->shift; ++ if (dev->set_next_event((unsigned long) clc, dev) == 0) ++ return 0; ++ ++ if (++i > 2) { ++ /* ++ * We tried 3 times to program the device with the ++ * given min_delta_ns. Try to increase the minimum ++ * delta, if that fails as well get out of here. ++ */ ++ if (clockevents_increase_min_delta(dev)) ++ return -ETIME; ++ i = 0; ++ } ++ } ++} ++ ++#else /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ ++ ++/** ++ * clockevents_program_min_delta - Set clock event device to the minimum delay. ++ * @dev: device to program ++ * ++ * Returns 0 on success, -ETIME when the retry loop failed. ++ */ ++static int clockevents_program_min_delta(struct clock_event_device *dev) ++{ ++ unsigned long long clc; ++ int64_t delta = 0; ++ int i; ++ ++ for (i = 0; i < 10; i++) { ++ delta += dev->min_delta_ns; ++ dev->next_event = ktime_add_ns(ktime_get(), delta); ++ ++ if (clockevent_state_shutdown(dev)) ++ return 0; ++ ++ dev->retries++; ++ clc = ((unsigned long long) delta * dev->mult) >> dev->shift; ++ if (dev->set_next_event((unsigned long) clc, dev) == 0) ++ return 0; ++ } ++ return -ETIME; ++} ++ ++#endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ ++ ++/** ++ * clockevents_program_event - Reprogram the clock event device. ++ * @dev: device to program ++ * @expires: absolute expiry time (monotonic clock) ++ * @force: program minimum delay if expires can not be set ++ * ++ * Returns 0 on success, -ETIME when the event is in the past. ++ */ ++int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, ++ bool force) ++{ ++ unsigned long long clc; ++ int64_t delta; ++ int rc; ++ ++ if (unlikely(expires < 0)) { ++ WARN_ON_ONCE(1); ++ return -ETIME; ++ } ++ ++ dev->next_event = expires; ++ ++ if (clockevent_state_shutdown(dev)) ++ return 0; ++ ++ /* We must be in ONESHOT state here */ ++ WARN_ONCE(!clockevent_state_oneshot(dev), "Current state: %d\n", ++ clockevent_get_state(dev)); ++ ++ /* Shortcut for clockevent devices that can deal with ktime. */ ++ if (dev->features & CLOCK_EVT_FEAT_KTIME) ++ return dev->set_next_ktime(expires, dev); ++ ++ delta = ktime_to_ns(ktime_sub(expires, ktime_get())); ++ if (delta <= 0) ++ return force ? clockevents_program_min_delta(dev) : -ETIME; ++ ++ delta = min(delta, (int64_t) dev->max_delta_ns); ++ delta = max(delta, (int64_t) dev->min_delta_ns); ++ ++ clc = ((unsigned long long) delta * dev->mult) >> dev->shift; ++ rc = dev->set_next_event((unsigned long) clc, dev); ++ ++ return (rc && force) ? clockevents_program_min_delta(dev) : rc; ++} ++ ++/* ++ * Called after a notify add to make devices available which were ++ * released from the notifier call. 
++ */ ++static void clockevents_notify_released(void) ++{ ++ struct clock_event_device *dev; ++ ++ while (!list_empty(&clockevents_released)) { ++ dev = list_entry(clockevents_released.next, ++ struct clock_event_device, list); ++ list_del(&dev->list); ++ list_add(&dev->list, &clockevent_devices); ++ tick_check_new_device(dev); ++ } ++} ++ ++/* ++ * Try to install a replacement clock event device ++ */ ++static int clockevents_replace(struct clock_event_device *ced) ++{ ++ struct clock_event_device *dev, *newdev = NULL; ++ ++ list_for_each_entry(dev, &clockevent_devices, list) { ++ if (dev == ced || !clockevent_state_detached(dev)) ++ continue; ++ ++ if (!tick_check_replacement(newdev, dev)) ++ continue; ++ ++ if (!try_module_get(dev->owner)) ++ continue; ++ ++ if (newdev) ++ module_put(newdev->owner); ++ newdev = dev; ++ } ++ if (newdev) { ++ tick_install_replacement(newdev); ++ list_del_init(&ced->list); ++ } ++ return newdev ? 0 : -EBUSY; ++} ++ ++/* ++ * Called with clockevents_mutex and clockevents_lock held ++ */ ++static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) ++{ ++ /* Fast track. Device is unused */ ++ if (clockevent_state_detached(ced)) { ++ list_del_init(&ced->list); ++ return 0; ++ } ++ ++ return ced == per_cpu(tick_cpu_device, cpu).evtdev ? -EAGAIN : -EBUSY; ++} ++ ++/* ++ * SMP function call to unbind a device ++ */ ++static void __clockevents_unbind(void *arg) ++{ ++ struct ce_unbind *cu = arg; ++ int res; ++ ++ raw_spin_lock(&clockevents_lock); ++ res = __clockevents_try_unbind(cu->ce, smp_processor_id()); ++ if (res == -EAGAIN) ++ res = clockevents_replace(cu->ce); ++ cu->res = res; ++ raw_spin_unlock(&clockevents_lock); ++} ++ ++/* ++ * Issues smp function call to unbind a per cpu device. Called with ++ * clockevents_mutex held. ++ */ ++static int clockevents_unbind(struct clock_event_device *ced, int cpu) ++{ ++ struct ce_unbind cu = { .ce = ced, .res = -ENODEV }; ++ ++ smp_call_function_single(cpu, __clockevents_unbind, &cu, 1); ++ return cu.res; ++} ++ ++/* ++ * Unbind a clockevents device. ++ */ ++int clockevents_unbind_device(struct clock_event_device *ced, int cpu) ++{ ++ int ret; ++ ++ mutex_lock(&clockevents_mutex); ++ ret = clockevents_unbind(ced, cpu); ++ mutex_unlock(&clockevents_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(clockevents_unbind_device); ++ ++/** ++ * clockevents_register_device - register a clock event device ++ * @dev: device to register ++ */ ++void clockevents_register_device(struct clock_event_device *dev) ++{ ++ unsigned long flags; ++ ++ /* Initialize state to DETACHED */ ++ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); ++ ++ if (!dev->cpumask) { ++ WARN_ON(num_possible_cpus() > 1); ++ dev->cpumask = cpumask_of(smp_processor_id()); ++ } ++ ++ if (dev->cpumask == cpu_all_mask) { ++ WARN(1, "%s cpumask == cpu_all_mask, using cpu_possible_mask instead\n", ++ dev->name); ++ dev->cpumask = cpu_possible_mask; ++ } ++ ++ raw_spin_lock_irqsave(&clockevents_lock, flags); ++ ++ list_add(&dev->list, &clockevent_devices); ++ tick_check_new_device(dev); ++ clockevents_notify_released(); ++ ++ raw_spin_unlock_irqrestore(&clockevents_lock, flags); ++} ++EXPORT_SYMBOL_GPL(clockevents_register_device); ++ ++static void clockevents_config(struct clock_event_device *dev, u32 freq) ++{ ++ u64 sec; ++ ++ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) ++ return; ++ ++ /* ++ * Calculate the maximum number of seconds we can sleep. 
Limit ++ * to 10 minutes for hardware which can program more than ++ * 32bit ticks so we still get reasonable conversion values. ++ */ ++ sec = dev->max_delta_ticks; ++ do_div(sec, freq); ++ if (!sec) ++ sec = 1; ++ else if (sec > 600 && dev->max_delta_ticks > UINT_MAX) ++ sec = 600; ++ ++ clockevents_calc_mult_shift(dev, freq, sec); ++ dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false); ++ dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true); ++} ++ ++/** ++ * clockevents_config_and_register - Configure and register a clock event device ++ * @dev: device to register ++ * @freq: The clock frequency ++ * @min_delta: The minimum clock ticks to program in oneshot mode ++ * @max_delta: The maximum clock ticks to program in oneshot mode ++ * ++ * min/max_delta can be 0 for devices which do not support oneshot mode. ++ */ ++void clockevents_config_and_register(struct clock_event_device *dev, ++ u32 freq, unsigned long min_delta, ++ unsigned long max_delta) ++{ ++ dev->min_delta_ticks = min_delta; ++ dev->max_delta_ticks = max_delta; ++ clockevents_config(dev, freq); ++ clockevents_register_device(dev); ++} ++EXPORT_SYMBOL_GPL(clockevents_config_and_register); ++ ++int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) ++{ ++ clockevents_config(dev, freq); ++ ++ if (clockevent_state_oneshot(dev)) ++ return clockevents_program_event(dev, dev->next_event, false); ++ ++ if (clockevent_state_periodic(dev)) ++ return __clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC); ++ ++ return 0; ++} ++ ++/** ++ * clockevents_update_freq - Update frequency and reprogram a clock event device. ++ * @dev: device to modify ++ * @freq: new device frequency ++ * ++ * Reconfigure and reprogram a clock event device in oneshot ++ * mode. Must be called on the cpu for which the device delivers per ++ * cpu timer events. If called for the broadcast device the core takes ++ * care of serialization. ++ * ++ * Returns 0 on success, -ETIME when the event is in the past. ++ */ ++int clockevents_update_freq(struct clock_event_device *dev, u32 freq) ++{ ++ unsigned long flags; ++ int ret; ++ ++ local_irq_save(flags); ++ ret = tick_broadcast_update_freq(dev, freq); ++ if (ret == -ENODEV) ++ ret = __clockevents_update_freq(dev, freq); ++ local_irq_restore(flags); ++ return ret; ++} ++ ++/* ++ * Noop handler when we shut down an event device ++ */ ++void clockevents_handle_noop(struct clock_event_device *dev) ++{ ++} ++ ++/** ++ * clockevents_exchange_device - release and request clock devices ++ * @old: device to release (can be NULL) ++ * @new: device to request (can be NULL) ++ * ++ * Called from various tick functions with clockevents_lock held and ++ * interrupts disabled. ++ */ ++void clockevents_exchange_device(struct clock_event_device *old, ++ struct clock_event_device *new) ++{ ++ /* ++ * Caller releases a clock event device. We queue it into the ++ * released list and do a notify add later. 
++ */ ++ if (old) { ++ module_put(old->owner); ++ clockevents_switch_state(old, CLOCK_EVT_STATE_DETACHED); ++ list_del(&old->list); ++ list_add(&old->list, &clockevents_released); ++ } ++ ++ if (new) { ++ BUG_ON(!clockevent_state_detached(new)); ++ clockevents_shutdown(new); ++ } ++} ++ ++/** ++ * clockevents_suspend - suspend clock devices ++ */ ++void clockevents_suspend(void) ++{ ++ struct clock_event_device *dev; ++ ++ list_for_each_entry_reverse(dev, &clockevent_devices, list) ++ if (dev->suspend && !clockevent_state_detached(dev)) ++ dev->suspend(dev); ++} ++ ++/** ++ * clockevents_resume - resume clock devices ++ */ ++void clockevents_resume(void) ++{ ++ struct clock_event_device *dev; ++ ++ list_for_each_entry(dev, &clockevent_devices, list) ++ if (dev->resume && !clockevent_state_detached(dev)) ++ dev->resume(dev); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ ++# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST ++/** ++ * tick_offline_cpu - Take CPU out of the broadcast mechanism ++ * @cpu: The outgoing CPU ++ * ++ * Called on the outgoing CPU after it took itself offline. ++ */ ++void tick_offline_cpu(unsigned int cpu) ++{ ++ raw_spin_lock(&clockevents_lock); ++ tick_broadcast_offline(cpu); ++ raw_spin_unlock(&clockevents_lock); ++} ++# endif ++ ++/** ++ * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu ++ */ ++void tick_cleanup_dead_cpu(int cpu) ++{ ++ struct clock_event_device *dev, *tmp; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&clockevents_lock, flags); ++ ++ tick_shutdown(cpu); ++ /* ++ * Unregister the clock event devices which were ++ * released from the users in the notify chain. ++ */ ++ list_for_each_entry_safe(dev, tmp, &clockevents_released, list) ++ list_del(&dev->list); ++ /* ++ * Now check whether the CPU has left unused per cpu devices ++ */ ++ list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { ++ if (cpumask_test_cpu(cpu, dev->cpumask) && ++ cpumask_weight(dev->cpumask) == 1 && ++ !tick_is_broadcast_device(dev)) { ++ BUG_ON(!clockevent_state_detached(dev)); ++ list_del(&dev->list); ++ } ++ } ++ raw_spin_unlock_irqrestore(&clockevents_lock, flags); ++} ++#endif ++ ++#ifdef CONFIG_SYSFS ++static struct bus_type clockevents_subsys = { ++ .name = "clockevents", ++ .dev_name = "clockevent", ++}; ++ ++static DEFINE_PER_CPU(struct device, tick_percpu_dev); ++static struct tick_device *tick_get_tick_dev(struct device *dev); ++ ++static ssize_t sysfs_show_current_tick_dev(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct tick_device *td; ++ ssize_t count = 0; ++ ++ raw_spin_lock_irq(&clockevents_lock); ++ td = tick_get_tick_dev(dev); ++ if (td && td->evtdev) ++ count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name); ++ raw_spin_unlock_irq(&clockevents_lock); ++ return count; ++} ++static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL); ++ ++/* We don't support the abomination of removable broadcast devices */ ++static ssize_t sysfs_unbind_tick_dev(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ char name[CS_NAME_LEN]; ++ ssize_t ret = sysfs_get_uname(buf, name, count); ++ struct clock_event_device *ce; ++ ++ if (ret < 0) ++ return ret; ++ ++ ret = -ENODEV; ++ mutex_lock(&clockevents_mutex); ++ raw_spin_lock_irq(&clockevents_lock); ++ list_for_each_entry(ce, &clockevent_devices, list) { ++ if (!strcmp(ce->name, name)) { ++ ret = __clockevents_try_unbind(ce, dev->id); ++ break; ++ } ++ } ++ raw_spin_unlock_irq(&clockevents_lock); ++ /* ++ 
* We hold clockevents_mutex, so ce can't go away ++ */ ++ if (ret == -EAGAIN) ++ ret = clockevents_unbind(ce, dev->id); ++ mutex_unlock(&clockevents_mutex); ++ return ret ? ret : count; ++} ++static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev); ++ ++#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST ++static struct device tick_bc_dev = { ++ .init_name = "broadcast", ++ .id = 0, ++ .bus = &clockevents_subsys, ++}; ++ ++static struct tick_device *tick_get_tick_dev(struct device *dev) ++{ ++ return dev == &tick_bc_dev ? tick_get_broadcast_device() : ++ &per_cpu(tick_cpu_device, dev->id); ++} ++ ++static __init int tick_broadcast_init_sysfs(void) ++{ ++ int err = device_register(&tick_bc_dev); ++ ++ if (!err) ++ err = device_create_file(&tick_bc_dev, &dev_attr_current_device); ++ return err; ++} ++#else ++static struct tick_device *tick_get_tick_dev(struct device *dev) ++{ ++ return &per_cpu(tick_cpu_device, dev->id); ++} ++static inline int tick_broadcast_init_sysfs(void) { return 0; } ++#endif ++ ++static int __init tick_init_sysfs(void) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ struct device *dev = &per_cpu(tick_percpu_dev, cpu); ++ int err; ++ ++ dev->id = cpu; ++ dev->bus = &clockevents_subsys; ++ err = device_register(dev); ++ if (!err) ++ err = device_create_file(dev, &dev_attr_current_device); ++ if (!err) ++ err = device_create_file(dev, &dev_attr_unbind_device); ++ if (err) ++ return err; ++ } ++ return tick_broadcast_init_sysfs(); ++} ++ ++static int __init clockevents_init_sysfs(void) ++{ ++ int err = subsys_system_register(&clockevents_subsys, NULL); ++ ++ if (!err) ++ err = tick_init_sysfs(); ++ return err; ++} ++device_initcall(clockevents_init_sysfs); ++#endif /* SYSFS */ +diff -uprN kernel/kernel/time/timer.c kernel_new/kernel/time/timer.c +--- kernel/kernel/time/timer.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/time/timer.c 2021-04-01 18:28:07.813863111 +0800 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1636,6 +1637,15 @@ static inline int collect_expired_timers + } + #endif + ++static inline void do_account_tick(struct task_struct *p, int user_tick) ++{ ++#ifdef CONFIG_IPIPE ++ if (!__ipipe_root_tick_p(raw_cpu_ptr(&ipipe_percpu.tick_regs))) ++ return; ++#endif ++ account_process_tick(p, user_tick); ++} ++ + /* + * Called from the timer interrupt handler to charge one tick to the current + * process. user_tick is 1 if the tick is user time, 0 for system. +@@ -1645,7 +1655,7 @@ void update_process_times(int user_tick) + struct task_struct *p = current; + + /* Note: this timer irq context must be accounted for as well. */ +- account_process_tick(p, user_tick); ++ do_account_tick(p, user_tick); + run_local_timers(); + rcu_check_callbacks(user_tick); + #ifdef CONFIG_IRQ_WORK +diff -uprN kernel/kernel/time/timer.c.orig kernel_new/kernel/time/timer.c.orig +--- kernel/kernel/time/timer.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/time/timer.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2020 @@ ++/* ++ * linux/kernel/timer.c ++ * ++ * Kernel internal timers ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ * ++ * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. ++ * ++ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 ++ * "A Kernel Model for Precision Timekeeping" by Dave Mills ++ * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to ++ * serialize accesses to xtime/lost_ticks). 
++ * Copyright (C) 1998 Andrea Arcangeli ++ * 1999-03-10 Improved NTP compatibility by Ulrich Windl ++ * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love ++ * 2000-10-05 Implemented scalable SMP per-CPU timer handling. ++ * Copyright (C) 2000, 2001, 2002 Ingo Molnar ++ * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "tick-internal.h" ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; ++ ++EXPORT_SYMBOL(jiffies_64); ++ ++/* ++ * The timer wheel has LVL_DEPTH array levels. Each level provides an array of ++ * LVL_SIZE buckets. Each level is driven by its own clock and therefor each ++ * level has a different granularity. ++ * ++ * The level granularity is: LVL_CLK_DIV ^ lvl ++ * The level clock frequency is: HZ / (LVL_CLK_DIV ^ level) ++ * ++ * The array level of a newly armed timer depends on the relative expiry ++ * time. The farther the expiry time is away the higher the array level and ++ * therefor the granularity becomes. ++ * ++ * Contrary to the original timer wheel implementation, which aims for 'exact' ++ * expiry of the timers, this implementation removes the need for recascading ++ * the timers into the lower array levels. The previous 'classic' timer wheel ++ * implementation of the kernel already violated the 'exact' expiry by adding ++ * slack to the expiry time to provide batched expiration. The granularity ++ * levels provide implicit batching. ++ * ++ * This is an optimization of the original timer wheel implementation for the ++ * majority of the timer wheel use cases: timeouts. The vast majority of ++ * timeout timers (networking, disk I/O ...) are canceled before expiry. If ++ * the timeout expires it indicates that normal operation is disturbed, so it ++ * does not matter much whether the timeout comes with a slight delay. ++ * ++ * The only exception to this are networking timers with a small expiry ++ * time. They rely on the granularity. Those fit into the first wheel level, ++ * which has HZ granularity. ++ * ++ * We don't have cascading anymore. timers with a expiry time above the ++ * capacity of the last wheel level are force expired at the maximum timeout ++ * value of the last wheel level. From data sampling we know that the maximum ++ * value observed is 5 days (network connection tracking), so this should not ++ * be an issue. ++ * ++ * The currently chosen array constants values are a good compromise between ++ * array size and granularity. 
++ * ++ * This results in the following granularity and range levels: ++ * ++ * HZ 1000 steps ++ * Level Offset Granularity Range ++ * 0 0 1 ms 0 ms - 63 ms ++ * 1 64 8 ms 64 ms - 511 ms ++ * 2 128 64 ms 512 ms - 4095 ms (512ms - ~4s) ++ * 3 192 512 ms 4096 ms - 32767 ms (~4s - ~32s) ++ * 4 256 4096 ms (~4s) 32768 ms - 262143 ms (~32s - ~4m) ++ * 5 320 32768 ms (~32s) 262144 ms - 2097151 ms (~4m - ~34m) ++ * 6 384 262144 ms (~4m) 2097152 ms - 16777215 ms (~34m - ~4h) ++ * 7 448 2097152 ms (~34m) 16777216 ms - 134217727 ms (~4h - ~1d) ++ * 8 512 16777216 ms (~4h) 134217728 ms - 1073741822 ms (~1d - ~12d) ++ * ++ * HZ 300 ++ * Level Offset Granularity Range ++ * 0 0 3 ms 0 ms - 210 ms ++ * 1 64 26 ms 213 ms - 1703 ms (213ms - ~1s) ++ * 2 128 213 ms 1706 ms - 13650 ms (~1s - ~13s) ++ * 3 192 1706 ms (~1s) 13653 ms - 109223 ms (~13s - ~1m) ++ * 4 256 13653 ms (~13s) 109226 ms - 873810 ms (~1m - ~14m) ++ * 5 320 109226 ms (~1m) 873813 ms - 6990503 ms (~14m - ~1h) ++ * 6 384 873813 ms (~14m) 6990506 ms - 55924050 ms (~1h - ~15h) ++ * 7 448 6990506 ms (~1h) 55924053 ms - 447392423 ms (~15h - ~5d) ++ * 8 512 55924053 ms (~15h) 447392426 ms - 3579139406 ms (~5d - ~41d) ++ * ++ * HZ 250 ++ * Level Offset Granularity Range ++ * 0 0 4 ms 0 ms - 255 ms ++ * 1 64 32 ms 256 ms - 2047 ms (256ms - ~2s) ++ * 2 128 256 ms 2048 ms - 16383 ms (~2s - ~16s) ++ * 3 192 2048 ms (~2s) 16384 ms - 131071 ms (~16s - ~2m) ++ * 4 256 16384 ms (~16s) 131072 ms - 1048575 ms (~2m - ~17m) ++ * 5 320 131072 ms (~2m) 1048576 ms - 8388607 ms (~17m - ~2h) ++ * 6 384 1048576 ms (~17m) 8388608 ms - 67108863 ms (~2h - ~18h) ++ * 7 448 8388608 ms (~2h) 67108864 ms - 536870911 ms (~18h - ~6d) ++ * 8 512 67108864 ms (~18h) 536870912 ms - 4294967288 ms (~6d - ~49d) ++ * ++ * HZ 100 ++ * Level Offset Granularity Range ++ * 0 0 10 ms 0 ms - 630 ms ++ * 1 64 80 ms 640 ms - 5110 ms (640ms - ~5s) ++ * 2 128 640 ms 5120 ms - 40950 ms (~5s - ~40s) ++ * 3 192 5120 ms (~5s) 40960 ms - 327670 ms (~40s - ~5m) ++ * 4 256 40960 ms (~40s) 327680 ms - 2621430 ms (~5m - ~43m) ++ * 5 320 327680 ms (~5m) 2621440 ms - 20971510 ms (~43m - ~5h) ++ * 6 384 2621440 ms (~43m) 20971520 ms - 167772150 ms (~5h - ~1d) ++ * 7 448 20971520 ms (~5h) 167772160 ms - 1342177270 ms (~1d - ~15d) ++ */ ++ ++/* Clock divisor for the next level */ ++#define LVL_CLK_SHIFT 3 ++#define LVL_CLK_DIV (1UL << LVL_CLK_SHIFT) ++#define LVL_CLK_MASK (LVL_CLK_DIV - 1) ++#define LVL_SHIFT(n) ((n) * LVL_CLK_SHIFT) ++#define LVL_GRAN(n) (1UL << LVL_SHIFT(n)) ++ ++/* ++ * The time start value for each level to select the bucket at enqueue ++ * time. ++ */ ++#define LVL_START(n) ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT)) ++ ++/* Size of each clock level */ ++#define LVL_BITS 6 ++#define LVL_SIZE (1UL << LVL_BITS) ++#define LVL_MASK (LVL_SIZE - 1) ++#define LVL_OFFS(n) ((n) * LVL_SIZE) ++ ++/* Level depth */ ++#if HZ > 100 ++# define LVL_DEPTH 9 ++# else ++# define LVL_DEPTH 8 ++#endif ++ ++/* The cutoff (max. capacity of the wheel) */ ++#define WHEEL_TIMEOUT_CUTOFF (LVL_START(LVL_DEPTH)) ++#define WHEEL_TIMEOUT_MAX (WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1)) ++ ++/* ++ * The resulting wheel size. If NOHZ is configured we allocate two ++ * wheels so we have a separate storage for the deferrable timers. 
++ */ ++#define WHEEL_SIZE (LVL_SIZE * LVL_DEPTH) ++ ++#ifdef CONFIG_NO_HZ_COMMON ++# define NR_BASES 2 ++# define BASE_STD 0 ++# define BASE_DEF 1 ++#else ++# define NR_BASES 1 ++# define BASE_STD 0 ++# define BASE_DEF 0 ++#endif ++ ++struct timer_base { ++ raw_spinlock_t lock; ++ struct timer_list *running_timer; ++ unsigned long clk; ++ unsigned long next_expiry; ++ unsigned int cpu; ++ bool is_idle; ++ bool must_forward_clk; ++ DECLARE_BITMAP(pending_map, WHEEL_SIZE); ++ struct hlist_head vectors[WHEEL_SIZE]; ++} ____cacheline_aligned; ++ ++static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); ++ ++#ifdef CONFIG_NO_HZ_COMMON ++ ++static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); ++static DEFINE_MUTEX(timer_keys_mutex); ++ ++static void timer_update_keys(struct work_struct *work); ++static DECLARE_WORK(timer_update_work, timer_update_keys); ++ ++#ifdef CONFIG_SMP ++unsigned int sysctl_timer_migration = 1; ++ ++DEFINE_STATIC_KEY_FALSE(timers_migration_enabled); ++ ++static void timers_update_migration(void) ++{ ++ if (sysctl_timer_migration && tick_nohz_active) ++ static_branch_enable(&timers_migration_enabled); ++ else ++ static_branch_disable(&timers_migration_enabled); ++} ++#else ++static inline void timers_update_migration(void) { } ++#endif /* !CONFIG_SMP */ ++ ++static void timer_update_keys(struct work_struct *work) ++{ ++ mutex_lock(&timer_keys_mutex); ++ timers_update_migration(); ++ static_branch_enable(&timers_nohz_active); ++ mutex_unlock(&timer_keys_mutex); ++} ++ ++void timers_update_nohz(void) ++{ ++ schedule_work(&timer_update_work); ++} ++ ++int timer_migration_handler(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, ++ loff_t *ppos) ++{ ++ int ret; ++ ++ mutex_lock(&timer_keys_mutex); ++ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); ++ if (!ret && write) ++ timers_update_migration(); ++ mutex_unlock(&timer_keys_mutex); ++ return ret; ++} ++ ++static inline bool is_timers_nohz_active(void) ++{ ++ return static_branch_unlikely(&timers_nohz_active); ++} ++#else ++static inline bool is_timers_nohz_active(void) { return false; } ++#endif /* NO_HZ_COMMON */ ++ ++static unsigned long round_jiffies_common(unsigned long j, int cpu, ++ bool force_up) ++{ ++ int rem; ++ unsigned long original = j; ++ ++ /* ++ * We don't want all cpus firing their timers at once hitting the ++ * same lock or cachelines, so we skew each extra cpu with an extra ++ * 3 jiffies. This 3 jiffies came originally from the mm/ code which ++ * already did this. ++ * The skew is done by adding 3*cpunr, then round, then subtract this ++ * extra offset again. ++ */ ++ j += cpu * 3; ++ ++ rem = j % HZ; ++ ++ /* ++ * If the target jiffie is just after a whole second (which can happen ++ * due to delays of the timer irq, long irq off times etc etc) then ++ * we should round down to the whole second, not up. Use 1/4th second ++ * as cutoff for this rounding as an extreme upper bound for this. ++ * But never round down if @force_up is set. ++ */ ++ if (rem < HZ/4 && !force_up) /* round down */ ++ j = j - rem; ++ else /* round up */ ++ j = j - rem + HZ; ++ ++ /* now that we have rounded, subtract the extra skew again */ ++ j -= cpu * 3; ++ ++ /* ++ * Make sure j is still in the future. Otherwise return the ++ * unmodified value. ++ */ ++ return time_is_after_jiffies(j) ? 
j : original; ++} ++ ++/** ++ * __round_jiffies - function to round jiffies to a full second ++ * @j: the time in (absolute) jiffies that should be rounded ++ * @cpu: the processor number on which the timeout will happen ++ * ++ * __round_jiffies() rounds an absolute time in the future (in jiffies) ++ * up or down to (approximately) full seconds. This is useful for timers ++ * for which the exact time they fire does not matter too much, as long as ++ * they fire approximately every X seconds. ++ * ++ * By rounding these timers to whole seconds, all such timers will fire ++ * at the same time, rather than at various times spread out. The goal ++ * of this is to have the CPU wake up less, which saves power. ++ * ++ * The exact rounding is skewed for each processor to avoid all ++ * processors firing at the exact same time, which could lead ++ * to lock contention or spurious cache line bouncing. ++ * ++ * The return value is the rounded version of the @j parameter. ++ */ ++unsigned long __round_jiffies(unsigned long j, int cpu) ++{ ++ return round_jiffies_common(j, cpu, false); ++} ++EXPORT_SYMBOL_GPL(__round_jiffies); ++ ++/** ++ * __round_jiffies_relative - function to round jiffies to a full second ++ * @j: the time in (relative) jiffies that should be rounded ++ * @cpu: the processor number on which the timeout will happen ++ * ++ * __round_jiffies_relative() rounds a time delta in the future (in jiffies) ++ * up or down to (approximately) full seconds. This is useful for timers ++ * for which the exact time they fire does not matter too much, as long as ++ * they fire approximately every X seconds. ++ * ++ * By rounding these timers to whole seconds, all such timers will fire ++ * at the same time, rather than at various times spread out. The goal ++ * of this is to have the CPU wake up less, which saves power. ++ * ++ * The exact rounding is skewed for each processor to avoid all ++ * processors firing at the exact same time, which could lead ++ * to lock contention or spurious cache line bouncing. ++ * ++ * The return value is the rounded version of the @j parameter. ++ */ ++unsigned long __round_jiffies_relative(unsigned long j, int cpu) ++{ ++ unsigned long j0 = jiffies; ++ ++ /* Use j0 because jiffies might change while we run */ ++ return round_jiffies_common(j + j0, cpu, false) - j0; ++} ++EXPORT_SYMBOL_GPL(__round_jiffies_relative); ++ ++/** ++ * round_jiffies - function to round jiffies to a full second ++ * @j: the time in (absolute) jiffies that should be rounded ++ * ++ * round_jiffies() rounds an absolute time in the future (in jiffies) ++ * up or down to (approximately) full seconds. This is useful for timers ++ * for which the exact time they fire does not matter too much, as long as ++ * they fire approximately every X seconds. ++ * ++ * By rounding these timers to whole seconds, all such timers will fire ++ * at the same time, rather than at various times spread out. The goal ++ * of this is to have the CPU wake up less, which saves power. ++ * ++ * The return value is the rounded version of the @j parameter. ++ */ ++unsigned long round_jiffies(unsigned long j) ++{ ++ return round_jiffies_common(j, raw_smp_processor_id(), false); ++} ++EXPORT_SYMBOL_GPL(round_jiffies); ++ ++/** ++ * round_jiffies_relative - function to round jiffies to a full second ++ * @j: the time in (relative) jiffies that should be rounded ++ * ++ * round_jiffies_relative() rounds a time delta in the future (in jiffies) ++ * up or down to (approximately) full seconds. 
This is useful for timers ++ * for which the exact time they fire does not matter too much, as long as ++ * they fire approximately every X seconds. ++ * ++ * By rounding these timers to whole seconds, all such timers will fire ++ * at the same time, rather than at various times spread out. The goal ++ * of this is to have the CPU wake up less, which saves power. ++ * ++ * The return value is the rounded version of the @j parameter. ++ */ ++unsigned long round_jiffies_relative(unsigned long j) ++{ ++ return __round_jiffies_relative(j, raw_smp_processor_id()); ++} ++EXPORT_SYMBOL_GPL(round_jiffies_relative); ++ ++/** ++ * __round_jiffies_up - function to round jiffies up to a full second ++ * @j: the time in (absolute) jiffies that should be rounded ++ * @cpu: the processor number on which the timeout will happen ++ * ++ * This is the same as __round_jiffies() except that it will never ++ * round down. This is useful for timeouts for which the exact time ++ * of firing does not matter too much, as long as they don't fire too ++ * early. ++ */ ++unsigned long __round_jiffies_up(unsigned long j, int cpu) ++{ ++ return round_jiffies_common(j, cpu, true); ++} ++EXPORT_SYMBOL_GPL(__round_jiffies_up); ++ ++/** ++ * __round_jiffies_up_relative - function to round jiffies up to a full second ++ * @j: the time in (relative) jiffies that should be rounded ++ * @cpu: the processor number on which the timeout will happen ++ * ++ * This is the same as __round_jiffies_relative() except that it will never ++ * round down. This is useful for timeouts for which the exact time ++ * of firing does not matter too much, as long as they don't fire too ++ * early. ++ */ ++unsigned long __round_jiffies_up_relative(unsigned long j, int cpu) ++{ ++ unsigned long j0 = jiffies; ++ ++ /* Use j0 because jiffies might change while we run */ ++ return round_jiffies_common(j + j0, cpu, true) - j0; ++} ++EXPORT_SYMBOL_GPL(__round_jiffies_up_relative); ++ ++/** ++ * round_jiffies_up - function to round jiffies up to a full second ++ * @j: the time in (absolute) jiffies that should be rounded ++ * ++ * This is the same as round_jiffies() except that it will never ++ * round down. This is useful for timeouts for which the exact time ++ * of firing does not matter too much, as long as they don't fire too ++ * early. ++ */ ++unsigned long round_jiffies_up(unsigned long j) ++{ ++ return round_jiffies_common(j, raw_smp_processor_id(), true); ++} ++EXPORT_SYMBOL_GPL(round_jiffies_up); ++ ++/** ++ * round_jiffies_up_relative - function to round jiffies up to a full second ++ * @j: the time in (relative) jiffies that should be rounded ++ * ++ * This is the same as round_jiffies_relative() except that it will never ++ * round down. This is useful for timeouts for which the exact time ++ * of firing does not matter too much, as long as they don't fire too ++ * early. ++ */ ++unsigned long round_jiffies_up_relative(unsigned long j) ++{ ++ return __round_jiffies_up_relative(j, raw_smp_processor_id()); ++} ++EXPORT_SYMBOL_GPL(round_jiffies_up_relative); ++ ++ ++static inline unsigned int timer_get_idx(struct timer_list *timer) ++{ ++ return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT; ++} ++ ++static inline void timer_set_idx(struct timer_list *timer, unsigned int idx) ++{ ++ timer->flags = (timer->flags & ~TIMER_ARRAYMASK) | ++ idx << TIMER_ARRAYSHIFT; ++} ++ ++/* ++ * Helper function to calculate the array index for a given expiry ++ * time. 
++ */ ++static inline unsigned calc_index(unsigned expires, unsigned lvl) ++{ ++ expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl); ++ return LVL_OFFS(lvl) + (expires & LVL_MASK); ++} ++ ++static int calc_wheel_index(unsigned long expires, unsigned long clk) ++{ ++ unsigned long delta = expires - clk; ++ unsigned int idx; ++ ++ if (delta < LVL_START(1)) { ++ idx = calc_index(expires, 0); ++ } else if (delta < LVL_START(2)) { ++ idx = calc_index(expires, 1); ++ } else if (delta < LVL_START(3)) { ++ idx = calc_index(expires, 2); ++ } else if (delta < LVL_START(4)) { ++ idx = calc_index(expires, 3); ++ } else if (delta < LVL_START(5)) { ++ idx = calc_index(expires, 4); ++ } else if (delta < LVL_START(6)) { ++ idx = calc_index(expires, 5); ++ } else if (delta < LVL_START(7)) { ++ idx = calc_index(expires, 6); ++ } else if (LVL_DEPTH > 8 && delta < LVL_START(8)) { ++ idx = calc_index(expires, 7); ++ } else if ((long) delta < 0) { ++ idx = clk & LVL_MASK; ++ } else { ++ /* ++ * Force expire obscene large timeouts to expire at the ++ * capacity limit of the wheel. ++ */ ++ if (delta >= WHEEL_TIMEOUT_CUTOFF) ++ expires = clk + WHEEL_TIMEOUT_MAX; ++ ++ idx = calc_index(expires, LVL_DEPTH - 1); ++ } ++ return idx; ++} ++ ++/* ++ * Enqueue the timer into the hash bucket, mark it pending in ++ * the bitmap and store the index in the timer flags. ++ */ ++static void enqueue_timer(struct timer_base *base, struct timer_list *timer, ++ unsigned int idx) ++{ ++ hlist_add_head(&timer->entry, base->vectors + idx); ++ __set_bit(idx, base->pending_map); ++ timer_set_idx(timer, idx); ++} ++ ++static void ++__internal_add_timer(struct timer_base *base, struct timer_list *timer) ++{ ++ unsigned int idx; ++ ++ idx = calc_wheel_index(timer->expires, base->clk); ++ enqueue_timer(base, timer, idx); ++} ++ ++static void ++trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer) ++{ ++ if (!is_timers_nohz_active()) ++ return; ++ ++ /* ++ * TODO: This wants some optimizing similar to the code below, but we ++ * will do that when we switch from push to pull for deferrable timers. ++ */ ++ if (timer->flags & TIMER_DEFERRABLE) { ++ if (tick_nohz_full_cpu(base->cpu)) ++ wake_up_nohz_cpu(base->cpu); ++ return; ++ } ++ ++ /* ++ * We might have to IPI the remote CPU if the base is idle and the ++ * timer is not deferrable. 
If the other CPU is on the way to idle ++ * then it can't set base->is_idle as we hold the base lock: ++ */ ++ if (!base->is_idle) ++ return; ++ ++ /* Check whether this is the new first expiring timer: */ ++ if (time_after_eq(timer->expires, base->next_expiry)) ++ return; ++ ++ /* ++ * Set the next expiry time and kick the CPU so it can reevaluate the ++ * wheel: ++ */ ++ if (time_before(timer->expires, base->clk)) { ++ /* ++ * Prevent from forward_timer_base() moving the base->clk ++ * backward ++ */ ++ base->next_expiry = base->clk; ++ } else { ++ base->next_expiry = timer->expires; ++ } ++ wake_up_nohz_cpu(base->cpu); ++} ++ ++static void ++internal_add_timer(struct timer_base *base, struct timer_list *timer) ++{ ++ __internal_add_timer(base, timer); ++ trigger_dyntick_cpu(base, timer); ++} ++ ++#ifdef CONFIG_DEBUG_OBJECTS_TIMERS ++ ++static struct debug_obj_descr timer_debug_descr; ++ ++static void *timer_debug_hint(void *addr) ++{ ++ return ((struct timer_list *) addr)->function; ++} ++ ++static bool timer_is_static_object(void *addr) ++{ ++ struct timer_list *timer = addr; ++ ++ return (timer->entry.pprev == NULL && ++ timer->entry.next == TIMER_ENTRY_STATIC); ++} ++ ++/* ++ * fixup_init is called when: ++ * - an active object is initialized ++ */ ++static bool timer_fixup_init(void *addr, enum debug_obj_state state) ++{ ++ struct timer_list *timer = addr; ++ ++ switch (state) { ++ case ODEBUG_STATE_ACTIVE: ++ del_timer_sync(timer); ++ debug_object_init(timer, &timer_debug_descr); ++ return true; ++ default: ++ return false; ++ } ++} ++ ++/* Stub timer callback for improperly used timers. */ ++static void stub_timer(struct timer_list *unused) ++{ ++ WARN_ON(1); ++} ++ ++/* ++ * fixup_activate is called when: ++ * - an active object is activated ++ * - an unknown non-static object is activated ++ */ ++static bool timer_fixup_activate(void *addr, enum debug_obj_state state) ++{ ++ struct timer_list *timer = addr; ++ ++ switch (state) { ++ case ODEBUG_STATE_NOTAVAILABLE: ++ timer_setup(timer, stub_timer, 0); ++ return true; ++ ++ case ODEBUG_STATE_ACTIVE: ++ WARN_ON(1); ++ ++ default: ++ return false; ++ } ++} ++ ++/* ++ * fixup_free is called when: ++ * - an active object is freed ++ */ ++static bool timer_fixup_free(void *addr, enum debug_obj_state state) ++{ ++ struct timer_list *timer = addr; ++ ++ switch (state) { ++ case ODEBUG_STATE_ACTIVE: ++ del_timer_sync(timer); ++ debug_object_free(timer, &timer_debug_descr); ++ return true; ++ default: ++ return false; ++ } ++} ++ ++/* ++ * fixup_assert_init is called when: ++ * - an untracked/uninit-ed object is found ++ */ ++static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state) ++{ ++ struct timer_list *timer = addr; ++ ++ switch (state) { ++ case ODEBUG_STATE_NOTAVAILABLE: ++ timer_setup(timer, stub_timer, 0); ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static struct debug_obj_descr timer_debug_descr = { ++ .name = "timer_list", ++ .debug_hint = timer_debug_hint, ++ .is_static_object = timer_is_static_object, ++ .fixup_init = timer_fixup_init, ++ .fixup_activate = timer_fixup_activate, ++ .fixup_free = timer_fixup_free, ++ .fixup_assert_init = timer_fixup_assert_init, ++}; ++ ++static inline void debug_timer_init(struct timer_list *timer) ++{ ++ debug_object_init(timer, &timer_debug_descr); ++} ++ ++static inline void debug_timer_activate(struct timer_list *timer) ++{ ++ debug_object_activate(timer, &timer_debug_descr); ++} ++ ++static inline void debug_timer_deactivate(struct timer_list *timer) ++{ 
++ debug_object_deactivate(timer, &timer_debug_descr); ++} ++ ++static inline void debug_timer_free(struct timer_list *timer) ++{ ++ debug_object_free(timer, &timer_debug_descr); ++} ++ ++static inline void debug_timer_assert_init(struct timer_list *timer) ++{ ++ debug_object_assert_init(timer, &timer_debug_descr); ++} ++ ++static void do_init_timer(struct timer_list *timer, ++ void (*func)(struct timer_list *), ++ unsigned int flags, ++ const char *name, struct lock_class_key *key); ++ ++void init_timer_on_stack_key(struct timer_list *timer, ++ void (*func)(struct timer_list *), ++ unsigned int flags, ++ const char *name, struct lock_class_key *key) ++{ ++ debug_object_init_on_stack(timer, &timer_debug_descr); ++ do_init_timer(timer, func, flags, name, key); ++} ++EXPORT_SYMBOL_GPL(init_timer_on_stack_key); ++ ++void destroy_timer_on_stack(struct timer_list *timer) ++{ ++ debug_object_free(timer, &timer_debug_descr); ++} ++EXPORT_SYMBOL_GPL(destroy_timer_on_stack); ++ ++#else ++static inline void debug_timer_init(struct timer_list *timer) { } ++static inline void debug_timer_activate(struct timer_list *timer) { } ++static inline void debug_timer_deactivate(struct timer_list *timer) { } ++static inline void debug_timer_assert_init(struct timer_list *timer) { } ++#endif ++ ++static inline void debug_init(struct timer_list *timer) ++{ ++ debug_timer_init(timer); ++ trace_timer_init(timer); ++} ++ ++static inline void ++debug_activate(struct timer_list *timer, unsigned long expires) ++{ ++ debug_timer_activate(timer); ++ trace_timer_start(timer, expires, timer->flags); ++} ++ ++static inline void debug_deactivate(struct timer_list *timer) ++{ ++ debug_timer_deactivate(timer); ++ trace_timer_cancel(timer); ++} ++ ++static inline void debug_assert_init(struct timer_list *timer) ++{ ++ debug_timer_assert_init(timer); ++} ++ ++static void do_init_timer(struct timer_list *timer, ++ void (*func)(struct timer_list *), ++ unsigned int flags, ++ const char *name, struct lock_class_key *key) ++{ ++ timer->entry.pprev = NULL; ++ timer->function = func; ++ timer->flags = flags | raw_smp_processor_id(); ++ lockdep_init_map(&timer->lockdep_map, name, key, 0); ++} ++ ++/** ++ * init_timer_key - initialize a timer ++ * @timer: the timer to be initialized ++ * @func: timer callback function ++ * @flags: timer flags ++ * @name: name of the timer ++ * @key: lockdep class key of the fake lock used for tracking timer ++ * sync lock dependencies ++ * ++ * init_timer_key() must be done to a timer prior calling *any* of the ++ * other timer functions. 
++ */ ++void init_timer_key(struct timer_list *timer, ++ void (*func)(struct timer_list *), unsigned int flags, ++ const char *name, struct lock_class_key *key) ++{ ++ debug_init(timer); ++ do_init_timer(timer, func, flags, name, key); ++} ++EXPORT_SYMBOL(init_timer_key); ++ ++static inline void detach_timer(struct timer_list *timer, bool clear_pending) ++{ ++ struct hlist_node *entry = &timer->entry; ++ ++ debug_deactivate(timer); ++ ++ __hlist_del(entry); ++ if (clear_pending) ++ entry->pprev = NULL; ++ entry->next = LIST_POISON2; ++} ++ ++static int detach_if_pending(struct timer_list *timer, struct timer_base *base, ++ bool clear_pending) ++{ ++ unsigned idx = timer_get_idx(timer); ++ ++ if (!timer_pending(timer)) ++ return 0; ++ ++ if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) ++ __clear_bit(idx, base->pending_map); ++ ++ detach_timer(timer, clear_pending); ++ return 1; ++} ++ ++static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) ++{ ++ struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); ++ ++ /* ++ * If the timer is deferrable and NO_HZ_COMMON is set then we need ++ * to use the deferrable base. ++ */ ++ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) ++ base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); ++ return base; ++} ++ ++static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) ++{ ++ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); ++ ++ /* ++ * If the timer is deferrable and NO_HZ_COMMON is set then we need ++ * to use the deferrable base. ++ */ ++ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) ++ base = this_cpu_ptr(&timer_bases[BASE_DEF]); ++ return base; ++} ++ ++static inline struct timer_base *get_timer_base(u32 tflags) ++{ ++ return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK); ++} ++ ++static inline struct timer_base * ++get_target_base(struct timer_base *base, unsigned tflags) ++{ ++#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) ++ if (static_branch_likely(&timers_migration_enabled) && ++ !(tflags & TIMER_PINNED)) ++ return get_timer_cpu_base(tflags, get_nohz_timer_target()); ++#endif ++ return get_timer_this_cpu_base(tflags); ++} ++ ++static inline void forward_timer_base(struct timer_base *base) ++{ ++#ifdef CONFIG_NO_HZ_COMMON ++ unsigned long jnow; ++ ++ /* ++ * We only forward the base when we are idle or have just come out of ++ * idle (must_forward_clk logic), and have a delta between base clock ++ * and jiffies. In the common case, run_timers will take care of it. ++ */ ++ if (likely(!base->must_forward_clk)) ++ return; ++ ++ jnow = READ_ONCE(jiffies); ++ base->must_forward_clk = base->is_idle; ++ if ((long)(jnow - base->clk) < 2) ++ return; ++ ++ /* ++ * If the next expiry value is > jiffies, then we fast forward to ++ * jiffies otherwise we forward to the next expiry value. ++ */ ++ if (time_after(base->next_expiry, jnow)) { ++ base->clk = jnow; ++ } else { ++ if (WARN_ON_ONCE(time_before(base->next_expiry, base->clk))) ++ return; ++ base->clk = base->next_expiry; ++ } ++#endif ++} ++ ++ ++/* ++ * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means ++ * that all timers which are tied to this base are locked, and the base itself ++ * is locked too. ++ * ++ * So __run_timers/migrate_timers can safely modify all timers which could ++ * be found in the base->vectors array. ++ * ++ * When a timer is migrating then the TIMER_MIGRATING flag is set and we need ++ * to wait until the migration is done. 
++ */ ++static struct timer_base *lock_timer_base(struct timer_list *timer, ++ unsigned long *flags) ++ __acquires(timer->base->lock) ++{ ++ for (;;) { ++ struct timer_base *base; ++ u32 tf; ++ ++ /* ++ * We need to use READ_ONCE() here, otherwise the compiler ++ * might re-read @tf between the check for TIMER_MIGRATING ++ * and spin_lock(). ++ */ ++ tf = READ_ONCE(timer->flags); ++ ++ if (!(tf & TIMER_MIGRATING)) { ++ base = get_timer_base(tf); ++ raw_spin_lock_irqsave(&base->lock, *flags); ++ if (timer->flags == tf) ++ return base; ++ raw_spin_unlock_irqrestore(&base->lock, *flags); ++ } ++ cpu_relax(); ++ } ++} ++ ++#define MOD_TIMER_PENDING_ONLY 0x01 ++#define MOD_TIMER_REDUCE 0x02 ++ ++static inline int ++__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options) ++{ ++ struct timer_base *base, *new_base; ++ unsigned int idx = UINT_MAX; ++ unsigned long clk = 0, flags; ++ int ret = 0; ++ ++ BUG_ON(!timer->function); ++ ++ /* ++ * This is a common optimization triggered by the networking code - if ++ * the timer is re-modified to have the same timeout or ends up in the ++ * same array bucket then just return: ++ */ ++ if (timer_pending(timer)) { ++ /* ++ * The downside of this optimization is that it can result in ++ * larger granularity than you would get from adding a new ++ * timer with this expiry. ++ */ ++ long diff = timer->expires - expires; ++ ++ if (!diff) ++ return 1; ++ if (options & MOD_TIMER_REDUCE && diff <= 0) ++ return 1; ++ ++ /* ++ * We lock timer base and calculate the bucket index right ++ * here. If the timer ends up in the same bucket, then we ++ * just update the expiry time and avoid the whole ++ * dequeue/enqueue dance. ++ */ ++ base = lock_timer_base(timer, &flags); ++ forward_timer_base(base); ++ ++ if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) && ++ time_before_eq(timer->expires, expires)) { ++ ret = 1; ++ goto out_unlock; ++ } ++ ++ clk = base->clk; ++ idx = calc_wheel_index(expires, clk); ++ ++ /* ++ * Retrieve and compare the array index of the pending ++ * timer. If it matches set the expiry to the new value so a ++ * subsequent call will exit in the expires check above. ++ */ ++ if (idx == timer_get_idx(timer)) { ++ if (!(options & MOD_TIMER_REDUCE)) ++ timer->expires = expires; ++ else if (time_after(timer->expires, expires)) ++ timer->expires = expires; ++ ret = 1; ++ goto out_unlock; ++ } ++ } else { ++ base = lock_timer_base(timer, &flags); ++ forward_timer_base(base); ++ } ++ ++ ret = detach_if_pending(timer, base, false); ++ if (!ret && (options & MOD_TIMER_PENDING_ONLY)) ++ goto out_unlock; ++ ++ new_base = get_target_base(base, timer->flags); ++ ++ if (base != new_base) { ++ /* ++ * We are trying to schedule the timer on the new base. ++ * However we can't change timer's base while it is running, ++ * otherwise del_timer_sync() can't detect that the timer's ++ * handler yet has not finished. This also guarantees that the ++ * timer is serialized wrt itself. 
++ */ ++ if (likely(base->running_timer != timer)) { ++ /* See the comment in lock_timer_base() */ ++ timer->flags |= TIMER_MIGRATING; ++ ++ raw_spin_unlock(&base->lock); ++ base = new_base; ++ raw_spin_lock(&base->lock); ++ WRITE_ONCE(timer->flags, ++ (timer->flags & ~TIMER_BASEMASK) | base->cpu); ++ forward_timer_base(base); ++ } ++ } ++ ++ debug_activate(timer, expires); ++ ++ timer->expires = expires; ++ /* ++ * If 'idx' was calculated above and the base time did not advance ++ * between calculating 'idx' and possibly switching the base, only ++ * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise ++ * we need to (re)calculate the wheel index via ++ * internal_add_timer(). ++ */ ++ if (idx != UINT_MAX && clk == base->clk) { ++ enqueue_timer(base, timer, idx); ++ trigger_dyntick_cpu(base, timer); ++ } else { ++ internal_add_timer(base, timer); ++ } ++ ++out_unlock: ++ raw_spin_unlock_irqrestore(&base->lock, flags); ++ ++ return ret; ++} ++ ++/** ++ * mod_timer_pending - modify a pending timer's timeout ++ * @timer: the pending timer to be modified ++ * @expires: new timeout in jiffies ++ * ++ * mod_timer_pending() is the same for pending timers as mod_timer(), ++ * but will not re-activate and modify already deleted timers. ++ * ++ * It is useful for unserialized use of timers. ++ */ ++int mod_timer_pending(struct timer_list *timer, unsigned long expires) ++{ ++ return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY); ++} ++EXPORT_SYMBOL(mod_timer_pending); ++ ++/** ++ * mod_timer - modify a timer's timeout ++ * @timer: the timer to be modified ++ * @expires: new timeout in jiffies ++ * ++ * mod_timer() is a more efficient way to update the expire field of an ++ * active timer (if the timer is inactive it will be activated) ++ * ++ * mod_timer(timer, expires) is equivalent to: ++ * ++ * del_timer(timer); timer->expires = expires; add_timer(timer); ++ * ++ * Note that if there are multiple unserialized concurrent users of the ++ * same timer, then mod_timer() is the only safe way to modify the timeout, ++ * since add_timer() cannot modify an already running timer. ++ * ++ * The function returns whether it has modified a pending timer or not. ++ * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an ++ * active timer returns 1.) ++ */ ++int mod_timer(struct timer_list *timer, unsigned long expires) ++{ ++ return __mod_timer(timer, expires, 0); ++} ++EXPORT_SYMBOL(mod_timer); ++ ++/** ++ * timer_reduce - Modify a timer's timeout if it would reduce the timeout ++ * @timer: The timer to be modified ++ * @expires: New timeout in jiffies ++ * ++ * timer_reduce() is very similar to mod_timer(), except that it will only ++ * modify a running timer if that would reduce the expiration time (it will ++ * start a timer that isn't running). ++ */ ++int timer_reduce(struct timer_list *timer, unsigned long expires) ++{ ++ return __mod_timer(timer, expires, MOD_TIMER_REDUCE); ++} ++EXPORT_SYMBOL(timer_reduce); ++ ++/** ++ * add_timer - start a timer ++ * @timer: the timer to be added ++ * ++ * The kernel will do a ->function(@timer) callback from the ++ * timer interrupt at the ->expires point in the future. The ++ * current time is 'jiffies'. ++ * ++ * The timer's ->expires, ->function fields must be set prior calling this ++ * function. ++ * ++ * Timers with an ->expires field in the past will be executed in the next ++ * timer tick. 
++ */ ++void add_timer(struct timer_list *timer) ++{ ++ BUG_ON(timer_pending(timer)); ++ mod_timer(timer, timer->expires); ++} ++EXPORT_SYMBOL(add_timer); ++ ++/** ++ * add_timer_on - start a timer on a particular CPU ++ * @timer: the timer to be added ++ * @cpu: the CPU to start it on ++ * ++ * This is not very scalable on SMP. Double adds are not possible. ++ */ ++void add_timer_on(struct timer_list *timer, int cpu) ++{ ++ struct timer_base *new_base, *base; ++ unsigned long flags; ++ ++ BUG_ON(timer_pending(timer) || !timer->function); ++ ++ new_base = get_timer_cpu_base(timer->flags, cpu); ++ ++ /* ++ * If @timer was on a different CPU, it should be migrated with the ++ * old base locked to prevent other operations proceeding with the ++ * wrong base locked. See lock_timer_base(). ++ */ ++ base = lock_timer_base(timer, &flags); ++ if (base != new_base) { ++ timer->flags |= TIMER_MIGRATING; ++ ++ raw_spin_unlock(&base->lock); ++ base = new_base; ++ raw_spin_lock(&base->lock); ++ WRITE_ONCE(timer->flags, ++ (timer->flags & ~TIMER_BASEMASK) | cpu); ++ } ++ forward_timer_base(base); ++ ++ debug_activate(timer, timer->expires); ++ internal_add_timer(base, timer); ++ raw_spin_unlock_irqrestore(&base->lock, flags); ++} ++EXPORT_SYMBOL_GPL(add_timer_on); ++ ++/** ++ * del_timer - deactivate a timer. ++ * @timer: the timer to be deactivated ++ * ++ * del_timer() deactivates a timer - this works on both active and inactive ++ * timers. ++ * ++ * The function returns whether it has deactivated a pending timer or not. ++ * (ie. del_timer() of an inactive timer returns 0, del_timer() of an ++ * active timer returns 1.) ++ */ ++int del_timer(struct timer_list *timer) ++{ ++ struct timer_base *base; ++ unsigned long flags; ++ int ret = 0; ++ ++ debug_assert_init(timer); ++ ++ if (timer_pending(timer)) { ++ base = lock_timer_base(timer, &flags); ++ ret = detach_if_pending(timer, base, true); ++ raw_spin_unlock_irqrestore(&base->lock, flags); ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL(del_timer); ++ ++/** ++ * try_to_del_timer_sync - Try to deactivate a timer ++ * @timer: timer to delete ++ * ++ * This function tries to deactivate a timer. Upon successful (ret >= 0) ++ * exit the timer is not queued and the handler is not running on any CPU. ++ */ ++int try_to_del_timer_sync(struct timer_list *timer) ++{ ++ struct timer_base *base; ++ unsigned long flags; ++ int ret = -1; ++ ++ debug_assert_init(timer); ++ ++ base = lock_timer_base(timer, &flags); ++ ++ if (base->running_timer != timer) ++ ret = detach_if_pending(timer, base, true); ++ ++ raw_spin_unlock_irqrestore(&base->lock, flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL(try_to_del_timer_sync); ++ ++#ifdef CONFIG_SMP ++/** ++ * del_timer_sync - deactivate a timer and wait for the handler to finish. ++ * @timer: the timer to be deactivated ++ * ++ * This function only differs from del_timer() on SMP: besides deactivating ++ * the timer it also makes sure the handler has finished executing on other ++ * CPUs. ++ * ++ * Synchronization rules: Callers must prevent restarting of the timer, ++ * otherwise this function is meaningless. It must not be called from ++ * interrupt contexts unless the timer is an irqsafe one. The caller must ++ * not hold locks which would prevent completion of the timer's ++ * handler. The timer's handler must not call add_timer_on(). Upon exit the ++ * timer is not queued and the handler is not running on any CPU. 
++ * ++ * Note: For !irqsafe timers, you must not hold locks that are held in ++ * interrupt context while calling this function. Even if the lock has ++ * nothing to do with the timer in question. Here's why:: ++ * ++ * CPU0 CPU1 ++ * ---- ---- ++ * ++ * call_timer_fn(); ++ * base->running_timer = mytimer; ++ * spin_lock_irq(somelock); ++ * ++ * spin_lock(somelock); ++ * del_timer_sync(mytimer); ++ * while (base->running_timer == mytimer); ++ * ++ * Now del_timer_sync() will never return and never release somelock. ++ * The interrupt on the other CPU is waiting to grab somelock but ++ * it has interrupted the softirq that CPU0 is waiting to finish. ++ * ++ * The function returns whether it has deactivated a pending timer or not. ++ */ ++int del_timer_sync(struct timer_list *timer) ++{ ++#ifdef CONFIG_LOCKDEP ++ unsigned long flags; ++ ++ /* ++ * If lockdep gives a backtrace here, please reference ++ * the synchronization rules above. ++ */ ++ local_irq_save(flags); ++ lock_map_acquire(&timer->lockdep_map); ++ lock_map_release(&timer->lockdep_map); ++ local_irq_restore(flags); ++#endif ++ /* ++ * don't use it in hardirq context, because it ++ * could lead to deadlock. ++ */ ++ WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE)); ++ for (;;) { ++ int ret = try_to_del_timer_sync(timer); ++ if (ret >= 0) ++ return ret; ++ cpu_relax(); ++ } ++} ++EXPORT_SYMBOL(del_timer_sync); ++#endif ++ ++static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *)) ++{ ++ int count = preempt_count(); ++ ++#ifdef CONFIG_LOCKDEP ++ /* ++ * It is permissible to free the timer from inside the ++ * function that is called from it, this we need to take into ++ * account for lockdep too. To avoid bogus "held lock freed" ++ * warnings as well as problems when looking into ++ * timer->lockdep_map, make a copy and use that here. ++ */ ++ struct lockdep_map lockdep_map; ++ ++ lockdep_copy_map(&lockdep_map, &timer->lockdep_map); ++#endif ++ /* ++ * Couple the lock chain with the lock chain at ++ * del_timer_sync() by acquiring the lock_map around the fn() ++ * call here and in del_timer_sync(). ++ */ ++ lock_map_acquire(&lockdep_map); ++ ++ trace_timer_expire_entry(timer); ++ fn(timer); ++ trace_timer_expire_exit(timer); ++ ++ lock_map_release(&lockdep_map); ++ ++ if (count != preempt_count()) { ++ WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", ++ fn, count, preempt_count()); ++ /* ++ * Restore the preempt count. That gives us a decent ++ * chance to survive and extract information. If the ++ * callback kept a lock held, bad luck, but not worse ++ * than the BUG() we had. 
++ */ ++ preempt_count_set(count); ++ } ++} ++ ++static void expire_timers(struct timer_base *base, struct hlist_head *head) ++{ ++ while (!hlist_empty(head)) { ++ struct timer_list *timer; ++ void (*fn)(struct timer_list *); ++ ++ timer = hlist_entry(head->first, struct timer_list, entry); ++ ++ base->running_timer = timer; ++ detach_timer(timer, true); ++ ++ fn = timer->function; ++ ++ if (timer->flags & TIMER_IRQSAFE) { ++ raw_spin_unlock(&base->lock); ++ call_timer_fn(timer, fn); ++ raw_spin_lock(&base->lock); ++ } else { ++ raw_spin_unlock_irq(&base->lock); ++ call_timer_fn(timer, fn); ++ raw_spin_lock_irq(&base->lock); ++ } ++ } ++} ++ ++static int __collect_expired_timers(struct timer_base *base, ++ struct hlist_head *heads) ++{ ++ unsigned long clk = base->clk; ++ struct hlist_head *vec; ++ int i, levels = 0; ++ unsigned int idx; ++ ++ for (i = 0; i < LVL_DEPTH; i++) { ++ idx = (clk & LVL_MASK) + i * LVL_SIZE; ++ ++ if (__test_and_clear_bit(idx, base->pending_map)) { ++ vec = base->vectors + idx; ++ hlist_move_list(vec, heads++); ++ levels++; ++ } ++ /* Is it time to look at the next level? */ ++ if (clk & LVL_CLK_MASK) ++ break; ++ /* Shift clock for the next level granularity */ ++ clk >>= LVL_CLK_SHIFT; ++ } ++ return levels; ++} ++ ++#ifdef CONFIG_NO_HZ_COMMON ++/* ++ * Find the next pending bucket of a level. Search from level start (@offset) ++ * + @clk upwards and if nothing there, search from start of the level ++ * (@offset) up to @offset + clk. ++ */ ++static int next_pending_bucket(struct timer_base *base, unsigned offset, ++ unsigned clk) ++{ ++ unsigned pos, start = offset + clk; ++ unsigned end = offset + LVL_SIZE; ++ ++ pos = find_next_bit(base->pending_map, end, start); ++ if (pos < end) ++ return pos - start; ++ ++ pos = find_next_bit(base->pending_map, start, offset); ++ return pos < start ? pos + LVL_SIZE - start : -1; ++} ++ ++/* ++ * Search the first expiring timer in the various clock levels. Caller must ++ * hold base->lock. ++ */ ++static unsigned long __next_timer_interrupt(struct timer_base *base) ++{ ++ unsigned long clk, next, adj; ++ unsigned lvl, offset = 0; ++ ++ next = base->clk + NEXT_TIMER_MAX_DELTA; ++ clk = base->clk; ++ for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) { ++ int pos = next_pending_bucket(base, offset, clk & LVL_MASK); ++ ++ if (pos >= 0) { ++ unsigned long tmp = clk + (unsigned long) pos; ++ ++ tmp <<= LVL_SHIFT(lvl); ++ if (time_before(tmp, next)) ++ next = tmp; ++ } ++ /* ++ * Clock for the next level. If the current level clock lower ++ * bits are zero, we look at the next level as is. If not we ++ * need to advance it by one because that's going to be the ++ * next expiring bucket in that level. base->clk is the next ++ * expiring jiffie. So in case of: ++ * ++ * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 ++ * 0 0 0 0 0 0 ++ * ++ * we have to look at all levels @index 0. With ++ * ++ * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 ++ * 0 0 0 0 0 2 ++ * ++ * LVL0 has the next expiring bucket @index 2. The upper ++ * levels have the next expiring bucket @index 1. ++ * ++ * In case that the propagation wraps the next level the same ++ * rules apply: ++ * ++ * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 ++ * 0 0 0 0 F 2 ++ * ++ * So after looking at LVL0 we get: ++ * ++ * LVL5 LVL4 LVL3 LVL2 LVL1 ++ * 0 0 0 1 0 ++ * ++ * So no propagation from LVL1 to LVL2 because that happened ++ * with the add already, but then we need to propagate further ++ * from LVL2 to LVL3. 
++ * ++ * So the simple check whether the lower bits of the current ++ * level are 0 or not is sufficient for all cases. ++ */ ++ adj = clk & LVL_CLK_MASK ? 1 : 0; ++ clk >>= LVL_CLK_SHIFT; ++ clk += adj; ++ } ++ return next; ++} ++ ++/* ++ * Check, if the next hrtimer event is before the next timer wheel ++ * event: ++ */ ++static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) ++{ ++ u64 nextevt = hrtimer_get_next_event(); ++ ++ /* ++ * If high resolution timers are enabled ++ * hrtimer_get_next_event() returns KTIME_MAX. ++ */ ++ if (expires <= nextevt) ++ return expires; ++ ++ /* ++ * If the next timer is already expired, return the tick base ++ * time so the tick is fired immediately. ++ */ ++ if (nextevt <= basem) ++ return basem; ++ ++ /* ++ * Round up to the next jiffie. High resolution timers are ++ * off, so the hrtimers are expired in the tick and we need to ++ * make sure that this tick really expires the timer to avoid ++ * a ping pong of the nohz stop code. ++ * ++ * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3 ++ */ ++ return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC; ++} ++ ++/** ++ * get_next_timer_interrupt - return the time (clock mono) of the next timer ++ * @basej: base time jiffies ++ * @basem: base time clock monotonic ++ * ++ * Returns the tick aligned clock monotonic time of the next pending ++ * timer or KTIME_MAX if no timer is pending. ++ */ ++u64 get_next_timer_interrupt(unsigned long basej, u64 basem) ++{ ++ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); ++ u64 expires = KTIME_MAX; ++ unsigned long nextevt; ++ bool is_max_delta; ++ ++ /* ++ * Pretend that there is no timer pending if the cpu is offline. ++ * Possible pending timers will be migrated later to an active cpu. ++ */ ++ if (cpu_is_offline(smp_processor_id())) ++ return expires; ++ ++ raw_spin_lock(&base->lock); ++ nextevt = __next_timer_interrupt(base); ++ is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); ++ base->next_expiry = nextevt; ++ /* ++ * We have a fresh next event. Check whether we can forward the ++ * base. We can only do that when @basej is past base->clk ++ * otherwise we might rewind base->clk. ++ */ ++ if (time_after(basej, base->clk)) { ++ if (time_after(nextevt, basej)) ++ base->clk = basej; ++ else if (time_after(nextevt, base->clk)) ++ base->clk = nextevt; ++ } ++ ++ if (time_before_eq(nextevt, basej)) { ++ expires = basem; ++ base->is_idle = false; ++ } else { ++ if (!is_max_delta) ++ expires = basem + (u64)(nextevt - basej) * TICK_NSEC; ++ /* ++ * If we expect to sleep more than a tick, mark the base idle. ++ * Also the tick is stopped so any added timer must forward ++ * the base clk itself to keep granularity small. This idle ++ * logic is only maintained for the BASE_STD base, deferrable ++ * timers may still see large granularity skew (by design). ++ */ ++ if ((expires - basem) > TICK_NSEC) { ++ base->must_forward_clk = true; ++ base->is_idle = true; ++ } ++ } ++ raw_spin_unlock(&base->lock); ++ ++ return cmp_next_hrtimer_event(basem, expires); ++} ++ ++/** ++ * timer_clear_idle - Clear the idle state of the timer base ++ * ++ * Called with interrupts disabled ++ */ ++void timer_clear_idle(void) ++{ ++ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); ++ ++ /* ++ * We do this unlocked. The worst outcome is a remote enqueue sending ++ * a pointless IPI, but taking the lock would just make the window for ++ * sending the IPI a few instructions smaller for the cost of taking ++ * the lock in the exit from idle path. 
++ */ ++ base->is_idle = false; ++} ++ ++static int collect_expired_timers(struct timer_base *base, ++ struct hlist_head *heads) ++{ ++ unsigned long now = READ_ONCE(jiffies); ++ ++ /* ++ * NOHZ optimization. After a long idle sleep we need to forward the ++ * base to current jiffies. Avoid a loop by searching the bitfield for ++ * the next expiring timer. ++ */ ++ if ((long)(now - base->clk) > 2) { ++ unsigned long next = __next_timer_interrupt(base); ++ ++ /* ++ * If the next timer is ahead of time forward to current ++ * jiffies, otherwise forward to the next expiry time: ++ */ ++ if (time_after(next, now)) { ++ /* ++ * The call site will increment base->clk and then ++ * terminate the expiry loop immediately. ++ */ ++ base->clk = now; ++ return 0; ++ } ++ base->clk = next; ++ } ++ return __collect_expired_timers(base, heads); ++} ++#else ++static inline int collect_expired_timers(struct timer_base *base, ++ struct hlist_head *heads) ++{ ++ return __collect_expired_timers(base, heads); ++} ++#endif ++ ++/* ++ * Called from the timer interrupt handler to charge one tick to the current ++ * process. user_tick is 1 if the tick is user time, 0 for system. ++ */ ++void update_process_times(int user_tick) ++{ ++ struct task_struct *p = current; ++ ++ /* Note: this timer irq context must be accounted for as well. */ ++ account_process_tick(p, user_tick); ++ run_local_timers(); ++ rcu_check_callbacks(user_tick); ++#ifdef CONFIG_IRQ_WORK ++ if (in_irq()) ++ irq_work_tick(); ++#endif ++ scheduler_tick(); ++ if (IS_ENABLED(CONFIG_POSIX_TIMERS)) ++ run_posix_cpu_timers(p); ++ ++ /* The current CPU might make use of net randoms without receiving IRQs ++ * to renew them often enough. Let's update the net_rand_state from a ++ * non-constant value that's not affine to the number of calls to make ++ * sure it's updated when there's some activity (we don't care in idle). ++ */ ++ this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick); ++} ++ ++/** ++ * __run_timers - run all expired timers (if any) on this CPU. ++ * @base: the timer vector to be processed. ++ */ ++static inline void __run_timers(struct timer_base *base) ++{ ++ struct hlist_head heads[LVL_DEPTH]; ++ int levels; ++ ++ if (!time_after_eq(jiffies, base->clk)) ++ return; ++ ++ raw_spin_lock_irq(&base->lock); ++ ++ /* ++ * timer_base::must_forward_clk must be cleared before running ++ * timers so that any timer functions that call mod_timer() will ++ * not try to forward the base. Idle tracking / clock forwarding ++ * logic is only used with BASE_STD timers. ++ * ++ * The must_forward_clk flag is cleared unconditionally also for ++ * the deferrable base. The deferrable base is not affected by idle ++ * tracking and never forwarded, so clearing the flag is a NOOP. ++ * ++ * The fact that the deferrable base is never forwarded can cause ++ * large variations in granularity for deferrable timers, but they ++ * can be deferred for long periods due to idle anyway. ++ */ ++ base->must_forward_clk = false; ++ ++ while (time_after_eq(jiffies, base->clk)) { ++ ++ levels = collect_expired_timers(base, heads); ++ base->clk++; ++ ++ while (levels--) ++ expire_timers(base, heads + levels); ++ } ++ base->running_timer = NULL; ++ raw_spin_unlock_irq(&base->lock); ++} ++ ++/* ++ * This function runs timers and the timer-tq in bottom half context. 
++ */ ++static __latent_entropy void run_timer_softirq(struct softirq_action *h) ++{ ++ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); ++ ++ __run_timers(base); ++ if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) ++ __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); ++} ++ ++/* ++ * Called by the local, per-CPU timer interrupt on SMP. ++ */ ++void run_local_timers(void) ++{ ++ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); ++ ++ hrtimer_run_queues(); ++ /* Raise the softirq only if required. */ ++ if (time_before(jiffies, base->clk)) { ++ if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) ++ return; ++ /* CPU is awake, so check the deferrable base. */ ++ base++; ++ if (time_before(jiffies, base->clk)) ++ return; ++ } ++ raise_softirq(TIMER_SOFTIRQ); ++} ++ ++/* ++ * Since schedule_timeout()'s timer is defined on the stack, it must store ++ * the target task on the stack as well. ++ */ ++struct process_timer { ++ struct timer_list timer; ++ struct task_struct *task; ++}; ++ ++static void process_timeout(struct timer_list *t) ++{ ++ struct process_timer *timeout = from_timer(timeout, t, timer); ++ ++ wake_up_process(timeout->task); ++} ++ ++/** ++ * schedule_timeout - sleep until timeout ++ * @timeout: timeout value in jiffies ++ * ++ * Make the current task sleep until @timeout jiffies have ++ * elapsed. The routine will return immediately unless ++ * the current task state has been set (see set_current_state()). ++ * ++ * You can set the task state as follows - ++ * ++ * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to ++ * pass before the routine returns unless the current task is explicitly ++ * woken up, (e.g. by wake_up_process())". ++ * ++ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is ++ * delivered to the current task or the current task is explicitly woken ++ * up. ++ * ++ * The current task state is guaranteed to be TASK_RUNNING when this ++ * routine returns. ++ * ++ * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule ++ * the CPU away without a bound on the timeout. In this case the return ++ * value will be %MAX_SCHEDULE_TIMEOUT. ++ * ++ * Returns 0 when the timer has expired otherwise the remaining time in ++ * jiffies will be returned. In all cases the return value is guaranteed ++ * to be non-negative. ++ */ ++signed long __sched schedule_timeout(signed long timeout) ++{ ++ struct process_timer timer; ++ unsigned long expire; ++ ++ switch (timeout) ++ { ++ case MAX_SCHEDULE_TIMEOUT: ++ /* ++ * These two special cases are useful to be comfortable ++ * in the caller. Nothing more. We could take ++ * MAX_SCHEDULE_TIMEOUT from one of the negative value ++ * but I' d like to return a valid offset (>=0) to allow ++ * the caller to do everything it want with the retval. ++ */ ++ schedule(); ++ goto out; ++ default: ++ /* ++ * Another bit of PARANOID. Note that the retval will be ++ * 0 since no piece of kernel is supposed to do a check ++ * for a negative retval of schedule_timeout() (since it ++ * should never happens anyway). You just have the printk() ++ * that will tell you if something is gone wrong and where. 
++ */ ++ if (timeout < 0) { ++ printk(KERN_ERR "schedule_timeout: wrong timeout " ++ "value %lx\n", timeout); ++ dump_stack(); ++ current->state = TASK_RUNNING; ++ goto out; ++ } ++ } ++ ++ expire = timeout + jiffies; ++ ++ timer.task = current; ++ timer_setup_on_stack(&timer.timer, process_timeout, 0); ++ __mod_timer(&timer.timer, expire, 0); ++ schedule(); ++ del_singleshot_timer_sync(&timer.timer); ++ ++ /* Remove the timer from the object tracker */ ++ destroy_timer_on_stack(&timer.timer); ++ ++ timeout = expire - jiffies; ++ ++ out: ++ return timeout < 0 ? 0 : timeout; ++} ++EXPORT_SYMBOL(schedule_timeout); ++ ++/* ++ * We can use __set_current_state() here because schedule_timeout() calls ++ * schedule() unconditionally. ++ */ ++signed long __sched schedule_timeout_interruptible(signed long timeout) ++{ ++ __set_current_state(TASK_INTERRUPTIBLE); ++ return schedule_timeout(timeout); ++} ++EXPORT_SYMBOL(schedule_timeout_interruptible); ++ ++signed long __sched schedule_timeout_killable(signed long timeout) ++{ ++ __set_current_state(TASK_KILLABLE); ++ return schedule_timeout(timeout); ++} ++EXPORT_SYMBOL(schedule_timeout_killable); ++ ++signed long __sched schedule_timeout_uninterruptible(signed long timeout) ++{ ++ __set_current_state(TASK_UNINTERRUPTIBLE); ++ return schedule_timeout(timeout); ++} ++EXPORT_SYMBOL(schedule_timeout_uninterruptible); ++ ++/* ++ * Like schedule_timeout_uninterruptible(), except this task will not contribute ++ * to load average. ++ */ ++signed long __sched schedule_timeout_idle(signed long timeout) ++{ ++ __set_current_state(TASK_IDLE); ++ return schedule_timeout(timeout); ++} ++EXPORT_SYMBOL(schedule_timeout_idle); ++ ++#ifdef CONFIG_HOTPLUG_CPU ++static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head) ++{ ++ struct timer_list *timer; ++ int cpu = new_base->cpu; ++ ++ while (!hlist_empty(head)) { ++ timer = hlist_entry(head->first, struct timer_list, entry); ++ detach_timer(timer, false); ++ timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; ++ internal_add_timer(new_base, timer); ++ } ++} ++ ++int timers_prepare_cpu(unsigned int cpu) ++{ ++ struct timer_base *base; ++ int b; ++ ++ for (b = 0; b < NR_BASES; b++) { ++ base = per_cpu_ptr(&timer_bases[b], cpu); ++ base->clk = jiffies; ++ base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; ++ base->is_idle = false; ++ base->must_forward_clk = true; ++ } ++ return 0; ++} ++ ++int timers_dead_cpu(unsigned int cpu) ++{ ++ struct timer_base *old_base; ++ struct timer_base *new_base; ++ int b, i; ++ ++ BUG_ON(cpu_online(cpu)); ++ ++ for (b = 0; b < NR_BASES; b++) { ++ old_base = per_cpu_ptr(&timer_bases[b], cpu); ++ new_base = get_cpu_ptr(&timer_bases[b]); ++ /* ++ * The caller is globally serialized and nobody else ++ * takes two locks at once, deadlock is not possible. ++ */ ++ raw_spin_lock_irq(&new_base->lock); ++ raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); ++ ++ /* ++ * The current CPUs base clock might be stale. Update it ++ * before moving the timers over. 
++ */ ++ forward_timer_base(new_base); ++ ++ BUG_ON(old_base->running_timer); ++ ++ for (i = 0; i < WHEEL_SIZE; i++) ++ migrate_timer_list(new_base, old_base->vectors + i); ++ ++ raw_spin_unlock(&old_base->lock); ++ raw_spin_unlock_irq(&new_base->lock); ++ put_cpu_ptr(&timer_bases); ++ } ++ return 0; ++} ++ ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++static void __init init_timer_cpu(int cpu) ++{ ++ struct timer_base *base; ++ int i; ++ ++ for (i = 0; i < NR_BASES; i++) { ++ base = per_cpu_ptr(&timer_bases[i], cpu); ++ base->cpu = cpu; ++ raw_spin_lock_init(&base->lock); ++ base->clk = jiffies; ++ } ++} ++ ++static void __init init_timer_cpus(void) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) ++ init_timer_cpu(cpu); ++} ++ ++void __init init_timers(void) ++{ ++ init_timer_cpus(); ++ open_softirq(TIMER_SOFTIRQ, run_timer_softirq); ++} ++ ++/** ++ * msleep - sleep safely even with waitqueue interruptions ++ * @msecs: Time in milliseconds to sleep for ++ */ ++void msleep(unsigned int msecs) ++{ ++ unsigned long timeout = msecs_to_jiffies(msecs) + 1; ++ ++ while (timeout) ++ timeout = schedule_timeout_uninterruptible(timeout); ++} ++ ++EXPORT_SYMBOL(msleep); ++ ++/** ++ * msleep_interruptible - sleep waiting for signals ++ * @msecs: Time in milliseconds to sleep for ++ */ ++unsigned long msleep_interruptible(unsigned int msecs) ++{ ++ unsigned long timeout = msecs_to_jiffies(msecs) + 1; ++ ++ while (timeout && !signal_pending(current)) ++ timeout = schedule_timeout_interruptible(timeout); ++ return jiffies_to_msecs(timeout); ++} ++ ++EXPORT_SYMBOL(msleep_interruptible); ++ ++/** ++ * usleep_range - Sleep for an approximate time ++ * @min: Minimum time in usecs to sleep ++ * @max: Maximum time in usecs to sleep ++ * ++ * In non-atomic context where the exact wakeup time is flexible, use ++ * usleep_range() instead of udelay(). The sleep improves responsiveness ++ * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces ++ * power usage by allowing hrtimers to take advantage of an already- ++ * scheduled interrupt instead of scheduling a new one just for this sleep. ++ */ ++void __sched usleep_range(unsigned long min, unsigned long max) ++{ ++ ktime_t exp = ktime_add_us(ktime_get(), min); ++ u64 delta = (u64)(max - min) * NSEC_PER_USEC; ++ ++ for (;;) { ++ __set_current_state(TASK_UNINTERRUPTIBLE); ++ /* Do not return before the requested sleep time has elapsed */ ++ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) ++ break; ++ } ++} ++EXPORT_SYMBOL(usleep_range); +diff -uprN kernel/kernel/trace/ftrace.c kernel_new/kernel/trace/ftrace.c +--- kernel/kernel/trace/ftrace.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/ftrace.c 2021-04-01 18:28:07.814863110 +0800 +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + + #include + +@@ -214,8 +215,17 @@ static ftrace_func_t ftrace_ops_get_list + + static void update_ftrace_function(void) + { ++ struct ftrace_ops *ops; + ftrace_func_t func; + ++ for (ops = ftrace_ops_list; ++ ops != &ftrace_list_end; ops = ops->next) ++ if (ops->flags & FTRACE_OPS_FL_IPIPE_EXCLUSIVE) { ++ set_function_trace_op = ops; ++ func = ops->func; ++ goto set_pointers; ++ } ++ + /* + * Prepare the ftrace_ops that the arch callback will use. 
+ * If there's only one ftrace_ops registered, the ftrace_ops_list +@@ -245,6 +255,7 @@ static void update_ftrace_function(void) + + update_function_graph_func(); + ++ set_pointers: + /* If there's no change, then do nothing more here */ + if (ftrace_trace_function == func) + return; +@@ -2627,6 +2638,9 @@ void __weak arch_ftrace_update_code(int + + static void ftrace_run_update_code(int command) + { ++#ifdef CONFIG_IPIPE ++ unsigned long flags; ++#endif /* CONFIG_IPIPE */ + int ret; + + ret = ftrace_arch_code_modify_prepare(); +@@ -5618,10 +5632,10 @@ static int ftrace_process_locs(struct mo + * reason to cause large interrupt latencies while we do it. + */ + if (!mod) +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + ftrace_update_code(mod, start_pg); + if (!mod) +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + ret = 0; + out: + mutex_unlock(&ftrace_lock); +@@ -6166,9 +6180,11 @@ void __init ftrace_init(void) + unsigned long count, flags; + int ret; + +- local_irq_save(flags); ++ flags = hard_local_irq_save_notrace(); + ret = ftrace_dyn_arch_init(); +- local_irq_restore(flags); ++ hard_local_irq_restore_notrace(flags); ++ ++ /* ftrace_dyn_arch_init places the return code in addr */ + if (ret) + goto failed; + +@@ -6321,7 +6337,16 @@ __ftrace_ops_list_func(unsigned long ip, + } + } while_for_each_ftrace_op(op); + out: +- preempt_enable_notrace(); ++#ifdef CONFIG_IPIPE ++ if (hard_irqs_disabled() || !__ipipe_root_p) ++ /* ++ * Nothing urgent to schedule here. At latest the timer tick ++ * will pick up whatever the tracing functions kicked off. ++ */ ++ preempt_enable_no_resched_notrace(); ++ else ++#endif ++ preempt_enable_notrace(); + trace_clear_recursion(bit); + } + +diff -uprN kernel/kernel/trace/ftrace.c.orig kernel_new/kernel/trace/ftrace.c.orig +--- kernel/kernel/trace/ftrace.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/trace/ftrace.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,7133 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Infrastructure for profiling code inserted by 'gcc -pg'. 
++ * ++ * Copyright (C) 2007-2008 Steven Rostedt ++ * Copyright (C) 2004-2008 Ingo Molnar ++ * ++ * Originally ported from the -rt patch by: ++ * Copyright (C) 2007 Arnaldo Carvalho de Melo ++ * ++ * Based on code in the latency_tracer, that is: ++ * ++ * Copyright (C) 2004-2006 Ingo Molnar ++ * Copyright (C) 2004 Nadia Yvette Chambers ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++ ++#include "trace_output.h" ++#include "trace_stat.h" ++ ++#define FTRACE_WARN_ON(cond) \ ++ ({ \ ++ int ___r = cond; \ ++ if (WARN_ON(___r)) \ ++ ftrace_kill(); \ ++ ___r; \ ++ }) ++ ++#define FTRACE_WARN_ON_ONCE(cond) \ ++ ({ \ ++ int ___r = cond; \ ++ if (WARN_ON_ONCE(___r)) \ ++ ftrace_kill(); \ ++ ___r; \ ++ }) ++ ++/* hash bits for specific function selection */ ++#define FTRACE_HASH_BITS 7 ++#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) ++#define FTRACE_HASH_DEFAULT_BITS 10 ++#define FTRACE_HASH_MAX_BITS 12 ++ ++#ifdef CONFIG_DYNAMIC_FTRACE ++#define INIT_OPS_HASH(opsname) \ ++ .func_hash = &opsname.local_hash, \ ++ .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), ++#define ASSIGN_OPS_HASH(opsname, val) \ ++ .func_hash = val, \ ++ .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), ++#else ++#define INIT_OPS_HASH(opsname) ++#define ASSIGN_OPS_HASH(opsname, val) ++#endif ++ ++static struct ftrace_ops ftrace_list_end __read_mostly = { ++ .func = ftrace_stub, ++ .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, ++ INIT_OPS_HASH(ftrace_list_end) ++}; ++ ++/* ftrace_enabled is a method to turn ftrace on or off */ ++int ftrace_enabled __read_mostly; ++static int last_ftrace_enabled; ++ ++/* Current function tracing op */ ++struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; ++/* What to set function_trace_op to */ ++static struct ftrace_ops *set_function_trace_op; ++ ++static bool ftrace_pids_enabled(struct ftrace_ops *ops) ++{ ++ struct trace_array *tr; ++ ++ if (!(ops->flags & FTRACE_OPS_FL_PID) || !ops->private) ++ return false; ++ ++ tr = ops->private; ++ ++ return tr->function_pids != NULL; ++} ++ ++static void ftrace_update_trampoline(struct ftrace_ops *ops); ++ ++/* ++ * ftrace_disabled is set when an anomaly is discovered. ++ * ftrace_disabled is much stronger than ftrace_enabled. ++ */ ++static int ftrace_disabled __read_mostly; ++ ++static DEFINE_MUTEX(ftrace_lock); ++ ++static struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end; ++ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; ++static struct ftrace_ops global_ops; ++ ++#if ARCH_SUPPORTS_FTRACE_OPS ++static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *regs); ++#else ++/* See comment below, where ftrace_ops_list_func is defined */ ++static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); ++#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) ++#endif ++ ++/* ++ * Traverse the ftrace_global_list, invoking all entries. The reason that we ++ * can use rcu_dereference_raw_notrace() is that elements removed from this list ++ * are simply leaked, so there is no need to interact with a grace-period ++ * mechanism. 
The rcu_dereference_raw_notrace() calls are needed to handle ++ * concurrent insertions into the ftrace_global_list. ++ * ++ * Silly Alpha and silly pointer-speculation compiler optimizations! ++ */ ++#define do_for_each_ftrace_op(op, list) \ ++ op = rcu_dereference_raw_notrace(list); \ ++ do ++ ++/* ++ * Optimized for just a single item in the list (as that is the normal case). ++ */ ++#define while_for_each_ftrace_op(op) \ ++ while (likely(op = rcu_dereference_raw_notrace((op)->next)) && \ ++ unlikely((op) != &ftrace_list_end)) ++ ++static inline void ftrace_ops_init(struct ftrace_ops *ops) ++{ ++#ifdef CONFIG_DYNAMIC_FTRACE ++ if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) { ++ mutex_init(&ops->local_hash.regex_lock); ++ ops->func_hash = &ops->local_hash; ++ ops->flags |= FTRACE_OPS_FL_INITIALIZED; ++ } ++#endif ++} ++ ++static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *regs) ++{ ++ struct trace_array *tr = op->private; ++ ++ if (tr && this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid)) ++ return; ++ ++ op->saved_func(ip, parent_ip, op, regs); ++} ++ ++static void ftrace_sync(struct work_struct *work) ++{ ++ /* ++ * This function is just a stub to implement a hard force ++ * of synchronize_sched(). This requires synchronizing ++ * tasks even in userspace and idle. ++ * ++ * Yes, function tracing is rude. ++ */ ++} ++ ++static void ftrace_sync_ipi(void *data) ++{ ++ /* Probably not needed, but do it anyway */ ++ smp_rmb(); ++} ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++static void update_function_graph_func(void); ++ ++/* Both enabled by default (can be cleared by function_graph tracer flags */ ++static bool fgraph_sleep_time = true; ++static bool fgraph_graph_time = true; ++ ++#else ++static inline void update_function_graph_func(void) { } ++#endif ++ ++ ++static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) ++{ ++ /* ++ * If this is a dynamic, RCU, or per CPU ops, or we force list func, ++ * then it needs to call the list anyway. ++ */ ++ if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RCU) || ++ FTRACE_FORCE_LIST_FUNC) ++ return ftrace_ops_list_func; ++ ++ return ftrace_ops_get_func(ops); ++} ++ ++static void update_ftrace_function(void) ++{ ++ ftrace_func_t func; ++ ++ /* ++ * Prepare the ftrace_ops that the arch callback will use. ++ * If there's only one ftrace_ops registered, the ftrace_ops_list ++ * will point to the ops we want. ++ */ ++ set_function_trace_op = rcu_dereference_protected(ftrace_ops_list, ++ lockdep_is_held(&ftrace_lock)); ++ ++ /* If there's no ftrace_ops registered, just call the stub function */ ++ if (set_function_trace_op == &ftrace_list_end) { ++ func = ftrace_stub; ++ ++ /* ++ * If we are at the end of the list and this ops is ++ * recursion safe and not dynamic and the arch supports passing ops, ++ * then have the mcount trampoline call the function directly. ++ */ ++ } else if (rcu_dereference_protected(ftrace_ops_list->next, ++ lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { ++ func = ftrace_ops_get_list_func(ftrace_ops_list); ++ ++ } else { ++ /* Just use the default ftrace_ops */ ++ set_function_trace_op = &ftrace_list_end; ++ func = ftrace_ops_list_func; ++ } ++ ++ update_function_graph_func(); ++ ++ /* If there's no change, then do nothing more here */ ++ if (ftrace_trace_function == func) ++ return; ++ ++ /* ++ * If we are using the list function, it doesn't care ++ * about the function_trace_ops. 
++ */ ++ if (func == ftrace_ops_list_func) { ++ ftrace_trace_function = func; ++ /* ++ * Don't even bother setting function_trace_ops, ++ * it would be racy to do so anyway. ++ */ ++ return; ++ } ++ ++#ifndef CONFIG_DYNAMIC_FTRACE ++ /* ++ * For static tracing, we need to be a bit more careful. ++ * The function change takes affect immediately. Thus, ++ * we need to coorditate the setting of the function_trace_ops ++ * with the setting of the ftrace_trace_function. ++ * ++ * Set the function to the list ops, which will call the ++ * function we want, albeit indirectly, but it handles the ++ * ftrace_ops and doesn't depend on function_trace_op. ++ */ ++ ftrace_trace_function = ftrace_ops_list_func; ++ /* ++ * Make sure all CPUs see this. Yes this is slow, but static ++ * tracing is slow and nasty to have enabled. ++ */ ++ schedule_on_each_cpu(ftrace_sync); ++ /* Now all cpus are using the list ops. */ ++ function_trace_op = set_function_trace_op; ++ /* Make sure the function_trace_op is visible on all CPUs */ ++ smp_wmb(); ++ /* Nasty way to force a rmb on all cpus */ ++ smp_call_function(ftrace_sync_ipi, NULL, 1); ++ /* OK, we are all set to update the ftrace_trace_function now! */ ++#endif /* !CONFIG_DYNAMIC_FTRACE */ ++ ++ ftrace_trace_function = func; ++} ++ ++static void add_ftrace_ops(struct ftrace_ops __rcu **list, ++ struct ftrace_ops *ops) ++{ ++ rcu_assign_pointer(ops->next, *list); ++ ++ /* ++ * We are entering ops into the list but another ++ * CPU might be walking that list. We need to make sure ++ * the ops->next pointer is valid before another CPU sees ++ * the ops pointer included into the list. ++ */ ++ rcu_assign_pointer(*list, ops); ++} ++ ++static int remove_ftrace_ops(struct ftrace_ops __rcu **list, ++ struct ftrace_ops *ops) ++{ ++ struct ftrace_ops **p; ++ ++ /* ++ * If we are removing the last function, then simply point ++ * to the ftrace_stub. ++ */ ++ if (rcu_dereference_protected(*list, ++ lockdep_is_held(&ftrace_lock)) == ops && ++ rcu_dereference_protected(ops->next, ++ lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { ++ *list = &ftrace_list_end; ++ return 0; ++ } ++ ++ for (p = list; *p != &ftrace_list_end; p = &(*p)->next) ++ if (*p == ops) ++ break; ++ ++ if (*p != ops) ++ return -1; ++ ++ *p = (*p)->next; ++ return 0; ++} ++ ++static void ftrace_update_trampoline(struct ftrace_ops *ops); ++ ++static int __register_ftrace_function(struct ftrace_ops *ops) ++{ ++ if (ops->flags & FTRACE_OPS_FL_DELETED) ++ return -EINVAL; ++ ++ if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) ++ return -EBUSY; ++ ++#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS ++ /* ++ * If the ftrace_ops specifies SAVE_REGS, then it only can be used ++ * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set. ++ * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant. 
++ */ ++ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS && ++ !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)) ++ return -EINVAL; ++ ++ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) ++ ops->flags |= FTRACE_OPS_FL_SAVE_REGS; ++#endif ++ ++ if (!core_kernel_data((unsigned long)ops)) ++ ops->flags |= FTRACE_OPS_FL_DYNAMIC; ++ ++ add_ftrace_ops(&ftrace_ops_list, ops); ++ ++ /* Always save the function, and reset at unregistering */ ++ ops->saved_func = ops->func; ++ ++ if (ftrace_pids_enabled(ops)) ++ ops->func = ftrace_pid_func; ++ ++ ftrace_update_trampoline(ops); ++ ++ if (ftrace_enabled) ++ update_ftrace_function(); ++ ++ return 0; ++} ++ ++static int __unregister_ftrace_function(struct ftrace_ops *ops) ++{ ++ int ret; ++ ++ if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) ++ return -EBUSY; ++ ++ ret = remove_ftrace_ops(&ftrace_ops_list, ops); ++ ++ if (ret < 0) ++ return ret; ++ ++ if (ftrace_enabled) ++ update_ftrace_function(); ++ ++ ops->func = ops->saved_func; ++ ++ return 0; ++} ++ ++static void ftrace_update_pid_func(void) ++{ ++ struct ftrace_ops *op; ++ ++ /* Only do something if we are tracing something */ ++ if (ftrace_trace_function == ftrace_stub) ++ return; ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ if (op->flags & FTRACE_OPS_FL_PID) { ++ op->func = ftrace_pids_enabled(op) ? ++ ftrace_pid_func : op->saved_func; ++ ftrace_update_trampoline(op); ++ } ++ } while_for_each_ftrace_op(op); ++ ++ update_ftrace_function(); ++} ++ ++#ifdef CONFIG_FUNCTION_PROFILER ++struct ftrace_profile { ++ struct hlist_node node; ++ unsigned long ip; ++ unsigned long counter; ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ unsigned long long time; ++ unsigned long long time_squared; ++#endif ++}; ++ ++struct ftrace_profile_page { ++ struct ftrace_profile_page *next; ++ unsigned long index; ++ struct ftrace_profile records[]; ++}; ++ ++struct ftrace_profile_stat { ++ atomic_t disabled; ++ struct hlist_head *hash; ++ struct ftrace_profile_page *pages; ++ struct ftrace_profile_page *start; ++ struct tracer_stat stat; ++}; ++ ++#define PROFILE_RECORDS_SIZE \ ++ (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) ++ ++#define PROFILES_PER_PAGE \ ++ (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) ++ ++static int ftrace_profile_enabled __read_mostly; ++ ++/* ftrace_profile_lock - synchronize the enable and disable of the profiler */ ++static DEFINE_MUTEX(ftrace_profile_lock); ++ ++static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); ++ ++#define FTRACE_PROFILE_HASH_BITS 10 ++#define FTRACE_PROFILE_HASH_SIZE (1 << FTRACE_PROFILE_HASH_BITS) ++ ++static void * ++function_stat_next(void *v, int idx) ++{ ++ struct ftrace_profile *rec = v; ++ struct ftrace_profile_page *pg; ++ ++ pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); ++ ++ again: ++ if (idx != 0) ++ rec++; ++ ++ if ((void *)rec >= (void *)&pg->records[pg->index]) { ++ pg = pg->next; ++ if (!pg) ++ return NULL; ++ rec = &pg->records[0]; ++ if (!rec->counter) ++ goto again; ++ } ++ ++ return rec; ++} ++ ++static void *function_stat_start(struct tracer_stat *trace) ++{ ++ struct ftrace_profile_stat *stat = ++ container_of(trace, struct ftrace_profile_stat, stat); ++ ++ if (!stat || !stat->start) ++ return NULL; ++ ++ return function_stat_next(&stat->start->records[0], 0); ++} ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++/* function graph compares on total time */ ++static int function_stat_cmp(void *p1, void *p2) ++{ ++ struct ftrace_profile *a = p1; ++ struct ftrace_profile *b = p2; ++ 
++ if (a->time < b->time) ++ return -1; ++ if (a->time > b->time) ++ return 1; ++ else ++ return 0; ++} ++#else ++/* not function graph compares against hits */ ++static int function_stat_cmp(void *p1, void *p2) ++{ ++ struct ftrace_profile *a = p1; ++ struct ftrace_profile *b = p2; ++ ++ if (a->counter < b->counter) ++ return -1; ++ if (a->counter > b->counter) ++ return 1; ++ else ++ return 0; ++} ++#endif ++ ++static int function_stat_headers(struct seq_file *m) ++{ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ seq_puts(m, " Function " ++ "Hit Time Avg s^2\n" ++ " -------- " ++ "--- ---- --- ---\n"); ++#else ++ seq_puts(m, " Function Hit\n" ++ " -------- ---\n"); ++#endif ++ return 0; ++} ++ ++static int function_stat_show(struct seq_file *m, void *v) ++{ ++ struct ftrace_profile *rec = v; ++ char str[KSYM_SYMBOL_LEN]; ++ int ret = 0; ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ static struct trace_seq s; ++ unsigned long long avg; ++ unsigned long long stddev; ++#endif ++ mutex_lock(&ftrace_profile_lock); ++ ++ /* we raced with function_profile_reset() */ ++ if (unlikely(rec->counter == 0)) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ avg = div64_ul(rec->time, rec->counter); ++ if (tracing_thresh && (avg < tracing_thresh)) ++ goto out; ++#endif ++ ++ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); ++ seq_printf(m, " %-30.30s %10lu", str, rec->counter); ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ seq_puts(m, " "); ++ ++ /* Sample standard deviation (s^2) */ ++ if (rec->counter <= 1) ++ stddev = 0; ++ else { ++ /* ++ * Apply Welford's method: ++ * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) ++ */ ++ stddev = rec->counter * rec->time_squared - ++ rec->time * rec->time; ++ ++ /* ++ * Divide only 1000 for ns^2 -> us^2 conversion. ++ * trace_print_graph_duration will divide 1000 again. ++ */ ++ stddev = div64_ul(stddev, ++ rec->counter * (rec->counter - 1) * 1000); ++ } ++ ++ trace_seq_init(&s); ++ trace_print_graph_duration(rec->time, &s); ++ trace_seq_puts(&s, " "); ++ trace_print_graph_duration(avg, &s); ++ trace_seq_puts(&s, " "); ++ trace_print_graph_duration(stddev, &s); ++ trace_print_seq(m, &s); ++#endif ++ seq_putc(m, '\n'); ++out: ++ mutex_unlock(&ftrace_profile_lock); ++ ++ return ret; ++} ++ ++static void ftrace_profile_reset(struct ftrace_profile_stat *stat) ++{ ++ struct ftrace_profile_page *pg; ++ ++ pg = stat->pages = stat->start; ++ ++ while (pg) { ++ memset(pg->records, 0, PROFILE_RECORDS_SIZE); ++ pg->index = 0; ++ pg = pg->next; ++ } ++ ++ memset(stat->hash, 0, ++ FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); ++} ++ ++int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) ++{ ++ struct ftrace_profile_page *pg; ++ int functions; ++ int pages; ++ int i; ++ ++ /* If we already allocated, do nothing */ ++ if (stat->pages) ++ return 0; ++ ++ stat->pages = (void *)get_zeroed_page(GFP_KERNEL); ++ if (!stat->pages) ++ return -ENOMEM; ++ ++#ifdef CONFIG_DYNAMIC_FTRACE ++ functions = ftrace_update_tot_cnt; ++#else ++ /* ++ * We do not know the number of functions that exist because ++ * dynamic tracing is what counts them. With past experience ++ * we have around 20K functions. That should be more than enough. ++ * It is highly unlikely we will execute every function in ++ * the kernel. 
++ */ ++ functions = 20000; ++#endif ++ ++ pg = stat->start = stat->pages; ++ ++ pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE); ++ ++ for (i = 1; i < pages; i++) { ++ pg->next = (void *)get_zeroed_page(GFP_KERNEL); ++ if (!pg->next) ++ goto out_free; ++ pg = pg->next; ++ } ++ ++ return 0; ++ ++ out_free: ++ pg = stat->start; ++ while (pg) { ++ unsigned long tmp = (unsigned long)pg; ++ ++ pg = pg->next; ++ free_page(tmp); ++ } ++ ++ stat->pages = NULL; ++ stat->start = NULL; ++ ++ return -ENOMEM; ++} ++ ++static int ftrace_profile_init_cpu(int cpu) ++{ ++ struct ftrace_profile_stat *stat; ++ int size; ++ ++ stat = &per_cpu(ftrace_profile_stats, cpu); ++ ++ if (stat->hash) { ++ /* If the profile is already created, simply reset it */ ++ ftrace_profile_reset(stat); ++ return 0; ++ } ++ ++ /* ++ * We are profiling all functions, but usually only a few thousand ++ * functions are hit. We'll make a hash of 1024 items. ++ */ ++ size = FTRACE_PROFILE_HASH_SIZE; ++ ++ stat->hash = kcalloc(size, sizeof(struct hlist_head), GFP_KERNEL); ++ ++ if (!stat->hash) ++ return -ENOMEM; ++ ++ /* Preallocate the function profiling pages */ ++ if (ftrace_profile_pages_init(stat) < 0) { ++ kfree(stat->hash); ++ stat->hash = NULL; ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static int ftrace_profile_init(void) ++{ ++ int cpu; ++ int ret = 0; ++ ++ for_each_possible_cpu(cpu) { ++ ret = ftrace_profile_init_cpu(cpu); ++ if (ret) ++ break; ++ } ++ ++ return ret; ++} ++ ++/* interrupts must be disabled */ ++static struct ftrace_profile * ++ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) ++{ ++ struct ftrace_profile *rec; ++ struct hlist_head *hhd; ++ unsigned long key; ++ ++ key = hash_long(ip, FTRACE_PROFILE_HASH_BITS); ++ hhd = &stat->hash[key]; ++ ++ if (hlist_empty(hhd)) ++ return NULL; ++ ++ hlist_for_each_entry_rcu_notrace(rec, hhd, node) { ++ if (rec->ip == ip) ++ return rec; ++ } ++ ++ return NULL; ++} ++ ++static void ftrace_add_profile(struct ftrace_profile_stat *stat, ++ struct ftrace_profile *rec) ++{ ++ unsigned long key; ++ ++ key = hash_long(rec->ip, FTRACE_PROFILE_HASH_BITS); ++ hlist_add_head_rcu(&rec->node, &stat->hash[key]); ++} ++ ++/* ++ * The memory is already allocated, this simply finds a new record to use. 
++ */ ++static struct ftrace_profile * ++ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) ++{ ++ struct ftrace_profile *rec = NULL; ++ ++ /* prevent recursion (from NMIs) */ ++ if (atomic_inc_return(&stat->disabled) != 1) ++ goto out; ++ ++ /* ++ * Try to find the function again since an NMI ++ * could have added it ++ */ ++ rec = ftrace_find_profiled_func(stat, ip); ++ if (rec) ++ goto out; ++ ++ if (stat->pages->index == PROFILES_PER_PAGE) { ++ if (!stat->pages->next) ++ goto out; ++ stat->pages = stat->pages->next; ++ } ++ ++ rec = &stat->pages->records[stat->pages->index++]; ++ rec->ip = ip; ++ ftrace_add_profile(stat, rec); ++ ++ out: ++ atomic_dec(&stat->disabled); ++ ++ return rec; ++} ++ ++static void ++function_profile_call(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *ops, struct pt_regs *regs) ++{ ++ struct ftrace_profile_stat *stat; ++ struct ftrace_profile *rec; ++ unsigned long flags; ++ ++ if (!ftrace_profile_enabled) ++ return; ++ ++ local_irq_save(flags); ++ ++ stat = this_cpu_ptr(&ftrace_profile_stats); ++ if (!stat->hash || !ftrace_profile_enabled) ++ goto out; ++ ++ rec = ftrace_find_profiled_func(stat, ip); ++ if (!rec) { ++ rec = ftrace_profile_alloc(stat, ip); ++ if (!rec) ++ goto out; ++ } ++ ++ rec->counter++; ++ out: ++ local_irq_restore(flags); ++} ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++static int profile_graph_entry(struct ftrace_graph_ent *trace) ++{ ++ int index = current->curr_ret_stack; ++ ++ function_profile_call(trace->func, 0, NULL, NULL); ++ ++ /* If function graph is shutting down, ret_stack can be NULL */ ++ if (!current->ret_stack) ++ return 0; ++ ++ if (index >= 0 && index < FTRACE_RETFUNC_DEPTH) ++ current->ret_stack[index].subtime = 0; ++ ++ return 1; ++} ++ ++static void profile_graph_return(struct ftrace_graph_ret *trace) ++{ ++ struct ftrace_profile_stat *stat; ++ unsigned long long calltime; ++ struct ftrace_profile *rec; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ stat = this_cpu_ptr(&ftrace_profile_stats); ++ if (!stat->hash || !ftrace_profile_enabled) ++ goto out; ++ ++ /* If the calltime was zero'd ignore it */ ++ if (!trace->calltime) ++ goto out; ++ ++ calltime = trace->rettime - trace->calltime; ++ ++ if (!fgraph_graph_time) { ++ int index; ++ ++ index = current->curr_ret_stack; ++ ++ /* Append this call time to the parent time to subtract */ ++ if (index) ++ current->ret_stack[index - 1].subtime += calltime; ++ ++ if (current->ret_stack[index].subtime < calltime) ++ calltime -= current->ret_stack[index].subtime; ++ else ++ calltime = 0; ++ } ++ ++ rec = ftrace_find_profiled_func(stat, trace->func); ++ if (rec) { ++ rec->time += calltime; ++ rec->time_squared += calltime * calltime; ++ } ++ ++ out: ++ local_irq_restore(flags); ++} ++ ++static int register_ftrace_profiler(void) ++{ ++ return register_ftrace_graph(&profile_graph_return, ++ &profile_graph_entry); ++} ++ ++static void unregister_ftrace_profiler(void) ++{ ++ unregister_ftrace_graph(); ++} ++#else ++static struct ftrace_ops ftrace_profile_ops __read_mostly = { ++ .func = function_profile_call, ++ .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, ++ INIT_OPS_HASH(ftrace_profile_ops) ++}; ++ ++static int register_ftrace_profiler(void) ++{ ++ return register_ftrace_function(&ftrace_profile_ops); ++} ++ ++static void unregister_ftrace_profiler(void) ++{ ++ unregister_ftrace_function(&ftrace_profile_ops); ++} ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++ ++static ssize_t ++ftrace_profile_write(struct file 
*filp, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ unsigned long val; ++ int ret; ++ ++ ret = kstrtoul_from_user(ubuf, cnt, 10, &val); ++ if (ret) ++ return ret; ++ ++ val = !!val; ++ ++ mutex_lock(&ftrace_profile_lock); ++ if (ftrace_profile_enabled ^ val) { ++ if (val) { ++ ret = ftrace_profile_init(); ++ if (ret < 0) { ++ cnt = ret; ++ goto out; ++ } ++ ++ ret = register_ftrace_profiler(); ++ if (ret < 0) { ++ cnt = ret; ++ goto out; ++ } ++ ftrace_profile_enabled = 1; ++ } else { ++ ftrace_profile_enabled = 0; ++ /* ++ * unregister_ftrace_profiler calls stop_machine ++ * so this acts like an synchronize_sched. ++ */ ++ unregister_ftrace_profiler(); ++ } ++ } ++ out: ++ mutex_unlock(&ftrace_profile_lock); ++ ++ *ppos += cnt; ++ ++ return cnt; ++} ++ ++static ssize_t ++ftrace_profile_read(struct file *filp, char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ char buf[64]; /* big enough to hold a number */ ++ int r; ++ ++ r = sprintf(buf, "%u\n", ftrace_profile_enabled); ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); ++} ++ ++static const struct file_operations ftrace_profile_fops = { ++ .open = tracing_open_generic, ++ .read = ftrace_profile_read, ++ .write = ftrace_profile_write, ++ .llseek = default_llseek, ++}; ++ ++/* used to initialize the real stat files */ ++static struct tracer_stat function_stats __initdata = { ++ .name = "functions", ++ .stat_start = function_stat_start, ++ .stat_next = function_stat_next, ++ .stat_cmp = function_stat_cmp, ++ .stat_headers = function_stat_headers, ++ .stat_show = function_stat_show ++}; ++ ++static __init void ftrace_profile_tracefs(struct dentry *d_tracer) ++{ ++ struct ftrace_profile_stat *stat; ++ struct dentry *entry; ++ char *name; ++ int ret; ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ stat = &per_cpu(ftrace_profile_stats, cpu); ++ ++ name = kasprintf(GFP_KERNEL, "function%d", cpu); ++ if (!name) { ++ /* ++ * The files created are permanent, if something happens ++ * we still do not free memory. ++ */ ++ WARN(1, ++ "Could not allocate stat file for cpu %d\n", ++ cpu); ++ return; ++ } ++ stat->stat = function_stats; ++ stat->stat.name = name; ++ ret = register_stat_tracer(&stat->stat); ++ if (ret) { ++ WARN(1, ++ "Could not register function stat for cpu %d\n", ++ cpu); ++ kfree(name); ++ return; ++ } ++ } ++ ++ entry = tracefs_create_file("function_profile_enabled", 0644, ++ d_tracer, NULL, &ftrace_profile_fops); ++ if (!entry) ++ pr_warn("Could not create tracefs 'function_profile_enabled' entry\n"); ++} ++ ++#else /* CONFIG_FUNCTION_PROFILER */ ++static __init void ftrace_profile_tracefs(struct dentry *d_tracer) ++{ ++} ++#endif /* CONFIG_FUNCTION_PROFILER */ ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++static int ftrace_graph_active; ++#else ++# define ftrace_graph_active 0 ++#endif ++ ++#ifdef CONFIG_DYNAMIC_FTRACE ++ ++static struct ftrace_ops *removed_ops; ++ ++/* ++ * Set when doing a global update, like enabling all recs or disabling them. ++ * It is not set when just updating a single ftrace_ops. 
++ */ ++static bool update_all_ops; ++ ++#ifndef CONFIG_FTRACE_MCOUNT_RECORD ++# error Dynamic ftrace depends on MCOUNT_RECORD ++#endif ++ ++struct ftrace_func_entry { ++ struct hlist_node hlist; ++ unsigned long ip; ++}; ++ ++struct ftrace_func_probe { ++ struct ftrace_probe_ops *probe_ops; ++ struct ftrace_ops ops; ++ struct trace_array *tr; ++ struct list_head list; ++ void *data; ++ int ref; ++}; ++ ++/* ++ * We make these constant because no one should touch them, ++ * but they are used as the default "empty hash", to avoid allocating ++ * it all the time. These are in a read only section such that if ++ * anyone does try to modify it, it will cause an exception. ++ */ ++static const struct hlist_head empty_buckets[1]; ++static const struct ftrace_hash empty_hash = { ++ .buckets = (struct hlist_head *)empty_buckets, ++}; ++#define EMPTY_HASH ((struct ftrace_hash *)&empty_hash) ++ ++static struct ftrace_ops global_ops = { ++ .func = ftrace_stub, ++ .local_hash.notrace_hash = EMPTY_HASH, ++ .local_hash.filter_hash = EMPTY_HASH, ++ INIT_OPS_HASH(global_ops) ++ .flags = FTRACE_OPS_FL_RECURSION_SAFE | ++ FTRACE_OPS_FL_INITIALIZED | ++ FTRACE_OPS_FL_PID, ++}; ++ ++/* ++ * Used by the stack undwinder to know about dynamic ftrace trampolines. ++ */ ++struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) ++{ ++ struct ftrace_ops *op = NULL; ++ ++ /* ++ * Some of the ops may be dynamically allocated, ++ * they are freed after a synchronize_sched(). ++ */ ++ preempt_disable_notrace(); ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ /* ++ * This is to check for dynamically allocated trampolines. ++ * Trampolines that are in kernel text will have ++ * core_kernel_text() return true. ++ */ ++ if (op->trampoline && op->trampoline_size) ++ if (addr >= op->trampoline && ++ addr < op->trampoline + op->trampoline_size) { ++ preempt_enable_notrace(); ++ return op; ++ } ++ } while_for_each_ftrace_op(op); ++ preempt_enable_notrace(); ++ ++ return NULL; ++} ++ ++/* ++ * This is used by __kernel_text_address() to return true if the ++ * address is on a dynamically allocated trampoline that would ++ * not return true for either core_kernel_text() or ++ * is_module_text_address(). 
++ */ ++bool is_ftrace_trampoline(unsigned long addr) ++{ ++ return ftrace_ops_trampoline(addr) != NULL; ++} ++ ++struct ftrace_page { ++ struct ftrace_page *next; ++ struct dyn_ftrace *records; ++ int index; ++ int size; ++}; ++ ++#define ENTRY_SIZE sizeof(struct dyn_ftrace) ++#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) ++ ++/* estimate from running different kernels */ ++#define NR_TO_INIT 10000 ++ ++static struct ftrace_page *ftrace_pages_start; ++static struct ftrace_page *ftrace_pages; ++ ++static __always_inline unsigned long ++ftrace_hash_key(struct ftrace_hash *hash, unsigned long ip) ++{ ++ if (hash->size_bits > 0) ++ return hash_long(ip, hash->size_bits); ++ ++ return 0; ++} ++ ++/* Only use this function if ftrace_hash_empty() has already been tested */ ++static __always_inline struct ftrace_func_entry * ++__ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) ++{ ++ unsigned long key; ++ struct ftrace_func_entry *entry; ++ struct hlist_head *hhd; ++ ++ key = ftrace_hash_key(hash, ip); ++ hhd = &hash->buckets[key]; ++ ++ hlist_for_each_entry_rcu_notrace(entry, hhd, hlist) { ++ if (entry->ip == ip) ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * ftrace_lookup_ip - Test to see if an ip exists in an ftrace_hash ++ * @hash: The hash to look at ++ * @ip: The instruction pointer to test ++ * ++ * Search a given @hash to see if a given instruction pointer (@ip) ++ * exists in it. ++ * ++ * Returns the entry that holds the @ip if found. NULL otherwise. ++ */ ++struct ftrace_func_entry * ++ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) ++{ ++ if (ftrace_hash_empty(hash)) ++ return NULL; ++ ++ return __ftrace_lookup_ip(hash, ip); ++} ++ ++static void __add_hash_entry(struct ftrace_hash *hash, ++ struct ftrace_func_entry *entry) ++{ ++ struct hlist_head *hhd; ++ unsigned long key; ++ ++ key = ftrace_hash_key(hash, entry->ip); ++ hhd = &hash->buckets[key]; ++ hlist_add_head(&entry->hlist, hhd); ++ hash->count++; ++} ++ ++static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip) ++{ ++ struct ftrace_func_entry *entry; ++ ++ entry = kmalloc(sizeof(*entry), GFP_KERNEL); ++ if (!entry) ++ return -ENOMEM; ++ ++ entry->ip = ip; ++ __add_hash_entry(hash, entry); ++ ++ return 0; ++} ++ ++static void ++free_hash_entry(struct ftrace_hash *hash, ++ struct ftrace_func_entry *entry) ++{ ++ hlist_del(&entry->hlist); ++ kfree(entry); ++ hash->count--; ++} ++ ++static void ++remove_hash_entry(struct ftrace_hash *hash, ++ struct ftrace_func_entry *entry) ++{ ++ hlist_del_rcu(&entry->hlist); ++ hash->count--; ++} ++ ++static void ftrace_hash_clear(struct ftrace_hash *hash) ++{ ++ struct hlist_head *hhd; ++ struct hlist_node *tn; ++ struct ftrace_func_entry *entry; ++ int size = 1 << hash->size_bits; ++ int i; ++ ++ if (!hash->count) ++ return; ++ ++ for (i = 0; i < size; i++) { ++ hhd = &hash->buckets[i]; ++ hlist_for_each_entry_safe(entry, tn, hhd, hlist) ++ free_hash_entry(hash, entry); ++ } ++ FTRACE_WARN_ON(hash->count); ++} ++ ++static void free_ftrace_mod(struct ftrace_mod_load *ftrace_mod) ++{ ++ list_del(&ftrace_mod->list); ++ kfree(ftrace_mod->module); ++ kfree(ftrace_mod->func); ++ kfree(ftrace_mod); ++} ++ ++static void clear_ftrace_mod_list(struct list_head *head) ++{ ++ struct ftrace_mod_load *p, *n; ++ ++ /* stack tracer isn't supported yet */ ++ if (!head) ++ return; ++ ++ mutex_lock(&ftrace_lock); ++ list_for_each_entry_safe(p, n, head, list) ++ free_ftrace_mod(p); ++ mutex_unlock(&ftrace_lock); ++} ++ ++static void free_ftrace_hash(struct 
ftrace_hash *hash) ++{ ++ if (!hash || hash == EMPTY_HASH) ++ return; ++ ftrace_hash_clear(hash); ++ kfree(hash->buckets); ++ kfree(hash); ++} ++ ++static void __free_ftrace_hash_rcu(struct rcu_head *rcu) ++{ ++ struct ftrace_hash *hash; ++ ++ hash = container_of(rcu, struct ftrace_hash, rcu); ++ free_ftrace_hash(hash); ++} ++ ++static void free_ftrace_hash_rcu(struct ftrace_hash *hash) ++{ ++ if (!hash || hash == EMPTY_HASH) ++ return; ++ call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); ++} ++ ++void ftrace_free_filter(struct ftrace_ops *ops) ++{ ++ ftrace_ops_init(ops); ++ free_ftrace_hash(ops->func_hash->filter_hash); ++ free_ftrace_hash(ops->func_hash->notrace_hash); ++} ++ ++static struct ftrace_hash *alloc_ftrace_hash(int size_bits) ++{ ++ struct ftrace_hash *hash; ++ int size; ++ ++ hash = kzalloc(sizeof(*hash), GFP_KERNEL); ++ if (!hash) ++ return NULL; ++ ++ size = 1 << size_bits; ++ hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL); ++ ++ if (!hash->buckets) { ++ kfree(hash); ++ return NULL; ++ } ++ ++ hash->size_bits = size_bits; ++ ++ return hash; ++} ++ ++ ++static int ftrace_add_mod(struct trace_array *tr, ++ const char *func, const char *module, ++ int enable) ++{ ++ struct ftrace_mod_load *ftrace_mod; ++ struct list_head *mod_head = enable ? &tr->mod_trace : &tr->mod_notrace; ++ ++ ftrace_mod = kzalloc(sizeof(*ftrace_mod), GFP_KERNEL); ++ if (!ftrace_mod) ++ return -ENOMEM; ++ ++ ftrace_mod->func = kstrdup(func, GFP_KERNEL); ++ ftrace_mod->module = kstrdup(module, GFP_KERNEL); ++ ftrace_mod->enable = enable; ++ ++ if (!ftrace_mod->func || !ftrace_mod->module) ++ goto out_free; ++ ++ list_add(&ftrace_mod->list, mod_head); ++ ++ return 0; ++ ++ out_free: ++ free_ftrace_mod(ftrace_mod); ++ ++ return -ENOMEM; ++} ++ ++static struct ftrace_hash * ++alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_hash *new_hash; ++ int size; ++ int ret; ++ int i; ++ ++ new_hash = alloc_ftrace_hash(size_bits); ++ if (!new_hash) ++ return NULL; ++ ++ if (hash) ++ new_hash->flags = hash->flags; ++ ++ /* Empty hash? */ ++ if (ftrace_hash_empty(hash)) ++ return new_hash; ++ ++ size = 1 << hash->size_bits; ++ for (i = 0; i < size; i++) { ++ hlist_for_each_entry(entry, &hash->buckets[i], hlist) { ++ ret = add_hash_entry(new_hash, entry->ip); ++ if (ret < 0) ++ goto free_hash; ++ } ++ } ++ ++ FTRACE_WARN_ON(new_hash->count != hash->count); ++ ++ return new_hash; ++ ++ free_hash: ++ free_ftrace_hash(new_hash); ++ return NULL; ++} ++ ++static void ++ftrace_hash_rec_disable_modify(struct ftrace_ops *ops, int filter_hash); ++static void ++ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash); ++ ++static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, ++ struct ftrace_hash *new_hash); ++ ++static struct ftrace_hash * ++__ftrace_hash_move(struct ftrace_hash *src) ++{ ++ struct ftrace_func_entry *entry; ++ struct hlist_node *tn; ++ struct hlist_head *hhd; ++ struct ftrace_hash *new_hash; ++ int size = src->count; ++ int bits = 0; ++ int i; ++ ++ /* ++ * If the new source is empty, just return the empty_hash. 
++ */ ++ if (ftrace_hash_empty(src)) ++ return EMPTY_HASH; ++ ++ /* ++ * Make the hash size about 1/2 the # found ++ */ ++ for (size /= 2; size; size >>= 1) ++ bits++; ++ ++ /* Don't allocate too much */ ++ if (bits > FTRACE_HASH_MAX_BITS) ++ bits = FTRACE_HASH_MAX_BITS; ++ ++ new_hash = alloc_ftrace_hash(bits); ++ if (!new_hash) ++ return NULL; ++ ++ new_hash->flags = src->flags; ++ ++ size = 1 << src->size_bits; ++ for (i = 0; i < size; i++) { ++ hhd = &src->buckets[i]; ++ hlist_for_each_entry_safe(entry, tn, hhd, hlist) { ++ remove_hash_entry(src, entry); ++ __add_hash_entry(new_hash, entry); ++ } ++ } ++ ++ return new_hash; ++} ++ ++static int ++ftrace_hash_move(struct ftrace_ops *ops, int enable, ++ struct ftrace_hash **dst, struct ftrace_hash *src) ++{ ++ struct ftrace_hash *new_hash; ++ int ret; ++ ++ /* Reject setting notrace hash on IPMODIFY ftrace_ops */ ++ if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable) ++ return -EINVAL; ++ ++ new_hash = __ftrace_hash_move(src); ++ if (!new_hash) ++ return -ENOMEM; ++ ++ /* Make sure this can be applied if it is IPMODIFY ftrace_ops */ ++ if (enable) { ++ /* IPMODIFY should be updated only when filter_hash updating */ ++ ret = ftrace_hash_ipmodify_update(ops, new_hash); ++ if (ret < 0) { ++ free_ftrace_hash(new_hash); ++ return ret; ++ } ++ } ++ ++ /* ++ * Remove the current set, update the hash and add ++ * them back. ++ */ ++ ftrace_hash_rec_disable_modify(ops, enable); ++ ++ rcu_assign_pointer(*dst, new_hash); ++ ++ ftrace_hash_rec_enable_modify(ops, enable); ++ ++ return 0; ++} ++ ++static bool hash_contains_ip(unsigned long ip, ++ struct ftrace_ops_hash *hash) ++{ ++ /* ++ * The function record is a match if it exists in the filter ++ * hash and not in the notrace hash. Note, an emty hash is ++ * considered a match for the filter hash, but an empty ++ * notrace hash is considered not in the notrace hash. ++ */ ++ return (ftrace_hash_empty(hash->filter_hash) || ++ __ftrace_lookup_ip(hash->filter_hash, ip)) && ++ (ftrace_hash_empty(hash->notrace_hash) || ++ !__ftrace_lookup_ip(hash->notrace_hash, ip)); ++} ++ ++/* ++ * Test the hashes for this ops to see if we want to call ++ * the ops->func or not. ++ * ++ * It's a match if the ip is in the ops->filter_hash or ++ * the filter_hash does not exist or is empty, ++ * AND ++ * the ip is not in the ops->notrace_hash. ++ * ++ * This needs to be called with preemption disabled as ++ * the hashes are freed with call_rcu_sched(). ++ */ ++static int ++ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) ++{ ++ struct ftrace_ops_hash hash; ++ int ret; ++ ++#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS ++ /* ++ * There's a small race when adding ops that the ftrace handler ++ * that wants regs, may be called without them. We can not ++ * allow that handler to be called if regs is NULL. ++ */ ++ if (regs == NULL && (ops->flags & FTRACE_OPS_FL_SAVE_REGS)) ++ return 0; ++#endif ++ ++ rcu_assign_pointer(hash.filter_hash, ops->func_hash->filter_hash); ++ rcu_assign_pointer(hash.notrace_hash, ops->func_hash->notrace_hash); ++ ++ if (hash_contains_ip(ip, &hash)) ++ ret = 1; ++ else ++ ret = 0; ++ ++ return ret; ++} ++ ++/* ++ * This is a double for. Do not use 'break' to break out of the loop, ++ * you must use a goto. 
++ */ ++#define do_for_each_ftrace_rec(pg, rec) \ ++ for (pg = ftrace_pages_start; pg; pg = pg->next) { \ ++ int _____i; \ ++ for (_____i = 0; _____i < pg->index; _____i++) { \ ++ rec = &pg->records[_____i]; ++ ++#define while_for_each_ftrace_rec() \ ++ } \ ++ } ++ ++ ++static int ftrace_cmp_recs(const void *a, const void *b) ++{ ++ const struct dyn_ftrace *key = a; ++ const struct dyn_ftrace *rec = b; ++ ++ if (key->flags < rec->ip) ++ return -1; ++ if (key->ip >= rec->ip + MCOUNT_INSN_SIZE) ++ return 1; ++ return 0; ++} ++ ++/** ++ * ftrace_location_range - return the first address of a traced location ++ * if it touches the given ip range ++ * @start: start of range to search. ++ * @end: end of range to search (inclusive). @end points to the last byte ++ * to check. ++ * ++ * Returns rec->ip if the related ftrace location is a least partly within ++ * the given address range. That is, the first address of the instruction ++ * that is either a NOP or call to the function tracer. It checks the ftrace ++ * internal tables to determine if the address belongs or not. ++ */ ++unsigned long ftrace_location_range(unsigned long start, unsigned long end) ++{ ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ struct dyn_ftrace key; ++ ++ key.ip = start; ++ key.flags = end; /* overload flags, as it is unsigned long */ ++ ++ for (pg = ftrace_pages_start; pg; pg = pg->next) { ++ if (end < pg->records[0].ip || ++ start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) ++ continue; ++ rec = bsearch(&key, pg->records, pg->index, ++ sizeof(struct dyn_ftrace), ++ ftrace_cmp_recs); ++ if (rec) ++ return rec->ip; ++ } ++ ++ return 0; ++} ++ ++/** ++ * ftrace_location - return true if the ip giving is a traced location ++ * @ip: the instruction pointer to check ++ * ++ * Returns rec->ip if @ip given is a pointer to a ftrace location. ++ * That is, the instruction that is either a NOP or call to ++ * the function tracer. It checks the ftrace internal tables to ++ * determine if the address belongs or not. ++ */ ++unsigned long ftrace_location(unsigned long ip) ++{ ++ return ftrace_location_range(ip, ip); ++} ++ ++/** ++ * ftrace_text_reserved - return true if range contains an ftrace location ++ * @start: start of range to search ++ * @end: end of range to search (inclusive). @end points to the last byte to check. ++ * ++ * Returns 1 if @start and @end contains a ftrace location. ++ * That is, the instruction that is either a NOP or call to ++ * the function tracer. It checks the ftrace internal tables to ++ * determine if the address belongs or not. 
++ */ ++int ftrace_text_reserved(const void *start, const void *end) ++{ ++ unsigned long ret; ++ ++ ret = ftrace_location_range((unsigned long)start, ++ (unsigned long)end); ++ ++ return (int)!!ret; ++} ++ ++/* Test if ops registered to this rec needs regs */ ++static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *ops; ++ bool keep_regs = false; ++ ++ for (ops = ftrace_ops_list; ++ ops != &ftrace_list_end; ops = ops->next) { ++ /* pass rec in as regs to have non-NULL val */ ++ if (ftrace_ops_test(ops, rec->ip, rec)) { ++ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { ++ keep_regs = true; ++ break; ++ } ++ } ++ } ++ ++ return keep_regs; ++} ++ ++static struct ftrace_ops * ++ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); ++static struct ftrace_ops * ++ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); ++ ++static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, ++ int filter_hash, ++ bool inc) ++{ ++ struct ftrace_hash *hash; ++ struct ftrace_hash *other_hash; ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ bool update = false; ++ int count = 0; ++ int all = false; ++ ++ /* Only update if the ops has been registered */ ++ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) ++ return false; ++ ++ /* ++ * In the filter_hash case: ++ * If the count is zero, we update all records. ++ * Otherwise we just update the items in the hash. ++ * ++ * In the notrace_hash case: ++ * We enable the update in the hash. ++ * As disabling notrace means enabling the tracing, ++ * and enabling notrace means disabling, the inc variable ++ * gets inversed. ++ */ ++ if (filter_hash) { ++ hash = ops->func_hash->filter_hash; ++ other_hash = ops->func_hash->notrace_hash; ++ if (ftrace_hash_empty(hash)) ++ all = true; ++ } else { ++ inc = !inc; ++ hash = ops->func_hash->notrace_hash; ++ other_hash = ops->func_hash->filter_hash; ++ /* ++ * If the notrace hash has no items, ++ * then there's nothing to do. ++ */ ++ if (ftrace_hash_empty(hash)) ++ return false; ++ } ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ int in_other_hash = 0; ++ int in_hash = 0; ++ int match = 0; ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ if (all) { ++ /* ++ * Only the filter_hash affects all records. ++ * Update if the record is not in the notrace hash. ++ */ ++ if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) ++ match = 1; ++ } else { ++ in_hash = !!ftrace_lookup_ip(hash, rec->ip); ++ in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); ++ ++ /* ++ * If filter_hash is set, we want to match all functions ++ * that are in the hash but not in the other hash. ++ * ++ * If filter_hash is not set, then we are decrementing. ++ * That means we match anything that is in the hash ++ * and also in the other_hash. That is, we need to turn ++ * off functions in the other hash because they are disabled ++ * by this hash. ++ */ ++ if (filter_hash && in_hash && !in_other_hash) ++ match = 1; ++ else if (!filter_hash && in_hash && ++ (in_other_hash || ftrace_hash_empty(other_hash))) ++ match = 1; ++ } ++ if (!match) ++ continue; ++ ++ if (inc) { ++ rec->flags++; ++ if (FTRACE_WARN_ON(ftrace_rec_count(rec) == FTRACE_REF_MAX)) ++ return false; ++ ++ /* ++ * If there's only a single callback registered to a ++ * function, and the ops has a trampoline registered ++ * for it, then we can call it directly. 
++ */ ++ if (ftrace_rec_count(rec) == 1 && ops->trampoline) ++ rec->flags |= FTRACE_FL_TRAMP; ++ else ++ /* ++ * If we are adding another function callback ++ * to this function, and the previous had a ++ * custom trampoline in use, then we need to go ++ * back to the default trampoline. ++ */ ++ rec->flags &= ~FTRACE_FL_TRAMP; ++ ++ /* ++ * If any ops wants regs saved for this function ++ * then all ops will get saved regs. ++ */ ++ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) ++ rec->flags |= FTRACE_FL_REGS; ++ } else { ++ if (FTRACE_WARN_ON(ftrace_rec_count(rec) == 0)) ++ return false; ++ rec->flags--; ++ ++ /* ++ * If the rec had REGS enabled and the ops that is ++ * being removed had REGS set, then see if there is ++ * still any ops for this record that wants regs. ++ * If not, we can stop recording them. ++ */ ++ if (ftrace_rec_count(rec) > 0 && ++ rec->flags & FTRACE_FL_REGS && ++ ops->flags & FTRACE_OPS_FL_SAVE_REGS) { ++ if (!test_rec_ops_needs_regs(rec)) ++ rec->flags &= ~FTRACE_FL_REGS; ++ } ++ ++ /* ++ * The TRAMP needs to be set only if rec count ++ * is decremented to one, and the ops that is ++ * left has a trampoline. As TRAMP can only be ++ * enabled if there is only a single ops attached ++ * to it. ++ */ ++ if (ftrace_rec_count(rec) == 1 && ++ ftrace_find_tramp_ops_any(rec)) ++ rec->flags |= FTRACE_FL_TRAMP; ++ else ++ rec->flags &= ~FTRACE_FL_TRAMP; ++ ++ /* ++ * flags will be cleared in ftrace_check_record() ++ * if rec count is zero. ++ */ ++ } ++ count++; ++ ++ /* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */ ++ update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE; ++ ++ /* Shortcut, if we handled all records, we are done. */ ++ if (!all && count == hash->count) ++ return update; ++ } while_for_each_ftrace_rec(); ++ ++ return update; ++} ++ ++static bool ftrace_hash_rec_disable(struct ftrace_ops *ops, ++ int filter_hash) ++{ ++ return __ftrace_hash_rec_update(ops, filter_hash, 0); ++} ++ ++static bool ftrace_hash_rec_enable(struct ftrace_ops *ops, ++ int filter_hash) ++{ ++ return __ftrace_hash_rec_update(ops, filter_hash, 1); ++} ++ ++static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops, ++ int filter_hash, int inc) ++{ ++ struct ftrace_ops *op; ++ ++ __ftrace_hash_rec_update(ops, filter_hash, inc); ++ ++ if (ops->func_hash != &global_ops.local_hash) ++ return; ++ ++ /* ++ * If the ops shares the global_ops hash, then we need to update ++ * all ops that are enabled and use this hash. ++ */ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ /* Already done */ ++ if (op == ops) ++ continue; ++ if (op->func_hash == &global_ops.local_hash) ++ __ftrace_hash_rec_update(op, filter_hash, inc); ++ } while_for_each_ftrace_op(op); ++} ++ ++static void ftrace_hash_rec_disable_modify(struct ftrace_ops *ops, ++ int filter_hash) ++{ ++ ftrace_hash_rec_update_modify(ops, filter_hash, 0); ++} ++ ++static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, ++ int filter_hash) ++{ ++ ftrace_hash_rec_update_modify(ops, filter_hash, 1); ++} ++ ++/* ++ * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK ++ * or no-needed to update, -EBUSY if it detects a conflict of the flag ++ * on a ftrace_rec, and -EINVAL if the new_hash tries to trace all recs. ++ * Note that old_hash and new_hash has below meanings ++ * - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected) ++ * - If the hash is EMPTY_HASH, it hits nothing ++ * - Anything else hits the recs which match the hash entries. 
++ */ ++static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, ++ struct ftrace_hash *old_hash, ++ struct ftrace_hash *new_hash) ++{ ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec, *end = NULL; ++ int in_old, in_new; ++ ++ /* Only update if the ops has been registered */ ++ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) ++ return 0; ++ ++ if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) ++ return 0; ++ ++ /* ++ * Since the IPMODIFY is a very address sensitive action, we do not ++ * allow ftrace_ops to set all functions to new hash. ++ */ ++ if (!new_hash || !old_hash) ++ return -EINVAL; ++ ++ /* Update rec->flags */ ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ /* We need to update only differences of filter_hash */ ++ in_old = !!ftrace_lookup_ip(old_hash, rec->ip); ++ in_new = !!ftrace_lookup_ip(new_hash, rec->ip); ++ if (in_old == in_new) ++ continue; ++ ++ if (in_new) { ++ /* New entries must ensure no others are using it */ ++ if (rec->flags & FTRACE_FL_IPMODIFY) ++ goto rollback; ++ rec->flags |= FTRACE_FL_IPMODIFY; ++ } else /* Removed entry */ ++ rec->flags &= ~FTRACE_FL_IPMODIFY; ++ } while_for_each_ftrace_rec(); ++ ++ return 0; ++ ++rollback: ++ end = rec; ++ ++ /* Roll back what we did above */ ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ if (rec == end) ++ goto err_out; ++ ++ in_old = !!ftrace_lookup_ip(old_hash, rec->ip); ++ in_new = !!ftrace_lookup_ip(new_hash, rec->ip); ++ if (in_old == in_new) ++ continue; ++ ++ if (in_new) ++ rec->flags &= ~FTRACE_FL_IPMODIFY; ++ else ++ rec->flags |= FTRACE_FL_IPMODIFY; ++ } while_for_each_ftrace_rec(); ++ ++err_out: ++ return -EBUSY; ++} ++ ++static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops) ++{ ++ struct ftrace_hash *hash = ops->func_hash->filter_hash; ++ ++ if (ftrace_hash_empty(hash)) ++ hash = NULL; ++ ++ return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash); ++} ++ ++/* Disabling always succeeds */ ++static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops) ++{ ++ struct ftrace_hash *hash = ops->func_hash->filter_hash; ++ ++ if (ftrace_hash_empty(hash)) ++ hash = NULL; ++ ++ __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH); ++} ++ ++static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, ++ struct ftrace_hash *new_hash) ++{ ++ struct ftrace_hash *old_hash = ops->func_hash->filter_hash; ++ ++ if (ftrace_hash_empty(old_hash)) ++ old_hash = NULL; ++ ++ if (ftrace_hash_empty(new_hash)) ++ new_hash = NULL; ++ ++ return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash); ++} ++ ++static void print_ip_ins(const char *fmt, const unsigned char *p) ++{ ++ int i; ++ ++ printk(KERN_CONT "%s", fmt); ++ ++ for (i = 0; i < MCOUNT_INSN_SIZE; i++) ++ printk(KERN_CONT "%s%02x", i ? 
":" : "", p[i]); ++} ++ ++enum ftrace_bug_type ftrace_bug_type; ++const void *ftrace_expected; ++ ++static void print_bug_type(void) ++{ ++ switch (ftrace_bug_type) { ++ case FTRACE_BUG_UNKNOWN: ++ break; ++ case FTRACE_BUG_INIT: ++ pr_info("Initializing ftrace call sites\n"); ++ break; ++ case FTRACE_BUG_NOP: ++ pr_info("Setting ftrace call site to NOP\n"); ++ break; ++ case FTRACE_BUG_CALL: ++ pr_info("Setting ftrace call site to call ftrace function\n"); ++ break; ++ case FTRACE_BUG_UPDATE: ++ pr_info("Updating ftrace call site to call a different ftrace function\n"); ++ break; ++ } ++} ++ ++/** ++ * ftrace_bug - report and shutdown function tracer ++ * @failed: The failed type (EFAULT, EINVAL, EPERM) ++ * @rec: The record that failed ++ * ++ * The arch code that enables or disables the function tracing ++ * can call ftrace_bug() when it has detected a problem in ++ * modifying the code. @failed should be one of either: ++ * EFAULT - if the problem happens on reading the @ip address ++ * EINVAL - if what is read at @ip is not what was expected ++ * EPERM - if the problem happens on writting to the @ip address ++ */ ++void ftrace_bug(int failed, struct dyn_ftrace *rec) ++{ ++ unsigned long ip = rec ? rec->ip : 0; ++ ++ switch (failed) { ++ case -EFAULT: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace faulted on modifying "); ++ print_ip_sym(ip); ++ break; ++ case -EINVAL: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace failed to modify "); ++ print_ip_sym(ip); ++ print_ip_ins(" actual: ", (unsigned char *)ip); ++ pr_cont("\n"); ++ if (ftrace_expected) { ++ print_ip_ins(" expected: ", ftrace_expected); ++ pr_cont("\n"); ++ } ++ break; ++ case -EPERM: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace faulted on writing "); ++ print_ip_sym(ip); ++ break; ++ default: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace faulted on unknown error "); ++ print_ip_sym(ip); ++ } ++ print_bug_type(); ++ if (rec) { ++ struct ftrace_ops *ops = NULL; ++ ++ pr_info("ftrace record flags: %lx\n", rec->flags); ++ pr_cont(" (%ld)%s", ftrace_rec_count(rec), ++ rec->flags & FTRACE_FL_REGS ? " R" : " "); ++ if (rec->flags & FTRACE_FL_TRAMP_EN) { ++ ops = ftrace_find_tramp_ops_any(rec); ++ if (ops) { ++ do { ++ pr_cont("\ttramp: %pS (%pS)", ++ (void *)ops->trampoline, ++ (void *)ops->func); ++ ops = ftrace_find_tramp_ops_next(rec, ops); ++ } while (ops); ++ } else ++ pr_cont("\ttramp: ERROR!"); ++ ++ } ++ ip = ftrace_get_addr_curr(rec); ++ pr_cont("\n expected tramp: %lx\n", ip); ++ } ++} ++ ++static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) ++{ ++ unsigned long flag = 0UL; ++ ++ ftrace_bug_type = FTRACE_BUG_UNKNOWN; ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ return FTRACE_UPDATE_IGNORE; ++ ++ /* ++ * If we are updating calls: ++ * ++ * If the record has a ref count, then we need to enable it ++ * because someone is using it. ++ * ++ * Otherwise we make sure its disabled. ++ * ++ * If we are disabling calls, then disable all records that ++ * are enabled. ++ */ ++ if (enable && ftrace_rec_count(rec)) ++ flag = FTRACE_FL_ENABLED; ++ ++ /* ++ * If enabling and the REGS flag does not match the REGS_EN, or ++ * the TRAMP flag doesn't match the TRAMP_EN, then do not ignore ++ * this record. Set flags to fail the compare against ENABLED. 
++ */ ++ if (flag) { ++ if (!(rec->flags & FTRACE_FL_REGS) != ++ !(rec->flags & FTRACE_FL_REGS_EN)) ++ flag |= FTRACE_FL_REGS; ++ ++ if (!(rec->flags & FTRACE_FL_TRAMP) != ++ !(rec->flags & FTRACE_FL_TRAMP_EN)) ++ flag |= FTRACE_FL_TRAMP; ++ } ++ ++ /* If the state of this record hasn't changed, then do nothing */ ++ if ((rec->flags & FTRACE_FL_ENABLED) == flag) ++ return FTRACE_UPDATE_IGNORE; ++ ++ if (flag) { ++ /* Save off if rec is being enabled (for return value) */ ++ flag ^= rec->flags & FTRACE_FL_ENABLED; ++ ++ if (update) { ++ rec->flags |= FTRACE_FL_ENABLED; ++ if (flag & FTRACE_FL_REGS) { ++ if (rec->flags & FTRACE_FL_REGS) ++ rec->flags |= FTRACE_FL_REGS_EN; ++ else ++ rec->flags &= ~FTRACE_FL_REGS_EN; ++ } ++ if (flag & FTRACE_FL_TRAMP) { ++ if (rec->flags & FTRACE_FL_TRAMP) ++ rec->flags |= FTRACE_FL_TRAMP_EN; ++ else ++ rec->flags &= ~FTRACE_FL_TRAMP_EN; ++ } ++ } ++ ++ /* ++ * If this record is being updated from a nop, then ++ * return UPDATE_MAKE_CALL. ++ * Otherwise, ++ * return UPDATE_MODIFY_CALL to tell the caller to convert ++ * from the save regs, to a non-save regs function or ++ * vice versa, or from a trampoline call. ++ */ ++ if (flag & FTRACE_FL_ENABLED) { ++ ftrace_bug_type = FTRACE_BUG_CALL; ++ return FTRACE_UPDATE_MAKE_CALL; ++ } ++ ++ ftrace_bug_type = FTRACE_BUG_UPDATE; ++ return FTRACE_UPDATE_MODIFY_CALL; ++ } ++ ++ if (update) { ++ /* If there's no more users, clear all flags */ ++ if (!ftrace_rec_count(rec)) ++ rec->flags = 0; ++ else ++ /* ++ * Just disable the record, but keep the ops TRAMP ++ * and REGS states. The _EN flags must be disabled though. ++ */ ++ rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN | ++ FTRACE_FL_REGS_EN); ++ } ++ ++ ftrace_bug_type = FTRACE_BUG_NOP; ++ return FTRACE_UPDATE_MAKE_NOP; ++} ++ ++/** ++ * ftrace_update_record, set a record that now is tracing or not ++ * @rec: the record to update ++ * @enable: set to 1 if the record is tracing, zero to force disable ++ * ++ * The records that represent all functions that can be traced need ++ * to be updated when tracing has been enabled. ++ */ ++int ftrace_update_record(struct dyn_ftrace *rec, int enable) ++{ ++ return ftrace_check_record(rec, enable, 1); ++} ++ ++/** ++ * ftrace_test_record, check if the record has been enabled or not ++ * @rec: the record to test ++ * @enable: set to 1 to check if enabled, 0 if it is disabled ++ * ++ * The arch code may need to test if a record is already set to ++ * tracing to determine how to modify the function code that it ++ * represents. ++ */ ++int ftrace_test_record(struct dyn_ftrace *rec, int enable) ++{ ++ return ftrace_check_record(rec, enable, 0); ++} ++ ++static struct ftrace_ops * ++ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *op; ++ unsigned long ip = rec->ip; ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ ++ if (!op->trampoline) ++ continue; ++ ++ if (hash_contains_ip(ip, op->func_hash)) ++ return op; ++ } while_for_each_ftrace_op(op); ++ ++ return NULL; ++} ++ ++static struct ftrace_ops * ++ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, ++ struct ftrace_ops *op) ++{ ++ unsigned long ip = rec->ip; ++ ++ while_for_each_ftrace_op(op) { ++ ++ if (!op->trampoline) ++ continue; ++ ++ if (hash_contains_ip(ip, op->func_hash)) ++ return op; ++ } ++ ++ return NULL; ++} ++ ++static struct ftrace_ops * ++ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *op; ++ unsigned long ip = rec->ip; ++ ++ /* ++ * Need to check removed ops first. 
++ * If they are being removed, and this rec has a tramp, ++ * and this rec is in the ops list, then it would be the ++ * one with the tramp. ++ */ ++ if (removed_ops) { ++ if (hash_contains_ip(ip, &removed_ops->old_hash)) ++ return removed_ops; ++ } ++ ++ /* ++ * Need to find the current trampoline for a rec. ++ * Now, a trampoline is only attached to a rec if there ++ * was a single 'ops' attached to it. But this can be called ++ * when we are adding another op to the rec or removing the ++ * current one. Thus, if the op is being added, we can ++ * ignore it because it hasn't attached itself to the rec ++ * yet. ++ * ++ * If an ops is being modified (hooking to different functions) ++ * then we don't care about the new functions that are being ++ * added, just the old ones (that are probably being removed). ++ * ++ * If we are adding an ops to a function that already is using ++ * a trampoline, it needs to be removed (trampolines are only ++ * for single ops connected), then an ops that is not being ++ * modified also needs to be checked. ++ */ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ ++ if (!op->trampoline) ++ continue; ++ ++ /* ++ * If the ops is being added, it hasn't gotten to ++ * the point to be removed from this tree yet. ++ */ ++ if (op->flags & FTRACE_OPS_FL_ADDING) ++ continue; ++ ++ ++ /* ++ * If the ops is being modified and is in the old ++ * hash, then it is probably being removed from this ++ * function. ++ */ ++ if ((op->flags & FTRACE_OPS_FL_MODIFYING) && ++ hash_contains_ip(ip, &op->old_hash)) ++ return op; ++ /* ++ * If the ops is not being added or modified, and it's ++ * in its normal filter hash, then this must be the one ++ * we want! ++ */ ++ if (!(op->flags & FTRACE_OPS_FL_MODIFYING) && ++ hash_contains_ip(ip, op->func_hash)) ++ return op; ++ ++ } while_for_each_ftrace_op(op); ++ ++ return NULL; ++} ++ ++static struct ftrace_ops * ++ftrace_find_tramp_ops_new(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *op; ++ unsigned long ip = rec->ip; ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ /* pass rec in as regs to have non-NULL val */ ++ if (hash_contains_ip(ip, op->func_hash)) ++ return op; ++ } while_for_each_ftrace_op(op); ++ ++ return NULL; ++} ++ ++/** ++ * ftrace_get_addr_new - Get the call address to set to ++ * @rec: The ftrace record descriptor ++ * ++ * If the record has the FTRACE_FL_REGS set, that means that it ++ * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS ++ * is not not set, then it wants to convert to the normal callback. ++ * ++ * Returns the address of the trampoline to set to ++ */ ++unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *ops; ++ ++ /* Trampolines take precedence over regs */ ++ if (rec->flags & FTRACE_FL_TRAMP) { ++ ops = ftrace_find_tramp_ops_new(rec); ++ if (FTRACE_WARN_ON(!ops || !ops->trampoline)) { ++ pr_warn("Bad trampoline accounting at: %p (%pS) (%lx)\n", ++ (void *)rec->ip, (void *)rec->ip, rec->flags); ++ /* Ftrace is shutting down, return anything */ ++ return (unsigned long)FTRACE_ADDR; ++ } ++ return ops->trampoline; ++ } ++ ++ if (rec->flags & FTRACE_FL_REGS) ++ return (unsigned long)FTRACE_REGS_ADDR; ++ else ++ return (unsigned long)FTRACE_ADDR; ++} ++ ++/** ++ * ftrace_get_addr_curr - Get the call address that is already there ++ * @rec: The ftrace record descriptor ++ * ++ * The FTRACE_FL_REGS_EN is set when the record already points to ++ * a function that saves all the regs. 
Basically the '_EN' version ++ * represents the current state of the function. ++ * ++ * Returns the address of the trampoline that is currently being called ++ */ ++unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *ops; ++ ++ /* Trampolines take precedence over regs */ ++ if (rec->flags & FTRACE_FL_TRAMP_EN) { ++ ops = ftrace_find_tramp_ops_curr(rec); ++ if (FTRACE_WARN_ON(!ops)) { ++ pr_warn("Bad trampoline accounting at: %p (%pS)\n", ++ (void *)rec->ip, (void *)rec->ip); ++ /* Ftrace is shutting down, return anything */ ++ return (unsigned long)FTRACE_ADDR; ++ } ++ return ops->trampoline; ++ } ++ ++ if (rec->flags & FTRACE_FL_REGS_EN) ++ return (unsigned long)FTRACE_REGS_ADDR; ++ else ++ return (unsigned long)FTRACE_ADDR; ++} ++ ++static int ++__ftrace_replace_code(struct dyn_ftrace *rec, int enable) ++{ ++ unsigned long ftrace_old_addr; ++ unsigned long ftrace_addr; ++ int ret; ++ ++ ftrace_addr = ftrace_get_addr_new(rec); ++ ++ /* This needs to be done before we call ftrace_update_record */ ++ ftrace_old_addr = ftrace_get_addr_curr(rec); ++ ++ ret = ftrace_update_record(rec, enable); ++ ++ ftrace_bug_type = FTRACE_BUG_UNKNOWN; ++ ++ switch (ret) { ++ case FTRACE_UPDATE_IGNORE: ++ return 0; ++ ++ case FTRACE_UPDATE_MAKE_CALL: ++ ftrace_bug_type = FTRACE_BUG_CALL; ++ return ftrace_make_call(rec, ftrace_addr); ++ ++ case FTRACE_UPDATE_MAKE_NOP: ++ ftrace_bug_type = FTRACE_BUG_NOP; ++ return ftrace_make_nop(NULL, rec, ftrace_old_addr); ++ ++ case FTRACE_UPDATE_MODIFY_CALL: ++ ftrace_bug_type = FTRACE_BUG_UPDATE; ++ return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); ++ } ++ ++ return -1; /* unknow ftrace bug */ ++} ++ ++void __weak ftrace_replace_code(int enable) ++{ ++ struct dyn_ftrace *rec; ++ struct ftrace_page *pg; ++ int failed; ++ ++ if (unlikely(ftrace_disabled)) ++ return; ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ failed = __ftrace_replace_code(rec, enable); ++ if (failed) { ++ ftrace_bug(failed, rec); ++ /* Stop processing */ ++ return; ++ } ++ } while_for_each_ftrace_rec(); ++} ++ ++struct ftrace_rec_iter { ++ struct ftrace_page *pg; ++ int index; ++}; ++ ++/** ++ * ftrace_rec_iter_start, start up iterating over traced functions ++ * ++ * Returns an iterator handle that is used to iterate over all ++ * the records that represent address locations where functions ++ * are traced. ++ * ++ * May return NULL if no records are available. ++ */ ++struct ftrace_rec_iter *ftrace_rec_iter_start(void) ++{ ++ /* ++ * We only use a single iterator. ++ * Protected by the ftrace_lock mutex. ++ */ ++ static struct ftrace_rec_iter ftrace_rec_iter; ++ struct ftrace_rec_iter *iter = &ftrace_rec_iter; ++ ++ iter->pg = ftrace_pages_start; ++ iter->index = 0; ++ ++ /* Could have empty pages */ ++ while (iter->pg && !iter->pg->index) ++ iter->pg = iter->pg->next; ++ ++ if (!iter->pg) ++ return NULL; ++ ++ return iter; ++} ++ ++/** ++ * ftrace_rec_iter_next, get the next record to process. ++ * @iter: The handle to the iterator. ++ * ++ * Returns the next iterator after the given iterator @iter. 
++ */ ++struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter) ++{ ++ iter->index++; ++ ++ if (iter->index >= iter->pg->index) { ++ iter->pg = iter->pg->next; ++ iter->index = 0; ++ ++ /* Could have empty pages */ ++ while (iter->pg && !iter->pg->index) ++ iter->pg = iter->pg->next; ++ } ++ ++ if (!iter->pg) ++ return NULL; ++ ++ return iter; ++} ++ ++/** ++ * ftrace_rec_iter_record, get the record at the iterator location ++ * @iter: The current iterator location ++ * ++ * Returns the record that the current @iter is at. ++ */ ++struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) ++{ ++ return &iter->pg->records[iter->index]; ++} ++ ++static int ++ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) ++{ ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return 0; ++ ++ ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); ++ if (ret) { ++ ftrace_bug_type = FTRACE_BUG_INIT; ++ ftrace_bug(ret, rec); ++ return 0; ++ } ++ return 1; ++} ++ ++/* ++ * archs can override this function if they must do something ++ * before the modifying code is performed. ++ */ ++int __weak ftrace_arch_code_modify_prepare(void) ++{ ++ return 0; ++} ++ ++/* ++ * archs can override this function if they must do something ++ * after the modifying code is performed. ++ */ ++int __weak ftrace_arch_code_modify_post_process(void) ++{ ++ return 0; ++} ++ ++void ftrace_modify_all_code(int command) ++{ ++ int update = command & FTRACE_UPDATE_TRACE_FUNC; ++ int err = 0; ++ ++ /* ++ * If the ftrace_caller calls a ftrace_ops func directly, ++ * we need to make sure that it only traces functions it ++ * expects to trace. When doing the switch of functions, ++ * we need to update to the ftrace_ops_list_func first ++ * before the transition between old and new calls are set, ++ * as the ftrace_ops_list_func will check the ops hashes ++ * to make sure the ops are having the right functions ++ * traced. ++ */ ++ if (update) { ++ err = ftrace_update_ftrace_func(ftrace_ops_list_func); ++ if (FTRACE_WARN_ON(err)) ++ return; ++ } ++ ++ if (command & FTRACE_UPDATE_CALLS) ++ ftrace_replace_code(1); ++ else if (command & FTRACE_DISABLE_CALLS) ++ ftrace_replace_code(0); ++ ++ if (update && ftrace_trace_function != ftrace_ops_list_func) { ++ function_trace_op = set_function_trace_op; ++ smp_wmb(); ++ /* If irqs are disabled, we are in stop machine */ ++ if (!irqs_disabled()) ++ smp_call_function(ftrace_sync_ipi, NULL, 1); ++ err = ftrace_update_ftrace_func(ftrace_trace_function); ++ if (FTRACE_WARN_ON(err)) ++ return; ++ } ++ ++ if (command & FTRACE_START_FUNC_RET) ++ err = ftrace_enable_ftrace_graph_caller(); ++ else if (command & FTRACE_STOP_FUNC_RET) ++ err = ftrace_disable_ftrace_graph_caller(); ++ FTRACE_WARN_ON(err); ++} ++ ++static int __ftrace_modify_code(void *data) ++{ ++ int *command = data; ++ ++ ftrace_modify_all_code(*command); ++ ++ return 0; ++} ++ ++/** ++ * ftrace_run_stop_machine, go back to the stop machine method ++ * @command: The command to tell ftrace what to do ++ * ++ * If an arch needs to fall back to the stop machine method, the ++ * it can call this function. ++ */ ++void ftrace_run_stop_machine(int command) ++{ ++ stop_machine(__ftrace_modify_code, &command, NULL); ++} ++ ++/** ++ * arch_ftrace_update_code, modify the code to trace or not trace ++ * @command: The command that needs to be done ++ * ++ * Archs can override this function if it does not need to ++ * run stop_machine() to modify code. 
++ */ ++void __weak arch_ftrace_update_code(int command) ++{ ++ ftrace_run_stop_machine(command); ++} ++ ++static void ftrace_run_update_code(int command) ++{ ++ int ret; ++ ++ ret = ftrace_arch_code_modify_prepare(); ++ FTRACE_WARN_ON(ret); ++ if (ret) ++ return; ++ ++ /* ++ * By default we use stop_machine() to modify the code. ++ * But archs can do what ever they want as long as it ++ * is safe. The stop_machine() is the safest, but also ++ * produces the most overhead. ++ */ ++ arch_ftrace_update_code(command); ++ ++ ret = ftrace_arch_code_modify_post_process(); ++ FTRACE_WARN_ON(ret); ++} ++ ++static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, ++ struct ftrace_ops_hash *old_hash) ++{ ++ ops->flags |= FTRACE_OPS_FL_MODIFYING; ++ ops->old_hash.filter_hash = old_hash->filter_hash; ++ ops->old_hash.notrace_hash = old_hash->notrace_hash; ++ ftrace_run_update_code(command); ++ ops->old_hash.filter_hash = NULL; ++ ops->old_hash.notrace_hash = NULL; ++ ops->flags &= ~FTRACE_OPS_FL_MODIFYING; ++} ++ ++static ftrace_func_t saved_ftrace_func; ++static int ftrace_start_up; ++ ++void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops) ++{ ++} ++ ++static void ftrace_startup_enable(int command) ++{ ++ if (saved_ftrace_func != ftrace_trace_function) { ++ saved_ftrace_func = ftrace_trace_function; ++ command |= FTRACE_UPDATE_TRACE_FUNC; ++ } ++ ++ if (!command || !ftrace_enabled) ++ return; ++ ++ ftrace_run_update_code(command); ++} ++ ++static void ftrace_startup_all(int command) ++{ ++ update_all_ops = true; ++ ftrace_startup_enable(command); ++ update_all_ops = false; ++} ++ ++static int ftrace_startup(struct ftrace_ops *ops, int command) ++{ ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ ret = __register_ftrace_function(ops); ++ if (ret) ++ return ret; ++ ++ ftrace_start_up++; ++ ++ /* ++ * Note that ftrace probes uses this to start up ++ * and modify functions it will probe. But we still ++ * set the ADDING flag for modification, as probes ++ * do not have trampolines. If they add them in the ++ * future, then the probes will need to distinguish ++ * between adding and updating probes. ++ */ ++ ops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_ADDING; ++ ++ ret = ftrace_hash_ipmodify_enable(ops); ++ if (ret < 0) { ++ /* Rollback registration process */ ++ __unregister_ftrace_function(ops); ++ ftrace_start_up--; ++ ops->flags &= ~FTRACE_OPS_FL_ENABLED; ++ return ret; ++ } ++ ++ if (ftrace_hash_rec_enable(ops, 1)) ++ command |= FTRACE_UPDATE_CALLS; ++ ++ ftrace_startup_enable(command); ++ ++ ops->flags &= ~FTRACE_OPS_FL_ADDING; ++ ++ return 0; ++} ++ ++static int ftrace_shutdown(struct ftrace_ops *ops, int command) ++{ ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ ret = __unregister_ftrace_function(ops); ++ if (ret) ++ return ret; ++ ++ ftrace_start_up--; ++ /* ++ * Just warn in case of unbalance, no need to kill ftrace, it's not ++ * critical but the ftrace_call callers may be never nopped again after ++ * further ftrace uses. 
++ */ ++ WARN_ON_ONCE(ftrace_start_up < 0); ++ ++ /* Disabling ipmodify never fails */ ++ ftrace_hash_ipmodify_disable(ops); ++ ++ if (ftrace_hash_rec_disable(ops, 1)) ++ command |= FTRACE_UPDATE_CALLS; ++ ++ ops->flags &= ~FTRACE_OPS_FL_ENABLED; ++ ++ if (saved_ftrace_func != ftrace_trace_function) { ++ saved_ftrace_func = ftrace_trace_function; ++ command |= FTRACE_UPDATE_TRACE_FUNC; ++ } ++ ++ if (!command || !ftrace_enabled) { ++ /* ++ * If these are dynamic or per_cpu ops, they still ++ * need their data freed. Since, function tracing is ++ * not currently active, we can just free them ++ * without synchronizing all CPUs. ++ */ ++ if (ops->flags & FTRACE_OPS_FL_DYNAMIC) ++ goto free_ops; ++ ++ return 0; ++ } ++ ++ /* ++ * If the ops uses a trampoline, then it needs to be ++ * tested first on update. ++ */ ++ ops->flags |= FTRACE_OPS_FL_REMOVING; ++ removed_ops = ops; ++ ++ /* The trampoline logic checks the old hashes */ ++ ops->old_hash.filter_hash = ops->func_hash->filter_hash; ++ ops->old_hash.notrace_hash = ops->func_hash->notrace_hash; ++ ++ ftrace_run_update_code(command); ++ ++ /* ++ * If there's no more ops registered with ftrace, run a ++ * sanity check to make sure all rec flags are cleared. ++ */ ++ if (rcu_dereference_protected(ftrace_ops_list, ++ lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ if (FTRACE_WARN_ON_ONCE(rec->flags & ~FTRACE_FL_DISABLED)) ++ pr_warn(" %pS flags:%lx\n", ++ (void *)rec->ip, rec->flags); ++ } while_for_each_ftrace_rec(); ++ } ++ ++ ops->old_hash.filter_hash = NULL; ++ ops->old_hash.notrace_hash = NULL; ++ ++ removed_ops = NULL; ++ ops->flags &= ~FTRACE_OPS_FL_REMOVING; ++ ++ /* ++ * Dynamic ops may be freed, we must make sure that all ++ * callers are done before leaving this function. ++ * The same goes for freeing the per_cpu data of the per_cpu ++ * ops. ++ */ ++ if (ops->flags & FTRACE_OPS_FL_DYNAMIC) { ++ /* ++ * We need to do a hard force of sched synchronization. ++ * This is because we use preempt_disable() to do RCU, but ++ * the function tracers can be called where RCU is not watching ++ * (like before user_exit()). We can not rely on the RCU ++ * infrastructure to do the synchronization, thus we must do it ++ * ourselves. ++ */ ++ schedule_on_each_cpu(ftrace_sync); ++ ++ /* ++ * When the kernel is preeptive, tasks can be preempted ++ * while on a ftrace trampoline. Just scheduling a task on ++ * a CPU is not good enough to flush them. Calling ++ * synchornize_rcu_tasks() will wait for those tasks to ++ * execute and either schedule voluntarily or enter user space. 
++ */ ++ if (IS_ENABLED(CONFIG_PREEMPT)) ++ synchronize_rcu_tasks(); ++ ++ free_ops: ++ arch_ftrace_trampoline_free(ops); ++ } ++ ++ return 0; ++} ++ ++static void ftrace_startup_sysctl(void) ++{ ++ int command; ++ ++ if (unlikely(ftrace_disabled)) ++ return; ++ ++ /* Force update next time */ ++ saved_ftrace_func = NULL; ++ /* ftrace_start_up is true if we want ftrace running */ ++ if (ftrace_start_up) { ++ command = FTRACE_UPDATE_CALLS; ++ if (ftrace_graph_active) ++ command |= FTRACE_START_FUNC_RET; ++ ftrace_startup_enable(command); ++ } ++} ++ ++static void ftrace_shutdown_sysctl(void) ++{ ++ int command; ++ ++ if (unlikely(ftrace_disabled)) ++ return; ++ ++ /* ftrace_start_up is true if ftrace is running */ ++ if (ftrace_start_up) { ++ command = FTRACE_DISABLE_CALLS; ++ if (ftrace_graph_active) ++ command |= FTRACE_STOP_FUNC_RET; ++ ftrace_run_update_code(command); ++ } ++} ++ ++static u64 ftrace_update_time; ++unsigned long ftrace_update_tot_cnt; ++ ++static inline int ops_traces_mod(struct ftrace_ops *ops) ++{ ++ /* ++ * Filter_hash being empty will default to trace module. ++ * But notrace hash requires a test of individual module functions. ++ */ ++ return ftrace_hash_empty(ops->func_hash->filter_hash) && ++ ftrace_hash_empty(ops->func_hash->notrace_hash); ++} ++ ++/* ++ * Check if the current ops references the record. ++ * ++ * If the ops traces all functions, then it was already accounted for. ++ * If the ops does not trace the current record function, skip it. ++ * If the ops ignores the function via notrace filter, skip it. ++ */ ++static inline bool ++ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) ++{ ++ /* If ops isn't enabled, ignore it */ ++ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) ++ return false; ++ ++ /* If ops traces all then it includes this function */ ++ if (ops_traces_mod(ops)) ++ return true; ++ ++ /* The function must be in the filter */ ++ if (!ftrace_hash_empty(ops->func_hash->filter_hash) && ++ !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) ++ return false; ++ ++ /* If in notrace hash, we ignore it too */ ++ if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) ++ return false; ++ ++ return true; ++} ++ ++static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) ++{ ++ struct ftrace_page *pg; ++ struct dyn_ftrace *p; ++ u64 start, stop; ++ unsigned long update_cnt = 0; ++ unsigned long rec_flags = 0; ++ int i; ++ ++ start = ftrace_now(raw_smp_processor_id()); ++ ++ /* ++ * When a module is loaded, this function is called to convert ++ * the calls to mcount in its text to nops, and also to create ++ * an entry in the ftrace data. Now, if ftrace is activated ++ * after this call, but before the module sets its text to ++ * read-only, the modification of enabling ftrace can fail if ++ * the read-only is done while ftrace is converting the calls. ++ * To prevent this, the module's records are set as disabled ++ * and will be enabled after the call to set the module's text ++ * to read-only. ++ */ ++ if (mod) ++ rec_flags |= FTRACE_FL_DISABLED; ++ ++ for (pg = new_pgs; pg; pg = pg->next) { ++ ++ for (i = 0; i < pg->index; i++) { ++ ++ /* If something went wrong, bail without enabling anything */ ++ if (unlikely(ftrace_disabled)) ++ return -1; ++ ++ p = &pg->records[i]; ++ p->flags = rec_flags; ++ ++ /* ++ * Do the initial record conversion from mcount jump ++ * to the NOP instructions. 
++ */ ++ if (!__is_defined(CC_USING_NOP_MCOUNT) && ++ !ftrace_code_disable(mod, p)) ++ break; ++ ++ update_cnt++; ++ } ++ } ++ ++ stop = ftrace_now(raw_smp_processor_id()); ++ ftrace_update_time = stop - start; ++ ftrace_update_tot_cnt += update_cnt; ++ ++ return 0; ++} ++ ++static int ftrace_allocate_records(struct ftrace_page *pg, int count) ++{ ++ int order; ++ int cnt; ++ ++ if (WARN_ON(!count)) ++ return -EINVAL; ++ ++ order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); ++ ++ /* ++ * We want to fill as much as possible. No more than a page ++ * may be empty. ++ */ ++ while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE) ++ order--; ++ ++ again: ++ pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); ++ ++ if (!pg->records) { ++ /* if we can't allocate this size, try something smaller */ ++ if (!order) ++ return -ENOMEM; ++ order >>= 1; ++ goto again; ++ } ++ ++ cnt = (PAGE_SIZE << order) / ENTRY_SIZE; ++ pg->size = cnt; ++ ++ if (cnt > count) ++ cnt = count; ++ ++ return cnt; ++} ++ ++static struct ftrace_page * ++ftrace_allocate_pages(unsigned long num_to_init) ++{ ++ struct ftrace_page *start_pg; ++ struct ftrace_page *pg; ++ int order; ++ int cnt; ++ ++ if (!num_to_init) ++ return 0; ++ ++ start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL); ++ if (!pg) ++ return NULL; ++ ++ /* ++ * Try to allocate as much as possible in one continues ++ * location that fills in all of the space. We want to ++ * waste as little space as possible. ++ */ ++ for (;;) { ++ cnt = ftrace_allocate_records(pg, num_to_init); ++ if (cnt < 0) ++ goto free_pages; ++ ++ num_to_init -= cnt; ++ if (!num_to_init) ++ break; ++ ++ pg->next = kzalloc(sizeof(*pg), GFP_KERNEL); ++ if (!pg->next) ++ goto free_pages; ++ ++ pg = pg->next; ++ } ++ ++ return start_pg; ++ ++ free_pages: ++ pg = start_pg; ++ while (pg) { ++ order = get_count_order(pg->size / ENTRIES_PER_PAGE); ++ free_pages((unsigned long)pg->records, order); ++ start_pg = pg->next; ++ kfree(pg); ++ pg = start_pg; ++ } ++ pr_info("ftrace: FAILED to allocate memory for functions\n"); ++ return NULL; ++} ++ ++#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ ++ ++struct ftrace_iterator { ++ loff_t pos; ++ loff_t func_pos; ++ loff_t mod_pos; ++ struct ftrace_page *pg; ++ struct dyn_ftrace *func; ++ struct ftrace_func_probe *probe; ++ struct ftrace_func_entry *probe_entry; ++ struct trace_parser parser; ++ struct ftrace_hash *hash; ++ struct ftrace_ops *ops; ++ struct trace_array *tr; ++ struct list_head *mod_list; ++ int pidx; ++ int idx; ++ unsigned flags; ++}; ++ ++static void * ++t_probe_next(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ struct trace_array *tr = iter->ops->private; ++ struct list_head *func_probes; ++ struct ftrace_hash *hash; ++ struct list_head *next; ++ struct hlist_node *hnd = NULL; ++ struct hlist_head *hhd; ++ int size; ++ ++ (*pos)++; ++ iter->pos = *pos; ++ ++ if (!tr) ++ return NULL; ++ ++ func_probes = &tr->func_probes; ++ if (list_empty(func_probes)) ++ return NULL; ++ ++ if (!iter->probe) { ++ next = func_probes->next; ++ iter->probe = list_entry(next, struct ftrace_func_probe, list); ++ } ++ ++ if (iter->probe_entry) ++ hnd = &iter->probe_entry->hlist; ++ ++ hash = iter->probe->ops.func_hash->filter_hash; ++ ++ /* ++ * A probe being registered may temporarily have an empty hash ++ * and it's at the end of the func_probes list. 
++ */ ++ if (!hash || hash == EMPTY_HASH) ++ return NULL; ++ ++ size = 1 << hash->size_bits; ++ ++ retry: ++ if (iter->pidx >= size) { ++ if (iter->probe->list.next == func_probes) ++ return NULL; ++ next = iter->probe->list.next; ++ iter->probe = list_entry(next, struct ftrace_func_probe, list); ++ hash = iter->probe->ops.func_hash->filter_hash; ++ size = 1 << hash->size_bits; ++ iter->pidx = 0; ++ } ++ ++ hhd = &hash->buckets[iter->pidx]; ++ ++ if (hlist_empty(hhd)) { ++ iter->pidx++; ++ hnd = NULL; ++ goto retry; ++ } ++ ++ if (!hnd) ++ hnd = hhd->first; ++ else { ++ hnd = hnd->next; ++ if (!hnd) { ++ iter->pidx++; ++ goto retry; ++ } ++ } ++ ++ if (WARN_ON_ONCE(!hnd)) ++ return NULL; ++ ++ iter->probe_entry = hlist_entry(hnd, struct ftrace_func_entry, hlist); ++ ++ return iter; ++} ++ ++static void *t_probe_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ void *p = NULL; ++ loff_t l; ++ ++ if (!(iter->flags & FTRACE_ITER_DO_PROBES)) ++ return NULL; ++ ++ if (iter->mod_pos > *pos) ++ return NULL; ++ ++ iter->probe = NULL; ++ iter->probe_entry = NULL; ++ iter->pidx = 0; ++ for (l = 0; l <= (*pos - iter->mod_pos); ) { ++ p = t_probe_next(m, &l); ++ if (!p) ++ break; ++ } ++ if (!p) ++ return NULL; ++ ++ /* Only set this if we have an item */ ++ iter->flags |= FTRACE_ITER_PROBE; ++ ++ return iter; ++} ++ ++static int ++t_probe_show(struct seq_file *m, struct ftrace_iterator *iter) ++{ ++ struct ftrace_func_entry *probe_entry; ++ struct ftrace_probe_ops *probe_ops; ++ struct ftrace_func_probe *probe; ++ ++ probe = iter->probe; ++ probe_entry = iter->probe_entry; ++ ++ if (WARN_ON_ONCE(!probe || !probe_entry)) ++ return -EIO; ++ ++ probe_ops = probe->probe_ops; ++ ++ if (probe_ops->print) ++ return probe_ops->print(m, probe_entry->ip, probe_ops, probe->data); ++ ++ seq_printf(m, "%ps:%ps\n", (void *)probe_entry->ip, ++ (void *)probe_ops->func); ++ ++ return 0; ++} ++ ++static void * ++t_mod_next(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ struct trace_array *tr = iter->tr; ++ ++ (*pos)++; ++ iter->pos = *pos; ++ ++ iter->mod_list = iter->mod_list->next; ++ ++ if (iter->mod_list == &tr->mod_trace || ++ iter->mod_list == &tr->mod_notrace) { ++ iter->flags &= ~FTRACE_ITER_MOD; ++ return NULL; ++ } ++ ++ iter->mod_pos = *pos; ++ ++ return iter; ++} ++ ++static void *t_mod_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ void *p = NULL; ++ loff_t l; ++ ++ if (iter->func_pos > *pos) ++ return NULL; ++ ++ iter->mod_pos = iter->func_pos; ++ ++ /* probes are only available if tr is set */ ++ if (!iter->tr) ++ return NULL; ++ ++ for (l = 0; l <= (*pos - iter->func_pos); ) { ++ p = t_mod_next(m, &l); ++ if (!p) ++ break; ++ } ++ if (!p) { ++ iter->flags &= ~FTRACE_ITER_MOD; ++ return t_probe_start(m, pos); ++ } ++ ++ /* Only set this if we have an item */ ++ iter->flags |= FTRACE_ITER_MOD; ++ ++ return iter; ++} ++ ++static int ++t_mod_show(struct seq_file *m, struct ftrace_iterator *iter) ++{ ++ struct ftrace_mod_load *ftrace_mod; ++ struct trace_array *tr = iter->tr; ++ ++ if (WARN_ON_ONCE(!iter->mod_list) || ++ iter->mod_list == &tr->mod_trace || ++ iter->mod_list == &tr->mod_notrace) ++ return -EIO; ++ ++ ftrace_mod = list_entry(iter->mod_list, struct ftrace_mod_load, list); ++ ++ if (ftrace_mod->func) ++ seq_printf(m, "%s", ftrace_mod->func); ++ else ++ seq_putc(m, '*'); ++ ++ seq_printf(m, ":mod:%s\n", ftrace_mod->module); ++ ++ return 0; ++} ++ ++static void * 
++t_func_next(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ struct dyn_ftrace *rec = NULL; ++ ++ (*pos)++; ++ ++ retry: ++ if (iter->idx >= iter->pg->index) { ++ if (iter->pg->next) { ++ iter->pg = iter->pg->next; ++ iter->idx = 0; ++ goto retry; ++ } ++ } else { ++ rec = &iter->pg->records[iter->idx++]; ++ if (((iter->flags & (FTRACE_ITER_FILTER | FTRACE_ITER_NOTRACE)) && ++ !ftrace_lookup_ip(iter->hash, rec->ip)) || ++ ++ ((iter->flags & FTRACE_ITER_ENABLED) && ++ !(rec->flags & FTRACE_FL_ENABLED))) { ++ ++ rec = NULL; ++ goto retry; ++ } ++ } ++ ++ if (!rec) ++ return NULL; ++ ++ iter->pos = iter->func_pos = *pos; ++ iter->func = rec; ++ ++ return iter; ++} ++ ++static void * ++t_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ loff_t l = *pos; /* t_probe_start() must use original pos */ ++ void *ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return NULL; ++ ++ if (iter->flags & FTRACE_ITER_PROBE) ++ return t_probe_next(m, pos); ++ ++ if (iter->flags & FTRACE_ITER_MOD) ++ return t_mod_next(m, pos); ++ ++ if (iter->flags & FTRACE_ITER_PRINTALL) { ++ /* next must increment pos, and t_probe_start does not */ ++ (*pos)++; ++ return t_mod_start(m, &l); ++ } ++ ++ ret = t_func_next(m, pos); ++ ++ if (!ret) ++ return t_mod_start(m, &l); ++ ++ return ret; ++} ++ ++static void reset_iter_read(struct ftrace_iterator *iter) ++{ ++ iter->pos = 0; ++ iter->func_pos = 0; ++ iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_PROBE | FTRACE_ITER_MOD); ++} ++ ++static void *t_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ void *p = NULL; ++ loff_t l; ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (unlikely(ftrace_disabled)) ++ return NULL; ++ ++ /* ++ * If an lseek was done, then reset and start from beginning. ++ */ ++ if (*pos < iter->pos) ++ reset_iter_read(iter); ++ ++ /* ++ * For set_ftrace_filter reading, if we have the filter ++ * off, we can short cut and just print out that all ++ * functions are enabled. ++ */ ++ if ((iter->flags & (FTRACE_ITER_FILTER | FTRACE_ITER_NOTRACE)) && ++ ftrace_hash_empty(iter->hash)) { ++ iter->func_pos = 1; /* Account for the message */ ++ if (*pos > 0) ++ return t_mod_start(m, pos); ++ iter->flags |= FTRACE_ITER_PRINTALL; ++ /* reset in case of seek/pread */ ++ iter->flags &= ~FTRACE_ITER_PROBE; ++ return iter; ++ } ++ ++ if (iter->flags & FTRACE_ITER_MOD) ++ return t_mod_start(m, pos); ++ ++ /* ++ * Unfortunately, we need to restart at ftrace_pages_start ++ * every time we let go of the ftrace_mutex. This is because ++ * those pointers can change without the lock. 
++ */ ++ iter->pg = ftrace_pages_start; ++ iter->idx = 0; ++ for (l = 0; l <= *pos; ) { ++ p = t_func_next(m, &l); ++ if (!p) ++ break; ++ } ++ ++ if (!p) ++ return t_mod_start(m, pos); ++ ++ return iter; ++} ++ ++static void t_stop(struct seq_file *m, void *p) ++{ ++ mutex_unlock(&ftrace_lock); ++} ++ ++void * __weak ++arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec) ++{ ++ return NULL; ++} ++ ++static void add_trampoline_func(struct seq_file *m, struct ftrace_ops *ops, ++ struct dyn_ftrace *rec) ++{ ++ void *ptr; ++ ++ ptr = arch_ftrace_trampoline_func(ops, rec); ++ if (ptr) ++ seq_printf(m, " ->%pS", ptr); ++} ++ ++static int t_show(struct seq_file *m, void *v) ++{ ++ struct ftrace_iterator *iter = m->private; ++ struct dyn_ftrace *rec; ++ ++ if (iter->flags & FTRACE_ITER_PROBE) ++ return t_probe_show(m, iter); ++ ++ if (iter->flags & FTRACE_ITER_MOD) ++ return t_mod_show(m, iter); ++ ++ if (iter->flags & FTRACE_ITER_PRINTALL) { ++ if (iter->flags & FTRACE_ITER_NOTRACE) ++ seq_puts(m, "#### no functions disabled ####\n"); ++ else ++ seq_puts(m, "#### all functions enabled ####\n"); ++ return 0; ++ } ++ ++ rec = iter->func; ++ ++ if (!rec) ++ return 0; ++ ++ seq_printf(m, "%ps", (void *)rec->ip); ++ if (iter->flags & FTRACE_ITER_ENABLED) { ++ struct ftrace_ops *ops; ++ ++ seq_printf(m, " (%ld)%s%s", ++ ftrace_rec_count(rec), ++ rec->flags & FTRACE_FL_REGS ? " R" : " ", ++ rec->flags & FTRACE_FL_IPMODIFY ? " I" : " "); ++ if (rec->flags & FTRACE_FL_TRAMP_EN) { ++ ops = ftrace_find_tramp_ops_any(rec); ++ if (ops) { ++ do { ++ seq_printf(m, "\ttramp: %pS (%pS)", ++ (void *)ops->trampoline, ++ (void *)ops->func); ++ add_trampoline_func(m, ops, rec); ++ ops = ftrace_find_tramp_ops_next(rec, ops); ++ } while (ops); ++ } else ++ seq_puts(m, "\ttramp: ERROR!"); ++ } else { ++ add_trampoline_func(m, NULL, rec); ++ } ++ } ++ ++ seq_putc(m, '\n'); ++ ++ return 0; ++} ++ ++static const struct seq_operations show_ftrace_seq_ops = { ++ .start = t_start, ++ .next = t_next, ++ .stop = t_stop, ++ .show = t_show, ++}; ++ ++static int ++ftrace_avail_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_iterator *iter; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); ++ if (!iter) ++ return -ENOMEM; ++ ++ iter->pg = ftrace_pages_start; ++ iter->ops = &global_ops; ++ ++ return 0; ++} ++ ++static int ++ftrace_enabled_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_iterator *iter; ++ ++ iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); ++ if (!iter) ++ return -ENOMEM; ++ ++ iter->pg = ftrace_pages_start; ++ iter->flags = FTRACE_ITER_ENABLED; ++ iter->ops = &global_ops; ++ ++ return 0; ++} ++ ++/** ++ * ftrace_regex_open - initialize function tracer filter files ++ * @ops: The ftrace_ops that hold the hash filters ++ * @flag: The type of filter to process ++ * @inode: The inode, usually passed in to your open routine ++ * @file: The file, usually passed in to your open routine ++ * ++ * ftrace_regex_open() initializes the filter files for the ++ * @ops. Depending on @flag it may process the filter hash or ++ * the notrace hash of @ops. With this called from the open ++ * routine, you can use ftrace_filter_write() for the write ++ * routine if @flag has FTRACE_ITER_FILTER set, or ++ * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. ++ * tracing_lseek() should be used as the lseek routine, and ++ * release must call ftrace_regex_release(). 
++ */ ++int ++ftrace_regex_open(struct ftrace_ops *ops, int flag, ++ struct inode *inode, struct file *file) ++{ ++ struct ftrace_iterator *iter; ++ struct ftrace_hash *hash; ++ struct list_head *mod_head; ++ struct trace_array *tr = ops->private; ++ int ret = -ENOMEM; ++ ++ ftrace_ops_init(ops); ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ if (tr && trace_array_get(tr) < 0) ++ return -ENODEV; ++ ++ iter = kzalloc(sizeof(*iter), GFP_KERNEL); ++ if (!iter) ++ goto out; ++ ++ if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) ++ goto out; ++ ++ iter->ops = ops; ++ iter->flags = flag; ++ iter->tr = tr; ++ ++ mutex_lock(&ops->func_hash->regex_lock); ++ ++ if (flag & FTRACE_ITER_NOTRACE) { ++ hash = ops->func_hash->notrace_hash; ++ mod_head = tr ? &tr->mod_notrace : NULL; ++ } else { ++ hash = ops->func_hash->filter_hash; ++ mod_head = tr ? &tr->mod_trace : NULL; ++ } ++ ++ iter->mod_list = mod_head; ++ ++ if (file->f_mode & FMODE_WRITE) { ++ const int size_bits = FTRACE_HASH_DEFAULT_BITS; ++ ++ if (file->f_flags & O_TRUNC) { ++ iter->hash = alloc_ftrace_hash(size_bits); ++ clear_ftrace_mod_list(mod_head); ++ } else { ++ iter->hash = alloc_and_copy_ftrace_hash(size_bits, hash); ++ } ++ ++ if (!iter->hash) { ++ trace_parser_put(&iter->parser); ++ goto out_unlock; ++ } ++ } else ++ iter->hash = hash; ++ ++ ret = 0; ++ ++ if (file->f_mode & FMODE_READ) { ++ iter->pg = ftrace_pages_start; ++ ++ ret = seq_open(file, &show_ftrace_seq_ops); ++ if (!ret) { ++ struct seq_file *m = file->private_data; ++ m->private = iter; ++ } else { ++ /* Failed */ ++ free_ftrace_hash(iter->hash); ++ trace_parser_put(&iter->parser); ++ } ++ } else ++ file->private_data = iter; ++ ++ out_unlock: ++ mutex_unlock(&ops->func_hash->regex_lock); ++ ++ out: ++ if (ret) { ++ kfree(iter); ++ if (tr) ++ trace_array_put(tr); ++ } ++ ++ return ret; ++} ++ ++static int ++ftrace_filter_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_ops *ops = inode->i_private; ++ ++ return ftrace_regex_open(ops, ++ FTRACE_ITER_FILTER | FTRACE_ITER_DO_PROBES, ++ inode, file); ++} ++ ++static int ++ftrace_notrace_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_ops *ops = inode->i_private; ++ ++ return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE, ++ inode, file); ++} ++ ++/* Type for quick search ftrace basic regexes (globs) from filter_parse_regex */ ++struct ftrace_glob { ++ char *search; ++ unsigned len; ++ int type; ++}; ++ ++/* ++ * If symbols in an architecture don't correspond exactly to the user-visible ++ * name of what they represent, it is possible to define this function to ++ * perform the necessary adjustments. 
++*/ ++char * __weak arch_ftrace_match_adjust(char *str, const char *search) ++{ ++ return str; ++} ++ ++static int ftrace_match(char *str, struct ftrace_glob *g) ++{ ++ int matched = 0; ++ int slen; ++ ++ str = arch_ftrace_match_adjust(str, g->search); ++ ++ switch (g->type) { ++ case MATCH_FULL: ++ if (strcmp(str, g->search) == 0) ++ matched = 1; ++ break; ++ case MATCH_FRONT_ONLY: ++ if (strncmp(str, g->search, g->len) == 0) ++ matched = 1; ++ break; ++ case MATCH_MIDDLE_ONLY: ++ if (strstr(str, g->search)) ++ matched = 1; ++ break; ++ case MATCH_END_ONLY: ++ slen = strlen(str); ++ if (slen >= g->len && ++ memcmp(str + slen - g->len, g->search, g->len) == 0) ++ matched = 1; ++ break; ++ case MATCH_GLOB: ++ if (glob_match(g->search, str)) ++ matched = 1; ++ break; ++ } ++ ++ return matched; ++} ++ ++static int ++enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int clear_filter) ++{ ++ struct ftrace_func_entry *entry; ++ int ret = 0; ++ ++ entry = ftrace_lookup_ip(hash, rec->ip); ++ if (clear_filter) { ++ /* Do nothing if it doesn't exist */ ++ if (!entry) ++ return 0; ++ ++ free_hash_entry(hash, entry); ++ } else { ++ /* Do nothing if it exists */ ++ if (entry) ++ return 0; ++ ++ ret = add_hash_entry(hash, rec->ip); ++ } ++ return ret; ++} ++ ++static int ++ftrace_match_record(struct dyn_ftrace *rec, struct ftrace_glob *func_g, ++ struct ftrace_glob *mod_g, int exclude_mod) ++{ ++ char str[KSYM_SYMBOL_LEN]; ++ char *modname; ++ ++ kallsyms_lookup(rec->ip, NULL, NULL, &modname, str); ++ ++ if (mod_g) { ++ int mod_matches = (modname) ? ftrace_match(modname, mod_g) : 0; ++ ++ /* blank module name to match all modules */ ++ if (!mod_g->len) { ++ /* blank module globbing: modname xor exclude_mod */ ++ if (!exclude_mod != !modname) ++ goto func_match; ++ return 0; ++ } ++ ++ /* ++ * exclude_mod is set to trace everything but the given ++ * module. If it is set and the module matches, then ++ * return 0. If it is not set, and the module doesn't match ++ * also return 0. Otherwise, check the function to see if ++ * that matches. ++ */ ++ if (!mod_matches == !exclude_mod) ++ return 0; ++func_match: ++ /* blank search means to match all funcs in the mod */ ++ if (!func_g->len) ++ return 1; ++ } ++ ++ return ftrace_match(str, func_g); ++} ++ ++static int ++match_records(struct ftrace_hash *hash, char *func, int len, char *mod) ++{ ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ struct ftrace_glob func_g = { .type = MATCH_FULL }; ++ struct ftrace_glob mod_g = { .type = MATCH_FULL }; ++ struct ftrace_glob *mod_match = (mod) ? 
&mod_g : NULL; ++ int exclude_mod = 0; ++ int found = 0; ++ int ret; ++ int clear_filter = 0; ++ ++ if (func) { ++ func_g.type = filter_parse_regex(func, len, &func_g.search, ++ &clear_filter); ++ func_g.len = strlen(func_g.search); ++ } ++ ++ if (mod) { ++ mod_g.type = filter_parse_regex(mod, strlen(mod), ++ &mod_g.search, &exclude_mod); ++ mod_g.len = strlen(mod_g.search); ++ } ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (unlikely(ftrace_disabled)) ++ goto out_unlock; ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) { ++ ret = enter_record(hash, rec, clear_filter); ++ if (ret < 0) { ++ found = ret; ++ goto out_unlock; ++ } ++ found = 1; ++ } ++ } while_for_each_ftrace_rec(); ++ out_unlock: ++ mutex_unlock(&ftrace_lock); ++ ++ return found; ++} ++ ++static int ++ftrace_match_records(struct ftrace_hash *hash, char *buff, int len) ++{ ++ return match_records(hash, buff, len, NULL); ++} ++ ++static void ftrace_ops_update_code(struct ftrace_ops *ops, ++ struct ftrace_ops_hash *old_hash) ++{ ++ struct ftrace_ops *op; ++ ++ if (!ftrace_enabled) ++ return; ++ ++ if (ops->flags & FTRACE_OPS_FL_ENABLED) { ++ ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash); ++ return; ++ } ++ ++ /* ++ * If this is the shared global_ops filter, then we need to ++ * check if there is another ops that shares it, is enabled. ++ * If so, we still need to run the modify code. ++ */ ++ if (ops->func_hash != &global_ops.local_hash) ++ return; ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ if (op->func_hash == &global_ops.local_hash && ++ op->flags & FTRACE_OPS_FL_ENABLED) { ++ ftrace_run_modify_code(op, FTRACE_UPDATE_CALLS, old_hash); ++ /* Only need to do this once */ ++ return; ++ } ++ } while_for_each_ftrace_op(op); ++} ++ ++static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, ++ struct ftrace_hash **orig_hash, ++ struct ftrace_hash *hash, ++ int enable) ++{ ++ struct ftrace_ops_hash old_hash_ops; ++ struct ftrace_hash *old_hash; ++ int ret; ++ ++ old_hash = *orig_hash; ++ old_hash_ops.filter_hash = ops->func_hash->filter_hash; ++ old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; ++ ret = ftrace_hash_move(ops, enable, orig_hash, hash); ++ if (!ret) { ++ ftrace_ops_update_code(ops, &old_hash_ops); ++ free_ftrace_hash_rcu(old_hash); ++ } ++ return ret; ++} ++ ++static bool module_exists(const char *module) ++{ ++ /* All modules have the symbol __this_module */ ++ const char this_mod[] = "__this_module"; ++ char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; ++ unsigned long val; ++ int n; ++ ++ n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); ++ ++ if (n > sizeof(modname) - 1) ++ return false; ++ ++ val = module_kallsyms_lookup_name(modname); ++ return val != 0; ++} ++ ++static int cache_mod(struct trace_array *tr, ++ const char *func, char *module, int enable) ++{ ++ struct ftrace_mod_load *ftrace_mod, *n; ++ struct list_head *head = enable ? 
&tr->mod_trace : &tr->mod_notrace; ++ int ret; ++ ++ mutex_lock(&ftrace_lock); ++ ++ /* We do not cache inverse filters */ ++ if (func[0] == '!') { ++ func++; ++ ret = -EINVAL; ++ ++ /* Look to remove this hash */ ++ list_for_each_entry_safe(ftrace_mod, n, head, list) { ++ if (strcmp(ftrace_mod->module, module) != 0) ++ continue; ++ ++ /* no func matches all */ ++ if (strcmp(func, "*") == 0 || ++ (ftrace_mod->func && ++ strcmp(ftrace_mod->func, func) == 0)) { ++ ret = 0; ++ free_ftrace_mod(ftrace_mod); ++ continue; ++ } ++ } ++ goto out; ++ } ++ ++ ret = -EINVAL; ++ /* We only care about modules that have not been loaded yet */ ++ if (module_exists(module)) ++ goto out; ++ ++ /* Save this string off, and execute it when the module is loaded */ ++ ret = ftrace_add_mod(tr, func, module, enable); ++ out: ++ mutex_unlock(&ftrace_lock); ++ ++ return ret; ++} ++ ++static int ++ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, ++ int reset, int enable); ++ ++#ifdef CONFIG_MODULES ++static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, ++ char *mod, bool enable) ++{ ++ struct ftrace_mod_load *ftrace_mod, *n; ++ struct ftrace_hash **orig_hash, *new_hash; ++ LIST_HEAD(process_mods); ++ char *func; ++ int ret; ++ ++ mutex_lock(&ops->func_hash->regex_lock); ++ ++ if (enable) ++ orig_hash = &ops->func_hash->filter_hash; ++ else ++ orig_hash = &ops->func_hash->notrace_hash; ++ ++ new_hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, ++ *orig_hash); ++ if (!new_hash) ++ goto out; /* warn? */ ++ ++ mutex_lock(&ftrace_lock); ++ ++ list_for_each_entry_safe(ftrace_mod, n, head, list) { ++ ++ if (strcmp(ftrace_mod->module, mod) != 0) ++ continue; ++ ++ if (ftrace_mod->func) ++ func = kstrdup(ftrace_mod->func, GFP_KERNEL); ++ else ++ func = kstrdup("*", GFP_KERNEL); ++ ++ if (!func) /* warn? */ ++ continue; ++ ++ list_del(&ftrace_mod->list); ++ list_add(&ftrace_mod->list, &process_mods); ++ ++ /* Use the newly allocated func, as it may be "*" */ ++ kfree(ftrace_mod->func); ++ ftrace_mod->func = func; ++ } ++ ++ mutex_unlock(&ftrace_lock); ++ ++ list_for_each_entry_safe(ftrace_mod, n, &process_mods, list) { ++ ++ func = ftrace_mod->func; ++ ++ /* Grabs ftrace_lock, which is why we have this extra step */ ++ match_records(new_hash, func, strlen(func), mod); ++ free_ftrace_mod(ftrace_mod); ++ } ++ ++ if (enable && list_empty(head)) ++ new_hash->flags &= ~FTRACE_HASH_FL_MOD; ++ ++ mutex_lock(&ftrace_lock); ++ ++ ret = ftrace_hash_move_and_update_ops(ops, orig_hash, ++ new_hash, enable); ++ mutex_unlock(&ftrace_lock); ++ ++ out: ++ mutex_unlock(&ops->func_hash->regex_lock); ++ ++ free_ftrace_hash(new_hash); ++} ++ ++static void process_cached_mods(const char *mod_name) ++{ ++ struct trace_array *tr; ++ char *mod; ++ ++ mod = kstrdup(mod_name, GFP_KERNEL); ++ if (!mod) ++ return; ++ ++ mutex_lock(&trace_types_lock); ++ list_for_each_entry(tr, &ftrace_trace_arrays, list) { ++ if (!list_empty(&tr->mod_trace)) ++ process_mod_list(&tr->mod_trace, tr->ops, mod, true); ++ if (!list_empty(&tr->mod_notrace)) ++ process_mod_list(&tr->mod_notrace, tr->ops, mod, false); ++ } ++ mutex_unlock(&trace_types_lock); ++ ++ kfree(mod); ++} ++#endif ++ ++/* ++ * We register the module command as a template to show others how ++ * to register the a command as well. 
++ */ ++ ++static int ++ftrace_mod_callback(struct trace_array *tr, struct ftrace_hash *hash, ++ char *func_orig, char *cmd, char *module, int enable) ++{ ++ char *func; ++ int ret; ++ ++ /* match_records() modifies func, and we need the original */ ++ func = kstrdup(func_orig, GFP_KERNEL); ++ if (!func) ++ return -ENOMEM; ++ ++ /* ++ * cmd == 'mod' because we only registered this func ++ * for the 'mod' ftrace_func_command. ++ * But if you register one func with multiple commands, ++ * you can tell which command was used by the cmd ++ * parameter. ++ */ ++ ret = match_records(hash, func, strlen(func), module); ++ kfree(func); ++ ++ if (!ret) ++ return cache_mod(tr, func_orig, module, enable); ++ if (ret < 0) ++ return ret; ++ return 0; ++} ++ ++static struct ftrace_func_command ftrace_mod_cmd = { ++ .name = "mod", ++ .func = ftrace_mod_callback, ++}; ++ ++static int __init ftrace_mod_cmd_init(void) ++{ ++ return register_ftrace_command(&ftrace_mod_cmd); ++} ++core_initcall(ftrace_mod_cmd_init); ++ ++static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *pt_regs) ++{ ++ struct ftrace_probe_ops *probe_ops; ++ struct ftrace_func_probe *probe; ++ ++ probe = container_of(op, struct ftrace_func_probe, ops); ++ probe_ops = probe->probe_ops; ++ ++ /* ++ * Disable preemption for these calls to prevent a RCU grace ++ * period. This syncs the hash iteration and freeing of items ++ * on the hash. rcu_read_lock is too dangerous here. ++ */ ++ preempt_disable_notrace(); ++ probe_ops->func(ip, parent_ip, probe->tr, probe_ops, probe->data); ++ preempt_enable_notrace(); ++} ++ ++struct ftrace_func_map { ++ struct ftrace_func_entry entry; ++ void *data; ++}; ++ ++struct ftrace_func_mapper { ++ struct ftrace_hash hash; ++}; ++ ++/** ++ * allocate_ftrace_func_mapper - allocate a new ftrace_func_mapper ++ * ++ * Returns a ftrace_func_mapper descriptor that can be used to map ips to data. ++ */ ++struct ftrace_func_mapper *allocate_ftrace_func_mapper(void) ++{ ++ struct ftrace_hash *hash; ++ ++ /* ++ * The mapper is simply a ftrace_hash, but since the entries ++ * in the hash are not ftrace_func_entry type, we define it ++ * as a separate structure. ++ */ ++ hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); ++ return (struct ftrace_func_mapper *)hash; ++} ++ ++/** ++ * ftrace_func_mapper_find_ip - Find some data mapped to an ip ++ * @mapper: The mapper that has the ip maps ++ * @ip: the instruction pointer to find the data for ++ * ++ * Returns the data mapped to @ip if found otherwise NULL. The return ++ * is actually the address of the mapper data pointer. The address is ++ * returned for use cases where the data is no bigger than a long, and ++ * the user can use the data pointer as its data instead of having to ++ * allocate more memory for the reference. ++ */ ++void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper, ++ unsigned long ip) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_map *map; ++ ++ entry = ftrace_lookup_ip(&mapper->hash, ip); ++ if (!entry) ++ return NULL; ++ ++ map = (struct ftrace_func_map *)entry; ++ return &map->data; ++} ++ ++/** ++ * ftrace_func_mapper_add_ip - Map some data to an ip ++ * @mapper: The mapper that has the ip maps ++ * @ip: The instruction pointer address to map @data to ++ * @data: The data to map to @ip ++ * ++ * Returns 0 on succes otherwise an error. 
++ */ ++int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, ++ unsigned long ip, void *data) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_map *map; ++ ++ entry = ftrace_lookup_ip(&mapper->hash, ip); ++ if (entry) ++ return -EBUSY; ++ ++ map = kmalloc(sizeof(*map), GFP_KERNEL); ++ if (!map) ++ return -ENOMEM; ++ ++ map->entry.ip = ip; ++ map->data = data; ++ ++ __add_hash_entry(&mapper->hash, &map->entry); ++ ++ return 0; ++} ++ ++/** ++ * ftrace_func_mapper_remove_ip - Remove an ip from the mapping ++ * @mapper: The mapper that has the ip maps ++ * @ip: The instruction pointer address to remove the data from ++ * ++ * Returns the data if it is found, otherwise NULL. ++ * Note, if the data pointer is used as the data itself, (see ++ * ftrace_func_mapper_find_ip(), then the return value may be meaningless, ++ * if the data pointer was set to zero. ++ */ ++void *ftrace_func_mapper_remove_ip(struct ftrace_func_mapper *mapper, ++ unsigned long ip) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_map *map; ++ void *data; ++ ++ entry = ftrace_lookup_ip(&mapper->hash, ip); ++ if (!entry) ++ return NULL; ++ ++ map = (struct ftrace_func_map *)entry; ++ data = map->data; ++ ++ remove_hash_entry(&mapper->hash, entry); ++ kfree(entry); ++ ++ return data; ++} ++ ++/** ++ * free_ftrace_func_mapper - free a mapping of ips and data ++ * @mapper: The mapper that has the ip maps ++ * @free_func: A function to be called on each data item. ++ * ++ * This is used to free the function mapper. The @free_func is optional ++ * and can be used if the data needs to be freed as well. ++ */ ++void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, ++ ftrace_mapper_func free_func) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_map *map; ++ struct hlist_head *hhd; ++ int size, i; ++ ++ if (!mapper) ++ return; ++ ++ if (free_func && mapper->hash.count) { ++ size = 1 << mapper->hash.size_bits; ++ for (i = 0; i < size; i++) { ++ hhd = &mapper->hash.buckets[i]; ++ hlist_for_each_entry(entry, hhd, hlist) { ++ map = (struct ftrace_func_map *)entry; ++ free_func(map); ++ } ++ } ++ } ++ free_ftrace_hash(&mapper->hash); ++} ++ ++static void release_probe(struct ftrace_func_probe *probe) ++{ ++ struct ftrace_probe_ops *probe_ops; ++ ++ mutex_lock(&ftrace_lock); ++ ++ WARN_ON(probe->ref <= 0); ++ ++ /* Subtract the ref that was used to protect this instance */ ++ probe->ref--; ++ ++ if (!probe->ref) { ++ probe_ops = probe->probe_ops; ++ /* ++ * Sending zero as ip tells probe_ops to free ++ * the probe->data itself ++ */ ++ if (probe_ops->free) ++ probe_ops->free(probe_ops, probe->tr, 0, probe->data); ++ list_del(&probe->list); ++ kfree(probe); ++ } ++ mutex_unlock(&ftrace_lock); ++} ++ ++static void acquire_probe_locked(struct ftrace_func_probe *probe) ++{ ++ /* ++ * Add one ref to keep it from being freed when releasing the ++ * ftrace_lock mutex. ++ */ ++ probe->ref++; ++} ++ ++int ++register_ftrace_function_probe(char *glob, struct trace_array *tr, ++ struct ftrace_probe_ops *probe_ops, ++ void *data) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_probe *probe; ++ struct ftrace_hash **orig_hash; ++ struct ftrace_hash *old_hash; ++ struct ftrace_hash *hash; ++ int count = 0; ++ int size; ++ int ret; ++ int i; ++ ++ if (WARN_ON(!tr)) ++ return -EINVAL; ++ ++ /* We do not support '!' 
for function probes */ ++ if (WARN_ON(glob[0] == '!')) ++ return -EINVAL; ++ ++ ++ mutex_lock(&ftrace_lock); ++ /* Check if the probe_ops is already registered */ ++ list_for_each_entry(probe, &tr->func_probes, list) { ++ if (probe->probe_ops == probe_ops) ++ break; ++ } ++ if (&probe->list == &tr->func_probes) { ++ probe = kzalloc(sizeof(*probe), GFP_KERNEL); ++ if (!probe) { ++ mutex_unlock(&ftrace_lock); ++ return -ENOMEM; ++ } ++ probe->probe_ops = probe_ops; ++ probe->ops.func = function_trace_probe_call; ++ probe->tr = tr; ++ ftrace_ops_init(&probe->ops); ++ list_add(&probe->list, &tr->func_probes); ++ } ++ ++ acquire_probe_locked(probe); ++ ++ mutex_unlock(&ftrace_lock); ++ ++ /* ++ * Note, there's a small window here that the func_hash->filter_hash ++ * may be NULL or empty. Need to be carefule when reading the loop. ++ */ ++ mutex_lock(&probe->ops.func_hash->regex_lock); ++ ++ orig_hash = &probe->ops.func_hash->filter_hash; ++ old_hash = *orig_hash; ++ hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); ++ ++ if (!hash) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ ret = ftrace_match_records(hash, glob, strlen(glob)); ++ ++ /* Nothing found? */ ++ if (!ret) ++ ret = -EINVAL; ++ ++ if (ret < 0) ++ goto out; ++ ++ size = 1 << hash->size_bits; ++ for (i = 0; i < size; i++) { ++ hlist_for_each_entry(entry, &hash->buckets[i], hlist) { ++ if (ftrace_lookup_ip(old_hash, entry->ip)) ++ continue; ++ /* ++ * The caller might want to do something special ++ * for each function we find. We call the callback ++ * to give the caller an opportunity to do so. ++ */ ++ if (probe_ops->init) { ++ ret = probe_ops->init(probe_ops, tr, ++ entry->ip, data, ++ &probe->data); ++ if (ret < 0) { ++ if (probe_ops->free && count) ++ probe_ops->free(probe_ops, tr, ++ 0, probe->data); ++ probe->data = NULL; ++ goto out; ++ } ++ } ++ count++; ++ } ++ } ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (!count) { ++ /* Nothing was added? 
*/ ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ ret = ftrace_hash_move_and_update_ops(&probe->ops, orig_hash, ++ hash, 1); ++ if (ret < 0) ++ goto err_unlock; ++ ++ /* One ref for each new function traced */ ++ probe->ref += count; ++ ++ if (!(probe->ops.flags & FTRACE_OPS_FL_ENABLED)) ++ ret = ftrace_startup(&probe->ops, 0); ++ ++ out_unlock: ++ mutex_unlock(&ftrace_lock); ++ ++ if (!ret) ++ ret = count; ++ out: ++ mutex_unlock(&probe->ops.func_hash->regex_lock); ++ free_ftrace_hash(hash); ++ ++ release_probe(probe); ++ ++ return ret; ++ ++ err_unlock: ++ if (!probe_ops->free || !count) ++ goto out_unlock; ++ ++ /* Failed to do the move, need to call the free functions */ ++ for (i = 0; i < size; i++) { ++ hlist_for_each_entry(entry, &hash->buckets[i], hlist) { ++ if (ftrace_lookup_ip(old_hash, entry->ip)) ++ continue; ++ probe_ops->free(probe_ops, tr, entry->ip, probe->data); ++ } ++ } ++ goto out_unlock; ++} ++ ++int ++unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, ++ struct ftrace_probe_ops *probe_ops) ++{ ++ struct ftrace_ops_hash old_hash_ops; ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_probe *probe; ++ struct ftrace_glob func_g; ++ struct ftrace_hash **orig_hash; ++ struct ftrace_hash *old_hash; ++ struct ftrace_hash *hash = NULL; ++ struct hlist_node *tmp; ++ struct hlist_head hhd; ++ char str[KSYM_SYMBOL_LEN]; ++ int count = 0; ++ int i, ret = -ENODEV; ++ int size; ++ ++ if (!glob || !strlen(glob) || !strcmp(glob, "*")) ++ func_g.search = NULL; ++ else { ++ int not; ++ ++ func_g.type = filter_parse_regex(glob, strlen(glob), ++ &func_g.search, ¬); ++ func_g.len = strlen(func_g.search); ++ ++ /* we do not support '!' for function probes */ ++ if (WARN_ON(not)) ++ return -EINVAL; ++ } ++ ++ mutex_lock(&ftrace_lock); ++ /* Check if the probe_ops is already registered */ ++ list_for_each_entry(probe, &tr->func_probes, list) { ++ if (probe->probe_ops == probe_ops) ++ break; ++ } ++ if (&probe->list == &tr->func_probes) ++ goto err_unlock_ftrace; ++ ++ ret = -EINVAL; ++ if (!(probe->ops.flags & FTRACE_OPS_FL_INITIALIZED)) ++ goto err_unlock_ftrace; ++ ++ acquire_probe_locked(probe); ++ ++ mutex_unlock(&ftrace_lock); ++ ++ mutex_lock(&probe->ops.func_hash->regex_lock); ++ ++ orig_hash = &probe->ops.func_hash->filter_hash; ++ old_hash = *orig_hash; ++ ++ if (ftrace_hash_empty(old_hash)) ++ goto out_unlock; ++ ++ old_hash_ops.filter_hash = old_hash; ++ /* Probes only have filters */ ++ old_hash_ops.notrace_hash = NULL; ++ ++ ret = -ENOMEM; ++ hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); ++ if (!hash) ++ goto out_unlock; ++ ++ INIT_HLIST_HEAD(&hhd); ++ ++ size = 1 << hash->size_bits; ++ for (i = 0; i < size; i++) { ++ hlist_for_each_entry_safe(entry, tmp, &hash->buckets[i], hlist) { ++ ++ if (func_g.search) { ++ kallsyms_lookup(entry->ip, NULL, NULL, ++ NULL, str); ++ if (!ftrace_match(str, &func_g)) ++ continue; ++ } ++ count++; ++ remove_hash_entry(hash, entry); ++ hlist_add_head(&entry->hlist, &hhd); ++ } ++ } ++ ++ /* Nothing found? 
*/ ++ if (!count) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ mutex_lock(&ftrace_lock); ++ ++ WARN_ON(probe->ref < count); ++ ++ probe->ref -= count; ++ ++ if (ftrace_hash_empty(hash)) ++ ftrace_shutdown(&probe->ops, 0); ++ ++ ret = ftrace_hash_move_and_update_ops(&probe->ops, orig_hash, ++ hash, 1); ++ ++ /* still need to update the function call sites */ ++ if (ftrace_enabled && !ftrace_hash_empty(hash)) ++ ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS, ++ &old_hash_ops); ++ synchronize_sched(); ++ ++ hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) { ++ hlist_del(&entry->hlist); ++ if (probe_ops->free) ++ probe_ops->free(probe_ops, tr, entry->ip, probe->data); ++ kfree(entry); ++ } ++ mutex_unlock(&ftrace_lock); ++ ++ out_unlock: ++ mutex_unlock(&probe->ops.func_hash->regex_lock); ++ free_ftrace_hash(hash); ++ ++ release_probe(probe); ++ ++ return ret; ++ ++ err_unlock_ftrace: ++ mutex_unlock(&ftrace_lock); ++ return ret; ++} ++ ++void clear_ftrace_function_probes(struct trace_array *tr) ++{ ++ struct ftrace_func_probe *probe, *n; ++ ++ list_for_each_entry_safe(probe, n, &tr->func_probes, list) ++ unregister_ftrace_function_probe_func(NULL, tr, probe->probe_ops); ++} ++ ++static LIST_HEAD(ftrace_commands); ++static DEFINE_MUTEX(ftrace_cmd_mutex); ++ ++/* ++ * Currently we only register ftrace commands from __init, so mark this ++ * __init too. ++ */ ++__init int register_ftrace_command(struct ftrace_func_command *cmd) ++{ ++ struct ftrace_func_command *p; ++ int ret = 0; ++ ++ mutex_lock(&ftrace_cmd_mutex); ++ list_for_each_entry(p, &ftrace_commands, list) { ++ if (strcmp(cmd->name, p->name) == 0) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } ++ } ++ list_add(&cmd->list, &ftrace_commands); ++ out_unlock: ++ mutex_unlock(&ftrace_cmd_mutex); ++ ++ return ret; ++} ++ ++/* ++ * Currently we only unregister ftrace commands from __init, so mark ++ * this __init too. 
++ */ ++__init int unregister_ftrace_command(struct ftrace_func_command *cmd) ++{ ++ struct ftrace_func_command *p, *n; ++ int ret = -ENODEV; ++ ++ mutex_lock(&ftrace_cmd_mutex); ++ list_for_each_entry_safe(p, n, &ftrace_commands, list) { ++ if (strcmp(cmd->name, p->name) == 0) { ++ ret = 0; ++ list_del_init(&p->list); ++ goto out_unlock; ++ } ++ } ++ out_unlock: ++ mutex_unlock(&ftrace_cmd_mutex); ++ ++ return ret; ++} ++ ++static int ftrace_process_regex(struct ftrace_iterator *iter, ++ char *buff, int len, int enable) ++{ ++ struct ftrace_hash *hash = iter->hash; ++ struct trace_array *tr = iter->ops->private; ++ char *func, *command, *next = buff; ++ struct ftrace_func_command *p; ++ int ret = -EINVAL; ++ ++ func = strsep(&next, ":"); ++ ++ if (!next) { ++ ret = ftrace_match_records(hash, func, len); ++ if (!ret) ++ ret = -EINVAL; ++ if (ret < 0) ++ return ret; ++ return 0; ++ } ++ ++ /* command found */ ++ ++ command = strsep(&next, ":"); ++ ++ mutex_lock(&ftrace_cmd_mutex); ++ list_for_each_entry(p, &ftrace_commands, list) { ++ if (strcmp(p->name, command) == 0) { ++ ret = p->func(tr, hash, func, command, next, enable); ++ goto out_unlock; ++ } ++ } ++ out_unlock: ++ mutex_unlock(&ftrace_cmd_mutex); ++ ++ return ret; ++} ++ ++static ssize_t ++ftrace_regex_write(struct file *file, const char __user *ubuf, ++ size_t cnt, loff_t *ppos, int enable) ++{ ++ struct ftrace_iterator *iter; ++ struct trace_parser *parser; ++ ssize_t ret, read; ++ ++ if (!cnt) ++ return 0; ++ ++ if (file->f_mode & FMODE_READ) { ++ struct seq_file *m = file->private_data; ++ iter = m->private; ++ } else ++ iter = file->private_data; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ /* iter->hash is a local copy, so we don't need regex_lock */ ++ ++ parser = &iter->parser; ++ read = trace_get_user(parser, ubuf, cnt, ppos); ++ ++ if (read >= 0 && trace_parser_loaded(parser) && ++ !trace_parser_cont(parser)) { ++ ret = ftrace_process_regex(iter, parser->buffer, ++ parser->idx, enable); ++ trace_parser_clear(parser); ++ if (ret < 0) ++ goto out; ++ } ++ ++ ret = read; ++ out: ++ return ret; ++} ++ ++ssize_t ++ftrace_filter_write(struct file *file, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ return ftrace_regex_write(file, ubuf, cnt, ppos, 1); ++} ++ ++ssize_t ++ftrace_notrace_write(struct file *file, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ return ftrace_regex_write(file, ubuf, cnt, ppos, 0); ++} ++ ++static int ++ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) ++{ ++ struct ftrace_func_entry *entry; ++ ++ if (!ftrace_location(ip)) ++ return -EINVAL; ++ ++ if (remove) { ++ entry = ftrace_lookup_ip(hash, ip); ++ if (!entry) ++ return -ENOENT; ++ free_hash_entry(hash, entry); ++ return 0; ++ } ++ ++ return add_hash_entry(hash, ip); ++} ++ ++static int ++ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, ++ unsigned long ip, int remove, int reset, int enable) ++{ ++ struct ftrace_hash **orig_hash; ++ struct ftrace_hash *hash; ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ mutex_lock(&ops->func_hash->regex_lock); ++ ++ if (enable) ++ orig_hash = &ops->func_hash->filter_hash; ++ else ++ orig_hash = &ops->func_hash->notrace_hash; ++ ++ if (reset) ++ hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); ++ else ++ hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); ++ ++ if (!hash) { ++ ret = -ENOMEM; ++ goto out_regex_unlock; ++ } ++ ++ if (buf && !ftrace_match_records(hash, buf, len)) 
{ ++ ret = -EINVAL; ++ goto out_regex_unlock; ++ } ++ if (ip) { ++ ret = ftrace_match_addr(hash, ip, remove); ++ if (ret < 0) ++ goto out_regex_unlock; ++ } ++ ++ mutex_lock(&ftrace_lock); ++ ret = ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable); ++ mutex_unlock(&ftrace_lock); ++ ++ out_regex_unlock: ++ mutex_unlock(&ops->func_hash->regex_lock); ++ ++ free_ftrace_hash(hash); ++ return ret; ++} ++ ++static int ++ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, ++ int reset, int enable) ++{ ++ return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable); ++} ++ ++/** ++ * ftrace_set_filter_ip - set a function to filter on in ftrace by address ++ * @ops - the ops to set the filter with ++ * @ip - the address to add to or remove from the filter. ++ * @remove - non zero to remove the ip from the filter ++ * @reset - non zero to reset all filters before applying this filter. ++ * ++ * Filters denote which functions should be enabled when tracing is enabled ++ * If @ip is NULL, it failes to update filter. ++ */ ++int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, ++ int remove, int reset) ++{ ++ ftrace_ops_init(ops); ++ return ftrace_set_addr(ops, ip, remove, reset, 1); ++} ++EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); ++ ++/** ++ * ftrace_ops_set_global_filter - setup ops to use global filters ++ * @ops - the ops which will use the global filters ++ * ++ * ftrace users who need global function trace filtering should call this. ++ * It can set the global filter only if ops were not initialized before. ++ */ ++void ftrace_ops_set_global_filter(struct ftrace_ops *ops) ++{ ++ if (ops->flags & FTRACE_OPS_FL_INITIALIZED) ++ return; ++ ++ ftrace_ops_init(ops); ++ ops->func_hash = &global_ops.local_hash; ++} ++EXPORT_SYMBOL_GPL(ftrace_ops_set_global_filter); ++ ++static int ++ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, ++ int reset, int enable) ++{ ++ return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable); ++} ++ ++/** ++ * ftrace_set_filter - set a function to filter on in ftrace ++ * @ops - the ops to set the filter with ++ * @buf - the string that holds the function filter text. ++ * @len - the length of the string. ++ * @reset - non zero to reset all filters before applying this filter. ++ * ++ * Filters denote which functions should be enabled when tracing is enabled. ++ * If @buf is NULL and reset is set, all functions will be enabled for tracing. ++ */ ++int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, ++ int len, int reset) ++{ ++ ftrace_ops_init(ops); ++ return ftrace_set_regex(ops, buf, len, reset, 1); ++} ++EXPORT_SYMBOL_GPL(ftrace_set_filter); ++ ++/** ++ * ftrace_set_notrace - set a function to not trace in ftrace ++ * @ops - the ops to set the notrace filter with ++ * @buf - the string that holds the function notrace text. ++ * @len - the length of the string. ++ * @reset - non zero to reset all filters before applying this filter. ++ * ++ * Notrace Filters denote which functions should not be enabled when tracing ++ * is enabled. If @buf is NULL and reset is set, all functions will be enabled ++ * for tracing. ++ */ ++int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, ++ int len, int reset) ++{ ++ ftrace_ops_init(ops); ++ return ftrace_set_regex(ops, buf, len, reset, 0); ++} ++EXPORT_SYMBOL_GPL(ftrace_set_notrace); ++/** ++ * ftrace_set_global_filter - set a function to filter on with global tracers ++ * @buf - the string that holds the function filter text. 
++ * @len - the length of the string. ++ * @reset - non zero to reset all filters before applying this filter. ++ * ++ * Filters denote which functions should be enabled when tracing is enabled. ++ * If @buf is NULL and reset is set, all functions will be enabled for tracing. ++ */ ++void ftrace_set_global_filter(unsigned char *buf, int len, int reset) ++{ ++ ftrace_set_regex(&global_ops, buf, len, reset, 1); ++} ++EXPORT_SYMBOL_GPL(ftrace_set_global_filter); ++ ++/** ++ * ftrace_set_global_notrace - set a function to not trace with global tracers ++ * @buf - the string that holds the function notrace text. ++ * @len - the length of the string. ++ * @reset - non zero to reset all filters before applying this filter. ++ * ++ * Notrace Filters denote which functions should not be enabled when tracing ++ * is enabled. If @buf is NULL and reset is set, all functions will be enabled ++ * for tracing. ++ */ ++void ftrace_set_global_notrace(unsigned char *buf, int len, int reset) ++{ ++ ftrace_set_regex(&global_ops, buf, len, reset, 0); ++} ++EXPORT_SYMBOL_GPL(ftrace_set_global_notrace); ++ ++/* ++ * command line interface to allow users to set filters on boot up. ++ */ ++#define FTRACE_FILTER_SIZE COMMAND_LINE_SIZE ++static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; ++static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; ++ ++/* Used by function selftest to not test if filter is set */ ++bool ftrace_filter_param __initdata; ++ ++static int __init set_ftrace_notrace(char *str) ++{ ++ ftrace_filter_param = true; ++ strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); ++ return 1; ++} ++__setup("ftrace_notrace=", set_ftrace_notrace); ++ ++static int __init set_ftrace_filter(char *str) ++{ ++ ftrace_filter_param = true; ++ strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); ++ return 1; ++} ++__setup("ftrace_filter=", set_ftrace_filter); ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; ++static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata; ++static int ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer); ++ ++static int __init set_graph_function(char *str) ++{ ++ strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); ++ return 1; ++} ++__setup("ftrace_graph_filter=", set_graph_function); ++ ++static int __init set_graph_notrace_function(char *str) ++{ ++ strlcpy(ftrace_graph_notrace_buf, str, FTRACE_FILTER_SIZE); ++ return 1; ++} ++__setup("ftrace_graph_notrace=", set_graph_notrace_function); ++ ++static int __init set_graph_max_depth_function(char *str) ++{ ++ if (!str) ++ return 0; ++ fgraph_max_depth = simple_strtoul(str, NULL, 0); ++ return 1; ++} ++__setup("ftrace_graph_max_depth=", set_graph_max_depth_function); ++ ++static void __init set_ftrace_early_graph(char *buf, int enable) ++{ ++ int ret; ++ char *func; ++ struct ftrace_hash *hash; ++ ++ hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); ++ if (WARN_ON(!hash)) ++ return; ++ ++ while (buf) { ++ func = strsep(&buf, ","); ++ /* we allow only one expression at a time */ ++ ret = ftrace_graph_set_hash(hash, func); ++ if (ret) ++ printk(KERN_DEBUG "ftrace: function %s not " ++ "traceable\n", func); ++ } ++ ++ if (enable) ++ ftrace_graph_hash = hash; ++ else ++ ftrace_graph_notrace_hash = hash; ++} ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++ ++void __init ++ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable) ++{ ++ char *func; ++ ++ ftrace_ops_init(ops); ++ ++ while (buf) { ++ func = strsep(&buf, ","); ++ 
ftrace_set_regex(ops, func, strlen(func), 0, enable); ++ } ++} ++ ++static void __init set_ftrace_early_filters(void) ++{ ++ if (ftrace_filter_buf[0]) ++ ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1); ++ if (ftrace_notrace_buf[0]) ++ ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0); ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ if (ftrace_graph_buf[0]) ++ set_ftrace_early_graph(ftrace_graph_buf, 1); ++ if (ftrace_graph_notrace_buf[0]) ++ set_ftrace_early_graph(ftrace_graph_notrace_buf, 0); ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++} ++ ++int ftrace_regex_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *m = (struct seq_file *)file->private_data; ++ struct ftrace_iterator *iter; ++ struct ftrace_hash **orig_hash; ++ struct trace_parser *parser; ++ int filter_hash; ++ int ret; ++ ++ if (file->f_mode & FMODE_READ) { ++ iter = m->private; ++ seq_release(inode, file); ++ } else ++ iter = file->private_data; ++ ++ parser = &iter->parser; ++ if (trace_parser_loaded(parser)) { ++ ftrace_match_records(iter->hash, parser->buffer, parser->idx); ++ } ++ ++ trace_parser_put(parser); ++ ++ mutex_lock(&iter->ops->func_hash->regex_lock); ++ ++ if (file->f_mode & FMODE_WRITE) { ++ filter_hash = !!(iter->flags & FTRACE_ITER_FILTER); ++ ++ if (filter_hash) { ++ orig_hash = &iter->ops->func_hash->filter_hash; ++ if (iter->tr && !list_empty(&iter->tr->mod_trace)) ++ iter->hash->flags |= FTRACE_HASH_FL_MOD; ++ } else ++ orig_hash = &iter->ops->func_hash->notrace_hash; ++ ++ mutex_lock(&ftrace_lock); ++ ret = ftrace_hash_move_and_update_ops(iter->ops, orig_hash, ++ iter->hash, filter_hash); ++ mutex_unlock(&ftrace_lock); ++ } else { ++ /* For read only, the hash is the ops hash */ ++ iter->hash = NULL; ++ } ++ ++ mutex_unlock(&iter->ops->func_hash->regex_lock); ++ free_ftrace_hash(iter->hash); ++ if (iter->tr) ++ trace_array_put(iter->tr); ++ kfree(iter); ++ ++ return 0; ++} ++ ++static const struct file_operations ftrace_avail_fops = { ++ .open = ftrace_avail_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release_private, ++}; ++ ++static const struct file_operations ftrace_enabled_fops = { ++ .open = ftrace_enabled_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release_private, ++}; ++ ++static const struct file_operations ftrace_filter_fops = { ++ .open = ftrace_filter_open, ++ .read = seq_read, ++ .write = ftrace_filter_write, ++ .llseek = tracing_lseek, ++ .release = ftrace_regex_release, ++}; ++ ++static const struct file_operations ftrace_notrace_fops = { ++ .open = ftrace_notrace_open, ++ .read = seq_read, ++ .write = ftrace_notrace_write, ++ .llseek = tracing_lseek, ++ .release = ftrace_regex_release, ++}; ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ ++static DEFINE_MUTEX(graph_lock); ++ ++struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; ++struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; ++ ++enum graph_filter_type { ++ GRAPH_FILTER_NOTRACE = 0, ++ GRAPH_FILTER_FUNCTION, ++}; ++ ++#define FTRACE_GRAPH_EMPTY ((void *)1) ++ ++struct ftrace_graph_data { ++ struct ftrace_hash *hash; ++ struct ftrace_func_entry *entry; ++ int idx; /* for hash table iteration */ ++ enum graph_filter_type type; ++ struct ftrace_hash *new_hash; ++ const struct seq_operations *seq_ops; ++ struct trace_parser parser; ++}; ++ ++static void * ++__g_next(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_graph_data *fgd = m->private; ++ struct ftrace_func_entry *entry = fgd->entry; ++ struct hlist_head *head; ++ int i, idx = 
fgd->idx; ++ ++ if (*pos >= fgd->hash->count) ++ return NULL; ++ ++ if (entry) { ++ hlist_for_each_entry_continue(entry, hlist) { ++ fgd->entry = entry; ++ return entry; ++ } ++ ++ idx++; ++ } ++ ++ for (i = idx; i < 1 << fgd->hash->size_bits; i++) { ++ head = &fgd->hash->buckets[i]; ++ hlist_for_each_entry(entry, head, hlist) { ++ fgd->entry = entry; ++ fgd->idx = i; ++ return entry; ++ } ++ } ++ return NULL; ++} ++ ++static void * ++g_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ (*pos)++; ++ return __g_next(m, pos); ++} ++ ++static void *g_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_graph_data *fgd = m->private; ++ ++ mutex_lock(&graph_lock); ++ ++ if (fgd->type == GRAPH_FILTER_FUNCTION) ++ fgd->hash = rcu_dereference_protected(ftrace_graph_hash, ++ lockdep_is_held(&graph_lock)); ++ else ++ fgd->hash = rcu_dereference_protected(ftrace_graph_notrace_hash, ++ lockdep_is_held(&graph_lock)); ++ ++ /* Nothing, tell g_show to print all functions are enabled */ ++ if (ftrace_hash_empty(fgd->hash) && !*pos) ++ return FTRACE_GRAPH_EMPTY; ++ ++ fgd->idx = 0; ++ fgd->entry = NULL; ++ return __g_next(m, pos); ++} ++ ++static void g_stop(struct seq_file *m, void *p) ++{ ++ mutex_unlock(&graph_lock); ++} ++ ++static int g_show(struct seq_file *m, void *v) ++{ ++ struct ftrace_func_entry *entry = v; ++ ++ if (!entry) ++ return 0; ++ ++ if (entry == FTRACE_GRAPH_EMPTY) { ++ struct ftrace_graph_data *fgd = m->private; ++ ++ if (fgd->type == GRAPH_FILTER_FUNCTION) ++ seq_puts(m, "#### all functions enabled ####\n"); ++ else ++ seq_puts(m, "#### no functions disabled ####\n"); ++ return 0; ++ } ++ ++ seq_printf(m, "%ps\n", (void *)entry->ip); ++ ++ return 0; ++} ++ ++static const struct seq_operations ftrace_graph_seq_ops = { ++ .start = g_start, ++ .next = g_next, ++ .stop = g_stop, ++ .show = g_show, ++}; ++ ++static int ++__ftrace_graph_open(struct inode *inode, struct file *file, ++ struct ftrace_graph_data *fgd) ++{ ++ int ret = 0; ++ struct ftrace_hash *new_hash = NULL; ++ ++ if (file->f_mode & FMODE_WRITE) { ++ const int size_bits = FTRACE_HASH_DEFAULT_BITS; ++ ++ if (trace_parser_get_init(&fgd->parser, FTRACE_BUFF_MAX)) ++ return -ENOMEM; ++ ++ if (file->f_flags & O_TRUNC) ++ new_hash = alloc_ftrace_hash(size_bits); ++ else ++ new_hash = alloc_and_copy_ftrace_hash(size_bits, ++ fgd->hash); ++ if (!new_hash) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } ++ ++ if (file->f_mode & FMODE_READ) { ++ ret = seq_open(file, &ftrace_graph_seq_ops); ++ if (!ret) { ++ struct seq_file *m = file->private_data; ++ m->private = fgd; ++ } else { ++ /* Failed */ ++ free_ftrace_hash(new_hash); ++ new_hash = NULL; ++ } ++ } else ++ file->private_data = fgd; ++ ++out: ++ if (ret < 0 && file->f_mode & FMODE_WRITE) ++ trace_parser_put(&fgd->parser); ++ ++ fgd->new_hash = new_hash; ++ ++ /* ++ * All uses of fgd->hash must be taken with the graph_lock ++ * held. The graph_lock is going to be released, so force ++ * fgd->hash to be reinitialized when it is taken again. 
++ */ ++ fgd->hash = NULL; ++ ++ return ret; ++} ++ ++static int ++ftrace_graph_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_graph_data *fgd; ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ fgd = kmalloc(sizeof(*fgd), GFP_KERNEL); ++ if (fgd == NULL) ++ return -ENOMEM; ++ ++ mutex_lock(&graph_lock); ++ ++ fgd->hash = rcu_dereference_protected(ftrace_graph_hash, ++ lockdep_is_held(&graph_lock)); ++ fgd->type = GRAPH_FILTER_FUNCTION; ++ fgd->seq_ops = &ftrace_graph_seq_ops; ++ ++ ret = __ftrace_graph_open(inode, file, fgd); ++ if (ret < 0) ++ kfree(fgd); ++ ++ mutex_unlock(&graph_lock); ++ return ret; ++} ++ ++static int ++ftrace_graph_notrace_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_graph_data *fgd; ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ fgd = kmalloc(sizeof(*fgd), GFP_KERNEL); ++ if (fgd == NULL) ++ return -ENOMEM; ++ ++ mutex_lock(&graph_lock); ++ ++ fgd->hash = rcu_dereference_protected(ftrace_graph_notrace_hash, ++ lockdep_is_held(&graph_lock)); ++ fgd->type = GRAPH_FILTER_NOTRACE; ++ fgd->seq_ops = &ftrace_graph_seq_ops; ++ ++ ret = __ftrace_graph_open(inode, file, fgd); ++ if (ret < 0) ++ kfree(fgd); ++ ++ mutex_unlock(&graph_lock); ++ return ret; ++} ++ ++static int ++ftrace_graph_release(struct inode *inode, struct file *file) ++{ ++ struct ftrace_graph_data *fgd; ++ struct ftrace_hash *old_hash, *new_hash; ++ struct trace_parser *parser; ++ int ret = 0; ++ ++ if (file->f_mode & FMODE_READ) { ++ struct seq_file *m = file->private_data; ++ ++ fgd = m->private; ++ seq_release(inode, file); ++ } else { ++ fgd = file->private_data; ++ } ++ ++ ++ if (file->f_mode & FMODE_WRITE) { ++ ++ parser = &fgd->parser; ++ ++ if (trace_parser_loaded((parser))) { ++ ret = ftrace_graph_set_hash(fgd->new_hash, ++ parser->buffer); ++ } ++ ++ trace_parser_put(parser); ++ ++ new_hash = __ftrace_hash_move(fgd->new_hash); ++ if (!new_hash) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ mutex_lock(&graph_lock); ++ ++ if (fgd->type == GRAPH_FILTER_FUNCTION) { ++ old_hash = rcu_dereference_protected(ftrace_graph_hash, ++ lockdep_is_held(&graph_lock)); ++ rcu_assign_pointer(ftrace_graph_hash, new_hash); ++ } else { ++ old_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, ++ lockdep_is_held(&graph_lock)); ++ rcu_assign_pointer(ftrace_graph_notrace_hash, new_hash); ++ } ++ ++ mutex_unlock(&graph_lock); ++ ++ /* Wait till all users are no longer using the old hash */ ++ synchronize_sched(); ++ ++ free_ftrace_hash(old_hash); ++ } ++ ++ out: ++ free_ftrace_hash(fgd->new_hash); ++ kfree(fgd); ++ ++ return ret; ++} ++ ++static int ++ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) ++{ ++ struct ftrace_glob func_g; ++ struct dyn_ftrace *rec; ++ struct ftrace_page *pg; ++ struct ftrace_func_entry *entry; ++ int fail = 1; ++ int not; ++ ++ /* decode regex */ ++ func_g.type = filter_parse_regex(buffer, strlen(buffer), ++ &func_g.search, ¬); ++ ++ func_g.len = strlen(func_g.search); ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (unlikely(ftrace_disabled)) { ++ mutex_unlock(&ftrace_lock); ++ return -ENODEV; ++ } ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ if (ftrace_match_record(rec, &func_g, NULL, 0)) { ++ entry = ftrace_lookup_ip(hash, rec->ip); ++ ++ if (!not) { ++ fail = 0; ++ ++ if (entry) ++ continue; ++ if (add_hash_entry(hash, rec->ip) < 0) ++ goto out; ++ } else { ++ if (entry) { ++ free_hash_entry(hash, entry); ++ fail = 0; ++ } ++ } ++ 
} ++ } while_for_each_ftrace_rec(); ++out: ++ mutex_unlock(&ftrace_lock); ++ ++ if (fail) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static ssize_t ++ftrace_graph_write(struct file *file, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ ssize_t read, ret = 0; ++ struct ftrace_graph_data *fgd = file->private_data; ++ struct trace_parser *parser; ++ ++ if (!cnt) ++ return 0; ++ ++ /* Read mode uses seq functions */ ++ if (file->f_mode & FMODE_READ) { ++ struct seq_file *m = file->private_data; ++ fgd = m->private; ++ } ++ ++ parser = &fgd->parser; ++ ++ read = trace_get_user(parser, ubuf, cnt, ppos); ++ ++ if (read >= 0 && trace_parser_loaded(parser) && ++ !trace_parser_cont(parser)) { ++ ++ ret = ftrace_graph_set_hash(fgd->new_hash, ++ parser->buffer); ++ trace_parser_clear(parser); ++ } ++ ++ if (!ret) ++ ret = read; ++ ++ return ret; ++} ++ ++static const struct file_operations ftrace_graph_fops = { ++ .open = ftrace_graph_open, ++ .read = seq_read, ++ .write = ftrace_graph_write, ++ .llseek = tracing_lseek, ++ .release = ftrace_graph_release, ++}; ++ ++static const struct file_operations ftrace_graph_notrace_fops = { ++ .open = ftrace_graph_notrace_open, ++ .read = seq_read, ++ .write = ftrace_graph_write, ++ .llseek = tracing_lseek, ++ .release = ftrace_graph_release, ++}; ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++ ++void ftrace_create_filter_files(struct ftrace_ops *ops, ++ struct dentry *parent) ++{ ++ ++ trace_create_file("set_ftrace_filter", 0644, parent, ++ ops, &ftrace_filter_fops); ++ ++ trace_create_file("set_ftrace_notrace", 0644, parent, ++ ops, &ftrace_notrace_fops); ++} ++ ++/* ++ * The name "destroy_filter_files" is really a misnomer. Although ++ * in the future, it may actualy delete the files, but this is ++ * really intended to make sure the ops passed in are disabled ++ * and that when this function returns, the caller is free to ++ * free the ops. ++ * ++ * The "destroy" name is only to match the "create" name that this ++ * should be paired with. 
++ */ ++void ftrace_destroy_filter_files(struct ftrace_ops *ops) ++{ ++ mutex_lock(&ftrace_lock); ++ if (ops->flags & FTRACE_OPS_FL_ENABLED) ++ ftrace_shutdown(ops, 0); ++ ops->flags |= FTRACE_OPS_FL_DELETED; ++ ftrace_free_filter(ops); ++ mutex_unlock(&ftrace_lock); ++} ++ ++static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer) ++{ ++ ++ trace_create_file("available_filter_functions", 0444, ++ d_tracer, NULL, &ftrace_avail_fops); ++ ++ trace_create_file("enabled_functions", 0444, ++ d_tracer, NULL, &ftrace_enabled_fops); ++ ++ ftrace_create_filter_files(&global_ops, d_tracer); ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ trace_create_file("set_graph_function", 0644, d_tracer, ++ NULL, ++ &ftrace_graph_fops); ++ trace_create_file("set_graph_notrace", 0644, d_tracer, ++ NULL, ++ &ftrace_graph_notrace_fops); ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++ ++ return 0; ++} ++ ++static int ftrace_cmp_ips(const void *a, const void *b) ++{ ++ const unsigned long *ipa = a; ++ const unsigned long *ipb = b; ++ ++ if (*ipa > *ipb) ++ return 1; ++ if (*ipa < *ipb) ++ return -1; ++ return 0; ++} ++ ++static int ftrace_process_locs(struct module *mod, ++ unsigned long *start, ++ unsigned long *end) ++{ ++ struct ftrace_page *start_pg; ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ unsigned long count; ++ unsigned long *p; ++ unsigned long addr; ++ unsigned long flags = 0; /* Shut up gcc */ ++ int ret = -ENOMEM; ++ ++ count = end - start; ++ ++ if (!count) ++ return 0; ++ ++ sort(start, count, sizeof(*start), ++ ftrace_cmp_ips, NULL); ++ ++ start_pg = ftrace_allocate_pages(count); ++ if (!start_pg) ++ return -ENOMEM; ++ ++ mutex_lock(&ftrace_lock); ++ ++ /* ++ * Core and each module needs their own pages, as ++ * modules will free them when they are removed. ++ * Force a new page to be allocated for modules. ++ */ ++ if (!mod) { ++ WARN_ON(ftrace_pages || ftrace_pages_start); ++ /* First initialization */ ++ ftrace_pages = ftrace_pages_start = start_pg; ++ } else { ++ if (!ftrace_pages) ++ goto out; ++ ++ if (WARN_ON(ftrace_pages->next)) { ++ /* Hmm, we have free pages? */ ++ while (ftrace_pages->next) ++ ftrace_pages = ftrace_pages->next; ++ } ++ ++ ftrace_pages->next = start_pg; ++ } ++ ++ p = start; ++ pg = start_pg; ++ while (p < end) { ++ addr = ftrace_call_adjust(*p++); ++ /* ++ * Some architecture linkers will pad between ++ * the different mcount_loc sections of different ++ * object files to satisfy alignments. ++ * Skip any NULL pointers. ++ */ ++ if (!addr) ++ continue; ++ ++ if (pg->index == pg->size) { ++ /* We should have allocated enough */ ++ if (WARN_ON(!pg->next)) ++ break; ++ pg = pg->next; ++ } ++ ++ rec = &pg->records[pg->index++]; ++ rec->ip = addr; ++ } ++ ++ /* We should have used all pages */ ++ WARN_ON(pg->next); ++ ++ /* Assign the last page to ftrace_pages */ ++ ftrace_pages = pg; ++ ++ /* ++ * We only need to disable interrupts on start up ++ * because we are modifying code that an interrupt ++ * may execute, and the modification is not atomic. ++ * But for modules, nothing runs the code we modify ++ * until we are finished with it, and there's no ++ * reason to cause large interrupt latencies while we do it. 
++ */ ++ if (!mod) ++ local_irq_save(flags); ++ ftrace_update_code(mod, start_pg); ++ if (!mod) ++ local_irq_restore(flags); ++ ret = 0; ++ out: ++ mutex_unlock(&ftrace_lock); ++ ++ return ret; ++} ++ ++struct ftrace_mod_func { ++ struct list_head list; ++ char *name; ++ unsigned long ip; ++ unsigned int size; ++}; ++ ++struct ftrace_mod_map { ++ struct rcu_head rcu; ++ struct list_head list; ++ struct module *mod; ++ unsigned long start_addr; ++ unsigned long end_addr; ++ struct list_head funcs; ++ unsigned int num_funcs; ++}; ++ ++#ifdef CONFIG_MODULES ++ ++#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) ++ ++static LIST_HEAD(ftrace_mod_maps); ++ ++static int referenced_filters(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *ops; ++ int cnt = 0; ++ ++ for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { ++ if (ops_references_rec(ops, rec)) { ++ cnt++; ++ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) ++ rec->flags |= FTRACE_FL_REGS; ++ } ++ } ++ ++ return cnt; ++} ++ ++static void ++clear_mod_from_hash(struct ftrace_page *pg, struct ftrace_hash *hash) ++{ ++ struct ftrace_func_entry *entry; ++ struct dyn_ftrace *rec; ++ int i; ++ ++ if (ftrace_hash_empty(hash)) ++ return; ++ ++ for (i = 0; i < pg->index; i++) { ++ rec = &pg->records[i]; ++ entry = __ftrace_lookup_ip(hash, rec->ip); ++ /* ++ * Do not allow this rec to match again. ++ * Yeah, it may waste some memory, but will be removed ++ * if/when the hash is modified again. ++ */ ++ if (entry) ++ entry->ip = 0; ++ } ++} ++ ++/* Clear any records from hashs */ ++static void clear_mod_from_hashes(struct ftrace_page *pg) ++{ ++ struct trace_array *tr; ++ ++ mutex_lock(&trace_types_lock); ++ list_for_each_entry(tr, &ftrace_trace_arrays, list) { ++ if (!tr->ops || !tr->ops->func_hash) ++ continue; ++ mutex_lock(&tr->ops->func_hash->regex_lock); ++ clear_mod_from_hash(pg, tr->ops->func_hash->filter_hash); ++ clear_mod_from_hash(pg, tr->ops->func_hash->notrace_hash); ++ mutex_unlock(&tr->ops->func_hash->regex_lock); ++ } ++ mutex_unlock(&trace_types_lock); ++} ++ ++static void ftrace_free_mod_map(struct rcu_head *rcu) ++{ ++ struct ftrace_mod_map *mod_map = container_of(rcu, struct ftrace_mod_map, rcu); ++ struct ftrace_mod_func *mod_func; ++ struct ftrace_mod_func *n; ++ ++ /* All the contents of mod_map are now not visible to readers */ ++ list_for_each_entry_safe(mod_func, n, &mod_map->funcs, list) { ++ kfree(mod_func->name); ++ list_del(&mod_func->list); ++ kfree(mod_func); ++ } ++ ++ kfree(mod_map); ++} ++ ++void ftrace_release_mod(struct module *mod) ++{ ++ struct ftrace_mod_map *mod_map; ++ struct ftrace_mod_map *n; ++ struct dyn_ftrace *rec; ++ struct ftrace_page **last_pg; ++ struct ftrace_page *tmp_page = NULL; ++ struct ftrace_page *pg; ++ int order; ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (ftrace_disabled) ++ goto out_unlock; ++ ++ list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) { ++ if (mod_map->mod == mod) { ++ list_del_rcu(&mod_map->list); ++ call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map); ++ break; ++ } ++ } ++ ++ /* ++ * Each module has its own ftrace_pages, remove ++ * them from the list. ++ */ ++ last_pg = &ftrace_pages_start; ++ for (pg = ftrace_pages_start; pg; pg = *last_pg) { ++ rec = &pg->records[0]; ++ if (within_module_core(rec->ip, mod) || ++ within_module_init(rec->ip, mod)) { ++ /* ++ * As core pages are first, the first ++ * page should never be a module page. 
++ */ ++ if (WARN_ON(pg == ftrace_pages_start)) ++ goto out_unlock; ++ ++ /* Check if we are deleting the last page */ ++ if (pg == ftrace_pages) ++ ftrace_pages = next_to_ftrace_page(last_pg); ++ ++ ftrace_update_tot_cnt -= pg->index; ++ *last_pg = pg->next; ++ ++ pg->next = tmp_page; ++ tmp_page = pg; ++ } else ++ last_pg = &pg->next; ++ } ++ out_unlock: ++ mutex_unlock(&ftrace_lock); ++ ++ for (pg = tmp_page; pg; pg = tmp_page) { ++ ++ /* Needs to be called outside of ftrace_lock */ ++ clear_mod_from_hashes(pg); ++ ++ order = get_count_order(pg->size / ENTRIES_PER_PAGE); ++ free_pages((unsigned long)pg->records, order); ++ tmp_page = pg->next; ++ kfree(pg); ++ } ++} ++ ++void ftrace_module_enable(struct module *mod) ++{ ++ struct dyn_ftrace *rec; ++ struct ftrace_page *pg; ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (ftrace_disabled) ++ goto out_unlock; ++ ++ /* ++ * If the tracing is enabled, go ahead and enable the record. ++ * ++ * The reason not to enable the record immediatelly is the ++ * inherent check of ftrace_make_nop/ftrace_make_call for ++ * correct previous instructions. Making first the NOP ++ * conversion puts the module to the correct state, thus ++ * passing the ftrace_make_call check. ++ * ++ * We also delay this to after the module code already set the ++ * text to read-only, as we now need to set it back to read-write ++ * so that we can modify the text. ++ */ ++ if (ftrace_start_up) ++ ftrace_arch_code_modify_prepare(); ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ int cnt; ++ /* ++ * do_for_each_ftrace_rec() is a double loop. ++ * module text shares the pg. If a record is ++ * not part of this module, then skip this pg, ++ * which the "break" will do. ++ */ ++ if (!within_module_core(rec->ip, mod) && ++ !within_module_init(rec->ip, mod)) ++ break; ++ ++ cnt = 0; ++ ++ /* ++ * When adding a module, we need to check if tracers are ++ * currently enabled and if they are, and can trace this record, ++ * we need to enable the module functions as well as update the ++ * reference counts for those function records. 
++ */ ++ if (ftrace_start_up) ++ cnt += referenced_filters(rec); ++ ++ rec->flags &= ~FTRACE_FL_DISABLED; ++ rec->flags += cnt; ++ ++ if (ftrace_start_up && cnt) { ++ int failed = __ftrace_replace_code(rec, 1); ++ if (failed) { ++ ftrace_bug(failed, rec); ++ goto out_loop; ++ } ++ } ++ ++ } while_for_each_ftrace_rec(); ++ ++ out_loop: ++ if (ftrace_start_up) ++ ftrace_arch_code_modify_post_process(); ++ ++ out_unlock: ++ mutex_unlock(&ftrace_lock); ++ ++ process_cached_mods(mod->name); ++} ++ ++void ftrace_module_init(struct module *mod) ++{ ++ if (ftrace_disabled || !mod->num_ftrace_callsites) ++ return; ++ ++ ftrace_process_locs(mod, mod->ftrace_callsites, ++ mod->ftrace_callsites + mod->num_ftrace_callsites); ++} ++ ++static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, ++ struct dyn_ftrace *rec) ++{ ++ struct ftrace_mod_func *mod_func; ++ unsigned long symsize; ++ unsigned long offset; ++ char str[KSYM_SYMBOL_LEN]; ++ char *modname; ++ const char *ret; ++ ++ ret = kallsyms_lookup(rec->ip, &symsize, &offset, &modname, str); ++ if (!ret) ++ return; ++ ++ mod_func = kmalloc(sizeof(*mod_func), GFP_KERNEL); ++ if (!mod_func) ++ return; ++ ++ mod_func->name = kstrdup(str, GFP_KERNEL); ++ if (!mod_func->name) { ++ kfree(mod_func); ++ return; ++ } ++ ++ mod_func->ip = rec->ip - offset; ++ mod_func->size = symsize; ++ ++ mod_map->num_funcs++; ++ ++ list_add_rcu(&mod_func->list, &mod_map->funcs); ++} ++ ++static struct ftrace_mod_map * ++allocate_ftrace_mod_map(struct module *mod, ++ unsigned long start, unsigned long end) ++{ ++ struct ftrace_mod_map *mod_map; ++ ++ mod_map = kmalloc(sizeof(*mod_map), GFP_KERNEL); ++ if (!mod_map) ++ return NULL; ++ ++ mod_map->mod = mod; ++ mod_map->start_addr = start; ++ mod_map->end_addr = end; ++ mod_map->num_funcs = 0; ++ ++ INIT_LIST_HEAD_RCU(&mod_map->funcs); ++ ++ list_add_rcu(&mod_map->list, &ftrace_mod_maps); ++ ++ return mod_map; ++} ++ ++static const char * ++ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, ++ unsigned long addr, unsigned long *size, ++ unsigned long *off, char *sym) ++{ ++ struct ftrace_mod_func *found_func = NULL; ++ struct ftrace_mod_func *mod_func; ++ ++ list_for_each_entry_rcu(mod_func, &mod_map->funcs, list) { ++ if (addr >= mod_func->ip && ++ addr < mod_func->ip + mod_func->size) { ++ found_func = mod_func; ++ break; ++ } ++ } ++ ++ if (found_func) { ++ if (size) ++ *size = found_func->size; ++ if (off) ++ *off = addr - found_func->ip; ++ if (sym) ++ strlcpy(sym, found_func->name, KSYM_NAME_LEN); ++ ++ return found_func->name; ++ } ++ ++ return NULL; ++} ++ ++const char * ++ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, ++ unsigned long *off, char **modname, char *sym) ++{ ++ struct ftrace_mod_map *mod_map; ++ const char *ret = NULL; ++ ++ /* mod_map is freed via call_rcu_sched() */ ++ preempt_disable(); ++ list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { ++ ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); ++ if (ret) { ++ if (modname) ++ *modname = mod_map->mod->name; ++ break; ++ } ++ } ++ preempt_enable(); ++ ++ return ret; ++} ++ ++int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, ++ char *type, char *name, ++ char *module_name, int *exported) ++{ ++ struct ftrace_mod_map *mod_map; ++ struct ftrace_mod_func *mod_func; ++ ++ preempt_disable(); ++ list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { ++ ++ if (symnum >= mod_map->num_funcs) { ++ symnum -= mod_map->num_funcs; ++ continue; ++ } ++ ++ list_for_each_entry_rcu(mod_func, 
&mod_map->funcs, list) { ++ if (symnum > 1) { ++ symnum--; ++ continue; ++ } ++ ++ *value = mod_func->ip; ++ *type = 'T'; ++ strlcpy(name, mod_func->name, KSYM_NAME_LEN); ++ strlcpy(module_name, mod_map->mod->name, MODULE_NAME_LEN); ++ *exported = 1; ++ preempt_enable(); ++ return 0; ++ } ++ WARN_ON(1); ++ break; ++ } ++ preempt_enable(); ++ return -ERANGE; ++} ++ ++#else ++static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, ++ struct dyn_ftrace *rec) { } ++static inline struct ftrace_mod_map * ++allocate_ftrace_mod_map(struct module *mod, ++ unsigned long start, unsigned long end) ++{ ++ return NULL; ++} ++#endif /* CONFIG_MODULES */ ++ ++struct ftrace_init_func { ++ struct list_head list; ++ unsigned long ip; ++}; ++ ++/* Clear any init ips from hashes */ ++static void ++clear_func_from_hash(struct ftrace_init_func *func, struct ftrace_hash *hash) ++{ ++ struct ftrace_func_entry *entry; ++ ++ if (ftrace_hash_empty(hash)) ++ return; ++ ++ entry = __ftrace_lookup_ip(hash, func->ip); ++ ++ /* ++ * Do not allow this rec to match again. ++ * Yeah, it may waste some memory, but will be removed ++ * if/when the hash is modified again. ++ */ ++ if (entry) ++ entry->ip = 0; ++} ++ ++static void ++clear_func_from_hashes(struct ftrace_init_func *func) ++{ ++ struct trace_array *tr; ++ ++ mutex_lock(&trace_types_lock); ++ list_for_each_entry(tr, &ftrace_trace_arrays, list) { ++ if (!tr->ops || !tr->ops->func_hash) ++ continue; ++ mutex_lock(&tr->ops->func_hash->regex_lock); ++ clear_func_from_hash(func, tr->ops->func_hash->filter_hash); ++ clear_func_from_hash(func, tr->ops->func_hash->notrace_hash); ++ mutex_unlock(&tr->ops->func_hash->regex_lock); ++ } ++ mutex_unlock(&trace_types_lock); ++} ++ ++static void add_to_clear_hash_list(struct list_head *clear_list, ++ struct dyn_ftrace *rec) ++{ ++ struct ftrace_init_func *func; ++ ++ func = kmalloc(sizeof(*func), GFP_KERNEL); ++ if (!func) { ++ WARN_ONCE(1, "alloc failure, ftrace filter could be stale\n"); ++ return; ++ } ++ ++ func->ip = rec->ip; ++ list_add(&func->list, clear_list); ++} ++ ++void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) ++{ ++ unsigned long start = (unsigned long)(start_ptr); ++ unsigned long end = (unsigned long)(end_ptr); ++ struct ftrace_page **last_pg = &ftrace_pages_start; ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ struct dyn_ftrace key; ++ struct ftrace_mod_map *mod_map = NULL; ++ struct ftrace_init_func *func, *func_next; ++ struct list_head clear_hash; ++ int order; ++ ++ INIT_LIST_HEAD(&clear_hash); ++ ++ key.ip = start; ++ key.flags = end; /* overload flags, as it is unsigned long */ ++ ++ mutex_lock(&ftrace_lock); ++ ++ /* ++ * If we are freeing module init memory, then check if ++ * any tracer is active. If so, we need to save a mapping of ++ * the module functions being freed with the address. 
++ */ ++ if (mod && ftrace_ops_list != &ftrace_list_end) ++ mod_map = allocate_ftrace_mod_map(mod, start, end); ++ ++ for (pg = ftrace_pages_start; pg; last_pg = &pg->next, pg = *last_pg) { ++ if (end < pg->records[0].ip || ++ start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) ++ continue; ++ again: ++ rec = bsearch(&key, pg->records, pg->index, ++ sizeof(struct dyn_ftrace), ++ ftrace_cmp_recs); ++ if (!rec) ++ continue; ++ ++ /* rec will be cleared from hashes after ftrace_lock unlock */ ++ add_to_clear_hash_list(&clear_hash, rec); ++ ++ if (mod_map) ++ save_ftrace_mod_rec(mod_map, rec); ++ ++ pg->index--; ++ ftrace_update_tot_cnt--; ++ if (!pg->index) { ++ *last_pg = pg->next; ++ order = get_count_order(pg->size / ENTRIES_PER_PAGE); ++ free_pages((unsigned long)pg->records, order); ++ kfree(pg); ++ pg = container_of(last_pg, struct ftrace_page, next); ++ if (!(*last_pg)) ++ ftrace_pages = pg; ++ continue; ++ } ++ memmove(rec, rec + 1, ++ (pg->index - (rec - pg->records)) * sizeof(*rec)); ++ /* More than one function may be in this block */ ++ goto again; ++ } ++ mutex_unlock(&ftrace_lock); ++ ++ list_for_each_entry_safe(func, func_next, &clear_hash, list) { ++ clear_func_from_hashes(func); ++ kfree(func); ++ } ++} ++ ++void __init ftrace_free_init_mem(void) ++{ ++ void *start = (void *)(&__init_begin); ++ void *end = (void *)(&__init_end); ++ ++ ftrace_free_mem(NULL, start, end); ++} ++ ++void __init ftrace_init(void) ++{ ++ extern unsigned long __start_mcount_loc[]; ++ extern unsigned long __stop_mcount_loc[]; ++ unsigned long count, flags; ++ int ret; ++ ++ local_irq_save(flags); ++ ret = ftrace_dyn_arch_init(); ++ local_irq_restore(flags); ++ if (ret) ++ goto failed; ++ ++ count = __stop_mcount_loc - __start_mcount_loc; ++ if (!count) { ++ pr_info("ftrace: No functions to be traced?\n"); ++ goto failed; ++ } ++ ++ pr_info("ftrace: allocating %ld entries in %ld pages\n", ++ count, count / ENTRIES_PER_PAGE + 1); ++ ++ last_ftrace_enabled = ftrace_enabled = 1; ++ ++ ret = ftrace_process_locs(NULL, ++ __start_mcount_loc, ++ __stop_mcount_loc); ++ ++ set_ftrace_early_filters(); ++ ++ return; ++ failed: ++ ftrace_disabled = 1; ++} ++ ++/* Do nothing if arch does not support this */ ++void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops) ++{ ++} ++ ++static void ftrace_update_trampoline(struct ftrace_ops *ops) ++{ ++ arch_ftrace_update_trampoline(ops); ++} ++ ++void ftrace_init_trace_array(struct trace_array *tr) ++{ ++ INIT_LIST_HEAD(&tr->func_probes); ++ INIT_LIST_HEAD(&tr->mod_trace); ++ INIT_LIST_HEAD(&tr->mod_notrace); ++} ++#else ++ ++static struct ftrace_ops global_ops = { ++ .func = ftrace_stub, ++ .flags = FTRACE_OPS_FL_RECURSION_SAFE | ++ FTRACE_OPS_FL_INITIALIZED | ++ FTRACE_OPS_FL_PID, ++}; ++ ++static int __init ftrace_nodyn_init(void) ++{ ++ ftrace_enabled = 1; ++ return 0; ++} ++core_initcall(ftrace_nodyn_init); ++ ++static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; } ++static inline void ftrace_startup_enable(int command) { } ++static inline void ftrace_startup_all(int command) { } ++/* Keep as macros so we do not need to define the commands */ ++# define ftrace_startup(ops, command) \ ++ ({ \ ++ int ___ret = __register_ftrace_function(ops); \ ++ if (!___ret) \ ++ (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ ++ ___ret; \ ++ }) ++# define ftrace_shutdown(ops, command) \ ++ ({ \ ++ int ___ret = __unregister_ftrace_function(ops); \ ++ if (!___ret) \ ++ (ops)->flags &= ~FTRACE_OPS_FL_ENABLED; \ ++ ___ret; \ ++ }) ++ ++# define 
ftrace_startup_sysctl() do { } while (0) ++# define ftrace_shutdown_sysctl() do { } while (0) ++ ++static inline int ++ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) ++{ ++ return 1; ++} ++ ++static void ftrace_update_trampoline(struct ftrace_ops *ops) ++{ ++} ++ ++#endif /* CONFIG_DYNAMIC_FTRACE */ ++ ++__init void ftrace_init_global_array_ops(struct trace_array *tr) ++{ ++ tr->ops = &global_ops; ++ tr->ops->private = tr; ++ ftrace_init_trace_array(tr); ++} ++ ++void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) ++{ ++ /* If we filter on pids, update to use the pid function */ ++ if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { ++ if (WARN_ON(tr->ops->func != ftrace_stub)) ++ printk("ftrace ops had %pS for function\n", ++ tr->ops->func); ++ } ++ tr->ops->func = func; ++ tr->ops->private = tr; ++} ++ ++void ftrace_reset_array_ops(struct trace_array *tr) ++{ ++ tr->ops->func = ftrace_stub; ++} ++ ++static nokprobe_inline void ++__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *ignored, struct pt_regs *regs) ++{ ++ struct ftrace_ops *op; ++ int bit; ++ ++ bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); ++ if (bit < 0) ++ return; ++ ++ /* ++ * Some of the ops may be dynamically allocated, ++ * they must be freed after a synchronize_sched(). ++ */ ++ preempt_disable_notrace(); ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ /* ++ * Check the following for each ops before calling their func: ++ * if RCU flag is set, then rcu_is_watching() must be true ++ * if PER_CPU is set, then ftrace_function_local_disable() ++ * must be false ++ * Otherwise test if the ip matches the ops filter ++ * ++ * If any of the above fails then the op->func() is not executed. ++ */ ++ if ((!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) && ++ ftrace_ops_test(op, ip, regs)) { ++ if (FTRACE_WARN_ON(!op->func)) { ++ pr_warn("op=%p %pS\n", op, op); ++ goto out; ++ } ++ op->func(ip, parent_ip, op, regs); ++ } ++ } while_for_each_ftrace_op(op); ++out: ++ preempt_enable_notrace(); ++ trace_clear_recursion(bit); ++} ++ ++/* ++ * Some archs only support passing ip and parent_ip. Even though ++ * the list function ignores the op parameter, we do not want any ++ * C side effects, where a function is called without the caller ++ * sending a third parameter. ++ * Archs are to support both the regs and ftrace_ops at the same time. ++ * If they support ftrace_ops, it is assumed they support regs. ++ * If call backs want to use regs, they must either check for regs ++ * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS. ++ * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. ++ * An architecture can pass partial regs with ftrace_ops and still ++ * set the ARCH_SUPPORTS_FTRACE_OPS. ++ */ ++#if ARCH_SUPPORTS_FTRACE_OPS ++static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *regs) ++{ ++ __ftrace_ops_list_func(ip, parent_ip, NULL, regs); ++} ++NOKPROBE_SYMBOL(ftrace_ops_list_func); ++#else ++static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) ++{ ++ __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); ++} ++NOKPROBE_SYMBOL(ftrace_ops_no_ops); ++#endif ++ ++/* ++ * If there's only one function registered but it does not support ++ * recursion, needs RCU protection and/or requires per cpu handling, then ++ * this function will be called by the mcount trampoline. 
++ */ ++static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *regs) ++{ ++ int bit; ++ ++ bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); ++ if (bit < 0) ++ return; ++ ++ preempt_disable_notrace(); ++ ++ if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) ++ op->func(ip, parent_ip, op, regs); ++ ++ preempt_enable_notrace(); ++ trace_clear_recursion(bit); ++} ++NOKPROBE_SYMBOL(ftrace_ops_assist_func); ++ ++/** ++ * ftrace_ops_get_func - get the function a trampoline should call ++ * @ops: the ops to get the function for ++ * ++ * Normally the mcount trampoline will call the ops->func, but there ++ * are times that it should not. For example, if the ops does not ++ * have its own recursion protection, then it should call the ++ * ftrace_ops_assist_func() instead. ++ * ++ * Returns the function that the trampoline should call for @ops. ++ */ ++ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) ++{ ++ /* ++ * If the function does not handle recursion, needs to be RCU safe, ++ * or does per cpu logic, then we need to call the assist handler. ++ */ ++ if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) || ++ ops->flags & FTRACE_OPS_FL_RCU) ++ return ftrace_ops_assist_func; ++ ++ return ops->func; ++} ++ ++static void ++ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, ++ struct task_struct *prev, struct task_struct *next) ++{ ++ struct trace_array *tr = data; ++ struct trace_pid_list *pid_list; ++ ++ pid_list = rcu_dereference_sched(tr->function_pids); ++ ++ this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, ++ trace_ignore_this_task(pid_list, next)); ++} ++ ++static void ++ftrace_pid_follow_sched_process_fork(void *data, ++ struct task_struct *self, ++ struct task_struct *task) ++{ ++ struct trace_pid_list *pid_list; ++ struct trace_array *tr = data; ++ ++ pid_list = rcu_dereference_sched(tr->function_pids); ++ trace_filter_add_remove_task(pid_list, self, task); ++} ++ ++static void ++ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task) ++{ ++ struct trace_pid_list *pid_list; ++ struct trace_array *tr = data; ++ ++ pid_list = rcu_dereference_sched(tr->function_pids); ++ trace_filter_add_remove_task(pid_list, NULL, task); ++} ++ ++void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) ++{ ++ if (enable) { ++ register_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, ++ tr); ++ register_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, ++ tr); ++ } else { ++ unregister_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, ++ tr); ++ unregister_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, ++ tr); ++ } ++} ++ ++static void clear_ftrace_pids(struct trace_array *tr) ++{ ++ struct trace_pid_list *pid_list; ++ int cpu; ++ ++ pid_list = rcu_dereference_protected(tr->function_pids, ++ lockdep_is_held(&ftrace_lock)); ++ if (!pid_list) ++ return; ++ ++ unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); ++ ++ for_each_possible_cpu(cpu) ++ per_cpu_ptr(tr->trace_buffer.data, cpu)->ftrace_ignore_pid = false; ++ ++ rcu_assign_pointer(tr->function_pids, NULL); ++ ++ /* Wait till all users are no longer using pid filtering */ ++ synchronize_sched(); ++ ++ trace_free_pid_list(pid_list); ++} ++ ++void ftrace_clear_pids(struct trace_array *tr) ++{ ++ mutex_lock(&ftrace_lock); ++ ++ clear_ftrace_pids(tr); ++ ++ mutex_unlock(&ftrace_lock); ++} ++ ++static void ftrace_pid_reset(struct trace_array 
*tr) ++{ ++ mutex_lock(&ftrace_lock); ++ clear_ftrace_pids(tr); ++ ++ ftrace_update_pid_func(); ++ ftrace_startup_all(0); ++ ++ mutex_unlock(&ftrace_lock); ++} ++ ++/* Greater than any max PID */ ++#define FTRACE_NO_PIDS (void *)(PID_MAX_LIMIT + 1) ++ ++static void *fpid_start(struct seq_file *m, loff_t *pos) ++ __acquires(RCU) ++{ ++ struct trace_pid_list *pid_list; ++ struct trace_array *tr = m->private; ++ ++ mutex_lock(&ftrace_lock); ++ rcu_read_lock_sched(); ++ ++ pid_list = rcu_dereference_sched(tr->function_pids); ++ ++ if (!pid_list) ++ return !(*pos) ? FTRACE_NO_PIDS : NULL; ++ ++ return trace_pid_start(pid_list, pos); ++} ++ ++static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct trace_array *tr = m->private; ++ struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids); ++ ++ if (v == FTRACE_NO_PIDS) ++ return NULL; ++ ++ return trace_pid_next(pid_list, v, pos); ++} ++ ++static void fpid_stop(struct seq_file *m, void *p) ++ __releases(RCU) ++{ ++ rcu_read_unlock_sched(); ++ mutex_unlock(&ftrace_lock); ++} ++ ++static int fpid_show(struct seq_file *m, void *v) ++{ ++ if (v == FTRACE_NO_PIDS) { ++ seq_puts(m, "no pid\n"); ++ return 0; ++ } ++ ++ return trace_pid_show(m, v); ++} ++ ++static const struct seq_operations ftrace_pid_sops = { ++ .start = fpid_start, ++ .next = fpid_next, ++ .stop = fpid_stop, ++ .show = fpid_show, ++}; ++ ++static int ++ftrace_pid_open(struct inode *inode, struct file *file) ++{ ++ struct trace_array *tr = inode->i_private; ++ struct seq_file *m; ++ int ret = 0; ++ ++ if (trace_array_get(tr) < 0) ++ return -ENODEV; ++ ++ if ((file->f_mode & FMODE_WRITE) && ++ (file->f_flags & O_TRUNC)) ++ ftrace_pid_reset(tr); ++ ++ ret = seq_open(file, &ftrace_pid_sops); ++ if (ret < 0) { ++ trace_array_put(tr); ++ } else { ++ m = file->private_data; ++ /* copy tr over to seq ops */ ++ m->private = tr; ++ } ++ ++ return ret; ++} ++ ++static void ignore_task_cpu(void *data) ++{ ++ struct trace_array *tr = data; ++ struct trace_pid_list *pid_list; ++ ++ /* ++ * This function is called by on_each_cpu() while the ++ * event_mutex is held. ++ */ ++ pid_list = rcu_dereference_protected(tr->function_pids, ++ mutex_is_locked(&ftrace_lock)); ++ ++ this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, ++ trace_ignore_this_task(pid_list, current)); ++} ++ ++static ssize_t ++ftrace_pid_write(struct file *filp, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ struct seq_file *m = filp->private_data; ++ struct trace_array *tr = m->private; ++ struct trace_pid_list *filtered_pids = NULL; ++ struct trace_pid_list *pid_list; ++ ssize_t ret; ++ ++ if (!cnt) ++ return 0; ++ ++ mutex_lock(&ftrace_lock); ++ ++ filtered_pids = rcu_dereference_protected(tr->function_pids, ++ lockdep_is_held(&ftrace_lock)); ++ ++ ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt); ++ if (ret < 0) ++ goto out; ++ ++ rcu_assign_pointer(tr->function_pids, pid_list); ++ ++ if (filtered_pids) { ++ synchronize_sched(); ++ trace_free_pid_list(filtered_pids); ++ } else if (pid_list) { ++ /* Register a probe to set whether to ignore the tracing of a task */ ++ register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); ++ } ++ ++ /* ++ * Ignoring of pids is done at task switch. But we have to ++ * check for those tasks that are currently running. ++ * Always do this in case a pid was appended or removed. 
++ */ ++ on_each_cpu(ignore_task_cpu, tr, 1); ++ ++ ftrace_update_pid_func(); ++ ftrace_startup_all(0); ++ out: ++ mutex_unlock(&ftrace_lock); ++ ++ if (ret > 0) ++ *ppos += ret; ++ ++ return ret; ++} ++ ++static int ++ftrace_pid_release(struct inode *inode, struct file *file) ++{ ++ struct trace_array *tr = inode->i_private; ++ ++ trace_array_put(tr); ++ ++ return seq_release(inode, file); ++} ++ ++static const struct file_operations ftrace_pid_fops = { ++ .open = ftrace_pid_open, ++ .write = ftrace_pid_write, ++ .read = seq_read, ++ .llseek = tracing_lseek, ++ .release = ftrace_pid_release, ++}; ++ ++void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer) ++{ ++ trace_create_file("set_ftrace_pid", 0644, d_tracer, ++ tr, &ftrace_pid_fops); ++} ++ ++void __init ftrace_init_tracefs_toplevel(struct trace_array *tr, ++ struct dentry *d_tracer) ++{ ++ /* Only the top level directory has the dyn_tracefs and profile */ ++ WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL)); ++ ++ ftrace_init_dyn_tracefs(d_tracer); ++ ftrace_profile_tracefs(d_tracer); ++} ++ ++/** ++ * ftrace_kill - kill ftrace ++ * ++ * This function should be used by panic code. It stops ftrace ++ * but in a not so nice way. If you need to simply kill ftrace ++ * from a non-atomic section, use ftrace_kill. ++ */ ++void ftrace_kill(void) ++{ ++ ftrace_disabled = 1; ++ ftrace_enabled = 0; ++ ftrace_trace_function = ftrace_stub; ++} ++ ++/** ++ * Test if ftrace is dead or not. ++ */ ++int ftrace_is_dead(void) ++{ ++ return ftrace_disabled; ++} ++ ++/** ++ * register_ftrace_function - register a function for profiling ++ * @ops - ops structure that holds the function for profiling. ++ * ++ * Register a function to be called by all functions in the ++ * kernel. ++ * ++ * Note: @ops->func and all the functions it calls must be labeled ++ * with "notrace", otherwise it will go into a ++ * recursive loop. ++ */ ++int register_ftrace_function(struct ftrace_ops *ops) ++{ ++ int ret = -1; ++ ++ ftrace_ops_init(ops); ++ ++ mutex_lock(&ftrace_lock); ++ ++ ret = ftrace_startup(ops, 0); ++ ++ mutex_unlock(&ftrace_lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(register_ftrace_function); ++ ++/** ++ * unregister_ftrace_function - unregister a function for profiling. ++ * @ops - ops structure that holds the function to unregister ++ * ++ * Unregister a function that was added to be called by ftrace profiling. 
++ */ ++int unregister_ftrace_function(struct ftrace_ops *ops) ++{ ++ int ret; ++ ++ mutex_lock(&ftrace_lock); ++ ret = ftrace_shutdown(ops, 0); ++ mutex_unlock(&ftrace_lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(unregister_ftrace_function); ++ ++int ++ftrace_enable_sysctl(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, ++ loff_t *ppos) ++{ ++ int ret = -ENODEV; ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (unlikely(ftrace_disabled)) ++ goto out; ++ ++ ret = proc_dointvec(table, write, buffer, lenp, ppos); ++ ++ if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) ++ goto out; ++ ++ last_ftrace_enabled = !!ftrace_enabled; ++ ++ if (ftrace_enabled) { ++ ++ /* we are starting ftrace again */ ++ if (rcu_dereference_protected(ftrace_ops_list, ++ lockdep_is_held(&ftrace_lock)) != &ftrace_list_end) ++ update_ftrace_function(); ++ ++ ftrace_startup_sysctl(); ++ ++ } else { ++ /* stopping ftrace calls (just send to ftrace_stub) */ ++ ftrace_trace_function = ftrace_stub; ++ ++ ftrace_shutdown_sysctl(); ++ } ++ ++ out: ++ mutex_unlock(&ftrace_lock); ++ return ret; ++} ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ ++static struct ftrace_ops graph_ops = { ++ .func = ftrace_stub, ++ .flags = FTRACE_OPS_FL_RECURSION_SAFE | ++ FTRACE_OPS_FL_INITIALIZED | ++ FTRACE_OPS_FL_PID | ++ FTRACE_OPS_FL_STUB, ++#ifdef FTRACE_GRAPH_TRAMP_ADDR ++ .trampoline = FTRACE_GRAPH_TRAMP_ADDR, ++ /* trampoline_size is only needed for dynamically allocated tramps */ ++#endif ++ ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) ++}; ++ ++void ftrace_graph_sleep_time_control(bool enable) ++{ ++ fgraph_sleep_time = enable; ++} ++ ++void ftrace_graph_graph_time_control(bool enable) ++{ ++ fgraph_graph_time = enable; ++} ++ ++int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) ++{ ++ return 0; ++} ++ ++/* The callbacks that hook a function */ ++trace_func_graph_ret_t ftrace_graph_return = ++ (trace_func_graph_ret_t)ftrace_stub; ++trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; ++static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub; ++ ++/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ ++static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) ++{ ++ int i; ++ int ret = 0; ++ int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE; ++ struct task_struct *g, *t; ++ ++ for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { ++ ret_stack_list[i] = ++ kmalloc_array(FTRACE_RETFUNC_DEPTH, ++ sizeof(struct ftrace_ret_stack), ++ GFP_KERNEL); ++ if (!ret_stack_list[i]) { ++ start = 0; ++ end = i; ++ ret = -ENOMEM; ++ goto free; ++ } ++ } ++ ++ read_lock(&tasklist_lock); ++ do_each_thread(g, t) { ++ if (start == end) { ++ ret = -EAGAIN; ++ goto unlock; ++ } ++ ++ if (t->ret_stack == NULL) { ++ atomic_set(&t->tracing_graph_pause, 0); ++ atomic_set(&t->trace_overrun, 0); ++ t->curr_ret_stack = -1; ++ t->curr_ret_depth = -1; ++ /* Make sure the tasks see the -1 first: */ ++ smp_wmb(); ++ t->ret_stack = ret_stack_list[start++]; ++ } ++ } while_each_thread(g, t); ++ ++unlock: ++ read_unlock(&tasklist_lock); ++free: ++ for (i = start; i < end; i++) ++ kfree(ret_stack_list[i]); ++ return ret; ++} ++ ++static void ++ftrace_graph_probe_sched_switch(void *ignore, bool preempt, ++ struct task_struct *prev, struct task_struct *next) ++{ ++ unsigned long long timestamp; ++ int index; ++ ++ /* ++ * Does the user want to count the time a function was asleep. ++ * If so, do not update the time stamps. 
++ */ ++ if (fgraph_sleep_time) ++ return; ++ ++ timestamp = trace_clock_local(); ++ ++ prev->ftrace_timestamp = timestamp; ++ ++ /* only process tasks that we timestamped */ ++ if (!next->ftrace_timestamp) ++ return; ++ ++ /* ++ * Update all the counters in next to make up for the ++ * time next was sleeping. ++ */ ++ timestamp -= next->ftrace_timestamp; ++ ++ for (index = next->curr_ret_stack; index >= 0; index--) ++ next->ret_stack[index].calltime += timestamp; ++} ++ ++/* Allocate a return stack for each task */ ++static int start_graph_tracing(void) ++{ ++ struct ftrace_ret_stack **ret_stack_list; ++ int ret, cpu; ++ ++ ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE, ++ sizeof(struct ftrace_ret_stack *), ++ GFP_KERNEL); ++ ++ if (!ret_stack_list) ++ return -ENOMEM; ++ ++ /* The cpu_boot init_task->ret_stack will never be freed */ ++ for_each_online_cpu(cpu) { ++ if (!idle_task(cpu)->ret_stack) ++ ftrace_graph_init_idle_task(idle_task(cpu), cpu); ++ } ++ ++ do { ++ ret = alloc_retstack_tasklist(ret_stack_list); ++ } while (ret == -EAGAIN); ++ ++ if (!ret) { ++ ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); ++ if (ret) ++ pr_info("ftrace_graph: Couldn't activate tracepoint" ++ " probe to kernel_sched_switch\n"); ++ } ++ ++ kfree(ret_stack_list); ++ return ret; ++} ++ ++/* ++ * Hibernation protection. ++ * The state of the current task is too much unstable during ++ * suspend/restore to disk. We want to protect against that. ++ */ ++static int ++ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, ++ void *unused) ++{ ++ switch (state) { ++ case PM_HIBERNATION_PREPARE: ++ pause_graph_tracing(); ++ break; ++ ++ case PM_POST_HIBERNATION: ++ unpause_graph_tracing(); ++ break; ++ } ++ return NOTIFY_DONE; ++} ++ ++static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace) ++{ ++ if (!ftrace_ops_test(&global_ops, trace->func, NULL)) ++ return 0; ++ return __ftrace_graph_entry(trace); ++} ++ ++/* ++ * The function graph tracer should only trace the functions defined ++ * by set_ftrace_filter and set_ftrace_notrace. If another function ++ * tracer ops is registered, the graph tracer requires testing the ++ * function against the global ops, and not just trace any function ++ * that any ftrace_ops registered. ++ */ ++static void update_function_graph_func(void) ++{ ++ struct ftrace_ops *op; ++ bool do_test = false; ++ ++ /* ++ * The graph and global ops share the same set of functions ++ * to test. If any other ops is on the list, then ++ * the graph tracing needs to test if its the function ++ * it should call. 
++ */ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ if (op != &global_ops && op != &graph_ops && ++ op != &ftrace_list_end) { ++ do_test = true; ++ /* in double loop, break out with goto */ ++ goto out; ++ } ++ } while_for_each_ftrace_op(op); ++ out: ++ if (do_test) ++ ftrace_graph_entry = ftrace_graph_entry_test; ++ else ++ ftrace_graph_entry = __ftrace_graph_entry; ++} ++ ++static struct notifier_block ftrace_suspend_notifier = { ++ .notifier_call = ftrace_suspend_notifier_call, ++}; ++ ++int register_ftrace_graph(trace_func_graph_ret_t retfunc, ++ trace_func_graph_ent_t entryfunc) ++{ ++ int ret = 0; ++ ++ mutex_lock(&ftrace_lock); ++ ++ /* we currently allow only one tracer registered at a time */ ++ if (ftrace_graph_active) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ register_pm_notifier(&ftrace_suspend_notifier); ++ ++ ftrace_graph_active++; ++ ret = start_graph_tracing(); ++ if (ret) { ++ ftrace_graph_active--; ++ goto out; ++ } ++ ++ ftrace_graph_return = retfunc; ++ ++ /* ++ * Update the indirect function to the entryfunc, and the ++ * function that gets called to the entry_test first. Then ++ * call the update fgraph entry function to determine if ++ * the entryfunc should be called directly or not. ++ */ ++ __ftrace_graph_entry = entryfunc; ++ ftrace_graph_entry = ftrace_graph_entry_test; ++ update_function_graph_func(); ++ ++ ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET); ++out: ++ mutex_unlock(&ftrace_lock); ++ return ret; ++} ++ ++void unregister_ftrace_graph(void) ++{ ++ mutex_lock(&ftrace_lock); ++ ++ if (unlikely(!ftrace_graph_active)) ++ goto out; ++ ++ ftrace_graph_active--; ++ ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ++ ftrace_graph_entry = ftrace_graph_entry_stub; ++ __ftrace_graph_entry = ftrace_graph_entry_stub; ++ ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET); ++ unregister_pm_notifier(&ftrace_suspend_notifier); ++ unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); ++ ++ out: ++ mutex_unlock(&ftrace_lock); ++} ++ ++static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); ++ ++static void ++graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) ++{ ++ atomic_set(&t->tracing_graph_pause, 0); ++ atomic_set(&t->trace_overrun, 0); ++ t->ftrace_timestamp = 0; ++ /* make curr_ret_stack visible before we add the ret_stack */ ++ smp_wmb(); ++ t->ret_stack = ret_stack; ++} ++ ++/* ++ * Allocate a return stack for the idle task. May be the first ++ * time through, or it may be done by CPU hotplug online. ++ */ ++void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) ++{ ++ t->curr_ret_stack = -1; ++ t->curr_ret_depth = -1; ++ /* ++ * The idle task has no parent, it either has its own ++ * stack or no stack at all. 
++ */ ++ if (t->ret_stack) ++ WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu)); ++ ++ if (ftrace_graph_active) { ++ struct ftrace_ret_stack *ret_stack; ++ ++ ret_stack = per_cpu(idle_ret_stack, cpu); ++ if (!ret_stack) { ++ ret_stack = ++ kmalloc_array(FTRACE_RETFUNC_DEPTH, ++ sizeof(struct ftrace_ret_stack), ++ GFP_KERNEL); ++ if (!ret_stack) ++ return; ++ per_cpu(idle_ret_stack, cpu) = ret_stack; ++ } ++ graph_init_task(t, ret_stack); ++ } ++} ++ ++/* Allocate a return stack for newly created task */ ++void ftrace_graph_init_task(struct task_struct *t) ++{ ++ /* Make sure we do not use the parent ret_stack */ ++ t->ret_stack = NULL; ++ t->curr_ret_stack = -1; ++ t->curr_ret_depth = -1; ++ ++ if (ftrace_graph_active) { ++ struct ftrace_ret_stack *ret_stack; ++ ++ ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH, ++ sizeof(struct ftrace_ret_stack), ++ GFP_KERNEL); ++ if (!ret_stack) ++ return; ++ graph_init_task(t, ret_stack); ++ } ++} ++ ++void ftrace_graph_exit_task(struct task_struct *t) ++{ ++ struct ftrace_ret_stack *ret_stack = t->ret_stack; ++ ++ t->ret_stack = NULL; ++ /* NULL must become visible to IRQs before we free it: */ ++ barrier(); ++ ++ kfree(ret_stack); ++} ++#endif +diff -uprN kernel/kernel/trace/Kconfig kernel_new/kernel/trace/Kconfig +--- kernel/kernel/trace/Kconfig 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/Kconfig 2021-04-01 18:28:07.814863110 +0800 +@@ -525,6 +525,7 @@ config DYNAMIC_FTRACE + bool "enable/disable function tracing dynamically" + depends on FUNCTION_TRACER + depends on HAVE_DYNAMIC_FTRACE ++ depends on !IPIPE + default y + help + This option will modify all the calls to function tracing +diff -uprN kernel/kernel/trace/ring_buffer.c kernel_new/kernel/trace/ring_buffer.c +--- kernel/kernel/trace/ring_buffer.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/ring_buffer.c 2021-04-01 18:28:07.814863110 +0800 +@@ -2653,6 +2653,7 @@ trace_recursive_lock(struct ring_buffer_ + { + unsigned int val = cpu_buffer->current_context; + unsigned long pc = preempt_count(); ++ unsigned long flags; + int bit; + + if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) +@@ -2661,20 +2662,30 @@ trace_recursive_lock(struct ring_buffer_ + bit = pc & NMI_MASK ? RB_CTX_NMI : + pc & HARDIRQ_MASK ? 
RB_CTX_IRQ : RB_CTX_SOFTIRQ; + +- if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) ++ flags = hard_local_irq_save(); ++ ++ if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) { ++ hard_local_irq_restore(flags); + return 1; ++ } + + val |= (1 << (bit + cpu_buffer->nest)); + cpu_buffer->current_context = val; + ++ hard_local_irq_restore(flags); ++ + return 0; + } + + static __always_inline void + trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) + { ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); + cpu_buffer->current_context &= + cpu_buffer->current_context - (1 << cpu_buffer->nest); ++ hard_local_irq_restore(flags); + } + + /* The recursive locking above uses 4 bits */ +diff -uprN kernel/kernel/trace/trace.c kernel_new/kernel/trace/trace.c +--- kernel/kernel/trace/trace.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/trace.c 2021-04-01 18:28:07.814863110 +0800 +@@ -2921,8 +2921,9 @@ int trace_vbprintk(unsigned long ip, con + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + ++ flags = hard_local_irq_save(); ++ + pc = preempt_count(); +- preempt_disable_notrace(); + + tbuffer = get_trace_buf(); + if (!tbuffer) { +@@ -2935,7 +2936,6 @@ int trace_vbprintk(unsigned long ip, con + if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) + goto out; + +- local_save_flags(flags); + size = sizeof(*entry) + sizeof(u32) * len; + buffer = tr->trace_buffer.buffer; + event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, +@@ -2956,7 +2956,7 @@ out: + put_trace_buf(); + + out_nobuffer: +- preempt_enable_notrace(); ++ hard_local_irq_restore(flags); + unpause_graph_tracing(); + + return len; +diff -uprN kernel/kernel/trace/trace_clock.c kernel_new/kernel/trace/trace_clock.c +--- kernel/kernel/trace/trace_clock.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/trace_clock.c 2021-04-01 18:28:07.815863109 +0800 +@@ -97,7 +97,7 @@ u64 notrace trace_clock_global(void) + int this_cpu; + u64 now; + +- raw_local_irq_save(flags); ++ flags = hard_local_irq_save_notrace(); + + this_cpu = raw_smp_processor_id(); + now = sched_clock_cpu(this_cpu); +@@ -123,7 +123,7 @@ u64 notrace trace_clock_global(void) + arch_spin_unlock(&trace_clock_struct.lock); + + out: +- raw_local_irq_restore(flags); ++ hard_local_irq_restore_notrace(flags); + + return now; + } +diff -uprN kernel/kernel/trace/trace_functions.c kernel_new/kernel/trace/trace_functions.c +--- kernel/kernel/trace/trace_functions.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/trace_functions.c 2021-04-01 18:28:07.815863109 +0800 +@@ -190,7 +190,7 @@ function_stack_trace_call(unsigned long + * Need to use raw, since this must be called before the + * recursive protection is performed. 
+ */ +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + cpu = raw_smp_processor_id(); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); + disabled = atomic_inc_return(&data->disabled); +@@ -202,7 +202,7 @@ function_stack_trace_call(unsigned long + } + + atomic_dec(&data->disabled); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static struct tracer_opt func_opts[] = { +diff -uprN kernel/kernel/trace/trace_functions_graph.c kernel_new/kernel/trace/trace_functions_graph.c +--- kernel/kernel/trace/trace_functions_graph.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/trace_functions_graph.c 2021-04-01 18:28:07.815863109 +0800 +@@ -435,7 +435,7 @@ int trace_graph_entry(struct ftrace_grap + if (tracing_thresh) + return 1; + +- local_irq_save(flags); ++ flags = hard_local_irq_save_notrace(); + cpu = raw_smp_processor_id(); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); + disabled = atomic_inc_return(&data->disabled); +@@ -447,7 +447,7 @@ int trace_graph_entry(struct ftrace_grap + } + + atomic_dec(&data->disabled); +- local_irq_restore(flags); ++ hard_local_irq_restore_notrace(flags); + + return ret; + } +@@ -511,7 +511,7 @@ void trace_graph_return(struct ftrace_gr + + ftrace_graph_addr_finish(trace); + +- local_irq_save(flags); ++ flags = hard_local_irq_save_notrace(); + cpu = raw_smp_processor_id(); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); + disabled = atomic_inc_return(&data->disabled); +@@ -520,7 +520,7 @@ void trace_graph_return(struct ftrace_gr + __trace_graph_return(tr, trace, flags, pc); + } + atomic_dec(&data->disabled); +- local_irq_restore(flags); ++ hard_local_irq_restore_notrace(flags); + } + + void set_graph_array(struct trace_array *tr) +diff -uprN kernel/kernel/trace/trace_preemptirq.c kernel_new/kernel/trace/trace_preemptirq.c +--- kernel/kernel/trace/trace_preemptirq.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/trace_preemptirq.c 2021-04-02 09:24:59.495321387 +0800 +@@ -20,6 +20,9 @@ static DEFINE_PER_CPU(int, tracing_irq_c + + void trace_hardirqs_on(void) + { ++ if (!ipipe_root_p) ++ return; ++ + if (this_cpu_read(tracing_irq_cpu)) { + if (!in_nmi()) + trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); +@@ -33,6 +36,9 @@ EXPORT_SYMBOL(trace_hardirqs_on); + + void trace_hardirqs_off(void) + { ++ if (!ipipe_root_p) ++ return; ++ + if (!this_cpu_read(tracing_irq_cpu)) { + this_cpu_write(tracing_irq_cpu, 1); + tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); +@@ -46,6 +52,9 @@ EXPORT_SYMBOL(trace_hardirqs_off); + + __visible void trace_hardirqs_on_caller(unsigned long caller_addr) + { ++ if (!ipipe_root_p) ++ return; ++ + if (this_cpu_read(tracing_irq_cpu)) { + if (!in_nmi()) + trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr); +@@ -57,8 +66,33 @@ __visible void trace_hardirqs_on_caller( + } + EXPORT_SYMBOL(trace_hardirqs_on_caller); + ++__visible void trace_hardirqs_on_virt_caller(unsigned long ip) ++{ ++ /* ++ * The IRQ tracing logic only applies to the root domain, and ++ * must consider the virtual disable flag exclusively when ++ * leaving an interrupt/fault context. ++ */ ++ if (ipipe_root_p && !irqs_disabled()) ++ trace_hardirqs_on_caller(ip); ++} ++ ++__visible void trace_hardirqs_on_virt(void) ++{ ++ /* ++ * The IRQ tracing logic only applies to the root domain, and ++ * must consider the virtual disable flag exclusively when ++ * leaving an interrupt/fault context. 
++ */ ++ if (ipipe_root_p && !irqs_disabled()) ++ trace_hardirqs_on_caller(CALLER_ADDR0); ++} ++ + __visible void trace_hardirqs_off_caller(unsigned long caller_addr) + { ++ if (!ipipe_root_p) ++ return; ++ + lockdep_hardirqs_off(CALLER_ADDR0); + + if (!this_cpu_read(tracing_irq_cpu)) { +@@ -75,14 +109,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller) + + void trace_preempt_on(unsigned long a0, unsigned long a1) + { +- if (!in_nmi()) ++ if (ipipe_root_p && !in_nmi()) + trace_preempt_enable_rcuidle(a0, a1); + tracer_preempt_on(a0, a1); + } + + void trace_preempt_off(unsigned long a0, unsigned long a1) + { +- if (!in_nmi()) ++ if (ipipe_root_p && !in_nmi()) + trace_preempt_disable_rcuidle(a0, a1); + tracer_preempt_off(a0, a1); + } +diff -uprN kernel/kernel/trace/trace_preemptirq.c.orig kernel_new/kernel/trace/trace_preemptirq.c.orig +--- kernel/kernel/trace/trace_preemptirq.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/trace/trace_preemptirq.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,89 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * preemptoff and irqoff tracepoints ++ * ++ * Copyright (C) Joel Fernandes (Google) ++ */ ++ ++#include ++#include ++#include ++#include ++#include "trace.h" ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++#ifdef CONFIG_TRACE_IRQFLAGS ++/* Per-cpu variable to prevent redundant calls when IRQs already off */ ++static DEFINE_PER_CPU(int, tracing_irq_cpu); ++ ++void trace_hardirqs_on(void) ++{ ++ if (this_cpu_read(tracing_irq_cpu)) { ++ if (!in_nmi()) ++ trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); ++ tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); ++ this_cpu_write(tracing_irq_cpu, 0); ++ } ++ ++ lockdep_hardirqs_on(CALLER_ADDR0); ++} ++EXPORT_SYMBOL(trace_hardirqs_on); ++ ++void trace_hardirqs_off(void) ++{ ++ if (!this_cpu_read(tracing_irq_cpu)) { ++ this_cpu_write(tracing_irq_cpu, 1); ++ tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); ++ if (!in_nmi()) ++ trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); ++ } ++ ++ lockdep_hardirqs_off(CALLER_ADDR0); ++} ++EXPORT_SYMBOL(trace_hardirqs_off); ++ ++__visible void trace_hardirqs_on_caller(unsigned long caller_addr) ++{ ++ if (this_cpu_read(tracing_irq_cpu)) { ++ if (!in_nmi()) ++ trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr); ++ tracer_hardirqs_on(CALLER_ADDR0, caller_addr); ++ this_cpu_write(tracing_irq_cpu, 0); ++ } ++ ++ lockdep_hardirqs_on(CALLER_ADDR0); ++} ++EXPORT_SYMBOL(trace_hardirqs_on_caller); ++ ++__visible void trace_hardirqs_off_caller(unsigned long caller_addr) ++{ ++ lockdep_hardirqs_off(CALLER_ADDR0); ++ ++ if (!this_cpu_read(tracing_irq_cpu)) { ++ this_cpu_write(tracing_irq_cpu, 1); ++ tracer_hardirqs_off(CALLER_ADDR0, caller_addr); ++ if (!in_nmi()) ++ trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr); ++ } ++} ++EXPORT_SYMBOL(trace_hardirqs_off_caller); ++#endif /* CONFIG_TRACE_IRQFLAGS */ ++ ++#ifdef CONFIG_TRACE_PREEMPT_TOGGLE ++ ++void trace_preempt_on(unsigned long a0, unsigned long a1) ++{ ++ if (!in_nmi()) ++ trace_preempt_enable_rcuidle(a0, a1); ++ tracer_preempt_on(a0, a1); ++} ++ ++void trace_preempt_off(unsigned long a0, unsigned long a1) ++{ ++ if (!in_nmi()) ++ trace_preempt_disable_rcuidle(a0, a1); ++ tracer_preempt_off(a0, a1); ++} ++#endif +diff -uprN kernel/kernel/trace/trace_preemptirq.c.rej kernel_new/kernel/trace/trace_preemptirq.c.rej +--- kernel/kernel/trace/trace_preemptirq.c.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/trace/trace_preemptirq.c.rej 2021-04-01 18:28:07.815863109 +0800 +@@ -0,0 +1,36 @@ 
++--- kernel/trace/trace_preemptirq.c 2019-12-18 03:36:04.000000000 +0800 +++++ kernel/trace/trace_preemptirq.c 2021-03-22 09:21:43.227415471 +0800 ++@@ -66,8 +75,33 @@ __visible void trace_hardirqs_on_caller( ++ } ++ EXPORT_SYMBOL(trace_hardirqs_on_caller); ++ +++__visible void trace_hardirqs_on_virt_caller(unsigned long ip) +++{ +++ /* +++ * The IRQ tracing logic only applies to the root domain, and +++ * must consider the virtual disable flag exclusively when +++ * leaving an interrupt/fault context. +++ */ +++ if (ipipe_root_p && !irqs_disabled()) +++ trace_hardirqs_on_caller(ip); +++} +++ +++__visible void trace_hardirqs_on_virt(void) +++{ +++ /* +++ * The IRQ tracing logic only applies to the root domain, and +++ * must consider the virtual disable flag exclusively when +++ * leaving an interrupt/fault context. +++ */ +++ if (ipipe_root_p && !irqs_disabled()) +++ trace_hardirqs_on_caller(CALLER_ADDR0); +++} +++ ++ __visible void trace_hardirqs_off_caller(unsigned long caller_addr) ++ { +++ if (!ipipe_root_p) +++ return; +++ ++ if (!this_cpu_read(tracing_irq_cpu)) { ++ this_cpu_write(tracing_irq_cpu, 1); ++ tracer_hardirqs_off(CALLER_ADDR0, caller_addr); +diff -uprN kernel/lib/atomic64.c kernel_new/lib/atomic64.c +--- kernel/lib/atomic64.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/atomic64.c 2021-04-01 18:28:07.815863109 +0800 +@@ -29,15 +29,15 @@ + * Ensure each lock is in a separate cacheline. + */ + static union { +- raw_spinlock_t lock; ++ ipipe_spinlock_t lock; + char pad[L1_CACHE_BYTES]; + } atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = { + [0 ... (NR_LOCKS - 1)] = { +- .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock), ++ .lock = IPIPE_SPIN_LOCK_UNLOCKED, + }, + }; + +-static inline raw_spinlock_t *lock_addr(const atomic64_t *v) ++static inline ipipe_spinlock_t *lock_addr(const atomic64_t *v) + { + unsigned long addr = (unsigned long) v; + +@@ -49,7 +49,7 @@ static inline raw_spinlock_t *lock_addr( + long long atomic64_read(const atomic64_t *v) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + long long val; + + raw_spin_lock_irqsave(lock, flags); +@@ -62,7 +62,7 @@ EXPORT_SYMBOL(atomic64_read); + void atomic64_set(atomic64_t *v, long long i) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + + raw_spin_lock_irqsave(lock, flags); + v->counter = i; +@@ -74,7 +74,7 @@ EXPORT_SYMBOL(atomic64_set); + void atomic64_##op(long long a, atomic64_t *v) \ + { \ + unsigned long flags; \ +- raw_spinlock_t *lock = lock_addr(v); \ ++ ipipe_spinlock_t *lock = lock_addr(v); \ + \ + raw_spin_lock_irqsave(lock, flags); \ + v->counter c_op a; \ +@@ -86,7 +86,7 @@ EXPORT_SYMBOL(atomic64_##op); + long long atomic64_##op##_return(long long a, atomic64_t *v) \ + { \ + unsigned long flags; \ +- raw_spinlock_t *lock = lock_addr(v); \ ++ ipipe_spinlock_t *lock = lock_addr(v); \ + long long val; \ + \ + raw_spin_lock_irqsave(lock, flags); \ +@@ -100,7 +100,7 @@ EXPORT_SYMBOL(atomic64_##op##_return); + long long atomic64_fetch_##op(long long a, atomic64_t *v) \ + { \ + unsigned long flags; \ +- raw_spinlock_t *lock = lock_addr(v); \ ++ ipipe_spinlock_t *lock = lock_addr(v); \ + long long val; \ + \ + raw_spin_lock_irqsave(lock, flags); \ +@@ -137,7 +137,7 @@ ATOMIC64_OPS(xor, ^=) + long long atomic64_dec_if_positive(atomic64_t *v) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + long long val; + + 
raw_spin_lock_irqsave(lock, flags); +@@ -152,7 +152,7 @@ EXPORT_SYMBOL(atomic64_dec_if_positive); + long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + long long val; + + raw_spin_lock_irqsave(lock, flags); +@@ -167,7 +167,7 @@ EXPORT_SYMBOL(atomic64_cmpxchg); + long long atomic64_xchg(atomic64_t *v, long long new) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + long long val; + + raw_spin_lock_irqsave(lock, flags); +@@ -181,7 +181,7 @@ EXPORT_SYMBOL(atomic64_xchg); + long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + long long val; + + raw_spin_lock_irqsave(lock, flags); +diff -uprN kernel/lib/bust_spinlocks.c kernel_new/lib/bust_spinlocks.c +--- kernel/lib/bust_spinlocks.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/bust_spinlocks.c 2021-04-01 18:28:07.815863109 +0800 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + + void __attribute__((weak)) bust_spinlocks(int yes) +@@ -26,6 +27,7 @@ void __attribute__((weak)) bust_spinlock + unblank_screen(); + #endif + console_unblank(); ++ ipipe_trace_panic_dump(); + if (--oops_in_progress == 0) + wake_up_klogd(); + } +diff -uprN kernel/lib/dump_stack.c kernel_new/lib/dump_stack.c +--- kernel/lib/dump_stack.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/dump_stack.c 2021-04-01 18:28:07.815863109 +0800 +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -56,6 +57,9 @@ void dump_stack_print_info(const char *l + printk("%sHardware name: %s\n", + log_lvl, dump_stack_arch_desc_str); + ++#ifdef CONFIG_IPIPE ++ printk("I-pipe domain: %s\n", ipipe_current_domain->name); ++#endif + print_worker_info(log_lvl, current); + } + +@@ -85,6 +89,29 @@ static void __dump_stack(void) + #ifdef CONFIG_SMP + static atomic_t dump_lock = ATOMIC_INIT(-1); + ++static unsigned long disable_local_irqs(void) ++{ ++ unsigned long flags = 0; /* only to trick the UMR detection */ ++ ++ /* ++ * We neither need nor want to disable root stage IRQs over ++ * the head stage, where CPU migration can't ++ * happen. Conversely, we neither need nor want to disable ++ * hard IRQs from the head stage, so that latency won't ++ * skyrocket as a result of dumping the stack backtrace. 
++ */ ++ if (ipipe_root_p) ++ local_irq_save(flags); ++ ++ return flags; ++} ++ ++static void restore_local_irqs(unsigned long flags) ++{ ++ if (ipipe_root_p) ++ local_irq_restore(flags); ++} ++ + asmlinkage __visible void dump_stack(void) + { + unsigned long flags; +@@ -97,7 +124,7 @@ asmlinkage __visible void dump_stack(voi + * against other CPUs + */ + retry: +- local_irq_save(flags); ++ flags = disable_local_irqs(); + cpu = smp_processor_id(); + old = atomic_cmpxchg(&dump_lock, -1, cpu); + if (old == -1) { +@@ -105,7 +132,7 @@ retry: + } else if (old == cpu) { + was_locked = 1; + } else { +- local_irq_restore(flags); ++ restore_local_irqs(flags); + /* + * Wait for the lock to release before jumping to + * atomic_cmpxchg() in order to mitigate the thundering herd +@@ -120,7 +147,7 @@ retry: + if (!was_locked) + atomic_set(&dump_lock, -1); + +- local_irq_restore(flags); ++ restore_local_irqs(flags); + } + #else + asmlinkage __visible void dump_stack(void) +diff -uprN kernel/lib/ioremap.c kernel_new/lib/ioremap.c +--- kernel/lib/ioremap.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/ioremap.c 2021-04-01 18:28:07.815863109 +0800 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -177,7 +178,12 @@ int ioremap_page_range(unsigned long add + break; + } while (pgd++, addr = next, addr != end); + +- flush_cache_vmap(start, end); ++ /* APEI may invoke this for temporarily remapping pages in interrupt ++ * context - nothing we can and need to propagate globally. */ ++ if (!in_interrupt()) { ++ __ipipe_pin_mapping_globally(start, end); ++ flush_cache_vmap(start, end); ++ } + + return err; + } +diff -uprN kernel/lib/Kconfig.debug kernel_new/lib/Kconfig.debug +--- kernel/lib/Kconfig.debug 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/Kconfig.debug 2021-04-01 18:28:07.816863108 +0800 +@@ -411,6 +411,7 @@ config MAGIC_SYSRQ + keys are documented in . + Don't say Y unless you really know what this hack does. + ++ + config MAGIC_SYSRQ_DEFAULT_ENABLE + hex "Enable magic SysRq key functions by default" + depends on MAGIC_SYSRQ +@@ -430,6 +431,8 @@ config MAGIC_SYSRQ_SERIAL + This option allows you to decide whether you want to enable the + magic SysRq key. + ++source "kernel/ipipe/Kconfig.debug" ++ + config DEBUG_KERNEL + bool "Kernel debugging" + help +diff -uprN kernel/lib/Kconfig.debug.orig kernel_new/lib/Kconfig.debug.orig +--- kernel/lib/Kconfig.debug.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/lib/Kconfig.debug.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2058 @@ ++menu "Kernel hacking" ++ ++menu "printk and dmesg options" ++ ++config PRINTK_TIME ++ bool "Show timing information on printks" ++ depends on PRINTK ++ help ++ Selecting this option causes time stamps of the printk() ++ messages to be added to the output of the syslog() system ++ call and at the console. ++ ++ The timestamp is always recorded internally, and exported ++ to /dev/kmsg. This flag just specifies if the timestamp should ++ be included, not that the timestamp is recorded. ++ ++ The behavior is also controlled by the kernel command line ++ parameter printk.time=1. See Documentation/admin-guide/kernel-parameters.rst ++ ++config CONSOLE_LOGLEVEL_DEFAULT ++ int "Default console loglevel (1-15)" ++ range 1 15 ++ default "7" ++ help ++ Default loglevel to determine what will be printed on the console. ++ ++ Setting a default here is equivalent to passing in loglevel= in ++ the kernel bootargs. 
loglevel= continues to override whatever ++ value is specified here as well. ++ ++ Note: This does not affect the log level of un-prefixed printk() ++ usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT ++ option. ++ ++config CONSOLE_LOGLEVEL_QUIET ++ int "quiet console loglevel (1-15)" ++ range 1 15 ++ default "4" ++ help ++ loglevel to use when "quiet" is passed on the kernel commandline. ++ ++ When "quiet" is passed on the kernel commandline this loglevel ++ will be used as the loglevel. IOW passing "quiet" will be the ++ equivalent of passing "loglevel=" ++ ++config MESSAGE_LOGLEVEL_DEFAULT ++ int "Default message log level (1-7)" ++ range 1 7 ++ default "4" ++ help ++ Default log level for printk statements with no specified priority. ++ ++ This was hard-coded to KERN_WARNING since at least 2.6.10 but folks ++ that are auditing their logs closely may want to set it to a lower ++ priority. ++ ++ Note: This does not affect what message level gets printed on the console ++ by default. To change that, use loglevel= in the kernel bootargs, ++ or pick a different CONSOLE_LOGLEVEL_DEFAULT configuration value. ++ ++config BOOT_PRINTK_DELAY ++ bool "Delay each boot printk message by N milliseconds" ++ depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY ++ help ++ This build option allows you to read kernel boot messages ++ by inserting a short delay after each one. The delay is ++ specified in milliseconds on the kernel command line, ++ using "boot_delay=N". ++ ++ It is likely that you would also need to use "lpj=M" to preset ++ the "loops per jiffie" value. ++ See a previous boot log for the "lpj" value to use for your ++ system, and then set "lpj=M" before setting "boot_delay=N". ++ NOTE: Using this option may adversely affect SMP systems. ++ I.e., processors other than the first one may not boot up. ++ BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect ++ what it believes to be lockup conditions. ++ ++config DYNAMIC_DEBUG ++ bool "Enable dynamic printk() support" ++ default n ++ depends on PRINTK ++ depends on DEBUG_FS ++ help ++ ++ Compiles debug level messages into the kernel, which would not ++ otherwise be available at runtime. These messages can then be ++ enabled/disabled based on various levels of scope - per source file, ++ function, module, format string, and line number. This mechanism ++ implicitly compiles in all pr_debug() and dev_dbg() calls, which ++ enlarges the kernel text size by about 2%. ++ ++ If a source file is compiled with DEBUG flag set, any ++ pr_debug() calls in it are enabled by default, but can be ++ disabled at runtime as below. Note that DEBUG flag is ++ turned on by many CONFIG_*DEBUG* options. ++ ++ Usage: ++ ++ Dynamic debugging is controlled via the 'dynamic_debug/control' file, ++ which is contained in the 'debugfs' filesystem. Thus, the debugfs ++ filesystem must first be mounted before making use of this feature. ++ We refer the control file as: /dynamic_debug/control. This ++ file contains a list of the debug statements that can be enabled. 
The ++ format for each line of the file is: ++ ++ filename:lineno [module]function flags format ++ ++ filename : source file of the debug statement ++ lineno : line number of the debug statement ++ module : module that contains the debug statement ++ function : function that contains the debug statement ++ flags : '=p' means the line is turned 'on' for printing ++ format : the format used for the debug statement ++ ++ From a live system: ++ ++ nullarbor:~ # cat /dynamic_debug/control ++ # filename:lineno [module]function flags format ++ fs/aio.c:222 [aio]__put_ioctx =_ "__put_ioctx:\040freeing\040%p\012" ++ fs/aio.c:248 [aio]ioctx_alloc =_ "ENOMEM:\040nr_events\040too\040high\012" ++ fs/aio.c:1770 [aio]sys_io_cancel =_ "calling\040cancel\012" ++ ++ Example usage: ++ ++ // enable the message at line 1603 of file svcsock.c ++ nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > ++ /dynamic_debug/control ++ ++ // enable all the messages in file svcsock.c ++ nullarbor:~ # echo -n 'file svcsock.c +p' > ++ /dynamic_debug/control ++ ++ // enable all the messages in the NFS server module ++ nullarbor:~ # echo -n 'module nfsd +p' > ++ /dynamic_debug/control ++ ++ // enable all 12 messages in the function svc_process() ++ nullarbor:~ # echo -n 'func svc_process +p' > ++ /dynamic_debug/control ++ ++ // disable all 12 messages in the function svc_process() ++ nullarbor:~ # echo -n 'func svc_process -p' > ++ /dynamic_debug/control ++ ++ See Documentation/admin-guide/dynamic-debug-howto.rst for additional ++ information. ++ ++endmenu # "printk and dmesg options" ++ ++menu "Compile-time checks and compiler options" ++ ++config DEBUG_INFO ++ bool "Compile the kernel with debug info" ++ depends on DEBUG_KERNEL && !COMPILE_TEST ++ help ++ If you say Y here the resulting kernel image will include ++ debugging info resulting in a larger kernel image. ++ This adds debug symbols to the kernel and modules (gcc -g), and ++ is needed if you intend to use kernel crashdump or binary object ++ tools like crash, kgdb, LKCD, gdb, etc on the kernel. ++ Say Y here only if you plan to debug the kernel. ++ ++ If unsure, say N. ++ ++config DEBUG_INFO_REDUCED ++ bool "Reduce debugging information" ++ depends on DEBUG_INFO ++ help ++ If you say Y here gcc is instructed to generate less debugging ++ information for structure types. This means that tools that ++ need full debugging information (like kgdb or systemtap) won't ++ be happy. But if you merely need debugging information to ++ resolve line numbers there is no loss. Advantage is that ++ build directory object sizes shrink dramatically over a full ++ DEBUG_INFO build and compile times are reduced too. ++ Only works with newer gcc versions. ++ ++config DEBUG_INFO_SPLIT ++ bool "Produce split debuginfo in .dwo files" ++ depends on DEBUG_INFO ++ help ++ Generate debug info into separate .dwo files. This significantly ++ reduces the build directory size for builds with DEBUG_INFO, ++ because it stores the information only once on disk in .dwo ++ files instead of multiple times in object files and executables. ++ In addition the debug information is also compressed. ++ ++ Requires recent gcc (4.7+) and recent gdb/binutils. ++ Any tool that packages or reads debug information would need ++ to know about the .dwo files and include them. ++ Incompatible with older versions of ccache. ++ ++config DEBUG_INFO_DWARF4 ++ bool "Generate dwarf4 debuginfo" ++ depends on DEBUG_INFO ++ help ++ Generate dwarf4 debug info. This requires recent versions ++ of gcc and gdb. 
It makes the debug information larger. ++ But it significantly improves the success of resolving ++ variables in gdb on optimized code. ++ ++config GDB_SCRIPTS ++ bool "Provide GDB scripts for kernel debugging" ++ depends on DEBUG_INFO ++ help ++ This creates the required links to GDB helper scripts in the ++ build directory. If you load vmlinux into gdb, the helper ++ scripts will be automatically imported by gdb as well, and ++ additional functions are available to analyze a Linux kernel ++ instance. See Documentation/dev-tools/gdb-kernel-debugging.rst ++ for further details. ++ ++config ENABLE_MUST_CHECK ++ bool "Enable __must_check logic" ++ default y ++ help ++ Enable the __must_check logic in the kernel build. Disable this to ++ suppress the "warning: ignoring return value of 'foo', declared with ++ attribute warn_unused_result" messages. ++ ++config FRAME_WARN ++ int "Warn for stack frames larger than (needs gcc 4.4)" ++ range 0 8192 ++ default 2048 if GCC_PLUGIN_LATENT_ENTROPY ++ default 1280 if (!64BIT && PARISC) ++ default 1024 if (!64BIT && !PARISC) ++ default 2048 if 64BIT ++ help ++ Tell gcc to warn at build time for stack frames larger than this. ++ Setting this too low will cause a lot of warnings. ++ Setting it to 0 disables the warning. ++ Requires gcc 4.4 ++ ++config STRIP_ASM_SYMS ++ bool "Strip assembler-generated symbols during link" ++ default n ++ help ++ Strip internal assembler-generated symbols during a link (symbols ++ that look like '.Lxxx') so they don't pollute the output of ++ get_wchan() and suchlike. ++ ++config READABLE_ASM ++ bool "Generate readable assembler code" ++ depends on DEBUG_KERNEL ++ help ++ Disable some compiler optimizations that tend to generate human unreadable ++ assembler output. This may make the kernel slightly slower, but it helps ++ to keep kernel developers who have to stare a lot at assembler listings ++ sane. ++ ++config UNUSED_SYMBOLS ++ bool "Enable unused/obsolete exported symbols" ++ default y if X86 ++ help ++ Unused but exported symbols make the kernel needlessly bigger. For ++ that reason most of these unused exports will soon be removed. This ++ option is provided temporarily to provide a transition period in case ++ some external kernel module needs one of these symbols anyway. If you ++ encounter such a case in your module, consider if you are actually ++ using the right API. (rationale: since nobody in the kernel is using ++ this in a module, there is a pretty good chance it's actually the ++ wrong interface to use). If you really need the symbol, please send a ++ mail to the linux kernel mailing list mentioning the symbol and why ++ you really need it, and what the merge plan to the mainline kernel for ++ your module is. ++ ++config PAGE_OWNER ++ bool "Track page owner" ++ depends on DEBUG_KERNEL && STACKTRACE_SUPPORT ++ select DEBUG_FS ++ select STACKTRACE ++ select STACKDEPOT ++ select PAGE_EXTENSION ++ help ++ This keeps track of what call chain is the owner of a page, may ++ help to find bare alloc_page(s) leaks. Even if you include this ++ feature on your build, it is disabled in default. You should pass ++ "page_owner=on" to boot parameter in order to enable it. Eats ++ a fair amount of memory if enabled. See tools/vm/page_owner_sort.c ++ for user-space helper. ++ ++ If unsure, say N. ++ ++config DEBUG_FS ++ bool "Debug Filesystem" ++ help ++ debugfs is a virtual file system that kernel developers use to put ++ debugging files into. Enable this option to be able to read and ++ write to these files. 
++ ++ For detailed documentation on the debugfs API, see ++ Documentation/filesystems/. ++ ++ If unsure, say N. ++ ++config HEADERS_CHECK ++ bool "Run 'make headers_check' when building vmlinux" ++ depends on !UML ++ help ++ This option will extract the user-visible kernel headers whenever ++ building the kernel, and will run basic sanity checks on them to ++ ensure that exported files do not attempt to include files which ++ were not exported, etc. ++ ++ If you're making modifications to header files which are ++ relevant for userspace, say 'Y', and check the headers ++ exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in ++ your build tree), to make sure they're suitable. ++ ++config DEBUG_SECTION_MISMATCH ++ bool "Enable full Section mismatch analysis" ++ help ++ The section mismatch analysis checks if there are illegal ++ references from one section to another section. ++ During linktime or runtime, some sections are dropped; ++ any use of code/data previously in these sections would ++ most likely result in an oops. ++ In the code, functions and variables are annotated with ++ __init,, etc. (see the full list in include/linux/init.h), ++ which results in the code/data being placed in specific sections. ++ The section mismatch analysis is always performed after a full ++ kernel build, and enabling this option causes the following ++ additional steps to occur: ++ - Add the option -fno-inline-functions-called-once to gcc commands. ++ When inlining a function annotated with __init in a non-init ++ function, we would lose the section information and thus ++ the analysis would not catch the illegal reference. ++ This option tells gcc to inline less (but it does result in ++ a larger kernel). ++ - Run the section mismatch analysis for each module/built-in.a file. ++ When we run the section mismatch analysis on vmlinux.o, we ++ lose valuable information about where the mismatch was ++ introduced. ++ Running the analysis for each module/built-in.a file ++ tells where the mismatch happens much closer to the ++ source. The drawback is that the same mismatch is ++ reported at least twice. ++ - Enable verbose reporting from modpost in order to help resolve ++ the section mismatches that are reported. ++ ++config SECTION_MISMATCH_WARN_ONLY ++ bool "Make section mismatch errors non-fatal" ++ default y ++ help ++ If you say N here, the build process will fail if there are any ++ section mismatch, instead of just throwing warnings. ++ ++ If unsure, say Y. ++ ++# ++# Select this config option from the architecture Kconfig, if it ++# is preferred to always offer frame pointers as a config ++# option on the architecture (regardless of KERNEL_DEBUG): ++# ++config ARCH_WANT_FRAME_POINTERS ++ bool ++ ++config FRAME_POINTER ++ bool "Compile the kernel with frame pointers" ++ depends on DEBUG_KERNEL && (M68K || UML || SUPERH) || ARCH_WANT_FRAME_POINTERS ++ default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS ++ help ++ If you say Y here the resulting kernel image will be slightly ++ larger and slower, but it gives very useful debugging information ++ in case of kernel bugs. (precise oopses/stacktraces/warnings) ++ ++config STACK_VALIDATION ++ bool "Compile-time stack metadata validation" ++ depends on HAVE_STACK_VALIDATION ++ default n ++ help ++ Add compile-time checks to validate stack metadata, including frame ++ pointers (if CONFIG_FRAME_POINTER is enabled). This helps ensure ++ that runtime stack traces are more reliable. 
++ ++ This is also a prerequisite for generation of ORC unwind data, which ++ is needed for CONFIG_UNWINDER_ORC. ++ ++ For more information, see ++ tools/objtool/Documentation/stack-validation.txt. ++ ++config DEBUG_FORCE_WEAK_PER_CPU ++ bool "Force weak per-cpu definitions" ++ depends on DEBUG_KERNEL ++ help ++ s390 and alpha require percpu variables in modules to be ++ defined weak to work around addressing range issue which ++ puts the following two restrictions on percpu variable ++ definitions. ++ ++ 1. percpu symbols must be unique whether static or not ++ 2. percpu variables can't be defined inside a function ++ ++ To ensure that generic code follows the above rules, this ++ option forces all percpu variables to be defined as weak. ++ ++endmenu # "Compiler options" ++ ++config MAGIC_SYSRQ ++ bool "Magic SysRq key" ++ depends on !UML ++ help ++ If you say Y here, you will have some control over the system even ++ if the system crashes for example during kernel debugging (e.g., you ++ will be able to flush the buffer cache to disk, reboot the system ++ immediately or dump some status information). This is accomplished ++ by pressing various keys while holding SysRq (Alt+PrintScreen). It ++ also works on a serial console (on PC hardware at least), if you ++ send a BREAK and then within 5 seconds a command keypress. The ++ keys are documented in . ++ Don't say Y unless you really know what this hack does. ++ ++config MAGIC_SYSRQ_DEFAULT_ENABLE ++ hex "Enable magic SysRq key functions by default" ++ depends on MAGIC_SYSRQ ++ default 0x1 ++ help ++ Specifies which SysRq key functions are enabled by default. ++ This may be set to 1 or 0 to enable or disable them all, or ++ to a bitmask as described in Documentation/admin-guide/sysrq.rst. ++ ++config MAGIC_SYSRQ_SERIAL ++ bool "Enable magic SysRq key over serial" ++ depends on MAGIC_SYSRQ ++ default y ++ help ++ Many embedded boards have a disconnected TTL level serial which can ++ generate some garbage that can lead to spurious false sysrq detects. ++ This option allows you to decide whether you want to enable the ++ magic SysRq key. ++ ++config DEBUG_KERNEL ++ bool "Kernel debugging" ++ help ++ Say Y here if you are developing drivers or trying to debug and ++ identify kernel problems. ++ ++menu "Memory Debugging" ++ ++source mm/Kconfig.debug ++ ++config DEBUG_OBJECTS ++ bool "Debug object operations" ++ depends on DEBUG_KERNEL ++ help ++ If you say Y here, additional code will be inserted into the ++ kernel to track the life time of various objects and validate ++ the operations on those objects. ++ ++config DEBUG_OBJECTS_SELFTEST ++ bool "Debug objects selftest" ++ depends on DEBUG_OBJECTS ++ help ++ This enables the selftest of the object debug code. ++ ++config DEBUG_OBJECTS_FREE ++ bool "Debug objects in freed memory" ++ depends on DEBUG_OBJECTS ++ help ++ This enables checks whether a k/v free operation frees an area ++ which contains an object which has not been deactivated ++ properly. This can make kmalloc/kfree-intensive workloads ++ much slower. ++ ++config DEBUG_OBJECTS_TIMERS ++ bool "Debug timer objects" ++ depends on DEBUG_OBJECTS ++ help ++ If you say Y here, additional code will be inserted into the ++ timer routines to track the life time of timer objects and ++ validate the timer operations. 
++ ++config DEBUG_OBJECTS_WORK ++ bool "Debug work objects" ++ depends on DEBUG_OBJECTS ++ help ++ If you say Y here, additional code will be inserted into the ++ work queue routines to track the life time of work objects and ++ validate the work operations. ++ ++config DEBUG_OBJECTS_RCU_HEAD ++ bool "Debug RCU callbacks objects" ++ depends on DEBUG_OBJECTS ++ help ++ Enable this to turn on debugging of RCU list heads (call_rcu() usage). ++ ++config DEBUG_OBJECTS_PERCPU_COUNTER ++ bool "Debug percpu counter objects" ++ depends on DEBUG_OBJECTS ++ help ++ If you say Y here, additional code will be inserted into the ++ percpu counter routines to track the life time of percpu counter ++ objects and validate the percpu counter operations. ++ ++config DEBUG_OBJECTS_ENABLE_DEFAULT ++ int "debug_objects bootup default value (0-1)" ++ range 0 1 ++ default "1" ++ depends on DEBUG_OBJECTS ++ help ++ Debug objects boot parameter default value ++ ++config DEBUG_SLAB ++ bool "Debug slab memory allocations" ++ depends on DEBUG_KERNEL && SLAB ++ help ++ Say Y here to have the kernel do limited verification on memory ++ allocation as well as poisoning memory on free to catch use of freed ++ memory. This can make kmalloc/kfree-intensive workloads much slower. ++ ++config DEBUG_SLAB_LEAK ++ bool "Memory leak debugging" ++ depends on DEBUG_SLAB ++ ++config SLUB_DEBUG_ON ++ bool "SLUB debugging on by default" ++ depends on SLUB && SLUB_DEBUG ++ default n ++ help ++ Boot with debugging on by default. SLUB boots by default with ++ the runtime debug capabilities switched off. Enabling this is ++ equivalent to specifying the "slub_debug" parameter on boot. ++ There is no support for more fine grained debug control like ++ possible with slub_debug=xxx. SLUB debugging may be switched ++ off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying ++ "slub_debug=-". ++ ++config SLUB_STATS ++ default n ++ bool "Enable SLUB performance statistics" ++ depends on SLUB && SYSFS ++ help ++ SLUB statistics are useful to debug SLUBs allocation behavior in ++ order find ways to optimize the allocator. This should never be ++ enabled for production use since keeping statistics slows down ++ the allocator by a few percentage points. The slabinfo command ++ supports the determination of the most active slabs to figure ++ out which slabs are relevant to a particular load. ++ Try running: slabinfo -DA ++ ++config HAVE_DEBUG_KMEMLEAK ++ bool ++ ++config DEBUG_KMEMLEAK ++ bool "Kernel memory leak detector" ++ depends on DEBUG_KERNEL && HAVE_DEBUG_KMEMLEAK ++ select DEBUG_FS ++ select STACKTRACE if STACKTRACE_SUPPORT ++ select KALLSYMS ++ select CRC32 ++ help ++ Say Y here if you want to enable the memory leak ++ detector. The memory allocation/freeing is traced in a way ++ similar to the Boehm's conservative garbage collector, the ++ difference being that the orphan objects are not freed but ++ only shown in /sys/kernel/debug/kmemleak. Enabling this ++ feature will introduce an overhead to memory ++ allocations. See Documentation/dev-tools/kmemleak.rst for more ++ details. ++ ++ Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances ++ of finding leaks due to the slab objects poisoning. ++ ++ In order to access the kmemleak file, debugfs needs to be ++ mounted (usually at /sys/kernel/debug). 
++ ++config DEBUG_KMEMLEAK_EARLY_LOG_SIZE ++ int "Maximum kmemleak early log entries" ++ depends on DEBUG_KMEMLEAK ++ range 200 40000 ++ default 16000 ++ help ++ Kmemleak must track all the memory allocations to avoid ++ reporting false positives. Since memory may be allocated or ++ freed before kmemleak is initialised, an early log buffer is ++ used to store these actions. If kmemleak reports "early log ++ buffer exceeded", please increase this value. ++ ++config DEBUG_KMEMLEAK_TEST ++ tristate "Simple test for the kernel memory leak detector" ++ depends on DEBUG_KMEMLEAK && m ++ help ++ This option enables a module that explicitly leaks memory. ++ ++ If unsure, say N. ++ ++config DEBUG_KMEMLEAK_DEFAULT_OFF ++ bool "Default kmemleak to off" ++ depends on DEBUG_KMEMLEAK ++ help ++ Say Y here to disable kmemleak by default. It can then be enabled ++ on the command line via kmemleak=on. ++ ++config DEBUG_STACK_USAGE ++ bool "Stack utilization instrumentation" ++ depends on DEBUG_KERNEL && !IA64 ++ help ++ Enables the display of the minimum amount of free stack which each ++ task has ever had available in the sysrq-T and sysrq-P debug output. ++ ++ This option will slow down process creation somewhat. ++ ++config DEBUG_VM ++ bool "Debug VM" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to turn on extended checks in the virtual-memory system ++ that may impact performance. ++ ++ If unsure, say N. ++ ++config DEBUG_VM_VMACACHE ++ bool "Debug VMA caching" ++ depends on DEBUG_VM ++ help ++ Enable this to turn on VMA caching debug information. Doing so ++ can cause significant overhead, so only enable it in non-production ++ environments. ++ ++ If unsure, say N. ++ ++config DEBUG_VM_RB ++ bool "Debug VM red-black trees" ++ depends on DEBUG_VM ++ help ++ Enable VM red-black tree debugging information and extra validations. ++ ++ If unsure, say N. ++ ++config DEBUG_VM_PGFLAGS ++ bool "Debug page-flags operations" ++ depends on DEBUG_VM ++ help ++ Enables extra validation on page flags operations. ++ ++ If unsure, say N. ++ ++config ARCH_HAS_DEBUG_VIRTUAL ++ bool ++ ++config DEBUG_VIRTUAL ++ bool "Debug VM translations" ++ depends on DEBUG_KERNEL && ARCH_HAS_DEBUG_VIRTUAL ++ help ++ Enable some costly sanity checks in virtual to page code. This can ++ catch mistakes with virt_to_page() and friends. ++ ++ If unsure, say N. ++ ++config DEBUG_NOMMU_REGIONS ++ bool "Debug the global anon/private NOMMU mapping region tree" ++ depends on DEBUG_KERNEL && !MMU ++ help ++ This option causes the global tree of anonymous and private mapping ++ regions to be regularly checked for invalid topology. ++ ++config DEBUG_MEMORY_INIT ++ bool "Debug memory initialisation" if EXPERT ++ default !EXPERT ++ help ++ Enable this for additional checks during memory initialisation. ++ The sanity checks verify aspects of the VM such as the memory model ++ and other information provided by the architecture. Verbose ++ information will be printed at KERN_DEBUG loglevel depending ++ on the mminit_loglevel= command-line option. ++ ++ If unsure, say Y ++ ++config MEMORY_NOTIFIER_ERROR_INJECT ++ tristate "Memory hotplug notifier error injection module" ++ depends on MEMORY_HOTPLUG_SPARSE && NOTIFIER_ERROR_INJECTION ++ help ++ This option provides the ability to inject artificial errors to ++ memory hotplug notifier chain callbacks. 
It is controlled through ++ debugfs interface under /sys/kernel/debug/notifier-error-inject/memory ++ ++ If the notifier call chain should be failed with some events ++ notified, write the error code to "actions//error". ++ ++ Example: Inject memory hotplug offline error (-12 == -ENOMEM) ++ ++ # cd /sys/kernel/debug/notifier-error-inject/memory ++ # echo -12 > actions/MEM_GOING_OFFLINE/error ++ # echo offline > /sys/devices/system/memory/memoryXXX/state ++ bash: echo: write error: Cannot allocate memory ++ ++ To compile this code as a module, choose M here: the module will ++ be called memory-notifier-error-inject. ++ ++ If unsure, say N. ++ ++config DEBUG_PER_CPU_MAPS ++ bool "Debug access to per_cpu maps" ++ depends on DEBUG_KERNEL ++ depends on SMP ++ help ++ Say Y to verify that the per_cpu map being accessed has ++ been set up. This adds a fair amount of code to kernel memory ++ and decreases performance. ++ ++ Say N if unsure. ++ ++config DEBUG_HIGHMEM ++ bool "Highmem debugging" ++ depends on DEBUG_KERNEL && HIGHMEM ++ help ++ This option enables additional error checking for high memory ++ systems. Disable for production systems. ++ ++config HAVE_DEBUG_STACKOVERFLOW ++ bool ++ ++config DEBUG_STACKOVERFLOW ++ bool "Check for stack overflows" ++ depends on DEBUG_KERNEL && HAVE_DEBUG_STACKOVERFLOW ++ ---help--- ++ Say Y here if you want to check for overflows of kernel, IRQ ++ and exception stacks (if your architecture uses them). This ++ option will show detailed messages if free stack space drops ++ below a certain limit. ++ ++ These kinds of bugs usually occur when call-chains in the ++ kernel get too deep, especially when interrupts are ++ involved. ++ ++ Use this in cases where you see apparently random memory ++ corruption, especially if it appears in 'struct thread_info' ++ ++ If in doubt, say "N". ++ ++source "lib/Kconfig.kasan" ++ ++endmenu # "Memory Debugging" ++ ++config ARCH_HAS_KCOV ++ bool ++ help ++ KCOV does not have any arch-specific code, but currently it is enabled ++ only for x86_64. KCOV requires testing on other archs, and most likely ++ disabling of instrumentation for some early boot code. ++ ++config CC_HAS_SANCOV_TRACE_PC ++ def_bool $(cc-option,-fsanitize-coverage=trace-pc) ++ ++config KCOV ++ bool "Code coverage for fuzzing" ++ depends on ARCH_HAS_KCOV ++ depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS ++ select DEBUG_FS ++ select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC ++ help ++ KCOV exposes kernel code coverage information in a form suitable ++ for coverage-guided fuzzing (randomized testing). ++ ++ If RANDOMIZE_BASE is enabled, PC values will not be stable across ++ different machines and across reboots. If you need stable PC values, ++ disable RANDOMIZE_BASE. ++ ++ For more details, see Documentation/dev-tools/kcov.rst. ++ ++config KCOV_ENABLE_COMPARISONS ++ bool "Enable comparison operands collection by KCOV" ++ depends on KCOV ++ depends on $(cc-option,-fsanitize-coverage=trace-cmp) ++ help ++ KCOV also exposes operands of every comparison in the instrumented ++ code along with operand sizes and PCs of the comparison instructions. ++ These operands can be used by fuzzing engines to improve the quality ++ of fuzzing coverage. ++ ++config KCOV_INSTRUMENT_ALL ++ bool "Instrument all code by default" ++ depends on KCOV ++ default y ++ help ++ If you are doing generic system call fuzzing (like e.g. syzkaller), ++ then you will want to instrument the whole kernel and you should ++ say y here. If you are doing more targeted fuzzing (like e.g. 
++ filesystem fuzzing with AFL) then you will want to enable coverage ++ for more specific subsets of files, and should say n here. ++ ++config DEBUG_SHIRQ ++ bool "Debug shared IRQ handlers" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to generate a spurious interrupt as soon as a shared ++ interrupt handler is registered, and just before one is deregistered. ++ Drivers ought to be able to handle interrupts coming in at those ++ points; some don't and need to be caught. ++ ++menu "Debug Lockups and Hangs" ++ ++config LOCKUP_DETECTOR ++ bool ++ ++config SOFTLOCKUP_DETECTOR ++ bool "Detect Soft Lockups" ++ depends on DEBUG_KERNEL && !S390 ++ select LOCKUP_DETECTOR ++ help ++ Say Y here to enable the kernel to act as a watchdog to detect ++ soft lockups. ++ ++ Softlockups are bugs that cause the kernel to loop in kernel ++ mode for more than 20 seconds, without giving other tasks a ++ chance to run. The current stack trace is displayed upon ++ detection and the system will stay locked up. ++ ++config BOOTPARAM_SOFTLOCKUP_PANIC ++ bool "Panic (Reboot) On Soft Lockups" ++ depends on SOFTLOCKUP_DETECTOR ++ help ++ Say Y here to enable the kernel to panic on "soft lockups", ++ which are bugs that cause the kernel to loop in kernel ++ mode for more than 20 seconds (configurable using the watchdog_thresh ++ sysctl), without giving other tasks a chance to run. ++ ++ The panic can be used in combination with panic_timeout, ++ to cause the system to reboot automatically after a ++ lockup has been detected. This feature is useful for ++ high-availability systems that have uptime guarantees and ++ where a lockup must be resolved ASAP. ++ ++ Say N if unsure. ++ ++config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE ++ int ++ depends on SOFTLOCKUP_DETECTOR ++ range 0 1 ++ default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC ++ default 1 if BOOTPARAM_SOFTLOCKUP_PANIC ++ ++config HARDLOCKUP_DETECTOR_PERF ++ bool ++ select SOFTLOCKUP_DETECTOR ++ ++choice ++ prompt "aarch64 NMI watchdog method" ++ depends on ARM64 ++ help ++ Watchdog implementation method configuration. ++ ++config SDEI_WATCHDOG ++ bool "SDEI NMI Watchdog support" ++ depends on ARM_SDE_INTERFACE && !HARDLOCKUP_CHECK_TIMESTAMP ++ select HAVE_HARDLOCKUP_DETECTOR_ARCH ++ select HARDLOCKUP_DETECTOR ++ ++config PMU_WATCHDOG ++ bool "PMU NMI Watchdog support" ++ depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI ++ select HAVE_HARDLOCKUP_DETECTOR_PERF ++ ++endchoice ++ ++# ++# Enables a timestamp based low pass filter to compensate for perf based ++# hard lockup detection which runs too fast due to turbo modes. ++# ++config HARDLOCKUP_CHECK_TIMESTAMP ++ bool ++ ++# ++# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard ++# lockup detector rather than the perf based detector. ++# ++config HARDLOCKUP_DETECTOR ++ bool "Detect Hard Lockups" ++ depends on DEBUG_KERNEL && !S390 ++ depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH ++ select SOFTLOCKUP_DETECTOR ++ select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF ++ select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH ++ help ++ Say Y here to enable the kernel to act as a watchdog to detect ++ hard lockups. ++ ++ Hardlockups are bugs that cause the CPU to loop in kernel mode ++ for more than 10 seconds, without letting other interrupts have a ++ chance to run. The current stack trace is displayed upon detection ++ and the system will stay locked up. 
++ ++config BOOTPARAM_HARDLOCKUP_PANIC ++ bool "Panic (Reboot) On Hard Lockups" ++ depends on HARDLOCKUP_DETECTOR ++ help ++ Say Y here to enable the kernel to panic on "hard lockups", ++ which are bugs that cause the kernel to loop in kernel ++ mode with interrupts disabled for more than 10 seconds (configurable ++ using the watchdog_thresh sysctl). ++ ++ Say N if unsure. ++ ++config BOOTPARAM_HARDLOCKUP_PANIC_VALUE ++ int ++ depends on HARDLOCKUP_DETECTOR ++ range 0 1 ++ default 0 if !BOOTPARAM_HARDLOCKUP_PANIC ++ default 1 if BOOTPARAM_HARDLOCKUP_PANIC ++ ++config DETECT_HUNG_TASK ++ bool "Detect Hung Tasks" ++ depends on DEBUG_KERNEL ++ default SOFTLOCKUP_DETECTOR ++ help ++ Say Y here to enable the kernel to detect "hung tasks", ++ which are bugs that cause the task to be stuck in ++ uninterruptible "D" state indefinitely. ++ ++ When a hung task is detected, the kernel will print the ++ current stack trace (which you should report), but the ++ task will stay in uninterruptible state. If lockdep is ++ enabled then all held locks will also be reported. This ++ feature has negligible overhead. ++ ++config DEFAULT_HUNG_TASK_TIMEOUT ++ int "Default timeout for hung task detection (in seconds)" ++ depends on DETECT_HUNG_TASK ++ default 120 ++ help ++ This option controls the default timeout (in seconds) used ++ to determine when a task has become non-responsive and should ++ be considered hung. ++ ++ It can be adjusted at runtime via the kernel.hung_task_timeout_secs ++ sysctl or by writing a value to ++ /proc/sys/kernel/hung_task_timeout_secs. ++ ++ A timeout of 0 disables the check. The default is two minutes. ++ Keeping the default should be fine in most cases. ++ ++config BOOTPARAM_HUNG_TASK_PANIC ++ bool "Panic (Reboot) On Hung Tasks" ++ depends on DETECT_HUNG_TASK ++ help ++ Say Y here to enable the kernel to panic on "hung tasks", ++ which are bugs that cause the kernel to leave a task stuck ++ in uninterruptible "D" state. ++ ++ The panic can be used in combination with panic_timeout, ++ to cause the system to reboot automatically after a ++ hung task has been detected. This feature is useful for ++ high-availability systems that have uptime guarantees and ++ where a hung tasks must be resolved ASAP. ++ ++ Say N if unsure. ++ ++config BOOTPARAM_HUNG_TASK_PANIC_VALUE ++ int ++ depends on DETECT_HUNG_TASK ++ range 0 1 ++ default 0 if !BOOTPARAM_HUNG_TASK_PANIC ++ default 1 if BOOTPARAM_HUNG_TASK_PANIC ++ ++config WQ_WATCHDOG ++ bool "Detect Workqueue Stalls" ++ depends on DEBUG_KERNEL ++ help ++ Say Y here to enable stall detection on workqueues. If a ++ worker pool doesn't make forward progress on a pending work ++ item for over a given amount of time, 30s by default, a ++ warning message is printed along with dump of workqueue ++ state. This can be configured through kernel parameter ++ "workqueue.watchdog_thresh" and its sysfs counterpart. ++ ++endmenu # "Debug lockups and hangs" ++ ++config PANIC_ON_OOPS ++ bool "Panic on Oops" ++ help ++ Say Y here to enable the kernel to panic when it oopses. This ++ has the same effect as setting oops=panic on the kernel command ++ line. ++ ++ This feature is useful to ensure that the kernel does not do ++ anything erroneous after an oops which could result in data ++ corruption or other issues. ++ ++ Say N if unsure. 
++ ++config PANIC_ON_OOPS_VALUE ++ int ++ range 0 1 ++ default 0 if !PANIC_ON_OOPS ++ default 1 if PANIC_ON_OOPS ++ ++config PANIC_TIMEOUT ++ int "panic timeout" ++ default 0 ++ help ++ Set the timeout value (in seconds) until a reboot occurs when the ++ the kernel panics. If n = 0, then we wait forever. A timeout ++ value n > 0 will wait n seconds before rebooting, while a timeout ++ value n < 0 will reboot immediately. ++ ++config SCHED_DEBUG ++ bool "Collect scheduler debugging info" ++ depends on DEBUG_KERNEL && PROC_FS ++ default y ++ help ++ If you say Y here, the /proc/sched_debug file will be provided ++ that can help debug the scheduler. The runtime overhead of this ++ option is minimal. ++ ++config SCHED_INFO ++ bool ++ default n ++ ++config SCHEDSTATS ++ bool "Collect scheduler statistics" ++ depends on DEBUG_KERNEL && PROC_FS ++ select SCHED_INFO ++ help ++ If you say Y here, additional code will be inserted into the ++ scheduler and related routines to collect statistics about ++ scheduler behavior and provide them in /proc/schedstat. These ++ stats may be useful for both tuning and debugging the scheduler ++ If you aren't debugging the scheduler or trying to tune a specific ++ application, you can say N to avoid the very slight overhead ++ this adds. ++ ++config SCHED_STACK_END_CHECK ++ bool "Detect stack corruption on calls to schedule()" ++ depends on DEBUG_KERNEL ++ default n ++ help ++ This option checks for a stack overrun on calls to schedule(). ++ If the stack end location is found to be over written always panic as ++ the content of the corrupted region can no longer be trusted. ++ This is to ensure no erroneous behaviour occurs which could result in ++ data corruption or a sporadic crash at a later stage once the region ++ is examined. The runtime overhead introduced is minimal. ++ ++config DEBUG_TIMEKEEPING ++ bool "Enable extra timekeeping sanity checking" ++ help ++ This option will enable additional timekeeping sanity checks ++ which may be helpful when diagnosing issues where timekeeping ++ problems are suspected. ++ ++ This may include checks in the timekeeping hotpaths, so this ++ option may have a (very small) performance impact to some ++ workloads. ++ ++ If unsure, say N. ++ ++config DEBUG_PREEMPT ++ bool "Debug preemptible kernel" ++ depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT ++ default y ++ help ++ If you say Y here then the kernel will use a debug variant of the ++ commonly used smp_processor_id() function and will print warnings ++ if kernel code uses it in a preemption-unsafe way. Also, the kernel ++ will detect preemption count underflows. 
++ ++menu "Lock Debugging (spinlocks, mutexes, etc...)" ++ ++config LOCK_DEBUGGING_SUPPORT ++ bool ++ depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT ++ default y ++ ++config PROVE_LOCKING ++ bool "Lock debugging: prove locking correctness" ++ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT ++ select LOCKDEP ++ select DEBUG_SPINLOCK ++ select DEBUG_MUTEXES ++ select DEBUG_RT_MUTEXES if RT_MUTEXES ++ select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER ++ select DEBUG_WW_MUTEX_SLOWPATH ++ select DEBUG_LOCK_ALLOC ++ select TRACE_IRQFLAGS ++ default n ++ help ++ This feature enables the kernel to prove that all locking ++ that occurs in the kernel runtime is mathematically ++ correct: that under no circumstance could an arbitrary (and ++ not yet triggered) combination of observed locking ++ sequences (on an arbitrary number of CPUs, running an ++ arbitrary number of tasks and interrupt contexts) cause a ++ deadlock. ++ ++ In short, this feature enables the kernel to report locking ++ related deadlocks before they actually occur. ++ ++ The proof does not depend on how hard and complex a ++ deadlock scenario would be to trigger: how many ++ participant CPUs, tasks and irq-contexts would be needed ++ for it to trigger. The proof also does not depend on ++ timing: if a race and a resulting deadlock is possible ++ theoretically (no matter how unlikely the race scenario ++ is), it will be proven so and will immediately be ++ reported by the kernel (once the event is observed that ++ makes the deadlock theoretically possible). ++ ++ If a deadlock is impossible (i.e. the locking rules, as ++ observed by the kernel, are mathematically correct), the ++ kernel reports nothing. ++ ++ NOTE: this feature can also be enabled for rwlocks, mutexes ++ and rwsems - in which case all dependencies between these ++ different locking variants are observed and mapped too, and ++ the proof of observed correctness is also maintained for an ++ arbitrary combination of these separate locking variants. ++ ++ For more details, see Documentation/locking/lockdep-design.txt. ++ ++config LOCK_STAT ++ bool "Lock usage statistics" ++ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT ++ select LOCKDEP ++ select DEBUG_SPINLOCK ++ select DEBUG_MUTEXES ++ select DEBUG_RT_MUTEXES if RT_MUTEXES ++ select DEBUG_LOCK_ALLOC ++ default n ++ help ++ This feature enables tracking lock contention points ++ ++ For more details, see Documentation/locking/lockstat.txt ++ ++ This also enables lock events required by "perf lock", ++ subcommand of perf. ++ If you want to use "perf lock", you also need to turn on ++ CONFIG_EVENT_TRACING. ++ ++ CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. ++ (CONFIG_LOCKDEP defines "acquire" and "release" events.) ++ ++config DEBUG_RT_MUTEXES ++ bool "RT Mutex debugging, deadlock detection" ++ depends on DEBUG_KERNEL && RT_MUTEXES ++ help ++ This allows rt mutex semantics violations and rt mutex related ++ deadlocks (lockups) to be detected and reported automatically. ++ ++config DEBUG_SPINLOCK ++ bool "Spinlock and rw-lock debugging: basic checks" ++ depends on DEBUG_KERNEL ++ select UNINLINE_SPIN_UNLOCK ++ help ++ Say Y here and build SMP to catch missing spinlock initialization ++ and certain other kinds of spinlock errors commonly made. This is ++ best used in conjunction with the NMI watchdog so that spinlock ++ deadlocks are also debuggable. 
++ ++config DEBUG_MUTEXES ++ bool "Mutex debugging: basic checks" ++ depends on DEBUG_KERNEL ++ help ++ This feature allows mutex semantics violations to be detected and ++ reported. ++ ++config DEBUG_WW_MUTEX_SLOWPATH ++ bool "Wait/wound mutex debugging: Slowpath testing" ++ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT ++ select DEBUG_LOCK_ALLOC ++ select DEBUG_SPINLOCK ++ select DEBUG_MUTEXES ++ help ++ This feature enables slowpath testing for w/w mutex users by ++ injecting additional -EDEADLK wound/backoff cases. Together with ++ the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this ++ will test all possible w/w mutex interface abuse with the ++ exception of simply not acquiring all the required locks. ++ Note that this feature can introduce significant overhead, so ++ it really should not be enabled in a production or distro kernel, ++ even a debug kernel. If you are a driver writer, enable it. If ++ you are a distro, do not. ++ ++config DEBUG_RWSEMS ++ bool "RW Semaphore debugging: basic checks" ++ depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER ++ help ++ This debugging feature allows mismatched rw semaphore locks and unlocks ++ to be detected and reported. ++ ++config DEBUG_LOCK_ALLOC ++ bool "Lock debugging: detect incorrect freeing of live locks" ++ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT ++ select DEBUG_SPINLOCK ++ select DEBUG_MUTEXES ++ select DEBUG_RT_MUTEXES if RT_MUTEXES ++ select LOCKDEP ++ help ++ This feature will check whether any held lock (spinlock, rwlock, ++ mutex or rwsem) is incorrectly freed by the kernel, via any of the ++ memory-freeing routines (kfree(), kmem_cache_free(), free_pages(), ++ vfree(), etc.), whether a live lock is incorrectly reinitialized via ++ spin_lock_init()/mutex_init()/etc., or whether there is any lock ++ held during task exit. ++ ++config LOCKDEP ++ bool ++ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT ++ select STACKTRACE ++ select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86 ++ select KALLSYMS ++ select KALLSYMS_ALL ++ ++config LOCKDEP_SMALL ++ bool ++ ++config DEBUG_LOCKDEP ++ bool "Lock dependency engine debugging" ++ depends on DEBUG_KERNEL && LOCKDEP ++ help ++ If you say Y here, the lock dependency engine will do ++ additional runtime checks to debug itself, at the price ++ of more runtime overhead. ++ ++config DEBUG_ATOMIC_SLEEP ++ bool "Sleep inside atomic section checking" ++ select PREEMPT_COUNT ++ depends on DEBUG_KERNEL ++ depends on !ARCH_NO_PREEMPT ++ help ++ If you say Y here, various routines which may sleep will become very ++ noisy if they are called inside atomic sections: when a spinlock is ++ held, inside an rcu read side critical section, inside preempt disabled ++ sections, inside an interrupt, etc... ++ ++config DEBUG_LOCKING_API_SELFTESTS ++ bool "Locking API boot-time self-tests" ++ depends on DEBUG_KERNEL ++ help ++ Say Y here if you want the kernel to run a short self-test during ++ bootup. The self-test checks whether common types of locking bugs ++ are detected by debugging mechanisms or not. (if you disable ++ lock debugging then those bugs wont be detected of course.) ++ The following locking APIs are covered: spinlocks, rwlocks, ++ mutexes and rwsems. ++ ++config LOCK_TORTURE_TEST ++ tristate "torture tests for locking" ++ depends on DEBUG_KERNEL ++ select TORTURE_TEST ++ help ++ This option provides a kernel module that runs torture tests ++ on kernel locking primitives. 
The kernel module may be built ++ after the fact on the running kernel to be tested, if desired. ++ ++ Say Y here if you want kernel locking-primitive torture tests ++ to be built into the kernel. ++ Say M if you want these torture tests to build as a module. ++ Say N if you are unsure. ++ ++config WW_MUTEX_SELFTEST ++ tristate "Wait/wound mutex selftests" ++ help ++ This option provides a kernel module that runs tests on the ++ on the struct ww_mutex locking API. ++ ++ It is recommended to enable DEBUG_WW_MUTEX_SLOWPATH in conjunction ++ with this test harness. ++ ++ Say M if you want these self tests to build as a module. ++ Say N if you are unsure. ++ ++endmenu # lock debugging ++ ++config TRACE_IRQFLAGS ++ bool ++ help ++ Enables hooks to interrupt enabling and disabling for ++ either tracing or lock debugging. ++ ++config STACKTRACE ++ bool "Stack backtrace support" ++ depends on STACKTRACE_SUPPORT ++ help ++ This option causes the kernel to create a /proc/pid/stack for ++ every process, showing its current stack trace. ++ It is also used by various kernel debugging features that require ++ stack trace generation. ++ ++config WARN_ALL_UNSEEDED_RANDOM ++ bool "Warn for all uses of unseeded randomness" ++ default n ++ help ++ Some parts of the kernel contain bugs relating to their use of ++ cryptographically secure random numbers before it's actually possible ++ to generate those numbers securely. This setting ensures that these ++ flaws don't go unnoticed, by enabling a message, should this ever ++ occur. This will allow people with obscure setups to know when things ++ are going wrong, so that they might contact developers about fixing ++ it. ++ ++ Unfortunately, on some models of some architectures getting ++ a fully seeded CRNG is extremely difficult, and so this can ++ result in dmesg getting spammed for a surprisingly long ++ time. This is really bad from a security perspective, and ++ so architecture maintainers really need to do what they can ++ to get the CRNG seeded sooner after the system is booted. ++ However, since users cannot do anything actionable to ++ address this, by default the kernel will issue only a single ++ warning for the first use of unseeded randomness. ++ ++ Say Y here if you want to receive warnings for all uses of ++ unseeded randomness. This will be of use primarily for ++ those developers interested in improving the security of ++ Linux kernels running on their architecture (or ++ subarchitecture). ++ ++config DEBUG_KOBJECT ++ bool "kobject debugging" ++ depends on DEBUG_KERNEL ++ help ++ If you say Y here, some extra kobject debugging messages will be sent ++ to the syslog. ++ ++config DEBUG_KOBJECT_RELEASE ++ bool "kobject release debugging" ++ depends on DEBUG_OBJECTS_TIMERS ++ help ++ kobjects are reference counted objects. This means that their ++ last reference count put is not predictable, and the kobject can ++ live on past the point at which a driver decides to drop it's ++ initial reference to the kobject gained on allocation. An ++ example of this would be a struct device which has just been ++ unregistered. ++ ++ However, some buggy drivers assume that after such an operation, ++ the memory backing the kobject can be immediately freed. This ++ goes completely against the principles of a refcounted object. ++ ++ If you say Y here, the kernel will delay the release of kobjects ++ on the last reference count to improve the visibility of this ++ kind of kobject release bug. 
++ ++config HAVE_DEBUG_BUGVERBOSE ++ bool ++ ++config DEBUG_BUGVERBOSE ++ bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EXPERT ++ depends on BUG && (GENERIC_BUG || HAVE_DEBUG_BUGVERBOSE) ++ default y ++ help ++ Say Y here to make BUG() panics output the file name and line number ++ of the BUG call as well as the EIP and oops trace. This aids ++ debugging but costs about 70-100K of memory. ++ ++config DEBUG_LIST ++ bool "Debug linked list manipulation" ++ depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION ++ help ++ Enable this to turn on extended checks in the linked-list ++ walking routines. ++ ++ If unsure, say N. ++ ++config DEBUG_PI_LIST ++ bool "Debug priority linked list manipulation" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to turn on extended checks in the priority-ordered ++ linked-list (plist) walking routines. This checks the entire ++ list multiple times during each manipulation. ++ ++ If unsure, say N. ++ ++config DEBUG_SG ++ bool "Debug SG table operations" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to turn on checks on scatter-gather tables. This can ++ help find problems with drivers that do not properly initialize ++ their sg tables. ++ ++ If unsure, say N. ++ ++config DEBUG_NOTIFIERS ++ bool "Debug notifier call chains" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to turn on sanity checking for notifier call chains. ++ This is most useful for kernel developers to make sure that ++ modules properly unregister themselves from notifier chains. ++ This is a relatively cheap check but if you care about maximum ++ performance, say N. ++ ++config DEBUG_CREDENTIALS ++ bool "Debug credential management" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to turn on some debug checking for credential ++ management. The additional code keeps track of the number of ++ pointers from task_structs to any given cred struct, and checks to ++ see that this number never exceeds the usage count of the cred ++ struct. ++ ++ Furthermore, if SELinux is enabled, this also checks that the ++ security pointer in the cred struct is never seen to be invalid. ++ ++ If unsure, say N. ++ ++source "kernel/rcu/Kconfig.debug" ++ ++config DEBUG_WQ_FORCE_RR_CPU ++ bool "Force round-robin CPU selection for unbound work items" ++ depends on DEBUG_KERNEL ++ default n ++ help ++ Workqueue used to implicitly guarantee that work items queued ++ without explicit CPU specified are put on the local CPU. This ++ guarantee is no longer true and while local CPU is still ++ preferred work items may be put on foreign CPUs. Kernel ++ parameter "workqueue.debug_force_rr_cpu" is added to force ++ round-robin CPU selection to flush out usages which depend on the ++ now broken guarantee. This config option enables the debug ++ feature by default. When enabled, memory and cache locality will ++ be impacted. ++ ++config DEBUG_BLOCK_EXT_DEVT ++ bool "Force extended block device numbers and spread them" ++ depends on DEBUG_KERNEL ++ depends on BLOCK ++ default n ++ help ++ BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON ++ SOME DISTRIBUTIONS. DO NOT ENABLE THIS UNLESS YOU KNOW WHAT ++ YOU ARE DOING. Distros, please enable this and fix whatever ++ is broken. ++ ++ Conventionally, block device numbers are allocated from ++ predetermined contiguous area. However, extended block area ++ may introduce non-contiguous block device numbers. 
This ++ option forces most block device numbers to be allocated from ++ the extended space and spreads them to discover kernel or ++ userland code paths which assume predetermined contiguous ++ device number allocation. ++ ++ Note that turning on this debug option shuffles all the ++ device numbers for all IDE and SCSI devices including libata ++ ones, so root partition specified using device number ++ directly (via rdev or root=MAJ:MIN) won't work anymore. ++ Textual device names (root=/dev/sdXn) will continue to work. ++ ++ Say N if you are unsure. ++ ++config CPU_HOTPLUG_STATE_CONTROL ++ bool "Enable CPU hotplug state control" ++ depends on DEBUG_KERNEL ++ depends on HOTPLUG_CPU ++ default n ++ help ++ Allows to write steps between "offline" and "online" to the CPUs ++ sysfs target file so states can be stepped granular. This is a debug ++ option for now as the hotplug machinery cannot be stopped and ++ restarted at arbitrary points yet. ++ ++ Say N if your are unsure. ++ ++config NOTIFIER_ERROR_INJECTION ++ tristate "Notifier error injection" ++ depends on DEBUG_KERNEL ++ select DEBUG_FS ++ help ++ This option provides the ability to inject artificial errors to ++ specified notifier chain callbacks. It is useful to test the error ++ handling of notifier call chain failures. ++ ++ Say N if unsure. ++ ++config PM_NOTIFIER_ERROR_INJECT ++ tristate "PM notifier error injection module" ++ depends on PM && NOTIFIER_ERROR_INJECTION ++ default m if PM_DEBUG ++ help ++ This option provides the ability to inject artificial errors to ++ PM notifier chain callbacks. It is controlled through debugfs ++ interface /sys/kernel/debug/notifier-error-inject/pm ++ ++ If the notifier call chain should be failed with some events ++ notified, write the error code to "actions//error". ++ ++ Example: Inject PM suspend error (-12 = -ENOMEM) ++ ++ # cd /sys/kernel/debug/notifier-error-inject/pm/ ++ # echo -12 > actions/PM_SUSPEND_PREPARE/error ++ # echo mem > /sys/power/state ++ bash: echo: write error: Cannot allocate memory ++ ++ To compile this code as a module, choose M here: the module will ++ be called pm-notifier-error-inject. ++ ++ If unsure, say N. ++ ++config OF_RECONFIG_NOTIFIER_ERROR_INJECT ++ tristate "OF reconfig notifier error injection module" ++ depends on OF_DYNAMIC && NOTIFIER_ERROR_INJECTION ++ help ++ This option provides the ability to inject artificial errors to ++ OF reconfig notifier chain callbacks. It is controlled ++ through debugfs interface under ++ /sys/kernel/debug/notifier-error-inject/OF-reconfig/ ++ ++ If the notifier call chain should be failed with some events ++ notified, write the error code to "actions//error". ++ ++ To compile this code as a module, choose M here: the module will ++ be called of-reconfig-notifier-error-inject. ++ ++ If unsure, say N. ++ ++config NETDEV_NOTIFIER_ERROR_INJECT ++ tristate "Netdev notifier error injection module" ++ depends on NET && NOTIFIER_ERROR_INJECTION ++ help ++ This option provides the ability to inject artificial errors to ++ netdevice notifier chain callbacks. It is controlled through debugfs ++ interface /sys/kernel/debug/notifier-error-inject/netdev ++ ++ If the notifier call chain should be failed with some events ++ notified, write the error code to "actions//error". 
++ ++ Example: Inject netdevice mtu change error (-22 = -EINVAL) ++ ++ # cd /sys/kernel/debug/notifier-error-inject/netdev ++ # echo -22 > actions/NETDEV_CHANGEMTU/error ++ # ip link set eth0 mtu 1024 ++ RTNETLINK answers: Invalid argument ++ ++ To compile this code as a module, choose M here: the module will ++ be called netdev-notifier-error-inject. ++ ++ If unsure, say N. ++ ++config FUNCTION_ERROR_INJECTION ++ def_bool y ++ depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES ++ ++config FAULT_INJECTION ++ bool "Fault-injection framework" ++ depends on DEBUG_KERNEL ++ help ++ Provide fault-injection framework. ++ For more details, see Documentation/fault-injection/. ++ ++config FAILSLAB ++ bool "Fault-injection capability for kmalloc" ++ depends on FAULT_INJECTION ++ depends on SLAB || SLUB ++ help ++ Provide fault-injection capability for kmalloc. ++ ++config FAIL_PAGE_ALLOC ++ bool "Fault-injection capabilitiy for alloc_pages()" ++ depends on FAULT_INJECTION ++ help ++ Provide fault-injection capability for alloc_pages(). ++ ++config FAIL_MAKE_REQUEST ++ bool "Fault-injection capability for disk IO" ++ depends on FAULT_INJECTION && BLOCK ++ help ++ Provide fault-injection capability for disk IO. ++ ++config FAIL_IO_TIMEOUT ++ bool "Fault-injection capability for faking disk interrupts" ++ depends on FAULT_INJECTION && BLOCK ++ help ++ Provide fault-injection capability on end IO handling. This ++ will make the block layer "forget" an interrupt as configured, ++ thus exercising the error handling. ++ ++ Only works with drivers that use the generic timeout handling, ++ for others it wont do anything. ++ ++config FAIL_FUTEX ++ bool "Fault-injection capability for futexes" ++ select DEBUG_FS ++ depends on FAULT_INJECTION && FUTEX ++ help ++ Provide fault-injection capability for futexes. ++ ++config FAULT_INJECTION_DEBUG_FS ++ bool "Debugfs entries for fault-injection capabilities" ++ depends on FAULT_INJECTION && SYSFS && DEBUG_FS ++ help ++ Enable configuration of fault-injection capabilities via debugfs. ++ ++config FAIL_FUNCTION ++ bool "Fault-injection capability for functions" ++ depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION ++ help ++ Provide function-based fault-injection capability. ++ This will allow you to override a specific function with a return ++ with given return value. As a result, function caller will see ++ an error value and have to handle it. This is useful to test the ++ error handling in various subsystems. ++ ++config FAIL_MMC_REQUEST ++ bool "Fault-injection capability for MMC IO" ++ depends on FAULT_INJECTION_DEBUG_FS && MMC ++ help ++ Provide fault-injection capability for MMC IO. ++ This will make the mmc core return data errors. This is ++ useful to test the error handling in the mmc block device ++ and to test how the mmc host driver handles retries from ++ the block device. 
++ ++config FAULT_INJECTION_STACKTRACE_FILTER ++ bool "stacktrace filter for fault-injection capabilities" ++ depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT ++ depends on !X86_64 ++ select STACKTRACE ++ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86 ++ help ++ Provide stacktrace filter for fault-injection capabilities ++ ++config LATENCYTOP ++ bool "Latency measuring infrastructure" ++ depends on DEBUG_KERNEL ++ depends on STACKTRACE_SUPPORT ++ depends on PROC_FS ++ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86 ++ select KALLSYMS ++ select KALLSYMS_ALL ++ select STACKTRACE ++ select SCHEDSTATS ++ select SCHED_DEBUG ++ help ++ Enable this option if you want to use the LatencyTOP tool ++ to find out which userspace is blocking on what kernel operations. ++ ++source kernel/trace/Kconfig ++ ++config PROVIDE_OHCI1394_DMA_INIT ++ bool "Remote debugging over FireWire early on boot" ++ depends on PCI && X86 ++ help ++ If you want to debug problems which hang or crash the kernel early ++ on boot and the crashing machine has a FireWire port, you can use ++ this feature to remotely access the memory of the crashed machine ++ over FireWire. This employs remote DMA as part of the OHCI1394 ++ specification which is now the standard for FireWire controllers. ++ ++ With remote DMA, you can monitor the printk buffer remotely using ++ firescope and access all memory below 4GB using fireproxy from gdb. ++ Even controlling a kernel debugger is possible using remote DMA. ++ ++ Usage: ++ ++ If ohci1394_dma=early is used as boot parameter, it will initialize ++ all OHCI1394 controllers which are found in the PCI config space. ++ ++ As all changes to the FireWire bus such as enabling and disabling ++ devices cause a bus reset and thereby disable remote DMA for all ++ devices, be sure to have the cable plugged and FireWire enabled on ++ the debugging host before booting the debug target for debugging. ++ ++ This code (~1k) is freed after boot. By then, the firewire stack ++ in charge of the OHCI-1394 controllers should be used instead. ++ ++ See Documentation/debugging-via-ohci1394.txt for more information. ++ ++config DMA_API_DEBUG ++ bool "Enable debugging of DMA-API usage" ++ select NEED_DMA_MAP_STATE ++ help ++ Enable this option to debug the use of the DMA API by device drivers. ++ With this option you will be able to detect common bugs in device ++ drivers like double-freeing of DMA mappings or freeing mappings that ++ were never allocated. ++ ++ This also attempts to catch cases where a page owned by DMA is ++ accessed by the cpu in a way that could cause data corruption. For ++ example, this enables cow_user_page() to check that the source page is ++ not undergoing DMA. ++ ++ This option causes a performance degradation. Use only if you want to ++ debug device drivers and dma interactions. ++ ++ If unsure, say N. ++ ++config DMA_API_DEBUG_SG ++ bool "Debug DMA scatter-gather usage" ++ default y ++ depends on DMA_API_DEBUG ++ help ++ Perform extra checking that callers of dma_map_sg() have respected the ++ appropriate segment length/boundary limits for the given device when ++ preparing DMA scatterlists. 
++ ++ This is particularly likely to have been overlooked in cases where the ++ dma_map_sg() API is used for general bulk mapping of pages rather than ++ preparing literal scatter-gather descriptors, where there is a risk of ++ unexpected behaviour from DMA API implementations if the scatterlist ++ is technically out-of-spec. ++ ++ If unsure, say N. ++ ++menuconfig RUNTIME_TESTING_MENU ++ bool "Runtime Testing" ++ def_bool y ++ ++if RUNTIME_TESTING_MENU ++ ++config LKDTM ++ tristate "Linux Kernel Dump Test Tool Module" ++ depends on DEBUG_FS ++ depends on BLOCK ++ help ++ This module enables testing of the different dumping mechanisms by ++ inducing system failures at predefined crash points. ++ If you don't need it: say N ++ Choose M here to compile this code as a module. The module will be ++ called lkdtm. ++ ++ Documentation on how to use the module can be found in ++ Documentation/fault-injection/provoke-crashes.txt ++ ++config TEST_LIST_SORT ++ tristate "Linked list sorting test" ++ depends on DEBUG_KERNEL || m ++ help ++ Enable this to turn on 'list_sort()' function test. This test is ++ executed only once during system boot (so affects only boot time), ++ or at module load time. ++ ++ If unsure, say N. ++ ++config TEST_SORT ++ tristate "Array-based sort test" ++ depends on DEBUG_KERNEL || m ++ help ++ This option enables the self-test function of 'sort()' at boot, ++ or at module load time. ++ ++ If unsure, say N. ++ ++config KPROBES_SANITY_TEST ++ bool "Kprobes sanity tests" ++ depends on DEBUG_KERNEL ++ depends on KPROBES ++ help ++ This option provides for testing basic kprobes functionality on ++ boot. Samples of kprobe and kretprobe are inserted and ++ verified for functionality. ++ ++ Say N if you are unsure. ++ ++config BACKTRACE_SELF_TEST ++ tristate "Self test for the backtrace code" ++ depends on DEBUG_KERNEL ++ help ++ This option provides a kernel module that can be used to test ++ the kernel stack backtrace code. This option is not useful ++ for distributions or general kernels, but only for kernel ++ developers working on architecture code. ++ ++ Note that if you want to also test saved backtraces, you will ++ have to enable STACKTRACE as well. ++ ++ Say N if you are unsure. ++ ++config RBTREE_TEST ++ tristate "Red-Black tree test" ++ depends on DEBUG_KERNEL ++ help ++ A benchmark measuring the performance of the rbtree library. ++ Also includes rbtree invariant checks. ++ ++config INTERVAL_TREE_TEST ++ tristate "Interval tree test" ++ depends on DEBUG_KERNEL ++ select INTERVAL_TREE ++ help ++ A benchmark measuring the performance of the interval tree library ++ ++config PERCPU_TEST ++ tristate "Per cpu operations test" ++ depends on m && DEBUG_KERNEL ++ help ++ Enable this option to build test module which validates per-cpu ++ operations. ++ ++ If unsure, say N. ++ ++config ATOMIC64_SELFTEST ++ tristate "Perform an atomic64_t self-test" ++ help ++ Enable this option to test the atomic64_t functions at boot or ++ at module load time. ++ ++ If unsure, say N. ++ ++config ASYNC_RAID6_TEST ++ tristate "Self test for hardware accelerated raid6 recovery" ++ depends on ASYNC_RAID6_RECOV ++ select ASYNC_MEMCPY ++ ---help--- ++ This is a one-shot self test that permutes through the ++ recovery of all the possible two disk failure scenarios for a ++ N-disk array. Recovery is performed with the asynchronous ++ raid6 recovery routines, and will optionally use an offload ++ engine if one is available. ++ ++ If unsure, say N. 
++ ++config TEST_HEXDUMP ++ tristate "Test functions located in the hexdump module at runtime" ++ ++config TEST_STRING_HELPERS ++ tristate "Test functions located in the string_helpers module at runtime" ++ ++config TEST_KSTRTOX ++ tristate "Test kstrto*() family of functions at runtime" ++ ++config TEST_PRINTF ++ tristate "Test printf() family of functions at runtime" ++ ++config TEST_BITMAP ++ tristate "Test bitmap_*() family of functions at runtime" ++ help ++ Enable this option to test the bitmap functions at boot. ++ ++ If unsure, say N. ++ ++config TEST_BITFIELD ++ tristate "Test bitfield functions at runtime" ++ help ++ Enable this option to test the bitfield functions at boot. ++ ++ If unsure, say N. ++ ++config TEST_UUID ++ tristate "Test functions located in the uuid module at runtime" ++ ++config TEST_OVERFLOW ++ tristate "Test check_*_overflow() functions at runtime" ++ ++config TEST_RHASHTABLE ++ tristate "Perform selftest on resizable hash table" ++ help ++ Enable this option to test the rhashtable functions at boot. ++ ++ If unsure, say N. ++ ++config TEST_HASH ++ tristate "Perform selftest on hash functions" ++ help ++ Enable this option to test the kernel's integer (), ++ string (), and siphash () ++ hash functions on boot (or module load). ++ ++ This is intended to help people writing architecture-specific ++ optimized versions. If unsure, say N. ++ ++config TEST_IDA ++ tristate "Perform selftest on IDA functions" ++ ++config TEST_PARMAN ++ tristate "Perform selftest on priority array manager" ++ depends on PARMAN ++ help ++ Enable this option to test priority array manager on boot ++ (or module load). ++ ++ If unsure, say N. ++ ++config TEST_LKM ++ tristate "Test module loading with 'hello world' module" ++ depends on m ++ help ++ This builds the "test_module" module that emits "Hello, world" ++ on printk when loaded. It is designed to be used for basic ++ evaluation of the module loading subsystem (for example when ++ validating module verification). It lacks any extra dependencies, ++ and will not normally be loaded by the system unless explicitly ++ requested by name. ++ ++ If unsure, say N. ++ ++config TEST_USER_COPY ++ tristate "Test user/kernel boundary protections" ++ depends on m ++ help ++ This builds the "test_user_copy" module that runs sanity checks ++ on the copy_to/from_user infrastructure, making sure basic ++ user/kernel boundary testing is working. If it fails to load, ++ a regression has been detected in the user/kernel memory boundary ++ protections. ++ ++ If unsure, say N. ++ ++config TEST_BPF ++ tristate "Test BPF filter functionality" ++ depends on m && NET ++ help ++ This builds the "test_bpf" module that runs various test vectors ++ against the BPF interpreter or BPF JIT compiler depending on the ++ current setting. This is in particular useful for BPF JIT compiler ++ development, but also to run regression tests against changes in ++ the interpreter code. It also enables test stubs for eBPF maps and ++ verifier used by user space verifier testsuite. ++ ++ If unsure, say N. ++ ++config FIND_BIT_BENCHMARK ++ tristate "Test find_bit functions" ++ help ++ This builds the "test_find_bit" module that measure find_*_bit() ++ functions performance. ++ ++ If unsure, say N. ++ ++config TEST_FIRMWARE ++ tristate "Test firmware loading via userspace interface" ++ depends on FW_LOADER ++ help ++ This builds the "test_firmware" module that creates a userspace ++ interface for testing firmware loading. 
This can be used to ++ control the triggering of firmware loading without needing an ++ actual firmware-using device. The contents can be rechecked by ++ userspace. ++ ++ If unsure, say N. ++ ++config TEST_SYSCTL ++ tristate "sysctl test driver" ++ depends on PROC_SYSCTL ++ help ++ This builds the "test_sysctl" module. This driver enables to test the ++ proc sysctl interfaces available to drivers safely without affecting ++ production knobs which might alter system functionality. ++ ++ If unsure, say N. ++ ++config TEST_UDELAY ++ tristate "udelay test driver" ++ help ++ This builds the "udelay_test" module that helps to make sure ++ that udelay() is working properly. ++ ++ If unsure, say N. ++ ++config TEST_STATIC_KEYS ++ tristate "Test static keys" ++ depends on m ++ help ++ Test the static key interfaces. ++ ++ If unsure, say N. ++ ++config TEST_KMOD ++ tristate "kmod stress tester" ++ depends on m ++ depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS ++ depends on NETDEVICES && NET_CORE && INET # for TUN ++ depends on BLOCK ++ select TEST_LKM ++ select XFS_FS ++ select TUN ++ select BTRFS_FS ++ help ++ Test the kernel's module loading mechanism: kmod. kmod implements ++ support to load modules using the Linux kernel's usermode helper. ++ This test provides a series of tests against kmod. ++ ++ Although technically you can either build test_kmod as a module or ++ into the kernel we disallow building it into the kernel since ++ it stress tests request_module() and this will very likely cause ++ some issues by taking over precious threads available from other ++ module load requests, ultimately this could be fatal. ++ ++ To run tests run: ++ ++ tools/testing/selftests/kmod/kmod.sh --help ++ ++ If unsure, say N. ++ ++config TEST_DEBUG_VIRTUAL ++ tristate "Test CONFIG_DEBUG_VIRTUAL feature" ++ depends on DEBUG_VIRTUAL ++ help ++ Test the kernel's ability to detect incorrect calls to ++ virt_to_phys() done against the non-linear part of the ++ kernel's virtual address map. ++ ++ If unsure, say N. ++ ++endif # RUNTIME_TESTING_MENU ++ ++config MEMTEST ++ bool "Memtest" ++ depends on HAVE_MEMBLOCK ++ ---help--- ++ This option adds a kernel parameter 'memtest', which allows memtest ++ to be set. ++ memtest=0, mean disabled; -- default ++ memtest=1, mean do 1 test pattern; ++ ... ++ memtest=17, mean do 17 test patterns. ++ If you are unsure how to answer this question, answer N. ++ ++config BUG_ON_DATA_CORRUPTION ++ bool "Trigger a BUG when data corruption is detected" ++ select DEBUG_LIST ++ help ++ Select this option if the kernel should BUG when it encounters ++ data corruption in kernel memory structures when they get checked ++ for validity. ++ ++ If unsure, say N. ++ ++source "samples/Kconfig" ++ ++source "lib/Kconfig.kgdb" ++ ++source "lib/Kconfig.ubsan" ++ ++config ARCH_HAS_DEVMEM_IS_ALLOWED ++ bool ++ ++config STRICT_DEVMEM ++ bool "Filter access to /dev/mem" ++ depends on MMU && DEVMEM ++ depends on ARCH_HAS_DEVMEM_IS_ALLOWED ++ default y if PPC || X86 || ARM64 ++ ---help--- ++ If this option is disabled, you allow userspace (root) access to all ++ of memory, including kernel and userspace memory. Accidental ++ access to this is obviously disastrous, but specific access can ++ be used by people debugging the kernel. Note that with PAT support ++ enabled, even in this case there are restrictions on /dev/mem ++ use due to the cache aliasing requirements. 
++ ++ If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem ++ file only allows userspace access to PCI space and the BIOS code and ++ data regions. This is sufficient for dosemu and X and all common ++ users of /dev/mem. ++ ++ If in doubt, say Y. ++ ++config IO_STRICT_DEVMEM ++ bool "Filter I/O access to /dev/mem" ++ depends on STRICT_DEVMEM ++ ---help--- ++ If this option is disabled, you allow userspace (root) access to all ++ io-memory regardless of whether a driver is actively using that ++ range. Accidental access to this is obviously disastrous, but ++ specific access can be used by people debugging kernel drivers. ++ ++ If this option is switched on, the /dev/mem file only allows ++ userspace access to *idle* io-memory ranges (see /proc/iomem) This ++ may break traditional users of /dev/mem (dosemu, legacy X, etc...) ++ if the driver using a given range cannot be disabled. ++ ++ If in doubt, say Y. ++ ++source "arch/$(SRCARCH)/Kconfig.debug" ++ ++endmenu # Kernel hacking +diff -uprN kernel/lib/smp_processor_id.c kernel_new/lib/smp_processor_id.c +--- kernel/lib/smp_processor_id.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/smp_processor_id.c 2021-04-01 18:28:07.816863108 +0800 +@@ -6,12 +6,19 @@ + */ + #include + #include ++#include + + notrace static unsigned int check_preemption_disabled(const char *what1, + const char *what2) + { + int this_cpu = raw_smp_processor_id(); + ++ if (hard_irqs_disabled()) ++ goto out; ++ ++ if (!ipipe_root_p) ++ goto out; ++ + if (likely(preempt_count())) + goto out; + +diff -uprN kernel/mm/memory.c kernel_new/mm/memory.c +--- kernel/mm/memory.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/mm/memory.c 2021-04-02 10:22:40.734935492 +0800 +@@ -55,6 +55,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -141,6 +142,9 @@ EXPORT_SYMBOL(zero_pfn); + + unsigned long highest_memmap_pfn __read_mostly; + ++static inline bool cow_user_page(struct page *dst, struct page *src, ++ struct vm_fault *vmf); ++ + /* + * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() + */ +@@ -710,8 +714,8 @@ out: + + static inline unsigned long + copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, +- pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, +- unsigned long addr, int *rss) ++ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, ++ unsigned long addr, int *rss, struct page *uncow_page) + { + unsigned long vm_flags = vma->vm_flags; + pte_t pte = *src_pte; +@@ -789,6 +793,24 @@ copy_one_pte(struct mm_struct *dst_mm, s + * in the parent and the child + */ + if (is_cow_mapping(vm_flags) && pte_write(pte)) { ++#ifdef CONFIG_IPIPE ++ if (uncow_page) { ++ struct page *old_page = vm_normal_page(vma, addr, pte); ++ struct vm_fault *vmf; ++ vmf->vma = vma; ++ vmf->address = addr; ++ cow_user_page(uncow_page, old_page, vmf); ++ pte = mk_pte(uncow_page, vma->vm_page_prot); ++ ++ if (vm_flags & VM_SHARED) ++ pte = pte_mkclean(pte); ++ pte = pte_mkold(pte); ++ ++ page_add_new_anon_rmap(uncow_page, vma, addr, false); ++ rss[!!PageAnon(uncow_page)]++; ++ goto out_set_pte; ++ } ++#endif /* CONFIG_IPIPE */ + ptep_set_wrprotect(src_mm, addr, src_pte); + pte = pte_wrprotect(pte); + } +@@ -836,13 +858,27 @@ static int copy_pte_range(struct mm_stru + int progress = 0; + int rss[NR_MM_COUNTERS]; + swp_entry_t entry = (swp_entry_t){0}; +- ++ struct page *uncow_page = NULL; ++#ifdef CONFIG_IPIPE ++ int do_cow_break = 0; ++again: ++ if (do_cow_break) { ++ uncow_page = 
alloc_page_vma(GFP_HIGHUSER, vma, addr); ++ if (uncow_page == NULL) ++ return -ENOMEM; ++ do_cow_break = 0; ++ } ++#else + again: ++#endif + init_rss_vec(rss); + + dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); +- if (!dst_pte) ++ if (!dst_pte) { ++ if (uncow_page) ++ put_page(uncow_page); + return -ENOMEM; ++ } + src_pte = pte_offset_map(src_pmd, addr); + src_ptl = pte_lockptr(src_mm, src_pmd); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); +@@ -865,8 +901,25 @@ again: + progress++; + continue; + } ++#ifdef CONFIG_IPIPE ++ if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) { ++ if (is_cow_mapping(vma->vm_flags) && ++ test_bit(MMF_VM_PINNED, &src_mm->flags) && ++ ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) { ++ arch_leave_lazy_mmu_mode(); ++ spin_unlock(src_ptl); ++ pte_unmap(src_pte); ++ add_mm_rss_vec(dst_mm, rss); ++ pte_unmap_unlock(dst_pte, dst_ptl); ++ cond_resched(); ++ do_cow_break = 1; ++ goto again; ++ } ++ } ++#endif + entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, +- vma, addr, rss); ++ vma, addr, rss, uncow_page); ++ uncow_page = NULL; + if (entry.val) + break; + progress += 8; +@@ -4678,6 +4731,41 @@ long copy_huge_page_from_user(struct pag + } + #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ + ++#ifdef CONFIG_IPIPE ++ ++int __ipipe_disable_ondemand_mappings(struct task_struct *tsk) ++{ ++ struct vm_area_struct *vma; ++ struct mm_struct *mm; ++ int result = 0; ++ ++ mm = get_task_mm(tsk); ++ if (!mm) ++ return -EPERM; ++ ++ down_write(&mm->mmap_sem); ++ if (test_bit(MMF_VM_PINNED, &mm->flags)) ++ goto done_mm; ++ ++ for (vma = mm->mmap; vma; vma = vma->vm_next) { ++ if (is_cow_mapping(vma->vm_flags) && ++ (vma->vm_flags & VM_WRITE)) { ++ result = __ipipe_pin_vma(mm, vma); ++ if (result < 0) ++ goto done_mm; ++ } ++ } ++ set_bit(MMF_VM_PINNED, &mm->flags); ++ ++ done_mm: ++ up_write(&mm->mmap_sem); ++ mmput(mm); ++ return result; ++} ++EXPORT_SYMBOL_GPL(__ipipe_disable_ondemand_mappings); ++ ++#endif /* CONFIG_IPIPE */ ++ + #if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS + + static struct kmem_cache *page_ptl_cachep; +diff -uprN kernel/mm/memory.c.orig kernel_new/mm/memory.c.orig +--- kernel/mm/memory.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/mm/memory.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,4706 @@ ++/* ++ * linux/mm/memory.c ++ * ++ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds ++ */ ++ ++/* ++ * demand-loading started 01.12.91 - seems it is high on the list of ++ * things wanted, and it should be easy to implement. - Linus ++ */ ++ ++/* ++ * Ok, demand-loading was easy, shared pages a little bit tricker. Shared ++ * pages started 02.12.91, seems to work. - Linus. ++ * ++ * Tested sharing by executing about 30 /bin/sh: under the old kernel it ++ * would have taken more than the 6M I have free, but it worked well as ++ * far as I could see. ++ * ++ * Also corrected some "invalidate()"s - I wasn't doing enough of them. ++ */ ++ ++/* ++ * Real VM (paging to/from disk) started 18.12.91. Much more work and ++ * thought has to go into this. Oh, well.. ++ * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why. ++ * Found it. Everything seems to work now. ++ * 20.12.91 - Ok, making the swap-device changeable like the root. ++ */ ++ ++/* ++ * 05.04.94 - Multi-page memory management added for v1.1. 
++ * Idea by Alex Bligh (alex@cconcepts.co.uk) ++ * ++ * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG ++ * (Gerhard.Wichert@pdb.siemens.de) ++ * ++ * Aug/Sep 2004 Changed to four level page tables (Andi Kleen) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "internal.h" ++ ++#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST) ++#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. ++#endif ++ ++#ifndef CONFIG_NEED_MULTIPLE_NODES ++/* use the per-pgdat data instead for discontigmem - mbligh */ ++unsigned long max_mapnr; ++EXPORT_SYMBOL(max_mapnr); ++ ++struct page *mem_map; ++EXPORT_SYMBOL(mem_map); ++#endif ++ ++/* ++ * A number of key systems in x86 including ioremap() rely on the assumption ++ * that high_memory defines the upper bound on direct map memory, then end ++ * of ZONE_NORMAL. Under CONFIG_DISCONTIG this means that max_low_pfn and ++ * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL ++ * and ZONE_HIGHMEM. ++ */ ++void *high_memory; ++EXPORT_SYMBOL(high_memory); ++ ++/* ++ * Randomize the address space (stacks, mmaps, brk, etc.). ++ * ++ * ( When CONFIG_COMPAT_BRK=y we exclude brk from randomization, ++ * as ancient (libc5 based) binaries can segfault. ) ++ */ ++int randomize_va_space __read_mostly = ++#ifdef CONFIG_COMPAT_BRK ++ 1; ++#else ++ 2; ++#endif ++ ++#ifndef arch_faults_on_old_pte ++static inline bool arch_faults_on_old_pte(void) ++{ ++ /* ++ * Those arches which don't have hw access flag feature need to ++ * implement their own helper. By default, "true" means pagefault ++ * will be hit on old pte. 
++ */ ++ return true; ++} ++#endif ++ ++static int __init disable_randmaps(char *s) ++{ ++ randomize_va_space = 0; ++ return 1; ++} ++__setup("norandmaps", disable_randmaps); ++ ++unsigned long zero_pfn __read_mostly; ++EXPORT_SYMBOL(zero_pfn); ++ ++unsigned long highest_memmap_pfn __read_mostly; ++ ++/* ++ * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() ++ */ ++static int __init init_zero_pfn(void) ++{ ++ zero_pfn = page_to_pfn(ZERO_PAGE(0)); ++ return 0; ++} ++core_initcall(init_zero_pfn); ++ ++ ++#if defined(SPLIT_RSS_COUNTING) ++ ++void sync_mm_rss(struct mm_struct *mm) ++{ ++ int i; ++ ++ for (i = 0; i < NR_MM_COUNTERS; i++) { ++ if (current->rss_stat.count[i]) { ++ add_mm_counter(mm, i, current->rss_stat.count[i]); ++ current->rss_stat.count[i] = 0; ++ } ++ } ++ current->rss_stat.events = 0; ++} ++ ++static void add_mm_counter_fast(struct mm_struct *mm, int member, int val) ++{ ++ struct task_struct *task = current; ++ ++ if (likely(task->mm == mm)) ++ task->rss_stat.count[member] += val; ++ else ++ add_mm_counter(mm, member, val); ++} ++#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1) ++#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1) ++ ++/* sync counter once per 64 page faults */ ++#define TASK_RSS_EVENTS_THRESH (64) ++static void check_sync_rss_stat(struct task_struct *task) ++{ ++ if (unlikely(task != current)) ++ return; ++ if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH)) ++ sync_mm_rss(task->mm); ++} ++#else /* SPLIT_RSS_COUNTING */ ++ ++#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member) ++#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member) ++ ++static void check_sync_rss_stat(struct task_struct *task) ++{ ++} ++ ++#endif /* SPLIT_RSS_COUNTING */ ++ ++/* ++ * Note: this doesn't free the actual pages themselves. That ++ * has been handled earlier when unmapping all the memory regions. 
++ */ ++static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, ++ unsigned long addr) ++{ ++ pgtable_t token = pmd_pgtable(*pmd); ++ pmd_clear(pmd); ++ pte_free_tlb(tlb, token, addr); ++ mm_dec_nr_ptes(tlb->mm); ++} ++ ++static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, ++ unsigned long addr, unsigned long end, ++ unsigned long floor, unsigned long ceiling) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ unsigned long start; ++ ++ start = addr; ++ pmd = pmd_offset(pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (pmd_none_or_clear_bad(pmd)) ++ continue; ++ free_pte_range(tlb, pmd, addr); ++ } while (pmd++, addr = next, addr != end); ++ ++ start &= PUD_MASK; ++ if (start < floor) ++ return; ++ if (ceiling) { ++ ceiling &= PUD_MASK; ++ if (!ceiling) ++ return; ++ } ++ if (end - 1 > ceiling - 1) ++ return; ++ ++ pmd = pmd_offset(pud, start); ++ pud_clear(pud); ++ pmd_free_tlb(tlb, pmd, start); ++ mm_dec_nr_pmds(tlb->mm); ++} ++ ++static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, ++ unsigned long addr, unsigned long end, ++ unsigned long floor, unsigned long ceiling) ++{ ++ pud_t *pud; ++ unsigned long next; ++ unsigned long start; ++ ++ start = addr; ++ pud = pud_offset(p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_none_or_clear_bad(pud)) ++ continue; ++ free_pmd_range(tlb, pud, addr, next, floor, ceiling); ++ } while (pud++, addr = next, addr != end); ++ ++ start &= P4D_MASK; ++ if (start < floor) ++ return; ++ if (ceiling) { ++ ceiling &= P4D_MASK; ++ if (!ceiling) ++ return; ++ } ++ if (end - 1 > ceiling - 1) ++ return; ++ ++ pud = pud_offset(p4d, start); ++ p4d_clear(p4d); ++ pud_free_tlb(tlb, pud, start); ++ mm_dec_nr_puds(tlb->mm); ++} ++ ++static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd, ++ unsigned long addr, unsigned long end, ++ unsigned long floor, unsigned long ceiling) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ unsigned long start; ++ ++ start = addr; ++ p4d = p4d_offset(pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ if (p4d_none_or_clear_bad(p4d)) ++ continue; ++ free_pud_range(tlb, p4d, addr, next, floor, ceiling); ++ } while (p4d++, addr = next, addr != end); ++ ++ start &= PGDIR_MASK; ++ if (start < floor) ++ return; ++ if (ceiling) { ++ ceiling &= PGDIR_MASK; ++ if (!ceiling) ++ return; ++ } ++ if (end - 1 > ceiling - 1) ++ return; ++ ++ p4d = p4d_offset(pgd, start); ++ pgd_clear(pgd); ++ p4d_free_tlb(tlb, p4d, start); ++} ++ ++/* ++ * This function frees user-level page tables of a process. ++ */ ++void free_pgd_range(struct mmu_gather *tlb, ++ unsigned long addr, unsigned long end, ++ unsigned long floor, unsigned long ceiling) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ ++ /* ++ * The next few lines have given us lots of grief... ++ * ++ * Why are we testing PMD* at this top level? Because often ++ * there will be no work to do at all, and we'd prefer not to ++ * go all the way down to the bottom just to discover that. ++ * ++ * Why all these "- 1"s? Because 0 represents both the bottom ++ * of the address space and the top of it (using -1 for the ++ * top wouldn't help much: the masks would do the wrong thing). ++ * The rule is that addr 0 and floor 0 refer to the bottom of ++ * the address space, but end 0 and ceiling 0 refer to the top ++ * Comparisons need to use "end - 1" and "ceiling - 1" (though ++ * that end 0 case should be mythical). 
++ * ++ * Wherever addr is brought up or ceiling brought down, we must ++ * be careful to reject "the opposite 0" before it confuses the ++ * subsequent tests. But what about where end is brought down ++ * by PMD_SIZE below? no, end can't go down to 0 there. ++ * ++ * Whereas we round start (addr) and ceiling down, by different ++ * masks at different levels, in order to test whether a table ++ * now has no other vmas using it, so can be freed, we don't ++ * bother to round floor or end up - the tests don't need that. ++ */ ++ ++ addr &= PMD_MASK; ++ if (addr < floor) { ++ addr += PMD_SIZE; ++ if (!addr) ++ return; ++ } ++ if (ceiling) { ++ ceiling &= PMD_MASK; ++ if (!ceiling) ++ return; ++ } ++ if (end - 1 > ceiling - 1) ++ end -= PMD_SIZE; ++ if (addr > end - 1) ++ return; ++ /* ++ * We add page table cache pages with PAGE_SIZE, ++ * (see pte_free_tlb()), flush the tlb if we need ++ */ ++ tlb_remove_check_page_size_change(tlb, PAGE_SIZE); ++ pgd = pgd_offset(tlb->mm, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(pgd)) ++ continue; ++ free_p4d_range(tlb, pgd, addr, next, floor, ceiling); ++ } while (pgd++, addr = next, addr != end); ++} ++ ++void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, ++ unsigned long floor, unsigned long ceiling) ++{ ++ while (vma) { ++ struct vm_area_struct *next = vma->vm_next; ++ unsigned long addr = vma->vm_start; ++ ++ /* ++ * Hide vma from rmap and truncate_pagecache before freeing ++ * pgtables ++ */ ++ unlink_anon_vmas(vma); ++ unlink_file_vma(vma); ++ ++ if (is_vm_hugetlb_page(vma)) { ++ hugetlb_free_pgd_range(tlb, addr, vma->vm_end, ++ floor, next ? next->vm_start : ceiling); ++ } else { ++ /* ++ * Optimization: gather nearby vmas into one call down ++ */ ++ while (next && next->vm_start <= vma->vm_end + PMD_SIZE ++ && !is_vm_hugetlb_page(next)) { ++ vma = next; ++ next = vma->vm_next; ++ unlink_anon_vmas(vma); ++ unlink_file_vma(vma); ++ } ++ free_pgd_range(tlb, addr, vma->vm_end, ++ floor, next ? next->vm_start : ceiling); ++ } ++ vma = next; ++ } ++} ++ ++int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) ++{ ++ spinlock_t *ptl; ++ pgtable_t new = pte_alloc_one(mm, address); ++ if (!new) ++ return -ENOMEM; ++ ++ /* ++ * Ensure all pte setup (eg. pte page lock and page clearing) are ++ * visible before the pte is made visible to other CPUs by being ++ * put into page tables. ++ * ++ * The other side of the story is the pointer chasing in the page ++ * table walking code (when walking the page table without locking; ++ * ie. most of the time). Fortunately, these data accesses consist ++ * of a chain of data-dependent loads, meaning most CPUs (alpha ++ * being the notable exception) will already guarantee loads are ++ * seen in-order. See the alpha page table accessors for the ++ * smp_read_barrier_depends() barriers in page table walking code. ++ */ ++ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ ++ ++ ptl = pmd_lock(mm, pmd); ++ if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ ++ mm_inc_nr_ptes(mm); ++ pmd_populate(mm, pmd, new); ++ new = NULL; ++ } ++ spin_unlock(ptl); ++ if (new) ++ pte_free(mm, new); ++ return 0; ++} ++ ++int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) ++{ ++ pte_t *new = pte_alloc_one_kernel(&init_mm, address); ++ if (!new) ++ return -ENOMEM; ++ ++ smp_wmb(); /* See comment in __pte_alloc */ ++ ++ spin_lock(&init_mm.page_table_lock); ++ if (likely(pmd_none(*pmd))) { /* Has another populated it ? 
*/ ++ pmd_populate_kernel(&init_mm, pmd, new); ++ new = NULL; ++ } ++ spin_unlock(&init_mm.page_table_lock); ++ if (new) ++ pte_free_kernel(&init_mm, new); ++ return 0; ++} ++ ++static inline void init_rss_vec(int *rss) ++{ ++ memset(rss, 0, sizeof(int) * NR_MM_COUNTERS); ++} ++ ++static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss) ++{ ++ int i; ++ ++ if (current->mm == mm) ++ sync_mm_rss(mm); ++ for (i = 0; i < NR_MM_COUNTERS; i++) ++ if (rss[i]) ++ add_mm_counter(mm, i, rss[i]); ++} ++ ++/* ++ * This function is called to print an error when a bad pte ++ * is found. For example, we might have a PFN-mapped pte in ++ * a region that doesn't allow it. ++ * ++ * The calling function must still handle the error. ++ */ ++static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, ++ pte_t pte, struct page *page) ++{ ++ pgd_t *pgd = pgd_offset(vma->vm_mm, addr); ++ p4d_t *p4d = p4d_offset(pgd, addr); ++ pud_t *pud = pud_offset(p4d, addr); ++ pmd_t *pmd = pmd_offset(pud, addr); ++ struct address_space *mapping; ++ pgoff_t index; ++ static unsigned long resume; ++ static unsigned long nr_shown; ++ static unsigned long nr_unshown; ++ ++ /* ++ * Allow a burst of 60 reports, then keep quiet for that minute; ++ * or allow a steady drip of one report per second. ++ */ ++ if (nr_shown == 60) { ++ if (time_before(jiffies, resume)) { ++ nr_unshown++; ++ return; ++ } ++ if (nr_unshown) { ++ pr_alert("BUG: Bad page map: %lu messages suppressed\n", ++ nr_unshown); ++ nr_unshown = 0; ++ } ++ nr_shown = 0; ++ } ++ if (nr_shown++ == 0) ++ resume = jiffies + 60 * HZ; ++ ++ mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; ++ index = linear_page_index(vma, addr); ++ ++ pr_alert("BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n", ++ current->comm, ++ (long long)pte_val(pte), (long long)pmd_val(*pmd)); ++ if (page) ++ dump_page(page, "bad pte"); ++ pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", ++ (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); ++ pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n", ++ vma->vm_file, ++ vma->vm_ops ? vma->vm_ops->fault : NULL, ++ vma->vm_file ? vma->vm_file->f_op->mmap : NULL, ++ mapping ? mapping->a_ops->readpage : NULL); ++ dump_stack(); ++ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); ++} ++ ++/* ++ * vm_normal_page -- This function gets the "struct page" associated with a pte. ++ * ++ * "Special" mappings do not wish to be associated with a "struct page" (either ++ * it doesn't exist, or it exists but they don't want to touch it). In this ++ * case, NULL is returned here. "Normal" mappings do have a struct page. ++ * ++ * There are 2 broad cases. Firstly, an architecture may define a pte_special() ++ * pte bit, in which case this function is trivial. Secondly, an architecture ++ * may not have a spare pte bit, which requires a more complicated scheme, ++ * described below. ++ * ++ * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a ++ * special mapping (even if there are underlying and valid "struct pages"). ++ * COWed pages of a VM_PFNMAP are always normal. ++ * ++ * The way we recognize COWed pages within VM_PFNMAP mappings is through the ++ * rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit ++ * set, and the vm_pgoff will point to the first PFN mapped: thus every special ++ * mapping will always honor the rule ++ * ++ * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT) ++ * ++ * And for normal mappings this is false. 
++ * ++ * This restricts such mappings to be a linear translation from virtual address ++ * to pfn. To get around this restriction, we allow arbitrary mappings so long ++ * as the vma is not a COW mapping; in that case, we know that all ptes are ++ * special (because none can have been COWed). ++ * ++ * ++ * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP. ++ * ++ * VM_MIXEDMAP mappings can likewise contain memory with or without "struct ++ * page" backing, however the difference is that _all_ pages with a struct ++ * page (that is, those where pfn_valid is true) are refcounted and considered ++ * normal pages by the VM. The disadvantage is that pages are refcounted ++ * (which can be slower and simply not an option for some PFNMAP users). The ++ * advantage is that we don't have to follow the strict linearity rule of ++ * PFNMAP mappings in order to support COWable mappings. ++ * ++ */ ++struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, ++ pte_t pte, bool with_public_device) ++{ ++ unsigned long pfn = pte_pfn(pte); ++ ++ if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) { ++ if (likely(!pte_special(pte))) ++ goto check_pfn; ++ if (vma->vm_ops && vma->vm_ops->find_special_page) ++ return vma->vm_ops->find_special_page(vma, addr); ++ if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) ++ return NULL; ++ if (is_zero_pfn(pfn)) ++ return NULL; ++ ++ /* ++ * Device public pages are special pages (they are ZONE_DEVICE ++ * pages but different from persistent memory). They behave ++ * allmost like normal pages. The difference is that they are ++ * not on the lru and thus should never be involve with any- ++ * thing that involve lru manipulation (mlock, numa balancing, ++ * ...). ++ * ++ * This is why we still want to return NULL for such page from ++ * vm_normal_page() so that we do not have to special case all ++ * call site of vm_normal_page(). ++ */ ++ if (likely(pfn <= highest_memmap_pfn)) { ++ struct page *page = pfn_to_page(pfn); ++ ++ if (is_device_public_page(page)) { ++ if (with_public_device) ++ return page; ++ return NULL; ++ } ++ } ++ ++ if (pte_devmap(pte)) ++ return NULL; ++ ++ print_bad_pte(vma, addr, pte, NULL); ++ return NULL; ++ } ++ ++ /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */ ++ ++ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { ++ if (vma->vm_flags & VM_MIXEDMAP) { ++ if (!pfn_valid(pfn)) ++ return NULL; ++ goto out; ++ } else { ++ unsigned long off; ++ off = (addr - vma->vm_start) >> PAGE_SHIFT; ++ if (pfn == vma->vm_pgoff + off) ++ return NULL; ++ if (!is_cow_mapping(vma->vm_flags)) ++ return NULL; ++ } ++ } ++ ++ if (is_zero_pfn(pfn)) ++ return NULL; ++ ++check_pfn: ++ if (unlikely(pfn > highest_memmap_pfn)) { ++ print_bad_pte(vma, addr, pte, NULL); ++ return NULL; ++ } ++ ++ /* ++ * NOTE! We still have PageReserved() pages in the page tables. ++ * eg. VDSO mappings can cause them to exist. ++ */ ++out: ++ return pfn_to_page(pfn); ++} ++ ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, ++ pmd_t pmd) ++{ ++ unsigned long pfn = pmd_pfn(pmd); ++ ++ /* ++ * There is no pmd_special() but there may be special pmds, e.g. ++ * in a direct-access (dax) mapping, so let's just replicate the ++ * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here. 
++ */ ++ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { ++ if (vma->vm_flags & VM_MIXEDMAP) { ++ if (!pfn_valid(pfn)) ++ return NULL; ++ goto out; ++ } else { ++ unsigned long off; ++ off = (addr - vma->vm_start) >> PAGE_SHIFT; ++ if (pfn == vma->vm_pgoff + off) ++ return NULL; ++ if (!is_cow_mapping(vma->vm_flags)) ++ return NULL; ++ } ++ } ++ ++ if (pmd_devmap(pmd)) ++ return NULL; ++ if (is_zero_pfn(pfn)) ++ return NULL; ++ if (unlikely(pfn > highest_memmap_pfn)) ++ return NULL; ++ ++ /* ++ * NOTE! We still have PageReserved() pages in the page tables. ++ * eg. VDSO mappings can cause them to exist. ++ */ ++out: ++ return pfn_to_page(pfn); ++} ++#endif ++ ++/* ++ * copy one vm_area from one task to the other. Assumes the page tables ++ * already present in the new task to be cleared in the whole range ++ * covered by this vma. ++ */ ++ ++static inline unsigned long ++copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, ++ unsigned long addr, int *rss) ++{ ++ unsigned long vm_flags = vma->vm_flags; ++ pte_t pte = *src_pte; ++ struct page *page; ++ ++ /* pte contains position in swap or file, so copy. */ ++ if (unlikely(!pte_present(pte))) { ++ swp_entry_t entry = pte_to_swp_entry(pte); ++ ++ if (likely(!non_swap_entry(entry))) { ++ if (swap_duplicate(entry) < 0) ++ return entry.val; ++ ++ /* make sure dst_mm is on swapoff's mmlist. */ ++ if (unlikely(list_empty(&dst_mm->mmlist))) { ++ spin_lock(&mmlist_lock); ++ if (list_empty(&dst_mm->mmlist)) ++ list_add(&dst_mm->mmlist, ++ &src_mm->mmlist); ++ spin_unlock(&mmlist_lock); ++ } ++ rss[MM_SWAPENTS]++; ++ } else if (is_migration_entry(entry)) { ++ page = migration_entry_to_page(entry); ++ ++ rss[mm_counter(page)]++; ++ ++ if (is_write_migration_entry(entry) && ++ is_cow_mapping(vm_flags)) { ++ /* ++ * COW mappings require pages in both ++ * parent and child to be set to read. ++ */ ++ make_migration_entry_read(&entry); ++ pte = swp_entry_to_pte(entry); ++ if (pte_swp_soft_dirty(*src_pte)) ++ pte = pte_swp_mksoft_dirty(pte); ++ set_pte_at(src_mm, addr, src_pte, pte); ++ } ++ } else if (is_device_private_entry(entry)) { ++ page = device_private_entry_to_page(entry); ++ ++ /* ++ * Update rss count even for unaddressable pages, as ++ * they should treated just like normal pages in this ++ * respect. ++ * ++ * We will likely want to have some new rss counters ++ * for unaddressable pages, at some point. But for now ++ * keep things as they are. ++ */ ++ get_page(page); ++ rss[mm_counter(page)]++; ++ page_dup_rmap(page, false); ++ ++ /* ++ * We do not preserve soft-dirty information, because so ++ * far, checkpoint/restore is the only feature that ++ * requires that. And checkpoint/restore does not work ++ * when a device driver is involved (you cannot easily ++ * save and restore device driver state). 
++ */ ++ if (is_write_device_private_entry(entry) && ++ is_cow_mapping(vm_flags)) { ++ make_device_private_entry_read(&entry); ++ pte = swp_entry_to_pte(entry); ++ set_pte_at(src_mm, addr, src_pte, pte); ++ } ++ } ++ goto out_set_pte; ++ } ++ ++ /* ++ * If it's a COW mapping, write protect it both ++ * in the parent and the child ++ */ ++ if (is_cow_mapping(vm_flags) && pte_write(pte)) { ++ ptep_set_wrprotect(src_mm, addr, src_pte); ++ pte = pte_wrprotect(pte); ++ } ++ ++ /* ++ * If it's a shared mapping, mark it clean in ++ * the child ++ */ ++ if (vm_flags & VM_SHARED) ++ pte = pte_mkclean(pte); ++ pte = pte_mkold(pte); ++ ++ page = vm_normal_page(vma, addr, pte); ++ if (page) { ++ get_page(page); ++ page_dup_rmap(page, false); ++ rss[mm_counter(page)]++; ++ } else if (pte_devmap(pte)) { ++ page = pte_page(pte); ++ ++ /* ++ * Cache coherent device memory behave like regular page and ++ * not like persistent memory page. For more informations see ++ * MEMORY_DEVICE_CACHE_COHERENT in memory_hotplug.h ++ */ ++ if (is_device_public_page(page)) { ++ get_page(page); ++ page_dup_rmap(page, false); ++ rss[mm_counter(page)]++; ++ } ++ } ++ ++out_set_pte: ++ set_pte_at(dst_mm, addr, dst_pte, pte); ++ return 0; ++} ++ ++static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ pte_t *orig_src_pte, *orig_dst_pte; ++ pte_t *src_pte, *dst_pte; ++ spinlock_t *src_ptl, *dst_ptl; ++ int progress = 0; ++ int rss[NR_MM_COUNTERS]; ++ swp_entry_t entry = (swp_entry_t){0}; ++ ++again: ++ init_rss_vec(rss); ++ ++ dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); ++ if (!dst_pte) ++ return -ENOMEM; ++ src_pte = pte_offset_map(src_pmd, addr); ++ src_ptl = pte_lockptr(src_mm, src_pmd); ++ spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); ++ orig_src_pte = src_pte; ++ orig_dst_pte = dst_pte; ++ arch_enter_lazy_mmu_mode(); ++ ++ do { ++ /* ++ * We are holding two locks at this point - either of them ++ * could generate latencies in another task on another CPU. 
++ */ ++ if (progress >= 32) { ++ progress = 0; ++ if (need_resched() || ++ spin_needbreak(src_ptl) || spin_needbreak(dst_ptl)) ++ break; ++ } ++ if (pte_none(*src_pte)) { ++ progress++; ++ continue; ++ } ++ entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, ++ vma, addr, rss); ++ if (entry.val) ++ break; ++ progress += 8; ++ } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); ++ ++ arch_leave_lazy_mmu_mode(); ++ spin_unlock(src_ptl); ++ pte_unmap(orig_src_pte); ++ add_mm_rss_vec(dst_mm, rss); ++ pte_unmap_unlock(orig_dst_pte, dst_ptl); ++ cond_resched(); ++ ++ if (entry.val) { ++ if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) ++ return -ENOMEM; ++ progress = 0; ++ } ++ if (addr != end) ++ goto again; ++ return 0; ++} ++ ++static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ pmd_t *src_pmd, *dst_pmd; ++ unsigned long next; ++ ++ dst_pmd = pmd_alloc(dst_mm, dst_pud, addr); ++ if (!dst_pmd) ++ return -ENOMEM; ++ src_pmd = pmd_offset(src_pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd) ++ || pmd_devmap(*src_pmd)) { ++ int err; ++ VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, vma); ++ err = copy_huge_pmd(dst_mm, src_mm, ++ dst_pmd, src_pmd, addr, vma); ++ if (err == -ENOMEM) ++ return -ENOMEM; ++ if (!err) ++ continue; ++ /* fall through */ ++ } ++ if (pmd_none_or_clear_bad(src_pmd)) ++ continue; ++ if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd, ++ vma, addr, next)) ++ return -ENOMEM; ++ } while (dst_pmd++, src_pmd++, addr = next, addr != end); ++ return 0; ++} ++ ++static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ pud_t *src_pud, *dst_pud; ++ unsigned long next; ++ ++ dst_pud = pud_alloc(dst_mm, dst_p4d, addr); ++ if (!dst_pud) ++ return -ENOMEM; ++ src_pud = pud_offset(src_p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { ++ int err; ++ ++ VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, vma); ++ err = copy_huge_pud(dst_mm, src_mm, ++ dst_pud, src_pud, addr, vma); ++ if (err == -ENOMEM) ++ return -ENOMEM; ++ if (!err) ++ continue; ++ /* fall through */ ++ } ++ if (pud_none_or_clear_bad(src_pud)) ++ continue; ++ if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud, ++ vma, addr, next)) ++ return -ENOMEM; ++ } while (dst_pud++, src_pud++, addr = next, addr != end); ++ return 0; ++} ++ ++static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ p4d_t *src_p4d, *dst_p4d; ++ unsigned long next; ++ ++ dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr); ++ if (!dst_p4d) ++ return -ENOMEM; ++ src_p4d = p4d_offset(src_pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ if (p4d_none_or_clear_bad(src_p4d)) ++ continue; ++ if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d, ++ vma, addr, next)) ++ return -ENOMEM; ++ } while (dst_p4d++, src_p4d++, addr = next, addr != end); ++ return 0; ++} ++ ++int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ struct vm_area_struct *vma) ++{ ++ pgd_t *src_pgd, *dst_pgd; ++ unsigned long next; ++ unsigned long addr = vma->vm_start; ++ unsigned long end = vma->vm_end; ++ 
unsigned long mmun_start; /* For mmu_notifiers */ ++ unsigned long mmun_end; /* For mmu_notifiers */ ++ bool is_cow; ++ int ret; ++ ++ /* ++ * Don't copy ptes where a page fault will fill them correctly. ++ * Fork becomes much lighter when there are big shared or private ++ * readonly mappings. The tradeoff is that copy_page_range is more ++ * efficient than faulting. ++ */ ++ if (!(vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) && ++ !vma->anon_vma) ++ return 0; ++ ++ if (is_vm_hugetlb_page(vma)) ++ return copy_hugetlb_page_range(dst_mm, src_mm, vma); ++ ++ if (unlikely(vma->vm_flags & VM_PFNMAP)) { ++ /* ++ * We do not free on error cases below as remove_vma ++ * gets called on error from higher level routine ++ */ ++ ret = track_pfn_copy(vma); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * We need to invalidate the secondary MMU mappings only when ++ * there could be a permission downgrade on the ptes of the ++ * parent mm. And a permission downgrade will only happen if ++ * is_cow_mapping() returns true. ++ */ ++ is_cow = is_cow_mapping(vma->vm_flags); ++ mmun_start = addr; ++ mmun_end = end; ++ if (is_cow) ++ mmu_notifier_invalidate_range_start(src_mm, mmun_start, ++ mmun_end); ++ ++ ret = 0; ++ dst_pgd = pgd_offset(dst_mm, addr); ++ src_pgd = pgd_offset(src_mm, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(src_pgd)) ++ continue; ++ if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd, ++ vma, addr, next))) { ++ ret = -ENOMEM; ++ break; ++ } ++ } while (dst_pgd++, src_pgd++, addr = next, addr != end); ++ ++ if (is_cow) ++ mmu_notifier_invalidate_range_end(src_mm, mmun_start, mmun_end); ++ return ret; ++} ++ ++static unsigned long zap_pte_range(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, pmd_t *pmd, ++ unsigned long addr, unsigned long end, ++ struct zap_details *details) ++{ ++ struct mm_struct *mm = tlb->mm; ++ int force_flush = 0; ++ int rss[NR_MM_COUNTERS]; ++ spinlock_t *ptl; ++ pte_t *start_pte; ++ pte_t *pte; ++ swp_entry_t entry; ++ ++ tlb_remove_check_page_size_change(tlb, PAGE_SIZE); ++again: ++ init_rss_vec(rss); ++ start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); ++ pte = start_pte; ++ flush_tlb_batched_pending(mm); ++ arch_enter_lazy_mmu_mode(); ++ do { ++ pte_t ptent = *pte; ++ if (pte_none(ptent)) ++ continue; ++ ++ if (pte_present(ptent)) { ++ struct page *page; ++ ++ page = _vm_normal_page(vma, addr, ptent, true); ++ if (unlikely(details) && page) { ++ /* ++ * unmap_shared_mapping_pages() wants to ++ * invalidate cache without truncating: ++ * unmap shared but keep private pages. 
++ */ ++ if (details->check_mapping && ++ details->check_mapping != page_rmapping(page)) ++ continue; ++ } ++ ptent = ptep_get_and_clear_full(mm, addr, pte, ++ tlb->fullmm); ++ tlb_remove_tlb_entry(tlb, pte, addr); ++ if (unlikely(!page)) ++ continue; ++ ++ if (!PageAnon(page)) { ++ if (pte_dirty(ptent)) { ++ force_flush = 1; ++ set_page_dirty(page); ++ } ++ if (pte_young(ptent) && ++ likely(!(vma->vm_flags & VM_SEQ_READ))) ++ mark_page_accessed(page); ++ } ++ rss[mm_counter(page)]--; ++ page_remove_rmap(page, false); ++ if (unlikely(page_mapcount(page) < 0)) ++ print_bad_pte(vma, addr, ptent, page); ++ if (unlikely(__tlb_remove_page(tlb, page))) { ++ force_flush = 1; ++ addr += PAGE_SIZE; ++ break; ++ } ++ continue; ++ } ++ ++ entry = pte_to_swp_entry(ptent); ++ if (non_swap_entry(entry) && is_device_private_entry(entry)) { ++ struct page *page = device_private_entry_to_page(entry); ++ ++ if (unlikely(details && details->check_mapping)) { ++ /* ++ * unmap_shared_mapping_pages() wants to ++ * invalidate cache without truncating: ++ * unmap shared but keep private pages. ++ */ ++ if (details->check_mapping != ++ page_rmapping(page)) ++ continue; ++ } ++ ++ pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); ++ rss[mm_counter(page)]--; ++ page_remove_rmap(page, false); ++ put_page(page); ++ continue; ++ } ++ ++ /* If details->check_mapping, we leave swap entries. */ ++ if (unlikely(details)) ++ continue; ++ ++ entry = pte_to_swp_entry(ptent); ++ if (!non_swap_entry(entry)) ++ rss[MM_SWAPENTS]--; ++ else if (is_migration_entry(entry)) { ++ struct page *page; ++ ++ page = migration_entry_to_page(entry); ++ rss[mm_counter(page)]--; ++ } ++ if (unlikely(!free_swap_and_cache(entry))) ++ print_bad_pte(vma, addr, ptent, NULL); ++ pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); ++ } while (pte++, addr += PAGE_SIZE, addr != end); ++ ++ add_mm_rss_vec(mm, rss); ++ arch_leave_lazy_mmu_mode(); ++ ++ /* Do the actual TLB flush before dropping ptl */ ++ if (force_flush) ++ tlb_flush_mmu_tlbonly(tlb); ++ pte_unmap_unlock(start_pte, ptl); ++ ++ /* ++ * If we forced a TLB flush (either due to running out of ++ * batch buffers or because we needed to flush dirty TLB ++ * entries before releasing the ptl), free the batched ++ * memory too. Restart if we didn't do everything. ++ */ ++ if (force_flush) { ++ force_flush = 0; ++ tlb_flush_mmu_free(tlb); ++ if (addr != end) ++ goto again; ++ } ++ ++ return addr; ++} ++ ++static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, pud_t *pud, ++ unsigned long addr, unsigned long end, ++ struct zap_details *details) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ ++ pmd = pmd_offset(pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { ++ if (next - addr != HPAGE_PMD_SIZE) ++ __split_huge_pmd(vma, pmd, addr, false, NULL); ++ else if (zap_huge_pmd(tlb, vma, pmd, addr)) ++ goto next; ++ /* fall through */ ++ } ++ /* ++ * Here there can be other concurrent MADV_DONTNEED or ++ * trans huge page faults running, and if the pmd is ++ * none or trans huge it can change under us. This is ++ * because MADV_DONTNEED holds the mmap_sem in read ++ * mode. 
++ */ ++ if (pmd_none_or_trans_huge_or_clear_bad(pmd)) ++ goto next; ++ next = zap_pte_range(tlb, vma, pmd, addr, next, details); ++next: ++ cond_resched(); ++ } while (pmd++, addr = next, addr != end); ++ ++ return addr; ++} ++ ++static inline unsigned long zap_pud_range(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, p4d_t *p4d, ++ unsigned long addr, unsigned long end, ++ struct zap_details *details) ++{ ++ pud_t *pud; ++ unsigned long next; ++ ++ pud = pud_offset(p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_trans_huge(*pud) || pud_devmap(*pud)) { ++ if (next - addr != HPAGE_PUD_SIZE) { ++ VM_BUG_ON_VMA(!rwsem_is_locked(&tlb->mm->mmap_sem), vma); ++ split_huge_pud(vma, pud, addr); ++ } else if (zap_huge_pud(tlb, vma, pud, addr)) ++ goto next; ++ /* fall through */ ++ } ++ if (pud_none_or_clear_bad(pud)) ++ continue; ++ next = zap_pmd_range(tlb, vma, pud, addr, next, details); ++next: ++ cond_resched(); ++ } while (pud++, addr = next, addr != end); ++ ++ return addr; ++} ++ ++static inline unsigned long zap_p4d_range(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, pgd_t *pgd, ++ unsigned long addr, unsigned long end, ++ struct zap_details *details) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ ++ p4d = p4d_offset(pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ if (p4d_none_or_clear_bad(p4d)) ++ continue; ++ next = zap_pud_range(tlb, vma, p4d, addr, next, details); ++ } while (p4d++, addr = next, addr != end); ++ ++ return addr; ++} ++ ++void unmap_page_range(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end, ++ struct zap_details *details) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ ++ BUG_ON(addr >= end); ++ tlb_start_vma(tlb, vma); ++ pgd = pgd_offset(vma->vm_mm, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(pgd)) ++ continue; ++ next = zap_p4d_range(tlb, vma, pgd, addr, next, details); ++ } while (pgd++, addr = next, addr != end); ++ tlb_end_vma(tlb, vma); ++} ++ ++ ++static void unmap_single_vma(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, unsigned long start_addr, ++ unsigned long end_addr, ++ struct zap_details *details) ++{ ++ unsigned long start = max(vma->vm_start, start_addr); ++ unsigned long end; ++ ++ if (start >= vma->vm_end) ++ return; ++ end = min(vma->vm_end, end_addr); ++ if (end <= vma->vm_start) ++ return; ++ ++ if (vma->vm_file) ++ uprobe_munmap(vma, start, end); ++ ++ if (unlikely(vma->vm_flags & VM_PFNMAP)) ++ untrack_pfn(vma, 0, 0); ++ ++ if (start != end) { ++ if (unlikely(is_vm_hugetlb_page(vma))) { ++ /* ++ * It is undesirable to test vma->vm_file as it ++ * should be non-null for valid hugetlb area. ++ * However, vm_file will be NULL in the error ++ * cleanup path of mmap_region. When ++ * hugetlbfs ->mmap method fails, ++ * mmap_region() nullifies vma->vm_file ++ * before calling this function to clean up. ++ * Since no pte has actually been setup, it is ++ * safe to do nothing in this case. 
++ */ ++ if (vma->vm_file) { ++ i_mmap_lock_write(vma->vm_file->f_mapping); ++ __unmap_hugepage_range_final(tlb, vma, start, end, NULL); ++ i_mmap_unlock_write(vma->vm_file->f_mapping); ++ } ++ } else ++ unmap_page_range(tlb, vma, start, end, details); ++ } ++} ++ ++/** ++ * unmap_vmas - unmap a range of memory covered by a list of vma's ++ * @tlb: address of the caller's struct mmu_gather ++ * @vma: the starting vma ++ * @start_addr: virtual address at which to start unmapping ++ * @end_addr: virtual address at which to end unmapping ++ * ++ * Unmap all pages in the vma list. ++ * ++ * Only addresses between `start' and `end' will be unmapped. ++ * ++ * The VMA list must be sorted in ascending virtual address order. ++ * ++ * unmap_vmas() assumes that the caller will flush the whole unmapped address ++ * range after unmap_vmas() returns. So the only responsibility here is to ++ * ensure that any thus-far unmapped pages are flushed before unmap_vmas() ++ * drops the lock and schedules. ++ */ ++void unmap_vmas(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, unsigned long start_addr, ++ unsigned long end_addr) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ ++ mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); ++ for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) ++ unmap_single_vma(tlb, vma, start_addr, end_addr, NULL); ++ mmu_notifier_invalidate_range_end(mm, start_addr, end_addr); ++} ++ ++/** ++ * zap_page_range - remove user pages in a given range ++ * @vma: vm_area_struct holding the applicable pages ++ * @start: starting address of pages to zap ++ * @size: number of bytes to zap ++ * ++ * Caller must protect the VMA list ++ */ ++void zap_page_range(struct vm_area_struct *vma, unsigned long start, ++ unsigned long size) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ struct mmu_gather tlb; ++ unsigned long end = start + size; ++ ++ lru_add_drain(); ++ tlb_gather_mmu(&tlb, mm, start, end); ++ update_hiwater_rss(mm); ++ mmu_notifier_invalidate_range_start(mm, start, end); ++ for ( ; vma && vma->vm_start < end; vma = vma->vm_next) ++ unmap_single_vma(&tlb, vma, start, end, NULL); ++ mmu_notifier_invalidate_range_end(mm, start, end); ++ tlb_finish_mmu(&tlb, start, end); ++} ++ ++/** ++ * zap_page_range_single - remove user pages in a given range ++ * @vma: vm_area_struct holding the applicable pages ++ * @address: starting address of pages to zap ++ * @size: number of bytes to zap ++ * @details: details of shared cache invalidation ++ * ++ * The range must fit into one VMA. ++ */ ++static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, ++ unsigned long size, struct zap_details *details) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ struct mmu_gather tlb; ++ unsigned long end = address + size; ++ ++ lru_add_drain(); ++ tlb_gather_mmu(&tlb, mm, address, end); ++ update_hiwater_rss(mm); ++ mmu_notifier_invalidate_range_start(mm, address, end); ++ unmap_single_vma(&tlb, vma, address, end, details); ++ mmu_notifier_invalidate_range_end(mm, address, end); ++ tlb_finish_mmu(&tlb, address, end); ++} ++ ++/** ++ * zap_vma_ptes - remove ptes mapping the vma ++ * @vma: vm_area_struct holding ptes to be zapped ++ * @address: starting address of pages to zap ++ * @size: number of bytes to zap ++ * ++ * This function only unmaps ptes assigned to VM_PFNMAP vmas. ++ * ++ * The entire address range must be fully contained within the vma. 
++ * ++ */ ++void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, ++ unsigned long size) ++{ ++ if (address < vma->vm_start || address + size > vma->vm_end || ++ !(vma->vm_flags & VM_PFNMAP)) ++ return; ++ ++ zap_page_range_single(vma, address, size, NULL); ++} ++EXPORT_SYMBOL_GPL(zap_vma_ptes); ++ ++pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, ++ spinlock_t **ptl) ++{ ++ pgd_t *pgd; ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ ++ pgd = pgd_offset(mm, addr); ++ p4d = p4d_alloc(mm, pgd, addr); ++ if (!p4d) ++ return NULL; ++ pud = pud_alloc(mm, p4d, addr); ++ if (!pud) ++ return NULL; ++ pmd = pmd_alloc(mm, pud, addr); ++ if (!pmd) ++ return NULL; ++ ++ VM_BUG_ON(pmd_trans_huge(*pmd)); ++ return pte_alloc_map_lock(mm, pmd, addr, ptl); ++} ++ ++/* ++ * This is the old fallback for page remapping. ++ * ++ * For historical reasons, it only allows reserved pages. Only ++ * old drivers should use this, and they needed to mark their ++ * pages reserved for the old functions anyway. ++ */ ++static int insert_page(struct vm_area_struct *vma, unsigned long addr, ++ struct page *page, pgprot_t prot) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ int retval; ++ pte_t *pte; ++ spinlock_t *ptl; ++ ++ retval = -EINVAL; ++ if (PageAnon(page)) ++ goto out; ++ retval = -ENOMEM; ++ flush_dcache_page(page); ++ pte = get_locked_pte(mm, addr, &ptl); ++ if (!pte) ++ goto out; ++ retval = -EBUSY; ++ if (!pte_none(*pte)) ++ goto out_unlock; ++ ++ /* Ok, finally just insert the thing.. */ ++ get_page(page); ++ inc_mm_counter_fast(mm, mm_counter_file(page)); ++ page_add_file_rmap(page, false); ++ set_pte_at(mm, addr, pte, mk_pte(page, prot)); ++ ++ retval = 0; ++ pte_unmap_unlock(pte, ptl); ++ return retval; ++out_unlock: ++ pte_unmap_unlock(pte, ptl); ++out: ++ return retval; ++} ++ ++/** ++ * vm_insert_page - insert single page into user vma ++ * @vma: user vma to map to ++ * @addr: target user address of this page ++ * @page: source kernel page ++ * ++ * This allows drivers to insert individual pages they've allocated ++ * into a user vma. ++ * ++ * The page has to be a nice clean _individual_ kernel allocation. ++ * If you allocate a compound page, you need to have marked it as ++ * such (__GFP_COMP), or manually just split the page up yourself ++ * (see split_page()). ++ * ++ * NOTE! Traditionally this was done with "remap_pfn_range()" which ++ * took an arbitrary page protection parameter. This doesn't allow ++ * that. Your vma protection will have to be set up correctly, which ++ * means that if you want a shared writable mapping, you'd better ++ * ask for a shared writable mapping! ++ * ++ * The page does not need to be reserved. ++ * ++ * Usually this function is called from f_op->mmap() handler ++ * under mm->mmap_sem write-lock, so it can change vma->vm_flags. ++ * Caller must set VM_MIXEDMAP on vma if it wants to call this ++ * function from other places, for example from page-fault handler. 
++ */ ++int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, ++ struct page *page) ++{ ++ if (addr < vma->vm_start || addr >= vma->vm_end) ++ return -EFAULT; ++ if (!page_count(page)) ++ return -EINVAL; ++ if (!(vma->vm_flags & VM_MIXEDMAP)) { ++ BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem)); ++ BUG_ON(vma->vm_flags & VM_PFNMAP); ++ vma->vm_flags |= VM_MIXEDMAP; ++ } ++ return insert_page(vma, addr, page, vma->vm_page_prot); ++} ++EXPORT_SYMBOL(vm_insert_page); ++ ++static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, ++ pfn_t pfn, pgprot_t prot, bool mkwrite) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ int retval; ++ pte_t *pte, entry; ++ spinlock_t *ptl; ++ ++ retval = -ENOMEM; ++ pte = get_locked_pte(mm, addr, &ptl); ++ if (!pte) ++ goto out; ++ retval = -EBUSY; ++ if (!pte_none(*pte)) { ++ if (mkwrite) { ++ /* ++ * For read faults on private mappings the PFN passed ++ * in may not match the PFN we have mapped if the ++ * mapped PFN is a writeable COW page. In the mkwrite ++ * case we are creating a writable PTE for a shared ++ * mapping and we expect the PFNs to match. If they ++ * don't match, we are likely racing with block ++ * allocation and mapping invalidation so just skip the ++ * update. ++ */ ++ if (pte_pfn(*pte) != pfn_t_to_pfn(pfn)) { ++ WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte))); ++ goto out_unlock; ++ } ++ entry = pte_mkyoung(*pte); ++ entry = maybe_mkwrite(pte_mkdirty(entry), vma); ++ if (ptep_set_access_flags(vma, addr, pte, entry, 1)) ++ update_mmu_cache(vma, addr, pte); ++ } ++ goto out_unlock; ++ } ++ ++ /* Ok, finally just insert the thing.. */ ++ if (pfn_t_devmap(pfn)) ++ entry = pte_mkdevmap(pfn_t_pte(pfn, prot)); ++ else ++ entry = pte_mkspecial(pfn_t_pte(pfn, prot)); ++ ++ if (mkwrite) { ++ entry = pte_mkyoung(entry); ++ entry = maybe_mkwrite(pte_mkdirty(entry), vma); ++ } ++ ++ set_pte_at(mm, addr, pte, entry); ++ update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */ ++ ++ retval = 0; ++out_unlock: ++ pte_unmap_unlock(pte, ptl); ++out: ++ return retval; ++} ++ ++/** ++ * vm_insert_pfn - insert single pfn into user vma ++ * @vma: user vma to map to ++ * @addr: target user address of this page ++ * @pfn: source kernel pfn ++ * ++ * Similar to vm_insert_page, this allows drivers to insert individual pages ++ * they've allocated into a user vma. Same comments apply. ++ * ++ * This function should only be called from a vm_ops->fault handler, and ++ * in that case the handler should return NULL. ++ * ++ * vma cannot be a COW mapping. ++ * ++ * As this is called only for pages that do not currently exist, we ++ * do not need to flush old virtual caches or the TLB. ++ */ ++int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, ++ unsigned long pfn) ++{ ++ return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); ++} ++EXPORT_SYMBOL(vm_insert_pfn); ++ ++/** ++ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot ++ * @vma: user vma to map to ++ * @addr: target user address of this page ++ * @pfn: source kernel pfn ++ * @pgprot: pgprot flags for the inserted page ++ * ++ * This is exactly like vm_insert_pfn, except that it allows drivers to ++ * to override pgprot on a per-page basis. ++ * ++ * This only makes sense for IO mappings, and it makes no sense for ++ * cow mappings. In general, using multiple vmas is preferable; ++ * vm_insert_pfn_prot should only be used if using multiple VMAs is ++ * impractical. 
++ */ ++int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, ++ unsigned long pfn, pgprot_t pgprot) ++{ ++ int ret; ++ /* ++ * Technically, architectures with pte_special can avoid all these ++ * restrictions (same for remap_pfn_range). However we would like ++ * consistency in testing and feature parity among all, so we should ++ * try to keep these invariants in place for everybody. ++ */ ++ BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); ++ BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == ++ (VM_PFNMAP|VM_MIXEDMAP)); ++ BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); ++ BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); ++ ++ if (addr < vma->vm_start || addr >= vma->vm_end) ++ return -EFAULT; ++ ++ if (!pfn_modify_allowed(pfn, pgprot)) ++ return -EACCES; ++ ++ track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)); ++ ++ ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot, ++ false); ++ ++ return ret; ++} ++EXPORT_SYMBOL(vm_insert_pfn_prot); ++ ++static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) ++{ ++ /* these checks mirror the abort conditions in vm_normal_page */ ++ if (vma->vm_flags & VM_MIXEDMAP) ++ return true; ++ if (pfn_t_devmap(pfn)) ++ return true; ++ if (pfn_t_special(pfn)) ++ return true; ++ if (is_zero_pfn(pfn_t_to_pfn(pfn))) ++ return true; ++ return false; ++} ++ ++static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, ++ pfn_t pfn, bool mkwrite) ++{ ++ pgprot_t pgprot = vma->vm_page_prot; ++ ++ BUG_ON(!vm_mixed_ok(vma, pfn)); ++ ++ if (addr < vma->vm_start || addr >= vma->vm_end) ++ return -EFAULT; ++ ++ track_pfn_insert(vma, &pgprot, pfn); ++ ++ if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot)) ++ return -EACCES; ++ ++ /* ++ * If we don't have pte special, then we have to use the pfn_valid() ++ * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* ++ * refcount the page if pfn_valid is true (hence insert_page rather ++ * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP ++ * without pte special, it would there be refcounted as a normal page. ++ */ ++ if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && ++ !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) { ++ struct page *page; ++ ++ /* ++ * At this point we are committed to insert_page() ++ * regardless of whether the caller specified flags that ++ * result in pfn_t_has_page() == false. ++ */ ++ page = pfn_to_page(pfn_t_to_pfn(pfn)); ++ return insert_page(vma, addr, page, pgprot); ++ } ++ return insert_pfn(vma, addr, pfn, pgprot, mkwrite); ++} ++ ++int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, ++ pfn_t pfn) ++{ ++ return __vm_insert_mixed(vma, addr, pfn, false); ++ ++} ++EXPORT_SYMBOL(vm_insert_mixed); ++ ++/* ++ * If the insertion of PTE failed because someone else already added a ++ * different entry in the mean time, we treat that as success as we assume ++ * the same entry was actually inserted. ++ */ ++ ++vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, ++ unsigned long addr, pfn_t pfn) ++{ ++ int err; ++ ++ err = __vm_insert_mixed(vma, addr, pfn, true); ++ if (err == -ENOMEM) ++ return VM_FAULT_OOM; ++ if (err < 0 && err != -EBUSY) ++ return VM_FAULT_SIGBUS; ++ return VM_FAULT_NOPAGE; ++} ++EXPORT_SYMBOL(vmf_insert_mixed_mkwrite); ++ ++/* ++ * maps a range of physical memory into the requested pages. the old ++ * mappings are removed. 
any references to nonexistent pages results ++ * in null mappings (currently treated as "copy-on-access") ++ */ ++static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, ++ unsigned long addr, unsigned long end, ++ unsigned long pfn, pgprot_t prot) ++{ ++ pte_t *pte; ++ spinlock_t *ptl; ++ int err = 0; ++ ++ pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); ++ if (!pte) ++ return -ENOMEM; ++ arch_enter_lazy_mmu_mode(); ++ do { ++ BUG_ON(!pte_none(*pte)); ++ if (!pfn_modify_allowed(pfn, prot)) { ++ err = -EACCES; ++ break; ++ } ++ set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); ++ pfn++; ++ } while (pte++, addr += PAGE_SIZE, addr != end); ++ arch_leave_lazy_mmu_mode(); ++ pte_unmap_unlock(pte - 1, ptl); ++ return err; ++} ++ ++static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, ++ unsigned long addr, unsigned long end, ++ unsigned long pfn, pgprot_t prot) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ int err; ++ ++ pfn -= addr >> PAGE_SHIFT; ++ pmd = pmd_alloc(mm, pud, addr); ++ if (!pmd) ++ return -ENOMEM; ++ VM_BUG_ON(pmd_trans_huge(*pmd)); ++ do { ++ next = pmd_addr_end(addr, end); ++ err = remap_pte_range(mm, pmd, addr, next, ++ pfn + (addr >> PAGE_SHIFT), prot); ++ if (err) ++ return err; ++ } while (pmd++, addr = next, addr != end); ++ return 0; ++} ++ ++static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, ++ unsigned long addr, unsigned long end, ++ unsigned long pfn, pgprot_t prot) ++{ ++ pud_t *pud; ++ unsigned long next; ++ int err; ++ ++ pfn -= addr >> PAGE_SHIFT; ++ pud = pud_alloc(mm, p4d, addr); ++ if (!pud) ++ return -ENOMEM; ++ do { ++ next = pud_addr_end(addr, end); ++ err = remap_pmd_range(mm, pud, addr, next, ++ pfn + (addr >> PAGE_SHIFT), prot); ++ if (err) ++ return err; ++ } while (pud++, addr = next, addr != end); ++ return 0; ++} ++ ++static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, ++ unsigned long addr, unsigned long end, ++ unsigned long pfn, pgprot_t prot) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ int err; ++ ++ pfn -= addr >> PAGE_SHIFT; ++ p4d = p4d_alloc(mm, pgd, addr); ++ if (!p4d) ++ return -ENOMEM; ++ do { ++ next = p4d_addr_end(addr, end); ++ err = remap_pud_range(mm, p4d, addr, next, ++ pfn + (addr >> PAGE_SHIFT), prot); ++ if (err) ++ return err; ++ } while (p4d++, addr = next, addr != end); ++ return 0; ++} ++ ++/** ++ * remap_pfn_range - remap kernel memory to userspace ++ * @vma: user vma to map to ++ * @addr: target user address to start at ++ * @pfn: physical address of kernel memory ++ * @size: size of map area ++ * @prot: page protection flags for this mapping ++ * ++ * Note: this is only safe if the mm semaphore is held when called. ++ */ ++int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, ++ unsigned long pfn, unsigned long size, pgprot_t prot) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ unsigned long end = addr + PAGE_ALIGN(size); ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long remap_pfn = pfn; ++ int err; ++ ++ /* ++ * Physically remapped pages are special. Tell the ++ * rest of the world about it: ++ * VM_IO tells people not to look at these pages ++ * (accesses can have side effects). ++ * VM_PFNMAP tells the core MM that the base pages are just ++ * raw PFN mappings, and do not have a "struct page" associated ++ * with them. ++ * VM_DONTEXPAND ++ * Disable vma merging and expanding with mremap(). ++ * VM_DONTDUMP ++ * Omit vma from core dump, even when VM_IO turned off. 
++ * ++ * There's a horrible special case to handle copy-on-write ++ * behaviour that some programs depend on. We mark the "original" ++ * un-COW'ed pages by matching them up with "vma->vm_pgoff". ++ * See vm_normal_page() for details. ++ */ ++ if (is_cow_mapping(vma->vm_flags)) { ++ if (addr != vma->vm_start || end != vma->vm_end) ++ return -EINVAL; ++ vma->vm_pgoff = pfn; ++ } ++ ++ err = track_pfn_remap(vma, &prot, remap_pfn, addr, PAGE_ALIGN(size)); ++ if (err) ++ return -EINVAL; ++ ++ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; ++ ++ BUG_ON(addr >= end); ++ pfn -= addr >> PAGE_SHIFT; ++ pgd = pgd_offset(mm, addr); ++ flush_cache_range(vma, addr, end); ++ do { ++ next = pgd_addr_end(addr, end); ++ err = remap_p4d_range(mm, pgd, addr, next, ++ pfn + (addr >> PAGE_SHIFT), prot); ++ if (err) ++ break; ++ } while (pgd++, addr = next, addr != end); ++ ++ if (err) ++ untrack_pfn(vma, remap_pfn, PAGE_ALIGN(size)); ++ ++ return err; ++} ++EXPORT_SYMBOL(remap_pfn_range); ++ ++/** ++ * vm_iomap_memory - remap memory to userspace ++ * @vma: user vma to map to ++ * @start: start of area ++ * @len: size of area ++ * ++ * This is a simplified io_remap_pfn_range() for common driver use. The ++ * driver just needs to give us the physical memory range to be mapped, ++ * we'll figure out the rest from the vma information. ++ * ++ * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get ++ * whatever write-combining details or similar. ++ */ ++int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) ++{ ++ unsigned long vm_len, pfn, pages; ++ ++ /* Check that the physical memory area passed in looks valid */ ++ if (start + len < start) ++ return -EINVAL; ++ /* ++ * You *really* shouldn't map things that aren't page-aligned, ++ * but we've historically allowed it because IO memory might ++ * just have smaller alignment. ++ */ ++ len += start & ~PAGE_MASK; ++ pfn = start >> PAGE_SHIFT; ++ pages = (len + ~PAGE_MASK) >> PAGE_SHIFT; ++ if (pfn + pages < pfn) ++ return -EINVAL; ++ ++ /* We start the mapping 'vm_pgoff' pages into the area */ ++ if (vma->vm_pgoff > pages) ++ return -EINVAL; ++ pfn += vma->vm_pgoff; ++ pages -= vma->vm_pgoff; ++ ++ /* Can we fit all of the mapping? */ ++ vm_len = vma->vm_end - vma->vm_start; ++ if (vm_len >> PAGE_SHIFT > pages) ++ return -EINVAL; ++ ++ /* Ok, let it rip */ ++ return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); ++} ++EXPORT_SYMBOL(vm_iomap_memory); ++ ++static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, ++ unsigned long addr, unsigned long end, ++ pte_fn_t fn, void *data) ++{ ++ pte_t *pte; ++ int err; ++ pgtable_t token; ++ spinlock_t *uninitialized_var(ptl); ++ ++ pte = (mm == &init_mm) ? 
++ pte_alloc_kernel(pmd, addr) : ++ pte_alloc_map_lock(mm, pmd, addr, &ptl); ++ if (!pte) ++ return -ENOMEM; ++ ++ BUG_ON(pmd_huge(*pmd)); ++ ++ arch_enter_lazy_mmu_mode(); ++ ++ token = pmd_pgtable(*pmd); ++ ++ do { ++ err = fn(pte++, token, addr, data); ++ if (err) ++ break; ++ } while (addr += PAGE_SIZE, addr != end); ++ ++ arch_leave_lazy_mmu_mode(); ++ ++ if (mm != &init_mm) ++ pte_unmap_unlock(pte-1, ptl); ++ return err; ++} ++ ++static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, ++ unsigned long addr, unsigned long end, ++ pte_fn_t fn, void *data) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ int err; ++ ++ BUG_ON(pud_huge(*pud)); ++ ++ pmd = pmd_alloc(mm, pud, addr); ++ if (!pmd) ++ return -ENOMEM; ++ do { ++ next = pmd_addr_end(addr, end); ++ err = apply_to_pte_range(mm, pmd, addr, next, fn, data); ++ if (err) ++ break; ++ } while (pmd++, addr = next, addr != end); ++ return err; ++} ++ ++static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, ++ unsigned long addr, unsigned long end, ++ pte_fn_t fn, void *data) ++{ ++ pud_t *pud; ++ unsigned long next; ++ int err; ++ ++ pud = pud_alloc(mm, p4d, addr); ++ if (!pud) ++ return -ENOMEM; ++ do { ++ next = pud_addr_end(addr, end); ++ err = apply_to_pmd_range(mm, pud, addr, next, fn, data); ++ if (err) ++ break; ++ } while (pud++, addr = next, addr != end); ++ return err; ++} ++ ++static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, ++ unsigned long addr, unsigned long end, ++ pte_fn_t fn, void *data) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ int err; ++ ++ p4d = p4d_alloc(mm, pgd, addr); ++ if (!p4d) ++ return -ENOMEM; ++ do { ++ next = p4d_addr_end(addr, end); ++ err = apply_to_pud_range(mm, p4d, addr, next, fn, data); ++ if (err) ++ break; ++ } while (p4d++, addr = next, addr != end); ++ return err; ++} ++ ++/* ++ * Scan a region of virtual memory, filling in page tables as necessary ++ * and calling a provided function on each leaf page table. ++ */ ++int apply_to_page_range(struct mm_struct *mm, unsigned long addr, ++ unsigned long size, pte_fn_t fn, void *data) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ unsigned long end = addr + size; ++ int err; ++ ++ if (WARN_ON(addr >= end)) ++ return -EINVAL; ++ ++ pgd = pgd_offset(mm, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); ++ if (err) ++ break; ++ } while (pgd++, addr = next, addr != end); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(apply_to_page_range); ++ ++/* ++ * handle_pte_fault chooses page fault handler according to an entry which was ++ * read non-atomically. Before making any commitment, on those architectures ++ * or configurations (e.g. i386 with PAE) which might give a mix of unmatched ++ * parts, do_swap_page must check under lock before unmapping the pte and ++ * proceeding (but do_wp_page is only called after already making such a check; ++ * and do_anonymous_page can safely check later on). 
++ */ ++static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, ++ pte_t *page_table, pte_t orig_pte) ++{ ++ int same = 1; ++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) ++ if (sizeof(pte_t) > sizeof(unsigned long)) { ++ spinlock_t *ptl = pte_lockptr(mm, pmd); ++ spin_lock(ptl); ++ same = pte_same(*page_table, orig_pte); ++ spin_unlock(ptl); ++ } ++#endif ++ pte_unmap(page_table); ++ return same; ++} ++ ++static inline bool cow_user_page(struct page *dst, struct page *src, ++ struct vm_fault *vmf) ++{ ++ bool ret; ++ void *kaddr; ++ void __user *uaddr; ++ bool locked = false; ++ struct vm_area_struct *vma = vmf->vma; ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long addr = vmf->address; ++ ++ debug_dma_assert_idle(src); ++ ++ if (likely(src)) { ++ copy_user_highpage(dst, src, addr, vma); ++ return true; ++ } ++ ++ /* ++ * If the source page was a PFN mapping, we don't have ++ * a "struct page" for it. We do a best-effort copy by ++ * just copying from the original user address. If that ++ * fails, we just zero-fill it. Live with it. ++ */ ++ kaddr = kmap_atomic(dst); ++ uaddr = (void __user *)(addr & PAGE_MASK); ++ ++ /* ++ * On architectures with software "accessed" bits, we would ++ * take a double page fault, so mark it accessed here. ++ */ ++ if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { ++ pte_t entry; ++ ++ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); ++ locked = true; ++ if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { ++ /* ++ * Other thread has already handled the fault ++ * and we don't need to do anything. If it's ++ * not the case, the fault will be triggered ++ * again on the same address. ++ */ ++ ret = false; ++ goto pte_unlock; ++ } ++ ++ entry = pte_mkyoung(vmf->orig_pte); ++ if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) ++ update_mmu_cache(vma, addr, vmf->pte); ++ } ++ ++ /* ++ * This really shouldn't fail, because the page is there ++ * in the page tables. But it might just be unreadable, ++ * in which case we just give up and fill the result with ++ * zeroes. ++ */ ++ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { ++ if (locked) ++ goto warn; ++ ++ /* Re-validate under PTL if the page is still mapped */ ++ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); ++ locked = true; ++ if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { ++ /* The PTE changed under us. Retry page fault. */ ++ ret = false; ++ goto pte_unlock; ++ } ++ ++ /* ++ * The same page can be mapped back since last copy attampt. ++ * Try to copy again under PTL. ++ */ ++ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { ++ /* ++ * Give a warn in case there can be some obscure ++ * use-case ++ */ ++warn: ++ WARN_ON_ONCE(1); ++ clear_page(kaddr); ++ } ++ } ++ ++ ret = true; ++ ++pte_unlock: ++ if (locked) ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ kunmap_atomic(kaddr); ++ flush_dcache_page(dst); ++ ++ return ret; ++} ++ ++static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) ++{ ++ struct file *vm_file = vma->vm_file; ++ ++ if (vm_file) ++ return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; ++ ++ /* ++ * Special mappings (e.g. VDSO) do not have any file so fake ++ * a default GFP_KERNEL for them. ++ */ ++ return GFP_KERNEL; ++} ++ ++/* ++ * Notify the address space that the page is about to become writable so that ++ * it can prohibit this or wait for the page to get into an appropriate state. ++ * ++ * We do this without the lock held, so that it can sleep if it needs to. 
++ */ ++static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) ++{ ++ vm_fault_t ret; ++ struct page *page = vmf->page; ++ unsigned int old_flags = vmf->flags; ++ ++ vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; ++ ++ ret = vmf->vma->vm_ops->page_mkwrite(vmf); ++ /* Restore original flags so that caller is not surprised */ ++ vmf->flags = old_flags; ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) ++ return ret; ++ if (unlikely(!(ret & VM_FAULT_LOCKED))) { ++ lock_page(page); ++ if (!page->mapping) { ++ unlock_page(page); ++ return 0; /* retry */ ++ } ++ ret |= VM_FAULT_LOCKED; ++ } else ++ VM_BUG_ON_PAGE(!PageLocked(page), page); ++ return ret; ++} ++ ++/* ++ * Handle dirtying of a page in shared file mapping on a write fault. ++ * ++ * The function expects the page to be locked and unlocks it. ++ */ ++static void fault_dirty_shared_page(struct vm_area_struct *vma, ++ struct page *page) ++{ ++ struct address_space *mapping; ++ bool dirtied; ++ bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; ++ ++ dirtied = set_page_dirty(page); ++ VM_BUG_ON_PAGE(PageAnon(page), page); ++ /* ++ * Take a local copy of the address_space - page.mapping may be zeroed ++ * by truncate after unlock_page(). The address_space itself remains ++ * pinned by vma->vm_file's reference. We rely on unlock_page()'s ++ * release semantics to prevent the compiler from undoing this copying. ++ */ ++ mapping = page_rmapping(page); ++ unlock_page(page); ++ ++ if ((dirtied || page_mkwrite) && mapping) { ++ /* ++ * Some device drivers do not set page.mapping ++ * but still dirty their pages ++ */ ++ balance_dirty_pages_ratelimited(mapping); ++ } ++ ++ if (!page_mkwrite) ++ file_update_time(vma->vm_file); ++} ++ ++/* ++ * Handle write page faults for pages that can be reused in the current vma ++ * ++ * This can happen either due to the mapping being with the VM_SHARED flag, ++ * or due to us being the last reference standing to the page. In either ++ * case, all we need to do here is to mark the page as writable and update ++ * any related book-keeping. ++ */ ++static inline void wp_page_reuse(struct vm_fault *vmf) ++ __releases(vmf->ptl) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct page *page = vmf->page; ++ pte_t entry; ++ /* ++ * Clear the pages cpupid information as the existing ++ * information potentially belongs to a now completely ++ * unrelated process. ++ */ ++ if (page) ++ page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); ++ ++ flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); ++ entry = pte_mkyoung(vmf->orig_pte); ++ entry = maybe_mkwrite(pte_mkdirty(entry), vma); ++ if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++} ++ ++/* ++ * Handle the case of a page which we actually need to copy to a new page. ++ * ++ * Called with mmap_sem locked and the old page referenced, but ++ * without the ptl held. ++ * ++ * High level logic flow: ++ * ++ * - Allocate a page, copy the content of the old page to the new one. ++ * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc. ++ * - Take the PTL. If the pte changed, bail out and release the allocated page ++ * - If the pte is still the way we remember it, update the page table and all ++ * relevant references. This includes dropping the reference the page-table ++ * held to the old page, as well as updating the rmap. ++ * - In any case, unlock the PTL and drop the reference we took to the old page. 
++ */ ++static vm_fault_t wp_page_copy(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct mm_struct *mm = vma->vm_mm; ++ struct page *old_page = vmf->page; ++ struct page *new_page = NULL; ++ pte_t entry; ++ int page_copied = 0; ++ const unsigned long mmun_start = vmf->address & PAGE_MASK; ++ const unsigned long mmun_end = mmun_start + PAGE_SIZE; ++ struct mem_cgroup *memcg; ++ ++ if (unlikely(anon_vma_prepare(vma))) ++ goto oom; ++ ++ if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { ++ new_page = alloc_zeroed_user_highpage_movable(vma, ++ vmf->address); ++ if (!new_page) ++ goto oom; ++ } else { ++ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, ++ vmf->address); ++ if (!new_page) ++ goto oom; ++ ++ if (!cow_user_page(new_page, old_page, vmf)) { ++ /* ++ * COW failed, if the fault was solved by other, ++ * it's fine. If not, userspace would re-fault on ++ * the same address and we will handle the fault ++ * from the second attempt. ++ */ ++ put_page(new_page); ++ if (old_page) ++ put_page(old_page); ++ return 0; ++ } ++ } ++ ++ if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false)) ++ goto oom_free_new; ++ ++ __SetPageUptodate(new_page); ++ ++ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); ++ ++ /* ++ * Re-check the pte - we dropped the lock ++ */ ++ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); ++ if (likely(pte_same(*vmf->pte, vmf->orig_pte))) { ++ if (old_page) { ++ if (!PageAnon(old_page)) { ++ dec_mm_counter_fast(mm, ++ mm_counter_file(old_page)); ++ inc_mm_counter_fast(mm, MM_ANONPAGES); ++ } ++ } else { ++ inc_mm_counter_fast(mm, MM_ANONPAGES); ++ } ++ flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); ++ entry = mk_pte(new_page, vma->vm_page_prot); ++ entry = maybe_mkwrite(pte_mkdirty(entry), vma); ++ /* ++ * Clear the pte entry and flush it first, before updating the ++ * pte with the new entry. This will avoid a race condition ++ * seen in the presence of one thread doing SMC and another ++ * thread doing COW. ++ */ ++ ptep_clear_flush_notify(vma, vmf->address, vmf->pte); ++ page_add_new_anon_rmap(new_page, vma, vmf->address, false); ++ mem_cgroup_commit_charge(new_page, memcg, false, false); ++ lru_cache_add_active_or_unevictable(new_page, vma); ++ /* ++ * We call the notify macro here because, when using secondary ++ * mmu page tables (such as kvm shadow page tables), we want the ++ * new page to be mapped directly into the secondary page table. ++ */ ++ set_pte_at_notify(mm, vmf->address, vmf->pte, entry); ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++ if (old_page) { ++ /* ++ * Only after switching the pte to the new page may ++ * we remove the mapcount here. Otherwise another ++ * process may come and find the rmap count decremented ++ * before the pte is switched to the new page, and ++ * "reuse" the old page writing into it while our pte ++ * here still points into it and can be read by other ++ * threads. ++ * ++ * The critical issue is to order this ++ * page_remove_rmap with the ptp_clear_flush above. ++ * Those stores are ordered by (if nothing else,) ++ * the barrier present in the atomic_add_negative ++ * in page_remove_rmap. ++ * ++ * Then the TLB flush in ptep_clear_flush ensures that ++ * no process can access the old page before the ++ * decremented mapcount is visible. And the old page ++ * cannot be reused until after the decremented ++ * mapcount is visible. So transitively, TLBs to ++ * old page will be flushed before it can be reused. 
++ */ ++ page_remove_rmap(old_page, false); ++ } ++ ++ /* Free the old page.. */ ++ new_page = old_page; ++ page_copied = 1; ++ } else { ++ mem_cgroup_cancel_charge(new_page, memcg, false); ++ } ++ ++ if (new_page) ++ put_page(new_page); ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ /* ++ * No need to double call mmu_notifier->invalidate_range() callback as ++ * the above ptep_clear_flush_notify() did already call it. ++ */ ++ mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end); ++ if (old_page) { ++ /* ++ * Don't let another task, with possibly unlocked vma, ++ * keep the mlocked page. ++ */ ++ if (page_copied && (vma->vm_flags & VM_LOCKED)) { ++ lock_page(old_page); /* LRU manipulation */ ++ if (PageMlocked(old_page)) ++ munlock_vma_page(old_page); ++ unlock_page(old_page); ++ } ++ put_page(old_page); ++ } ++ return page_copied ? VM_FAULT_WRITE : 0; ++oom_free_new: ++ put_page(new_page); ++oom: ++ if (old_page) ++ put_page(old_page); ++ return VM_FAULT_OOM; ++} ++ ++/** ++ * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE ++ * writeable once the page is prepared ++ * ++ * @vmf: structure describing the fault ++ * ++ * This function handles all that is needed to finish a write page fault in a ++ * shared mapping due to PTE being read-only once the mapped page is prepared. ++ * It handles locking of PTE and modifying it. The function returns ++ * VM_FAULT_WRITE on success, 0 when PTE got changed before we acquired PTE ++ * lock. ++ * ++ * The function expects the page to be locked or other protection against ++ * concurrent faults / writeback (such as DAX radix tree locks). ++ */ ++vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf) ++{ ++ WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); ++ vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, ++ &vmf->ptl); ++ /* ++ * We might have raced with another page fault while we released the ++ * pte_offset_map_lock. ++ */ ++ if (!pte_same(*vmf->pte, vmf->orig_pte)) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return VM_FAULT_NOPAGE; ++ } ++ wp_page_reuse(vmf); ++ return 0; ++} ++ ++/* ++ * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED ++ * mapping ++ */ ++static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ ++ if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { ++ vm_fault_t ret; ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ vmf->flags |= FAULT_FLAG_MKWRITE; ++ ret = vma->vm_ops->pfn_mkwrite(vmf); ++ if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) ++ return ret; ++ return finish_mkwrite_fault(vmf); ++ } ++ wp_page_reuse(vmf); ++ return VM_FAULT_WRITE; ++} ++ ++static vm_fault_t wp_page_shared(struct vm_fault *vmf) ++ __releases(vmf->ptl) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ ++ get_page(vmf->page); ++ ++ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { ++ vm_fault_t tmp; ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ tmp = do_page_mkwrite(vmf); ++ if (unlikely(!tmp || (tmp & ++ (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { ++ put_page(vmf->page); ++ return tmp; ++ } ++ tmp = finish_mkwrite_fault(vmf); ++ if (unlikely(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { ++ unlock_page(vmf->page); ++ put_page(vmf->page); ++ return tmp; ++ } ++ } else { ++ wp_page_reuse(vmf); ++ lock_page(vmf->page); ++ } ++ fault_dirty_shared_page(vma, vmf->page); ++ put_page(vmf->page); ++ ++ return VM_FAULT_WRITE; ++} ++ ++/* ++ * This routine handles present pages, when users try to write ++ * to a shared page. 
It is done by copying the page to a new address ++ * and decrementing the shared-page counter for the old page. ++ * ++ * Note that this routine assumes that the protection checks have been ++ * done by the caller (the low-level page fault routine in most cases). ++ * Thus we can safely just mark it writable once we've done any necessary ++ * COW. ++ * ++ * We also mark the page dirty at this point even though the page will ++ * change only once the write actually happens. This avoids a few races, ++ * and potentially makes it more efficient. ++ * ++ * We enter with non-exclusive mmap_sem (to exclude vma changes, ++ * but allow concurrent faults), with pte both mapped and locked. ++ * We return with mmap_sem still held, but pte unmapped and unlocked. ++ */ ++static vm_fault_t do_wp_page(struct vm_fault *vmf) ++ __releases(vmf->ptl) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ ++ vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); ++ if (!vmf->page) { ++ /* ++ * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a ++ * VM_PFNMAP VMA. ++ * ++ * We should not cow pages in a shared writeable mapping. ++ * Just mark the pages writable and/or call ops->pfn_mkwrite. ++ */ ++ if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == ++ (VM_WRITE|VM_SHARED)) ++ return wp_pfn_shared(vmf); ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return wp_page_copy(vmf); ++ } ++ ++ /* ++ * Take out anonymous pages first, anonymous shared vmas are ++ * not dirty accountable. ++ */ ++ if (PageAnon(vmf->page) && !PageKsm(vmf->page)) { ++ int total_map_swapcount; ++ if (!trylock_page(vmf->page)) { ++ get_page(vmf->page); ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ lock_page(vmf->page); ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, ++ vmf->address, &vmf->ptl); ++ if (!pte_same(*vmf->pte, vmf->orig_pte)) { ++ unlock_page(vmf->page); ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ put_page(vmf->page); ++ return 0; ++ } ++ put_page(vmf->page); ++ } ++ if (reuse_swap_page(vmf->page, &total_map_swapcount)) { ++ if (total_map_swapcount == 1) { ++ /* ++ * The page is all ours. Move it to ++ * our anon_vma so the rmap code will ++ * not search our parent or siblings. ++ * Protected against the rmap code by ++ * the page lock. ++ */ ++ page_move_anon_rmap(vmf->page, vma); ++ } ++ unlock_page(vmf->page); ++ wp_page_reuse(vmf); ++ return VM_FAULT_WRITE; ++ } ++ unlock_page(vmf->page); ++ } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == ++ (VM_WRITE|VM_SHARED))) { ++ return wp_page_shared(vmf); ++ } ++ ++ /* ++ * Ok, we need to copy. Oh, well.. 
++ */ ++ get_page(vmf->page); ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return wp_page_copy(vmf); ++} ++ ++static void unmap_mapping_range_vma(struct vm_area_struct *vma, ++ unsigned long start_addr, unsigned long end_addr, ++ struct zap_details *details) ++{ ++ zap_page_range_single(vma, start_addr, end_addr - start_addr, details); ++} ++ ++static inline void unmap_mapping_range_tree(struct rb_root_cached *root, ++ struct zap_details *details) ++{ ++ struct vm_area_struct *vma; ++ pgoff_t vba, vea, zba, zea; ++ ++ vma_interval_tree_foreach(vma, root, ++ details->first_index, details->last_index) { ++ ++ vba = vma->vm_pgoff; ++ vea = vba + vma_pages(vma) - 1; ++ zba = details->first_index; ++ if (zba < vba) ++ zba = vba; ++ zea = details->last_index; ++ if (zea > vea) ++ zea = vea; ++ ++ unmap_mapping_range_vma(vma, ++ ((zba - vba) << PAGE_SHIFT) + vma->vm_start, ++ ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, ++ details); ++ } ++} ++ ++/** ++ * unmap_mapping_pages() - Unmap pages from processes. ++ * @mapping: The address space containing pages to be unmapped. ++ * @start: Index of first page to be unmapped. ++ * @nr: Number of pages to be unmapped. 0 to unmap to end of file. ++ * @even_cows: Whether to unmap even private COWed pages. ++ * ++ * Unmap the pages in this address space from any userspace process which ++ * has them mmaped. Generally, you want to remove COWed pages as well when ++ * a file is being truncated, but not when invalidating pages from the page ++ * cache. ++ */ ++void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, ++ pgoff_t nr, bool even_cows) ++{ ++ struct zap_details details = { }; ++ ++ details.check_mapping = even_cows ? NULL : mapping; ++ details.first_index = start; ++ details.last_index = start + nr - 1; ++ if (details.last_index < details.first_index) ++ details.last_index = ULONG_MAX; ++ ++ i_mmap_lock_write(mapping); ++ if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) ++ unmap_mapping_range_tree(&mapping->i_mmap, &details); ++ i_mmap_unlock_write(mapping); ++} ++ ++/** ++ * unmap_mapping_range - unmap the portion of all mmaps in the specified ++ * address_space corresponding to the specified byte range in the underlying ++ * file. ++ * ++ * @mapping: the address space containing mmaps to be unmapped. ++ * @holebegin: byte in first page to unmap, relative to the start of ++ * the underlying file. This will be rounded down to a PAGE_SIZE ++ * boundary. Note that this is different from truncate_pagecache(), which ++ * must keep the partial page. In contrast, we must get rid of ++ * partial pages. ++ * @holelen: size of prospective hole in bytes. This will be rounded ++ * up to a PAGE_SIZE boundary. A holelen of zero truncates to the ++ * end of the file. ++ * @even_cows: 1 when truncating a file, unmap even private COWed pages; ++ * but 0 when invalidating pagecache, don't throw away private data. ++ */ ++void unmap_mapping_range(struct address_space *mapping, ++ loff_t const holebegin, loff_t const holelen, int even_cows) ++{ ++ pgoff_t hba = holebegin >> PAGE_SHIFT; ++ pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ ++ /* Check for overflow. 
*/ ++ if (sizeof(holelen) > sizeof(hlen)) { ++ long long holeend = ++ (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ if (holeend & ~(long long)ULONG_MAX) ++ hlen = ULONG_MAX - hba + 1; ++ } ++ ++ unmap_mapping_pages(mapping, hba, hlen, even_cows); ++} ++EXPORT_SYMBOL(unmap_mapping_range); ++ ++/* ++ * We enter with non-exclusive mmap_sem (to exclude vma changes, ++ * but allow concurrent faults), and pte mapped but not yet locked. ++ * We return with pte unmapped and unlocked. ++ * ++ * We return with the mmap_sem locked or unlocked in the same cases ++ * as does filemap_fault(). ++ */ ++vm_fault_t do_swap_page(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct page *page = NULL, *swapcache; ++ struct mem_cgroup *memcg; ++ swp_entry_t entry; ++ pte_t pte; ++ int locked; ++ int exclusive = 0; ++ vm_fault_t ret = 0; ++ ++ if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) ++ goto out; ++ ++ entry = pte_to_swp_entry(vmf->orig_pte); ++ if (unlikely(non_swap_entry(entry))) { ++ if (is_migration_entry(entry)) { ++ migration_entry_wait(vma->vm_mm, vmf->pmd, ++ vmf->address); ++ } else if (is_device_private_entry(entry)) { ++ /* ++ * For un-addressable device memory we call the pgmap ++ * fault handler callback. The callback must migrate ++ * the page back to some CPU accessible page. ++ */ ++ ret = device_private_entry_fault(vma, vmf->address, entry, ++ vmf->flags, vmf->pmd); ++ } else if (is_hwpoison_entry(entry)) { ++ ret = VM_FAULT_HWPOISON; ++ } else { ++ print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); ++ ret = VM_FAULT_SIGBUS; ++ } ++ goto out; ++ } ++ ++ ++ delayacct_set_flag(DELAYACCT_PF_SWAPIN); ++ page = lookup_swap_cache(entry, vma, vmf->address); ++ swapcache = page; ++ ++ if (!page) { ++ struct swap_info_struct *si = swp_swap_info(entry); ++ ++ if (si->flags & SWP_SYNCHRONOUS_IO && ++ __swap_count(entry) == 1) { ++ /* skip swapcache */ ++ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, ++ vmf->address); ++ if (page) { ++ __SetPageLocked(page); ++ __SetPageSwapBacked(page); ++ set_page_private(page, entry.val); ++ lru_cache_add_anon(page); ++ swap_readpage(page, true); ++ } ++ } else { ++ page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, ++ vmf); ++ swapcache = page; ++ } ++ ++ if (!page) { ++ /* ++ * Back out if somebody else faulted in this pte ++ * while we released the pte lock. ++ */ ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, ++ vmf->address, &vmf->ptl); ++ if (likely(pte_same(*vmf->pte, vmf->orig_pte))) ++ ret = VM_FAULT_OOM; ++ delayacct_clear_flag(DELAYACCT_PF_SWAPIN); ++ goto unlock; ++ } ++ ++ /* Had to read the page from swap area: Major fault */ ++ ret = VM_FAULT_MAJOR; ++ count_vm_event(PGMAJFAULT); ++ count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); ++ } else if (PageHWPoison(page)) { ++ /* ++ * hwpoisoned dirty swapcache pages are kept for killing ++ * owner processes (which may be unknown at hwpoison time) ++ */ ++ ret = VM_FAULT_HWPOISON; ++ delayacct_clear_flag(DELAYACCT_PF_SWAPIN); ++ goto out_release; ++ } ++ ++ locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); ++ ++ delayacct_clear_flag(DELAYACCT_PF_SWAPIN); ++ if (!locked) { ++ ret |= VM_FAULT_RETRY; ++ goto out_release; ++ } ++ ++ /* ++ * Make sure try_to_free_swap or reuse_swap_page or swapoff did not ++ * release the swapcache from under us. The page pin, and pte_same ++ * test below, are not enough to exclude that. Even if it is still ++ * swapcache, we need to check that the page's swap has not changed. 
++ */ ++ if (unlikely((!PageSwapCache(page) || ++ page_private(page) != entry.val)) && swapcache) ++ goto out_page; ++ ++ page = ksm_might_need_to_copy(page, vma, vmf->address); ++ if (unlikely(!page)) { ++ ret = VM_FAULT_OOM; ++ page = swapcache; ++ goto out_page; ++ } ++ ++ if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, ++ &memcg, false)) { ++ ret = VM_FAULT_OOM; ++ goto out_page; ++ } ++ ++ /* ++ * Back out if somebody else already faulted in this pte. ++ */ ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, ++ &vmf->ptl); ++ if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) ++ goto out_nomap; ++ ++ if (unlikely(!PageUptodate(page))) { ++ ret = VM_FAULT_SIGBUS; ++ goto out_nomap; ++ } ++ ++ /* ++ * The page isn't present yet, go ahead with the fault. ++ * ++ * Be careful about the sequence of operations here. ++ * To get its accounting right, reuse_swap_page() must be called ++ * while the page is counted on swap but not yet in mapcount i.e. ++ * before page_add_anon_rmap() and swap_free(); try_to_free_swap() ++ * must be called after the swap_free(), or it will never succeed. ++ */ ++ ++ inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); ++ dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); ++ pte = mk_pte(page, vma->vm_page_prot); ++ if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { ++ pte = maybe_mkwrite(pte_mkdirty(pte), vma); ++ vmf->flags &= ~FAULT_FLAG_WRITE; ++ ret |= VM_FAULT_WRITE; ++ exclusive = RMAP_EXCLUSIVE; ++ } ++ flush_icache_page(vma, page); ++ if (pte_swp_soft_dirty(vmf->orig_pte)) ++ pte = pte_mksoft_dirty(pte); ++ set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); ++ arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); ++ vmf->orig_pte = pte; ++ ++ /* ksm created a completely new copy */ ++ if (unlikely(page != swapcache && swapcache)) { ++ page_add_new_anon_rmap(page, vma, vmf->address, false); ++ mem_cgroup_commit_charge(page, memcg, false, false); ++ lru_cache_add_active_or_unevictable(page, vma); ++ } else { ++ do_page_add_anon_rmap(page, vma, vmf->address, exclusive); ++ mem_cgroup_commit_charge(page, memcg, true, false); ++ activate_page(page); ++ } ++ ++ swap_free(entry); ++ if (mem_cgroup_swap_full(page) || ++ (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) ++ try_to_free_swap(page); ++ unlock_page(page); ++ if (page != swapcache && swapcache) { ++ /* ++ * Hold the lock to avoid the swap entry to be reused ++ * until we take the PT lock for the pte_same() check ++ * (to avoid false positives from pte_same). For ++ * further safety release the lock after the swap_free ++ * so that the swap count won't change under a ++ * parallel locked swapcache. ++ */ ++ unlock_page(swapcache); ++ put_page(swapcache); ++ } ++ ++ if (vmf->flags & FAULT_FLAG_WRITE) { ++ ret |= do_wp_page(vmf); ++ if (ret & VM_FAULT_ERROR) ++ ret &= VM_FAULT_ERROR; ++ goto out; ++ } ++ ++ /* No need to invalidate - it was non-present before */ ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++unlock: ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++out: ++ return ret; ++out_nomap: ++ mem_cgroup_cancel_charge(page, memcg, false); ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++out_page: ++ unlock_page(page); ++out_release: ++ put_page(page); ++ if (page != swapcache && swapcache) { ++ unlock_page(swapcache); ++ put_page(swapcache); ++ } ++ return ret; ++} ++ ++/* ++ * We enter with non-exclusive mmap_sem (to exclude vma changes, ++ * but allow concurrent faults), and pte mapped but not yet locked. 
++ * We return with mmap_sem still held, but pte unmapped and unlocked. ++ */ ++static vm_fault_t do_anonymous_page(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct mem_cgroup *memcg; ++ struct page *page; ++ vm_fault_t ret = 0; ++ pte_t entry; ++ ++ /* File mapping without ->vm_ops ? */ ++ if (vma->vm_flags & VM_SHARED) ++ return VM_FAULT_SIGBUS; ++ ++ /* ++ * Use pte_alloc() instead of pte_alloc_map(). We can't run ++ * pte_offset_map() on pmds where a huge pmd might be created ++ * from a different thread. ++ * ++ * pte_alloc_map() is safe to use under down_write(mmap_sem) or when ++ * parallel threads are excluded by other means. ++ * ++ * Here we only have down_read(mmap_sem). ++ */ ++ if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address)) ++ return VM_FAULT_OOM; ++ ++ /* See the comment in pte_alloc_one_map() */ ++ if (unlikely(pmd_trans_unstable(vmf->pmd))) ++ return 0; ++ ++ /* Use the zero-page for reads */ ++ if (!(vmf->flags & FAULT_FLAG_WRITE) && ++ !mm_forbids_zeropage(vma->vm_mm)) { ++ entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), ++ vma->vm_page_prot)); ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, ++ vmf->address, &vmf->ptl); ++ if (!pte_none(*vmf->pte)) ++ goto unlock; ++ ret = check_stable_address_space(vma->vm_mm); ++ if (ret) ++ goto unlock; ++ /* Deliver the page fault to userland, check inside PT lock */ ++ if (userfaultfd_missing(vma)) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return handle_userfault(vmf, VM_UFFD_MISSING); ++ } ++ goto setpte; ++ } ++ ++ /* Allocate our own private page. */ ++ if (unlikely(anon_vma_prepare(vma))) ++ goto oom; ++ page = alloc_zeroed_user_highpage_movable(vma, vmf->address); ++ if (!page) ++ goto oom; ++ ++ if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg, ++ false)) ++ goto oom_free_page; ++ ++ /* ++ * The memory barrier inside __SetPageUptodate makes sure that ++ * preceeding stores to the page contents become visible before ++ * the set_pte_at() write. ++ */ ++ __SetPageUptodate(page); ++ ++ entry = mk_pte(page, vma->vm_page_prot); ++ if (vma->vm_flags & VM_WRITE) ++ entry = pte_mkwrite(pte_mkdirty(entry)); ++ ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, ++ &vmf->ptl); ++ if (!pte_none(*vmf->pte)) ++ goto release; ++ ++ ret = check_stable_address_space(vma->vm_mm); ++ if (ret) ++ goto release; ++ ++ /* Deliver the page fault to userland, check inside PT lock */ ++ if (userfaultfd_missing(vma)) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ mem_cgroup_cancel_charge(page, memcg, false); ++ put_page(page); ++ return handle_userfault(vmf, VM_UFFD_MISSING); ++ } ++ ++ inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); ++ page_add_new_anon_rmap(page, vma, vmf->address, false); ++ mem_cgroup_commit_charge(page, memcg, false, false); ++ lru_cache_add_active_or_unevictable(page, vma); ++setpte: ++ set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); ++ ++ /* No need to invalidate - it was non-present before */ ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++unlock: ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return ret; ++release: ++ mem_cgroup_cancel_charge(page, memcg, false); ++ put_page(page); ++ goto unlock; ++oom_free_page: ++ put_page(page); ++oom: ++ return VM_FAULT_OOM; ++} ++ ++/* ++ * The mmap_sem must have been held on entry, and may have been ++ * released depending on flags and vma->vm_ops->fault() return value. ++ * See filemap_fault() and __lock_page_retry(). 
++ */ ++static vm_fault_t __do_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ vm_fault_t ret; ++ ++ /* ++ * Preallocate pte before we take page_lock because this might lead to ++ * deadlocks for memcg reclaim which waits for pages under writeback: ++ * lock_page(A) ++ * SetPageWriteback(A) ++ * unlock_page(A) ++ * lock_page(B) ++ * lock_page(B) ++ * pte_alloc_pne ++ * shrink_page_list ++ * wait_on_page_writeback(A) ++ * SetPageWriteback(B) ++ * unlock_page(B) ++ * # flush A, B to clear the writeback ++ */ ++ if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { ++ vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm, ++ vmf->address); ++ if (!vmf->prealloc_pte) ++ return VM_FAULT_OOM; ++ smp_wmb(); /* See comment in __pte_alloc() */ ++ } ++ ++ ret = vma->vm_ops->fault(vmf); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | ++ VM_FAULT_DONE_COW))) ++ return ret; ++ ++ if (unlikely(PageHWPoison(vmf->page))) { ++ if (ret & VM_FAULT_LOCKED) ++ unlock_page(vmf->page); ++ put_page(vmf->page); ++ vmf->page = NULL; ++ return VM_FAULT_HWPOISON; ++ } ++ ++ if (unlikely(!(ret & VM_FAULT_LOCKED))) ++ lock_page(vmf->page); ++ else ++ VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); ++ ++ return ret; ++} ++ ++/* ++ * The ordering of these checks is important for pmds with _PAGE_DEVMAP set. ++ * If we check pmd_trans_unstable() first we will trip the bad_pmd() check ++ * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly ++ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output. ++ */ ++static int pmd_devmap_trans_unstable(pmd_t *pmd) ++{ ++ return pmd_devmap(*pmd) || pmd_trans_unstable(pmd); ++} ++ ++static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ ++ if (!pmd_none(*vmf->pmd)) ++ goto map_pte; ++ if (vmf->prealloc_pte) { ++ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); ++ if (unlikely(!pmd_none(*vmf->pmd))) { ++ spin_unlock(vmf->ptl); ++ goto map_pte; ++ } ++ ++ mm_inc_nr_ptes(vma->vm_mm); ++ pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); ++ spin_unlock(vmf->ptl); ++ vmf->prealloc_pte = NULL; ++ } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) { ++ return VM_FAULT_OOM; ++ } ++map_pte: ++ /* ++ * If a huge pmd materialized under us just retry later. Use ++ * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of ++ * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge ++ * under us and then back to pmd_none, as a result of MADV_DONTNEED ++ * running immediately after a huge pmd fault in a different thread of ++ * this mm, in turn leading to a misleading pmd_trans_huge() retval. ++ * All we have to ensure is that it is a regular pmd that we can walk ++ * with pte_offset_map() and we can do that through an atomic read in ++ * C, which is what pmd_trans_unstable() provides. ++ */ ++ if (pmd_devmap_trans_unstable(vmf->pmd)) ++ return VM_FAULT_NOPAGE; ++ ++ /* ++ * At this point we know that our vmf->pmd points to a page of ptes ++ * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge() ++ * for the duration of the fault. If a racing MADV_DONTNEED runs and ++ * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still ++ * be valid and we will re-check to make sure the vmf->pte isn't ++ * pte_none() under vmf->ptl protection when we return to ++ * alloc_set_pte(). 
++ */ ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, ++ &vmf->ptl); ++ return 0; ++} ++ ++#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE ++ ++#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1) ++static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, ++ unsigned long haddr) ++{ ++ if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) != ++ (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK)) ++ return false; ++ if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) ++ return false; ++ return true; ++} ++ ++static void deposit_prealloc_pte(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ ++ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); ++ /* ++ * We are going to consume the prealloc table, ++ * count that as nr_ptes. ++ */ ++ mm_inc_nr_ptes(vma->vm_mm); ++ vmf->prealloc_pte = NULL; ++} ++ ++static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ bool write = vmf->flags & FAULT_FLAG_WRITE; ++ unsigned long haddr = vmf->address & HPAGE_PMD_MASK; ++ pmd_t entry; ++ int i; ++ vm_fault_t ret; ++ ++ if (!transhuge_vma_suitable(vma, haddr)) ++ return VM_FAULT_FALLBACK; ++ ++ ret = VM_FAULT_FALLBACK; ++ page = compound_head(page); ++ ++ /* ++ * Archs like ppc64 need additonal space to store information ++ * related to pte entry. Use the preallocated table for that. ++ */ ++ if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { ++ vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address); ++ if (!vmf->prealloc_pte) ++ return VM_FAULT_OOM; ++ smp_wmb(); /* See comment in __pte_alloc() */ ++ } ++ ++ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); ++ if (unlikely(!pmd_none(*vmf->pmd))) ++ goto out; ++ ++ for (i = 0; i < HPAGE_PMD_NR; i++) ++ flush_icache_page(vma, page + i); ++ ++ entry = mk_huge_pmd(page, vma->vm_page_prot); ++ if (write) ++ entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); ++ ++ add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR); ++ page_add_file_rmap(page, true); ++ /* ++ * deposit and withdraw with pmd lock held ++ */ ++ if (arch_needs_pgtable_deposit()) ++ deposit_prealloc_pte(vmf); ++ ++ set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); ++ ++ update_mmu_cache_pmd(vma, haddr, vmf->pmd); ++ ++ /* fault is handled */ ++ ret = 0; ++ count_vm_event(THP_FILE_MAPPED); ++out: ++ spin_unlock(vmf->ptl); ++ return ret; ++} ++#else ++static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) ++{ ++ BUILD_BUG(); ++ return 0; ++} ++#endif ++ ++/** ++ * alloc_set_pte - setup new PTE entry for given page and add reverse page ++ * mapping. If needed, the fucntion allocates page table or use pre-allocated. ++ * ++ * @vmf: fault environment ++ * @memcg: memcg to charge page (only for private mappings) ++ * @page: page to map ++ * ++ * Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on ++ * return. ++ * ++ * Target users are page handler itself and implementations of ++ * vm_ops->map_pages. ++ */ ++vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, ++ struct page *page) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ bool write = vmf->flags & FAULT_FLAG_WRITE; ++ pte_t entry; ++ vm_fault_t ret; ++ ++ if (pmd_none(*vmf->pmd) && PageTransCompound(page) && ++ IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { ++ /* THP on COW? 
*/ ++ VM_BUG_ON_PAGE(memcg, page); ++ ++ ret = do_set_pmd(vmf, page); ++ if (ret != VM_FAULT_FALLBACK) ++ return ret; ++ } ++ ++ if (!vmf->pte) { ++ ret = pte_alloc_one_map(vmf); ++ if (ret) ++ return ret; ++ } ++ ++ /* Re-check under ptl */ ++ if (unlikely(!pte_none(*vmf->pte))) ++ return VM_FAULT_NOPAGE; ++ ++ flush_icache_page(vma, page); ++ entry = mk_pte(page, vma->vm_page_prot); ++ if (write) ++ entry = maybe_mkwrite(pte_mkdirty(entry), vma); ++ /* copy-on-write page */ ++ if (write && !(vma->vm_flags & VM_SHARED)) { ++ inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); ++ page_add_new_anon_rmap(page, vma, vmf->address, false); ++ mem_cgroup_commit_charge(page, memcg, false, false); ++ lru_cache_add_active_or_unevictable(page, vma); ++ } else { ++ inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); ++ page_add_file_rmap(page, false); ++ } ++ set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); ++ ++ /* no need to invalidate: a not-present page won't be cached */ ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++ ++ return 0; ++} ++ ++ ++/** ++ * finish_fault - finish page fault once we have prepared the page to fault ++ * ++ * @vmf: structure describing the fault ++ * ++ * This function handles all that is needed to finish a page fault once the ++ * page to fault in is prepared. It handles locking of PTEs, inserts PTE for ++ * given page, adds reverse page mapping, handles memcg charges and LRU ++ * addition. The function returns 0 on success, VM_FAULT_ code in case of ++ * error. ++ * ++ * The function expects the page to be locked and on success it consumes a ++ * reference of a page being mapped (for the PTE which maps it). ++ */ ++vm_fault_t finish_fault(struct vm_fault *vmf) ++{ ++ struct page *page; ++ vm_fault_t ret = 0; ++ ++ /* Did we COW the page? */ ++ if ((vmf->flags & FAULT_FLAG_WRITE) && ++ !(vmf->vma->vm_flags & VM_SHARED)) ++ page = vmf->cow_page; ++ else ++ page = vmf->page; ++ ++ /* ++ * check even for read faults because we might have lost our CoWed ++ * page ++ */ ++ if (!(vmf->vma->vm_flags & VM_SHARED)) ++ ret = check_stable_address_space(vmf->vma->vm_mm); ++ if (!ret) ++ ret = alloc_set_pte(vmf, vmf->memcg, page); ++ if (vmf->pte) ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return ret; ++} ++ ++static unsigned long fault_around_bytes __read_mostly = ++ rounddown_pow_of_two(65536); ++ ++#ifdef CONFIG_DEBUG_FS ++static int fault_around_bytes_get(void *data, u64 *val) ++{ ++ *val = fault_around_bytes; ++ return 0; ++} ++ ++/* ++ * fault_around_bytes must be rounded down to the nearest page order as it's ++ * what do_fault_around() expects to see. ++ */ ++static int fault_around_bytes_set(void *data, u64 val) ++{ ++ if (val / PAGE_SIZE > PTRS_PER_PTE) ++ return -EINVAL; ++ if (val > PAGE_SIZE) ++ fault_around_bytes = rounddown_pow_of_two(val); ++ else ++ fault_around_bytes = PAGE_SIZE; /* rounddown_pow_of_two(0) is undefined */ ++ return 0; ++} ++DEFINE_DEBUGFS_ATTRIBUTE(fault_around_bytes_fops, ++ fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); ++ ++static int __init fault_around_debugfs(void) ++{ ++ void *ret; ++ ++ ret = debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL, ++ &fault_around_bytes_fops); ++ if (!ret) ++ pr_warn("Failed to create fault_around_bytes in debugfs"); ++ return 0; ++} ++late_initcall(fault_around_debugfs); ++#endif ++ ++/* ++ * do_fault_around() tries to map few pages around the fault address. The hope ++ * is that the pages will be needed soon and this will lower the number of ++ * faults to handle. 
++ * ++ * It uses vm_ops->map_pages() to map the pages, which skips the page if it's ++ * not ready to be mapped: not up-to-date, locked, etc. ++ * ++ * This function is called with the page table lock taken. In the split ptlock ++ * case the page table lock only protects only those entries which belong to ++ * the page table corresponding to the fault address. ++ * ++ * This function doesn't cross the VMA boundaries, in order to call map_pages() ++ * only once. ++ * ++ * fault_around_bytes defines how many bytes we'll try to map. ++ * do_fault_around() expects it to be set to a power of two less than or equal ++ * to PTRS_PER_PTE. ++ * ++ * The virtual address of the area that we map is naturally aligned to ++ * fault_around_bytes rounded down to the machine page size ++ * (and therefore to page order). This way it's easier to guarantee ++ * that we don't cross page table boundaries. ++ */ ++static vm_fault_t do_fault_around(struct vm_fault *vmf) ++{ ++ unsigned long address = vmf->address, nr_pages, mask; ++ pgoff_t start_pgoff = vmf->pgoff; ++ pgoff_t end_pgoff; ++ int off; ++ vm_fault_t ret = 0; ++ ++ nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; ++ mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; ++ ++ vmf->address = max(address & mask, vmf->vma->vm_start); ++ off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); ++ start_pgoff -= off; ++ ++ /* ++ * end_pgoff is either the end of the page table, the end of ++ * the vma or nr_pages from start_pgoff, depending what is nearest. ++ */ ++ end_pgoff = start_pgoff - ++ ((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + ++ PTRS_PER_PTE - 1; ++ end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, ++ start_pgoff + nr_pages - 1); ++ ++ if (pmd_none(*vmf->pmd)) { ++ vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm, ++ vmf->address); ++ if (!vmf->prealloc_pte) ++ goto out; ++ smp_wmb(); /* See comment in __pte_alloc() */ ++ } ++ ++ vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); ++ ++ /* Huge page is mapped? Page fault is solved */ ++ if (pmd_trans_huge(*vmf->pmd)) { ++ ret = VM_FAULT_NOPAGE; ++ goto out; ++ } ++ ++ /* ->map_pages() haven't done anything useful. Cold page cache? */ ++ if (!vmf->pte) ++ goto out; ++ ++ /* check if the page fault is solved */ ++ vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT); ++ if (!pte_none(*vmf->pte)) ++ ret = VM_FAULT_NOPAGE; ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++out: ++ vmf->address = address; ++ vmf->pte = NULL; ++ return ret; ++} ++ ++static vm_fault_t do_read_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ vm_fault_t ret = 0; ++ ++ /* ++ * Let's call ->map_pages() first and use ->fault() as fallback ++ * if page by the offset is not ready to be mapped (cold cache or ++ * something). 
++ */ ++ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { ++ ret = do_fault_around(vmf); ++ if (ret) ++ return ret; ++ } ++ ++ ret = __do_fault(vmf); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) ++ return ret; ++ ++ ret |= finish_fault(vmf); ++ unlock_page(vmf->page); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) ++ put_page(vmf->page); ++ return ret; ++} ++ ++static vm_fault_t do_cow_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ vm_fault_t ret; ++ ++ if (unlikely(anon_vma_prepare(vma))) ++ return VM_FAULT_OOM; ++ ++ vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); ++ if (!vmf->cow_page) ++ return VM_FAULT_OOM; ++ ++ if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL, ++ &vmf->memcg, false)) { ++ put_page(vmf->cow_page); ++ return VM_FAULT_OOM; ++ } ++ ++ ret = __do_fault(vmf); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) ++ goto uncharge_out; ++ if (ret & VM_FAULT_DONE_COW) ++ return ret; ++ ++ copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); ++ __SetPageUptodate(vmf->cow_page); ++ ++ ret |= finish_fault(vmf); ++ unlock_page(vmf->page); ++ put_page(vmf->page); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) ++ goto uncharge_out; ++ return ret; ++uncharge_out: ++ mem_cgroup_cancel_charge(vmf->cow_page, vmf->memcg, false); ++ put_page(vmf->cow_page); ++ return ret; ++} ++ ++static vm_fault_t do_shared_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ vm_fault_t ret, tmp; ++ ++ ret = __do_fault(vmf); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) ++ return ret; ++ ++ /* ++ * Check if the backing address space wants to know that the page is ++ * about to become writable ++ */ ++ if (vma->vm_ops->page_mkwrite) { ++ unlock_page(vmf->page); ++ tmp = do_page_mkwrite(vmf); ++ if (unlikely(!tmp || ++ (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { ++ put_page(vmf->page); ++ return tmp; ++ } ++ } ++ ++ ret |= finish_fault(vmf); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | ++ VM_FAULT_RETRY))) { ++ unlock_page(vmf->page); ++ put_page(vmf->page); ++ return ret; ++ } ++ ++ fault_dirty_shared_page(vma, vmf->page); ++ return ret; ++} ++ ++/* ++ * We enter with non-exclusive mmap_sem (to exclude vma changes, ++ * but allow concurrent faults). ++ * The mmap_sem may have been released depending on flags and our ++ * return value. See filemap_fault() and __lock_page_or_retry(). ++ * If mmap_sem is released, vma may become invalid (for example ++ * by other thread calling munmap()). ++ */ ++static vm_fault_t do_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct mm_struct *vm_mm = vma->vm_mm; ++ vm_fault_t ret; ++ ++ /* ++ * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND ++ */ ++ if (!vma->vm_ops->fault) { ++ /* ++ * If we find a migration pmd entry or a none pmd entry, which ++ * should never happen, return SIGBUS ++ */ ++ if (unlikely(!pmd_present(*vmf->pmd))) ++ ret = VM_FAULT_SIGBUS; ++ else { ++ vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, ++ vmf->pmd, ++ vmf->address, ++ &vmf->ptl); ++ /* ++ * Make sure this is not a temporary clearing of pte ++ * by holding ptl and checking again. A R/M/W update ++ * of pte involves: take ptl, clearing the pte so that ++ * we don't have concurrent modification by hardware ++ * followed by an update. 
++ */ ++ if (unlikely(pte_none(*vmf->pte))) ++ ret = VM_FAULT_SIGBUS; ++ else ++ ret = VM_FAULT_NOPAGE; ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ } ++ } else if (!(vmf->flags & FAULT_FLAG_WRITE)) ++ ret = do_read_fault(vmf); ++ else if (!(vma->vm_flags & VM_SHARED)) ++ ret = do_cow_fault(vmf); ++ else ++ ret = do_shared_fault(vmf); ++ ++ /* preallocated pagetable is unused: free it */ ++ if (vmf->prealloc_pte) { ++ pte_free(vm_mm, vmf->prealloc_pte); ++ vmf->prealloc_pte = NULL; ++ } ++ return ret; ++} ++ ++static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, ++ unsigned long addr, int page_nid, ++ int *flags) ++{ ++ get_page(page); ++ ++ count_vm_numa_event(NUMA_HINT_FAULTS); ++ if (page_nid == numa_node_id()) { ++ count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); ++ *flags |= TNF_FAULT_LOCAL; ++ } ++ ++ return mpol_misplaced(page, vma, addr); ++} ++ ++static vm_fault_t do_numa_page(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct page *page = NULL; ++ int page_nid = -1; ++ int last_cpupid; ++ int target_nid; ++ bool migrated = false; ++ pte_t pte; ++ bool was_writable = pte_savedwrite(vmf->orig_pte); ++ int flags = 0; ++ ++ /* ++ * The "pte" at this point cannot be used safely without ++ * validation through pte_unmap_same(). It's of NUMA type but ++ * the pfn may be screwed if the read is non atomic. ++ */ ++ vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd); ++ spin_lock(vmf->ptl); ++ if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ goto out; ++ } ++ ++ /* ++ * Make it present again, Depending on how arch implementes non ++ * accessible ptes, some can allow access by kernel mode. ++ */ ++ pte = ptep_modify_prot_start(vma->vm_mm, vmf->address, vmf->pte); ++ pte = pte_modify(pte, vma->vm_page_prot); ++ pte = pte_mkyoung(pte); ++ if (was_writable) ++ pte = pte_mkwrite(pte); ++ ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte); ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++ ++ page = vm_normal_page(vma, vmf->address, pte); ++ if (!page) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return 0; ++ } ++ ++ /* TODO: handle PTE-mapped THP */ ++ if (PageCompound(page)) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return 0; ++ } ++ ++ /* ++ * Avoid grouping on RO pages in general. RO pages shouldn't hurt as ++ * much anyway since they can be in shared cache state. This misses ++ * the case where a mapping is writable but the process never writes ++ * to it but pte_write gets cleared during protection updates and ++ * pte_dirty has unpredictable behaviour between PTE scan updates, ++ * background writeback, dirty balancing and application behaviour. ++ */ ++ if (!pte_write(pte)) ++ flags |= TNF_NO_GROUP; ++ ++ /* ++ * Flag if the page is shared between multiple address spaces. 
This ++ * is later used when determining whether to group tasks together ++ */ ++ if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED)) ++ flags |= TNF_SHARED; ++ ++ last_cpupid = page_cpupid_last(page); ++ page_nid = page_to_nid(page); ++ target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, ++ &flags); ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ if (target_nid == -1) { ++ put_page(page); ++ goto out; ++ } ++ ++ /* Migrate to the requested node */ ++ migrated = migrate_misplaced_page(page, vma, target_nid); ++ if (migrated) { ++ page_nid = target_nid; ++ flags |= TNF_MIGRATED; ++ } else ++ flags |= TNF_MIGRATE_FAIL; ++ ++out: ++ if (page_nid != -1) ++ task_numa_fault(last_cpupid, page_nid, 1, flags); ++ return 0; ++} ++ ++static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) ++{ ++ if (vma_is_anonymous(vmf->vma)) ++ return do_huge_pmd_anonymous_page(vmf); ++ if (vmf->vma->vm_ops->huge_fault) ++ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); ++ return VM_FAULT_FALLBACK; ++} ++ ++/* `inline' is required to avoid gcc 4.1.2 build error */ ++static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) ++{ ++ if (vma_is_anonymous(vmf->vma)) ++ return do_huge_pmd_wp_page(vmf, orig_pmd); ++ if (vmf->vma->vm_ops->huge_fault) ++ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); ++ ++ /* COW handled on pte level: split pmd */ ++ VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma); ++ __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL); ++ ++ return VM_FAULT_FALLBACK; ++} ++ ++static inline bool vma_is_accessible(struct vm_area_struct *vma) ++{ ++ return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE); ++} ++ ++static vm_fault_t create_huge_pud(struct vm_fault *vmf) ++{ ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++ /* No support for anonymous transparent PUD pages yet */ ++ if (vma_is_anonymous(vmf->vma)) ++ return VM_FAULT_FALLBACK; ++ if (vmf->vma->vm_ops->huge_fault) ++ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); ++#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ ++ return VM_FAULT_FALLBACK; ++} ++ ++static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) ++{ ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++ /* No support for anonymous transparent PUD pages yet */ ++ if (vma_is_anonymous(vmf->vma)) ++ return VM_FAULT_FALLBACK; ++ if (vmf->vma->vm_ops->huge_fault) ++ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); ++#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ ++ return VM_FAULT_FALLBACK; ++} ++ ++/* ++ * These routines also need to handle stuff like marking pages dirty ++ * and/or accessed for architectures that don't do it in hardware (most ++ * RISC architectures). The early dirtying is also good on the i386. ++ * ++ * There is also a hook called "update_mmu_cache()" that architectures ++ * with external mmu caches can use to update those (ie the Sparc or ++ * PowerPC hashed page tables that act as extended TLBs). ++ * ++ * We enter with non-exclusive mmap_sem (to exclude vma changes, but allow ++ * concurrent faults). ++ * ++ * The mmap_sem may have been released depending on flags and our return value. ++ * See filemap_fault() and __lock_page_or_retry(). ++ */ ++static vm_fault_t handle_pte_fault(struct vm_fault *vmf) ++{ ++ pte_t entry; ++ ++ if (unlikely(pmd_none(*vmf->pmd))) { ++ /* ++ * Leave __pte_alloc() until later: because vm_ops->fault may ++ * want to allocate huge page, and if we expose page table ++ * for an instant, it will be difficult to retract from ++ * concurrent faults and from rmap lookups. 
++ */ ++ vmf->pte = NULL; ++ } else { ++ /* See comment in pte_alloc_one_map() */ ++ if (pmd_devmap_trans_unstable(vmf->pmd)) ++ return 0; ++ /* ++ * A regular pmd is established and it can't morph into a huge ++ * pmd from under us anymore at this point because we hold the ++ * mmap_sem read mode and khugepaged takes it in write mode. ++ * So now it's safe to run pte_offset_map(). ++ */ ++ vmf->pte = pte_offset_map(vmf->pmd, vmf->address); ++ vmf->orig_pte = *vmf->pte; ++ ++ /* ++ * some architectures can have larger ptes than wordsize, ++ * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and ++ * CONFIG_32BIT=y, so READ_ONCE cannot guarantee atomic ++ * accesses. The code below just needs a consistent view ++ * for the ifs and we later double check anyway with the ++ * ptl lock held. So here a barrier will do. ++ */ ++ barrier(); ++ if (pte_none(vmf->orig_pte)) { ++ pte_unmap(vmf->pte); ++ vmf->pte = NULL; ++ } ++ } ++ ++ if (!vmf->pte) { ++ if (vma_is_anonymous(vmf->vma)) ++ return do_anonymous_page(vmf); ++ else ++ return do_fault(vmf); ++ } ++ ++ if (!pte_present(vmf->orig_pte)) ++ return do_swap_page(vmf); ++ ++ if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) ++ return do_numa_page(vmf); ++ ++ vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); ++ spin_lock(vmf->ptl); ++ entry = vmf->orig_pte; ++ if (unlikely(!pte_same(*vmf->pte, entry))) ++ goto unlock; ++ if (vmf->flags & FAULT_FLAG_WRITE) { ++ if (!pte_write(entry)) ++ return do_wp_page(vmf); ++ entry = pte_mkdirty(entry); ++ } ++ entry = pte_mkyoung(entry); ++ if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, ++ vmf->flags & FAULT_FLAG_WRITE)) { ++ update_mmu_cache(vmf->vma, vmf->address, vmf->pte); ++ } else { ++ /* ++ * This is needed only for protection faults but the arch code ++ * is not yet telling us if this is a protection fault or not. ++ * This still avoids useless tlb flushes for .text page faults ++ * with threads. ++ */ ++ if (vmf->flags & FAULT_FLAG_WRITE) ++ flush_tlb_fix_spurious_fault(vmf->vma, vmf->address); ++ } ++unlock: ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return 0; ++} ++ ++/* ++ * By the time we get here, we already hold the mm semaphore ++ * ++ * The mmap_sem may have been released depending on flags and our ++ * return value. See filemap_fault() and __lock_page_or_retry(). 
++ */ ++static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, ++ unsigned long address, unsigned int flags) ++{ ++ struct vm_fault vmf = { ++ .vma = vma, ++ .address = address & PAGE_MASK, ++ .flags = flags, ++ .pgoff = linear_page_index(vma, address), ++ .gfp_mask = __get_fault_gfp_mask(vma), ++ }; ++ unsigned int dirty = flags & FAULT_FLAG_WRITE; ++ struct mm_struct *mm = vma->vm_mm; ++ pgd_t *pgd; ++ p4d_t *p4d; ++ vm_fault_t ret; ++ ++ pgd = pgd_offset(mm, address); ++ p4d = p4d_alloc(mm, pgd, address); ++ if (!p4d) ++ return VM_FAULT_OOM; ++ ++ vmf.pud = pud_alloc(mm, p4d, address); ++ if (!vmf.pud) ++ return VM_FAULT_OOM; ++ if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) { ++ ret = create_huge_pud(&vmf); ++ if (!(ret & VM_FAULT_FALLBACK)) ++ return ret; ++ } else { ++ pud_t orig_pud = *vmf.pud; ++ ++ barrier(); ++ if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) { ++ ++ /* NUMA case for anonymous PUDs would go here */ ++ ++ if (dirty && !pud_write(orig_pud)) { ++ ret = wp_huge_pud(&vmf, orig_pud); ++ if (!(ret & VM_FAULT_FALLBACK)) ++ return ret; ++ } else { ++ huge_pud_set_accessed(&vmf, orig_pud); ++ return 0; ++ } ++ } ++ } ++ ++ vmf.pmd = pmd_alloc(mm, vmf.pud, address); ++ if (!vmf.pmd) ++ return VM_FAULT_OOM; ++ if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) { ++ ret = create_huge_pmd(&vmf); ++ if (!(ret & VM_FAULT_FALLBACK)) ++ return ret; ++ } else { ++ pmd_t orig_pmd = *vmf.pmd; ++ ++ barrier(); ++ if (unlikely(is_swap_pmd(orig_pmd))) { ++ VM_BUG_ON(thp_migration_supported() && ++ !is_pmd_migration_entry(orig_pmd)); ++ if (is_pmd_migration_entry(orig_pmd)) ++ pmd_migration_entry_wait(mm, vmf.pmd); ++ return 0; ++ } ++ if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) { ++ if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) ++ return do_huge_pmd_numa_page(&vmf, orig_pmd); ++ ++ if (dirty && !pmd_write(orig_pmd)) { ++ ret = wp_huge_pmd(&vmf, orig_pmd); ++ if (!(ret & VM_FAULT_FALLBACK)) ++ return ret; ++ } else { ++ huge_pmd_set_accessed(&vmf, orig_pmd); ++ return 0; ++ } ++ } ++ } ++ ++ return handle_pte_fault(&vmf); ++} ++ ++/* ++ * By the time we get here, we already hold the mm semaphore ++ * ++ * The mmap_sem may have been released depending on flags and our ++ * return value. See filemap_fault() and __lock_page_or_retry(). ++ */ ++vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, ++ unsigned int flags) ++{ ++ vm_fault_t ret; ++ ++ __set_current_state(TASK_RUNNING); ++ ++ count_vm_event(PGFAULT); ++ count_memcg_event_mm(vma->vm_mm, PGFAULT); ++ ++ /* do counter updates before entering really critical section. */ ++ check_sync_rss_stat(current); ++ ++ if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, ++ flags & FAULT_FLAG_INSTRUCTION, ++ flags & FAULT_FLAG_REMOTE)) ++ return VM_FAULT_SIGSEGV; ++ ++ /* ++ * Enable the memcg OOM handling for faults triggered in user ++ * space. Kernel faults are handled more gracefully. ++ */ ++ if (flags & FAULT_FLAG_USER) ++ mem_cgroup_enter_user_fault(); ++ ++ if (unlikely(is_vm_hugetlb_page(vma))) ++ ret = hugetlb_fault(vma->vm_mm, vma, address, flags); ++ else ++ ret = __handle_mm_fault(vma, address, flags); ++ ++ if (flags & FAULT_FLAG_USER) { ++ mem_cgroup_exit_user_fault(); ++ /* ++ * The task may have entered a memcg OOM situation but ++ * if the allocation error was handled gracefully (no ++ * VM_FAULT_OOM), there is no need to kill anything. ++ * Just clean up the OOM state peacefully. 
++ */ ++ if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) ++ mem_cgroup_oom_synchronize(false); ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(handle_mm_fault); ++ ++#ifndef __PAGETABLE_P4D_FOLDED ++/* ++ * Allocate p4d page table. ++ * We've already handled the fast-path in-line. ++ */ ++int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) ++{ ++ p4d_t *new = p4d_alloc_one(mm, address); ++ if (!new) ++ return -ENOMEM; ++ ++ smp_wmb(); /* See comment in __pte_alloc */ ++ ++ spin_lock(&mm->page_table_lock); ++ if (pgd_present(*pgd)) /* Another has populated it */ ++ p4d_free(mm, new); ++ else ++ pgd_populate(mm, pgd, new); ++ spin_unlock(&mm->page_table_lock); ++ return 0; ++} ++#endif /* __PAGETABLE_P4D_FOLDED */ ++ ++#ifndef __PAGETABLE_PUD_FOLDED ++/* ++ * Allocate page upper directory. ++ * We've already handled the fast-path in-line. ++ */ ++int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) ++{ ++ pud_t *new = pud_alloc_one(mm, address); ++ if (!new) ++ return -ENOMEM; ++ ++ smp_wmb(); /* See comment in __pte_alloc */ ++ ++ spin_lock(&mm->page_table_lock); ++#ifndef __ARCH_HAS_5LEVEL_HACK ++ if (!p4d_present(*p4d)) { ++ mm_inc_nr_puds(mm); ++ p4d_populate(mm, p4d, new); ++ } else /* Another has populated it */ ++ pud_free(mm, new); ++#else ++ if (!pgd_present(*p4d)) { ++ mm_inc_nr_puds(mm); ++ pgd_populate(mm, p4d, new); ++ } else /* Another has populated it */ ++ pud_free(mm, new); ++#endif /* __ARCH_HAS_5LEVEL_HACK */ ++ spin_unlock(&mm->page_table_lock); ++ return 0; ++} ++#endif /* __PAGETABLE_PUD_FOLDED */ ++ ++#ifndef __PAGETABLE_PMD_FOLDED ++/* ++ * Allocate page middle directory. ++ * We've already handled the fast-path in-line. ++ */ ++int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) ++{ ++ spinlock_t *ptl; ++ pmd_t *new = pmd_alloc_one(mm, address); ++ if (!new) ++ return -ENOMEM; ++ ++ smp_wmb(); /* See comment in __pte_alloc */ ++ ++ ptl = pud_lock(mm, pud); ++#ifndef __ARCH_HAS_4LEVEL_HACK ++ if (!pud_present(*pud)) { ++ mm_inc_nr_pmds(mm); ++ pud_populate(mm, pud, new); ++ } else /* Another has populated it */ ++ pmd_free(mm, new); ++#else ++ if (!pgd_present(*pud)) { ++ mm_inc_nr_pmds(mm); ++ pgd_populate(mm, pud, new); ++ } else /* Another has populated it */ ++ pmd_free(mm, new); ++#endif /* __ARCH_HAS_4LEVEL_HACK */ ++ spin_unlock(ptl); ++ return 0; ++} ++#endif /* __PAGETABLE_PMD_FOLDED */ ++ ++static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, ++ unsigned long *start, unsigned long *end, ++ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) ++{ ++ pgd_t *pgd; ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *ptep; ++ ++ pgd = pgd_offset(mm, address); ++ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) ++ goto out; ++ ++ p4d = p4d_offset(pgd, address); ++ if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) ++ goto out; ++ ++ pud = pud_offset(p4d, address); ++ if (pud_none(*pud) || unlikely(pud_bad(*pud))) ++ goto out; ++ ++ pmd = pmd_offset(pud, address); ++ VM_BUG_ON(pmd_trans_huge(*pmd)); ++ ++ if (pmd_huge(*pmd)) { ++ if (!pmdpp) ++ goto out; ++ ++ if (start && end) { ++ *start = address & PMD_MASK; ++ *end = *start + PMD_SIZE; ++ mmu_notifier_invalidate_range_start(mm, *start, *end); ++ } ++ *ptlp = pmd_lock(mm, pmd); ++ if (pmd_huge(*pmd)) { ++ *pmdpp = pmd; ++ return 0; ++ } ++ spin_unlock(*ptlp); ++ if (start && end) ++ mmu_notifier_invalidate_range_end(mm, *start, *end); ++ } ++ ++ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) ++ goto out; ++ ++ if (start && end) { 
++ *start = address & PAGE_MASK; ++ *end = *start + PAGE_SIZE; ++ mmu_notifier_invalidate_range_start(mm, *start, *end); ++ } ++ ptep = pte_offset_map_lock(mm, pmd, address, ptlp); ++ if (!pte_present(*ptep)) ++ goto unlock; ++ *ptepp = ptep; ++ return 0; ++unlock: ++ pte_unmap_unlock(ptep, *ptlp); ++ if (start && end) ++ mmu_notifier_invalidate_range_end(mm, *start, *end); ++out: ++ return -EINVAL; ++} ++ ++static inline int follow_pte(struct mm_struct *mm, unsigned long address, ++ pte_t **ptepp, spinlock_t **ptlp) ++{ ++ int res; ++ ++ /* (void) is needed to make gcc happy */ ++ (void) __cond_lock(*ptlp, ++ !(res = __follow_pte_pmd(mm, address, NULL, NULL, ++ ptepp, NULL, ptlp))); ++ return res; ++} ++ ++int follow_pte_pmd(struct mm_struct *mm, unsigned long address, ++ unsigned long *start, unsigned long *end, ++ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) ++{ ++ int res; ++ ++ /* (void) is needed to make gcc happy */ ++ (void) __cond_lock(*ptlp, ++ !(res = __follow_pte_pmd(mm, address, start, end, ++ ptepp, pmdpp, ptlp))); ++ return res; ++} ++EXPORT_SYMBOL(follow_pte_pmd); ++ ++/** ++ * follow_pfn - look up PFN at a user virtual address ++ * @vma: memory mapping ++ * @address: user virtual address ++ * @pfn: location to store found PFN ++ * ++ * Only IO mappings and raw PFN mappings are allowed. ++ * ++ * Returns zero and the pfn at @pfn on success, -ve otherwise. ++ */ ++int follow_pfn(struct vm_area_struct *vma, unsigned long address, ++ unsigned long *pfn) ++{ ++ int ret = -EINVAL; ++ spinlock_t *ptl; ++ pte_t *ptep; ++ ++ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) ++ return ret; ++ ++ ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); ++ if (ret) ++ return ret; ++ *pfn = pte_pfn(*ptep); ++ pte_unmap_unlock(ptep, ptl); ++ return 0; ++} ++EXPORT_SYMBOL(follow_pfn); ++ ++#ifdef CONFIG_HAVE_IOREMAP_PROT ++int follow_phys(struct vm_area_struct *vma, ++ unsigned long address, unsigned int flags, ++ unsigned long *prot, resource_size_t *phys) ++{ ++ int ret = -EINVAL; ++ pte_t *ptep, pte; ++ spinlock_t *ptl; ++ ++ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) ++ goto out; ++ ++ if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) ++ goto out; ++ pte = *ptep; ++ ++ if ((flags & FOLL_WRITE) && !pte_write(pte)) ++ goto unlock; ++ ++ *prot = pgprot_val(pte_pgprot(pte)); ++ *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; ++ ++ ret = 0; ++unlock: ++ pte_unmap_unlock(ptep, ptl); ++out: ++ return ret; ++} ++ ++int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, ++ void *buf, int len, int write) ++{ ++ resource_size_t phys_addr; ++ unsigned long prot = 0; ++ void __iomem *maddr; ++ int offset = addr & (PAGE_SIZE-1); ++ ++ if (follow_phys(vma, addr, write, &prot, &phys_addr)) ++ return -EINVAL; ++ ++ maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); ++ if (!maddr) ++ return -ENOMEM; ++ ++ if (write) ++ memcpy_toio(maddr + offset, buf, len); ++ else ++ memcpy_fromio(buf, maddr + offset, len); ++ iounmap(maddr); ++ ++ return len; ++} ++EXPORT_SYMBOL_GPL(generic_access_phys); ++#endif ++ ++/* ++ * Access another process' address space as given in mm. If non-NULL, use the ++ * given task for page fault accounting. 
++ */ ++int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, ++ unsigned long addr, void *buf, int len, unsigned int gup_flags) ++{ ++ struct vm_area_struct *vma; ++ void *old_buf = buf; ++ int write = gup_flags & FOLL_WRITE; ++ ++ if (down_read_killable(&mm->mmap_sem)) ++ return 0; ++ ++ /* ignore errors, just check how much was successfully transferred */ ++ while (len) { ++ int bytes, ret, offset; ++ void *maddr; ++ struct page *page = NULL; ++ ++ ret = get_user_pages_remote(tsk, mm, addr, 1, ++ gup_flags, &page, &vma, NULL); ++ if (ret <= 0) { ++#ifndef CONFIG_HAVE_IOREMAP_PROT ++ break; ++#else ++ /* ++ * Check if this is a VM_IO | VM_PFNMAP VMA, which ++ * we can access using slightly different code. ++ */ ++ vma = find_vma(mm, addr); ++ if (!vma || vma->vm_start > addr) ++ break; ++ if (vma->vm_ops && vma->vm_ops->access) ++ ret = vma->vm_ops->access(vma, addr, buf, ++ len, write); ++ if (ret <= 0) ++ break; ++ bytes = ret; ++#endif ++ } else { ++ bytes = len; ++ offset = addr & (PAGE_SIZE-1); ++ if (bytes > PAGE_SIZE-offset) ++ bytes = PAGE_SIZE-offset; ++ ++ maddr = kmap(page); ++ if (write) { ++ copy_to_user_page(vma, page, addr, ++ maddr + offset, buf, bytes); ++ set_page_dirty_lock(page); ++ } else { ++ copy_from_user_page(vma, page, addr, ++ buf, maddr + offset, bytes); ++ } ++ kunmap(page); ++ put_page(page); ++ } ++ len -= bytes; ++ buf += bytes; ++ addr += bytes; ++ } ++ up_read(&mm->mmap_sem); ++ ++ return buf - old_buf; ++} ++ ++/** ++ * access_remote_vm - access another process' address space ++ * @mm: the mm_struct of the target address space ++ * @addr: start address to access ++ * @buf: source or destination buffer ++ * @len: number of bytes to transfer ++ * @gup_flags: flags modifying lookup behaviour ++ * ++ * The caller must hold a reference on @mm. ++ */ ++int access_remote_vm(struct mm_struct *mm, unsigned long addr, ++ void *buf, int len, unsigned int gup_flags) ++{ ++ return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); ++} ++ ++/* ++ * Access another process' address space. ++ * Source/target buffer must be kernel space, ++ * Do not walk the page table directly, use get_user_pages ++ */ ++int access_process_vm(struct task_struct *tsk, unsigned long addr, ++ void *buf, int len, unsigned int gup_flags) ++{ ++ struct mm_struct *mm; ++ int ret; ++ ++ mm = get_task_mm(tsk); ++ if (!mm) ++ return 0; ++ ++ ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); ++ ++ mmput(mm); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(access_process_vm); ++ ++/* ++ * Print the name of a VMA. 
++ */ ++void print_vma_addr(char *prefix, unsigned long ip) ++{ ++ struct mm_struct *mm = current->mm; ++ struct vm_area_struct *vma; ++ ++ /* ++ * we might be running from an atomic context so we cannot sleep ++ */ ++ if (!down_read_trylock(&mm->mmap_sem)) ++ return; ++ ++ vma = find_vma(mm, ip); ++ if (vma && vma->vm_file) { ++ struct file *f = vma->vm_file; ++ char *buf = (char *)__get_free_page(GFP_NOWAIT); ++ if (buf) { ++ char *p; ++ ++ p = file_path(f, buf, PAGE_SIZE); ++ if (IS_ERR(p)) ++ p = "?"; ++ printk("%s%s[%lx+%lx]", prefix, kbasename(p), ++ vma->vm_start, ++ vma->vm_end - vma->vm_start); ++ free_page((unsigned long)buf); ++ } ++ } ++ up_read(&mm->mmap_sem); ++} ++ ++#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP) ++void __might_fault(const char *file, int line) ++{ ++ /* ++ * Some code (nfs/sunrpc) uses socket ops on kernel memory while ++ * holding the mmap_sem, this is safe because kernel memory doesn't ++ * get paged out, therefore we'll never actually fault, and the ++ * below annotations will generate false positives. ++ */ ++ if (uaccess_kernel()) ++ return; ++ if (pagefault_disabled()) ++ return; ++ __might_sleep(file, line, 0); ++#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) ++ if (current->mm) ++ might_lock_read(¤t->mm->mmap_sem); ++#endif ++} ++EXPORT_SYMBOL(__might_fault); ++#endif ++ ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) ++/* ++ * Process all subpages of the specified huge page with the specified ++ * operation. The target subpage will be processed last to keep its ++ * cache lines hot. ++ */ ++static inline void process_huge_page( ++ unsigned long addr_hint, unsigned int pages_per_huge_page, ++ void (*process_subpage)(unsigned long addr, int idx, void *arg), ++ void *arg) ++{ ++ int i, n, base, l; ++ unsigned long addr = addr_hint & ++ ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); ++ ++ /* Process target subpage last to keep its cache lines hot */ ++ might_sleep(); ++ n = (addr_hint - addr) / PAGE_SIZE; ++ if (2 * n <= pages_per_huge_page) { ++ /* If target subpage in first half of huge page */ ++ base = 0; ++ l = n; ++ /* Process subpages at the end of huge page */ ++ for (i = pages_per_huge_page - 1; i >= 2 * n; i--) { ++ cond_resched(); ++ process_subpage(addr + i * PAGE_SIZE, i, arg); ++ } ++ } else { ++ /* If target subpage in second half of huge page */ ++ base = pages_per_huge_page - 2 * (pages_per_huge_page - n); ++ l = pages_per_huge_page - n; ++ /* Process subpages at the begin of huge page */ ++ for (i = 0; i < base; i++) { ++ cond_resched(); ++ process_subpage(addr + i * PAGE_SIZE, i, arg); ++ } ++ } ++ /* ++ * Process remaining subpages in left-right-left-right pattern ++ * towards the target subpage ++ */ ++ for (i = 0; i < l; i++) { ++ int left_idx = base + i; ++ int right_idx = base + 2 * l - 1 - i; ++ ++ cond_resched(); ++ process_subpage(addr + left_idx * PAGE_SIZE, left_idx, arg); ++ cond_resched(); ++ process_subpage(addr + right_idx * PAGE_SIZE, right_idx, arg); ++ } ++} ++ ++struct cgp_args { ++ struct page *base_page; ++ unsigned long addr; ++}; ++ ++static int clear_gigantic_page_chunk(unsigned long start, unsigned long end, ++ struct cgp_args *args) ++{ ++ struct page *base_page = args->base_page; ++ struct page *p = base_page; ++ unsigned long addr = args->addr; ++ unsigned long i; ++ ++ might_sleep(); ++ for (i = start; i < end; ++i) { ++ cond_resched(); ++ clear_user_highpage(p, addr + i * PAGE_SIZE); ++ ++ p = mem_map_next(p, base_page, i); ++ } ++ ++ return 
KTASK_RETURN_SUCCESS; ++} ++ ++static void clear_subpage(unsigned long addr, int idx, void *arg) ++{ ++ struct page *page = arg; ++ ++ clear_user_highpage(page + idx, addr); ++} ++ ++void clear_huge_page(struct page *page, ++ unsigned long addr_hint, unsigned int pages_per_huge_page) ++{ ++ unsigned long addr = addr_hint & ++ ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); ++ ++ if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { ++ struct cgp_args args = {page, addr}; ++ struct ktask_node node = {0, pages_per_huge_page, ++ page_to_nid(page)}; ++ DEFINE_KTASK_CTL(ctl, clear_gigantic_page_chunk, &args, ++ KTASK_PTE_MINCHUNK); ++ ++ ktask_run_numa(&node, 1, &ctl); ++ return; ++ } ++ ++ process_huge_page(addr_hint, pages_per_huge_page, clear_subpage, page); ++} ++ ++static void copy_user_gigantic_page(struct page *dst, struct page *src, ++ unsigned long addr, ++ struct vm_area_struct *vma, ++ unsigned int pages_per_huge_page) ++{ ++ int i; ++ struct page *dst_base = dst; ++ struct page *src_base = src; ++ ++ for (i = 0; i < pages_per_huge_page; ) { ++ cond_resched(); ++ copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); ++ ++ i++; ++ dst = mem_map_next(dst, dst_base, i); ++ src = mem_map_next(src, src_base, i); ++ } ++} ++ ++struct copy_subpage_arg { ++ struct page *dst; ++ struct page *src; ++ struct vm_area_struct *vma; ++}; ++ ++static void copy_subpage(unsigned long addr, int idx, void *arg) ++{ ++ struct copy_subpage_arg *copy_arg = arg; ++ ++ copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx, ++ addr, copy_arg->vma); ++} ++ ++void copy_user_huge_page(struct page *dst, struct page *src, ++ unsigned long addr_hint, struct vm_area_struct *vma, ++ unsigned int pages_per_huge_page) ++{ ++ unsigned long addr = addr_hint & ++ ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); ++ struct copy_subpage_arg arg = { ++ .dst = dst, ++ .src = src, ++ .vma = vma, ++ }; ++ ++ if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { ++ copy_user_gigantic_page(dst, src, addr, vma, ++ pages_per_huge_page); ++ return; ++ } ++ ++ process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg); ++} ++ ++long copy_huge_page_from_user(struct page *dst_page, ++ const void __user *usr_src, ++ unsigned int pages_per_huge_page, ++ bool allow_pagefault) ++{ ++ void *src = (void *)usr_src; ++ void *page_kaddr; ++ unsigned long i, rc = 0; ++ unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; ++ ++ for (i = 0; i < pages_per_huge_page; i++) { ++ if (allow_pagefault) ++ page_kaddr = kmap(dst_page + i); ++ else ++ page_kaddr = kmap_atomic(dst_page + i); ++ rc = copy_from_user(page_kaddr, ++ (const void __user *)(src + i * PAGE_SIZE), ++ PAGE_SIZE); ++ if (allow_pagefault) ++ kunmap(dst_page + i); ++ else ++ kunmap_atomic(page_kaddr); ++ ++ ret_val -= (PAGE_SIZE - rc); ++ if (rc) ++ break; ++ ++ cond_resched(); ++ } ++ return ret_val; ++} ++#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ ++ ++#if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS ++ ++static struct kmem_cache *page_ptl_cachep; ++ ++void __init ptlock_cache_init(void) ++{ ++ page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, ++ SLAB_PANIC, NULL); ++} ++ ++bool ptlock_alloc(struct page *page) ++{ ++ spinlock_t *ptl; ++ ++ ptl = kmem_cache_alloc(page_ptl_cachep, GFP_KERNEL); ++ if (!ptl) ++ return false; ++ page->ptl = ptl; ++ return true; ++} ++ ++void ptlock_free(struct page *page) ++{ ++ kmem_cache_free(page_ptl_cachep, page->ptl); ++} ++#endif +diff -uprN kernel/mm/mlock.c 
kernel_new/mm/mlock.c +--- kernel/mm/mlock.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/mm/mlock.c 2021-04-01 18:28:07.816863108 +0800 +@@ -867,3 +867,29 @@ void user_shm_unlock(size_t size, struct + spin_unlock(&shmlock_user_lock); + free_uid(user); + } ++ ++#ifdef CONFIG_IPIPE ++int __ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma) ++{ ++ unsigned int gup_flags = 0; ++ int ret, len; ++ ++ if (vma->vm_flags & (VM_IO | VM_PFNMAP)) ++ return 0; ++ ++ if (!((vma->vm_flags & VM_DONTEXPAND) || ++ is_vm_hugetlb_page(vma) || vma == get_gate_vma(mm))) { ++ ret = populate_vma_page_range(vma, vma->vm_start, vma->vm_end, ++ NULL); ++ return ret < 0 ? ret : 0; ++ } ++ ++ if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) ++ gup_flags |= FOLL_WRITE; ++ len = DIV_ROUND_UP(vma->vm_end, PAGE_SIZE) - vma->vm_start/PAGE_SIZE; ++ ret = get_user_pages_locked(vma->vm_start, len, gup_flags, NULL, NULL); ++ if (ret < 0) ++ return ret; ++ return ret == len ? 0 : -EFAULT; ++} ++#endif +diff -uprN kernel/mm/mlock.c.orig kernel_new/mm/mlock.c.orig +--- kernel/mm/mlock.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/mm/mlock.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,869 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * linux/mm/mlock.c ++ * ++ * (C) Copyright 1995 Linus Torvalds ++ * (C) Copyright 2002 Christoph Hellwig ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "internal.h" ++ ++bool can_do_mlock(void) ++{ ++ if (rlimit(RLIMIT_MEMLOCK) != 0) ++ return true; ++ if (capable(CAP_IPC_LOCK)) ++ return true; ++ return false; ++} ++EXPORT_SYMBOL(can_do_mlock); ++ ++/* ++ * Mlocked pages are marked with PageMlocked() flag for efficient testing ++ * in vmscan and, possibly, the fault path; and to support semi-accurate ++ * statistics. ++ * ++ * An mlocked page [PageMlocked(page)] is unevictable. As such, it will ++ * be placed on the LRU "unevictable" list, rather than the [in]active lists. ++ * The unevictable list is an LRU sibling list to the [in]active lists. ++ * PageUnevictable is set to indicate the unevictable state. ++ * ++ * When lazy mlocking via vmscan, it is important to ensure that the ++ * vma's VM_LOCKED status is not concurrently being modified, otherwise we ++ * may have mlocked a page that is being munlocked. So lazy mlock must take ++ * the mmap_sem for read, and verify that the vma really is locked ++ * (see mm/rmap.c). ++ */ ++ ++/* ++ * LRU accounting for clear_page_mlock() ++ */ ++void clear_page_mlock(struct page *page) ++{ ++ if (!TestClearPageMlocked(page)) ++ return; ++ ++ mod_zone_page_state(page_zone(page), NR_MLOCK, ++ -hpage_nr_pages(page)); ++ count_vm_event(UNEVICTABLE_PGCLEARED); ++ /* ++ * The previous TestClearPageMlocked() corresponds to the smp_mb() ++ * in __pagevec_lru_add_fn(). ++ * ++ * See __pagevec_lru_add_fn for more explanation. ++ */ ++ if (!isolate_lru_page(page)) { ++ putback_lru_page(page); ++ } else { ++ /* ++ * We lost the race. the page already moved to evictable list. ++ */ ++ if (PageUnevictable(page)) ++ count_vm_event(UNEVICTABLE_PGSTRANDED); ++ } ++} ++ ++/* ++ * Mark page as mlocked if not already. ++ * If page on LRU, isolate and putback to move to unevictable list. 
++ */ ++void mlock_vma_page(struct page *page) ++{ ++ /* Serialize with page migration */ ++ BUG_ON(!PageLocked(page)); ++ ++ VM_BUG_ON_PAGE(PageTail(page), page); ++ VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page); ++ ++ if (!TestSetPageMlocked(page)) { ++ mod_zone_page_state(page_zone(page), NR_MLOCK, ++ hpage_nr_pages(page)); ++ count_vm_event(UNEVICTABLE_PGMLOCKED); ++ if (!isolate_lru_page(page)) ++ putback_lru_page(page); ++ } ++} ++ ++/* ++ * Isolate a page from LRU with optional get_page() pin. ++ * Assumes lru_lock already held and page already pinned. ++ */ ++static bool __munlock_isolate_lru_page(struct page *page, bool getpage) ++{ ++ if (PageLRU(page)) { ++ struct lruvec *lruvec; ++ ++ lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); ++ if (getpage) ++ get_page(page); ++ ClearPageLRU(page); ++ del_page_from_lru_list(page, lruvec, page_lru(page)); ++ return true; ++ } ++ ++ return false; ++} ++ ++/* ++ * Finish munlock after successful page isolation ++ * ++ * Page must be locked. This is a wrapper for try_to_munlock() ++ * and putback_lru_page() with munlock accounting. ++ */ ++static void __munlock_isolated_page(struct page *page) ++{ ++ /* ++ * Optimization: if the page was mapped just once, that's our mapping ++ * and we don't need to check all the other vmas. ++ */ ++ if (page_mapcount(page) > 1) ++ try_to_munlock(page); ++ ++ /* Did try_to_unlock() succeed or punt? */ ++ if (!PageMlocked(page)) ++ count_vm_event(UNEVICTABLE_PGMUNLOCKED); ++ ++ putback_lru_page(page); ++} ++ ++/* ++ * Accounting for page isolation fail during munlock ++ * ++ * Performs accounting when page isolation fails in munlock. There is nothing ++ * else to do because it means some other task has already removed the page ++ * from the LRU. putback_lru_page() will take care of removing the page from ++ * the unevictable list, if necessary. vmscan [page_referenced()] will move ++ * the page back to the unevictable list if some other vma has it mlocked. ++ */ ++static void __munlock_isolation_failed(struct page *page) ++{ ++ if (PageUnevictable(page)) ++ __count_vm_event(UNEVICTABLE_PGSTRANDED); ++ else ++ __count_vm_event(UNEVICTABLE_PGMUNLOCKED); ++} ++ ++/** ++ * munlock_vma_page - munlock a vma page ++ * @page: page to be unlocked, either a normal page or THP page head ++ * ++ * returns the size of the page as a page mask (0 for normal page, ++ * HPAGE_PMD_NR - 1 for THP head page) ++ * ++ * called from munlock()/munmap() path with page supposedly on the LRU. ++ * When we munlock a page, because the vma where we found the page is being ++ * munlock()ed or munmap()ed, we want to check whether other vmas hold the ++ * page locked so that we can leave it on the unevictable lru list and not ++ * bother vmscan with it. However, to walk the page's rmap list in ++ * try_to_munlock() we must isolate the page from the LRU. If some other ++ * task has removed the page from the LRU, we won't be able to do that. ++ * So we clear the PageMlocked as we might not get another chance. If we ++ * can't isolate the page, we leave it for putback_lru_page() and vmscan ++ * [page_referenced()/try_to_unmap()] to deal with. 
++ */ ++unsigned int munlock_vma_page(struct page *page) ++{ ++ int nr_pages; ++ struct zone *zone = page_zone(page); ++ ++ /* For try_to_munlock() and to serialize with page migration */ ++ BUG_ON(!PageLocked(page)); ++ ++ VM_BUG_ON_PAGE(PageTail(page), page); ++ ++ /* ++ * Serialize with any parallel __split_huge_page_refcount() which ++ * might otherwise copy PageMlocked to part of the tail pages before ++ * we clear it in the head page. It also stabilizes hpage_nr_pages(). ++ */ ++ spin_lock_irq(zone_lru_lock(zone)); ++ ++ if (!TestClearPageMlocked(page)) { ++ /* Potentially, PTE-mapped THP: do not skip the rest PTEs */ ++ nr_pages = 1; ++ goto unlock_out; ++ } ++ ++ nr_pages = hpage_nr_pages(page); ++ __mod_zone_page_state(zone, NR_MLOCK, -nr_pages); ++ ++ if (__munlock_isolate_lru_page(page, true)) { ++ spin_unlock_irq(zone_lru_lock(zone)); ++ __munlock_isolated_page(page); ++ goto out; ++ } ++ __munlock_isolation_failed(page); ++ ++unlock_out: ++ spin_unlock_irq(zone_lru_lock(zone)); ++ ++out: ++ return nr_pages - 1; ++} ++ ++/* ++ * convert get_user_pages() return value to posix mlock() error ++ */ ++static int __mlock_posix_error_return(long retval) ++{ ++ if (retval == -EFAULT) ++ retval = -ENOMEM; ++ else if (retval == -ENOMEM) ++ retval = -EAGAIN; ++ return retval; ++} ++ ++/* ++ * Prepare page for fast batched LRU putback via putback_lru_evictable_pagevec() ++ * ++ * The fast path is available only for evictable pages with single mapping. ++ * Then we can bypass the per-cpu pvec and get better performance. ++ * when mapcount > 1 we need try_to_munlock() which can fail. ++ * when !page_evictable(), we need the full redo logic of putback_lru_page to ++ * avoid leaving evictable page in unevictable list. ++ * ++ * In case of success, @page is added to @pvec and @pgrescued is incremented ++ * in case that the page was previously unevictable. @page is also unlocked. ++ */ ++static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec, ++ int *pgrescued) ++{ ++ VM_BUG_ON_PAGE(PageLRU(page), page); ++ VM_BUG_ON_PAGE(!PageLocked(page), page); ++ ++ if (page_mapcount(page) <= 1 && page_evictable(page)) { ++ pagevec_add(pvec, page); ++ if (TestClearPageUnevictable(page)) ++ (*pgrescued)++; ++ unlock_page(page); ++ return true; ++ } ++ ++ return false; ++} ++ ++/* ++ * Putback multiple evictable pages to the LRU ++ * ++ * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of ++ * the pages might have meanwhile become unevictable but that is OK. ++ */ ++static void __putback_lru_fast(struct pagevec *pvec, int pgrescued) ++{ ++ count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec)); ++ /* ++ *__pagevec_lru_add() calls release_pages() so we don't call ++ * put_page() explicitly ++ */ ++ __pagevec_lru_add(pvec); ++ count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued); ++} ++ ++/* ++ * Munlock a batch of pages from the same zone ++ * ++ * The work is split to two main phases. First phase clears the Mlocked flag ++ * and attempts to isolate the pages, all under a single zone lru lock. ++ * The second phase finishes the munlock only for pages where isolation ++ * succeeded. ++ * ++ * Note that the pagevec may be modified during the process. 
++ */ ++static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) ++{ ++ int i; ++ int nr = pagevec_count(pvec); ++ int delta_munlocked = -nr; ++ struct pagevec pvec_putback; ++ int pgrescued = 0; ++ ++ pagevec_init(&pvec_putback); ++ ++ /* Phase 1: page isolation */ ++ spin_lock_irq(zone_lru_lock(zone)); ++ for (i = 0; i < nr; i++) { ++ struct page *page = pvec->pages[i]; ++ ++ if (TestClearPageMlocked(page)) { ++ /* ++ * We already have pin from follow_page_mask() ++ * so we can spare the get_page() here. ++ */ ++ if (__munlock_isolate_lru_page(page, false)) ++ continue; ++ else ++ __munlock_isolation_failed(page); ++ } else { ++ delta_munlocked++; ++ } ++ ++ /* ++ * We won't be munlocking this page in the next phase ++ * but we still need to release the follow_page_mask() ++ * pin. We cannot do it under lru_lock however. If it's ++ * the last pin, __page_cache_release() would deadlock. ++ */ ++ pagevec_add(&pvec_putback, pvec->pages[i]); ++ pvec->pages[i] = NULL; ++ } ++ __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); ++ spin_unlock_irq(zone_lru_lock(zone)); ++ ++ /* Now we can release pins of pages that we are not munlocking */ ++ pagevec_release(&pvec_putback); ++ ++ /* Phase 2: page munlock */ ++ for (i = 0; i < nr; i++) { ++ struct page *page = pvec->pages[i]; ++ ++ if (page) { ++ lock_page(page); ++ if (!__putback_lru_fast_prepare(page, &pvec_putback, ++ &pgrescued)) { ++ /* ++ * Slow path. We don't want to lose the last ++ * pin before unlock_page() ++ */ ++ get_page(page); /* for putback_lru_page() */ ++ __munlock_isolated_page(page); ++ unlock_page(page); ++ put_page(page); /* from follow_page_mask() */ ++ } ++ } ++ } ++ ++ /* ++ * Phase 3: page putback for pages that qualified for the fast path ++ * This will also call put_page() to return pin from follow_page_mask() ++ */ ++ if (pagevec_count(&pvec_putback)) ++ __putback_lru_fast(&pvec_putback, pgrescued); ++} ++ ++/* ++ * Fill up pagevec for __munlock_pagevec using pte walk ++ * ++ * The function expects that the struct page corresponding to @start address is ++ * a non-TPH page already pinned and in the @pvec, and that it belongs to @zone. ++ * ++ * The rest of @pvec is filled by subsequent pages within the same pmd and same ++ * zone, as long as the pte's are present and vm_normal_page() succeeds. These ++ * pages also get pinned. ++ * ++ * Returns the address of the next page that should be scanned. This equals ++ * @start + PAGE_SIZE when no page could be added by the pte walk. ++ */ ++static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, ++ struct vm_area_struct *vma, struct zone *zone, ++ unsigned long start, unsigned long end) ++{ ++ pte_t *pte; ++ spinlock_t *ptl; ++ ++ /* ++ * Initialize pte walk starting at the already pinned page where we ++ * are sure that there is a pte, as it was pinned under the same ++ * mmap_sem write op. 
++ */ ++ pte = get_locked_pte(vma->vm_mm, start, &ptl); ++ /* Make sure we do not cross the page table boundary */ ++ end = pgd_addr_end(start, end); ++ end = p4d_addr_end(start, end); ++ end = pud_addr_end(start, end); ++ end = pmd_addr_end(start, end); ++ ++ /* The page next to the pinned page is the first we will try to get */ ++ start += PAGE_SIZE; ++ while (start < end) { ++ struct page *page = NULL; ++ pte++; ++ if (pte_present(*pte)) ++ page = vm_normal_page(vma, start, *pte); ++ /* ++ * Break if page could not be obtained or the page's node+zone does not ++ * match ++ */ ++ if (!page || page_zone(page) != zone) ++ break; ++ ++ /* ++ * Do not use pagevec for PTE-mapped THP, ++ * munlock_vma_pages_range() will handle them. ++ */ ++ if (PageTransCompound(page)) ++ break; ++ ++ get_page(page); ++ /* ++ * Increase the address that will be returned *before* the ++ * eventual break due to pvec becoming full by adding the page ++ */ ++ start += PAGE_SIZE; ++ if (pagevec_add(pvec, page) == 0) ++ break; ++ } ++ pte_unmap_unlock(pte, ptl); ++ return start; ++} ++ ++/* ++ * munlock_vma_pages_range() - munlock all pages in the vma range.' ++ * @vma - vma containing range to be munlock()ed. ++ * @start - start address in @vma of the range ++ * @end - end of range in @vma. ++ * ++ * For mremap(), munmap() and exit(). ++ * ++ * Called with @vma VM_LOCKED. ++ * ++ * Returns with VM_LOCKED cleared. Callers must be prepared to ++ * deal with this. ++ * ++ * We don't save and restore VM_LOCKED here because pages are ++ * still on lru. In unmap path, pages might be scanned by reclaim ++ * and re-mlocked by try_to_{munlock|unmap} before we unmap and ++ * free them. This will result in freeing mlocked pages. ++ */ ++void munlock_vma_pages_range(struct vm_area_struct *vma, ++ unsigned long start, unsigned long end) ++{ ++ vma->vm_flags &= VM_LOCKED_CLEAR_MASK; ++ ++ while (start < end) { ++ struct page *page; ++ unsigned int page_mask = 0; ++ unsigned long page_increm; ++ struct pagevec pvec; ++ struct zone *zone; ++ ++ pagevec_init(&pvec); ++ /* ++ * Although FOLL_DUMP is intended for get_dump_page(), ++ * it just so happens that its special treatment of the ++ * ZERO_PAGE (returning an error instead of doing get_page) ++ * suits munlock very well (and if somehow an abnormal page ++ * has sneaked into the range, we won't oops here: great). ++ */ ++ page = follow_page(vma, start, FOLL_GET | FOLL_DUMP); ++ ++ if (page && !IS_ERR(page)) { ++ if (PageTransTail(page)) { ++ VM_BUG_ON_PAGE(PageMlocked(page), page); ++ put_page(page); /* follow_page_mask() */ ++ } else if (PageTransHuge(page)) { ++ lock_page(page); ++ /* ++ * Any THP page found by follow_page_mask() may ++ * have gotten split before reaching ++ * munlock_vma_page(), so we need to compute ++ * the page_mask here instead. ++ */ ++ page_mask = munlock_vma_page(page); ++ unlock_page(page); ++ put_page(page); /* follow_page_mask() */ ++ } else { ++ /* ++ * Non-huge pages are handled in batches via ++ * pagevec. The pin from follow_page_mask() ++ * prevents them from collapsing by THP. ++ */ ++ pagevec_add(&pvec, page); ++ zone = page_zone(page); ++ ++ /* ++ * Try to fill the rest of pagevec using fast ++ * pte walk. This will also update start to ++ * the next page to process. Then munlock the ++ * pagevec. 
++ */ ++ start = __munlock_pagevec_fill(&pvec, vma, ++ zone, start, end); ++ __munlock_pagevec(&pvec, zone); ++ goto next; ++ } ++ } ++ page_increm = 1 + page_mask; ++ start += page_increm * PAGE_SIZE; ++next: ++ cond_resched(); ++ } ++} ++ ++/* ++ * mlock_fixup - handle mlock[all]/munlock[all] requests. ++ * ++ * Filters out "special" vmas -- VM_LOCKED never gets set for these, and ++ * munlock is a no-op. However, for some special vmas, we go ahead and ++ * populate the ptes. ++ * ++ * For vmas that pass the filters, merge/split as appropriate. ++ */ ++static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, ++ unsigned long start, unsigned long end, vm_flags_t newflags) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pgoff_t pgoff; ++ int nr_pages; ++ int ret = 0; ++ int lock = !!(newflags & VM_LOCKED); ++ vm_flags_t old_flags = vma->vm_flags; ++ ++ if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || ++ is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || ++ vma_is_dax(vma)) ++ /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ ++ goto out; ++ ++ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); ++ *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, ++ vma->vm_file, pgoff, vma_policy(vma), ++ vma->vm_userfaultfd_ctx); ++ if (*prev) { ++ vma = *prev; ++ goto success; ++ } ++ ++ if (start != vma->vm_start) { ++ ret = split_vma(mm, vma, start, 1); ++ if (ret) ++ goto out; ++ } ++ ++ if (end != vma->vm_end) { ++ ret = split_vma(mm, vma, end, 0); ++ if (ret) ++ goto out; ++ } ++ ++success: ++ /* ++ * Keep track of amount of locked VM. ++ */ ++ nr_pages = (end - start) >> PAGE_SHIFT; ++ if (!lock) ++ nr_pages = -nr_pages; ++ else if (old_flags & VM_LOCKED) ++ nr_pages = 0; ++ atomic_long_add(nr_pages, &mm->locked_vm); ++ ++ /* ++ * vm_flags is protected by the mmap_sem held in write mode. ++ * It's okay if try_to_unmap_one unmaps a page just after we ++ * set VM_LOCKED, populate_vma_page_range will bring it back. ++ */ ++ ++ if (lock) ++ vma->vm_flags = newflags; ++ else ++ munlock_vma_pages_range(vma, start, end); ++ ++out: ++ *prev = vma; ++ return ret; ++} ++ ++static int apply_vma_lock_flags(unsigned long start, size_t len, ++ vm_flags_t flags) ++{ ++ unsigned long nstart, end, tmp; ++ struct vm_area_struct * vma, * prev; ++ int error; ++ ++ VM_BUG_ON(offset_in_page(start)); ++ VM_BUG_ON(len != PAGE_ALIGN(len)); ++ end = start + len; ++ if (end < start) ++ return -EINVAL; ++ if (end == start) ++ return 0; ++ vma = find_vma(current->mm, start); ++ if (!vma || vma->vm_start > start) ++ return -ENOMEM; ++ ++ prev = vma->vm_prev; ++ if (start > vma->vm_start) ++ prev = vma; ++ ++ for (nstart = start ; ; ) { ++ vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; ++ ++ newflags |= flags; ++ ++ /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ ++ tmp = vma->vm_end; ++ if (tmp > end) ++ tmp = end; ++ error = mlock_fixup(vma, &prev, nstart, tmp, newflags); ++ if (error) ++ break; ++ nstart = tmp; ++ if (nstart < prev->vm_end) ++ nstart = prev->vm_end; ++ if (nstart >= end) ++ break; ++ ++ vma = prev->vm_next; ++ if (!vma || vma->vm_start != nstart) { ++ error = -ENOMEM; ++ break; ++ } ++ } ++ return error; ++} ++ ++/* ++ * Go through vma areas and sum size of mlocked ++ * vma pages, as return value. ++ * Note deferred memory locking case(mlock2(,,MLOCK_ONFAULT) ++ * is also counted. 
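++ * Only the part of each VM_LOCKED vma that intersects [start, start + len)
++ * contributes to the sum.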
++ * Return value: previously mlocked page counts ++ */ ++static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm, ++ unsigned long start, size_t len) ++{ ++ struct vm_area_struct *vma; ++ unsigned long count = 0; ++ ++ if (mm == NULL) ++ mm = current->mm; ++ ++ vma = find_vma(mm, start); ++ if (vma == NULL) ++ vma = mm->mmap; ++ ++ for (; vma ; vma = vma->vm_next) { ++ if (start >= vma->vm_end) ++ continue; ++ if (start + len <= vma->vm_start) ++ break; ++ if (vma->vm_flags & VM_LOCKED) { ++ if (start > vma->vm_start) ++ count -= (start - vma->vm_start); ++ if (start + len < vma->vm_end) { ++ count += start + len - vma->vm_start; ++ break; ++ } ++ count += vma->vm_end - vma->vm_start; ++ } ++ } ++ ++ return count >> PAGE_SHIFT; ++} ++ ++static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags) ++{ ++ unsigned long locked; ++ unsigned long lock_limit; ++ int error = -ENOMEM; ++ ++ if (!can_do_mlock()) ++ return -EPERM; ++ ++ len = PAGE_ALIGN(len + (offset_in_page(start))); ++ start &= PAGE_MASK; ++ ++ lock_limit = rlimit(RLIMIT_MEMLOCK); ++ lock_limit >>= PAGE_SHIFT; ++ locked = len >> PAGE_SHIFT; ++ ++ if (down_write_killable(¤t->mm->mmap_sem)) ++ return -EINTR; ++ ++ locked += atomic_long_read(¤t->mm->locked_vm); ++ if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) { ++ /* ++ * It is possible that the regions requested intersect with ++ * previously mlocked areas, that part area in "mm->locked_vm" ++ * should not be counted to new mlock increment count. So check ++ * and adjust locked count if necessary. ++ */ ++ locked -= count_mm_mlocked_page_nr(current->mm, ++ start, len); ++ } ++ ++ /* check against resource limits */ ++ if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) ++ error = apply_vma_lock_flags(start, len, flags); ++ ++ up_write(¤t->mm->mmap_sem); ++ if (error) ++ return error; ++ ++ error = __mm_populate(start, len, 0); ++ if (error) ++ return __mlock_posix_error_return(error); ++ return 0; ++} ++ ++SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) ++{ ++ return do_mlock(start, len, VM_LOCKED); ++} ++ ++SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) ++{ ++ vm_flags_t vm_flags = VM_LOCKED; ++ ++ if (flags & ~MLOCK_ONFAULT) ++ return -EINVAL; ++ ++ if (flags & MLOCK_ONFAULT) ++ vm_flags |= VM_LOCKONFAULT; ++ ++ return do_mlock(start, len, vm_flags); ++} ++ ++SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) ++{ ++ int ret; ++ ++ len = PAGE_ALIGN(len + (offset_in_page(start))); ++ start &= PAGE_MASK; ++ ++ if (down_write_killable(¤t->mm->mmap_sem)) ++ return -EINTR; ++ ret = apply_vma_lock_flags(start, len, 0); ++ up_write(¤t->mm->mmap_sem); ++ ++ return ret; ++} ++ ++/* ++ * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall) ++ * and translate into the appropriate modifications to mm->def_flags and/or the ++ * flags for all current VMAs. ++ * ++ * There are a couple of subtleties with this. If mlockall() is called multiple ++ * times with different flags, the values do not necessarily stack. If mlockall ++ * is called once including the MCL_FUTURE flag and then a second time without ++ * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags. 
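++ *
++ * Note that MCL_ONFAULT on its own is rejected with -EINVAL by the mlockall()
++ * syscall; it only modifies the behaviour of MCL_CURRENT and MCL_FUTURE.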
++ */ ++static int apply_mlockall_flags(int flags) ++{ ++ struct vm_area_struct * vma, * prev = NULL; ++ vm_flags_t to_add = 0; ++ ++ current->mm->def_flags &= VM_LOCKED_CLEAR_MASK; ++ if (flags & MCL_FUTURE) { ++ current->mm->def_flags |= VM_LOCKED; ++ ++ if (flags & MCL_ONFAULT) ++ current->mm->def_flags |= VM_LOCKONFAULT; ++ ++ if (!(flags & MCL_CURRENT)) ++ goto out; ++ } ++ ++ if (flags & MCL_CURRENT) { ++ to_add |= VM_LOCKED; ++ if (flags & MCL_ONFAULT) ++ to_add |= VM_LOCKONFAULT; ++ } ++ ++ for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { ++ vm_flags_t newflags; ++ ++ newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; ++ newflags |= to_add; ++ ++ /* Ignore errors */ ++ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); ++ cond_resched(); ++ } ++out: ++ return 0; ++} ++ ++SYSCALL_DEFINE1(mlockall, int, flags) ++{ ++ unsigned long lock_limit; ++ int ret; ++ ++ if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) || ++ flags == MCL_ONFAULT) ++ return -EINVAL; ++ ++ if (!can_do_mlock()) ++ return -EPERM; ++ ++ lock_limit = rlimit(RLIMIT_MEMLOCK); ++ lock_limit >>= PAGE_SHIFT; ++ ++ if (down_write_killable(¤t->mm->mmap_sem)) ++ return -EINTR; ++ ++ ret = -ENOMEM; ++ if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || ++ capable(CAP_IPC_LOCK)) ++ ret = apply_mlockall_flags(flags); ++ up_write(¤t->mm->mmap_sem); ++ if (!ret && (flags & MCL_CURRENT)) ++ mm_populate(0, TASK_SIZE); ++ ++ return ret; ++} ++ ++SYSCALL_DEFINE0(munlockall) ++{ ++ int ret; ++ ++ if (down_write_killable(¤t->mm->mmap_sem)) ++ return -EINTR; ++ ret = apply_mlockall_flags(0); ++ up_write(¤t->mm->mmap_sem); ++ return ret; ++} ++ ++/* ++ * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB ++ * shm segments) get accounted against the user_struct instead. 
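++ * The per-user locked_shm count is protected by shmlock_user_lock below.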
++ */ ++static DEFINE_SPINLOCK(shmlock_user_lock); ++ ++int user_shm_lock(size_t size, struct user_struct *user) ++{ ++ unsigned long lock_limit, locked; ++ int allowed = 0; ++ ++ locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ lock_limit = rlimit(RLIMIT_MEMLOCK); ++ if (lock_limit == RLIM_INFINITY) ++ allowed = 1; ++ lock_limit >>= PAGE_SHIFT; ++ spin_lock(&shmlock_user_lock); ++ if (!allowed && ++ locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK)) ++ goto out; ++ get_uid(user); ++ user->locked_shm += locked; ++ allowed = 1; ++out: ++ spin_unlock(&shmlock_user_lock); ++ return allowed; ++} ++ ++void user_shm_unlock(size_t size, struct user_struct *user) ++{ ++ spin_lock(&shmlock_user_lock); ++ user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ spin_unlock(&shmlock_user_lock); ++ free_uid(user); ++} +diff -uprN kernel/mm/mmu_context.c kernel_new/mm/mmu_context.c +--- kernel/mm/mmu_context.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/mm/mmu_context.c 2021-04-01 18:28:07.816863108 +0800 +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + + #include + +@@ -23,15 +24,18 @@ void use_mm(struct mm_struct *mm) + { + struct mm_struct *active_mm; + struct task_struct *tsk = current; ++ unsigned long flags; + + task_lock(tsk); + active_mm = tsk->active_mm; ++ ipipe_mm_switch_protect(flags); + if (active_mm != mm) { + mmgrab(mm); + tsk->active_mm = mm; + } + tsk->mm = mm; + switch_mm(active_mm, mm, tsk); ++ ipipe_mm_switch_unprotect(flags); + task_unlock(tsk); + #ifdef finish_arch_post_lock_switch + finish_arch_post_lock_switch(); +diff -uprN kernel/mm/mprotect.c kernel_new/mm/mprotect.c +--- kernel/mm/mprotect.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/mm/mprotect.c 2021-04-01 18:28:07.817863107 +0800 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -42,7 +43,7 @@ static unsigned long change_pte_range(st + struct mm_struct *mm = vma->vm_mm; + pte_t *pte, oldpte; + spinlock_t *ptl; +- unsigned long pages = 0; ++ unsigned long pages = 0, flags; + int target_node = NUMA_NO_NODE; + + /* +@@ -110,6 +111,7 @@ static unsigned long change_pte_range(st + continue; + } + ++ flags = hard_local_irq_save(); + ptent = ptep_modify_prot_start(mm, addr, pte); + ptent = pte_modify(ptent, newprot); + if (preserve_write) +@@ -122,6 +124,7 @@ static unsigned long change_pte_range(st + ptent = pte_mkwrite(ptent); + } + ptep_modify_prot_commit(mm, addr, pte, ptent); ++ hard_local_irq_restore(flags); + pages++; + } else if (IS_ENABLED(CONFIG_MIGRATION)) { + swp_entry_t entry = pte_to_swp_entry(oldpte); +@@ -336,6 +339,12 @@ unsigned long change_protection(struct v + pages = hugetlb_change_protection(vma, start, end, newprot); + else + pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa); ++#ifdef CONFIG_IPIPE ++ if (test_bit(MMF_VM_PINNED, &vma->vm_mm->flags) && ++ ((vma->vm_flags | vma->vm_mm->def_flags) & VM_LOCKED) && ++ (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) ++ __ipipe_pin_vma(vma->vm_mm, vma); ++#endif + + return pages; + } +diff -uprN kernel/mm/mprotect.c.orig kernel_new/mm/mprotect.c.orig +--- kernel/mm/mprotect.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/mm/mprotect.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,670 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * mm/mprotect.c ++ * ++ * (C) Copyright 1994 Linus Torvalds ++ * (C) Copyright 2002 Christoph Hellwig ++ * ++ * Address space accounting code ++ * (C) Copyright 2002 Red Hat Inc, 
All Rights Reserved ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "internal.h" ++ ++static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ++ unsigned long addr, unsigned long end, pgprot_t newprot, ++ int dirty_accountable, int prot_numa) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pte_t *pte, oldpte; ++ spinlock_t *ptl; ++ unsigned long pages = 0; ++ int target_node = NUMA_NO_NODE; ++ ++ /* ++ * Can be called with only the mmap_sem for reading by ++ * prot_numa so we must check the pmd isn't constantly ++ * changing from under us from pmd_none to pmd_trans_huge ++ * and/or the other way around. ++ */ ++ if (pmd_trans_unstable(pmd)) ++ return 0; ++ ++ /* ++ * The pmd points to a regular pte so the pmd can't change ++ * from under us even if the mmap_sem is only hold for ++ * reading. ++ */ ++ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); ++ ++ /* Get target node for single threaded private VMAs */ ++ if (prot_numa && !(vma->vm_flags & VM_SHARED) && ++ atomic_read(&vma->vm_mm->mm_users) == 1) ++ target_node = numa_node_id(); ++ ++ flush_tlb_batched_pending(vma->vm_mm); ++ arch_enter_lazy_mmu_mode(); ++ do { ++ oldpte = *pte; ++ if (pte_present(oldpte)) { ++ pte_t ptent; ++ bool preserve_write = prot_numa && pte_write(oldpte); ++ ++ /* ++ * Avoid trapping faults against the zero or KSM ++ * pages. See similar comment in change_huge_pmd. ++ */ ++ if (prot_numa) { ++ struct page *page; ++ ++ page = vm_normal_page(vma, addr, oldpte); ++ if (!page || PageKsm(page)) ++ continue; ++ ++ /* Also skip shared copy-on-write pages */ ++ if (is_cow_mapping(vma->vm_flags) && ++ page_mapcount(page) != 1) ++ continue; ++ ++ /* ++ * While migration can move some dirty pages, ++ * it cannot move them all from MIGRATE_ASYNC ++ * context. ++ */ ++ if (page_is_file_cache(page) && PageDirty(page)) ++ continue; ++ ++ /* Avoid TLB flush if possible */ ++ if (pte_protnone(oldpte)) ++ continue; ++ ++ /* ++ * Don't mess with PTEs if page is already on the node ++ * a single-threaded process is running on. ++ */ ++ if (target_node == page_to_nid(page)) ++ continue; ++ } ++ ++ ptent = ptep_modify_prot_start(mm, addr, pte); ++ ptent = pte_modify(ptent, newprot); ++ if (preserve_write) ++ ptent = pte_mk_savedwrite(ptent); ++ ++ /* Avoid taking write faults for known dirty pages */ ++ if (dirty_accountable && pte_dirty(ptent) && ++ (pte_soft_dirty(ptent) || ++ !(vma->vm_flags & VM_SOFTDIRTY))) { ++ ptent = pte_mkwrite(ptent); ++ } ++ ptep_modify_prot_commit(mm, addr, pte, ptent); ++ pages++; ++ } else if (IS_ENABLED(CONFIG_MIGRATION)) { ++ swp_entry_t entry = pte_to_swp_entry(oldpte); ++ ++ if (is_write_migration_entry(entry)) { ++ pte_t newpte; ++ /* ++ * A protection check is difficult so ++ * just be safe and disable write ++ */ ++ make_migration_entry_read(&entry); ++ newpte = swp_entry_to_pte(entry); ++ if (pte_swp_soft_dirty(oldpte)) ++ newpte = pte_swp_mksoft_dirty(newpte); ++ set_pte_at(mm, addr, pte, newpte); ++ ++ pages++; ++ } ++ ++ if (is_write_device_private_entry(entry)) { ++ pte_t newpte; ++ ++ /* ++ * We do not preserve soft-dirtiness. See ++ * copy_one_pte() for explanation. 
++ */ ++ make_device_private_entry_read(&entry); ++ newpte = swp_entry_to_pte(entry); ++ set_pte_at(mm, addr, pte, newpte); ++ ++ pages++; ++ } ++ } ++ } while (pte++, addr += PAGE_SIZE, addr != end); ++ arch_leave_lazy_mmu_mode(); ++ pte_unmap_unlock(pte - 1, ptl); ++ ++ return pages; ++} ++ ++/* ++ * Used when setting automatic NUMA hinting protection where it is ++ * critical that a numa hinting PMD is not confused with a bad PMD. ++ */ ++static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) ++{ ++ pmd_t pmdval = pmd_read_atomic(pmd); ++ ++ /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++ barrier(); ++#endif ++ ++ if (pmd_none(pmdval)) ++ return 1; ++ if (pmd_trans_huge(pmdval)) ++ return 0; ++ if (unlikely(pmd_bad(pmdval))) { ++ pmd_clear_bad(pmd); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static inline unsigned long change_pmd_range(struct vm_area_struct *vma, ++ pud_t *pud, unsigned long addr, unsigned long end, ++ pgprot_t newprot, int dirty_accountable, int prot_numa) ++{ ++ pmd_t *pmd; ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long next; ++ unsigned long pages = 0; ++ unsigned long nr_huge_updates = 0; ++ unsigned long mni_start = 0; ++ ++ pmd = pmd_offset(pud, addr); ++ do { ++ unsigned long this_pages; ++ ++ next = pmd_addr_end(addr, end); ++ ++ /* ++ * Automatic NUMA balancing walks the tables with mmap_sem ++ * held for read. It's possible a parallel update to occur ++ * between pmd_trans_huge() and a pmd_none_or_clear_bad() ++ * check leading to a false positive and clearing. ++ * Hence, it's necessary to atomically read the PMD value ++ * for all the checks. ++ */ ++ if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && ++ pmd_none_or_clear_bad_unless_trans_huge(pmd)) ++ goto next; ++ ++ /* invoke the mmu notifier if the pmd is populated */ ++ if (!mni_start) { ++ mni_start = addr; ++ mmu_notifier_invalidate_range_start(mm, mni_start, end); ++ } ++ ++ if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { ++ if (next - addr != HPAGE_PMD_SIZE) { ++ __split_huge_pmd(vma, pmd, addr, false, NULL); ++ } else { ++ int nr_ptes = change_huge_pmd(vma, pmd, addr, ++ newprot, prot_numa); ++ ++ if (nr_ptes) { ++ if (nr_ptes == HPAGE_PMD_NR) { ++ pages += HPAGE_PMD_NR; ++ nr_huge_updates++; ++ } ++ ++ /* huge pmd was handled */ ++ goto next; ++ } ++ } ++ /* fall through, the trans huge pmd just split */ ++ } ++ this_pages = change_pte_range(vma, pmd, addr, next, newprot, ++ dirty_accountable, prot_numa); ++ pages += this_pages; ++next: ++ cond_resched(); ++ } while (pmd++, addr = next, addr != end); ++ ++ if (mni_start) ++ mmu_notifier_invalidate_range_end(mm, mni_start, end); ++ ++ if (nr_huge_updates) ++ count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates); ++ return pages; ++} ++ ++static inline unsigned long change_pud_range(struct vm_area_struct *vma, ++ p4d_t *p4d, unsigned long addr, unsigned long end, ++ pgprot_t newprot, int dirty_accountable, int prot_numa) ++{ ++ pud_t *pud; ++ unsigned long next; ++ unsigned long pages = 0; ++ ++ pud = pud_offset(p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_none_or_clear_bad(pud)) ++ continue; ++ pages += change_pmd_range(vma, pud, addr, next, newprot, ++ dirty_accountable, prot_numa); ++ } while (pud++, addr = next, addr != end); ++ ++ return pages; ++} ++ ++static inline unsigned long change_p4d_range(struct vm_area_struct *vma, ++ pgd_t *pgd, unsigned long addr, unsigned long end, ++ pgprot_t newprot, int dirty_accountable, 
int prot_numa) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ unsigned long pages = 0; ++ ++ p4d = p4d_offset(pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ if (p4d_none_or_clear_bad(p4d)) ++ continue; ++ pages += change_pud_range(vma, p4d, addr, next, newprot, ++ dirty_accountable, prot_numa); ++ } while (p4d++, addr = next, addr != end); ++ ++ return pages; ++} ++ ++static unsigned long change_protection_range(struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end, pgprot_t newprot, ++ int dirty_accountable, int prot_numa) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pgd_t *pgd; ++ unsigned long next; ++ unsigned long start = addr; ++ unsigned long pages = 0; ++ ++ BUG_ON(addr >= end); ++ pgd = pgd_offset(mm, addr); ++ flush_cache_range(vma, addr, end); ++ inc_tlb_flush_pending(mm); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(pgd)) ++ continue; ++ pages += change_p4d_range(vma, pgd, addr, next, newprot, ++ dirty_accountable, prot_numa); ++ } while (pgd++, addr = next, addr != end); ++ ++ /* Only flush the TLB if we actually modified any entries: */ ++ if (pages) ++ flush_tlb_range(vma, start, end); ++ dec_tlb_flush_pending(mm); ++ ++ return pages; ++} ++ ++unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, ++ unsigned long end, pgprot_t newprot, ++ int dirty_accountable, int prot_numa) ++{ ++ unsigned long pages; ++ ++ if (is_vm_hugetlb_page(vma)) ++ pages = hugetlb_change_protection(vma, start, end, newprot); ++ else ++ pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa); ++ ++ return pages; ++} ++ ++static int prot_none_pte_entry(pte_t *pte, unsigned long addr, ++ unsigned long next, struct mm_walk *walk) ++{ ++ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? ++ 0 : -EACCES; ++} ++ ++static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask, ++ unsigned long addr, unsigned long next, ++ struct mm_walk *walk) ++{ ++ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? ++ 0 : -EACCES; ++} ++ ++static int prot_none_test(unsigned long addr, unsigned long next, ++ struct mm_walk *walk) ++{ ++ return 0; ++} ++ ++static int prot_none_walk(struct vm_area_struct *vma, unsigned long start, ++ unsigned long end, unsigned long newflags) ++{ ++ pgprot_t new_pgprot = vm_get_page_prot(newflags); ++ struct mm_walk prot_none_walk = { ++ .pte_entry = prot_none_pte_entry, ++ .hugetlb_entry = prot_none_hugetlb_entry, ++ .test_walk = prot_none_test, ++ .mm = current->mm, ++ .private = &new_pgprot, ++ }; ++ ++ return walk_page_range(start, end, &prot_none_walk); ++} ++ ++int ++mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, ++ unsigned long start, unsigned long end, unsigned long newflags) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long oldflags = vma->vm_flags; ++ long nrpages = (end - start) >> PAGE_SHIFT; ++ unsigned long charged = 0; ++ pgoff_t pgoff; ++ int error; ++ int dirty_accountable = 0; ++ ++ if (newflags == oldflags) { ++ *pprev = vma; ++ return 0; ++ } ++ ++ /* ++ * Do PROT_NONE PFN permission checks here when we can still ++ * bail out without undoing a lot of state. This is a rather ++ * uncommon case, so doesn't need to be very optimized. 
++ */ ++ if (arch_has_pfn_modify_check() && ++ (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && ++ (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { ++ error = prot_none_walk(vma, start, end, newflags); ++ if (error) ++ return error; ++ } ++ ++ /* ++ * If we make a private mapping writable we increase our commit; ++ * but (without finer accounting) cannot reduce our commit if we ++ * make it unwritable again. hugetlb mapping were accounted for ++ * even if read-only so there is no need to account for them here ++ */ ++ if (newflags & VM_WRITE) { ++ /* Check space limits when area turns into data. */ ++ if (!may_expand_vm(mm, newflags, nrpages) && ++ may_expand_vm(mm, oldflags, nrpages)) ++ return -ENOMEM; ++ if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB| ++ VM_SHARED|VM_NORESERVE))) { ++ charged = nrpages; ++ if (security_vm_enough_memory_mm(mm, charged)) ++ return -ENOMEM; ++ newflags |= VM_ACCOUNT; ++ } ++ } ++ ++ /* ++ * First try to merge with previous and/or next vma. ++ */ ++ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); ++ *pprev = vma_merge(mm, *pprev, start, end, newflags, ++ vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), ++ vma->vm_userfaultfd_ctx); ++ if (*pprev) { ++ vma = *pprev; ++ VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY); ++ goto success; ++ } ++ ++ *pprev = vma; ++ ++ if (start != vma->vm_start) { ++ error = split_vma(mm, vma, start, 1); ++ if (error) ++ goto fail; ++ } ++ ++ if (end != vma->vm_end) { ++ error = split_vma(mm, vma, end, 0); ++ if (error) ++ goto fail; ++ } ++ ++success: ++ /* ++ * vm_flags and vm_page_prot are protected by the mmap_sem ++ * held in write mode. ++ */ ++ vma->vm_flags = newflags; ++ dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot); ++ vma_set_page_prot(vma); ++ ++ change_protection(vma, start, end, vma->vm_page_prot, ++ dirty_accountable, 0); ++ ++ /* ++ * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major ++ * fault on access. ++ */ ++ if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED && ++ (newflags & VM_WRITE)) { ++ populate_vma_page_range(vma, start, end, NULL); ++ } ++ ++ vm_stat_account(mm, oldflags, -nrpages); ++ vm_stat_account(mm, newflags, nrpages); ++ perf_event_mmap(vma); ++ return 0; ++ ++fail: ++ vm_unacct_memory(charged); ++ return error; ++} ++ ++/* ++ * pkey==-1 when doing a legacy mprotect() ++ */ ++static int do_mprotect_pkey(unsigned long start, size_t len, ++ unsigned long prot, int pkey) ++{ ++ unsigned long nstart, end, tmp, reqprot; ++ struct vm_area_struct *vma, *prev; ++ int error = -EINVAL; ++ const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); ++ const bool rier = (current->personality & READ_IMPLIES_EXEC) && ++ (prot & PROT_READ); ++ ++ prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP); ++ if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */ ++ return -EINVAL; ++ ++ if (start & ~PAGE_MASK) ++ return -EINVAL; ++ if (!len) ++ return 0; ++ len = PAGE_ALIGN(len); ++ end = start + len; ++ if (end <= start) ++ return -ENOMEM; ++ if (!arch_validate_prot(prot, start)) ++ return -EINVAL; ++ ++ reqprot = prot; ++ ++ if (down_write_killable(¤t->mm->mmap_sem)) ++ return -EINTR; ++ ++ /* ++ * If userspace did not allocate the pkey, do not let ++ * them use it here. 
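++	 * A pkey of -1 denotes a plain mprotect() call and always passes
++	 * this check.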
++ */ ++ error = -EINVAL; ++ if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey)) ++ goto out; ++ ++ vma = find_vma(current->mm, start); ++ error = -ENOMEM; ++ if (!vma) ++ goto out; ++ prev = vma->vm_prev; ++ if (unlikely(grows & PROT_GROWSDOWN)) { ++ if (vma->vm_start >= end) ++ goto out; ++ start = vma->vm_start; ++ error = -EINVAL; ++ if (!(vma->vm_flags & VM_GROWSDOWN)) ++ goto out; ++ } else { ++ if (vma->vm_start > start) ++ goto out; ++ if (unlikely(grows & PROT_GROWSUP)) { ++ end = vma->vm_end; ++ error = -EINVAL; ++ if (!(vma->vm_flags & VM_GROWSUP)) ++ goto out; ++ } ++ } ++ if (start > vma->vm_start) ++ prev = vma; ++ ++ for (nstart = start ; ; ) { ++ unsigned long mask_off_old_flags; ++ unsigned long newflags; ++ int new_vma_pkey; ++ ++ /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ ++ ++ /* Does the application expect PROT_READ to imply PROT_EXEC */ ++ if (rier && (vma->vm_flags & VM_MAYEXEC)) ++ prot |= PROT_EXEC; ++ ++ /* ++ * Each mprotect() call explicitly passes r/w/x permissions. ++ * If a permission is not passed to mprotect(), it must be ++ * cleared from the VMA. ++ */ ++ mask_off_old_flags = VM_READ | VM_WRITE | VM_EXEC | ++ VM_FLAGS_CLEAR; ++ ++ new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey); ++ newflags = calc_vm_prot_bits(prot, new_vma_pkey); ++ newflags |= (vma->vm_flags & ~mask_off_old_flags); ++ ++ /* newflags >> 4 shift VM_MAY% in place of VM_% */ ++ if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) { ++ error = -EACCES; ++ goto out; ++ } ++ ++ error = security_file_mprotect(vma, reqprot, prot); ++ if (error) ++ goto out; ++ ++ tmp = vma->vm_end; ++ if (tmp > end) ++ tmp = end; ++ error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); ++ if (error) ++ goto out; ++ nstart = tmp; ++ ++ if (nstart < prev->vm_end) ++ nstart = prev->vm_end; ++ if (nstart >= end) ++ goto out; ++ ++ vma = prev->vm_next; ++ if (!vma || vma->vm_start != nstart) { ++ error = -ENOMEM; ++ goto out; ++ } ++ prot = reqprot; ++ } ++out: ++ up_write(¤t->mm->mmap_sem); ++ return error; ++} ++ ++SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, ++ unsigned long, prot) ++{ ++ return do_mprotect_pkey(start, len, prot, -1); ++} ++ ++#ifdef CONFIG_ARCH_HAS_PKEYS ++ ++SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len, ++ unsigned long, prot, int, pkey) ++{ ++ return do_mprotect_pkey(start, len, prot, pkey); ++} ++ ++SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val) ++{ ++ int pkey; ++ int ret; ++ ++ /* No flags supported yet. */ ++ if (flags) ++ return -EINVAL; ++ /* check for unsupported init values */ ++ if (init_val & ~PKEY_ACCESS_MASK) ++ return -EINVAL; ++ ++ down_write(¤t->mm->mmap_sem); ++ pkey = mm_pkey_alloc(current->mm); ++ ++ ret = -ENOSPC; ++ if (pkey == -1) ++ goto out; ++ ++ ret = arch_set_user_pkey_access(current, pkey, init_val); ++ if (ret) { ++ mm_pkey_free(current->mm, pkey); ++ goto out; ++ } ++ ret = pkey; ++out: ++ up_write(¤t->mm->mmap_sem); ++ return ret; ++} ++ ++SYSCALL_DEFINE1(pkey_free, int, pkey) ++{ ++ int ret; ++ ++ down_write(¤t->mm->mmap_sem); ++ ret = mm_pkey_free(current->mm, pkey); ++ up_write(¤t->mm->mmap_sem); ++ ++ /* ++ * We could provie warnings or errors if any VMA still ++ * has the pkey set here. 
++ */ ++ return ret; ++} ++ ++#endif /* CONFIG_ARCH_HAS_PKEYS */ +diff -uprN kernel/mm/vmalloc.c kernel_new/mm/vmalloc.c +--- kernel/mm/vmalloc.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/mm/vmalloc.c 2021-04-01 18:28:07.817863107 +0800 +@@ -233,6 +233,8 @@ static int vmap_page_range_noflush(unsig + return err; + } while (pgd++, addr = next, addr != end); + ++ __ipipe_pin_mapping_globally(start, end); ++ + return nr; + } + +diff -uprN kernel/mm/vmalloc.c.orig kernel_new/mm/vmalloc.c.orig +--- kernel/mm/vmalloc.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/mm/vmalloc.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2778 @@ ++/* ++ * linux/mm/vmalloc.c ++ * ++ * Copyright (C) 1993 Linus Torvalds ++ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 ++ * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian , May 2000 ++ * Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002 ++ * Numa awareness, Christoph Lameter, SGI, June 2005 ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "internal.h" ++ ++struct vfree_deferred { ++ struct llist_head list; ++ struct work_struct wq; ++}; ++static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred); ++ ++static void __vunmap(const void *, int); ++ ++static void free_work(struct work_struct *w) ++{ ++ struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq); ++ struct llist_node *t, *llnode; ++ ++ llist_for_each_safe(llnode, t, llist_del_all(&p->list)) ++ __vunmap((void *)llnode, 1); ++} ++ ++/*** Page table manipulation functions ***/ ++ ++static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) ++{ ++ pte_t *pte; ++ ++ pte = pte_offset_kernel(pmd, addr); ++ do { ++ pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte); ++ WARN_ON(!pte_none(ptent) && !pte_present(ptent)); ++ } while (pte++, addr += PAGE_SIZE, addr != end); ++} ++ ++static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ ++ pmd = pmd_offset(pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (pmd_clear_huge(pmd)) ++ continue; ++ if (pmd_none_or_clear_bad(pmd)) ++ continue; ++ vunmap_pte_range(pmd, addr, next); ++ } while (pmd++, addr = next, addr != end); ++} ++ ++static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end) ++{ ++ pud_t *pud; ++ unsigned long next; ++ ++ pud = pud_offset(p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_clear_huge(pud)) ++ continue; ++ if (pud_none_or_clear_bad(pud)) ++ continue; ++ vunmap_pmd_range(pud, addr, next); ++ } while (pud++, addr = next, addr != end); ++} ++ ++static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ ++ p4d = p4d_offset(pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ if (p4d_clear_huge(p4d)) ++ continue; ++ if (p4d_none_or_clear_bad(p4d)) ++ continue; ++ vunmap_pud_range(p4d, addr, next); ++ } while (p4d++, addr = next, addr != end); ++} ++ ++static void vunmap_page_range(unsigned long addr, unsigned long end) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ ++ BUG_ON(addr >= end); ++ pgd = pgd_offset_k(addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if 
(pgd_none_or_clear_bad(pgd)) ++ continue; ++ vunmap_p4d_range(pgd, addr, next); ++ } while (pgd++, addr = next, addr != end); ++} ++ ++static int vmap_pte_range(pmd_t *pmd, unsigned long addr, ++ unsigned long end, pgprot_t prot, struct page **pages, int *nr) ++{ ++ pte_t *pte; ++ ++ /* ++ * nr is a running index into the array which helps higher level ++ * callers keep track of where we're up to. ++ */ ++ ++ pte = pte_alloc_kernel(pmd, addr); ++ if (!pte) ++ return -ENOMEM; ++ do { ++ struct page *page = pages[*nr]; ++ ++ if (WARN_ON(!pte_none(*pte))) ++ return -EBUSY; ++ if (WARN_ON(!page)) ++ return -ENOMEM; ++ set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); ++ (*nr)++; ++ } while (pte++, addr += PAGE_SIZE, addr != end); ++ return 0; ++} ++ ++static int vmap_pmd_range(pud_t *pud, unsigned long addr, ++ unsigned long end, pgprot_t prot, struct page **pages, int *nr) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ ++ pmd = pmd_alloc(&init_mm, pud, addr); ++ if (!pmd) ++ return -ENOMEM; ++ do { ++ next = pmd_addr_end(addr, end); ++ if (vmap_pte_range(pmd, addr, next, prot, pages, nr)) ++ return -ENOMEM; ++ } while (pmd++, addr = next, addr != end); ++ return 0; ++} ++ ++static int vmap_pud_range(p4d_t *p4d, unsigned long addr, ++ unsigned long end, pgprot_t prot, struct page **pages, int *nr) ++{ ++ pud_t *pud; ++ unsigned long next; ++ ++ pud = pud_alloc(&init_mm, p4d, addr); ++ if (!pud) ++ return -ENOMEM; ++ do { ++ next = pud_addr_end(addr, end); ++ if (vmap_pmd_range(pud, addr, next, prot, pages, nr)) ++ return -ENOMEM; ++ } while (pud++, addr = next, addr != end); ++ return 0; ++} ++ ++static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, ++ unsigned long end, pgprot_t prot, struct page **pages, int *nr) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ ++ p4d = p4d_alloc(&init_mm, pgd, addr); ++ if (!p4d) ++ return -ENOMEM; ++ do { ++ next = p4d_addr_end(addr, end); ++ if (vmap_pud_range(p4d, addr, next, prot, pages, nr)) ++ return -ENOMEM; ++ } while (p4d++, addr = next, addr != end); ++ return 0; ++} ++ ++/* ++ * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and ++ * will have pfns corresponding to the "pages" array. ++ * ++ * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] ++ */ ++static int vmap_page_range_noflush(unsigned long start, unsigned long end, ++ pgprot_t prot, struct page **pages) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ unsigned long addr = start; ++ int err = 0; ++ int nr = 0; ++ ++ BUG_ON(addr >= end); ++ pgd = pgd_offset_k(addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr); ++ if (err) ++ return err; ++ } while (pgd++, addr = next, addr != end); ++ ++ return nr; ++} ++ ++static int vmap_page_range(unsigned long start, unsigned long end, ++ pgprot_t prot, struct page **pages) ++{ ++ int ret; ++ ++ ret = vmap_page_range_noflush(start, end, prot, pages); ++ flush_cache_vmap(start, end); ++ return ret; ++} ++ ++int is_vmalloc_or_module_addr(const void *x) ++{ ++ /* ++ * ARM, x86-64 and sparc64 put modules in a special place, ++ * and fall back on vmalloc() if that fails. Others ++ * just put it in the vmalloc space. ++ */ ++#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) ++ unsigned long addr = (unsigned long)x; ++ if (addr >= MODULES_VADDR && addr < MODULES_END) ++ return 1; ++#endif ++ return is_vmalloc_addr(x); ++} ++ ++/* ++ * Walk a vmap address to the struct page it maps. 
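++ * Returns NULL when nothing is currently mapped at the address (including
++ * huge mappings, see the comment inside the function).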
++ */ ++struct page *vmalloc_to_page(const void *vmalloc_addr) ++{ ++ unsigned long addr = (unsigned long) vmalloc_addr; ++ struct page *page = NULL; ++ pgd_t *pgd = pgd_offset_k(addr); ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *ptep, pte; ++ ++ /* ++ * XXX we might need to change this if we add VIRTUAL_BUG_ON for ++ * architectures that do not vmalloc module space ++ */ ++ VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); ++ ++ if (pgd_none(*pgd)) ++ return NULL; ++ p4d = p4d_offset(pgd, addr); ++ if (p4d_none(*p4d)) ++ return NULL; ++ pud = pud_offset(p4d, addr); ++ ++ /* ++ * Don't dereference bad PUD or PMD (below) entries. This will also ++ * identify huge mappings, which we may encounter on architectures ++ * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be ++ * identified as vmalloc addresses by is_vmalloc_addr(), but are ++ * not [unambiguously] associated with a struct page, so there is ++ * no correct value to return for them. ++ */ ++ WARN_ON_ONCE(pud_bad(*pud)); ++ if (pud_none(*pud) || pud_bad(*pud)) ++ return NULL; ++ pmd = pmd_offset(pud, addr); ++ WARN_ON_ONCE(pmd_bad(*pmd)); ++ if (pmd_none(*pmd) || pmd_bad(*pmd)) ++ return NULL; ++ ++ ptep = pte_offset_map(pmd, addr); ++ pte = *ptep; ++ if (pte_present(pte)) ++ page = pte_page(pte); ++ pte_unmap(ptep); ++ return page; ++} ++EXPORT_SYMBOL(vmalloc_to_page); ++ ++/* ++ * Map a vmalloc()-space virtual address to the physical page frame number. ++ */ ++unsigned long vmalloc_to_pfn(const void *vmalloc_addr) ++{ ++ return page_to_pfn(vmalloc_to_page(vmalloc_addr)); ++} ++EXPORT_SYMBOL(vmalloc_to_pfn); ++ ++ ++/*** Global kva allocator ***/ ++ ++#define VM_LAZY_FREE 0x02 ++#define VM_VM_AREA 0x04 ++ ++static DEFINE_SPINLOCK(vmap_area_lock); ++/* Export for kexec only */ ++LIST_HEAD(vmap_area_list); ++static LLIST_HEAD(vmap_purge_list); ++static struct rb_root vmap_area_root = RB_ROOT; ++ ++/* The vmap cache globals are protected by vmap_area_lock */ ++static struct rb_node *free_vmap_cache; ++static unsigned long cached_hole_size; ++static unsigned long cached_vstart; ++static unsigned long cached_align; ++ ++static unsigned long vmap_area_pcpu_hole; ++ ++static struct vmap_area *__find_vmap_area(unsigned long addr) ++{ ++ struct rb_node *n = vmap_area_root.rb_node; ++ ++ while (n) { ++ struct vmap_area *va; ++ ++ va = rb_entry(n, struct vmap_area, rb_node); ++ if (addr < va->va_start) ++ n = n->rb_left; ++ else if (addr >= va->va_end) ++ n = n->rb_right; ++ else ++ return va; ++ } ++ ++ return NULL; ++} ++ ++static void __insert_vmap_area(struct vmap_area *va) ++{ ++ struct rb_node **p = &vmap_area_root.rb_node; ++ struct rb_node *parent = NULL; ++ struct rb_node *tmp; ++ ++ while (*p) { ++ struct vmap_area *tmp_va; ++ ++ parent = *p; ++ tmp_va = rb_entry(parent, struct vmap_area, rb_node); ++ if (va->va_start < tmp_va->va_end) ++ p = &(*p)->rb_left; ++ else if (va->va_end > tmp_va->va_start) ++ p = &(*p)->rb_right; ++ else ++ BUG(); ++ } ++ ++ rb_link_node(&va->rb_node, parent, p); ++ rb_insert_color(&va->rb_node, &vmap_area_root); ++ ++ /* address-sort this list */ ++ tmp = rb_prev(&va->rb_node); ++ if (tmp) { ++ struct vmap_area *prev; ++ prev = rb_entry(tmp, struct vmap_area, rb_node); ++ list_add_rcu(&va->list, &prev->list); ++ } else ++ list_add_rcu(&va->list, &vmap_area_list); ++} ++ ++static void purge_vmap_area_lazy(void); ++ ++static BLOCKING_NOTIFIER_HEAD(vmap_notify_list); ++ ++/* ++ * Allocate a region of KVA of the specified size and alignment, within the ++ * vstart and vend. 
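++ * Returns the new vmap_area, or an ERR_PTR() value when no KVA could be
++ * allocated.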
++ */ ++static struct vmap_area *alloc_vmap_area(unsigned long size, ++ unsigned long align, ++ unsigned long vstart, unsigned long vend, ++ int node, gfp_t gfp_mask) ++{ ++ struct vmap_area *va; ++ struct rb_node *n; ++ unsigned long addr; ++ int purged = 0; ++ struct vmap_area *first; ++ ++ BUG_ON(!size); ++ BUG_ON(offset_in_page(size)); ++ BUG_ON(!is_power_of_2(align)); ++ ++ might_sleep(); ++ ++ va = kmalloc_node(sizeof(struct vmap_area), ++ gfp_mask & GFP_RECLAIM_MASK, node); ++ if (unlikely(!va)) ++ return ERR_PTR(-ENOMEM); ++ ++ /* ++ * Only scan the relevant parts containing pointers to other objects ++ * to avoid false negatives. ++ */ ++ kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK); ++ ++retry: ++ spin_lock(&vmap_area_lock); ++ /* ++ * Invalidate cache if we have more permissive parameters. ++ * cached_hole_size notes the largest hole noticed _below_ ++ * the vmap_area cached in free_vmap_cache: if size fits ++ * into that hole, we want to scan from vstart to reuse ++ * the hole instead of allocating above free_vmap_cache. ++ * Note that __free_vmap_area may update free_vmap_cache ++ * without updating cached_hole_size or cached_align. ++ */ ++ if (!free_vmap_cache || ++ size < cached_hole_size || ++ vstart < cached_vstart || ++ align < cached_align) { ++nocache: ++ cached_hole_size = 0; ++ free_vmap_cache = NULL; ++ } ++ /* record if we encounter less permissive parameters */ ++ cached_vstart = vstart; ++ cached_align = align; ++ ++ /* find starting point for our search */ ++ if (free_vmap_cache) { ++ first = rb_entry(free_vmap_cache, struct vmap_area, rb_node); ++ addr = ALIGN(first->va_end, align); ++ if (addr < vstart) ++ goto nocache; ++ if (addr + size < addr) ++ goto overflow; ++ ++ } else { ++ addr = ALIGN(vstart, align); ++ if (addr + size < addr) ++ goto overflow; ++ ++ n = vmap_area_root.rb_node; ++ first = NULL; ++ ++ while (n) { ++ struct vmap_area *tmp; ++ tmp = rb_entry(n, struct vmap_area, rb_node); ++ if (tmp->va_end >= addr) { ++ first = tmp; ++ if (tmp->va_start <= addr) ++ break; ++ n = n->rb_left; ++ } else ++ n = n->rb_right; ++ } ++ ++ if (!first) ++ goto found; ++ } ++ ++ /* from the starting point, walk areas until a suitable hole is found */ ++ while (addr + size > first->va_start && addr + size <= vend) { ++ if (addr + cached_hole_size < first->va_start) ++ cached_hole_size = first->va_start - addr; ++ addr = ALIGN(first->va_end, align); ++ if (addr + size < addr) ++ goto overflow; ++ ++ if (list_is_last(&first->list, &vmap_area_list)) ++ goto found; ++ ++ first = list_next_entry(first, list); ++ } ++ ++found: ++ /* ++ * Check also calculated address against the vstart, ++ * because it can be 0 because of big align request. 
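++	 * (ALIGN() may wrap the candidate address around to 0 for a very
++	 * large alignment; the addr < vstart test catches that case.)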
++ */ ++ if (addr + size > vend || addr < vstart) ++ goto overflow; ++ ++ va->va_start = addr; ++ va->va_end = addr + size; ++ va->flags = 0; ++ __insert_vmap_area(va); ++ free_vmap_cache = &va->rb_node; ++ spin_unlock(&vmap_area_lock); ++ ++ BUG_ON(!IS_ALIGNED(va->va_start, align)); ++ BUG_ON(va->va_start < vstart); ++ BUG_ON(va->va_end > vend); ++ ++ return va; ++ ++overflow: ++ spin_unlock(&vmap_area_lock); ++ if (!purged) { ++ purge_vmap_area_lazy(); ++ purged = 1; ++ goto retry; ++ } ++ ++ if (gfpflags_allow_blocking(gfp_mask)) { ++ unsigned long freed = 0; ++ blocking_notifier_call_chain(&vmap_notify_list, 0, &freed); ++ if (freed > 0) { ++ purged = 0; ++ goto retry; ++ } ++ } ++ ++ if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) ++ pr_warn("vmap allocation for size %lu failed: use vmalloc= to increase size\n", ++ size); ++ kfree(va); ++ return ERR_PTR(-EBUSY); ++} ++ ++int register_vmap_purge_notifier(struct notifier_block *nb) ++{ ++ return blocking_notifier_chain_register(&vmap_notify_list, nb); ++} ++EXPORT_SYMBOL_GPL(register_vmap_purge_notifier); ++ ++int unregister_vmap_purge_notifier(struct notifier_block *nb) ++{ ++ return blocking_notifier_chain_unregister(&vmap_notify_list, nb); ++} ++EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier); ++ ++static void __free_vmap_area(struct vmap_area *va) ++{ ++ BUG_ON(RB_EMPTY_NODE(&va->rb_node)); ++ ++ if (free_vmap_cache) { ++ if (va->va_end < cached_vstart) { ++ free_vmap_cache = NULL; ++ } else { ++ struct vmap_area *cache; ++ cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node); ++ if (va->va_start <= cache->va_start) { ++ free_vmap_cache = rb_prev(&va->rb_node); ++ /* ++ * We don't try to update cached_hole_size or ++ * cached_align, but it won't go very wrong. ++ */ ++ } ++ } ++ } ++ rb_erase(&va->rb_node, &vmap_area_root); ++ RB_CLEAR_NODE(&va->rb_node); ++ list_del_rcu(&va->list); ++ ++ /* ++ * Track the highest possible candidate for pcpu area ++ * allocation. Areas outside of vmalloc area can be returned ++ * here too, consider only end addresses which fall inside ++ * vmalloc area proper. ++ */ ++ if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END) ++ vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end); ++ ++ kfree_rcu(va, rcu_head); ++} ++ ++/* ++ * Free a region of KVA allocated by alloc_vmap_area ++ */ ++static void free_vmap_area(struct vmap_area *va) ++{ ++ spin_lock(&vmap_area_lock); ++ __free_vmap_area(va); ++ spin_unlock(&vmap_area_lock); ++} ++ ++/* ++ * Clear the pagetable entries of a given vmap_area ++ */ ++static void unmap_vmap_area(struct vmap_area *va) ++{ ++ vunmap_page_range(va->va_start, va->va_end); ++} ++ ++/* ++ * lazy_max_pages is the maximum amount of virtual address space we gather up ++ * before attempting to purge with a TLB flush. ++ * ++ * There is a tradeoff here: a larger number will cover more kernel page tables ++ * and take slightly longer to purge, but it will linearly reduce the number of ++ * global TLB flushes that must be performed. It would seem natural to scale ++ * this number up linearly with the number of CPUs (because vmapping activity ++ * could also scale linearly with the number of CPUs), however it is likely ++ * that in practice, workloads might be constrained in other ways that mean ++ * vmap activity will not scale linearly with CPUs. Also, I want to be ++ * conservative and not introduce a big latency on huge systems, so go with ++ * a less aggressive log scale. 
It will still be an improvement over the old ++ * code, and it will be simple to change the scale factor if we find that it ++ * becomes a problem on bigger systems. ++ */ ++static unsigned long lazy_max_pages(void) ++{ ++ unsigned int log; ++ ++ log = fls(num_online_cpus()); ++ ++ return log * (32UL * 1024 * 1024 / PAGE_SIZE); ++} ++ ++static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); ++ ++/* ++ * Serialize vmap purging. There is no actual criticial section protected ++ * by this look, but we want to avoid concurrent calls for performance ++ * reasons and to make the pcpu_get_vm_areas more deterministic. ++ */ ++static DEFINE_MUTEX(vmap_purge_lock); ++ ++/* for per-CPU blocks */ ++static void purge_fragmented_blocks_allcpus(void); ++ ++/* ++ * called before a call to iounmap() if the caller wants vm_area_struct's ++ * immediately freed. ++ */ ++void set_iounmap_nonlazy(void) ++{ ++ atomic_set(&vmap_lazy_nr, lazy_max_pages()+1); ++} ++ ++/* ++ * Purges all lazily-freed vmap areas. ++ */ ++static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) ++{ ++ struct llist_node *valist; ++ struct vmap_area *va; ++ struct vmap_area *n_va; ++ bool do_free = false; ++ ++ lockdep_assert_held(&vmap_purge_lock); ++ ++ valist = llist_del_all(&vmap_purge_list); ++ llist_for_each_entry(va, valist, purge_list) { ++ if (va->va_start < start) ++ start = va->va_start; ++ if (va->va_end > end) ++ end = va->va_end; ++ do_free = true; ++ } ++ ++ if (!do_free) ++ return false; ++ ++ flush_tlb_kernel_range(start, end); ++ ++ spin_lock(&vmap_area_lock); ++ llist_for_each_entry_safe(va, n_va, valist, purge_list) { ++ int nr = (va->va_end - va->va_start) >> PAGE_SHIFT; ++ ++ __free_vmap_area(va); ++ atomic_sub(nr, &vmap_lazy_nr); ++ cond_resched_lock(&vmap_area_lock); ++ } ++ spin_unlock(&vmap_area_lock); ++ return true; ++} ++ ++/* ++ * Kick off a purge of the outstanding lazy areas. Don't bother if somebody ++ * is already purging. ++ */ ++static void try_purge_vmap_area_lazy(void) ++{ ++ if (mutex_trylock(&vmap_purge_lock)) { ++ __purge_vmap_area_lazy(ULONG_MAX, 0); ++ mutex_unlock(&vmap_purge_lock); ++ } ++} ++ ++/* ++ * Kick off a purge of the outstanding lazy areas. ++ */ ++static void purge_vmap_area_lazy(void) ++{ ++ mutex_lock(&vmap_purge_lock); ++ purge_fragmented_blocks_allcpus(); ++ __purge_vmap_area_lazy(ULONG_MAX, 0); ++ mutex_unlock(&vmap_purge_lock); ++} ++ ++/* ++ * Free a vmap area, caller ensuring that the area has been unmapped ++ * and flush_cache_vunmap had been called for the correct range ++ * previously. 
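++ * The area is queued on vmap_purge_list; the TLB flush and the actual
++ * freeing are deferred to __purge_vmap_area_lazy().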
++ */ ++static void free_vmap_area_noflush(struct vmap_area *va) ++{ ++ int nr_lazy; ++ ++ nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT, ++ &vmap_lazy_nr); ++ ++ /* After this point, we may free va at any time */ ++ llist_add(&va->purge_list, &vmap_purge_list); ++ ++ if (unlikely(nr_lazy > lazy_max_pages())) ++ try_purge_vmap_area_lazy(); ++} ++ ++/* ++ * Free and unmap a vmap area ++ */ ++static void free_unmap_vmap_area(struct vmap_area *va) ++{ ++ flush_cache_vunmap(va->va_start, va->va_end); ++ unmap_vmap_area(va); ++ if (debug_pagealloc_enabled()) ++ flush_tlb_kernel_range(va->va_start, va->va_end); ++ ++ free_vmap_area_noflush(va); ++} ++ ++static struct vmap_area *find_vmap_area(unsigned long addr) ++{ ++ struct vmap_area *va; ++ ++ spin_lock(&vmap_area_lock); ++ va = __find_vmap_area(addr); ++ spin_unlock(&vmap_area_lock); ++ ++ return va; ++} ++ ++/*** Per cpu kva allocator ***/ ++ ++/* ++ * vmap space is limited especially on 32 bit architectures. Ensure there is ++ * room for at least 16 percpu vmap blocks per CPU. ++ */ ++/* ++ * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able ++ * to #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). Guess ++ * instead (we just need a rough idea) ++ */ ++#if BITS_PER_LONG == 32 ++#define VMALLOC_SPACE (128UL*1024*1024) ++#else ++#define VMALLOC_SPACE (128UL*1024*1024*1024) ++#endif ++ ++#define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE) ++#define VMAP_MAX_ALLOC BITS_PER_LONG /* 256K with 4K pages */ ++#define VMAP_BBMAP_BITS_MAX 1024 /* 4MB with 4K pages */ ++#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2) ++#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y)) /* can't use min() */ ++#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y)) /* can't use max() */ ++#define VMAP_BBMAP_BITS \ ++ VMAP_MIN(VMAP_BBMAP_BITS_MAX, \ ++ VMAP_MAX(VMAP_BBMAP_BITS_MIN, \ ++ VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16)) ++ ++#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) ++ ++static bool vmap_initialized __read_mostly = false; ++ ++struct vmap_block_queue { ++ spinlock_t lock; ++ struct list_head free; ++}; ++ ++struct vmap_block { ++ spinlock_t lock; ++ struct vmap_area *va; ++ unsigned long free, dirty; ++ unsigned long dirty_min, dirty_max; /*< dirty range */ ++ struct list_head free_list; ++ struct rcu_head rcu_head; ++ struct list_head purge; ++}; ++ ++/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ ++static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue); ++ ++/* ++ * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block ++ * in the free path. Could get rid of this if we change the API to return a ++ * "cookie" from alloc, to be passed to free. But no big deal yet. ++ */ ++static DEFINE_SPINLOCK(vmap_block_tree_lock); ++static RADIX_TREE(vmap_block_tree, GFP_ATOMIC); ++ ++/* ++ * We should probably have a fallback mechanism to allocate virtual memory ++ * out of partially filled vmap blocks. However vmap block sizing should be ++ * fairly reasonable according to the vmalloc size, so it shouldn't be a ++ * big problem. 
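++ * (Each vmap_block covers VMAP_BLOCK_SIZE bytes of KVA, i.e. VMAP_BBMAP_BITS
++ * pages.)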
++ */ ++ ++static unsigned long addr_to_vb_idx(unsigned long addr) ++{ ++ addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1); ++ addr /= VMAP_BLOCK_SIZE; ++ return addr; ++} ++ ++static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off) ++{ ++ unsigned long addr; ++ ++ addr = va_start + (pages_off << PAGE_SHIFT); ++ BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start)); ++ return (void *)addr; ++} ++ ++/** ++ * new_vmap_block - allocates new vmap_block and occupies 2^order pages in this ++ * block. Of course pages number can't exceed VMAP_BBMAP_BITS ++ * @order: how many 2^order pages should be occupied in newly allocated block ++ * @gfp_mask: flags for the page level allocator ++ * ++ * Returns: virtual address in a newly allocated block or ERR_PTR(-errno) ++ */ ++static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) ++{ ++ struct vmap_block_queue *vbq; ++ struct vmap_block *vb; ++ struct vmap_area *va; ++ unsigned long vb_idx; ++ int node, err; ++ void *vaddr; ++ ++ node = numa_node_id(); ++ ++ vb = kmalloc_node(sizeof(struct vmap_block), ++ gfp_mask & GFP_RECLAIM_MASK, node); ++ if (unlikely(!vb)) ++ return ERR_PTR(-ENOMEM); ++ ++ va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE, ++ VMALLOC_START, VMALLOC_END, ++ node, gfp_mask); ++ if (IS_ERR(va)) { ++ kfree(vb); ++ return ERR_CAST(va); ++ } ++ ++ err = radix_tree_preload(gfp_mask); ++ if (unlikely(err)) { ++ kfree(vb); ++ free_vmap_area(va); ++ return ERR_PTR(err); ++ } ++ ++ vaddr = vmap_block_vaddr(va->va_start, 0); ++ spin_lock_init(&vb->lock); ++ vb->va = va; ++ /* At least something should be left free */ ++ BUG_ON(VMAP_BBMAP_BITS <= (1UL << order)); ++ vb->free = VMAP_BBMAP_BITS - (1UL << order); ++ vb->dirty = 0; ++ vb->dirty_min = VMAP_BBMAP_BITS; ++ vb->dirty_max = 0; ++ INIT_LIST_HEAD(&vb->free_list); ++ ++ vb_idx = addr_to_vb_idx(va->va_start); ++ spin_lock(&vmap_block_tree_lock); ++ err = radix_tree_insert(&vmap_block_tree, vb_idx, vb); ++ spin_unlock(&vmap_block_tree_lock); ++ BUG_ON(err); ++ radix_tree_preload_end(); ++ ++ vbq = &get_cpu_var(vmap_block_queue); ++ spin_lock(&vbq->lock); ++ list_add_tail_rcu(&vb->free_list, &vbq->free); ++ spin_unlock(&vbq->lock); ++ put_cpu_var(vmap_block_queue); ++ ++ return vaddr; ++} ++ ++static void free_vmap_block(struct vmap_block *vb) ++{ ++ struct vmap_block *tmp; ++ unsigned long vb_idx; ++ ++ vb_idx = addr_to_vb_idx(vb->va->va_start); ++ spin_lock(&vmap_block_tree_lock); ++ tmp = radix_tree_delete(&vmap_block_tree, vb_idx); ++ spin_unlock(&vmap_block_tree_lock); ++ BUG_ON(tmp != vb); ++ ++ free_vmap_area_noflush(vb->va); ++ kfree_rcu(vb, rcu_head); ++} ++ ++static void purge_fragmented_blocks(int cpu) ++{ ++ LIST_HEAD(purge); ++ struct vmap_block *vb; ++ struct vmap_block *n_vb; ++ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(vb, &vbq->free, free_list) { ++ ++ if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS)) ++ continue; ++ ++ spin_lock(&vb->lock); ++ if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { ++ vb->free = 0; /* prevent further allocs after releasing lock */ ++ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ ++ vb->dirty_min = 0; ++ vb->dirty_max = VMAP_BBMAP_BITS; ++ spin_lock(&vbq->lock); ++ list_del_rcu(&vb->free_list); ++ spin_unlock(&vbq->lock); ++ spin_unlock(&vb->lock); ++ list_add_tail(&vb->purge, &purge); ++ } else ++ spin_unlock(&vb->lock); ++ } ++ rcu_read_unlock(); ++ ++ 
list_for_each_entry_safe(vb, n_vb, &purge, purge) { ++ list_del(&vb->purge); ++ free_vmap_block(vb); ++ } ++} ++ ++static void purge_fragmented_blocks_allcpus(void) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) ++ purge_fragmented_blocks(cpu); ++} ++ ++static void *vb_alloc(unsigned long size, gfp_t gfp_mask) ++{ ++ struct vmap_block_queue *vbq; ++ struct vmap_block *vb; ++ void *vaddr = NULL; ++ unsigned int order; ++ ++ BUG_ON(offset_in_page(size)); ++ BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); ++ if (WARN_ON(size == 0)) { ++ /* ++ * Allocating 0 bytes isn't what caller wants since ++ * get_order(0) returns funny result. Just warn and terminate ++ * early. ++ */ ++ return NULL; ++ } ++ order = get_order(size); ++ ++ rcu_read_lock(); ++ vbq = &get_cpu_var(vmap_block_queue); ++ list_for_each_entry_rcu(vb, &vbq->free, free_list) { ++ unsigned long pages_off; ++ ++ spin_lock(&vb->lock); ++ if (vb->free < (1UL << order)) { ++ spin_unlock(&vb->lock); ++ continue; ++ } ++ ++ pages_off = VMAP_BBMAP_BITS - vb->free; ++ vaddr = vmap_block_vaddr(vb->va->va_start, pages_off); ++ vb->free -= 1UL << order; ++ if (vb->free == 0) { ++ spin_lock(&vbq->lock); ++ list_del_rcu(&vb->free_list); ++ spin_unlock(&vbq->lock); ++ } ++ ++ spin_unlock(&vb->lock); ++ break; ++ } ++ ++ put_cpu_var(vmap_block_queue); ++ rcu_read_unlock(); ++ ++ /* Allocate new block if nothing was found */ ++ if (!vaddr) ++ vaddr = new_vmap_block(order, gfp_mask); ++ ++ return vaddr; ++} ++ ++static void vb_free(const void *addr, unsigned long size) ++{ ++ unsigned long offset; ++ unsigned long vb_idx; ++ unsigned int order; ++ struct vmap_block *vb; ++ ++ BUG_ON(offset_in_page(size)); ++ BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); ++ ++ flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size); ++ ++ order = get_order(size); ++ ++ offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); ++ offset >>= PAGE_SHIFT; ++ ++ vb_idx = addr_to_vb_idx((unsigned long)addr); ++ rcu_read_lock(); ++ vb = radix_tree_lookup(&vmap_block_tree, vb_idx); ++ rcu_read_unlock(); ++ BUG_ON(!vb); ++ ++ vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); ++ ++ if (debug_pagealloc_enabled()) ++ flush_tlb_kernel_range((unsigned long)addr, ++ (unsigned long)addr + size); ++ ++ spin_lock(&vb->lock); ++ ++ /* Expand dirty range */ ++ vb->dirty_min = min(vb->dirty_min, offset); ++ vb->dirty_max = max(vb->dirty_max, offset + (1UL << order)); ++ ++ vb->dirty += 1UL << order; ++ if (vb->dirty == VMAP_BBMAP_BITS) { ++ BUG_ON(vb->free); ++ spin_unlock(&vb->lock); ++ free_vmap_block(vb); ++ } else ++ spin_unlock(&vb->lock); ++} ++ ++/** ++ * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer ++ * ++ * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily ++ * to amortize TLB flushing overheads. What this means is that any page you ++ * have now, may, in a former life, have been mapped into kernel virtual ++ * address by the vmap layer and so there might be some CPUs with TLB entries ++ * still referencing that page (additional to the regular 1:1 kernel mapping). ++ * ++ * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can ++ * be sure that none of the pages we have control over will have any aliases ++ * from the vmap layer. 
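++ *
++ * May sleep: it takes vmap_purge_lock and purges the outstanding lazy areas.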
++ */ ++void vm_unmap_aliases(void) ++{ ++ unsigned long start = ULONG_MAX, end = 0; ++ int cpu; ++ int flush = 0; ++ ++ if (unlikely(!vmap_initialized)) ++ return; ++ ++ might_sleep(); ++ ++ for_each_possible_cpu(cpu) { ++ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); ++ struct vmap_block *vb; ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(vb, &vbq->free, free_list) { ++ spin_lock(&vb->lock); ++ if (vb->dirty) { ++ unsigned long va_start = vb->va->va_start; ++ unsigned long s, e; ++ ++ s = va_start + (vb->dirty_min << PAGE_SHIFT); ++ e = va_start + (vb->dirty_max << PAGE_SHIFT); ++ ++ start = min(s, start); ++ end = max(e, end); ++ ++ flush = 1; ++ } ++ spin_unlock(&vb->lock); ++ } ++ rcu_read_unlock(); ++ } ++ ++ mutex_lock(&vmap_purge_lock); ++ purge_fragmented_blocks_allcpus(); ++ if (!__purge_vmap_area_lazy(start, end) && flush) ++ flush_tlb_kernel_range(start, end); ++ mutex_unlock(&vmap_purge_lock); ++} ++EXPORT_SYMBOL_GPL(vm_unmap_aliases); ++ ++/** ++ * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram ++ * @mem: the pointer returned by vm_map_ram ++ * @count: the count passed to that vm_map_ram call (cannot unmap partial) ++ */ ++void vm_unmap_ram(const void *mem, unsigned int count) ++{ ++ unsigned long size = (unsigned long)count << PAGE_SHIFT; ++ unsigned long addr = (unsigned long)mem; ++ struct vmap_area *va; ++ ++ might_sleep(); ++ BUG_ON(!addr); ++ BUG_ON(addr < VMALLOC_START); ++ BUG_ON(addr > VMALLOC_END); ++ BUG_ON(!PAGE_ALIGNED(addr)); ++ ++ if (likely(count <= VMAP_MAX_ALLOC)) { ++ debug_check_no_locks_freed(mem, size); ++ vb_free(mem, size); ++ return; ++ } ++ ++ va = find_vmap_area(addr); ++ BUG_ON(!va); ++ debug_check_no_locks_freed((void *)va->va_start, ++ (va->va_end - va->va_start)); ++ free_unmap_vmap_area(va); ++} ++EXPORT_SYMBOL(vm_unmap_ram); ++ ++/** ++ * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space) ++ * @pages: an array of pointers to the pages to be mapped ++ * @count: number of pages ++ * @node: prefer to allocate data structures on this node ++ * @prot: memory protection to use. PAGE_KERNEL for regular RAM ++ * ++ * If you use this function for less than VMAP_MAX_ALLOC pages, it could be ++ * faster than vmap so it's good. But if you mix long-life and short-life ++ * objects with vm_map_ram(), it could consume lots of address space through ++ * fragmentation (especially on a 32bit machine). You could see failures in ++ * the end. Please use this function for short-lived objects. 
++ * ++ * Returns: a pointer to the address that has been mapped, or %NULL on failure ++ */ ++void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) ++{ ++ unsigned long size = (unsigned long)count << PAGE_SHIFT; ++ unsigned long addr; ++ void *mem; ++ ++ if (likely(count <= VMAP_MAX_ALLOC)) { ++ mem = vb_alloc(size, GFP_KERNEL); ++ if (IS_ERR(mem)) ++ return NULL; ++ addr = (unsigned long)mem; ++ } else { ++ struct vmap_area *va; ++ va = alloc_vmap_area(size, PAGE_SIZE, ++ VMALLOC_START, VMALLOC_END, node, GFP_KERNEL); ++ if (IS_ERR(va)) ++ return NULL; ++ ++ addr = va->va_start; ++ mem = (void *)addr; ++ } ++ if (vmap_page_range(addr, addr + size, prot, pages) < 0) { ++ vm_unmap_ram(mem, count); ++ return NULL; ++ } ++ return mem; ++} ++EXPORT_SYMBOL(vm_map_ram); ++ ++static struct vm_struct *vmlist __initdata; ++/** ++ * vm_area_add_early - add vmap area early during boot ++ * @vm: vm_struct to add ++ * ++ * This function is used to add fixed kernel vm area to vmlist before ++ * vmalloc_init() is called. @vm->addr, @vm->size, and @vm->flags ++ * should contain proper values and the other fields should be zero. ++ * ++ * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. ++ */ ++void __init vm_area_add_early(struct vm_struct *vm) ++{ ++ struct vm_struct *tmp, **p; ++ ++ BUG_ON(vmap_initialized); ++ for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { ++ if (tmp->addr >= vm->addr) { ++ BUG_ON(tmp->addr < vm->addr + vm->size); ++ break; ++ } else ++ BUG_ON(tmp->addr + tmp->size > vm->addr); ++ } ++ vm->next = *p; ++ *p = vm; ++} ++ ++/** ++ * vm_area_register_early - register vmap area early during boot ++ * @vm: vm_struct to register ++ * @align: requested alignment ++ * ++ * This function is used to register kernel vm area before ++ * vmalloc_init() is called. @vm->size and @vm->flags should contain ++ * proper values on entry and other fields should be zero. On return, ++ * vm->addr contains the allocated address. ++ * ++ * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. ++ */ ++void __init vm_area_register_early(struct vm_struct *vm, size_t align) ++{ ++ static size_t vm_init_off __initdata; ++ unsigned long addr; ++ ++ addr = ALIGN(VMALLOC_START + vm_init_off, align); ++ vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; ++ ++ vm->addr = (void *)addr; ++ ++ vm_area_add_early(vm); ++} ++ ++void __init vmalloc_init(void) ++{ ++ struct vmap_area *va; ++ struct vm_struct *tmp; ++ int i; ++ ++ for_each_possible_cpu(i) { ++ struct vmap_block_queue *vbq; ++ struct vfree_deferred *p; ++ ++ vbq = &per_cpu(vmap_block_queue, i); ++ spin_lock_init(&vbq->lock); ++ INIT_LIST_HEAD(&vbq->free); ++ p = &per_cpu(vfree_deferred, i); ++ init_llist_head(&p->list); ++ INIT_WORK(&p->wq, free_work); ++ } ++ ++ /* Import existing vmlist entries. */ ++ for (tmp = vmlist; tmp; tmp = tmp->next) { ++ va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); ++ va->flags = VM_VM_AREA; ++ va->va_start = (unsigned long)tmp->addr; ++ va->va_end = va->va_start + tmp->size; ++ va->vm = tmp; ++ __insert_vmap_area(va); ++ } ++ ++ vmap_area_pcpu_hole = VMALLOC_END; ++ ++ vmap_initialized = true; ++} ++ ++/** ++ * map_kernel_range_noflush - map kernel VM area with the specified pages ++ * @addr: start of the VM area to map ++ * @size: size of the VM area to map ++ * @prot: page protection flags to use ++ * @pages: pages to map ++ * ++ * Map PFN_UP(@size) pages at @addr. 
The VM area @addr and @size ++ * specify should have been allocated using get_vm_area() and its ++ * friends. ++ * ++ * NOTE: ++ * This function does NOT do any cache flushing. The caller is ++ * responsible for calling flush_cache_vmap() on to-be-mapped areas ++ * before calling this function. ++ * ++ * RETURNS: ++ * The number of pages mapped on success, -errno on failure. ++ */ ++int map_kernel_range_noflush(unsigned long addr, unsigned long size, ++ pgprot_t prot, struct page **pages) ++{ ++ return vmap_page_range_noflush(addr, addr + size, prot, pages); ++} ++ ++/** ++ * unmap_kernel_range_noflush - unmap kernel VM area ++ * @addr: start of the VM area to unmap ++ * @size: size of the VM area to unmap ++ * ++ * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size ++ * specify should have been allocated using get_vm_area() and its ++ * friends. ++ * ++ * NOTE: ++ * This function does NOT do any cache flushing. The caller is ++ * responsible for calling flush_cache_vunmap() on to-be-mapped areas ++ * before calling this function and flush_tlb_kernel_range() after. ++ */ ++void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) ++{ ++ vunmap_page_range(addr, addr + size); ++} ++EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush); ++ ++/** ++ * unmap_kernel_range - unmap kernel VM area and flush cache and TLB ++ * @addr: start of the VM area to unmap ++ * @size: size of the VM area to unmap ++ * ++ * Similar to unmap_kernel_range_noflush() but flushes vcache before ++ * the unmapping and tlb after. ++ */ ++void unmap_kernel_range(unsigned long addr, unsigned long size) ++{ ++ unsigned long end = addr + size; ++ ++ flush_cache_vunmap(addr, end); ++ vunmap_page_range(addr, end); ++ flush_tlb_kernel_range(addr, end); ++} ++EXPORT_SYMBOL_GPL(unmap_kernel_range); ++ ++int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) ++{ ++ unsigned long addr = (unsigned long)area->addr; ++ unsigned long end = addr + get_vm_area_size(area); ++ int err; ++ ++ err = vmap_page_range(addr, end, prot, pages); ++ ++ return err > 0 ? 0 : err; ++} ++EXPORT_SYMBOL_GPL(map_vm_area); ++ ++static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, ++ unsigned long flags, const void *caller) ++{ ++ spin_lock(&vmap_area_lock); ++ vm->flags = flags; ++ vm->addr = (void *)va->va_start; ++ vm->size = va->va_end - va->va_start; ++ vm->caller = caller; ++ va->vm = vm; ++ va->flags |= VM_VM_AREA; ++ spin_unlock(&vmap_area_lock); ++} ++ ++static void clear_vm_uninitialized_flag(struct vm_struct *vm) ++{ ++ /* ++ * Before removing VM_UNINITIALIZED, ++ * we should make sure that vm has proper values. ++ * Pair with smp_rmb() in show_numa_info(). 
++ */ ++ smp_wmb(); ++ vm->flags &= ~VM_UNINITIALIZED; ++} ++ ++static struct vm_struct *__get_vm_area_node(unsigned long size, ++ unsigned long align, unsigned long flags, unsigned long start, ++ unsigned long end, int node, gfp_t gfp_mask, const void *caller) ++{ ++ struct vmap_area *va; ++ struct vm_struct *area; ++ ++ BUG_ON(in_interrupt()); ++ size = PAGE_ALIGN(size); ++ if (unlikely(!size)) ++ return NULL; ++ ++ if (flags & VM_IOREMAP) ++ align = 1ul << clamp_t(int, get_count_order_long(size), ++ PAGE_SHIFT, IOREMAP_MAX_ORDER); ++ ++ area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); ++ if (unlikely(!area)) ++ return NULL; ++ ++ if (!(flags & VM_NO_GUARD)) ++ size += PAGE_SIZE; ++ ++ va = alloc_vmap_area(size, align, start, end, node, gfp_mask); ++ if (IS_ERR(va)) { ++ kfree(area); ++ return NULL; ++ } ++ ++ setup_vmalloc_vm(area, va, flags, caller); ++ ++ return area; ++} ++ ++struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, ++ unsigned long start, unsigned long end) ++{ ++ return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE, ++ GFP_KERNEL, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL_GPL(__get_vm_area); ++ ++struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, ++ unsigned long start, unsigned long end, ++ const void *caller) ++{ ++ return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE, ++ GFP_KERNEL, caller); ++} ++ ++/** ++ * get_vm_area - reserve a contiguous kernel virtual area ++ * @size: size of the area ++ * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC ++ * ++ * Search an area of @size in the kernel virtual mapping area, ++ * and reserved it for out purposes. Returns the area descriptor ++ * on success or %NULL on failure. ++ */ ++struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) ++{ ++ return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, ++ NUMA_NO_NODE, GFP_KERNEL, ++ __builtin_return_address(0)); ++} ++ ++struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, ++ const void *caller) ++{ ++ return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, ++ NUMA_NO_NODE, GFP_KERNEL, caller); ++} ++ ++/** ++ * find_vm_area - find a continuous kernel virtual area ++ * @addr: base address ++ * ++ * Search for the kernel VM area starting at @addr, and return it. ++ * It is up to the caller to do all required locking to keep the returned ++ * pointer valid. ++ */ ++struct vm_struct *find_vm_area(const void *addr) ++{ ++ struct vmap_area *va; ++ ++ va = find_vmap_area((unsigned long)addr); ++ if (va && va->flags & VM_VM_AREA) ++ return va->vm; ++ ++ return NULL; ++} ++ ++/** ++ * remove_vm_area - find and remove a continuous kernel virtual area ++ * @addr: base address ++ * ++ * Search for the kernel VM area starting at @addr, and remove it. ++ * This function returns the found VM area, but using it is NOT safe ++ * on SMP machines, except for its size or flags. 
++ */ ++struct vm_struct *remove_vm_area(const void *addr) ++{ ++ struct vmap_area *va; ++ ++ might_sleep(); ++ ++ va = find_vmap_area((unsigned long)addr); ++ if (va && va->flags & VM_VM_AREA) { ++ struct vm_struct *vm = va->vm; ++ ++ spin_lock(&vmap_area_lock); ++ va->vm = NULL; ++ va->flags &= ~VM_VM_AREA; ++ va->flags |= VM_LAZY_FREE; ++ spin_unlock(&vmap_area_lock); ++ ++ kasan_free_shadow(vm); ++ free_unmap_vmap_area(va); ++ ++ return vm; ++ } ++ return NULL; ++} ++ ++static void __vunmap(const void *addr, int deallocate_pages) ++{ ++ struct vm_struct *area; ++ ++ if (!addr) ++ return; ++ ++ if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n", ++ addr)) ++ return; ++ ++ area = find_vm_area(addr); ++ if (unlikely(!area)) { ++ WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", ++ addr); ++ return; ++ } ++ ++ debug_check_no_locks_freed(area->addr, get_vm_area_size(area)); ++ debug_check_no_obj_freed(area->addr, get_vm_area_size(area)); ++ ++ remove_vm_area(addr); ++ if (deallocate_pages) { ++ int i; ++ ++ for (i = 0; i < area->nr_pages; i++) { ++ struct page *page = area->pages[i]; ++ ++ BUG_ON(!page); ++ __free_pages(page, 0); ++ } ++ ++ kvfree(area->pages); ++ } ++ ++ kfree(area); ++ return; ++} ++ ++static inline void __vfree_deferred(const void *addr) ++{ ++ /* ++ * Use raw_cpu_ptr() because this can be called from preemptible ++ * context. Preemption is absolutely fine here, because the llist_add() ++ * implementation is lockless, so it works even if we are adding to ++ * nother cpu's list. schedule_work() should be fine with this too. ++ */ ++ struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred); ++ ++ if (llist_add((struct llist_node *)addr, &p->list)) ++ schedule_work(&p->wq); ++} ++ ++/** ++ * vfree_atomic - release memory allocated by vmalloc() ++ * @addr: memory base address ++ * ++ * This one is just like vfree() but can be called in any atomic context ++ * except NMIs. ++ */ ++void vfree_atomic(const void *addr) ++{ ++ BUG_ON(in_nmi()); ++ ++ kmemleak_free(addr); ++ ++ if (!addr) ++ return; ++ __vfree_deferred(addr); ++} ++ ++/** ++ * vfree - release memory allocated by vmalloc() ++ * @addr: memory base address ++ * ++ * Free the virtually continuous memory area starting at @addr, as ++ * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is ++ * NULL, no operation is performed. ++ * ++ * Must not be called in NMI context (strictly speaking, only if we don't ++ * have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling ++ * conventions for vfree() arch-depenedent would be a really bad idea) ++ * ++ * NOTE: assumes that the object at @addr has a size >= sizeof(llist_node) ++ */ ++void vfree(const void *addr) ++{ ++ BUG_ON(in_nmi()); ++ ++ kmemleak_free(addr); ++ ++ if (!addr) ++ return; ++ if (unlikely(in_interrupt())) ++ __vfree_deferred(addr); ++ else ++ __vunmap(addr, 1); ++} ++EXPORT_SYMBOL(vfree); ++ ++/** ++ * vunmap - release virtual mapping obtained by vmap() ++ * @addr: memory base address ++ * ++ * Free the virtually contiguous memory area starting at @addr, ++ * which was created from the page array passed to vmap(). ++ * ++ * Must not be called in interrupt context. 
++ */ ++void vunmap(const void *addr) ++{ ++ BUG_ON(in_interrupt()); ++ might_sleep(); ++ if (addr) ++ __vunmap(addr, 0); ++} ++EXPORT_SYMBOL(vunmap); ++ ++/** ++ * vmap - map an array of pages into virtually contiguous space ++ * @pages: array of page pointers ++ * @count: number of pages to map ++ * @flags: vm_area->flags ++ * @prot: page protection for the mapping ++ * ++ * Maps @count pages from @pages into contiguous kernel virtual ++ * space. ++ */ ++void *vmap(struct page **pages, unsigned int count, ++ unsigned long flags, pgprot_t prot) ++{ ++ struct vm_struct *area; ++ unsigned long size; /* In bytes */ ++ ++ might_sleep(); ++ ++ if (count > totalram_pages) ++ return NULL; ++ ++ size = (unsigned long)count << PAGE_SHIFT; ++ area = get_vm_area_caller(size, flags, __builtin_return_address(0)); ++ if (!area) ++ return NULL; ++ ++ if (map_vm_area(area, prot, pages)) { ++ vunmap(area->addr); ++ return NULL; ++ } ++ ++ return area->addr; ++} ++EXPORT_SYMBOL(vmap); ++ ++static void *__vmalloc_node(unsigned long size, unsigned long align, ++ gfp_t gfp_mask, pgprot_t prot, ++ int node, const void *caller); ++static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, ++ pgprot_t prot, int node) ++{ ++ struct page **pages; ++ unsigned int nr_pages, array_size, i; ++ const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; ++ const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN; ++ const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ? ++ 0 : ++ __GFP_HIGHMEM; ++ ++ nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; ++ array_size = (nr_pages * sizeof(struct page *)); ++ ++ /* Please note that the recursion is strictly bounded. */ ++ if (array_size > PAGE_SIZE) { ++ pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask, ++ PAGE_KERNEL, node, area->caller); ++ } else { ++ pages = kmalloc_node(array_size, nested_gfp, node); ++ } ++ ++ if (!pages) { ++ remove_vm_area(area->addr); ++ kfree(area); ++ return NULL; ++ } ++ ++ area->pages = pages; ++ area->nr_pages = nr_pages; ++ ++ for (i = 0; i < area->nr_pages; i++) { ++ struct page *page; ++ ++ if (node == NUMA_NO_NODE) ++ page = alloc_page(alloc_mask|highmem_mask); ++ else ++ page = alloc_pages_node(node, alloc_mask|highmem_mask, 0); ++ ++ if (unlikely(!page)) { ++ /* Successfully allocated i pages, free them in __vunmap() */ ++ area->nr_pages = i; ++ goto fail; ++ } ++ area->pages[i] = page; ++ if (gfpflags_allow_blocking(gfp_mask|highmem_mask)) ++ cond_resched(); ++ } ++ ++ if (map_vm_area(area, prot, pages)) ++ goto fail; ++ return area->addr; ++ ++fail: ++ warn_alloc(gfp_mask, NULL, ++ "vmalloc: allocation failure, allocated %ld of %ld bytes", ++ (area->nr_pages*PAGE_SIZE), area->size); ++ vfree(area->addr); ++ return NULL; ++} ++ ++/** ++ * __vmalloc_node_range - allocate virtually contiguous memory ++ * @size: allocation size ++ * @align: desired alignment ++ * @start: vm area range start ++ * @end: vm area range end ++ * @gfp_mask: flags for the page level allocator ++ * @prot: protection mask for the allocated pages ++ * @vm_flags: additional vm area flags (e.g. %VM_NO_GUARD) ++ * @node: node to use for allocation or NUMA_NO_NODE ++ * @caller: caller's return address ++ * ++ * Allocate enough pages to cover @size from the page level ++ * allocator with @gfp_mask flags. Map them into contiguous ++ * kernel virtual space, using a pagetable protection of @prot. 
++ */ ++void *__vmalloc_node_range(unsigned long size, unsigned long align, ++ unsigned long start, unsigned long end, gfp_t gfp_mask, ++ pgprot_t prot, unsigned long vm_flags, int node, ++ const void *caller) ++{ ++ struct vm_struct *area; ++ void *addr; ++ unsigned long real_size = size; ++ ++ size = PAGE_ALIGN(size); ++ if (!size || (size >> PAGE_SHIFT) > totalram_pages) ++ goto fail; ++ ++ area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED | ++ vm_flags, start, end, node, gfp_mask, caller); ++ if (!area) ++ goto fail; ++ ++ addr = __vmalloc_area_node(area, gfp_mask, prot, node); ++ if (!addr) ++ return NULL; ++ ++ /* ++ * First make sure the mappings are removed from all page-tables ++ * before they are freed. ++ */ ++ vmalloc_sync_unmappings(); ++ ++ /* ++ * In this function, newly allocated vm_struct has VM_UNINITIALIZED ++ * flag. It means that vm_struct is not fully initialized. ++ * Now, it is fully initialized, so remove this flag here. ++ */ ++ clear_vm_uninitialized_flag(area); ++ ++ kmemleak_vmalloc(area, size, gfp_mask); ++ ++ return addr; ++ ++fail: ++ warn_alloc(gfp_mask, NULL, ++ "vmalloc: allocation failure: %lu bytes", real_size); ++ return NULL; ++} ++ ++/** ++ * __vmalloc_node - allocate virtually contiguous memory ++ * @size: allocation size ++ * @align: desired alignment ++ * @gfp_mask: flags for the page level allocator ++ * @prot: protection mask for the allocated pages ++ * @node: node to use for allocation or NUMA_NO_NODE ++ * @caller: caller's return address ++ * ++ * Allocate enough pages to cover @size from the page level ++ * allocator with @gfp_mask flags. Map them into contiguous ++ * kernel virtual space, using a pagetable protection of @prot. ++ * ++ * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL ++ * and __GFP_NOFAIL are not supported ++ * ++ * Any use of gfp flags outside of GFP_KERNEL should be consulted ++ * with mm people. ++ * ++ */ ++static void *__vmalloc_node(unsigned long size, unsigned long align, ++ gfp_t gfp_mask, pgprot_t prot, ++ int node, const void *caller) ++{ ++ return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, ++ gfp_mask, prot, 0, node, caller); ++} ++ ++void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) ++{ ++ return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE, ++ __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(__vmalloc); ++ ++static inline void *__vmalloc_node_flags(unsigned long size, ++ int node, gfp_t flags) ++{ ++ return __vmalloc_node(size, 1, flags, PAGE_KERNEL, ++ node, __builtin_return_address(0)); ++} ++ ++ ++void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, ++ void *caller) ++{ ++ return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller); ++} ++ ++/** ++ * vmalloc - allocate virtually contiguous memory ++ * @size: allocation size ++ * Allocate enough pages to cover @size from the page level ++ * allocator and map them into contiguous kernel virtual space. ++ * ++ * For tight control over page level allocator and protection flags ++ * use __vmalloc() instead. ++ */ ++void *vmalloc(unsigned long size) ++{ ++ return __vmalloc_node_flags(size, NUMA_NO_NODE, ++ GFP_KERNEL); ++} ++EXPORT_SYMBOL(vmalloc); ++ ++/** ++ * vzalloc - allocate virtually contiguous memory with zero fill ++ * @size: allocation size ++ * Allocate enough pages to cover @size from the page level ++ * allocator and map them into contiguous kernel virtual space. ++ * The memory allocated is set to zero. 
++ * ++ * For tight control over page level allocator and protection flags ++ * use __vmalloc() instead. ++ */ ++void *vzalloc(unsigned long size) ++{ ++ return __vmalloc_node_flags(size, NUMA_NO_NODE, ++ GFP_KERNEL | __GFP_ZERO); ++} ++EXPORT_SYMBOL(vzalloc); ++ ++/** ++ * vmalloc_user - allocate zeroed virtually contiguous memory for userspace ++ * @size: allocation size ++ * ++ * The resulting memory area is zeroed so it can be mapped to userspace ++ * without leaking data. ++ */ ++void *vmalloc_user(unsigned long size) ++{ ++ struct vm_struct *area; ++ void *ret; ++ ++ ret = __vmalloc_node(size, SHMLBA, ++ GFP_KERNEL | __GFP_ZERO, ++ PAGE_KERNEL, NUMA_NO_NODE, ++ __builtin_return_address(0)); ++ if (ret) { ++ area = find_vm_area(ret); ++ area->flags |= VM_USERMAP; ++ } ++ return ret; ++} ++EXPORT_SYMBOL(vmalloc_user); ++ ++/** ++ * vmalloc_node - allocate memory on a specific node ++ * @size: allocation size ++ * @node: numa node ++ * ++ * Allocate enough pages to cover @size from the page level ++ * allocator and map them into contiguous kernel virtual space. ++ * ++ * For tight control over page level allocator and protection flags ++ * use __vmalloc() instead. ++ */ ++void *vmalloc_node(unsigned long size, int node) ++{ ++ return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL, ++ node, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(vmalloc_node); ++ ++/** ++ * vzalloc_node - allocate memory on a specific node with zero fill ++ * @size: allocation size ++ * @node: numa node ++ * ++ * Allocate enough pages to cover @size from the page level ++ * allocator and map them into contiguous kernel virtual space. ++ * The memory allocated is set to zero. ++ * ++ * For tight control over page level allocator and protection flags ++ * use __vmalloc_node() instead. ++ */ ++void *vzalloc_node(unsigned long size, int node) ++{ ++ return __vmalloc_node_flags(size, node, ++ GFP_KERNEL | __GFP_ZERO); ++} ++EXPORT_SYMBOL(vzalloc_node); ++ ++/** ++ * vmalloc_exec - allocate virtually contiguous, executable memory ++ * @size: allocation size ++ * ++ * Kernel-internal function to allocate enough pages to cover @size ++ * the page level allocator and map them into contiguous and ++ * executable kernel virtual space. ++ * ++ * For tight control over page level allocator and protection flags ++ * use __vmalloc() instead. ++ */ ++ ++void *vmalloc_exec(unsigned long size) ++{ ++ return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC, ++ NUMA_NO_NODE, __builtin_return_address(0)); ++} ++ ++#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) ++#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) ++#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) ++#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL) ++#else ++/* ++ * 64b systems should always have either DMA or DMA32 zones. For others ++ * GFP_DMA32 should do the right thing and use the normal zone. ++ */ ++#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL ++#endif ++ ++/** ++ * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) ++ * @size: allocation size ++ * ++ * Allocate enough 32bit PA addressable pages to cover @size from the ++ * page level allocator and map them into contiguous kernel virtual space. 
++ */ ++void *vmalloc_32(unsigned long size) ++{ ++ return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL, ++ NUMA_NO_NODE, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(vmalloc_32); ++ ++/** ++ * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory ++ * @size: allocation size ++ * ++ * The resulting memory area is 32bit addressable and zeroed so it can be ++ * mapped to userspace without leaking data. ++ */ ++void *vmalloc_32_user(unsigned long size) ++{ ++ struct vm_struct *area; ++ void *ret; ++ ++ ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, ++ NUMA_NO_NODE, __builtin_return_address(0)); ++ if (ret) { ++ area = find_vm_area(ret); ++ area->flags |= VM_USERMAP; ++ } ++ return ret; ++} ++EXPORT_SYMBOL(vmalloc_32_user); ++ ++/* ++ * small helper routine , copy contents to buf from addr. ++ * If the page is not present, fill zero. ++ */ ++ ++static int aligned_vread(char *buf, char *addr, unsigned long count) ++{ ++ struct page *p; ++ int copied = 0; ++ ++ while (count) { ++ unsigned long offset, length; ++ ++ offset = offset_in_page(addr); ++ length = PAGE_SIZE - offset; ++ if (length > count) ++ length = count; ++ p = vmalloc_to_page(addr); ++ /* ++ * To do safe access to this _mapped_ area, we need ++ * lock. But adding lock here means that we need to add ++ * overhead of vmalloc()/vfree() calles for this _debug_ ++ * interface, rarely used. Instead of that, we'll use ++ * kmap() and get small overhead in this access function. ++ */ ++ if (p) { ++ /* ++ * we can expect USER0 is not used (see vread/vwrite's ++ * function description) ++ */ ++ void *map = kmap_atomic(p); ++ memcpy(buf, map + offset, length); ++ kunmap_atomic(map); ++ } else ++ memset(buf, 0, length); ++ ++ addr += length; ++ buf += length; ++ copied += length; ++ count -= length; ++ } ++ return copied; ++} ++ ++static int aligned_vwrite(char *buf, char *addr, unsigned long count) ++{ ++ struct page *p; ++ int copied = 0; ++ ++ while (count) { ++ unsigned long offset, length; ++ ++ offset = offset_in_page(addr); ++ length = PAGE_SIZE - offset; ++ if (length > count) ++ length = count; ++ p = vmalloc_to_page(addr); ++ /* ++ * To do safe access to this _mapped_ area, we need ++ * lock. But adding lock here means that we need to add ++ * overhead of vmalloc()/vfree() calles for this _debug_ ++ * interface, rarely used. Instead of that, we'll use ++ * kmap() and get small overhead in this access function. ++ */ ++ if (p) { ++ /* ++ * we can expect USER0 is not used (see vread/vwrite's ++ * function description) ++ */ ++ void *map = kmap_atomic(p); ++ memcpy(map + offset, buf, length); ++ kunmap_atomic(map); ++ } ++ addr += length; ++ buf += length; ++ copied += length; ++ count -= length; ++ } ++ return copied; ++} ++ ++/** ++ * vread() - read vmalloc area in a safe way. ++ * @buf: buffer for reading data ++ * @addr: vm address. ++ * @count: number of bytes to be read. ++ * ++ * Returns # of bytes which addr and buf should be increased. ++ * (same number to @count). Returns 0 if [addr...addr+count) doesn't ++ * includes any intersect with alive vmalloc area. ++ * ++ * This function checks that addr is a valid vmalloc'ed area, and ++ * copy data from that area to a given buffer. If the given memory range ++ * of [addr...addr+count) includes some valid address, data is copied to ++ * proper area of @buf. If there are memory holes, they'll be zero-filled. ++ * IOREMAP area is treated as memory hole and no copy is done. 
++ * ++ * If [addr...addr+count) doesn't includes any intersects with alive ++ * vm_struct area, returns 0. @buf should be kernel's buffer. ++ * ++ * Note: In usual ops, vread() is never necessary because the caller ++ * should know vmalloc() area is valid and can use memcpy(). ++ * This is for routines which have to access vmalloc area without ++ * any informaion, as /dev/kmem. ++ * ++ */ ++ ++long vread(char *buf, char *addr, unsigned long count) ++{ ++ struct vmap_area *va; ++ struct vm_struct *vm; ++ char *vaddr, *buf_start = buf; ++ unsigned long buflen = count; ++ unsigned long n; ++ ++ /* Don't allow overflow */ ++ if ((unsigned long) addr + count < count) ++ count = -(unsigned long) addr; ++ ++ spin_lock(&vmap_area_lock); ++ list_for_each_entry(va, &vmap_area_list, list) { ++ if (!count) ++ break; ++ ++ if (!(va->flags & VM_VM_AREA)) ++ continue; ++ ++ vm = va->vm; ++ vaddr = (char *) vm->addr; ++ if (addr >= vaddr + get_vm_area_size(vm)) ++ continue; ++ while (addr < vaddr) { ++ if (count == 0) ++ goto finished; ++ *buf = '\0'; ++ buf++; ++ addr++; ++ count--; ++ } ++ n = vaddr + get_vm_area_size(vm) - addr; ++ if (n > count) ++ n = count; ++ if (!(vm->flags & VM_IOREMAP)) ++ aligned_vread(buf, addr, n); ++ else /* IOREMAP area is treated as memory hole */ ++ memset(buf, 0, n); ++ buf += n; ++ addr += n; ++ count -= n; ++ } ++finished: ++ spin_unlock(&vmap_area_lock); ++ ++ if (buf == buf_start) ++ return 0; ++ /* zero-fill memory holes */ ++ if (buf != buf_start + buflen) ++ memset(buf, 0, buflen - (buf - buf_start)); ++ ++ return buflen; ++} ++ ++/** ++ * vwrite() - write vmalloc area in a safe way. ++ * @buf: buffer for source data ++ * @addr: vm address. ++ * @count: number of bytes to be read. ++ * ++ * Returns # of bytes which addr and buf should be incresed. ++ * (same number to @count). ++ * If [addr...addr+count) doesn't includes any intersect with valid ++ * vmalloc area, returns 0. ++ * ++ * This function checks that addr is a valid vmalloc'ed area, and ++ * copy data from a buffer to the given addr. If specified range of ++ * [addr...addr+count) includes some valid address, data is copied from ++ * proper area of @buf. If there are memory holes, no copy to hole. ++ * IOREMAP area is treated as memory hole and no copy is done. ++ * ++ * If [addr...addr+count) doesn't includes any intersects with alive ++ * vm_struct area, returns 0. @buf should be kernel's buffer. ++ * ++ * Note: In usual ops, vwrite() is never necessary because the caller ++ * should know vmalloc() area is valid and can use memcpy(). ++ * This is for routines which have to access vmalloc area without ++ * any informaion, as /dev/kmem. 
++ */ ++ ++long vwrite(char *buf, char *addr, unsigned long count) ++{ ++ struct vmap_area *va; ++ struct vm_struct *vm; ++ char *vaddr; ++ unsigned long n, buflen; ++ int copied = 0; ++ ++ /* Don't allow overflow */ ++ if ((unsigned long) addr + count < count) ++ count = -(unsigned long) addr; ++ buflen = count; ++ ++ spin_lock(&vmap_area_lock); ++ list_for_each_entry(va, &vmap_area_list, list) { ++ if (!count) ++ break; ++ ++ if (!(va->flags & VM_VM_AREA)) ++ continue; ++ ++ vm = va->vm; ++ vaddr = (char *) vm->addr; ++ if (addr >= vaddr + get_vm_area_size(vm)) ++ continue; ++ while (addr < vaddr) { ++ if (count == 0) ++ goto finished; ++ buf++; ++ addr++; ++ count--; ++ } ++ n = vaddr + get_vm_area_size(vm) - addr; ++ if (n > count) ++ n = count; ++ if (!(vm->flags & VM_IOREMAP)) { ++ aligned_vwrite(buf, addr, n); ++ copied++; ++ } ++ buf += n; ++ addr += n; ++ count -= n; ++ } ++finished: ++ spin_unlock(&vmap_area_lock); ++ if (!copied) ++ return 0; ++ return buflen; ++} ++ ++/** ++ * remap_vmalloc_range_partial - map vmalloc pages to userspace ++ * @vma: vma to cover ++ * @uaddr: target user address to start at ++ * @kaddr: virtual address of vmalloc kernel memory ++ * @pgoff: offset from @kaddr to start at ++ * @size: size of map area ++ * ++ * Returns: 0 for success, -Exxx on failure ++ * ++ * This function checks that @kaddr is a valid vmalloc'ed area, ++ * and that it is big enough to cover the range starting at ++ * @uaddr in @vma. Will return failure if that criteria isn't ++ * met. ++ * ++ * Similar to remap_pfn_range() (see mm/memory.c) ++ */ ++int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, ++ void *kaddr, unsigned long pgoff, ++ unsigned long size) ++{ ++ struct vm_struct *area; ++ unsigned long off; ++ unsigned long end_index; ++ ++ if (check_shl_overflow(pgoff, PAGE_SHIFT, &off)) ++ return -EINVAL; ++ ++ size = PAGE_ALIGN(size); ++ ++ if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr)) ++ return -EINVAL; ++ ++ area = find_vm_area(kaddr); ++ if (!area) ++ return -EINVAL; ++ ++ if (!(area->flags & VM_USERMAP)) ++ return -EINVAL; ++ ++ if (check_add_overflow(size, off, &end_index) || ++ end_index > get_vm_area_size(area)) ++ return -EINVAL; ++ kaddr += off; ++ ++ do { ++ struct page *page = vmalloc_to_page(kaddr); ++ int ret; ++ ++ ret = vm_insert_page(vma, uaddr, page); ++ if (ret) ++ return ret; ++ ++ uaddr += PAGE_SIZE; ++ kaddr += PAGE_SIZE; ++ size -= PAGE_SIZE; ++ } while (size > 0); ++ ++ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; ++ ++ return 0; ++} ++EXPORT_SYMBOL(remap_vmalloc_range_partial); ++ ++/** ++ * remap_vmalloc_range - map vmalloc pages to userspace ++ * @vma: vma to cover (map full range of vma) ++ * @addr: vmalloc memory ++ * @pgoff: number of pages into addr before first page to map ++ * ++ * Returns: 0 for success, -Exxx on failure ++ * ++ * This function checks that addr is a valid vmalloc'ed area, and ++ * that it is big enough to cover the vma. Will return failure if ++ * that criteria isn't met. ++ * ++ * Similar to remap_pfn_range() (see mm/memory.c) ++ */ ++int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, ++ unsigned long pgoff) ++{ ++ return remap_vmalloc_range_partial(vma, vma->vm_start, ++ addr, pgoff, ++ vma->vm_end - vma->vm_start); ++} ++EXPORT_SYMBOL(remap_vmalloc_range); ++ ++/* ++ * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose ++ * not to have one. 
++ * ++ * The purpose of this function is to make sure the vmalloc area ++ * mappings are identical in all page-tables in the system. ++ */ ++void __weak vmalloc_sync_mappings(void) ++{ ++} ++ ++void __weak vmalloc_sync_unmappings(void) ++{ ++} ++ ++static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) ++{ ++ pte_t ***p = data; ++ ++ if (p) { ++ *(*p) = pte; ++ (*p)++; ++ } ++ return 0; ++} ++ ++/** ++ * alloc_vm_area - allocate a range of kernel address space ++ * @size: size of the area ++ * @ptes: returns the PTEs for the address space ++ * ++ * Returns: NULL on failure, vm_struct on success ++ * ++ * This function reserves a range of kernel address space, and ++ * allocates pagetables to map that range. No actual mappings ++ * are created. ++ * ++ * If @ptes is non-NULL, pointers to the PTEs (in init_mm) ++ * allocated for the VM area are returned. ++ */ ++struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) ++{ ++ struct vm_struct *area; ++ ++ area = get_vm_area_caller(size, VM_IOREMAP, ++ __builtin_return_address(0)); ++ if (area == NULL) ++ return NULL; ++ ++ /* ++ * This ensures that page tables are constructed for this region ++ * of kernel virtual address space and mapped into init_mm. ++ */ ++ if (apply_to_page_range(&init_mm, (unsigned long)area->addr, ++ size, f, ptes ? &ptes : NULL)) { ++ free_vm_area(area); ++ return NULL; ++ } ++ ++ return area; ++} ++EXPORT_SYMBOL_GPL(alloc_vm_area); ++ ++void free_vm_area(struct vm_struct *area) ++{ ++ struct vm_struct *ret; ++ ret = remove_vm_area(area->addr); ++ BUG_ON(ret != area); ++ kfree(area); ++} ++EXPORT_SYMBOL_GPL(free_vm_area); ++ ++#ifdef CONFIG_SMP ++static struct vmap_area *node_to_va(struct rb_node *n) ++{ ++ return rb_entry_safe(n, struct vmap_area, rb_node); ++} ++ ++/** ++ * pvm_find_next_prev - find the next and prev vmap_area surrounding @end ++ * @end: target address ++ * @pnext: out arg for the next vmap_area ++ * @pprev: out arg for the previous vmap_area ++ * ++ * Returns: %true if either or both of next and prev are found, ++ * %false if no vmap_area exists ++ * ++ * Find vmap_areas end addresses of which enclose @end. ie. if not ++ * NULL, *pnext->va_end > @end and *pprev->va_end <= @end. ++ */ ++static bool pvm_find_next_prev(unsigned long end, ++ struct vmap_area **pnext, ++ struct vmap_area **pprev) ++{ ++ struct rb_node *n = vmap_area_root.rb_node; ++ struct vmap_area *va = NULL; ++ ++ while (n) { ++ va = rb_entry(n, struct vmap_area, rb_node); ++ if (end < va->va_end) ++ n = n->rb_left; ++ else if (end > va->va_end) ++ n = n->rb_right; ++ else ++ break; ++ } ++ ++ if (!va) ++ return false; ++ ++ if (va->va_end > end) { ++ *pnext = va; ++ *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); ++ } else { ++ *pprev = va; ++ *pnext = node_to_va(rb_next(&(*pprev)->rb_node)); ++ } ++ return true; ++} ++ ++/** ++ * pvm_determine_end - find the highest aligned address between two vmap_areas ++ * @pnext: in/out arg for the next vmap_area ++ * @pprev: in/out arg for the previous vmap_area ++ * @align: alignment ++ * ++ * Returns: determined end address ++ * ++ * Find the highest aligned address between *@pnext and *@pprev below ++ * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned ++ * down address is between the end addresses of the two vmap_areas. ++ * ++ * Please note that the address returned by this function may fall ++ * inside *@pnext vmap_area. The caller is responsible for checking ++ * that. 
++ */ ++static unsigned long pvm_determine_end(struct vmap_area **pnext, ++ struct vmap_area **pprev, ++ unsigned long align) ++{ ++ const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); ++ unsigned long addr; ++ ++ if (*pnext) ++ addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end); ++ else ++ addr = vmalloc_end; ++ ++ while (*pprev && (*pprev)->va_end > addr) { ++ *pnext = *pprev; ++ *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); ++ } ++ ++ return addr; ++} ++ ++/** ++ * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator ++ * @offsets: array containing offset of each area ++ * @sizes: array containing size of each area ++ * @nr_vms: the number of areas to allocate ++ * @align: alignment, all entries in @offsets and @sizes must be aligned to this ++ * ++ * Returns: kmalloc'd vm_struct pointer array pointing to allocated ++ * vm_structs on success, %NULL on failure ++ * ++ * Percpu allocator wants to use congruent vm areas so that it can ++ * maintain the offsets among percpu areas. This function allocates ++ * congruent vmalloc areas for it with GFP_KERNEL. These areas tend to ++ * be scattered pretty far, distance between two areas easily going up ++ * to gigabytes. To avoid interacting with regular vmallocs, these ++ * areas are allocated from top. ++ * ++ * Despite its complicated look, this allocator is rather simple. It ++ * does everything top-down and scans areas from the end looking for ++ * matching slot. While scanning, if any of the areas overlaps with ++ * existing vmap_area, the base address is pulled down to fit the ++ * area. Scanning is repeated till all the areas fit and then all ++ * necessary data structures are inserted and the result is returned. ++ */ ++struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, ++ const size_t *sizes, int nr_vms, ++ size_t align) ++{ ++ const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); ++ const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); ++ struct vmap_area **vas, *prev, *next; ++ struct vm_struct **vms; ++ int area, area2, last_area, term_area; ++ unsigned long base, start, end, last_end; ++ bool purged = false; ++ ++ /* verify parameters and allocate data structures */ ++ BUG_ON(offset_in_page(align) || !is_power_of_2(align)); ++ for (last_area = 0, area = 0; area < nr_vms; area++) { ++ start = offsets[area]; ++ end = start + sizes[area]; ++ ++ /* is everything aligned properly? 
*/ ++ BUG_ON(!IS_ALIGNED(offsets[area], align)); ++ BUG_ON(!IS_ALIGNED(sizes[area], align)); ++ ++ /* detect the area with the highest address */ ++ if (start > offsets[last_area]) ++ last_area = area; ++ ++ for (area2 = area + 1; area2 < nr_vms; area2++) { ++ unsigned long start2 = offsets[area2]; ++ unsigned long end2 = start2 + sizes[area2]; ++ ++ BUG_ON(start2 < end && start < end2); ++ } ++ } ++ last_end = offsets[last_area] + sizes[last_area]; ++ ++ if (vmalloc_end - vmalloc_start < last_end) { ++ WARN_ON(true); ++ return NULL; ++ } ++ ++ vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL); ++ vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL); ++ if (!vas || !vms) ++ goto err_free2; ++ ++ for (area = 0; area < nr_vms; area++) { ++ vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL); ++ vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL); ++ if (!vas[area] || !vms[area]) ++ goto err_free; ++ } ++retry: ++ spin_lock(&vmap_area_lock); ++ ++ /* start scanning - we scan from the top, begin with the last area */ ++ area = term_area = last_area; ++ start = offsets[area]; ++ end = start + sizes[area]; ++ ++ if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) { ++ base = vmalloc_end - last_end; ++ goto found; ++ } ++ base = pvm_determine_end(&next, &prev, align) - end; ++ ++ while (true) { ++ BUG_ON(next && next->va_end <= base + end); ++ BUG_ON(prev && prev->va_end > base + end); ++ ++ /* ++ * base might have underflowed, add last_end before ++ * comparing. ++ */ ++ if (base + last_end < vmalloc_start + last_end) { ++ spin_unlock(&vmap_area_lock); ++ if (!purged) { ++ purge_vmap_area_lazy(); ++ purged = true; ++ goto retry; ++ } ++ goto err_free; ++ } ++ ++ /* ++ * If next overlaps, move base downwards so that it's ++ * right below next and then recheck. ++ */ ++ if (next && next->va_start < base + end) { ++ base = pvm_determine_end(&next, &prev, align) - end; ++ term_area = area; ++ continue; ++ } ++ ++ /* ++ * If prev overlaps, shift down next and prev and move ++ * base so that it's right below new next and then ++ * recheck. ++ */ ++ if (prev && prev->va_end > base + start) { ++ next = prev; ++ prev = node_to_va(rb_prev(&next->rb_node)); ++ base = pvm_determine_end(&next, &prev, align) - end; ++ term_area = area; ++ continue; ++ } ++ ++ /* ++ * This area fits, move on to the previous one. If ++ * the previous one is the terminal one, we're done. 
++ */ ++ area = (area + nr_vms - 1) % nr_vms; ++ if (area == term_area) ++ break; ++ start = offsets[area]; ++ end = start + sizes[area]; ++ pvm_find_next_prev(base + end, &next, &prev); ++ } ++found: ++ /* we've found a fitting base, insert all va's */ ++ for (area = 0; area < nr_vms; area++) { ++ struct vmap_area *va = vas[area]; ++ ++ va->va_start = base + offsets[area]; ++ va->va_end = va->va_start + sizes[area]; ++ __insert_vmap_area(va); ++ } ++ ++ vmap_area_pcpu_hole = base + offsets[last_area]; ++ ++ spin_unlock(&vmap_area_lock); ++ ++ /* insert all vm's */ ++ for (area = 0; area < nr_vms; area++) ++ setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC, ++ pcpu_get_vm_areas); ++ ++ kfree(vas); ++ return vms; ++ ++err_free: ++ for (area = 0; area < nr_vms; area++) { ++ kfree(vas[area]); ++ kfree(vms[area]); ++ } ++err_free2: ++ kfree(vas); ++ kfree(vms); ++ return NULL; ++} ++ ++/** ++ * pcpu_free_vm_areas - free vmalloc areas for percpu allocator ++ * @vms: vm_struct pointer array returned by pcpu_get_vm_areas() ++ * @nr_vms: the number of allocated areas ++ * ++ * Free vm_structs and the array allocated by pcpu_get_vm_areas(). ++ */ ++void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) ++{ ++ int i; ++ ++ for (i = 0; i < nr_vms; i++) ++ free_vm_area(vms[i]); ++ kfree(vms); ++} ++#endif /* CONFIG_SMP */ ++ ++#ifdef CONFIG_PROC_FS ++static void *s_start(struct seq_file *m, loff_t *pos) ++ __acquires(&vmap_area_lock) ++{ ++ spin_lock(&vmap_area_lock); ++ return seq_list_start(&vmap_area_list, *pos); ++} ++ ++static void *s_next(struct seq_file *m, void *p, loff_t *pos) ++{ ++ return seq_list_next(p, &vmap_area_list, pos); ++} ++ ++static void s_stop(struct seq_file *m, void *p) ++ __releases(&vmap_area_lock) ++{ ++ spin_unlock(&vmap_area_lock); ++} ++ ++static void show_numa_info(struct seq_file *m, struct vm_struct *v) ++{ ++ if (IS_ENABLED(CONFIG_NUMA)) { ++ unsigned int nr, *counters = m->private; ++ ++ if (!counters) ++ return; ++ ++ if (v->flags & VM_UNINITIALIZED) ++ return; ++ /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ ++ smp_rmb(); ++ ++ memset(counters, 0, nr_node_ids * sizeof(unsigned int)); ++ ++ for (nr = 0; nr < v->nr_pages; nr++) ++ counters[page_to_nid(v->pages[nr])]++; ++ ++ for_each_node_state(nr, N_HIGH_MEMORY) ++ if (counters[nr]) ++ seq_printf(m, " N%u=%u", nr, counters[nr]); ++ } ++} ++ ++static int s_show(struct seq_file *m, void *p) ++{ ++ struct vmap_area *va; ++ struct vm_struct *v; ++ ++ va = list_entry(p, struct vmap_area, list); ++ ++ /* ++ * s_show can encounter race with remove_vm_area, !VM_VM_AREA on ++ * behalf of vmap area is being tear down or vm_map_ram allocation. ++ */ ++ if (!(va->flags & VM_VM_AREA)) { ++ seq_printf(m, "0x%pK-0x%pK %7ld %s\n", ++ (void *)va->va_start, (void *)va->va_end, ++ va->va_end - va->va_start, ++ va->flags & VM_LAZY_FREE ? 
"unpurged vm_area" : "vm_map_ram"); ++ ++ return 0; ++ } ++ ++ v = va->vm; ++ ++ seq_printf(m, "0x%pK-0x%pK %7ld", ++ v->addr, v->addr + v->size, v->size); ++ ++ if (v->caller) ++ seq_printf(m, " %pS", v->caller); ++ ++ if (v->nr_pages) ++ seq_printf(m, " pages=%d", v->nr_pages); ++ ++ if (v->phys_addr) ++ seq_printf(m, " phys=%pa", &v->phys_addr); ++ ++ if (v->flags & VM_IOREMAP) ++ seq_puts(m, " ioremap"); ++ ++ if (v->flags & VM_ALLOC) ++ seq_puts(m, " vmalloc"); ++ ++ if (v->flags & VM_MAP) ++ seq_puts(m, " vmap"); ++ ++ if (v->flags & VM_USERMAP) ++ seq_puts(m, " user"); ++ ++ if (is_vmalloc_addr(v->pages)) ++ seq_puts(m, " vpages"); ++ ++ show_numa_info(m, v); ++ seq_putc(m, '\n'); ++ return 0; ++} ++ ++static const struct seq_operations vmalloc_op = { ++ .start = s_start, ++ .next = s_next, ++ .stop = s_stop, ++ .show = s_show, ++}; ++ ++static int __init proc_vmalloc_init(void) ++{ ++ if (IS_ENABLED(CONFIG_NUMA)) ++ proc_create_seq_private("vmallocinfo", 0400, NULL, ++ &vmalloc_op, ++ nr_node_ids * sizeof(unsigned int), NULL); ++ else ++ proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op); ++ return 0; ++} ++module_init(proc_vmalloc_init); ++ ++#endif ++ diff --git a/kernel.spec b/kernel.spec index cbe324ac4cc75dc5f2e2ce693cd3dcf8f27e9fd8..40d13d4fd07e9a10f21abec2c46436a23308a0da 100644 --- a/kernel.spec +++ b/kernel.spec @@ -24,7 +24,7 @@ Name: kernel-xeno Version: 4.19.90 -Release: %{hulkrelease}.0053.3.xeno3.1 +Release: %{hulkrelease}.0053.4.xeno3.1 Summary: Linux Kernel License: GPLv2 URL: http://www.kernel.org/ @@ -36,13 +36,20 @@ Source0: linux-%{version}.tar.gz#/kernel.tar.gz Source10: sign-modules Source11: x509.genkey Source12: extra_certificates -Patch0: ipipe-core-4.19.55-oe1.patch -Patch1: enable_irq.patch -Patch2: cobalt-core-3.1-4.19.90.patch -Patch3: cobalt-core-3.1-4.19.90-oe1.patch +Patch0: ipipe-core-4.19.55-oe1_arm64.patch +Patch1: enable_irq_arm64.patch +Patch2: cobalt-core-3.1-4.19.90_arm64.patch +Patch3: cobalt-core-3.1-4.19.90-oe1_arm64.patch Patch4: openeuler_defconfig_arm64.patch Patch5: openeuler_defconfig_arm64_2.patch +Patch1000: ipipe-core-4.19.90-oe1_x86.patch +Patch1001: cobalt-core-3.1-4.19.90_x86.patch +Patch1002: cobalt-core-3.1-4.19.90-oe1_x86.patch +Patch1003: openeuler_defconfig_x86.patch +Patch1004: openeuler_defconfig_x86_2.patch + + %if 0%{?with_kabichk} Source18: check-kabi Source20: Module.kabi_aarch64 @@ -263,12 +270,22 @@ Applypatches() Applypatches series.conf %{_builddir}/kernel-%{version}/linux-%{KernelVer} %endif +%ifarch aarch64 %patch0 -p1 %patch1 -p1 %patch2 -p1 %patch3 -p1 %patch4 -p1 %patch5 -p1 +%endif + +%ifarch x86_64 +%patch1000 -p1 +%patch1001 -p1 +%patch1002 -p1 +%patch1003 -p1 +%patch1004 -p1 +%endif touch .scmversion @@ -815,6 +832,8 @@ fi %endif %changelog +* Tue Jun 1 2021 dinglili - 4.19.90-2012.4.0.0053.4 +- modify src to support multi arch:arm x86 * Tue May 11 2021 dinglili - 4.19.90-2012.4.0.0053.2 - openeuler_defconfig:update the config to support xenomai - cobalt diff --git a/openeuler_defconfig_x86.patch b/openeuler_defconfig_x86.patch new file mode 100755 index 0000000000000000000000000000000000000000..6a4ef2a51b6312300eaf48da3c685001c179621b --- /dev/null +++ b/openeuler_defconfig_x86.patch @@ -0,0 +1,7264 @@ +--- kernel/arch/x86/configs/openeuler_defconfig 2020-12-21 21:59:17.000000000 +0800 ++++ kernel-new/arch/x86/configs/openeuler_defconfig 2021-04-29 14:39:17.056189482 +0800 +@@ -1,135 +1,24 @@ +-CONFIG_CC_IS_GCC=y +-CONFIG_GCC_VERSION=50400 +-CONFIG_CLANG_VERSION=0 +-CONFIG_CC_HAS_ASM_GOTO=y 
+-CONFIG_IRQ_WORK=y +-CONFIG_BUILDTIME_EXTABLE_SORT=y +-CONFIG_THREAD_INFO_IN_TASK=y +- +-# +-# General setup +-# +-CONFIG_INIT_ENV_ARG_LIMIT=32 +-# CONFIG_COMPILE_TEST is not set +-CONFIG_LOCALVERSION="" + # CONFIG_LOCALVERSION_AUTO is not set +-CONFIG_BUILD_SALT="" +-CONFIG_HAVE_KERNEL_GZIP=y +-CONFIG_HAVE_KERNEL_BZIP2=y +-CONFIG_HAVE_KERNEL_LZMA=y +-CONFIG_HAVE_KERNEL_XZ=y +-CONFIG_HAVE_KERNEL_LZO=y +-CONFIG_HAVE_KERNEL_LZ4=y +-CONFIG_KERNEL_GZIP=y +-# CONFIG_KERNEL_BZIP2 is not set +-# CONFIG_KERNEL_LZMA is not set +-# CONFIG_KERNEL_XZ is not set +-# CONFIG_KERNEL_LZO is not set +-# CONFIG_KERNEL_LZ4 is not set +-CONFIG_DEFAULT_HOSTNAME="(none)" +-CONFIG_SWAP=y + CONFIG_SYSVIPC=y +-CONFIG_SYSVIPC_SYSCTL=y + CONFIG_POSIX_MQUEUE=y +-CONFIG_POSIX_MQUEUE_SYSCTL=y +-CONFIG_CROSS_MEMORY_ATTACH=y + # CONFIG_USELIB is not set +-CONFIG_AUDIT=y +-CONFIG_HAVE_ARCH_AUDITSYSCALL=y +-CONFIG_AUDITSYSCALL=y +-CONFIG_AUDIT_WATCH=y +-CONFIG_AUDIT_TREE=y + # CONFIG_KTASK is not set +- +-# +-# IRQ subsystem +-# +-CONFIG_GENERIC_IRQ_PROBE=y +-CONFIG_GENERIC_IRQ_SHOW=y +-CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y +-CONFIG_GENERIC_PENDING_IRQ=y +-CONFIG_GENERIC_IRQ_MIGRATION=y +-CONFIG_IRQ_DOMAIN=y +-CONFIG_IRQ_DOMAIN_HIERARCHY=y +-CONFIG_GENERIC_MSI_IRQ=y +-CONFIG_GENERIC_MSI_IRQ_DOMAIN=y +-CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y +-CONFIG_GENERIC_IRQ_RESERVATION_MODE=y +-CONFIG_IRQ_FORCED_THREADING=y +-CONFIG_SPARSE_IRQ=y +-# CONFIG_GENERIC_IRQ_DEBUGFS is not set +-CONFIG_CLOCKSOURCE_WATCHDOG=y +-CONFIG_ARCH_CLOCKSOURCE_DATA=y +-CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y +-CONFIG_GENERIC_TIME_VSYSCALL=y +-CONFIG_GENERIC_CLOCKEVENTS=y +-CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +-CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y +-CONFIG_GENERIC_CMOS_UPDATE=y +- +-# +-# Timers subsystem +-# +-CONFIG_TICK_ONESHOT=y +-CONFIG_NO_HZ_COMMON=y +-# CONFIG_HZ_PERIODIC is not set +-# CONFIG_NO_HZ_IDLE is not set +-CONFIG_NO_HZ_FULL=y + CONFIG_NO_HZ=y + CONFIG_HIGH_RES_TIMERS=y +-# CONFIG_PREEMPT_NONE is not set +-CONFIG_PREEMPT_VOLUNTARY=y +-# CONFIG_PREEMPT is not set +- +-# +-# CPU/Task time and stats accounting +-# +-CONFIG_VIRT_CPU_ACCOUNTING=y +-CONFIG_VIRT_CPU_ACCOUNTING_GEN=y ++CONFIG_PREEMPT=y + CONFIG_IRQ_TIME_ACCOUNTING=y +-CONFIG_HAVE_SCHED_AVG_IRQ=y + CONFIG_BSD_PROCESS_ACCT=y + CONFIG_BSD_PROCESS_ACCT_V3=y +-CONFIG_TASKSTATS=y +-CONFIG_TASK_DELAY_ACCT=y + CONFIG_TASK_XACCT=y + CONFIG_TASK_IO_ACCOUNTING=y +-CONFIG_CPU_ISOLATION=y +- +-# +-# RCU Subsystem +-# +-CONFIG_TREE_RCU=y +-# CONFIG_RCU_EXPERT is not set +-CONFIG_SRCU=y +-CONFIG_TREE_SRCU=y +-CONFIG_RCU_STALL_COMMON=y +-CONFIG_RCU_NEED_SEGCBLIST=y +-CONFIG_CONTEXT_TRACKING=y +-# CONFIG_CONTEXT_TRACKING_FORCE is not set +-CONFIG_RCU_NOCB_CPU=y +-CONFIG_BUILD_BIN2C=y +-# CONFIG_IKCONFIG is not set ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y + CONFIG_LOG_BUF_SHIFT=20 +-CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 +-CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 +-CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y +-CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y +-CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y +-CONFIG_ARCH_SUPPORTS_INT128=y +-CONFIG_NUMA_BALANCING=y +-CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y + CONFIG_CGROUPS=y +-CONFIG_PAGE_COUNTER=y + CONFIG_MEMCG=y + CONFIG_MEMCG_SWAP=y +-CONFIG_MEMCG_SWAP_ENABLED=y +-CONFIG_MEMCG_KMEM=y + CONFIG_BLK_CGROUP=y +-# CONFIG_DEBUG_BLK_CGROUP is not set +-CONFIG_CGROUP_WRITEBACK=y + CONFIG_CGROUP_SCHED=y +-CONFIG_FAIR_GROUP_SCHED=y + CONFIG_CFS_BANDWIDTH=y + CONFIG_RT_GROUP_SCHED=y + CONFIG_CGROUP_PIDS=y +@@ -137,913 +26,167 @@ + CONFIG_CGROUP_FREEZER=y + 
CONFIG_CGROUP_HUGETLB=y + CONFIG_CPUSETS=y +-CONFIG_PROC_PID_CPUSET=y + CONFIG_CGROUP_DEVICE=y + CONFIG_CGROUP_CPUACCT=y + CONFIG_CGROUP_PERF=y + CONFIG_CGROUP_BPF=y +-# CONFIG_CGROUP_DEBUG is not set +-CONFIG_SOCK_CGROUP_DATA=y +-# CONFIG_CGROUP_FILES is not set +-CONFIG_NAMESPACES=y +-CONFIG_UTS_NS=y +-CONFIG_IPC_NS=y + CONFIG_USER_NS=y +-CONFIG_PID_NS=y +-CONFIG_NET_NS=y + CONFIG_CHECKPOINT_RESTORE=y +-CONFIG_SCHED_AUTOGROUP=y +-# CONFIG_SYSFS_DEPRECATED is not set +-CONFIG_RELAY=y + CONFIG_BLK_DEV_INITRD=y +-CONFIG_INITRAMFS_SOURCE="" +-CONFIG_RD_GZIP=y +-CONFIG_RD_BZIP2=y +-CONFIG_RD_LZMA=y +-CONFIG_RD_XZ=y +-CONFIG_RD_LZO=y +-CONFIG_RD_LZ4=y +-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y +-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +-CONFIG_SYSCTL=y +-CONFIG_ANON_INODES=y +-CONFIG_HAVE_UID16=y +-CONFIG_SYSCTL_EXCEPTION_TRACE=y +-CONFIG_HAVE_PCSPKR_PLATFORM=y +-CONFIG_BPF=y +-# CONFIG_EXPERT is not set +-CONFIG_UID16=y +-CONFIG_MULTIUSER=y +-CONFIG_SGETMASK_SYSCALL=y +-CONFIG_SYSFS_SYSCALL=y +-CONFIG_FHANDLE=y +-CONFIG_POSIX_TIMERS=y +-CONFIG_PRINTK=y +-CONFIG_PRINTK_NMI=y +-CONFIG_BUG=y +-CONFIG_ELF_CORE=y +-CONFIG_PCSPKR_PLATFORM=y +-CONFIG_BASE_FULL=y +-CONFIG_FUTEX=y +-CONFIG_FUTEX_PI=y +-CONFIG_EPOLL=y +-CONFIG_SIGNALFD=y +-CONFIG_TIMERFD=y +-CONFIG_EVENTFD=y +-CONFIG_SHMEM=y +-CONFIG_AIO=y +-CONFIG_ADVISE_SYSCALLS=y +-CONFIG_MEMBARRIER=y +-CONFIG_KALLSYMS=y + CONFIG_KALLSYMS_ALL=y +-CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y +-CONFIG_KALLSYMS_BASE_RELATIVE=y + CONFIG_BPF_SYSCALL=y + CONFIG_BPF_JIT_ALWAYS_ON=y + CONFIG_USERFAULTFD=y +-CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y +-CONFIG_RSEQ=y +-# CONFIG_EMBEDDED is not set +-CONFIG_HAVE_PERF_EVENTS=y +- +-# +-# Kernel Performance Events And Counters +-# +-CONFIG_PERF_EVENTS=y +-# CONFIG_DEBUG_PERF_USE_VMALLOC is not set +-CONFIG_VM_EVENT_COUNTERS=y +-CONFIG_SLUB_DEBUG=y + # CONFIG_COMPAT_BRK is not set +-# CONFIG_SLAB is not set +-CONFIG_SLUB=y +-CONFIG_SLAB_MERGE_DEFAULT=y + CONFIG_SLAB_FREELIST_RANDOM=y +-# CONFIG_SLAB_FREELIST_HARDENED is not set +-CONFIG_SLUB_CPU_PARTIAL=y +-CONFIG_SYSTEM_DATA_VERIFICATION=y + CONFIG_PROFILING=y +-CONFIG_TRACEPOINTS=y +-CONFIG_64BIT=y +-CONFIG_X86_64=y +-CONFIG_X86=y +-CONFIG_INSTRUCTION_DECODER=y +-CONFIG_OUTPUT_FORMAT="elf64-x86-64" +-CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" +-CONFIG_LOCKDEP_SUPPORT=y +-CONFIG_STACKTRACE_SUPPORT=y +-CONFIG_MMU=y +-CONFIG_ARCH_MMAP_RND_BITS_MIN=28 +-CONFIG_ARCH_MMAP_RND_BITS_MAX=32 +-CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 +-CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 +-CONFIG_GENERIC_ISA_DMA=y +-CONFIG_GENERIC_BUG=y +-CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y +-CONFIG_GENERIC_HWEIGHT=y +-CONFIG_ARCH_MAY_HAVE_PC_FDC=y +-CONFIG_RWSEM_XCHGADD_ALGORITHM=y +-CONFIG_GENERIC_CALIBRATE_DELAY=y +-CONFIG_ARCH_HAS_CPU_RELAX=y +-CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +-CONFIG_ARCH_HAS_FILTER_PGPROT=y +-CONFIG_HAVE_SETUP_PER_CPU_AREA=y +-CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y +-CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y +-CONFIG_ARCH_HIBERNATION_POSSIBLE=y +-CONFIG_ARCH_SUSPEND_POSSIBLE=y +-CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y +-CONFIG_ARCH_WANT_GENERAL_HUGETLB=y +-CONFIG_ZONE_DMA32=y +-CONFIG_AUDIT_ARCH=y +-CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +-CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y +-CONFIG_HAVE_INTEL_TXT=y +-CONFIG_X86_64_SMP=y +-CONFIG_ARCH_SUPPORTS_UPROBES=y +-CONFIG_FIX_EARLYCON_MEM=y +-CONFIG_DYNAMIC_PHYSICAL_MASK=y +-CONFIG_PGTABLE_LEVELS=5 +-CONFIG_CC_HAS_SANE_STACKPROTECTOR=y +- +-# +-# Processor type and features +-# +-CONFIG_ZONE_DMA=y + CONFIG_SMP=y 
+-CONFIG_X86_FEATURE_NAMES=y + CONFIG_X86_X2APIC=y +-CONFIG_X86_MPPARSE=y +-# CONFIG_GOLDFISH is not set +-CONFIG_RETPOLINE=y +-# CONFIG_INTEL_RDT is not set +-CONFIG_X86_EXTENDED_PLATFORM=y +-# CONFIG_X86_NUMACHIP is not set +-# CONFIG_X86_VSMP is not set +-CONFIG_X86_UV=y +-# CONFIG_X86_GOLDFISH is not set +-# CONFIG_X86_INTEL_MID is not set ++CONFIG_INTEL_RDT=y + CONFIG_X86_INTEL_LPSS=y + CONFIG_X86_AMD_PLATFORM_DEVICE=y +-CONFIG_IOSF_MBI=y +-# CONFIG_IOSF_MBI_DEBUG is not set +-CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y +-CONFIG_SCHED_OMIT_FRAME_POINTER=y + CONFIG_HYPERVISOR_GUEST=y +-CONFIG_PARAVIRT=y +-# CONFIG_PARAVIRT_DEBUG is not set +-CONFIG_PARAVIRT_SPINLOCKS=y +-# CONFIG_QUEUED_LOCK_STAT is not set +-CONFIG_XEN=y +-# CONFIG_XEN_PV is not set +-CONFIG_XEN_PVHVM=y +-CONFIG_XEN_PVHVM_SMP=y +-CONFIG_XEN_SAVE_RESTORE=y +-# CONFIG_XEN_DEBUG_FS is not set +-# CONFIG_XEN_PVH is not set +-CONFIG_KVM_GUEST=y +-# CONFIG_KVM_DEBUG_FS is not set +-CONFIG_PARAVIRT_TIME_ACCOUNTING=y +-CONFIG_PARAVIRT_CLOCK=y +-# CONFIG_JAILHOUSE_GUEST is not set +-CONFIG_NO_BOOTMEM=y +-# CONFIG_MK8 is not set +-# CONFIG_MPSC is not set +-# CONFIG_MCORE2 is not set +-# CONFIG_MATOM is not set +-CONFIG_GENERIC_CPU=y +-CONFIG_X86_INTERNODE_CACHE_SHIFT=6 +-CONFIG_X86_L1_CACHE_SHIFT=6 +-CONFIG_X86_TSC=y +-CONFIG_X86_CMPXCHG64=y +-CONFIG_X86_CMOV=y +-CONFIG_X86_MINIMUM_CPU_FAMILY=64 +-CONFIG_X86_DEBUGCTLMSR=y +-CONFIG_CPU_SUP_INTEL=y +-CONFIG_CPU_SUP_AMD=y +-CONFIG_CPU_SUP_CENTAUR=y +-CONFIG_HPET_TIMER=y +-CONFIG_HPET_EMULATE_RTC=y +-CONFIG_DMI=y +-# CONFIG_GART_IOMMU is not set +-# CONFIG_CALGARY_IOMMU is not set +-CONFIG_MAXSMP=y +-CONFIG_NR_CPUS_RANGE_BEGIN=8192 +-CONFIG_NR_CPUS_RANGE_END=8192 +-CONFIG_NR_CPUS_DEFAULT=8192 +-CONFIG_NR_CPUS=8192 +-CONFIG_SCHED_SMT=y +-CONFIG_SCHED_MC=y +-CONFIG_SCHED_MC_PRIO=y +-CONFIG_X86_LOCAL_APIC=y +-CONFIG_X86_IO_APIC=y ++CONFIG_MCORE2=y ++# CONFIG_SCHED_MC_PRIO is not set + CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y +-CONFIG_X86_MCE=y + CONFIG_X86_MCELOG_LEGACY=y +-CONFIG_X86_MCE_INTEL=y +-CONFIG_X86_MCE_AMD=y +-CONFIG_X86_MCE_THRESHOLD=y + CONFIG_X86_MCE_INJECT=m +-CONFIG_X86_THERMAL_VECTOR=y +- +-# +-# Performance monitoring +-# + CONFIG_PERF_EVENTS_INTEL_UNCORE=m + CONFIG_PERF_EVENTS_INTEL_RAPL=m + CONFIG_PERF_EVENTS_INTEL_CSTATE=m + CONFIG_PERF_EVENTS_AMD_POWER=m +-CONFIG_X86_16BIT=y +-CONFIG_X86_ESPFIX64=y +-CONFIG_X86_VSYSCALL_EMULATION=y + CONFIG_I8K=m +-CONFIG_MICROCODE=y +-CONFIG_MICROCODE_INTEL=y + CONFIG_MICROCODE_AMD=y +-CONFIG_MICROCODE_OLD_INTERFACE=y + CONFIG_X86_MSR=y + CONFIG_X86_CPUID=y + CONFIG_X86_5LEVEL=y +-CONFIG_X86_DIRECT_GBPAGES=y +-CONFIG_ARCH_HAS_MEM_ENCRYPT=y + CONFIG_AMD_MEM_ENCRYPT=y + # CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT is not set + CONFIG_NUMA=y +-# CONFIG_NUMA_AWARE_SPINLOCKS is not set +-CONFIG_AMD_NUMA=y +-CONFIG_X86_64_ACPI_NUMA=y +-CONFIG_NODES_SPAN_OTHER_NODES=y + CONFIG_NUMA_EMU=y + CONFIG_NODES_SHIFT=10 +-CONFIG_ARCH_SPARSEMEM_ENABLE=y +-CONFIG_ARCH_SPARSEMEM_DEFAULT=y +-CONFIG_ARCH_SELECT_MEMORY_MODEL=y +-# CONFIG_ARCH_MEMORY_PROBE is not set +-CONFIG_ARCH_PROC_KCORE_TEXT=y +-CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 +-CONFIG_X86_PMEM_LEGACY_DEVICE=y + CONFIG_X86_PMEM_LEGACY=m + CONFIG_X86_CHECK_BIOS_CORRUPTION=y + # CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK is not set +-CONFIG_X86_RESERVE_LOW=64 +-CONFIG_MTRR=y +-CONFIG_MTRR_SANITIZER=y + CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=1 +-CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1 +-CONFIG_X86_PAT=y +-CONFIG_ARCH_USES_PG_UNCACHED=y +-CONFIG_ARCH_RANDOM=y +-CONFIG_X86_SMAP=y 
+-CONFIG_X86_INTEL_UMIP=y +-# CONFIG_X86_INTEL_MPX is not set +-CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y +-CONFIG_X86_INTEL_TSX_MODE_OFF=y +-# CONFIG_X86_INTEL_TSX_MODE_ON is not set +-# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set + CONFIG_EFI=y + CONFIG_EFI_STUB=y + CONFIG_EFI_MIXED=y +-CONFIG_SECCOMP=y +-# CONFIG_HZ_100 is not set +-# CONFIG_HZ_250 is not set +-# CONFIG_HZ_300 is not set +-CONFIG_HZ_1000=y +-CONFIG_HZ=1000 +-CONFIG_SCHED_HRTICK=y + CONFIG_KEXEC=y + CONFIG_KEXEC_FILE=y +-CONFIG_ARCH_HAS_KEXEC_PURGATORY=y + CONFIG_KEXEC_VERIFY_SIG=y + CONFIG_KEXEC_BZIMAGE_VERIFY_SIG=y + CONFIG_CRASH_DUMP=y + CONFIG_KEXEC_JUMP=y +-CONFIG_PHYSICAL_START=0x1000000 +-CONFIG_RELOCATABLE=y +-CONFIG_RANDOMIZE_BASE=y +-CONFIG_X86_NEED_RELOCS=y +-CONFIG_PHYSICAL_ALIGN=0x200000 +-CONFIG_DYNAMIC_MEMORY_LAYOUT=y +-CONFIG_RANDOMIZE_MEMORY=y +-CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0xa +-CONFIG_HOTPLUG_CPU=y + CONFIG_BOOTPARAM_HOTPLUG_CPU0=y +-# CONFIG_DEBUG_HOTPLUG_CPU0 is not set +-# CONFIG_COMPAT_VDSO is not set +-CONFIG_LEGACY_VSYSCALL_EMULATE=y +-# CONFIG_LEGACY_VSYSCALL_NONE is not set +-# CONFIG_CMDLINE_BOOL is not set +-CONFIG_MODIFY_LDT_SYSCALL=y +-CONFIG_HAVE_LIVEPATCH_FTRACE=y +-CONFIG_HAVE_LIVEPATCH_WO_FTRACE=y +- +-# +-# Enable Livepatch +-# + CONFIG_LIVEPATCH=y +-# CONFIG_LIVEPATCH_FTRACE is not set +-CONFIG_LIVEPATCH_WO_FTRACE=y +-CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY=y +-# CONFIG_LIVEPATCH_STACK is not set +-CONFIG_LIVEPATCH_RESTRICT_KPROBE=y +-CONFIG_ARCH_HAS_ADD_PAGES=y +-CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +-CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y +-CONFIG_USE_PERCPU_NUMA_NODE_ID=y +-CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y +-CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y +-CONFIG_ARCH_ENABLE_THP_MIGRATION=y +- +-# +-# Power management and ACPI options +-# +-CONFIG_ARCH_HIBERNATION_HEADER=y +-CONFIG_SUSPEND=y +-CONFIG_SUSPEND_FREEZER=y +-CONFIG_HIBERNATE_CALLBACKS=y + CONFIG_HIBERNATION=y +-CONFIG_PM_STD_PARTITION="" +-CONFIG_PM_SLEEP=y +-CONFIG_PM_SLEEP_SMP=y +-# CONFIG_PM_AUTOSLEEP is not set +-# CONFIG_PM_WAKELOCKS is not set +-CONFIG_PM=y + CONFIG_PM_DEBUG=y +-# CONFIG_PM_ADVANCED_DEBUG is not set +-# CONFIG_PM_TEST_SUSPEND is not set +-CONFIG_PM_SLEEP_DEBUG=y +-# CONFIG_PM_TRACE_RTC is not set +-CONFIG_PM_CLK=y +-CONFIG_PM_GENERIC_DOMAINS=y +-# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set +-CONFIG_PM_GENERIC_DOMAINS_SLEEP=y +-CONFIG_ARCH_SUPPORTS_ACPI=y +-CONFIG_ACPI=y +-CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y +-CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y +-CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y +-# CONFIG_ACPI_DEBUGGER is not set +-CONFIG_ACPI_SPCR_TABLE=y +-CONFIG_ACPI_LPIT=y +-CONFIG_ACPI_SLEEP=y +-# CONFIG_ACPI_PROCFS_POWER is not set +-CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y + CONFIG_ACPI_EC_DEBUGFS=m +-CONFIG_ACPI_AC=y +-CONFIG_ACPI_BATTERY=y +-CONFIG_ACPI_BUTTON=y +-CONFIG_ACPI_VIDEO=m +-CONFIG_ACPI_FAN=y + CONFIG_ACPI_TAD=m + CONFIG_ACPI_DOCK=y +-CONFIG_ACPI_CPU_FREQ_PSS=y +-CONFIG_ACPI_PROCESSOR_CSTATE=y +-CONFIG_ACPI_PROCESSOR_IDLE=y +-CONFIG_ACPI_CPPC_LIB=y +-CONFIG_ACPI_PROCESSOR=y ++# CONFIG_ACPI_PROCESSOR is not set + CONFIG_ACPI_IPMI=m +-CONFIG_ACPI_HOTPLUG_CPU=y +-CONFIG_ACPI_PROCESSOR_AGGREGATOR=m +-CONFIG_ACPI_THERMAL=y +-CONFIG_ACPI_NUMA=y +-CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y +-CONFIG_ACPI_TABLE_UPGRADE=y +-# CONFIG_ACPI_DEBUG is not set + CONFIG_ACPI_PCI_SLOT=y +-CONFIG_ACPI_CONTAINER=y + CONFIG_ACPI_HOTPLUG_MEMORY=y +-CONFIG_ACPI_HOTPLUG_IOAPIC=y + CONFIG_ACPI_SBS=m +-CONFIG_ACPI_HED=y +-# CONFIG_ACPI_CUSTOM_METHOD is not set + CONFIG_ACPI_BGRT=y + CONFIG_ACPI_NFIT=m 
+-CONFIG_HAVE_ACPI_APEI=y +-CONFIG_HAVE_ACPI_APEI_NMI=y + CONFIG_ACPI_APEI=y + CONFIG_ACPI_APEI_GHES=y + CONFIG_ACPI_APEI_PCIEAER=y + CONFIG_ACPI_APEI_MEMORY_FAILURE=y + CONFIG_ACPI_APEI_EINJ=m +-# CONFIG_ACPI_APEI_ERST_DEBUG is not set + CONFIG_DPTF_POWER=m +-CONFIG_ACPI_WATCHDOG=y + CONFIG_ACPI_EXTLOG=m + CONFIG_PMIC_OPREGION=y +-# CONFIG_ACPI_CONFIGFS is not set +-CONFIG_X86_PM_TIMER=y + CONFIG_SFI=y +- +-# +-# CPU Frequency scaling +-# +-CONFIG_CPU_FREQ=y +-CONFIG_CPU_FREQ_GOV_ATTR_SET=y +-CONFIG_CPU_FREQ_GOV_COMMON=y +-CONFIG_CPU_FREQ_STAT=y +-CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +-# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +-# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +-# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +-# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set +-# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set +-CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +-CONFIG_CPU_FREQ_GOV_POWERSAVE=y +-CONFIG_CPU_FREQ_GOV_USERSPACE=y +-CONFIG_CPU_FREQ_GOV_ONDEMAND=y +-CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +-# CONFIG_CPU_FREQ_GOV_SCHEDUTIL is not set +- +-# +-# CPU frequency scaling drivers +-# +-CONFIG_X86_INTEL_PSTATE=y +-# CONFIG_X86_PCC_CPUFREQ is not set +-CONFIG_X86_ACPI_CPUFREQ=m +-CONFIG_X86_ACPI_CPUFREQ_CPB=y +-CONFIG_X86_POWERNOW_K8=m +-CONFIG_X86_AMD_FREQ_SENSITIVITY=m +-# CONFIG_X86_SPEEDSTEP_CENTRINO is not set +-CONFIG_X86_P4_CLOCKMOD=m +- +-# +-# shared options +-# +-CONFIG_X86_SPEEDSTEP_LIB=m +- +-# +-# CPU Idle +-# +-CONFIG_CPU_IDLE=y +-# CONFIG_CPU_IDLE_GOV_LADDER is not set +-CONFIG_CPU_IDLE_GOV_MENU=y +-CONFIG_INTEL_IDLE=y +- +-# +-# Bus options (PCI etc.) +-# +-CONFIG_PCI=y +-CONFIG_PCI_DIRECT=y +-CONFIG_PCI_MMCONFIG=y +-CONFIG_PCI_XEN=y +-CONFIG_PCI_DOMAINS=y +-CONFIG_MMCONF_FAM10H=y ++# CONFIG_CPU_IDLE is not set + CONFIG_PCIEPORTBUS=y + CONFIG_HOTPLUG_PCI_PCIE=y +-CONFIG_PCIEAER=y + CONFIG_PCIEAER_INJECT=m + CONFIG_PCIE_ECRC=y +-CONFIG_PCIEASPM=y +-# CONFIG_PCIEASPM_DEBUG is not set +-CONFIG_PCIEASPM_DEFAULT=y +-# CONFIG_PCIEASPM_POWERSAVE is not set +-# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set +-# CONFIG_PCIEASPM_PERFORMANCE is not set +-CONFIG_PCIE_PME=y + CONFIG_PCIE_DPC=y +-# CONFIG_PCIE_PTM is not set +-CONFIG_PCI_MSI=y +-CONFIG_PCI_MSI_IRQ_DOMAIN=y +-CONFIG_PCI_QUIRKS=y +-# CONFIG_PCI_DEBUG is not set +-# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set + CONFIG_PCI_STUB=y + CONFIG_PCI_PF_STUB=m +-# CONFIG_XEN_PCIDEV_FRONTEND is not set +-CONFIG_PCI_ATS=y +-CONFIG_PCI_LOCKLESS_CONFIG=y +-CONFIG_PCI_IOV=y +-CONFIG_PCI_PRI=y +-CONFIG_PCI_PASID=y +-CONFIG_PCI_LABEL=y +-CONFIG_PCI_HYPERV=m + CONFIG_HOTPLUG_PCI=y + CONFIG_HOTPLUG_PCI_ACPI=y + CONFIG_HOTPLUG_PCI_ACPI_IBM=m +-# CONFIG_HOTPLUG_PCI_CPCI is not set + CONFIG_HOTPLUG_PCI_SHPC=y +- +-# +-# PCI controller drivers +-# +- +-# +-# Cadence PCIe controllers support +-# + CONFIG_VMD=y +- +-# +-# DesignWare PCI Core Support +-# +-# CONFIG_PCIE_DW_PLAT_HOST is not set +-# CONFIG_HISILICON_PCIE_CAE is not set +- +-# +-# PCI Endpoint +-# +-# CONFIG_PCI_ENDPOINT is not set +- +-# +-# PCI switch controller drivers +-# +-# CONFIG_PCI_SW_SWITCHTEC is not set +-CONFIG_ISA_DMA_API=y +-CONFIG_AMD_NB=y + CONFIG_PCCARD=y +-# CONFIG_PCMCIA is not set +-CONFIG_CARDBUS=y +- +-# +-# PC-card bridges +-# ++CONFIG_PCMCIA=m + CONFIG_YENTA=m +-CONFIG_YENTA_O2=y +-CONFIG_YENTA_RICOH=y +-CONFIG_YENTA_TI=y +-CONFIG_YENTA_ENE_TUNE=y +-CONFIG_YENTA_TOSHIBA=y +-# CONFIG_RAPIDIO is not set +-# CONFIG_X86_SYSFB is not set +- +-# +-# Binary Emulations +-# + CONFIG_IA32_EMULATION=y +-# CONFIG_X86_X32 is not set +-CONFIG_COMPAT_32=y 
+-CONFIG_COMPAT=y +-CONFIG_COMPAT_FOR_U64_ALIGNMENT=y +-CONFIG_SYSVIPC_COMPAT=y +-CONFIG_X86_DEV_DMA_OPS=y +-CONFIG_HAVE_GENERIC_GUP=y +- +-# +-# Firmware Drivers +-# + CONFIG_EDD=m +-# CONFIG_EDD_OFF is not set +-CONFIG_FIRMWARE_MEMMAP=y + CONFIG_DELL_RBU=m + CONFIG_DCDBAS=m +-CONFIG_DMIID=y + CONFIG_DMI_SYSFS=y +-CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y +-CONFIG_ISCSI_IBFT_FIND=y + CONFIG_ISCSI_IBFT=m + CONFIG_FW_CFG_SYSFS=y +-# CONFIG_FW_CFG_SYSFS_CMDLINE is not set +-# CONFIG_GOOGLE_FIRMWARE is not set +- +-# +-# EFI (Extensible Firmware Interface) Support +-# +-#CONFIG_EFI_VARS is not set +-CONFIG_EFI_ESRT=y +-CONFIG_EFI_RUNTIME_MAP=y +-# CONFIG_EFI_FAKE_MEMMAP is not set +-CONFIG_EFI_RUNTIME_WRAPPERS=y +-# CONFIG_EFI_BOOTLOADER_CONTROL is not set +-# CONFIG_EFI_CAPSULE_LOADER is not set +-# CONFIG_EFI_TEST is not set +-CONFIG_APPLE_PROPERTIES=y +-# CONFIG_RESET_ATTACK_MITIGATION is not set +-CONFIG_UEFI_CPER=y +-CONFIG_UEFI_CPER_X86=y +-CONFIG_EFI_DEV_PATH_PARSER=y +- +-# +-# Tegra firmware driver +-# +-CONFIG_HAVE_KVM=y +-CONFIG_HAVE_KVM_IRQCHIP=y +-CONFIG_HAVE_KVM_IRQFD=y +-CONFIG_HAVE_KVM_IRQ_ROUTING=y +-CONFIG_HAVE_KVM_EVENTFD=y +-CONFIG_KVM_MMIO=y +-CONFIG_KVM_ASYNC_PF=y +-CONFIG_HAVE_KVM_MSI=y +-CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y +-CONFIG_KVM_VFIO=y +-CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y +-CONFIG_KVM_COMPAT=y +-CONFIG_HAVE_KVM_IRQ_BYPASS=y +-CONFIG_VIRTUALIZATION=y ++CONFIG_EFI_VARS=y ++CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE=y + CONFIG_KVM=m + CONFIG_KVM_INTEL=m + CONFIG_KVM_AMD=m +-CONFIG_KVM_AMD_SEV=y + CONFIG_KVM_MMU_AUDIT=y + CONFIG_VHOST_NET=m +-# CONFIG_VHOST_SCSI is not set + CONFIG_VHOST_VSOCK=m +-CONFIG_VHOST=m +-# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set +- +-# +-# General architecture-dependent options +-# +-CONFIG_CRASH_CORE=y +-CONFIG_KEXEC_CORE=y +-CONFIG_HOTPLUG_SMT=y + CONFIG_OPROFILE=m + CONFIG_OPROFILE_EVENT_MULTIPLEX=y +-CONFIG_HAVE_OPROFILE=y +-CONFIG_OPROFILE_NMI_TIMER=y + CONFIG_KPROBES=y +-CONFIG_JUMP_LABEL=y +-# CONFIG_STATIC_KEYS_SELFTEST is not set +-CONFIG_OPTPROBES=y +-CONFIG_KPROBES_ON_FTRACE=y +-CONFIG_UPROBES=y +-CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y +-CONFIG_ARCH_USE_BUILTIN_BSWAP=y +-CONFIG_KRETPROBES=y +-CONFIG_USER_RETURN_NOTIFIER=y +-CONFIG_HAVE_IOREMAP_PROT=y +-CONFIG_HAVE_KPROBES=y +-CONFIG_HAVE_KRETPROBES=y +-CONFIG_HAVE_OPTPROBES=y +-CONFIG_HAVE_KPROBES_ON_FTRACE=y +-CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y +-CONFIG_HAVE_NMI=y +-CONFIG_HAVE_ARCH_TRACEHOOK=y +-CONFIG_HAVE_DMA_CONTIGUOUS=y +-CONFIG_GENERIC_SMP_IDLE_THREAD=y +-CONFIG_ARCH_HAS_FORTIFY_SOURCE=y +-CONFIG_ARCH_HAS_SET_MEMORY=y +-CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y +-CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y +-CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y +-CONFIG_HAVE_RSEQ=y +-CONFIG_HAVE_CLK=y +-CONFIG_HAVE_HW_BREAKPOINT=y +-CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y +-CONFIG_HAVE_USER_RETURN_NOTIFIER=y +-CONFIG_HAVE_PERF_EVENTS_NMI=y +-CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y +-CONFIG_HAVE_PERF_REGS=y +-CONFIG_HAVE_PERF_USER_STACK_DUMP=y +-CONFIG_HAVE_ARCH_JUMP_LABEL=y +-CONFIG_HAVE_RCU_TABLE_FREE=y +-CONFIG_HAVE_RCU_TABLE_INVALIDATE=y +-CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y +-CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y +-CONFIG_HAVE_CMPXCHG_LOCAL=y +-CONFIG_HAVE_CMPXCHG_DOUBLE=y +-CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y +-CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y +-CONFIG_HAVE_ARCH_SECCOMP_FILTER=y +-CONFIG_SECCOMP_FILTER=y +-CONFIG_HAVE_STACKPROTECTOR=y +-CONFIG_CC_HAS_STACKPROTECTOR_NONE=y +-CONFIG_STACKPROTECTOR=y +-CONFIG_STACKPROTECTOR_STRONG=y 
+-CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y +-CONFIG_HAVE_CONTEXT_TRACKING=y +-CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y +-CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y +-CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y +-CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y +-CONFIG_HAVE_ARCH_HUGE_VMAP=y +-CONFIG_HAVE_ARCH_SOFT_DIRTY=y +-CONFIG_HAVE_MOD_ARCH_SPECIFIC=y +-CONFIG_MODULES_USE_ELF_RELA=y +-CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y +-CONFIG_ARCH_HAS_ELF_RANDOMIZE=y +-CONFIG_HAVE_ARCH_MMAP_RND_BITS=y +-CONFIG_HAVE_EXIT_THREAD=y +-CONFIG_ARCH_MMAP_RND_BITS=28 +-CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y +-CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 +-CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y +-CONFIG_HAVE_COPY_THREAD_TLS=y +-CONFIG_HAVE_STACK_VALIDATION=y +-CONFIG_HAVE_RELIABLE_STACKTRACE=y +-CONFIG_OLD_SIGSUSPEND3=y +-CONFIG_COMPAT_OLD_SIGACTION=y +-CONFIG_COMPAT_32BIT_TIME=y +-CONFIG_HAVE_ARCH_VMAP_STACK=y +-CONFIG_VMAP_STACK=y +-CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y +-CONFIG_STRICT_KERNEL_RWX=y +-CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y +-CONFIG_STRICT_MODULE_RWX=y +-CONFIG_ARCH_HAS_REFCOUNT=y +-# CONFIG_REFCOUNT_FULL is not set +-CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y +-CONFIG_ARCH_USE_MEMREMAP_PROT=y +- +-# +-# GCOV-based kernel profiling +-# +-# CONFIG_GCOV_KERNEL is not set +-CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y +-CONFIG_PLUGIN_HOSTCC="g++" +-CONFIG_HAVE_GCC_PLUGINS=y +-# CONFIG_GCC_PLUGINS is not set +-CONFIG_RT_MUTEXES=y +-CONFIG_BASE_SMALL=0 + CONFIG_MODULES=y + CONFIG_MODULE_FORCE_LOAD=y + CONFIG_MODULE_UNLOAD=y +-# CONFIG_MODULE_FORCE_UNLOAD is not set + CONFIG_MODVERSIONS=y + CONFIG_MODULE_SRCVERSION_ALL=y + CONFIG_MODULE_SIG=y +-# CONFIG_MODULE_SIG_FORCE is not set +-CONFIG_MODULE_SIG_ALL=y +-# CONFIG_MODULE_SIG_SHA1 is not set +-# CONFIG_MODULE_SIG_SHA224 is not set + CONFIG_MODULE_SIG_SHA256=y +-# CONFIG_MODULE_SIG_SHA384 is not set +-# CONFIG_MODULE_SIG_SHA512 is not set +-CONFIG_MODULE_SIG_HASH="sha256" +-# CONFIG_MODULE_COMPRESS is not set +-# CONFIG_TRIM_UNUSED_KSYMS is not set +-CONFIG_MODULES_TREE_LOOKUP=y +-CONFIG_BLOCK=y +-CONFIG_BLK_SCSI_REQUEST=y +-CONFIG_BLK_DEV_BSG=y +-CONFIG_BLK_DEV_BSGLIB=y +-CONFIG_BLK_DEV_INTEGRITY=y +-# CONFIG_BLK_DEV_ZONED is not set + CONFIG_BLK_DEV_THROTTLING=y +-# CONFIG_BLK_DEV_THROTTLING_LOW is not set +-# CONFIG_BLK_CMDLINE_PARSER is not set + CONFIG_BLK_WBT=y +-# CONFIG_BLK_CGROUP_IOLATENCY is not set +-# CONFIG_BLK_WBT_SQ is not set +-CONFIG_BLK_WBT_MQ=y +-CONFIG_BLK_DEBUG_FS=y +-# CONFIG_BLK_SED_OPAL is not set +- +-# +-# Partition Types +-# + CONFIG_PARTITION_ADVANCED=y +-# CONFIG_ACORN_PARTITION is not set +-# CONFIG_AIX_PARTITION is not set + CONFIG_OSF_PARTITION=y + CONFIG_AMIGA_PARTITION=y +-# CONFIG_ATARI_PARTITION is not set + CONFIG_MAC_PARTITION=y +-CONFIG_MSDOS_PARTITION=y + CONFIG_BSD_DISKLABEL=y + CONFIG_MINIX_SUBPARTITION=y + CONFIG_SOLARIS_X86_PARTITION=y + CONFIG_UNIXWARE_DISKLABEL=y +-# CONFIG_LDM_PARTITION is not set + CONFIG_SGI_PARTITION=y +-# CONFIG_ULTRIX_PARTITION is not set + CONFIG_SUN_PARTITION=y + CONFIG_KARMA_PARTITION=y +-CONFIG_EFI_PARTITION=y +-# CONFIG_SYSV68_PARTITION is not set +-# CONFIG_CMDLINE_PARTITION is not set +-CONFIG_BLOCK_COMPAT=y +-CONFIG_BLK_MQ_PCI=y +-CONFIG_BLK_MQ_VIRTIO=y +-CONFIG_BLK_MQ_RDMA=y +- +-# +-# IO Schedulers +-# +-CONFIG_IOSCHED_NOOP=y +-CONFIG_IOSCHED_DEADLINE=y +-CONFIG_IOSCHED_CFQ=y +-CONFIG_CFQ_GROUP_IOSCHED=y +-# CONFIG_DEFAULT_DEADLINE is not set +-CONFIG_DEFAULT_CFQ=y +-# CONFIG_DEFAULT_NOOP is not set +-CONFIG_DEFAULT_IOSCHED="cfq" +-CONFIG_MQ_IOSCHED_DEADLINE=y +-CONFIG_MQ_IOSCHED_KYBER=y + CONFIG_IOSCHED_BFQ=y + 
CONFIG_BFQ_GROUP_IOSCHED=y +-CONFIG_PREEMPT_NOTIFIERS=y +-CONFIG_PADATA=y +-CONFIG_ASN1=y +-CONFIG_INLINE_SPIN_UNLOCK_IRQ=y +-CONFIG_INLINE_READ_UNLOCK=y +-CONFIG_INLINE_READ_UNLOCK_IRQ=y +-CONFIG_INLINE_WRITE_UNLOCK=y +-CONFIG_INLINE_WRITE_UNLOCK_IRQ=y +-CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y +-CONFIG_MUTEX_SPIN_ON_OWNER=y +-CONFIG_RWSEM_SPIN_ON_OWNER=y +-CONFIG_LOCK_SPIN_ON_OWNER=y +-CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y +-CONFIG_QUEUED_SPINLOCKS=y +-CONFIG_ARCH_USE_QUEUED_RWLOCKS=y +-CONFIG_QUEUED_RWLOCKS=y +-CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y +-CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y +-CONFIG_FREEZER=y +- +-# +-# Executable file formats +-# +-CONFIG_BINFMT_ELF=y +-CONFIG_COMPAT_BINFMT_ELF=y +-CONFIG_ELFCORE=y +-CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y +-CONFIG_BINFMT_SCRIPT=y ++CONFIG_XENO_DRIVERS_RTIPC=y + CONFIG_BINFMT_MISC=m +-CONFIG_COREDUMP=y +- +-# +-# Memory Management options +-# +-CONFIG_SELECT_MEMORY_MODEL=y +-CONFIG_SPARSEMEM_MANUAL=y +-CONFIG_SPARSEMEM=y +-CONFIG_NEED_MULTIPLE_NODES=y +-CONFIG_HAVE_MEMORY_PRESENT=y +-CONFIG_SPARSEMEM_EXTREME=y +-CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y +-CONFIG_SPARSEMEM_VMEMMAP=y +-CONFIG_HAVE_MEMBLOCK=y +-CONFIG_HAVE_MEMBLOCK_NODE_MAP=y +-CONFIG_ARCH_DISCARD_MEMBLOCK=y +-CONFIG_MEMORY_ISOLATION=y +-CONFIG_HAVE_BOOTMEM_INFO_NODE=y + CONFIG_MEMORY_HOTPLUG=y +-CONFIG_MEMORY_HOTPLUG_SPARSE=y +-# CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set +-CONFIG_MEMORY_HOTREMOVE=y +-CONFIG_SPLIT_PTLOCK_CPUS=4 +-CONFIG_MEMORY_BALLOON=y +-CONFIG_BALLOON_COMPACTION=y +-CONFIG_COMPACTION=y +-CONFIG_MIGRATION=y +-CONFIG_PHYS_ADDR_T_64BIT=y +-CONFIG_BOUNCE=y +-CONFIG_VIRT_TO_BUS=y +-CONFIG_MMU_NOTIFIER=y ++# CONFIG_COMPACTION is not set ++# CONFIG_MIGRATION is not set + CONFIG_KSM=y +-CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 +-CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y + CONFIG_MEMORY_FAILURE=y + CONFIG_HWPOISON_INJECT=m +-CONFIG_TRANSPARENT_HUGEPAGE=y +-CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y +-# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set +-CONFIG_ARCH_WANTS_THP_SWAP=y +-CONFIG_THP_SWAP=y +-CONFIG_TRANSPARENT_HUGE_PAGECACHE=y + CONFIG_CLEANCACHE=y + CONFIG_FRONTSWAP=y +-# CONFIG_SHRINK_PAGECACHE is not set +-# CONFIG_CMA is not set + CONFIG_MEM_SOFT_DIRTY=y + CONFIG_ZSWAP=y +-CONFIG_ZPOOL=y + CONFIG_ZBUD=y +-# CONFIG_Z3FOLD is not set + CONFIG_ZSMALLOC=y +-# CONFIG_PGTABLE_MAPPING is not set + CONFIG_ZSMALLOC_STAT=y +-CONFIG_GENERIC_EARLY_IOREMAP=y + CONFIG_DEFERRED_STRUCT_PAGE_INIT=y + CONFIG_IDLE_PAGE_TRACKING=y +-CONFIG_ARCH_HAS_ZONE_DEVICE=y +-CONFIG_ZONE_DEVICE=y +-CONFIG_ARCH_HAS_HMM=y +-CONFIG_MIGRATE_VMA_HELPER=y +-CONFIG_DEV_PAGEMAP_OPS=y +-CONFIG_HMM=y +-CONFIG_HMM_MIRROR=y +-CONFIG_DEVICE_PRIVATE=y +-CONFIG_DEVICE_PUBLIC=y +-CONFIG_FRAME_VECTOR=y +-CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y +-CONFIG_ARCH_HAS_PKEYS=y +-# CONFIG_PERCPU_STATS is not set +-# CONFIG_GUP_BENCHMARK is not set +-CONFIG_ARCH_HAS_PTE_SPECIAL=y + CONFIG_NET=y +-CONFIG_NET_INGRESS=y +-CONFIG_NET_EGRESS=y +- +-# +-# Networking options +-# + CONFIG_PACKET=y + CONFIG_PACKET_DIAG=m + CONFIG_UNIX=y + CONFIG_UNIX_DIAG=m + CONFIG_TLS=m + CONFIG_TLS_DEVICE=y +-CONFIG_XFRM=y +-CONFIG_XFRM_OFFLOAD=y +-CONFIG_XFRM_ALGO=y + CONFIG_XFRM_USER=y + CONFIG_XFRM_INTERFACE=m + CONFIG_XFRM_SUB_POLICY=y +-CONFIG_XFRM_MIGRATE=y + CONFIG_XFRM_STATISTICS=y +-CONFIG_XFRM_IPCOMP=m + CONFIG_NET_KEY=m + CONFIG_NET_KEY_MIGRATE=y +-# CONFIG_SMC is not set + CONFIG_XDP_SOCKETS=y + CONFIG_INET=y + CONFIG_IP_MULTICAST=y +@@ -1052,45 +195,28 @@ + CONFIG_IP_MULTIPLE_TABLES=y + CONFIG_IP_ROUTE_MULTIPATH=y + CONFIG_IP_ROUTE_VERBOSE=y 
+-CONFIG_IP_ROUTE_CLASSID=y +-# CONFIG_IP_PNP is not set + CONFIG_NET_IPIP=m + CONFIG_NET_IPGRE_DEMUX=m +-CONFIG_NET_IP_TUNNEL=m + CONFIG_NET_IPGRE=m + CONFIG_NET_IPGRE_BROADCAST=y +-CONFIG_IP_MROUTE_COMMON=y + CONFIG_IP_MROUTE=y + CONFIG_IP_MROUTE_MULTIPLE_TABLES=y + CONFIG_IP_PIMSM_V1=y + CONFIG_IP_PIMSM_V2=y +-CONFIG_SYN_COOKIES=y + CONFIG_NET_IPVTI=m +-CONFIG_NET_UDP_TUNNEL=m +-# CONFIG_NET_FOU is not set +-# CONFIG_NET_FOU_IP_TUNNELS is not set + CONFIG_INET_AH=m + CONFIG_INET_ESP=m + CONFIG_INET_ESP_OFFLOAD=m + CONFIG_INET_IPCOMP=m +-CONFIG_INET_XFRM_TUNNEL=m +-CONFIG_INET_TUNNEL=m + CONFIG_INET_XFRM_MODE_TRANSPORT=m + CONFIG_INET_XFRM_MODE_TUNNEL=m + CONFIG_INET_XFRM_MODE_BEET=m + CONFIG_INET_DIAG=m +-CONFIG_INET_TCP_DIAG=m + CONFIG_INET_UDP_DIAG=m + CONFIG_INET_RAW_DIAG=m +-# CONFIG_INET_DIAG_DESTROY is not set + CONFIG_TCP_CONG_ADVANCED=y +-CONFIG_TCP_CONG_BIC=m +-CONFIG_TCP_CONG_CUBIC=y +-CONFIG_TCP_CONG_WESTWOOD=m +-CONFIG_TCP_CONG_HTCP=m + CONFIG_TCP_CONG_HSTCP=m + CONFIG_TCP_CONG_HYBLA=m +-CONFIG_TCP_CONG_VEGAS=m + CONFIG_TCP_CONG_NV=m + CONFIG_TCP_CONG_SCALABLE=m + CONFIG_TCP_CONG_LP=m +@@ -1098,13 +224,8 @@ + CONFIG_TCP_CONG_YEAH=m + CONFIG_TCP_CONG_ILLINOIS=m + CONFIG_TCP_CONG_DCTCP=m +-# CONFIG_TCP_CONG_CDG is not set + CONFIG_TCP_CONG_BBR=m +-CONFIG_DEFAULT_CUBIC=y +-# CONFIG_DEFAULT_RENO is not set +-CONFIG_DEFAULT_TCP_CONG="cubic" + CONFIG_TCP_MD5SIG=y +-CONFIG_IPV6=y + CONFIG_IPV6_ROUTER_PREF=y + CONFIG_IPV6_ROUTE_INFO=y + CONFIG_IPV6_OPTIMISTIC_DAD=y +@@ -1113,9 +234,6 @@ + CONFIG_INET6_ESP_OFFLOAD=m + CONFIG_INET6_IPCOMP=m + CONFIG_IPV6_MIP6=m +-# CONFIG_IPV6_ILA is not set +-CONFIG_INET6_XFRM_TUNNEL=m +-CONFIG_INET6_TUNNEL=m + CONFIG_INET6_XFRM_MODE_TRANSPORT=m + CONFIG_INET6_XFRM_MODE_TUNNEL=m + CONFIG_INET6_XFRM_MODE_BEET=m +@@ -1123,56 +241,25 @@ + CONFIG_IPV6_VTI=m + CONFIG_IPV6_SIT=m + CONFIG_IPV6_SIT_6RD=y +-CONFIG_IPV6_NDISC_NODETYPE=y +-CONFIG_IPV6_TUNNEL=m + CONFIG_IPV6_GRE=m + CONFIG_IPV6_MULTIPLE_TABLES=y +-# CONFIG_IPV6_SUBTREES is not set + CONFIG_IPV6_MROUTE=y + CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y + CONFIG_IPV6_PIMSM_V2=y +-# CONFIG_IPV6_SEG6_LWTUNNEL is not set +-# CONFIG_IPV6_SEG6_HMAC is not set + CONFIG_NETLABEL=y +-CONFIG_NETWORK_SECMARK=y +-CONFIG_NET_PTP_CLASSIFY=y + CONFIG_NETWORK_PHY_TIMESTAMPING=y + CONFIG_NETFILTER=y +-CONFIG_NETFILTER_ADVANCED=y +-CONFIG_BRIDGE_NETFILTER=m +- +-# +-# Core Netfilter Configuration +-# +-CONFIG_NETFILTER_INGRESS=y +-CONFIG_NETFILTER_NETLINK=m +-CONFIG_NETFILTER_FAMILY_BRIDGE=y +-CONFIG_NETFILTER_FAMILY_ARP=y +-# CONFIG_NETFILTER_NETLINK_ACCT is not set +-CONFIG_NETFILTER_NETLINK_QUEUE=m +-CONFIG_NETFILTER_NETLINK_LOG=m +-CONFIG_NETFILTER_NETLINK_OSF=m + CONFIG_NF_CONNTRACK=m +-CONFIG_NF_LOG_COMMON=m + CONFIG_NF_LOG_NETDEV=m +-CONFIG_NETFILTER_CONNCOUNT=m +-CONFIG_NF_CONNTRACK_MARK=y + CONFIG_NF_CONNTRACK_SECMARK=y + CONFIG_NF_CONNTRACK_ZONES=y +-CONFIG_NF_CONNTRACK_PROCFS=y + CONFIG_NF_CONNTRACK_EVENTS=y + CONFIG_NF_CONNTRACK_TIMEOUT=y + CONFIG_NF_CONNTRACK_TIMESTAMP=y +-CONFIG_NF_CONNTRACK_LABELS=y +-CONFIG_NF_CT_PROTO_DCCP=y +-CONFIG_NF_CT_PROTO_GRE=m +-CONFIG_NF_CT_PROTO_SCTP=y +-CONFIG_NF_CT_PROTO_UDPLITE=y + CONFIG_NF_CONNTRACK_AMANDA=m + CONFIG_NF_CONNTRACK_FTP=m + CONFIG_NF_CONNTRACK_H323=m + CONFIG_NF_CONNTRACK_IRC=m +-CONFIG_NF_CONNTRACK_BROADCAST=m + CONFIG_NF_CONNTRACK_NETBIOS_NS=m + CONFIG_NF_CONNTRACK_SNMP=m + CONFIG_NF_CONNTRACK_PPTP=m +@@ -1183,18 +270,6 @@ + CONFIG_NF_CT_NETLINK_TIMEOUT=m + CONFIG_NF_CT_NETLINK_HELPER=m + CONFIG_NETFILTER_NETLINK_GLUE_CT=y +-CONFIG_NF_NAT=m 
+-CONFIG_NF_NAT_NEEDED=y +-CONFIG_NF_NAT_PROTO_DCCP=y +-CONFIG_NF_NAT_PROTO_UDPLITE=y +-CONFIG_NF_NAT_PROTO_SCTP=y +-CONFIG_NF_NAT_AMANDA=m +-CONFIG_NF_NAT_FTP=m +-CONFIG_NF_NAT_IRC=m +-CONFIG_NF_NAT_SIP=m +-CONFIG_NF_NAT_TFTP=m +-CONFIG_NF_NAT_REDIRECT=y +-CONFIG_NETFILTER_SYNPROXY=m + CONFIG_NF_TABLES=m + CONFIG_NF_TABLES_SET=m + CONFIG_NF_TABLES_INET=y +@@ -1208,66 +283,37 @@ + CONFIG_NFT_MASQ=m + CONFIG_NFT_REDIR=m + CONFIG_NFT_NAT=m +-# CONFIG_NFT_TUNNEL is not set + CONFIG_NFT_OBJREF=m + CONFIG_NFT_QUEUE=m + CONFIG_NFT_QUOTA=m + CONFIG_NFT_REJECT=m +-CONFIG_NFT_REJECT_INET=m + CONFIG_NFT_COMPAT=m + CONFIG_NFT_HASH=m +-CONFIG_NFT_FIB=m + CONFIG_NFT_FIB_INET=m +-# CONFIG_NFT_SOCKET is not set +-# CONFIG_NFT_OSF is not set +-# CONFIG_NFT_TPROXY is not set +-CONFIG_NF_DUP_NETDEV=m + CONFIG_NFT_DUP_NETDEV=m + CONFIG_NFT_FWD_NETDEV=m + CONFIG_NFT_FIB_NETDEV=m +-# CONFIG_NF_FLOW_TABLE is not set + CONFIG_NETFILTER_XTABLES=y +- +-# +-# Xtables combined modules +-# +-CONFIG_NETFILTER_XT_MARK=m +-CONFIG_NETFILTER_XT_CONNMARK=m + CONFIG_NETFILTER_XT_SET=m +- +-# +-# Xtables targets +-# + CONFIG_NETFILTER_XT_TARGET_AUDIT=m + CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m + CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m + CONFIG_NETFILTER_XT_TARGET_CONNMARK=m + CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +-CONFIG_NETFILTER_XT_TARGET_CT=m + CONFIG_NETFILTER_XT_TARGET_DSCP=m +-CONFIG_NETFILTER_XT_TARGET_HL=m + CONFIG_NETFILTER_XT_TARGET_HMARK=m + CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +-# CONFIG_NETFILTER_XT_TARGET_LED is not set + CONFIG_NETFILTER_XT_TARGET_LOG=m + CONFIG_NETFILTER_XT_TARGET_MARK=m +-CONFIG_NETFILTER_XT_NAT=m +-CONFIG_NETFILTER_XT_TARGET_NETMAP=m + CONFIG_NETFILTER_XT_TARGET_NFLOG=m + CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m + CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +-CONFIG_NETFILTER_XT_TARGET_RATEEST=m +-CONFIG_NETFILTER_XT_TARGET_REDIRECT=m + CONFIG_NETFILTER_XT_TARGET_TEE=m + CONFIG_NETFILTER_XT_TARGET_TPROXY=m + CONFIG_NETFILTER_XT_TARGET_TRACE=m + CONFIG_NETFILTER_XT_TARGET_SECMARK=m + CONFIG_NETFILTER_XT_TARGET_TCPMSS=m + CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +- +-# +-# Xtables matches +-# + CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m + CONFIG_NETFILTER_XT_MATCH_BPF=m + CONFIG_NETFILTER_XT_MATCH_CGROUP=m +@@ -1282,12 +328,9 @@ + CONFIG_NETFILTER_XT_MATCH_DCCP=m + CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m + CONFIG_NETFILTER_XT_MATCH_DSCP=m +-CONFIG_NETFILTER_XT_MATCH_ECN=m + CONFIG_NETFILTER_XT_MATCH_ESP=m + CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m + CONFIG_NETFILTER_XT_MATCH_HELPER=m +-CONFIG_NETFILTER_XT_MATCH_HL=m +-# CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set + CONFIG_NETFILTER_XT_MATCH_IPRANGE=m + CONFIG_NETFILTER_XT_MATCH_IPVS=m + # CONFIG_NETFILTER_XT_MATCH_L2TP is not set +@@ -1296,7 +339,6 @@ + CONFIG_NETFILTER_XT_MATCH_MAC=m + CONFIG_NETFILTER_XT_MATCH_MARK=m + CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +-# CONFIG_NETFILTER_XT_MATCH_NFACCT is not set + CONFIG_NETFILTER_XT_MATCH_OSF=m + CONFIG_NETFILTER_XT_MATCH_OWNER=m + CONFIG_NETFILTER_XT_MATCH_POLICY=m +@@ -1306,16 +348,12 @@ + CONFIG_NETFILTER_XT_MATCH_RATEEST=m + CONFIG_NETFILTER_XT_MATCH_REALM=m + CONFIG_NETFILTER_XT_MATCH_RECENT=m +-CONFIG_NETFILTER_XT_MATCH_SCTP=m + CONFIG_NETFILTER_XT_MATCH_SOCKET=m + CONFIG_NETFILTER_XT_MATCH_STATE=m + CONFIG_NETFILTER_XT_MATCH_STATISTIC=m + CONFIG_NETFILTER_XT_MATCH_STRING=m + CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +-# CONFIG_NETFILTER_XT_MATCH_TIME is not set +-# CONFIG_NETFILTER_XT_MATCH_U32 is not set + CONFIG_IP_SET=m +-CONFIG_IP_SET_MAX=256 + CONFIG_IP_SET_BITMAP_IP=m + CONFIG_IP_SET_BITMAP_IPMAC=m + 
CONFIG_IP_SET_BITMAP_PORT=m +@@ -1334,22 +372,11 @@ + CONFIG_IP_SET_LIST_SET=m + CONFIG_IP_VS=m + CONFIG_IP_VS_IPV6=y +-# CONFIG_IP_VS_DEBUG is not set +-CONFIG_IP_VS_TAB_BITS=12 +- +-# +-# IPVS transport protocol load balancing support +-# + CONFIG_IP_VS_PROTO_TCP=y + CONFIG_IP_VS_PROTO_UDP=y +-CONFIG_IP_VS_PROTO_AH_ESP=y + CONFIG_IP_VS_PROTO_ESP=y + CONFIG_IP_VS_PROTO_AH=y + CONFIG_IP_VS_PROTO_SCTP=y +- +-# +-# IPVS scheduler +-# + CONFIG_IP_VS_RR=m + CONFIG_IP_VS_WRR=m + CONFIG_IP_VS_LC=m +@@ -1360,52 +387,18 @@ + CONFIG_IP_VS_LBLCR=m + CONFIG_IP_VS_DH=m + CONFIG_IP_VS_SH=m +-# CONFIG_IP_VS_MH is not set + CONFIG_IP_VS_SED=m + CONFIG_IP_VS_NQ=m +- +-# +-# IPVS SH scheduler +-# +-CONFIG_IP_VS_SH_TAB_BITS=8 +- +-# +-# IPVS MH scheduler +-# +-CONFIG_IP_VS_MH_TAB_INDEX=12 +- +-# +-# IPVS application helper +-# + CONFIG_IP_VS_FTP=m +-CONFIG_IP_VS_NFCT=y + CONFIG_IP_VS_PE_SIP=m +- +-# +-# IP: Netfilter Configuration +-# +-CONFIG_NF_DEFRAG_IPV4=m +-CONFIG_NF_SOCKET_IPV4=m +-CONFIG_NF_TPROXY_IPV4=m +-CONFIG_NF_TABLES_IPV4=y + CONFIG_NFT_CHAIN_ROUTE_IPV4=m +-CONFIG_NFT_REJECT_IPV4=m + CONFIG_NFT_DUP_IPV4=m + CONFIG_NFT_FIB_IPV4=m + CONFIG_NF_TABLES_ARP=y +-CONFIG_NF_DUP_IPV4=m + CONFIG_NF_LOG_ARP=m +-CONFIG_NF_LOG_IPV4=m +-CONFIG_NF_REJECT_IPV4=m +-CONFIG_NF_NAT_IPV4=m +-CONFIG_NF_NAT_MASQUERADE_IPV4=y + CONFIG_NFT_CHAIN_NAT_IPV4=m + CONFIG_NFT_MASQ_IPV4=m + CONFIG_NFT_REDIR_IPV4=m +-CONFIG_NF_NAT_SNMP_BASIC=m +-CONFIG_NF_NAT_PROTO_GRE=m +-CONFIG_NF_NAT_PPTP=m +-CONFIG_NF_NAT_H323=m + CONFIG_IP_NF_IPTABLES=m + CONFIG_IP_NF_MATCH_AH=m + CONFIG_IP_NF_MATCH_ECN=m +@@ -1419,7 +412,6 @@ + CONFIG_IP_NF_TARGET_NETMAP=m + CONFIG_IP_NF_TARGET_REDIRECT=m + CONFIG_IP_NF_MANGLE=m +-# CONFIG_IP_NF_TARGET_CLUSTERIP is not set + CONFIG_IP_NF_TARGET_ECN=m + CONFIG_IP_NF_TARGET_TTL=m + CONFIG_IP_NF_RAW=m +@@ -1427,26 +419,12 @@ + CONFIG_IP_NF_ARPTABLES=m + CONFIG_IP_NF_ARPFILTER=m + CONFIG_IP_NF_ARP_MANGLE=m +- +-# +-# IPv6: Netfilter Configuration +-# +-CONFIG_NF_SOCKET_IPV6=m +-CONFIG_NF_TPROXY_IPV6=m +-CONFIG_NF_TABLES_IPV6=y + CONFIG_NFT_CHAIN_ROUTE_IPV6=m + CONFIG_NFT_CHAIN_NAT_IPV6=m + CONFIG_NFT_MASQ_IPV6=m + CONFIG_NFT_REDIR_IPV6=m +-CONFIG_NFT_REJECT_IPV6=m + CONFIG_NFT_DUP_IPV6=m + CONFIG_NFT_FIB_IPV6=m +-CONFIG_NF_DUP_IPV6=m +-CONFIG_NF_REJECT_IPV6=m +-CONFIG_NF_LOG_IPV6=m +-CONFIG_NF_NAT_IPV6=m +-CONFIG_NF_NAT_MASQUERADE_IPV6=y +-CONFIG_IP6_NF_IPTABLES=m + CONFIG_IP6_NF_MATCH_AH=m + CONFIG_IP6_NF_MATCH_EUI64=m + CONFIG_IP6_NF_MATCH_FRAG=m +@@ -1456,8 +434,6 @@ + CONFIG_IP6_NF_MATCH_MH=m + CONFIG_IP6_NF_MATCH_RPFILTER=m + CONFIG_IP6_NF_MATCH_RT=m +-# CONFIG_IP6_NF_MATCH_SRH is not set +-# CONFIG_IP6_NF_TARGET_HL is not set + CONFIG_IP6_NF_FILTER=m + CONFIG_IP6_NF_TARGET_REJECT=m + CONFIG_IP6_NF_TARGET_SYNPROXY=m +@@ -1467,7 +443,6 @@ + CONFIG_IP6_NF_NAT=m + CONFIG_IP6_NF_TARGET_MASQUERADE=m + CONFIG_IP6_NF_TARGET_NPT=m +-CONFIG_NF_DEFRAG_IPV6=m + CONFIG_NF_TABLES_BRIDGE=y + CONFIG_NFT_BRIDGE_REJECT=m + CONFIG_NF_LOG_BRIDGE=m +@@ -1492,64 +467,30 @@ + CONFIG_BRIDGE_EBT_SNAT=m + CONFIG_BRIDGE_EBT_LOG=m + CONFIG_BRIDGE_EBT_NFLOG=m +-# CONFIG_BPFILTER is not set +-# CONFIG_IP_DCCP is not set +-CONFIG_IP_SCTP=m +-# CONFIG_SCTP_DBG_OBJCNT is not set +-# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5 is not set + CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y +-# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set + CONFIG_SCTP_COOKIE_HMAC_MD5=y +-CONFIG_SCTP_COOKIE_HMAC_SHA1=y +-CONFIG_INET_SCTP_DIAG=m +-# CONFIG_RDS is not set + CONFIG_TIPC=m + CONFIG_TIPC_MEDIA_IB=y +-CONFIG_TIPC_MEDIA_UDP=y +-CONFIG_TIPC_DIAG=m + CONFIG_ATM=m 
+ CONFIG_ATM_CLIP=m +-# CONFIG_ATM_CLIP_NO_ICMP is not set + CONFIG_ATM_LANE=m +-# CONFIG_ATM_MPOA is not set + CONFIG_ATM_BR2684=m +-# CONFIG_ATM_BR2684_IPFILTER is not set + CONFIG_L2TP=m + CONFIG_L2TP_DEBUGFS=m + CONFIG_L2TP_V3=y + CONFIG_L2TP_IP=m + CONFIG_L2TP_ETH=m +-CONFIG_STP=m +-CONFIG_GARP=m +-CONFIG_MRP=m + CONFIG_BRIDGE=m +-CONFIG_BRIDGE_IGMP_SNOOPING=y + CONFIG_BRIDGE_VLAN_FILTERING=y +-CONFIG_HAVE_NET_DSA=y +-# CONFIG_NET_DSA is not set + CONFIG_VLAN_8021Q=m + CONFIG_VLAN_8021Q_GVRP=y + CONFIG_VLAN_8021Q_MVRP=y +-# CONFIG_DECNET is not set +-CONFIG_LLC=m +-# CONFIG_LLC2 is not set +-# CONFIG_ATALK is not set +-# CONFIG_X25 is not set +-# CONFIG_LAPB is not set +-# CONFIG_PHONET is not set + CONFIG_6LOWPAN=m +-# CONFIG_6LOWPAN_DEBUGFS is not set + # CONFIG_6LOWPAN_NHC is not set + CONFIG_IEEE802154=m +-# CONFIG_IEEE802154_NL802154_EXPERIMENTAL is not set +-CONFIG_IEEE802154_SOCKET=m + CONFIG_IEEE802154_6LOWPAN=m + CONFIG_MAC802154=m + CONFIG_NET_SCHED=y +- +-# +-# Queueing/Scheduling +-# + CONFIG_NET_SCH_CBQ=m + CONFIG_NET_SCH_HTB=m + CONFIG_NET_SCH_HFSC=m +@@ -1561,36 +502,22 @@ + CONFIG_NET_SCH_SFQ=m + CONFIG_NET_SCH_TEQL=m + CONFIG_NET_SCH_TBF=m +-# CONFIG_NET_SCH_CBS is not set +-# CONFIG_NET_SCH_ETF is not set + CONFIG_NET_SCH_GRED=m + CONFIG_NET_SCH_DSMARK=m + CONFIG_NET_SCH_NETEM=m + CONFIG_NET_SCH_DRR=m + CONFIG_NET_SCH_MQPRIO=m +-# CONFIG_NET_SCH_SKBPRIO is not set + CONFIG_NET_SCH_CHOKE=m + CONFIG_NET_SCH_QFQ=m + CONFIG_NET_SCH_CODEL=m + CONFIG_NET_SCH_FQ_CODEL=y +-# CONFIG_NET_SCH_CAKE is not set + CONFIG_NET_SCH_FQ=m + CONFIG_NET_SCH_HHF=m + CONFIG_NET_SCH_PIE=m + CONFIG_NET_SCH_INGRESS=m + CONFIG_NET_SCH_PLUG=m + CONFIG_NET_SCH_DEFAULT=y +-# CONFIG_DEFAULT_FQ is not set +-# CONFIG_DEFAULT_CODEL is not set + CONFIG_DEFAULT_FQ_CODEL=y +-# CONFIG_DEFAULT_SFQ is not set +-# CONFIG_DEFAULT_PFIFO_FAST is not set +-CONFIG_DEFAULT_NET_SCH="fq_codel" +- +-# +-# Classification +-# +-CONFIG_NET_CLS=y + CONFIG_NET_CLS_BASIC=m + CONFIG_NET_CLS_TCINDEX=m + CONFIG_NET_CLS_ROUTE4=m +@@ -1606,22 +533,18 @@ + CONFIG_NET_CLS_FLOWER=m + CONFIG_NET_CLS_MATCHALL=m + CONFIG_NET_EMATCH=y +-CONFIG_NET_EMATCH_STACK=32 + CONFIG_NET_EMATCH_CMP=m + CONFIG_NET_EMATCH_NBYTE=m + CONFIG_NET_EMATCH_U32=m + CONFIG_NET_EMATCH_META=m + CONFIG_NET_EMATCH_TEXT=m +-# CONFIG_NET_EMATCH_CANID is not set + CONFIG_NET_EMATCH_IPSET=m +-# CONFIG_NET_EMATCH_IPT is not set + CONFIG_NET_CLS_ACT=y + CONFIG_NET_ACT_POLICE=m + CONFIG_NET_ACT_GACT=m + CONFIG_GACT_PROB=y + CONFIG_NET_ACT_MIRRED=m + CONFIG_NET_ACT_SAMPLE=m +-# CONFIG_NET_ACT_IPT is not set + CONFIG_NET_ACT_NAT=m + CONFIG_NET_ACT_PEDIT=m + CONFIG_NET_ACT_SIMP=m +@@ -1629,105 +552,47 @@ + CONFIG_NET_ACT_CSUM=m + CONFIG_NET_ACT_VLAN=m + CONFIG_NET_ACT_BPF=m +-# CONFIG_NET_ACT_CONNMARK is not set + CONFIG_NET_ACT_SKBMOD=m +-# CONFIG_NET_ACT_IFE is not set + CONFIG_NET_ACT_TUNNEL_KEY=m + CONFIG_NET_CLS_IND=y +-CONFIG_NET_SCH_FIFO=y + CONFIG_DCB=y +-CONFIG_DNS_RESOLVER=m +-# CONFIG_BATMAN_ADV is not set + CONFIG_OPENVSWITCH=m +-CONFIG_OPENVSWITCH_GRE=m +-CONFIG_OPENVSWITCH_VXLAN=m +-CONFIG_OPENVSWITCH_GENEVE=m + CONFIG_VSOCKETS=m +-CONFIG_VSOCKETS_DIAG=m + CONFIG_VMWARE_VMCI_VSOCKETS=m + CONFIG_VIRTIO_VSOCKETS=m +-CONFIG_VIRTIO_VSOCKETS_COMMON=m +-CONFIG_HYPERV_VSOCKETS=m + CONFIG_NETLINK_DIAG=m +-CONFIG_MPLS=y + CONFIG_NET_MPLS_GSO=y + CONFIG_MPLS_ROUTING=m + CONFIG_MPLS_IPTUNNEL=m + CONFIG_NET_NSH=y +-# CONFIG_HSR is not set ++CONFIG_HSR=m + CONFIG_NET_SWITCHDEV=y +-CONFIG_NET_L3_MASTER_DEV=y +-# CONFIG_NET_NCSI is not set +-CONFIG_RPS=y 
+-CONFIG_RFS_ACCEL=y +-CONFIG_XPS=y + CONFIG_CGROUP_NET_PRIO=y +-CONFIG_CGROUP_NET_CLASSID=y +-CONFIG_NET_RX_BUSY_POLL=y +-CONFIG_BQL=y + CONFIG_BPF_JIT=y + CONFIG_BPF_STREAM_PARSER=y +-CONFIG_NET_FLOW_LIMIT=y +- +-# +-# Network testing +-# + CONFIG_NET_PKTGEN=m + CONFIG_NET_DROP_MONITOR=y +-# CONFIG_HAMRADIO is not set + CONFIG_CAN=m +-CONFIG_CAN_RAW=m +-CONFIG_CAN_BCM=m +-CONFIG_CAN_GW=m +-# CONFIG_CAN_J1939 is not set +- +-# +-# CAN Device Drivers +-# + CONFIG_CAN_VCAN=m +-# CONFIG_CAN_VXCAN is not set + CONFIG_CAN_SLCAN=m +-CONFIG_CAN_DEV=m +-CONFIG_CAN_CALC_BITTIMING=y + CONFIG_CAN_C_CAN=m + CONFIG_CAN_C_CAN_PLATFORM=m + CONFIG_CAN_C_CAN_PCI=m + CONFIG_CAN_CC770=m +-# CONFIG_CAN_CC770_ISA is not set + CONFIG_CAN_CC770_PLATFORM=m +-# CONFIG_CAN_IFI_CANFD is not set +-# CONFIG_CAN_M_CAN is not set +-# CONFIG_CAN_PEAK_PCIEFD is not set + CONFIG_CAN_SJA1000=m +-# CONFIG_CAN_SJA1000_ISA is not set + CONFIG_CAN_SJA1000_PLATFORM=m + CONFIG_CAN_EMS_PCI=m + CONFIG_CAN_PEAK_PCI=m +-CONFIG_CAN_PEAK_PCIEC=y + CONFIG_CAN_KVASER_PCI=m + CONFIG_CAN_PLX_PCI=m + CONFIG_CAN_SOFTING=m +- +-# +-# CAN SPI interfaces +-# +-# CONFIG_CAN_HI311X is not set +-# CONFIG_CAN_MCP251X is not set +- +-# +-# CAN USB interfaces +-# + CONFIG_CAN_8DEV_USB=m + CONFIG_CAN_EMS_USB=m + CONFIG_CAN_ESD_USB2=m +-# CONFIG_CAN_GS_USB is not set + CONFIG_CAN_KVASER_USB=m +-# CONFIG_CAN_MCBA_USB is not set + CONFIG_CAN_PEAK_USB=m +-# CONFIG_CAN_UCAN is not set +-# CONFIG_CAN_DEBUG_DEVICES is not set + CONFIG_BT=m +-CONFIG_BT_BREDR=y + CONFIG_BT_RFCOMM=m + CONFIG_BT_RFCOMM_TTY=y + CONFIG_BT_BNEP=m +@@ -1736,30 +601,12 @@ + CONFIG_BT_CMTP=m + CONFIG_BT_HIDP=m + CONFIG_BT_HS=y +-CONFIG_BT_LE=y +-# CONFIG_BT_6LOWPAN is not set +-# CONFIG_BT_LEDS is not set +-# CONFIG_BT_SELFTEST is not set +-CONFIG_BT_DEBUGFS=y +- +-# +-# Bluetooth device drivers +-# +-CONFIG_BT_INTEL=m +-CONFIG_BT_BCM=m +-CONFIG_BT_RTL=m + CONFIG_BT_HCIBTUSB=m + CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y +-CONFIG_BT_HCIBTUSB_BCM=y +-CONFIG_BT_HCIBTUSB_RTL=y + CONFIG_BT_HCIBTSDIO=m + CONFIG_BT_HCIUART=m +-CONFIG_BT_HCIUART_H4=y + CONFIG_BT_HCIUART_BCSP=y + CONFIG_BT_HCIUART_ATH3K=y +-# CONFIG_BT_HCIUART_INTEL is not set +-# CONFIG_BT_HCIUART_AG6XX is not set +-# CONFIG_BT_HCIUART_MRVL is not set + CONFIG_BT_HCIBCM203X=m + CONFIG_BT_HCIBPA10X=m + CONFIG_BT_HCIBFUSB=m +@@ -1767,232 +614,38 @@ + CONFIG_BT_MRVL=m + CONFIG_BT_MRVL_SDIO=m + CONFIG_BT_ATH3K=m +-# CONFIG_AF_RXRPC is not set +-# CONFIG_AF_KCM is not set +-CONFIG_STREAM_PARSER=y +-CONFIG_FIB_RULES=y +-CONFIG_WIRELESS=y + CONFIG_CFG80211=m +-# CONFIG_NL80211_TESTMODE is not set +-# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +-CONFIG_CFG80211_REQUIRE_SIGNED_REGDB=y +-CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS=y +-CONFIG_CFG80211_DEFAULT_PS=y +-# CONFIG_CFG80211_DEBUGFS is not set +-CONFIG_CFG80211_CRDA_SUPPORT=y +-# CONFIG_CFG80211_WEXT is not set + CONFIG_MAC80211=m +-CONFIG_MAC80211_HAS_RC=y +-CONFIG_MAC80211_RC_MINSTREL=y +-CONFIG_MAC80211_RC_MINSTREL_HT=y +-CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y +-CONFIG_MAC80211_RC_DEFAULT="minstrel_ht" +-# CONFIG_MAC80211_MESH is not set +-CONFIG_MAC80211_LEDS=y +-CONFIG_MAC80211_DEBUGFS=y +-# CONFIG_MAC80211_MESSAGE_TRACING is not set +-# CONFIG_MAC80211_DEBUG_MENU is not set +-CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 +-# CONFIG_WIMAX is not set + CONFIG_RFKILL=m +-CONFIG_RFKILL_LEDS=y +-CONFIG_RFKILL_INPUT=y +-# CONFIG_RFKILL_GPIO is not set +-# CONFIG_NET_9P is not set +-# CONFIG_CAIF is not set +-CONFIG_CEPH_LIB=m +-# CONFIG_CEPH_LIB_PRETTYDEBUG is not set + 
CONFIG_CEPH_LIB_USE_DNS_RESOLVER=y +-# CONFIG_NFC is not set +-CONFIG_PSAMPLE=m +-# CONFIG_NET_IFE is not set + CONFIG_LWTUNNEL=y +-CONFIG_LWTUNNEL_BPF=y +-CONFIG_DST_CACHE=y +-CONFIG_GRO_CELLS=y +-CONFIG_SOCK_VALIDATE_XMIT=y +-CONFIG_NET_DEVLINK=y +-CONFIG_MAY_USE_DEVLINK=y +-CONFIG_PAGE_POOL=y +-CONFIG_FAILOVER=m +-CONFIG_HAVE_EBPF_JIT=y +- +-# +-# Device Drivers +-# +- +-# +-# Generic Driver Options +-# ++CONFIG_NET_DEVLINK=m + # CONFIG_UEVENT_HELPER is not set + CONFIG_DEVTMPFS=y + CONFIG_DEVTMPFS_MOUNT=y +-CONFIG_STANDALONE=y +-CONFIG_PREVENT_FIRMWARE_BUILD=y +- +-# +-# Firmware loader +-# +-CONFIG_FW_LOADER=y +-CONFIG_EXTRA_FIRMWARE="" +-CONFIG_FW_LOADER_USER_HELPER=y +-# CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set +-CONFIG_WANT_DEV_COREDUMP=y +-CONFIG_ALLOW_DEV_COREDUMP=y +-CONFIG_DEV_COREDUMP=y +-# CONFIG_DEBUG_DRIVER is not set +-# CONFIG_DEBUG_DEVRES is not set +-# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set +-# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set +-CONFIG_SYS_HYPERVISOR=y +-CONFIG_GENERIC_CPU_AUTOPROBE=y +-CONFIG_GENERIC_CPU_VULNERABILITIES=y +-CONFIG_REGMAP=y +-CONFIG_REGMAP_I2C=y +-CONFIG_REGMAP_SPI=y +-CONFIG_DMA_SHARED_BUFFER=y +-# CONFIG_DMA_FENCE_TRACE is not set +- +-# +-# Bus devices +-# + CONFIG_CONNECTOR=y +-CONFIG_PROC_EVENTS=y +-# CONFIG_GNSS is not set + CONFIG_MTD=m +-# CONFIG_MTD_TESTS is not set +-# CONFIG_MTD_REDBOOT_PARTS is not set +-# CONFIG_MTD_CMDLINE_PARTS is not set +-# CONFIG_MTD_AR7_PARTS is not set +- +-# +-# Partition parsers +-# +- +-# +-# User Modules And Translation Layers +-# +-CONFIG_MTD_BLKDEVS=m + CONFIG_MTD_BLOCK=m +-# CONFIG_MTD_BLOCK_RO is not set +-# CONFIG_FTL is not set +-# CONFIG_NFTL is not set +-# CONFIG_INFTL is not set +-# CONFIG_RFD_FTL is not set +-# CONFIG_SSFDC is not set +-# CONFIG_SM_FTL is not set +-# CONFIG_MTD_OOPS is not set +-# CONFIG_MTD_SWAP is not set +-# CONFIG_MTD_PARTITIONED_MASTER is not set +- +-# +-# RAM/ROM/Flash chip drivers +-# +-# CONFIG_MTD_CFI is not set +-# CONFIG_MTD_JEDECPROBE is not set +-CONFIG_MTD_MAP_BANK_WIDTH_1=y +-CONFIG_MTD_MAP_BANK_WIDTH_2=y +-CONFIG_MTD_MAP_BANK_WIDTH_4=y +-CONFIG_MTD_CFI_I1=y +-CONFIG_MTD_CFI_I2=y +-# CONFIG_MTD_RAM is not set +-# CONFIG_MTD_ROM is not set +-# CONFIG_MTD_ABSENT is not set +- +-# +-# Mapping drivers for chip access +-# +-# CONFIG_MTD_COMPLEX_MAPPINGS is not set +-# CONFIG_MTD_INTEL_VR_NOR is not set +-# CONFIG_MTD_PLATRAM is not set +- +-# +-# Self-contained MTD device drivers +-# +-# CONFIG_MTD_PMC551 is not set +-# CONFIG_MTD_DATAFLASH is not set +-# CONFIG_MTD_MCHP23K256 is not set +-# CONFIG_MTD_SST25L is not set +-# CONFIG_MTD_SLRAM is not set +-# CONFIG_MTD_PHRAM is not set +-# CONFIG_MTD_MTDRAM is not set +-# CONFIG_MTD_BLOCK2MTD is not set +- +-# +-# Disk-On-Chip Device Drivers +-# +-# CONFIG_MTD_DOCG3 is not set +-# CONFIG_MTD_ONENAND is not set +-# CONFIG_MTD_NAND is not set +-# CONFIG_MTD_SPI_NAND is not set +- +-# +-# LPDDR & LPDDR2 PCM memory drivers +-# +-# CONFIG_MTD_LPDDR is not set +-# CONFIG_MTD_SPI_NOR is not set + CONFIG_MTD_UBI=m +-CONFIG_MTD_UBI_WL_THRESHOLD=4096 +-CONFIG_MTD_UBI_BEB_LIMIT=20 +-# CONFIG_MTD_UBI_FASTMAP is not set +-# CONFIG_MTD_UBI_GLUEBI is not set +-# CONFIG_MTD_UBI_BLOCK is not set +-CONFIG_MTD_HISILICON_SFC=m +-# CONFIG_OF is not set +-CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y + CONFIG_PARPORT=m + CONFIG_PARPORT_PC=m + CONFIG_PARPORT_SERIAL=m +-# CONFIG_PARPORT_PC_FIFO is not set +-# CONFIG_PARPORT_PC_SUPERIO is not set +-# CONFIG_PARPORT_AX88796 is not set + CONFIG_PARPORT_1284=y +-CONFIG_PARPORT_NOT_PC=y 
+-CONFIG_PNP=y + # CONFIG_PNP_DEBUG_MESSAGES is not set +- +-# +-# Protocols +-# +-CONFIG_PNPACPI=y +-CONFIG_BLK_DEV=y + CONFIG_BLK_DEV_NULL_BLK=m +-# CONFIG_BLK_DEV_FD is not set +-CONFIG_CDROM=m +-# CONFIG_PARIDE is not set +-# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set ++CONFIG_BLK_DEV_FD=m + CONFIG_ZRAM=m + CONFIG_ZRAM_WRITEBACK=y +-# CONFIG_ZRAM_MEMORY_TRACKING is not set +-# CONFIG_BLK_DEV_DAC960 is not set +-# CONFIG_BLK_DEV_UMEM is not set + CONFIG_BLK_DEV_LOOP=m + CONFIG_BLK_DEV_LOOP_MIN_COUNT=0 +-# CONFIG_BLK_DEV_CRYPTOLOOP is not set +-# CONFIG_BLK_DEV_DRBD is not set + CONFIG_BLK_DEV_NBD=m +-# CONFIG_BLK_DEV_SKD is not set +-# CONFIG_BLK_DEV_SX8 is not set + CONFIG_BLK_DEV_RAM=m +-CONFIG_BLK_DEV_RAM_COUNT=16 + CONFIG_BLK_DEV_RAM_SIZE=16384 + CONFIG_CDROM_PKTCDVD=m +-CONFIG_CDROM_PKTCDVD_BUFFERS=8 +-# CONFIG_CDROM_PKTCDVD_WCACHE is not set +-# CONFIG_ATA_OVER_ETH is not set +-CONFIG_XEN_BLKDEV_FRONTEND=m + CONFIG_VIRTIO_BLK=m +-# CONFIG_VIRTIO_BLK_SCSI is not set + CONFIG_BLK_DEV_RBD=m +-# CONFIG_BLK_DEV_RSXX is not set +- +-# +-# NVME Support +-# +-CONFIG_NVME_CORE=m + CONFIG_BLK_DEV_NVME=m + CONFIG_NVME_MULTIPATH=y +-CONFIG_NVME_FABRICS=m + CONFIG_NVME_RDMA=m + CONFIG_NVME_FC=m + CONFIG_NVME_TARGET=m +@@ -2000,127 +653,26 @@ + CONFIG_NVME_TARGET_RDMA=m + CONFIG_NVME_TARGET_FC=m + CONFIG_NVME_TARGET_FCLOOP=m +- +-# +-# Misc devices +-# +-CONFIG_SENSORS_LIS3LV02D=m +-# CONFIG_AD525X_DPOT is not set +-# CONFIG_DUMMY_IRQ is not set +-# CONFIG_IBM_ASM is not set +-# CONFIG_PHANTOM is not set + CONFIG_SGI_IOC4=m +-CONFIG_TIFM_CORE=m +-CONFIG_TIFM_7XX1=m +-# CONFIG_ICS932S401 is not set + CONFIG_ENCLOSURE_SERVICES=m +-CONFIG_SGI_XP=m + CONFIG_HP_ILO=m +-CONFIG_SGI_GRU=m +-# CONFIG_SGI_GRU_DEBUG is not set + CONFIG_APDS9802ALS=m + CONFIG_ISL29003=m + CONFIG_ISL29020=m + CONFIG_SENSORS_TSL2550=m + CONFIG_SENSORS_BH1770=m + CONFIG_SENSORS_APDS990X=m +-# CONFIG_HMC6352 is not set +-# CONFIG_DS1682 is not set + CONFIG_VMWARE_BALLOON=m +-# CONFIG_USB_SWITCH_FSA9480 is not set +-# CONFIG_LATTICE_ECP3_CONFIG is not set +-# CONFIG_SRAM is not set +-# CONFIG_PCI_ENDPOINT_TEST is not set +-CONFIG_MISC_RTSX=m +-# CONFIG_C2PORT is not set +- +-# +-# EEPROM support +-# +-# CONFIG_EEPROM_AT24 is not set +-# CONFIG_EEPROM_AT25 is not set + CONFIG_EEPROM_LEGACY=m + CONFIG_EEPROM_MAX6875=m +-CONFIG_EEPROM_93CX6=m +-# CONFIG_EEPROM_93XX46 is not set +-# CONFIG_EEPROM_IDT_89HPESX is not set +-CONFIG_CB710_CORE=m +-# CONFIG_CB710_DEBUG is not set +-CONFIG_CB710_DEBUG_ASSUMPTIONS=y +- +-# +-# Texas Instruments shared transport line discipline +-# +-# CONFIG_TI_ST is not set + CONFIG_SENSORS_LIS3_I2C=m +-CONFIG_ALTERA_STAPL=m +-CONFIG_INTEL_MEI=m + CONFIG_INTEL_MEI_ME=m +-# CONFIG_INTEL_MEI_TXE is not set + CONFIG_VMWARE_VMCI=m +- +-# +-# Intel MIC & related support +-# +- +-# +-# Intel MIC Bus Driver +-# +-# CONFIG_INTEL_MIC_BUS is not set +- +-# +-# SCIF Bus Driver +-# +-# CONFIG_SCIF_BUS is not set +- +-# +-# VOP Bus Driver +-# +-# CONFIG_VOP_BUS is not set +- +-# +-# Intel MIC Host Driver +-# +- +-# +-# Intel MIC Card Driver +-# +- +-# +-# SCIF Driver +-# +- +-# +-# Intel MIC Coprocessor State Management (COSM) Drivers +-# +- +-# +-# VOP Driver +-# +-# CONFIG_GENWQE is not set +-# CONFIG_ECHO is not set + CONFIG_MISC_RTSX_PCI=m + CONFIG_MISC_RTSX_USB=m +-CONFIG_HAVE_IDE=y +-# CONFIG_IDE is not set +- +-# +-# SCSI device support +-# +-CONFIG_SCSI_MOD=y +-CONFIG_RAID_ATTRS=m + CONFIG_SCSI=y +-CONFIG_SCSI_DMA=y +-CONFIG_SCSI_NETLINK=y +-CONFIG_SCSI_MQ_DEFAULT=y +-CONFIG_SCSI_PROC_FS=y +- +-# +-# SCSI 
support type (disk, tape, CD-ROM) +-# + CONFIG_BLK_DEV_SD=m + CONFIG_CHR_DEV_ST=m +-# CONFIG_CHR_DEV_OSST is not set + CONFIG_BLK_DEV_SR=m + CONFIG_BLK_DEV_SR_VENDOR=y + CONFIG_CHR_DEV_SG=m +@@ -2129,88 +681,29 @@ + CONFIG_SCSI_CONSTANTS=y + CONFIG_SCSI_LOGGING=y + CONFIG_SCSI_SCAN_ASYNC=y +- +-# +-# SCSI Transports +-# +-CONFIG_SCSI_SPI_ATTRS=m + CONFIG_SCSI_FC_ATTRS=m +-CONFIG_SCSI_ISCSI_ATTRS=m +-CONFIG_SCSI_SAS_ATTRS=m + CONFIG_SCSI_SAS_LIBSAS=m + CONFIG_SCSI_SAS_ATA=y +-CONFIG_SCSI_SAS_HOST_SMP=y +-CONFIG_SCSI_SRP_ATTRS=m +-CONFIG_SCSI_LOWLEVEL=y + CONFIG_ISCSI_TCP=m +-CONFIG_ISCSI_BOOT_SYSFS=m +-# CONFIG_SCSI_CXGB3_ISCSI is not set + CONFIG_SCSI_CXGB4_ISCSI=m + CONFIG_SCSI_BNX2_ISCSI=m + CONFIG_SCSI_BNX2X_FCOE=m + CONFIG_BE2ISCSI=m +-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set + CONFIG_SCSI_HPSA=m +-# CONFIG_SCSI_3W_9XXX is not set +-# CONFIG_SCSI_3W_SAS is not set +-# CONFIG_SCSI_ACARD is not set + CONFIG_SCSI_AACRAID=m +-# CONFIG_SCSI_AIC7XXX is not set +-# CONFIG_SCSI_AIC79XX is not set +-# CONFIG_SCSI_AIC94XX is not set +-# CONFIG_SCSI_MVSAS is not set +-# CONFIG_SCSI_MVUMI is not set +-# CONFIG_SCSI_DPT_I2O is not set +-# CONFIG_SCSI_ADVANSYS is not set +-# CONFIG_SCSI_ARCMSR is not set +-# CONFIG_SCSI_ESAS2R is not set +-# CONFIG_MEGARAID_NEWGEN is not set +-# CONFIG_MEGARAID_LEGACY is not set + CONFIG_MEGARAID_SAS=m +-CONFIG_SCSI_MPT3SAS=m +-CONFIG_SCSI_MPT2SAS_MAX_SGE=128 +-CONFIG_SCSI_MPT3SAS_MAX_SGE=128 + CONFIG_SCSI_MPT2SAS=m + CONFIG_SCSI_SMARTPQI=m +-# CONFIG_SCSI_UFSHCD is not set +-# CONFIG_SCSI_HPTIOP is not set +-# CONFIG_SCSI_BUSLOGIC is not set + CONFIG_VMWARE_PVSCSI=m +-# CONFIG_XEN_SCSI_FRONTEND is not set +-CONFIG_HYPERV_STORAGE=m + CONFIG_LIBFC=m + CONFIG_LIBFCOE=m +-CONFIG_FCOE=m + CONFIG_FCOE_FNIC=m +-# CONFIG_SCSI_SNIC is not set +-# CONFIG_SCSI_DMX3191D is not set +-# CONFIG_SCSI_GDTH is not set +-# CONFIG_SCSI_ISCI is not set +-# CONFIG_SCSI_IPS is not set +-# CONFIG_SCSI_INITIO is not set +-# CONFIG_SCSI_INIA100 is not set +-# CONFIG_SCSI_PPA is not set +-# CONFIG_SCSI_IMM is not set +-# CONFIG_SCSI_STEX is not set +-# CONFIG_SCSI_SYM53C8XX_2 is not set +-# CONFIG_SCSI_IPR is not set +-# CONFIG_SCSI_QLOGIC_1280 is not set + CONFIG_SCSI_QLA_FC=m +-# CONFIG_TCM_QLA2XXX is not set + CONFIG_SCSI_QLA_ISCSI=m + CONFIG_QEDI=m + CONFIG_QEDF=m + CONFIG_SCSI_LPFC=m +-# CONFIG_SCSI_LPFC_DEBUG_FS is not set +-CONFIG_SCSI_HUAWEI_FC=m +-CONFIG_SCSI_FC_HIFC=m +-# CONFIG_SCSI_DC395x is not set +-# CONFIG_SCSI_AM53C974 is not set +-# CONFIG_SCSI_WD719X is not set + CONFIG_SCSI_DEBUG=m +-# CONFIG_SCSI_PMCRAID is not set +-# CONFIG_SCSI_PM8001 is not set +-# CONFIG_SCSI_BFA_FC is not set + CONFIG_SCSI_VIRTIO=m + CONFIG_SCSI_CHELSIO_FCOE=m + CONFIG_SCSI_DH=y +@@ -2218,125 +711,28 @@ + CONFIG_SCSI_DH_HP_SW=y + CONFIG_SCSI_DH_EMC=y + CONFIG_SCSI_DH_ALUA=y +-# CONFIG_SCSI_OSD_INITIATOR is not set + CONFIG_ATA=m +-CONFIG_ATA_VERBOSE_ERROR=y +-CONFIG_ATA_ACPI=y +-# CONFIG_SATA_ZPODD is not set +-CONFIG_SATA_PMP=y +- +-# +-# Controllers with non-SFF native interface +-# + CONFIG_SATA_AHCI=m +-CONFIG_SATA_MOBILE_LPM_POLICY=0 + CONFIG_SATA_AHCI_PLATFORM=m +-# CONFIG_SATA_INIC162X is not set +-# CONFIG_SATA_ACARD_AHCI is not set +-# CONFIG_SATA_SIL24 is not set +-CONFIG_ATA_SFF=y +- +-# +-# SFF controllers with custom DMA interface +-# +-# CONFIG_PDC_ADMA is not set +-# CONFIG_SATA_QSTOR is not set +-# CONFIG_SATA_SX4 is not set +-CONFIG_ATA_BMDMA=y +- +-# +-# SATA SFF controllers with BMDMA +-# + CONFIG_ATA_PIIX=m +-# CONFIG_SATA_DWC is not set +-# CONFIG_SATA_MV is not set +-# 
CONFIG_SATA_NV is not set +-# CONFIG_SATA_PROMISE is not set +-# CONFIG_SATA_SIL is not set +-# CONFIG_SATA_SIS is not set +-# CONFIG_SATA_SVW is not set +-# CONFIG_SATA_ULI is not set +-# CONFIG_SATA_VIA is not set +-# CONFIG_SATA_VITESSE is not set +- +-# +-# PATA SFF controllers with BMDMA +-# +-# CONFIG_PATA_ALI is not set +-# CONFIG_PATA_AMD is not set +-# CONFIG_PATA_ARTOP is not set +-# CONFIG_PATA_ATIIXP is not set +-# CONFIG_PATA_ATP867X is not set +-# CONFIG_PATA_CMD64X is not set +-# CONFIG_PATA_CYPRESS is not set +-# CONFIG_PATA_EFAR is not set +-# CONFIG_PATA_HPT366 is not set +-# CONFIG_PATA_HPT37X is not set +-# CONFIG_PATA_HPT3X2N is not set +-# CONFIG_PATA_HPT3X3 is not set +-# CONFIG_PATA_IT8213 is not set +-# CONFIG_PATA_IT821X is not set +-# CONFIG_PATA_JMICRON is not set +-# CONFIG_PATA_MARVELL is not set +-# CONFIG_PATA_NETCELL is not set +-# CONFIG_PATA_NINJA32 is not set +-# CONFIG_PATA_NS87415 is not set +-# CONFIG_PATA_OLDPIIX is not set +-# CONFIG_PATA_OPTIDMA is not set +-# CONFIG_PATA_PDC2027X is not set +-# CONFIG_PATA_PDC_OLD is not set +-# CONFIG_PATA_RADISYS is not set +-# CONFIG_PATA_RDC is not set +-# CONFIG_PATA_SCH is not set +-# CONFIG_PATA_SERVERWORKS is not set +-# CONFIG_PATA_SIL680 is not set +-# CONFIG_PATA_SIS is not set +-# CONFIG_PATA_TOSHIBA is not set +-# CONFIG_PATA_TRIFLEX is not set +-# CONFIG_PATA_VIA is not set +-# CONFIG_PATA_WINBOND is not set +- +-# +-# PIO-only SFF controllers +-# +-# CONFIG_PATA_CMD640_PCI is not set +-# CONFIG_PATA_MPIIX is not set +-# CONFIG_PATA_NS87410 is not set +-# CONFIG_PATA_OPTI is not set +-# CONFIG_PATA_RZ1000 is not set +- +-# +-# Generic fallback / legacy drivers +-# +-# CONFIG_PATA_ACPI is not set ++CONFIG_PATA_ALI=m ++CONFIG_PATA_AMD=m ++CONFIG_PATA_ARTOP=m ++CONFIG_PATA_ATIIXP=m ++CONFIG_PATA_ATP867X=m ++CONFIG_PATA_CMD64X=m ++CONFIG_PATA_PCMCIA=m + CONFIG_ATA_GENERIC=m +-# CONFIG_PATA_LEGACY is not set + CONFIG_MD=y + CONFIG_BLK_DEV_MD=y +-CONFIG_MD_AUTODETECT=y + CONFIG_MD_LINEAR=m +-CONFIG_MD_RAID0=m +-CONFIG_MD_RAID1=m +-CONFIG_MD_RAID10=m +-CONFIG_MD_RAID456=m +-# CONFIG_MD_MULTIPATH is not set + CONFIG_MD_FAULTY=m +-# CONFIG_MD_CLUSTER is not set +-# CONFIG_BCACHE is not set +-CONFIG_BLK_DEV_DM_BUILTIN=y + CONFIG_BLK_DEV_DM=m +-# CONFIG_DM_MQ_DEFAULT is not set + CONFIG_DM_DEBUG=y +-CONFIG_DM_BUFIO=m +-# CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING is not set +-CONFIG_DM_BIO_PRISON=m +-CONFIG_DM_PERSISTENT_DATA=m +-# CONFIG_DM_UNSTRIPED is not set + CONFIG_DM_CRYPT=m + CONFIG_DM_SNAPSHOT=m + CONFIG_DM_THIN_PROVISIONING=m + CONFIG_DM_CACHE=m +-CONFIG_DM_CACHE_SMQ=m + CONFIG_DM_WRITECACHE=m + CONFIG_DM_ERA=m + CONFIG_DM_MIRROR=m +@@ -2350,7 +746,6 @@ + CONFIG_DM_UEVENT=y + CONFIG_DM_FLAKEY=m + CONFIG_DM_VERITY=m +-# CONFIG_DM_VERITY_FEC is not set + CONFIG_DM_SWITCH=m + CONFIG_DM_LOG_WRITES=m + CONFIG_DM_INTEGRITY=m +@@ -2360,34 +755,22 @@ + CONFIG_TCM_PSCSI=m + CONFIG_TCM_USER2=m + CONFIG_LOOPBACK_TARGET=m +-# CONFIG_TCM_FC is not set ++CONFIG_TCM_FC=m + CONFIG_ISCSI_TARGET=m + CONFIG_ISCSI_TARGET_CXGB4=m +-# CONFIG_SBP_TARGET is not set ++CONFIG_SBP_TARGET=m + CONFIG_FUSION=y + CONFIG_FUSION_SPI=m +-# CONFIG_FUSION_FC is not set + CONFIG_FUSION_SAS=m +-CONFIG_FUSION_MAX_SGE=128 +-# CONFIG_FUSION_CTL is not set + CONFIG_FUSION_LOGGING=y +- +-# +-# IEEE 1394 (FireWire) support +-# + CONFIG_FIREWIRE=m + CONFIG_FIREWIRE_OHCI=m + CONFIG_FIREWIRE_SBP2=m + CONFIG_FIREWIRE_NET=m +-# CONFIG_FIREWIRE_NOSY is not set + CONFIG_MACINTOSH_DRIVERS=y + CONFIG_MAC_EMUMOUSEBTN=y +-CONFIG_NETDEVICES=y +-CONFIG_MII=m 
+-CONFIG_NET_CORE=y + CONFIG_BONDING=m + CONFIG_DUMMY=m +-# CONFIG_EQUALIZER is not set + CONFIG_NET_FC=y + CONFIG_IFB=m + CONFIG_NET_TEAM=m +@@ -2400,126 +783,53 @@ + CONFIG_MACVTAP=m + CONFIG_IPVLAN=m + CONFIG_IPVTAP=m +-CONFIG_VXLAN=m + CONFIG_GENEVE=m +-# CONFIG_GTP is not set + CONFIG_MACSEC=m + CONFIG_NETCONSOLE=m + CONFIG_NETCONSOLE_DYNAMIC=y +-CONFIG_NETPOLL=y +-CONFIG_NET_POLL_CONTROLLER=y + CONFIG_TUN=m +-CONFIG_TAP=m +-# CONFIG_TUN_VNET_CROSS_LE is not set + CONFIG_VETH=m + CONFIG_VIRTIO_NET=m + CONFIG_NLMON=m + CONFIG_NET_VRF=m + CONFIG_VSOCKMON=m +-# CONFIG_ARCNET is not set + # CONFIG_ATM_DRIVERS is not set +- +-# +-# CAIF transport drivers +-# +- +-# +-# Distributed Switch Architecture drivers +-# +-CONFIG_ETHERNET=y +-CONFIG_MDIO=m + # CONFIG_NET_VENDOR_3COM is not set + # CONFIG_NET_VENDOR_ADAPTEC is not set + # CONFIG_NET_VENDOR_AGERE is not set + # CONFIG_NET_VENDOR_ALACRITECH is not set + # CONFIG_NET_VENDOR_ALTEON is not set +-# CONFIG_ALTERA_TSE is not set +-CONFIG_NET_VENDOR_AMAZON=y + CONFIG_ENA_ETHERNET=m +-CONFIG_NET_VENDOR_AMD=y +-# CONFIG_AMD8111_ETH is not set +-# CONFIG_PCNET32 is not set + CONFIG_AMD_XGBE=m +-# CONFIG_AMD_XGBE_DCB is not set +-CONFIG_AMD_XGBE_HAVE_ECC=y +-CONFIG_NET_VENDOR_AQUANTIA=y + CONFIG_AQTION=m + # CONFIG_NET_VENDOR_ARC is not set +-CONFIG_NET_VENDOR_ATHEROS=y + CONFIG_ATL2=m + CONFIG_ATL1=m + CONFIG_ATL1E=m + CONFIG_ATL1C=m + CONFIG_ALX=m + # CONFIG_NET_VENDOR_AURORA is not set +-CONFIG_NET_VENDOR_BROADCOM=y +-# CONFIG_B44 is not set +-# CONFIG_BCMGENET is not set +-CONFIG_BNX2=m +-CONFIG_CNIC=m + CONFIG_TIGON3=m +-CONFIG_TIGON3_HWMON=y + CONFIG_BNX2X=m +-CONFIG_BNX2X_SRIOV=y +-# CONFIG_SYSTEMPORT is not set +-CONFIG_BNXT=m +-CONFIG_BNXT_SRIOV=y +-CONFIG_BNXT_FLOWER_OFFLOAD=y + CONFIG_BNXT_DCB=y +-CONFIG_BNXT_HWMON=y +-CONFIG_NET_VENDOR_BROCADE=y +-# CONFIG_BNA is not set +-CONFIG_NET_VENDOR_CADENCE=y +-# CONFIG_MACB is not set +-CONFIG_NET_VENDOR_CAVIUM=y +-# CONFIG_THUNDER_NIC_PF is not set +-# CONFIG_THUNDER_NIC_VF is not set +-# CONFIG_THUNDER_NIC_BGX is not set +-# CONFIG_THUNDER_NIC_RGX is not set +-CONFIG_CAVIUM_PTP=y + CONFIG_LIQUIDIO=m + CONFIG_LIQUIDIO_VF=m +-CONFIG_NET_VENDOR_CHELSIO=y +-# CONFIG_CHELSIO_T1 is not set +-# CONFIG_CHELSIO_T3 is not set +-CONFIG_CHELSIO_T4=m +-# CONFIG_CHELSIO_T4_DCB is not set + CONFIG_CHELSIO_T4VF=m +-CONFIG_CHELSIO_LIB=m +-CONFIG_NET_VENDOR_CISCO=y +-CONFIG_ENIC=m + # CONFIG_NET_VENDOR_CORTINA is not set +-# CONFIG_CX_ECAT is not set + CONFIG_DNET=m +-CONFIG_NET_VENDOR_DEC=y +-# CONFIG_NET_TULIP is not set +-CONFIG_NET_VENDOR_DLINK=y + CONFIG_DL2K=m +-# CONFIG_SUNDANCE is not set +-CONFIG_NET_VENDOR_EMULEX=y + CONFIG_BE2NET=m +-CONFIG_BE2NET_HWMON=y + # CONFIG_BE2NET_BE2 is not set + # CONFIG_BE2NET_BE3 is not set +-CONFIG_BE2NET_LANCER=y +-CONFIG_BE2NET_SKYHAWK=y + # CONFIG_NET_VENDOR_EZCHIP is not set + # CONFIG_NET_VENDOR_HP is not set ++CONFIG_HINIC=m + # CONFIG_NET_VENDOR_I825XX is not set +-CONFIG_NET_VENDOR_INTEL=y +-# CONFIG_E100 is not set + CONFIG_E1000=m + CONFIG_E1000E=m +-CONFIG_E1000E_HWTS=y +-CONFIG_HINIC=m + CONFIG_IGB=m +-CONFIG_IGB_HWMON=y +-CONFIG_IGB_DCA=y + CONFIG_IGBVF=m +-# CONFIG_IXGB is not set + CONFIG_IXGBE=m +-CONFIG_IXGBE_HWMON=y +-CONFIG_IXGBE_DCA=y + CONFIG_IXGBE_DCB=y + CONFIG_IXGBEVF=m + CONFIG_I40E=m +@@ -2527,96 +837,40 @@ + CONFIG_I40EVF=m + CONFIG_ICE=m + CONFIG_FM10K=m +-# CONFIG_JME is not set + # CONFIG_NET_VENDOR_MARVELL is not set +-CONFIG_NET_VENDOR_MELLANOX=y + CONFIG_MLX4_EN=m +-CONFIG_MLX4_EN_DCB=y +-CONFIG_MLX4_CORE=m +-CONFIG_MLX4_DEBUG=y + # 
CONFIG_MLX4_CORE_GEN2 is not set + CONFIG_MLX5_CORE=m +-CONFIG_MLX5_ACCEL=y + CONFIG_MLX5_FPGA=y + CONFIG_MLX5_CORE_EN=y +-CONFIG_MLX5_EN_ARFS=y +-CONFIG_MLX5_EN_RXNFC=y +-CONFIG_MLX5_MPFS=y +-CONFIG_MLX5_ESWITCH=y +-CONFIG_MLX5_CORE_EN_DCB=y + CONFIG_MLX5_CORE_IPOIB=y + CONFIG_MLX5_EN_IPSEC=y +-# CONFIG_MLX5_EN_TLS is not set + CONFIG_MLXSW_CORE=m +-CONFIG_MLXSW_CORE_HWMON=y +-CONFIG_MLXSW_CORE_THERMAL=y +-CONFIG_MLXSW_PCI=m +-CONFIG_MLXSW_I2C=m +-CONFIG_MLXSW_SWITCHIB=m +-CONFIG_MLXSW_SWITCHX2=m +-CONFIG_MLXSW_SPECTRUM=m +-CONFIG_MLXSW_SPECTRUM_DCB=y +-CONFIG_MLXSW_MINIMAL=m +-CONFIG_MLXFW=m + # CONFIG_NET_VENDOR_MICREL is not set + # CONFIG_NET_VENDOR_MICROCHIP is not set + # CONFIG_NET_VENDOR_MICROSEMI is not set +-CONFIG_NET_VENDOR_MYRI=y + CONFIG_MYRI10GE=m +-CONFIG_MYRI10GE_DCA=y +-# CONFIG_FEALNX is not set + # CONFIG_NET_VENDOR_NATSEMI is not set +-CONFIG_NET_VENDOR_NETERION=y +-# CONFIG_S2IO is not set +-# CONFIG_VXGE is not set +-CONFIG_NET_VENDOR_NETRONOME=y + CONFIG_NFP=m +-CONFIG_NFP_APP_FLOWER=y +-CONFIG_NFP_APP_ABM_NIC=y +-# CONFIG_NFP_DEBUG is not set + # CONFIG_NET_VENDOR_NI is not set + # CONFIG_NET_VENDOR_NVIDIA is not set +-CONFIG_NET_VENDOR_OKI=y + CONFIG_ETHOC=m +-CONFIG_NET_VENDOR_PACKET_ENGINES=y +-# CONFIG_HAMACHI is not set +-# CONFIG_YELLOWFIN is not set +-CONFIG_NET_VENDOR_QLOGIC=y + CONFIG_QLA3XXX=m +-# CONFIG_QLCNIC is not set +-# CONFIG_QLGE is not set + CONFIG_NETXEN_NIC=m + CONFIG_QED=m +-CONFIG_QED_LL2=y +-CONFIG_QED_SRIOV=y + CONFIG_QEDE=m +-CONFIG_QED_RDMA=y +-CONFIG_QED_ISCSI=y +-CONFIG_QED_FCOE=y +-CONFIG_QED_OOO=y + # CONFIG_NET_VENDOR_QUALCOMM is not set + # CONFIG_NET_VENDOR_RDC is not set +-CONFIG_NET_VENDOR_REALTEK=y +-# CONFIG_ATP is not set + CONFIG_8139CP=m + CONFIG_8139TOO=m + # CONFIG_8139TOO_PIO is not set +-# CONFIG_8139TOO_TUNE_TWISTER is not set + CONFIG_8139TOO_8129=y +-# CONFIG_8139_OLD_RX_RESET is not set + CONFIG_R8169=m + # CONFIG_NET_VENDOR_RENESAS is not set +-CONFIG_NET_VENDOR_ROCKER=y + CONFIG_ROCKER=m + # CONFIG_NET_VENDOR_SAMSUNG is not set + # CONFIG_NET_VENDOR_SEEQ is not set +-CONFIG_NET_VENDOR_SOLARFLARE=y + CONFIG_SFC=m +-CONFIG_SFC_MTD=y +-CONFIG_SFC_MCDI_MON=y +-CONFIG_SFC_SRIOV=y +-CONFIG_SFC_MCDI_LOGGING=y +-# CONFIG_SFC_FALCON is not set + # CONFIG_NET_VENDOR_SILAN is not set + # CONFIG_NET_VENDOR_SIS is not set + # CONFIG_NET_VENDOR_SMSC is not set +@@ -2628,31 +882,16 @@ + # CONFIG_NET_VENDOR_TI is not set + # CONFIG_NET_VENDOR_VIA is not set + # CONFIG_NET_VENDOR_WIZNET is not set +-# CONFIG_FDDI is not set +-# CONFIG_HIPPI is not set +-# CONFIG_NET_SB1000 is not set +-CONFIG_MDIO_DEVICE=y +-CONFIG_MDIO_BUS=y +-# CONFIG_MDIO_BCM_UNIMAC is not set + CONFIG_MDIO_BITBANG=m +-CONFIG_MDIO_CAVIUM=m +-# CONFIG_MDIO_GPIO is not set + CONFIG_MDIO_MSCC_MIIM=m + CONFIG_MDIO_THUNDER=m + CONFIG_PHYLIB=y +-CONFIG_SWPHY=y + CONFIG_LED_TRIGGER_PHY=y +- +-# +-# MII PHY device drivers +-# + CONFIG_AMD_PHY=m + CONFIG_AQUANTIA_PHY=m +-# CONFIG_AX88796B_PHY is not set + CONFIG_AT803X_PHY=m + CONFIG_BCM7XXX_PHY=m + CONFIG_BCM87XX_PHY=m +-CONFIG_BCM_NET_PHYLIB=m + CONFIG_BROADCOM_PHY=m + CONFIG_CICADA_PHY=m + CONFIG_CORTINA_PHY=m +@@ -2669,12 +908,10 @@ + CONFIG_MARVELL_PHY=m + CONFIG_MARVELL_10G_PHY=m + CONFIG_MICREL_PHY=m +-CONFIG_MICROCHIP_PHY=m + CONFIG_MICROCHIP_T1_PHY=m + CONFIG_MICROSEMI_PHY=m + CONFIG_NATIONAL_PHY=m + CONFIG_QSEMI_PHY=m +-CONFIG_REALTEK_PHY=m + CONFIG_RENESAS_PHY=m + CONFIG_ROCKCHIP_PHY=m + CONFIG_SMSC_PHY=m +@@ -2683,7 +920,7 @@ + CONFIG_VITESSE_PHY=m + CONFIG_XILINX_GMII2RGMII=m + CONFIG_MICREL_KS8995MA=m +-# 
CONFIG_PLIP is not set ++CONFIG_PLIP=m + CONFIG_PPP=m + CONFIG_PPP_BSDCOMP=m + CONFIG_PPP_DEFLATE=m +@@ -2697,11 +934,8 @@ + CONFIG_PPP_ASYNC=m + CONFIG_PPP_SYNC_TTY=m + CONFIG_SLIP=m +-CONFIG_SLHC=m + CONFIG_SLIP_COMPRESSED=y + CONFIG_SLIP_SMART=y +-# CONFIG_SLIP_MODE_SLIP6 is not set +-CONFIG_USB_NET_DRIVERS=y + CONFIG_USB_CATC=m + CONFIG_USB_KAWETH=m + CONFIG_USB_PEGASUS=m +@@ -2709,32 +943,20 @@ + CONFIG_USB_RTL8152=m + CONFIG_USB_LAN78XX=m + CONFIG_USB_USBNET=m +-CONFIG_USB_NET_AX8817X=m +-CONFIG_USB_NET_AX88179_178A=m +-CONFIG_USB_NET_CDCETHER=m + CONFIG_USB_NET_CDC_EEM=m +-CONFIG_USB_NET_CDC_NCM=m + CONFIG_USB_NET_HUAWEI_CDC_NCM=m + CONFIG_USB_NET_CDC_MBIM=m + CONFIG_USB_NET_DM9601=m +-# CONFIG_USB_NET_SR9700 is not set +-# CONFIG_USB_NET_SR9800 is not set + CONFIG_USB_NET_SMSC75XX=m + CONFIG_USB_NET_SMSC95XX=m + CONFIG_USB_NET_GL620A=m +-CONFIG_USB_NET_NET1080=m + CONFIG_USB_NET_PLUSB=m + CONFIG_USB_NET_MCS7830=m + CONFIG_USB_NET_RNDIS_HOST=m +-CONFIG_USB_NET_CDC_SUBSET_ENABLE=m +-CONFIG_USB_NET_CDC_SUBSET=m + CONFIG_USB_ALI_M5632=y + CONFIG_USB_AN2720=y +-CONFIG_USB_BELKIN=y +-CONFIG_USB_ARMLINUX=y + CONFIG_USB_EPSON2888=y + CONFIG_USB_KC2190=y +-CONFIG_USB_NET_ZAURUS=m + CONFIG_USB_NET_CX82310_ETH=m + CONFIG_USB_NET_KALMIA=m + CONFIG_USB_NET_QMI_WWAN=m +@@ -2744,130 +966,44 @@ + CONFIG_USB_SIERRA_NET=m + CONFIG_USB_VL600=m + CONFIG_USB_NET_CH9200=m +-CONFIG_WLAN=y + # CONFIG_WLAN_VENDOR_ADMTEK is not set +-CONFIG_ATH_COMMON=m +-CONFIG_WLAN_VENDOR_ATH=y +-# CONFIG_ATH_DEBUG is not set +-# CONFIG_ATH5K is not set +-# CONFIG_ATH5K_PCI is not set +-CONFIG_ATH9K_HW=m +-CONFIG_ATH9K_COMMON=m +-CONFIG_ATH9K_COMMON_DEBUG=y +-CONFIG_ATH9K_BTCOEX_SUPPORT=y + CONFIG_ATH9K=m +-CONFIG_ATH9K_PCI=y + CONFIG_ATH9K_AHB=y + CONFIG_ATH9K_DEBUGFS=y +-# CONFIG_ATH9K_STATION_STATISTICS is not set +-# CONFIG_ATH9K_DYNACK is not set + CONFIG_ATH9K_WOW=y +-CONFIG_ATH9K_RFKILL=y +-# CONFIG_ATH9K_CHANNEL_CONTEXT is not set +-CONFIG_ATH9K_PCOEM=y + CONFIG_ATH9K_HTC=m +-# CONFIG_ATH9K_HTC_DEBUGFS is not set +-# CONFIG_ATH9K_HWRNG is not set +-# CONFIG_ATH9K_COMMON_SPECTRAL is not set +-# CONFIG_CARL9170 is not set +-# CONFIG_ATH6KL is not set +-# CONFIG_AR5523 is not set +-# CONFIG_WIL6210 is not set + CONFIG_ATH10K=m +-CONFIG_ATH10K_CE=y + CONFIG_ATH10K_PCI=m +-# CONFIG_ATH10K_SDIO is not set +-# CONFIG_ATH10K_USB is not set +-# CONFIG_ATH10K_DEBUG is not set + CONFIG_ATH10K_DEBUGFS=y +-# CONFIG_ATH10K_SPECTRAL is not set +-# CONFIG_ATH10K_TRACING is not set +-# CONFIG_WCN36XX is not set +-# CONFIG_WLAN_VENDOR_ATMEL is not set +-CONFIG_WLAN_VENDOR_BROADCOM=y +-# CONFIG_B43 is not set +-# CONFIG_B43LEGACY is not set +-CONFIG_BRCMUTIL=m ++CONFIG_ATMEL=m ++CONFIG_PCI_ATMEL=m ++CONFIG_PCMCIA_ATMEL=m + CONFIG_BRCMSMAC=m + CONFIG_BRCMFMAC=m +-CONFIG_BRCMFMAC_PROTO_BCDC=y +-CONFIG_BRCMFMAC_PROTO_MSGBUF=y +-CONFIG_BRCMFMAC_SDIO=y + CONFIG_BRCMFMAC_USB=y + CONFIG_BRCMFMAC_PCIE=y +-# CONFIG_BRCM_TRACING is not set +-# CONFIG_BRCMDBG is not set + # CONFIG_WLAN_VENDOR_CISCO is not set +-CONFIG_WLAN_VENDOR_INTEL=y +-# CONFIG_IPW2100 is not set +-# CONFIG_IPW2200 is not set +-# CONFIG_IWL4965 is not set +-# CONFIG_IWL3945 is not set + CONFIG_IWLWIFI=m +-CONFIG_IWLWIFI_LEDS=y + CONFIG_IWLDVM=m + CONFIG_IWLMVM=m +-CONFIG_IWLWIFI_OPMODE_MODULAR=y +-# CONFIG_IWLWIFI_BCAST_FILTERING is not set +- +-# +-# Debugging Options +-# +-# CONFIG_IWLWIFI_DEBUG is not set + CONFIG_IWLWIFI_DEBUGFS=y + # CONFIG_IWLWIFI_DEVICE_TRACING is not set + # CONFIG_WLAN_VENDOR_INTERSIL is not set +-CONFIG_WLAN_VENDOR_MARVELL=y +-# CONFIG_LIBERTAS 
is not set +-# CONFIG_LIBERTAS_THINFIRM is not set + CONFIG_MWIFIEX=m + CONFIG_MWIFIEX_SDIO=m + CONFIG_MWIFIEX_PCIE=m + CONFIG_MWIFIEX_USB=m +-# CONFIG_MWL8K is not set +-CONFIG_WLAN_VENDOR_MEDIATEK=y + CONFIG_MT7601U=m +-CONFIG_MT76_CORE=m +-CONFIG_MT76_LEDS=y +-CONFIG_MT76_USB=m +-CONFIG_MT76x2_COMMON=m + CONFIG_MT76x0U=m +-# CONFIG_MT76x2E is not set + CONFIG_MT76x2U=m +-CONFIG_WLAN_VENDOR_RALINK=y + CONFIG_RT2X00=m +-# CONFIG_RT2400PCI is not set +-# CONFIG_RT2500PCI is not set +-# CONFIG_RT61PCI is not set + CONFIG_RT2800PCI=m +-CONFIG_RT2800PCI_RT33XX=y +-CONFIG_RT2800PCI_RT35XX=y +-CONFIG_RT2800PCI_RT53XX=y +-CONFIG_RT2800PCI_RT3290=y +-# CONFIG_RT2500USB is not set +-# CONFIG_RT73USB is not set + CONFIG_RT2800USB=m +-CONFIG_RT2800USB_RT33XX=y +-CONFIG_RT2800USB_RT35XX=y + CONFIG_RT2800USB_RT3573=y + CONFIG_RT2800USB_RT53XX=y + CONFIG_RT2800USB_RT55XX=y + CONFIG_RT2800USB_UNKNOWN=y +-CONFIG_RT2800_LIB=m +-CONFIG_RT2800_LIB_MMIO=m +-CONFIG_RT2X00_LIB_MMIO=m +-CONFIG_RT2X00_LIB_PCI=m +-CONFIG_RT2X00_LIB_USB=m +-CONFIG_RT2X00_LIB=m +-CONFIG_RT2X00_LIB_FIRMWARE=y +-CONFIG_RT2X00_LIB_CRYPTO=y +-CONFIG_RT2X00_LIB_LEDS=y + CONFIG_RT2X00_LIB_DEBUGFS=y +-# CONFIG_RT2X00_DEBUG is not set +-CONFIG_WLAN_VENDOR_REALTEK=y +-# CONFIG_RTL8180 is not set +-# CONFIG_RTL8187 is not set +-CONFIG_RTL_CARDS=m + CONFIG_RTL8192CE=m + CONFIG_RTL8192SE=m + CONFIG_RTL8192DE=m +@@ -2877,91 +1013,34 @@ + CONFIG_RTL8192EE=m + CONFIG_RTL8821AE=m + CONFIG_RTL8192CU=m +-CONFIG_RTLWIFI=m +-CONFIG_RTLWIFI_PCI=m +-CONFIG_RTLWIFI_USB=m + # CONFIG_RTLWIFI_DEBUG is not set +-CONFIG_RTL8192C_COMMON=m +-CONFIG_RTL8723_COMMON=m +-CONFIG_RTLBTCOEXIST=m + CONFIG_RTL8XXXU=m +-# CONFIG_RTL8XXXU_UNTESTED is not set + # CONFIG_WLAN_VENDOR_RSI is not set + # CONFIG_WLAN_VENDOR_ST is not set + # CONFIG_WLAN_VENDOR_TI is not set + # CONFIG_WLAN_VENDOR_ZYDAS is not set +-CONFIG_WLAN_VENDOR_QUANTENNA=y +-# CONFIG_QTNFMAC_PEARL_PCIE is not set + CONFIG_MAC80211_HWSIM=m +-# CONFIG_USB_NET_RNDIS_WLAN is not set +- +-# +-# Enable WiMAX (Networking options) to see the WiMAX drivers +-# + CONFIG_WAN=y +-# CONFIG_LANMEDIA is not set + CONFIG_HDLC=m + CONFIG_HDLC_RAW=m +-# CONFIG_HDLC_RAW_ETH is not set + CONFIG_HDLC_CISCO=m + CONFIG_HDLC_FR=m + CONFIG_HDLC_PPP=m +- +-# +-# X.25/LAPB support is disabled +-# +-# CONFIG_PCI200SYN is not set +-# CONFIG_WANXL is not set +-# CONFIG_PC300TOO is not set +-# CONFIG_FARSYNC is not set +-# CONFIG_DSCC4 is not set + CONFIG_DLCI=m +-CONFIG_DLCI_MAX=8 +-# CONFIG_SBNI is not set +-CONFIG_IEEE802154_DRIVERS=m + CONFIG_IEEE802154_FAKELB=m +-# CONFIG_IEEE802154_AT86RF230 is not set +-# CONFIG_IEEE802154_MRF24J40 is not set +-# CONFIG_IEEE802154_CC2520 is not set +-# CONFIG_IEEE802154_ATUSB is not set +-# CONFIG_IEEE802154_ADF7242 is not set +-# CONFIG_IEEE802154_CA8210 is not set +-# CONFIG_IEEE802154_MCR20A is not set +-# CONFIG_IEEE802154_HWSIM is not set +-CONFIG_XEN_NETDEV_FRONTEND=m + CONFIG_VMXNET3=m + CONFIG_FUJITSU_ES=m + CONFIG_THUNDERBOLT_NET=m +-CONFIG_HYPERV_NET=m + CONFIG_NETDEVSIM=m +-CONFIG_NET_FAILOVER=m + CONFIG_ISDN=y +-CONFIG_ISDN_I4L=m + CONFIG_ISDN_PPP=y + CONFIG_ISDN_PPP_VJ=y + CONFIG_ISDN_MPP=y + CONFIG_IPPP_FILTER=y +-# CONFIG_ISDN_PPP_BSDCOMP is not set + CONFIG_ISDN_AUDIO=y + CONFIG_ISDN_TTY_FAX=y +- +-# +-# ISDN feature submodules +-# + CONFIG_ISDN_DIVERSION=m +- +-# +-# ISDN4Linux hardware drivers +-# +- +-# +-# Passive cards +-# + CONFIG_ISDN_DRV_HISAX=m +- +-# +-# D-channel protocol features +-# + CONFIG_HISAX_EURO=y + CONFIG_DE_AOC=y + CONFIG_HISAX_NO_SENDCOMPLETE=y +@@ -2969,11 
+1048,6 @@ + CONFIG_HISAX_NO_KEYPAD=y + CONFIG_HISAX_1TR6=y + CONFIG_HISAX_NI1=y +-CONFIG_HISAX_MAX_CARDS=8 +- +-# +-# HiSax supported cards +-# + CONFIG_HISAX_16_3=y + CONFIG_HISAX_TELESPCI=y + CONFIG_HISAX_S0BOX=y +@@ -2992,17 +1066,7 @@ + CONFIG_HISAX_W6692=y + CONFIG_HISAX_HFC_SX=y + CONFIG_HISAX_ENTERNOW_PCI=y +-# CONFIG_HISAX_DEBUG is not set +- +-# +-# HiSax PCMCIA card service modules +-# +- +-# +-# HiSax sub driver modules +-# + CONFIG_HISAX_ST5481=m +-# CONFIG_HISAX_HFCUSB is not set + CONFIG_HISAX_HFC4S8S=m + CONFIG_HISAX_FRITZ_PCIPNP=m + CONFIG_ISDN_CAPI=m +@@ -3011,31 +1075,20 @@ + CONFIG_ISDN_CAPI_MIDDLEWARE=y + CONFIG_ISDN_CAPI_CAPIDRV=m + CONFIG_ISDN_CAPI_CAPIDRV_VERBOSE=y +- +-# +-# CAPI hardware drivers +-# + CONFIG_CAPI_AVM=y + CONFIG_ISDN_DRV_AVMB1_B1PCI=m + CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y + CONFIG_ISDN_DRV_AVMB1_T1PCI=m + CONFIG_ISDN_DRV_AVMB1_C4=m +-# CONFIG_CAPI_EICON is not set + CONFIG_ISDN_DRV_GIGASET=m +-CONFIG_GIGASET_CAPI=y + CONFIG_GIGASET_BASE=m + CONFIG_GIGASET_M105=m + CONFIG_GIGASET_M101=m +-# CONFIG_GIGASET_DEBUG is not set + CONFIG_HYSDN=m + CONFIG_HYSDN_CAPI=y + CONFIG_MISDN=m + CONFIG_MISDN_DSP=m + CONFIG_MISDN_L1OIP=m +- +-# +-# mISDN hardware drivers +-# + CONFIG_MISDN_HFCPCI=m + CONFIG_MISDN_HFCMULTI=m + CONFIG_MISDN_HFCUSB=m +@@ -3044,314 +1097,81 @@ + CONFIG_MISDN_INFINEON=m + CONFIG_MISDN_W6692=m + CONFIG_MISDN_NETJET=m +-CONFIG_MISDN_IPAC=m +-CONFIG_MISDN_ISAR=m +-CONFIG_ISDN_HDLC=m +-# CONFIG_NVM is not set +- +-# +-# Input device support +-# +-CONFIG_INPUT=y +-CONFIG_INPUT_LEDS=y +-CONFIG_INPUT_FF_MEMLESS=m +-CONFIG_INPUT_POLLDEV=m +-CONFIG_INPUT_SPARSEKMAP=m +-# CONFIG_INPUT_MATRIXKMAP is not set +- +-# +-# Userland interfaces +-# + CONFIG_INPUT_MOUSEDEV=y +-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 + CONFIG_INPUT_JOYDEV=m + CONFIG_INPUT_EVDEV=y +-# CONFIG_INPUT_EVBUG is not set +- +-# +-# Input Device Drivers +-# +-CONFIG_INPUT_KEYBOARD=y +-# CONFIG_KEYBOARD_ADC is not set +-# CONFIG_KEYBOARD_ADP5588 is not set +-# CONFIG_KEYBOARD_ADP5589 is not set +-CONFIG_KEYBOARD_ATKBD=y +-# CONFIG_KEYBOARD_QT1070 is not set +-# CONFIG_KEYBOARD_QT2160 is not set +-# CONFIG_KEYBOARD_DLINK_DIR685 is not set +-# CONFIG_KEYBOARD_LKKBD is not set +-# CONFIG_KEYBOARD_GPIO is not set +-# CONFIG_KEYBOARD_GPIO_POLLED is not set +-# CONFIG_KEYBOARD_TCA6416 is not set +-# CONFIG_KEYBOARD_TCA8418 is not set +-# CONFIG_KEYBOARD_MATRIX is not set +-# CONFIG_KEYBOARD_LM8323 is not set +-# CONFIG_KEYBOARD_LM8333 is not set +-# CONFIG_KEYBOARD_MAX7359 is not set +-# CONFIG_KEYBOARD_MCS is not set +-# CONFIG_KEYBOARD_MPR121 is not set +-# CONFIG_KEYBOARD_NEWTON is not set +-# CONFIG_KEYBOARD_OPENCORES is not set +-# CONFIG_KEYBOARD_SAMSUNG is not set +-# CONFIG_KEYBOARD_STOWAWAY is not set +-# CONFIG_KEYBOARD_SUNKBD is not set +-# CONFIG_KEYBOARD_TM2_TOUCHKEY is not set +-# CONFIG_KEYBOARD_XTKBD is not set +-CONFIG_INPUT_MOUSE=y +-CONFIG_MOUSE_PS2=y +-CONFIG_MOUSE_PS2_ALPS=y +-CONFIG_MOUSE_PS2_BYD=y +-CONFIG_MOUSE_PS2_LOGIPS2PP=y +-CONFIG_MOUSE_PS2_SYNAPTICS=y +-CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS=y +-CONFIG_MOUSE_PS2_CYPRESS=y +-CONFIG_MOUSE_PS2_LIFEBOOK=y +-CONFIG_MOUSE_PS2_TRACKPOINT=y ++CONFIG_KEYBOARD_GPIO=m ++CONFIG_KEYBOARD_GPIO_POLLED=m ++CONFIG_MOUSE_PS2=m + CONFIG_MOUSE_PS2_ELANTECH=y +-CONFIG_MOUSE_PS2_ELANTECH_SMBUS=y + CONFIG_MOUSE_PS2_SENTELIC=y +-# CONFIG_MOUSE_PS2_TOUCHKIT is not set +-CONFIG_MOUSE_PS2_FOCALTECH=y + CONFIG_MOUSE_PS2_VMMOUSE=y +-CONFIG_MOUSE_PS2_SMBUS=y + 
CONFIG_MOUSE_SERIAL=m + CONFIG_MOUSE_APPLETOUCH=m + CONFIG_MOUSE_BCM5974=m + CONFIG_MOUSE_CYAPA=m + CONFIG_MOUSE_ELAN_I2C=m +-CONFIG_MOUSE_ELAN_I2C_I2C=y + CONFIG_MOUSE_ELAN_I2C_SMBUS=y + CONFIG_MOUSE_VSXXXAA=m +-# CONFIG_MOUSE_GPIO is not set + CONFIG_MOUSE_SYNAPTICS_I2C=m + CONFIG_MOUSE_SYNAPTICS_USB=m +-# CONFIG_INPUT_JOYSTICK is not set + CONFIG_INPUT_TABLET=y + CONFIG_TABLET_USB_ACECAD=m + CONFIG_TABLET_USB_AIPTEK=m + CONFIG_TABLET_USB_GTCO=m +-# CONFIG_TABLET_USB_HANWANG is not set + CONFIG_TABLET_USB_KBTAB=m +-# CONFIG_TABLET_USB_PEGASUS is not set + CONFIG_TABLET_SERIAL_WACOM4=m + CONFIG_INPUT_TOUCHSCREEN=y +-CONFIG_TOUCHSCREEN_PROPERTIES=y +-# CONFIG_TOUCHSCREEN_ADS7846 is not set +-# CONFIG_TOUCHSCREEN_AD7877 is not set +-# CONFIG_TOUCHSCREEN_AD7879 is not set +-# CONFIG_TOUCHSCREEN_ADC is not set +-# CONFIG_TOUCHSCREEN_ATMEL_MXT is not set +-# CONFIG_TOUCHSCREEN_AUO_PIXCIR is not set +-# CONFIG_TOUCHSCREEN_BU21013 is not set +-# CONFIG_TOUCHSCREEN_BU21029 is not set +-# CONFIG_TOUCHSCREEN_CHIPONE_ICN8505 is not set +-# CONFIG_TOUCHSCREEN_CY8CTMG110 is not set +-# CONFIG_TOUCHSCREEN_CYTTSP_CORE is not set +-# CONFIG_TOUCHSCREEN_CYTTSP4_CORE is not set +-# CONFIG_TOUCHSCREEN_DYNAPRO is not set +-# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set +-# CONFIG_TOUCHSCREEN_EETI is not set +-# CONFIG_TOUCHSCREEN_EGALAX_SERIAL is not set +-# CONFIG_TOUCHSCREEN_EXC3000 is not set +-# CONFIG_TOUCHSCREEN_FUJITSU is not set +-# CONFIG_TOUCHSCREEN_GOODIX is not set +-# CONFIG_TOUCHSCREEN_HIDEEP is not set +-# CONFIG_TOUCHSCREEN_ILI210X is not set +-# CONFIG_TOUCHSCREEN_S6SY761 is not set +-# CONFIG_TOUCHSCREEN_GUNZE is not set +-# CONFIG_TOUCHSCREEN_EKTF2127 is not set +-# CONFIG_TOUCHSCREEN_ELAN is not set + CONFIG_TOUCHSCREEN_ELO=m + CONFIG_TOUCHSCREEN_WACOM_W8001=m + CONFIG_TOUCHSCREEN_WACOM_I2C=m +-# CONFIG_TOUCHSCREEN_MAX11801 is not set +-# CONFIG_TOUCHSCREEN_MCS5000 is not set +-# CONFIG_TOUCHSCREEN_MMS114 is not set +-# CONFIG_TOUCHSCREEN_MELFAS_MIP4 is not set +-# CONFIG_TOUCHSCREEN_MTOUCH is not set +-# CONFIG_TOUCHSCREEN_INEXIO is not set +-# CONFIG_TOUCHSCREEN_MK712 is not set +-# CONFIG_TOUCHSCREEN_PENMOUNT is not set +-# CONFIG_TOUCHSCREEN_EDT_FT5X06 is not set +-# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +-# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +-# CONFIG_TOUCHSCREEN_PIXCIR is not set +-# CONFIG_TOUCHSCREEN_WDT87XX_I2C is not set +-# CONFIG_TOUCHSCREEN_WM97XX is not set +-# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +-# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +-# CONFIG_TOUCHSCREEN_TSC_SERIO is not set +-# CONFIG_TOUCHSCREEN_TSC2004 is not set +-# CONFIG_TOUCHSCREEN_TSC2005 is not set +-# CONFIG_TOUCHSCREEN_TSC2007 is not set +-# CONFIG_TOUCHSCREEN_RM_TS is not set +-# CONFIG_TOUCHSCREEN_SILEAD is not set +-# CONFIG_TOUCHSCREEN_SIS_I2C is not set +-# CONFIG_TOUCHSCREEN_ST1232 is not set +-# CONFIG_TOUCHSCREEN_STMFTS is not set +-# CONFIG_TOUCHSCREEN_SUR40 is not set +-# CONFIG_TOUCHSCREEN_SURFACE3_SPI is not set +-# CONFIG_TOUCHSCREEN_SX8654 is not set +-# CONFIG_TOUCHSCREEN_TPS6507X is not set +-# CONFIG_TOUCHSCREEN_ZET6223 is not set +-# CONFIG_TOUCHSCREEN_ZFORCE is not set +-# CONFIG_TOUCHSCREEN_ROHM_BU21023 is not set + CONFIG_INPUT_MISC=y +-# CONFIG_INPUT_AD714X is not set +-# CONFIG_INPUT_BMA150 is not set +-# CONFIG_INPUT_E3X0_BUTTON is not set + CONFIG_INPUT_PCSPKR=m +-# CONFIG_INPUT_MMA8450 is not set + CONFIG_INPUT_APANEL=m + CONFIG_INPUT_GP2A=m +-# CONFIG_INPUT_GPIO_BEEPER is not set +-# CONFIG_INPUT_GPIO_DECODER is not set + CONFIG_INPUT_ATLAS_BTNS=m + 
CONFIG_INPUT_ATI_REMOTE2=m + CONFIG_INPUT_KEYSPAN_REMOTE=m +-# CONFIG_INPUT_KXTJ9 is not set + CONFIG_INPUT_POWERMATE=m + CONFIG_INPUT_YEALINK=m + CONFIG_INPUT_CM109=m + CONFIG_INPUT_UINPUT=m +-# CONFIG_INPUT_PCF8574 is not set +-# CONFIG_INPUT_PWM_BEEPER is not set +-# CONFIG_INPUT_PWM_VIBRA is not set + CONFIG_INPUT_GPIO_ROTARY_ENCODER=m +-# CONFIG_INPUT_ADXL34X is not set +-# CONFIG_INPUT_IMS_PCU is not set +-# CONFIG_INPUT_CMA3000 is not set +-CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m +-# CONFIG_INPUT_IDEAPAD_SLIDEBAR is not set +-# CONFIG_INPUT_DRV260X_HAPTICS is not set +-# CONFIG_INPUT_DRV2665_HAPTICS is not set +-# CONFIG_INPUT_DRV2667_HAPTICS is not set +-CONFIG_RMI4_CORE=m + CONFIG_RMI4_I2C=m + CONFIG_RMI4_SPI=m + CONFIG_RMI4_SMB=m +-CONFIG_RMI4_F03=y +-CONFIG_RMI4_F03_SERIO=m +-CONFIG_RMI4_2D_SENSOR=y +-CONFIG_RMI4_F11=y +-CONFIG_RMI4_F12=y +-CONFIG_RMI4_F30=y + CONFIG_RMI4_F34=y +-# CONFIG_RMI4_F54 is not set + CONFIG_RMI4_F55=y +- +-# +-# Hardware I/O ports +-# +-CONFIG_SERIO=y +-CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y +-CONFIG_SERIO_I8042=y +-CONFIG_SERIO_SERPORT=y +-# CONFIG_SERIO_CT82C710 is not set +-# CONFIG_SERIO_PARKBD is not set +-# CONFIG_SERIO_PCIPS2 is not set +-CONFIG_SERIO_LIBPS2=y + CONFIG_SERIO_RAW=m + CONFIG_SERIO_ALTERA_PS2=m +-# CONFIG_SERIO_PS2MULT is not set + CONFIG_SERIO_ARC_PS2=m +-CONFIG_HYPERV_KEYBOARD=m +-# CONFIG_SERIO_GPIO_PS2 is not set +-# CONFIG_USERIO is not set +-# CONFIG_GAMEPORT is not set +- +-# +-# Character devices +-# +-CONFIG_TTY=y +-CONFIG_VT=y +-CONFIG_CONSOLE_TRANSLATIONS=y +-CONFIG_VT_CONSOLE=y +-CONFIG_VT_CONSOLE_SLEEP=y +-CONFIG_HW_CONSOLE=y +-CONFIG_VT_HW_CONSOLE_BINDING=y +-CONFIG_UNIX98_PTYS=y + # CONFIG_LEGACY_PTYS is not set + CONFIG_SERIAL_NONSTANDARD=y +-# CONFIG_ROCKETPORT is not set + CONFIG_CYCLADES=m +-# CONFIG_CYZ_INTR is not set +-# CONFIG_MOXA_INTELLIO is not set +-# CONFIG_MOXA_SMARTIO is not set + CONFIG_SYNCLINK=m + CONFIG_SYNCLINKMP=m + CONFIG_SYNCLINK_GT=m + CONFIG_NOZOMI=m +-# CONFIG_ISI is not set + CONFIG_N_HDLC=m + CONFIG_N_GSM=m +-# CONFIG_TRACE_SINK is not set +-CONFIG_LDISC_AUTOLOAD=y +-CONFIG_DEVMEM=y +-# CONFIG_DEVKMEM is not set +- +-# +-# Serial drivers +-# +-CONFIG_SERIAL_EARLYCON=y + CONFIG_SERIAL_8250=y + # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +-CONFIG_SERIAL_8250_PNP=y +-# CONFIG_SERIAL_8250_FINTEK is not set + CONFIG_SERIAL_8250_CONSOLE=y +-CONFIG_SERIAL_8250_DMA=y +-CONFIG_SERIAL_8250_PCI=y +-CONFIG_SERIAL_8250_EXAR=y + CONFIG_SERIAL_8250_NR_UARTS=32 +-CONFIG_SERIAL_8250_RUNTIME_UARTS=4 + CONFIG_SERIAL_8250_EXTENDED=y + CONFIG_SERIAL_8250_MANY_PORTS=y + CONFIG_SERIAL_8250_SHARE_IRQ=y +-# CONFIG_SERIAL_8250_DETECT_IRQ is not set + CONFIG_SERIAL_8250_RSA=y + CONFIG_SERIAL_8250_DW=y +-# CONFIG_SERIAL_8250_RT288X is not set +-CONFIG_SERIAL_8250_LPSS=y +-CONFIG_SERIAL_8250_MID=y +-# CONFIG_SERIAL_8250_MOXA is not set +- +-# +-# Non-8250 serial port support +-# +-# CONFIG_SERIAL_KGDB_NMI is not set +-# CONFIG_SERIAL_MAX3100 is not set +-# CONFIG_SERIAL_MAX310X is not set +-# CONFIG_SERIAL_UARTLITE is not set +-CONFIG_SERIAL_CORE=y +-CONFIG_SERIAL_CORE_CONSOLE=y +-CONFIG_CONSOLE_POLL=y + CONFIG_SERIAL_JSM=m +-# CONFIG_SERIAL_SCCNXP is not set +-# CONFIG_SERIAL_SC16IS7XX is not set +-# CONFIG_SERIAL_ALTERA_JTAGUART is not set +-# CONFIG_SERIAL_ALTERA_UART is not set +-# CONFIG_SERIAL_IFX6X60 is not set + CONFIG_SERIAL_ARC=m +-CONFIG_SERIAL_ARC_NR_PORTS=1 +-# CONFIG_SERIAL_RP2 is not set +-# CONFIG_SERIAL_FSL_LPUART is not set +-# CONFIG_SERIAL_DEV_BUS is not set + CONFIG_PRINTER=m +-# CONFIG_LP_CONSOLE is 
not set + CONFIG_PPDEV=m +-CONFIG_HVC_DRIVER=y +-CONFIG_HVC_IRQ=y +-CONFIG_HVC_XEN=y +-CONFIG_HVC_XEN_FRONTEND=y + CONFIG_VIRTIO_CONSOLE=m + CONFIG_IPMI_HANDLER=m +-CONFIG_IPMI_DMI_DECODE=y + CONFIG_IPMI_PANIC_EVENT=y + CONFIG_IPMI_PANIC_STRING=y + CONFIG_IPMI_DEVICE_INTERFACE=m +-CONFIG_IPMI_SI=m + CONFIG_IPMI_SSIF=m + CONFIG_IPMI_WATCHDOG=m + CONFIG_IPMI_POWEROFF=m +@@ -3362,192 +1182,52 @@ + CONFIG_HW_RANDOM_VIA=m + CONFIG_HW_RANDOM_VIRTIO=y + CONFIG_NVRAM=y +-# CONFIG_APPLICOM is not set +-# CONFIG_MWAVE is not set + CONFIG_RAW_DRIVER=y + CONFIG_MAX_RAW_DEVS=8192 + CONFIG_HPET=y +-CONFIG_HPET_MMAP=y + # CONFIG_HPET_MMAP_DEFAULT is not set + CONFIG_HANGCHECK_TIMER=m +-CONFIG_UV_MMTIMER=m +-CONFIG_TCG_TPM=y +-CONFIG_HW_RANDOM_TPM=y +-CONFIG_TCG_TIS_CORE=y +-CONFIG_TCG_TIS=y +-# CONFIG_TCG_TIS_SPI is not set + CONFIG_TCG_TIS_I2C_ATMEL=m + CONFIG_TCG_TIS_I2C_INFINEON=m + CONFIG_TCG_TIS_I2C_NUVOTON=m + CONFIG_TCG_NSC=m + CONFIG_TCG_ATMEL=m + CONFIG_TCG_INFINEON=m +-# CONFIG_TCG_XEN is not set +-CONFIG_TCG_CRB=y +-# CONFIG_TCG_VTPM_PROXY is not set +-CONFIG_TCG_TIS_ST33ZP24=m + CONFIG_TCG_TIS_ST33ZP24_I2C=m +-# CONFIG_TCG_TIS_ST33ZP24_SPI is not set + CONFIG_TELCLOCK=m +-CONFIG_DEVPORT=y +-# CONFIG_XILLYBUS is not set +-# CONFIG_RANDOM_TRUST_CPU is not set +- +-# +-# I2C support +-# + CONFIG_I2C=y +-CONFIG_ACPI_I2C_OPREGION=y +-CONFIG_I2C_BOARDINFO=y +-CONFIG_I2C_COMPAT=y + CONFIG_I2C_CHARDEV=m +-CONFIG_I2C_MUX=m +- +-# +-# Multiplexer I2C Chip support +-# +-# CONFIG_I2C_MUX_GPIO is not set +-# CONFIG_I2C_MUX_LTC4306 is not set +-# CONFIG_I2C_MUX_PCA9541 is not set +-# CONFIG_I2C_MUX_PCA954x is not set +-# CONFIG_I2C_MUX_REG is not set + CONFIG_I2C_MUX_MLXCPLD=m +-CONFIG_I2C_HELPER_AUTO=y +-CONFIG_I2C_SMBUS=m +-CONFIG_I2C_ALGOBIT=m +-CONFIG_I2C_ALGOPCA=m +- +-# +-# I2C Hardware Bus support +-# +- +-# +-# PC SMBus host controller drivers +-# +-# CONFIG_I2C_ALI1535 is not set +-# CONFIG_I2C_ALI1563 is not set +-# CONFIG_I2C_ALI15X3 is not set + CONFIG_I2C_AMD756=m + CONFIG_I2C_AMD756_S4882=m + CONFIG_I2C_AMD8111=m +-CONFIG_I2C_I801=m + CONFIG_I2C_ISCH=m + CONFIG_I2C_ISMT=m + CONFIG_I2C_PIIX4=m + CONFIG_I2C_NFORCE2=m + CONFIG_I2C_NFORCE2_S4985=m +-# CONFIG_I2C_SIS5595 is not set +-# CONFIG_I2C_SIS630 is not set + CONFIG_I2C_SIS96X=m + CONFIG_I2C_VIA=m + CONFIG_I2C_VIAPRO=m +- +-# +-# ACPI drivers +-# + CONFIG_I2C_SCMI=m +- +-# +-# I2C system bus drivers (mostly embedded / system-on-chip) +-# +-# CONFIG_I2C_CBUS_GPIO is not set +-CONFIG_I2C_DESIGNWARE_CORE=m + CONFIG_I2C_DESIGNWARE_PLATFORM=m +-# CONFIG_I2C_DESIGNWARE_SLAVE is not set +-# CONFIG_I2C_DESIGNWARE_PCI is not set + CONFIG_I2C_DESIGNWARE_BAYTRAIL=y +-# CONFIG_I2C_EMEV2 is not set +-# CONFIG_I2C_GPIO is not set +-# CONFIG_I2C_OCORES is not set + CONFIG_I2C_PCA_PLATFORM=m + CONFIG_I2C_SIMTEC=m +-# CONFIG_I2C_XILINX is not set +- +-# +-# External I2C/SMBus adapter drivers +-# + CONFIG_I2C_DIOLAN_U2C=m + CONFIG_I2C_PARPORT=m + CONFIG_I2C_PARPORT_LIGHT=m +-# CONFIG_I2C_ROBOTFUZZ_OSIF is not set +-# CONFIG_I2C_TAOS_EVM is not set + CONFIG_I2C_TINY_USB=m + CONFIG_I2C_VIPERBOARD=m +- +-# +-# Other I2C/SMBus bus drivers +-# + CONFIG_I2C_MLXCPLD=m + CONFIG_I2C_STUB=m +-# CONFIG_I2C_SLAVE is not set +-# CONFIG_I2C_DEBUG_CORE is not set +-# CONFIG_I2C_DEBUG_ALGO is not set +-# CONFIG_I2C_DEBUG_BUS is not set + CONFIG_SPI=y +-# CONFIG_SPI_DEBUG is not set +-CONFIG_SPI_MASTER=y +-# CONFIG_SPI_MEM is not set +- +-# +-# SPI Master Controller Drivers +-# +-# CONFIG_SPI_ALTERA is not set +-# CONFIG_SPI_AXI_SPI_ENGINE is not set +-# CONFIG_SPI_BITBANG 
is not set +-# CONFIG_SPI_BUTTERFLY is not set +-# CONFIG_SPI_CADENCE is not set +-# CONFIG_SPI_DESIGNWARE is not set +-# CONFIG_SPI_GPIO is not set +-# CONFIG_SPI_LM70_LLP is not set +-# CONFIG_SPI_OC_TINY is not set +-# CONFIG_SPI_PXA2XX is not set +-# CONFIG_SPI_ROCKCHIP is not set +-# CONFIG_SPI_SC18IS602 is not set +-# CONFIG_SPI_XCOMM is not set +-# CONFIG_SPI_XILINX is not set +-# CONFIG_SPI_ZYNQMP_GQSPI is not set +- +-# +-# SPI Protocol Masters +-# +-# CONFIG_SPI_SPIDEV is not set +-# CONFIG_SPI_LOOPBACK_TEST is not set +-# CONFIG_SPI_TLE62X0 is not set +-# CONFIG_SPI_SLAVE is not set +-# CONFIG_SPMI is not set +-# CONFIG_HSI is not set +-CONFIG_PPS=y +-# CONFIG_PPS_DEBUG is not set +- +-# +-# PPS clients support +-# +-# CONFIG_PPS_CLIENT_KTIMER is not set + CONFIG_PPS_CLIENT_LDISC=m + CONFIG_PPS_CLIENT_PARPORT=m + CONFIG_PPS_CLIENT_GPIO=m +- +-# +-# PPS generators support +-# +- +-# +-# PTP clock support +-# +-CONFIG_PTP_1588_CLOCK=y + CONFIG_DP83640_PHY=m +-CONFIG_PTP_1588_CLOCK_KVM=m +-CONFIG_PINCTRL=y +-CONFIG_PINMUX=y +-CONFIG_PINCONF=y +-CONFIG_GENERIC_PINCONF=y +-# CONFIG_DEBUG_PINCTRL is not set + CONFIG_PINCTRL_AMD=m +-# CONFIG_PINCTRL_MCP23S08 is not set +-# CONFIG_PINCTRL_SX150X is not set + CONFIG_PINCTRL_BAYTRAIL=y +-# CONFIG_PINCTRL_CHERRYVIEW is not set +-CONFIG_PINCTRL_INTEL=m + CONFIG_PINCTRL_BROXTON=m + CONFIG_PINCTRL_CANNONLAKE=m + CONFIG_PINCTRL_CEDARFORK=m +@@ -3556,113 +1236,14 @@ + CONFIG_PINCTRL_ICELAKE=m + CONFIG_PINCTRL_LEWISBURG=m + CONFIG_PINCTRL_SUNRISEPOINT=m +-CONFIG_GPIOLIB=y +-CONFIG_GPIOLIB_FASTPATH_LIMIT=512 +-CONFIG_GPIO_ACPI=y +-CONFIG_GPIOLIB_IRQCHIP=y +-# CONFIG_DEBUG_GPIO is not set + CONFIG_GPIO_SYSFS=y +-CONFIG_GPIO_GENERIC=m +- +-# +-# Memory mapped GPIO drivers +-# + CONFIG_GPIO_AMDPT=m +-# CONFIG_GPIO_DWAPB is not set +-# CONFIG_GPIO_EXAR is not set +-# CONFIG_GPIO_GENERIC_PLATFORM is not set + CONFIG_GPIO_ICH=m +-# CONFIG_GPIO_LYNXPOINT is not set +-# CONFIG_GPIO_MB86S7X is not set +-# CONFIG_GPIO_MOCKUP is not set +-# CONFIG_GPIO_VX855 is not set +- +-# +-# Port-mapped I/O GPIO drivers +-# +-# CONFIG_GPIO_F7188X is not set +-# CONFIG_GPIO_IT87 is not set +-# CONFIG_GPIO_SCH is not set +-# CONFIG_GPIO_SCH311X is not set +-# CONFIG_GPIO_WINBOND is not set +-# CONFIG_GPIO_WS16C48 is not set +- +-# +-# I2C GPIO expanders +-# +-# CONFIG_GPIO_ADP5588 is not set +-# CONFIG_GPIO_MAX7300 is not set +-# CONFIG_GPIO_MAX732X is not set +-# CONFIG_GPIO_PCA953X is not set +-# CONFIG_GPIO_PCF857X is not set +-# CONFIG_GPIO_TPIC2810 is not set +- +-# +-# MFD GPIO expanders +-# +- +-# +-# PCI GPIO expanders +-# +-# CONFIG_GPIO_AMD8111 is not set +-# CONFIG_GPIO_ML_IOH is not set +-# CONFIG_GPIO_PCI_IDIO_16 is not set +-# CONFIG_GPIO_PCIE_IDIO_24 is not set +-# CONFIG_GPIO_RDC321X is not set +- +-# +-# SPI GPIO expanders +-# +-# CONFIG_GPIO_MAX3191X is not set +-# CONFIG_GPIO_MAX7301 is not set +-# CONFIG_GPIO_MC33880 is not set +-# CONFIG_GPIO_PISOSR is not set +-# CONFIG_GPIO_XRA1403 is not set +- +-# +-# USB GPIO expanders +-# + CONFIG_GPIO_VIPERBOARD=m +-# CONFIG_W1 is not set +-# CONFIG_POWER_AVS is not set + CONFIG_POWER_RESET=y +-# CONFIG_POWER_RESET_RESTART is not set +-CONFIG_POWER_SUPPLY=y +-# CONFIG_POWER_SUPPLY_DEBUG is not set +-# CONFIG_PDA_POWER is not set +-# CONFIG_GENERIC_ADC_BATTERY is not set +-# CONFIG_TEST_POWER is not set +-# CONFIG_CHARGER_ADP5061 is not set +-# CONFIG_BATTERY_DS2780 is not set +-# CONFIG_BATTERY_DS2781 is not set +-# CONFIG_BATTERY_DS2782 is not set +-# CONFIG_BATTERY_SBS is not set +-# CONFIG_CHARGER_SBS is 
not set +-# CONFIG_MANAGER_SBS is not set +-# CONFIG_BATTERY_BQ27XXX is not set +-# CONFIG_BATTERY_MAX17040 is not set +-# CONFIG_BATTERY_MAX17042 is not set +-# CONFIG_CHARGER_MAX8903 is not set +-# CONFIG_CHARGER_LP8727 is not set +-# CONFIG_CHARGER_GPIO is not set +-# CONFIG_CHARGER_LTC3651 is not set +-# CONFIG_CHARGER_BQ2415X is not set +-# CONFIG_CHARGER_BQ24257 is not set +-# CONFIG_CHARGER_BQ24735 is not set +-# CONFIG_CHARGER_BQ25890 is not set + CONFIG_CHARGER_SMB347=m +-# CONFIG_BATTERY_GAUGE_LTC2941 is not set +-# CONFIG_CHARGER_RT9455 is not set +-CONFIG_HWMON=y +-CONFIG_HWMON_VID=m +-# CONFIG_HWMON_DEBUG_CHIP is not set +- +-# +-# Native drivers +-# + CONFIG_SENSORS_ABITUGURU=m + CONFIG_SENSORS_ABITUGURU3=m +-# CONFIG_SENSORS_AD7314 is not set + CONFIG_SENSORS_AD7414=m + CONFIG_SENSORS_AD7418=m + CONFIG_SENSORS_ADM1021=m +@@ -3671,8 +1252,6 @@ + CONFIG_SENSORS_ADM1029=m + CONFIG_SENSORS_ADM1031=m + CONFIG_SENSORS_ADM9240=m +-CONFIG_SENSORS_ADT7X10=m +-# CONFIG_SENSORS_ADT7310 is not set + CONFIG_SENSORS_ADT7410=m + CONFIG_SENSORS_ADT7411=m + CONFIG_SENSORS_ADT7462=m +@@ -3684,57 +1263,38 @@ + CONFIG_SENSORS_FAM15H_POWER=m + CONFIG_SENSORS_APPLESMC=m + CONFIG_SENSORS_ASB100=m +-# CONFIG_SENSORS_ASPEED is not set + CONFIG_SENSORS_ATXP1=m + CONFIG_SENSORS_DS620=m + CONFIG_SENSORS_DS1621=m +-CONFIG_SENSORS_DELL_SMM=m + CONFIG_SENSORS_I5K_AMB=m + CONFIG_SENSORS_F71805F=m + CONFIG_SENSORS_F71882FG=m + CONFIG_SENSORS_F75375S=m + CONFIG_SENSORS_FSCHMD=m +-# CONFIG_SENSORS_FTSTEUTATES is not set + CONFIG_SENSORS_GL518SM=m + CONFIG_SENSORS_GL520SM=m + CONFIG_SENSORS_G760A=m +-# CONFIG_SENSORS_G762 is not set +-# CONFIG_SENSORS_HIH6130 is not set + CONFIG_SENSORS_IBMAEM=m + CONFIG_SENSORS_IBMPEX=m +-# CONFIG_SENSORS_IIO_HWMON is not set + CONFIG_SENSORS_I5500=m + CONFIG_SENSORS_CORETEMP=m + CONFIG_SENSORS_IT87=m + CONFIG_SENSORS_JC42=m +-# CONFIG_SENSORS_POWR1220 is not set + CONFIG_SENSORS_LINEAGE=m +-# CONFIG_SENSORS_LTC2945 is not set +-# CONFIG_SENSORS_LTC2990 is not set + CONFIG_SENSORS_LTC4151=m + CONFIG_SENSORS_LTC4215=m +-# CONFIG_SENSORS_LTC4222 is not set + CONFIG_SENSORS_LTC4245=m +-# CONFIG_SENSORS_LTC4260 is not set + CONFIG_SENSORS_LTC4261=m +-# CONFIG_SENSORS_MAX1111 is not set + CONFIG_SENSORS_MAX16065=m + CONFIG_SENSORS_MAX1619=m + CONFIG_SENSORS_MAX1668=m + CONFIG_SENSORS_MAX197=m +-# CONFIG_SENSORS_MAX31722 is not set +-# CONFIG_SENSORS_MAX6621 is not set + CONFIG_SENSORS_MAX6639=m + CONFIG_SENSORS_MAX6642=m + CONFIG_SENSORS_MAX6650=m + CONFIG_SENSORS_MAX6697=m +-# CONFIG_SENSORS_MAX31790 is not set + CONFIG_SENSORS_MCP3021=m +-# CONFIG_SENSORS_MLXREG_FAN is not set +-# CONFIG_SENSORS_TC654 is not set +-# CONFIG_SENSORS_ADCXX is not set + CONFIG_SENSORS_LM63=m +-# CONFIG_SENSORS_LM70 is not set + CONFIG_SENSORS_LM73=m + CONFIG_SENSORS_LM75=m + CONFIG_SENSORS_LM77=m +@@ -3752,140 +1312,69 @@ + CONFIG_SENSORS_PC87360=m + CONFIG_SENSORS_PC87427=m + CONFIG_SENSORS_NTC_THERMISTOR=m +-# CONFIG_SENSORS_NCT6683 is not set + CONFIG_SENSORS_NCT6775=m +-# CONFIG_SENSORS_NCT7802 is not set +-# CONFIG_SENSORS_NCT7904 is not set +-# CONFIG_SENSORS_NPCM7XX is not set + CONFIG_SENSORS_PCF8591=m + CONFIG_PMBUS=m +-CONFIG_SENSORS_PMBUS=m + CONFIG_SENSORS_ADM1275=m +-# CONFIG_SENSORS_IBM_CFFPS is not set +-# CONFIG_SENSORS_IR35221 is not set + CONFIG_SENSORS_LM25066=m + CONFIG_SENSORS_LTC2978=m +-# CONFIG_SENSORS_LTC3815 is not set + CONFIG_SENSORS_MAX16064=m +-# CONFIG_SENSORS_MAX20751 is not set +-# CONFIG_SENSORS_MAX31785 is not set + CONFIG_SENSORS_MAX34440=m + 
CONFIG_SENSORS_MAX8688=m +-# CONFIG_SENSORS_TPS40422 is not set +-# CONFIG_SENSORS_TPS53679 is not set + CONFIG_SENSORS_UCD9000=m + CONFIG_SENSORS_UCD9200=m + CONFIG_SENSORS_ZL6100=m + CONFIG_SENSORS_SHT15=m + CONFIG_SENSORS_SHT21=m +-# CONFIG_SENSORS_SHT3x is not set +-# CONFIG_SENSORS_SHTC1 is not set + CONFIG_SENSORS_SIS5595=m + CONFIG_SENSORS_DME1737=m + CONFIG_SENSORS_EMC1403=m +-# CONFIG_SENSORS_EMC2103 is not set + CONFIG_SENSORS_EMC6W201=m + CONFIG_SENSORS_SMSC47M1=m + CONFIG_SENSORS_SMSC47M192=m + CONFIG_SENSORS_SMSC47B397=m +-CONFIG_SENSORS_SCH56XX_COMMON=m + CONFIG_SENSORS_SCH5627=m + CONFIG_SENSORS_SCH5636=m +-# CONFIG_SENSORS_STTS751 is not set +-# CONFIG_SENSORS_SMM665 is not set +-# CONFIG_SENSORS_ADC128D818 is not set + CONFIG_SENSORS_ADS1015=m + CONFIG_SENSORS_ADS7828=m +-# CONFIG_SENSORS_ADS7871 is not set + CONFIG_SENSORS_AMC6821=m + CONFIG_SENSORS_INA209=m + CONFIG_SENSORS_INA2XX=m +-# CONFIG_SENSORS_INA3221 is not set +-# CONFIG_SENSORS_TC74 is not set + CONFIG_SENSORS_THMC50=m + CONFIG_SENSORS_TMP102=m +-# CONFIG_SENSORS_TMP103 is not set +-# CONFIG_SENSORS_TMP108 is not set + CONFIG_SENSORS_TMP401=m + CONFIG_SENSORS_TMP421=m + CONFIG_SENSORS_VIA_CPUTEMP=m + CONFIG_SENSORS_VIA686A=m + CONFIG_SENSORS_VT1211=m + CONFIG_SENSORS_VT8231=m +-# CONFIG_SENSORS_W83773G is not set + CONFIG_SENSORS_W83781D=m + CONFIG_SENSORS_W83791D=m + CONFIG_SENSORS_W83792D=m + CONFIG_SENSORS_W83793=m + CONFIG_SENSORS_W83795=m +-# CONFIG_SENSORS_W83795_FANCTRL is not set + CONFIG_SENSORS_W83L785TS=m + CONFIG_SENSORS_W83L786NG=m + CONFIG_SENSORS_W83627HF=m + CONFIG_SENSORS_W83627EHF=m +-# CONFIG_SENSORS_XGENE is not set +- +-# +-# ACPI drivers +-# + CONFIG_SENSORS_ACPI_POWER=m + CONFIG_SENSORS_ATK0110=m + CONFIG_THERMAL=y +-# CONFIG_THERMAL_STATISTICS is not set +-CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 +-CONFIG_THERMAL_HWMON=y +-CONFIG_THERMAL_WRITABLE_TRIPS=y +-CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y +-# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set +-# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set +-# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set + CONFIG_THERMAL_GOV_FAIR_SHARE=y +-CONFIG_THERMAL_GOV_STEP_WISE=y +-CONFIG_THERMAL_GOV_BANG_BANG=y +-CONFIG_THERMAL_GOV_USER_SPACE=y +-# CONFIG_THERMAL_GOV_POWER_ALLOCATOR is not set +-# CONFIG_THERMAL_EMULATION is not set + CONFIG_INTEL_POWERCLAMP=m +-CONFIG_X86_PKG_TEMP_THERMAL=m +-CONFIG_INTEL_SOC_DTS_IOSF_CORE=m +-# CONFIG_INTEL_SOC_DTS_THERMAL is not set +- +-# +-# ACPI INT340X thermal drivers +-# + CONFIG_INT340X_THERMAL=m +-CONFIG_ACPI_THERMAL_REL=m +-# CONFIG_INT3406_THERMAL is not set + CONFIG_INTEL_PCH_THERMAL=m +-# CONFIG_GENERIC_ADC_THERMAL is not set + CONFIG_WATCHDOG=y +-CONFIG_WATCHDOG_CORE=y +-# CONFIG_WATCHDOG_NOWAYOUT is not set +-CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y + CONFIG_WATCHDOG_SYSFS=y +- +-# +-# Watchdog Device Drivers +-# + CONFIG_SOFT_WATCHDOG=m + CONFIG_WDAT_WDT=m +-# CONFIG_XILINX_WATCHDOG is not set +-# CONFIG_ZIIRAVE_WATCHDOG is not set +-# CONFIG_CADENCE_WATCHDOG is not set +-# CONFIG_DW_WATCHDOG is not set +-# CONFIG_MAX63XX_WATCHDOG is not set +-# CONFIG_ACQUIRE_WDT is not set +-# CONFIG_ADVANTECH_WDT is not set + CONFIG_ALIM1535_WDT=m + CONFIG_ALIM7101_WDT=m +-# CONFIG_EBC_C384_WDT is not set + CONFIG_F71808E_WDT=m + CONFIG_SP5100_TCO=m + CONFIG_SBC_FITPC2_WATCHDOG=m +-# CONFIG_EUROTECH_WDT is not set + CONFIG_IB700_WDT=m + CONFIG_IBMASR=m +-# CONFIG_WAFER_WDT is not set + CONFIG_I6300ESB_WDT=m + CONFIG_IE6XX_WDT=m + CONFIG_ITCO_WDT=m +@@ -3893,146 +1382,26 @@ + CONFIG_IT8712F_WDT=m + 
CONFIG_IT87_WDT=m + CONFIG_HP_WATCHDOG=m +-CONFIG_HPWDT_NMI_DECODING=y +-# CONFIG_SC1200_WDT is not set +-# CONFIG_PC87413_WDT is not set + CONFIG_NV_TCO=m +-# CONFIG_60XX_WDT is not set +-# CONFIG_CPU5_WDT is not set + CONFIG_SMSC_SCH311X_WDT=m +-# CONFIG_SMSC37B787_WDT is not set + CONFIG_VIA_WDT=m + CONFIG_W83627HF_WDT=m + CONFIG_W83877F_WDT=m + CONFIG_W83977F_WDT=m + CONFIG_MACHZ_WDT=m +-# CONFIG_SBC_EPX_C3_WATCHDOG is not set + CONFIG_INTEL_MEI_WDT=m +-# CONFIG_NI903X_WDT is not set +-# CONFIG_NIC7018_WDT is not set +-# CONFIG_MEN_A21_WDT is not set +-CONFIG_XEN_WDT=m +- +-# +-# PCI-based Watchdog Cards +-# + CONFIG_PCIPCWATCHDOG=m + CONFIG_WDTPCI=m +- +-# +-# USB-based Watchdog Cards +-# + CONFIG_USBPCWATCHDOG=m +- +-# +-# Watchdog Pretimeout Governors +-# +-# CONFIG_WATCHDOG_PRETIMEOUT_GOV is not set +-CONFIG_SSB_POSSIBLE=y +-# CONFIG_SSB is not set +-CONFIG_BCMA_POSSIBLE=y +-CONFIG_BCMA=m +-CONFIG_BCMA_HOST_PCI_POSSIBLE=y +-CONFIG_BCMA_HOST_PCI=y +-# CONFIG_BCMA_HOST_SOC is not set +-CONFIG_BCMA_DRIVER_PCI=y + CONFIG_BCMA_DRIVER_GMAC_CMN=y + CONFIG_BCMA_DRIVER_GPIO=y +-# CONFIG_BCMA_DEBUG is not set +- +-# +-# Multifunction device drivers +-# +-CONFIG_MFD_CORE=y +-# CONFIG_MFD_AS3711 is not set +-# CONFIG_PMIC_ADP5520 is not set +-# CONFIG_MFD_AAT2870_CORE is not set +-# CONFIG_MFD_BCM590XX is not set +-# CONFIG_MFD_BD9571MWV is not set +-# CONFIG_MFD_AXP20X_I2C is not set +-# CONFIG_MFD_CROS_EC is not set +-# CONFIG_MFD_MADERA is not set +-# CONFIG_PMIC_DA903X is not set +-# CONFIG_MFD_DA9052_SPI is not set +-# CONFIG_MFD_DA9052_I2C is not set +-# CONFIG_MFD_DA9055 is not set +-# CONFIG_MFD_DA9062 is not set +-# CONFIG_MFD_DA9063 is not set +-# CONFIG_MFD_DA9150 is not set +-# CONFIG_MFD_DLN2 is not set +-# CONFIG_MFD_MC13XXX_SPI is not set +-# CONFIG_MFD_MC13XXX_I2C is not set +-# CONFIG_HTC_PASIC3 is not set +-# CONFIG_HTC_I2CPLD is not set +-# CONFIG_MFD_INTEL_QUARK_I2C_GPIO is not set +-CONFIG_LPC_ICH=m +-CONFIG_LPC_SCH=m +-# CONFIG_INTEL_SOC_PMIC_CHTDC_TI is not set +-CONFIG_MFD_INTEL_LPSS=y + CONFIG_MFD_INTEL_LPSS_ACPI=y + CONFIG_MFD_INTEL_LPSS_PCI=y +-# CONFIG_MFD_JANZ_CMODIO is not set +-# CONFIG_MFD_KEMPLD is not set +-# CONFIG_MFD_88PM800 is not set +-# CONFIG_MFD_88PM805 is not set +-# CONFIG_MFD_88PM860X is not set +-# CONFIG_MFD_MAX14577 is not set +-# CONFIG_MFD_MAX77693 is not set +-# CONFIG_MFD_MAX77843 is not set +-# CONFIG_MFD_MAX8907 is not set +-# CONFIG_MFD_MAX8925 is not set +-# CONFIG_MFD_MAX8997 is not set +-# CONFIG_MFD_MAX8998 is not set +-# CONFIG_MFD_MT6397 is not set +-# CONFIG_MFD_MENF21BMC is not set +-# CONFIG_EZX_PCAP is not set + CONFIG_MFD_VIPERBOARD=m +-# CONFIG_MFD_RETU is not set +-# CONFIG_MFD_PCF50633 is not set +-# CONFIG_UCB1400_CORE is not set +-# CONFIG_MFD_RDC321X is not set +-# CONFIG_MFD_RT5033 is not set +-# CONFIG_MFD_RC5T583 is not set +-# CONFIG_MFD_SEC_CORE is not set +-# CONFIG_MFD_SI476X_CORE is not set + CONFIG_MFD_SM501=m + CONFIG_MFD_SM501_GPIO=y +-# CONFIG_MFD_SKY81452 is not set +-# CONFIG_MFD_SMSC is not set +-# CONFIG_ABX500_CORE is not set +-# CONFIG_MFD_SYSCON is not set +-# CONFIG_MFD_TI_AM335X_TSCADC is not set +-# CONFIG_MFD_LP3943 is not set +-# CONFIG_MFD_LP8788 is not set +-# CONFIG_MFD_TI_LMU is not set +-# CONFIG_MFD_PALMAS is not set +-# CONFIG_TPS6105X is not set +-# CONFIG_TPS65010 is not set +-# CONFIG_TPS6507X is not set +-# CONFIG_MFD_TPS65086 is not set +-# CONFIG_MFD_TPS65090 is not set +-# CONFIG_MFD_TI_LP873X is not set +-# CONFIG_MFD_TPS6586X is not set +-# CONFIG_MFD_TPS65910 is not set +-# 
CONFIG_MFD_TPS65912_I2C is not set +-# CONFIG_MFD_TPS65912_SPI is not set +-# CONFIG_MFD_TPS80031 is not set +-# CONFIG_TWL4030_CORE is not set +-# CONFIG_TWL6040_CORE is not set +-# CONFIG_MFD_WL1273_CORE is not set +-# CONFIG_MFD_LM3533 is not set + CONFIG_MFD_VX855=m +-# CONFIG_MFD_ARIZONA_I2C is not set +-# CONFIG_MFD_ARIZONA_SPI is not set +-# CONFIG_MFD_WM8400 is not set +-# CONFIG_MFD_WM831X_I2C is not set +-# CONFIG_MFD_WM831X_SPI is not set +-# CONFIG_MFD_WM8350_I2C is not set +-# CONFIG_MFD_WM8994 is not set +-# CONFIG_REGULATOR is not set +-CONFIG_CEC_CORE=y + CONFIG_RC_CORE=m +-CONFIG_RC_MAP=m + CONFIG_LIRC=y + CONFIG_RC_DECODERS=y + CONFIG_IR_NEC_DECODER=m +@@ -4041,9 +1410,7 @@ + CONFIG_IR_JVC_DECODER=m + CONFIG_IR_SONY_DECODER=m + CONFIG_IR_SANYO_DECODER=m +-# CONFIG_IR_SHARP_DECODER is not set + CONFIG_IR_MCE_KBD_DECODER=m +-# CONFIG_IR_XMP_DECODER is not set + CONFIG_IR_IMON_DECODER=m + CONFIG_RC_DEVICES=y + CONFIG_RC_ATI_REMOTE=m +@@ -4057,65 +1424,31 @@ + CONFIG_IR_REDRAT3=m + CONFIG_IR_STREAMZAP=m + CONFIG_IR_WINBOND_CIR=m +-# CONFIG_IR_IGORPLUGUSB is not set + CONFIG_IR_IGUANA=m + CONFIG_IR_TTUSBIR=m +-# CONFIG_RC_LOOPBACK is not set + CONFIG_IR_SERIAL=m + CONFIG_IR_SERIAL_TRANSMITTER=y + CONFIG_IR_SIR=m + CONFIG_MEDIA_SUPPORT=m +- +-# +-# Multimedia core support +-# + CONFIG_MEDIA_CAMERA_SUPPORT=y + CONFIG_MEDIA_ANALOG_TV_SUPPORT=y + CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y + CONFIG_MEDIA_RADIO_SUPPORT=y +-# CONFIG_MEDIA_SDR_SUPPORT is not set + CONFIG_MEDIA_CEC_SUPPORT=y +-# CONFIG_MEDIA_CONTROLLER is not set +-CONFIG_VIDEO_DEV=m +-CONFIG_VIDEO_V4L2=m +-# CONFIG_VIDEO_ADV_DEBUG is not set +-# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set +-CONFIG_VIDEO_TUNER=m +-CONFIG_VIDEOBUF_GEN=m +-CONFIG_VIDEOBUF_DMA_SG=m +-CONFIG_VIDEOBUF_VMALLOC=m +-CONFIG_DVB_CORE=m +-# CONFIG_DVB_MMAP is not set +-CONFIG_DVB_NET=y +-CONFIG_TTPCI_EEPROM=m + CONFIG_DVB_MAX_ADAPTERS=8 + CONFIG_DVB_DYNAMIC_MINORS=y +-# CONFIG_DVB_DEMUX_SECTION_LOSS_LOG is not set +-# CONFIG_DVB_ULE_DEBUG is not set +- +-# +-# Media drivers +-# + CONFIG_MEDIA_USB_SUPPORT=y +- +-# +-# Webcam devices +-# + CONFIG_USB_VIDEO_CLASS=m +-CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y +-CONFIG_USB_GSPCA=m + CONFIG_USB_M5602=m + CONFIG_USB_STV06XX=m + CONFIG_USB_GL860=m + CONFIG_USB_GSPCA_BENQ=m + CONFIG_USB_GSPCA_CONEX=m + CONFIG_USB_GSPCA_CPIA1=m +-# CONFIG_USB_GSPCA_DTCS033 is not set + CONFIG_USB_GSPCA_ETOMS=m + CONFIG_USB_GSPCA_FINEPIX=m + CONFIG_USB_GSPCA_JEILINJ=m + CONFIG_USB_GSPCA_JL2005BCD=m +-# CONFIG_USB_GSPCA_KINECT is not set + CONFIG_USB_GSPCA_KONICA=m + CONFIG_USB_GSPCA_MARS=m + CONFIG_USB_GSPCA_MR97310A=m +@@ -4142,61 +1475,25 @@ + CONFIG_USB_GSPCA_SQ905C=m + CONFIG_USB_GSPCA_SQ930X=m + CONFIG_USB_GSPCA_STK014=m +-# CONFIG_USB_GSPCA_STK1135 is not set + CONFIG_USB_GSPCA_STV0680=m + CONFIG_USB_GSPCA_SUNPLUS=m + CONFIG_USB_GSPCA_T613=m + CONFIG_USB_GSPCA_TOPRO=m +-# CONFIG_USB_GSPCA_TOUPTEK is not set + CONFIG_USB_GSPCA_TV8532=m + CONFIG_USB_GSPCA_VC032X=m + CONFIG_USB_GSPCA_VICAM=m + CONFIG_USB_GSPCA_XIRLINK_CIT=m + CONFIG_USB_GSPCA_ZC3XX=m + CONFIG_USB_PWC=m +-# CONFIG_USB_PWC_DEBUG is not set +-CONFIG_USB_PWC_INPUT_EVDEV=y +-# CONFIG_VIDEO_CPIA2 is not set +-CONFIG_USB_ZR364XX=m + CONFIG_USB_STKWEBCAM=m + CONFIG_USB_S2255=m +-# CONFIG_VIDEO_USBTV is not set +- +-# +-# Analog TV USB devices +-# + CONFIG_VIDEO_PVRUSB2=m +-CONFIG_VIDEO_PVRUSB2_SYSFS=y +-CONFIG_VIDEO_PVRUSB2_DVB=y +-# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set + CONFIG_VIDEO_HDPVR=m + CONFIG_VIDEO_USBVISION=m +-# CONFIG_VIDEO_STK1160_COMMON is not set +-# 
CONFIG_VIDEO_GO7007 is not set +- +-# +-# Analog/digital TV USB devices +-# + CONFIG_VIDEO_AU0828=m +-CONFIG_VIDEO_AU0828_V4L2=y +-# CONFIG_VIDEO_AU0828_RC is not set +-CONFIG_VIDEO_CX231XX=m +-CONFIG_VIDEO_CX231XX_RC=y +-CONFIG_VIDEO_CX231XX_ALSA=m +-CONFIG_VIDEO_CX231XX_DVB=m +-CONFIG_VIDEO_TM6000=m +-CONFIG_VIDEO_TM6000_ALSA=m +-CONFIG_VIDEO_TM6000_DVB=m +- +-# +-# Digital TV USB devices +-# + CONFIG_DVB_USB=m +-# CONFIG_DVB_USB_DEBUG is not set +-CONFIG_DVB_USB_DIB3000MC=m + CONFIG_DVB_USB_A800=m + CONFIG_DVB_USB_DIBUSB_MB=m +-# CONFIG_DVB_USB_DIBUSB_MB_FAULTY is not set + CONFIG_DVB_USB_DIBUSB_MC=m + CONFIG_DVB_USB_DIB0700=m + CONFIG_DVB_USB_UMT_010=m +@@ -4230,491 +1527,58 @@ + CONFIG_DVB_USB_LME2510=m + CONFIG_DVB_USB_MXL111SF=m + CONFIG_DVB_USB_RTL28XXU=m +-# CONFIG_DVB_USB_DVBSKY is not set +-# CONFIG_DVB_USB_ZD1301 is not set + CONFIG_DVB_TTUSB_BUDGET=m + CONFIG_DVB_TTUSB_DEC=m + CONFIG_SMS_USB_DRV=m + CONFIG_DVB_B2C2_FLEXCOP_USB=m +-# CONFIG_DVB_B2C2_FLEXCOP_USB_DEBUG is not set +-# CONFIG_DVB_AS102 is not set +- +-# +-# Webcam, TV (analog/digital) USB devices +-# + CONFIG_VIDEO_EM28XX=m +-# CONFIG_VIDEO_EM28XX_V4L2 is not set + CONFIG_VIDEO_EM28XX_ALSA=m + CONFIG_VIDEO_EM28XX_DVB=m +-CONFIG_VIDEO_EM28XX_RC=m +- +-# +-# USB HDMI CEC adapters +-# + CONFIG_USB_PULSE8_CEC=m + CONFIG_USB_RAINSHADOW_CEC=m + CONFIG_MEDIA_PCI_SUPPORT=y +- +-# +-# Media capture support +-# +-# CONFIG_VIDEO_MEYE is not set +-# CONFIG_VIDEO_SOLO6X10 is not set +-# CONFIG_VIDEO_TW5864 is not set +-# CONFIG_VIDEO_TW68 is not set +-# CONFIG_VIDEO_TW686X is not set +- +-# +-# Media capture/analog TV support +-# + CONFIG_VIDEO_IVTV=m +-# CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS is not set +-# CONFIG_VIDEO_IVTV_ALSA is not set + CONFIG_VIDEO_FB_IVTV=m +-# CONFIG_VIDEO_HEXIUM_GEMINI is not set +-# CONFIG_VIDEO_HEXIUM_ORION is not set +-# CONFIG_VIDEO_MXB is not set +-# CONFIG_VIDEO_DT3155 is not set +- +-# +-# Media capture/analog/hybrid TV support +-# +-CONFIG_VIDEO_CX18=m +-CONFIG_VIDEO_CX18_ALSA=m + CONFIG_VIDEO_CX23885=m + CONFIG_MEDIA_ALTERA_CI=m +-# CONFIG_VIDEO_CX25821 is not set + CONFIG_VIDEO_CX88=m + CONFIG_VIDEO_CX88_ALSA=m + CONFIG_VIDEO_CX88_BLACKBIRD=m + CONFIG_VIDEO_CX88_DVB=m + # CONFIG_VIDEO_CX88_ENABLE_VP3054 is not set +-CONFIG_VIDEO_CX88_MPEG=m +-CONFIG_VIDEO_BT848=m +-CONFIG_DVB_BT8XX=m + CONFIG_VIDEO_SAA7134=m + CONFIG_VIDEO_SAA7134_ALSA=m +-CONFIG_VIDEO_SAA7134_RC=y + CONFIG_VIDEO_SAA7134_DVB=m + CONFIG_VIDEO_SAA7164=m +- +-# +-# Media digital TV PCI Adapters +-# +-CONFIG_DVB_AV7110_IR=y +-CONFIG_DVB_AV7110=m +-CONFIG_DVB_AV7110_OSD=y + CONFIG_DVB_BUDGET_CORE=m + CONFIG_DVB_BUDGET=m + CONFIG_DVB_BUDGET_CI=m +-CONFIG_DVB_BUDGET_AV=m +-CONFIG_DVB_BUDGET_PATCH=m + CONFIG_DVB_B2C2_FLEXCOP_PCI=m +-# CONFIG_DVB_B2C2_FLEXCOP_PCI_DEBUG is not set + CONFIG_DVB_PLUTO2=m + CONFIG_DVB_DM1105=m + CONFIG_DVB_PT1=m +-# CONFIG_DVB_PT3 is not set + CONFIG_MANTIS_CORE=m + CONFIG_DVB_MANTIS=m + CONFIG_DVB_HOPPER=m + CONFIG_DVB_NGENE=m + CONFIG_DVB_DDBRIDGE=m +-# CONFIG_DVB_DDBRIDGE_MSIENABLE is not set +-# CONFIG_DVB_SMIPCIE is not set +-# CONFIG_DVB_NETUP_UNIDVB is not set +-# CONFIG_V4L_PLATFORM_DRIVERS is not set +-# CONFIG_V4L_MEM2MEM_DRIVERS is not set +-# CONFIG_V4L_TEST_DRIVERS is not set +-# CONFIG_DVB_PLATFORM_DRIVERS is not set + CONFIG_CEC_PLATFORM_DRIVERS=y +- +-# +-# Supported MMC/SDIO adapters +-# + CONFIG_SMS_SDIO_DRV=m +-CONFIG_RADIO_ADAPTERS=y +-CONFIG_RADIO_TEA575X=m +-# CONFIG_RADIO_SI470X is not set +-# CONFIG_RADIO_SI4713 is not set +-# CONFIG_USB_MR800 is not set +-# CONFIG_USB_DSBR is not 
set +-# CONFIG_RADIO_MAXIRADIO is not set +-# CONFIG_RADIO_SHARK is not set +-# CONFIG_RADIO_SHARK2 is not set +-# CONFIG_USB_KEENE is not set +-# CONFIG_USB_RAREMONO is not set +-# CONFIG_USB_MA901 is not set +-# CONFIG_RADIO_TEA5764 is not set +-# CONFIG_RADIO_SAA7706H is not set +-# CONFIG_RADIO_TEF6862 is not set +-# CONFIG_RADIO_WL1273 is not set +- +-# +-# Texas Instruments WL128x FM driver (ST based) +-# +- +-# +-# Supported FireWire (IEEE 1394) Adapters +-# + CONFIG_DVB_FIREDTV=m +-CONFIG_DVB_FIREDTV_INPUT=y +-CONFIG_MEDIA_COMMON_OPTIONS=y +- +-# +-# common driver options +-# +-CONFIG_VIDEO_CX2341X=m +-CONFIG_VIDEO_TVEEPROM=m +-CONFIG_CYPRESS_FIRMWARE=m +-CONFIG_VIDEOBUF2_CORE=m +-CONFIG_VIDEOBUF2_V4L2=m +-CONFIG_VIDEOBUF2_MEMOPS=m +-CONFIG_VIDEOBUF2_VMALLOC=m +-CONFIG_VIDEOBUF2_DMA_SG=m +-CONFIG_VIDEOBUF2_DVB=m +-CONFIG_DVB_B2C2_FLEXCOP=m +-CONFIG_VIDEO_SAA7146=m +-CONFIG_VIDEO_SAA7146_VV=m +-CONFIG_SMS_SIANO_MDTV=m +-CONFIG_SMS_SIANO_RC=y +-# CONFIG_SMS_SIANO_DEBUGFS is not set +- +-# +-# Media ancillary drivers (tuners, sensors, i2c, spi, frontends) +-# +-CONFIG_MEDIA_SUBDRV_AUTOSELECT=y +-CONFIG_MEDIA_ATTACH=y +-CONFIG_VIDEO_IR_I2C=m +- +-# +-# Audio decoders, processors and mixers +-# +-CONFIG_VIDEO_TVAUDIO=m +-CONFIG_VIDEO_TDA7432=m +-CONFIG_VIDEO_MSP3400=m +-CONFIG_VIDEO_CS3308=m +-CONFIG_VIDEO_CS5345=m +-CONFIG_VIDEO_CS53L32A=m +-CONFIG_VIDEO_WM8775=m +-CONFIG_VIDEO_WM8739=m +-CONFIG_VIDEO_VP27SMPX=m +- +-# +-# RDS decoders +-# +-CONFIG_VIDEO_SAA6588=m +- +-# +-# Video decoders +-# +-CONFIG_VIDEO_SAA711X=m +- +-# +-# Video and audio decoders +-# +-CONFIG_VIDEO_SAA717X=m +-CONFIG_VIDEO_CX25840=m +- +-# +-# Video encoders +-# +-CONFIG_VIDEO_SAA7127=m +- +-# +-# Camera sensor devices +-# +- +-# +-# Flash devices +-# +- +-# +-# Video improvement chips +-# +-CONFIG_VIDEO_UPD64031A=m +-CONFIG_VIDEO_UPD64083=m +- +-# +-# Audio/Video compression chips +-# +-CONFIG_VIDEO_SAA6752HS=m +- +-# +-# SDR tuner chips +-# +- +-# +-# Miscellaneous helper chips +-# +-CONFIG_VIDEO_M52790=m +- +-# +-# Sensors used on soc_camera driver +-# +- +-# +-# Media SPI Adapters +-# +-# CONFIG_CXD2880_SPI_DRV is not set +-CONFIG_MEDIA_TUNER=m +-CONFIG_MEDIA_TUNER_SIMPLE=m +-CONFIG_MEDIA_TUNER_TDA18250=m +-CONFIG_MEDIA_TUNER_TDA8290=m +-CONFIG_MEDIA_TUNER_TDA827X=m +-CONFIG_MEDIA_TUNER_TDA18271=m +-CONFIG_MEDIA_TUNER_TDA9887=m +-CONFIG_MEDIA_TUNER_TEA5761=m +-CONFIG_MEDIA_TUNER_TEA5767=m +-CONFIG_MEDIA_TUNER_MT20XX=m +-CONFIG_MEDIA_TUNER_MT2060=m +-CONFIG_MEDIA_TUNER_MT2063=m +-CONFIG_MEDIA_TUNER_MT2266=m +-CONFIG_MEDIA_TUNER_MT2131=m +-CONFIG_MEDIA_TUNER_QT1010=m +-CONFIG_MEDIA_TUNER_XC2028=m +-CONFIG_MEDIA_TUNER_XC5000=m +-CONFIG_MEDIA_TUNER_XC4000=m +-CONFIG_MEDIA_TUNER_MXL5005S=m +-CONFIG_MEDIA_TUNER_MXL5007T=m +-CONFIG_MEDIA_TUNER_MC44S803=m +-CONFIG_MEDIA_TUNER_MAX2165=m +-CONFIG_MEDIA_TUNER_TDA18218=m +-CONFIG_MEDIA_TUNER_FC0011=m +-CONFIG_MEDIA_TUNER_FC0012=m +-CONFIG_MEDIA_TUNER_FC0013=m +-CONFIG_MEDIA_TUNER_TDA18212=m +-CONFIG_MEDIA_TUNER_E4000=m +-CONFIG_MEDIA_TUNER_FC2580=m +-CONFIG_MEDIA_TUNER_M88RS6000T=m +-CONFIG_MEDIA_TUNER_TUA9001=m +-CONFIG_MEDIA_TUNER_SI2157=m +-CONFIG_MEDIA_TUNER_IT913X=m +-CONFIG_MEDIA_TUNER_R820T=m +-CONFIG_MEDIA_TUNER_QM1D1C0042=m +-CONFIG_MEDIA_TUNER_QM1D1B0004=m +- +-# +-# Multistandard (satellite) frontends +-# +-CONFIG_DVB_STB0899=m +-CONFIG_DVB_STB6100=m +-CONFIG_DVB_STV090x=m +-CONFIG_DVB_STV0910=m +-CONFIG_DVB_STV6110x=m +-CONFIG_DVB_STV6111=m +-CONFIG_DVB_MXL5XX=m +-CONFIG_DVB_M88DS3103=m +- +-# +-# Multistandard (cable + terrestrial) frontends +-# 
+-CONFIG_DVB_DRXK=m +-CONFIG_DVB_TDA18271C2DD=m +-CONFIG_DVB_SI2165=m +-CONFIG_DVB_MN88472=m +-CONFIG_DVB_MN88473=m +- +-# +-# DVB-S (satellite) frontends +-# +-CONFIG_DVB_CX24110=m +-CONFIG_DVB_CX24123=m +-CONFIG_DVB_MT312=m +-CONFIG_DVB_ZL10036=m +-CONFIG_DVB_ZL10039=m +-CONFIG_DVB_S5H1420=m +-CONFIG_DVB_STV0288=m +-CONFIG_DVB_STB6000=m +-CONFIG_DVB_STV0299=m +-CONFIG_DVB_STV6110=m +-CONFIG_DVB_STV0900=m +-CONFIG_DVB_TDA8083=m +-CONFIG_DVB_TDA10086=m +-CONFIG_DVB_TDA8261=m +-CONFIG_DVB_VES1X93=m +-CONFIG_DVB_TUNER_ITD1000=m +-CONFIG_DVB_TUNER_CX24113=m +-CONFIG_DVB_TDA826X=m +-CONFIG_DVB_TUA6100=m +-CONFIG_DVB_CX24116=m +-CONFIG_DVB_CX24117=m +-CONFIG_DVB_CX24120=m +-CONFIG_DVB_SI21XX=m +-CONFIG_DVB_TS2020=m +-CONFIG_DVB_DS3000=m +-CONFIG_DVB_MB86A16=m +-CONFIG_DVB_TDA10071=m +- +-# +-# DVB-T (terrestrial) frontends +-# +-CONFIG_DVB_SP8870=m +-CONFIG_DVB_SP887X=m +-CONFIG_DVB_CX22700=m +-CONFIG_DVB_CX22702=m +-CONFIG_DVB_DRXD=m +-CONFIG_DVB_L64781=m +-CONFIG_DVB_TDA1004X=m +-CONFIG_DVB_NXT6000=m +-CONFIG_DVB_MT352=m +-CONFIG_DVB_ZL10353=m +-CONFIG_DVB_DIB3000MB=m +-CONFIG_DVB_DIB3000MC=m +-CONFIG_DVB_DIB7000M=m +-CONFIG_DVB_DIB7000P=m +-CONFIG_DVB_TDA10048=m +-CONFIG_DVB_AF9013=m +-CONFIG_DVB_EC100=m +-CONFIG_DVB_STV0367=m +-CONFIG_DVB_CXD2820R=m +-CONFIG_DVB_CXD2841ER=m +-CONFIG_DVB_RTL2830=m +-CONFIG_DVB_RTL2832=m +-CONFIG_DVB_SI2168=m +-CONFIG_DVB_GP8PSK_FE=m +- +-# +-# DVB-C (cable) frontends +-# +-CONFIG_DVB_VES1820=m +-CONFIG_DVB_TDA10021=m +-CONFIG_DVB_TDA10023=m +-CONFIG_DVB_STV0297=m +- +-# +-# ATSC (North American/Korean Terrestrial/Cable DTV) frontends +-# +-CONFIG_DVB_NXT200X=m +-CONFIG_DVB_OR51211=m +-CONFIG_DVB_OR51132=m +-CONFIG_DVB_BCM3510=m +-CONFIG_DVB_LGDT330X=m +-CONFIG_DVB_LGDT3305=m +-CONFIG_DVB_LGDT3306A=m +-CONFIG_DVB_LG2160=m +-CONFIG_DVB_S5H1409=m +-CONFIG_DVB_AU8522=m +-CONFIG_DVB_AU8522_DTV=m +-CONFIG_DVB_AU8522_V4L=m +-CONFIG_DVB_S5H1411=m +- +-# +-# ISDB-T (terrestrial) frontends +-# +-CONFIG_DVB_S921=m +-CONFIG_DVB_DIB8000=m +-CONFIG_DVB_MB86A20S=m +- +-# +-# ISDB-S (satellite) & ISDB-T (terrestrial) frontends +-# +-CONFIG_DVB_TC90522=m +- +-# +-# Digital terrestrial only tuners/PLL +-# +-CONFIG_DVB_PLL=m +-CONFIG_DVB_TUNER_DIB0070=m +-CONFIG_DVB_TUNER_DIB0090=m +- +-# +-# SEC control devices for DVB-S +-# +-CONFIG_DVB_DRX39XYJ=m +-CONFIG_DVB_LNBH25=m +-CONFIG_DVB_LNBP21=m +-CONFIG_DVB_LNBP22=m +-CONFIG_DVB_ISL6405=m +-CONFIG_DVB_ISL6421=m +-CONFIG_DVB_ISL6423=m +-CONFIG_DVB_A8293=m +-CONFIG_DVB_LGS8GXX=m +-CONFIG_DVB_ATBM8830=m +-CONFIG_DVB_TDA665x=m +-CONFIG_DVB_IX2505V=m +-CONFIG_DVB_M88RS2000=m +-CONFIG_DVB_AF9033=m +- +-# +-# Common Interface (EN50221) controller drivers +-# +-CONFIG_DVB_CXD2099=m +- +-# +-# Tools to develop new frontends +-# +-CONFIG_DVB_DUMMY_FE=m +- +-# +-# Graphics support +-# +-# CONFIG_AGP is not set +-CONFIG_INTEL_GTT=m +-CONFIG_VGA_ARB=y + CONFIG_VGA_ARB_MAX_GPUS=64 + CONFIG_VGA_SWITCHEROO=y + CONFIG_DRM=m +-CONFIG_DRM_MIPI_DSI=y + CONFIG_DRM_DP_AUX_CHARDEV=y +-# CONFIG_DRM_DEBUG_SELFTEST is not set +-CONFIG_DRM_KMS_HELPER=m +-CONFIG_DRM_KMS_FB_HELPER=y +-CONFIG_DRM_FBDEV_EMULATION=y +-CONFIG_DRM_FBDEV_OVERALLOC=100 + CONFIG_DRM_LOAD_EDID_FIRMWARE=y + CONFIG_DRM_DP_CEC=y +-CONFIG_DRM_TTM=m +-CONFIG_DRM_VM=y +-CONFIG_DRM_SCHED=m +- +-# +-# I2C encoder or helper chips +-# +-CONFIG_DRM_I2C_CH7006=m +-CONFIG_DRM_I2C_SIL164=m +-# CONFIG_DRM_I2C_NXP_TDA998X is not set +-# CONFIG_DRM_I2C_NXP_TDA9950 is not set + CONFIG_DRM_RADEON=m + CONFIG_DRM_RADEON_USERPTR=y + CONFIG_DRM_AMDGPU=m +-# CONFIG_DRM_AMDGPU_SI is not set +-# 
CONFIG_DRM_AMDGPU_CIK is not set +-# CONFIG_DRM_AMDGPU_USERPTR is not set +-# CONFIG_DRM_AMDGPU_GART_DEBUGFS is not set +- +-# +-# ACP (Audio CoProcessor) Configuration +-# + CONFIG_DRM_AMD_ACP=y +- +-# +-# Display Engine Configuration +-# +-CONFIG_DRM_AMD_DC=y +-CONFIG_DRM_AMD_DC_DCN1_0=y +-# CONFIG_DEBUG_KERNEL_DC is not set +- +-# +-# AMD Library routines +-# +-CONFIG_CHASH=m +-# CONFIG_CHASH_STATS is not set +-# CONFIG_CHASH_SELFTEST is not set + CONFIG_DRM_NOUVEAU=m +-CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT=y +-CONFIG_NOUVEAU_DEBUG=5 +-CONFIG_NOUVEAU_DEBUG_DEFAULT=3 +-# CONFIG_NOUVEAU_DEBUG_MMU is not set +-CONFIG_DRM_NOUVEAU_BACKLIGHT=y + CONFIG_DRM_I915=m +-# CONFIG_DRM_I915_ALPHA_SUPPORT is not set +-CONFIG_DRM_I915_CAPTURE_ERROR=y +-CONFIG_DRM_I915_COMPRESS_ERROR=y +-CONFIG_DRM_I915_USERPTR=y + CONFIG_DRM_I915_GVT=y + CONFIG_DRM_I915_GVT_KVMGT=m +-# CONFIG_DRM_VGEM is not set + CONFIG_DRM_VKMS=m + CONFIG_DRM_VMWGFX=m + CONFIG_DRM_VMWGFX_FBCON=y +@@ -4728,204 +1592,37 @@ + CONFIG_DRM_QXL=m + CONFIG_DRM_BOCHS=m + CONFIG_DRM_VIRTIO_GPU=m +-CONFIG_DRM_PANEL=y +- +-# +-# Display Panels +-# +-# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +-CONFIG_DRM_BRIDGE=y +-CONFIG_DRM_PANEL_BRIDGE=y +- +-# +-# Display Interface Bridges +-# +-# CONFIG_DRM_ANALOGIX_ANX78XX is not set + CONFIG_HSA_AMD=m +-# CONFIG_DRM_HISI_HIBMC is not set +-# CONFIG_DRM_TINYDRM is not set +-# CONFIG_DRM_XEN is not set +-# CONFIG_DRM_LEGACY is not set +-CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y +- +-# +-# Frame buffer Devices +-# +-CONFIG_FB_CMDLINE=y +-CONFIG_FB_NOTIFY=y + CONFIG_FB=y +-# CONFIG_FIRMWARE_EDID is not set +-CONFIG_FB_BOOT_VESA_SUPPORT=y +-CONFIG_FB_CFB_FILLRECT=y +-CONFIG_FB_CFB_COPYAREA=y +-CONFIG_FB_CFB_IMAGEBLIT=y +-CONFIG_FB_SYS_FILLRECT=m +-CONFIG_FB_SYS_COPYAREA=m +-CONFIG_FB_SYS_IMAGEBLIT=m +-# CONFIG_FB_FOREIGN_ENDIAN is not set +-CONFIG_FB_SYS_FOPS=m +-CONFIG_FB_DEFERRED_IO=y +-CONFIG_FB_BACKLIGHT=y +-# CONFIG_FB_MODE_HELPERS is not set + CONFIG_FB_TILEBLITTING=y +- +-# +-# Frame buffer hardware drivers +-# +-# CONFIG_FB_CIRRUS is not set +-# CONFIG_FB_PM2 is not set +-# CONFIG_FB_CYBER2000 is not set +-# CONFIG_FB_ARC is not set +-# CONFIG_FB_ASILIANT is not set +-# CONFIG_FB_IMSTT is not set +-# CONFIG_FB_VGA16 is not set +-# CONFIG_FB_UVESA is not set ++CONFIG_FB_VGA16=m + CONFIG_FB_VESA=y + CONFIG_FB_EFI=y +-# CONFIG_FB_N411 is not set +-# CONFIG_FB_HGA is not set +-# CONFIG_FB_OPENCORES is not set +-# CONFIG_FB_S1D13XXX is not set +-# CONFIG_FB_NVIDIA is not set +-# CONFIG_FB_RIVA is not set +-# CONFIG_FB_I740 is not set +-# CONFIG_FB_LE80578 is not set +-# CONFIG_FB_MATROX is not set +-# CONFIG_FB_RADEON is not set +-# CONFIG_FB_ATY128 is not set +-# CONFIG_FB_ATY is not set +-# CONFIG_FB_S3 is not set +-# CONFIG_FB_SAVAGE is not set +-# CONFIG_FB_SIS is not set +-# CONFIG_FB_VIA is not set +-# CONFIG_FB_NEOMAGIC is not set +-# CONFIG_FB_KYRO is not set +-# CONFIG_FB_3DFX is not set +-# CONFIG_FB_VOODOO1 is not set +-# CONFIG_FB_VT8623 is not set +-# CONFIG_FB_TRIDENT is not set +-# CONFIG_FB_ARK is not set +-# CONFIG_FB_PM3 is not set +-# CONFIG_FB_CARMINE is not set +-# CONFIG_FB_SM501 is not set +-# CONFIG_FB_SMSCUFX is not set +-# CONFIG_FB_UDL is not set +-# CONFIG_FB_IBM_GXT4500 is not set +-# CONFIG_FB_VIRTUAL is not set +-# CONFIG_XEN_FBDEV_FRONTEND is not set +-# CONFIG_FB_METRONOME is not set +-# CONFIG_FB_MB862XX is not set +-# CONFIG_FB_BROADSHEET is not set +-CONFIG_FB_HYPERV=m +-# CONFIG_FB_SIMPLE is not set +-# CONFIG_FB_SM712 is not set +-CONFIG_BACKLIGHT_LCD_SUPPORT=y 
+-CONFIG_LCD_CLASS_DEVICE=m +-# CONFIG_LCD_L4F00242T03 is not set +-# CONFIG_LCD_LMS283GF05 is not set +-# CONFIG_LCD_LTV350QV is not set +-# CONFIG_LCD_ILI922X is not set +-# CONFIG_LCD_ILI9320 is not set +-# CONFIG_LCD_TDO24M is not set +-# CONFIG_LCD_VGG2432A4 is not set + CONFIG_LCD_PLATFORM=m +-# CONFIG_LCD_S6E63M0 is not set +-# CONFIG_LCD_LD9040 is not set +-# CONFIG_LCD_AMS369FG06 is not set +-# CONFIG_LCD_LMS501KF03 is not set +-# CONFIG_LCD_HX8357 is not set +-# CONFIG_LCD_OTM3225A is not set +-CONFIG_BACKLIGHT_CLASS_DEVICE=y + # CONFIG_BACKLIGHT_GENERIC is not set +-# CONFIG_BACKLIGHT_PWM is not set + CONFIG_BACKLIGHT_APPLE=m +-# CONFIG_BACKLIGHT_PM8941_WLED is not set +-# CONFIG_BACKLIGHT_SAHARA is not set +-# CONFIG_BACKLIGHT_ADP8860 is not set +-# CONFIG_BACKLIGHT_ADP8870 is not set +-# CONFIG_BACKLIGHT_LM3630A is not set +-# CONFIG_BACKLIGHT_LM3639 is not set + CONFIG_BACKLIGHT_LP855X=m +-# CONFIG_BACKLIGHT_GPIO is not set +-# CONFIG_BACKLIGHT_LV5207LP is not set +-# CONFIG_BACKLIGHT_BD6107 is not set +-# CONFIG_BACKLIGHT_ARCXCNN is not set +-CONFIG_HDMI=y +- +-# +-# Console display driver support +-# +-CONFIG_VGA_CONSOLE=y + CONFIG_VGACON_SOFT_SCROLLBACK=y +-CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 +-# CONFIG_VGACON_SOFT_SCROLLBACK_PERSISTENT_ENABLE_BY_DEFAULT is not set +-CONFIG_DUMMY_CONSOLE=y +-CONFIG_DUMMY_CONSOLE_COLUMNS=80 +-CONFIG_DUMMY_CONSOLE_ROWS=25 +-CONFIG_FRAMEBUFFER_CONSOLE=y +-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +-CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +-# CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER is not set + CONFIG_LOGO=y + # CONFIG_LOGO_LINUX_MONO is not set + # CONFIG_LOGO_LINUX_VGA16 is not set +-CONFIG_LOGO_LINUX_CLUT224=y + CONFIG_SOUND=m +-CONFIG_SOUND_OSS_CORE=y +-CONFIG_SOUND_OSS_CORE_PRECLAIM=y + CONFIG_SND=m +-CONFIG_SND_TIMER=m +-CONFIG_SND_PCM=m +-CONFIG_SND_PCM_ELD=y +-CONFIG_SND_HWDEP=m +-CONFIG_SND_SEQ_DEVICE=m +-CONFIG_SND_RAWMIDI=m +-CONFIG_SND_COMPRESS_OFFLOAD=m +-CONFIG_SND_JACK=y +-CONFIG_SND_JACK_INPUT_DEV=y + CONFIG_SND_OSSEMUL=y +-# CONFIG_SND_MIXER_OSS is not set +-# CONFIG_SND_PCM_OSS is not set +-CONFIG_SND_PCM_TIMER=y + CONFIG_SND_HRTIMER=m +-CONFIG_SND_DYNAMIC_MINORS=y +-CONFIG_SND_MAX_CARDS=32 + # CONFIG_SND_SUPPORT_OLD_API is not set +-CONFIG_SND_PROC_FS=y +-CONFIG_SND_VERBOSE_PROCFS=y +-# CONFIG_SND_VERBOSE_PRINTK is not set +-# CONFIG_SND_DEBUG is not set +-CONFIG_SND_VMASTER=y +-CONFIG_SND_DMA_SGBUF=y + CONFIG_SND_SEQUENCER=m + CONFIG_SND_SEQ_DUMMY=m + CONFIG_SND_SEQUENCER_OSS=m +-CONFIG_SND_SEQ_HRTIMER_DEFAULT=y +-CONFIG_SND_SEQ_MIDI_EVENT=m +-CONFIG_SND_SEQ_MIDI=m +-CONFIG_SND_SEQ_MIDI_EMUL=m +-CONFIG_SND_SEQ_VIRMIDI=m +-CONFIG_SND_MPU401_UART=m +-CONFIG_SND_OPL3_LIB=m +-CONFIG_SND_OPL3_LIB_SEQ=m +-CONFIG_SND_VX_LIB=m +-CONFIG_SND_AC97_CODEC=m +-CONFIG_SND_DRIVERS=y + CONFIG_SND_PCSP=m + CONFIG_SND_DUMMY=m + CONFIG_SND_ALOOP=m + CONFIG_SND_VIRMIDI=m + CONFIG_SND_MTPAV=m +-# CONFIG_SND_MTS64 is not set +-# CONFIG_SND_SERIAL_U16550 is not set + CONFIG_SND_MPU401=m +-# CONFIG_SND_PORTMAN2X4 is not set + CONFIG_SND_AC97_POWER_SAVE=y + CONFIG_SND_AC97_POWER_SAVE_DEFAULT=5 +-CONFIG_SND_PCI=y + CONFIG_SND_AD1889=m +-# CONFIG_SND_ALS300 is not set +-# CONFIG_SND_ALS4000 is not set + CONFIG_SND_ALI5451=m + CONFIG_SND_ASIHPI=m + CONFIG_SND_ATIIXP=m +@@ -4933,17 +1630,11 @@ + CONFIG_SND_AU8810=m + CONFIG_SND_AU8820=m + CONFIG_SND_AU8830=m +-# CONFIG_SND_AW2 is not set +-# CONFIG_SND_AZT3328 is not set + CONFIG_SND_BT87X=m +-# CONFIG_SND_BT87X_OVERCLOCK is not set + CONFIG_SND_CA0106=m + CONFIG_SND_CMIPCI=m 
+-CONFIG_SND_OXYGEN_LIB=m + CONFIG_SND_OXYGEN=m +-# CONFIG_SND_CS4281 is not set + CONFIG_SND_CS46XX=m +-CONFIG_SND_CS46XX_NEW_DSP=y + CONFIG_SND_CTXFI=m + CONFIG_SND_DARLA20=m + CONFIG_SND_GINA20=m +@@ -4960,15 +1651,12 @@ + CONFIG_SND_INDIGOIOX=m + CONFIG_SND_INDIGODJX=m + CONFIG_SND_EMU10K1=m +-CONFIG_SND_EMU10K1_SEQ=m + CONFIG_SND_EMU10K1X=m + CONFIG_SND_ENS1370=m + CONFIG_SND_ENS1371=m +-# CONFIG_SND_ES1938 is not set + CONFIG_SND_ES1968=m + CONFIG_SND_ES1968_INPUT=y + CONFIG_SND_ES1968_RADIO=y +-# CONFIG_SND_FM801 is not set + CONFIG_SND_HDSP=m + CONFIG_SND_HDSPM=m + CONFIG_SND_ICE1712=m +@@ -4981,27 +1669,17 @@ + CONFIG_SND_MAESTRO3=m + CONFIG_SND_MAESTRO3_INPUT=y + CONFIG_SND_MIXART=m +-# CONFIG_SND_NM256 is not set + CONFIG_SND_PCXHR=m +-# CONFIG_SND_RIPTIDE is not set + CONFIG_SND_RME32=m + CONFIG_SND_RME96=m + CONFIG_SND_RME9652=m +-# CONFIG_SND_SONICVIBES is not set + CONFIG_SND_TRIDENT=m + CONFIG_SND_VIA82XX=m + CONFIG_SND_VIA82XX_MODEM=m + CONFIG_SND_VIRTUOSO=m + CONFIG_SND_VX222=m +-# CONFIG_SND_YMFPCI is not set +- +-# +-# HD-Audio +-# +-CONFIG_SND_HDA=m + CONFIG_SND_HDA_INTEL=m + CONFIG_SND_HDA_HWDEP=y +-CONFIG_SND_HDA_RECONFIG=y + CONFIG_SND_HDA_INPUT_BEEP=y + CONFIG_SND_HDA_INPUT_BEEP_MODE=0 + CONFIG_SND_HDA_PATCH_LOADER=y +@@ -5017,16 +1695,8 @@ + CONFIG_SND_HDA_CODEC_CA0132_DSP=y + CONFIG_SND_HDA_CODEC_CMEDIA=m + CONFIG_SND_HDA_CODEC_SI3054=m +-CONFIG_SND_HDA_GENERIC=m +-CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 +-CONFIG_SND_HDA_CORE=m +-CONFIG_SND_HDA_DSP_LOADER=y +-CONFIG_SND_HDA_COMPONENT=y +-CONFIG_SND_HDA_I915=y +-CONFIG_SND_HDA_EXT_CORE=m + CONFIG_SND_HDA_PREALLOC_SIZE=512 + # CONFIG_SND_SPI is not set +-CONFIG_SND_USB=y + CONFIG_SND_USB_AUDIO=m + CONFIG_SND_USB_UA101=m + CONFIG_SND_USB_USX2Y=m +@@ -5036,13 +1706,10 @@ + CONFIG_SND_USB_6FIRE=m + CONFIG_SND_USB_HIFACE=m + CONFIG_SND_BCD2000=m +-CONFIG_SND_USB_LINE6=m + CONFIG_SND_USB_POD=m + CONFIG_SND_USB_PODHD=m + CONFIG_SND_USB_TONEPORT=m + CONFIG_SND_USB_VARIAX=m +-CONFIG_SND_FIREWIRE=y +-CONFIG_SND_FIREWIRE_LIB=m + CONFIG_SND_DICE=m + CONFIG_SND_OXFW=m + CONFIG_SND_ISIGHT=m +@@ -5053,45 +1720,8 @@ + CONFIG_SND_FIREWIRE_MOTU=m + CONFIG_SND_FIREFACE=m + CONFIG_SND_SOC=m +-CONFIG_SND_SOC_COMPRESS=y +-CONFIG_SND_SOC_TOPOLOGY=y +-CONFIG_SND_SOC_ACPI=m +-# CONFIG_SND_SOC_AMD_ACP is not set +-# CONFIG_SND_ATMEL_SOC is not set +-# CONFIG_SND_DESIGNWARE_I2S is not set +- +-# +-# SoC Audio for Freescale CPUs +-# +- +-# +-# Common SoC Audio options for Freescale CPUs: +-# +-# CONFIG_SND_SOC_FSL_ASRC is not set +-# CONFIG_SND_SOC_FSL_SAI is not set +-# CONFIG_SND_SOC_FSL_SSI is not set +-# CONFIG_SND_SOC_FSL_SPDIF is not set +-# CONFIG_SND_SOC_FSL_ESAI is not set +-# CONFIG_SND_SOC_IMX_AUDMUX is not set +-# CONFIG_SND_I2S_HI6210_I2S is not set +-# CONFIG_SND_SOC_IMG is not set +-CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y +-CONFIG_SND_SST_IPC=m +-CONFIG_SND_SST_IPC_ACPI=m +-CONFIG_SND_SOC_INTEL_SST_ACPI=m +-CONFIG_SND_SOC_INTEL_SST=m +-CONFIG_SND_SOC_INTEL_SST_FIRMWARE=m + CONFIG_SND_SOC_INTEL_HASWELL=m +-CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m +-# CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_PCI is not set +-CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_ACPI=m +-CONFIG_SND_SOC_INTEL_SKYLAKE_SSP_CLK=m + CONFIG_SND_SOC_INTEL_SKYLAKE=m +-CONFIG_SND_SOC_ACPI_INTEL_MATCH=m +-CONFIG_SND_SOC_INTEL_MACH=y +-# CONFIG_SND_SOC_INTEL_HASWELL_MACH is not set +-# CONFIG_SND_SOC_INTEL_BDW_RT5677_MACH is not set +-# CONFIG_SND_SOC_INTEL_BROADWELL_MACH is not set + CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m + CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m + 
CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m +@@ -5109,172 +1739,12 @@ + CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH=m + CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH=m + CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH=m +-# CONFIG_SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH is not set +- +-# +-# STMicroelectronics STM32 SOC audio support +-# +-# CONFIG_SND_SOC_XTFPGA_I2S is not set +-# CONFIG_ZX_TDM is not set +-CONFIG_SND_SOC_I2C_AND_SPI=m +- +-# +-# CODEC drivers +-# +-# CONFIG_SND_SOC_AC97_CODEC is not set +-# CONFIG_SND_SOC_ADAU1701 is not set +-# CONFIG_SND_SOC_ADAU1761_I2C is not set +-# CONFIG_SND_SOC_ADAU1761_SPI is not set +-# CONFIG_SND_SOC_ADAU7002 is not set +-# CONFIG_SND_SOC_AK4104 is not set +-# CONFIG_SND_SOC_AK4458 is not set +-# CONFIG_SND_SOC_AK4554 is not set +-# CONFIG_SND_SOC_AK4613 is not set +-# CONFIG_SND_SOC_AK4642 is not set +-# CONFIG_SND_SOC_AK5386 is not set +-# CONFIG_SND_SOC_AK5558 is not set +-# CONFIG_SND_SOC_ALC5623 is not set +-# CONFIG_SND_SOC_BD28623 is not set +-# CONFIG_SND_SOC_BT_SCO is not set +-# CONFIG_SND_SOC_CS35L32 is not set +-# CONFIG_SND_SOC_CS35L33 is not set +-# CONFIG_SND_SOC_CS35L34 is not set +-# CONFIG_SND_SOC_CS35L35 is not set +-# CONFIG_SND_SOC_CS42L42 is not set +-# CONFIG_SND_SOC_CS42L51_I2C is not set +-# CONFIG_SND_SOC_CS42L52 is not set +-# CONFIG_SND_SOC_CS42L56 is not set +-# CONFIG_SND_SOC_CS42L73 is not set +-# CONFIG_SND_SOC_CS4265 is not set +-# CONFIG_SND_SOC_CS4270 is not set +-# CONFIG_SND_SOC_CS4271_I2C is not set +-# CONFIG_SND_SOC_CS4271_SPI is not set +-# CONFIG_SND_SOC_CS42XX8_I2C is not set +-# CONFIG_SND_SOC_CS43130 is not set +-# CONFIG_SND_SOC_CS4349 is not set +-# CONFIG_SND_SOC_CS53L30 is not set +-CONFIG_SND_SOC_DA7213=m +-CONFIG_SND_SOC_DA7219=m +-CONFIG_SND_SOC_DMIC=m +-# CONFIG_SND_SOC_ES7134 is not set +-# CONFIG_SND_SOC_ES7241 is not set +-CONFIG_SND_SOC_ES8316=m +-# CONFIG_SND_SOC_ES8328_I2C is not set +-# CONFIG_SND_SOC_ES8328_SPI is not set +-# CONFIG_SND_SOC_GTM601 is not set +-CONFIG_SND_SOC_HDAC_HDMI=m +-# CONFIG_SND_SOC_INNO_RK3036 is not set +-CONFIG_SND_SOC_MAX98090=m +-CONFIG_SND_SOC_MAX98357A=m +-# CONFIG_SND_SOC_MAX98504 is not set +-# CONFIG_SND_SOC_MAX9867 is not set +-CONFIG_SND_SOC_MAX98927=m +-# CONFIG_SND_SOC_MAX98373 is not set +-# CONFIG_SND_SOC_MAX9860 is not set +-# CONFIG_SND_SOC_MSM8916_WCD_DIGITAL is not set +-# CONFIG_SND_SOC_PCM1681 is not set +-# CONFIG_SND_SOC_PCM1789_I2C is not set +-# CONFIG_SND_SOC_PCM179X_I2C is not set +-# CONFIG_SND_SOC_PCM179X_SPI is not set +-# CONFIG_SND_SOC_PCM186X_I2C is not set +-# CONFIG_SND_SOC_PCM186X_SPI is not set +-# CONFIG_SND_SOC_PCM3168A_I2C is not set +-# CONFIG_SND_SOC_PCM3168A_SPI is not set +-# CONFIG_SND_SOC_PCM512x_I2C is not set +-# CONFIG_SND_SOC_PCM512x_SPI is not set +-CONFIG_SND_SOC_RL6231=m +-CONFIG_SND_SOC_RL6347A=m +-CONFIG_SND_SOC_RT286=m +-CONFIG_SND_SOC_RT298=m +-CONFIG_SND_SOC_RT5514=m +-CONFIG_SND_SOC_RT5514_SPI=m +-# CONFIG_SND_SOC_RT5616 is not set +-# CONFIG_SND_SOC_RT5631 is not set +-CONFIG_SND_SOC_RT5640=m +-CONFIG_SND_SOC_RT5645=m +-CONFIG_SND_SOC_RT5651=m +-CONFIG_SND_SOC_RT5663=m +-CONFIG_SND_SOC_RT5670=m +-# CONFIG_SND_SOC_SGTL5000 is not set +-# CONFIG_SND_SOC_SIMPLE_AMPLIFIER is not set +-# CONFIG_SND_SOC_SIRF_AUDIO_CODEC is not set +-# CONFIG_SND_SOC_SPDIF is not set +-# CONFIG_SND_SOC_SSM2305 is not set +-# CONFIG_SND_SOC_SSM2602_SPI is not set +-# CONFIG_SND_SOC_SSM2602_I2C is not set +-CONFIG_SND_SOC_SSM4567=m +-# CONFIG_SND_SOC_STA32X is not set +-# CONFIG_SND_SOC_STA350 is not set +-# 
CONFIG_SND_SOC_STI_SAS is not set +-# CONFIG_SND_SOC_TAS2552 is not set +-# CONFIG_SND_SOC_TAS5086 is not set +-# CONFIG_SND_SOC_TAS571X is not set +-# CONFIG_SND_SOC_TAS5720 is not set +-# CONFIG_SND_SOC_TAS6424 is not set +-# CONFIG_SND_SOC_TDA7419 is not set +-# CONFIG_SND_SOC_TFA9879 is not set +-# CONFIG_SND_SOC_TLV320AIC23_I2C is not set +-# CONFIG_SND_SOC_TLV320AIC23_SPI is not set +-# CONFIG_SND_SOC_TLV320AIC31XX is not set +-# CONFIG_SND_SOC_TLV320AIC32X4_I2C is not set +-# CONFIG_SND_SOC_TLV320AIC32X4_SPI is not set +-# CONFIG_SND_SOC_TLV320AIC3X is not set +-CONFIG_SND_SOC_TS3A227E=m +-# CONFIG_SND_SOC_TSCS42XX is not set +-# CONFIG_SND_SOC_TSCS454 is not set +-# CONFIG_SND_SOC_WM8510 is not set +-# CONFIG_SND_SOC_WM8523 is not set +-# CONFIG_SND_SOC_WM8524 is not set +-# CONFIG_SND_SOC_WM8580 is not set +-# CONFIG_SND_SOC_WM8711 is not set +-# CONFIG_SND_SOC_WM8728 is not set +-# CONFIG_SND_SOC_WM8731 is not set +-# CONFIG_SND_SOC_WM8737 is not set +-# CONFIG_SND_SOC_WM8741 is not set +-# CONFIG_SND_SOC_WM8750 is not set +-# CONFIG_SND_SOC_WM8753 is not set +-# CONFIG_SND_SOC_WM8770 is not set +-# CONFIG_SND_SOC_WM8776 is not set +-# CONFIG_SND_SOC_WM8782 is not set +-# CONFIG_SND_SOC_WM8804_I2C is not set +-# CONFIG_SND_SOC_WM8804_SPI is not set +-# CONFIG_SND_SOC_WM8903 is not set +-# CONFIG_SND_SOC_WM8960 is not set +-# CONFIG_SND_SOC_WM8962 is not set +-# CONFIG_SND_SOC_WM8974 is not set +-# CONFIG_SND_SOC_WM8978 is not set +-# CONFIG_SND_SOC_WM8985 is not set +-# CONFIG_SND_SOC_ZX_AUD96P22 is not set +-# CONFIG_SND_SOC_MAX9759 is not set +-# CONFIG_SND_SOC_MT6351 is not set +-# CONFIG_SND_SOC_NAU8540 is not set +-# CONFIG_SND_SOC_NAU8810 is not set +-CONFIG_SND_SOC_NAU8824=m +-CONFIG_SND_SOC_NAU8825=m +-# CONFIG_SND_SOC_TPA6130A2 is not set +-# CONFIG_SND_SIMPLE_CARD is not set +-CONFIG_SND_X86=y + CONFIG_HDMI_LPE_AUDIO=m +-CONFIG_SND_SYNTH_EMUX=m +-CONFIG_SND_XEN_FRONTEND=m +-CONFIG_AC97_BUS=m +- +-# +-# HID support +-# +-CONFIG_HID=y + CONFIG_HID_BATTERY_STRENGTH=y + CONFIG_HIDRAW=y + CONFIG_UHID=m +-CONFIG_HID_GENERIC=y +- +-# +-# Special HID drivers +-# + CONFIG_HID_A4TECH=m +-# CONFIG_HID_ACCUTOUCH is not set + CONFIG_HID_ACRUX=m +-# CONFIG_HID_ACRUX_FF is not set + CONFIG_HID_APPLE=m + CONFIG_HID_APPLEIR=m + CONFIG_HID_ASUS=m +@@ -5284,14 +1754,10 @@ + CONFIG_HID_CHERRY=m + CONFIG_HID_CHICONY=m + CONFIG_HID_CORSAIR=m +-# CONFIG_HID_COUGAR is not set + CONFIG_HID_PRODIKEYS=m + CONFIG_HID_CMEDIA=m +-# CONFIG_HID_CP2112 is not set + CONFIG_HID_CYPRESS=m + CONFIG_HID_DRAGONRISE=m +-# CONFIG_DRAGONRISE_FF is not set +-# CONFIG_HID_EMS_FF is not set + CONFIG_HID_ELAN=m + CONFIG_HID_ELECOM=m + CONFIG_HID_ELO=m +@@ -5299,8 +1765,6 @@ + CONFIG_HID_GEMBIRD=m + CONFIG_HID_GFRM=m + CONFIG_HID_HOLTEK=m +-# CONFIG_HOLTEK_FF is not set +-# CONFIG_HID_GOOGLE_HAMMER is not set + CONFIG_HID_GT683R=m + CONFIG_HID_KEYTOUCH=m + CONFIG_HID_KYE=m +@@ -5313,17 +1777,10 @@ + CONFIG_HID_TWINHAN=m + CONFIG_HID_KENSINGTON=m + CONFIG_HID_LCPOWER=m +-CONFIG_HID_LED=m + CONFIG_HID_LENOVO=m + CONFIG_HID_LOGITECH=m + CONFIG_HID_LOGITECH_DJ=m +-CONFIG_HID_LOGITECH_HIDPP=m +-# CONFIG_LOGITECH_FF is not set +-# CONFIG_LOGIRUMBLEPAD2_FF is not set +-# CONFIG_LOGIG940_FF is not set +-# CONFIG_LOGIWHEELS_FF is not set + CONFIG_HID_MAGICMOUSE=y +-# CONFIG_HID_MAYFLASH is not set + # CONFIG_HID_REDRAGON is not set + CONFIG_HID_MICROSOFT=m + CONFIG_HID_MONTEREY=m +@@ -5332,134 +1789,54 @@ + CONFIG_HID_NTRIG=y + CONFIG_HID_ORTEK=m + CONFIG_HID_PANTHERLORD=m +-# CONFIG_PANTHERLORD_FF is not set + 
CONFIG_HID_PENMOUNT=m + CONFIG_HID_PETALYNX=m + CONFIG_HID_PICOLCD=m +-CONFIG_HID_PICOLCD_FB=y +-CONFIG_HID_PICOLCD_BACKLIGHT=y +-CONFIG_HID_PICOLCD_LCD=y +-CONFIG_HID_PICOLCD_LEDS=y +-CONFIG_HID_PICOLCD_CIR=y + CONFIG_HID_PLANTRONICS=m + CONFIG_HID_PRIMAX=m +-# CONFIG_HID_RETRODE is not set + CONFIG_HID_ROCCAT=m + CONFIG_HID_SAITEK=m + CONFIG_HID_SAMSUNG=m + CONFIG_HID_SONY=m + CONFIG_SONY_FF=y + CONFIG_HID_SPEEDLINK=m +-# CONFIG_HID_STEAM is not set + CONFIG_HID_STEELSERIES=m + CONFIG_HID_SUNPLUS=m + CONFIG_HID_RMI=m + CONFIG_HID_GREENASIA=m +-# CONFIG_GREENASIA_FF is not set +-CONFIG_HID_HYPERV_MOUSE=m + CONFIG_HID_SMARTJOYPLUS=m +-# CONFIG_SMARTJOYPLUS_FF is not set + CONFIG_HID_TIVO=m + CONFIG_HID_TOPSEED=m + CONFIG_HID_THINGM=m + CONFIG_HID_THRUSTMASTER=m +-# CONFIG_THRUSTMASTER_FF is not set +-# CONFIG_HID_UDRAW_PS3 is not set + CONFIG_HID_WACOM=m + CONFIG_HID_WIIMOTE=m + CONFIG_HID_XINMO=m + CONFIG_HID_ZEROPLUS=m +-# CONFIG_ZEROPLUS_FF is not set + CONFIG_HID_ZYDACRON=m + CONFIG_HID_SENSOR_HUB=y + CONFIG_HID_SENSOR_CUSTOM_SENSOR=m + CONFIG_HID_ALPS=m +- +-# +-# USB HID support +-# +-CONFIG_USB_HID=y + CONFIG_HID_PID=y + CONFIG_USB_HIDDEV=y +- +-# +-# I2C HID support +-# + CONFIG_I2C_HID=m +- +-# +-# Intel ISH HID support +-# + CONFIG_INTEL_ISH_HID=m +-CONFIG_USB_OHCI_LITTLE_ENDIAN=y +-CONFIG_USB_SUPPORT=y +-CONFIG_USB_COMMON=y +-CONFIG_USB_ARCH_HAS_HCD=y + CONFIG_USB=y +-CONFIG_USB_PCI=y + CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +- +-# +-# Miscellaneous USB options +-# +-CONFIG_USB_DEFAULT_PERSIST=y +-# CONFIG_USB_DYNAMIC_MINORS is not set +-# CONFIG_USB_OTG is not set +-# CONFIG_USB_OTG_WHITELIST is not set + CONFIG_USB_LEDS_TRIGGER_USBPORT=m + CONFIG_USB_MON=y +-CONFIG_USB_WUSB=m + CONFIG_USB_WUSB_CBAF=m +-# CONFIG_USB_WUSB_CBAF_DEBUG is not set +- +-# +-# USB Host Controller Drivers +-# +-# CONFIG_USB_C67X00_HCD is not set + CONFIG_USB_XHCI_HCD=y + CONFIG_USB_XHCI_DBGCAP=y +-CONFIG_USB_XHCI_PCI=y +-# CONFIG_USB_XHCI_PLATFORM is not set + CONFIG_USB_EHCI_HCD=y + CONFIG_USB_EHCI_ROOT_HUB_TT=y +-CONFIG_USB_EHCI_TT_NEWSCHED=y +-CONFIG_USB_EHCI_PCI=y +-# CONFIG_USB_EHCI_HCD_PLATFORM is not set +-# CONFIG_USB_OXU210HP_HCD is not set +-# CONFIG_USB_ISP116X_HCD is not set +-# CONFIG_USB_FOTG210_HCD is not set +-# CONFIG_USB_MAX3421_HCD is not set + CONFIG_USB_OHCI_HCD=y +-CONFIG_USB_OHCI_HCD_PCI=y +-# CONFIG_USB_OHCI_HCD_PLATFORM is not set + CONFIG_USB_UHCI_HCD=y +-# CONFIG_USB_U132_HCD is not set +-# CONFIG_USB_SL811_HCD is not set +-# CONFIG_USB_R8A66597_HCD is not set +-# CONFIG_USB_WHCI_HCD is not set + CONFIG_USB_HWA_HCD=m +-# CONFIG_USB_HCD_BCMA is not set +-# CONFIG_USB_HCD_TEST_MODE is not set +- +-# +-# USB Device Class drivers +-# +-CONFIG_USB_ACM=m + CONFIG_USB_PRINTER=m +-CONFIG_USB_WDM=m + CONFIG_USB_TMC=m +- +-# +-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may +-# +- +-# +-# also be needed; see USB_STORAGE Help for more info +-# + CONFIG_USB_STORAGE=m +-# CONFIG_USB_STORAGE_DEBUG is not set + CONFIG_USB_STORAGE_REALTEK=m +-CONFIG_REALTEK_AUTOPM=y + CONFIG_USB_STORAGE_DATAFAB=m + CONFIG_USB_STORAGE_FREECOM=m + CONFIG_USB_STORAGE_ISD200=m +@@ -5473,27 +1850,12 @@ + CONFIG_USB_STORAGE_CYPRESS_ATACB=m + CONFIG_USB_STORAGE_ENE_UB6250=m + CONFIG_USB_UAS=m +- +-# +-# USB Imaging devices +-# + CONFIG_USB_MDC800=m + CONFIG_USB_MICROTEK=m +-# CONFIG_USBIP_CORE is not set +-# CONFIG_USB_MUSB_HDRC is not set +-# CONFIG_USB_DWC3 is not set +-# CONFIG_USB_DWC2 is not set +-# CONFIG_USB_CHIPIDEA is not set +-# CONFIG_USB_ISP1760 is not set +- +-# +-# USB port drivers +-# + 
CONFIG_USB_USS720=m + CONFIG_USB_SERIAL=y + CONFIG_USB_SERIAL_CONSOLE=y + CONFIG_USB_SERIAL_GENERIC=y +-# CONFIG_USB_SERIAL_SIMPLE is not set + CONFIG_USB_SERIAL_AIRCABLE=m + CONFIG_USB_SERIAL_ARK3116=m + CONFIG_USB_SERIAL_BELKIN=m +@@ -5509,7 +1871,6 @@ + CONFIG_USB_SERIAL_IR=m + CONFIG_USB_SERIAL_EDGEPORT=m + CONFIG_USB_SERIAL_EDGEPORT_TI=m +-# CONFIG_USB_SERIAL_F81232 is not set + CONFIG_USB_SERIAL_F8153X=m + CONFIG_USB_SERIAL_GARMIN=m + CONFIG_USB_SERIAL_IPW=m +@@ -5519,7 +1880,6 @@ + CONFIG_USB_SERIAL_KLSI=m + CONFIG_USB_SERIAL_KOBIL_SCT=m + CONFIG_USB_SERIAL_MCT_U232=m +-# CONFIG_USB_SERIAL_METRO is not set + CONFIG_USB_SERIAL_MOS7720=m + CONFIG_USB_SERIAL_MOS7715_PARPORT=y + CONFIG_USB_SERIAL_MOS7840=m +@@ -5537,219 +1897,99 @@ + CONFIG_USB_SERIAL_TI=m + CONFIG_USB_SERIAL_CYBERJACK=m + CONFIG_USB_SERIAL_XIRCOM=m +-CONFIG_USB_SERIAL_WWAN=m + CONFIG_USB_SERIAL_OPTION=m + CONFIG_USB_SERIAL_OMNINET=m + CONFIG_USB_SERIAL_OPTICON=m + CONFIG_USB_SERIAL_XSENS_MT=m +-# CONFIG_USB_SERIAL_WISHBONE is not set + CONFIG_USB_SERIAL_SSU100=m + CONFIG_USB_SERIAL_QT2=m + CONFIG_USB_SERIAL_UPD78F0730=m + CONFIG_USB_SERIAL_DEBUG=m +- +-# +-# USB Miscellaneous drivers +-# + CONFIG_USB_EMI62=m + CONFIG_USB_EMI26=m + CONFIG_USB_ADUTUX=m + CONFIG_USB_SEVSEG=m + CONFIG_USB_LEGOTOWER=m + CONFIG_USB_LCD=m +-# CONFIG_USB_CYPRESS_CY7C63 is not set +-# CONFIG_USB_CYTHERM is not set + CONFIG_USB_IDMOUSE=m + CONFIG_USB_FTDI_ELAN=m + CONFIG_USB_APPLEDISPLAY=m + CONFIG_USB_SISUSBVGA=m + CONFIG_USB_SISUSBVGA_CON=y + CONFIG_USB_LD=m +-# CONFIG_USB_TRANCEVIBRATOR is not set + CONFIG_USB_IOWARRIOR=m +-# CONFIG_USB_TEST is not set +-# CONFIG_USB_EHSET_TEST_FIXTURE is not set + CONFIG_USB_ISIGHTFW=m +-# CONFIG_USB_YUREX is not set +-CONFIG_USB_EZUSB_FX2=m +-# CONFIG_USB_HUB_USB251XB is not set + CONFIG_USB_HSIC_USB3503=m +-# CONFIG_USB_HSIC_USB4604 is not set +-# CONFIG_USB_LINK_LAYER_TEST is not set +-# CONFIG_USB_CHAOSKEY is not set + CONFIG_USB_ATM=m + CONFIG_USB_SPEEDTOUCH=m + CONFIG_USB_CXACRU=m + CONFIG_USB_UEAGLEATM=m + CONFIG_USB_XUSBATM=m +- +-# +-# USB Physical Layer drivers +-# +-# CONFIG_NOP_USB_XCEIV is not set +-# CONFIG_USB_GPIO_VBUS is not set +-# CONFIG_USB_ISP1301 is not set +-# CONFIG_USB_GADGET is not set + CONFIG_TYPEC=y + CONFIG_TYPEC_TCPM=y +-CONFIG_TYPEC_TCPCI=y + CONFIG_TYPEC_RT1711H=y + CONFIG_TYPEC_FUSB302=m + CONFIG_TYPEC_UCSI=y + CONFIG_UCSI_ACPI=y + CONFIG_TYPEC_TPS6598X=m +- +-# +-# USB Type-C Multiplexer/DeMultiplexer Switch support +-# + CONFIG_TYPEC_MUX_PI3USB30532=m +- +-# +-# USB Type-C Alternate Mode drivers +-# + CONFIG_TYPEC_DP_ALTMODE=y +-CONFIG_USB_ROLE_SWITCH=y + CONFIG_USB_ROLES_INTEL_XHCI=y + CONFIG_USB_LED_TRIG=y +-# CONFIG_USB_ULPI_BUS is not set + CONFIG_UWB=m +-CONFIG_UWB_HWA=m + CONFIG_UWB_WHCI=m + CONFIG_UWB_I1480U=m + CONFIG_MMC=m +-CONFIG_MMC_BLOCK=m +-CONFIG_MMC_BLOCK_MINORS=8 + CONFIG_SDIO_UART=m +-# CONFIG_MMC_TEST is not set +- +-# +-# MMC/SD/SDIO Host Controller Drivers +-# +-# CONFIG_MMC_DEBUG is not set + CONFIG_MMC_SDHCI=m + CONFIG_MMC_SDHCI_PCI=m +-CONFIG_MMC_RICOH_MMC=y + CONFIG_MMC_SDHCI_ACPI=m + CONFIG_MMC_SDHCI_PLTFM=m +-# CONFIG_MMC_SDHCI_F_SDH30 is not set +-# CONFIG_MMC_WBSD is not set + CONFIG_MMC_TIFM_SD=m +-# CONFIG_MMC_SPI is not set + CONFIG_MMC_CB710=m + CONFIG_MMC_VIA_SDMMC=m + CONFIG_MMC_VUB300=m + CONFIG_MMC_USHC=m +-# CONFIG_MMC_USDHI6ROL0 is not set + CONFIG_MMC_REALTEK_PCI=m + CONFIG_MMC_REALTEK_USB=m +-CONFIG_MMC_CQHCI=m +-# CONFIG_MMC_TOSHIBA_PCI is not set +-# CONFIG_MMC_MTK is not set +-# CONFIG_MMC_SDHCI_XENON is not set + 
CONFIG_MEMSTICK=m +-# CONFIG_MEMSTICK_DEBUG is not set +- +-# +-# MemoryStick drivers +-# +-# CONFIG_MEMSTICK_UNSAFE_RESUME is not set + CONFIG_MSPRO_BLOCK=m +-# CONFIG_MS_BLOCK is not set +- +-# +-# MemoryStick Host Controller Drivers +-# + CONFIG_MEMSTICK_TIFM_MS=m + CONFIG_MEMSTICK_JMICRON_38X=m + CONFIG_MEMSTICK_R592=m + CONFIG_MEMSTICK_REALTEK_PCI=m + CONFIG_MEMSTICK_REALTEK_USB=m +-CONFIG_NEW_LEDS=y + CONFIG_LEDS_CLASS=y +-# CONFIG_LEDS_CLASS_FLASH is not set +-# CONFIG_LEDS_BRIGHTNESS_HW_CHANGED is not set +- +-# +-# LED drivers +-# +-# CONFIG_LEDS_APU is not set + CONFIG_LEDS_LM3530=m +-# CONFIG_LEDS_LM3642 is not set +-# CONFIG_LEDS_PCA9532 is not set +-# CONFIG_LEDS_GPIO is not set + CONFIG_LEDS_LP3944=m +-# CONFIG_LEDS_LP3952 is not set +-CONFIG_LEDS_LP55XX_COMMON=m + CONFIG_LEDS_LP5521=m + CONFIG_LEDS_LP5523=m + CONFIG_LEDS_LP5562=m +-# CONFIG_LEDS_LP8501 is not set + CONFIG_LEDS_CLEVO_MAIL=m +-# CONFIG_LEDS_PCA955X is not set +-# CONFIG_LEDS_PCA963X is not set +-# CONFIG_LEDS_DAC124S085 is not set +-# CONFIG_LEDS_PWM is not set +-# CONFIG_LEDS_BD2802 is not set + CONFIG_LEDS_INTEL_SS4200=m + CONFIG_LEDS_LT3593=m +-# CONFIG_LEDS_TCA6507 is not set +-# CONFIG_LEDS_TLC591XX is not set +-# CONFIG_LEDS_LM355x is not set +- +-# +-# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM) +-# + CONFIG_LEDS_BLINKM=m + CONFIG_LEDS_MLXCPLD=m +-# CONFIG_LEDS_MLXREG is not set +-# CONFIG_LEDS_USER is not set +-# CONFIG_LEDS_NIC78BX is not set +- +-# +-# LED Triggers +-# +-CONFIG_LEDS_TRIGGERS=y + CONFIG_LEDS_TRIGGER_TIMER=m + CONFIG_LEDS_TRIGGER_ONESHOT=m + CONFIG_LEDS_TRIGGER_DISK=y +-# CONFIG_LEDS_TRIGGER_MTD is not set + CONFIG_LEDS_TRIGGER_HEARTBEAT=m + CONFIG_LEDS_TRIGGER_BACKLIGHT=m +-# CONFIG_LEDS_TRIGGER_CPU is not set +-# CONFIG_LEDS_TRIGGER_ACTIVITY is not set + CONFIG_LEDS_TRIGGER_GPIO=m + CONFIG_LEDS_TRIGGER_DEFAULT_ON=m +- +-# +-# iptables trigger is under Netfilter config (LED target) +-# + CONFIG_LEDS_TRIGGER_TRANSIENT=m + CONFIG_LEDS_TRIGGER_CAMERA=m +-# CONFIG_LEDS_TRIGGER_PANIC is not set +-# CONFIG_LEDS_TRIGGER_NETDEV is not set +-# CONFIG_ACCESSIBILITY is not set + CONFIG_INFINIBAND=m + CONFIG_INFINIBAND_USER_MAD=m + CONFIG_INFINIBAND_USER_ACCESS=m +-# CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI is not set +-CONFIG_INFINIBAND_USER_MEM=y +-CONFIG_INFINIBAND_ON_DEMAND_PAGING=y +-CONFIG_INFINIBAND_ADDR_TRANS=y +-CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y +-# CONFIG_INFINIBAND_MTHCA is not set +-# CONFIG_INFINIBAND_QIB is not set + CONFIG_INFINIBAND_CXGB4=m + CONFIG_INFINIBAND_I40IW=m + CONFIG_MLX4_INFINIBAND=m + CONFIG_MLX5_INFINIBAND=m +-# CONFIG_INFINIBAND_NES is not set +-# CONFIG_INFINIBAND_OCRDMA is not set + CONFIG_INFINIBAND_VMWARE_PVRDMA=m + CONFIG_INFINIBAND_USNIC=m + CONFIG_INFINIBAND_IPOIB=m + CONFIG_INFINIBAND_IPOIB_CM=y +-CONFIG_INFINIBAND_IPOIB_DEBUG=y +-# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set + CONFIG_INFINIBAND_SRP=m + CONFIG_INFINIBAND_SRPT=m + CONFIG_INFINIBAND_ISER=m +@@ -5758,19 +1998,12 @@ + CONFIG_INFINIBAND_RDMAVT=m + CONFIG_RDMA_RXE=m + CONFIG_INFINIBAND_HFI1=m +-# CONFIG_HFI1_DEBUG_SDMA_ORDER is not set +-# CONFIG_SDMA_VERBOSITY is not set + CONFIG_INFINIBAND_QEDR=m + CONFIG_INFINIBAND_BNXT_RE=m +-CONFIG_EDAC_ATOMIC_SCRUB=y +-CONFIG_EDAC_SUPPORT=y + CONFIG_EDAC=y +-CONFIG_EDAC_LEGACY_SYSFS=y +-# CONFIG_EDAC_DEBUG is not set + CONFIG_EDAC_DECODE_MCE=m + CONFIG_EDAC_GHES=y + CONFIG_EDAC_AMD64=m +-# CONFIG_EDAC_AMD64_ERROR_INJECTION is not set + CONFIG_EDAC_E752X=m + CONFIG_EDAC_I82975X=m + CONFIG_EDAC_I3000=m +@@ -5785,33 
+2018,10 @@ + CONFIG_EDAC_SBRIDGE=m + CONFIG_EDAC_SKX=m + CONFIG_EDAC_PND2=m +-CONFIG_RTC_LIB=y +-CONFIG_RTC_MC146818_LIB=y + CONFIG_RTC_CLASS=y +-CONFIG_RTC_HCTOSYS=y +-CONFIG_RTC_HCTOSYS_DEVICE="rtc0" + # CONFIG_RTC_SYSTOHC is not set +-# CONFIG_RTC_DEBUG is not set +-CONFIG_RTC_NVMEM=y +- +-# +-# RTC interfaces +-# +-CONFIG_RTC_INTF_SYSFS=y +-CONFIG_RTC_INTF_PROC=y +-CONFIG_RTC_INTF_DEV=y +-# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +-# CONFIG_RTC_DRV_TEST is not set +- +-# +-# I2C RTC drivers +-# +-# CONFIG_RTC_DRV_ABB5ZES3 is not set +-# CONFIG_RTC_DRV_ABX80X is not set + CONFIG_RTC_DRV_DS1307=m +-# CONFIG_RTC_DRV_DS1307_CENTURY is not set + CONFIG_RTC_DRV_DS1374=m +-# CONFIG_RTC_DRV_DS1374_WDT is not set + CONFIG_RTC_DRV_DS1672=m + CONFIG_RTC_DRV_MAX6900=m + CONFIG_RTC_DRV_RS5C372=m +@@ -5819,187 +2029,56 @@ + CONFIG_RTC_DRV_ISL12022=m + CONFIG_RTC_DRV_X1205=m + CONFIG_RTC_DRV_PCF8523=m +-# CONFIG_RTC_DRV_PCF85063 is not set +-# CONFIG_RTC_DRV_PCF85363 is not set + CONFIG_RTC_DRV_PCF8563=m + CONFIG_RTC_DRV_PCF8583=m + CONFIG_RTC_DRV_M41T80=m + CONFIG_RTC_DRV_M41T80_WDT=y + CONFIG_RTC_DRV_BQ32K=m +-# CONFIG_RTC_DRV_S35390A is not set + CONFIG_RTC_DRV_FM3130=m +-# CONFIG_RTC_DRV_RX8010 is not set + CONFIG_RTC_DRV_RX8581=m + CONFIG_RTC_DRV_RX8025=m + CONFIG_RTC_DRV_EM3027=m +-# CONFIG_RTC_DRV_RV8803 is not set +- +-# +-# SPI RTC drivers +-# +-# CONFIG_RTC_DRV_M41T93 is not set +-# CONFIG_RTC_DRV_M41T94 is not set +-# CONFIG_RTC_DRV_DS1302 is not set +-# CONFIG_RTC_DRV_DS1305 is not set +-# CONFIG_RTC_DRV_DS1343 is not set +-# CONFIG_RTC_DRV_DS1347 is not set +-# CONFIG_RTC_DRV_DS1390 is not set +-# CONFIG_RTC_DRV_MAX6916 is not set +-# CONFIG_RTC_DRV_R9701 is not set + CONFIG_RTC_DRV_RX4581=m +-# CONFIG_RTC_DRV_RX6110 is not set +-# CONFIG_RTC_DRV_RS5C348 is not set +-# CONFIG_RTC_DRV_MAX6902 is not set +-# CONFIG_RTC_DRV_PCF2123 is not set +-# CONFIG_RTC_DRV_MCP795 is not set +-CONFIG_RTC_I2C_AND_SPI=y +- +-# +-# SPI and I2C RTC drivers +-# + CONFIG_RTC_DRV_DS3232=m +-CONFIG_RTC_DRV_DS3232_HWMON=y +-# CONFIG_RTC_DRV_PCF2127 is not set + CONFIG_RTC_DRV_RV3029C2=m + # CONFIG_RTC_DRV_RV3029_HWMON is not set +- +-# +-# Platform RTC drivers +-# +-CONFIG_RTC_DRV_CMOS=y + CONFIG_RTC_DRV_DS1286=m + CONFIG_RTC_DRV_DS1511=m + CONFIG_RTC_DRV_DS1553=m +-# CONFIG_RTC_DRV_DS1685_FAMILY is not set + CONFIG_RTC_DRV_DS1742=m + CONFIG_RTC_DRV_DS2404=m + CONFIG_RTC_DRV_STK17TA8=m +-# CONFIG_RTC_DRV_M48T86 is not set + CONFIG_RTC_DRV_M48T35=m + CONFIG_RTC_DRV_M48T59=m + CONFIG_RTC_DRV_MSM6242=m + CONFIG_RTC_DRV_BQ4802=m + CONFIG_RTC_DRV_RP5C01=m + CONFIG_RTC_DRV_V3020=m +- +-# +-# on-CPU RTC drivers +-# +-# CONFIG_RTC_DRV_FTRTC010 is not set +- +-# +-# HID Sensor RTC drivers +-# +-# CONFIG_RTC_DRV_HID_SENSOR_TIME is not set +-CONFIG_DMADEVICES=y +-# CONFIG_DMADEVICES_DEBUG is not set +- +-# +-# DMA Devices +-# +-CONFIG_DMA_ENGINE=y +-CONFIG_DMA_VIRTUAL_CHANNELS=y +-CONFIG_DMA_ACPI=y +-# CONFIG_ALTERA_MSGDMA is not set + CONFIG_INTEL_IDMA64=m + CONFIG_INTEL_IOATDMA=m +-# CONFIG_QCOM_HIDMA_MGMT is not set +-# CONFIG_QCOM_HIDMA is not set +-CONFIG_DW_DMAC_CORE=y + CONFIG_DW_DMAC=m +-CONFIG_DW_DMAC_PCI=y +-CONFIG_HSU_DMA=y +- +-# +-# DMA Clients +-# + CONFIG_ASYNC_TX_DMA=y +-# CONFIG_DMATEST is not set +-CONFIG_DMA_ENGINE_RAID=y +- +-# +-# DMABUF options +-# +-CONFIG_SYNC_FILE=y +-# CONFIG_SW_SYNC is not set +-CONFIG_DCA=m +-# CONFIG_AUXDISPLAY is not set +-# CONFIG_PANEL is not set +-CONFIG_UIO=m + CONFIG_UIO_CIF=m + CONFIG_UIO_PDRV_GENIRQ=m +-# CONFIG_UIO_DMEM_GENIRQ is not set + CONFIG_UIO_AEC=m + 
CONFIG_UIO_SERCOS3=m + CONFIG_UIO_PCI_GENERIC=m +-# CONFIG_UIO_NETX is not set +-# CONFIG_UIO_PRUSS is not set +-# CONFIG_UIO_MF624 is not set +-CONFIG_UIO_HV_GENERIC=m +-CONFIG_VFIO_IOMMU_TYPE1=m +-CONFIG_VFIO_VIRQFD=m + CONFIG_VFIO=m + CONFIG_VFIO_NOIOMMU=y + CONFIG_VFIO_PCI=m +-# CONFIG_VFIO_PCI_VGA is not set +-CONFIG_VFIO_PCI_MMAP=y +-CONFIG_VFIO_PCI_INTX=y + # CONFIG_VFIO_PCI_IGD is not set + CONFIG_VFIO_MDEV=m + CONFIG_VFIO_MDEV_DEVICE=m +-CONFIG_IRQ_BYPASS_MANAGER=m +-# CONFIG_VIRT_DRIVERS is not set +-CONFIG_VIRTIO=y +-CONFIG_VIRTIO_MENU=y + CONFIG_VIRTIO_PCI=y +-CONFIG_VIRTIO_PCI_LEGACY=y + CONFIG_VIRTIO_BALLOON=m + CONFIG_VIRTIO_INPUT=m +-# CONFIG_VIRTIO_MMIO is not set +- +-# +-# Microsoft Hyper-V guest support +-# +-CONFIG_HYPERV=m +-CONFIG_HYPERV_TSCPAGE=y +-CONFIG_HYPERV_UTILS=m +-CONFIG_HYPERV_BALLOON=m +- +-# +-# Xen driver support +-# +-CONFIG_XEN_BALLOON=y +-# CONFIG_XEN_SELFBALLOONING is not set +-# CONFIG_XEN_BALLOON_MEMORY_HOTPLUG is not set +-CONFIG_XEN_SCRUB_PAGES_DEFAULT=y +-CONFIG_XEN_DEV_EVTCHN=m +-CONFIG_XENFS=m +-CONFIG_XEN_COMPAT_XENFS=y +-CONFIG_XEN_SYS_HYPERVISOR=y +-CONFIG_XEN_XENBUS_FRONTEND=y +-# CONFIG_XEN_GNTDEV is not set +-# CONFIG_XEN_GRANT_DEV_ALLOC is not set +-# CONFIG_XEN_GRANT_DMA_ALLOC is not set +-CONFIG_SWIOTLB_XEN=y +-CONFIG_XEN_TMEM=m +-# CONFIG_XEN_PVCALLS_FRONTEND is not set +-CONFIG_XEN_PRIVCMD=m +-CONFIG_XEN_EFI=y +-CONFIG_XEN_AUTO_XLATE=y +-CONFIG_XEN_ACPI=y +-# CONFIG_STAGING is not set +-CONFIG_X86_PLATFORM_DEVICES=y + CONFIG_ACER_WMI=m +-# CONFIG_ACER_WIRELESS is not set + CONFIG_ACERHDF=m +-# CONFIG_ALIENWARE_WMI is not set + CONFIG_ASUS_LAPTOP=m + CONFIG_DELL_SMBIOS=m +-CONFIG_DELL_SMBIOS_WMI=y + # CONFIG_DELL_SMBIOS_SMM is not set + CONFIG_DELL_LAPTOP=m + CONFIG_DELL_WMI=m +-CONFIG_DELL_WMI_DESCRIPTOR=m + CONFIG_DELL_WMI_AIO=m + CONFIG_DELL_WMI_LED=m + CONFIG_DELL_SMO8800=m +@@ -6007,7 +2086,6 @@ + CONFIG_FUJITSU_LAPTOP=m + CONFIG_FUJITSU_TABLET=m + CONFIG_AMILO_RFKILL=m +-# CONFIG_GPD_POCKET_FAN is not set + CONFIG_HP_ACCEL=m + CONFIG_HP_WIRELESS=m + CONFIG_HP_WMI=m +@@ -6017,808 +2095,131 @@ + CONFIG_SONY_LAPTOP=m + CONFIG_SONYPI_COMPAT=y + CONFIG_IDEAPAD_LAPTOP=m +-# CONFIG_SURFACE3_WMI is not set + CONFIG_THINKPAD_ACPI=m +-CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y +-# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set +-# CONFIG_THINKPAD_ACPI_DEBUG is not set +-# CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set +-CONFIG_THINKPAD_ACPI_VIDEO=y +-CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y + CONFIG_SENSORS_HDAPS=m +-# CONFIG_INTEL_MENLOW is not set + CONFIG_EEEPC_LAPTOP=m + CONFIG_ASUS_WMI=m + CONFIG_ASUS_NB_WMI=m + CONFIG_EEEPC_WMI=m +-# CONFIG_ASUS_WIRELESS is not set +-CONFIG_ACPI_WMI=m +-CONFIG_WMI_BMOF=m + CONFIG_INTEL_WMI_THUNDERBOLT=m + CONFIG_MSI_WMI=m +-# CONFIG_PEAQ_WMI is not set + CONFIG_TOPSTAR_LAPTOP=m +-# CONFIG_ACPI_TOSHIBA is not set + CONFIG_TOSHIBA_BT_RFKILL=m +-# CONFIG_TOSHIBA_HAPS is not set +-# CONFIG_TOSHIBA_WMI is not set + CONFIG_ACPI_CMPC=m +-# CONFIG_INTEL_INT0002_VGPIO is not set + CONFIG_INTEL_HID_EVENT=m + CONFIG_INTEL_VBTN=m + CONFIG_INTEL_IPS=m + CONFIG_INTEL_PMC_CORE=m +-# CONFIG_IBM_RTL is not set + CONFIG_SAMSUNG_LAPTOP=m +-CONFIG_MXM_WMI=m + CONFIG_INTEL_OAKTRAIL=m + CONFIG_SAMSUNG_Q10=m + CONFIG_APPLE_GMUX=m + CONFIG_INTEL_RST=m +-# CONFIG_INTEL_SMARTCONNECT is not set + CONFIG_PVPANIC=y +-# CONFIG_INTEL_PMC_IPC is not set +-# CONFIG_SURFACE_PRO3_BUTTON is not set +-# CONFIG_INTEL_PUNIT_IPC is not set + CONFIG_MLX_PLATFORM=m +-CONFIG_INTEL_TURBO_MAX_3=y +-# CONFIG_I2C_MULTI_INSTANTIATE is not set +-# 
CONFIG_INTEL_ATOMISP2_PM is not set +-CONFIG_PMC_ATOM=y +-# CONFIG_CHROME_PLATFORMS is not set + CONFIG_MELLANOX_PLATFORM=y + CONFIG_MLXREG_HOTPLUG=m +-# CONFIG_MLXREG_IO is not set +-CONFIG_CLKDEV_LOOKUP=y +-CONFIG_HAVE_CLK_PREPARE=y +-CONFIG_COMMON_CLK=y +- +-# +-# Common Clock Framework +-# +-# CONFIG_COMMON_CLK_MAX9485 is not set +-# CONFIG_COMMON_CLK_SI5351 is not set +-# CONFIG_COMMON_CLK_SI544 is not set +-# CONFIG_COMMON_CLK_CDCE706 is not set +-# CONFIG_COMMON_CLK_CS2000_CP is not set +-# CONFIG_COMMON_CLK_PWM is not set + CONFIG_HWSPINLOCK=y +- +-# +-# Clock Source drivers +-# +-CONFIG_CLKEVT_I8253=y +-CONFIG_I8253_LOCK=y +-CONFIG_CLKBLD_I8253=y + CONFIG_MAILBOX=y + CONFIG_PCC=y +-# CONFIG_ALTERA_MBOX is not set +-CONFIG_IOMMU_API=y +-CONFIG_IOMMU_SUPPORT=y +- +-# +-# Generic IOMMU Pagetable Support +-# +- +-# +-# Generic PASID table support +-# +-# CONFIG_IOMMU_DEBUGFS is not set + CONFIG_IOMMU_DEFAULT_PASSTHROUGH=y +-CONFIG_IOMMU_IOVA=y + CONFIG_AMD_IOMMU=y +-CONFIG_AMD_IOMMU_V2=m +-CONFIG_DMAR_TABLE=y + CONFIG_INTEL_IOMMU=y +-# CONFIG_INTEL_IOMMU_SVM is not set + # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set +-CONFIG_INTEL_IOMMU_FLOPPY_WA=y + CONFIG_IRQ_REMAP=y +-# CONFIG_SMMU_BYPASS_DEV is not set +- +-# +-# Remoteproc drivers +-# +-# CONFIG_REMOTEPROC is not set +- +-# +-# Rpmsg drivers +-# +-# CONFIG_RPMSG_QCOM_GLINK_RPM is not set +-# CONFIG_RPMSG_VIRTIO is not set +-# CONFIG_SOUNDWIRE is not set +- +-# +-# SOC (System On Chip) specific Drivers +-# +- +-# +-# Amlogic SoC drivers +-# +- +-# +-# Broadcom SoC drivers +-# +- +-# +-# NXP/Freescale QorIQ SoC drivers +-# +- +-# +-# i.MX SoC drivers +-# +- +-# +-# Qualcomm SoC drivers +-# +-# CONFIG_SOC_TI is not set +- +-# +-# Xilinx SoC drivers +-# +-# CONFIG_XILINX_VCU is not set +-CONFIG_SOC_HISILICON_LBC=m +-CONFIG_SOC_HISILICON_SYSCTL=m +-# CONFIG_PM_DEVFREQ is not set +-# CONFIG_EXTCON is not set +-# CONFIG_MEMORY is not set + CONFIG_IIO=m +-CONFIG_IIO_BUFFER=y +-# CONFIG_IIO_BUFFER_CB is not set +-# CONFIG_IIO_BUFFER_HW_CONSUMER is not set +-CONFIG_IIO_KFIFO_BUF=m +-CONFIG_IIO_TRIGGERED_BUFFER=m +-# CONFIG_IIO_CONFIGFS is not set +-CONFIG_IIO_TRIGGER=y +-CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 +-# CONFIG_IIO_SW_DEVICE is not set +-# CONFIG_IIO_SW_TRIGGER is not set +- +-# +-# Accelerometers +-# +-# CONFIG_ADIS16201 is not set +-# CONFIG_ADIS16209 is not set +-# CONFIG_ADXL345_I2C is not set +-# CONFIG_ADXL345_SPI is not set +-# CONFIG_BMA180 is not set +-# CONFIG_BMA220 is not set +-# CONFIG_BMC150_ACCEL is not set +-# CONFIG_DA280 is not set +-# CONFIG_DA311 is not set +-# CONFIG_DMARD09 is not set +-# CONFIG_DMARD10 is not set + CONFIG_HID_SENSOR_ACCEL_3D=m +-# CONFIG_IIO_CROS_EC_ACCEL_LEGACY is not set +-# CONFIG_IIO_ST_ACCEL_3AXIS is not set +-# CONFIG_KXSD9 is not set +-# CONFIG_KXCJK1013 is not set +-# CONFIG_MC3230 is not set +-# CONFIG_MMA7455_I2C is not set +-# CONFIG_MMA7455_SPI is not set +-# CONFIG_MMA7660 is not set +-# CONFIG_MMA8452 is not set +-# CONFIG_MMA9551 is not set +-# CONFIG_MMA9553 is not set +-# CONFIG_MXC4005 is not set +-# CONFIG_MXC6255 is not set +-# CONFIG_SCA3000 is not set +-# CONFIG_STK8312 is not set +-# CONFIG_STK8BA50 is not set +- +-# +-# Analog to digital converters +-# +-# CONFIG_AD7266 is not set +-# CONFIG_AD7291 is not set +-# CONFIG_AD7298 is not set +-# CONFIG_AD7476 is not set +-# CONFIG_AD7766 is not set +-# CONFIG_AD7791 is not set +-# CONFIG_AD7793 is not set +-# CONFIG_AD7887 is not set +-# CONFIG_AD7923 is not set +-# CONFIG_AD799X is not set +-# CONFIG_HI8435 is not set +-# 
CONFIG_HX711 is not set +-# CONFIG_INA2XX_ADC is not set +-# CONFIG_LTC2471 is not set +-# CONFIG_LTC2485 is not set +-# CONFIG_LTC2497 is not set +-# CONFIG_MAX1027 is not set +-# CONFIG_MAX11100 is not set +-# CONFIG_MAX1118 is not set +-# CONFIG_MAX1363 is not set +-# CONFIG_MAX9611 is not set +-# CONFIG_MCP320X is not set +-# CONFIG_MCP3422 is not set +-# CONFIG_NAU7802 is not set +-# CONFIG_TI_ADC081C is not set +-# CONFIG_TI_ADC0832 is not set +-# CONFIG_TI_ADC084S021 is not set +-# CONFIG_TI_ADC12138 is not set +-# CONFIG_TI_ADC108S102 is not set +-# CONFIG_TI_ADC128S052 is not set +-# CONFIG_TI_ADC161S626 is not set +-# CONFIG_TI_ADS1015 is not set +-# CONFIG_TI_ADS7950 is not set +-# CONFIG_TI_TLC4541 is not set +-# CONFIG_VIPERBOARD_ADC is not set +- +-# +-# Analog Front Ends +-# +- +-# +-# Amplifiers +-# +-# CONFIG_AD8366 is not set +- +-# +-# Chemical Sensors +-# +-# CONFIG_ATLAS_PH_SENSOR is not set +-# CONFIG_BME680 is not set +-# CONFIG_CCS811 is not set +-# CONFIG_IAQCORE is not set +-# CONFIG_VZ89X is not set +- +-# +-# Hid Sensor IIO Common +-# +-CONFIG_HID_SENSOR_IIO_COMMON=m +-CONFIG_HID_SENSOR_IIO_TRIGGER=m +- +-# +-# SSP Sensor Common +-# +-# CONFIG_IIO_SSP_SENSORHUB is not set +- +-# +-# Counters +-# +- +-# +-# Digital to analog converters +-# +-# CONFIG_AD5064 is not set +-# CONFIG_AD5360 is not set +-# CONFIG_AD5380 is not set +-# CONFIG_AD5421 is not set +-# CONFIG_AD5446 is not set +-# CONFIG_AD5449 is not set +-# CONFIG_AD5592R is not set +-# CONFIG_AD5593R is not set +-# CONFIG_AD5504 is not set +-# CONFIG_AD5624R_SPI is not set +-# CONFIG_LTC2632 is not set +-# CONFIG_AD5686_SPI is not set +-# CONFIG_AD5696_I2C is not set +-# CONFIG_AD5755 is not set +-# CONFIG_AD5758 is not set +-# CONFIG_AD5761 is not set +-# CONFIG_AD5764 is not set +-# CONFIG_AD5791 is not set +-# CONFIG_AD7303 is not set +-# CONFIG_AD8801 is not set +-# CONFIG_DS4424 is not set +-# CONFIG_M62332 is not set +-# CONFIG_MAX517 is not set +-# CONFIG_MCP4725 is not set +-# CONFIG_MCP4922 is not set +-# CONFIG_TI_DAC082S085 is not set +-# CONFIG_TI_DAC5571 is not set +- +-# +-# IIO dummy driver +-# +- +-# +-# Frequency Synthesizers DDS/PLL +-# +- +-# +-# Clock Generator/Distribution +-# +-# CONFIG_AD9523 is not set +- +-# +-# Phase-Locked Loop (PLL) frequency synthesizers +-# +-# CONFIG_ADF4350 is not set +- +-# +-# Digital gyroscope sensors +-# +-# CONFIG_ADIS16080 is not set +-# CONFIG_ADIS16130 is not set +-# CONFIG_ADIS16136 is not set +-# CONFIG_ADIS16260 is not set +-# CONFIG_ADXRS450 is not set +-# CONFIG_BMG160 is not set + CONFIG_HID_SENSOR_GYRO_3D=m +-# CONFIG_MPU3050_I2C is not set +-# CONFIG_IIO_ST_GYRO_3AXIS is not set +-# CONFIG_ITG3200 is not set +- +-# +-# Health Sensors +-# +- +-# +-# Heart Rate Monitors +-# +-# CONFIG_AFE4403 is not set +-# CONFIG_AFE4404 is not set +-# CONFIG_MAX30100 is not set +-# CONFIG_MAX30102 is not set +- +-# +-# Humidity sensors +-# +-# CONFIG_AM2315 is not set +-# CONFIG_DHT11 is not set +-# CONFIG_HDC100X is not set + CONFIG_HID_SENSOR_HUMIDITY=m +-# CONFIG_HTS221 is not set +-# CONFIG_HTU21 is not set +-# CONFIG_SI7005 is not set +-# CONFIG_SI7020 is not set +- +-# +-# Inertial measurement units +-# +-# CONFIG_ADIS16400 is not set +-# CONFIG_ADIS16480 is not set +-# CONFIG_BMI160_I2C is not set +-# CONFIG_BMI160_SPI is not set +-# CONFIG_KMX61 is not set +-# CONFIG_INV_MPU6050_I2C is not set +-# CONFIG_INV_MPU6050_SPI is not set +-# CONFIG_IIO_ST_LSM6DSX is not set +- +-# +-# Light sensors +-# +-# CONFIG_ACPI_ALS is not set +-# CONFIG_ADJD_S311 is 
not set +-# CONFIG_AL3320A is not set +-# CONFIG_APDS9300 is not set +-# CONFIG_APDS9960 is not set +-# CONFIG_BH1750 is not set +-# CONFIG_BH1780 is not set +-# CONFIG_CM32181 is not set +-# CONFIG_CM3232 is not set +-# CONFIG_CM3323 is not set +-# CONFIG_CM36651 is not set +-# CONFIG_GP2AP020A00F is not set +-# CONFIG_SENSORS_ISL29018 is not set +-# CONFIG_SENSORS_ISL29028 is not set +-# CONFIG_ISL29125 is not set + CONFIG_HID_SENSOR_ALS=m + CONFIG_HID_SENSOR_PROX=m +-# CONFIG_JSA1212 is not set +-# CONFIG_RPR0521 is not set +-# CONFIG_LTR501 is not set +-# CONFIG_LV0104CS is not set +-# CONFIG_MAX44000 is not set +-# CONFIG_OPT3001 is not set +-# CONFIG_PA12203001 is not set +-# CONFIG_SI1133 is not set +-# CONFIG_SI1145 is not set +-# CONFIG_STK3310 is not set +-# CONFIG_ST_UVIS25 is not set +-# CONFIG_TCS3414 is not set +-# CONFIG_TCS3472 is not set +-# CONFIG_SENSORS_TSL2563 is not set +-# CONFIG_TSL2583 is not set +-# CONFIG_TSL2772 is not set +-# CONFIG_TSL4531 is not set +-# CONFIG_US5182D is not set +-# CONFIG_VCNL4000 is not set +-# CONFIG_VEML6070 is not set +-# CONFIG_VL6180 is not set +-# CONFIG_ZOPT2201 is not set +- +-# +-# Magnetometer sensors +-# +-# CONFIG_AK8975 is not set +-# CONFIG_AK09911 is not set +-# CONFIG_BMC150_MAGN_I2C is not set +-# CONFIG_BMC150_MAGN_SPI is not set +-# CONFIG_MAG3110 is not set + CONFIG_HID_SENSOR_MAGNETOMETER_3D=m +-# CONFIG_MMC35240 is not set +-# CONFIG_IIO_ST_MAGN_3AXIS is not set +-# CONFIG_SENSORS_HMC5843_I2C is not set +-# CONFIG_SENSORS_HMC5843_SPI is not set +- +-# +-# Multiplexers +-# +- +-# +-# Inclinometer sensors +-# + CONFIG_HID_SENSOR_INCLINOMETER_3D=m + CONFIG_HID_SENSOR_DEVICE_ROTATION=m +- +-# +-# Triggers - standalone +-# +-# CONFIG_IIO_INTERRUPT_TRIGGER is not set +-# CONFIG_IIO_SYSFS_TRIGGER is not set +- +-# +-# Digital potentiometers +-# +-# CONFIG_AD5272 is not set +-# CONFIG_DS1803 is not set +-# CONFIG_MAX5481 is not set +-# CONFIG_MAX5487 is not set +-# CONFIG_MCP4018 is not set +-# CONFIG_MCP4131 is not set +-# CONFIG_MCP4531 is not set +-# CONFIG_TPL0102 is not set +- +-# +-# Digital potentiostats +-# +-# CONFIG_LMP91000 is not set +- +-# +-# Pressure sensors +-# +-# CONFIG_ABP060MG is not set +-# CONFIG_BMP280 is not set + CONFIG_HID_SENSOR_PRESS=m +-# CONFIG_HP03 is not set +-# CONFIG_MPL115_I2C is not set +-# CONFIG_MPL115_SPI is not set +-# CONFIG_MPL3115 is not set +-# CONFIG_MS5611 is not set +-# CONFIG_MS5637 is not set +-# CONFIG_IIO_ST_PRESS is not set +-# CONFIG_T5403 is not set +-# CONFIG_HP206C is not set +-# CONFIG_ZPA2326 is not set +- +-# +-# Lightning sensors +-# +-# CONFIG_AS3935 is not set +- +-# +-# Proximity and distance sensors +-# +-# CONFIG_ISL29501 is not set +-# CONFIG_LIDAR_LITE_V2 is not set +-# CONFIG_RFD77402 is not set +-# CONFIG_SRF04 is not set +-# CONFIG_SX9500 is not set +-# CONFIG_SRF08 is not set +- +-# +-# Resolver to digital converters +-# +-# CONFIG_AD2S1200 is not set +- +-# +-# Temperature sensors +-# +-# CONFIG_MAXIM_THERMOCOUPLE is not set + CONFIG_HID_SENSOR_TEMP=m +-# CONFIG_MLX90614 is not set +-# CONFIG_MLX90632 is not set +-# CONFIG_TMP006 is not set +-# CONFIG_TMP007 is not set +-# CONFIG_TSYS01 is not set +-# CONFIG_TSYS02D is not set + CONFIG_NTB=m +-# CONFIG_NTB_AMD is not set +-# CONFIG_NTB_IDT is not set +-# CONFIG_NTB_INTEL is not set +-# CONFIG_NTB_SWITCHTEC is not set +-# CONFIG_NTB_PINGPONG is not set +-# CONFIG_NTB_TOOL is not set +-# CONFIG_NTB_PERF is not set +-# CONFIG_NTB_TRANSPORT is not set +-# CONFIG_VME_BUS is not set + CONFIG_PWM=y 
+-CONFIG_PWM_SYSFS=y +-CONFIG_PWM_LPSS=m + CONFIG_PWM_LPSS_PCI=m + CONFIG_PWM_LPSS_PLATFORM=m +-# CONFIG_PWM_PCA9685 is not set +- +-# +-# IRQ chip support +-# +-CONFIG_ARM_GIC_MAX_NR=1 +-# CONFIG_IPACK_BUS is not set +-# CONFIG_RESET_CONTROLLER is not set +-# CONFIG_FMC is not set +- +-# +-# PHY Subsystem +-# +-# CONFIG_GENERIC_PHY is not set +-# CONFIG_BCM_KONA_USB2_PHY is not set +-# CONFIG_PHY_PXA_28NM_HSIC is not set +-# CONFIG_PHY_PXA_28NM_USB2 is not set +-# CONFIG_PHY_CPCAP_USB is not set + CONFIG_POWERCAP=y + CONFIG_INTEL_RAPL=m +-# CONFIG_IDLE_INJECT is not set +-# CONFIG_MCB is not set +- +-# +-# Performance monitor support +-# +-CONFIG_RAS=y +-# CONFIG_RAS_CEC is not set ++CONFIG_RAS_CEC=y + CONFIG_THUNDERBOLT=y +- +-# +-# Android +-# +-# CONFIG_ANDROID is not set +-CONFIG_LIBNVDIMM=m +-CONFIG_BLK_DEV_PMEM=m +-CONFIG_ND_BLK=m +-CONFIG_ND_CLAIM=y +-CONFIG_ND_BTT=m +-CONFIG_BTT=y +-CONFIG_ND_PFN=m +-CONFIG_NVDIMM_PFN=y +-CONFIG_NVDIMM_DAX=y +-CONFIG_DAX_DRIVER=y +-CONFIG_DAX=y +-CONFIG_DEV_DAX=m +-CONFIG_DEV_DAX_PMEM=m +-CONFIG_NVMEM=y +- +-# +-# HW tracing support +-# +-# CONFIG_STM is not set +-# CONFIG_INTEL_TH is not set +-# CONFIG_FPGA is not set +-# CONFIG_UNISYS_VISORBUS is not set +-# CONFIG_SIOX is not set +-# CONFIG_UACCE is not set +-# CONFIG_SLIMBUS is not set +- +-# +-# File systems +-# +-CONFIG_DCACHE_WORD_ACCESS=y +-CONFIG_FS_IOMAP=y +-# CONFIG_EXT2_FS is not set +-# CONFIG_EXT3_FS is not set + CONFIG_EXT4_FS=m +-CONFIG_EXT4_USE_FOR_EXT2=y + CONFIG_EXT4_FS_POSIX_ACL=y + CONFIG_EXT4_FS_SECURITY=y +-# CONFIG_EXT4_ENCRYPTION is not set +-# CONFIG_EXT4_DEBUG is not set +-CONFIG_JBD2=m +-# CONFIG_JBD2_DEBUG is not set +-CONFIG_FS_MBCACHE=m +-# CONFIG_REISERFS_FS is not set +-# CONFIG_JFS_FS is not set ++CONFIG_REISERFS_FS=m ++CONFIG_REISERFS_PROC_INFO=y ++CONFIG_JFS_FS=m + CONFIG_XFS_FS=m + CONFIG_XFS_QUOTA=y + CONFIG_XFS_POSIX_ACL=y +-# CONFIG_XFS_RT is not set +-# CONFIG_XFS_ONLINE_SCRUB is not set +-# CONFIG_XFS_WARN is not set +-# CONFIG_XFS_DEBUG is not set + CONFIG_GFS2_FS=m + CONFIG_GFS2_FS_LOCKING_DLM=y +-# CONFIG_OCFS2_FS is not set +-# CONFIG_BTRFS_FS is not set +-# CONFIG_NILFS2_FS is not set +-# CONFIG_F2FS_FS is not set ++CONFIG_BTRFS_FS=m ++CONFIG_NILFS2_FS=m ++CONFIG_F2FS_FS=m + CONFIG_FS_DAX=y +-CONFIG_FS_DAX_PMD=y +-CONFIG_FS_POSIX_ACL=y +-CONFIG_EXPORTFS=y +-CONFIG_EXPORTFS_BLOCK_OPS=y +-CONFIG_FILE_LOCKING=y + # CONFIG_MANDATORY_FILE_LOCKING is not set +-# CONFIG_FS_ENCRYPTION is not set +-CONFIG_FSNOTIFY=y +-CONFIG_DNOTIFY=y +-CONFIG_INOTIFY_USER=y + CONFIG_FANOTIFY=y + CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y + CONFIG_QUOTA=y + CONFIG_QUOTA_NETLINK_INTERFACE=y +-CONFIG_PRINT_QUOTA_WARNING=y +-# CONFIG_QUOTA_DEBUG is not set +-CONFIG_QUOTA_TREE=y +-# CONFIG_QFMT_V1 is not set + CONFIG_QFMT_V2=y +-CONFIG_QUOTACTL=y +-CONFIG_QUOTACTL_COMPAT=y + CONFIG_AUTOFS4_FS=y +-CONFIG_AUTOFS_FS=y + CONFIG_FUSE_FS=m + CONFIG_CUSE=m + CONFIG_OVERLAY_FS=m +-# CONFIG_OVERLAY_FS_REDIRECT_DIR is not set + # CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW is not set +-# CONFIG_OVERLAY_FS_INDEX is not set +-# CONFIG_OVERLAY_FS_XINO_AUTO is not set +-# CONFIG_OVERLAY_FS_METACOPY is not set +- +-# +-# Caches +-# + CONFIG_FSCACHE=m + CONFIG_FSCACHE_STATS=y +-# CONFIG_FSCACHE_HISTOGRAM is not set +-# CONFIG_FSCACHE_DEBUG is not set +-# CONFIG_FSCACHE_OBJECT_LIST is not set + CONFIG_CACHEFILES=m +-# CONFIG_CACHEFILES_DEBUG is not set +-# CONFIG_CACHEFILES_HISTOGRAM is not set +- +-# +-# CD-ROM/DVD Filesystems +-# + CONFIG_ISO9660_FS=m + CONFIG_JOLIET=y + CONFIG_ZISOFS=y + CONFIG_UDF_FS=m 
+- +-# +-# DOS/FAT/NT Filesystems +-# +-CONFIG_FAT_FS=m + CONFIG_MSDOS_FS=m + CONFIG_VFAT_FS=m +-CONFIG_FAT_DEFAULT_CODEPAGE=437 + CONFIG_FAT_DEFAULT_IOCHARSET="ascii" +-# CONFIG_FAT_DEFAULT_UTF8 is not set +- +-# +-# Pseudo filesystems +-# +-CONFIG_PROC_FS=y + CONFIG_PROC_KCORE=y +-CONFIG_PROC_VMCORE=y + CONFIG_PROC_VMCORE_DEVICE_DUMP=y +-CONFIG_PROC_SYSCTL=y +-CONFIG_PROC_PAGE_MONITOR=y +-CONFIG_PROC_CHILDREN=y +-CONFIG_KERNFS=y +-CONFIG_SYSFS=y +-CONFIG_TMPFS=y + CONFIG_TMPFS_POSIX_ACL=y +-CONFIG_TMPFS_XATTR=y + CONFIG_HUGETLBFS=y +-CONFIG_HUGETLB_PAGE=y +-CONFIG_MEMFD_CREATE=y +-CONFIG_ARCH_HAS_GIGANTIC_PAGE=y + CONFIG_CONFIGFS_FS=y + CONFIG_EFIVAR_FS=y +-CONFIG_MISC_FILESYSTEMS=y +-# CONFIG_ORANGEFS_FS is not set +-# CONFIG_ADFS_FS is not set +-# CONFIG_AFFS_FS is not set +-# CONFIG_ECRYPT_FS is not set +-# CONFIG_HFS_FS is not set +-# CONFIG_HFSPLUS_FS is not set +-# CONFIG_BEFS_FS is not set +-# CONFIG_BFS_FS is not set +-# CONFIG_EFS_FS is not set +-# CONFIG_JFFS2_FS is not set +-# CONFIG_UBIFS_FS is not set ++CONFIG_ECRYPT_FS=y ++CONFIG_ECRYPT_FS_MESSAGING=y + CONFIG_CRAMFS=m +-CONFIG_CRAMFS_BLOCKDEV=y +-# CONFIG_CRAMFS_MTD is not set + CONFIG_SQUASHFS=m +-# CONFIG_SQUASHFS_FILE_CACHE is not set + CONFIG_SQUASHFS_FILE_DIRECT=y +-# CONFIG_SQUASHFS_DECOMP_SINGLE is not set +-# CONFIG_SQUASHFS_DECOMP_MULTI is not set + CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y + CONFIG_SQUASHFS_XATTR=y +-CONFIG_SQUASHFS_ZLIB=y +-# CONFIG_SQUASHFS_LZ4 is not set + CONFIG_SQUASHFS_LZO=y + CONFIG_SQUASHFS_XZ=y +-# CONFIG_SQUASHFS_ZSTD is not set +-# CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set +-# CONFIG_SQUASHFS_EMBEDDED is not set +-CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 +-# CONFIG_VXFS_FS is not set +-# CONFIG_MINIX_FS is not set +-# CONFIG_OMFS_FS is not set +-# CONFIG_HPFS_FS is not set +-# CONFIG_QNX4FS_FS is not set +-# CONFIG_QNX6FS_FS is not set +-# CONFIG_ROMFS_FS is not set +-CONFIG_PSTORE=y +-CONFIG_PSTORE_DEFLATE_COMPRESS=y +-# CONFIG_PSTORE_LZO_COMPRESS is not set +-# CONFIG_PSTORE_LZ4_COMPRESS is not set +-# CONFIG_PSTORE_LZ4HC_COMPRESS is not set +-# CONFIG_PSTORE_842_COMPRESS is not set +-# CONFIG_PSTORE_ZSTD_COMPRESS is not set +-CONFIG_PSTORE_COMPRESS=y +-CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT=y +-CONFIG_PSTORE_COMPRESS_DEFAULT="deflate" +-# CONFIG_PSTORE_CONSOLE is not set +-# CONFIG_PSTORE_PMSG is not set +-# CONFIG_PSTORE_FTRACE is not set + CONFIG_PSTORE_RAM=m +-# CONFIG_SYSV_FS is not set +-# CONFIG_UFS_FS is not set +-CONFIG_NETWORK_FILESYSTEMS=y + CONFIG_NFS_FS=m + # CONFIG_NFS_V2 is not set +-CONFIG_NFS_V3=m + CONFIG_NFS_V3_ACL=y + CONFIG_NFS_V4=m +-# CONFIG_NFS_SWAP is not set + CONFIG_NFS_V4_1=y + CONFIG_NFS_V4_2=y +-CONFIG_PNFS_FILE_LAYOUT=m +-CONFIG_PNFS_BLOCK=m +-CONFIG_PNFS_FLEXFILE_LAYOUT=m +-CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" +-# CONFIG_NFS_V4_1_MIGRATION is not set +-CONFIG_NFS_V4_SECURITY_LABEL=y + CONFIG_NFS_FSCACHE=y +-# CONFIG_NFS_USE_LEGACY_DNS is not set +-CONFIG_NFS_USE_KERNEL_DNS=y +-CONFIG_NFS_DEBUG=y + CONFIG_NFSD=m +-CONFIG_NFSD_V2_ACL=y +-CONFIG_NFSD_V3=y + CONFIG_NFSD_V3_ACL=y + CONFIG_NFSD_V4=y +-CONFIG_NFSD_PNFS=y +-# CONFIG_NFSD_BLOCKLAYOUT is not set + CONFIG_NFSD_SCSILAYOUT=y +-# CONFIG_NFSD_FLEXFILELAYOUT is not set + CONFIG_NFSD_V4_SECURITY_LABEL=y +-# CONFIG_NFSD_FAULT_INJECTION is not set +-CONFIG_GRACE_PERIOD=m +-CONFIG_LOCKD=m +-CONFIG_LOCKD_V4=y +-CONFIG_NFS_ACL_SUPPORT=m +-CONFIG_NFS_COMMON=y +-CONFIG_SUNRPC=m +-CONFIG_SUNRPC_GSS=m +-CONFIG_SUNRPC_BACKCHANNEL=y +-CONFIG_RPCSEC_GSS_KRB5=m + CONFIG_SUNRPC_DEBUG=y 
+-CONFIG_SUNRPC_XPRT_RDMA=m + CONFIG_CEPH_FS=m +-# CONFIG_CEPH_FSCACHE is not set + CONFIG_CEPH_FS_POSIX_ACL=y + CONFIG_CIFS=m +-# CONFIG_CIFS_STATS2 is not set +-CONFIG_CIFS_ALLOW_INSECURE_LEGACY=y + CONFIG_CIFS_WEAK_PW_HASH=y + CONFIG_CIFS_UPCALL=y + CONFIG_CIFS_XATTR=y + CONFIG_CIFS_POSIX=y + CONFIG_CIFS_ACL=y +-CONFIG_CIFS_DEBUG=y +-# CONFIG_CIFS_DEBUG2 is not set +-# CONFIG_CIFS_DEBUG_DUMP_KEYS is not set + CONFIG_CIFS_DFS_UPCALL=y +-# CONFIG_CIFS_SMB_DIRECT is not set +-# CONFIG_CIFS_FSCACHE is not set +-# CONFIG_CODA_FS is not set +-# CONFIG_AFS_FS is not set +-CONFIG_NLS=y + CONFIG_NLS_DEFAULT="utf8" + CONFIG_NLS_CODEPAGE_437=y + CONFIG_NLS_CODEPAGE_737=m +@@ -6871,668 +2272,145 @@ + CONFIG_NLS_UTF8=m + CONFIG_DLM=m + CONFIG_DLM_DEBUG=y +- +-# +-# Security options +-# +-CONFIG_KEYS=y +-CONFIG_KEYS_COMPAT=y + CONFIG_PERSISTENT_KEYRINGS=y + CONFIG_BIG_KEYS=y + CONFIG_TRUSTED_KEYS=y +-CONFIG_ENCRYPTED_KEYS=y +-# CONFIG_KEY_DH_OPERATIONS is not set +-# CONFIG_SECURITY_DMESG_RESTRICT is not set + CONFIG_SECURITY=y +-CONFIG_SECURITY_WRITABLE_HOOKS=y +-CONFIG_SECURITYFS=y +-CONFIG_SECURITY_NETWORK=y +-CONFIG_PAGE_TABLE_ISOLATION=y + CONFIG_SECURITY_INFINIBAND=y + CONFIG_SECURITY_NETWORK_XFRM=y +-# CONFIG_SECURITY_PATH is not set + CONFIG_INTEL_TXT=y + CONFIG_LSM_MMAP_MIN_ADDR=65535 +-CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y + CONFIG_HARDENED_USERCOPY=y +-CONFIG_HARDENED_USERCOPY_FALLBACK=y + CONFIG_FORTIFY_SOURCE=y +-# CONFIG_STATIC_USERMODEHELPER is not set + CONFIG_SECURITY_SELINUX=y + CONFIG_SECURITY_SELINUX_BOOTPARAM=y +-CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 +-CONFIG_SECURITY_SELINUX_DISABLE=y +-CONFIG_SECURITY_SELINUX_DEVELOP=y +-CONFIG_SECURITY_SELINUX_AVC_STATS=y +-CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +-# CONFIG_SECURITY_SMACK is not set +-# CONFIG_SECURITY_TOMOYO is not set +-# CONFIG_SECURITY_APPARMOR is not set +-# CONFIG_SECURITY_LOADPIN is not set +-CONFIG_SECURITY_YAMA=y +-CONFIG_INTEGRITY=y ++CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 ++CONFIG_SECURITY_APPARMOR=y + CONFIG_INTEGRITY_SIGNATURE=y + CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +-CONFIG_INTEGRITY_TRUSTED_KEYRING=y +-CONFIG_INTEGRITY_AUDIT=y + CONFIG_IMA=y +-CONFIG_IMA_MEASURE_PCR_IDX=10 +-CONFIG_IMA_LSM_RULES=y +-# CONFIG_IMA_TEMPLATE is not set +-CONFIG_IMA_NG_TEMPLATE=y +-# CONFIG_IMA_SIG_TEMPLATE is not set +-CONFIG_IMA_DEFAULT_TEMPLATE="ima-ng" +-CONFIG_IMA_DEFAULT_HASH_SHA1=y +-# CONFIG_IMA_DEFAULT_HASH_SHA256 is not set +-CONFIG_IMA_DEFAULT_HASH="sha1" +-# CONFIG_IMA_WRITE_POLICY is not set +-# CONFIG_IMA_READ_POLICY is not set + CONFIG_IMA_APPRAISE=y +-# CONFIG_IMA_APPRAISE_BUILD_POLICY is not set +-CONFIG_IMA_APPRAISE_BOOTPARAM=y +-CONFIG_IMA_TRUSTED_KEYRING=y +-# CONFIG_IMA_BLACKLIST_KEYRING is not set +-# CONFIG_IMA_LOAD_X509 is not set + CONFIG_EVM=y +-CONFIG_EVM_ATTR_FSUUID=y +-# CONFIG_EVM_ADD_XATTRS is not set +-# CONFIG_EVM_LOAD_X509 is not set +-CONFIG_DEFAULT_SECURITY_SELINUX=y +-# CONFIG_DEFAULT_SECURITY_DAC is not set +-CONFIG_DEFAULT_SECURITY="selinux" +-CONFIG_XOR_BLOCKS=m +-CONFIG_ASYNC_CORE=m +-CONFIG_ASYNC_MEMCPY=m +-CONFIG_ASYNC_XOR=m +-CONFIG_ASYNC_PQ=m +-CONFIG_ASYNC_RAID6_RECOV=m +-CONFIG_CRYPTO=y +- +-# +-# Crypto core or helper +-# ++CONFIG_DEFAULT_SECURITY_DAC=y + CONFIG_CRYPTO_FIPS=y +-CONFIG_CRYPTO_ALGAPI=y +-CONFIG_CRYPTO_ALGAPI2=y +-CONFIG_CRYPTO_AEAD=y +-CONFIG_CRYPTO_AEAD2=y +-CONFIG_CRYPTO_BLKCIPHER=y +-CONFIG_CRYPTO_BLKCIPHER2=y +-CONFIG_CRYPTO_HASH=y +-CONFIG_CRYPTO_HASH2=y +-CONFIG_CRYPTO_RNG=y +-CONFIG_CRYPTO_RNG2=y +-CONFIG_CRYPTO_RNG_DEFAULT=y 
+-CONFIG_CRYPTO_AKCIPHER2=y +-CONFIG_CRYPTO_AKCIPHER=y +-CONFIG_CRYPTO_KPP2=y +-CONFIG_CRYPTO_KPP=m +-CONFIG_CRYPTO_ACOMP2=y +-CONFIG_CRYPTO_RSA=y +-CONFIG_CRYPTO_DH=m +-CONFIG_CRYPTO_ECDH=m +-CONFIG_CRYPTO_MANAGER=y +-CONFIG_CRYPTO_MANAGER2=y + CONFIG_CRYPTO_USER=m + # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +-CONFIG_CRYPTO_GF128MUL=y +-CONFIG_CRYPTO_NULL=y +-CONFIG_CRYPTO_NULL2=y + CONFIG_CRYPTO_PCRYPT=m +-CONFIG_CRYPTO_WORKQUEUE=y +-CONFIG_CRYPTO_CRYPTD=y +-CONFIG_CRYPTO_AUTHENC=m + CONFIG_CRYPTO_TEST=m +-CONFIG_CRYPTO_SIMD=y +-CONFIG_CRYPTO_GLUE_HELPER_X86=y +- +-# +-# Authenticated Encryption with Associated Data +-# +-CONFIG_CRYPTO_CCM=m +-CONFIG_CRYPTO_GCM=y + CONFIG_CRYPTO_CHACHA20POLY1305=m +-# CONFIG_CRYPTO_AEGIS128 is not set +-# CONFIG_CRYPTO_AEGIS128L is not set +-# CONFIG_CRYPTO_AEGIS256 is not set +-# CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set +-# CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2 is not set +-# CONFIG_CRYPTO_AEGIS256_AESNI_SSE2 is not set +-# CONFIG_CRYPTO_MORUS640 is not set +-# CONFIG_CRYPTO_MORUS640_SSE2 is not set +-# CONFIG_CRYPTO_MORUS1280 is not set +-# CONFIG_CRYPTO_MORUS1280_SSE2 is not set +-# CONFIG_CRYPTO_MORUS1280_AVX2 is not set +-CONFIG_CRYPTO_SEQIV=y +-CONFIG_CRYPTO_ECHAINIV=m +- +-# +-# Block modes +-# +-CONFIG_CRYPTO_CBC=y + CONFIG_CRYPTO_CFB=y +-CONFIG_CRYPTO_CTR=y + CONFIG_CRYPTO_CTS=m +-CONFIG_CRYPTO_ECB=y + CONFIG_CRYPTO_LRW=m + CONFIG_CRYPTO_PCBC=m +-CONFIG_CRYPTO_XTS=m +-# CONFIG_CRYPTO_KEYWRAP is not set +- +-# +-# Hash modes +-# +-CONFIG_CRYPTO_CMAC=m +-CONFIG_CRYPTO_HMAC=y + CONFIG_CRYPTO_XCBC=m + CONFIG_CRYPTO_VMAC=m +- +-# +-# Digest +-# + CONFIG_CRYPTO_CRC32C=y +-CONFIG_CRYPTO_CRC32C_INTEL=m +-CONFIG_CRYPTO_CRC32=m + CONFIG_CRYPTO_CRC32_PCLMUL=m +-CONFIG_CRYPTO_CRCT10DIF=y + CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m +-CONFIG_CRYPTO_GHASH=y +-CONFIG_CRYPTO_POLY1305=m + CONFIG_CRYPTO_POLY1305_X86_64=m +-CONFIG_CRYPTO_MD4=m +-CONFIG_CRYPTO_MD5=y + CONFIG_CRYPTO_MICHAEL_MIC=m + CONFIG_CRYPTO_RMD128=m + CONFIG_CRYPTO_RMD160=m + CONFIG_CRYPTO_RMD256=m + CONFIG_CRYPTO_RMD320=m +-CONFIG_CRYPTO_SHA1=y + CONFIG_CRYPTO_SHA1_SSSE3=y + CONFIG_CRYPTO_SHA256_SSSE3=y + CONFIG_CRYPTO_SHA512_SSSE3=m +-CONFIG_CRYPTO_SHA256=y +-CONFIG_CRYPTO_SHA512=m ++CONFIG_CRYPTO_SHA1_MB=m ++CONFIG_CRYPTO_SHA256_MB=m ++CONFIG_CRYPTO_SHA512_MB=m + CONFIG_CRYPTO_SHA3=m +-# CONFIG_CRYPTO_SM3 is not set ++CONFIG_CRYPTO_SM3=m + CONFIG_CRYPTO_TGR192=m + CONFIG_CRYPTO_WP512=m + CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m +- +-# +-# Ciphers +-# +-CONFIG_CRYPTO_AES=y +-# CONFIG_CRYPTO_AES_TI is not set +-CONFIG_CRYPTO_AES_X86_64=y + CONFIG_CRYPTO_AES_NI_INTEL=y + CONFIG_CRYPTO_ANUBIS=m +-CONFIG_CRYPTO_ARC4=m + CONFIG_CRYPTO_BLOWFISH=m +-CONFIG_CRYPTO_BLOWFISH_COMMON=m + CONFIG_CRYPTO_BLOWFISH_X86_64=m + CONFIG_CRYPTO_CAMELLIA=m +-CONFIG_CRYPTO_CAMELLIA_X86_64=m +-CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=m + CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=m +-CONFIG_CRYPTO_CAST_COMMON=m +-CONFIG_CRYPTO_CAST5=m + CONFIG_CRYPTO_CAST5_AVX_X86_64=m +-CONFIG_CRYPTO_CAST6=m + CONFIG_CRYPTO_CAST6_AVX_X86_64=m +-CONFIG_CRYPTO_DES=m + CONFIG_CRYPTO_DES3_EDE_X86_64=m + CONFIG_CRYPTO_FCRYPT=m + CONFIG_CRYPTO_KHAZAD=m + CONFIG_CRYPTO_SALSA20=m +-CONFIG_CRYPTO_CHACHA20=m + CONFIG_CRYPTO_CHACHA20_X86_64=m + CONFIG_CRYPTO_SEED=m +-CONFIG_CRYPTO_SERPENT=m + CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m +-CONFIG_CRYPTO_SERPENT_AVX_X86_64=m + CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m +-# CONFIG_CRYPTO_SM4 is not set + CONFIG_CRYPTO_TEA=m + CONFIG_CRYPTO_TWOFISH=m +-CONFIG_CRYPTO_TWOFISH_COMMON=m +-CONFIG_CRYPTO_TWOFISH_X86_64=m 
+-CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=m + CONFIG_CRYPTO_TWOFISH_AVX_X86_64=m +- +-# +-# Compression +-# +-CONFIG_CRYPTO_DEFLATE=y +-CONFIG_CRYPTO_LZO=y +-# CONFIG_CRYPTO_842 is not set +-# CONFIG_CRYPTO_LZ4 is not set +-# CONFIG_CRYPTO_LZ4HC is not set +-# CONFIG_CRYPTO_ZSTD is not set +- +-# +-# Random Number Generation +-# + CONFIG_CRYPTO_ANSI_CPRNG=m +-CONFIG_CRYPTO_DRBG_MENU=y +-CONFIG_CRYPTO_DRBG_HMAC=y + CONFIG_CRYPTO_DRBG_HASH=y + CONFIG_CRYPTO_DRBG_CTR=y +-CONFIG_CRYPTO_DRBG=y +-CONFIG_CRYPTO_JITTERENTROPY=y +-CONFIG_CRYPTO_USER_API=y + CONFIG_CRYPTO_USER_API_HASH=y + CONFIG_CRYPTO_USER_API_SKCIPHER=y + CONFIG_CRYPTO_USER_API_RNG=y + CONFIG_CRYPTO_USER_API_AEAD=y +-CONFIG_CRYPTO_HASH_INFO=y +-CONFIG_CRYPTO_HW=y + CONFIG_CRYPTO_DEV_PADLOCK=m + CONFIG_CRYPTO_DEV_PADLOCK_AES=m + CONFIG_CRYPTO_DEV_PADLOCK_SHA=m + CONFIG_CRYPTO_DEV_CCP=y +-CONFIG_CRYPTO_DEV_CCP_DD=m +-CONFIG_CRYPTO_DEV_SP_CCP=y +-CONFIG_CRYPTO_DEV_CCP_CRYPTO=m +-CONFIG_CRYPTO_DEV_SP_PSP=y +-CONFIG_CRYPTO_DEV_QAT=m + CONFIG_CRYPTO_DEV_QAT_DH895xCC=m + CONFIG_CRYPTO_DEV_QAT_C3XXX=m + CONFIG_CRYPTO_DEV_QAT_C62X=m + CONFIG_CRYPTO_DEV_QAT_DH895xCCVF=m + CONFIG_CRYPTO_DEV_QAT_C3XXXVF=m + CONFIG_CRYPTO_DEV_QAT_C62XVF=m +-CONFIG_CRYPTO_DEV_NITROX=m + CONFIG_CRYPTO_DEV_NITROX_CNN55XX=m + CONFIG_CRYPTO_DEV_CHELSIO=m + CONFIG_CHELSIO_IPSEC_INLINE=y +-# CONFIG_CRYPTO_DEV_CHELSIO_TLS is not set +-# CONFIG_CRYPTO_DEV_VIRTIO is not set +-CONFIG_ASYMMETRIC_KEY_TYPE=y +-CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +-CONFIG_X509_CERTIFICATE_PARSER=y +-CONFIG_PKCS7_MESSAGE_PARSER=y +-# CONFIG_PKCS7_TEST_KEY is not set + CONFIG_SIGNED_PE_FILE_VERIFICATION=y +- +-# +-# Certificates for signature checking +-# +-CONFIG_MODULE_SIG_KEY="certs/signing_key.pem" +-CONFIG_SYSTEM_TRUSTED_KEYRING=y +-CONFIG_SYSTEM_TRUSTED_KEYS="" +-# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set +-# CONFIG_SECONDARY_TRUSTED_KEYRING is not set + CONFIG_SYSTEM_BLACKLIST_KEYRING=y +-CONFIG_SYSTEM_BLACKLIST_HASH_LIST="" +-CONFIG_BINARY_PRINTF=y +- +-# +-# Library routines +-# +-CONFIG_RAID6_PQ=m +-CONFIG_BITREVERSE=y +-CONFIG_RATIONAL=y +-CONFIG_GENERIC_STRNCPY_FROM_USER=y +-CONFIG_GENERIC_STRNLEN_USER=y +-CONFIG_GENERIC_NET_UTILS=y +-CONFIG_GENERIC_FIND_FIRST_BIT=y +-CONFIG_GENERIC_PCI_IOMAP=y +-CONFIG_GENERIC_IOMAP=y +-CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y +-CONFIG_ARCH_HAS_FAST_MULTIPLIER=y +-CONFIG_CRC_CCITT=y + CONFIG_CRC16=y +-CONFIG_CRC_T10DIF=y +-CONFIG_CRC_ITU_T=m +-CONFIG_CRC32=y +-# CONFIG_CRC32_SELFTEST is not set +-CONFIG_CRC32_SLICEBY8=y +-# CONFIG_CRC32_SLICEBY4 is not set +-# CONFIG_CRC32_SARWATE is not set +-# CONFIG_CRC32_BIT is not set +-# CONFIG_CRC64 is not set +-# CONFIG_CRC4 is not set + CONFIG_CRC7=m +-CONFIG_LIBCRC32C=m +-CONFIG_CRC8=m +-CONFIG_XXHASH=y +-# CONFIG_RANDOM32_SELFTEST is not set +-CONFIG_ZLIB_INFLATE=y +-CONFIG_ZLIB_DEFLATE=y +-CONFIG_LZO_COMPRESS=y +-CONFIG_LZO_DECOMPRESS=y +-CONFIG_LZ4_DECOMPRESS=y +-CONFIG_XZ_DEC=y +-CONFIG_XZ_DEC_X86=y +-CONFIG_XZ_DEC_POWERPC=y +-CONFIG_XZ_DEC_IA64=y +-CONFIG_XZ_DEC_ARM=y +-CONFIG_XZ_DEC_ARMTHUMB=y +-CONFIG_XZ_DEC_SPARC=y +-CONFIG_XZ_DEC_BCJ=y +-# CONFIG_XZ_DEC_TEST is not set +-CONFIG_DECOMPRESS_GZIP=y +-CONFIG_DECOMPRESS_BZIP2=y +-CONFIG_DECOMPRESS_LZMA=y +-CONFIG_DECOMPRESS_XZ=y +-CONFIG_DECOMPRESS_LZO=y +-CONFIG_DECOMPRESS_LZ4=y +-CONFIG_GENERIC_ALLOCATOR=y +-CONFIG_REED_SOLOMON=m +-CONFIG_REED_SOLOMON_ENC8=y +-CONFIG_REED_SOLOMON_DEC8=y +-CONFIG_TEXTSEARCH=y +-CONFIG_TEXTSEARCH_KMP=m +-CONFIG_TEXTSEARCH_BM=m +-CONFIG_TEXTSEARCH_FSM=m +-CONFIG_BTREE=y +-CONFIG_INTERVAL_TREE=y 
+-CONFIG_RADIX_TREE_MULTIORDER=y +-CONFIG_ASSOCIATIVE_ARRAY=y +-CONFIG_HAS_IOMEM=y +-CONFIG_HAS_IOPORT_MAP=y +-CONFIG_HAS_DMA=y +-CONFIG_NEED_SG_DMA_LENGTH=y +-CONFIG_NEED_DMA_MAP_STATE=y +-CONFIG_ARCH_DMA_ADDR_T_64BIT=y +-CONFIG_DMA_DIRECT_OPS=y +-CONFIG_DMA_VIRT_OPS=y +-CONFIG_SWIOTLB=y +-CONFIG_SGL_ALLOC=y +-CONFIG_CHECK_SIGNATURE=y +-CONFIG_CPUMASK_OFFSTACK=y +-CONFIG_CPU_RMAP=y +-CONFIG_DQL=y +-CONFIG_GLOB=y +-# CONFIG_GLOB_SELFTEST is not set +-CONFIG_NLATTR=y +-CONFIG_CLZ_TAB=y +-CONFIG_CORDIC=m +-# CONFIG_DDR is not set +-CONFIG_IRQ_POLL=y +-CONFIG_MPILIB=y +-CONFIG_SIGNATURE=y +-CONFIG_OID_REGISTRY=y +-CONFIG_UCS2_STRING=y +-CONFIG_FONT_SUPPORT=y +-# CONFIG_FONTS is not set ++CONFIG_FONTS=y + CONFIG_FONT_8x8=y +-CONFIG_FONT_8x16=y +-CONFIG_SG_POOL=y +-CONFIG_ARCH_HAS_SG_CHAIN=y +-CONFIG_ARCH_HAS_PMEM_API=y +-CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y +-CONFIG_ARCH_HAS_UACCESS_MCSAFE=y +-CONFIG_SBITMAP=y +-CONFIG_PARMAN=m +-# CONFIG_STRING_SELFTEST is not set +- +-# +-# Kernel hacking +-# +- +-# +-# printk and dmesg options +-# + CONFIG_PRINTK_TIME=y +-CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 +-CONFIG_CONSOLE_LOGLEVEL_QUIET=4 +-CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 + CONFIG_BOOT_PRINTK_DELAY=y + CONFIG_DYNAMIC_DEBUG=y +- +-# +-# Compile-time checks and compiler options +-# + CONFIG_DEBUG_INFO=y +-# CONFIG_DEBUG_INFO_REDUCED is not set +-# CONFIG_DEBUG_INFO_SPLIT is not set + CONFIG_DEBUG_INFO_DWARF4=y +-# CONFIG_GDB_SCRIPTS is not set +-CONFIG_ENABLE_MUST_CHECK=y +-CONFIG_FRAME_WARN=2048 + CONFIG_STRIP_ASM_SYMS=y +-# CONFIG_READABLE_ASM is not set + # CONFIG_UNUSED_SYMBOLS is not set +-# CONFIG_PAGE_OWNER is not set +-CONFIG_DEBUG_FS=y + CONFIG_HEADERS_CHECK=y + CONFIG_DEBUG_SECTION_MISMATCH=y +-CONFIG_SECTION_MISMATCH_WARN_ONLY=y +-CONFIG_STACK_VALIDATION=y +-# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set +-CONFIG_MAGIC_SYSRQ=y +-CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 +-CONFIG_MAGIC_SYSRQ_SERIAL=y + CONFIG_DEBUG_KERNEL=y +- +-# +-# Memory Debugging +-# +-# CONFIG_PAGE_EXTENSION is not set +-# CONFIG_DEBUG_PAGEALLOC is not set +-# CONFIG_PAGE_POISONING is not set +-# CONFIG_DEBUG_PAGE_REF is not set +-# CONFIG_DEBUG_RODATA_TEST is not set +-# CONFIG_DEBUG_OBJECTS is not set +-# CONFIG_SLUB_DEBUG_ON is not set +-# CONFIG_SLUB_STATS is not set +-CONFIG_HAVE_DEBUG_KMEMLEAK=y +-# CONFIG_DEBUG_KMEMLEAK is not set +-# CONFIG_DEBUG_STACK_USAGE is not set +-# CONFIG_DEBUG_VM is not set +-CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y +-# CONFIG_DEBUG_VIRTUAL is not set +-CONFIG_DEBUG_MEMORY_INIT=y +-# CONFIG_DEBUG_PER_CPU_MAPS is not set +-CONFIG_HAVE_DEBUG_STACKOVERFLOW=y + CONFIG_DEBUG_STACKOVERFLOW=y +-CONFIG_HAVE_ARCH_KASAN=y +-# CONFIG_KASAN is not set +-CONFIG_ARCH_HAS_KCOV=y + CONFIG_DEBUG_SHIRQ=y +- +-# +-# Debug Lockups and Hangs +-# +-CONFIG_LOCKUP_DETECTOR=y +-CONFIG_SOFTLOCKUP_DETECTOR=y +-# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +-CONFIG_HARDLOCKUP_DETECTOR_PERF=y +-CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y + CONFIG_HARDLOCKUP_DETECTOR=y + CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +-CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=1 +-CONFIG_DETECT_HUNG_TASK=y +-CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 +-# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +-CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 +-# CONFIG_WQ_WATCHDOG is not set + CONFIG_PANIC_ON_OOPS=y +-CONFIG_PANIC_ON_OOPS_VALUE=1 +-CONFIG_PANIC_TIMEOUT=0 +-CONFIG_SCHED_DEBUG=y +-CONFIG_SCHED_INFO=y + CONFIG_SCHEDSTATS=y +-# CONFIG_SCHED_STACK_END_CHECK is not set +-# CONFIG_DEBUG_TIMEKEEPING is not set +- +-# +-# Lock Debugging 
(spinlocks, mutexes, etc...) +-# +-CONFIG_LOCK_DEBUGGING_SUPPORT=y +-# CONFIG_PROVE_LOCKING is not set +-# CONFIG_LOCK_STAT is not set +-# CONFIG_DEBUG_RT_MUTEXES is not set +-# CONFIG_DEBUG_SPINLOCK is not set +-# CONFIG_DEBUG_MUTEXES is not set +-# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set +-# CONFIG_DEBUG_RWSEMS is not set +-# CONFIG_DEBUG_LOCK_ALLOC is not set +-# CONFIG_DEBUG_ATOMIC_SLEEP is not set +-# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +-# CONFIG_LOCK_TORTURE_TEST is not set +-# CONFIG_WW_MUTEX_SELFTEST is not set +-CONFIG_STACKTRACE=y +-# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set +-# CONFIG_DEBUG_KOBJECT is not set +-CONFIG_DEBUG_BUGVERBOSE=y + CONFIG_DEBUG_LIST=y +-# CONFIG_DEBUG_PI_LIST is not set +-# CONFIG_DEBUG_SG is not set +-# CONFIG_DEBUG_NOTIFIERS is not set +-# CONFIG_DEBUG_CREDENTIALS is not set +- +-# +-# RCU Debugging +-# +-# CONFIG_RCU_PERF_TEST is not set +-# CONFIG_RCU_TORTURE_TEST is not set + CONFIG_RCU_CPU_STALL_TIMEOUT=60 +-# CONFIG_RCU_TRACE is not set +-# CONFIG_RCU_EQS_DEBUG is not set +-# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set +-# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +-# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set +-# CONFIG_NOTIFIER_ERROR_INJECTION is not set +-CONFIG_FUNCTION_ERROR_INJECTION=y +-# CONFIG_FAULT_INJECTION is not set +-# CONFIG_LATENCYTOP is not set +-CONFIG_USER_STACKTRACE_SUPPORT=y +-CONFIG_NOP_TRACER=y +-CONFIG_HAVE_FUNCTION_TRACER=y +-CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +-CONFIG_HAVE_DYNAMIC_FTRACE=y +-CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y +-CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +-CONFIG_HAVE_SYSCALL_TRACEPOINTS=y +-CONFIG_HAVE_FENTRY=y +-CONFIG_HAVE_C_RECORDMCOUNT=y +-CONFIG_TRACER_MAX_TRACE=y +-CONFIG_TRACE_CLOCK=y +-CONFIG_RING_BUFFER=y +-CONFIG_EVENT_TRACING=y +-CONFIG_CONTEXT_SWITCH_TRACER=y +-CONFIG_RING_BUFFER_ALLOW_SWAP=y +-CONFIG_TRACING=y +-CONFIG_GENERIC_TRACER=y +-CONFIG_TRACING_SUPPORT=y +-CONFIG_FTRACE=y +-CONFIG_FUNCTION_TRACER=y +-CONFIG_FUNCTION_GRAPH_TRACER=y +-# CONFIG_PREEMPTIRQ_EVENTS is not set +-# CONFIG_IRQSOFF_TRACER is not set + CONFIG_SCHED_TRACER=y + CONFIG_HWLAT_TRACER=y + CONFIG_FTRACE_SYSCALLS=y +-CONFIG_TRACER_SNAPSHOT=y +-# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set +-CONFIG_BRANCH_PROFILE_NONE=y +-# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set + CONFIG_STACK_TRACER=y + CONFIG_BLK_DEV_IO_TRACE=y +-CONFIG_KPROBE_EVENTS=y +-# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set +-CONFIG_UPROBE_EVENTS=y +-CONFIG_BPF_EVENTS=y +-CONFIG_PROBE_EVENTS=y +-CONFIG_DYNAMIC_FTRACE=y +-CONFIG_DYNAMIC_FTRACE_WITH_REGS=y + CONFIG_FUNCTION_PROFILER=y +-# CONFIG_BPF_KPROBE_OVERRIDE is not set +-CONFIG_FTRACE_MCOUNT_RECORD=y +-# CONFIG_FTRACE_STARTUP_TEST is not set +-# CONFIG_MMIOTRACE is not set +-CONFIG_TRACING_MAP=y + CONFIG_HIST_TRIGGERS=y +-# CONFIG_TRACEPOINT_BENCHMARK is not set + CONFIG_RING_BUFFER_BENCHMARK=m +-# CONFIG_RING_BUFFER_STARTUP_TEST is not set +-# CONFIG_PREEMPTIRQ_DELAY_TEST is not set +-# CONFIG_TRACE_EVAL_MAP_FILE is not set + # CONFIG_TRACING_EVENTS_GPIO is not set + CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +-# CONFIG_DMA_API_DEBUG is not set +-CONFIG_RUNTIME_TESTING_MENU=y +-# CONFIG_LKDTM is not set +-# CONFIG_TEST_LIST_SORT is not set +-# CONFIG_TEST_SORT is not set +-# CONFIG_KPROBES_SANITY_TEST is not set +-# CONFIG_BACKTRACE_SELF_TEST is not set +-# CONFIG_RBTREE_TEST is not set +-# CONFIG_INTERVAL_TREE_TEST is not set +-# CONFIG_PERCPU_TEST is not set + CONFIG_ATOMIC64_SELFTEST=y + CONFIG_ASYNC_RAID6_TEST=m +-# CONFIG_TEST_HEXDUMP is not set +-# CONFIG_TEST_STRING_HELPERS is not set + 
CONFIG_TEST_KSTRTOX=y +-# CONFIG_TEST_PRINTF is not set +-# CONFIG_TEST_BITMAP is not set +-# CONFIG_TEST_BITFIELD is not set +-# CONFIG_TEST_UUID is not set +-# CONFIG_TEST_OVERFLOW is not set +-# CONFIG_TEST_RHASHTABLE is not set +-# CONFIG_TEST_HASH is not set +-# CONFIG_TEST_IDA is not set +-# CONFIG_TEST_PARMAN is not set +-# CONFIG_TEST_LKM is not set +-# CONFIG_TEST_USER_COPY is not set +-# CONFIG_TEST_BPF is not set +-# CONFIG_FIND_BIT_BENCHMARK is not set +-# CONFIG_TEST_FIRMWARE is not set +-# CONFIG_TEST_SYSCTL is not set +-# CONFIG_TEST_UDELAY is not set +-# CONFIG_TEST_STATIC_KEYS is not set +-# CONFIG_TEST_KMOD is not set +-# CONFIG_MEMTEST is not set +-# CONFIG_BUG_ON_DATA_CORRUPTION is not set +-# CONFIG_SAMPLES is not set +-CONFIG_HAVE_ARCH_KGDB=y + CONFIG_KGDB=y +-CONFIG_KGDB_SERIAL_CONSOLE=y + CONFIG_KGDB_TESTS=y +-# CONFIG_KGDB_TESTS_ON_BOOT is not set + CONFIG_KGDB_LOW_LEVEL_TRAP=y + CONFIG_KGDB_KDB=y + CONFIG_KDB_DEFAULT_ENABLE=0x0 + CONFIG_KDB_KEYBOARD=y +-CONFIG_KDB_CONTINUE_CATASTROPHIC=0 +-CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y +-# CONFIG_UBSAN is not set +-CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y +-CONFIG_STRICT_DEVMEM=y +-# CONFIG_IO_STRICT_DEVMEM is not set +-CONFIG_TRACE_IRQFLAGS_SUPPORT=y +-CONFIG_EARLY_PRINTK_USB=y + # CONFIG_X86_VERBOSE_BOOTUP is not set +-CONFIG_EARLY_PRINTK=y + CONFIG_EARLY_PRINTK_DBGP=y + CONFIG_EARLY_PRINTK_EFI=y + CONFIG_EARLY_PRINTK_USB_XDBC=y +-# CONFIG_X86_PTDUMP is not set +-# CONFIG_EFI_PGT_DUMP is not set +-# CONFIG_DEBUG_WX is not set +-CONFIG_DOUBLEFAULT=y +-# CONFIG_DEBUG_TLBFLUSH is not set +-CONFIG_HAVE_MMIOTRACE_SUPPORT=y + CONFIG_X86_DECODER_SELFTEST=y +-CONFIG_IO_DELAY_TYPE_0X80=0 +-CONFIG_IO_DELAY_TYPE_0XED=1 +-CONFIG_IO_DELAY_TYPE_UDELAY=2 +-CONFIG_IO_DELAY_TYPE_NONE=3 +-CONFIG_IO_DELAY_0X80=y +-# CONFIG_IO_DELAY_0XED is not set +-# CONFIG_IO_DELAY_UDELAY is not set +-# CONFIG_IO_DELAY_NONE is not set +-CONFIG_DEFAULT_IO_DELAY_TYPE=0 + CONFIG_DEBUG_BOOT_PARAMS=y +-# CONFIG_CPA_DEBUG is not set + CONFIG_OPTIMIZE_INLINING=y +-# CONFIG_DEBUG_ENTRY is not set +-# CONFIG_DEBUG_NMI_SELFTEST is not set + # CONFIG_X86_DEBUG_FPU is not set +-# CONFIG_PUNIT_ATOM_DEBUG is not set +-CONFIG_UNWINDER_ORC=y +-# CONFIG_UNWINDER_FRAME_POINTER is not set diff --git a/openeuler_defconfig_x86_2.patch b/openeuler_defconfig_x86_2.patch new file mode 100644 index 0000000000000000000000000000000000000000..1da3ce725805cc4a39ce77ed6b27f560fb9f6e29 --- /dev/null +++ b/openeuler_defconfig_x86_2.patch @@ -0,0 +1,58 @@ +--- kernel/arch/x86/configs/openeuler_defconfig 2021-05-11 15:32:22.729752809 +0800 ++++ kernel-1/arch/x86/configs/openeuler_defconfig 2021-05-11 15:57:55.840614246 +0800 +@@ -157,7 +157,20 @@ + CONFIG_KARMA_PARTITION=y + CONFIG_IOSCHED_BFQ=y + CONFIG_BFQ_GROUP_IOSCHED=y ++CONFIG_XENO_DRIVERS_16550A=m ++CONFIG_XENO_DRIVERS_16550A_ANY=y ++CONFIG_XENO_DRIVERS_CAN=y ++CONFIG_XENO_DRIVERS_CAN_LOOPBACK=y ++CONFIG_XENO_DRIVERS_CAN_VIRT=y ++CONFIG_XENO_DRIVERS_NET=m ++CONFIG_XENO_DRIVERS_NET_ETH_P_ALL=y ++CONFIG_XENO_DRIVERS_NET_RTWLAN=y ++CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING=y ++CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER=y ++CONFIG_XENO_DRIVERS_NET_RTIPV4_DEBUG=y ++CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP=m + CONFIG_XENO_DRIVERS_RTIPC=y ++CONFIG_XENO_DRIVERS_GPIO=y + CONFIG_BINFMT_MISC=m + CONFIG_MEMORY_HOTPLUG=y + # CONFIG_COMPACTION is not set +@@ -174,7 +187,6 @@ + CONFIG_ZSMALLOC_STAT=y + CONFIG_DEFERRED_STRUCT_PAGE_INIT=y + CONFIG_IDLE_PAGE_TRACKING=y +-CONFIG_NET=y + CONFIG_PACKET=y + CONFIG_PACKET_DIAG=m + CONFIG_UNIX=y +@@ -1154,15 +1166,15 @@ 
+ CONFIG_NOZOMI=m + CONFIG_N_HDLC=m + CONFIG_N_GSM=m +-CONFIG_SERIAL_8250=y ++CONFIG_SERIAL_8250=m + # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +-CONFIG_SERIAL_8250_CONSOLE=y ++CONFIG_SERIAL_8250_CS=m + CONFIG_SERIAL_8250_NR_UARTS=32 + CONFIG_SERIAL_8250_EXTENDED=y + CONFIG_SERIAL_8250_MANY_PORTS=y + CONFIG_SERIAL_8250_SHARE_IRQ=y + CONFIG_SERIAL_8250_RSA=y +-CONFIG_SERIAL_8250_DW=y ++CONFIG_SERIAL_8250_DW=m + CONFIG_SERIAL_JSM=m + CONFIG_SERIAL_ARC=m + CONFIG_PRINTER=m +@@ -2057,6 +2069,7 @@ + CONFIG_INTEL_IDMA64=m + CONFIG_INTEL_IOATDMA=m + CONFIG_DW_DMAC=m ++CONFIG_DW_DMAC_PCI=y + CONFIG_ASYNC_TX_DMA=y + CONFIG_UIO_CIF=m + CONFIG_UIO_PDRV_GENIRQ=m